From 88b1833ebc600b4269be123146e02ebec2c3a19f Mon Sep 17 00:00:00 2001
From: Dan Povey
Date: Tue, 4 Oct 2016 20:12:46 -0400
Subject: [PATCH 001/530] Cosmetic changes in nnet3 code

---
 src/nnet3/nnet-compile.cc       | 2 +-
 src/nnet3/nnet-compile.h        | 1 -
 src/nnet3/nnet-computation.cc   | 8 +++-----
 src/nnet3/nnet-compute.h        | 5 ++---
 src/nnet3/nnet-optimize-utils.h | 5 ++++-
 5 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/nnet3/nnet-compile.cc b/src/nnet3/nnet-compile.cc
index d58a58e6f2b..85dec668fe8 100644
--- a/src/nnet3/nnet-compile.cc
+++ b/src/nnet3/nnet-compile.cc
@@ -132,7 +132,7 @@ void Compiler::ComputeDerivNeeded(
   unordered_set<int32>::iterator iter = input_steps.begin(),
       end = input_steps.end();
 
-  // if some step that we depends on needs a derivative, we need the derivative.
+  // if some step that we depend on needs a derivative, we need the derivative.
   for (; iter != end; ++iter) {
     int32 dep_step = *iter;
     KALDI_ASSERT(dep_step < step);
diff --git a/src/nnet3/nnet-compile.h b/src/nnet3/nnet-compile.h
index 2d187bb6876..4dda38ae723 100644
--- a/src/nnet3/nnet-compile.h
+++ b/src/nnet3/nnet-compile.h
@@ -100,7 +100,6 @@ class Compiler {
   // this sets up cindex_id_to_location_.
   void CreateLocationInfo(const std::vector<std::vector<int32> > &by_step);
 
-
   // Computes the set of step-indexes of preceding steps that this step depends
   // on.  Assumes CreateLocationInfo() has already been called.  Requires
   // 'step_index' only to handle a special case, that if 'this_step' is a
diff --git a/src/nnet3/nnet-computation.cc b/src/nnet3/nnet-computation.cc
index 5c0e8911037..1cccaa11d0c 100644
--- a/src/nnet3/nnet-computation.cc
+++ b/src/nnet3/nnet-computation.cc
@@ -1,7 +1,5 @@
 // nnet3/nnet-computation.cc
 
-// nnet3/nnet-computation.cc
-
 // Copyright      2015  Johns Hopkins University (author: Daniel Povey)
 //                2015  Xiaohui Zhang
 
@@ -232,7 +230,7 @@ void NnetComputation::Command::Read(std::istream &is, bool binary) {
     command_type = static_cast<CommandType>(command_type_int);
   } else {
     std::string command_type_str;
-    getline(is, command_type_str);
+    getline(is, command_type_str);
     if (command_type_str == "kAllocMatrixZeroed") {
       command_type = kAllocMatrixZeroed;
     } else if (command_type_str == "kAllocMatrixUndefined") {
@@ -690,7 +688,7 @@ void NnetComputation::Read(std::istream &is, bool binary) {
   std::vector<ComponentPrecomputedIndexes*> component_precomputed_indexes_tmp;
   for (size_t c = 0; c < num_component_precomputed_indexes; c++) {
     bool is_null; // a boolean indicating whether the pointer should be NULL.
-    ReadBasicType(is, binary, &is_null);
+    ReadBasicType(is, binary, &is_null);
     if (!is_null) {
       ComponentPrecomputedIndexes* p = ComponentPrecomputedIndexes::ReadNew(is, binary);
       component_precomputed_indexes_tmp.push_back(p);
@@ -786,7 +784,7 @@ void NnetComputation::Write(std::ostream &os, bool binary) const {
   for (size_t c = 0; c < submatrices.size(); c++) {
     submatrices[c].Write(os, binary);
   }
-  
+
   if (!binary) os << std::endl;
   WriteToken(os, binary, "");
   WriteBasicType(os, binary, component_precomputed_indexes.size());
diff --git a/src/nnet3/nnet-compute.h b/src/nnet3/nnet-compute.h
index abf7a0df12c..d1c28e8bd7c 100644
--- a/src/nnet3/nnet-compute.h
+++ b/src/nnet3/nnet-compute.h
@@ -77,9 +77,8 @@ class NnetComputer {
                     CuMatrix<BaseFloat> *input);
 
   /// This function calls AcceptInput() in turn on all the inputs in the
-  /// training example (provide example.io; this interface makes it easy to work
-  /// with CCTC examples too).  It needs "nnet" only in order to distinguish
-  /// inputs from outputs.
+  /// training example.  It needs "nnet" only in order to distinguish inputs
It needs "nnet" only in order to distinguish inputs + /// from outputs. void AcceptInputs(const Nnet &nnet, const std::vector &io); diff --git a/src/nnet3/nnet-optimize-utils.h b/src/nnet3/nnet-optimize-utils.h index d82867252ec..26f9eab84dd 100644 --- a/src/nnet3/nnet-optimize-utils.h +++ b/src/nnet3/nnet-optimize-utils.h @@ -139,7 +139,10 @@ struct NnetOptimizeOptions; // Forward declaration. automatically detect that there are duplicate submatrices, and will merge them, as well as removing the now-unused matrix indexes. After merging, we will mark the variables (i.e. row-ranges) underlying s1 and s2 as being - "dirty" so they can no longer be merged during the lifetime of this class. + "dirty" so they can no longer be merged during the lifetime of this class-- + this is so we don't have to think to hard; we apply this optimization + multiple times until it makes no change (see + nnet-optimize.cc:VariableMerginOptimization()). */ class VariableMergingOptimizer { public: From 671bb1423a72aca279487b0ad2c805a93a68caaa Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Sat, 8 Oct 2016 01:20:33 -0400 Subject: [PATCH 002/530] Some code refactoring that will make it easier to implement online recognition in nnet3. Not fully debugged. --- src/nnet3/nnet-am-decodable-simple.cc | 2 +- src/nnet3/nnet-analyze.cc | 230 ++++++------ src/nnet3/nnet-analyze.h | 31 +- src/nnet3/nnet-chain-diagnostics.cc | 12 +- src/nnet3/nnet-chain-training.cc | 12 +- src/nnet3/nnet-compile.cc | 162 ++++++--- src/nnet3/nnet-compile.h | 39 +- src/nnet3/nnet-computation.cc | 127 ++++--- src/nnet3/nnet-computation.h | 65 ++-- src/nnet3/nnet-compute-test.cc | 23 +- src/nnet3/nnet-compute.cc | 265 ++++++-------- src/nnet3/nnet-compute.h | 87 +++-- src/nnet3/nnet-derivative-test.cc | 14 +- src/nnet3/nnet-diagnostics.cc | 4 +- src/nnet3/nnet-discriminative-diagnostics.cc | 28 +- src/nnet3/nnet-discriminative-training.cc | 26 +- src/nnet3/nnet-optimize-test.cc | 16 +- src/nnet3/nnet-optimize-utils.cc | 353 +++++-------------- src/nnet3/nnet-optimize-utils.h | 120 ++----- src/nnet3/nnet-optimize.cc | 93 ++++- src/nnet3/nnet-optimize.h | 9 + src/nnet3/nnet-training.cc | 18 +- src/nnet3/online-nnet3-decodable-simple.cc | 2 +- 23 files changed, 835 insertions(+), 903 deletions(-) diff --git a/src/nnet3/nnet-am-decodable-simple.cc b/src/nnet3/nnet-am-decodable-simple.cc index bc851790a05..9116c9461ac 100644 --- a/src/nnet3/nnet-am-decodable-simple.cc +++ b/src/nnet3/nnet-am-decodable-simple.cc @@ -261,7 +261,7 @@ void DecodableNnetSimple::DoNnetComputation( ivector_feats_cu.Row(0).CopyFromVec(ivector); computer.AcceptInput("ivector", &ivector_feats_cu); } - computer.Forward(); + computer.Run(); CuMatrix cu_output; computer.GetOutputDestructive("output", &cu_output); // subtract log-prior (divide by prior) diff --git a/src/nnet3/nnet-analyze.cc b/src/nnet3/nnet-analyze.cc index 29ff2f01fb1..c030dad7d9b 100644 --- a/src/nnet3/nnet-analyze.cc +++ b/src/nnet3/nnet-analyze.cc @@ -278,8 +278,7 @@ void ComputeCommandAttributes( switch (c.command_type) { case kAllocMatrixZeroed: case kAllocMatrixFromOtherZeroed: - vars.AppendVariablesForMatrix(c.arg1, &attr.variables_written); - attr.matrices_written.push_back(c.arg1); + vars.RecordAccessForSubmatrix(c.arg1, kWriteAccess, &attr); break; case kAllocMatrixUndefined: // nothing is written here. case kDeallocMatrix: // ditto. 
@@ -372,6 +371,14 @@ void ComputeCommandAttributes( vars.RecordAccessForSubmatrix(c.arg2, kReadAccess, &attr); break; } + case kAcceptInput: { + vars.RecordAccessForSubmatrix(c.arg1, kWriteAccess, &attr); + break; + } + case kProvideOutput: { + vars.RecordAccessForSubmatrix(c.arg1, kReadAccess, &attr); + break; + } case kNoOperation: case kNoOperationMarker: break; @@ -478,68 +485,65 @@ void ComputeMatrixAccesses( Access(c, kWriteAccess)); } } - // Now set up allocate_command and deallocate_command. + // Now set up allocate_command, deallocate_command, + // is_input and is_output. const NnetComputation::Command &command = computation.commands[c]; - int32 matrix_index = command.arg1, - matrix_index2 = command.arg2; + int32 matrix_index1, matrix_index2; + switch (command.command_type) { case kAllocMatrixZeroed: case kAllocMatrixUndefined: - if ((*matrix_accesses)[matrix_index].allocate_command != -1) - KALDI_ERR << "Matrix " << matrix_index << " initialized twice."; - (*matrix_accesses)[matrix_index].allocate_command = c; + if (!computation.IsWholeMatrix(command.arg1)) + KALDI_ERR << "Command does not operate on whole matrix"; + matrix_index1 = computation.submatrices[command.arg1].matrix_index; + if ((*matrix_accesses)[matrix_index1].allocate_command != -1) + KALDI_ERR << "Matrix " << matrix_index1 << " initialized twice."; + (*matrix_accesses)[matrix_index1].allocate_command = c; break; case kAllocMatrixFromOther: case kAllocMatrixFromOtherZeroed: - if ((*matrix_accesses)[matrix_index].allocate_command != -1) - KALDI_ERR << "Matrix " << matrix_index << " initialized twice."; - (*matrix_accesses)[matrix_index].allocate_command = c; + if (!computation.IsWholeMatrix(command.arg1)) + KALDI_ERR << "Command does not operate on whole matrix"; + matrix_index1 = computation.submatrices[command.arg1].matrix_index; + KALDI_ASSERT(computation.IsWholeMatrix(command.arg2)); + matrix_index2 = computation.submatrices[command.arg2].matrix_index; + if ((*matrix_accesses)[matrix_index1].allocate_command != -1) + KALDI_ERR << "Matrix " << matrix_index1 << " initialized twice."; + (*matrix_accesses)[matrix_index1].allocate_command = c; if ((*matrix_accesses)[matrix_index2].deallocate_command != -1) - KALDI_ERR << "Matrix " << matrix_index << " destroyed twice."; + KALDI_ERR << "Matrix " << matrix_index2 << " destroyed twice."; (*matrix_accesses)[matrix_index2].deallocate_command = c; break; case kDeallocMatrix: - if ((*matrix_accesses)[matrix_index].deallocate_command != -1) - KALDI_ERR << "Matrix " << matrix_index << " destroyed twice."; - (*matrix_accesses)[matrix_index].deallocate_command = c; + if (!computation.IsWholeMatrix(command.arg1)) + KALDI_ERR << "Command does not operate on whole matrix"; + matrix_index1 = computation.submatrices[command.arg1].matrix_index; + if ((*matrix_accesses)[matrix_index1].deallocate_command != -1) + KALDI_ERR << "Matrix " << matrix_index1 << " destroyed twice."; + (*matrix_accesses)[matrix_index1].deallocate_command = c; + break; + case kAcceptInput: + if (!computation.IsWholeMatrix(command.arg1)) + KALDI_ERR << "Command does not operate on whole matrix"; + matrix_index1 = computation.submatrices[command.arg1].matrix_index; + (*matrix_accesses)[matrix_index1].is_input = true; + // If a certain matrix is accepted as input multiple times, we + // count the first one as allocating it (the second will just + // allocate it again, which is harmless). 
+ if ((*matrix_accesses)[matrix_index1].allocate_command == -1) + (*matrix_accesses)[matrix_index1].allocate_command = c; + break; + case kProvideOutput: + if (!computation.IsWholeMatrix(command.arg1)) + KALDI_ERR << "Command does not operate on whole matrix"; + matrix_index1 = computation.submatrices[command.arg1].matrix_index; + (*matrix_accesses)[matrix_index1].is_output = true; break; default: ; } } - // now set up the is_input and is_output fields. - unordered_map >::const_iterator - iter = computation.input_output_info.begin(), - end = computation.input_output_info.end(); - for (; iter != end; ++iter) { - int32 node_index = iter->first, - value_matrix_index = iter->second.first, - deriv_matrix_index = iter->second.second; - KALDI_ASSERT(value_matrix_index > 0 && value_matrix_index < num_matrices); - if (nnet.IsInputNode(node_index)) { - // the assert checks for repeats - KALDI_ASSERT(!(*matrix_accesses)[value_matrix_index].is_input); - (*matrix_accesses)[value_matrix_index].is_input = true; - if (deriv_matrix_index != 0) { - // the derivatives, if requested, would be outputs of the computation, - // even though the node is an input node. - KALDI_ASSERT(!(*matrix_accesses)[deriv_matrix_index].is_output); - (*matrix_accesses)[deriv_matrix_index].is_output = true; - } - } else { - KALDI_ASSERT(nnet.IsOutputNode(node_index)); - // the assert checks for repeats - KALDI_ASSERT(!(*matrix_accesses)[value_matrix_index].is_output); - (*matrix_accesses)[value_matrix_index].is_output = true; - if (deriv_matrix_index != 0) { - // the derivatives, if provided, would be inputs to the computation, - // even though the node is an output node. - KALDI_ASSERT(!(*matrix_accesses)[deriv_matrix_index].is_input); - (*matrix_accesses)[deriv_matrix_index].is_input = true; - } - } - } } @@ -575,8 +579,7 @@ void ComputationChecker::CheckComputationRewrite() const { int32 num_variables = a_.variable_accesses.size(); for (int32 v = 0; v < num_variables; v++) { const std::vector &accesses = a_.variable_accesses[v]; - int32 matrix_index = a_.variables.GetMatrixForVariable(v); - if (accesses.empty() && ! a_.matrix_accesses[matrix_index].is_input) { + if (accesses.empty()) { KALDI_ERR << "Variable " << v << " = " << a_.variables.DescribeVariable(v) << "is never used."; } @@ -610,17 +613,13 @@ void ComputationChecker::CheckComputationUndefined() const { int32 num_variables = a_.variable_accesses.size(); for (int32 v = 0; v < num_variables; v++) { const std::vector &accesses = a_.variable_accesses[v]; - int32 matrix_index = a_.variables.GetMatrixForVariable(v); - bool is_input = a_.matrix_accesses[matrix_index].is_input; - if (! 
is_input) { - if (accesses.empty()) - KALDI_ERR << "Variable " << v << " == " - << a_.variables.DescribeVariable(v) << "is never used."; - if (accesses[0].access_type != kWriteAccess) - KALDI_ERR << "Variable " << v << " == " - << a_.variables.DescribeVariable(v) - << "is read before it is written to"; - } + if (accesses.empty()) + KALDI_ERR << "Variable " << v << " == " + << a_.variables.DescribeVariable(v) << "is never used."; + if (accesses[0].access_type != kWriteAccess) + KALDI_ERR << "Variable " << v << " == " + << a_.variables.DescribeVariable(v) + << " is read before it is written to"; } } @@ -637,45 +636,35 @@ void ComputationChecker::CheckComputationMatrixAccesses() const { for (int32 matrix_index = 1; matrix_index < num_matrices; matrix_index++) { const MatrixAccesses &accesses = a_.matrix_accesses[matrix_index]; - if (accesses.is_input) { - if (accesses.allocate_command != -1) - KALDI_ERR << "Input matrix is initialized."; - } else { - if (accesses.allocate_command == -1) - KALDI_ERR << "Matrix m" << matrix_index << "is not initialized."; - if (accesses.accesses.empty()) { - KALDI_ERR << "Matrix m" << matrix_index << " is never accessed."; - } else if (accesses.accesses.front().command_index < - accesses.allocate_command) { - KALDI_ERR << "Matrix m" << matrix_index << " is accessed before " - "it is initialized"; - } + if (accesses.allocate_command == -1) + KALDI_ERR << "Matrix m" << matrix_index << "is not initialized."; + if (accesses.accesses.empty()) { + KALDI_ERR << "Matrix m" << matrix_index << " is never accessed."; + } else if (accesses.accesses.front().command_index < + accesses.allocate_command) { + KALDI_ERR << "Matrix m" << matrix_index << " is accessed before " + "it is initialized"; } - if (accesses.is_output) { - if (accesses.deallocate_command != -1) - KALDI_ERR << "Output matrix is destroyed."; - } else { - if (accesses.deallocate_command == -1) - KALDI_ERR << "Matrix m" << matrix_index << " is not destroyed."; - if (accesses.accesses.empty()) { - if (accesses.is_input) { - // we allow there to be no accesses if it is an input, e.g. if an - // output derivative is supplied for some reason but never used. - // We'll warn, though (once). - if (!computation_checker_warned_unused_input) { - KALDI_WARN << "Matrix m" << matrix_index << " is never accessed. " - "Allowing because it is an input (un-needed input or " - "derivative?) Will warn only once."; - computation_checker_warned_unused_input = true; - } - } else { - KALDI_ERR << "Matrix m" << matrix_index << " is never accessed."; + + if (accesses.accesses.empty()) { + if (accesses.is_input) { + // we allow there to be no accesses if it is an input, e.g. if an + // output derivative is supplied for some reason but never used. + // We'll warn, though (once). + if (!computation_checker_warned_unused_input) { + KALDI_WARN << "Matrix m" << matrix_index << " is never accessed. " + "Allowing because it is an input (un-needed input or " + "derivative?) 
Will warn only once."; + computation_checker_warned_unused_input = true; } - } else if (accesses.accesses.back().command_index >= - accesses.deallocate_command) { - KALDI_ERR << "Matrix m" << matrix_index << " is accessed after " - "it is destroyed"; + } else { + KALDI_ERR << "Matrix m" << matrix_index << " is never accessed."; } + } else if (accesses.deallocate_command != -1 && + accesses.accesses.back().command_index >= + accesses.deallocate_command) { + KALDI_ERR << "Matrix m" << matrix_index << " is accessed after " + "it is destroyed"; } } } @@ -687,7 +676,6 @@ void ComputationChecker::CheckComputationMatrixAccesses() const { */ void ComputationChecker::CheckComputationIndexes() const { int32 num_commands = computation_.commands.size(), - num_matrices = computation_.matrices.size(), num_submatrices = computation_.submatrices.size(); const std::vector &submatrices = computation_.submatrices; @@ -698,18 +686,21 @@ void ComputationChecker::CheckComputationIndexes() const { case kAllocMatrixZeroed: case kAllocMatrixUndefined: case kDeallocMatrix: - if (c.arg1 < 1 || c.arg1 >= num_matrices) - KALDI_ERR << "matrix index out of range."; + if (c.arg1 < 1 || c.arg1 >= num_submatrices || + !computation_.IsWholeMatrix(c.arg1)) + KALDI_ERR << "submatrix index out of range or invalid"; break; case kAllocMatrixFromOther: case kAllocMatrixFromOtherZeroed: - if (c.arg1 < 1 || c.arg1 >= num_matrices || - c.arg2 < 1 || c.arg2 >= num_matrices) - KALDI_ERR << "matrix index out of range."; - if (computation_.matrices[c.arg1].num_rows != - computation_.matrices[c.arg2].num_rows || - computation_.matrices[c.arg1].num_cols != - computation_.matrices[c.arg2].num_cols) + if (c.arg1 < 1 || c.arg1 >= num_submatrices || + !computation_.IsWholeMatrix(c.arg1) || + c.arg2 < 1 || c.arg2 >= num_submatrices || + !computation_.IsWholeMatrix(c.arg2)) + KALDI_ERR << "submatrix index out of range or invalid"; + if (computation_.submatrices[c.arg1].num_rows != + computation_.submatrices[c.arg2].num_rows || + computation_.submatrices[c.arg1].num_cols != + computation_.submatrices[c.arg2].num_cols) KALDI_ERR << "Dimension mismatch in kAllocMatrixFromOther* command"; break; case kPropagate: { @@ -914,6 +905,16 @@ void ComputationChecker::CheckComputationIndexes() const { } break; } + case kAcceptInput: case kProvideOutput: { + if (c.arg1 < 1 || c.arg1 >= num_submatrices || + !computation_.IsWholeMatrix(c.arg1)) + KALDI_ERR << "submatrix index out of range or invalid"; + // note: we may later change the following condition to allow component + // nodes. we allow it on output node because of derivatives. 
+ if (!nnet_.IsInputNode(c.arg2) && !nnet_.IsOutputNode(c.arg2)) + KALDI_ERR << "Invalid network node"; + break; + } case kNoOperation: case kNoOperationMarker: break; @@ -1000,9 +1001,6 @@ void ComputeMatrixToSubmatrix( int32 ComputationAnalysis::FirstAccess(int32 s) const { KALDI_ASSERT(static_cast(s) < computation_.submatrices.size() && s>0); - int32 matrix_index = computation_.submatrices[s].matrix_index; - if (analyzer_.matrix_accesses[matrix_index].is_input) - return -1; int32 ans = computation_.commands.size(); std::vector variable_indexes; analyzer_.variables.AppendVariablesForSubmatrix(s, &variable_indexes); @@ -1034,8 +1032,6 @@ int32 ComputationAnalysis::FirstAccess(int32 s) const { int32 ComputationAnalysis::FirstMatrixAccess(int32 m) const { KALDI_ASSERT(static_cast(m) < computation_.matrices.size() && m > 0); - if (analyzer_.matrix_accesses[m].is_input) - return -1; int32 ans = computation_.commands.size(); const std::vector &accesses = analyzer_.matrix_accesses[m].accesses; @@ -1043,7 +1039,12 @@ int32 ComputationAnalysis::FirstMatrixAccess(int32 m) const { access_end = accesses.end(); for (; access_iter != access_end; ++access_iter) { int32 command_index = access_iter->command_index; - if (command_index != analyzer_.matrix_accesses[m].allocate_command) { + CommandType command_type = + computation_.commands[command_index].command_type; + if (command_type != kAllocMatrixUndefined && + command_type != kAllocMatrixZeroed && + command_type != kAllocMatrixFromOther && + command_type != kAllocMatrixFromOtherZeroed) { ans = std::min(ans, command_index); break; // break from access_iter loop (an optimization) } @@ -1054,8 +1055,6 @@ int32 ComputationAnalysis::FirstMatrixAccess(int32 m) const { int32 ComputationAnalysis::LastMatrixAccess(int32 m) const { KALDI_ASSERT(static_cast(m) < computation_.matrices.size() && m > 0); - if (analyzer_.matrix_accesses[m].is_output) - return computation_.commands.size(); int32 ans = -1; const std::vector &accesses = analyzer_.matrix_accesses[m].accesses; @@ -1072,9 +1071,6 @@ int32 ComputationAnalysis::LastMatrixAccess(int32 m) const { int32 ComputationAnalysis::LastAccess(int32 s) const { KALDI_ASSERT(static_cast(s) < computation_.submatrices.size() && s>0); - int32 matrix_index = computation_.submatrices[s].matrix_index; - if (analyzer_.matrix_accesses[matrix_index].is_output) - return computation_.commands.size(); int32 ans = -1; std::vector variable_indexes; analyzer_.variables.AppendVariablesForSubmatrix(s, &variable_indexes); diff --git a/src/nnet3/nnet-analyze.h b/src/nnet3/nnet-analyze.h index 28a62e996b8..8b02d6376e9 100644 --- a/src/nnet3/nnet-analyze.h +++ b/src/nnet3/nnet-analyze.h @@ -145,6 +145,7 @@ class ComputationVariables { int32 matrix_index, std::vector *variable_indexes) const; + // Appends to variable_indexes the sorted list of variables corresponding to a // submatrix index. void AppendVariablesForSubmatrix( @@ -311,23 +312,20 @@ class ComputationAnalysis { const Analyzer &analyzer): computation_(computation), analyzer_(analyzer) { } - /// If the matrix underlying submatrix 's' is an input then this returns -1; - /// otherwise it returns the first command (read or write) that is not an - /// allocation command, that accesses any part of 's' [note: deallocation does - /// not count as a read or write operation]. If there is no such command, it - /// returns num_commands. 
+ /// Returns the first command (read or write) that is not a kAlloc* command, + /// that accesses any part of 's' [note: deallocation does not count as a read + /// or write operation]. If there is no such command, it returns + /// num_commands. /// s must be >0 (i.e. not the empty submatrix). int32 FirstAccess(int32 s) const; - /// If the matrix underlying submatrix 's' is an output then this returns - /// num-commands; otherwise it returns the last non-deallocation command - /// that accesses any part of submatrix 's'; if there is no such command it - /// returns -1. + /// Returns the last non-deallocation command that accesses any part of + /// submatrix 's'; if there is no such command it returns -1. /// s must be >0 (i.e. not the empty submatrix). int32 LastAccess(int32 s) const; /// Returns the last command-index that accesses any part of submatrix 's' as - /// a write operation, or -1 if there is no such operation. Not: deallocation + /// a write operation, or -1 if there is no such operation. Note: deallocation /// does not count as a write operation. /// s must be >0 (i.e. not the empty submatrix). int32 LastWriteAccess(int32 s) const; @@ -339,16 +337,13 @@ class ComputationAnalysis { /// s must be >0 (i.e. not the empty submatrix). int32 DataInvalidatedCommand(int32 c, int32 s) const; - /// If matrix 'm' is an input then this returns -1; otherwise it returns the - /// first command (read or write) that is not an allocation command, that - /// accesses any part of 'm' [note: deallocation does not count as a read or - /// write operation]. If there is no such command, it returns num_commands. - /// m must be >0 (i.e. not the empty matrix). + /// Returns the first command (read or write or accept-input) that is not an + /// kAllocate* command, that accesses any part of 'm' [note: deallocation does + /// not count as a read or write operation]. If there is no such command, it + /// returns num_commands. m must be >0 (i.e. not the empty matrix). int32 FirstMatrixAccess(int32 m) const; - - /// If matrix 'm' is an output then this returns num-commands; otherwise it - /// returns the last non-deallocation command that accesses any part of + /// Returns the last non-deallocation command that accesses any part of /// matrix 'm'; if there is no such command it returns -1. m must be >0 /// (i.e. not the empty matrix). int32 LastMatrixAccess(int32 m) const; diff --git a/src/nnet3/nnet-chain-diagnostics.cc b/src/nnet3/nnet-chain-diagnostics.cc index 46e2b0c01dc..b6b39816337 100644 --- a/src/nnet3/nnet-chain-diagnostics.cc +++ b/src/nnet3/nnet-chain-diagnostics.cc @@ -82,10 +82,10 @@ void NnetChainComputeProb::Compute(const NnetChainExample &chain_eg) { nnet_, deriv_nnet_); // give the inputs to the computer object. computer.AcceptInputs(nnet_, chain_eg.inputs); - computer.Forward(); + computer.Run(); this->ProcessOutputs(chain_eg, &computer); if (nnet_config_.compute_deriv) - computer.Backward(); + computer.Run(); } void NnetChainComputeProb::ProcessOutputs(const NnetChainExample &eg, @@ -111,15 +111,15 @@ void NnetChainComputeProb::ProcessOutputs(const NnetChainExample &eg, if (use_xent) xent_deriv.Resize(nnet_output.NumRows(), nnet_output.NumCols(), kUndefined); - + BaseFloat tot_like, tot_l2_term, tot_weight; - + ComputeChainObjfAndDeriv(chain_config_, den_graph_, sup.supervision, nnet_output, &tot_like, &tot_l2_term, &tot_weight, (nnet_config_.compute_deriv ? &nnet_output_deriv : NULL), (use_xent ? 
&xent_deriv : NULL)); - + // note: in this context we don't want to apply 'sup.deriv_weights' because // this code is used only in combination, where it's part of an L-BFGS // optimization algorithm, and in that case if there is a mismatch between @@ -134,7 +134,7 @@ void NnetChainComputeProb::ProcessOutputs(const NnetChainExample &eg, totals.tot_l2_term += tot_l2_term; if (nnet_config_.compute_deriv) - computer->AcceptOutputDeriv(sup.name, &nnet_output_deriv); + computer->AcceptInput(sup.name, &nnet_output_deriv); if (use_xent) { ChainObjectiveInfo &xent_totals = objf_info_[xent_name]; diff --git a/src/nnet3/nnet-chain-training.cc b/src/nnet3/nnet-chain-training.cc index 3f08710fd38..ee7368722f1 100644 --- a/src/nnet3/nnet-chain-training.cc +++ b/src/nnet3/nnet-chain-training.cc @@ -54,7 +54,7 @@ NnetChainTrainer::NnetChainTrainer(const NnetChainTrainingOptions &opts, KALDI_WARN << "Could not open cached computation. " "Probably this is the first training iteration."; } - } + } } @@ -73,10 +73,10 @@ void NnetChainTrainer::Train(const NnetChainExample &chain_eg) { *nnet_, delta_nnet_); // give the inputs to the computer object. computer.AcceptInputs(*nnet_, chain_eg.inputs); - computer.Forward(); + computer.Run(); this->ProcessOutputs(chain_eg, &computer); - computer.Backward(); + computer.Run(); UpdateParamsWithMaxChange(); } @@ -134,7 +134,7 @@ void NnetChainTrainer::ProcessOutputs(const NnetChainExample &eg, xent_deriv.MulRowsVec(cu_deriv_weights); } - computer->AcceptOutputDeriv(sup.name, &nnet_output_deriv); + computer->AcceptInput(sup.name, &nnet_output_deriv); objf_info_[sup.name].UpdateStats(sup.name, opts_.nnet_config.print_interval, num_minibatches_processed_++, @@ -142,7 +142,7 @@ void NnetChainTrainer::ProcessOutputs(const NnetChainExample &eg, if (use_xent) { xent_deriv.Scale(opts_.chain_config.xent_regularize); - computer->AcceptOutputDeriv(xent_name, &xent_deriv); + computer->AcceptInput(xent_name, &xent_deriv); } } } @@ -273,7 +273,7 @@ NnetChainTrainer::~NnetChainTrainer() { Output ko(opts_.nnet_config.write_cache, opts_.nnet_config.binary_write_cache); compiler_.WriteCache(ko.Stream(), opts_.nnet_config.binary_write_cache); KALDI_LOG << "Wrote computation cache to " << opts_.nnet_config.write_cache; - } + } delete delta_nnet_; } diff --git a/src/nnet3/nnet-compile.cc b/src/nnet3/nnet-compile.cc index 85dec668fe8..461728eb479 100644 --- a/src/nnet3/nnet-compile.cc +++ b/src/nnet3/nnet-compile.cc @@ -21,6 +21,7 @@ #include #include "nnet3/nnet-compile.h" #include "nnet3/nnet-compile-utils.h" +#include "nnet3/nnet-optimize.h" // just for ConsolidateIoOperations(). namespace kaldi { namespace nnet3 { @@ -51,29 +52,68 @@ void Compiler::CreateComputation(const CompilerOptions &opts, ComputeDerivNeeded(steps, &deriv_needed); CreateStepInfo(deriv_needed, &steps, computation); AddCommands(deriv_needed, computation); + // the following command reorders commands so kAcceptInput and kProvideOutput + // appear in the desired places. 
+ ConsolidateIoOperations(nnet_, computation); if (opts.output_debug_info) OutputDebugInfo(computation); } void Compiler::AddCommands(const std::vector &deriv_needed, NnetComputation *computation) { - SetInputOutputInfo(computation); computation->need_model_derivative = request_.need_model_derivative; int32 arbitrary_factor = 8; computation->commands.reserve(computation->matrices.size() * arbitrary_factor); - AllocateMatrices(computation); + + std::vector whole_submatrices; + computation->GetWholeSubmatrices(&whole_submatrices); + AllocateMatrices(whole_submatrices, computation); SetUpPrecomputedIndexes(computation); int32 num_steps = steps_.size(); for (int32 step = 0; step < num_steps; step++) DoForwardComputation(step, computation); - // mark the end of the forward phase. - computation->commands.push_back( - NnetComputation::Command(kNoOperationMarker)); + + AddCommandsAfterPropagate(deriv_needed, computation); + for (int32 step = num_steps - 1; step >= 0; step--) if (deriv_needed[step]) DoBackwardComputation(step, computation); - DeallocateMatrices(computation); + DeallocateMatrices(whole_submatrices, computation); +} + +void Compiler::AddCommandsAfterPropagate(const std::vector &deriv_needed, + NnetComputation *computation) { + // mark the end of the forward phase. + computation->commands.push_back( + NnetComputation::Command(kNoOperationMarker)); + + std::vector deriv_input_commands; + + // We handle output nodes here-- add commands that relate to us providing + // outputs to the user; then, if applicable, we add commands to direct us to + // accept derivatives w.r.t. those outputs from the user. + int32 num_steps = steps_.size(); + for (int32 step = 0; step < num_steps; step++) { + const StepInfo &step_info = steps_[step]; + if (nnet_.IsOutputNode(step_info.node_index)) { + int32 node_index = step_info.node_index, + submatrix_index = step_info.value; + KALDI_ASSERT(computation->IsWholeMatrix(submatrix_index)); + NnetComputation::Command c(kProvideOutput, submatrix_index, node_index); + computation->commands.push_back(c); + if (deriv_needed[step]) { + int32 deriv_submatrix_index = step_info.deriv; + KALDI_ASSERT(deriv_submatrix_index > 0); + KALDI_ASSERT(computation->IsWholeMatrix(deriv_submatrix_index)); + NnetComputation::Command c(kAcceptInput, deriv_submatrix_index, node_index); + deriv_input_commands.push_back(c); + } + } + } + computation->commands.insert(computation->commands.end(), + deriv_input_commands.begin(), + deriv_input_commands.end()); } @@ -316,37 +356,18 @@ void Compiler::CreateLocationInfo( } } -void Compiler::SetInputOutputInfo(NnetComputation *computation) const { - KALDI_ASSERT(computation->input_output_info.empty()); - int32 num_steps = steps_.size(); - for (int32 step = 0; step < num_steps; step++) { - const StepInfo &this_info = steps_[step]; - int32 node_index = this_info.node_index; - if (nnet_.IsInputNode(node_index) || nnet_.IsOutputNode(node_index)) { - // There should be only one step for each input or output node. 
- KALDI_ASSERT(computation->input_output_info.count(node_index) == 0); - int32 value_matrix_index = - computation->submatrices[this_info.value].matrix_index; - int32 deriv_matrix_index = 0; - if (this_info.deriv != 0) - deriv_matrix_index = - computation->submatrices[this_info.deriv].matrix_index; - computation->input_output_info[node_index] = - std::pair(value_matrix_index, deriv_matrix_index); - } - } -} - - void Compiler::DoForwardComputation(int32 step, NnetComputation *computation) const { KALDI_ASSERT(step < static_cast(steps_.size())); const StepInfo &step_info = steps_[step]; const NetworkNode &node = nnet_.GetNode(step_info.node_index); switch (node.node_type) { - case kInput: case kDimRange: break; // Nothing to do. + case kInput: // Note: input nodes appear before other node types. + AddForwardStepInput(step, computation); + break; + case kDimRange: break; // Nothing to do. case kComponent: - AddPropagateStep(step, computation); + AddForwardStepComponent(step, computation); break; case kDescriptor: DoForwardComputationDescriptor(step, computation); @@ -758,9 +779,13 @@ void Compiler::DoBackwardComputation(int32 step, const NetworkNode &node = nnet_.GetNode(node_index); switch (node.node_type) { - case kInput: case kDimRange: break; // Nothing to do. + case kInput: + AddBackwardStepInput(step, computation); + break; + case kDimRange: + break; // Nothing to do. case kComponent: - AddBackpropStep(step, computation); + AddBackwardStepComponent(step, computation); break; case kDescriptor: DoBackwardComputationDescriptor(step, computation); @@ -770,9 +795,28 @@ void Compiler::DoBackwardComputation(int32 step, } } +// This just adds a command of type kAcceptInput that directs the computer to +// expect input from the user. Because inputs are always listed first in +// 'steps', these will precede the actual commands. +void Compiler::AddForwardStepInput(int32 step, + NnetComputation *computation) const { + KALDI_ASSERT(static_cast(step) < steps_.size()); + const StepInfo &step_info = steps_[step]; + int32 node_index = step_info.node_index, + submatrix_index = step_info.value; + KALDI_ASSERT(computation->IsWholeMatrix(submatrix_index)); + + const NetworkNode &node = nnet_.GetNode(node_index); + // actually currently the node type would always be kInput. + KALDI_ASSERT(node.node_type == kInput || node.node_type == kComponent); + + NnetComputation::Command c(kAcceptInput, submatrix_index, node_index); + computation->commands.push_back(c); +} + -void Compiler::AddPropagateStep(int32 step, - NnetComputation *computation) const { +void Compiler::AddForwardStepComponent(int32 step, + NnetComputation *computation) const { KALDI_ASSERT(static_cast(step) < steps_.size()); const StepInfo &step_info = steps_[step]; int32 input_step = step - 1; @@ -781,9 +825,6 @@ void Compiler::AddPropagateStep(int32 step, const NetworkNode &node = nnet_.GetNode(node_index); KALDI_ASSERT(node.node_type == kComponent); - // in setting the following two variables, we use the fact that the submatrix - // index of each submatrix that represents an entire matrix, is the same as - // the matrix index of that matrix. 
int32 input_submatrix_index = input_step_info.value, output_submatrix_index = step_info.value; NnetComputation::Command c(kPropagate, @@ -805,8 +846,26 @@ void Compiler::AddPropagateStep(int32 step, } -void Compiler::AddBackpropStep(int32 step, - NnetComputation *computation) const { +void Compiler::AddBackwardStepInput(int32 step, + NnetComputation *computation) const { + KALDI_ASSERT(static_cast(step) < steps_.size()); + const StepInfo &step_info = steps_[step]; + int32 node_index = step_info.node_index, + deriv_submatrix_index = step_info.deriv; + if (deriv_submatrix_index == 0) + return; // Nothing to do. + KALDI_ASSERT(computation->IsWholeMatrix(deriv_submatrix_index)); + const NetworkNode &node = nnet_.GetNode(node_index); + // actually, currently the node type would always be kInput. + KALDI_ASSERT(node.node_type == kInput || node.node_type == kComponent); + + NnetComputation::Command c(kProvideOutput, deriv_submatrix_index, node_index); + computation->commands.push_back(c); +} + + +void Compiler::AddBackwardStepComponent(int32 step, + NnetComputation *computation) const { KALDI_ASSERT(static_cast(step) < steps_.size()); const StepInfo &step_info = steps_[step]; int32 input_step = step - 1; @@ -817,9 +876,6 @@ void Compiler::AddBackpropStep(int32 step, int32 component_index = node.u.component_index; const Component *component = nnet_.GetComponent(component_index); - // in setting the following two variables, we use the fact that the submatrix - // index of each submatrix that represents an entire matrix, is the same as - // the matrix index of that matrix. int32 input_submatrix_index = input_step_info.value, output_submatrix_index = step_info.value, input_deriv_submatrix_index = input_step_info.deriv, @@ -845,7 +901,8 @@ void Compiler::AddBackpropStep(int32 step, -void Compiler::AllocateMatrices(NnetComputation *computation) const { +void Compiler::AllocateMatrices(const std::vector &whole_submatrices, + NnetComputation *computation) const { KALDI_ASSERT(computation->commands.empty()); // Work out which matrices are inputs to the computation (or output-derivs, // which are also supplied as inputs to the computation); we won't be setting @@ -874,14 +931,17 @@ void Compiler::AllocateMatrices(NnetComputation *computation) const { } } - for (int32 m = 1; m < computation->matrices.size(); m++) { + int32 num_matrices = computation->matrices.size(); + for (int32 m = 1; m < num_matrices; m++) { // Later in the optimization phase, it turns out that zeroing is not // necessary for some matrices, we'll turn these commands into // kAllocMatrixUndefined. // We don't set up the matrices that are inputs to the computation; // this happens when the user provides the input. if (input_and_oderiv_matrices.count(m) == 0) { - NnetComputation::Command c(kAllocMatrixZeroed, m); + // get a submatrix index that refers to the entire matrix. + int32 submatrix_index = whole_submatrices[m]; + NnetComputation::Command c(kAllocMatrixZeroed, submatrix_index); computation->commands.push_back(c); } } @@ -927,7 +987,8 @@ void Compiler::SetUpPrecomputedIndexes( } -void Compiler::DeallocateMatrices(NnetComputation *computation) { +void Compiler::DeallocateMatrices(const std::vector &whole_submatrices, + NnetComputation *computation) { // This adds the commands to destroy all the matrices- but not the // ones that might be needed as outputs of the computation. 
The ones that // are spared from destruction are those corresponding to outputs of the @@ -968,10 +1029,13 @@ void Compiler::DeallocateMatrices(NnetComputation *computation) { } } // note: matrix-index 0 is the empty matrix. - for (int32 m = 1; m < num_matrices; m++) - if (will_destroy[m]) + for (int32 m = 1; m < num_matrices; m++) { + if (will_destroy[m]) { + int32 submatrix_index = whole_submatrices[m]; computation->commands.push_back( - NnetComputation::Command(kDeallocMatrix, m)); + NnetComputation::Command(kDeallocMatrix, submatrix_index)); + } + } } void Compiler::OutputDebugInfo(NnetComputation *computation) const { diff --git a/src/nnet3/nnet-compile.h b/src/nnet3/nnet-compile.h index 4dda38ae723..195ac36006a 100644 --- a/src/nnet3/nnet-compile.h +++ b/src/nnet3/nnet-compile.h @@ -60,7 +60,6 @@ class Compiler { // multiple commands. struct StepInfo { int32 node_index; // network-node index - bool is_input; // true if step corresponds to an input to the computation. int32 value; // sub-matrix index of value that this step outputs. int32 deriv; // sub-matrix index of derivative at the output of this step; zero // if not used (note: index zero is reserved for the empty @@ -93,8 +92,8 @@ class Compiler { // backprop. std::vector > > > input_locations_list; - StepInfo(): node_index(-1), is_input(false), value(0), - deriv(0), precomputed_indexes_index(0) { } + StepInfo(): node_index(-1), value(0), deriv(0), + precomputed_indexes_index(0) { } }; // this sets up cindex_id_to_location_. @@ -143,17 +142,16 @@ class Compiler { // Adds to the computation object the information about the matrix sizes void DefineMatrices(NnetComputation *computation) const; - // sets up the input_output_info of the computation (this says where the - // values and derivatives for the inputs and outputs live). - void SetInputOutputInfo(NnetComputation *computation) const; - // Sets up sub-matrix indexes for nodes of type Descriptor (needed mainly // because Descriptors in general have many parts corresponding to // feature-dimension ranges, and they live in sub-matrices. void DefineSubmatrices(NnetComputation *computation); // Adds to the computation object the commands to allocate the matrices. - void AllocateMatrices(NnetComputation *computation) const; + // 'whole_submatrices' is as created by computation->GetWholeSubmatrices(), it + // gives us the index of a submatrix containing the whole of each matrix. + void AllocateMatrices(const std::vector &whole_submatrices, + NnetComputation *computation) const; // Sets up the precomputed indexes for each component, and sets the // precomputed_indexes_index value for each step. @@ -165,7 +163,11 @@ class Compiler { // Called from DoForwardComputation, handles the case where the step corresponds // to a Component. - void AddPropagateStep(int32 step, NnetComputation *computation) const; + void AddForwardStepComponent(int32 step, NnetComputation *computation) const; + + // Called from DoForwardComputation, handles the case where the step corresponds + // to an input node. + void AddForwardStepInput(int32 step, NnetComputation *computation) const; // Called from DoForwardComputation, handles the case where the step @@ -246,7 +248,12 @@ class Compiler { // Called from DoBackwardComputation, handles the case where the step corresponds // to a Component. 
- void AddBackpropStep(int32 step, NnetComputation *computation) const; + void AddBackwardStepComponent(int32 step, NnetComputation *computation) const; + + // Called from DoBackwardComputation, handles the case where the step + // corresponds to an input. If applicable, this generates a command for the + // network to provide the derivative w.r.t. the input, to the user. + void AddBackwardStepInput(int32 step, NnetComputation *computation) const; // Called from DoBackwardComputation, handles the case where the step // corresponds to type kDescriptor. @@ -284,11 +291,21 @@ class Compiler { // deinitialize all the matrices, except those that may be requested by // the user after the computation is done (i.e. outputs of the network, // and input derivatives). - void DeallocateMatrices(NnetComputation *computation); + // 'whole_submatrices' is as created by computation->GetWholeSubmatrices(), it + // gives us the index of a submatrix containing the whole of each matrix. + void DeallocateMatrices(const std::vector &whole_submatrices, + NnetComputation *computation); // sets up the debug_info member of "computation". void OutputDebugInfo(NnetComputation *computation) const; + + // this function, called from AddCommands, adds the output and input + // commands that happen after the forward pass and before the backward + // pass. + void AddCommandsAfterPropagate(const std::vector &deriv_needed, + NnetComputation *computation); + void AddCommands(const std::vector &deriv_needed, NnetComputation *computation); diff --git a/src/nnet3/nnet-computation.cc b/src/nnet3/nnet-computation.cc index 1cccaa11d0c..62d12c2e93f 100644 --- a/src/nnet3/nnet-computation.cc +++ b/src/nnet3/nnet-computation.cc @@ -267,6 +267,10 @@ void NnetComputation::Command::Read(std::istream &is, bool binary) { command_type = kAddToRowsMulti; } else if (command_type_str == "kAddRowRanges") { command_type = kAddRowRanges; + } else if (command_type_str == "kAcceptInput") { + command_type = kAcceptInput; + } else if (command_type_str == "kProvideOutput") { + command_type = kProvideOutput; } else if (command_type_str == "kNoOperation") { command_type = kNoOperation; } else if (command_type_str == "kNoOperationMarker") { @@ -352,6 +356,12 @@ void NnetComputation::Command::Write(std::ostream &os, bool binary) const { case kAddRowRanges: os << "kAddRowRanges\n"; break; + case kAcceptInput: + os << "kAcceptInput\n"; + break; + case kProvideOutput: + os << "kProvideOutput\n"; + break; case kNoOperation: os << "kNoOperation\n"; break; @@ -482,28 +492,30 @@ static void PrintCommand(std::ostream &os, const NnetComputation::Command &c = computation.commands[command_index]; switch (c.command_type) { case kAllocMatrixZeroed: - os << "m" << c.arg1 << " = zeros(" - << computation.matrices[c.arg1].num_rows - << ',' << computation.matrices[c.arg1].num_cols << ")\n"; + os << submatrix_strings[c.arg1] << " = zeros(" + << computation.submatrices[c.arg1].num_rows + << ',' << computation.submatrices[c.arg1].num_cols << ")\n"; break; case kAllocMatrixUndefined: - os << "m" << c.arg1 << " = undefined(" - << computation.matrices[c.arg1].num_rows - << ',' << computation.matrices[c.arg1].num_cols << ")\n"; + os << submatrix_strings[c.arg1] << " = undefined(" + << computation.submatrices[c.arg1].num_rows + << ',' << computation.submatrices[c.arg1].num_cols << ")\n"; break; case kDeallocMatrix: - os << "m" << c.arg1 << " = []\n"; + os << submatrix_strings[c.arg1] << " = []\n"; break; case kAllocMatrixFromOther: - os << "m" << c.arg1 << ".swap(m" << c.arg2 << 
") [dim = " - << computation.matrices[c.arg1].num_rows << " x " - << computation.matrices[c.arg1].num_cols << "]\n"; + os << submatrix_strings[c.arg1] << ".swap(" + << submatrix_strings[c.arg2] << ") [dim = " + << computation.submatrices[c.arg1].num_rows << " x " + << computation.submatrices[c.arg1].num_cols << "]\n"; break; case kAllocMatrixFromOtherZeroed: - os << "m" << c.arg1 << ".swap(m" << c.arg2 << ") [dim = " - << computation.matrices[c.arg1].num_rows << " x " - << computation.matrices[c.arg1].num_cols << "]; m" - << c.arg1 << ".zero();\n"; + os << submatrix_strings[c.arg1] << ".swap(" + << submatrix_strings[c.arg2] << ") [dim = " + << computation.submatrices[c.arg1].num_rows << " x " + << computation.submatrices[c.arg1].num_cols << "]; " + << submatrix_strings[c.arg1] << ".zero();\n"; break; case kPropagate: os << nnet.GetComponentName(c.arg1) << ".Propagate("; @@ -572,6 +584,14 @@ static void PrintCommand(std::ostream &os, os << "])\n"; break; } + case kAcceptInput: + os << submatrix_strings[c.arg1] << " = user input [for node: '" + << nnet.GetNodeName(c.arg2) << "']\n"; + break; + case kProvideOutput: + os << "output " << submatrix_strings[c.arg1] << " to user" + << " [for node: '" << nnet.GetNodeName(c.arg2) << "']\n"; + break; case kNoOperation: os << "[no-op]\n"; break; @@ -601,20 +621,6 @@ static void PrintComputationPreamble( os << ", "; } os << "\n"; - // show which matrices the inputs and outputs map to. - for (unordered_map >::const_iterator iter = - c.input_output_info.begin(); iter != c.input_output_info.end(); - ++iter) { - int32 node_index = iter->first, - value_matrix_index = iter->second.first, - deriv_matrix_index = iter->second.second; - os << nnet.GetNodeName(node_index) << ".value -> m" - << value_matrix_index << "\n"; - if (deriv_matrix_index != 0) { - os << nnet.GetNodeName(node_index) << ".deriv -> m" - << deriv_matrix_index << "\n"; - } - } if (!c.matrix_debug_info.empty()) { os << "# The following show how matrices correspond to network-nodes and\n" << "# cindex-ids. Format is: matrix = .[value|deriv][ ]\n" @@ -647,9 +653,25 @@ void NnetComputation::Print(std::ostream &os, const Nnet &nnet) const { } void NnetComputation::Read(std::istream &is, bool binary) { + int32 version = 2, // must be in sync with 'version' in Write. + version_in = 1; // defaults to 1 if no version specified. + ExpectToken(is, binary, ""); + std::string token; + ReadToken(is, binary, &token); + if (token == "") { + ReadBasicType(is, binary, &version_in); + ExpectToken(is, binary, ""); + } else { + KALDI_ASSERT(token == ""); + } + if (version_in != version) { + KALDI_ERR << "Reading NnetComputation failed because version in " + << version_in << " != " << version << "... 
you can " + << "ignore this error if the program continues afterward, " + << "it would only affect speed."; + } size_t num_matrices; - ExpectToken(is, binary, ""); ReadBasicType(is, binary, &num_matrices); KALDI_ASSERT(num_matrices >= 0); matrices.resize(num_matrices); @@ -728,21 +750,6 @@ void NnetComputation::Read(std::istream &is, bool binary) { ReadIntegerPairVector(is, binary, &(indexes_ranges[c])); } - size_t num_input_output_info; - ExpectToken(is, binary, ""); - ReadBasicType(is, binary, &num_input_output_info); - KALDI_ASSERT(num_input_output_info >= 0); - input_output_info.clear(); - ExpectToken(is, binary, ""); - for (size_t c = 0; c < num_input_output_info; c++) { - int32 key; - std::pair val; - ReadBasicType(is, binary, &key); - ReadBasicType(is, binary, &(val.first)); - ReadBasicType(is, binary, &(val.second)); - input_output_info.insert(std::pair >(key, val)); - } - size_t num_commands; ExpectToken(is, binary, ""); ReadBasicType(is, binary, &num_commands); @@ -761,7 +768,10 @@ void NnetComputation::Read(std::istream &is, bool binary) { } void NnetComputation::Write(std::ostream &os, bool binary) const { + int32 version = 2; // Must be in sync with version in Read. WriteToken(os, binary, ""); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, version); WriteToken(os, binary, ""); WriteBasicType(os, binary, matrices.size()); WriteToken(os, binary, ""); @@ -822,18 +832,6 @@ void NnetComputation::Write(std::ostream &os, bool binary) const { WriteIntegerPairVector(os, binary, indexes_ranges[c]); } - if (!binary) os << std::endl; - WriteToken(os, binary, ""); - WriteBasicType(os, binary, input_output_info.size()); - WriteToken(os, binary, ""); - std::map > input_output_info_cp(input_output_info.begin(), input_output_info.end()); - for (std::map >::const_iterator iter = - input_output_info_cp.begin(); iter != input_output_info_cp.end(); ++iter) { - WriteBasicType(os, binary, iter->first); - WriteBasicType(os, binary, iter->second.first); - WriteBasicType(os, binary, iter->second.second); - } - if (!binary) os << std::endl; WriteToken(os, binary, ""); WriteBasicType(os, binary, commands.size()); @@ -1046,7 +1044,6 @@ NnetComputation::NnetComputation(const NnetComputation &other): indexes(other.indexes), indexes_multi(other.indexes_multi), indexes_ranges(other.indexes_ranges), - input_output_info(other.input_output_info), commands(other.commands), need_model_derivative(other.need_model_derivative), indexes_cuda(other.indexes_cuda), @@ -1065,7 +1062,6 @@ NnetComputation& NnetComputation::operator = (const NnetComputation &other) { indexes = other.indexes; indexes_multi = other.indexes_multi; indexes_ranges = other.indexes_ranges; - input_output_info = other.input_output_info; commands = other.commands; need_model_derivative = other.need_model_derivative; indexes_cuda = other.indexes_cuda; @@ -1081,5 +1077,20 @@ NnetComputation& NnetComputation::operator = (const NnetComputation &other) { return *this; } + +void NnetComputation::GetWholeSubmatrices( + std::vector *whole_submatrices) const { + whole_submatrices->resize(matrices.size(), 0); + int32 num_submatrices = submatrices.size(); + for (int32 s = 1; s < num_submatrices; s++) { + if (IsWholeMatrix(s)) { + int32 m = submatrices[s].matrix_index; + (*whole_submatrices)[m] = s; + } + } +} + + + } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-computation.h b/src/nnet3/nnet-computation.h index 0d0b13547bf..6097b059d23 100644 --- a/src/nnet3/nnet-computation.h +++ b/src/nnet3/nnet-computation.h @@ -91,9 +91,9 
+91,9 @@ struct IoSpecification {
   void Swap(IoSpecification *other);
 
   void Read(std::istream &istream, bool binary);
-  
+
   void Write(std::ostream &ostream, bool binary) const;
-  
+
   bool operator== (const IoSpecification &other) const;
 };
 
@@ -147,7 +147,7 @@ struct ComputationRequest {
   void Read(std::istream &istream, bool binary);
 
   void Write(std::ostream &ostream, bool binary) const;
-  
+
   bool operator== (const ComputationRequest &other) const;
 };
 
@@ -158,14 +158,19 @@ struct ComputationRequest {
    the NnetComputation.  We declare it outside that class because it's so
    frequently used and we got tired of typing NnetComputation:: everywhere.
    We document the commands here.
-
-   - kAllocMatrixUndefined: Allocate a matrix.  arg1 = index of matrix.
-   - kAllocMatrixZeroed: Allocate and zero a matrix.  arg1 = index of matrix.
-   - kDeallocMatrix: Deallocate a matrix.  arg1 = index of matrix.
-   - kAllocMatrixFromOther: initialize matrix indexed arg1 using memory
-     from matrix indexed arg2 (using shallow swap).
-   - kAllocMatrixFromOtherZeroed: initialize matrix indexed arg1 using memory
-     from matrix indexed arg2 (using shallow swap), then zero the matrix
+   Note: for operations that naturally need to operate on entire matrices
+   (i.e. allocation commands and input and output commands), we use the
+   submatrix indexes of them, which turns out to be more convenient for
+   optimization; but these submatrix indexes must refer to the whole of
+   a matrix.
+
+   - kAllocMatrixUndefined: Allocate a matrix.  arg1 = submatrix index.
+   - kAllocMatrixZeroed: Allocate and zero a matrix.  arg1 = submatrix index.
+   - kDeallocMatrix: Deallocate a matrix.  arg1 = submatrix index.
+   - kAllocMatrixFromOther: initialize matrix with submatrix index arg1 using memory
+     from matrix with submatrix index arg2 (using shallow swap).
+   - kAllocMatrixFromOtherZeroed: initialize matrix with submatrix index arg1 using memory
+     from matrix with submatrix index arg2 (using shallow swap), then zero the matrix
      we just allocated.
    - kPropagate: Forward computation of neural net, see Component::Propagate()
     - arg1 is component-index in neural net
@@ -204,8 +209,19 @@ struct ComputationRequest {
    - kAddRowRanges: call \ref CuMatrix::AddRowRanges() "AddRowRanges()" on
      sub-matrix arg1, with arg2 as source sub-matrix, and indexes given
      indexes_ranges[arg3].
+   - kAcceptInput: accepts a matrix of input from the user, which may be either
+     features, or derivatives w.r.t. the output.  arg1 is the submatrix index of
+     a whole matrix that the input goes to, and arg2 is the index of the network
+     node associated with it (e.g. the node of "input" or "ivector"), for
+     purposes of double checking.
+   - kProvideOutput: outputs a matrix to the user: either a network output, or a
+     matrix of derivatives w.r.t. an input.  arg1 is the submatrix index of the
+     output (which we expect to be a whole matrix), arg2 is the index of the
+     network node associated with it (e.g. the node for "output").
    - kNoOperation: does nothing (sometimes useful during optimization)
-   - kNoOperationMarker: does nothing, but used to mark end of forward commands.
+   - kNoOperationMarker: does nothing, but used to mark end of a block
+     of commands (like forward commands).
+ */ enum CommandType { kAllocMatrixUndefined, kAllocMatrixZeroed, @@ -213,7 +229,9 @@ enum CommandType { kPropagate, kStoreStats, kBackprop, kBackpropNoModelUpdate, kMatrixCopy, kMatrixAdd, kCopyRows, kAddRows, kCopyRowsMulti, kCopyToRowsMulti, kAddRowsMulti, kAddToRowsMulti, - kAddRowRanges, kNoOperation, kNoOperationMarker }; + kAddRowRanges, kAcceptInput, kProvideOutput, + kNoOperation, kNoOperationMarker }; + // struct NnetComputation defines the specific steps of a neural-net @@ -272,9 +290,9 @@ struct NnetComputation { }; // "matrices" describes the sizes of the matrices that we use as variables in - // the computation [note: index zero is reserved for an empty matrix]. Most - // commands refer to submatrices below (note: each matrix will have its own - // sub-matrix that just refers to the entire matrix). + // the computation [note: index zero is reserved for an empty matrix]. Note: + // we generally don't refer to matrices, even ones known to be whole matrices, + // using their matrix index directly, but via their submatrix indexes. std::vector matrices; // debug information for each of the matrices (indexed by matrix-index), only @@ -312,11 +330,11 @@ struct NnetComputation { // end-index) std::vector > > indexes_ranges; - // Information about where the values and derivatives of inputs and outputs of - // the neural net live. Indexed by the node_index (the same index as used for - // the nodes_ array in the Nnet), each pair is (value_matrix_index, - // deriv_matrix_index), with 0 for derivatives that are not present. - unordered_map > input_output_info; +// // Information about where the values and derivatives of inputs and outputs of +// // the neural net live. Indexed by the node_index (the same index as used for +// // the nodes_ array in the Nnet), each pair is (value_matrix_index, +// // deriv_matrix_index), with 0 for derivatives that are not present. +// unordered_map > input_output_info; // The sequence of commands. std::vector commands; @@ -369,6 +387,11 @@ struct NnetComputation { void GetSubmatrixStrings(const Nnet &nnet, std::vector *submat_strings) const; + // This function outputs a vector, indexed by matrix index, that gives you for + // each matrix, the index of a submatrix which refers to the whole of that + // matrix (or 0 if there is no such submatrix, which should not happen). + void GetWholeSubmatrices(std::vector *whole_submatrices) const; + // This function outputs information similar to Print(), but outputs the // preamble as a string and a vector of strings, one per command (with no diff --git a/src/nnet3/nnet-compute-test.cc b/src/nnet3/nnet-compute-test.cc index 6cdde0015f2..f69d4d3036a 100644 --- a/src/nnet3/nnet-compute-test.cc +++ b/src/nnet3/nnet-compute-test.cc @@ -142,7 +142,7 @@ void UnitTestNnetCompute() { KALDI_LOG << "Input sum is " << temp.Sum(); computer.AcceptInput(request.inputs[i].name, &temp); } - computer.Forward(); + computer.Run(); const CuMatrixBase &output(computer.GetOutput("output")); TestNnetDecodable(request, inputs, nnet, output); @@ -151,15 +151,16 @@ void UnitTestNnetCompute() { CuMatrix output_deriv(output.NumRows(), output.NumCols()); output_deriv.SetRandn(); // output_deriv sum won't be informative so don't print it. 
- if (request.outputs[0].has_deriv) - computer.AcceptOutputDeriv("output", &output_deriv); - computer.Backward(); - for (size_t i = 0; i < request.inputs.size(); i++) { - if (request.inputs[i].has_deriv) { - const CuMatrixBase &in_deriv = - computer.GetInputDeriv(request.inputs[i].name); - KALDI_LOG << "Input-deriv sum for input '" - << request.inputs[i].name << "' is " << in_deriv.Sum(); + if (request.outputs[0].has_deriv) { + computer.AcceptInput("output", &output_deriv); + computer.Run(); + for (size_t i = 0; i < request.inputs.size(); i++) { + if (request.inputs[i].has_deriv) { + const CuMatrixBase &in_deriv = + computer.GetOutput(request.inputs[i].name); + KALDI_LOG << "Input-deriv sum for input '" + << request.inputs[i].name << "' is " << in_deriv.Sum(); + } } } } @@ -171,7 +172,7 @@ void UnitTestNnetCompute() { int main() { using namespace kaldi; using namespace kaldi::nnet3; - //SetVerboseLevel(2); + SetVerboseLevel(4); for (kaldi::int32 loop = 0; loop < 2; loop++) { diff --git a/src/nnet3/nnet-compute.cc b/src/nnet3/nnet-compute.cc index 34f5df523f1..b497e34aac4 100644 --- a/src/nnet3/nnet-compute.cc +++ b/src/nnet3/nnet-compute.cc @@ -30,7 +30,7 @@ NnetComputer::NnetComputer(const NnetComputeOptions &options, const Nnet &nnet, Nnet *nnet_to_update): options_(options), computation_(computation), nnet_(nnet), - nnet_to_update_(nnet_to_update) { + program_counter_(0), nnet_to_update_(nnet_to_update) { KALDI_ASSERT(computation.indexes_cuda.size() == computation.indexes.size() && computation.indexes_ranges_cuda.size() == computation.indexes_ranges.size() && "You must call NnetComputation::ComputeCudaIndexes() before " @@ -147,29 +147,37 @@ void NnetComputer::DebugAfterExecute(int32 command, void NnetComputer::ExecuteCommand(int32 command) { const NnetComputation::Command &c = computation_.commands[command]; + int32 m1, m2; try { switch (c.command_type) { case kAllocMatrixZeroed: - matrices_[c.arg1].Resize(computation_.matrices[c.arg1].num_rows, - computation_.matrices[c.arg1].num_cols, + m1 = computation_.submatrices[c.arg1].matrix_index; + matrices_[m1].Resize(computation_.matrices[m1].num_rows, + computation_.matrices[m1].num_cols, kSetZero, - computation_.matrices[c.arg1].stride_type); + computation_.matrices[m1].stride_type); break; case kAllocMatrixUndefined: - matrices_[c.arg1].Resize(computation_.matrices[c.arg1].num_rows, - computation_.matrices[c.arg1].num_cols, + m1 = computation_.submatrices[c.arg1].matrix_index; + matrices_[m1].Resize(computation_.matrices[m1].num_rows, + computation_.matrices[m1].num_cols, kUndefined, - computation_.matrices[c.arg1].stride_type); + computation_.matrices[m1].stride_type); break; case kDeallocMatrix: - matrices_[c.arg1].Resize(0, 0); + m1 = computation_.submatrices[c.arg1].matrix_index; + matrices_[m1].Resize(0, 0); break; case kAllocMatrixFromOther: - matrices_[c.arg1].Swap(&(matrices_[c.arg2])); + m1 = computation_.submatrices[c.arg1].matrix_index; + m2 = computation_.submatrices[c.arg2].matrix_index; + matrices_[m1].Swap(&(matrices_[m2])); break; case kAllocMatrixFromOtherZeroed: - matrices_[c.arg1].Swap(&(matrices_[c.arg2])); - matrices_[c.arg1].SetZero(); + m1 = computation_.submatrices[c.arg1].matrix_index; + m2 = computation_.submatrices[c.arg2].matrix_index; + matrices_[m1].Swap(&(matrices_[m2])); + matrices_[m1].SetZero(); break; case kPropagate: { const Component *component = nnet_.GetComponent(c.arg1); @@ -352,69 +360,56 @@ void NnetComputer::GetPointers(int32 indexes_multi_index, reinterpret_cast*>(pointers)); } -void 
NnetComputer::Forward() { - CheckInputs(false); - int32 size = computation_.commands.size(), i = 0; +void NnetComputer::Run() { const std::vector &c = computation_.commands; - CommandDebugInfo info; - Timer timer; - double total_elapsed_previous = 0.0; - - for (; i < size && c[i].command_type != kNoOperationMarker; - i++) { - if (debug_) - DebugBeforeExecute(i, &info); - ExecuteCommand(i); - if (debug_) { - double total_elapsed_now = timer.Elapsed(); - DebugAfterExecute(i, info, total_elapsed_now - total_elapsed_previous); - total_elapsed_previous = total_elapsed_now; - } - - } - -} + int32 num_commands = c.size(); + if (program_counter_ >= num_commands) + KALDI_ERR << "Running computation that has already finished."; + CheckNoPendingIo(); -void NnetComputer::Backward() { - CheckInputs(true); - int32 size = computation_.commands.size(), i = 0; - const std::vector &c = computation_.commands; - for (; i < size && c[i].command_type != kNoOperationMarker; - i++); CommandDebugInfo info; Timer timer; double total_elapsed_previous = 0.0; - for (; i < size; i++) { + for (; program_counter_ < num_commands; program_counter_++) { + if (c[program_counter_].command_type == kAcceptInput || + c[program_counter_].command_type == kProvideOutput) { + // We have hit a part of the computation that requires user + // interaction, e.g. the end of the forward or backward phase. + break; + } if (debug_) - DebugBeforeExecute(i, &info); - ExecuteCommand(i); + DebugBeforeExecute(program_counter_, &info); + ExecuteCommand(program_counter_); if (debug_) { double total_elapsed_now = timer.Elapsed(); - DebugAfterExecute(i, info, total_elapsed_now - total_elapsed_previous); + DebugAfterExecute(program_counter_, info, + total_elapsed_now - total_elapsed_previous); total_elapsed_previous = total_elapsed_now; } } } -void NnetComputer::AcceptInput(const std::string &input_name, +void NnetComputer::AcceptInput(const std::string &node_name, CuMatrix *input) { - bool is_output = false, is_deriv = false; - int32 matrix_index = GetMatrixIndex(input_name, is_output, is_deriv); - KALDI_ASSERT(static_cast(matrix_index) < matrices_.size()); + bool is_output = false; + int32 matrix_index = GetIoMatrixIndex(node_name, is_output); + const NnetComputation::MatrixInfo &matrix_info = computation_.matrices[matrix_index]; - if (input->NumRows() != matrix_info.num_rows) - KALDI_ERR << "Num-rows mismatch for input '" << input_name + if (input->NumRows() != matrix_info.num_rows) { + KALDI_ERR << "Num-rows mismatch for input '" << node_name << "': " << matrix_info.num_rows << " in computation-request, " << input->NumRows() << " provided."; - if (input->NumCols() != matrix_info.num_cols) - KALDI_ERR << "Num-cols mismatch for input '" << input_name + } + if (input->NumCols() != matrix_info.num_cols) { + KALDI_ERR << "Num-cols mismatch for input '" << node_name << "': " << matrix_info.num_cols << " in computation-request, " << input->NumCols() << " provided."; + } if (matrix_info.stride_type == kDefaultStride || input->Stride() == input->NumCols()) { matrices_[matrix_index].Swap(input); @@ -423,130 +418,96 @@ void NnetComputer::AcceptInput(const std::string &input_name, matrix_info.num_cols, kUndefined, kStrideEqualNumCols); matrices_[matrix_index].CopyFromMat(*input); + input->Resize(0, 0); } - input->Resize(0, 0); } -const CuMatrixBase &NnetComputer::GetInputDeriv( - const std::string &input_name) const { - bool is_output = false, is_deriv = true; - int32 matrix_index = GetMatrixIndex(input_name, is_output, is_deriv); - if 
(matrices_[matrix_index].NumRows() == 0)
- KALDI_ERR << "GetInputDeriv called before it is ready (before Backward()?)";
- return matrices_[matrix_index];
-}
-
 const CuMatrixBase<BaseFloat> &NnetComputer::GetOutput(
- const std::string &output_name) const {
- bool is_output = true, is_deriv = false;
- int32 matrix_index = GetMatrixIndex(output_name, is_output, is_deriv);
- if (matrices_[matrix_index].NumRows() == 0)
- KALDI_ERR << "GetOutput called when output not ready (before Forward()?)";
+ const std::string &node_name) {
+ bool is_output = true;
+ int32 matrix_index = GetIoMatrixIndex(node_name, is_output);
+ KALDI_ASSERT(matrices_[matrix_index].NumRows() != 0);
 return matrices_[matrix_index]; }
-void NnetComputer::GetOutputDestructive(
- const std::string &output_name,
- CuMatrix<BaseFloat> *output) {
- bool is_output = true, is_deriv = false;
- int32 matrix_index = GetMatrixIndex(output_name, is_output, is_deriv);
- if (matrices_[matrix_index].NumRows() == 0)
- KALDI_ERR << "GetOutput called when output not ready (before Forward()?)";
- output->Resize(0, 0);
+
+void NnetComputer::GetOutputDestructive(const std::string &node_name,
+ CuMatrix<BaseFloat> *output) {
+ bool is_output = true;
+ int32 matrix_index = GetIoMatrixIndex(node_name, is_output);
+ KALDI_ASSERT(matrices_[matrix_index].NumRows() != 0);
 matrices_[matrix_index].Swap(output);
+ matrices_[matrix_index].Resize(0, 0);
 }
-void NnetComputer::AcceptOutputDeriv(const std::string &output_name,
- CuMatrix<BaseFloat> *output_deriv) {
- bool is_output = true, is_deriv = true;
- int32 matrix_index = GetMatrixIndex(output_name, is_output, is_deriv);
- KALDI_ASSERT(static_cast<size_t>(matrix_index) < matrices_.size());
- const NnetComputation::MatrixInfo &matrix_info =
- computation_.matrices[matrix_index];
- if (output_deriv->NumRows() != matrix_info.num_rows)
- KALDI_ERR << "Num-rows mismatch for output-deriv '" << output_name
- << "': " << matrix_info.num_rows
- << " in computation-request, " << output_deriv->NumRows()
- << " provided.";
- if (output_deriv->NumCols() != matrix_info.num_cols)
- KALDI_ERR << "Num-cols mismatch for output-deriv '" << output_name
- << "': " << matrix_info.num_cols
- << " in computation-request, " << output_deriv->NumCols()
- << " provided.";
- if (matrix_info.stride_type == kDefaultStride ||
- output_deriv->Stride() == output_deriv->NumCols()) {
- matrices_[matrix_index].Swap(output_deriv);
- } else {
- matrices_[matrix_index].Resize(matrix_info.num_rows,
- matrix_info.num_cols,
- kUndefined, kStrideEqualNumCols);
- matrices_[matrix_index].CopyFromMat(*output_deriv);
+void NnetComputer::CheckNoPendingIo() {
+ const std::vector<NnetComputation::Command> &c = computation_.commands;
+ while (program_counter_ < static_cast<int32>(c.size()) &&
+ (c[program_counter_].command_type == kAcceptInput ||
+ c[program_counter_].command_type == kProvideOutput)) {
+ pending_commands_.push_back(program_counter_);
+ program_counter_++;
+ }
+ while (!pending_commands_.empty()) {
+ // the order here doesn't really matter; we go from back to front simply
+ // because pop_back() is cheapest (not that efficiency really matters here).
+ int32 last_command = pending_commands_.back();
+ if (c[last_command].command_type == kProvideOutput) {
+ // it's safe to ignore the fact that this output was never given to the
+ // user.
+ KALDI_VLOG(3) << "Output to node '" << nnet_.GetNodeName(c[last_command].arg2)
+ << "' was available but not used.";
+ pending_commands_.pop_back();
+ } else {
+ // but we can't ignore it if we needed input from the user that hasn't
+ // been provided.
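+ // (Illustrative scenario, with hypothetical node names: if the
+ // computation expects kAcceptInput commands for "input" and then
+ // "ivector", but so far the user has only called AcceptInput("ivector"),
+ // then the "input" command is still sitting in pending_commands_, and we
+ // report the missing input below.)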
+ KALDI_ASSERT(c[last_command].command_type == kAcceptInput);
+ int32 node = c[last_command].arg2;
+ KALDI_ERR << "Cannot run computation because we did not get input for node '"
+ << nnet_.GetNodeName(node) << "'";
+ }
 } }
-int32 NnetComputer::GetMatrixIndex(
- const std::string &node_name, bool is_output, bool is_deriv) const {
+int32 NnetComputer::GetIoMatrixIndex(const std::string &node_name, bool is_output) {
+ const std::vector<NnetComputation::Command> &c = computation_.commands;
 int32 node_index = nnet_.GetNodeIndex(node_name); if (node_index == -1) KALDI_ERR << "No node named '" << node_name << "' in network.";
- if (is_output) {
- if (!nnet_.IsOutputNode(node_index))
- KALDI_ERR << "Expecting output node; node named '"
- << node_name << "' is not output node.";
- } else {
- if (nnet_.IsOutputNode(node_index))
- KALDI_ERR << "Expecting input node or component node; node named '"
- << node_name << "' is output node.";
- }
- unordered_map<int32, std::pair<int32, int32> >::const_iterator
- iter = computation_.input_output_info.find(node_index),
- end = computation_.input_output_info.end();
- if (iter == end)
- KALDI_ERR << "Not expecting input or output for node named '" << node_name
- << "' (not in computation request)";
- std::pair<int32, int32> locations = iter->second;
- int32 location;
- if (is_deriv) {
- location = locations.second;
- if (locations.second <= 0) // No deriv expected.
- KALDI_ERR << "Not expecting derivative information for node named '"
- << node_name << "' (not in computation request)";
- } else {
- location = locations.first;
+ // first make sure all the I/O commands that we immediately expect are listed
+ // in 'pending_commands_'.
+ while (program_counter_ < static_cast<int32>(computation_.commands.size()) &&
+ (c[program_counter_].command_type == kAcceptInput ||
+ c[program_counter_].command_type == kProvideOutput ||
+ c[program_counter_].command_type == kNoOperationMarker)) {
+ if (c[program_counter_].command_type != kNoOperationMarker)
+ pending_commands_.push_back(program_counter_);
+ program_counter_++;
 }
- KALDI_ASSERT(static_cast<size_t>(location) < matrices_.size());
- return location;
-}
-
-void NnetComputer::CheckInputs(bool check_output_deriv) const {
- unordered_map<int32, std::pair<int32, int32> >::const_iterator
- iter = computation_.input_output_info.begin(),
- end = computation_.input_output_info.end();
- for (; iter != end; ++iter) {
- int32 node_index = iter->first,
- value_matrix_index = iter->second.first,
- deriv_matrix_index = iter->second.second;
- std::string name = nnet_.GetNodeName(node_index);
- if (nnet_.IsOutputNode(node_index)) {
- if (check_output_deriv && deriv_matrix_index > 0) {
- KALDI_ASSERT(static_cast<size_t>(deriv_matrix_index) < matrices_.size());
- if (matrices_[deriv_matrix_index].NumRows() == 0)
- KALDI_ERR << "Output-derivative required but not provided for node '"
- << name << "'.";
- }
- } else {
- if (!check_output_deriv) {
- if (matrices_[value_matrix_index].NumRows() == 0)
- KALDI_ERR << "Input required but not provided for node '"
- << name << "'.";
- }
+ for (size_t i = 0; i < pending_commands_.size(); i++) {
+ int32 command = pending_commands_[i];
+ bool this_command_is_output =
+ (c[command].command_type == kProvideOutput);
+ int32 this_submatrix_index = c[command].arg1,
+ this_node_index = c[command].arg2;
+ if (this_command_is_output == is_output && node_index == this_node_index) {
+ pending_commands_.erase(pending_commands_.begin() + i);
+ if (!(computation_.IsWholeMatrix(this_submatrix_index)))
+ KALDI_ERR << "Getting input or output that is not a whole matrix "
+ << "(probably some optimization
code needs to be changed)");
+ return computation_.submatrices[this_submatrix_index].matrix_index;
+ }
 }
+ // if you get the following error it will likely be a bug in the calling code,
+ // or possibly due to giving the wrong egs.
+ KALDI_ERR << "Could not "
+ << (is_output ? "provide output " : "accept input ")
+ << "for network node " << node_name
+ << " (it is not expected at this point in the computation)";
+ return 0; // Suppress compiler warnings; this line will never be reached.
 }
+
 void NnetComputer::AcceptInputs(const Nnet &nnet, const std::vector<NnetIo> &io_vec) { for (size_t i = 0; i < io_vec.size(); i++) {
diff --git a/src/nnet3/nnet-compute.h b/src/nnet3/nnet-compute.h index d1c28e8bd7c..32839755828 100644 --- a/src/nnet3/nnet-compute.h +++ b/src/nnet3/nnet-compute.h
@@ -53,8 +53,8 @@ struct NnetComputeOptions { "computation" object. You call, in sequence: the constructor, then AcceptInput() [or AcceptInputs()],
- then Forward(), then GetOutput(), then if applicable (Backward(), then if
- applicable GetInputDeriv()).
+ then Run(), then GetOutput() [and, if applicable, AcceptInput() with the
+ derivative w.r.t. the output], then, if there is a backward computation,
+ Run() [then, if applicable, GetOutput() to retrieve the derivatives w.r.t.
+ the inputs].
 */ class NnetComputer { public:
@@ -67,52 +67,55 @@ class NnetComputer { const Nnet &nnet, Nnet *nnet_to_update);
- /// e.g. AcceptInput ("input", input_mat). Will crash if there is no
- /// input node with the given name. This function is destructive of "input"
- /// as it takes it using the Swap function of CuMatrix.
- /// Must have the same number of rows as the corresponding input described
- /// in the ComputationRequest e.g. the indexes.size() in the corresponding
+ /// e.g. AcceptInput ("input", &input_mat), or for derivatives w.r.t. the
+ /// output, AcceptInput("output", &output_deriv_mat). Will crash if there is
+ /// no input or output node with the given name. This function is destructive
+ /// of "input" as it takes it using the Swap function of CuMatrix. Must have
+ /// the same number of rows as the corresponding input described in the
+ /// ComputationRequest, e.g. the indexes.size() in the corresponding
 /// IoSpecification.
- void AcceptInput(const std::string &input_name,
+ void AcceptInput(const std::string &node_name,
 CuMatrix<BaseFloat> *input);
- /// This function calls AcceptInput() in turn on all the inputs in the
- /// training example. It needs "nnet" only in order to distinguish inputs
+ /// This convenience function calls AcceptInput() in turn on all the inputs in
+ /// the training example. It needs "nnet" only in order to distinguish inputs
 /// from outputs. void AcceptInputs(const Nnet &nnet, const std::vector<NnetIo> &io);
- // Does the forward computation.
- void Forward();
+ /// This does either the forward or backward computation, depending on when
+ /// it is called (in a typical computation, the first time you call this it
+ /// will do the forward computation; then you'll take the outputs and provide
+ /// derivatives; and the second time you call it, it will do the backward
+ /// computation). There used to be two separate functions
+ /// Forward() and Backward().
+ void Run();
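+
+  /// Typical usage, as a hedged sketch (this example is not from the patch;
+  /// it assumes the computation was compiled with input and output
+  /// derivatives requested, and nodes named "input" and "output"):
+  /// \code
+  ///   NnetComputer computer(opts, computation, nnet, &nnet_to_update);
+  ///   computer.AcceptInput("input", &input_feats);    // provide features
+  ///   computer.Run();                                 // forward computation
+  ///   const CuMatrixBase<BaseFloat> &output = computer.GetOutput("output");
+  ///   // ... compute the objective, and 'output_deriv', from 'output' ...
+  ///   computer.AcceptInput("output", &output_deriv);  // deriv w.r.t. output
+  ///   computer.Run();                                 // backward computation
+  ///   const CuMatrixBase<BaseFloat> &input_deriv = computer.GetOutput("input");
+  /// \endcode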
- // e.g. GetOutput ("output"). Will crash if no such output.
- const CuMatrixBase<BaseFloat> &GetOutput(const std::string &output_name) const;
+ // e.g. GetOutput("output"). This function can also be used to get
+ // derivatives w.r.t. inputs. It's non-const because it may only
+ // be called once and it keeps track of that.
+ const CuMatrixBase<BaseFloat> &GetOutput(const std::string &node_name);
 // Version of GetOutput that calls Swap(), destroying the output stored inside // this object. You should probably not use this if you plan to call
- // Backward() on the same NnetComputer object, it may lead to a crash.
+ // Run() for the backward computation on the same NnetComputer object, or if
+ // it's a recurrent computation-- it may lead to a crash.
 void GetOutputDestructive(const std::string &output_name, CuMatrix<BaseFloat> *output);
- /// e.g. AcceptOutputDeriv("output", &output_deriv_mat).
- void AcceptOutputDeriv(const std::string &output_name,
- CuMatrix<BaseFloat> *output_deriv);
-
-
- // Does the backward computation.
- void Backward();
-
- // e.g. GetInputDeriv ("input"). Will crash if no such input derivative.
- // You may only call this if you requested this input derivative in the
- // ComputationRequest.
- const CuMatrixBase<BaseFloat> &GetInputDeriv(
- const std::string &input_name) const;
 private: const NnetComputeOptions &options_; const NnetComputation &computation_; const Nnet &nnet_;
+ int32 program_counter_; // command index to execute next.
+ // To deal with inputs and outputs that are not provided/taken by the user in
+ // the same order as listed in the computation, pending_commands_ contains a
+ // list of program commands that were skipped over but are in the queue to be
+ // executed.
+ std::vector<int32> pending_commands_;
+
 Nnet *nnet_to_update_; bool debug_; // command_attributes_ is only used if debug_=true.
@@ -125,15 +128,26 @@ class NnetComputer { // The matrices used in the computation. std::vector<CuMatrix<BaseFloat> > matrices_;
+ // executes the command in computation_.commands[command].
 void ExecuteCommand(int32 command);
- // Returns the matrix index where the input or output matrix index for
- // "node_name" is stored (or its corresponding derivative, if is_deriv==true).
- // "is_output" tells the code that this is an output node, as opposed to an
- // input node; it's used only for checking.
- int32 GetMatrixIndex(const std::string &node_name,
- bool is_output, bool is_deriv) const;
+ // Returns the index of the matrix that stores the input (if is_output==false)
+ // or output (if is_output==true) for the node named "node_name". This looks
+ // at the next command (at program_counter_) and in pending_commands_, and
+ // sees whether we were expecting any input or output for this node, and if
+ // there is a match, returns it and "consumes" the command by either advancing
+ // program_counter_ or consuming something from pending_commands_.
+ // If there is not a match (i.e. we were not expecting this type of I/O
+ // at this point in the computation), it prints an error and dies.
+ int32 GetIoMatrixIndex(const std::string &node_name, bool is_output);
+
+
+ // This function, called from Run(), checks that there is no pending I/O
+ // that would block the running of the computation; it crashes if there was
+ // pending input, and ignores and skips over any pending output.
+ void CheckNoPendingIo();
 CuSubMatrix<BaseFloat> GetSubMatrix(int32 submatrix_index);
@@ -144,11 +158,6 @@ class NnetComputer { int32 num_cols, CuArray<BaseFloat*> *pointers);
- // with check_output_deriv = false, checks we have all inputs.
- // with check_output_deriv = true, checks we have all required output-derivs.
- void CheckInputs(bool check_output_deriv) const;
-
-
 struct CommandDebugInfo { // Uncentered standard deviations of elements of all matrices that this // command writes.
Dimension is the same as diff --git a/src/nnet3/nnet-derivative-test.cc b/src/nnet3/nnet-derivative-test.cc index a12ca2ae0af..1f8aa7dcfec 100644 --- a/src/nnet3/nnet-derivative-test.cc +++ b/src/nnet3/nnet-derivative-test.cc @@ -198,7 +198,7 @@ void UnitTestNnetModelDerivatives() { } KALDI_LOG << "Running forward computation"; - computer.Forward(); + computer.Run(); const CuMatrixBase &output(computer.GetOutput("output")); KALDI_LOG << "Output sum for pass " << pass << " is " << output.Sum(); @@ -208,9 +208,9 @@ void UnitTestNnetModelDerivatives() { if (pass == 0) { // we need to do the backward computation (to get the model derivative) CuMatrix temp(output_deriv); - computer.AcceptOutputDeriv("output", &temp); + computer.AcceptInput("output", &temp); KALDI_LOG << "Running backward computation"; - computer.Backward(); + computer.Run(); } else { // work out the predicted objf-change as dot-product of deriv and // parameter-change. The expression below can be interpreted as @@ -369,7 +369,7 @@ void UnitTestNnetInputDerivatives() { } KALDI_LOG << "Running forward computation"; - computer.Forward(); + computer.Run(); const CuMatrixBase &output(computer.GetOutput("output")); KALDI_LOG << "Output sum for pass " << pass << " is " << output.Sum(); @@ -379,11 +379,11 @@ void UnitTestNnetInputDerivatives() { if (pass == 0) { // We need to compute the input derivatives. CuMatrix temp(output_deriv); - computer.AcceptOutputDeriv("output", &temp); + computer.AcceptInput("output", &temp); KALDI_LOG << "Running backward computation"; - computer.Backward(); + computer.Run(); for (size_t i = 0; i < request.inputs.size(); i++) { - input_derivs[i] = computer.GetInputDeriv(request.inputs[i].name); + input_derivs[i] = computer.GetOutput(request.inputs[i].name); KALDI_LOG << "Input-deriv norm for '" << request.inputs[i].name << "' is " << input_derivs[i].FrobeniusNorm(); } diff --git a/src/nnet3/nnet-diagnostics.cc b/src/nnet3/nnet-diagnostics.cc index 7f7d485ffe0..e7adeffeb09 100644 --- a/src/nnet3/nnet-diagnostics.cc +++ b/src/nnet3/nnet-diagnostics.cc @@ -69,10 +69,10 @@ void NnetComputeProb::Compute(const NnetExample &eg) { nnet_, deriv_nnet_); // give the inputs to the computer object. computer.AcceptInputs(nnet_, eg.io); - computer.Forward(); + computer.Run(); this->ProcessOutputs(eg, &computer); if (config_.compute_deriv) - computer.Backward(); + computer.Run(); } void NnetComputeProb::ProcessOutputs(const NnetExample &eg, diff --git a/src/nnet3/nnet-discriminative-diagnostics.cc b/src/nnet3/nnet-discriminative-diagnostics.cc index 10f0811c12e..417a6fa05ac 100644 --- a/src/nnet3/nnet-discriminative-diagnostics.cc +++ b/src/nnet3/nnet-discriminative-diagnostics.cc @@ -73,7 +73,7 @@ void NnetDiscriminativeComputeObjf::Compute(const NnetDiscriminativeExample &eg) use_xent_derivative = false; ComputationRequest request; - GetDiscriminativeComputationRequest(nnet_, eg, + GetDiscriminativeComputationRequest(nnet_, eg, need_model_derivative, store_component_stats, use_xent_regularization, use_xent_derivative, @@ -83,10 +83,10 @@ void NnetDiscriminativeComputeObjf::Compute(const NnetDiscriminativeExample &eg) nnet_, deriv_nnet_); // give the inputs to the computer object. 
computer.AcceptInputs(nnet_, eg.inputs); - computer.Forward(); + computer.Run(); this->ProcessOutputs(eg, &computer); if (nnet_config_.compute_deriv) - computer.Backward(); + computer.Run(); } void NnetDiscriminativeComputeObjf::ProcessOutputs( @@ -104,7 +104,7 @@ void NnetDiscriminativeComputeObjf::ProcessOutputs( KALDI_ERR << "Network has no output named " << sup.name; const CuMatrixBase &nnet_output = computer->GetOutput(sup.name); - + bool use_xent = (discriminative_config_.xent_regularize != 0.0); std::string xent_name = sup.name + "-xent"; // typically "output-xent". CuMatrix nnet_output_deriv, xent_deriv; @@ -112,18 +112,18 @@ void NnetDiscriminativeComputeObjf::ProcessOutputs( if (nnet_config_.compute_deriv) nnet_output_deriv.Resize(nnet_output.NumRows(), nnet_output.NumCols(), kUndefined); - + if (use_xent) xent_deriv.Resize(nnet_output.NumRows(), nnet_output.NumCols(), kUndefined); if (objf_info_.count(sup.name) == 0) - objf_info_.insert(std::make_pair(sup.name, + objf_info_.insert(std::make_pair(sup.name, discriminative::DiscriminativeObjectiveInfo(discriminative_config_))); discriminative::DiscriminativeObjectiveInfo *stats = &(objf_info_[sup.name]); - discriminative::ComputeDiscriminativeObjfAndDeriv(discriminative_config_, + discriminative::ComputeDiscriminativeObjfAndDeriv(discriminative_config_, tmodel_, log_priors_, sup.supervision, nnet_output, stats, @@ -132,11 +132,11 @@ void NnetDiscriminativeComputeObjf::ProcessOutputs( (use_xent ? &xent_deriv : NULL)); if (nnet_config_.compute_deriv) - computer->AcceptOutputDeriv(sup.name, &nnet_output_deriv); - + computer->AcceptInput(sup.name, &nnet_output_deriv); + if (use_xent) { if (objf_info_.count(xent_name) == 0) - objf_info_.insert(std::make_pair(xent_name, + objf_info_.insert(std::make_pair(xent_name, discriminative::DiscriminativeObjectiveInfo(discriminative_config_))); discriminative::DiscriminativeObjectiveInfo &xent_stats = objf_info_[xent_name]; @@ -149,7 +149,7 @@ void NnetDiscriminativeComputeObjf::ProcessOutputs( xent_stats.tot_t_weighted += stats->tot_t_weighted; xent_stats.tot_objf += xent_objf; } - + num_minibatches_processed_++; } } @@ -168,21 +168,21 @@ bool NnetDiscriminativeComputeObjf::PrintTotalStats() const { BaseFloat tot_weight = info.tot_t_weighted; BaseFloat tot_objective = info.TotalObjf( discriminative_config_.criterion); - + info.PrintAll(discriminative_config_.criterion); if (info.tot_l2_term == 0.0) { KALDI_LOG << "Overall " << discriminative_config_.criterion << " objective for '" << name << "' is " - << (tot_objective / tot_weight) + << (tot_objective / tot_weight) << " per frame, " << "over " << tot_weight << " frames."; } else { KALDI_LOG << "Overall " << discriminative_config_.criterion << " objective for '" << name << "' is " - << (tot_objective / tot_weight) + << (tot_objective / tot_weight) << " + " << (info.tot_l2_term / tot_weight) << " per frame, " << "over " << tot_weight << " frames."; diff --git a/src/nnet3/nnet-discriminative-training.cc b/src/nnet3/nnet-discriminative-training.cc index e4f6bf9d463..472c5658a61 100644 --- a/src/nnet3/nnet-discriminative-training.cc +++ b/src/nnet3/nnet-discriminative-training.cc @@ -55,7 +55,7 @@ NnetDiscriminativeTrainer::NnetDiscriminativeTrainer( KALDI_WARN << "Could not open cached computation. " "Probably this is the first training iteration."; } - } + } log_priors_.ApplyLog(); } @@ -77,10 +77,10 @@ void NnetDiscriminativeTrainer::Train(const NnetDiscriminativeExample &eg) { (delta_nnet_ == NULL ? 
nnet_ : delta_nnet_)); // give the inputs to the computer object. computer.AcceptInputs(*nnet_, eg.inputs); - computer.Forward(); + computer.Run(); this->ProcessOutputs(eg, &computer); - computer.Backward(); + computer.Run(); if (delta_nnet_ != NULL) { BaseFloat scale = (1.0 - nnet_config.momentum); @@ -124,7 +124,7 @@ void NnetDiscriminativeTrainer::ProcessOutputs(const NnetDiscriminativeExample & CuMatrix nnet_output_deriv(nnet_output.NumRows(), nnet_output.NumCols(), kUndefined); - + bool use_xent = (opts_.discriminative_config.xent_regularize != 0.0); std::string xent_name = sup.name + "-xent"; // typically "output-xent". CuMatrix xent_deriv; @@ -138,14 +138,14 @@ void NnetDiscriminativeTrainer::ProcessOutputs(const NnetDiscriminativeExample & objf_info_[sup.name].stats.Configure(opts_.discriminative_config); objf_info_[sup.name].stats.Reset(); } - - ComputeDiscriminativeObjfAndDeriv(opts_.discriminative_config, + + ComputeDiscriminativeObjfAndDeriv(opts_.discriminative_config, tmodel_, log_priors_, sup.supervision, nnet_output, - &stats, + &stats, &nnet_output_deriv, (use_xent ? &xent_deriv : NULL)); - + if (use_xent) { // this block computes the cross-entropy objective. const CuMatrixBase &xent_output = computer->GetOutput(xent_name); @@ -173,16 +173,16 @@ void NnetDiscriminativeTrainer::ProcessOutputs(const NnetDiscriminativeExample & xent_deriv.MulRowsVec(cu_deriv_weights); } - computer->AcceptOutputDeriv(sup.name, &nnet_output_deriv); + computer->AcceptInput(sup.name, &nnet_output_deriv); objf_info_[sup.name].UpdateStats(sup.name, opts_.discriminative_config.criterion, opts_.nnet_config.print_interval, num_minibatches_processed_++, stats); - + if (use_xent) { xent_deriv.Scale(opts_.discriminative_config.xent_regularize); - computer->AcceptOutputDeriv(xent_name, &xent_deriv); + computer->AcceptInput(xent_name, &xent_deriv); } } } @@ -249,11 +249,11 @@ bool DiscriminativeObjectiveFunctionInfo::PrintTotalStats(const std::string &nam NnetDiscriminativeTrainer::~NnetDiscriminativeTrainer() { delete delta_nnet_; - + if (opts_.nnet_config.write_cache != "") { Output ko(opts_.nnet_config.write_cache, opts_.nnet_config.binary_write_cache); compiler_.WriteCache(ko.Stream(), opts_.nnet_config.binary_write_cache); - } + } } diff --git a/src/nnet3/nnet-optimize-test.cc b/src/nnet3/nnet-optimize-test.cc index 8fa1ef87e36..4d61f6f9f4a 100644 --- a/src/nnet3/nnet-optimize-test.cc +++ b/src/nnet3/nnet-optimize-test.cc @@ -117,9 +117,9 @@ static bool UnitTestNnetOptimizeWithOptions(NnetOptimizeOptions opt_config) { computer_opt.AcceptInput(request.inputs[i].name, &temp2); } KALDI_LOG << "Running non-optimized forward computation"; - computer.Forward(); + computer.Run(); KALDI_LOG << "Running optimized forward computation"; - computer_opt.Forward(); + computer_opt.Run(); const CuMatrixBase &output(computer.GetOutput("output")); KALDI_LOG << "Output sum (not optimized) is " << output.Sum(); @@ -136,20 +136,20 @@ static bool UnitTestNnetOptimizeWithOptions(NnetOptimizeOptions opt_config) { CuMatrix output_deriv_opt(output_deriv); if (request.outputs[0].has_deriv) { - computer.AcceptOutputDeriv("output", &output_deriv); - computer_opt.AcceptOutputDeriv("output", &output_deriv_opt); + computer.AcceptInput("output", &output_deriv); + computer_opt.AcceptInput("output", &output_deriv_opt); } KALDI_LOG << "Running non-optimized backward computation"; - computer.Backward(); + computer.Run(); KALDI_LOG << "Running optimized backward computation"; - computer_opt.Backward(); + computer_opt.Run(); for (size_t i 
= 0; i < request.inputs.size(); i++) { if (request.inputs[i].has_deriv) { const CuMatrixBase &in_deriv = - computer.GetInputDeriv(request.inputs[i].name); + computer.GetOutput(request.inputs[i].name); const CuMatrixBase &in_deriv_opt = - computer_opt.GetInputDeriv(request.inputs[i].name); + computer_opt.GetOutput(request.inputs[i].name); KALDI_LOG << "Input-deriv sum for input '" << request.inputs[i].name << "' (non-optimized) is " << in_deriv.Sum(); KALDI_LOG << "Input-deriv sum for input '" << request.inputs[i].name diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc index df7f975db86..85a6c926bcd 100644 --- a/src/nnet3/nnet-optimize-utils.cc +++ b/src/nnet3/nnet-optimize-utils.cc @@ -33,8 +33,12 @@ void IdentifySubmatrixArgs(NnetComputation::Command *c, case kAllocMatrixZeroed: case kAllocMatrixUndefined: case kDeallocMatrix: + submatrix_args->push_back(&c->arg1); + break; case kAllocMatrixFromOther: case kAllocMatrixFromOtherZeroed: + submatrix_args->push_back(&c->arg1); + submatrix_args->push_back(&c->arg2); break; case kPropagate: submatrix_args->push_back(&c->arg3); @@ -64,6 +68,9 @@ void IdentifySubmatrixArgs(NnetComputation::Command *c, case kCopyToRowsMulti: submatrix_args->push_back(&c->arg1); break; + case kAcceptInput: case kProvideOutput: + submatrix_args->push_back(&c->arg1); + break; case kNoOperation: case kNoOperationMarker: break; @@ -87,40 +94,13 @@ void IdentifySubmatrixArgs(std::vector *commands, } -void IdentifyMatrixArgs(std::vector *commands, - std::vector *matrix_args) { - matrix_args->clear(); - std::vector::iterator iter = commands->begin(), - end = commands->end(); - std::vector this_matrix_args; - for (; iter != end; ++iter) { - IdentifyMatrixArgs(&(*iter), &this_matrix_args); - matrix_args->insert(matrix_args->end(), - this_matrix_args.begin(), - this_matrix_args.end()); - } -} - -void IdentifyMatrixArgsInComputation(bool include_in_submatrices, - NnetComputation *computation, +void IdentifyMatrixArgsInComputation(NnetComputation *computation, std::vector *matrix_args) { - IdentifyMatrixArgs(&(computation->commands), matrix_args); int32 num_submatrices = computation->submatrices.size(); - matrix_args->reserve(matrix_args->size() + - (include_in_submatrices ? 
- computation->submatrices.size() : 0) +
- 2 * computation->input_output_info.size());
- if (include_in_submatrices)
- for (int32 s = 1; s < num_submatrices; s++)
- matrix_args->push_back(&(computation->submatrices[s].matrix_index));
- unordered_map<int32, std::pair<int32, int32> >::iterator
- iter = computation->input_output_info.begin(),
- end = computation->input_output_info.end();
- for (; iter != end; ++iter) {
- matrix_args->push_back(&(iter->second.first));
- matrix_args->push_back(&(iter->second.second));
- }
+ matrix_args->reserve(computation->submatrices.size());
+ for (int32 s = 1; s < num_submatrices; s++)
+ matrix_args->push_back(&(computation->submatrices[s].matrix_index));
 }
@@ -167,25 +147,6 @@ void IdentifyIndexesArgs(std::vector<NnetComputation::Command> *commands,
-void IdentifyMatrixArgs(NnetComputation::Command *c,
- std::vector<int32*> *matrix_args) {
- matrix_args->clear();
- switch (c->command_type) {
- case kAllocMatrixZeroed:
- case kAllocMatrixUndefined:
- case kDeallocMatrix:
- matrix_args->push_back(&c->arg1);
- break;
- case kAllocMatrixFromOther:
- case kAllocMatrixFromOtherZeroed:
- matrix_args->push_back(&c->arg1);
- matrix_args->push_back(&c->arg2);
- break;
- default:
- break;
- }
-}
-
 // static int32 ComputationRenumberer::CreateRenumbering( const std::vector<bool> &used,
@@ -276,22 +237,10 @@ void ComputationRenumberer::ComputeMatrixIsUsed() { matrix_is_used_.clear(); matrix_is_used_.resize(computation_->matrices.size(), false); matrix_is_used_[0] = true;
-
- std::vector<int32*> matrix_args;
- bool include_in_submatrices = false;
- IdentifyMatrixArgsInComputation(include_in_submatrices,
- computation_, &matrix_args);
- std::vector<int32*>::iterator iter = matrix_args.begin(),
- end = matrix_args.end();
- for (; iter != end; ++iter) {
- int32 matrix_index = **iter;
- if (matrix_index > 0)
- matrix_is_used_[matrix_index] = true;
- }
 // We also need to take into account when matrices are used indirectly via // submatrices (which is actually the main way they are accessed).
- int32 num_submatrices_orig = computation_->submatrices.size();
- for (int32 s = 1; s < num_submatrices_orig; s++) {
+ int32 num_submatrices = computation_->submatrices.size();
+ for (int32 s = 1; s < num_submatrices; s++) {
 int32 matrix_index = computation_->submatrices[s].matrix_index; if (submatrix_is_used_[s]) matrix_is_used_[matrix_index] = true;
@@ -355,20 +304,15 @@ void ComputationRenumberer::RenumberSubmatrices() { void ComputationRenumberer::RenumberMatrices() { std::vector<int32*> matrix_args;
- bool include_in_submatrices = true;
- IdentifyMatrixArgsInComputation(include_in_submatrices,
- computation_, &matrix_args);
- std::vector<int32*>::iterator iter = matrix_args.begin(),
- end = matrix_args.end();
- for (; iter != end; ++iter) {
- if (**iter > 0) {
- int32 new_matrix_index = old_to_new_matrix_[**iter];
- // old_to_new_matrix_[s] for s > 0 is only <= 0 (actually, -1) for
- // submatrices that are never accessed, and these should never appear
- // in this list.
- KALDI_ASSERT(new_matrix_index > 0);
- **iter = new_matrix_index;
- }
+ int32 num_submatrices = computation_->submatrices.size();
+ for (int32 s = 1; s < num_submatrices; s++) {
+ int32 *matrix_index = &(computation_->submatrices[s].matrix_index);
+ // old_to_new_matrix_[m] for m > 0 is only <= 0 (actually, -1) for
+ // matrices that are never accessed, and such matrices should not be
+ // reachable from any remaining submatrix here (presumably because we
+ // renumber the submatrices first, which removes the unused ones).
+ int32 new_matrix_index = old_to_new_matrix_[*matrix_index]; + KALDI_ASSERT(new_matrix_index > 0); + *matrix_index = new_matrix_index; } std::vector new_matrices; @@ -615,80 +559,6 @@ void RemoveNoOps(NnetComputation *computation) { computation->commands.resize(output_iter - computation->commands.begin()); } -/// Wherever matrix orig_matrix_index appears in the input of the network -/// (i.e. in computation->input_output_info), replaces it with new_matrix_index. -/// Returns true if it did replace it. -bool ReplaceInInput( - const Nnet &nnet, - int32 orig_matrix_index, int32 new_matrix_index, - NnetComputation *computation) { - bool ans = false; - int32 num_matrices = computation->matrices.size(); - KALDI_ASSERT(orig_matrix_index > 0 && orig_matrix_index < num_matrices && - new_matrix_index > 0 && new_matrix_index < num_matrices); - unordered_map >::iterator - iter = computation->input_output_info.begin(), - end = computation->input_output_info.end(); - for (; iter != end; ++iter) { - int32 network_node = iter->first, - &value_matrix_index = iter->second.first, - &deriv_matrix_index = iter->second.second; - if (nnet.IsOutputNode(network_node)) { - // deriv_matrix_index would be an input to the computation. - if (deriv_matrix_index == orig_matrix_index) { - deriv_matrix_index = new_matrix_index; - ans = true; - } - } else { - // value_matrix_index would be an input to the computation. - if (value_matrix_index == orig_matrix_index) { - value_matrix_index = new_matrix_index; - ans = true; - } - } - } - return ans; -} - - -/// Wherever matrix orig_matrix_index appears in the output of the network -/// (i.e. in computation->input_output_info), replaces it with new_matrix_index. -/// Returns true if it did replace it. -bool ReplaceInOutput( - const Nnet &nnet, int32 orig_matrix_index, int32 new_matrix_index, - NnetComputation *computation) { - bool ans = false; - int32 num_matrices = computation->matrices.size(); - KALDI_ASSERT(orig_matrix_index > 0 && orig_matrix_index < num_matrices && - new_matrix_index > 0 && new_matrix_index < num_matrices); - unordered_map >::iterator - iter = computation->input_output_info.begin(), - end = computation->input_output_info.end(); - for (; iter != end; ++iter) { - int32 network_node = iter->first, - &value_matrix_index = iter->second.first, - &deriv_matrix_index = iter->second.second; - if (nnet.IsOutputNode(network_node)) { - // value_matrix_index would be an output of the computation. - if (value_matrix_index == orig_matrix_index) { - value_matrix_index = new_matrix_index; - ans = true; - } - } else { - // deriv_matrix_index would be an output of the computation. - if (deriv_matrix_index == orig_matrix_index) { - // we'd only have derivatives for actual inputs. [note: we also allow - // users to provide inputs for component nodes, but these would not have - // derivatives.] 
- KALDI_ASSERT(nnet.IsInputNode(network_node)); - deriv_matrix_index = new_matrix_index; - ans = true; - } - } - } - return ans; -} - VariableMergingOptimizer::VariableMergingOptimizer( const NnetOptimizeOptions &config, @@ -747,10 +617,10 @@ bool VariableMergingOptimizer::MergeVariables() { if (s1 > 0 && s2 > 0) { std::pair p = MayBeMerged(command_index, s1, s2); if (p.first) { - DoLeftMerge(command_index, s1, s2); + DoMerge(command_index, s1, s2); merged = true; } else if (p.second) { - DoRightMerge(command_index, s1, s2); + DoMerge(command_index, s2, s1); merged = true; } } @@ -800,45 +670,33 @@ void VariableMergingOptimizer::MarkAsDirty(int32 s) { } } -void VariableMergingOptimizer::DoRightMerge(int32 command_index, - int32 s1, int32 s2) { - // Prevent further optimizations touching s1 or s2 (we can - // try again in a later round of optimization, with a new - // instance of this class). - MarkAsDirty(s1); - MarkAsDirty(s2); - - int32 m1 = computation_->submatrices[s1].matrix_index, - m2 = computation_->submatrices[s2].matrix_index; - KALDI_ASSERT(m1 != m2 && m1 > 0 && m2 > 0); - { // modify submatrices for submatrices of m1 to effectively be sub-matrices of - // s2 instead (they will refer to m2 as the matrix_index). - std::vector::const_iterator iter = matrix_to_submatrix_[m1].begin(), - end = matrix_to_submatrix_[m1].end(); +void VariableMergingOptimizer::DoMerge(int32 command_index, + int32 s_to_keep, + int32 s_to_discard) { + // Prevent further optimizations touching either submatrix (we can try again + // in a later round of optimization, with a new instance of this class). + MarkAsDirty(s_to_keep); + MarkAsDirty(s_to_discard); + + int32 m_to_keep = computation_->submatrices[s_to_keep].matrix_index, + m_to_discard = computation_->submatrices[s_to_discard].matrix_index; + KALDI_ASSERT(m_to_keep != m_to_discard && m_to_keep > 0 && m_to_discard > 0); + + { // modify submatrices of m_to_discard to effectively be sub-matrices of + // s_to_keep instead (they will refer to m_to_keep as the matrix_index). + std::vector::const_iterator iter = + matrix_to_submatrix_[m_to_discard].begin(), + end = matrix_to_submatrix_[m_to_discard].end(); for (; iter != end; ++iter) { int32 submatrix_index = *iter; - KALDI_ASSERT(computation_->submatrices[submatrix_index].matrix_index==m1); + KALDI_ASSERT(computation_->submatrices[submatrix_index].matrix_index + == m_to_discard); computation_->submatrices[submatrix_index] = - GetSubMatrixOfSubMatrix(*computation_, submatrix_index, s2); + GetSubMatrixOfSubMatrix(*computation_, submatrix_index, + s_to_keep); } } - const std::vector &matrix_accesses = analyzer_.matrix_accesses; - // - If m1 was an input, replace it as an input with m2 - bool replaced = ReplaceInInput(nnet_, m1, m2, computation_); - KALDI_ASSERT(replaced == matrix_accesses[m1].is_input); - if (replaced) { // Remove the command that allocates m2. - int32 alloc_command = matrix_accesses[m2].allocate_command; - KALDI_ASSERT(alloc_command != -1); - computation_->commands[alloc_command].command_type = - kNoOperation; - } - // we keep matrix m2 (so m2 is m_to_keep, m1 is m_to_discard). 
- DoMergeCommon(command_index, m2, m1); -} -void VariableMergingOptimizer::DoMergeCommon(int32 command_index, - int32 m_to_keep, - int32 m_to_discard) { ComputationAnalysis analysis(*computation_, analyzer_); NnetComputation::Command &c = computation_->commands[command_index]; const std::vector &matrix_accesses = @@ -852,52 +710,59 @@ void VariableMergingOptimizer::DoMergeCommon(int32 command_index, c.arg2 = -1; } - // - If both m_to_keep and m_to_discard have commands that deallocate them, - // keep only the allocation command for m_to_keep, and make sure it's after - // the last access of m_to_discard (otherwise delete any deallocation - // command). + // We want to ensure that there is only one deallocation command. + // If neither matrix is an output, then there will be 2 deallocation + // commands and we keep the one for m_to_keep (which, if the sizes + // differ, will be the larger of the two, so it's the one whose + // submatrix index refers to the entirety of the matrix). + // If one of them is an output, then remove the deallocation command + // of whichever one is not an output. + // As a simplification to the logic above: if the 'discard' matrix + // has a deallocation command (i.e. if that matrix was not an output) + // then remove it; otherwise remove the deallocation command of + // the 'keep' matrix. + int32 dealloc_keep = matrix_accesses[m_to_keep].deallocate_command, dealloc_discard = matrix_accesses[m_to_discard].deallocate_command; - if (dealloc_keep != -1 && dealloc_discard != -1) { - KALDI_ASSERT(analysis.LastMatrixAccess(m_to_discard) < dealloc_keep); + if (dealloc_discard != -1) { computation_->commands[dealloc_discard].command_type = kNoOperation; } else { - if (dealloc_keep != -1) - computation_->commands[dealloc_keep].command_type = - kNoOperation; - if (dealloc_discard != -1) - computation_->commands[dealloc_discard].command_type = - kNoOperation; - } - - // - If both m_to_keep and m_to_discard have commands that allocate them, - // keep only the allocation command for m_to_keep and make sure it's - // before the first access of m_to_discard. - // (otherwise delete any allocation command). - int32 alloc_keep = matrix_accesses[m_to_keep].allocate_command, - alloc_discard = matrix_accesses[m_to_discard].allocate_command; - if (alloc_keep != -1 && alloc_discard != -1) { + KALDI_ASSERT(dealloc_keep != -1); + computation_->commands[dealloc_keep].command_type = kNoOperation; + } + + { + // - Both m_to_keep and m_to_discard will have commands that allocate + // them, as all matrices do (note, kAcceptInput counts as an allocation + // command). If one of them is kAcceptInput, then delete the other one. + // Otherwise delete the "discard" one. As a simplification of the logic + // of the previous sentence: if the "discard" allocate command is + // kAcceptInput then delete the "keep" allocate command, else delete + // the "discard" allocate command. + // Note: after we renumber the submatrices, they both refer to the + // same underlying matrix, but we need to refer to them using a + // submatrix that refers to the entire matrix. The one we keep will + // always refer to the entire matrix. (In the case where one of + // them is an input, both submatrices are guaranteed to refer to the + // entire matrix). 
+ int32 alloc_keep = matrix_accesses[m_to_keep].allocate_command, + alloc_discard = matrix_accesses[m_to_discard].allocate_command; + + KALDI_ASSERT(alloc_keep != -1 && alloc_discard != -1); KALDI_ASSERT(analysis.FirstMatrixAccess(m_to_discard) > alloc_keep); + NnetComputation::Command &keep_alloc_command = computation_->commands[alloc_keep], &discard_alloc_command = computation_->commands[alloc_discard]; - discard_alloc_command.command_type = kNoOperation; - if (keep_alloc_command.command_type == kAllocMatrixUndefined) { - keep_alloc_command.command_type = kAllocMatrixZeroed; - } else if (keep_alloc_command.command_type == kAllocMatrixFromOther) { - keep_alloc_command.command_type = kAllocMatrixFromOtherZeroed; + if (discard_alloc_command.command_type == kAcceptInput) { + keep_alloc_command.command_type = kNoOperation; + } else { + discard_alloc_command.command_type = kNoOperation; } - } else { - if (alloc_keep != -1) - computation_->commands[alloc_keep].command_type = - kNoOperation; - if (alloc_discard != -1) - computation_->commands[alloc_discard].command_type = - kNoOperation; } // If the matrix to discard had stride_type == kStrideEqualNumCols, set the - // matrix to keep's stride_type to kStrideEqualNuMCols. + // matrix to keep's stride_type to kStrideEqualNumCols. if (computation_->matrices[m_to_discard].stride_type == kStrideEqualNumCols) { computation_->matrices[m_to_keep].stride_type = kStrideEqualNumCols; // ... and perform an additional check. @@ -908,43 +773,6 @@ void VariableMergingOptimizer::DoMergeCommon(int32 command_index, } } -void VariableMergingOptimizer::DoLeftMerge(int32 command_index, - int32 s1, int32 s2) { - // Prevent further optimizations touching s1 or s2 (we can - // try again in a later round of optimization, with a new - // instance of this class). - MarkAsDirty(s1); - MarkAsDirty(s2); - - int32 m1 = computation_->submatrices[s1].matrix_index, - m2 = computation_->submatrices[s2].matrix_index; - KALDI_ASSERT(m1 != m2 && m1 > 0 && m2 > 0); - { // modify submatrices for submatrices of m2 to effectively be sub-matrices of - // s1 instead (they will refer to m1 as the matrix_index). - std::vector::const_iterator iter = matrix_to_submatrix_[m2].begin(), - end = matrix_to_submatrix_[m2].end(); - for (; iter != end; ++iter) { - int32 submatrix_index = *iter; - KALDI_ASSERT(computation_->submatrices[submatrix_index].matrix_index==m2); - computation_->submatrices[submatrix_index] = - GetSubMatrixOfSubMatrix(*computation_, submatrix_index, s1); - } - } - const std::vector &matrix_accesses = analyzer_.matrix_accesses; - // - If m2 was an output, replace it as an input with m1. - bool replaced = ReplaceInOutput(nnet_, m2, m1, computation_); - KALDI_ASSERT(replaced == matrix_accesses[m2].is_output); - if (replaced) { // Remove the command that deallocates m1. - int32 dealloc_command = matrix_accesses[m1].deallocate_command; - KALDI_ASSERT(dealloc_command != -1); - computation_->commands[dealloc_command].command_type = - kNoOperation; - } - // we keep matrix m1 (so m1 is m_to_keep, m2 is m_to_discard). - DoMergeCommon(command_index, m1, m2); -} - - std::pair VariableMergingOptimizer::MayBeMerged( @@ -1067,14 +895,14 @@ int32 ModelUpdateConsolidator::ConsolidateSubmatrices( int32 new_whole_submatrix = computation_->NewMatrix(num_rows, num_cols, stride_type); // Add a command at the very start, to initialize this new matrix. 
- int32 new_matrix_index =
- computation_->submatrices[new_whole_submatrix].matrix_index;
 // we can later on optimize this zeroed initialization to an undefined // initialization. extra_commands_[0].push_back(
- NnetComputation::Command(kAllocMatrixZeroed, new_matrix_index));
+ NnetComputation::Command(kAllocMatrixZeroed, new_whole_submatrix));
 final_deallocate_commands_.push_back(
- NnetComputation::Command(kDeallocMatrix, new_matrix_index));
+ NnetComputation::Command(kDeallocMatrix, new_whole_submatrix));
+ int32 new_matrix_index =
+ computation_->submatrices[new_whole_submatrix].matrix_index;
 if (!computation_->matrix_debug_info.empty()) computation_->matrix_debug_info[new_matrix_index].Swap(&debug_info);
@@ -1311,6 +1139,7 @@ void DerivativeTimeLimiter::ModifyCommand(NnetComputation::Command *command) { MapAddRowRangesCommand(command); break; }
+ case kAcceptInput: case kProvideOutput:
 case kNoOperation: case kNoOperationMarker: break; default:
diff --git a/src/nnet3/nnet-optimize-utils.h b/src/nnet3/nnet-optimize-utils.h index 26f9eab84dd..95fdffc8656 100644 --- a/src/nnet3/nnet-optimize-utils.h +++ b/src/nnet3/nnet-optimize-utils.h
@@ -52,14 +52,11 @@ struct NnetOptimizeOptions; // Forward declaration. may be sub-matrices of larger matrices. Note: the following
- - Define last-access(submatrix) as:
- If matrix-of(submatrix) is an output, then num-commands, otherwise the
+ - Define last-access(submatrix) as the
 last command that accesses that submatrix for either read or write. [note: deallocation does not count as a read or write operation].
- - Define first-access(submatrix) as:
- If matrix-of(submatrix) is an input, then -1, otherwise the first command
- that is *not* an allocation command that accessed that submatrix for either
- read or write.
+ - Define first-access(submatrix) as the first command not of type kAlloc*
+ that accesses that submatrix for either read or write.
 - Define last-write-access(submatrix) as the last command-index that accessed the submatrix in a write operation, or -1 if there is no such command (this could happen for inputs).
@@ -99,41 +96,27 @@ struct NnetOptimizeOptions; // Forward declaration. Otherwise (cases (b) and (c), in-place propagate or backprop), we insist that: - first-access(s2) == C - last-access(s1) == C
- Note: in either case, these conditions imply that s2 is not an input and s1 is
- not an output.
+ Note: in either case, these conditions imply that m2/s2 is not an input and m1/s1 is
+ not an output. [i.e. s1 *may* be an input and s2 *may* be an output].
+
+ We can describe the procedure for left-merge and right-merge together,
+ because it is the same in both cases. Define s_to_keep and m_to_keep as s1
+ and m1 if we're left-merging and s2 and m2 if we're right-merging, and
+ s_to_discard and m_to_discard the opposite way.
+
+ The procedure to merge in general is as follows:
- The sequence of things we have to do for a right-merge (in which we delete
- s1,m1) is as follows:
 - All submatrices that reference m1, make them reference m2 instead. [later we'll renumber so that there are no duplicates.]
- - If m1 was an input, replace it as an input with m2 and remove the
- command that allocated m2.
- - If it was an assignment [case (a)], replace the assignment command with a
- no-op.
- - If both m1 and m2 have commands that allocate them, keep only the
- allocation command for m2, and make sure that it zeroes the data (we can
- later change to undefined if allowed) and that it's before the first
- non-allocation access of m1.
Otherwise remove any allocation commands (the merged variable is an input).
- - If both m1 and m2 have commands that deallocate them, keep only the
- deallocation command for m2, and make sure that it's after the last
- access of m1 (otherwise delete any deallocation command, because m2 must
- be an output). [note: previously we kept the later of the 2 commands,
- but this had the effect of making inaccurate the Analyzer info for
- a matrix (m2) that might later be used.
- - If m1 had stride_type == kStrideEqualNumCols, set m2's stride_type
- to kStrideEqualNumCols.
-
- - The sequence of things we have to do for a right-merge (in which we delete
- s1,m1) is as follows:
- - All submatrices that reference m2, make them reference m1 instead.
- [later we'll renumber so that there are no duplicates.]
- - If m2 was an output, replace it as an output with m1 and remove the
- command that deallocated m1.
- ... the last four bullet-points, regarding removing the assignment command,
- and allocation and deallocation, and stride-type, are the same as for a
- left-merge, except swap m1 and m2.
+ [later we'll renumber so that there are no duplicates.] This automatically
+ takes care of making the input and output and allocation/deallocation
+ commands refer to the right matrix, in most cases.
+ - We need to get rid of duplicate or unnecessary allocation commands:
+ If m_to_discard is an input then get rid of the allocation command for
+ m_to_keep; otherwise get rid of the allocation command of m_to_discard.
+ - We need to get rid of duplicate or unnecessary deallocation commands:
+ If m_to_discard is an output then get rid of the deallocation command
+ for m_to_keep; otherwise get rid of the deallocation command for
+ m_to_discard.
 At the end when we call RemoveOrphanMatrices(), the renumbering code will automatically detect that there are duplicate submatrices, and will merge
@@ -173,20 +156,10 @@ class VariableMergingOptimizer { /// @param s2 [in] A submatrix-index s2 > 0 std::pair<bool, bool> MayBeMerged(int32 command, int32 s1, int32 s2) const;
- // performs the left merge. Search for left-merge in the comment
- // above the class declaration for details.
- void DoLeftMerge(int32 command_index, int32 s1, int32 s2);
-
- // performs the right merge. Search for right-merge in the comment
- // above the class declaration for details.
- void DoRightMerge(int32 command_index, int32 s1, int32 s2);
-
- // Performs the actions common to both left and right merges, regarding
- // removing the assignment command, and allocation and deallocation (called
- // from DoLeftMerge and DoRightMerge). The m_to_keep and m_to_discard
- // are the matrix-indexes we will keep and discard respectively.
- void DoMergeCommon(int32 command_index, int32 m_to_keep,
- int32 m_to_discard);
+ // Merges two matrices, whether left merge or right merge. s_to_keep and
+ // s_to_discard are the submatrix-indexes we will keep and discard
+ // respectively (these are s1 and s2, in some order).
+ void DoMerge(int32 command_index, int32 s_to_keep, int32 s_to_discard);
 /// Marks the variables underlying submatrix 's' as dirty void MarkAsDirty(int32 s);
@@ -545,21 +518,6 @@ void RenumberComputation(NnetComputation *computation); /// Removes commands of type kNoOperation in the computation. void RemoveNoOps(NnetComputation *computation);
-/// Wherever matrix orig_matrix_index appears in the input of the network
-/// (i.e. in computation->input_output_info), replaces it with new_matrix_index.
-/// Returns true if it did replace it.
-bool ReplaceInInput( - const Nnet &nnet, int32 orig_matrix_index, int32 new_matrix_index, - NnetComputation *computation); - -/// A helper function used in some optimization functions. -/// Wherever matrix orig_matrix_index appears in the output of the network -/// (i.e. in computation->input_output_info), replaces it with new_matrix_index. -/// Returns true if it did replace it. -bool ReplaceInOutput( - const Nnet &nnet, int32 orig_matrix_index, int32 new_matrix_index, - NnetComputation *computation); - /// This function outputs to "submatrix_args" the addresses of a subset of /// arguments arg1 through arg6 in "command", that correspond to the indexes of /// submatrices. This is useful in renumbering code. Note: some of the @@ -586,32 +544,6 @@ void IdentifySubmatrixArgsInComputation(NnetComputation *computation, std::vector *submatrix_args); -/// This function outputs to "matrix_args" the addresses of a subset of the -/// arguments arg1 through arg6 in "command", that correspond to the indexes of -/// matrices. This is useful in renumbering code. (Note: only a few types of -/// command use matrix indexes). -void IdentifyMatrixArgs(NnetComputation::Command *command, - std::vector *matrix_args); - -/// This function outputs to "matrix_args" the addresses of a subset of the -/// arguments arg1 through arg6 in commands in "commands", that correspond to -/// the indexes of matrices. This is useful in renumbering code. (Note: only a -/// few types of command use matrix indexes). -void IdentifyMatrixArgs(std::vector *command, - std::vector *matrix_args); - -/// This function outputs to "matrix_args" the addresses of indexes inside -/// 'computation' that correspond to matrices. These live inside -/// computation->commands and computation->input_output_info; and if -/// 'include_from_submatrices' is true, then the matrix-indexes present in -/// computation->submatrices[*].matrix_index will be included too. Zeros may be -/// present if there were optional arguments; we do include pointers to them, -/// but you can just ignore them. -void IdentifyMatrixArgsInComputation(bool include_from_submatrices, - NnetComputation *computation, - std::vector *matrix_args); - - /// Identifies in the vector of commands, arguments that correspond to indexes /// into the computation's indexes_multi array, and outputs a list of pointers /// to those arguments to 'indexes_multi_args'. Useful in renumbering code. diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc index 08a28e22025..6bc444487f8 100644 --- a/src/nnet3/nnet-optimize.cc +++ b/src/nnet3/nnet-optimize.cc @@ -182,9 +182,8 @@ void RemoveUnnecessaryZeroing(const Nnet &nnet, continue; // nothing to do. if (computation->commands[allocate_command].command_type != kAllocMatrixZeroed) { - KALDI_ASSERT(computation->commands[allocate_command].command_type == - kAllocMatrixUndefined); - continue; // already leaving it undefined, so nothing to do. + continue; // already leaving it undefined, or it's an input, so nothing + // to do. 
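+ // (Aside, noting an assumed consequence of the new convention: for an
+ // input, the "allocation" command is a kAcceptInput command, which also
+ // fails the kAllocMatrixZeroed test above -- hence the "or it's an input"
+ // in the comment.)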
} std::vector variables_for_matrix; a.variables.AppendVariablesForMatrix(matrix_index, &variables_for_matrix); @@ -283,7 +282,8 @@ void RemoveUnnecessaryAllocation(const Nnet &nnet, if (command.command_type == kAllocMatrixZeroed || command.command_type == kAllocMatrixUndefined || command.command_type == kDeallocMatrix) { - int32 m = command.arg1, num_rows = computation->matrices[m].num_rows, + int32 s = command.arg1, m = computation->submatrices[s].matrix_index, + num_rows = computation->matrices[m].num_rows, num_cols = computation->matrices[m].num_cols, num_cols_mod = num_cols * ( computation->matrices[m].stride_type == kDefaultStride ? 1 : -1); @@ -457,6 +457,15 @@ void Optimize(const NnetOptimizeOptions &config, if (GetVerboseLevel() >= 4) CheckComputation(nnet, request, *computation, false); + + // The following is not configurable because it is necessary for + // the computation to run correctly (we do it after compilation too, + // but the operations may have been put out of order by + // other optimizations.) + ConsolidateIoOperations(nnet, computation); + + if (GetVerboseLevel() >= 4) + CheckComputation(nnet, request, *computation, false); } // ComputationRequests are distinguished by the names and indexes @@ -613,6 +622,82 @@ const NnetComputation* CachingOptimizingCompiler::Compile( return computation; } +/// Split the computation up into segments bounded internally by kNoOperationMarker. +/// For each segment, a pair of command-indexes (start, end) is output to the vector +/// 'segments', so the commands in the segment (not including kNoOperationMarker) +/// are numbered from start ... end - 1. +static void SplitComputationIntoSegments( + const NnetComputation &computation, + std::vector > *segments) { + + int32 num_commands = computation.commands.size(); + segments->clear(); + int32 cur_start = 0; + for (int32 c = 0; c < num_commands; c++) { + if (computation.commands[c].command_type == kNoOperationMarker) { + segments->push_back(std::pair(cur_start, c)); + cur_start = c + 1; + } + } + segments->push_back(std::pair(cur_start, num_commands)); +} + + +void ConsolidateIoOperations(const Nnet &nnet, + NnetComputation *computation) { + // These segments, represented as (start-index, end-index), + // are segments of the computation separated by kNoOperationMarker. + std::vector > segments; + SplitComputationIntoSegments(*computation, &segments); + + int32 num_commands = computation->commands.size(); + std::vector reordered_commands(num_commands); + // put kNoOperationMarker between all segments in the reordered commands. + for (size_t s = 0; s + 1 < segments.size(); s++) + reordered_commands[segments[s].second].command_type = kNoOperationMarker; + + // for each segment we'll divide the commands up into those that must appear + // at the left (start) of the segment, those that must appear in the middle + // and those that must appear at the right (end). 
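+  // (Concretely: kAcceptInput commands, and kProvideOutput commands for
+  // output nodes, go to the left (start) of the segment; kProvideOutput
+  // commands for input nodes, i.e. where we hand an input-derivative back
+  // to the user, go to the right (end); everything else stays in the
+  // middle.)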
+  std::vector<int32> left_commands, middle_commands, right_commands;
+
+  for (size_t s = 0; s < segments.size(); s++) {
+    int32 segment_start = segments[s].first,
+        segment_end = segments[s].second;
+    left_commands.clear();
+    middle_commands.clear();
+    right_commands.clear();
+    for (int32 c = segment_start; c < segment_end; c++) {
+      if (computation->commands[c].command_type == kProvideOutput &&
+          nnet.IsInputNode(computation->commands[c].arg2)) {
+        right_commands.push_back(c);
+      } else if (computation->commands[c].command_type == kProvideOutput ||
+                 computation->commands[c].command_type == kAcceptInput) {
+        left_commands.push_back(c);
+      } else {
+        middle_commands.push_back(c);
+      }
+    }
+    std::vector<int32>::const_iterator iter = left_commands.begin(),
+        end = left_commands.end();
+    int32 c = segment_start;
+    for (; iter != end; ++iter, ++c)
+      reordered_commands[c] = computation->commands[*iter];
+    iter = middle_commands.begin();
+    end = middle_commands.end();
+    for (; iter != end; ++iter, ++c)
+      reordered_commands[c] = computation->commands[*iter];
+    iter = right_commands.begin();
+    end = right_commands.end();
+    for (; iter != end; ++iter, ++c)
+      reordered_commands[c] = computation->commands[*iter];
+    KALDI_ASSERT(c == segment_end);
+  }
+  computation->commands.swap(reordered_commands);
+}
+
+
+
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h
index e04aff302c9..ea3a483f266 100644
--- a/src/nnet3/nnet-optimize.h
+++ b/src/nnet3/nnet-optimize.h
@@ -261,6 +261,15 @@ void RemoveUnnecessaryAllocation(const Nnet &nnet,
                                  NnetComputation *computation);


+/// This optimization puts the I/O operations (kAcceptInput and kProvideOutput)
+/// at the very beginning or end of segments of computation.  Specifically:
+/// first the computation is broken up into segments delimited by kNoOperationMarker.
+/// Then, for each segment, all I/O operations are moved to the start of the segment,
+/// *except for* kProvideOutput for input nodes (where the network provides an
+/// input-deriv), which is moved to the end of the segment.
+void ConsolidateIoOperations(const Nnet &nnet,
+                             NnetComputation *computation);
+

 } // namespace nnet3
diff --git a/src/nnet3/nnet-training.cc b/src/nnet3/nnet-training.cc
index 87d64e27871..2378c607ebf 100644
--- a/src/nnet3/nnet-training.cc
+++ b/src/nnet3/nnet-training.cc
@@ -52,7 +52,7 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config,
       KALDI_WARN << "Could not open cached computation. "
                     "Probably this is the first training iteration.";
     }
-  }
+  }
 }


@@ -68,10 +68,10 @@ void NnetTrainer::Train(const NnetExample &eg) {
                         *nnet_, delta_nnet_);

   // give the inputs to the computer object.
computer.AcceptInputs(*nnet_, eg.io); - computer.Forward(); + computer.Run(); this->ProcessOutputs(eg, &computer); - computer.Backward(); + computer.Run(); UpdateParamsWithMaxChange(); } @@ -276,7 +276,7 @@ bool ObjectiveFunctionInfo::PrintTotalStats(const std::string &name) const { << (tot_objf / tot_weight) << " over " << tot_weight << " frames."; } else { KALDI_LOG << "Overall average objective function for '" << name << "' is " - << objf << " + " << aux_objf << " = " << sum_objf + << objf << " + " << aux_objf << " = " << sum_objf << " over " << tot_weight << " frames."; } KALDI_LOG << "[this line is to be parsed by a script:] " @@ -290,7 +290,7 @@ NnetTrainer::~NnetTrainer() { Output ko(config_.write_cache, config_.binary_write_cache); compiler_.WriteCache(ko.Stream(), config_.binary_write_cache); KALDI_LOG << "Wrote computation cache to " << config_.write_cache; - } + } delete delta_nnet_; } @@ -324,7 +324,7 @@ void ComputeObjectiveFunction(const GeneralMatrix &supervision, CuMatrix output_deriv(output.NumRows(), output.NumCols(), kUndefined); cu_post.CopyToMat(&output_deriv); - computer->AcceptOutputDeriv(output_name, &output_deriv); + computer->AcceptInput(output_name, &output_deriv); } break; } @@ -335,7 +335,7 @@ void ComputeObjectiveFunction(const GeneralMatrix &supervision, *tot_weight = cu_post.Sum(); *tot_objf = TraceMatMat(output, cu_post, kTrans); if (supply_deriv) - computer->AcceptOutputDeriv(output_name, &cu_post); + computer->AcceptInput(output_name, &cu_post); break; } case kCompressedMatrix: { @@ -346,7 +346,7 @@ void ComputeObjectiveFunction(const GeneralMatrix &supervision, *tot_weight = cu_post.Sum(); *tot_objf = TraceMatMat(output, cu_post, kTrans); if (supply_deriv) - computer->AcceptOutputDeriv(output_name, &cu_post); + computer->AcceptInput(output_name, &cu_post); break; } } @@ -362,7 +362,7 @@ void ComputeObjectiveFunction(const GeneralMatrix &supervision, *tot_weight = diff.NumRows(); *tot_objf = -0.5 * TraceMatMat(diff, diff, kTrans); if (supply_deriv) - computer->AcceptOutputDeriv(output_name, &diff); + computer->AcceptInput(output_name, &diff); break; } default: diff --git a/src/nnet3/online-nnet3-decodable-simple.cc b/src/nnet3/online-nnet3-decodable-simple.cc index c93394dfebd..010dc80991a 100644 --- a/src/nnet3/online-nnet3-decodable-simple.cc +++ b/src/nnet3/online-nnet3-decodable-simple.cc @@ -204,7 +204,7 @@ void DecodableNnet3SimpleOnline::DoNnetComputation( ivector_feats_cu.Row(0).CopyFromVec(ivector); computer.AcceptInput("ivector", &ivector_feats_cu); } - computer.Forward(); + computer.Run(); CuMatrix cu_output; computer.GetOutputDestructive("output", &cu_output); // subtract log-prior (divide by prior) From 931723f5ab07d7de01fc3915b669a7830c3b397b Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 8 Oct 2016 17:14:16 -0400 Subject: [PATCH 003/530] Some bug fixes to previous commit (RE refactoring code in nnet3). 
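
Among the fixes: NnetComputation::GetWholeSubmatrices() now checks that every
matrix has a submatrix spanning the whole of it; DerivativeTimeLimiter uses
that function in place of its own EnsureMatricesHaveEntireSubmatrices(); and
the input-derivative test constructs a fresh NnetComputer for each pass.

As a rough sketch of what the strengthened GetWholeSubmatrices() contract
buys callers (illustrative only, not code from this patch):

  // 'computation' is an already-compiled NnetComputation.  After this
  // patch, every matrix index m > 0 is guaranteed a nonzero entry, so the
  // assert below cannot fire.
  std::vector<int32> whole_submatrices;
  computation.GetWholeSubmatrices(&whole_submatrices);
  for (int32 m = 1; m < static_cast<int32>(whole_submatrices.size()); m++) {
    int32 s = whole_submatrices[m];  // submatrix spanning all of matrix m.
    KALDI_ASSERT(s != 0 && computation.IsWholeMatrix(s));
  }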
--- src/nnet3/am-nnet-simple.h | 2 +- src/nnet3/nnet-computation.cc | 11 ++++- src/nnet3/nnet-computation.h | 2 +- src/nnet3/nnet-compute-test.cc | 3 +- src/nnet3/nnet-compute.cc | 2 +- src/nnet3/nnet-derivative-test.cc | 20 +++++---- src/nnet3/nnet-optimize-test.cc | 38 ++++++++--------- src/nnet3/nnet-optimize-utils.cc | 71 ++++++++++++++----------------- src/nnet3/nnet-optimize-utils.h | 15 ++++--- 9 files changed, 84 insertions(+), 80 deletions(-) diff --git a/src/nnet3/am-nnet-simple.h b/src/nnet3/am-nnet-simple.h index 5178c2a054d..c3d8301aa5a 100644 --- a/src/nnet3/am-nnet-simple.h +++ b/src/nnet3/am-nnet-simple.h @@ -94,7 +94,7 @@ class AmNnetSimple { /// This function works out the left_context_ and right_context_ variables /// from the network (it's a rather complex calculation). You should call /// this if you have structurally changed the nnet without calling SetNnet(), - /// e.g. using non-const GetNnet(). void SetContext(); + /// e.g. using non-const GetNnet(). void SetContext(); private: diff --git a/src/nnet3/nnet-computation.cc b/src/nnet3/nnet-computation.cc index 62d12c2e93f..6b6dc8328f5 100644 --- a/src/nnet3/nnet-computation.cc +++ b/src/nnet3/nnet-computation.cc @@ -1080,14 +1080,21 @@ NnetComputation& NnetComputation::operator = (const NnetComputation &other) { void NnetComputation::GetWholeSubmatrices( std::vector *whole_submatrices) const { - whole_submatrices->resize(matrices.size(), 0); - int32 num_submatrices = submatrices.size(); + int32 num_matrices = matrices.size(), + num_submatrices = submatrices.size(); + whole_submatrices->clear(); + whole_submatrices->resize(num_matrices, 0); for (int32 s = 1; s < num_submatrices; s++) { if (IsWholeMatrix(s)) { int32 m = submatrices[s].matrix_index; (*whole_submatrices)[m] = s; } } + for (int32 m = 1; m < num_matrices; m++) { + KALDI_ASSERT((*whole_submatrices)[m] != 0 && + "Matrix exists with no submatrix that is " + "the whole of it."); + } } diff --git a/src/nnet3/nnet-computation.h b/src/nnet3/nnet-computation.h index 6097b059d23..ba0eaada1a0 100644 --- a/src/nnet3/nnet-computation.h +++ b/src/nnet3/nnet-computation.h @@ -389,7 +389,7 @@ struct NnetComputation { // This function outputs a vector, indexed by matrix index, that gives you for // each matrix, the index of a submatrix which refers to the whole of that - // matrix (or 0 if there is no such submatrix, which should not happen). + // matrix; it makes sure that each matrix has such a submatrix. void GetWholeSubmatrices(std::vector *whole_submatrices) const; diff --git a/src/nnet3/nnet-compute-test.cc b/src/nnet3/nnet-compute-test.cc index f69d4d3036a..afe7da86dc1 100644 --- a/src/nnet3/nnet-compute-test.cc +++ b/src/nnet3/nnet-compute-test.cc @@ -172,7 +172,7 @@ void UnitTestNnetCompute() { int main() { using namespace kaldi; using namespace kaldi::nnet3; - SetVerboseLevel(4); + // SetVerboseLevel(4); for (kaldi::int32 loop = 0; loop < 2; loop++) { @@ -190,4 +190,3 @@ int main() { return 0; } - diff --git a/src/nnet3/nnet-compute.cc b/src/nnet3/nnet-compute.cc index b497e34aac4..7171e6b0273 100644 --- a/src/nnet3/nnet-compute.cc +++ b/src/nnet3/nnet-compute.cc @@ -501,7 +501,7 @@ int32 NnetComputer::GetIoMatrixIndex(const std::string &node_name, bool is_outpu // if you get the following error it will likely be a bug in the calling code, // or possibly due to giving the wrong egs. KALDI_ERR << "Could not " - << (is_output ? "provide output " : " accept input ") + << (is_output ? 
"provide output " : "accept input ") << "for network node " << node_name << " (it is not expected at this point in the computation)"; return 0; // Suppress compiler warnings; this line will never be reached. diff --git a/src/nnet3/nnet-derivative-test.cc b/src/nnet3/nnet-derivative-test.cc index 1f8aa7dcfec..0f5f2f6d54a 100644 --- a/src/nnet3/nnet-derivative-test.cc +++ b/src/nnet3/nnet-derivative-test.cc @@ -314,13 +314,6 @@ void UnitTestNnetInputDerivatives() { compute_opts.debug = true; computation.ComputeCudaIndexes(); - // the only reason we might need to provide the &nnet parameter is if the - // StoreStats() operation had been requested. We made sure no model update - // is being performed. - NnetComputer computer(compute_opts, - computation, - nnet, - &nnet); int32 num_directions = 3; // must be >= 1. Best if it's >1, will reduce // the probability of random failures. @@ -349,8 +342,18 @@ void UnitTestNnetInputDerivatives() { // Other passes are with various differently-perturbed versions of // the features. for (int32 pass = 0; pass <= num_directions + 1; pass++) { + // the only reason we might need to provide the &nnet parameter is if the + // StoreStats() operation had been requested. We made sure no model update + // is being performed. + NnetComputer computer(compute_opts, + computation, + nnet, + &nnet); + + // provide the input to the computations. for (size_t i = 0; i < request.inputs.size(); i++) { + CuMatrix temp(inputs[i]); if (pass > 0 && pass <= num_directions) { // Perturb the input randomly. delta_inputs[i].Resize(inputs[i].NumRows(), inputs[i].NumCols()); @@ -425,7 +428,7 @@ void UnitTestNnetInputDerivatives() { int main() { using namespace kaldi; using namespace kaldi::nnet3; - //SetVerboseLevel(2); + // SetVerboseLevel(4); for (kaldi::int32 loop = 0; loop < 2; loop++) { @@ -444,4 +447,3 @@ int main() { return 0; } - diff --git a/src/nnet3/nnet-optimize-test.cc b/src/nnet3/nnet-optimize-test.cc index 4d61f6f9f4a..97662acc556 100644 --- a/src/nnet3/nnet-optimize-test.cc +++ b/src/nnet3/nnet-optimize-test.cc @@ -138,26 +138,26 @@ static bool UnitTestNnetOptimizeWithOptions(NnetOptimizeOptions opt_config) { if (request.outputs[0].has_deriv) { computer.AcceptInput("output", &output_deriv); computer_opt.AcceptInput("output", &output_deriv_opt); - } - KALDI_LOG << "Running non-optimized backward computation"; - computer.Run(); - KALDI_LOG << "Running optimized backward computation"; - computer_opt.Run(); - for (size_t i = 0; i < request.inputs.size(); i++) { - if (request.inputs[i].has_deriv) { - const CuMatrixBase &in_deriv = - computer.GetOutput(request.inputs[i].name); - const CuMatrixBase &in_deriv_opt = - computer_opt.GetOutput(request.inputs[i].name); - KALDI_LOG << "Input-deriv sum for input '" << request.inputs[i].name - << "' (non-optimized) is " << in_deriv.Sum(); - KALDI_LOG << "Input-deriv sum for input '" << request.inputs[i].name - << "' (optimized) is " << in_deriv_opt.Sum(); - if (!ApproxEqual(in_deriv, in_deriv_opt)) { - KALDI_WARN << "Non-optimized and optimized versions of the " - << "computation give different input-derivs."; - return false; + KALDI_LOG << "Running non-optimized backward computation"; + computer.Run(); + KALDI_LOG << "Running optimized backward computation"; + computer_opt.Run(); + for (size_t i = 0; i < request.inputs.size(); i++) { + if (request.inputs[i].has_deriv) { + const CuMatrixBase &in_deriv = + computer.GetOutput(request.inputs[i].name); + const CuMatrixBase &in_deriv_opt = + computer_opt.GetOutput(request.inputs[i].name); + 
KALDI_LOG << "Input-deriv sum for input '" << request.inputs[i].name + << "' (non-optimized) is " << in_deriv.Sum(); + KALDI_LOG << "Input-deriv sum for input '" << request.inputs[i].name + << "' (optimized) is " << in_deriv_opt.Sum(); + if (!ApproxEqual(in_deriv, in_deriv_opt)) { + KALDI_WARN << "Non-optimized and optimized versions of the " + << "computation give different input-derivs."; + return false; + } } } } diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc index 85a6c926bcd..de9d2b43a2b 100644 --- a/src/nnet3/nnet-optimize-utils.cc +++ b/src/nnet3/nnet-optimize-utils.cc @@ -919,7 +919,7 @@ int32 ModelUpdateConsolidator::ConsolidateSubmatrices( // submatrix numbered 'new_submatrix' the contents of the submatrix numbered // 'submatrices[i]'. Note: we hope that a later pass of optimization // (VariableMergingOptimization) will remove this redundant copy by - // having the operation that created it right directly to the location + // having the operation that created it write directly to the location // we want it to be. NnetComputation::Command c(kMatrixCopy, new_submatrix, submatrices[i]); extra_commands_[commands[i]].push_back(c); @@ -1123,8 +1123,8 @@ void DerivativeTimeLimiter::ModifyCommand(NnetComputation::Command *command) { command->arg5 = mapped_output_deriv_submatrix; command->arg6 = mapped_input_deriv_submatrix; } - } break; + } case kMatrixCopy: case kMatrixAdd: MapSimpleMatrixCommand(command); break; @@ -1162,7 +1162,7 @@ void DerivativeTimeLimiter::MapSimpleMatrixCommand(NnetComputation::Command *c) c->command_type = kNoOperation; return; } - // left_prune1 is the nmber of rows pruned away on the left for submatrix1. + // left_prune1 is the number of rows pruned away on the left for submatrix1. int32 orig_num_rows = computation_->submatrices[submatrix1].num_rows, left_prune1, left_prune2, right_prune1, right_prune2; GetPruneValues(submatrix1, submatrix1_mapped, &left_prune1, &right_prune1); @@ -1184,7 +1184,7 @@ void DerivativeTimeLimiter::MapSimpleMatrixCommand(NnetComputation::Command *c) } else { int32 num_rows = orig_num_rows - left_prune - right_prune; // note: the call NewSubMatrix effectively gives us a sub-matrix of a - // subm-matrix. + // sub-matrix. c->arg1 = computation_->NewSubMatrix(submatrix1, left_prune, num_rows, 0, -1); c->arg2 = computation_->NewSubMatrix(submatrix2, @@ -1394,7 +1394,7 @@ void DerivativeTimeLimiter::LimitDerivTimes() { max_deriv_time_ == std::numeric_limits::max()) return; // nothing to do. 
-  EnsureMatricesHaveEntireSubmatrices();
+  computation_->GetWholeSubmatrices(&whole_submatrices_);
   ComputeMatrixPruneInfo();
   ComputeSubmatrixMaps();
   ModifyCommands();
@@ -1403,20 +1403,6 @@ void DerivativeTimeLimiter::LimitDerivTimes() {
   RenumberComputation(computation_);
 }

-void DerivativeTimeLimiter::EnsureMatricesHaveEntireSubmatrices() {
-  int32 num_matrices = computation_->matrices.size(),
-      num_submatrices = computation_->submatrices.size();
-  entire_submatrix_.clear();
-  entire_submatrix_.resize(num_matrices, -1);
-  entire_submatrix_[0] = 0;
-  for (int32 s = 1; s < num_submatrices; s++)
-    if (computation_->IsWholeMatrix(s))
-      entire_submatrix_[computation_->submatrices[s].matrix_index] = s;
-  for (int32 m = 1; m < num_matrices; m++)
-    if (entire_submatrix_[m] == -1)
-      entire_submatrix_[m] = computation_->NewSubMatrix(m, 0, -1, 0, -1);
-}
-
 void DerivativeTimeLimiter::ComputeMatrixPruneInfo() {
   KALDI_ASSERT(computation_->matrix_debug_info.size() ==
                computation_->matrices.size() &&
@@ -1517,20 +1503,20 @@ void DerivativeTimeLimiter::ModifyCommands() {
 // desired range are never accessed), and false otherwise.
 bool DerivativeTimeLimiter::CanLimitMatrix(const Analyzer &analyzer,
                                            int32 m) const {
-  int32 s_entire = entire_submatrix_[m];  // submatrix consisting of
+  int32 s_whole = whole_submatrices_[m];  // submatrix consisting of
                                           // all of the matrix.
-  int32 s_mapped = submatrix_map_[s_entire];  // the matrix limited in time.
-  KALDI_ASSERT(s_mapped != 0 && s_mapped != s_entire);
-  std::vector<int32> entire_variables, mapped_variables;
-  analyzer.variables.AppendVariablesForSubmatrix(s_entire,
-                                                 &entire_variables);
+  int32 s_mapped = submatrix_map_[s_whole];  // the matrix limited in time.
+  KALDI_ASSERT(s_mapped != 0 && s_mapped != s_whole);
+  std::vector<int32> whole_variables, mapped_variables;
+  analyzer.variables.AppendVariablesForSubmatrix(s_whole,
+                                                 &whole_variables);
   analyzer.variables.AppendVariablesForSubmatrix(s_mapped,
                                                  &mapped_variables);
-  KALDI_ASSERT(entire_variables.size() > mapped_variables.size());
-  std::vector<int32> excluded_variables(entire_variables.size() -
+  KALDI_ASSERT(whole_variables.size() > mapped_variables.size());
+  std::vector<int32> excluded_variables(whole_variables.size() -
                                  mapped_variables.size());
   std::vector<int32>::iterator end_iter =
-      std::set_difference(entire_variables.begin(), entire_variables.end(),
+      std::set_difference(whole_variables.begin(), whole_variables.end(),
                           mapped_variables.begin(), mapped_variables.end(),
                           excluded_variables.begin());
   KALDI_ASSERT(end_iter == excluded_variables.end());
@@ -1579,15 +1565,24 @@ void DerivativeTimeLimiter::LimitMatrices(const std::vector<bool> &will_limit) {
       // rows to the left.
       submat_info.row_offset = new_row_begin;
     } else {
-      // This submatrix is not entirely the kept range of the matrix.
-      // We assume that this submatrix is never accessed directly (as when
-      // we modified the computation we ensured this).  We
-      // give it a valid but stupid size of num-rows=1, num-cols=1, so
-      // that if it ever does get accessed it should produce an error.
-      submat_info.row_offset = 0;
-      submat_info.num_rows = 1;
-      submat_info.col_offset = 0;
-      submat_info.num_cols = 1;
+      // This submatrix is not entirely inside the kept range of the matrix.
+      // We assume that this submatrix is never accessed directly except (if
+      // it was the whole matrix) in allocation and deallocation commands,
+      // since when we modified the computation we ensured this.
+ if (computation_->IsWholeMatrix(s)) { + // If it was the whole matrix then it may be used in allocation and + // deallocation commands, so we should modify it to be the whole of the + // new matrix, which will have fewer rows than before. + submat_info.num_rows = matrix_num_rows; + } else { + // We believe this matrix should never be used. We give it a valid + // but stupid size of num-rows=1, num-cols=1, so that if it ever does + // get accessed it should produce an error. + submat_info.row_offset = 0; + submat_info.num_rows = 1; + submat_info.col_offset = 0; + submat_info.num_cols = 1; + } } } } @@ -1614,7 +1609,7 @@ void DerivativeTimeLimiter::LimitMatrices(const std::vector &will_limit) { void DerivativeTimeLimiter::PruneMatrices() { Analyzer analyzer; analyzer.Init(nnet_, *computation_); - KALDI_ASSERT(computation_->matrices.size() == entire_submatrix_.size()); + KALDI_ASSERT(computation_->matrices.size() == whole_submatrices_.size()); int32 num_matrices = computation_->matrices.size(); std::vector will_limit(num_matrices, false); bool will_limit_at_least_one = false; diff --git a/src/nnet3/nnet-optimize-utils.h b/src/nnet3/nnet-optimize-utils.h index 95fdffc8656..05ce4bf8f41 100644 --- a/src/nnet3/nnet-optimize-utils.h +++ b/src/nnet3/nnet-optimize-utils.h @@ -362,6 +362,13 @@ class ComputationRenumberer { }; +// Class DerivativeTimeLimiter is used inside LimitDerivativeTimes(). +// Its function is to modify the computation so that we don't work +// with derivatives outside of a specified range of t values; this is +// useful, for instance, in BLSTMs where you might have a fair amount of +// left and right context in the training examples but don't want to +// propagate the derivatives to there. +// // We require that the computation have debug info set up // (!matrix_debug_info.empty()) and that this be the first // optimization you perform. This means that the debug_info will @@ -378,11 +385,6 @@ class DerivativeTimeLimiter { private: - // This command ensures that for each matrix m there is a corresponding - // submatrix that spans the entire matrix, and stores its index in - // entire_submatrix_[m]. - void EnsureMatricesHaveEntireSubmatrices(); - // sets up matrix_prune_info_. void ComputeMatrixPruneInfo(); @@ -478,7 +480,7 @@ class DerivativeTimeLimiter { // for each matrix index > 0, the index of a submatrix that consists of // the entirety of that matrix. - std::vector entire_submatrix_; + std::vector whole_submatrices_; std::vector matrix_prune_info_; @@ -590,4 +592,3 @@ void IdentifyIndexesRangesArgs(std::vector *commands, #endif - From 109b7e998834119a828bae9617da2547c1c30063 Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Mon, 10 Oct 2016 18:49:20 -0400 Subject: [PATCH 004/530] Further refactoring to nnet3 compilation to make it easier to implement online computation. 
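
The headline change: Compiler and ComputationGraphBuilder now take a sequence
of ComputationRequests, one per segment of the computation, rather than a
single request; the graph records where each segment ends (segment_ends), and
epochs and phases are computed per segment.

A minimal sketch of the new multi-request constructor (illustrative only;
'request1' and 'request2' stand for already-built requests for two successive
segments, 'nnet' is the Nnet being compiled, and note that multi-segment
requests must not ask for model derivatives):

  std::vector<const ComputationRequest*> requests;
  requests.push_back(&request1);  // segment 0
  requests.push_back(&request2);  // segment 1
  Compiler compiler(requests, nnet);
  CompilerOptions opts;
  NnetComputation computation;
  compiler.CreateComputation(opts, &computation);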
---
 src/nnet3/nnet-compile.cc           | 182 +++++++-----
 src/nnet3/nnet-compile.h            |  37 ++-
 src/nnet3/nnet-computation-graph.cc | 412 +++++++++++++++++-----------
 src/nnet3/nnet-computation-graph.h  |  97 ++++---
 src/nnet3/nnet-optimize.cc          |  11 +-
 src/nnet3/nnet-optimize.h           |   9 +-
 src/nnet3/nnet-utils.cc             |   4 +-
 7 files changed, 468 insertions(+), 284 deletions(-)

diff --git a/src/nnet3/nnet-compile.cc b/src/nnet3/nnet-compile.cc
index 461728eb479..8e70ecb4c4c 100644
--- a/src/nnet3/nnet-compile.cc
+++ b/src/nnet3/nnet-compile.cc
@@ -28,30 +28,66 @@ namespace nnet3 {

 Compiler::Compiler(
     const ComputationRequest &request,
-    const Nnet &nnet): request_(request), nnet_(nnet) { }
+    const Nnet &nnet): nnet_(nnet) {
+  requests_.push_back(&request);
+}

+Compiler::Compiler(
+    const std::vector<const ComputationRequest*> &requests,
+    const Nnet &nnet): requests_(requests), nnet_(nnet) {
+  KALDI_ASSERT(requests_.size() >= 1);
+  // We are currently not supporting getting model derivatives for multi-segment
+  // (online) computations.
+  if (requests_.size() != 1) {
+    for (size_t i = 0; i < requests_.size(); i++) {
+      KALDI_ASSERT(!requests_[i]->need_model_derivative);
+      KALDI_ASSERT(requests_[i]->store_component_stats ==
+                   requests_[0]->store_component_stats);
+    }
+  }
+}

 void Compiler::CreateComputation(const CompilerOptions &opts,
                                  NnetComputation *computation) {
   computation->Clear();
-  ComputationGraphBuilder builder(nnet_, request_, &graph_);
-  builder.Compute();
-  if (!builder.AllOutputsAreComputable()) {
-    builder.ExplainWhyAllOutputsNotComputable();  // prints logging info
-    KALDI_ERR << "Not all outputs were computable, cannot create computation.";
+  ComputationGraphBuilder builder(nnet_, &graph_);
+  for (size_t segment = 0; segment < requests_.size(); segment++) {
+    builder.Compute(*(requests_[segment]));
+    if (!builder.AllOutputsAreComputable()) {
+      builder.ExplainWhyAllOutputsNotComputable();  // prints logging info
+      KALDI_ERR << "Not all outputs were computable, cannot create computation.";
+    }
+    builder.Prune();
   }
-  builder.Prune();
   // see function declaration's comment for meaning of "phases".
-  std::vector<std::vector<int32> > phases;
-  ComputeComputationPhases(nnet_, graph_, &phases);
+  std::vector<std::vector<std::vector<int32> > > phases_per_segment;
+  ComputeComputationPhases(nnet_, graph_, &phases_per_segment);
   std::vector<std::vector<int32> > steps;
-  ComputeComputationSteps(nnet_, request_, phases, &graph_, &steps);
-  phases.clear();
+  steps.reserve(1000);
+
+  // maps each step to the segment in which it appears.  In the normal case
+  // (non-online computation), a vector of all zeros.
+  std::vector<int32> step_to_segment;
+
+  for (size_t segment = 0; segment < requests_.size(); segment++) {
+    std::vector<std::vector<int32> > this_segment_steps;
+    ComputeComputationSteps(nnet_, *(requests_[segment]),
+                            phases_per_segment[segment], &graph_,
+                            &this_segment_steps);
+    for (size_t i = 0; i < this_segment_steps.size(); i++) {
+      steps.push_back(std::vector<int32>());
+      steps.back().swap(this_segment_steps[i]);
+      step_to_segment.push_back(segment);
+    }
+  }
+  // TODO (?)  check that the total num_cindexes in the steps is >=
+  // graph->cindexes.size().  Could do it inside CreateLocationInfo().
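+  // (Illustrative example: if the first request produced three steps and
+  // the second two, 'steps' would have five elements and 'step_to_segment'
+  // would equal [ 0, 0, 0, 1, 1 ].)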
+ phases_per_segment.clear(); CreateLocationInfo(steps); std::vector deriv_needed; - ComputeDerivNeeded(steps, &deriv_needed); - CreateStepInfo(deriv_needed, &steps, computation); - AddCommands(deriv_needed, computation); + ComputeDerivNeeded(steps, step_to_segment, &deriv_needed); + CreateStepInfo(deriv_needed, step_to_segment, &steps, computation); + AddCommands(deriv_needed, step_to_segment, computation); // the following command reorders commands so kAcceptInput and kProvideOutput // appear in the desired places. ConsolidateIoOperations(nnet_, computation); @@ -60,8 +96,9 @@ void Compiler::CreateComputation(const CompilerOptions &opts, } void Compiler::AddCommands(const std::vector &deriv_needed, + const std::vector &step_to_segment, NnetComputation *computation) { - computation->need_model_derivative = request_.need_model_derivative; + computation->need_model_derivative = requests_[0]->need_model_derivative; int32 arbitrary_factor = 8; computation->commands.reserve(computation->matrices.size() * arbitrary_factor); @@ -69,51 +106,27 @@ void Compiler::AddCommands(const std::vector &deriv_needed, std::vector whole_submatrices; computation->GetWholeSubmatrices(&whole_submatrices); AllocateMatrices(whole_submatrices, computation); - SetUpPrecomputedIndexes(computation); + SetUpPrecomputedIndexes(step_to_segment, computation); int32 num_steps = steps_.size(); - for (int32 step = 0; step < num_steps; step++) + for (int32 step = 0; step < num_steps; step++) { DoForwardComputation(step, computation); + if (step + 1 < static_cast(step_to_segment.size()) && + step_to_segment[step + 1] != step_to_segment[step]) { + // insert a marker that separates segments of the computation. + computation->commands.push_back( + NnetComputation::Command(kNoOperationMarker)); + } + } - AddCommandsAfterPropagate(deriv_needed, computation); - - for (int32 step = num_steps - 1; step >= 0; step--) - if (deriv_needed[step]) - DoBackwardComputation(step, computation); - DeallocateMatrices(whole_submatrices, computation); -} - -void Compiler::AddCommandsAfterPropagate(const std::vector &deriv_needed, - NnetComputation *computation) { // mark the end of the forward phase. computation->commands.push_back( NnetComputation::Command(kNoOperationMarker)); - std::vector deriv_input_commands; + for (int32 step = num_steps - 1; step >= 0; step--) + if (deriv_needed[step]) + DoBackwardComputation(step, computation); - // We handle output nodes here-- add commands that relate to us providing - // outputs to the user; then, if applicable, we add commands to direct us to - // accept derivatives w.r.t. those outputs from the user. 
- int32 num_steps = steps_.size(); - for (int32 step = 0; step < num_steps; step++) { - const StepInfo &step_info = steps_[step]; - if (nnet_.IsOutputNode(step_info.node_index)) { - int32 node_index = step_info.node_index, - submatrix_index = step_info.value; - KALDI_ASSERT(computation->IsWholeMatrix(submatrix_index)); - NnetComputation::Command c(kProvideOutput, submatrix_index, node_index); - computation->commands.push_back(c); - if (deriv_needed[step]) { - int32 deriv_submatrix_index = step_info.deriv; - KALDI_ASSERT(deriv_submatrix_index > 0); - KALDI_ASSERT(computation->IsWholeMatrix(deriv_submatrix_index)); - NnetComputation::Command c(kAcceptInput, deriv_submatrix_index, node_index); - deriv_input_commands.push_back(c); - } - } - } - computation->commands.insert(computation->commands.end(), - deriv_input_commands.begin(), - deriv_input_commands.end()); + DeallocateMatrices(whole_submatrices, step_to_segment, computation); } @@ -153,7 +166,11 @@ void Compiler::ComputeStepDependencies( void Compiler::ComputeDerivNeeded( const std::vector > &steps, + const std::vector &step_to_segment, std::vector *deriv_needed) { + KALDI_ASSERT(steps.size() == step_to_segment.size() && + step_to_segment[0] == 0 && + step_to_segment.back() + 1 == requests_.size()); deriv_needed->clear(); int32 num_steps = steps.size(); deriv_needed->resize(num_steps, false); @@ -181,24 +198,26 @@ void Compiler::ComputeDerivNeeded( } // if this step is an input and the user requested the derivative w.r.t. that // input, we need the derivative. + const ComputationRequest &request = *(requests_[step_to_segment[step]]); + if (is_input) { - int32 input_index = request_.IndexForInput(node_name); + int32 input_index = request.IndexForInput(node_name); KALDI_ASSERT(input_index != -1); - if (request_.inputs[input_index].has_deriv) + if (request.inputs[input_index].has_deriv) (*deriv_needed)[step] = true; } // if this step is an output and the user is providing the derivative w.r.t. that // output, we need a place to store the derivative, so we set (*deriv_needed) to // true. if (nnet_.IsOutputNode(node_index)) { - int32 output_index = request_.IndexForOutput(node_name); + int32 output_index = request.IndexForOutput(node_name); KALDI_ASSERT(output_index != -1); - if (request_.outputs[output_index].has_deriv) + if (request.outputs[output_index].has_deriv) (*deriv_needed)[step] = true; } // If this is an updatable Component node and the user requested model // derivatives (e.g. during training), we need this step's derivative. - if (nnet_.IsComponentNode(node_index) && request_.need_model_derivative) { + if (nnet_.IsComponentNode(node_index) && request.need_model_derivative) { const NetworkNode &node = nnet_.GetNode(node_index); const Component *c = nnet_.GetComponent(node.u.component_index); if (c->Properties() & kUpdatableComponent) @@ -244,6 +263,7 @@ MatrixStrideType Compiler::GetStrideType(int32 node_index) const { // function destroys it. 
void Compiler::CreateStepInfo( const std::vector &deriv_needed, + const std::vector &step_to_segment, std::vector > *by_step, NnetComputation *computation) { KALDI_ASSERT(!by_step->empty()); @@ -252,6 +272,7 @@ void Compiler::CreateStepInfo( for (int32 step = 0; step < num_steps; step++) { StepInfo &this_info = steps_[step]; this_info.output_cindex_ids.swap((*by_step)[step]); + this_info.segment = step_to_segment[step]; int32 num_ids = this_info.output_cindex_ids.size(); this_info.output_indexes.resize(num_ids); for (int32 row_index = 0; row_index < num_ids; row_index++) @@ -336,11 +357,14 @@ void Compiler::CreateStepInfo( void Compiler::CreateLocationInfo( const std::vector > &by_step) { cindex_id_to_location_.clear(); - int32 num_cindex_ids = graph_.cindexes.size(); + int32 num_cindex_ids = graph_.cindexes.size(), + total_cindex_ids = 0; cindex_id_to_location_.resize(num_cindex_ids, std::pair(-1,-1)); int32 num_steps = by_step.size(); for (int32 step = 0; step < num_steps; step++) { + // output_cindex_ids is the cindex_ids that this step produces. const std::vector &output_cindex_ids = by_step[step]; + total_cindex_ids += output_cindex_ids.size(); int32 num_rows = output_cindex_ids.size(); for (int32 row = 0; row < num_rows; row++) { int32 cindex_id = output_cindex_ids[row]; @@ -354,6 +378,11 @@ void Compiler::CreateLocationInfo( cindex_id_to_location_[cindex_id] = std::pair(step, row); } } + // All cindex_ids in the graph must be present in a step, which is why + // we make the following assert. In general this will be with equality, + // but I believe there might be some weird edge cases, maybe involving + // kDimRange nodes, that would make this not true. [not 100% sure.] + KALDI_ASSERT(total_cindex_ids >= num_cindex_ids); } void Compiler::DoForwardComputation(int32 step, @@ -383,6 +412,17 @@ void Compiler::DoForwardComputationDescriptor( int32 num_parts = steps_[step].value_parts.size(); for (int32 part = 0; part < num_parts; part++) DoForwardComputationSumDescriptor(step, part, computation); + const StepInfo &step_info = steps_[step]; + if (nnet_.IsOutputNode(step_info.node_index)) { + // If the node is an output then we need to add commands to provide the + // output to the user, and possibly to get derivatives w.r.t. the output + // from the user. + int32 node_index = step_info.node_index, + submatrix_index = step_info.value; + KALDI_ASSERT(computation->IsWholeMatrix(submatrix_index)); + NnetComputation::Command c(kProvideOutput, submatrix_index, node_index); + computation->commands.push_back(c); + } } @@ -762,6 +802,15 @@ void Compiler::DoBackwardComputationFromIndexes( void Compiler::DoBackwardComputationDescriptor( int32 step, NnetComputation *computation) { StepInfo &step_info = steps_[step]; + if (nnet_.IsOutputNode(step_info.node_index) && + step_info.deriv > 0) { + int32 deriv_submatrix_index = step_info.deriv; + KALDI_ASSERT(computation->IsWholeMatrix(deriv_submatrix_index)); + NnetComputation::Command c(kAcceptInput, deriv_submatrix_index, + step_info.node_index); + computation->commands.push_back(c); + } + // the top-level descriptor has a bunch of parts that we concatenate features // over. 
int32 num_parts = step_info.value_parts.size(); @@ -834,7 +883,7 @@ void Compiler::AddForwardStepComponent(int32 step, output_submatrix_index); computation->commands.push_back(c); - if (request_.store_component_stats) { + if (requests_[0]->store_component_stats) { const Component *c = nnet_.GetComponent(node.u.component_index); if (c->Properties() & kStoresStats) { NnetComputation::Command c(kStoreStats, @@ -949,6 +998,7 @@ void Compiler::AllocateMatrices(const std::vector &whole_submatrices, void Compiler::SetUpPrecomputedIndexes( + const std::vector &step_to_segment, NnetComputation *computation) { int32 num_steps = steps_.size(); KALDI_ASSERT(computation->component_precomputed_indexes.empty()); @@ -969,9 +1019,10 @@ void Compiler::SetUpPrecomputedIndexes( const Component *component = nnet_.GetComponent(component_index); - bool need_derivs = request_.NeedDerivatives(); + const ComputationRequest &request = *(requests_[step_to_segment[step]]); + bool need_derivs = request.NeedDerivatives(); ComponentPrecomputedIndexes *precomputed_indexes = - component->PrecomputeIndexes(request_.misc_info, + component->PrecomputeIndexes(request.misc_info, input_indexes, output_indexes, need_derivs); if (precomputed_indexes == NULL) { @@ -986,8 +1037,8 @@ void Compiler::SetUpPrecomputedIndexes( } } - void Compiler::DeallocateMatrices(const std::vector &whole_submatrices, + const std::vector &step_to_segment, NnetComputation *computation) { // This adds the commands to destroy all the matrices- but not the // ones that might be needed as outputs of the computation. The ones that @@ -1000,6 +1051,7 @@ void Compiler::DeallocateMatrices(const std::vector &whole_submatrices, int32 num_steps = steps_.size(); for (int32 step = 0; step < num_steps; step++) { const StepInfo &step_info = steps_[step]; + const ComputationRequest &request = *(requests_[step_to_segment[step]]); if (nnet_.IsOutputNode(step_info.node_index)) { // steps corresponding to output nodes need to have their "value" kept. int32 value_matrix_index = @@ -1011,11 +1063,11 @@ void Compiler::DeallocateMatrices(const std::vector &whole_submatrices, // need to worry about whether outputs were requested, because if they // were not requested we would not be computing them in the first place). std::string input_name = nnet_.GetNodeNames()[step_info.node_index]; - int32 i = 0, num_inputs = request_.inputs.size(); + int32 i = 0, num_inputs = request.inputs.size(); bool has_deriv = false; for (; i < num_inputs; i++) { - if (input_name == request_.inputs[i].name) { - has_deriv = request_.inputs[i].has_deriv; + if (input_name == request.inputs[i].name) { + has_deriv = request.inputs[i].has_deriv; break; } } diff --git a/src/nnet3/nnet-compile.h b/src/nnet3/nnet-compile.h index 195ac36006a..8b9e738d251 100644 --- a/src/nnet3/nnet-compile.h +++ b/src/nnet3/nnet-compile.h @@ -43,14 +43,23 @@ struct CompilerOptions { /// nnet-optimize.h. class Compiler { public: + // Constructor that takes one computation request (this is the normal case). Compiler(const ComputationRequest &request, const Nnet &nnet); + // Constructor with a sequence of computation requests, for multiple + // computation segments (used when creating online computations). 
+ Compiler(const std::vector &request, + const Nnet &nnet); + void CreateComputation(const CompilerOptions &opts, NnetComputation *computation); private: - const ComputationRequest &request_; + // requests_ is the sequence of computation requests, one for each segment; it + // will contain just one element in the normal case, but more when we're + // compiling a multi-segment / 'online' computation. + std::vector requests_; const Nnet &nnet_; ComputationGraph graph_; @@ -65,6 +74,11 @@ class Compiler { // if not used (note: index zero is reserved for the empty // matrix). + int32 segment; // normally 0 except for online/multi-segment computations, + // identifies the segment of which this step is a part (each + // segment in the sequence has a different + // ComputationRequest). + // precomputed_indexes_index is the index into the // component_precomputed_indexes array in the NnetComputation, or zero if // none needed. @@ -92,7 +106,7 @@ class Compiler { // backprop. std::vector > > > input_locations_list; - StepInfo(): node_index(-1), value(0), deriv(0), + StepInfo(): node_index(-1), value(0), deriv(0), segment(0), precomputed_indexes_index(0) { } }; @@ -112,12 +126,19 @@ class Compiler { // whether, for that step, we need to allocate the matrix of derivatives // (interpret this as being at the output of that step). This variable // also tells us whether we need to execute the backprop code for that step. + // 'steps' is a vector of steps; each step is a list of cindexes. + // 'step_to_segment', which should have the same dimension as 'steps', + // maps from step index to the segment it occurs in (only interesting + // for multi-segment/online computations). + // 'deriv_needed' will be given the same length as 'steps'. void ComputeDerivNeeded(const std::vector > &steps, + const std::vector &step_to_segment, std::vector *deriv_needed); // this sets up steps_, destroying the input "by_step" in the process. It // also sets various matrix and sub-matrix sizes in "computation". void CreateStepInfo(const std::vector &deriv_needed, + const std::vector &step_to_segment, std::vector > *by_step, NnetComputation *computation); @@ -155,7 +176,8 @@ class Compiler { // Sets up the precomputed indexes for each component, and sets the // precomputed_indexes_index value for each step. - void SetUpPrecomputedIndexes(NnetComputation *computation); + void SetUpPrecomputedIndexes(const std::vector &step_to_segment, + NnetComputation *computation); // Adds to "computation" the command(s) for the forward computation // for this step. @@ -294,19 +316,14 @@ class Compiler { // 'whole_submatrices' is as created by computation->GetWholeSubmatrices(), it // gives us the index of a submatrix containing the whole of each matrix. void DeallocateMatrices(const std::vector &whole_submatrices, + const std::vector &step_to_segment, NnetComputation *computation); // sets up the debug_info member of "computation". void OutputDebugInfo(NnetComputation *computation) const; - - // this function, called from AddCommands, adds the output and input - // commands that happen after the forward pass and before the backward - // pass. 
- void AddCommandsAfterPropagate(const std::vector &deriv_needed, - NnetComputation *computation); - void AddCommands(const std::vector &deriv_needed, + const std::vector &step_to_segment, NnetComputation *computation); }; diff --git a/src/nnet3/nnet-computation-graph.cc b/src/nnet3/nnet-computation-graph.cc index 43427fb39e4..cf43ca9f804 100644 --- a/src/nnet3/nnet-computation-graph.cc +++ b/src/nnet3/nnet-computation-graph.cc @@ -54,52 +54,72 @@ int32 ComputationGraph::GetCindexId(const Cindex &cindex) const { } -void ComputationGraph::Renumber(const std::vector &keep) { - int32 num_cindex_ids = cindexes.size(); - KALDI_ASSERT(keep.size() == num_cindex_ids); - ComputationGraph temp_graph; - std::vector old2new(num_cindex_ids, -1), new2old; - new2old.reserve(num_cindex_ids); - for (int32 j = 0; j < num_cindex_ids; j++) { +void ComputationGraph::Renumber(int32 start_cindex_id, + const std::vector &keep) { + int32 old_num_cindex_ids = cindexes.size(); + KALDI_ASSERT(keep.size() == old_num_cindex_ids - start_cindex_id); + // count_before_renumbering is the number of cindex_ids >= start_cindex_id, + // before renumbering. + int32 count_before_renumbering = old_num_cindex_ids - start_cindex_id; + std::vector old2new(count_before_renumbering, -1), new2old; + new2old.reserve(old_num_cindex_ids); + for (int32 j = 0; j < count_before_renumbering; j++) { if (keep[j]) { - old2new[j] = new2old.size(); - new2old.push_back(j); + old2new[j] = new2old.size() + start_cindex_id; + new2old.push_back(j + start_cindex_id); } } - int32 new_num_cindex_ids = new2old.size(); - if (new_num_cindex_ids == num_cindex_ids) { + // count_after_renumbering is the number of cindex_ids >= start_cindex_id, + // after renumbering. + int32 count_after_renumbering = new2old.size(), + new_num_cindex_ids = start_cindex_id + count_after_renumbering; + if (count_after_renumbering == count_before_renumbering) { // this is an optimization for when we are not deleting any // cindex-ids. return; } - temp_graph.cindexes.resize(new_num_cindex_ids); - temp_graph.is_input.resize(new_num_cindex_ids); - temp_graph.dependencies.resize(new_num_cindex_ids); - for (int32 c = 0; c < new_num_cindex_ids; c++) { - int32 d = new2old[c]; - temp_graph.cindexes[c] = cindexes[d]; - temp_graph.is_input[c] = is_input[d]; - temp_graph.dependencies[c].reserve(dependencies[d].size()); + + for (int32 old_cindex_id = start_cindex_id; + old_cindex_id < old_num_cindex_ids; old_cindex_id++) { + int32 new_cindex_id = old2new[old_cindex_id - start_cindex_id]; + Cindex &cindex = cindexes[old_cindex_id]; + if (new_cindex_id == -1) { + cindex_to_cindex_id_.erase(cindex); + } else if (new_cindex_id != old_cindex_id) { + cindex_to_cindex_id_[cindex] = new_cindex_id; + } + } + + std::vector temp; + for (int32 c = start_cindex_id; c < new_num_cindex_ids; c++) { + int32 d = new2old[c - start_cindex_id]; + // note: d >= c, which is why we do not overwrite data here. + KALDI_PARANOID_ASSERT(d >= c); + cindexes[c] = cindexes[d]; + is_input[c] = is_input[d]; + // if c == d, we need to create a temporary copy. + const std::vector &src_dependencies = + (c == d ? 
(temp = dependencies[d]) : dependencies[d]); std::vector::const_iterator - iter = dependencies[d].begin(), end = dependencies[d].end(); + iter = src_dependencies.begin(), end = src_dependencies.end(); + dependencies[c].clear(); for (; iter != end; ++iter) { - int32 old_dep = *iter, new_dep = old2new[old_dep]; - if (new_dep != -1) - temp_graph.dependencies[c].push_back(new_dep); - else - KALDI_ERR << "Dependency on nonexistent cindex-id"; + int32 old_dep = *iter; + if (old_dep < start_cindex_id) { + dependencies[c].push_back(old_dep); + } else { + int32 new_dep = old2new[old_dep - start_cindex_id]; + if (new_dep != -1) + dependencies[c].push_back(new_dep); + else + KALDI_ERR << "Dependency on nonexistent cindex-id"; + } } } - // at this point, rather than setting up cindex_to_cindex_id_ on the temporary - // graph, we copy cindexes, is_input and dependencies to this graph, and then - // set up cindex_to_cindex_id_ locally. - cindexes.swap(temp_graph.cindexes); - is_input.swap(temp_graph.is_input); - dependencies.swap(temp_graph.dependencies); - cindex_to_cindex_id_.clear(); - for (int32 c = 0; c < new_num_cindex_ids; c++) - cindex_to_cindex_id_[cindexes[c]] = c; + cindexes.resize(new_num_cindex_ids); + is_input.resize(new_num_cindex_ids); + dependencies.resize(new_num_cindex_ids); } void ComputationGraphBuilder::PrintCindexId(std::ostream &os, @@ -229,17 +249,17 @@ void ComputationGraphBuilder::AddCindexId(int32 cindex_id, void ComputationGraphBuilder::AddInputs() { int32 num_added = 0; - for (int32 i = 0; i < request_.inputs.size(); i++) { - int32 n = nnet_.GetNodeIndex(request_.inputs[i].name); + for (int32 i = 0; i < request_->inputs.size(); i++) { + int32 n = nnet_.GetNodeIndex(request_->inputs[i].name); if (n == -1) KALDI_ERR << "Network has no input with name " - << request_.inputs[i].name; + << request_->inputs[i].name; NodeType t = nnet_.GetNode(n).node_type; KALDI_ASSERT((t == kInput || t == kComponent) && "Inputs to graph only allowed for Input and Component nodes."); - for (int32 j = 0; j < request_.inputs[i].indexes.size(); j++) { - Cindex cindex(n, request_.inputs[i].indexes[j]); + for (int32 j = 0; j < request_->inputs[i].indexes.size(); j++) { + Cindex cindex(n, request_->inputs[i].indexes[j]); bool is_input = true, is_new; int32 cindex_id = graph_->GetCindexId(cindex, is_input, &is_new); KALDI_ASSERT(is_new && "Input index seems to be listed more than once"); @@ -252,13 +272,13 @@ void ComputationGraphBuilder::AddInputs() { void ComputationGraphBuilder::AddOutputs() { int32 num_added = 0; - for (int32 i = 0; i < request_.outputs.size(); i++) { - int32 n = nnet_.GetNodeIndex(request_.outputs[i].name); + for (int32 i = 0; i < request_->outputs.size(); i++) { + int32 n = nnet_.GetNodeIndex(request_->outputs[i].name); if (n == -1) KALDI_ERR << "Network has no output with name " - << request_.outputs[i].name; - for (int32 j = 0; j < request_.outputs[i].indexes.size(); j++) { - Cindex cindex(n, request_.outputs[i].indexes[j]); + << request_->outputs[i].name; + for (int32 j = 0; j < request_->outputs[i].indexes.size(); j++) { + Cindex cindex(n, request_->outputs[i].indexes[j]); bool is_input = false, is_new; int32 cindex_id = graph_->GetCindexId(cindex, is_input, &is_new); KALDI_ASSERT(is_new && "Output index seems to be listed more than once"); @@ -328,7 +348,7 @@ void ComputationGraphBuilder::ExplainWhyAllOutputsNotComputable() const { KALDI_LOG << num_not_computable << " output cindexes out of " << num_outputs_total << " were not computable."; std::ostringstream os; - 
request_.Print(os); + request_->Print(os); KALDI_LOG << "Computation request was: " << os.str(); if (num_not_computable > num_print) KALDI_LOG << "Printing the reasons for " << num_print << " of these."; @@ -392,7 +412,7 @@ void ComputationGraphBuilder::PruneDependencies(int32 cindex_id) { // in the set of inputs to the component that are computable. IndexSet index_set(*graph_, computable_info_, node_id - 1, dont_care); std::vector used_indexes; - bool ans = c->IsComputable(request_.misc_info, index, index_set, + bool ans = c->IsComputable(request_->misc_info, index, index_set, &used_indexes); // If the next assert fails it could be a failure in the assumption that // making more inputs available will never change something from not being @@ -429,8 +449,23 @@ void ComputationGraphBuilder::PruneDependencies(int32 cindex_id) { dependencies.swap(used_cindex_ids); } -void ComputationGraphBuilder::Compute() { - KALDI_ASSERT(current_distance_ == -1 && "Compute() called twice?"); +ComputationGraphBuilder::ComputationGraphBuilder( + const Nnet &nnet, + ComputationGraph *graph): + nnet_(nnet), request_(NULL), graph_(graph), + current_distance_(-1) { + KALDI_ASSERT(graph_->cindexes.empty() && + "ComputationGraphBuilder initialized with nonempty graph."); +} + + +void ComputationGraphBuilder::Compute(const ComputationRequest &request) { + if (request_ != NULL && graph_->segment_ends.empty()) { + // this check is relevant to multi-segment (i.e. online) computations. + KALDI_ERR << "You are calling things in the wrong order: should be " + << "Compute(), Prune(), Compute, Prune(), ..."; + } + request_ = &request; AddInputs(); AddOutputs(); // sets current_distance_ to 0. // max_distance for debugging, to detect infinite recursion. @@ -449,7 +484,9 @@ void ComputationGraphBuilder::Compute() { if (current_distance_ == max_distance) KALDI_ERR << "Loop detected while building computation graph (bad " << "network topology?)"; - Check(); + + if (RandInt(1, 2 * (graph_->segment_ends.size() + 1)) == 1) + Check(); } @@ -531,30 +568,52 @@ void ComputationGraphBuilder::Check() const { } void ComputationGraphBuilder::Prune() { + // Since Prune() is called for each segment in turn [note: there + // will be only 1 segment in the normal non-online case], we + // only prune for the current, just-added segment. + int32 start_cindex_id = (graph_->segment_ends.empty() ? 0 : + graph_->segment_ends.back()); int32 num_cindex_ids = graph_->cindexes.size(); // Prune the dependencies to just those that are used (to remove // optional dependencies that don't end up getting used). - for (int32 cindex_id = 0; cindex_id < num_cindex_ids; cindex_id++) + for (int32 cindex_id = start_cindex_id; + cindex_id < num_cindex_ids; cindex_id++) PruneDependencies(cindex_id); - depend_on_this_.clear(); // not valid any more after pruning dependencies. + // the following clears the elements of depend_on_this from start_cindex_id to + // num_cindex_ids - 1, without touching the entire array. 
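+  // (Resizing down to start_cindex_id discards the entries from
+  // start_cindex_id onward; resizing back up re-initializes that range to
+  // empty lists, since they are not valid any more after pruning.)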
+ depend_on_this_.resize(start_cindex_id); + depend_on_this_.resize(num_cindex_ids); std::vector required; - ComputeRequiredArray(&required); + ComputeRequiredArray(start_cindex_id, &required); - std::vector keep(num_cindex_ids, false); - for (int32 c = 0; c < num_cindex_ids; c++) { - if (required[c] || graph_->is_input[c]) { + std::vector keep(num_cindex_ids - start_cindex_id, false); + for (int32 c = start_cindex_id; c < num_cindex_ids; c++) { + if (required[c - start_cindex_id] || graph_->is_input[c]) { KALDI_ASSERT(computable_info_[c] == kComputable && "You are calling Prune when not everything is computable."); - keep[c] = true; + keep[c - start_cindex_id] = true; } } - graph_->Renumber(keep); - // The following variables will not be valid any more after the renumbering, - // so clear them. - computable_info_.clear(); - computable_queue_.clear(); - usable_count_.clear(); + graph_->Renumber(start_cindex_id, keep); + // We also need to renumber computable_info_ and usable_count_, which + // graph_->Renumber doesn't do for us, but we can make some shortcuts. We set + // all computable_info_ to kComputable because actually it all was kComputable + // (we checked when deciding what to keep); and we set the usable_count_ to 1 + // for all the cindex_ids we just added... this is not 100% accurate + // according to the way we defined usable_count_, but it prevents additional + // computation since it is > 0 (notice that IncrementUsableCount and + // DecrementUsableCount may do some work when the usable_count goes to zero or + // from zero. Anyway, the usable-count for these cindex_ids for those "older + // segments" is not critical. [this information only gets used if we process + // additional segments as part of the compilation of an online computation.] + int32 new_num_cindex_ids = graph_->cindexes.size(); + computable_info_.resize(start_cindex_id); + computable_info_.resize(new_num_cindex_ids, (char)kComputable); + usable_count_.resize(start_cindex_id); + usable_count_.resize(new_num_cindex_ids, 1); + KALDI_ASSERT(computable_queue_.empty()); + graph_->segment_ends.push_back(new_num_cindex_ids); } // Add cindex_ids that this cindex_id depends on. @@ -584,7 +643,7 @@ void ComputationGraphBuilder::AddDependencies(int32 cindex_id) { int32 c = node.u.component_index; const Component *component = nnet_.GetComponent(c); std::vector input_indexes; - component->GetInputIndexes(request_.misc_info, index, + component->GetInputIndexes(request_->misc_info, index, &input_indexes); input_cindexes.resize(input_indexes.size()); for (size_t i = 0; i < input_indexes.size(); i++) { @@ -690,14 +749,14 @@ ComputationGraphBuilder::ComputeComputableInfo(int32 cindex_id) const int32 input_node_id = node_id - 1; { IndexSet index_set(*graph_, computable_info_, input_node_id, false); - if (c->IsComputable(request_.misc_info, index, index_set, NULL)) { + if (c->IsComputable(request_->misc_info, index, index_set, NULL)) { // it's computable even without counting kUnknown inputs as computable // [treat_unknown_as_computable = false] -> definitely computable. return kComputable; } } IndexSet index_set2(*graph_, computable_info_, input_node_id, true); - if (!c->IsComputable(request_.misc_info, index, index_set2, NULL)) { + if (!c->IsComputable(request_->misc_info, index, index_set2, NULL)) { // it's not computable even when counting kUnknown inputs as computable // [treat_unknown_as_computable = true] -> definitely not computable. 
return kNotComputable; @@ -731,9 +790,9 @@ void ComputationGraphBuilder::GetComputableInfo( KALDI_ASSERT(!computable_info_.empty() && "You need to call this before Prune()!"); computable->clear(); - computable->resize(request_.outputs.size()); - for (size_t i = 0; i < request_.outputs.size(); i++) { - const IoSpecification &output = request_.outputs[i]; + computable->resize(request_->outputs.size()); + for (size_t i = 0; i < request_->outputs.size(); i++) { + const IoSpecification &output = request_->outputs[i]; int32 n = nnet_.GetNodeIndex(output.name); KALDI_ASSERT(n != -1); int32 size = output.indexes.size(); @@ -861,19 +920,26 @@ void ComputationGraphBuilder::BuildGraphOneIter() { } void ComputationGraphBuilder::ComputeRequiredArray( + int32 start_cindex_id, std::vector *required) const { int32 num_cindex_ids = graph_->cindexes.size(); + KALDI_ASSERT(num_cindex_ids >= start_cindex_id); KALDI_ASSERT(computable_info_.size() == num_cindex_ids); required->clear(); - required->resize(num_cindex_ids, false); + required->resize(num_cindex_ids - start_cindex_id, false); + + // would be bool, but indexing c++ bool may be slow. + std::vector is_output_node(nnet_.NumNodes()); + for (int32 n = 0; n < nnet_.NumNodes(); n++) + is_output_node[n] = (char)(nnet_.IsOutputNode(n) ? 1 : 0); std::vector queue; - for (int32 c = 0; c < num_cindex_ids; c++) { + for (int32 c = start_cindex_id; c < num_cindex_ids; c++) { // First put the output cindex_ids into the queue. int32 node_id = graph_->cindexes[c].first; - if (nnet_.IsOutputNode(node_id)) { - (*required)[c] = true; + if (is_output_node[node_id]) { + (*required)[c - start_cindex_id] = true; queue.push_back(c); } } @@ -885,16 +951,17 @@ void ComputationGraphBuilder::ComputeRequiredArray( end = dependencies.end(); for (; iter != end; ++iter) { int32 d = *iter; - if (!(*required)[d]){ - (*required)[d] = true; + if (!(*required)[d - start_cindex_id]){ + (*required)[d - start_cindex_id] = true; queue.push_back(d); } } } // just check that we don't have any cindex_ids which are required but have // usable_count_ == 0; this would indicate a bug somewhere. - for (int32 c = 0; c < num_cindex_ids; c++) - KALDI_ASSERT(!((*required)[c] && (usable_count_[c] == 0))); + for (int32 c = start_cindex_id; c < num_cindex_ids; c++) + KALDI_ASSERT(!((*required)[c - start_cindex_id] && + (usable_count_[c] == 0))); } @@ -956,27 +1023,27 @@ void AddInputToGraph(const ComputationRequest &request, /** This function outputs to dependencies_subset[c], for each cindex_id c, the subset of elements d of graph.dependencies[c] such that - cindex_id_to_epoch[d] == cindex_id_to_epoch[c]. That is, it's + cindex_id_to_segment_and_epoch[d] == cindex_id_to_segment_and_epoch[c]. That is, it's the dependency graph of the entire computation, but removing - links that go from one epoch to another epoch. Topologically, - 'dependencies_subset' would therefor consist of a bunch of + links that go from one segment/epoch to another segment/epoch. Topologically, + 'dependencies_subset' would therefore consist of a bunch of disconnected graphs. 
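    For example (purely illustrative): a dependency of a cindex_id in epoch 2
    of some segment on a cindex_id in epoch 1 of the same segment is dropped
    from 'dependencies_subset', while a dependency on another cindex_id in
    epoch 2 of that same segment is kept.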
*/ static void ComputeDependenciesSubset( const ComputationGraph &graph, - const std::vector &cindex_id_to_epoch, + const std::vector &cindex_id_to_segment_and_epoch, std::vector > *dependencies_subset) { int32 num_cindex_ids = graph.cindexes.size(); - KALDI_ASSERT(cindex_id_to_epoch.size() == num_cindex_ids); + KALDI_ASSERT(cindex_id_to_segment_and_epoch.size() == num_cindex_ids); dependencies_subset->resize(num_cindex_ids); for (int32 cindex_id = 0; cindex_id < num_cindex_ids; cindex_id++) { - int32 phase_index = cindex_id_to_epoch[cindex_id]; + int32 phase_index = cindex_id_to_segment_and_epoch[cindex_id]; const std::vector &dependencies = graph.dependencies[cindex_id]; std::vector &dep_subset = (*dependencies_subset)[cindex_id]; int32 num_dep = dependencies.size(); for (int32 i = 0; i < num_dep; i++) { int32 d = dependencies[i]; - if (cindex_id_to_epoch[d] == phase_index) + if (cindex_id_to_segment_and_epoch[d] == phase_index) dep_subset.push_back(d); } } @@ -1000,27 +1067,27 @@ static void ComputeDependenciesSubset( /// /// \param nnet [in] The neural net /// \param graph [in] The computation graph -/// \param cindex_id_to_epoch [out] A vector that maps cindex_id to -/// epoch index, as obtained by adding one to the output of -/// ComputeNnetComputationOrder; however, input cindex_ids (those for -/// which is_input[cindex_id] is true) always map to 0. -/// Note: the epoch-index only depends on the neural network's -/// topology of nodes; a node in the network should always map to -/// the same epoch-index regardless of the computation, and -/// we assign cindexes to epochs just based on what node the -/// cindexes are part of. -/// \param epochs [out] The same information as -/// cindex_id_to_epoch, but in a different format: for each -/// epoch, a list of cindex_ids with that epoch index. -/// \param epoch_is_trivial [out] A vector of bool, indexed by -/// epoch index that's true if this epoch index corresponds -/// to just a single NetworkNode. (and also true for epoch index 0, -/// which corresponds only to inputs to the network). +/// \param cindex_id_to_segment_and_epoch [out] A vector that maps cindex_id to +/// a number that is the same if two cindex_ids are in the same +/// segment and same epoch, and different otherwise. This +/// number combines the segment index and the epoch index; the +/// details are not important to the calling code. +/// \param epochs_per_segment [out] This is a listing of all the +/// cindex_ids in the computation graph, divided up first +/// by segment and then by epoch. +/// \param epoch_is_trivial [out] A vector of bool, indexed by the epoch +/// index which is the same as the second index of +/// 'epochs_per_segment', that's true if this epoch index corresponds +/// to just a single NetworkNode (and also true for epoch indexes +/// corresponding to inputs to the network, which will be the first +/// epoch of each segment). This depends on the neural network +/// structure only. 
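+/// For intuition: the combined number this function stores in
+/// 'cindex_id_to_segment_and_epoch' is (as computed in the code below)
+///    epoch_index + segment_index * num_epoch_indexes,
+/// so with, say, 3 epoch indexes per segment, (segment 1, epoch 2) maps to 5;
+/// two cindex_ids map to the same value only if they agree on both the
+/// segment and the epoch.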
+ static void ComputeEpochInfo( const Nnet &nnet, const ComputationGraph &graph, - std::vector *cindex_id_to_epoch, - std::vector > *epochs, + std::vector *cindex_id_to_segment_and_epoch, + std::vector > > *epochs_per_segment, std::vector *epoch_is_trivial) { // node_to_epoch maps each nnet node to an index >= 0 that tells us coarsely @@ -1041,10 +1108,14 @@ static void ComputeEpochInfo( node_to_epoch[i]++; int32 num_nodes = nnet.NumNodes(), num_cindex_ids = graph.cindexes.size(), + num_segments = graph.segment_ends.size(), num_epoch_indexes = 1 + *std::max_element(node_to_epoch.begin(), node_to_epoch.end()); KALDI_ASSERT(node_to_epoch.size() == num_nodes); + epochs_per_segment->clear(); + epochs_per_segment->resize(num_segments); + // epoch_to_num_nodes is only used so we know whether each epoch // index corresponds to multiple nodes; if it's just one node then we know // the computation is very simple and we can do an optimization. @@ -1057,15 +1128,24 @@ static void ComputeEpochInfo( KALDI_ASSERT(o == 0 || epoch_to_num_nodes[o] > 0); (*epoch_is_trivial)[o] = (epoch_to_num_nodes[o] <= 1); } - - cindex_id_to_epoch->resize(num_cindex_ids); - epochs->resize(num_epoch_indexes); - for (int32 cindex_id = 0; cindex_id < num_cindex_ids; cindex_id++) { - int32 node_index = graph.cindexes[cindex_id].first, - epoch_index = (graph.is_input[cindex_id] ? 0 : - node_to_epoch[node_index]); - (*cindex_id_to_epoch)[cindex_id] = epoch_index; - (*epochs)[epoch_index].push_back(cindex_id); + cindex_id_to_segment_and_epoch->resize(num_cindex_ids); + KALDI_ASSERT(graph.segment_ends.back() == num_cindex_ids); + int32 cur_segment_start = 0, cur_segment_end; + for (int32 segment = 0; segment < num_segments; segment++) { + cur_segment_end = graph.segment_ends[segment]; + std::vector > &epochs = (*epochs_per_segment)[segment]; + epochs.resize(num_epoch_indexes); + + for (int32 cindex_id = cur_segment_start; + cindex_id < cur_segment_end; cindex_id++) { + int32 node_index = graph.cindexes[cindex_id].first, + epoch_index = (graph.is_input[cindex_id] ? 0 : + node_to_epoch[node_index]); + (*cindex_id_to_segment_and_epoch)[cindex_id] = + epoch_index + segment * num_epoch_indexes; + epochs[epoch_index].push_back(cindex_id); + } + cur_segment_start = cur_segment_end; } } @@ -1168,6 +1248,14 @@ static int32 SumVectorSizes(const std::vector > &vec) { return ans; } +static int32 SumVectorSizes(const std::vector > > &vec) { + int32 ans = 0; + for (size_t i = 0; i < vec.size(); i++) + ans += SumVectorSizes(vec[i]); + return ans; +} + + /* this function is called from ComputeComputationPhases; it handles the part of the computation from one epoch (this code was broken out to avoid that @@ -1187,10 +1275,11 @@ static int32 SumVectorSizes(const std::vector > &vec) { in things like TDNNs. @param [in] dependencies_subset A subset of 'graph.dependencies' corresponding just to dependencies within the same epoch (not specifically - this epoch; for all epochs). E.g. for a cindex_id c + this epoch; for all epochs). In general, for a cindex_id c dependencies[c] is a list of other cindex_ids d1, d2, such that in order to compute c we must first compute - d1, d2 and so on. + d1, d2 and so on (plus d1, d2, etc. must be from the + same epoch as c). 
@param [in] depends_on_subset The graph-transpose of dependencies_subset; for cindex_id c, depends_on_subset[c] is the list of cindex_ids that directly depend on cindex_id c, @@ -1198,26 +1287,26 @@ static int32 SumVectorSizes(const std::vector > &vec) { @param [in] epoch_is_trivial A bool that's true if this epoch is trivial (meaning it consists of just one component)... this enables a faster code path in this common case. - @param [in,out] phase_indexes This vector, to some elements of which this function writes - each time it is called, maps from cindex_id to the - 'phase index'. A phase index is a number identifying - the phases [like coarse steps] of the computation, with - zero for the first phase, one for the second, etc. - We work out how many phase indexes have been used already - by previous epochs, from phases->size(). Actually, - phase_indexes is really just a temporary variable used - by this function, that we allocate outside this - function for efficiency. It is initialized to - -1 outside this function; different invocations of - this function work with different elements of the - vector. - @param [in,out] phases This is the output of this function. Each time - we add a new phase, we append a vector to *phases. - E.g. (*phases)[0] is the sorted list of cindexes - in the first phase of the computation... and so on. - Note, this function is called multiple times, and - each time we add one or more phases to this vector, - so its size grows. + @param [in,out] phase_indexes This vector, to some elements of which this + function writes each time it is called, maps from + cindex_id to the 'phase index'. A phase index is a + number identifying the phases [like coarse steps] of + the computation, with zero for the first phase, one + for the second, etc. We work out how many phase + indexes have been used already by previous epochs, + from phases->size(). Actually, phase_indexes is + really just a temporary variable used by this + function, that we allocate outside this function for + efficiency. It is initialized to -1 outside this + function; different invocations of this function work + with different non-overlapping elements of the vector. + @param [in,out] phases This is the output of this + function. Each time we add a new phase, we append a + vector to *phases. E.g. (*phases)[0] is the sorted + list of cindexes in the first phase of the + computation... and so on. Note, this function is + called multiple times, and each time we add one or + more phases to this vector, so its size grows. 
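+                  (An illustrative case: if this epoch contains a recurrent
+                  component where the cindexes for each time t depend on those
+                  for t - 1, each distinct t forms its own phase, so T time
+                  steps would contribute T phases.)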
 */
static inline void ComputeComputationPhasesForEpoch(
    const Nnet &nnet,
@@ -1321,17 +1410,17 @@ static inline void ComputeComputationPhasesForEpoch(
 void ComputeComputationPhases(
     const Nnet &nnet,
     const ComputationGraph &graph,
-    std::vector<std::vector<int32> > *phases) {
+    std::vector<std::vector<std::vector<int32> > > *phases_per_segment) {
   using namespace computation_graph;
   int32 num_cindex_ids = graph.cindexes.size();

-  std::vector<int32> cindex_id_to_epoch;
-  std::vector<std::vector<int32> > epochs;
+  std::vector<int32> cindex_id_to_segment_and_epoch;
+  std::vector<std::vector<std::vector<int32> > > epochs_per_segment;
   std::vector<bool> epoch_is_trivial;
-  ComputeEpochInfo(nnet, graph, &cindex_id_to_epoch,
-                   &epochs, &epoch_is_trivial);
+  ComputeEpochInfo(nnet, graph, &cindex_id_to_segment_and_epoch,
+                   &epochs_per_segment, &epoch_is_trivial);

-  KALDI_ASSERT(SumVectorSizes(epochs) == num_cindex_ids);
+  KALDI_ASSERT(SumVectorSizes(epochs_per_segment) == num_cindex_ids);

   // dependencies_subset contains just the subset of dependencies
   // of each cindex_id, that have the same epoch index as
   // the cindex_id itself.  This will be used to correctly order
   // cindexes within a certain epoch (relevant for things like
   // LSTMs).
   std::vector<std::vector<int32> > dependencies_subset;
-  ComputeDependenciesSubset(graph, cindex_id_to_epoch,
+  ComputeDependenciesSubset(graph, cindex_id_to_segment_and_epoch,
                             &dependencies_subset);
+  // destroy cindex_id_to_segment_and_epoch, it's no longer needed.
+  { std::vector<int32> temp; temp.swap(cindex_id_to_segment_and_epoch); }

   // depend_on_subset is a subset of the normal "depend_on" list (i.e. a list
   // of all cindex_ids that depend on the current cindex_id), limited to just
   // those cindex_ids that are within the same epoch.
   std::vector<std::vector<int32> > depend_on_subset;
   ComputeGraphTranspose(dependencies_subset, &depend_on_subset);

-  int32 num_epoch_indexes = epoch_is_trivial.size();
+  int32 num_epoch_indexes = epoch_is_trivial.size(),
+      num_segments = graph.segment_ends.size();
   // "phase_indexes" is used inside ComputeComputationPhasesForEpoch.
   std::vector<int32> phase_indexes(num_cindex_ids, -1);

-  if (phases) {
-    phases->clear();
-    phases->reserve(50);  // minimize unnecessary copies.  50 is very
-                          // arbitrarily chosen.
+  phases_per_segment->clear();
+  phases_per_segment->resize(num_segments);
+
+  for (int32 segment = 0; segment < num_segments; segment++) {
+    (*phases_per_segment)[segment].reserve(50);  // minimize unnecessary
+                                                 // copies.  50 is very
+                                                 // arbitrarily chosen.
+    for (int32 epoch = 0; epoch < num_epoch_indexes; epoch++)
+      ComputeComputationPhasesForEpoch(nnet, graph,
+                                       epochs_per_segment[segment][epoch],
+                                       dependencies_subset,
+                                       depend_on_subset,
+                                       epoch_is_trivial[epoch],
+                                       &phase_indexes,
+                                       &((*phases_per_segment)[segment]));
   }
-  for (int32 epoch = 0;
-       epoch < num_epoch_indexes;
-       epoch++)
-    ComputeComputationPhasesForEpoch(nnet, graph,
-                                     epochs[epoch],
-                                     dependencies_subset,
-                                     depend_on_subset,
-                                     epoch_is_trivial[epoch],
-                                     &phase_indexes, phases);

   // make sure everything was computable.  If the next assert fails it's likely
   // a bug in this function or in PruneComputationGraph.
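   // (Each cindex_id should appear in exactly one phase of exactly one
   // segment, so the phase sizes, summed over all segments, must add up to
   // the total number of cindex_ids; that is what the next assert checks.)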
-  KALDI_ASSERT(SumVectorSizes(*phases) == num_cindex_ids);
+  KALDI_ASSERT(SumVectorSizes(*phases_per_segment) == num_cindex_ids);
 }

 CindexSet::CindexSet(const ComputationGraph &graph):
@@ -1835,7 +1927,6 @@ void ComputeComputationSteps(
     ComputationGraph *graph,
     std::vector<std::vector<int32> > *steps) {
   using namespace compute_computation_steps;
-  steps->clear();
   AddInputSteps(nnet, request, *graph, steps);
   {
     std::vector<std::vector<int32> > component_steps;
@@ -1847,16 +1938,9 @@ void ComputeComputationSteps(
   ReorderIndexes(nnet, request, *graph, steps);
   AddDimRangeSteps(nnet, graph, steps);
   AddOutputSteps(nnet, request, *graph, steps);
-
-  int32 num_cindexes = 0;
-  for (int32 i = 0; i < steps->size(); i++)
-    num_cindexes += (*steps)[i].size();
-  // The next line has ">=" not "==" because it is possible (although unlikely
-  // in normal setups) that some cindexes of Descriptors which are at the inputs
-  // of Components,
-  KALDI_ASSERT(num_cindexes >= graph->cindexes.size());
 }

+
 }  // namespace nnet3
 }  // namespace kaldi
diff --git a/src/nnet3/nnet-computation-graph.h b/src/nnet3/nnet-computation-graph.h
index 660e20d36ad..41087123421 100644
--- a/src/nnet3/nnet-computation-graph.h
+++ b/src/nnet3/nnet-computation-graph.h
@@ -62,6 +62,23 @@ struct ComputationGraph {
   /// those that are used (which will vary depending on availability).
   std::vector<std::vector<int32> > dependencies;

+  /// This variable is only of particular interest in a 'multi-segment'
+  /// computation, which is used while creating computations for 'online'
+  /// operation (for the kind of situation where you provide some input; run
+  /// the computation; get some output; provide some more input for larger 't'
+  /// values, etc.).  In this context, a 'segment' is a continuous range of
+  /// cindex_ids, and a segment_end is one past the end of each segment, which
+  /// is the same as the beginning of the next segment, if there is one.  In
+  /// the case of a fully-created computation graph with only one segment, this
+  /// will contain just one value, which equals the number of cindex_ids.
+  /// This information is needed to correctly order the computation, because
+  /// the computation graph itself does not contain dependencies that encode
+  /// the ordering of segments (and even if it did contain those dependencies,
+  /// it's not really compatible with the way we use the scc's in the graph
+  /// structure of the network to order the computation).
+  std::vector<int32> segment_ends;
+
   /// Maps a Cindex to an integer cindex_id.  If not present, then add it (with
   /// the corresponding "is_input" flag set to the value "input") and set
   /// *is_new to true.  If present, set is_new to false and return the existing
@@ -72,9 +89,12 @@ struct ComputationGraph {
   /// -1 if the Cindex is not present, and the user should check for this.
   int32 GetCindexId(const Cindex &cindex) const;

+  /// This function renumbers the cindex-ids (but only those with index
+  /// c >= start_cindex_id), keeping only those for which
+  /// keep[c - start_cindex_id] is true.  The "keep" array must have size equal
+  /// to this->cindexes.size() - start_cindex_id.
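+  /// A hypothetical illustration (the numbers are made up): if cindex_ids
+  /// 100 .. 109 were just added, one might call
+  ///   std::vector<bool> keep(10, true);
+  ///   keep[3] = false;             // drop cindex_id 103
+  ///   graph->Renumber(100, keep);  // cindex_ids 0 .. 99 are left untouched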
+  void Renumber(int32 start_cindex_id,
+                const std::vector<bool> &keep);


   /// This function, useful for debugging/visualization purposes,
@@ -97,13 +117,18 @@ struct ComputationGraph {
 class ComputationGraphBuilder {
  public:
   ComputationGraphBuilder(const Nnet &nnet,
-                          const ComputationRequest &request,
-                          ComputationGraph *graph):
-      nnet_(nnet), request_(request), graph_(graph), current_distance_(-1) { }
-
-  // Does the initial computation (populating the graph and computing
-  // whether each required cindex_id is computable), without the pruning.
-  void Compute();
+                          ComputationGraph *graph);
+
+  // Does the initial computation (populating the graph and computing whether
+  // each required cindex_id is computable), without the pruning.  In the
+  // normal case you call this just once with one 'request', but in the
+  // 'online' case you call Compute() [then maybe check
+  // AllOutputsAreComputable()] then Prune() multiple times, with a sequence of
+  // different requests for increasing time values.
+  // Note: it sets the class member request_ to the address of 'request', so
+  // you should not let 'request' go out of scope while this class might still
+  // use it (e.g. until you call Compute() with a different request).
+  void Compute(const ComputationRequest &request);

   // Returns true if all requested outputs are computable.  To be called after
   // Compute() but before Prune().
@@ -211,14 +236,18 @@ class ComputationGraphBuilder {
   // PruneDependencies() to remove unused dependencies, so it will only say
   // something is required if it is really accessed in the computation.
   // We'll later use this to remove unnecessary cindexes.
-  void ComputeRequiredArray(std::vector<bool> *required) const;
+  // 'start_cindex_id' is the cindex_id from which the 'required' array is to
+  // start (normally zero, but may be nonzero in multi-segment computations);
+  // so 'required' is indexed by cindex_id - start_cindex_id.
+  void ComputeRequiredArray(int32 start_cindex_id,
+                            std::vector<bool> *required) const;

   // this function, to be called from Compute(), does some sanity checks to
   // verify that the internal state is consistent.
   void Check() const;

   const Nnet &nnet_;
-  const ComputationRequest &request_;
+  const ComputationRequest *request_;
   ComputationGraph *graph_;

   // this is the transpose of graph_->dependencies; it tells us
@@ -248,7 +277,7 @@ class ComputationGraphBuilder {
   std::vector<int32> usable_count_;

   // current_distance_ >= 0 is the distance to the output, of the cindex_ids in
-  // current_queue_;
+  // current_queue_.
   int32 current_distance_;
   // the cindex_ids in current_queue_ are at distance "current_distance" to the
   // output and have not yet had their dependencies processed.
@@ -322,23 +351,29 @@ class IndexSet {
     @param [in] nnet         The neural network this computation is for
     @param [in] graph        The computation graph that we're computing phases for.
-    @param [out] phases      The phases.  Suppose the computation can be completed
-                     in 20 phases, then phases->size() will be 20 at exit, and
-                     (*phases)[0] will be a sorted list of cindex_ids. that
-                     belong to the first phase, and so on.  (Remember, a
-                     cindex_id is an index into graph->cindexes; it compactly
-                     identifies a cindex.)  The sets represented by the
-                     elements of 'phases' will be disjoint and will cover all
-                     elements in [0 .. computation.cindexes.size() - 1].
-
-                     This function will be crash if the computation cannot
-                     actualy be computed.  Note: we assume you have called
-                     PruneComputationGraph() before this function.
+    @param [out] phases_per_segment  The phases, listed separately for each
+                 segment of the computation [there will be just one segment in
+                 the normal case, more in the online-recognition case].
+                 Consider just one segment for now.  Suppose the computation
+                 can be completed in 20 phases, then
+                 (*phases_per_segment)[0].size() will be 20 at exit, and
+                 (*phases_per_segment)[0][0] will be a sorted list of
+                 cindex_ids that belong to the first phase, and so on.
+                 (Remember, a cindex_id is an index into graph->cindexes; it
+                 compactly identifies a cindex.)  The sets represented by the
+                 int32's in 'phases_per_segment' will be disjoint and will
+                 cover all elements in [0 .. computation.cindexes.size() - 1].
+
+                 Note: we assume you have called PruneComputationGraph() before
+                 this function.  Even so, this function will crash if the
+                 computation cannot actually be computed -- there are some
+                 mal-formed computations where you can build the computation
+                 graph but not the ordering of cindexes, because there are
+                 dependencies forward and backward in time that intertwine.
 */
void ComputeComputationPhases(
    const Nnet &nnet,
    const ComputationGraph &computation_graph,
-    std::vector<std::vector<int32> > *phases);
+    std::vector<std::vector<std::vector<int32> > > *phases_per_segment);


/**
@@ -351,9 +386,9 @@ void ComputeComputationPhases(
   - All cindex_ids within a given step correspond to the same node in the graph
   - All dependencies of cindex_ids within a given step have been computed in
     earlier steps.
-  .
-There are also some extra, more obscure properties that the sequence of steps
-must satisfy:
+
+  There are also some extra, more obscure properties that the sequence of steps
+  must satisfy:
   - Any input or output in the ComputationRequest must be in one step, with the
     Indexes in the same order as specified in the ComputationRequest.  (Note:
     inputs can be for nodes of type kComponent as well as kInput).
@@ -366,8 +401,8 @@ must satisfy:
     Indexes appearing in the same order.  (This lets us use a sub-matrix for
     the kDimRange node).

-The reason why computation_graph is not provided as a const argument is
-that in order to ensure the final property we may have to add a few new cindex_ids.
+  The reason why computation_graph is not provided as a const argument is that
+  in order to ensure the final property we may have to add a few new cindex_ids.
 */
void ComputeComputationSteps(
    const Nnet &nnet,
diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc
index 6bc444487f8..cb869ac65bf 100644
--- a/src/nnet3/nnet-optimize.cc
+++ b/src/nnet3/nnet-optimize.cc
@@ -657,8 +657,9 @@ void ConsolidateIoOperations(const Nnet &nnet,
     reordered_commands[segments[s].second].command_type = kNoOperationMarker;

   // for each segment we'll divide the commands up into those that must appear
-  // at the left (start) of the segment, those that must appear in the middle
-  // and those that must appear at the right (end).
+  // at the left of the segment (kAcceptInput for inputs and output-derivs),
+  // those that must appear in the middle (most commands), and those that must
+  // appear on the right (kProvideOutput for output nodes and input derivatives).
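+  // For example (a made-up segment): commands originally ordered as
+  //   [ kPropagate, kAcceptInput, kMatrixCopy, kProvideOutput ]
+  // would come out as
+  //   [ kAcceptInput, kPropagate, kMatrixCopy, kProvideOutput ].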
std::vector left_commands, middle_commands, right_commands; for (size_t s = 0; s < segments.size(); s++) { @@ -668,11 +669,9 @@ void ConsolidateIoOperations(const Nnet &nnet, middle_commands.clear(); right_commands.clear(); for (int32 c = segment_start; c < segment_end; c++) { - if (computation->commands[c].command_type == kProvideOutput && - nnet.IsInputNode(computation->commands[c].arg2)) { + if (computation->commands[c].command_type == kProvideOutput) { right_commands.push_back(c); - } else if (computation->commands[c].command_type == kProvideOutput || - computation->commands[c].command_type == kAcceptInput) { + } else if (computation->commands[c].command_type == kAcceptInput) { left_commands.push_back(c); } else { middle_commands.push_back(c); diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h index ea3a483f266..29e76e624de 100644 --- a/src/nnet3/nnet-optimize.h +++ b/src/nnet3/nnet-optimize.h @@ -261,12 +261,9 @@ void RemoveUnnecessaryAllocation(const Nnet &nnet, NnetComputation *computation); -/// This optimization puts the I/O operations (kAcceptInput and kProvideOutput -/// at the very beginning or end of segments of computation. Specifically: -/// first the computation is broken up into segments delimited by kNoOperationMarker. -/// Then, for each segment, all I/O operations are moved to the start of the segment, -/// *except for* kProvideOutput for inpu nodes (where the network provides an -/// input-deriv), which is moved to the end of the segment. +/// This optimization puts the input operations (kAcceptInput) and output +/// operations (kProvideOutput) at the very beginning or end of segments of +/// computation, respectively. void ConsolidateIoOperations(const Nnet &nnet, NnetComputation *computation); diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc index d65193d9a54..ff963208bfe 100644 --- a/src/nnet3/nnet-utils.cc +++ b/src/nnet3/nnet-utils.cc @@ -68,8 +68,8 @@ void EvaluateComputationRequest( const ComputationRequest &request, std::vector > *is_computable) { ComputationGraph graph; - ComputationGraphBuilder builder(nnet, request, &graph); - builder.Compute(); + ComputationGraphBuilder builder(nnet, &graph); + builder.Compute(request); builder.GetComputableInfo(is_computable); if (GetVerboseLevel() >= 2) { std::ostringstream graph_pretty; From f5a2731bc274620d69fb1f35c2151a86b0d95ec2 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 10 Oct 2016 21:02:54 -0400 Subject: [PATCH 005/530] Fix a few bugs shown up by valgrind testing --- src/chainbin/nnet3-chain-acc-lda-stats.cc | 4 +-- src/nnet3/nnet-compile-utils-test.cc | 34 ++++++++++--------- src/nnet3/nnet-component-test.cc | 14 ++++---- src/nnet3/nnet-computation.cc | 5 +++ src/nnet3bin/nnet3-acc-lda-stats.cc | 2 +- src/nnet3bin/nnet3-compute-from-egs.cc | 16 ++++----- .../nnet3-discriminative-compute-from-egs.cc | 17 ++++------ 7 files changed, 47 insertions(+), 45 deletions(-) diff --git a/src/chainbin/nnet3-chain-acc-lda-stats.cc b/src/chainbin/nnet3-chain-acc-lda-stats.cc index 3f092879b6e..b195f5ba1fb 100644 --- a/src/chainbin/nnet3-chain-acc-lda-stats.cc +++ b/src/chainbin/nnet3-chain-acc-lda-stats.cc @@ -54,7 +54,7 @@ class NnetChainLdaStatsAccumulator { NnetComputer computer(options, computation, nnet_, NULL); computer.AcceptInputs(nnet_, eg.inputs); - computer.Forward(); + computer.Run(); const CuMatrixBase &nnet_output = computer.GetOutput("output"); AccStatsFromOutput(eg, nnet_output); } @@ -202,5 +202,3 @@ int main(int argc, char *argv[]) { return -1; } } - - diff --git 
a/src/nnet3/nnet-compile-utils-test.cc b/src/nnet3/nnet-compile-utils-test.cc index e5c9e24cc46..53820abf32a 100644 --- a/src/nnet3/nnet-compile-utils-test.cc +++ b/src/nnet3/nnet-compile-utils-test.cc @@ -71,10 +71,10 @@ void UnitTestSplitLocationsBackward(bool verbose) { int32 minibatch_size = Rand() % 1024 + 100; int32 num_submat_indexes = Rand() % 10 + 1; int32 max_submat_list_size = Rand() % 10 + 1; - int32 min_num_kAddRows = Rand() % 2; // minimum number of kAddRows compatible + int32 min_num_kaddrows = Rand() % 2; // minimum number of kAddRows compatible // lists expected in the final split lists. This value will be used to // create input submat_lists so that this is guaranteed - max_submat_list_size = min_num_kAddRows + max_submat_list_size; + max_submat_list_size = min_num_kaddrows + max_submat_list_size; std::vector > all_pairs; all_pairs.reserve(minibatch_size * max_submat_list_size); @@ -95,8 +95,8 @@ void UnitTestSplitLocationsBackward(bool verbose) { num_locations : max_generated_submat_list_size; submat_lists[i].reserve(num_locations); for (int32 j = 0; j < num_locations; j++) { - if (j <= min_num_kAddRows) - // since we need min_num_kAddRows in the split_lists we ensure that + if (j <= min_num_kaddrows) + // since we need min_num_kaddrows in the split_lists we ensure that // we add a pair with the same first element in all the submat_lists submat_lists[i].push_back(std::make_pair(submat_indexes[j], Rand() % minibatch_size)); @@ -148,7 +148,7 @@ void UnitTestSplitLocationsBackward(bool verbose) { PrintVectorVectorPair(split_lists); KALDI_LOG << "==========================="; } - int32 num_kAddRows_in_output = 0; + int32 num_kaddrows_in_output = 0; int32 first_value; std::vector second_values; // ensure that elements in submat_lists are also present @@ -163,7 +163,7 @@ void UnitTestSplitLocationsBackward(bool verbose) { KALDI_ASSERT((split_lists[i][j].first == first_value) && (split_lists[i][j].second == second_values[j])); } - num_kAddRows_in_output++; + num_kaddrows_in_output++; } for (int32 j = 0; j < split_lists[i].size(); j++) { if (split_lists[i][j].first == -1) @@ -178,7 +178,7 @@ void UnitTestSplitLocationsBackward(bool verbose) { KALDI_ASSERT(all_pairs.size() == 0); // ensure that there are at least as many kAddRows compatible split_lists as // specified - KALDI_ASSERT(num_kAddRows_in_output >= min_num_kAddRows); + KALDI_ASSERT(num_kaddrows_in_output >= min_num_kaddrows); } @@ -276,10 +276,10 @@ void UnitTestSplitLocations(bool verbose) { int32 minibatch_size = Rand() % 1024 + 100; int32 num_submat_indexes = Rand() % 10 + 1; int32 max_submat_list_size = Rand() % 10 + 1; - int32 min_num_kAddRows = Rand() % 2; // minimum number of kAddRows compatible + int32 min_num_kaddrows = Rand() % 2; // minimum number of kAddRows compatible // lists expected in the final split lists. 
This value will be used to // create input submat_lists so that this is guaranteed - max_submat_list_size = min_num_kAddRows + max_submat_list_size; + max_submat_list_size = min_num_kaddrows + max_submat_list_size; std::vector > all_pairs; all_pairs.reserve(minibatch_size * max_submat_list_size); @@ -300,12 +300,14 @@ void UnitTestSplitLocations(bool verbose) { num_locations : max_generated_submat_list_size; submat_lists[i].reserve(num_locations); for (int32 j = 0; j < num_locations; j++) { - if (j <= min_num_kAddRows) - // since we need min_num_kAddRows in the split_lists we ensure that + // note from dan: I edited the following line to resolve a valgrind error + // but cannot really understand at this point what this code is doing. + if (j <= min_num_kaddrows && j < num_submat_indexes) { + // since we need min_num_kaddrows in the split_lists we ensure that // we add a pair with the same first element in all the submat_lists submat_lists[i].push_back(std::make_pair(submat_indexes[j], - Rand() % minibatch_size)); - + Rand() % minibatch_size)); + } submat_lists[i].push_back( std::make_pair(submat_indexes[Rand() % num_submat_indexes], Rand() % minibatch_size)); @@ -323,7 +325,7 @@ void UnitTestSplitLocations(bool verbose) { KALDI_LOG << "==========================="; KALDI_LOG << split_lists.size(); } - int32 num_kAddRows_in_output = 0; + int32 num_kaddrows_in_output = 0; int32 first_value; std::vector second_values; // ensure that elements in submat_lists are also present @@ -337,7 +339,7 @@ void UnitTestSplitLocations(bool verbose) { KALDI_ASSERT((split_lists[i][j].first == first_value) && (split_lists[i][j].second == second_values[j])); } - num_kAddRows_in_output++; + num_kaddrows_in_output++; } for (int32 j = 0; j < split_lists[i].size(); j++) { if (split_lists[i][j].first == -1) @@ -352,7 +354,7 @@ void UnitTestSplitLocations(bool verbose) { KALDI_ASSERT(all_pairs.size() == 0); // ensure that there are at least as many kAddRows compatible split_lists as // specified - KALDI_ASSERT(num_kAddRows_in_output >= min_num_kAddRows); + KALDI_ASSERT(num_kaddrows_in_output >= min_num_kaddrows); } } // namespace nnet2 diff --git a/src/nnet3/nnet-component-test.cc b/src/nnet3/nnet-component-test.cc index 3cc6af1c70d..1cb96563b77 100644 --- a/src/nnet3/nnet-component-test.cc +++ b/src/nnet3/nnet-component-test.cc @@ -25,9 +25,9 @@ namespace kaldi { namespace nnet3 { // Reset seeds for test time for RandomComponent static void ResetSeed(int32 rand_seed, const Component &c) { - RandomComponent *rand_component = + RandomComponent *rand_component = const_cast(dynamic_cast(&c)); - + if (rand_component != NULL) { srand(rand_seed); rand_component->ResetGenerator(); @@ -48,8 +48,10 @@ static bool StringsApproxEqual(const std::string &a, // if it's not the last digit in the string, goto fail if (pos + 1 != size && isdigit(a[pos+1])) goto fail; + if (pos == 0) + goto fail; size_t pos2; - for (pos2 = pos - 1; pos2 > 0; pos2--) { + for (pos2 = static_cast(pos) - 1; pos2 > 0; pos2--) { if (a[pos2] == '.') break; // we accept this difference: we went backwards and found a '.' if (!isdigit(a[pos2])) // we reject this difference: we went back and // found non-digit before '.' -> not floating @@ -198,7 +200,7 @@ void TestSimpleComponentPropagateProperties(const Component &c) { int32 properties = c.Properties(); Component *c_copy = NULL, *c_copy_scaled = NULL; int32 rand_seed = Rand(); - + if (RandInt(0, 1) == 0) c_copy = c.Copy(); // This will test backprop with an updatable component. 
if (RandInt(0, 1) == 0 && @@ -234,7 +236,7 @@ void TestSimpleComponentPropagateProperties(const Component &c) { if ((properties & kPropagateAdds) && (properties & kPropagateInPlace)) { KALDI_ERR << "kPropagateAdds and kPropagateInPlace flags are incompatible."; } - + ResetSeed(rand_seed, c); c.Propagate(NULL, input_data, &output_data1); @@ -327,7 +329,7 @@ bool TestSimpleComponentDataDerivative(const Component &c, output_deriv(num_rows, output_dim, kSetZero, output_stride_type); input_data.SetRandn(); output_deriv.SetRandn(); - + ResetSeed(rand_seed, c); c.Propagate(NULL, input_data, &output_data); diff --git a/src/nnet3/nnet-computation.cc b/src/nnet3/nnet-computation.cc index 6b6dc8328f5..af2267577cc 100644 --- a/src/nnet3/nnet-computation.cc +++ b/src/nnet3/nnet-computation.cc @@ -701,6 +701,11 @@ void NnetComputation::Read(std::istream &is, bool binary) { } + // delete any existing pointers in component_precomputed_indexes. + for (size_t i = 0; i < component_precomputed_indexes.size(); i++) + delete component_precomputed_indexes[i]; + component_precomputed_indexes.clear(); + size_t num_component_precomputed_indexes; ExpectToken(is, binary, ""); ReadBasicType(is, binary, &num_component_precomputed_indexes); diff --git a/src/nnet3bin/nnet3-acc-lda-stats.cc b/src/nnet3bin/nnet3-acc-lda-stats.cc index 0b3b537855e..c8911a4a39f 100644 --- a/src/nnet3bin/nnet3-acc-lda-stats.cc +++ b/src/nnet3bin/nnet3-acc-lda-stats.cc @@ -46,7 +46,7 @@ class NnetLdaStatsAccumulator { NnetComputer computer(options, computation, nnet_, NULL); computer.AcceptInputs(nnet_, eg.io); - computer.Forward(); + computer.Run(); const CuMatrixBase &nnet_output = computer.GetOutput("output"); AccStatsFromOutput(eg, nnet_output); } diff --git a/src/nnet3bin/nnet3-compute-from-egs.cc b/src/nnet3bin/nnet3-compute-from-egs.cc index 66eace0dab5..648b5e1408f 100644 --- a/src/nnet3bin/nnet3-compute-from-egs.cc +++ b/src/nnet3bin/nnet3-compute-from-egs.cc @@ -46,7 +46,7 @@ class NnetComputerFromEg { options.debug = true; NnetComputer computer(options, computation, nnet_, NULL); computer.AcceptInputs(nnet_, eg.io); - computer.Forward(); + computer.Run(); const CuMatrixBase &nnet_output = computer.GetOutput("output"); output->Resize(nnet_output.NumRows(), nnet_output.NumCols()); nnet_output.CopyToMat(output); @@ -54,7 +54,7 @@ class NnetComputerFromEg { private: const Nnet &nnet_; CachingOptimizingCompiler compiler_; - + }; } @@ -76,7 +76,7 @@ int main(int argc, char *argv[]) { "e.g.:\n" "nnet3-compute-from-egs --apply-exp=true 0.raw ark:1.egs ark:- | matrix-sum-rows ark:- ... 
\n" "See also: nnet3-compute\n"; - + bool binary_write = true, apply_exp = false; std::string use_gpu = "yes"; @@ -89,7 +89,7 @@ int main(int argc, char *argv[]) { "yes|no|optional|wait, only has effect if compiled with CUDA"); po.Read(argc, argv); - + if (po.NumArgs() != 3) { po.PrintUsage(); exit(1); @@ -98,7 +98,7 @@ int main(int argc, char *argv[]) { #if HAVE_CUDA==1 CuDevice::Instantiate().SelectGpuId(use_gpu); #endif - + std::string nnet_rxfilename = po.GetArg(1), examples_rspecifier = po.GetArg(2), matrix_wspecifier = po.GetArg(3); @@ -109,10 +109,10 @@ int main(int argc, char *argv[]) { NnetComputerFromEg computer(nnet); int64 num_egs = 0; - + SequentialNnetExampleReader example_reader(examples_rspecifier); BaseFloatMatrixWriter matrix_writer(matrix_wspecifier); - + for (; !example_reader.Done(); example_reader.Next(), num_egs++) { Matrix output; computer.Compute(example_reader.Value(), &output); @@ -131,5 +131,3 @@ int main(int argc, char *argv[]) { return -1; } } - - diff --git a/src/nnet3bin/nnet3-discriminative-compute-from-egs.cc b/src/nnet3bin/nnet3-discriminative-compute-from-egs.cc index 7736290d1d5..d8b0f469beb 100644 --- a/src/nnet3bin/nnet3-discriminative-compute-from-egs.cc +++ b/src/nnet3bin/nnet3-discriminative-compute-from-egs.cc @@ -46,7 +46,7 @@ class NnetComputerFromEg { options.debug = true; NnetComputer computer(options, computation, nnet_, NULL); computer.AcceptInputs(nnet_, eg.io); - computer.Forward(); + computer.Run(); const CuMatrixBase &nnet_output = computer.GetOutput("output"); output->Resize(nnet_output.NumRows(), nnet_output.NumCols()); nnet_output.CopyToMat(output); @@ -54,7 +54,7 @@ class NnetComputerFromEg { private: const Nnet &nnet_; CachingOptimizingCompiler compiler_; - + }; } @@ -80,7 +80,7 @@ int main(int argc, char *argv[]) { "e.g.:\n" "nnet3-discriminative-compute-from-egs --apply-exp=true 0.raw ark:1.degs ark:- | matrix-sum-rows ark:- ... \n" "See also: nnet3-compute nnet3-compute-from-egs\n"; - + bool binary_write = true, apply_exp = false; std::string use_gpu = "yes"; @@ -93,7 +93,7 @@ int main(int argc, char *argv[]) { "yes|no|optional|wait, only has effect if compiled with CUDA"); po.Read(argc, argv); - + if (po.NumArgs() != 3) { po.PrintUsage(); exit(1); @@ -102,7 +102,7 @@ int main(int argc, char *argv[]) { #if HAVE_CUDA==1 CuDevice::Instantiate().SelectGpuId(use_gpu); #endif - + std::string nnet_rxfilename = po.GetArg(1), examples_rspecifier = po.GetArg(2), matrix_wspecifier = po.GetArg(3); @@ -113,10 +113,10 @@ int main(int argc, char *argv[]) { NnetComputerFromEg computer(nnet); int64 num_egs = 0; - + SequentialNnetDiscriminativeExampleReader example_reader(examples_rspecifier); BaseFloatMatrixWriter matrix_writer(matrix_wspecifier); - + for (; !example_reader.Done(); example_reader.Next(), num_egs++) { Matrix output; NnetExample eg; @@ -146,6 +146,3 @@ int main(int argc, char *argv[]) { return -1; } } - - - From 91ae41c184305b86579d75b8a137f574daa7ed04 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 15 Oct 2016 15:29:21 -0400 Subject: [PATCH 006/530] Refactoring generation of the computation 'steps' for more clarity and to allow online computation. Add a basic test for multi-segment computation. 
--- src/nnet3/nnet-compile-test.cc | 62 ++- src/nnet3/nnet-compile.cc | 59 +-- src/nnet3/nnet-compile.h | 3 - src/nnet3/nnet-computation-graph.cc | 778 +++++++++++++++------------- src/nnet3/nnet-computation-graph.h | 159 +++++- src/nnet3/nnet-test-utils.cc | 2 + src/nnet3/nnet-test-utils.h | 2 + 7 files changed, 630 insertions(+), 435 deletions(-) diff --git a/src/nnet3/nnet-compile-test.cc b/src/nnet3/nnet-compile-test.cc index d405fd0f5fa..c0e1b6f8b5b 100644 --- a/src/nnet3/nnet-compile-test.cc +++ b/src/nnet3/nnet-compile-test.cc @@ -28,7 +28,6 @@ namespace nnet3 { void UnitTestNnetCompile() { for (int32 n = 0; n < 20; n++) { struct NnetGenerationOptions gen_config; - std::vector configs; GenerateConfigSequence(gen_config, &configs); Nnet nnet; @@ -56,6 +55,66 @@ void UnitTestNnetCompile() { } } + +// this tests compilation where there are more than one +// computation-request... this is to test some of the +// low-level utilities that will be used in online computation. +void UnitTestNnetCompileMulti() { + for (int32 n = 0; n < 20; n++) { + struct NnetGenerationOptions gen_config; + gen_config.allow_use_of_x_dim = false; + + std::vector configs; + GenerateConfigSequence(gen_config, &configs); + Nnet nnet; + for (size_t j = 0; j < configs.size(); j++) { + KALDI_LOG << "Input config[" << j << "] is: " << configs[j]; + std::istringstream is(configs[j]); + nnet.ReadConfig(is); + } + + ComputationRequest request1, request2; + std::vector > inputs1, inputs2; + ComputeExampleComputationRequestSimple(nnet, &request1, &inputs1); + ComputeExampleComputationRequestSimple(nnet, &request2, &inputs2); + + + KALDI_LOG << "Computation request 1 is:"; + request1.Print(std::cerr); + KALDI_LOG << "Computation request 2 is:"; + request2.Print(std::cerr); + + std::vector requests; + request2.store_component_stats = request1.store_component_stats; + request1.need_model_derivative = false; + request2.need_model_derivative = false; + requests.push_back(&request1); + requests.push_back(&request2); + + // set all the x indexes to 1 for request 2 (they would otherwise + // be zero). This ensures that there is no overlap + // between the inputs and outputs on the two requests. + for (int32 i = 0; i < request2.inputs.size(); i++) + for (int32 j = 0; j < request2.inputs[i].indexes.size(); j++) + request2.inputs[i].indexes[j].x = 1; + for (int32 i = 0; i < request2.outputs.size(); i++) + for (int32 j = 0; j < request2.outputs[i].indexes.size(); j++) + request2.outputs[i].indexes[j].x = 1; + + + NnetComputation computation; + Compiler compiler(requests, nnet); + + CompilerOptions opts; + compiler.CreateComputation(opts, &computation); + + std::ostringstream os; + computation.Print(os, nnet); + KALDI_LOG << "Generated computation is: " << os.str(); + } +} + + } // namespace nnet3 } // namespace kaldi @@ -65,6 +124,7 @@ int main() { // SetVerboseLevel(2); UnitTestNnetCompile(); + UnitTestNnetCompileMulti(); KALDI_LOG << "Nnet tests succeeded."; diff --git a/src/nnet3/nnet-compile.cc b/src/nnet3/nnet-compile.cc index 8e70ecb4c4c..ab4ea9917e3 100644 --- a/src/nnet3/nnet-compile.cc +++ b/src/nnet3/nnet-compile.cc @@ -69,21 +69,23 @@ void Compiler::CreateComputation(const CompilerOptions &opts, // (non-online computation), a vector of all zeros. 
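+  // (Hypothetical illustration: if requests_[0] produced 5 steps and
+  // requests_[1] produced 4, step_to_segment would end up as
+  // [ 0 0 0 0 0 1 1 1 1 ].)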
std::vector step_to_segment; - for (size_t segment = 0; segment < requests_.size(); segment++) { - std::vector > this_segment_steps; - ComputeComputationSteps(nnet_, *(requests_[segment]), - phases_per_segment[segment], &graph_, - &this_segment_steps); - for (size_t i = 0; i < this_segment_steps.size(); i++) { - steps.push_back(std::vector()); - steps.back().swap(this_segment_steps[i]); - step_to_segment.push_back(segment); + + { + ComputationStepsComputer steps_computer(nnet_, &graph_, &steps, + &cindex_id_to_location_); + + for (size_t segment = 0; segment < requests_.size(); segment++) { + steps_computer.ComputeForSegment(*(requests_[segment]), + phases_per_segment[segment]); + while (step_to_segment.size() < steps.size()) + step_to_segment.push_back(segment); + + // save memory, by deleting the phases we just consumed. + std::vector > temp; + phases_per_segment[segment].swap(temp); } + steps_computer.Check(); } - // TODO (?) check that the total num_cindexes in the steps in >= - // graph->cindexes.size(). could do it inside CreateLocationInfo(). - phases_per_segment.clear(); - CreateLocationInfo(steps); std::vector deriv_needed; ComputeDerivNeeded(steps, step_to_segment, &deriv_needed); CreateStepInfo(deriv_needed, step_to_segment, &steps, computation); @@ -354,37 +356,6 @@ void Compiler::CreateStepInfo( } } -void Compiler::CreateLocationInfo( - const std::vector > &by_step) { - cindex_id_to_location_.clear(); - int32 num_cindex_ids = graph_.cindexes.size(), - total_cindex_ids = 0; - cindex_id_to_location_.resize(num_cindex_ids, std::pair(-1,-1)); - int32 num_steps = by_step.size(); - for (int32 step = 0; step < num_steps; step++) { - // output_cindex_ids is the cindex_ids that this step produces. - const std::vector &output_cindex_ids = by_step[step]; - total_cindex_ids += output_cindex_ids.size(); - int32 num_rows = output_cindex_ids.size(); - for (int32 row = 0; row < num_rows; row++) { - int32 cindex_id = output_cindex_ids[row]; - if (cindex_id_to_location_[cindex_id].first != -1) { - int32 node_id = graph_.cindexes[cindex_id].first; - if (nnet_.GetNode(node_id).node_type != kDescriptor || - nnet_.GetNode(node_id + 1).node_type != kComponent) - KALDI_ERR << "Cindexes may appear in >1 step only if they are " - "Descriptors for Component inputs: code error."; - } - cindex_id_to_location_[cindex_id] = std::pair(step, row); - } - } - // All cindex_ids in the graph must be present in a step, which is why - // we make the following assert. In general this will be with equality, - // but I believe there might be some weird edge cases, maybe involving - // kDimRange nodes, that would make this not true. [not 100% sure.] - KALDI_ASSERT(total_cindex_ids >= num_cindex_ids); -} - void Compiler::DoForwardComputation(int32 step, NnetComputation *computation) const { KALDI_ASSERT(step < static_cast(steps_.size())); diff --git a/src/nnet3/nnet-compile.h b/src/nnet3/nnet-compile.h index 8b9e738d251..36fcf84fbf1 100644 --- a/src/nnet3/nnet-compile.h +++ b/src/nnet3/nnet-compile.h @@ -110,9 +110,6 @@ class Compiler { precomputed_indexes_index(0) { } }; - // this sets up cindex_id_to_location_. - void CreateLocationInfo(const std::vector > &by_step); - // Computes the set of step-indexes of preceding steps that this step depends // on. Assumes CreateLocationInfo() has already been called. 
Requires // 'step_index' only to handle a special case, that if 'this_step' is a diff --git a/src/nnet3/nnet-computation-graph.cc b/src/nnet3/nnet-computation-graph.cc index cf43ca9f804..422a14bfe4c 100644 --- a/src/nnet3/nnet-computation-graph.cc +++ b/src/nnet3/nnet-computation-graph.cc @@ -465,6 +465,7 @@ void ComputationGraphBuilder::Compute(const ComputationRequest &request) { KALDI_ERR << "You are calling things in the wrong order: should be " << "Compute(), Prune(), Compute, Prune(), ..."; } + int32 cur_segment_start = graph_->cindexes.size(); request_ = &request; AddInputs(); AddOutputs(); // sets current_distance_ to 0. @@ -474,7 +475,7 @@ void ComputationGraphBuilder::Compute(const ComputationRequest &request) { BuildGraphOneIter(); // only check rarely if we're running at low verbose level. if (GetVerboseLevel() >= 3 || RandInt(1, (current_distance_ + 1)) == 1) - Check(); + Check(cur_segment_start); // TODO: come up with a scheme to delay when we call // UpdateAllComputableInfo(). UpdateAllComputableInfo(); @@ -486,13 +487,13 @@ void ComputationGraphBuilder::Compute(const ComputationRequest &request) { << "network topology?)"; if (RandInt(1, 2 * (graph_->segment_ends.size() + 1)) == 1) - Check(); + Check(cur_segment_start); } -void ComputationGraphBuilder::Check() const { +void ComputationGraphBuilder::Check(int32 start_cindex_id) const { int32 num_cindex_ids = graph_->cindexes.size(); - for (int32 cindex_id = 0; cindex_id < num_cindex_ids; + for (int32 cindex_id = start_cindex_id; cindex_id < num_cindex_ids; cindex_id += 1 + RandInt(0, num_cindex_ids / 100)) { { // check depend_on_this. std::vector depend_on_this = depend_on_this_[cindex_id]; @@ -513,12 +514,16 @@ void ComputationGraphBuilder::Check() const { KALDI_ASSERT(IsSortedAndUniq(dependencies)); for (size_t j = 0; j < size; j++) { int32 dep_cindex_id = dependencies[j]; - // make sure appears in appropriate depend_on_this_ array. - const std::vector &dep = depend_on_this_[dep_cindex_id]; - KALDI_ASSERT(std::count(dep.begin(), dep.end(), cindex_id) == 1); + if (dep_cindex_id >= start_cindex_id) { + // make sure appears in appropriate depend_on_this_ array. + const std::vector &dep = depend_on_this_[dep_cindex_id]; + KALDI_ASSERT(std::count(dep.begin(), dep.end(), cindex_id) == 1); + } } } - { // check usable_count_. + + { + // check usable_count_ int32 node_index = graph_->cindexes[cindex_id].first; int32 usable_count = usable_count_[cindex_id], usable_count_recomputed = nnet_.IsOutputNode(node_index) ? 1 : 0; @@ -1521,423 +1526,452 @@ bool IndexSet::operator () (const Index &index) const { } +ComputationStepsComputer::ComputationStepsComputer( + const Nnet &nnet, + ComputationGraph *graph, + std::vector > *steps, + std::vector > *locations): + nnet_(nnet), graph_(graph), steps_(steps), locations_(locations) { + steps_->clear(); + locations_->clear(); + int32 num_cindexes = graph_->cindexes.size(); + // leave a little space in case a few cindexes are added (unlikely + // but could happen with dim-range nodes). + locations_->reserve(num_cindexes + num_cindexes / 10); + locations_->resize(num_cindexes, std::pair(-1, -1)); +} -namespace compute_computation_steps { -// namespace for some helper functions for ComputeComputationSteps. - -/// Adds a "step" for each of the inputs in the ComputationRequest. -/// Does this in the same order in which they were declared in -/// the request (this order won't matter at all). -/// returns the total number of cindex_ids that correspond to inputs. 
-int32 AddInputSteps(const Nnet &nnet,
-                    const ComputationRequest &request,
-                    const ComputationGraph &graph,
-                    std::vector<std::vector<int32> > *steps) {
-  KALDI_ASSERT(steps->empty());
-  steps->reserve(50);  // will minimize unnecessary copies of vectors.
-  unordered_set<int32> all_nodes;  // to make sure nothing is listed twice.
-  int32 num_cindex_ids = 0;
-  for (int32 i = 0; i < request.inputs.size(); i++) {
-    int32 n = nnet.GetNodeIndex(request.inputs[i].name);
-    if (n == -1)
-      KALDI_ERR << "Network has no output with name "
-                << request.inputs[i].name;
-    // ensure no input node is listed twice.
-    KALDI_ASSERT(all_nodes.count(n) == 0 && "Invalid computation request: "
-                 "double listing of node.");
-    all_nodes.insert(n);
-    KALDI_ASSERT(!request.inputs[i].indexes.empty() &&
-                 "Computation request had no indexes for input ");
-    steps->push_back(std::vector<int32>());
-    std::vector<int32> &this_step = steps->back();
-    this_step.resize(request.inputs[i].indexes.size());
-    for (int32 j = 0; j < request.inputs[i].indexes.size(); j++) {
-      Cindex cindex(n, request.inputs[i].indexes[j]);
-      int32 cindex_id = graph.GetCindexId(cindex);
-      KALDI_ASSERT(cindex_id != -1);  // would be code error.
-      this_step[j] = cindex_id;
-    }
-    num_cindex_ids += request.inputs[i].indexes.size();
-  }
-  return num_cindex_ids;
-}
+void ComputationStepsComputer::ComputeForSegment(
+    const ComputationRequest &request,
+    const std::vector<std::vector<int32> > &phases) {
+  int32 this_num_phases = phases.size();
+  for (int32 i = 0; i < this_num_phases; i++) {
+    std::vector<std::vector<Cindex> > sub_phases;
+    SplitIntoSubPhases(phases[i], &sub_phases);
+    for (size_t j = 0; j < sub_phases.size(); j++) {
+      ProcessSubPhase(request, sub_phases[j]);
+    }
+  }
+}

+void ComputationStepsComputer::ProcessInputOrOutputStep(
+    const ComputationRequest &request,
+    bool is_output,
+    const std::vector<Cindex> &sub_phase) {
+  int32 io_node = sub_phase[0].first;
+  if (is_output) {
+    KALDI_ASSERT(nnet_.IsOutputNode(io_node));
+  } else {
+    KALDI_ASSERT(nnet_.IsInputNode(io_node));
+  }
+  std::string node_name = nnet_.GetNodeName(io_node);
+  const std::vector<IoSpecification> &inputs_or_outputs =
+      (is_output ? request.outputs : request.inputs);
+  int32 io_index = -1;
+  for (size_t i = 0; i < inputs_or_outputs.size(); i++)
+    if (inputs_or_outputs[i].name == node_name)
+      io_index = i;
+  KALDI_ASSERT(io_index >= 0);
+  const std::vector<Index> &io_indexes = inputs_or_outputs[io_index].indexes;
+  std::vector<Cindex> io_cindexes(io_indexes.size());
+  for (size_t i = 0, size = io_cindexes.size(); i < size; i++) {
+    io_cindexes[i].first = io_node;
+    io_cindexes[i].second = io_indexes[i];
+  }
+  KALDI_ASSERT(io_cindexes.size() == sub_phase.size());
+  // we expect the list of cindexes in 'io_cindexes' to be identical to that
+  // in 'sub_phase' (but they don't have to be in the same order)... for now
+  // we check the size; we'll spot-check that they are the same later.
+  // The actual output in 'steps' must be in the same order as the indexes
+  // in the computation request, which is why we pass 'io_cindexes', and not
+  // 'sub_phase', to AddStep().
+  int32 step_index = AddStep(io_cindexes);
+  // Now spot-check that the cindexes in 'sub_phase' are the same as those
+  // we just added.  [note: they don't have to be in the same order, but
+  // they should be the same set.]
+  for (size_t i = 0; i < sub_phase.size(); i += 10) {
+    const Cindex &cindex = sub_phase[i];
+    int32 cindex_id = graph_->GetCindexId(cindex);
+    KALDI_ASSERT(cindex_id >= 0 && (*locations_)[cindex_id].first == step_index);
+  }
+}

-/// Adds a "step" for each of the outputs in the ComputationRequest.  This will
-/// be done after adding steps for all the inputs and then all the
-/// non(input/output)s.
Does this in the same order in which they were declared -/// in the request (this won't matter at all). -void AddOutputSteps(const Nnet &nnet, - const ComputationRequest &request, - const ComputationGraph &graph, - std::vector > *steps) { - std::set all_nodes; // to make sure nothing listed twice. - for (int32 i = 0; i < request.outputs.size(); i++) { - int32 n = nnet.GetNodeIndex(request.outputs[i].name); - if (n == -1) - KALDI_ERR << "Network has no output with name " - << request.outputs[i].name; - // ensure no output node is listed twice. - KALDI_ASSERT(all_nodes.count(n) == 0 && "Invalid computation request: " - "double listing of node."); - all_nodes.insert(n); - KALDI_ASSERT(!request.outputs[i].indexes.empty() && - "Computation request had no indexes for output "); - steps->push_back(std::vector()); - std::vector &this_step = steps->back(); - this_step.resize(request.outputs[i].indexes.size()); - for (int32 j = 0; j < request.outputs[i].indexes.size(); j++) { - Cindex cindex(n, request.outputs[i].indexes[j]); - int32 cindex_id = graph.GetCindexId(cindex); - KALDI_ASSERT(cindex_id != -1); // would be code error. - this_step[j] = cindex_id; +int32 ComputationStepsComputer::AddStep(const std::vector &cindexes, + bool add_if_absent) { + int32 step_index = steps_->size(); + steps_->push_back(std::vector()); + std::vector &step = steps_->back(); // vector of cindex_id. + step.resize(cindexes.size()); + size_t row_index = 0; + std::vector::const_iterator iter = cindexes.begin(), + end = cindexes.end(); + std::vector::iterator out_iter = step.begin(); + std::pair *locations = &((*locations_)[0]); + if (!add_if_absent) { + // this version of GetCindexId will not add CindexIds. + for (; iter != end; ++iter, ++out_iter, ++row_index) { + int32 cindex_id = graph_->GetCindexId(*iter); + *out_iter = cindex_id; + locations[cindex_id].first = step_index; + locations[cindex_id].second = row_index; + } + } else { + for (; iter != end; ++iter, ++out_iter, ++row_index) { + bool is_input = false; // only relevant if we have to add the cindex to + // the computation graph, which we won't for + // inputs (we only might for dim-range nodes). + bool added; + int32 cindex_id = graph_->GetCindexId(*iter, is_input, &added); + *out_iter = cindex_id; + if (added) { + KALDI_ASSERT(cindex_id == static_cast(locations_->size())); + locations_->resize(cindex_id + 1); + locations_->back().first = step_index; + locations_->back().second = row_index; + locations = &((*locations_)[0]); // in case it was reallocated + } else { + locations[cindex_id].first = step_index; + locations[cindex_id].second = row_index; + } } } + return step_index; } -/// Convert the cindex_ids in the vector "cindex_ids" to cindexes, but only -/// keeping those that correspond to nodes of type kComponent. -/// Asserts that none of these cindexes have the "is_input" set to true. -/// [this is possible because we call this only for phases >1, and inputs -/// should not be there.] 
-static void ExtractOnlyComponentCindexes(const std::vector &cindex_ids, - const ComputationGraph &graph, - const Nnet &nnet, - std::vector *cindexes) { - cindexes->clear(); - cindexes->reserve(cindex_ids.size()); + +int32 ComputationStepsComputer::AddStep(std::vector *cindex_ids) { + int32 step_index = steps_->size(); + KALDI_ASSERT(!cindex_ids->empty()); + steps_->push_back(std::vector()); + steps_->back().swap(*cindex_ids); + std::vector::const_iterator iter = steps_->back().begin(), + end = steps_->back().end(); + int32 row_index = 0; + std::pair *locations = &((*locations_)[0]); + size_t num_cindexes = graph_->cindexes.size(); + for (; iter != end; ++iter, ++row_index) { + int32 cindex_id = *iter; + KALDI_ASSERT(static_cast(cindex_id) < num_cindexes); + locations[cindex_id].first = step_index; + locations[cindex_id].second = row_index; + } + return step_index; +} + + +void ComputationStepsComputer::ConvertToCindexes( + const std::vector &cindex_ids, + std::vector *cindexes) const { + cindexes->resize(cindex_ids.size()); + size_t num_cindexes = graph_->cindexes.size(); std::vector::const_iterator iter = cindex_ids.begin(), - end = cindex_ids.end(); - for (; iter != end; ++iter) { + end = cindex_ids.end(); + std::vector::iterator out_iter = cindexes->begin(); + for (; iter != end; ++iter, ++out_iter) { int32 cindex_id = *iter; - const Cindex &cindex = graph.cindexes[cindex_id]; - if (nnet.IsComponentNode(cindex.first)) { - KALDI_ASSERT(!graph.is_input[cindex_id]); - cindexes->push_back(cindex); - } + KALDI_ASSERT(static_cast(cindex_id) < num_cindexes); + *out_iter = graph_->cindexes[cindex_id]; } } -/// Outputs into component_steps, steps corresponding to all Cindexes that -/// correspond to Component nodes and that are not inputs to the network. (note -/// that a Cindex for a Component node that's provided as an input to the -/// network is not case we anticipate being common, but it's possible in the -/// framework). Note, a step is just a list of cindex_ids that can all be computed -/// at the same time. -static void AddComponentSteps( - const Nnet &nnet, - const ComputationGraph &graph, - const std::vector > &phases, - std::vector > *component_steps) { - int32 num_phase_indexes = phases.size(); - - std::vector cindexes; - - // We don't include phase_index = 0, because all inputs to the network - // (whether the node index is type kInput or kComponent) will be assigned to - // phase_index 0, and no non-inputs should be there (we checked this). - for (int32 phase_index = 1; phase_index < num_phase_indexes; phase_index++) { - ExtractOnlyComponentCindexes(phases[phase_index], graph, nnet, &cindexes); - - // now "cindexes" contains all Cindexes that are from Component nodes (and - // we have made sure that none of these are being provided as inputs). - // Sorting this array gives us the ordering we want, where Cindexes from - // different node-ids are separated into contiguous ranges, and within each - // range, they are sorted by Index. - std::sort(cindexes.begin(), cindexes.end()); - - std::vector::iterator iter = cindexes.begin(), end = cindexes.end(); - while (iter != end) { - // each pass through this while loop processes one batch of cindex_ids; - // each batch has a particular node-index. - std::vector::iterator cur_end = iter; - int32 this_node_id = iter->first; - while (cur_end != end && cur_end->first == this_node_id) - cur_end++; - // the range [iter, cur_end) is nonempty and contains all the same node-id. 
- int32 size = cur_end - iter; - component_steps->push_back(std::vector()); - std::vector &this_step = component_steps->back(); - this_step.resize(size); - for (int32 i = 0; i < size; i++, iter++) - this_step[i] = graph.GetCindexId(*iter); - KALDI_ASSERT(iter == cur_end); - // at this point iter will point to either the end of the "cindexes" - // vector, or the beginning of the next set of Cindexes to process. - } + +void ComputationStepsComputer::ConvertToCindexIds( + const std::vector &cindexes, + std::vector *cindex_ids) const { + cindex_ids->resize(cindexes.size()); + std::vector::const_iterator iter = cindexes.begin(), + end = cindexes.end(); + std::vector::iterator out_iter = cindex_ids->begin(); + for (; iter != end; ++iter, ++out_iter) { + int32 cindex_id = graph_->GetCindexId(*iter); + KALDI_ASSERT(cindex_id >= 0); + *out_iter = cindex_id; } } -/// You call this function after calling AddInputSteps to add steps for inputs -/// to "all_steps", then calling AddComponentSteps to output steps for -/// components to "component_steps". This function moves the component steps -/// from "component_steps" to "all_steps", while preceding each component step -/// with a corresponding step for setting up the input to that component (i.e. a -/// step for the preceding Descriptor). The reason we do it like this is (a) to -/// ensure that the step for the input to the Component, which comes from a -/// Descriptor, comes immediately before it, which is convenient; and (b) -/// because it's possible in certain rather weird setups, some Cindexes -/// corresponding to the Descriptors at the inputs of Components will end up -/// being listed in two separate steps; and if we added the input-descriptor -/// steps using the same mechanism as AddComponentSteps, we wouldn't be able to -/// correctly capture this duplication. -static void AddComponentInputSteps( - const ComputationGraph &graph, - std::vector > *component_steps, - std::vector > *all_steps) { +// static +void ComputationStepsComputer::ConvertToIndexes( + const std::vector &cindexes, + std::vector *indexes) { + indexes->resize(cindexes.size()); + std::vector::const_iterator iter = cindexes.begin(), + end = cindexes.end(); + std::vector::iterator out_iter = indexes->begin(); + for (; iter != end; ++iter, ++out_iter) + *out_iter = iter->second; +} + +// static +void ComputationStepsComputer::ConvertToCindexes( + const std::vector &indexes, + int32 node_index, + std::vector *cindexes) { + KALDI_ASSERT(node_index >= 0); + cindexes->resize(indexes.size()); + std::vector::const_iterator iter = indexes.begin(), + end = indexes.end(); + std::vector::iterator out_iter = cindexes->begin(); + for (; iter != end; ++iter, ++out_iter) { + out_iter->first = node_index; + out_iter->second = *iter; + } +} - int32 space_for_outputs = 10; // arbitrary. - all_steps->reserve(all_steps->size() + - component_steps->size() * 2 + space_for_outputs); - for (size_t i = 0; i < component_steps->size(); i++) { - std::vector &component_step = (*component_steps)[i]; - KALDI_ASSERT(!component_step.empty()); - // First make a step for the descriptor at the input of this Component. 
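The two static converters added above (ConvertToIndexes / ConvertToCindexes) are inverses of each other once the node index is fixed, which is what makes the reordering dance for kReordersIndexes components possible. A toy round-trip sketch, using a simplified stand-in for nnet3's Index struct (not code from the patch):

    #include <cassert>
    #include <utility>
    #include <vector>

    struct Index { int n, t, x; };  // simplified stand-in for nnet3's Index
    bool operator==(const Index &a, const Index &b) {
      return a.n == b.n && a.t == b.t && a.x == b.x;
    }
    typedef std::pair<int, Index> Cindex;  // (node-index, Index), as in nnet3

    int main() {
      const int node_index = 7;
      std::vector<Cindex> cindexes = { {node_index, {0, -1, 0}},
                                       {node_index, {0, 0, 0}} };
      // ConvertToIndexes: strip the node index.
      std::vector<Index> indexes;
      for (size_t i = 0; i < cindexes.size(); i++)
        indexes.push_back(cindexes[i].second);
      // ... a component with the kReordersIndexes property could permute
      // 'indexes' here via ReorderIndexes() ...
      // ConvertToCindexes: re-attach the (single, known) node index.
      std::vector<Cindex> back;
      for (size_t i = 0; i < indexes.size(); i++)
        back.push_back(Cindex(node_index, indexes[i]));
      assert(back == cindexes);  // identity when nothing was reordered
      return 0;
    }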
- unordered_set descriptor_cindex_ids; - std::vector::iterator iter = component_step.begin(), - end = component_step.end(); + +void ComputationStepsComputer::ProcessComponentStep( + const std::vector &step) { + KALDI_ASSERT(!step.empty()); + int32 component_node_index = step.front().first; + int32 component_input_index = component_node_index - 1; + KALDI_ASSERT(nnet_.IsComponentNode(component_node_index)); + const NetworkNode &node = nnet_.GetNode(component_node_index); + int32 c = node.u.component_index; + const Component *component = nnet_.GetComponent(c); + if (component->Properties() & kSimpleComponent) { + // for simple components, the input cindexes will be the same as the + // output ones except for the node index, so we do a shortcut that's + // faster (no following dependencies). + std::vector input_step(step.size()); + input_step.resize(step.size()); + std::vector::iterator iter = input_step.begin(), + end = input_step.end(); + std::vector::const_iterator src = step.begin(); + for (; iter != end; ++iter,++src) { + iter->first = component_input_index; + iter->second = src->second; + } + AddStep(input_step); + AddStep(step); + } else { + std::vector step_cindex_ids; + ConvertToCindexIds(step, &step_cindex_ids); + // to get the input cindexes we need to follow dependencies back. + unordered_set input_cindex_ids; + std::vector::iterator iter = step_cindex_ids.begin(), + end = step_cindex_ids.end(); for (; iter != end; ++iter) { int32 c = *iter; - const std::vector &dependencies = graph.dependencies[c]; + const std::vector &dependencies = graph_->dependencies[c]; std::vector::const_iterator dep_iter = dependencies.begin(), dep_end = dependencies.end(); for (; dep_iter != dep_end; ++dep_iter) { int32 d = *dep_iter; - descriptor_cindex_ids.insert(d); + input_cindex_ids.insert(d); } } // Convert to Cindexes so we can sort them as Cindexes. - std::vector descriptor_cindexes; - descriptor_cindexes.reserve(descriptor_cindex_ids.size()); - unordered_set::iterator set_iter = descriptor_cindex_ids.begin(), - set_end = descriptor_cindex_ids.end(); + std::vector input_step; + input_step.reserve(input_cindex_ids.size()); + unordered_set::iterator set_iter = input_cindex_ids.begin(), + set_end = input_cindex_ids.end(); for (; set_iter != set_end; ++set_iter) { int32 c = *set_iter; - descriptor_cindexes.push_back(graph.cindexes[c]); + input_step.push_back(graph_->cindexes[c]); } - // sort the cindexes. - std::sort(descriptor_cindexes.begin(), descriptor_cindexes.end()); - - // We technically allow a Component with no input, e.g. in case where for - // some reason it decides it has no dependencies, e.g. it has a constant - // output. In this case we create an empty step, to preserve the property - // that the step for the Component's input comes immediately before the step - // for the Component itself. - if (!descriptor_cindexes.empty()) { - // Make sure all these cindexes come from the same node_id, which should - // be the one immediately preceding the Component node_id of - // "component_step". - int32 node_id = descriptor_cindexes.front().first; - KALDI_ASSERT(descriptor_cindexes.back().first == node_id && - graph.cindexes[component_step.front()].first == node_id + 1); - } - // Now that we've sorted, convert back to cindex_ids (this list will be - // the "step"). 
- int32 size = descriptor_cindexes.size(); - std::vector descriptor_step(size); - for (int32 i = 0; i < size; i++) { - descriptor_step[i] = graph.GetCindexId(descriptor_cindexes[i]); - KALDI_ASSERT(descriptor_step[i] != -1); + // sort the input cindexes. + std::sort(input_step.begin(), input_step.end()); + + if (component->Properties() & kReordersIndexes) { + std::vector indexes, input_indexes; + ConvertToIndexes(input_step, &input_indexes); + ConvertToIndexes(step, &indexes); + + // the component wants to have the opportunity to change the + // order of these indexes from their default. + component->ReorderIndexes(&input_indexes, &indexes); + + // Now convert back from indexes to cindexes (we know the + // node-index in each case) + std::vector reordered_step; + ConvertToCindexes(indexes, component_node_index, &reordered_step); + ConvertToCindexes(input_indexes, component_input_index, &input_step); + AddStep(input_step); + AddStep(reordered_step); + } else { + AddStep(input_step); + // it's more efficient to add the step with cindex_ids; and we have these + // available, so we do it that way. (in the other branch where + // the flag kReordersIndexes was present, we couldn't do this because + // of the reordering). + AddStep(&step_cindex_ids); } - // efficiently add descriptor_step to the end of all_steps. - all_steps->push_back(std::vector()); - all_steps->back().swap(descriptor_step); - - // efficiently add component_step to the end of all_steps (this destroys the - // input, which we won't be needing any more). - all_steps->push_back(std::vector()); - all_steps->back().swap(component_step); } - component_steps->clear(); } -static void CreateCindexIdToStep( - const ComputationGraph &graph, - const std::vector > &all_steps, - std::vector *cindex_id_to_step) { - int32 num_cindex_ids = graph.cindexes.size(); - cindex_id_to_step->clear(); - cindex_id_to_step->resize(num_cindex_ids, -1); - int32 num_steps = all_steps.size(); - for (int32 step = 0; step < num_steps; step++) { - std::vector::const_iterator iter = all_steps[step].begin(), - end = all_steps[step].end(); - for (; iter != end; ++iter) { - int32 cindex_id = *iter; - (*cindex_id_to_step)[cindex_id] = step; - } +void ComputationStepsComputer::ConvertToLocations( + const std::vector &cindex_ids, + std::vector > *locations) const { + locations->resize(cindex_ids.size()); + std::vector::const_iterator iter = cindex_ids.begin(), + end = cindex_ids.end(); + std::vector >::iterator out_iter = + locations->begin(); + // note, locations_ and locations are different variables. + std::pair *locations_ptr = &((*locations_)[0]); + size_t num_cindexes = locations_->size(); + for (; iter != end; ++iter, ++out_iter) { + int32 cindex_id = *iter; + KALDI_ASSERT(static_cast(cindex_id) < num_cindexes); + int32 step = locations_ptr[cindex_id].first, + row = locations_ptr[cindex_id].second; + KALDI_ASSERT(step >= 0); + out_iter->first = step; + out_iter->second = row; } } -/// This function inserts into "all_steps", which at this point should contain -/// all but the output steps, steps corresponding to any nodes of type kDimRange. -/// "graph" is non-const as there are situations in which we might need to -/// add cindexes for nodes of type kDimRange. 
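ConvertToLocations(), added above, is essentially a bounds-checked gather through the 'locations_' array. A minimal sketch with hypothetical data, illustrative only:

    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      // locations_[c] = (step, row); (-1, -1) means "not yet placed".
      std::vector<std::pair<int, int> > locations_;
      locations_.push_back(std::make_pair(-1, -1));  // cindex_id 0
      locations_.push_back(std::make_pair(0, 0));    // cindex_id 1
      locations_.push_back(std::make_pair(0, 1));    // cindex_id 2
      locations_.push_back(std::make_pair(2, 0));    // cindex_id 3
      std::vector<int> cindex_ids;                   // the ids to look up
      cindex_ids.push_back(3);
      cindex_ids.push_back(1);
      std::vector<std::pair<int, int> > out(cindex_ids.size());
      for (size_t i = 0; i < cindex_ids.size(); i++) {
        out[i] = locations_[cindex_ids[i]];
        assert(out[i].first >= 0);  // error if still unplaced, as in the patch
      }
      assert(out[0] == std::make_pair(2, 0) && out[1] == std::make_pair(0, 0));
      return 0;
    }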
-static void AddDimRangeSteps( - const Nnet &nnet, - ComputationGraph *graph, - std::vector > *all_steps) { - int32 num_nodes = nnet.NumNodes(); - bool dim_range_node_exists = false; - std::vector is_dim_range_node(num_nodes, '\0'); - for (int32 n = 0; n < num_nodes; n++) { - if (nnet.IsDimRangeNode(n)) { - is_dim_range_node[n] = (char)1; - dim_range_node_exists = true; +void ComputationStepsComputer::ProcessDimRangeSubPhase( + const std::vector &sub_phase) { + int32 dim_range_node = sub_phase[0].first; + KALDI_ASSERT(nnet_.IsDimRangeNode(dim_range_node)); + const NetworkNode &node = nnet_.GetNode(dim_range_node); + // 'input_node_index' is the node index of the component or input node + // that this dim-range node gets its input from. + int32 input_node_index = node.u.node_index; + // input_cindexes will give us the cindexes of the component or input node + // that is the input to this dim-range node + std::vector input_cindexes(sub_phase); + for (std::vector::iterator iter = input_cindexes.begin(), + end = input_cindexes.end(); iter != end; ++iter) + iter->first = input_node_index; + std::vector input_cindex_ids; + ConvertToCindexIds(input_cindexes, &input_cindex_ids); + std::vector > locations; + ConvertToLocations(input_cindex_ids, &locations); + std::sort(locations.begin(), locations.end()); + KALDI_ASSERT(!locations.empty()); + std::vector >::const_iterator + locations_iter = locations.begin(), + locations_end = locations.end(); + // Each unique .first number in locations (i.e. each source step, and they + // will all correspond to component-output or input steps) will generate one + // 'step' of type kDimRange. Because dim-range nodes must be contiguous + // ranges of a source step (since they are represented as sub-matrices), for + // each source step we work out the first and last row-index (i.e. first and + // last .second member of locations) and use that to reconstruct the range. + + // each element of 'steps' will be (source_step, (begin_row, end_row)) so that + // the source of the dim-range node is indexes begin_row ... end_row-1 in that + // source step. + std::vector > > steps; + + int32 cur_source_step = locations_iter->first, + cur_row_begin = locations_iter->second, + cur_row_end = cur_row_begin + 1; + while (1) { + ++locations_iter; + if (locations_iter == locations_end || + locations_iter->first != cur_source_step) { + // we reached the end of a run of the same step. + std::pair > this_step; + this_step.first = cur_source_step; + this_step.second.first = cur_row_begin; + this_step.second.second = cur_row_end; + steps.push_back(this_step); + if (locations_iter != locations_end) { + cur_source_step = locations_iter->first; + cur_row_begin = locations_iter->second; + cur_row_end = cur_row_begin + 1; + } else { + break; + } + } else { + cur_row_end = locations_iter->second + 1; } } - if (!dim_range_node_exists) - return; - std::vector cindex_id_to_step; - CreateCindexIdToStep(*graph, *all_steps, &cindex_id_to_step); - int32 num_steps = all_steps->size(); - - // We are going to insert steps for nodes of type kDimRange just after the - // kInput or kComponent steps that the kDimRange nodes refer to. - // new_nodes_per_step will be a list of any nodes of type kDimRange that - // have input corresponding to something in that step. 
- std::vector > new_nodes_per_step(num_steps); - int32 num_cindex_ids = graph->cindexes.size(); - std::vector::const_iterator iter = graph->cindexes.begin(); - for (int32 i = 0; i < num_cindex_ids; i++,iter++) { - const Cindex &cindex = *iter; - int32 node_index = cindex.first; - if (!is_dim_range_node[node_index]) - continue; - const NetworkNode &node = nnet.GetNode(node_index); - Cindex input_cindex(node.u.node_index, cindex.second); - int32 input_cindex_id = graph->GetCindexId(input_cindex); - KALDI_ASSERT(input_cindex_id != -1); - int32 input_step = cindex_id_to_step[input_cindex_id]; - KALDI_ASSERT(input_step != -1); - new_nodes_per_step[input_step].insert(node_index); - } - int32 num_new_steps = 0, space_for_output = 10; - for (int32 step = 0; step < num_steps; step++) - num_new_steps += new_nodes_per_step[step].size(); - - // we'll later swap all_steps_out with all_steps. - std::vector > all_steps_out; - all_steps_out.reserve(num_steps + num_new_steps + space_for_output); - for (int32 step = 0; step < num_steps; step++) { - std::vector &this_step = (*all_steps)[step]; - int32 cur_out_index = all_steps_out.size(); - all_steps_out.push_back(std::vector()); // make space for this step. - std::set::iterator iter = new_nodes_per_step[step].begin(), - end = new_nodes_per_step[step].end(); - for (; iter != end; ++iter) { - int32 node = *iter, size = this_step.size(); - std::vector new_step(size); - for (int32 i = 0; i < size; i++) { - int32 cindex_id = this_step[i]; - Cindex dimrange_cindex(node, graph->cindexes[cindex_id].second); - bool input = false, is_new; - int32 dimrange_cindex_id = graph->GetCindexId(dimrange_cindex, - input, &is_new); - new_step[i] = dimrange_cindex_id; - if (is_new) { // if we newly added this cindex_id, note the dependency - // on its input. - graph->dependencies[dimrange_cindex_id].push_back(cindex_id); - } - } - all_steps_out.push_back(std::vector()); - all_steps_out.back().swap(new_step); - } - all_steps_out[cur_out_index].swap(this_step); + for (size_t i = 0; i < steps.size(); i++) { + // iterating over different source steps, although normally + // there will be just one. + int32 source_step = steps[i].first, + row_begin = steps[i].second.first, + row_end = steps[i].second.second; + // 'source' is just the elements of the source step that we're consuming. + std::vector source((*steps_)[source_step].begin() + row_begin, + (*steps_)[source_step].begin() + row_end); + std::vector cindexes; + ConvertToCindexes(source, &cindexes); + std::vector::iterator iter = cindexes.begin(), + end = cindexes.end(); + for (; iter != end; ++iter) + iter->first = dim_range_node; + bool add_if_absent = true; + // this add_if_absent says, even if cindexes were not in the graph, + // add them. This is possible in principle; it's to satisfy the + // requirement that DimRangeNodes be implemented as contiguous ranges + // of rows of component nodes or input nodes. 
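The grouping loop above, which builds 'steps' from the sorted locations, is just a run-length encoding over the source-step index. A standalone toy sketch of the same idea (not taken from the patch; the AddStep call the comment above refers to follows below):

    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      // Sorted (source_step, row) locations of a dim-range node's inputs;
      // rows within each run are contiguous, as the dim-range code requires.
      std::vector<std::pair<int, int> > locs;
      locs.push_back(std::make_pair(2, 0));
      locs.push_back(std::make_pair(2, 1));
      locs.push_back(std::make_pair(2, 2));
      locs.push_back(std::make_pair(5, 7));
      locs.push_back(std::make_pair(5, 8));
      // Output: one (source_step, (begin_row, end_row)) entry per run.
      std::vector<std::pair<int, std::pair<int, int> > > steps;
      size_t i = 0;
      while (i < locs.size()) {
        size_t j = i + 1;
        while (j < locs.size() && locs[j].first == locs[i].first)
          j++;
        steps.push_back(std::make_pair(
            locs[i].first,
            std::make_pair(locs[i].second, locs[j-1].second + 1)));
        i = j;
      }
      assert(steps.size() == 2);
      assert(steps[0] == std::make_pair(2, std::make_pair(0, 3)));
      assert(steps[1] == std::make_pair(5, std::make_pair(7, 9)));
      return 0;
    }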
+ AddStep(cindexes, add_if_absent); } - all_steps->swap(all_steps_out); } +void ComputationStepsComputer::ProcessSubPhase( + const ComputationRequest &request, + const std::vector &sub_phase) { + KALDI_ASSERT(!sub_phase.empty()); + int32 node_index = sub_phase[0].first; + KALDI_ASSERT(sub_phase.back().first == node_index); + if (nnet_.IsComponentNode(node_index)) { + ProcessComponentStep(sub_phase); + } else if (nnet_.IsInputNode(node_index)) { + ProcessInputOrOutputStep(request, false, sub_phase); + } else if (nnet_.IsOutputNode(node_index)) { + ProcessInputOrOutputStep(request, true, sub_phase); + } else if (nnet_.IsDimRangeNode(node_index)) { + // this might turn out to be multiple steps, see the code. + ProcessDimRangeSubPhase(sub_phase); + } else if (nnet_.IsComponentInputNode(node_index)) { + // We actually do nothing with these sub-phases, because they are processed + // when we process the associated component's sub-phase/step. Doing it this + // way resolves certain problems. + return; + } else { + KALDI_ERR << "Unknown node type."; + } +} -/// This function would not be necessary if we had not added the ReorderIndexes -/// function to class Component. It is responsible for possibly modifying the -/// order of the inputs and outputs of non-simple Components, and also possibly -/// removing some inputs if the Component has decided it doesn't need them. It -/// may be a while before this is ever used for something. An example use is -/// that maybe in convolutional nets or simple models, some components may want, -/// efficiency or convenience, a certain ordering of the input that differs from -/// the normal order. -void ReorderIndexes(const Nnet &nnet, - const ComputationRequest &request, - const ComputationGraph &graph, - std::vector > *steps) { - - for (int32 step = 0; step < steps->size(); step++) { - std::vector &cindex_ids = (*steps)[step]; - if (cindex_ids.empty()) continue; - int32 cindex_id = cindex_ids.front(); - int32 node_index = graph.cindexes[cindex_id].first; - const NetworkNode &node = nnet.GetNode(node_index); - if (node.node_type != kComponent || - graph.is_input[cindex_id]) - continue; // nothing to do if an input, or if not a Component. - - int32 c = node.u.component_index; - const Component *component = nnet.GetComponent(c); - if (!(component->Properties() & kReordersIndexes)) - continue; // nothing to do if it doesn't modify indexes. - KALDI_ASSERT(step > 0); // or should have continued already. - - // preceding step will be Cindexes from the input Descriptor. - std::vector &input_cindex_ids = (*steps)[step - 1]; - - int32 size = cindex_ids.size(), input_size = input_cindex_ids.size(); - std::vector indexes(size), input_indexes(input_size); - - for (int32 i = 0; i < size; i++) - indexes[i] = graph.cindexes[cindex_ids[i]].second; - for (int32 i = 0; i < input_size; i++) - input_indexes[i] = graph.cindexes[input_cindex_ids[i]].second; - - component->ReorderIndexes(&input_indexes, &indexes); - // size should not change. 
-    KALDI_ASSERT(input_indexes.size() == input_size && indexes.size() == size);
-
-    if (size > 0) {
-      int32 node_index = graph.cindexes[cindex_ids.front()].first;
-      for (int32 i = 0; i < size; i++) {
-        Cindex cindex(node_index, indexes[i]);
-        cindex_ids[i] = graph.GetCindexId(cindex);
-      }
-    }
-    if (input_size > 0) {
-      int32 input_node_index = graph.cindexes[input_cindex_ids.front()].first;
-      for (int32 i = 0; i < input_size; i++) {
-        Cindex cindex(input_node_index, input_indexes[i]);
-        input_cindex_ids[i] = graph.GetCindexId(cindex);
-      }
-    }
-    // note: cindex_ids and input_cindex_ids are references, so we have
-    // changed *steps by writing to them in the above two loops.
+void ComputationStepsComputer::Check() const {
+  int32 num_cindexes = graph_->cindexes.size();
+  KALDI_ASSERT(locations_->size() == num_cindexes);
+  for (int32 c = 0; c < num_cindexes; c++) {
+    int32 step = (*locations_)[c].first,
+        row = (*locations_)[c].second;
+    KALDI_ASSERT(step >= 0 && row >= 0 &&
+                 (*steps_)[step][row] == c);
   }
 }

-} // namespace compute_computation_steps.
-
-void ComputeComputationSteps(
-    const Nnet &nnet,
-    const ComputationRequest &request,
-    const std::vector<std::vector<int32> > &phases,
-    ComputationGraph *graph,
-    std::vector<std::vector<int32> > *steps) {
-  using namespace compute_computation_steps;
-  AddInputSteps(nnet, request, *graph, steps);
-  {
-    std::vector<std::vector<int32> > component_steps;
-    AddComponentSteps(nnet, *graph, phases, &component_steps);
-    AddComponentInputSteps(*graph, &component_steps, steps);
-  }
-  // output steps don't get reordered so we do the reordering before adding
-  // them.
-  ReorderIndexes(nnet, request, *graph, steps);
-  AddDimRangeSteps(nnet, graph, steps);
-  AddOutputSteps(nnet, request, *graph, steps);
+void ComputationStepsComputer::SplitIntoSubPhases(
+    const std::vector<int32> &phase,
+    std::vector<std::vector<Cindex> > *sub_phases) const {
+  std::vector<Cindex> phase_cindexes;
+  ConvertToCindexes(phase, &phase_cindexes);
+  KALDI_ASSERT(!phase_cindexes.empty());
+  std::sort(phase_cindexes.begin(), phase_cindexes.end());
+  // 'segment_begins' gives the indexes into 'phase_cindexes' that
+  // start a run of the same node-index.
+  std::vector<size_t> segment_begins;
+  int32 cur_node_index = -1;
+  size_t size = phase_cindexes.size();
+  for (size_t i = 0; i < size; i++) {
+    if (phase_cindexes[i].first != cur_node_index) {
+      cur_node_index = phase_cindexes[i].first;
+      segment_begins.push_back(i);
+    }
+  }
+  size_t num_sub_phases = segment_begins.size();
+  segment_begins.push_back(size);
+  sub_phases->clear();
+  sub_phases->resize(num_sub_phases);
+  for (size_t i = 0; i < num_sub_phases; i++) {
+    size_t this_begin = segment_begins[i],
+        this_end = segment_begins[i+1];
+    (*sub_phases)[i].insert((*sub_phases)[i].end(),
+                            phase_cindexes.begin() + this_begin,
+                            phase_cindexes.begin() + this_end);
+  }
 }
diff --git a/src/nnet3/nnet-computation-graph.h b/src/nnet3/nnet-computation-graph.h
index 41087123421..863add7fd2d 100644
--- a/src/nnet3/nnet-computation-graph.h
+++ b/src/nnet3/nnet-computation-graph.h
@@ -83,7 +83,7 @@ struct ComputationGraph {
   /// the corresponding "is_input" flag set to the value "input") and set
   /// *is_new to true.  If present, set is_new to false and return the existing
   /// cindex_id.
-  int32 GetCindexId(const Cindex &cindex, bool input, bool *is_new);
+  int32 GetCindexId(const Cindex &cindex, bool is_input, bool *is_new);

   /// Const version of GetCindexId that does not add CindexIds.  It will return
   /// -1 if the Cindex is not present, and the user should check for this.
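The effect of SplitIntoSubPhases() above is just "sort, then cut at node-index changes". A toy sketch of the cutting step, with Cindex simplified to a (node, t) pair (illustrative only, not from the patch):

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      typedef std::pair<int, int> Cindex;  // simplified here to (node-index, t)
      std::vector<Cindex> phase;
      phase.push_back(Cindex(5, 1));
      phase.push_back(Cindex(3, 0));
      phase.push_back(Cindex(5, 0));
      phase.push_back(Cindex(3, 1));
      std::sort(phase.begin(), phase.end());  // groups same-node runs together
      std::vector<size_t> segment_begins;
      int cur_node = -1;
      for (size_t i = 0; i < phase.size(); i++) {
        if (phase[i].first != cur_node) {
          cur_node = phase[i].first;
          segment_begins.push_back(i);
        }
      }
      segment_begins.push_back(phase.size());
      // Two sub-phases result: node 3 -> rows [0, 2), node 5 -> rows [2, 4).
      assert(segment_begins.size() == 3);
      assert(segment_begins[0] == 0 && segment_begins[1] == 2);
      return 0;
    }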
@@ -243,8 +243,10 @@ class ComputationGraphBuilder {
                             std::vector<bool> *required) const;

   // this function, to be called from Compute(), does some sanity checks to
-  // verify that the internal state is consistent.
-  void Check() const;
+  // verify that the internal state is consistent.  It only does this for the
+  // current 'segment' of the computation, starting from 'start_cindex_id' (this
+  // will be 0 in normal, single-segment computations).
+  void Check(int32 start_cindex_id) const;

   const Nnet &nnet_;
   const ComputationRequest *request_;
@@ -377,7 +379,7 @@ void ComputeComputationPhases(


 /**
-   This function arranges the cindex_ids of the computation into a sequence of
+   This class arranges the cindex_ids of the computation into a sequence of
    lists called "steps", which will correspond roughly to the commands in the
    compiled computation.  The steps are finer than phases.  (See
    \ref dnn3_compile_steps for more info).  To summarize the properties that
@@ -386,30 +388,157 @@ void ComputeComputationPhases(
    - All cindex_ids within a given step correspond to the same node in the graph
    - All dependencies of cindex_ids within a given step have been computed in
      earlier steps.
+   - All cindex_ids within a given step share the same location when
+     computed (i.e. a matrix or submatrix).

    There are also some extra, more obscure properties that the sequence of steps
    must satisfy:
-   - Any input or output in the ComputationRequest must be in one step, with the
-     Indexes in the same order as specified in the ComputationRequest.  (Note:
-     inputs can be for nodes of type kComponent as well as kInput).
+
+   - Any input or output specified in a ComputationRequest must be in one step,
+     with the Indexes in the same order as specified in the ComputationRequest.
+     (Note: inputs can be for nodes of type kComponent as well as kInput).
    - If a step corresponds to a node of type kComponent (and does not correspond
     to an input in the ComputationRequest), then the immediately preceding step
     must correspond to a node of type kDescriptor, and the sequence of Indexes
     in the two steps must be identical.
   - If a step corresponds to a node of type kDimRange, then there must be
-    another step corresponding to the source node, with exactly the same
+    a preceding step corresponding to the source node, with exactly the same
     Indexes appearing in the same order.  (This lets us use a sub-matrix for
-    the kDimRange node).
+    the kDimRange node).  We guarantee this by adding extra cindexes to the
+    kDimRange steps as needed.

    The reason why computation_graph is not provided as a const argument is
    that in order to ensure the final property we may have to add a few new
    cindex_ids.
  */
-void ComputeComputationSteps(
-    const Nnet &nnet,
-    const ComputationRequest &request,
-    const std::vector<std::vector<int32> > &phases,
-    ComputationGraph *computation_graph,
-    std::vector<std::vector<int32> > *steps);
+
+class ComputationStepsComputer {
+ public:
+  /// Constructor.
+  /// @param [in] nnet       The neural network that this computation is for.
+  /// @param [in,out] graph  The computation graph that we're computing the steps
+  ///                        for.  It's only non-const because in certain
+  ///                        unusual cases relating to nodes of type kDimRange,
+  ///                        we may need to add new cindexes.
+  /// @param [out] steps     The main output of this class, which is
+  ///                        a sequence of steps, each step being an ordered list
+  ///                        of cindex_ids.  It just gets cleared in the
+  ///                        constructor; it's set up when you call
+  ///                        ComputeForSegment().
+  /// @param [out] locations  The additional output of this class, which is a
+  ///                        function of the information in 'steps'.  The array
+  ///                        'locations' is indexed by cindex_id, and each one is
+  ///                        a pair (step-index, index-into-step), so that for
+  ///                        any cindex_id c,
+  ///                        (*steps)[locations[c].first][locations[c].second] == c.
+  ///                        It's possible in principle, if there are non-simple
+  ///                        Components, that for nodes corresponding to
+  ///                        component-input descriptors, a cindex might be
+  ///                        present in more than one step, so it doesn't follow
+  ///                        that if (*steps)[i][j] == c, then
+  ///                        locations[c] == (i,j).
+  ComputationStepsComputer(const Nnet &nnet,
+                           ComputationGraph *graph,
+                           std::vector<std::vector<int32> > *steps,
+                           std::vector<std::pair<int32, int32> > *locations);
+
+  /// You call this once for each segment, in order (note: for normal,
+  /// non-online computations, there is only one segment).
+  void ComputeForSegment(const ComputationRequest &request,
+                         const std::vector<std::vector<int32> > &phases);
+
+  /// This is only to be called after you have called ComputeForSegment
+  /// for all the segments.
+  void Check() const;
+ private:
+
+  // Adds step(s) for one "sub-phase".  A sub-phase is the set of cindex_ids from
+  // one phase that have the same node index.  Note: for nodes that are
+  // component-input descriptors, we don't actually create the step here; we
+  // create it just before creating the step for its component, and we recreate
+  // the list of cindexes from those of the component.  The reason is that
+  // there are situations where doing it directly from the raw step would not do
+  // the right thing (especially with non-simple components, it's possible that
+  // the cindexes of component-input descriptors could be used twice by two
+  // different components).
+  void ProcessSubPhase(const ComputationRequest &request,
+                       const std::vector<Cindex> &sub_phase);
+
+  // Called from ProcessSubPhase, for the case where it's a dim-range node.
+  void ProcessDimRangeSubPhase(const std::vector<Cindex> &sub_phase);
+
+  // Called from ProcessSubPhase, for the case where it's an input or output node.
+  void ProcessInputOrOutputStep(const ComputationRequest &request,
+                                bool is_output,
+                                const std::vector<Cindex> &sub_phase);
+
+  // Called from ProcessSubPhase, for the case where it's a component node.
+  void ProcessComponentStep(const std::vector<Cindex> &step);
+
+
+  // Splits a phase up into multiple "sub-phases", which are just the cindexes
+  // from a phase that are from a single node, sorted.  At this point we
+  // represent them as Cindexes, not cindex_ids.  For efficiency, and because
+  // they would be discarded anyway, it discards any sub-phases that correspond
+  // to component-input descriptors, because these are not processed inside
+  // ProcessSubPhase().
+  void SplitIntoSubPhases(const std::vector<int32> &phase,
+                          std::vector<std::vector<Cindex> > *sub_phases) const;
+
+  // This low-level function, used by functions like ProcessComponentStep and
+  // ProcessInputOrOutputStep, adds one step to 'steps_' (converting from
+  // Cindexes to cindex_ids), and updates 'locations_' appropriately.  It returns
+  // the step index that we just added (== size of steps_ at entry).
+  // If you specify add_if_absent = true, it will add any Cindexes that were
+  // not already present to the graph.  [This option is only to be used
+  // in processing dim-range nodes.]
+  int32 AddStep(const std::vector<Cindex> &cindexes,
+                bool add_if_absent = false);
+
+  // This is an alternative interface to AddStep() that takes a list of
+  // cindex_ids instead of cindexes (it's destructive of that list).
+ int32 AddStep(std::vector *cindex_ids); + + + // This utility function uses graph_ to convert a vector of cindex_ids into + // Cindexes. + void ConvertToCindexes(const std::vector &cindex_ids, + std::vector *cindexes) const; + + // Converts a vector of Cindexes to a vector of Indexes, by + // stripping out the node index. + static void ConvertToIndexes(const std::vector &cindexes, + std::vector *indexes); + + // Converts a vector of Indexes to Cindexes, using a supplied + // node index. + static void ConvertToCindexes(const std::vector &indexes, + int32 node_index, + std::vector *cindexes); + + + // This utility function uses graph_ to convert a vector of cindex_ids into + // Cindexes. It will crash if the cindexes were not present in the graph. + void ConvertToCindexIds(const std::vector &cindexes, + std::vector *cindex_ids) const; + + // This utility function uses the 'locations_' array to convert the cindex_ids + // in 'cindex_ids' into an array (of the same length) of locations, i.e. of + // pairs (step, index-into-step), so that if cindex_ids[i] = c, then + // (*locations)[i] will be set to (*locations_)[c]. It will die if + // one of the locations was not defined, i.e. was the pair (-1, -1). + void ConvertToLocations( + const std::vector &cindex_ids, + std::vector > *locations) const; + + + const Nnet &nnet_; + ComputationGraph *graph_; + /// steps_ is a pointer to an output that's passed in in the constructor. + std::vector > *steps_; + /// locations_ is a map from cindex_id to the pair of indexes into steps_ where + /// that cindex_id resides, so if (*locations_)[c] = (i,j), then + /// (*steps_)[i][j] == c. This is also an output (we get the pointer in + /// the constructor). + std::vector > *locations_; +}; + } // namespace nnet3 diff --git a/src/nnet3/nnet-test-utils.cc b/src/nnet3/nnet-test-utils.cc index 0b000b5b4ef..e02b9cd139a 100644 --- a/src/nnet3/nnet-test-utils.cc +++ b/src/nnet3/nnet-test-utils.cc @@ -1019,6 +1019,8 @@ void GenerateConfigSequence( GenerateConfigSequenceCnn(opts, configs); break; case 8: + if (!opts.allow_use_of_x_dim) + goto start; GenerateConfigSequenceDistribute(opts, configs); break; case 9: diff --git a/src/nnet3/nnet-test-utils.h b/src/nnet3/nnet-test-utils.h index 18e4960f9bd..d2034dcfdc6 100644 --- a/src/nnet3/nnet-test-utils.h +++ b/src/nnet3/nnet-test-utils.h @@ -38,6 +38,7 @@ struct NnetGenerationOptions { bool allow_multiple_inputs; bool allow_multiple_outputs; bool allow_final_nonlinearity; + bool allow_use_of_x_dim; // if set to a value >0, the output-dim of the network // will be set to this value. int32 output_dim; @@ -50,6 +51,7 @@ struct NnetGenerationOptions { allow_multiple_inputs(true), allow_multiple_outputs(false), allow_final_nonlinearity(true), + allow_use_of_x_dim(true), output_dim(-1) { } }; From 1e92e2afa5667f56758ed3ab74f4d11437cfb691 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 15 Oct 2016 19:27:32 -0400 Subject: [PATCH 007/530] Some minor refactoring to make online computation easier (remove unused/unnecessary 'request' args for optimization). 
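For orientation before the next set of diffs: pulling the pieces of the interface above together, a caller would drive ComputationStepsComputer roughly as follows. This is a hedged sketch based on the declarations in the previous patch; the surrounding function and its inputs are assumed, not part of either patch.

    #include <utility>
    #include <vector>
    #include "nnet3/nnet-computation-graph.h"

    namespace kaldi {
    namespace nnet3 {

    // Hypothetical driver (names assumed): computes the steps for a
    // multi-segment computation, one ComputeForSegment() call per segment,
    // then verifies the steps/locations invariant.  'requests' and
    // 'phases_per_segment' are assumed to come from the compiler's earlier
    // stages (graph building and ComputeComputationPhases()).
    void ComputeStepsForAllSegments(
        const Nnet &nnet,
        const std::vector<const ComputationRequest*> &requests,
        const std::vector<std::vector<std::vector<int32> > > &phases_per_segment,
        ComputationGraph *graph,
        std::vector<std::vector<int32> > *steps,
        std::vector<std::pair<int32, int32> > *locations) {
      ComputationStepsComputer computer(nnet, graph, steps, locations);
      for (size_t s = 0; s < requests.size(); s++)
        computer.ComputeForSegment(*(requests[s]), phases_per_segment[s]);
      computer.Check();  // only valid after all segments are processed
    }

    }  // namespace nnet3
    }  // namespace kaldi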
--- src/nnet3/nnet-analyze.cc | 1 - src/nnet3/nnet-analyze.h | 3 +-- src/nnet3/nnet-compute-test.cc | 2 +- src/nnet3/nnet-derivative-test.cc | 4 ++-- src/nnet3/nnet-optimize-test.cc | 2 +- src/nnet3/nnet-optimize-utils.cc | 3 +-- src/nnet3/nnet-optimize-utils.h | 2 -- src/nnet3/nnet-optimize.cc | 36 +++++++++++++++---------------- src/nnet3/nnet-optimize.h | 3 --- 9 files changed, 24 insertions(+), 32 deletions(-) diff --git a/src/nnet3/nnet-analyze.cc b/src/nnet3/nnet-analyze.cc index c030dad7d9b..1144ff997e2 100644 --- a/src/nnet3/nnet-analyze.cc +++ b/src/nnet3/nnet-analyze.cc @@ -974,7 +974,6 @@ void ComputationChecker::CheckComputationDebugInfo() const { } void CheckComputation(const Nnet &nnet, - const ComputationRequest &request, const NnetComputation &computation, bool check_rewrite) { CheckComputationOptions opts; diff --git a/src/nnet3/nnet-analyze.h b/src/nnet3/nnet-analyze.h index 8b02d6376e9..c1911d36457 100644 --- a/src/nnet3/nnet-analyze.h +++ b/src/nnet3/nnet-analyze.h @@ -417,9 +417,8 @@ class ComputationChecker { /// This is a convenience interface for class ComputationChecker. Call it with -/// check_rewrite = true only if the optimization is pre-optimization. +/// check_rewrite = true only if the computation is pre-optimization. void CheckComputation(const Nnet &nnet, - const ComputationRequest &request, const NnetComputation &computation, bool check_rewrite = false); diff --git a/src/nnet3/nnet-compute-test.cc b/src/nnet3/nnet-compute-test.cc index afe7da86dc1..c485cc06636 100644 --- a/src/nnet3/nnet-compute-test.cc +++ b/src/nnet3/nnet-compute-test.cc @@ -119,7 +119,7 @@ void UnitTestNnetCompute() { if (RandInt(0, 1) == 0) { NnetOptimizeOptions opt_config; - Optimize(opt_config, nnet, request, &computation); + Optimize(opt_config, nnet, &computation); { std::ostringstream os; computation.Print(os, nnet); diff --git a/src/nnet3/nnet-derivative-test.cc b/src/nnet3/nnet-derivative-test.cc index 0f5f2f6d54a..511a6dc6bf9 100644 --- a/src/nnet3/nnet-derivative-test.cc +++ b/src/nnet3/nnet-derivative-test.cc @@ -139,7 +139,7 @@ void UnitTestNnetModelDerivatives() { if (limit_deriv_times) SetDerivTimesOptions(request, &opt_config); - Optimize(opt_config, nnet, request, &computation); + Optimize(opt_config, nnet, &computation); std::ostringstream os; computation.Print(os, nnet); KALDI_LOG << "Optimized computation is: " << os.str(); @@ -303,7 +303,7 @@ void UnitTestNnetInputDerivatives() { if (RandInt(0, 3) != 0 && allow_optimization) { NnetOptimizeOptions opt_config; // opt_config.initialize_undefined = false; // temp - Optimize(opt_config, nnet, request, &computation); + Optimize(opt_config, nnet, &computation); std::ostringstream os; computation.Print(os, nnet); KALDI_LOG << "Optimized computation is: " << os.str(); diff --git a/src/nnet3/nnet-optimize-test.cc b/src/nnet3/nnet-optimize-test.cc index 97662acc556..7b64d67b72c 100644 --- a/src/nnet3/nnet-optimize-test.cc +++ b/src/nnet3/nnet-optimize-test.cc @@ -71,7 +71,7 @@ static bool UnitTestNnetOptimizeWithOptions(NnetOptimizeOptions opt_config) { NnetComputation computation_opt(computation); { - Optimize(opt_config, nnet, request, &computation_opt); + Optimize(opt_config, nnet, &computation_opt); std::ostringstream os; computation_opt.Print(os, nnet); KALDI_LOG << "Optimized computation is: " << os.str(); diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc index de9d2b43a2b..ba8c056f418 100644 --- a/src/nnet3/nnet-optimize-utils.cc +++ b/src/nnet3/nnet-optimize-utils.cc @@ -563,9 +563,8 @@ 
void RemoveNoOps(NnetComputation *computation) { VariableMergingOptimizer::VariableMergingOptimizer( const NnetOptimizeOptions &config, const Nnet &nnet, - const ComputationRequest &request, NnetComputation *computation): - config_(config), nnet_(nnet), request_(request), + config_(config), nnet_(nnet), computation_(computation), already_called_merge_variables_(false) { analyzer_.Init(nnet, *computation); diff --git a/src/nnet3/nnet-optimize-utils.h b/src/nnet3/nnet-optimize-utils.h index 05ce4bf8f41..0d3a03be224 100644 --- a/src/nnet3/nnet-optimize-utils.h +++ b/src/nnet3/nnet-optimize-utils.h @@ -131,7 +131,6 @@ class VariableMergingOptimizer { public: VariableMergingOptimizer(const NnetOptimizeOptions &config, const Nnet &nnet, - const ComputationRequest &request, NnetComputation *computation); // Note: you can call this only once. If it returns true, it means it has // merged variables. In this case, you have the option to instantiate another @@ -168,7 +167,6 @@ class VariableMergingOptimizer { const NnetOptimizeOptions &config_; const Nnet &nnet_; - const ComputationRequest &request_; NnetComputation *computation_; Analyzer analyzer_; diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc index cb869ac65bf..e2022d596ef 100644 --- a/src/nnet3/nnet-optimize.cc +++ b/src/nnet3/nnet-optimize.cc @@ -330,12 +330,11 @@ void RemoveUnnecessaryAllocation(const Nnet &nnet, void VariableMergingOptimization(const NnetOptimizeOptions &config, const Nnet &nnet, - const ComputationRequest &request, NnetComputation *computation) { bool changed = true; while (changed) { changed = false; - VariableMergingOptimizer opt(config, nnet, request, computation); + VariableMergingOptimizer opt(config, nnet, computation); if (opt.MergeVariables()) changed = true; } @@ -344,10 +343,12 @@ void VariableMergingOptimization(const NnetOptimizeOptions &config, // This is a simplified top-level interface to the model-update consolidation // code from class ModelUpdateConsolidator. void ConsolidateModelUpdate(const Nnet &nnet, - const ComputationRequest &request, NnetComputation *computation) { - if (!request.need_model_derivative) - return; // An optimization; there would be nothing to do in this case. + // This following if-statement is an optimization: if the computation + // request(s) had need_model_derivative == false, there would be nothing to + // optimize, so don't bother trying. + if (!computation->need_model_derivative) + return; ModelUpdateConsolidator consolidator(nnet, computation); consolidator.ConsolidateModelUpdate(); } @@ -405,13 +406,12 @@ void ConvertAdditionToAssignment(const Nnet &nnet, void Optimize(const NnetOptimizeOptions &config, const Nnet &nnet, - const ComputationRequest &request, NnetComputation *computation) { if (!config.optimize) return; if (GetVerboseLevel() >= 4) - CheckComputation(nnet, request, *computation, true); + CheckComputation(nnet, *computation, true); // this will do nothing unless --min-deriv-time or --max-deriv-time was // set. 
@@ -419,44 +419,44 @@ void Optimize(const NnetOptimizeOptions &config, computation); if (GetVerboseLevel() >= 4) - CheckComputation(nnet, request, *computation, true); + CheckComputation(nnet, *computation, true); if (config.consolidate_model_update) - ConsolidateModelUpdate(nnet, request, computation); + ConsolidateModelUpdate(nnet, computation); if (GetVerboseLevel() >= 4) - CheckComputation(nnet, request, *computation, true); + CheckComputation(nnet, *computation, true); if (config.convert_addition) ConvertAdditionToAssignment(nnet, computation); if (GetVerboseLevel() >= 4) - CheckComputation(nnet, request, *computation, true); + CheckComputation(nnet, *computation, true); if (config.remove_assignments || config.backprop_in_place || config.propagate_in_place) - VariableMergingOptimization(config, nnet, request, computation); + VariableMergingOptimization(config, nnet, computation); if (GetVerboseLevel() >= 4) - CheckComputation(nnet, request, *computation, false); + CheckComputation(nnet, *computation, false); if (config.initialize_undefined) RemoveUnnecessaryZeroing(nnet, computation); if (GetVerboseLevel() >= 4) - CheckComputation(nnet, request, *computation, false); + CheckComputation(nnet, *computation, false); if (config.move_sizing_commands) MoveSizingCommands(nnet, computation); if (GetVerboseLevel() >= 4) - CheckComputation(nnet, request, *computation, false); + CheckComputation(nnet, *computation, false); if (config.allocate_from_other) RemoveUnnecessaryAllocation(nnet, computation); if (GetVerboseLevel() >= 4) - CheckComputation(nnet, request, *computation, false); + CheckComputation(nnet, *computation, false); // The following is not configurable because it is necessary for // the computation to run correctly (we do it after compilation too, @@ -465,7 +465,7 @@ void Optimize(const NnetOptimizeOptions &config, ConsolidateIoOperations(nnet, computation); if (GetVerboseLevel() >= 4) - CheckComputation(nnet, request, *computation, false); + CheckComputation(nnet, *computation, false); } // ComputationRequests are distinguished by the names and indexes @@ -601,7 +601,7 @@ const NnetComputation* CachingOptimizingCompiler::Compile( ComputationChecker checker(check_config, nnet_, *computation); checker.Check(); } - Optimize(opt_config_, nnet_, *request, computation); + Optimize(opt_config_, nnet_, computation); if (GetVerboseLevel() >= verbose_cutoff) { std::ostringstream os; computation->Print(os, nnet_); diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h index 29e76e624de..127b30b5278 100644 --- a/src/nnet3/nnet-optimize.h +++ b/src/nnet3/nnet-optimize.h @@ -108,7 +108,6 @@ struct NnetOptimizeOptions { /// This is the top-level function for optimizing a computation. void Optimize(const NnetOptimizeOptions &config, const Nnet &nnet, - const ComputationRequest &request, NnetComputation *computation); // Hash function for ComputationRequest. It converts @@ -228,7 +227,6 @@ void LimitDerivativeTimes(const Nnet &nnet, /// class ModelUpdateConsolidator. Will fail if called a /// second time. void ConsolidateModelUpdate(const Nnet &nnet, - const ComputationRequest &request, NnetComputation *computation); /// This converts addition operations (things with Add in their names) to @@ -241,7 +239,6 @@ void ConvertAdditionToAssignment(const Nnet &nnet, /// This wraps class VariableMergingOptimizer in a simplified interface. 
void VariableMergingOptimization(const NnetOptimizeOptions &config, const Nnet &nnet, - const ComputationRequest &request, NnetComputation *computation); From 621304144178abb0eb3ca3eabfcf4d5b9b47c44b Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Tue, 25 Oct 2016 20:37:31 -0400 Subject: [PATCH 008/530] Further progress [note, this is partial work, backing up. Search for TODO. --- src/matrix/compressed-matrix.h | 10 +- src/nnet3/Makefile | 4 +- src/nnet3/nnet-analyze.cc | 9 ++ src/nnet3/nnet-analyze.h | 8 ++ src/nnet3/nnet-common.cc | 21 +++- src/nnet3/nnet-common.h | 5 + src/nnet3/nnet-compile-test.cc | 60 ++++++++++- src/nnet3/nnet-compile.h | 2 +- src/nnet3/nnet-computation-graph.cc | 2 +- src/nnet3/nnet-computation.cc | 2 +- src/nnet3/nnet-example-utils.h | 2 +- src/nnet3/nnet-nnet.h | 20 ++-- src/nnet3/nnet-optimize-utils.cc | 148 ++++++++++++++++++++++++++++ src/nnet3/nnet-optimize-utils.h | 15 ++- src/nnet3/nnet-optimize.h | 4 +- src/nnet3/nnet-test-utils.cc | 10 +- src/nnet3/nnet-test-utils.h | 2 + src/nnet3/nnet-utils.cc | 15 ++- src/nnet3/nnet-utils.h | 4 +- 19 files changed, 306 insertions(+), 37 deletions(-) diff --git a/src/matrix/compressed-matrix.h b/src/matrix/compressed-matrix.h index 603134ab800..4e4238c43da 100644 --- a/src/matrix/compressed-matrix.h +++ b/src/matrix/compressed-matrix.h @@ -47,7 +47,7 @@ class CompressedMatrix { CompressedMatrix(): data_(NULL) { } ~CompressedMatrix() { Clear(); } - + template CompressedMatrix(const MatrixBase &mat): data_(NULL) { CopyFromMat(mat); } @@ -73,7 +73,7 @@ class CompressedMatrix { template CompressedMatrix &operator = (const MatrixBase &mat); // assignment operator. - + /// Copies contents to matrix. Note: mat must have the correct size. /// kNoTrans case uses a temporary. template @@ -81,7 +81,7 @@ class CompressedMatrix { MatrixTransposeType trans = kNoTrans) const; void Write(std::ostream &os, bool binary) const; - + void Read(std::istream &is, bool binary); /// Returns number of rows (or zero for emtpy matrix). @@ -113,7 +113,7 @@ class CompressedMatrix { void Swap(CompressedMatrix *other) { std::swap(data_, other->data_); } void Clear(); - + friend class Matrix; friend class Matrix; private: @@ -163,7 +163,7 @@ class CompressedMatrix { static inline float CharToFloat(float p0, float p25, float p75, float p100, unsigned char value); - + void *data_; // first GlobalHeader, then PerColHeader (repeated), then // the byte data for each column (repeated). Note: don't intersperse // the byte data with the PerColHeaders, because of alignment issues. 
diff --git a/src/nnet3/Makefile b/src/nnet3/Makefile index 65384f5a338..60629ab1cbe 100644 --- a/src/nnet3/Makefile +++ b/src/nnet3/Makefile @@ -28,7 +28,7 @@ OBJFILES = nnet-common.o nnet-compile.o nnet-component-itf.o \ discriminative-supervision.o nnet-discriminative-example.o \ nnet-discriminative-diagnostics.o \ discriminative-training.o nnet-discriminative-training.o \ - online-nnet3-decodable-simple.o + online-nnet3-decodable-simple.o nnet-compile-online.o LIBNAME = kaldi-nnet3 @@ -37,6 +37,6 @@ ADDLIBS = ../chain/kaldi-chain.a ../cudamatrix/kaldi-cudamatrix.a \ ../lat/kaldi-lat.a ../fstext/kaldi-fstext.a ../hmm/kaldi-hmm.a \ ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \ ../tree/kaldi-tree.a ../util/kaldi-util.a ../thread/kaldi-thread.a \ - ../matrix/kaldi-matrix.a ../base/kaldi-base.a + ../matrix/kaldi-matrix.a ../base/kaldi-base.a include ../makefiles/default_rules.mk diff --git a/src/nnet3/nnet-analyze.cc b/src/nnet3/nnet-analyze.cc index 1144ff997e2..e53f46198b6 100644 --- a/src/nnet3/nnet-analyze.cc +++ b/src/nnet3/nnet-analyze.cc @@ -1233,5 +1233,14 @@ void Analyzer::Init(const Nnet &nnet, const NnetComputation &computation) { &matrix_accesses); } +void GetSegmentEnds(const NnetComputation &computation, + std::vector *command_indexes) { + int32 num_commands = computation.commands.size(); + command_indexes->clear(); + for (int32 c = 0; c < num_commands; c++) + if (computation.commands[c].command_type == kNoOperationMarker) + command_indexes->push_back(c); +} + } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-analyze.h b/src/nnet3/nnet-analyze.h index c1911d36457..7109575e415 100644 --- a/src/nnet3/nnet-analyze.h +++ b/src/nnet3/nnet-analyze.h @@ -416,6 +416,14 @@ class ComputationChecker { }; +/// This utility function works out from a computation, the locations of the +/// 'segment ends'. This is useful for online compilation, where the +/// computation has multiple segments corresponding to new pieces of input data +/// to process. The implementation of the function is extremely simple; it +/// just gives you the locations of commands of type 'kNoOperationMarker'. +void GetSegmentEnds(const NnetComputation &computation, + std::vector *command_indexes); + /// This is a convenience interface for class ComputationChecker. Call it with /// check_rewrite = true only if the computation is pre-optimization. 
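GetSegmentEnds(), declared above, only reports the positions of the kNoOperationMarker commands; a natural consumer turns those positions into per-segment command ranges. A hedged sketch of such a helper (the function name and its existence are assumptions, only GetSegmentEnds() itself is from the patch):

    #include <utility>
    #include <vector>
    #include "nnet3/nnet-analyze.h"

    namespace kaldi {
    namespace nnet3 {

    // Hypothetical helper: splits the command sequence of a multi-segment
    // computation into per-segment [begin, end) command-index ranges, using
    // the kNoOperationMarker positions that GetSegmentEnds() reports.
    void GetSegmentRanges(const NnetComputation &computation,
                          std::vector<std::pair<int32, int32> > *ranges) {
      std::vector<int32> ends;
      GetSegmentEnds(computation, &ends);
      ranges->clear();
      int32 begin = 0;
      for (size_t i = 0; i < ends.size(); i++) {
        ranges->push_back(std::make_pair(begin, ends[i]));
        begin = ends[i] + 1;  // the marker itself belongs to no segment
      }
      ranges->push_back(std::make_pair(
          begin, static_cast<int32>(computation.commands.size())));
    }

    }  // namespace nnet3
    }  // namespace kaldi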
void CheckComputation(const Nnet &nnet, diff --git a/src/nnet3/nnet-common.cc b/src/nnet3/nnet-common.cc index 9df01d4f048..cc3e8d2c79c 100644 --- a/src/nnet3/nnet-common.cc +++ b/src/nnet3/nnet-common.cc @@ -164,7 +164,7 @@ static void WriteCindexVectorElementBinary( // [node_1: index_1 index_2] [node_2: index_3 index_4] os.put('|'); WriteBasicType(os, binary, node_index); - } + } if (i == 0) { if (index.n == 0 && index.x == 0 && std::abs(index.t) < 125) { @@ -274,11 +274,11 @@ void WriteCindexVector(std::ostream &os, bool binary, os.put('['); WriteBasicType(os, binary, node_index); os.put(':'); - } + } vec[i].second.Write(os, binary); if (i == size - 1) os.put(']'); - } + } } else { for (int32 i = 0; i < size; i++) WriteCindexVectorElementBinary(os, vec, i); @@ -320,7 +320,7 @@ void ReadCindexVector(std::istream &is, bool binary, (*vec)[i].first = (*vec)[i-1].first; } (*vec)[i].second.Read(is, binary); - if (i == size - 1) { + if (i == size - 1) { is >> std::ws; if (is.peek() == static_cast(']')) { is.get(); @@ -352,6 +352,19 @@ size_t CindexHasher::operator () (const Cindex &cindex) const { } +size_t CindexVectorHasher::operator () ( + const std::vector &cindex_vector) const { + // this is an arbitrarily chosen prime. + size_t kPrime = 23539, ans = 0; + std::vector::const_iterator iter = cindex_vector.begin(), + end = cindex_vector.end(); + CindexHasher cindex_hasher; + for (; iter != end; ++iter) + ans = cindex_hasher(*iter) + kPrime * ans; + return ans; +} + + std::ostream &operator << (std::ostream &ostream, const Index &index) { return ostream << '(' << index.n << ' ' << index.t << ' ' << index.x << ')'; } diff --git a/src/nnet3/nnet-common.h b/src/nnet3/nnet-common.h index f8140e62f12..e6e3abe705e 100644 --- a/src/nnet3/nnet-common.h +++ b/src/nnet3/nnet-common.h @@ -108,6 +108,11 @@ struct CindexHasher { }; +struct CindexVectorHasher { + size_t operator () (const std::vector &cindex_vector) const; +}; + + // this will only be used for pretty-printing. 
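The new CindexVectorHasher makes whole vectors of Cindexes usable as hash-map keys. A sketch of the interning pattern this enables (the helper below is assumed usage, not from the patch; CreateMatrixPairs() later in this same patch does essentially this):

    #include <unordered_map>
    #include <vector>
    #include "nnet3/nnet-common.h"

    namespace kaldi {
    namespace nnet3 {

    // Map from a vector of Cindexes to a small integer id, using the
    // new hasher; relies on Index's operator== for key equality.
    typedef std::unordered_map<std::vector<Cindex>, int32,
                               CindexVectorHasher> CindexVectorMap;

    int32 GetOrAddId(const std::vector<Cindex> &key, CindexVectorMap *map) {
      CindexVectorMap::const_iterator iter = map->find(key);
      if (iter != map->end())
        return iter->second;
      // ids start from 1; 0 could be reserved as "invalid".
      int32 id = static_cast<int32>(map->size()) + 1;
      (*map)[key] = id;
      return id;
    }

    }  // namespace nnet3
    }  // namespace kaldi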
void PrintCindex(std::ostream &ostream, const Cindex &cindex, diff --git a/src/nnet3/nnet-compile-test.cc b/src/nnet3/nnet-compile-test.cc index c0e1b6f8b5b..da08253093a 100644 --- a/src/nnet3/nnet-compile-test.cc +++ b/src/nnet3/nnet-compile-test.cc @@ -19,6 +19,7 @@ #include "nnet3/nnet-nnet.h" #include "nnet3/nnet-compile.h" +#include "nnet3/nnet-compile-online.h" #include "nnet3/nnet-test-utils.h" namespace kaldi { @@ -115,17 +116,74 @@ void UnitTestNnetCompileMulti() { } + +void UnitTestNnetCompileOnline() { + for (int32 n = 0; n < 20; n++) { + struct NnetGenerationOptions gen_config; + gen_config.allow_ivector = true; + + std::vector configs; + GenerateConfigSequence(gen_config, &configs); + Nnet nnet; + for (size_t j = 0; j < configs.size(); j++) { + KALDI_LOG << "Input config[" << j << "] is: " << configs[j]; + std::istringstream is(configs[j]); + nnet.ReadConfig(is); + } + + ComputationRequest request1, request2, request3; + int32 chunk_size_min = RandInt(5, 15); + int32 frame_subsampling_factor = RandInt(1, 3), + extra_left_context_begin = RandInt(0, 10), + extra_right_context = RandInt(0, 10), + num_sequences = RandInt(1, 2); + int32 chunk_size = GetChunkSize(nnet, frame_subsampling_factor, + chunk_size_min), + ivector_period = chunk_size; + + + + ModifyNnetIvectorPeriod(ivector_period, &nnet); + KALDI_LOG << "Nnet info after modifying ivector period is: " + << nnet.Info(); + CreateOnlineComputationRequestSimple( + nnet, chunk_size, frame_subsampling_factor, + ivector_period, extra_left_context_begin, extra_right_context, + num_sequences, &request1, &request2, &request3); + + KALDI_LOG << "Computation request 1 is:"; + request1.Print(std::cerr); + KALDI_LOG << "Computation request 2 is:"; + request2.Print(std::cerr); + KALDI_LOG << "Computation request 3 is:"; + request3.Print(std::cerr); + + NnetOptimizeOptions optimize_opts; + // todo: set optimize-online=true. 
+ NnetComputation computation; + CompileOnline(nnet, optimize_opts, + request1, request2, request3, + &computation); + KALDI_LOG << "Compiled online computation is "; + computation.Print(std::cerr, nnet); + } +} + + + } // namespace nnet3 } // namespace kaldi int main() { using namespace kaldi; using namespace kaldi::nnet3; - // SetVerboseLevel(2); + SetVerboseLevel(2); + UnitTestNnetCompileOnline(); UnitTestNnetCompile(); UnitTestNnetCompileMulti(); + KALDI_LOG << "Nnet tests succeeded."; return 0; diff --git a/src/nnet3/nnet-compile.h b/src/nnet3/nnet-compile.h index 36fcf84fbf1..20114206ceb 100644 --- a/src/nnet3/nnet-compile.h +++ b/src/nnet3/nnet-compile.h @@ -1,6 +1,6 @@ // nnet3/nnet-compile.h -// Copyright 2015 Johns Hopkins University (author: Daniel Povey) +// Copyright 2015-2016 Johns Hopkins University (author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // diff --git a/src/nnet3/nnet-computation-graph.cc b/src/nnet3/nnet-computation-graph.cc index 422a14bfe4c..1761dd1b775 100644 --- a/src/nnet3/nnet-computation-graph.cc +++ b/src/nnet3/nnet-computation-graph.cc @@ -956,7 +956,7 @@ void ComputationGraphBuilder::ComputeRequiredArray( end = dependencies.end(); for (; iter != end; ++iter) { int32 d = *iter; - if (!(*required)[d - start_cindex_id]){ + if (d >= start_cindex_id && !(*required)[d - start_cindex_id]){ (*required)[d - start_cindex_id] = true; queue.push_back(d); } diff --git a/src/nnet3/nnet-computation.cc b/src/nnet3/nnet-computation.cc index af2267577cc..907bd2633d8 100644 --- a/src/nnet3/nnet-computation.cc +++ b/src/nnet3/nnet-computation.cc @@ -596,7 +596,7 @@ static void PrintCommand(std::ostream &os, os << "[no-op]\n"; break; case kNoOperationMarker: - os << "# begin backward commands\n"; + os << "# computation segment separator [e.g., begin backward commands]\n"; break; default: KALDI_ERR << "Un-handled command type."; diff --git a/src/nnet3/nnet-example-utils.h b/src/nnet3/nnet-example-utils.h index 3e309e18915..6ebffcf1d50 100644 --- a/src/nnet3/nnet-example-utils.h +++ b/src/nnet3/nnet-example-utils.h @@ -51,7 +51,7 @@ void ShiftExampleTimes(int32 t_offset, /** This function takes a NnetExample (which should already have been frame-selected, if desired, and merged into a minibatch) and produces a - ComputationRequest. It ssumes you don't want the derivatives w.r.t. the + ComputationRequest. It assumes you don't want the derivatives w.r.t. the inputs; if you do, you can create/modify the ComputationRequest manually. Assumes that if need_model_derivative is true, you will be supplying derivatives w.r.t. all outputs. diff --git a/src/nnet3/nnet-nnet.h b/src/nnet3/nnet-nnet.h index 16e8333d5b1..e999f20f4f5 100644 --- a/src/nnet3/nnet-nnet.h +++ b/src/nnet3/nnet-nnet.h @@ -249,7 +249,17 @@ class Nnet { void ResetGenerators(); // resets random-number generators for all // random components. You must also set srand() for this to be // effective. - + + + // This function outputs to "config_lines" the lines of a config file. If you + // provide include_dim=false, this will enable you to reconstruct the nodes in + // the network (but not the components, which need to be written separately). + // If you provide include_dim=true, it also adds extra information about + // node dimensions which is useful for a human reader but won't be + // accepted as the config-file format. 
+  void GetConfigLines(bool include_dim,
+                      std::vector<std::string> *config_lines) const;
+
  private:
   void Destroy();
@@ -261,14 +271,6 @@ class Nnet {
   // include dimension information that would not be provided in a config file.
   std::string GetAsConfigLine(int32 node_index, bool include_dim) const;

-  // This function outputs to "config_lines" the lines of a config file.  If you
-  // provide include_dim=false, this will enable you to reconstruct the nodes in
-  // the network (but not the components, which need to be written separately).
-  // If you provide include_dim=true, it also adds extra information about
-  // node dimensions which is useful for a human reader but won't be
-  // accepted as the config-file format.
-  void GetConfigLines(bool include_dim,
-                      std::vector<std::string> *config_lines) const;

   // This function is used when reading config files; it exists in order to
   // handle replacement of existing nodes.  The two input vectors have the same
diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc
index ba8c056f418..3b7dda18e96 100644
--- a/src/nnet3/nnet-optimize-utils.cc
+++ b/src/nnet3/nnet-optimize-utils.cc
@@ -1661,5 +1661,153 @@ void LimitDerivativeTimes(const Nnet &nnet,
   limiter.LimitDerivTimes();
 }

+
+class ComputationOnlineOptimizer {
+ public:
+  ComputationOnlineOptimizer(const Nnet &nnet,
+                             NnetComputation *computation):
+      nnet_(nnet), computation_(computation) { }
+  bool Optimize();
+
+ private:
+  // This function creates a mapping from a matrix-index > 0,
+  // to a pair (time_offset, unique_id) that represents the debug-info
+  // for that matrix-id in computation.matrix_debug_info.
+  // The output vector is indexed by the matrix-index in the computation (the
+  // zeroth member is not valid).
+  // The 'time_offset' is equal to the 't' value of the zeroth element of the
+  // cindexes vector.  The 'unique_id' is an integer that uniquely identifies
+  // what we get from subtracting the 'time_offset' from each 't' value of
+  // that 'cindexes' vector, and then pairing it up with the 'is_deriv'
+  // value of the DebugInfo.  That is, if two 'cindexes' vectors differ only
+  // by a time offset, and the 'is_deriv' values are the same, they will map
+  // to the same unique_id.
+  static void CreateMatrixPairs(const NnetComputation &computation,
+                                std::vector<std::pair<int32, int32> > *matrix_to_pair);
+
+
+  /// Given a list of command indexes ('segment_end_commands') which are
+  /// expected to be command indexes of the kNoOperationMarker at segment
+  /// boundaries, this function outputs for each of these command indexes a list
+  /// of matrices which are 'active' at that point in time.  By 'active' we mean
+  /// that the matrix has been written to before that time (note, we don't count
+  /// initialization with zeros as being written to), and will be read after
+  /// that time.  This is the list of matrices that 'need to be in scope'
+  /// at those points in time.  '*active_matrices' is indexed by the
+  /// same index as 'segment_end_commands', and is then a list of active
+  /// matrices, in numerical order of matrix index.
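The (time_offset, unique_id) normalization that CreateMatrixPairs() describes above can be pictured with two matrices whose cindexes differ only by a shift in t. A toy sketch, with Cindex simplified to a (node, t) pair (illustrative only, not from the patch):

    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      typedef std::pair<int, int> Cindex;  // simplified here to (node-index, t)
      // Debug-info cindexes of two matrices; 'b' is 'a' shifted by t += 3.
      std::vector<Cindex> a, b;
      for (int t = 10; t <= 12; t++) a.push_back(Cindex(4, t));
      for (int t = 13; t <= 15; t++) b.push_back(Cindex(4, t));
      // Normalize: record the first t as time_offset, subtract it throughout.
      int offset_a = a[0].second, offset_b = b[0].second;
      for (size_t i = 0; i < a.size(); i++) a[i].second -= offset_a;
      for (size_t i = 0; i < b.size(); i++) b[i].second -= offset_b;
      // The normalized vectors compare equal, so (given equal is_deriv flags)
      // both matrices would get the same unique_id and differ only in their
      // time_offsets (10 vs. 13), which is the condition the online optimizer
      // needs in order to identify matrices across segment boundaries.
      assert(a == b && offset_b - offset_a == 3);
      return 0;
    }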
+  static void FindActiveMatrices(const NnetComputation &computation,
+                                 const std::vector<int32> &segment_end_commands,
+                                 const Analyzer &analyzer,
+                                 std::vector<std::vector<int32> > *active_matrices);
+
+
+  const Nnet &nnet_;
+  NnetComputation *computation_;
+  Analyzer analyzer_;
+  std::vector<std::pair<int32, int32> > matrix_to_pair_;
+
+  std::vector<int32> segment_end_commands_;
+
+
+};
+
+
+// static
+void ComputationOnlineOptimizer::CreateMatrixPairs(
+    const NnetComputation &computation,
+    std::vector<std::pair<int32, int32> > *matrix_to_pair) {
+  typedef unordered_map<std::vector<Cindex>, int32,
+                        CindexVectorHasher> MapType;
+  int32 cur_vector_id = 1;
+  // Note: cindex_map just maps the vector to a unique value,
+  // and then we manually work out a unique id that takes into
+  // account the 'is_deriv' values.
+  MapType cindex_map;
+  int32 num_matrices = computation.matrices.size();
+  matrix_to_pair->resize(num_matrices);
+  KALDI_ASSERT(computation.matrix_debug_info.size() == num_matrices);
+  for (int32 m = 1; m < num_matrices; m++) {
+    KALDI_ASSERT(!computation.matrix_debug_info[m].cindexes.empty());
+    std::vector<Cindex> cindexes = computation.matrix_debug_info[m].cindexes;
+    int32 t_offset = cindexes[0].second.t;
+    for (std::vector<Cindex>::iterator iter = cindexes.begin();
+         iter != cindexes.end(); ++iter)
+      iter->second.t -= t_offset;
+    MapType::const_iterator iter = cindex_map.find(cindexes);
+    int32 vector_id;
+    if (iter != cindex_map.end()) {
+      vector_id = iter->second;
+    } else {
+      vector_id = cur_vector_id++;
+      cindex_map[cindexes] = vector_id;
+    }
+    bool is_deriv = computation.matrix_debug_info[m].is_deriv;
+    int32 unique_id = 2 * vector_id + (is_deriv ? 1 : 0);
+    (*matrix_to_pair)[m].first = t_offset;
+    (*matrix_to_pair)[m].second = unique_id;
+  }
+}
+
+
+// static
+void ComputationOnlineOptimizer::FindActiveMatrices(
+    const NnetComputation &computation,
+    const std::vector<int32> &segment_end_commands,
+    const Analyzer &analyzer,
+    std::vector<std::vector<int32> > *active_matrices) {
+  int32 num_matrices = computation.matrices.size();
+  int32 num_segments = segment_end_commands.size();
+  active_matrices->clear();
+  active_matrices->resize(num_segments);
+  // this object just makes available some extra functions.
+  ComputationAnalysis analysis(computation, analyzer);
+  for (int32 s = 0; s + 1 < num_segments; s++) {
+    KALDI_ASSERT(segment_end_commands[s] < segment_end_commands[s+1]);
+  }
+  // the following vector gives us, for each matrix index, a submatrix index
+  // that covers the whole of that matrix (needed by interface of 'analysis' object).
+  std::vector<int32> whole_submatrices;
+  computation.GetWholeSubmatrices(&whole_submatrices);
+  for (int32 m = 1; m < num_matrices; m++) {
+    // the following are command indexes, comparable with the indexes
+    // in 'segment_end_commands'.
+    int32 s = whole_submatrices[m];  // submatrix consisting of the whole of
+                                     // 'm'.
+    int32 first_access = analysis.FirstAccess(s),
+        last_access = analysis.LastAccess(s);
+    std::vector<int32>::const_iterator iter = segment_end_commands.begin(),
+        end = segment_end_commands.end();
+    for (; iter != end; ++iter) {
+      int32 segment_end = *iter;
+      if (first_access < segment_end && last_access > segment_end) {
+        // TODO.
+      }
+    }
+  }
+
+}
+
+bool ComputationOnlineOptimizer::Optimize() {
+  analyzer_.Init(nnet_, *computation_);
+  KALDI_ASSERT(!computation_->matrix_debug_info.empty() &&
+               "You must request matrix debug info when compiling "
+               "online computations.");
+
+  // TODO.
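+  // (Sketch of the intended steps, in terms of the helper functions declared
+  // above: locate the segment-end commands, find the matrices active at each
+  // of those points, represent them via CreateMatrixPairs(), and look for two
+  // segment ends whose active matrices are identical up to a time offset.)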
+
+  return false;
+}
+
+
+bool OptimizeOnlineComputation(const Nnet &nnet,
+                               NnetComputation *computation) {
+  ComputationOnlineOptimizer optimizer(nnet, computation);
+  return optimizer.Optimize();
+}
+
+
+
+
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/nnet-optimize-utils.h b/src/nnet3/nnet-optimize-utils.h
index 0d3a03be224..11a04354016 100644
--- a/src/nnet3/nnet-optimize-utils.h
+++ b/src/nnet3/nnet-optimize-utils.h
@@ -507,7 +507,6 @@ void LimitDerivativeTimes(const Nnet &nnet,
                           int32 max_deriv_time,
                           NnetComputation *computation);
 
-
 /// This function detects submatrices, matrices, and members of indexes_multi
 /// and indexes that are never used (e.g. due to changes made in other
 /// optimization code), and removes them from the computation by way of suitable
@@ -533,7 +532,6 @@ void IdentifySubmatrixArgs(NnetComputation::Command *command,
 void IdentifySubmatrixArgs(std::vector<NnetComputation::Command> *commands,
                            std::vector<int32*> *submatrix_args);
 
-
 /// This function outputs to "submatrix_args" the addresses of integers in
 /// 'computation' that correspond to submatrices.  These may be present in
 /// 'commands', and in 'indexes_multi'.  This is useful in renumbering code.
@@ -568,7 +566,18 @@ void IdentifyIndexesArgs(std::vector<NnetComputation::Command> *commands,
 void IdentifyIndexesRangesArgs(std::vector<NnetComputation::Command> *commands,
                                std::vector<int32*> *indexes_ranges_args);
 
-
+/// This function tries to optimize computation 'computation' for an 'online'
+/// computation.  It expects as input a computation with no backprop but with
+/// multiple 'segments' separated by command kNoOperationMarker, where each segment
+/// corresponds to a new chunk of input and output.  It tries to locate a pair
+/// of segment boundaries, with command indexes c1 and c2, where the active
+/// matrices have the same debug-info other than a time offset and can be
+/// identified with each other, and the no-op command at c2 can be replaced with
+/// 'goto c1', creating a computation that 'goes on forever'.
+/// It returns true if it successfully did this.  [If this happens, the
+/// whole computation may have to be regenerated with more segments.]
+bool OptimizeOnlineComputation(const Nnet &nnet,
+                               NnetComputation *computation);
 
 /*
diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h
index 127b30b5278..a0208343f7d 100644
--- a/src/nnet3/nnet-optimize.h
+++ b/src/nnet3/nnet-optimize.h
@@ -1,7 +1,7 @@
 // nnet3/nnet-optimize.h
 
-// Copyright 2015    Johns Hopkins University (author: Daniel Povey)
-//           2015    Xiaohui Zhang
+// Copyright 2015-2016   Johns Hopkins University (author: Daniel Povey)
+//           2015        Xiaohui Zhang
 
 // See ../../COPYING for clarification regarding multiple authors
 //
diff --git a/src/nnet3/nnet-test-utils.cc b/src/nnet3/nnet-test-utils.cc
index e02b9cd139a..ae0481fa332 100644
--- a/src/nnet3/nnet-test-utils.cc
+++ b/src/nnet3/nnet-test-utils.cc
@@ -104,11 +104,15 @@ void GenerateConfigSequenceSimple(
     splice_context.push_back(0);
 
   int32 input_dim = 10 + Rand() % 20,
-      spliced_dim = input_dim * splice_context.size(),
       output_dim = (opts.output_dim > 0 ?
opts.output_dim : 100 + Rand() % 200), hidden_dim = 40 + Rand() % 50; + int32 ivector_dim = 10 + Rand() % 20; + if (RandInt(0, 1) == 0 || !opts.allow_ivector) + ivector_dim = 0; + int32 spliced_dim = input_dim * splice_context.size() + ivector_dim; + bool use_final_nonlinearity = (opts.allow_final_nonlinearity && RandInt(0, 1) == 0); os << "component name=affine1 type=NaturalGradientAffineComponent input-dim=" @@ -127,8 +131,12 @@ void GenerateConfigSequenceSimple( } } os << "input-node name=input dim=" << input_dim << std::endl; + if (ivector_dim != 0) + os << "input-node name=ivector dim=" << ivector_dim << std::endl; os << "component-node name=affine1_node component=affine1 input=Append("; + if (ivector_dim != 0) + os << "ReplaceIndex(ivector, t, 0), "; for (size_t i = 0; i < splice_context.size(); i++) { int32 offset = splice_context[i]; os << "Offset(input, " << offset << ")"; diff --git a/src/nnet3/nnet-test-utils.h b/src/nnet3/nnet-test-utils.h index d2034dcfdc6..b6976f70ab1 100644 --- a/src/nnet3/nnet-test-utils.h +++ b/src/nnet3/nnet-test-utils.h @@ -39,6 +39,7 @@ struct NnetGenerationOptions { bool allow_multiple_outputs; bool allow_final_nonlinearity; bool allow_use_of_x_dim; + bool allow_ivector; // if set to a value >0, the output-dim of the network // will be set to this value. int32 output_dim; @@ -52,6 +53,7 @@ struct NnetGenerationOptions { allow_multiple_outputs(false), allow_final_nonlinearity(true), allow_use_of_x_dim(true), + allow_ivector(false), output_dim(-1) { } }; diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc index ff963208bfe..ed20257c7fe 100644 --- a/src/nnet3/nnet-utils.cc +++ b/src/nnet3/nnet-utils.cc @@ -74,7 +74,7 @@ void EvaluateComputationRequest( if (GetVerboseLevel() >= 2) { std::ostringstream graph_pretty; graph.Print(graph_pretty, nnet.GetNodeNames()); - KALDI_VLOG(2) << "Graph is " << graph_pretty.str(); + KALDI_VLOG(3) << "Graph is " << graph_pretty.str(); } } @@ -103,9 +103,16 @@ static void ComputeSimpleNnetContextForShift( input.indexes.push_back(Index(n, t)); output.indexes.push_back(Index(n, t)); } - // the assumption here is that the network just requires the ivector at time - // t=0. - ivector.indexes.push_back(Index(n, 0)); + + // most networks will just require the ivector at time t = 0, + // but this might not always be the case, and some might use rounding + // descriptors with the iVector which might require it at an earlier + // frame than the regular input, so we provide the iVector in as wide a range + // as it might possibly be needed. + for (int32 t = input_start - nnet.Modulus(); t < input_end; t++) { + ivector.indexes.push_back(Index(n, t)); + } + ComputationRequest request; request.inputs.push_back(input); diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h index 1e0dcefd703..bef783886e2 100644 --- a/src/nnet3/nnet-utils.h +++ b/src/nnet3/nnet-utils.h @@ -97,7 +97,7 @@ bool IsSimpleNnet(const Nnet &nnet); void ZeroComponentStats(Nnet *nnet); -/// ComputeNnetContext computes the left-context and right-context of a nnet. +/// ComputeSimpleNnetContext computes the left-context and right-context of a nnet. /// The nnet must satisfy IsSimpleNnet(nnet). /// /// It does this by constructing a ComputationRequest with a certain number of inputs @@ -180,7 +180,7 @@ void ConvertRepeatedToBlockAffine(Nnet *nnet); /// Info() function (we need this in the CTC code). 
std::string NnetInfo(const Nnet &nnet);
 
-/// This function sets the dropout proportion in all dropout component to 
+/// This function sets the dropout proportion in all dropout components to
 /// dropout_proportion value.
 void SetDropoutProportion(BaseFloat dropout_proportion,
                           Nnet *nnet);

From 2a6dff406a2ab692d3b0931426ee6d695de4613a Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Mon, 7 Nov 2016 00:51:34 -0500
Subject: [PATCH 009/530] Going some way towards optimization for online
 decoding (identified pieces to splice).

---
 src/nnet3/nnet-optimize-utils.cc | 371 +++++++++++++++++++++++++++++--
 src/nnet3/nnet-optimize.cc       |  55 +++--
 src/nnet3/nnet-optimize.h        |   7 +-
 3 files changed, 390 insertions(+), 43 deletions(-)

diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc
index 3b7dda18e96..d2d6daf2a47 100644
--- a/src/nnet3/nnet-optimize-utils.cc
+++ b/src/nnet3/nnet-optimize-utils.cc
@@ -1670,8 +1670,13 @@ class ComputationOnlineOptimizer {
   bool Optimize();
 
  private:
+
+  // Figures out the time shift between the successive computation requests.
+  static int32 FindTimeShift(const NnetComputation &computation,
+                             const std::vector<int32> &segment_ends);
+
   // This function creates a mapping from a matrix-index > 0,
-  // to a pair (time_offset, unique_id) that represents the debug-info
+  // to a pair (unique_id, time_offset) that represents the debug-info
   // for that matrix-id in computation.debug_info.
   // The output vector is indexed by the matrix-index in the computation (the
   // zeroth member is not valid).  It requires the computation to have
   // matrix debug-info set up (computation.matrix_debug_info).
   // The 'time_offset' is equal to the 't' value of the zeroth element of the
   // cindexes vector.  The 'unique_id' is an integer that uniquely identifies
   // what we get from subtracting the 'time_offset' from each 't' value of
   // that 'cindexes' vector, and then pairing it up with the 'is_deriv'
   // value of the DebugInfo.  That is, if two 'cindexes' vectors differ only
   // by a time offset and the 'is_deriv' values are the same, they will map
   // to the same unique_id.
+  // The output 'matrix_to_pair' is indexed by matrix index (the zeroth element is
+  // not set).
   static void CreateMatrixPairs(const NnetComputation &computation,
                                 std::vector<std::pair<int32, int32> > *matrix_to_pair);
 
+  // This very simple helper function reverses the map 'matrix_to_pair' so we can
+  // do the reverse lookup.  It outputs a map from pair to matrix index m, where
+  // 1 <= m < matrix_to_pair.size().
+  static void GetPairToMatrixMap(
+      std::vector<std::pair<int32, int32> > &matrix_to_pair,
+      unordered_map<std::pair<int32, int32>, int32, PairHasher<int32> > *pair_to_matrix);
+
+
+  // Given a vector of lists, one list for each segment, of the active matrices
+  // at the end of that segment, this function converts those lists into a
+  // different representation where each matrix is represented as a pair instead
+  // of as a single int32.  'active_pairs' will have the same dimensions as
+  // 'active_matrices'.
+  static void ConvertListsToPairLists(
+      const std::vector<std::vector<int32> > &active_matrices,
+      const std::vector<std::pair<int32, int32> > &matrix_to_pair,
+      std::vector<std::vector<std::pair<int32, int32> > > *active_pairs);
+
+  // This function modifies the lists of active matrices per segment
+  // (represented as pairs) in 'active_pairs' by sorting them and
+  // then subtracting the time-offset of the first pair in each
+  // list ((*active_pairs)[seg][0].second), from all elements in that list.
+  // It puts the subtracted offset in (*time_offsets)[seg].  This change
+  // of representation makes it easy to tell whether the sets of active
+  // matrices for different segments are identical up to a time-offset.
+  static void NormalizePairLists(
+      std::vector<std::vector<std::pair<int32, int32> > > *active_pairs,
+      std::vector<int32> *time_offsets);
+
+  // This function looks in the matrix 'active_pairs' for the first pair of
+  // identical values, i.e.
it is looking for i < j for which
+  // normalized_active_pairs[i] == normalized_active_pairs[j].  If there
+  // is such a pair it outputs them to *seg1 and *seg2, and returns true;
+  // otherwise it returns false.
+  //
+  // Update to the above: It turns out that under some circumstances, the
+  // original function found repeats that were not "really" repeats (the
+  // matrices were not time shifted).  The situation was a bit obscure (it was a
+  // non-recurrent setup with a lot of extra-right-context, where some inputs
+  // were never used), but to prevent it happening again we are now checking,
+  // in addition to the above, that the time-shift between the segments
+  // (i.e. time_offsets[j] - time_offsets[i]) has the "expected value"
+  // based on the assumption that each segment should be shifted relative
+  // to the previous segment, by 'time_shift_per_segment'.
+  static bool FindFirstRepeat(
+      const std::vector<std::vector<std::pair<int32, int32> > > &normalized_active_pairs,
+      const std::vector<int32> &time_offsets,
+      int32 time_shift_per_segment,
+      int32 *seg1, int32 *seg2);
+
+  // Converts a list of pairs (e.g. one of the elements of the output of
+  // 'ConvertListsToPairLists'), back into a list of matrix indexes, using the
+  // map 'pair_to_matrix'.
+  static void PairListToMatrixList(
+      const std::vector<std::pair<int32, int32> > &pair_list,
+      const unordered_map<std::pair<int32, int32>, int32, PairHasher<int32> > &pair_to_matrix,
+      std::vector<int32> *matrix_list);
+
+
+  // This function just does some checking (via asserts), that
+  // the lists of matrices 'list1' and 'list2' are of the same length,
+  // that time_difference > 0, and that each matrix with index m = list2[i] is of the
+  // same dimension as list1[i], with Cindexes that are the same except for
+  // the time index being greater by 'time_difference'.
+  static void CheckIdentifiedMatrices(
+      const NnetComputation &computation,
+      const std::vector<int32> &list1,
+      const std::vector<int32> &list2,
+      int32 time_difference);
+
 
   /// Given a list of command indexes ('segment_end_commands') which are
   /// expected to be command indexes of the kNoOperationMarker at segment
   /// boundaries, this function outputs for each of these command indexes a list
   /// of matrices which are 'active' at that point in time.  By 'active' we mean
   /// that the matrix has been written to before that time (note, we don't count
   /// initialization with zeros as being written to); and will be read after
   /// that time.  This is the list of matrices that 'need to be in scope'
   /// at those points in time.  '*active_matrices' is indexed by the
   /// same index as 'segment_end_commands', and is then a list of active
   /// matrices, in numerical order of matrix index.
   static void FindActiveMatrices(const NnetComputation &computation,
-                                 const std::vector<int32> &segment_end_commands,
                                  const Analyzer &analyzer,
+                                 const std::vector<int32> &segment_end_commands,
                                  std::vector<std::vector<int32> > *active_matrices);
 
 
@@ -1713,6 +1791,56 @@
 };
 
 
+// static
+int32 ComputationOnlineOptimizer::FindTimeShift(
+    const NnetComputation &computation,
+    const std::vector<int32> &segment_ends) {
+  KALDI_ASSERT(segment_ends.size() >= 3);
+  // Ignore the first segment as it tends to be a special case
+  // (it has more left context).
+  int32 second_segment_begin = segment_ends[0],
+      third_segment_begin = segment_ends[1],
+      fourth_segment_begin = segment_ends[2];
+  int32 first_output_command_seg2 = -1,
+      first_output_command_seg3 = -1;
+  for (int32 c = second_segment_begin; c < third_segment_begin; c++)
+    if (computation.commands[c].command_type == kProvideOutput &&
+        first_output_command_seg2 < 0)
+      first_output_command_seg2 = c;
+  for (int32 c = third_segment_begin; c < fourth_segment_begin; c++)
+    if (computation.commands[c].command_type == kProvideOutput &&
+        first_output_command_seg3 < 0)
+      first_output_command_seg3 = c;
+  if (first_output_command_seg2 < 0 ||
+      first_output_command_seg3 < 0)
+    KALDI_ERR << "Could not locate output commands for segments 2 and 3.";
+  const NnetComputation::Command
+      &command2 = computation.commands[first_output_command_seg2],
+      &command3 = computation.commands[first_output_command_seg3];
+  int32 seg2_node = command2.arg2, seg3_node = command3.arg2;
+  KALDI_ASSERT(seg2_node == seg3_node);
+  int32 seg2_submatrix = command2.arg1,
+      seg3_submatrix = command3.arg1;
+  KALDI_ASSERT(computation.IsWholeMatrix(seg2_submatrix) &&
+               computation.IsWholeMatrix(seg3_submatrix));
+  int32 seg2_matrix = computation.submatrices[seg2_submatrix].matrix_index,
+      seg3_matrix = computation.submatrices[seg3_submatrix].matrix_index;
+  KALDI_ASSERT(computation.matrices[seg2_matrix].num_rows ==
+               computation.matrices[seg3_matrix].num_rows);
+  KALDI_ASSERT(!computation.matrix_debug_info.empty());
+  const NnetComputation::MatrixDebugInfo
+      &debug_info2 = computation.matrix_debug_info[seg2_matrix],
+      &debug_info3 = computation.matrix_debug_info[seg3_matrix];
+  int32 t_offset = debug_info3.cindexes[0].second.t -
+      debug_info2.cindexes[0].second.t;
+  int32 num_rows = debug_info2.cindexes.size();
+  for (int32 r = 0; r < num_rows; r++) {
+    KALDI_ASSERT(debug_info3.cindexes[r].second.t ==
+                 debug_info2.cindexes[r].second.t + t_offset);
+  }
+  return t_offset;
+}
+
 // static
 void ComputationOnlineOptimizer::CreateMatrixPairs(
@@ -1744,27 +1872,135 @@ void ComputationOnlineOptimizer::CreateMatrixPairs(
     }
     bool is_deriv = computation.matrix_debug_info[m].is_deriv;
     int32 unique_id = 2 * vector_id + (is_deriv ? 1 : 0);
-    (*matrix_to_pair)[m].first = t_offset;
-    (*matrix_to_pair)[m].second = unique_id;
+    (*matrix_to_pair)[m].first = unique_id;
+    (*matrix_to_pair)[m].second = t_offset;
   }
 }
 
+// static
+void ComputationOnlineOptimizer::GetPairToMatrixMap(
+      std::vector<std::pair<int32, int32> > &matrix_to_pair,
+      unordered_map<std::pair<int32, int32>, int32, PairHasher<int32> > *pair_to_matrix) {
+  int32 num_matrices = matrix_to_pair.size();
+  // actually there is one fewer matrix than num_matrices.
+  pair_to_matrix->clear();
+  for (int32 m = 1; m < num_matrices; m++)
+    (*pair_to_matrix)[matrix_to_pair[m]] = m;
+}
+
+
+// static
+void ComputationOnlineOptimizer::ConvertListsToPairLists(
+      const std::vector<std::vector<int32> > &active_matrices,
+      const std::vector<std::pair<int32, int32> > &matrix_to_pair,
+      std::vector<std::vector<std::pair<int32, int32> > > *active_pairs) {
+  active_pairs->clear();
+  active_pairs->resize(active_matrices.size());
+  int32 num_matrices = matrix_to_pair.size();
+  for (size_t seg = 0; seg < active_matrices.size(); seg++) {
+    const std::vector<int32> &this_active_matrix_list = active_matrices[seg];
+    std::vector<std::pair<int32, int32> > &this_active_pair_list =
+        (*active_pairs)[seg];
+    this_active_pair_list.resize(this_active_matrix_list.size());
+    std::vector<int32>::const_iterator iter = this_active_matrix_list.begin(),
+        end = this_active_matrix_list.end();
+    std::vector<std::pair<int32, int32> >::iterator
+        out_iter = this_active_pair_list.begin();
+    for (; iter != end; ++iter, ++out_iter) {
+      KALDI_ASSERT(*iter > 0 && *iter < num_matrices);
+      *out_iter = matrix_to_pair[*iter];
+    }
+  }
+}
+
+// static
+void ComputationOnlineOptimizer::NormalizePairLists(
+      std::vector<std::vector<std::pair<int32, int32> > > *active_pairs,
+      std::vector<int32> *time_offsets) {
+  int32 num_segments = active_pairs->size();
+  time_offsets->resize(num_segments);
+  for (int32 seg = 0; seg < num_segments; seg++) {
+    std::vector<std::pair<int32, int32> > &this_pairs = (*active_pairs)[seg];
+    std::sort(this_pairs.begin(), this_pairs.end());
+    int32 this_offset;
+    if (!this_pairs.empty()) {
+      this_offset = this_pairs[0].second;
+    } else {
+      // if this_pairs is empty, produce arbitrary offsets that are increasing
+      // (this will keep some self-testing code happy).
+      if (seg == 0) { this_offset = 0; }
+      else { this_offset = (*time_offsets)[seg - 1] + 1; }
+    }
+    (*time_offsets)[seg] = this_offset;
+    std::vector<std::pair<int32, int32> >::iterator
+        iter = this_pairs.begin(), end = this_pairs.end();
+    for (; iter != end; ++iter)
+      iter->second -= this_offset;
+  }
+}
+
+
+// static
+bool ComputationOnlineOptimizer::FindFirstRepeat(
+    const std::vector<std::vector<std::pair<int32, int32> > > &normalized_active_pairs,
+    const std::vector<int32> &time_offsets,
+    int32 time_shift_per_segment,
+    int32 *seg1, int32 *seg2) {
+  int32 num_segments = normalized_active_pairs.size();
+  // This algorithm may seem like it would be very slow, but the number of
+  // segments will normally be quite small (e.g. 10), and the comparison of
+  // elements of 'normalized_active_pairs' should be fast in cases where they
+  // differ.
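+  // (For example, if time_shift_per_segment == 3, then segments s and t can
+  // only match if time_offsets[t] - time_offsets[s] == (t - s) * 3, in
+  // addition to their normalized pair-lists comparing equal.)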
+  for (int32 s = 0; s < num_segments; s++) {
+    for (int32 t = s + 1; t < num_segments; t++) {
+      if (time_offsets[t] - time_offsets[s] == (t - s) * time_shift_per_segment
+          && normalized_active_pairs[s] == normalized_active_pairs[t]) {
+        *seg1 = s;
+        *seg2 = t;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// static
+void ComputationOnlineOptimizer::PairListToMatrixList(
+      const std::vector<std::pair<int32, int32> > &pair_list,
+      const unordered_map<std::pair<int32, int32>, int32, PairHasher<int32> > &pair_to_matrix,
+      std::vector<int32> *matrix_list) {
+  matrix_list->resize(pair_list.size());
+  std::vector<std::pair<int32, int32> >::const_iterator
+      iter = pair_list.begin(), end = pair_list.end();
+  std::vector<int32>::iterator out_iter = matrix_list->begin();
+  for (; iter != end; ++iter, ++out_iter) {
+    unordered_map<std::pair<int32, int32>, int32,
+                  PairHasher<int32> >::const_iterator
+        map_iter = pair_to_matrix.find(*iter);
+    if (map_iter == pair_to_matrix.end()) {
+      KALDI_ERR << "Could not find pair in map (code error)";
+    }
+    *out_iter = map_iter->second;
+  }
+}
+
+
 // static
 void ComputationOnlineOptimizer::FindActiveMatrices(
     const NnetComputation &computation,
-    const std::vector<int32> &segment_end_commands,
     const Analyzer &analyzer,
+    const std::vector<int32> &segment_end_commands,
     std::vector<std::vector<int32> > *active_matrices) {
   int32 num_matrices = computation.matrices.size();
   int32 num_segments = segment_end_commands.size();
   active_matrices->clear();
   active_matrices->resize(num_segments);
-  // this object just makes available some extra functions.
+  // this object just makes available some extra functions, vs. the Analyzer
+  // object.
   ComputationAnalysis analysis(computation, analyzer);
-  for (int32 s = 0; s + 1 < num_segments; s++) {
-    KALDI_ASSERT(segment_end_commands[s] < segment_end_commands[s+1]);
-  }
+  KALDI_ASSERT(IsSortedAndUniq(segment_end_commands));
+
   // the following vector gives us, for each matrix index, a submatrix index
   // that covers the whole of that matrix (needed by interface of 'analysis' object).
   std::vector<int32> whole_submatrices;
   computation.GetWholeSubmatrices(&whole_submatrices);
@@ -1772,31 +2008,130 @@ void ComputationOnlineOptimizer::FindActiveMatrices(
   for (int32 m = 1; m < num_matrices; m++) {
     // the following are command indexes, comparable with the indexes
     // in 'segment_end_commands'.
-    int32 s = whole_submatrices[m];  // submatrix consisting of the whole of
+    int32 s = whole_submatrices[m],  // submatrix consisting of the whole of
         // 'm'.
-    int32 first_access = analysis.FirstAccess(s),
+        first_access = analysis.FirstAccess(s),
         last_access = analysis.LastAccess(s);
-    std::vector<int32>::const_iterator iter = segment_end_commands.begin(),
-        end = segment_end_commands.end();
-    for (; iter != end; ++iter) {
-      int32 segment_end = *iter;
+    for (int32 seg = 0; seg < num_segments; seg++) {
+      int32 segment_end = segment_end_commands[seg];
       if (first_access < segment_end && last_access > segment_end) {
-        // TODO.
+        // If the block of time during which the matrix is accessed includes
+        // this segment end-point, then the matrix is considered 'active' at
+        // that time.
+        (*active_matrices)[seg].push_back(m);
       }
     }
   }
 }
 
+// static
+void ComputationOnlineOptimizer::CheckIdentifiedMatrices(
+    const NnetComputation &computation,
+    const std::vector<int32> &list1,
+    const std::vector<int32> &list2,
+    int32 time_difference) {
+  KALDI_ASSERT(time_difference > 0);
+  KALDI_ASSERT(list1.size() == list2.size());
+  KALDI_ASSERT(!computation.matrix_debug_info.empty());
+  for (size_t i = 0; i < list1.size(); i++) {
+    int32 m1 = list1[i], m2 = list2[i];
+    const NnetComputation::MatrixInfo
+        &matrix_info1 = computation.matrices[m1],
+        &matrix_info2 = computation.matrices[m2];
+    KALDI_ASSERT(matrix_info1.num_rows == matrix_info2.num_rows &&
+                 matrix_info1.num_cols == matrix_info2.num_cols &&
+                 matrix_info1.stride_type == matrix_info2.stride_type);
+    const NnetComputation::MatrixDebugInfo
+        &debug_info1 = computation.matrix_debug_info[m1],
+        &debug_info2 = computation.matrix_debug_info[m2];
+    KALDI_ASSERT(debug_info1.is_deriv == debug_info2.is_deriv);
+    KALDI_ASSERT(debug_info1.cindexes.size() == debug_info2.cindexes.size());
+    std::vector<Cindex>::const_iterator iter1 = debug_info1.cindexes.begin(),
+        end1 = debug_info1.cindexes.end(),
+        iter2 = debug_info2.cindexes.begin();
+    for (; iter1 != end1; iter1++,iter2++) {
+      KALDI_ASSERT(iter2->first == iter1->first &&
+                   iter2->second.n == iter1->second.n &&
+                   iter2->second.t == iter1->second.t + time_difference &&
+                   iter2->second.x == iter1->second.x);
+    }
+  }
 }
 
 
 bool ComputationOnlineOptimizer::Optimize() {
   analyzer_.Init(nnet_, *computation_);
   KALDI_ASSERT(!computation_->matrix_debug_info.empty() &&
               "You must request matrix debug info when compiling "
               "online computations.");
 
-  // TODO.
+  // get the indexes of the separator commands at the ends of segments.
+  std::vector<int32> segment_ends;
+  GetSegmentEnds(*computation_, &segment_ends);
+  int32 time_shift_per_segment = FindTimeShift(*computation_,
+                                               segment_ends);
+
+  // Ignore the end of the very last segment; it is not a candidate for a
+  // 'splice point'.  What we're doing here is like creating a tape loop; we
+  // have to find a place where the list of variables is the same except for a
+  // time offset.
+  // [note: it's not exactly like a tape loop because the prologue can
+  // vary... the sequence is of the form like a b b b b b .. ]
+  segment_ends.pop_back();
+
+
+  std::vector<std::vector<int32> > active_matrices;
+  // Find the list of matrices active at each of those segment-end-command
+  // times.
+  FindActiveMatrices(*computation_, analyzer_, segment_ends,
+                     &active_matrices);
+
+  // Find a representation of the matrices of the computation as pairs
+  // (unique_id, time_offset) that are more amenable to finding
+  // matrices that represent lists of Cindexes that differ only by
+  // a time offset.
+  std::vector<std::pair<int32, int32> > matrix_to_pair;
+  CreateMatrixPairs(*computation_, &matrix_to_pair);
+
+  // Create the reverse map from pair to matrix index; we'll need it.
+  unordered_map<std::pair<int32, int32>, int32, PairHasher<int32> > pair_to_matrix;
+  GetPairToMatrixMap(matrix_to_pair, &pair_to_matrix);
+
+  // get lists of matrices per segment in the pair representation.
+  std::vector<std::vector<std::pair<int32, int32> > > pair_lists;
+  ConvertListsToPairLists(active_matrices, matrix_to_pair,
+                          &pair_lists);
+
+  std::vector<int32> time_offsets;
+  NormalizePairLists(&pair_lists, &time_offsets);
+
+  int32 seg1, seg2;
+
+  if (!FindFirstRepeat(pair_lists,
+                       time_offsets,
+                       time_shift_per_segment,
+                       &seg1, &seg2)) {
+    KALDI_VLOG(2) << "Could not find repeats of variables.";
+    return false;
+  }
 
-  return false;
+  // reverse the normalization for segments seg1 and seg2.
+  for (size_t i = 0; i < pair_lists[seg1].size(); i++)
+    pair_lists[seg1][i].second += time_offsets[seg1];
+  for (size_t i = 0; i < pair_lists[seg2].size(); i++)
+    pair_lists[seg2][i].second += time_offsets[seg2];
+  std::vector<int32> seg1_matrices, seg2_matrices;
+  PairListToMatrixList(pair_lists[seg1], pair_to_matrix, &seg1_matrices);
+  PairListToMatrixList(pair_lists[seg2], pair_to_matrix, &seg2_matrices);
+
+  int32 time_difference = time_offsets[seg2] - time_offsets[seg1];
+  CheckIdentifiedMatrices(*computation_, seg1_matrices, seg2_matrices,
+                          time_difference);
+
+  // HERE, do whatever kind of identification we have to do between the two
+  // lists of matrices.
+
+  return true;
 }
 
diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc
index e2022d596ef..824b467174e 100644
--- a/src/nnet3/nnet-optimize.cc
+++ b/src/nnet3/nnet-optimize.cc
@@ -407,9 +407,6 @@ void ConvertAdditionToAssignment(const Nnet &nnet,
 void Optimize(const NnetOptimizeOptions &config,
               const Nnet &nnet,
               NnetComputation *computation) {
-  if (!config.optimize)
-    return;
-
   if (GetVerboseLevel() >= 4)
     CheckComputation(nnet, *computation, true);
 
@@ -421,42 +418,52 @@ void Optimize(const NnetOptimizeOptions &config,
   if (GetVerboseLevel() >= 4)
     CheckComputation(nnet, *computation, true);
 
-  if (config.consolidate_model_update)
+  if (config.optimize && config.consolidate_model_update)
     ConsolidateModelUpdate(nnet, computation);
 
   if (GetVerboseLevel() >= 4)
     CheckComputation(nnet, *computation, true);
 
-  if (config.convert_addition)
+  if (config.optimize && config.convert_addition) {
     ConvertAdditionToAssignment(nnet, computation);
+    if (GetVerboseLevel() >= 4)
+      CheckComputation(nnet, *computation, true);
+  }
 
-  if (GetVerboseLevel() >= 4)
-    CheckComputation(nnet, *computation, true);
-
-  if (config.remove_assignments || config.backprop_in_place ||
-      config.propagate_in_place)
+  if (config.optimize &&
+      (config.remove_assignments || config.backprop_in_place ||
+       config.propagate_in_place)) {
     VariableMergingOptimization(config, nnet, computation);
+    if (GetVerboseLevel() >= 4)
+      CheckComputation(nnet, *computation, false);
+  }
 
-  if (GetVerboseLevel() >= 4)
-    CheckComputation(nnet, *computation, false);
-
-  if (config.initialize_undefined)
+  if (config.optimize && config.initialize_undefined) {
     RemoveUnnecessaryZeroing(nnet, computation);
+    if (GetVerboseLevel() >= 4)
+      CheckComputation(nnet, *computation, false);
+  }
 
-  if (GetVerboseLevel() >= 4)
-    CheckComputation(nnet, *computation, false);
-
-  if (config.move_sizing_commands)
+  if (config.optimize && config.move_sizing_commands) {
     MoveSizingCommands(nnet, computation);
+    if (GetVerboseLevel() >= 4)
+      CheckComputation(nnet, *computation, false);
+  }
 
-  if (GetVerboseLevel() >= 4)
-    CheckComputation(nnet, *computation, false);
+  // the online computation optimization has to go before
+  // 'RemoveUnnecessaryAllocation()'.  We don't gate this by 'config.optimize'
+  // because it's necessary for online computation to run.
+  if (config.optimize_online_computation) {
+    OptimizeOnlineComputation(nnet, computation);
+    if (GetVerboseLevel() >= 4)
+      CheckComputation(nnet, *computation, false);
+  }
 
-  if (config.allocate_from_other)
+  if (config.optimize && config.allocate_from_other) {
     RemoveUnnecessaryAllocation(nnet, computation);
-
-  if (GetVerboseLevel() >= 4)
-    CheckComputation(nnet, *computation, false);
+    if (GetVerboseLevel() >= 4)
+      CheckComputation(nnet, *computation, false);
+  }
 
   // The following is not configurable because it is necessary for
   // the computation to run correctly (we do it after compilation too,
diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h
index a0208343f7d..303b08a4150 100644
--- a/src/nnet3/nnet-optimize.h
+++ b/src/nnet3/nnet-optimize.h
@@ -46,6 +46,10 @@ struct NnetOptimizeOptions {
   bool allocate_from_other;
   int32 min_deriv_time;
   int32 max_deriv_time;
+  // optimize_online_computation is a 'hidden config' not available from
+  // the command line; it's set to true to enable the optimization for
+  // online computation that turns a linear computation into a loop.
+  bool optimize_online_computation;
 
   NnetOptimizeOptions():
       optimize(true),
      consolidate_model_update(true),
@@ -59,7 +63,8 @@ struct NnetOptimizeOptions {
       move_sizing_commands(true),
       allocate_from_other(true),
       min_deriv_time(std::numeric_limits<int32>::min()),
-      max_deriv_time(std::numeric_limits<int32>::max()) { }
+      max_deriv_time(std::numeric_limits<int32>::max()),
+      optimize_online_computation(false) { }
 
   void Register(OptionsItf *opts) {
     opts->Register("optimize", &optimize, "Set this to false to turn off all "

From 5cba3b8f523763605db38f5eacce1a1245242ca1 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Thu, 10 Nov 2016 21:19:46 -0500
Subject: [PATCH 010/530] Get the online optimization code working to the
 point where the tests run.
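
Schematically, after this optimization an 'online' computation ends with a
loop structure like the following (a hand-written sketch for illustration,
not actual printed output; the command indexes are made up):

    c20:  [label for goto statement]       <- kNoOperationLabel
    ...                                    <- commands for one chunk of data
    c45:  swap matrix m1 with matrix m3    <- kAllocMatrixFromOther
    c46:  goto c20                         <- kGotoLabel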
---
 src/nnet3/nnet-analyze.cc        | 132 ++++++++++--------
 src/nnet3/nnet-analyze.h         |  21 ++-
 src/nnet3/nnet-compile-test.cc   |   2 +-
 src/nnet3/nnet-computation.cc    |  16 +++
 src/nnet3/nnet-computation.h     |   8 +-
 src/nnet3/nnet-optimize-utils.cc | 226 ++++++++++++++++++++++++++++---
 src/nnet3/nnet-optimize-utils.h  |  14 +-
 src/nnet3/nnet-optimize.cc       |  40 +++++-
 8 files changed, 371 insertions(+), 88 deletions(-)

diff --git a/src/nnet3/nnet-analyze.cc b/src/nnet3/nnet-analyze.cc
index e53f46198b6..d76624b93a1 100644
--- a/src/nnet3/nnet-analyze.cc
+++ b/src/nnet3/nnet-analyze.cc
@@ -212,7 +212,7 @@ std::string ComputationVariables::DescribeVariable(int32 variable) const {
       num_column_variables = column_split_points_[matrix_index].size() - 1,
       num_row_variables = row_split_points_[matrix_index].size() - 1,
       column_variable = offset % num_column_variables,
-      row_variable = offset / num_row_variables;
+      row_variable = offset / num_column_variables;
   KALDI_ASSERT(column_variable >= 0 && row_variable >= 0 &&
               row_variable < num_row_variables &&
               column_variable < num_column_variables);
@@ -381,6 +381,8 @@ void ComputeCommandAttributes(
     }
     case kNoOperation:
     case kNoOperationMarker:
+    case kNoOperationLabel:
+    case kGotoLabel:
       break;
     default:
       KALDI_ERR << "Unknown command type.";
@@ -558,7 +560,6 @@ ComputationChecker::ComputationChecker(
 void ComputationChecker::Check() {
   CheckComputationIndexes();
   a_.Init(nnet_, computation_);
-  CheckComputationOrder();
   CheckComputationMatrixAccesses();
   CheckComputationUndefined();
   CheckComputationDebugInfo();
@@ -580,8 +581,12 @@ void ComputationChecker::CheckComputationRewrite() const {
   for (int32 v = 0; v < num_variables; v++) {
     const std::vector<Access> &accesses = a_.variable_accesses[v];
     if (accesses.empty()) {
-      KALDI_ERR << "Variable " << v << " = " << a_.variables.DescribeVariable(v)
-                << "is never used.";
+      if (config_.check_unused_variables) {
+        KALDI_ERR << "Variable " << v << " = " << a_.variables.DescribeVariable(v)
+                  << " is never used.";
+      } else {
+        continue;
+      }
     }
     int32 num_accesses = accesses.size();
     int32 first_pure_read = -1;
@@ -597,8 +602,8 @@ void ComputationChecker::CheckComputationRewrite() const {
       if (accesses[access].access_type != kReadAccess) {
        KALDI_ERR << "Variable " << v << " = "
                  << a_.variables.DescribeVariable(v)
-                  << "is modified after being read "
-                  << "(this is not expected before optimization)";
+                  << " is modified after being read"
+                  << " (this is not expected before optimization)";
      }
    }
  }
@@ -613,13 +618,17 @@ void ComputationChecker::CheckComputationUndefined() const {
   int32 num_variables = a_.variable_accesses.size();
   for (int32 v = 0; v < num_variables; v++) {
     const std::vector<Access> &accesses = a_.variable_accesses[v];
-    if (accesses.empty())
-      KALDI_ERR << "Variable " << v << " == "
-                << a_.variables.DescribeVariable(v) << "is never used.";
-    if (accesses[0].access_type != kWriteAccess)
-      KALDI_ERR << "Variable " << v << " == "
-                << a_.variables.DescribeVariable(v)
-                << " is read before it is written to";
+    if (accesses.empty()) {
+      if (config_.check_unused_variables) {
+        KALDI_ERR << "Variable " << v << " == "
+                  << a_.variables.DescribeVariable(v) << " is never used.";
+      }
+    } else {
+      if (accesses[0].access_type != kWriteAccess)
+        KALDI_ERR << "Variable " << v << " == "
+                  << a_.variables.DescribeVariable(v)
+                  << " is read before it is written to";
+    }
   }
 }
 
@@ -637,7 +646,7 @@ void ComputationChecker::CheckComputationMatrixAccesses() const {
 
   for (int32 matrix_index = 1; matrix_index < num_matrices; matrix_index++) {
     const MatrixAccesses &accesses =
a_.matrix_accesses[matrix_index];
     if (accesses.allocate_command == -1)
-      KALDI_ERR << "Matrix m" << matrix_index << "is not initialized.";
+      KALDI_ERR << "Matrix m" << matrix_index << " is not initialized.";
     if (accesses.accesses.empty()) {
       KALDI_ERR << "Matrix m" << matrix_index << " is never accessed.";
     } else if (accesses.accesses.front().command_index <
@@ -917,49 +926,24 @@ void ComputationChecker::CheckComputationIndexes() const {
     }
     case kNoOperation:
     case kNoOperationMarker:
+    case kNoOperationLabel:
+      break;
+    case kGotoLabel: {
+      int32 label_index = c.arg1;
+      if (label_index < 0 || label_index >= command_index ||
+          computation_.commands[label_index].command_type != kNoOperationLabel)
+        KALDI_ERR << "kGotoLabel command has invalid destination index.";
+      if (command_index + 1 != num_commands) {
+        KALDI_ERR << "kGotoLabel is not the last command in the computation";
+      }
       break;
+    }
     default:
       KALDI_ERR << "Unknown command type.";
   }
 }
 
-
-// make sure Propagate comes before kNoOperationMarker and Backprop comes after
-// it, and that the value of computation_computation_end matches the position of
-// kNoOpMarker.
-void ComputationChecker::CheckComputationOrder() const {
-  int32 num_commands = computation_.commands.size();
-  int32 num_markers = 0, marker_location = 0;
-  for (int32 c = 0; c < num_commands; c++) {
-    if (computation_.commands[c].command_type ==
-        kNoOperationMarker) {
-      marker_location = c;
-      num_markers++;
-    }
-  }
-  if (num_markers != 1)
-    KALDI_ERR << "Expected exactly one kNoOperationMarker marker.";
-
-  for (int32 c = 0; c < num_commands; c++) {
-    CommandType command_type =
-        computation_.commands[c].command_type;
-    if (c != marker_location &&
-        command_type == kNoOperationMarker)
-      KALDI_ERR << "Found kNoOpMarker in unexpected place";
-    if (c < marker_location &&
-        (command_type == kBackprop ||
-         command_type == kBackpropNoModelUpdate))
-      KALDI_ERR << "Backprop occurs before kNoOpMarker";
-    if (c > marker_location &&
-        command_type == kPropagate)
-      KALDI_ERR << "Propagate occurs after kNoOpMarker";
-    if (c > marker_location &&
-        command_type == kStoreStats)
-      KALDI_ERR << "StoreStats occurs after kNoOpMarker";
-  }
-}
-
 void ComputationChecker::CheckComputationDebugInfo() const {
   if (computation_.matrix_debug_info.empty()) return;
   if (computation_.matrix_debug_info.size() !=
@@ -973,15 +957,57 @@
   }
 }
 
-void CheckComputation(const Nnet &nnet,
-                      const NnetComputation &computation,
-                      bool check_rewrite) {
+
+// note: 'computation' is not a reference, it's copied so that we
+// can modify it internally.
+static void CheckComputationOnline(const Nnet &nnet,
+                                   NnetComputation computation,
+                                   bool check_rewrite) {
+  int32 num_commands = computation.commands.size();
+  KALDI_ASSERT(computation.commands[num_commands-1].command_type == kGotoLabel);
+  for (int32 c = num_commands - 2;
+       c >= 0 && computation.commands[c].command_type == kAllocMatrixFromOther;
+       c--) {
+    // this command can be interpreted as "initialize matrix referred to by
+    // c.arg1 with the matrix referred to by c.arg2".
+    // Because this would be interpreted by the analysis code as initializing a
+    // matrix that has already been initialized, we turn this into a command
+    // that just deallocates the matrix in c.arg2.  [note: all these indexes
+    // are actually submatrix indexes].
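+    // (For instance, a trailing "swap m1 <- m2" command, as added by
+    // AddMatrixSwapCommands(), is re-interpreted here as "deallocate m2",
+    // which is what the analysis code expects at the end of m2's lifetime.)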
+    computation.commands[c].command_type = kDeallocMatrix;
+    std::swap(computation.commands[c].arg1, computation.commands[c].arg2);
+  }
+
   CheckComputationOptions opts;
   opts.check_rewrite = check_rewrite;
+  opts.check_unused_variables = false;
+  // We can always do this check with online computations, since they do not
+  // have the RemoveUnnecessaryAllocation() optimization applied.
   ComputationChecker checker(opts, nnet, computation);
   checker.Check();
 }
 
+void CheckComputation(const Nnet &nnet,
+                      const NnetComputation &computation,
+                      bool check_rewrite) {
+  try {
+    if (!computation.commands.empty() &&
+        computation.commands.back().command_type == kGotoLabel) {
+      // Online computations need to be treated specially.
+      CheckComputationOnline(nnet, computation, check_rewrite);
+    } else {
+      CheckComputationOptions opts;
+      opts.check_rewrite = check_rewrite;
+      ComputationChecker checker(opts, nnet, computation);
+      checker.Check();
+    }
+  } catch (...) {
+    computation.Print(std::cerr, nnet);
+    KALDI_ERR << "Computation check failed for computation printed above "
+        "(actual error message is above computation)";
+  }
+}
+
 void ComputeMatrixToSubmatrix(
     const NnetComputation &computation,
     std::vector<std::vector<int32> > *mat_to_submat) {
diff --git a/src/nnet3/nnet-analyze.h b/src/nnet3/nnet-analyze.h
index 7109575e415..4a827c05eb0 100644
--- a/src/nnet3/nnet-analyze.h
+++ b/src/nnet3/nnet-analyze.h
@@ -381,11 +381,22 @@ struct CheckComputationOptions {
   // do the check_rewrite check only for a non-optimized computation, it may
   // legitimately fail after optimization.  see code for details.
   bool check_rewrite;
-
-  CheckComputationOptions(): check_rewrite(false) { }
+  // If 'check_unused_variables' is true, it checks for unused variables
+  // (e.g. unused parts of matrices).  We only set it false for online
+  // computations, where there can be instances where a part of a matrix is
+  // apparently never accessed (until we consider that the matrix is swapped
+  // with another).
+  bool check_unused_variables;
+
+  CheckComputationOptions():
+      check_rewrite(false), check_unused_variables(true) { }
 };
 
 
+// Note: this checker class does not work for online computations (that have a
+// kGotoLabel command), but the function CheckComputation() is able to detect such
+// computations and modify them in such a way that they can be checked by this
+// class (and then do extra checks).
 class ComputationChecker {
  public:
   ComputationChecker(const CheckComputationOptions &config,
@@ -395,10 +406,6 @@ class ComputationChecker {
  private:
   // various dimension consistency checks and checks on properties.
   void CheckComputationIndexes() const;
-  // make sure Propagate comes before kNoOpMarker and Backprop comes after it,
-  // and that the value of forward_computation_end matches the position of
-  // kNoOpMarker.
-  void CheckComputationOrder() const;
   // checks for a situation where an undefined variable is read.
   void CheckComputationUndefined() const;
   // checks that all writes are done before reads.  details with implementation.
@@ -426,6 +433,8 @@ void GetSegmentEnds(const NnetComputation &computation,
 
 /// This is a convenience interface for class ComputationChecker.  Call it with
 /// check_rewrite = true only if the computation is pre-optimization.
+/// If the computation is an 'online' computation, this function treats
+/// it specially.
void CheckComputation(const Nnet &nnet, const NnetComputation &computation, bool check_rewrite = false); diff --git a/src/nnet3/nnet-compile-test.cc b/src/nnet3/nnet-compile-test.cc index da08253093a..eaff78ad4c6 100644 --- a/src/nnet3/nnet-compile-test.cc +++ b/src/nnet3/nnet-compile-test.cc @@ -177,7 +177,7 @@ void UnitTestNnetCompileOnline() { int main() { using namespace kaldi; using namespace kaldi::nnet3; - SetVerboseLevel(2); + SetVerboseLevel(4); UnitTestNnetCompileOnline(); UnitTestNnetCompile(); diff --git a/src/nnet3/nnet-computation.cc b/src/nnet3/nnet-computation.cc index 907bd2633d8..a7b49499bb3 100644 --- a/src/nnet3/nnet-computation.cc +++ b/src/nnet3/nnet-computation.cc @@ -275,6 +275,10 @@ void NnetComputation::Command::Read(std::istream &is, bool binary) { command_type = kNoOperation; } else if (command_type_str == "kNoOperationMarker") { command_type = kNoOperationMarker; + } else if (command_type_str == "kNoOperationLabel") { + command_type = kNoOperationLabel; + } else if (command_type_str == "kGotoLabel") { + command_type = kGotoLabel; } else { KALDI_ERR << "Un-handled command type."; } @@ -368,6 +372,12 @@ void NnetComputation::Command::Write(std::ostream &os, bool binary) const { case kNoOperationMarker: os << "kNoOperationMarker\n"; break; + case kNoOperationLabel: + os << "kNoOperationLabel\n"; + break; + case kGotoLabel: + os << "kGotoLabel\n"; + break; default: KALDI_ERR << "Un-handled command type."; } @@ -598,6 +608,12 @@ static void PrintCommand(std::ostream &os, case kNoOperationMarker: os << "# computation segment separator [e.g., begin backward commands]\n"; break; + case kNoOperationLabel: + os << "[label for goto statement]\n"; + break; + case kGotoLabel: + os << "goto c" << c.arg1 << "\n"; + break; default: KALDI_ERR << "Un-handled command type."; } diff --git a/src/nnet3/nnet-computation.h b/src/nnet3/nnet-computation.h index ba0eaada1a0..857dde1547b 100644 --- a/src/nnet3/nnet-computation.h +++ b/src/nnet3/nnet-computation.h @@ -221,6 +221,12 @@ struct ComputationRequest { - kNoOperation: does nothing (sometimes useful during optimization) - kNoOperationMarker: does nothing, but used to mark end of a block of commands (like forward commands). + - kNoOperationLabel: does nothing, but is the destination for + the kGotoLabel command. + - kGotoLabel: jumps to the kNoOperationLabel command. arg1 must + be set to the location of that command. Since there are no + conditionals, this should be the last command, as remaining + commands will be unreachable. */ enum CommandType { @@ -230,7 +236,7 @@ enum CommandType { kMatrixCopy, kMatrixAdd, kCopyRows, kAddRows, kCopyRowsMulti, kCopyToRowsMulti, kAddRowsMulti, kAddToRowsMulti, kAddRowRanges, kAcceptInput, kProvideOutput, - kNoOperation, kNoOperationMarker }; + kNoOperation, kNoOperationMarker, kNoOperationLabel, kGotoLabel }; diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc index d2d6daf2a47..7a0fafb0b5e 100644 --- a/src/nnet3/nnet-optimize-utils.cc +++ b/src/nnet3/nnet-optimize-utils.cc @@ -73,6 +73,8 @@ void IdentifySubmatrixArgs(NnetComputation::Command *c, break; case kNoOperation: case kNoOperationMarker: + case kNoOperationLabel: + case kGotoLabel: break; default: KALDI_ERR << "Unknown command type."; @@ -1724,19 +1726,21 @@ class ComputationOnlineOptimizer { // This function looks in the matrix 'active_pairs' for the first pair of // identical values, i.e. it is looking for i < j for which - // normalized_active_pairs[i] == normalized_active_pairs[j]. 
If there
-  // is such a pair it outputs them to *seg1 and *seg2, and returns true;
-  // otherwise it returns false.
+  // normalized_active_pairs[i] == normalized_active_pairs[j].  (However, the
+  // pair i,j must satisfy an extra condition, see below).  If a pair
+  // i,j exists satisfying these conditions, this function outputs them to *seg1
+  // and *seg2, and returns true; otherwise it returns false.
   //
-  // Update to the above: It turns out that under some circumstances, the
-  // original function found repeats that were not "really" repeats (the
-  // matrices were not time shifted).  The situation was a bit obscure (it was a
-  // non-recurrent setup with a lot of extra-right-context, where some inputs
-  // were never used), but to prevent it happening again we are now checking,
-  // in addition to the above, that the time-shift between the segments
-  // (i.e. time_offsets[j] - time_offsets[i]) has the "expected value"
-  // based on the assumption that each segment should be shifted relative
-  // to the previous segment, by 'time_shift_per_segment'.
+  // Extra condition:
+  // It turns out that under some circumstances, we can
+  // find repeats that were not "really" repeats (the matrices were not time
+  // shifted).  The situation was a bit obscure (it was a non-recurrent setup with
+  // a lot of extra-right-context, where some inputs were never used), but to
+  // prevent it happening again we are now checking, in addition to the above,
+  // that the time-shift between the segments (i.e. time_offsets[j] -
+  // time_offsets[i]) has the "expected value" based on the assumption that
+  // each segment should be shifted relative to the previous segment, by
+  // 'time_shift_per_segment'.
   static bool FindFirstRepeat(
       const std::vector<std::vector<std::pair<int32, int32> > > &normalized_active_pairs,
       const std::vector<int32> &time_offsets,
       int32 time_shift_per_segment,
       int32 *seg1, int32 *seg2);
 
+
+  // Given two command indexes command1 < command2 pointing to commands of type
+  // kNoOperationMarker, this function modifies the computation by
+  // removing all commands after command2, replacing command2 with a kGotoLabel
+  // command pointing to command1 and then inserting just before command1
+  // a marker of type kNoOperationLabel.
+  static void FormInfiniteLoop(int32 command1, int32 command2,
+                               NnetComputation *computation);
+
+  // This is to be called after FormInfiniteLoop.  It inserts, just before
+  // the final kGotoLabel command, commands that initialize
+  // each of the matrices in list 'matrices1' from the corresponding
+  // matrix in 'matrices2', using the kAllocMatrixFromOther command.
+  // This effectively does, for example, matrices1[i] = matrices2[i],
+  // while initializing matrices1[i] and deallocating matrices2[i];
+  // it's implemented as a shallow swap.
+  // It does this in such an order that even if the two lists are
+  // not disjoint, the right thing happens.
+  static void AddMatrixSwapCommands(
+      const std::vector<int32> &matrices1,
+      const std::vector<int32> &matrices2,
+      NnetComputation *computation);
+
+
+  // Called from AddMatrixSwapCommands, this function figures out for us
+  // an acceptable order in which to execute the kAllocMatrixFromOther
+  // commands.  This is easy to do if matrices1 and matrices2 are disjoint
+  // sets, but has to be done more carefully if they overlap.
+  // The output is a list of pairs where each pair (a, b) comes
+  // from matrices1 and matrices2 in the same position, i.e.
+  // a = matrices1[i] and b = matrices2[i].
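+  // (Worked example, with made-up matrix indexes: if matrices1 = {m2, m4}
+  // and matrices2 = {m4, m6}, the pair (m4, m6) cannot be emitted until
+  // (m2, m4) has been, since m4 is only deallocated once it has appeared on
+  // the right-hand side of a swap; so the output order is (m2, m4), (m4, m6).)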
+  static void GetMatrixSwapOrder(
+      const std::vector<int32> &matrices1,
+      const std::vector<int32> &matrices2,
+      std::vector<std::pair<int32, int32> > *swaps);
+
+
+
   /// Given a list of command indexes ('segment_end_commands') which are
   /// expected to be command indexes of the kNoOperationMarker at segment
   /// boundaries, this function outputs for each of these command indexes a list
   /// of matrices which are 'active' at that point in time.  By 'active' we mean
   /// that the matrix has been written to before that time (note, we don't count
   /// initialization with zeros as being written to); and will be read after
   /// that time.  This is the list of matrices that 'need to be in scope'
   /// at those points in time.  '*active_matrices' is indexed by the
   /// same index as 'segment_end_commands', and is then a list of active
   /// matrices, in numerical order of matrix index.
+  /// Note: for each i, (*active_matrices)[i] will be sorted and unique.
   static void FindActiveMatrices(const NnetComputation &computation,
                                  const Analyzer &analyzer,
                                  const std::vector<int32> &segment_end_commands,
                                  std::vector<std::vector<int32> > *active_matrices);
 
@@ -1951,10 +1993,22 @@ bool ComputationOnlineOptimizer::FindFirstRepeat(
   // segments will normally be quite small (e.g. 10), and the comparison of
   // elements of 'normalized_active_pairs' should be fast in cases where they
   // differ.
+  KALDI_ASSERT(num_segments >= 2);
+
+  bool perform_time_offset_check = true;
+  if (normalized_active_pairs.back().empty()) {
+    // If there are no variables active after the end of the last-but-one segment
+    // (which is the last element in segment_ends, since we remove the end of the
+    // very last segment), then don't perform the check related to
+    // time-offsets; it's not relevant.  [this would probably be a computation
+    // that doesn't require any context].
+    perform_time_offset_check = false;
+  }
   for (int32 s = 0; s < num_segments; s++) {
     for (int32 t = s + 1; t < num_segments; t++) {
-      if (time_offsets[t] - time_offsets[s] == (t - s) * time_shift_per_segment
-          && normalized_active_pairs[s] == normalized_active_pairs[t]) {
+      if ((!perform_time_offset_check ||
+           time_offsets[t]-time_offsets[s] == (t-s) * time_shift_per_segment) &&
+          normalized_active_pairs[s] == normalized_active_pairs[t]) {
         *seg1 = s;
         *seg2 = t;
         return true;
@@ -2059,6 +2113,114 @@ void ComputationOnlineOptimizer::CheckIdentifiedMatrices(
 }
 
 
+// static
+void ComputationOnlineOptimizer::GetMatrixSwapOrder(
+    const std::vector<int32> &matrices1,
+    const std::vector<int32> &matrices2,
+    std::vector<std::pair<int32, int32> > *swaps) {
+  KALDI_ASSERT(matrices1.size() == matrices2.size());
+  swaps->clear();
+  int32 num_matrices = matrices1.size();
+  std::vector<bool> processed(num_matrices, false);
+  std::vector<int32> queue;
+
+  // num_loops is just for infinite-loop detection.
+  int32 num_loops = 0;
+  for (; static_cast<int32>(swaps->size()) < num_matrices; num_loops++) {
+    for (int32 i = 0; i < num_matrices; i++) {
+      if (processed[i])
+        continue;
+      int32 m1 = matrices1[i], m2 = matrices2[i];
+      std::vector<int32>::const_iterator iter =
+          std::lower_bound(matrices2.begin(), matrices2.end(), m1);
+      if (iter == matrices2.end() || *iter != m1) {
+        // Matrix m1 does not appear in the list 'matrices2', so
+        // we are safe to process it at any time.
+        swaps->push_back(std::pair<int32, int32>(m1, m2));
+        processed[i] = true;
+      } else {
+        int32 m1_pos_in_matrices2 = iter - matrices2.begin();
+        if (processed[m1_pos_in_matrices2]) {
+          // We're safe to do this swap now, because the matrix m1 has already
+          // appeared on the RHS of a swap, and by this point has been
+          // deallocated, in effect.
+          swaps->push_back(std::pair<int32, int32>(m1, m2));
+          processed[i] = true;
+        }
+        // else do nothing, we cannot process m1 yet because
+        // at this point in the computation it is still allocated.
+      }
+    }
+    // The following assert is to check that we don't loop infinitely.
We can
+    // prove that infinite looping won't happen, after first proving that there can
+    // be no cycles like (m1, m2), (m2, m3), (m3, m1) (the length of 3 is chosen
+    // arbitrarily as an example).  If such a cycle existed, we can reach a
+    // contradiction based on the time-index (t) of the first cindex in m1.
+    // Define t1 = that time index, t2 the same for m2, t3 the same for m3.  The
+    // existence of the three pairs [as pairs like (matrices1[i], matrices2[i])]
+    // implies that t2 > t1, t3 > t2, and t1 > t3 respectively, but this is
+    // impossible.
+    // This shows that all chains of dependencies must terminate.
+    KALDI_ASSERT(num_loops <= num_matrices);
+  }
+}
+
+// static
+void ComputationOnlineOptimizer::AddMatrixSwapCommands(
+    const std::vector<int32> &matrices1,
+    const std::vector<int32> &matrices2,
+    NnetComputation *computation) {
+  std::vector<std::pair<int32, int32> > swaps;
+  // Note: in 'easy' cases where matrices1 and matrices2 are disjoint,
+  // 'swaps' will just be the vector { (matrices1[0],matrices2[0]),
+  // (matrices1[1],matrices2[1]), ... },
+  // but in some cases these may need to get reordered.
+  GetMatrixSwapOrder(matrices1, matrices2, &swaps);
+
+  NnetComputation::Command goto_label_command = computation->commands.back();
+  KALDI_ASSERT(goto_label_command.command_type == kGotoLabel);
+  computation->commands.pop_back();
+
+  // the following vector gives us, for each matrix index, a submatrix index
+  // that covers the whole of that matrix (needed because the commands
+  // require submatrix indexes)
+  std::vector<int32> whole_submatrices;
+  computation->GetWholeSubmatrices(&whole_submatrices);
+  size_t num_matrices = whole_submatrices.size();
+
+  for (size_t i = 0; i < swaps.size(); i++) {
+    int32 m1 = swaps[i].first, m2 = swaps[i].second;
+    KALDI_ASSERT(static_cast<size_t>(m1) < num_matrices &&
+                 static_cast<size_t>(m2) < num_matrices);
+    int32 s1 = whole_submatrices[m1], s2 = whole_submatrices[m2];
+    computation->commands.push_back(
+        NnetComputation::Command(
+            kAllocMatrixFromOther, s1, s2));
+  }
+  computation->commands.push_back(goto_label_command);
+}
+
+// static
+void ComputationOnlineOptimizer::FormInfiniteLoop(
+    int32 command1, int32 command2,
+    NnetComputation *computation) {
+  KALDI_ASSERT(static_cast<int32>(computation->commands.size()) >=
+               command2 + 1 && command1 < command2);
+  KALDI_ASSERT(
+      computation->commands[command1].command_type == kNoOperationMarker &&
+      computation->commands[command2].command_type == kNoOperationMarker);
+  // Remove any commands after 'command2'.
+  computation->commands.resize(command2 + 1);
+  computation->commands[command2].command_type = kGotoLabel;
+  computation->commands[command2].arg1 = command1;
+  NnetComputation::Command c(kNoOperationLabel);
+  computation->commands.insert(computation->commands.begin() + command1,
+                               c);
+  // Now the kNoOperationLabel command is at position 'command1'.
+}
+
+
+
 bool ComputationOnlineOptimizer::Optimize() {
   analyzer_.Init(nnet_, *computation_);
   KALDI_ASSERT(!computation_->matrix_debug_info.empty() &&
@@ -2105,8 +2267,9 @@ bool ComputationOnlineOptimizer::Optimize() {
   std::vector<int32> time_offsets;
   NormalizePairLists(&pair_lists, &time_offsets);
 
+  // Note: seg1 and seg2 are indexes into 'segment_ends', representing
+  // points in time (that happen to be the ends of segments).
   int32 seg1, seg2;
-
   if (!FindFirstRepeat(pair_lists,
                        time_offsets,
                        time_shift_per_segment,
                        &seg1, &seg2)) {
@@ -2128,20 +2291,45 @@ bool ComputationOnlineOptimizer::Optimize() {
   CheckIdentifiedMatrices(*computation_, seg1_matrices, seg2_matrices,
                           time_difference);
 
-  // HERE, do whatever kind of identification we have to do between the two
-  // lists of matrices.
+
+  FormInfiniteLoop(segment_ends[seg1], segment_ends[seg2], computation_);
+
+  AddMatrixSwapCommands(seg1_matrices, seg2_matrices, computation_);
+
+  RenumberComputation(computation_);
+
+  FixGotoLabel(computation_);
 
   return true;
 }
 
 
-bool OptimizeOnlineComputation(const Nnet &nnet,
+void OptimizeOnlineComputation(const Nnet &nnet,
                                NnetComputation *computation) {
   ComputationOnlineOptimizer optimizer(nnet, computation);
-  return optimizer.Optimize();
+  optimizer.Optimize();
 }
 
+void FixGotoLabel(NnetComputation *computation) {
+  int32 num_commands = computation->commands.size();
+  if (num_commands == 0)
+    return;
+  if (computation->commands[num_commands-1].command_type == kGotoLabel) {
+    int32 dest_command = computation->commands[num_commands-1].arg1;
+    if (static_cast<size_t>(dest_command) < computation->commands.size() &&
+        computation->commands[dest_command].command_type == kNoOperationLabel)
+      return;  // nothing to fix.
+    for (int32 c = 0; c + 1 < num_commands; c++) {
+      if (computation->commands[c].command_type == kNoOperationLabel) {
+        computation->commands[num_commands-1].arg1 = c;
+        return;
+      }
+    }
+    KALDI_ERR << "Label not found.";
+  }
+}
+
 } // namespace nnet3
diff --git a/src/nnet3/nnet-optimize-utils.h b/src/nnet3/nnet-optimize-utils.h
index 11a04354016..f2448f46fe5 100644
--- a/src/nnet3/nnet-optimize-utils.h
+++ b/src/nnet3/nnet-optimize-utils.h
@@ -574,12 +574,20 @@ void IdentifyIndexesRangesArgs(std::vector<NnetComputation::Command> *commands,
 /// matrices have the same debug-info other than a time offset and can be
 /// identified with each other, and the no-op command at c2 can be replaced with
 /// 'goto c1', creating a computation that 'goes on forever'.
-/// It returns true if it successfully did this.  [If this happens, the
-/// whole computation may have to be regenerated with more segments.]
-bool OptimizeOnlineComputation(const Nnet &nnet,
+/// If it can't do this, it does nothing.  You can figure out that this is the
+/// case by checking whether kGotoLabel is the last command in the computation.
+/// [If this optimization fails, the whole computation may have to be
+/// regenerated with more segments.]
+void OptimizeOnlineComputation(const Nnet &nnet,
                                NnetComputation *computation);
 
+/// This function ensures that the arg1 of a final command of type kGotoLabel
+/// points to the command of type kNoOperationLabel.  This is necessary
+/// if you do any other type of optimization after 'OptimizeOnlineComputation()'.
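+/// (For instance, RenumberComputation() or RemoveNoOps() can shift command
+/// indexes, leaving the kGotoLabel command pointing at the wrong place; this
+/// function re-locates the kNoOperationLabel command and updates arg1.)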
+void FixGotoLabel(NnetComputation *computation);
+
+
 
 /*
   Possible TODO:
diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc
index 824b467174e..0128d6b8c1b 100644
--- a/src/nnet3/nnet-optimize.cc
+++ b/src/nnet3/nnet-optimize.cc
@@ -325,6 +325,7 @@ void RemoveUnnecessaryAllocation(const Nnet &nnet,
         kAllocMatrixFromOtherZeroed;
   }
   RemoveNoOps(computation);
+  FixGotoLabel(computation);
 }
 
 
@@ -459,7 +460,11 @@ void Optimize(const NnetOptimizeOptions &config,
       CheckComputation(nnet, *computation, false);
   }
 
-  if (config.optimize && config.allocate_from_other) {
+  if (config.optimize && config.allocate_from_other &&
+      !config.optimize_online_computation) {
+    // Don't do this if it's an online computation because we're not sure if it
+    // would be correct in that case, as written.  In any case the performance
+    // benefit is tiny.
     RemoveUnnecessaryAllocation(nnet, computation);
     if (GetVerboseLevel() >= 4)
       CheckComputation(nnet, *computation, false);
@@ -471,6 +476,9 @@ void Optimize(const NnetOptimizeOptions &config,
   // other optimizations.)
   ConsolidateIoOperations(nnet, computation);
 
+  if (config.optimize_online_computation)
+    FixGotoLabel(computation);
+
   if (GetVerboseLevel() >= 4)
     CheckComputation(nnet, *computation, false);
 }
@@ -629,10 +637,10 @@ const NnetComputation* CachingOptimizingCompiler::Compile(
   return computation;
 }
 
-/// Split the computation up into segments bounded internally by kNoOperationMarker.
-/// For each segment, a pair of command-indexes (start, end) is output to the vector
-/// 'segments', so the commands in the segment (not including kNoOperationMarker)
-/// are numbered from start ... end - 1.
+/// Split the computation up into segments bounded by kNoOperationMarker.  For
+/// each segment, a pair of command-indexes (start, end) is output to the vector
+/// 'segments', so the commands in the segment (not including
+/// kNoOperationMarker) are numbered from start ... end - 1.
 static void SplitComputationIntoSegments(
     const NnetComputation &computation,
     std::vector<std::pair<int32, int32> > *segments) {
@@ -652,6 +660,10 @@ static void SplitComputationIntoSegments(
 
 void ConsolidateIoOperations(const Nnet &nnet,
                              NnetComputation *computation) {
+  bool ends_with_goto =
+      (!computation->commands.empty() &&
+       computation->commands.back().command_type == kGotoLabel);
+
   // These segments, represented as (start-index, end-index),
   // are segments of the computation separated by kNoOperationMarker.
   std::vector<std::pair<int32, int32> > segments;
@@ -700,6 +712,24 @@ void ConsolidateIoOperations(const Nnet &nnet,
     KALDI_ASSERT(c == segment_end);
   }
   computation->commands.swap(reordered_commands);
+
+  if (ends_with_goto) {
+    // If, before this operation, the last command was kGotoLabel, remove all
+    // commands that have been reordered to go after the kGotoLabel command
+    // [they would be unreachable anyway.]  This relates to online computations.
+    // It may seem wrong that we are just removing these
+    // kAcceptInput/kProvideOutput commands, but the reason it's OK
+    // (and preserves equivalence with the code prior to this function call)
+    // is that the corresponding commands have also been moved past the
+    // kNoOperationLabel command that the goto jumps to, so those commands
+    // will actually get run.
+    // We don't actually check this here; if something were wrong in this
+    // logic, it would lead to a crash when the computation was executed.
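+    // (Illustrative: if the reordered commands ended with
+    // [ ..., kGotoLabel, kProvideOutput ], the loop below would strip the
+    // trailing kProvideOutput, leaving kGotoLabel as the final command.)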
+ while (!computation->commands.empty() && + computation->commands.back().command_type != kGotoLabel) + computation->commands.pop_back(); + FixGotoLabel(computation); + } } From 8092189e1f7d2927c00552f3f11072e60a73af7c Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 10 Nov 2016 22:37:03 -0500 Subject: [PATCH 011/530] Add a couple of previously omitted files --- src/nnet3/nnet-compile-online.cc | 336 +++++++++++++++++++++++++++++++ src/nnet3/nnet-compile-online.h | 181 +++++++++++++++++ 2 files changed, 517 insertions(+) create mode 100644 src/nnet3/nnet-compile-online.cc create mode 100644 src/nnet3/nnet-compile-online.h diff --git a/src/nnet3/nnet-compile-online.cc b/src/nnet3/nnet-compile-online.cc new file mode 100644 index 00000000000..21430d79bbc --- /dev/null +++ b/src/nnet3/nnet-compile-online.cc @@ -0,0 +1,336 @@ +// nnet3/nnet-compile-online.cc + +// Copyright 2016 Johns Hopkins University (author: Daniel Povey) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "nnet3/nnet-compile-online.h" +#include "nnet3/nnet-utils.h" + +namespace kaldi { +namespace nnet3 { + + +void ModifyNnetIvectorPeriod(int32 ivector_period, + Nnet *nnet) { + KALDI_ASSERT(ivector_period > 0); + std::vector config_lines; + nnet->GetConfigLines(false, &config_lines); + std::ostringstream config_to_read; + for (size_t i = 0; i < config_lines.size(); i++) { + std::string s = config_lines[i]; + ConfigLine config_line; + bool b = config_line.ParseLine(config_lines[i]); + KALDI_ASSERT(b && "Could not parse config line."); + if (config_line.FirstToken() == "component-node") { + std::string whole_line = config_lines[i]; + std::string to_search_for = "ReplaceIndex(ivector, t, 0)"; + std::string::size_type pos = whole_line.find(to_search_for); + if (pos != std::string::npos) { + std::ostringstream to_replace_with; + to_replace_with << "Round(ivector, " << ivector_period << ")"; + whole_line.replace(pos, to_search_for.size(), to_replace_with.str()); + config_to_read << whole_line << "\n"; + } + } + } + if (!config_to_read.str().empty()) { + std::istringstream is(config_to_read.str()); + nnet->ReadConfig(is); + } +} + + +int32 GetChunkSize(const Nnet &nnet, + int32 frame_subsampling_factor, + int32 advised_chunk_size) { + int32 modulus = nnet.Modulus(); + KALDI_ASSERT(modulus > 0 && frame_subsampling_factor > 0 && + advised_chunk_size > 0); + int32 chunk_size = advised_chunk_size; + while (1) { + if (chunk_size % modulus == 0 && + chunk_size % frame_subsampling_factor == 0) + return chunk_size; + chunk_size++; + } +} + + +/// Mod(m, n), defined for integers m and n where n > 0, returns +/// the modulus m % n, defined as the integer 0 <= i < n +/// such that i and m are congruent modulo n; for instance, +/// Mod(13, 10) = 3. 
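+/// For negative arguments (the case that matters below), Mod(-13, 10) = 7
+/// and Mod(-10, 10) = 0.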
+/// This is like the % operation in C/C++, except that it always returns a
+/// non-negative value even for negative m; in 99% of cases where it makes a
+/// difference, this is what you want.  In the C/C++ standard, the sign of a % b
+/// for negative a is not specified (except by relation with the division '/'
+/// operator), but in practice it would be <= 0 for almost all implementations.
+template<class I> I Mod(I m, I n) {
+  if (m >= 0) return m % n;
+  else {  // map into [0, n), which plain '%' does not guarantee for negative m.
+    I r = (-m) % n;
+    return (r == 0 ? 0 : n - r);
+  }
+}
+
+
+static void CreateComputationRequestInternal(
+    int32 begin_input_t, int32 end_input_t,
+    int32 begin_output_t, int32 end_output_t,
+    int32 num_sequences,
+    int32 frame_subsampling_factor,
+    const std::set<int32> &ivector_times,
+    ComputationRequest *request) {
+  request->inputs.reserve(2);
+  request->inputs.clear();
+  request->inputs.resize(1 + (ivector_times.empty() ? 0 : 1));
+  request->inputs[0].name = "input";
+  request->inputs[0].has_deriv = false;
+  request->outputs.clear();
+  request->outputs.resize(1);
+  request->outputs[0].name = "output";
+  request->outputs[0].has_deriv = false;
+  if (!ivector_times.empty()) {
+    request->inputs[1].name = "ivector";
+    request->inputs[1].has_deriv = false;
+  }
+
+  // in the computation request the 'n' indexes (the sequence/utterance indexes)
+  // have a larger stride than 't', although this is opposite to the way it's
+  // done inside the computation.  This is for user convenience where it may be
+  // easier to deal with submatrices per sequence.
+  for (int32 n = 0; n < num_sequences; n++) {
+    int32 x = 0;
+    for (int32 t = begin_input_t; t < end_input_t; t++) {
+      request->inputs[0].indexes.push_back(Index(n, t, x));
+    }
+    for (int32 t = begin_output_t;
+         t < end_output_t;
+         t += frame_subsampling_factor)
+      request->outputs[0].indexes.push_back(Index(n, t, x));
+  }
+  if (!ivector_times.empty()) {
+    request->inputs.resize(2);
+    request->inputs[1].name = "ivector";
+    request->inputs[1].has_deriv = false;
+    for (int32 n = 0; n < num_sequences; n++) {
+      // note: std::sets store things in sorted order.
+      for (std::set<int32>::const_iterator iter = ivector_times.begin();
+           iter != ivector_times.end(); ++iter) {
+        int32 t = *iter, x = 0;
+        request->inputs[1].indexes.push_back(Index(n, t, x));
+      }
+    }
+  }
+}
+
+
+void CreateOnlineComputationRequestSimple(const Nnet &nnet,
+                                          int32 chunk_size,
+                                          int32 frame_subsampling_factor,
+                                          int32 ivector_period,
+                                          int32 extra_left_context_begin,
+                                          int32 extra_right_context,
+                                          int32 num_sequences,
+                                          ComputationRequest *request1,
+                                          ComputationRequest *request2,
+                                          ComputationRequest *request3) {
+  bool has_ivector = (nnet.InputDim("ivector") > 0);
+  int32 left_context, right_context;
+  ComputeSimpleNnetContext(nnet, &left_context, &right_context);
+  KALDI_ASSERT(chunk_size % frame_subsampling_factor == 0 &&
+               chunk_size % nnet.Modulus() == 0 &&
+               chunk_size % ivector_period == 0);
+  KALDI_ASSERT(extra_left_context_begin >= 0 && extra_right_context >= 0);
+  // note, 'end' is one past the last one.
+  int32 chunk1_input_begin_t = - left_context - extra_left_context_begin,
+      chunk1_input_end_t = chunk_size + right_context + extra_right_context,
+      chunk2_input_begin_t = chunk1_input_end_t,
+      chunk2_input_end_t = chunk2_input_begin_t + chunk_size,
+      chunk3_input_begin_t = chunk2_input_end_t,
+      chunk3_input_end_t = chunk3_input_begin_t + chunk_size;
+
+
+  // work out the times at which i-vectors are required.
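+  // (Worked example with invented numbers: if ivector_period == chunk_size == 20
+  // and the model's left and right context are both 10, chunk1 spans
+  // t = [-10, 30), giving ivector_times1 = {-20, 0, 20}; chunk2 spans [30, 50)
+  // and adds only {40}; chunk3 spans [50, 70) and adds only {60}, since each
+  // set excludes times already covered by earlier chunks.)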
+  std::set<int32> ivector_times1, ivector_times2, ivector_times3;
+  if (has_ivector) {
+    for (int32 t = chunk1_input_begin_t; t < chunk1_input_end_t; t++) {
+      int32 ivector_t = t - Mod(t, ivector_period);
+      ivector_times1.insert(ivector_t);
+    }
+    for (int32 t = chunk2_input_begin_t; t < chunk2_input_end_t; t++) {
+      int32 ivector_t = t - Mod(t, ivector_period);
+      if (ivector_times1.count(ivector_t) == 0)
+        ivector_times2.insert(ivector_t);
+    }
+    for (int32 t = chunk3_input_begin_t; t < chunk3_input_end_t; t++) {
+      int32 ivector_t = t - Mod(t, ivector_period);
+      if (ivector_times1.count(ivector_t) == 0 &&
+          ivector_times2.count(ivector_t) == 0) {
+        ivector_times3.insert(ivector_t);
+      }
+    }
+  }
+
+  CreateComputationRequestInternal(
+      chunk1_input_begin_t, chunk1_input_end_t,
+      0, chunk_size,
+      num_sequences, frame_subsampling_factor,
+      ivector_times1,
+      request1);
+
+  CreateComputationRequestInternal(
+      chunk2_input_begin_t, chunk2_input_end_t,
+      chunk_size, chunk_size * 2,
+      num_sequences, frame_subsampling_factor,
+      ivector_times2,
+      request2);
+
+  CreateComputationRequestInternal(
+      chunk3_input_begin_t, chunk3_input_end_t,
+      chunk_size * 2, chunk_size * 3,
+      num_sequences, frame_subsampling_factor,
+      ivector_times3,
+      request3);
+
+}
+
+
+
+void AddTimeOffsetToComputationRequest(int32 t_offset,
+                                       ComputationRequest *request) {
+  for (size_t i = 0; i < request->inputs.size(); i++) {
+    size_t size = request->inputs[i].indexes.size();
+    for (size_t j = 0; j < size; j++)
+      request->inputs[i].indexes[j].t += t_offset;
+  }
+  for (size_t i = 0; i < request->outputs.size(); i++) {
+    size_t size = request->outputs[i].indexes.size();
+    for (size_t j = 0; j < size; j++)
+      request->outputs[i].indexes[j].t += t_offset;
+  }
+}
+
+
+
+static bool ExtrapolateComputationRequest(
+    const ComputationRequest &request1,
+    const ComputationRequest &request2,
+    ComputationRequest *request3) {
+  // accepts two computation requests 'request1' and 'request2' that
+  // must be identical except for a time offset, and creates 'request3',
+  // which extrapolates the next term in the sequence.
+  *request3 = request2;
+  KALDI_ASSERT(!request1.inputs.empty() && !request1.inputs[0].indexes.empty() &&
+               !request2.inputs.empty() && !request2.inputs[0].indexes.empty());
+  int32 t_offset = request2.inputs[0].indexes[0].t -
+      request1.inputs[0].indexes[0].t;
+  // the following is just to make sure that the inputs are structurally
+  // equivalent.
+  AddTimeOffsetToComputationRequest(-t_offset, request3);
+  if (!(*request3 == request1))
+    return false;  // there is some structural difference, or
+                   // the time offset is not consistent.
+  // the following reverses the last call to AddTimeOffsetToComputationRequest,
+  // then adds the offset we want.
+  AddTimeOffsetToComputationRequest(2 * t_offset, request3);
+  return true;
+}
+
+
+/* Internal version of CompileOnline where
+   you specify the number of computation requests (must be >= 3).
+   Returns true on success.
+   It's possible for the optimization to fail if you give too small
+   a value of 'num_requests' (this depends on the network topology),
+   and in that case this function will return false and you should re-try
+   with a higher value of num_requests.
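+   (For instance, a network whose recurrence reaches back several chunks may
+   not expose a repeating pattern of active matrices until more segments than
+   the default have been unrolled.)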
+ */ +static bool CompileOnlineInternal( + const Nnet &nnet, + NnetOptimizeOptions optimize_opts, + const ComputationRequest &request1, + const ComputationRequest &request2, + const ComputationRequest &request3, + int32 num_requests, + NnetComputation *computation) { + KALDI_ASSERT(num_requests >= 3); + std::vector extra_requests(num_requests - 3); + const ComputationRequest *prev_request = &request2; + const ComputationRequest *cur_request = &request3; + for (int32 i = 0; i < num_requests - 3; i++) { + if (!ExtrapolateComputationRequest(*prev_request, *cur_request, + &(extra_requests[i]))) { + KALDI_LOG << "prev_request is:"; + prev_request->Print(std::cerr); + KALDI_LOG << "cur_request is:"; + cur_request->Print(std::cerr); + KALDI_ERR << "Computation requests do not have the right relationship"; + } + prev_request = cur_request; + cur_request = &(extra_requests[i]); + } + + std::vector requests; + requests.push_back(&request1); + requests.push_back(&request2); + requests.push_back(&request3); + for (int32 i = 0; i < num_requests - 3; i++) + requests.push_back(&(extra_requests[i])); + Compiler compiler(requests, nnet); + CompilerOptions compiler_opts; + compiler.CreateComputation(compiler_opts, computation); + optimize_opts.optimize_online_computation = true; + + Optimize(optimize_opts, nnet, computation); + + return computation->commands.size() != 0 && + computation->commands.back().command_type == kGotoLabel; +} + +void CompileOnline(const Nnet &nnet, + const NnetOptimizeOptions &optimize_opts, + const ComputationRequest &request1, + const ComputationRequest &request2, + const ComputationRequest &request3, + NnetComputation *computation) { + int32 num_requests1 = 5, factor = 2, max_requests = 100, + num_requests; + + for (num_requests = num_requests1; num_requests <= max_requests; + num_requests *= factor) { + if (CompileOnlineInternal(nnet, optimize_opts, + request1, request2, request3, + num_requests, computation)) { + return; + } else { + KALDI_VLOG(2) << "Online compilation failed with " + << num_requests << " requests, trying " + << (num_requests * factor); + } + } + KALDI_ERR << "Online compilation failed with " + << (num_requests/factor) << " requests, which " + << "we expect should be enough... something " + << "went wrong."; +} + + + + + + + +} // namespace nnet3 +} // namespace kaldi diff --git a/src/nnet3/nnet-compile-online.h b/src/nnet3/nnet-compile-online.h new file mode 100644 index 00000000000..100c741fe82 --- /dev/null +++ b/src/nnet3/nnet-compile-online.h @@ -0,0 +1,181 @@ +// nnet3/nnet-compile-online.h + +// Copyright 2016 Johns Hopkins University (author: Daniel Povey) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+
+#ifndef KALDI_NNET3_NNET_COMPILE_ONLINE_H_
+#define KALDI_NNET3_NNET_COMPILE_ONLINE_H_
+
+#include "nnet3/nnet-optimize.h"
+#include "nnet3/nnet-utils.h"
+
+#include <vector>
+
+namespace kaldi {
+namespace nnet3 {
+
+
+/**
+   CompileOnline() provides an internal interface for 'online' computation.
+   It's usable for inference only (not training), meaning that backprop is
+   not supported (for now, at least).  CompileOnline() allows you to do the
+   neural net computation for small chunks with increasing 't' values, and
+   naturally cache the intermediate activations (rather than recomputing them
+   every time you see new input data).
+
+   This function does both compilation and optimization, so it's like a combination of
+   Compiler::CreateComputation() [nnet-compile.h] and Optimize() [nnet-optimize.h].
+
+   You provide 3 computation requests.  request1 is the first computation
+   request of an utterance (or other type of segment) that contains any required
+   extra left context in the input.  request2 and request3 are the second and
+   third computation request, and must have exactly the same structure, except
+   for a fixed time offset (change in 't' index) between them.  This will be
+   extrapolated to an infinite sequence of further requests (request4,
+   request5, etc.).  In practice the way it's done is that we extrapolate
+   to a small finite number of requests (like 10), and then attempt to
+   identify a common structure in the computation where, after processing,
+   as an example, the 3rd computation request, the active variables can
+   be identified with those present at, say, the 7th computation request, and
+   we then cut and splice the computation together at these points, like
+   making a tape loop, by adding a goto statement that jumps from the end of
+   the 7th computation request to the end of the 3rd computation request.
+   We also have to identify the variables with each other (merge variables).
+
+   That's done in the optimization code.
+ */
+void CompileOnline(const Nnet &nnet,
+                   const NnetOptimizeOptions &optimize_opts,
+                   const ComputationRequest &request1,
+                   const ComputationRequest &request2,
+                   const ComputationRequest &request3,
+                   NnetComputation *computation);
+
+/*
+  This function gives you a suitable chunk size, which is the smallest number >=
+  'advised_chunk_size' that is an exact multiple of nnet.Modulus() and
+  frame_subsampling_factor.  This will ensure that all the chunks have the same
+  structure, which makes compiling the online computation a little more
+  straightforward.
+ */
+int32 GetChunkSize(const Nnet &nnet,
+                   int32 frame_subsampling_factor,
+                   int32 advised_chunk_size);
+
+/**
+   This function modifies the descriptors in the neural network to change the
+   periodicity with which it expects to read an iVector at its input.
+
+   We normally train neural networks that expect to see an iVector at frame zero
+   only; this is because we train on fixed-size chunks and the iVector doesn't
+   change that much within each chunk.  However, expecting just one iVector
+   isn't that convenient for online recognition because it changes with
+   time, so we modify the iVector input period in the network by replacing
+   expressions like ReplaceIndex(ivector, t, 0) or just "t", with
+   Round(ivector, 10) [assuming ivector_period == 10].  This won't work
+   in every conceivable network, but it does do what you want in the
+   cases of interest.
+
+   It does this in a rather simple way, by getting the config lines that
+   correspond to descriptors, and doing a search-and-replace.
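+   For example (an illustrative config line, not from any real model), a line
+   such as
+     component-node name=affine1 component=affine1 input=Append(input, ReplaceIndex(ivector, t, 0))
+   would become, with ivector_period == 10,
+     component-node name=affine1 component=affine1 input=Append(input, Round(ivector, 10))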
+   It's maybe not ideal, but it was the easiest way to do it.
+
+ */
+void ModifyNnetIvectorPeriod(int32 ivector_period,
+                             Nnet *nnet);
+
+/**
+   This function creates computation requests suitable for giving to CompileOnline().
+   It's intended for use with a 'simple' nnet (one satisfying IsSimpleNnet()), and this
+   basically means that the inputs must be named "input" and possibly "ivector",
+   and that there is an output named "output", and that those are the ones you
+   care about (it won't generate any other outputs or use any other inputs).
+
+   If you want to use online computation for different types of neural net, you
+   should use the deeper interface, CompileOnline().
+
+   @param [in] nnet   The neural net this computation request is to be used with.
+                      This is used to check whether the neural net accepts iVectors,
+                      and to work out the left-context and right-context required
+                      by the network.
+   @param [in] chunk_size  The number of frames of output that will be generated
+                      for each chunk (note: this is the shift in the t-index, which will not
+                      equal the number of output frames if frame_subsampling_factor != 1).
+                      Note: it is required that chunk_size be a multiple of ivector_period,
+                      frame_subsampling_factor, and nnet.Modulus().  You should use
+                      GetChunkSize() to compute the chunk size, giving it an advisory/
+                      minimum chunksize, to make sure it satisfies these properties.
+   @param [in] frame_subsampling_factor  This will normally be 1, but may be
+                      more than 1 (e.g. 3) in chain systems; it determines the frame-skipping
+                      on the output, so we evaluate the output with 't' at multiples of
+                      this value.
+   @param [in] ivector_period  The period with which iVectors are to be supplied
+                      to the network (if you're using iVectors).  Not necessarily the
+                      same as the period with which the ivectors are extracted or
+                      stored on disk (--online-ivector-period).  You will normally set
+                      this to the chunk size.  It must divide the chunk size (if you're
+                      using iVectors).  Note: you should call ModifyNnetIvectorPeriod on
+                      'nnet' before calling this function; otherwise the neural net
+                      will most likely not actually be able to consume the iVector with
+                      this frequency.
+   @param [in] extra_left_context_begin  The additional left-context that
+                      should be supplied to the network on top of the minimum
+                      that the network requires.  We call this extra_left_context_begin
+                      because this only relates to the start of the utterance (t=0).
+   @param [in] extra_right_context  The additional right-context that should be
+                      supplied to the network on top of the minimum that the
+                      network requires (it is applied via the input of the
+                      first chunk).
+   @param [in] num_sequences  The number of separate 'n' values to put in the computation;
+                      normally this will be just 1, but it can be increased to allow
+                      simultaneous operation on multiple streams of input.
+   @param [out] request1  The first of the 3 requests that this function
+                      generates, that the user should then supply to CompileOnline().
+                      Note: this will tend to be the largest computation request in
+                      terms of input, because we have to provide enough left and right
+                      context that it can evaluate the first chunk.  Note: as
+                      elsewhere, the job of duplicating first and last frames enough to
+                      provide the required left/right context to the network, is left
+                      to the caller (at runtime, not during compilation).
+   @param [out] request2  The second of the 3 requests that this function generates.
+                      Caution: none of the inputs and outputs should overlap.
+   @param [out] request3  The third of the 3 requests that this function generates.
+                      It will be the same as request2, except for a time offset.
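+
+   A rough usage sketch (the numbers are invented, error handling is omitted,
+   and 'nnet_rxfilename' is an assumed variable):
+
+     Nnet nnet;
+     ReadKaldiObject(nnet_rxfilename, &nnet);
+     int32 frame_subsampling_factor = 3,
+         chunk_size = GetChunkSize(nnet, frame_subsampling_factor, 20),
+         ivector_period = chunk_size;
+     ModifyNnetIvectorPeriod(ivector_period, &nnet);
+     ComputationRequest request1, request2, request3;
+     CreateOnlineComputationRequestSimple(nnet, chunk_size,
+                                          frame_subsampling_factor,
+                                          ivector_period,
+                                          0,  // extra_left_context_begin
+                                          0,  // extra_right_context
+                                          1,  // num_sequences
+                                          &request1, &request2, &request3);
+     NnetOptimizeOptions optimize_opts;
+     NnetComputation computation;
+     CompileOnline(nnet, optimize_opts, request1, request2, request3,
+                   &computation);
+     // 'computation' now ends with a kGotoLabel command and can be run
+     // chunk by chunk with class NnetComputer.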
+*/
+void CreateOnlineComputationRequestSimple(const Nnet &nnet,
+                                          int32 chunk_size,
+                                          int32 frame_subsampling_factor,
+                                          int32 ivector_period,
+                                          int32 extra_left_context_begin,
+                                          int32 extra_right_context,
+                                          int32 num_sequences,
+                                          ComputationRequest *request1,
+                                          ComputationRequest *request2,
+                                          ComputationRequest *request3);
+
+struct NnetSimpleOnlineComputationOptions {
+
+};
+
+void CreateLoopedComputationSimple(
+    const Nnet &nnet, // ... TODO...
+    );
+
+
+
+
+} // namespace nnet3
+} // namespace kaldi
+
+
+#endif
From 484d5491e764cc8b22ed7f4f7635c41b2896226d Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Thu, 10 Nov 2016 23:15:22 -0500
Subject: [PATCH 012/530] Change name from online to looped (less confusable)

---
 src/nnet3/Makefile                            |  2 +-
 ...mpile-online.cc => nnet-compile-looped.cc} | 20 +++++------
 ...compile-online.h => nnet-compile-looped.h} | 32 ++++++++---------
 src/nnet3/nnet-compile-test.cc                | 16 ++++-----
 src/nnet3/nnet-optimize-utils.cc              | 36 +++++++++----------
 src/nnet3/nnet-optimize-utils.h               |  6 ++--
 src/nnet3/nnet-optimize.cc                    | 16 ++++-----
 src/nnet3/nnet-optimize.h                     |  8 ++---
 8 files changed, 68 insertions(+), 68 deletions(-)
 rename src/nnet3/{nnet-compile-online.cc => nnet-compile-looped.cc} (96%)
 rename src/nnet3/{nnet-compile-online.h => nnet-compile-looped.h} (92%)

diff --git a/src/nnet3/Makefile b/src/nnet3/Makefile
index 60629ab1cbe..8dfa3120fac 100644
--- a/src/nnet3/Makefile
+++ b/src/nnet3/Makefile
@@ -28,7 +28,7 @@ OBJFILES = nnet-common.o nnet-compile.o nnet-component-itf.o \
            discriminative-supervision.o nnet-discriminative-example.o \
            nnet-discriminative-diagnostics.o \
            discriminative-training.o nnet-discriminative-training.o \
-           online-nnet3-decodable-simple.o nnet-compile-online.o
+           online-nnet3-decodable-simple.o nnet-compile-looped.o
 
 LIBNAME = kaldi-nnet3
 
diff --git a/src/nnet3/nnet-compile-online.cc b/src/nnet3/nnet-compile-looped.cc
similarity index 96%
rename from src/nnet3/nnet-compile-online.cc
rename to src/nnet3/nnet-compile-looped.cc
index 21430d79bbc..71329d2e8fe 100644
--- a/src/nnet3/nnet-compile-online.cc
+++ b/src/nnet3/nnet-compile-looped.cc
@@ -1,4 +1,4 @@
-// nnet3/nnet-compile-online.cc
+// nnet3/nnet-compile-looped.cc
 
 // Copyright 2016 Johns Hopkins University (author: Daniel Povey)
 
@@ -17,7 +17,7 @@
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.
 
-#include "nnet3/nnet-compile-online.h"
+#include "nnet3/nnet-compile-looped.h"
 #include "nnet3/nnet-utils.h"
 
 namespace kaldi {
@@ -136,7 +136,7 @@ static void CreateComputationRequestInternal(
 }
 
 
-void CreateOnlineComputationRequestSimple(const Nnet &nnet,
+void CreateLoopedComputationRequestSimple(const Nnet &nnet,
                                           int32 chunk_size,
                                           int32 frame_subsampling_factor,
                                           int32 ivector_period,
@@ -249,7 +249,7 @@ static bool ExtrapolateComputationRequest(
 }
 
 
-/* Internal version of CompileOnline where
+/* Internal version of CompileLooped where
   you specify the number of computation requests (must be >= 3).
*/ -static bool CompileOnlineInternal( +static bool CompileLoopedInternal( const Nnet &nnet, NnetOptimizeOptions optimize_opts, const ComputationRequest &request1, @@ -291,7 +291,7 @@ static bool CompileOnlineInternal( Compiler compiler(requests, nnet); CompilerOptions compiler_opts; compiler.CreateComputation(compiler_opts, computation); - optimize_opts.optimize_online_computation = true; + optimize_opts.optimize_looped_computation = true; Optimize(optimize_opts, nnet, computation); @@ -299,7 +299,7 @@ static bool CompileOnlineInternal( computation->commands.back().command_type == kGotoLabel; } -void CompileOnline(const Nnet &nnet, +void CompileLooped(const Nnet &nnet, const NnetOptimizeOptions &optimize_opts, const ComputationRequest &request1, const ComputationRequest &request2, @@ -310,17 +310,17 @@ void CompileOnline(const Nnet &nnet, for (num_requests = num_requests1; num_requests <= max_requests; num_requests *= factor) { - if (CompileOnlineInternal(nnet, optimize_opts, + if (CompileLoopedInternal(nnet, optimize_opts, request1, request2, request3, num_requests, computation)) { return; } else { - KALDI_VLOG(2) << "Online compilation failed with " + KALDI_VLOG(2) << "Looped compilation failed with " << num_requests << " requests, trying " << (num_requests * factor); } } - KALDI_ERR << "Online compilation failed with " + KALDI_ERR << "Looped compilation failed with " << (num_requests/factor) << " requests, which " << "we expect should be enough... something " << "went wrong."; diff --git a/src/nnet3/nnet-compile-online.h b/src/nnet3/nnet-compile-looped.h similarity index 92% rename from src/nnet3/nnet-compile-online.h rename to src/nnet3/nnet-compile-looped.h index 100c741fe82..00a97292798 100644 --- a/src/nnet3/nnet-compile-online.h +++ b/src/nnet3/nnet-compile-looped.h @@ -1,4 +1,4 @@ -// nnet3/nnet-compile-online.h +// nnet3/nnet-compile-looped.h // Copyright 2016 Johns Hopkins University (author: Daniel Povey) @@ -17,8 +17,8 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. -#ifndef KALDI_NNET3_NNET_COMPILE_ONLINE_H_ -#define KALDI_NNET3_NNET_COMPILE_ONLINE_H_ +#ifndef KALDI_NNET3_NNET_COMPILE_LOOPED_H_ +#define KALDI_NNET3_NNET_COMPILE_LOOPED_H_ #include "nnet3/nnet-optimize.h" #include "nnet3/nnet-utils.h" @@ -30,9 +30,9 @@ namespace nnet3 { /** - CompileOnline() provides an internal interface for 'online' computation. + CompileLooped() provides an internal interface for 'looped' computation. It's usable for inference only (not training), meaning that backprop is - not supported (for now, at least). CompileOnline() allows you to do the + not supported (for now, at least). CompileLooped() allows you to do the neural net computation for small chunks with increasing 't' values, and naturally cache the intermediate activations (rather than recomputing them every time you see new input data). @@ -58,7 +58,7 @@ namespace nnet3 { That's done in the optimization code. */ -void CompileOnline(const Nnet &nnet, +void CompileLooped(const Nnet &nnet, const NnetOptimizeOptions &optimize_opts, const ComputationRequest &request1, const ComputationRequest &request2, @@ -69,7 +69,7 @@ void CompileOnline(const Nnet &nnet, This function gives you a suitable chunk size, which is the smallest number >= 'advised_chunk_size' that is an exact multiple of nnet.Modulus() and frame_subsampling_factor. 
This will ensure that all the chunks have the same
-  structure, which makes compiling the online computation a little more
+  structure, which makes compiling the looped computation a little more
   straightforward.
  */
 int32 GetChunkSize(const Nnet &nnet,
@@ -83,7 +83,7 @@ int32 GetChunkSize(const Nnet &nnet,
   We normally train neural networks that expect to see an iVector at frame zero
   only; this is because we train on fixed-size chunks and the iVector doesn't
   change that much within each chunk.  However, expecting just one iVector
-  isn't that convenient for online recognition because it changes with
+  isn't that convenient for looped recognition because it changes with
   time, so we modify the iVector input period in the network by replacing
   expressions like ReplaceIndex(ivector, t, 0) or just "t", with
   Round(ivector, 10) [assuming ivector_period == 10].  This won't work
@@ -99,14 +99,14 @@ void ModifyNnetIvectorPeriod(int32 ivector_period,
                              Nnet *nnet);
 
 /**
-   This function creates computation requests suitable for giving to CompileOnline().
+   This function creates computation requests suitable for giving to CompileLooped().
   It's intended for use with a 'simple' nnet (one satisfying IsSimpleNnet()), and this
   basically means that the inputs must be named "input" and possibly "ivector",
   and that there is an output named "output", and that those are the ones you
   care about (it won't generate any other outputs or use any other inputs).
 
-  If you want to use online computation for different types of neural net, you
-  should use the deeper interface, CompileOnline().
+  If you want to use looped computation for different types of neural net, you
+  should use the deeper interface, CompileLooped().
 
   @param [in] nnet   The neural net this computation request is to be used with.
                      This is used to check whether the neural net accepts iVectors,
                      and to work out the left-context and right-context required
                      by the network.
@@ -140,7 +140,7 @@ void ModifyNnetIvectorPeriod(int32 ivector_period,
                      normally this will be just 1, but it can be increased to allow
                      simultaneous operation on multiple streams of input.
   @param [out] request1  The first of the 3 requests that this function
-                     generates, that the user should then supply to CompileOnline().
+                     generates, that the user should then supply to CompileLooped().
                      Note: this will tend to be the largest computation request in
                      terms of input, because we have to provide enough left and right
                      context that it can evaluate the first chunk.  Note: as
                      elsewhere, the job of duplicating first and last frames enough to
                      provide the required left/right context to the network, is left
                      to the caller (at runtime, not during compilation).
   @param [out] request2  The second of the 3 requests that this function generates.
                      Caution: none of the inputs and outputs should overlap.
   @param [out] request3  The third of the 3 requests that this function generates.
                      It will be the same as request2, except for a time offset.
  */
-void CreateOnlineComputationRequestSimple(const Nnet &nnet,
+void CreateLoopedComputationRequestSimple(const Nnet &nnet,
                                           int32 chunk_size,
                                           int32 frame_subsampling_factor,
                                           int32 ivector_period,
                                           int32 extra_left_context_begin,
                                           int32 extra_right_context,
                                           int32 num_sequences,
                                           ComputationRequest *request1,
                                           ComputationRequest *request2,
                                           ComputationRequest *request3);
 
-struct NnetSimpleOnlineComputationOptions {
-
+struct NnetSimpleLoopedComputationOptions {
+  // TODO
 };
 
 void CreateLoopedComputationSimple(
-    const Nnet &nnet, // ... TODO...
+    const Nnet &nnet // ... TODO...
); diff --git a/src/nnet3/nnet-compile-test.cc b/src/nnet3/nnet-compile-test.cc index eaff78ad4c6..1b9c0d3e381 100644 --- a/src/nnet3/nnet-compile-test.cc +++ b/src/nnet3/nnet-compile-test.cc @@ -19,7 +19,7 @@ #include "nnet3/nnet-nnet.h" #include "nnet3/nnet-compile.h" -#include "nnet3/nnet-compile-online.h" +#include "nnet3/nnet-compile-looped.h" #include "nnet3/nnet-test-utils.h" namespace kaldi { @@ -59,7 +59,7 @@ void UnitTestNnetCompile() { // this tests compilation where there are more than one // computation-request... this is to test some of the -// low-level utilities that will be used in online computation. +// low-level utilities that will be used in looped computation. void UnitTestNnetCompileMulti() { for (int32 n = 0; n < 20; n++) { struct NnetGenerationOptions gen_config; @@ -117,7 +117,7 @@ void UnitTestNnetCompileMulti() { -void UnitTestNnetCompileOnline() { +void UnitTestNnetCompileLooped() { for (int32 n = 0; n < 20; n++) { struct NnetGenerationOptions gen_config; gen_config.allow_ivector = true; @@ -146,7 +146,7 @@ void UnitTestNnetCompileOnline() { ModifyNnetIvectorPeriod(ivector_period, &nnet); KALDI_LOG << "Nnet info after modifying ivector period is: " << nnet.Info(); - CreateOnlineComputationRequestSimple( + CreateLoopedComputationRequestSimple( nnet, chunk_size, frame_subsampling_factor, ivector_period, extra_left_context_begin, extra_right_context, num_sequences, &request1, &request2, &request3); @@ -159,12 +159,12 @@ void UnitTestNnetCompileOnline() { request3.Print(std::cerr); NnetOptimizeOptions optimize_opts; - // todo: set optimize-online=true. + // todo: set optimize-looped=true. NnetComputation computation; - CompileOnline(nnet, optimize_opts, + CompileLooped(nnet, optimize_opts, request1, request2, request3, &computation); - KALDI_LOG << "Compiled online computation is "; + KALDI_LOG << "Compiled looped computation is "; computation.Print(std::cerr, nnet); } } @@ -179,7 +179,7 @@ int main() { using namespace kaldi::nnet3; SetVerboseLevel(4); - UnitTestNnetCompileOnline(); + UnitTestNnetCompileLooped(); UnitTestNnetCompile(); UnitTestNnetCompileMulti(); diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc index 7a0fafb0b5e..4f9d3ec078c 100644 --- a/src/nnet3/nnet-optimize-utils.cc +++ b/src/nnet3/nnet-optimize-utils.cc @@ -1664,9 +1664,9 @@ void LimitDerivativeTimes(const Nnet &nnet, } -class ComputationOnlineOptimizer { +class ComputationLoopedOptimizer { public: - ComputationOnlineOptimizer(const Nnet &nnet, + ComputationLoopedOptimizer(const Nnet &nnet, NnetComputation *computation): nnet_(nnet), computation_(computation) { } bool Optimize(); @@ -1834,7 +1834,7 @@ class ComputationOnlineOptimizer { // static -int32 ComputationOnlineOptimizer::FindTimeShift( +int32 ComputationLoopedOptimizer::FindTimeShift( const NnetComputation &computation, const std::vector &segment_ends) { KALDI_ASSERT(segment_ends.size() >= 3); @@ -1884,7 +1884,7 @@ int32 ComputationOnlineOptimizer::FindTimeShift( } // static -void ComputationOnlineOptimizer::CreateMatrixPairs( +void ComputationLoopedOptimizer::CreateMatrixPairs( const NnetComputation &computation, std::vector > *matrix_to_pair) { typedef unordered_map, int32, @@ -1920,7 +1920,7 @@ void ComputationOnlineOptimizer::CreateMatrixPairs( } // static -void ComputationOnlineOptimizer::GetPairToMatrixMap( +void ComputationLoopedOptimizer::GetPairToMatrixMap( std::vector > &matrix_to_pair, unordered_map, int32, PairHasher > *pair_to_matrix) { int32 num_matrices = matrix_to_pair.size(); @@ -1932,7 
+1932,7 @@ void ComputationOnlineOptimizer::GetPairToMatrixMap( // static -void ComputationOnlineOptimizer::ConvertListsToPairLists( +void ComputationLoopedOptimizer::ConvertListsToPairLists( const std::vector > &active_matrices, const std::vector > &matrix_to_pair, std::vector > > *active_pairs) { @@ -1956,7 +1956,7 @@ void ComputationOnlineOptimizer::ConvertListsToPairLists( } // static -void ComputationOnlineOptimizer::NormalizePairLists( +void ComputationLoopedOptimizer::NormalizePairLists( std::vector > > *active_pairs, std::vector *time_offsets) { int32 num_segments = active_pairs->size(); @@ -1983,7 +1983,7 @@ void ComputationOnlineOptimizer::NormalizePairLists( // static -bool ComputationOnlineOptimizer::FindFirstRepeat( +bool ComputationLoopedOptimizer::FindFirstRepeat( const std::vector > > &normalized_active_pairs, const std::vector &time_offsets, int32 time_shift_per_segment, @@ -2019,7 +2019,7 @@ bool ComputationOnlineOptimizer::FindFirstRepeat( } // static -void ComputationOnlineOptimizer::PairListToMatrixList( +void ComputationLoopedOptimizer::PairListToMatrixList( const std::vector > &pair_list, const unordered_map, int32, PairHasher > &pair_to_matrix, std::vector *matrix_list) { @@ -2041,7 +2041,7 @@ void ComputationOnlineOptimizer::PairListToMatrixList( // static -void ComputationOnlineOptimizer::FindActiveMatrices( +void ComputationLoopedOptimizer::FindActiveMatrices( const NnetComputation &computation, const Analyzer &analyzer, const std::vector &segment_end_commands, @@ -2079,7 +2079,7 @@ void ComputationOnlineOptimizer::FindActiveMatrices( } // static -void ComputationOnlineOptimizer::CheckIdentifiedMatrices( +void ComputationLoopedOptimizer::CheckIdentifiedMatrices( const NnetComputation &computation, const std::vector &list1, const std::vector &list2, @@ -2114,7 +2114,7 @@ void ComputationOnlineOptimizer::CheckIdentifiedMatrices( // static -void ComputationOnlineOptimizer::GetMatrixSwapOrder( +void ComputationLoopedOptimizer::GetMatrixSwapOrder( const std::vector &matrices1, const std::vector &matrices2, std::vector > *swaps) { @@ -2166,7 +2166,7 @@ void ComputationOnlineOptimizer::GetMatrixSwapOrder( } // static -void ComputationOnlineOptimizer::AddMatrixSwapCommands( +void ComputationLoopedOptimizer::AddMatrixSwapCommands( const std::vector &matrices1, const std::vector &matrices2, NnetComputation *computation) { @@ -2201,7 +2201,7 @@ void ComputationOnlineOptimizer::AddMatrixSwapCommands( } // static -void ComputationOnlineOptimizer::FormInfiniteLoop( +void ComputationLoopedOptimizer::FormInfiniteLoop( int32 command1, int32 command2, NnetComputation *computation) { KALDI_ASSERT(static_cast(computation->commands.size()) >= @@ -2221,11 +2221,11 @@ void ComputationOnlineOptimizer::FormInfiniteLoop( -bool ComputationOnlineOptimizer::Optimize() { +bool ComputationLoopedOptimizer::Optimize() { analyzer_.Init(nnet_, *computation_); KALDI_ASSERT(!computation_->matrix_debug_info.empty() && "You must request matrix debug info when compiling " - "online computations."); + "looped computations."); // get the indexes of the separator commands at the ends of segments. 
   std::vector segment_ends;
@@ -2304,9 +2304,9 @@ bool ComputationOnlineOptimizer::Optimize() {
 }
 
 
-void OptimizeOnlineComputation(const Nnet &nnet,
+void OptimizeLoopedComputation(const Nnet &nnet,
                                NnetComputation *computation) {
-  ComputationOnlineOptimizer optimizer(nnet, computation);
+  ComputationLoopedOptimizer optimizer(nnet, computation);
   optimizer.Optimize();
 }
 
diff --git a/src/nnet3/nnet-optimize-utils.h b/src/nnet3/nnet-optimize-utils.h
index f2448f46fe5..29f05add695 100644
--- a/src/nnet3/nnet-optimize-utils.h
+++ b/src/nnet3/nnet-optimize-utils.h
@@ -566,7 +566,7 @@ void IdentifyIndexesArgs(std::vector *commands,
 void IdentifyIndexesRangesArgs(std::vector *commands,
                                std::vector *indexes_ranges_args);
 
-/// This function tries to optimize computation 'computation' for an 'online'
+/// This function tries to optimize computation 'computation' for a 'looped'
 /// computation.  It expects as input a computation with no backprop but with
 /// multiple 'segments' separated by command kNoOperation, where each segment
 /// corresponds to a new chunk of input and output.  It tries to locate a pair
@@ -578,13 +578,13 @@ void IdentifyIndexesRangesArgs(std::vector *commands,
 /// case by checking whether kGotoLabel is the last command in the computation.
 /// [If this optimization fails, the whole computation may have to be
 /// regenerated with more segments.]
-void OptimizeOnlineComputation(const Nnet &nnet,
+void OptimizeLoopedComputation(const Nnet &nnet,
                                NnetComputation *computation);
 
 
 /// This function ensures that the arg1 of a final command of type kGotoLabel
 /// points to the command of type kNoOperationLabel.  This is necessary
-/// if you do any other type of optimization after 'OptimizeOnlineComputation()'.
+/// if you do any other type of optimization after 'OptimizeLoopedComputation()'.
 void FixGotoLabel(NnetComputation *computation);
 
diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc
index 0128d6b8c1b..44c611e2b4a 100644
--- a/src/nnet3/nnet-optimize.cc
+++ b/src/nnet3/nnet-optimize.cc
@@ -451,18 +451,18 @@ void Optimize(const NnetOptimizeOptions &config,
       CheckComputation(nnet, *computation, false);
   }
 
-  // the online computation optimization has to go before
+  // the looped computation optimization has to go before
   // 'RemoveUnnecessaryAllocation()'.  We don't gate this by 'config.optimize'
-  // because it's necessary for online computation to run.
-  if (config.optimize_online_computation){
-    OptimizeOnlineComputation(nnet, computation);
+  // because it's necessary for looped computation to run.
+  if (config.optimize_looped_computation){
+    OptimizeLoopedComputation(nnet, computation);
     if (GetVerboseLevel() >= 4)
       CheckComputation(nnet, *computation, false);
   }
 
   if (config.optimize && config.allocate_from_other &&
-      !config.optimize_online_computation) {
-    // Don't do this if it's an online computation because we're not sure if it
+      !config.optimize_looped_computation) {
+    // Don't do this if it's a looped computation because we're not sure if it
     // would be correct in that case, as written.  In any case the performance
     // benefit is tiny.
     RemoveUnnecessaryAllocation(nnet, computation);
@@ -476,7 +476,7 @@ void Optimize(const NnetOptimizeOptions &config,
   // other optimizations.)
   ConsolidateIoOperations(nnet, computation);
 
-  if (config.optimize_online_computation)
+  if (config.optimize_looped_computation)
     FixGotoLabel(computation);
 
   if (GetVerboseLevel() >= 4)
@@ -716,7 +716,7 @@ void ConsolidateIoOperations(const Nnet &nnet,
   if (ends_with_goto) {
     // If, before this operation, the last command was kGotoLabel, remove all
     // commands that have been reordered to go after the kGotoLabel command
-    // [they would be unreachable anyway.]  This relates to online computations.
+    // [they would be unreachable anyway.]  This relates to looped computations.
     // It may seem wrong that we are just removing these
     // kAcceptInput/kProvideOutput commands, but the reason it's OK
     // (and preserves equivalence with the code prior to this function call)
diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h
index 303b08a4150..27871552017 100644
--- a/src/nnet3/nnet-optimize.h
+++ b/src/nnet3/nnet-optimize.h
@@ -46,10 +46,10 @@ struct NnetOptimizeOptions {
   bool allocate_from_other;
   int32 min_deriv_time;
   int32 max_deriv_time;
-  // optimize_online_computation is a 'hidden config' not available from
+  // optimize_looped_computation is a 'hidden config' not available from
   // the command line; it's set to true to enable the optimization for
-  // online computation that turns a linear computation into a loop.
-  bool optimize_online_computation;
+  // looped computation that turns a linear computation into a loop.
+  bool optimize_looped_computation;
 
   NnetOptimizeOptions():
       optimize(true),
       consolidate_model_update(true),
@@ -64,7 +64,7 @@ struct NnetOptimizeOptions {
       allocate_from_other(true),
       min_deriv_time(std::numeric_limits::min()),
       max_deriv_time(std::numeric_limits::max()),
-      optimize_online_computation(false) { }
+      optimize_looped_computation(false) { }
 
   void Register(OptionsItf *opts) {
     opts->Register("optimize", &optimize, "Set this to false to turn off all "
From c605ade151d29bbcbaa77551cd94911071c10df8 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Sun, 13 Nov 2016 19:19:38 -0500
Subject: [PATCH 013/530] Finishing the decodable objects (not yet for online
 computation), adding tests, and debugging to the extent that the tests
 succeed.
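
A rough usage sketch of the new decodable object (illustrative: it assumes
the transition model, the acoustic model and the features were read from
disk elsewhere, and it elides error handling):

    NnetSimpleLoopedComputationOptions opts;
    opts.frame_subsampling_factor = 3;  // e.g. for 'chain' models.
    TransitionModel trans_model;        // assumed already read.
    AmNnetSimple am_nnet;               // assumed already read.
    DecodableNnetSimpleLoopedInfo info(opts, &am_nnet);
    Matrix<BaseFloat> feats;            // features for one utterance.
    DecodableAmNnetSimpleLooped decodable(info, trans_model, feats,
                                          NULL, NULL, 1);
    // 'decodable' can now be given to a decoder; frames must be
    // accessed in increasing order.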
--- src/nnet3/Makefile | 3 +- src/nnet3/decodable-simple-looped.cc | 251 ++++++++++++++++++++ src/nnet3/decodable-simple-looped.h | 328 ++++++++++++++++++++++++++ src/nnet3/nnet-am-decodable-simple.cc | 43 ++-- src/nnet3/nnet-am-decodable-simple.h | 24 +- src/nnet3/nnet-compile-looped.h | 7 - src/nnet3/nnet-computation.h | 4 +- src/nnet3/nnet-compute-test.cc | 70 +++++- src/nnet3/nnet-compute.cc | 16 +- src/nnet3/nnet-compute.h | 4 +- src/nnet3/nnet-graph.cc | 24 +- src/nnet3/nnet-graph.h | 8 + src/nnet3/nnet-optimize-utils.cc | 222 ++++++++++++++++- src/nnet3/nnet-optimize-utils.h | 184 +-------------- src/nnet3/nnet-optimize.cc | 117 ++++++--- src/nnet3/nnet-optimize.h | 5 + src/nnet3/nnet-test-utils.cc | 2 + src/nnet3/nnet-test-utils.h | 2 + src/nnet3/nnet-utils.cc | 8 + src/nnet3/nnet-utils.h | 3 + 20 files changed, 1053 insertions(+), 272 deletions(-) create mode 100644 src/nnet3/decodable-simple-looped.cc create mode 100644 src/nnet3/decodable-simple-looped.h diff --git a/src/nnet3/Makefile b/src/nnet3/Makefile index 8dfa3120fac..ef50f9960e1 100644 --- a/src/nnet3/Makefile +++ b/src/nnet3/Makefile @@ -28,7 +28,8 @@ OBJFILES = nnet-common.o nnet-compile.o nnet-component-itf.o \ discriminative-supervision.o nnet-discriminative-example.o \ nnet-discriminative-diagnostics.o \ discriminative-training.o nnet-discriminative-training.o \ - online-nnet3-decodable-simple.o nnet-compile-looped.o + online-nnet3-decodable-simple.o nnet-compile-looped.o \ + decodable-simple-looped.o LIBNAME = kaldi-nnet3 diff --git a/src/nnet3/decodable-simple-looped.cc b/src/nnet3/decodable-simple-looped.cc new file mode 100644 index 00000000000..9e580dc121f --- /dev/null +++ b/src/nnet3/decodable-simple-looped.cc @@ -0,0 +1,251 @@ +// nnet3/decodable-simple-looped.cc + +// Copyright 2016 Johns Hopkins University (author: Daniel Povey) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+ +#include "nnet3/decodable-simple-looped.h" +#include "nnet3/nnet-utils.h" +#include "nnet3/nnet-compile-looped.h" + +namespace kaldi { +namespace nnet3 { + + +DecodableNnetSimpleLoopedInfo::DecodableNnetSimpleLoopedInfo( + const NnetSimpleLoopedComputationOptions &opts, + Nnet *nnet): + opts_(opts), nnet_(*nnet) { + Init(opts, nnet); +} + +DecodableNnetSimpleLoopedInfo::DecodableNnetSimpleLoopedInfo( + const NnetSimpleLoopedComputationOptions &opts, + const Vector &priors, + Nnet *nnet): + opts_(opts), nnet_(*nnet), log_priors_(priors) { + if (log_priors_.Dim() != 0) + log_priors_.ApplyLog(); + Init(opts, nnet); +} + + +DecodableNnetSimpleLoopedInfo::DecodableNnetSimpleLoopedInfo( + const NnetSimpleLoopedComputationOptions &opts, + AmNnetSimple *am_nnet): + opts_(opts), nnet_(am_nnet->GetNnet()), log_priors_(am_nnet->Priors()) { + if (log_priors_.Dim() != 0) + log_priors_.ApplyLog(); + Init(opts, &(am_nnet->GetNnet())); +} + + +void DecodableNnetSimpleLoopedInfo::Init( + const NnetSimpleLoopedComputationOptions &opts, + Nnet *nnet) { + opts.Check(); + KALDI_ASSERT(IsSimpleNnet(*nnet)); + has_ivectors_ = (nnet->InputDim("ivector") > 0); + int32 left_context, right_context; + ComputeSimpleNnetContext(*nnet, &left_context, &right_context); + frames_left_context_ = left_context + opts.extra_left_context_initial; + frames_right_context_ = right_context; + frames_per_chunk_ = GetChunkSize(*nnet, opts_.frame_subsampling_factor, + opts.frames_per_chunk); + output_dim_ = nnet->OutputDim("output"); + KALDI_ASSERT(output_dim_ > 0); + // note, ivector_period is hardcoded to the same as frames_per_chunk_. + int32 ivector_period = frames_per_chunk_; + if (has_ivectors_) + ModifyNnetIvectorPeriod(ivector_period, nnet); + + ComputationRequest request1, request2, request3; + int32 num_sequences = 1; // we're processing one utterance at a time. 
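+  // (These three requests describe the first chunk and the next two regularly
+  // spaced chunks; CompileLooped() extrapolates the pattern from there.)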
+ int32 extra_right_context = 0; + CreateLoopedComputationRequestSimple(*nnet, frames_per_chunk_, + opts_.frame_subsampling_factor, + ivector_period, opts.extra_left_context_initial, + extra_right_context, + num_sequences, + &request1, &request2, &request3); + + CompileLooped(*nnet, opts_.optimize_config, request1, request2, request3, + &computation_); + computation_.ComputeCudaIndexes(); + KALDI_LOG << "Computation is:"; + computation_.Print(std::cerr, *nnet); +} + + +DecodableNnetSimpleLooped::DecodableNnetSimpleLooped( + const DecodableNnetSimpleLoopedInfo &info, + const MatrixBase &feats, + const VectorBase *ivector, + const MatrixBase *online_ivectors, + int32 online_ivector_period): + info_(info), + computer_(info_.opts_.compute_config, info_.computation_, + info_.nnet_, NULL), + feats_(feats), + ivector_(ivector), online_ivector_feats_(online_ivectors), + online_ivector_period_(online_ivector_period), + num_chunks_computed_(0), + current_log_post_subsampled_offset_(-1) { + num_subsampled_frames_ = + (feats_.NumRows() + info_.opts_.frame_subsampling_factor - 1) / + info_.opts_.frame_subsampling_factor; + KALDI_ASSERT(!(ivector != NULL && online_ivectors != NULL)); + KALDI_ASSERT(!(online_ivectors != NULL && online_ivector_period <= 0 && + "You need to set the --online-ivector-period option!")); +} + + +void DecodableNnetSimpleLooped::GetOutputForFrame( + int32 subsampled_frame, VectorBase *output) { + KALDI_ASSERT(subsampled_frame >= current_log_post_subsampled_offset_ && + "Frames must be accessed in order."); + while (subsampled_frame >= current_log_post_subsampled_offset_ + + current_log_post_.NumRows()) + AdvanceChunk(); + output->CopyFromVec(current_log_post_.Row( + subsampled_frame - current_log_post_subsampled_offset_)); +} + +int32 DecodableNnetSimpleLooped::GetIvectorDim() const { + if (ivector_ != NULL) + return ivector_->Dim(); + else if (online_ivector_feats_ != NULL) + return online_ivector_feats_->NumCols(); + else + return 0; +} + + +void DecodableNnetSimpleLooped::AdvanceChunk() { + int32 begin_input_frame, end_input_frame; + if (num_chunks_computed_ == 0) { + begin_input_frame = -info_.frames_left_context_; + // note: end is last plus one. 
+    end_input_frame = info_.frames_per_chunk_ + info_.frames_right_context_;
+  } else {
+    begin_input_frame = num_chunks_computed_ * info_.frames_per_chunk_;
+    end_input_frame = begin_input_frame + info_.frames_per_chunk_;
+  }
+  CuMatrix<BaseFloat> feats_chunk(end_input_frame - begin_input_frame,
+                                  feats_.NumCols(), kUndefined);
+
+  int32 num_features = feats_.NumRows();
+  if (begin_input_frame >= 0 && end_input_frame <= num_features) {
+    SubMatrix<BaseFloat> this_feats(feats_,
+                                    begin_input_frame,
+                                    end_input_frame - begin_input_frame,
+                                    0, feats_.NumCols());
+    feats_chunk.CopyFromMat(this_feats);
+  } else {
+    Matrix<BaseFloat> this_feats(end_input_frame - begin_input_frame,
+                                 feats_.NumCols());
+    for (int32 r = begin_input_frame; r < end_input_frame; r++) {
+      int32 input_frame = r;
+      if (input_frame < 0) input_frame = 0;
+      if (input_frame >= num_features) input_frame = num_features - 1;
+      this_feats.Row(r - begin_input_frame).CopyFromVec(
+          feats_.Row(input_frame));
+    }
+    feats_chunk.CopyFromMat(this_feats);
+  }
+  computer_.AcceptInput("input", &feats_chunk);
+
+  if (info_.has_ivectors_) {
+    Vector<BaseFloat> ivector;
+    GetCurrentIvector(end_input_frame, &ivector);
+    CuMatrix<BaseFloat> cu_ivector(1, ivector.Dim());
+    cu_ivector.Row(0).CopyFromVec(ivector);
+    computer_.AcceptInput("ivector", &cu_ivector);
+  }
+  computer_.Run();
+
+  {
+    // Fetch the output destructively, so the data stays
+    // on GPU if we're using one, while avoiding unnecessary copies if we're not
+    // using the GPU.
+
+    // Note: it's possible in theory that if you had weird recurrence that went
+    // directly from the output, the call to GetOutputDestructive() would cause
+    // a crash on the next chunk.  But we don't anticipate this will happen in
+    // practice.
+    CuMatrix<BaseFloat> output;
+    computer_.GetOutputDestructive("output", &output);
+
+    if (info_.log_priors_.Dim() != 0) {
+      // subtract log-prior (divide by prior)
+      output.AddVecToRows(-1.0, info_.log_priors_);
+    }
+    // apply the acoustic scale
+    output.Scale(info_.opts_.acoustic_scale);
+    current_log_post_.Resize(0, 0);
+    current_log_post_.Swap(&output);
+  }
+  KALDI_ASSERT(current_log_post_.NumRows() == info_.frames_per_chunk_ /
+               info_.opts_.frame_subsampling_factor &&
+               current_log_post_.NumCols() == info_.output_dim_);
+
+  num_chunks_computed_++;
+
+  current_log_post_subsampled_offset_ =
+      (num_chunks_computed_ - 1) *
+      (info_.frames_per_chunk_ / info_.opts_.frame_subsampling_factor);
+}
+
+
+void DecodableNnetSimpleLooped::GetCurrentIvector(int32 input_frame,
+                                                  Vector<BaseFloat> *ivector) {
+  if (!info_.has_ivectors_)
+    return;
+  if (ivector_ != NULL) {
+    *ivector = *ivector_;
+    return;
+  } else if (online_ivector_feats_ == NULL) {
+    KALDI_ERR << "Neural net expects iVectors but none provided.";
+  }
+  KALDI_ASSERT(online_ivector_period_ > 0);
+  int32 ivector_frame = input_frame / online_ivector_period_;
+  KALDI_ASSERT(ivector_frame >= 0);
+  if (ivector_frame >= online_ivector_feats_->NumRows())
+    ivector_frame = online_ivector_feats_->NumRows() - 1;
+  KALDI_ASSERT(ivector_frame >= 0 && "ivector matrix cannot be empty.");
+  *ivector = online_ivector_feats_->Row(ivector_frame);
+}
+
+
+DecodableAmNnetSimpleLooped::DecodableAmNnetSimpleLooped(
+    const DecodableNnetSimpleLoopedInfo &info,
+    const TransitionModel &trans_model,
+    const MatrixBase<BaseFloat> &feats,
+    const VectorBase<BaseFloat> *ivector,
+    const MatrixBase<BaseFloat> *online_ivectors,
+    int32 online_ivector_period):
+    decodable_nnet_(info, feats, ivector, online_ivectors, online_ivector_period),
+    trans_model_(trans_model) { }
+
+BaseFloat DecodableAmNnetSimpleLooped::LogLikelihood(int32 frame,
+                                                     int32 transition_id) {
+  int32 pdf_id = trans_model_.TransitionIdToPdf(transition_id);
+  return decodable_nnet_.GetOutput(frame, pdf_id);
+}
+
+
+
+} // namespace nnet3
+} // namespace kaldi
diff --git a/src/nnet3/decodable-simple-looped.h b/src/nnet3/decodable-simple-looped.h
new file mode 100644
index 00000000000..fe40c220f8f
--- /dev/null
+++ b/src/nnet3/decodable-simple-looped.h
@@ -0,0 +1,328 @@
+// nnet3/decodable-simple-looped.h
+
+// Copyright 2016 Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_NNET3_DECODABLE_SIMPLE_LOOPED_H_
+#define KALDI_NNET3_DECODABLE_SIMPLE_LOOPED_H_
+
+#include <vector>
+#include "base/kaldi-common.h"
+#include "gmm/am-diag-gmm.h"
+#include "hmm/transition-model.h"
+#include "itf/decodable-itf.h"
+#include "nnet3/nnet-optimize.h"
+#include "nnet3/nnet-compute.h"
+#include "nnet3/am-nnet-simple.h"
+
+namespace kaldi {
+namespace nnet3 {
+
+// See also nnet-am-decodable-simple.h, which is a decodable object based on
+// breaking up the input into fixed chunks.  The decodable object defined here
+// is based on 'looped' computations, which naturally handle infinite
+// left-context (but are only ideal for systems whose recurrence goes only in
+// the forward direction, i.e. not BLSTMs, because there isn't a natural way
+// to enforce extra right context for each chunk).
+
+
+// Note: the 'simple' in the name means it applies to networks for which
+// IsSimpleNnet(nnet) would return true.  'looped' means we use looped
+// computations, with a kGotoLabel statement at the end of the computation.
+struct NnetSimpleLoopedComputationOptions {
+  int32 extra_left_context_initial;
+  int32 frame_subsampling_factor;
+  int32 frames_per_chunk;
+  BaseFloat acoustic_scale;
+  bool debug_computation;
+  NnetOptimizeOptions optimize_config;
+  NnetComputeOptions compute_config;
+
+  NnetSimpleLoopedComputationOptions():
+      extra_left_context_initial(0),
+      frame_subsampling_factor(1),
+      frames_per_chunk(20),
+      acoustic_scale(0.1),
+      debug_computation(false) { }
+
+  void Check() const {
+    KALDI_ASSERT(extra_left_context_initial >= 0 &&
+                 frame_subsampling_factor > 0 && frames_per_chunk > 0 &&
+                 acoustic_scale > 0.0);
+  }
+
+  void Register(OptionsItf *opts) {
+    opts->Register("extra-left-context-initial", &extra_left_context_initial,
+        "Extra left context to use at the first frame of an utterance (note: "
+        "this will just consist of repeats of the first frame, and should not "
+        "usually be necessary).");
+    opts->Register("frame-subsampling-factor", &frame_subsampling_factor,
+        "Required if the frame-rate of the output (e.g. in 'chain' "
+                   "models) is less than the frame-rate of the original "
+                   "alignment.");
+    opts->Register("acoustic-scale", &acoustic_scale,
+                   "Scaling factor for acoustic log-likelihoods");
+    opts->Register("frames-per-chunk", &frames_per_chunk,
+                   "Number of frames in each chunk that is separately evaluated "
+                   "by the neural net.  Measured before any subsampling, if the "
+                   "--frame-subsampling-factor option is used (i.e. it counts "
+                   "input frames).  This is only advisory (it may be rounded up "
+                   "if needed).");
+    opts->Register("debug-computation", &debug_computation, "If true, turn on "
+                   "debug for the actual computation (very verbose!)");
+
+    // register the optimization options with the prefix "optimization".
+    ParseOptions optimization_opts("optimization", opts);
+    optimize_config.Register(&optimization_opts);
+
+    // register the compute options with the prefix "computation".
+    ParseOptions compute_opts("computation", opts);
+    compute_config.Register(&compute_opts);
+  }
+};
+
+// forward declaration.
+class DecodableNnetSimpleLooped;
+
+
+/**
+   When you instantiate class DecodableNnetSimpleLooped, you should give it
+   a const reference to this class, that has been previously initialized.
+ */
+class DecodableNnetSimpleLoopedInfo {
+ public:
+  // The constructor takes a non-const pointer to 'nnet' because it may have to
+  // modify it to be able to take multiple iVectors.
+  DecodableNnetSimpleLoopedInfo(const NnetSimpleLoopedComputationOptions &opts,
+                                Nnet *nnet);
+
+  DecodableNnetSimpleLoopedInfo(const NnetSimpleLoopedComputationOptions &opts,
+                                AmNnetSimple *nnet);
+
+  // this constructor is for use in testing.
+  DecodableNnetSimpleLoopedInfo(const NnetSimpleLoopedComputationOptions &opts,
+                                const Vector<BaseFloat> &priors,
+                                Nnet *nnet);
+
+ protected:
+  void Init(const NnetSimpleLoopedComputationOptions &opts,
+            Nnet *nnet);
+
+  friend class DecodableNnetSimpleLooped;
+
+
+  const NnetSimpleLoopedComputationOptions &opts_;
+  const Nnet &nnet_;
+
+  // the log priors (or the empty vector if the priors are not set in the model)
+  CuVector<BaseFloat> log_priors_;
+
+
+  // frames_left_context equals the model left context plus any extra left context.
+  int32 frames_left_context_;
+  // frames_right_context is the same as the right-context of the model.
+  int32 frames_right_context_;
+  // The frames_per_chunk_ equals the number of input frames we need for each
+  // chunk (except for the first chunk).  This divided by
+  // opts_.frame_subsampling_factor gives the number of output frames.
+  int32 frames_per_chunk_;
+
+  // The output dimension of the neural network.
+  int32 output_dim_;
+
+  // True if the neural net accepts iVectors.  If so, the neural net will have
+  // been modified to accept the iVectors.
+  bool has_ivectors_;
+
+  // The compiled, 'looped' computation.
+  NnetComputation computation_;
+};
+
+/*
+  This class handles the neural net computation; it's mostly accessed
+  via other wrapper classes.
+
+  It can accept just input features, or input features plus iVectors.  */
+class DecodableNnetSimpleLooped {
+ public:
+  /**
+     This constructor takes features as input, and you can either supply a
+     single iVector input, estimated in batch-mode ('ivector'), or 'online'
+     iVectors ('online_ivectors' and 'online_ivector_period'), or none at all.
+     Note: it stores references to all arguments to the constructor, so don't
+     delete them till this goes out of scope.
+
+     @param [in] info   This helper class contains all the static pre-computed information
+                        this class needs, and contains a pointer to the neural net.
+     @param [in] feats  The input feature matrix.
+     @param [in] ivector If you are using iVectors estimated in batch mode,
+                         a pointer to the iVector, else NULL.
+     @param [in] online_ivectors
+                        If you are using iVectors estimated 'online'
+                        a pointer to the iVectors, else NULL.
+     @param [in] online_ivector_period  If you are using iVectors estimated 'online'
+                        (i.e. if online_ivectors != NULL) gives the periodicity
+                        (in frames) with which the iVectors are estimated.
+  */
+  DecodableNnetSimpleLooped(const DecodableNnetSimpleLoopedInfo &info,
+                            const MatrixBase<BaseFloat> &feats,
+                            const VectorBase<BaseFloat> *ivector = NULL,
+                            const MatrixBase<BaseFloat> *online_ivectors = NULL,
+                            int32 online_ivector_period = 1);
+
+
+  // returns the number of frames of likelihoods.  The same as feats_.NumRows()
+  // in the normal case (but may be less if opts_.frame_subsampling_factor !=
+  // 1).
+  inline int32 NumFrames() const { return num_subsampled_frames_; }
+
+  inline int32 OutputDim() const { return info_.output_dim_; }
+
+  // Gets the output for a particular frame, with 0 <= frame < NumFrames().
+  // 'output' must be correctly sized (with dimension OutputDim()).  Note:
+  // you're expected to call this, and GetOutput(), in an order of increasing
+  // frames.  If you deviate from this, one of these calls may crash.
+  void GetOutputForFrame(int32 subsampled_frame,
+                         VectorBase<BaseFloat> *output);
+
+  // Gets the output for a particular frame and pdf_id, with
+  // 0 <= subsampled_frame < NumFrames(),
+  // and 0 <= pdf_id < OutputDim().
+  inline BaseFloat GetOutput(int32 subsampled_frame, int32 pdf_id) {
+    KALDI_ASSERT(subsampled_frame >= current_log_post_subsampled_offset_ &&
+                 "Frames must be accessed in order.");
+    while (subsampled_frame >= current_log_post_subsampled_offset_ +
+                               current_log_post_.NumRows())
+      AdvanceChunk();
+    return current_log_post_(subsampled_frame -
+                             current_log_post_subsampled_offset_,
+                             pdf_id);
+  }
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableNnetSimpleLooped);
+
+  // This function does the computation for the next chunk.
+  void AdvanceChunk();
+
+  void AdvanceChunkInternal(const MatrixBase<BaseFloat> &input_feats,
+                            const VectorBase<BaseFloat> &ivector);
+
+  // Gets the iVector for the specified frame, if we are
+  // using iVectors (else does nothing).
+  void GetCurrentIvector(int32 input_frame,
+                         Vector<BaseFloat> *ivector);
+
+  // returns dimension of the provided iVectors if supplied, or 0 otherwise.
+  int32 GetIvectorDim() const;
+
+  const DecodableNnetSimpleLoopedInfo &info_;
+
+  NnetComputer computer_;
+
+  const MatrixBase<BaseFloat> &feats_;
+  // note: num_subsampled_frames_ will equal feats_.NumRows() in the normal case
+  // when opts_.frame_subsampling_factor == 1.
+  int32 num_subsampled_frames_;
+
+  // ivector_ is the iVector if we're using iVectors that are estimated in batch
+  // mode.
+  const VectorBase<BaseFloat> *ivector_;
+
+  // online_ivector_feats_ is the iVectors if we're using online-estimated ones.
+  const MatrixBase<BaseFloat> *online_ivector_feats_;
+  // online_ivector_period_ helps us interpret online_ivector_feats_; it's the
+  // number of frames the rows of ivector_feats are separated by.
+  int32 online_ivector_period_;
+
+  // The current log-posteriors that we got from the last time we
+  // ran the computation.
+  Matrix<BaseFloat> current_log_post_;
+
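+  // (Illustrative access pattern, e.g. from a decoder: frames are requested in
+  // increasing order; whenever the requested subsampled frame falls past the
+  // rows cached in current_log_post_, GetOutput() above calls AdvanceChunk(),
+  // which refills current_log_post_ and advances
+  // current_log_post_subsampled_offset_.)
+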
+  // The number of chunks we have computed so far.
+  int32 num_chunks_computed_;
+
+  // The time-offset of the current log-posteriors, equals
+  // (num_chunks_computed_ - 1) *
+  //    (info_.frames_per_chunk_ / info_.opts_.frame_subsampling_factor).
+  int32 current_log_post_subsampled_offset_;
+};
+
+class DecodableAmNnetSimpleLooped: public DecodableInterface {
+ public:
+  /**
+     This constructor takes features as input, and you can either supply a
+     single iVector input, estimated in batch-mode ('ivector'), or 'online'
+     iVectors ('online_ivectors' and 'online_ivector_period'), or none at all.
+     Note: it stores references to all arguments to the constructor, so don't
+     delete them till this goes out of scope.
+
+
+     @param [in] info   This helper class contains all the static pre-computed information
+                        this class needs, and contains a pointer to the neural net.  If
+                        you want prior subtraction to be done, you should have initialized
+                        this with the constructor that takes class AmNnetSimple.
+     @param [in] trans_model  The transition model to use.  This takes care of the
+                        mapping from transition-id (which is an arg to
+                        LogLikelihood()) to pdf-id (which is used internally).
+     @param [in] feats  The input feature matrix.
+     @param [in] ivector If you are using iVectors estimated in batch mode,
+                         a pointer to the iVector, else NULL.
+     @param [in] online_ivectors
+                        If you are using iVectors estimated 'online'
+                        a pointer to the iVectors, else NULL.
+     @param [in] online_ivector_period If you are using iVectors estimated 'online'
+                        (i.e. if online_ivectors != NULL) gives the periodicity
+                        (in frames) with which the iVectors are estimated.
+  */
+  DecodableAmNnetSimpleLooped(const DecodableNnetSimpleLoopedInfo &info,
+                              const TransitionModel &trans_model,
+                              const MatrixBase<BaseFloat> &feats,
+                              const VectorBase<BaseFloat> *ivector = NULL,
+                              const MatrixBase<BaseFloat> *online_ivectors = NULL,
+                              int32 online_ivector_period = 1);
+
+
+  virtual BaseFloat LogLikelihood(int32 frame, int32 transition_id);
+
+  virtual inline int32 NumFramesReady() const {
+    return decodable_nnet_.NumFrames();
+  }
+
+  virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }
+
+  virtual bool IsLastFrame(int32 frame) const {
+    KALDI_ASSERT(frame < NumFramesReady());
+    return (frame == NumFramesReady() - 1);
+  }
+
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableAmNnetSimpleLooped);
+  DecodableNnetSimpleLooped decodable_nnet_;
+  const TransitionModel &trans_model_;
+};
+
+
+
+} // namespace nnet3
+} // namespace kaldi
+
+#endif  // KALDI_NNET3_DECODABLE_SIMPLE_LOOPED_H_
diff --git a/src/nnet3/nnet-am-decodable-simple.cc b/src/nnet3/nnet-am-decodable-simple.cc
index 9116c9461ac..9d2176965b1 100644
--- a/src/nnet3/nnet-am-decodable-simple.cc
+++ b/src/nnet3/nnet-am-decodable-simple.cc
@@ -276,32 +276,37 @@ void DecodableNnetSimple::DoNnetComputation(
 }
 
 void DecodableNnetSimple::CheckAndFixConfigs() {
-  static bool warned_modulus = false,
-      warned_subsampling = false;
+  static bool warned_frames_per_chunk = false;
   int32 nnet_modulus = nnet_.Modulus();
   if (opts_.frame_subsampling_factor < 1 ||
       opts_.frames_per_chunk < 1)
     KALDI_ERR << "--frame-subsampling-factor and --frames-per-chunk must be > 0";
-  if (opts_.frames_per_chunk % opts_.frame_subsampling_factor != 0) {
-    int32 f = opts_.frame_subsampling_factor,
-        frames_per_chunk = f * ((opts_.frames_per_chunk + f - 1) / f);
-    if (!warned_subsampling) {
-      warned_subsampling = true;
-      KALDI_LOG << "Increasing --frames-per-chunk from "
"Increasing --frames-per-chunk from " - << opts_.frames_per_chunk << " to " - << frames_per_chunk << " to make it a multiple of " - << "--frame-subsampling-factor=" - << opts_.frame_subsampling_factor; + KALDI_ASSERT(nnet_modulus > 0); + int32 n = Lcm(opts_.frame_subsampling_factor, nnet_modulus); + + if (opts_.frames_per_chunk % n != 0) { + // round up to the nearest multiple of n. + int32 frames_per_chunk = n * ((opts_.frames_per_chunk + n - 1) / n); + if (!warned_frames_per_chunk) { + warned_frames_per_chunk = true; + if (nnet_modulus == 1) { + // simpler error message. + KALDI_LOG << "Increasing --frames-per-chunk from " + << opts_.frames_per_chunk << " to " + << frames_per_chunk << " to make it a multiple of " + << "--frame-subsampling-factor=" + << opts_.frame_subsampling_factor; + } else { + KALDI_LOG << "Increasing --frames-per-chunk from " + << opts_.frames_per_chunk << " to " + << frames_per_chunk << " due to " + << "--frame-subsampling-factor=" + << opts_.frame_subsampling_factor << " and " + << "nnet shift-invariance modulus = " << nnet_modulus; + } } opts_.frames_per_chunk = frames_per_chunk; } - if (opts_.frames_per_chunk % nnet_modulus != 0 && !warned_modulus) { - warned_modulus = true; - KALDI_WARN << "It may be more efficient to set the --frames-per-chunk " - << "(currently " << opts_.frames_per_chunk << " to a " - << "multiple of the network's shift-invariance modulus " - << nnet_modulus; - } } diff --git a/src/nnet3/nnet-am-decodable-simple.h b/src/nnet3/nnet-am-decodable-simple.h index 5f7a0307abe..e604765e09a 100644 --- a/src/nnet3/nnet-am-decodable-simple.h +++ b/src/nnet3/nnet-am-decodable-simple.h @@ -33,6 +33,11 @@ namespace kaldi { namespace nnet3 { +// See also the decodable object in decodable-simple-looped.h, which is better +// and faster in most situations, including TDNNs and LSTMs (but not for +// BLSTMs). + + // Note: the 'simple' in the name means it applies to networks // for which IsSimpleNnet(nnet) would return true. struct NnetSimpleComputationOptions { @@ -251,9 +256,11 @@ class DecodableAmNnetSimple: public DecodableInterface { @param [in] opts The options class. Warning: it includes an acoustic weight, whose default is 0.1; you may sometimes want to change this to 1.0. - @param [in] nnet The neural net that we're going to do the computation with - @param [in] priors Vector of priors-- if supplied and nonempty, we subtract - the log of these priors from the nnet output. + @param [in] trans_model The transition model to use. This takes care of the + mapping from transition-id (which is an arg to + LogLikelihood()) to pdf-id (which is used internally). + @param [in] am_nnet The neural net that we're going to do the computation with; + we also get the priors to divide by, if applicable, from here. @param [in] feats A pointer to the input feature matrix; must be non-NULL. We @param [in] ivector If you are using iVectors estimated in batch mode, @@ -329,13 +336,12 @@ class DecodableAmNnetSimpleParallel: public DecodableInterface { @param [in] opts The options class. Warning: it includes an acoustic weight, whose default is 0.1; you may sometimes want to change this to 1.0. - @param [in] nnet The neural net that we're going to do the computation with - @param [in] priors Vector of priors-- if supplied and nonempty, we subtract - the log of these priors from the nnet output. + @param [in] trans_model The transition model to use. This takes care of the + mapping from transition-id (which is an arg to + LogLikelihood()) to pdf-id (which is used internally). 
+       @param [in] am_nnet  The neural net that we're going to do the computation with;
+                          it may provide priors to divide by.
        @param [in] feats A pointer to the input feature matrix; must be non-NULL.
-                         We
-       @param [in] ivector If you are using iVectors estimated in batch mode,
-                          a pointer to the iVector, else NULL.
        @param [in] ivector If you are using iVectors estimated in batch mode,
                           a pointer to the iVector, else NULL.
        @param [in] online_ivectors
diff --git a/src/nnet3/nnet-compile-looped.h b/src/nnet3/nnet-compile-looped.h
index 00a97292798..f6ff47045fe 100644
--- a/src/nnet3/nnet-compile-looped.h
+++ b/src/nnet3/nnet-compile-looped.h
@@ -163,13 +163,6 @@ void CreateLoopedComputationRequestSimple(const Nnet &nnet,
                                           ComputationRequest *request2,
                                           ComputationRequest *request3);
 
-struct NnetSimpleLoopedComputationOptions {
-  // TODO
-};
-
-void CreateLoopedComputationSimple(
-    const Nnet &nnet  // ... TODO...
-    );
 
diff --git a/src/nnet3/nnet-computation.h b/src/nnet3/nnet-computation.h
index 857dde1547b..da3a43bd15f 100644
--- a/src/nnet3/nnet-computation.h
+++ b/src/nnet3/nnet-computation.h
@@ -168,7 +168,9 @@ struct ComputationRequest {
    - kAllocMatrixZeroed: Allocate and zero a matrix.  arg1 = submatrix index.
    - kDeallocMatrix: Deallocate a matrix.  arg1 = submatrix index.
    - kAllocMatrixFromOther: initialize matrix with submatrix index arg1 using memory
-     from matrix with submatrix index arg2 (using shallow swap).
+     from matrix with submatrix index arg2 (using shallow swap).  Note: the
+     code relating to the 'looped' computation relies on the fact that this is
+     a swap, so kSwapMatrix might be a better name, but we're keeping the old name.
    - kAllocMatrixFromOtherZeroed: initialize matrix with submatrix index arg1 using
      memory from matrix with submatrix index arg2 (using shallow swap), then zero
      the matrix we just allocated.
diff --git a/src/nnet3/nnet-compute-test.cc b/src/nnet3/nnet-compute-test.cc
index c485cc06636..81cc67f71ae 100644
--- a/src/nnet3/nnet-compute-test.cc
+++ b/src/nnet3/nnet-compute-test.cc
@@ -24,6 +24,8 @@
 #include "nnet3/nnet-test-utils.h"
 #include "nnet3/nnet-optimize.h"
 #include "nnet3/nnet-compute.h"
+#include "nnet3/nnet-am-decodable-simple.h"
+#include "nnet3/decodable-simple-looped.h"
 
 namespace kaldi {
 namespace nnet3 {
@@ -71,13 +73,64 @@ void UnitTestComputationRequestIo(ComputationRequest *request) {
   }
 }
 
-void TestNnetDecodable(const ComputationRequest &request,
-                       const std::vector<Matrix<BaseFloat> > &inputs,
-                       const Nnet &nnet,
-                       const CuMatrixBase<BaseFloat> &reference_output) {
-  // DecodableAmNnetSimpleOptions opts;
-  // This is a placeholder for where we'll eventually test either the decodable
-  // object or something similar to it (e.g. a base class)
+// this checks that a couple of different decodable objects give the same
+// answer.
+void TestNnetDecodable(Nnet *nnet) {
+  int32 num_frames = 5 + RandInt(1, 100),
+      input_dim = nnet->InputDim("input"),
+      output_dim = nnet->OutputDim("output"),
+      ivector_dim = std::max(0, nnet->InputDim("ivector"));
+  Matrix<BaseFloat> input(num_frames, input_dim);
+
+
+  input.SetRandn();
+  Vector<BaseFloat> ivector(ivector_dim);
+  ivector.SetRandn();
+
+  Vector<BaseFloat> priors(RandInt(0, 1) == 0 ? output_dim : 0);
+  if (priors.Dim() != 0) {
+    priors.SetRandn();
+    priors.ApplyExp();
+  }
+
+  Matrix<BaseFloat> output1(num_frames, output_dim),
+      output2(num_frames, output_dim);
+
+  {
+    NnetSimpleComputationOptions opts;
+    opts.frames_per_chunk = RandInt(5, 25);
+    CachingOptimizingCompiler compiler(*nnet);
+    DecodableNnetSimple decodable(opts, *nnet, priors, input, &compiler,
+                                  (ivector_dim != 0 ? &ivector : NULL));
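+    // collect the chunk-based decodable object's output, frame by frame.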
+    for (int32 t = 0; t < num_frames; t++) {
+      SubVector<BaseFloat> row(output1, t);
+      decodable.GetOutputForFrame(t, &row);
+    }
+  }
+
+  {
+    NnetSimpleLoopedComputationOptions opts;
+    // caution: this may modify nnet, by changing how it consumes iVectors.
+    DecodableNnetSimpleLoopedInfo info(opts, priors, nnet);
+    DecodableNnetSimpleLooped decodable(info, input,
+                                        (ivector_dim != 0 ? &ivector : NULL));
+    for (int32 t = 0; t < num_frames; t++) {
+      SubVector<BaseFloat> row(output2, t);
+      decodable.GetOutputForFrame(t, &row);
+    }
+  }
+
+
+  if (!NnetIsRecurrent(*nnet) &&
+      nnet->Info().find("statistics-extraction") == std::string::npos) {
+    // this equivalence will not hold for recurrent nnets or those that
+    // have the statistics-extraction/statistics-pooling layers.
+    for (int32 t = 0; t < num_frames; t++) {
+      SubVector<BaseFloat> row1(output1, t),
+          row2(output2, t);
+      KALDI_ASSERT(row1.ApproxEqual(row2));
+    }
+  }
 }
 
 void UnitTestNnetCompute() {
@@ -145,8 +198,6 @@ void UnitTestNnetCompute() {
     computer.Run();
     const CuMatrixBase<BaseFloat> &output(computer.GetOutput("output"));
 
-    TestNnetDecodable(request, inputs, nnet, output);
-
     KALDI_LOG << "Output sum is " << output.Sum();
     CuMatrix<BaseFloat> output_deriv(output.NumRows(), output.NumCols());
     output_deriv.SetRandn();
@@ -163,6 +214,7 @@ void UnitTestNnetCompute() {
       }
     }
   }
+  TestNnetDecodable(&nnet);
 }
 
diff --git a/src/nnet3/nnet-compute.cc b/src/nnet3/nnet-compute.cc
index 7171e6b0273..75c0c464c90 100644
--- a/src/nnet3/nnet-compute.cc
+++ b/src/nnet3/nnet-compute.cc
@@ -145,8 +145,8 @@ void NnetComputer::DebugAfterExecute(int32 command,
 }
 
 
-void NnetComputer::ExecuteCommand(int32 command) {
-  const NnetComputation::Command &c = computation_.commands[command];
+void NnetComputer::ExecuteCommand() {
+  const NnetComputation::Command &c = computation_.commands[program_counter_];
   int32 m1, m2;
   try {
     switch (c.command_type) {
@@ -279,7 +279,11 @@ void NnetComputer::ExecuteCommand(int32 command) {
         dest.AddRowRanges(src, pairs);
         break;
       }
-      case kNoOperation: case kNoOperationMarker:
+      case kNoOperation: case kNoOperationMarker: case kNoOperationLabel:
+        break;
+      case kGotoLabel:
+        KALDI_ASSERT(computation_.commands[c.arg1].command_type == kNoOperationLabel);
+        program_counter_ = c.arg1;
         break;
       default:
         KALDI_ERR << "Invalid command in computation";
@@ -290,12 +294,12 @@ void NnetComputer::ExecuteCommand(int32 command) {
     computation_.GetCommandStrings(nnet_, &preamble, &command_strings_);
     KALDI_WARN << "Printing some background info since error was detected";
     KALDI_LOG << preamble;
-    for (int32 prev_c = 0; prev_c < command; prev_c++)
+    for (int32 prev_c = 0; prev_c < program_counter_; prev_c++)
      KALDI_LOG << command_strings_[prev_c];
   }
 
   // the following will re-throw the error, but now we've printed more info
   // about what went wrong.
-  KALDI_ERR << "Error running command " << command_strings_[command];
+  KALDI_ERR << "Error running command " << command_strings_[program_counter_];
 }
 
@@ -381,7 +385,7 @@ void NnetComputer::Run() {
     }
     if (debug_)
       DebugBeforeExecute(program_counter_, &info);
-    ExecuteCommand(program_counter_);
+    ExecuteCommand();
     if (debug_) {
       double total_elapsed_now = timer.Elapsed();
       DebugAfterExecute(program_counter_, info,
diff --git a/src/nnet3/nnet-compute.h b/src/nnet3/nnet-compute.h
index 32839755828..0f7da2e01be 100644
--- a/src/nnet3/nnet-compute.h
+++ b/src/nnet3/nnet-compute.h
@@ -129,8 +129,8 @@ class NnetComputer {
 
   std::vector<CuMatrix<BaseFloat> > matrices_;
 
-  // executes the command in computation_.commands[command].
-  void ExecuteCommand(int32 command);
+  // executes the command in computation_.commands[program_counter_].
+  void ExecuteCommand();
 
   // Returns the matrix index where the input (if is_output==false) or output
   // matrix index for "node_name" is stored.  This looks at the next command (at
diff --git a/src/nnet3/nnet-graph.cc b/src/nnet3/nnet-graph.cc
index e66a34fc26a..a0216b9189f 100644
--- a/src/nnet3/nnet-graph.cc
+++ b/src/nnet3/nnet-graph.cc
@@ -39,7 +39,7 @@ void NnetToDirectedGraph(const Nnet &nnet,
     switch (node.node_type) {
       case kInput:
         break;  // no node dependencies.
-      case kDescriptor: 
+      case kDescriptor:
         node.descriptor.GetNodeDependencies(&node_dependencies);
         break;
       case kComponent:
@@ -265,7 +265,7 @@ std::string PrintGraphToString(const std::vector<std::vector<int32> > &graph) {
 void ComputeNnetComputationEpochs(const Nnet &nnet,
                                   std::vector<int32> *node_to_epoch) {
   KALDI_ASSERT(node_to_epoch != NULL);
-  
+
   std::vector<std::vector<int32> > graph;
   NnetToDirectedGraph(nnet, &graph);
   KALDI_VLOG(6) << "graph is: " << PrintGraphToString(graph);
@@ -276,7 +276,7 @@ void ComputeNnetComputationEpochs(const Nnet &nnet,
   std::vector<std::vector<int32> > scc_graph;
   MakeSccGraph(graph, sccs, &scc_graph);
   KALDI_VLOG(6) << "scc graph is: " << PrintGraphToString(scc_graph);
-  
+
   std::vector<int32> scc_node_to_epoch;
   ComputeTopSortOrder(scc_graph, &scc_node_to_epoch);
   if (GetVerboseLevel() >= 6) {
@@ -285,7 +285,7 @@ void ComputeNnetComputationEpochs(const Nnet &nnet,
       os << scc_node_to_epoch[i] << ", ";
     KALDI_VLOG(6) << "scc_node_to_epoch is: " << os.str();
   }
-  
+
   node_to_epoch->clear();
   node_to_epoch->resize(graph.size());
   for (int32 i = 0; i < sccs.size(); ++i) {
@@ -297,5 +297,21 @@ void ComputeNnetComputationEpochs(const Nnet &nnet,
   }
 }
 
+bool GraphHasCycles(const std::vector<std::vector<int32> > &graph) {
+  std::vector<std::vector<int32> > sccs;
+  FindSccs(graph, &sccs);
+  for (size_t i = 0; i < sccs.size(); i++) {
+    if (sccs[i].size() > 1)
+      return true;
+  }
+  // the next code checks for links from a state to itself.
+  int32 num_nodes = graph.size();
+  for (int32 i = 0; i < num_nodes; i++)
+    for (std::vector<int32>::const_iterator iter = graph[i].begin(),
+             end = graph[i].end(); iter != end; ++iter)
+      if (*iter == i) return true;
+  return false;
+}
+
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/nnet-graph.h b/src/nnet3/nnet-graph.h
index 24c26176742..27e3fd609f3 100644
--- a/src/nnet3/nnet-graph.h
+++ b/src/nnet3/nnet-graph.h
@@ -55,10 +55,18 @@ void NnetToDirectedGraph(const Nnet &nnet,
 /// of destination-nodes of arcs coming from the current node),
 /// partition it into strongly connected components (i.e. within
 /// each SCC, all nodes are reachable from all other nodes).
+/// Each element of 'sccs' is a list of node indexes that are
+/// in that scc.
 void FindSccs(const std::vector<std::vector<int32> > &graph,
               std::vector<std::vector<int32> > *sccs);
 
+
+/// This function returns 'true' if the graph represented in 'graph'
+/// contains cycles (including cycles where a single node has an arc
+/// to itself).
+bool GraphHasCycles(const std::vector<std::vector<int32> > &graph);
+
+
 /// Given a list of sccs of a graph (e.g. as computed by FindSccs), compute a
 /// directed graph on the sccs.  Of course this directed graph will be acyclic.
 void MakeSccGraph(const std::vector<std::vector<int32> > &graph,
diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc
index 4f9d3ec078c..4d199d4a0d6 100644
--- a/src/nnet3/nnet-optimize-utils.cc
+++ b/src/nnet3/nnet-optimize-utils.cc
@@ -147,7 +147,112 @@ void IdentifyIndexesArgs(std::vector<NnetComputation::Command> *commands,
   }
 }
 
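+// As a concrete (hypothetical) sketch of what renumbering does: if a
+// computation's matrices are [m0, m1, m2, m3] and optimizations have left m2
+// unreferenced, the renumberer maps m3 -> m2, shrinks the matrix list, and
+// rewrites every command and submatrix that mentioned m3; the class below
+// applies the same idea to submatrices and the various index vectors.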
+// We declare this class in the .cc file, we don't need to export it.
+// It's used inside RenumberComputation.
+class ComputationRenumberer {
+ public:
+  ComputationRenumberer(NnetComputation *computation):
+      computation_(computation) { }
+
+  void Renumber();
+ private:
+  // this function removes unused vectors within the indexes_multi_ array, i.e.
+  // ones that are not referenced in the computation.
+  void RemoveUnusedIndexesMulti();
+  // this function computes the submatrix_is_used_ vector, saying whether each
+  // of the original submatrices is referenced somewhere.
+  void ComputeSubmatrixIsUsed();
+  // this function computes the matrix_is_used_ vector (from the
+  // submatrix_is_used_ vector, from computation_->input_output_info, and from
+  // computation_->commands), saying whether each of the original matrices is
+  // referenced somewhere, directly or indirectly.
+  void ComputeMatrixIsUsed();
+  // This function sets up mappings from old to new matrix and submatrix indexes,
+  // writing to num_{,sub}matrices_new_ and old_to_new_{,sub}matrix_.
+  void SetUpMappings();
+  // This function renumbers submatrix indexes appearing within commands and
+  // indexes_multi_, and then removes unused submatrices from the list of
+  // submatrices while leaving the matrix-indexes at their old values (they will
+  // be mapped by RenumberMatrices()).
+  void RenumberSubmatrices();
+  // renumber matrix indexes appearing within 'commands', within 'submatrices'
+  // and 'input_output_info'; renumber 'matrices' and if applicable
+  // 'debug_info'.
+  void RenumberMatrices();
+  // removes duplicates within the indexes_multi array itself.
+  void RemoveIndexesMultiDuplicates();
+  // removes unused elements and duplicates within 'computation->indexes'
+  void RenumberIndexes();
+  // removes unused elements and duplicates within 'computation->indexes_ranges'
+  void RenumberIndexesRanges();
+
+  struct SubMatrixHasher {
+    SubMatrixHasher() { }
+    size_t operator () (const NnetComputation::SubMatrixInfo &submat) const {
+      // these numbers are arbitrarily chosen primes.
+      return submat.matrix_index +
+          19553 * submat.row_offset +
+          29297 * submat.num_rows +
+          42209 * submat.col_offset +
+          56527 * submat.num_cols;
+    }
+  };
+
+
+  // Here, T will be int32 or std::pair<int32, int32>
+  template <class T>
+  struct PointerCompare {
+    // This provides an operator < on two vectors of ints or pairs of ints.  It
+    // is designed to provide a total order on the vectors while accessing as
+    // small a portion of the vectors' data as possible.  It's used in removing
+    // duplicates from computation_->indexes_multi and computation_->indexes.
+    // First it compares the length, then it does lexicographical compare.
+    bool operator ()(const std::vector<T> *ptr1,
+                     const std::vector<T> *ptr2) const {
+      size_t size1 = ptr1->size(), size2 = ptr2->size();
+      if (size1 < size2) return true;
+      else if (size1 > size2) return false;
+      else return (*ptr1 < *ptr2);  // use the std::vector operator <, which is
+                                    // lexicographical comparison.
+    }
+  };
+
+  /// creates a renumbering that removes the elements in "to_remove",
+  /// e.g. if old_num_elements = 3 and to_remove = [1], would output
+  /// the vector [ 0, -1, 1 ].
+  static void CreateRenumbering(int32 old_num_elements,
+                                const std::vector<int32> &to_remove,
+                                std::vector<int32> *renumbering);
+
+  /// creates a renumbering from old to new index that removes the unused
+  /// elements, e.g. if used == [ true, false, true, true], would output the
+  /// vector [ 0, -1, 1, 2 ].  Returns number of new elements, i.e. the
+  /// number of elements of 'used' that were true.
+  static int32 CreateRenumbering(const std::vector<bool> &used,
+                                 std::vector<int32> *renumbering);
+
+  // vector of bool indexed by original submatrix-index, that is true if a
+  // submatrix-index is used somewhere in the computation (always true for
+  // the zeroth element).
+  std::vector<bool> submatrix_is_used_;
+  // vector of bool indexed by original submatrix-index, that is true if a
+  // submatrix-index will be kept; this is like submatrix_is_used_, but for
+  // duplicate submatrices, all but the first duplicate will be marked false.
+  std::vector<bool> submatrix_is_kept_;
+  // vector of bool indexed by original-matrix-index > 0, that is true if a
+  // matrix-index is used somewhere in the computation, directly or indirectly.
+  // always true for the zeroth element.
+  std::vector<bool> matrix_is_used_;
+  NnetComputation *computation_;
+  int32 num_matrices_new_;
+  int32 num_submatrices_new_;
+  std::vector<int32> old_to_new_matrix_;  // numbered by orig-matrix-index, gives
+                                          // new-matrix-index.  -1 for removed
+                                          // ones.
+  std::vector<int32> old_to_new_submatrix_;  // numbered by orig-submatrix-index,
+                                             // gives new-submatrix-index.  -1
+                                             // for removed ones.
+};
 
 
 // static
 int32 ComputationRenumberer::CreateRenumbering(
@@ -547,6 +652,7 @@ void RenumberComputation(NnetComputation *computation) {
   renumberer.Renumber();
 }
 
+
 void RemoveNoOps(NnetComputation *computation) {
   std::vector<NnetComputation::Command>::iterator
      input_iter = computation->commands.begin(),
@@ -844,6 +950,77 @@ std::pair<bool,bool> VariableMergingOptimizer::MayBeMerged(
   return std::pair<bool,bool>(false,false);
 }
 
+
+/** This class is responsible for consolidating the model-update part of
+    backprop commands, for components in (e.g.) recurrent networks that need to
+    have many separate backprop commands, into more efficient single commands
+    operating on consolidated data in larger matrices.  This is useful for
+    recurrent networks.  */
+class ModelUpdateConsolidator {
+ public:
+  ModelUpdateConsolidator(const Nnet &nnet,
+                          NnetComputation *computation);
+  void ConsolidateModelUpdate();
+ private:
+  void ConsolidateUpdateForComponent(
+      int32 component,
+      const std::vector<int32> &backprop_commands);
+
+  /// This function, called at the end of ConsolidateModelUpdate(), takes the
+  /// commands that we have put in extra_commands_, final_commands_ and
+  /// final_deallocate_commands_, and puts them in the appropriate place in
+  /// computation->commands_.
+  void AddCommandsToComputation();
+
+  /// You call this function when you want to consolidate the values of a list
+  /// of submatrices taken just prior to particular commands.  The input
+  /// 'commands' and 'submatrices' lists must be the same size, and size must be
+  /// > 1.  This function will create a new matrix that is the row-wise
+  /// concatenation of all these submatrices, with values taken just prior to
+  /// the respective command indexes.  This function will add to
+  /// extra_commands_ the commands to do the copying at the appropriate places
+  /// (at the supplied command indexes; they will be inserted just before).  The
+  /// return value is the submatrix index of a submatrix that represents the
+  /// whole of the consolidated matrix.  This function will insert, at the
+  /// beginning of the computation (in extra_commands_[0]), a command to
+  /// initialize the matrix; and will append to final_deallocate_commands_ the
+  /// commands to deallocate the matrix.  If computation_->matrix_debug_info is
+  /// nonempty, this function will also update computation_->matrix_debug_info
+  /// with suitable values for the newly added matrix.
+  int32 ConsolidateSubmatrices(
+      const std::vector<int32> &commands,
+      const std::vector<int32> &submatrices);
+
+  /// This function, called from ConsolidateSubmatrices, will
+  /// update 'debug_info' by appending the corresponding 'indexes' from
+  /// the existing debug info for this submatrix.  It will also set
+  /// the 'is_deriv' of '*debug_info' to the same value as the
+  /// debug info for 'submatrix_index', and set the 'node_index' to the
+  /// 'node_index' in the debug info for that submatrix-index.
+  /// It requires that computation_->matrix_debug_info be nonempty.
+  void AppendDebugInfoForSubmatrix(
+      int32 submatrix_index,
+      NnetComputation::MatrixDebugInfo *debug_info) const;
+
+  const Nnet &nnet_;
+  NnetComputation *computation_;
+
+  // Indexed by the original command index in *computation_ (and sized to the
+  // original number of commands in *computation_ before we added anything),
+  // extra_commands_[c] contains a list of commands that need to be inserted
+  // just before command c in the previously existing computation.
+  std::vector<std::vector<NnetComputation::Command> > extra_commands_;
+
+  // This is a list of kBackprop commands that will be placed after the
+  // commands in 'computation_->commands' and 'extra_commands_', but before
+  // the 'final_deallocate_commands_'.
+  std::vector<NnetComputation::Command> final_commands_;
+  // This is a list of commands to deallocate our 'consolidated' matrices; the
+  // commands will be placed after the commands in 'final_commands_'.
+  std::vector<NnetComputation::Command> final_deallocate_commands_;
+};
+
+
 void ModelUpdateConsolidator::AppendDebugInfoForSubmatrix(
     int32 submatrix_index,
     NnetComputation::MatrixDebugInfo *debug_info) const {
@@ -867,7 +1044,6 @@ void ModelUpdateConsolidator::AppendDebugInfoForSubmatrix(
     src_info.cindexes.begin() + row_end);
 }
 
-// see comment by declaration in header.
 int32 ModelUpdateConsolidator::ConsolidateSubmatrices(
     const std::vector<int32> &commands,
     const std::vector<int32> &submatrices) {
@@ -1041,6 +1217,19 @@ void ModelUpdateConsolidator::ConsolidateModelUpdate() {
   AddCommandsToComputation();
 }
 
+
+void ConsolidateModelUpdate(const Nnet &nnet,
+                            NnetComputation *computation) {
+  // The following if-statement is an optimization: if the computation
+  // request(s) had need_model_derivative == false, there would be nothing to
+  // optimize, so don't bother trying.
+  if (!computation->need_model_derivative)
+    return;
+  ModelUpdateConsolidator consolidator(nnet, computation);
+  consolidator.ConsolidateModelUpdate();
+}
+
+
 // inline
 void DerivativeTimeLimiter::GetPruneValues(int32 initial_submatrix,
                                            int32 new_submatrix,
@@ -2315,18 +2504,29 @@ void FixGotoLabel(NnetComputation *computation) {
   int32 num_commands = computation->commands.size();
   if (num_commands == 0)
     return;
-  if (computation->commands[num_commands-1].command_type == kGotoLabel) {
-    int32 dest_command = computation->commands[num_commands-1].arg1;
-    if (static_cast<size_t>(dest_command) < computation->commands.size() &&
-        computation->commands[dest_command].command_type == kNoOperationLabel)
-      return;  // nothing to fix.
-    for (int32 c = 0; c + 1 < num_commands; c++) {
-      if (computation->commands[c].command_type == kNoOperationLabel) {
-        computation->commands[num_commands-1].arg1 = c;
-        return;
+  for (int32 c = num_commands - 1; c >= 0; c--) {
+    if (computation->commands[c].command_type == kGotoLabel) {
+      int32 dest_command = computation->commands[c].arg1;
+      if (static_cast<size_t>(dest_command) < computation->commands.size() &&
+          computation->commands[dest_command].command_type == kNoOperationLabel)
+        return;  // nothing to fix.
+      for (int32 d = 0; d + 1 < num_commands; d++) {
+        if (computation->commands[d].command_type == kNoOperationLabel) {
+          computation->commands[c].arg1 = d;
+          return;
+        }
       }
+      KALDI_ERR << "Label not found.";
+    } else if (computation->commands[c].command_type == kProvideOutput) {
+      // sometimes kProvideOutput commands are temporarily ordered after
+      // the kGotoLabel command, and we need to handle that case.
+      continue;
+    } else {
+      // it looks like there is no 'goto' command in this computation--
+      // if there were, it would be right at the end, possibly followed by
+      // kProvideOutput commands.
+      break;
     }
-    KALDI_ERR << "Label not found.";
   }
 }
diff --git a/src/nnet3/nnet-optimize-utils.h b/src/nnet3/nnet-optimize-utils.h
index 29f05add695..e289ff9126c 100644
--- a/src/nnet3/nnet-optimize-utils.h
+++ b/src/nnet3/nnet-optimize-utils.h
@@ -182,182 +182,20 @@ class VariableMergingOptimizer {
 };
 
 
-/** This class is responsible for consolidating the model-update part of
-    backprop commands, for components in (e.g.) recurrent networks that need to
-    have many separate backprop commands, into more efficient single commands
-    operating on consolidated data in larger matrices.  This is useful for
-    recurrent networks. */
-class ModelUpdateConsolidator {
- public:
-  ModelUpdateConsolidator(const Nnet &nnet,
-                          NnetComputation *computation);
-  void ConsolidateModelUpdate();
- private:
-  void ConsolidateUpdateForComponent(
-      int32 component,
-      const std::vector<int32> &backprop_commands);
-
-  /// This function, called at the end of ConsolidateModelUpdate(), takes the
-  /// commands that we have put in extra_commands_, final_commands_ and
-  /// final_deallocate_commands_, and puts them in the appropriate place in
-  /// computation->commands_.
-  void AddCommandsToComputation();
-
-  /// You call this function when you want to consolidate the values of a list
-  /// of submatrices taken just prior to particular commands.  The input
-  /// 'commands' and 'submatrices' lists must be the same size, and size must be
-  /// > 1.  This function will create a new matrix that is the row-wise
-  /// concatentation of all these submatrices, with values taken just prior to
-  /// the respective command indexes.  This function will will add to
-  /// extra_commands_ the commands to do the copying at the appropriate places
-  /// (at the supplied command indexes; they will be inserted just before).  The
-  /// return value is the submatrix index of a submatrix that represents the
-  /// whole of the consolidated matrix.  This command will insert, at the
-  /// beginning of the computation (in extra_commands_[0]), a command to
-  /// initialize the matrix; and will append to final_deallocate_commands_ the
-  /// commands to deallocate the matrix.  If computation_->matrix_debug_info is
-  /// nonempty, this function will also update computation_->matrix_debug_info
-  /// with suitable values for the newly added matrix
-  int32 ConsolidateSubmatrices(
-      const std::vector<int32> &commands,
-      const std::vector<int32> &submatrices);
-
-  /// This function, called from ConsolidateSubmatrices, will
-  /// update 'debug_info' by appending the corresponding 'indexes' from
-  /// the existing debug info for this submatrix.  It will also set
-  /// the 'is_deriv' of '*debug_info' to the same value as the
-  /// debug info for 'submatrix_index', and set the 'node_index' to the
-  /// 'node_index' in the debug info for that submatrix-index.
-  /// It requires that computation_->matrix_debug_info be nonempty.
-  void AppendDebugInfoForSubmatrix(
-      int32 submatrix_index,
-      NnetComputation::MatrixDebugInfo *debug_info) const;
-
-  const Nnet &nnet_;
-  NnetComputation *computation_;
-
-  // Indexed by the original command index in *computation_ (and sized to the
-  // original number of commands in *computation_ before we added anything),
-  // extra_commands_[c] contains a list of commands that need to be inserted
-  // just before command c in the previously existing computation.
-  std::vector<std::vector<NnetComputation::Command> > extra_commands_;
-
-  // This is as list of kBackprop commands that will be placed after the
-  // commands in 'computation_->commands' and 'extra_commands_', but before
-  // the 'final_deallocate_commands_'.
-  std::vector<NnetComputation::Command> final_commands_;
-  // This is a list of commands to deallocate our 'consolidated' matrices; the
-  // commands will be placed after the commands in 'final_commands_'.
-  std::vector<NnetComputation::Command> final_deallocate_commands_;
-};
-
-
-// We declare this class in the .cc file, we don't need to export it.
-// It's used inside RenumberComputation.
-class ComputationRenumberer {
- public:
-  ComputationRenumberer(NnetComputation *computation):
-      computation_(computation) { }
-
-  void Renumber();
- private:
-  // this function removes unused vectors within the indexes_multi_ array, i.e.
-  // ones that are not referenced in the computation.
-  void RemoveUnusedIndexesMulti();
-  // this function computes the submatrix_is_used_ vector, saying whether each
-  // of the original submatrices is referenced somewhere.
-  void ComputeSubmatrixIsUsed();
-  // this function computes the matrix_is_used_ vector (from the
-  // submatrix_is_used_ vector, from computation_->input_output_info, and from
-  // computation_->commands, saying whether each of the original matrices is
-  // referenced somewhere, directly or indirectly.
-  void ComputeMatrixIsUsed();
-  // This function sets up mappings from old to new matrix and submatrix indexes,
-  // writing to num_{,sub}matrices_new_ and old_to_new_{,sub}matrix_.
-  void SetUpMappings();
-  // This function renumbers submatrix indexes appearing within commands and
-  // indexes_multi_, and then removes unused submatrices from the list of
-  // submatrices while leaving the matrix-indexes at their old values (they will
-  // be mapped by RenumberMatrices()).
-  void RenumberSubmatrices();
-  // renumber matrix indexes appearing within 'commmands', within 'submatrices'
-  // and 'input_output_info'; renumber 'matrices' and if applicable
-  // 'debug_info'.
-  void RenumberMatrices();
-  // removes duplicates within the indexes_multi array itself.
-  void RemoveIndexesMultiDuplicates();
-  // removes unused elements and duplicates within 'computation->indexes'
-  void RenumberIndexes();
-  // removes unused elements and duplicates within 'computation->indexes_ranges'
-  void RenumberIndexesRanges();
-
-  struct SubMatrixHasher {
-    SubMatrixHasher() { }
-    size_t operator () (const NnetComputation::SubMatrixInfo &submat) const {
-      // these numbers are arbitrarily chosen primes.
-      return submat.matrix_index +
-          19553 * submat.row_offset +
-          29297 * submat.num_rows +
-          42209 * submat.col_offset +
-          56527 * submat.num_cols;
-    }
-  };
+/**
+   This optimization consolidates the model-update part of backprop commands,
+   for components in (e.g.) recurrent networks that need to have many separate
+   backprop commands, into more efficient single commands operating on
+   consolidated data in larger matrices.  This is useful for recurrent
+   networks.  The resulting computation separates the backprop for
+   data-derivatives from the model-update part of backprop.
+ */
+void ConsolidateModelUpdate(const Nnet &nnet,
+                            NnetComputation *computation);
 
-  // Here, T will be int32 or std::pair<int32, int32>
-  template <class T>
-  struct PointerCompare {
-    // This provides an operator < on two vectors of ints or pairs of ints.  It
-    // is designed to provide a total order on the vectors while accessing as
-    // small a portion of the vectors' data as possible.  It's used in removing
-    // duplicates from computation_->indexes_multi and computation_->indexes.
-    // First it compares the length, then it does lexicographical compare.
-    bool operator ()(const std::vector<T> *ptr1,
-                     const std::vector<T> *ptr2) const {
-      size_t size1 = ptr1->size(), size2 = ptr2->size();
-      if (size1 < size2) return true;
-      else if (size1 > size2) return false;
-      else return (*ptr1 < *ptr2);  // use the std::vector operator <, which is
-                                    // lexicographical comparison.
-    }
-  };
 
-  /// creates a renumbering that removes the elements in "to_remove",
-  /// e.g. if old_num_elements = 3 and to_remove = [1], would output
-  /// the vector [ 0, -1, 1 ].
-  static void CreateRenumbering(int32 old_num_elements,
-                                const std::vector<int32> &to_remove,
-                                std::vector<int32> *renumbering);
-
-  /// creates a renumbering from old to new index that removes the unused
-  /// elements, e.g. if used == [ true, false, true, true], would output the
-  /// vector [ 0, -1, 1, 2 ].  Returns number of new elements, i.e. the
-  /// number of elements of 'used' that were true.
-  static int32 CreateRenumbering(const std::vector<bool> &used,
-                                 std::vector<int32> *renumbering);
-
-  // vector of bool indexed by original submatrix-index, that is true if a
-  // submatrix-index is used somewhere in the computation (always true for
-  // the zeroth element).
-  std::vector<bool> submatrix_is_used_;
-  // vector of bool indexed by original submatrix-index, that is true if a
-  // submatrix-index will be kept; this is like submatrix_is_used_; but for
-  // duplicate submatrices, all but the first duplicate will be marked false).
-  std::vector<bool> submatrix_is_kept_;
-  // vector of bool indexed by original-matrix-index > 0, that is true if a
-  // matrix-index is used somewhere in the computation, directly or indirectly.
-  // always true for the zeroth element.
-  std::vector<bool> matrix_is_used_;
-  NnetComputation *computation_;
-  int32 num_matrices_new_;
-  int32 num_submatrices_new_;
-  std::vector<int32> old_to_new_matrix_;  // numbered by orig-matrix-index, gives
-                                          // new-matrix-index.  -1 for removed
-                                          // ones.
-  std::vector<int32> old_to_new_submatrix_;  // numbered by orig-submatrix-index,
-                                             // gives new-submatrix-index.  -1
-                                             // for removed ones.
-};
-
 
 
 // Class DerivativeTimeLimiter is used inside LimitDerivativeTimes().
diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc
index 44c611e2b4a..61e286a4b0d 100644
--- a/src/nnet3/nnet-optimize.cc
+++ b/src/nnet3/nnet-optimize.cc
@@ -341,19 +341,6 @@ void VariableMergingOptimization(const NnetOptimizeOptions &config,
   }
 }
 
-// This is a simplified top-level interface to the model-update consolidation
-// code from class ModelUpdateConsolidator.
-void ConsolidateModelUpdate(const Nnet &nnet,
-                            NnetComputation *computation) {
-  // This following if-statement is an optimization: if the computation
-  // request(s) had need_model_derivative == false, there would be nothing to
-  // optimize, so don't bother trying.
-  if (!computation->need_model_derivative)
-    return;
-  ModelUpdateConsolidator consolidator(nnet, computation);
-  consolidator.ConsolidateModelUpdate();
-}
-
 
 void ConvertAdditionToAssignment(const Nnet &nnet,
                                  NnetComputation *computation) {
@@ -657,6 +644,91 @@ static void SplitComputationIntoSegments(
   segments->push_back(std::pair<int32, int32>(cur_start, num_commands));
 }
 
+// This is a helper function used in ConsolidateIoOperations().
+//
+// Suppose we had something like this before ConsolidateIoOperations() (as
+// would be printed by Print()):
+//  c90: output m50 to user [for node: 'output']
+//   ...
+// c100: [label for goto statement]
+// c101: # computation segment separator [e.g., begin backward commands]
+//   ...
+// c105: m62 = user input [for node: 'input']
+//   ...
+// c190: output m79 to user [for node: 'output']
+//   ...
+// c200: goto c100
+//
+// this would get reordered to the following by ConsolidateIoOperations
+// (the bulk of the code, before this function is called):
+//
+//  c99: [label for goto statement]
+// c100: output m50 to user [for node: 'output']
+// c101: # computation segment separator [e.g., begin backward commands]
+// c102: m62 = user input [for node: 'input']
+//   ...
+// c199: goto c99
+// c200: output m79 to user [for node: 'output']
+//
+// Now command c200 is unreachable, but there is a similar command at c100
+// (after the goto) that will substitute.  However, the matrix indexes are different.
+// So we need to change the above so that the last two commands read:
+// c199: m50.swap(m79)
+// c200: goto c99
+void FixGotoOutputReordering(const Nnet &nnet,
+                             NnetComputation *computation) {
+  FixGotoLabel(computation);  // make sure the destination label of the goto statement was
+                              // correct.
+  int32 goto_command_index = -1;
+  for (int32 c = computation->commands.size() - 1; c >= 0; c--)
+    if (computation->commands[c].command_type == kGotoLabel)
+      goto_command_index = c;
+  KALDI_ASSERT(goto_command_index > 0);
+  int32 goto_label_index = computation->commands[goto_command_index].arg1;
+
+  std::vector<int32> output_commands_after_goto,
+      output_commands_after_label;
+  for (int32 c = goto_command_index + 1;
+       c < static_cast<int32>(computation->commands.size()); c++) {
+    KALDI_ASSERT(computation->commands[c].command_type == kProvideOutput);
+    output_commands_after_goto.push_back(c);
+  }
+  for (int32 c = goto_label_index + 1;
+       c < goto_command_index; c++) {  // note: we break from this loop.
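+    // In the example above, this scan visits c100, c101, c102, ...: it
+    // collects the kProvideOutput commands (like c100), skips over
+    // kNoOperationMarker and kAcceptInput commands (like c101 and c102), and
+    // stops at the first command of any other type.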
+    CommandType t = computation->commands[c].command_type;
+    if (t == kProvideOutput)
+      output_commands_after_label.push_back(c);
+    else if (t != kNoOperationMarker && t != kAcceptInput)
+      break;
+  }
+  if (output_commands_after_goto.size() != output_commands_after_label.size()) {
+    computation->Print(std::cerr, nnet);
+    KALDI_ERR << "Could not fix goto/output reordering, size mismatch.";
+  }
+  NnetComputation::Command goto_command = computation->commands[goto_command_index];
+  // but we'll be replacing the final kProvideOutput commands with
+  // kAllocMatrixFromOther [i.e. swap commands], and moving them one command
+  // backward; later we'll put the goto command at the end.
+  for (size_t i = 0; i < output_commands_after_goto.size(); i++) {
+    int32 c1 = output_commands_after_label[i],
+        c2 = output_commands_after_goto[i],
+        new_c2 = c2 - 1;
+    int32 s1 = computation->commands[c1].arg1,
+        s2 = computation->commands[c2].arg1;
+    // The following assert checks that the network node-index is the same...
+    // the idea is that the outputs should have been provided in the same order.
+    // I can think of no reason why the order might be different.
+    KALDI_ASSERT(computation->commands[c1].arg2 ==
+                 computation->commands[c2].arg2);
+    computation->commands[new_c2].command_type = kAllocMatrixFromOther;
+    computation->commands[new_c2].arg1 = s1;
+    computation->commands[new_c2].arg2 = s2;
+  }
+  // ... and move the goto command to the end.
+  computation->commands.back() = goto_command;
+}
+
 
 void ConsolidateIoOperations(const Nnet &nnet,
                              NnetComputation *computation) {
@@ -713,23 +785,8 @@ void ConsolidateIoOperations(const Nnet &nnet,
   }
   computation->commands.swap(reordered_commands);
 
-  if (ends_with_goto) {
-    // If, before this operation, the last command was kGotoLael, remove all
-    // commands that have been reordered to go after the kGotoLabel command
-    // [they would be unreachable anyway.]  This relates to looped computations.
-    // It may seem wrong that we are just removing these
-    // kAcceptInput/kProvideOutput commands, but the reason it's OK
-    // (and preserves equivalence with the code prior to this function call),
-    // is that the corresponding commands have also been moved past the
-    // kNoOperationLabel command that the goto jumps to, so those commands
-    // will actually get run.
-    // We don't actually check this here (it would lead to a crash when
-    // the computation was executed, if something is wrong in this logic).
-    while (!computation->commands.empty() &&
-           computation->commands.back().command_type != kGotoLabel)
-      computation->commands.pop_back();
-    FixGotoLabel(computation);
-  }
+  if (ends_with_goto)
+    FixGotoOutputReordering(nnet, computation);
 }
diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h
index 27871552017..c01563f11cb 100644
--- a/src/nnet3/nnet-optimize.h
+++ b/src/nnet3/nnet-optimize.h
@@ -266,6 +266,11 @@ void RemoveUnnecessaryAllocation(const Nnet &nnet,
 /// This optimization puts the input operations (kAcceptInput) and output
 /// operations (kProvideOutput) at the very beginning or end of segments of
 /// computation, respectively.
+///
+/// This is actually necessary for computations to be run easily, because if these
+/// commands were interspersed with the regular commands, you'd have to
+/// call computer.Run() between the individual AcceptInput() and GetOutput()
+/// function calls.
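+///
+/// For example, after this optimization a caller can follow the pattern the
+/// decodable objects use (a sketch; see nnet-compute.h for the real
+/// interface):
+///   computer.AcceptInput("input", &feats);     // all inputs for the segment,
+///   computer.Run();                            // then the whole segment,
+///   computer.GetOutputDestructive("output", &output);  // then the outputs.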
 void ConsolidateIoOperations(const Nnet &nnet,
                              NnetComputation *computation);
 
diff --git a/src/nnet3/nnet-test-utils.cc b/src/nnet3/nnet-test-utils.cc
index ae0481fa332..8547955e22a 100644
--- a/src/nnet3/nnet-test-utils.cc
+++ b/src/nnet3/nnet-test-utils.cc
@@ -1035,6 +1035,8 @@ void GenerateConfigSequence(
       GenerateConfigSequenceCompositeBlock(opts, configs);
       break;
     case 10:
+      if (!opts.allow_statistics_pooling)
+        goto start;
       GenerateConfigSequenceStatistics(opts, configs);
       break;
     case 11:
diff --git a/src/nnet3/nnet-test-utils.h b/src/nnet3/nnet-test-utils.h
index b6976f70ab1..a9616281bdc 100644
--- a/src/nnet3/nnet-test-utils.h
+++ b/src/nnet3/nnet-test-utils.h
@@ -40,6 +40,7 @@ struct NnetGenerationOptions {
   bool allow_final_nonlinearity;
   bool allow_use_of_x_dim;
   bool allow_ivector;
+  bool allow_statistics_pooling;
  // if set to a value >0, the output-dim of the network
  // will be set to this value.
  int32 output_dim;
@@ -54,6 +55,7 @@ struct NnetGenerationOptions {
       allow_final_nonlinearity(true),
       allow_use_of_x_dim(true),
       allow_ivector(false),
+      allow_statistics_pooling(true),
       output_dim(-1) { }
 };
diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc
index ed20257c7fe..dbe676de1ef 100644
--- a/src/nnet3/nnet-utils.cc
+++ b/src/nnet3/nnet-utils.cc
@@ -707,6 +707,14 @@ void ReadEditConfig(std::istream &edit_config_is, Nnet *nnet) {
 }
 
 
+/// Returns true if 'nnet' has some kind of recurrence.
+bool NnetIsRecurrent(const Nnet &nnet) {
+  std::vector<std::vector<int32> > graph;
+  NnetToDirectedGraph(nnet, &graph);
+  return GraphHasCycles(graph);
+}
+
+
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h
index bef783886e2..c0bdc7f86c8 100644
--- a/src/nnet3/nnet-utils.h
+++ b/src/nnet3/nnet-utils.h
@@ -151,6 +151,9 @@ void AddNnet(const Nnet &src, BaseFloat alpha, Nnet *dest);
 void AddNnetComponents(const Nnet &src, const Vector<BaseFloat> &alphas,
                        BaseFloat scale, Nnet *dest);
 
+/// Returns true if 'nnet' has some kind of recurrence.
+bool NnetIsRecurrent(const Nnet &nnet);
+
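+/// (For instance, the test code uses this to decide whether two decodable
+/// objects must produce identical output, roughly:
+///   if (!NnetIsRecurrent(nnet)) { /* expect exact equivalence */ }
+/// since looped and chunk-based computations only match exactly for
+/// non-recurrent networks without statistics-pooling components.)
+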
 /// Returns the total of the number of parameters in the updatable components of
 /// the nnet.
 int32 NumParameters(const Nnet &src);
 
From 5a8e7ffdb594e81c4d8a331e3ef1b94ab90ca95b Mon Sep 17 00:00:00 2001
From: Daniel Povey <dpovey@gmail.com>
Date: Sun, 13 Nov 2016 19:43:49 -0500
Subject: [PATCH 014/530] Add decoding program nnet3-latgen-faster-looped

---
 src/nnet3bin/Makefile                      |   2 +-
 src/nnet3bin/nnet3-latgen-faster-looped.cc | 266 +++++++++++++++++++++
 2 files changed, 267 insertions(+), 1 deletion(-)
 create mode 100644 src/nnet3bin/nnet3-latgen-faster-looped.cc

diff --git a/src/nnet3bin/Makefile b/src/nnet3bin/Makefile
index d46c56a1044..fd576404f1d 100644
--- a/src/nnet3bin/Makefile
+++ b/src/nnet3bin/Makefile
@@ -17,7 +17,7 @@ BINFILES = nnet3-init nnet3-info nnet3-get-egs nnet3-copy-egs nnet3-subset-egs \
            nnet3-discriminative-merge-egs nnet3-discriminative-shuffle-egs \
            nnet3-discriminative-compute-objf nnet3-discriminative-train \
            discriminative-get-supervision nnet3-discriminative-subset-egs \
-           nnet3-discriminative-compute-from-egs
+           nnet3-discriminative-compute-from-egs nnet3-latgen-faster-looped
 
 OBJFILES =
diff --git a/src/nnet3bin/nnet3-latgen-faster-looped.cc b/src/nnet3bin/nnet3-latgen-faster-looped.cc
new file mode 100644
index 00000000000..ee6867ff352
--- /dev/null
+++ b/src/nnet3bin/nnet3-latgen-faster-looped.cc
@@ -0,0 +1,266 @@
+// nnet3bin/nnet3-latgen-faster-looped.cc
+
+// Copyright 2012-2016   Johns Hopkins University (author: Daniel Povey)
+//                2014   Guoguo Chen
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "tree/context-dep.h"
+#include "hmm/transition-model.h"
+#include "fstext/fstext-lib.h"
+#include "decoder/decoder-wrappers.h"
+#include "nnet3/decodable-simple-looped.h"
+#include "base/timer.h"
+
+
+int main(int argc, char *argv[]) {
+  // note: making this program work with GPUs is as simple as initializing the
+  // device, but it probably won't make a huge difference in speed for typical
+  // setups.
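+  // (If one wanted to enable that, a sketch of the usual nnet3-binary idiom,
+  // assuming the CUDA build and an include of cudamatrix/cu-device.h, placed
+  // inside the try-block after po.Read(), would be:
+  //   #if HAVE_CUDA == 1
+  //     CuDevice::Instantiate().SelectGpuId("yes");
+  //   #endif
+  // )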
+  try {
+    using namespace kaldi;
+    using namespace kaldi::nnet3;
+    typedef kaldi::int32 int32;
+    using fst::SymbolTable;
+    using fst::VectorFst;
+    using fst::StdArc;
+
+    const char *usage =
+        "Generate lattices using nnet3 neural net model.\n"
+        "[this version uses the 'looped' computation, which may be slightly faster for\n"
+        "many architectures, but should not be used for backwards-recurrent architectures\n"
+        "such as BLSTMs.]\n"
+        "Usage: nnet3-latgen-faster-looped [options] <model-in> <fst-in|fsts-rspecifier> <features-rspecifier>"
+        " <lattice-wspecifier> [ <words-wspecifier> [<alignments-wspecifier>] ]\n";
+    ParseOptions po(usage);
+    Timer timer;
+    bool allow_partial = false;
+    LatticeFasterDecoderConfig config;
+    NnetSimpleLoopedComputationOptions decodable_opts;
+
+    std::string word_syms_filename;
+    std::string ivector_rspecifier,
+        online_ivector_rspecifier,
+        utt2spk_rspecifier;
+    int32 online_ivector_period = 0;
+    config.Register(&po);
+    decodable_opts.Register(&po);
+    po.Register("word-symbol-table", &word_syms_filename,
+                "Symbol table for words [for debug output]");
+    po.Register("allow-partial", &allow_partial,
+                "If true, produce output even if end state was not reached.");
+    po.Register("ivectors", &ivector_rspecifier, "Rspecifier for "
+                "iVectors as vectors (i.e. not estimated online); per utterance "
+                "by default, or per speaker if you provide the --utt2spk option.");
+    po.Register("online-ivectors", &online_ivector_rspecifier, "Rspecifier for "
+                "iVectors estimated online, as matrices.  If you supply this,"
+                " you must set the --online-ivector-period option.");
+    po.Register("online-ivector-period", &online_ivector_period, "Number of frames "
+                "between iVectors in matrices supplied to the --online-ivectors "
+                "option");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() < 4 || po.NumArgs() > 6) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string model_in_filename = po.GetArg(1),
+        fst_in_str = po.GetArg(2),
+        feature_rspecifier = po.GetArg(3),
+        lattice_wspecifier = po.GetArg(4),
+        words_wspecifier = po.GetOptArg(5),
+        alignment_wspecifier = po.GetOptArg(6);
+
+    TransitionModel trans_model;
+    AmNnetSimple am_nnet;
+    {
+      bool binary;
+      Input ki(model_in_filename, &binary);
+      trans_model.Read(ki.Stream(), binary);
+      am_nnet.Read(ki.Stream(), binary);
+    }
+
+    bool determinize = config.determinize_lattice;
+    CompactLatticeWriter compact_lattice_writer;
+    LatticeWriter lattice_writer;
+    if (! (determinize ? compact_lattice_writer.Open(lattice_wspecifier)
+           : lattice_writer.Open(lattice_wspecifier)))
+      KALDI_ERR << "Could not open table for writing lattices: "
+                << lattice_wspecifier;
+
+    RandomAccessBaseFloatMatrixReader online_ivector_reader(
+        online_ivector_rspecifier);
+    RandomAccessBaseFloatVectorReaderMapped ivector_reader(
+        ivector_rspecifier, utt2spk_rspecifier);
+
+    Int32VectorWriter words_writer(words_wspecifier);
+    Int32VectorWriter alignment_writer(alignment_wspecifier);
+
+    fst::SymbolTable *word_syms = NULL;
+    if (word_syms_filename != "")
+      if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
+        KALDI_ERR << "Could not read symbol table from file "
+                  << word_syms_filename;
+
+    double tot_like = 0.0;
+    kaldi::int64 frame_count = 0;
+    int num_success = 0, num_fail = 0;
+
+    // this object contains precomputed stuff that is used by all decodable
+    // objects.  It takes a pointer to am_nnet because if it has iVectors it has
+    // to modify the nnet to accept iVectors at intervals.
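+    // Because the compiled 'looped' computation lives in this object,
+    // constructing the per-utterance DecodableAmNnetSimpleLooped objects
+    // below is cheap: we compile once here and reuse decodable_info for
+    // every utterance.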
+    DecodableNnetSimpleLoopedInfo decodable_info(decodable_opts,
+                                                 &am_nnet);
+
+
+    if (ClassifyRspecifier(fst_in_str, NULL, NULL) == kNoRspecifier) {
+      SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
+
+      // Input FST is just one FST, not a table of FSTs.
+      VectorFst<StdArc> *decode_fst = fst::ReadFstKaldi(fst_in_str);
+      timer.Reset();
+
+      {
+        LatticeFasterDecoder decoder(*decode_fst, config);
+
+        for (; !feature_reader.Done(); feature_reader.Next()) {
+          std::string utt = feature_reader.Key();
+          const Matrix<BaseFloat> &features (feature_reader.Value());
+          if (features.NumRows() == 0) {
+            KALDI_WARN << "Zero-length utterance: " << utt;
+            num_fail++;
+            continue;
+          }
+          const Matrix<BaseFloat> *online_ivectors = NULL;
+          const Vector<BaseFloat> *ivector = NULL;
+          if (!ivector_rspecifier.empty()) {
+            if (!ivector_reader.HasKey(utt)) {
+              KALDI_WARN << "No iVector available for utterance " << utt;
+              num_fail++;
+              continue;
+            } else {
+              ivector = &ivector_reader.Value(utt);
+            }
+          }
+          if (!online_ivector_rspecifier.empty()) {
+            if (!online_ivector_reader.HasKey(utt)) {
+              KALDI_WARN << "No online iVector available for utterance " << utt;
+              num_fail++;
+              continue;
+            } else {
+              online_ivectors = &online_ivector_reader.Value(utt);
+            }
+          }
+
+
+          DecodableAmNnetSimpleLooped nnet_decodable(
+              decodable_info, trans_model, features, ivector, online_ivectors,
+              online_ivector_period);
+
+          double like;
+          if (DecodeUtteranceLatticeFaster(
+                  decoder, nnet_decodable, trans_model, word_syms, utt,
+                  decodable_opts.acoustic_scale, determinize, allow_partial,
+                  &alignment_writer, &words_writer, &compact_lattice_writer,
+                  &lattice_writer,
+                  &like)) {
+            tot_like += like;
+            frame_count += features.NumRows();
+            num_success++;
+          } else num_fail++;
+        }
+      }
+      delete decode_fst; // delete this only after decoder goes out of scope.
+    } else { // We have different FSTs for different utterances.
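+      // In this branch the second argument was an rspecifier: a table with
+      // one decoding graph per utterance, so a fresh decoder is constructed
+      // for each graph inside the loop.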
+      SequentialTableReader<fst::VectorFstHolder> fst_reader(fst_in_str);
+      RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier);
+      for (; !fst_reader.Done(); fst_reader.Next()) {
+        std::string utt = fst_reader.Key();
+        if (!feature_reader.HasKey(utt)) {
+          KALDI_WARN << "Not decoding utterance " << utt
+                     << " because no features available.";
+          num_fail++;
+          continue;
+        }
+        const Matrix<BaseFloat> &features = feature_reader.Value(utt);
+        if (features.NumRows() == 0) {
+          KALDI_WARN << "Zero-length utterance: " << utt;
+          num_fail++;
+          continue;
+        }
+
+        LatticeFasterDecoder decoder(fst_reader.Value(), config);
+
+        const Matrix<BaseFloat> *online_ivectors = NULL;
+        const Vector<BaseFloat> *ivector = NULL;
+        if (!ivector_rspecifier.empty()) {
+          if (!ivector_reader.HasKey(utt)) {
+            KALDI_WARN << "No iVector available for utterance " << utt;
+            num_fail++;
+            continue;
+          } else {
+            ivector = &ivector_reader.Value(utt);
+          }
+        }
+        if (!online_ivector_rspecifier.empty()) {
+          if (!online_ivector_reader.HasKey(utt)) {
+            KALDI_WARN << "No online iVector available for utterance " << utt;
+            num_fail++;
+            continue;
+          } else {
+            online_ivectors = &online_ivector_reader.Value(utt);
+          }
+        }
+
+        DecodableAmNnetSimpleLooped nnet_decodable(
+            decodable_info, trans_model, features, ivector, online_ivectors,
+            online_ivector_period);
+
+        double like;
+        if (DecodeUtteranceLatticeFaster(
+                decoder, nnet_decodable, trans_model, word_syms, utt,
+                decodable_opts.acoustic_scale, determinize, allow_partial,
+                &alignment_writer, &words_writer, &compact_lattice_writer,
+                &lattice_writer, &like)) {
+          tot_like += like;
+          frame_count += features.NumRows();
+          num_success++;
+        } else num_fail++;
+      }
+    }
+
+    double elapsed = timer.Elapsed();
+    KALDI_LOG << "Time taken " << elapsed
+              << "s: real-time factor assuming 100 frames/sec is "
+              << (elapsed * 100.0 / frame_count);
+    KALDI_LOG << "Done " << num_success << " utterances, failed for "
+              << num_fail;
+    KALDI_LOG << "Overall log-likelihood per frame is " << (tot_like / frame_count)
+              << " over " << frame_count << " frames.";
+
+    delete word_syms;
+    if (num_success != 0) return 0;
+    else return 1;
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
From 95590f3d0e2a1e57946dffdb1e31fe008c51835d Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Sun, 13 Nov 2016 21:33:18 -0500
Subject: [PATCH 015/530] Fix bug discovered by testing code

---
 src/nnet3/decodable-simple-looped.cc       | 9 ++++++---
 src/nnet3bin/nnet3-latgen-faster-looped.cc | 6 +++---
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/nnet3/decodable-simple-looped.cc b/src/nnet3/decodable-simple-looped.cc
index 9e580dc121f..0df4c3b6c31 100644
--- a/src/nnet3/decodable-simple-looped.cc
+++ b/src/nnet3/decodable-simple-looped.cc
@@ -85,8 +85,10 @@ void DecodableNnetSimpleLoopedInfo::Init(
   CompileLooped(*nnet, opts_.optimize_config, request1, request2, request3,
                 &computation_);
   computation_.ComputeCudaIndexes();
-  KALDI_LOG << "Computation is:";
-  computation_.Print(std::cerr, *nnet);
+  if (GetVerboseLevel() >= 3) {
+    KALDI_VLOG(3) << "Computation is:";
+    computation_.Print(std::cerr, *nnet);
+  }
 }
 
 
@@ -141,7 +143,8 @@ void DecodableNnetSimpleLooped::AdvanceChunk() {
     // note: end is last plus one.
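    // (for the first chunk the input covers the left-context, the chunk
    // itself and the right-context; later chunks feed in just the next
    // frames_per_chunk_ frames of input, starting after the frames that
    // were already consumed.)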
    end_input_frame = info_.frames_per_chunk_ + info_.frames_right_context_;
  } else {
-    begin_input_frame = num_chunks_computed_ * info_.frames_per_chunk_;
+    begin_input_frame = num_chunks_computed_ * info_.frames_per_chunk_ +
+        info_.frames_right_context_;
     end_input_frame = begin_input_frame + info_.frames_per_chunk_;
   }
   CuMatrix<BaseFloat> feats_chunk(end_input_frame - begin_input_frame,
diff --git a/src/nnet3bin/nnet3-latgen-faster-looped.cc b/src/nnet3bin/nnet3-latgen-faster-looped.cc
index ee6867ff352..9ad20fd8764 100644
--- a/src/nnet3bin/nnet3-latgen-faster-looped.cc
+++ b/src/nnet3bin/nnet3-latgen-faster-looped.cc
@@ -171,9 +171,9 @@ int main(int argc, char *argv[]) {
         }
 
 
-          DecodableAmNnetSimpleLooped nnet_decodable(
-            decodable_info, trans_model, features, ivector, online_ivectors,
-            online_ivector_period);
+        DecodableAmNnetSimpleLooped nnet_decodable(
+            decodable_info, trans_model, features, ivector, online_ivectors,
+            online_ivector_period);
 
         double like;
         if (DecodeUtteranceLatticeFaster(
From 3f0444a741509e84148d7fbd4fb160f613b95bcc Mon Sep 17 00:00:00 2001
From: Tom Ko
Date: Wed, 30 Nov 2016 22:08:51 -0500
Subject: [PATCH 016/530] Fix bug discovered by TDNN decoding script

---
 src/nnet3/decodable-simple-looped.cc | 25 +++++++++++++++++++------
 src/nnet3/decodable-simple-looped.h  |  5 +++++
 src/nnet3/nnet-compile-looped.cc     | 14 ++++++++------
 3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/src/nnet3/decodable-simple-looped.cc b/src/nnet3/decodable-simple-looped.cc
index 0df4c3b6c31..bb9a38632a1 100644
--- a/src/nnet3/decodable-simple-looped.cc
+++ b/src/nnet3/decodable-simple-looped.cc
@@ -72,7 +72,6 @@ void DecodableNnetSimpleLoopedInfo::Init(
   if (has_ivectors_)
     ModifyNnetIvectorPeriod(ivector_period, nnet);
-  ComputationRequest request1, request2, request3;
   int32 num_sequences = 1;  // we're processing one utterance at a time.
   int32 extra_right_context = 0;
   CreateLoopedComputationRequestSimple(*nnet, frames_per_chunk_,
                                        ivector_period,
                                        opts.extra_left_context_initial,
                                        extra_right_context,
                                        num_sequences,
-                                       &request1, &request2, &request3);
+                                       &request1_, &request2_, &request3_);
 
-  CompileLooped(*nnet, opts_.optimize_config, request1, request2, request3,
+  CompileLooped(*nnet, opts_.optimize_config, request1_, request2_, request3_,
                 &computation_);
   computation_.ComputeCudaIndexes();
   if (GetVerboseLevel() >= 3) {
@@ -172,11 +171,25 @@ void DecodableNnetSimpleLooped::AdvanceChunk() {
   computer_.AcceptInput("input", &feats_chunk);
 
   if (info_.has_ivectors_) {
+    KALDI_ASSERT(info_.request1_.inputs.size() == 2);
+    // all but the 1st chunk should have 1 iVector, but no need
+    // to assume this.
+    int32 num_ivectors = (num_chunks_computed_ == 0 ?
+                          info_.request1_.inputs[1].indexes.size() :
+                          info_.request2_.inputs[1].indexes.size());
+    KALDI_ASSERT(num_ivectors > 0);
+
     Vector<BaseFloat> ivector;
+    // we just get the iVector from the last input frame we needed...
+    // we don't bother trying to be 'accurate' in getting the iVectors
+    // for their 'correct' frames, because in general using the
+    // iVector from as large 't' as possible will be better.
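+    // The single iVector obtained here is then replicated across all the
+    // rows of the 'ivectors' matrix that the computation expects.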
     GetCurrentIvector(end_input_frame, &ivector);
-    CuMatrix<BaseFloat> cu_ivector(1, ivector.Dim());
-    cu_ivector.Row(0).CopyFromVec(ivector);
-    computer_.AcceptInput("ivector", &cu_ivector);
+    Matrix<BaseFloat> ivectors(num_ivectors,
+                               ivector.Dim());
+    ivectors.CopyRowsFromVec(ivector);
+    CuMatrix<BaseFloat> cu_ivectors(ivectors);
+    computer_.AcceptInput("ivector", &cu_ivectors);
   }
 
   computer_.Run();
diff --git a/src/nnet3/decodable-simple-looped.h b/src/nnet3/decodable-simple-looped.h
index fe40c220f8f..5aba5b10505 100644
--- a/src/nnet3/decodable-simple-looped.h
+++ b/src/nnet3/decodable-simple-looped.h
@@ -148,6 +148,11 @@ class DecodableNnetSimpleLoopedInfo {
   // to accept the iVectors
   bool has_ivectors_;
 
+  // The 3 computation requests that are used to create the looped
+  // computation are stored in the class, as we need them to work out
+  // exactly which iVectors are needed.
+  ComputationRequest request1_, request2_, request3_;
+
   // The compiled, 'looped' computation.
   NnetComputation computation_;
 };
diff --git a/src/nnet3/nnet-compile-looped.cc b/src/nnet3/nnet-compile-looped.cc
index 71329d2e8fe..d77f19ef13c 100644
--- a/src/nnet3/nnet-compile-looped.cc
+++ b/src/nnet3/nnet-compile-looped.cc
@@ -80,8 +80,9 @@ int32 GetChunkSize(const Nnet &nnet,
 /// for negative a is not specified (except by relation with the division '/'
 /// operator), but in practice it would be <= 0 for almost all implementations.
 template<class I> I Mod(I m, I n) {
-  if (m >= 0) return m % n;
-  else return -((-m) % n);
+  I ans = m % n;
+  if (ans < 0) ans += n;
+  // e.g. Mod(-1, 10) now returns 9 where the old version returned -1.
+  return ans;
 }
 
 
@@ -171,15 +172,16 @@ void CreateLoopedComputationRequestSimple(const Nnet &nnet,
   }
   for (int32 t = chunk2_input_begin_t; t < chunk2_input_end_t; t++) {
     int32 ivector_t = t - Mod(t, ivector_period);
-    if (ivector_times1.count(ivector_t) == 0)
+    if (ivector_times2.count(ivector_t) == 0 &&
+        ivector_times1.count(ivector_t) == 0)
       ivector_times2.insert(ivector_t);
   }
   for (int32 t = chunk3_input_begin_t; t < chunk3_input_end_t; t++) {
     int32 ivector_t = t - Mod(t, ivector_period);
-    if (ivector_times1.count(ivector_t) == 0 &&
-        ivector_times2.count(ivector_t) == 0) {
+    if (ivector_times3.count(ivector_t) == 0 &&
+        ivector_times2.count(ivector_t) == 0 &&
+        ivector_times1.count(ivector_t) == 0)
       ivector_times3.insert(ivector_t);
-    }
   }
 }
From 1057b836788cfc8ad457df93e076052080c7fa5e Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Wed, 7 Dec 2016 21:13:33 -0500
Subject: [PATCH 017/530] Early parts of 'shortcut' compilation

---
 src/nnet3/nnet-computation.h     |  6 +--
 src/nnet3/nnet-optimize-utils.cc | 17 ++++++++
 src/nnet3/nnet-optimize-utils.h  | 49 ++++++++++++++++++++-
 src/nnet3/nnet-optimize.cc       | 48 ++++++++++++++++----
 src/nnet3/nnet-optimize.h        | 75 ++++++++++++++++++++++++--------
 5 files changed, 164 insertions(+), 31 deletions(-)

diff --git a/src/nnet3/nnet-computation.h b/src/nnet3/nnet-computation.h
index 0d0b13547bf..a5f8cc2aca7 100644
--- a/src/nnet3/nnet-computation.h
+++ b/src/nnet3/nnet-computation.h
@@ -91,9 +91,9 @@ struct IoSpecification {
   void Swap(IoSpecification *other);
 
   void Read(std::istream &istream, bool binary);
-
+
   void Write(std::ostream &ostream, bool binary) const;
-
+
   bool operator== (const IoSpecification &other) const;
 };
 
@@ -147,7 +147,7 @@ struct ComputationRequest {
   void Read(std::istream &istream, bool binary);
 
   void Write(std::ostream &ostream, bool binary) const;
-
+
   bool operator== (const ComputationRequest &other) const;
 };
 
diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc
index df7f975db86..b2ebb22ad71 100644
--- a/src/nnet3/nnet-optimize-utils.cc
+++ b/src/nnet3/nnet-optimize-utils.cc
@@ -1829,6 +1829,23 @@ void DerivativeTimeLimiter::PruneMatrices() {
   LimitMatrices(will_limit);
 }
 
+
+int32 MaxOutputTimeInRequest(const ComputationRequest &request) {
+  int32 ans = std::numeric_limits<int32>::min();
+  for (size_t i = 0; i < request.outputs.size(); i++) {
+    const std::vector<Index> &indexes = request.outputs[i].indexes;
+    std::vector<Index>::const_iterator iter = indexes.begin(),
+        end = indexes.end();
+    for (; iter != end; ++iter)
+      if (iter->t > ans)
+        ans = iter->t;
+  }
+  if (ans == std::numeric_limits<int32>::min()) {
+    KALDI_ERR << "Failed to find any output indexes in computation request.";
+  }
+  return ans;
+}
+
 void LimitDerivativeTimes(const Nnet &nnet,
                           int32 min_deriv_time,
                           int32 max_deriv_time,
diff --git a/src/nnet3/nnet-optimize-utils.h b/src/nnet3/nnet-optimize-utils.h
index d82867252ec..e224983f847 100644
--- a/src/nnet3/nnet-optimize-utils.h
+++ b/src/nnet3/nnet-optimize-utils.h
@@ -522,6 +522,12 @@ class DerivativeTimeLimiter {
   std::vector<MatrixPruneInfo> prune_info_;
 };
 
+
+// This utility function, used in code that calls LimitDerivativeTimes(), returns
+// the largest time 't' in any of the 'outputs' in the computation request,
+// or crashes if there are no outputs (or no cindexes in those outputs).
+int32 MaxOutputTimeInRequest(const ComputationRequest &request);
+
 // This is the top-level interface to limit the times on which derivatives are
 // computed (e.g. for truncated BPTT); internally it uses class
 // DerivativeLimiter.  Will do nothing if min_deriv_time and max_deriv_time are
@@ -532,6 +538,48 @@ void LimitDerivativeTimes(const Nnet &nnet,
                           NnetComputation *computation);
 
 
+/** This function, used in 'shortcut' compilation where we first compile a
+    smaller computation with the same structure but only 2 distinct 'n'
+    values, works out whether a computation is 'decomposable'; if so,
+    it returns true and outputs the 'mini_request' with the same structure,
+    and the number of 'n' values.
+
+    A computation is decomposable if the following conditions hold:
+
+    - All of its inputs and outputs contain 'n' values for all 0 <= n < N,
+      for some N > 2.  [we output this 'N' as 'num_n_values'].
+    - All of its inputs and outputs have 'regular' structure.
+
+    What it means for an input or output (i.e. an IoSpecification) to have a
+    'regular' structure, is as follows:
+    - The 't' and 'x' values present are the same for each 'n',
+    - The order in which the indexes appear is EITHER of the following:
+      - The 'n' varies the most rapidly, i.e. the order is:
+         (t1,x1,0), (t1,x1,1) ... (t1,x1,N-1) \
+         (t2,x2,0), (t2,x2,1) ... (t2,x2,N-1) ...
+      - The 'n' varies the least rapidly, i.e. the order is:
+         (t1,x1,0), (t2,x2,0) ... \
+         (t1,x1,1), (t2,x2,1) ... \
+         ... \
+         (t1,x1,N-1), (t2,x2,N-1) ...
+      In either case, there does not have to be any particular rhyme or
+      reason to the order of the t and x values, the regularity on 'n' is
+      all that we care about.
+ */
+bool ComputationIsDecomposable(const ComputationRequest &request,
+                               ComputationRequest *mini_request,
+                               int32 *num_n_values);
+
+
+/**
+   This function is used in 'shortcut' compilation.
+ */
+bool ExpandComputation(const NnetComputation &computation,
+                       int32 num_n_values,
+                       NnetComputation *expanded_computation);
+
+
+
+
 /// This function detects submatrices, matrices, and members of indexes_multi
 /// and indexes that are never used (e.g. due to changes made in other
 /// optimization code), and removes them from the computation by way of suitable
 /// renumbering.
@@ -655,4 +703,3 @@ void IdentifyIndexesRangesArgs(std::vector<NnetComputation::Command> *commands,
 
 #endif
-
diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc
index 08a28e22025..9d6ff739768 100644
--- a/src/nnet3/nnet-optimize.cc
+++ b/src/nnet3/nnet-optimize.cc
@@ -52,7 +52,15 @@ void NnetOptimizeOptions::Read(std::istream &is, bool binary) {
   ReadBasicType(is, binary, &min_deriv_time);
   ExpectToken(is, binary, "<MaxDerivTime>");
   ReadBasicType(is, binary, &max_deriv_time);
-  ExpectToken(is, binary, "</NnetOptimizeOptions>");
+  std::string tok;
+  ReadToken(is, binary, &tok);
+  if (tok == "<MaxDerivTimeRelative>") {
+    ReadBasicType(is, binary, &max_deriv_time_relative);
+    ReadToken(is, binary, &tok);
+  }
+
+
+  KALDI_ASSERT(tok == "</NnetOptimizeOptions>");
 }
 
 void NnetOptimizeOptions::Write(std::ostream &os, bool binary) const {
@@ -83,6 +91,8 @@ void NnetOptimizeOptions::Write(std::ostream &os, bool binary) const {
   WriteBasicType(os, binary, min_deriv_time);
   WriteToken(os, binary, "<MaxDerivTime>");
   WriteBasicType(os, binary, max_deriv_time);
+  WriteToken(os, binary, "<MaxDerivTimeRelative>");
+  WriteBasicType(os, binary, max_deriv_time_relative);
   WriteToken(os, binary, "</NnetOptimizeOptions>");
 }
 
@@ -99,7 +109,8 @@ bool NnetOptimizeOptions::operator == (const NnetOptimizeOptions &other) const {
          other.move_sizing_commands == move_sizing_commands &&
          other.allocate_from_other == allocate_from_other &&
          other.min_deriv_time == min_deriv_time &&
-         other.max_deriv_time == max_deriv_time);
+         other.max_deriv_time == max_deriv_time &&
+         other.max_deriv_time_relative == max_deriv_time_relative);
 }
 
 // move commands that resize matrices to as late/early as possible.
@@ -413,10 +424,16 @@ void Optimize(const NnetOptimizeOptions &config,
   if (GetVerboseLevel() >= 4)
     CheckComputation(nnet, request, *computation, true);
 
-  // this will do nothing unless --min-deriv-time or --max-deriv-time was
-  // set.
-  LimitDerivativeTimes(nnet, config.min_deriv_time, config.max_deriv_time,
-                       computation);
+  { // Call LimitDerivativeTimes().
+    // this will do nothing unless --min-deriv-time or --max-deriv-time
+    // or --max-deriv-time-relative was set.
+    int32 max_deriv_time = config.max_deriv_time;
+    if (config.max_deriv_time_relative != std::numeric_limits<int32>::max())
+      max_deriv_time = config.max_deriv_time_relative +
+          MaxOutputTimeInRequest(request);
+    LimitDerivativeTimes(nnet, config.min_deriv_time,
+                         max_deriv_time, computation);
+  }
 
   if (GetVerboseLevel() >= 4)
     CheckComputation(nnet, request, *computation, true);
@@ -478,11 +495,26 @@ size_t ComputationRequestHasher::operator() (const ComputationRequest *cr) const
 
 size_t ComputationRequestHasher::IoSpecificationToInt(const IoSpecification& spec) const {
   size_t ans;
+  size_t n = 19;  // this value is used to extract only a subset of elements to hash;
+                  // it makes the hasher faster.
   StringHasher string_hasher;
   ans = string_hasher(spec.name);
   std::vector<Index>::const_iterator itr = spec.indexes.begin(),
-      end = spec.indexes.end();
-  for (; itr != end; ++itr) {
+      end = spec.indexes.end(),
+      med = end;
+  if (med > itr + n)
+    med = itr + n;
+
+  for (; itr != med; ++itr) {
+    ans += (*itr).n * 1619;
+    ans += (*itr).t * 15649;
+    ans += (*itr).x * 89809;
+  }
+  // after the first 'n' values, look only at every n'th value.  this makes the
+  // hashing much faster, and in the kinds of structures that we actually deal
+  // with, we shouldn't get unnecessary hash collisions as a result of this
+  // optimization.
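+  // e.g. with n = 19 the loop above hashed the first 19 indexes, and the
+  // loop below hashes every 19th index after that.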
+  for (; itr < end; itr += n) {
+    ans += (*itr).n * 1619;
+    ans += (*itr).t * 15649;
+    ans += (*itr).x * 89809;
diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h
index e04aff302c9..732f11e29ac 100644
--- a/src/nnet3/nnet-optimize.h
+++ b/src/nnet3/nnet-optimize.h
@@ -29,7 +29,7 @@ namespace kaldi {
 namespace nnet3 {
 
-// Options class for optimizing a NnetComputation The main projected use for
+// Options class for optimizing a NnetComputation.  The main projected use for
 // this is in debugging the optimization code itself, so that if an error is
 // detected, we can work out which optimization was responsible for the error.
 struct NnetOptimizeOptions {
@@ -46,20 +46,23 @@ struct NnetOptimizeOptions {
   bool allocate_from_other;
   int32 min_deriv_time;
   int32 max_deriv_time;
-
-  NnetOptimizeOptions(): optimize(true),
-                         consolidate_model_update(true),
-                         propagate_in_place(true),
-                         backprop_in_place(true),
-                         convert_addition(true),
-                         remove_assignments(true),
-                         allow_left_merge(true),
-                         allow_right_merge(true),
-                         initialize_undefined(true),
-                         move_sizing_commands(true),
-                         allocate_from_other(true),
-                         min_deriv_time(std::numeric_limits<int32>::min()),
-                         max_deriv_time(std::numeric_limits<int32>::max()) { }
+  int32 max_deriv_time_relative;
+
+  NnetOptimizeOptions():
+      optimize(true),
+      consolidate_model_update(true),
+      propagate_in_place(true),
+      backprop_in_place(true),
+      convert_addition(true),
+      remove_assignments(true),
+      allow_left_merge(true),
+      allow_right_merge(true),
+      initialize_undefined(true),
+      move_sizing_commands(true),
+      allocate_from_other(true),
+      min_deriv_time(std::numeric_limits<int32>::min()),
+      max_deriv_time(std::numeric_limits<int32>::max()),
+      max_deriv_time_relative(std::numeric_limits<int32>::max()) {}
 
   void Register(OptionsItf *opts) {
     opts->Register("optimize", &optimize, "Set this to false to turn off all "
@@ -99,6 +102,12 @@ struct NnetOptimizeOptions {
                    "the maximum t value that you want derivatives to be computed "
                    "at when updating the model. This is an optimization that "
                    "saves time in the backprop phase for recurrent frameworks");
+    opts->Register("max-deriv-time-relative", &max_deriv_time_relative,
+                   "An alternative mechanism for setting the --max-deriv-time, "
+                   "suitable for situations where the length of the egs is "
+                   "variable.  If set, it is equivalent to setting the "
+                   "--max-deriv-time to this value plus the largest 't' value "
+                   "in any 'output' node of the computation request.");
   }
   void Read(std::istream &is, bool binary);
   void Write(std::ostream &os, bool binary) const;
@@ -130,20 +139,47 @@ struct ComputationRequestPtrEqual {
   }
 };
 
+
+
+struct CachingOptimizingCompilerOptions {
+  bool use_shortcut;
+  int32 write_cache;
+  int32 cache_capacity;
+
+
+
+  CachingOptimizingCompilerOptions():
+      use_shortcut(true),
+      cache_capacity(64) { }
+
+  void Register(OptionsItf *opts) {
+    opts->Register("use-shortcut", &use_shortcut,
+                   "If true, use the 'shortcut' in compilation whereby "
+                   "computation requests with regular structure are identified "
+                   "as such, a computation with a smaller number of distinct "
+                   "values of 'n' is compiled (e.g. 2), and the compiled "
+                   "computation is expanded to match the size of the real "
+                   "computation request.");
+    opts->Register("cache-capacity", &cache_capacity,
+                   "Determines how many computations the computation-cache will "
+                   "store (most-recently-used).");
+  }
+};
+
 /// This class enables you to do the compilation and optimization in one call,
 /// and also ensures that if the ComputationRequest is identical to the previous
 /// one, the compilation process is not repeated.
 class CachingOptimizingCompiler {
  public:
   CachingOptimizingCompiler(const Nnet &nnet,
-                            const int32 capacity = 20):
-      nnet_(nnet), cache_capacity_(capacity) { }
+                            const CachingOptimizingCompilerOptions &config):
+      nnet_(nnet), config_(config), cache_capacity_(config.cache_capacity) { }
 
   /// Note: nnet is retained as a const reference but opt_config is copied.
   CachingOptimizingCompiler(const Nnet &nnet,
                             const NnetOptimizeOptions &opt_config,
-                            const int32 capacity = 20):
-      nnet_(nnet), opt_config_(opt_config), cache_capacity_(capacity) { }
+                            const CachingOptimizingCompilerOptions &config):
+      nnet_(nnet), config_(config), opt_config_(opt_config),
+      cache_capacity_(config.cache_capacity) { }
 
   ~CachingOptimizingCompiler();
   /// Does the compilation and returns a const pointer to
@@ -155,6 +191,7 @@ class CachingOptimizingCompiler {
   void WriteCache(std::ostream &os, bool binary) const;
  private:
   const Nnet &nnet_;
+  CachingOptimizingCompilerOptions config_;
   NnetOptimizeOptions opt_config_;
 
   // The access queue for keeping track of the freshness of computation.
From f6d307b24e682d2a9559a52cf72e94be1fecaba2 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Wed, 7 Dec 2016 21:13:33 -0500
Subject: [PATCH 018/530] Early parts of 'shortcut' compilation

---
 src/nnet3/nnet-computation.h     |  6 +--
 src/nnet3/nnet-optimize-utils.cc | 45 +++++++++++++++++++
 src/nnet3/nnet-optimize-utils.h  | 68 ++++++++++++++++++++++++++++-
 src/nnet3/nnet-optimize.cc       | 48 ++++++++++++++++----
 src/nnet3/nnet-optimize.h        | 75 ++++++++++++++++++++++++--------
 5 files changed, 211 insertions(+), 31 deletions(-)

diff --git a/src/nnet3/nnet-computation.h b/src/nnet3/nnet-computation.h
index 0d0b13547bf..a5f8cc2aca7 100644
--- a/src/nnet3/nnet-computation.h
+++ b/src/nnet3/nnet-computation.h
@@ -91,9 +91,9 @@ struct IoSpecification {
   void Swap(IoSpecification *other);
 
   void Read(std::istream &istream, bool binary);
-
+
   void Write(std::ostream &ostream, bool binary) const;
-
+
   bool operator== (const IoSpecification &other) const;
 };
 
@@ -147,7 +147,7 @@ struct ComputationRequest {
   void Read(std::istream &istream, bool binary);
 
   void Write(std::ostream &ostream, bool binary) const;
-
+
   bool operator== (const ComputationRequest &other) const;
 };
 
diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc
index df7f975db86..75e5b34bfb7 100644
--- a/src/nnet3/nnet-optimize-utils.cc
+++ b/src/nnet3/nnet-optimize-utils.cc
@@ -1829,6 +1829,23 @@ void DerivativeTimeLimiter::PruneMatrices() {
   LimitMatrices(will_limit);
 }
 
+
+int32 MaxOutputTimeInRequest(const ComputationRequest &request) {
+  int32 ans = std::numeric_limits<int32>::min();
+  for (size_t i = 0; i < request.outputs.size(); i++) {
+    const std::vector<Index> &indexes = request.outputs[i].indexes;
+    std::vector<Index>::const_iterator iter = indexes.begin(),
+        end = indexes.end();
+    for (; iter != end; ++iter)
+      if (iter->t > ans)
+        ans = iter->t;
+  }
+  if (ans == std::numeric_limits<int32>::min()) {
+    KALDI_ERR << "Failed to find any output indexes in computation request.";
+  }
+  return ans;
+}
+
 void LimitDerivativeTimes(const Nnet &nnet,
                           int32 min_deriv_time,
                          int32 max_deriv_time,
@@ -1838,5 +1855,33 @@ void LimitDerivativeTimes(const Nnet &nnet,
   limiter.LimitDerivTimes();
 }
 
+// This class implements the internals of the ExpandComputation() function.
+class ComputationExpander {
+ public:
+  ComputationExpander(const NnetComputation &computation,
+                      bool need_debug_info,
+                      int32 num_n_values,
+                      NnetComputation *expanded_computation):
+      computation_(computation),
+      need_debug_info_(need_debug_info),
+      num_n_values_(num_n_values),
+      expanded_computation_(expanded_computation) { }
+
+  // This function call implements the functionality of the class,
+  // expanding the computation.
+  bool Expand();
+
+ private:
+
+  const NnetComputation &computation_;
+  bool need_debug_info_;
+  int32 num_n_values_;
+  NnetComputation *expanded_computation_;
+
+
+};
+
+
+
 }  // namespace nnet3
 }  // namespace kaldi
diff --git a/src/nnet3/nnet-optimize-utils.h b/src/nnet3/nnet-optimize-utils.h
index d82867252ec..84697407a1e 100644
--- a/src/nnet3/nnet-optimize-utils.h
+++ b/src/nnet3/nnet-optimize-utils.h
@@ -522,6 +522,12 @@ class DerivativeTimeLimiter {
   std::vector<MatrixPruneInfo> prune_info_;
 };
 
+
+// This utility function, used in code that calls LimitDerivativeTimes(), returns
+// the largest time 't' in any of the 'outputs' in the computation request,
+// or crashes if there are no outputs (or no cindexes in those outputs).
+int32 MaxOutputTimeInRequest(const ComputationRequest &request);
+
 // This is the top-level interface to limit the times on which derivatives are
 // computed (e.g. for truncated BPTT); internally it uses class
 // DerivativeLimiter.  Will do nothing if min_deriv_time and max_deriv_time are
@@ -532,6 +538,67 @@ void LimitDerivativeTimes(const Nnet &nnet,
                           NnetComputation *computation);
 
 
+/** This function, used in 'shortcut' compilation where we first compile a
+    smaller computation with the same structure but only 2 distinct 'n'
+    values, works out whether a computation is 'decomposable'; if so,
+    it returns true and outputs the 'mini_request' with the same structure,
+    and the number of 'n' values.
+
+    A computation is decomposable if the following conditions hold:
+
+    - All of its inputs and outputs contain 'n' values for all 0 <= n < N,
+      for some N > 2.  [we output this 'N' as 'num_n_values'].
+    - All of its inputs and outputs have 'regular' structure.
+
+    What it means for an input or output (i.e. an IoSpecification) to have a
+    'regular' structure, is as follows:
+    - The 't' and 'x' values present are the same for each 'n',
+    - The order in which the indexes appear is EITHER of the following:
+      - The 'n' varies the most rapidly, i.e. the order is:
+         (t1,x1,0), (t1,x1,1) ... (t1,x1,N-1) \
+         (t2,x2,0), (t2,x2,1) ... (t2,x2,N-1) ...
+      - The 'n' varies the least rapidly, i.e. the order is:
+         (t1,x1,0), (t2,x2,0) ... \
+         (t1,x1,1), (t2,x2,1) ... \
+         ... \
+         (t1,x1,N-1), (t2,x2,N-1) ...
+      In either case, there does not have to be any particular rhyme or
+      reason to the order of the t and x values, the regularity on 'n' is
+      all that we care about.
+ */
+bool ComputationIsDecomposable(const ComputationRequest &request,
+                               ComputationRequest *mini_request,
+                               int32 *num_n_values);  // TODO: implement this.
+
+
+/**
+   This function is used in 'shortcut' compilation to expand a computation
+   that has been compiled for exactly 2 'n' values, to one that is suitable
+   for some num_n_values > 2.
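+   (For instance, a computation compiled for just the two 'n' values 0 and 1
+   can be expanded to operate on a minibatch of any larger size N.)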
+
+      @param [in] computation  The computation that was compiled for exactly
+                               2 'n' values (n=0 and n=1)
+      @param [in] need_debug_info  True if we want to retain the 'debug_info'
+                               in the output 'expanded_computation'.  In any
+                               case, the 'debug_info' is required in the
+                               input computation.
+      @param [in] num_n_values  The number of 'n' values we want in the output
+                               computation
+      @param [out] expanded_computation  The expanded computation.
+
+      @return  This function returns true if it succeeded, and false if it
+               could not expand the computation for some reason (e.g. there
+               was some non-simple component where the 'PrecomputedIndexes'
+               object could not be suitably expanded.  If it returns false,
+               the output 'expanded_computation' is undefined (may contain junk).
+ */
+bool ExpandComputation(const NnetComputation &computation,
+                       bool need_debug_info,
+                       int32 num_n_values,
+                       NnetComputation *expanded_computation);
+
+
+
+
 /// This function detects submatrices, matrices, and members of indexes_multi
 /// and indexes that are never used (e.g. due to changes made in other
 /// optimization code), and removes them from the computation by way of suitable
@@ -655,4 +722,3 @@ void IdentifyIndexesRangesArgs(std::vector<NnetComputation::Command> *commands,
 
 #endif
-
diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc
index 08a28e22025..9d6ff739768 100644
--- a/src/nnet3/nnet-optimize.cc
+++ b/src/nnet3/nnet-optimize.cc
@@ -52,7 +52,15 @@ void NnetOptimizeOptions::Read(std::istream &is, bool binary) {
   ReadBasicType(is, binary, &min_deriv_time);
   ExpectToken(is, binary, "<MaxDerivTime>");
   ReadBasicType(is, binary, &max_deriv_time);
-  ExpectToken(is, binary, "</NnetOptimizeOptions>");
+  std::string tok;
+  ReadToken(is, binary, &tok);
+  if (tok == "<MaxDerivTimeRelative>") {
+    ReadBasicType(is, binary, &max_deriv_time_relative);
+    ReadToken(is, binary, &tok);
+  }
+
+
+  KALDI_ASSERT(tok == "</NnetOptimizeOptions>");
 }
 
 void NnetOptimizeOptions::Write(std::ostream &os, bool binary) const {
@@ -83,6 +91,8 @@ void NnetOptimizeOptions::Write(std::ostream &os, bool binary) const {
   WriteBasicType(os, binary, min_deriv_time);
   WriteToken(os, binary, "<MaxDerivTime>");
   WriteBasicType(os, binary, max_deriv_time);
+  WriteToken(os, binary, "<MaxDerivTimeRelative>");
+  WriteBasicType(os, binary, max_deriv_time_relative);
   WriteToken(os, binary, "</NnetOptimizeOptions>");
 }
 
@@ -99,7 +109,8 @@ bool NnetOptimizeOptions::operator == (const NnetOptimizeOptions &other) const {
          other.move_sizing_commands == move_sizing_commands &&
         other.allocate_from_other == allocate_from_other &&
         other.min_deriv_time == min_deriv_time &&
-        other.max_deriv_time == max_deriv_time);
+        other.max_deriv_time == max_deriv_time &&
+        other.max_deriv_time_relative == max_deriv_time_relative);
 }
 
 // move commands that resize matrices to as late/early as possible.
@@ -413,10 +424,16 @@ void Optimize(const NnetOptimizeOptions &config,
   if (GetVerboseLevel() >= 4)
     CheckComputation(nnet, request, *computation, true);
 
-  // this will do nothing unless --min-deriv-time or --max-deriv-time was
-  // set.
-  LimitDerivativeTimes(nnet, config.min_deriv_time, config.max_deriv_time,
-                       computation);
+  { // Call LimitDerivativeTimes().
+    // this will do nothing unless --min-deriv-time or --max-deriv-time
+    // or --max-deriv-time-relative was set.
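+    // e.g. with --max-deriv-time-relative=10 and a largest output 't' of 42
+    // in the request, derivatives would be limited to t <= 52.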
+    int32 max_deriv_time = config.max_deriv_time;
+    if (config.max_deriv_time_relative != std::numeric_limits<int32>::max())
+      max_deriv_time = config.max_deriv_time_relative +
+          MaxOutputTimeInRequest(request);
+    LimitDerivativeTimes(nnet, config.min_deriv_time,
+                         max_deriv_time, computation);
+  }
 
   if (GetVerboseLevel() >= 4)
     CheckComputation(nnet, request, *computation, true);
@@ -478,11 +495,26 @@ size_t ComputationRequestHasher::operator() (const ComputationRequest *cr) const
 
 size_t ComputationRequestHasher::IoSpecificationToInt(const IoSpecification& spec) const {
   size_t ans;
+  size_t n = 19;  // this value is used to extract only a subset of elements to hash;
+                  // it makes the hasher faster.
   StringHasher string_hasher;
   ans = string_hasher(spec.name);
   std::vector<Index>::const_iterator itr = spec.indexes.begin(),
-      end = spec.indexes.end();
-  for (; itr != end; ++itr) {
+      end = spec.indexes.end(),
+      med = end;
+  if (med > itr + n)
+    med = itr + n;
+
+  for (; itr != med; ++itr) {
+    ans += (*itr).n * 1619;
+    ans += (*itr).t * 15649;
+    ans += (*itr).x * 89809;
+  }
+  // after the first 'n' values, look only at every n'th value.  this makes the
+  // hashing much faster, and in the kinds of structures that we actually deal
+  // with, we shouldn't get unnecessary hash collisions as a result of this
+  // optimization.
+  for (; itr < end; itr += n) {
+    ans += (*itr).n * 1619;
+    ans += (*itr).t * 15649;
+    ans += (*itr).x * 89809;
diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h
index e04aff302c9..732f11e29ac 100644
--- a/src/nnet3/nnet-optimize.h
+++ b/src/nnet3/nnet-optimize.h
@@ -29,7 +29,7 @@ namespace kaldi {
 namespace nnet3 {
 
-// Options class for optimizing a NnetComputation The main projected use for
+// Options class for optimizing a NnetComputation.  The main projected use for
 // this is in debugging the optimization code itself, so that if an error is
 // detected, we can work out which optimization was responsible for the error.
 struct NnetOptimizeOptions {
@@ -46,20 +46,23 @@ struct NnetOptimizeOptions {
   bool allocate_from_other;
   int32 min_deriv_time;
   int32 max_deriv_time;
-
-  NnetOptimizeOptions(): optimize(true),
-                         consolidate_model_update(true),
-                         propagate_in_place(true),
-                         backprop_in_place(true),
-                         convert_addition(true),
-                         remove_assignments(true),
-                         allow_left_merge(true),
-                         allow_right_merge(true),
-                         initialize_undefined(true),
-                         move_sizing_commands(true),
-                         allocate_from_other(true),
-                         min_deriv_time(std::numeric_limits<int32>::min()),
-                         max_deriv_time(std::numeric_limits<int32>::max()) { }
+  int32 max_deriv_time_relative;
+
+  NnetOptimizeOptions():
+      optimize(true),
+      consolidate_model_update(true),
+      propagate_in_place(true),
+      backprop_in_place(true),
+      convert_addition(true),
+      remove_assignments(true),
+      allow_left_merge(true),
+      allow_right_merge(true),
+      initialize_undefined(true),
+      move_sizing_commands(true),
+      allocate_from_other(true),
+      min_deriv_time(std::numeric_limits<int32>::min()),
+      max_deriv_time(std::numeric_limits<int32>::max()),
+      max_deriv_time_relative(std::numeric_limits<int32>::max()) {}
 
   void Register(OptionsItf *opts) {
     opts->Register("optimize", &optimize, "Set this to false to turn off all "
@@ -99,6 +102,12 @@ struct NnetOptimizeOptions {
                    "the maximum t value that you want derivatives to be computed "
                    "at when updating the model. This is an optimization that "
                   "saves time in the backprop phase for recurrent frameworks");
+    opts->Register("max-deriv-time-relative", &max_deriv_time_relative,
+                   "An alternative mechanism for setting the --max-deriv-time, "
+                   "suitable for situations where the length of the egs is "
+                   "variable.  If set, it is equivalent to setting the "
+                   "--max-deriv-time to this value plus the largest 't' value "
+                   "in any 'output' node of the computation request.");
   }
   void Read(std::istream &is, bool binary);
   void Write(std::ostream &os, bool binary) const;
@@ -130,20 +139,47 @@ struct ComputationRequestPtrEqual {
   }
 };
 
+
+
+struct CachingOptimizingCompilerOptions {
+  bool use_shortcut;
+  int32 write_cache;
+  int32 cache_capacity;
+
+
+
+  CachingOptimizingCompilerOptions():
+      use_shortcut(true),
+      cache_capacity(64) { }
+
+  void Register(OptionsItf *opts) {
+    opts->Register("use-shortcut", &use_shortcut,
+                   "If true, use the 'shortcut' in compilation whereby "
+                   "computation requests with regular structure are identified "
+                   "as such, a computation with a smaller number of distinct "
+                   "values of 'n' is compiled (e.g. 2), and the compiled "
+                   "computation is expanded to match the size of the real "
+                   "computation request.");
+    opts->Register("cache-capacity", &cache_capacity,
+                   "Determines how many computations the computation-cache will "
+                   "store (most-recently-used).");
+  }
+};
+
 /// This class enables you to do the compilation and optimization in one call,
 /// and also ensures that if the ComputationRequest is identical to the previous
 /// one, the compilation process is not repeated.
 class CachingOptimizingCompiler {
  public:
   CachingOptimizingCompiler(const Nnet &nnet,
-                            const int32 capacity = 20):
-      nnet_(nnet), cache_capacity_(capacity) { }
+                            const CachingOptimizingCompilerOptions &config):
+      nnet_(nnet), config_(config), cache_capacity_(config.cache_capacity) { }
 
   /// Note: nnet is retained as a const reference but opt_config is copied.
   CachingOptimizingCompiler(const Nnet &nnet,
                             const NnetOptimizeOptions &opt_config,
-                            const int32 capacity = 20):
-      nnet_(nnet), opt_config_(opt_config), cache_capacity_(capacity) { }
+                            const CachingOptimizingCompilerOptions &config):
+      nnet_(nnet), config_(config), opt_config_(opt_config),
+      cache_capacity_(config.cache_capacity) { }
 
   ~CachingOptimizingCompiler();
   /// Does the compilation and returns a const pointer to
@@ -155,6 +191,7 @@ class CachingOptimizingCompiler {
   void WriteCache(std::ostream &os, bool binary) const;
  private:
   const Nnet &nnet_;
+  CachingOptimizingCompilerOptions config_;
   NnetOptimizeOptions opt_config_;
 
   // The access queue for keeping track of the freshness of computation.
From 8a55b0aba625e817d94daa1f4af14baad6db4a4a Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 23 Nov 2016 16:22:57 -0500
Subject: [PATCH 019/530] Bug fix in nnet3-latgen-faster which missed uttspk
 option

---
 src/nnet3bin/nnet3-latgen-faster.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/nnet3bin/nnet3-latgen-faster.cc b/src/nnet3bin/nnet3-latgen-faster.cc
index 5a090acb5b5..e0f21e723e7 100644
--- a/src/nnet3bin/nnet3-latgen-faster.cc
+++ b/src/nnet3bin/nnet3-latgen-faster.cc
@@ -65,6 +65,8 @@ int main(int argc, char *argv[]) {
     po.Register("ivectors", &ivector_rspecifier, "Rspecifier for "
                 "iVectors as vectors (i.e. 
not estimated online); per utterance " "by default, or per speaker if you provide the --utt2spk option."); + po.Register("utt2spk", &utt2spk_rspecifier, "Rspecifier for " + "utt2spk option used to get ivectors per speaker"); po.Register("online-ivectors", &online_ivector_rspecifier, "Rspecifier for " "iVectors estimated online, as matrices. If you supply this," " you must set the --online-ivector-period option."); From 12c619f180ce7ebed2790cdaef847399126ccf4f Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 23 Nov 2016 16:28:57 -0500 Subject: [PATCH 020/530] Bug fix in sparse-matrix.cc --- src/matrix/sparse-matrix.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/matrix/sparse-matrix.cc b/src/matrix/sparse-matrix.cc index 477d36f190a..2ef909f66dd 100644 --- a/src/matrix/sparse-matrix.cc +++ b/src/matrix/sparse-matrix.cc @@ -714,6 +714,7 @@ void GeneralMatrix::Compress() { void GeneralMatrix::Uncompress() { if (cmat_.NumRows() != 0) { + mat_.Resize(cmat_.NumRows(), cmat_.NumCols(), kUndefined); cmat_.CopyToMat(&mat_); cmat_.Clear(); } From 21e6e9f5855fdf3279222dc4a27b51be48811b8c Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 23 Nov 2016 21:40:46 -0500 Subject: [PATCH 021/530] asr_diarization: Adding get_frame_shift.sh --- egs/wsj/s5/utils/data/get_frame_shift.sh | 30 ++++++++++++++---------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/egs/wsj/s5/utils/data/get_frame_shift.sh b/egs/wsj/s5/utils/data/get_frame_shift.sh index d032c9c17fa..f5a3bac9009 100755 --- a/egs/wsj/s5/utils/data/get_frame_shift.sh +++ b/egs/wsj/s5/utils/data/get_frame_shift.sh @@ -38,23 +38,27 @@ if [ ! -s $dir/utt2dur ]; then utils/data/get_utt2dur.sh $dir 1>&2 fi -if [ ! -f $dir/feats.scp ]; then - echo "$0: $dir/feats.scp does not exist" 1>&2 - exit 1 -fi +if [ ! -f $dir/frame_shift ]; then + if [ ! 
-f $dir/feats.scp ]; then + echo "$0: $dir/feats.scp does not exist" 1>&2 + exit 1 + fi -temp=$(mktemp /tmp/tmp.XXXX) + temp=$(mktemp /tmp/tmp.XXXX) -feat-to-len "scp:head -n 10 $dir/feats.scp|" ark,t:- > $temp + feat-to-len "scp:head -n 10 $dir/feats.scp|" ark,t:- > $temp -if [ -z $temp ]; then - echo "$0: error running feat-to-len" 1>&2 - exit 1 -fi + if [ -z $temp ]; then + echo "$0: error running feat-to-len" 1>&2 + exit 1 + fi -head -n 10 $dir/utt2dur | paste - $temp | \ - awk '{ dur += $2; frames += $4; } END { shift = dur / frames; if (shift > 0.01 && shift < 0.0102) shift = 0.01; print shift; }' || exit 1; + frame_shift=$(head -n 10 $dir/utt2dur | paste - $temp | awk '{ dur += $2; frames += $4; } END { shift = dur / frames; if (shift > 0.01 && shift < 0.0102) shift = 0.01; print shift; }') || exit 1; + + echo $frame_shift > $dir/frame_shift + rm $temp +fi -rm $temp +cat $dir/frame_shift exit 0 From ecdae90d76bb191ff33544879c73c60eeba476d3 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 24 Nov 2016 01:46:18 -0500 Subject: [PATCH 022/530] Pass --no-text option to validate data dir in speed perturbation --- egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh | 6 +++++- egs/wsj/s5/utils/perturb_data_dir_speed.sh | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh b/egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh index c575166534e..4b12a94eee9 100755 --- a/egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh +++ b/egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh @@ -43,5 +43,9 @@ utils/data/combine_data.sh $destdir ${srcdir} ${destdir}_speed0.9 ${destdir}_spe rm -r ${destdir}_speed0.9 ${destdir}_speed1.1 echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir" -utils/validate_data_dir.sh --no-feats $destdir +if [ -f $srcdir/text ]; then + utils/validate_data_dir.sh --no-feats $destdir +else + utils/validate_data_dir.sh --no-feats --no-text $destdir +fi diff --git a/egs/wsj/s5/utils/perturb_data_dir_speed.sh b/egs/wsj/s5/utils/perturb_data_dir_speed.sh index 20ff86755eb..e3d56d58b9c 100755 --- a/egs/wsj/s5/utils/perturb_data_dir_speed.sh +++ b/egs/wsj/s5/utils/perturb_data_dir_speed.sh @@ -112,4 +112,9 @@ cat $srcdir/utt2dur | utils/apply_map.pl -f 1 $destdir/utt_map | \ rm $destdir/spk_map $destdir/utt_map 2>/dev/null echo "$0: generated speed-perturbed version of data in $srcdir, in $destdir" -utils/validate_data_dir.sh --no-feats $destdir + +if [ -f $srcdir/text ]; then + utils/validate_data_dir.sh --no-feats $destdir +else + utils/validate_data_dir.sh --no-feats --no-text $destdir +fi From 5b7f150de474e1a58511b5c5a4e481254300eb7f Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 23 Nov 2016 16:23:33 -0500 Subject: [PATCH 023/530] Print Cuda profile in nnet3-compute --- src/nnet3bin/nnet3-compute.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/nnet3bin/nnet3-compute.cc b/src/nnet3bin/nnet3-compute.cc index 9305ef7e6b6..d46220c7ffd 100644 --- a/src/nnet3bin/nnet3-compute.cc +++ b/src/nnet3bin/nnet3-compute.cc @@ -159,6 +159,9 @@ int main(int argc, char *argv[]) { num_success++; } +#if HAVE_CUDA==1 + CuDevice::Instantiate().PrintProfile(); +#endif double elapsed = timer.Elapsed(); KALDI_LOG << "Time taken "<< elapsed << "s: real-time factor assuming 100 frames/sec is " From a1a5e0e863a0250529959c294462da580490acfe Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Sun, 6 Nov 2016 10:37:19 -0500 Subject: [PATCH 024/530] asr_diarization: Fix stats 
printing
---
 src/nnet3/nnet-component-itf.cc | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/nnet3/nnet-component-itf.cc b/src/nnet3/nnet-component-itf.cc
index 00dd802e091..f94843b725e 100644
--- a/src/nnet3/nnet-component-itf.cc
+++ b/src/nnet3/nnet-component-itf.cc
@@ -323,7 +323,7 @@ std::string NonlinearComponent::Info() const {
     stream << ", self-repair-upper-threshold=" << self_repair_upper_threshold_;
   if (self_repair_scale_ != 0.0)
     stream << ", self-repair-scale=" << self_repair_scale_;
-  if (count_ > 0 && value_sum_.Dim() == dim_ && deriv_sum_.Dim() == dim_) {
+  if (count_ > 0 && value_sum_.Dim() == dim_) {
     stream << ", count=" << std::setprecision(3) << count_
            << std::setprecision(6);
     stream << ", self-repaired-proportion="
@@ -333,10 +333,12 @@ std::string NonlinearComponent::Info() const {
     Vector<double> value_avg_dbl(value_sum_);
     Vector<BaseFloat> value_avg(value_avg_dbl);
     value_avg.Scale(1.0 / count_);
     stream << ", value-avg=" << SummarizeVector(value_avg);
-    Vector<double> deriv_avg_dbl(deriv_sum_);
-    Vector<BaseFloat> deriv_avg(deriv_avg_dbl);
-    deriv_avg.Scale(1.0 / count_);
-    stream << ", deriv-avg=" << SummarizeVector(deriv_avg);
+    if (deriv_sum_.Dim() == dim_) {
+      Vector<double> deriv_avg_dbl(deriv_sum_);
+      Vector<BaseFloat> deriv_avg(deriv_avg_dbl);
+      deriv_avg.Scale(1.0 / count_);
+      stream << ", deriv-avg=" << SummarizeVector(deriv_avg);
+    }
   }
   return stream.str();
 }
From 5d162053e299a5c04264b1fc2ae6bed2c270c8be Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Thu, 24 Nov 2016 01:27:59 -0500
Subject: [PATCH 025/530] asr_diarization: Add --skip-dims option to
 apply-cmvn-sliding

---
 src/featbin/apply-cmvn-sliding.cc | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/featbin/apply-cmvn-sliding.cc b/src/featbin/apply-cmvn-sliding.cc
index 4a6d02d16cd..105319761b5 100644
--- a/src/featbin/apply-cmvn-sliding.cc
+++ b/src/featbin/apply-cmvn-sliding.cc
@@ -35,10 +35,13 @@ int main(int argc, char *argv[]) {
         "Useful for speaker-id; see also apply-cmvn-online\n"
         "\n"
         "Usage: apply-cmvn-sliding [options] <feats-rspecifier> <feats-wspecifier>\n";
-
+
+    std::string skip_dims_str;
     ParseOptions po(usage);
     SlidingWindowCmnOptions opts;
     opts.Register(&po);
+    po.Register("skip-dims", &skip_dims_str, "Dimensions for which to skip "
+                "normalization: colon-separated list of integers, e.g. 13:14:15)");
 
     po.Read(argc, argv);
 
@@ -47,15 +50,24 @@ int main(int argc, char *argv[]) {
       exit(1);
     }
 
+    std::vector<int32> skip_dims;  // optionally use "fake"
+                                   // (zero-mean/unit-variance) stats for some
+                                   // dims to disable normalization.
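+    // e.g. --skip-dims=13:14:15 parses to skip_dims = {13, 14, 15}.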
+    if (!SplitStringToIntegers(skip_dims_str, ":", false, &skip_dims)) {
+      KALDI_ERR << "Bad --skip-dims option (should be colon-separated list of "
+                << "integers)";
+    }
+
+
     int32 num_done = 0, num_err = 0;
-
+
     std::string feat_rspecifier = po.GetArg(1);
     std::string feat_wspecifier = po.GetArg(2);
 
     SequentialBaseFloatMatrixReader feat_reader(feat_rspecifier);
     BaseFloatMatrixWriter feat_writer(feat_wspecifier);
-
-    for (;!feat_reader.Done(); feat_reader.Next()) {
+
+    for (; !feat_reader.Done(); feat_reader.Next()) {
       std::string utt = feat_reader.Key();
       Matrix<BaseFloat> feat(feat_reader.Value());
       if (feat.NumRows() == 0) {
@@ -67,7 +79,7 @@ int main(int argc, char *argv[]) {
                                  feat.NumCols(), kUndefined);
 
       SlidingWindowCmn(opts, feat, &cmvn_feat);
-
+
       feat_writer.Write(utt, cmvn_feat);
       num_done++;
     }
From 4c5cd5438b6166abe3fdee40489fc840d7c950b1 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Tue, 22 Nov 2016 11:23:08 -0500
Subject: [PATCH 026/530] asr_diarization: Adding length-tolerance to extract
 ivector scripts

---
 .../s5/steps/online/nnet2/extract_ivectors.sh |  8 +++----
 src/bin/weight-post.cc                        | 23 ++++++++++++-------
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/egs/wsj/s5/steps/online/nnet2/extract_ivectors.sh b/egs/wsj/s5/steps/online/nnet2/extract_ivectors.sh
index f27baecd673..2f55053efd5 100755
--- a/egs/wsj/s5/steps/online/nnet2/extract_ivectors.sh
+++ b/egs/wsj/s5/steps/online/nnet2/extract_ivectors.sh
@@ -172,8 +172,8 @@ if [ $sub_speaker_frames -gt 0 ]; then
     feat-to-len scp:$data/feats.scp ark,t:- > $dir/utt_counts || exit 1;
   fi
   if ! [ $(wc -l <$dir/utt_counts) -eq $(wc -l <$data/feats.scp) ]; then
-    echo "$0: error getting per-utterance counts."
-    exit 0;
+    echo "$0: error getting per-utterance counts. Number of lines in $dir/utt_counts differs from $data/feats.scp"
+    exit 1;
   fi
   cat $data/spk2utt | python -c "
import sys
@@ -229,8 +229,8 @@ if [ $stage -le 2 ]; then
  if [ ! -z "$ali_or_decode_dir" ]; then
-z "$ali_or_decode_dir" ]; then $cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \ gmm-global-get-post --n=$num_gselect --min-post=$min_post $srcdir/final.dubm "$gmm_feats" ark:- \| \ - weight-post ark:- "ark,s,cs:gunzip -c $dir/weights.gz|" ark:- \| \ - ivector-extract --acoustic-weight=$posterior_scale --compute-objf-change=true \ + weight-post --length-tolerance=1 ark:- "ark,s,cs:gunzip -c $dir/weights.gz|" ark:- \| \ + ivector-extract --length-tolerance=1 --acoustic-weight=$posterior_scale --compute-objf-change=true \ --max-count=$max_count --spk2utt=ark:$this_sdata/JOB/spk2utt \ $srcdir/final.ie "$feats" ark,s,cs:- ark,t:$dir/ivectors_spk.JOB.ark || exit 1; else diff --git a/src/bin/weight-post.cc b/src/bin/weight-post.cc index d536896eaaa..bbaad465195 100644 --- a/src/bin/weight-post.cc +++ b/src/bin/weight-post.cc @@ -26,32 +26,38 @@ int main(int argc, char *argv[]) { try { using namespace kaldi; - typedef kaldi::int32 int32; + typedef kaldi::int32 int32; + + int32 length_tolerance = 2; const char *usage = "Takes archives (typically per-utterance) of posteriors and per-frame weights,\n" "and weights the posteriors by the per-frame weights\n" "\n" "Usage: weight-post \n"; - + ParseOptions po(usage); + + po.Register("length-tolerance", &length_tolerance, + "Tolerate this many frames of length mismatch"); + po.Read(argc, argv); if (po.NumArgs() != 3) { po.PrintUsage(); exit(1); } - + std::string post_rspecifier = po.GetArg(1), weights_rspecifier = po.GetArg(2), post_wspecifier = po.GetArg(3); SequentialPosteriorReader posterior_reader(post_rspecifier); RandomAccessBaseFloatVectorReader weights_reader(weights_rspecifier); - PosteriorWriter post_writer(post_wspecifier); - + PosteriorWriter post_writer(post_wspecifier); + int32 num_done = 0, num_err = 0; - + for (; !posterior_reader.Done(); posterior_reader.Next()) { std::string key = posterior_reader.Key(); Posterior post = posterior_reader.Value(); @@ -61,7 +67,8 @@ int main(int argc, char *argv[]) { continue; } const Vector &weights = weights_reader.Value(key); - if (weights.Dim() != static_cast(post.size())) { + if (std::abs(weights.Dim() - static_cast(post.size())) > + length_tolerance) { KALDI_WARN << "Weights for utterance " << key << " have wrong size, " << weights.Dim() << " vs. " << post.size(); @@ -71,7 +78,7 @@ int main(int argc, char *argv[]) { for (size_t i = 0; i < post.size(); i++) { if (weights(i) == 0.0) post[i].clear(); for (size_t j = 0; j < post[i].size(); j++) - post[i][j].second *= weights(i); + post[i][j].second *= i < weights.Dim() ? 
      }
      post_writer.Write(key, post);
      num_done++;
From a71da1a4c59714bc0862e6b9a54e23197ee7c3bb Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Fri, 25 Nov 2016 15:45:40 -0500
Subject: [PATCH 027/530] asr_diarization: Adding --do-average option to
 matrix-sum-rows

---
 src/bin/matrix-sum-rows.cc | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/bin/matrix-sum-rows.cc b/src/bin/matrix-sum-rows.cc
index 7e60483eef2..ee6504ba2b1 100644
--- a/src/bin/matrix-sum-rows.cc
+++ b/src/bin/matrix-sum-rows.cc
@@ -34,9 +34,13 @@ int main(int argc, char *argv[]) {
         "e.g.: matrix-sum-rows ark:- ark:- | vector-sum ark:- sum.vec\n"
         "See also: matrix-sum, vector-sum\n";
 
+    bool do_average = false;
+
     ParseOptions po(usage);
 
+    po.Register("do-average", &do_average,
+                "Do average instead of sum");
+
     po.Read(argc, argv);
 
     if (po.NumArgs() != 2) {
@@ -45,28 +49,28 @@ int main(int argc, char *argv[]) {
    }
    std::string rspecifier = po.GetArg(1);
    std::string wspecifier = po.GetArg(2);
-
+
    SequentialBaseFloatMatrixReader mat_reader(rspecifier);
    BaseFloatVectorWriter vec_writer(wspecifier);
-
+
    int32 num_done = 0;
    int64 num_rows_done = 0;
-
+
    for (; !mat_reader.Done(); mat_reader.Next()) {
      std::string key = mat_reader.Key();
      Matrix<BaseFloat> mat(mat_reader.Value());
      Vector<double> vec(mat.NumCols());
-     vec.AddRowSumMat(1.0, mat, 0.0);
+     vec.AddRowSumMat(!do_average ? 1.0 : 1.0 / mat.NumRows(), mat, 0.0);
      // Do the summation in double, to minimize roundoff.
      Vector<BaseFloat> float_vec(vec);
      vec_writer.Write(key, float_vec);
      num_done++;
      num_rows_done += mat.NumRows();
    }
-
+
    KALDI_LOG << "Summed rows " << num_done << " matrices, "
              << num_rows_done << " rows in total.";
-
+
    return (num_done != 0 ? 0 : 1);
  } catch(const std::exception &e) {
    std::cerr << e.what();
From 8fa2b211a218473362c907f24312c0c7275fcc0d Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Fri, 23 Sep 2016 23:15:58 -0400
Subject: [PATCH 028/530] asr_diarization: Added weight-pdf-post,
 vector-to-feat, kaldi-matrix softmax per row, copy-matrix apply-log,
 matrix-add-offset, matrix-dot-product

---
 src/bin/Makefile              |   3 +-
 src/bin/copy-matrix.cc        |  37 ++++++-
 src/bin/matrix-add-offset.cc  |  84 ++++++++++++++
 src/bin/matrix-dot-product.cc | 183 ++++++++++++++++++++++++++++++++++
 src/bin/vector-scale.cc       |  37 +++++--
 src/bin/weight-matrix.cc      |  84 ++++++++++++++++
 src/bin/weight-pdf-post.cc    | 154 ++++++++++++++++++++++++++++
 src/featbin/Makefile          |   3 +-
 src/featbin/extract-column.cc |  84 ++++++++++++++++
 src/featbin/vector-to-feat.cc | 100 +++++++++++++++++++
 src/matrix/kaldi-matrix.cc    |   9 ++
 src/matrix/kaldi-matrix.h     |   5 +
 12 files changed, 767 insertions(+), 16 deletions(-)
 create mode 100644 src/bin/matrix-add-offset.cc
 create mode 100644 src/bin/matrix-dot-product.cc
 create mode 100644 src/bin/weight-matrix.cc
 create mode 100644 src/bin/weight-pdf-post.cc
 create mode 100644 src/featbin/extract-column.cc
 create mode 100644 src/featbin/vector-to-feat.cc

diff --git a/src/bin/Makefile b/src/bin/Makefile
index 687040889b3..3dc59fe8112 100644
--- a/src/bin/Makefile
+++ b/src/bin/Makefile
@@ -24,7 +24,8 @@ BINFILES = align-equal align-equal-compiled acc-tree-stats \
            matrix-logprob matrix-sum \
            build-pfile-from-ali get-post-on-ali tree-info am-info \
            vector-sum matrix-sum-rows est-pca sum-lda-accs sum-mllt-accs \
-           transform-vec align-text matrix-dim
+           transform-vec align-text matrix-dim weight-pdf-post weight-matrix \
+           matrix-add-offset matrix-dot-product
 
 OBJFILES =

diff --git a/src/bin/copy-matrix.cc b/src/bin/copy-matrix.cc
index d7b8181c64c..56f2e51d90f 100644
--- a/src/bin/copy-matrix.cc
+++ b/src/bin/copy-matrix.cc
@@ -36,16 +36,30 @@ int main(int argc, char *argv[]) {
         " e.g.: copy-matrix --binary=false 1.mat -\n"
         "   copy-matrix ark:2.trans ark,t:-\n"
         "See also: copy-feats\n";
-
+
     bool binary = true;
+    bool apply_log = false;
+    bool apply_exp = false;
+    bool apply_softmax_per_row = false;
+    BaseFloat apply_power = 1.0;
     BaseFloat scale = 1.0;
+
     ParseOptions po(usage);
     po.Register("binary", &binary,
                 "Write in binary mode (only relevant if output is a wxfilename)");
     po.Register("scale", &scale,
                 "This option can be used to scale the matrices being copied.");
-
+    po.Register("apply-log", &apply_log,
+                "This option can be used to apply log on the matrices. "
+                "Must be avoided if matrix has negative quantities.");
+    po.Register("apply-exp", &apply_exp,
+                "This option can be used to apply exp on the matrices");
+    po.Register("apply-power", &apply_power,
+                "This option can be used to apply a power on the matrices");
+    po.Register("apply-softmax-per-row", &apply_softmax_per_row,
+                "This option can be used to apply softmax per row of the matrices");
+
    po.Read(argc, argv);

    if (po.NumArgs() != 2) {
      po.PrintUsage();
      exit(1);
    }

+    if ( (apply_log && apply_exp) || (apply_softmax_per_row && apply_exp) ||
+         (apply_softmax_per_row && apply_log) )
+      KALDI_ERR << "Only one of apply-log, apply-exp and "
+                << "apply-softmax-per-row can be given";
    std::string matrix_in_fn = po.GetArg(1),
        matrix_out_fn = po.GetArg(2);
@@ -68,11 +86,15 @@ int main(int argc, char *argv[]) {
    if (in_is_rspecifier != out_is_wspecifier)
      KALDI_ERR << "Cannot mix archives with regular files (copying matrices)";
-
+
    if (!in_is_rspecifier) {
      Matrix<BaseFloat> mat;
      ReadKaldiObject(matrix_in_fn, &mat);
      if (scale != 1.0) mat.Scale(scale);
+      if (apply_log) mat.ApplyLog();
+      if (apply_exp) mat.ApplyExp();
+      if (apply_softmax_per_row) mat.ApplySoftMaxPerRow();
+      if (apply_power != 1.0) mat.ApplyPow(apply_power);
      Output ko(matrix_out_fn, binary);
      mat.Write(ko.Stream(), binary);
      KALDI_LOG << "Copied matrix to " << matrix_out_fn;
@@ -82,9 +104,14 @@ int main(int argc, char *argv[]) {
      BaseFloatMatrixWriter writer(matrix_out_fn);
      SequentialBaseFloatMatrixReader reader(matrix_in_fn);
      for (; !reader.Done(); reader.Next(), num_done++) {
-        if (scale != 1.0) {
+        if (scale != 1.0 || apply_log || apply_exp ||
+            apply_power != 1.0 || apply_softmax_per_row) {
          Matrix<BaseFloat> mat(reader.Value());
-          mat.Scale(scale);
+          if (scale != 1.0) mat.Scale(scale);
+          if (apply_log) mat.ApplyLog();
+          if (apply_exp) mat.ApplyExp();
+          if (apply_softmax_per_row) mat.ApplySoftMaxPerRow();
+          if (apply_power != 1.0) mat.ApplyPow(apply_power);
          writer.Write(reader.Key(), mat);
        } else {
          writer.Write(reader.Key(), reader.Value());
diff --git a/src/bin/matrix-add-offset.cc b/src/bin/matrix-add-offset.cc
new file mode 100644
index 00000000000..90f72ba3254
--- /dev/null
+++ b/src/bin/matrix-add-offset.cc
@@ -0,0 +1,84 @@
+// bin/matrix-add-offset.cc
+
+// Copyright 2015  Vimal Manohar
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "matrix/kaldi-matrix.h" + + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + + const char *usage = + "Add an offset vector to the rows of matrices in a table.\n" + "\n" + "Usage: matrix-add-offset [options] <matrix-rspecifier> " + "<vector-rxfilename> <matrix-wspecifier>\n" + "e.g.: matrix-add-offset log_post.mat neg_priors.vec log_like.mat\n" + "See also: matrix-sum-rows, matrix-sum, vector-sum\n"; + + + ParseOptions po(usage); + + po.Read(argc, argv); + + if (po.NumArgs() != 3) { + po.PrintUsage(); + exit(1); + } + std::string rspecifier = po.GetArg(1); + std::string vector_rxfilename = po.GetArg(2); + std::string wspecifier = po.GetArg(3); + + SequentialBaseFloatMatrixReader mat_reader(rspecifier); + BaseFloatMatrixWriter mat_writer(wspecifier); + + int32 num_done = 0; + + Vector<BaseFloat> vec; + { + bool binary_in; + Input ki(vector_rxfilename, &binary_in); + vec.Read(ki.Stream(), binary_in); + } + + for (; !mat_reader.Done(); mat_reader.Next()) { + std::string key = mat_reader.Key(); + Matrix<BaseFloat> mat(mat_reader.Value()); + if (vec.Dim() != mat.NumCols()) { + KALDI_ERR << "Mismatch in vector dimension and " + << "number of columns in matrix; " + << vec.Dim() << " vs " << mat.NumCols(); + } + mat.AddVecToRows(1.0, vec); + mat_writer.Write(key, mat); + num_done++; + } + + KALDI_LOG << "Added offset to " << num_done << " matrices."; + + return (num_done != 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + + diff --git a/src/bin/matrix-dot-product.cc b/src/bin/matrix-dot-product.cc new file mode 100644 index 00000000000..a292cab9a40 --- /dev/null +++ b/src/bin/matrix-dot-product.cc @@ -0,0 +1,183 @@ +// bin/matrix-dot-product.cc + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "matrix/kaldi-matrix.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + + const char *usage = + "Get element-wise dot product of matrices. Always returns a matrix " + "that is the same size as the first matrix.\n" + "If there is a mismatch in number of rows, the utterance is skipped, " + "unless the mismatch is within a tolerance. 
If the second matrix has " + "number of rows that is larger than the first matrix by less than the " + "specified tolerance, then a submatrix of the second matrix is " + "multiplied element-wise with the first matrix.\n" + "\n" + "Usage: matrix-dot-product [options] <matrix-rspecifier1> " + "[<matrix-rspecifier2> ...] <matrix-wspecifier>" + "\n" + " e.g.: matrix-dot-product ark:1.weights ark:2.weights " + "ark:combine.weights\n" + "or \n" + "Usage: matrix-dot-product [options] <matrix-rxfilename1> " + "[<matrix-rxfilename2> ...] <matrix-wxfilename>" + "\n" + " e.g.: matrix-dot-product --binary=false 1.mat 2.mat product.mat\n" + "See also: matrix-sum, matrix-sum-rows\n"; + + bool binary = true; + int32 length_tolerance = 0; + + ParseOptions po(usage); + + po.Register("binary", &binary, "If true, write output as binary (only " + "relevant for the single-file usage)"); + po.Register("length-tolerance", &length_tolerance, + "Tolerate a length mismatch of up to this many frames"); + + po.Read(argc, argv); + + if (po.NumArgs() < 2) { + po.PrintUsage(); + exit(1); + } + + int32 N = po.NumArgs(); + std::string matrix_in_fn1 = po.GetArg(1), + matrix_out_fn = po.GetArg(N); + + if (ClassifyWspecifier(matrix_out_fn, NULL, NULL, NULL) != kNoWspecifier) { + // output to table. + + // Output matrix + BaseFloatMatrixWriter matrix_writer(matrix_out_fn); + + // Input matrices + SequentialBaseFloatMatrixReader matrix_reader1(matrix_in_fn1); + std::vector<RandomAccessBaseFloatMatrixReader*> + matrix_readers(N-2, + static_cast<RandomAccessBaseFloatMatrixReader*>(NULL)); + std::vector<std::string> matrix_in_fns(N-2); + for (int32 i = 2; i < N; ++i) { + matrix_readers[i-2] = new RandomAccessBaseFloatMatrixReader( + po.GetArg(i)); + matrix_in_fns[i-2] = po.GetArg(i); + } + int32 n_utts = 0, n_total_matrices = 0, + n_success = 0, n_missing = 0, n_other_errors = 0; + + for (; !matrix_reader1.Done(); matrix_reader1.Next()) { + std::string key = matrix_reader1.Key(); + Matrix<BaseFloat> matrix1 = matrix_reader1.Value(); + matrix_reader1.FreeCurrent(); + n_utts++; + n_total_matrices++; + + Matrix<BaseFloat> matrix_out(matrix1); + + int32 i = 0; + for (i = 0; i < N-2; ++i) { + bool failed = false; // Indicates failure for this key. 
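+ // matrix_out starts as a copy of the matrix from the first table; each + // remaining table is looked up with the same key and multiplied in + // element-wise. A missing key, or a size mismatch beyond + // --length-tolerance, marks this utterance as failed and it is skipped.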
+ if (matrix_readers[i]->HasKey(key)) { + const Matrix<BaseFloat> &matrix2 = matrix_readers[i]->Value(key); + n_total_matrices++; + if (SameDim(matrix2, matrix_out)) { + matrix_out.MulElements(matrix2); + } else { + KALDI_WARN << "Dimension mismatch for utterance " << key + << " : " << matrix2.NumRows() << " by " + << matrix2.NumCols() << " for " + << "system " << (i + 2) << ", rspecifier: " + << matrix_in_fns[i] << " vs " << matrix_out.NumRows() + << " by " << matrix_out.NumCols() + << " primary matrix, rspecifier:" << matrix_in_fn1; + if (matrix2.NumRows() - matrix_out.NumRows() <= + length_tolerance) { + KALDI_WARN << "Tolerated length mismatch for key " << key; + matrix_out.MulElements(matrix2.Range(0, matrix_out.NumRows(), + 0, matrix2.NumCols())); + } else { + KALDI_WARN << "Skipping key " << key; + failed = true; + n_other_errors++; + } + } + } else { + KALDI_WARN << "No matrix found for utterance " << key << " for " + << "system " << (i + 2) << ", rspecifier: " + << matrix_in_fns[i]; + failed = true; + n_missing++; + } + + if (failed) break; + } + + if (i != N-2) // Skipping utterance + continue; + + matrix_writer.Write(key, matrix_out); + n_success++; + } + + KALDI_LOG << "Processed " << n_utts << " utterances, with a total of " + << n_total_matrices << " matrices across " << (N-1) + << " different systems."; + KALDI_LOG << "Produced output for " << n_success << " utterances; " + << n_missing << " total missing matrices and skipped " + << n_other_errors << " matrices."; + + DeletePointers(&matrix_readers); + + return (n_success != 0 && n_missing < (n_success - n_missing)) ? 0 : 1; + } else { + for (int32 i = 1; i < N; i++) { + if (ClassifyRspecifier(po.GetArg(i), NULL, NULL) != kNoRspecifier) { + KALDI_ERR << "Wrong usage: if the last argument is not a " + << "table, the other arguments must not be tables."; + } + } + + Matrix<BaseFloat> mat1; + ReadKaldiObject(po.GetArg(1), &mat1); + + for (int32 i = 2; i < N; i++) { + Matrix<BaseFloat> mat; + ReadKaldiObject(po.GetArg(i), &mat); + + mat1.MulElements(mat); + } + + WriteKaldiObject(mat1, po.GetArg(N), binary); + KALDI_LOG << "Multiplied " << (po.NumArgs() - 1) << " matrices; " + << "wrote product to " << PrintableWxfilename(po.GetArg(N)); + + return 0; + } + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + diff --git a/src/bin/vector-scale.cc b/src/bin/vector-scale.cc index 60d4d3121d2..ea68ae31ad0 100644 --- a/src/bin/vector-scale.cc +++ b/src/bin/vector-scale.cc @@ -30,11 +30,14 @@ int main(int argc, char *argv[]) { const char *usage = "Scale a set of vectors in a Table (useful for speaker vectors and " "per-frame weights)\n" - "Usage: vector-scale [options] <vector-in-rspecifier> <vector-out-wspecifier>\n"; + "Usage: vector-scale [options] <vector-in-rspecifier|vector-in-rxfilename> " "<vector-out-wspecifier|vector-out-wxfilename>\n"; ParseOptions po(usage); BaseFloat scale = 1.0; + bool binary = false; + po.Register("binary", &binary, "If true, write output as binary " + "(not relevant for archives)"); po.Register("scale", &scale, "Scaling factor for vectors"); po.Read(argc, argv); @@ -43,17 +46,33 @@ int main(int argc, char *argv[]) { exit(1); } - std::string rspecifier = po.GetArg(1); - std::string wspecifier = po.GetArg(2); + std::string vector_in_fn = po.GetArg(1); + std::string vector_out_fn = po.GetArg(2); - BaseFloatVectorWriter vec_writer(wspecifier); - - SequentialBaseFloatVectorReader vec_reader(rspecifier); - for (; !vec_reader.Done(); vec_reader.Next()) { - Vector<BaseFloat> vec(vec_reader.Value()); + if (ClassifyRspecifier(vector_in_fn, NULL, NULL) != kNoRspecifier) { + if (ClassifyWspecifier(vector_out_fn, NULL, NULL, NULL) == kNoWspecifier) { + KALDI_ERR << "Cannot 
mix archives and regular files"; + } + BaseFloatVectorWriter vec_writer(vector_out_fn); + SequentialBaseFloatVectorReader vec_reader(vector_in_fn); + for (; !vec_reader.Done(); vec_reader.Next()) { + Vector vec(vec_reader.Value()); + vec.Scale(scale); + vec_writer.Write(vec_reader.Key(), vec); + } + } else { + if (ClassifyRspecifier(vector_in_fn, NULL, NULL) != kNoRspecifier) { + KALDI_ERR << "Cannot mix archives and regular files"; + } + bool binary_in; + Input ki(vector_in_fn, &binary_in); + Vector vec; + vec.Read(ki.Stream(), binary_in); vec.Scale(scale); - vec_writer.Write(vec_reader.Key(), vec); + Output ko(vector_out_fn, binary); + vec.Write(ko.Stream(), binary); } + return 0; } catch(const std::exception &e) { std::cerr << e.what(); diff --git a/src/bin/weight-matrix.cc b/src/bin/weight-matrix.cc new file mode 100644 index 00000000000..c6823b8da29 --- /dev/null +++ b/src/bin/weight-matrix.cc @@ -0,0 +1,84 @@ +// bin/weight-matrix.cc + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + + +#include "base/kaldi-common.h" +#include "util/common-utils.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + typedef kaldi::int32 int32; + + const char *usage = + "Takes archives (typically per-utterance) of features and " + "per-frame weights,\n" + "and weights the features by the per-frame weights\n" + "\n" + "Usage: weight-matrix " + "\n"; + + ParseOptions po(usage); + po.Read(argc, argv); + + if (po.NumArgs() != 3) { + po.PrintUsage(); + exit(1); + } + + std::string matrix_rspecifier = po.GetArg(1), + weights_rspecifier = po.GetArg(2), + matrix_wspecifier = po.GetArg(3); + + SequentialBaseFloatMatrixReader matrix_reader(matrix_rspecifier); + RandomAccessBaseFloatVectorReader weights_reader(weights_rspecifier); + BaseFloatMatrixWriter matrix_writer(matrix_wspecifier); + + int32 num_done = 0, num_err = 0; + + for (; !matrix_reader.Done(); matrix_reader.Next()) { + std::string key = matrix_reader.Key(); + Matrix mat = matrix_reader.Value(); + if (!weights_reader.HasKey(key)) { + KALDI_WARN << "No weight vectors for utterance " << key; + num_err++; + continue; + } + const Vector &weights = weights_reader.Value(key); + if (weights.Dim() != mat.NumRows()) { + KALDI_WARN << "Weights for utterance " << key + << " have wrong size, " << weights.Dim() + << " vs. " << mat.NumRows(); + num_err++; + continue; + } + mat.MulRowsVec(weights); + matrix_writer.Write(key, mat); + num_done++; + } + KALDI_LOG << "Applied per-frame weights for " << num_done + << " matrices; errors on " << num_err; + return (num_done != 0 ? 
0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + + diff --git a/src/bin/weight-pdf-post.cc b/src/bin/weight-pdf-post.cc new file mode 100644 index 00000000000..c7477a046c8 --- /dev/null +++ b/src/bin/weight-pdf-post.cc @@ -0,0 +1,154 @@ +// bin/weight-pdf-post.cc + +// Copyright 2015 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "gmm/am-diag-gmm.h" +#include "hmm/transition-model.h" +#include "hmm/hmm-utils.h" +#include "hmm/posterior.h" + +namespace kaldi { + +void WeightPdfPost(const ConstIntegerSet<int32> &pdf_set, + BaseFloat pdf_scale, + Posterior *post) { + for (size_t i = 0; i < post->size(); i++) { + std::vector<std::pair<int32, BaseFloat> > this_post; + this_post.reserve((*post)[i].size()); + for (size_t j = 0; j < (*post)[i].size(); j++) { + int32 pdf_id = (*post)[i][j].first; + BaseFloat weight = (*post)[i][j].second; + if (pdf_set.count(pdf_id) != 0) { // pdf is in the given set (e.g. silence). + if (pdf_scale != 0.0) + this_post.push_back(std::make_pair(pdf_id, weight*pdf_scale)); + } else { + this_post.push_back(std::make_pair(pdf_id, weight)); + } + } + (*post)[i].swap(this_post); + } +} + +void WeightPdfPostDistributed(const ConstIntegerSet<int32> &pdf_set, + BaseFloat pdf_scale, + Posterior *post) { + for (size_t i = 0; i < post->size(); i++) { + std::vector<std::pair<int32, BaseFloat> > this_post; + this_post.reserve((*post)[i].size()); + BaseFloat sil_weight = 0.0, nonsil_weight = 0.0; + for (size_t j = 0; j < (*post)[i].size(); j++) { + int32 pdf_id = (*post)[i][j].first; + BaseFloat weight = (*post)[i][j].second; + if (pdf_set.count(pdf_id) != 0) + sil_weight += weight; + else + nonsil_weight += weight; + } + // This "distributed" weighting approach doesn't make sense if we have + // negative weights. + KALDI_ASSERT(sil_weight >= 0.0 && nonsil_weight >= 0.0); + if (sil_weight + nonsil_weight == 0.0) continue; + BaseFloat frame_scale = (sil_weight * pdf_scale + nonsil_weight) / + (sil_weight + nonsil_weight);
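 + // For example, with pdf_scale = 0.1, sil_weight = 0.6 and + // nonsil_weight = 0.4, frame_scale = (0.6 * 0.1 + 0.4) / 1.0 = 0.46, + // so every posterior entry on this frame gets scaled by 0.46.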
 + if (frame_scale != 0.0) { + for (size_t j = 0; j < (*post)[i].size(); j++) { + int32 pdf_id = (*post)[i][j].first; + BaseFloat weight = (*post)[i][j].second; + this_post.push_back(std::make_pair(pdf_id, weight * frame_scale)); + } + } + (*post)[i].swap(this_post); + } +} + +} // namespace kaldi + +int main(int argc, char *argv[]) { + using namespace kaldi; + typedef kaldi::int32 int32; + try { + const char *usage = + "Apply a weight to specific pdfs in posteriors\n" + "Usage: weight-pdf-post [options] <pdf-weight> <pdfs> " + "<post-rspecifier> <post-wspecifier>\n" + "e.g.:\n" + " weight-pdf-post 0.00001 0:2 ark:1.post ark:nosil.post\n"; + + ParseOptions po(usage); + + bool distribute = false; + + po.Register("distribute", &distribute, "If true, rather than weighting the " + "individual posteriors, apply the weighting to the " + "whole frame: " + "i.e. on time t, scale all posterior entries by " + "p(sil)*silence-weight + p(non-sil)*1.0"); + + po.Read(argc, argv); + + if (po.NumArgs() != 4) { + po.PrintUsage(); + exit(1); + } + + std::string pdf_weight_str = po.GetArg(1), + pdfs_str = po.GetArg(2), + posteriors_rspecifier = po.GetArg(3), + posteriors_wspecifier = po.GetArg(4); + + BaseFloat pdf_weight = 0.0; + if (!ConvertStringToReal(pdf_weight_str, &pdf_weight)) + KALDI_ERR << "Invalid pdf-weight parameter: expected float, got \"" + << pdf_weight_str << '"'; + std::vector<int32> pdfs; + if (!SplitStringToIntegers(pdfs_str, ":", false, &pdfs)) + KALDI_ERR << "Invalid pdfs string " << pdfs_str; + if (pdfs.empty()) + KALDI_WARN << "No pdfs specified, this will have no effect"; + ConstIntegerSet<int32> pdf_set(pdfs); // faster lookup. + + int32 num_posteriors = 0; + SequentialPosteriorReader posterior_reader(posteriors_rspecifier); + PosteriorWriter posterior_writer(posteriors_wspecifier); + + for (; !posterior_reader.Done(); posterior_reader.Next()) { + num_posteriors++; + // Posterior is vector<vector<pair<int32, BaseFloat> > > + Posterior post = posterior_reader.Value(); + if (distribute) + WeightPdfPostDistributed(pdf_set, + pdf_weight, &post); + else + WeightPdfPost(pdf_set, + pdf_weight, &post); + + posterior_writer.Write(posterior_reader.Key(), post); + } + KALDI_LOG << "Done " << num_posteriors << " posteriors."; + return (num_posteriors != 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + + diff --git a/src/featbin/Makefile b/src/featbin/Makefile index dc2bea215d8..e1a9a1ebe0d 100644 --- a/src/featbin/Makefile +++ b/src/featbin/Makefile @@ -15,7 +15,8 @@ BINFILES = compute-mfcc-feats compute-plp-feats compute-fbank-feats \ process-kaldi-pitch-feats compare-feats wav-to-duration add-deltas-sdc \ compute-and-process-kaldi-pitch-feats modify-cmvn-stats wav-copy \ wav-reverberate append-vector-to-feats detect-sinusoids shift-feats \ - concat-feats append-post-to-feats post-to-feats + concat-feats append-post-to-feats post-to-feats vector-to-feat \ + extract-column OBJFILES = diff --git a/src/featbin/extract-column.cc b/src/featbin/extract-column.cc new file mode 100644 index 00000000000..7fa6644af03 --- /dev/null +++ b/src/featbin/extract-column.cc @@ -0,0 +1,84 @@ +// featbin/extract-column.cc + +// Copyright 2015 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "matrix/kaldi-matrix.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace std; + + const char *usage = + "Extract a column out of a matrix. \n" + "This is most useful to extract log-energies \n" + "from feature files\n" + "\n" + "Usage: extract-column [options] --column-index=<column-index> " + "<feats-rspecifier> <vector-wspecifier>\n" + " e.g. 
extract-column ark:feats-in.ark ark:energies.ark\n" + "See also: select-feats, subset-feats, subsample-feats, extract-rows\n"; + + ParseOptions po(usage); + + int32 column_index = 0; + + po.Register("column-index", &column_index, + "Index of column to extract"); + + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + string feat_rspecifier = po.GetArg(1); + string vector_wspecifier = po.GetArg(2); + + SequentialBaseFloatMatrixReader reader(feat_rspecifier); + BaseFloatVectorWriter writer(vector_wspecifier); + + int32 num_done = 0, num_err = 0; + + for (; !reader.Done(); reader.Next(), num_done++) { + const Matrix<BaseFloat>& feats(reader.Value()); + Vector<BaseFloat> col(feats.NumRows()); + if (column_index >= feats.NumCols()) { + KALDI_ERR << "Column index " << column_index << " is " + << "not less than number of columns " << feats.NumCols(); + } + col.CopyColFromMat(feats, column_index); + writer.Write(reader.Key(), col); + } + + KALDI_LOG << "Processed " << num_done << " matrices successfully; " + << "errors on " << num_err; + + return (num_done > 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + + diff --git a/src/featbin/vector-to-feat.cc b/src/featbin/vector-to-feat.cc new file mode 100644 index 00000000000..1fe521db864 --- /dev/null +++ b/src/featbin/vector-to-feat.cc @@ -0,0 +1,100 @@ +// featbin/vector-to-feat.cc + +// Copyright 2015 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "matrix/kaldi-matrix.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + + const char *usage = + "Convert a vector into a single feature so that it can be appended \n" + "to other feature matrices\n" + "Usage: vector-to-feats <vector-rspecifier> <feats-wspecifier>\n" + "or: vector-to-feats <vector-rxfilename> <feat-wxfilename>\n" + "e.g.: vector-to-feats scp:weights.scp ark:weight_feats.ark\n" + " or: vector-to-feats weight_vec feat_mat\n" + "See also: copy-feats, copy-matrix, paste-feats, \n" + "subsample-feats, splice-feats\n"; + + ParseOptions po(usage); + bool compress = false, binary = true; + + po.Register("binary", &binary, "Binary-mode output (not relevant if writing " + "to archive)"); + po.Register("compress", &compress, "If true, write output in compressed form " + "(only currently supported for wxfilename, i.e. 
archive/script," + "output)"); + + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + int32 num_done = 0; + + if (ClassifyRspecifier(po.GetArg(1), NULL, NULL) != kNoRspecifier) { + std::string vector_rspecifier = po.GetArg(1); + std::string feature_wspecifier = po.GetArg(2); + + SequentialBaseFloatVectorReader vector_reader(vector_rspecifier); + BaseFloatMatrixWriter feat_writer(feature_wspecifier); + CompressedMatrixWriter compressed_feat_writer(feature_wspecifier); + + for (; !vector_reader.Done(); vector_reader.Next(), ++num_done) { + const Vector<BaseFloat> &vec = vector_reader.Value(); + Matrix<BaseFloat> feat(vec.Dim(), 1); + feat.CopyColFromVec(vec, 0); + + if (!compress) + feat_writer.Write(vector_reader.Key(), feat); + else + compressed_feat_writer.Write(vector_reader.Key(), + CompressedMatrix(feat)); + } + KALDI_LOG << "Converted " << num_done << " vectors into features"; + return (num_done != 0 ? 0 : 1); + } + + KALDI_ASSERT(!compress && "Compression not yet supported for single files"); + + std::string vector_rxfilename = po.GetArg(1), + feature_wxfilename = po.GetArg(2); + + Vector<BaseFloat> vec; + ReadKaldiObject(vector_rxfilename, &vec); + + Matrix<BaseFloat> feat(vec.Dim(), 1); + feat.CopyColFromVec(vec, 0); + + WriteKaldiObject(feat, feature_wxfilename, binary); + + KALDI_LOG << "Converted vector " << PrintableRxfilename(vector_rxfilename) + << " to " << PrintableWxfilename(feature_wxfilename); + return 0; + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + diff --git a/src/matrix/kaldi-matrix.cc b/src/matrix/kaldi-matrix.cc index 34003e8a550..4c3948ba2f5 100644 --- a/src/matrix/kaldi-matrix.cc +++ b/src/matrix/kaldi-matrix.cc @@ -2533,6 +2533,15 @@ Real MatrixBase<Real>::ApplySoftMax() { return max + Log(sum); } +template<typename Real> +void MatrixBase<Real>::ApplySoftMaxPerRow() { + for (MatrixIndexT i = 0; i < num_rows_; i++) { + Row(i).ApplySoftMax(); + KALDI_ASSERT(kaldi::ApproxEqual(Row(i).Sum(), 1.0)); + } + KALDI_ASSERT(Max() <= 1.0 && Min() >= 0.0); +} + template<typename Real> void MatrixBase<Real>::Tanh(const MatrixBase<Real> &src) { KALDI_ASSERT(SameDim(*this, src)); diff --git a/src/matrix/kaldi-matrix.h b/src/matrix/kaldi-matrix.h index e254fcad118..dccd52a9af4 100644 --- a/src/matrix/kaldi-matrix.h +++ b/src/matrix/kaldi-matrix.h @@ -453,6 +453,11 @@ class MatrixBase { /// Apply soft-max to the collection of all elements of the /// matrix and return normalizer (log sum of exponentials). Real ApplySoftMax(); + + /// Softmax nonlinearity + /// Y = Softmax(X) : Yij = e^Xij / sum_k(e^Xik), done to each row. + /// For each row, the max value is first subtracted for good numerical stability
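 + /// (e.g. a row with elements (0, log 3) becomes (0.25, 0.75)).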
 + void ApplySoftMaxPerRow(); /// Set each element to the sigmoid of the corresponding element of "src". void Sigmoid(const MatrixBase<Real> &src); From fb43c8ca9ff2910313ee49ca5a143e11ef6c9b7d Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Sun, 25 Sep 2016 15:02:24 -0400 Subject: [PATCH 029/530] asr_diarization: Modify subsegment_feats and add fix_subsegmented_feats.pl --- .../s5/utils/data/fix_subsegmented_feats.pl | 79 +++++++++++++++++++ egs/wsj/s5/utils/data/get_subsegment_feats.sh | 46 +++++++++++ egs/wsj/s5/utils/data/subsegment_data_dir.sh | 38 ++++++--- src/util/kaldi-holder.cc | 44 ++++++++++- src/util/kaldi-holder.h | 5 ++ 5 files changed, 201 insertions(+), 11 deletions(-) create mode 100755 egs/wsj/s5/utils/data/fix_subsegmented_feats.pl create mode 100755 egs/wsj/s5/utils/data/get_subsegment_feats.sh diff --git a/egs/wsj/s5/utils/data/fix_subsegmented_feats.pl b/egs/wsj/s5/utils/data/fix_subsegmented_feats.pl new file mode 100755 index 00000000000..bd8aeb8e409 --- /dev/null +++ b/egs/wsj/s5/utils/data/fix_subsegmented_feats.pl @@ -0,0 +1,79 @@ +#!/usr/bin/env perl + +# Copyright 2016 Vimal Manohar +# Apache 2.0. + +use warnings; + +# This script modifies the feats ranges and ensures that they don't +# exceed the max number of frames supplied in utt2max_frames. +# utt2max_frames can be computed by using +# steps/segmentation/get_reco2num_frames.sh +# cut -d ' ' -f 1,2 <data>/segments | utils/apply_map.pl -f 2 <data>/reco2num_frames > <data>/utt2max_frames + +(scalar @ARGV == 1) or die "Usage: fix_subsegmented_feats.pl <utt2max_frames>"; + +my $utt2max_frames_file = $ARGV[0]; + +open MAX_FRAMES, $utt2max_frames_file or die "fix_subsegmented_feats.pl: Could not open file $utt2max_frames_file"; + +my %utt2max_frames; + +while (<MAX_FRAMES>) { + chomp; + my @F = split; + + (scalar @F == 2) or die "fix_subsegmented_feats.pl: Invalid line $_ in $utt2max_frames_file"; + + $utt2max_frames{$F[0]} = $F[1]; +} + +while (<STDIN>) { + my $line = $_; + + if (m/\[([^][]*)\]\[([^][]*)\]\s*$/) { + print STDERR ("fix_subsegmented_feats.pl: this script only supports single indices"); + exit(1); + } + + my $before_range = ""; + my $range = ""; + + if (m/^(.*)\[([^][]*)\]\s*$/) { + $before_range = $1; + $range = $2; + } else { + print; + next; + } + + my @F = split(/ /, $before_range); + my $utt = shift @F; + defined $utt2max_frames{$utt} or die "fix_subsegmented_feats.pl: Could not find key $utt in $utt2max_frames_file.\nError with line $line"; + + if ($range !~ m/^(\d*):(\d*)([,]?.*)$/) { + print STDERR "fix_subsegmented_feats.pl: could not make sense of input line $_"; + exit(1); + } + + my $row_start = $1; + my $row_end = $2; + my $col_range = $3; + + if ($row_end >= $utt2max_frames{$utt}) { + print STDERR "Fixed row_end for $utt from $row_end to $utt2max_frames{$utt}-1\n"; + $row_end = $utt2max_frames{$utt} - 1; + } + + if ($row_start ne "") { + $range = "$row_start:$row_end"; + } else { + $range = ""; + } + + if ($col_range ne "") { + $range .= ",$col_range"; + } + + print ("$utt " . join(" ", @F) . "[" . $range . "]\n"); +} diff --git a/egs/wsj/s5/utils/data/get_subsegment_feats.sh b/egs/wsj/s5/utils/data/get_subsegment_feats.sh new file mode 100755 index 00000000000..6baba68eedd --- /dev/null +++ b/egs/wsj/s5/utils/data/get_subsegment_feats.sh @@ -0,0 +1,46 @@ +#! /bin/bash + +# Copyright 2016 Johns Hopkins University (Author: Dan Povey) +# 2016 Vimal Manohar +# Apache 2.0. + +if [ $# -ne 4 ]; then + echo "This script gets subsegmented_feats (by adding ranges to data/feats.scp) " + echo "for the subsegments file. 
This does one part of the " + echo "functionality in subsegment_data_dir.sh, which additionally " + echo "creates a new subsegmented data directory." + echo "Usage: $0 <feats-scp> <frame-shift> <frame-overlap> <subsegments-file>" + echo " e.g.: $0 data/train/feats.scp 0.01 0.015 subsegments" + exit 1 +fi + +feats=$1 +frame_shift=$2 +frame_overlap=$3 +subsegments=$4 + +# The subsegments format is <new-utt-id> <old-utt-id> <start-time> <end-time>. +# e.g. 'utt_foo-1 utt_foo 7.21 8.93' +# The first awk command replaces this with the format: +# <new-utt-id> <old-utt-id> <start-frame> <last-frame> +# e.g. 'utt_foo-1 utt_foo 721 892' +# and the apply_map.pl command replaces 'utt_foo' (the 2nd field) with its corresponding entry +# from the original feats.scp, so we get a line like: +# e.g. 'utt_foo-1 foo-bar.ark:514231 721 892' +# Note: the reason we subtract one from the last time is that it's going to +# represent the 'last' frame, not the 'end' frame [i.e. not one past the last], +# in the matlab-like, but zero-indexed [first:last] notion. For instance, a segment with 1 frame +# would have start-time 0.00 and end-time 0.01, which would become the frame range +# [0:0] +# The second awk command turns this into something like +# utt_foo-1 foo-bar.ark:514231[721:892] +# It has to be a bit careful because the format actually allows for more general things +# like pipes that might contain spaces, so it has to be able to produce output like the +# following: +# utt_foo-1 some command|[721:892] +# Lastly, utils/data/normalize_data_range.pl will only do something nontrivial if +# the original data-dir already had data-ranges in square brackets. +awk -v s=$frame_shift -v fovlp=$frame_overlap '{print $1, $2, int(($3/s)+0.5), int(($4-fovlp)/s+0.5);}' <$subsegments| \ + utils/apply_map.pl -f 2 $feats | \ + awk '{p=NF-1; for (n=1;n " echo "  $0 [options] <srcdir> <subsegments-file> [<text-file>] <destdir>" echo "This script sub-segments a data directory. <subsegments-file> is to" echo "have lines of the form <new-utt-id> <old-utt-id> <start-time> <end-time>" echo "and <text-file> is of the form <new-utt-id> <word1> <word2> ... <wordN>." echo "This script appropriately combines the <subsegments-file> with the original" echo "segments file, if necessary, and if not, creates a segments file." + echo "<text-file> is an optional argument." echo "e.g.:" echo " $0 data/train [options] exp/tri3b_resegment/segments exp/tri3b_resegment/text data/train_resegmented" echo " Options:" @@ -50,11 +51,23 @@ export LC_ALL=C srcdir=$1 subsegments=$2 -new_text=$3 -dir=$4 +no_text=true +if [ $# -eq 4 ]; then + new_text=$3 + dir=$4 + no_text=false + if [ ! -f "$new_text" ]; then + echo "$0: no such file $new_text" + exit 1 + fi + +else + dir=$3 +fi + for f in "$subsegments" "$srcdir/utt2spk"; do if [ ! -f "$f" ]; then echo "$0: no such file $f" exit 1; @@ -65,9 +78,11 @@ if ! mkdir -p $dir; then echo "$0: failed to create directory $dir" fi -if ! cmp <(awk '{print $1}' <$subsegments) <(awk '{print $1}' <$new_text); then - echo "$0: expected the first fields of the files $subsegments and $new_text to be identical" - exit 1 +if ! $no_text; then + if ! cmp <(awk '{print $1}' <$subsegments) <(awk '{print $1}' <$new_text); then + echo "$0: expected the first fields of the files $subsegments and $new_text to be identical" + exit 1 + fi fi # create the utt2spk in $dir @@ -86,8 +101,11 @@ awk '{print $1, $2}' < $subsegments > $dir/new2old_utt utils/apply_map.pl -f 2 $srcdir/utt2spk < $dir/new2old_utt >$dir/utt2spk # .. and the new spk2utt file. utils/utt2spk_to_spk2utt.pl <$dir/utt2spk >$dir/spk2utt + +if ! $no_text; then + # the new text file is just what the user provides. 
+ cp $new_text $dir/text +fi # copy the source wav.scp cp $srcdir/wav.scp $dir diff --git a/src/util/kaldi-holder.cc b/src/util/kaldi-holder.cc index a26bdf2ce29..a86f09a2030 100644 --- a/src/util/kaldi-holder.cc +++ b/src/util/kaldi-holder.cc @@ -34,7 +34,7 @@ bool ExtractObjectRange(const Matrix<Real> &input, const std::string &range, SplitStringToVector(range, ",", false, &splits); if (!((splits.size() == 1 && !splits[0].empty()) || (splits.size() == 2 && !splits[0].empty() && !splits[1].empty()))) { - KALDI_ERR << "Invalid range specifier: " << range; + KALDI_ERR << "Invalid range specifier for matrix: " << range; return false; } std::vector<int32> row_range, col_range; @@ -75,6 +75,48 @@ template bool ExtractObjectRange(const Matrix<double> &, const std::string &, template bool ExtractObjectRange(const Matrix<BaseFloat> &, const std::string &, Matrix<BaseFloat> *); +template<typename Real> +bool ExtractObjectRange(const Vector<Real> &input, const std::string &range, + Vector<Real> *output) { + if (range.empty()) { + KALDI_ERR << "Empty range specifier."; + return false; + } + std::vector<std::string> splits; + SplitStringToVector(range, ",", false, &splits); + if (!((splits.size() == 1 && !splits[0].empty()))) { + KALDI_ERR << "Invalid range specifier for vector: " << range; + return false; + } + std::vector<int32> index_range; + bool status = true; + if (splits[0] != ":") + status = SplitStringToIntegers(splits[0], ":", false, &index_range); + + if (index_range.size() == 0) { + index_range.push_back(0); + index_range.push_back(input.Dim() - 1); + } + + if (!(status && index_range.size() == 2 && + index_range[0] >= 0 && index_range[0] <= index_range[1] && + index_range[1] < input.Dim())) { + KALDI_ERR << "Invalid range specifier: " << range + << " for vector of size " << input.Dim(); + return false; + } + int32 size = index_range[1] - index_range[0] + 1; + output->Resize(size, kUndefined); + output->CopyFromVec(input.Range(index_range[0], size)); + return true; +} + +// template instantiation +template bool ExtractObjectRange(const Vector<double> &, const std::string &, + Vector<double> *); +template bool ExtractObjectRange(const Vector<BaseFloat> &, const std::string &, + Vector<BaseFloat> *); + bool ExtractRangeSpecifier(const std::string &rxfilename_with_range, std::string *data_rxfilename, std::string *range) { diff --git a/src/util/kaldi-holder.h b/src/util/kaldi-holder.h index 06d7ec8e745..9ab148387ee 100644 --- a/src/util/kaldi-holder.h +++ b/src/util/kaldi-holder.h @@ -242,6 +242,11 @@ template <typename Real> bool ExtractObjectRange(const Matrix<Real> &input, const std::string &range, Matrix<Real> *output); +/// The template is specialized for types Vector<BaseFloat> and Vector<double>. 
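+/// The range is a colon-separated pair of inclusive indices, so e.g. "0:9" +/// selects the first ten elements of the vector.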
+template <typename Real> + bool ExtractObjectRange(const Vector<Real> &input, const std::string &range, + Vector<Real> *output); + // In SequentialTableReaderScriptImpl and RandomAccessTableReaderScriptImpl, for // cases where the scp contained 'range specifiers' (things in square brackets From d7e0b7f47050bc126b45d9d45f55b833b37aa8ea Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Tue, 30 Aug 2016 16:48:35 -0400 Subject: [PATCH 030/530] asr_diarization: Utility scripts get_reco2utt, get_utt2dur and get_segments_for_data, get_utt2num_frames, get_reco2num_frames, get_reco2dur, convert_ali_to_vec, quantize_vector, convert_rttm_to_utt2spk_and_segments, get_frame_shift_from_config --- .../steps/segmentation/convert_ali_to_vec.pl | 17 +++ .../convert_rttm_to_utt2spk_and_segments.py | 79 +++++++++++++ .../convert_utt2spk_and_segments_to_rttm.py | 65 +++++++++++ .../get_frame_shift_info_from_config.pl | 21 ++++ .../s5/steps/segmentation/quantize_vector.pl | 28 +++++ .../utils/data/convert_data_dir_to_whole.sh | 108 ++++++++++++++++++ egs/wsj/s5/utils/data/get_reco2dur.sh | 87 ++++++++++++++ egs/wsj/s5/utils/data/get_reco2num_frames.sh | 28 +++++ egs/wsj/s5/utils/data/get_reco2utt.sh | 21 ++++ .../s5/utils/data/get_segments_for_data.sh | 2 +- egs/wsj/s5/utils/data/get_utt2dur.sh | 2 +- egs/wsj/s5/utils/data/get_utt2num_frames.sh | 42 +++++++ 12 files changed, 498 insertions(+), 2 deletions(-) create mode 100755 egs/wsj/s5/steps/segmentation/convert_ali_to_vec.pl create mode 100755 egs/wsj/s5/steps/segmentation/convert_rttm_to_utt2spk_and_segments.py create mode 100755 egs/wsj/s5/steps/segmentation/convert_utt2spk_and_segments_to_rttm.py create mode 100755 egs/wsj/s5/steps/segmentation/get_frame_shift_info_from_config.pl create mode 100755 egs/wsj/s5/steps/segmentation/quantize_vector.pl create mode 100755 egs/wsj/s5/utils/data/convert_data_dir_to_whole.sh create mode 100755 egs/wsj/s5/utils/data/get_reco2dur.sh create mode 100755 egs/wsj/s5/utils/data/get_reco2num_frames.sh create mode 100755 egs/wsj/s5/utils/data/get_reco2utt.sh create mode 100755 egs/wsj/s5/utils/data/get_utt2num_frames.sh diff --git a/egs/wsj/s5/steps/segmentation/convert_ali_to_vec.pl b/egs/wsj/s5/steps/segmentation/convert_ali_to_vec.pl new file mode 100755 index 00000000000..c0d1a9eeae2 --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/convert_ali_to_vec.pl @@ -0,0 +1,17 @@ +#! /usr/bin/perl + +# Converts a kaldi integer vector in text format to +# a kaldi vector in text format by adding a pair +# of square brackets around the data. +# Assumes the first column to be the utterance id. + +while (<>) { + chomp; + my @F = split; + + printf ("$F[0] [ "); + for (my $i = 1; $i <= $#F; $i++) { + printf ("$F[$i] "); + } + print ("]\n"); +} diff --git a/egs/wsj/s5/steps/segmentation/convert_rttm_to_utt2spk_and_segments.py b/egs/wsj/s5/steps/segmentation/convert_rttm_to_utt2spk_and_segments.py new file mode 100755 index 00000000000..23dc5a14f09 --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/convert_rttm_to_utt2spk_and_segments.py @@ -0,0 +1,79 @@ +#! 
/usr/bin/env python + +"""This script converts an RTTM with +speaker info into kaldi utt2spk and segments""" + +import argparse + +def get_args(): + parser = argparse.ArgumentParser( + description="""This script converts an RTTM with + speaker info into kaldi utt2spk and segments""") + parser.add_argument("--use-reco-id-as-spkr", type=str, + choices=["true", "false"], + help="Use the recording ID based on RTTM and " + "reco2file_and_channel as the speaker") + parser.add_argument("rttm_file", type=str, + help="""Input RTTM file. + The format of the RTTM file is + """ + """<type> <file-id> <channel-id> <begin-time> """ + """<duration> <NA> <NA> <speaker> <conf>""") + parser.add_argument("reco2file_and_channel", type=str, + help="""Input reco2file_and_channel. + The format is <recording-id> <file-id> <channel-id>.""") + parser.add_argument("utt2spk", type=str, + help="Output utt2spk file") + parser.add_argument("segments", type=str, + help="Output segments file") + + args = parser.parse_args() + + args.use_reco_id_as_spkr = bool(args.use_reco_id_as_spkr == "true") + + return args + +def main(): + args = get_args() + + file_and_channel2reco = {} + for line in open(args.reco2file_and_channel): + parts = line.strip().split() + file_and_channel2reco[(parts[1], parts[2])] = parts[0] + + utt2spk_writer = open(args.utt2spk, 'w') + segments_writer = open(args.segments, 'w') + for line in open(args.rttm_file): + parts = line.strip().split() + if parts[0] != "SPEAKER": + continue + + file_id = parts[1] + channel = parts[2] + + try: + reco = file_and_channel2reco[(file_id, channel)] + except KeyError as e: + raise Exception("Could not find recording with " + "(file_id, channel) " + "= ({0},{1}) in {2}: {3}\n".format( + file_id, channel, + args.reco2file_and_channel, str(e))) + + start_time = float(parts[3]) + end_time = start_time + float(parts[4]) + + if args.use_reco_id_as_spkr: + spkr = reco + else: + spkr = parts[7] + + st = int(start_time * 100) + end = int(end_time * 100) + utt = "{0}-{1:06d}-{2:06d}".format(spkr, st, end) + + utt2spk_writer.write("{0} {1}\n".format(utt, spkr)) + segments_writer.write("{0} {1} {2:7.2f} {3:7.2f}\n".format( + utt, reco, start_time, end_time)) + +if __name__ == '__main__': + main() diff --git a/egs/wsj/s5/steps/segmentation/convert_utt2spk_and_segments_to_rttm.py b/egs/wsj/s5/steps/segmentation/convert_utt2spk_and_segments_to_rttm.py new file mode 100755 index 00000000000..1443259286b --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/convert_utt2spk_and_segments_to_rttm.py @@ -0,0 +1,65 @@ +#! /usr/bin/env python + +"""This script converts kaldi-style utt2spk and segments to an RTTM""" + +import argparse + +def get_args(): + parser = argparse.ArgumentParser( + description="""This script converts kaldi-style utt2spk and + segments to an RTTM""") + + parser.add_argument("utt2spk", type=str, + help="Input utt2spk file") + parser.add_argument("segments", type=str, + help="Input segments file") + parser.add_argument("reco2file_and_channel", type=str, + help="""Input reco2file_and_channel. 
The format is <recording-id> <file-id> <channel-id>.""") + parser.add_argument("rttm_file", type=str, + help="Output RTTM file") + + args = parser.parse_args() + return args + +def main(): + args = get_args() + + reco2file_and_channel = {} + for line in open(args.reco2file_and_channel): + parts = line.strip().split() + reco2file_and_channel[parts[0]] = (parts[1], parts[2]) + + utt2spk = {} + with open(args.utt2spk, 'r') as utt2spk_reader: + for line in utt2spk_reader: + parts = line.strip().split() + utt2spk[parts[0]] = parts[1] + + with open(args.rttm_file, 'w') as rttm_writer: + for line in open(args.segments, 'r'): + parts = line.strip().split() + + utt = parts[0] + spkr = utt2spk[utt] + + reco = parts[1] + + try: + file_id, channel = reco2file_and_channel[reco] + except KeyError as e: + raise Exception("Could not find recording {0} in {1}: " + "{2}\n".format(reco, + args.reco2file_and_channel, + str(e))) + + start_time = float(parts[2]) + duration = float(parts[3]) - start_time + + rttm_writer.write("SPEAKER {0} {1} {2:7.2f} {3:7.2f} " + "<NA> <NA> {4} <NA>\n".format( + file_id, channel, start_time, + duration, spkr)) + +if __name__ == '__main__': + main() diff --git a/egs/wsj/s5/steps/segmentation/get_frame_shift_info_from_config.pl b/egs/wsj/s5/steps/segmentation/get_frame_shift_info_from_config.pl new file mode 100755 index 00000000000..79a42aa9852 --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/get_frame_shift_info_from_config.pl @@ -0,0 +1,21 @@ +#! /usr/bin/perl +use strict; +use warnings; + +# This script parses a features config file such as conf/mfcc.conf +# and returns the pair of values frame_shift and frame_overlap in seconds. + +my $frame_shift = 0.01; +my $frame_overlap = 0.015; + +while (<>) { + if (m/--frame-shift=(\d+)/) { + $frame_shift = $1 / 1000; + } + + if (m/--frame-length=(\d+)/) { + $frame_overlap = $1 / 1000 - $frame_shift; + } +} + +print "$frame_shift $frame_overlap\n"; diff --git a/egs/wsj/s5/steps/segmentation/quantize_vector.pl b/egs/wsj/s5/steps/segmentation/quantize_vector.pl new file mode 100755 index 00000000000..0bccebade4c --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/quantize_vector.pl @@ -0,0 +1,28 @@ +#!/usr/bin/perl + +# This script converts per-frame speech probabilities into +# 0-1 labels. + +@ARGV <= 1 or die "Usage: quantize_vector.pl [threshold]"; + +my $t = 0.5; + +if (scalar @ARGV == 1) { + $t = $ARGV[0]; +} + +while (<STDIN>) { + chomp; + my @F = split; + + my $str = "$F[0]"; + for (my $i = 2; $i < $#F; $i++) { + if ($F[$i] >= $t) { + $str = "$str 1"; + } else { + $str = "$str 0"; + } + } + + print ("$str\n"); +} diff --git a/egs/wsj/s5/utils/data/convert_data_dir_to_whole.sh b/egs/wsj/s5/utils/data/convert_data_dir_to_whole.sh new file mode 100755 index 00000000000..f55f60c4774 --- /dev/null +++ b/egs/wsj/s5/utils/data/convert_data_dir_to_whole.sh @@ -0,0 +1,108 @@ +#! /bin/bash + +# This scripts converts a data directory into a "whole" data directory +# by removing the segments and using the recordings themselves as +# utterances + +set -o pipefail + +. path.sh + +cmd=run.pl +stage=-1 + +. parse_options.sh + +if [ $# -ne 2 ]; then + echo "Usage: convert_data_dir_to_whole.sh <in-data> <out-data>" + echo " e.g.: convert_data_dir_to_whole.sh data/dev data/dev_whole" + exit 1 +fi + +data=$1 +dir=$2 + +if [ ! -f $data/segments ]; then + # Data directory already does not contain segments. So just copy it. 
utils/copy_data_dir.sh $data $dir + exit 0 +fi + +mkdir -p $dir +cp $data/wav.scp $dir +cp $data/reco2file_and_channel $dir +rm -f $dir/{utt2spk,text} || true + +[ -f $data/stm ] && cp $data/stm $dir +[ -f $data/glm ] && cp $data/glm $dir + +text_files= +[ -f $data/text ] && text_files="$data/text $dir/text" + +# Combine utt2spk and text from the segments into utt2spk and text for the whole +# recording. +cat $data/segments | perl -e ' +if (scalar @ARGV == 4) { + ($utt2spk_in, $utt2spk_out, $text_in, $text_out) = @ARGV; +} elsif (scalar @ARGV == 2) { + ($utt2spk_in, $utt2spk_out) = @ARGV; +} else { + die "Unexpected number of arguments"; +} + +if (defined $text_in) { + open(TI, "<$text_in") || die "Error: fail to open $text_in\n"; + open(TO, ">$text_out") || die "Error: fail to open $text_out\n"; +} +open(UI, "<$utt2spk_in") || die "Error: fail to open $utt2spk_in\n"; +open(UO, ">$utt2spk_out") || die "Error: fail to open $utt2spk_out\n"; + +my %file2utt = (); +while (<STDIN>) { + chomp; + my @col = split; + @col >= 4 or die "bad line $_\n"; + + if (! defined $file2utt{$col[1]}) { + $file2utt{$col[1]} = []; + } + push @{$file2utt{$col[1]}}, $col[0]; +} + +my %text = (); +my %utt2spk = (); + +while (<UI>) { + chomp; + my @col = split; + $utt2spk{$col[0]} = $col[1]; +} + +if (defined $text_in) { + while (<TI>) { + chomp; + my @col = split; + @col >= 1 or die "bad line $_\n"; + + my $utt = shift @col; + $text{$utt} = join(" ", @col); + } +} + +foreach $file (keys %file2utt) { + my @utts = @{$file2utt{$file}}; + #print STDERR $file . " " . join(" ", @utts) . "\n"; + print UO "$file $file\n"; + + if (defined $text_in) { + $text_line = join(" ", map { $text{$_}; } @utts); + print TO "$file $text_line\n"; + } +} +' $data/utt2spk $dir/utt2spk $text_files + +sort -u $dir/utt2spk > $dir/utt2spk.tmp +mv $dir/utt2spk.tmp $dir/utt2spk +utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt + +utils/fix_data_dir.sh $dir diff --git a/egs/wsj/s5/utils/data/get_reco2dur.sh b/egs/wsj/s5/utils/data/get_reco2dur.sh new file mode 100755 index 00000000000..7d2ccb71769 --- /dev/null +++ b/egs/wsj/s5/utils/data/get_reco2dur.sh @@ -0,0 +1,87 @@ +#!/bin/bash + +# Copyright 2016 Johns Hopkins University (author: Daniel Povey) +# Apache 2.0 + +# This script operates on a data directory, such as in data/train/, and adds the +# reco2dur file if it does not already exist. The file 'reco2dur' maps from +# recording to the duration of the recording in seconds. This script works it +# out from the 'segments' file, or, if not present, from the wav.scp file (it +# first tries interrogating the headers, and if this fails, it reads the wave +# files in entirely.) + +frame_shift=0.01 + +. utils/parse_options.sh +. ./path.sh + +if [ $# != 1 ]; then + echo "Usage: $0 [options] <datadir>" + echo "e.g.:" + echo " $0 data/train" + echo " Options:" + echo " --frame-shift # frame shift in seconds. Only relevant when we are" + echo " # getting duration from feats.scp (default: 0.01). " + exit 1 +fi + +export LC_ALL=C + +data=$1 + +if [ -s $data/reco2dur ] && \ + [ $(cat $data/wav.scp | wc -l) -eq $(cat $data/reco2dur | wc -l) ]; then + echo "$0: $data/reco2dur already exists with the expected length. We won't recompute it." + exit 0; +fi + +# if the wav.scp contains only lines of the form +# utt1 /foo/bar/sph2pipe -f wav /baz/foo.sph | +if cat $data/wav.scp | perl -e ' + while (<>) { s/\|\s*$/ |/; # make sure final | is preceded by space. 
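 + # A line is usable only if it has exactly six fields of the form + # utt1 /path/to/sph2pipe -f wav /path/to/foo.sph | + # anything else makes this perl block exit nonzero, so that we fall + # back to wav-to-duration below.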
 + @A = split; if (!($#A == 5 && $A[1] =~ m/sph2pipe$/ && + $A[2] eq "-f" && $A[3] eq "wav" && $A[5] eq "|")) { exit(1); } + $utt = $A[0]; $sphere_file = $A[4]; + + if (!open(F, "<$sphere_file")) { die "Error opening sphere file $sphere_file"; } + $sample_rate = -1; $sample_count = -1; + for ($n = 0; $n <= 30; $n++) { + $line = <F>; + if ($line =~ m/sample_rate -i (\d+)/) { $sample_rate = $1; } + if ($line =~ m/sample_count -i (\d+)/) { $sample_count = $1; } + if ($line =~ m/end_head/) { last; } + } + close(F); + if ($sample_rate == -1 || $sample_count == -1) { + die "could not parse sphere header from $sphere_file"; + } + $duration = $sample_count * 1.0 / $sample_rate; + print "$utt $duration\n"; + } ' > $data/reco2dur; then + echo "$0: successfully obtained recording durations from sphere-file headers" +else + echo "$0: could not get recording durations from sphere-file headers, using wav-to-duration" + if ! command -v wav-to-duration >/dev/null; then + echo "$0: wav-to-duration is not on your path" + exit 1; + fi + + read_entire_file=false + if cat $data/wav.scp | grep -q 'sox.*speed'; then + read_entire_file=true + echo "$0: reading from the entire wav file to fix the problem caused by sox commands with speed perturbation. It is going to be slow." + echo "... It is much faster if you call get_reco2dur.sh *before* doing the speed perturbation via e.g. perturb_data_dir_speed.sh or " + echo "... perturb_data_dir_speed_3way.sh." + fi + + if ! wav-to-duration --read-entire-file=$read_entire_file scp:$data/wav.scp ark,t:$data/reco2dur 2>&1 | grep -v 'nonzero return status'; then + echo "$0: there was a problem getting the durations; moving $data/reco2dur to $data/.backup/" + mkdir -p $data/.backup/ + mv $data/reco2dur $data/.backup/ + fi +fi + +echo "$0: computed $data/reco2dur" + +exit 0 + diff --git a/egs/wsj/s5/utils/data/get_reco2num_frames.sh b/egs/wsj/s5/utils/data/get_reco2num_frames.sh new file mode 100755 index 00000000000..03ab7b40616 --- /dev/null +++ b/egs/wsj/s5/utils/data/get_reco2num_frames.sh @@ -0,0 +1,28 @@ +#! /bin/bash + +cmd=run.pl +nj=4 + +frame_shift=0.01 +frame_overlap=0.015 + +. utils/parse_options.sh + +if [ $# -ne 1 ]; then + echo "Usage: $0 <data>" + exit 1 +fi + +data=$1 + +if [ -f $data/reco2num_frames ]; then + echo "$0: $data/reco2num_frames already present!" + exit 0; +fi + +utils/data/get_reco2dur.sh $data +awk -v fs=$frame_shift -v fovlp=$frame_overlap \ + '{print $1" "int( ($2 - fovlp) / fs)}' $data/reco2dur > $data/reco2num_frames + +echo "$0: Computed and wrote $data/reco2num_frames" + diff --git a/egs/wsj/s5/utils/data/get_reco2utt.sh b/egs/wsj/s5/utils/data/get_reco2utt.sh new file mode 100755 index 00000000000..6c30f812cfe --- /dev/null +++ b/egs/wsj/s5/utils/data/get_reco2utt.sh @@ -0,0 +1,21 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +if [ $# -ne 1 ]; then + echo "This script creates a reco2utt file in the data directory, " + echo "which is analogous to spk2utt file but with the first column " + echo "as recording instead of speaker." + echo "Usage: get_reco2utt.sh <data>" + echo " e.g.: get_reco2utt.sh data/train" + exit 1 +fi + +data=$1 + +if [ ! 
-s $data/segments ]; then + utils/data/get_segments_for_data.sh $data > $data/segments +fi + +cut -d ' ' -f 1,2 $data/segments | utils/utt2spk_to_spk2utt.pl > $data/reco2utt diff --git a/egs/wsj/s5/utils/data/get_segments_for_data.sh b/egs/wsj/s5/utils/data/get_segments_for_data.sh index 694acc6a256..7adc4c465d3 100755 --- a/egs/wsj/s5/utils/data/get_segments_for_data.sh +++ b/egs/wsj/s5/utils/data/get_segments_for_data.sh @@ -19,7 +19,7 @@ fi data=$1 -if [ ! -f $data/utt2dur ]; then +if [ ! -s $data/utt2dur ]; then utils/data/get_utt2dur.sh $data 1>&2 || exit 1; fi diff --git a/egs/wsj/s5/utils/data/get_utt2dur.sh b/egs/wsj/s5/utils/data/get_utt2dur.sh index f14fc2c5e81..c415e8dfb81 100755 --- a/egs/wsj/s5/utils/data/get_utt2dur.sh +++ b/egs/wsj/s5/utils/data/get_utt2dur.sh @@ -35,7 +35,7 @@ if [ -s $data/utt2dur ] && \ exit 0; fi -if [ -f $data/segments ]; then +if [ -s $data/segments ]; then echo "$0: working out $data/utt2dur from $data/segments" cat $data/segments | awk '{len=$4-$3; print $1, len;}' > $data/utt2dur elif [ -f $data/wav.scp ]; then diff --git a/egs/wsj/s5/utils/data/get_utt2num_frames.sh b/egs/wsj/s5/utils/data/get_utt2num_frames.sh new file mode 100755 index 00000000000..e2921601ec9 --- /dev/null +++ b/egs/wsj/s5/utils/data/get_utt2num_frames.sh @@ -0,0 +1,42 @@ +#! /bin/bash + +cmd=run.pl +nj=4 + +frame_shift=0.01 +frame_overlap=0.015 + +. utils/parse_options.sh + +if [ $# -ne 1 ]; then + echo "This script writes a file utt2num_frames with the " + echo "number of frames in each utterance as measured based on the " + echo "duration of the utterances (in utt2dur) and the specified " + echo "frame_shift and frame_overlap." + echo "Usage: $0 " + exit 1 +fi + +data=$1 + +if [ -f $data/utt2num_frames ]; then + echo "$0: $data/utt2num_frames already present!" + exit 0; +fi + +if [ ! -f $data/feats.scp ]; then + utils/data/get_utt2dur.sh $data + awk -v fs=$frame_shift -v fovlp=$frame_overlap \ + '{print $1" "int( ($2 - fovlp) / fs)}' $data/utt2dur > $data/utt2num_frames + exit 0 +fi + +utils/split_data.sh $data $nj || exit 1 +$cmd JOB=1:$nj $data/log/get_utt2num_frames.JOB.log \ + feat-to-len scp:$data/split${nj}/JOB/feats.scp ark,t:$data/split$nj/JOB/utt2num_frames || exit 1 + +for n in `seq $nj`; do + cat $data/split$nj/$n/utt2num_frames +done > $data/utt2num_frames + +echo "$0: Computed and wrote $data/utt2num_frames" From 4c646132d8ffc3b880bc180c170ed5f59a369c6e Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 25 Nov 2016 17:33:49 -0500 Subject: [PATCH 031/530] asr_diarization: SAD post-processing --- egs/wsj/s5/steps/segmentation/get_sad_map.py | 156 ++++++++++++++++++ .../internal/convert_ali_to_vad.sh | 54 ++++++ .../internal/post_process_segments.sh | 129 +++++++++++++++ .../post_process_sad_to_segments.sh | 103 ++++++++++++ 4 files changed, 442 insertions(+) create mode 100755 egs/wsj/s5/steps/segmentation/get_sad_map.py create mode 100755 egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh create mode 100755 egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh create mode 100755 egs/wsj/s5/steps/segmentation/post_process_sad_to_segments.sh diff --git a/egs/wsj/s5/steps/segmentation/get_sad_map.py b/egs/wsj/s5/steps/segmentation/get_sad_map.py new file mode 100755 index 00000000000..9160503c7ad --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/get_sad_map.py @@ -0,0 +1,156 @@ +#! /usr/bin/env python + +"""This script prints a mapping from phones to speech +activity labels +0 for silence, 1 for speech, 2 for noise and 3 for OOV. 
+Other labels can be optionally defined. +e.g. If 1, 2 and 3 are silence phones, 4, 5 and 6 are speech phones, +the SAD map would be +1 0 +2 0 +3 0 +4 1 +5 1 +6 1. +The silence and speech are read from the phones/silence.txt and +phones/nonsilence.txt from the lang directory. +An initial SAD map can be provided using --init-sad-map to override +the above default mapping of phones. This is useful to say map +<oov> or noise phones to separate SAD labels. +""" + +import argparse + + +class StrToBoolAction(argparse.Action): + """ A custom action to convert bools from shell format i.e., true/false + to python format i.e., True/False """ + def __call__(self, parser, namespace, values, option_string=None): + try: + if values == "true": + setattr(namespace, self.dest, True) + elif values == "false": + setattr(namespace, self.dest, False) + else: + raise ValueError + except ValueError: + raise Exception("Unknown value {0} for --{1}".format(values, + self.dest)) + + +class NullstrToNoneAction(argparse.Action): + """ A custom action to convert empty strings passed by shell + to None in python. This is necessary as shell scripts print null + strings when a variable is not specified. We could use the more apt + None in python. """ + def __call__(self, parser, namespace, values, option_string=None): + if values.strip() == "": + setattr(namespace, self.dest, None) + else: + setattr(namespace, self.dest, values) + + +def get_args(): + parser = argparse.ArgumentParser( + description="""This script prints a mapping from phones to speech + activity labels + 0 for silence, 1 for speech, 2 for noise and 3 for OOV. + Other labels can be optionally defined. + e.g. If 1, 2 and 3 are silence phones, 4, 5 and 6 are speech phones, + the SAD map would be + 1 0 + 2 0 + 3 0 + 4 1 + 5 1 + 6 1. + The silence and speech are read from the phones/silence.txt and + phones/nonsilence.txt from the lang directory. + An initial SAD map can be provided using --init-sad-map to override + the above default mapping of phones. This is useful to say map + <oov> or noise phones to separate SAD labels. + """) + + parser.add_argument("--init-sad-map", type=str, action=NullstrToNoneAction, + help="""Initial SAD map that will be used to override + the default mapping using phones/silence.txt and + phones/nonsilence.txt. Does not need to specify labels + for all the phones. + e.g. + <oov> 3 + <noise-phone> 2""") + + noise_group = parser.add_mutually_exclusive_group() + noise_group.add_argument("--noise-phones-file", type=str, + action=NullstrToNoneAction, + help="Map noise phones from file to label 2") + noise_group.add_argument("--noise-phones-list", type=str, + action=NullstrToNoneAction, + help="A colon-separated list of noise phones to " + "map to label 2") + parser.add_argument("--unk", type=str, action=NullstrToNoneAction, + help="""UNK phone, if provided will be mapped to + label 3""") + + parser.add_argument("--map-noise-to-sil", type=str, + action=StrToBoolAction, + choices=["true", "false"], default=False, + help="""Map noise phones to silence before writing the + map. i.e. anything with label 2 is mapped to + label 0.""") + parser.add_argument("--map-unk-to-speech", type=str, + action=StrToBoolAction, + choices=["true", "false"], default=False, + help="""Map UNK phone to speech before writing the map + i.e. 
+
+    parser.add_argument("lang_dir")
+
+    args = parser.parse_args()
+
+    return args
+
+
+def main():
+    args = get_args()
+
+    sad_map = {}
+
+    for line in open('{0}/phones/nonsilence.txt'.format(args.lang_dir)):
+        parts = line.strip().split()
+        sad_map[parts[0]] = 1
+
+    for line in open('{0}/phones/silence.txt'.format(args.lang_dir)):
+        parts = line.strip().split()
+        sad_map[parts[0]] = 0
+
+    if args.init_sad_map is not None:
+        for line in open(args.init_sad_map):
+            parts = line.strip().split()
+            try:
+                sad_map[parts[0]] = int(parts[1])
+            except Exception:
+                raise Exception("Invalid line " + line)
+
+    if args.unk is not None:
+        sad_map[args.unk] = 3
+
+    noise_phones = {}
+    if args.noise_phones_file is not None:
+        for line in open(args.noise_phones_file):
+            parts = line.strip().split()
+            noise_phones[parts[0]] = 1
+
+    if args.noise_phones_list is not None:
+        for x in args.noise_phones_list.split(":"):
+            noise_phones[x] = 1
+
+    # Apply label 2 to the collected noise phones; without this the
+    # --noise-phones-file / --noise-phones-list options would have no effect.
+    for x in noise_phones:
+        sad_map[x] = 2
+
+    for x, l in sad_map.iteritems():
+        if l == 2 and args.map_noise_to_sil:
+            l = 0
+        if l == 3 and args.map_unk_to_speech:
+            l = 1
+        print("{0} {1}".format(x, l))
+
+if __name__ == "__main__":
+    main()
diff --git a/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh b/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh
new file mode 100755
index 00000000000..353e6d4664e
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh
@@ -0,0 +1,54 @@
+#! /bin/bash

+set -o pipefail
+set -e
+set -u
+
+. path.sh
+
+cmd=run.pl
+
+frame_shift=0.01
+frame_subsampling_factor=1
+
+. parse_options.sh
+
+if [ $# -ne 3 ]; then
+  echo "This script converts the alignment in the alignment directory"
+  echo "to speech activity segments based on the provided phone-map."
+  echo "Usage: $0 <ali-dir> <phone-map> <dir>"
+  echo " e.g.: $0 exp/tri3_ali data/lang/phones/sad.map exp/tri3_ali_vad"
+  exit 1
+fi
+
+ali_dir=$1
+phone_map=$2
+dir=$3
+
+for f in $phone_map $ali_dir/ali.1.gz; do
+  [ ! -f $f ] && echo "$0: Could not find $f" && exit 1
+done
+
+mkdir -p $dir
+
+nj=`cat $ali_dir/num_jobs` || exit 1
+echo $nj > $dir/num_jobs
+
+if [ -f $ali_dir/frame_subsampling_factor ]; then
+  frame_subsampling_factor=`cat $ali_dir/frame_subsampling_factor`
+fi
+
+ali_frame_shift=`perl -e "print ($frame_shift * $frame_subsampling_factor);"`
+ali_frame_overlap=`perl -e "print ($ali_frame_shift * 1.5);"`
+
+dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $dir ${PWD}`
+
+$cmd JOB=1:$nj $dir/log/get_sad.JOB.log \
+  segmentation-init-from-ali \
+  "ark:gunzip -c ${ali_dir}/ali.JOB.gz | ali-to-phones --per-frame ${ali_dir}/final.mdl ark:- ark:- |" \
+  ark:- \| segmentation-copy --label-map=$phone_map ark:- ark:- \| \
+  segmentation-post-process --merge-adjacent-segments ark:- \
+  ark,scp:$dir/sad_seg.JOB.ark,$dir/sad_seg.JOB.scp
+
+for n in `seq $nj`; do
+  cat $dir/sad_seg.$n.scp
+done | sort -k1,1 > $dir/sad_seg.scp
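A brief note on the frame-shift arithmetic in convert_ali_to_vad.sh above: the
alignment-level frame shift is the base frame shift scaled by the alignment
directory's frame-subsampling factor. A minimal sketch of that arithmetic,
assuming a chain-style alignment with factor 3 (the factor and values here are
illustrative, not from this patch):

    frame_shift=0.01
    frame_subsampling_factor=3
    ali_frame_shift=`perl -e "print ($frame_shift * $frame_subsampling_factor);"`   # 0.03
    ali_frame_overlap=`perl -e "print ($ali_frame_shift * 1.5);"`                   # 0.045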
diff --git a/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh b/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh
new file mode 100755
index 00000000000..c2750b4a895
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh
@@ -0,0 +1,129 @@
+#! /bin/bash
+
+# Copyright 2015-16  Vimal Manohar
+# Apache 2.0.
+
+set -e
+set -o pipefail
+set -u
+
+. path.sh
+
+cmd=run.pl
+stage=-10
+
+# General segmentation options
+pad_length=50   # Pad speech segments by this many frames on either side
+max_blend_length=10   # Maximum duration of speech that will be removed as part
+                      # of the smoothing process. This is only if there are no
+                      # other speech segments nearby.
+max_intersegment_length=50   # Merge nearby speech segments if the silence
+                             # between them is less than this many frames.
+post_pad_length=50    # Pad speech segments by this many frames on either side
+                      # after the merging process using max_intersegment_length
+max_segment_length=1000   # Segments that are longer than this are split into
+                          # overlapping segments.
+overlap_length=100    # Number of overlapping frames when segments are split.
+                      # See the above option.
+min_silence_length=30   # Min silence length at which to split very long segments
+
+frame_shift=0.01
+
+. utils/parse_options.sh
+
+if [ $# -ne 3 ]; then
+  echo "This script post-processes a speech activity segmentation to create"
+  echo "a kaldi-style data directory."
+  echo "See the comments for the kind of post-processing options."
+  echo "Usage: $0 <data-dir> <dir> <segmented-data-dir>"
+  echo " e.g.: $0 data/dev_aspire_whole exp/vad_dev_aspire data/dev_aspire_seg"
+  exit 1
+fi
+
+data_dir=$1
+dir=$2
+segmented_data_dir=$3
+
+for f in $dir/orig_segmentation.1.gz $data_dir/segments; do
+  if [ ! -f $f ]; then
+    echo "$0: Could not find $f"
+    exit 1
+  fi
+done
+
+cat <<EOF > $dir/segmentation.conf
+pad_length=$pad_length   # Pad speech segments by this many frames on either side
+max_blend_length=$max_blend_length   # Maximum duration of speech that will be removed as part
+                      # of the smoothing process. This is only if there are no
+                      # other speech segments nearby.
+max_intersegment_length=$max_intersegment_length   # Merge nearby speech segments if the silence
+                      # between them is less than this many frames.
+post_pad_length=$post_pad_length   # Pad speech segments by this many frames on either side
+                      # after the merging process using max_intersegment_length
+max_segment_length=$max_segment_length   # Segments that are longer than this are split into
+                      # overlapping segments.
+overlap_length=$overlap_length   # Number of overlapping frames when segments are split.
+                      # See the above option.
+min_silence_length=$min_silence_length # Min silence length at which to split very long segments + +frame_shift=$frame_shift +EOF + +nj=`cat $dir/num_jobs` || exit 1 + +if [ $stage -le 1 ]; then + rm -r $segmented_data_dir || true + utils/data/convert_data_dir_to_whole.sh $data_dir $segmented_data_dir || exit 1 + rm $segmented_data_dir/text +fi + +if [ $stage -le 2 ]; then + # Post-process the orignal SAD segmentation using the following steps: + # 1) blend short speech segments of less than $max_blend_length frames + # into silence + # 2) Remove all silence frames and widen speech segments by padding + # $pad_length frames + # 3) Merge adjacent segments that have an intersegment length of less than + # $max_intersegment_length frames + # 4) Widen speech segments again after merging + # 5) Split segments into segments of $max_segment_length at the point where + # the original segmentation had silence + # 6) Split segments into overlapping segments of max length + # $max_segment_length and overlap $overlap_length + # 7) Convert segmentation to kaldi segments and utt2spk + $cmd JOB=1:$nj $dir/log/post_process_segmentation.JOB.log \ + gunzip -c $dir/orig_segmentation.JOB.gz \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=0 ark:- ark:- \| \ + segmentation-post-process --max-blend-length=$max_blend_length --blend-short-segments-class=1 ark:- ark:- \| \ + segmentation-post-process --remove-labels=0 --pad-label=1 --pad-length=$pad_length ark:- ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=$max_intersegment_length ark:- ark:- \| \ + segmentation-post-process --pad-label=1 --pad-length=$post_pad_length ark:- ark:- \| \ + segmentation-split-segments --alignments="ark,s,cs:gunzip -c $dir/orig_segmentation.JOB.gz | segmentation-to-ali ark:- ark:- |" \ + --max-segment-length=$max_segment_length --min-alignment-chunk-length=$min_silence_length --ali-label=0 ark:- ark:- \| \ + segmentation-split-segments \ + --max-segment-length=$max_segment_length --overlap-length=$overlap_length ark:- ark:- \| \ + segmentation-to-segments --frame-shift=$frame_shift ark:- \ + ark,t:$dir/utt2spk.JOB $dir/segments.JOB || exit 1 +fi + +for n in `seq $nj`; do + cat $dir/utt2spk.$n +done > $segmented_data_dir/utt2spk + +for n in `seq $nj`; do + cat $dir/segments.$n +done > $segmented_data_dir/segments + +if [ ! -s $segmented_data_dir/utt2spk ] || [ ! -s $segmented_data_dir/segments ]; then + echo "$0: Segmentation failed to generate segments or utt2spk!" + exit 1 +fi + +utils/utt2spk_to_spk2utt.pl $segmented_data_dir/utt2spk > $segmented_data_dir/spk2utt || exit 1 +utils/fix_data_dir.sh $segmented_data_dir + +if [ ! -s $segmented_data_dir/utt2spk ] || [ ! -s $segmented_data_dir/segments ]; then + echo "$0: Segmentation failed to generate segments or utt2spk!" + exit 1 +fi + diff --git a/egs/wsj/s5/steps/segmentation/post_process_sad_to_segments.sh b/egs/wsj/s5/steps/segmentation/post_process_sad_to_segments.sh new file mode 100755 index 00000000000..f4011f20a03 --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/post_process_sad_to_segments.sh @@ -0,0 +1,103 @@ +#! /bin/bash + +# Copyright 2015 Vimal Manohar +# Apache 2.0. + +set -e -o pipefail -u +. path.sh + +cmd=run.pl +stage=-10 + +segmentation_config=conf/segmentation.conf +nj=18 + +frame_shift=0.01 +weight_threshold=0.5 +ali_suffix=_acwt0.1 + +phone_map= + +. 
utils/parse_options.sh + +if [ $# -ne 5 ] && [ $# -ne 4 ]; then + echo "This script converts an alignment directory containing per-frame SAD " + echo "labels or per-frame speech probabilities into kaldi-style " + echo "segmented data directory. " + echo "This script first converts the per-frame labels or weights into " + echo "segmentation and then calls " + echo "steps/segmentation/internal/post_process_sad_to_segments.sh, " + echo "which does the actual post-processing step." + echo "Usage: $0 ( |) " + echo " e.g.: $0 data/dev_aspire_whole exp/vad_dev_aspire data/dev_aspire_seg" + exit 1 +fi + +data_dir=$1 +vad_dir= + +if [ $# -eq 5 ]; then + lang=$2 + vad_dir=$3 + shift; shift; shift +else + weights_scp=$2 + shift; shift +fi + +dir=$1 +segmented_data_dir=$2 + +cat $data_dir/segments | awk '{print $1" "$2}' | \ + utils/utt2spk_to_spk2utt.pl > $data_dir/reco2utt + +utils/split_data.sh $data_dir $nj + +for n in `seq $nj`; do + cat $data_dir/split$nj/$n/segments | awk '{print $1" "$2}' | \ + utils/utt2spk_to_spk2utt.pl > $data_dir/split$nj/$n/reco2utt +done + + +mkdir -p $dir + +if [ ! -z "$vad_dir" ]; then + nj=`cat $vad_dir/num_jobs` || exit 1 + + if [ -z "$phone_map" ]; then + phone_map=$dir/phone_map + + { + cat $lang/phones/silence.int | awk '{print $1" 0"}'; + cat $lang/phones/nonsilence.int | awk '{print $1" 1"}'; + } | sort -k1,1 -n > $dir/phone_map + fi + + if [ $stage -le 0 ]; then + # Convert the original SAD into segmentation + $cmd JOB=1:$nj $dir/log/segmentation.JOB.log \ + segmentation-init-from-ali --reco2utt-rspecifier="ark,t:$data_dir/split$nj/JOB/reco2utt" \ + --segmentation-rspecifier="ark:segmentation-init-from-segments --shift-to-zero=false --frame-shift=$frame_shift $data_dir/split$nj/JOB/segments ark:- |" \ + "ark:gunzip -c $vad_dir/ali${ali_suffix}.JOB.gz |" ark:- \| \ + segmentation-copy --label-map=$phone_map ark:- \ + "ark:| gzip -c > $dir/orig_segmentation.JOB.gz" + fi +else + for n in `seq $nj`; do + utils/filter_scp.pl $data_dir/split$nj/$n/reco2utt $weights_scp > $dir/weights.$n.scp + done + + $cmd JOB=1:$nj $dir/log/weights_to_segments.JOB.log \ + copy-vector scp:$dir/weights.JOB.scp ark,t:- \| \ + awk -v t=$weight_threshold '{printf $1; for (i=3; i < NF; i++) { if ($i >= t) printf (" 1"); else printf (" 0"); }; print "";}' \| \ + segmentation-init-from-ali --reco2utt-rspecifier="ark,t:$data_dir/split$nj/JOB/reco2utt" \ + --segmentation-rspecifier="ark:segmentation-init-from-segments --shift-to-zero=false --frame-shift=$frame_shift $data_dir/split$nj/JOB/segments ark:- |" \ + ark,t:- "ark:| gzip -c > $dir/orig_segmentation.JOB.gz" +fi + +echo $nj > $dir/num_jobs + +steps/segmentation/internal/post_process_segments.sh \ + --stage $stage --cmd "$cmd" \ + --config $segmentation_config --frame-shift $frame_shift \ + $data_dir $dir $segmented_data_dir From 1913caf15d255feef6dbe4355764848a4496a6cd Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 24 Nov 2016 01:58:27 -0500 Subject: [PATCH 032/530] asr_diarization: Modify modify_speaker_info to add --respect-recording-info option --- egs/wsj/s5/utils/data/modify_speaker_info.sh | 25 ++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/egs/wsj/s5/utils/data/modify_speaker_info.sh b/egs/wsj/s5/utils/data/modify_speaker_info.sh index f75e9be5f67..e42f0df551d 100755 --- a/egs/wsj/s5/utils/data/modify_speaker_info.sh +++ b/egs/wsj/s5/utils/data/modify_speaker_info.sh @@ -37,6 +37,7 @@ utts_per_spk_max=-1 seconds_per_spk_max=-1 respect_speaker_info=true +respect_recording_info=true # 
end configuration section . utils/parse_options.sh @@ -93,10 +94,26 @@ else utt2dur_opt= fi -utils/data/internal/modify_speaker_info.py \ - $utt2dur_opt --respect-speaker-info=$respect_speaker_info \ - --utts-per-spk-max=$utts_per_spk_max --seconds-per-spk-max=$seconds_per_spk_max \ - <$srcdir/utt2spk >$destdir/utt2spk +if ! $respect_speaker_info && $respect_recording_info; then + if [ -f $srcdir/segments ]; then + cat $srcdir/segments | awk '{print $1" "$2}' | \ + utils/data/internal/modify_speaker_info.py \ + $utt2dur_opt --respect-speaker-info=true \ + --utts-per-spk-max=$utts_per_spk_max --seconds-per-spk-max=$seconds_per_spk_max \ + >$destdir/utt2spk + else + cat $srcdir/wav.scp | awk '{print $1" "$2}' | \ + utils/data/internal/modify_speaker_info.py \ + $utt2dur_opt --respect-speaker-info=true \ + --utts-per-spk-max=$utts_per_spk_max --seconds-per-spk-max=$seconds_per_spk_max \ + >$destdir/utt2spk + fi +else + utils/data/internal/modify_speaker_info.py \ + $utt2dur_opt --respect-speaker-info=$respect_speaker_info \ + --utts-per-spk-max=$utts_per_spk_max --seconds-per-spk-max=$seconds_per_spk_max \ + <$srcdir/utt2spk >$destdir/utt2spk +fi utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt From bd98bea8b40e16a13b12de4fdb204442d341bb25 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 24 Nov 2016 02:00:15 -0500 Subject: [PATCH 033/530] asr_diarization: Modify subset_data_dir.sh, copy_data_dir.sh to copy reco2file_and_channel and modify subset_data_dir.sh to add more options --- egs/wsj/s5/utils/copy_data_dir.sh | 7 ++++--- egs/wsj/s5/utils/fix_data_dir.sh | 9 +++++++-- egs/wsj/s5/utils/subset_data_dir.sh | 5 +++++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/egs/wsj/s5/utils/copy_data_dir.sh b/egs/wsj/s5/utils/copy_data_dir.sh index 008233daf62..222bc708527 100755 --- a/egs/wsj/s5/utils/copy_data_dir.sh +++ b/egs/wsj/s5/utils/copy_data_dir.sh @@ -83,15 +83,16 @@ fi if [ -f $srcdir/segments ]; then utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments cp $srcdir/wav.scp $destdir - if [ -f $srcdir/reco2file_and_channel ]; then - cp $srcdir/reco2file_and_channel $destdir/ - fi else # no segments->wav indexed by utt. if [ -f $srcdir/wav.scp ]; then utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp fi fi +if [ -f $srcdir/reco2file_and_channel ]; then + cp $srcdir/reco2file_and_channel $destdir/ +fi + if [ -f $srcdir/text ]; then utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text fi diff --git a/egs/wsj/s5/utils/fix_data_dir.sh b/egs/wsj/s5/utils/fix_data_dir.sh index 0333d628544..33e710a605f 100755 --- a/egs/wsj/s5/utils/fix_data_dir.sh +++ b/egs/wsj/s5/utils/fix_data_dir.sh @@ -6,6 +6,11 @@ # It puts the original contents of data-dir into # data-dir/.backup +utt_extra_files= +spk_extra_files= + +. 
utils/parse_options.sh + if [ $# != 1 ]; then echo "Usage: utils/data/fix_data_dir.sh " echo "e.g.: utils/data/fix_data_dir.sh data/train" @@ -110,7 +115,7 @@ function filter_speakers { filter_file $tmpdir/speakers $data/spk2utt utils/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk - for s in cmvn.scp spk2gender; do + for s in cmvn.scp spk2gender $spk_extra_files; do f=$data/$s if [ -f $f ]; then filter_file $tmpdir/speakers $f @@ -158,7 +163,7 @@ function filter_utts { fi fi - for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2dur utt2num_frames $maybe_wav; do + for x in utt2spk utt2uniq feats.scp vad.scp text segments utt2lang utt2dur utt2num_frames $maybe_wav $utt_extra_files; do if [ -f $data/$x ]; then cp $data/$x $data/.backup/$x if ! cmp -s $data/$x <( utils/filter_scp.pl $tmpdir/utts $data/$x ) ; then diff --git a/egs/wsj/s5/utils/subset_data_dir.sh b/egs/wsj/s5/utils/subset_data_dir.sh index 5fe3217ddad..9533d0216c9 100755 --- a/egs/wsj/s5/utils/subset_data_dir.sh +++ b/egs/wsj/s5/utils/subset_data_dir.sh @@ -108,6 +108,7 @@ function do_filtering { [ -f $srcdir/vad.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp [ -f $srcdir/utt2lang ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/utt2lang >$destdir/utt2lang [ -f $srcdir/utt2dur ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/utt2dur >$destdir/utt2dur + [ -f $srcdir/utt2uniq ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/utt2uniq >$destdir/utt2uniq [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp [ -f $srcdir/spk2warp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2warp >$destdir/spk2warp [ -f $srcdir/utt2warp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/utt2warp >$destdir/utt2warp @@ -126,6 +127,10 @@ function do_filtering { [ -f $srcdir/stm ] && utils/filter_scp.pl $destdir/reco < $srcdir/stm > $destdir/stm rm $destdir/reco + else + awk '{print $1;}' $destdir/wav.scp | sort | uniq > $destdir/reco + [ -f $srcdir/reco2file_and_channel ] && \ + utils/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel fi srcutts=`cat $srcdir/utt2spk | wc -l` destutts=`cat $destdir/utt2spk | wc -l` From 64d863ac0c6c950c2da04fe6370b8dd18544feff Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 24 Nov 2016 00:04:34 -0500 Subject: [PATCH 034/530] asr_diarization: Moved evaluate_segmentation.pl to steps/segmentation --- .../local/resegment/evaluate_segmentation.pl | 199 +----------------- .../segmentation/evaluate_segmentation.pl | 198 +++++++++++++++++ 2 files changed, 199 insertions(+), 198 deletions(-) mode change 100755 => 120000 egs/babel/s5c/local/resegment/evaluate_segmentation.pl create mode 100755 egs/wsj/s5/steps/segmentation/evaluate_segmentation.pl diff --git a/egs/babel/s5c/local/resegment/evaluate_segmentation.pl b/egs/babel/s5c/local/resegment/evaluate_segmentation.pl deleted file mode 100755 index 06a762d7762..00000000000 --- a/egs/babel/s5c/local/resegment/evaluate_segmentation.pl +++ /dev/null @@ -1,198 +0,0 @@ -#!/usr/bin/env perl - -# Copyright 2014 Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar -# Apache 2.0 - -################################################################################ -# -# This script was written to check the goodness of automatic segmentation tools -# It assumes input in the form of two Kaldi segments files, i.e. 
a file each of -# whose lines contain four space-separated values: -# -# UtteranceID FileID StartTime EndTime -# -# It computes # missed frames, # false positives and # overlapping frames. -# -################################################################################ - -if ($#ARGV == 1) { - $ReferenceSegmentation = $ARGV[0]; - $HypothesizedSegmentation = $ARGV[1]; - printf STDERR ("Comparing reference segmentation\n\t%s\nwith proposed segmentation\n\t%s\n", - $ReferenceSegmentation, - $HypothesizedSegmentation); -} else { - printf STDERR "This program compares the reference segmenation with the proposted segmentation\n"; - printf STDERR "Usage: $0 reference_segments_filename proposed_segments_filename\n"; - printf STDERR "e.g. $0 data/dev10h/segments data/dev10h.seg/segments\n"; - exit (0); -} - -################################################################################ -# First read the reference segmentation, and -# store the start- and end-times of all segments in each file. -################################################################################ - -open (SEGMENTS, "cat $ReferenceSegmentation | sort -k2,2 -k3n,3 -k4n,4 |") - || die "Unable to open $ReferenceSegmentation"; -$numLines = 0; -while ($line=) { - chomp $line; - @field = split("[ \t]+", $line); - unless ($#field == 3) { - exit (1); - printf STDERR "Skipping unparseable line in file $ReferenceSegmentation\n\t$line\n"; - next; - } - $fileID = $field[1]; - unless (exists $firstSeg{$fileID}) { - $firstSeg{$fileID} = $numLines; - $actualSpeech{$fileID} = 0.0; - $hypothesizedSpeech{$fileID} = 0.0; - $foundSpeech{$fileID} = 0.0; - $falseAlarm{$fileID} = 0.0; - $minStartTime{$fileID} = 0.0; - $maxEndTime{$fileID} = 0.0; - } - $refSegName[$numLines] = $field[0]; - $refSegStart[$numLines] = $field[2]; - $refSegEnd[$numLines] = $field[3]; - $actualSpeech{$fileID} += ($field[3]-$field[2]); - $minStartTime{$fileID} = $field[2] if ($minStartTime{$fileID}>$field[2]); - $maxEndTime{$fileID} = $field[3] if ($maxEndTime{$fileID}<$field[3]); - $lastSeg{$fileID} = $numLines; - ++$numLines; -} -close(SEGMENTS); -print STDERR "Read $numLines segments from $ReferenceSegmentation\n"; - -################################################################################ -# Process hypothesized segments sequentially, and gather speech/nonspeech stats -################################################################################ - -open (SEGMENTS, "cat $HypothesizedSegmentation | sort -k2,2 -k1,1 |") - # Kaldi segments files are sorted by UtteranceID, but we re-sort them here - # so that all segments of a file are read together, sorted by start-time. 
- || die "Unable to open $HypothesizedSegmentation"; -$numLines = 0; -$totalHypSpeech = 0.0; -$totalFoundSpeech = 0.0; -$totalFalseAlarm = 0.0; -$numShortSegs = 0; -$numLongSegs = 0; -while ($line=) { - chomp $line; - @field = split("[ \t]+", $line); - unless ($#field == 3) { - exit (1); - printf STDERR "Skipping unparseable line in file $HypothesizedSegmentation\n\t$line\n"; - next; - } - $fileID = $field[1]; - $segStart = $field[2]; - $segEnd = $field[3]; - if (exists $firstSeg{$fileID}) { - # This FileID exists in the reference segmentation - # So gather statistics for this UtteranceID - $hypothesizedSpeech{$fileID} += ($segEnd-$segStart); - $totalHypSpeech += ($segEnd-$segStart); - if (($segStart>=$maxEndTime{$fileID}) || ($segEnd<=$minStartTime{$fileID})) { - # This entire segment is a false alarm - $falseAlarm{$fileID} += ($segEnd-$segStart); - $totalFalseAlarm += ($segEnd-$segStart); - } else { - # This segment may overlap one or more reference segments - $p = $firstSeg{$fileID}; - while ($refSegEnd[$p]<=$segStart) { - ++$p; - } - # The overlap, if any, begins at the reference segment p - $q = $lastSeg{$fileID}; - while ($refSegStart[$q]>=$segEnd) { - --$q; - } - # The overlap, if any, ends at the reference segment q - if ($q<$p) { - # This segment sits entirely in the nonspeech region - # between the two reference speech segments q and p - $falseAlarm{$fileID} += ($segEnd-$segStart); - $totalFalseAlarm += ($segEnd-$segStart); - } else { - if (($segEnd-$segStart)<0.20) { - # For diagnosing Pascal's VAD segmentation - print STDOUT "Found short speech region $line\n"; - ++$numShortSegs; - } elsif (($segEnd-$segStart)>60.0) { - ++$numLongSegs; - # For diagnosing Pascal's VAD segmentation - print STDOUT "Found long speech region $line\n"; - } - # There is some overlap with segments p through q - for ($s=$p; $s<=$q; ++$s) { - if ($segStart<$refSegStart[$s]) { - # There is a leading false alarm portion before s - $falseAlarm{$fileID} += ($refSegStart[$s]-$segStart); - $totalFalseAlarm += ($refSegStart[$s]-$segStart); - $segStart=$refSegStart[$s]; - } - $speechPortion = ($refSegEnd[$s]<$segEnd) ? - ($refSegEnd[$s]-$segStart) : ($segEnd-$segStart); - $foundSpeech{$fileID} += $speechPortion; - $totalFoundSpeech += $speechPortion; - $segStart=$refSegEnd[$s]; - } - if ($segEnd>$segStart) { - # There is a trailing false alarm portion after q - $falseAlarm{$fileID} += ($segEnd-$segStart); - $totalFalseAlarm += ($segEnd-$segStart); - } - } - } - } else { - # This FileID does not exist in the reference segmentation - # So all this speech counts as a false alarm - exit (1); - printf STDERR ("Unexpected fileID in hypothesized segments: %s", $fileID); - $totalFalseAlarm += ($segEnd-$segStart); - } - ++$numLines; -} -close(SEGMENTS); -print STDERR "Read $numLines segments from $HypothesizedSegmentation\n"; - -################################################################################ -# Now that all hypothesized segments have been processed, compute needed stats -################################################################################ - -$totalActualSpeech = 0.0; -$totalNonSpeechEst = 0.0; # This is just a crude estimate of total nonspeech. 
-foreach $fileID (sort keys %actualSpeech) {
-    $totalActualSpeech += $actualSpeech{$fileID};
-    $totalNonSpeechEst += $maxEndTime{$fileID} - $actualSpeech{$fileID};
-    #######################################################################
-    # Print file-wise statistics to STDOUT; can pipe to /dev/null is needed
-    #######################################################################
-    printf STDOUT ("%s: %.2f min actual speech, %.2f min hypothesized: %.2f min overlap (%d\%), %.2f min false alarm (~%d\%)\n",
-                   $fileID,
-                   ($actualSpeech{$fileID}/60.0),
-                   ($hypothesizedSpeech{$fileID}/60.0),
-                   ($foundSpeech{$fileID}/60.0),
-                   ($foundSpeech{$fileID}*100/($actualSpeech{$fileID}+0.01)),
-                   ($falseAlarm{$fileID}/60.0),
-                   ($falseAlarm{$fileID}*100/($maxEndTime{$fileID}-$actualSpeech{$fileID}+0.01)));
-}
-
-################################################################################
-# Finally, we have everything needed to report the segmentation statistics.
-################################################################################
-
-printf STDERR ("------------------------------------------------------------------------\n");
-printf STDERR ("TOTAL: %.2f hrs actual speech, %.2f hrs hypothesized: %.2f hrs overlap (%d\%), %.2f hrs false alarm (~%d\%)\n",
-               ($totalActualSpeech/3600.0),
-               ($totalHypSpeech/3600.0),
-               ($totalFoundSpeech/3600.0),
-               ($totalFoundSpeech*100/($totalActualSpeech+0.000001)),
-               ($totalFalseAlarm/3600.0),
-               ($totalFalseAlarm*100/($totalNonSpeechEst+0.000001)));
-printf STDERR ("\t$numShortSegs segments < 0.2 sec and $numLongSegs segments > 60.0 sec\n");
-printf STDERR ("------------------------------------------------------------------------\n");
diff --git a/egs/babel/s5c/local/resegment/evaluate_segmentation.pl b/egs/babel/s5c/local/resegment/evaluate_segmentation.pl
new file mode 120000
index 00000000000..09276466c2b
--- /dev/null
+++ b/egs/babel/s5c/local/resegment/evaluate_segmentation.pl
@@ -0,0 +1 @@
+../../steps/segmentation/evaluate_segmentation.pl
\ No newline at end of file
diff --git a/egs/wsj/s5/steps/segmentation/evaluate_segmentation.pl b/egs/wsj/s5/steps/segmentation/evaluate_segmentation.pl
new file mode 100755
index 00000000000..06a762d7762
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/evaluate_segmentation.pl
@@ -0,0 +1,198 @@
+#!/usr/bin/env perl
+
+# Copyright 2014  Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar
+# Apache 2.0
+
+################################################################################
+#
+# This script was written to check the goodness of automatic segmentation tools
+# It assumes input in the form of two Kaldi segments files, i.e. a file each of
+# whose lines contain four space-separated values:
+#
+#     UtteranceID FileID StartTime EndTime
+#
+# It computes # missed frames, # false positives and # overlapping frames.
+#
+################################################################################
+
+if ($#ARGV == 1) {
+    $ReferenceSegmentation = $ARGV[0];
+    $HypothesizedSegmentation = $ARGV[1];
+    printf STDERR ("Comparing reference segmentation\n\t%s\nwith proposed segmentation\n\t%s\n",
+                   $ReferenceSegmentation,
+                   $HypothesizedSegmentation);
+} else {
+    printf STDERR "This program compares the reference segmentation with the proposed segmentation\n";
+    printf STDERR "Usage: $0 reference_segments_filename proposed_segments_filename\n";
+    printf STDERR "e.g. 
$0 data/dev10h/segments data/dev10h.seg/segments\n"; + exit (0); +} + +################################################################################ +# First read the reference segmentation, and +# store the start- and end-times of all segments in each file. +################################################################################ + +open (SEGMENTS, "cat $ReferenceSegmentation | sort -k2,2 -k3n,3 -k4n,4 |") + || die "Unable to open $ReferenceSegmentation"; +$numLines = 0; +while ($line=) { + chomp $line; + @field = split("[ \t]+", $line); + unless ($#field == 3) { + exit (1); + printf STDERR "Skipping unparseable line in file $ReferenceSegmentation\n\t$line\n"; + next; + } + $fileID = $field[1]; + unless (exists $firstSeg{$fileID}) { + $firstSeg{$fileID} = $numLines; + $actualSpeech{$fileID} = 0.0; + $hypothesizedSpeech{$fileID} = 0.0; + $foundSpeech{$fileID} = 0.0; + $falseAlarm{$fileID} = 0.0; + $minStartTime{$fileID} = 0.0; + $maxEndTime{$fileID} = 0.0; + } + $refSegName[$numLines] = $field[0]; + $refSegStart[$numLines] = $field[2]; + $refSegEnd[$numLines] = $field[3]; + $actualSpeech{$fileID} += ($field[3]-$field[2]); + $minStartTime{$fileID} = $field[2] if ($minStartTime{$fileID}>$field[2]); + $maxEndTime{$fileID} = $field[3] if ($maxEndTime{$fileID}<$field[3]); + $lastSeg{$fileID} = $numLines; + ++$numLines; +} +close(SEGMENTS); +print STDERR "Read $numLines segments from $ReferenceSegmentation\n"; + +################################################################################ +# Process hypothesized segments sequentially, and gather speech/nonspeech stats +################################################################################ + +open (SEGMENTS, "cat $HypothesizedSegmentation | sort -k2,2 -k1,1 |") + # Kaldi segments files are sorted by UtteranceID, but we re-sort them here + # so that all segments of a file are read together, sorted by start-time. 
+ || die "Unable to open $HypothesizedSegmentation"; +$numLines = 0; +$totalHypSpeech = 0.0; +$totalFoundSpeech = 0.0; +$totalFalseAlarm = 0.0; +$numShortSegs = 0; +$numLongSegs = 0; +while ($line=) { + chomp $line; + @field = split("[ \t]+", $line); + unless ($#field == 3) { + exit (1); + printf STDERR "Skipping unparseable line in file $HypothesizedSegmentation\n\t$line\n"; + next; + } + $fileID = $field[1]; + $segStart = $field[2]; + $segEnd = $field[3]; + if (exists $firstSeg{$fileID}) { + # This FileID exists in the reference segmentation + # So gather statistics for this UtteranceID + $hypothesizedSpeech{$fileID} += ($segEnd-$segStart); + $totalHypSpeech += ($segEnd-$segStart); + if (($segStart>=$maxEndTime{$fileID}) || ($segEnd<=$minStartTime{$fileID})) { + # This entire segment is a false alarm + $falseAlarm{$fileID} += ($segEnd-$segStart); + $totalFalseAlarm += ($segEnd-$segStart); + } else { + # This segment may overlap one or more reference segments + $p = $firstSeg{$fileID}; + while ($refSegEnd[$p]<=$segStart) { + ++$p; + } + # The overlap, if any, begins at the reference segment p + $q = $lastSeg{$fileID}; + while ($refSegStart[$q]>=$segEnd) { + --$q; + } + # The overlap, if any, ends at the reference segment q + if ($q<$p) { + # This segment sits entirely in the nonspeech region + # between the two reference speech segments q and p + $falseAlarm{$fileID} += ($segEnd-$segStart); + $totalFalseAlarm += ($segEnd-$segStart); + } else { + if (($segEnd-$segStart)<0.20) { + # For diagnosing Pascal's VAD segmentation + print STDOUT "Found short speech region $line\n"; + ++$numShortSegs; + } elsif (($segEnd-$segStart)>60.0) { + ++$numLongSegs; + # For diagnosing Pascal's VAD segmentation + print STDOUT "Found long speech region $line\n"; + } + # There is some overlap with segments p through q + for ($s=$p; $s<=$q; ++$s) { + if ($segStart<$refSegStart[$s]) { + # There is a leading false alarm portion before s + $falseAlarm{$fileID} += ($refSegStart[$s]-$segStart); + $totalFalseAlarm += ($refSegStart[$s]-$segStart); + $segStart=$refSegStart[$s]; + } + $speechPortion = ($refSegEnd[$s]<$segEnd) ? + ($refSegEnd[$s]-$segStart) : ($segEnd-$segStart); + $foundSpeech{$fileID} += $speechPortion; + $totalFoundSpeech += $speechPortion; + $segStart=$refSegEnd[$s]; + } + if ($segEnd>$segStart) { + # There is a trailing false alarm portion after q + $falseAlarm{$fileID} += ($segEnd-$segStart); + $totalFalseAlarm += ($segEnd-$segStart); + } + } + } + } else { + # This FileID does not exist in the reference segmentation + # So all this speech counts as a false alarm + exit (1); + printf STDERR ("Unexpected fileID in hypothesized segments: %s", $fileID); + $totalFalseAlarm += ($segEnd-$segStart); + } + ++$numLines; +} +close(SEGMENTS); +print STDERR "Read $numLines segments from $HypothesizedSegmentation\n"; + +################################################################################ +# Now that all hypothesized segments have been processed, compute needed stats +################################################################################ + +$totalActualSpeech = 0.0; +$totalNonSpeechEst = 0.0; # This is just a crude estimate of total nonspeech. 
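# (An illustrative restatement, not in the original script: the per-file
# false-alarm percentage printed below is
#     100 * falseAlarm / (maxEndTime - actualSpeech + 0.01)
# i.e. false alarms are normalized by this crude per-file nonspeech estimate;
# the 0.01 term only guards against division by zero.)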
+foreach $fileID (sort keys %actualSpeech) { + $totalActualSpeech += $actualSpeech{$fileID}; + $totalNonSpeechEst += $maxEndTime{$fileID} - $actualSpeech{$fileID}; + ####################################################################### + # Print file-wise statistics to STDOUT; can pipe to /dev/null is needed + ####################################################################### + printf STDOUT ("%s: %.2f min actual speech, %.2f min hypothesized: %.2f min overlap (%d\%), %.2f min false alarm (~%d\%)\n", + $fileID, + ($actualSpeech{$fileID}/60.0), + ($hypothesizedSpeech{$fileID}/60.0), + ($foundSpeech{$fileID}/60.0), + ($foundSpeech{$fileID}*100/($actualSpeech{$fileID}+0.01)), + ($falseAlarm{$fileID}/60.0), + ($falseAlarm{$fileID}*100/($maxEndTime{$fileID}-$actualSpeech{$fileID}+0.01))); +} + +################################################################################ +# Finally, we have everything needed to report the segmentation statistics. +################################################################################ + +printf STDERR ("------------------------------------------------------------------------\n"); +printf STDERR ("TOTAL: %.2f hrs actual speech, %.2f hrs hypothesized: %.2f hrs overlap (%d\%), %.2f hrs false alarm (~%d\%)\n", + ($totalActualSpeech/3600.0), + ($totalHypSpeech/3600.0), + ($totalFoundSpeech/3600.0), + ($totalFoundSpeech*100/($totalActualSpeech+0.000001)), + ($totalFalseAlarm/3600.0), + ($totalFalseAlarm*100/($totalNonSpeechEst+0.000001))); +printf STDERR ("\t$numShortSegs segments < 0.2 sec and $numLongSegs segments > 60.0 sec\n"); +printf STDERR ("------------------------------------------------------------------------\n"); From 7478ae14ed98b95619b5f75e3549b3b5c34380d2 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 24 Nov 2016 01:34:26 -0500 Subject: [PATCH 035/530] asr_diarization: Modify perturb_data_dir_volume.sh to write reco2vol and have limits --- .../s5/utils/data/perturb_data_dir_volume.sh | 60 ++++++++++++++++--- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh b/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh index bc76939643c..185c7abf426 100755 --- a/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh +++ b/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh @@ -7,6 +7,11 @@ # the wav.scp to perturb the volume (typically useful for training data when # using systems that don't have cepstral mean normalization). +reco2vol= +force=false +scale_low=0.125 +scale_high=2 + . utils/parse_options.sh if [ $# != 1 ]; then @@ -25,30 +30,67 @@ if [ ! -f $data/wav.scp ]; then exit 1 fi -if grep -q "sox --vol" $data/wav.scp; then +if ! $force && grep -q "sox --vol" $data/wav.scp; then echo "$0: It looks like the data was already volume perturbed. Not doing anything." 
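  # (A hypothetical re-run, not part of this patch: passing --force true,
  # together with --reco2vol pointing at a previously written reco2vol file,
  # re-applies the same per-recording volume factors to a parallel copy of
  # the data, e.g.
  #   utils/data/perturb_data_dir_volume.sh --force true \
  #     --reco2vol data/train/reco2vol data/train_fbank
  # where data/train_fbank is an assumed parallel directory.)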
exit 0 fi -cat $data/wav.scp | python -c " +if [ -z "$reco2vol" ]; then + cat $data/wav.scp | python -c " import sys, os, subprocess, re, random random.seed(0) -scale_low = 1.0/8 -scale_high = 2.0 +scale_low = $scale_low +scale_high = $scale_high +volume_writer = open('$data/reco2vol', 'w') +for line in sys.stdin.readlines(): + if len(line.strip()) == 0: + continue + # Handle three cases of rxfilenames appropriately; 'input piped command', 'file offset' and 'filename' + vol = random.uniform(scale_low, scale_high) + + parts = line.strip().split() + if line.strip()[-1] == '|': + print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), vol) + elif re.search(':[0-9]+$', line.strip()) is not None: + print '{id} wav-copy {wav} - | sox --vol {vol} -t wav - -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = vol) + else: + print '{id} sox --vol {vol} -t wav {wav} -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = vol) + volume_writer.write('{id} {vol}\n'.format(id = parts[0], vol = vol)) +" > $data/wav.scp_scaled || exit 1; +else + cat $data/wav.scp | python -c " +import sys, os, subprocess, re +volumes = {} +for line in open('$reco2vol'): + if len(line.strip()) == 0: + continue + parts = line.strip().split() + volumes[parts[0]] = float(parts[1]) + for line in sys.stdin.readlines(): if len(line.strip()) == 0: continue # Handle three cases of rxfilenames appropriately; 'input piped command', 'file offset' and 'filename' + + parts = line.strip().split() + id = parts[0] + + if id not in volumes: + raise Exception('Could not find volume for id {id}'.format(id = id)) + + vol = volumes[id] + if line.strip()[-1] == '|': - print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high)) + print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), vol) elif re.search(':[0-9]+$', line.strip()) is not None: - parts = line.split() - print '{id} wav-copy {wav} - | sox --vol {vol} -t wav - -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high)) + print '{id} wav-copy {wav} - | sox --vol {vol} -t wav - -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = vol) else: - parts = line.split() - print '{id} sox --vol {vol} -t wav {wav} -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high)) + print '{id} sox --vol {vol} -t wav {wav} -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = vol) " > $data/wav.scp_scaled || exit 1; + cp $reco2vol $data/reco2vol +fi + len1=$(cat $data/wav.scp | wc -l) len2=$(cat $data/wav.scp_scaled | wc -l) if [ "$len1" != "$len2" ]; then From aaa35ff772a51cf14bedb60ffa3b68dea60fba02 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 24 Nov 2016 01:36:35 -0500 Subject: [PATCH 036/530] asr_diarization: Get reverberated version of scp --- .../s5/steps/segmentation/get_reverb_scp.pl | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100755 egs/wsj/s5/steps/segmentation/get_reverb_scp.pl diff --git a/egs/wsj/s5/steps/segmentation/get_reverb_scp.pl b/egs/wsj/s5/steps/segmentation/get_reverb_scp.pl new file mode 100755 index 00000000000..57f63b517f2 --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/get_reverb_scp.pl @@ -0,0 +1,58 @@ +#! 
/usr/bin/perl +use strict; +use warnings; + +my $field_begin = -1; +my $field_end = -1; + +if ($ARGV[0] eq "-f") { + shift @ARGV; + my $field_spec = shift @ARGV; + if ($field_spec =~ m/^\d+$/) { + $field_begin = $field_spec - 1; $field_end = $field_spec - 1; + } + if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) + if ($1 ne "") { + $field_begin = $1 - 1; # Change to zero-based indexing. + } + if ($2 ne "") { + $field_end = $2 - 1; # Change to zero-based indexing. + } + } + if (!defined $field_begin && !defined $field_end) { + die "Bad argument to -f option: $field_spec"; + } +} + +if (scalar @ARGV != 1 && scalar @ARGV != 2 ) { + print "Usage: get_reverb_scp.pl [-f -] [] < input_scp > output_scp\n"; + exit(1); +} + +my $num_reps = $ARGV[0]; +my $prefix = "rev"; + +if (scalar @ARGV == 2) { + $prefix = $ARGV[1]; +} + +while () { + chomp; + my @A = split; + + for (my $i = 1; $i <= $num_reps; $i++) { + for (my $pos = 0; $pos <= $#A; $pos++) { + my $a = $A[$pos]; + if ( ($field_begin < 0 || $pos >= $field_begin) + && ($field_end < 0 || $pos <= $field_end) ) { + if ($a =~ m/^(sp[0-9.]+-)(.+)$/) { + $a = $1 . "$prefix" . $i . "_" . $2; + } else { + $a = "$prefix" . $i . "_" . $a; + } + } + print $a . " "; + } + print "\n"; + } +} From e04f86f04d8bd993f3300a2619cc643aef46b15c Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 24 Nov 2016 01:36:13 -0500 Subject: [PATCH 037/530] asr_diarization: Adding script split_data_on_reco.sh --- .../steps/segmentation/split_data_on_reco.sh | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100755 egs/wsj/s5/steps/segmentation/split_data_on_reco.sh diff --git a/egs/wsj/s5/steps/segmentation/split_data_on_reco.sh b/egs/wsj/s5/steps/segmentation/split_data_on_reco.sh new file mode 100755 index 00000000000..4c167d99a1e --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/split_data_on_reco.sh @@ -0,0 +1,29 @@ +#! /bin/bash + +set -e + +if [ $# -ne 3 ]; then + echo "Usage: split_data_on_reco.sh " + exit 1 +fi + +ref_data=$1 +data=$2 +nj=$3 + +utils/data/get_reco2utt.sh $ref_data +utils/data/get_reco2utt.sh $data + +utils/split_data.sh --per-reco $ref_data $nj + +for n in `seq $nj`; do + srn=$ref_data/split${nj}reco/$n + dsn=$data/split${nj}reco/$n + + mkdir -p $dsn + + utils/data/get_reco2utt.sh $srn + utils/filter_scp.pl $srn/reco2utt $data/reco2utt > $dsn/reco2utt + utils/spk2utt_to_utt2spk.pl $dsn/reco2utt > $dsn/utt2reco + utils/subset_data_dir.sh --utt-list $dsn/utt2reco $data $dsn +done From 3dc469299dc054a416a12ae2fae29b92f752546a Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 24 Nov 2016 01:39:18 -0500 Subject: [PATCH 038/530] asr_diarization: add per-reco option to split_data.sh --- egs/wsj/s5/utils/split_data.sh | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/egs/wsj/s5/utils/split_data.sh b/egs/wsj/s5/utils/split_data.sh index e44a4ab6359..646830481db 100755 --- a/egs/wsj/s5/utils/split_data.sh +++ b/egs/wsj/s5/utils/split_data.sh @@ -16,9 +16,14 @@ # limitations under the License. split_per_spk=true +split_per_reco=false if [ "$1" == "--per-utt" ]; then split_per_spk=false shift +elif [ "$1" == "--per-reco" ]; then + split_per_spk=false + split_per_reco=true + shift fi if [ $# != 2 ]; then @@ -59,10 +64,14 @@ if [ -f $data/text ] && [ $nu -ne $nt ]; then echo "** use utils/fix_data_dir.sh to fix this." 
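  # (Illustrative usage of the new --per-reco flag, not part of this patch:
  #   utils/split_data.sh --per-reco data/train 16
  # keeps all utterances of each recording in the same split and writes the
  # splits to data/train/split16reco/{1..16}.)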
fi - if $split_per_spk; then utt2spk_opt="--utt2spk=$data/utt2spk" utt="" +elif $split_per_reco; then + utils/data/get_reco2utt.sh $data + utils/spk2utt_to_utt2spk.pl $data/reco2utt > $data/utt2reco + utt2spk_opt="--utt2spk=$data/utt2reco" + utt="reco" else utt2spk_opt= utt="utt" @@ -86,6 +95,7 @@ if ! $need_to_split; then fi utt2spks=$(for n in `seq $numsplit`; do echo $data/split${numsplit}${utt}/$n/utt2spk; done) +utt2recos=$(for n in `seq $numsplit`; do echo $data/split${numsplit}${utt}/$n/utt2reco; done) directories=$(for n in `seq $numsplit`; do echo $data/split${numsplit}${utt}/$n; done) @@ -100,11 +110,20 @@ fi which lockfile >&/dev/null && lockfile -l 60 $data/.split_lock trap 'rm -f $data/.split_lock' EXIT HUP INT PIPE TERM -utils/split_scp.pl $utt2spk_opt $data/utt2spk $utt2spks || exit 1 +if $split_per_reco; then + utils/split_scp.pl $utt2spk_opt $data/utt2reco $utt2recos || exit 1 +else + utils/split_scp.pl $utt2spk_opt $data/utt2spk $utt2spks || exit 1 +fi for n in `seq $numsplit`; do dsn=$data/split${numsplit}${utt}/$n - utils/utt2spk_to_spk2utt.pl $dsn/utt2spk > $dsn/spk2utt || exit 1; + + if $split_per_reco; then + utils/filter_scp.pl $dsn/utt2reco $data/utt2spk > $dsn/utt2spk + fi + + utils/utt2spk_to_spk2utt.pl $dsn/utt2spk > $dsn/spk2utt || exit 1 done maybe_wav_scp= From bfec70247b20d1e71792dca5f5d97a8ffcda4430 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 23 Nov 2016 15:47:38 -0500 Subject: [PATCH 039/530] asr_diarization: Added deriv weights and xent per dim objective --- src/nnet3/nnet-diagnostics.cc | 24 ++++--- src/nnet3/nnet-diagnostics.h | 13 +++- src/nnet3/nnet-example-utils.cc | 65 ++++++++++++++----- src/nnet3/nnet-example.cc | 70 ++++++++++++++++----- src/nnet3/nnet-example.h | 35 ++++++++++- src/nnet3/nnet-nnet.cc | 12 +++- src/nnet3/nnet-nnet.h | 9 ++- src/nnet3/nnet-training.cc | 98 +++++++++++++++++++++++++---- src/nnet3/nnet-training.h | 11 +++- src/nnet3bin/nnet3-acc-lda-stats.cc | 14 ++++- src/nnet3bin/nnet3-copy-egs.cc | 14 +++++ 11 files changed, 302 insertions(+), 63 deletions(-) diff --git a/src/nnet3/nnet-diagnostics.cc b/src/nnet3/nnet-diagnostics.cc index 7f7d485ffe0..64abe8a0578 100644 --- a/src/nnet3/nnet-diagnostics.cc +++ b/src/nnet3/nnet-diagnostics.cc @@ -92,20 +92,24 @@ void NnetComputeProb::ProcessOutputs(const NnetExample &eg, << "mismatch for '" << io.name << "': " << output.NumCols() << " (nnet) vs. 
" << io.features.NumCols() << " (egs)\n"; } + + const Vector *deriv_weights = NULL; + if (config_.apply_deriv_weights && io.deriv_weights.Dim() > 0) + deriv_weights = &(io.deriv_weights); { BaseFloat tot_weight, tot_objf; bool supply_deriv = config_.compute_deriv; ComputeObjectiveFunction(io.features, obj_type, io.name, supply_deriv, computer, - &tot_weight, &tot_objf); + &tot_weight, &tot_objf, deriv_weights); SimpleObjectiveInfo &totals = objf_info_[io.name]; totals.tot_weight += tot_weight; totals.tot_objective += tot_objf; } - if (obj_type == kLinear && config_.compute_accuracy) { + if (config_.compute_accuracy) { BaseFloat tot_weight, tot_accuracy; ComputeAccuracy(io.features, output, - &tot_weight, &tot_accuracy); + &tot_weight, &tot_accuracy, deriv_weights); SimpleObjectiveInfo &totals = accuracy_info_[io.name]; totals.tot_weight += tot_weight; totals.tot_objective += tot_accuracy; @@ -156,7 +160,8 @@ bool NnetComputeProb::PrintTotalStats() const { void ComputeAccuracy(const GeneralMatrix &supervision, const CuMatrixBase &nnet_output, BaseFloat *tot_weight_out, - BaseFloat *tot_accuracy_out) { + BaseFloat *tot_accuracy_out, + const Vector *deriv_weights) { int32 num_rows = nnet_output.NumRows(), num_cols = nnet_output.NumCols(); KALDI_ASSERT(supervision.NumRows() == num_rows && @@ -181,24 +186,27 @@ void ComputeAccuracy(const GeneralMatrix &supervision, for (int32 r = 0; r < num_rows; r++) { SubVector vec(mat, r); BaseFloat row_sum = vec.Sum(); - KALDI_ASSERT(row_sum >= 0.0); + // KALDI_ASSERT(row_sum >= 0.0); // For conventional ASR systems int32 best_index; vec.Max(&best_index); // discard max value. + if (deriv_weights) + row_sum *= (*deriv_weights)(r); tot_weight += row_sum; if (best_index == best_index_cpu[r]) tot_accuracy += row_sum; } break; - } case kFullMatrix: { const Matrix &mat = supervision.GetFullMatrix(); for (int32 r = 0; r < num_rows; r++) { SubVector vec(mat, r); BaseFloat row_sum = vec.Sum(); - KALDI_ASSERT(row_sum >= 0.0); + // KALDI_ASSERT(row_sum >= 0.0); // For conventional ASR systems int32 best_index; vec.Max(&best_index); // discard max value. + if (deriv_weights) + row_sum *= (*deriv_weights)(r); tot_weight += row_sum; if (best_index == best_index_cpu[r]) tot_accuracy += row_sum; @@ -212,6 +220,8 @@ void ComputeAccuracy(const GeneralMatrix &supervision, BaseFloat row_sum = row.Sum(); int32 best_index; row.Max(&best_index); + if (deriv_weights) + row_sum *= (*deriv_weights)(r); KALDI_ASSERT(best_index < num_cols); tot_weight += row_sum; if (best_index == best_index_cpu[r]) diff --git a/src/nnet3/nnet-diagnostics.h b/src/nnet3/nnet-diagnostics.h index 298548857dd..6ed6c4a33a7 100644 --- a/src/nnet3/nnet-diagnostics.h +++ b/src/nnet3/nnet-diagnostics.h @@ -36,7 +36,6 @@ struct SimpleObjectiveInfo { double tot_objective; SimpleObjectiveInfo(): tot_weight(0.0), tot_objective(0.0) { } - }; @@ -44,12 +43,15 @@ struct NnetComputeProbOptions { bool debug_computation; bool compute_deriv; bool compute_accuracy; + bool apply_deriv_weights; + NnetOptimizeOptions optimize_config; NnetComputeOptions compute_config; NnetComputeProbOptions(): debug_computation(false), compute_deriv(false), - compute_accuracy(true) { } + compute_accuracy(true), + apply_deriv_weights(true) { } void Register(OptionsItf *opts) { // compute_deriv is not included in the command line options // because it's not relevant for nnet3-compute-prob. 
@@ -57,6 +59,9 @@ struct NnetComputeProbOptions { "debug for the actual computation (very verbose!)"); opts->Register("compute-accuracy", &compute_accuracy, "If true, compute " "accuracy values as well as objective functions"); + opts->Register("apply-deriv-weights", &apply_deriv_weights, + "Apply per-frame deriv weights"); + // register the optimization options with the prefix "optimization". ParseOptions optimization_opts("optimization", opts); optimize_config.Register(&optimization_opts); @@ -102,6 +107,7 @@ class NnetComputeProb { const Nnet &GetDeriv() const; ~NnetComputeProb(); + private: void ProcessOutputs(const NnetExample &eg, NnetComputer *computer); @@ -152,7 +158,8 @@ class NnetComputeProb { void ComputeAccuracy(const GeneralMatrix &supervision, const CuMatrixBase &nnet_output, BaseFloat *tot_weight, - BaseFloat *tot_accuracy); + BaseFloat *tot_accuracy, + const Vector *deriv_weights = NULL); } // namespace nnet3 diff --git a/src/nnet3/nnet-example-utils.cc b/src/nnet3/nnet-example-utils.cc index 30f7840f6f8..39922153db4 100644 --- a/src/nnet3/nnet-example-utils.cc +++ b/src/nnet3/nnet-example-utils.cc @@ -63,9 +63,9 @@ static void GetIoSizes(const std::vector &src, KALDI_ASSERT(*names_iter == io.name); int32 i = names_iter - names_begin; int32 this_dim = io.features.NumCols(); - if (dims[i] == -1) + if (dims[i] == -1) { dims[i] = this_dim; - else if(dims[i] != this_dim) { + } else if (dims[i] != this_dim) { KALDI_ERR << "Merging examples with inconsistent feature dims: " << dims[i] << " vs. " << this_dim << " for '" << io.name << "'."; @@ -87,9 +87,20 @@ static void MergeIo(const std::vector &src, const std::vector &sizes, bool compress, NnetExample *merged_eg) { + // The total number of Indexes we have across all examples. int32 num_feats = names.size(); + std::vector cur_size(num_feats, 0); + + // The features in the different NnetIo in the Indexes across all examples std::vector > output_lists(num_feats); + + // The deriv weights in the different NnetIo in the Indexes across all + // examples + std::vector const*> > + output_deriv_weights(num_feats); + + // Initialize the merged_eg merged_eg->io.clear(); merged_eg->io.resize(num_feats); for (int32 f = 0; f < num_feats; f++) { @@ -100,22 +111,29 @@ static void MergeIo(const std::vector &src, io.indexes.resize(size); } - std::vector::const_iterator names_begin = names.begin(), + std::vector::const_iterator names_begin = names.begin(); names_end = names.end(); - std::vector::const_iterator iter = src.begin(), end = src.end(); - for (int32 n = 0; iter != end; ++iter,++n) { - std::vector::const_iterator iter2 = iter->io.begin(), - end2 = iter->io.end(); - for (; iter2 != end2; ++iter2) { - const NnetIo &io = *iter2; + std::vector::const_iterator eg_iter = src.begin(), + eg_end = src.end(); + for (int32 n = 0; eg_iter != eg_end; ++eg_iter, ++n) { + std::vector::const_iterator io_iter = eg_iter->io.begin(), + io_end = eg_iter->io.end(); + for (; io_iter != io_end; ++io_iter) { + const NnetIo &io = *io_iter; std::vector::const_iterator names_iter = std::lower_bound(names_begin, names_end, io.name); KALDI_ASSERT(*names_iter == io.name); + int32 f = names_iter - names_begin; - int32 this_size = io.indexes.size(), - &this_offset = cur_size[f]; + int32 this_size = io.indexes.size(); + int32 &this_offset = cur_size[f]; KALDI_ASSERT(this_size + this_offset <= sizes[f]); + + // Add f^th Io's features and deriv_weights output_lists[f].push_back(&(io.features)); + output_deriv_weights[f].push_back(&(io.deriv_weights)); + + // Work on the 
Indexes for the f^th Io in merged_eg NnetIo &output_io = merged_eg->io[f]; std::copy(io.indexes.begin(), io.indexes.end(), output_io.indexes.begin() + this_offset); @@ -139,10 +157,26 @@ static void MergeIo(const std::vector &src, // the following won't do anything if the features were sparse. merged_eg->io[f].features.Compress(); } - } -} + Vector &this_deriv_weights = merged_eg->io[f].deriv_weights; + if (output_deriv_weights[f][0]->Dim() > 0) { + this_deriv_weights.Resize( + merged_eg->io[f].indexes.size(), kUndefined); + KALDI_ASSERT(this_deriv_weights.Dim() == + merged_eg->io[f].features.NumRows()); + std::vector const*>::const_iterator + it = output_deriv_weights[f].begin(), + end = output_deriv_weights[f].end(); + + for (int32 i = 0, cur_offset = 0; it != end; ++it, i++) { + KALDI_ASSERT((*it)->Dim() == output_lists[f][i]->NumRows()); + this_deriv_weights.Range(cur_offset, (*it)->Dim()).CopyFromVec(**it); + cur_offset += (*it)->Dim(); + } + } + } +} void MergeExamples(const std::vector &src, bool compress, @@ -282,9 +316,8 @@ void RoundUpNumFrames(int32 frame_subsampling_factor, KALDI_ERR << "--num-frames-overlap=" << (*num_frames_overlap) << " < " << "--num-frames=" << (*num_frames); } - } -} // namespace nnet3 -} // namespace kaldi +} // namespace nnet3 +} // namespace kaldi diff --git a/src/nnet3/nnet-example.cc b/src/nnet3/nnet-example.cc index 9a34258e0ee..11305f55324 100644 --- a/src/nnet3/nnet-example.cc +++ b/src/nnet3/nnet-example.cc @@ -19,6 +19,7 @@ // limitations under the License. #include "nnet3/nnet-example.h" +#include "nnet3/nnet-example-utils.h" #include "lat/lattice-functions.h" #include "hmm/posterior.h" @@ -31,6 +32,8 @@ void NnetIo::Write(std::ostream &os, bool binary) const { WriteToken(os, binary, name); WriteIndexVector(os, binary, indexes); features.Write(os, binary); + WriteToken(os, binary, ""); // for DerivWeights. Want to save space. + WriteVectorAsChar(os, binary, deriv_weights); WriteToken(os, binary, ""); KALDI_ASSERT(static_cast(features.NumRows()) == indexes.size()); } @@ -40,7 +43,14 @@ void NnetIo::Read(std::istream &is, bool binary) { ReadToken(is, binary, &name); ReadIndexVector(is, binary, &indexes); features.Read(is, binary); - ExpectToken(is, binary, ""); + std::string token; + ReadToken(is, binary, &token); + // in the future this back-compatibility code can be reworked. + if (token != "") { + KALDI_ASSERT(token == ""); + ReadVectorAsChar(is, binary, &deriv_weights); + ExpectToken(is, binary, ""); + } } bool NnetIo::operator == (const NnetIo &other) const { @@ -52,40 +62,70 @@ bool NnetIo::operator == (const NnetIo &other) const { Matrix this_mat, other_mat; features.GetMatrix(&this_mat); other.features.GetMatrix(&other_mat); - return ApproxEqual(this_mat, other_mat); + return (ApproxEqual(this_mat, other_mat) && + deriv_weights.ApproxEqual(other.deriv_weights)); } NnetIo::NnetIo(const std::string &name, - int32 t_begin, const MatrixBase &feats): + int32 t_begin, const MatrixBase &feats, + int32 skip_frame): name(name), features(feats) { - int32 num_rows = feats.NumRows(); - KALDI_ASSERT(num_rows > 0); - indexes.resize(num_rows); // sets all n,t,x to zeros. - for (int32 i = 0; i < num_rows; i++) - indexes[i].t = t_begin + i; + int32 num_skipped_rows = feats.NumRows(); + KALDI_ASSERT(num_skipped_rows > 0); + indexes.resize(num_skipped_rows); // sets all n,t,x to zeros. 
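  // (An illustrative note, not part of the original commit: with
  // t_begin = 10 and skip_frame = 3, a 4-row feature matrix gets indexes
  // with t = 10, 13, 16, 19, i.e. one row per skip_frame frames of input.)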
+ for (int32 i = 0; i < num_skipped_rows; i++) + indexes[i].t = t_begin + i * skip_frame; +} + +NnetIo::NnetIo(const std::string &name, + const VectorBase &deriv_weights, + int32 t_begin, const MatrixBase &feats, + int32 skip_frame): + name(name), features(feats), deriv_weights(deriv_weights) { + int32 num_skipped_rows = feats.NumRows(); + KALDI_ASSERT(num_skipped_rows > 0); + indexes.resize(num_skipped_rows); // sets all n,t,x to zeros. + for (int32 i = 0; i < num_skipped_rows; i++) + indexes[i].t = t_begin + i * skip_frame; } void NnetIo::Swap(NnetIo *other) { name.swap(other->name); indexes.swap(other->indexes); features.Swap(&(other->features)); + deriv_weights.Swap(&(other->deriv_weights)); } NnetIo::NnetIo(const std::string &name, int32 dim, int32 t_begin, - const Posterior &labels): + const Posterior &labels, + int32 skip_frame): name(name) { - int32 num_rows = labels.size(); - KALDI_ASSERT(num_rows > 0); + int32 num_skipped_rows = labels.size(); + KALDI_ASSERT(num_skipped_rows > 0); SparseMatrix sparse_feats(dim, labels); features = sparse_feats; - indexes.resize(num_rows); // sets all n,t,x to zeros. - for (int32 i = 0; i < num_rows; i++) - indexes[i].t = t_begin + i; + indexes.resize(num_skipped_rows); // sets all n,t,x to zeros. + for (int32 i = 0; i < num_skipped_rows; i++) + indexes[i].t = t_begin + i * skip_frame; } - +NnetIo::NnetIo(const std::string &name, + const VectorBase &deriv_weights, + int32 dim, + int32 t_begin, + const Posterior &labels, + int32 skip_frame): + name(name), deriv_weights(deriv_weights) { + int32 num_skipped_rows = labels.size(); + KALDI_ASSERT(num_skipped_rows > 0); + SparseMatrix sparse_feats(dim, labels); + features = sparse_feats; + indexes.resize(num_skipped_rows); // sets all n,t,x to zeros. + for (int32 i = 0; i < num_skipped_rows; i++) + indexes[i].t = t_begin + i * skip_frame; +} void NnetExample::Write(std::ostream &os, bool binary) const { // Note: weight, label, input_frames and spk_info are members. This is a diff --git a/src/nnet3/nnet-example.h b/src/nnet3/nnet-example.h index 1df7cd1e78e..b1ae42a78c9 100644 --- a/src/nnet3/nnet-example.h +++ b/src/nnet3/nnet-example.h @@ -45,12 +45,32 @@ struct NnetIo { /// a Matrix, or SparseMatrix (a SparseMatrix would be the natural format for posteriors). GeneralMatrix features; + /// This is a vector of per-frame weights, required to be between 0 and 1, + /// that is applied to the derivative during training (but not during model + /// combination, where the derivatives need to agree with the computed objf + /// values for the optimization code to work). + /// If this vector is empty it means we're not applying per-frame weights, + /// so it's equivalent to a vector of all ones. This vector is written + /// to disk compactly as unsigned char. + Vector deriv_weights; + /// This constructor creates NnetIo with name "name", indexes with n=0, x=0, /// and t values ranging from t_begin to t_begin + feats.NumRows() - 1, and /// the provided features. t_begin should be the frame that feats.Row(0) /// represents. NnetIo(const std::string &name, - int32 t_begin, const MatrixBase &feats); + int32 t_begin, + const MatrixBase &feats, + int32 skip_frame = 1); + + /// This is similar to the above constructor but also takes in a + /// a deriv weights argument. 
+  NnetIo(const std::string &name,
+         const VectorBase<BaseFloat> &deriv_weights,
+         int32 t_begin,
+         const MatrixBase<BaseFloat> &feats,
+         int32 skip_frame = 1);
+
   /// This constructor sets "name" to the provided string, sets "indexes" with
   /// n=0, x=0, and t from t_begin to t_begin + labels.size() - 1, and the labels
@@ -58,7 +78,17 @@ struct NnetIo {
   NnetIo(const std::string &name,
          int32 dim,
          int32 t_begin,
-         const Posterior &labels);
+         const Posterior &labels,
+         int32 skip_frame = 1);
+
+  /// This is similar to the above constructor but also takes in a
+  /// deriv_weights argument.
+  NnetIo(const std::string &name,
+         const VectorBase<BaseFloat> &deriv_weights,
+         int32 dim,
+         int32 t_begin,
+         const Posterior &labels,
+         int32 skip_frame = 1);
 
   void Swap(NnetIo *other);
 
@@ -80,7 +110,6 @@ struct NnetIo {
 /// more frames of input, used for standard cross-entropy training of neural
 /// nets (and possibly for other objective functions).
 struct NnetExample {
-
   /// "io" contains the input and output.  In principle there can be multiple
   /// types of both input and output, with different names.  The order is
   /// irrelevant.
diff --git a/src/nnet3/nnet-nnet.cc b/src/nnet3/nnet-nnet.cc
index ad5f715a294..4fcbbc70a1f 100644
--- a/src/nnet3/nnet-nnet.cc
+++ b/src/nnet3/nnet-nnet.cc
@@ -84,8 +84,14 @@ std::string Nnet::GetAsConfigLine(int32 node_index, bool include_dim) const {
       node.descriptor.WriteConfig(ans, node_names_);
       if (include_dim)
         ans << " dim=" << node.Dim(*this);
-      ans << " objective=" << (node.u.objective_type == kLinear ? "linear" :
-                               "quadratic");
+
+      if (node.u.objective_type == kLinear)
+        ans << " objective=linear";
+      else if (node.u.objective_type == kQuadratic)
+        ans << " objective=quadratic";
+      else if (node.u.objective_type == kXentPerDim)
+        ans << " objective=xent-per-dim";
+
       break;
     case kComponent:
       ans << "component-node name=" << name << " component="
@@ -390,6 +396,8 @@ void Nnet::ProcessOutputNodeConfigLine(
     nodes_[node_index].u.objective_type = kLinear;
   } else if (objective_type == "quadratic") {
     nodes_[node_index].u.objective_type = kQuadratic;
+  } else if (objective_type == "xent-per-dim") {
+    nodes_[node_index].u.objective_type = kXentPerDim;
   } else {
     KALDI_ERR << "Invalid objective type: " << objective_type;
   }
diff --git a/src/nnet3/nnet-nnet.h b/src/nnet3/nnet-nnet.h
index 16e8333d5b1..b9ed3c1052b 100644
--- a/src/nnet3/nnet-nnet.h
+++ b/src/nnet3/nnet-nnet.h
@@ -49,7 +49,12 @@ namespace nnet3 {
 ///   - Objective type kQuadratic is used to mean the objective function
 ///     f(x, y) = -0.5 (x-y).(x-y), which is to be maximized, as in the kLinear
 ///     case.
-enum ObjectiveType { kLinear, kQuadratic };
+///   - Objective type kXentPerDim is the objective function that is used
+///     to learn a set of Bernoulli random variables:
+///     f(x, y) = x * Log(y) + (1-x) * Log(1-y), where
+///     x is the true probability of class 1 and
+///     y is the predicted probability of class 1 (this matches the
+///     kXentPerDim case of ComputeObjectiveFunction() in nnet-training.cc).
+enum ObjectiveType { kLinear, kQuadratic, kXentPerDim };
 
 enum NodeType { kInput, kDescriptor, kComponent, kDimRange, kNone };
 
@@ -249,7 +254,7 @@ class Nnet {
   void ResetGenerators();  // resets random-number generators for all
   // random components.  You must also set srand() for this to be
   // effective.
-  
+
  private:
   void Destroy();
 
diff --git a/src/nnet3/nnet-training.cc b/src/nnet3/nnet-training.cc
index 87d64e27871..9d957afe1de 100644
--- a/src/nnet3/nnet-training.cc
+++ b/src/nnet3/nnet-training.cc
@@ -39,7 +39,7 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config,
     // natural-gradient updates.
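     // Editor's recap (illustrative) of the kXentPerDim objective handled by
     // ComputeObjectiveFunction() later in this file:
     //   F(x, y) = x * log(y) + (1 - x) * log(1 - y),
     //   dF/dy   = x / y - (1 - x) / (1 - y),
     // which is what the kXentPerDim branch below computes, with x = cu_post
     // and y = output, accumulated over all elements by TraceMatMat.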
SetZero(is_gradient, delta_nnet_); const int32 num_updatable = NumUpdatableComponents(*delta_nnet_); - num_max_change_per_component_applied_.resize(num_updatable, 0); + num_max_change_per_component_applied_.resize(num_updatable, 0); num_max_change_global_applied_ = 0; if (config_.read_cache != "") { @@ -52,7 +52,7 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config, KALDI_WARN << "Could not open cached computation. " "Probably this is the first training iteration."; } - } + } } @@ -88,9 +88,12 @@ void NnetTrainer::ProcessOutputs(const NnetExample &eg, ObjectiveType obj_type = nnet_->GetNode(node_index).u.objective_type; BaseFloat tot_weight, tot_objf; bool supply_deriv = true; + const Vector *deriv_weights = NULL; + if (config_.apply_deriv_weights && io.deriv_weights.Dim() > 0) + deriv_weights = &(io.deriv_weights); ComputeObjectiveFunction(io.features, obj_type, io.name, supply_deriv, computer, - &tot_weight, &tot_objf); + &tot_weight, &tot_objf, deriv_weights); objf_info_[io.name].UpdateStats(io.name, config_.print_interval, num_minibatches_processed_++, tot_weight, tot_objf); @@ -167,7 +170,7 @@ void NnetTrainer::UpdateParamsWithMaxChange() { << " / " << num_updatable << " Updatable Components." << "(smallest factor=" << min_scale << " on " << component_name_with_min_scale - << " with max-change=" << max_change_with_min_scale <<"). "; + << " with max-change=" << max_change_with_min_scale <<"). "; if (param_delta > config_.max_param_change) ostr << "Global max-change factor was " << config_.max_param_change / param_delta @@ -276,7 +279,7 @@ bool ObjectiveFunctionInfo::PrintTotalStats(const std::string &name) const { << (tot_objf / tot_weight) << " over " << tot_weight << " frames."; } else { KALDI_LOG << "Overall average objective function for '" << name << "' is " - << objf << " + " << aux_objf << " = " << sum_objf + << objf << " + " << aux_objf << " = " << sum_objf << " over " << tot_weight << " frames."; } KALDI_LOG << "[this line is to be parsed by a script:] " @@ -290,7 +293,7 @@ NnetTrainer::~NnetTrainer() { Output ko(config_.write_cache, config_.binary_write_cache); compiler_.WriteCache(ko.Stream(), config_.binary_write_cache); KALDI_LOG << "Wrote computation cache to " << config_.write_cache; - } + } delete delta_nnet_; } @@ -300,7 +303,8 @@ void ComputeObjectiveFunction(const GeneralMatrix &supervision, bool supply_deriv, NnetComputer *computer, BaseFloat *tot_weight, - BaseFloat *tot_objf) { + BaseFloat *tot_objf, + const VectorBase *deriv_weights) { const CuMatrixBase &output = computer->GetOutput(output_name); if (output.NumCols() != supervision.NumCols()) @@ -309,6 +313,51 @@ void ComputeObjectiveFunction(const GeneralMatrix &supervision, << " (nnet) vs. 
" << supervision.NumCols() << " (egs)\n"; switch (objective_type) { + case kXentPerDim: { + // objective is x * log(y) + (1-x) * log(1-y) + CuMatrix cu_post(supervision.NumRows(), supervision.NumCols(), + kUndefined); // x + cu_post.CopyFromGeneralMat(supervision); + + CuMatrix n_cu_post(cu_post.NumRows(), cu_post.NumCols()); + n_cu_post.Set(1.0); + n_cu_post.AddMat(-1.0, cu_post); // 1-x + + CuMatrix log_prob(output); // y + log_prob.ApplyLog(); // log(y) + + CuMatrix n_output(output.NumRows(), + output.NumCols(), kSetZero); + n_output.Set(1.0); + n_output.AddMat(-1.0, output); // 1-y + n_output.ApplyLog(); // log(1-y) + + BaseFloat num_elements = static_cast(cu_post.NumRows()); + if (deriv_weights) { + CuVector cu_deriv_weights(*deriv_weights); + num_elements = cu_deriv_weights.Sum(); + cu_post.MulRowsVec(cu_deriv_weights); + n_cu_post.MulRowsVec(cu_deriv_weights); + } + + *tot_weight = num_elements * cu_post.NumCols(); + *tot_objf = TraceMatMat(log_prob, cu_post, kTrans) + + TraceMatMat(n_output, n_cu_post, kTrans); + + if (supply_deriv) { + // deriv is x / y - (1-x) / (1-y) + n_output.ApplyExp(); // 1-y + n_cu_post.DivElements(n_output); // 1-x / (1-y) + + log_prob.ApplyExp(); // y + cu_post.DivElements(log_prob); // x / y + + cu_post.AddMat(-1.0, n_cu_post); // x / y - (1-x) / (1-y) + computer->AcceptOutputDeriv(output_name, &cu_post); + } + + break; + } case kLinear: { // objective is x * y. switch (supervision.Type()) { @@ -318,20 +367,38 @@ void ComputeObjectiveFunction(const GeneralMatrix &supervision, // The cross-entropy objective is computed by a simple dot product, // because after the LogSoftmaxLayer, the output is already in the form // of log-likelihoods that are normalized to sum to one. - *tot_weight = cu_post.Sum(); - *tot_objf = TraceMatSmat(output, cu_post, kTrans); - if (supply_deriv) { + if (deriv_weights) { CuMatrix output_deriv(output.NumRows(), output.NumCols(), kUndefined); cu_post.CopyToMat(&output_deriv); - computer->AcceptOutputDeriv(output_name, &output_deriv); + CuVector cu_deriv_weights(*deriv_weights); + output_deriv.MulRowsVec(cu_deriv_weights); + *tot_weight = cu_deriv_weights.Sum(); + *tot_objf = TraceMatMat(output, output_deriv, kTrans); + if (supply_deriv) { + computer->AcceptOutputDeriv(output_name, &output_deriv); + } + } else { + *tot_weight = cu_post.Sum(); + *tot_objf = TraceMatSmat(output, cu_post, kTrans); + if (supply_deriv) { + CuMatrix output_deriv(output.NumRows(), output.NumCols(), + kUndefined); + cu_post.CopyToMat(&output_deriv); + computer->AcceptOutputDeriv(output_name, &output_deriv); + } } + break; } case kFullMatrix: { // there is a redundant matrix copy in here if we're not using a GPU // but we don't anticipate this code branch being used in many cases. 
CuMatrix cu_post(supervision.GetFullMatrix()); + if (deriv_weights) { + CuVector cu_deriv_weights(*deriv_weights); + cu_post.MulRowsVec(cu_deriv_weights); + } *tot_weight = cu_post.Sum(); *tot_objf = TraceMatMat(output, cu_post, kTrans); if (supply_deriv) @@ -343,6 +410,10 @@ void ComputeObjectiveFunction(const GeneralMatrix &supervision, supervision.GetMatrix(&post); CuMatrix cu_post; cu_post.Swap(&post); + if (deriv_weights) { + CuVector cu_deriv_weights(*deriv_weights); + cu_post.MulRowsVec(cu_deriv_weights); + } *tot_weight = cu_post.Sum(); *tot_objf = TraceMatMat(output, cu_post, kTrans); if (supply_deriv) @@ -360,6 +431,11 @@ void ComputeObjectiveFunction(const GeneralMatrix &supervision, diff.CopyFromGeneralMat(supervision); diff.AddMat(-1.0, output); *tot_weight = diff.NumRows(); + if (deriv_weights) { + CuVector cu_deriv_weights(*deriv_weights); + diff.MulRowsVec(cu_deriv_weights); + *tot_weight = deriv_weights->Sum(); + } *tot_objf = -0.5 * TraceMatMat(diff, diff, kTrans); if (supply_deriv) computer->AcceptOutputDeriv(output_name, &diff); diff --git a/src/nnet3/nnet-training.h b/src/nnet3/nnet-training.h index 70c90267c66..7b22bc75211 100644 --- a/src/nnet3/nnet-training.h +++ b/src/nnet3/nnet-training.h @@ -42,6 +42,8 @@ struct NnetTrainerOptions { BaseFloat max_param_change; NnetOptimizeOptions optimize_config; NnetComputeOptions compute_config; + bool apply_deriv_weights; + NnetTrainerOptions(): zero_component_stats(true), store_component_stats(true), @@ -49,7 +51,8 @@ struct NnetTrainerOptions { debug_computation(false), momentum(0.0), binary_write_cache(true), - max_param_change(2.0) { } + max_param_change(2.0), + apply_deriv_weights(true) { } void Register(OptionsItf *opts) { opts->Register("store-component-stats", &store_component_stats, "If true, store activations and derivatives for nonlinear " @@ -69,6 +72,9 @@ struct NnetTrainerOptions { "so that the 'effective' learning rate is the same as " "before (because momentum would normally increase the " "effective learning rate by 1/(1-momentum))"); + opts->Register("apply-deriv-weights", &apply_deriv_weights, + "If true, apply the per-frame derivative weights stored with " + "the example"); opts->Register("read-cache", &read_cache, "the location where we can read " "the cached computation from"); opts->Register("write-cache", &write_cache, "the location where we want to " @@ -226,7 +232,8 @@ void ComputeObjectiveFunction(const GeneralMatrix &supervision, bool supply_deriv, NnetComputer *computer, BaseFloat *tot_weight, - BaseFloat *tot_objf); + BaseFloat *tot_objf, + const VectorBase* deriv_weights = NULL); diff --git a/src/nnet3bin/nnet3-acc-lda-stats.cc b/src/nnet3bin/nnet3-acc-lda-stats.cc index 0b3b537855e..b41c4a6704d 100644 --- a/src/nnet3bin/nnet3-acc-lda-stats.cc +++ b/src/nnet3bin/nnet3-acc-lda-stats.cc @@ -87,13 +87,18 @@ class NnetLdaStatsAccumulator { // but we're about to do an outer product, so this doesn't dominate. 
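       // Editor's note (illustrative): with the per-frame deriv_weight applied
       // below, a frame whose weight is 0 contributes nothing to the LDA
       // stats, e.g. RandPrune(0.7, rand_prune) * 0 == 0, so Accumulate()
       // is skipped for that frame.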
Vector row(cu_row); + BaseFloat deriv_weight = 1.0; + if (output_supervision->deriv_weights.Dim() > 0 && r < output_supervision->deriv_weights.Dim()) { + deriv_weight = output_supervision->deriv_weights(r); + } + const SparseVector &post(smat.Row(r)); const std::pair *post_data = post.Data(), *post_end = post_data + post.NumElements(); for (; post_data != post_end; ++post_data) { MatrixIndexT pdf = post_data->first; BaseFloat weight = post_data->second; - BaseFloat pruned_weight = RandPrune(weight, rand_prune); + BaseFloat pruned_weight = RandPrune(weight, rand_prune) * deriv_weight; if (pruned_weight != 0.0) lda_stats_.Accumulate(row, pdf, pruned_weight); } @@ -110,11 +115,16 @@ class NnetLdaStatsAccumulator { // but we're about to do an outer product, so this doesn't dominate. Vector row(cu_row); + BaseFloat deriv_weight = 1.0; + if (output_supervision->deriv_weights.Dim() > 0 && r < output_supervision->deriv_weights.Dim()) { + deriv_weight = output_supervision->deriv_weights(r); + } + SubVector post(output_mat, r); int32 num_pdfs = post.Dim(); for (int32 pdf = 0; pdf < num_pdfs; pdf++) { BaseFloat weight = post(pdf); - BaseFloat pruned_weight = RandPrune(weight, rand_prune); + BaseFloat pruned_weight = RandPrune(weight, rand_prune) * deriv_weight; if (pruned_weight != 0.0) lda_stats_.Accumulate(row, pdf, pruned_weight); } diff --git a/src/nnet3bin/nnet3-copy-egs.cc b/src/nnet3bin/nnet3-copy-egs.cc index efb51f51910..0b82d91353a 100644 --- a/src/nnet3bin/nnet3-copy-egs.cc +++ b/src/nnet3bin/nnet3-copy-egs.cc @@ -137,6 +137,7 @@ void FilterExample(const NnetExample &eg, if (!is_input_or_output) { // Just copy everything. io_out.indexes = io_in.indexes; io_out.features = io_in.features; + io_out.deriv_weights = io_in.deriv_weights; } else { const std::vector &indexes_in = io_in.indexes; std::vector &indexes_out = io_out.indexes; @@ -157,6 +158,19 @@ void FilterExample(const NnetExample &eg, } } KALDI_ASSERT(iter_out == keep.end()); + + if (io_in.deriv_weights.Dim() > 0) { + io_out.deriv_weights.Resize(num_kept, kUndefined); + int32 in_dim = 0, out_dim = 0; + iter_out = keep.begin(); + for (; iter_out != keep.end(); ++iter_out, in_dim++) { + if (*iter_out) + io_out.deriv_weights(out_dim++) = io_in.deriv_weights(in_dim); + } + KALDI_ASSERT(out_dim == num_kept); + KALDI_ASSERT(iter_out == keep.end()); + } + if (num_kept == 0) KALDI_ERR << "FilterExample removed all indexes for '" << name << "'"; From 99dcd967e4c0fce094456469b249943f4a1ec464 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 23 Nov 2016 15:54:29 -0500 Subject: [PATCH 040/530] asr_diarization: Adding compress format option --- src/matrix/compressed-matrix.cc | 52 +++++++++++++++++++-------------- src/matrix/compressed-matrix.h | 14 +++++---- src/matrix/sparse-matrix.cc | 4 +-- src/matrix/sparse-matrix.h | 6 ++-- src/nnet3/nnet-example.cc | 4 +-- src/nnet3/nnet-example.h | 17 +++++++++-- 6 files changed, 61 insertions(+), 36 deletions(-) diff --git a/src/matrix/compressed-matrix.cc b/src/matrix/compressed-matrix.cc index 2ac2c544bc8..6fc365c8f03 100644 --- a/src/matrix/compressed-matrix.cc +++ b/src/matrix/compressed-matrix.cc @@ -24,14 +24,14 @@ namespace kaldi { -//static +//static MatrixIndexT CompressedMatrix::DataSize(const GlobalHeader &header) { // Returns size in bytes of the data. 
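  // Editor's arithmetic sketch (illustrative): per column, format 1 costs
  // sizeof(PerColHeader) (4 uint16s = 8 bytes) plus 1 byte per row, while
  // format 2 costs 2 bytes per row; 8 + num_rows < 2 * num_rows exactly when
  // num_rows > 8, which is why CopyFromMat() below auto-selects format 2 for
  // matrices with 8 rows or fewer.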
if (header.format == 1) { return sizeof(GlobalHeader) + header.num_cols * (sizeof(PerColHeader) + header.num_rows); } else { - KALDI_ASSERT(header.format == 2) ; + KALDI_ASSERT(header.format == 2); return sizeof(GlobalHeader) + 2 * header.num_rows * header.num_cols; } @@ -40,7 +40,7 @@ MatrixIndexT CompressedMatrix::DataSize(const GlobalHeader &header) { template void CompressedMatrix::CopyFromMat( - const MatrixBase &mat) { + const MatrixBase &mat, int32 format) { if (data_ != NULL) { delete [] static_cast(data_); // call delete [] because was allocated with new float[] data_ = NULL; @@ -52,7 +52,7 @@ void CompressedMatrix::CopyFromMat( KALDI_COMPILE_TIME_ASSERT(sizeof(global_header) == 20); // otherwise // something weird is happening and our code probably won't work or // won't be robust across platforms. - + // Below, the point of the "safety_margin" is that the minimum // and maximum values in the matrix shouldn't coincide with // the minimum and maximum ranges of the 16-bit range, because @@ -80,16 +80,22 @@ void CompressedMatrix::CopyFromMat( global_header.num_rows = mat.NumRows(); global_header.num_cols = mat.NumCols(); - if (mat.NumRows() > 8) { - global_header.format = 1; // format where each row has a PerColHeader. + if (format <= 0) { + if (mat.NumRows() > 8) { + global_header.format = 1; // format where each row has a PerColHeader. + } else { + global_header.format = 2; // format where all data is uint16. + } + } else if (format == 1 || format == 2) { + global_header.format = format; } else { - global_header.format = 2; // format where all data is uint16. + KALDI_ERR << "Error format for compression:format should be <=2."; } - + int32 data_size = DataSize(global_header); data_ = AllocateData(data_size); - + *(reinterpret_cast(data_)) = global_header; if (global_header.format == 1) { @@ -124,10 +130,12 @@ void CompressedMatrix::CopyFromMat( // Instantiate the template for float and double. template -void CompressedMatrix::CopyFromMat(const MatrixBase &mat); +void CompressedMatrix::CopyFromMat(const MatrixBase &mat, + int32 format); template -void CompressedMatrix::CopyFromMat(const MatrixBase &mat); +void CompressedMatrix::CopyFromMat(const MatrixBase &mat, + int32 format); CompressedMatrix::CompressedMatrix( @@ -146,10 +154,10 @@ CompressedMatrix::CompressedMatrix( if (old_num_rows == 0) { return; } // Zero-size matrix stored as zero pointer. if (num_rows == 0 || num_cols == 0) { return; } - + GlobalHeader new_global_header; KALDI_COMPILE_TIME_ASSERT(sizeof(new_global_header) == 20); - + GlobalHeader *old_global_header = reinterpret_cast(cmat.Data()); new_global_header = *old_global_header; @@ -159,10 +167,10 @@ CompressedMatrix::CompressedMatrix( // We don't switch format from 1 -> 2 (in case of size reduction) yet; if this // is needed, we will do this below by creating a temporary Matrix. new_global_header.format = old_global_header->format; - + data_ = AllocateData(DataSize(new_global_header)); // allocate memory *(reinterpret_cast(data_)) = new_global_header; - + if (old_global_header->format == 1) { // Both have the format where we have a PerColHeader and then compress as // chars... 
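     // Editor's usage sketch (illustrative, not part of the patch): callers
     // can now force the all-uint16 format regardless of matrix size, e.g.
     //   CompressedMatrix cmat(wave_matrix, 2);  // 'wave_matrix' is assumed
     // while format = 0 keeps the old row-count-based automatic choice.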
@@ -196,7 +204,7 @@ CompressedMatrix::CompressedMatrix( reinterpret_cast(old_global_header + 1); uint16 *new_data = reinterpret_cast(reinterpret_cast(data_) + 1); - + old_data += col_offset + (old_num_cols * row_offset); for (int32 row = 0; row < num_rows; row++) { @@ -281,7 +289,7 @@ void CompressedMatrix::ComputeColHeader( // Now, sdata.begin(), sdata.begin() + quarter_nr, and sdata.begin() + // 3*quarter_nr, and sdata.end() - 1, contain the elements that would appear // at those positions in sorted order. - + header->percentile_0 = std::min(FloatToUint16(global_header, sdata[0]), 65532); header->percentile_25 = @@ -297,7 +305,7 @@ void CompressedMatrix::ComputeColHeader( header->percentile_100 = std::max( FloatToUint16(global_header, sdata[num_rows-1]), header->percentile_75 + static_cast(1)); - + } else { // handle this pathological case. std::sort(sdata.begin(), sdata.end()); // Note: we know num_rows is at least 1. @@ -382,7 +390,7 @@ void CompressedMatrix::CompressColumn( unsigned char *byte_data) { ComputeColHeader(global_header, data, stride, num_rows, header); - + float p0 = Uint16ToFloat(global_header, header->percentile_0), p25 = Uint16ToFloat(global_header, header->percentile_25), p75 = Uint16ToFloat(global_header, header->percentile_75), @@ -491,7 +499,7 @@ void CompressedMatrix::CopyToMat(MatrixBase *mat, mat->CopyFromMat(temp, kTrans); return; } - + if (data_ == NULL) { KALDI_ASSERT(mat->NumRows() == 0); KALDI_ASSERT(mat->NumCols() == 0); @@ -501,7 +509,7 @@ void CompressedMatrix::CopyToMat(MatrixBase *mat, int32 num_cols = h->num_cols, num_rows = h->num_rows; KALDI_ASSERT(mat->NumRows() == num_rows); KALDI_ASSERT(mat->NumCols() == num_cols); - + if (h->format == 1) { PerColHeader *per_col_header = reinterpret_cast(h+1); unsigned char *byte_data = reinterpret_cast(per_col_header + @@ -625,7 +633,7 @@ void CompressedMatrix::CopyToMat(int32 row_offset, GlobalHeader *h = reinterpret_cast(data_); int32 num_rows = h->num_rows, num_cols = h->num_cols, tgt_cols = dest->NumCols(), tgt_rows = dest->NumRows(); - + if (h->format == 1) { // format where we have a per-column header and use one byte per // element. diff --git a/src/matrix/compressed-matrix.h b/src/matrix/compressed-matrix.h index 603134ab800..a9dd1e4fcd2 100644 --- a/src/matrix/compressed-matrix.h +++ b/src/matrix/compressed-matrix.h @@ -35,12 +35,12 @@ namespace kaldi { /// column). /// The basic idea is for each column (in the normal configuration) -/// we work out the values at the 0th, 25th, 50th and 100th percentiles +/// we work out the values at the 0th, 25th, 75th and 100th percentiles /// and store them as 16-bit integers; we then encode each value in /// the column as a single byte, in 3 separate ranges with different -/// linear encodings (0-25th, 25-50th, 50th-100th). -/// If the matrix has 8 rows or fewer, we simply store all values as -/// uint16. +/// linear encodings (0-25th, 25-75th, 75th-100th). +/// If the matrix has 8 rows or fewer or format=2, we simply store all values +/// as uint16. 
class CompressedMatrix { public: @@ -49,7 +49,9 @@ class CompressedMatrix { ~CompressedMatrix() { Clear(); } template - CompressedMatrix(const MatrixBase &mat): data_(NULL) { CopyFromMat(mat); } + CompressedMatrix(const MatrixBase &mat, int32 format = 0): data_(NULL) { + CopyFromMat(mat, format); + } /// Initializer that can be used to select part of an existing /// CompressedMatrix without un-compressing and re-compressing (note: unlike @@ -65,7 +67,7 @@ class CompressedMatrix { /// This will resize *this and copy the contents of mat to *this. template - void CopyFromMat(const MatrixBase &mat); + void CopyFromMat(const MatrixBase &mat, int32 format = 0); CompressedMatrix(const CompressedMatrix &mat); diff --git a/src/matrix/sparse-matrix.cc b/src/matrix/sparse-matrix.cc index 2ef909f66dd..777819ed677 100644 --- a/src/matrix/sparse-matrix.cc +++ b/src/matrix/sparse-matrix.cc @@ -705,9 +705,9 @@ MatrixIndexT GeneralMatrix::NumCols() const { } -void GeneralMatrix::Compress() { +void GeneralMatrix::Compress(int32 format) { if (mat_.NumRows() != 0) { - cmat_.CopyFromMat(mat_); + cmat_.CopyFromMat(mat_, format); mat_.Resize(0, 0); } } diff --git a/src/matrix/sparse-matrix.h b/src/matrix/sparse-matrix.h index 9f9362542e1..88619da3034 100644 --- a/src/matrix/sparse-matrix.h +++ b/src/matrix/sparse-matrix.h @@ -228,8 +228,10 @@ class GeneralMatrix { public: GeneralMatrixType Type() const; - void Compress(); // If it was a full matrix, compresses, changing Type() to - // kCompressedMatrix; otherwise does nothing. + /// If it was a full matrix, compresses, changing Type() to + /// kCompressedMatrix; otherwise does nothing. + /// format shows the compression format. + void Compress(int32 format = 0); void Uncompress(); // If it was a compressed matrix, uncompresses, changing // Type() to kFullMatrix; otherwise does nothing. diff --git a/src/nnet3/nnet-example.cc b/src/nnet3/nnet-example.cc index 11305f55324..89d40b9ef89 100644 --- a/src/nnet3/nnet-example.cc +++ b/src/nnet3/nnet-example.cc @@ -154,12 +154,12 @@ void NnetExample::Read(std::istream &is, bool binary) { } -void NnetExample::Compress() { +void NnetExample::Compress(int32 format) { std::vector::iterator iter = io.begin(), end = io.end(); // calling features.Compress() will do nothing if they are sparse or already // compressed. for (; iter != end; ++iter) - iter->features.Compress(); + iter->features.Compress(format); } } // namespace nnet3 diff --git a/src/nnet3/nnet-example.h b/src/nnet3/nnet-example.h index b1ae42a78c9..f097369443a 100644 --- a/src/nnet3/nnet-example.h +++ b/src/nnet3/nnet-example.h @@ -94,6 +94,14 @@ struct NnetIo { NnetIo() { } + // Compress the features in this NnetIo structure with specified format. + // the "format" will be 1 for the original format where each column has a + // PerColHeader, and 2 for the format, where everything is represented as + // 16-bit integers. + // If format <= 0, then format 1 will be used, unless the matrix has 8 or + // fewer rows (in which case format 2 will be used). + void Compress(int32 format = 0) { features.Compress(format); } + // Use default copy constructor and assignment operators. void Write(std::ostream &os, bool binary) const; @@ -124,8 +132,13 @@ struct NnetExample { void Swap(NnetExample *other) { io.swap(other->io); } - /// Compresses any (input) features that are not sparse. - void Compress(); + // Compresses any features that are not sparse and not compressed. 
+ // The "format" is 1 for the original format where each column has a + // PerColHeader, and 2 for the format, where everything is represented as + // 16-bit integers. + // If format <= 0, then format 1 will be used, unless the matrix has 8 or + // fewer rows (in which case format 2 will be used). + void Compress(int32 format = 0); /// Caution: this operator == is not very efficient. It's only used in /// testing code. From fb4737eedf96e0e6f2de5d7d9adc19057ad9d234 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 23 Nov 2016 15:54:56 -0500 Subject: [PATCH 041/530] asr_diarization: nnet3-get-egs etc. modified with deriv weights and compress format --- src/nnet3bin/nnet3-get-egs-dense-targets.cc | 150 +++++++++++++++++--- src/nnet3bin/nnet3-get-egs.cc | 82 +++++++++-- 2 files changed, 199 insertions(+), 33 deletions(-) diff --git a/src/nnet3bin/nnet3-get-egs-dense-targets.cc b/src/nnet3bin/nnet3-get-egs-dense-targets.cc index 23bf8922a5b..502e0700f27 100644 --- a/src/nnet3bin/nnet3-get-egs-dense-targets.cc +++ b/src/nnet3bin/nnet3-get-egs-dense-targets.cc @@ -32,9 +32,13 @@ namespace nnet3 { static void ProcessFile(const MatrixBase &feats, const MatrixBase *ivector_feats, + const VectorBase *deriv_weights, + const MatrixBase *l2reg_targets, const MatrixBase &targets, const std::string &utt_id, bool compress, + int32 input_compress_format, + int32 feats_compress_format, int32 num_targets, int32 left_context, int32 right_context, @@ -42,9 +46,9 @@ static void ProcessFile(const MatrixBase &feats, int64 *num_frames_written, int64 *num_egs_written, NnetExampleWriter *example_writer) { - KALDI_ASSERT(feats.NumRows() == static_cast(targets.NumRows())); - - for (int32 t = 0; t < feats.NumRows(); t += frames_per_eg) { + //KALDI_ASSERT(feats.NumRows() == static_cast(targets.NumRows())); + int min_size = std::min(feats.NumRows(), targets.NumRows()); + for (int32 t = 0; t < min_size; t += frames_per_eg) { // actual_frames_per_eg is the number of frames with actual targets. // At the end of the file, we pad with the last frame repeated @@ -52,18 +56,18 @@ static void ProcessFile(const MatrixBase &feats, // for recompilations). // TODO: We might need to ignore the end of the file. int32 actual_frames_per_eg = std::min(frames_per_eg, - feats.NumRows() - t); + min_size - t); int32 tot_frames = left_context + frames_per_eg + right_context; - Matrix input_frames(tot_frames, feats.NumCols()); + Matrix input_frames(tot_frames, feats.NumCols(), kUndefined); // Set up "input_frames". for (int32 j = -left_context; j < frames_per_eg + right_context; j++) { int32 t2 = j + t; if (t2 < 0) t2 = 0; - if (t2 >= feats.NumRows()) t2 = feats.NumRows() - 1; + if (t2 >= min_size) t2 = min_size - 1; SubVector src(feats, t2), dest(input_frames, j + left_context); dest.CopyFromVec(src); @@ -75,8 +79,11 @@ static void ProcessFile(const MatrixBase &feats, eg.io.push_back(NnetIo("input", - left_context, input_frames)); + if (compress) + eg.io.back().Compress(input_compress_format); + // if applicable, add the iVector feature. - if (ivector_feats != NULL) { + if (ivector_feats) { // try to get closest frame to middle of window to get // a representative iVector. 
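     // Worked example (editor's illustration): with t = 0 and
     // actual_frames_per_eg = 8, closest_frame below is 4, i.e. the single
     // iVector attached to this example is the one nearest the center of
     // the chunk.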
int32 closest_frame = t + (actual_frames_per_eg / 2); @@ -102,17 +109,57 @@ static void ProcessFile(const MatrixBase &feats, for (int32 i = actual_frames_per_eg; i < frames_per_eg; i++) { // Copy the i^th row of the target matrix from the last row of the // input targets matrix - KALDI_ASSERT(t + actual_frames_per_eg - 1 == feats.NumRows() - 1); + KALDI_ASSERT(t + actual_frames_per_eg - 1 == min_size - 1); SubVector this_target_dest(targets_dest, i); SubVector this_target_src(targets, t+actual_frames_per_eg-1); this_target_dest.CopyFromVec(this_target_src); } - // push this created targets matrix into the eg - eg.io.push_back(NnetIo("output", 0, targets_dest)); + if (!deriv_weights) { + // push this created targets matrix into the eg + eg.io.push_back(NnetIo("output", 0, targets_dest)); + } else { + Vector this_deriv_weights(targets_dest.NumRows()); + int32 frames_to_copy = std::min(t + actual_frames_per_eg, deriv_weights->Dim()) - t; + this_deriv_weights.Range(0, frames_to_copy).CopyFromVec(deriv_weights->Range(t, frames_to_copy)); + if (this_deriv_weights.Sum() == 0) continue; // Ignore frames that have frame weights 0 + eg.io.push_back(NnetIo("output", this_deriv_weights, 0, targets_dest)); + } + + if (l2reg_targets) { + // add the labels. + Matrix l2reg_targets_dest(frames_per_eg, l2reg_targets->NumCols()); + for (int32 i = 0; i < actual_frames_per_eg; i++) { + // Copy the i^th row of the target matrix from the (t+i)^th row of the + // input targets matrix + SubVector this_target_dest(l2reg_targets_dest, i); + SubVector this_target_src(*l2reg_targets, t+i); + this_target_dest.CopyFromVec(this_target_src); + } + + // Copy the last frame's target to the padded frames + for (int32 i = actual_frames_per_eg; i < frames_per_eg; i++) { + // Copy the i^th row of the target matrix from the last row of the + // input targets matrix + KALDI_ASSERT(t + actual_frames_per_eg - 1 == feats.NumRows() - 1); + SubVector this_target_dest(l2reg_targets_dest, i); + SubVector this_target_src(*l2reg_targets, t+actual_frames_per_eg-1); + this_target_dest.CopyFromVec(this_target_src); + } + + if (!deriv_weights) { + eg.io.push_back(NnetIo("output-l2reg", 0, l2reg_targets_dest)); + } else { + Vector this_deriv_weights(l2reg_targets_dest.NumRows()); + int32 frames_to_copy = std::min(t + actual_frames_per_eg, deriv_weights->Dim()) - t; + this_deriv_weights.Range(0, frames_to_copy).CopyFromVec(deriv_weights->Range(t, frames_to_copy)); + if (this_deriv_weights.Sum() == 0) continue; // Ignore frames that have frame weights 0 + eg.io.push_back(NnetIo("output-l2reg", this_deriv_weights, 0, l2reg_targets_dest)); + } + } if (compress) - eg.Compress(); + eg.Compress(feats_compress_format); std::ostringstream os; os << utt_id << "-" << t; @@ -155,14 +202,20 @@ int main(int argc, char *argv[]) { bool compress = true; + int32 input_compress_format = 0, feats_compress_format = 0; int32 num_targets = -1, left_context = 0, right_context = 0, - num_frames = 1, length_tolerance = 100; + num_frames = 1, length_tolerance = 2; - std::string ivector_rspecifier; + std::string ivector_rspecifier, deriv_weights_rspecifier, + l2reg_targets_rspecifier; ParseOptions po(usage); po.Register("compress", &compress, "If true, write egs in " "compressed format."); + po.Register("compress-format", &feats_compress_format, "Format for " + "compressing all feats in general"); + po.Register("input-compress-format", &input_compress_format, "Format for " + "compressing input feats e.g. 
Use 2 for compressing wave"); po.Register("num-targets", &num_targets, "Number of targets for the neural network"); po.Register("left-context", &left_context, "Number of frames of left " "context the neural net requires."); @@ -174,6 +227,13 @@ int main(int argc, char *argv[]) { "features, as matrix."); po.Register("length-tolerance", &length_tolerance, "Tolerance for " "difference in num-frames between feat and ivector matrices"); + po.Register("deriv-weights-rspecifier", &deriv_weights_rspecifier, + "Per-frame weights (only binary - 0 or 1) that specifies " + "whether a frame's gradient must be backpropagated or not. " + "Not specifying this is equivalent to specifying a vector of " + "all 1s."); + po.Register("l2reg-targets-rspecifier", &l2reg_targets_rspecifier, + "Add l2 regularizer targets"); po.Read(argc, argv); @@ -194,6 +254,8 @@ int main(int argc, char *argv[]) { RandomAccessBaseFloatMatrixReader matrix_reader(matrix_rspecifier); NnetExampleWriter example_writer(examples_wspecifier); RandomAccessBaseFloatMatrixReader ivector_reader(ivector_rspecifier); + RandomAccessBaseFloatVectorReader deriv_weights_reader(deriv_weights_rspecifier); + RandomAccessBaseFloatMatrixReader l2reg_targets_reader(l2reg_targets_rspecifier); int32 num_done = 0, num_err = 0; int64 num_frames_written = 0, num_egs_written = 0; @@ -206,10 +268,10 @@ int main(int argc, char *argv[]) { num_err++; } else { const Matrix &target_matrix = matrix_reader.Value(key); - if (target_matrix.NumRows() != feats.NumRows()) { - KALDI_WARN << "Target matrix has wrong size " - << target_matrix.NumRows() - << " versus " << feats.NumRows(); + if ((target_matrix.NumRows() - feats.NumRows()) > length_tolerance) { + KALDI_WARN << "Length difference between feats " << feats.NumRows() + << " and target matrix " << target_matrix.NumRows() + << "exceeds tolerance " << length_tolerance; num_err++; continue; } @@ -226,7 +288,7 @@ int main(int argc, char *argv[]) { } } - if (ivector_feats != NULL && + if (ivector_feats && (abs(feats.NumRows() - ivector_feats->NumRows()) > length_tolerance || ivector_feats->NumRows() == 0)) { KALDI_WARN << "Length difference between feats " << feats.NumRows() @@ -235,8 +297,56 @@ int main(int argc, char *argv[]) { num_err++; continue; } - - ProcessFile(feats, ivector_feats, target_matrix, key, compress, + + const Vector *deriv_weights = NULL; + if (!deriv_weights_rspecifier.empty()) { + if (!deriv_weights_reader.HasKey(key)) { + KALDI_WARN << "No deriv weights for utterance " << key; + num_err++; + continue; + } else { + // this address will be valid until we call HasKey() or Value() + // again. + deriv_weights = &(deriv_weights_reader.Value(key)); + } + } + + if (deriv_weights && + (abs(feats.NumRows() - deriv_weights->Dim()) > length_tolerance + || deriv_weights->Dim() == 0)) { + KALDI_WARN << "Length difference between feats " << feats.NumRows() + << " and deriv weights " << deriv_weights->Dim() + << " exceeds tolerance " << length_tolerance; + num_err++; + continue; + } + + const Matrix *l2reg_target_matrix = NULL; + if (!l2reg_targets_rspecifier.empty()) { + if (!l2reg_targets_reader.HasKey(key)) { + KALDI_WARN << "No l2 regularizer targets for utterance " << key; + num_err++; + continue; + } + { + // this address will be valid until we call HasKey() or Value() + // again. 
+ l2reg_target_matrix = &(l2reg_targets_reader.Value(key)); + + if (l2reg_target_matrix->NumRows() != feats.NumRows()) { + KALDI_WARN << "l2 regularizer target matrix has wrong size " + << l2reg_target_matrix->NumRows() + << " versus " << feats.NumRows(); + num_err++; + continue; + } + } + } + + + ProcessFile(feats, ivector_feats, deriv_weights, + l2reg_target_matrix, target_matrix, + key, compress, input_compress_format, feats_compress_format, num_targets, left_context, right_context, num_frames, &num_frames_written, &num_egs_written, &example_writer); diff --git a/src/nnet3bin/nnet3-get-egs.cc b/src/nnet3bin/nnet3-get-egs.cc index 75f264f1ceb..dbf8b636305 100644 --- a/src/nnet3bin/nnet3-get-egs.cc +++ b/src/nnet3bin/nnet3-get-egs.cc @@ -32,9 +32,12 @@ namespace nnet3 { static void ProcessFile(const MatrixBase &feats, const MatrixBase *ivector_feats, + const VectorBase *deriv_weights, const Posterior &pdf_post, const std::string &utt_id, bool compress, + int32 input_compress_format, + int32 feats_compress_format, int32 num_pdfs, int32 left_context, int32 right_context, @@ -42,16 +45,16 @@ static void ProcessFile(const MatrixBase &feats, int64 *num_frames_written, int64 *num_egs_written, NnetExampleWriter *example_writer) { - KALDI_ASSERT(feats.NumRows() == static_cast(pdf_post.size())); - - for (int32 t = 0; t < feats.NumRows(); t += frames_per_eg) { + //KALDI_ASSERT(feats.NumRows() == static_cast(pdf_post.size())); + int32 min_size = std::min(feats.NumRows(), static_cast(pdf_post.size())); + for (int32 t = 0; t < min_size; t += frames_per_eg) { // actual_frames_per_eg is the number of frames with nonzero // posteriors. At the end of the file we pad with zero posteriors // so that all examples have the same structure (prevents the need // for recompilations). int32 actual_frames_per_eg = std::min(frames_per_eg, - feats.NumRows() - t); + min_size - t); int32 tot_frames = left_context + frames_per_eg + right_context; @@ -62,7 +65,7 @@ static void ProcessFile(const MatrixBase &feats, for (int32 j = -left_context; j < frames_per_eg + right_context; j++) { int32 t2 = j + t; if (t2 < 0) t2 = 0; - if (t2 >= feats.NumRows()) t2 = feats.NumRows() - 1; + if (t2 >= min_size) t2 = min_size - 1; SubVector src(feats, t2), dest(input_frames, j + left_context); dest.CopyFromVec(src); @@ -73,9 +76,12 @@ static void ProcessFile(const MatrixBase &feats, // call the regular input "input". eg.io.push_back(NnetIo("input", - left_context, input_frames)); + + if (compress) + eg.io.back().Compress(input_compress_format); // if applicable, add the iVector feature. - if (ivector_feats != NULL) { + if (ivector_feats) { // try to get closest frame to middle of window to get // a representative iVector. int32 closest_frame = t + (actual_frames_per_eg / 2); @@ -92,10 +98,20 @@ static void ProcessFile(const MatrixBase &feats, for (int32 i = 0; i < actual_frames_per_eg; i++) labels[i] = pdf_post[t + i]; // remaining posteriors for frames are empty. 
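     // Worked example (editor's illustration): suppose t = 96, frames_per_eg
     // = 8, actual_frames_per_eg = 4 and deriv_weights->Dim() = 100; then
     // frames_to_copy = min(96 + 4, 100) - 96 = 4, so the last 4 utterance
     // weights fill the first 4 entries of this_deriv_weights and the padded
     // frames keep weight 0.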
- eg.io.push_back(NnetIo("output", num_pdfs, 0, labels)); + + if (!deriv_weights) { + eg.io.push_back(NnetIo("output", num_pdfs, 0, labels)); + } else { + Vector this_deriv_weights(frames_per_eg); + int32 frames_to_copy = std::min(t + actual_frames_per_eg, deriv_weights->Dim()) - t; + this_deriv_weights.Range(0, frames_to_copy).CopyFromVec(deriv_weights->Range(t, frames_to_copy)); + if (this_deriv_weights.Sum() == 0) continue; // Ignore frames that have frame weights 0 + eg.io.push_back(NnetIo("output", this_deriv_weights, num_pdfs, 0, labels)); + } + if (compress) - eg.Compress(); + eg.Compress(feats_compress_format); std::ostringstream os; os << utt_id << "-" << t; @@ -140,14 +156,19 @@ int main(int argc, char *argv[]) { bool compress = true; + int32 input_compress_format = 0, feats_compress_format = 0; int32 num_pdfs = -1, left_context = 0, right_context = 0, num_frames = 1, length_tolerance = 100; - std::string ivector_rspecifier; + std::string ivector_rspecifier, deriv_weights_rspecifier; ParseOptions po(usage); po.Register("compress", &compress, "If true, write egs in " "compressed format."); + po.Register("compress-format", &feats_compress_format, "Format for " + "compressing all feats in general"); + po.Register("input-compress-format", &input_compress_format, "Format for " + "compressing input feats e.g. Use 2 for compressing wave"); po.Register("num-pdfs", &num_pdfs, "Number of pdfs in the acoustic " "model"); po.Register("left-context", &left_context, "Number of frames of left " @@ -160,6 +181,11 @@ int main(int argc, char *argv[]) { "features, as a matrix."); po.Register("length-tolerance", &length_tolerance, "Tolerance for " "difference in num-frames between feat and ivector matrices"); + po.Register("deriv-weights-rspecifier", &deriv_weights_rspecifier, + "Per-frame weights (only binary - 0 or 1) that specifies " + "whether a frame's gradient must be backpropagated or not. 
" + "Not specifying this is equivalent to specifying a vector of " + "all 1s."); po.Read(argc, argv); @@ -181,6 +207,7 @@ int main(int argc, char *argv[]) { RandomAccessPosteriorReader pdf_post_reader(pdf_post_rspecifier); NnetExampleWriter example_writer(examples_wspecifier); RandomAccessBaseFloatMatrixReader ivector_reader(ivector_rspecifier); + RandomAccessBaseFloatVectorReader deriv_weights_reader(deriv_weights_rspecifier); int32 num_done = 0, num_err = 0; int64 num_frames_written = 0, num_egs_written = 0; @@ -192,13 +219,17 @@ int main(int argc, char *argv[]) { KALDI_WARN << "No pdf-level posterior for key " << key; num_err++; } else { - const Posterior &pdf_post = pdf_post_reader.Value(key); - if (pdf_post.size() != feats.NumRows()) { + Posterior pdf_post = pdf_post_reader.Value(key); + if (abs(static_cast(pdf_post.size()) - feats.NumRows()) > length_tolerance + || pdf_post.size() < feats.NumRows()) { KALDI_WARN << "Posterior has wrong size " << pdf_post.size() << " versus " << feats.NumRows(); num_err++; continue; } + while (static_cast(pdf_post.size()) > feats.NumRows()) { + pdf_post.pop_back(); + } const Matrix *ivector_feats = NULL; if (!ivector_rspecifier.empty()) { if (!ivector_reader.HasKey(key)) { @@ -212,7 +243,7 @@ int main(int argc, char *argv[]) { } } - if (ivector_feats != NULL && + if (ivector_feats && (abs(feats.NumRows() - ivector_feats->NumRows()) > length_tolerance || ivector_feats->NumRows() == 0)) { KALDI_WARN << "Length difference between feats " << feats.NumRows() @@ -221,8 +252,33 @@ int main(int argc, char *argv[]) { num_err++; continue; } + + const Vector *deriv_weights = NULL; + if (!deriv_weights_rspecifier.empty()) { + if (!deriv_weights_reader.HasKey(key)) { + KALDI_WARN << "No deriv weights for utterance " << key; + num_err++; + continue; + } else { + // this address will be valid until we call HasKey() or Value() + // again. 
+ deriv_weights = &(deriv_weights_reader.Value(key)); + } + } + + if (deriv_weights && + (abs(feats.NumRows() - deriv_weights->Dim()) > length_tolerance + || deriv_weights->Dim() == 0)) { + KALDI_WARN << "Length difference between feats " << feats.NumRows() + << " and deriv weights " << deriv_weights->Dim() + << " exceeds tolerance " << length_tolerance; + num_err++; + continue; + } + - ProcessFile(feats, ivector_feats, pdf_post, key, compress, + ProcessFile(feats, ivector_feats, deriv_weights, pdf_post, + key, compress, input_compress_format, feats_compress_format, num_pdfs, left_context, right_context, num_frames, &num_frames_written, &num_egs_written, &example_writer); From 94367327d1766774c7a77e94324e6393cc905aa3 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 7 Dec 2016 00:22:31 -0500 Subject: [PATCH 042/530] asr_diarization: Log and Exp component --- src/nnet3/nnet-component-itf.cc | 9 +- src/nnet3/nnet-component-itf.h | 8 ++ src/nnet3/nnet-simple-component.cc | 166 +++++++++++++++++++++++++++-- src/nnet3/nnet-simple-component.h | 67 ++++++++++++ 4 files changed, 243 insertions(+), 7 deletions(-) diff --git a/src/nnet3/nnet-component-itf.cc b/src/nnet3/nnet-component-itf.cc index f94843b725e..695dbb6de56 100644 --- a/src/nnet3/nnet-component-itf.cc +++ b/src/nnet3/nnet-component-itf.cc @@ -89,6 +89,10 @@ Component* Component::NewComponentOfType(const std::string &component_type) { ans = new SoftmaxComponent(); } else if (component_type == "LogSoftmaxComponent") { ans = new LogSoftmaxComponent(); + } else if (component_type == "LogComponent") { + ans = new LogComponent(); + } else if (component_type == "ExpComponent") { + ans = new ExpComponent(); } else if (component_type == "RectifiedLinearComponent") { ans = new RectifiedLinearComponent(); } else if (component_type == "NormalizeComponent") { @@ -310,11 +314,14 @@ std::string NonlinearComponent::Info() const { std::stringstream stream; if (InputDim() == OutputDim()) { stream << Type() << ", dim=" << InputDim(); - } else { + } else if (OutputDim() - InputDim() == 1) { // Note: this is a very special case tailored for class NormalizeComponent. stream << Type() << ", input-dim=" << InputDim() << ", output-dim=" << OutputDim() << ", add-log-stddev=true"; + } else { + stream << Type() << ", input-dim=" << InputDim() + << ", output-dim=" << OutputDim(); } if (self_repair_lower_threshold_ != BaseFloat(kUnsetThreshold)) diff --git a/src/nnet3/nnet-component-itf.h b/src/nnet3/nnet-component-itf.h index e5974b46f46..3013c485ea4 100644 --- a/src/nnet3/nnet-component-itf.h +++ b/src/nnet3/nnet-component-itf.h @@ -403,6 +403,11 @@ class UpdatableComponent: public Component { /// Sets the learning rate directly, bypassing learning_rate_factor_. virtual void SetActualLearningRate(BaseFloat lrate) { learning_rate_ = lrate; } + /// Sets the learning rate factor + virtual void SetLearningRateFactor(BaseFloat lrate_factor) { + learning_rate_factor_ = lrate_factor; + } + /// Gets the learning rate of gradient descent. Note: if you call /// SetLearningRate(x), and learning_rate_factor_ != 1.0, /// a different value than x will returned. @@ -413,6 +418,9 @@ class UpdatableComponent: public Component { /// NnetTrainer by querying the max-changes for each component. /// See NnetTrainer::UpdateParamsWithMaxChange() in nnet3/nnet-training.cc. 
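  // Editor's sketch (illustrative) of the accessor pair added in this patch:
  //   BaseFloat factor = comp.LearningRateFactor();
  //   comp.SetLearningRateFactor(0.5 * factor);
  // this changes the factor that multiplies the rate at the next
  // SetLearningRate() call; it does not retroactively modify learning_rate_.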
  BaseFloat MaxChange() const { return max_change_; }
+
+  /// Gets the learning rate factor.
+  BaseFloat LearningRateFactor() const { return learning_rate_factor_; }
 
   virtual std::string Info() const;
 
diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc
index 58908a0fe09..aa56dce1f23 100644
--- a/src/nnet3/nnet-simple-component.cc
+++ b/src/nnet3/nnet-simple-component.cc
@@ -2517,6 +2517,26 @@ void ConstantFunctionComponent::UnVectorize(const VectorBase<BaseFloat> &params)
   output_.CopyFromVec(params);
 }
 
+void ExpComponent::Propagate(const ComponentPrecomputedIndexes *indexes,
+                             const CuMatrixBase<BaseFloat> &in,
+                             CuMatrixBase<BaseFloat> *out) const {
+  // Applies the exp function.
+  out->CopyFromMat(in);
+  out->ApplyExp();
+}
+
+void ExpComponent::Backprop(const std::string &debug_info,
+                            const ComponentPrecomputedIndexes *indexes,
+                            const CuMatrixBase<BaseFloat> &, //in_value,
+                            const CuMatrixBase<BaseFloat> &out_value,
+                            const CuMatrixBase<BaseFloat> &out_deriv,
+                            Component *to_update,
+                            CuMatrixBase<BaseFloat> *in_deriv) const {
+  if (in_deriv != NULL) {
+    in_deriv->CopyFromMat(out_value);
+    in_deriv->MulElements(out_deriv);
+  }
+}
 
 NaturalGradientAffineComponent::NaturalGradientAffineComponent():
     max_change_per_sample_(0.0),
@@ -2568,10 +2588,15 @@ void NaturalGradientAffineComponent::Read(std::istream &is, bool binary) {
     ReadBasicType(is, binary, &max_change_scale_stats_);
     ReadToken(is, binary, &token);
   }
-  if (token != "<NaturalGradientAffineComponent>" &&
-      token != "</NaturalGradientAffineComponent>")
-    KALDI_ERR << "Expected <NaturalGradientAffineComponent> or "
-              << "</NaturalGradientAffineComponent>, got " << token;
+
+  std::ostringstream ostr_beg, ostr_end;
+  ostr_beg << "<" << Type() << ">";   // e.g. "<NaturalGradientAffineComponent>"
+  ostr_end << "</" << Type() << ">";  // e.g. "</NaturalGradientAffineComponent>"
+
+  if (token != ostr_end.str() &&
+      token != ostr_beg.str())
+    KALDI_ERR << "Expected " << ostr_beg.str() << " or "
+              << ostr_end.str() << ", got " << token;
   SetNaturalGradientConfigs();
 }
@@ -2720,7 +2745,10 @@ void NaturalGradientAffineComponent::Write(std::ostream &os,
   WriteBasicType(os, binary, active_scaling_count_);
   WriteToken(os, binary, "<MaxChangeScaleStats>");
   WriteBasicType(os, binary, max_change_scale_stats_);
-  WriteToken(os, binary, "</NaturalGradientAffineComponent>");
+
+  std::ostringstream ostr_end;
+  ostr_end << "</" << Type() << ">";  // e.g. "</NaturalGradientAffineComponent>"
+  WriteToken(os, binary, ostr_end.str());
 }
 
 std::string NaturalGradientAffineComponent::Info() const {
@@ -3095,6 +3123,126 @@ void SoftmaxComponent::StoreStats(const CuMatrixBase<BaseFloat> &out_value) {
   StoreStatsInternal(out_value, NULL);
 }
 
+std::string LogComponent::Info() const {
+  std::stringstream stream;
+  stream << NonlinearComponent::Info()
+         << ", log-floor=" << log_floor_;
+  return stream.str();
+}
+
+void LogComponent::InitFromConfig(ConfigLine *cfl) {
+  cfl->GetValue("log-floor", &log_floor_);
+  NonlinearComponent::InitFromConfig(cfl);
+}
+
+void LogComponent::Propagate(const ComponentPrecomputedIndexes *indexes,
+                             const CuMatrixBase<BaseFloat> &in,
+                             CuMatrixBase<BaseFloat> *out) const {
+  // Applies the log function (x >= epsi ? log(x) : log(epsi)),
+  // where epsi == log_floor_.
+  out->CopyFromMat(in);
+  out->ApplyFloor(log_floor_);
+  out->ApplyLog();
+}
+
+void LogComponent::Backprop(const std::string &debug_info,
+                            const ComponentPrecomputedIndexes *indexes,
+                            const CuMatrixBase<BaseFloat> &in_value,
+                            const CuMatrixBase<BaseFloat> &out_value,
+                            const CuMatrixBase<BaseFloat> &out_deriv,
+                            Component *to_update,
+                            CuMatrixBase<BaseFloat> *in_deriv) const {
+  if (in_deriv != NULL) {
+    CuMatrix<BaseFloat> divided_in_value(in_value), floored_in_value(in_value);
+    divided_in_value.Set(1.0);
+    floored_in_value.CopyFromMat(in_value);
+    floored_in_value.ApplyFloor(log_floor_);  // (x > epsi ? x : epsi)
+
+    divided_in_value.DivElements(floored_in_value);  // (x > epsi ? 1/x : 1/epsi)
+    in_deriv->CopyFromMat(in_value);
+    in_deriv->Add(-1.0 * log_floor_);  // (x - epsi)
+    in_deriv->ApplyHeaviside();  // (x > epsi ? 1 : 0)
+    in_deriv->MulElements(divided_in_value);  // (dy/dx: x > epsi ? 1/x : 0)
+    in_deriv->MulElements(out_deriv);  // dF/dx = dF/dy * dy/dx
+  }
+}
+
+void LogComponent::Read(std::istream &is, bool binary) {
+  std::ostringstream ostr_beg, ostr_end;
+  ostr_beg << "<" << Type() << ">";   // e.g. "<LogComponent>"
+  ostr_end << "</" << Type() << ">";  // e.g. "</LogComponent>"
+  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<Dim>");
+  ReadBasicType(is, binary, &dim_);  // Read dimension.
+  ExpectToken(is, binary, "<ValueAvg>");
+  value_sum_.Read(is, binary);
+  ExpectToken(is, binary, "<DerivAvg>");
+  deriv_sum_.Read(is, binary);
+  ExpectToken(is, binary, "<Count>");
+  ReadBasicType(is, binary, &count_);
+  value_sum_.Scale(count_);
+  deriv_sum_.Scale(count_);
+
+  std::string token;
+  ReadToken(is, binary, &token);
+  if (token == "<SelfRepairLowerThreshold>") {
+    ReadBasicType(is, binary, &self_repair_lower_threshold_);
+    ReadToken(is, binary, &token);
+  }
+  if (token == "<SelfRepairUpperThreshold>") {
+    ReadBasicType(is, binary, &self_repair_upper_threshold_);
+    ReadToken(is, binary, &token);
+  }
+  if (token == "<SelfRepairScale>") {
+    ReadBasicType(is, binary, &self_repair_scale_);
+    ReadToken(is, binary, &token);
+  }
+  if (token == "<LogFloor>") {
+    ReadBasicType(is, binary, &log_floor_);
+    ReadToken(is, binary, &token);
+  }
+  if (token != ostr_end.str()) {
+    KALDI_ERR << "Expected token " << ostr_end.str()
+              << ", got " << token;
+  }
+}
+
+void LogComponent::Write(std::ostream &os, bool binary) const {
+  std::ostringstream ostr_beg, ostr_end;
+  ostr_beg << "<" << Type() << ">";   // e.g. "<LogComponent>"
+  ostr_end << "</" << Type() << ">";  // e.g. "</LogComponent>"
+  WriteToken(os, binary, ostr_beg.str());
+  WriteToken(os, binary, "<Dim>");
+  WriteBasicType(os, binary, dim_);
+  // Write the values and derivatives in a count-normalized way, for
+  // greater readability in text form.
+  WriteToken(os, binary, "<ValueAvg>");
+  Vector<BaseFloat> temp(value_sum_);
+  if (count_ != 0.0) temp.Scale(1.0 / count_);
+  temp.Write(os, binary);
+  WriteToken(os, binary, "<DerivAvg>");
+
+  temp.Resize(deriv_sum_.Dim(), kUndefined);
+  temp.CopyFromVec(deriv_sum_);
+  if (count_ != 0.0) temp.Scale(1.0 / count_);
+  temp.Write(os, binary);
+  WriteToken(os, binary, "<Count>");
+  WriteBasicType(os, binary, count_);
+  if (self_repair_lower_threshold_ != kUnsetThreshold) {
+    WriteToken(os, binary, "<SelfRepairLowerThreshold>");
+    WriteBasicType(os, binary, self_repair_lower_threshold_);
+  }
+  if (self_repair_upper_threshold_ != kUnsetThreshold) {
+    WriteToken(os, binary, "<SelfRepairUpperThreshold>");
+    WriteBasicType(os, binary, self_repair_upper_threshold_);
+  }
+  if (self_repair_scale_ != 0.0) {
+    WriteToken(os, binary, "<SelfRepairScale>");
+    WriteBasicType(os, binary, self_repair_scale_);
+  }
+  WriteToken(os, binary, "<LogFloor>");
+  WriteBasicType(os, binary, log_floor_);
+  WriteToken(os, binary, ostr_end.str());
+}
 
 void LogSoftmaxComponent::Propagate(const ComponentPrecomputedIndexes *indexes,
                                     const CuMatrixBase<BaseFloat> &in,
@@ -3135,12 +3283,18 @@ void FixedScaleComponent::InitFromConfig(ConfigLine *cfl) {
     Init(vec);
   } else {
     int32 dim;
+    BaseFloat scale = 1.0;
+    bool scale_ok = cfl->GetValue("scale", &scale);
     if (!cfl->GetValue("dim", &dim) || cfl->HasUnusedValues())
      KALDI_ERR << "Invalid initializer for layer of type "
                << Type() << ": \"" << cfl->WholeLine() << "\"";
     KALDI_ASSERT(dim > 0);
     CuVector<BaseFloat> vec(dim);
-    vec.SetRandn();
+    if (scale_ok) {
+      vec.Set(scale);
+    } else {
+      vec.SetRandn();
+    }
     Init(vec);
   }
 }
diff --git a/src/nnet3/nnet-simple-component.h b/src/nnet3/nnet-simple-component.h
index f09a989759a..95a32bbe7a3 100644
--- a/src/nnet3/nnet-simple-component.h
+++ b/src/nnet3/nnet-simple-component.h
@@ -697,6 +697,71 @@ class LogSoftmaxComponent: public NonlinearComponent {
   LogSoftmaxComponent &operator = (const LogSoftmaxComponent &other);  // Disallow.
 };
 
+// The LogComponent outputs the log of the input values as
+// y = Log(max(x, epsi)), where epsi is the configurable value log_floor_.
+class LogComponent: public NonlinearComponent {
+ public:
+  explicit LogComponent(const LogComponent &other):
+      NonlinearComponent(other), log_floor_(other.log_floor_) { }
+  LogComponent(): log_floor_(1e-20) { }
+  virtual std::string Type() const { return "LogComponent"; }
+  virtual int32 Properties() const {
+    return kSimpleComponent|kBackpropNeedsInput|kStoresStats;
+  }
+
+  virtual std::string Info() const;
+
+  virtual void InitFromConfig(ConfigLine *cfl);
+
+  virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
+                         const CuMatrixBase<BaseFloat> &in,
+                         CuMatrixBase<BaseFloat> *out) const;
+  virtual void Backprop(const std::string &debug_info,
+                        const ComponentPrecomputedIndexes *indexes,
+                        const CuMatrixBase<BaseFloat> &in_value,
+                        const CuMatrixBase<BaseFloat> &out_value,
+                        const CuMatrixBase<BaseFloat> &out_deriv,
+                        Component *to_update,
+                        CuMatrixBase<BaseFloat> *in_deriv) const;
+
+  virtual Component* Copy() const { return new LogComponent(*this); }
+
+  virtual void Read(std::istream &is, bool binary);
+
+  virtual void Write(std::ostream &os, bool binary) const;
+
+ private:
+  LogComponent &operator = (const LogComponent &other);  // Disallow.
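+  // Editor's note (illustrative): LogComponent and ExpComponent (below) are
+  // inverses for inputs above log_floor_; assumed config-line usage, following
+  // the standard nnet3 config format:
+  //   component name=log1 type=LogComponent dim=100 log-floor=1e-20
+  //   component name=exp1 type=ExpComponent dim=100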
+ BaseFloat log_floor_; +}; + + +// The ExpComponent outputs the exp of input values as y = Exp(x) +class ExpComponent: public NonlinearComponent { + public: + explicit ExpComponent(const ExpComponent &other): + NonlinearComponent(other) { } + ExpComponent() { } + virtual std::string Type() const { return "ExpComponent"; } + virtual int32 Properties() const { + return kSimpleComponent|kBackpropNeedsOutput|kStoresStats; + } + virtual void Propagate(const ComponentPrecomputedIndexes *indexes, + const CuMatrixBase &in, + CuMatrixBase *out) const; + virtual void Backprop(const std::string &debug_info, + const ComponentPrecomputedIndexes *indexes, + const CuMatrixBase &, + const CuMatrixBase &out_value, + const CuMatrixBase &, + Component *to_update, + CuMatrixBase *in_deriv) const; + + virtual Component* Copy() const { return new ExpComponent(*this); } + private: + ExpComponent &operator = (const ExpComponent &other); // Disallow. +}; + + /// Keywords: natural gradient descent, NG-SGD, naturalgradient. For /// the top-level of the natural gradient code look here, and also in /// nnet-precondition-online.h. @@ -826,6 +891,8 @@ class FixedAffineComponent: public Component { // Function to provide access to linear_params_. const CuMatrix &LinearParams() const { return linear_params_; } + const CuVector &BiasParams() const { return bias_params_; } + protected: friend class AffineComponent; CuMatrix linear_params_; From 828544e0cf681c1e755a8162a487cfc314308eea Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 23 Nov 2016 16:29:51 -0500 Subject: [PATCH 043/530] asr_diarization: Adding ScaleGradientComponent --- src/nnet3/nnet-component-itf.cc | 2 + src/nnet3/nnet-component-test.cc | 5 ++ src/nnet3/nnet-simple-component.cc | 81 ++++++++++++++++++++++++++++++ src/nnet3/nnet-simple-component.h | 40 +++++++++++++++ src/nnet3/nnet-test-utils.cc | 20 ++++++-- 5 files changed, 145 insertions(+), 3 deletions(-) diff --git a/src/nnet3/nnet-component-itf.cc b/src/nnet3/nnet-component-itf.cc index 695dbb6de56..389b9876b3c 100644 --- a/src/nnet3/nnet-component-itf.cc +++ b/src/nnet3/nnet-component-itf.cc @@ -123,6 +123,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) { ans = new NoOpComponent(); } else if (component_type == "ClipGradientComponent") { ans = new ClipGradientComponent(); + } else if (component_type == "ScaleGradientComponent") { + ans = new ScaleGradientComponent(); } else if (component_type == "ElementwiseProductComponent") { ans = new ElementwiseProductComponent(); } else if (component_type == "ConvolutionComponent") { diff --git a/src/nnet3/nnet-component-test.cc b/src/nnet3/nnet-component-test.cc index 3cc6af1c70d..a2e5e23436c 100644 --- a/src/nnet3/nnet-component-test.cc +++ b/src/nnet3/nnet-component-test.cc @@ -379,6 +379,11 @@ bool TestSimpleComponentDataDerivative(const Component &c, KALDI_LOG << "Accepting deriv differences since " << "it is ClipGradientComponent."; return true; + } + else if (c.Type() == "ScaleGradientComponent") { + KALDI_LOG << "Accepting deriv differences since " + << "it is ScaleGradientComponent."; + return true; } return ans; } diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc index aa56dce1f23..fcfd4b9affa 100644 --- a/src/nnet3/nnet-simple-component.cc +++ b/src/nnet3/nnet-simple-component.cc @@ -922,6 +922,87 @@ void ClipGradientComponent::Add(BaseFloat alpha, const Component &other_in) { num_clipped_ += alpha * other->num_clipped_; } + +void ScaleGradientComponent::Init(const CuVectorBase 
&scales) {
+  KALDI_ASSERT(scales.Dim() != 0);
+  scales_ = scales;
+}
+
+
+void ScaleGradientComponent::InitFromConfig(ConfigLine *cfl) {
+  std::string filename;
+  // Accepts a "scales" config (giving a filename), or a "dim" config
+  // (giving constant or random initialization, for testing).
+  if (cfl->GetValue("scales", &filename)) {
+    if (cfl->HasUnusedValues())
+      KALDI_ERR << "Invalid initializer for layer of type "
+                << Type() << ": \"" << cfl->WholeLine() << "\"";
+    CuVector<BaseFloat> vec;
+    ReadKaldiObject(filename, &vec);
+    Init(vec);
+  } else {
+    int32 dim;
+    BaseFloat scale = 1.0;
+    bool scale_ok = cfl->GetValue("scale", &scale);
+    if (!cfl->GetValue("dim", &dim) || cfl->HasUnusedValues())
+      KALDI_ERR << "Invalid initializer for layer of type "
+                << Type() << ": \"" << cfl->WholeLine() << "\"";
+    KALDI_ASSERT(dim > 0);
+    CuVector<BaseFloat> vec(dim);
+    if (scale_ok) {
+      vec.Set(scale);
+    } else {
+      vec.SetRandn();
+    }
+    Init(vec);
+  }
+}
+
+
+std::string ScaleGradientComponent::Info() const {
+  std::ostringstream stream;
+  stream << Component::Info();
+  PrintParameterStats(stream, "scales", scales_, true);
+  return stream.str();
+}
+
+void ScaleGradientComponent::Propagate(const ComponentPrecomputedIndexes *indexes,
+                                       const CuMatrixBase<BaseFloat> &in,
+                                       CuMatrixBase<BaseFloat> *out) const {
+  out->CopyFromMat(in);  // does nothing if same matrix.
+}
+
+void ScaleGradientComponent::Backprop(const std::string &debug_info,
+                                      const ComponentPrecomputedIndexes *indexes,
+                                      const CuMatrixBase<BaseFloat> &,  // in_value
+                                      const CuMatrixBase<BaseFloat> &,  // out_value
+                                      const CuMatrixBase<BaseFloat> &out_deriv,
+                                      Component *,  // to_update
+                                      CuMatrixBase<BaseFloat> *in_deriv) const {
+  in_deriv->CopyFromMat(out_deriv);  // does nothing if same memory.
+  in_deriv->MulColsVec(scales_);
+}
+
+Component* ScaleGradientComponent::Copy() const {
+  ScaleGradientComponent *ans = new ScaleGradientComponent();
+  ans->scales_ = scales_;
+  return ans;
+}
+
+
+void ScaleGradientComponent::Write(std::ostream &os, bool binary) const {
+  WriteToken(os, binary, "<ScaleGradientComponent>");
+  WriteToken(os, binary, "<Scales>");
+  scales_.Write(os, binary);
+  WriteToken(os, binary, "</ScaleGradientComponent>");
+}
+
+void ScaleGradientComponent::Read(std::istream &is, bool binary) {
+  ExpectOneOrTwoTokens(is, binary, "<ScaleGradientComponent>", "<Scales>");
+  scales_.Read(is, binary);
+  ExpectToken(is, binary, "</ScaleGradientComponent>");
+}
+
+
 void TanhComponent::Propagate(const ComponentPrecomputedIndexes *indexes,
                               const CuMatrixBase<BaseFloat> &in,
                               CuMatrixBase<BaseFloat> *out) const {
diff --git a/src/nnet3/nnet-simple-component.h b/src/nnet3/nnet-simple-component.h
index 95a32bbe7a3..ff9ec5fd26b 100644
--- a/src/nnet3/nnet-simple-component.h
+++ b/src/nnet3/nnet-simple-component.h
@@ -1196,6 +1196,46 @@ class ClipGradientComponent: public Component {
 };
 
 
+// Applies a per-element scale to the gradient during backpropagation;
+// duplicates the input during forward propagation.
+class ScaleGradientComponent : public Component {
+ public:
+  ScaleGradientComponent() { }
+  virtual std::string Type() const { return "ScaleGradientComponent"; }
+  virtual std::string Info() const;
+  virtual int32 Properties() const {
+    return kSimpleComponent|kLinearInInput|kPropagateInPlace|kBackpropInPlace;
+  }
+
+  void Init(const CuVectorBase<BaseFloat> &scales);
+
+  // The ConfigLine cfl accepts either the option scales=<filename>, where the
+  // string is the filename of a Kaldi-format vector to read, or the option
+  // dim=<n> (optionally with scale=<value>), which gives constant or random
+  // initialization and is mainly intended for testing.
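+  // For illustration only (the component name and filename here are
+  // hypothetical), a config line using this component might look like:
+  //   component name=grad-scale type=ScaleGradientComponent scales=exp/foo/scales.vec
+  // or, for testing, with a constant per-element scale:
+  //   component name=grad-scale type=ScaleGradientComponent dim=100 scale=0.5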
+  virtual void InitFromConfig(ConfigLine *cfl);
+
+  virtual int32 InputDim() const { return scales_.Dim(); }
+  virtual int32 OutputDim() const { return scales_.Dim(); }
+
+  virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
+                         const CuMatrixBase<BaseFloat> &in,
+                         CuMatrixBase<BaseFloat> *out) const;
+  virtual void Backprop(const std::string &debug_info,
+                        const ComponentPrecomputedIndexes *indexes,
+                        const CuMatrixBase<BaseFloat> &,  // in_value
+                        const CuMatrixBase<BaseFloat> &,  // out_value
+                        const CuMatrixBase<BaseFloat> &out_deriv,
+                        Component *,  // to_update
+                        CuMatrixBase<BaseFloat> *in_deriv) const;
+  virtual Component* Copy() const;
+  virtual void Read(std::istream &is, bool binary);
+  virtual void Write(std::ostream &os, bool binary) const;
+
+ protected:
+  CuVector<BaseFloat> scales_;
+  KALDI_DISALLOW_COPY_AND_ASSIGN(ScaleGradientComponent);
+};
+
+
 /** PermuteComponent changes the order of the columns (i.e. the feature or
     activation dimensions).  Output dimension i is mapped to input dimension
     column_map_[i], so it's like doing:
diff --git a/src/nnet3/nnet-test-utils.cc b/src/nnet3/nnet-test-utils.cc
index 170ea51ca8f..da519fa1cd3 100644
--- a/src/nnet3/nnet-test-utils.cc
+++ b/src/nnet3/nnet-test-utils.cc
@@ -1104,7 +1104,7 @@ void ComputeExampleComputationRequestSimple(
 
 static void GenerateRandomComponentConfig(std::string *component_type,
                                           std::string *config) {
-  int32 n = RandInt(0, 30);
+  int32 n = RandInt(0, 33);
   BaseFloat learning_rate = 0.001 * RandInt(1, 3);
 
   std::ostringstream os;
@@ -1401,8 +1401,8 @@ static void GenerateRandomComponentConfig(std::string *component_type,
        *component_type = "DropoutComponent";
        os << "dim=" << RandInt(1, 200)
           << " dropout-proportion=" << RandUniform();
        break;
      }
      case 30: {
        *component_type = "LstmNonlinearityComponent";
        // set self-repair scale to zero so the derivative tests will pass.
@@ -1410,6 +1410,21 @@ static void GenerateRandomComponentConfig(std::string *component_type,
           << " self-repair-scale=0.0";
        break;
      }
+      case 31: {
+        *component_type = "LogComponent";
+        os << "dim=" << RandInt(1, 50);
+        break;
+      }
+      case 32: {
+        *component_type = "ExpComponent";
+        os << "dim=" << RandInt(1, 50);
+        break;
+      }
+      case 33: {
+        *component_type = "ScaleGradientComponent";
+        os << "dim=" << RandInt(1, 100);
+        break;
+      }
      default:
        KALDI_ERR << "Error generating random component";
    }

From b80cf2456cc40223fe8db4b6a98923bd7b685dbd Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 23 Nov 2016 21:44:34 -0500
Subject: [PATCH 044/530] asr_diarization: Adding AddGradientScaleLayer to
 components.py

---
 egs/wsj/s5/steps/nnet3/components.py | 36 +++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/egs/wsj/s5/steps/nnet3/components.py b/egs/wsj/s5/steps/nnet3/components.py
index 3fb92117d78..82566d2e37d 100644
--- a/egs/wsj/s5/steps/nnet3/components.py
+++ b/egs/wsj/s5/steps/nnet3/components.py
@@ -55,6 +55,35 @@ def AddNoOpLayer(config_lines, name, input):
     return {'descriptor':  '{0}_noop'.format(name),
             'dimension': input['dimension']}
 
+def AddGradientScaleLayer(config_lines, name, input, scale = 1.0, scales_vec = None):
+    components = config_lines['components']
+    component_nodes = config_lines['component-nodes']
+
+    if scales_vec is None:
+        components.append('component name={0}_gradient_scale type=ScaleGradientComponent dim={1} scale={2}'.format(name, input['dimension'], scale))
+    else:
+        components.append('component name={0}_gradient_scale type=ScaleGradientComponent scales={1}'.format(name, scales_vec))
+
+    component_nodes.append('component-node name={0}_gradient_scale component={0}_gradient_scale input={1}'.format(name, input['descriptor']))
+
+    return {'descriptor':  '{0}_gradient_scale'.format(name),
+            'dimension': input['dimension']}
+
+def AddFixedScaleLayer(config_lines, name, input,
+                       scale = 1.0, scales_vec = None):
+    components = config_lines['components']
+    component_nodes = config_lines['component-nodes']
+
+    if scales_vec is None:
+        components.append('component name={0}-fixed-scale type=FixedScaleComponent dim={1} scale={2}'.format(name, input['dimension'], scale))
+    else:
+        components.append('component name={0}-fixed-scale type=FixedScaleComponent scales={1}'.format(name, scales_vec))
+
+    component_nodes.append('component-node name={0}-fixed-scale component={0}-fixed-scale input={1}'.format(name, input['descriptor']))
+
+    return {'descriptor':  '{0}-fixed-scale'.format(name),
+            'dimension': input['dimension']}
+
 def AddLdaLayer(config_lines, name, input, lda_file):
     return AddFixedAffineLayer(config_lines, name, input, lda_file)
 
@@ -257,7 +286,9 @@ def AddFinalLayer(config_lines, input, output_dim,
                   include_log_softmax = True,
                   add_final_sigmoid = False,
                   name_affix = None,
-                  objective_type = "linear"):
+                  objective_type = "linear",
+                  objective_scale = 1.0,
+                  objective_scales_vec = None):
     components = config_lines['components']
     component_nodes = config_lines['component-nodes']
 
@@ -283,6 +314,9 @@ def AddFinalLayer(config_lines, input, output_dim,
         prev_layer_output = AddSigmoidLayer(config_lines, final_node_prefix, prev_layer_output)
     # we use the same name_affix as a prefix for affine/scale nodes but as a
     # suffix for the output node
+    if (objective_scale != 1.0 or objective_scales_vec is not None):
+        prev_layer_output = AddGradientScaleLayer(config_lines, final_node_prefix, prev_layer_output, objective_scale, objective_scales_vec)
+
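+    # Illustrative only (the node names and dimension below are hypothetical):
+    # with final_node_prefix == "Final" and objective_scale=0.5, the block
+    # above would emit something like
+    #   component name=Final_gradient_scale type=ScaleGradientComponent dim=100 scale=0.5
+    #   component-node name=Final_gradient_scale component=Final_gradient_scale input=Final_affine
+    # i.e. an identity node in the forward pass that only scales the gradient
+    # flowing back from this output into the layers below.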
AddOutputLayer(config_lines, prev_layer_output, label_delay, suffix = name_affix, objective_type = objective_type) def AddLstmLayer(config_lines, From 9ef542248d88c30a99d1df2d98618a6e071bec82 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 23 Nov 2016 21:40:35 -0500 Subject: [PATCH 045/530] asr_diarization: Adding get_egs changes into get_egs_targets --- egs/wsj/s5/steps/nnet3/get_egs_targets.sh | 111 ++++++++++++++++------ 1 file changed, 83 insertions(+), 28 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh index 7fbc24858b5..cfecf88df38 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh @@ -24,6 +24,8 @@ feat_type=raw # set it to 'lda' to use LDA features. target_type=sparse # dense to have dense targets, # sparse to have posteriors targets num_targets= # required for target-type=sparse with raw nnet +deriv_weights_scp= +l2_regularizer_targets= frames_per_eg=8 # number of frames of labels per example. more->less disk space and # less time preparing egs, but more I/O during training. # note: the script may reduce this if reduce_frames_per_eg is true. @@ -44,6 +46,12 @@ reduce_frames_per_eg=true # If true, this script may reduce the frames_per_eg # equal to the user-specified value. num_utts_subset=300 # number of utterances in validation and training # subsets used for shrinkage and diagnostics. +num_utts_subset_valid= # number of utterances in validation + # subsets used for shrinkage and diagnostics + # if provided, overrides num-utts-subset +num_utts_subset_train= # number of utterances in training + # subsets used for shrinkage and diagnostics. + # if provided, overrides num-utts-subset num_valid_frames_combine=0 # #valid frames for combination weights at the very end. num_train_frames_combine=10000 # # train frames for the above. num_frames_diagnostic=4000 # number of frames for "compute_prob" jobs @@ -59,6 +67,7 @@ stage=0 nj=6 # This should be set to the maximum number of jobs you are # comfortable to run in parallel; you can increase it if your disk # speed is greater and you have more machines. +srand=0 # rand seed for nnet3-copy-egs and nnet3-shuffle-egs online_ivector_dir= # can be used if we are including speaker information as iVectors. cmvn_opts= # can be used for specifying CMVN options, if feature type is not lda (if lda, # it doesn't make sense to use different options than were used as input to the @@ -111,9 +120,18 @@ utils/split_data.sh $data $nj mkdir -p $dir/log $dir/info +[ -z "$num_utts_subset_valid" ] && num_utts_subset_valid=$num_utts_subset +[ -z "$num_utts_subset_train" ] && num_utts_subset_train=$num_utts_subset + +num_utts=$(cat $data/utt2spk | wc -l) +if ! [ $num_utts -gt $[$num_utts_subset_valid*4] ]; then + echo "$0: number of utterances $num_utts in your training data is too small versus --num-utts-subset=$num_utts_subset" + echo "... you probably have so little data that it doesn't make sense to train a neural net." + exit 1 +fi # Get list of validation utterances. -awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset | sort \ +awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset_valid | sort \ > $dir/valid_uttlist || exit 1; if [ -f $data/utt2uniq ]; then # this matters if you use data augmentation. @@ -128,7 +146,7 @@ if [ -f $data/utt2uniq ]; then # this matters if you use data augmentation. 
fi awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \ - utils/shuffle_list.pl | head -$num_utts_subset | sort > $dir/train_subset_uttlist || exit 1; + utils/shuffle_list.pl | head -$num_utts_subset_train > $dir/train_subset_uttlist || exit 1; if [ ! -z "$transform_dir" ] && [ -f $transform_dir/trans.1 ] && [ $feat_type != "raw" ]; then echo "$0: using transforms from $transform_dir" @@ -145,15 +163,33 @@ if [ -f $transform_dir/raw_trans.1 ] && [ $feat_type == "raw" ]; then fi fi +nj_subset=$nj +if [ $nj_subset -gt `cat $dir/train_subset_uttlist | wc -l` ]; then + nj_subset=`cat $dir/train_subset_uttlist | wc -l` +fi + +if [ $nj_subset -gt `cat $dir/valid_uttlist | wc -l` ]; then + nj_subset=`cat $dir/valid_uttlist | wc -l` +fi + +valid_uttlist_all= +train_subset_uttlist_all= +for n in `seq $nj_subset`; do + valid_uttlist_all="$valid_uttlist_all $dir/valid_uttlist.$n" + train_subset_uttlist_all="$train_subset_uttlist_all $dir/train_subset_uttlist.$n" +done + +utils/split_scp.pl $dir/valid_uttlist $valid_uttlist_all +utils/split_scp.pl $dir/train_subset_uttlist $train_subset_uttlist_all ## Set up features. echo "$0: feature type is $feat_type" case $feat_type in raw) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- |" - valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |" - train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |" + valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist.JOB $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |" + train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist.JOB $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |" echo $cmvn_opts >$dir/cmvn_opts # caution: the top-level nnet training script should copy this to its own dir now. ;; lda) @@ -164,8 +200,8 @@ case $feat_type in echo "You cannot supply --cmvn-opts option if feature type is LDA." 
&& exit 1; cmvn_opts=$(cat $dir/cmvn_opts) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" - valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" - train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist.JOB $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist.JOB $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" ;; *) echo "$0: invalid feature type --feat-type '$feat_type'" && exit 1; esac @@ -182,8 +218,8 @@ if [ ! -z "$online_ivector_dir" ]; then ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $sdata/JOB/utt2spk $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" - valid_ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" - train_subset_ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" + valid_ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist.JOB $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" + train_subset_ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist.JOB $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" else echo 0 >$dir/info/ivector_dim fi @@ -255,9 +291,13 @@ fi egs_opts="--left-context=$left_context --right-context=$right_context --compress=$compress" +[ ! -z "$deriv_weights_scp" ] && egs_opts="$egs_opts --deriv-weights-rspecifier=scp:$deriv_weights_scp" +[ ! -z "$l2_regularizer_targets" ] && egs_opts="$egs_opts --l2reg-targets-rspecifier=scp:$l2_regularizer_targets" + [ -z $valid_left_context ] && valid_left_context=$left_context; [ -z $valid_right_context ] && valid_right_context=$right_context; valid_egs_opts="--left-context=$valid_left_context --right-context=$valid_right_context --compress=$compress" +[ ! 
-z "$deriv_weights_scp" ] && valid_egs_opts="$valid_egs_opts --deriv-weights-rspecifier=scp:$deriv_weights_scp" echo $left_context > $dir/info/left_context echo $right_context > $dir/info/right_context @@ -281,15 +321,15 @@ case $target_type in "dense") get_egs_program="nnet3-get-egs-dense-targets --num-targets=$num_targets" - targets="ark:utils/filter_scp.pl --exclude $dir/valid_uttlist $targets_scp_split | copy-feats scp:- ark:- |" - valid_targets="ark:utils/filter_scp.pl $dir/valid_uttlist $targets_scp | copy-feats scp:- ark:- |" - train_subset_targets="ark:utils/filter_scp.pl $dir/train_subset_uttlist $targets_scp | copy-feats scp:- ark:- |" + targets="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $targets_scp_split | copy-feats scp:- ark:- |" + valid_targets="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist.JOB $targets_scp | copy-feats scp:- ark:- |" + train_subset_targets="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist.JOB $targets_scp | copy-feats scp:- ark:- |" ;; "sparse") get_egs_program="nnet3-get-egs --num-pdfs=$num_targets" - targets="ark:utils/filter_scp.pl --exclude $dir/valid_uttlist $targets_scp_split | ali-to-post scp:- ark:- |" - valid_targets="ark:utils/filter_scp.pl $dir/valid_uttlist $targets_scp | ali-to-post scp:- ark:- |" - train_subset_targets="ark:utils/filter_scp.pl $dir/train_subset_uttlist $targets_scp | ali-to-post scp:- ark:- |" + targets="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $targets_scp_split | ali-to-post scp:- ark:- |" + valid_targets="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist.JOB $targets_scp | ali-to-post scp:- ark:- |" + train_subset_targets="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist.JOB $targets_scp | ali-to-post scp:- ark:- |" ;; default) echo "$0: Unknown --target-type $target_type. Choices are dense and sparse" @@ -299,31 +339,43 @@ esac if [ $stage -le 3 ]; then echo "$0: Getting validation and training subset examples." rm -f $dir/.error 2>/dev/null - $cmd $dir/log/create_valid_subset.log \ + $cmd JOB=1:$nj_subset $dir/log/create_valid_subset.JOB.log \ $get_egs_program \ $valid_ivector_opt $valid_egs_opts "$valid_feats" \ "$valid_targets" \ - "ark:$dir/valid_all.egs" || touch $dir/.error & - $cmd $dir/log/create_train_subset.log \ + "ark:$dir/valid_all.JOB.egs" || touch $dir/.error & + $cmd JOB=1:$nj_subset $dir/log/create_train_subset.JOB.log \ $get_egs_program \ $train_subset_ivector_opt $valid_egs_opts "$train_subset_feats" \ "$train_subset_targets" \ - "ark:$dir/train_subset_all.egs" || touch $dir/.error & + "ark:$dir/train_subset_all.JOB.egs" || touch $dir/.error & wait; + + valid_egs_all= + train_subset_egs_all= + for n in `seq $nj_subset`; do + valid_egs_all="$valid_egs_all $dir/valid_all.$n.egs" + train_subset_egs_all="$train_subset_egs_all $dir/train_subset_all.$n.egs" + done + [ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1 echo "... Getting subsets of validation examples for diagnostics and combination." 
$cmd $dir/log/create_valid_subset_combine.log \ - nnet3-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs \ + cat $valid_egs_all \| \ + nnet3-subset-egs --n=$num_valid_frames_combine ark:- \ ark:$dir/valid_combine.egs || touch $dir/.error & $cmd $dir/log/create_valid_subset_diagnostic.log \ - nnet3-subset-egs --n=$num_frames_diagnostic ark:$dir/valid_all.egs \ + cat $valid_egs_all \| \ + nnet3-subset-egs --n=$num_frames_diagnostic ark:- \ ark:$dir/valid_diagnostic.egs || touch $dir/.error & $cmd $dir/log/create_train_subset_combine.log \ - nnet3-subset-egs --n=$num_train_frames_combine ark:$dir/train_subset_all.egs \ + cat $train_subset_egs_all \| \ + nnet3-subset-egs --n=$num_train_frames_combine ark:- \ ark:$dir/train_combine.egs || touch $dir/.error & $cmd $dir/log/create_train_subset_diagnostic.log \ - nnet3-subset-egs --n=$num_frames_diagnostic ark:$dir/train_subset_all.egs \ + cat $train_subset_egs_all \| \ + nnet3-subset-egs --n=$num_frames_diagnostic ark:- \ ark:$dir/train_diagnostic.egs || touch $dir/.error & wait sleep 5 # wait for file system to sync. @@ -332,7 +384,7 @@ if [ $stage -le 3 ]; then for f in $dir/{combine,train_diagnostic,valid_diagnostic}.egs; do [ ! -s $f ] && echo "No examples in file $f" && exit 1; done - rm -f $dir/valid_all.egs $dir/train_subset_all.egs $dir/{train,valid}_combine.egs + rm $dir/valid_all.*.egs $dir/train_subset_all.*.egs $dir/{train,valid}_combine.egs fi if [ $stage -le 4 ]; then @@ -349,7 +401,7 @@ if [ $stage -le 4 ]; then $get_egs_program \ $ivector_opt $egs_opts --num-frames=$frames_per_eg "$feats" "$targets" \ ark:- \| \ - nnet3-copy-egs --random=true --srand=JOB ark:- $egs_list || exit 1; + nnet3-copy-egs --random=true --srand=\$[JOB+$srand] ark:- $egs_list || exit 1; fi if [ $stage -le 5 ]; then @@ -365,7 +417,7 @@ if [ $stage -le 5 ]; then if [ $archives_multiple == 1 ]; then # normal case. $cmd --max-jobs-run $nj JOB=1:$num_archives_intermediate $dir/log/shuffle.JOB.log \ - nnet3-shuffle-egs --srand=JOB "ark:cat $egs_list|" ark:$dir/egs.JOB.ark || exit 1; + nnet3-shuffle-egs --srand=\$[JOB+$srand] "ark:cat $egs_list|" ark:$dir/egs.JOB.ark || exit 1; else # we need to shuffle the 'intermediate archives' and then split into the # final archives. we create soft links to manage this splitting, because @@ -381,12 +433,14 @@ if [ $stage -le 5 ]; then done done $cmd --max-jobs-run $nj JOB=1:$num_archives_intermediate $dir/log/shuffle.JOB.log \ - nnet3-shuffle-egs --srand=JOB "ark:cat $egs_list|" ark:- \| \ + nnet3-shuffle-egs --srand=\$[JOB+$srand] "ark:cat $egs_list|" ark:- \| \ nnet3-copy-egs ark:- $output_archives || exit 1; fi fi +wait + if [ $stage -le 6 ]; then echo "$0: removing temporary archives" for x in $(seq $nj); do @@ -400,10 +454,11 @@ if [ $stage -le 6 ]; then # there are some extra soft links that we should delete. for f in $dir/egs.*.*.ark; do rm $f; done fi - echo "$0: removing temporary" + echo "$0: removing temporary stuff" # Ignore errors below because trans.* might not exist. 
rm -f $dir/trans.{ark,scp} $dir/targets.*.scp 2>/dev/null
 fi
 
-echo "$0: Finished preparing training examples"
+wait
+echo "$0: Finished preparing training examples"

From 3827e1c8c558832531ab857d8e93f95d8ae22c98 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 23 Nov 2016 16:24:41 -0500
Subject: [PATCH 046/530] asr_diarization: Multiple outputs in nnet3

---
 src/nnet3/nnet-combine.cc           | 25 ++++++++++++++++-----
 src/nnet3/nnet-diagnostics.h        |  5 +++++
 src/nnet3/nnet-training.cc          |  5 +++--
 src/nnet3bin/nnet3-copy-egs.cc      |  6 ++---
 src/nnet3bin/nnet3-merge-egs.cc     |  2 +-
 src/nnet3bin/nnet3-show-progress.cc | 34 ++++++++++++++++++++++++-----
 6 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/src/nnet3/nnet-combine.cc b/src/nnet3/nnet-combine.cc
index 45c1f74477b..d40c63bd3e7 100644
--- a/src/nnet3/nnet-combine.cc
+++ b/src/nnet3/nnet-combine.cc
@@ -424,15 +424,28 @@ double NnetCombiner::ComputeObjfAndDerivFromNnet(
       end = egs_.end();
   for (; iter != end; ++iter)
     prob_computer_->Compute(*iter);
-  const SimpleObjectiveInfo *objf_info = prob_computer_->GetObjective("output");
-  if (objf_info == NULL)
-    KALDI_ERR << "Error getting objective info (unsuitable egs?)";
-  KALDI_ASSERT(objf_info->tot_weight > 0.0);
+
+  double tot_weight = 0.0;
+  double tot_objf = 0.0;
+
+  {
+    const unordered_map<std::string, SimpleObjectiveInfo, StringHasher> &objf_info =
+        prob_computer_->GetAllObjectiveInfo();
+    unordered_map<std::string, SimpleObjectiveInfo, StringHasher>::const_iterator
+        objf_it = objf_info.begin(), objf_end = objf_info.end();
+
+    for (; objf_it != objf_end; ++objf_it) {
+      tot_objf += objf_it->second.tot_objective;
+      tot_weight += objf_it->second.tot_weight;
+    }
+  }
+
+  KALDI_ASSERT(tot_weight > 0.0);
+
   const Nnet &deriv = prob_computer_->GetDeriv();
   VectorizeNnet(deriv, nnet_params_deriv);
   // we prefer to deal with normalized objective functions.
-  nnet_params_deriv->Scale(1.0 / objf_info->tot_weight);
-  return objf_info->tot_objective / objf_info->tot_weight;
+  nnet_params_deriv->Scale(1.0 / tot_weight);
+  return tot_objf / tot_weight;
 }
 
diff --git a/src/nnet3/nnet-diagnostics.h b/src/nnet3/nnet-diagnostics.h
index 6ed6c4a33a7..59f0cd16f47 100644
--- a/src/nnet3/nnet-diagnostics.h
+++ b/src/nnet3/nnet-diagnostics.h
@@ -102,6 +102,11 @@ class NnetComputeProb {
   // or NULL if there is no such info.
   const SimpleObjectiveInfo *GetObjective(const std::string &output_name) const;
 
+  // Returns the objective-function info for all outputs.
+  const unordered_map<std::string, SimpleObjectiveInfo, StringHasher> &GetAllObjectiveInfo() const {
+    return objf_info_;
+  }
+
   // if config.compute_deriv == true, returns a reference to the
   // computed derivative.  Otherwise crashes.
   const Nnet &GetDeriv() const;
diff --git a/src/nnet3/nnet-training.cc b/src/nnet3/nnet-training.cc
index 9d957afe1de..bdbe244a648 100644
--- a/src/nnet3/nnet-training.cc
+++ b/src/nnet3/nnet-training.cc
@@ -188,11 +188,12 @@ bool NnetTrainer::PrintTotalStats() const {
   unordered_map<std::string, ObjectiveFunctionInfo, StringHasher>::const_iterator
       iter = objf_info_.begin(),
       end = objf_info_.end();
-  bool ans = false;
+  bool ans = true;
   for (; iter != end; ++iter) {
     const std::string &name = iter->first;
     const ObjectiveFunctionInfo &info = iter->second;
-    ans = ans || info.PrintTotalStats(name);
+    if (!info.PrintTotalStats(name))
+      ans = false;
   }
   PrintMaxChangeStats();
   return ans;
diff --git a/src/nnet3bin/nnet3-copy-egs.cc b/src/nnet3bin/nnet3-copy-egs.cc
index 0b82d91353a..ceb415ffe87 100644
--- a/src/nnet3bin/nnet3-copy-egs.cc
+++ b/src/nnet3bin/nnet3-copy-egs.cc
@@ -58,7 +58,7 @@ bool ContainsSingleExample(const NnetExample &eg,
         end = io.indexes.end();
     // Should not have an empty input/output type.
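    // Note: any io name containing the substring "output" (e.g. "output",
    // "output-xent", or a task-specific name such as "output-snr"; the last
    // of these is just an illustrative example) is treated as an output below.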
    KALDI_ASSERT(!io.indexes.empty());
-    if (io.name == "input" || io.name == "output") {
+    if (io.name == "input" || io.name.find("output") != std::string::npos) {
       int32 min_t = iter->t, max_t = iter->t;
       for (; iter != end; ++iter) {
         int32 this_t = iter->t;
@@ -75,7 +75,7 @@ bool ContainsSingleExample(const NnetExample &eg,
         *min_input_t = min_t;
         *max_input_t = max_t;
       } else {
-        KALDI_ASSERT(io.name == "output");
+        KALDI_ASSERT(io.name.find("output") != std::string::npos);
         done_output = true;
         *min_output_t = min_t;
         *max_output_t = max_t;
@@ -127,7 +127,7 @@ void FilterExample(const NnetExample &eg,
       min_t = min_input_t;
       max_t = max_input_t;
       is_input_or_output = true;
-    } else if (name == "output") {
+    } else if (name.find("output") != std::string::npos) {
       min_t = min_output_t;
       max_t = max_output_t;
       is_input_or_output = true;
diff --git a/src/nnet3bin/nnet3-merge-egs.cc b/src/nnet3bin/nnet3-merge-egs.cc
index 8627671f53a..7415db8d12a 100644
--- a/src/nnet3bin/nnet3-merge-egs.cc
+++ b/src/nnet3bin/nnet3-merge-egs.cc
@@ -30,7 +30,7 @@ namespace nnet3 {
 // or crashes if it is not there.
 int32 NumOutputIndexes(const NnetExample &eg) {
   for (size_t i = 0; i < eg.io.size(); i++)
-    if (eg.io[i].name == "output")
+    if (eg.io[i].name.find("output") != std::string::npos)
       return eg.io[i].indexes.size();
-  KALDI_ERR << "No output named 'output' in the eg.";
+  KALDI_ERR << "No output whose name contains 'output' in the eg.";
   return 0;  // Suppress compiler warning.
 }
diff --git a/src/nnet3bin/nnet3-show-progress.cc b/src/nnet3bin/nnet3-show-progress.cc
index 10898dc0ca6..785d3d0aa88 100644
--- a/src/nnet3bin/nnet3-show-progress.cc
+++ b/src/nnet3bin/nnet3-show-progress.cc
@@ -107,17 +107,39 @@ int main(int argc, char *argv[]) {
           eg_end = examples.end();
       for (; eg_iter != eg_end; ++eg_iter)
         prob_computer.Compute(*eg_iter);
-      const SimpleObjectiveInfo *objf_info = prob_computer.GetObjective("output");
-      double objf_per_frame = objf_info->tot_objective / objf_info->tot_weight;
+
+      double tot_weight = 0.0;
+
+      {
+        const unordered_map<std::string, SimpleObjectiveInfo, StringHasher> &objf_info =
+            prob_computer.GetAllObjectiveInfo();
+
+        unordered_map<std::string, SimpleObjectiveInfo, StringHasher>::const_iterator
+            objf_it = objf_info.begin(), objf_end = objf_info.end();
+
+        for (; objf_it != objf_end; ++objf_it) {
+          double objf_per_frame = objf_it->second.tot_objective / objf_it->second.tot_weight;
+
+          if (objf_it->first == "output") {
+            KALDI_LOG << "At position " << middle
+                      << ", objf per frame is " << objf_per_frame;
+          } else {
+            KALDI_LOG << "At position " << middle
+                      << ", objf per frame for '" << objf_it->first
+                      << "' is " << objf_per_frame;
+          }
+
+          tot_weight += objf_it->second.tot_weight;
+        }
+      }
+
       const Nnet &nnet_gradient = prob_computer.GetDeriv();
-      KALDI_LOG << "At position " << middle
-                << ", objf per frame is " << objf_per_frame;
       Vector<BaseFloat> old_dotprod(num_updatable), new_dotprod(num_updatable);
       ComponentDotProducts(nnet_gradient, nnet1, &old_dotprod);
       ComponentDotProducts(nnet_gradient, nnet2, &new_dotprod);
-      old_dotprod.Scale(1.0 / objf_info->tot_weight);
-      new_dotprod.Scale(1.0 / objf_info->tot_weight);
+      old_dotprod.Scale(1.0 / tot_weight);
+      new_dotprod.Scale(1.0 / tot_weight);
       diff.AddVec(1.0/ num_segments, new_dotprod);
       diff.AddVec(-1.0 / num_segments, old_dotprod);
       KALDI_VLOG(1) << "By segment " << s << ", objf change is "

From e9535d8aa5f5ed373edae0128347c433c85fe44b Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 23 Nov 2016 22:08:00 -0500
Subject: [PATCH 047/530] raw_python_script: Made LSTM and TDNN raw configs
 similar

---
 egs/wsj/s5/steps/nnet3/lstm/make_configs.py | 62 +++++++++++++++------
 1 file changed, 45 insertions(+), 17 deletions(-)

diff
--git a/egs/wsj/s5/steps/nnet3/lstm/make_configs.py b/egs/wsj/s5/steps/nnet3/lstm/make_configs.py index 205b6034fad..9fb9fad1d0c 100755 --- a/egs/wsj/s5/steps/nnet3/lstm/make_configs.py +++ b/egs/wsj/s5/steps/nnet3/lstm/make_configs.py @@ -56,6 +56,18 @@ def GetArgs(): parser.add_argument("--max-change-per-component-final", type=float, help="Enforces per-component max change for the final affine layer. " "if 0 it would not be enforced.", default=1.5) + parser.add_argument("--add-lda", type=str, action=nnet3_train_lib.StrToBoolAction, + help="If \"true\" an LDA matrix computed from the input features " + "(spliced according to the first set of splice-indexes) will be used as " + "the first Affine layer. This affine layer's parameters are fixed during training. " + "This variable needs to be set to \"false\" when using dense-targets.", + default=True, choices = ["false", "true"]) + parser.add_argument("--add-final-sigmoid", type=str, action=nnet3_train_lib.StrToBoolAction, + help="add a sigmoid layer as the final layer. Applicable only if skip-final-softmax is true.", + choices=['true', 'false'], default = False) + parser.add_argument("--objective-type", type=str, default="linear", + choices = ["linear", "quadratic", "xent"], + help = "the type of objective; i.e. quadratic or linear or cross-entropy per dim") # LSTM options parser.add_argument("--num-lstm-layers", type=int, @@ -217,7 +229,9 @@ def ParseLstmDelayString(lstm_delay): raise ValueError("invalid --lstm-delay argument, too-short element: " + lstm_delay) elif len(indexes) == 2 and indexes[0] * indexes[1] >= 0: - raise ValueError('Warning: ' + str(indexes) + ' is not a standard BLSTM mode. There should be a negative delay for the forward, and a postive delay for the backward.') + raise ValueError('Warning: ' + str(indexes) + + ' is not a standard BLSTM mode. 
' +
+                             'There should be a negative delay for the forward, and a positive delay for the backward.')
         if len(indexes) == 2 and indexes[0] > 0:  # always a negative delay followed by a positive delay
             indexes[0], indexes[1] = indexes[1], indexes[0]
         lstm_delay_array.append(indexes)
@@ -227,29 +241,35 @@ def ParseLstmDelayString(lstm_delay):
     return lstm_delay_array
 
 
-def MakeConfigs(config_dir, feat_dim, ivector_dim, num_targets,
+def MakeConfigs(config_dir, feat_dim, ivector_dim, num_targets, add_lda,
                 splice_indexes, lstm_delay, cell_dim, hidden_dim,
                 recurrent_projection_dim, non_recurrent_projection_dim,
                 num_lstm_layers, num_hidden_layers,
                 norm_based_clipping, clipping_threshold,
                 zeroing_threshold, zeroing_interval,
                 ng_per_element_scale_options, ng_affine_options,
-                label_delay, include_log_softmax, xent_regularize,
+                label_delay, include_log_softmax, add_final_sigmoid,
+                objective_type, xent_regularize,
                 self_repair_scale_nonlinearity, self_repair_scale_clipgradient,
                 max_change_per_component, max_change_per_component_final):
 
     config_lines = {'components':[], 'component-nodes':[]}
     config_files={}
-    prev_layer_output = nodes.AddInputLayer(config_lines, feat_dim, splice_indexes[0], ivector_dim)
+    prev_layer_output = nodes.AddInputLayer(config_lines, feat_dim, splice_indexes[0],
+                                            ivector_dim)
 
     # Add the init config lines for estimating the preconditioning matrices
     init_config_lines = copy.deepcopy(config_lines)
    init_config_lines['components'].insert(0, '# Config file for initializing neural network prior to')
     init_config_lines['components'].insert(0, '# preconditioning matrix computation')
-    nodes.AddOutputLayer(init_config_lines, prev_layer_output)
+    nodes.AddOutputLayer(init_config_lines, prev_layer_output, label_delay = label_delay, objective_type = objective_type)
     config_files[config_dir + '/init.config'] = init_config_lines
 
-    prev_layer_output = nodes.AddLdaLayer(config_lines, "L0", prev_layer_output, config_dir + '/lda.mat')
+    # add_lda needs to be set "false" when using dense targets,
+    # or if the task is not a simple classification task
+    # (e.g. regression, or multi-task setups).
+    if add_lda:
+        prev_layer_output = nodes.AddLdaLayer(config_lines, "L0", prev_layer_output, config_dir + '/lda.mat')
 
     for i in range(num_lstm_layers):
         if len(lstm_delay[i]) == 2:  # add a bi-directional LSTM layer
@@ -284,7 +304,7 @@ def MakeConfigs(config_dir, feat_dim, ivector_dim, num_targets,
                                         max_change_per_component = max_change_per_component)
             # make the intermediate config file for layerwise discriminative
             # training
-            nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, ng_affine_options, max_change_per_component = max_change_per_component_final, label_delay = label_delay, include_log_softmax = include_log_softmax)
+            nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, ng_affine_options, max_change_per_component = max_change_per_component_final, label_delay = label_delay, include_log_softmax = include_log_softmax, add_final_sigmoid = add_final_sigmoid, objective_type = objective_type)
 
             if xent_regularize != 0.0:
@@ -302,7 +322,7 @@ def MakeConfigs(config_dir, feat_dim, ivector_dim, num_targets,
                                        ng_affine_options, self_repair_scale = self_repair_scale_nonlinearity, max_change_per_component = max_change_per_component)
             # make the intermediate config file for layerwise discriminative
             # training
-            nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, ng_affine_options, max_change_per_component = max_change_per_component_final, label_delay = label_delay, include_log_softmax = include_log_softmax)
+            nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, ng_affine_options, max_change_per_component = max_change_per_component_final, label_delay = label_delay, include_log_softmax = include_log_softmax, add_final_sigmoid = add_final_sigmoid, objective_type = objective_type)
 
             if xent_regularize != 0.0:
                 nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets,
@@ -331,24 +351,30 @@ def ProcessSpliceIndexes(config_dir, splice_indexes, label_delay, num_lstm_layer
     if (num_hidden_layers < num_lstm_layers):
         raise Exception("num-lstm-layers : number of hidden layers (decided based on splice-indexes) has to be greater than or equal to the number of lstm layers")
 
-    # write the files used by other scripts like steps/nnet3/get_egs.sh
-    f = open(config_dir + "/vars", "w")
-    print('model_left_context=' + str(left_context), file=f)
-    print('model_right_context=' + str(right_context), file=f)
-    print('num_hidden_layers=' + str(num_hidden_layers), file=f)
-    # print('initial_right_context=' + str(splice_array[0][-1]), file=f)
-    f.close()
-
     return [left_context, right_context, num_hidden_layers, splice_indexes]
 
 
 def Main():
     args = GetArgs()
-    [left_context, right_context, num_hidden_layers, splice_indexes] = ProcessSpliceIndexes(args.config_dir, args.splice_indexes, args.label_delay, args.num_lstm_layers)
+    [left_context, right_context,
+     num_hidden_layers, splice_indexes] = ProcessSpliceIndexes(args.config_dir, args.splice_indexes,
+                                                               args.label_delay, args.num_lstm_layers)
+
+    # write the files used by other scripts like steps/nnet3/get_egs.sh
+    f = open(args.config_dir + "/vars", "w")
+    print('model_left_context=' + str(left_context), file=f)
+    print('model_right_context=' + str(right_context), file=f)
+    print('num_hidden_layers=' + str(num_hidden_layers), file=f)
+    print('num_targets=' + str(args.num_targets), file=f)
+    print('objective_type=' + str(args.objective_type), file=f)
+    print('add_lda=' + ("true" if args.add_lda else "false"), file=f)
+    print('include_log_softmax=' + ("true" if args.include_log_softmax else "false"), file=f)
+    f.close()
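+
+    # For reference, a sketch of the resulting vars file (these particular
+    # values are hypothetical); it is sourced by shell scripts such as
+    # steps/nnet3/get_egs.sh:
+    #   model_left_context=40
+    #   model_right_context=10
+    #   num_hidden_layers=3
+    #   num_targets=2
+    #   objective_type=quadratic
+    #   add_lda=false
+    #   include_log_softmax=false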
     MakeConfigs(config_dir = args.config_dir,
                 feat_dim = args.feat_dim, ivector_dim = args.ivector_dim,
                 num_targets = args.num_targets,
+                add_lda = args.add_lda,
                 splice_indexes = splice_indexes, lstm_delay = args.lstm_delay,
                 cell_dim = args.cell_dim,
                 hidden_dim = args.hidden_dim,
@@ -364,6 +390,8 @@ def Main():
                 ng_affine_options = args.ng_affine_options,
                 label_delay = args.label_delay,
                 include_log_softmax = args.include_log_softmax,
+                add_final_sigmoid = args.add_final_sigmoid,
+                objective_type = args.objective_type,
                 xent_regularize = args.xent_regularize,
                 self_repair_scale_nonlinearity = args.self_repair_scale_nonlinearity,
                 self_repair_scale_clipgradient = args.self_repair_scale_clipgradient,

From 7806dd6bd4a8986d9c876f010a63b61bc9a71251 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 23 Nov 2016 15:02:30 -0500
Subject: [PATCH 048/530] asr_diarization: Create prepare_unsad_data.sh

---
 .../local/segmentation/prepare_unsad_data.sh  | 537 ++++++++++++++++++
 1 file changed, 537 insertions(+)
 create mode 100755 egs/aspire/s5/local/segmentation/prepare_unsad_data.sh

diff --git a/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh b/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh
new file mode 100755
index 00000000000..12097811ec9
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh
@@ -0,0 +1,537 @@
+#!/bin/bash
+
+# This script prepares speech labels and deriv weights for training
+# an unsad network for speech activity detection and music detection.
+
+set -u
+set -o pipefail
+set -e
+
+. path.sh
+
+stage=-2
+cmd=queue.pl
+reco_nj=40
+nj=100
+
+# Options to be passed to get_sad_map.py
+map_noise_to_sil=true   # Map noise phones to silence label (0)
+map_unk_to_speech=true  # Map unk phones to speech label (1)
+sad_map=    # Initial mapping from phones to speech/non-speech labels.
+            # Overrides the default mapping using phones/silence.txt
+            # and phones/nonsilence.txt
+
+# Options for feature extraction
+feat_type=mfcc   # mfcc or plp
+add_pitch=false  # Add pitch features
+
+config_dir=conf
+feat_config=
+pitch_config=
+
+mfccdir=mfcc
+plpdir=plp
+
+speed_perturb=true
+
+sat_model_dir=  # Model directory used for getting alignments
+lang_test=      # Language directory used to build graph.
+                # If it is not provided, $lang will be used instead.
+
+. utils/parse_options.sh
+
+if [ $# -ne 4 ]; then
+  echo "This script takes a data directory and creates a new data directory"
+  echo "and speech activity labels for the purpose of training a"
+  echo "Universal Speech Activity Detector."
+  echo "Usage: $0 [options] <data-dir> <lang> <model-dir> <dir>"
+  echo " e.g.: $0 data/train_100k data/lang exp/tri4a exp/vad_data_prep"
+  echo ""
+  echo "Main options (for others, see top of script file)"
+  echo "  --config <config-file>             # config file containing options"
+  echo "  --cmd (run.pl|queue.pl <queue opts>) # how to run jobs."
+  echo "  --reco-nj <#njobs|4>               # Split a whole data directory into these many pieces"
+  echo "  --nj <#njobs|4>                    # Split a segmented data directory into these many pieces"
+  exit 1
+fi
+
+data_dir=$1
+lang=$2
+model_dir=$3
+dir=$4
+
+if [ $feat_type != "plp" ] && [ $feat_type != "mfcc" ]; then
+  echo "$0: --feat-type must be plp or mfcc. Must match the model_dir used."
+  exit 1
+fi
+
+[ -z "$feat_config" ] && feat_config=$config_dir/$feat_type.conf
+[ -z "$pitch_config" ] && pitch_config=$config_dir/pitch.conf
+
+extra_files=
+
+if $add_pitch; then
+  extra_files="$extra_files $pitch_config"
+fi
+
+for f in $feat_config $extra_files; do
+  if [ !
-f $f ]; then + echo "$f could not be found" + exit 1 + fi +done + +mkdir -p $dir + +function make_mfcc { + local nj=$nj + local mfcc_config=$feat_config + local add_pitch=$add_pitch + local cmd=$cmd + local pitch_config=$pitch_config + + while [ $# -gt 0 ]; do + if [ $1 == "--nj" ]; then + nj=$2 + shift; shift; + elif [ $1 == "--mfcc-config" ]; then + mfcc_config=$2 + shift; shift; + elif [ $1 == "--add-pitch" ]; then + add_pitch=$2 + shift; shift; + elif [ $1 == "--cmd" ]; then + cmd=$2 + shift; shift; + elif [ $1 == "--pitch-config" ]; then + pitch_config=$2 + shift; shift; + else + break + fi + done + + if [ $# -ne 3 ]; then + echo "Usage: make_mfcc " + exit 1 + fi + + if $add_pitch; then + steps/make_mfcc_pitch.sh --cmd "$cmd" --nj $nj \ + --mfcc-config $mfcc_config --pitch-config $pitch_config $1 $2 $3 || exit 1 + else + steps/make_mfcc.sh --cmd "$cmd" --nj $nj \ + --mfcc-config $mfcc_config $1 $2 $3 || exit 1 + fi + +} + +function make_plp { + local nj=$nj + local mfcc_config=$feat_config + local add_pitch=$add_pitch + local cmd=$cmd + local pitch_config=$pitch_config + + while [ $# -gt 0 ]; do + if [ $1 == "--nj" ]; then + nj=$2 + shift; shift; + elif [ $1 == "--plp-config" ]; then + plp_config=$2 + shift; shift; + elif [ $1 == "--add-pitch" ]; then + add_pitch=$2 + shift; shift; + elif [ $1 == "--cmd" ]; then + cmd=$2 + shift; shift; + elif [ $1 == "--pitch-config" ]; then + pitch_config=$2 + shift; shift; + else + break + fi + done + + if [ $# -ne 3 ]; then + echo "Usage: make_plp " + exit 1 + fi + + if $add_pitch; then + steps/make_plp_pitch.sh --cmd "$cmd" --nj $nj \ + --plp-config $plp_config --pitch-config $pitch_config $1 $2 $3 || exit 1 + else + steps/make_plp.sh --cmd "$cmd" --nj $nj \ + --plp-config $plp_config $1 $2 $3 || exit 1 + fi +} + +frame_shift_info=`cat $feat_config | steps/segmentation/get_frame_shift_info_from_config.pl` || exit 1 + +frame_shift=`echo $frame_shift_info | awk '{print $1}'` +frame_overlap=`echo $frame_shift_info | awk '{print $2}'` + +data_id=$(basename $data_dir) +whole_data_dir=${data_dir}_whole +whole_data_id=${data_id}_whole + +if [ $stage -le -2 ]; then + steps/segmentation/get_sad_map.py \ + --init-sad-map="$sad_map" \ + --map-noise-to-sil=$map_noise_to_sil \ + --map-unk-to-speech=$map_unk_to_speech \ + $lang | utils/sym2int.pl -f 1 $lang/phones.txt > $dir/sad_map + + utils/data/convert_data_dir_to_whole.sh ${data_dir} ${whole_data_dir} + utils/data/get_utt2dur.sh ${whole_data_dir} +fi + +if $speed_perturb; then + plpdir=${plpdir}_sp + mfccdir=${mfccdir}_sp + + + if [ $stage -le -1 ]; then + utils/data/perturb_data_dir_speed_3way.sh ${whole_data_dir} ${whole_data_dir}_sp + utils/data/perturb_data_dir_speed_3way.sh ${data_dir} ${data_dir}_sp + + if [ $feat_type == "mfcc" ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + make_mfcc --cmd "$cmd --max-jobs-run 40" --nj $nj \ + --mfcc-config $feat_config \ + --add-pitch $add_pitch --pitch-config $pitch_config \ + ${whole_data_dir}_sp exp/make_mfcc $mfccdir || exit 1 + steps/compute_cmvn_stats.sh \ + ${whole_data_dir}_sp exp/make_mfcc $mfccdir || exit 1 + elif [ $feat_type == "plp" ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $plpdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$plpdir/storage $plpdir/storage + fi + + make_plp --cmd "$cmd --max-jobs-run 40" --nj $nj \ + --plp-config $feat_config \ + --add-pitch $add_pitch --pitch-config $pitch_config \ + ${whole_data_dir}_sp exp/make_plp $plpdir || exit 1 + steps/compute_cmvn_stats.sh \ + ${whole_data_dir}_sp exp/make_plp $plpdir || exit 1 + else + echo "$0: Unknown feat-type $feat_type. Must be mfcc or plp." + exit 1 + fi + + utils/fix_data_dir.sh ${whole_data_dir}_sp + fi + + data_dir=${data_dir}_sp + whole_data_dir=${whole_data_dir}_sp + data_id=${data_id}_sp +fi + + +############################################################################### +# Compute length of recording +############################################################################### + +utils/data/get_reco2utt.sh $data_dir + +if [ $stage -le 0 ]; then + steps/segmentation/get_utt2num_frames.sh \ + --frame-shift $frame_shift --frame-overlap $frame_overlap \ + --cmd "$cmd" --nj $reco_nj $whole_data_dir + + awk '{print $1" "$2}' ${data_dir}/segments | utils/apply_map.pl -f 2 ${whole_data_dir}/utt2num_frames > $data_dir/utt2max_frames + utils/data/subsegment_feats.sh ${whole_data_dir}/feats.scp \ + $frame_shift $frame_overlap ${data_dir}/segments | \ + utils/data/fix_subsegmented_feats.pl $data_dir/utt2max_frames \ + > ${data_dir}/feats.scp + + if [ $feat_type == mfcc ]; then + steps/compute_cmvn_stats.sh ${data_dir} exp/make_mfcc/${data_id} $mfccdir + else + steps/compute_cmvn_stats.sh ${data_dir} exp/make_plp/${data_id} $plpdir + fi + + utils/fix_data_dir.sh $data_dir +fi + +if [ -z "$sat_model_dir" ]; then + ali_dir=${model_dir}_ali_${data_id} + if [ $stage -le 2 ]; then + steps/align_si.sh --nj $nj --cmd "$cmd" \ + ${data_dir} ${lang} ${model_dir} ${model_dir}_ali_${data_id} || exit 1 + fi +else + ali_dir=${sat_model_dir}_ali_${data_id} + #obtain the alignment of the perturbed data + if [ $stage -le 2 ]; then + steps/align_fmllr.sh --nj $nj --cmd "$cmd" \ + ${data_dir} ${lang} ${sat_model_dir} ${sat_model_dir}_ali_${data_id} || exit 1 + fi +fi + + +# All the data from this point is speed perturbed. + +data_id=$(basename $data_dir) +utils/split_data.sh $data_dir $nj + +############################################################################### +# Convert alignment for the provided segments into +# initial SAD labels at utterance-level in segmentation format +############################################################################### + +vad_dir=$dir/`basename ${ali_dir}`_vad_${data_id} +if [ $stage -le 3 ]; then + steps/segmentation/internal/convert_ali_to_vad.sh --cmd "$cmd" \ + $data_dir $ali_dir \ + $dir/sad_map $vad_dir +fi + +[ ! 
-s $vad_dir/sad_seg.scp ] && echo "$0: $vad_dir/vad.scp is empty" && exit 1 + +if [ $stage -le 4 ]; then + utils/copy_data_dir.sh $data_dir $dir/${data_id}_manual_segments + + awk '{print $1" "$2}' $dir/${data_id}_manual_segments/segments | sort -k1,1 > $dir/${data_id}_manual_segments/utt2spk + utils/utt2spk_to_spk2utt.pl $dir/${data_id}_manual_segments/utt2spk | sort -k1,1 > $dir/${data_id}_manual_segments/spk2utt + + if [ $feat_type == mfcc ]; then + steps/compute_cmvn_stats.sh $dir/${data_id}_manual_segments exp/make_mfcc/${data_id}_manual_segments $mfccdir + else + steps/compute_cmvn_stats.sh $dir/${data_id}_manual_segments exp/make_plp/${data_id}_manual_segments $plpdir + fi + + utils/fix_data_dir.sh $dir/${data_id}_manual_segments || true # Might fail because utt2spk will be not sorted on both utts and spks +fi + + +#utils/split_data.sh --per-reco $data_dir $reco_nj +#segmentation-combine-segments ark,s:$vad_dir/sad_seg.scp +# "ark,s:segmentation-init-from-segments --shift-to-zero=false --frame-shift=$ali_frame_shift --frame-overlap=$ali_frame_overlap ${data}/split${reco_nj}reco/JOB/segments ark:- |" \ +# "ark:cat ${data}/split${reco_nj}reco/JOB/segments | cut -d ' ' -f 1,2 | utils/utt2spk_to_spk2utt.pl | sort -k1,1 |" ark:- + +############################################################################### + + +# Create extended data directory that consists of the provided +# segments along with the segments outside it. +# This is basically dividing the whole recording into pieces +# consisting of pieces corresponding to the provided segments +# and outside the provided segments. + +############################################################################### +# Create segments outside of the manual segments +############################################################################### + +outside_data_dir=$dir/${data_id}_outside +if [ $stage -le 5 ]; then + rm -rf $outside_data_dir + mkdir -p $outside_data_dir/split${reco_nj}reco + + for f in wav.scp reco2file_and_channel stm glm; do + [ -f ${data_dir}/$f ] && cp ${data_dir}/$f $outside_data_dir + done + + steps/segmentation/split_data_on_reco.sh $data_dir $whole_data_dir $reco_nj + + for n in `seq $reco_nj`; do + dsn=$whole_data_dir/split${reco_nj}reco/$n + awk '{print $2}' $dsn/segments | \ + utils/filter_scp.pl /dev/stdin $whole_data_dir/utt2num_frames > \ + $dsn/utt2num_frames + mkdir -p $outside_data_dir/split${reco_nj}reco/$n + done + + $cmd JOB=1:$reco_nj $outside_data_dir/log/get_empty_segments.JOB.log \ + segmentation-init-from-segments --frame-shift=$frame_shift \ + --frame-overlap=$frame_overlap --shift-to-zero=false \ + ${data_dir}/split${reco_nj}reco/JOB/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- \ + "ark,t:cut -d ' ' -f 1,2 ${data_dir}/split${reco_nj}reco/JOB/segments | utils/utt2spk_to_spk2utt.pl |" ark:- \| \ + segmentation-create-subsegments --filter-label=1 --subsegment-label=0 \ + "ark:segmentation-init-from-lengths --label=1 ark,t:${whole_data_dir}/split${reco_nj}reco/JOB/utt2num_frames ark:- |" \ + ark:- ark:- \| \ + segmentation-post-process --remove-labels=0 --max-segment-length=1000 \ + --post-process-label=1 --overlap-length=50 \ + ark:- ark:- \| segmentation-to-segments --single-speaker=true \ + --frame-shift=$frame_shift --frame-overlap=$frame_overlap \ + ark:- ark,t:$outside_data_dir/split${reco_nj}reco/JOB/utt2spk \ + $outside_data_dir/split${reco_nj}reco/JOB/segments || exit 1 + + for n in `seq $reco_nj`; do + cat $outside_data_dir/split${reco_nj}reco/$n/utt2spk + done | 
sort -k1,1 > $outside_data_dir/utt2spk
+
+  for n in `seq $reco_nj`; do
+    cat $outside_data_dir/split${reco_nj}reco/$n/segments
+  done | sort -k1,1 > $outside_data_dir/segments
+
+  utils/fix_data_dir.sh $outside_data_dir
+
+fi
+
+
+if [ $stage -le 6 ]; then
+  utils/data/get_reco2utt.sh $outside_data_dir
+  awk '{print $1" "$2}' $outside_data_dir/segments | utils/apply_map.pl -f 2 $whole_data_dir/utt2num_frames > $outside_data_dir/utt2max_frames
+
+  utils/data/subsegment_feats.sh ${whole_data_dir}/feats.scp \
+    $frame_shift $frame_overlap ${outside_data_dir}/segments | \
+    utils/data/fix_subsegmented_feats.pl $outside_data_dir/utt2max_frames \
+    > ${outside_data_dir}/feats.scp
+
+fi
+
+extended_data_dir=$dir/${data_id}_extended
+if [ $stage -le 7 ]; then
+  cp $dir/${data_id}_manual_segments/cmvn.scp ${outside_data_dir} || exit 1
+  utils/fix_data_dir.sh $outside_data_dir
+
+  utils/combine_data.sh $extended_data_dir $data_dir $outside_data_dir
+
+  steps/segmentation/split_data_on_reco.sh $data_dir $extended_data_dir $reco_nj
+fi
+
+###############################################################################
+# Create graph for decoding
+###############################################################################
+
+# TODO: By default, we use a word LM. If required, we could consider
+# using a phone LM instead.
+graph_dir=$model_dir/graph
+if [ $stage -le 8 ]; then
+  if [ ! -d $graph_dir ]; then
+    utils/mkgraph.sh ${lang_test} $model_dir $graph_dir || exit 1
+  fi
+fi
+
+###############################################################################
+# Decode extended data directory
+###############################################################################
+
+
+# Decode without lattice (get only best path)
+if [ $stage -le 8 ]; then
+  steps/decode_nolats.sh --cmd "$cmd --mem 2G" --nj $nj \
+    --max-active 1000 --beam 10.0 --write-words false \
+    --write-alignments true \
+    $graph_dir ${extended_data_dir} \
+    ${model_dir}/decode_${data_id}_extended || exit 1
+  cp ${model_dir}/final.mdl ${model_dir}/decode_${data_id}_extended
+fi
+
+model_id=`basename $model_dir`
+
+# Get VAD based on the decoded best path
+decode_vad_dir=$dir/${model_id}_decode_vad_${data_id}
+if [ $stage -le 9 ]; then
+  steps/segmentation/internal/convert_ali_to_vad.sh --cmd "$cmd" \
+    $extended_data_dir ${model_dir}/decode_${data_id}_extended \
+    $dir/sad_map $decode_vad_dir
+fi
+
+[ ! -s $decode_vad_dir/sad_seg.scp ] && echo "$0: $decode_vad_dir/sad_seg.scp is empty" && exit 1
+
+vad_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $vad_dir ${PWD}`
+
+if [ $stage -le 10 ]; then
+  segmentation-init-from-segments --frame-shift=$frame_shift \
+    --frame-overlap=$frame_overlap --segment-label=0 \
+    $outside_data_dir/segments \
+    ark,scp:$vad_dir/outside_sad_seg.ark,$vad_dir/outside_sad_seg.scp
+fi
+
+reco_vad_dir=$dir/${model_id}_reco_vad_${data_id}
+mkdir -p $reco_vad_dir
+if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $reco_vad_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$reco_vad_dir/storage $reco_vad_dir/storage +fi + +reco_vad_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $reco_vad_dir ${PWD}` + +echo $reco_nj > $reco_vad_dir/num_jobs + +if [ $stage -le 11 ]; then + $cmd JOB=1:$reco_nj $reco_vad_dir/log/intersect_vad.JOB.log \ + segmentation-intersect-segments --mismatch-label=10 \ + "scp:cat $vad_dir/sad_seg.scp $vad_dir/outside_sad_seg.scp | sort -k1,1 | utils/filter_scp.pl $extended_data_dir/split${reco_nj}reco/JOB/utt2spk |" \ + "scp:utils/filter_scp.pl $extended_data_dir/split${reco_nj}reco/JOB/utt2spk $decode_vad_dir/sad_seg.scp |" \ + ark:- \| segmentation-post-process --remove-labels=10 \ + --merge-adjacent-segments --max-intersegment-length=10 ark:- ark:- \| \ + segmentation-combine-segments ark:- "ark:segmentation-init-from-segments --shift-to-zero=false $extended_data_dir/split${reco_nj}reco/JOB/segments ark:- |" \ + ark,t:$extended_data_dir/split${reco_nj}reco/JOB/reco2utt \ + ark,scp:$reco_vad_dir/sad_seg.JOB.ark,$reco_vad_dir/sad_seg.JOB.scp + for n in `seq $reco_nj`; do + cat $reco_vad_dir/sad_seg.$n.scp + done > $reco_vad_dir/sad_seg.scp +fi + +set +e +for n in `seq $reco_nj`; do + utils/create_data_link.pl $reco_vad_dir/deriv_weights.$n.ark + utils/create_data_link.pl $reco_vad_dir/deriv_weights_for_uncorrupted.$n.ark + utils/create_data_link.pl $reco_vad_dir/speech_feat.$n.ark +done +set -e + +if [ $stage -le 12 ]; then + $cmd JOB=1:$reco_nj $reco_vad_dir/log/get_deriv_weights.JOB.log \ + segmentation-post-process --merge-labels=0:1:2:3 --merge-dst-label=1 \ + scp:$reco_vad_dir/sad_seg.JOB.scp ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${whole_data_dir}/utt2num_frames ark:- ark,t:- \| \ + steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ + ark,scp:$reco_vad_dir/deriv_weights.JOB.ark,$reco_vad_dir/deriv_weights.JOB.scp + + for n in `seq $reco_nj`; do + cat $reco_vad_dir/deriv_weights.$n.scp + done > $reco_vad_dir/deriv_weights.scp +fi + +if [ $stage -le 13 ]; then + $cmd JOB=1:$reco_nj $reco_vad_dir/log/get_deriv_weights_for_uncorrupted.JOB.log \ + segmentation-post-process --remove-labels=1:2:3 scp:$reco_vad_dir/sad_seg.JOB.scp \ + ark:- \| segmentation-post-process --merge-labels=0 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${whole_data_dir}/utt2num_frames ark:- ark,t:- \| \ + steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ + ark,scp:$reco_vad_dir/deriv_weights_for_uncorrupted.JOB.ark,$reco_vad_dir/deriv_weights_for_uncorrupted.JOB.scp + for n in `seq $reco_nj`; do + cat $reco_vad_dir/deriv_weights_for_uncorrupted.$n.scp + done > $reco_vad_dir/deriv_weights_for_uncorrupted.scp +fi + +if [ $stage -le 14 ]; then + $cmd JOB=1:$reco_nj $reco_vad_dir/log/get_speech_labels.JOB.log \ + segmentation-post-process --keep-label=1 scp:$reco_vad_dir/sad_seg.JOB.scp ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${whole_data_dir}/utt2num_frames \ + ark:- ark,t:- \| \ + steps/segmentation/convert_ali_to_vec.pl \| vector-to-feat ark:- ark:- \| copy-feats --compress \ + ark:- ark,scp:$reco_vad_dir/speech_feat.JOB.ark,$reco_vad_dir/speech_feat.JOB.scp + for n in `seq $reco_nj`; do + cat $reco_vad_dir/speech_feat.$n.scp + done > $reco_vad_dir/speech_feat.scp +fi + +if [ $stage -le 15 ]; then + $cmd JOB=1:$reco_nj 
$reco_vad_dir/log/convert_manual_segments_to_deriv_weights.JOB.log \
+    segmentation-init-from-segments --shift-to-zero=false \
+    $data_dir/split${reco_nj}reco/JOB/segments ark:- \| \
+    segmentation-combine-segments-to-recordings ark:- \
+    ark:$data_dir/split${reco_nj}reco/JOB/reco2utt ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${whole_data_dir}/utt2num_frames \
+    ark:- ark,t:- \| \
+    steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \
+    ark,scp:$reco_vad_dir/deriv_weights_manual_seg.JOB.ark,$reco_vad_dir/deriv_weights_manual_seg.JOB.scp
+
+  for n in `seq $reco_nj`; do
+    cat $reco_vad_dir/deriv_weights_manual_seg.$n.scp
+  done > $reco_vad_dir/deriv_weights_manual_seg.scp
+fi
+
+echo "$0: Finished creating corpus for training Universal SAD with data in $whole_data_dir and labels in $reco_vad_dir"

From b281cea71e83f00d581830dd75b14ced64cc0cae Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Thu, 24 Nov 2016 01:18:00 -0500
Subject: [PATCH 049/530] asr_diarization: Temporary changes to mfcc_hires_bp.conf and path.sh in aspire

---
 egs/aspire/s5/conf/mfcc_hires_bp.conf | 13 +++++++++++++
 egs/aspire/s5/path.sh                 |  4 ++++
 2 files changed, 17 insertions(+)
 create mode 100644 egs/aspire/s5/conf/mfcc_hires_bp.conf

diff --git a/egs/aspire/s5/conf/mfcc_hires_bp.conf b/egs/aspire/s5/conf/mfcc_hires_bp.conf
new file mode 100644
index 00000000000..64292e8b489
--- /dev/null
+++ b/egs/aspire/s5/conf/mfcc_hires_bp.conf
@@ -0,0 +1,13 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false   # use average of log energy, not energy.
+--sample-frequency=8000 # Switchboard is sampled at 8kHz
+--num-mel-bins=28
+--num-ceps=28
+--cepstral-lifter=0
+--low-freq=330  # low cutoff frequency for mel bins
+--high-freq=-1000 # high cutoff frequency, relative to Nyquist of 4000 (=3000)
+
+
diff --git a/egs/aspire/s5/path.sh b/egs/aspire/s5/path.sh
index 1a6fb5f891b..5c0d3a92f19 100755
--- a/egs/aspire/s5/path.sh
+++ b/egs/aspire/s5/path.sh
@@ -2,4 +2,8 @@ export KALDI_ROOT=`pwd`/../../..
 export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
 [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
 . 
$KALDI_ROOT/tools/config/common_path.sh
+export PATH=/home/vmanoha1/kaldi-raw-signal/src/segmenterbin:$PATH
+export PATH=$KALDI_ROOT/tools/sph2pipe_v2.5:$PATH
+export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH
+export PYTHONPATH=steps:${PYTHONPATH}
 export LC_ALL=C

From 30bb9645ccd14f3c10b0b120eb3e1579b046a7d7 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Thu, 24 Nov 2016 01:17:37 -0500
Subject: [PATCH 050/530] asr_diarization: Modified reverberation script by moving some functions to library and adding extra options

---
 .../steps/data/data_dir_manipulation_lib.py   | 420 ++++++++++++-
 egs/wsj/s5/steps/data/reverberate_data_dir.py | 553 ++++--------------
 src/featbin/wav-reverberate.cc                | 110 +++-
 3 files changed, 632 insertions(+), 451 deletions(-)

diff --git a/egs/wsj/s5/steps/data/data_dir_manipulation_lib.py b/egs/wsj/s5/steps/data/data_dir_manipulation_lib.py
index 1f7253d4891..7f1a5f74fe2 100644
--- a/egs/wsj/s5/steps/data/data_dir_manipulation_lib.py
+++ b/egs/wsj/s5/steps/data/data_dir_manipulation_lib.py
@@ -1,4 +1,10 @@
-import subprocess
+#!/usr/bin/env python
+# Copyright 2016  Tom Ko
+#           2016  Vimal Manohar
+# Apache 2.0
+
+from __future__ import print_function
+import subprocess, random, argparse, os, shlex, warnings
 
 def RunKaldiCommand(command, wait = True):
     """ Runs commands frequently seen in Kaldi scripts. These are usually a
@@ -16,3 +22,415 @@ def RunKaldiCommand(command, wait = True):
     else:
         return p
 
+class list_cyclic_iterator:
+    def __init__(self, list):
+        self.list_index = 0
+        self.list = list
+        random.shuffle(self.list)
+
+    def next(self):
+        item = self.list[self.list_index]
+        self.list_index = (self.list_index + 1) % len(self.list)
+        return item
+
+# This function picks an item from the collection according to the associated probability distribution.
+# The probability estimate of each item in the collection is stored in the "probability" field of
+# the particular item. x : a collection (list or dictionary) where the values contain a field called probability
+def PickItemWithProbability(x):
+    if isinstance(x, dict):
+        plist = list(set(x.values()))
+    else:
+        plist = x
+    total_p = sum(item.probability for item in plist)
+    p = random.uniform(0, total_p)
+    accumulate_p = 0
+    for item in plist:
+        if accumulate_p + item.probability >= p:
+            return item
+        accumulate_p += item.probability
+    assert False, "Shouldn't get here as the accumulated probability should always equal 1"
+
+# This function smooths the probability distribution in the list
+def SmoothProbabilityDistribution(list, smoothing_weight=0.0, target_sum=1.0):
+    if len(list) > 0:
+        num_unspecified = 0
+        accumulated_prob = 0
+        for item in list:
+            if item.probability is None:
+                num_unspecified += 1
+            else:
+                accumulated_prob += item.probability
+
+        # Compute the probability for the items without a specified probability
+        uniform_probability = 0
+        if num_unspecified > 0 and accumulated_prob < 1:
+            uniform_probability = (1 - accumulated_prob) / float(num_unspecified)
+        elif num_unspecified > 0 and accumulated_prob >= 1:
+            warnings.warn("The sum of probabilities specified by user is larger than or equal to 1. "
+                          "The items without probabilities specified will be given zero probability.")
+
+        for item in list:
+            if item.probability is None:
+                item.probability = uniform_probability
+            else:
+                # smooth the probability
+                item.probability = (1 - smoothing_weight) * item.probability + smoothing_weight * uniform_probability
+
+        # Normalize the probability
+        sum_p = sum(item.probability for item in list)
+        for item in list:
+            item.probability = item.probability / sum_p * target_sum
+
+    return list
+
+# This function parses a file and packs the data into a dictionary
+# It is useful for parsing files like wav.scp, utt2spk, text, etc.
+def ParseFileToDict(file, assert2fields = False, value_processor = None):
+    if value_processor is None:
+        value_processor = lambda x: x[0]
+
+    dict = {}
+    for line in open(file, 'r'):
+        parts = line.split()
+        if assert2fields:
+            assert(len(parts) == 2)
+
+        dict[parts[0]] = value_processor(parts[1:])
+    return dict
+
+# This function creates a file and writes the content of a dictionary into it
+def WriteDictToFile(dict, file_name):
+    file = open(file_name, 'w')
+    keys = dict.keys()
+    keys.sort()
+    for key in keys:
+        value = dict[key]
+        if type(value) in [list, tuple] :
+            if type(value) is tuple:
+                value = list(value)
+            value.sort()
+            value = ' '.join([ str(x) for x in value ])
+        file.write('{0} {1}\n'.format(key, value))
+    file.close()
+
+
+# This function creates the utt2uniq file from the utterance ids in the utt2spk file
+def CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, include_original, prefix):
+    corrupted_utt2uniq = {}
+    # Parse the utt2spk to get the utterance id
+    utt2spk = ParseFileToDict(input_dir + "/utt2spk", value_processor = lambda x: " ".join(x))
+    keys = utt2spk.keys()
+    keys.sort()
+    if include_original:
+        start_index = 0
+    else:
+        start_index = 1
+
+    for i in range(start_index, num_replicas+1):
+        for utt_id in keys:
+            new_utt_id = GetNewId(utt_id, prefix, i)
+            corrupted_utt2uniq[new_utt_id] = utt_id
+
+    WriteDictToFile(corrupted_utt2uniq, output_dir + "/utt2uniq")
+
+# This function generates a new id from the input id
+# This is needed when we have to create multiple copies of the original data
+# E.g. 
GetNewId("swb0035", prefix="rvb", copy=1) returns a string "rvb1_swb0035"
+def GetNewId(id, prefix=None, copy=0):
+    if prefix is not None:
+        new_id = prefix + str(copy) + "_" + id
+    else:
+        new_id = id
+
+    return new_id
+
+# This function replicates the entries in files like segments, utt2spk, text
+def AddPrefixToFields(input_file, output_file, num_replicas, include_original, prefix, field = [0]):
+    list = map(lambda x: x.strip(), open(input_file))
+    f = open(output_file, "w")
+    if include_original:
+        start_index = 0
+    else:
+        start_index = 1
+
+    for i in range(start_index, num_replicas+1):
+        for line in list:
+            if len(line) > 0 and line[0] != ';':
+                split1 = line.split()
+                for j in field:
+                    split1[j] = GetNewId(split1[j], prefix, i)
+                print(" ".join(split1), file=f)
+            else:
+                print(line, file=f)
+    f.close()
+
+def CopyDataDirFiles(input_dir, output_dir, num_replicas, include_original, prefix):
+    if not os.path.isfile(output_dir + "/wav.scp"):
+        raise Exception("CopyDataDirFiles function expects output_dir to contain wav.scp already")
+
+    AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replicas, include_original, prefix, field = [0,1])
+    RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt"
+                    .format(output_dir = output_dir))
+
+    if os.path.isfile(input_dir + "/utt2uniq"):
+        AddPrefixToFields(input_dir + "/utt2uniq", output_dir + "/utt2uniq", num_replicas, include_original, prefix, field =[0])
+    else:
+        # Create the utt2uniq file
+        CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, include_original, prefix)
+
+    if os.path.isfile(input_dir + "/text"):
+        AddPrefixToFields(input_dir + "/text", output_dir + "/text", num_replicas, include_original, prefix, field =[0])
+    if os.path.isfile(input_dir + "/segments"):
+        AddPrefixToFields(input_dir + "/segments", output_dir + "/segments", num_replicas, include_original, prefix, field = [0,1])
+    if os.path.isfile(input_dir + "/reco2file_and_channel"):
+        AddPrefixToFields(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replicas, include_original, prefix, field = [0,1])
+
+    AddPrefixToFields(input_dir + "/reco2dur", output_dir + "/reco2dur", num_replicas, include_original, prefix, field = [0])
+
+    RunKaldiCommand("utils/validate_data_dir.sh --no-feats {output_dir}"
+                    .format(output_dir = output_dir))
+
+
+# This function parses the array of rir set parameter strings.
+# It will assign probabilities to those rir sets which don't have a probability
+# It will also check the existence of the rir list files.
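+# For illustration only (file names invented, not from any recipe), a set
+# parameter string is either "<probability>,<list-file>" or just "<list-file>":
+#   "0.4, data/rirs/smallroom_rir_list"  -> probability 0.4
+#   "data/rirs/largeroom_rir_list"       -> probability left unspecified; it
+# is filled in from the remaining probability mass by
+# SmoothProbabilityDistribution().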
+def ParseSetParameterStrings(set_para_array):
+    set_list = []
+    for set_para in set_para_array:
+        set = lambda: None
+        setattr(set, "filename", None)
+        setattr(set, "probability", None)
+        parts = set_para.split(',')
+        if len(parts) == 2:
+            set.probability = float(parts[0])
+            set.filename = parts[1].strip()
+        else:
+            set.filename = parts[0].strip()
+        if not os.path.isfile(set.filename):
+            raise Exception(set.filename + " not found")
+        set_list.append(set)
+
+    return SmoothProbabilityDistribution(set_list)
+
+
+# This function creates the RIR list
+# Each rir object in the list contains the following attributes:
+# rir_id, room_id, receiver_position_id, source_position_id, rt60, drr, probability
+# Please refer to the help messages in the parser for the meaning of these attributes
+def ParseRirList(rir_set_para_array, smoothing_weight, sampling_rate = None):
+    rir_parser = argparse.ArgumentParser()
+    rir_parser.add_argument('--rir-id', type=str, required=True, help='This id is unique for each RIR and a noise may be associated with a particular RIR by referring to this id')
+    rir_parser.add_argument('--room-id', type=str, required=True, help='This is the room where the RIR was generated')
+    rir_parser.add_argument('--receiver-position-id', type=str, default=None, help='receiver position id')
+    rir_parser.add_argument('--source-position-id', type=str, default=None, help='source position id')
+    rir_parser.add_argument('--rt60', type=float, default=None, help='RT60 is the time required for reflections of a direct sound to decay 60 dB.')
+    rir_parser.add_argument('--drr', type=float, default=None, help='Direct-to-reverberant-ratio of the impulse response.')
+    rir_parser.add_argument('--cte', type=float, default=None, help='Early-to-late index of the impulse response.')
+    rir_parser.add_argument('--probability', type=float, default=None, help='probability of the impulse response.')
+    rir_parser.add_argument('rir_rspecifier', type=str, help="""rir rspecifier, it can be either a filename or a piped command.
+                            E.g. data/impulses/Room001-00001.wav or "sox data/impulses/Room001-00001.wav -t wav - |" """)
+
+    set_list = ParseSetParameterStrings(rir_set_para_array)
+
+    rir_list = []
+    for rir_set in set_list:
+        current_rir_list = map(lambda x: rir_parser.parse_args(shlex.split(x.strip())),open(rir_set.filename))
+        for rir in current_rir_list:
+            if sampling_rate is not None:
+                # check if the rspecifier is a pipe or not
+                if len(rir.rir_rspecifier.split()) == 1:
+                    rir.rir_rspecifier = "sox {0} -r {1} -t wav - |".format(rir.rir_rspecifier, sampling_rate)
+                else:
+                    rir.rir_rspecifier = "{0} sox -t wav - -r {1} -t wav - |".format(rir.rir_rspecifier, sampling_rate)
+
+        rir_list += SmoothProbabilityDistribution(current_rir_list, smoothing_weight, rir_set.probability)
+
+    return rir_list
+
+
+# This function checks if the inputs are approximately equal assuming they are floats.
+def almost_equal(value_1, value_2, accuracy = 10**-8):
+    return abs(value_1 - value_2) < accuracy
+
+# This function converts a list of RIRs into a dictionary of RIRs indexed by the room-id.
+# Its values are objects with two attributes: a local RIR list
+# and the probability of the corresponding room
+# Please look at the comments at ParseRirList() for the attributes that a RIR object contains
+def MakeRoomDict(rir_list):
+    room_dict = {}
+    for rir in rir_list:
+        if rir.room_id not in room_dict:
+            # add new room
+            room_dict[rir.room_id] = lambda: None
+            setattr(room_dict[rir.room_id], "rir_list", [])
+            setattr(room_dict[rir.room_id], "probability", 0)
+        room_dict[rir.room_id].rir_list.append(rir)
+
+    # the probability of the room is the sum of the probabilities of its RIRs
+    for key in room_dict.keys():
+        room_dict[key].probability = sum(rir.probability for rir in room_dict[key].rir_list)
+
+    assert almost_equal(sum(room_dict[key].probability for key in room_dict.keys()), 1.0)
+
+    return room_dict
+
+
+# This function creates the point-source noise list
+# and the isotropic noise dictionary from the noise information file
+# The isotropic noise dictionary is indexed by the room
+# and its value is the corresponding isotropic noise list
+# Each noise object in the list contains the following attributes:
+# noise_id, noise_type, bg_fg_type, room_linkage, probability, noise_rspecifier
+# Please refer to the help messages in the parser for the meaning of these attributes
+def ParseNoiseList(noise_set_para_array, smoothing_weight, sampling_rate = None):
+    noise_parser = argparse.ArgumentParser()
+    noise_parser.add_argument('--noise-id', type=str, required=True, help='noise id')
+    noise_parser.add_argument('--noise-type', type=str, required=True, help='the type of noise; i.e. isotropic or point-source', choices = ["isotropic", "point-source"])
+    noise_parser.add_argument('--bg-fg-type', type=str, default="background", help='background or foreground noise, for background noises, '
+                              'they will be extended before addition to cover the whole speech; for foreground noises, they will be kept '
+                              'to their original duration and added at a random point of the speech.', choices = ["background", "foreground"])
+    noise_parser.add_argument('--room-linkage', type=str, default=None, help='required if isotropic, should not be specified if point-source.')
+    noise_parser.add_argument('--probability', type=float, default=None, help='probability of the noise.')
+    noise_parser.add_argument('noise_rspecifier', type=str, help="""noise rspecifier, it can be either a filename or a piped command.
+                              E.g. 
type5_noise_cirline_ofc_ambient1.wav or "sox type5_noise_cirline_ofc_ambient1.wav -t wav - |" """)
+
+    set_list = ParseSetParameterStrings(noise_set_para_array)
+
+    pointsource_noise_list = []
+    iso_noise_dict = {}
+    for noise_set in set_list:
+        current_noise_list = map(lambda x: noise_parser.parse_args(shlex.split(x.strip())),open(noise_set.filename))
+        current_pointsource_noise_list = []
+        for noise in current_noise_list:
+            if sampling_rate is not None:
+                # check if the rspecifier is a pipe or not
+                if len(noise.noise_rspecifier.split()) == 1:
+                    noise.noise_rspecifier = "sox {0} -r {1} -t wav - |".format(noise.noise_rspecifier, sampling_rate)
+                else:
+                    noise.noise_rspecifier = "{0} sox -t wav - -r {1} -t wav - |".format(noise.noise_rspecifier, sampling_rate)
+
+            if noise.noise_type == "isotropic":
+                if noise.room_linkage is None:
+                    raise Exception("--room-linkage must be specified if --noise-type is isotropic")
+                else:
+                    if noise.room_linkage not in iso_noise_dict:
+                        iso_noise_dict[noise.room_linkage] = []
+                    iso_noise_dict[noise.room_linkage].append(noise)
+            else:
+                current_pointsource_noise_list.append(noise)
+
+        pointsource_noise_list += SmoothProbabilityDistribution(current_pointsource_noise_list, smoothing_weight, noise_set.probability)
+
+    # ensure the point-source noise probabilities sum to 1
+    pointsource_noise_list = SmoothProbabilityDistribution(pointsource_noise_list, smoothing_weight, 1.0)
+    if len(pointsource_noise_list) > 0:
+        assert almost_equal(sum(noise.probability for noise in pointsource_noise_list), 1.0)
+
+    # ensure the isotropic noise probabilities for a given room sum to 1
+    for key in iso_noise_dict.keys():
+        iso_noise_dict[key] = SmoothProbabilityDistribution(iso_noise_dict[key])
+        assert almost_equal(sum(noise.probability for noise in iso_noise_dict[key]), 1.0)
+
+    return (pointsource_noise_list, iso_noise_dict)
+
+def AddPointSourceNoise(room,  # the room selected
+                        pointsource_noise_list,  # the point source noise list
+                        pointsource_noise_addition_probability,  # Probability of adding point-source noises
+                        foreground_snrs,  # the SNR for adding the foreground noises
+                        background_snrs,  # the SNR for adding the background noises
+                        speech_dur,  # duration of the recording
+                        max_noises_recording,  # Maximum number of point-source noises that can be added
+                        noise_addition_descriptor  # descriptor to store the information of the noise added
+                        ):
+    num_noises_added = 0
+    if len(pointsource_noise_list) > 0 and random.random() < pointsource_noise_addition_probability and max_noises_recording >= 1:
+        for k in range(random.randint(1, max_noises_recording)):
+            num_noises_added = num_noises_added + 1
+            # pick the RIR to reverberate the point-source noise
+            noise = PickItemWithProbability(pointsource_noise_list)
+            noise_rir = PickItemWithProbability(room.rir_list)
+            # If it is a background noise, the noise will be extended and added to the whole speech;
+            # if it is a foreground noise, the noise will not be extended and will be added at a random time of the speech
+            if noise.bg_fg_type == "background":
+                noise_rvb_command = """wav-reverberate --impulse-response="{0}" --duration={1}""".format(noise_rir.rir_rspecifier, speech_dur)
+                noise_addition_descriptor['start_times'].append(0)
+                noise_addition_descriptor['snrs'].append(background_snrs.next())
+                noise_addition_descriptor['durations'].append(speech_dur)
+                noise_addition_descriptor['noise_ids'].append(noise.noise_id)
+            else:
+                noise_rvb_command = """wav-reverberate --impulse-response="{0}" """.format(noise_rir.rir_rspecifier)
+                noise_addition_descriptor['start_times'].append(round(random.random() * speech_dur, 2))
+                noise_addition_descriptor['snrs'].append(foreground_snrs.next())
+                noise_addition_descriptor['durations'].append(-1)
+                noise_addition_descriptor['noise_ids'].append(noise.noise_id)
+
+            # check if the rspecifier is a pipe or not
+            if len(noise.noise_rspecifier.split()) == 1:
+                noise_addition_descriptor['noise_io'].append("{1} {0} - |".format(noise.noise_rspecifier, noise_rvb_command))
+            else:
+                noise_addition_descriptor['noise_io'].append("{0} {1} - - |".format(noise.noise_rspecifier, noise_rvb_command))
+
+# This function randomly decides whether to reverberate, and samples a RIR if it does
+# It also decides whether to add the appropriate noises
+# This function returns the string of options to the binary wav-reverberate
+def GenerateReverberationOpts(room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
+                              pointsource_noise_list,  # the point source noise list
+                              iso_noise_dict,  # the isotropic noise dictionary
+                              foreground_snrs,  # the SNR for adding the foreground noises
+                              background_snrs,  # the SNR for adding the background noises
+                              speech_rvb_probability,  # Probability of reverberating a speech signal
+                              isotropic_noise_addition_probability,  # Probability of adding isotropic noises
+                              pointsource_noise_addition_probability,  # Probability of adding point-source noises
+                              speech_dur,  # duration of the recording
+                              max_noises_recording  # Maximum number of point-source noises that can be added
+                              ):
+    impulse_response_opts = ""
+    additive_noise_opts = ""
+
+    noise_addition_descriptor = {'noise_io': [],
+                                 'start_times': [],
+                                 'snrs': [],
+                                 'noise_ids': [],
+                                 'durations': []
+                                 }
+    # Randomly select the room
+    # Here the room probability is a sum of the probabilities of the RIRs recorded in the room.
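+    # (Illustrative numbers, not from the code: if room A has probability 0.7
+    # and room B has 0.3, PickItemWithProbability() draws p ~ uniform(0, 1.0)
+    # and walks the collection with a running sum, returning room A when
+    # p <= 0.7 and room B otherwise.)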
+ room = PickItemWithProbability(room_dict) + # Randomly select the RIR in the room + speech_rir = PickItemWithProbability(room.rir_list) + if random.random() < speech_rvb_probability: + # pick the RIR to reverberate the speech + impulse_response_opts = """--impulse-response="{0}" """.format(speech_rir.rir_rspecifier) + + rir_iso_noise_list = [] + if speech_rir.room_id in iso_noise_dict: + rir_iso_noise_list = iso_noise_dict[speech_rir.room_id] + # Add the corresponding isotropic noise associated with the selected RIR + if len(rir_iso_noise_list) > 0 and random.random() < isotropic_noise_addition_probability: + isotropic_noise = PickItemWithProbability(rir_iso_noise_list) + # extend the isotropic noise to the length of the speech waveform + # check if the rspecifier is really a pipe + if len(isotropic_noise.noise_rspecifier.split()) == 1: + noise_addition_descriptor['noise_io'].append("wav-reverberate --duration={1} {0} - |".format(isotropic_noise.noise_rspecifier, speech_dur)) + else: + noise_addition_descriptor['noise_io'].append("{0} wav-reverberate --duration={1} - - |".format(isotropic_noise.noise_rspecifier, speech_dur)) + noise_addition_descriptor['start_times'].append(0) + noise_addition_descriptor['snrs'].append(background_snrs.next()) + noise_addition_descriptor['noise_ids'].append(isotropic_noise.noise_id) + noise_addition_descriptor['durations'].append(speech_dur) + + AddPointSourceNoise(room, # the room selected + pointsource_noise_list, # the point source noise list + pointsource_noise_addition_probability, # Probability of adding point-source noises + foreground_snrs, # the SNR for adding the foreground noises + background_snrs, # the SNR for adding the background noises + speech_dur, # duration of the recording + max_noises_recording, # Maximum number of point-source noises that can be added + noise_addition_descriptor # descriptor to store the information of the noise added + ) + + assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['start_times']) + assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['snrs']) + + return [impulse_response_opts, noise_addition_descriptor] + diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py index 0083efa4939..69bc5e08b3b 100755 --- a/egs/wsj/s5/steps/data/reverberate_data_dir.py +++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py @@ -5,7 +5,7 @@ # we're using python 3.x style print but want it to work in python 2.x, from __future__ import print_function -import argparse, shlex, glob, math, os, random, sys, warnings, copy, imp, ast +import argparse, glob, math, os, random, sys, warnings, copy, imp, ast data_lib = imp.load_source('dml', 'steps/data/data_dir_manipulation_lib.py') @@ -20,7 +20,7 @@ def GetArgs(): "--random-seed 1 data/train data/train_rvb", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--rir-set-parameters", type=str, action='append', required = True, dest = "rir_set_para_array", + parser.add_argument("--rir-set-parameters", type=str, action='append', required = True, dest = "rir_set_para_array", help="Specifies the parameters of an RIR set. " "Supports the specification of mixture_weight and rir_list_file_name. The mixture weight is optional. 
" "The default mixture weight is the probability mass remaining after adding the mixture weights " @@ -71,6 +71,9 @@ def GetArgs(): "the RIRs/noises will be resampled to the rate of the source data.") parser.add_argument("--include-original-data", type=str, help="If true, the output data includes one copy of the original data", choices=['true', 'false'], default = "false") + parser.add_argument("--output-additive-noise-dir", type=str, help="Output directory corresponding to the additive noise part of the data corruption") + parser.add_argument("--output-reverb-dir", type=str, help="Output directory corresponding to the reverberated signal part of the data corruption") + parser.add_argument("input_dir", help="Input data directory") parser.add_argument("output_dir", @@ -87,12 +90,29 @@ def CheckArgs(args): if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) - ## Check arguments + ## Check arguments. + if args.prefix is None: if args.num_replicas > 1 or args.include_original_data == "true": args.prefix = "rvb" warnings.warn("--prefix is set to 'rvb' as more than one copy of data is generated") + if args.output_reverb_dir is not None: + if args.output_reverb_dir == "": + args.output_reverb_dir = None + + if args.output_reverb_dir is not None: + if not os.path.exists(args.output_reverb_dir): + os.makedirs(args.output_reverb_dir) + + if args.output_additive_noise_dir is not None: + if args.output_additive_noise_dir == "": + args.output_additive_noise_dir = None + + if args.output_additive_noise_dir is not None: + if not os.path.exists(args.output_additive_noise_dir): + os.makedirs(args.output_additive_noise_dir) + if not args.num_replicas > 0: raise Exception("--num-replications cannot be non-positive") @@ -104,7 +124,7 @@ def CheckArgs(args): if args.isotropic_noise_addition_probability < 0 or args.isotropic_noise_addition_probability > 1: raise Exception("--isotropic-noise-addition-probability must be between 0 and 1") - + if args.rir_smoothing_weight < 0 or args.rir_smoothing_weight > 1: raise Exception("--rir-smoothing-weight must be between 0 and 1") @@ -113,208 +133,20 @@ def CheckArgs(args): if args.max_noises_per_minute < 0: raise Exception("--max-noises-per-minute cannot be negative") - + if args.source_sampling_rate is not None and args.source_sampling_rate <= 0: raise Exception("--source-sampling-rate cannot be non-positive") return args -class list_cyclic_iterator: - def __init__(self, list): - self.list_index = 0 - self.list = list - random.shuffle(self.list) - - def next(self): - item = self.list[self.list_index] - self.list_index = (self.list_index + 1) % len(self.list) - return item - - -# This functions picks an item from the collection according to the associated probability distribution. -# The probability estimate of each item in the collection is stored in the "probability" field of -# the particular item. 
x : a collection (list or dictionary) where the values contain a field called probability -def PickItemWithProbability(x): - if isinstance(x, dict): - plist = list(set(x.values())) - else: - plist = x - total_p = sum(item.probability for item in plist) - p = random.uniform(0, total_p) - accumulate_p = 0 - for item in plist: - if accumulate_p + item.probability >= p: - return item - accumulate_p += item.probability - assert False, "Shouldn't get here as the accumulated probability should always equal to 1" - - -# This function parses a file and pack the data into a dictionary -# It is useful for parsing file like wav.scp, utt2spk, text...etc -def ParseFileToDict(file, assert2fields = False, value_processor = None): - if value_processor is None: - value_processor = lambda x: x[0] - - dict = {} - for line in open(file, 'r'): - parts = line.split() - if assert2fields: - assert(len(parts) == 2) - - dict[parts[0]] = value_processor(parts[1:]) - return dict - -# This function creates a file and write the content of a dictionary into it -def WriteDictToFile(dict, file_name): - file = open(file_name, 'w') - keys = dict.keys() - keys.sort() - for key in keys: - value = dict[key] - if type(value) in [list, tuple] : - if type(value) is tuple: - value = list(value) - value.sort() - value = ' '.join(str(value)) - file.write('{0} {1}\n'.format(key, value)) - file.close() - - -# This function creates the utt2uniq file from the utterance id in utt2spk file -def CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, include_original, prefix): - corrupted_utt2uniq = {} - # Parse the utt2spk to get the utterance id - utt2spk = ParseFileToDict(input_dir + "/utt2spk", value_processor = lambda x: " ".join(x)) - keys = utt2spk.keys() - keys.sort() - if include_original: - start_index = 0 - else: - start_index = 1 - - for i in range(start_index, num_replicas+1): - for utt_id in keys: - new_utt_id = GetNewId(utt_id, prefix, i) - corrupted_utt2uniq[new_utt_id] = utt_id - - WriteDictToFile(corrupted_utt2uniq, output_dir + "/utt2uniq") - - -def AddPointSourceNoise(noise_addition_descriptor, # descriptor to store the information of the noise added - room, # the room selected - pointsource_noise_list, # the point source noise list - pointsource_noise_addition_probability, # Probability of adding point-source noises - foreground_snrs, # the SNR for adding the foreground noises - background_snrs, # the SNR for adding the background noises - speech_dur, # duration of the recording - max_noises_recording # Maximum number of point-source noises that can be added - ): - if len(pointsource_noise_list) > 0 and random.random() < pointsource_noise_addition_probability and max_noises_recording >= 1: - for k in range(random.randint(1, max_noises_recording)): - # pick the RIR to reverberate the point-source noise - noise = PickItemWithProbability(pointsource_noise_list) - noise_rir = PickItemWithProbability(room.rir_list) - # If it is a background noise, the noise will be extended and be added to the whole speech - # if it is a foreground noise, the noise will not extended and be added at a random time of the speech - if noise.bg_fg_type == "background": - noise_rvb_command = """wav-reverberate --impulse-response="{0}" --duration={1}""".format(noise_rir.rir_rspecifier, speech_dur) - noise_addition_descriptor['start_times'].append(0) - noise_addition_descriptor['snrs'].append(background_snrs.next()) - else: - noise_rvb_command = """wav-reverberate --impulse-response="{0}" """.format(noise_rir.rir_rspecifier) - 
noise_addition_descriptor['start_times'].append(round(random.random() * speech_dur, 2)) - noise_addition_descriptor['snrs'].append(foreground_snrs.next()) - - # check if the rspecifier is a pipe or not - if len(noise.noise_rspecifier.split()) == 1: - noise_addition_descriptor['noise_io'].append("{1} {0} - |".format(noise.noise_rspecifier, noise_rvb_command)) - else: - noise_addition_descriptor['noise_io'].append("{0} {1} - - |".format(noise.noise_rspecifier, noise_rvb_command)) - - return noise_addition_descriptor - - -# This function randomly decides whether to reverberate, and sample a RIR if it does -# It also decides whether to add the appropriate noises -# This function return the string of options to the binary wav-reverberate -def GenerateReverberationOpts(room_dict, # the room dictionary, please refer to MakeRoomDict() for the format - pointsource_noise_list, # the point source noise list - iso_noise_dict, # the isotropic noise dictionary - foreground_snrs, # the SNR for adding the foreground noises - background_snrs, # the SNR for adding the background noises - speech_rvb_probability, # Probability of reverberating a speech signal - isotropic_noise_addition_probability, # Probability of adding isotropic noises - pointsource_noise_addition_probability, # Probability of adding point-source noises - speech_dur, # duration of the recording - max_noises_recording # Maximum number of point-source noises that can be added - ): - reverberate_opts = "" - noise_addition_descriptor = {'noise_io': [], - 'start_times': [], - 'snrs': []} - # Randomly select the room - # Here the room probability is a sum of the probabilities of the RIRs recorded in the room. - room = PickItemWithProbability(room_dict) - # Randomly select the RIR in the room - speech_rir = PickItemWithProbability(room.rir_list) - if random.random() < speech_rvb_probability: - # pick the RIR to reverberate the speech - reverberate_opts += """--impulse-response="{0}" """.format(speech_rir.rir_rspecifier) - - rir_iso_noise_list = [] - if speech_rir.room_id in iso_noise_dict: - rir_iso_noise_list = iso_noise_dict[speech_rir.room_id] - # Add the corresponding isotropic noise associated with the selected RIR - if len(rir_iso_noise_list) > 0 and random.random() < isotropic_noise_addition_probability: - isotropic_noise = PickItemWithProbability(rir_iso_noise_list) - # extend the isotropic noise to the length of the speech waveform - # check if the rspecifier is a pipe or not - if len(isotropic_noise.noise_rspecifier.split()) == 1: - noise_addition_descriptor['noise_io'].append("wav-reverberate --duration={1} {0} - |".format(isotropic_noise.noise_rspecifier, speech_dur)) - else: - noise_addition_descriptor['noise_io'].append("{0} wav-reverberate --duration={1} - - |".format(isotropic_noise.noise_rspecifier, speech_dur)) - noise_addition_descriptor['start_times'].append(0) - noise_addition_descriptor['snrs'].append(background_snrs.next()) - - noise_addition_descriptor = AddPointSourceNoise(noise_addition_descriptor, # descriptor to store the information of the noise added - room, # the room selected - pointsource_noise_list, # the point source noise list - pointsource_noise_addition_probability, # Probability of adding point-source noises - foreground_snrs, # the SNR for adding the foreground noises - background_snrs, # the SNR for adding the background noises - speech_dur, # duration of the recording - max_noises_recording # Maximum number of point-source noises that can be added - ) - - assert len(noise_addition_descriptor['noise_io']) 
== len(noise_addition_descriptor['start_times']) - assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['snrs']) - if len(noise_addition_descriptor['noise_io']) > 0: - reverberate_opts += "--additive-signals='{0}' ".format(','.join(noise_addition_descriptor['noise_io'])) - reverberate_opts += "--start-times='{0}' ".format(','.join(map(lambda x:str(x), noise_addition_descriptor['start_times']))) - reverberate_opts += "--snrs='{0}' ".format(','.join(map(lambda x:str(x), noise_addition_descriptor['snrs']))) - - return reverberate_opts - -# This function generates a new id from the input id -# This is needed when we have to create multiple copies of the original data -# E.g. GetNewId("swb0035", prefix="rvb", copy=1) returns a string "rvb1_swb0035" -def GetNewId(id, prefix=None, copy=0): - if prefix is not None: - new_id = prefix + str(copy) + "_" + id - else: - new_id = id - - return new_id - - # This is the main function to generate pipeline command for the corruption # The generic command of wav-reverberate will be like: -# wav-reverberate --duration=t --impulse-response=rir.wav +# wav-reverberate --duration=t --impulse-response=rir.wav # --additive-signals='noise1.wav,noise2.wav' --snrs='snr1,snr2' --start-times='s1,s2' input.wav output.wav def GenerateReverberatedWavScp(wav_scp, # a dictionary whose values are the Kaldi-IO strings of the speech recordings durations, # a dictionary whose values are the duration (in sec) of the speech recordings - output_dir, # output directory to write the corrupted wav.scp + output_dir, # output directory to write the corrupted wav.scp room_dict, # the room dictionary, please refer to MakeRoomDict() for the format pointsource_noise_list, # the point source noise list iso_noise_dict, # the isotropic noise dictionary @@ -327,13 +159,20 @@ def GenerateReverberatedWavScp(wav_scp, # a dictionary whose values are the Kal shift_output, # option whether to shift the output waveform isotropic_noise_addition_probability, # Probability of adding isotropic noises pointsource_noise_addition_probability, # Probability of adding point-source noises - max_noises_per_minute # maximum number of point-source noises that can be added to a recording according to its duration + max_noises_per_minute, # maximum number of point-source noises that can be added to a recording according to its duration + output_reverb_dir = None, + output_additive_noise_dir = None ): - foreground_snrs = list_cyclic_iterator(foreground_snr_array) - background_snrs = list_cyclic_iterator(background_snr_array) + foreground_snrs = data_lib.list_cyclic_iterator(foreground_snr_array) + background_snrs = data_lib.list_cyclic_iterator(background_snr_array) corrupted_wav_scp = {} + reverb_wav_scp = {} + additive_noise_wav_scp = {} keys = wav_scp.keys() keys.sort() + + additive_signals_info = {} + if include_original: start_index = 0 else: @@ -346,51 +185,71 @@ def GenerateReverberatedWavScp(wav_scp, # a dictionary whose values are the Kal if len(wav_original_pipe.split()) == 1: wav_original_pipe = "cat {0} |".format(wav_original_pipe) speech_dur = durations[recording_id] - max_noises_recording = math.floor(max_noises_per_minute * speech_dur / 60) - - reverberate_opts = GenerateReverberationOpts(room_dict, # the room dictionary, please refer to MakeRoomDict() for the format - pointsource_noise_list, # the point source noise list - iso_noise_dict, # the isotropic noise dictionary - foreground_snrs, # the SNR for adding the foreground noises - background_snrs, # the SNR for adding 
the background noises - speech_rvb_probability, # Probability of reverberating a speech signal - isotropic_noise_addition_probability, # Probability of adding isotropic noises - pointsource_noise_addition_probability, # Probability of adding point-source noises - speech_dur, # duration of the recording - max_noises_recording # Maximum number of point-source noises that can be added - ) + max_noises_recording = math.ceil(max_noises_per_minute * speech_dur / 60) + + [impulse_response_opts, noise_addition_descriptor] = data_lib.GenerateReverberationOpts(room_dict, # the room dictionary, please refer to MakeRoomDict() for the format + pointsource_noise_list, # the point source noise list + iso_noise_dict, # the isotropic noise dictionary + foreground_snrs, # the SNR for adding the foreground noises + background_snrs, # the SNR for adding the background noises + speech_rvb_probability, # Probability of reverberating a speech signal + isotropic_noise_addition_probability, # Probability of adding isotropic noises + pointsource_noise_addition_probability, # Probability of adding point-source noises + speech_dur, # duration of the recording + max_noises_recording # Maximum number of point-source noises that can be added + ) + additive_noise_opts = "" + + if len(noise_addition_descriptor['noise_io']) > 0: + additive_noise_opts += "--additive-signals='{0}' ".format(','.join(noise_addition_descriptor['noise_io'])) + additive_noise_opts += "--start-times='{0}' ".format(','.join(map(lambda x:str(x), noise_addition_descriptor['start_times']))) + additive_noise_opts += "--snrs='{0}' ".format(','.join(map(lambda x:str(x), noise_addition_descriptor['snrs']))) + + reverberate_opts = impulse_response_opts + additive_noise_opts + + new_recording_id = data_lib.GetNewId(recording_id, prefix, i) # prefix using index 0 is reserved for original data e.g. 
rvb0_swb0035 corresponds to the swb0035 recording in original data if reverberate_opts == "" or i == 0: - wav_corrupted_pipe = "{0}".format(wav_original_pipe) + wav_corrupted_pipe = "{0}".format(wav_original_pipe) else: wav_corrupted_pipe = "{0} wav-reverberate --shift-output={1} {2} - - |".format(wav_original_pipe, shift_output, reverberate_opts) - new_recording_id = GetNewId(recording_id, prefix, i) corrupted_wav_scp[new_recording_id] = wav_corrupted_pipe - WriteDictToFile(corrupted_wav_scp, output_dir + "/wav.scp") + if output_reverb_dir is not None: + if impulse_response_opts == "": + wav_reverb_pipe = "{0}".format(wav_original_pipe) + else: + wav_reverb_pipe = "{0} wav-reverberate --shift-output={1} --reverb-out-wxfilename=- {2} - /dev/null |".format(wav_original_pipe, shift_output, reverberate_opts) + reverb_wav_scp[new_recording_id] = wav_reverb_pipe + if output_additive_noise_dir is not None: + if additive_noise_opts != "": + wav_additive_noise_pipe = "{0} wav-reverberate --shift-output={1} --additive-noise-out-wxfilename=- {2} - /dev/null |".format(wav_original_pipe, shift_output, reverberate_opts) + additive_noise_wav_scp[new_recording_id] = wav_additive_noise_pipe -# This function replicate the entries in files like segments, utt2spk, text -def AddPrefixToFields(input_file, output_file, num_replicas, include_original, prefix, field = [0]): - list = map(lambda x: x.strip(), open(input_file)) - f = open(output_file, "w") - if include_original: - start_index = 0 - else: - start_index = 1 - - for i in range(start_index, num_replicas+1): - for line in list: - if len(line) > 0 and line[0] != ';': - split1 = line.split() - for j in field: - split1[j] = GetNewId(split1[j], prefix, i) - print(" ".join(split1), file=f) - else: - print(line, file=f) - f.close() + if additive_noise_opts != "": + additive_signals_info[new_recording_id] = [ + ':'.join(x) + for x in zip(noise_addition_descriptor['noise_ids'], + [ str(x) for x in noise_addition_descriptor['start_times'] ], + [ str(x) for x in noise_addition_descriptor['durations'] ]) + ] + + # Write for each new recording, the id, start time and durations + # of the signals. Duration is -1 for the foreground noise and needs to + # be extracted separately if required by determining the durations + # using the wav file + data_lib.WriteDictToFile(additive_signals_info, output_dir + "/additive_signals_info.txt") + + data_lib.WriteDictToFile(corrupted_wav_scp, output_dir + "/wav.scp") + + if output_reverb_dir is not None: + data_lib.WriteDictToFile(reverb_wav_scp, output_reverb_dir + "/wav.scp") + + if output_additive_noise_dir is not None: + data_lib.WriteDictToFile(additive_noise_wav_scp, output_additive_noise_dir + "/wav.scp") # This function creates multiple copies of the necessary files, e.g. utt2spk, wav.scp ... 
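+# For illustration (ids, file names and values invented), an entry written to
+# the corrupted wav.scp by the loop above looks like:
+#   rvb1_sw02001 cat sw02001.wav | wav-reverberate --shift-output=true \
+#     --impulse-response="rir1.wav" --additive-signals='noise1.wav' \
+#     --start-times='0' --snrs='15' - - |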
@@ -408,10 +267,12 @@ def CreateReverberatedCopy(input_dir, shift_output, # option whether to shift the output waveform isotropic_noise_addition_probability, # Probability of adding isotropic noises pointsource_noise_addition_probability, # Probability of adding point-source noises - max_noises_per_minute # maximum number of point-source noises that can be added to a recording according to its duration + max_noises_per_minute, # maximum number of point-source noises that can be added to a recording according to its duration + output_reverb_dir = None, + output_additive_noise_dir = None ): - - wav_scp = ParseFileToDict(input_dir + "/wav.scp", value_processor = lambda x: " ".join(x)) + + wav_scp = data_lib.ParseFileToDict(input_dir + "/wav.scp", value_processor = lambda x: " ".join(x)) if not os.path.isfile(input_dir + "/reco2dur"): print("Getting the duration of the recordings..."); read_entire_file="false" @@ -421,225 +282,38 @@ def CreateReverberatedCopy(input_dir, read_entire_file="true" break data_lib.RunKaldiCommand("wav-to-duration --read-entire-file={1} scp:{0}/wav.scp ark,t:{0}/reco2dur".format(input_dir, read_entire_file)) - durations = ParseFileToDict(input_dir + "/reco2dur", value_processor = lambda x: float(x[0])) + durations = data_lib.ParseFileToDict(input_dir + "/reco2dur", value_processor = lambda x: float(x[0])) foreground_snr_array = map(lambda x: float(x), foreground_snr_string.split(':')) background_snr_array = map(lambda x: float(x), background_snr_string.split(':')) GenerateReverberatedWavScp(wav_scp, durations, output_dir, room_dict, pointsource_noise_list, iso_noise_dict, - foreground_snr_array, background_snr_array, num_replicas, include_original, prefix, - speech_rvb_probability, shift_output, isotropic_noise_addition_probability, - pointsource_noise_addition_probability, max_noises_per_minute) + foreground_snr_array, background_snr_array, num_replicas, include_original, prefix, + speech_rvb_probability, shift_output, isotropic_noise_addition_probability, + pointsource_noise_addition_probability, max_noises_per_minute, + output_reverb_dir = output_reverb_dir, + output_additive_noise_dir = output_additive_noise_dir) - AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replicas, include_original, prefix, field = [0,1]) - data_lib.RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt" - .format(output_dir = output_dir)) + data_lib.CopyDataDirFiles(input_dir, output_dir, num_replicas, include_original, prefix) - if os.path.isfile(input_dir + "/utt2uniq"): - AddPrefixToFields(input_dir + "/utt2uniq", output_dir + "/utt2uniq", num_replicas, include_original, prefix, field =[0]) - else: - # Create the utt2uniq file - CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, include_original, prefix) - - if os.path.isfile(input_dir + "/text"): - AddPrefixToFields(input_dir + "/text", output_dir + "/text", num_replicas, include_original, prefix, field =[0]) - if os.path.isfile(input_dir + "/segments"): - AddPrefixToFields(input_dir + "/segments", output_dir + "/segments", num_replicas, include_original, prefix, field = [0,1]) - if os.path.isfile(input_dir + "/reco2file_and_channel"): - AddPrefixToFields(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replicas, include_original, prefix, field = [0,1]) - - data_lib.RunKaldiCommand("utils/validate_data_dir.sh --no-feats {output_dir}" - .format(output_dir = output_dir)) - - -# This function smooths the probability distribution in the list 
-def SmoothProbabilityDistribution(list, smoothing_weight=0.0, target_sum=1.0): - if len(list) > 0: - num_unspecified = 0 - accumulated_prob = 0 - for item in list: - if item.probability is None: - num_unspecified += 1 - else: - accumulated_prob += item.probability - - # Compute the probability for the items without specifying their probability - uniform_probability = 0 - if num_unspecified > 0 and accumulated_prob < 1: - uniform_probability = (1 - accumulated_prob) / float(num_unspecified) - elif num_unspecified > 0 and accumulate_prob >= 1: - warnings.warn("The sum of probabilities specified by user is larger than or equal to 1. " - "The items without probabilities specified will be given zero to their probabilities.") - - for item in list: - if item.probability is None: - item.probability = uniform_probability - else: - # smooth the probability - item.probability = (1 - smoothing_weight) * item.probability + smoothing_weight * uniform_probability - - # Normalize the probability - sum_p = sum(item.probability for item in list) - for item in list: - item.probability = item.probability / sum_p * target_sum - - return list - - -# This function parse the array of rir set parameter strings. -# It will assign probabilities to those rir sets which don't have a probability -# It will also check the existence of the rir list files. -def ParseSetParameterStrings(set_para_array): - set_list = [] - for set_para in set_para_array: - set = lambda: None - setattr(set, "filename", None) - setattr(set, "probability", None) - parts = set_para.split(',') - if len(parts) == 2: - set.probability = float(parts[0]) - set.filename = parts[1].strip() - else: - set.filename = parts[0].strip() - if not os.path.isfile(set.filename): - raise Exception(set.filename + " not found") - set_list.append(set) - - return SmoothProbabilityDistribution(set_list) - - -# This function creates the RIR list -# Each rir object in the list contains the following attributes: -# rir_id, room_id, receiver_position_id, source_position_id, rt60, drr, probability -# Please refer to the help messages in the parser for the meaning of these attributes -def ParseRirList(rir_set_para_array, smoothing_weight, sampling_rate = None): - rir_parser = argparse.ArgumentParser() - rir_parser.add_argument('--rir-id', type=str, required=True, help='This id is unique for each RIR and the noise may associate with a particular RIR by refering to this id') - rir_parser.add_argument('--room-id', type=str, required=True, help='This is the room that where the RIR is generated') - rir_parser.add_argument('--receiver-position-id', type=str, default=None, help='receiver position id') - rir_parser.add_argument('--source-position-id', type=str, default=None, help='source position id') - rir_parser.add_argument('--rt60', type=float, default=None, help='RT60 is the time required for reflections of a direct sound to decay 60 dB.') - rir_parser.add_argument('--drr', type=float, default=None, help='Direct-to-reverberant-ratio of the impulse response.') - rir_parser.add_argument('--cte', type=float, default=None, help='Early-to-late index of the impulse response.') - rir_parser.add_argument('--probability', type=float, default=None, help='probability of the impulse response.') - rir_parser.add_argument('rir_rspecifier', type=str, help="""rir rspecifier, it can be either a filename or a piped command. - E.g. 
data/impulses/Room001-00001.wav or "sox data/impulses/Room001-00001.wav -t wav - |" """) - - set_list = ParseSetParameterStrings(rir_set_para_array) - - rir_list = [] - for rir_set in set_list: - current_rir_list = map(lambda x: rir_parser.parse_args(shlex.split(x.strip())),open(rir_set.filename)) - for rir in current_rir_list: - if sampling_rate is not None: - # check if the rspecifier is a pipe or not - if len(rir.rir_rspecifier.split()) == 1: - rir.rir_rspecifier = "sox {0} -r {1} -t wav - |".format(rir.rir_rspecifier, sampling_rate) - else: - rir.rir_rspecifier = "{0} sox -t wav - -r {1} -t wav - |".format(rir.rir_rspecifier, sampling_rate) - - rir_list += SmoothProbabilityDistribution(current_rir_list, smoothing_weight, rir_set.probability) - - return rir_list - - -# This dunction checks if the inputs are approximately equal assuming they are floats. -def almost_equal(value_1, value_2, accuracy = 10**-8): - return abs(value_1 - value_2) < accuracy - -# This function converts a list of RIRs into a dictionary of RIRs indexed by the room-id. -# Its values are objects with two attributes: a local RIR list -# and the probability of the corresponding room -# Please look at the comments at ParseRirList() for the attributes that a RIR object contains -def MakeRoomDict(rir_list): - room_dict = {} - for rir in rir_list: - if rir.room_id not in room_dict: - # add new room - room_dict[rir.room_id] = lambda: None - setattr(room_dict[rir.room_id], "rir_list", []) - setattr(room_dict[rir.room_id], "probability", 0) - room_dict[rir.room_id].rir_list.append(rir) - - # the probability of the room is the sum of probabilities of its RIR - for key in room_dict.keys(): - room_dict[key].probability = sum(rir.probability for rir in room_dict[key].rir_list) - - assert almost_equal(sum(room_dict[key].probability for key in room_dict.keys()), 1.0) - - return room_dict - - -# This function creates the point-source noise list -# and the isotropic noise dictionary from the noise information file -# The isotropic noise dictionary is indexed by the room -# and its value is the corrresponding isotropic noise list -# Each noise object in the list contains the following attributes: -# noise_id, noise_type, bg_fg_type, room_linkage, probability, noise_rspecifier -# Please refer to the help messages in the parser for the meaning of these attributes -def ParseNoiseList(noise_set_para_array, smoothing_weight, sampling_rate = None): - noise_parser = argparse.ArgumentParser() - noise_parser.add_argument('--noise-id', type=str, required=True, help='noise id') - noise_parser.add_argument('--noise-type', type=str, required=True, help='the type of noise; i.e. isotropic or point-source', choices = ["isotropic", "point-source"]) - noise_parser.add_argument('--bg-fg-type', type=str, default="background", help='background or foreground noise, for background noises, ' - 'they will be extended before addition to cover the whole speech; for foreground noise, they will be kept ' - 'to their original duration and added at a random point of the speech.', choices = ["background", "foreground"]) - noise_parser.add_argument('--room-linkage', type=str, default=None, help='required if isotropic, should not be specified if point-source.') - noise_parser.add_argument('--probability', type=float, default=None, help='probability of the noise.') - noise_parser.add_argument('noise_rspecifier', type=str, help="""noise rspecifier, it can be either a filename or a piped command. - E.g. 
type5_noise_cirline_ofc_ambient1.wav or "sox type5_noise_cirline_ofc_ambient1.wav -t wav - |" """) - - set_list = ParseSetParameterStrings(noise_set_para_array) - - pointsource_noise_list = [] - iso_noise_dict = {} - for noise_set in set_list: - current_noise_list = map(lambda x: noise_parser.parse_args(shlex.split(x.strip())),open(noise_set.filename)) - current_pointsource_noise_list = [] - for noise in current_noise_list: - if sampling_rate is not None: - # check if the rspecifier is a pipe or not - if len(noise.noise_rspecifier.split()) == 1: - noise.noise_rspecifier = "sox {0} -r {1} -t wav - |".format(noise.noise_rspecifier, sampling_rate) - else: - noise.noise_rspecifier = "{0} sox -t wav - -r {1} -t wav - |".format(noise.noise_rspecifier, sampling_rate) + if output_reverb_dir is not None: + data_lib.CopyDataDirFiles(input_dir, output_reverb_dir, num_replicas, include_original, prefix) - if noise.noise_type == "isotropic": - if noise.room_linkage is None: - raise Exception("--room-linkage must be specified if --noise-type is isotropic") - else: - if noise.room_linkage not in iso_noise_dict: - iso_noise_dict[noise.room_linkage] = [] - iso_noise_dict[noise.room_linkage].append(noise) - else: - current_pointsource_noise_list.append(noise) - - pointsource_noise_list += SmoothProbabilityDistribution(current_pointsource_noise_list, smoothing_weight, noise_set.probability) - - # ensure the point-source noise probabilities sum to 1 - pointsource_noise_list = SmoothProbabilityDistribution(pointsource_noise_list, smoothing_weight, 1.0) - if len(pointsource_noise_list) > 0: - assert almost_equal(sum(noise.probability for noise in pointsource_noise_list), 1.0) - - # ensure the isotropic noise source probabilities for a given room sum to 1 - for key in iso_noise_dict.keys(): - iso_noise_dict[key] = SmoothProbabilityDistribution(iso_noise_dict[key]) - assert almost_equal(sum(noise.probability for noise in iso_noise_dict[key]), 1.0) - - return (pointsource_noise_list, iso_noise_dict) + if output_additive_noise_dir is not None: + data_lib.CopyDataDirFiles(input_dir, output_additive_noise_dir, num_replicas, include_original, prefix) def Main(): args = GetArgs() random.seed(args.random_seed) - rir_list = ParseRirList(args.rir_set_para_array, args.rir_smoothing_weight, args.source_sampling_rate) + rir_list = data_lib.ParseRirList(args.rir_set_para_array, args.rir_smoothing_weight, args.source_sampling_rate) print("Number of RIRs is {0}".format(len(rir_list))) pointsource_noise_list = [] iso_noise_dict = {} if args.noise_set_para_array is not None: - pointsource_noise_list, iso_noise_dict = ParseNoiseList(args.noise_set_para_array, args.noise_smoothing_weight, args.source_sampling_rate) + pointsource_noise_list, iso_noise_dict = data_lib.ParseNoiseList(args.noise_set_para_array, args.noise_smoothing_weight, args.source_sampling_rate) print("Number of point-source noises is {0}".format(len(pointsource_noise_list))) print("Number of isotropic noises is {0}".format(sum(len(iso_noise_dict[key]) for key in iso_noise_dict.keys()))) - room_dict = MakeRoomDict(rir_list) + room_dict = data_lib.MakeRoomDict(rir_list) if args.include_original_data == "true": include_original = True @@ -660,8 +334,11 @@ def Main(): shift_output = args.shift_output, isotropic_noise_addition_probability = args.isotropic_noise_addition_probability, pointsource_noise_addition_probability = args.pointsource_noise_addition_probability, - max_noises_per_minute = args.max_noises_per_minute) + max_noises_per_minute = 
args.max_noises_per_minute,
+                            output_reverb_dir = args.output_reverb_dir,
+                            output_additive_noise_dir = args.output_additive_noise_dir)
 
 if __name__ == "__main__":
     Main()
+
diff --git a/src/featbin/wav-reverberate.cc b/src/featbin/wav-reverberate.cc
index a9e6d3509c1..3b92f6e0b3e 100644
--- a/src/featbin/wav-reverberate.cc
+++ b/src/featbin/wav-reverberate.cc
@@ -156,6 +156,8 @@ int main(int argc, char *argv[]) {
     bool normalize_output = true;
     BaseFloat volume = 0;
     BaseFloat duration = 0;
+    std::string reverb_wxfilename;
+    std::string additive_noise_wxfilename;
 
     po.Register("multi-channel-output", &multi_channel_output,
                 "Specifies if the output should be multi-channel or not");
@@ -212,6 +214,14 @@ int main(int argc, char *argv[]) {
                 "after reverberating and possibly adding noise. "
                 "If you set this option to a nonzero value, it will be as "
                 "if you had also specified --normalize-output=false.");
+    po.Register("reverb-out-wxfilename", &reverb_wxfilename,
+                "Output the reverberated wave file, i.e. before adding the "
+                "additive noise. "
+                "Useful for computing SNR features or for debugging");
+    po.Register("additive-noise-out-wxfilename",
+                &additive_noise_wxfilename,
+                "Output the additive noise file used to corrupt the input wave. "
+                "Useful for computing SNR features or for debugging");
 
     po.Read(argc, argv);
     if (po.NumArgs() != 2) {
@@ -314,10 +324,23 @@ int main(int argc, char *argv[]) {
     int32 num_samp_output = (duration > 0 ? samp_freq_input * duration :
                              (shift_output ? num_samp_input :
                                              num_samp_input + num_samp_rir - 1));
+
     Matrix<BaseFloat> out_matrix(num_output_channels, num_samp_output);
+    Matrix<BaseFloat> out_reverb_matrix;
+    if (!reverb_wxfilename.empty())
+      out_reverb_matrix.Resize(num_output_channels, num_samp_output);
+
+    Matrix<BaseFloat> out_noise_matrix;
+    if (!additive_noise_wxfilename.empty())
+      out_noise_matrix.Resize(num_output_channels, num_samp_output);
+
     for (int32 output_channel = 0; output_channel < num_output_channels; output_channel++) {
       Vector<BaseFloat> input(num_samp_input);
+
+      Vector<BaseFloat> out_reverb(0);
+      Vector<BaseFloat> out_noise(0);
+
       input.CopyRowFromMat(input_matrix, input_channel);
       float power_before_reverb = VecVec(input, input) / input.Dim();
 
@@ -337,6 +360,16 @@ int main(int argc, char *argv[]) {
         }
       }
 
+      if (!reverb_wxfilename.empty()) {
+        out_reverb.Resize(input.Dim());
+        out_reverb.CopyFromVec(input);
+      }
+
+      if (!additive_noise_wxfilename.empty()) {
+        out_noise.Resize(input.Dim());
+        out_noise.SetZero();
+      }
+
       if (additive_signal_matrices.size() > 0) {
         Vector<BaseFloat> noise(0);
         int32 this_noise_channel = (multi_channel_output ? 
output_channel : noise_channel); @@ -345,33 +378,86 @@ int main(int argc, char *argv[]) { for (int32 i = 0; i < additive_signal_matrices.size(); i++) { noise.Resize(additive_signal_matrices[i].NumCols()); noise.CopyRowFromMat(additive_signal_matrices[i], this_noise_channel); - AddNoise(&noise, snr_vector[i], start_time_vector[i], - samp_freq_input, early_energy, &input); + + if (!additive_noise_wxfilename.empty()) { + AddNoise(&noise, snr_vector[i], start_time_vector[i], + samp_freq_input, early_energy, &out_noise); + } else { + AddNoise(&noise, snr_vector[i], start_time_vector[i], + samp_freq_input, early_energy, &input); + } + } + + if (!additive_noise_wxfilename.empty()) { + input.AddVec(1.0, out_noise); } } float power_after_reverb = VecVec(input, input) / input.Dim(); - if (volume > 0) + if (volume > 0) { input.Scale(volume); - else if (normalize_output) + out_reverb.Scale(volume); + out_noise.Scale(volume); + } else if (normalize_output) { input.Scale(sqrt(power_before_reverb / power_after_reverb)); + out_reverb.Scale(sqrt(power_before_reverb / power_after_reverb)); + out_noise.Scale(sqrt(power_before_reverb / power_after_reverb)); + } if (num_samp_output <= num_samp_input) { // trim the signal from the start out_matrix.CopyRowFromVec(input.Range(shift_index, num_samp_output), output_channel); + + if (!reverb_wxfilename.empty()) { + out_reverb_matrix.CopyRowFromVec(out_reverb.Range(shift_index, num_samp_output), output_channel); + } + + if (!additive_noise_wxfilename.empty()) { + out_noise_matrix.CopyRowFromVec(out_noise.Range(shift_index, num_samp_output), output_channel); + } } else { - // repeat the signal to fill up the duration - Vector extended_input(num_samp_output); - extended_input.SetZero(); - AddVectorsOfUnequalLength(input.Range(shift_index, num_samp_input), &extended_input); - out_matrix.CopyRowFromVec(extended_input, output_channel); + { + // repeat the signal to fill up the duration + Vector extended_input(num_samp_output); + extended_input.SetZero(); + AddVectorsOfUnequalLength(input.Range(shift_index, num_samp_input), &extended_input); + out_matrix.CopyRowFromVec(extended_input, output_channel); + } + if (!reverb_wxfilename.empty()) { + // repeat the signal to fill up the duration + Vector extended_input(num_samp_output); + extended_input.SetZero(); + AddVectorsOfUnequalLength(out_reverb.Range(shift_index, num_samp_input), &extended_input); + out_reverb_matrix.CopyRowFromVec(extended_input, output_channel); + } + if (!additive_noise_wxfilename.empty()) { + // repeat the signal to fill up the duration + Vector extended_input(num_samp_output); + extended_input.SetZero(); + AddVectorsOfUnequalLength(out_noise.Range(shift_index, num_samp_input), &extended_input); + out_noise_matrix.CopyRowFromVec(extended_input, output_channel); + } } } + + { + WaveData out_wave(samp_freq_input, out_matrix); + Output ko(output_wave_file, false); + out_wave.Write(ko.Stream()); + } + + if (!reverb_wxfilename.empty()) { + WaveData out_wave(samp_freq_input, out_reverb_matrix); + Output ko(reverb_wxfilename, false); + out_wave.Write(ko.Stream()); + } - WaveData out_wave(samp_freq_input, out_matrix); - Output ko(output_wave_file, false); - out_wave.Write(ko.Stream()); + if (!additive_noise_wxfilename.empty()) { + WaveData out_wave(samp_freq_input, out_noise_matrix); + Output ko(additive_noise_wxfilename, false); + out_wave.Write(ko.Stream()); + } return 0; } catch(const std::exception &e) { From 9ca5aa09507d1826bf36341663ca812b8fa0de8a Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 23 
Subject: [PATCH 051/530] asr_diarization: Add extra_egs_copy_cmd

---
 .../nnet3/train/frame_level_objf/common.py | 64 ++++++++++++-------
 egs/wsj/s5/steps/nnet3/train_raw_dnn.py    | 12 +++-
 egs/wsj/s5/steps/nnet3/train_raw_rnn.py    | 12 +++-
 3 files changed, 59 insertions(+), 29 deletions(-)

diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
index 87cae801e90..d0cb2a52758 100644
--- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
+++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
@@ -30,7 +30,8 @@ def train_new_models(dir, iter, srand, num_jobs,
                      shuffle_buffer_size, minibatch_size,
                      cache_read_opt, run_opts,
                      frames_per_eg=-1,
-                     min_deriv_time=None, max_deriv_time=None):
+                     min_deriv_time=None, max_deriv_time=None,
+                     extra_egs_copy_cmd=""):
     """ Called from train_one_iteration(), this method does one iteration of
     training with 'num_jobs' jobs, and writes files like
     exp/tdnn_a/24.{1,2,3,..}.raw
@@ -92,7 +93,7 @@ def train_new_models(dir, iter, srand, num_jobs,
                     --max-param-change={max_param_change} \
                     {deriv_time_opts} "{raw_model}" \
                     "ark,bg:nnet3-copy-egs {frame_opts} {context_opts} """
-                    """ark:{egs_dir}/egs.{archive_index}.ark ark:- |"""
+                    """ark:{egs_dir}/egs.{archive_index}.ark ark:- |{extra_egs_copy_cmd}"""
                     """nnet3-shuffle-egs --buffer-size={shuffle_buffer_size} """
                     """--srand={srand} ark:- ark:- | """
                     """nnet3-merge-egs --minibatch-size={minibatch_size} """
@@ -115,7 +116,9 @@ def train_new_models(dir, iter, srand, num_jobs,
                         raw_model=raw_model_string, context_opts=context_opts,
                         egs_dir=egs_dir, archive_index=archive_index,
                         shuffle_buffer_size=shuffle_buffer_size,
-                        minibatch_size=minibatch_size), wait=False)
+                        minibatch_size=minibatch_size,
+                        extra_egs_copy_cmd=extra_egs_copy_cmd),
+                    wait=False)
 
         processes.append(process_handle)
 
@@ -143,7 +146,8 @@ def train_one_iteration(dir, iter, srand, egs_dir,
                         min_deriv_time=None, max_deriv_time=None,
                         shrinkage_value=1.0,
                         get_raw_nnet_from_am=True,
-                        background_process_handler=None):
+                        background_process_handler=None,
+                        extra_egs_copy_cmd=""):
     """ Called from steps/nnet3/train_*.py scripts for one iteration of neural
     network training
 
@@ -192,7 +196,8 @@ def train_one_iteration(dir, iter, srand, egs_dir,
             run_opts=run_opts,
             mb_size=cv_minibatch_size,
             get_raw_nnet_from_am=get_raw_nnet_from_am, wait=False,
-            background_process_handler=background_process_handler)
+            background_process_handler=background_process_handler,
+            extra_egs_copy_cmd=extra_egs_copy_cmd)
 
     if iter > 0:
         # Runs in the background
@@ -202,7 +207,8 @@ def train_one_iteration(dir, iter, srand, egs_dir,
             run_opts=run_opts,
             mb_size=cv_minibatch_size, wait=False,
             get_raw_nnet_from_am=get_raw_nnet_from_am,
-            background_process_handler=background_process_handler)
+            background_process_handler=background_process_handler,
+            extra_egs_copy_cmd=extra_egs_copy_cmd)
 
     # an option for writing cache (storing pairs of nnet-computations
     # and computation-requests) during training.
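To make the intended plumbing concrete, here is a minimal sketch of a value
one might pass for this option (the --keep-proportion setting below is made
up for illustration and is not part of this patch):

    # The string given via --egs.extra-copy-cmd is spliced verbatim into the
    # egs pipeline between "nnet3-copy-egs ... ark:- |" and nnet3-shuffle-egs,
    # so it must read egs on stdin, write egs on stdout, and itself end with
    # a pipe.  E.g. to keep a random half of the egs:
    extra_egs_copy_cmd = "nnet3-copy-egs --keep-proportion=0.5 ark:- ark:- |"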
@@ -276,7 +282,8 @@ def train_one_iteration(dir, iter, srand, egs_dir, cache_read_opt=cache_read_opt, run_opts=run_opts, frames_per_eg=frames_per_eg, min_deriv_time=min_deriv_time, - max_deriv_time=max_deriv_time) + max_deriv_time=max_deriv_time, + extra_egs_copy_cmd=extra_egs_copy_cmd) [models_to_average, best_model] = common_train_lib.get_successful_models( num_jobs, '{0}/log/train.{1}.%.log'.format(dir, iter)) @@ -375,7 +382,8 @@ def compute_preconditioning_matrix(dir, egs_dir, num_lda_jobs, run_opts, def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, right_context, run_opts, mb_size=256, wait=False, background_process_handler=None, - get_raw_nnet_from_am=True): + get_raw_nnet_from_am=True, + extra_egs_copy_cmd=""): if get_raw_nnet_from_am: model = "nnet3-am-copy --raw=true {dir}/{iter}.mdl - |".format( dir=dir, iter=iter) @@ -389,7 +397,7 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, """ {command} {dir}/log/compute_prob_valid.{iter}.log \ nnet3-compute-prob "{model}" \ "ark,bg:nnet3-copy-egs {context_opts} \ - ark:{egs_dir}/valid_diagnostic.egs ark:- | \ + ark:{egs_dir}/valid_diagnostic.egs ark:- |{extra_egs_copy_cmd} \ nnet3-merge-egs --minibatch-size={mb_size} ark:- \ ark:- |" """.format(command=run_opts.command, dir=dir, @@ -397,14 +405,15 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, context_opts=context_opts, mb_size=mb_size, model=model, - egs_dir=egs_dir), + egs_dir=egs_dir, + extra_egs_copy_cmd=extra_egs_copy_cmd), wait=wait, background_process_handler=background_process_handler) common_lib.run_job( """{command} {dir}/log/compute_prob_train.{iter}.log \ nnet3-compute-prob "{model}" \ "ark,bg:nnet3-copy-egs {context_opts} \ - ark:{egs_dir}/train_diagnostic.egs ark:- | \ + ark:{egs_dir}/train_diagnostic.egs ark:- |{extra_egs_copy_cmd} \ nnet3-merge-egs --minibatch-size={mb_size} ark:- \ ark:- |" """.format(command=run_opts.command, dir=dir, @@ -412,14 +421,16 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, context_opts=context_opts, mb_size=mb_size, model=model, - egs_dir=egs_dir), + egs_dir=egs_dir, + extra_egs_copy_cmd=extra_egs_copy_cmd), wait=wait, background_process_handler=background_process_handler) def compute_progress(dir, iter, egs_dir, left_context, right_context, run_opts, mb_size=256, background_process_handler=None, wait=False, - get_raw_nnet_from_am=True): + get_raw_nnet_from_am=True, + extra_egs_copy_cmd=""): if get_raw_nnet_from_am: prev_model = "nnet3-am-copy --raw=true {0}/{1}.mdl - |".format( dir, iter - 1) @@ -436,7 +447,7 @@ def compute_progress(dir, iter, egs_dir, left_context, right_context, nnet3-info "{model}" '&&' \ nnet3-show-progress --use-gpu=no "{prev_model}" "{model}" \ "ark,bg:nnet3-copy-egs {context_opts} \ - ark:{egs_dir}/train_diagnostic.egs ark:- | \ + ark:{egs_dir}/train_diagnostic.egs ark:- |{extra_egs_copy_cmd} \ nnet3-merge-egs --minibatch-size={mb_size} ark:- \ ark:- |" """.format(command=run_opts.command, dir=dir, @@ -445,14 +456,16 @@ def compute_progress(dir, iter, egs_dir, left_context, right_context, context_opts=context_opts, mb_size=mb_size, prev_model=prev_model, - egs_dir=egs_dir), + egs_dir=egs_dir, + extra_egs_copy_cmd=extra_egs_copy_cmd), wait=wait, background_process_handler=background_process_handler) def combine_models(dir, num_iters, models_to_combine, egs_dir, left_context, right_context, run_opts, background_process_handler=None, - chunk_width=None, get_raw_nnet_from_am=True): + chunk_width=None, get_raw_nnet_from_am=True, + 
extra_egs_copy_cmd=""): """ Function to do model combination In the nnet3 setup, the logic @@ -499,7 +512,7 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir, --enforce-sum-to-one=true --enforce-positive-weights=true \ --verbose=3 {raw_models} \ "ark,bg:nnet3-copy-egs {context_opts} \ - ark:{egs_dir}/combine.egs ark:- | \ + ark:{egs_dir}/combine.egs ark:- |{extra_egs_copy_cmd} \ nnet3-merge-egs --measure-output-frames=false \ --minibatch-size={mbsize} ark:- ark:- |" \ "{out_model}" @@ -509,7 +522,8 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir, context_opts=context_opts, mbsize=mbsize, out_model=out_model, - egs_dir=egs_dir)) + egs_dir=egs_dir, + extra_egs_copy_cmd=extra_egs_copy_cmd)) # Compute the probability of the final, combined model with # the same subset we used for the previous compute_probs, as the @@ -519,14 +533,16 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir, dir=dir, iter='combined', egs_dir=egs_dir, left_context=left_context, right_context=right_context, run_opts=run_opts, wait=False, - background_process_handler=background_process_handler) + background_process_handler=background_process_handler, + extra_egs_copy_cmd=extra_egs_copy_cmd) else: compute_train_cv_probabilities( dir=dir, iter='final', egs_dir=egs_dir, left_context=left_context, right_context=right_context, run_opts=run_opts, wait=False, background_process_handler=background_process_handler, - get_raw_nnet_from_am=False) + get_raw_nnet_from_am=False, + extra_egs_copy_cmd=extra_egs_copy_cmd) def get_realign_iters(realign_times, num_iters, @@ -639,7 +655,8 @@ def adjust_am_priors(dir, input_model, avg_posterior_vector, output_model, def compute_average_posterior(dir, iter, egs_dir, num_archives, prior_subset_size, left_context, right_context, - run_opts, get_raw_nnet_from_am=True): + run_opts, get_raw_nnet_from_am=True, + extra_egs_copy_cmd=""): """ Computes the average posterior of the network Note: this just uses CPUs, using a smallish subset of data. """ @@ -663,7 +680,7 @@ def compute_average_posterior(dir, iter, egs_dir, num_archives, """{command} JOB=1:{num_jobs_compute_prior} {prior_queue_opt} \ {dir}/log/get_post.{iter}.JOB.log \ nnet3-copy-egs {context_opts} \ - ark:{egs_dir}/egs.{egs_part}.ark ark:- \| \ + ark:{egs_dir}/egs.{egs_part}.ark ark:- \| {extra_egs_copy_cmd}\ nnet3-subset-egs --srand=JOB --n={prior_subset_size} \ ark:- ark:- \| \ nnet3-merge-egs --measure-output-frames=true \ @@ -679,7 +696,8 @@ def compute_average_posterior(dir, iter, egs_dir, num_archives, iter=iter, prior_subset_size=prior_subset_size, egs_dir=egs_dir, egs_part=egs_part, context_opts=context_opts, - prior_gpu_opt=run_opts.prior_gpu_opt)) + prior_gpu_opt=run_opts.prior_gpu_opt, + extra_egs_copy_cmd=extra_egs_copy_cmd)) # make sure there is time for $dir/post.{iter}.*.vec to appear. 
time.sleep(5)

diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py
index b67ba8792a8..d7651889d83 100755
--- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py
+++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py
@@ -53,6 +53,9 @@ def get_args():
     parser.add_argument("--egs.frames-per-eg", type=int, dest='frames_per_eg',
                         default=8,
                         help="Number of output labels per example")
+    parser.add_argument("--egs.extra-copy-cmd", type=str,
+                        dest='extra_egs_copy_cmd', default="",
+                        help="""Modify egs before passing them to training""")
 
     # trainer options
     parser.add_argument("--trainer.prior-subset-size", type=int,
@@ -322,7 +325,8 @@ def learning_rate(iter, current_num_jobs, num_archives_processed):
                 shuffle_buffer_size=args.shuffle_buffer_size,
                 run_opts=run_opts,
                 get_raw_nnet_from_am=False,
-                background_process_handler=background_process_handler)
+                background_process_handler=background_process_handler,
+                extra_egs_copy_cmd=args.extra_egs_copy_cmd)
 
             if args.cleanup:
                 # do a clean up of everything but the last 2 models, under certain
@@ -353,7 +357,8 @@ def learning_rate(iter, current_num_jobs, num_archives_processed):
             left_context=left_context, right_context=right_context,
             run_opts=run_opts,
             background_process_handler=background_process_handler,
-            get_raw_nnet_from_am=False)
+            get_raw_nnet_from_am=False,
+            extra_egs_copy_cmd=args.extra_egs_copy_cmd)
 
     if include_log_softmax and args.stage <= num_iters + 1:
         logger.info("Getting average posterior for purposes of "
@@ -363,7 +368,8 @@ def learning_rate(iter, current_num_jobs, num_archives_processed):
             num_archives=num_archives, left_context=left_context,
             right_context=right_context,
             prior_subset_size=args.prior_subset_size, run_opts=run_opts,
-            get_raw_nnet_from_am=False)
+            get_raw_nnet_from_am=False,
+            extra_egs_copy_cmd=args.extra_egs_copy_cmd)
 
     if args.cleanup:
         logger.info("Cleaning up the experiment directory "

diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py
index 29df61ab546..ae038445fc0 100755
--- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py
+++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py
@@ -69,6 +69,9 @@ def get_args():
                         help="""Number of left steps used in the estimation of
                         LSTM state before prediction of the first label.
                        Overrides the default value in CommonParser""")
+    parser.add_argument("--egs.extra-copy-cmd", type=str,
+                        dest='extra_egs_copy_cmd', default="",
+                        help="""Modify egs before passing them to training""")
 
     # trainer options
     parser.add_argument("--trainer.samples-per-iter", type=int,
@@ -424,7 +427,8 @@ def learning_rate(iter, current_num_jobs, num_archives_processed):
                 cv_minibatch_size=args.cv_minibatch_size,
                 run_opts=run_opts,
                 get_raw_nnet_from_am=False,
-                background_process_handler=background_process_handler)
+                background_process_handler=background_process_handler,
+                extra_egs_copy_cmd=args.extra_egs_copy_cmd)
 
             if args.cleanup:
                 # do a clean up of everything but the last 2 models, under certain
@@ -455,7 +459,8 @@ def learning_rate(iter, current_num_jobs, num_archives_processed):
             left_context=left_context, right_context=right_context,
             run_opts=run_opts, chunk_width=args.chunk_width,
             background_process_handler=background_process_handler,
-            get_raw_nnet_from_am=False)
+            get_raw_nnet_from_am=False,
+            extra_egs_copy_cmd=args.extra_egs_copy_cmd)
 
     if include_log_softmax and args.stage <= num_iters + 1:
         logger.info("Getting average posterior for purposes of "
@@ -465,7 +470,8 @@ def learning_rate(iter, current_num_jobs, num_archives_processed):
             num_archives=num_archives, left_context=left_context,
             right_context=right_context,
             prior_subset_size=args.prior_subset_size, run_opts=run_opts,
-            get_raw_nnet_from_am=False)
+            get_raw_nnet_from_am=False,
+            extra_egs_copy_cmd=args.extra_egs_copy_cmd)
 
     if args.cleanup:
         logger.info("Cleaning up the experiment directory "

From c5796c3c206795f6ce5ecbe154e7c1be592981b1 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 23 Nov 2016 21:49:35 -0500
Subject: [PATCH 052/530] asr_diarization: Create get_egs.py supporting
 multiple targets and get-egs-multiple-targets

---
 egs/wsj/s5/steps/libs/data.py                 |  57 ++
 .../steps/nnet3/get_egs_multiple_targets.py   | 910 ++++++++++++++++++
 src/nnet3bin/Makefile                         |   2 +-
 .../nnet3-get-egs-multiple-targets.cc         | 538 +++++++++++
 4 files changed, 1506 insertions(+), 1 deletion(-)
 create mode 100644 egs/wsj/s5/steps/libs/data.py
 create mode 100755 egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py
 create mode 100644 src/nnet3bin/nnet3-get-egs-multiple-targets.cc

diff --git a/egs/wsj/s5/steps/libs/data.py b/egs/wsj/s5/steps/libs/data.py
new file mode 100644
index 00000000000..44895cae1a4
--- /dev/null
+++ b/egs/wsj/s5/steps/libs/data.py
@@ -0,0 +1,57 @@
+import os
+
+import libs.common as common_lib
+
+def get_frame_shift(data_dir):
+    frame_shift = common_lib.run_kaldi_command("utils/data/get_frame_shift.sh {0}".format(data_dir))[0]
+    return float(frame_shift.strip())
+
+def generate_utt2dur(data_dir):
+    common_lib.run_kaldi_command("utils/data/get_utt2dur.sh {0}".format(data_dir))
+
+def get_utt2dur(data_dir):
+    generate_utt2dur(data_dir)
+    utt2dur = {}
+    for line in open('{0}/utt2dur'.format(data_dir), 'r').readlines():
+        parts = line.split()
+        utt2dur[parts[0]] = float(parts[1])
+    return utt2dur
+
+def get_utt2uniq(data_dir):
+    utt2uniq_file = '{0}/utt2uniq'.format(data_dir)
+    if not os.path.exists(utt2uniq_file):
+        return None, None
+    utt2uniq = {}
+    uniq2utt = {}
+    for line in open(utt2uniq_file, 'r').readlines():
+        parts = line.split()
+        utt2uniq[parts[0]] = parts[1]
+        if uniq2utt.has_key(parts[1]):
+            uniq2utt[parts[1]].append(parts[0])
+        else:
+            uniq2utt[parts[1]] = [parts[0]]
+    return utt2uniq, uniq2utt
+
+def get_num_frames(data_dir, utts = None):
+    generate_utt2dur(data_dir)
+    frame_shift = get_frame_shift(data_dir)
+    total_duration = 0
+    utt2dur = get_utt2dur(data_dir)
+    if utts is None:
+        utts = utt2dur.keys()
+    for utt in utts:
+        total_duration = total_duration + utt2dur[utt]
+    return int(float(total_duration)/frame_shift)
+
+def create_data_links(file_names):
+    # if file_names already exist create_data_link.pl returns with code 1
+    # so we just delete them before calling create_data_link.pl
+    for file_name in file_names:
+        try_to_delete(file_name)
+    common_lib.run_kaldi_command(" utils/create_data_link.pl {0}".format(" ".join(file_names)))
+
+def try_to_delete(file_name):
+    try:
+        os.remove(file_name)
+    except OSError:
+        pass
diff --git a/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py b/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py
new file mode 100755
index 00000000000..16e1f98a019
--- /dev/null
+++ b/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py
@@ -0,0 +1,910 @@
+#!/usr/bin/env python
+
+# Copyright 2016 Vijayaditya Peddinti
+#           2016 Vimal Manohar
+# Apache 2.0.
+
+from __future__ import print_function
+import os
+import argparse
+import sys
+import logging
+import shlex
+import random
+import math
+import glob
+
+import libs.data as data_lib
+import libs.common as common_lib
+
+logger = logging.getLogger('libs')
+logger.setLevel(logging.INFO)
+handler = logging.StreamHandler()
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter("%(asctime)s [%(filename)s:%(lineno)s - "
+                              "%(funcName)s - %(levelname)s ] %(message)s")
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+logger.info('Getting egs for training')
+
+
+def get_args():
+    # we add compulsory arguments as named arguments for readability
+    parser = argparse.ArgumentParser(
+        description="""Generates training examples used to train the 'nnet3'
+        network (and also the validation examples used for diagnostics),
+        and puts them in separate archives.""",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument("--cmd", type=str, default="run.pl",
+                        help="Specifies the script to launch jobs,"
+                        " e.g. queue.pl for launching on an SGE cluster, run.pl"
+                        " for launching on the local machine")
+    # feat options
+    parser.add_argument("--feat.dir", type=str, dest='feat_dir', required=True,
+                        help="Directory with features used for training "
+                        "the neural network.")
+    parser.add_argument("--feat.online-ivector-dir", type=str,
+                        dest='online_ivector_dir',
+                        default=None, action=common_lib.NullstrToNoneAction,
+                        help="directory with the ivectors extracted in an "
+                        "online fashion.")
+    parser.add_argument("--feat.cmvn-opts", type=str, dest='cmvn_opts',
+                        default=None, action=common_lib.NullstrToNoneAction,
+                        help="A string specifying '--norm-means' and "
+                        "'--norm-vars' values")
+    parser.add_argument("--feat.apply-cmvn-sliding", type=str,
+                        dest='apply_cmvn_sliding',
+                        default=False, action=common_lib.StrToBoolAction,
+                        help="Apply CMVN sliding, instead of per-utterance "
+                        "or per-speaker")
+
+    # egs extraction options
+    parser.add_argument("--frames-per-eg", type=int, default=8,
+                        help="""Number of frames of labels per example.
+                        more->less disk space and less time preparing egs, but
+                        more I/O during training.
+                        note: the script may reduce this if
+                        reduce-frames-per-eg is true.""")
+    parser.add_argument("--left-context", type=int, default=4,
+                        help="""Amount of left-context per eg (i.e. extra
+                        frames of input features not present in the output
+                        supervision).""")
+    parser.add_argument("--right-context", type=int, default=4,
+                        help="Amount of right-context per eg")
+    parser.add_argument("--valid-left-context", type=int, default=None,
+                        help="""Amount of left-context for validation egs,
+                        typically used in recurrent architectures to ensure
+                        matched condition with training egs""")
+    parser.add_argument("--valid-right-context", type=int, default=None,
+                        help="""Amount of right-context for validation egs,
+                        typically used in recurrent architectures to ensure
+                        matched condition with training egs""")
+    parser.add_argument("--compress-input", type=str, default=True,
+                        action=common_lib.StrToBoolAction,
+                        choices=["true", "false"],
+                        help="If false, disables compression. Might be "
+                        "necessary to check if results will be affected.")
+    parser.add_argument("--input-compress-format", type=int, default=0,
+                        help="Format used for compressing the input features")
+
+    parser.add_argument("--reduce-frames-per-eg", type=str, default=True,
+                        action=common_lib.StrToBoolAction,
+                        choices=["true", "false"],
+                        help="""If true, this script may reduce the
+                        frames-per-eg if there is only one archive and even
+                        with the reduced frames-per-eg, the number of
+                        samples-per-iter that would result is less than or
+                        equal to the user-specified value.""")
+
+    parser.add_argument("--num-utts-subset", type=int, default=300,
+                        help="Number of utterances in validation and training"
+                        " subsets used for shrinkage and diagnostics")
+    parser.add_argument("--num-utts-subset-valid", type=int,
+                        help="Number of utterances in validation"
+                        " subset used for diagnostics")
+    parser.add_argument("--num-utts-subset-train", type=int,
+                        help="Number of utterances in training"
+                        " subset used for shrinkage and diagnostics")
+    parser.add_argument("--num-train-egs-combine", type=int, default=10000,
+                        help="Training examples for combination weights at the"
+                        " very end.")
+    parser.add_argument("--num-valid-egs-combine", type=int, default=0,
+                        help="Validation examples for combination weights at "
+                        "the very end.")
+    parser.add_argument("--num-egs-diagnostic", type=int, default=4000,
+                        help="Number of frames for 'compute-probs' jobs")
+
+    parser.add_argument("--samples-per-iter", type=int, default=400000,
+                        help="""This is the target number of egs in each
+                        archive of egs (prior to merging egs). We probably
+                        should have called it egs_per_iter. This is just a
+                        guideline; it will pick a number that divides the
+                        number of samples in the entire data.""")
+
+    parser.add_argument("--stage", type=int, default=0,
+                        help="Stage to start running script from")
+    parser.add_argument("--num-jobs", type=int, default=6,
+                        help="""This should be set to the maximum number of
+                        jobs you are comfortable to run in parallel; you can
+                        increase it if your disk speed is greater and you have
+                        more machines.""")
+    parser.add_argument("--srand", type=int, default=0,
+                        help="Rand seed for nnet3-copy-egs and "
+                        "nnet3-shuffle-egs")
+
+    parser.add_argument("--targets-parameters", type=str, action='append',
+                        required=True, dest='targets_para_array',
+                        help="""Parameters for targets. Each set of parameters
+                        corresponds to a separate output node of the neural
+                        network. The targets can be sparse or dense.
+                        The parameters used are:
+                        --targets-rspecifier=<rspecifier>
+                        # rspecifier for the targets, can be alignment or
+                        # matrix.
+                        --num-targets=<num-targets>
+                        # targets dimension. required for sparse feats.
+                        --target-type=<dense|sparse>""")
+
+    parser.add_argument("--dir", type=str, required=True,
+                        help="Directory to store the examples")
+
+    print(' '.join(sys.argv))
+    print(sys.argv)
+
+    args = parser.parse_args()
+
+    args = process_args(args)
+
+    return args
+
+
+def process_args(args):
+    # process the options
+    if args.num_utts_subset_valid is None:
+        args.num_utts_subset_valid = args.num_utts_subset
+
+    if args.num_utts_subset_train is None:
+        args.num_utts_subset_train = args.num_utts_subset
+
+    if args.valid_left_context is None:
+        args.valid_left_context = args.left_context
+    if args.valid_right_context is None:
+        args.valid_right_context = args.right_context
+
+    if (args.left_context < 0 or args.right_context < 0
+            or args.valid_left_context < 0 or args.valid_right_context < 0):
+        raise Exception(
+            "--{,valid-}{left,right}-context should be non-negative")
+
+    return args
+
+
+def check_for_required_files(feat_dir, targets_scps, online_ivector_dir=None):
+    required_files = ['{0}/feats.scp'.format(feat_dir),
+                      '{0}/cmvn.scp'.format(feat_dir)]
+    if online_ivector_dir is not None:
+        required_files.append('{0}/ivector_online.scp'.format(
+            online_ivector_dir))
+        required_files.append('{0}/ivector_period'.format(
+            online_ivector_dir))
+
+    for file in required_files:
+        if not os.path.isfile(file):
+            raise Exception('Expected {0} to exist.'.format(file))
+
+
+def parse_targets_parameters_array(para_array):
+    targets_parser = argparse.ArgumentParser()
+    targets_parser.add_argument("--output-name", type=str, required=True,
+                                help="Name of the output. e.g. output-xent")
+    targets_parser.add_argument("--dim", type=int, default=-1,
+                                help="Target dimension (required for sparse "
+                                "targets)")
+    targets_parser.add_argument("--target-type", type=str, default="dense",
+                                choices=["dense", "sparse"],
+                                help="Dense for matrix format")
+    targets_parser.add_argument("--targets-scp", type=str, required=True,
+                                help="Scp file of targets; can be posteriors "
+                                "or matrices")
+    targets_parser.add_argument("--compress", type=str, default=True,
+                                action=common_lib.StrToBoolAction,
+                                help="Specifies whether the output must be "
+                                "compressed")
+    targets_parser.add_argument("--compress-format", type=int, default=0,
+                                help="Format for compressing target")
+    targets_parser.add_argument("--deriv-weights-scp", type=str, default="",
+                                help="Per-frame deriv weights for this output")
+    targets_parser.add_argument("--scp2ark-cmd", type=str, default="",
+                                help="""The command that is used to convert
+                                targets scp to archive. e.g. An scp of
+                                alignments can be converted to posteriors using
+                                ali-to-post""")
+
+    targets_parameters = [targets_parser.parse_args(shlex.split(x))
+                          for x in para_array]
+
+    for t in targets_parameters:
+        if not os.path.isfile(t.targets_scp):
+            raise Exception("Expected {0} to exist.".format(t.targets_scp))
+
+        if (t.target_type == "dense"):
+            dim = common_lib.get_feat_dim_from_scp(t.targets_scp)
+            if (t.dim != -1 and t.dim != dim):
+                raise Exception('Mismatch in --dim provided and feat dim for '
+                                'file {0}; {1} vs {2}'.format(t.targets_scp,
+                                                              t.dim, dim))
+            t.dim = -dim
+
+    return targets_parameters
+
+
+def sample_utts(feat_dir, num_utts_subset, min_duration, exclude_list=None):
+    utt2durs_dict = data_lib.get_utt2dur(feat_dir)
+    utt2durs = utt2durs_dict.items()
+    utt2uniq, uniq2utt = data_lib.get_utt2uniq(feat_dir)
+    if num_utts_subset is None:
+        num_utts_subset = len(utt2durs)
+        if exclude_list is not None:
+            num_utts_subset = num_utts_subset - len(exclude_list)
+
+    random.shuffle(utt2durs)
+    sampled_utts = []
+
+    index = 0
+    num_trials = 0
+    while (len(sampled_utts) < num_utts_subset
+           and num_trials < len(utt2durs)):
+        if utt2durs[index][-1] >= min_duration:
+            if utt2uniq is not None:
+                uniq_id = utt2uniq[utt2durs[index][0]]
+                utts2add = uniq2utt[uniq_id]
+            else:
+                utts2add = [utt2durs[index][0]]
+            exclude_utt = False
+            if exclude_list is not None:
+                for utt in utts2add:
+                    if utt in exclude_list:
+                        exclude_utt = True
+                        break
+            if not exclude_utt:
+                for utt in utts2add:
+                    sampled_utts.append(utt)
+
+        index = index + 1
+        num_trials = num_trials + 1
+
+    if exclude_list is not None:
+        assert(len(set(exclude_list).intersection(sampled_utts)) == 0)
+    if len(sampled_utts) < num_utts_subset:
+        raise Exception(
+            """Number of utterances which have duration of at least {md}
+            seconds is really low (required={rl}, available={al}). 
Please + check your data.""".format( + md=min_duration, al=len(sampled_utts), rl=num_utts_subset)) + + sampled_utts_durs = [] + for utt in sampled_utts: + sampled_utts_durs.append([utt, utt2durs_dict[utt]]) + return sampled_utts, sampled_utts_durs + + +def write_list(listd, file_name): + file_handle = open(file_name, 'w') + assert(type(listd) == list) + for item in listd: + file_handle.write(str(item)+"\n") + file_handle.close() + + +def get_max_open_files(): + stdout, stderr = common_lib.run_kaldi_command("ulimit -n") + return int(stdout) + + +def get_feat_ivector_strings(dir, feat_dir, split_feat_dir, + cmvn_opt_string, ivector_dir=None, + apply_cmvn_sliding=False): + + if not apply_cmvn_sliding: + train_feats = ("ark,s,cs:utils/filter_scp.pl --exclude " + "{dir}/valid_uttlist {sdir}/JOB/feats.scp | " + "apply-cmvn {cmvn} --utt2spk=ark:{sdir}/JOB/utt2spk " + "scp:{sdir}/JOB/cmvn.scp scp:- ark:- |".format( + dir=dir, sdir=split_feat_dir, + cmvn=cmvn_opt_string)) + valid_feats = ("ark,s,cs:utils/filter_scp.pl {dir}/valid_uttlist " + "{fdir}/feats.scp | " + "apply-cmvn {cmvn} --utt2spk=ark:{fdir}/utt2spk " + "scp:{fdir}/cmvn.scp scp:- ark:- |".format( + dir=dir, fdir=feat_dir, cmvn=cmvn_opt_string)) + train_subset_feats = ("ark,s,cs:utils/filter_scp.pl " + "{dir}/train_subset_uttlist {fdir}/feats.scp | " + "apply-cmvn {cmvn} --utt2spk=ark:{fdir}/utt2spk " + "scp:{fdir}/cmvn.scp scp:- ark:- |".format( + dir=dir, fdir=feat_dir, + cmvn=cmvn_opt_string)) + + def feats_subset_func(subset_list): + return ("ark,s,cs:utils/filter_scp.pl {subset_list} " + "{fdir}/feats.scp | " + "apply-cmvn {cmvn} --utt2spk=ark:{fdir}/utt2spk " + "scp:{fdir}/cmvn.scp scp:- ark:- |".format( + dir=dir, subset_list=subset_list, + fdir=feat_dir, cmvn=cmvn_opt_string)) + + else: + train_feats = ("ark,s,cs:utils/filter_scp.pl --exclude " + "{dir}/valid_uttlist {sdir}/JOB/feats.scp | " + "apply-cmvn-sliding scp:{sdir}/JOB/cmvn.scp scp:- " + "ark:- |".format(dir=dir, sdir=split_feat_dir, + cmvn=cmvn_opt_string)) + + def feats_subset_func(subset_list): + return ("ark,s,cs:utils/filter_scp.pl {subset_list} " + "{fdir}/feats.scp | " + "apply-cmvn-sliding {cmvn} scp:{fdir}/cmvn.scp scp:- " + "ark:- |".format(dir=dir, subset_list=subset_list, + fdir=feat_dir, cmvn=cmvn_opt_string)) + + train_subset_feats = feats_subset_func( + "{0}/train_subset_uttlist".format(dir)) + valid_feats = feats_subset_func("{0}/valid_uttlist".format(dir)) + + if ivector_dir is not None: + ivector_period = common_lib.GetIvectorPeriod(ivector_dir) + ivector_opt = ("--ivectors='ark,s,cs:utils/filter_scp.pl " + "{sdir}/JOB/utt2spk {idir}/ivector_online.scp | " + "subsample-feats --n=-{period} scp:- ark:- |'".format( + sdir=split_feat_dir, idir=ivector_dir, + period=ivector_period)) + valid_ivector_opt = ("--ivectors='ark,s,cs:utils/filter_scp.pl " + "{dir}/valid_uttlist {idir}/ivector_online.scp | " + "subsample-feats --n=-{period} " + "scp:- ark:- |'".format( + dir=dir, idir=ivector_dir, + period=ivector_period)) + train_subset_ivector_opt = ( + "--ivectors='ark,s,cs:utils/filter_scp.pl " + "{dir}/train_subset_uttlist {idir}/ivector_online.scp | " + "subsample-feats --n=-{period} scp:- ark:- |'".format( + dir=dir, idir=ivector_dir, period=ivector_period)) + else: + ivector_opt = '' + valid_ivector_opt = '' + train_subset_ivector_opt = '' + + return {'train_feats': train_feats, + 'valid_feats': valid_feats, + 'train_subset_feats': train_subset_feats, + 'feats_subset_func': feats_subset_func, + 'ivector_opts': ivector_opt, + 'valid_ivector_opts': 
valid_ivector_opt, + 'train_subset_ivector_opts': train_subset_ivector_opt, + 'feat_dim': common_lib.get_feat_dim(feat_dir), + 'ivector_dim': common_lib.get_ivector_dim(ivector_dir)} + + +def get_egs_options(targets_parameters, frames_per_eg, + left_context, right_context, + valid_left_context, valid_right_context, + compress_input, + input_compress_format=0, length_tolerance=0): + + train_egs_opts = [] + train_egs_opts.append("--left-context={0}".format(left_context)) + train_egs_opts.append("--right-context={0}".format(right_context)) + train_egs_opts.append("--num-frames={0}".format(frames_per_eg)) + train_egs_opts.append("--compress-input={0}".format(compress_input)) + train_egs_opts.append("--input-compress-format={0}".format( + input_compress_format)) + train_egs_opts.append("--compress-targets={0}".format( + ':'.join(["true" if t.compress else "false" + for t in targets_parameters]))) + train_egs_opts.append("--targets-compress-formats={0}".format( + ':'.join([str(t.compress_format) + for t in targets_parameters]))) + train_egs_opts.append("--length-tolerance={0}".format(length_tolerance)) + train_egs_opts.append("--output-names={0}".format( + ':'.join([t.output_name + for t in targets_parameters]))) + train_egs_opts.append("--output-dims={0}".format( + ':'.join([str(t.dim) + for t in targets_parameters]))) + + valid_egs_opts = ( + "--left-context={vlc} --right-context={vrc} " + "--num-frames={n} --compress-input={comp} " + "--input-compress-format={icf} --compress-targets={ct} " + "--targets-compress-formats={tcf} --length-tolerance={tol} " + "--output-names={names} --output-dims={dims}".format( + vlc=valid_left_context, vrc=valid_right_context, n=frames_per_eg, + comp=compress_input, icf=input_compress_format, + ct=':'.join(["true" if t.compress else "false" + for t in targets_parameters]), + tcf=':'.join([str(t.compress_format) + for t in targets_parameters]), + tol=length_tolerance, + names=':'.join([t.output_name + for t in targets_parameters]), + dims=':'.join([str(t.dim) for t in targets_parameters]))) + + return {'train_egs_opts': " ".join(train_egs_opts), + 'valid_egs_opts': valid_egs_opts} + + +def get_targets_list(targets_parameters, subset_list): + targets_list = [] + for t in targets_parameters: + rspecifier = "ark,s,cs:" if t.scp2ark_cmd != "" else "scp,s,cs:" + rspecifier += get_subset_rspecifier(t.targets_scp, subset_list) + rspecifier += t.scp2ark_cmd + deriv_weights_rspecifier = "" + if t.deriv_weights_scp != "": + deriv_weights_rspecifier = "scp,s,cs:{0}".format( + get_subset_rspecifier(t.deriv_weights_scp, subset_list)) + this_targets = '''"{rspecifier}" "{dw}"'''.format( + rspecifier=rspecifier, dw=deriv_weights_rspecifier) + + targets_list.append(this_targets) + return " ".join(targets_list) + + +def get_subset_rspecifier(scp_file, subset_list): + if scp_file == "": + return "" + return "utils/filter_scp.pl {subset} {scp} |".format(subset=subset_list, + scp=scp_file) + + +def split_scp(scp_file, num_jobs): + out_scps = ["{0}.{1}".format(scp_file, n) for n in range(1, num_jobs + 1)] + common_lib.run_kaldi_command("utils/split_scp.pl {scp} {oscps}".format( + scp=scp_file, + oscps=' '.join(out_scps))) + return out_scps + + +def generate_valid_train_subset_egs(dir, targets_parameters, + feat_ivector_strings, egs_opts, + num_train_egs_combine, + num_valid_egs_combine, + num_egs_diagnostic, cmd, + num_jobs=1): + wait_pids = [] + + logger.info("Creating validation and train subset examples.") + + split_scp('{0}/valid_uttlist'.format(dir), num_jobs) + 
split_scp('{0}/train_subset_uttlist'.format(dir), num_jobs) + + valid_pid = common_lib.run_kaldi_command( + """{cmd} JOB=1:{nj} {dir}/log/create_valid_subset.JOB.log \ + nnet3-get-egs-multiple-targets {v_iv_opt} {v_egs_opt} "{v_feats}" \ + {targets} ark:{dir}/valid_all.JOB.egs""".format( + cmd=cmd, nj=num_jobs, dir=dir, + v_egs_opt=egs_opts['valid_egs_opts'], + v_iv_opt=feat_ivector_strings['valid_ivector_opts'], + v_feats=feat_ivector_strings['feats_subset_func']( + '{dir}/valid_uttlist.JOB'.format(dir=dir)), + targets=get_targets_list( + targets_parameters, + '{dir}/valid_uttlist.JOB'.format(dir=dir))), + wait=False) + + train_pid = common_lib.run_kaldi_command( + """{cmd} JOB=1:{nj} {dir}/log/create_train_subset.JOB.log \ + nnet3-get-egs-multiple-targets {t_iv_opt} {v_egs_opt} "{t_feats}" \ + {targets} ark:{dir}/train_subset_all.JOB.egs""".format( + cmd=cmd, nj=num_jobs, dir=dir, + v_egs_opt=egs_opts['valid_egs_opts'], + t_iv_opt=feat_ivector_strings['train_subset_ivector_opts'], + t_feats=feat_ivector_strings['feats_subset_func']( + '{dir}/train_subset_uttlist.JOB'.format(dir=dir)), + targets=get_targets_list( + targets_parameters, + '{dir}/train_subset_uttlist.JOB'.format(dir=dir))), + wait=False) + + wait_pids.append(valid_pid) + wait_pids.append(train_pid) + + for pid in wait_pids: + stdout, stderr = pid.communicate() + if pid.returncode != 0: + raise Exception(stderr) + + valid_egs_all = ' '.join(['{dir}/valid_all.{n}.egs'.format(dir=dir, n=n) + for n in range(1, num_jobs + 1)]) + train_subset_egs_all = ' '.join(['{dir}/train_subset_all.{n}.egs'.format( + dir=dir, n=n) + for n in range(1, num_jobs + 1)]) + + wait_pids = [] + logger.info("... Getting subsets of validation examples for diagnostics " + " and combination.") + pid = common_lib.run_kaldi_command( + """{cmd} {dir}/log/create_valid_subset_combine.log \ + cat {valid_egs_all} \| nnet3-subset-egs --n={nve_combine} ark:- \ + ark:{dir}/valid_combine.egs""".format( + cmd=cmd, dir=dir, valid_egs_all=valid_egs_all, + nve_combine=num_valid_egs_combine), + wait=False) + wait_pids.append(pid) + + pid = common_lib.run_kaldi_command( + """{cmd} {dir}/log/create_valid_subset_diagnostic.log \ + cat {valid_egs_all} \| nnet3-subset-egs --n={ne_diagnostic} ark:- \ + ark:{dir}/valid_diagnostic.egs""".format( + cmd=cmd, dir=dir, valid_egs_all=valid_egs_all, + ne_diagnostic=num_egs_diagnostic), + wait=False) + wait_pids.append(pid) + + pid = common_lib.run_kaldi_command( + """{cmd} {dir}/log/create_train_subset_combine.log \ + cat {train_subset_egs_all} \| \ + nnet3-subset-egs --n={nte_combine} ark:- \ + ark:{dir}/train_combine.egs""".format( + cmd=cmd, dir=dir, train_subset_egs_all=train_subset_egs_all, + nte_combine=num_train_egs_combine), + wait=False) + wait_pids.append(pid) + + pid = common_lib.run_kaldi_command( + """{cmd} {dir}/log/create_train_subset_diagnostic.log \ + cat {train_subset_egs_all} \| \ + nnet3-subset-egs --n={ne_diagnostic} ark:- \ + ark:{dir}/train_diagnostic.egs""".format( + cmd=cmd, dir=dir, train_subset_egs_all=train_subset_egs_all, + ne_diagnostic=num_egs_diagnostic), wait=False) + wait_pids.append(pid) + + for pid in wait_pids: + stdout, stderr = pid.communicate() + if pid.returncode != 0: + raise Exception(stderr) + + common_lib.run_kaldi_command( + """cat {dir}/valid_combine.egs {dir}/train_combine.egs > \ + {dir}/combine.egs""".format(dir=dir)) + + # perform checks + for file_name in ('{0}/combine.egs {0}/train_diagnostic.egs ' + '{0}/valid_diagnostic.egs'.format(dir).split()): + if os.path.getsize(file_name) == 
0: + raise Exception("No examples in {0}".format(file_name)) + + # clean-up + for x in ('{0}/valid_all.*.egs {0}/train_subset_all.*.egs ' + '{0}/train_combine.egs ' + '{0}/valid_combine.egs'.format(dir).split()): + for file_name in glob.glob(x): + os.remove(file_name) + + +def generate_training_examples_internal(dir, targets_parameters, feat_dir, + train_feats_string, + train_egs_opts_string, + ivector_opts, + num_jobs, frames_per_eg, + samples_per_iter, cmd, srand=0, + reduce_frames_per_eg=True, + only_shuffle=False, + dry_run=False): + + # The examples will go round-robin to egs_list. Note: we omit the + # 'normalization.fst' argument while creating temporary egs: the phase of + # egs preparation that involves the normalization FST is quite + # CPU-intensive and it's more convenient to do it later, in the 'shuffle' + # stage. Otherwise to make it efficient we need to use a large 'nj', like + # 40, and in that case there can be too many small files to deal with, + # because the total number of files is the product of 'nj' by + # 'num_archives_intermediate', which might be quite large. + num_frames = data_lib.get_num_frames(feat_dir) + num_archives = (num_frames) / (frames_per_eg * samples_per_iter) + 1 + + reduced = False + while (reduce_frames_per_eg and frames_per_eg > 1 and + num_frames / ((frames_per_eg-1)*samples_per_iter) == 0): + frames_per_eg -= 1 + num_archives = 1 + reduced = True + + if reduced: + logger.info("Reduced frames-per-eg to {0} " + "because amount of data is small".format(frames_per_eg)) + + max_open_files = get_max_open_files() + num_archives_intermediate = num_archives + archives_multiple = 1 + while (num_archives_intermediate+4) > max_open_files: + archives_multiple = archives_multiple + 1 + num_archives_intermediate = int(math.ceil(float(num_archives) + / archives_multiple)) + num_archives = num_archives_intermediate * archives_multiple + egs_per_archive = num_frames/(frames_per_eg * num_archives) + + if egs_per_archive > samples_per_iter: + raise Exception( + """egs_per_archive({epa}) > samples_per_iter({fpi}). 
+ This is an error in the logic for determining + egs_per_archive""".format(epa=egs_per_archive, + fpi=samples_per_iter)) + + if dry_run: + cleanup(dir, archives_multiple) + return {'num_frames': num_frames, + 'num_archives': num_archives, + 'egs_per_archive': egs_per_archive} + + logger.info("Splitting a total of {nf} frames into {na} archives, " + "each with {epa} egs.".format(nf=num_frames, na=num_archives, + epa=egs_per_archive)) + + if os.path.isdir('{0}/storage'.format(dir)): + # this is a striped directory, so create the softlinks + data_lib.create_data_links(["{dir}/egs.{x}.ark".format(dir=dir, x=x) + for x in range(1, num_archives + 1)]) + for x in range(1, num_archives_intermediate + 1): + data_lib.create_data_links( + ["{dir}/egs_orig.{y}.{x}.ark".format(dir=dir, x=x, y=y) + for y in range(1, num_jobs + 1)]) + + split_feat_dir = "{0}/split{1}".format(feat_dir, num_jobs) + egs_list = ' '.join(['ark:{dir}/egs_orig.JOB.{ark_num}.ark'.format( + dir=dir, ark_num=x) + for x in range(1, num_archives_intermediate + 1)]) + + if not only_shuffle: + common_lib.run_kaldi_command( + """{cmd} JOB=1:{nj} {dir}/log/get_egs.JOB.log \ + nnet3-get-egs-multiple-targets {iv_opts} {egs_opts} \ + "{feats}" {targets} ark:- \| \ + nnet3-copy-egs --random=true --srand=$[JOB+{srand}] \ + ark:- {egs_list}""".format( + cmd=cmd, nj=num_jobs, dir=dir, srand=srand, + iv_opts=ivector_opts, egs_opts=train_egs_opts_string, + feats=train_feats_string, + targets=get_targets_list(targets_parameters, + '{sdir}/JOB/utt2spk'.format( + sdir=split_feat_dir)), + egs_list=egs_list)) + + logger.info("Recombining and shuffling order of archives on disk") + egs_list = ' '.join(['{dir}/egs_orig.{n}.JOB.ark'.format(dir=dir, n=x) + for x in range(1, num_jobs + 1)]) + + if archives_multiple == 1: + # there are no intermediate archives so just shuffle egs across + # jobs and dump them into a single output + common_lib.run_kaldi_command( + """{cmd} --max-jobs-run {msjr} JOB=1:{nai} \ + {dir}/log/shuffle.JOB.log \ + nnet3-shuffle-egs --srand=$[JOB+{srand}] \ + "ark:cat {egs_list}|" ark:{dir}/egs.JOB.ark""".format( + cmd=cmd, msjr=num_jobs, + nai=num_archives_intermediate, srand=srand, + dir=dir, egs_list=egs_list)) + else: + # there are intermediate archives so we shuffle egs across jobs + # and split them into archives_multiple output archives + output_archives = ' '.join(["ark:{dir}/egs.JOB.{ark_num}.ark".format( + dir=dir, ark_num=x) + for x in range(1, archives_multiple + 1)]) + # archives were created as egs.x.y.ark + # linking them to egs.i.ark format which is expected by the training + # scripts + for i in range(1, num_archives_intermediate + 1): + for j in range(1, archives_multiple + 1): + archive_index = (i-1) * archives_multiple + j + common_lib.force_sym_link( + "egs.{0}.ark".format(archive_index), + "{dir}/egs.{i}.{j}.ark".format(dir=dir, i=i, j=j)) + + common_lib.run_kaldi_command( + """{cmd} --max-jobs-run {msjr} JOB=1:{nai} \ + {dir}/log/shuffle.JOB.log \ + nnet3-shuffle-egs --srand=$[JOB+{srand}] \ + "ark:cat {egs_list}|" ark:- \| \ + nnet3-copy-egs ark:- {oarks}""".format( + cmd=cmd, msjr=num_jobs, + nai=num_archives_intermediate, srand=srand, + dir=dir, egs_list=egs_list, oarks=output_archives)) + + cleanup(dir, archives_multiple) + return {'num_frames': num_frames, + 'num_archives': num_archives, + 'egs_per_archive': egs_per_archive} + + +def cleanup(dir, archives_multiple): + logger.info("Removing temporary archives in {0}.".format(dir)) + for file_name in glob.glob("{0}/egs_orig*".format(dir)): + real_path = 
os.path.realpath(file_name)
        data_lib.try_to_delete(real_path)
        data_lib.try_to_delete(file_name)

    if archives_multiple > 1:
        # there will be some extra soft links we want to delete
        for file_name in glob.glob('{0}/egs.*.*.ark'.format(dir)):
            os.remove(file_name)


def create_directory(dir):
    import errno
    try:
        os.makedirs(dir)
    except OSError, e:
        if e.errno == errno.EEXIST:
            pass
        else:
            raise


def generate_training_examples(dir, targets_parameters, feat_dir,
                               feat_ivector_strings, egs_opts,
                               frame_shift, frames_per_eg, samples_per_iter,
                               cmd, num_jobs, srand=0,
                               only_shuffle=False, dry_run=False):

    # generate the training options string with the given chunk_width
    train_egs_opts = egs_opts['train_egs_opts']
    # generate the feature vector string with the utt list for the
    # current chunk width
    train_feats = feat_ivector_strings['train_feats']

    if os.path.isdir('{0}/storage'.format(dir)):
        real_paths = [os.path.realpath(x).strip("/")
                      for x in glob.glob('{0}/storage/*'.format(dir))]
        common_lib.run_kaldi_command(
            """utils/create_split_dir.pl {target_dirs} \
                    {dir}/storage""".format(
                target_dirs=" ".join(real_paths), dir=dir))

    info = generate_training_examples_internal(
        dir=dir, targets_parameters=targets_parameters,
        feat_dir=feat_dir, train_feats_string=train_feats,
        train_egs_opts_string=train_egs_opts,
        ivector_opts=feat_ivector_strings['ivector_opts'],
        num_jobs=num_jobs, frames_per_eg=frames_per_eg,
        samples_per_iter=samples_per_iter, cmd=cmd,
        srand=srand,
        only_shuffle=only_shuffle,
        dry_run=dry_run)

    return info


def write_egs_info(info, info_dir):
    for x in ['num_frames', 'num_archives', 'egs_per_archive',
              'feat_dim', 'ivector_dim',
              'left_context', 'right_context', 'frames_per_eg']:
        write_list([info['{0}'.format(x)]], '{0}/{1}'.format(info_dir, x))


def generate_egs(egs_dir, feat_dir, targets_para_array,
                 online_ivector_dir=None,
                 frames_per_eg=8,
                 left_context=4,
                 right_context=4,
                 valid_left_context=None,
                 valid_right_context=None,
                 cmd="run.pl", stage=0,
                 cmvn_opts=None, apply_cmvn_sliding=False,
                 compress_input=True,
                 input_compress_format=0,
                 num_utts_subset=300,
                 num_train_egs_combine=1000,
                 num_valid_egs_combine=0,
                 num_egs_diagnostic=4000,
                 samples_per_iter=400000,
                 num_jobs=6,
                 srand=0):

    for directory in '{0}/log {0}/info'.format(egs_dir).split():
        create_directory(directory)

    print (cmvn_opts if cmvn_opts is not None else '',
           file=open('{0}/cmvn_opts'.format(egs_dir), 'w'))
    print ("true" if apply_cmvn_sliding else "false",
           file=open('{0}/apply_cmvn_sliding'.format(egs_dir), 'w'))

    targets_parameters = parse_targets_parameters_array(targets_para_array)

    # Check files
    check_for_required_files(feat_dir,
                             [t.targets_scp for t in targets_parameters],
                             online_ivector_dir)

    frame_shift = data_lib.get_frame_shift(feat_dir)
    min_duration = frames_per_eg * frame_shift
    valid_utts = sample_utts(feat_dir, num_utts_subset, min_duration)[0]
    train_subset_utts = sample_utts(feat_dir, num_utts_subset, min_duration,
                                    exclude_list=valid_utts)[0]
    train_utts, train_utts_durs = sample_utts(feat_dir, None, -1,
                                              exclude_list=valid_utts)

    write_list(valid_utts, '{0}/valid_uttlist'.format(egs_dir))
    write_list(train_subset_utts, '{0}/train_subset_uttlist'.format(egs_dir))
    write_list(train_utts, '{0}/train_uttlist'.format(egs_dir))

    # split the training data into parts for individual jobs
    # we will use the same number of jobs as that used for alignment
    split_feat_dir = 
common_lib.split_data(feat_dir, num_jobs) + feat_ivector_strings = get_feat_ivector_strings( + dir=egs_dir, feat_dir=feat_dir, split_feat_dir=split_feat_dir, + cmvn_opt_string=cmvn_opts, + ivector_dir=online_ivector_dir, + apply_cmvn_sliding=apply_cmvn_sliding) + + egs_opts = get_egs_options(targets_parameters=targets_parameters, + frames_per_eg=frames_per_eg, + left_context=left_context, + right_context=right_context, + valid_left_context=valid_left_context, + valid_right_context=valid_right_context, + compress_input=compress_input, + input_compress_format=input_compress_format) + + if stage <= 2: + logger.info("Generating validation and training subset examples") + + generate_valid_train_subset_egs( + dir=egs_dir, + targets_parameters=targets_parameters, + feat_ivector_strings=feat_ivector_strings, + egs_opts=egs_opts, + num_train_egs_combine=num_train_egs_combine, + num_valid_egs_combine=num_valid_egs_combine, + num_egs_diagnostic=num_egs_diagnostic, + cmd=cmd, + num_jobs=num_jobs) + + logger.info("Generating training examples on disk.") + info = generate_training_examples( + dir=egs_dir, + targets_parameters=targets_parameters, + feat_dir=feat_dir, + feat_ivector_strings=feat_ivector_strings, + egs_opts=egs_opts, + frame_shift=frame_shift, + frames_per_eg=frames_per_eg, + samples_per_iter=samples_per_iter, + cmd=cmd, + num_jobs=num_jobs, + srand=srand, + only_shuffle=True if stage > 3 else False, + dry_run=True if stage > 4 else False) + + info['feat_dim'] = feat_ivector_strings['feat_dim'] + info['ivector_dim'] = feat_ivector_strings['ivector_dim'] + info['left_context'] = left_context + info['right_context'] = right_context + info['frames_per_eg'] = frames_per_eg + + write_egs_info(info, '{dir}/info'.format(dir=egs_dir)) + + +def main(): + args = get_args() + generate_egs(args.dir, args.feat_dir, args.targets_para_array, + online_ivector_dir=args.online_ivector_dir, + frames_per_eg=args.frames_per_eg, + left_context=args.left_context, + right_context=args.right_context, + valid_left_context=args.valid_left_context, + valid_right_context=args.valid_right_context, + cmd=args.cmd, stage=args.stage, + cmvn_opts=args.cmvn_opts, + apply_cmvn_sliding=args.apply_cmvn_sliding, + compress_input=args.compress_input, + input_compress_format=args.input_compress_format, + num_utts_subset=args.num_utts_subset, + num_train_egs_combine=args.num_train_egs_combine, + num_valid_egs_combine=args.num_valid_egs_combine, + num_egs_diagnostic=args.num_egs_diagnostic, + samples_per_iter=args.samples_per_iter, + num_jobs=args.num_jobs, + srand=args.srand) + + +if __name__ == "__main__": + main() diff --git a/src/nnet3bin/Makefile b/src/nnet3bin/Makefile index d46c56a1044..aeb3dc1dc03 100644 --- a/src/nnet3bin/Makefile +++ b/src/nnet3bin/Makefile @@ -17,7 +17,7 @@ BINFILES = nnet3-init nnet3-info nnet3-get-egs nnet3-copy-egs nnet3-subset-egs \ nnet3-discriminative-merge-egs nnet3-discriminative-shuffle-egs \ nnet3-discriminative-compute-objf nnet3-discriminative-train \ discriminative-get-supervision nnet3-discriminative-subset-egs \ - nnet3-discriminative-compute-from-egs + nnet3-discriminative-compute-from-egs nnet3-get-egs-multiple-targets OBJFILES = diff --git a/src/nnet3bin/nnet3-get-egs-multiple-targets.cc b/src/nnet3bin/nnet3-get-egs-multiple-targets.cc new file mode 100644 index 00000000000..49f0dde4af7 --- /dev/null +++ b/src/nnet3bin/nnet3-get-egs-multiple-targets.cc @@ -0,0 +1,538 @@ +// nnet3bin/nnet3-get-egs-multiple-targets.cc + +// Copyright 2012-2015 Johns Hopkins University (author: Daniel 
Povey)
+//           2014-2016  Vimal Manohar

+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sstream>
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "hmm/transition-model.h"
+#include "hmm/posterior.h"
+#include "nnet3/nnet-example.h"
+#include "nnet3/nnet-example-utils.h"
+
+namespace kaldi {
+namespace nnet3 {
+
+bool ToBool(std::string str) {
+  std::transform(str.begin(), str.end(), str.begin(), ::tolower);
+
+  if ((str.compare("true") == 0) || (str.compare("t") == 0)
+      || (str.compare("1") == 0))
+    return true;
+  if ((str.compare("false") == 0) || (str.compare("f") == 0)
+      || (str.compare("0") == 0))
+    return false;
+  KALDI_ERR << "Invalid format for boolean argument [expected true or false]: "
+            << str;
+  return false;  // never reached
+}
+
+static void ProcessFile(const MatrixBase<BaseFloat> &feats,
+                        const MatrixBase<BaseFloat> *ivector_feats,
+                        const std::vector<std::string> &output_names,
+                        const std::vector<int32> &output_dims,
+                        const std::vector<const MatrixBase<BaseFloat>* > &dense_target_matrices,
+                        const std::vector<const Posterior*> &posteriors,
+                        const std::vector<const VectorBase<BaseFloat>* > &deriv_weights,
+                        const std::string &utt_id,
+                        bool compress_input,
+                        int32 input_compress_format,
+                        const std::vector<bool> &compress_targets,
+                        const std::vector<int32> &targets_compress_formats,
+                        int32 left_context,
+                        int32 right_context,
+                        int32 frames_per_eg,
+                        std::vector<int64> *num_frames_written,
+                        std::vector<int64> *num_egs_written,
+                        NnetExampleWriter *example_writer) {
+  KALDI_ASSERT(output_names.size() > 0);
+  //KALDI_ASSERT(feats.NumRows() == static_cast<int32>(targets.NumRows()));
+  for (int32 t = 0; t < feats.NumRows(); t += frames_per_eg) {
+
+    int32 tot_frames = left_context + frames_per_eg + right_context;
+
+    Matrix<BaseFloat> input_frames(tot_frames, feats.NumCols(), kUndefined);
+
+    // Set up "input_frames".
+    for (int32 j = -left_context; j < frames_per_eg + right_context; j++) {
+      int32 t2 = j + t;
+      if (t2 < 0) t2 = 0;
+      if (t2 >= feats.NumRows()) t2 = feats.NumRows() - 1;
+      SubVector<BaseFloat> src(feats, t2),
+          dest(input_frames, j + left_context);
+      dest.CopyFromVec(src);
+    }
+
+    NnetExample eg;
+
+    // call the regular input "input".
+    eg.io.push_back(NnetIo("input", - left_context,
+                           input_frames));
+
+    if (compress_input)
+      eg.io.back().Compress(input_compress_format);
+
+    // if applicable, add the iVector feature.
+    if (ivector_feats) {
+      int32 actual_frames_per_eg = std::min(frames_per_eg,
+                                            feats.NumRows() - t);
+      // try to get closest frame to middle of window to get
+      // a representative iVector.
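+      // For example, with t = 0, frames_per_eg = 8 and a 100-frame
+      // utterance, actual_frames_per_eg is 8 and closest_frame below
+      // works out to 4, i.e. the iVector is taken from near the center
+      // of the chunk.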
+      int32 closest_frame = t + (actual_frames_per_eg / 2);
+      KALDI_ASSERT(ivector_feats->NumRows() > 0);
+      if (closest_frame >= ivector_feats->NumRows())
+        closest_frame = ivector_feats->NumRows() - 1;
+      Matrix<BaseFloat> ivector(1, ivector_feats->NumCols());
+      ivector.Row(0).CopyFromVec(ivector_feats->Row(closest_frame));
+      eg.io.push_back(NnetIo("ivector", 0, ivector));
+    }
+
+    int32 num_outputs_added = 0;
+
+    for (int32 n = 0; n < output_names.size(); n++) {
+      Vector<BaseFloat> this_deriv_weights(0);
+      if (deriv_weights[n]) {
+        // actual_frames_per_eg is the number of frames with actual targets.
+        // At the end of the file, we pad with the last frame repeated
+        // so that all examples have the same structure (prevents the need
+        // for recompilations).
+        int32 actual_frames_per_eg = std::min(std::min(frames_per_eg,
+            feats.NumRows() - t), deriv_weights[n]->Dim() - t);
+
+        this_deriv_weights.Resize(frames_per_eg);
+        int32 frames_to_copy = std::min(t + actual_frames_per_eg,
+                                        deriv_weights[n]->Dim()) - t;
+        this_deriv_weights.Range(0, frames_to_copy).CopyFromVec(deriv_weights[n]->Range(t, frames_to_copy));
+        if (this_deriv_weights.Sum() == 0) {
+          continue;   // Ignore frames that have frame weights 0
+        }
+      }
+
+      if (dense_target_matrices[n]) {
+        const MatrixBase<BaseFloat> &targets = *dense_target_matrices[n];
+        Matrix<BaseFloat> targets_dest(frames_per_eg, targets.NumCols());
+
+        // actual_frames_per_eg is the number of frames with actual targets.
+        // At the end of the file, we pad with the last frame repeated
+        // so that all examples have the same structure (prevents the need
+        // for recompilations).
+        int32 actual_frames_per_eg = std::min(std::min(frames_per_eg,
+            feats.NumRows() - t), targets.NumRows() - t);
+
+        for (int32 i = 0; i < actual_frames_per_eg; i++) {
+          // Copy the i^th row of the target matrix from the (t+i)^th row of the
+          // input targets matrix
+          SubVector<BaseFloat> this_target_dest(targets_dest, i);
+          SubVector<BaseFloat> this_target_src(targets, t+i);
+          this_target_dest.CopyFromVec(this_target_src);
+        }
+
+        // Copy the last frame's target to the padded frames
+        for (int32 i = actual_frames_per_eg; i < frames_per_eg; i++) {
+          // Copy the i^th row of the target matrix from the last row of the
+          // input targets matrix
+          KALDI_ASSERT(t + actual_frames_per_eg - 1 == targets.NumRows() - 1);
+          SubVector<BaseFloat> this_target_dest(targets_dest, i);
+          SubVector<BaseFloat> this_target_src(targets, t+actual_frames_per_eg-1);
+          this_target_dest.CopyFromVec(this_target_src);
+        }
+
+        if (deriv_weights[n]) {
+          eg.io.push_back(NnetIo(output_names[n], this_deriv_weights, 0, targets_dest));
+        } else {
+          eg.io.push_back(NnetIo(output_names[n], 0, targets_dest));
+        }
+      } else if (posteriors[n]) {
+        const Posterior &pdf_post = *(posteriors[n]);
+
+        // actual_frames_per_eg is the number of frames with actual targets.
+        // At the end of the file, we pad with the last frame repeated
+        // so that all examples have the same structure (prevents the need
+        // for recompilations).
+        int32 actual_frames_per_eg = std::min(std::min(frames_per_eg,
+            feats.NumRows() - t), static_cast<int32>(pdf_post.size()) - t);
+
+        Posterior labels(frames_per_eg);
+        for (int32 i = 0; i < actual_frames_per_eg; i++)
+          labels[i] = pdf_post[t + i];
+        // remaining posteriors for frames are empty.
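+        // For example, with frames_per_eg = 8 and only 5 frames of
+        // targets left at position t, labels[0..4] are copied from
+        // pdf_post and labels[5..7] stay empty, so the padded frames
+        // carry no supervision for this output.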
+
+        if (deriv_weights[n]) {
+          eg.io.push_back(NnetIo(output_names[n], this_deriv_weights,
+                                 output_dims[n], 0, labels));
+        } else {
+          eg.io.push_back(NnetIo(output_names[n], output_dims[n], 0, labels));
+        }
+      } else
+        continue;
+      if (compress_targets[n])
+        eg.io.back().Compress(targets_compress_formats[n]);
+
+      num_outputs_added++;
+      // Strictly speaking this should add actual_frames_per_eg, which can
+      // differ from output to output; we count frames_per_eg for simplicity.
+      (*num_frames_written)[n] += frames_per_eg;
+      (*num_egs_written)[n] += 1;
+    }
+
+    if (num_outputs_added == 0) continue;
+
+    std::ostringstream os;
+    os << utt_id << "-" << t;
+
+    std::string key = os.str();  // key is <utterance-id>-<t>
+
+    KALDI_ASSERT(NumOutputs(eg) == num_outputs_added);
+
+    example_writer->Write(key, eg);
+  }
+}
+
+
+}  // namespace nnet3
+}  // namespace kaldi
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace kaldi::nnet3;
+    typedef kaldi::int32 int32;
+    typedef kaldi::int64 int64;
+
+    const char *usage =
+        "Get frame-by-frame examples of data for nnet3 neural network training.\n"
+        "This program is similar to nnet3-get-egs, but the targets here can be "
+        "dense matrices instead of posteriors (sparse matrices).\n"
+        "This is useful when you want the targets to be continuous real-valued, "
+        "with the neural network possibly trained with a quadratic objective.\n"
+        "\n"
+        "Usage: nnet3-get-egs-multiple-targets [options] <features-rspecifier> "
+        "<output1-name>:<targets-rspecifier1>:<target-dim1>[:<deriv-weights-rspecifier1>] "
+        "[ <output2-name>:<targets-rspecifier2>:<target-dim2> ... ] <egs-out>\n"
+        "\n"
+        "Here <output-name> is any random string for the output node name, \n"
+        "<targets-rspecifier> is the rspecifier for either dense targets in matrix format "
+        "or sparse targets in posterior format,\n"
+        "and <target-dim> is the target dimension of the output node for sparse targets, "
+        "or -1 for dense targets\n"
+        "\n"
+        "An example [where $feats expands to the actual features]:\n"
+        "nnet3-get-egs-multiple-targets --left-context=12 \\\n"
+        "--right-context=9 --num-frames=8 \"$feats\" \\\n"
+        "output-snr:\"ark:copy-matrix ark:exp/snrs/snr.1.ark ark:- |\":-1 \n"
+        "   ark:- \n";
+
+
+    bool compress_input = true;
+    int32 input_compress_format = 0;
+    int32 left_context = 0, right_context = 0,
+        num_frames = 1, length_tolerance = 2;
+
+    std::string ivector_rspecifier,
+        targets_compress_formats_str,
+        compress_targets_str;
+    std::string output_dims_str;
+    std::string output_names_str;
+
+    ParseOptions po(usage);
+    po.Register("compress-input", &compress_input, "If true, write egs in "
+                "compressed format.");
+    po.Register("input-compress-format", &input_compress_format, "Format for "
+                "compressing input feats e.g. 
Use 2 for compressing wave"); + po.Register("compress-targets", &compress_targets_str, "CSL of whether " + "targets must be compressed for each of the outputs"); + po.Register("targets-compress-formats", &targets_compress_formats_str, "Format for " + "compressing all feats in general"); + po.Register("left-context", &left_context, "Number of frames of left " + "context the neural net requires."); + po.Register("right-context", &right_context, "Number of frames of right " + "context the neural net requires."); + po.Register("num-frames", &num_frames, "Number of frames with labels " + "that each example contains."); + po.Register("ivectors", &ivector_rspecifier, "Rspecifier of ivector " + "features, as matrix."); + po.Register("length-tolerance", &length_tolerance, "Tolerance for " + "difference in num-frames between feat and ivector matrices"); + po.Register("output-dims", &output_dims_str, "CSL of output node dims"); + po.Register("output-names", &output_names_str, "CSL of output node names"); + //po.Register("deriv-weights-rspecifiers", &deriv_weights_rspecifiers_str, + // "CSL of per-frame weights (only binary - 0 or 1) that specifies " + // "whether a frame's gradient must be backpropagated or not. " + // "Not specifying this is equivalent to specifying a vector of " + // "all 1s."); + + po.Read(argc, argv); + + if (po.NumArgs() < 3) { + po.PrintUsage(); + exit(1); + } + + std::string feature_rspecifier = po.GetArg(1), + examples_wspecifier = po.GetArg(po.NumArgs()); + + // Read in all the training files. + SequentialBaseFloatMatrixReader feat_reader(feature_rspecifier); + RandomAccessBaseFloatMatrixReader ivector_reader(ivector_rspecifier); + NnetExampleWriter example_writer(examples_wspecifier); + + int32 num_outputs = (po.NumArgs() - 2) / 2; + KALDI_ASSERT(num_outputs > 0); + + std::vector deriv_weights_readers(num_outputs, + static_cast(NULL)); + std::vector dense_targets_readers(num_outputs, + static_cast(NULL)); + std::vector sparse_targets_readers(num_outputs, + static_cast(NULL)); + + + std::vector compress_targets(1, true); + std::vector compress_targets_vector; + + if (!compress_targets_str.empty()) { + SplitStringToVector(compress_targets_str, ":,", + true, &compress_targets_vector); + } + + if (compress_targets_vector.size() == 1 && num_outputs != 1) { + KALDI_WARN << "compress-targets is of size 1. " + << "Extending it to size num-outputs=" << num_outputs; + compress_targets[0] = ToBool(compress_targets_vector[0]); + compress_targets.resize(num_outputs, ToBool(compress_targets_vector[0])); + } else { + if (compress_targets_vector.size() != num_outputs) { + KALDI_ERR << "Mismatch in length of compress-targets and num-outputs; " + << compress_targets_vector.size() << " vs " << num_outputs; + } + for (int32 n = 0; n < num_outputs; n++) { + compress_targets[n] = ToBool(compress_targets_vector[n]); + } + } + + std::vector targets_compress_formats(1, 1); + if (!targets_compress_formats_str.empty()) { + SplitStringToIntegers(targets_compress_formats_str, ":,", + true, &targets_compress_formats); + } + + if (targets_compress_formats.size() == 1 && num_outputs != 1) { + KALDI_WARN << "targets-compress-formats is of size 1. 
" + << "Extending it to size num-outputs=" << num_outputs; + targets_compress_formats.resize(num_outputs, targets_compress_formats[0]); + } + + if (targets_compress_formats.size() != num_outputs) { + KALDI_ERR << "Mismatch in length of targets-compress-formats and num-outputs; " + << targets_compress_formats.size() << " vs " << num_outputs; + } + + std::vector output_dims(num_outputs); + SplitStringToIntegers(output_dims_str, ":,", + true, &output_dims); + + std::vector output_names(num_outputs); + SplitStringToVector(output_names_str, ":,", true, &output_names); + + //std::vector deriv_weights_rspecifiers; + //if (!deriv_weights_rspecifiers_str.empty()) { + // std::vector parts; + // SplitStringToVector(deriv_weights_rspecifiers_str, ":,", + // false, &deriv_weights_rspecifiers); + + // if (deriv_weights_rspecifiers.size() != num_outputs) { + // KALDI_ERR << "Expecting the number of deriv-weights-rspecifiers to " + // << "be equal to the number of outputs"; + // } + //} + + std::vector targets_rspecifiers(num_outputs); + std::vector deriv_weights_rspecifiers(num_outputs); + + for (int32 n = 0; n < num_outputs; n++) { + const std::string &targets_rspecifier = po.GetArg(2*n + 2); + const std::string &deriv_weights_rspecifier = po.GetArg(2*n + 3); + + targets_rspecifiers[n] = targets_rspecifier; + deriv_weights_rspecifiers[n] = deriv_weights_rspecifier; + + if (output_dims[n] >= 0) { + sparse_targets_readers[n] = new RandomAccessPosteriorReader(targets_rspecifier); + } else { + dense_targets_readers[n] = new RandomAccessBaseFloatMatrixReader(targets_rspecifier); + } + + if (!deriv_weights_rspecifier.empty()) + deriv_weights_readers[n] = new RandomAccessBaseFloatVectorReader(deriv_weights_rspecifier); + + KALDI_LOG << "output-name=" << output_names[n] + << " target-dim=" << output_dims[n] + << " targets-rspecifier=\"" << targets_rspecifiers[n] << "\"" + << " deriv-weights-rspecifier=\"" << deriv_weights_rspecifiers[n] << "\"" + << " compress-target=" << (compress_targets[n] ? "true" : "false") + << " target-compress-format=" << targets_compress_formats[n]; + } + + int32 num_done = 0, num_err = 0; + + std::vector num_frames_written(num_outputs, 0); + std::vector num_egs_written(num_outputs, 0); + + for (; !feat_reader.Done(); feat_reader.Next()) { + std::string key = feat_reader.Key(); + const Matrix &feats = feat_reader.Value(); + + const Matrix *ivector_feats = NULL; + if (!ivector_rspecifier.empty()) { + if (!ivector_reader.HasKey(key)) { + KALDI_WARN << "No iVectors for utterance " << key; + num_err++; + continue; + } else { + // this address will be valid until we call HasKey() or Value() + // again. 
+ ivector_feats = &(ivector_reader.Value(key)); + } + } + + if (ivector_feats && + (abs(feats.NumRows() - ivector_feats->NumRows()) > length_tolerance + || ivector_feats->NumRows() == 0)) { + KALDI_WARN << "Length difference between feats " << feats.NumRows() + << " and iVectors " << ivector_feats->NumRows() + << "exceeds tolerance " << length_tolerance; + num_err++; + continue; + } + + std::vector* > dense_targets(num_outputs, static_cast* >(NULL)); + std::vector sparse_targets(num_outputs, static_cast(NULL)); + std::vector* > deriv_weights(num_outputs, static_cast* >(NULL)); + + int32 num_outputs_found = 0; + for (int32 n = 0; n < num_outputs; n++) { + if (dense_targets_readers[n]) { + if (!dense_targets_readers[n]->HasKey(key)) { + KALDI_WARN << "No dense targets matrix for key " << key << " in " + << "rspecifier " << targets_rspecifiers[n] + << " for output " << output_names[n]; + continue; + } + const MatrixBase *target_matrix = &(dense_targets_readers[n]->Value(key)); + + if ((target_matrix->NumRows() - feats.NumRows()) > length_tolerance) { + KALDI_WARN << "Length difference between feats " << feats.NumRows() + << " and target matrix " << target_matrix->NumRows() + << "exceeds tolerance " << length_tolerance; + num_err++; + continue; + } + + dense_targets[n] = target_matrix; + } else { + if (!sparse_targets_readers[n]->HasKey(key)) { + KALDI_WARN << "No sparse target matrix for key " << key << " in " + << "rspecifier " << targets_rspecifiers[n] + << " for output " << output_names[n]; + continue; + } + const Posterior *posterior = &(sparse_targets_readers[n]->Value(key)); + + if (abs(static_cast(posterior->size()) - feats.NumRows()) > length_tolerance + || posterior->size() < feats.NumRows()) { + KALDI_WARN << "Posterior has wrong size " << posterior->size() + << " versus " << feats.NumRows(); + num_err++; + continue; + } + + sparse_targets[n] = posterior; + } + + if (deriv_weights_readers[n]) { + if (!deriv_weights_readers[n]->HasKey(key)) { + KALDI_WARN << "No deriv weights for key " << key << " in " + << "rspecifier " << deriv_weights_rspecifiers[n] + << " for output " << output_names[n]; + num_err++; + sparse_targets[n] = NULL; + dense_targets[n] = NULL; + continue; + } else { + // this address will be valid until we call HasKey() or Value() + // again. 
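+          // (The table readers own the values, so we keep pointers rather
+          // than copying the data; a later HasKey()/Value() call may free
+          // them, which is why the pointers are only used within this loop
+          // iteration.)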
+ deriv_weights[n] = &(deriv_weights_readers[n]->Value(key)); + } + } + + if (deriv_weights[n] && + (abs(feats.NumRows() - deriv_weights[n]->Dim()) > length_tolerance + || deriv_weights[n]->Dim() == 0)) { + KALDI_WARN << "Length difference between feats " << feats.NumRows() + << " and deriv weights " << deriv_weights[n]->Dim() + << " exceeds tolerance " << length_tolerance; + num_err++; + sparse_targets[n] = NULL; + dense_targets[n] = NULL; + deriv_weights[n] = NULL; + continue; + } + + num_outputs_found++; + } + + if (num_outputs_found == 0) { + KALDI_WARN << "No output found for key " << key; + num_err++; + continue; + } + + ProcessFile(feats, ivector_feats, output_names, output_dims, + dense_targets, sparse_targets, + deriv_weights, key, + compress_input, input_compress_format, + compress_targets, targets_compress_formats, + left_context, right_context, num_frames, + &num_frames_written, &num_egs_written, + &example_writer); + num_done++; + } + + int64 max_num_egs_written = 0, max_num_frames_written = 0; + for (int32 n = 0; n < num_outputs; n++) { + delete dense_targets_readers[n]; + delete sparse_targets_readers[n]; + delete deriv_weights_readers[n]; + if (num_egs_written[n] == 0) return false; + if (num_egs_written[n] > max_num_egs_written) { + max_num_egs_written = num_egs_written[n]; + max_num_frames_written = num_frames_written[n]; + } + } + + KALDI_LOG << "Finished generating examples, " + << "successfully processed " << num_done + << " feature files, wrote at most " << max_num_egs_written << " examples, " + << " with at most " << max_num_frames_written << " egs in total; " + << num_err << " files had errors."; + + return (num_err > num_done ? 1 : 0); + } catch(const std::exception &e) { + std::cerr << e.what() << '\n'; + return -1; + } +} + + From 4baeb72a22f53cceba36afb55d6683b872443d34 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 23 Nov 2016 19:02:00 -0500 Subject: [PATCH 053/530] asr_diarization: Modify the egs binaries and utilities to support multiple outputs in egs --- src/nnet3/nnet-example-utils.cc | 9 +++- src/nnet3/nnet-example-utils.h | 2 + src/nnet3/nnet-example.cc | 3 ++ src/nnet3bin/nnet3-compute-from-egs.cc | 10 +++-- src/nnet3bin/nnet3-copy-egs.cc | 59 +++++++++++++++++++++++++- src/nnet3bin/nnet3-merge-egs.cc | 6 ++- 6 files changed, 81 insertions(+), 8 deletions(-) diff --git a/src/nnet3/nnet-example-utils.cc b/src/nnet3/nnet-example-utils.cc index 39922153db4..548fb842385 100644 --- a/src/nnet3/nnet-example-utils.cc +++ b/src/nnet3/nnet-example-utils.cc @@ -111,7 +111,7 @@ static void MergeIo(const std::vector &src, io.indexes.resize(size); } - std::vector::const_iterator names_begin = names.begin(); + std::vector::const_iterator names_begin = names.begin(), names_end = names.end(); std::vector::const_iterator eg_iter = src.begin(), eg_end = src.end(); @@ -318,6 +318,13 @@ void RoundUpNumFrames(int32 frame_subsampling_factor, } } +int32 NumOutputs(const NnetExample &eg) { + int32 num_outputs = 0; + for (size_t i = 0; i < eg.io.size(); i++) + if (eg.io[i].name.find("output") != std::string::npos) + num_outputs++; + return num_outputs; +} } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-example-utils.h b/src/nnet3/nnet-example-utils.h index 3e309e18915..d223c5eb5d1 100644 --- a/src/nnet3/nnet-example-utils.h +++ b/src/nnet3/nnet-example-utils.h @@ -80,6 +80,8 @@ void RoundUpNumFrames(int32 frame_subsampling_factor, int32 *num_frames, int32 *num_frames_overlap); +// Returns the number of outputs in an eg +int32 NumOutputs(const 
NnetExample &eg); } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-example.cc b/src/nnet3/nnet-example.cc index 89d40b9ef89..2ad90c0f11d 100644 --- a/src/nnet3/nnet-example.cc +++ b/src/nnet3/nnet-example.cc @@ -128,6 +128,9 @@ NnetIo::NnetIo(const std::string &name, } void NnetExample::Write(std::ostream &os, bool binary) const { +#ifdef KALDI_PARANOID + KALDI_ASSERT(NumOutputs(eg) > 0); +#endif // Note: weight, label, input_frames and spk_info are members. This is a // struct. WriteToken(os, binary, ""); diff --git a/src/nnet3bin/nnet3-compute-from-egs.cc b/src/nnet3bin/nnet3-compute-from-egs.cc index 66eace0dab5..e35e67bbeb5 100644 --- a/src/nnet3bin/nnet3-compute-from-egs.cc +++ b/src/nnet3bin/nnet3-compute-from-egs.cc @@ -36,7 +36,8 @@ class NnetComputerFromEg { // Compute the output (which will have the same number of rows as the number // of Indexes in the output of the eg), and put it in "output". - void Compute(const NnetExample &eg, Matrix *output) { + void Compute(const NnetExample &eg, const std::string &output_name, + Matrix *output) { ComputationRequest request; bool need_backprop = false, store_stats = false; GetComputationRequest(nnet_, eg, need_backprop, store_stats, &request); @@ -47,7 +48,7 @@ class NnetComputerFromEg { NnetComputer computer(options, computation, nnet_, NULL); computer.AcceptInputs(nnet_, eg.io); computer.Forward(); - const CuMatrixBase &nnet_output = computer.GetOutput("output"); + const CuMatrixBase &nnet_output = computer.GetOutput(output_name); output->Resize(nnet_output.NumRows(), nnet_output.NumCols()); nnet_output.CopyToMat(output); } @@ -80,11 +81,14 @@ int main(int argc, char *argv[]) { bool binary_write = true, apply_exp = false; std::string use_gpu = "yes"; + std::string output_name = "output"; ParseOptions po(usage); po.Register("binary", &binary_write, "Write output in binary mode"); po.Register("apply-exp", &apply_exp, "If true, apply exp function to " "output"); + po.Register("output-name", &output_name, "Do computation for " + "specified output"); po.Register("use-gpu", &use_gpu, "yes|no|optional|wait, only has effect if compiled with CUDA"); @@ -115,7 +119,7 @@ int main(int argc, char *argv[]) { for (; !example_reader.Done(); example_reader.Next(), num_egs++) { Matrix output; - computer.Compute(example_reader.Value(), &output); + computer.Compute(example_reader.Value(), output_name, &output); KALDI_ASSERT(output.NumRows() != 0); if (apply_exp) output.ApplyExp(); diff --git a/src/nnet3bin/nnet3-copy-egs.cc b/src/nnet3bin/nnet3-copy-egs.cc index ceb415ffe87..2702ae5fae9 100644 --- a/src/nnet3bin/nnet3-copy-egs.cc +++ b/src/nnet3bin/nnet3-copy-egs.cc @@ -23,10 +23,29 @@ #include "hmm/transition-model.h" #include "nnet3/nnet-example.h" #include "nnet3/nnet-example-utils.h" +#include namespace kaldi { namespace nnet3 { +bool KeepOutputs(const std::vector &keep_outputs, + NnetExample *eg) { + std::vector io_new; + int32 num_outputs = 0; + for (std::vector::iterator it = eg->io.begin(); + it != eg->io.end(); ++it) { + if (it->name.find("output") != std::string::npos) { + if (!std::binary_search(keep_outputs.begin(), keep_outputs.end(), it->name)) + continue; + num_outputs++; + } + io_new.push_back(*it); + } + eg->io.swap(io_new); + + return num_outputs; +} + // returns an integer randomly drawn with expected value "expected_count" // (will be either floor(expected_count) or ceil(expected_count)). 
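// For example (illustrative, not part of this patch): with
// --keep-proportion=0.3, GetCount(0.3) returns 1 with probability 0.3 and 0
// otherwise, so each eg is kept about 30% of the time; GetCount(2.5) returns
// 2 or 3 with equal probability, duplicating some egs to oversample.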
int32 GetCount(double expected_count) { @@ -257,6 +276,22 @@ bool SelectFromExample(const NnetExample &eg, return true; } +bool RemoveZeroDerivOutputs(NnetExample *eg) { + std::vector io_new; + int32 num_outputs = 0; + for (std::vector::iterator it = eg->io.begin(); + it != eg->io.end(); ++it) { + if (it->name.find("output") != std::string::npos) { + if (it->deriv_weights.Dim() > 0 && it->deriv_weights.Sum() == 0) + continue; + num_outputs++; + } + io_new.push_back(*it); + } + eg->io.swap(io_new); + + return (num_outputs > 0); +} } // namespace nnet3 } // namespace kaldi @@ -284,6 +319,8 @@ int main(int argc, char *argv[]) { int32 srand_seed = 0; int32 frame_shift = 0; BaseFloat keep_proportion = 1.0; + std::string keep_outputs_str; + bool remove_zero_deriv_outputs = false; // The following config variables, if set, can be used to extract a single // frame of labels from a multi-frame example, and/or to reduce the amount @@ -315,7 +352,11 @@ int main(int argc, char *argv[]) { "feature left-context that we output."); po.Register("right-context", &right_context, "Can be used to truncate the " "feature right-context that we output."); - + po.Register("keep-outputs", &keep_outputs_str, "Comma separated list of " + "output nodes to keep"); + po.Register("remove-zero-deriv-outputs", &remove_zero_deriv_outputs, + "Remove outputs that do not contribute to the objective " + "because of zero deriv-weights"); po.Read(argc, argv); @@ -335,17 +376,29 @@ int main(int argc, char *argv[]) { for (int32 i = 0; i < num_outputs; i++) example_writers[i] = new NnetExampleWriter(po.GetArg(i+2)); + std::vector keep_outputs; + if (!keep_outputs_str.empty()) { + SplitStringToVector(keep_outputs_str, ",:", true, &keep_outputs); + std::sort(keep_outputs.begin(), keep_outputs.end()); + } int64 num_read = 0, num_written = 0; for (; !example_reader.Done(); example_reader.Next(), num_read++) { // count is normally 1; could be 0, or possibly >1. int32 count = GetCount(keep_proportion); std::string key = example_reader.Key(); - const NnetExample &eg = example_reader.Value(); + NnetExample eg(example_reader.Value()); + + if (!keep_outputs_str.empty()) { + if (!KeepOutputs(keep_outputs, &eg)) continue; + } + for (int32 c = 0; c < count; c++) { int32 index = (random ? Rand() : num_written) % num_outputs; if (frame_str == "" && left_context == -1 && right_context == -1 && frame_shift == 0) { + if (remove_zero_deriv_outputs) + if (!RemoveZeroDerivOutputs(&eg)) continue; example_writers[index]->Write(key, eg); num_written++; } else { // the --frame option or context options were set. @@ -354,6 +407,8 @@ int main(int argc, char *argv[]) { frame_shift, &eg_modified)) { // this branch of the if statement will almost always be taken (should only // not be taken for shorter-than-normal egs from the end of a file. + if (remove_zero_deriv_outputs) + if (!RemoveZeroDerivOutputs(&eg_modified)) continue; example_writers[index]->Write(key, eg_modified); num_written++; } diff --git a/src/nnet3bin/nnet3-merge-egs.cc b/src/nnet3bin/nnet3-merge-egs.cc index 7415db8d12a..30096ab9988 100644 --- a/src/nnet3bin/nnet3-merge-egs.cc +++ b/src/nnet3bin/nnet3-merge-egs.cc @@ -26,8 +26,10 @@ namespace kaldi { namespace nnet3 { -// returns the number of indexes/frames in the NnetIo named "output" in the eg, -// or crashes if it is not there. +// returns the number of indexes/frames in the output NnetIo +// assumes the output name starts with "output" and only looks at the +// first such output to get the indexes size. 
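+// E.g. (illustrative): for an eg whose io names are {"input", "ivector",
+// "output-speech", "output-snr"}, this returns the indexes size of
+// "output-speech", the first name containing "output".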
+// crashes if there is no such output
 int32 NumOutputIndexes(const NnetExample &eg) {
   for (size_t i = 0; i < eg.io.size(); i++)
     if (eg.io[i].name.find("output") != std::string::npos)

From 687b0f19864c3a706c6e4657b51d78809159435a Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Thu, 24 Nov 2016 00:28:58 -0500
Subject: [PATCH 054/530] asr_diarization: Adding
 local/snr/make_sad_tdnn_configs.py and stats component

---
 .../segmentation/make_sad_tdnn_configs.py | 616 ++++++++++++++++++
 egs/wsj/s5/steps/nnet3/components.py      | 112 +++-
 2 files changed, 720 insertions(+), 8 deletions(-)
 create mode 100755 egs/aspire/s5/local/segmentation/make_sad_tdnn_configs.py

diff --git a/egs/aspire/s5/local/segmentation/make_sad_tdnn_configs.py b/egs/aspire/s5/local/segmentation/make_sad_tdnn_configs.py
new file mode 100755
index 00000000000..e859a3593ce
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/make_sad_tdnn_configs.py
@@ -0,0 +1,616 @@
+#!/usr/bin/env python
+
+# we're using python 3.x style print but want it to work in python 2.x.
+from __future__ import print_function
+import os
+import argparse
+import shlex
+import sys
+import warnings
+import copy
+import imp
+import ast
+
+nodes = imp.load_source('', 'steps/nnet3/components.py')
+import libs.common as common_lib
+
+def GetArgs():
+    # we add compulsory arguments as named arguments for readability
+    parser = argparse.ArgumentParser(description="Writes config files and variables "
+                                     "for TDNN creation and training",
+                                     epilog="See steps/nnet3/tdnn/train.sh for example.")
+
+    # Only one of these arguments can be specified, and one of them
+    # must be specified
+    feat_group = parser.add_mutually_exclusive_group(required = True)
+    feat_group.add_argument("--feat-dim", type=int,
+                            help="Raw feature dimension, e.g. 13")
+    feat_group.add_argument("--feat-dir", type=str,
+                            help="Feature directory, from which we derive the feat-dim")
+
+    # only one of these arguments can be specified
+    ivector_group = parser.add_mutually_exclusive_group(required = False)
+    ivector_group.add_argument("--ivector-dim", type=int,
+                               help="iVector dimension, e.g. 100", default=0)
+    ivector_group.add_argument("--ivector-dir", type=str,
+                               help="iVector dir, which will be used to derive the ivector-dim", default=None)
+
+    num_target_group = parser.add_mutually_exclusive_group(required = True)
+    num_target_group.add_argument("--num-targets", type=int,
+                                  help="number of network targets (e.g. num-pdf-ids/num-leaves)")
+    num_target_group.add_argument("--ali-dir", type=str,
+                                  help="alignment directory, from which we derive the num-targets")
+    num_target_group.add_argument("--tree-dir", type=str,
+                                  help="directory with final.mdl, from which we derive the num-targets")
+    num_target_group.add_argument("--output-node-parameters", type=str, action='append',
+                                  dest='output_node_para_array',
+                                  help="Define output nodes and their parameters, like output-suffix, dim, objective-type etc.")
+    # CNN options
+    parser.add_argument('--cnn.layer', type=str, action='append', dest = "cnn_layer",
+                        help="CNN parameters at each CNN layer, e.g. --filt-x-dim=3 --filt-y-dim=8 "
+                        "--filt-x-step=1 --filt-y-step=1 --num-filters=256 --pool-x-size=1 --pool-y-size=3 "
+                        "--pool-z-size=1 --pool-x-step=1 --pool-y-step=3 --pool-z-step=1, "
+                        "when CNN layers are used, no LDA will be added", default = None)
+    parser.add_argument("--cnn.bottleneck-dim", type=int, dest = "cnn_bottleneck_dim",
+                        help="Output dimension of the linear layer at the CNN output "
+                        "for dimension reduction, e.g. 256.
+ "The default zero means this layer is not needed.", default=0) + + # General neural network options + parser.add_argument("--splice-indexes", type=str, required = True, + help="Splice indexes at each layer, e.g. '-3,-2,-1,0,1,2,3' " + "If CNN layers are used the first set of splice indexes will be used as input " + "to the first CNN layer and later splice indexes will be interpreted as indexes " + "for the TDNNs.") + parser.add_argument("--add-lda", type=str, action=common_lib.StrToBoolAction, + help="If \"true\" an LDA matrix computed from the input features " + "(spliced according to the first set of splice-indexes) will be used as " + "the first Affine layer. This affine layer's parameters are fixed during training. " + "This variable needs to be set to \"false\" when using dense-targets.\n" + "If --cnn.layer is specified this option will be forced to \"false\".", + default=True, choices = ["false", "true"]) + + parser.add_argument("--include-log-softmax", type=str, action=common_lib.StrToBoolAction, + help="add the final softmax layer ", default=True, choices = ["false", "true"]) + parser.add_argument("--add-final-sigmoid", type=str, action=common_lib.StrToBoolAction, + help="add a final sigmoid layer as alternate to log-softmax-layer. " + "Can only be used if include-log-softmax is false. " + "This is useful in cases where you want the output to be " + "like probabilities between 0 and 1. Typically the nnet " + "is trained with an objective such as quadratic", + default=False, choices = ["false", "true"]) + + parser.add_argument("--objective-type", type=str, + help = "the type of objective; i.e. quadratic or linear", + default="linear", choices = ["linear", "quadratic"]) + parser.add_argument("--xent-regularize", type=float, + help="For chain models, if nonzero, add a separate output for cross-entropy " + "regularization (with learning-rate-factor equal to the inverse of this)", + default=0.0) + parser.add_argument("--final-layer-normalize-target", type=float, + help="RMS target for final layer (set to <1 if final layer learns too fast", + default=1.0) + parser.add_argument("--subset-dim", type=int, default=0, + help="dimension of the subset of units to be sent to the central frame") + parser.add_argument("--pnorm-input-dim", type=int, + help="input dimension to p-norm nonlinearities") + parser.add_argument("--pnorm-output-dim", type=int, + help="output dimension of p-norm nonlinearities") + relu_dim_group = parser.add_mutually_exclusive_group(required = False) + relu_dim_group.add_argument("--relu-dim", type=int, + help="dimension of all ReLU nonlinearity layers") + relu_dim_group.add_argument("--relu-dim-final", type=int, + help="dimension of the last ReLU nonlinearity layer. Dimensions increase geometrically from the first through the last ReLU layer.", default=None) + parser.add_argument("--relu-dim-init", type=int, + help="dimension of the first ReLU nonlinearity layer. Dimensions increase geometrically from the first through the last ReLU layer.", default=None) + + parser.add_argument("--self-repair-scale-nonlinearity", type=float, + help="A non-zero value activates the self-repair mechanism in the sigmoid and tanh non-linearities of the LSTM", default=None) + + + parser.add_argument("--use-presoftmax-prior-scale", type=str, action=common_lib.StrToBoolAction, + help="if true, a presoftmax-prior-scale is added", + choices=['true', 'false'], default = True) + + # Options to convert input MFCC into Fbank features. 
This is useful when a + # LDA layer is not added (such as when using dense targets) + parser.add_argument("--cnn.cepstral-lifter", type=float, dest = "cepstral_lifter", + help="The factor used for determining the liftering vector in the production of MFCC. " + "User has to ensure that it matches the lifter used in MFCC generation, " + "e.g. 22.0", default=22.0) + + parser.add_argument("config_dir", + help="Directory to write config files and variables") + + print(' '.join(sys.argv)) + + args = parser.parse_args() + args = CheckArgs(args) + + return args + +def CheckArgs(args): + if not os.path.exists(args.config_dir): + os.makedirs(args.config_dir) + + ## Check arguments. + if args.feat_dir is not None: + args.feat_dim = common_lib.get_feat_dim(args.feat_dir) + + if args.ivector_dir is not None: + args.ivector_dim = common_lib.get_ivector_dim(args.ivector_dir) + + if not args.feat_dim > 0: + raise Exception("feat-dim has to be postive") + + if len(args.output_node_para_array) == 0: + if args.ali_dir is not None: + args.num_targets = common_lib.get_number_of_leaves_from_tree(args.ali_dir) + elif args.tree_dir is not None: + args.num_targets = common_lib.get_number_of_leaves_from_tree(args.tree_dir) + if not args.num_targets > 0: + print(args.num_targets) + raise Exception("num_targets has to be positive") + args.output_node_para_array.append( + "--dim={0} --objective-type={1} --include-log-softmax={2} --add-final-sigmoid={3} --xent-regularize={4}".format( + args.num_targets, args.objective_type, + "true" if args.include_log_softmax else "false", + "true" if args.add_final_sigmoid else "false", + args.xent_regularize)) + + if not args.ivector_dim >= 0: + raise Exception("ivector-dim has to be non-negative") + + if (args.subset_dim < 0): + raise Exception("--subset-dim has to be non-negative") + + if not args.relu_dim is None: + if not args.pnorm_input_dim is None or not args.pnorm_output_dim is None or not args.relu_dim_init is None: + raise Exception("--relu-dim argument not compatible with " + "--pnorm-input-dim or --pnorm-output-dim or --relu-dim-init options"); + args.nonlin_input_dim = args.relu_dim + args.nonlin_output_dim = args.relu_dim + args.nonlin_output_dim_final = None + args.nonlin_output_dim_init = None + args.nonlin_type = 'relu' + + elif not args.relu_dim_final is None: + if not args.pnorm_input_dim is None or not args.pnorm_output_dim is None: + raise Exception("--relu-dim-final argument not compatible with " + "--pnorm-input-dim or --pnorm-output-dim options") + if args.relu_dim_init is None: + raise Exception("--relu-dim-init argument should also be provided with --relu-dim-final") + if args.relu_dim_init > args.relu_dim_final: + raise Exception("--relu-dim-init has to be no larger than --relu-dim-final") + args.nonlin_input_dim = None + args.nonlin_output_dim = None + args.nonlin_output_dim_final = args.relu_dim_final + args.nonlin_output_dim_init = args.relu_dim_init + args.nonlin_type = 'relu' + + else: + if not args.relu_dim_init is None: + raise Exception("--relu-dim-final argument not compatible with " + "--pnorm-input-dim or --pnorm-output-dim options") + if not args.pnorm_input_dim > 0 or not args.pnorm_output_dim > 0: + raise Exception("--relu-dim not set, so expected --pnorm-input-dim and " + "--pnorm-output-dim to be provided."); + args.nonlin_input_dim = args.pnorm_input_dim + args.nonlin_output_dim = args.pnorm_output_dim + if (args.nonlin_input_dim < args.nonlin_output_dim) or (args.nonlin_input_dim % args.nonlin_output_dim != 0): + raise Exception("Invalid 
--pnorm-input-dim {0} and --pnorm-output-dim {1}".format(args.nonlin_input_dim, args.nonlin_output_dim)) + args.nonlin_output_dim_final = None + args.nonlin_output_dim_init = None + args.nonlin_type = 'pnorm' + + if args.add_lda and args.cnn_layer is not None: + args.add_lda = False + warnings.warn("--add-lda is set to false as CNN layers are used.") + + return args + +def AddConvMaxpLayer(config_lines, name, input, args): + if '3d-dim' not in input: + raise Exception("The input to AddConvMaxpLayer() needs '3d-dim' parameters.") + + input = nodes.AddConvolutionLayer(config_lines, name, input, + input['3d-dim'][0], input['3d-dim'][1], input['3d-dim'][2], + args.filt_x_dim, args.filt_y_dim, + args.filt_x_step, args.filt_y_step, + args.num_filters, input['vectorization']) + + if args.pool_x_size > 1 or args.pool_y_size > 1 or args.pool_z_size > 1: + input = nodes.AddMaxpoolingLayer(config_lines, name, input, + input['3d-dim'][0], input['3d-dim'][1], input['3d-dim'][2], + args.pool_x_size, args.pool_y_size, args.pool_z_size, + args.pool_x_step, args.pool_y_step, args.pool_z_step) + + return input + +# The ivectors are processed through an affine layer parallel to the CNN layers, +# then concatenated with the CNN output and passed to the deeper part of the network. +def AddCnnLayers(config_lines, cnn_layer, cnn_bottleneck_dim, cepstral_lifter, config_dir, feat_dim, splice_indexes=[0], ivector_dim=0): + cnn_args = ParseCnnString(cnn_layer) + num_cnn_layers = len(cnn_args) + # We use an Idct layer here to convert MFCC to FBANK features + common_lib.write_idct_matrix(feat_dim, cepstral_lifter, config_dir.strip() + "/idct.mat") + prev_layer_output = {'descriptor': "input", + 'dimension': feat_dim} + prev_layer_output = nodes.AddFixedAffineLayer(config_lines, "Idct", prev_layer_output, config_dir.strip() + '/idct.mat') + + list = [('Offset({0}, {1})'.format(prev_layer_output['descriptor'],n) if n != 0 else prev_layer_output['descriptor']) for n in splice_indexes] + splice_descriptor = "Append({0})".format(", ".join(list)) + cnn_input_dim = len(splice_indexes) * feat_dim + prev_layer_output = {'descriptor': splice_descriptor, + 'dimension': cnn_input_dim, + '3d-dim': [len(splice_indexes), feat_dim, 1], + 'vectorization': 'yzx'} + + for cl in range(0, num_cnn_layers): + prev_layer_output = AddConvMaxpLayer(config_lines, "L{0}".format(cl), prev_layer_output, cnn_args[cl]) + + if cnn_bottleneck_dim > 0: + prev_layer_output = nodes.AddAffineLayer(config_lines, "cnn-bottleneck", prev_layer_output, cnn_bottleneck_dim, "") + + if ivector_dim > 0: + iv_layer_output = {'descriptor': 'ReplaceIndex(ivector, t, 0)', + 'dimension': ivector_dim} + iv_layer_output = nodes.AddAffineLayer(config_lines, "ivector", iv_layer_output, ivector_dim, "") + prev_layer_output['descriptor'] = 'Append({0}, {1})'.format(prev_layer_output['descriptor'], iv_layer_output['descriptor']) + prev_layer_output['dimension'] = prev_layer_output['dimension'] + iv_layer_output['dimension'] + + return prev_layer_output + +def PrintConfig(file_name, config_lines): + f = open(file_name, 'w') + f.write("\n".join(config_lines['components'])+"\n") + f.write("\n#Component nodes\n") + f.write("\n".join(config_lines['component-nodes'])+"\n") + f.close() + +def ParseCnnString(cnn_param_string_list): + cnn_parser = argparse.ArgumentParser(description="cnn argument parser") + + cnn_parser.add_argument("--filt-x-dim", required=True, type=int) + cnn_parser.add_argument("--filt-y-dim", required=True, type=int) + cnn_parser.add_argument("--filt-x-step", 
type=int, default = 1) + cnn_parser.add_argument("--filt-y-step", type=int, default = 1) + cnn_parser.add_argument("--num-filters", required=True, type=int) + cnn_parser.add_argument("--pool-x-size", type=int, default = 1) + cnn_parser.add_argument("--pool-y-size", type=int, default = 1) + cnn_parser.add_argument("--pool-z-size", type=int, default = 1) + cnn_parser.add_argument("--pool-x-step", type=int, default = 1) + cnn_parser.add_argument("--pool-y-step", type=int, default = 1) + cnn_parser.add_argument("--pool-z-step", type=int, default = 1) + + cnn_args = [] + for cl in range(0, len(cnn_param_string_list)): + cnn_args.append(cnn_parser.parse_args(shlex.split(cnn_param_string_list[cl]))) + + return cnn_args + +def ParseSpliceString(splice_indexes): + splice_array = [] + left_context = 0 + right_context = 0 + split_on_spaces = splice_indexes.split(); # we already checked the string is nonempty. + if len(split_on_spaces) < 1: + raise Exception("invalid splice-indexes argument, too short: " + + splice_indexes) + try: + for string in split_on_spaces: + this_splices = string.split(",") + if len(this_splices) < 1: + raise Exception("invalid splice-indexes argument, too-short element: " + + splice_indexes) + # the rest of this block updates left_context and right_context, and + # does some checking. + leftmost_splice = 10000 + rightmost_splice = -10000 + + int_list = [] + for s in this_splices: + try: + n = int(s) + if n < leftmost_splice: + leftmost_splice = n + if n > rightmost_splice: + rightmost_splice = n + int_list.append(n) + except ValueError: + #if len(splice_array) == 0: + # raise Exception("First dimension of splicing array must not have averaging [yet]") + try: + x = nodes.StatisticsConfig(s, { 'dimension':100, + 'descriptor': 'foo'} ) + int_list.append(s) + except Exception as e: + raise Exception("The following element of the splicing array is not a valid specifier " + "of statistics: {0}\nGot {1}".format(s, str(e))) + splice_array.append(int_list) + + if leftmost_splice == 10000 or rightmost_splice == -10000: + raise Exception("invalid element of --splice-indexes: " + string) + left_context += -leftmost_splice + right_context += rightmost_splice + except ValueError as e: + raise Exception("invalid --splice-indexes argument " + args.splice_indexes + " " + str(e)) + + left_context = max(0, left_context) + right_context = max(0, right_context) + + return {'left_context':left_context, + 'right_context':right_context, + 'splice_indexes':splice_array, + 'num_hidden_layers':len(splice_array) + } + +def AddPriorsAccumulator(config_lines, name, input): + components = config_lines['components'] + component_nodes = config_lines['component-nodes'] + + components.append("component name={0}_softmax type=SoftmaxComponent dim={1}".format(name, input['dimension'])) + component_nodes.append("component-node name={0}_softmax component={0}_softmax input={1}".format(name, input['descriptor'])) + + return {'descriptor': '{0}_softmax'.format(name), + 'dimension': input['dimension']} + +def AddFinalLayer(config_lines, input, output_dim, + ng_affine_options = " param-stddev=0 bias-stddev=0 ", + label_delay=None, + use_presoftmax_prior_scale = False, + prior_scale_file = None, + include_log_softmax = True, + add_final_sigmoid = False, + name_affix = None, + objective_type = "linear", + objective_scale = 1.0, + objective_scales_vec = None): + components = config_lines['components'] + component_nodes = config_lines['component-nodes'] + + if name_affix is not None: + final_node_prefix = 'Final-' + 
str(name_affix) + else: + final_node_prefix = 'Final' + + prev_layer_output = nodes.AddAffineLayer(config_lines, + final_node_prefix , input, output_dim, + ng_affine_options) + if include_log_softmax: + if use_presoftmax_prior_scale : + components.append('component name={0}-fixed-scale type=FixedScaleComponent scales={1}'.format(final_node_prefix, prior_scale_file)) + component_nodes.append('component-node name={0}-fixed-scale component={0}-fixed-scale input={1}'.format(final_node_prefix, + prev_layer_output['descriptor'])) + prev_layer_output['descriptor'] = "{0}-fixed-scale".format(final_node_prefix) + prev_layer_output = nodes.AddSoftmaxLayer(config_lines, final_node_prefix, prev_layer_output) + + elif add_final_sigmoid: + # Useful when you need the final outputs to be probabilities + # between 0 and 1. + # Usually used with an objective-type such as "quadratic" + prev_layer_output = nodes.AddSigmoidLayer(config_lines, final_node_prefix, prev_layer_output) + + # we use the same name_affix as a prefix in for affine/scale nodes but as a + # suffix for output node + if (objective_scale != 1.0 or objective_scales_vec is not None): + prev_layer_output = nodes.AddGradientScaleLayer(config_lines, final_node_prefix, prev_layer_output, objective_scale, objective_scales_vec) + + nodes.AddOutputLayer(config_lines, prev_layer_output, label_delay, suffix = name_affix, objective_type = objective_type) + +def AddOutputLayers(config_lines, prev_layer_output, output_nodes, + ng_affine_options = "", label_delay = 0): + + for o in output_nodes: + # make the intermediate config file for layerwise discriminative + # training + AddFinalLayer(config_lines, prev_layer_output, o.dim, + ng_affine_options, label_delay = label_delay, + include_log_softmax = o.include_log_softmax, + add_final_sigmoid = o.add_final_sigmoid, + objective_type = o.objective_type, + name_affix = o.output_suffix) + + if o.xent_regularize != 0.0: + nodes.AddFinalLayer(config_lines, prev_layer_output, o.dim, + include_log_softmax = True, + label_delay = label_delay, + name_affix = o.output_suffix + '_xent') + +# The function signature of MakeConfigs is changed frequently as it is intended for local use in this script. 
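+# Illustrative example (not part of the patch): each element of 'output_nodes'
+# passed around below is an argparse.Namespace produced by
+# ParseOutputNodesParameters() later in this script, from a string such as
+#   --output-suffix=snr --dim=1 --objective-type=quadratic --include-log-softmax=false
+# which would give a final affine layer prefixed 'Final-snr' and an output
+# node suffixed with 'snr'.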
+def MakeConfigs(config_dir, splice_indexes_string, + cnn_layer, cnn_bottleneck_dim, cepstral_lifter, + feat_dim, ivector_dim, add_lda, + nonlin_type, nonlin_input_dim, nonlin_output_dim, subset_dim, + nonlin_output_dim_init, nonlin_output_dim_final, + use_presoftmax_prior_scale, final_layer_normalize_target, + output_nodes, self_repair_scale): + + parsed_splice_output = ParseSpliceString(splice_indexes_string.strip()) + + left_context = parsed_splice_output['left_context'] + right_context = parsed_splice_output['right_context'] + num_hidden_layers = parsed_splice_output['num_hidden_layers'] + splice_indexes = parsed_splice_output['splice_indexes'] + input_dim = len(parsed_splice_output['splice_indexes'][0]) + feat_dim + ivector_dim + + prior_scale_file = '{0}/presoftmax_prior_scale.vec'.format(config_dir) + + config_lines = {'components':[], 'component-nodes':[]} + + config_files={} + prev_layer_output = nodes.AddInputLayer(config_lines, feat_dim, splice_indexes[0], + ivector_dim) + + # Add the init config lines for estimating the preconditioning matrices + init_config_lines = copy.deepcopy(config_lines) + init_config_lines['components'].insert(0, '# Config file for initializing neural network prior to') + init_config_lines['components'].insert(0, '# preconditioning matrix computation') + + for o in output_nodes: + nodes.AddOutputLayer(init_config_lines, prev_layer_output, + objective_type = o.objective_type, suffix = o.output_suffix) + + config_files[config_dir + '/init.config'] = init_config_lines + + if cnn_layer is not None: + prev_layer_output = AddCnnLayers(config_lines, cnn_layer, cnn_bottleneck_dim, cepstral_lifter, config_dir, + feat_dim, splice_indexes[0], ivector_dim) + + # add_lda needs to be set "false" when using dense targets, + # or if the task is not a simple classification task + # (e.g. regression, multi-task) + if add_lda: + prev_layer_output = nodes.AddLdaLayer(config_lines, "L0", prev_layer_output, config_dir + '/lda.mat') + + left_context = 0 + right_context = 0 + # we moved the first splice layer to before the LDA.. + # so the input to the first affine layer is going to [0] index + splice_indexes[0] = [0] + + if not nonlin_output_dim is None: + nonlin_output_dims = [nonlin_output_dim] * num_hidden_layers + elif nonlin_output_dim_init < nonlin_output_dim_final and num_hidden_layers == 1: + raise Exception("num-hidden-layers has to be greater than 1 if relu-dim-init and relu-dim-final is different.") + else: + # computes relu-dim for each hidden layer. 
They increase geometrically across layers + factor = pow(float(nonlin_output_dim_final) / nonlin_output_dim_init, 1.0 / (num_hidden_layers - 1)) if num_hidden_layers > 1 else 1 + nonlin_output_dims = [int(round(nonlin_output_dim_init * pow(factor, i))) for i in range(0, num_hidden_layers)] + assert(nonlin_output_dims[-1] >= nonlin_output_dim_final - 1 and nonlin_output_dims[-1] <= nonlin_output_dim_final + 1) # due to rounding error + nonlin_output_dims[-1] = nonlin_output_dim_final # It ensures that the dim of the last hidden layer is exactly the same as what is specified + + for i in range(0, num_hidden_layers): + # make the intermediate config file for layerwise discriminative training + + # prepare the spliced input + if not (len(splice_indexes[i]) == 1 and splice_indexes[i][0] == 0): + try: + zero_index = splice_indexes[i].index(0) + except ValueError: + zero_index = None + # I just assume the prev_layer_output_descriptor is a simple forwarding descriptor + prev_layer_output_descriptor = prev_layer_output['descriptor'] + subset_output = prev_layer_output + if subset_dim > 0: + # if subset_dim is specified the script expects a zero in the splice indexes + assert(zero_index is not None) + subset_node_config = ("dim-range-node name=Tdnn_input_{0} " + "input-node={1} dim-offset={2} dim={3}".format( + i, prev_layer_output_descriptor, 0, subset_dim)) + subset_output = {'descriptor' : 'Tdnn_input_{0}'.format(i), + 'dimension' : subset_dim} + config_lines['component-nodes'].append(subset_node_config) + appended_descriptors = [] + appended_dimension = 0 + for j in range(len(splice_indexes[i])): + if j == zero_index: + appended_descriptors.append(prev_layer_output['descriptor']) + appended_dimension += prev_layer_output['dimension'] + continue + try: + offset = int(splice_indexes[i][j]) + # it's an integer offset. + appended_descriptors.append('Offset({0}, {1})'.format( + subset_output['descriptor'], splice_indexes[i][j])) + appended_dimension += subset_output['dimension'] + except ValueError: + # it's not an integer offset, so assume it specifies the + # statistics-extraction. 
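+                        # e.g. (illustrative): a --splice-indexes element like
+                        # '-3,0,3,mean+stddev(-99:3:9:99)' mixes integer
+                        # offsets with a statistics-extraction specifier;
+                        # the non-integer entry is handled here.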
+ stats = nodes.StatisticsConfig(splice_indexes[i][j], prev_layer_output) + stats_layer = stats.AddLayer(config_lines, "Tdnn_stats_{0}".format(i)) + appended_descriptors.append(stats_layer['descriptor']) + appended_dimension += stats_layer['dimension'] + + prev_layer_output = {'descriptor' : "Append({0})".format(" , ".join(appended_descriptors)), + 'dimension' : appended_dimension} + else: + # this is a normal affine node + pass + + if nonlin_type == "relu": + prev_layer_output = nodes.AddAffRelNormLayer(config_lines, "Tdnn_{0}".format(i), + prev_layer_output, nonlin_output_dims[i], + self_repair_scale=self_repair_scale, + norm_target_rms=1.0 if i < num_hidden_layers -1 else final_layer_normalize_target) + elif nonlin_type == "pnorm": + prev_layer_output = nodes.AddAffPnormLayer(config_lines, "Tdnn_{0}".format(i), + prev_layer_output, nonlin_input_dim, nonlin_output_dim, + norm_target_rms=1.0 if i < num_hidden_layers -1 else final_layer_normalize_target) + else: + raise Exception("Unknown nonlinearity type") + # a final layer is added after each new layer as we are generating + # configs for layer-wise discriminative training + + AddOutputLayers(config_lines, prev_layer_output, output_nodes) + + config_files['{0}/layer{1}.config'.format(config_dir, i + 1)] = config_lines + config_lines = {'components':[], 'component-nodes':[]} + + left_context += int(parsed_splice_output['left_context']) + right_context += int(parsed_splice_output['right_context']) + + # write the files used by other scripts like steps/nnet3/get_egs.sh + f = open(config_dir + "/vars", "w") + print('model_left_context=' + str(left_context), file=f) + print('model_right_context=' + str(right_context), file=f) + print('num_hidden_layers=' + str(num_hidden_layers), file=f) + print('add_lda=' + ('true' if add_lda else 'false'), file=f) + f.close() + + # printing out the configs + # init.config used to train lda-mllt train + for key in config_files.keys(): + PrintConfig(key, config_files[key]) + +def ParseOutputNodesParameters(para_array): + output_parser = argparse.ArgumentParser() + output_parser.add_argument('--output-suffix', type=str, action=common_lib.NullstrToNoneAction, + help = "Name of the output node. e.g. output-xent") + output_parser.add_argument('--dim', type=int, required=True, + help = "Dimension of the output node") + output_parser.add_argument("--include-log-softmax", type=str, action=common_lib.StrToBoolAction, + help="add the final softmax layer ", + default=True, choices = ["false", "true"]) + output_parser.add_argument("--add-final-sigmoid", type=str, action=common_lib.StrToBoolAction, + help="add a sigmoid layer as the final layer. Applicable only if skip-final-softmax is true.", + choices=['true', 'false'], default = False) + output_parser.add_argument("--objective-type", type=str, default="linear", + choices = ["linear", "quadratic","xent-per-dim"], + help = "the type of objective; i.e. 
quadratic or linear") + output_parser.add_argument("--xent-regularize", type=float, + help="For chain models, if nonzero, add a separate output for cross-entropy " + "regularization (with learning-rate-factor equal to the inverse of this)", + default=0.0) + + output_nodes = [ output_parser.parse_args(shlex.split(x)) for x in para_array ] + + return output_nodes + +def Main(): + args = GetArgs() + + output_nodes = ParseOutputNodesParameters(args.output_node_para_array) + + MakeConfigs(config_dir = args.config_dir, + feat_dim = args.feat_dim, ivector_dim = args.ivector_dim, + add_lda = args.add_lda, + cepstral_lifter = args.cepstral_lifter, + splice_indexes_string = args.splice_indexes, + cnn_layer = args.cnn_layer, + cnn_bottleneck_dim = args.cnn_bottleneck_dim, + nonlin_type = args.nonlin_type, + nonlin_input_dim = args.nonlin_input_dim, + nonlin_output_dim = args.nonlin_output_dim, + subset_dim = args.subset_dim, + nonlin_output_dim_init = args.nonlin_output_dim_init, + nonlin_output_dim_final = args.nonlin_output_dim_final, + use_presoftmax_prior_scale = args.use_presoftmax_prior_scale, + final_layer_normalize_target = args.final_layer_normalize_target, + output_nodes = output_nodes, + self_repair_scale = args.self_repair_scale_nonlinearity) + +if __name__ == "__main__": + Main() + + diff --git a/egs/wsj/s5/steps/nnet3/components.py b/egs/wsj/s5/steps/nnet3/components.py index 82566d2e37d..c811297cda8 100644 --- a/egs/wsj/s5/steps/nnet3/components.py +++ b/egs/wsj/s5/steps/nnet3/components.py @@ -6,6 +6,7 @@ import sys import warnings import copy +import re from operator import itemgetter def GetSumDescriptor(inputs): @@ -30,17 +31,33 @@ def AddInputLayer(config_lines, feat_dim, splice_indexes=[0], ivector_dim=0): components = config_lines['components'] component_nodes = config_lines['component-nodes'] output_dim = 0 - components.append('input-node name=input dim=' + str(feat_dim)) - list = [('Offset(input, {0})'.format(n) if n != 0 else 'input') for n in splice_indexes] - output_dim += len(splice_indexes) * feat_dim + components.append('input-node name=input dim={0}'.format(feat_dim)) + prev_layer_output = {'descriptor': "input", + 'dimension': feat_dim} + inputs = [] + for n in splice_indexes: + try: + offset = int(n) + if offset == 0: + inputs.append(prev_layer_output['descriptor']) + else: + inputs.append('Offset({0}, {1})'.format( + prev_layer_output['descriptor'], offset)) + output_dim += prev_layer_output['dimension'] + except ValueError: + stats = StatisticsConfig(n, prev_layer_output) + stats_layer = stats.AddLayer(config_lines, "Tdnn_stats_{0}".format(0)) + inputs.append(stats_layer['descriptor']) + output_dim += stats_layer['dimension'] + if ivector_dim > 0: - components.append('input-node name=ivector dim=' + str(ivector_dim)) - list.append('ReplaceIndex(ivector, t, 0)') + components.append('input-node name=ivector dim={0}'.format(ivector_dim)) + inputs.append('ReplaceIndex(ivector, t, 0)') output_dim += ivector_dim - if len(list) > 1: - splice_descriptor = "Append({0})".format(", ".join(list)) + if len(inputs) > 1: + splice_descriptor = "Append({0})".format(", ".join(inputs)) else: - splice_descriptor = list[0] + splice_descriptor = inputs[0] print(splice_descriptor) return {'descriptor': splice_descriptor, 'dimension': output_dim} @@ -519,3 +536,82 @@ def AddBLstmLayer(config_lines, 'dimension':output_dim } +# this is a bit like a struct, initialized from a string, which describes how to +# set up the statistics-pooling and statistics-extraction components. 
+# An example string is 'mean(-99:3:9:99)', which means: compute the mean of
+# data within a window of -99 to +99, with distinct means computed every 9
+# frames (we round to get the appropriate one), and with the input extracted
+# on multiples of 3 frames (so this will force the input to this layer to be
+# evaluated every 3 frames). Another example string is
+# 'mean+stddev(-99:3:9:99)', which will also cause the standard deviation to
+# be computed.
+class StatisticsConfig:
+    # e.g. c = StatisticsConfig('mean+stddev(-99:3:9:99)',
+    #                           {'dimension': 400, 'descriptor': 'tdnn2'})
+    def __init__(self, config_string, input):
+
+        self.input_dim = input['dimension']
+        self.input_descriptor = input['descriptor']
+
+        m = re.search("(mean|mean\+stddev|mean\+count|mean\+stddev\+count)\((-?\d+):(-?\d+):(-?\d+):(-?\d+)\)",
+                      config_string)
+        if m is None:
+            raise Exception("Invalid splice-index or statistics-config string: " + config_string)
+        self.output_stddev = (m.group(1) in ['mean+stddev', 'mean+stddev+count'])
+        self.output_log_counts = (m.group(1) in ['mean+count', 'mean+stddev+count'])
+        self.left_context = -int(m.group(2))
+        self.input_period = int(m.group(3))
+        self.stats_period = int(m.group(4))
+        self.right_context = int(m.group(5))
+        if not (self.left_context > 0 and self.right_context > 0 and
+                self.input_period > 0 and self.stats_period > 0 and
+                self.left_context % self.stats_period == 0 and
+                self.right_context % self.stats_period == 0 and
+                self.stats_period % self.input_period == 0):
+            raise Exception("Invalid configuration of statistics-extraction: " + config_string)
+
+    # OutputDim() returns the output dimension of the node that this produces.
+    def OutputDim(self):
+        return (self.input_dim * (2 if self.output_stddev else 1) +
+                (1 if self.output_log_counts else 0))
+
+    # OutputDims() returns an array of output dimensions, consisting of
+    # [ input-dim ] if just "mean" was specified, otherwise
+    # [ input-dim input-dim ]
+    def OutputDims(self):
+        output_dims = [ self.input_dim ]
+        if self.output_stddev:
+            output_dims.append(self.input_dim)
+        if self.output_log_counts:
+            output_dims.append(1)
+        return output_dims
+
+    # Descriptor() returns the textual form of the descriptor by which the
+    # output of this node is to be accessed.
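+    # E.g. (illustrative): for 'mean+stddev(-99:3:9:99)' parsed above,
+    # left_context = right_context = 99 and stats_period = 9, so
+    # Descriptor('Tdnn_stats_2') returns
+    # 'Round(Tdnn_stats_2-pooling-99-99, 9)'.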
+ def Descriptor(self, name): + return 'Round({0}-pooling-{1}-{2}, {3})'.format(name, self.left_context, self.right_context, + self.stats_period) + + def AddLayer(self, config_lines, name): + components = config_lines['components'] + component_nodes = config_lines['component-nodes'] + + components.append('component name={name}-extraction-{lc}-{rc} type=StatisticsExtractionComponent input-dim={dim} ' + 'input-period={input_period} output-period={output_period} include-variance={var} '.format( + name = name, lc = self.left_context, rc = self.right_context, + dim = self.input_dim, input_period = self.input_period, output_period = self.stats_period, + var = ('true' if self.output_stddev else 'false'))) + component_nodes.append('component-node name={name}-extraction-{lc}-{rc} component={name}-extraction-{lc}-{rc} input={input} '.format( + name = name, lc = self.left_context, rc = self.right_context, input = self.input_descriptor)) + stats_dim = 1 + self.input_dim * (2 if self.output_stddev else 1) + components.append('component name={name}-pooling-{lc}-{rc} type=StatisticsPoolingComponent input-dim={dim} ' + 'input-period={input_period} left-context={lc} right-context={rc} num-log-count-features={count} ' + 'output-stddevs={var} '.format(name = name, lc = self.left_context, rc = self.right_context, + dim = stats_dim, input_period = self.stats_period, + count = 1 if self.output_log_counts else 0, + var = ('true' if self.output_stddev else 'false'))) + component_nodes.append('component-node name={name}-pooling-{lc}-{rc} component={name}-pooling-{lc}-{rc} input={name}-extraction-{lc}-{rc} '.format( + name = name, lc = self.left_context, rc = self.right_context)) + + return { 'dimension': self.OutputDim(), + 'descriptor': self.Descriptor(name), + 'dimensions': self.OutputDims() + } From fbc0333e6b79b6e4ffab7c3be2fa15a396ea4f6e Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 24 Nov 2016 00:09:46 -0500 Subject: [PATCH 055/530] asr_diarization: compute_output.sh, SAD decoding scripts and do_segmentation_data_dir.sh --- egs/wsj/s5/steps/nnet3/compute_output.sh | 179 ++++++++++++++++++ egs/wsj/s5/steps/segmentation/decode_sad.sh | 42 ++++ .../segmentation/decode_sad_to_segments.sh | 97 ++++++++++ .../segmentation/do_segmentation_data_dir.sh | 134 +++++++++++++ .../steps/segmentation/internal/make_G_fst.py | 52 +++++ .../segmentation/internal/make_sad_graph.sh | 83 ++++++++ .../internal/post_process_segments.sh | 41 +--- .../segmentation/internal/prepare_sad_lang.py | 94 +++++++++ .../post_process_sad_to_segments.sh | 69 +++++-- .../post_process_sad_to_subsegments.sh | 69 +++++++ 10 files changed, 804 insertions(+), 56 deletions(-) create mode 100755 egs/wsj/s5/steps/nnet3/compute_output.sh create mode 100755 egs/wsj/s5/steps/segmentation/decode_sad.sh create mode 100755 egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh create mode 100755 egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh create mode 100755 egs/wsj/s5/steps/segmentation/internal/make_G_fst.py create mode 100755 egs/wsj/s5/steps/segmentation/internal/make_sad_graph.sh create mode 100755 egs/wsj/s5/steps/segmentation/internal/prepare_sad_lang.py create mode 100644 egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh diff --git a/egs/wsj/s5/steps/nnet3/compute_output.sh b/egs/wsj/s5/steps/nnet3/compute_output.sh new file mode 100755 index 00000000000..f49790bc578 --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/compute_output.sh @@ -0,0 +1,179 @@ +#!/bin/bash + +# Copyright 2012-2015 Johns Hopkins University 
(Author: Daniel Povey).
+#                2016  Vimal Manohar
+# Apache 2.0.
+
+# This script computes the output of a neural net over a data directory,
+# writing it out as matrices (optionally converted to pseudo log-likelihoods
+# using a vector of priors; see the --post-vec option).  If the neural net was
+# built on top of fMLLR transforms from a conventional system, you should
+# provide the --transform-dir option.
+
+# Begin configuration section.
+stage=1
+transform_dir=    # dir to find fMLLR transforms.
+nj=4              # number of jobs.  If --transform-dir set, must match that number!
+cmd=run.pl
+use_gpu=false
+frames_per_chunk=50
+ivector_scale=1.0
+iter=final
+extra_left_context=0
+extra_right_context=0
+extra_left_context_initial=-1
+extra_right_context_final=-1
+frame_subsampling_factor=1
+feat_type=
+compress=false
+online_ivector_dir=
+post_vec=
+output_name=
+get_raw_nnet_from_am=true
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <nnet-dir> <output-dir>"
+  echo "e.g.: steps/nnet3/compute_output.sh --nj 8 \\"
+  echo "--online-ivector-dir exp/nnet3/ivectors_test_eval92 \\"
+  echo "    data/test_eval92_hires exp/nnet3/tdnn exp/nnet3/tdnn/output"
+  echo "main options (for others, see top of script file)"
+  echo "  --transform-dir <decoding-dir>           # directory of previous decoding"
+  echo "                                           # where we can find transforms for SAT systems."
+  echo "  --config <config-file>                   # config containing options"
+  echo "  --nj <nj>                                # number of parallel jobs"
+  echo "  --cmd <cmd>                              # Command to run in parallel with"
+  echo "  --iter <iter>                            # Iteration of model to use; default is final."
+  exit 1;
+fi
+
+data=$1
+srcdir=$2
+dir=$3
+
+if $get_raw_nnet_from_am; then
+  [ ! -f $srcdir/$iter.mdl ] && echo "$0: no such file $srcdir/$iter.mdl" && exit 1
+  model="nnet3-am-copy --raw=true $srcdir/$iter.mdl - |"
+else
+  [ ! -f $srcdir/$iter.raw ] && echo "$0: no such file $srcdir/$iter.raw" && exit 1
+  model="nnet3-copy $srcdir/$iter.raw - |"
+fi
+
+mkdir -p $dir/log
+
+if [ ! -z "$output_name" ]; then
+  echo "rename-node old-name=$output_name new-name=output" > $dir/edits.config
+  model="$model nnet3-copy --edits-config=$dir/edits.config - - |"
+else
+  output_name=output
+fi
+
+[ ! -z "$online_ivector_dir" ] && \
+  extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period"
+
+for f in $data/feats.scp $extra_files; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+
+sdata=$data/split$nj;
+cmvn_opts=`cat $srcdir/cmvn_opts` || exit 1;
+
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+echo $nj > $dir/num_jobs
+
+
+## Set up features.
+if [ -z "$feat_type" ]; then
+  if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=raw; fi
+  echo "$0: feature type is $feat_type"
+fi
+
+splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
+
+case $feat_type in
+  raw) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |";;
+  lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
+    ;;
+  *) echo "$0: invalid feature type $feat_type" && exit 1;
+esac
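+# At this point "$feats" is a pipeline rspecifier that applies CMVN (plus
+# splicing and the LDA transform, for feat_type=lda); the block below
+# optionally composes fMLLR transforms from --transform-dir on top of it.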
+if [ ! -z "$transform_dir" ]; then
+  echo "$0: using transforms from $transform_dir"
+  [ ! -s $transform_dir/num_jobs ] && \
+    echo "$0: expected $transform_dir/num_jobs to contain the number of jobs." && exit 1;
+  nj_orig=$(cat $transform_dir/num_jobs)
+
+  if [ $feat_type == "raw" ]; then trans=raw_trans;
+  else trans=trans; fi
+  if [ $feat_type == "lda" ] && \
+    ! cmp $transform_dir/../final.mat $srcdir/final.mat && \
+    ! cmp $transform_dir/final.mat $srcdir/final.mat; then
+    echo "$0: LDA transforms differ between $srcdir and $transform_dir"
+    exit 1;
+  fi
+  if [ ! -f $transform_dir/$trans.1 ]; then
+    echo "$0: expected $transform_dir/$trans.1 to exist (--transform-dir option)"
+    exit 1;
+  fi
+  if [ $nj -ne $nj_orig ]; then
+    # Copy the transforms into an archive with an index.
+    for n in $(seq $nj_orig); do cat $transform_dir/$trans.$n; done | \
+      copy-feats ark:- ark,scp:$dir/$trans.ark,$dir/$trans.scp || exit 1;
+    feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk scp:$dir/$trans.scp ark:- ark:- |"
+  else
+    feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/$trans.JOB ark:- ark:- |"
+  fi
+elif grep 'transform-feats --utt2spk' $srcdir/log/train.1.log >&/dev/null; then
+  echo "$0: **WARNING**: you seem to be using a neural net system trained with transforms,"
+  echo "  but you are not providing the --transform-dir option in test time."
+fi
+##
+
+if [ ! -z "$online_ivector_dir" ]; then
+  ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1;
+  ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period"
+fi
+
+frame_subsampling_opt=
+if [ $frame_subsampling_factor -ne 1 ]; then
+  # e.g. for 'chain' systems
+  frame_subsampling_opt="--frame-subsampling-factor=$frame_subsampling_factor"
+fi
+
+output_wspecifier="ark:| copy-feats --compress=$compress ark:- ark:- | gzip -c > $dir/nnet_output.JOB.gz"
+
+if [ ! -z "$post_vec" ]; then
+  if [ $stage -le 1 ]; then
+    copy-vector --binary=false $post_vec - | \
+      awk '{for (i = 2; i < NF; i++) { sum += $i; };
+            printf ("[");
+            for (i = 2; i < NF; i++) { printf " "log($i/sum); };
+            print (" ]");}' > $dir/log_priors.vec
+  fi
+
+  output_wspecifier="ark:| matrix-add-offset ark:- 'vector-scale --scale=-1.0 $dir/log_priors.vec - |' ark:- | copy-feats --compress=$compress ark:- ark:- | gzip -c > $dir/log_likes.JOB.gz"
+fi
+
+gpu_opt="--use-gpu=no"
+gpu_queue_opt=
+
+if $use_gpu; then
+  gpu_queue_opt="--gpu 1"
+  gpu_opt="--use-gpu=yes"
+fi
+
+if [ $stage -le 2 ]; then
+  $cmd $gpu_queue_opt JOB=1:$nj $dir/log/compute_output.JOB.log \
+    nnet3-compute $gpu_opt $ivector_opts $frame_subsampling_opt \
+     --frames-per-chunk=$frames_per_chunk \
+     --extra-left-context=$extra_left_context \
+     --extra-right-context=$extra_right_context \
+     --extra-left-context-initial=$extra_left_context_initial \
+     --extra-right-context-final=$extra_right_context_final \
+     "$model" "$feats" "$output_wspecifier" || exit 1;
+fi
+
+exit 0;
+
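To make the priors step above concrete: the awk one-liner turns a vector of
(possibly unnormalized) posterior counts into log-priors, and matrix-add-offset
then subtracts those log-priors from each row of the network's log-posterior
output, giving pseudo log-likelihoods. A minimal Python sketch of the same
arithmetic (illustrative only: the 'posts' values are made up, and the real
pipeline operates on Kaldi archives rather than lists):

    import math

    posts = [3000.0, 7000.0]   # posterior counts, as in the --post-vec file
    total = sum(posts)

    # what the awk command writes to log_priors.vec:
    log_priors = [math.log(p / total) for p in posts]

    # what matrix-add-offset with the (-1.0)-scaled priors vector does to one
    # row of nnet output (log-posteriors -> pseudo log-likelihoods):
    nnet_row = [-0.2, -1.7]
    pseudo_loglikes = [o - lp for o, lp in zip(nnet_row, log_priors)]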
diff --git a/egs/wsj/s5/steps/segmentation/decode_sad.sh b/egs/wsj/s5/steps/segmentation/decode_sad.sh
new file mode 100755
index 00000000000..9758d36e24e
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/decode_sad.sh
@@ -0,0 +1,42 @@
+#! /bin/bash
+
+set -e
+set -o pipefail
+
+cmd=run.pl
+acwt=0.1
+beam=8
+max_active=1000
+
+. path.sh
+
+. utils/parse_options.sh
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 <graph-dir> <log-likes-dir> <decode-dir>"
+  echo " e.g.: $0 exp/sad/graph exp/sad/nnet_output exp/sad/decode"
+  exit 1
+fi
+
+graph_dir=$1
+log_likes_dir=$2
+dir=$3
+
+nj=`cat $log_likes_dir/num_jobs`
+echo $nj > $dir/num_jobs
+
+for f in $dir/trans.mdl $log_likes_dir/log_likes.1.gz $graph_dir/HCLG.fst; do
+  if [ ! -f $f ]; then
+    echo "$0: Could not find file $f"
+    exit 1
+  fi
+done
+
+decoder_opts+=(--acoustic-scale=$acwt --beam=$beam --max-active=$max_active)
+
+$cmd JOB=1:$nj $dir/log/decode.JOB.log \
+  decode-faster-mapped ${decoder_opts[@]} \
+  $dir/trans.mdl \
+  $graph_dir/HCLG.fst "ark:gunzip -c $log_likes_dir/log_likes.JOB.gz |" \
+  ark:/dev/null ark:- \| \
+  ali-to-phones --per-frame $dir/trans.mdl ark:- \
+  "ark:|gzip -c > $dir/ali.JOB.gz"
diff --git a/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh b/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh
new file mode 100755
index 00000000000..8f4ed60dfda
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh
@@ -0,0 +1,97 @@
+#! /bin/bash
+
+set -e
+set -o pipefail
+set -u
+
+stage=-1
+segmentation_config=conf/segmentation.conf
+cmd=run.pl
+
+# Viterbi options
+min_silence_duration=30   # minimum number of frames for silence
+min_speech_duration=30    # minimum number of frames for speech
+frame_subsampling_factor=1
+nonsil_transition_probability=0.1
+sil_transition_probability=0.1
+sil_prior=0.5
+speech_prior=0.5
+
+# Decoding options
+acwt=1
+beam=10
+max_active=7000
+
+. utils/parse_options.sh
+
+if [ $# -ne 4 ]; then
+  echo "Usage: $0 <data-dir> <sad-likes-dir> <work-dir> <out-data-dir>"
+  echo " e.g.: $0 data/babel_bengali_dev10h exp/nnet3_sad_snr/tdnn_b_n4/sad_babel_bengali_dev10h exp/nnet3_sad_snr/tdnn_b_n4/segmentation_babel_bengali_dev10h exp/nnet3_sad_snr/tdnn_b_n4/segmentation_babel_bengali_dev10h/babel_bengali_dev10h.seg"
+  exit 1
+fi
+
+data=$1
+sad_likes_dir=$2
+dir=$3
+out_data=$4
+
+t=sil${sil_prior}_sp${speech_prior}
+lang=$dir/lang_test_${t}
+
+min_silence_duration=`perl -e "print (int($min_silence_duration / $frame_subsampling_factor))"`
+min_speech_duration=`perl -e "print (int($min_speech_duration / $frame_subsampling_factor))"`
+
+if [ $stage -le 1 ]; then
+  mkdir -p $lang
+
+  steps/segmentation/internal/prepare_sad_lang.py \
+    --phone-transition-parameters="--phone-list=1 --min-duration=$min_silence_duration --end-transition-probability=$sil_transition_probability" \
+    --phone-transition-parameters="--phone-list=2 --min-duration=$min_speech_duration --end-transition-probability=$nonsil_transition_probability" $lang
+
+  cp $lang/phones.txt $lang/words.txt
+fi
+
+feat_dim=2    # dummy. We don't need this.
+if [ $stage -le 2 ]; then
+  $cmd $dir/log/create_transition_model.log gmm-init-mono \
+    $lang/topo $feat_dim - $dir/tree \| \
+    copy-transition-model --binary=false - $dir/trans.mdl || exit 1
+fi
+
+if [ $stage -le 3 ]; then
+  cat > $lang/word2prior <<EOF
+1 $sil_prior
+2 $speech_prior
+EOF
+  steps/segmentation/internal/make_G_fst.py --word2prior-map=$lang/word2prior | \
+    fstcompile --isymbols=$lang/words.txt --osymbols=$lang/words.txt > $lang/G.fst
+fi
+
+graph_dir=$dir/graph_test_${t}
+
+if [ $stage -le 4 ]; then
+  $cmd $dir/log/make_vad_graph.log \
+    steps/segmentation/internal/make_sad_graph.sh --iter trans \
+    $lang $dir $dir/graph_test_${t} || exit 1
+fi
+
+if [ $stage -le 5 ]; then
+  steps/segmentation/decode_sad.sh \
+    --acwt $acwt --beam $beam --max-active $max_active \
+    $graph_dir $sad_likes_dir $dir
+fi
+
+if [ $stage -le 6 ]; then
+  cat > $lang/phone2sad_map <<EOF
+1 0
+2 1
+EOF
+fi
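The G.fst produced in stage 3 above is a single-state unigram loop over the
silence and speech "words" (1 and 2), with one self-loop arc per word and an
end-of-utterance final weight. A minimal Python sketch of the arc-weight
computation (mirroring internal/make_G_fst.py, which appears later in this
patch; the prior and end-probability values shown are just the defaults from
the scripts above):

    import math

    priors = {'1': 0.5, '2': 0.5}   # contents of $lang/word2prior
    end_prob = 0.01                 # --end-probability default
    total = sum(priors.values()) + end_prob

    for word, p in priors.items():
        # text-FST arc line: "src dest ilabel olabel weight"
        print(0, 0, word, word, -math.log(p / total))
    # final-state line for state 0
    print(0, -math.log(end_prob / total))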
diff --git a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh
new file mode 100755
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh
+# ... => 8kHz sampling frequency.
+do_downsampling=false
+
+# Segmentation configs
+min_silence_duration=30
+min_speech_duration=30
+segmentation_config=conf/segmentation_speech.conf
+
+echo $*
+
+. utils/parse_options.sh
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 <src-data-dir> <data-dir> <sad-nnet-dir>"
+  echo " e.g.: $0 ~/workspace/egs/ami/s5b/data/sdm1/dev data/ami_sdm1_dev exp/nnet3_sad_snr/nnet_tdnn_j_n4"
+  exit 1
+fi
+
+src_data_dir=$1
+data_dir=$2
+sad_nnet_dir=$3
+
+affix=${affix:+_$affix}
+feat_affix=${feat_affix:+_$feat_affix}
+
+data_id=`basename $data_dir`
+sad_dir=${sad_nnet_dir}/${sad_name}${affix}_${data_id}_whole${feat_affix}
+seg_dir=${sad_nnet_dir}/${segmentation_name}${affix}_${data_id}_whole${feat_affix}
+
+export PATH="$KALDI_ROOT/tools/sph2pipe_v2.5/:$PATH"
+[ ! -z `which sph2pipe` ]
+
+if [ $stage -le 0 ]; then
+  utils/data/convert_data_dir_to_whole.sh $src_data_dir ${data_dir}_whole
+
+  if $do_downsampling; then
+    freq=`cat $mfcc_config | perl -pe 's/\s*#.*//g' | grep "sample-frequency=" | awk -F'=' '{if (NF == 0) print 16000; else print $2}'`
+    sox=`which sox`
+
+    cat $src_data_dir/wav.scp | python -c "import sys
+for line in sys.stdin.readlines():
+  splits = line.strip().split()
+  if splits[-1] == '|':
+    out_line = line.strip() + ' $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |'
+  else:
+    out_line = '{0} cat {1} | $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |'.format(splits[0], ' '.join(splits[1:]))
+  print (out_line)" > ${data_dir}_whole/wav.scp
+  fi
+
+  utils/copy_data_dir.sh ${data_dir}_whole ${data_dir}_whole${feat_affix}_hires
+fi
+
+test_data_dir=${data_dir}_whole${feat_affix}_hires
+
+if [ $stage -le 1 ]; then
+  steps/make_mfcc.sh --mfcc-config $mfcc_config --nj $reco_nj --cmd "$train_cmd" \
+    ${data_dir}_whole${feat_affix}_hires exp/make_hires/${data_id}_whole${feat_affix} mfcc_hires
+  steps/compute_cmvn_stats.sh ${data_dir}_whole${feat_affix}_hires exp/make_hires/${data_id}_whole${feat_affix} mfcc_hires
+fi
+
+post_vec=$sad_nnet_dir/post_${output_name}.vec
+if [ ! -f $sad_nnet_dir/post_${output_name}.vec ]; then
+  echo "$0: Could not find $sad_nnet_dir/post_${output_name}.vec. See the last stage of local/segmentation/run_train_sad.sh"
+  exit 1
+fi
+
+if [ $stage -le 2 ]; then
+  steps/nnet3/compute_output.sh --nj $reco_nj --cmd "$train_cmd" \
+    --post-vec "$post_vec" \
+    --iter $iter \
+    --extra-left-context $extra_left_context \
+    --extra-right-context $extra_right_context \
+    --frames-per-chunk 150 \
+    --stage $sad_stage --output-name $output_name \
+    --frame-subsampling-factor $frame_subsampling_factor \
+    --get-raw-nnet-from-am false ${test_data_dir} $sad_nnet_dir $sad_dir
+fi
+
+if [ $stage -le 3 ]; then
+  steps/segmentation/decode_sad_to_segments.sh \
+    --frame-subsampling-factor $frame_subsampling_factor \
+    --min-silence-duration $min_silence_duration \
+    --min-speech-duration $min_speech_duration \
+    --segmentation-config $segmentation_config --cmd "$train_cmd" \
+    ${test_data_dir} $sad_dir $seg_dir $seg_dir/${data_id}_seg
+fi
+
+# Subsegment data directory
+if [ $stage -le 4 ]; then
+  rm $seg_dir/${data_id}_seg/feats.scp || true
+  utils/data/get_reco2num_frames.sh ${test_data_dir}
+  awk '{print $1" "$2}' ${seg_dir}/${data_id}_seg/segments | \
+    utils/apply_map.pl -f 2 ${test_data_dir}/reco2num_frames > \
+    $seg_dir/${data_id}_seg/utt2max_frames
+
+  frame_shift_info=`cat $mfcc_config | steps/segmentation/get_frame_shift_info_from_config.pl`
+  utils/data/get_subsegment_feats.sh ${test_data_dir}/feats.scp \
+    $frame_shift_info $seg_dir/${data_id}_seg/segments | \
+    utils/data/fix_subsegmented_feats.pl ${seg_dir}/${data_id}_seg/utt2max_frames > \
+    $seg_dir/${data_id}_seg/feats.scp
+  steps/compute_cmvn_stats.sh --fake $seg_dir/${data_id}_seg
+fi
diff --git a/egs/wsj/s5/steps/segmentation/internal/make_G_fst.py b/egs/wsj/s5/steps/segmentation/internal/make_G_fst.py
new file mode 100755
index 00000000000..5ad7e867d10
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/internal/make_G_fst.py
@@ -0,0 +1,52 @@
+#! /usr/bin/env python
+
+from __future__ import print_function
+import argparse, math
+
+def ParseArgs():
+    parser = argparse.ArgumentParser(description="""Make a simple unigram FST
+for decoding for segmentation purpose.""")
+
+    parser.add_argument("--word2prior-map", type=str, required=True,
+                        help = "A file with priors for different words")
+    parser.add_argument("--end-probability", type=float, default=0.01,
+                        help = "Ending probability")
+
+    args = parser.parse_args()
+
+    return args
+
+def ReadMap(map_file):
+    out_map = {}
+    sum_prob = 0
+    for line in open(map_file):
+        parts = line.strip().split()
+        if len(parts) == 0:
+            continue
+        if len(parts) != 2:
+            raise Exception("Invalid line {0} in {1}".format(line.strip(), map_file))
+
+        if parts[0] in out_map:
+            raise Exception("Duplicate entry of {0} in {1}".format(parts[0], map_file))
+
+        prob = float(parts[1])
+        out_map[parts[0]] = prob
+
+        sum_prob += prob
+
+    return (out_map, sum_prob)
+
+def Main():
+    args = ParseArgs()
+
+    word2prior, sum_prob = ReadMap(args.word2prior_map)
+    sum_prob += args.end_probability
+
+    for w, p in word2prior.items():
+        print ("0 0 {word} {word} {log_p}".format(word = w,
+                                                  log_p = -math.log(p / sum_prob)))
+    print ("0 {log_p}".format(log_p = -math.log(args.end_probability / sum_prob)))
+
+if __name__ == '__main__':
+    Main()
diff --git a/egs/wsj/s5/steps/segmentation/internal/make_sad_graph.sh b/egs/wsj/s5/steps/segmentation/internal/make_sad_graph.sh
new file mode 100755
index 00000000000..5edb3eb2bb6
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/internal/make_sad_graph.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+# Copyright 2016  Vimal Manohar
+
+# Begin configuration section.
+stage=0 +cmd=run.pl +iter=final # use $iter.mdl from $model_dir +tree=tree +tscale=1.0 # transition scale. +loopscale=0.1 # scale for self-loops. +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: $0 [options] " + echo " e.g.: $0 exp/vad_dev/lang exp/vad_dev exp/vad_dev/graph" + echo "Makes the graph in \$dir, corresponding to the model in \$model_dir" + exit 1; +fi + +lang=$1 +model=$2/$iter.mdl +tree=$2/$tree +dir=$3 + +for f in $lang/G.fst $model $tree; do + if [ ! -f $f ]; then + echo "$0: expected $f to exist" + exit 1; + fi +done + +mkdir -p $dir $lang/tmp + +clg=$lang/tmp/CLG.fst + +if [[ ! -s $clg || $clg -ot $lang/G.fst ]]; then + echo "$0: creating CLG." + + fstcomposecontext --context-size=1 --central-position=0 \ + $lang/tmp/ilabels < $lang/G.fst | \ + fstarcsort --sort_type=ilabel > $clg + fstisstochastic $clg || echo "[info]: CLG not stochastic." +fi + +if [[ ! -s $dir/Ha.fst || $dir/Ha.fst -ot $model || $dir/Ha.fst -ot $lang/tmp/ilabels ]]; then + make-h-transducer --disambig-syms-out=$dir/disambig_tid.int \ + --transition-scale=$tscale $lang/tmp/ilabels $tree $model \ + > $dir/Ha.fst || exit 1; +fi + +if [[ ! -s $dir/HCLGa.fst || $dir/HCLGa.fst -ot $dir/Ha.fst || $dir/HCLGa.fst -ot $clg ]]; then + fsttablecompose $dir/Ha.fst $clg | fstdeterminizestar --use-log=true \ + | fstrmsymbols $dir/disambig_tid.int | fstrmepslocal | \ + fstminimizeencoded > $dir/HCLGa.fst || exit 1; + fstisstochastic $dir/HCLGa.fst || echo "HCLGa is not stochastic" +fi + +if [[ ! -s $dir/HCLG.fst || $dir/HCLG.fst -ot $dir/HCLGa.fst ]]; then + add-self-loops --self-loop-scale=$loopscale --reorder=true \ + $model < $dir/HCLGa.fst > $dir/HCLG.fst || exit 1; + + if [ $tscale == 1.0 -a $loopscale == 1.0 ]; then + # No point doing this test if transition-scale not 1, as it is bound to fail. + fstisstochastic $dir/HCLG.fst || echo "[info]: final HCLG is not stochastic." + fi +fi + +# keep a copy of the lexicon and a list of silence phones with HCLG... +# this means we can decode without reference to the $lang directory. + +cp $lang/words.txt $dir/ || exit 1; +cp $lang/phones.txt $dir/ 2> /dev/null # ignore the error if it's not there. + +# to make const fst: +# fstconvert --fst_type=const $dir/HCLG.fst $dir/HCLG_c.fst +am-info --print-args=false $model | grep pdfs | awk '{print $NF}' > $dir/num_pdfs + diff --git a/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh b/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh index c2750b4a895..e37d5dc2f62 100755 --- a/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh +++ b/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh @@ -51,31 +51,11 @@ for f in $dir/orig_segmentation.1.gz $data_dir/segments; do fi done -cat < $dir/segmentation.conf -pad_length=$pad_length # Pad speech segments by this many frames on either side -max_blend_length=$max_blend_length # Maximum duration of speech that will be removed as part - # of smoothing process. This is only if there are no other - # speech segments nearby. -max_intersegment_length=$max_intersegment_length # Merge nearby speech segments if the silence - # between them is less than this many frames. 
-post_pad_length=$post_pad_length # Pad speech segments by this many frames on either side - # after the merging process using max_intersegment_length -max_segment_length=$max_segment_length # Segments that are longer than this are split into - # overlapping frames. -overlap_length=$overlap_length # Overlapping frames when segments are split. - # See the above option. -min_silence_length=$min_silence_length # Min silence length at which to split very long segments - -frame_shift=$frame_shift -EOF - nj=`cat $dir/num_jobs` || exit 1 -if [ $stage -le 1 ]; then - rm -r $segmented_data_dir || true - utils/data/convert_data_dir_to_whole.sh $data_dir $segmented_data_dir || exit 1 - rm $segmented_data_dir/text -fi +[ $pad_length -eq -1 ] && pad_length= +[ $post_pad_length -eq -1 ] && post_pad_length= +[ $max_blend_length -eq -1 ] && max_blend_length= if [ $stage -le 2 ]; then # Post-process the orignal SAD segmentation using the following steps: @@ -94,10 +74,10 @@ if [ $stage -le 2 ]; then $cmd JOB=1:$nj $dir/log/post_process_segmentation.JOB.log \ gunzip -c $dir/orig_segmentation.JOB.gz \| \ segmentation-post-process --merge-adjacent-segments --max-intersegment-length=0 ark:- ark:- \| \ - segmentation-post-process --max-blend-length=$max_blend_length --blend-short-segments-class=1 ark:- ark:- \| \ - segmentation-post-process --remove-labels=0 --pad-label=1 --pad-length=$pad_length ark:- ark:- \| \ + segmentation-post-process ${max_blend_length:+--max-blend-length=$max_blend_length --blend-short-segments-class=1} ark:- ark:- \| \ + segmentation-post-process --remove-labels=0 ${pad_length:+--pad-label=1 --pad-length=$pad_length} ark:- ark:- \| \ segmentation-post-process --merge-adjacent-segments --max-intersegment-length=$max_intersegment_length ark:- ark:- \| \ - segmentation-post-process --pad-label=1 --pad-length=$post_pad_length ark:- ark:- \| \ + segmentation-post-process ${post_pad_length:+--pad-label=1 --pad-length=$post_pad_length} ark:- ark:- \| \ segmentation-split-segments --alignments="ark,s,cs:gunzip -c $dir/orig_segmentation.JOB.gz | segmentation-to-ali ark:- ark:- |" \ --max-segment-length=$max_segment_length --min-alignment-chunk-length=$min_silence_length --ali-label=0 ark:- ark:- \| \ segmentation-split-segments \ @@ -118,12 +98,3 @@ if [ ! -s $segmented_data_dir/utt2spk ] || [ ! -s $segmented_data_dir/segments ] echo "$0: Segmentation failed to generate segments or utt2spk!" exit 1 fi - -utils/utt2spk_to_spk2utt.pl $segmented_data_dir/utt2spk > $segmented_data_dir/spk2utt || exit 1 -utils/fix_data_dir.sh $segmented_data_dir - -if [ ! -s $segmented_data_dir/utt2spk ] || [ ! -s $segmented_data_dir/segments ]; then - echo "$0: Segmentation failed to generate segments or utt2spk!" - exit 1 -fi - diff --git a/egs/wsj/s5/steps/segmentation/internal/prepare_sad_lang.py b/egs/wsj/s5/steps/segmentation/internal/prepare_sad_lang.py new file mode 100755 index 00000000000..17b039015d2 --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/internal/prepare_sad_lang.py @@ -0,0 +1,94 @@ +#! /usr/bin/env python + +from __future__ import print_function +import argparse, shlex + +def GetArgs(): + parser = argparse.ArgumentParser(description="""This script generates a lang +directory for purpose of segmentation. It takes as arguments the list of phones, +the corresponding min durations and end transition probability.""") + + parser.add_argument("--phone-transition-parameters", dest='phone_transition_para_array', + type=str, action='append', required = True, + help = "Options to build topology. 
\n"
+                        "--phone-list=<phone-list>  # Colon-separated list of phones\n"
+                        "--min-duration=<int>  # Min duration for the phones\n"
+                        "--end-transition-probability=<float>  # Probability of the end transition after the minimum duration\n")
+    parser.add_argument("dir", type=str,
+                        help = "Output lang directory")
+    args = parser.parse_args()
+    return args
+
+
+def ParsePhoneTransitionParameters(para_array):
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--phone-list", type=str, required=True,
+                        help="Colon-separated list of phones")
+    parser.add_argument("--min-duration", type=int, default=3,
+                        help="Minimum number of states for the phone")
+    parser.add_argument("--end-transition-probability", type=float, default=0.1,
+                        help="Probability of the end transition after the minimum duration")
+
+    phone_transition_parameters = [ parser.parse_args(shlex.split(x)) for x in para_array ]
+
+    for t in phone_transition_parameters:
+        if (t.end_transition_probability > 1.0 or
+            t.end_transition_probability < 0.0):
+            raise ValueError("Expected --end-transition-probability to be "
+                             "between 0 and 1, got {0} for phones {1}".format(
+                                 t.end_transition_probability, t.phone_list))
+        if t.min_duration > 100 or t.min_duration < 1:
+            raise ValueError("Expected --min-duration to be "
+                             "between 1 and 100, got {0} for phones {1}".format(
+                                 t.min_duration, t.phone_list))
+
+        t.phone_list = t.phone_list.split(":")
+
+    return phone_transition_parameters
+
+def GetPhoneMap(phone_transition_parameters):
+    phone2int = {}
+    n = 1
+    for t in phone_transition_parameters:
+        for p in t.phone_list:
+            if p in phone2int:
+                raise Exception("Phone {0} found in multiple topologies".format(p))
+            phone2int[p] = n
+            n += 1
+
+    return phone2int
+
+def Main():
+    args = GetArgs()
+    phone_transition_parameters = ParsePhoneTransitionParameters(args.phone_transition_para_array)
+
+    phone2int = GetPhoneMap(phone_transition_parameters)
+
+    topo = open("{0}/topo".format(args.dir), 'w')
+
+    print ("<Topology>", file = topo)
+
+    for t in phone_transition_parameters:
+        print ("<TopologyEntry>", file = topo)
+        print ("<ForPhones>", file = topo)
+        print ("{0}".format(" ".join([str(phone2int[p]) for p in t.phone_list])), file = topo)
+        print ("</ForPhones>", file = topo)
+
+        for state in range(0, t.min_duration - 1):
+            print("<State> {0} <PdfClass> 0 <Transition> {1} 1.0 </State>".format(state, state + 1), file = topo)
+        print("<State> {state} <PdfClass> 0 <Transition> {state} {self_prob} <Transition> {next_state} {next_prob} </State>".format(
+            state = t.min_duration - 1, next_state = t.min_duration,
+            self_prob = 1 - t.end_transition_probability,
+            next_prob = t.end_transition_probability), file = topo)
+        print("<State> {state} </State>".format(state = t.min_duration), file = topo)  # Final state
+        print ("</TopologyEntry>", file = topo)
+    print ("</Topology>", file = topo)
+
+    phones_file = open("{0}/phones.txt".format(args.dir), 'w')
+
+    for p, n in sorted(list(phone2int.items()), key = lambda x: x[1]):
+        print ("{0} {1}".format(p, n), file = phones_file)
+
+if __name__ == '__main__':
+    Main()
diff --git a/egs/wsj/s5/steps/segmentation/post_process_sad_to_segments.sh b/egs/wsj/s5/steps/segmentation/post_process_sad_to_segments.sh
index f4011f20a03..c1006d09678 100755
--- a/egs/wsj/s5/steps/segmentation/post_process_sad_to_segments.sh
+++ b/egs/wsj/s5/steps/segmentation/post_process_sad_to_segments.sh
@@ -16,7 +16,9 @@
 frame_shift=0.01
 weight_threshold=0.5
 ali_suffix=_acwt0.1
-phone_map=
+frame_subsampling_factor=1
+
+phone2sad_map=
 
 .
utils/parse_options.sh @@ -48,56 +50,81 @@ fi dir=$1 segmented_data_dir=$2 -cat $data_dir/segments | awk '{print $1" "$2}' | \ - utils/utt2spk_to_spk2utt.pl > $data_dir/reco2utt - -utils/split_data.sh $data_dir $nj - -for n in `seq $nj`; do - cat $data_dir/split$nj/$n/segments | awk '{print $1" "$2}' | \ - utils/utt2spk_to_spk2utt.pl > $data_dir/split$nj/$n/reco2utt -done - +utils/data/get_reco2utt.sh $data_dir mkdir -p $dir if [ ! -z "$vad_dir" ]; then nj=`cat $vad_dir/num_jobs` || exit 1 + + utils/split_data.sh $data_dir $nj - if [ -z "$phone_map" ]; then - phone_map=$dir/phone_map + for n in `seq $nj`; do + cat $data_dir/split$nj/$n/segments | awk '{print $1" "$2}' | \ + utils/utt2spk_to_spk2utt.pl > $data_dir/split$nj/$n/reco2utt + done + + if [ -z "$phone2sad_map" ]; then + phone2sad_map=$dir/phone2sad_map { cat $lang/phones/silence.int | awk '{print $1" 0"}'; cat $lang/phones/nonsilence.int | awk '{print $1" 1"}'; - } | sort -k1,1 -n > $dir/phone_map + } | sort -k1,1 -n > $dir/phone2sad_map fi + frame_shift_subsampled=`perl -e "print ($frame_subsampling_factor * $frame_shift)"` + if [ $stage -le 0 ]; then # Convert the original SAD into segmentation $cmd JOB=1:$nj $dir/log/segmentation.JOB.log \ - segmentation-init-from-ali --reco2utt-rspecifier="ark,t:$data_dir/split$nj/JOB/reco2utt" \ - --segmentation-rspecifier="ark:segmentation-init-from-segments --shift-to-zero=false --frame-shift=$frame_shift $data_dir/split$nj/JOB/segments ark:- |" \ + segmentation-init-from-ali \ "ark:gunzip -c $vad_dir/ali${ali_suffix}.JOB.gz |" ark:- \| \ - segmentation-copy --label-map=$phone_map ark:- \ + segmentation-combine-segments ark:- \ + "ark:segmentation-init-from-segments --shift-to-zero=false --frame-shift=$frame_shift_subsampled $data_dir/split$nj/JOB/segments ark:- |" \ + "ark,t:$data_dir/split$nj/JOB/reco2utt" ark:- \| \ + segmentation-copy --label-map=$phone2sad_map \ + --frame-subsampling-factor=$frame_subsampling_factor ark:- \ "ark:| gzip -c > $dir/orig_segmentation.JOB.gz" fi else + utils/split_data.sh $data_dir $nj + for n in `seq $nj`; do - utils/filter_scp.pl $data_dir/split$nj/$n/reco2utt $weights_scp > $dir/weights.$n.scp + utils/data/get_reco2utt.sh $data_dir/split$nj/$n + utils/filter_scp.pl $data_dir/split$nj/$n/reco2utt $weights_scp > \ + $dir/weights.$n.scp done $cmd JOB=1:$nj $dir/log/weights_to_segments.JOB.log \ copy-vector scp:$dir/weights.JOB.scp ark,t:- \| \ awk -v t=$weight_threshold '{printf $1; for (i=3; i < NF; i++) { if ($i >= t) printf (" 1"); else printf (" 0"); }; print "";}' \| \ - segmentation-init-from-ali --reco2utt-rspecifier="ark,t:$data_dir/split$nj/JOB/reco2utt" \ - --segmentation-rspecifier="ark:segmentation-init-from-segments --shift-to-zero=false --frame-shift=$frame_shift $data_dir/split$nj/JOB/segments ark:- |" \ - ark,t:- "ark:| gzip -c > $dir/orig_segmentation.JOB.gz" + segmentation-init-from-ali \ + ark,t:- ark:- \| segmentation-combine-segments ark:- \ + "ark:segmentation-init-from-segments --shift-to-zero=false --frame-shift=$frame_shift_subsampled $data_dir/split$nj/JOB/segments ark:- |" \ + "ark,t:$data_dir/split$nj/JOB/reco2utt" ark:- \| \ + segmentation-copy --frame-subsampling-factor=$frame_subsampling_factor \ + ark:- "ark:| gzip -c > $dir/orig_segmentation.JOB.gz" fi echo $nj > $dir/num_jobs +if [ $stage -le 1 ]; then + rm -r $segmented_data_dir || true + utils/data/convert_data_dir_to_whole.sh $data_dir $segmented_data_dir || exit 1 + rm $segmented_data_dir/text || true +fi + steps/segmentation/internal/post_process_segments.sh \ --stage 
$stage --cmd "$cmd" \ --config $segmentation_config --frame-shift $frame_shift \ $data_dir $dir $segmented_data_dir + +utils/utt2spk_to_spk2utt.pl $segmented_data_dir/utt2spk > $segmented_data_dir/spk2utt || exit 1 +utils/fix_data_dir.sh $segmented_data_dir + +if [ ! -s $segmented_data_dir/utt2spk ] || [ ! -s $segmented_data_dir/segments ]; then + echo "$0: Segmentation failed to generate segments or utt2spk!" + exit 1 +fi + diff --git a/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh b/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh new file mode 100644 index 00000000000..8cfcaa40cda --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh @@ -0,0 +1,69 @@ +#! /bin/bash + +# Copyright 2015 Vimal Manohar +# Apache 2.0. + +set -e -o pipefail -u +. path.sh + +cmd=run.pl +stage=-10 + +segmentation_config=conf/segmentation.conf +nj=18 + +frame_shift=0.01 + +. utils/parse_options.sh + +if [ $# -ne 5 ]; then + echo "Usage: $0 " + echo " e.g.: $0 data/dev_aspire_whole exp/vad_dev_aspire data/dev_aspire_seg" + exit 1 +fi + +data_dir=$1 +phone2sad_map=$2 +vad_dir=$3 +dir=$4 +segmented_data_dir=$5 + +mkdir -p $dir + +nj=`cat $vad_dir/num_jobs` || exit 1 + +utils/split_data.sh $data_dir $nj + +if [ $stage -le 0 ]; then + # Convert the original SAD into segmentation + $cmd JOB=1:$nj $dir/log/segmentation.JOB.log \ + segmentation-init-from-ali \ + "ark:gunzip -c $vad_dir/ali.JOB.gz |" ark:- \| \ + segmentation-copy --label-map=$phone2sad_map ark:- \ + "ark:| gzip -c > $dir/orig_segmentation.JOB.gz" +fi + +echo $nj > $dir/num_jobs + +if [ $stage -le 1 ]; then + rm -r $segmented_data_dir || true + utils/data/convert_data_dir_to_whole.sh $data_dir $segmented_data_dir || exit 1 + rm $segmented_data_dir/text || true +fi + +steps/segmentation/internal/post_process_segments.sh \ + --stage $stage --cmd "$cmd" \ + --config $segmentation_config --frame-shift $frame_shift \ + $data_dir $dir $segmented_data_dir + +mv $segmented_data_dir/segments $segmented_data_dir/sub_segments +utils/data/subsegment_data_dir.sh $data_dir $segmented_data_dir/sub_segments $segmented_data_dir + +utils/utt2spk_to_spk2utt.pl $segmented_data_dir/utt2spk > $segmented_data_dir/spk2utt || exit 1 +utils/fix_data_dir.sh $segmented_data_dir + +if [ ! -s $segmented_data_dir/utt2spk ] || [ ! -s $segmented_data_dir/segments ]; then + echo "$0: Segmentation failed to generate segments or utt2spk!" 
+ exit 1 +fi + From e80e4a99d52f855e5c15fbc23d5c96593e65e410 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 18 Nov 2016 22:07:53 -0500 Subject: [PATCH 056/530] asr_diarization: Adding min-extra-left-context --- egs/wsj/s5/steps/libs/common.py | 1 + .../nnet3/train/frame_level_objf/common.py | 11 +++++++ egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 32 +++++++++++++++++-- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/egs/wsj/s5/steps/libs/common.py b/egs/wsj/s5/steps/libs/common.py index 1e0608525ba..f2a336cd640 100644 --- a/egs/wsj/s5/steps/libs/common.py +++ b/egs/wsj/s5/steps/libs/common.py @@ -315,6 +315,7 @@ def split_data(data, num_jobs): run_kaldi_command("utils/split_data.sh {data} {num_jobs}".format( data=data, num_jobs=num_jobs)) + return "{0}/split{1}".format(data, num_jobs) def read_kaldi_matrix(matrix_file): diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index d0cb2a52758..55508daf02c 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -31,6 +31,7 @@ def train_new_models(dir, iter, srand, num_jobs, cache_read_opt, run_opts, frames_per_eg=-1, min_deriv_time=None, max_deriv_time=None, + min_left_context=None, min_right_context=None, extra_egs_copy_cmd=""): """ Called from train_one_iteration(), this model does one iteration of training with 'num_jobs' jobs, and writes files like @@ -64,6 +65,13 @@ def train_new_models(dir, iter, srand, num_jobs, deriv_time_opts.append("--optimization.max-deriv-time={0}".format( max_deriv_time)) + this_random = random.Random(srand) + if min_left_context is not None: + left_context = this_random.randint(min_left_context, left_context) + + if min_right_context is not None: + right_context = this_random.randint(min_right_context, right_context) + context_opts = "--left-context={0} --right-context={1}".format( left_context, right_context) @@ -144,6 +152,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, run_opts, cv_minibatch_size=256, frames_per_eg=-1, min_deriv_time=None, max_deriv_time=None, + min_left_context=None, min_right_context=None, shrinkage_value=1.0, get_raw_nnet_from_am=True, background_process_handler=None, @@ -283,6 +292,8 @@ def train_one_iteration(dir, iter, srand, egs_dir, frames_per_eg=frames_per_eg, min_deriv_time=min_deriv_time, max_deriv_time=max_deriv_time, + min_left_context=min_left_context, + min_right_context=min_right_context, extra_egs_copy_cmd=extra_egs_copy_cmd) [models_to_average, best_model] = common_train_lib.get_successful_models( diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index ae038445fc0..e4af318fb57 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -72,6 +72,18 @@ def get_args(): parser.add_argument("--egs.extra-copy-cmd", type=str, dest='extra_egs_copy_cmd', default = "", help="""Modify egs before passing it to training"""); + parser.add_argument("--trainer.min-chunk-left-context", type=int, + dest='min_chunk_left_context', default=None, + help="""If provided and is less than + --egs.chunk-left-context, then the chunk left context + is randomized between egs.chunk-left-context and + this value.""") + parser.add_argument("--trainer.min-chunk-right-context", type=int, + dest='min_chunk_right_context', default=None, + help="""If provided and is less than + --egs.chunk-right-context, then the chunk right 
context + is randomized between egs.chunk-right-context and + this value.""") # trainer options parser.add_argument("--trainer.samples-per-iter", type=int, @@ -184,6 +196,12 @@ def process_args(args): "--trainer.deriv-truncate-margin.".format( args.deriv_truncate_margin)) + if args.min_chunk_left_context is None: + args.min_chunk_left_context = args.chunk_left_context + + if args.min_chunk_right_context is None: + args.min_chunk_right_context = args.chunk_right_context + if (not os.path.exists(args.dir) or not os.path.exists(args.dir+"/configs")): raise Exception("This scripts expects {0} to exist and have a configs " @@ -254,12 +272,18 @@ def train(args, run_opts, background_process_handler): # discriminative pretraining num_hidden_layers = variables['num_hidden_layers'] add_lda = common_lib.str_to_bool(variables['add_lda']) - include_log_softmax = common_lib.str_to_bool( - variables['include_log_softmax']) except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) + try: + include_log_softmax = common_lib.str_to_bool( + variables['include_log_softmax']) + except KeyError as e: + logger.warning("KeyError {0}: Using default include-log-softmax value " + "as False.".format(str(e))) + include_log_softmax = False + left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context @@ -419,6 +443,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): add_layers_period=args.add_layers_period, left_context=left_context, right_context=right_context, + min_left_context=args.min_chunk_left_context + + model_left_context, + min_right_context=args.min_chunk_right_context + + model_right_context, min_deriv_time=min_deriv_time, max_deriv_time=max_deriv_time, momentum=args.momentum, From b79f0faa9f04522d0aa4589108da84a833f7102b Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Sun, 27 Nov 2016 01:47:22 -0500 Subject: [PATCH 057/530] asr_diarization: Segmentation tools --- src/Makefile | 11 +- src/segmenter/Makefile | 16 + src/segmenter/segment.cc | 35 + src/segmenter/segment.h | 78 ++ src/segmenter/segmentation-io-test.cc | 63 ++ src/segmenter/segmentation-post-processor.cc | 198 +++++ src/segmenter/segmentation-post-processor.h | 168 ++++ src/segmenter/segmentation-test.cc | 226 ++++++ src/segmenter/segmentation-utils.cc | 743 ++++++++++++++++++ src/segmenter/segmentation-utils.h | 337 ++++++++ src/segmenter/segmentation.cc | 201 +++++ src/segmenter/segmentation.h | 144 ++++ src/segmenterbin/Makefile | 36 + ...entation-combine-segments-to-recordings.cc | 114 +++ .../segmentation-combine-segments.cc | 128 +++ src/segmenterbin/segmentation-copy.cc | 232 ++++++ .../segmentation-create-subsegments.cc | 175 +++++ src/segmenterbin/segmentation-get-stats.cc | 125 +++ .../segmentation-init-from-ali.cc | 91 +++ .../segmentation-init-from-lengths.cc | 82 ++ .../segmentation-init-from-segments.cc | 179 +++++ .../segmentation-intersect-ali.cc | 99 +++ .../segmentation-intersect-segments.cc | 145 ++++ .../segmentation-merge-recordings.cc | 101 +++ src/segmenterbin/segmentation-merge.cc | 146 ++++ src/segmenterbin/segmentation-post-process.cc | 142 ++++ .../segmentation-remove-segments.cc | 155 ++++ .../segmentation-split-segments.cc | 194 +++++ src/segmenterbin/segmentation-to-ali.cc | 99 +++ src/segmenterbin/segmentation-to-rttm.cc | 255 ++++++ src/segmenterbin/segmentation-to-segments.cc | 133 ++++ tools/config/common_path.sh | 1 + 32 files changed, 4847 
insertions(+), 5 deletions(-) create mode 100644 src/segmenter/Makefile create mode 100644 src/segmenter/segment.cc create mode 100644 src/segmenter/segment.h create mode 100644 src/segmenter/segmentation-io-test.cc create mode 100644 src/segmenter/segmentation-post-processor.cc create mode 100644 src/segmenter/segmentation-post-processor.h create mode 100644 src/segmenter/segmentation-test.cc create mode 100644 src/segmenter/segmentation-utils.cc create mode 100644 src/segmenter/segmentation-utils.h create mode 100644 src/segmenter/segmentation.cc create mode 100644 src/segmenter/segmentation.h create mode 100644 src/segmenterbin/Makefile create mode 100644 src/segmenterbin/segmentation-combine-segments-to-recordings.cc create mode 100644 src/segmenterbin/segmentation-combine-segments.cc create mode 100644 src/segmenterbin/segmentation-copy.cc create mode 100644 src/segmenterbin/segmentation-create-subsegments.cc create mode 100644 src/segmenterbin/segmentation-get-stats.cc create mode 100644 src/segmenterbin/segmentation-init-from-ali.cc create mode 100644 src/segmenterbin/segmentation-init-from-lengths.cc create mode 100644 src/segmenterbin/segmentation-init-from-segments.cc create mode 100644 src/segmenterbin/segmentation-intersect-ali.cc create mode 100644 src/segmenterbin/segmentation-intersect-segments.cc create mode 100644 src/segmenterbin/segmentation-merge-recordings.cc create mode 100644 src/segmenterbin/segmentation-merge.cc create mode 100644 src/segmenterbin/segmentation-post-process.cc create mode 100644 src/segmenterbin/segmentation-remove-segments.cc create mode 100644 src/segmenterbin/segmentation-split-segments.cc create mode 100644 src/segmenterbin/segmentation-to-ali.cc create mode 100644 src/segmenterbin/segmentation-to-rttm.cc create mode 100644 src/segmenterbin/segmentation-to-segments.cc diff --git a/src/Makefile b/src/Makefile index 9905be869a0..a42f78f4742 100644 --- a/src/Makefile +++ b/src/Makefile @@ -6,16 +6,16 @@ SHELL := /bin/bash SUBDIRS = base matrix util feat tree thread gmm transform sgmm \ - fstext hmm lm decoder lat kws cudamatrix nnet \ + fstext hmm lm decoder lat kws cudamatrix nnet segmenter \ bin fstbin gmmbin fgmmbin sgmmbin featbin \ nnetbin latbin sgmm2 sgmm2bin nnet2 nnet3 chain nnet3bin nnet2bin kwsbin \ - ivector ivectorbin online2 online2bin lmbin chainbin + ivector ivectorbin online2 online2bin lmbin chainbin segmenterbin MEMTESTDIRS = base matrix util feat tree thread gmm transform sgmm \ - fstext hmm lm decoder lat nnet kws chain \ + fstext hmm lm decoder lat nnet kws chain segmenter \ bin fstbin gmmbin fgmmbin sgmmbin featbin \ nnetbin latbin sgmm2 nnet2 nnet3 nnet2bin nnet3bin sgmm2bin kwsbin \ - ivector ivectorbin online2 online2bin lmbin + ivector ivectorbin online2 online2bin lmbin segmenterbin CUDAMEMTESTDIR = cudamatrix @@ -155,7 +155,7 @@ $(EXT_SUBDIRS) : mklibdir bin fstbin gmmbin fgmmbin sgmmbin sgmm2bin featbin nnetbin nnet2bin nnet3bin chainbin latbin ivectorbin lmbin kwsbin online2bin: \ base matrix util feat tree thread gmm transform sgmm sgmm2 fstext hmm \ - lm decoder lat cudamatrix nnet nnet2 nnet3 ivector chain kws online2 + lm decoder lat cudamatrix nnet nnet2 nnet3 ivector chain kws online2 segmenter #2)The libraries have inter-dependencies base: base/.depend.mk @@ -179,6 +179,7 @@ nnet2: base util matrix thread lat gmm hmm tree transform cudamatrix nnet3: base util matrix thread lat gmm hmm tree transform cudamatrix chain fstext chain: lat hmm tree fstext matrix cudamatrix util thread base ivector: base util matrix 
thread transform tree gmm
+segmenter: base matrix util gmm thread
 
 #3)Dependencies for optional parts of Kaldi
 onlinebin: base matrix util feat tree gmm transform sgmm sgmm2 fstext hmm lm decoder lat cudamatrix nnet nnet2 online thread
 # python-kaldi-decoding: base matrix util feat tree thread gmm transform sgmm sgmm2 fstext hmm decoder lat online
diff --git a/src/segmenter/Makefile b/src/segmenter/Makefile
new file mode 100644
index 00000000000..03df6132050
--- /dev/null
+++ b/src/segmenter/Makefile
@@ -0,0 +1,16 @@
+all:
+
+include ../kaldi.mk
+
+TESTFILES = segmentation-io-test
+
+OBJFILES = segment.o segmentation.o segmentation-utils.o \
+           segmentation-post-processor.o
+
+LIBNAME = kaldi-segmenter
+
+ADDLIBS = ../gmm/kaldi-gmm.a \
+          ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../base/kaldi-base.a ../thread/kaldi-thread.a
+
+include ../makefiles/default_rules.mk
+
diff --git a/src/segmenter/segment.cc b/src/segmenter/segment.cc
new file mode 100644
index 00000000000..b4f485c26bc
--- /dev/null
+++ b/src/segmenter/segment.cc
@@ -0,0 +1,35 @@
+#include "segmenter/segment.h"
+
+namespace kaldi {
+namespace segmenter {
+
+void Segment::Write(std::ostream &os, bool binary) const {
+  if (binary) {
+    os.write(reinterpret_cast<const char *>(&start_frame), sizeof(start_frame));
+    os.write(reinterpret_cast<const char *>(&end_frame), sizeof(end_frame));
+    os.write(reinterpret_cast<const char *>(&class_id), sizeof(class_id));
+  } else {
+    WriteBasicType(os, binary, start_frame);
+    WriteBasicType(os, binary, end_frame);
+    WriteBasicType(os, binary, Label());
+  }
+}
+
+void Segment::Read(std::istream &is, bool binary) {
+  if (binary) {
+    is.read(reinterpret_cast<char *>(&start_frame), sizeof(start_frame));
+    is.read(reinterpret_cast<char *>(&end_frame), sizeof(end_frame));
+    is.read(reinterpret_cast<char *>(&class_id), sizeof(class_id));
+  } else {
+    ReadBasicType(is, binary, &start_frame);
+    ReadBasicType(is, binary, &end_frame);
+    int32 label;
+    ReadBasicType(is, binary, &label);
+    SetLabel(label);
+  }
+
+  KALDI_ASSERT(end_frame >= start_frame && start_frame >= 0);
+}
+
+}  // end namespace segmenter
+}  // end namespace kaldi
diff --git a/src/segmenter/segment.h b/src/segmenter/segment.h
new file mode 100644
index 00000000000..1657affc875
--- /dev/null
+++ b/src/segmenter/segment.h
@@ -0,0 +1,78 @@
+#ifndef KALDI_SEGMENTER_SEGMENT_H_
+#define KALDI_SEGMENTER_SEGMENT_H_
+
+#include "base/kaldi-common.h"
+#include "matrix/kaldi-matrix.h"
+
+namespace kaldi {
+namespace segmenter {
+
+/**
+ * This structure defines a single segment. It consists of the following basic
+ * properties:
+ *  1) start_frame : This is the frame index of the first frame in the
+ *                   segment.
+ *  2) end_frame   : This is the frame index of the last frame in the segment.
+ *                   Note that the end_frame is included in the segment.
+ *  3) class_id    : This is the class corresponding to the segments. For e.g.,
+ *                   could be 0, 1 or 2 depending on whether the segment is
+ *                   silence, speech or noise. In general, it can be any
+ *                   integer class label.
+**/
+
+struct Segment {
+  int32 start_frame;
+  int32 end_frame;
+  int32 class_id;
+
+  // Accessors for labels or class id. This is useful in the future when
+  // we might change the type of label.
+  inline int32 Label() const { return class_id; }
+  inline void SetLabel(int32 label) { class_id = label; }
+  inline int32 Length() const { return end_frame - start_frame + 1; }
+
+  // This is the default constructor that sets everything to undefined values.
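+  // (For illustration: Segment(10, 19, 1), built with the main constructor
+  // below, spans frames 10 through 19 with class_id 1, and Length() returns
+  // 10, since end_frame is inclusive.)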
+ Segment() : start_frame(-1), end_frame(-1), class_id(-1) { } + + // This constructor initializes the segmented with the provided start and end + // frames and the segment label. This is the main constructor. + Segment(int32 start, int32 end, int32 label) : + start_frame(start), end_frame(end), class_id(label) { } + + void Write(std::ostream &os, bool binary) const; + void Read(std::istream &is, bool binary); + + // This is a function that returns the size of the elements in the structure. + // It is used during I/O in binary mode, which checks for the total size + // required to store the segment. + static size_t SizeInBytes() { + return (sizeof(int32) + sizeof(int32) + sizeof(int32)); + } +}; + +/** + * Comparator to order segments based on start frame +**/ + +class SegmentComparator { + public: + bool operator() (const Segment &lhs, const Segment &rhs) const { + return lhs.start_frame < rhs.start_frame; + } +}; + +/** + * Comparator to order segments based on length +**/ + +class SegmentLengthComparator { + public: + bool operator() (const Segment &lhs, const Segment &rhs) const { + return lhs.Length() < rhs.Length(); + } +}; + +} // end namespace segmenter +} // end namespace kaldi + +#endif // KALDI_SEGMENTER_SEGMENT_H_ diff --git a/src/segmenter/segmentation-io-test.cc b/src/segmenter/segmentation-io-test.cc new file mode 100644 index 00000000000..f019a653a4a --- /dev/null +++ b/src/segmenter/segmentation-io-test.cc @@ -0,0 +1,63 @@ +// segmenter/segmentation-io-test.cc + +// Copyright 2015 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
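+
+// This test round-trips a randomly generated Segmentation through Write() and
+// Read() (in text or binary mode), writes the result out again, and checks
+// that the two written streams are identical.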
+ +#include "segmenter/segmentation.h" + +namespace kaldi { +namespace segmenter { + +void UnitTestSegmentationIo() { + Segmentation seg; + int32 max_length = RandInt(0, 1000), + max_segment_length = max_length / 10, + num_classes = RandInt(0, 3); + + if (max_segment_length == 0) + max_segment_length = 1; + + seg.GenRandomSegmentation(max_length, max_segment_length, num_classes); + + bool binary = ( RandInt(0,1) == 0 ); + std::ostringstream os; + + seg.Write(os, binary); + + Segmentation seg2; + std::istringstream is(os.str()); + seg2.Read(is, binary); + + std::ostringstream os2; + seg2.Write(os2, binary); + + KALDI_ASSERT(os2.str() == os.str()); +} + +} // namespace segmenter +} // namespace kaldi + +int main() { + using namespace kaldi; + using namespace kaldi::segmenter; + + for (int32 i = 0; i < 100; i++) + UnitTestSegmentationIo(); + return 0; +} + + diff --git a/src/segmenter/segmentation-post-processor.cc b/src/segmenter/segmentation-post-processor.cc new file mode 100644 index 00000000000..2c97e31db56 --- /dev/null +++ b/src/segmenter/segmentation-post-processor.cc @@ -0,0 +1,198 @@ +// segmenter/segmentation-post-processor.h + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
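+
+// Note on ordering: PostProcess() applies the operations in a fixed sequence:
+// merge labels, pad segments, shrink segments, blend short segments, remove
+// segments, merge adjacent segments, and finally split long segments.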
+ +#include "segmenter/segmentation-utils.h" +#include "segmenter/segmentation-post-processor.h" + +namespace kaldi { +namespace segmenter { + +static inline bool IsMergingLabelsToBeDone( + const SegmentationPostProcessingOptions &opts) { + return (!opts.merge_labels_csl.empty() || opts.merge_dst_label != -1); +} + +static inline bool IsPaddingSegmentsToBeDone( + const SegmentationPostProcessingOptions &opts) { + return (opts.pad_label != -1 || opts.pad_length != -1); +} + +static inline bool IsShrinkingSegmentsToBeDone( + const SegmentationPostProcessingOptions &opts) { + return (opts.shrink_label != -1 || opts.shrink_length != -1); +} + +static inline bool IsBlendingShortSegmentsToBeDone( + const SegmentationPostProcessingOptions &opts) { + return (opts.blend_short_segments_class != -1 || opts.max_blend_length != -1); +} + +static inline bool IsRemovingSegmentsToBeDone( + const SegmentationPostProcessingOptions &opts) { + return (!opts.remove_labels_csl.empty()); +} + +static inline bool IsMergingAdjacentSegmentsToBeDone( + const SegmentationPostProcessingOptions &opts) { + return (opts.merge_adjacent_segments); +} + +static inline bool IsSplittingSegmentsToBeDone( + const SegmentationPostProcessingOptions &opts) { + return (opts.max_segment_length != -1); +} + + +SegmentationPostProcessor::SegmentationPostProcessor( + const SegmentationPostProcessingOptions &opts) : opts_(opts) { + if (!opts_.remove_labels_csl.empty()) { + if (!SplitStringToIntegers(opts_.remove_labels_csl, ":", + false, &remove_labels_)) { + KALDI_ERR << "Bad value for --remove-labels option: " + << opts_.remove_labels_csl; + } + std::sort(remove_labels_.begin(), remove_labels_.end()); + } + + if (!opts_.merge_labels_csl.empty()) { + if (!SplitStringToIntegers(opts_.merge_labels_csl, ":", + false, &merge_labels_)) { + KALDI_ERR << "Bad value for --merge-labels option: " + << opts_.merge_labels_csl; + } + std::sort(merge_labels_.begin(), merge_labels_.end()); + } + + Check(); +} + +void SegmentationPostProcessor::Check() const { + if (IsPaddingSegmentsToBeDone(opts_) && opts_.pad_label < 0) { + KALDI_ERR << "Invalid value " << opts_.pad_label << " for option " + << "--pad-label. It must be non-negative."; + } + + if (IsPaddingSegmentsToBeDone(opts_) && opts_.pad_length <= 0) { + KALDI_ERR << "Invalid value " << opts_.pad_length << " for option " + << "--pad-length. It must be positive."; + } + + if (IsShrinkingSegmentsToBeDone(opts_) && opts_.shrink_label < 0) { + KALDI_ERR << "Invalid value " << opts_.shrink_label << " for option " + << "--shrink-label. It must be non-negative."; + } + + if (IsShrinkingSegmentsToBeDone(opts_) && opts_.shrink_length <= 0) { + KALDI_ERR << "Invalid value " << opts_.shrink_length << " for option " + << "--shrink-length. It must be positive."; + } + + if (IsBlendingShortSegmentsToBeDone(opts_) && + opts_.blend_short_segments_class < 0) { + KALDI_ERR << "Invalid value " << opts_.blend_short_segments_class + << " for option " << "--blend-short-segments-class. " + << "It must be non-negative."; + } + + if (IsBlendingShortSegmentsToBeDone(opts_) && opts_.max_blend_length <= 0) { + KALDI_ERR << "Invalid value " << opts_.max_blend_length << " for option " + << "--max-blend-length. It must be positive."; + } + + if (IsRemovingSegmentsToBeDone(opts_) && remove_labels_[0] < 0) { + KALDI_ERR << "Invalid value " << opts_.remove_labels_csl + << " for option " << "--remove-labels. 
" + << "The labels must be non-negative."; + } + + if (IsMergingAdjacentSegmentsToBeDone(opts_) && + opts_.max_intersegment_length < 0) { + KALDI_ERR << "Invalid value " << opts_.max_intersegment_length + << " for option " + << "--max-intersegment-length. It must be non-negative."; + } + + if (IsSplittingSegmentsToBeDone(opts_) && opts_.max_segment_length <= 0) { + KALDI_ERR << "Invalid value " << opts_.max_segment_length + << " for option " + << "--max-segment-length. It must be positive."; + } + + if (opts_.post_process_label != -1 && opts_.post_process_label < 0) { + KALDI_ERR << "Invalid value " << opts_.post_process_label << " for option " + << "--post-process-label. It must be non-negative."; + } +} + +bool SegmentationPostProcessor::PostProcess(Segmentation *seg) const { + DoMergingLabels(seg); + DoPaddingSegments(seg); + DoShrinkingSegments(seg); + DoBlendingShortSegments(seg); + DoRemovingSegments(seg); + DoMergingAdjacentSegments(seg); + DoSplittingSegments(seg); + + return true; +} + +void SegmentationPostProcessor::DoMergingLabels(Segmentation *seg) const { + if (!IsMergingLabelsToBeDone(opts_)) return; + MergeLabels(merge_labels_, opts_.merge_dst_label, seg); +} + +void SegmentationPostProcessor::DoPaddingSegments(Segmentation *seg) const { + if (!IsPaddingSegmentsToBeDone(opts_)) return; + PadSegments(opts_.pad_label, opts_.pad_length, seg); +} + +void SegmentationPostProcessor::DoShrinkingSegments(Segmentation *seg) const { + if (!IsShrinkingSegmentsToBeDone(opts_)) return; + ShrinkSegments(opts_.shrink_label, opts_.shrink_length, seg); +} + +void SegmentationPostProcessor::DoBlendingShortSegments( + Segmentation *seg) const { + if (!IsBlendingShortSegmentsToBeDone(opts_)) return; + BlendShortSegmentsWithNeighbors(opts_.blend_short_segments_class, + opts_.max_blend_length, + opts_.max_intersegment_length, seg); +} + +void SegmentationPostProcessor::DoRemovingSegments(Segmentation *seg) const { + if (!IsRemovingSegmentsToBeDone(opts_)) return; + RemoveSegments(remove_labels_, seg); +} + +void SegmentationPostProcessor::DoMergingAdjacentSegments( + Segmentation *seg) const { + if (!IsMergingAdjacentSegmentsToBeDone(opts_)) return; + MergeAdjacentSegments(opts_.max_intersegment_length, seg); +} + +void SegmentationPostProcessor::DoSplittingSegments(Segmentation *seg) const { + if (!IsSplittingSegmentsToBeDone(opts_)) return; + SplitSegments(opts_.max_segment_length, + opts_.max_segment_length / 2, + opts_.overlap_length, + opts_.post_process_label, seg); +} + +} // end namespace segmenter +} // end namespace kaldi diff --git a/src/segmenter/segmentation-post-processor.h b/src/segmenter/segmentation-post-processor.h new file mode 100644 index 00000000000..01a23b93b1b --- /dev/null +++ b/src/segmenter/segmentation-post-processor.h @@ -0,0 +1,168 @@ +// segmenter/segmentation-post-processor.h + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. 
+// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_SEGMENTER_SEGMENTATION_POST_PROCESSOR_H_ +#define KALDI_SEGMENTER_SEGMENTATION_POST_PROCESSOR_H_ + +#include "base/kaldi-common.h" +#include "itf/options-itf.h" +#include "segmenter/segmentation.h" + +namespace kaldi { +namespace segmenter { + +/** + * Structure for some common options related to segmentation that would be used + * in multiple segmentation programs. Some of the operations include merging, + * filtering etc. +**/ + +struct SegmentationPostProcessingOptions { + std::string merge_labels_csl; + int32 merge_dst_label; + + int32 pad_label; + int32 pad_length; + + int32 shrink_label; + int32 shrink_length; + + int32 blend_short_segments_class; + int32 max_blend_length; + + std::string remove_labels_csl; + + bool merge_adjacent_segments; + int32 max_intersegment_length; + + int32 max_segment_length; + int32 overlap_length; + + int32 post_process_label; + + SegmentationPostProcessingOptions() : + merge_dst_label(-1), + pad_label(-1), pad_length(-1), + shrink_label(-1), shrink_length(-1), + blend_short_segments_class(-1), max_blend_length(-1), + merge_adjacent_segments(false), max_intersegment_length(0), + max_segment_length(-1), overlap_length(0), + post_process_label(-1) { } + + void Register(OptionsItf *opts) { + opts->Register("merge-labels", &merge_labels_csl, "Merge labels into a " + "single label defined by merge-dst-label. " + "The labels are specified as a colon-separated list. " + "Refer to the MergeLabels() code for details. " + "Used in conjunction with the option --merge-dst-label"); + opts->Register("merge-dst-label", &merge_dst_label, + "Merge labels specified by merge-labels into this label. " + "Refer to the MergeLabels() code for details. " + "Used in conjunction with the option --merge-labels."); + opts->Register("pad-label", &pad_label, + "Pad segments of this label by pad_length frames." + "Refer to the PadSegments() code for details. " + "Used in conjunction with the option --pad-length."); + opts->Register("pad-length", &pad_length, "Pad segments by this many " + "frames on either side. " + "Refer to the PadSegments() code for details. " + "Used in conjunction with the option --pad-label."); + opts->Register("shrink-label", &shrink_label, + "Shrink segments of this label by shrink_length frames. " + "Refer to the ShrinkSegments() code for details. " + "Used in conjunction with the option --shrink-length."); + opts->Register("shrink-length", &shrink_length, "Shrink segments by this " + "many frames on either side. " + "Refer to the ShrinkSegments() code for details. " + "Used in conjunction with the option --shrink-label."); + opts->Register("blend-short-segments-class", &blend_short_segments_class, + "The label for which the short segments are to be " + "blended with the neighboring segments that are less than " + "max_intersegment_length frames away. " + "Refer to BlendShortSegments() code for details. " + "Used in conjunction with the option --max-blend-length " + "and --max-intersegment-length."); + opts->Register("max-blend-length", &max_blend_length, + "The maximum length of segment in number of frames that " + "will be blended with the neighboring segments provided " + "they both have the same label. " + "Refer to BlendShortSegments() code for details. 
" + "Used in conjunction with the option " + "--blend-short-segments-class"); + opts->Register("remove-labels", &remove_labels_csl, + "Remove any segment whose label is contained in " + "remove_labels_csl. " + "Refer to the RemoveLabels() code for details."); + opts->Register("merge-adjacent-segments", &merge_adjacent_segments, + "Merge adjacent segments of the same label if they are " + "within max-intersegment-length distance. " + "Refer to the MergeAdjacentSegments() code for details. " + "Used in conjunction with the option " + "--max-intersegment-length\n"); + opts->Register("max-intersegment-length", &max_intersegment_length, + "The maximum intersegment length that is allowed for " + "two adjacent segments to be merged. " + "Refer to the MergeAdjacentSegments() code for details. " + "Used in conjunction with the option " + "--merge-adjacent-segments or " + "--blend-short-segments-class\n"); + opts->Register("max-segment-length", &max_segment_length, + "If segment is longer than this length, split it into " + "pieces with less than these many frames. " + "Refer to the SplitSegments() code for details. " + "Used in conjunction with the option --overlap-length."); + opts->Register("overlap-length", &overlap_length, + "When splitting segments longer than max-segment-length, " + "have the pieces overlap by these many frames. " + "Refer to the SplitSegments() code for details. " + "Used in conjunction with the option --max-segment-length."); + opts->Register("post-process-label", &post_process_label, + "Do post processing only on this label. This option is " + "applicable to only a few operations including " + "SplitSegments"); + } +}; + +class SegmentationPostProcessor { + public: + explicit SegmentationPostProcessor( + const SegmentationPostProcessingOptions &opts); + + bool PostProcess(Segmentation *seg) const; + + void DoMergingLabels(Segmentation *seg) const; + void DoPaddingSegments(Segmentation *seg) const; + void DoShrinkingSegments(Segmentation *seg) const; + void DoBlendingShortSegments(Segmentation *seg) const; + void DoRemovingSegments(Segmentation *seg) const; + void DoMergingAdjacentSegments(Segmentation *seg) const; + void DoSplittingSegments(Segmentation *seg) const; + + private: + const SegmentationPostProcessingOptions &opts_; + std::vector merge_labels_; + std::vector remove_labels_; + + void Check() const; +}; + +} // end namespace segmenter +} // end namespace kaldi + +#endif // KALDI_SEGMENTER_SEGMENTATION_POST_PROCESSOR_H_ diff --git a/src/segmenter/segmentation-test.cc b/src/segmenter/segmentation-test.cc new file mode 100644 index 00000000000..7654b23b119 --- /dev/null +++ b/src/segmenter/segmentation-test.cc @@ -0,0 +1,226 @@ +// segmenter/segmentation-test.cc + +// Copyright 2015 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+
+#include "segmenter/segmentation.h"
+
+namespace kaldi {
+namespace segmenter {
+
+void GenerateRandomSegmentation(int32 max_length, int32 num_classes,
+                                Segmentation *segmentation) {
+  segmentation->Clear();
+  int32 s = max_length;
+  int32 e = max_length;
+
+  while (e >= 0) {
+    // Draw a chunk size of at least 1 frame; a zero chunk size would leave
+    // 'e' unchanged and the loop would never terminate.
+    int32 chunk_size = rand() % std::max(1, max_length / 10) + 1;
+    s = e - chunk_size + 1;
+    if (s < 0) s = 0;  // clamp so that no segment starts below frame 0
+    int32 k = rand() % num_classes;
+
+    if (k != 0) {
+      segmentation->EmplaceBack(s, e, k);
+    }
+    e = s - 1;
+  }
+  segmentation->Check();
+}
+
+
+int32 GenerateRandomAlignment(int32 max_length, int32 num_classes,
+                              std::vector<int32> *ali) {
+  int32 N = RandInt(1, max_length);
+  int32 C = RandInt(1, num_classes);
+
+  ali->clear();
+
+  int32 len = 0;
+  while (len < N) {
+    int32 c = RandInt(0, C-1);
+    int32 n = std::min(RandInt(1, N), N - len);
+    ali->insert(ali->begin() + len, n, c);
+    len += n;
+  }
+  KALDI_ASSERT(ali->size() == N && len == N);
+
+  int32 state = -1, num_segments = 0;
+  for (std::vector<int32>::const_iterator it = ali->begin();
+       it != ali->end(); ++it) {
+    if (*it != state) num_segments++;
+    state = *it;
+  }
+
+  return num_segments;
+}
+
+void TestConversionToAlignment() {
+  std::vector<int32> ali;
+  int32 max_length = 1000, num_classes = 3;
+  int32 num_segments = GenerateRandomAlignment(max_length, num_classes, &ali);
+
+  Segmentation seg;
+  KALDI_ASSERT(num_segments == seg.InsertFromAlignment(ali, 0));
+
+  std::vector<int32> out_ali;
+  {
+    seg.ConvertToAlignment(&out_ali);
+    KALDI_ASSERT(ali == out_ali);
+  }
+
+  {
+    seg.ConvertToAlignment(&out_ali, num_classes, max_length * 2);
+    std::vector<int32> tmp_ali(out_ali.begin(), out_ali.begin() + ali.size());
+    KALDI_ASSERT(ali == tmp_ali);
+    for (std::vector<int32>::const_iterator it = out_ali.begin() + ali.size();
+         it != out_ali.end(); ++it) {
+      KALDI_ASSERT(*it == num_classes);
+    }
+  }
+
+  seg.Clear();
+  KALDI_ASSERT(num_segments == seg.InsertFromAlignment(ali, max_length));
+  {
+    seg.ConvertToAlignment(&out_ali, num_classes, max_length * 2);
+
+    for (std::vector<int32>::const_iterator it = out_ali.begin();
+         it != out_ali.begin() + max_length; ++it) {
+      KALDI_ASSERT(*it == num_classes);
+    }
+    std::vector<int32> tmp_ali(out_ali.begin() + max_length,
+                               out_ali.begin() + max_length + ali.size());
+    KALDI_ASSERT(tmp_ali == ali);
+
+    for (std::vector<int32>::const_iterator it =
+             out_ali.begin() + max_length + ali.size();
+         it != out_ali.end(); ++it) {
+      KALDI_ASSERT(*it == num_classes);
+    }
+  }
+}
+
+void TestRemoveSegments() {
+  std::vector<int32> ali;
+  int32 max_length = 1000, num_classes = 10;
+  int32 num_segments = GenerateRandomAlignment(max_length, num_classes, &ali);
+
+  Segmentation seg;
+  KALDI_ASSERT(num_segments == seg.InsertFromAlignment(ali, 0));
+
+  for (int32 i = 0; i < num_classes; i++) {
+    Segmentation out_seg(seg);
+    out_seg.RemoveSegments(i);
+    std::vector<int32> out_ali;
+    out_seg.ConvertToAlignment(&out_ali, i, ali.size());
+    KALDI_ASSERT(ali == out_ali);
+  }
+
+  {
+    std::vector<int32> classes;
+    for (int32 i = 0; i < 3; i++)
+      classes.push_back(RandInt(0, num_classes - 1));
+    std::sort(classes.begin(), classes.end());
+
+    Segmentation out_seg1(seg);
+    out_seg1.RemoveSegments(classes);
+
+    Segmentation out_seg2(seg);
+    for (std::vector<int32>::const_iterator it = classes.begin();
+         it != classes.end(); ++it)
+      out_seg2.RemoveSegments(*it);
+
+    std::vector<int32> out_ali1, out_ali2;
+    out_seg1.ConvertToAlignment(&out_ali1);
+    out_seg2.ConvertToAlignment(&out_ali2);
+
+    KALDI_ASSERT(out_ali1 == out_ali2);
+  }
+}
+
+void TestIntersectSegments() {
+  int32 max_length = 100, num_classes = 3;
+
+  std::vector<int32> primary_ali;
+  GenerateRandomAlignment(max_length, num_classes, &primary_ali);
+
+ 
std::vector secondary_ali; + GenerateRandomAlignment(max_length, num_classes, &secondary_ali); + + Segmentation primary_seg; + primary_seg.InsertFromAlignment(primary_ali); + + Segmentation secondary_seg; + secondary_seg.InsertFromAlignment(secondary_ali); + + { + Segmentation out_seg; + primary_seg.IntersectSegments(secondary_seg, &out_seg, num_classes); + + std::vector out_ali; + out_seg.ConvertToAlignment(&out_ali); + + std::vector oracle_ali(primary_ali.size()); + + for (size_t i = 0; i < oracle_ali.size(); i++) { + int32 p = (i < primary_ali.size()) ? primary_ali[i] : -1; + int32 s = (i < secondary_ali.size()) ? secondary_ali[i] : -2; + + oracle_ali[i] = (p == s) ? p : num_classes; + } + + KALDI_ASSERT(oracle_ali == out_ali); + } + + { + Segmentation out_seg; + primary_seg.IntersectSegments(secondary_seg, &out_seg); + + std::vector out_ali; + out_seg.ConvertToAlignment(&out_ali, num_classes); + + std::vector oracle_ali(out_ali.size()); + + for (size_t i = 0; i < oracle_ali.size(); i++) { + int32 p = (i < primary_ali.size()) ? primary_ali[i] : -1; + int32 s = (i < secondary_ali.size()) ? secondary_ali[i] : -2; + + oracle_ali[i] = (p == s) ? p : num_classes; + } + + KALDI_ASSERT(oracle_ali == out_ali); + } + +} + +void UnitTestSegmentation() { + TestConversionToAlignment(); + TestRemoveSegments(); + TestIntersectSegments(); +} + +} // namespace segmenter +} // namespace kaldi + +int main() { + using namespace kaldi; + using namespace kaldi::segmenter; + + for (int32 i = 0; i < 10; i++) + UnitTestSegmentation(); + return 0; +} + + + diff --git a/src/segmenter/segmentation-utils.cc b/src/segmenter/segmentation-utils.cc new file mode 100644 index 00000000000..3adc178d66d --- /dev/null +++ b/src/segmenter/segmentation-utils.cc @@ -0,0 +1,743 @@ +// segmenter/segmentation-utils.cc + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
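+
+// Editor's note: a minimal usage sketch (not part of the original patch) for
+// the MergeLabels() utility defined below; the label values and the
+// Segmentation object 'seg' are hypothetical:
+//
+//   std::vector<int32> merge_labels;
+//   merge_labels.push_back(2);
+//   merge_labels.push_back(3);           // the list must be sorted ascending
+//   MergeLabels(merge_labels, 1, &seg);  // classes 2 and 3 both become class 1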
+ +#include "segmenter/segmentation-utils.h" + +namespace kaldi { +namespace segmenter { + +void MergeLabels(const std::vector &merge_labels, + int32 dest_label, + Segmentation *segmentation) { + KALDI_ASSERT(segmentation); + + // Check if sorted and unique + KALDI_ASSERT(std::adjacent_find(merge_labels.begin(), + merge_labels.end(), std::greater()) + == merge_labels.end()); + + for (SegmentList::iterator it = segmentation->Begin(); + it != segmentation->End(); ++it) { + if (std::binary_search(merge_labels.begin(), merge_labels.end(), + it->Label())) { + it->SetLabel(dest_label); + } + } +#ifdef KALDI_PARANOID + segmentation->Check(); +#endif +} + +void RelabelSegmentsUsingMap(const unordered_map &label_map, + Segmentation *segmentation) { + for (SegmentList::iterator it = segmentation->Begin(); + it != segmentation->End(); ++it) { + unordered_map::const_iterator map_it = label_map.find( + it->Label()); + if (map_it == label_map.end()) + KALDI_ERR << "Could not find label " << it->Label() << " in label map."; + + it->SetLabel(map_it->second); + } +} + +void RelabelAllSegments(int32 label, Segmentation *segmentation) { + for (SegmentList::iterator it = segmentation->Begin(); + it != segmentation->End(); ++it) + it->SetLabel(label); +} + +void ScaleFrameShift(BaseFloat factor, Segmentation *segmentation) { + for (SegmentList::iterator it = segmentation->Begin(); + it != segmentation->End(); ++it) { + it->start_frame *= factor; + it->end_frame *= factor; + } +} + +void RemoveSegments(int32 label, Segmentation *segmentation) { + for (SegmentList::iterator it = segmentation->Begin(); + it != segmentation->End(); ) { + if (it->Label() == label) { + it = segmentation->Erase(it); + } else { + ++it; + } + } +#ifdef KALDI_PARANOID + segmentation->Check(); +#endif +} + +void RemoveSegments(const std::vector &labels, + Segmentation *segmentation) { + // Check if sorted and unique + KALDI_ASSERT(std::adjacent_find(labels.begin(), + labels.end(), std::greater()) == labels.end()); + + for (SegmentList::iterator it = segmentation->Begin(); + it != segmentation->End(); ) { + if (std::binary_search(labels.begin(), labels.end(), it->Label())) { + it = segmentation->Erase(it); + } else { + ++it; + } + } +#ifdef KALDI_PARANOID + segmentation->Check(); +#endif +} + +// Opposite of RemoveSegments() +void KeepSegments(int32 label, Segmentation *segmentation) { + for (SegmentList::iterator it = segmentation->Begin(); + it != segmentation->End(); ) { + if (it->Label() != label) { + it = segmentation->Erase(it); + } else { + ++it; + } + } +#ifdef KALDI_PARANOID + segmentation->Check(); +#endif +} + +// TODO(Vimal): Write test function for this. +void SplitInputSegmentation(const Segmentation &in_segmentation, + int32 segment_length, + Segmentation *out_segmentation) { + out_segmentation->Clear(); + for (SegmentList::const_iterator it = in_segmentation.Begin(); + it != in_segmentation.End(); ++it) { + int32 length = it->Length(); + + // Since ceil is used, this results in all pieces to be smaller than + // segment_length rather than being larger. 
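+    // Editor's note (hypothetical numbers): a 25-frame segment with
+    // segment_length = 10 gives num_chunks = ceil(25 / 10.0) = 3 and
+    // actual_segment_length = (25 + 2) / 3 = 9, i.e. pieces of 9, 9 and
+    // 7 frames.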
+    int32 num_chunks = std::ceil(static_cast<BaseFloat>(length)
+                                 / segment_length);
+    // Round up here as well, so that the pieces jointly cover the whole
+    // segment; the min() below clamps the last piece to the segment end.
+    int32 actual_segment_length = (length + num_chunks - 1) / num_chunks;
+
+    int32 start_frame = it->start_frame;
+    for (int32 j = 0; j < num_chunks; j++) {
+      int32 end_frame = std::min(start_frame + actual_segment_length - 1,
+                                 it->end_frame);
+      out_segmentation->EmplaceBack(start_frame, end_frame, it->Label());
+      start_frame = end_frame + 1;
+    }
+  }
+#ifdef KALDI_PARANOID
+  out_segmentation->Check();
+#endif
+}
+
+// TODO(Vimal): Write test function for this.
+void SplitSegments(int32 segment_length, int32 min_remainder,
+                   int32 overlap_length, int32 segment_label,
+                   Segmentation *segmentation) {
+  KALDI_ASSERT(segmentation);
+  KALDI_ASSERT(segment_length > 0 && min_remainder > 0);
+  KALDI_ASSERT(overlap_length >= 0);
+
+  KALDI_ASSERT(overlap_length < segment_length);
+  for (SegmentList::iterator it = segmentation->Begin();
+       it != segmentation->End(); ++it) {
+    if (segment_label != -1 && it->Label() != segment_label) continue;
+
+    int32 start_frame = it->start_frame;
+    int32 length = it->Length();
+
+    if (length > segment_length + min_remainder) {
+      // Split segment
+      // To show what this is doing, consider the following example, where it
+      // is currently pointing to B.
+      // A <--> B <--> C
+
+      // Modify the start_frame of the current segment. This prepares the
+      // current segment to be used as the "next segment" when we move the
+      // iterator in the next statement.
+      // In the example, the start_frame for B has just been modified.
+      it->start_frame = start_frame + segment_length - overlap_length;
+
+      // Create a new segment and add it where the current iterator is.
+      // The statement below results in this:
+      // A <--> B1 <--> B <--> C
+      // with the iterator it pointing at B1. So when the iterator is
+      // incremented in the for loop, it will point to B again, but whose
+      // start_frame had been modified.
+      it = segmentation->Emplace(it, start_frame,
+                                 start_frame + segment_length - 1,
+                                 it->Label());
+    }
+  }
+#ifdef KALDI_PARANOID
+  segmentation->Check();
+#endif
+}
+
+// TODO(Vimal): Write test code for this
+void SplitSegmentsUsingAlignment(int32 segment_length,
+                                 int32 segment_label,
+                                 const std::vector<int32> &ali,
+                                 int32 ali_label,
+                                 int32 min_silence_length,
+                                 Segmentation *segmentation) {
+  KALDI_ASSERT(segmentation);
+  KALDI_ASSERT(segment_length > 0);
+
+  for (SegmentList::iterator it = segmentation->Begin();
+       it != segmentation->End();) {
+    // Safety check. In practice, should never fail.
+    KALDI_ASSERT(segmentation->Dim() <= ali.size());
+
+    if (segment_label != -1 && it->Label() != segment_label) {
+      ++it;
+      continue;
+    }
+
+    int32 start_frame = it->start_frame;
+    int32 length = it->Length();
+    int32 label = it->Label();
+
+    if (length <= segment_length) {
+      ++it;
+      continue;
+    }
+
+    // Split segment
+    // To show what this is doing, consider the following example, where it is
+    // currently pointing to B.
+ // A <--> B <--> C + + Segmentation ali_segmentation; + InsertFromAlignment(ali, start_frame, + start_frame + length, + 0, &ali_segmentation, NULL); + KeepSegments(ali_label, &ali_segmentation); + MergeAdjacentSegments(0, &ali_segmentation); + + // Get largest alignment chunk where label == ali_label + SegmentList::iterator s_it = ali_segmentation.MaxElement(); + + if (s_it == ali_segmentation.End() || s_it->Length() < min_silence_length) { + ++it; + continue; + } + + KALDI_ASSERT(s_it->start_frame >= start_frame); + KALDI_ASSERT(s_it->end_frame <= start_frame + length); + + // Modify the start_frame of the current frame. This prepares the current + // segment to be used as the "next segment" when we move the iterator in + // the next statement. + // In the example, the start_frame for B has just been modified. + int32 end_frame; + if (s_it->Length() > 1) { + end_frame = s_it->start_frame + s_it->Length() / 2 - 2; + it->start_frame = end_frame + 2; + } else { + end_frame = s_it->start_frame - 1; + it->start_frame = s_it->end_frame + 1; + } + + // end_frame is within this current segment + KALDI_ASSERT(end_frame < start_frame + length); + // The first new segment length is smaller than the old segment length + KALDI_ASSERT(end_frame - start_frame + 1 < length); + + // The second new segment length is smaller than the old segment length + KALDI_ASSERT(it->end_frame - end_frame - 1 < length); + + if (it->Length() < 0) { + // This is possible when the beginning of the segment is silence + it = segmentation->Erase(it); + } + + // Create a new segment and add it to the where the current iterator is. + // The statement below results in this: + // A <--> B1 <--> B <--> C + // with the iterator it pointing at B1. + if (end_frame >= start_frame) { + it = segmentation->Emplace(it, start_frame, end_frame, label); + } + } +#ifdef KALDI_PARANOID + segmentation->Check(); +#endif +} + +// TODO(Vimal): Write test code for this +void IntersectSegmentationAndAlignment(const Segmentation &in_segmentation, + const std::vector &alignment, + int32 ali_label, + int32 min_align_chunk_length, + Segmentation *out_segmentation) { + KALDI_ASSERT(out_segmentation); + + for (SegmentList::const_iterator it = in_segmentation.Begin(); + it != in_segmentation.End(); ++it) { + Segmentation filter_segmentation; + InsertFromAlignment(alignment, it->start_frame, + std::min(it->end_frame + 1, + static_cast(alignment.size())), + 0, &filter_segmentation, NULL); + + for (SegmentList::const_iterator f_it = filter_segmentation.Begin(); + f_it != filter_segmentation.End(); ++f_it) { + if (f_it->Length() < min_align_chunk_length) continue; + if (ali_label != -1 && f_it->Label() != ali_label) continue; + out_segmentation->EmplaceBack(f_it->start_frame, f_it->end_frame, + it->Label()); + } + } +} + +void SubSegmentUsingNonOverlappingSegments( + const Segmentation &primary_segmentation, + const Segmentation &secondary_segmentation, int32 secondary_label, + int32 subsegment_label, int32 unmatched_label, + Segmentation *out_segmentation) { + KALDI_ASSERT(out_segmentation); + KALDI_ASSERT(secondary_segmentation.Dim() > 0); + + std::vector alignment; + ConvertToAlignment(secondary_segmentation, -1, -1, 0, &alignment); + + for (SegmentList::const_iterator it = primary_segmentation.Begin(); + it != primary_segmentation.End(); ++it) { + if (it->end_frame >= alignment.size()) { + alignment.resize(it->end_frame + 1, -1); + } + Segmentation filter_segmentation; + InsertFromAlignment(alignment, it->start_frame, it->end_frame + 1, + 0, 
&filter_segmentation, NULL); + + for (SegmentList::const_iterator f_it = filter_segmentation.Begin(); + f_it != filter_segmentation.End(); ++f_it) { + int32 label = (unmatched_label > 0 ? unmatched_label : it->Label()); + if (f_it->Label() == secondary_label) { + if (subsegment_label >= 0) { + label = subsegment_label; + } else { + label = f_it->Label(); + } + } + out_segmentation->EmplaceBack(f_it->start_frame, f_it->end_frame, + label); + } + } +} + +// TODO(Vimal): Write test code for this +void MergeAdjacentSegments(int32 max_intersegment_length, + Segmentation *segmentation) { + SegmentList::iterator it = segmentation->Begin(), + prev_it = segmentation->Begin(); + + while (it != segmentation->End()) { + KALDI_ASSERT(it->start_frame >= prev_it->start_frame); + + if (it != segmentation->Begin() && + it->Label() == prev_it->Label() && + prev_it->end_frame + max_intersegment_length + 1 >= it->start_frame) { + // merge segments + if (prev_it->end_frame < it->end_frame) { + // If the previous segment end before the current segment, then + // extend the previous segment to the end_frame of the current + // segment and remove the current segment. + prev_it->end_frame = it->end_frame; + } // else simply remove the current segment. + it = segmentation->Erase(it); + } else { + // no merging of segments + prev_it = it; + ++it; + } + } + +#ifdef KALDI_PARANOID + segmentation->Check(); +#endif +} + +void PadSegments(int32 label, int32 length, Segmentation *segmentation) { + KALDI_ASSERT(segmentation); + for (SegmentList::iterator it = segmentation->Begin(); + it != segmentation->End(); ++it) { + if (it->Label() != label) continue; + + it->start_frame -= length; + it->end_frame += length; + + if (it->start_frame < 0) it->start_frame = 0; + } +} + +void WidenSegments(int32 label, int32 length, Segmentation *segmentation) { + for (SegmentList::iterator it = segmentation->Begin(); + it != segmentation->End(); ++it) { + if (it->Label() == label) { + if (it != segmentation->Begin()) { + // it is not the beginning of the segmentation, so we can widen it on + // the start_frame side + SegmentList::iterator prev_it = it; + --prev_it; + it->start_frame -= length; + if (prev_it->Label() == label && it->start_frame < prev_it->end_frame) { + // After widening this segment, it overlaps the previous segment that + // also has the same class_id. Then turn this segment into a composite + // one + it->start_frame = prev_it->start_frame; + // and remove the previous segment from the list. + segmentation->Erase(prev_it); + } else if (prev_it->Label() != label && + it->start_frame < prev_it->end_frame) { + // Previous segment is not the same class_id, so we cannot turn this + // into a composite segment. + if (it->start_frame <= prev_it->start_frame) { + // The extended segment absorbs the previous segment into it + // So remove the previous segment + segmentation->Erase(prev_it); + } else { + // The extended segment reduces the length of the previous + // segment. But does not completely overlap it. + prev_it->end_frame -= length; + if (prev_it->end_frame < prev_it->start_frame) + segmentation->Erase(prev_it); + } + } + if (it->start_frame < 0) it->start_frame = 0; + } else { + it->start_frame -= length; + if (it->start_frame < 0) it->start_frame = 0; + } + + SegmentList::iterator next_it = it; + ++next_it; + + if (next_it != segmentation->End()) + // We do not know the length of the file. + // So we don't want to extend the last one. 
+ it->end_frame += length; // Line (1) + } else { // if (it->Label() != label) + if (it != segmentation->Begin()) { + SegmentList::iterator prev_it = it; + --prev_it; + if (prev_it->end_frame >= it->end_frame) { + // The extended previous segment in Line (1) completely + // overlaps the current segment. So remove the current segment. + it = segmentation->Erase(it); + // So that we can increment in the for loop + --it; // TODO(Vimal): This is buggy. + } else if (prev_it->end_frame >= it->start_frame) { + // The extended previous segment in Line (1) reduces the length of + // this segment. + it->start_frame = prev_it->end_frame + 1; + } + } + } + } +} + +void ShrinkSegments(int32 label, int32 length, Segmentation *segmentation) { + for (SegmentList::iterator it = segmentation->Begin(); + it != segmentation->End(); ) { + if (it->Label() == label) { + if (it->Length() <= 2 * length) { + it = segmentation->Erase(it); + } else { + it->start_frame += length; + it->end_frame -= length; + ++it; + } + } else { + ++it; + } + } + +#ifdef KALDI_PARANOID + segmentation->Check(); +#endif +} + +void BlendShortSegmentsWithNeighbors(int32 label, int32 max_length, + int32 max_intersegment_length, + Segmentation *segmentation) { + for (SegmentList::iterator it = segmentation->Begin(); + it != segmentation->End(); ) { + if (it == segmentation->Begin()) { + // Can't blend the first segment + ++it; + continue; + } + + SegmentList::iterator next_it = it; + ++next_it; + + if (next_it == segmentation->End()) // End of segmentation + break; + + SegmentList::iterator prev_it = it; + --prev_it; + + // If the previous and current segments have different labels, + // then ensure that they are not overlapping + KALDI_ASSERT(it->start_frame >= prev_it->start_frame && + (prev_it->Label() == it->Label() || + prev_it->end_frame < it->start_frame)); + + KALDI_ASSERT(next_it->start_frame >= it->start_frame && + (it->Label() == next_it->Label() || + it->end_frame < next_it->start_frame)); + + if (next_it->Label() != prev_it->Label() || it->Label() != label || + it->Length() >= max_length || + next_it->start_frame - it->end_frame - 1 > max_intersegment_length || + it->start_frame - prev_it->end_frame - 1 > max_intersegment_length) { + ++it; + continue; + } + + prev_it->end_frame = next_it->end_frame; + segmentation->Erase(it); + it = segmentation->Erase(next_it); + } +#ifdef KALDI_PARANOID + segmentation->Check(); +#endif +} + +bool ConvertToAlignment(const Segmentation &segmentation, + int32 default_label, int32 length, + int32 tolerance, + std::vector *alignment) { + KALDI_ASSERT(alignment); + alignment->clear(); + + if (length != -1) { + KALDI_ASSERT(length >= 0); + alignment->resize(length, default_label); + } + + SegmentList::const_iterator it = segmentation.Begin(); + for (; it != segmentation.End(); ++it) { + if (length != -1 && it->end_frame >= length + tolerance) { + KALDI_WARN << "End frame (" << it->end_frame << ") " + << ">= length (" << length + << ") + tolerance (" << tolerance << ")." 
+ << "Conversion failed."; + return false; + } + + int32 end_frame = it->end_frame; + if (length == -1) { + alignment->resize(it->end_frame + 1, default_label); + } else { + if (it->end_frame >= length) + end_frame = length - 1; + } + + KALDI_ASSERT(end_frame < alignment->size()); + for (int32 i = it->start_frame; i <= end_frame; i++) { + (*alignment)[i] = it->Label(); + } + } + return true; +} + +int32 InsertFromAlignment(const std::vector &alignment, + int32 start, int32 end, + int32 start_time_offset, + Segmentation *segmentation, + std::vector *frame_counts_per_class) { + KALDI_ASSERT(segmentation); + + if (end <= start) return 0; // nothing to insert + + // Correct boundaries + if (end > alignment.size()) end = alignment.size(); + if (start < 0) start = 0; + + KALDI_ASSERT(end > start); // This is possible if end was originally + // greater than alignment.size(). + // The user must resize alignment appropriately + // before passing to this function. + + int32 num_segments = 0; + int32 state = -100, start_frame = -1; + for (int32 i = start; i < end; i++) { + KALDI_ASSERT(alignment[i] >= -1); + if (alignment[i] != state) { + // Change of state i.e. a different class id. + // So the previous segment has ended. + if (start_frame != -1) { + // start_frame == -1 in the beginning of the alignment. That is just + // initialization step and hence no creation of segment. + segmentation->EmplaceBack(start_frame + start_time_offset, + i-1 + start_time_offset, state); + num_segments++; + + if (frame_counts_per_class && state > 0) { + if (frame_counts_per_class->size() <= state) { + frame_counts_per_class->resize(state + 1, 0); + } + (*frame_counts_per_class)[state] += i - start_frame; + } + } + start_frame = i; + state = alignment[i]; + } + } + + KALDI_ASSERT(state >= -1 && start_frame >= 0 && start_frame < end); + segmentation->EmplaceBack(start_frame + start_time_offset, + end-1 + start_time_offset, state); + num_segments++; + if (frame_counts_per_class && state > 0) { + if (frame_counts_per_class->size() <= state) { + frame_counts_per_class->resize(state + 1, 0); + } + (*frame_counts_per_class)[state] += end - start_frame; + } + +#ifdef KALDI_PARANOID + segmentation->Check(); +#endif + + return num_segments; +} + +int32 InsertFromSegmentation( + const Segmentation &in_segmentation, int32 start_time_offset, + bool sort, + Segmentation *out_segmentation, + std::vector *frame_counts_per_class) { + KALDI_ASSERT(out_segmentation); + + if (in_segmentation.Dim() == 0) return 0; // nothing to insert + + int32 num_segments = 0; + + for (SegmentList::const_iterator it = in_segmentation.Begin(); + it != in_segmentation.End(); ++it) { + out_segmentation->EmplaceBack(it->start_frame + start_time_offset, + it->end_frame + start_time_offset, + it->Label()); + num_segments++; + if (frame_counts_per_class) { + if (frame_counts_per_class->size() <= it->Label()) { + frame_counts_per_class->resize(it->Label() + 1, 0); + } + (*frame_counts_per_class)[it->Label()] += it->Length(); + } + } + + if (sort) out_segmentation->Sort(); + +#ifdef KALDI_PARANOID + out_segmentation->Check(); +#endif + + return num_segments; +} + +void ExtendSegmentation(const Segmentation &in_segmentation, + bool sort, + Segmentation *segmentation) { + InsertFromSegmentation(in_segmentation, 0, sort, segmentation, NULL); +} + +bool GetClassCountsPerFrame( + const Segmentation &segmentation, + int32 length, int32 tolerance, + std::vector > *class_counts_per_frame) { + KALDI_ASSERT(class_counts_per_frame); + + if (length != -1) { + 
KALDI_ASSERT(length >= 0); + class_counts_per_frame->resize(length, std::map()); + } + + SegmentList::const_iterator it = segmentation.Begin(); + for (; it != segmentation.End(); ++it) { + if (length != -1 && it->end_frame >= length + tolerance) { + KALDI_WARN << "End frame (" << it->end_frame << ") " + << ">= length + tolerance (" << length + tolerance << ")." + << "Conversion failed."; + return false; + } + + int32 end_frame = it->end_frame; + if (length == -1) { + class_counts_per_frame->resize(it->end_frame + 1, + std::map()); + } else { + if (it->end_frame >= length) + end_frame = length - 1; + } + + KALDI_ASSERT(end_frame < class_counts_per_frame->size()); + for (int32 i = it->start_frame; i <= end_frame; i++) { + std::map &this_class_counts = (*class_counts_per_frame)[i]; + std::map::iterator c_it = this_class_counts.lower_bound( + it->Label()); + if (c_it == this_class_counts.end() || it->Label() < c_it->first) { + this_class_counts.insert(c_it, std::make_pair(it->Label(), 1)); + } else { + c_it->second++; + } + } + } + + return true; +} + +bool IsNonOverlapping(const Segmentation &segmentation) { + std::vector vec; + for (SegmentList::const_iterator it = segmentation.Begin(); + it != segmentation.End(); ++it) { + vec.resize(it->end_frame + 1, false); + for (int32 i = it->start_frame; i <= it->end_frame; i++) { + if (vec[i]) return false; + vec[i] = true; + } + } + return true; +} + +void Sort(Segmentation *segmentation) { + segmentation->Sort(); +} + +void TruncateToLength(int32 length, Segmentation *segmentation) { + for (SegmentList::iterator it = segmentation->Begin(); + it != segmentation->End(); ) { + if (it->start_frame >= length) { + it = segmentation->Erase(it); + continue; + } + + if (it->end_frame >= length) + it->end_frame = length - 1; + ++it; + } +} + +} // end namespace segmenter +} // end namespace kaldi diff --git a/src/segmenter/segmentation-utils.h b/src/segmenter/segmentation-utils.h new file mode 100644 index 00000000000..9401722ccb7 --- /dev/null +++ b/src/segmenter/segmentation-utils.h @@ -0,0 +1,337 @@ +// segmenter/segmentation-utils.h + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_SEGMENTER_SEGMENTATION_UTILS_H_ +#define KALDI_SEGMENTER_SEGMENTATION_UTILS_H_ + +#include "segmenter/segmentation.h" + +namespace kaldi { +namespace segmenter { + +/** + * This function is very straight forward. It just merges the labels in + * merge_labels to the class-id dest_label. This means any segment that + * originally had the class-id as any of the labels in merge_labels would end + * up having the class-id dest_label. + **/ +void MergeLabels(const std::vector &merge_labels, + int32 dest_label, Segmentation *segmentation); + +// Relabel segments using a map from old to new label. +// If segment label is not found in the map, the function exits with +// an error. 
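+//
+// Editor's note: a minimal usage sketch (not part of the original patch); the
+// label values and the Segmentation object 'seg' are hypothetical:
+//
+//   unordered_map<int32, int32> label_map;
+//   label_map[1] = 2;  // relabel class 1 as class 2
+//   label_map[3] = 2;  // relabel class 3 as class 2
+//   RelabelSegmentsUsingMap(label_map, &seg);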
+void RelabelSegmentsUsingMap(const unordered_map<int32, int32> &label_map,
+                             Segmentation *segmentation);
+
+// Relabel all segments to class-id label
+void RelabelAllSegments(int32 label, Segmentation *segmentation);
+
+// Scale frame shift by this factor.
+// Usually the frame shift is 0.01s, but sometimes the alignments are obtained
+// using a subsampling factor of 3, i.e. an effective frame shift of 0.03s.
+// This function can be used to maintain consistency among different
+// alignments and segmentations.
+void ScaleFrameShift(BaseFloat factor, Segmentation *segmentation);
+
+/**
+ * This is straightforward: it removes all segments of label "label".
+**/
+void RemoveSegments(int32 label, Segmentation *segmentation);
+
+/**
+ * This is straightforward: it removes any segment whose label is
+ * contained in the vector "labels".
+**/
+void RemoveSegments(const std::vector<int32> &labels,
+                    Segmentation *segmentation);
+
+// Keep only segments of label "label"
+void KeepSegments(int32 label, Segmentation *segmentation);
+
+/**
+ * This function splits an input segmentation in_segmentation into pieces of
+ * approximately segment_length. Each piece is given the same class id as the
+ * original segment.
+ *
+ * The function first figures out the number of pieces that the segment must
+ * be broken into. It then creates that many pieces of roughly equal size
+ * (actual_segment_length). This mimics some of the approaches used at the
+ * script level.
+**/
+void SplitInputSegmentation(const Segmentation &in_segmentation,
+                            int32 segment_length,
+                            Segmentation *out_segmentation);
+
+/**
+ * This function splits the segments in the segmentation
+ * into pieces of segment_length.
+ * But if the last remaining piece is smaller than min_remainder, then the last
+ * piece is merged into the piece before it, resulting in a piece that is of
+ * length < segment_length + min_remainder.
+ * If overlap_length > 0, then the created pieces overlap by these many frames.
+ * If segment_label == -1, then all segments are split.
+ * Otherwise, only the segments with this label are split.
+ *
+ * The way this function works is as follows: it looks at the current segment
+ * length and checks if it is larger than segment_length + min_remainder. If
+ * it is larger, then it must be split. To do this, it first modifies the
+ * start_frame of the current segment to start_frame + segment_length -
+ * overlap_length. It then creates a new segment of length segment_length from
+ * the original start_frame to start_frame + segment_length - 1 and adds it
+ * just before the current segment. So in the next iteration, we would
+ * actually be back at the same segment, but whose start_frame had just been
+ * modified.
+**/
+void SplitSegments(int32 segment_length,
+                   int32 min_remainder, int32 overlap_length,
+                   int32 segment_label,
+                   Segmentation *segmentation);
+
+/**
+ * Split this segmentation into pieces of size segment_length,
+ * but only where possible, by creating split points at the
+ * middle of a chunk where alignment == ali_label and
+ * the chunk is at least min_align_chunk_length frames long.
+ *
+ * segment_label serves the same purpose as in the
+ * above SplitSegments function.
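+ *
+ * Example (hypothetical numbers): with segment_length = 1000, ali_label = 0
+ * (say, silence) and min_align_chunk_length = 10, a 2500-frame segment is
+ * split at the middle of its longest run of at least 10 consecutive frames
+ * whose alignment value is 0; pieces still longer than 1000 frames are
+ * split again on later iterations.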
+**/ +void SplitSegmentsUsingAlignment(int32 segment_length, + int32 segment_label, + const std::vector &alignment, + int32 alignment_label, + int32 min_align_chunk_length, + Segmentation *segmentation); + +/** + * This function is a standard intersection of the set of times represented by + * the segmentation in_segmentation and the set of times of where + * alignment contains ali_label for at least min_align_chunk_length + * consecutive frames +**/ +void IntersectSegmentationAndAlignment(const Segmentation &in_segmentation, + const std::vector &alignment, + int32 ali_label, + int32 min_align_chunk_length, + Segmentation *out_segmentation); + +/** + * This function is a little complicated in what it does. But this is required + * for one of the applications. + * This function creates a new segmentation by sub-segmenting an arbitrary + * "primary_segmentation" and assign new label "subsegment_label" to regions + * where the "primary_segmentation" intersects the non-overlapping + * "secondary_segmentation" segments with label "secondary_label". + * This is similar to the function "IntersectSegments", but instead of keeping + * only the filtered subsegments, all the subsegments are kept, while only + * changing the class_id of the filtered sub-segments. + * The label for the newly created subsegments is determined as follows: + * if secondary segment's label == secondary_label: + * if subsegment_label > 0: + * label = subsegment_label + * else: + * label = secondary_label + * else: + * if unmatched_label > 0: + * label = unmatched_label + * else: + * label = primary_label +**/ +void SubSegmentUsingNonOverlappingSegments( + const Segmentation &primary_segmentation, + const Segmentation &secondary_segmentation, int32 secondary_label, + int32 subsegment_label, int32 unmatched_label, + Segmentation *out_segmentation); + +/** + * This function is used to merge segments next to each other in the SegmentList + * and within a distance of max_intersegment_length frames from each other, + * provided the segments are of the same label. + * This function requires the segmentation to be sorted before passing it. + **/ +void MergeAdjacentSegments(int32 max_intersegment_length, + Segmentation *segmentation); + +/** + * This function is used to pad segments of label "label" by "length" + * frames on either side of the segment. + * This is useful to pad segments of speech. +**/ +void PadSegments(int32 label, int32 length, Segmentation *segmentation); + +/** + * This function is used to widen segments of label "label" by "length" + * frames on either side of the segment. + * This is similar to PadSegments, but while widening, it also reduces the + * length of the segment adjacent to it. + * This may not be required in some applications, but it is ok for speech / + * silence. By this process, we are calling frames within a "length" number of + * frames near the speech segment as speech and hence we reduce the width of the + * silence segment before it. +**/ +void WidenSegments(int32 label, int32 length, Segmentation *segmentation); + +/** + * This function is used to shrink segments of class_id "label" by "length" + * frames on either side of the segment. + * If the whole segment is smaller than 2*length, then the segment is + * removed entirely. 
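+ *
+ * Example (hypothetical numbers): with label = 1 and length = 5, a class-1
+ * segment [10, 30] becomes [15, 25], while a class-1 segment [10, 18]
+ * (9 frames <= 2 * 5) is removed entirely.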
+**/ +void ShrinkSegments(int32 label, int32 length, Segmentation *segmentation); + +/** + * This function blends segments of label "label" that are shorter than + * "max_length" frames, provided the segments before and after it are of the + * same label "other_label" and the distance to the neighbor is less than + * "max_intersegment_distance". + * After blending, the three segments have the same label "other_label" and + * hence can be merged into a composite segment. + * An example where this is useful is when there is a short segment of silence + * with speech segments on either sides. Then the short segment of silence is + * removed and called speech instead. The three continguous segments of speech + * are merged into a single composite segment. +**/ +void BlendShortSegmentsWithNeighbors(int32 label, int32 max_length, + int32 max_intersegment_distance, + Segmentation *segmentation); + +/** + * This function is used to convert the segmentation into frame-level alignment + * with the label for each frame begin the class_id of segment the frame belongs + * to. + * The arguments are used to provided extended functionality that are required + * for most cases. + * default_label : the label that is used as filler in regions where the frame + * is not in any of the segments. In most applications, certain + * segments are removed, such as the ones that are silence. Then + * the segments would not span the entire duration of the file. + * e.g. + * 10 35 1 + * 41 190 2 + * ... + * Here there is no segment from 36-40. These frames are + * filled with default_label. + * length : the number of frames required in the alignment. + * If set to -1, then this length is ignored. + * In most applications, the length of the alignment required is + * known. Usually it must match the length of the features + * (obtained using feat-to-len). Then the alignment is resized + * to this length and filled with default_label. The segments + * are then read and the frames corresponding to the segments + * are relabeled with the class_id of the respective segments. + * tolerance : the tolerance in number of frames that we allow for the + * frame index corresponding to the end_frame of the last + * segment. Applicable when length != -1. + * Since, we use 25 ms widows with 10 ms frame shift, + * it is possible that the features length is 2 frames less than + * the end of the last segment. So the user can set the + * tolerance to 2 in order to avoid returning with error in this + * function. + * Function returns true is successful. +**/ +bool ConvertToAlignment(const Segmentation &segmentation, + int32 default_label, int32 length, + int32 tolerance, + std::vector *alignment); + +/** + * Insert segments created from alignment starting from frame index "start" + * until and excluding frame index "end". + * The inserted segments are shifted by "start_time_offset". + * "start_time_offset" is useful when the "alignment" is per-utterance, in which + * case the start time of the utterance can be provided as the + * "start_time_offset" + * The function returns the number of segments created. + * If "frame_counts_per_class" is provided, then the number of frames per class + * is accumulated there. +**/ +int32 InsertFromAlignment(const std::vector &alignment, + int32 start, int32 end, + int32 start_time_offset, + Segmentation *segmentation, + std::vector *frame_counts_per_class = NULL); + +/** + * Insert segments from in_segmentation, but shift them by + * start_time offset. 
+ * If sort is true, then the final segmentation is sorted. + * It is useful in some applications to set sort to false. + * Returns number of segments inserted. +**/ +int32 InsertFromSegmentation(const Segmentation &in_segmentation, + int32 start_time_offset, bool sort, + Segmentation *segmentation, + std::vector *frame_counts_per_class = NULL); + +/** + * Extend a segmentation by adding another one. + * If "sort" is set to true, then resultant segmentation would be sorted. + * If its known that the other segmentation must all be after this segmentation, + * then the user may set "sort" false. +**/ +void ExtendSegmentation(const Segmentation &in_segmentation, bool sort, + Segmentation *segmentation); + +/** + * This function is used to get per-frame count of number of classes. + * The output is in the format of a vector of maps. + * class_counts_per_frame: A pointer to a vector of maps use to get the output. + * The size of the vector is the number of frames. + * For each frame, there is a map from the "class_id" + * to the number of segments where the label the + * corresponding "class_id". + * The size of the map gives the number of unique + * labels in this frame e.g. number of speakers. + * The count for each "class_id" is the number + * of segments with that "class_id" at that frame. + * length : the number of frames required in the output. + * In most applications, this length is known. + * Usually it must match the length of the features (obtained + * using feat-to-len). Then the output is resized to this + * length. The map is empty for frames where no segments are + * seen. + * tolerance : the tolerance in number of frames that we allow for the + * frame index corresponding to the end_frame of the last + * segment. Since, we use 25 ms widows with 10 ms frame shift, + * it is possible that the features length is 2 frames less than + * the end of the last segment. So the user can set the + * tolerance to 2 in order to avoid returning an error in this + * function. + * Function returns true is successful. +**/ +bool GetClassCountsPerFrame( + const Segmentation &segmentation, + int32 length, int32 tolerance, + std::vector > *class_counts_per_frame); + +// Checks if segmentation is non-overlapping +bool IsNonOverlapping(const Segmentation &segmentation); + +// Sorts segments on start frame. +void Sort(Segmentation *segmentation); + +// Truncate segmentation to "length". +// Removes any segments with "start_time" >= "length" +// and truncates any segments with "end_time" >= "length" +void TruncateToLength(int32 length, Segmentation *segmentation); + +} // end namespace segmenter +} // end namespace kaldi + +#endif // KALDI_SEGMENTER_SEGMENTATION_UTILS_H_ diff --git a/src/segmenter/segmentation.cc b/src/segmenter/segmentation.cc new file mode 100644 index 00000000000..fb83ed5476b --- /dev/null +++ b/src/segmenter/segmentation.cc @@ -0,0 +1,201 @@ +// segmenter/segmentation.cc + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. 
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "segmenter/segmentation.h"
+#include <algorithm>
+
+namespace kaldi {
+namespace segmenter {
+
+void Segmentation::PushBack(const Segment &seg) {
+  dim_++;
+  segments_.push_back(seg);
+}
+
+SegmentList::iterator Segmentation::Insert(SegmentList::iterator it,
+                                           const Segment &seg) {
+  dim_++;
+  return segments_.insert(it, seg);
+}
+
+void Segmentation::EmplaceBack(int32 start_frame, int32 end_frame,
+                               int32 class_id) {
+  dim_++;
+  Segment seg(start_frame, end_frame, class_id);
+  segments_.push_back(seg);
+}
+
+SegmentList::iterator Segmentation::Emplace(SegmentList::iterator it,
+                                            int32 start_frame, int32 end_frame,
+                                            int32 class_id) {
+  dim_++;
+  Segment seg(start_frame, end_frame, class_id);
+  return segments_.insert(it, seg);
+}
+
+SegmentList::iterator Segmentation::Erase(SegmentList::iterator it) {
+  dim_--;
+  return segments_.erase(it);
+}
+
+void Segmentation::Clear() {
+  segments_.clear();
+  dim_ = 0;
+}
+
+void Segmentation::Read(std::istream &is, bool binary) {
+  Clear();
+
+  if (binary) {
+    int32 sz = is.peek();
+    if (sz == Segment::SizeInBytes()) {
+      is.get();
+    } else {
+      KALDI_ERR << "Segmentation::Read: expected to see Segment of size "
+                << Segment::SizeInBytes() << ", saw instead " << sz
+                << ", at file position " << is.tellg();
+    }
+
+    int32 segmentssz;
+    is.read(reinterpret_cast<char *>(&segmentssz), sizeof(segmentssz));
+    if (is.fail() || segmentssz < 0)
+      KALDI_ERR << "Segmentation::Read: read failure at file position "
+                << is.tellg();
+
+    for (int32 i = 0; i < segmentssz; i++) {
+      Segment seg;
+      seg.Read(is, binary);
+      segments_.push_back(seg);
+    }
+    dim_ = segmentssz;
+  } else {
+    int c = is.peek();
+    if (c != static_cast<int>('[')) {
+      KALDI_ERR << "Segmentation::Read: expected to see [, saw "
+                << static_cast<char>(c) << ", at file position " << is.tellg();
+    }
+    is.get();  // consume the '['
+    is >> std::ws;
+    while (is.peek() != static_cast<int>(']')) {
+      KALDI_ASSERT(!is.eof());
+      Segment seg;
+      seg.Read(is, binary);
+      segments_.push_back(seg);
+      dim_++;
+      is >> std::ws;
+    }
+    is.get();
+    KALDI_ASSERT(!is.eof());
+  }
+#ifdef KALDI_PARANOID
+  Check();
+#endif
+}
+
+void Segmentation::Write(std::ostream &os, bool binary) const {
+#ifdef KALDI_PARANOID
+  Check();
+#endif
+
+  SegmentList::const_iterator it = Begin();
+  if (binary) {
+    char sz = Segment::SizeInBytes();
+    os.write(&sz, 1);
+
+    int32 segmentssz = static_cast<int32>(Dim());
+    KALDI_ASSERT((size_t)segmentssz == Dim());
+
+    os.write(reinterpret_cast<const char *>(&segmentssz), sizeof(segmentssz));
+
+    for (; it != End(); ++it) {
+      it->Write(os, binary);
+    }
+  } else {
+    os << "[ ";
+    for (; it != End(); ++it) {
+      it->Write(os, binary);
+      os << std::endl;
+    }
+    os << "]" << std::endl;
+  }
+}
+
+void Segmentation::Check() const {
+  int32 dim = 0;
+  for (SegmentList::const_iterator it = Begin(); it != End(); ++it, dim++) {
+    KALDI_ASSERT(it->start_frame >= 0);
+    KALDI_ASSERT(it->end_frame >= 0);
+    KALDI_ASSERT(it->Label() >= 0);
+  }
+  KALDI_ASSERT(dim == dim_);
+}
+
+void Segmentation::Sort() {
+  segments_.sort(SegmentComparator());
+}
+
+void Segmentation::SortByLength() {
+  segments_.sort(SegmentLengthComparator());
+}
+
+SegmentList::iterator Segmentation::MinElement() {
+  return std::min_element(segments_.begin(), segments_.end(),
+                          SegmentLengthComparator());
+}
+
+SegmentList::iterator Segmentation::MaxElement() {
+  return std::max_element(segments_.begin(), segments_.end(),
+                          SegmentLengthComparator());
+}
+
+Segmentation::Segmentation() {
+  Clear();
+}
+
+
+void Segmentation::GenRandomSegmentation(int32 max_length,
+                                         int32 max_segment_length,
+                                         int32 num_classes) {
+  Clear();
+  int32 st = 0;
+  int32 end = 0;
+
+  while (st < max_length) {
+    // Draw at least 1 so that segments are non-empty and 'st' always
+    // advances.
+    int32 segment_length = RandInt(1, max_segment_length);
+
+    end = st + segment_length - 1;
+
+    // Choose random class id
+    int32 k = RandInt(-1, num_classes - 1);
+
+    if (k >= 0) {
+      Segment seg(st, end, k);
+      segments_.push_back(seg);
+      dim_++;
+    }
+
+    // Choose random shift i.e. the distance between two adjacent segments
+    int32 shift = RandInt(1, max_segment_length);
+    st = end + shift;
+  }
+
+  Check();
+}
+
+} // namespace segmenter
+} // namespace kaldi
diff --git a/src/segmenter/segmentation.h b/src/segmenter/segmentation.h
new file mode 100644
index 00000000000..aa408374751
--- /dev/null
+++ b/src/segmenter/segmentation.h
@@ -0,0 +1,144 @@
+// segmenter/segmentation.h
+
+// Copyright 2016 Vimal Manohar
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_SEGMENTER_SEGMENTATION_H_
+#define KALDI_SEGMENTER_SEGMENTATION_H_
+
+#include <list>
+#include "base/kaldi-common.h"
+#include "matrix/kaldi-matrix.h"
+#include "util/kaldi-table.h"
+#include "segmenter/segment.h"
+
+namespace kaldi {
+namespace segmenter {
+
+// Segments are stored as a doubly-linked-list. This could be changed later
+// if needed. Hence defining a typedef SegmentList.
+typedef std::list<Segment> SegmentList;
+
+// Forward declaration.
+class SegmentationPostProcessor;
+
+/**
+ * The main class to store segmentation and do operations on it. The segments
+ * are stored in the structure SegmentList, which is currently a doubly-linked
+ * list.
+ * See the .cc file for details of implementation of the different functions.
+ * This file gives only a small description of the functions.
+**/
+
+class Segmentation {
+ public:
+  // Inserts the segment at the back of the list.
+  void PushBack(const Segment &seg);
+
+  // Inserts the segment before the segment at the position specified by the
+  // iterator "it".
+  SegmentList::iterator Insert(SegmentList::iterator it,
+                               const Segment &seg);
+
+  // The following function is a wrapper to the
+  // emplace_back functionality of an STL list of Segments
+  // and inserts a new segment at the back of the list.
+  void EmplaceBack(int32 start_frame, int32 end_frame, int32 class_id);
+
+  // The following function is a wrapper to the
+  // emplace functionality of an STL list of segments
+  // and inserts a segment at the position specified by the iterator "it".
+  // Returns an iterator to the inserted segment.
+  SegmentList::iterator Emplace(SegmentList::iterator it,
+                                int32 start_frame, int32 end_frame,
+                                int32 class_id);
+
+  // Calls the erase operation on the SegmentList, decrements dim_, and
+  // returns an iterator pointing to the next segment in the SegmentList.
+  SegmentList::iterator Erase(SegmentList::iterator it);
+
+  // Reset segmentation, i.e. clear all values.
+  void Clear();
+
+  // Read segmentation object from input stream
+  void Read(std::istream &is, bool binary);
+
+  // Write segmentation object to output stream
+  void Write(std::ostream &os, bool binary) const;
+
+  // Check that all segments have class_id >= 0 and that dim_ matches the
+  // number of segments.
+  void Check() const;
+
+  // Sort the segments on the start_frame
+  void Sort();
+
+  // Sort the segments on the length
+  void SortByLength();
+
+  // Returns an iterator to the smallest segment akin to std::min_element
+  SegmentList::iterator MinElement();
+
+  // Returns an iterator to the largest segment akin to std::max_element
+  SegmentList::iterator MaxElement();
+
+  // Generate a random segmentation for debugging purposes.
+  // Arguments:
+  //  max_length: The maximum length of the random segmentation to be
+  //              generated.
+  //  max_segment_length: Maximum length of a segment in the segmentation
+  //  num_classes: Maximum number of classes in the generated segmentation
+  void GenRandomSegmentation(int32 max_length, int32 max_segment_length,
+                             int32 num_classes);
+
+  // Public accessors
+  inline int32 Dim() const { return dim_; }
+  SegmentList::iterator Begin() { return segments_.begin(); }
+  SegmentList::const_iterator Begin() const { return segments_.begin(); }
+  SegmentList::iterator End() { return segments_.end(); }
+  SegmentList::const_iterator End() const { return segments_.end(); }
+
+  Segment& Back() { return segments_.back(); }
+  const Segment& Back() const { return segments_.back(); }
+
+  const SegmentList* Data() const { return &segments_; }
+
+  // Default constructor
+  Segmentation();
+
+ private:
+  // number of segments in the segmentation
+  int32 dim_;
+
+  // list of segments in the segmentation
+  SegmentList segments_;
+
+  friend class SegmentationPostProcessor;
+};
+
+typedef TableWriter<KaldiObjectHolder<Segmentation> > SegmentationWriter;
+typedef SequentialTableReader<KaldiObjectHolder<Segmentation> >
+  SequentialSegmentationReader;
+typedef RandomAccessTableReader<KaldiObjectHolder<Segmentation> >
+  RandomAccessSegmentationReader;
+typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Segmentation> >
+  RandomAccessSegmentationReaderMapped;
+
+} // end namespace segmenter
+} // end namespace kaldi
+
+#endif // KALDI_SEGMENTER_SEGMENTATION_H_
diff --git a/src/segmenterbin/Makefile b/src/segmenterbin/Makefile
new file mode 100644
index 00000000000..1f0efe71181
--- /dev/null
+++ b/src/segmenterbin/Makefile
@@ -0,0 +1,36 @@
+
+all:
+
+EXTRA_CXXFLAGS = -Wno-sign-compare
+include ../kaldi.mk
+
+BINFILES = segmentation-copy segmentation-get-stats \
+           segmentation-init-from-ali segmentation-to-ali \
+           segmentation-init-from-segments segmentation-to-segments \
+           segmentation-combine-segments segmentation-merge-recordings \
+           segmentation-create-subsegments segmentation-intersect-ali \
+           segmentation-to-rttm segmentation-post-process \
+           segmentation-merge segmentation-split-segments \
+           segmentation-remove-segments \
+           segmentation-init-from-lengths \
+           segmentation-combine-segments-to-recordings \
+           segmentation-create-overlapped-subsegments \
+           segmentation-intersect-segments \
+           segmentation-init-from-additive-signals-info #\
+           gmm-acc-pdf-stats-segmentation \
+           gmm-est-segmentation gmm-update-segmentation \
+           segmentation-init-from-diarization \
+           segmentation-compute-class-ctm-conf \
+           combine-vector-segments
+
+OBJFILES =
+
+
+
+TESTFILES =
+
+ADDLIBS = ../hmm/kaldi-hmm.a ../gmm/kaldi-gmm.a ../segmenter/kaldi-segmenter.a ../tree/kaldi-tree.a \
+          ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../base/kaldi-base.a ../thread/kaldi-thread.a
+
+include ../makefiles/default_rules.mk
+
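
// Editor's note: a minimal sketch (not part of the original patch) of how the
// Segmentation table typedefs from segmenter/segmentation.h are used by the
// binaries listed in the Makefile above; the rspecifier/wspecifier strings
// are hypothetical:
//
//   SequentialSegmentationReader reader("ark:in.seg");
//   SegmentationWriter writer("ark:out.seg");
//   for (; !reader.Done(); reader.Next()) {
//     Segmentation seg(reader.Value());  // copy, so that we can modify it
//     Sort(&seg);                        // free functions from
//     MergeAdjacentSegments(0, &seg);    // segmentation-utils.h
//     writer.Write(reader.Key(), seg);
//   }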
diff --git a/src/segmenterbin/segmentation-combine-segments-to-recordings.cc b/src/segmenterbin/segmentation-combine-segments-to-recordings.cc
new file mode 100644
index 00000000000..acf71265577
--- /dev/null
+++ b/src/segmenterbin/segmentation-combine-segments-to-recordings.cc
@@ -0,0 +1,114 @@
+// segmenterbin/segmentation-combine-segments-to-recordings.cc
+
+// Copyright 2015-16 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation-utils.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Combine kaldi segments in segmentation format into "
+        "recording-level segmentations.\n"
+        "A reco2utt file is used to specify which utterances are contained "
+        "in a recording.\n"
+        "This program expects each input segmentation to be a kaldi segment "
+        "converted to segmentation format using "
+        "segmentation-init-from-segments. "
+        "For other segmentations, the user can use the binary "
+        "segmentation-combine-segments instead.\n"
+        "\n"
+        "Usage: segmentation-combine-segments-to-recordings [options] "
+        "<segmentation-rspecifier> <reco2utt-rspecifier> "
+        "<segmentation-wspecifier>\n"
+        " e.g.: segmentation-combine-segments-to-recordings \\\n"
+        "'ark:segmentation-init-from-segments --shift-to-zero=false "
+        "data/dev/segments ark:- |' ark,t:data/dev/reco2utt ark:file.seg\n"
+        "See also: segmentation-combine-segments, "
+        "segmentation-merge, segmentation-merge-recordings, "
+        "segmentation-post-process --merge-adjacent-segments\n";
+
+    ParseOptions po(usage);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string segmentation_rspecifier = po.GetArg(1),
+        reco2utt_rspecifier = po.GetArg(2),
+        segmentation_wspecifier = po.GetArg(3);
+
+    SequentialTokenVectorReader reco2utt_reader(reco2utt_rspecifier);
+    RandomAccessSegmentationReader segmentation_reader(
+        segmentation_rspecifier);
+    SegmentationWriter segmentation_writer(segmentation_wspecifier);
+
+    int32 num_done = 0, num_segmentations = 0, num_err = 0;
+
+    for (; !reco2utt_reader.Done(); reco2utt_reader.Next()) {
+      const std::vector<std::string> &utts = reco2utt_reader.Value();
+      const std::string &reco_id = reco2utt_reader.Key();
+
+      Segmentation out_segmentation;
+
+      for (std::vector<std::string>::const_iterator it = utts.begin();
+           it != utts.end(); ++it) {
+        if (!segmentation_reader.HasKey(*it)) {
+          KALDI_WARN << "Could not find utterance " << *it << " in "
+                     << "segments segmentation "
+                     << segmentation_rspecifier;
+          num_err++;
+          continue;
+        }
+
+        const Segmentation &segmentation = segmentation_reader.Value(*it);
+        if (segmentation.Dim() != 1) {
+          KALDI_ERR << "Segments segmentation for utt " << *it << " is not "
+                    << "a kaldi segment converted to segmentation format "
+                    << "in " << segmentation_rspecifier;
+        }
+        const Segment &segment = *(segmentation.Begin());
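+        // Because Dim() == 1 (checked above), this single segment is the
+        // kaldi segment itself; its start_frame/end_frame give the
+        // utterance's position within the recording.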
+
+        out_segmentation.PushBack(segment);
+
+        num_done++;
+      }
+
+      Sort(&out_segmentation);
+      segmentation_writer.Write(reco_id, out_segmentation);
+      num_segmentations++;
+    }
+
+    KALDI_LOG << "Combined " << num_done << " utterance-level segments "
+              << "into " << num_segmentations
+              << " recording-level segmentations; failed with "
+              << num_err << " utterances.";
+
+    return ((num_done > 0 && num_err < num_done) ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
diff --git a/src/segmenterbin/segmentation-combine-segments.cc b/src/segmenterbin/segmentation-combine-segments.cc
new file mode 100644
index 00000000000..7034a8a1734
--- /dev/null
+++ b/src/segmenterbin/segmentation-combine-segments.cc
@@ -0,0 +1,128 @@
+// segmenterbin/segmentation-combine-segments.cc
+
+// Copyright 2015-16 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation-utils.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Combine utterance-level segmentations in an archive to "
+        "recording-level segmentations, using the kaldi segments to map "
+        "utterances to their positions in the recordings.\n"
+        "A reco2utt file is used to specify which utterances belong to each "
+        "recording.\n"
+        "\n"
+        "Usage: segmentation-combine-segments [options] "
+        "<utt-segmentation-rspecifier> <segments-segmentation-rspecifier> "
+        "<reco2utt-rspecifier> <segmentation-wspecifier>\n"
+        " e.g.: segmentation-combine-segments ark:utt.seg "
+        "'ark:segmentation-init-from-segments --shift-to-zero=false "
+        "data/dev/segments ark:- |' ark,t:data/dev/reco2utt ark:file.seg\n"
+        "See also: segmentation-combine-segments-to-recordings, "
+        "segmentation-merge, segmentation-merge-recordings, "
+        "segmentation-post-process --merge-adjacent-segments\n";
+
+    ParseOptions po(usage);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 4) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string utt_segmentation_rspecifier = po.GetArg(1),
+        segments_segmentation_rspecifier = po.GetArg(2),
+        reco2utt_rspecifier = po.GetArg(3),
+        segmentation_wspecifier = po.GetArg(4);
+
+    SequentialTokenVectorReader reco2utt_reader(reco2utt_rspecifier);
+    RandomAccessSegmentationReader segments_segmentation_reader(
+        segments_segmentation_rspecifier);
+    RandomAccessSegmentationReader utt_segmentation_reader(
+        utt_segmentation_rspecifier);
+    SegmentationWriter segmentation_writer(segmentation_wspecifier);
+
+    int32 num_done = 0, num_segmentations = 0, num_err = 0;
+    int64 num_segments = 0;
+
+    for (; !reco2utt_reader.Done(); reco2utt_reader.Next()) {
+      const std::vector<std::string> &utts = reco2utt_reader.Value();
+      const std::string &reco_id = reco2utt_reader.Key();
+
+      Segmentation out_segmentation;
+
+      for (std::vector<std::string>::const_iterator it = utts.begin();
+           it != utts.end(); ++it) {
+        if (!segments_segmentation_reader.HasKey(*it)) {
+          KALDI_WARN << "Could not find utterance " << *it << " in "
+                     << "segments segmentation "
+                     << segments_segmentation_rspecifier;
+          num_err++;
+          continue;
+        }
+
+        const Segmentation &segments_segmentation =
+            segments_segmentation_reader.Value(*it);
+        if (segments_segmentation.Dim() != 1) {
+          KALDI_ERR << "Segments segmentation for utt " << *it << " is not "
+                    << "a kaldi segment converted to segmentation format "
+                    << "in " << segments_segmentation_rspecifier;
+        }
+        const Segment &segment = *(segments_segmentation.Begin());
+
+        if (!utt_segmentation_reader.HasKey(*it)) {
+          KALDI_WARN << "Could not find utterance " << *it << " in "
+                     << "segmentation " << utt_segmentation_rspecifier;
+          num_err++;
+          continue;
+        }
+        const Segmentation &utt_segmentation
+            = utt_segmentation_reader.Value(*it);
+
+        num_segments += InsertFromSegmentation(utt_segmentation,
+                                               segment.start_frame, false,
+                                               &out_segmentation, NULL);
+        num_done++;
+      }
+
+      Sort(&out_segmentation);
+      segmentation_writer.Write(reco_id, out_segmentation);
+      num_segmentations++;
+    }
+
+    KALDI_LOG << "Combined " << num_done << " utterance-level segmentations "
+              << "into " << num_segmentations
+              << " recording-level segmentations; failed with "
+              << num_err << " utterances; "
+              << "wrote a total of " << num_segments << " segments.";
+
+    return ((num_done > 0 && num_err < num_done) ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
diff --git a/src/segmenterbin/segmentation-copy.cc b/src/segmenterbin/segmentation-copy.cc
new file mode 100644
index 00000000000..26d0f47682d
--- /dev/null
+++ b/src/segmenterbin/segmentation-copy.cc
@@ -0,0 +1,232 @@
+// segmenterbin/segmentation-copy.cc
+
+// Copyright 2015-16 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation.h"
+#include "segmenter/segmentation-utils.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Copy segmentations or archives of segmentations.\n"
+        "If --label-map is supplied, then apply the mapping to the labels\n"
+        "when copying.\n"
+        "If --utt2label-rspecifier is supplied, then ignore the\n"
+        "original labels, and map all the segments of an utterance using\n"
+        "the supplied utt2label map.\n"
+        "\n"
+        "Usage: segmentation-copy [options] "
+        "<segmentation-rspecifier> <segmentation-wspecifier>\n"
+        " e.g.: segmentation-copy ark:1.seg ark,t:-\n"
+        " or \n"
+        "       segmentation-copy [options] "
+        "<segmentation-rxfilename> <segmentation-wxfilename>\n"
+        " e.g.: segmentation-copy --binary=false foo -\n";
+
+    bool binary = true;
+    std::string label_map_rxfilename, utt2label_rspecifier;
+    std::string include_rxfilename, exclude_rxfilename;
+    int32 keep_label = -1;
+    BaseFloat frame_subsampling_factor = 1;
+
+    ParseOptions po(usage);
+
+    po.Register("binary", &binary,
+                "Write in binary mode "
+                "(only relevant if output is a wxfilename)");
+    po.Register("label-map", &label_map_rxfilename,
+                "File with mapping from old to new labels");
+    po.Register("frame-subsampling-factor", &frame_subsampling_factor,
+                "Change frame rate by this factor");
+    po.Register("utt2label-rspecifier", &utt2label_rspecifier,
+                "Mapping for each utterance to an integer label");
+    po.Register("keep-label", &keep_label,
+                "If supplied, only segments of this label are written out");
+    po.Register("include", &include_rxfilename,
+                "Text file, the first field of each line being interpreted "
+                "as an utterance-id whose segmentation will be included");
+    po.Register("exclude", &exclude_rxfilename,
+                "Text file, the first field of each line being interpreted "
+                "as an utterance-id whose segmentation will be excluded");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    // all these "fn"'s are either rspecifiers or filenames.
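+    // The --label-map file (read below) has one "<old-label> <new-label>"
+    // integer pair per line; e.g. a map relabeling classes 2 and 3 to 1:
+    //   2 1
+    //   3 1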
+
+    std::string segmentation_in_fn = po.GetArg(1),
+        segmentation_out_fn = po.GetArg(2);
+
+    // Read mapping from old to new labels
+    unordered_map<int32, int32> label_map;
+    if (!label_map_rxfilename.empty()) {
+      Input ki(label_map_rxfilename);
+      std::string line;
+      while (std::getline(ki.Stream(), line)) {
+        std::vector<std::string> splits;
+        SplitStringToVector(line, " ", true, &splits);
+
+        if (splits.size() != 2)
+          KALDI_ERR << "Invalid format of line " << line
+                    << " in " << label_map_rxfilename;
+
+        label_map[std::atoi(splits[0].c_str())] = std::atoi(splits[1].c_str());
+      }
+    }
+
+    unordered_set<std::string> include_set;
+    if (include_rxfilename != "") {
+      if (exclude_rxfilename != "") {
+        KALDI_ERR << "should not have both --exclude and --include option!";
+      }
+      Input ki(include_rxfilename);
+      std::string line;
+      while (std::getline(ki.Stream(), line)) {
+        std::vector<std::string> split_line;
+        SplitStringToVector(line, " \t\r", true, &split_line);
+        KALDI_ASSERT(!split_line.empty() &&
+                     "Empty line encountered in input from --include option");
+        include_set.insert(split_line[0]);
+      }
+    }
+
+    unordered_set<std::string> exclude_set;
+    if (exclude_rxfilename != "") {
+      if (include_rxfilename != "") {
+        KALDI_ERR << "should not have both --exclude and --include option!";
+      }
+      Input ki(exclude_rxfilename);
+      std::string line;
+      while (std::getline(ki.Stream(), line)) {
+        std::vector<std::string> split_line;
+        SplitStringToVector(line, " \t\r", true, &split_line);
+        KALDI_ASSERT(!split_line.empty() &&
+                     "Empty line encountered in input from --exclude option");
+        exclude_set.insert(split_line[0]);
+      }
+    }
+
+    bool in_is_rspecifier =
+        (ClassifyRspecifier(segmentation_in_fn, NULL, NULL)
+         != kNoRspecifier),
+        out_is_wspecifier =
+        (ClassifyWspecifier(segmentation_out_fn, NULL, NULL, NULL)
+         != kNoWspecifier);
+
+    if (in_is_rspecifier != out_is_wspecifier)
+      KALDI_ERR << "Cannot mix regular files and archives";
+
+    int64 num_done = 0, num_err = 0;
+
+    if (!in_is_rspecifier) {
+      Segmentation segmentation;
+      {
+        bool binary_in;
+        Input ki(segmentation_in_fn, &binary_in);
+        segmentation.Read(ki.Stream(), binary_in);
+      }
+
+      if (!label_map_rxfilename.empty())
+        RelabelSegmentsUsingMap(label_map, &segmentation);
+
+      if (keep_label != -1)
+        KeepSegments(keep_label, &segmentation);
+
+      if (frame_subsampling_factor != 1.0) {
+        ScaleFrameShift(frame_subsampling_factor, &segmentation);
+      }
+
+      if (!utt2label_rspecifier.empty())
+        KALDI_ERR << "It makes no sense to specify utt2label-rspecifier "
+                  << "when not reading segmentation archives.";
+
+      Output ko(segmentation_out_fn, binary);
+      segmentation.Write(ko.Stream(), binary);
+
+      KALDI_LOG << "Copied segmentation to " << segmentation_out_fn;
+      return 0;
+    } else {
+      RandomAccessInt32Reader utt2label_reader(utt2label_rspecifier);
+
+      SegmentationWriter writer(segmentation_out_fn);
+      SequentialSegmentationReader reader(segmentation_in_fn);
+
+      for (; !reader.Done(); reader.Next()) {
+        const std::string &key = reader.Key();
+
+        if (include_rxfilename != "" && include_set.count(key) == 0) {
+          continue;
+        }
+
+        // Note: this previously tested include_set by mistake.
+        if (exclude_rxfilename != "" && exclude_set.count(key) > 0) {
+          continue;
+        }
+
+        if (label_map_rxfilename.empty() &&
+            frame_subsampling_factor == 1.0 &&
+            utt2label_rspecifier.empty() &&
+            keep_label == -1) {
+          writer.Write(key, reader.Value());
+        } else {
+          Segmentation segmentation = reader.Value();
+          if (!label_map_rxfilename.empty())
+            RelabelSegmentsUsingMap(label_map, &segmentation);
+          if (!utt2label_rspecifier.empty()) {
+            if (!utt2label_reader.HasKey(key)) {
+              KALDI_WARN << "Utterance " << key
+                         << " not found in utt2label map "
+                         << utt2label_rspecifier;
+              num_err++;
+              continue;
+            }
+
+            RelabelAllSegments(utt2label_reader.Value(key), &segmentation);
+          }
+          if (keep_label != -1)
+            KeepSegments(keep_label, &segmentation);
+
+          if (frame_subsampling_factor != 1.0)
+            ScaleFrameShift(frame_subsampling_factor, &segmentation);
+
+          writer.Write(key, segmentation);
+        }
+
+        num_done++;
+      }
+
+      KALDI_LOG << "Copied " << num_done << " segmentations; failed with "
+                << num_err << " segmentations";
+      return (num_done != 0 ? 0 : 1);
+    }
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
diff --git a/src/segmenterbin/segmentation-create-subsegments.cc b/src/segmenterbin/segmentation-create-subsegments.cc
new file mode 100644
index 00000000000..9d7f4c08b6d
--- /dev/null
+++ b/src/segmenterbin/segmentation-create-subsegments.cc
@@ -0,0 +1,175 @@
+// segmenterbin/segmentation-create-subsegments.cc
+
+// Copyright 2015-16 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation-utils.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Create sub-segmentation of a segmentation by intersecting with "
+        "segments from a 'filter' segmentation.\n"
+        "The labels for the new subsegments are decided "
+        "depending on whether the label of the 'filter' segment "
+        "matches the specified 'filter_label' or not:\n"
+        "  if filter segment's label == filter_label:\n"
+        "    if subsegment_label is specified:\n"
+        "      label = subsegment_label\n"
+        "    else:\n"
+        "      label = filter_label\n"
+        "  else:\n"
+        "    if unmatched_label is specified:\n"
+        "      label = unmatched_label\n"
+        "    else:\n"
+        "      label = primary_label\n"
+        "See the function SubSegmentUsingNonOverlappingSegments() "
+        "for more details.\n"
+        "\n"
+        "Usage: segmentation-create-subsegments [options] "
+        "<segmentation-rxfilename> <filter-segmentation-rxfilename> "
+        "<segmentation-wxfilename>\n"
+        "  or : segmentation-create-subsegments [options] "
+        "<segmentation-rspecifier> <filter-segmentation-rspecifier> "
+        "<segmentation-wspecifier>\n"
+        " e.g.: segmentation-create-subsegments --binary=false "
+        "--filter-label=1 --subsegment-label=1000 foo bar -\n"
+        "       segmentation-create-subsegments --filter-label=1 "
+        "--subsegment-label=1000 ark:1.foo ark:1.bar ark:-\n";
+
+    bool binary = true, ignore_missing = false;
+    int32 filter_label = -1, subsegment_label = -1, unmatched_label = -1;
+    ParseOptions po(usage);
+
+    po.Register("binary", &binary,
+                "Write in binary mode "
+                "(only relevant if output is a wxfilename)");
+    po.Register("filter-label", &filter_label,
+                "The label on which filtering is done.");
+    po.Register("subsegment-label", &subsegment_label,
+                "If non-negative, change the class-id of the matched regions "
+                "in the intersection of the two segmentations to this label.");
+    po.Register("unmatched-label", &unmatched_label,
+                "If non-negative, change the class-id of the unmatched "
+                "regions in the intersection of the two segmentations "
+                "to this label.");
+    po.Register("ignore-missing", &ignore_missing, "Ignore missing "
+                "segmentations in filter. If this is set true, then the "
+                "segmentations with missing key in filter are written "
+                "without any modification.");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string segmentation_in_fn = po.GetArg(1),
+        secondary_segmentation_in_fn = po.GetArg(2),
+        segmentation_out_fn = po.GetArg(3);
+
+    // all these "fn"'s are either rspecifiers or filenames.
+
+    bool in_is_rspecifier =
+        (ClassifyRspecifier(segmentation_in_fn, NULL, NULL)
+         != kNoRspecifier),
+        filter_is_rspecifier =
+        (ClassifyRspecifier(secondary_segmentation_in_fn, NULL, NULL)
+         != kNoRspecifier),
+        out_is_wspecifier =
+        (ClassifyWspecifier(segmentation_out_fn, NULL, NULL, NULL)
+         != kNoWspecifier);
+
+    if (in_is_rspecifier != out_is_wspecifier ||
+        in_is_rspecifier != filter_is_rspecifier)
+      KALDI_ERR << "Cannot mix regular files and archives";
+
+    int64 num_done = 0, num_err = 0;
+
+    if (!in_is_rspecifier) {
+      Segmentation segmentation;
+      {
+        bool binary_in;
+        Input ki(segmentation_in_fn, &binary_in);
+        segmentation.Read(ki.Stream(), binary_in);
+      }
+      Segmentation secondary_segmentation;
+      {
+        bool binary_in;
+        Input ki(secondary_segmentation_in_fn, &binary_in);
+        secondary_segmentation.Read(ki.Stream(), binary_in);
+      }
+
+      Segmentation new_segmentation;
+      SubSegmentUsingNonOverlappingSegments(
+          segmentation, secondary_segmentation, filter_label,
+          subsegment_label, unmatched_label, &new_segmentation);
+      Output ko(segmentation_out_fn, binary);
+      new_segmentation.Write(ko.Stream(), binary);
+
+      KALDI_LOG << "Created subsegments of " << segmentation_in_fn
+                << " based on " << secondary_segmentation_in_fn
+                << " and wrote to " << segmentation_out_fn;
+      return 0;
+    } else {
+      SegmentationWriter writer(segmentation_out_fn);
+      SequentialSegmentationReader reader(segmentation_in_fn);
+      RandomAccessSegmentationReader filter_reader(
+          secondary_segmentation_in_fn);
+
+      for (; !reader.Done(); reader.Next(), num_done++) {
+        const Segmentation &segmentation = reader.Value();
+        const std::string &key = reader.Key();
+
+        if (!filter_reader.HasKey(key)) {
+          KALDI_WARN << "Could not find filter segmentation for utterance "
+                     << key;
+          if (!ignore_missing)
+            num_err++;
+          else
+            writer.Write(key, segmentation);
+          continue;
+        }
+        const Segmentation &secondary_segmentation = filter_reader.Value(key);
+
+        Segmentation new_segmentation;
+        SubSegmentUsingNonOverlappingSegments(segmentation,
+                                              secondary_segmentation,
+                                              filter_label, subsegment_label,
+                                              unmatched_label,
+                                              &new_segmentation);
+
+        writer.Write(key, new_segmentation);
+      }
+
+      KALDI_LOG << "Created subsegments for " << num_done
+                << " segmentations; "
+                << "failed with " << num_err << " segmentations";
+
+      return ((num_done != 0 && num_err < num_done) ? 0 : 1);
+    }
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
}
+
diff --git a/src/segmenterbin/segmentation-get-stats.cc b/src/segmenterbin/segmentation-get-stats.cc
new file mode 100644
index 00000000000..b25d6913f06
--- /dev/null
+++ b/src/segmenterbin/segmentation-get-stats.cc
@@ -0,0 +1,125 @@
+// segmenterbin/segmentation-get-stats.cc
+
+// Copyright 2016 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
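+
+// Illustrative example: if a frame is covered by two segments of class 1
+// and one segment of class 2, then for that frame num-overlaps = 3 (the
+// total count of covering segments) and num-classes = 2 (distinct classes
+// with a nonzero count).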
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation-utils.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Get per-frame stats from segmentation.\n"
+        "Currently supported stats are\n"
+        "  num-overlaps: Number of overlapping segments covering each frame\n"
+        "  num-classes: Number of distinct classes seen at each frame\n"
+        "\n"
+        "Usage: segmentation-get-stats [options] <segmentation-rspecifier> "
+        "<num-overlaps-wspecifier> <num-classes-wspecifier>\n"
+        " e.g.: segmentation-get-stats ark:1.seg ark:/dev/null "
+        "ark:num_classes.ark\n";
+
+    ParseOptions po(usage);
+
+    std::string lengths_rspecifier;
+    int32 length_tolerance = 2;
+
+    po.Register("lengths-rspecifier", &lengths_rspecifier,
+                "Archive of frame lengths of the utterances. "
+                "Fills up any extra length with zero stats.");
+    po.Register("length-tolerance", &length_tolerance,
+                "Tolerate shortage of this many frames in the specified "
+                "lengths file");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string segmentation_rspecifier = po.GetArg(1),
+        num_overlaps_wspecifier = po.GetArg(2),
+        num_classes_wspecifier = po.GetArg(3);
+
+    int64 num_done = 0, num_err = 0;
+
+    SequentialSegmentationReader reader(segmentation_rspecifier);
+    Int32VectorWriter num_overlaps_writer(num_overlaps_wspecifier);
+    Int32VectorWriter num_classes_writer(num_classes_wspecifier);
+
+    RandomAccessInt32Reader lengths_reader(lengths_rspecifier);
+
+    // Note: num_done is incremented in the loop body only, so that
+    // utterances skipped with errors are not double-counted.
+    for (; !reader.Done(); reader.Next()) {
+      const Segmentation &segmentation = reader.Value();
+      const std::string &key = reader.Key();
+
+      int32 length = -1;
+      if (!lengths_rspecifier.empty()) {
+        if (!lengths_reader.HasKey(key)) {
+          KALDI_WARN << "Could not find length for key " << key;
+          num_err++;
+          continue;
+        }
+        length = lengths_reader.Value(key);
+      }
+
+      std::vector<std::map<int32, int32> > class_counts_per_frame;
+      if (!GetClassCountsPerFrame(segmentation, length,
+                                  length_tolerance,
+                                  &class_counts_per_frame)) {
+        KALDI_WARN << "Failed getting stats for key " << key;
+        num_err++;
+        continue;
+      }
+
+      if (length == -1)
+        length = class_counts_per_frame.size();
+
+      std::vector<int32> num_classes_per_frame(length, 0);
+      std::vector<int32> num_overlaps_per_frame(length, 0);
+
+      for (int32 i = 0; i < class_counts_per_frame.size(); i++) {
+        std::map<int32, int32> &class_counts = class_counts_per_frame[i];
+
+        for (std::map<int32, int32>::const_iterator
+                 it = class_counts.begin();
+             it != class_counts.end(); ++it) {
+          if (it->second > 0)
+            num_classes_per_frame[i]++;
+          num_overlaps_per_frame[i] += it->second;
+        }
+      }
+
+      num_classes_writer.Write(key, num_classes_per_frame);
+      num_overlaps_writer.Write(key, num_overlaps_per_frame);
+
+      num_done++;
+    }
+
+    KALDI_LOG << "Got stats for " << num_done << " segmentations; failed with "
+              << num_err << " segmentations";
+    return (num_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
diff --git a/src/segmenterbin/segmentation-init-from-ali.cc b/src/segmenterbin/segmentation-init-from-ali.cc
new file mode 100644
index 00000000000..a98a54368c9
--- /dev/null
+++ b/src/segmenterbin/segmentation-init-from-ali.cc
@@ -0,0 +1,91 @@
+// segmenterbin/segmentation-init-from-ali.cc
+
+// Copyright 2015-16 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "segmenter/segmentation-utils.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace segmenter; + + const char *usage = + "Initialize utterance-level segmentations from alignments file. \n" + "The user can pass this to segmentation-combine-segments to " + "create recording-level segmentations." + "\n" + "Usage: segmentation-init-from-ali [options] " + " \n" + " e.g.: segmentation-init-from-ali ark:1.ali ark:-\n" + "See also: segmentation-init-from-segments, " + "segmentation-combine-segments\n"; + + ParseOptions po(usage); + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string ali_rspecifier = po.GetArg(1), + segmentation_wspecifier = po.GetArg(2); + + SegmentationWriter segmentation_writer(segmentation_wspecifier); + + int32 num_done = 0, num_segmentations = 0; + int64 num_segments = 0; + int64 num_err = 0; + + std::vector frame_counts_per_class; + + SequentialInt32VectorReader alignment_reader(ali_rspecifier); + + for (; !alignment_reader.Done(); alignment_reader.Next()) { + const std::string &key = alignment_reader.Key(); + const std::vector &alignment = alignment_reader.Value(); + + Segmentation segmentation; + + num_segments += InsertFromAlignment(alignment, 0, alignment.size(), + 0, &segmentation, + &frame_counts_per_class); + + Sort(&segmentation); + segmentation_writer.Write(key, segmentation); + + num_done++; + num_segmentations++; + } + + KALDI_LOG << "Processed " << num_done << " utterances; failed with " + << num_err << " utterances; " + << "wrote " << num_segmentations << " segmentations " + << "with a total of " << num_segments << " segments."; + KALDI_LOG << "Number of frames for the different classes are : "; + WriteIntegerVector(KALDI_LOG, false, frame_counts_per_class); + + return ((num_done > 0 && num_err < num_done) ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + diff --git a/src/segmenterbin/segmentation-init-from-lengths.cc b/src/segmenterbin/segmentation-init-from-lengths.cc new file mode 100644 index 00000000000..28c998c220b --- /dev/null +++ b/src/segmenterbin/segmentation-init-from-lengths.cc @@ -0,0 +1,82 @@ +// segmenterbin/segmentation-init-from-lengths.cc + +// Copyright 2015-16 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. 
+// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "segmenter/segmentation.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace segmenter; + + const char *usage = + "Initialize segmentations from frame lengths file\n" + "\n" + "Usage: segmentation-init-from-lengths [options] " + " \n" + " e.g.: segmentation-init-from-lengths " + "\"ark:feat-to-len scp:feats.scp ark:- |\" ark:-\n" + "\n" + "See also: segmentation-init-from-ali, " + "segmentation-init-from-segments\n"; + + int32 label = 1; + + ParseOptions po(usage); + + po.Register("label", &label, "Label to assign to the created segments"); + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string lengths_rspecifier = po.GetArg(1), + segmentation_wspecifier = po.GetArg(2); + + SequentialInt32Reader lengths_reader(lengths_rspecifier); + SegmentationWriter segmentation_writer(segmentation_wspecifier); + + int32 num_done = 0; + + for (; !lengths_reader.Done(); lengths_reader.Next()) { + const std::string &key = lengths_reader.Key(); + const int32 &length = lengths_reader.Value(); + + Segmentation segmentation; + + if (length > 0) { + segmentation.EmplaceBack(0, length - 1, label); + } + + segmentation_writer.Write(key, segmentation); + num_done++; + } + + KALDI_LOG << "Created " << num_done << " segmentations."; + + return (num_done > 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + diff --git a/src/segmenterbin/segmentation-init-from-segments.cc b/src/segmenterbin/segmentation-init-from-segments.cc new file mode 100644 index 00000000000..c39996b5ef4 --- /dev/null +++ b/src/segmenterbin/segmentation-init-from-segments.cc @@ -0,0 +1,179 @@ +// segmenterbin/segmentation-init-from-segments.cc + +// Copyright 2015-16 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation.h"
+
+// If the segments file contains
+// Alpha-001 Alpha 0.00 0.16
+// Alpha-002 Alpha 1.50 4.10
+// Beta-001 Beta 0.50 2.66
+// Beta-002 Beta 3.50 5.20
+// the output segmentation will contain
+// Alpha-001 [ 0 16 1 ]
+// Alpha-002 [ 0 360 1 ]
+// Beta-001 [ 0 216 1 ]
+// Beta-002 [ 0 170 1 ]
+// If --shift-to-zero=false is provided, then the output will contain
+// Alpha-001 [ 0 16 1 ]
+// Alpha-002 [ 150 410 1 ]
+// Beta-001 [ 50 266 1 ]
+// Beta-002 [ 350 520 1 ]
+//
+// If the following utt2label-rspecifier was provided:
+// Alpha-001 2
+// Alpha-002 2
+// Beta-001 4
+// Beta-002 4
+// then the output segmentation will contain
+// Alpha-001 [ 0 16 2 ]
+// Alpha-002 [ 0 360 2 ]
+// Beta-001 [ 0 216 4 ]
+// Beta-002 [ 0 170 4 ]
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Convert segments from a kaldi segments file into utterance-level "
+        "segmentation format.\n"
+        "The user can convert the segmentation to recording-level using "
+        "the binary segmentation-combine-segments-to-recordings.\n"
+        "\n"
+        "Usage: segmentation-init-from-segments [options] "
+        "<segments-rxfilename> <segmentation-wspecifier>\n"
+        " e.g.: segmentation-init-from-segments segments ark:-\n";
+
+    int32 segment_label = 1;
+    BaseFloat frame_shift = 0.01, frame_overlap = 0.015;
+    std::string utt2label_rspecifier;
+    bool shift_to_zero = true;
+
+    ParseOptions po(usage);
+
+    po.Register("segment-label", &segment_label,
+                "Label for all the segments in the segmentations");
+    po.Register("utt2label-rspecifier", &utt2label_rspecifier,
+                "Mapping for each utterance to an integer label. "
+                "If supplied, these labels will be used as the segment "
+                "labels");
+    po.Register("shift-to-zero", &shift_to_zero,
+                "Shift all segments to the 0th frame");
+    po.Register("frame-shift", &frame_shift, "Frame shift in seconds");
+    po.Register("frame-overlap", &frame_overlap, "Frame overlap in seconds");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string segments_rxfilename = po.GetArg(1),
+        segmentation_wspecifier = po.GetArg(2);
+
+    SegmentationWriter writer(segmentation_wspecifier);
+    RandomAccessInt32Reader utt2label_reader(utt2label_rspecifier);
+
+    Input ki(segments_rxfilename);
+
+    int64 num_lines = 0, num_done = 0;
+
+    std::string line;
+
+    while (std::getline(ki.Stream(), line)) {
+      num_lines++;
+
+      std::vector<std::string> split_line;
+      // Split the line by space or tab and check the number of fields in
+      // each line. There must be 4 fields: segment name, recording wav file
+      // name, start time, end time; a 5th field (channel info) is optional.
+      SplitStringToVector(line, " \t\r", true, &split_line);
+      if (split_line.size() != 4 && split_line.size() != 5) {
+        KALDI_WARN << "Invalid line in segments file: " << line;
+        continue;
+      }
+      std::string utt = split_line[0],
+          reco = split_line[1],
+          start_str = split_line[2],
+          end_str = split_line[3];
+
+      // Convert the start time and end time to real from string. The segment
+      // is ignored if start or end time cannot be converted to real.
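+      // (The times are turned into frame indexes further below by dividing
+      // by --frame-shift, after subtracting --frame-overlap from the end
+      // time, the usual kaldi convention for counting the frames that fit
+      // in a segment.)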
+      double start, end;
+      if (!ConvertStringToReal(start_str, &start)) {
+        KALDI_WARN << "Invalid line in segments file [bad start]: " << line;
+        continue;
+      }
+      if (!ConvertStringToReal(end_str, &end)) {
+        KALDI_WARN << "Invalid line in segments file [bad end]: " << line;
+        continue;
+      }
+
+      // The start time must not be negative, and the start time must not be
+      // greater than the end time, except if the end time is -1.
+      if (start < 0 || (end != -1.0 && end <= 0) ||
+          ((start >= end) && (end > 0))) {
+        KALDI_WARN << "Invalid line in segments file "
+                   << "[empty or invalid segment]: " << line;
+        continue;
+      }
+
+      if (split_line.size() >= 5)
+        KALDI_ERR << "Not supporting channel in segments file";
+
+      Segmentation segmentation;
+
+      if (!utt2label_rspecifier.empty()) {
+        if (!utt2label_reader.HasKey(utt)) {
+          KALDI_WARN << "Could not find utterance " << utt << " in "
+                     << utt2label_rspecifier;
+          continue;
+        }
+
+        segment_label = utt2label_reader.Value(utt);
+      }
+
+      int32 length = round((end - frame_overlap) / frame_shift)
+          - round(start / frame_shift);
+
+      if (shift_to_zero)
+        segmentation.EmplaceBack(0, length, segment_label);
+      else
+        segmentation.EmplaceBack(round(start / frame_shift),
+                                 round((end - frame_overlap) / frame_shift) - 1,
+                                 segment_label);
+
+      writer.Write(utt, segmentation);
+      num_done++;
+    }
+
+    KALDI_LOG << "Successfully processed " << num_done << " lines out of "
+              << num_lines << " in the segments file";
+
+    return (num_done > num_lines / 2 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
diff --git a/src/segmenterbin/segmentation-intersect-ali.cc b/src/segmenterbin/segmentation-intersect-ali.cc
new file mode 100644
index 00000000000..a551eee02ce
--- /dev/null
+++ b/src/segmenterbin/segmentation-intersect-ali.cc
@@ -0,0 +1,99 @@
+// segmenterbin/segmentation-intersect-ali.cc
+
+// Copyright 2015-16 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation-utils.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Intersect (like sets) a segmentation with an alignment, retaining\n"
+        "only the regions where the alignment has the specified label.\n"
+        "\n"
+        "Usage: segmentation-intersect-ali [options] "
+        "<segmentation-rspecifier> <ali-rspecifier> "
+        "<segmentation-wspecifier>\n"
+        " e.g.: segmentation-intersect-ali --binary=false ark:foo.seg "
+        "ark:filter.ali ark,t:-\n"
+        "See also: segmentation-combine-segments, "
+        "segmentation-intersect-segments, segmentation-create-subsegments\n";
+
+    ParseOptions po(usage);
+
+    int32 ali_label = 0, min_alignment_chunk_length = 0;
+
+    po.Register("ali-label", &ali_label,
+                "Intersect only at this label of alignments");
+    po.Register("min-alignment-chunk-length", &min_alignment_chunk_length,
+                "The minimum number of consecutive frames of ali_label in "
+                "the alignment at which the segments can be intersected.");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string segmentation_rspecifier = po.GetArg(1),
+        ali_rspecifier = po.GetArg(2),
+        segmentation_wspecifier = po.GetArg(3);
+
+    int32 num_done = 0, num_err = 0;
+
+    SegmentationWriter writer(segmentation_wspecifier);
+    SequentialSegmentationReader segmentation_reader(segmentation_rspecifier);
+    RandomAccessInt32VectorReader alignment_reader(ali_rspecifier);
+
+    for (; !segmentation_reader.Done(); segmentation_reader.Next()) {
+      const Segmentation &segmentation = segmentation_reader.Value();
+      const std::string &key = segmentation_reader.Key();
+
+      if (!alignment_reader.HasKey(key)) {
+        KALDI_WARN << "Could not find alignment for key " << key
+                   << " in " << ali_rspecifier;
+        num_err++;
+        continue;
+      }
+      const std::vector<int32> &ali = alignment_reader.Value(key);
+
+      Segmentation out_segmentation;
+      IntersectSegmentationAndAlignment(segmentation, ali, ali_label,
+                                        min_alignment_chunk_length,
+                                        &out_segmentation);
+      out_segmentation.Sort();
+
+      writer.Write(key, out_segmentation);
+      num_done++;
+    }
+
+    KALDI_LOG << "Intersected " << num_done
+              << " segmentations with alignments; failed with "
+              << num_err << " segmentations";
+    return (num_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
diff --git a/src/segmenterbin/segmentation-intersect-segments.cc b/src/segmenterbin/segmentation-intersect-segments.cc
new file mode 100644
index 00000000000..1c9861ba453
--- /dev/null
+++ b/src/segmenterbin/segmentation-intersect-segments.cc
@@ -0,0 +1,145 @@
+// segmenterbin/segmentation-intersect-segments.cc
+
+// Copyright 2015-16 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation-utils.h"
+
+namespace kaldi {
+namespace segmenter {
+
+void IntersectSegmentationsNonOverlapping(
+    const Segmentation &in_segmentation,
+    const Segmentation &secondary_segmentation,
+    int32 mismatch_label,
+    Segmentation *out_segmentation) {
+  KALDI_ASSERT(out_segmentation);
+  KALDI_ASSERT(secondary_segmentation.Dim() > 0);
+
+  std::vector<int32> alignment;
+  ConvertToAlignment(secondary_segmentation, -1, -1, 0, &alignment);
+
+  for (SegmentList::const_iterator it = in_segmentation.Begin();
+       it != in_segmentation.End(); ++it) {
+    if (it->end_frame >= alignment.size()) {
+      alignment.resize(it->end_frame + 1, -1);
+    }
+    Segmentation filter_segmentation;
+    InsertFromAlignment(alignment, it->start_frame, it->end_frame + 1,
+                        0, &filter_segmentation, NULL);
+
+    for (SegmentList::const_iterator f_it = filter_segmentation.Begin();
+         f_it != filter_segmentation.End(); ++f_it) {
+      int32 label = it->Label();
+      if (f_it->Label() != it->Label()) {
+        if (mismatch_label == -1) continue;
+        label = mismatch_label;
+      }
+
+      out_segmentation->EmplaceBack(f_it->start_frame, f_it->end_frame,
+                                    label);
+    }
+  }
+}
+
+}  // namespace segmenter
+}  // namespace kaldi
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Intersect segments from two archives by retaining only the regions\n"
+        "where the primary and secondary segments have the same label.\n"
+        "\n"
+        "Usage: segmentation-intersect-segments [options] "
+        "<primary-segmentation-rspecifier> "
+        "<secondary-segmentation-rspecifier> <segmentation-wspecifier>\n"
+        " e.g.: segmentation-intersect-segments ark:foo.seg ark:bar.seg "
+        "ark,t:-\n"
+        "See also: segmentation-create-subsegments, "
+        "segmentation-intersect-ali\n";
+
+    int32 mismatch_label = -1;
+    bool assume_non_overlapping_secondary = true;
+
+    ParseOptions po(usage);
+
+    po.Register("mismatch-label", &mismatch_label,
+                "If non-negative, regions where the primary and secondary "
+                "labels differ are kept and relabeled with this label; "
+                "otherwise such regions are dropped.");
+    po.Register("assume-non-overlapping-secondary",
+                &assume_non_overlapping_secondary,
+                "Assume secondary segments are non-overlapping");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string primary_rspecifier = po.GetArg(1),
+        secondary_rspecifier = po.GetArg(2),
+        segmentation_wspecifier = po.GetArg(3);
+
+    if (!assume_non_overlapping_secondary) {
+      KALDI_ERR << "Secondary segments must be non-overlapping for now";
+    }
+
+    int64 num_done = 0, num_err = 0;
+
+    SegmentationWriter writer(segmentation_wspecifier);
+    SequentialSegmentationReader primary_reader(primary_rspecifier);
+    RandomAccessSegmentationReader secondary_reader(secondary_rspecifier);
+
+    for (; !primary_reader.Done(); primary_reader.Next()) {
+      const Segmentation &segmentation = primary_reader.Value();
+      const std::string &key = primary_reader.Key();
+
+      if (!secondary_reader.HasKey(key)) {
+        KALDI_WARN << "Could not find segmentation for key " << key
+                   << " in " << secondary_rspecifier;
+        num_err++;
+        continue;
+      }
+      const Segmentation &secondary_segmentation = secondary_reader.Value(key);
+
+      Segmentation out_segmentation;
+      IntersectSegmentationsNonOverlapping(segmentation,
+                                           secondary_segmentation,
+                                           mismatch_label,
+                                           &out_segmentation);
+
+      Sort(&out_segmentation);
+
+      writer.Write(key, out_segmentation);
+      num_done++;
+    }
+
+    KALDI_LOG << "Intersected " << num_done << " segmentations; failed with "
+              << num_err << " segmentations";
+    return (num_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
diff --git a/src/segmenterbin/segmentation-merge-recordings.cc b/src/segmenterbin/segmentation-merge-recordings.cc
new file mode 100644
index 00000000000..85b5108be29
--- /dev/null
+++ b/src/segmenterbin/segmentation-merge-recordings.cc
@@ -0,0 +1,101 @@
+// segmenterbin/segmentation-merge-recordings.cc
+
+// Copyright 2016 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation-utils.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Merge segmentations of different recordings into one segmentation "
+        "using a mapping from new to old recording names\n"
+        "\n"
+        "Usage: segmentation-merge-recordings [options] "
+        "<new2old-map-rspecifier> <segmentation-rspecifier> "
+        "<segmentation-wspecifier>\n"
+        " e.g.: segmentation-merge-recordings ark:sdm2ihm_reco.map "
+        "ark:ihm_seg.ark ark:sdm_seg.ark\n";
+
+    ParseOptions po(usage);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string new2old_list_rspecifier = po.GetArg(1);
+    std::string segmentation_rspecifier = po.GetArg(2),
+        segmentation_wspecifier = po.GetArg(3);
+
+    SequentialTokenVectorReader new2old_reader(new2old_list_rspecifier);
+    RandomAccessSegmentationReader segmentation_reader(
+        segmentation_rspecifier);
+    SegmentationWriter segmentation_writer(segmentation_wspecifier);
+
+    int32 num_new_segmentations = 0, num_old_segmentations = 0;
+    int64 num_segments = 0, num_err = 0;
+
+    for (; !new2old_reader.Done(); new2old_reader.Next()) {
+      const std::vector<std::string> &old_key_list = new2old_reader.Value();
+      const std::string &new_key = new2old_reader.Key();
+
+      KALDI_ASSERT(old_key_list.size() > 0);
+
+      Segmentation segmentation;
+
+      for (std::vector<std::string>::const_iterator it = old_key_list.begin();
+           it != old_key_list.end(); ++it) {
+        num_old_segmentations++;
+
+        if (!segmentation_reader.HasKey(*it)) {
+          KALDI_WARN << "Could not find key " << *it << " in "
+                     << "old segmentation " << segmentation_rspecifier;
+          num_err++;
+          continue;
+        }
+
+        const Segmentation &this_segmentation = segmentation_reader.Value(*it);
+
+        num_segments += InsertFromSegmentation(this_segmentation, 0, NULL,
+                                               &segmentation);
+      }
+      Sort(&segmentation);
+
+      segmentation_writer.Write(new_key, segmentation);
+
+      num_new_segmentations++;
+    }
+
+    KALDI_LOG << "Merged " << num_old_segmentations << " old segmentations "
+              << "into " << num_new_segmentations << " new segmentations; "
+              << "created overall " << num_segments << " segments; "
+              << "failed to merge " << num_err << " old segmentations";
+
+    // Note: the exit code was previously the bare boolean, which would have
+    // returned 1 (failure) on success.
+    return ((num_new_segmentations > 0 &&
+             num_err < num_old_segmentations / 2) ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
diff --git a/src/segmenterbin/segmentation-merge.cc b/src/segmenterbin/segmentation-merge.cc
new file mode 100644
index 00000000000..21e9a410e15
--- /dev/null
+++ b/src/segmenterbin/segmentation-merge.cc
@@ -0,0 +1,146 @@
+// segmenterbin/segmentation-merge.cc
+
+// Copyright 2015 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation-utils.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Merge corresponding segments from multiple archives or files, "
+        "i.e. for each utterance in the first segmentation, the segments "
+        "from all the supplied segmentations are merged and put in a single "
+        "segmentation.\n"
+        "\n"
+        "Usage: segmentation-merge [options] "
+        "<segmentation-rspecifier1> <segmentation-rspecifier2> ... "
+        "<segmentation-wspecifier>\n"
+        " e.g.: segmentation-merge ark:foo.seg ark:bar.seg ark,t:-\n"
+        " or \n"
+        "       segmentation-merge <segmentation1> <segmentation2> ... "
+        "<segmentation-out>\n"
+        " e.g.: segmentation-merge --binary=false foo bar -\n"
+        "See also: segmentation-copy, segmentation-merge-recordings, "
+        "segmentation-post-process --merge-labels\n";
+
+    bool binary = true;
+    bool sort = true;
+
+    ParseOptions po(usage);
+
+    po.Register("binary", &binary,
+                "Write in binary mode "
+                "(only relevant if output is a wxfilename)");
+    po.Register("sort", &sort, "Sort the segments after merging");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() <= 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string segmentation_in_fn = po.GetArg(1),
+        segmentation_out_fn = po.GetArg(po.NumArgs());
+
+    // all these "fn"'s are either rspecifiers or filenames.
+
+    bool in_is_rspecifier =
+        (ClassifyRspecifier(segmentation_in_fn, NULL, NULL)
+         != kNoRspecifier),
+        out_is_wspecifier =
+        (ClassifyWspecifier(segmentation_out_fn, NULL, NULL, NULL)
+         != kNoWspecifier);
+
+    if (in_is_rspecifier != out_is_wspecifier)
+      KALDI_ERR << "Cannot mix regular files and archives";
+
+    int64 num_done = 0, num_err = 0;
+
+    if (!in_is_rspecifier) {
+      Segmentation segmentation;
+      {
+        bool binary_in;
+        Input ki(segmentation_in_fn, &binary_in);
+        segmentation.Read(ki.Stream(), binary_in);
+      }
+
+      for (int32 i = 2; i < po.NumArgs(); i++) {
+        bool binary_in;
+        Input ki(po.GetArg(i), &binary_in);
+        Segmentation other_segmentation;
+        other_segmentation.Read(ki.Stream(), binary_in);
+        ExtendSegmentation(other_segmentation, false,
+                           &segmentation);
+      }
+
+      if (sort)
+        Sort(&segmentation);
+
+      Output ko(segmentation_out_fn, binary);
+      segmentation.Write(ko.Stream(), binary);
+
+      KALDI_LOG << "Merged segmentations to " << segmentation_out_fn;
+      return 0;
+    } else {
+      SegmentationWriter writer(segmentation_out_fn);
+      SequentialSegmentationReader reader(segmentation_in_fn);
+      std::vector<RandomAccessSegmentationReader *> other_readers(
+          po.NumArgs() - 2,
+          static_cast<RandomAccessSegmentationReader *>(NULL));
+
+      for (size_t i = 0; i < po.NumArgs() - 2; i++) {
+        other_readers[i] = new RandomAccessSegmentationReader(po.GetArg(i + 2));
+      }
+
+      for (; !reader.Done(); reader.Next()) {
+        Segmentation segmentation(reader.Value());
+        std::string key = reader.Key();
+
+        for (size_t i = 0; i < po.NumArgs() - 2; i++) {
+          if (!other_readers[i]->HasKey(key)) {
+            KALDI_WARN << "Could not find segmentation for key " << key
+                       << " in " << po.GetArg(i + 2);
+            num_err++;
+            // Skip this archive; calling Value() on a missing key
+            // would be an error.
+            continue;
+          }
+          const Segmentation &other_segmentation =
+              other_readers[i]->Value(key);
+          ExtendSegmentation(other_segmentation, false,
+                             &segmentation);
+        }
+
+        if (sort)
+          Sort(&segmentation);
+
+        writer.Write(key, segmentation);
+        num_done++;
+      }
+
+      for (size_t i = 0; i < other_readers.size(); i++)
+        delete other_readers[i];
+
+      KALDI_LOG << "Merged " << num_done << " segmentations; failed with "
+                << num_err << " segmentations";
+      return (num_done != 0 ? 0 : 1);
+    }
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
diff --git a/src/segmenterbin/segmentation-post-process.cc b/src/segmenterbin/segmentation-post-process.cc
new file mode 100644
index 00000000000..921ee5dc5d8
--- /dev/null
+++ b/src/segmenterbin/segmentation-post-process.cc
@@ -0,0 +1,142 @@
+// segmenterbin/segmentation-post-process.cc
+
+// Copyright 2015-16 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
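+
+// Illustrative invocation (the option names are those listed in the usage
+// below; the colon-separated list format for --remove-labels is an
+// assumption, following the convention used by segmentation-remove-segments):
+//   segmentation-post-process --remove-labels=0:2 \
+//     --max-segment-length=1000 --overlap-length=100 ark:in.seg ark:out.seg
+// first removes segments of classes 0 and 2, then splits any segment longer
+// than 1000 frames into overlapping pieces.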
+ +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "segmenter/segmentation-post-processor.h" +#include "segmenter/segmentation-utils.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace segmenter; + + const char *usage = + "Post processing of segmentation that does the following operations " + "in order: \n" + "1) Merge labels: Merge labels specified in --merge-labels into a " + "single label specified by --merge-dst-label. \n" + "2) Padding segments: Pad segments of label specified by --pad-label " + "by a few frames as specified by --pad-length. \n" + "3) Shrink segments: Shrink segments of label specified by " + "--shrink-label by a few frames as specified by --shrink-length. \n" + "4) Blend segments with neighbors: Blend short segments of class-id " + "specified by --blend-short-segments-class that are " + "shorter than --max-blend-length frames with their " + "respective neighbors if both the neighbors are within " + "a distance of --max-intersegment-length frames.\n" + "5) Remove segments: Remove segments of class-ids contained " + "in --remove-labels.\n" + "6) Merge adjacent segments: Merge adjacent segments of the same " + "label if they are within a distance of --max-intersegment-length " + "frames.\n" + "7) Split segments: Split segments that are longer than " + "--max-segment-length frames into overlapping segments " + "with an overlap of --overlap-length frames. \n" + "Usage: segmentation-post-process [options] " + "\n" + " or : segmentation-post-process [options] " + "\n" + " e.g.: segmentation-post-process --binary=false foo -\n" + " segmentation-post-process ark:foo.seg ark,t:-\n" + "See also: segmentation-merge, segmentation-copy, " + "segmentation-remove-segments\n"; + + bool binary = true; + + ParseOptions po(usage); + + SegmentationPostProcessingOptions opts; + + po.Register("binary", &binary, + "Write in binary mode " + "(only relevant if output is a wxfilename)"); + + opts.Register(&po); + + po.Read(argc, argv); + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + SegmentationPostProcessor post_processor(opts); + + std::string segmentation_in_fn = po.GetArg(1), + segmentation_out_fn = po.GetArg(2); + + bool in_is_rspecifier = + (ClassifyRspecifier(segmentation_in_fn, NULL, NULL) + != kNoRspecifier), + out_is_wspecifier = + (ClassifyWspecifier(segmentation_out_fn, NULL, NULL, NULL) + != kNoWspecifier); + + if (in_is_rspecifier != out_is_wspecifier) + KALDI_ERR << "Cannot mix regular files and archives"; + + int64 num_done = 0, num_err = 0; + + if (!in_is_rspecifier) { + Segmentation segmentation; + { + bool binary_in; + Input ki(segmentation_in_fn, &binary_in); + segmentation.Read(ki.Stream(), binary_in); + } + if (post_processor.PostProcess(&segmentation)) { + Output ko(segmentation_out_fn, binary); + Sort(&segmentation); + segmentation.Write(ko.Stream(), binary); + KALDI_LOG << "Post-processed segmentation " << segmentation_in_fn + << " and wrote " << segmentation_out_fn; + return 0; + } + KALDI_LOG << "Failed post-processing segmentation " + << segmentation_in_fn; + return 1; + } + + SegmentationWriter writer(segmentation_out_fn); + SequentialSegmentationReader reader(segmentation_in_fn); + for (; !reader.Done(); reader.Next()) { + Segmentation segmentation(reader.Value()); + const std::string &key = reader.Key(); + + if (!post_processor.PostProcess(&segmentation)) { + num_err++; + continue; + } + + Sort(&segmentation); + + writer.Write(key, segmentation); + num_done++; + } + + KALDI_LOG << 
"Successfully post-processed " << num_done + << " segmentations; " + << "failed with " << num_err << " segmentations"; + return (num_done != 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + diff --git a/src/segmenterbin/segmentation-remove-segments.cc b/src/segmenterbin/segmentation-remove-segments.cc new file mode 100644 index 00000000000..ce3ef2de6fd --- /dev/null +++ b/src/segmenterbin/segmentation-remove-segments.cc @@ -0,0 +1,155 @@ +// segmenterbin/segmentation-remove-segments.cc + +// Copyright 2015-16 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "segmenter/segmentation-utils.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace segmenter; + + const char *usage = + "Remove segments of particular class_id (e.g silence or noise) " + "or a set of class_ids.\n" + "The labels to removed can be made utterance-specific by passing " + "--remove-labels-rspecifier option.\n" + "\n" + "Usage: segmentation-remove-segments [options] " + " \n" + " or : segmentation-remove-segments [options] " + " \n" + "\n" + " e.g.: segmentation-remove-segments --remove-label=0 ark:foo.ark " + "ark:foo.speech.ark\n" + "See also: segmentation-post-process --remove-labels, " + "segmentation-post-process --max-blend-length, segmentation-copy\n"; + + bool binary = true; + + int32 remove_label = -1; + std::string remove_labels_rspecifier = ""; + + ParseOptions po(usage); + + po.Register("binary", &binary, + "Write in binary mode " + "(only relevant if output is a wxfilename)"); + po.Register("remove-label", &remove_label, "Remove segments of this label"); + po.Register("remove-labels-rspecifier", &remove_labels_rspecifier, + "Specify colon separated list of labels for each key"); + + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string segmentation_in_fn = po.GetArg(1), + segmentation_out_fn = po.GetArg(2); + + // all these "fn"'s are either rspecifiers or filenames. 
+
+    bool in_is_rspecifier =
+        (ClassifyRspecifier(segmentation_in_fn, NULL, NULL)
+         != kNoRspecifier),
+        out_is_wspecifier =
+        (ClassifyWspecifier(segmentation_out_fn, NULL, NULL, NULL)
+         != kNoWspecifier);
+
+    if (in_is_rspecifier != out_is_wspecifier)
+      KALDI_ERR << "Cannot mix regular files and archives";
+
+    int64 num_done = 0, num_missing = 0;
+
+    if (!in_is_rspecifier) {
+      Segmentation segmentation;
+      {
+        bool binary_in;
+        Input ki(segmentation_in_fn, &binary_in);
+        segmentation.Read(ki.Stream(), binary_in);
+      }
+      if (!remove_labels_rspecifier.empty()) {
+        KALDI_ERR << "It does not make sense to specify "
+                  << "--remove-labels-rspecifier "
+                  << "for a single segmentation";
+      }
+
+      RemoveSegments(remove_label, &segmentation);
+
+      {
+        Output ko(segmentation_out_fn, binary);
+        segmentation.Write(ko.Stream(), binary);
+      }
+
+      KALDI_LOG << "Removed segments and wrote segmentation to "
+                << segmentation_out_fn;
+
+      return 0;
+    } else {
+      SegmentationWriter writer(segmentation_out_fn);
+      SequentialSegmentationReader reader(segmentation_in_fn);
+
+      RandomAccessTokenReader remove_labels_reader(remove_labels_rspecifier);
+
+      for (; !reader.Done(); reader.Next(), num_done++) {
+        Segmentation segmentation(reader.Value());
+        std::string key = reader.Key();
+
+        if (!remove_labels_rspecifier.empty()) {
+          if (!remove_labels_reader.HasKey(key)) {
+            KALDI_WARN << "No remove-labels found for recording " << key;
+            num_missing++;
+            writer.Write(key, segmentation);
+            continue;
+          }
+
+          std::vector<int32> remove_labels;
+          const std::string& remove_labels_str =
+              remove_labels_reader.Value(key);
+
+          if (!SplitStringToIntegers(remove_labels_str, ":,", false,
+                                     &remove_labels)) {
+            KALDI_ERR << "Bad colon-separated list "
+                      << remove_labels_str << " for key " << key
+                      << " in " << remove_labels_rspecifier;
+          }
+
+          RemoveSegments(remove_labels, &segmentation);
+        } else {
+          RemoveSegments(remove_label, &segmentation);
+        }
+        writer.Write(key, segmentation);
+      }
+
+      KALDI_LOG << "Removed segments from " << num_done
+                << " segmentations; "
+                << "remove-labels list missing for " << num_missing;
+      return (num_done != 0 ? 0 : 1);
+    }
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
diff --git a/src/segmenterbin/segmentation-split-segments.cc b/src/segmenterbin/segmentation-split-segments.cc
new file mode 100644
index 00000000000..a45211b28ca
--- /dev/null
+++ b/src/segmenterbin/segmentation-split-segments.cc
@@ -0,0 +1,194 @@
+// segmenterbin/segmentation-split-segments.cc
+
+// Copyright 2016 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
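+
+// A worked example of the alignment-free splitting mode (the numbers are
+// illustrative, not taken from the source): with --max-segment-length=100 and
+// --overlap-length=10, consecutive pieces advance by 100 - 10 = 90 frames, so
+// a 280-frame segment could be covered by frames [0,99], [90,189], [180,279].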
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation-utils.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Split long segments, optionally using an alignment.\n"
+        "The splitting works in two possible ways:\n"
+        "  1) If alignment is not provided: The segments are split if they\n"
+        "     are longer than --max-segment-length frames into overlapping\n"
+        "     segments with an overlap of --overlap-length frames.\n"
+        "  2) If alignment is provided: The segments are split if they\n"
+        "     are longer than --max-segment-length frames at the region \n"
+        "     where there is a contiguous segment of --ali-label in the \n"
+        "     alignment that is at least --min-alignment-chunk-length frames \n"
+        "     long.\n"
+        "Usage: segmentation-split-segments [options] "
+        "<segmentation-in> <segmentation-out>\n"
+        " or : segmentation-split-segments [options] "
+        "<segmentation-rspecifier> <segmentation-wspecifier>\n"
+        " e.g.: segmentation-split-segments --binary=false foo -\n"
+        "       segmentation-split-segments ark:foo.seg ark,t:-\n"
+        "See also: segmentation-post-process\n";
+
+    bool binary = true;
+    int32 max_segment_length = -1;
+    int32 min_remainder = -1;
+    int32 overlap_length = 0;
+    int32 split_label = -1;
+    int32 ali_label = 0;
+    int32 min_alignment_chunk_length = 2;
+
+    std::string alignments_in_fn;
+
+    ParseOptions po(usage);
+
+    po.Register("binary", &binary,
+                "Write in binary mode "
+                "(only relevant if output is a wxfilename)");
+    po.Register("max-segment-length", &max_segment_length,
+                "If a segment is longer than this length, split it into "
+                "pieces with at most this many frames. "
+                "Refer to the SplitSegments() code for details. "
+                "Used in conjunction with the option --overlap-length.");
+    po.Register("min-remainder", &min_remainder,
+                "The minimum remainder left after splitting that will "
+                "prevent a split from being done. "
+                "Set to max-segment-length / 2 if not specified. "
+                "Applicable only when --alignments is not specified.");
+    po.Register("overlap-length", &overlap_length,
+                "When splitting segments longer than max-segment-length, "
+                "have the pieces overlap by this many frames. "
+                "Refer to the SplitSegments() code for details. "
+                "Used in conjunction with the option --max-segment-length.");
+    po.Register("split-label", &split_label,
+                "If supplied, split only segments with this label. 
" + "Otherwise, split all segments."); + po.Register("alignments", &alignments_in_fn, + "A single alignment file or archive of alignment used " + "for splitting, " + "depending on whether the input segmentation is single file " + "or archive"); + po.Register("ali-label", &ali_label, + "Split at this label of alignments"); + po.Register("min-alignment-chunk-length", &min_alignment_chunk_length, + "The minimum number of frames of alignment with ali_label " + "at which to split the segments"); + + po.Read(argc, argv); + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string segmentation_in_fn = po.GetArg(1), + segmentation_out_fn = po.GetArg(2); + + bool in_is_rspecifier = + (ClassifyRspecifier(segmentation_in_fn, NULL, NULL) + != kNoRspecifier), + out_is_wspecifier = + (ClassifyWspecifier(segmentation_out_fn, NULL, NULL, NULL) + != kNoWspecifier); + + if (in_is_rspecifier != out_is_wspecifier) + KALDI_ERR << "Cannot mix regular files and archives"; + + if (min_remainder == -1) { + min_remainder = max_segment_length / 2; + } + + int64 num_done = 0, num_err = 0; + + if (!in_is_rspecifier) { + std::vector ali; + + Segmentation segmentation; + { + bool binary_in; + Input ki(segmentation_in_fn, &binary_in); + segmentation.Read(ki.Stream(), binary_in); + } + + if (!alignments_in_fn.empty()) { + { + bool binary_in; + Input ki(alignments_in_fn, &binary_in); + ReadIntegerVector(ki.Stream(), binary_in, &ali); + } + SplitSegmentsUsingAlignment(max_segment_length, + split_label, ali, ali_label, + min_alignment_chunk_length, + &segmentation); + } else { + SplitSegments(max_segment_length, min_remainder, + overlap_length, split_label, &segmentation); + } + + Sort(&segmentation); + + { + Output ko(segmentation_out_fn, binary); + segmentation.Write(ko.Stream(), binary); + } + + KALDI_LOG << "Split segmentation " << segmentation_in_fn + << " and wrote " << segmentation_out_fn; + return 0; + } + + SegmentationWriter writer(segmentation_out_fn); + SequentialSegmentationReader reader(segmentation_in_fn); + RandomAccessInt32VectorReader ali_reader(alignments_in_fn); + + for (; !reader.Done(); reader.Next()) { + Segmentation segmentation(reader.Value()); + const std::string &key = reader.Key(); + + if (!alignments_in_fn.empty()) { + if (!ali_reader.HasKey(key)) { + KALDI_WARN << "Could not find key " << key + << " in alignments " << alignments_in_fn; + num_err++; + continue; + } + SplitSegmentsUsingAlignment(max_segment_length, split_label, + ali_reader.Value(key), ali_label, + min_alignment_chunk_length, + &segmentation); + } else { + SplitSegments(max_segment_length, min_remainder, + overlap_length, split_label, + &segmentation); + } + + Sort(&segmentation); + + writer.Write(key, segmentation); + num_done++; + } + + KALDI_LOG << "Successfully split " << num_done + << " segmentations; " + << "failed with " << num_err << " segmentations"; + return (num_done != 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + diff --git a/src/segmenterbin/segmentation-to-ali.cc b/src/segmenterbin/segmentation-to-ali.cc new file mode 100644 index 00000000000..9a618247a42 --- /dev/null +++ b/src/segmenterbin/segmentation-to-ali.cc @@ -0,0 +1,99 @@ +// segmenterbin/segmentation-to-ali.cc + +// Copyright 2015-16 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation-utils.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Convert segmentation to alignment\n"
+        "\n"
+        "Usage: segmentation-to-ali [options] "
+        "<segmentation-rspecifier> <ali-wspecifier>\n"
+        " e.g.: segmentation-to-ali ark:1.seg ark:1.ali\n";
+
+    std::string lengths_rspecifier;
+    int32 default_label = 0, length_tolerance = 2;
+
+    ParseOptions po(usage);
+
+    po.Register("lengths-rspecifier", &lengths_rspecifier,
+                "Archive of frame lengths "
+                "of the utterances. Fills up any extra length with "
+                "the specified default-label");
+    po.Register("default-label", &default_label, "Fill any extra length "
+                "with this label");
+    po.Register("length-tolerance", &length_tolerance, "Tolerate a shortage of "
+                "this many frames in the specified lengths file");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string segmentation_rspecifier = po.GetArg(1);
+    std::string alignment_wspecifier = po.GetArg(2);
+
+    RandomAccessInt32Reader lengths_reader(lengths_rspecifier);
+
+    SequentialSegmentationReader segmentation_reader(segmentation_rspecifier);
+    Int32VectorWriter alignment_writer(alignment_wspecifier);
+
+    int32 num_err = 0, num_done = 0;
+    for (; !segmentation_reader.Done(); segmentation_reader.Next()) {
+      const Segmentation &segmentation = segmentation_reader.Value();
+      const std::string &key = segmentation_reader.Key();
+
+      int32 length = -1;
+      if (lengths_rspecifier != "") {
+        if (!lengths_reader.HasKey(key)) {
+          KALDI_WARN << "Could not find length for utterance " << key;
+          num_err++;
+          continue;
+        }
+        length = lengths_reader.Value(key);
+      }
+
+      std::vector<int32> ali;
+      if (!ConvertToAlignment(segmentation, default_label, length,
+                              length_tolerance, &ali)) {
+        KALDI_WARN << "Conversion failed for utterance " << key;
+        num_err++;
+        continue;
+      }
+      alignment_writer.Write(key, ali);
+      num_done++;
+    }
+
+    KALDI_LOG << "Converted " << num_done << " segmentations into alignments; "
+              << "failed with " << num_err << " segmentations";
+    return (num_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
diff --git a/src/segmenterbin/segmentation-to-rttm.cc b/src/segmenterbin/segmentation-to-rttm.cc
new file mode 100644
index 00000000000..6ffd1a8b1e8
--- /dev/null
+++ b/src/segmenterbin/segmentation-to-rttm.cc
@@ -0,0 +1,255 @@
+// segmenterbin/segmentation-to-rttm.cc
+
+// Copyright 2015-16 Vimal Manohar
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation.h"
+
+namespace kaldi {
+namespace segmenter {
+
+/**
+ * This function is used to write the segmentation in RTTM format. Each class
+ * is treated as a "SPEAKER". If map_to_speech_and_sil is true, then class_id 1
+ * is treated as SPEECH and every other class_id as SILENCE. The argument
+ * start_time is used to set the time corresponding to frame 0 of the segment.
+ * Each segment is converted into the following line,
+ * SPEAKER <file-id> 1 <start-time> <duration> <NA> <NA> <label> <NA>
+ * , where
+ * <file-id> is the file_id supplied as an argument
+ * <start-time> is the start time of the segment in seconds
+ * <duration> is the length of the segment in seconds
+ * <label> is the class_id stored in the segment. If map_to_speech_and_sil is
+ * set true, then <label> is either SPEECH or SILENCE.
+ * The function returns the largest class_id that it encounters.
+**/
+
+int32 WriteRttm(const Segmentation &segmentation,
+                std::ostream &os, const std::string &file_id,
+                const std::string &channel,
+                BaseFloat frame_shift, BaseFloat start_time,
+                bool map_to_speech_and_sil) {
+  SegmentList::const_iterator it = segmentation.Begin();
+  int32 largest_class = 0;
+  for (; it != segmentation.End(); ++it) {
+    os << "SPEAKER " << file_id << " " << channel << " "
+       << it->start_frame * frame_shift + start_time << " "
+       << (it->Length()) * frame_shift << " <NA> <NA> ";
+    if (map_to_speech_and_sil) {
+      switch (it->Label()) {
+        case 1:
+          os << "SPEECH ";
+          break;
+        default:
+          os << "SILENCE ";
+          break;
+      }
+      largest_class = 1;
+    } else {
+      if (it->Label() >= 0) {
+        os << it->Label() << " ";
+        if (it->Label() > largest_class)
+          largest_class = it->Label();
+      }
+    }
+    os << "<NA>" << std::endl;
+  }
+  return largest_class;
+}
+
+}  // namespace segmenter
+}  // namespace kaldi
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Convert segmentation into RTTM\n"
+        "\n"
+        "Usage: segmentation-to-rttm [options] <segmentation-rspecifier> "
+        "<rttm-wxfilename>\n"
+        " e.g.: segmentation-to-rttm ark:1.seg -\n";
+
+    bool map_to_speech_and_sil = true;
+
+    BaseFloat frame_shift = 0.01;
+    std::string segments_rxfilename;
+    std::string reco2file_and_channel_rxfilename;
+    ParseOptions po(usage);
+
+    po.Register("frame-shift", &frame_shift, "Frame shift in seconds");
+    po.Register("segments", &segments_rxfilename, "Segments file");
+    po.Register("reco2file-and-channel", &reco2file_and_channel_rxfilename,
+                "reco2file_and_channel file");
+    po.Register("map-to-speech-and-sil", &map_to_speech_and_sil,
+                "Map all classes to SPEECH and SILENCE");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    unordered_map<std::string, std::string, StringHasher> utt2file;
+    unordered_map<std::string, BaseFloat, StringHasher> utt2start_time;
+
+    if (!segments_rxfilename.empty()) {
+      Input ki(segments_rxfilename);  // no binary argument: never binary.
+      int32 i = 0;
+      std::string line;
+      /* read each line from segments file */
+      while (std::getline(ki.Stream(), line)) {
+        std::vector<std::string> split_line;
+        // Split the line by space or tab and check the number of fields in each
+        // line. 
There must be 4 fields: segment name, recording wav file name,
+        // start time, end time; a 5th field (channel info) is optional.
+        SplitStringToVector(line, " \t\r", true, &split_line);
+        if (split_line.size() != 4 && split_line.size() != 5) {
+          KALDI_WARN << "Invalid line in segments file: " << line;
+          continue;
+        }
+        std::string segment = split_line[0],
+            utterance = split_line[1],
+            start_str = split_line[2],
+            end_str = split_line[3];
+
+        // Convert the start time and end time to real from string. The segment
+        // is ignored if the start or end time cannot be converted to real.
+        double start, end;
+        if (!ConvertStringToReal(start_str, &start)) {
+          KALDI_WARN << "Invalid line in segments file [bad start]: " << line;
+          continue;
+        }
+        if (!ConvertStringToReal(end_str, &end)) {
+          KALDI_WARN << "Invalid line in segments file [bad end]: " << line;
+          continue;
+        }
+        // start time must not be negative; end time must be positive and
+        // greater than the start time.
+        if (start < 0 || end <= 0 || start >= end) {
+          KALDI_WARN << "Invalid line in segments file "
+                     << "[empty or invalid segment]: " << line;
+          continue;
+        }
+        int32 channel = -1;  // means channel info is unspecified.
+        // if the line has 5 elements, then the 5th element must be the
+        // channel identifier
+        if (split_line.size() == 5) {
+          if (!ConvertStringToInteger(split_line[4], &channel) || channel < 0) {
+            KALDI_WARN << "Invalid line in segments file [bad channel]: "
+                       << line;
+            continue;
+          }
+        }
+
+        utt2file.insert(std::make_pair(segment, utterance));
+        utt2start_time.insert(std::make_pair(segment, start));
+        i++;
+      }
+      KALDI_LOG << "Read " << i << " lines from " << segments_rxfilename;
+    }
+
+    unordered_map<std::string, std::pair<std::string, std::string>,
+                  StringHasher> reco2file_and_channel;
+
+    if (!reco2file_and_channel_rxfilename.empty()) {
+      Input ki(reco2file_and_channel_rxfilename);  // no binary argument: never binary.
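+      // Each line is expected to hold exactly three fields, e.g.
+      // "sw02001 sw02001 A" (recording-id, file-id, channel); this is
+      // enforced by the size check in the loop below.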
+
+      int32 i = 0;
+      std::string line;
+      /* read each line from reco2file_and_channel file */
+      while (std::getline(ki.Stream(), line)) {
+        std::vector<std::string> split_line;
+        SplitStringToVector(line, " \t\r", true, &split_line);
+        if (split_line.size() != 3) {
+          KALDI_WARN << "Invalid line in reco2file_and_channel file: " << line;
+          continue;
+        }
+
+        const std::string &reco_id = split_line[0];
+        const std::string &file_id = split_line[1];
+        const std::string &channel = split_line[2];
+
+        reco2file_and_channel.insert(
+            std::make_pair(reco_id, std::make_pair(file_id, channel)));
+        i++;
+      }
+
+      KALDI_LOG << "Read " << i << " lines from "
+                << reco2file_and_channel_rxfilename;
+    }
+
+    unordered_set<std::string, StringHasher> seen_files;
+
+    std::string segmentation_rspecifier = po.GetArg(1),
+        rttm_out_wxfilename = po.GetArg(2);
+
+    int64 num_done = 0, num_err = 0;
+
+    Output ko(rttm_out_wxfilename, false);
+    SequentialSegmentationReader reader(segmentation_rspecifier);
+    for (; !reader.Done(); reader.Next(), num_done++) {
+      Segmentation segmentation(reader.Value());
+      const std::string &key = reader.Key();
+
+      std::string reco_id = key;
+      BaseFloat start_time = 0.0;
+      if (!segments_rxfilename.empty()) {
+        if (utt2file.count(key) == 0 || utt2start_time.count(key) == 0)
+          KALDI_ERR << "Could not find key " << key << " in segments "
+                    << segments_rxfilename;
+        KALDI_ASSERT(utt2file.count(key) > 0 && utt2start_time.count(key) > 0);
+        reco_id = utt2file[key];
+        start_time = utt2start_time[key];
+      }
+
+      std::string file_id, channel;
+      if (!reco2file_and_channel_rxfilename.empty()) {
+        if (reco2file_and_channel.count(reco_id) == 0)
+          KALDI_ERR << "Could not find recording " << reco_id
+                    << " in " << reco2file_and_channel_rxfilename;
+        file_id = reco2file_and_channel[reco_id].first;
+        channel = reco2file_and_channel[reco_id].second;
+      } else {
+        file_id = reco_id;
+        channel = "1";
+      }
+
+      int32 largest_class =
+          WriteRttm(segmentation, ko.Stream(), file_id, channel, frame_shift,
+                    start_time, map_to_speech_and_sil);
+
+      if (map_to_speech_and_sil) {
+        if (seen_files.count(reco_id) == 0) {
+          ko.Stream() << "SPKR-INFO " << file_id << " " << channel
+                      << " <NA> <NA> <NA> unknown SILENCE <NA>\n";
+          ko.Stream() << "SPKR-INFO " << file_id << " " << channel
+                      << " <NA> <NA> <NA> unknown SPEECH <NA>\n";
+          seen_files.insert(reco_id);
+        }
+      } else {
+        for (int32 i = 0; i < largest_class; i++) {
+          ko.Stream() << "SPKR-INFO " << file_id << " " << channel
+                      << " <NA> <NA> <NA> unknown " << i << " <NA>\n";
+        }
+      }
+    }
+
+    KALDI_LOG << "Converted " << num_done << " segmentations; failed with "
+              << num_err << " segmentations";
+    return (num_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
diff --git a/src/segmenterbin/segmentation-to-segments.cc b/src/segmenterbin/segmentation-to-segments.cc
new file mode 100644
index 00000000000..c57aa827ead
--- /dev/null
+++ b/src/segmenterbin/segmentation-to-segments.cc
@@ -0,0 +1,133 @@
+// segmenterbin/segmentation-to-segments.cc
+
+// Copyright 2015-16 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include <iomanip>
+#include <sstream>
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace segmenter;
+
+    const char *usage =
+        "Convert segmentation to a segments file and an utt2spk file. "
+        "Assumes that the input segmentations are indexed by reco-id and "
+        "treats speakers from different recordings as distinct speakers."
+        "\n"
+        "Usage: segmentation-to-segments [options] <segmentation-rspecifier> "
+        "<utt2spk-wspecifier> <segments-wxfilename>\n"
+        " e.g.: segmentation-to-segments ark:foo.seg ark,t:utt2spk segments\n";
+
+    BaseFloat frame_shift = 0.01, frame_overlap = 0.015;
+    bool single_speaker = false, per_utt_speaker = false;
+    ParseOptions po(usage);
+
+    po.Register("frame-shift", &frame_shift, "Frame shift in seconds");
+    po.Register("frame-overlap", &frame_overlap, "Frame overlap in seconds");
+    po.Register("single-speaker", &single_speaker, "If this is set true, "
+                "then all the utterances in a recording are mapped to the "
+                "same speaker");
+    po.Register("per-utt-speaker", &per_utt_speaker,
+                "If this is set true, then each utterance is mapped to a "
+                "distinct speaker with spkr_id = utt_id");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    if (frame_shift < 0.001 || frame_shift > 1) {
+      KALDI_ERR << "Invalid frame-shift " << frame_shift << "; must be in "
+                << "the range [0.001,1]";
+    }
+
+    if (frame_overlap < 0 || frame_overlap > 1) {
+      KALDI_ERR << "Invalid frame-overlap " << frame_overlap << "; must be in "
+                << "the range [0,1]";
+    }
+
+    std::string segmentation_rspecifier = po.GetArg(1),
+        utt2spk_wspecifier = po.GetArg(2),
+        segments_wxfilename = po.GetArg(3);
+
+    SequentialSegmentationReader reader(segmentation_rspecifier);
+    TokenWriter utt2spk_writer(utt2spk_wspecifier);
+
+    Output ko(segments_wxfilename, false);
+
+    int32 num_done = 0;
+    int64 num_segments = 0;
+
+    for (; !reader.Done(); reader.Next(), num_done++) {
+      const Segmentation &segmentation = reader.Value();
+      const std::string &key = reader.Key();
+
+      for (SegmentList::const_iterator it = segmentation.Begin();
+            it != segmentation.End(); ++it) {
+        BaseFloat start_time = it->start_frame * frame_shift;
+        BaseFloat end_time = (it->end_frame + 1) * frame_shift + frame_overlap;
+
+        std::ostringstream oss;
+
+        if (!single_speaker) {
+          oss << key << "-" << it->Label();
+        } else {
+          oss << key;
+        }
+
+        std::string spk = oss.str();
+
+        oss << "-";
+        oss << std::setw(6) << std::setfill('0') << it->start_frame;
+        oss << std::setw(1) << "-";
+        oss << std::setw(6) << std::setfill('0')
+            << it->end_frame + 1
+               + static_cast<int32>(frame_overlap / frame_shift);
+
+        std::string utt = oss.str();
+
+        if (per_utt_speaker)
+          utt2spk_writer.Write(utt, utt);
+        else
+          utt2spk_writer.Write(utt, spk);
+
+        ko.Stream() << utt << " " << key << " ";
+        ko.Stream() << std::fixed << std::setprecision(3) << start_time << " ";
+        ko.Stream() << std::setprecision(3) << end_time << "\n";
+
+        num_segments++;
+      }
+    }
+
+    KALDI_LOG << "Converted " << num_done << " segmentations 
to segments; " + << "wrote " << num_segments << " segments"; + + return (num_done != 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + diff --git a/tools/config/common_path.sh b/tools/config/common_path.sh index 3e2ea50d685..36b5350dd8e 100644 --- a/tools/config/common_path.sh +++ b/tools/config/common_path.sh @@ -20,4 +20,5 @@ ${KALDI_ROOT}/src/online2bin:\ ${KALDI_ROOT}/src/onlinebin:\ ${KALDI_ROOT}/src/sgmm2bin:\ ${KALDI_ROOT}/src/sgmmbin:\ +${KALDI_ROOT}/src/segmenterbin:\ $PATH From 8c11f77f8a2af11c69e142992b6fa80b3dbc845d Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Tue, 29 Nov 2016 23:08:33 -0500 Subject: [PATCH 058/530] asr_diarization: Adding do_corruption_data_dir.sh for corruption with MUSAN noise --- .../segmentation/do_corruption_data_dir.sh | 140 ++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100755 egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh new file mode 100755 index 00000000000..36bf4c93306 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh @@ -0,0 +1,140 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +set -e +set -u +set -o pipefail + +. path.sh + +stage=0 +corruption_stage=-10 +corrupt_only=false + +# Data options +data_dir=data/train_si284 # Expecting whole data directory. +speed_perturb=true +num_data_reps=5 # Number of corrupted versions +snrs="20:10:15:5:0:-5" +foreground_snrs="20:10:15:5:0:-5" +background_snrs="20:10:15:5:0:-5" +base_rirs=simulated + +# Parallel options +reco_nj=40 +cmd=queue.pl + +# Options for feature extraction +mfcc_config=conf/mfcc_hires_bp_vh.conf +feat_suffix=hires_bp_vh + +reco_vad_dir= # Output of prepare_unsad_data.sh. + # If provided, the speech labels and deriv weights will be + # copied into the output data directory. + +. 
utils/parse_options.sh
+
+if [ $# -ne 0 ]; then
+  echo "Usage: $0"
+  exit 1
+fi
+
+data_id=`basename ${data_dir}`
+
+rvb_opts=()
+if [ "$base_rirs" == "simulated" ]; then
+  # This is the config for the system using simulated RIRs and point-source noises
+  rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
+  rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
+  rvb_opts+=(--noise-set-parameters RIRS_NOISES/pointsource_noises/noise_list)
+else
+  # This is the config for the JHU ASpIRE submission system
+  rvb_opts+=(--rir-set-parameters "1.0, RIRS_NOISES/real_rirs_isotropic_noises/rir_list")
+  rvb_opts+=(--noise-set-parameters RIRS_NOISES/real_rirs_isotropic_noises/noise_list)
+fi
+
+corrupted_data_id=${data_id}_corrupted
+
+if [ $stage -le 1 ]; then
+  python steps/data/reverberate_data_dir.py \
+    "${rvb_opts[@]}" \
+    --prefix="rev" \
+    --foreground-snrs=$foreground_snrs \
+    --background-snrs=$background_snrs \
+    --speech-rvb-probability=1 \
+    --pointsource-noise-addition-probability=1 \
+    --isotropic-noise-addition-probability=1 \
+    --num-replications=$num_data_reps \
+    --max-noises-per-minute=1 \
+    data/${data_id} data/${corrupted_data_id}
+fi
+
+corrupted_data_dir=data/${corrupted_data_id}
+
+if $speed_perturb; then
+  if [ $stage -le 2 ]; then
+    ## Assuming whole data directories
+    for x in $corrupted_data_dir; do
+      cp $x/reco2dur $x/utt2dur
+      utils/data/perturb_data_dir_speed_3way.sh $x ${x}_sp
+    done
+  fi
+
+  corrupted_data_dir=${corrupted_data_dir}_sp
+  corrupted_data_id=${corrupted_data_id}_sp
+
+  if [ $stage -le 3 ]; then
+    utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 \
+      ${corrupted_data_dir}
+  fi
+fi
+
+if $corrupt_only; then
+  echo "$0: Got corrupted data directory in ${corrupted_data_dir}"
+  exit 0
+fi
+
+mfccdir=`basename $mfcc_config`
+mfccdir=${mfccdir%%.conf}
+
+if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+  utils/create_split_dir.pl \
+    /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
+fi
+
+if [ $stage -le 4 ]; then
+  if [ ! -z $feat_suffix ]; then
+    utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix
+    corrupted_data_dir=${corrupted_data_dir}_$feat_suffix
+  fi
+  steps/make_mfcc.sh --mfcc-config $mfcc_config \
+    --cmd "$cmd" --nj $reco_nj \
+    $corrupted_data_dir exp/make_${mfccdir}/${corrupted_data_id} $mfccdir
+  steps/compute_cmvn_stats.sh --fake \
+    $corrupted_data_dir exp/make_${mfccdir}/${corrupted_data_id} $mfccdir
+else
+  if [ ! -z $feat_suffix ]; then
+    corrupted_data_dir=${corrupted_data_dir}_$feat_suffix
+  fi
+fi
+
+if [ $stage -le 8 ]; then
+  if [ ! -z "$reco_vad_dir" ]; then
+    if [ ! 
-f $reco_vad_dir/speech_feat.scp ]; then + echo "$0: Could not find file $reco_vad_dir/speech_feat.scp" + exit 1 + fi + + cat $reco_vad_dir/speech_feat.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ + sort -k1,1 > ${corrupted_data_dir}/speech_feat.scp + + cat $reco_vad_dir/deriv_weights.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ + sort -k1,1 > ${corrupted_data_dir}/deriv_weights.scp + fi +fi + +exit 0 From 5fccac18df0d48db0d5611f8edbf7708f35ac67f Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 30 Nov 2016 00:38:56 -0500 Subject: [PATCH 059/530] asr_diarization: Add do_corruption_data_dir_music.sh for corruption with MUSAN music --- .../do_corruption_data_dir_music.sh | 203 ++++++++++++++++++ .../s5/local/segmentation/make_musan_music.py | 69 ++++++ .../segmentation/train_stats_sad_music.sh | 172 +++++++++++++++ 3 files changed, 444 insertions(+) create mode 100755 egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh create mode 100755 egs/aspire/s5/local/segmentation/make_musan_music.py create mode 100644 egs/aspire/s5/local/segmentation/train_stats_sad_music.sh diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh new file mode 100755 index 00000000000..214cba347da --- /dev/null +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh @@ -0,0 +1,203 @@ +#!/bin/bash +set -e +set -u +set -o pipefail + +. path.sh +. cmd.sh + +num_data_reps=5 +data_dir=data/train_si284 + +nj=40 +reco_nj=40 + +stage=0 +corruption_stage=-10 + +pad_silence=false + +mfcc_config=conf/mfcc_hires_bp_vh.conf +feat_suffix=hires_bp_vh +mfcc_irm_config=conf/mfcc_hires_bp.conf + +dry_run=false +corrupt_only=false +speed_perturb=true + +reco_vad_dir= + +max_jobs_run=20 + +foreground_snrs="5:2:1:0:-2:-5:-10:-20" +background_snrs="5:2:1:0:-2:-5:-10:-20" + +. 
utils/parse_options.sh + +if [ $# -ne 0 ]; then + echo "Usage: $0" + exit 1 +fi + +data_id=`basename ${data_dir}` + +rvb_opts=() +# This is the config for the system using simulated RIRs and point-source noises +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") +rvb_opts+=(--noise-set-parameters RIRS_NOISES/music/music_list) + +music_utt2num_frames=RIRS_NOISES/music/split_utt2num_frames + +corrupted_data_id=${data_id}_music_corrupted +orig_corrupted_data_id=$corrupted_data_id + +if [ $stage -le 1 ]; then + python steps/data/reverberate_data_dir.py \ + "${rvb_opts[@]}" \ + --prefix="music" \ + --foreground-snrs=$foreground_snrs \ + --background-snrs=$background_snrs \ + --speech-rvb-probability=1 \ + --pointsource-noise-addition-probability=1 \ + --isotropic-noise-addition-probability=1 \ + --num-replications=$num_data_reps \ + --max-noises-per-minute=5 \ + data/${data_id} data/${corrupted_data_id} +fi + +if $dry_run; then + exit 0 +fi + +corrupted_data_dir=data/${corrupted_data_id} +orig_corrupted_data_dir=$corrupted_data_dir + +if $speed_perturb; then + if [ $stage -le 2 ]; then + ## Assuming whole data directories + for x in $corrupted_data_dir; do + cp $x/reco2dur $x/utt2dur + utils/data/perturb_data_dir_speed_3way.sh $x ${x}_sp + done + fi + + corrupted_data_dir=${corrupted_data_dir}_sp + corrupted_data_id=${corrupted_data_id}_sp + + if [ $stage -le 3 ]; then + utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 \ + ${corrupted_data_dir} + fi +fi + +if $corrupt_only; then + echo "$0: Got corrupted data directory in ${corrupted_data_dir}" + exit 0 +fi + +mfccdir=`basename $mfcc_config` +mfccdir=${mfccdir%%.conf} + +if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage +fi + +if [ $stage -le 4 ]; then + if [ ! -z $feat_suffix ]; then + utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix + fi + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$train_cmd" --nj $reco_nj \ + $corrupted_data_dir exp/make_${mfccdir}/${corrupted_data_id} $mfccdir + steps/compute_cmvn_stats.sh --fake \ + $corrupted_data_dir exp/make_${mfccdir}/${corrupted_data_id} $mfccdir +else + if [ ! -z $feat_suffix ]; then + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix + fi +fi + +if [ $stage -le 8 ]; then + if [ ! -z "$reco_vad_dir" ]; then + if [ ! 
-f $reco_vad_dir/speech_feat.scp ]; then + echo "$0: Could not find file $reco_vad_dir/speech_feat.scp" + exit 1 + fi + + cat $reco_vad_dir/speech_feat.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps "music" | \ + sort -k1,1 > ${corrupted_data_dir}/speech_feat.scp + + cat $reco_vad_dir/deriv_weights.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps "music" | \ + sort -k1,1 > ${corrupted_data_dir}/deriv_weights.scp + fi +fi + +# music_dir is without speed perturbation +music_dir=exp/make_music_labels/${orig_corrupted_data_id} +music_data_dir=$music_dir/music_data + +mkdir -p $music_data_dir + +if [ $stage -le 10 ]; then + utils/data/get_utt2num_frames.sh $corrupted_data_dir + utils/split_data.sh --per-reco ${orig_corrupted_data_dir} $reco_nj + + cp $orig_corrupted_data_dir/wav.scp $music_data_dir + + # Combine the VAD from the base recording and the VAD from the overlapping segments + # to create per-frame labels of the number of overlapping speech segments + # Unreliable segments are regions where no VAD labels were available for the + # overlapping segments. These can be later removed by setting deriv weights to 0. + $train_cmd JOB=1:$reco_nj $music_dir/log/get_music_seg.JOB.log \ + segmentation-init-from-additive-signals-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ + --additive-signals-segmentation-rspecifier="ark:segmentation-init-from-lengths ark:$music_utt2num_frames ark:- |" \ + "ark:utils/filter_scp.pl ${orig_corrupted_data_dir}/split${reco_nj}reco/JOB/utt2spk $corrupted_data_dir/utt2num_frames | segmentation-init-from-lengths --label=1 ark:- ark:- | segmentation-post-process --remove-labels=1 ark:- ark:- |" \ + ark,t:$orig_corrupted_data_dir/additive_signals_info.txt \ + ark:- \| \ + segmentation-post-process --merge-adjacent-segments ark:- \ + ark:- \| \ + segmentation-to-segments ark:- ark:$music_data_dir/utt2spk.JOB \ + $music_data_dir/segments.JOB + + for n in `seq $reco_nj`; do cat $music_data_dir/utt2spk.$n; done > $music_data_dir/utt2spk + for n in `seq $reco_nj`; do cat $music_data_dir/segments.$n; done > $music_data_dir/segments + + utils/fix_data_dir.sh $music_data_dir + + if $speed_perturb; then + utils/data/perturb_data_dir_speed_3way.sh $music_data_dir ${music_data_dir}_sp + fi +fi + +if $speed_perturb; then + music_data_dir=${music_data_dir}_sp +fi + +label_dir=music_labels + +mkdir -p $label_dir +label_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $label_dir ${PWD}` + +if [ $stage -le 11 ]; then + utils/split_data.sh --per-reco ${music_data_dir} $reco_nj + + $train_cmd JOB=1:$reco_nj $music_dir/log/get_music_labels.JOB.log \ + utils/data/get_reco2utt.sh ${music_data_dir}/split${reco_nj}reco/JOB '&&' \ + segmentation-init-from-segments --shift-to-zero=false \ + ${music_data_dir}/split${reco_nj}reco/JOB/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- ark,t:${music_data_dir}/split${reco_nj}reco/JOB/reco2utt \ + ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \ + ark,scp:$label_dir/music_labels_${corrupted_data_id}.JOB.ark,$label_dir/music_labels_${corrupted_data_id}.JOB.scp +fi + +for n in `seq $reco_nj`; do + cat $label_dir/music_labels_${corrupted_data_id}.$n.scp +done > ${corrupted_data_dir}/music_labels.scp + +exit 0 diff --git a/egs/aspire/s5/local/segmentation/make_musan_music.py b/egs/aspire/s5/local/segmentation/make_musan_music.py new file mode 100755 index 00000000000..5d13078de63 
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/make_musan_music.py
@@ -0,0 +1,69 @@
+#! /usr/bin/env python
+
+from __future__ import print_function
+import argparse
+import os
+
+
+def _get_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--use-vocals", type=str, default="false",
+                        choices=["true", "false"],
+                        help="If true, also add music with vocals in the "
+                        "output music-set-parameters")
+    parser.add_argument("root_dir", type=str,
+                        help="Root directory of MUSAN corpus")
+    parser.add_argument("music_list", type=argparse.FileType('w'),
+                        help="Convert music list into noise-set-parameters "
+                        "for steps/data/reverberate_data_dir.py")
+
+    args = parser.parse_args()
+
+    args.use_vocals = True if args.use_vocals == "true" else False
+    return args
+
+
+def read_vocals(annotations):
+    vocals = {}
+    for line in open(annotations):
+        parts = line.strip().split()
+        if parts[2] == "Y":
+            vocals[parts[0]] = True
+    return vocals
+
+
+def write_music(utt, file_path, music_list):
+    print('{utt} {file_path}'.format(
+        utt=utt, file_path=file_path), file=music_list)
+
+
+def prepare_music_set(root_dir, use_vocals, music_list):
+    vocals = {}
+    music_dir = os.path.join(root_dir, "music")
+    for root, dirs, files in os.walk(music_dir):
+        if os.path.exists(os.path.join(root, "ANNOTATIONS")):
+            vocals = read_vocals(os.path.join(root, "ANNOTATIONS"))
+
+        for f in files:
+            file_path = os.path.join(root, f)
+            if f.endswith(".wav"):
+                utt = str(f).replace(".wav", "")
+                if not use_vocals and utt in vocals:
+                    continue
+                write_music(utt, file_path, music_list)
+
+
+def main():
+    args = _get_args()
+
+    try:
+        prepare_music_set(args.root_dir, args.use_vocals,
+                          args.music_list)
+    finally:
+        args.music_list.close()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/egs/aspire/s5/local/segmentation/train_stats_sad_music.sh b/egs/aspire/s5/local/segmentation/train_stats_sad_music.sh
new file mode 100644
index 00000000000..8242b83c747
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/train_stats_sad_music.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+
+# This is a script to train a time-delay neural network for speech activity
+# detection (SAD) and music-id using a statistics-pooling component for
+# long-context information.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be
+# painfully slow. If you want to run without GPU you'd have to call
+# train_tdnn.sh with --gpu false, --num-threads 16 and --minibatch-size 128.
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts=   # Directly passed to get_egs_multiple_targets.py
+
+# TDNN options
+splice_indexes="-3,-2,-1,0,1,2,3 -6,0,mean+count(-99:3:9:99) -9,0,3 0"
+relu_dim=256
+chunk_width=20          # We use chunk training for training TDNN
+extra_left_context=100  # Maximum left context in egs apart from TDNN's left context
+extra_right_context=20  # Maximum right context in egs apart from TDNN's right context
+
+# We randomly select an extra {left,right} context for each job between
+# min_extra_*_context and extra_*_context so that the network can get used
+# to different contexts used to compute statistics. 
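+# For instance (illustrative numbers, not from the source): with
+# min_extra_left_context=20 and extra_left_context=100, each training job
+# picks some left context in the range [20, 100] frames.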
+min_extra_left_context=20
+min_extra_right_context=0
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2  # Small max-param change for small network
+extra_egs_copy_cmd=   # Used if you want to do some weird stuff to egs
+                      # such as removing one of the targets
+
+num_utts_subset_valid=50  # "utts" are actually recordings. So this is pretty small.
+num_utts_subset_train=50
+
+# target options
+train_data_dir=data/train_azteec_whole_sp_corrupted_hires
+
+speech_feat_scp=
+music_labels_scp=
+
+deriv_weights_scp=
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=a
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+num_hidden_layers=`echo $splice_indexes | perl -ane 'print scalar @F'` || exit 1
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_sad_music/nnet_tdnn
+fi
+
+dir=$dir${affix:+_$affix}_n${num_hidden_layers}
+
+if ! cuda-compiled; then
+  cat < Date: Wed, 30 Nov 2016 16:29:14 -0500
Subject: [PATCH 060/530] asr_diarization: Recipe for music-id on broadcast news

---
 .../v1/local/run_nnet3_music_id.sh | 217 ++++++++++++++++++
 1 file changed, 217 insertions(+)
 create mode 100644 egs/bn_music_speech/v1/local/run_nnet3_music_id.sh

diff --git a/egs/bn_music_speech/v1/local/run_nnet3_music_id.sh b/egs/bn_music_speech/v1/local/run_nnet3_music_id.sh
new file mode 100644
index 00000000000..d96acdabaaa
--- /dev/null
+++ b/egs/bn_music_speech/v1/local/run_nnet3_music_id.sh
@@ -0,0 +1,217 @@
+#!/bin/bash
+
+set -e
+set -o pipefail
+set -u
+
+. path.sh
+. cmd.sh
+
+feat_affix=bp_vh
+affix=
+reco_nj=32
+
+stage=-1
+
+# SAD network config
+iter=final
+extra_left_context=100  # Set to some large value
+extra_right_context=20
+
+
+# Configs
+frame_subsampling_factor=1
+
+min_silence_duration=3  # minimum number of frames for silence
+min_speech_duration=3   # minimum number of frames for speech
+min_music_duration=3    # minimum number of frames for music
+music_transition_probability=0.1
+sil_transition_probability=0.1
+speech_transition_probability=0.1
+sil_prior=0.3
+speech_prior=0.4
+music_prior=0.3
+
+# Decoding options
+acwt=1
+beam=10
+max_active=7000
+
+mfcc_config=conf/mfcc_hires_bp.conf
+
+echo $*
+
+. utils/parse_options.sh
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 <data-dir> <sad-nnet-dir> <dir>"
+  echo " e.g.: $0 data/bn exp/nnet3_sad_snr/tdnn_j_n4 exp/dnn_music_id"
+  exit 1
+fi
+
+# Set to true if the test data has > 8kHz sampling frequency.
+do_downsampling=true
+
+data_dir=$1
+sad_nnet_dir=$2
+dir=$3
+
+data_id=`basename $data_dir`
+
+export PATH="$KALDI_ROOT/tools/sph2pipe_v2.5/:$PATH"
+[ ! -z `which sph2pipe` ]
+
+for f in $sad_nnet_dir/$iter.raw $sad_nnet_dir/post_output-speech.vec $sad_nnet_dir/post_output-music.vec; do
+  if [ ! -f $f ]; then
+    echo "$0: Could not find $f. 
See local/segmentation/run_train_sad.sh"
+    exit 1
+  fi
+done
+
+mkdir -p $dir
+
+new_data_dir=$dir/${data_id}
+if [ $stage -le 0 ]; then
+  utils/data/convert_data_dir_to_whole.sh $data_dir ${new_data_dir}_whole
+
+  freq=`cat $mfcc_config | perl -pe 's/\s*#.*//g' | grep "sample-frequency=" | awk -F'=' '{if (NF == 0) print 16000; else print $2}'`
+  sox=`which sox`
+
+  cat $data_dir/wav.scp | python -c "import sys
+for line in sys.stdin.readlines():
+  splits = line.strip().split()
+  if splits[-1] == '|':
+    out_line = line.strip() + ' $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |'
+  else:
+    out_line = '{0} cat {1} | $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |'.format(splits[0], ' '.join(splits[1:]))
+  print (out_line)" > ${new_data_dir}_whole/wav.scp
+
+  utils/copy_data_dir.sh ${new_data_dir}_whole ${new_data_dir}_whole_bp_hires
+fi
+
+test_data_dir=${new_data_dir}_whole_bp_hires
+
+if [ $stage -le 1 ]; then
+  steps/make_mfcc.sh --mfcc-config $mfcc_config --nj $reco_nj --cmd "$train_cmd" \
+    ${new_data_dir}_whole_bp_hires exp/make_hires/${data_id}_whole_bp mfcc_hires
+  steps/compute_cmvn_stats.sh ${new_data_dir}_whole_bp_hires exp/make_hires/${data_id}_whole_bp mfcc_hires
+fi
+
+if [ $stage -le 2 ]; then
+  output_name=output-speech
+  post_vec=$sad_nnet_dir/post_${output_name}.vec
+  steps/nnet3/compute_output.sh --nj $reco_nj --cmd "$train_cmd" \
+    --post-vec "$post_vec" \
+    --iter $iter \
+    --extra-left-context $extra_left_context \
+    --extra-right-context $extra_right_context \
+    --frames-per-chunk 150 \
+    --output-name $output_name \
+    --frame-subsampling-factor $frame_subsampling_factor \
+    --get-raw-nnet-from-am false ${test_data_dir} $sad_nnet_dir $dir/sad_${data_id}_whole_bp
+fi
+
+if [ $stage -le 3 ]; then
+  output_name=output-music
+  post_vec=$sad_nnet_dir/post_${output_name}.vec
+  steps/nnet3/compute_output.sh --nj $reco_nj --cmd "$train_cmd" \
+    --post-vec "$post_vec" \
+    --iter $iter \
+    --extra-left-context $extra_left_context \
+    --extra-right-context $extra_right_context \
+    --frames-per-chunk 150 \
+    --output-name $output_name \
+    --frame-subsampling-factor $frame_subsampling_factor \
+    --get-raw-nnet-from-am false ${test_data_dir} $sad_nnet_dir $dir/music_${data_id}_whole_bp
+fi
+
+if [ $stage -le 4 ]; then
+  $train_cmd JOB=1:$reco_nj $dir/get_average_likes.JOB.log \
+    paste-feats \
+    "ark:gunzip -c $dir/sad_${data_id}_whole_bp/log_likes.JOB.gz | extract-feature-segments ark:- 'utils/filter_scp.pl -f 2 ${test_data_dir}/split$reco_nj/JOB/utt2spk $data_dir/segments |' ark:- |" \
+    "ark:gunzip -c $dir/music_${data_id}_whole_bp/log_likes.JOB.gz | select-feats 1 ark:- ark:- | extract-feature-segments ark:- 'utils/filter_scp.pl -f 2 ${test_data_dir}/split$reco_nj/JOB/utt2spk $data_dir/segments |' ark:- |" \
+    ark:- \| \
+    matrix-sum-rows --do-average ark:- ark,t:$dir/average_likes.JOB.ark
+
+  for n in `seq $reco_nj`; do
+    cat $dir/average_likes.$n.ark
+  done | awk '{print $1" "( exp($3) + exp($5) + 0.01) / (exp($4) + 0.01)}' | \
+    local/print_scores.py /dev/stdin | compute-eer -
+fi
+
+lang=$dir/lang
+
+if [ $stage -le 5 ]; then
+  mkdir -p $lang
+
+  # Create a lang directory with phones.txt and topo with
+  # silence, music and speech phones.
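+  # Phone 1 stands for silence, phone 2 for speech and phone 3 for music; the
+  # min-duration and end-transition-probability values passed below set a
+  # minimum segment length for each class and how likely the decoder is to
+  # end a segment at each frame.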
+ steps/segmentation/internal/prepare_sad_lang.py \ + --phone-transition-parameters="--phone-list=1 --min-duration=$min_silence_duration --end-transition-probability=$sil_transition_probability" \ + --phone-transition-parameters="--phone-list=2 --min-duration=$min_speech_duration --end-transition-probability=$speech_transition_probability" \ + --phone-transition-parameters="--phone-list=3 --min-duration=$min_music_duration --end-transition-probability=$music_transition_probability" \ + $lang + + cp $lang/phones.txt $lang/words.txt +fi + +feat_dim=2 # dummy. We don't need this. +if [ $stage -le 6 ]; then + $train_cmd $dir/log/create_transition_model.log gmm-init-mono \ + $lang/topo $feat_dim - $dir/tree \| \ + copy-transition-model --binary=false - $dir/trans.mdl || exit 1 +fi + +# Make unigram G.fst +if [ $stage -le 7 ]; then + cat > $lang/word2prior < $lang/G.fst +fi + +graph_dir=$dir/graph_test + +if [ $stage -le 8 ]; then + $train_cmd $dir/log/make_vad_graph.log \ + steps/segmentation/internal/make_sad_graph.sh --iter trans \ + $lang $dir $dir/graph_test || exit 1 +fi + +seg_dir=$dir/segmentation_${data_id}_whole_bp +mkdir -p $seg_dir + +if [ $stage -le 9 ]; then + decoder_opts+=(--acoustic-scale=$acwt --beam=$beam --max-active=$max_active) + $train_cmd JOB=1:$reco_nj $dir/decode.JOB.log \ + paste-feats \ + "ark:gunzip -c $dir/sad_${data_id}_whole_bp/log_likes.JOB.gz | extract-feature-segments ark:- 'utils/filter_scp.pl -f 2 ${test_data_dir}/split$reco_nj/JOB/utt2spk $data_dir/segments |' ark:- |" \ + "ark:gunzip -c $dir/music_${data_id}_whole_bp/log_likes.JOB.gz | select-feats 1 ark:- ark:- | extract-feature-segments ark:- 'utils/filter_scp.pl -f 2 ${test_data_dir}/split$reco_nj/JOB/utt2spk $data_dir/segments |' ark:- |" \ + ark:- \| decode-faster-mapped ${decoder_opts[@]} \ + $dir/trans.mdl $graph_dir/HCLG.fst ark:- \ + ark:/dev/null ark:- \| \ + ali-to-phones --per-frame $dir/trans.mdl ark:- \ + "ark:|gzip -c > $seg_dir/ali.JOB.gz" +fi + +include_silence=true +if [ $stage -le 10 ]; then + $train_cmd JOB=1:$reco_nj $dir/log/get_class_id.JOB.log \ + ali-to-post "ark:gunzip -c $seg_dir/ali.JOB.gz |" ark:- \| \ + post-to-feats --post-dim=4 ark:- ark:- \| \ + matrix-sum-rows --do-average ark:- ark,t:- \| \ + sid/vector_to_music_labels.pl ${include_silence:+--include-silence-in-music} '>' $dir/ratio.JOB + + for n in `seq $reco_nj`; do + cat $dir/ratio.$n + done > $dir/ratio + + cat $dir/ratio | local/print_scores.py /dev/stdin | compute-eer - +fi + +# LOG (compute-eer:main():compute-eer.cc:136) Equal error rate is 0.860585%, at threshold 1.99361 From 82bfc5a60512a064787e494a5ab8fe1a173caf68 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 25 Nov 2016 01:06:26 -0500 Subject: [PATCH 061/530] asr_diarization: Utilities invert_vector.pl and vector_get_max.pl --- .../s5/steps/segmentation/invert_vector.pl | 20 ++++++++++++++ .../s5/steps/segmentation/vector_get_max.pl | 26 +++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100755 egs/wsj/s5/steps/segmentation/invert_vector.pl create mode 100644 egs/wsj/s5/steps/segmentation/vector_get_max.pl diff --git a/egs/wsj/s5/steps/segmentation/invert_vector.pl b/egs/wsj/s5/steps/segmentation/invert_vector.pl new file mode 100755 index 00000000000..c16243a0b93 --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/invert_vector.pl @@ -0,0 +1,20 @@ +#! 
/usr/bin/perl
+use strict;
+use warnings;
+
+while (<STDIN>) {
+  chomp;
+  my @F = split;
+  my $utt = shift @F;
+  shift @F;
+
+  print "$utt [ ";
+  for (my $i = 0; $i < $#F; $i++) {
+    if ($F[$i] == 0) {
+      print "1 ";
+    } else {
+      print 1.0/$F[$i] . " ";
+    }
+  }
+  print "]\n";
+}
diff --git a/egs/wsj/s5/steps/segmentation/vector_get_max.pl b/egs/wsj/s5/steps/segmentation/vector_get_max.pl
new file mode 100644
index 00000000000..abb8ea977a2
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/vector_get_max.pl
@@ -0,0 +1,26 @@
+#! /usr/bin/perl
+
+use warnings;
+use strict;
+
+while (<>) {
+  chomp;
+  if (m/^\S+\s+\[.+\]\s*$/) {
+    my @F = split;
+    my $utt = shift @F;
+    shift @F;
+
+    my $max_id = 0;
+    my $max = $F[0];
+    for (my $i = 1; $i < $#F; $i++) {
+      if ($F[$i] > $max) {
+        $max_id = $i;
+        $max = $F[$i];
+      }
+    }
+
+    print "$utt $max_id\n";
+  } else {
+    die "Invalid line $_\n";
+  }
+}
From 709ac923ddc07ff2ca568fafa7e62a6a50f5ea8c Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 30 Nov 2016 17:03:12 -0500
Subject: [PATCH 062/530] asr_diarization: Recipe for segmentation on AMI SDM dev set

---
 .../s5b/local/prepare_parallel_train_data.sh  |  24 ++--
 .../segmentation/run_segmentation_ami.sh      | 128 ++++++++++++++++++
 2 files changed, 140 insertions(+), 12 deletions(-)
 create mode 100755 egs/aspire/s5/local/segmentation/run_segmentation_ami.sh

diff --git a/egs/ami/s5b/local/prepare_parallel_train_data.sh b/egs/ami/s5b/local/prepare_parallel_train_data.sh
index b049c906c3b..b551bacfb92 100755
--- a/egs/ami/s5b/local/prepare_parallel_train_data.sh
+++ b/egs/ami/s5b/local/prepare_parallel_train_data.sh
@@ -5,6 +5,10 @@
 # but the wav data is copied from data/ihm. This is a little tricky because the
 # utterance ids are different between the different mics
 
+train_set=train
+
+. utils/parse_options.sh
+
 
 if [ $# != 1 ]; then
   echo "Usage: $0 [sdm1|mdm8]"
@@ -18,12 +22,10 @@ if [ $mic == "ihm" ]; then
   exit 1;
 fi
 
-train_set=train
-
 . cmd.sh
 . ./path.sh
 
-for f in data/ihm/train/utt2spk data/$mic/train/utt2spk; do
+for f in data/ihm/${train_set}/utt2spk data/$mic/${train_set}/utt2spk; do
   if [ ! -f $f ]; then
     echo "$0: expected file $f to exist"
     exit 1
@@ -32,12 +34,12 @@ done
 
 set -e -o pipefail
 
-mkdir -p data/$mic/train_ihmdata
+mkdir -p data/$mic/${train_set}_ihmdata
 
 # the utterance-ids and speaker ids will be from the SDM or MDM data
-cp data/$mic/train/{spk2utt,text,utt2spk} data/$mic/train_ihmdata/
+cp data/$mic/${train_set}/{spk2utt,text,utt2spk} data/$mic/${train_set}_ihmdata/
 # the recording-ids will be from the IHM data.
-cp data/ihm/train/{wav.scp,reco2file_and_channel} data/$mic/train_ihmdata/
+cp data/ihm/${train_set}/{wav.scp,reco2file_and_channel} data/$mic/${train_set}_ihmdata/
 
 # map sdm/mdm segments to the ihm segments
 
@@ -47,19 +49,17 @@ mic_base_upcase=$(echo $mic | sed 's/[0-9]//g' | tr 'a-z' 'A-Z')
 # It has lines like:
 # AMI_EN2001a_H02_FEO065_0021133_0021442 AMI_EN2001a_SDM_FEO065_0021133_0021442
 
-tmpdir=data/$mic/train_ihmdata/
+tmpdir=data/$mic/${train_set}_ihmdata/
 
 awk '{print $1, $1}' $tmpdir/ihmutt2utt
 
 # Map the 1st field of the segments file from the ihm data (the 1st field being
 # the utterance-id) to the corresponding SDM or MDM utterance-id. The other
 # fields remain the same (e.g. we want the recording-ids from the IHM data).
-utils/apply_map.pl -f 1 $tmpdir/ihmutt2utt data/$mic/train_ihmdata/segments - -utils/fix_data_dir.sh data/$mic/train_ihmdata +utils/apply_map.pl -f 1 $tmpdir/ihmutt2utt data/$mic/${train_set}_ihmdata/segments -rm $tmpdir/ihmutt2utt +utils/fix_data_dir.sh data/$mic/${train_set}_ihmdata exit 0; diff --git a/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh b/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh new file mode 100755 index 00000000000..46ebf013b82 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh @@ -0,0 +1,128 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0. + +. cmd.sh +. path.sh + +set -e +set -o pipefail +set -u + +stage=-1 +nnet_dir=exp/nnet3_sad_snr/nnet_tdnn_k_n4 + +. utils/parse_options.sh + +export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH + +src_dir=/export/a09/vmanoha1/workspace_asr_diarization/egs/ami/s5b # AMI src_dir +dir=exp/sad_ami_sdm1_dev/ref + +mkdir -p $dir + +# Expecting user to have done run.sh to run the AMI recipe in $src_dir for +# both sdm and ihm microphone conditions + +if [ $stage -le 1 ]; then + ( + cd $src_dir + local/prepare_parallel_train_data.sh --train-set dev sdm1 + + awk '{print $1" "$2}' $src_dir/data/ihm/dev/segments > \ + $src_dir/data/ihm/dev/utt2reco + awk '{print $1" "$2}' $src_dir/data/sdm1/dev/segments > \ + $src_dir/data/sdm1/dev/utt2reco + + cat $src_dir/data/sdm1/dev_ihmdata/ihmutt2utt | \ + utils/apply_map.pl -f 1 $src_dir/data/ihm/dev/utt2reco | \ + utils/apply_map.pl -f 2 $src_dir/data/sdm1/dev/utt2reco | \ + sort -u > $src_dir/data/sdm1/dev_ihmdata/ihm2sdm_reco + ) +fi + +if [ $stage -le 2 ]; then + ( + cd $src_dir + utils/data/get_reco2utt.sh $src_dir/data/sdm1/dev + ) + + phone_map=$dir/phone_map + steps/segmentation/get_sad_map.py \ + $src_dir/data/lang | utils/sym2int.pl -f 1 $src_dir/data/lang/phones.txt > \ + $phone_map +fi + +if [ $stage -le 3 ]; then + # Expecting user to have run local/run_cleanup_segmentation.sh in $src_dir + ( + cd $src_dir + steps/align_fmllr.sh --nj 18 --cmd "$train_cmd" \ + data/sdm1/dev_ihmdata data/lang \ + exp/ihm/tri3_cleaned \ + exp/sdm1/tri3_cleaned_dev_ihmdata + ) +fi + +if [ $stage -le 4 ]; then + steps/segmentation/internal/convert_ali_to_vad.sh --cmd "$train_cmd" \ + $src_dir/exp/sdm1/tri3_cleaned_dev_ihmdata $phone_map $dir +fi + +echo "A 1" > $dir/channel_map +cat $src_dir/data/sdm1/dev/reco2file_and_channel | \ + utils/apply_map.pl -f 3 $dir/channel_map > $dir/reco2file_and_channel + +if [ $stage -le 5 ]; then + $train_cmd $dir/log/get_ref_rttm.log \ + segmentation-combine-segments scp:$dir/sad_seg.scp \ + "ark:segmentation-init-from-segments --shift-to-zero=false $src_dir/data/sdm1/dev_ihmdata/segments ark:- |" \ + ark,t:$src_dir/data/sdm1/dev_ihmdata/reco2utt ark:- \| \ + segmentation-merge-recordings \ + "ark,t:utils/utt2spk_to_spk2utt.pl $src_dir/data/sdm1/dev_ihmdata/ihm2sdm_reco |" \ + ark:- ark:- \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + ark:- $dir/ref.rttm +fi + +if [ $stage -le 6 ]; then + $train_cmd $dir/log/get_uem.log \ + segmentation-init-from-segments --shift-to-zero=false $src_dir/data/sdm1/dev/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- ark,t:$src_dir/data/sdm1/dev/reco2utt ark:- \| \ + segmentation-post-process --remove-labels=0 --merge-adjacent-segments \ + --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + ark:- - \| grep SPEECH \| grep SPEAKER \| \ + rttmSmooth.pl -s 0 
\| awk '{ print $2" "$3" "$4" "$5+$4 }' '>' $dir/uem +fi + +hyp_dir=$nnet_dir/segmentation_ami_sdm1_dev_whole_bp + +if [ $stage -le 7 ]; then + steps/segmentation/do_segmentation_data_dir.sh --reco-nj 18 \ + --mfcc-config conf/mfcc_hires_bp.conf --feat-affix bp --do-downsampling true \ + --extra-left-context 100 --extra-right-context 20 \ + --output-name output-speech --frame-subsampling-factor 6 \ + $src_dir/data/sdm1/dev data/ami_sdm1_dev $nnet_dir +fi + + +if [ $stage -le 8 ]; then + utils/data/get_reco2utt.sh $src_dir/data/sdm1/dev_ihmdata + + steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \ + $hyp_dir/ami_sdm1_dev_seg/utt2spk \ + $hyp_dir/ami_sdm1_dev_seg/segments \ + $dir/reco2file_and_channel \ + /dev/stdout | spkr2sad.pl > $hyp_dir/sys.rttm +fi + +if [ $stage -le 9 ]; then + md-eval.pl -s <(cat $hyp_dir/sys.rttm | grep speech | rttmSmooth.pl -s 0) \ + -r <(cat $dir/ref.rttm | grep SPEECH | rttmSmooth.pl -s 0 ) \ + -u $dir/uem -c 0.25 +fi + +#md-eval.pl -s <( segmentation-init-from-segments --shift-to-zero=false exp/nnet3_sad_snr/nnet_tdnn_j_n4/segmentation_ami_sdm1_dev_whole_bp/ami_sdm1_dev_seg/segments ark:- | segmentation-combine-segments-to-recordings ark:- ark,t:exp/nnet3_sad_snr/nnet_tdnn_j_n4/segmentation_ami_sdm1_dev_whole_bp/ami_sdm1_dev_seg/reco2utt ark:- | segmentation-to-ali --length-tolerance=1000 --lengths-rspecifier=ark,t:data/ami_sdm1_dev_whole_bp_hires/utt2num_frames ark:- ark:- | +#segmentation-init-from-ali ark:- ark:- | segmentation-to-rttm ark:- - | grep SPEECH | rttmSmooth.pl -s 0) From 64d1456d831ea5cd33e61fc384a85bff16855d6e Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 30 Nov 2016 17:01:59 -0500 Subject: [PATCH 063/530] asr_diarization: Fisher recipe from data preparation, training nnet and testing on AMI --- .../local/segmentation/prepare_fisher_data.sh | 88 +++++++++++++++++++ .../s5/local/segmentation/run_fisher.sh | 23 +++++ 2 files changed, 111 insertions(+) create mode 100644 egs/aspire/s5/local/segmentation/prepare_fisher_data.sh create mode 100644 egs/aspire/s5/local/segmentation/run_fisher.sh diff --git a/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh b/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh new file mode 100644 index 00000000000..1344e185a02 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh @@ -0,0 +1,88 @@ +#! /bin/bash + +# This script prepares Fisher data for training a speech activity detection +# and music detection system + +# Copyright 2016 Vimal Manohar +# Apache 2.0. + +. path.sh +. cmd.sh + +if [ $# -ne 0 ]; then + echo "Usage: $0" + echo "This script is to serve as an example recipe." + echo "Edit the script to change variables if needed." + exit 1 +fi + +dir=exp/unsad/make_unsad_fisher_train_100k # Work dir +subset=150 + +# All the paths below can be modified to any absolute path. + +# The original data directory which will be converted to a whole (recording-level) directory. 
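+# e.g. data/fisher_train_100k here; prepare_unsad_data.sh below produces
+# data/fisher_train_100k_whole, with one utterance spanning each recording.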
+train_data_dir=data/fisher_train_100k
+
+model_dir=exp/tri3a   # Model directory used for decoding
+sat_model_dir=exp/tri4a   # Model directory used for getting alignments
+lang=data/lang   # Language directory
+lang_test=data/lang_test   # Language directory used to build graph
+
+# Hard code the mapping from phones to SAD labels
+# 0 for silence, 1 for speech, 2 for noise, 3 for unk
+cat <<EOF > $dir/fisher_sad.map
+sil 0
+sil_B 0
+sil_E 0
+sil_I 0
+sil_S 0
+laughter 2
+laughter_B 2
+laughter_E 2
+laughter_I 2
+laughter_S 2
+noise 2
+noise_B 2
+noise_E 2
+noise_I 2
+noise_S 2
+oov 3
+oov_B 3
+oov_E 3
+oov_I 3
+oov_S 3
+EOF
+
+# Expecting the user to have done run.sh to have $model_dir,
+# $sat_model_dir, $lang, $lang_test, $train_data_dir
+local/segmentation/prepare_unsad_data.sh \
+  --sad-map $dir/fisher_sad.map \
+  --config-dir conf \
+  --reco-nj 40 --nj 100 --cmd "$train_cmd" \
+  --sat-model $sat_model_dir \
+  --lang-test $lang_test \
+  $train_data_dir $lang $model_dir $dir
+
+data_dir=${train_data_dir}_whole
+
+if [ ! -z $subset ]; then
+  # Work on a subset
+  utils/subset_data_dir.sh ${data_dir} $subset \
+    ${data_dir}_$subset
+  data_dir=${data_dir}_$subset
+fi
+
+reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp
+
+# Add noise from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir.sh \
+  --data-dir $data_dir \
+  --reco-vad-dir $reco_vad_dir \
+  --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
+
+# Add music from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir_music.sh \
+  --data-dir $data_dir \
+  --reco-vad-dir $reco_vad_dir \
+  --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
diff --git a/egs/aspire/s5/local/segmentation/run_fisher.sh b/egs/aspire/s5/local/segmentation/run_fisher.sh
new file mode 100644
index 00000000000..e39ef5f3a91
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/run_fisher.sh
@@ -0,0 +1,23 @@
+#! /bin/bash

+# Copyright 2016 Vimal Manohar
+# Apache 2.0. 
+
+local/segmentation/prepare_fisher_data.sh
+
+utils/combine_data.sh --extra-files "speech_feat.scp deriv_weights.scp deriv_weights_manual_seg.scp music_labels.scp" \
+  data/fisher_train_100k_whole_all_corrupted_sp_hires_bp \
+  data/fisher_train_100k_whole_corrupted_sp_hires_bp \
+  data/fisher_train_100k_whole_music_corrupted_sp_hires_bp
+
+local/segmentation/train_stats_sad_music.sh \
+  --train-data-dir data/fisher_train_100k_whole_all_corrupted_sp_hires_bp \
+  --speech-feat-scp data/fisher_train_100k_whole_corrupted_sp_hires_bp/speech_feat.scp \
+  --deriv-weights-scp data/fisher_train_100k_whole_corrupted_sp_hires_bp/deriv_weights.scp \
+  --music-labels-scp data/fisher_train_100k_whole_music_corrupted_sp_hires_bp/music_labels.scp \
+  --max-param-change 0.2 \
+  --num-epochs 2 --affix k \
+  --splice-indexes "-3,-2,-1,0,1,2,3 -6,0,mean+count(-99:3:9:99) -9,0,3 0"
+
+local/segmentation/run_segmentation_ami.sh \
+  --nnet-dir exp/nnet3_sad_snr/nnet_tdnn_k_n4
From 99ce6c816ca70670f9b555e9d0a79667c24f8b91 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Thu, 24 Nov 2016 01:18:19 -0500
Subject: [PATCH 064/530] asr_diarization: created compute-snr-targets

---
 src/featbin/Makefile               |   2 +-
 src/featbin/compute-snr-targets.cc | 273 +++++++++++++++++++++++++++++
 src/matrix/kaldi-matrix.cc         |  81 +++++++++
 src/matrix/kaldi-matrix.h          |   4 +
 4 files changed, 359 insertions(+), 1 deletion(-)
 create mode 100644 src/featbin/compute-snr-targets.cc

diff --git a/src/featbin/Makefile b/src/featbin/Makefile
index e1a9a1ebe0d..aaa4abca24c 100644
--- a/src/featbin/Makefile
+++ b/src/featbin/Makefile
@@ -16,7 +16,7 @@ BINFILES = compute-mfcc-feats compute-plp-feats compute-fbank-feats \
            compute-and-process-kaldi-pitch-feats modify-cmvn-stats wav-copy \
            wav-reverberate append-vector-to-feats detect-sinusoids shift-feats \
            concat-feats append-post-to-feats post-to-feats vector-to-feat \
-           extract-column
+           extract-column compute-snr-targets

 OBJFILES =

diff --git a/src/featbin/compute-snr-targets.cc b/src/featbin/compute-snr-targets.cc
new file mode 100644
index 00000000000..cdb7ef66c2a
--- /dev/null
+++ b/src/featbin/compute-snr-targets.cc
@@ -0,0 +1,273 @@
+// featbin/compute-snr-targets.cc

+// Copyright 2015-2016  Vimal Manohar

+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License. 
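+
+// To make the log-domain arithmetic below concrete: for one time-frequency
+// bin with clean log-energy c and noise (or noisy) log-energy n, the three
+// target types reduce to the following (a sketch for illustration only --
+// LogAdd comes from base/kaldi-math.h; these helper functions are not part
+// of this file):
+//
+//   double LogIrm(double c, double n)  { return c - LogAdd(c, n); } // log C/(C+N)
+//   double LogSnr(double c, double n)  { return c - n; }            // log C/N
+//   double LogFbankMask(double c, double noisy) { return c - noisy; } // log C/Noisy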
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "matrix/kaldi-matrix.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+
+    const char *usage =
+        "Compute snr targets using clean and noisy speech features.\n"
+        "The targets can be of 3 types -- \n"
+        "Irm (Ideal Ratio Mask) = Clean fbank / (Clean fbank + Noise fbank)\n"
+        "FbankMask = Clean fbank / Noisy fbank\n"
+        "Snr (Signal To Noise Ratio) = Clean fbank / Noise fbank\n"
+        "Both input and output features are assumed to be in log domain.\n"
+        "ali-rspecifier and silence-phones are used to identify whether "
+        "a particular frame is \"clean\" or not. Silence frames in "
+        "\"clean\" fbank are treated as \"noise\" and hence the SNR for those "
+        "frames is -inf in log scale.\n"
+        "Usage: compute-snr-targets [options] <clean-feats-rspecifier> <noise-or-noisy-feats-rspecifier> <targets-wspecifier>\n"
+        " or  compute-snr-targets [options] --binary-targets <feats-rspecifier> <targets-wspecifier>\n"
+        "e.g.: compute-snr-targets scp:clean.scp scp:noisy.scp ark:targets.ark\n";
+
+    std::string target_type = "Irm";
+    std::string ali_rspecifier;
+    std::string silence_phones_str;
+    std::string floor_str = "-inf", ceiling_str = "inf";
+    int32 length_tolerance = 0;
+    bool binary_targets = false;
+    int32 target_dim = -1;
+
+    ParseOptions po(usage);
+    po.Register("target-type", &target_type, "Target type can be Irm, "
+                "FbankMask or Snr");
+    po.Register("ali-rspecifier", &ali_rspecifier, "If provided, all the "
+                "energy in the silence region of clean file is considered noise");
+    po.Register("silence-phones", &silence_phones_str, "Colon-separated list of "
+                "silence phones");
+    po.Register("floor", &floor_str, "If specified, the target is floored at "
+                "this value. You may want to do this if you are using targets "
+                "in original log form as is usual in the case of Snr, but may "
+                "not if you are applying Exp() as is usual in the case of Irm");
+    po.Register("ceiling", &ceiling_str, "If specified, the target is ceiled "
+                "at this value. You may want to do this if you expect "
+                "infinities or very large values, particularly for Snr targets.");
+    po.Register("length-tolerance", &length_tolerance, "Tolerate differences "
+                "in utterance lengths of these many frames");
+    po.Register("binary-targets", &binary_targets, "If specified, then the "
+                "targets are created considering each frame to be either "
+                "completely signal or completely noise as decided by the "
+                "ali-rspecifier option. When ali-rspecifier is not specified, "
+                "then the entire utterance is considered to be just signal. "
+                "If this option is specified, then only a single argument "
+                "-- the clean features -- must be specified.");
+    po.Register("target-dim", &target_dim, "Overrides the target dimension. "
+                "Applicable only when --binary-targets is specified");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3 && po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::vector<int32> silence_phones;
+    if (!silence_phones_str.empty()) {
+      if (!SplitStringToIntegers(silence_phones_str, ":", false, &silence_phones)) {
+        KALDI_ERR << "Invalid silence-phones string " << silence_phones_str;
+      }
+      std::sort(silence_phones.begin(), silence_phones.end());
+    }
+
+    double floor = kLogZeroDouble, ceiling = -kLogZeroDouble;
+
+    if (floor_str != "-inf")
+      if (!ConvertStringToReal(floor_str, &floor)) {
+        KALDI_ERR << "Invalid --floor value " << floor_str;
+      }
+
+    if (ceiling_str != "inf")
+      if (!ConvertStringToReal(ceiling_str, &ceiling)) {
+        KALDI_ERR << "Invalid --ceiling value " << ceiling_str;
+      }
+
+    int32 num_done = 0, num_err = 0, num_success = 0;
+    int64 num_sil_frames = 0;
+    int64 num_speech_frames = 0;
+
+    if (!binary_targets) {
+      // This is the 'normal' case, where we have both clean and
+      // noise/corrupted input features.
+      // The word 'noisy' in the variable names is used to mean 'corrupted'.
+      std::string clean_rspecifier = po.GetArg(1),
+          noisy_rspecifier = po.GetArg(2),
+          targets_wspecifier = po.GetArg(3);
+
+      SequentialBaseFloatMatrixReader noisy_reader(noisy_rspecifier);
+      RandomAccessBaseFloatMatrixReader clean_reader(clean_rspecifier);
+      BaseFloatMatrixWriter kaldi_writer(targets_wspecifier);
+
+      RandomAccessInt32VectorReader alignment_reader(ali_rspecifier);
+
+      for (; !noisy_reader.Done(); noisy_reader.Next(), num_done++) {
+        const std::string &key = noisy_reader.Key();
+        Matrix<double> total_energy(noisy_reader.Value());
+        // Although this is called 'energy', it is actually log filterbank
+        // features of the noise or corrupted files --
+        // actually noise feats in the case of Irm and Snr.
+
+        // TODO: Support multiple corrupted versions for a particular clean file
+        std::string uniq_key = key;
+        if (!clean_reader.HasKey(uniq_key)) {
+          KALDI_WARN << "Could not find uniq key " << uniq_key << " "
+                     << "in clean feats " << clean_rspecifier;
+          num_err++;
+          continue;
+        }
+
+        Matrix<double> clean_energy(clean_reader.Value(uniq_key));
+
+        if (target_type == "Irm") {
+          total_energy.LogAddExpMat(1.0, clean_energy, kNoTrans);
+        }
+
+        if (!ali_rspecifier.empty()) {
+          if (!alignment_reader.HasKey(uniq_key)) {
+            KALDI_WARN << "Could not find uniq key " << uniq_key
+                       << " in alignment " << ali_rspecifier;
+            num_err++;
+            continue;
+          }
+          const std::vector<int32> &ali = alignment_reader.Value(key);
+
+          if (std::abs(static_cast<int32>(ali.size()) - clean_energy.NumRows()) > length_tolerance) {
+            KALDI_WARN << "Mismatch in number of frames in alignment "
+                       << "and feats; " << static_cast<int32>(ali.size())
+                       << " vs " << clean_energy.NumRows();
+            num_err++;
+            continue;
+          }
+
+          int32 length = std::min(static_cast<int32>(ali.size()), clean_energy.NumRows());
+          if (static_cast<int32>(ali.size()) < length)
+            // TODO: Support this case
+            KALDI_ERR << "This code currently does not support the case "
+                      << "where the alignment is smaller than the features, "
+                      << "because it is not expected to happen";
+
+          KALDI_ASSERT(clean_energy.NumRows() >= length);
+          KALDI_ASSERT(total_energy.NumRows() >= length);
+
+          if (clean_energy.NumRows() > length) clean_energy.Resize(length, clean_energy.NumCols(), kCopyData);
+          if (total_energy.NumRows() > length) total_energy.Resize(length, total_energy.NumCols(), kCopyData);
+
+          for (int32 i = 0; i < clean_energy.NumRows(); i++) {
+            if (std::binary_search(silence_phones.begin(), silence_phones.end(), ali[i])) {
+              clean_energy.Row(i).Set(kLogZeroDouble);
+              num_sil_frames++;
+            } else num_speech_frames++;
+          }
+        }
+
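+        // For Irm, total_energy now holds log(Clean + Noise), thanks to the
+        // LogAddExpMat() call above, so the AddMat(-1.0, ...) below forms
+        // log(Clean) - log(Clean + Noise), i.e. the log of the ideal ratio
+        // mask; for Snr and FbankMask it forms log(Clean/Noise) or
+        // log(Clean/Noisy).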
+        clean_energy.AddMat(-1.0, total_energy);
+        if (ceiling_str != "inf") {
+          clean_energy.ApplyCeiling(ceiling);
+        }
+
+        if (floor_str != "-inf") {
+          clean_energy.ApplyFloor(floor);
+        }
+
+        kaldi_writer.Write(key, Matrix<BaseFloat>(clean_energy));
+        num_success++;
+      }
+    } else {
+      // The --binary-targets case: each frame is treated as either all
+      // signal or all noise, based on the alignment if supplied.
+      std::string feats_rspecifier = po.GetArg(1),
+          targets_wspecifier = po.GetArg(2);
+
+      SequentialBaseFloatMatrixReader feats_reader(feats_rspecifier);
+      BaseFloatMatrixWriter kaldi_writer(targets_wspecifier);
+
+      RandomAccessInt32VectorReader alignment_reader(ali_rspecifier);
+
+      for (; !feats_reader.Done(); feats_reader.Next(), num_done++) {
+        const std::string &key = feats_reader.Key();
+        const Matrix<BaseFloat> &feats = feats_reader.Value();
+
+        Matrix<BaseFloat> targets;
+
+        if (target_dim < 0)
+          targets.Resize(feats.NumRows(), feats.NumCols());
+        else
+          targets.Resize(feats.NumRows(), target_dim);
+
+        if (target_type == "Snr")
+          targets.Set(-kLogZeroDouble);
+
+        if (!ali_rspecifier.empty()) {
+          if (!alignment_reader.HasKey(key)) {
+            KALDI_WARN << "Could not find uniq key " << key
+                       << " in alignment " << ali_rspecifier;
+            num_err++;
+            continue;
+          }
+
+          const std::vector<int32> &ali = alignment_reader.Value(key);
+
+          if (std::abs(static_cast<int32>(ali.size()) - feats.NumRows()) > length_tolerance) {
+            KALDI_WARN << "Mismatch in number of frames in alignment "
+                       << "and feats; " << static_cast<int32>(ali.size())
+                       << " vs " << feats.NumRows();
+            num_err++;
+            continue;
+          }
+
+          int32 length = std::min(static_cast<int32>(ali.size()), feats.NumRows());
+          KALDI_ASSERT(static_cast<int32>(ali.size()) >= length);
+
+          for (int32 i = 0; i < length; i++) {
+            if (std::binary_search(silence_phones.begin(), silence_phones.end(), ali[i])) {
+              targets.Row(i).Set(kLogZeroDouble);
+              num_sil_frames++;
+            } else {
+              num_speech_frames++;
+            }
+          }
+
+          if (ceiling_str != "inf") {
+            targets.ApplyCeiling(ceiling);
+          }
+
+          if (floor_str != "-inf") {
+            targets.ApplyFloor(floor);
+          }
+
+          kaldi_writer.Write(key, targets);
+          num_success++;
+        }
+      }
+    }
+
+    KALDI_LOG << "Computed SNR targets for " << num_success
+              << " out of " << num_done << " utterances; failed for "
+              << num_err;
+    KALDI_LOG << "Got [ " << num_speech_frames << ", "
+              << num_sil_frames << " ] frames of speech and silence";
+    return (num_success > 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
diff --git a/src/matrix/kaldi-matrix.cc b/src/matrix/kaldi-matrix.cc
index 4c3948ba2f5..0b5191e1e7a 100644
--- a/src/matrix/kaldi-matrix.cc
+++ b/src/matrix/kaldi-matrix.cc
@@ -396,6 +396,87 @@ void MatrixBase<Real>::AddMat(const Real alpha, const MatrixBase<Real> &A,
   }
 }
 
+template<typename Real>
+void MatrixBase<Real>::LogAddExpMat(const Real alpha, const MatrixBase<Real> &A,
+                                    MatrixTransposeType transA) {
+  if (alpha == 0) return;
+
+  if (&A == this) {
+    if (transA == kNoTrans) {
+      Add(Log(1.0 + alpha));
+    } else {
+      KALDI_ASSERT(num_rows_ == num_cols_ &&
+                   "LogAddExpMat: adding to self (transposed): not symmetric.");
+      Real *data = data_;
+      if (alpha == 1.0) {  // common case -- handle separately.
+        for (MatrixIndexT row = 0; row < num_rows_; row++) {
+          for (MatrixIndexT col = 0; col < row; col++) {
+            Real *lower = data + (row * stride_) + col,
+                *upper = data + (col * stride_) + row;
+            Real sum = LogAdd(*lower, *upper);
+            *lower = *upper = sum;
+          }
+          *(data + (row * stride_) + row) += Log(2.0);  // diagonal.
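+          // (log(exp(x) + exp(x)) = x + Log(2.0), which is why the diagonal
+          // gets a constant shift rather than a LogAdd call.)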
+ } + } else { + for (MatrixIndexT row = 0; row < num_rows_; row++) { + for (MatrixIndexT col = 0; col < row; col++) { + Real *lower = data + (row * stride_) + col, + *upper = data + (col * stride_) + row; + Real lower_tmp = *lower; + if (alpha > 0) { + *lower = LogAdd(*lower, Log(alpha) + *upper); + *upper = LogAdd(*upper, Log(alpha) + lower_tmp); + } else { + KALDI_ASSERT(alpha < 0); + *lower = LogSub(*lower, Log(-alpha) + *upper); + *upper = LogSub(*upper, Log(-alpha) + lower_tmp); + } + } + if (alpha > -1.0) + *(data + (row * stride_) + row) += Log(1.0 + alpha); // diagonal. + else + KALDI_ERR << "Cannot subtract log-matrices if the difference is " + << "negative"; + } + } + } + } else { + int aStride = (int) A.stride_; + Real *adata = A.data_, *data = data_; + if (transA == kNoTrans) { + KALDI_ASSERT(A.num_rows_ == num_rows_ && A.num_cols_ == num_cols_); + if (num_rows_ == 0) return; + for (MatrixIndexT row = 0; row < num_rows_; row++) { + for (MatrixIndexT col = 0; col < num_cols_; col++) { + Real *value = data + (row * stride_) + col, + *aValue = adata + (row * aStride) + col; + if (alpha > 0) + *value = LogAdd(*value, Log(alpha) + *aValue); + else { + KALDI_ASSERT(alpha < 0); + *value = LogSub(*value, Log(-alpha) + *aValue); + } + } + } + } else { + KALDI_ASSERT(A.num_cols_ == num_rows_ && A.num_rows_ == num_cols_); + if (num_rows_ == 0) return; + for (MatrixIndexT row = 0; row < num_rows_; row++) { + for (MatrixIndexT col = 0; col < num_cols_; col++) { + Real *value = data + (row * stride_) + col, + *aValue = adata + (col * aStride) + row; + if (alpha > 0) + *value = LogAdd(*value, Log(alpha) + *aValue); + else { + KALDI_ASSERT(alpha < 0); + *value = LogSub(*value, Log(-alpha) + *aValue); + } + } + } + } + } +} + template template void MatrixBase::AddSp(const Real alpha, const SpMatrix &S) { diff --git a/src/matrix/kaldi-matrix.h b/src/matrix/kaldi-matrix.h index dccd52a9af4..b5a6bc7521d 100644 --- a/src/matrix/kaldi-matrix.h +++ b/src/matrix/kaldi-matrix.h @@ -548,6 +548,10 @@ class MatrixBase { /// *this += alpha * M [or M^T] void AddMat(const Real alpha, const MatrixBase &M, MatrixTransposeType transA = kNoTrans); + + /// *this += alpha * M [or M^T] when the matrices are stored as log + void LogAddExpMat(const Real alpha, const MatrixBase &M, + MatrixTransposeType transA = kNoTrans); /// *this = beta * *this + alpha * M M^T, for symmetric matrices. It only /// updates the lower triangle of *this. It will leave the matrix asymmetric; From ef36cf5f41d3800ff3768ceb34816c3c53159151 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 23 Nov 2016 21:56:24 -0500 Subject: [PATCH 065/530] asr_diarization: make_snr_targets.sh --- .../s5/steps/segmentation/make_snr_targets.sh | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100755 egs/wsj/s5/steps/segmentation/make_snr_targets.sh diff --git a/egs/wsj/s5/steps/segmentation/make_snr_targets.sh b/egs/wsj/s5/steps/segmentation/make_snr_targets.sh new file mode 100755 index 00000000000..71f603a690e --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/make_snr_targets.sh @@ -0,0 +1,104 @@ +#!/bin/bash + +# Copyright 2015-16 Vimal Manohar +# Apache 2.0 +set -e +set -o pipefail + +nj=4 +cmd=run.pl +stage=0 + +data_id= + +compress=true +target_type=Irm +apply_exp=false + +ali_rspecifier= +silence_phones_str=0 + +ignore_noise_dir=false + +ceiling=inf +floor=-inf + +length_tolerance=2 +transform_matrix= + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. 
+
+if [ $# != 5 ]; then
+  echo "Usage: $0 [options] --target-type (Irm|Snr) <clean-data> <noise-data> <corrupted-data> <tmp-dir> <targets-dir>";
+  echo " or  : $0 [options] --target-type FbankMask <clean-data> <noisy-data> <corrupted-data> <tmp-dir> <targets-dir>";
+  echo "e.g.: $0 data/train_clean_fbank data/train_noise_fbank data/train_corrupted_hires exp/make_snr_targets/train snr_targets"
+  echo "options: "
+  echo "  --nj <nj>                                        # number of parallel jobs"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  exit 1;
+fi
+
+clean_data=$1
+noise_or_noisy_data=$2
+data=$3
+tmpdir=$4
+targets_dir=$5
+
+mkdir -p $targets_dir
+
+[ -z "$data_id" ] && data_id=`basename $data`
+
+utils/split_data.sh $clean_data $nj
+
+for n in `seq $nj`; do
+  utils/subset_data_dir.sh --utt-list $clean_data/split$nj/$n/utt2spk $noise_or_noisy_data $noise_or_noisy_data/subset${nj}/$n
+done
+
+$ignore_noise_dir && utils/split_data.sh $data $nj
+
+targets_dir=`perl -e '($data,$pwd)= @ARGV; if($data!~m:^/:) { $data = "$pwd/$data"; } print $data; ' $targets_dir ${PWD}`
+
+for n in `seq $nj`; do
+  utils/create_data_link.pl $targets_dir/${data_id}.$n.ark
+done
+
+apply_exp_opts=
+if $apply_exp; then
+  apply_exp_opts=" copy-matrix --apply-exp=true ark:- ark:- |"
+fi
+
+copy_feats_opts="copy-feats"
+if [ ! -z "$transform_matrix" ]; then
+  copy_feats_opts="transform-feats $transform_matrix"
+fi
+
+if [ $stage -le 1 ]; then
+  if ! $ignore_noise_dir; then
+    $cmd JOB=1:$nj $tmpdir/make_`basename $targets_dir`_${data_id}.JOB.log \
+      compute-snr-targets --length-tolerance=$length_tolerance --target-type=$target_type \
+      ${ali_rspecifier:+--ali-rspecifier="$ali_rspecifier" --silence-phones=$silence_phones_str} \
+      --floor=$floor --ceiling=$ceiling \
+      "ark:$copy_feats_opts scp:$clean_data/split$nj/JOB/feats.scp ark:- |" \
+      "ark,s,cs:$copy_feats_opts scp:$noise_or_noisy_data/subset$nj/JOB/feats.scp ark:- |" \
+      ark:- \|$apply_exp_opts \
+      copy-feats --compress=$compress ark:- \
+      ark,scp:$targets_dir/${data_id}.JOB.ark,$targets_dir/${data_id}.JOB.scp || exit 1
+  else
+    feat_dim=$(feat-to-dim scp:$data/feats.scp -) || exit 1
+    $cmd JOB=1:$nj $tmpdir/make_`basename $targets_dir`_${data_id}.JOB.log \
+      compute-snr-targets --length-tolerance=$length_tolerance --target-type=$target_type \
+      ${ali_rspecifier:+--ali-rspecifier="$ali_rspecifier" --silence-phones=$silence_phones_str} \
+      --floor=$floor --ceiling=$ceiling --binary-targets --target-dim=$feat_dim \
+      scp:$data/split$nj/JOB/feats.scp \
+      ark:- \|$apply_exp_opts \
+      copy-feats --compress=$compress ark:- \
+      ark,scp:$targets_dir/${data_id}.JOB.ark,$targets_dir/${data_id}.JOB.scp || exit 1
+  fi
+fi
+
+for n in `seq $nj`; do
+  cat $targets_dir/${data_id}.$n.scp
+done > $data/`basename $targets_dir`.scp
From c3da17a90f0a8c22e0abee6e49f672d986d6bf80 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Thu, 24 Nov 2016 03:09:24 -0500
Subject: [PATCH 066/530] asr_diarization: Added script to get DCT matrix

---
 egs/wsj/s5/utils/data/get_dct_matrix.py | 108 ++++++++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100755 egs/wsj/s5/utils/data/get_dct_matrix.py

diff --git a/egs/wsj/s5/utils/data/get_dct_matrix.py b/egs/wsj/s5/utils/data/get_dct_matrix.py
new file mode 100755
index 00000000000..88b28b5dd5c
--- /dev/null
+++ b/egs/wsj/s5/utils/data/get_dct_matrix.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python

+# we're using python 3.x style print but want it to work in python 2.x.
+from __future__ import print_function
+import os, argparse, sys, math, warnings
+
+import numpy as np
+
+def ComputeLifterCoeffs(Q, dim):
+    coeffs = np.zeros((dim))
+    for i in range(0, dim):
+        
coeffs[i] = 1.0 + 0.5 * Q * math.sin(math.pi * i / Q); + + return coeffs + +def ComputeIDctMatrix(K, N, cepstral_lifter=0): + matrix = np.zeros((K, N)) + # normalizer for X_0 + normalizer = math.sqrt(1.0 / N); + for j in range(0, N): + matrix[0, j] = normalizer; + # normalizer for other elements + normalizer = math.sqrt(2.0 / N); + for k in range(1, K): + for n in range(0, N): + matrix[k, n] = normalizer * math.cos(math.pi/N * (n + 0.5) * k); + + if cepstral_lifter != 0: + lifter_coeffs = ComputeLifterCoeffs(cepstral_lifter, K) + for k in range(0, K): + matrix[k, :] = matrix[k, :] / lifter_coeffs[k]; + + return matrix.T + +def ComputeDctMatrix(K, N, cepstral_lifter=0): + matrix = np.zeros((K, N)) + # normalizer for X_0 + normalizer = math.sqrt(1.0 / N); + for j in range(0, N): + matrix[0, j] = normalizer; + # normalizer for other elements + normalizer = math.sqrt(2.0 / N); + for k in range(1, K): + for n in range(0, N): + matrix[k, n] = normalizer * math.cos(math.pi/N * (n + 0.5) * k); + + if cepstral_lifter != 0: + lifter_coeffs = ComputeLifterCoeffs(cepstral_lifter, K) + for k in range(0, K): + matrix[k, :] = matrix[k, :] * lifter_coeffs[k]; + + return matrix + +def GetArgs(): + parser = argparse.ArgumentParser(description="Write DCT/IDCT matrix") + parser.add_argument("--cepstral-lifter", type=float, + help="Here we need the scaling factor on cepstra in the production of MFCC" + "to cancel out the effect of lifter, e.g. 22.0", default=22.0) + parser.add_argument("--num-ceps", type=int, + default=13, + help="Number of cepstral dimensions") + parser.add_argument("--num-filters", type=int, + default=23, + help="Number of mel filters") + parser.add_argument("--get-idct-matrix", type=str, default="false", + choices=["true","false"], + help="Get IDCT matrix instead of DCT matrix") + parser.add_argument("--add-zero-column", type=str, default="true", + choices=["true","false"], + help="Add a column to convert the matrix from a linear transform to affine transform") + parser.add_argument("out_file", type=str, + help="Output file") + + args = parser.parse_args() + + return args + +def CheckArgs(args): + if args.num_ceps > args.num_filters: + raise Exception("num-ceps must not be larger than num-filters") + + args.out_file_handle = open(args.out_file, 'w') + + return args + +def Main(): + args = GetArgs() + args = CheckArgs(args) + + if args.get_idct_matrix == "false": + matrix = ComputeDctMatrix(args.num_ceps, args.num_filters, + args.cepstral_lifter) + if args.add_zero_column == "true": + matrix = np.append(matrix, np.zeros((args.num_ceps,1)), 1) + else: + matrix = ComputeIDctMatrix(args.num_ceps, args.num_filters, + args.cepstral_lifter) + + if args.add_zero_column == "true": + matrix = np.append(matrix, np.zeros((args.num_filters,1)), 1) + + print('[ ', file=args.out_file_handle) + np.savetxt(args.out_file_handle, matrix, fmt='%.6e') + print(' ]', file=args.out_file_handle) + +if __name__ == "__main__": + Main() + From 83cbdd6a695c2523396a25f7294692936a34da9b Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Tue, 29 Nov 2016 22:58:53 -0500 Subject: [PATCH 067/530] asr_diarization_clean: Adding run_train_sad.sh --- .../s5/local/segmentation/run_train_sad.sh | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100755 egs/aspire/s5/local/segmentation/run_train_sad.sh diff --git a/egs/aspire/s5/local/segmentation/run_train_sad.sh b/egs/aspire/s5/local/segmentation/run_train_sad.sh new file mode 100755 index 00000000000..9b1f104939a --- /dev/null +++ 
b/egs/aspire/s5/local/segmentation/run_train_sad.sh @@ -0,0 +1,150 @@ +#!/bin/bash + +# this is the standard "tdnn" system, built in nnet3; it's what we use to +# call multi-splice. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= + +splice_indexes="-3,-2,-1,0,1,2,3 -6,0 -9,0,3 0" +relu_dim=256 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=1 +extra_egs_copy_cmd= + +num_utts_subset_valid=40 +num_utts_subset_train=40 +add_idct=true + +# target options +train_data_dir=data/train_azteec_whole_sp_corrupted_hires + +snr_scp= +speech_feat_scp= + +deriv_weights_scp= +deriv_weights_for_irm_scp= + +egs_dir= +nj=40 +feat_type=raw +config_dir= +compute_objf_opts= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_hidden_layers=`echo $splice_indexes | perl -ane 'print scalar @F'` || exit 1 +if [ -z "$dir" ]; then + dir=exp/nnet3_sad_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix}_n${num_hidden_layers} + +if ! cuda-compiled; then + cat < Date: Fri, 9 Dec 2016 13:58:45 -0800 Subject: [PATCH 068/530] Adding another optimization to convert row-wise to whole-matrix ops where possible. --- src/cudamatrix/cu-math.cc | 12 +- src/nnet3/nnet-compile-looped.cc | 4 +- src/nnet3/nnet-compile.cc | 4 - src/nnet3/nnet-compute-test.cc | 4 +- src/nnet3/nnet-derivative-test.cc | 8 +- src/nnet3/nnet-optimize-test.cc | 4 +- src/nnet3/nnet-optimize-utils.cc | 521 ++++++++++++++++++----------- src/nnet3/nnet-optimize-utils.h | 32 +- src/nnet3/nnet-optimize.cc | 74 +++- src/nnet3/nnet-optimize.h | 45 ++- src/nnet3/nnet-simple-component.cc | 8 +- 11 files changed, 471 insertions(+), 245 deletions(-) diff --git a/src/cudamatrix/cu-math.cc b/src/cudamatrix/cu-math.cc index 047e808ae03..bb55302313a 100644 --- a/src/cudamatrix/cu-math.cc +++ b/src/cudamatrix/cu-math.cc @@ -481,15 +481,15 @@ void CpuBackpropLstmNonlinearity(const MatrixBase &input, // Sigmoid(i_t_input), Sigmoid(f_t_input), // Tanh(c_part), Sigmoid(o_t_input), Tanh(c_t) Real i_t_self_repair = ( - deriv_sum_in(0, c) / count < sr_config(0) ? sr_config(5) : 0.0); + deriv_sum_in_mat(0, c) / count < sr_config(0) ? sr_config(5) : 0.0); Real f_t_self_repair = ( - deriv_sum_in(1, c) / count < sr_config(1) ? sr_config(6) : 0.0); + deriv_sum_in_mat(1, c) / count < sr_config(1) ? sr_config(6) : 0.0); Real c_part_self_repair = ( - deriv_sum_in(2, c) / count < sr_config(2) ? sr_config(7) : 0.0); + deriv_sum_in_mat(2, c) / count < sr_config(2) ? sr_config(7) : 0.0); Real o_t_self_repair = ( - deriv_sum_in(3, c) / count < sr_config(3) ? sr_config(8) : 0.0); + deriv_sum_in_mat(3, c) / count < sr_config(3) ? sr_config(8) : 0.0); Real c_t_self_repair = ( - deriv_sum_in(4, c) / count < sr_config(4) ? sr_config(9) : 0.0); + deriv_sum_in_mat(4, c) / count < sr_config(4) ? sr_config(9) : 0.0); // Note on how we add self-repair for sigmoids/tanh's. If self-repair // is activated for this unit, then... // For sigmoids we'd add -self_repair_scale * (2 * sigmoid(x) - 1.0) @@ -605,7 +605,7 @@ void CpuBackpropLstmNonlinearity(const MatrixBase &input, // deriv_sum_out and deriv_sum_in might point to the same memory. 
for (int32 i = 0; i < 5; i++) (*self_repair_sum_out_mat)(i, c) = - (deriv_sum_in(i, c) / count < sr_config(i) ? num_rows : 0); + (deriv_sum_in_mat(i, c) / count < sr_config(i) ? num_rows : 0); (*deriv_sum_out_mat)(0, c) += i_t_deriv_sum; (*deriv_sum_out_mat)(1, c) += f_t_deriv_sum; diff --git a/src/nnet3/nnet-compile-looped.cc b/src/nnet3/nnet-compile-looped.cc index d77f19ef13c..62f29762580 100644 --- a/src/nnet3/nnet-compile-looped.cc +++ b/src/nnet3/nnet-compile-looped.cc @@ -18,6 +18,7 @@ // limitations under the License. #include "nnet3/nnet-compile-looped.h" +#include "nnet3/nnet-optimize-utils.h" #include "nnet3/nnet-utils.h" namespace kaldi { @@ -295,7 +296,8 @@ static bool CompileLoopedInternal( compiler.CreateComputation(compiler_opts, computation); optimize_opts.optimize_looped_computation = true; - Optimize(optimize_opts, nnet, computation); + Optimize(optimize_opts, nnet, + MaxOutputTimeInRequest(request3), computation); return computation->commands.size() != 0 && computation->commands.back().command_type == kGotoLabel; diff --git a/src/nnet3/nnet-compile.cc b/src/nnet3/nnet-compile.cc index 6ccf2119d64..6290fc2f956 100644 --- a/src/nnet3/nnet-compile.cc +++ b/src/nnet3/nnet-compile.cc @@ -543,10 +543,6 @@ void Compiler::DoForwardComputationFromIndexes( } // if we got to here, it's not just a case of matrix-copy or matrix-add, // but it's still from a single source matrix. - // TODO: detect the case where the indexes are contiguous, but possibly - // with -1's at the beginning or end (e.g. [ -1 2 3 4 5 6 7 8 ]) and make - // it a standard matrix-copy command with new sub-matrices added as needed, - // possibly with a subset of the rows in the original sub-matrices. int32 indexes_index = computation->indexes.size(); computation->indexes.push_back(indexes); CommandType ctype = diff --git a/src/nnet3/nnet-compute-test.cc b/src/nnet3/nnet-compute-test.cc index 81cc67f71ae..c6a271abfbe 100644 --- a/src/nnet3/nnet-compute-test.cc +++ b/src/nnet3/nnet-compute-test.cc @@ -172,7 +172,9 @@ void UnitTestNnetCompute() { if (RandInt(0, 1) == 0) { NnetOptimizeOptions opt_config; - Optimize(opt_config, nnet, &computation); + Optimize(opt_config, nnet, + MaxOutputTimeInRequest(request), + &computation); { std::ostringstream os; computation.Print(os, nnet); diff --git a/src/nnet3/nnet-derivative-test.cc b/src/nnet3/nnet-derivative-test.cc index 511a6dc6bf9..5dbc8a126d1 100644 --- a/src/nnet3/nnet-derivative-test.cc +++ b/src/nnet3/nnet-derivative-test.cc @@ -139,7 +139,9 @@ void UnitTestNnetModelDerivatives() { if (limit_deriv_times) SetDerivTimesOptions(request, &opt_config); - Optimize(opt_config, nnet, &computation); + Optimize(opt_config, nnet, + MaxOutputTimeInRequest(request), + &computation); std::ostringstream os; computation.Print(os, nnet); KALDI_LOG << "Optimized computation is: " << os.str(); @@ -303,7 +305,9 @@ void UnitTestNnetInputDerivatives() { if (RandInt(0, 3) != 0 && allow_optimization) { NnetOptimizeOptions opt_config; // opt_config.initialize_undefined = false; // temp - Optimize(opt_config, nnet, &computation); + Optimize(opt_config, nnet, + MaxOutputTimeInRequest(request), + &computation); std::ostringstream os; computation.Print(os, nnet); KALDI_LOG << "Optimized computation is: " << os.str(); diff --git a/src/nnet3/nnet-optimize-test.cc b/src/nnet3/nnet-optimize-test.cc index 7b64d67b72c..40f8d824a39 100644 --- a/src/nnet3/nnet-optimize-test.cc +++ b/src/nnet3/nnet-optimize-test.cc @@ -71,7 +71,9 @@ static bool UnitTestNnetOptimizeWithOptions(NnetOptimizeOptions 
opt_config) {

  NnetComputation computation_opt(computation);

  {
-    Optimize(opt_config, nnet, &computation_opt);
+    Optimize(opt_config, nnet,
+             MaxOutputTimeInRequest(request),
+             &computation_opt);
     std::ostringstream os;
     computation_opt.Print(os, nnet);
     KALDI_LOG << "Optimized computation is: " << os.str();
diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc
index d5de20f136c..6744eb91e37 100644
--- a/src/nnet3/nnet-optimize-utils.cc
+++ b/src/nnet3/nnet-optimize-utils.cc
@@ -691,8 +691,7 @@ bool VariableMergingOptimizer::MergeVariables() {
        command_index++) {
     // This loop looks for pairs of sub-matrix indexes s1,s2 that we could
     // potentially merge into a single variable.
-    const NnetComputation::Command &c =
-        computation_->commands[command_index];
+    const NnetComputation::Command &c = computation_->commands[command_index];
     int32 s1 = -1, s2 = -1;
     if (c.command_type == kMatrixCopy &&
         config_.remove_assignments) {
@@ -1844,22 +1843,6 @@ void DerivativeTimeLimiter::PruneMatrices() {
 }
 
-int32 MaxOutputTimeInRequest(const ComputationRequest &request) {
-  int32 ans = std::numeric_limits<int32>::min();
-  for (size_t i = 0; i < request.outputs.size(); i++) {
-    const std::vector<Index> &indexes = request.outputs[i].indexes;
-    std::vector<Index>::const_iterator iter = indexes.begin(),
-        end = indexes.end();
-    for (; iter != end; ++iter)
-      if (iter->t > ans)
-        ans = iter->t;
-  }
-  if (ans == std::numeric_limits<int32>::min()) {
-    KALDI_ERR << "Failed to find any output indexes in computation request.";
-  }
-  return ans;
-}
-
 void LimitDerivativeTimes(const Nnet &nnet,
                           int32 min_deriv_time,
                           int32 max_deriv_time,
@@ -1869,31 +1852,354 @@ void LimitDerivativeTimes(const Nnet &nnet,
   limiter.LimitDerivTimes();
 }
 
+
+/*
+  This helper function, used in ReplaceRowWithMatrixOps, detects
+  when the vector 'indexes' has a 'special structure'.  The special structure
+  is:
+    zero or more -1's, then
+    a consecutive nonempty sequence of nonnegative numbers, e.g. 6 7 8 9 10, then
+    zero or more -1's.
+
+  Note: this function assumes that any negative elements of 'indexes' are -1.
+  If there are elements less than -1, then it is an error, but this function
+  does not thoroughly check for that.  'indexes' is required to be a nonempty
+  vector.
+
+  If 'indexes' has the special structure then this function returns true
+  and sets the following values, which we will explain with the example
+  'indexes = [ -1, -1, 5, 6, 7, 8, -1 ]' in mind:
+    - '*first_nonnegative_pos' is set to the number of initial -1's (and also
+      the location of the first nonnegative element): 2 in this case.
+    - '*first_nonnegative_value' is set to the value of the first nonnegative
+      element (5 in this case).
+    - '*num_nonnegative_indexes' is set to the number of nonnegative values in
+      the sequence (4 in this case).
+  If 'indexes' does not have this special structure, then this function returns
+  false, and the values of '*first_nonnegative_pos',
+  '*first_nonnegative_value' and '*num_nonnegative_indexes' on exit are
+  undefined.
+*/
+static bool IndexesHaveSpecialStructure(const std::vector<int32> &indexes,
+                                        int32 *first_nonnegative_pos,
+                                        int32 *first_nonnegative_value,
+                                        int32 *num_nonnegative_indexes) {
+  KALDI_ASSERT(!indexes.empty());
+  const int32 *indexes_ptr = &(indexes[0]);
+  size_t pos = 0, size = indexes.size();
+
+  // Find the first nonnegative element of 'indexes'.
+  for (; pos < size; ++pos)
+    if (indexes_ptr[pos] >= 0)
+      break;
+  if (pos == size)
+    return false;  // all -1's... should not happen, but not our problem.
+  *first_nonnegative_pos = static_cast<int32>(pos);
+  int32 n = indexes_ptr[pos];
+  *first_nonnegative_value = n;
+  // Find the first element after '*first_nonnegative_pos' that isn't
+  // consecutive.
+  for (; pos < size; ++pos, ++n)
+    if (indexes_ptr[pos] != n)
+      break;
+
+  *num_nonnegative_indexes = n - *first_nonnegative_value;
+
+  // Check that the remaining values are all <0 (assumed equal to -1, but
+  // checking <0 may be faster as just one instruction).
+  for (; pos < size; ++pos)
+    if (indexes_ptr[pos] >= 0)
+      return false;  // does not have the special structure.
+
+  return true;
+}
+
+
+bool ReplaceRowWithMatrixOps(NnetComputation *computation) {
+  bool ans = false;
+  int32 num_commands = computation->commands.size(),
+      num_indexes = computation->indexes.size();
+  for (int32 command_index = 0; command_index < num_commands;
+       command_index++) {
+    // non-const because we'll be changing it.
+    NnetComputation::Command &c = computation->commands[command_index];
+
+    int32 first_nonnegative_pos,
+        first_nonnegative_value,
+        num_nonnegative_indexes;
+    switch (c.command_type) {
+      case kCopyRows: case kAddRows: {
+        int32 indexes_index = c.arg3;
+        KALDI_ASSERT(indexes_index < num_indexes);
+        const std::vector<int32> &indexes = computation->indexes[indexes_index];
+        if (IndexesHaveSpecialStructure(indexes,
+                                        &first_nonnegative_pos,
+                                        &first_nonnegative_value,
+                                        &num_nonnegative_indexes)) {
+          ans = true;
+          c.arg1 = computation->NewSubMatrix(c.arg1, first_nonnegative_pos,
+                                             num_nonnegative_indexes,
+                                             0, -1);
+          c.arg2 = computation->NewSubMatrix(c.arg2, first_nonnegative_value,
+                                             num_nonnegative_indexes,
+                                             0, -1);
+          c.command_type = (c.command_type == kCopyRows ? kMatrixCopy :
+                            kMatrixAdd);
+        }
+        break;
+      }
+      default:
+        continue;
+    }
+  }
+  return ans;
+}
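+
+// Illustration (not part of the compiled code): with
+//   indexes = [ -1, -1, 5, 6, 7, 8, -1 ],
+// a kCopyRows command copies source row 5 to destination row 2, row 6 to
+// row 3, and so on, leaving the other destination rows untouched; the
+// special structure means it can be rewritten as a single kMatrixCopy
+// between a 4-row submatrix of the destination starting at row 2 and a
+// 4-row submatrix of the source starting at row 5.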
+
+// This class implements the internals of the ExpandComputation() function (used
+// in shortcut compilation); see comment by the declaration of
+// ExpandComputation() in nnet-optimize-utils.h for overview.
 class ComputationExpander {
  public:
-  ComputationExpander(const Computation &computation,
+  ComputationExpander(const NnetComputation &computation,
                       bool need_debug_info,
                       int32 num_n_values,
-                      Computation *expanded_computation):
+                      NnetComputation *expanded_computation):
       computation_(computation),
       need_debug_info_(need_debug_info),
       num_n_values_(num_n_values),
-      expanded_computation_(expanded_computation) { }
+      expanded_computation_(expanded_computation) {
+    KALDI_ASSERT(num_n_values > 2);
+  }
 
   // This function call implements the functionality of the class,
   // expanding the computation.
   bool Expand();
 
  private:
+  // This function sets up and computes the 'n_fast' vector (see the comment
+  // by it for what this is).
+  void InitFastInfo();
+
+  // This function sets up the 'matrices' vector in 'expanded_computation_'.
+  // It's quite simple: it just multiplies all the num-rows by num_n_values_ and
+  // divides by 2, and leaves the num-cols the same.
+  void ComputeMatrices();
+
+  // This function, only called if need_debug_info_ is true, sets up
+  // the 'matrix_debug_info' vector in 'expanded_computation_'.
+  void ComputeDebugInfo();
+
+  // This function sets up the 'submatrices' vector in 'expanded_computation_'.
+  // Column ranges always stay the same, but for row ranges it's a little
+  // more complicated.
+  void ComputeSubmatrixInfo();
+
+  // This function computes all the PrecomputedIndexes in the
+  // 'component_precomputed_indexes' member of 'expanded_computation_'.
+  // They are all generated from scratch, by using the Component::PrecomputedIndexes()
+  // member function.  The 'input_indexes' and 'output_indexes' arguments are worked
+  // out from the 'debug_info' [if we're not generating debug_info we specially generate
+  // it for the specific matrices in question], and the 'need_backprop'
+  // argument is worked out by seeing whether there is a call to Backprop() with
+  // the same precomputed-indexes element.
+  void ComputePrecomputedIndexes();
+
+  // Computes the 'commands' member of the output.  This function also adds as
+  // needed to 'indexes', 'indexes_multi' and 'indexes_ranges' in the output.
+  // Later on we can call RenumberComputation() to remove any duplicates that
+  // might result from this.
+  void ComputeCommands();
+
+  // This 'n_fast' vector is indexed by the matrix-index in the computation,
+  // i.e. the same index that indexes computation_.matrix_info and
+  // expanded_computation_->matrix_info.  For each matrix-index m > 0 it
+  // contains true if the 'n' index varies 'fast', or false if it varies
+  // 'slowly'.  By 'fast' and 'slow', we mean in the same sense as is described
+  // in the comment for ComputationIsDecomposable() in nnet-optimize-utils.h.
+  std::vector<bool> n_fast;
+
+  const NnetComputation &computation_;
+  bool need_debug_info_;
+  int32 num_n_values_;
+  NnetComputation *expanded_computation_;
+};
+
+
+class ComputationLoopedOptimizer {
+ public:
+  ComputationLoopedOptimizer(const Nnet &nnet,
+                             NnetComputation *computation):
+      nnet_(nnet), computation_(computation) { }
+  bool Optimize();
+
+ private:
+
+  // Figures out the time shift between the successive computation requests.
+  static int32 FindTimeShift(const NnetComputation &computation,
+                             const std::vector<int32> &segment_ends);
+
+  // This function creates a mapping from a matrix-index > 0
+  // to a pair (unique_id, time_offset) that represents the debug-info
+  // for that matrix-id in computation.debug_info.
+  // The output vector is indexed by the matrix-index in the computation (the
+  // zeroth member is not valid).
+  // The 'time_offset' is equal to the 't' value of the zeroth element of the
+  // cindexes vector.  The 'unique_id' is an integer that uniquely identifies
+  // what we get from subtracting the 'time_offset' from each 't' value of
+  // that 'cindexes' vector, and then pairing it up with the 'is_deriv'
+  // value of the DebugInfo.  That is, if two 'cindexes' vectors differ only
+  // by a time offset, and the 'is_deriv' values are the same, they will map
+  // to the same unique_id.
+  // The output 'matrix_to_pair' is indexed by matrix index (the zeroth element
+  // is not set).
+  static void CreateMatrixPairs(const NnetComputation &computation,
+                                std::vector<std::pair<int32, int32> > *matrix_to_pair);
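+
+  // For example (illustrative values): a matrix whose cindexes are
+  // [ (n, 100), (n, 101), (n, 102) ] for some network node n gets
+  // time_offset = 100, and a matrix with cindexes
+  // [ (n, 130), (n, 131), (n, 132) ] gets time_offset = 130; after the
+  // offsets are subtracted the two cindex vectors are identical, so both
+  // matrices map to the same unique_id.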
+
+  // This very simple helper function reverses the map 'matrix_to_pair' so we can
+  // do the reverse lookup.  It outputs a map from pair to matrix index m, where
+  // 1 <= m < matrix_to_pair.size().
+  static void GetPairToMatrixMap(
+      std::vector<std::pair<int32, int32> > &matrix_to_pair,
+      unordered_map<std::pair<int32, int32>, int32,
+                    PairHasher<int32> > *pair_to_matrix);
+
+  // Given a vector of lists, one list for each segment, of the active matrices
+  // at the end of that segment, this function converts those lists into a
+  // different representation where each matrix is represented as a pair instead
+  // of as a single int32.  'active_pairs' will have the same dimensions as
+  // 'active_matrices'.
+  static void ConvertListsToPairLists(
+      const std::vector<std::vector<int32> > &active_matrices,
+      const std::vector<std::pair<int32, int32> > &matrix_to_pair,
+      std::vector<std::vector<std::pair<int32, int32> > > *active_pairs);
+
+  // This function modifies the lists of active matrices per segment
+  // (represented as pairs) in 'active_pairs' by sorting them and
+  // then subtracting the time-offset of the first pair in each
+  // list ((*active_pairs)[seg][0].second) from all elements in that list.
+  // It puts the subtracted offset in (*time_offsets)[seg].  This change
+  // of representation makes it easy to tell whether the sets of active
+  // matrices for different segments are identical up to a time-offset.
+  static void NormalizePairLists(
+      std::vector<std::vector<std::pair<int32, int32> > > *active_pairs,
+      std::vector<int32> *time_offsets);
+
+  // This function looks in the matrix 'active_pairs' for the first pair of
+  // identical values, i.e. it is looking for i < j for which
+  // normalized_active_pairs[i] == normalized_active_pairs[j].  (However, the
+  // pair i,j must satisfy an extra condition; see below.)  If a pair
+  // i,j exists satisfying these conditions, this function outputs them to *seg1
+  // and *seg2, and returns true; otherwise it returns false.
+  //
+  // Extra condition:
+  // It turns out that under some circumstances, we could
+  // find repeats that were not "really" repeats (the matrices were not
+  // time-shifted).  The situation was a bit obscure (it was a non-recurrent
+  // setup with a lot of extra-right-context, where some inputs were never
+  // used), but to prevent it happening again we are now checking, in addition
+  // to the above, that the time-shift between the segments (i.e.
+  // time_offsets[j] - time_offsets[i]) has the "expected value" based on the
+  // assumption that each segment should be shifted relative to the previous
+  // segment by 'time_shift_per_segment'.
+  static bool FindFirstRepeat(
+      const std::vector<std::vector<std::pair<int32, int32> > > &normalized_active_pairs,
+      const std::vector<int32> &time_offsets,
+      int32 time_shift_per_segment,
+      int32 *seg1, int32 *seg2);
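+
+  // Numeric illustration (made-up values): suppose segment 0 ends with active
+  // matrices represented as pairs [ (7,100), (9,104) ] and segment 1 with
+  // [ (7,130), (9,134) ].  Normalizing gives [ (7,0), (9,4) ] for both, with
+  // time_offsets 100 and 130, so the two segments are identical up to a time
+  // shift of 30, and FindFirstRepeat() reports them as a repeat provided 30
+  // matches the expected shift per segment.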
+
+  // Converts a list of pairs (e.g. one of the elements of the output of
+  // 'ConvertListsToPairLists') back into a list of matrix indexes, using the
+  // map 'pair_to_matrix'.
+  static void PairListToMatrixList(
+      const std::vector<std::pair<int32, int32> > &pair_list,
+      const unordered_map<std::pair<int32, int32>, int32,
+                          PairHasher<int32> > &pair_to_matrix,
+      std::vector<int32> *matrix_list);
+
+  // This function just does some checking (via asserts) that
+  // the lists of matrices 'list1' and 'list2' are of the same length,
+  // that time_difference > 0, and that each matrix with index m = list2[i] is
+  // of the same dimension as list1[i], with Cindexes that are the same except
+  // for the time index being greater by 'time_difference'.
+  static void CheckIdentifiedMatrices(
+      const NnetComputation &computation,
+      const std::vector<int32> &list1,
+      const std::vector<int32> &list2,
+      int32 time_difference);
+
+  // Given two command indexes command1 < command2 pointing to commands of type
+  // kNoOperationMarker, this function modifies the computation by
+  // removing all commands after command2, replacing command2 with a kGotoLabel
+  // command pointing to command1 and then inserting just before command1
+  // a marker of type kNoOperationLabel.
+  static void FormInfiniteLoop(int32 command1, int32 command2,
+                               NnetComputation *computation);
+
+  // This is to be called after FormInfiniteLoop.  It inserts, just before
+  // the final kGotoLabel command, commands that initialize
+  // each of the matrices in list 'matrices1' from the corresponding
+  // matrix in 'matrices2', using the kAllocMatrixFromOther command.
+  // This effectively does, for example, matrices1[i] = matrices2[i],
+  // while initializing matrices1[i] and deallocating matrices2[i];
+  // it's implemented as a shallow swap.
+  // It does this in such an order that even if the two lists are
+  // not disjoint, the right thing happens.
+  static void AddMatrixSwapCommands(
+      const std::vector<int32> &matrices1,
+      const std::vector<int32> &matrices2,
+      NnetComputation *computation);
+
+  // Called from AddMatrixSwapCommands, this function figures out for us
+  // an acceptable order in which to execute the kAllocMatrixFromOther
+  // commands.  This is easy to do if matrices1 and matrices2 are disjoint
+  // sets, but has to be done more carefully if they overlap.
+  // The output is a list of pairs where each pair (a, b) comes
+  // from matrices1 and matrices2 in the same position, i.e.
+  // a = matrices1[i] and b = matrices2[i].
+  static void GetMatrixSwapOrder(
+      const std::vector<int32> &matrices1,
+      const std::vector<int32> &matrices2,
+      std::vector<std::pair<int32, int32> > *swaps);
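+
+  // For example (made-up matrix indexes): with matrices1 = [ 1, 2 ] and
+  // matrices2 = [ 2, 3 ], doing the swap (2,3) first would overwrite the old
+  // contents of matrix 2 before the swap (1,2) reads them; an acceptable
+  // order is (1,2) first and then (2,3), i.e. a matrix must be read before
+  // it is overwritten.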
+
+  /// Given a list of command indexes ('segment_end_commands') which are
+  /// expected to be command indexes of the kNoOperationMarker at segment
+  /// boundaries, this function outputs for each of these command indexes a list
+  /// of matrices which are 'active' at that point in time.  By 'active' we mean
+  /// that the matrix has been written to before that time (note: we don't count
+  /// initialization with zeros as being written to), and will be read after
+  /// that time.  This is the list of matrices that 'need to be in scope'
+  /// at those points in time.  '*active_matrices' is indexed by the
+  /// same index as 'segment_end_commands', and is then a list of active
+  /// matrices, in numerical order of matrix index.
+  /// Note: for each i, (*active_matrices)[i] will be sorted and unique.
+  static void FindActiveMatrices(const NnetComputation &computation,
+                                 const Analyzer &analyzer,
+                                 const std::vector<int32> &segment_end_commands,
+                                 std::vector<std::vector<int32> > *active_matrices);
+
+  const Nnet &nnet_;
+  NnetComputation *computation_;
+  Analyzer analyzer_;
+  std::vector<std::pair<int32, int32> > matrix_to_pair_;
+
+  std::vector<int32> segment_end_commands_;
+};
+
 // static
 int32 ComputationLoopedOptimizer::FindTimeShift(
     const NnetComputation &computation,
@@ -2372,173 +2678,6 @@ void OptimizeLoopedComputation(const Nnet &nnet,
 }
 
-class ComputationLoopedOptimizer {
- public:
-  ComputationLoopedOptimizer(const Nnet &nnet,
-                             NnetComputation *computation):
-      nnet_(nnet), computation_(computation) { }
-  bool Optimize();
-
- private:
-
-  // Figures out the time shift between the successive computation requests.
-  static int32 FindTimeShift(const NnetComputation &computation,
-                             const std::vector<int32> &segment_ends);
-
-  // This function creates a mapping from a matrix-index > 0
-  // to a pair (unique_id, time_offset) that represents the debug-info
-  // for that matrix-id in computation.debug_info.
-  // The output vector is indexed by the matrix-index in the computation (the
-  // zeroth member is not valid).
-  // The 'time_offset' is equal to the 't' value of the zeroth element of the
-  // cindexes vector.  The 'unique_id' is an integer that uniquely identifies
-  // what we get from subtracting the 'time_offset' from each 't' value of
-  // that 'cindexes' vector, and then pairing it up with the 'is_deriv'
-  // value of the DebugInfo.  That is, if two 'cindexes' vectors differ only
-  // by a time offset, and the 'is_deriv' values are the same, they will map
-  // to the same unique_id.
-  // The output 'matrix_to_pair' is indexed by matrix index (the zeroth element
-  // is not set).
-  static void CreateMatrixPairs(const NnetComputation &computation,
-                                std::vector<std::pair<int32, int32> > *matrix_to_pair);
-
-  // This very simple helper function reverses the map 'matrix_to_pair' so we can
-  // do the reverse lookup.  It outputs a map from pair to matrix index m, where
-  // 1 <= m < matrix_to_pair.size().
-  static void GetPairToMatrixMap(
-      std::vector<std::pair<int32, int32> > &matrix_to_pair,
-      unordered_map<std::pair<int32, int32>, int32,
-                    PairHasher<int32> > *pair_to_matrix);
-
-  // Given a vector of lists, one list for each segment, of the active matrices
-  // at the end of that segment, this function converts those lists into a
-  // different representation where each matrix is represented as a pair instead
-  // of as a single int32.  'active_pairs' will have the same dimensions as
-  // 'active_matrices'.
-  static void ConvertListsToPairLists(
-      const std::vector<std::vector<int32> > &active_matrices,
-      const std::vector<std::pair<int32, int32> > &matrix_to_pair,
-      std::vector<std::vector<std::pair<int32, int32> > > *active_pairs);
-
-  // This function modifies the lists of active matrices per segment
-  // (represented as pairs) in 'active_pairs' by sorting them and
-  // then subtracting the time-offset of the first pair in each
-  // list ((*active_pairs)[seg][0].second) from all elements in that list.
-  // It puts the subtracted offset in (*time_offsets)[seg].  This change
-  // of representation makes it easy to tell whether the sets of active
-  // matrices for different segments are identical up to a time-offset.
-  static void NormalizePairLists(
-      std::vector<std::vector<std::pair<int32, int32> > > *active_pairs,
-      std::vector<int32> *time_offsets);
-
-  // This function looks in the matrix 'active_pairs' for the first pair of
-  // identical values, i.e. it is looking for i < j for which
-  // normalized_active_pairs[i] == normalized_active_pairs[j].
-  // (However, the pair i,j must satisfy an extra condition; see below.)
-  // If a pair i,j exists satisfying these conditions, this function outputs
-  // them to *seg1 and *seg2, and returns true; otherwise it returns false.
-  //
-  // Extra condition:
-  // It turns out that under some circumstances, we could
-  // find repeats that were not "really" repeats (the matrices were not
-  // time-shifted).  The situation was a bit obscure (it was a non-recurrent
-  // setup with a lot of extra-right-context, where some inputs were never
-  // used), but to prevent it happening again we are now checking, in addition
-  // to the above, that the time-shift between the segments (i.e.
-  // time_offsets[j] - time_offsets[i]) has the "expected value" based on the
-  // assumption that each segment should be shifted relative to the previous
-  // segment by 'time_shift_per_segment'.
-  static bool FindFirstRepeat(
-      const std::vector<std::vector<std::pair<int32, int32> > > &normalized_active_pairs,
-      const std::vector<int32> &time_offsets,
-      int32 time_shift_per_segment,
-      int32 *seg1, int32 *seg2);
-
-  // Converts a list of pairs (e.g. one of the elements of the output of
-  // 'ConvertListsToPairLists') back into a list of matrix indexes, using the
-  // map 'pair_to_matrix'.
-  static void PairListToMatrixList(
-      const std::vector<std::pair<int32, int32> > &pair_list,
-      const unordered_map<std::pair<int32, int32>, int32,
-                          PairHasher<int32> > &pair_to_matrix,
-      std::vector<int32> *matrix_list);
-
-  // This function just does some checking (via asserts) that
-  // the lists of matrices 'list1' and 'list2' are of the same length,
-  // that time_difference > 0, and that each matrix with index m = list2[i] is
-  // of the same dimension as list1[i], with Cindexes that are the same except
-  // for the time index being greater by 'time_difference'.
-  static void CheckIdentifiedMatrices(
-      const NnetComputation &computation,
-      const std::vector<int32> &list1,
-      const std::vector<int32> &list2,
-      int32 time_difference);
-
-  // Given two command indexes command1 < command2 pointing to commands of type
-  // kNoOperationMarker, this function modifies the computation by
-  // removing all commands after command2, replacing command2 with a kGotoLabel
-  // command pointing to command1 and then inserting just before command1
-  // a marker of type kNoOperationLabel.
-  static void FormInfiniteLoop(int32 command1, int32 command2,
-                               NnetComputation *computation);
-
-  // This is to be called after FormInfiniteLoop.  It inserts, just before
-  // the final kGotoLabel command, commands that initialize
-  // each of the matrices in list 'matrices1' from the corresponding
-  // matrix in 'matrices2', using the kAllocMatrixFromOther command.
-  // This effectively does, for example, matrices1[i] = matrices2[i],
-  // while initializing matrices1[i] and deallocating matrices2[i];
-  // it's implemented as a shallow swap.
-  // It does this in such an order that even if the two lists are
-  // not disjoint, the right thing happens.
-  static void AddMatrixSwapCommands(
-      const std::vector<int32> &matrices1,
-      const std::vector<int32> &matrices2,
-      NnetComputation *computation);
-
-  // Called from AddMatrixSwapCommands, this function figures out for us
-  // an acceptable order in which to execute the kAllocMatrixFromOther
-  // commands.  This is easy to do if matrices1 and matrices2 are disjoint
-  // sets, but has to be done more carefully if they overlap.
-  // The output is a list of pairs where each pair (a, b) comes
-  // from matrices1 and matrices2 in the same position, i.e.
-  // a = matrices1[i] and b = matrices2[i].
- static void GetMatrixSwapOrder( - const std::vector &matrices1, - const std::vector &matrices2, - std::vector > *swaps); - - - - /// Given a list of command indexes ('segment_end_commands') which are - /// expected to be command indexes of the kNoOperationMarker at segment - /// boundaries, this function outputs for each of these command indexes a list - /// of matrices which are 'active' at that point in time. By 'active' we mean - /// that the matrix has been written to before that time (note, we don't count - /// initialization with zeros as being written to); and will be read after - /// that time. These is the list of matrices that 'need to be in scope' - /// at those points in time. '*active_matrices' is indexed by the - /// same index as 'segment_end_commands', and is then a list of active - /// matrices, in numerical order of matrix index. - /// Note: for each i, (*active_matrices)[i] will be sorted and unique. - static void FindActiveMatrices(const NnetComputation &computation, - const Analyzer &analyzer, - const std::vector &segment_end_commands, - std::vector > *active_matrices); - - - const Nnet &nnet_; - NnetComputation *computation_; - Analyzer analyzer_; - std::vector > matrix_to_pair_; - - std::vector segment_end_commands_; -}; - - void FixGotoLabel(NnetComputation *computation) { int32 num_commands = computation->commands.size(); diff --git a/src/nnet3/nnet-optimize-utils.h b/src/nnet3/nnet-optimize-utils.h index 8299bef0478..f3f27a12c8e 100644 --- a/src/nnet3/nnet-optimize-utils.h +++ b/src/nnet3/nnet-optimize-utils.h @@ -337,10 +337,6 @@ class DerivativeTimeLimiter { }; -// This utility function, used in code that calls LimitDerivativeTimes(), returns -// the largest time 't' in any of the 'outputs' in the computation request, -// or crashes if there are no outputs (or no cindexes in those outputs). -int32 MaxOutputTimeInRequest(const ComputationRequest &request); // This is the top-level interface to limit the times on which derivatives are // computed (e.g. for truncated BPTT); internally it uses class @@ -367,16 +363,16 @@ void LimitDerivativeTimes(const Nnet &nnet, 'regular' structure, is as follows: - The 't' and 'x' values present are the same for each 'n', - The order in which the indexes appear is EITHER of the following: - - The 'n' varies the most rapidly, i.e. the order is: + - The 'n' index varies 'fast', i.e. the order is: (t1,x1,0), (t1,x1,1) ... (t1,x1,N-1) \ (t2,x2,0), (t2,x2,1) ... (t2,x2,N-1) ... - - The 'n' varies the least rapidly, i.e. the order is: + - The 'n' index varies 'slowly', i.e. the order is: (t1,x1,0), (t2,x2,0) ... \ (t1,x1,1), (t2,x2,1) ... \ ... \ (t1,x2,N-1), (t2,x2,N-1) ... In either case, there does not have to be any particular rhyme or - reason to the order of the t and x values, the regularity on 'n' is + reason to the order of the t and x values; the regularity on 'n' is all that we care about. */ bool ComputationIsDecomposable(const ComputationRequest &request, @@ -404,19 +400,27 @@ bool ComputationIsDecomposable(const ComputationRequest &request, object could not be suitably expanded. If it returns false, the output 'expanded_computation' is undefined (may contain junk). 
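   As a concrete illustration of the 'regular' structure defined above (the
   values here are invented), an 'n varies fast' input for N = 2 would order
   its cindexes:

      (t=0, x=0, n=0), (t=0, x=0, n=1),
      (t=1, x=0, n=0), (t=1, x=0, n=1), ...

   so a computation compiled for a small fixed number of sequences can be
   expanded to 'num_n_values' sequences by repeating the n-dependent pattern
   with fresh 'n' values.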
*/ -bool ExpandComputation(const Computation &computation, +bool ExpandComputation(const NnetComputation &computation, bool need_debug_info, int32 num_n_values, - Computation *expanded_computation); + NnetComputation *expanded_computation); +/// This function detects cases where commands of type kCopyRows, kAddRows or +/// kAddToRows can be converted to commands of type kMatrixCopy or kMatrixAdd, +/// and converts them (this may involve adding submatrices). After doing this +/// you should at some point do RenumberComputation(), which will remove any +/// now-unused members of computation->indexes. +/// This function returns true if it made any changes to the computation. +bool ReplaceRowWithMatrixOps(NnetComputation *computation); -/// This function detects submatrices, matrices, and members of indexes_multi -/// and indexes that are never used (e.g. due to changes made in other -/// optimization code), and removes them from the computation by way of suitable -/// renumbering. It does not remove no-ops from computation->commands_; to do -/// that, call RemoveNoOps(computation). +/// This function detects submatrices and matrices that are never used (e.g. due +/// to changes made in other optimization code), and members of indexes, +/// indexes_multi and indexes_ranges that are unused or are duplicates, and +/// removes them from the computation by way of suitable renumbering. It does +/// not remove no-ops from computation->commands_; to do that, call +/// RemoveNoOps(computation). void RenumberComputation(NnetComputation *computation); /// Removes commands of type kNoOperation in the computation. diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc index 6e4242ace09..a1a62e3944c 100644 --- a/src/nnet3/nnet-optimize.cc +++ b/src/nnet3/nnet-optimize.cc @@ -34,7 +34,13 @@ void NnetOptimizeOptions::Read(std::istream &is, bool binary) { ReadBasicType(is, binary, &propagate_in_place); ExpectToken(is, binary, ""); ReadBasicType(is, binary, &backprop_in_place); - ExpectToken(is, binary, ""); + std::string tok; + ReadToken(is, binary, &tok); + if (tok == "") { + ReadBasicType(is, binary, &replace_row_with_matrix_ops); + ReadToken(is, binary, &tok); + } + KALDI_ASSERT(tok == ""); ReadBasicType(is, binary, &convert_addition); ExpectToken(is, binary, ""); ReadBasicType(is, binary, &remove_assignments); @@ -52,7 +58,7 @@ void NnetOptimizeOptions::Read(std::istream &is, bool binary) { ReadBasicType(is, binary, &min_deriv_time); ExpectToken(is, binary, ""); ReadBasicType(is, binary, &max_deriv_time); - std::string tok; + ReadToken(is, binary, &tok); if (tok == "") { ReadBasicType(is, binary, &max_deriv_time_relative); @@ -73,6 +79,8 @@ void NnetOptimizeOptions::Write(std::ostream &os, bool binary) const { WriteBasicType(os, binary, propagate_in_place); WriteToken(os, binary, ""); WriteBasicType(os, binary, backprop_in_place); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, replace_row_with_matrix_ops); WriteToken(os, binary, ""); WriteBasicType(os, binary, convert_addition); WriteToken(os, binary, ""); @@ -403,8 +411,27 @@ void ConvertAdditionToAssignment(const Nnet &nnet, } } + +int32 MaxOutputTimeInRequest(const ComputationRequest &request) { + int32 ans = std::numeric_limits::min(); + for (size_t i = 0; i < request.outputs.size(); i++) { + const std::vector &indexes (request.outputs[i].indexes); + std::vector::const_iterator iter = indexes.begin(), + end = indexes.end(); + for (; iter != end; ++iter) + if (iter->t > ans) + ans = iter->t; + } + if (ans == 
std::numeric_limits::min()) { + KALDI_ERR << "Failed to find any output indexes in computation request."; + } + return ans; +} + + void Optimize(const NnetOptimizeOptions &config, const Nnet &nnet, + int32 max_output_time_in_request, NnetComputation *computation) { if (GetVerboseLevel() >= 4) CheckComputation(nnet, *computation, true); @@ -415,7 +442,7 @@ void Optimize(const NnetOptimizeOptions &config, int32 max_deriv_time = config.max_deriv_time; if (config.max_deriv_time_relative != std::numeric_limits::max()) max_deriv_time = config.max_deriv_time_relative + - MaxOutputTimeInRequest(request); + max_output_time_in_request; LimitDerivativeTimes(nnet, config.min_deriv_time, max_deriv_time, computation); } @@ -443,6 +470,21 @@ void Optimize(const NnetOptimizeOptions &config, CheckComputation(nnet, *computation, false); } + if (config.optimize && config.replace_row_with_matrix_ops) { + if (ReplaceRowWithMatrixOps(computation)) { + // if anything was changed... + + // We have to call RenumberComputation() to get rid of any removed + // indexes... actually this could be a little wasteful, but unfortunately + // it doesn't seem like we'd otherwise be doing any renumbering past this + // point. + RenumberComputation(computation); + if (GetVerboseLevel() >= 4) + CheckComputation(nnet, *computation, false); + } + } + + if (config.optimize && config.initialize_undefined) { RemoveUnnecessaryZeroing(nnet, computation); if (GetVerboseLevel() >= 4) @@ -510,32 +552,32 @@ size_t ComputationRequestHasher::IoSpecificationToInt(const IoSpecification& spe // it makes the hasher faster. StringHasher string_hasher; ans = string_hasher(spec.name); - std::vector::const_iterator itr = spec.indexes.begin(), + std::vector::const_iterator iter = spec.indexes.begin(), end = spec.indexes.end(), med = end; - if (med > itr + n) + if (med > iter + n) med = iter + n; - for (; itr != med; ++itr) { - ans += (*itr).n * 1619; - ans += (*itr).t * 15649; - ans += (*itr).x * 89809; + for (; iter != med; ++iter) { + ans += iter->n * 1619; + ans += iter->t * 15649; + ans += iter->x * 89809; } // after the first 'n' values, look only at every n'th value. this makes the // hashing much faster, and in the kinds of structures that we actually deal // with, we shouldn't get unnecessary hash collisions as a result of this // optimization. 
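(Worked example, not in the original source: if n were 10 and the IoSpecification contained 1000 indexes, the loop above would hash the first 10 indexes fully and the strided loop below would touch only indexes 10, 20, ..., 990, so just 109 of the 1000 indexes are ever read.)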
- for (; iter < end; itr += n) { - ans += (*itr).n * 1619; - ans += (*itr).t * 15649; - ans += (*itr).x * 89809; + for (; iter < end; iter += n) { + ans += iter->n * 1619; + ans += iter->t * 15649; + ans += iter->x * 89809; } return ans; } void CachingOptimizingCompiler::UpdateCache(const ComputationRequest *request, NnetComputation *computation) { - if (computation_cache_.size() == cache_capacity_) { + if (computation_cache_.size() == config_.cache_capacity) { // full, locate the least-recently-accessed request const CacheType::iterator it = computation_cache_.find(access_queue_.front()); @@ -635,7 +677,9 @@ const NnetComputation* CachingOptimizingCompiler::Compile( ComputationChecker checker(check_config, nnet_, *computation); checker.Check(); } - Optimize(opt_config_, nnet_, computation); + Optimize(opt_config_, nnet_, + MaxOutputTimeInRequest(*request), + computation); if (GetVerboseLevel() >= verbose_cutoff) { std::ostringstream os; computation->Print(os, nnet_); diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h index bade4a47a78..86c6427396a 100644 --- a/src/nnet3/nnet-optimize.h +++ b/src/nnet3/nnet-optimize.h @@ -37,6 +37,7 @@ struct NnetOptimizeOptions { bool consolidate_model_update; bool propagate_in_place; bool backprop_in_place; + bool replace_row_with_matrix_ops; bool convert_addition; bool remove_assignments; bool allow_left_merge; @@ -57,6 +58,7 @@ struct NnetOptimizeOptions { consolidate_model_update(true), propagate_in_place(true), backprop_in_place(true), + replace_row_with_matrix_ops(true), convert_addition(true), remove_assignments(true), allow_left_merge(true), @@ -119,9 +121,39 @@ struct NnetOptimizeOptions { bool operator == (const NnetOptimizeOptions &other) const; }; -/// This is the top-level function for optimizing a computation. + +/* This utility function, used in code that calls LimitDerivativeTimes() (and + required in code that calls Optimize(), returns the largest time + 't' in any of the 'outputs' in the computation request, or crashes if there + are no outputs (or no cindexes in those outputs). */ +int32 MaxOutputTimeInRequest(const ComputationRequest &request); + + +/** This is the top-level function for optimizing a computation. Note: it + should really be called OptimizeAndPostprocess(), because there is at least + one thing it does (reordering I/O commands) that is necessary for a + computation to be run. + + @param [in] config The options that control, among other things, + which optimizations to apply. + @param [in] nnet The neural net for which the computation is being built + @param [in] max_output_time_in_request This value is only needed when the + max-deriv-time-relative config value is set in + 'config'. It should be set to the largest 't' value + encountered in any of the indexes in the 'output' + IoSpecifications in the ComputationRequests used to + compile the computation. However if there are multiple + ComputationRequests (i.e. it was an online computation) + you can just set it to any value you want, because + backpropagation is not supported so the + max-deriv-time-relative configuration value would not + have any effect. + @param [in,out] computation The computation to be optimized; this function + modifies it in-place. + */ void Optimize(const NnetOptimizeOptions &config, const Nnet &nnet, + int32 max_output_time_in_request, NnetComputation *computation); // Hash function for ComputationRequest. 
It converts @@ -176,13 +208,15 @@ struct CachingOptimizingCompilerOptions { class CachingOptimizingCompiler { public: CachingOptimizingCompiler(const Nnet &nnet, - const CachingOptimizingCompilerOptions &config): - nnet_(nnet), config_(config), cache_capacity_(capacity) { } + const CachingOptimizingCompilerOptions config = + CachingOptimizingCompilerOptions()): + nnet_(nnet), config_(config) { } /// Note: nnet is retained as a const reference but opt_config is copied. CachingOptimizingCompiler(const Nnet &nnet, const NnetOptimizeOptions &opt_config, - const CachingOptimizingCompilerOptions &config): + const CachingOptimizingCompilerOptions config = + CachingOptimizingCompilerOptions()): nnet_(nnet), config_(config), opt_config_(opt_config) { } ~CachingOptimizingCompiler(); @@ -223,9 +257,6 @@ class CachingOptimizingCompiler { NnetComputation *computation); // This function updates the recently accessed queue. void UpdateAccessQueue(CacheType::iterator &cit); - // This configuration value determines how many unique Computations - // to cache in our most-recently-used cache. - int32 cache_capacity_; }; diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc index 58908a0fe09..84a262b1695 100644 --- a/src/nnet3/nnet-simple-component.cc +++ b/src/nnet3/nnet-simple-component.cc @@ -5312,11 +5312,13 @@ void LstmNonlinearityComponent::InitFromConfig(ConfigLine *cfl) { if (cfl->HasUnusedValues()) KALDI_ERR << "Could not process these elements in initializer: " << cfl->UnusedValues(); - if (!ok) + if (ok) { + Init(cell_dim, param_stddev, tanh_self_repair_threshold, + sigmoid_self_repair_threshold, self_repair_scale); + } else { KALDI_ERR << "Invalid initializer for layer of type " << Type() << ": \"" << cfl->WholeLine() << "\""; - Init(cell_dim, param_stddev, tanh_self_repair_threshold, - sigmoid_self_repair_threshold, self_repair_scale); + } } From 1d1610cd3241fca97fcccfb3f4dc9a660259b71a Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Tue, 6 Sep 2016 14:18:40 -0400 Subject: [PATCH 069/530] asr_diarization: Modified online ivector extraction to accept frame weights option --- .../get_ivector_weights_from_ctm_conf.pl | 77 +++++++++++++++++++ .../online/nnet2/extract_ivectors_online.sh | 35 +++++++-- src/online2bin/ivector-extract-online2.cc | 35 ++++++++- 3 files changed, 140 insertions(+), 7 deletions(-) create mode 100755 egs/ami/s5b/local/get_ivector_weights_from_ctm_conf.pl diff --git a/egs/ami/s5b/local/get_ivector_weights_from_ctm_conf.pl b/egs/ami/s5b/local/get_ivector_weights_from_ctm_conf.pl new file mode 100755 index 00000000000..96db9af3638 --- /dev/null +++ b/egs/ami/s5b/local/get_ivector_weights_from_ctm_conf.pl @@ -0,0 +1,77 @@ +#! 
/usr/bin/perl
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $pad_frames = 0;
+my $silence_weight = 0.00001;
+my $scale_weights_by_ctm_conf = "false";
+my $frame_shift = 0.01;
+
+GetOptions('pad-frames:i' => \$pad_frames,
+           'silence-weight:f' => \$silence_weight,
+           'scale-weights-by-ctm-conf:s' => \$scale_weights_by_ctm_conf,
+           'frame-shift:f' => \$frame_shift);
+
+if (scalar @ARGV != 1) {
+  die "Usage: get_ivector_weights_from_ctm_conf.pl <utt2dur> < <ctm> > <weights>";
+}
+
+my $utt2dur = shift @ARGV;
+
+$pad_frames >= 0 || die "Bad pad-frames value $pad_frames; must be >= 0";
+($scale_weights_by_ctm_conf eq 'false') || ($scale_weights_by_ctm_conf eq 'true') || die "Bad scale-weights-by-ctm-conf $scale_weights_by_ctm_conf; must be true/false";
+
+open(L, "<$utt2dur") || die "unable to open utt2dur file $utt2dur";
+
+my @all_utts = ();
+my %utt2weights;
+
+while (<L>) {
+  chomp;
+  my @A = split;
+  @A == 2 || die "Incorrect format of utt2dur file $_";
+  my ($utt, $len) = @A;
+
+  push @all_utts, $utt;
+  $len = int($len / $frame_shift);
+
+  # Initialize weights for each utterance
+  my $weights = [];
+  for (my $n = 0; $n < $len; $n++) {
+    push @$weights, $silence_weight;
+  }
+  $utt2weights{$utt} = $weights;
+}
+close(L);
+
+while (<STDIN>) {
+  chomp;
+  my @A = split;
+  @A == 6 || die "bad ctm line $_";
+
+  my $utt = $A[0];
+  my $beg = $A[2];
+  my $len = $A[3];
+  my $beg_int = int($beg / $frame_shift) - $pad_frames;
+  my $len_int = int($len / $frame_shift) + 2*$pad_frames;
+  my $conf = $A[5];
+
+  my $array_ref = $utt2weights{$utt};
+  defined $array_ref || die "No length info for utterance $utt";
+
+  for (my $t = $beg_int; $t < $beg_int + $len_int; $t++) {
+    if ($t >= 0 && $t < @$array_ref) {
+      if ($scale_weights_by_ctm_conf eq "false") {
+        ${$array_ref}[$t] = 1;
+      } else {
+        ${$array_ref}[$t] = $conf;
+      }
+    }
+  }
+}
+
+foreach my $utt (keys %utt2weights) {
+  my $array_ref = $utt2weights{$utt};
+  print ($utt, " [ ", join(" ", @$array_ref), " ]\n");
+}
diff --git a/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh b/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh
index b52de1f516b..f1edd874fa6 100755
--- a/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh
+++ b/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh
@@ -42,6 +42,9 @@ max_count=0  # The use of this option (e.g. --max-count 100) can make
              # posterior-scaling, so assuming the posterior-scale is 0.1,
              # --max-count 100 starts having effect after 1000 frames, or
              # 10 seconds of data.
+weights=
+use_most_recent_ivector=true
+max_remembered_frames=1000
 
 # End configuration section.
 
@@ -89,6 +92,8 @@ splice_opts=$(cat $srcdir/splice_opts)
 # involved in online decoding.  We need to create a config file for iVector
 # extraction.
 
+absdir=$(readlink -f $dir)
+
 ieconf=$dir/conf/ivector_extractor.conf
 echo -n >$ieconf
 cp $srcdir/online_cmvn.conf $dir/conf/ || exit 1;
@@ -103,12 +108,19 @@ echo "--ivector-extractor=$srcdir/final.ie" >>$ieconf
 echo "--num-gselect=$num_gselect" >>$ieconf
 echo "--min-post=$min_post" >>$ieconf
 echo "--posterior-scale=$posterior_scale" >>$ieconf
-echo "--max-remembered-frames=1000" >>$ieconf # the default
+echo "--max-remembered-frames=$max_remembered_frames" >>$ieconf
 echo "--max-count=$max_count" >>$ieconf
+echo "--use-most-recent-ivector=$use_most_recent_ivector" >>$ieconf
+if [ ! 
-z "$weights" ]; then + if [ -f $weights ] && gunzip -c $weights > /dev/null; then + cp -f $weights $absdir/weights.gz || exit 1 + else + echo "Could not open file $weights" + exit 1 + fi +fi -absdir=$(readlink -f $dir) - for n in $(seq $nj); do # This will do nothing unless the directory $dir/storage exists; # it can be used to distribute the data among multiple machines. @@ -117,10 +129,21 @@ done if [ $stage -le 0 ]; then echo "$0: extracting iVectors" - $cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \ - ivector-extract-online2 --config=$ieconf ark:$sdata/JOB/spk2utt scp:$sdata/JOB/feats.scp ark:- \| \ - copy-feats --compress=$compress ark:- \ + if [ ! -z "$weights" ]; then + $cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \ + ivector-extract-online2 --config=$ieconf \ + --frame-weights-rspecifier="ark:gunzip -c $absdir/weights.gz |" \ + --length-tolerance=1 \ + ark:$sdata/JOB/spk2utt scp:$sdata/JOB/feats.scp ark:- \| \ + copy-feats --compress=$compress ark:- \ + ark,scp:$absdir/ivector_online.JOB.ark,$absdir/ivector_online.JOB.scp || exit 1; + else + $cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \ + ivector-extract-online2 --config=$ieconf \ + ark:$sdata/JOB/spk2utt scp:$sdata/JOB/feats.scp ark:- \| \ + copy-feats --compress=$compress ark:- \ ark,scp:$absdir/ivector_online.JOB.ark,$absdir/ivector_online.JOB.scp || exit 1; + fi fi if [ $stage -le 1 ]; then diff --git a/src/online2bin/ivector-extract-online2.cc b/src/online2bin/ivector-extract-online2.cc index 3251d93b5dd..f597f66763b 100644 --- a/src/online2bin/ivector-extract-online2.cc +++ b/src/online2bin/ivector-extract-online2.cc @@ -55,6 +55,8 @@ int main(int argc, char *argv[]) { g_num_threads = 8; bool repeat = false; + int32 length_tolerance = 0; + std::string frame_weights_rspecifier; po.Register("num-threads", &g_num_threads, "Number of threads to use for computing derived variables " @@ -62,6 +64,12 @@ int main(int argc, char *argv[]) { po.Register("repeat", &repeat, "If true, output the same number of iVectors as input frames " "(including repeated data)."); + po.Register("frame-weights-rspecifier", &frame_weights_rspecifier, + "Archive of frame weights to scale stats"); + po.Register("length-tolerance", &length_tolerance, + "Tolerance on the difference in number of frames " + "for feats and weights"); + po.Read(argc, argv); if (po.NumArgs() != 3) { @@ -82,9 +90,9 @@ int main(int argc, char *argv[]) { SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); + RandomAccessBaseFloatVectorReader frame_weights_reader(frame_weights_rspecifier); BaseFloatMatrixWriter ivector_writer(ivectors_wspecifier); - for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { std::string spk = spk2utt_reader.Key(); const std::vector &uttlist = spk2utt_reader.Value(); @@ -105,6 +113,31 @@ int main(int argc, char *argv[]) { &matrix_feature); ivector_feature.SetAdaptationState(adaptation_state); + + if (!frame_weights_rspecifier.empty()) { + if (!frame_weights_reader.HasKey(utt)) { + KALDI_WARN << "Did not find weights for utterance " << utt; + num_err++; + continue; + } + const Vector &weights = frame_weights_reader.Value(utt); + + if (std::abs(weights.Dim() - feats.NumRows()) > length_tolerance) { + num_err++; + continue; + } + + std::vector > frame_weights; + for (int32 i = 0; i < feats.NumRows(); i++) { + if (i < weights.Dim()) + frame_weights.push_back(std::make_pair(i, weights(i))); + else + frame_weights.push_back(std::make_pair(i, 0.0)); + } + + + 
ivector_feature.UpdateFrameWeights(frame_weights); + } int32 T = feats.NumRows(), n = (repeat ? 1 : ivector_config.ivector_period), From 88970c81cd4f581660d828a51a335ed5b7161674 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 2 Sep 2016 14:52:24 -0400 Subject: [PATCH 070/530] asr_diarization: Added script to resolve CTM overlaps --- egs/wsj/s5/steps/resolve_ctm_overlaps.py | 149 +++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100755 egs/wsj/s5/steps/resolve_ctm_overlaps.py diff --git a/egs/wsj/s5/steps/resolve_ctm_overlaps.py b/egs/wsj/s5/steps/resolve_ctm_overlaps.py new file mode 100755 index 00000000000..aaee767e7e4 --- /dev/null +++ b/egs/wsj/s5/steps/resolve_ctm_overlaps.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python +# Copyright 2014 Johns Hopkins University (Authors: Daniel Povey, Vijayaditya Peddinti). +# 2016 Vimal Manohar +# Apache 2.0. + +# Script to combine ctms with overlapping segments + +import sys, math, numpy as np, argparse +break_threshold = 0.01 + +def ReadSegments(segments_file): + segments = {} + for line in open(segments_file).readlines(): + parts = line.strip().split() + segments[parts[0]] = (parts[1], float(parts[2]), float(parts[3])) + return segments + +#def get_breaks(ctm, prev_end): +# breaks = [] +# for i in xrange(0, len(ctm)): +# if ctm[i][2] - prev_end > break_threshold: +# breaks.append([i, ctm[i][2]]) +# prev_end = ctm[i][2] + ctm[i][3] +# return np.array(breaks) + +# Resolve overlaps within segments of the same recording +def ResolveOverlaps(ctms, segments): + total_ctm = [] + if len(ctms) == 0: + raise Exception('Something wrong with the input ctms') + + next_utt = ctms[0][0][0] + for ctm_index in range(len(ctms) - 1): + # Assumption here is that the segments are written in consecutive order? + cur_ctm = ctms[ctm_index] + next_ctm = ctms[ctm_index + 1] + + cur_utt = next_utt + next_utt = next_ctm[0][0] + if (next_utt not in segments): + raise Exception('Could not find utterance %s in segments' % next_utt) + + if len(cur_ctm) > 0: + assert(cur_utt == cur_ctm[0][0]) + + assert(next_utt > cur_utt) + if (cur_utt not in segments): + raise Exception('Could not find utterance %s in segments' % cur_utt) + + # length of this segment + window_length = segments[cur_utt][2] - segments[cur_utt][1] + + # overlap of this segment with the next segment + # Note: It is possible for this to be negative when there is actually + # no overlap between consecutive segments. + overlap = segments[cur_utt][2] - segments[next_utt][1] + + # find the breaks after overlap starts + index = len(cur_ctm) + + for i in xrange(len(cur_ctm)): + if (cur_ctm[i][2] + cur_ctm[i][3]/2.0 > (window_length - overlap/2.0)): + # if midpoint of a hypothesis word is beyond the midpoint of the + # overlap region + index = i + break + + # Ignore the hypotheses beyond this midpoint. They will be considered as + # part of the next segment. 
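(Worked example with invented numbers, added for clarity: suppose segment A spans 0.0-30.0s of its recording and segment B starts at 28.0s, so window_length = 30.0 and overlap = 2.0. The cutoff above is then 30.0 - 1.0 = 29.0: words of A whose midpoint, start + dur/2.0 relative to A, lies past 29.0 are dropped here, and the loop below symmetrically keeps only words of B whose midpoint exceeds 1.0.)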
+ total_ctm += cur_ctm[:index] + + # Ignore the hypotheses of the next utterance that overlaps with the + # current utterance + index = -1 + for i in xrange(len(next_ctm)): + if (next_ctm[i][2] + next_ctm[i][3]/2.0 > (overlap/2.0)): + index = i + break + + if index >= 0: + ctms[ctm_index + 1] = next_ctm[index:] + else: + ctms[ctm_index + 1] = [] + + # merge the last ctm entirely + total_ctm += ctms[-1] + + return total_ctm + +def ReadCtm(ctm_file_lines, segments): + ctms = {} + for key in [ x[0] for x in segments.values() ]: + ctms[key] = [] + + ctm = [] + prev_utt = ctm_file_lines[0].split()[0] + for line in ctm_file_lines: + parts = line.split() + if (prev_utt == parts[0]): + ctm.append([parts[0], parts[1], float(parts[2]), + float(parts[3])] + parts[4:]) + else: + # New utterance. Append the previous utterance's CTM + # into the list for the utterance's recording + ctms[segments[ctm[0][0]][0]].append(ctm) + + assert(parts[0] > prev_utt) + + prev_utt = parts[0] + ctm = [] + ctm.append([parts[0], parts[1], float(parts[2]), + float(parts[3])] + parts[4:]) + + # append the last ctm + ctms[segments[ctm[0][0]][0]].append(ctm) + return ctms + +def WriteCtm(ctm_lines, out_file): + for line in ctm_lines: + out_file.write("{0} {1} {2} {3} {4}\n".format(line[0], line[1], line[2], line[3], " ".join(line[4:]))) + +if __name__ == "__main__": + usage = """ Python script to resolve overlaps in ctms """ + parser = argparse.ArgumentParser(usage) + parser.add_argument('segments', type=str, help = 'use segments to resolve overlaps') + parser.add_argument('ctm_in', type=str, help='input_ctm_file') + parser.add_argument('ctm_out', type=str, help='output_ctm_file') + params = parser.parse_args() + + if params.ctm_in == "-": + params.ctm_in = sys.stdin + else: + params.ctm_in = open(params.ctm_in) + if params.ctm_out == "-": + params.ctm_out = sys.stdout + else: + params.ctm_out = open(params.ctm_out, 'w') + + segments = ReadSegments(params.segments) + + # Read CTMs into a dictionary indexed by the recording + ctms = ReadCtm(params.ctm_in.readlines(), segments) + + for key in sorted(ctms.keys()): + # Process CTMs in the sorted order of recordings + ctm_reco = ctms[key] + ctm_reco = ResolveOverlaps(ctm_reco, segments) + WriteCtm(ctm_reco, params.ctm_out) + params.ctm_out.close() From eb727f101347bf2486b071e8060ff9679d392e4a Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 2 Sep 2016 14:47:24 -0400 Subject: [PATCH 071/530] asr_diarization: AMI script without ivectors --- egs/ami/s5b/local/chain/run_tdnn_noivec.sh | 245 +++++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100755 egs/ami/s5b/local/chain/run_tdnn_noivec.sh diff --git a/egs/ami/s5b/local/chain/run_tdnn_noivec.sh b/egs/ami/s5b/local/chain/run_tdnn_noivec.sh new file mode 100755 index 00000000000..d1329dc2bd1 --- /dev/null +++ b/egs/ami/s5b/local/chain/run_tdnn_noivec.sh @@ -0,0 +1,245 @@ +#!/bin/bash + +# This is a chain-training script with TDNN neural networks. +# Please see RESULTS_* for examples of command lines invoking this script. 
+ + +# local/nnet3/run_tdnn.sh --stage 8 --use-ihm-ali true --mic sdm1 # rerunning with biphone +# local/nnet3/run_tdnn.sh --stage 8 --use-ihm-ali false --mic sdm1 + +# local/chain/run_tdnn.sh --use-ihm-ali true --mic sdm1 --train-set train --gmm tri3 --nnet3-affix "" --stage 12 & + +# local/chain/run_tdnn.sh --use-ihm-ali true --mic mdm8 --stage 12 & +# local/chain/run_tdnn.sh --use-ihm-ali true --mic mdm8 --train-set train --gmm tri3 --nnet3-affix "" --stage 12 & + +# local/chain/run_tdnn.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned& + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 15 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale-nonlinearity 0.00001 \ + --feat-dir data/$mic/${train_set}_sp_hires_comb \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + touch $dir/egs/.nodelete # keep egs around when that run dies. 
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.cmvn-opts "--norm-means=true --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in dev eval; do + ( + nj_dev=`cat data/$mic/${decode_set}_hires/spk2utt | wc -l` + if [ $nj_dev -gt 30 ]; then + nj_dev=30 + fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj_dev --cmd "$decode_cmd" \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 + From 8d529d98e76ce066889b4c3d634028e5459d7e2d Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Sat, 10 Dec 2016 17:49:41 -0500 Subject: [PATCH 072/530] asr_diarization: Adding run_tdnn_1a.sh --- egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh index 8df62af8bad..7a38dc80b26 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh @@ -226,8 +226,12 @@ if [ $stage -le 18 ]; then rm $dir/.error 2>/dev/null || true for decode_set in dev eval; do ( + nj_dev=`cat data/$mic/${decode_set}_hires/spk2utt | wc -l` + if [ $nj_dev -gt $nj ]; then + nj_dev=$nj + fi steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $nj --cmd "$decode_cmd" \ + --nj $nj_dev --cmd "$decode_cmd" \ --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ --scoring-opts "--min-lmwt 5 " \ $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; @@ -239,4 +243,4 @@ if [ $stage -le 18 ]; then exit 1 fi fi -exit 0 \ No newline at end of file +exit 0 From 37f57138b582efc7e44a1908d23660ee7a0f5545 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Sat, 10 Sep 2016 19:57:36 -0400 Subject: [PATCH 073/530] asr_diarization: Modified AMI scoring script to add overlap resolution and non-mbr decoding --- egs/ami/s5b/local/score.sh | 3 ++ egs/ami/s5b/local/score_asclite.sh | 75 +++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 21 deletions(-) diff --git a/egs/ami/s5b/local/score.sh b/egs/ami/s5b/local/score.sh index 6a077c39644..c186c4b303d 100755 --- 
a/egs/ami/s5b/local/score.sh +++ b/egs/ami/s5b/local/score.sh @@ -15,6 +15,9 @@ min_lmwt=7 # unused, max_lmwt=15 # unused, iter=final asclite=true +overlap_spk=4 +resolve_overlaps=false # unused +decode_mbr=true #end configuration section. [ -f ./path.sh ] && . ./path.sh diff --git a/egs/ami/s5b/local/score_asclite.sh b/egs/ami/s5b/local/score_asclite.sh index 7327f6246af..0682722214a 100755 --- a/egs/ami/s5b/local/score_asclite.sh +++ b/egs/ami/s5b/local/score_asclite.sh @@ -12,6 +12,8 @@ max_lmwt=15 asclite=true iter=final overlap_spk=4 +resolve_overlaps=false +stm_suffix= # end configuration section. [ -f ./path.sh ] && . ./path.sh . parse_options.sh || exit 1; @@ -36,7 +38,7 @@ hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl [ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; hubdir=`dirname $hubscr` -for f in $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \ +for f in $data/stm${stm_suffix} $data/glm $lang/words.txt $lang/phones/word_boundary.int \ $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; done @@ -55,36 +57,66 @@ nj=$(cat $dir/num_jobs) mkdir -p $dir/ascoring/log +copy_ctm_script="cat -" +if $resolve_overlaps; then + copy_ctm_script="steps/resolve_ctm_overlaps.py $data/segments - -" +fi + if [ $stage -le 0 ]; then for LMWT in $(seq $min_lmwt $max_lmwt); do rm -f $dir/.error ( - $cmd JOB=1:$nj $dir/ascoring/log/get_ctm.${LMWT}.JOB.log \ - mkdir -p $dir/ascore_${LMWT}/ '&&' \ - lattice-scale --inv-acoustic-scale=${LMWT} "ark:gunzip -c $dir/lat.JOB.gz|" ark:- \| \ - lattice-limit-depth ark:- ark:- \| \ - lattice-push --push-strings=false ark:- ark:- \| \ - lattice-align-words-lexicon --max-expand=10.0 \ - $lang/phones/align_lexicon.int $model ark:- ark:- \| \ - lattice-to-ctm-conf $frame_shift_opt --decode-mbr=$decode_mbr ark:- - \| \ - utils/int2sym.pl -f 5 $lang/words.txt \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/ascore_${LMWT}/${name}.JOB.ctm || touch $dir/.error; + if $decode_mbr; then + $cmd JOB=1:$nj $dir/ascoring/log/get_ctm.${LMWT}.JOB.log \ + mkdir -p $dir/ascore_${LMWT}/ '&&' \ + lattice-scale --inv-acoustic-scale=${LMWT} "ark:gunzip -c $dir/lat.JOB.gz|" ark:- \| \ + lattice-limit-depth ark:- ark:- \| \ + lattice-push --push-strings=false ark:- ark:- \| \ + lattice-align-words-lexicon --max-expand=10.0 \ + $lang/phones/align_lexicon.int $model ark:- ark:- \| \ + lattice-to-ctm-conf $frame_shift_opt --decode-mbr=$decode_mbr ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \ + '>' $dir/ascore_${LMWT}/${name}.JOB.ctm || touch $dir/.error; + else + $cmd JOB=1:$nj $dir/ascoring/log/get_ctm.${LMWT}.JOB.log \ + mkdir -p $dir/ascore_${LMWT}/ '&&' \ + lattice-scale --inv-acoustic-scale=${LMWT} "ark:gunzip -c $dir/lat.JOB.gz|" ark:- \| \ + lattice-limit-depth ark:- ark:- \| \ + lattice-1best ark:- ark:- \| \ + lattice-push --push-strings=false ark:- ark:- \| \ + lattice-align-words-lexicon --max-expand=10.0 \ + $lang/phones/align_lexicon.int $model ark:- ark:- \| \ + nbest-to-ctm $frame_shift_opt ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \ + '>' $dir/ascore_${LMWT}/${name}.JOB.ctm || touch $dir/.error; + fi + # Merge and clean, - for ((n=1; n<=nj; n++)); do cat $dir/ascore_${LMWT}/${name}.${n}.ctm; done > $dir/ascore_${LMWT}/${name}.ctm - rm -f $dir/ascore_${LMWT}/${name}.*.ctm + for ((n=1; n<=nj; n++)); do + cat $dir/ascore_${LMWT}/${name}.${n}.ctm; + rm -f $dir/ascore_${LMWT}/${name}.${n}.ctm + done > 
$dir/ascore_${LMWT}/${name}.utt.ctm
    )&
  done
  wait;
  [ -f $dir/.error ] && echo "$0: error during ctm generation. check $dir/ascoring/log/get_ctm.*.log" && exit 1;
 fi
 
+if [ $stage -le 1 ]; then
+  for LMWT in $(seq $min_lmwt $max_lmwt); do
+    cat $dir/ascore_${LMWT}/${name}.utt.ctm | \
+      $copy_ctm_script | utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \
+      > $dir/ascore_${LMWT}/${name}.ctm || exit 1
+  done
+fi
+
 if [ $stage -le 1 ]; then
   # Remove some stuff we don't want to score, from the ctm.
   # - we remove hesitations here, otherwise the CTM would have a bug!
   #   (confidences in place of the removed hesitations),
-  for x in $dir/ascore_*/${name}.ctm; do
-    cp $x $x.tmpf;
+  for LMWT in $(seq $min_lmwt $max_lmwt); do
+    x=$dir/ascore_${LMWT}/${name}.ctm
+    mv $x $x.tmpf;
     cat $x.tmpf | grep -i -v -E '\[noise|laughter|vocalized-noise\]' | \
       grep -i -v -E ' (ACH|AH|EEE|EH|ER|EW|HA|HEE|HM|HMM|HUH|MM|OOF|UH|UM) ' | \
       grep -i -v -E '<unk>' > $x;
@@ -94,8 +126,9 @@ fi
 
 if [ $stage -le 2 ]; then
   if [ "$asclite" == "true" ]; then
-    oname=$name
+    oname=${name}
     [ ! -z $overlap_spk ] && oname=${name}_o$overlap_spk
+    oname=${oname}${stm_suffix}
     echo "asclite is starting"
     # Run scoring, meaning of hubscr.pl options:
     # -G .. produce alignment graphs,
@@ -109,10 +142,10 @@ if [ $stage -le 2 ]; then
     # -V .. skip validation of input transcripts,
     # -h rt-stt .. removes non-lexical items from CTM,
     $cmd LMWT=$min_lmwt:$max_lmwt $dir/ascoring/log/score.LMWT.log \
-      cp $data/stm $dir/ascore_LMWT/ '&&' \
+      cp $data/stm${stm_suffix} $dir/ascore_LMWT/ '&&' \
       cp $dir/ascore_LMWT/${name}.ctm $dir/ascore_LMWT/${oname}.ctm '&&' \
       $hubscr -G -v -m 1:2 -o$overlap_spk -a -C -B 8192 -p $hubdir -V -l english \
-        -h rt-stt -g $data/glm -r $dir/ascore_LMWT/stm $dir/ascore_LMWT/${oname}.ctm || exit 1
+        -h rt-stt -g $data/glm -r $dir/ascore_LMWT/stm${stm_suffix} $dir/ascore_LMWT/${oname}.ctm || exit 1
     # Compress some scoring outputs : alignment info and graphs,
     echo -n "compressing asclite outputs "
     for LMWT in $(seq $min_lmwt $max_lmwt); do
@@ -126,8 +159,8 @@ if [ $stage -le 2 ]; then
       echo done
   else
     $cmd LMWT=$min_lmwt:$max_lmwt $dir/ascoring/log/score.LMWT.log \
-      cp $data/stm $dir/ascore_LMWT/ '&&' \
-      $hubscr -p $hubdir -v -V -l english -h hub5 -g $data/glm -r $dir/ascore_LMWT/stm $dir/ascore_LMWT/${name}.ctm || exit 1
+      cp $data/stm${stm_suffix} $dir/ascore_LMWT/ '&&' \
+      $hubscr -p $hubdir -v -V -l english -h hub5 -g $data/glm -r $dir/ascore_LMWT/stm${stm_suffix} $dir/ascore_LMWT/${name}.ctm || exit 1
   fi
 fi
 
From c5aeb9159d7a023683cca1cb9c3704ad578893d3 Mon Sep 17 00:00:00 2001
From: Vimal Manohar 
Date: Sat, 10 Sep 2016 19:58:18 -0400
Subject: [PATCH 074/530] asr_diarization: Get times per word from mbr sausage
 links

---
 src/lat/sausages.cc | 57 ++++++++++++++++++++++++++++++++++++---------
 src/lat/sausages.h  | 10 ++++++--
 2 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/src/lat/sausages.cc b/src/lat/sausages.cc
index 53678efe844..0af1c0f6620 100644
--- a/src/lat/sausages.cc
+++ b/src/lat/sausages.cc
@@ -51,10 +51,19 @@ void MinimumBayesRisk::MbrDecode() {
         R_[q] = rhat;
       }
       if (R_[q] != 0) {
-        one_best_times_.push_back(times_[q]);
         BaseFloat confidence = 0.0;
+        bool first_time = true;
         for (int32 j = 0; j < gamma_[q].size(); j++)
-          if (gamma_[q][j].first == R_[q]) confidence = gamma_[q][j].second;
+          if (gamma_[q][j].first == R_[q]) {
+            KALDI_ASSERT(first_time); first_time = false;
+            confidence = gamma_[q][j].second;
+            KALDI_ASSERT(confidence > 0);
+            KALDI_ASSERT(begin_times_[q].count(R_[q]) > 0);
+            
KALDI_ASSERT(end_times_[q].count(R_[q]) > 0); + one_best_times_.push_back(make_pair( + begin_times_[q][R_[q]] / confidence, + end_times_[q][R_[q]] / confidence)); + } one_best_confidences_.push_back(confidence); } } @@ -145,11 +154,13 @@ void MinimumBayesRisk::AccStats() { vector > gamma(Q+1); // temp. form of gamma. // index 1...Q [word] -> occ. + vector > tau_b(Q+1), tau_e(Q+1); + // The tau arrays below are the sums over words of the tau_b // and tau_e timing quantities mentioned in Appendix C of // the paper... we are using these to get averaged times for // the sausage bins, not specifically for the 1-best output. - Vector tau_b(Q+1), tau_e(Q+1); + //Vector tau_b(Q+1), tau_e(Q+1); double Ltmp = EditDistance(N, Q, alpha, alpha_dash, alpha_dash_arc); if (L_ != 0 && Ltmp > L_) { // L_ != 0 is to rule out 1st iter. @@ -189,8 +200,11 @@ void MinimumBayesRisk::AccStats() { // next: gamma(q, w(a)) += beta_dash_arc(q) AddToMap(w_a, beta_dash_arc(q), &(gamma[q])); // next: accumulating times, see decl for tau_b,tau_e - tau_b(q) += state_times_[s_a] * beta_dash_arc(q); - tau_e(q) += state_times_[n] * beta_dash_arc(q); + AddToMap(w_a, state_times_[s_a] * beta_dash_arc(q), &(tau_b[q]), false); + AddToMap(w_a, state_times_[n] * beta_dash_arc(q), &(tau_e[q]), false); + KALDI_ASSERT(tau_b[q].size() == tau_e[q].size()); + //tau_b(q) += state_times_[s_a] * beta_dash_arc(q); + //tau_e(q) += state_times_[n] * beta_dash_arc(q); break; case 2: beta_dash(s_a, q) += beta_dash_arc(q); @@ -203,8 +217,11 @@ void MinimumBayesRisk::AccStats() { // WARNING: there was an error in Appendix C. If we followed // the instructions there the next line would say state_times_[sa], but // it would be wrong. I will try to publish an erratum. - tau_b(q) += state_times_[n] * beta_dash_arc(q); - tau_e(q) += state_times_[n] * beta_dash_arc(q); + AddToMap(0, state_times_[n] * beta_dash_arc(q), &(tau_b[q]), false); + AddToMap(0, state_times_[n] * beta_dash_arc(q), &(tau_e[q]), false); + KALDI_ASSERT(tau_b[q].size() == tau_e[q].size()); + //tau_b(q) += state_times_[n] * beta_dash_arc(q); + //tau_e(q) += state_times_[n] * beta_dash_arc(q); break; default: KALDI_ERR << "Invalid b_arc value"; // error in code. @@ -221,8 +238,11 @@ void MinimumBayesRisk::AccStats() { AddToMap(0, beta_dash_arc(q), &(gamma[q])); // the statements below are actually redundant because // state_times_[1] is zero. - tau_b(q) += state_times_[1] * beta_dash_arc(q); - tau_e(q) += state_times_[1] * beta_dash_arc(q); + //tau_b(q) += state_times_[1] * beta_dash_arc(q); + //tau_e(q) += state_times_[1] * beta_dash_arc(q); + AddToMap(0, state_times_[1] * beta_dash_arc(q), &(tau_b[q]), false); + AddToMap(0, state_times_[1] * beta_dash_arc(q), &(tau_e[q]), false); + KALDI_ASSERT(tau_b[q].size() == tau_e[q].size()); } for (int32 q = 1; q <= Q; q++) { // a check (line 35) double sum = 0.0; @@ -249,9 +269,24 @@ void MinimumBayesRisk::AccStats() { // indexing. 
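(Illustrative note, not part of the patch: tau_b[q] and tau_e[q] hold posterior-weighted times per word, so the expected start time of word w in bin q is tau_b[q][w] / gamma[q][w]. This is why MbrDecode() above divides begin_times_[q][R_[q]] and end_times_[q][R_[q]] by the word's confidence.)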
times_.clear(); times_.resize(Q); + begin_times_.clear(); + begin_times_.resize(Q); + end_times_.clear(); + end_times_.resize(Q); for (int32 q = 1; q <= Q; q++) { - times_[q-1].first = tau_b(q); - times_[q-1].second = tau_e(q); + KALDI_ASSERT(tau_b[q].size() == tau_e[q].size()); + for (map::iterator iter = tau_b[q].begin(); + iter != tau_b[q].end(); ++iter) { + times_[q-1].first += iter->second; + begin_times_[q-1].insert(make_pair(iter->first, iter->second)); + } + + for (map::iterator iter = tau_e[q].begin(); + iter != tau_e[q].end(); ++iter) { + times_[q-1].second += iter->second; + end_times_[q-1].insert(make_pair(iter->first, iter->second)); + } + if (times_[q-1].first > times_[q-1].second) // this is quite bad. KALDI_WARN << "Times out of order"; if (q > 1 && times_[q-2].second > times_[q-1].first) { diff --git a/src/lat/sausages.h b/src/lat/sausages.h index 8ada15e64b5..4f709bf1703 100644 --- a/src/lat/sausages.h +++ b/src/lat/sausages.h @@ -133,8 +133,8 @@ class MinimumBayesRisk { // used in the algorithm. /// Function used to increment map. - static inline void AddToMap(int32 i, double d, std::map *gamma) { - if (d == 0) return; + static inline void AddToMap(int32 i, double d, std::map *gamma, bool return_if_zero = true) { + if (return_if_zero && d == 0) return; std::pair pr(i, d); std::pair::iterator, bool> ret = gamma->insert(pr); if (!ret.second) // not inserted, so add to contents. @@ -178,6 +178,12 @@ class MinimumBayesRisk { // paper. We sort in reverse order on the second member (posterior), so more // likely word is first. + std::vector > begin_times_; + std::vector > end_times_; + // The average start and end times for each word in a confusion-network bin. + // These are the tau_b and tau_e quantities in Appendix C of the paper. + // Indexed from zero, like gamma_ and R_. + std::vector > times_; // The average start and end times for each confusion-network bin. This // is like an average over words, of the tau_b and tau_e quantities in From ae14b71367da1d51274a7cf664f1e321dac72f97 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Sat, 10 Sep 2016 19:58:36 -0400 Subject: [PATCH 075/530] asr_diarization: Adding a general wrapper to chain decoding script --- egs/ami/s5b/local/chain/run_decode.sh | 131 ++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100755 egs/ami/s5b/local/chain/run_decode.sh diff --git a/egs/ami/s5b/local/chain/run_decode.sh b/egs/ami/s5b/local/chain/run_decode.sh new file mode 100755 index 00000000000..545bdc7b157 --- /dev/null +++ b/egs/ami/s5b/local/chain/run_decode.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0. + +set -e +set -o pipefail +set -u + +stage=-1 +decode_stage=1 + +mic=ihm +use_ihm_ali=false +exp_name=tdnn + +nj=20 + +cleanup_affix= +graph_dir= + +decode_set=dev +decode_suffix= + +extractor= +use_ivectors=true +use_offline_ivectors=false +frames_per_chunk=50 + +scoring_opts= + +. path.sh +. cmd.sh + +. parse_options.sh + +new_mic=$mic +if [ $use_ihm_ali == "true" ]; then + new_mic=${mic}_cleanali +fi + +dir=exp/$new_mic/chain${cleanup_affix:+_$cleanup_affix}/${exp_name} + +if [ $stage -le -1 ]; then + mfccdir=mfcc_${mic} + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc.conf \ + --cmd "$train_cmd" data/$mic/${decode_set} exp/make_${mic}/$decode_set $mfccdir || exit 1; + + steps/compute_cmvn_stats.sh data/$mic/${decode_set} exp/make_${mic}/$mic/$decode_set $mfccdir || exit 1; + + utils/fix_data_dir.sh data/$mic/${decode_set} +fi + +if [ $stage -le 0 ]; then + mfccdir=mfcc_${mic}_hires + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + utils/copy_data_dir.sh data/$mic/$decode_set data/$mic/${decode_set}_hires + + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/$mic/${decode_set}_hires exp/make_${mic}_hires/$decode_set $mfccdir || exit 1; + + steps/compute_cmvn_stats.sh data/$mic/${decode_set}_hires exp/make_${mic}_hires/$mic/$decode_set $mfccdir || exit 1; + + utils/fix_data_dir.sh data/$mic/${decode_set}_hires +fi + +if $use_ivectors && [ $stage -le 1 ]; then + if [ -z "$extractor" ]; then + echo "--extractor must be supplied when using ivectors" + exit 1 + fi + + if $use_offline_ivectors; then + steps/online/nnet2/extract_ivectors.sh \ + --cmd "$train_cmd" --nj 8 \ + data/$mic/${decode_set}_hires data/lang $extractor \ + exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_offline_${decode_set} || exit 1 + else + steps/online/nnet2/extract_ivectors_online.sh \ + --cmd "$train_cmd" --nj 8 \ + data/$mic/${decode_set}_hires $extractor \ + exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_${decode_set} || exit 1 + fi +fi + +final_lm=`cat data/local/lm/final_lm` +LM=$final_lm.pr1-7 + +if [ -z "$graph_dir" ]; then + graph_dir=$dir/graph_${LM} + if [ $stage -le 2 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir + fi +fi + +nj=`cat data/$mic/${decode_set}/utt2spk|cut -d' ' -f2|sort -u|wc -l` + +if [ $nj -gt 50 ]; then + nj=50 +fi + +if [ "$frames_per_chunk" -ne 50 ]; then + decode_suffix=${decode_suffix}_cs${frames_per_chunk} +fi + +if [ $stage -le 3 ]; then + ivector_opts= + if $use_ivectors; then + if $use_offline_ivectors; then + ivector_opts="--online-ivector-dir exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_offline_${decode_set}" + decode_suffix=${decode_suffix}_offline + else + ivector_opts="--online-ivector-dir exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_${decode_set}" + fi + fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --stage $decode_stage --frames-per-chunk $frames_per_chunk \ + --nj $nj --cmd "$decode_cmd" $ivector_opts \ + --scoring-opts "--min-lmwt 5 --decode-mbr false $scoring_opts" \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode${decode_suffix}_${decode_set} || exit 1; +fi From 194030d78d05fdec857642126882388b4470406b Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Tue, 22 Nov 2016 11:18:29 -0500 Subject: [PATCH 076/530] asr_diarization: Adding RT --- egs/ami/s5b/local/make_rt_2004_dev.pl | 1 + egs/ami/s5b/local/make_rt_2004_eval.pl | 1 + egs/ami/s5b/local/make_rt_2005_eval.pl | 1 + egs/ami/s5b/local/run_prepare_rt.sh | 1 + egs/ami/s5b/path.sh | 4 +- egs/rt/s5/cmd.sh | 1 + egs/rt/s5/conf/fbank.conf | 3 + egs/rt/s5/conf/librispeech_mfcc.conf | 1 + egs/rt/s5/conf/mfcc_hires.conf | 10 ++ egs/rt/s5/conf/mfcc_vad.conf | 5 + egs/rt/s5/conf/pitch.conf | 1 + egs/rt/s5/conf/vad_decode_icsi.conf | 40 +++++++ egs/rt/s5/conf/vad_decode_pitch.conf | 55 ++++++++++ egs/rt/s5/conf/vad_icsi_babel.conf | 39 +++++++ egs/rt/s5/conf/vad_icsi_babel_3models.conf | 54 ++++++++++ egs/rt/s5/conf/vad_icsi_rt.conf | 40 +++++++ egs/rt/s5/conf/vad_snr_rt.conf | 35 ++++++ egs/rt/s5/conf/zc_vad.conf | 5 + egs/rt/s5/diarization | 1 + egs/rt/s5/local/make_rt_2004_dev.pl | 64 +++++++++++ egs/rt/s5/local/make_rt_2004_eval.pl | 64 +++++++++++ egs/rt/s5/local/make_rt_2005_eval.pl | 64 +++++++++++ egs/rt/s5/local/run_prepare_rt.sh | 87 +++++++++++++++ egs/rt/s5/local/score.sh | 53 +++++++++ egs/rt/s5/local/score_asclite.sh | 120 +++++++++++++++++++++ egs/rt/s5/local/snr | 1 + egs/rt/s5/path.sh | 5 + egs/rt/s5/sid | 1 + egs/rt/s5/steps | 1 + egs/rt/s5/utils | 1 + src/ivectorbin/Makefile | 4 +- 31 files changed, 761 insertions(+), 2 deletions(-) create mode 120000 egs/ami/s5b/local/make_rt_2004_dev.pl create mode 120000 egs/ami/s5b/local/make_rt_2004_eval.pl create mode 120000 egs/ami/s5b/local/make_rt_2005_eval.pl create mode 120000 egs/ami/s5b/local/run_prepare_rt.sh create mode 120000 egs/rt/s5/cmd.sh create mode 100644 egs/rt/s5/conf/fbank.conf create mode 100644 egs/rt/s5/conf/librispeech_mfcc.conf create mode 100644 egs/rt/s5/conf/mfcc_hires.conf create mode 100644 egs/rt/s5/conf/mfcc_vad.conf create mode 100644 egs/rt/s5/conf/pitch.conf create mode 100644 egs/rt/s5/conf/vad_decode_icsi.conf create mode 100644 egs/rt/s5/conf/vad_decode_pitch.conf create mode 100644 egs/rt/s5/conf/vad_icsi_babel.conf create mode 100644 egs/rt/s5/conf/vad_icsi_babel_3models.conf create mode 100644 egs/rt/s5/conf/vad_icsi_rt.conf create mode 100644 egs/rt/s5/conf/vad_snr_rt.conf create mode 100644 egs/rt/s5/conf/zc_vad.conf create mode 120000 egs/rt/s5/diarization create mode 100755 egs/rt/s5/local/make_rt_2004_dev.pl create mode 100755 egs/rt/s5/local/make_rt_2004_eval.pl create mode 100755 
egs/rt/s5/local/make_rt_2005_eval.pl create mode 100755 egs/rt/s5/local/run_prepare_rt.sh create mode 100755 egs/rt/s5/local/score.sh create mode 100755 egs/rt/s5/local/score_asclite.sh create mode 120000 egs/rt/s5/local/snr create mode 100755 egs/rt/s5/path.sh create mode 120000 egs/rt/s5/sid create mode 120000 egs/rt/s5/steps create mode 120000 egs/rt/s5/utils diff --git a/egs/ami/s5b/local/make_rt_2004_dev.pl b/egs/ami/s5b/local/make_rt_2004_dev.pl new file mode 120000 index 00000000000..a0d27619369 --- /dev/null +++ b/egs/ami/s5b/local/make_rt_2004_dev.pl @@ -0,0 +1 @@ +../../../rt/s5/local/make_rt_2004_dev.pl \ No newline at end of file diff --git a/egs/ami/s5b/local/make_rt_2004_eval.pl b/egs/ami/s5b/local/make_rt_2004_eval.pl new file mode 120000 index 00000000000..8b951f9c940 --- /dev/null +++ b/egs/ami/s5b/local/make_rt_2004_eval.pl @@ -0,0 +1 @@ +../../../rt/s5/local/make_rt_2004_eval.pl \ No newline at end of file diff --git a/egs/ami/s5b/local/make_rt_2005_eval.pl b/egs/ami/s5b/local/make_rt_2005_eval.pl new file mode 120000 index 00000000000..6185b83a5a3 --- /dev/null +++ b/egs/ami/s5b/local/make_rt_2005_eval.pl @@ -0,0 +1 @@ +../../../rt/s5/local/make_rt_2005_eval.pl \ No newline at end of file diff --git a/egs/ami/s5b/local/run_prepare_rt.sh b/egs/ami/s5b/local/run_prepare_rt.sh new file mode 120000 index 00000000000..e10f1d53a19 --- /dev/null +++ b/egs/ami/s5b/local/run_prepare_rt.sh @@ -0,0 +1 @@ +../../../rt/s5/local/run_prepare_rt.sh \ No newline at end of file diff --git a/egs/ami/s5b/path.sh b/egs/ami/s5b/path.sh index ad2c93b309b..d8f46e6b8a0 100644 --- a/egs/ami/s5b/path.sh +++ b/egs/ami/s5b/path.sh @@ -10,4 +10,6 @@ SRILM=$KALDI_ROOT/tools/srilm/bin/i686-m64 BEAMFORMIT=$KALDI_ROOT/tools/BeamformIt export PATH=$PATH:$LMBIN:$BEAMFORMIT:$SRILM - +export PATH=$PATH:$KALDI_ROOT/tools/sph2pipe_v2.5 +export PATH=$PATH:/home/vmanoha1/kaldi-waveform/src/segmenterbin +export PATH=$PATH:$KALDI_ROOT/tools/sctk/bin diff --git a/egs/rt/s5/cmd.sh b/egs/rt/s5/cmd.sh new file mode 120000 index 00000000000..19f7e836644 --- /dev/null +++ b/egs/rt/s5/cmd.sh @@ -0,0 +1 @@ +../../wsj/s5/cmd.sh \ No newline at end of file diff --git a/egs/rt/s5/conf/fbank.conf b/egs/rt/s5/conf/fbank.conf new file mode 100644 index 00000000000..07e1639e6ee --- /dev/null +++ b/egs/rt/s5/conf/fbank.conf @@ -0,0 +1,3 @@ +# No non-default options for now. +--num-mel-bins=40 # similar to Google's setup. + diff --git a/egs/rt/s5/conf/librispeech_mfcc.conf b/egs/rt/s5/conf/librispeech_mfcc.conf new file mode 100644 index 00000000000..45d284ad05c --- /dev/null +++ b/egs/rt/s5/conf/librispeech_mfcc.conf @@ -0,0 +1 @@ +--use-energy=false diff --git a/egs/rt/s5/conf/mfcc_hires.conf b/egs/rt/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..434834a6725 --- /dev/null +++ b/egs/rt/s5/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so + # there might be some information at the low end. 
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/rt/s5/conf/mfcc_vad.conf b/egs/rt/s5/conf/mfcc_vad.conf
new file mode 100644
index 00000000000..22765c6280e
--- /dev/null
+++ b/egs/rt/s5/conf/mfcc_vad.conf
@@ -0,0 +1,5 @@
+--sample-frequency=16000
+--frame-length=25 # the default is 25.
+--low-freq=20 # the default.
+--high-freq=-600 # the default is zero meaning use the Nyquist (8k in this case).
+--num-ceps=13 # higher than the default which is 12.
diff --git a/egs/rt/s5/conf/pitch.conf b/egs/rt/s5/conf/pitch.conf
new file mode 100644
index 00000000000..e959a19d5b8
--- /dev/null
+++ b/egs/rt/s5/conf/pitch.conf
@@ -0,0 +1 @@
+--sample-frequency=16000
diff --git a/egs/rt/s5/conf/vad_decode_icsi.conf b/egs/rt/s5/conf/vad_decode_icsi.conf
new file mode 100644
index 00000000000..15ba288e3af
--- /dev/null
+++ b/egs/rt/s5/conf/vad_decode_icsi.conf
@@ -0,0 +1,40 @@
+## Feature parameters
+window_size=100 # 1s
+frames_per_gaussian=2000
+
+## Phase 1 parameters
+num_frames_init_silence=2000
+num_frames_init_sound=10000
+num_frames_init_sound_next=2000
+sil_num_gauss_init=2
+sound_num_gauss_init=2
+sil_max_gauss=2
+sound_max_gauss=6
+sil_gauss_incr=0
+sound_gauss_incr=2
+num_iters=5
+min_sil_variance=0.1
+min_sound_variance=0.01
+min_speech_variance=0.001
+
+## Phase 2 parameters
+num_frames_init_speech=10000
+speech_num_gauss_init=6
+sil_max_gauss_phase2=7
+sound_max_gauss_phase2=18
+speech_max_gauss_phase2=16
+sil_gauss_incr_phase2=1
+sound_gauss_incr_phase2=2
+speech_gauss_incr_phase2=2
+num_iters_phase2=5
+
+## Phase 3 parameters
+sil_num_gauss_init_phase3=2
+speech_num_gauss_init_phase3=2
+sil_max_gauss_phase3=5
+speech_max_gauss_phase3=12
+sil_gauss_incr_phase3=1
+speech_gauss_incr_phase3=2
+num_iters_phase3=7
+
+
diff --git a/egs/rt/s5/conf/vad_decode_pitch.conf b/egs/rt/s5/conf/vad_decode_pitch.conf
new file mode 100644
index 00000000000..d7ba1d40093
--- /dev/null
+++ b/egs/rt/s5/conf/vad_decode_pitch.conf
@@ -0,0 +1,55 @@
+## Feature parameters
+window_size=10 # 100 ms
+smooth_weights=false
+smoothing_window=2
+smooth_mask=true
+
+## Phase 1 parameters
+num_frames_init_silence=200
+num_frames_init_sound=200
+num_frames_init_sound_next=200
+sil_num_gauss_init=2
+sound_num_gauss_init=2
+sil_max_gauss=2
+sound_max_gauss=2
+sil_gauss_incr=1
+sound_gauss_incr=1
+sil_frames_incr=200
+sound_frames_incr=200
+sound_frames_next_incr=200
+num_iters=5
+min_sil_variance=0.1
+min_sound_variance=0.01
+min_speech_variance=0.001
+
+## Phase 2 parameters
+num_frames_init_speech=5000
+speech_num_gauss_init=6
+sil_max_gauss_phase2=7
+sound_max_gauss_phase2=7
+speech_max_gauss_phase2=16
+sil_gauss_incr_phase2=1
+sound_gauss_incr_phase2=1
+speech_gauss_incr_phase2=2
+num_iters_phase2=20
+window_size_phase2_init=10
+window_size_phase2_next=10
+window_size_incr_iter=5
+
+num_frames_init_speech_phase2=100000
+num_frames_init_silence_phase2=200000
+num_frames_init_sound_phase2=200000
+speech_frames_incr_phase2=200000
+sil_frames_incr_phase2=200000
+sound_frames_incr_phase2=200000
+
+## Phase 3 parameters
+sil_num_gauss_init_phase3=2
+speech_num_gauss_init_phase3=2
+sil_max_gauss_phase3=5
+speech_max_gauss_phase3=12
+sil_gauss_incr_phase3=1
+speech_gauss_incr_phase3=2
+num_iters_phase3=7
+
+
diff --git a/egs/rt/s5/conf/vad_icsi_babel.conf b/egs/rt/s5/conf/vad_icsi_babel.conf
new file mode 100644
index 00000000000..70f651403f5
--- /dev/null
+++ b/egs/rt/s5/conf/vad_icsi_babel.conf
@@ -0,0 +1,39 @@
+## Feature parameters
+window_size=10 # 100 ms
+frames_per_gaussian=200
+
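+# A note on units, inferred from the comments in the other VAD configs here
+# (the frame shift is assumed to be the standard 10 ms): window_size=10 thus
+# covers 100 ms, and frames_per_gaussian=200 corresponds to roughly 2 s of
+# selected frames per Gaussian (cf. the "2s per Gaussian" comments in
+# vad_snr_rt.conf), i.e. about 12 s for a 6-Gaussian speech model.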
+## Phase 1 parameters
+num_frames_init_silence=2000 # 20s - Lowest energy frames selected to initialize Silence GMM
+num_frames_init_sound=10000 # 100s - Highest energy frames selected to initialize Sound GMM
+num_frames_init_sound_next=2000 # 20s - Highest zero crossing frames selected to initialize Sound GMM
+sil_num_gauss_init=2
+sound_num_gauss_init=2
+sil_max_gauss=2
+sound_max_gauss=6
+sil_gauss_incr=0
+sound_gauss_incr=2
+num_iters=5
+min_sil_variance=0.1
+min_sound_variance=0.01
+min_speech_variance=0.001
+
+## Phase 2 parameters
+speech_num_gauss_init=6
+sil_max_gauss_phase2=7
+sound_max_gauss_phase2=18
+speech_max_gauss_phase2=16
+sil_gauss_incr_phase2=1
+sound_gauss_incr_phase2=2
+speech_gauss_incr_phase2=2
+num_iters_phase2=5
+
+## Phase 3 parameters
+sil_num_gauss_init_phase3=2
+speech_num_gauss_init_phase3=2
+sil_max_gauss_phase3=5
+speech_max_gauss_phase3=12
+sil_gauss_incr_phase3=1
+speech_gauss_incr_phase3=2
+num_iters_phase3=7
+
+
diff --git a/egs/rt/s5/conf/vad_icsi_babel_3models.conf b/egs/rt/s5/conf/vad_icsi_babel_3models.conf
new file mode 100644
index 00000000000..1196f0d2aff
--- /dev/null
+++ b/egs/rt/s5/conf/vad_icsi_babel_3models.conf
@@ -0,0 +1,54 @@
+## Feature parameters
+window_size=10 # 100 ms
+frames_per_gaussian=200
+
+## Phase 1 parameters
+num_frames_init_silence=2000 # 20s - Lowest energy frames selected to initialize Silence GMM
+num_frames_init_sound=10000 # 100s - Highest energy frames selected to initialize Sound GMM
+num_frames_init_sound_next=2000 # 20s - Highest zero crossing frames selected to initialize Sound GMM
+sil_num_gauss_init=2
+sound_num_gauss_init=2
+sil_max_gauss=2
+sound_max_gauss=6
+sil_gauss_incr=0
+sound_gauss_incr=2
+num_iters=5
+min_sil_variance=0.1
+min_sound_variance=0.01
+min_speech_variance=0.001
+
+## Phase 2 parameters
+speech_num_gauss_init=6
+sil_max_gauss_phase2=7
+sound_max_gauss_phase2=18
+speech_max_gauss_phase2=16
+sil_gauss_incr_phase2=1
+sound_gauss_incr_phase2=2
+speech_gauss_incr_phase2=2
+num_iters_phase2=5
+
+## Phase 3 parameters
+num_frames_silence_phase3_init=2000
+num_frames_speech_phase3_init=2000
+sil_num_gauss_init_phase3=2
+speech_num_gauss_init_phase3=2
+sil_max_gauss_phase3=5
+sil_max_gauss_phase4=8
+speech_max_gauss_phase4=16
+sil_gauss_incr_phase3=1
+sil_gauss_incr_phase4=1
+speech_gauss_incr_phase4=2
+num_iters_phase3=5
+num_iters_phase4=5
+
+## Phase 5 parameters
+sil_num_gauss_init_phase5=2
+speech_num_gauss_init_phase5=2
+sil_max_gauss_phase5=5
+speech_max_gauss_phase5=12
+sil_gauss_incr_phase5=1
+speech_gauss_incr_phase5=2
+num_iters_phase5=7
+
+
+
diff --git a/egs/rt/s5/conf/vad_icsi_rt.conf b/egs/rt/s5/conf/vad_icsi_rt.conf
new file mode 100644
index 00000000000..d19038014db
--- /dev/null
+++ b/egs/rt/s5/conf/vad_icsi_rt.conf
@@ -0,0 +1,40 @@
+## Feature parameters
+window_size=10 # 100 ms
+frames_per_gaussian=200
+
+## Phase 1 parameters
+num_frames_init_silence=2000
+num_frames_init_sound=10000
+num_frames_init_sound_next=2000
+sil_num_gauss_init=2
+sound_num_gauss_init=2
+sil_max_gauss=2
+sound_max_gauss=6
+sil_gauss_incr=0
+sound_gauss_incr=2
+num_iters=5
+min_sil_variance=0.1
+min_sound_variance=0.01
+min_speech_variance=0.001
+
+## Phase 2 parameters
+#num_frames_init_speech=10000
+speech_num_gauss_init=6
+sil_max_gauss_phase2=7
+sound_max_gauss_phase2=18
+speech_max_gauss_phase2=16
+sil_gauss_incr_phase2=1
+sound_gauss_incr_phase2=2
+speech_gauss_incr_phase2=2
+num_iters_phase2=5
+
+## Phase 3 parameters
+sil_num_gauss_init_phase3=2
+speech_num_gauss_init_phase3=2
+sil_max_gauss_phase3=5
+speech_max_gauss_phase3=12
+sil_gauss_incr_phase3=1
+speech_gauss_incr_phase3=2
+num_iters_phase3=7
+
+
diff --git a/egs/rt/s5/conf/vad_snr_rt.conf b/egs/rt/s5/conf/vad_snr_rt.conf
new file mode 100644
index 00000000000..a1029eb8fe6
--- /dev/null
+++ b/egs/rt/s5/conf/vad_snr_rt.conf
@@ -0,0 +1,35 @@
+## Feature parameters
+window_size=5 # 5 frames; window over which the initial selection of frames is made
+
+frames_per_silence_gaussian=200 # 2s per Gaussian
+frames_per_sound_gaussian=200 # 2s per Gaussian
+frames_per_speech_gaussian=2000 # 20s per Gaussian
+
+## Phase 1 parameters
+num_frames_init_silence=1000 # 10s - Lowest energy frames selected to initialize Silence GMM
+num_frames_init_silence_next=200 # 2s - Lowest energy frames selected to initialize Silence GMM
+num_frames_init_sound=1000 # 10s - Highest energy frames selected to initialize Sound GMM
+num_frames_init_sound_next=200 # 2s - Highest zero crossing frames selected to initialize Sound GMM
+num_frames_init_speech=10000 # 100s - Highest energy frames selected to initialize Speech GMM
+sil_num_gauss_init=2
+sound_num_gauss_init=2
+speech_num_gauss_init=6
+sil_max_gauss=7
+sound_max_gauss=12
+speech_max_gauss=16
+sil_gauss_incr=1
+sound_gauss_incr=2
+speech_gauss_incr=2
+num_iters=10
+
+## Phase 3 parameters
+num_frames_init_silence_phase3=1000 # 10s - Lowest energy frames selected to initialize Silence GMM
+num_frames_init_silence_next_phase3=200 # 2s - Lowest energy frames selected to initialize Silence GMM
+num_frames_init_speech_phase3=10000 # 100s - Highest energy frames selected to initialize Speech GMM
+sil_num_gauss_init=2
+speech_num_gauss_init=6
+sil_max_gauss=7
+speech_max_gauss=16
+sil_gauss_incr=1
+speech_gauss_incr=2
+num_iters_phase3=10
diff --git a/egs/rt/s5/conf/zc_vad.conf b/egs/rt/s5/conf/zc_vad.conf
new file mode 100644
index 00000000000..b5d94450709
--- /dev/null
+++ b/egs/rt/s5/conf/zc_vad.conf
@@ -0,0 +1,5 @@
+--sample-frequency=16000
+--frame-length=25 # the default is 25.
+--dither=0.0
+--zero-crossing-threshold=1e-5
+
diff --git a/egs/rt/s5/diarization b/egs/rt/s5/diarization
new file mode 120000
index 00000000000..ba78a9126af
--- /dev/null
+++ b/egs/rt/s5/diarization
@@ -0,0 +1 @@
+../../sre08/v1/diarization
\ No newline at end of file
diff --git a/egs/rt/s5/local/make_rt_2004_dev.pl b/egs/rt/s5/local/make_rt_2004_dev.pl
new file mode 100755
index 00000000000..8a08dd268a7
--- /dev/null
+++ b/egs/rt/s5/local/make_rt_2004_dev.pl
@@ -0,0 +1,64 @@
+#!/usr/bin/perl -w
+# Copyright 2015  Vimal Manohar
+# Apache 2.0.
+
+use strict;
+use File::Basename;
+
+if (@ARGV != 2) {
+  print STDERR "Usage: $0 <db-base> <out-dir>\n" .
+    " e.g.: $0 /export/corpora5/LDC/LDC2007S11 data\n";
+  exit(1);
+}
+
+my ($db_base, $out_dir) = @ARGV;
+$out_dir = "$out_dir/rt04_dev";
+
+if (system("mkdir -p $out_dir")) {
+  die "Error making directory $out_dir";
+}
+
+open(SPKR, ">", "$out_dir/utt2spk")
+  or die "Could not open the output file $out_dir/utt2spk";
+open(WAV, ">", "$out_dir/wav.scp")
+  or die "Could not open the output file $out_dir/wav.scp";
+open(RECO2FILE_AND_CHANNEL, ">", "$out_dir/reco2file_and_channel")
+  or die "Could not open the output file $out_dir/reco2file_and_channel";
+
+open(LIST, 'find ' . $db_base . '/data/audio/dev04s -name "*.sph" |');
+
+my $sox =`which sox` || die "Could not find sox in PATH";
+chomp($sox);
+
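+# Each wav.scp entry written below has the form (illustrative)
+#   <file-id> sph2pipe -f wav /path/to/<file-id>.sph |
+# with <file-id> taken from the sphere file's basename; utt2spk and
+# reco2file_and_channel get one whole-recording entry per file.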
" $sox $line -c 1 -b 16 -t wav - |\n"; + print WAV $file_id . " sph2pipe -f wav $line |\n"; + } elsif ($suffix =~ /.wav/) { + print WAV $file_id . " $line |\n"; + } else { + die "$0: Unknown suffix $suffix in $line\n" + } + + print SPKR "$file_id $file_id\n"; + print RECO2FILE_AND_CHANNEL "$file_id $file_id 1\n"; +} + +close(LIST) || die; +close(WAV) || die; +close(SPKR) || die; + +if (system( + "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_dir"; +} + +system("utils/fix_data_dir.sh $out_dir"); + +if (system( + "utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) { + die "Error validating directory $out_dir"; +} diff --git a/egs/rt/s5/local/make_rt_2004_eval.pl b/egs/rt/s5/local/make_rt_2004_eval.pl new file mode 100755 index 00000000000..4c1286ea1cc --- /dev/null +++ b/egs/rt/s5/local/make_rt_2004_eval.pl @@ -0,0 +1,64 @@ +#!/usr/bin/perl -w +# Copyright 2015 Vimal Manohar +# Apache 2.0. + +use strict; +use File::Basename; + +if (@ARGV != 2) { + print STDERR "Usage: $0 \n" . + " e.g.: $0 /export/corpora5/LDC/LDC2007S12/package/rt04_eval data\n"; + exit(1); +} + +my ($db_base, $out_dir) = @ARGV; +$out_dir = "$out_dir/rt04_eval"; + +if (system("mkdir -p $out_dir")) { + die "Error making directory $out_dir"; +} + +open(SPKR, ">", "$out_dir/utt2spk") + or die "Could not open the output file $out_dir/utt2spk"; +open(WAV, ">", "$out_dir/wav.scp") + or die "Could not open the output file $out_dir/wav.scp"; +open(RECO2FILE_AND_CHANNEL, ">", "$out_dir/reco2file_and_channel") + or die "Could not open the output file $out_dir/reco2file_and_channel"; + +open(LIST, 'find ' . $db_base . '/data/audio/eval04s -name "*.sph" |'); + +my $sox =`which sox` || die "Could not find sox in PATH"; +chomp($sox); + +while (my $line = ) { + chomp($line); + my ($file_id, $path, $suffix) = fileparse($line, qr/\.[^.]*/); + if ($suffix =~ /.sph/) { + #print WAV $file_id . " $sox $line -c 1 -b 16 -t wav - |\n"; + print WAV $file_id . " sph2pipe -f wav $line |\n"; + } elsif ($suffix =~ /.wav/) { + print WAV $file_id . " $line |\n"; + } else { + die "$0: Unknown suffix $suffix in $line\n" + } + + print SPKR "$file_id $file_id\n"; + print RECO2FILE_AND_CHANNEL "$file_id $file_id 1\n"; +} + +close(LIST) || die; +close(WAV) || die; +close(SPKR) || die; + +if (system( + "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_dir"; +} + +system("utils/fix_data_dir.sh $out_dir"); + +if (system( + "utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) { + die "Error validating directory $out_dir"; +} + diff --git a/egs/rt/s5/local/make_rt_2005_eval.pl b/egs/rt/s5/local/make_rt_2005_eval.pl new file mode 100755 index 00000000000..d48dcaae926 --- /dev/null +++ b/egs/rt/s5/local/make_rt_2005_eval.pl @@ -0,0 +1,64 @@ +#!/usr/bin/perl -w +# Copyright 2015 Vimal Manohar +# Apache 2.0. + +use strict; +use File::Basename; + +if (@ARGV != 2) { + print STDERR "Usage: $0 \n" . 
+ " e.g.: $0 /export/corpora5/LDC/LDC2011S06 data\n"; + exit(1); +} + +my ($db_base, $out_dir) = @ARGV; +$out_dir = "$out_dir/rt05_eval"; + +if (system("mkdir -p $out_dir")) { + die "Error making directory $out_dir"; +} + +open(SPKR, ">", "$out_dir/utt2spk") + or die "Could not open the output file $out_dir/utt2spk"; +open(WAV, ">", "$out_dir/wav.scp") + or die "Could not open the output file $out_dir/wav.scp"; +open(RECO2FILE_AND_CHANNEL, ">", "$out_dir/reco2file_and_channel") + or die "Could not open the output file $out_dir/reco2file_and_channel"; + +open(LIST, 'find ' . $db_base . '/data/audio/eval05s -name "*.sph" |'); + +my $sox =`which sox` || die "Could not find sox in PATH"; +chomp($sox); + +while (my $line = ) { + chomp($line); + my ($file_id, $path, $suffix) = fileparse($line, qr/\.[^.]*/); + if ($suffix =~ /.sph/) { + print WAV $file_id . " $sox $line -c 1 -b 16 -t wav - |\n"; + } elsif ($suffix =~ /.wav/) { + print WAV $file_id . " $line |\n"; + } else { + die "$0: Unknown suffix $suffix in $line\n" + } + + print SPKR "$file_id $file_id\n"; + print RECO2FILE_AND_CHANNEL "$file_id $file_id 1\n"; +} + +close(LIST) || die; +close(WAV) || die; +close(SPKR) || die; + +if (system( + "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_dir"; +} + +system("utils/fix_data_dir.sh $out_dir"); + +if (system( + "utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) { + die "Error validating directory $out_dir"; +} + + diff --git a/egs/rt/s5/local/run_prepare_rt.sh b/egs/rt/s5/local/run_prepare_rt.sh new file mode 100755 index 00000000000..c431f760dab --- /dev/null +++ b/egs/rt/s5/local/run_prepare_rt.sh @@ -0,0 +1,87 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +set -e +set -o pipefail +set -u + +. path.sh +. cmd.sh + +mic=sdm +task=sad + +. parse_options.sh + +RT04_DEV_ROOT=/export/corpora5/LDC/LDC2007S11 +RT04_EVAL_ROOT=/export/corpora5/LDC/LDC2007S12/package/rt04_eval +RT05_EVAL_ROOT=/export/corpora5/LDC/LDC2011S06 + +if [ ! -f data/rt04_dev/.done ]; then + local/make_rt_2004_dev.pl $RT04_DEV_ROOT data + touch data/rt04_dev/.done +fi + +if [ ! -f data/rt04_eval/.done ]; then + local/make_rt_2004_eval.pl $RT04_EVAL_ROOT data + touch data/rt04_eval/.done +fi + +if [ ! 
+if [ ! -f data/rt05_eval/.done ]; then
+  local/make_rt_2005_eval.pl $RT05_EVAL_ROOT data
+  touch data/rt05_eval/.done
+fi
+
+mkdir -p data/local
+
+dir=data/local/rt05_eval/$mic/$task
+mkdir -p $dir
+
+if [ $task == "stt" ]; then
+  cp $RT05_EVAL_ROOT/data/reference/concatenated/rt05s.confmtg.050614.${task}.${mic}.stm $dir/stm
+else
+  cp $RT05_EVAL_ROOT/data/reference/concatenated/rt05s.confmtg.050614.${task}.${mic}.rttm $dir/rttm
+fi
+
+cp $RT05_EVAL_ROOT/data/indicies/expt_05s_${task}ul_eval05s_eng_confmtg_${mic}_1.uem $dir/uem
+cat $dir/uem | awk '!/;;/{if (NF > 0) print $1}' | perl -pe 's/(.*)\.sph/$1/g' | sort -u > $dir/list
+utils/subset_data_dir.sh --utt-list $dir/list data/rt05_eval data/rt05_eval_${mic}_${task}
+[ -f $dir/stm ] && cp $dir/stm data/rt05_eval_${mic}_${task}
+[ -f $dir/uem ] && cp $dir/uem data/rt05_eval_${mic}_${task}
+[ -f $dir/rttm ] && cp $dir/rttm data/rt05_eval_${mic}_${task}
+
+dir=data/local/rt04_dev/$mic/$task
+mkdir -p $dir
+
+if [ $task == "stt" ]; then
+  cp $RT04_DEV_ROOT/data/reference/dev04s/concatenated/dev04s.040809.${mic}.stm $dir/stm
+elif [ $task == "spkr" ]; then
+  cp $RT04_DEV_ROOT/data/reference/dev04s/concatenated/dev04s.040809.${mic}.rttm $dir/rttm
+else
+  cat $RT04_DEV_ROOT/data/reference/dev04s/concatenated/dev04s.040809.${mic}.rttm | spkr2sad.pl | rttmSmooth.pl -s 0 > $dir/rttm
+fi
+cp $RT04_DEV_ROOT/data/indices/dev04s/dev04s.${mic}.uem $dir/uem
+cat $dir/uem | awk '!/;;/{if (NF > 0) print $1}' | perl -pe 's/(.*)\.sph/$1/g' | sort -u > $dir/list
+utils/subset_data_dir.sh --utt-list $dir/list data/rt04_dev data/rt04_dev_${mic}_${task}
+[ -f $dir/stm ] && cp $dir/stm data/rt04_dev_${mic}_${task}
+[ -f $dir/uem ] && cp $dir/uem data/rt04_dev_${mic}_${task}
+[ -f $dir/rttm ] && cp $dir/rttm data/rt04_dev_${mic}_${task}
+
+dir=data/local/rt04_eval/$mic/$task
+mkdir -p $dir
+
+if [ $task == "stt" ]; then
+  cp $RT04_EVAL_ROOT/data/reference/eval04s/concatenated/eval04s.040511.${mic}.stm $dir/stm
+elif [ $task == "spkr" ]; then
+  cp $RT04_EVAL_ROOT/data/reference/eval04s/concatenated/eval04s.040511.${mic}.rttm $dir/rttm
+else
+  cat $RT04_EVAL_ROOT/data/reference/eval04s/concatenated/eval04s.040511.${mic}.rttm | spkr2sad.pl | rttmSmooth.pl -s 0 > $dir/rttm
+fi
+cp $RT04_EVAL_ROOT/data/indices/eval04s/eval04s.${mic}.uem $dir/uem
+cat $dir/uem | awk '!/;;/{if (NF > 0) print $1}' | perl -pe 's/(.*)\.sph/$1/g' | sort -u > $dir/list
+utils/subset_data_dir.sh --utt-list $dir/list data/rt04_eval data/rt04_eval_${mic}_${task}
+[ -f $dir/stm ] && cp $dir/stm data/rt04_eval_${mic}_${task}
+[ -f $dir/uem ] && cp $dir/uem data/rt04_eval_${mic}_${task}
+[ -f $dir/rttm ] && cp $dir/rttm data/rt04_eval_${mic}_${task}
diff --git a/egs/rt/s5/local/score.sh b/egs/rt/s5/local/score.sh
new file mode 100755
index 00000000000..1c3e2cbe8c4
--- /dev/null
+++ b/egs/rt/s5/local/score.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Copyright Johns Hopkins University (Author: Daniel Povey) 2012
+# Copyright University of Edinburgh (Author: Pawel Swietojanski) 2014
+# Apache 2.0
+
+orig_args=
+for x in "$@"; do orig_args="$orig_args '$x'"; done
+
+# begin configuration section.  we include all the options that score_sclite.sh or
+# score_basic.sh might need, or parse_options.sh will die.
+cmd=run.pl
+stage=0
+min_lmwt=9 # unused,
+max_lmwt=15 # unused,
+asclite=true
+#end configuration section.
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: local/score.sh [options] <data-dir> <lang-dir|graph-dir> <decode-dir>" && exit;
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
+  echo "    --stage (0|1|2)                 # start scoring script from part-way through."
+  echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
+  echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
+  echo "    --asclite (true/false)          # score with asclite instead of sclite (overlapped speech)"
+  exit 1;
+fi
+
+data=$1
+
+mic=$(echo $data | awk -F '/' '{print $2}')
+case $mic in
+  ihm*)
+    echo "Using sclite for IHM (close talk),"
+    eval local/score_asclite.sh --asclite false $orig_args
+    ;;
+  sdm*)
+    echo "Using asclite for overlapped speech SDM (single distant mic),"
+    eval local/score_asclite.sh --asclite $asclite $orig_args
+    ;;
+  mdm*)
+    echo "Using asclite for overlapped speech MDM (multiple distant mics),"
+    eval local/score_asclite.sh --asclite $asclite $orig_args
+    ;;
+  *)
+    echo "local/score.sh: no ihm/sdm/mdm directories found. AMI recipe assumes data/{ihm,sdm,mdm}/..."
+    exit 1;
+    ;;
+esac
diff --git a/egs/rt/s5/local/score_asclite.sh b/egs/rt/s5/local/score_asclite.sh
new file mode 100755
index 00000000000..86b801b975d
--- /dev/null
+++ b/egs/rt/s5/local/score_asclite.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# Copyright Johns Hopkins University (Author: Daniel Povey) 2012.  Apache 2.0.
+#           2014, University of Edinburgh, (Author: Pawel Swietojanski)
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+min_lmwt=9
+max_lmwt=15
+reverse=false
+asclite=true
+overlap_spk=4
+#end configuration section.
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: local/score_asclite.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
+  echo "    --stage (0|1|2)                 # start scoring script from part-way through."
+  echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
+  echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
+  echo "    --reverse (true/false)          # score with time reversed features "
+  exit 1;
+fi
+
+data=$1
+lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
+dir=$3
+
+model=$dir/../final.mdl # assume model one level up from decoding dir.
+
+hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl
+[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1;
+hubdir=`dirname $hubscr`
+
+for f in $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \
+     $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do
+  [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
+done
+
+name=`basename $data`; # e.g. eval2000
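+
+# For orientation: stage 0 below produces one CTM per LM weight via the
+# pipeline lattice-1best -> lattice-align-words -> nbest-to-ctm ->
+# int2sym.pl -> convert_ctm.pl; stage 1 filters out non-scored tokens and
+# stage 2 runs hubscr.pl (with asclite for overlapped speech).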
+
+mkdir -p $dir/ascoring/log
+
+if [ $stage -le 0 ]; then
+  if $reverse; then
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/ascoring/log/get_ctm.LMWT.log \
+      mkdir -p $dir/ascore_LMWT/ '&&' \
+      lattice-1best --lm-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+      lattice-reverse ark:- ark:- \| \
+      lattice-align-words --reorder=false $lang/phones/word_boundary.int $model ark:- ark:- \| \
+      nbest-to-ctm ark:- - \| \
+      utils/int2sym.pl -f 5 $lang/words.txt \| \
+      utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \
+      '>' $dir/ascore_LMWT/$name.ctm || exit 1;
+  else
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/ascoring/log/get_ctm.LMWT.log \
+      mkdir -p $dir/ascore_LMWT/ '&&' \
+      lattice-1best --lm-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+      lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \
+      nbest-to-ctm ark:- - \| \
+      utils/int2sym.pl -f 5 $lang/words.txt \| \
+      utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \
+      '>' $dir/ascore_LMWT/$name.ctm || exit 1;
+  fi
+fi
+
+if [ $stage -le 1 ]; then
+# Remove some stuff we don't want to score, from the ctm.
+  for x in $dir/ascore_*/$name.ctm; do
+    cp $x $dir/tmpf;
+    cat $dir/tmpf | grep -i -v -E '\[noise|laughter|vocalized-noise\]' | \
+      grep -i -v -E '<UNK>' > $x;
+#      grep -i -v -E '<UNK>|%HESITATION' > $x;
+  done
+fi
+
+if [ $stage -le 2 ]; then
+  if [ "$asclite" == "true" ]; then
+    oname=$name
+    [ ! -z $overlap_spk ] && oname=${name}_o$overlap_spk
+    echo "asclite is starting"
+    # Run scoring, meaning of hubscr.pl options:
+    # -G .. produce alignment graphs,
+    # -v .. verbose,
+    # -m .. max-memory in GBs,
+    # -o .. max N of overlapping speakers,
+    # -a .. use asclite,
+    # -C .. compression for asclite,
+    # -B .. blocksize for asclite (kBs?),
+    # -p .. path for other components,
+    # -V .. skip validation of input transcripts,
+    # -h rt-stt .. removes non-lexical items from CTM,
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/ascoring/log/score.LMWT.log \
+      cp $data/stm $dir/ascore_LMWT/ '&&' \
+      cp $dir/ascore_LMWT/${name}.ctm $dir/ascore_LMWT/${oname}.ctm '&&' \
+      $hubscr -G -v -m 1:2 -o$overlap_spk -a -C -B 8192 -p $hubdir -V -l english \
+        -h rt-stt -g $data/glm -r $dir/ascore_LMWT/stm $dir/ascore_LMWT/${oname}.ctm || exit 1
+    # Compress some scoring outputs : alignment info and graphs,
+    echo -n "compressing asclite outputs "
+    for LMWT in $(seq $min_lmwt $max_lmwt); do
+      ascore=$dir/ascore_${LMWT}
+      gzip -f $ascore/${oname}.ctm.filt.aligninfo.csv
+      cp $ascore/${oname}.ctm.filt.alignments/index.html $ascore/${oname}.ctm.filt.overlap.html
+      tar -C $ascore -czf $ascore/${oname}.ctm.filt.alignments.tar.gz ${oname}.ctm.filt.alignments
+      rm -r $ascore/${oname}.ctm.filt.alignments
+      echo -n "LMWT:$LMWT "
+    done
+    echo done
+  else
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/ascoring/log/score.LMWT.log \
+      cp $data/stm $dir/ascore_LMWT/ '&&' \
+      $hubscr -p $hubdir -V -l english -h hub5 -g $data/glm -r $dir/ascore_LMWT/stm $dir/ascore_LMWT/${name}.ctm || exit 1
+  fi
+fi
+
+exit 0
diff --git a/egs/rt/s5/local/snr b/egs/rt/s5/local/snr
new file mode 120000
index 00000000000..6d422e11960
--- /dev/null
+++ b/egs/rt/s5/local/snr
@@ -0,0 +1 @@
+../../../wsj_noisy/s5/local/snr
\ No newline at end of file
diff --git a/egs/rt/s5/path.sh b/egs/rt/s5/path.sh
new file mode 100755
index 00000000000..8461d980758
--- /dev/null
+++ b/egs/rt/s5/path.sh
@@ -0,0 +1,5 @@
+export KALDI_ROOT=`pwd`/../../..
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/nnet3bin/:$KALDI_ROOT/src/segmenterbin/:$PWD:$PATH:$KALDI_ROOT/tools/sctk/bin
+export PATH=$KALDI_ROOT/tools/sph2pipe_v2.5/:$PATH
+export LC_ALL=C
diff --git a/egs/rt/s5/sid b/egs/rt/s5/sid
new file mode 120000
index 00000000000..5cb0274b7d6
--- /dev/null
+++ b/egs/rt/s5/sid
@@ -0,0 +1 @@
+../../sre08/v1/sid/
\ No newline at end of file
diff --git a/egs/rt/s5/steps b/egs/rt/s5/steps
new file mode 120000
index 00000000000..1b186770dd1
--- /dev/null
+++ b/egs/rt/s5/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps/
\ No newline at end of file
diff --git a/egs/rt/s5/utils b/egs/rt/s5/utils
new file mode 120000
index 00000000000..a3279dc8679
--- /dev/null
+++ b/egs/rt/s5/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils/
\ No newline at end of file
diff --git a/src/ivectorbin/Makefile b/src/ivectorbin/Makefile
index 71a855762fe..5df22a2bb8a 100644
--- a/src/ivectorbin/Makefile
+++ b/src/ivectorbin/Makefile
@@ -15,7 +15,9 @@ BINFILES = ivector-extractor-init ivector-extractor-acc-stats \
            ivector-subtract-global-mean ivector-plda-scoring \
            logistic-regression-train logistic-regression-eval \
            logistic-regression-copy create-split-from-vad \
-           ivector-extract-online ivector-adapt-plda
+           ivector-extract-online ivector-adapt-plda \
+           ivector-extract-dense ivector-cluster \
+           ivector-cluster-plda

 OBJFILES =

From 32977de2bc08b401de488d0d6ae9eed26b68ed69 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Sat, 10 Dec 2016 17:11:09 -0500
Subject: [PATCH 077/530] asr_diarization: Adding ami_normalize_transcripts.pl

---
 .../s5b/local/ami_normalize_transcripts.pl    | 129 ++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 egs/ami/s5b/local/ami_normalize_transcripts.pl

diff --git a/egs/ami/s5b/local/ami_normalize_transcripts.pl b/egs/ami/s5b/local/ami_normalize_transcripts.pl
new file mode 100644
index 00000000000..772e8b50fec
--- /dev/null
+++ b/egs/ami/s5b/local/ami_normalize_transcripts.pl
@@ -0,0 +1,129 @@
+#!/usr/bin/env perl

+# Copyright 2014  University of Edinburgh (Author: Pawel Swietojanski)
+#           2016  Vimal Manohar

+# Based on punctuation times, the script splits segments longer than a given
+# number of words (an input parameter) and produces a somewhat more normalised
+# form of the transcripts, as follows:
+# MeetID Channel Spkr stime etime transcripts

+#use List::MoreUtils 'indexes';
+use strict;
+use warnings;

+sub normalise_transcripts;

+sub merge_hashes {
+  my ($h1, $h2) = @_;
+  my %hash1 = %$h1; my %hash2 = %$h2;
+  foreach my $key2 ( keys %hash2 ) {
+    if( exists $hash1{$key2} ) {
+      warn "Key [$key2] is in both hashes!";
+      next;
+    } else {
+      $hash1{$key2} = $hash2{$key2};
+    }
+  }
+  return %hash1;
+}

+sub print_hash {
+  my ($h) = @_;
+  my %hash = %$h;
+  foreach my $k (sort keys %hash) {
+    print "$k : $hash{$k}\n";
+  }
+}

+sub get_name {
+  #no warnings;
+  my $sname = sprintf("%07d_%07d", $_[0]*100, $_[1]*100) || die 'Input undefined!';
+  #use warnings;
+  return $sname;
+}

+sub split_on_comma {

+  my ($text, $comma_times, $btime, $etime, $max_words_per_seg)= @_;
+  my %comma_hash = %$comma_times;

+  print "Btime, Etime : $btime, $etime\n";

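+  # Strategy (a descriptive note on the code below): find the comma time
+  # closest to the midpoint of [btime, etime], split the text there, and
+  # recurse on any piece that is still longer than max_words_per_seg.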
+  my $stime = ($etime+$btime)/2; #split time
+  my $skey = "";
+  my $otime = $btime;
+  foreach my $k (sort {$comma_hash{$a} cmp $comma_hash{$b} } keys %comma_hash) {
+    print "Key : $k : $comma_hash{$k}\n";
+    my $ktime = $comma_hash{$k};
+    if ($ktime==$btime) { next; }
+    if ($ktime==$etime) { last; }
+    if (abs($stime-$ktime) < abs($stime-$otime)) {
+      $otime = $ktime;
+      $skey = $k;
+    }
+  }

+  my %transcripts = ();
+  if (!($skey =~ /^$/)) {
+    my @utts1 = split(/$skey\s+/, $text);
+    for (my $i=0; $i<=$#utts1; $i++) {
+      my $st = $btime;
+      my $et = $comma_hash{$skey};
+      if ($i>0) {
+        $st = $comma_hash{$skey};
+        $et = $etime;
+      }
+      my (@utts) = split (' ', $utts1[$i]);
+      if ($#utts < $max_words_per_seg) {
+        my $nm = get_name($st, $et);
+        print "SplittedOnComma[$i]: $nm : $utts1[$i]\n";
+        $transcripts{$nm} = $utts1[$i];
+      } else {
+        print 'Continue splitting!';
+        my %transcripts2 = split_on_comma($utts1[$i], \%comma_hash, $st, $et, $max_words_per_seg);
+        %transcripts = merge_hashes(\%transcripts, \%transcripts2);
+      }
+    }
+  }
+  return %transcripts;
+}

+sub normalise_transcripts {
+  my $text = shift @_;

+  #DO SOME ROUGH AND OBVIOUS PRELIMINARY NORMALISATION, AS FOLLOWS
+  #remove the remaining punctation labels e.g. some text ,0 some text ,1
+  $text =~ s/[\.\,\?\!\:][0-9]+//g;
+  #there are some extra spurious puncations without spaces, e.g. UM,I, replace with space
+  $text =~ s/[A-Z']+,[A-Z']+/ /g;
+  #split words combination, ie. ANTI-TRUST to ANTI TRUST (None of them appears in cmudict anyway)
+  #$text =~ s/(.*)([A-Z])\s+(\-)(.*)/$1$2$3$4/g;
+  $text =~ s/\-/ /g;
+  #substitute X_M_L with X. M. L. etc.
+  $text =~ s/\_/. /g;
+  #normalise and trim spaces
+  $text =~ s/^\s*//g;
+  $text =~ s/\s*$//g;
+  $text =~ s/\s+/ /g;
+  #some transcripts are empty with -, nullify (and ignore) them
+  $text =~ s/^\-$//g;
+  $text =~ s/\s+\-$//;
+  # apply few exception for dashed phrases, Mm-Hmm, Uh-Huh, etc. those are frequent in AMI
+  # and will be added to dictionary
+  $text =~ s/MM HMM/MM\-HMM/g;
+  $text =~ s/UH HUH/UH\-HUH/g;

+  return $text;
+}

+while(<>) {
+  chomp;
+  print normalise_transcripts($_) . "\n";
+}

From 4219de118a3e73cf3db0aa7fa346dcdabac3a86e Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 21 Sep 2016 19:53:25 -0400
Subject: [PATCH 078/530] asr_diarization: Two-stage decoding baseline AMI

---
 .../s5b/local/chain/run_decode_two_stage.sh   | 135 ++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100755 egs/ami/s5b/local/chain/run_decode_two_stage.sh

diff --git a/egs/ami/s5b/local/chain/run_decode_two_stage.sh b/egs/ami/s5b/local/chain/run_decode_two_stage.sh
new file mode 100755
index 00000000000..0d354bfa574
--- /dev/null
+++ b/egs/ami/s5b/local/chain/run_decode_two_stage.sh
@@ -0,0 +1,135 @@
+#!/bin/bash

+set -e -u
+set -o pipefail

+stage=-1
+decode_stage=1

+mic=ihm
+use_ihm_ali=false
+exp_name=tdnn

+cleanup_affix=

+decode_set=dev
+extractor=
+use_ivectors=true
+scoring_opts=
+lmwt=8
+pad_frames=10

+. path.sh
+. cmd.sh

+. parse_options.sh

+new_mic=$mic
+if [ $use_ihm_ali == "true" ]; then
+  new_mic=${mic}_cleanali
+fi

+dir=exp/$new_mic/chain${cleanup_affix:+_$cleanup_affix}/${exp_name}

+nj=20
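+# An illustrative invocation (the extractor path is only an example):
+#   local/chain/run_decode_two_stage.sh --mic sdm1 --decode-set dev \
+#     --extractor exp/sdm1/nnet3_cleaned/extractor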
+
+if [ $stage -le -1 ]; then
+  mfccdir=mfcc_${mic}
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
+  fi

+  steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc.conf \
+    --cmd "$train_cmd" data/$mic/${decode_set} exp/make_${mic}/$decode_set $mfccdir || exit 1;

+  steps/compute_cmvn_stats.sh data/$mic/${decode_set} exp/make_${mic}/$mic/$decode_set $mfccdir || exit 1;

+  utils/fix_data_dir.sh data/$mic/${decode_set}
+fi

+utils/data/get_utt2dur.sh data/$mic/${decode_set}

+if [ $stage -le 0 ]; then
+  mfccdir=mfcc_${mic}_hires
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
+  fi

+  utils/copy_data_dir.sh data/$mic/$decode_set data/$mic/${decode_set}_hires

+  steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \
+    --cmd "$train_cmd" data/$mic/${decode_set}_hires exp/make_${mic}_hires/$decode_set $mfccdir || exit 1;

+  steps/compute_cmvn_stats.sh data/$mic/${decode_set}_hires exp/make_${mic}_hires/$mic/$decode_set $mfccdir || exit 1;

+  utils/fix_data_dir.sh data/$mic/${decode_set}_hires
+fi

+if $use_ivectors && [ $stage -le 1 ]; then
+  if [ -z "$extractor" ]; then
+    echo "$0: --extractor must be supplied when using ivectors"
+    exit 1
+  fi

+  steps/online/nnet2/extract_ivectors_online.sh \
+    --cmd "$train_cmd" --nj 8 \
+    data/$mic/${decode_set}_hires $extractor \
+    exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_${decode_set} || exit 1
+fi

+final_lm=`cat data/local/lm/final_lm`
+LM=$final_lm.pr1-7
+graph_dir=$dir/graph_${LM}
+if [ $stage -le 2 ]; then
+  # Note: it might appear that this $lang directory is mismatched, and it is as
+  # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
+  # the lang directory.
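+  # With the stock AMI language models, final_lm would be something like
+  # ami_fsh.o3g.kn (an illustrative name; the actual value comes from
+  # data/local/lm/final_lm), so this builds $dir/graph_ami_fsh.o3g.kn.pr1-7
+  # from data/lang_ami_fsh.o3g.kn.pr1-7.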
+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +nj=`cat data/$mic/${decode_set}/utt2spk|cut -d' ' -f2|sort -u|wc -l` + +if [ $nj -gt 50 ]; then + nj=50 +fi + +if [ $stage -le 3 ]; then + ivector_opts= + if $use_ivectors; then + ivector_opts="--online-ivector-dir exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_${decode_set}" + fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --stage $decode_stage \ + --nj $nj --cmd "$decode_cmd" $ivector_opts \ + --scoring-opts "--min-lmwt 5 $scoring_opts" \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; +fi + +ivector_weights=$dir/decode_${decode_set}/ascore_$lmwt/ivector_weights.gz + +if [ $stage -le 4 ]; then + cat $dir/decode_${decode_set}/ascore_$lmwt/${decode_set}_hires.utt.ctm | \ + grep -i -v -E '\[noise|laughter|vocalized-noise\]' | \ + local/get_ivector_weights_from_ctm_conf.pl \ + --pad-frames $pad_frames data/$mic/${decode_set}/utt2dur | \ + gzip -c > $ivector_weights +fi + +if [ $stage -le 5 ]; then + steps/online/nnet2/extract_ivectors_online.sh \ + --cmd "$train_cmd" --nj $nj --weights $ivector_weights \ + data/$mic/${decode_set}_hires $extractor \ + exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_${decode_set}_stage2 || exit 1 +fi + +if [ $stage -le 6 ]; then + ivector_opts= + if $use_ivectors; then + ivector_opts="--online-ivector-dir exp/$mic/nnet3${cleanup_affix:+_$cleanup_affix}/ivectors_${decode_set}_stage2" + fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --stage $decode_stage \ + --nj $nj --cmd "$decode_cmd" $ivector_opts \ + --scoring-opts "--min-lmwt 5 $scoring_opts" \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set}_stage2 || exit 1; +fi + From 318b52ef67dc678233a9c08edd8db04f8550cfe6 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Sat, 10 Dec 2016 17:36:44 -0500 Subject: [PATCH 079/530] asr_diarization: Adding modify_stm.pl to remove beginning and end from scoring --- egs/ami/s5b/local/modify_stm.py | 97 +++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100755 egs/ami/s5b/local/modify_stm.py diff --git a/egs/ami/s5b/local/modify_stm.py b/egs/ami/s5b/local/modify_stm.py new file mode 100755 index 00000000000..52ab6fed1ef --- /dev/null +++ b/egs/ami/s5b/local/modify_stm.py @@ -0,0 +1,97 @@ +#! 
/usr/bin/env python
+
+import sys
+import collections
+import itertools
+import argparse
+
+from collections import defaultdict
+
+def IgnoreWordList(stm_lines, wordlist):
+    for i in range(0, len(stm_lines)):
+        line = stm_lines[i]
+        splits = line.strip().split()
+
+        line_changed = False
+        for j in range(5, len(splits)):
+            if str.lower(splits[j]) in wordlist:
+                splits[j] = "{{ {0} / @ }}".format(splits[j])
+                line_changed = True
+
+        if line_changed:
+            stm_lines[i] = " ".join(splits)
+
+def IgnoreIsolatedWords(stm_lines):
+    for i in range(0, len(stm_lines)):
+        line = stm_lines[i]
+        splits = line.strip().split()
+
+        assert( splits[5][0] != '<' )
+
+        # (The category labels below were reconstructed to match the
+        # ";; LABEL" declarations printed in Main().)
+        if len(splits) == 6 and splits[5] != "IGNORE_TIME_SEGMENT_IN_SCORING":
+            splits.insert(5, "<ISO>")
+        else:
+            splits.insert(5, "<NO_ISO>")
+        stm_lines[i] = " ".join(splits)
+
+def IgnoreBeginnings(stm_lines):
+    beg_times = defaultdict(itertools.repeat(float("inf")).next)
+
+    lines_to_add = []
+    for line in stm_lines:
+        splits = line.strip().split()
+
+        beg_times[(splits[0],splits[1])] = min(beg_times[(splits[0],splits[1])], float(splits[3]))
+
+    for t,v in beg_times.iteritems():
+        lines_to_add.append("{0} {1} {0} 0.0 {2} IGNORE_TIME_SEGMENT_IN_SCORING".format(t[0], t[1], v))
+
+    stm_lines.extend(lines_to_add)
+
+def WriteStmLines(stm_lines):
+    for line in stm_lines:
+        print(line)
+
+def GetArgs():
+    parser = argparse.ArgumentParser(
+        description = "This script modifies STM to remove certain words and "
+        "segments from scoring. Use sort +0 -1 +1 -2 +3nb -4 while writing out.",
+        formatter_class = argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument("--ignore-beginnings",
+                        type = str, choices = ["true", "false"],
+                        help = "Ignore beginnings of the recordings since "
+                        "they are not transcribed")
+    parser.add_argument("--ignore-isolated-words",
+                        type = str, choices = ["true", "false"],
+                        help = "Remove isolated words from scoring "
+                        "because they may be hard to recognize without "
+                        "speaker diarization")
+    parser.add_argument("--ignore-word-list",
+                        type = str,
+                        help = "List of words to be ignored")
+
+    args = parser.parse_args()
+
+    return args
+
+def Main():
+    args = GetArgs()
+
+    stm_lines = [ x.strip() for x in sys.stdin.readlines() ]
+
+    print (';; LABEL "NO_ISO", "No isolated words", "Ignoring isolated words"')
+    print (';; LABEL "ISO", "Isolated words", "isolated words"')
+
+    #if args.ignore_word_list is not None:
+    #    wordlist = {}
+    #    for x in open(args.ignore_word_list).readlines():
+    #        wordlist[str.lower(x.strip())] = 1
+    #    IgnoreWordList(stm_lines, wordlist)
+
+    IgnoreIsolatedWords(stm_lines)
+    IgnoreBeginnings(stm_lines)
+
+    WriteStmLines(stm_lines)
+
+if __name__ == "__main__":
+    Main()

From f141eb0f86f2e25ec64f0b643735cc3bb649b324 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Tue, 22 Nov 2016 12:42:44 -0500
Subject: [PATCH 080/530] asr_diarization: Removing sctk and other additions to
 AMI path.sh

---
 egs/ami/s5b/path.sh | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/egs/ami/s5b/path.sh b/egs/ami/s5b/path.sh
index d8f46e6b8a0..b4711d23926 100644
--- a/egs/ami/s5b/path.sh
+++ b/egs/ami/s5b/path.sh
@@ -9,7 +9,4 @@ LMBIN=$KALDI_ROOT/tools/irstlm/bin
 SRILM=$KALDI_ROOT/tools/srilm/bin/i686-m64
 BEAMFORMIT=$KALDI_ROOT/tools/BeamformIt

-export PATH=$PATH:$LMBIN:$BEAMFORMIT:$SRILM
-export PATH=$PATH:$KALDI_ROOT/tools/sph2pipe_v2.5
-export PATH=$PATH:/home/vmanoha1/kaldi-waveform/src/segmenterbin
-export PATH=$PATH:$KALDI_ROOT/tools/sctk/bin
+export PATH=$LMBIN:$BEAMFORMIT:$SRILM:$PATH

From d95b27eaac4d011127dcb5a32f635c05a00a60f3 Mon
Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 18 Nov 2016 00:57:40 -0500 Subject: [PATCH 081/530] asr_diarization: Updating AMI nnet3 recipes --- egs/ami/s5b/local/nnet3/run_blstm.sh | 3 +- egs/ami/s5b/local/nnet3/run_lstm.sh | 7 ++-- egs/ami/s5b/local/nnet3/run_tdnn.sh | 53 +++++++++++++++++++++------- 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/egs/ami/s5b/local/nnet3/run_blstm.sh b/egs/ami/s5b/local/nnet3/run_blstm.sh index 776151fb5aa..e0e7bcfcdcf 100755 --- a/egs/ami/s5b/local/nnet3/run_blstm.sh +++ b/egs/ami/s5b/local/nnet3/run_blstm.sh @@ -7,6 +7,7 @@ remove_egs=true use_ihm_ali=false train_set=train_cleaned ihm_gmm=tri3 +gmm=tri3a_cleaned nnet3_affix=_cleaned # BLSTM params @@ -32,6 +33,7 @@ local/nnet3/run_lstm.sh --affix $affix \ --srand $srand \ --train-stage $train_stage \ --train-set $train_set \ + --gmm $gmm \ --ihm-gmm $ihm_gmm \ --nnet3-affix $nnet3_affix \ --lstm-delay " [-1,1] [-2,2] [-3,3] " \ @@ -49,4 +51,3 @@ local/nnet3/run_lstm.sh --affix $affix \ --num-epochs $num_epochs \ --use-ihm-ali $use_ihm_ali \ --remove-egs $remove_egs - diff --git a/egs/ami/s5b/local/nnet3/run_lstm.sh b/egs/ami/s5b/local/nnet3/run_lstm.sh index c5583e2d0ef..25254629933 100755 --- a/egs/ami/s5b/local/nnet3/run_lstm.sh +++ b/egs/ami/s5b/local/nnet3/run_lstm.sh @@ -225,9 +225,12 @@ if [ $stage -le 14 ]; then [ ! -z $decode_iter ] && model_opts=" --iter $decode_iter "; for decode_set in dev eval; do ( - num_jobs=`cat data/$mic/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + nj_dev=`cat data/$mic/${decode_set}_hires/spk2utt | wc -l` + if [ $nj_dev -gt $nj ]; then + nj_dev=$nj + fi decode_dir=${dir}/decode_${decode_set} - steps/nnet3/decode.sh --nj 250 --cmd "$decode_cmd" \ + steps/nnet3/decode.sh --nj $nj_dev --cmd "$decode_cmd" \ $model_opts \ --extra-left-context $extra_left_context \ --extra-right-context $extra_right_context \ diff --git a/egs/ami/s5b/local/nnet3/run_tdnn.sh b/egs/ami/s5b/local/nnet3/run_tdnn.sh index bbc6ed5c042..7b463f4ce57 100755 --- a/egs/ami/s5b/local/nnet3/run_tdnn.sh +++ b/egs/ami/s5b/local/nnet3/run_tdnn.sh @@ -45,10 +45,12 @@ tdnn_affix= #affix for TDNN directory e.g. "a" or "b", in case we change the co # Options which are not passed through to run_ivector_common.sh train_stage=-10 splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0" -remove_egs=true +remove_egs=false relu_dim=850 num_epochs=3 +common_egs_dir= + . cmd.sh . ./path.sh . ./utils/parse_options.sh @@ -122,30 +124,55 @@ fi [ ! -f $ali_dir/ali.1.gz ] && echo "$0: expected $ali_dir/ali.1.gz to exist" && exit 1 if [ $stage -le 12 ]; then + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale-nonlinearity 0.00001 \ + --feat-dir $train_data_dir \ + --ivector-dir $train_ivector_dir \ + --ali-dir $ali_dir \ + --relu-dim $relu_dim \ + --splice-indexes "$splice_indexes" \ + --use-presoftmax-prior-scale true \ + --include-log-softmax true \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 13 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then utils/create_split_dir.pl \ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage fi - steps/nnet3/tdnn/train.sh --stage $train_stage \ - --num-epochs $num_epochs --num-jobs-initial 2 --num-jobs-final 12 \ - --splice-indexes "$splice_indexes" \ - --feat-type raw \ - --online-ivector-dir ${train_ivector_dir} \ - --cmvn-opts "--norm-means=false --norm-vars=false" \ - --initial-effective-lrate 0.0015 --final-effective-lrate 0.00015 \ + steps/nnet3/train_dnn.py --stage $train_stage \ --cmd "$decode_cmd" \ - --relu-dim "$relu_dim" \ - --remove-egs "$remove_egs" \ - $train_data_dir data/lang $ali_dir $dir + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --egs.dir "$common_egs_dir" \ + --trainer.samples-per-iter 400000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.0015 \ + --trainer.optimization.final-effective-lrate 0.00015 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs "$remove_egs" \ + --cleanup true \ + --feat-dir $train_data_dir \ + --lang data/lang \ + --ali-dir $ali_dir \ + --dir $dir fi -if [ $stage -le 12 ]; then +if [ $stage -le 14 ]; then rm $dir/.error || true 2>/dev/null for decode_set in dev eval; do ( + nj_dev=`cat data/$mic/${decode_set}_hires/spk2utt | wc -l` + if [ $nj_dev -gt $nj ]; then + nj_dev=$nj + fi decode_dir=${dir}/decode_${decode_set} - steps/nnet3/decode.sh --nj $nj --cmd "$decode_cmd" \ + steps/nnet3/decode.sh --nj $nj_dev --cmd "$decode_cmd" \ --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ $graph_dir data/$mic/${decode_set}_hires $decode_dir ) & From 0ad80e29586f6dc5acb04c34210774426a5ab4a1 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Sat, 10 Dec 2016 17:40:50 -0500 Subject: [PATCH 082/530] asr_diarization: Add initial training scripts for overlapped speech detection on AMI --- egs/ami/s5b/local/run_train_raw_lstm.sh | 143 ++++++++++++++++++++++++ egs/ami/s5b/local/run_train_raw_tdnn.sh | 120 ++++++++++++++++++++ 2 files changed, 263 insertions(+) create mode 100755 egs/ami/s5b/local/run_train_raw_lstm.sh create mode 100644 egs/ami/s5b/local/run_train_raw_tdnn.sh diff --git a/egs/ami/s5b/local/run_train_raw_lstm.sh b/egs/ami/s5b/local/run_train_raw_lstm.sh new file mode 100755 index 00000000000..5c0431fe796 --- /dev/null +++ b/egs/ami/s5b/local/run_train_raw_lstm.sh @@ -0,0 +1,143 @@ +#!/bin/bash + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. 
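+# An illustrative invocation (all flags are parsed by utils/parse_options.sh
+# below; paths are only examples):
+#   local/run_train_raw_lstm.sh --mic sdm1 --affix a --train-stage -10 \
+#     --targets-scp exp/sdm1/overlap_speech_train_cleaned_sp/overlap_feats.scp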
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts=
+
+# LSTM options
+splice_indexes="-2,-1,0,1,2 0"
+label_delay=0
+num_lstm_layers=2
+cell_dim=64
+hidden_dim=64
+recurrent_projection_dim=32
+non_recurrent_projection_dim=32
+chunk_width=40
+chunk_left_context=40
+lstm_delay="-1 -2"
+
+# training options
+num_epochs=3
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+momentum=0.5
+num_chunk_per_minibatch=256
+samples_per_iter=20000
+remove_egs=false
+max_param_change=1
+
+num_utts_subset_valid=6
+num_utts_subset_train=6
+
+use_dense_targets=false
+extra_egs_copy_cmd="nnet3-copy-egs-overlap-detection ark:- ark:- |"
+
+# target options
+train_data_dir=data/sdm1/train_whole_sp_hires_bp
+targets_scp=exp/sdm1/overlap_speech_train_cleaned_sp/overlap_feats.scp
+deriv_weights_scp=exp/sdm1/overlap_speech_train_cleaned_sp/deriv_weights.scp
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+compute_objf_opts=
+
+mic=sdm1
+dir=
+affix=a
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+num_hidden_layers=`echo $splice_indexes | perl -ane 'print scalar @F'` || exit 1
+if [ -z "$dir" ]; then
+  dir=exp/$mic/nnet3_raw/nnet_lstm
+fi
+
+dir=$dir${affix:+_$affix}_n${num_hidden_layers}
+if [ $label_delay -gt 0 ]; then dir=${dir}_ld$label_delay; fi
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi

From: Vimal Manohar
Date: Fri, 9 Dec 2016 20:15:19 -0500
Subject: [PATCH 083/530] asr_diarization: Babel changes

---
 .../lang/101-cantonese-limitedLP.official.conf |  2 +-
 .../lang/105-turkish-limitedLP.official.conf   |  2 +-
 egs/babel/s5c/path.sh                          |  4 +++-
 egs/babel/s5c/run-1-main.sh                    |  1 -
 egs/babel/s5c/run-4-anydecode.sh               | 17 +++++++----------
 5 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/egs/babel/s5c/conf/lang/101-cantonese-limitedLP.official.conf b/egs/babel/s5c/conf/lang/101-cantonese-limitedLP.official.conf
index e5d60c12367..9efcdc6a164 100644
--- a/egs/babel/s5c/conf/lang/101-cantonese-limitedLP.official.conf
+++ b/egs/babel/s5c/conf/lang/101-cantonese-limitedLP.official.conf
@@ -92,7 +92,7 @@ oovSymbol="<unk>"
 lexiconFlags="--romanized --oov <unk>"

 # Scoring protocols (dummy GLM file to appease the scoring script)
-glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm
+glmFile=dummy.glm

 lexicon_file=/export/babel/data/101-cantonese/release-babel101b-v0.4c_sub-train1/conversational/reference_materials/lexicon.sub-train1.txt
 cer=1
diff --git a/egs/babel/s5c/conf/lang/105-turkish-limitedLP.official.conf b/egs/babel/s5c/conf/lang/105-turkish-limitedLP.official.conf
index ae4cb55f4d5..014b519f3b7 100644
--- a/egs/babel/s5c/conf/lang/105-turkish-limitedLP.official.conf
+++ b/egs/babel/s5c/conf/lang/105-turkish-limitedLP.official.conf
@@ -3,7 +3,7 @@

 #speech corpora files location
 train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training
-train_data_list=/export/babel/data/splits/Turkish_Babel105/train.LimitedLP.official.list
+train_data_list=/export/babel/data/splits/Turkish_Babel105/train.LimitedLP.list
 train_nj=16

 #RADICAL DEV data files
diff --git a/egs/babel/s5c/path.sh b/egs/babel/s5c/path.sh
index c8fdbad6ff7..97954c1f560 100755
--- a/egs/babel/s5c/path.sh
+++ b/egs/babel/s5c/path.sh
@@ -1,5 +1,7 @@
 export KALDI_ROOT=`pwd`/../../..
 .
/export/babel/data/software/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +. $KALDI_ROOT/tools/config/common_path.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +. $KALDI_ROOT/tools/env.sh export LC_ALL=C diff --git a/egs/babel/s5c/run-1-main.sh b/egs/babel/s5c/run-1-main.sh index e01910ffac0..be37cc8dca0 100755 --- a/egs/babel/s5c/run-1-main.sh +++ b/egs/babel/s5c/run-1-main.sh @@ -249,7 +249,6 @@ if [ ! -f exp/tri5/.done ]; then touch exp/tri5/.done fi - ################################################################################ # Ready to start SGMM training ################################################################################ diff --git a/egs/babel/s5c/run-4-anydecode.sh b/egs/babel/s5c/run-4-anydecode.sh index 68b87ea1e27..2071eb94d2d 100755 --- a/egs/babel/s5c/run-4-anydecode.sh +++ b/egs/babel/s5c/run-4-anydecode.sh @@ -10,13 +10,13 @@ dir=dev10h.pem kind= data_only=false fast_path=true -skip_kws=false +skip_kws=true skip_stt=false skip_scoring=false max_states=150000 extra_kws=true vocab_kws=false -tri5_only=false +tri5_only=true wip=0.5 echo "run-4-test.sh $@" @@ -196,7 +196,6 @@ if [ ! -f $dataset_dir/.done ] ; then else echo "Unknown type of the dataset: \"$dataset_segments\"!"; echo "Valid dataset types are: seg, uem, pem"; - exit 1 fi elif [ "$dataset_kind" == "unsupervised" ] ; then if [ "$dataset_segments" == "seg" ] ; then @@ -215,12 +214,10 @@ if [ ! -f $dataset_dir/.done ] ; then else echo "Unknown type of the dataset: \"$dataset_segments\"!"; echo "Valid dataset types are: seg, uem, pem"; - exit 1 fi else echo "Unknown kind of the dataset: \"$dataset_kind\"!"; echo "Valid dataset kinds are: supervised, unsupervised, shadow"; - exit 1 fi if [ ! -f ${dataset_dir}/.plp.done ]; then @@ -284,11 +281,11 @@ if ! 
$fast_path ; then
     "${lmwt_plp_extra_opts[@]}" \
     ${dataset_dir} data/lang ${decode}

-  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
-    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
-    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
-    "${lmwt_plp_extra_opts[@]}" \
-    ${dataset_dir} data/lang ${decode}.si
+  #local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+  #  --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+  #  --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+  #  "${lmwt_plp_extra_opts[@]}" \
+  #  ${dataset_dir} data/lang ${decode}.si
 fi

 if $tri5_only; then

From 3408934e8cd2823e3caca268f98c2b74b58661e2 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Fri, 9 Dec 2016 23:48:49 -0500
Subject: [PATCH 084/530] diarization: Adding num-fft-bins

---
 src/feat/feature-fbank.cc       |  8 ++++----
 src/feat/feature-mfcc.cc        |  8 ++++----
 src/feat/feature-spectrogram.cc |  2 +-
 src/feat/feature-spectrogram.h  | 11 ++++++++++-
 src/feat/feature-window.cc      | 10 +++++-----
 src/feat/feature-window.h       | 11 +++++++++++
 src/feat/mel-computations.cc    | 16 +++++-----------
 src/feat/pitch-functions.cc     | 12 +++++++++++-
 src/feat/pitch-functions.h      |  9 ++++++++-
 9 files changed, 59 insertions(+), 28 deletions(-)

diff --git a/src/feat/feature-fbank.cc b/src/feat/feature-fbank.cc
index c54069696b5..3c53ef1ec08 100644
--- a/src/feat/feature-fbank.cc
+++ b/src/feat/feature-fbank.cc
@@ -28,9 +28,9 @@ FbankComputer::FbankComputer(const FbankOptions &opts):
   if (opts.energy_floor > 0.0)
     log_energy_floor_ = Log(opts.energy_floor);

-  int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
-  if ((padded_window_size & (padded_window_size-1)) == 0)  // Is a power of two...
-    srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
+  int32 num_fft_bins = opts.frame_opts.NumFftBins();
+  if ((num_fft_bins & (num_fft_bins-1)) == 0)  // Is a power of two...
+    srfft_ = new SplitRadixRealFft<BaseFloat>(num_fft_bins);

   // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
   // [note: this call caches it.]
@@ -76,7 +76,7 @@ void FbankComputer::Compute(BaseFloat signal_log_energy,

   const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));

-  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
+  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.NumFftBins() &&
                feature->Dim() == this->Dim());

diff --git a/src/feat/feature-mfcc.cc b/src/feat/feature-mfcc.cc
index c1962a5c1d1..47912cc8693 100644
--- a/src/feat/feature-mfcc.cc
+++ b/src/feat/feature-mfcc.cc
@@ -29,7 +29,7 @@ void MfccComputer::Compute(BaseFloat signal_log_energy,
                            BaseFloat vtln_warp,
                            VectorBase<BaseFloat> *signal_frame,
                            VectorBase<BaseFloat> *feature) {
-  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
+  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.NumFftBins() &&
                feature->Dim() == this->Dim());

   const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));
@@ -98,9 +98,9 @@ MfccComputer::MfccComputer(const MfccOptions &opts):
   if (opts.energy_floor > 0.0)
     log_energy_floor_ = Log(opts.energy_floor);

-  int32 padded_window_size = opts.frame_opts.PaddedWindowSize();
-  if ((padded_window_size & (padded_window_size-1)) == 0)  // Is a power of two...
-    srfft_ = new SplitRadixRealFft<BaseFloat>(padded_window_size);
+  int32 num_fft_bins = opts.frame_opts.NumFftBins();
+  if ((num_fft_bins & (num_fft_bins-1)) == 0)  // Is a power of two...
+    srfft_ = new SplitRadixRealFft<BaseFloat>(num_fft_bins);

   // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
  // [note: this call caches it.]
diff --git a/src/feat/feature-spectrogram.cc b/src/feat/feature-spectrogram.cc
index 953f38fc54f..f5f1c420462 100644
--- a/src/feat/feature-spectrogram.cc
+++ b/src/feat/feature-spectrogram.cc
@@ -48,7 +48,7 @@ void SpectrogramComputer::Compute(BaseFloat signal_log_energy,
                                   BaseFloat vtln_warp,
                                   VectorBase<BaseFloat> *signal_frame,
                                   VectorBase<BaseFloat> *feature) {
-  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() &&
+  KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.NumFftBins() &&
               feature->Dim() == this->Dim());

diff --git a/src/feat/feature-spectrogram.h b/src/feat/feature-spectrogram.h
index ec318556f24..6ca0697ef78 100644
--- a/src/feat/feature-spectrogram.h
+++ b/src/feat/feature-spectrogram.h
@@ -39,10 +39,13 @@ struct SpectrogramOptions {
   FrameExtractionOptions frame_opts;
   BaseFloat energy_floor;
   bool raw_energy;  // If true, compute energy before preemphasis and windowing
+  bool use_energy;  // append an extra dimension with energy to the filter banks
+  BaseFloat low_freq;  // e.g. 20; lower frequency cutoff
+  BaseFloat high_freq;  // an upper frequency cutoff; 0 -> no cutoff, negative -> offset from the Nyquist

   SpectrogramOptions() :
       energy_floor(0.0),  // not in log scale: a small value e.g. 1.0e-10
-      raw_energy(true) {}
+      raw_energy(true), use_energy(true), low_freq(0), high_freq(0) {}

   void Register(OptionsItf *opts) {
     frame_opts.Register(opts);
@@ -50,6 +53,12 @@ struct SpectrogramOptions {
                    "Floor on energy (absolute, not relative) in Spectrogram computation");
     opts->Register("raw-energy", &raw_energy,
                    "If true, compute energy before preemphasis and windowing");
+    opts->Register("use-energy", &use_energy,
+                   "Add an extra dimension with energy to the spectrogram output.");
+    opts->Register("low-freq", &low_freq,
+                   "Low cutoff frequency for mel bins");
+    opts->Register("high-freq", &high_freq,
+                   "High cutoff frequency for mel bins (if < 0, offset from Nyquist)");
   }
 };

diff --git a/src/feat/feature-window.cc b/src/feat/feature-window.cc
index 65c0a2a29c3..7b86e71dbb7 100644
--- a/src/feat/feature-window.cc
+++ b/src/feat/feature-window.cc
@@ -163,7 +163,7 @@ void ExtractWindow(int64 sample_offset,
                    BaseFloat *log_energy_pre_window) {
   KALDI_ASSERT(sample_offset >= 0 && wave.Dim() != 0);
   int32 frame_length = opts.WindowSize(),
-      frame_length_padded = opts.PaddedWindowSize();
+      num_fft_bins = opts.NumFftBins();
   int64 num_samples = sample_offset + wave.Dim(),
       start_sample = FirstSampleOfFrame(f, opts),
       end_sample = start_sample + frame_length;
@@ -175,8 +175,8 @@ void ExtractWindow(int64 sample_offset,
     KALDI_ASSERT(sample_offset == 0 || start_sample >= sample_offset);
   }

-  if (window->Dim() != frame_length_padded)
-    window->Resize(frame_length_padded, kUndefined);
+  if (window->Dim() != num_fft_bins)
+    window->Resize(num_fft_bins, kUndefined);

   // wave_start and wave_end are start and end indexes into 'wave', for the
   // piece of wave that we're trying to extract.
@@ -206,8 +206,8 @@ void ExtractWindow(int64 sample_offset,
     }
   }

-  if (frame_length_padded > frame_length)
-    window->Range(frame_length, frame_length_padded - frame_length).SetZero();
+  if (num_fft_bins > frame_length)
+    window->Range(frame_length, num_fft_bins - frame_length).SetZero();

   SubVector<BaseFloat> frame(*window, 0, frame_length);

diff --git a/src/feat/feature-window.h b/src/feat/feature-window.h
index 287f1bf01f6..4165f43f1f0 100644
--- a/src/feat/feature-window.h
+++ b/src/feat/feature-window.h
@@ -42,6 +42,7 @@ struct FrameExtractionOptions {
   std::string window_type;  // e.g. Hamming window
Hamming window bool round_to_power_of_two; BaseFloat blackman_coeff; + int32 num_fft_bins; bool snip_edges; // May be "hamming", "rectangular", "povey", "hanning", "blackman" // "povey" is a window I made to be similar to Hamming but to go to zero at the @@ -57,6 +58,7 @@ struct FrameExtractionOptions { window_type("povey"), round_to_power_of_two(true), blackman_coeff(0.42), + num_fft_bins(128), snip_edges(true){ } void Register(OptionsItf *opts) { @@ -77,6 +79,8 @@ struct FrameExtractionOptions { "Constant coefficient for generalized Blackman window."); opts->Register("round-to-power-of-two", &round_to_power_of_two, "If true, round window size to power of two."); + opts->Register("num-fft-bins", &num_fft_bins, + "Number of FFT bins to compute spectrogram"); opts->Register("snip-edges", &snip_edges, "If true, end effects will be handled by outputting only frames that " "completely fit in the file, and the number of frames depends on the " @@ -93,6 +97,13 @@ struct FrameExtractionOptions { return (round_to_power_of_two ? RoundUpToNearestPowerOfTwo(WindowSize()) : WindowSize()); } + int32 NumFftBins() const { + int32 padded_window_size = PaddedWindowSize(); + if (num_fft_bins > padded_window_size) + return (round_to_power_of_two ? RoundUpToNearestPowerOfTwo(num_fft_bins) : + num_fft_bins); + return padded_window_size; + } }; diff --git a/src/feat/mel-computations.cc b/src/feat/mel-computations.cc index 714d963f01b..db3f3334ca2 100644 --- a/src/feat/mel-computations.cc +++ b/src/feat/mel-computations.cc @@ -37,13 +37,7 @@ MelBanks::MelBanks(const MelBanksOptions &opts, int32 num_bins = opts.num_bins; if (num_bins < 3) KALDI_ERR << "Must have at least 3 mel bins"; BaseFloat sample_freq = frame_opts.samp_freq; - int32 window_length = static_cast(frame_opts.samp_freq*0.001*frame_opts.frame_length_ms); - int32 window_length_padded = - (frame_opts.round_to_power_of_two ? - RoundUpToNearestPowerOfTwo(window_length) : - window_length); - KALDI_ASSERT(window_length_padded % 2 == 0); - int32 num_fft_bins = window_length_padded/2; + int32 num_fft_bins = frame_opts.NumFftBins(); BaseFloat nyquist = 0.5 * sample_freq; BaseFloat low_freq = opts.low_freq, high_freq; @@ -59,8 +53,8 @@ MelBanks::MelBanks(const MelBanksOptions &opts, << " and high-freq " << high_freq << " vs. nyquist " << nyquist; - BaseFloat fft_bin_width = sample_freq / window_length_padded; - // fft-bin width [think of it as Nyquist-freq / half-window-length] + BaseFloat fft_bin_width = sample_freq / num_fft_bins; + // fft-bin width [think of it as Nyquist-freq / num_fft_bins] BaseFloat mel_low_freq = MelScale(low_freq); BaseFloat mel_high_freq = MelScale(high_freq); @@ -104,9 +98,9 @@ MelBanks::MelBanks(const MelBanksOptions &opts, center_freqs_(bin) = InverseMelScale(center_mel); // this_bin will be a vector of coefficients that is only // nonzero where this mel bin is active. - Vector this_bin(num_fft_bins); + Vector this_bin(num_fft_bins / 2); int32 first_index = -1, last_index = -1; - for (int32 i = 0; i < num_fft_bins; i++) { + for (int32 i = 0; i < num_fft_bins / 2; i++) { BaseFloat freq = (fft_bin_width * i); // Center frequency of this fft // bin. BaseFloat mel = MelScale(freq); diff --git a/src/feat/pitch-functions.cc b/src/feat/pitch-functions.cc index 430e9bdb53a..07e1d181243 100644 --- a/src/feat/pitch-functions.cc +++ b/src/feat/pitch-functions.cc @@ -1402,7 +1402,8 @@ OnlineProcessPitch::OnlineProcessPitch( dim_ ((opts.add_pov_feature ? 1 : 0) + (opts.add_normalized_log_pitch ? 1 : 0) + (opts.add_delta_pitch ? 
1 : 0)
-                                 + (opts.add_raw_log_pitch ? 1 : 0)) {
+                                 + (opts.add_raw_log_pitch ? 1 : 0)
+                                 + (opts.add_raw_pov ? 1 : 0)) {
   KALDI_ASSERT(dim_ > 0 &&
               " At least one of the pitch features should be chosen. "
               "Check your post-process-pitch options.");
@@ -1425,6 +1426,8 @@ void OnlineProcessPitch::GetFrame(int32 frame,
     (*feat)(index++) = GetDeltaPitchFeature(frame_delayed);
   if (opts_.add_raw_log_pitch)
     (*feat)(index++) = GetRawLogPitchFeature(frame_delayed);
+  if (opts_.add_raw_pov)
+    (*feat)(index++) = GetRawPov(frame_delayed);
   KALDI_ASSERT(index == dim_);
 }
 
@@ -1482,6 +1485,13 @@ BaseFloat OnlineProcessPitch::GetNormalizedLogPitchFeature(int32 frame) {
   return normalized_log_pitch * opts_.pitch_scale;
 }
 
+BaseFloat OnlineProcessPitch::GetRawPov(int32 frame) const {
+  Vector<BaseFloat> tmp(kRawFeatureDim);
+  src_->GetFrame(frame, &tmp);  // (NCCF, pitch) from pitch extractor
+  BaseFloat nccf = tmp(0);
+  return NccfToPov(nccf);
+}
+
 // inline
 void OnlineProcessPitch::GetNormalizationWindow(int32 t,
diff --git a/src/feat/pitch-functions.h b/src/feat/pitch-functions.h
index 70e85380be6..b94ac661c10 100644
--- a/src/feat/pitch-functions.h
+++ b/src/feat/pitch-functions.h
@@ -231,6 +231,7 @@ struct ProcessPitchOptions {
   bool add_normalized_log_pitch;
   bool add_delta_pitch;
   bool add_raw_log_pitch;
+  bool add_raw_pov;
 
   ProcessPitchOptions() :
       pitch_scale(2.0),
@@ -245,7 +246,7 @@ struct ProcessPitchOptions {
       add_pov_feature(true),
       add_normalized_log_pitch(true),
       add_delta_pitch(true),
-      add_raw_log_pitch(false) { }
+      add_raw_log_pitch(false), add_raw_pov(false) { }
 
 
   void Register(ParseOptions *opts) {
@@ -286,6 +287,8 @@ struct ProcessPitchOptions {
              "features");
     opts->Register("add-raw-log-pitch", &add_raw_log_pitch,
                    "If true, log(pitch) is added to output features");
+    opts->Register("add-raw-pov", &add_raw_pov,
+                   "If true, add NCCF converted to POV");
   }
 };
 
@@ -396,6 +399,10 @@ class OnlineProcessPitch: public OnlineFeatureInterface {
   /// Called from GetFrame().
   inline BaseFloat GetNormalizedLogPitchFeature(int32 frame);
 
+  /// Computes and returns the raw POV for this frame.
+  /// Called from GetFrame().
+  inline BaseFloat GetRawPov(int32 frame) const;
+
   /// Computes the normalization window sizes.
   inline void GetNormalizationWindow(int32 frame,
                                      int32 src_frames_ready,
From 0755e2ccad9f9a1e0886eaf334d007d72a7b5ae6 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Fri, 9 Dec 2016 23:50:21 -0500
Subject: [PATCH 085/530] asr_diarization: Removing stats component from make_jesus_configs.py

---
 egs/wsj/s5/steps/nnet3/make_jesus_configs.py | 67 --------------------
 1 file changed, 67 deletions(-)

diff --git a/egs/wsj/s5/steps/nnet3/make_jesus_configs.py b/egs/wsj/s5/steps/nnet3/make_jesus_configs.py
index 7f3aba2328c..0742fb4f1df 100755
--- a/egs/wsj/s5/steps/nnet3/make_jesus_configs.py
+++ b/egs/wsj/s5/steps/nnet3/make_jesus_configs.py
@@ -139,73 +139,6 @@
                              printable_name, old_val, new_val, args.num_jesus_blocks))
         setattr(args, name, new_val);
 
-# this is a bit like a struct, initialized from a string, which describes how to
-# set up the statistics-pooling and statistics-extraction components.
-# An example string is 'mean(-99:3:9::99)', which means, compute the mean of
-# data within a window of -99 to +99, with distinct means computed every 9 frames
-# (we round to get the appropriate one), and with the input extracted on multiples
-# of 3 frames (so this will force the input to this layer to be evaluated
-# every 3 frames). 
Another example string is 'mean+stddev(-99:3:9:99)', -# which will also cause the standard deviation to be computed. -class StatisticsConfig: - # e.g. c = StatisticsConfig('mean+stddev(-99:3:9:99)', 400, 'jesus1-forward-output-affine') - def __init__(self, config_string, input_dim, input_name): - self.input_dim = input_dim - self.input_name = input_name - - m = re.search("(mean|mean\+stddev)\((-?\d+):(-?\d+):(-?\d+):(-?\d+)\)", - config_string) - if m == None: - sys.exit("Invalid splice-index or statistics-config string: " + config_string) - self.output_stddev = (m.group(1) != 'mean') - self.left_context = -int(m.group(2)) - self.input_period = int(m.group(3)) - self.stats_period = int(m.group(4)) - self.right_context = int(m.group(5)) - if not (self.left_context > 0 and self.right_context > 0 and - self.input_period > 0 and self.stats_period > 0 and - self.left_context % self.stats_period == 0 and - self.right_context % self.stats_period == 0 and - self.stats_period % self.input_period == 0): - sys.exit("Invalid configuration of statistics-extraction: " + config_string) - - # OutputDim() returns the output dimension of the node that this produces. - def OutputDim(self): - return self.input_dim * (2 if self.output_stddev else 1) - - # OutputDims() returns an array of output dimensions, consisting of - # [ input-dim ] if just "mean" was specified, otherwise - # [ input-dim input-dim ] - def OutputDims(self): - return [ self.input_dim, self.input_dim ] if self.output_stddev else [ self.input_dim ] - - # Descriptor() returns the textual form of the descriptor by which the - # output of this node is to be accessed. - def Descriptor(self): - return 'Round({0}-pooling-{1}-{2}, {3})'.format(self.input_name, self.left_context, self.right_context, - self.stats_period) - - # This function writes the configuration lines need to compute the specified - # statistics, to the file f. 
- def WriteConfigs(self, f): - print('component name={0}-extraction-{1}-{2} type=StatisticsExtractionComponent input-dim={3} ' - 'input-period={4} output-period={5} include-variance={6} '.format( - self.input_name, self.left_context, self.right_context, - self.input_dim, self.input_period, self.stats_period, - ('true' if self.output_stddev else 'false')), file=f) - print('component-node name={0}-extraction-{1}-{2} component={0}-extraction-{1}-{2} input={0} '.format( - self.input_name, self.left_context, self.right_context), file=f) - stats_dim = 1 + self.input_dim * (2 if self.output_stddev else 1) - print('component name={0}-pooling-{1}-{2} type=StatisticsPoolingComponent input-dim={3} ' - 'input-period={4} left-context={1} right-context={2} num-log-count-features=0 ' - 'output-stddevs={5} '.format(self.input_name, self.left_context, self.right_context, - stats_dim, self.stats_period, - ('true' if self.output_stddev else 'false')), - file=f) - print('component-node name={0}-pooling-{1}-{2} component={0}-pooling-{1}-{2} input={0}-extraction-{1}-{2} '.format( - self.input_name, self.left_context, self.right_context), file=f) - - ## Work out splice_array From 3cda27b74cffab0d665416863fcb68b44f2df388 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 9 Dec 2016 23:51:51 -0500 Subject: [PATCH 086/530] asr_diarzation: Raw nnet3 changes --- .../nnet3/train/chain_objf/acoustic_model.py | 8 +- egs/wsj/s5/steps/libs/nnet3/train/common.py | 142 +++++++++++++++++- .../nnet3/train/frame_level_objf/common.py | 10 +- egs/wsj/s5/steps/nnet3/chain/train.py | 24 ++- egs/wsj/s5/steps/nnet3/train_dnn.py | 8 + egs/wsj/s5/steps/nnet3/train_raw_dnn.py | 8 + egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 22 ++- egs/wsj/s5/steps/nnet3/train_rnn.py | 60 ++++++-- 8 files changed, 250 insertions(+), 32 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py index 0c871f07c2e..7e712ad912e 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py @@ -223,7 +223,9 @@ def train_one_iteration(dir, iter, srand, egs_dir, leaky_hmm_coefficient, momentum, max_param_change, shuffle_buffer_size, frame_subsampling_factor, truncate_deriv_weights, - run_opts, background_process_handler=None): + run_opts, + dropout_proportions=None, + background_process_handler=None): """ Called from steps/nnet3/chain/train.py for one iteration for neural network training with LF-MMI objective @@ -302,6 +304,10 @@ def train_one_iteration(dir, iter, srand, egs_dir, cur_num_chunk_per_minibatch = num_chunk_per_minibatch / 2 cur_max_param_change = float(max_param_change) / math.sqrt(2) + if dropout_proportions is not None: + raw_model_string = common_train_lib.apply_dropout( + dropout_proportions, raw_model_string) + train_new_models(dir=dir, iter=iter, srand=srand, num_jobs=num_jobs, num_archives_processed=num_archives_processed, num_archives=num_archives, diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py index dc24b37fdee..f2485e36784 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -22,7 +22,7 @@ logger.addHandler(logging.NullHandler()) -class RunOpts: +class RunOpts(object): """A structure to store run options. 
Run options like queue.pl and run.pl, along with their memory @@ -318,6 +318,122 @@ def get_learning_rate(iter, num_jobs, num_iters, num_archives_processed, return num_jobs * effective_learning_rate +def parse_dropout_option(num_archives_to_process, dropout_option): + components = dropout_option.strip().split(' ') + dropout_schedule = [] + for component in components: + parts = component.split('=') + + if len(parts) == 2: + component_name = parts[0] + this_dropout_str = parts[1] + elif len(parts) == 1: + component_name = '*' + this_dropout_str = parts[0] + else: + raise Exception("The dropout schedule must be specified in the " + "format 'pattern1=func1 patter2=func2' where " + "the pattern can be omitted for a global function " + "for all components.\n" + "Got {0} in {1}".format(component, dropout_option)) + + this_dropout_values = _parse_dropout_string( + num_archives_to_process, this_dropout_str) + dropout_schedule.append((component_name, this_dropout_values)) + return dropout_schedule + + +def _parse_dropout_string(num_archives_to_process, dropout_str): + dropout_values = [] + parts = dropout_str.strip().split(',') + try: + if len(parts) < 2: + raise Exception("dropout proportion string must specify " + "at least the start and end dropouts") + + dropout_values.append((0, float(parts[0]))) + for i in range(1, len(parts)): + value_x_pair = parts[i].split('@') + if len(value_x_pair) == 1: + dropout_proportion = float(parts[i]) + dropout_values.append((0.5 * num_archives_to_process, + dropout_proportion)) + else: + assert len(value_x_pair) == 2 + dropout_proportion, data_fraction = value_x_pair + dropout_values.append( + (float(data_fraction) * num_archives_to_process, + float(dropout_proportion))) + + dropout_values.append((num_archives_to_process, float(parts[-1]))) + except Exception as e: + logger.error("Unable to parse dropout proportion string {0}. 
" + "See help for option " + "--dropout-schedule.".format(dropout_str)) + raise e + + # reverse sort so that its easy to retrieve the dropout proportion + # for a particular data fraction + dropout_values.sort(key=lambda x: x[0], reverse=True) + for num_archives, proportion in dropout_values: + assert num_archives <= num_archives_to_process and num_archives >= 0 + assert proportion <= 1 and proportion >= 0 + + return dropout_values + + +def get_dropout_proportions(dropout_schedule, + num_archives_processed): + + dropout_proportions = [] + for component_name, component_dropout_schedule in dropout_schedule: + dropout_proportions.append( + (component_name, + _get_component_dropout(component_dropout_schedule, + num_archives_processed))) + return dropout_proportions + + +def _get_component_dropout(dropout_schedule, num_archives_processed): + if num_archives_processed == 0: + assert dropout_schedule[-1][0] == 0 + return dropout_schedule[-1][1] + try: + (dropout_schedule_index, initial_num_archives, + initial_dropout) = next((i, tup[0], tup[1]) + for i, tup in enumerate(dropout_schedule) + if tup[0] < num_archives_processed) + except StopIteration as e: + logger.error("Could not find num_archives in dropout schedule " + "corresponding to num_archives_processed {0}.\n" + "Maybe something wrong with the parsed " + "dropout schedule {1}.".format( + num_archives_processed, dropout_schedule)) + raise e + + final_num_archives, final_dropout = dropout_schedule[ + dropout_schedule_index - 1] + assert (num_archives_processed > initial_num_archives + and num_archives_processed < final_num_archives) + + return ((num_archives_processed - initial_num_archives) + * (final_dropout - initial_dropout) + / (final_num_archives - initial_num_archives)) + + +def apply_dropout(dropout_proportions, raw_model_string): + edit_config_lines = [] + + for component_name, dropout_proportion in dropout_proportions: + edit_config_lines.append( + "set-dropout-proportion name={0} proportion={1}".format( + component_name, dropout_proportion)) + + return ("""{raw_model_string} nnet3-copy --edits='{edits}' \ + - - |""".format(raw_model_string=raw_model_string, + edits=";".join(edit_config_lines))) + + def do_shrinkage(iter, model_file, shrink_saturation_threshold, get_raw_nnet_from_am=True): @@ -530,6 +646,30 @@ def __init__(self): Note: we implemented it in such a way that it doesn't increase the effective learning rate.""") + self.parser.add_argument("--trainer.dropout-schedule", type=str, + dest='dropout_schedule', default='', + help="""Use this to specify the dropout + schedule. You specify a piecewise linear + function on the domain [0,1], where 0 is the + start and 1 is the end of training; the + function-argument (x) rises linearly with the + amount of data you have seen, not iteration + number (this improves invariance to + num-jobs-{initial-final}). E.g. '0,0.2,0' + means 0 at the start; 0.2 after seeing half + the data; and 0 at the end. You may specify + the x-value of selected points, e.g. + '0,0.2@0.25,0' means that the 0.2 + dropout-proportion is reached a quarter of the + way through the data. The start/end x-values + are at x=0/x=1, and other unspecified x-values + are interpolated between known x-values. You + may specify different rules for different + component-name patterns using 'pattern1=func1 + pattern2=func2', e.g. 'relu*=0,0.1,0 + lstm*=0,0.2,0'. 
More general should precede + less general patterns, as they are applied + sequentially.""") # General options self.parser.add_argument("--stage", type=int, default=-4, diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index 55508daf02c..37dd36aa392 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -13,6 +13,7 @@ import logging import math import os +import random import time import libs.common as common_lib @@ -153,7 +154,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, cv_minibatch_size=256, frames_per_eg=-1, min_deriv_time=None, max_deriv_time=None, min_left_context=None, min_right_context=None, - shrinkage_value=1.0, + shrinkage_value=1.0, dropout_proportions=None, get_raw_nnet_from_am=True, background_process_handler=None, extra_egs_copy_cmd=""): @@ -280,11 +281,16 @@ def train_one_iteration(dir, iter, srand, egs_dir, except OSError: pass + if dropout_proportions is not None: + raw_model_string = common_train_lib.apply_dropout( + dropout_proportions, raw_model_string) + train_new_models(dir=dir, iter=iter, srand=srand, num_jobs=num_jobs, num_archives_processed=num_archives_processed, num_archives=num_archives, raw_model_string=raw_model_string, egs_dir=egs_dir, - left_context=left_context, right_context=right_context, + left_context=left_context, + right_context=right_context, momentum=momentum, max_param_change=cur_max_param_change, shuffle_buffer_size=shuffle_buffer_size, minibatch_size=cur_minibatch_size, diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index 0254589be85..ca5c5f098ad 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -134,14 +134,17 @@ def get_args(): shrink-threshold at the non-linearities. E.g. 0.99. Only applicable when the neural net contains sigmoid or tanh units.""") - parser.add_argument("--trainer.optimization.shrink-saturation-threshold", type=float, + parser.add_argument("--trainer.optimization.shrink-saturation-threshold", + type=float, dest='shrink_saturation_threshold', default=0.40, - help="""Threshold that controls when we apply the 'shrinkage' - (i.e. scaling by shrink-value). If the saturation of the - sigmoid and tanh nonlinearities in the neural net (as - measured by steps/nnet3/get_saturation.pl) exceeds this - threshold we scale the parameter matrices with the + help="""Threshold that controls when we apply the + 'shrinkage' (i.e. scaling by shrink-value). 
If the + saturation of the sigmoid and tanh nonlinearities in + the neural net (as measured by + steps/nnet3/get_saturation.pl) exceeds this threshold + we scale the parameter matrices with the shrink-value.""") + # RNN-specific training options parser.add_argument("--trainer.deriv-truncate-margin", type=int, dest='deriv_truncate_margin', default=None, @@ -307,7 +310,6 @@ def train(args, run_opts, background_process_handler): nnet3-init --srand=-2 {dir}/configs/init.config \ {dir}/init.raw""".format(command=run_opts.command, dir=args.dir)) - egs_left_context = left_context + args.frame_subsampling_factor/2 egs_right_context = right_context + args.frame_subsampling_factor/2 @@ -392,6 +394,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): args.initial_effective_lrate, args.final_effective_lrate) + if args.dropout_schedule is not None: + dropout_schedule = common_train_lib.parse_dropout_option( + num_archives_to_process, args.dropout_schedule) + min_deriv_time = None max_deriv_time = None if args.deriv_truncate_margin is not None: @@ -436,6 +442,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), + dropout_proportions=( + None if args.dropout_schedule is None + else common_train_lib.get_dropout_proportions( + dropout_schedule, num_archives_processed)), shrinkage_value=shrinkage_value, num_chunk_per_minibatch=args.num_chunk_per_minibatch, num_hidden_layers=num_hidden_layers, diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index 83170ea1e8e..2813f719606 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -286,6 +286,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): args.initial_effective_lrate, args.final_effective_lrate) + if args.dropout_schedule is not None: + dropout_schedule = common_train_lib.parse_dropout_option( + num_archives_to_process, args.dropout_schedule) + logger.info("Training will run for {0} epochs = " "{1} iterations".format(args.num_epochs, num_iters)) @@ -312,6 +316,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), + dropout_proportions=( + None if args.dropout_schedule is None + else common_train_lib.get_dropout_proportions( + dropout_schedule, num_archives_processed)), minibatch_size=args.minibatch_size, frames_per_eg=args.frames_per_eg, num_hidden_layers=num_hidden_layers, diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index d7651889d83..efeaa13662e 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -288,6 +288,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): args.initial_effective_lrate, args.final_effective_lrate) + if args.dropout_schedule is not None: + dropout_schedule = common_train_lib.parse_dropout_option( + num_archives_to_process, args.dropout_schedule) + logger.info("Training will run for {0} epochs = " "{1} iterations".format(args.num_epochs, num_iters)) @@ -314,6 +318,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), + dropout_proportions=( + None if args.dropout_schedule is None + else common_train_lib.get_dropout_proportions( + 
dropout_schedule, num_archives_processed)), minibatch_size=args.minibatch_size, frames_per_eg=args.frames_per_eg, num_hidden_layers=num_hidden_layers, diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index e4af318fb57..4a2424e54f5 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -116,13 +116,15 @@ def get_args(): shrink-threshold at the non-linearities. E.g. 0.99. Only applicable when the neural net contains sigmoid or tanh units.""") - parser.add_argument("--trainer.optimization.shrink-saturation-threshold", type=float, + parser.add_argument("--trainer.optimization.shrink-saturation-threshold", + type=float, dest='shrink_saturation_threshold', default=0.40, - help="""Threshold that controls when we apply the 'shrinkage' - (i.e. scaling by shrink-value). If the saturation of the - sigmoid and tanh nonlinearities in the neural net (as - measured by steps/nnet3/get_saturation.pl) exceeds this - threshold we scale the parameter matrices with the + help="""Threshold that controls when we apply the + 'shrinkage' (i.e. scaling by shrink-value). If the + saturation of the sigmoid and tanh nonlinearities in + the neural net (as measured by + steps/nnet3/get_saturation.pl) exceeds this threshold + we scale the parameter matrices with the shrink-value.""") parser.add_argument("--trainer.optimization.cv-minibatch-size", type=int, dest='cv_minibatch_size', default=256, @@ -391,6 +393,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): args.initial_effective_lrate, args.final_effective_lrate) + if args.dropout_schedule is not None: + dropout_schedule = common_train_lib.parse_dropout_option( + num_archives_to_process, args.dropout_schedule) + min_deriv_time = None max_deriv_time = None if args.deriv_truncate_margin is not None: @@ -437,6 +443,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), + dropout_proportions=( + None if args.dropout_schedule is None + else common_train_lib.get_dropout_proportions( + dropout_schedule, num_archives_processed)), shrinkage_value=shrinkage_value, minibatch_size=args.num_chunk_per_minibatch, num_hidden_layers=num_hidden_layers, diff --git a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index 482c9a8ee03..a0318f28829 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -65,13 +65,24 @@ def get_args(): used to train an LSTM. 
Caution: if you double this you should halve --trainer.samples-per-iter.""") - parser.add_argument("--egs.chunk-left-context", type=int, - dest='chunk_left_context', default=40, - help="""Number of left steps used in the estimation of - LSTM state before prediction of the first label""") - - parser.add_argument("--trainer.samples-per-iter", type=int, - dest='samples_per_iter', default=20000, + parser.add_argument("--egs.chunk-left-context", type=int, dest='chunk_left_context', + default = 40, + help="""Number of left steps used in the estimation of LSTM + state before prediction of the first label""") + parser.add_argument("--egs.chunk-right-context", type=int, dest='chunk_right_context', + default = 0, + help="""Number of right steps used in the estimation of BLSTM + state before prediction of the first label""") + parser.add_argument("--trainer.min-extra-left-context", type=int, dest='min_extra_left_context', + default = None, + help="""Number of left steps used in the estimation of LSTM + state before prediction of the first label""") + parser.add_argument("--trainer.min-extra-right-context", type=int, dest='min_extra_right_context', + default = None, + help="""Number of right steps used in the estimation of BLSTM + state before prediction of the first label""") + parser.add_argument("--trainer.samples-per-iter", type=int, dest='samples_per_iter', + default=20000, help="""This is really the number of egs in each archive. Each eg has 'chunk_width' frames in it-- for chunk_width=20, this value (20k) is equivalent @@ -100,13 +111,15 @@ def get_args(): shrink-threshold at the non-linearities. E.g. 0.99. Only applicable when the neural net contains sigmoid or tanh units.""") - parser.add_argument("--trainer.optimization.shrink-saturation-threshold", type=float, + parser.add_argument("--trainer.optimization.shrink-saturation-threshold", + type=float, dest='shrink_saturation_threshold', default=0.40, - help="""Threshold that controls when we apply the 'shrinkage' - (i.e. scaling by shrink-value). If the saturation of the - sigmoid and tanh nonlinearities in the neural net (as - measured by steps/nnet3/get_saturation.pl) exceeds this - threshold we scale the parameter matrices with the + help="""Threshold that controls when we apply the + 'shrinkage' (i.e. scaling by shrink-value). 
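+                             (Illustration: with the default threshold of
+                             0.40, a measured saturation of 0.55 triggers
+                             scaling by the shrink-value, while 0.10 leaves
+                             the parameters unscaled.)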
If the + saturation of the sigmoid and tanh nonlinearities in + the neural net (as measured by + steps/nnet3/get_saturation.pl) exceeds this threshold + we scale the parameter matrices with the shrink-value.""") parser.add_argument("--trainer.optimization.cv-minibatch-size", type=int, dest='cv_minibatch_size', default=256, @@ -177,12 +190,19 @@ def process_args(args): "--trainer.deriv-truncate-margin.".format( args.deriv_truncate_margin)) + if args.min_extra_left_context is None: + args.min_extra_left_context = args.chunk_left_context + + if args.min_extra_right_context is None: + args.min_extra_right_context = args.chunk_right_context + if (not os.path.exists(args.dir) or not os.path.exists(args.dir+"/configs")): raise Exception("This scripts expects {0} to exist and have a configs " "directory which is the output of " "make_configs.py script") + if args.transform_dir is None: args.transform_dir = args.ali_dir @@ -363,6 +383,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): args.initial_effective_lrate, args.final_effective_lrate) + if args.dropout_schedule is not None: + dropout_schedule = common_train_lib.parse_dropout_option( + num_archives_to_process, args.dropout_schedule) + min_deriv_time = None max_deriv_time = None if args.deriv_truncate_margin is not None: @@ -408,12 +432,18 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), + dropout_proportions=( + None if args.dropout_schedule is None + else common_train_lib.get_dropout_proportions( + dropout_schedule, num_archives_processed)), shrinkage_value=shrinkage_value, minibatch_size=args.num_chunk_per_minibatch, num_hidden_layers=num_hidden_layers, add_layers_period=args.add_layers_period, - left_context=left_context, - right_context=right_context, + min_left_context = model_left_context + args.min_extra_left_context, + min_right_context = model_right_context + args.min_extra_right_context, + max_left_context = left_context, + max_right_context = right_context, min_deriv_time=min_deriv_time, max_deriv_time=max_deriv_time, momentum=args.momentum, From d2e7742663c34a401fad616a289a59b4f101a0b0 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 9 Dec 2016 23:52:19 -0500 Subject: [PATCH 087/530] asr_diarzation: Minor consmetic change --- egs/wsj/s5/steps/nnet3/lstm/make_configs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/nnet3/lstm/make_configs.py b/egs/wsj/s5/steps/nnet3/lstm/make_configs.py index 9fb9fad1d0c..ff0fb8225ac 100755 --- a/egs/wsj/s5/steps/nnet3/lstm/make_configs.py +++ b/egs/wsj/s5/steps/nnet3/lstm/make_configs.py @@ -319,7 +319,8 @@ def MakeConfigs(config_dir, feat_dim, ivector_dim, num_targets, add_lda, for i in range(num_lstm_layers, num_hidden_layers): prev_layer_output = nodes.AddAffRelNormLayer(config_lines, "L{0}".format(i+1), prev_layer_output, hidden_dim, - ng_affine_options, self_repair_scale = self_repair_scale_nonlinearity, max_change_per_component = max_change_per_component) + ng_affine_options, self_repair_scale = self_repair_scale_nonlinearity, + max_change_per_component = max_change_per_component) # make the intermediate config file for layerwise discriminative # training nodes.AddFinalLayer(config_lines, prev_layer_output, num_targets, ng_affine_options, max_change_per_component = max_change_per_component_final, label_delay = label_delay, include_log_softmax = include_log_softmax, add_final_sigmoid = 
add_final_sigmoid, objective_type = objective_type) From ebb3c5afd26005f67bcd6ca0b9bc6826ce12fa25 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 9 Dec 2016 23:52:56 -0500 Subject: [PATCH 088/530] asr_diarization: fixing bug in reverberate_data_dir.py --- egs/wsj/s5/steps/data/reverberate_data_dir.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py index 69bc5e08b3b..0080bdba5f0 100755 --- a/egs/wsj/s5/steps/data/reverberate_data_dir.py +++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py @@ -113,6 +113,12 @@ def CheckArgs(args): if not os.path.exists(args.output_additive_noise_dir): os.makedirs(args.output_additive_noise_dir) + ## Check arguments. + + if args.num_replicas > 1 and args.prefix is None: + args.prefix = "rvb" + warnings.warn("--prefix is set to 'rvb' as --num-replications is larger than 1.") + if not args.num_replicas > 0: raise Exception("--num-replications cannot be non-positive") From f99d7cdcd26940a2974249afdeadf526a67a214d Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 9 Dec 2016 23:53:38 -0500 Subject: [PATCH 089/530] asr_diarization: Adding learning rate facor --- src/nnet3/nnet-component-itf.h | 6 +-- src/nnet3/nnet-utils.cc | 92 ++++++++++++++++++++++++++++++---- src/nnet3/nnet-utils.h | 14 ++++++ 3 files changed, 98 insertions(+), 14 deletions(-) diff --git a/src/nnet3/nnet-component-itf.h b/src/nnet3/nnet-component-itf.h index 3013c485ea4..600450a578a 100644 --- a/src/nnet3/nnet-component-itf.h +++ b/src/nnet3/nnet-component-itf.h @@ -413,15 +413,15 @@ class UpdatableComponent: public Component { /// a different value than x will returned. BaseFloat LearningRate() const { return learning_rate_; } + /// Gets the learning rate factor + BaseFloat LearningRateFactor() const { return learning_rate_factor_; } + /// Gets per-component max-change value. Note: the components themselves do /// not enforce the per-component max-change; it's enforced in class /// NnetTrainer by querying the max-changes for each component. /// See NnetTrainer::UpdateParamsWithMaxChange() in nnet3/nnet-training.cc. BaseFloat MaxChange() const { return max_change_; } - /// Gets the learning rate factor - BaseFloat LearningRateFactor() const { return learning_rate_factor_; } - virtual std::string Info() const; /// The following new virtual function returns the total dimension of diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc index d65193d9a54..49cb16126ed 100644 --- a/src/nnet3/nnet-utils.cc +++ b/src/nnet3/nnet-utils.cc @@ -144,7 +144,7 @@ void ComputeSimpleNnetContext(const Nnet &nnet, // This will crash if the total context (left + right) is greater // than window_size. - int32 window_size = 100; + int32 window_size = 150; // by going "<= modulus" instead of "< modulus" we do one more computation // than we really need; it becomes a sanity check. for (int32 input_start = 0; input_start <= modulus; input_start++) @@ -301,6 +301,25 @@ void SetLearningRates(const Vector &learning_rates, KALDI_ASSERT(i == learning_rates.Dim()); } +void SetLearningRateFactors(const Vector &learning_rate_factors, + Nnet *nnet) { + int32 i = 0; + for (int32 c = 0; c < nnet->NumComponents(); c++) { + Component *comp = nnet->GetComponent(c); + if (comp->Properties() & kUpdatableComponent) { + // For now all updatable components inherit from class UpdatableComponent. + // If that changes in future, we will change this code. 
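+      // Note: a learning-rate factor does not itself change the component's
+      // stored learning rate; it scales any rate subsequently passed to
+      // SetUnderlyingLearningRate(), so it acts as a relative learning rate.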
+ UpdatableComponent *uc = dynamic_cast(comp); + if (uc == NULL) + KALDI_ERR << "Updatable component does not inherit from class " + "UpdatableComponent; change this code."; + KALDI_ASSERT(i < learning_rate_factors.Dim()); + uc->SetLearningRateFactor(learning_rate_factors(i++)); + } + } + KALDI_ASSERT(i == learning_rate_factors.Dim()); +} + void GetLearningRates(const Nnet &nnet, Vector *learning_rates) { learning_rates->Resize(NumUpdatableComponents(nnet)); @@ -320,6 +339,25 @@ void GetLearningRates(const Nnet &nnet, KALDI_ASSERT(i == learning_rates->Dim()); } +void GetLearningRateFactors(const Nnet &nnet, + Vector *learning_rate_factors) { + learning_rate_factors->Resize(NumUpdatableComponents(nnet)); + int32 i = 0; + for (int32 c = 0; c < nnet.NumComponents(); c++) { + const Component *comp = nnet.GetComponent(c); + if (comp->Properties() & kUpdatableComponent) { + // For now all updatable components inherit from class UpdatableComponent. + // If that changes in future, we will change this code. + const UpdatableComponent *uc = dynamic_cast(comp); + if (uc == NULL) + KALDI_ERR << "Updatable component does not inherit from class " + "UpdatableComponent; change this code."; + (*learning_rate_factors)(i++) = uc->LearningRateFactor(); + } + } + KALDI_ASSERT(i == learning_rate_factors->Dim()); +} + void ScaleNnetComponents(const Vector &scale_factors, Nnet *nnet) { int32 i = 0; @@ -351,6 +389,25 @@ void ScaleNnet(BaseFloat scale, Nnet *nnet) { } } +void ScaleSingleComponent(BaseFloat scale, Nnet *nnet, std::string component_name) { + if (scale == 1.0) return; + else if (scale == 0.0) { + SetZero(false, nnet); + } else { + for (int32 c = 0; c < nnet->NumComponents(); c++) { + Component *comp = nnet->GetComponent(c); + std::string this_component_type = nnet->GetComponent(c)->Type(); + if (this_component_type == component_name) { + if (comp->Properties() & kUpdatableComponent) + comp->Scale(scale); + else + KALDI_ERR << "component " << component_name + << "is not an updatable component."; + } + } + } +} + void AddNnetComponents(const Nnet &src, const Vector &alphas, BaseFloat scale, Nnet *dest) { if (src.NumComponents() != dest->NumComponents()) @@ -523,16 +580,6 @@ std::string NnetInfo(const Nnet &nnet) { return ostr.str(); } -void SetDropoutProportion(BaseFloat dropout_proportion, - Nnet *nnet) { - for (int32 c = 0; c < nnet->NumComponents(); c++) { - Component *comp = nnet->GetComponent(c); - DropoutComponent *dc = dynamic_cast(comp); - if (dc != NULL) - dc->SetDropoutProportion(dropout_proportion); - } -} - void FindOrphanComponents(const Nnet &nnet, std::vector *components) { int32 num_components = nnet.NumComponents(), num_nodes = nnet.NumNodes(); std::vector is_used(num_components, false); @@ -688,6 +735,29 @@ void ReadEditConfig(std::istream &edit_config_is, Nnet *nnet) { if (outputs_remaining == 0) KALDI_ERR << "All outputs were removed."; nnet->RemoveSomeNodes(nodes_to_remove); + } else if (directive == "set-dropout-proportion") { + std::string name_pattern = "*"; + // name_pattern defaults to '*' if none is given. This pattern + // matches names of components, not nodes. 
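+      // For example, an edits-config line such as
+      //   set-dropout-proportion name=lstm* proportion=0.1
+      // (pattern shown for illustration) updates every DropoutComponent
+      // whose component name matches 'lstm*'.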
+ config_line.GetValue("name", &name_pattern); + BaseFloat proportion = -1; + if (!config_line.GetValue("proportion", &proportion)) { + KALDI_ERR << "In edits-config, expected proportion to be set in line: " + << config_line.WholeLine(); + } + DropoutComponent *component = NULL; + int32 num_dropout_proportions_set = 0; + for (int32 c = 0; c < nnet->NumComponents(); c++) { + if (NameMatchesPattern(nnet->GetComponentName(c).c_str(), + name_pattern.c_str()) && + (component = + dynamic_cast(nnet->GetComponent(c)))) { + component->SetDropoutProportion(proportion); + num_dropout_proportions_set++; + } + } + KALDI_LOG << "Set dropout proportions for " + << num_dropout_proportions_set << " nodes."; } else { KALDI_ERR << "Directive '" << directive << "' is not currently " "supported (reading edit-config)."; diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h index 1e0dcefd703..21dbc67be2a 100644 --- a/src/nnet3/nnet-utils.h +++ b/src/nnet3/nnet-utils.h @@ -127,11 +127,22 @@ void ScaleLearningRate(BaseFloat learning_rate_scale, void SetLearningRates(const Vector &learning_rates, Nnet *nnet); +/// Sets the learning rate factors for all the updatable components in +/// the neural net to the values in 'learning_rate_factors' vector +/// (one for each updatable component). +void SetLearningRateFactors( + const Vector &learning_rate_factors, + Nnet *nnet); + /// Get the learning rates for all the updatable components in the neural net /// (the output must have dim equal to the number of updatable components). void GetLearningRates(const Nnet &nnet, Vector *learning_rates); +/// Get the learning rate factors for all the updatable components in the neural net +void GetLearningRateFactors(const Nnet &nnet, + Vector *learning_rate_factors); + /// Scales the nnet parameters and stats by this scale. void ScaleNnet(BaseFloat scale, Nnet *nnet); @@ -233,6 +244,9 @@ void FindOrphanNodes(const Nnet &nnet, std::vector *nodes); remove internal nodes directly; instead you should use the command 'remove-orphans'. + set-dropout-proportion [name=] proportion= + Sets the dropout rates for any components of type DropoutComponent whose + names match the given (e.g. lstm*). defaults to "*". \endverbatim */ void ReadEditConfig(std::istream &config_file, Nnet *nnet); From b47298741f99f18d0720da20709da0d902595458 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 9 Dec 2016 23:54:17 -0500 Subject: [PATCH 090/530] asr_diarization: Adding dropout --- src/nnet3bin/nnet3-copy.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/nnet3bin/nnet3-copy.cc b/src/nnet3bin/nnet3-copy.cc index e4a41933fff..ce0fb510260 100644 --- a/src/nnet3bin/nnet3-copy.cc +++ b/src/nnet3bin/nnet3-copy.cc @@ -41,8 +41,9 @@ int main(int argc, char *argv[]) { " nnet3-copy --binary=false 0.raw text.raw\n"; bool binary_write = true; + BaseFloat learning_rate = -1, - dropout = 0.0; + dropout = -1; std::string nnet_config, edits_config, edits_str; BaseFloat scale = 1.0; @@ -64,7 +65,10 @@ int main(int argc, char *argv[]) { "will be converted to newlines before parsing. E.g. " "'--edits=remove-orphans'."); po.Register("set-dropout-proportion", &dropout, "Set dropout proportion " - "in all DropoutComponent to this value."); + "in all DropoutComponent to this value. " + "This option is deprecated. Use set-dropout-proportion " + "option in edits-config. 
See comments in ReadEditConfig() " + "in nnet3/nnet-utils.h."); po.Register("scale", &scale, "The parameter matrices are scaled" " by the specified value."); po.Read(argc, argv); @@ -92,7 +96,10 @@ int main(int argc, char *argv[]) { ScaleNnet(scale, &nnet); if (dropout > 0) - SetDropoutProportion(dropout, &nnet); + KALDI_ERR << "--dropout option is deprecated. " + << "Use set-dropout-proportion " + << "option in edits-config. See comments in ReadEditConfig() " + << "in nnet3/nnet-utils.h."; if (!edits_config.empty()) { Input ki(edits_config); From df3319eff2fadd941cff17fb390c4676838124b3 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 9 Dec 2016 23:54:38 -0500 Subject: [PATCH 091/530] asr_diarization: Adding more info in nnet3-info --- src/nnet3bin/nnet3-info.cc | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/nnet3bin/nnet3-info.cc b/src/nnet3bin/nnet3-info.cc index 6b7fb2c629e..7f8dc82b3ce 100644 --- a/src/nnet3bin/nnet3-info.cc +++ b/src/nnet3bin/nnet3-info.cc @@ -20,6 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "nnet3/nnet-nnet.h" +#include "nnet3/nnet-utils.h" int main(int argc, char *argv[]) { try { @@ -36,7 +37,14 @@ int main(int argc, char *argv[]) { " nnet3-info 0.raw\n" "See also: nnet3-am-info\n"; + bool print_detailed_info = false; + bool print_learning_rates = false; + ParseOptions po(usage); + po.Register("print-detailed-info", &print_detailed_info, + "Print more detailed info"); + po.Register("print-learning-rates", &print_learning_rates, + "Print learning rates of updatable components"); po.Read(argc, argv); @@ -50,7 +58,24 @@ int main(int argc, char *argv[]) { Nnet nnet; ReadKaldiObject(raw_nnet_rxfilename, &nnet); - std::cout << nnet.Info(); + if (print_learning_rates) { + Vector learning_rates; + GetLearningRates(nnet, &learning_rates); + std::cout << "learning-rates: " + << PrintVectorPerUpdatableComponent(nnet, learning_rates) + << "\n"; + + Vector learning_rate_factors; + GetLearningRateFactors(nnet, &learning_rate_factors); + std::cout << "learning-rate-factors: " + << PrintVectorPerUpdatableComponent(nnet, learning_rate_factors) + << "\n"; + } + + if (print_detailed_info) + std::cout << NnetInfo(nnet); + else + std::cout << nnet.Info(); return 0; } catch(const std::exception &e) { From 99c88451245e72916cd0b7b59ec08a62b8a483d3 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 9 Dec 2016 23:56:46 -0500 Subject: [PATCH 092/530] asr_diarization: Minor bug fix in AMI run_cleanup_segmentation.sh --- egs/ami/s5b/local/run_cleanup_segmentation.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/egs/ami/s5b/local/run_cleanup_segmentation.sh b/egs/ami/s5b/local/run_cleanup_segmentation.sh index e2f0b0516ce..9a947ce1fce 100755 --- a/egs/ami/s5b/local/run_cleanup_segmentation.sh +++ b/egs/ami/s5b/local/run_cleanup_segmentation.sh @@ -129,7 +129,6 @@ fi final_lm=`cat data/local/lm/final_lm` LM=$final_lm.pr1-7 - if [ $stage -le 5 ]; then graph_dir=exp/$mic/${gmm}_${cleanup_affix}/graph_$LM nj_dev=$(cat data/$mic/dev/spk2utt | wc -l) @@ -137,9 +136,9 @@ if [ $stage -le 5 ]; then $decode_cmd $graph_dir/mkgraph.log \ utils/mkgraph.sh data/lang_$LM exp/$mic/${gmm}_${cleanup_affix} $graph_dir - steps/decode_fmllr.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \ + steps/decode_fmllr.sh --nj $nj_dev --cmd "$decode_cmd" --config conf/decode.conf \ $graph_dir data/$mic/dev exp/$mic/${gmm}_${cleanup_affix}/decode_dev_$LM - steps/decode_fmllr.sh --nj $nj --cmd 
"$decode_cmd" --config conf/decode.conf \ + steps/decode_fmllr.sh --nj $nj_eval --cmd "$decode_cmd" --config conf/decode.conf \ $graph_dir data/$mic/eval exp/$mic/${gmm}_${cleanup_affix}/decode_eval_$LM fi From 26b49b1284d2c488dcb862c265c8752addadfd35 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 9 Dec 2016 23:57:19 -0500 Subject: [PATCH 093/530] Adding missing break in nnet-test-utils --- src/nnet3/nnet-test-utils.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nnet3/nnet-test-utils.cc b/src/nnet3/nnet-test-utils.cc index da519fa1cd3..7a6e476ded1 100644 --- a/src/nnet3/nnet-test-utils.cc +++ b/src/nnet3/nnet-test-utils.cc @@ -1401,6 +1401,7 @@ static void GenerateRandomComponentConfig(std::string *component_type, *component_type = "DropoutComponent"; os << "dim=" << RandInt(1, 200) << " dropout-proportion=" << RandUniform(); + break; } case 30: { *component_type = "LstmNonlinearityComponent"; From ad1c10c88e686e45041881b3a34683ba980dfbfc Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 9 Dec 2016 23:58:57 -0500 Subject: [PATCH 094/530] asr_diarization: adding compute-fscore binary --- src/bin/Makefile | 2 +- src/bin/compute-fscore.cc | 153 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 src/bin/compute-fscore.cc diff --git a/src/bin/Makefile b/src/bin/Makefile index 3dc59fe8112..1948ba2d681 100644 --- a/src/bin/Makefile +++ b/src/bin/Makefile @@ -25,7 +25,7 @@ BINFILES = align-equal align-equal-compiled acc-tree-stats \ build-pfile-from-ali get-post-on-ali tree-info am-info \ vector-sum matrix-sum-rows est-pca sum-lda-accs sum-mllt-accs \ transform-vec align-text matrix-dim weight-pdf-post weight-matrix \ - matrix-add-offset matrix-dot-product + matrix-add-offset matrix-dot-product compute-fscore OBJFILES = diff --git a/src/bin/compute-fscore.cc b/src/bin/compute-fscore.cc new file mode 100644 index 00000000000..eb231fe361e --- /dev/null +++ b/src/bin/compute-fscore.cc @@ -0,0 +1,153 @@ +// bin/compute-fscore.cc + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+ +#include "base/kaldi-common.h" +#include "util/common-utils.h" + +int main(int argc, char *argv[]) { + using namespace kaldi; + typedef kaldi::int32 int32; + + try { + const char *usage = + "Compute F1-score, precision, recall etc.\n" + "Takes two alignment files and computes statistics\n" + "\n" + "Usage: compute-fscore [options] \n" + " e.g.: compute-fscore ark:data/train/text ark:hyp_text\n"; + + ParseOptions po(usage); + + std::string mode = "strict"; + std::string mask_rspecifier; + + po.Register("mode", &mode, + "Scoring mode: \"present\"|\"all\"|\"strict\":\n" + " \"present\" means score those we have transcriptions for\n" + " \"all\" means treat absent transcriptions as empty\n" + " \"strict\" means die if all in ref not also in hyp"); + po.Register("mask", &mask_rspecifier, + "Only score on frames where mask is 1"); + + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string ref_rspecifier = po.GetArg(1); + std::string hyp_rspecifier = po.GetArg(2); + + if (mode != "strict" && mode != "present" && mode != "all") { + KALDI_ERR << "--mode option invalid: expected \"present\"|\"all\"|\"strict\", got " + << mode; + } + + int64 num_tp = 0, num_fp = 0, num_tn = 0, num_fn = 0, num_frames = 0; + int32 num_absent_sents = 0; + + // Both text and integers are loaded as vector of strings, + SequentialInt32VectorReader ref_reader(ref_rspecifier); + RandomAccessInt32VectorReader hyp_reader(hyp_rspecifier); + RandomAccessInt32VectorReader mask_reader(mask_rspecifier); + + // Main loop, accumulate WER stats, + for (; !ref_reader.Done(); ref_reader.Next()) { + const std::string &key = ref_reader.Key(); + const std::vector &ref_ali = ref_reader.Value(); + std::vector hyp_ali; + if (!hyp_reader.HasKey(key)) { + if (mode == "strict") + KALDI_ERR << "No hypothesis for key " << key << " and strict " + "mode specifier."; + num_absent_sents++; + if (mode == "present") // do not score this one. + continue; + } else { + hyp_ali = hyp_reader.Value(key); + } + + std::vector mask_ali; + if (!mask_rspecifier.empty()) { + if (!mask_reader.HasKey(key)) { + if (mode == "strict") + KALDI_ERR << "No hypothesis for key " << key << " and strict " + "mode specifier."; + num_absent_sents++; + if (mode == "present") // do not score this one. 
+            continue;
+        } else {
+          mask_ali = mask_reader.Value(key);
+        }
+      }
+
+      for (int32 i = 0; i < ref_ali.size(); i++) {
+        if ( (i < hyp_ali.size() && hyp_ali[i] != 0 && hyp_ali[i] != 1) ||
+             (i < ref_ali.size() && ref_ali[i] != 0 && ref_ali[i] != 1) ||
+             (i < mask_ali.size() && mask_ali[i] != 0 && mask_ali[i] != 1) ) {
+          KALDI_ERR << "Expecting alignment to be 0s or 1s";
+        }
+
+        if (!mask_rspecifier.empty() && (std::abs(static_cast<int32>(ref_ali.size()) - static_cast<int32>(mask_ali.size())) > 2) )
+          KALDI_ERR << "Length mismatch: mask vs ref";
+
+        if (!mask_rspecifier.empty() && (i >= mask_ali.size() || mask_ali[i] == 0)) continue;
+        num_frames++;
+
+        if (ref_ali[i] == 1 && i >= hyp_ali.size()) { num_fn++; continue; }
+        if (ref_ali[i] == 0 && i >= hyp_ali.size()) { num_tn++; continue; }
+
+        if (ref_ali[i] == 1 && hyp_ali[i] == 1) num_tp++;
+        else if (ref_ali[i] == 0 && hyp_ali[i] == 1) num_fp++;
+        else if (ref_ali[i] == 1 && hyp_ali[i] == 0) num_fn++;
+        else if (ref_ali[i] == 0 && hyp_ali[i] == 0) num_tn++;
+        else
+          KALDI_ERR << "Unknown condition";
+      }
+    }
+
+    // Print the output,
+    std::cout.precision(2);
+    std::cerr.precision(2);
+
+    BaseFloat precision = static_cast<BaseFloat>(num_tp) / (num_tp + num_fp);
+    BaseFloat recall = static_cast<BaseFloat>(num_tp) / (num_tp + num_fn);
+
+    std::cout << "F1 " << 2 * precision * recall / (precision + recall) << "\n";
+    std::cout << "Precision " << precision << "\n";
+    std::cout << "Recall " << recall << "\n";
+    std::cout << "Specificity "
+              << static_cast<BaseFloat>(num_tn) / (num_tn + num_fp) << "\n";
+    std::cout << "Accuracy "
+              << static_cast<BaseFloat>(num_tp + num_tn) / num_frames << "\n";
+
+    std::cerr << "TP " << num_tp << "\n";
+    std::cerr << "FP " << num_fp << "\n";
+    std::cerr << "TN " << num_tn << "\n";
+    std::cerr << "FN " << num_fn << "\n";
+    std::cerr << "Length " << num_frames << "\n";
+
+    return 0;
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
From f99d7cdcd26940a2974249afdeadf526a67a214d Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 23 Nov 2016 22:42:54 -0500
Subject: [PATCH 095/530] asr_diarization: Create make_overlapped_data_dir.py for overlapped speech detection

---
 .../s5/steps/data/make_overlapped_data_dir.py | 541 ++++++++++++++++++
 1 file changed, 541 insertions(+)
 create mode 100644 egs/wsj/s5/steps/data/make_overlapped_data_dir.py

diff --git a/egs/wsj/s5/steps/data/make_overlapped_data_dir.py b/egs/wsj/s5/steps/data/make_overlapped_data_dir.py
new file mode 100644
index 00000000000..86137c26e25
--- /dev/null
+++ b/egs/wsj/s5/steps/data/make_overlapped_data_dir.py
@@ -0,0 +1,541 @@
+#!/usr/bin/env python
+# Copyright 2016 Tom Ko
+# Apache 2.0
+# script to generate reverberated data
+
+# we're using python 3.x style print but want it to work in python 2.x,
+from __future__ import print_function
+import argparse, shlex, glob, math, os, random, sys, warnings, copy, imp, ast
+
+data_lib = imp.load_source('dml', 'steps/data/data_dir_manipulation_lib.py')
+
+def GetArgs():
+    # we add required arguments as named arguments for readability
+    parser = argparse.ArgumentParser(description="Reverberate the data directory with an option "
+                                     "to add isotropic and point source noises. "
+                                     "Usage: reverberate_data_dir.py [options...] "
+                                     "E.g. 
reverberate_data_dir.py --rir-set-parameters rir_list " + "--foreground-snrs 20:10:15:5:0 --background-snrs 20:10:15:5:0 " + "--noise-list-file noise_list --speech-rvb-probability 1 --num-replications 2 " + "--random-seed 1 data/train data/train_rvb", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument("--rir-set-parameters", type=str, action='append', required = True, dest = "rir_set_para_array", + help="Specifies the parameters of an RIR set. " + "Supports the specification of mixture_weight and rir_list_file_name. The mixture weight is optional. " + "The default mixture weight is the probability mass remaining after adding the mixture weights " + "of all the RIR lists, uniformly divided among the RIR lists without mixture weights. " + "E.g. --rir-set-parameters '0.3, rir_list' or 'rir_list' " + "the format of the RIR list file is " + "--rir-id --room-id " + "--receiver-position-id --source-position-id " + "--rt-60 --drr location " + "E.g. --rir-id 00001 --room-id 001 --receiver-position-id 001 --source-position-id 00001 " + "--rt60 0.58 --drr -4.885 data/impulses/Room001-00001.wav") + parser.add_argument("--noise-set-parameters", type=str, action='append', default = None, dest = "noise_set_para_array", + help="Specifies the parameters of an noise set. " + "Supports the specification of mixture_weight and noise_list_file_name. The mixture weight is optional. " + "The default mixture weight is the probability mass remaining after adding the mixture weights " + "of all the noise lists, uniformly divided among the noise lists without mixture weights. " + "E.g. --noise-set-parameters '0.3, noise_list' or 'noise_list' " + "the format of the noise list file is " + "--noise-id --noise-type " + "--bg-fg-type " + "--room-linkage " + "location " + "E.g. --noise-id 001 --noise-type isotropic --rir-id 00019 iso_noise.wav") + parser.add_argument("--speech-segments-set-parameters", type=str, action='append', default = None, dest = "speech_segments_set_para_array", + help="Specifies the speech segments for overlapped speech generation") + parser.add_argument("--num-replications", type=int, dest = "num_replicas", default = 1, + help="Number of replicate to generated for the data") + parser.add_argument('--foreground-snrs', type=str, dest = "foreground_snr_string", default = '20:10:0', help='When foreground noises are being added the script will iterate through these SNRs.') + parser.add_argument('--background-snrs', type=str, dest = "background_snr_string", default = '20:10:0', help='When background noises are being added the script will iterate through these SNRs.') + parser.add_argument('--overlap-snrs', type=str, dest = "overlap_snr_string", default = "20:10:0", help='When overlapping speech segments are being added the script will iterate through these SNRs.') + parser.add_argument('--prefix', type=str, default = None, help='This prefix will modified for each reverberated copy, by adding additional affixes.') + parser.add_argument("--speech-rvb-probability", type=float, default = 1.0, + help="Probability of reverberating a speech signal, e.g. 0 <= p <= 1") + parser.add_argument("--pointsource-noise-addition-probability", type=float, default = 1.0, + help="Probability of adding point-source noises, e.g. 0 <= p <= 1") + parser.add_argument("--isotropic-noise-addition-probability", type=float, default = 1.0, + help="Probability of adding isotropic noises, e.g. 
+    parser.add_argument('--foreground-snrs', type=str, dest = "foreground_snr_string", default = '20:10:0',
+                        help='When foreground noises are being added the script will iterate through these SNRs.')
+    parser.add_argument('--background-snrs', type=str, dest = "background_snr_string", default = '20:10:0',
+                        help='When background noises are being added the script will iterate through these SNRs.')
+    parser.add_argument('--overlap-snrs', type=str, dest = "overlap_snr_string", default = "20:10:0",
+                        help='When overlapping speech segments are being added the script will iterate through these SNRs.')
+    parser.add_argument('--prefix', type=str, default = None,
+                        help='This prefix will be modified for each reverberated copy, by adding additional affixes.')
+    parser.add_argument("--speech-rvb-probability", type=float, default = 1.0,
+                        help="Probability of reverberating a speech signal, e.g. 0 <= p <= 1")
+    parser.add_argument("--pointsource-noise-addition-probability", type=float, default = 1.0,
+                        help="Probability of adding point-source noises, e.g. 0 <= p <= 1")
+    parser.add_argument("--isotropic-noise-addition-probability", type=float, default = 1.0,
+                        help="Probability of adding isotropic noises, e.g. 0 <= p <= 1")
+    parser.add_argument("--overlapped-speech-addition-probability", type=float, default = 1.0,
+                        help="Probability of adding overlapped speech, e.g. 0 <= p <= 1")
+    parser.add_argument("--rir-smoothing-weight", type=float, default = 0.3,
+                        help="Smoothing weight for the RIR probabilities, e.g. 0 <= p <= 1. If p = 0, no smoothing will be done. "
+                        "The RIR distribution will be mixed with a uniform distribution according to the smoothing weight")
+    parser.add_argument("--noise-smoothing-weight", type=float, default = 0.3,
+                        help="Smoothing weight for the noise probabilities, e.g. 0 <= p <= 1. If p = 0, no smoothing will be done. "
+                        "The noise distribution will be mixed with a uniform distribution according to the smoothing weight")
+    parser.add_argument("--overlapped-speech-smoothing-weight", type=float, default = 0.3,
+                        help="The overlapped speech distribution will be mixed with a uniform distribution according to the smoothing weight")
+    parser.add_argument("--max-noises-per-minute", type=int, default = 2,
+                        help="This controls the maximum number of point-source noises that could be added to a recording according to its duration")
+    parser.add_argument("--max-overlapped-segments-per-minute", type=int, default = 5,
+                        help="This controls the maximum number of overlapping segments of speech that could be added to a recording per minute")
+    parser.add_argument('--random-seed', type=int, default=0,
+                        help='seed to be used in the randomization of impulses and noises')
+    parser.add_argument("--shift-output", type=str,
+                        help="If true, the reverberated waveform will be shifted by the amount of the peak position of the RIR",
+                        choices=['true', 'false'], default = "true")
+    parser.add_argument("--output-additive-noise-dir", type=str,
+                        help="Output directory corresponding to the additive noise part of the data corruption")
+    parser.add_argument("--output-reverb-dir", type=str,
+                        help="Output directory corresponding to the reverberated signal part of the data corruption")
+
+    parser.add_argument("input_dir",
+                        help="Input data directory")
+    parser.add_argument("output_dir",
+                        help="Output data directory")
+
+    print(' '.join(sys.argv))
+
+    args = parser.parse_args()
+    args = CheckArgs(args)
+
+    return args
+
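+# (Illustrative note on the default mixture weight described in the help text
+# above: lists with explicit weights keep them, and the remaining probability
+# mass is split uniformly among the rest.  A minimal sketch of the assumed
+# rule, with an invented helper name:
+#     def assign_mixture_weights(weights):   # e.g. [0.3, None, None]
+#         known = sum(w for w in weights if w is not None)
+#         rest = [i for i, w in enumerate(weights) if w is None]
+#         share = (1.0 - known) / len(rest) if rest else 0.0
+#         return [w if w is not None else share for w in weights]
+# so [0.3, None, None] becomes [0.3, 0.35, 0.35].)
+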
+def CheckArgs(args):
+    if not os.path.exists(args.output_dir):
+        os.makedirs(args.output_dir)
+
+    if args.output_reverb_dir is not None:
+        if args.output_reverb_dir == "":
+            args.output_reverb_dir = None
+
+    if args.output_reverb_dir is not None:
+        if not os.path.exists(args.output_reverb_dir):
+            os.makedirs(args.output_reverb_dir)
+
+    if args.output_additive_noise_dir is not None:
+        if args.output_additive_noise_dir == "":
+            args.output_additive_noise_dir = None
+
+    if args.output_additive_noise_dir is not None:
+        if not os.path.exists(args.output_additive_noise_dir):
+            os.makedirs(args.output_additive_noise_dir)
+
+    ## Check arguments.
+
+    if args.num_replicas > 1 and args.prefix is None:
+        args.prefix = "rvb"
+        warnings.warn("--prefix is set to 'rvb' as --num-replications is larger than 1.")
+
+    if not args.num_replicas > 0:
+        raise Exception("--num-replications must be positive")
+
+    if args.speech_rvb_probability < 0 or args.speech_rvb_probability > 1:
+        raise Exception("--speech-rvb-probability must be between 0 and 1")
+
+    if args.pointsource_noise_addition_probability < 0 or args.pointsource_noise_addition_probability > 1:
+        raise Exception("--pointsource-noise-addition-probability must be between 0 and 1")
+
+    if args.isotropic_noise_addition_probability < 0 or args.isotropic_noise_addition_probability > 1:
+        raise Exception("--isotropic-noise-addition-probability must be between 0 and 1")
+
+    if args.overlapped_speech_addition_probability < 0 or args.overlapped_speech_addition_probability > 1:
+        raise Exception("--overlapped-speech-addition-probability must be between 0 and 1")
+
+    if args.rir_smoothing_weight < 0 or args.rir_smoothing_weight > 1:
+        raise Exception("--rir-smoothing-weight must be between 0 and 1")
+
+    if args.noise_smoothing_weight < 0 or args.noise_smoothing_weight > 1:
+        raise Exception("--noise-smoothing-weight must be between 0 and 1")
+
+    if args.overlapped_speech_smoothing_weight < 0 or args.overlapped_speech_smoothing_weight > 1:
+        raise Exception("--overlapped-speech-smoothing-weight must be between 0 and 1")
+
+    if args.max_noises_per_minute < 0:
+        raise Exception("--max-noises-per-minute cannot be negative")
+
+    if args.max_overlapped_segments_per_minute < 0:
+        raise Exception("--max-overlapped-segments-per-minute cannot be negative")
+
+    return args
+
+def ParseSpeechSegmentsList(speech_segments_set_para_array, smoothing_weight):
+    set_list = []
+    for set_para in speech_segments_set_para_array:
+        set = lambda: None
+        setattr(set, "wav_scp", None)
+        setattr(set, "segments", None)
+        setattr(set, "probability", None)
+        parts = set_para.split(',')
+        if len(parts) == 3:
+            set.probability = float(parts[0])
+            set.wav_scp = parts[1].strip()
+            set.segments = parts[2].strip()
+        else:
+            set.wav_scp = parts[0].strip()
+            set.segments = parts[1].strip()
+        if not os.path.isfile(set.wav_scp):
+            raise Exception(set.wav_scp + " not found")
+        if not os.path.isfile(set.segments):
+            raise Exception(set.segments + " not found")
+        set_list.append(set)
+
+    set_list = data_lib.SmoothProbabilityDistribution(set_list)
+
+    segments_list = []
+    for segments_set in set_list:
+        current_segments_list = []
+
+        wav_dict = {}
+        for s in open(segments_set.wav_scp):
+            parts = s.strip().split()
+            wav_dict[parts[0]] = ' '.join(parts[1:])
+
+        for s in open(segments_set.segments):
+            parts = s.strip().split()
+            current_segment = argparse.Namespace()
+            current_segment.utt_id = parts[0]
+            current_segment.probability = None
+
+            start_time = float(parts[2])
+            end_time = float(parts[3])
+
+            current_segment.duration = (end_time - start_time)
+
+            wav_rxfilename = wav_dict[parts[1]]
+            if wav_rxfilename.split()[-1] == '|':
+                current_segment.wav_rxfilename = "{0} sox -t wav - -t wav - trim {1} {2} |".format(wav_rxfilename, start_time, end_time - start_time)
+            else:
+                current_segment.wav_rxfilename = "sox {0} -t wav - trim {1} {2} |".format(wav_rxfilename, start_time, end_time - start_time)
+
+            current_segments_list.append(current_segment)
+
+        segments_list += data_lib.SmoothProbabilityDistribution(current_segments_list, smoothing_weight, segments_set.probability)
+
+    return segments_list
+
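+# (Illustrative example of the wav_rxfilename manipulation above: a segment
+# from 12.50s to 15.70s of /data/utt1.wav becomes the Kaldi pipe
+#     sox /data/utt1.wav -t wav - trim 12.5 3.2 |
+# and if the wav.scp entry is itself a pipe ending in '|', the sox trim stage
+# is appended to it instead, reading from its stdout.  Paths are invented.)
+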
+def AddOverlappedSpeech(room,  # the room selected
+                        speech_segments_list,  # the speech list
+                        overlapped_speech_addition_probability,  # Probability of adding another (overlapping) speech waveform
+                        snrs,  # the SNR for adding the foreground speech
+                        speech_dur,  # duration of the recording
+                        max_overlapped_speech_segments,  # Maximum number of speech signals that can be added
+                        overlapped_speech_descriptor  # descriptor to store the information of the overlapped speech
+                        ):
+    if (len(speech_segments_list) > 0 and random.random() < overlapped_speech_addition_probability
+            and max_overlapped_speech_segments >= 1):
+        for k in range(random.randint(1, max_overlapped_speech_segments)):
+            # pick the overlapped speech signal and the RIR to
+            # reverberate the overlapped speech signal
+            speech_segment = data_lib.PickItemWithProbability(speech_segments_list)
+            rir = data_lib.PickItemWithProbability(room.rir_list)
+
+            speech_rvb_command = """wav-reverberate --impulse-response="{0}" --shift-output=true """.format(rir.rir_rspecifier)
+            overlapped_speech_descriptor['start_times'].append(round(random.random() * speech_dur, 2))
+            overlapped_speech_descriptor['snrs'].append(snrs.next())
+            overlapped_speech_descriptor['utt_ids'].append(speech_segment.utt_id)
+            overlapped_speech_descriptor['durations'].append(speech_segment.duration)
+
+            if len(speech_segment.wav_rxfilename.split()) == 1:
+                overlapped_speech_descriptor['speech_segments'].append("{1} {0} - |".format(speech_segment.wav_rxfilename, speech_rvb_command))
+            else:
+                overlapped_speech_descriptor['speech_segments'].append("{0} {1} - - |".format(speech_segment.wav_rxfilename, speech_rvb_command))
+
+# This function randomly decides whether to reverberate, and samples an RIR if it does.
+# It also decides whether to add the appropriate noises.
+# It returns the string of options to the binary wav-reverberate.
+def GenerateReverberationAndOverlappedSpeechOpts(
+        room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
+        pointsource_noise_list,  # the point source noise list
+        iso_noise_dict,  # the isotropic noise dictionary
+        foreground_snrs,  # the SNR for adding the foreground noises
+        background_snrs,  # the SNR for adding the background noises
+        speech_segments_list,
+        overlap_snrs,
+        speech_rvb_probability,  # Probability of reverberating a speech signal
+        isotropic_noise_addition_probability,  # Probability of adding isotropic noises
+        pointsource_noise_addition_probability,  # Probability of adding point-source noises
+        overlapped_speech_addition_probability,
+        speech_dur,  # duration of the recording
+        max_noises_recording,  # Maximum number of point-source noises that can be added
+        max_overlapped_segments_recording  # Maximum number of overlapped segments that can be added
+        ):
+    impulse_response_opts = ""
+    additive_noise_opts = ""
+
+    noise_addition_descriptor = {'noise_io': [],
+                                 'start_times': [],
+                                 'snrs': []}
+    # Randomly select the room.
+    # Here the room probability is a sum of the probabilities of the RIRs recorded in the room.
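+    # (Assumed semantics of data_lib.PickItemWithProbability, for exposition
+    # only: it draws one item with chance proportional to its .probability
+    # field, roughly
+    #     r = random.random(); cum = 0.0
+    #     for item in items:
+    #         cum += item.probability
+    #         if r < cum: return item
+    # This is an illustrative sketch, not the library code.)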
+ room = data_lib.PickItemWithProbability(room_dict) + # Randomly select the RIR in the room + speech_rir = data_lib.PickItemWithProbability(room.rir_list) + if random.random() < speech_rvb_probability: + # pick the RIR to reverberate the speech + impulse_response_opts = """--impulse-response="{0}" """.format(speech_rir.rir_rspecifier) + + rir_iso_noise_list = [] + if speech_rir.room_id in iso_noise_dict: + rir_iso_noise_list = iso_noise_dict[speech_rir.room_id] + # Add the corresponding isotropic noise associated with the selected RIR + if len(rir_iso_noise_list) > 0 and random.random() < isotropic_noise_addition_probability: + isotropic_noise = data_lib.PickItemWithProbability(rir_iso_noise_list) + # extend the isotropic noise to the length of the speech waveform + # check if it is really a pipe + if len(isotropic_noise.noise_rspecifier.split()) == 1: + noise_addition_descriptor['noise_io'].append("wav-reverberate --duration={1} {0} - |".format(isotropic_noise.noise_rspecifier, speech_dur)) + else: + noise_addition_descriptor['noise_io'].append("{0} wav-reverberate --duration={1} - - |".format(isotropic_noise.noise_rspecifier, speech_dur)) + noise_addition_descriptor['start_times'].append(0) + noise_addition_descriptor['snrs'].append(background_snrs.next()) + + data_lib.AddPointSourceNoise(room, # the room selected + pointsource_noise_list, # the point source noise list + pointsource_noise_addition_probability, # Probability of adding point-source noises + foreground_snrs, # the SNR for adding the foreground noises + background_snrs, # the SNR for adding the background noises + speech_dur, # duration of the recording + max_noises_recording, # Maximum number of point-source noises that can be added + noise_addition_descriptor # descriptor to store the information of the noise added + ) + + assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['start_times']) + assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['snrs']) + + overlapped_speech_descriptor = {'speech_segments': [], + 'start_times': [], + 'snrs': [], + 'utt_ids': [], + 'durations': [] + } + + AddOverlappedSpeech(room, + speech_segments_list, # speech segments list + overlapped_speech_addition_probability, + overlap_snrs, + speech_dur, + max_overlapped_segments_recording, + overlapped_speech_descriptor + ) + + if len(overlapped_speech_descriptor['speech_segments']) > 0: + noise_addition_descriptor['noise_io'] += overlapped_speech_descriptor['speech_segments'] + noise_addition_descriptor['start_times'] += overlapped_speech_descriptor['start_times'] + noise_addition_descriptor['snrs'] += overlapped_speech_descriptor['snrs'] + + if len(noise_addition_descriptor['noise_io']) > 0: + additive_noise_opts += "--additive-signals='{0}' ".format(','.join(noise_addition_descriptor['noise_io'])) + additive_noise_opts += "--start-times='{0}' ".format(','.join(map(lambda x:str(x), noise_addition_descriptor['start_times']))) + additive_noise_opts += "--snrs='{0}' ".format(','.join(map(lambda x:str(x), noise_addition_descriptor['snrs']))) + + return [impulse_response_opts, additive_noise_opts, + zip(overlapped_speech_descriptor['utt_ids'], [ str(x) for x in overlapped_speech_descriptor['start_times'] ], [ str(x) for x in overlapped_speech_descriptor['durations'] ])] + +# This is the main function to generate pipeline command for the corruption +# The generic command of wav-reverberate will be like: +# wav-reverberate --duration=t --impulse-response=rir.wav +# 
--additive-signals='noise1.wav,noise2.wav' --snrs='snr1,snr2' --start-times='s1,s2' input.wav output.wav
+def GenerateReverberatedWavScpWithOverlappedSpeech(
+        wav_scp,  # a dictionary whose values are the Kaldi-IO strings of the speech recordings
+        durations,  # a dictionary whose values are the duration (in sec) of the speech recordings
+        output_dir,  # output directory to write the corrupted wav.scp
+        room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
+        pointsource_noise_list,  # the point source noise list
+        iso_noise_dict,  # the isotropic noise dictionary
+        foreground_snr_array,  # the SNR for adding the foreground noises
+        background_snr_array,  # the SNR for adding the background noises
+        speech_segments_list,  # list of speech segments to create overlapped speech
+        overlap_snr_array,  # the SNR for adding overlapped speech
+        num_replicas,  # number of replicas to generate for the data
+        prefix,  # prefix for the id of the corrupted utterances
+        speech_rvb_probability,  # Probability of reverberating a speech signal
+        shift_output,  # option whether to shift the output waveform
+        isotropic_noise_addition_probability,  # Probability of adding isotropic noises
+        pointsource_noise_addition_probability,  # Probability of adding point-source noises
+        max_noises_per_minute,  # maximum number of point-source noises that can be added to a recording according to its duration
+        overlapped_speech_addition_probability,
+        max_overlapped_segments_per_minute,
+        output_reverb_dir = None,
+        output_additive_noise_dir = None
+        ):
+    foreground_snrs = data_lib.list_cyclic_iterator(foreground_snr_array)
+    background_snrs = data_lib.list_cyclic_iterator(background_snr_array)
+    overlap_snrs = data_lib.list_cyclic_iterator(overlap_snr_array)
+
+    corrupted_wav_scp = {}
+    reverb_wav_scp = {}
+    additive_noise_wav_scp = {}
+    overlapped_segments_info = {}
+
+    keys = wav_scp.keys()
+    keys.sort()
+    for i in range(1, num_replicas+1):
+        for recording_id in keys:
+            wav_original_pipe = wav_scp[recording_id]
+            # check if it is really a pipe
+            if len(wav_original_pipe.split()) == 1:
+                wav_original_pipe = "cat {0} |".format(wav_original_pipe)
+            speech_dur = durations[recording_id]
+            max_noises_recording = math.floor(max_noises_per_minute * speech_dur / 60)
+            max_overlapped_segments_recording = math.floor(max_overlapped_segments_per_minute * speech_dur / 60)
+
+            [impulse_response_opts,
+             additive_noise_opts,
+             overlapped_speech_segments] = GenerateReverberationAndOverlappedSpeechOpts(
+                room_dict = room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
+                pointsource_noise_list = pointsource_noise_list,  # the point source noise list
+                iso_noise_dict = iso_noise_dict,  # the isotropic noise dictionary
+                foreground_snrs = foreground_snrs,  # the SNR for adding the foreground noises
+                background_snrs = background_snrs,  # the SNR for adding the background noises
+                speech_segments_list = speech_segments_list,  # Speech segments for creating overlapped speech
+                overlap_snrs = overlap_snrs,  # the SNR for adding overlapped speech
+                speech_rvb_probability = speech_rvb_probability,  # Probability of reverberating a speech signal
+                isotropic_noise_addition_probability = isotropic_noise_addition_probability,  # Probability of adding isotropic noises
+                pointsource_noise_addition_probability = pointsource_noise_addition_probability,  # Probability of adding point-source noises
+                overlapped_speech_addition_probability = overlapped_speech_addition_probability,
+                speech_dur = speech_dur,  # duration of 
the recording + max_noises_recording = max_noises_recording, # Maximum number of point-source noises that can be added + max_overlapped_segments_recording = max_overlapped_segments_recording + ) + reverberate_opts = impulse_response_opts + additive_noise_opts + + new_recording_id = data_lib.GetNewId(recording_id, prefix, i) + + if reverberate_opts == "": + wav_corrupted_pipe = "{0}".format(wav_original_pipe) + else: + wav_corrupted_pipe = "{0} wav-reverberate --shift-output={1} {2} - - |".format(wav_original_pipe, shift_output, reverberate_opts) + + corrupted_wav_scp[new_recording_id] = wav_corrupted_pipe + + if output_reverb_dir is not None: + if impulse_response_opts == "": + wav_reverb_pipe = "{0}".format(wav_original_pipe) + else: + wav_reverb_pipe = "{0} wav-reverberate --shift-output={1} --reverb-out-wxfilename=- {2} - /dev/null |".format(wav_original_pipe, shift_output, reverberate_opts) + reverb_wav_scp[new_recording_id] = wav_reverb_pipe + + if output_additive_noise_dir is not None: + if additive_noise_opts != "": + wav_additive_noise_pipe = "{0} wav-reverberate --shift-output={1} --additive-noise-out-wxfilename=- {2} - /dev/null |".format(wav_original_pipe, shift_output, reverberate_opts) + additive_noise_wav_scp[new_recording_id] = wav_additive_noise_pipe + + if len(overlapped_speech_segments) > 0: + overlapped_segments_info[new_recording_id] = [ ':'.join(x) for x in overlapped_speech_segments ] + + data_lib.WriteDictToFile(corrupted_wav_scp, output_dir + "/wav.scp") + + # Write for each new recording, the utterance id of the segments and + # the start time at which they are added + data_lib.WriteDictToFile(overlapped_segments_info, output_dir + "/overlapped_segments_info.txt") + + if output_reverb_dir is not None: + data_lib.WriteDictToFile(reverb_wav_scp, output_reverb_dir + "/wav.scp") + + if output_additive_noise_dir is not None: + data_lib.WriteDictToFile(additive_noise_wav_scp, output_additive_noise_dir + "/wav.scp") + + +# This function creates multiple copies of the necessary files, e.g. utt2spk, wav.scp ... 
+def CreateReverberatedCopy(input_dir,
+                           output_dir,
+                           room_dict,  # the room dictionary, please refer to MakeRoomDict() for the format
+                           pointsource_noise_list,  # the point source noise list
+                           iso_noise_dict,  # the isotropic noise dictionary
+                           speech_segments_list,
+                           foreground_snr_string,  # the SNR for adding the foreground noises
+                           background_snr_string,  # the SNR for adding the background noises
+                           overlap_snr_string,  # the SNR for overlapped speech
+                           num_replicas,  # number of replicas to generate for the data
+                           prefix,  # prefix for the id of the corrupted utterances
+                           speech_rvb_probability,  # Probability of reverberating a speech signal
+                           shift_output,  # option whether to shift the output waveform
+                           isotropic_noise_addition_probability,  # Probability of adding isotropic noises
+                           pointsource_noise_addition_probability,  # Probability of adding point-source noises
+                           max_noises_per_minute,  # maximum number of point-source noises that can be added to a recording according to its duration
+                           overlapped_speech_addition_probability,
+                           max_overlapped_segments_per_minute,
+                           output_reverb_dir = None,
+                           output_additive_noise_dir = None
+                           ):
+
+    wav_scp = data_lib.ParseFileToDict(input_dir + "/wav.scp", value_processor = lambda x: " ".join(x))
+    if not os.path.isfile(input_dir + "/reco2dur"):
+        print("Getting the duration of the recordings...")
+        read_entire_file = "false"
+        for value in wav_scp.values():
+            # we will add more checks for sox commands which modify the header as we come across these cases in our data
+            if "sox" in value and "speed" in value:
+                read_entire_file = "true"
+                break
+        data_lib.RunKaldiCommand("wav-to-duration --read-entire-file={1} scp:{0}/wav.scp ark,t:{0}/reco2dur".format(input_dir, read_entire_file))
+    durations = data_lib.ParseFileToDict(input_dir + "/reco2dur", value_processor = lambda x: float(x[0]))
+    foreground_snr_array = map(lambda x: float(x), foreground_snr_string.split(':'))
+    background_snr_array = map(lambda x: float(x), background_snr_string.split(':'))
+    overlap_snr_array = map(lambda x: float(x), overlap_snr_string.split(':'))
+
+    GenerateReverberatedWavScpWithOverlappedSpeech(
+        wav_scp = wav_scp,
+        durations = durations,
+        output_dir = output_dir,
+        room_dict = room_dict,
+        pointsource_noise_list = pointsource_noise_list,
+        iso_noise_dict = iso_noise_dict,
+        foreground_snr_array = foreground_snr_array,
+        background_snr_array = background_snr_array,
+        speech_segments_list = speech_segments_list,
+        overlap_snr_array = overlap_snr_array,
+        num_replicas = num_replicas, prefix = prefix,
+        speech_rvb_probability = speech_rvb_probability,
+        shift_output = shift_output,
+        isotropic_noise_addition_probability = isotropic_noise_addition_probability,
+        pointsource_noise_addition_probability = pointsource_noise_addition_probability,
+        max_noises_per_minute = max_noises_per_minute,
+        overlapped_speech_addition_probability = overlapped_speech_addition_probability,
+        max_overlapped_segments_per_minute = max_overlapped_segments_per_minute,
+        output_reverb_dir = output_reverb_dir,
+        output_additive_noise_dir = output_additive_noise_dir)
+
+    data_lib.CopyDataDirFiles(input_dir, output_dir, num_replicas, prefix)
+    data_lib.AddPrefixToFields(input_dir + "/reco2dur", output_dir + "/reco2dur", num_replicas, prefix, field = [0])
+
+    if output_reverb_dir is not None:
+        data_lib.CopyDataDirFiles(input_dir, output_reverb_dir, num_replicas, prefix)
+        data_lib.AddPrefixToFields(input_dir + "/reco2dur", output_reverb_dir + "/reco2dur", num_replicas, prefix, field = [0])
+
+    if 
output_additive_noise_dir is not None: + data_lib.CopyDataDirFiles(input_dir, output_additive_noise_dir, num_replicas, prefix) + data_lib.AddPrefixToFields(input_dir + "/reco2dur", output_additive_noise_dir + "/reco2dur", num_replicas, prefix, field = [0]) + + +def Main(): + args = GetArgs() + random.seed(args.random_seed) + rir_list = data_lib.ParseRirList(args.rir_set_para_array, args.rir_smoothing_weight) + print("Number of RIRs is {0}".format(len(rir_list))) + pointsource_noise_list = [] + iso_noise_dict = {} + if args.noise_set_para_array is not None: + pointsource_noise_list, iso_noise_dict = data_lib.ParseNoiseList(args.noise_set_para_array, args.noise_smoothing_weight) + print("Number of point-source noises is {0}".format(len(pointsource_noise_list))) + print("Number of isotropic noises is {0}".format(sum(len(iso_noise_dict[key]) for key in iso_noise_dict.keys()))) + room_dict = data_lib.MakeRoomDict(rir_list) + + speech_segments_list = ParseSpeechSegmentsList(args.speech_segments_set_para_array, args.overlapped_speech_smoothing_weight) + + CreateReverberatedCopy(input_dir = args.input_dir, + output_dir = args.output_dir, + room_dict = room_dict, + pointsource_noise_list = pointsource_noise_list, + iso_noise_dict = iso_noise_dict, + speech_segments_list = speech_segments_list, + foreground_snr_string = args.foreground_snr_string, + background_snr_string = args.background_snr_string, + overlap_snr_string = args.overlap_snr_string, + num_replicas = args.num_replicas, + prefix = args.prefix, + speech_rvb_probability = args.speech_rvb_probability, + shift_output = args.shift_output, + isotropic_noise_addition_probability = args.isotropic_noise_addition_probability, + pointsource_noise_addition_probability = args.pointsource_noise_addition_probability, + max_noises_per_minute = args.max_noises_per_minute, + overlapped_speech_addition_probability = args.overlapped_speech_addition_probability, + max_overlapped_segments_per_minute = args.max_overlapped_segments_per_minute, + output_reverb_dir = args.output_reverb_dir, + output_additive_noise_dir = args.output_additive_noise_dir) + +if __name__ == "__main__": + Main() + + From be41b741e9f034d2eda68263aea4bc1845495dfd Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 24 Nov 2016 00:12:19 -0500 Subject: [PATCH 096/530] asr_diarization: Added do_corruption_data_dir_overlapped_speech.sh --- ...o_corruption_data_dir_overlapped_speech.sh | 270 ++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh new file mode 100644 index 00000000000..f387acb8552 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh @@ -0,0 +1,270 @@ +#!/bin/bash +set -e +set -u +set -o pipefail + +. path.sh +. cmd.sh + +num_data_reps=5 +data_dir=data/train_si284 +whole_data_dir=data/train_si284_whole + +nj=40 +reco_nj=40 + +stage=0 +corruption_stage=-10 + +pad_silence=false + +mfcc_config=conf/mfcc_hires_bp_vh.conf +energy_config=conf/log_energy.conf + +dry_run=false +corrupt_only=false +speed_perturb=true + +reco_vad_dir= +utt_vad_dir= + +max_jobs_run=20 + +overlap_snrs="5:2:1:0:-1:-2" +base_rirs=simulated + +. 
utils/parse_options.sh + +if [ $# -ne 0 ]; then + echo "Usage: $0" + exit 1 +fi + +rvb_opts=() +# This is the config for the system using simulated RIRs and point-source noises +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") +rvb_opts+=(--speech-segments-set-parameters="$data_dir/wav.scp,$data_dir/segments") + +whole_data_id=`basename ${whole_data_dir}` + +corrupted_data_id=${whole_data_id}_ovlp_corrupted +clean_data_id=${whole_data_id}_ovlp_clean +noise_data_id=${whole_data_id}_ovlp_noise + +if [ $stage -le 2 ]; then + python steps/data/make_corrupted_data_dir.py \ + "${rvb_opts[@]}" \ + --prefix="ovlp" \ + --overlap-snrs=$overlap_snrs \ + --speech-rvb-probability=1 \ + --overlapping-speech-addition-probability=1 \ + --num-replications=$num_data_reps \ + --min-overlapping-segments-per-minute=5 \ + --max-overlapping-segments-per-minute=20 \ + --output-additive-noise-dir=data/${noise_data_id} \ + --output-reverb-dir=data/${clean_data_id} \ + data/${whole_data_id} data/${corrupted_data_id} +fi + +if $dry_run; then + exit 0 +fi + +clean_data_dir=data/${clean_data_id} +corrupted_data_dir=data/${corrupted_data_id} +noise_data_dir=data/${noise_data_id} +orig_corrupted_data_dir=$corrupted_data_dir + +if $speed_perturb; then + if [ $stage -le 3 ]; then + ## Assuming whole data directories + for x in $clean_data_dir $corrupted_data_dir $noise_data_dir; do + cp $x/reco2dur $x/utt2dur + utils/data/perturb_data_dir_speed_3way.sh $x ${x}_sp + done + fi + + corrupted_data_dir=${corrupted_data_dir}_sp + clean_data_dir=${clean_data_dir}_sp + noise_data_dir=${noise_data_dir}_sp + + corrupted_data_id=${corrupted_data_id}_sp + clean_data_id=${clean_data_id}_sp + noise_data_id=${noise_data_id}_sp + + if [ $stage -le 4 ]; then + utils/data/perturb_data_dir_volume.sh --force true ${corrupted_data_dir} + utils/data/perturb_data_dir_volume.sh --force true --reco2vol ${corrupted_data_dir}/reco2vol ${clean_data_dir} + utils/data/perturb_data_dir_volume.sh --force true --reco2vol ${corrupted_data_dir}/reco2vol ${noise_data_dir} + fi +fi + +if $corrupt_only; then + echo "$0: Got corrupted data directory in ${corrupted_data_dir}" + exit 0 +fi + +mfccdir=`basename $mfcc_config` +mfccdir=${mfccdir%%.conf} + +if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage +fi + +if [ $stage -le 5 ]; then + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$train_cmd" --nj $reco_nj \ + $corrupted_data_dir exp/make_hires_bp/${corrupted_data_id} $mfccdir +fi + +if [ $stage -le 6 ]; then + steps/make_mfcc.sh --mfcc-config $energy_config \ + --cmd "$train_cmd" --nj $reco_nj \ + $clean_data_dir exp/make_log_energy/${clean_data_id} log_energy_feats +fi + +if [ $stage -le 7 ]; then + steps/make_mfcc.sh --mfcc-config $energy_config \ + --cmd "$train_cmd" --nj $reco_nj \ + $noise_data_dir exp/make_log_energy/${noise_data_id} log_energy_feats +fi + +if [ -z "$reco_vad_dir" ]; then + echo "reco-vad-dir must be provided" + exit 1 +fi + +targets_dir=irm_targets +if [ $stage -le 8 ]; then + mkdir -p exp/make_irm_targets/${corrupted_data_id} + + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $targets_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$targets_dir/storage $targets_dir/storage + fi + + steps/segmentation/make_snr_targets.sh \ + --nj $nj --cmd "$train_cmd --max-jobs-run $max_jobs_run" \ + --target-type Irm --compress true --apply-exp false \ + ${clean_data_dir} ${noise_data_dir} ${corrupted_data_dir} \ + exp/make_irm_targets/${corrupted_data_id} $targets_dir +fi + +# Data dirs without speed perturbation +overlap_dir=exp/make_overlap_labels/${corrupted_data_id} +unreliable_dir=exp/make_overlap_labels/unreliable_${corrupted_data_id} +overlap_data_dir=$overlap_dir/overlap_data +unreliable_data_dir=$overlap_dir/unreliable_data + +mkdir -p $unreliable_dir + +if [ $stage -le 8 ]; then + cat $reco_vad_dir/sad_seg.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps "ovlp" \ + | sort -k1,1 > ${corrupted_data_dir}/sad_seg.scp + utils/data/get_utt2num_frames.sh $corrupted_data_dir + utils/split_data.sh --per-reco ${orig_corrupted_data_dir} $reco_nj + + # Combine the VAD from the base recording and the VAD from the overlapping segments + # to create per-frame labels of the number of overlapping speech segments + # Unreliable segments are regions where no VAD labels were available for the + # overlapping segments. These can be later removed by setting deriv weights to 0. + $train_cmd JOB=1:$reco_nj $overlap_dir/log/get_overlap_seg.JOB.log \ + segmentation-init-from-overlap-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ + "scp:utils/filter_scp.pl ${orig_corrupted_data_dir}/split${reco_nj}reco/JOB/utt2spk $corrupted_data_dir/sad_seg.scp |" \ + ark,t:$orig_corrupted_data_dir/overlapped_segments_info.txt \ + scp:$utt_vad_dir/sad_seg.scp ark:- ark:$unreliable_dir/unreliable_seg_speed_unperturbed.JOB.ark \| \ + segmentation-copy --keep-label=1 ark:- ark:- \| \ + segmentation-get-stats --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ + ark:- ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:$overlap_dir/overlap_seg_speed_unperturbed.JOB.ark +fi + +if [ $stage -le 9 ]; then + mkdir -p $overlap_data_dir $unreliable_data_dir + cp $orig_corrupted_data_dir/wav.scp $overlap_data_dir + cp $orig_corrupted_data_dir/wav.scp $unreliable_data_dir + + # Create segments where there is definitely an overlap. 
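+  # (Illustrative example of the label processing below: if the per-frame
+  # labels count overlapping speakers, e.g.
+  #    labels:   0 1 2 3 2 1 0
+  # then removing labels 0:1 and merging 2..10 into 1 yields
+  #    overlap:  . . 1 1 1 . .
+  # i.e. segments covering only the frames with two or more speakers.)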
+ $train_cmd JOB=1:$reco_nj $overlap_dir/log/process_to_segments.JOB.log \ + segmentation-post-process --remove-labels=0:1 \ + ark:$overlap_dir/overlap_seg_speed_unperturbed.JOB.ark ark:- \| \ + segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-to-segments ark:- ark:$overlap_data_dir/utt2spk.JOB $overlap_data_dir/segments.JOB + + $train_cmd JOB=1:$reco_nj $overlap_dir/log/get_unreliable_segments.JOB.log \ + segmentation-to-segments --single-speaker \ + ark:$unreliable_dir/unreliable_seg_speed_unperturbed.JOB.ark \ + ark:$unreliable_data_dir/utt2spk.JOB $unreliable_data_dir/segments.JOB + + for n in `seq $reco_nj`; do cat $overlap_data_dir/utt2spk.$n; done > $overlap_data_dir/utt2spk + for n in `seq $reco_nj`; do cat $overlap_data_dir/segments.$n; done > $overlap_data_dir/segments + for n in `seq $reco_nj`; do cat $unreliable_data_dir/utt2spk.$n; done > $unreliable_data_dir/utt2spk + for n in `seq $reco_nj`; do cat $unreliable_data_dir/segments.$n; done > $unreliable_data_dir/segments + + utils/fix_data_dir.sh $overlap_data_dir + utils/fix_data_dir.sh $unreliable_data_dir + + if $speed_perturb; then + utils/data/perturb_data_dir_speed_3way.sh $overlap_data_dir ${overlap_data_dir}_sp + utils/data/perturb_data_dir_speed_3way.sh $unreliable_data_dir ${unreliable_data_dir}_sp + fi +fi + +if $speed_perturb; then + overlap_data_dir=${overlap_data_dir}_sp + unreliable_data_dir=${unreliable_data_dir}_sp +fi + +if [ $stage -le 10 ]; then + utils/split_data.sh --per-reco ${overlap_data_dir} $reco_nj + + $train_cmd JOB=1:$reco_nj $overlap_dir/log/get_overlap_speech_labels.JOB.log \ + utils/data/get_reco2utt.sh ${overlap_data_dir}/split${reco_nj}reco/JOB '&&' \ + segmentation-init-from-segments --shift-to-zero=false \ + ${overlap_data_dir}/split${reco_nj}reco/JOB/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- ark,t:${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt \ + ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \ + ark,scp:overlap_labels/overlapped_speech_${corrupted_data_id}.JOB.ark,overlap_labels/overlapped_speech_${corrupted_data_id}.JOB.scp +fi + +for n in `seq $reco_nj`; do + cat overlap_labels/overlapped_speech_${corrupted_data_id}.$n.scp +done > ${corrupted_data_dir}/overlapped_speech_labels.scp + +if [ $stage -le 11 ]; then + utils/data/get_reco2utt.sh ${unreliable_data_dir} + + # First convert the unreliable segments into a recording-level segmentation. + # Initialize a segmentation from utt2num_frames and set to 0, the regions + # of unreliable segments. At this stage deriv weights is 1 for all but the + # unreliable segment regions. + # Initialize a segmentation from the VAD labels and retain only the speech segments. + # Intersect this with the deriv weights segmentation from above. At this stage + # deriv weights is 1 for only the regions where base VAD label is 1 and + # the overlapping segment is not unreliable. Convert this to deriv weights. 
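+  # (Worked example of the intersection described above, with 0/1 frames:
+  #    base VAD:         1 1 1 0 0 1 1
+  #    unreliable mask:  0 0 1 1 1 0 0
+  #    deriv weights:    1 1 0 0 0 1 1
+  # i.e. weight 1 exactly where the base VAD is 1 and the frame is not
+  # inside an unreliable overlapping segment.)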
+ $train_cmd JOB=1:$reco_nj $unreliable_dir/log/get_deriv_weights.JOB.log\ + segmentation-init-from-segments --shift-to-zero=false \ + "utils/filter_scp.pl -f 2 ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/segments |" ark:- \| \ + segmentation-combine-segments-to-recordings ark:- "ark,t:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/reco2utt |" \ + ark:- \| \ + segmentation-create-subsegments --filter-label=1 --subsegment-label=0 --ignore-missing \ + "ark:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt $corrupted_data_dir/utt2num_frames | segmentation-init-from-lengths ark,t:- ark:- |" \ + ark:- ark:- \| \ + segmentation-intersect-segments --mismatch-label=0 \ + "ark:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt $corrupted_data_dir/sad_seg.scp | segmentation-post-process --remove-labels=0:2:3 scp:- ark:- |" \ + ark:- ark:- \| \ + segmentation-post-process --remove-labels=0 ark:- ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \ + steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ + ark,scp:$unreliable_dir/deriv_weights_for_overlapped_speech.JOB.ark,$unreliable_dir/deriv_weights_for_overlapped_speech.JOB.scp + + for n in `seq $reco_nj`; do + cat $unreliable_dir/deriv_weights_for_overlapped_speech.${n}.scp + done > $corrupted_data_dir/deriv_weights_for_overlapped_speech.scp +fi + +exit 0 From a1be1dd02795ac51b6ccd03a21fa084a553e68ba Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 24 Nov 2016 00:12:46 -0500 Subject: [PATCH 097/530] asr_diarization: Added train_sad_ovlp{,_prob}.sh --- .../s5/local/segmentation/train_sad_ovlp.sh | 144 +++++++++++++++++ .../local/segmentation/train_sad_ovlp_prob.sh | 145 ++++++++++++++++++ 2 files changed, 289 insertions(+) create mode 100644 egs/aspire/s5/local/segmentation/train_sad_ovlp.sh create mode 100644 egs/aspire/s5/local/segmentation/train_sad_ovlp_prob.sh diff --git a/egs/aspire/s5/local/segmentation/train_sad_ovlp.sh b/egs/aspire/s5/local/segmentation/train_sad_ovlp.sh new file mode 100644 index 00000000000..2d553875db0 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/train_sad_ovlp.sh @@ -0,0 +1,144 @@ +#!/bin/bash + +# this is the standard "tdnn" system, built in nnet3; it's what we use to +# call multi-splice. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= + +splice_indexes="-3,-2,-1,0,1,2,3 -6,0 -9,0,3 0" +relu_dim=256 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=1 +extra_egs_copy_cmd= + +num_utts_subset_valid=40 +num_utts_subset_train=40 +add_idct=true + +# target options +train_data_dir=data/train_azteec_unsad_music_whole_sp_multi_lessreverb_1k_hires + +snr_scp= +speech_feat_scp= +overlapped_speech_labels_scp= + +deriv_weights_scp= +deriv_weights_for_overlapped_speech_scp= + +egs_dir= +nj=40 +feat_type=raw +config_dir= +compute_objf_opts= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +num_hidden_layers=`echo $splice_indexes | perl -ane 'print scalar @F'` || exit 1 +if [ -z "$dir" ]; then + dir=exp/nnet3_sad_ovlp_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix}_n${num_hidden_layers} + +if ! cuda-compiled; then + cat < Date: Tue, 22 Nov 2016 12:44:55 -0500 Subject: [PATCH 098/530] asr_diarization: New copy-egs-overlap-detection in nnet3bin/Makefile --- src/nnet3bin/Makefile | 3 +- .../nnet3-copy-egs-overlap-detection.cc | 187 ++++++++++++++++++ 2 files changed, 189 insertions(+), 1 deletion(-) create mode 100644 src/nnet3bin/nnet3-copy-egs-overlap-detection.cc diff --git a/src/nnet3bin/Makefile b/src/nnet3bin/Makefile index aeb3dc1dc03..2a660da232c 100644 --- a/src/nnet3bin/Makefile +++ b/src/nnet3bin/Makefile @@ -17,7 +17,8 @@ BINFILES = nnet3-init nnet3-info nnet3-get-egs nnet3-copy-egs nnet3-subset-egs \ nnet3-discriminative-merge-egs nnet3-discriminative-shuffle-egs \ nnet3-discriminative-compute-objf nnet3-discriminative-train \ discriminative-get-supervision nnet3-discriminative-subset-egs \ - nnet3-discriminative-compute-from-egs nnet3-get-egs-multiple-targets + nnet3-discriminative-compute-from-egs nnet3-get-egs-multiple-targets \ + nnet3-copy-egs-overlap-detection OBJFILES = diff --git a/src/nnet3bin/nnet3-copy-egs-overlap-detection.cc b/src/nnet3bin/nnet3-copy-egs-overlap-detection.cc new file mode 100644 index 00000000000..3f180a6393e --- /dev/null +++ b/src/nnet3bin/nnet3-copy-egs-overlap-detection.cc @@ -0,0 +1,187 @@ +// nnet3bin/nnet3-copy-egs.cc + +// Copyright 2012-2015 Johns Hopkins University (author: Daniel Povey) +// 2014 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "hmm/transition-model.h" +#include "nnet3/nnet-example.h" +#include "nnet3/nnet-example-utils.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace kaldi::nnet3; + typedef kaldi::int32 int32; + typedef kaldi::int64 int64; + + const char *usage = + "Copy examples (single frames or fixed-size groups of frames) for neural\n" + "network training, possibly changing the binary mode. 
Supports multiple wspecifiers, in\n"
+        "which case it will write the examples round-robin to the outputs.\n"
+        "\n"
+        "Usage:  nnet3-copy-egs [options] <egs-rspecifier> <egs-wspecifier1> [<egs-wspecifier2> ...]\n"
+        "\n"
+        "e.g.\n"
+        "nnet3-copy-egs ark:train.egs ark,t:text.egs\n"
+        "or:\n"
+        "nnet3-copy-egs ark:train.egs ark:1.egs\n";
+
+    ParseOptions po(usage);
+
+    bool add_silence_output = true;
+    bool add_speech_output = true;
+    int32 srand_seed = 0;
+
+    std::string keep_proportion_positive_rxfilename;
+    std::string keep_proportion_negative_rxfilename;
+
+    po.Register("add-silence-output", &add_silence_output,
+                "Add silence output");
+    po.Register("add-speech-output", &add_speech_output,
+                "Add speech output");
+    po.Register("srand", &srand_seed, "Seed for random number generator "
+                "(only relevant if --keep-proportion-vec is specified)");
+    po.Register("keep-proportion-positive-vec", &keep_proportion_positive_rxfilename,
+                "If a dimension of this is <1.0, this program will "
+                "randomly set deriv weight 0 for this proportion of the input samples of the "
+                "corresponding positive examples");
+    po.Register("keep-proportion-negative-vec", &keep_proportion_negative_rxfilename,
+                "If a dimension of this is <1.0, this program will "
+                "randomly set deriv weight 0 for this proportion of the input samples of the "
+                "corresponding negative examples");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    // note: these vectors must be read after po.Read(), so that the
+    // rxfilenames registered above have been set from the command line.
+    Vector<BaseFloat> p_positive_vec(3);
+    p_positive_vec.Set(1);
+    if (!keep_proportion_positive_rxfilename.empty())
+      ReadKaldiObject(keep_proportion_positive_rxfilename, &p_positive_vec);
+
+    Vector<BaseFloat> p_negative_vec(3);
+    p_negative_vec.Set(1);
+    if (!keep_proportion_negative_rxfilename.empty())
+      ReadKaldiObject(keep_proportion_negative_rxfilename, &p_negative_vec);
+
+    std::string examples_rspecifier = po.GetArg(1);
+    std::string examples_wspecifier = po.GetArg(2);
+
+    SequentialNnetExampleReader example_reader(examples_rspecifier);
+    NnetExampleWriter example_writer(examples_wspecifier);
+
+    int64 num_read = 0, num_written = 0;
+    for (; !example_reader.Done(); example_reader.Next(), num_read++) {
+      std::string key = example_reader.Key();
+      NnetExample eg = example_reader.Value();
+
+      KALDI_ASSERT(eg.io.size() == 2);
+      NnetIo &io = eg.io[1];
+
+      KALDI_ASSERT(io.name == "output");
+
+      NnetIo silence_output(io);
+      silence_output.name = "output-silence";
+
+      NnetIo speech_output(io);
+      speech_output.name = "output-speech";
+
+      NnetIo overlap_speech_output(io);
+      overlap_speech_output.name = "output-overlap_speech";
+
+      io.features.Uncompress();
+
+      KALDI_ASSERT(io.features.Type() == kFullMatrix);
+      const Matrix<BaseFloat> &feats = io.features.GetFullMatrix();
+
+      typedef std::vector<std::pair<int32, BaseFloat> > SparseVec;
+      std::vector<SparseVec> silence_post(feats.NumRows(), SparseVec());
+      std::vector<SparseVec> speech_post(feats.NumRows(), SparseVec());
+      std::vector<SparseVec> overlap_speech_post(feats.NumRows(), SparseVec());
+
+      Vector<BaseFloat> silence_deriv_weights(feats.NumRows());
+      Vector<BaseFloat> speech_deriv_weights(feats.NumRows());
+      Vector<BaseFloat> overlap_speech_deriv_weights(feats.NumRows());
+
+      for (int32 i = 0; i < feats.NumRows(); i++) {
+        if (feats(i,0) < 0.5) {
+          silence_deriv_weights(i) = WithProb(p_negative_vec(0)) ? 1.0 : 0.0;
+          silence_post[i].push_back(std::make_pair(0, 1));
+        } else {
+          silence_deriv_weights(i) = WithProb(p_positive_vec(0)) ? 1.0 : 0.0;
+          silence_post[i].push_back(std::make_pair(1, 1));
+        }
+
+        if (feats(i,1) < 0.5) {
+          speech_deriv_weights(i) = WithProb(p_negative_vec(1)) ? 1.0 : 0.0;
+          speech_post[i].push_back(std::make_pair(0, 1));
+        } else {
+          speech_deriv_weights(i) = WithProb(p_positive_vec(1)) ? 1.0 : 0.0;
+          speech_post[i].push_back(std::make_pair(1, 1));
+        }
+
+        if (feats(i,2) < 0.5) {
+          overlap_speech_deriv_weights(i) = WithProb(p_negative_vec(2)) ? 1.0 : 0.0;
+          overlap_speech_post[i].push_back(std::make_pair(0, 1));
+        } else {
+          overlap_speech_deriv_weights(i) = WithProb(p_positive_vec(2)) ? 1.0 : 0.0;
+          overlap_speech_post[i].push_back(std::make_pair(1, 1));
+        }
+      }
+
+      SparseMatrix<BaseFloat> silence_feats(2, silence_post);
+      SparseMatrix<BaseFloat> speech_feats(2, speech_post);
+      SparseMatrix<BaseFloat> overlap_speech_feats(2, overlap_speech_post);
+
+      silence_output.features = silence_feats;
+      speech_output.features = speech_feats;
+      overlap_speech_output.features = overlap_speech_feats;
+
+      io = overlap_speech_output;
+      io.deriv_weights.MulElements(overlap_speech_deriv_weights);
+
+      if (add_silence_output) {
+        silence_output.deriv_weights.MulElements(silence_deriv_weights);
+        eg.io.push_back(silence_output);
+      }
+
+      if (add_speech_output) {
+        speech_output.deriv_weights.MulElements(speech_deriv_weights);
+        eg.io.push_back(speech_output);
+      }
+
+      example_writer.Write(key, eg);
+      num_written++;
+    }
+
+    KALDI_LOG << "Read " << num_read << " neural-network training examples, wrote "
+              << num_written;
+    return (num_written == 0 ? 1 : 0);
+  } catch(const std::exception &e) {
+    std::cerr << e.what() << '\n';
+    return -1;
+  }
+}
+
+
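
In outline, the per-frame relabeling the new binary performs can be sketched
in a few lines of Python (a rough, illustrative model only: 'row' holds the
0/1 targets for (silence, speech, overlap) on one frame, and keep_prob_pos /
keep_prob_neg stand in for the --keep-proportion-*-vec options):

    import random

    def relabel_frame(row, keep_prob_pos, keep_prob_neg):
        posts, weights = [], []
        for k, value in enumerate(row):
            label = 1 if value >= 0.5 else 0
            keep = keep_prob_pos[k] if label == 1 else keep_prob_neg[k]
            posts.append((label, 1.0))  # one-hot posterior for task k
            weights.append(1.0 if random.random() < keep else 0.0)
        return posts, weights
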
From 25a21f9c60edf3186fc7d38ca829afb395d2f153 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Sat, 10 Dec 2016 14:30:27 -0500
Subject: [PATCH 099/530] Modifying do_corruption_data_dir_overlapped_speech.sh

---
 ...o_corruption_data_dir_overlapped_speech.sh | 30 +++++++++++++------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh
index f387acb8552..242dfca8170 100644
--- a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh
+++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh
@@ -1,23 +1,35 @@
 #!/bin/bash
+
+# Copyright 2016  Vimal Manohar
+# Apache 2.0
+
 set -e
 set -u
 set -o pipefail
 
 . path.sh
 
-. cmd.sh
-
-num_data_reps=5
-data_dir=data/train_si284
-whole_data_dir=data/train_si284_whole
-
-nj=40
-reco_nj=40
 
 stage=0
 corruption_stage=-10
+corrupt_only=false
+
+# Data options
+data_dir=data/train_si284   # Expecting non-whole data directory
+speed_perturb=true
+num_data_reps=5  # Number of corrupted versions
+snrs="20:10:15:5:0:-5"
+foreground_snrs="20:10:15:5:0:-5"
+background_snrs="20:10:15:5:0:-5"
+base_rirs=simulated
+# Whole-data directory corresponding to data_dir
+whole_data_dir=data/train_si284_whole
 
-pad_silence=false
+# Parallel options
+reco_nj=40
+nj=40
+cmd=queue.pl
 
+# Options for feature extraction
 mfcc_config=conf/mfcc_hires_bp_vh.conf
 energy_config=conf/log_energy.conf

From c7ba2080a72d253080a6cb038bceb399a7dd5633 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Sun, 11 Dec 2016 13:41:35 -0500
Subject: [PATCH 100/530] dropout_schedule: Changing default in dropout-schedule option

---
 egs/wsj/s5/steps/libs/nnet3/train/common.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py
index f2485e36784..90ee209a092 100644
--- a/egs/wsj/s5/steps/libs/nnet3/train/common.py
+++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py
@@ -647,7 +647,8 @@ def __init__(self):
                             doesn't increase the effective learning rate.""")
 
         self.parser.add_argument("--trainer.dropout-schedule", type=str,
-                                 dest='dropout_schedule', default='',
+                                 dest='dropout_schedule', default=None,
+                                 action=common_lib.NullstrToNoneAction,
                                  help="""Use this to specify the dropout
                                  schedule.  You specify a piecewise linear
                                  function on the domain [0,1], where 0 is the
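
The "piecewise linear function on the domain [0,1]" mentioned in the help
text can be made concrete with a short sketch (illustrative only; the actual
option-string parsing lives elsewhere in common.py):

    def schedule_value(points, t):
        # points: sorted [(0.0, v0), ..., (1.0, vn)]; t is the training progress.
        for (x0, y0), (x1, y1) in zip(points, points[1:]):
            if x0 <= t <= x1:
                return y0 + (y1 - y0) * (t - x0) / (x1 - x0)
        raise ValueError("t must be in [0, 1]")

    # E.g. schedule_value([(0.0, 0.0), (0.2, 0.5), (1.0, 0.0)], 0.6) == 0.25
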
From 420b2cbc0451ce6ce4f83fd8878b25eef1e8add8 Mon Sep 17 00:00:00 2001
From: Dan Povey
Date: Mon, 12 Dec 2016 20:11:38 -0800
Subject: [PATCH 101/530] Add new type of optimization of per-row commands;
 finish some of the internal code for shortcut compilation.

---
 src/nnet3/nnet-compile.cc        |  18 +-
 src/nnet3/nnet-computation.cc    | 105 ++--
 src/nnet3/nnet-computation.h     |  21 +-
 src/nnet3/nnet-compute.cc        |   4 +-
 src/nnet3/nnet-nnet.h            |   4 +-
 src/nnet3/nnet-optimize-utils.cc | 975 ++++++++++++++++++++++++++++++-
 src/nnet3/nnet-optimize-utils.h  |  39 +-
 src/nnet3/nnet-optimize.cc       |  10 +-
 src/nnet3/nnet-optimize.h        |   9 +-
 9 files changed, 1099 insertions(+), 86 deletions(-)

diff --git a/src/nnet3/nnet-compile.cc b/src/nnet3/nnet-compile.cc
index 6290fc2f956..9ff9743230a 100644
--- a/src/nnet3/nnet-compile.cc
+++ b/src/nnet3/nnet-compile.cc
@@ -975,7 +975,8 @@ void Compiler::SetUpPrecomputedIndexes(
     NnetComputation *computation) {
   int32 num_steps = steps_.size();
   KALDI_ASSERT(computation->component_precomputed_indexes.empty());
-  computation->component_precomputed_indexes.push_back(NULL);
+  // the zeroth component is special, contains a NULL pointer.
+  computation->component_precomputed_indexes.resize(1);
   for (int32 step = 0; step < num_steps; step++) {
     StepInfo &step_info = steps_[step];
     int32 node_index = step_info.node_index;
@@ -1005,7 +1006,20 @@
     } else {
       step_info.precomputed_indexes_index =
           computation->component_precomputed_indexes.size();
-      computation->component_precomputed_indexes.push_back(precomputed_indexes);
+
+      NnetComputation::PrecomputedIndexesInfo info;
+      info.data = precomputed_indexes;
+
+      if (!input_indexes.empty() && input_indexes.back().n == 1 &&
+          !output_indexes.empty() && output_indexes.back().n == 1) {
+        // If these conditions are true, it's *possible* that we are doing
+        // 'shortcut' compilation.  So just in case that's what's going on, we
+        // store 'input_indexes' and 'output_indexes', which are needed by
+        // the ExpandComputation() function that is used in that process.
+        info.input_indexes = input_indexes;
+        info.output_indexes = output_indexes;
+      }
+      computation->component_precomputed_indexes.push_back(info);
     }
   }
 }
diff --git a/src/nnet3/nnet-computation.cc b/src/nnet3/nnet-computation.cc
index a7b49499bb3..d5e6e1654a6 100644
--- a/src/nnet3/nnet-computation.cc
+++ b/src/nnet3/nnet-computation.cc
@@ -75,8 +75,9 @@ int32 ComputationRequest::IndexForOutput(
 }
 
 NnetComputation::~NnetComputation() {
-  for (size_t i = 0; i < component_precomputed_indexes.size(); i++)
-    delete component_precomputed_indexes[i];
+  // note: component_precomputed_indexes[0].data is the NULL pointer.
+  for (size_t i = 1; i < component_precomputed_indexes.size(); i++)
+    delete component_precomputed_indexes[i].data;
 }
 
 void NnetComputation::ComputeCudaIndexes() {
@@ -718,8 +719,9 @@ void NnetComputation::Read(std::istream &is, bool binary) {
 
   // delete any existing pointers in component_precomputed_indexes.
-  for (size_t i = 0; i < component_precomputed_indexes.size(); i++)
-    delete component_precomputed_indexes[i];
+  // note: component_precomputed_indexes[0] is the NULL pointer.
+  for (size_t i = 1; i < component_precomputed_indexes.size(); i++)
+    delete component_precomputed_indexes[i].data;
   component_precomputed_indexes.clear();
 
   size_t num_component_precomputed_indexes;
   ExpectToken(is, binary, "");
   ReadBasicType(is, binary, &num_component_precomputed_indexes);
   KALDI_ASSERT(num_component_precomputed_indexes >= 0);
   component_precomputed_indexes.resize(num_component_precomputed_indexes);
-  ExpectToken(is, binary, "");
-  std::vector component_precomputed_indexes_tmp;
-  for (size_t c = 0; c < num_component_precomputed_indexes; c++) {
-    bool is_null;  // a boolean indicating whether the pointer should be NULL.
-    ReadBasicType(is, binary, &is_null);
-    if (!is_null) {
+
+  std::string tok;
+  ReadToken(is, binary, &tok);
+  if (tok == "") {
+    // Older on-disk format, before that code was extended for shortcut
+    // compilation.
+    component_precomputed_indexes.clear();
+    component_precomputed_indexes.resize(num_component_precomputed_indexes);
+    for (size_t c = 0; c < num_component_precomputed_indexes; c++) {
+      bool is_null;  // a boolean indicating whether the pointer should be NULL.
+ ReadBasicType(is, binary, &is_null); + if (!is_null) { + ComponentPrecomputedIndexes* p = ComponentPrecomputedIndexes::ReadNew(is, binary); + component_precomputed_indexes[c].data = p; + } + } + } else { + KALDI_ASSERT(tok == ""); + for (size_t c = 1; c < num_component_precomputed_indexes; c++) { ComponentPrecomputedIndexes* p = ComponentPrecomputedIndexes::ReadNew(is, binary); - component_precomputed_indexes_tmp.push_back(p); - } else { - component_precomputed_indexes_tmp.push_back(NULL); + KALDI_ASSERT(p != NULL); + PrecomputedIndexesInfo &info = component_precomputed_indexes[c]; + info.data = p; + ReadIndexVector(is, binary, &(info.input_indexes)); + ReadIndexVector(is, binary, &(info.output_indexes)); } } - component_precomputed_indexes = component_precomputed_indexes_tmp; - size_t num_indexes; ExpectToken(is, binary, ""); ReadBasicType(is, binary, &num_indexes); @@ -819,14 +834,12 @@ void NnetComputation::Write(std::ostream &os, bool binary) const { if (!binary) os << std::endl; WriteToken(os, binary, ""); WriteBasicType(os, binary, component_precomputed_indexes.size()); - WriteToken(os, binary, ""); - for (size_t c = 0; c < component_precomputed_indexes.size(); c++) { - if (component_precomputed_indexes[c] != NULL) { - WriteBasicType(os, binary, false); // a boolean indicating whether the pointer is NULL. - component_precomputed_indexes[c]->Write(os, binary); - } else { - WriteBasicType(os, binary, true); - } + WriteToken(os, binary, ""); + for (size_t c = 1; c < component_precomputed_indexes.size(); c++) { + const PrecomputedIndexesInfo &info = component_precomputed_indexes[c]; + info.data->Write(os, binary); + WriteIndexVector(os, binary, info.input_indexes); + WriteIndexVector(os, binary, info.output_indexes); } if (!binary) os << std::endl; @@ -1062,6 +1075,7 @@ NnetComputation::NnetComputation(const NnetComputation &other): matrices(other.matrices), matrix_debug_info(other.matrix_debug_info), submatrices(other.submatrices), + component_precomputed_indexes(other.component_precomputed_indexes), indexes(other.indexes), indexes_multi(other.indexes_multi), indexes_ranges(other.indexes_ranges), @@ -1069,33 +1083,30 @@ NnetComputation::NnetComputation(const NnetComputation &other): need_model_derivative(other.need_model_derivative), indexes_cuda(other.indexes_cuda), indexes_ranges_cuda(other.indexes_ranges_cuda) { - for (size_t i = 0; i < other.component_precomputed_indexes.size(); i++) - component_precomputed_indexes.push_back( - other.component_precomputed_indexes[i] == NULL ? NULL : - other.component_precomputed_indexes[i]->Copy()); + for (size_t i = 1; i < component_precomputed_indexes.size(); i++) + component_precomputed_indexes[i].data = + component_precomputed_indexes[i].data->Copy(); } - NnetComputation& NnetComputation::operator = (const NnetComputation &other) { - matrices = other.matrices; - matrix_debug_info = other.matrix_debug_info; - submatrices = other.submatrices; - indexes = other.indexes; - indexes_multi = other.indexes_multi; - indexes_ranges = other.indexes_ranges; - commands = other.commands; - need_model_derivative = other.need_model_derivative; - indexes_cuda = other.indexes_cuda; - indexes_ranges_cuda = other.indexes_ranges_cuda; - - for (size_t i = 0; i < component_precomputed_indexes.size(); i++) - delete component_precomputed_indexes[i]; - component_precomputed_indexes.clear(); - for (size_t i = 0; i < other.component_precomputed_indexes.size(); i++) - component_precomputed_indexes.push_back( - other.component_precomputed_indexes[i] == NULL ? 
NULL :
-        other.component_precomputed_indexes[i]->Copy());
-  return *this;
+  matrices = other.matrices;
+  matrix_debug_info = other.matrix_debug_info;
+  submatrices = other.submatrices;
+  indexes = other.indexes;
+  indexes_multi = other.indexes_multi;
+  indexes_ranges = other.indexes_ranges;
+  commands = other.commands;
+  need_model_derivative = other.need_model_derivative;
+  indexes_cuda = other.indexes_cuda;
+  indexes_ranges_cuda = other.indexes_ranges_cuda;
+
+  for (size_t i = 1; i < component_precomputed_indexes.size(); i++)
+    delete component_precomputed_indexes[i].data;
+  component_precomputed_indexes = other.component_precomputed_indexes;
+  for (size_t i = 1; i < component_precomputed_indexes.size(); i++)
+    component_precomputed_indexes[i].data =
+        component_precomputed_indexes[i].data->Copy();
+  return *this;
 }
 
diff --git a/src/nnet3/nnet-computation.h b/src/nnet3/nnet-computation.h
index da3a43bd15f..fd8cb06d06b 100644
--- a/src/nnet3/nnet-computation.h
+++ b/src/nnet3/nnet-computation.h
@@ -296,6 +296,25 @@ struct NnetComputation {
     void Read(std::istream &istream, bool binary);
     void Write(std::ostream &ostream, bool binary) const;
   };
+  struct PrecomputedIndexesInfo {
+    // For each step of the computation for which we might possibly need to store
+    // a ComponentPrecomputedIndexes object (and note that this is only applicable
+    // for non-simple Components), this struct stores some information.
+    // The primary data is in 'data'; it's an object of a type inheriting from
+    // ComponentPrecomputedIndexes.
+    // The 'input_indexes' and 'output_indexes' are the vectors that were provided
+    // to the function Component::PrecomputeIndexes() when generating these
+    // PrecomputedIndexes objects.  They are currently only stored in cases where
+    // the 'n' values in the computation are numbered only zero and one, because
+    // these types of computations are compiled in 'shortcut' compilation, and
+    // in that case we'll need these indexes later in order to generate the
+    // 'expanded' computation (see the function ExpandComputation()).
+    ComponentPrecomputedIndexes *data;
+    std::vector<Index> input_indexes;
+    std::vector<Index> output_indexes;
+    PrecomputedIndexesInfo(): data(NULL) { }
+  };
+
 
   // "matrices" describes the sizes of the matrices that we use as variables in
   // the computation [note: index zero is reserved for an empty matrix].  Note:
@@ -323,7 +342,7 @@
   // the NULL pointer, which is used for "simple" components and others that do
   // not require precomputed indexes.
   // These are owned here.
-  std::vector<ComponentPrecomputedIndexes*> component_precomputed_indexes;
+  std::vector<PrecomputedIndexesInfo> component_precomputed_indexes;
 
   // used in kAddRows, kAddToRows, kCopyRows, kCopyToRows.  contains row-indexes.
  std::vector<std::vector<int32> > indexes;

diff --git a/src/nnet3/nnet-compute.cc b/src/nnet3/nnet-compute.cc
index 75c0c464c90..d01327c8265 100644
--- a/src/nnet3/nnet-compute.cc
+++ b/src/nnet3/nnet-compute.cc
@@ -182,7 +182,7 @@ void NnetComputer::ExecuteCommand() {
     case kPropagate: {
       const Component *component = nnet_.GetComponent(c.arg1);
       ComponentPrecomputedIndexes *indexes =
-          computation_.component_precomputed_indexes[c.arg2];
+          computation_.component_precomputed_indexes[c.arg2].data;
       const CuSubMatrix<BaseFloat> input(GetSubMatrix(c.arg3));
       CuSubMatrix<BaseFloat> output(GetSubMatrix(c.arg4));
       component->Propagate(indexes, input, &output);
@@ -208,7 +208,7 @@ void NnetComputer::ExecuteCommand() {
            nnet_to_update_->GetComponent(c.arg1) : NULL);
       ComponentPrecomputedIndexes *indexes =
-          computation_.component_precomputed_indexes[c.arg2];
+          computation_.component_precomputed_indexes[c.arg2].data;
       const CuSubMatrix<BaseFloat> in_value(GetSubMatrix(c.arg3));
       const CuSubMatrix<BaseFloat> out_value(GetSubMatrix(c.arg4));
       const CuSubMatrix<BaseFloat> out_deriv(GetSubMatrix(c.arg5));
diff --git a/src/nnet3/nnet-nnet.h b/src/nnet3/nnet-nnet.h
index e999f20f4f5..19cfb3949ad 100644
--- a/src/nnet3/nnet-nnet.h
+++ b/src/nnet3/nnet-nnet.h
@@ -125,10 +125,10 @@ class Nnet {

   int32 NumNodes() const { return nodes_.size(); }

-  /// return component indexed c. not a copy; not owned by caller.
+  /// Return component indexed c.  Not a copy; not owned by caller.
   Component *GetComponent(int32 c);

-  /// return component indexed c (const version). not a copy; not owned by
+  /// Return component indexed c (const version).  Not a copy; not owned by
   /// caller.
   const Component *GetComponent(int32 c) const;

diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc
index 6744eb91e37..62bda3a17e1 100644
--- a/src/nnet3/nnet-optimize-utils.cc
+++ b/src/nnet3/nnet-optimize-utils.cc
@@ -1950,21 +1950,266 @@ bool ReplaceRowWithMatrixOps(NnetComputation *computation) {
         break;
       }
       default:
-        continue;
+        break;
     }
   }
   return ans;
 }
+
+
+
+/*
+  This function, used in SnipSingleRowOp(),
+  finds the number of leading, and trailing, negative numbers
+  in a vector of integers.  For instance, if vec is
+ [ -1 -1 2 3 -1 4 5 -1 ]
+  then '*num_leading_negatives' will be set to 2 and '*num_trailing_negatives'
+  will be set to 1.  If all the numbers in 'vec' are negative, or 'vec' is
+  empty, it is an error and this function will invoke KALDI_ERR.
+*/
+static void FindNumLeadingAndTrailingNegatives(const std::vector<int32> &vec,
+                                               int32 *num_leading_negatives,
+                                               int32 *num_trailing_negatives) {
+  KALDI_ASSERT(!vec.empty());
+  const int32 *begin = &(vec[0]), *ptr = begin, *end = ptr + vec.size();
+  while (ptr != end && *ptr < 0)
+    ptr++;
+  // note regarding error message: we assume all negative numbers are -1, due to
+  // the way this is called, but it only affects how we describe the error.
+  KALDI_ASSERT(ptr != end && "Vector consists entirely of -1's.");
+  *num_leading_negatives = ptr - begin;
+  const int32 *ptr2 = end - 1;
+  // the following while loop should terminate before falling off the vector,
+  // because we've established above (in the assertion) that the vector contains
+  // at least one nonnegative number.
+  while (*ptr2 < 0)
+    ptr2--;
+  KALDI_ASSERT(ptr2 != begin);  // would be code error.
+  *num_trailing_negatives = end - 1 - ptr2;
+}
+
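(A hedged illustration, not part of the patch: a hypothetical caller in the
same translation unit could exercise the function above as follows, using the
example vector from its comment.)

    std::vector<int32> vec;
    int32 arr[] = { -1, -1, 2, 3, -1, 4, 5, -1 };
    vec.insert(vec.end(), arr, arr + 8);
    int32 num_leading, num_trailing;
    FindNumLeadingAndTrailingNegatives(vec, &num_leading, &num_trailing);
    // num_leading is now 2 (the two initial -1's) and num_trailing is 1
    // (the final -1); the interior -1 at position 4 is not counted, since
    // only the edges of the vector can be snipped off.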
+// This function, called from SnipRowOps, is called when it encounters commands
+// of type kCopyRows or kAddRows; it modifies such commands when the indexes
+// have leading or trailing -1's, to make them operate on a smaller submatrix.
+// It returns true if it made a change, and false otherwise.
+static bool SnipSingleRowOp(NnetComputation *computation,
+                            int32 command_index) {
+  NnetComputation::Command &c = computation->commands[command_index];
+  KALDI_ASSERT(static_cast<size_t>(c.arg3) < computation->indexes.size());
+  const std::vector<int32> &indexes = computation->indexes[c.arg3];
+  int32 num_leading_negatives, num_trailing_negatives;
+  FindNumLeadingAndTrailingNegatives(indexes,
+                                     &num_leading_negatives,
+                                     &num_trailing_negatives);
+  if (num_leading_negatives == 0 && num_trailing_negatives == 0)
+    return false;
+
+  int32 new_num_rows = static_cast<int32>(indexes.size()) -
+      num_leading_negatives - num_trailing_negatives;
+  KALDI_ASSERT(new_num_rows > 0);
+  std::vector<int32> new_indexes(indexes.begin() + num_leading_negatives,
+                                 indexes.begin() + num_leading_negatives +
+                                 new_num_rows);
+  c.arg3 = computation->indexes.size();
+  computation->indexes.push_back(std::vector<int32>());
+  computation->indexes.back().swap(new_indexes);
+  c.arg1 = computation->NewSubMatrix(c.arg1,
+                                     num_leading_negatives, new_num_rows,
+                                     0, -1);
+  return true;  // made a change.
+}
+
+
+
+/*
+  This function, used in SnipMultiRowOp(), finds the number of leading, and
+  trailing, negative values in a vector of pairs of integers.  In particular,
+  it finds the number of leading and trailing pairs whose .first value is negative
+  (in practice we'll only encounter either (-1,-1) pairs, or pairs of both
+  nonnegative values).
+
+  For instance, if vec is
+ [ (-1,-1) (-1,-1) (80,2) (81,3) (-1,-1) (80,4) (81,5) (-1,-1) ]
+  then '*num_leading_negatives' will be set to 2 and '*num_trailing_negatives'
+  will be set to 1.  If all the .first numbers in 'vec' are negative, or
+  'vec' is empty, it is an error and this function will invoke KALDI_ERR.
+*/
+static void FindNumLeadingAndTrailingNegatives(
+    const std::vector<std::pair<int32, int32> > &vec,
+    int32 *num_leading_negatives,
+    int32 *num_trailing_negatives) {
+  KALDI_ASSERT(!vec.empty());
+  const std::pair<int32, int32> *begin = &(vec[0]), *ptr = begin,
+      *end = ptr + vec.size();
+  while (ptr != end && ptr->first < 0)
+    ptr++;
+  // note regarding error message: we assume all negative numbers are -1, due to
+  // the way this is called, but it only affects how we describe the error.
+  KALDI_ASSERT(ptr != end && "Vector consists entirely of -1's.");
+  *num_leading_negatives = ptr - begin;
+  const std::pair<int32, int32> *ptr2 = end - 1;
+  // the following while loop should terminate before falling off the vector,
+  // because we've established above (in the assertion) that the vector contains
+  // at least one nonnegative number.
+  while (ptr2->first < 0)
+    ptr2--;
+  KALDI_ASSERT(ptr2 != begin);  // would be code error.
+  *num_trailing_negatives = end - 1 - ptr2;
+}
+
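(For intuition, a schematic sketch of the effect of SnipSingleRowOp() above;
the command and index values are invented for illustration.)

    // Suppose command c is kAddRows with c.arg3 == 3 and
    //   computation->indexes[3] == [ -1, -1, 10, 11, 12, -1 ].
    // After SnipSingleRowOp(computation, command_index):
    //   - a new vector [ 10, 11, 12 ] is appended to computation->indexes
    //     and c.arg3 points to it;
    //   - c.arg1 is replaced by a new 3-row submatrix of the original one,
    //     starting at row 2, i.e. NewSubMatrix(old_arg1, 2, 3, 0, -1),
    //     so the kernel no longer visits the no-op rows at the edges.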
+// This function, called from SnipRowOps, is called when it encounters commands
+// of type kAddRowsMulti, kAddToRowsMulti, kCopyRowsMulti or kCopyToRowsMulti;
+// it modifies such commands when the indexes have leading or trailing (-1,-1)
+// pairs, to make them operate on a smaller submatrix.  It returns true if it
+// made a change, and false otherwise.
+static bool SnipMultiRowOp(NnetComputation *computation,
+                           int32 command_index) {
+  NnetComputation::Command &c = computation->commands[command_index];
+  KALDI_ASSERT(static_cast<size_t>(c.arg2) < computation->indexes_multi.size());
+  const std::vector<std::pair<int32, int32> > &indexes_multi =
+      computation->indexes_multi[c.arg2];
+  int32 num_leading_negatives, num_trailing_negatives;
+  FindNumLeadingAndTrailingNegatives(indexes_multi,
+                                     &num_leading_negatives,
+                                     &num_trailing_negatives);
+  if (num_leading_negatives == 0 && num_trailing_negatives == 0)
+    return false;
+
+  int32 new_num_rows = static_cast<int32>(indexes_multi.size()) -
+      num_leading_negatives - num_trailing_negatives;
+  KALDI_ASSERT(new_num_rows > 0);
+  std::vector<std::pair<int32, int32> > new_indexes_multi(
+      indexes_multi.begin() + num_leading_negatives,
+      indexes_multi.begin() + num_leading_negatives + new_num_rows);
+  c.arg2 = computation->indexes_multi.size();
+  computation->indexes_multi.push_back(std::vector<std::pair<int32, int32> >());
+  computation->indexes_multi.back().swap(new_indexes_multi);
+  c.arg1 = computation->NewSubMatrix(c.arg1,
+                                     num_leading_negatives, new_num_rows,
+                                     0, -1);
+  return true;  // made a change.
+}
+
+
+
+/*
+  This function, used in SnipRangesRowOp(), finds the number of leading, and
+  trailing, values in a vector of pairs of integers that are the same (i.e.
+  pairs of the form (x, x) for any x).  [This is how we represent an empty
+  range, which is a kind of no-op, in commands of type kCopyRowRanges or
+  kAddRowRanges.]
+
+  For instance, if vec is
+ [ (0,0) (0,0) (4,5) (6,8) (0,0) (10,12) (14,20) (0,0) ]
+  then '*num_leading_identicals' will be set to 2 and '*num_trailing_identicals'
+  will be set to 1.  If all pairs in 'vec' are identical, or 'vec' is empty, it
+  is an error and this function will invoke KALDI_ERR.
+*/
+static void FindNumLeadingAndTrailingIdenticals(
+    const std::vector<std::pair<int32, int32> > &vec,
+    int32 *num_leading_identicals,
+    int32 *num_trailing_identicals) {
+  KALDI_ASSERT(!vec.empty());
+  const std::pair<int32, int32> *begin = &(vec[0]), *ptr = begin,
+      *end = ptr + vec.size();
+  while (ptr != end && ptr->first == ptr->second)
+    ptr++;
+  // note regarding error message: we assume all identical pairs are (-1, -1),
+  // due to the way this is called, but it only affects how we describe the
+  // error.
+  KALDI_ASSERT(ptr != end && "Vector consists entirely of -1's.");
+  *num_leading_identicals = ptr - begin;
+  const std::pair<int32, int32> *ptr2 = end - 1;
+  // the following while loop should terminate before falling off the vector,
+  // because we've established above (in the assertion) that the vector contains
+  // at least one pair of non-identical values.
+  while (ptr2->first == ptr2->second)
+    ptr2--;
+  KALDI_ASSERT(ptr2 != begin);  // would be code error.
+  *num_trailing_identicals = end - 1 - ptr2;
+}
+
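(A hedged sketch of what the range-snipping function below accomplishes; the
submatrix and row numbers are invented for illustration.)

    // Suppose a kAddRowRanges command has
    //   computation->indexes_ranges[c.arg3] == [ (0,0), (3,5), (5,8), (0,0), (0,0) ].
    // SnipRangesRowOp() trims one leading and two trailing empty ranges:
    //   - a new vector [ (3,5), (5,8) ] is appended to
    //     computation->indexes_ranges and c.arg3 points to it;
    //   - c.arg1 is replaced by a 2-row submatrix starting at row 1,
    //     i.e. NewSubMatrix(old_arg1, 1, 2, 0, -1).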
+// This function, called from SnipRowOps, is called when it encounters commands
+// of type kAddRowRanges that have leading or trailing (x, x) pairs [i.e. pairs
+// of identical values; these are how we represent empty ranges], to make them
+// operate on a smaller submatrix.  It returns true if it made a change, and
+// false otherwise.
+static bool SnipRangesRowOp(NnetComputation *computation,
+                            int32 command_index) {
+  NnetComputation::Command &c = computation->commands[command_index];
+  KALDI_ASSERT(static_cast<size_t>(c.arg3) < computation->indexes_ranges.size());
+  const std::vector<std::pair<int32, int32> > &indexes_ranges =
+      computation->indexes_ranges[c.arg3];
+  int32 num_leading_identicals, num_trailing_identicals;
+  FindNumLeadingAndTrailingIdenticals(indexes_ranges,
+                                      &num_leading_identicals,
+                                      &num_trailing_identicals);
+  if (num_leading_identicals == 0 && num_trailing_identicals == 0)
+    return false;
+
+  int32 new_num_rows = static_cast<int32>(indexes_ranges.size()) -
+      num_leading_identicals - num_trailing_identicals;
+  KALDI_ASSERT(new_num_rows > 0);
+  std::vector<std::pair<int32, int32> > new_indexes_ranges(
+      indexes_ranges.begin() + num_leading_identicals,
+      indexes_ranges.begin() + num_leading_identicals + new_num_rows);
+  c.arg3 = computation->indexes_ranges.size();
+  computation->indexes_ranges.push_back(std::vector<std::pair<int32, int32> >());
+  computation->indexes_ranges.back().swap(new_indexes_ranges);
+  c.arg1 = computation->NewSubMatrix(c.arg1,
+                                     num_leading_identicals, new_num_rows,
+                                     0, -1);
+  return true;  // made a change.
+}
+
+
+
+bool SnipRowOps(NnetComputation *computation) {
+  bool ans = false;
+  int32 num_commands = computation->commands.size();
+  for (int32 command_index = 0; command_index < num_commands;
+       command_index++) {
+    // non-const because we'll be changing it.
+    NnetComputation::Command &c = computation->commands[command_index];
+
+    switch (c.command_type) {
+      case kCopyRows: case kAddRows: {
+        if (SnipSingleRowOp(computation, command_index))
+          ans = true;
+        break;
+      }
+      case kAddRowsMulti: case kAddToRowsMulti:
+      case kCopyRowsMulti: case kCopyToRowsMulti: {
+        if (SnipMultiRowOp(computation, command_index))
+          ans = true;
+        break;
+      }
+      case kAddRowRanges: {
+        if (SnipRangesRowOp(computation, command_index))
+          ans = true;
+        break;
+      }
+      default:
+        break;
+    }
+  }
+  return ans;
+}
+
+
+
 // This class implements the internals of the ExpandComputation() function (used
 // in shortcut compilation); see comment by the declaration of
 // ExpandComputation() in nnet-optimize-utils.h for overview.
 class ComputationExpander {
  public:
-  ComputationExpander(const NnetComputation &computation,
+  ComputationExpander(const Nnet &nnet,
+                      const MiscComputationInfo &misc_info,
+                      const NnetComputation &computation,
                       bool need_debug_info,
                       int32 num_n_values,
                       NnetComputation *expanded_computation):
+      nnet_(nnet), misc_info_(misc_info),
       computation_(computation),
       need_debug_info_(need_debug_info),
       num_n_values_(num_n_values),
@@ -1974,17 +2219,17 @@ class ComputationExpander {

   // This function call implements the functionality of the class,
   // expanding the computation.
-  bool Expand();
+  void Expand();

  private:
-  // This function sets up and computes the 'n_fast' vector (see comment
-  // by it for what this is.
+  // This function sets up and computes the 'n_fast_' vector (see comment
+  // by the declaration of 'n_fast_' for what this is).
   void InitFastInfo();

   // This function sets up the 'matrices' vector in 'expanded_computation_'.
   // It's quite simple: it just multiplies all the num-rows by num_n_values_ and
   // divides by 2, and leaves the num-cols the same.
-  void ComputeMatrices();
+  void ComputeMatrixInfo();

   // This function, only called if need_debug_info_ is true, sets up
   // the 'matrix_debug_info' vector in 'expanded_computation_'.
@@ -1995,6 +2240,23 @@
   // more complicated.
   void ComputeSubmatrixInfo();

+  // Expands a command of type kCopyRows or kAddRows; involves adding a new
+  // element of 'indexes' to expanded_computation_.
+  void ExpandRowsCommand(const NnetComputation::Command &c_in,
+                         NnetComputation::Command *c_out);
+
+  // Expands a command of type kCopyRowsMulti or kAddRowsMulti, kCopyToRowsMulti
+  // or kAddToRowsMulti; involves adding a new element of 'indexes_multi' to
+  // expanded_computation_.
+  void ExpandRowsMultiCommand(const NnetComputation::Command &c_in,
+                              NnetComputation::Command *c_out);
+
+
+  // Expands a command of type kAddRowRanges; involves adding a new element of
+  // 'indexes_ranges' to expanded_computation_.
+  void ExpandRowRangesCommand(const NnetComputation::Command &c_in,
+                              NnetComputation::Command *c_out);
+

   // This function computes all the PrecomputedIndexes in the
   // 'component_precomputed_indexes' member of 'expanded_computation_'.
@@ -2013,19 +2275,105 @@

   void ComputeCommands();

-  // This 'n_fast' vector is indexed by the matrix-index in the computation,
+  // This function ensures that the debug-info in expanded_computation_ for the
+  // matrix underlying the submatrix with index 'submatrix_index' exists and is
+  // set up.  In some cases we need the debug info for some matrices in order to
+  // do the expansion, even if debug info is not requested for the output; in
+  // those cases we set it up temporarily and clear it before we finish.
+  void EnsureDebugInfoExists(int32 submatrix_index);
+
+
+
+  // This function is used in mapping row-indexes into sub-matrices from the
+  // old to the new computation.  It is mostly a wrapper for
+  // GetNewMatrixLocationInfo, but designed to give row indexes into
+  // submatrices rather than matrices; see the documentation for
+  // GetNewMatrixLocationInfo() for details and an explanation of the
+  // interface.
+  // This function assumes that ComputeSubmatrixInfo() has already
+  // been called.
+  // Note: it returns true if the index 'old_row_index' into submatrix
+  // indexed 'old_submat_index' corresponds to an Index with n=0; otherwise
+  // it returns false and does not set the output values.
+  bool GetNewSubmatLocationInfo(int32 old_submat_index,
+                                int32 old_row_index,
+                                int32 *new_row_index,
+                                int32 *new_n_stride) const;
+
+
+  /// This function is used in mapping row-indexes into matrices, from the
+  /// old to the new computation.
+  /// @param [in] old_matrix_index   The matrix-index > 0, for which we
+  ///                                are mapping row-indexes.
+  /// @param [in] old_row_index      The old row-index into the matrix.
+  ///                                This MUST be a row-index for which n=0
+  ///                                in the cindexes information.
+  /// @param [out] new_row_index     To '*new_row_index' this function outputs
+  ///                                the row-index where the cindex referred to in
+  ///                                'old_matrix_index' will reside in the new,
+  ///                                expanded computation.
+  /// @param [out] new_n_stride      To '*new_n_stride' this function outputs
+  ///                                the 'n stride' in the new computation, which
+  ///                                means the amount the row-index increases
+  ///                                every time we increase the 'n' value in the
+  ///                                cindex by one.
+  void GetNewMatrixLocationInfo(int32 old_matrix_index,
+                                int32 old_row_index,
+                                int32 *new_row_index,
+                                int32 *new_n_stride) const;
+
+
+
+  // This function 'expands' a set of indexes; it's called from
+  // ComputePrecomputedIndexes().  The indexes are expected to
+  // have the normal kind of regularity, with the 'n' varying either
+  // the fastest or the slowest of any index.
+ void ExpandIndexes(const std::vector &indexes, + std::vector *indexes_expanded) const; + + + + // This function, used in ExpandIndexes(), works out whether a vector + // of indexes varies 'fast' in n, or slowly; see the comment for + // ComputationIsDecomposable() in nnet-optimize-utils.h for more explanation + // of the meaning. + // If the vector of indexes does not have the required regular structure w.r.t + // n, this function will throw an exception via KALDI_ERR. + bool GetFastInfo(const std::vector &indexes) const; + + /// This function is analogous to GetNewMatrixLocationInfo, but + /// specialized for the case where you have a vector of Indexes + /// It's used inside ExpandIndexes(). + /// + /// @param [in] 'is_fast' should be true if the 'n' varies fast in the input + /// indexes (i.e. n stride is 1)... + /// @param [in] old_index The index into 'indexes'.. should point to an + /// element with n==0 (note, the element is an Index; + /// and note the capital I, it affects the meaning). + /// @param [out] new_index The index into the expanded indexes vector + /// that this same Index will be located at in the + /// expanded computation. + /// @param [out] new_n_stride The stride of n, i.e. the amount by which the + /// index changes when we increment n by one in the + /// Index. This will actually be the same as in + /// the old computation. + void GetNewLocationInfo(const std::vector &indexes, + bool is_fast, + int32 old_index, + int32 *new_index, + int32 *new_n_stride) const; + + + // This 'n_fast_' vector is indexed by the matrix-index in the computation, // i.e. the same index as indexes computation_.matrix_info and // expanded_computation_->matrix_info. For each matrix-index m > 0 it // contains true if the 'n' varies 'fast', or false if the 'n' index varies // 'slowly'. By 'fast' and 'slow', we mean in the same sense as is desribed // in the comment for ComputationIsDecomposable() in nnet-optimize-utils.h. - std::vector n_fast; - - - - - + std::vector n_fast_; + const Nnet &nnet_; + const MiscComputationInfo &misc_info_; const NnetComputation &computation_; bool need_debug_info_; int32 num_n_values_; @@ -2034,6 +2382,607 @@ class ComputationExpander { +void ComputationExpander::ExpandRowsCommand( + const NnetComputation::Command &c_in, + NnetComputation::Command *c_out) { + // we need to expand the row-indexes in c_in.arg3, and put the index of the + // resulting vector in expanded_computation_->indexes, in 'c_out->arg3'. + + int32 s1 = c_in.arg1, s2 = c_in.arg2; + + // The command that gets called is something like + // submat1.AddRows(submat2, indexes) if submat1 is the submatrix referred to in + // 's1' and submat2 is the submatrix referred to in 's2'. + // 'indexes' has the same size as the num-rows of submat1, and the values + // in the vector are row-indexes into s2. 
+ const std::vector &old_indexes = computation_.indexes[c_in.arg3]; + c_out->arg3 = expanded_computation_->indexes.size(); + expanded_computation_->indexes.push_back(std::vector()); + std::vector &new_indexes = expanded_computation_->indexes.back(); + + int32 old_size = old_indexes.size(), + num_n_values = num_n_values_, + new_size = expanded_computation_->submatrices[s1].num_rows; + KALDI_ASSERT(old_size % 2 == 0 && + old_size == computation_.submatrices[s1].num_rows); + new_indexes.resize(new_size, -1); + + for (int32 i1 = 0; i1 < old_size; i1++) { + int32 new_i1_n0, new_n_stride1; + if (GetNewSubmatLocationInfo(s1, i1, &new_i1_n0, &new_n_stride1)) { + // GetNewSubmatLocationInfo() returns true if this corresponds to + // a Cindex with n == 0. + int32 i2 = old_indexes[i1]; + int32 new_i2_n0, new_n_stride2; + if (i2 < 0) { // if i2 is -1, we'll just fill any relevant positions in + // 'new_indexes' with -1's. + continue; + } else { + bool ans = GetNewSubmatLocationInfo(s2, i2, &new_i2_n0, &new_n_stride2); + KALDI_ASSERT(ans); // source should also be for n==0, because we don't + // (or at least shouldn't) create computations that + // mix up the 'n' values + for (int32 n = 0; n < num_n_values; n++) { + int32 new_i1 = new_i1_n0 + n * new_n_stride1, + new_i2 = new_i2_n0 + new_n_stride2; + new_indexes[new_i1] = new_i2; + } + } + } + } +} + +void ComputationExpander::ExpandRowsMultiCommand( + const NnetComputation::Command &c_in, + NnetComputation::Command *c_out) { + // we need to expand the (submatrix,row)-index pairs in c_in.arg2, and put the + // index of the resulting vector in expanded_computation_->indexes_multi, + // in 'c_out->arg2'. + + int32 s1 = c_in.arg1, + num_rows_old = computation_.submatrices[s1].num_rows, + num_rows_new = expanded_computation_->submatrices[s1].num_rows; + + const std::vector > &old_indexes_multi = + computation_.indexes_multi[c_in.arg2]; + // old_indexes_multi is a vector that has the same size as the num-rows + // of submatrix s1. It contains pairs that are either (-1, -1), or + // pairs (submatrix-index, row-index) referring to other submatrices + // in the computation. + + KALDI_ASSERT(static_cast(old_indexes_multi.size()) == num_rows_old); + KALDI_ASSERT(num_rows_old % 2 == 0); + int32 num_n_values = num_n_values_; + + + c_out->arg2 = expanded_computation_->indexes_multi.size(); + expanded_computation_->indexes_multi.push_back( + std::vector >()); + std::vector > &new_indexes_multi = + expanded_computation_->indexes_multi.back(); + + new_indexes_multi.resize(num_rows_new, + std::pair(-1, -1)); + + for (int32 i1 = 0; i1 < num_rows_old; i1++) { + int32 new_i1_n0, new_n_stride1; + if (GetNewSubmatLocationInfo(s1, i1, &new_i1_n0, &new_n_stride1)) { + // GetNewSubmatLocationInfo() returns true if this corresponds to + // a Cindex with n == 0. + int32 s2 = old_indexes_multi[i1].first, + i2 = old_indexes_multi[i1].second; + int32 new_i2_n0, new_n_stride2; + if (s2 < 0) { // if s2 is -1, we don't have to do anything... we'd have + // to fill any relevant positions in 'new_indexes_multi' + // with (-1,-1)'s, but it's filled with that by default. 
+ continue; + } else { + bool ans = GetNewSubmatLocationInfo(s2, i2, &new_i2_n0, &new_n_stride2); + KALDI_ASSERT(ans); // source should also be for n==0, because we don't + // (or at least shouldn't) create computations that + // mix up the 'n' values + + int32 new_i1 = new_i1_n0, new_i2 = new_i2_n0; + + for (int32 n = 0; n < num_n_values; + n++, new_i1 += new_n_stride1, new_i2 += new_n_stride2) { + new_indexes_multi[new_i1].first = s2; + new_indexes_multi[new_i1].second = new_i2; + } + } + } + } +} + + + +void ComputationExpander::ExpandRowRangesCommand( + const NnetComputation::Command &c_in, + NnetComputation::Command *c_out) { + // we need to expand the pairs of row-indexes in c_in.arg2, and put the index + // of the resulting vector in expanded_computation_->indexes_ranges, in + // 'c_out->arg2'. + + int32 s1 = c_in.arg1, s2 = c_in.arg2, + num_rows_old = computation_.submatrices[s1].num_rows, + num_rows_new = expanded_computation_->submatrices[s1].num_rows; + KALDI_ASSERT(static_cast(c_in.arg3) < + computation_.indexes_ranges.size()); + const std::vector > &old_indexes_ranges = + computation_.indexes_ranges[c_in.arg3]; + // old_indexes_ranges is a vector that has the same size as the num-rows of + // submatrix s1. It contains pairs that are either two copies of the same + // value (in practice the pair (-1, -1)), or pairs (begin-row-index, + // end-row-index) representing the (begin,end) of a range in submatrix s2. + // Note: end-row-index is one past the end of the range, as for C++ iterators. + + KALDI_ASSERT(static_cast(old_indexes_ranges.size()) == num_rows_old); + KALDI_ASSERT(num_rows_old % 2 == 0); + int32 num_n_values = num_n_values_; + + c_out->arg3 = expanded_computation_->indexes_ranges.size(); + expanded_computation_->indexes_ranges.push_back( + std::vector >()); + std::vector > &new_indexes_ranges = + expanded_computation_->indexes_ranges.back(); + + new_indexes_ranges.resize(num_rows_new, + std::pair(-1, -1)); + + for (int32 i1 = 0; i1 < num_rows_old; i1++) { + int32 new_i1_n0, new_n_stride1; + if (GetNewSubmatLocationInfo(s1, i1, &new_i1_n0, &new_n_stride1)) { + // GetNewSubmatLocationInfo() returns true if this corresponds to + // a Cindex with n == 0. + int32 i2_begin = old_indexes_ranges[i1].first, + i2_end = old_indexes_ranges[i1].second; + if (i2_end == i2_begin) + continue; // (-1, -1) pair, meaning an empty range. + // 'new_indexes_ranges' is filled with (-1, -1) pairs as a + // default so we don't have to do anything for these + // elements. + int32 i2_last = i2_end - 1; + int32 new_i2_n0_begin, new_i2_n0_last, + new_n_stride2; // only 1 stride variable; both calls will output + // the same value. 
+ + bool ans1 = GetNewSubmatLocationInfo(s2, i2_begin, &new_i2_n0_begin, + &new_n_stride2), + ans2 = GetNewSubmatLocationInfo(s2, i2_last, &new_i2_n0_last, + &new_n_stride2); + KALDI_ASSERT(ans1 && ans2 && new_i2_n0_last >= new_i2_n0_begin && + new_i2_n0_begin >= 0); + // source should also be for n==0, because we don't (or at least + // shouldn't) create computations that mix up the 'n' values + + + int32 new_i1 = new_i1_n0, + new_i2_begin = new_i2_n0_begin, + new_i2_end = new_i2_n0_last + 1; + for (int32 n = 0; n < num_n_values; + n++, new_i1 += new_n_stride1, new_i2_begin += new_n_stride2, + new_i2_end += new_n_stride2) { + new_indexes_ranges[new_i1].first = new_i2_begin; + new_indexes_ranges[new_i1].second = new_i2_end; + } + } + } +} + + + +void ComputationExpander::ComputeCommands() { + int32 num_commands = computation_.commands.size(); + expanded_computation_->commands.resize(num_commands); + for (int32 command_index = 0; command_index < num_commands; + command_index++) { + const NnetComputation::Command &c = computation_.commands[command_index]; + NnetComputation::Command &c_out = + expanded_computation_->commands[command_index]; + c_out = c; + // Commands that only operate on submatrices, components and + // precomputed-indexes do not have to be changed because we'll take care of + // the expansion by suitably redefining the matrices and submatrices, and + // recreating the precomputed-indexes. + // However, commands that require, 'indexes', 'indexes_multi' or + // 'indexes_ranges' do need to be modified. + switch (c.command_type) { + case kAllocMatrixUndefined: case kAllocMatrixZeroed: + case kDeallocMatrix: case kAllocMatrixFromOther: + case kAllocMatrixFromOtherZeroed: + case kPropagate: case kStoreStats: case kBackprop: + case kBackpropNoModelUpdate: case kMatrixCopy: case kMatrixAdd: + break; + case kCopyRows: case kAddRows: + ExpandRowsCommand(c, &c_out); + break; + case kCopyRowsMulti: case kAddRowsMulti: + case kCopyToRowsMulti: case kAddToRowsMulti: + ExpandRowsMultiCommand(c, &c_out); + break; + case kAddRowRanges: + ExpandRowRangesCommand(c, &c_out); + break; + case kAcceptInput: case kProvideOutput: case kNoOperation: + case kNoOperationMarker: case kNoOperationLabel: case kGotoLabel: + break; + default: + KALDI_ERR << "Un-handled command type"; + } + } +} + + + + +void ComputationExpander::InitFastInfo() { + // note: the zeroth matrix is not a real matrix, it's the empty matrix. + int32 num_matrices = computation_.matrices.size(); + n_fast_.resize(num_matrices); + + // the input computation to class ComputationExpander is required to + // have its debug info set up. + KALDI_ASSERT(!computation_.matrix_debug_info.empty()); + for (int32 m = 1; m < num_matrices; m++) { + int32 num_rows = computation_.matrices[m].num_rows; + // num-rows should be a multiple of 2 because we assume the input computation + // was built for 2 n-values, and has a symmetry where it's doing the same + // computation for each n values. + KALDI_ASSERT(num_rows % 2 == 0); + const NnetComputation::MatrixDebugInfo &debug_info = computation_.matrix_debug_info[m]; + KALDI_ASSERT(debug_info.cindexes.size() == num_rows); + // We require that the 'n' values be in order, which implies that the first + // 'n' value be zero. 
+ KALDI_ASSERT(debug_info.cindexes[0].second.n == 0); + bool is_fast = (debug_info.cindexes[1].second.n == 1); + + bool do_check = (RandInt(0, 2) == 0); + if (do_check) { + // n_stride is the expected difference in row-index between successive + // values of 'n' for otherwise identical cindexes. + int32 n_stride = (is_fast ? 1 : num_rows / 2); + // 'increment' would be 1 if we were checking everything; we do a partial + // check, for speed. + int32 increment = RandInt(1, 10); + for (int32 i = 0; i + n_stride < num_rows; i += increment) { + const Cindex &this_cindex = debug_info.cindexes[i], + &next_cindex = debug_info.cindexes[i + n_stride]; + if (this_cindex.second.n == 0) { + if (!(next_cindex.first == this_cindex.first && + next_cindex.second.n == 1 && + next_cindex.second.t == this_cindex.second.t && + next_cindex.second.x == this_cindex.second.x)) { + KALDI_ERR << "Problem encountered in 'shortcut' compilation: the computation " + << "does not have the expected structure. Try compiling with " + << "--use-shortcut=false."; + } + } + } + } + } +} + + +bool ComputationExpander::GetFastInfo(const std::vector &indexes) const { + KALDI_ASSERT(!indexes.empty()); + int32 num_rows = indexes.size(); + // num-rows should be a multiple of 2 because we assume the input computation + // was built for 2 n-values, and has a symmetry where it's doing the same + // computation for each n values. + KALDI_ASSERT(num_rows % 2 == 0); + + KALDI_ASSERT(indexes[0].n == 0); + bool is_fast = (indexes[1].n == 1); + bool do_check = (RandInt(0, 1) == 0); + + if (do_check) { + // n_stride is the expected difference in row-index between successive + // values of 'n' for otherwise identical cindexes. + int32 n_stride = (is_fast ? 1 : num_rows / 2); + // 'increment' would be 1 if we were checking everything; we do a partial + // check, for speed. + int32 increment = RandInt(1, 5); + for (int32 i = 0; i + n_stride < num_rows; i += increment) { + const Index &this_index = indexes[i], &next_index = indexes[i + n_stride]; + if (this_index.n == 0) { + if (!(next_index.n == 1 && next_index.t == this_index.t && + next_index.x == this_index.x)) { + KALDI_ERR << "Problem encountered in 'shortcut' compilation: the computation " + << "does not have the expected structure. Try compiling with " + << "--use-shortcut=false."; + } + } + } + } + return is_fast; +} + + +void ComputationExpander::Expand() { + InitFastInfo(); + ComputeMatrixInfo(); + if (need_debug_info_) + ComputeDebugInfo(); + else + expanded_computation_->matrix_debug_info.clear(); + ComputeSubmatrixInfo(); + ComputePrecomputedIndexes(); + ComputeCommands(); + + expanded_computation_->need_model_derivative = + computation_.need_model_derivative; +} + +void ComputationExpander::ComputeMatrixInfo() { + int32 num_matrices = computation_.matrices.size(); + expanded_computation_->matrices.resize(num_matrices); + // Matrix zero is a special case; it's the empty matrix. + expanded_computation_->matrices[0] = computation_.matrices[0]; + for (int32 m = 1; m < num_matrices; m++) { + expanded_computation_->matrices[m] = computation_.matrices[m]; + expanded_computation_->matrices[m].num_rows = + (computation_.matrices[m].num_rows / 2) * num_n_values_; + } +} + +void ComputationExpander::ComputeDebugInfo() { + int32 num_matrices = computation_.matrices.size(); + KALDI_ASSERT(computation_.matrix_debug_info.size() == num_matrices); + expanded_computation_->matrix_debug_info.resize(num_matrices); + // Matrix zero is a special case; it's the empty matrix. 
+ expanded_computation_->matrix_debug_info[0] = + computation_.matrix_debug_info[0]; + int32 num_n_values = num_n_values_; + for (int32 m = 1; m < num_matrices; m++) { + const NnetComputation::MatrixDebugInfo &info_in = + computation_.matrix_debug_info[m]; + NnetComputation::MatrixDebugInfo &info_out = + expanded_computation_->matrix_debug_info[m]; + info_out.is_deriv = info_in.is_deriv; + int32 num_rows_in = computation_.matrices[m].num_rows, + num_rows_out = expanded_computation_->matrices[m].num_rows; + KALDI_ASSERT(num_rows_in == info_in.cindexes.size()); + info_out.cindexes.resize(num_rows_out); + const Cindex *cindexes_in = &(info_in.cindexes[0]); + Cindex *cindexes_out = &(info_out.cindexes[0]); + for (int32 r = 0; r < num_rows_in; r++) { + if (info_in.cindexes[r].second.n == 0) { + int32 new_r, new_n_stride; + GetNewMatrixLocationInfo(m, r, &new_r, &new_n_stride); + for (int32 n = 0; n < num_n_values; n++) { + int32 r_out = new_r + n * new_n_stride; + cindexes_out[r_out] = cindexes_in[r]; + cindexes_out[r_out].second.n = n; + } + } + } + } +} + +void ComputationExpander::ComputeSubmatrixInfo() { + int32 num_submatrices = computation_.submatrices.size(); + expanded_computation_->submatrices.resize(num_submatrices); + // Sub-matrix zero is a special case; it's the empty submatrix. + expanded_computation_->submatrices[0] = computation_.submatrices[0]; + for (int32 s = 1; s < num_submatrices; s++) { + const NnetComputation::SubMatrixInfo &info_in = computation_.submatrices[s]; + int32 m = info_in.matrix_index; + const NnetComputation::MatrixDebugInfo &debug_info_in = + computation_.matrix_debug_info[m]; + + + int32 old_n_stride = + (n_fast_[m] ? 1 : computation_.matrices[m].num_rows / 2); + + // we may need to change the row_offset and num_rows. + int32 first_row_in = info_in.row_offset, + last_row_in = first_row_in + info_in.num_rows - 1, + last_row_in_n0 = last_row_in - old_n_stride; + KALDI_ASSERT(debug_info_in.cindexes[first_row_in].second.n == 0 && + debug_info_in.cindexes[last_row_in].second.n == 1 && + debug_info_in.cindexes[last_row_in_n0].second.n == 0); + // the function GetNewMatrixLocationInfo() only works for rows that + // correspond to n == 0, so we work out a location that's otherwise similar + // to the last row but has n == 0, get the 'new' location for that, and + // convert to n == (num_n_values_ - 1). + int32 first_row_out, last_row_out_n0, new_n_stride; + GetNewMatrixLocationInfo(m, first_row_in, + &first_row_out, &new_n_stride); + GetNewMatrixLocationInfo(m, last_row_in_n0, + &last_row_out_n0, &new_n_stride); + int32 last_row_out = last_row_out_n0 + (new_n_stride * (num_n_values_ - 1)), + new_num_rows = (last_row_out + 1 - first_row_out); + KALDI_ASSERT(new_num_rows >= info_in.num_rows); + + NnetComputation::SubMatrixInfo &info_out = + expanded_computation_->submatrices[s]; + info_out.matrix_index = m; + info_out.row_offset = first_row_out; + info_out.num_rows = new_num_rows; + info_out.col_offset = info_in.col_offset; + info_out.num_cols = info_in.num_cols; + } +} + +void ComputationExpander::ComputePrecomputedIndexes() { + // for each element of 'component_precomputed_indexes', + // we will try to work out the command-index of the associated + // Propagate() command and of the associated Backprop() command, + // if it exists. + // We expect that each such element will be associated with + // exactly one Propagate() command and at most one Backprop() command. 
+ int32 num_commands = computation_.commands.size(), + num_precomputed_indexes = computation_.component_precomputed_indexes.size(); + + if (num_precomputed_indexes == 1) + return; // Nothing to compute. Note: element zero of + // component_precomputed_indexes is reserved for NULL. + + std::vector need_backprop(num_precomputed_indexes, false); + + std::vector component_index(num_precomputed_indexes, -1); + + for (int32 command_index = 0; command_index < num_commands; command_index++) { + const NnetComputation::Command &c = computation_.commands[command_index]; + + if (c.command_type == kPropagate && c.arg2 > 0) { + KALDI_ASSERT(c.arg2 < num_precomputed_indexes); + component_index[c.arg2] = c.arg1; + } + if ((c.command_type == kBackprop || + c.command_type == kBackpropNoModelUpdate) && c.arg2 > 0) { + KALDI_ASSERT(c.arg2 < num_precomputed_indexes); + need_backprop[c.arg2] = true; + } + } + + for (size_t p = 1; + p < expanded_computation_->component_precomputed_indexes.size(); + ++p) + delete expanded_computation_->component_precomputed_indexes[p].data; + expanded_computation_->component_precomputed_indexes.clear(); + expanded_computation_->component_precomputed_indexes.resize( + num_precomputed_indexes); + + for (int32 p = 1; p < num_precomputed_indexes; ++p) { + const NnetComputation::PrecomputedIndexesInfo &old_info = + computation_.component_precomputed_indexes[p]; + NnetComputation::PrecomputedIndexesInfo &new_info = + expanded_computation_->component_precomputed_indexes[p]; + KALDI_ASSERT(!old_info.input_indexes.empty() && + !old_info.output_indexes.empty() && + "Input/output indexes not present in precomputed info of " + "computation to be expanded."); + // note: we could place these expanded indexes into 'new_info.input_indexes' + // and 'new_info.output_indexes', but we actually don't need to keep them + // there, because they are only required to be kept in computations where + // the n indexes consist of the set (0, 1), and the computation we're + // creating has more distinct n indexes than that. + std::vector input_indexes, output_indexes; + ExpandIndexes(old_info.input_indexes, &new_info.input_indexes); + ExpandIndexes(old_info.output_indexes, &new_info.output_indexes); + KALDI_ASSERT(component_index[p] >= 0); + const Component *component = nnet_.GetComponent(component_index[p]); + ComponentPrecomputedIndexes *expanded_precomputed_indexes = + component->PrecomputeIndexes(misc_info_, input_indexes, + output_indexes, need_backprop[p]); + // this object should not be null because it was not NULL the + // last time we generated it from the same component, for the + // same computation. 
+ KALDI_ASSERT(expanded_precomputed_indexes != NULL); + new_info.data = expanded_precomputed_indexes; + } +} + + +bool ComputationExpander::GetNewSubmatLocationInfo( + int32 old_submat_index, int32 old_row_index, + int32 *new_row_index, int32 *new_n_stride) const { + int32 matrix_index = computation_.submatrices[old_submat_index].matrix_index, + row_offset = computation_.submatrices[old_submat_index].row_offset; + + const NnetComputation::MatrixDebugInfo &debug_info_in = + computation_.matrix_debug_info[matrix_index]; + if (debug_info_in.cindexes[old_row_index + row_offset].second.n != 0) + return false; + GetNewMatrixLocationInfo(matrix_index, old_row_index + row_offset, + new_row_index, new_n_stride); + *new_row_index -= row_offset; + return true; +} + +void ComputationExpander::GetNewMatrixLocationInfo( + int32 old_matrix_index, int32 old_row_index, + int32 *new_row_index, int32 *new_n_stride) const { + bool n_is_fast = n_fast_[old_matrix_index]; + int32 num_rows = computation_.matrices[old_matrix_index].num_rows; + int32 n_stride; + if (n_is_fast) { + n_stride = 1; + // If the n index varies fast for this matrix, then the old row-index + // should be a multiple of 2 because: + // - we assume that the input computation was built for 2 n-values + // - if n varies fast then the cindexes for this matrix in the input + // computation would come in pairs, for n=(0,1) + // - the cindex that 'old_row_index' represents must be for n=0 + // (this is a requirement of this function) + KALDI_ASSERT(old_row_index % 2 == 0); + *new_n_stride = 1; + // the row-index of the element in question with n=0 will get larger if n + // varies 'fast', because each block of elements with a certain (x,t) value + // grows in size by a factor of num_n_values_ / 2.0. + *new_row_index = (old_row_index / 2) * num_n_values_; + } else { + // n varies more slowly, the cindexes are in blocks where the + // first block has n=0, the second has n=1, and so on. + // Because we assume that the cindex that lives in this location + // has n == 0, its position does not change (so new_row_index == + // old_row_index). 
+ *new_row_index = old_row_index; + *new_n_stride = (num_rows / 2); + } +} + + +void ComputationExpander::ExpandIndexes( + const std::vector &indexes, + std::vector *indexes_expanded) const { + bool is_fast = GetFastInfo(indexes); + int32 num_n_values = num_n_values_, + old_size = indexes.size(), + new_size = (old_size / 2) * num_n_values; + indexes_expanded->resize(new_size); + Index *indexes_expanded_ptr = &((*indexes_expanded)[0]); + for (int32 i = 0; i < old_size; i++) { + if (indexes[i].n == 0) { + int32 new_i_n0, new_n_stride; + int32 t = indexes[i].t, x = indexes[i].x; + GetNewLocationInfo(indexes, is_fast, i, &new_i_n0, &new_n_stride); + for (int32 n = 0; n < num_n_values; n++) { + int32 new_i = new_i_n0 + (n * new_n_stride); + KALDI_ASSERT(new_i < new_size); + indexes_expanded_ptr[new_i].n = n; + indexes_expanded_ptr[new_i].t = t; + indexes_expanded_ptr[new_i].x = x; + } + } + } +} + + +void ComputationExpander::GetNewLocationInfo( + const std::vector &indexes, bool is_fast, + int32 old_index, int32 *new_index, int32 *new_n_stride) const { + int32 num_indexes = indexes.size(); + KALDI_ASSERT(num_indexes > 0 && num_indexes % 2 == 0 && + indexes.front().n == 0 && indexes.back().n == 1); + int32 n_stride; + if (is_fast) { + n_stride = 1; + // If the n index varies fast for this matrix, then the old row-index + // should be a multiple of 2 because: + // - we assume that the input computation was built for 2 n-values + // - if n varies fast then the cindexes for this matrix in the input + // computation would come in pairs, for n=(0,1) + // - the cindex that 'old_row_index' represents must be for n=0 + // (this is a requirement of this function) + KALDI_ASSERT(old_index % 2 == 0); + *new_n_stride = 1; + // the row-index of the element in question with n=0 will get larger if n + // varies 'fast', because each block of elements with a certain (x,t) value + // grows in size by a factor of num_n_values_ / 2.0. + *new_index = (old_index / 2) * num_n_values_; + } else { + // n varies more slowly; the Indexes are in blocks where the + // first block has n=0, the second has n=1, and so on. + // Because we assume that the cindex that lives in this location + // has n == 0, its position does not change (so new_row_index == + // old_row_index). + *new_index = old_index; + *new_n_stride = (num_indexes / 2); + } +} + class ComputationLoopedOptimizer { public: ComputationLoopedOptimizer(const Nnet &nnet, diff --git a/src/nnet3/nnet-optimize-utils.h b/src/nnet3/nnet-optimize-utils.h index 1b3ebbb44be..9977ca8952a 100644 --- a/src/nnet3/nnet-optimize-utils.h +++ b/src/nnet3/nnet-optimize-utils.h @@ -388,6 +388,12 @@ bool ComputationIsDecomposable(const ComputationRequest &request, This function is used in 'shortcut' compilation to expand a computation that has been compiled for exactly 2 'n' values, to one that is suitable for some num_n_values > 2. + @param [in] nnet The neural network for which this computation + is being built. + @param [in] misc_info The same MiscComputationInfo object that was + present in the ComputationRequests that were + originally used to generate the computation + (required to generated the PrecomputedIndexes) @param [in] computation The computation that was compiled for exactly 2 'n' values (n=0 and n=1) @param [in] need_debug_info True if we want to retain the 'debug_info' @@ -398,13 +404,10 @@ bool ComputationIsDecomposable(const ComputationRequest &request, computation @param [out] expanded_computation The expanded computation. 
- @return This function returns true if it succeeded, and false if it - could not expand the computation for some reason (e.g. there - was some non-simple component where the 'PrecomputedIndexes' - object could not be suitably expanded. If it returns false, - the output 'expanded_computation' is undefined (may contain junk). */ -bool ExpandComputation(const NnetComputation &computation, +void ExpandComputation(const Nnet &nnet, + const MiscComputationInfo &misc_info, + const NnetComputation &computation, bool need_debug_info, int32 num_n_values, NnetComputation *expanded_computation); @@ -413,12 +416,28 @@ bool ExpandComputation(const NnetComputation &computation, /// This function detects cases where commands of type kCopyRows, kAddRows or /// kAddToRows can be converted to commands of type kMatrixCopy or kMatrixAdd, -/// and converts them (this may involve adding submatrices). After doing this -/// you should at some point do RenumberComputation(), which will remove any -/// now-unused members of computation->indexes. -/// This function returns true if it made any changes to the computation. +/// and converts them (this may involve adding submatrices). +/// +/// This function returns true if it made any changes to the computation; if it +/// returns true, then after doing this you should at some point do +/// RenumberComputation(), which will remove any now-unused members of +/// computation->indexes. bool ReplaceRowWithMatrixOps(NnetComputation *computation); +/// This function detects cases where commands of type kCopyRows, kAddRows, +/// kAddRowsMulti, kAddToRowsMulti, kCopyRowsMulti, kCopyToRowsMulti or +/// kAddRowRanges use indexes that start or end with -1's or equivalents, +/// and replace them with similar commands that act on a sub-matrix of the +/// matrices they are currently acting on. This will help efficiency by +/// avoiding launching unnecessary copies of the kernel (that don't really +/// have to do anything). +/// +/// This function returns true if it made any changes to the computation; if it +/// returns true, then after doing this you should at some point do +/// RenumberComputation(), which will remove any now-unused members of +/// computation->indexes. +bool SnipRowOps(NnetComputation *computation); + /// This function detects submatrices and matrices that are never used (e.g. 
due /// to changes made in other optimization code), and members of indexes, /// indexes_multi and indexes_ranges that are unused or are duplicates, and diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc index e8b3bed18d4..c0c03a13ab5 100644 --- a/src/nnet3/nnet-optimize.cc +++ b/src/nnet3/nnet-optimize.cc @@ -36,8 +36,8 @@ void NnetOptimizeOptions::Read(std::istream &is, bool binary) { ReadBasicType(is, binary, &backprop_in_place); std::string tok; ReadToken(is, binary, &tok); - if (tok == "") { - ReadBasicType(is, binary, &replace_row_with_matrix_ops); + if (tok == "") { + ReadBasicType(is, binary, &optimize_row_ops); ReadToken(is, binary, &tok); } KALDI_ASSERT(tok == ""); @@ -78,8 +78,8 @@ void NnetOptimizeOptions::Write(std::ostream &os, bool binary) const { WriteBasicType(os, binary, propagate_in_place); WriteToken(os, binary, ""); WriteBasicType(os, binary, backprop_in_place); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, replace_row_with_matrix_ops); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, optimize_row_ops); WriteToken(os, binary, ""); WriteBasicType(os, binary, convert_addition); WriteToken(os, binary, ""); @@ -469,7 +469,7 @@ void Optimize(const NnetOptimizeOptions &config, CheckComputation(nnet, *computation, false); } - if (config.optimize && config.replace_row_with_matrix_ops) { + if (config.optimize && config.optimize_row_ops) { if (ReplaceRowWithMatrixOps(computation)) { // if anything was changed... diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h index 86c6427396a..0df50b329a9 100644 --- a/src/nnet3/nnet-optimize.h +++ b/src/nnet3/nnet-optimize.h @@ -37,7 +37,7 @@ struct NnetOptimizeOptions { bool consolidate_model_update; bool propagate_in_place; bool backprop_in_place; - bool replace_row_with_matrix_ops; + bool optimize_row_ops; bool convert_addition; bool remove_assignments; bool allow_left_merge; @@ -58,7 +58,7 @@ struct NnetOptimizeOptions { consolidate_model_update(true), propagate_in_place(true), backprop_in_place(true), - replace_row_with_matrix_ops(true), + optimize_row_ops(true), convert_addition(true), remove_assignments(true), allow_left_merge(true), @@ -82,6 +82,9 @@ struct NnetOptimizeOptions { "disable optimization that allows in-place propagation"); opts->Register("backprop-in-place", &backprop_in_place, "Set to false to " "disable optimization that allows in-place backprop"); + opts->Register("optimize-row-ops", &optimize_row_ops, "Set to false to " + "disable certain optimizations that act on operations of " + "type *Row*."); opts->Register("convert-addition", &convert_addition, "Set to false to " "disable the optimization that converts Add commands into " "Copy commands wherever possible."); @@ -182,8 +185,6 @@ struct CachingOptimizingCompilerOptions { int32 write_cache; int32 cache_capacity; - - CachingOptimizingCompilerOptions(): use_shortcut(true), cache_capacity(64) { } From 851e98a993d8232b99c77efc7340ae65c616d29d Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 12 Dec 2016 23:35:14 -0500 Subject: [PATCH 102/530] Bug fix in xconfig/basic_layers.py --- egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py index 24eea922968..c612af984b1 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py @@ -349,7 +349,8 @@ def set_default_configs(self): # 
note: self.config['input'] is a descriptor, '[-1]' means output
         # the most recent layer.

-        self.config = { 'input':'[-1]' }
+        self.config = {'input': '[-1]',
+                       'dim': -1}

     def check_configs(self):


From 693bd140ebb2f8203a7635518dbb4c0037e78c5a Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Mon, 12 Dec 2016 23:36:24 -0500
Subject: [PATCH 103/530] asr_diarization: Adding stats_layer to xconfigs

---
 egs/wsj/s5/steps/libs/nnet3/xconfig/layers.py |   1 +
 egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py |   3 +-
 .../steps/libs/nnet3/xconfig/stats_layer.py   | 142 ++++++++++++++++++
 3 files changed, 145 insertions(+), 1 deletion(-)
 create mode 100644 egs/wsj/s5/steps/libs/nnet3/xconfig/stats_layer.py

diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/layers.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/layers.py
index 353b9d3bba4..1092be572b4 100644
--- a/egs/wsj/s5/steps/libs/nnet3/xconfig/layers.py
+++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/layers.py
@@ -6,3 +6,4 @@
 from basic_layers import *
 from lstm import *
 from tdnn import *
+from stats_layer import *
diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py
index 7ccab2f6c6f..7b34481993b 100644
--- a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py
+++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py
@@ -29,7 +29,8 @@
         'lstmp-layer' : xlayers.XconfigLstmpLayer,
         'lstmpc-layer' : xlayers.XconfigLstmpcLayer,
         'fast-lstm-layer' : xlayers.XconfigFastLstmLayer,
-        'fast-lstmp-layer' : xlayers.XconfigFastLstmpLayer
+        'fast-lstmp-layer' : xlayers.XconfigFastLstmpLayer,
+        'stats-layer': xlayers.XconfigStatsLayer
 }

 # Converts a line as parsed by ParseConfigLine() into a first
diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/stats_layer.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/stats_layer.py
new file mode 100644
index 00000000000..beaf7c8923a
--- /dev/null
+++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/stats_layer.py
@@ -0,0 +1,142 @@
+# Copyright 2016  Vimal Manohar
+# Apache 2.0.
+
+""" This module contains the statistics extraction and pooling layer.
+"""
+
+from __future__ import print_function
+import re
+from libs.nnet3.xconfig.utils import XconfigParserError as xparser_error
+from libs.nnet3.xconfig.basic_layers import XconfigLayerBase
+
+
+class XconfigStatsLayer(XconfigLayerBase):
+    """This class is for parsing lines like
+     stats-layer name=tdnn1-stats config=mean+stddev(-99:3:9:99) input=tdnn1
+
+    This adds statistics-pooling and statistics-extraction components.  An
+    example string is 'mean(-99:3:9:99)', which means: compute the mean of
+    data within a window of -99 to +99, with distinct means computed every 9
+    frames (we round to get the appropriate one), and with the input extracted
+    on multiples of 3 frames (so this will force the input to this layer to be
+    evaluated every 3 frames).  Another example string is
+    'mean+stddev(-99:3:9:99)', which will also cause the standard deviation to
+    be computed.
+
+    The dimension is worked out from the input.  mean and stddev add a
+    dimension of input_dim each to the output dimension.  If counts is
+    specified, an additional dimension is added to the output to store log
+    counts.
+
+    Parameters of the class, and their defaults:
+        input='[-1]'    [Descriptor giving the input of the layer.]
+        dim=-1          [Output dimension of layer.  If provided, must match the
+                         dimension computed from input]
+        config=''       [Required.  Defines what stats must be computed.]
+ """ + def __init__(self, first_token, key_to_value, prev_names=None): + assert first_token in ['stats-layer'] + XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) + + def set_default_configs(self): + self.config = {'input': '[-1]', + 'dim': -1, + 'config': ''} + + def set_derived_configs(self): + config_string = self.config['config'] + if config_string == '': + raise xparser_error("config has to be non-empty", + self.str()) + m = re.search("(mean|mean\+stddev|mean\+count|mean\+stddev\+count)" + "\((-?\d+):(-?\d+):(-?\d+):(-?\d+)\)", + config_string) + if m is None: + raise xparser_error("Invalid statistic-config string: {0}".format( + config_string), self) + + self._output_stddev = (m.group(1) in ['mean+stddev', + 'mean+stddev+count']) + self._output_log_counts = (m.group(1) in ['mean+count', + 'mean+stddev+count']) + self._left_context = -int(m.group(2)) + self._input_period = int(m.group(3)) + self._stats_period = int(m.group(4)) + self._right_context = int(m.group(5)) + + output_dim = (self.descriptors['input']['dim'] + * (2 if self._output_stddev else 1) + + 1 if self._output_log_counts else 0) + + if self.config['dim'] > 0 and self.config['dim'] != output_dim: + raise xparser_error( + "Invalid dim supplied {0:d} != " + "actual output dim {1:d}".format( + self.config['dim'], output_dim)) + self.config['dim'] = output_dim + + def check_configs(self): + if not (self._left_context > 0 and self._right_context > 0 + and self._input_period > 0 and self._stats_period > 0 + and self._left_context % self._stats_period == 0 + and self._right_context % self._stats_period == 0 + and self._stats_period % self._input_period == 0): + raise xparser_error( + "Invalid configuration of statistics-extraction: {0}".format( + self.config['config']), self) + super(XconfigStatsLayer, self).check_configs() + + def _generate_config(self): + input_desc = self.descriptors['input']['final-string'] + input_dim = self.descriptors['input']['dim'] + + configs = [] + configs.append( + 'component name={name}-extraction-{lc}-{rc} ' + 'type=StatisticsExtractionComponent input-dim={dim} ' + 'input-period={input_period} output-period={output_period} ' + 'include-variance={var} '.format( + name=self.name, lc=self._left_context, rc=self._right_context, + dim=input_dim, input_period=self._input_period, + output_period=self._stats_period, + var='true' if self._output_stddev else 'false')) + configs.append( + 'component-node name={name}-extraction-{lc}-{rc} ' + 'component={name}-extraction-{lc}-{rc} input={input} '.format( + name=self.name, lc=self._left_context, rc=self._right_context, + input=input_desc)) + + stats_dim = 1 + input_dim * (2 if self._output_stddev else 1) + configs.append( + 'component name={name}-pooling-{lc}-{rc} ' + 'type=StatisticsPoolingComponent input-dim={dim} ' + 'input-period={input_period} left-context={lc} right-context={rc} ' + 'num-log-count-features={count} output-stddevs={var} '.format( + name=self.name, lc=self._left_context, rc=self._right_context, + dim=stats_dim, input_period=self._stats_period, + count=1 if self._output_log_counts else 0, + var='true' if self._output_stddev else 'false')) + configs.append( + 'component-node name={name}-pooling-{lc}-{rc} ' + 'component={name}-pooling-{lc}-{rc} ' + 'input={name}-extraction-{lc}-{rc} '.format( + name=self.name, lc=self._left_context, rc=self._right_context)) + return configs + + def output_name(self, auxiliary_output=None): + return 'Round({name}-pooling-{lc}-{rc}, {period})'.format( + name=self.name, lc=self._left_context, 
+ rc=self._right_context, period=self._stats_period) + + def output_dim(self, auxiliary_outputs=None): + return self.config['dim'] + + def get_full_config(self): + ans = [] + config_lines = self._generate_config() + + for line in config_lines: + for config_name in ['ref', 'final']: + ans.append((config_name, line)) + + return ans From ed938f63cdd5dc19dba257de765e4773c279fb42 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 12 Dec 2016 23:36:48 -0500 Subject: [PATCH 104/530] asr_diarization: Making xconfigs support more general networks --- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index c55dae18b19..5edd3303942 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -25,6 +25,9 @@ def get_args(): help='Filename of input xconfig file') parser.add_argument('--config-dir', required=True, help='Directory to write config files and variables') + parser.add_argument('--nnet-edits', type=str, default=None, + action=common_lib.NullstrToNoneAction, + help="Edit network before getting nnet3-info") print(' '.join(sys.argv)) @@ -187,13 +190,19 @@ def write_config_files(config_dir, all_layers): raise -def add_back_compatibility_info(config_dir): +def add_back_compatibility_info(config_dir, nnet_edits=None): """This will be removed when python script refactoring is done.""" common_lib.run_kaldi_command("nnet3-init {0}/ref.config " "{0}/ref.raw".format(config_dir)) - out, err = common_lib.run_kaldi_command("nnet3-info {0}/ref.raw | " - "head -4".format(config_dir)) + model = "{0}/ref.raw".format(config_dir) + if nnet_edits is not None: + model = """nnet3-copy --edits='{0}' {1} - |""".format(nnet_edits, + model) + + print("""nnet3-info "{0}" | head -4""".format(model), file=sys.stderr) + out, err = common_lib.run_kaldi_command("""nnet3-info "{0}" | """ + """head -4""".format(model)) # out looks like this # left-context: 7 # right-context: 0 @@ -226,7 +235,7 @@ def main(): all_layers = xparser.read_xconfig_file(args.xconfig_file) write_expanded_xconfig_files(args.config_dir, all_layers) write_config_files(args.config_dir, all_layers) - add_back_compatibility_info(args.config_dir) + add_back_compatibility_info(args.config_dir, args.nnet_edits) if __name__ == '__main__': From c6ade8c84b7f05c6ac9d6ce53b379c87722d633b Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 12 Dec 2016 23:37:41 -0500 Subject: [PATCH 105/530] asr_diarization: Update do_corruption_data_dir.sh with better default valuesD --- .../segmentation/do_corruption_data_dir.sh | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh index 36bf4c93306..1bfa08370e7 100755 --- a/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh @@ -27,8 +27,8 @@ reco_nj=40 cmd=queue.pl # Options for feature extraction -mfcc_config=conf/mfcc_hires_bp_vh.conf -feat_suffix=hires_bp_vh +mfcc_config=conf/mfcc_hires_bp.conf +feat_suffix=hires_bp reco_vad_dir= # Output of prepare_unsad_data.sh. # If provided, the speech labels and deriv weights will be @@ -105,19 +105,15 @@ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then fi if [ $stage -le 4 ]; then - if [ ! 
-z $feat_suffix ]; then - utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix - corrupted_data_dir=${corrupted_data_dir}_$feat_suffix - fi + utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix steps/make_mfcc.sh --mfcc-config $mfcc_config \ --cmd "$cmd" --nj $reco_nj \ - $corrupted_data_dir exp/make_${mfccdir}/${corrupted_data_id} $mfccdir + $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir steps/compute_cmvn_stats.sh --fake \ - $corrupted_data_dir exp/make_${mfccdir}/${corrupted_data_id} $mfccdir + $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir else - if [ ! -z $feat_suffix ]; then - corrupted_data_dir=${corrupted_data_dir}_$feat_suffix - fi + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix fi if [ $stage -le 8 ]; then From d639b319e08b63049e91eb7e0f898a49941d7d68 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 12 Dec 2016 23:37:59 -0500 Subject: [PATCH 106/530] asr_diarization: Update do_corruption_data_dir_overlapped_speech.sh with better default values --- ...o_corruption_data_dir_overlapped_speech.sh | 60 ++++++++++--------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh index 242dfca8170..75dbce578b2 100644 --- a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#! /bin/bash # Copyright 2016 Vimal Manohar # Apache 2.0 @@ -20,9 +20,10 @@ num_data_reps=5 # Number of corrupted versions snrs="20:10:15:5:0:-5" foreground_snrs="20:10:15:5:0:-5" background_snrs="20:10:15:5:0:-5" -base_rirs=simulated +overlap_snrs="5:2:1:0:-1:-2" # Whole-data directory corresponding to data_dir whole_data_dir=data/train_si284_whole +overlap_labels_dir=overlap_labels # Parallel options reco_nj=40 @@ -30,21 +31,15 @@ nj=40 cmd=queue.pl # Options for feature extraction -mfcc_config=conf/mfcc_hires_bp_vh.conf +mfcc_config=conf/mfcc_hires_bp.conf +feat_suffix=hires_bp energy_config=conf/log_energy.conf -dry_run=false -corrupt_only=false -speed_perturb=true - -reco_vad_dir= +reco_vad_dir= # Output of prepare_unsad_data.sh. + # If provided, the speech labels and deriv weights will be + # copied into the output data directory. utt_vad_dir= -max_jobs_run=20 - -overlap_snrs="5:2:1:0:-1:-2" -base_rirs=simulated - . 
utils/parse_options.sh if [ $# -ne 0 ]; then @@ -64,7 +59,7 @@ corrupted_data_id=${whole_data_id}_ovlp_corrupted clean_data_id=${whole_data_id}_ovlp_clean noise_data_id=${whole_data_id}_ovlp_noise -if [ $stage -le 2 ]; then +if [ $stage -le 1 ]; then python steps/data/make_corrupted_data_dir.py \ "${rvb_opts[@]}" \ --prefix="ovlp" \ @@ -89,7 +84,7 @@ noise_data_dir=data/${noise_data_id} orig_corrupted_data_dir=$corrupted_data_dir if $speed_perturb; then - if [ $stage -le 3 ]; then + if [ $stage -le 2 ]; then ## Assuming whole data directories for x in $clean_data_dir $corrupted_data_dir $noise_data_dir; do cp $x/reco2dur $x/utt2dur @@ -105,8 +100,8 @@ if $speed_perturb; then clean_data_id=${clean_data_id}_sp noise_data_id=${noise_data_id}_sp - if [ $stage -le 4 ]; then - utils/data/perturb_data_dir_volume.sh --force true ${corrupted_data_dir} + if [ $stage -le 3 ]; then + utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 --force true ${corrupted_data_dir} utils/data/perturb_data_dir_volume.sh --force true --reco2vol ${corrupted_data_dir}/reco2vol ${clean_data_dir} utils/data/perturb_data_dir_volume.sh --force true --reco2vol ${corrupted_data_dir}/reco2vol ${noise_data_dir} fi @@ -125,19 +120,21 @@ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage fi -if [ $stage -le 5 ]; then +if [ $stage -le 4 ]; then + utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix steps/make_mfcc.sh --mfcc-config $mfcc_config \ --cmd "$train_cmd" --nj $reco_nj \ - $corrupted_data_dir exp/make_hires_bp/${corrupted_data_id} $mfccdir + $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir fi -if [ $stage -le 6 ]; then +if [ $stage -le 5 ]; then steps/make_mfcc.sh --mfcc-config $energy_config \ --cmd "$train_cmd" --nj $reco_nj \ $clean_data_dir exp/make_log_energy/${clean_data_id} log_energy_feats fi -if [ $stage -le 7 ]; then +if [ $stage -le 6 ]; then steps/make_mfcc.sh --mfcc-config $energy_config \ --cmd "$train_cmd" --nj $reco_nj \ $noise_data_dir exp/make_log_energy/${noise_data_id} log_energy_feats @@ -164,6 +161,11 @@ if [ $stage -le 8 ]; then exp/make_irm_targets/${corrupted_data_id} $targets_dir fi +# Combine the VAD from the base recording and the VAD from the overlapping segments +# to create per-frame labels of the number of overlapping speech segments +# Unreliable segments are regions where no VAD labels were available for the +# overlapping segments. These can be later removed by setting deriv weights to 0. + # Data dirs without speed perturbation overlap_dir=exp/make_overlap_labels/${corrupted_data_id} unreliable_dir=exp/make_overlap_labels/unreliable_${corrupted_data_id} @@ -179,10 +181,6 @@ if [ $stage -le 8 ]; then utils/data/get_utt2num_frames.sh $corrupted_data_dir utils/split_data.sh --per-reco ${orig_corrupted_data_dir} $reco_nj - # Combine the VAD from the base recording and the VAD from the overlapping segments - # to create per-frame labels of the number of overlapping speech segments - # Unreliable segments are regions where no VAD labels were available for the - # overlapping segments. These can be later removed by setting deriv weights to 0. 
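# The per-frame combination done in the commands below is easiest to see on a
# toy example (illustrative only; base.txt and overlap.txt are hypothetical
# text files with one 0/1 speech label per frame, not the Kaldi archives the
# real pipeline reads). The overlap count is just the sum of the per-stream
# speech indicators, and frames with count >= 2 are overlapped speech:
#
#   paste base.txt overlap.txt | awk '{print $1 + $2}' > count.txt
#   awk '$1 >= 2 {n++} END {print n+0, "overlapped frames"}' count.txt
#
# The segmentation-* binaries invoked next compute the same kind of counts
# directly on Kaldi segmentation archives.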
$train_cmd JOB=1:$reco_nj $overlap_dir/log/get_overlap_seg.JOB.log \ segmentation-init-from-overlap-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ "scp:utils/filter_scp.pl ${orig_corrupted_data_dir}/split${reco_nj}reco/JOB/utt2spk $corrupted_data_dir/sad_seg.scp |" \ @@ -200,6 +198,7 @@ if [ $stage -le 9 ]; then cp $orig_corrupted_data_dir/wav.scp $unreliable_data_dir # Create segments where there is definitely an overlap. + # Assume no more than 10 speakers overlap. $train_cmd JOB=1:$reco_nj $overlap_dir/log/process_to_segments.JOB.log \ segmentation-post-process --remove-labels=0:1 \ ark:$overlap_dir/overlap_seg_speed_unperturbed.JOB.ark ark:- \| \ @@ -230,6 +229,9 @@ if $speed_perturb; then unreliable_data_dir=${unreliable_data_dir}_sp fi +# make $overlap_labels_dir an absolute pathname. +overlap_labels_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $overlap_labels_dir ${PWD}` + if [ $stage -le 10 ]; then utils/split_data.sh --per-reco ${overlap_data_dir} $reco_nj @@ -240,11 +242,11 @@ if [ $stage -le 10 ]; then segmentation-combine-segments-to-recordings ark:- ark,t:${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt \ ark:- \| \ segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \ - ark,scp:overlap_labels/overlapped_speech_${corrupted_data_id}.JOB.ark,overlap_labels/overlapped_speech_${corrupted_data_id}.JOB.scp + ark,scp:$overlap_labels_dir/overlapped_speech_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapped_speech_${corrupted_data_id}.JOB.scp fi for n in `seq $reco_nj`; do - cat overlap_labels/overlapped_speech_${corrupted_data_id}.$n.scp + cat $overlap_labels_dir/overlapped_speech_${corrupted_data_id}.$n.scp done > ${corrupted_data_dir}/overlapped_speech_labels.scp if [ $stage -le 11 ]; then @@ -272,10 +274,10 @@ if [ $stage -le 11 ]; then segmentation-post-process --remove-labels=0 ark:- ark:- \| \ segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \ steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ - ark,scp:$unreliable_dir/deriv_weights_for_overlapped_speech.JOB.ark,$unreliable_dir/deriv_weights_for_overlapped_speech.JOB.scp + ark,scp:$overlap_labels_dir/deriv_weights_for_overlapped_speech.JOB.ark,$overlap_labels_dir/deriv_weights_for_overlapped_speech.JOB.scp for n in `seq $reco_nj`; do - cat $unreliable_dir/deriv_weights_for_overlapped_speech.${n}.scp + cat $overlap_labels_dir/deriv_weights_for_overlapped_speech.${n}.scp done > $corrupted_data_dir/deriv_weights_for_overlapped_speech.scp fi From 8668bcfe7771506cac40775431831b7ff9871ac8 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 12 Dec 2016 23:39:45 -0500 Subject: [PATCH 107/530] asr_diarization: Moving ../egs/aspire/s5/local/segmentation/train_stats_sad_music.sh to tuning --- .../segmentation/train_stats_sad_music.sh | 173 +----------------- .../tuning/train_stats_sad_music_1a.sh | 172 +++++++++++++++++ 2 files changed, 173 insertions(+), 172 deletions(-) mode change 100644 => 120000 egs/aspire/s5/local/segmentation/train_stats_sad_music.sh create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1a.sh diff --git a/egs/aspire/s5/local/segmentation/train_stats_sad_music.sh b/egs/aspire/s5/local/segmentation/train_stats_sad_music.sh deleted file mode 100644 index 8242b83c747..00000000000 --- a/egs/aspire/s5/local/segmentation/train_stats_sad_music.sh +++ /dev/null @@ -1,172 +0,0 @@ -#!/bin/bash - -# This is a script to 
train a time-delay neural network for speech activity detection (SAD) and -# music-id using statistic pooling component for long-context information. - -set -o pipefail -set -e -set -u - -. cmd.sh - -# At this script level we don't support not running on GPU, as it would be painfully slow. -# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, -# --num-threads 16 and --minibatch-size 128. - -stage=0 -train_stage=-10 -get_egs_stage=-10 -egs_opts= # Directly passed to get_egs_multiple_targets.py - -# TDNN options -splice_indexes="-3,-2,-1,0,1,2,3 -6,0,mean+count(-99:3:9:99) -9,0,3 0" -relu_dim=256 -chunk_width=20 # We use chunk training for training TDNN -extra_left_context=100 # Maximum left context in egs apart from TDNN's left context -extra_right_context=20 # Maximum right context in egs apart from TDNN's right context - -# We randomly select an extra {left,right} context for each job between -# min_extra_*_context and extra_*_context so that the network can get used -# to different contexts used to compute statistics. -min_extra_left_context=20 -min_extra_right_context=0 - -# training options -num_epochs=2 -initial_effective_lrate=0.0003 -final_effective_lrate=0.00003 -num_jobs_initial=3 -num_jobs_final=8 -remove_egs=false -max_param_change=0.2 # Small max-param change for small network -extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs - # such as removing one of the targets - -num_utts_subset_valid=50 # "utts" is actually recording. So this is prettly small. -num_utts_subset_train=50 - -# target options -train_data_dir=data/train_azteec_whole_sp_corrupted_hires - -speech_feat_scp= -music_labels_scp= - -deriv_weights_scp= - -egs_dir= -nj=40 -feat_type=raw -config_dir= - -dir= -affix=a - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -num_hidden_layers=`echo $splice_indexes | perl -ane 'print scalar @F'` || exit 1 -if [ -z "$dir" ]; then - dir=exp/nnet3_stats_sad_music/nnet_tdnn -fi - -dir=$dir${affix:+_$affix}_n${num_hidden_layers} - -if ! 
cuda-compiled; then
-  cat <
Date: Mon, 12 Dec 2016 23:40:27 -0500
Subject: [PATCH 108/530] asr_diarization: Bug fix in random extra contexts

---
 .../s5/steps/libs/nnet3/train/frame_level_objf/common.py  | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
index 37dd36aa392..0b0149ece3d 100644
--- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
+++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
@@ -66,13 +66,17 @@ def train_new_models(dir, iter, srand, num_jobs,
         deriv_time_opts.append("--optimization.max-deriv-time={0}".format(
                                    max_deriv_time))
 
-    this_random = random.Random(srand)
+    this_random = random.Random(srand + iter)
+
     if min_left_context is not None:
         left_context = this_random.randint(min_left_context, left_context)
 
     if min_right_context is not None:
         right_context = this_random.randint(min_right_context,
                                             right_context)
 
+    logger.info("On iteration %d, left-context=%d and right-context=%s",
+                iter, left_context, right_context)
+
     context_opts = "--left-context={0} --right-context={1}".format(
         left_context, right_context)
 

From 0dc172cbf8f15ce65088be936f6827238d56c763 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Mon, 12 Dec 2016 23:41:04 -0500
Subject: [PATCH 109/530] asr_diarization: New tuning scripts for music id

---
 .../tuning/train_stats_sad_music_1b.sh        | 191 ++++++++++++++++++
 .../tuning/train_stats_sad_music_1c.sh        | 185 +++++++++++++++++
 2 files changed, 376 insertions(+)
 create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1b.sh
 create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1c.sh

diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1b.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1b.sh
new file mode 100644
index 00000000000..685dd846b26
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1b.sh
@@ -0,0 +1,191 @@
+#!/bin/bash
+
+# This is a script to train a time-delay neural network for speech activity detection (SAD) and
+# music-id using statistic pooling component for long-context information.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts=  # Directly passed to get_egs_multiple_targets.py
+
+# TDNN options
+relu_dim=256
+chunk_width=20  # We use chunk training for training TDNN
+extra_left_context=100  # Maximum left context in egs apart from TDNN's left context
+extra_right_context=20  # Maximum right context in egs apart from TDNN's right context
+
+# We randomly select an extra {left,right} context for each job between
+# min_extra_*_context and extra_*_context so that the network can get used
+# to different contexts used to compute statistics.
+min_extra_left_context=20
+min_extra_right_context=0
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2  # Small max-param change for small network
+extra_egs_copy_cmd=  # Used if you want to do some weird stuff to egs
+                     # such as removing one of the targets
+
+num_utts_subset_valid=50  # "utts" are actually recordings, so this is pretty small.
+num_utts_subset_train=50
+
+# target options
+train_data_dir=data/train_azteec_whole_sp_corrupted_hires
+
+speech_feat_scp=
+music_labels_scp=
+
+deriv_weights_scp=
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=a
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_stats_sad_music/nnet_tdnn
+fi
+
+dir=$dir${affix:+_$affix}
+
+if ! cuda-compiled; then
+  cat < $dir/configs/network.xconfig
+  input dim=`feat-to-dim scp:$train_data_dir/feats.scp -` name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  # This is disabled for now.
+  # fixed-affine-layer name=lda input=Append(-3,-2,-1,0,1,2,3) affine-transform-file=$dir/configs/lda.mat
+  # the first splicing is moved before the lda layer, so no splicing here
+  # relu-renorm-layer name=tdnn1 dim=625
+
+  relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=256
+  stats-layer name=tdnn2.stats config=mean+count(-99:3:9:99)
+  relu-renorm-layer name=tdnn2 input=Append(Offset(tdnn1, -6), tdnn1, tdnn2.stats) dim=256
+  relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256
+  relu-renorm-layer name=tdnn4 dim=256
+
+  output-layer name=output-speech include-log-softmax=true dim=2 input=tdnn4
+  output-layer name=output-music include-log-softmax=true dim=2 input=tdnn4
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs/
+fi
+
+if [ -z "$egs_dir" ]; then
+  egs_dir=$dir/egs
+  if [ $stage -le 4 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+    fi
+
+    . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$train_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=20000 \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$speech_feat_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_labels_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --dir=$dir/egs + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=20 \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=64 \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$train_data_dir \ + --targets-scp="$speech_feat_scp" \ + --dir=$dir || exit 1 +fi + +if [ $stage -le 6 ]; then + $train_cmd JOB=1:100 $dir/log/compute_post_output-speech.JOB.log \ + extract-column "scp:utils/split_scp.pl -j 100 \$[JOB-1] $speech_feat_scp |" ark,t:- \| \ + steps/segmentation/quantize_vector.pl \| \ + ali-to-post ark,t:- ark:- \| \ + weight-post ark:- scp:$deriv_weights_scp ark:- \| \ + post-to-feats --post-dim=2 ark:- ark:- \| \ + matrix-sum-rows ark:- ark:- \| \ + vector-sum ark:- $dir/post_output-speech.vec.JOB + eval vector-sum $dir/post_output-speech.vec.{`seq -s, 100`} $dir/post_output-speech.vec + + $train_cmd JOB=1:100 $dir/log/compute_post_output-music.JOB.log \ + ali-to-post "scp:utils/split_scp.pl -j 100 \$[JOB-1] $music_labels_scp |" ark:- \| \ + post-to-feats --post-dim=2 ark:- ark:- \| \ + matrix-sum-rows ark:- ark:- \| \ + vector-sum ark:- $dir/post_output-music.vec.JOB + eval vector-sum $dir/post_output-music.vec.{`seq -s, 100`} $dir/post_output-music.vec +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1c.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1c.sh new file mode 100644 index 00000000000..163ea6df14d --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1c.sh @@ -0,0 +1,185 @@ 
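# A note on the stats-layer lines used in the tuning scripts of this patch:
# following the xconfig parser added earlier in this series, the config
# string has the form
#
#   <stats-type>(<left-context>:<input-period>:<stats-period>:<right-context>)
#
# so, for example,
#
#   stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99)
#
# pools means plus a log-count over frames t-99 .. t+99, reads its input
# every 3 frames and emits statistics every 9 frames. The parser requires
# the left and right contexts to be multiples of the stats period, and the
# stats period to be a multiple of the input period.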
+#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=20 # We use chunk training for training TDNN +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +num_utts_subset_valid=50 # "utts" is actually recording. So this is prettly small. +num_utts_subset_train=50 + +# target options +train_data_dir=data/train_azteec_whole_sp_corrupted_hires + +speech_feat_scp= +music_labels_scp= + +deriv_weights_scp= + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$train_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-9, tdnn1@-3, tdnn1, tdnn1@3, tdnn2_stats) dim=256 + stats-layer name=tdnn3_stats config=mean+count(-108:9:27:108) + relu-renorm-layer name=tdnn3 input=Append(tdnn2@-27, tdnn2@-9, tdnn2, tdnn2@9, tdnn3_stats) dim=256 + relu-renorm-layer name=tdnn4 dim=256 + + output-layer name=output-speech include-log-softmax=true dim=2 input=tdnn4 + output-layer name=output-music include-log-softmax=true dim=2 input=tdnn4 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$train_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=20000 \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$speech_feat_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_labels_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --dir=$dir/egs + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=20 \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=64 \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$train_data_dir \ + --targets-scp="$speech_feat_scp" \ + --dir=$dir || exit 1 +fi + +if [ $stage -le 6 ]; then + $train_cmd JOB=1:100 $dir/log/compute_post_output-speech.JOB.log \ + extract-column "scp:utils/split_scp.pl -j 100 \$[JOB-1] $speech_feat_scp |" ark,t:- \| \ + steps/segmentation/quantize_vector.pl \| \ + ali-to-post ark,t:- ark:- \| \ + weight-post ark:- scp:$deriv_weights_scp ark:- \| \ + post-to-feats --post-dim=2 ark:- ark:- \| \ + matrix-sum-rows ark:- ark:- \| \ + vector-sum ark:- $dir/post_output-speech.vec.JOB + eval vector-sum $dir/post_output-speech.vec.{`seq -s, 100`} $dir/post_output-speech.vec + + $train_cmd JOB=1:100 $dir/log/compute_post_output-music.JOB.log \ + ali-to-post "scp:utils/split_scp.pl -j 100 \$[JOB-1] $music_labels_scp |" ark:- \| \ + post-to-feats --post-dim=2 ark:- ark:- \| \ + matrix-sum-rows ark:- ark:- \| \ + vector-sum ark:- $dir/post_output-music.vec.JOB + eval vector-sum $dir/post_output-music.vec.{`seq -s, 100`} $dir/post_output-music.vec +fi From 869b6694066c1ca7f8f06b99acbbf117284bbd9e Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 12 Dec 2016 23:42:28 -0500 Subject: [PATCH 110/530] asr_diarization: New overlap detection with stats script --- .../tuning/train_stats_sad_overlap_1a.sh | 203 ++++++++++++++++++ 1 file changed, 203 
insertions(+) create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh new file mode 100644 index 00000000000..425f8230418 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh @@ -0,0 +1,203 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=20 # We use chunk training for training TDNN +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=1 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +num_utts_subset_valid=50 # "utts" is actually recording. So this is prettly small. +num_utts_subset_train=50 + +# target options +train_data_dir=data/train_azteec_whole_sp_corrupted_hires + +snr_scp= +speech_feat_scp= +overlapped_speech_labels_scp= + +deriv_weights_scp= +deriv_weights_for_overlapped_speech_scp= + +train_data_dir=data/train_aztec_small_unsad_whole_all_corrupted_sp_hires_bp +speech_feat_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/speech_feat.scp +deriv_weights_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/deriv_weights.scp + +snr_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/irm_targets.scp +deriv_weights_for_irm_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/deriv_weights_manual_seg.scp + +deriv_weights_for_overlapped_speech_scp= +overlapped_speech_labels_scp= + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$train_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + relu-renorm-layer name=tdnn4 dim=256 + + output-layer name=output-speech include-log-softmax=true dim=2 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective=quadratic + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$train_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=20000 \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$snr_scp --deriv-weights-scp=$deriv_weights_scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$speech_feat_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$overlapped_speech_labels_scp --deriv-weights-scp=$deriv_weights_for_overlapped_speech_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --dir=$dir/egs + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=20 \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=64 \ + --trainer.deriv-truncate-margin=8 \ + 
--trainer.max-param-change=$max_param_change \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$train_data_dir \
+    --targets-scp="$speech_feat_scp" \
+    --dir=$dir || exit 1
+fi
+
+if [ $stage -le 6 ]; then
+  $train_cmd JOB=1:100 $dir/log/compute_post_output-speech.JOB.log \
+    extract-column "scp:utils/split_scp.pl -j 100 \$[JOB-1] $speech_feat_scp |" ark,t:- \| \
+    steps/segmentation/quantize_vector.pl \| \
+    ali-to-post ark,t:- ark:- \| \
+    weight-post ark:- scp:$deriv_weights_scp ark:- \| \
+    post-to-feats --post-dim=2 ark:- ark:- \| \
+    matrix-sum-rows ark:- ark:- \| \
+    vector-sum ark:- $dir/post_output-speech.vec.JOB
+  eval vector-sum $dir/post_output-speech.vec.{`seq -s, 100`} $dir/post_output-speech.vec
+
+  $train_cmd JOB=1:100 $dir/log/compute_post_output-overlapped_speech.JOB.log \
+    ali-to-post "scp:utils/split_scp.pl -j 100 \$[JOB-1] $overlapped_speech_labels_scp |" ark:- \| \
+    post-to-feats --post-dim=2 ark:- ark:- \| \
+    matrix-sum-rows ark:- ark:- \| \
+    vector-sum ark:- $dir/post_output-overlapped_speech.vec.JOB
+  eval vector-sum $dir/post_output-overlapped_speech.vec.{`seq -s, 100`} $dir/post_output-overlapped_speech.vec
fi

From 3b6b460bfc4dea6526910c7bd956c3999a522619 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Mon, 12 Dec 2016 23:43:50 -0500
Subject: [PATCH 111/530] asr_diarization: remove junk from aspire path.sh

---
 egs/aspire/s5/path.sh | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/egs/aspire/s5/path.sh b/egs/aspire/s5/path.sh
index 5c0d3a92f19..7fb6d91c543 100755
--- a/egs/aspire/s5/path.sh
+++ b/egs/aspire/s5/path.sh
@@ -2,8 +2,5 @@ export KALDI_ROOT=`pwd`/../../..
 export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
 [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
 . $KALDI_ROOT/tools/config/common_path.sh
-export PATH=/home/vmanoha1/kaldi-raw-signal/src/segmenterbin:$PATH
-export PATH=$KALDI_ROOT/tools/sph2pipe_v2.5:$PATH
 export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH
-export PYTHONPATH=steps:${PYTHONPATH}
 export LC_ALL=C

From 65c2cb85f509f7debbe89e4f8d03f31c09d535ea Mon Sep 17 00:00:00 2001
From: Tom Ko
Date: Tue, 13 Dec 2016 02:07:12 -0500
Subject: [PATCH 112/530] fix typo in mkgraph.sh

---
 egs/wsj/s5/utils/mkgraph.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/wsj/s5/utils/mkgraph.sh b/egs/wsj/s5/utils/mkgraph.sh
index c62f0ccb14f..3305d628f83 100755
--- a/egs/wsj/s5/utils/mkgraph.sh
+++ b/egs/wsj/s5/utils/mkgraph.sh
@@ -21,7 +21,7 @@ loopscale=0.1
 remove_oov=false
 
 for x in `seq 4`; do
-  [ "$1" == "--mono" -o "$1" == "left-biphone" -o "$1" == "--quinphone" ] && shift && \
+  [ "$1" == "--mono" -o "$1" == "--left-biphone" -o "$1" == "--quinphone" ] && shift && \
     echo "WARNING: the --mono, --left-biphone and --quinphone options are now deprecated and ignored." 
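# Why the one-character fix above matters: without the leading "--", $1 was
# compared against the literal string "left-biphone", which no caller ever
# passes, so "--left-biphone" fell through to the option parser instead of
# triggering the deprecation warning. A quick way to check the behaviour in
# a shell:
#
#   set -- --left-biphone
#   [ "$1" == "left-biphone" ] && echo matched     # prints nothing
#   [ "$1" == "--left-biphone" ] && echo matched   # prints "matched"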
[ "$1" == "--remove-oov" ] && remove_oov=true && shift; [ "$1" == "--transition-scale" ] && tscale=$2 && shift 2; From e49eaac1fbe5ef9116c54672a114005386a3b070 Mon Sep 17 00:00:00 2001 From: Xingyu Na Date: Tue, 13 Dec 2016 03:03:27 -0500 Subject: [PATCH 113/530] Fix a script bug in gale_mandarin --- egs/gale_mandarin/s5/local/gale_prep_dict.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/egs/gale_mandarin/s5/local/gale_prep_dict.sh b/egs/gale_mandarin/s5/local/gale_prep_dict.sh index f1e39fb452e..bf2391d3bd7 100755 --- a/egs/gale_mandarin/s5/local/gale_prep_dict.sh +++ b/egs/gale_mandarin/s5/local/gale_prep_dict.sh @@ -77,7 +77,8 @@ if [ ! -f conf/g2p_model ]; then fi echo "--- Preparing pronunciations for OOV words ..." -if [ ! -x g2p.py ]; then +g2p=`which g2p.py` +if [ ! -x $g2p ]; then echo "g2p.py is not found. Checkout tools/extra/install_sequitur.sh." exit 1 fi From 7cb6d56122d06fb7c6f387ec2c0c63ace118e952 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Tue, 13 Dec 2016 23:51:02 -0500 Subject: [PATCH 114/530] asr_diarization: Adding segmentation-init-from-additive-signals-info --- ...ntation-init-from-additive-signals-info.cc | 164 ++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 src/segmenterbin/segmentation-init-from-additive-signals-info.cc diff --git a/src/segmenterbin/segmentation-init-from-additive-signals-info.cc b/src/segmenterbin/segmentation-init-from-additive-signals-info.cc new file mode 100644 index 00000000000..139048ac17b --- /dev/null +++ b/src/segmenterbin/segmentation-init-from-additive-signals-info.cc @@ -0,0 +1,164 @@ +// segmenterbin/segmentation-init-from-overlap-info.cc + +// Copyright 2015-16 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+ +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "segmenter/segmentation-utils.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace segmenter; + + const char *usage = + "Convert overlapping segments information into segmentation\n" + "\n" + "Usage: segmentation-init-from-additive-signals-info [options] " + " \n" + " e.g.: segmentation-init-from-additive-signals-info --additive-signals-segmentation-rspecifier=ark:utt_segmentation.ark " + "ark:reco_segmentation.ark ark,t:overlapped_segments_info.txt ark:-\n"; + + BaseFloat frame_shift = 0.01; + std::string lengths_rspecifier; + std::string additive_signals_segmentation_rspecifier; + std::string unreliable_segmentation_wspecifier; + + ParseOptions po(usage); + + po.Register("frame-shift", &frame_shift, "Frame shift in seconds"); + po.Register("lengths-rspecifier", &lengths_rspecifier, + "Archive of lengths for recordings; if provided, will be " + "used to truncate the output segmentation."); + po.Register("additive-signals-segmentation-rspecifier", + &additive_signals_segmentation_rspecifier, + "Archive of segmentation of the additive signal which will used " + "instead of an all 1 segmentation"); + po.Register("unreliable-segmentation-wspecifier", + &unreliable_segmentation_wspecifier, + "Applicable when additive-signals-segmentation-rspecifier is " + "provided and some utterances in it are missing"); + + po.Read(argc, argv); + + if (po.NumArgs() != 3) { + po.PrintUsage(); + exit(1); + } + + std::string reco_segmentation_rspecifier = po.GetArg(1), + additive_signals_info_rspecifier = po.GetArg(2), + segmentation_wspecifier = po.GetArg(3); + + SequentialSegmentationReader reco_segmentation_reader(reco_segmentation_rspecifier); + RandomAccessTokenVectorReader additive_signals_info_reader(additive_signals_info_rspecifier); + SegmentationWriter writer(segmentation_wspecifier); + + RandomAccessSegmentationReader additive_signals_segmentation_reader(additive_signals_segmentation_rspecifier); + SegmentationWriter unreliable_writer(unreliable_segmentation_wspecifier); + + RandomAccessInt32Reader lengths_reader(lengths_rspecifier); + + int32 num_done = 0, num_err = 0, num_missing = 0; + + for (; !reco_segmentation_reader.Done(); reco_segmentation_reader.Next()) { + const std::string &key = reco_segmentation_reader.Key(); + + if (!additive_signals_info_reader.HasKey(key)) { + KALDI_WARN << "Could not find additive_signals_info for key " << key; + num_missing++; + continue; + } + const std::vector &additive_signals_info = additive_signals_info_reader.Value(key); + + Segmentation segmentation(reco_segmentation_reader.Value()); + Segmentation unreliable_segmentation; + + for (size_t i = 0; i < additive_signals_info.size(); i++) { + std::vector parts; + SplitStringToVector(additive_signals_info[i], ",:", false, &parts); + + if (parts.size() != 3) { + KALDI_ERR << "Invalid format of overlap info " << additive_signals_info[i] + << "for key " << key << " in " << additive_signals_info_rspecifier; + } + const std::string &utt_id = parts[0]; + double start_time; + double duration; + ConvertStringToReal(parts[1], &start_time); + ConvertStringToReal(parts[2], &duration); + + int32 start_frame = round(start_time / frame_shift); + + if (!additive_signals_segmentation_reader.HasKey(utt_id)) { + KALDI_WARN << "Could not find utterance " << utt_id << " in " + << "segmentation " << additive_signals_segmentation_rspecifier; + if (duration < 0) { + KALDI_ERR << "duration < 0 for utt_id " << utt_id << " in 
" + << "additive_signals_info " << additive_signals_info_rspecifier + << "; additive-signals-segmentation must be provided in such a case"; + } + num_err++; + unreliable_segmentation.EmplaceBack(start_frame, start_frame + duration - 1, 0); + continue; // Treated as non-overlapping even though there + // is overlap + } + + InsertFromSegmentation(additive_signals_segmentation_reader.Value(utt_id), + start_frame, false, &segmentation); + } + + Sort(&segmentation); + if (!lengths_rspecifier.empty()) { + if (!lengths_reader.HasKey(key)) { + KALDI_WARN << "Could not find length for the recording " << key + << "in " << lengths_rspecifier; + continue; + } + TruncateToLength(lengths_reader.Value(key), &segmentation); + } + writer.Write(key, segmentation); + + if (!unreliable_segmentation_wspecifier.empty()) { + Sort(&unreliable_segmentation); + if (!lengths_rspecifier.empty()) { + if (!lengths_reader.HasKey(key)) { + KALDI_WARN << "Could not find length for the recording " << key + << "in " << lengths_rspecifier; + continue; + } + TruncateToLength(lengths_reader.Value(key), &unreliable_segmentation); + } + unreliable_writer.Write(key, unreliable_segmentation); + } + + num_done++; + } + + KALDI_LOG << "Successfully processed " << num_done << " recordings " + << " in additive signals info; failed for " << num_missing + << "; could not get segmentation for " << num_err; + + return (num_done > (num_missing/ 2) ? 0 : 1); + + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + From 8f1ee41a9064aa3f24a9b74a03f8ef5dbef42d09 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 14 Dec 2016 00:07:00 -0500 Subject: [PATCH 115/530] asr_diarization: Update make_overlapped_data_dir.py and data_dir_Manipulation_lib --- .../steps/data/data_dir_manipulation_lib.py | 12 +- .../s5/steps/data/make_overlapped_data_dir.py | 252 +++++++++++------- 2 files changed, 159 insertions(+), 105 deletions(-) diff --git a/egs/wsj/s5/steps/data/data_dir_manipulation_lib.py b/egs/wsj/s5/steps/data/data_dir_manipulation_lib.py index 7f1a5f74fe2..26fb17324dc 100644 --- a/egs/wsj/s5/steps/data/data_dir_manipulation_lib.py +++ b/egs/wsj/s5/steps/data/data_dir_manipulation_lib.py @@ -168,24 +168,24 @@ def CopyDataDirFiles(input_dir, output_dir, num_replicas, include_original, pref if not os.path.isfile(output_dir + "/wav.scp"): raise Exception("CopyDataDirFiles function expects output_dir to contain wav.scp already") - AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replicas, include_original, prefix, field = [0,1]) + AddPrefixToFields(input_dir + "/utt2spk", output_dir + "/utt2spk", num_replicas, include_original=include_original, prefix=prefix, field = [0,1]) RunKaldiCommand("utils/utt2spk_to_spk2utt.pl <{output_dir}/utt2spk >{output_dir}/spk2utt" .format(output_dir = output_dir)) if os.path.isfile(input_dir + "/utt2uniq"): - AddPrefixToFields(input_dir + "/utt2uniq", output_dir + "/utt2uniq", num_replicas, include_original, prefix, field =[0]) + AddPrefixToFields(input_dir + "/utt2uniq", output_dir + "/utt2uniq", num_replicas, include_original=include_original, prefix=prefix, field =[0]) else: # Create the utt2uniq file CreateCorruptedUtt2uniq(input_dir, output_dir, num_replicas, include_original, prefix) if os.path.isfile(input_dir + "/text"): - AddPrefixToFields(input_dir + "/text", output_dir + "/text", num_replicas, prefix, include_original, field =[0]) + AddPrefixToFields(input_dir + "/text", output_dir + "/text", num_replicas, include_original=include_original, 
prefix=prefix, field =[0]) if os.path.isfile(input_dir + "/segments"): - AddPrefixToFields(input_dir + "/segments", output_dir + "/segments", num_replicas, prefix, include_original, field = [0,1]) + AddPrefixToFields(input_dir + "/segments", output_dir + "/segments", num_replicas, prefix=prefix, include_original=include_original, field = [0,1]) if os.path.isfile(input_dir + "/reco2file_and_channel"): - AddPrefixToFields(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replicas, include_original, prefix, field = [0,1]) + AddPrefixToFields(input_dir + "/reco2file_and_channel", output_dir + "/reco2file_and_channel", num_replicas, include_original=include_original, prefix=prefix, field = [0,1]) - AddPrefixToFields(input_dir + "/reco2dur", output_dir + "/reco2dur", num_replicas, include_original, prefix, field = [0]) + AddPrefixToFields(input_dir + "/reco2dur", output_dir + "/reco2dur", num_replicas, include_original=include_original, prefix=prefix, field = [0]) RunKaldiCommand("utils/validate_data_dir.sh --no-feats {output_dir}" .format(output_dir = output_dir)) diff --git a/egs/wsj/s5/steps/data/make_overlapped_data_dir.py b/egs/wsj/s5/steps/data/make_overlapped_data_dir.py index 86137c26e25..e4bf85f9af7 100644 --- a/egs/wsj/s5/steps/data/make_overlapped_data_dir.py +++ b/egs/wsj/s5/steps/data/make_overlapped_data_dir.py @@ -9,6 +9,9 @@ data_lib = imp.load_source('dml', 'steps/data/data_dir_manipulation_lib.py') +sys.path.insert(0, 'steps') +import libs.common as common_lib + def GetArgs(): # we add required arguments as named arguments for readability parser = argparse.ArgumentParser(description="Reverberate the data directory with an option " @@ -32,7 +35,8 @@ def GetArgs(): "--rt-60 --drr location " "E.g. --rir-id 00001 --room-id 001 --receiver-position-id 001 --source-position-id 00001 " "--rt60 0.58 --drr -4.885 data/impulses/Room001-00001.wav") - parser.add_argument("--noise-set-parameters", type=str, action='append', default = None, dest = "noise_set_para_array", + parser.add_argument("--noise-set-parameters", type=str, action='append', + default = None, dest = "noise_set_para_array", help="Specifies the parameters of an noise set. " "Supports the specification of mixture_weight and noise_list_file_name. The mixture weight is optional. " "The default mixture weight is the probability mass remaining after adding the mixture weights " @@ -44,39 +48,56 @@ def GetArgs(): "--room-linkage " "location " "E.g. 
--noise-id 001 --noise-type isotropic --rir-id 00019 iso_noise.wav") - parser.add_argument("--speech-segments-set-parameters", type=str, action='append', default = None, dest = "speech_segments_set_para_array", - help="Specifies the speech segments for overlapped speech generation") + parser.add_argument("--speech-segments-set-parameters", type=str, action='append', + default = None, dest = "speech_segments_set_para_array", + help="Specifies the speech segments for overlapped speech generation.\n" + "Format: [], wav_scp, segments_list\n"); parser.add_argument("--num-replications", type=int, dest = "num_replicas", default = 1, help="Number of replicate to generated for the data") - parser.add_argument('--foreground-snrs', type=str, dest = "foreground_snr_string", default = '20:10:0', help='When foreground noises are being added the script will iterate through these SNRs.') - parser.add_argument('--background-snrs', type=str, dest = "background_snr_string", default = '20:10:0', help='When background noises are being added the script will iterate through these SNRs.') - parser.add_argument('--overlap-snrs', type=str, dest = "overlap_snr_string", default = "20:10:0", help='When overlapping speech segments are being added the script will iterate through these SNRs.') - parser.add_argument('--prefix', type=str, default = None, help='This prefix will modified for each reverberated copy, by adding additional affixes.') + parser.add_argument('--foreground-snrs', type=str, dest = "foreground_snr_string", + default = '20:10:0', + help='When foreground noises are being added the script will iterate through these SNRs.') + parser.add_argument('--background-snrs', type=str, dest = "background_snr_string", + default = '20:10:0', + help='When background noises are being added the script will iterate through these SNRs.') + parser.add_argument('--overlap-snrs', type=str, dest = "overlap_snr_string", + default = "20:10:0", + help='When overlapping speech segments are being added the script will iterate through these SNRs.') + parser.add_argument('--prefix', type=str, default = None, + help='This prefix will modified for each reverberated copy, by adding additional affixes.') parser.add_argument("--speech-rvb-probability", type=float, default = 1.0, help="Probability of reverberating a speech signal, e.g. 0 <= p <= 1") parser.add_argument("--pointsource-noise-addition-probability", type=float, default = 1.0, help="Probability of adding point-source noises, e.g. 0 <= p <= 1") parser.add_argument("--isotropic-noise-addition-probability", type=float, default = 1.0, help="Probability of adding isotropic noises, e.g. 0 <= p <= 1") - parser.add_argument("--overlapped-speech-addition-probability", type=float, default = 1.0, - help="Probability of adding overlapped speech, e.g. 0 <= p <= 1") + parser.add_argument("--overlapping-speech-addition-probability", type=float, default = 1.0, + help="Probability of adding overlapping speech, e.g. 0 <= p <= 1") parser.add_argument("--rir-smoothing-weight", type=float, default = 0.3, help="Smoothing weight for the RIR probabilties, e.g. 0 <= p <= 1. If p = 0, no smoothing will be done. " "The RIR distribution will be mixed with a uniform distribution according to the smoothing weight") parser.add_argument("--noise-smoothing-weight", type=float, default = 0.3, help="Smoothing weight for the noise probabilties, e.g. 0 <= p <= 1. If p = 0, no smoothing will be done. 
" "The noise distribution will be mixed with a uniform distribution according to the smoothing weight") - parser.add_argument("--overlapped-speech-smoothing-weight", type=float, default = 0.3, - help="The overlapped speech distribution will be mixed with a uniform distribution according to the smoothing weight") + parser.add_argument("--overlapping-speech-smoothing-weight", type=float, default = 0.3, + help="The overlapping speech distribution will be mixed with a uniform distribution according to the smoothing weight") parser.add_argument("--max-noises-per-minute", type=int, default = 2, help="This controls the maximum number of point-source noises that could be added to a recording according to its duration") - parser.add_argument("--max-overlapped-segments-per-minute", type=int, default = 5, + parser.add_argument("--min-overlapping-segments-per-minute", type=int, default = 1, + help="This controls the minimum number of overlapping segments of speech that could be added to a recording per minute") + parser.add_argument("--max-overlapping-segments-per-minute", type=int, default = 5, help="This controls the maximum number of overlapping segments of speech that could be added to a recording per minute") - parser.add_argument('--random-seed', type=int, default=0, help='seed to be used in the randomization of impulses and noises') - parser.add_argument("--shift-output", type=str, help="If true, the reverberated waveform will be shifted by the amount of the peak position of the RIR", - choices=['true', 'false'], default = "true") - parser.add_argument("--output-additive-noise-dir", type=str, help="Output directory corresponding to the additive noise part of the data corruption") - parser.add_argument("--output-reverb-dir", type=str, help="Output directory corresponding to the reverberated signal part of the data corruption") + parser.add_argument('--random-seed', type=int, default=0, + help='seed to be used in the randomization of impulses and noises') + parser.add_argument("--shift-output", type=str, + help="If true, the reverberated waveform will be shifted by the amount of the peak position of the RIR", + choices=['true', 'false'], default = "true") + parser.add_argument("--output-additive-noise-dir", type=str, + action = common_train_lib.NullstrToNoneAction, default = None, + help="Output directory corresponding to the additive noise part of the data corruption") + parser.add_argument("--output-reverb-dir", type=str, + action = common_train_lib.NullstrToNoneAction, default = None, + help="Output directory corresponding to the reverberated signal part of the data corruption") parser.add_argument("input_dir", help="Input data directory") @@ -128,8 +149,8 @@ def CheckArgs(args): if args.isotropic_noise_addition_probability < 0 or args.isotropic_noise_addition_probability > 1: raise Exception("--isotropic-noise-addition-probability must be between 0 and 1") - if args.overlapped_speech_addition_probability < 0 or args.overlapped_speech_addition_probability > 1: - raise Exception("--overlapped-speech-addition-probability must be between 0 and 1") + if args.overlapping_speech_addition_probability < 0 or args.overlapping_speech_addition_probability > 1: + raise Exception("--overlapping-speech-addition-probability must be between 0 and 1") if args.rir_smoothing_weight < 0 or args.rir_smoothing_weight > 1: raise Exception("--rir-smoothing-weight must be between 0 and 1") @@ -137,14 +158,17 @@ def CheckArgs(args): if args.noise_smoothing_weight < 0 or args.noise_smoothing_weight > 1: raise 
Exception("--noise-smoothing-weight must be between 0 and 1") - if args.overlapped_speech_smoothing_weight < 0 or args.overlapped_speech_smoothing_weight > 1: - raise Exception("--overlapped-speech-smoothing-weight must be between 0 and 1") + if args.overlapping_speech_smoothing_weight < 0 or args.overlapping_speech_smoothing_weight > 1: + raise Exception("--overlapping-speech-smoothing-weight must be between 0 and 1") if args.max_noises_per_minute < 0: raise Exception("--max-noises-per-minute cannot be negative") - if args.max_overlapped_segments_per_minute < 0: - raise Exception("--max-overlapped-segments-per-minute cannot be negative") + if args.min_overlapping_segments_per_minute < 0: + raise Exception("--min-overlapping-segments-per-minute cannot be negative") + + if args.max_overlapping_segments_per_minute < 0: + raise Exception("--max-overlapping-segments-per-minute cannot be negative") return args @@ -203,32 +227,33 @@ def ParseSpeechSegmentsList(speech_segments_set_para_array, smoothing_weight): return segments_list -def AddOverlappedSpeech(room, # the room selected - speech_segments_list, # the speech list - overlapped_speech_addition_probability, # Probability of another speech waveform - snrs, # the SNR for adding the foreground speech - speech_dur, # duration of the recording - max_overlapped_speech_segments, # Maximum number of speech signals that can be added - overlapped_speech_descriptor # descriptor to store the information of the overlapped speech +def AddOverlappingSpeech(room, # the room selected + speech_segments_list, # the speech list + overlapping_speech_addition_probability, # Probability of another speech waveform + snrs, # the SNR for adding the foreground speech + speech_dur, # duration of the recording + min_overlapping_speech_segments, # Minimum number of speech signals that can be added + max_overlapping_speech_segments, # Maximum number of speech signals that can be added + overlapping_speech_descriptor # descriptor to store the information of the overlapping speech ): - if (len(speech_segments_list) > 0 and random.random() < overlapped_speech_addition_probability - and max_overlapped_speech_segments >= 1): - for k in range(random.randint(1, max_overlapped_speech_segments)): - # pick the overlapped speech signal and the RIR to - # reverberate the overlapped speech signal + if (len(speech_segments_list) > 0 and random.random() < overlapping_speech_addition_probability + and max_overlapping_speech_segments >= 1): + for k in range(random.randint(min_overlapping_speech_segments, max_overlapping_speech_segments)): + # pick the overlapping_speech speech signal and the RIR to + # reverberate the overlapping_speech speech signal speech_segment = data_lib.PickItemWithProbability(speech_segments_list) rir = data_lib.PickItemWithProbability(room.rir_list) speech_rvb_command = """wav-reverberate --impulse-response="{0}" --shift-output=true """.format(rir.rir_rspecifier) - overlapped_speech_descriptor['start_times'].append(round(random.random() * speech_dur, 2)) - overlapped_speech_descriptor['snrs'].append(snrs.next()) - overlapped_speech_descriptor['utt_ids'].append(speech_segment.utt_id) - overlapped_speech_descriptor['durations'].append(speech_segment.duration) + overlapping_speech_descriptor['start_times'].append(round(random.random() * speech_dur, 2)) + overlapping_speech_descriptor['snrs'].append(snrs.next()) + overlapping_speech_descriptor['utt_ids'].append(speech_segment.utt_id) + overlapping_speech_descriptor['durations'].append(speech_segment.duration) if 
len(speech_segment.wav_rxfilename.split()) == 1: - overlapped_speech_descriptor['speech_segments'].append("{1} {0} - |".format(speech_segment.wav_rxfilename, speech_rvb_command)) + overlapping_speech_descriptor['speech_segments'].append("{1} {0} - |".format(speech_segment.wav_rxfilename, speech_rvb_command)) else: - overlapped_speech_descriptor['speech_segments'].append("{0} {1} - - |".format(speech_segment.wav_rxfilename, speech_rvb_command)) + overlapping_speech_descriptor['speech_segments'].append("{0} {1} - - |".format(speech_segment.wav_rxfilename, speech_rvb_command)) # This function randomly decides whether to reverberate, and sample a RIR if it does # It also decides whether to add the appropriate noises @@ -244,17 +269,21 @@ def GenerateReverberationAndOverlappedSpeechOpts( speech_rvb_probability, # Probability of reverberating a speech signal isotropic_noise_addition_probability, # Probability of adding isotropic noises pointsource_noise_addition_probability, # Probability of adding point-source noises - overlapped_speech_addition_probability, + overlapping_speech_addition_probability, # Probability of adding overlapping speech segments speech_dur, # duration of the recording max_noises_recording, # Maximum number of point-source noises that can be added - max_overlapped_segments_recording # Maximum number of overlapped segments that can be added + min_overlapping_segments_recording, # Minimum number of overlapping segments that can be added + max_overlapping_segments_recording # Maximum number of overlapping segments that can be added ): impulse_response_opts = "" - additive_noise_opts = "" noise_addition_descriptor = {'noise_io': [], 'start_times': [], - 'snrs': []} + 'snrs': [], + 'noise_ids': [], + 'durations': [] + } + # Randomly select the room # Here the room probability is a sum of the probabilities of the RIRs recorded in the room. 
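    # (Illustrative note, not the library's actual code: PickItemWithProbability
    # is assumed to draw an item with chance proportional to its 'probability'
    # attribute, along the lines of
    #     r = random.random() * sum(i.probability for i in items)
    #     for i in items:
    #         r -= i.probability
    #         if r <= 0.0:
    #             return i
    # so a room wins with the summed probability of its RIRs.)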
room = data_lib.PickItemWithProbability(room_dict) @@ -278,6 +307,8 @@ def GenerateReverberationAndOverlappedSpeechOpts( noise_addition_descriptor['noise_io'].append("{0} wav-reverberate --duration={1} - - |".format(isotropic_noise.noise_rspecifier, speech_dur)) noise_addition_descriptor['start_times'].append(0) noise_addition_descriptor['snrs'].append(background_snrs.next()) + noise_addition_descriptor['noise_ids'].append(isotropic_noise.noise_id) + noise_addition_descriptor['durations'].append(speech_dur) data_lib.AddPointSourceNoise(room, # the room selected pointsource_noise_list, # the point source noise list @@ -291,35 +322,29 @@ def GenerateReverberationAndOverlappedSpeechOpts( assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['start_times']) assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['snrs']) - - overlapped_speech_descriptor = {'speech_segments': [], - 'start_times': [], - 'snrs': [], - 'utt_ids': [], - 'durations': [] - } - - AddOverlappedSpeech(room, - speech_segments_list, # speech segments list - overlapped_speech_addition_probability, - overlap_snrs, - speech_dur, - max_overlapped_segments_recording, - overlapped_speech_descriptor + assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['noise_ids']) + assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['durations']) + + overlapping_speech_descriptor = {'speech_segments': [], + 'start_times': [], + 'snrs': [], + 'utt_ids': [], + 'durations': [] + } + + print ("Adding overlapping speech...") + AddOverlappingSpeech(room, + speech_segments_list, # speech segments list + overlapping_speech_addition_probability, + overlap_snrs, + speech_dur, + min_overlapping_segments_recording, + max_overlapping_segments_recording, + overlapping_speech_descriptor ) - if len(overlapped_speech_descriptor['speech_segments']) > 0: - noise_addition_descriptor['noise_io'] += overlapped_speech_descriptor['speech_segments'] - noise_addition_descriptor['start_times'] += overlapped_speech_descriptor['start_times'] - noise_addition_descriptor['snrs'] += overlapped_speech_descriptor['snrs'] - - if len(noise_addition_descriptor['noise_io']) > 0: - additive_noise_opts += "--additive-signals='{0}' ".format(','.join(noise_addition_descriptor['noise_io'])) - additive_noise_opts += "--start-times='{0}' ".format(','.join(map(lambda x:str(x), noise_addition_descriptor['start_times']))) - additive_noise_opts += "--snrs='{0}' ".format(','.join(map(lambda x:str(x), noise_addition_descriptor['snrs']))) - - return [impulse_response_opts, additive_noise_opts, - zip(overlapped_speech_descriptor['utt_ids'], [ str(x) for x in overlapped_speech_descriptor['start_times'] ], [ str(x) for x in overlapped_speech_descriptor['durations'] ])] + return [impulse_response_opts, noise_addition_descriptor, + overlapping_speech_descriptor] # This is the main function to generate pipeline command for the corruption # The generic command of wav-reverberate will be like: @@ -335,7 +360,7 @@ def GenerateReverberatedWavScpWithOverlappedSpeech( foreground_snr_array, # the SNR for adding the foreground noises background_snr_array, # the SNR for adding the background noises speech_segments_list, # list of speech segments to create overlapped speech - overlap_snr_array, # the SNR for adding overlapped speech + overlap_snr_array, # the SNR for adding overlapping speech num_replicas, # Number of replicate to generated for the data prefix, # prefix for the id of the 
corrupted utterances speech_rvb_probability, # Probability of reverberating a speech signal @@ -343,20 +368,20 @@ def GenerateReverberatedWavScpWithOverlappedSpeech( isotropic_noise_addition_probability, # Probability of adding isotropic noises pointsource_noise_addition_probability, # Probability of adding point-source noises max_noises_per_minute, # maximum number of point-source noises that can be added to a recording according to its duration - overlapped_speech_addition_probability, - max_overlapped_segments_per_minute, + overlapping_speech_addition_probability, + min_overlapping_segments_per_minute, + max_overlapping_segments_per_minute, output_reverb_dir = None, - output_additive_noise_dir = None + output_additive_noise_dir = None, ): foreground_snrs = data_lib.list_cyclic_iterator(foreground_snr_array) background_snrs = data_lib.list_cyclic_iterator(background_snr_array) overlap_snrs = data_lib.list_cyclic_iterator(overlap_snr_array) - corrupted_wav_scp = {} reverb_wav_scp = {} additive_noise_wav_scp = {} - overlapped_segments_info = {} + overlapping_segments_info = {} keys = wav_scp.keys() keys.sort() @@ -368,26 +393,48 @@ def GenerateReverberatedWavScpWithOverlappedSpeech( wav_original_pipe = "cat {0} |".format(wav_original_pipe) speech_dur = durations[recording_id] max_noises_recording = math.floor(max_noises_per_minute * speech_dur / 60) - max_overlapped_segments_recording = math.floor(max_overlapped_segments_per_minute * speech_dur / 60) + min_overlapping_segments_recording = max(math.floor(min_overlapping_segments_per_minute * speech_dur / 60), 1) + max_overlapping_segments_recording = math.floor(max_overlapping_segments_per_minute * speech_dur / 60) - [impulse_response_opts, - additive_noise_opts, - overlapped_speech_segments] = GenerateReverberationAndOverlappedSpeechOpts( + [impulse_response_opts, noise_addition_descriptor, + overlapping_speech_descriptor] = GenerateReverberationAndOverlappedSpeechOpts( room_dict = room_dict, # the room dictionary, please refer to MakeRoomDict() for the format pointsource_noise_list = pointsource_noise_list, # the point source noise list iso_noise_dict = iso_noise_dict, # the isotropic noise dictionary foreground_snrs = foreground_snrs, # the SNR for adding the foreground noises background_snrs = background_snrs, # the SNR for adding the background noises speech_segments_list = speech_segments_list, # Speech segments for creating overlapped speech - overlap_snrs = overlap_snrs, # the SNR for adding overlapped speech + overlap_snrs = overlap_snrs, # the SNR for adding overlapping speech speech_rvb_probability = speech_rvb_probability, # Probability of reverberating a speech signal isotropic_noise_addition_probability = isotropic_noise_addition_probability, # Probability of adding isotropic noises pointsource_noise_addition_probability = pointsource_noise_addition_probability, # Probability of adding point-source noises - overlapped_speech_addition_probability = overlapped_speech_addition_probability, + overlapping_speech_addition_probability = overlapping_speech_addition_probability, speech_dur = speech_dur, # duration of the recording max_noises_recording = max_noises_recording, # Maximum number of point-source noises that can be added - max_overlapped_segments_recording = max_overlapped_segments_recording + min_overlapping_segments_recording = min_overlapping_segments_recording, + max_overlapping_segments_recording = max_overlapping_segments_recording ) + + additive_noise_opts = "" + + if (len(noise_addition_descriptor['noise_io']) > 0 or 
+ len(overlapping_speech_descriptor['speech_segments']) > 0): + additive_noise_opts += ("--additive-signals='{0}' " + .format(',' + .join(noise_addition_descriptor['noise_io'] + + overlapping_speech_descriptor['speech_segments'])) + ) + additive_noise_opts += ("--start-times='{0}' " + .format(',' + .join(map(lambda x:str(x), noise_addition_descriptor['start_times'] + + overlapping_speech_descriptor['start_times']))) + ) + additive_noise_opts += ("--snrs='{0}' " + .format(',' + .join(map(lambda x:str(x), noise_addition_descriptor['snrs'] + + overlapping_speech_descriptor['snrs']))) + ) + reverberate_opts = impulse_response_opts + additive_noise_opts new_recording_id = data_lib.GetNewId(recording_id, prefix, i) @@ -411,14 +458,19 @@ def GenerateReverberatedWavScpWithOverlappedSpeech( wav_additive_noise_pipe = "{0} wav-reverberate --shift-output={1} --additive-noise-out-wxfilename=- {2} - /dev/null |".format(wav_original_pipe, shift_output, reverberate_opts) additive_noise_wav_scp[new_recording_id] = wav_additive_noise_pipe - if len(overlapped_speech_segments) > 0: - overlapped_segments_info[new_recording_id] = [ ':'.join(x) for x in overlapped_speech_segments ] + if len(overlapping_speech_descriptor['speech_segments']) > 0: + overlapping_segments_info[new_recording_id] = [ + ':'.join(x) + for x in zip(overlapping_speech_descriptor['utt_ids'], + [ str(x) for x in overlapping_speech_descriptor['start_times'] ], + [ str(x) for x in overlapping_speech_descriptor['durations'] ]) + ] data_lib.WriteDictToFile(corrupted_wav_scp, output_dir + "/wav.scp") - # Write for each new recording, the utterance id of the segments and - # the start time at which they are added - data_lib.WriteDictToFile(overlapped_segments_info, output_dir + "/overlapped_segments_info.txt") + # Write for each new recording, the id, start time and durations + # of the overlapping segments + data_lib.WriteDictToFile(overlapping_segments_info, output_dir + "/overlapped_segments_info.txt") if output_reverb_dir is not None: data_lib.WriteDictToFile(reverb_wav_scp, output_reverb_dir + "/wav.scp") @@ -426,7 +478,6 @@ def GenerateReverberatedWavScpWithOverlappedSpeech( if output_additive_noise_dir is not None: data_lib.WriteDictToFile(additive_noise_wav_scp, output_additive_noise_dir + "/wav.scp") - # This function creates multiple copies of the necessary files, e.g. utt2spk, wav.scp ... 
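# (Aside on the options assembled above, purely illustrative: the
# reverberate_opts string handed to wav-reverberate ends up looking like
#   --impulse-response="rir.wav" --additive-signals='bg_noise.wav,ovlp_seg.wav'
#   --start-times='0,12.37' --snrs='15,2'
# with made-up file names and values.)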
def CreateReverberatedCopy(input_dir, output_dir, @@ -436,7 +487,7 @@ def CreateReverberatedCopy(input_dir, speech_segments_list, foreground_snr_string, # the SNR for adding the foreground noises background_snr_string, # the SNR for adding the background noises - overlap_snr_string, # the SNR for overlapped speech + overlap_snr_string, # the SNR for overlapping speech num_replicas, # Number of replicate to generated for the data prefix, # prefix for the id of the corrupted utterances speech_rvb_probability, # Probability of reverberating a speech signal @@ -444,8 +495,9 @@ def CreateReverberatedCopy(input_dir, isotropic_noise_addition_probability, # Probability of adding isotropic noises pointsource_noise_addition_probability, # Probability of adding point-source noises max_noises_per_minute, # maximum number of point-source noises that can be added to a recording according to its duration - overlapped_speech_addition_probability, - max_overlapped_segments_per_minute, + overlapping_speech_addition_probability, + min_overlapping_segments_per_minute, + max_overlapping_segments_per_minute, output_reverb_dir = None, output_additive_noise_dir = None ): @@ -482,8 +534,9 @@ def CreateReverberatedCopy(input_dir, isotropic_noise_addition_probability = isotropic_noise_addition_probability, pointsource_noise_addition_probability = pointsource_noise_addition_probability, max_noises_per_minute = max_noises_per_minute, - overlapped_speech_addition_probability = overlapped_speech_addition_probability, - max_overlapped_segments_per_minute = max_overlapped_segments_per_minute, + overlapping_speech_addition_probability = overlapping_speech_addition_probability, + min_overlapping_segments_per_minute = min_overlapping_segments_per_minute, + max_overlapping_segments_per_minute = max_overlapping_segments_per_minute, output_reverb_dir = output_reverb_dir, output_additive_noise_dir = output_additive_noise_dir) @@ -512,7 +565,7 @@ def Main(): print("Number of isotropic noises is {0}".format(sum(len(iso_noise_dict[key]) for key in iso_noise_dict.keys()))) room_dict = data_lib.MakeRoomDict(rir_list) - speech_segments_list = ParseSpeechSegmentsList(args.speech_segments_set_para_array, args.overlapped_speech_smoothing_weight) + speech_segments_list = ParseSpeechSegmentsList(args.speech_segments_set_para_array, args.overlapping_speech_smoothing_weight) CreateReverberatedCopy(input_dir = args.input_dir, output_dir = args.output_dir, @@ -530,8 +583,9 @@ def Main(): isotropic_noise_addition_probability = args.isotropic_noise_addition_probability, pointsource_noise_addition_probability = args.pointsource_noise_addition_probability, max_noises_per_minute = args.max_noises_per_minute, - overlapped_speech_addition_probability = args.overlapped_speech_addition_probability, - max_overlapped_segments_per_minute = args.max_overlapped_segments_per_minute, + overlapping_speech_addition_probability = args.overlapping_speech_addition_probability, + min_overlapping_segments_per_minute = args.min_overlapping_segments_per_minute, + max_overlapping_segments_per_minute = args.max_overlapping_segments_per_minute, output_reverb_dir = args.output_reverb_dir, output_additive_noise_dir = args.output_additive_noise_dir) From 7b3723da82631d76b7ddc001f7f234d226576db5 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 14 Dec 2016 00:07:19 -0500 Subject: [PATCH 116/530] asr_diarization: Update reverberate_data_dir.py --- egs/wsj/s5/steps/data/reverberate_data_dir.py | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff 
--git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py index 0080bdba5f0..9a71126dde3 100755 --- a/egs/wsj/s5/steps/data/reverberate_data_dir.py +++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py @@ -7,7 +7,7 @@ from __future__ import print_function import argparse, glob, math, os, random, sys, warnings, copy, imp, ast -data_lib = imp.load_source('dml', 'steps/data/data_dir_manipulation_lib.py') +import data_dir_manipulation_lib as data_lib def GetArgs(): # we add required arguments as named arguments for readability @@ -71,8 +71,12 @@ def GetArgs(): "the RIRs/noises will be resampled to the rate of the source data.") parser.add_argument("--include-original-data", type=str, help="If true, the output data includes one copy of the original data", choices=['true', 'false'], default = "false") - parser.add_argument("--output-additive-noise-dir", type=str, help="Output directory corresponding to the additive noise part of the data corruption") - parser.add_argument("--output-reverb-dir", type=str, help="Output directory corresponding to the reverberated signal part of the data corruption") + parser.add_argument("--output-additive-noise-dir", type=str, + action = common_lib.NullstrToNoneAction, default = None, + help="Output directory corresponding to the additive noise part of the data corruption") + parser.add_argument("--output-reverb-dir", type=str, + action = common_lib.NullstrToNoneAction, default = None, + help="Output directory corresponding to the reverberated signal part of the data corruption") parser.add_argument("input_dir", help="Input data directory") @@ -97,18 +101,10 @@ def CheckArgs(args): args.prefix = "rvb" warnings.warn("--prefix is set to 'rvb' as more than one copy of data is generated") - if args.output_reverb_dir is not None: - if args.output_reverb_dir == "": - args.output_reverb_dir = None - if args.output_reverb_dir is not None: if not os.path.exists(args.output_reverb_dir): os.makedirs(args.output_reverb_dir) - if args.output_additive_noise_dir is not None: - if args.output_additive_noise_dir == "": - args.output_additive_noise_dir = None - if args.output_additive_noise_dir is not None: if not os.path.exists(args.output_additive_noise_dir): os.makedirs(args.output_additive_noise_dir) @@ -346,5 +342,3 @@ def Main(): if __name__ == "__main__": Main() - - From b9328f700fc353de71d0c95362c99110982c4924 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 14 Dec 2016 00:07:36 -0500 Subject: [PATCH 117/530] asr_diarization: Better way of checking vol perturbation --- .../s5/utils/data/perturb_data_dir_volume.sh | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh b/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh index 185c7abf426..ee3c281bdbb 100755 --- a/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh +++ b/egs/wsj/s5/utils/data/perturb_data_dir_volume.sh @@ -30,7 +30,27 @@ if [ ! -f $data/wav.scp ]; then exit 1 fi -if ! 
$force && grep -q "sox --vol" $data/wav.scp; then +volume_perturb_done=`head -n100 $data/wav.scp | python -c " +import sys, re +for line in sys.stdin.readlines(): + if len(line.strip()) == 0: + continue + # Handle three cases of rxfilenames appropriately; 'input piped command', 'file offset' and 'filename' + parts = line.strip().split() + if line.strip()[-1] == '|': + if re.search('sox --vol', ' '.join(parts[-11:])): + print 'true' + sys.exit(0) + elif re.search(':[0-9]+$', line.strip()) is not None: + continue + else: + if ' '.join(parts[1:3]) == 'sox --vol': + print 'true' + sys.exit(0) +print 'false' +"` || exit 1 + +if $volume_perturb_done; then echo "$0: It looks like the data was already volume perturbed. Not doing anything." exit 0 fi From 1ac065d241d367bbdbdcff0cc6364e734a59cc8d Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 14 Dec 2016 00:08:14 -0500 Subject: [PATCH 118/530] asr_diarization: Updated train_stats_sad_overlap_1a.sh --- .../segmentation/tuning/train_stats_sad_overlap_1a.sh | 8 ++++---- egs/wsj/s5/utils/data/get_reco2dur.sh | 10 +++++++++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh index 425f8230418..aae1fd995e0 100644 --- a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh @@ -54,12 +54,12 @@ overlapped_speech_labels_scp= deriv_weights_scp= deriv_weights_for_overlapped_speech_scp= -train_data_dir=data/train_aztec_small_unsad_whole_all_corrupted_sp_hires_bp -speech_feat_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/speech_feat.scp +train_data_dir=data/train_aztec_small_unsad_whole_sad_ovlp_corrupted_sp +speech_feat_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp/speech_feat.scp deriv_weights_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/deriv_weights.scp -snr_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/irm_targets.scp -deriv_weights_for_irm_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/deriv_weights_manual_seg.scp +snr_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp/irm_targets.scp +deriv_weights_for_irm_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp/deriv_weights_manual_seg.scp deriv_weights_for_overlapped_speech_scp= overlapped_speech_labels_scp= diff --git a/egs/wsj/s5/utils/data/get_reco2dur.sh b/egs/wsj/s5/utils/data/get_reco2dur.sh index 7d2ccb71769..5e925fc3e75 100755 --- a/egs/wsj/s5/utils/data/get_reco2dur.sh +++ b/egs/wsj/s5/utils/data/get_reco2dur.sh @@ -11,6 +11,8 @@ # files in entirely.) frame_shift=0.01 +cmd=run.pl +nj=4 . utils/parse_options.sh . ./path.sh @@ -74,11 +76,17 @@ else echo "... perturb_data_dir_speed_3way.sh." fi - if ! wav-to-duration --read-entire-file=$read_entire_file scp:$data/wav.scp ark,t:$data/reco2dur 2>&1 | grep -v 'nonzero return status'; then + utils/split_data.sh $data $nj + if ! 
$cmd JOB=1:$nj $data/log/get_wav_duration.JOB.log wav-to-duration --read-entire-file=$read_entire_file scp:$data/split$nj/JOB/wav.scp ark,t:$data/split$nj/JOB/reco2dur 2>&1; then echo "$0: there was a problem getting the durations; moving $data/reco2dur to $data/.backup/" mkdir -p $data/.backup/ mv $data/reco2dur $data/.backup/ + exit 1 fi + + for n in `seq $nj`; do + cat $data/split$nj/$n/reco2dur + done > $data/reco2dur fi echo "$0: computed $data/reco2dur" From 7c6e40a8fe020f7dbbecfd2f5dcc3c48035dedb8 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 14 Dec 2016 00:08:45 -0500 Subject: [PATCH 119/530] asr_diarization: New version of corruption_data_dir for overlapped_speech that works on non-whole dirs --- ...o_corruption_data_dir_overlapped_speech.sh | 79 ++++++++++--------- 1 file changed, 40 insertions(+), 39 deletions(-) mode change 100644 => 100755 egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh old mode 100644 new mode 100755 index 75dbce578b2..4d532be4353 --- a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh @@ -21,12 +21,9 @@ snrs="20:10:15:5:0:-5" foreground_snrs="20:10:15:5:0:-5" background_snrs="20:10:15:5:0:-5" overlap_snrs="5:2:1:0:-1:-2" -# Whole-data directory corresponding to data_dir -whole_data_dir=data/train_si284_whole overlap_labels_dir=overlap_labels # Parallel options -reco_nj=40 nj=40 cmd=queue.pl @@ -35,9 +32,6 @@ mfcc_config=conf/mfcc_hires_bp.conf feat_suffix=hires_bp energy_config=conf/log_energy.conf -reco_vad_dir= # Output of prepare_unsad_data.sh. - # If provided, the speech labels and deriv weights will be - # copied into the output data directory. utt_vad_dir= . 
utils/parse_options.sh @@ -53,11 +47,20 @@ rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_l rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") rvb_opts+=(--speech-segments-set-parameters="$data_dir/wav.scp,$data_dir/segments") -whole_data_id=`basename ${whole_data_dir}` +if [ $stage -le 0 ]; then + steps/segmentation/get_data_dir_with_segmented_wav.py \ + $data_dir ${data_dir}_seg +fi + +data_dir=${data_dir}_seg + +data_id=`basename ${data_dir}` -corrupted_data_id=${whole_data_id}_ovlp_corrupted -clean_data_id=${whole_data_id}_ovlp_clean -noise_data_id=${whole_data_id}_ovlp_noise +corrupted_data_id=${data_id}_ovlp_corrupted +clean_data_id=${data_id}_ovlp_clean +noise_data_id=${data_id}_ovlp_noise + +utils/data/get_reco2dur.sh --cmd $cmd --nj 40 $data_dir if [ $stage -le 1 ]; then python steps/data/make_corrupted_data_dir.py \ @@ -67,15 +70,11 @@ if [ $stage -le 1 ]; then --speech-rvb-probability=1 \ --overlapping-speech-addition-probability=1 \ --num-replications=$num_data_reps \ - --min-overlapping-segments-per-minute=5 \ - --max-overlapping-segments-per-minute=20 \ + --min-overlapping-segments-per-minute=1 \ + --max-overlapping-segments-per-minute=1 \ --output-additive-noise-dir=data/${noise_data_id} \ --output-reverb-dir=data/${clean_data_id} \ - data/${whole_data_id} data/${corrupted_data_id} -fi - -if $dry_run; then - exit 0 + ${data_dir} data/${corrupted_data_id} fi clean_data_dir=data/${clean_data_id} @@ -85,9 +84,7 @@ orig_corrupted_data_dir=$corrupted_data_dir if $speed_perturb; then if [ $stage -le 2 ]; then - ## Assuming whole data directories for x in $clean_data_dir $corrupted_data_dir $noise_data_dir; do - cp $x/reco2dur $x/utt2dur utils/data/perturb_data_dir_speed_3way.sh $x ${x}_sp done fi @@ -101,9 +98,9 @@ if $speed_perturb; then noise_data_id=${noise_data_id}_sp if [ $stage -le 3 ]; then - utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 --force true ${corrupted_data_dir} - utils/data/perturb_data_dir_volume.sh --force true --reco2vol ${corrupted_data_dir}/reco2vol ${clean_data_dir} - utils/data/perturb_data_dir_volume.sh --force true --reco2vol ${corrupted_data_dir}/reco2vol ${noise_data_dir} + utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 ${corrupted_data_dir} + utils/data/perturb_data_dir_volume.sh --reco2vol ${corrupted_data_dir}/reco2vol ${clean_data_dir} + utils/data/perturb_data_dir_volume.sh --reco2vol ${corrupted_data_dir}/reco2vol ${noise_data_dir} fi fi @@ -124,19 +121,20 @@ if [ $stage -le 4 ]; then utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix corrupted_data_dir=${corrupted_data_dir}_$feat_suffix steps/make_mfcc.sh --mfcc-config $mfcc_config \ - --cmd "$train_cmd" --nj $reco_nj \ + --cmd "$cmd" --nj $nj \ $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir fi +if false; then if [ $stage -le 5 ]; then steps/make_mfcc.sh --mfcc-config $energy_config \ - --cmd "$train_cmd" --nj $reco_nj \ + --cmd "$cmd" --nj $nj \ $clean_data_dir exp/make_log_energy/${clean_data_id} log_energy_feats fi if [ $stage -le 6 ]; then steps/make_mfcc.sh --mfcc-config $energy_config \ - --cmd "$train_cmd" --nj $reco_nj \ + --cmd "$cmd" --nj $nj \ $noise_data_dir exp/make_log_energy/${noise_data_id} log_energy_feats fi @@ -155,11 +153,12 @@ if [ $stage -le 8 ]; then fi steps/segmentation/make_snr_targets.sh \ - --nj $nj --cmd "$train_cmd --max-jobs-run $max_jobs_run" \ + --nj $nj --cmd "$cmd --max-jobs-run 
$max_jobs_run" \ --target-type Irm --compress true --apply-exp false \ ${clean_data_dir} ${noise_data_dir} ${corrupted_data_dir} \ exp/make_irm_targets/${corrupted_data_id} $targets_dir fi +fi # Combine the VAD from the base recording and the VAD from the overlapping segments # to create per-frame labels of the number of overlapping speech segments @@ -175,15 +174,15 @@ unreliable_data_dir=$overlap_dir/unreliable_data mkdir -p $unreliable_dir if [ $stage -le 8 ]; then - cat $reco_vad_dir/sad_seg.scp | \ + cat $utt_vad_dir/sad_seg.scp | \ steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps "ovlp" \ | sort -k1,1 > ${corrupted_data_dir}/sad_seg.scp utils/data/get_utt2num_frames.sh $corrupted_data_dir - utils/split_data.sh --per-reco ${orig_corrupted_data_dir} $reco_nj + utils/split_data.sh ${orig_corrupted_data_dir} $nj - $train_cmd JOB=1:$reco_nj $overlap_dir/log/get_overlap_seg.JOB.log \ + $cmd JOB=1:$nj $overlap_dir/log/get_overlap_seg.JOB.log \ segmentation-init-from-overlap-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ - "scp:utils/filter_scp.pl ${orig_corrupted_data_dir}/split${reco_nj}reco/JOB/utt2spk $corrupted_data_dir/sad_seg.scp |" \ + "scp:utils/filter_scp.pl ${orig_corrupted_data_dir}/split${nj}/JOB/utt2spk $corrupted_data_dir/sad_seg.scp |" \ ark,t:$orig_corrupted_data_dir/overlapped_segments_info.txt \ scp:$utt_vad_dir/sad_seg.scp ark:- ark:$unreliable_dir/unreliable_seg_speed_unperturbed.JOB.ark \| \ segmentation-copy --keep-label=1 ark:- ark:- \| \ @@ -192,6 +191,8 @@ if [ $stage -le 8 ]; then segmentation-init-from-ali ark:- ark:$overlap_dir/overlap_seg_speed_unperturbed.JOB.ark fi +exit 1 + if [ $stage -le 9 ]; then mkdir -p $overlap_data_dir $unreliable_data_dir cp $orig_corrupted_data_dir/wav.scp $overlap_data_dir @@ -199,21 +200,21 @@ if [ $stage -le 9 ]; then # Create segments where there is definitely an overlap. # Assume no more than 10 speakers overlap. 
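  # (Per-frame labels at this point are speaker counts: the first
  #  segmentation-post-process pass below drops labels 0 and 1 (silence or a
  #  single speaker), and the second collapses counts 2 through 10 onto the
  #  single label 1, i.e. "definitely overlapped speech".)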
- $train_cmd JOB=1:$reco_nj $overlap_dir/log/process_to_segments.JOB.log \ + $cmd JOB=1:$nj $overlap_dir/log/process_to_segments.JOB.log \ segmentation-post-process --remove-labels=0:1 \ ark:$overlap_dir/overlap_seg_speed_unperturbed.JOB.ark ark:- \| \ segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \ segmentation-to-segments ark:- ark:$overlap_data_dir/utt2spk.JOB $overlap_data_dir/segments.JOB - $train_cmd JOB=1:$reco_nj $overlap_dir/log/get_unreliable_segments.JOB.log \ + $cmd JOB=1:$nj $overlap_dir/log/get_unreliable_segments.JOB.log \ segmentation-to-segments --single-speaker \ ark:$unreliable_dir/unreliable_seg_speed_unperturbed.JOB.ark \ ark:$unreliable_data_dir/utt2spk.JOB $unreliable_data_dir/segments.JOB - for n in `seq $reco_nj`; do cat $overlap_data_dir/utt2spk.$n; done > $overlap_data_dir/utt2spk - for n in `seq $reco_nj`; do cat $overlap_data_dir/segments.$n; done > $overlap_data_dir/segments - for n in `seq $reco_nj`; do cat $unreliable_data_dir/utt2spk.$n; done > $unreliable_data_dir/utt2spk - for n in `seq $reco_nj`; do cat $unreliable_data_dir/segments.$n; done > $unreliable_data_dir/segments + for n in `seq $nj`; do cat $overlap_data_dir/utt2spk.$n; done > $overlap_data_dir/utt2spk + for n in `seq $nj`; do cat $overlap_data_dir/segments.$n; done > $overlap_data_dir/segments + for n in `seq $nj`; do cat $unreliable_data_dir/utt2spk.$n; done > $unreliable_data_dir/utt2spk + for n in `seq $nj`; do cat $unreliable_data_dir/segments.$n; done > $unreliable_data_dir/segments utils/fix_data_dir.sh $overlap_data_dir utils/fix_data_dir.sh $unreliable_data_dir @@ -233,9 +234,9 @@ fi overlap_labels_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $overlap_labels_dir ${PWD}` if [ $stage -le 10 ]; then - utils/split_data.sh --per-reco ${overlap_data_dir} $reco_nj + utils/split_data.sh ${overlap_data_dir} $nj - $train_cmd JOB=1:$reco_nj $overlap_dir/log/get_overlap_speech_labels.JOB.log \ + $cmd JOB=1:$nj $overlap_dir/log/get_overlap_speech_labels.JOB.log \ utils/data/get_reco2utt.sh ${overlap_data_dir}/split${reco_nj}reco/JOB '&&' \ segmentation-init-from-segments --shift-to-zero=false \ ${overlap_data_dir}/split${reco_nj}reco/JOB/segments ark:- \| \ @@ -260,7 +261,7 @@ if [ $stage -le 11 ]; then # Intersect this with the deriv weights segmentation from above. At this stage # deriv weights is 1 for only the regions where base VAD label is 1 and # the overlapping segment is not unreliable. Convert this to deriv weights. 
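  # (Deriv weights act as per-frame 0/1 masks at training time: frames with
  #  weight 0 are excluded from the objective and contribute no gradient, so
  #  unreliable overlap regions do not influence the model.)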
- $train_cmd JOB=1:$reco_nj $unreliable_dir/log/get_deriv_weights.JOB.log\ + $cmd JOB=1:$reco_nj $unreliable_dir/log/get_deriv_weights.JOB.log\ segmentation-init-from-segments --shift-to-zero=false \ "utils/filter_scp.pl -f 2 ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/segments |" ark:- \| \ segmentation-combine-segments-to-recordings ark:- "ark,t:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/reco2utt |" \ From ac7e7166eb5b7f0327339b451c5bfd9ba154d396 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 14 Dec 2016 00:09:02 -0500 Subject: [PATCH 120/530] asr_diarization: Updated AMI segmentation recipe --- egs/aspire/s5/local/segmentation/run_segmentation_ami.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh b/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh index 46ebf013b82..98ff4210780 100755 --- a/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh +++ b/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh @@ -75,6 +75,7 @@ cat $src_dir/data/sdm1/dev/reco2file_and_channel | \ utils/apply_map.pl -f 3 $dir/channel_map > $dir/reco2file_and_channel if [ $stage -le 5 ]; then + # Reference RTTM where SPEECH frames are obtained by combining IHM VAD alignments $train_cmd $dir/log/get_ref_rttm.log \ segmentation-combine-segments scp:$dir/sad_seg.scp \ "ark:segmentation-init-from-segments --shift-to-zero=false $src_dir/data/sdm1/dev_ihmdata/segments ark:- |" \ @@ -87,6 +88,7 @@ fi if [ $stage -le 6 ]; then + # Get a UEM which evaluates only on the manual segments. $train_cmd $dir/log/get_uem.log \ segmentation-init-from-segments --shift-to-zero=false $src_dir/data/sdm1/dev/segments ark:- \| \ segmentation-combine-segments-to-recordings ark:- ark,t:$src_dir/data/sdm1/dev/reco2utt ark:- \| \ From f6b82adabc1e5a93231a344c638bcde3c6d4435c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20S=C3=B8eborg=20Kirkedal?= Date: Thu, 15 Dec 2016 01:13:38 +0100 Subject: [PATCH 121/530] Swedish changes (#1242) * Made the same modifications to sprakbanken as @jtrmal suggested for sprakbanken_swe and removed deprecated commands from run.sh * Modified python scripts called by sprak_data_prep.sh so they work with python 2 and 3 on the request of @jtrmal (I think they are slower now because we use more regexes). Changed the preprocessing so case is not normalised and altered default behaviour to delete sentence-final '.' rather than convert to a token because it is more often the case that they are not spoken aloud. * Modified run.sh and tuned #leaves and #Gauss on dev set for GMM-based systems. Changed the scoring scripts in local/ to be similar to WSJ to get better analyses and changed the local/wer* scripts to fit this recipe. * Modify the filters in local/wer_* so they remove accents and umlauts, but keep particular Danish characters. Corrected an error in the previous commit that changed the openfst version in tools/Makefile * Added new lexicon from openslr to copy_dict.sh
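As a rough, self-contained sketch of the py2/py3-portable normalisation this commit switches to (it mirrors the regex passes added to local/normalize_transcript.py further below; the writenumbers.normNumber step is simplified here to a plain token lookup, and the example table entry is taken from numbersLow.tbl):

    import re

    def normalise(text, numtable):
        # strip '.,:;?' -- this is where a sentence-final '.' now gets deleted
        text = re.sub(r'[\.,:;\?]', '', text)
        # tabs and backslashes become spaces
        text = re.sub(r'[\t\\]', ' ', text)
        # collapse runs of spaces
        text = re.sub(r' +', ' ', text.strip())
        # expand written numbers token by token, e.g. numtable['21'] == 'en og tyve'
        return ' '.join(numtable.get(tok, tok) for tok in text.split())

    # normalise('Det koster 21 kroner.', {'21': 'en og tyve'})
    #   -> 'Det koster en og tyve kroner'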
--- egs/sprakbanken/s5/local/copy_dict.sh | 22 +- egs/sprakbanken/s5/local/create_datasets.sh | 2 +- egs/sprakbanken/s5/local/dict_prep.sh | 129 ++------ .../s5/local/norm_dk/format_text.sh | 11 +- .../s5/local/norm_dk/numbersLow.tbl | 265 +++++++++++++++ .../s5/local/normalize_transcript.py | 17 +- .../s5/local/normalize_transcript_prefixed.py | 30 +- egs/sprakbanken/s5/local/score.sh | 124 ++++++- egs/sprakbanken/s5/local/sprak_data_prep.sh | 62 ++-- egs/sprakbanken/s5/local/wer_hyp_filter | 5 + egs/sprakbanken/s5/local/wer_output_filter | 5 + egs/sprakbanken/s5/local/wer_ref_filter | 5 + egs/sprakbanken/s5/local/writenumbers.py | 1 + egs/sprakbanken/s5/run.sh | 313 +++++++----------- 14 files changed, 620 insertions(+), 371 deletions(-) create mode 100644 egs/sprakbanken/s5/local/norm_dk/numbersLow.tbl create mode 100755 egs/sprakbanken/s5/local/wer_hyp_filter create mode 100755 egs/sprakbanken/s5/local/wer_output_filter create mode 100755 egs/sprakbanken/s5/local/wer_ref_filter diff --git a/egs/sprakbanken/s5/local/copy_dict.sh b/egs/sprakbanken/s5/local/copy_dict.sh index c5cd1fc77b4..5ae5e9697b1 100755 --- a/egs/sprakbanken/s5/local/copy_dict.sh +++ b/egs/sprakbanken/s5/local/copy_dict.sh @@ -1,7 +1,8 @@ #!/bin/bash # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -# Copyright 2014 Mirsk Digital ApS (Author: Andreas Kirkedal) +# Copyright 2014-15 Mirsk Digital ApS (Author: Andreas Kirkedal) +# Copyright 2016 Andreas Kirkedal # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,9 +17,7 @@ # See the Apache 2 License for the specific language governing permissions and # limitations under the License. -KALDI_ROOT=$(pwd)/../../.. - -exproot=$(pwd) +lex=lexicon-da-nonorm.tgz dir=data/local/dict mkdir -p $dir @@ -31,22 +30,13 @@ cp local/dictsrc/complexphones.txt $dir/nonsilence_phones.txt cp local/dictsrc/extra_questions.txt $dir/extra_questions.txt # Copy pre-made lexicon -wget http://www.openslr.org/resources/8/lexicon-da.tgz --directory-prefix=data/local/data/download -tar -xzf data/local/data/download/lexicon-da.tgz -C $dir +wget http://www.openslr.org/resources/8/$lex --directory-prefix=data/local/data/download +tar -xzf data/local/data/download/$lex -C $dir # silence phones, one per line. 
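# (SIL models real silence; SPN, "spoken noise", is the phone that the
#  lexicon's OOV entry -- conventionally <UNK> in Kaldi recipes -- maps to,
#  which is why both now go into silence_phones.txt below.)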
-echo SIL > $dir/silence_phones.txt +echo -e "SIL\nSPN" > $dir/silence_phones.txt echo SIL > $dir/optional_silence.txt - - - - wait - - -## TODO: add cleanup commands - echo "Dictionary preparation succeeded" diff --git a/egs/sprakbanken/s5/local/create_datasets.sh b/egs/sprakbanken/s5/local/create_datasets.sh index b0d87a730e8..891771dbce1 100755 --- a/egs/sprakbanken/s5/local/create_datasets.sh +++ b/egs/sprakbanken/s5/local/create_datasets.sh @@ -24,7 +24,7 @@ fi src=$1 dest=$2 mkdir $dest -python3 local/normalize_transcript_prefixed.py local/norm_dk/numbersUp.tbl $src/text.unnormalised $src/onlyids $src/transcripts.am +python local/normalize_transcript_prefixed.py local/norm_dk/numbersLow.tbl $src/text.unnormalised $src/onlyids $src/transcripts.am local/norm_dk/format_text.sh am $src/transcripts.am > $src/onlytext paste -d ' ' $src/onlyids $src/onlytext > $dest/text for f in wav.scp utt2spk; do diff --git a/egs/sprakbanken/s5/local/dict_prep.sh b/egs/sprakbanken/s5/local/dict_prep.sh index 8ecfa028408..1e37460dbe5 100755 --- a/egs/sprakbanken/s5/local/dict_prep.sh +++ b/egs/sprakbanken/s5/local/dict_prep.sh @@ -2,6 +2,7 @@ # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) # Copyright 2014 Mirsk Digital ApS (Author: Andreas Kirkedal) +# Copyright 2014-2016 Andreas Kirkedal # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,57 +20,24 @@ KALDI_ROOT=$(pwd)/../../.. exproot=$(pwd) -dir=data/local/dict +lmdir=data/local/transcript_lm +dictsrc=data/local/dictsrc +dictdir=data/local/dict espeakdir='espeak-1.48.04-source' -mkdir -p $dir +mkdir -p $dictsrc $dictdir # Dictionary preparation: - -# Normalise transcripts and create a transcript file -# Removes '.,:;?' and removes '\' before '\Komma' (dictated ',') -# outputs a normalised transcript without utterance ids and a list of utterance ids -echo "Normalising" - -# Create dir to hold lm files and other non-standard files, useful for debugging -trainsrc=data/local/trainsrc -rm -rf $trainsrc -mkdir $trainsrc -mv data/train/text1 $trainsrc/text1 -python3 local/normalize_transcript_prefixed.py local/norm_dk/numbersUp.tbl $trainsrc/text1 $trainsrc/onlyids $dir/transcripts.tmp - -# Additional normalisation, uppercasing, writing numbers etc. -# and recombine with -local/norm_dk/format_text.sh am $dir/transcripts.tmp > $dir/transcripts.am -cp $dir/transcripts.am $trainsrc/onlytext -paste $trainsrc/onlyids $trainsrc/onlytext > data/train/text -utils/validate_data_dir.sh --no-feat data/train || exit 1; - - - -# lmsents is output by sprak_data_prep.sh and contains -# sentences that are disjoint from the test and dev set -python3 local/normalize_transcript.py local/norm_dk/numbersUp.tbl data/local/data/lmsents $dir/lmsents.norm -wait - # Create wordlist from the AM transcripts -cat $dir/transcripts.am | tr [:blank:] '\n' | sort -u > $dir/wlist.txt & - -# Because training data is read aloud, there are many occurences of the same -# sentence and bias towards the domain. Make a version where -# the sentences are unique to reduce bias. 
-local/norm_dk/format_text.sh lm $dir/lmsents.norm > $dir/transcripts.txt -sort -u $dir/transcripts.txt > $dir/transcripts.uniq - +cat $lmdir/transcripts.uniq | tr [:blank:] '\n' | sort -u > $dictsrc/wlist.txt & # Install eSpeak if it is not installed already - if hash espeak 2>/dev/null; - then +then echo 'eSpeak installed' - else - cd $KALDI_ROOT/tools || exit 1; +else + cd $KALDI_ROOT/tools || exit 1; wget http://sourceforge.net/projects/espeak/files/espeak/espeak-1.48/${espeakdir}.zip wait unzip -q $espeakdir.zip @@ -81,87 +49,60 @@ if hash espeak 2>/dev/null; cd $exproot || exit 1; fi - - # Wait for the wordlist to be fully created -wait - +wait # Run wordlist through espeak to get phonetics # improvised parallelisation - simple call because 'split' often has different versions -split -l 10000 $dir/wlist.txt $dir/Wtemp_ -for w in $dir/Wtemp_*; do - (cat $w | espeak -q -vda -x > $w.pho) & +split -l 10000 $dictsrc/wlist.txt $dictsrc/Wtemp_ +for w in $dictsrc/Wtemp_*; do + (cat $w | espeak -q -vda -x > $w.pho) & done wait -cat $dir/Wtemp_*.pho > $dir/plist.txt -rm -f $dir/Wtemp_* +cat $dictsrc/Wtemp_*.pho > $dictsrc/plist.txt +rm -f $dictsrc/Wtemp_* # Filter transcription -# Remove diacritics, language annotation ((da), (en), (fr) etc.), insert space between symbols, remove +# Remove diacritics, language annotation ((da), (en), (fr) etc.), insert space between symbols, remove # initial and trailing spaces and collapse 2 or more spaces to one space -cat $dir/plist.txt | perl -pe 's/\([[a-z]{2}\)//g' | perl -pe 's// /g' | perl -pe 's/ a I / aI /g' | perl -pe 's/ d Z / dZ /g' | perl -pe 's/ \? / /g' | perl -pe 's/ ([\#]) /\+ /g' | perl -pe 's/([\@n3]) \- /\1\- /g' | perl -pe "s/[\_\:\!\'\,\|2]//g" | perl -pe 's/ \- / /g' | tr -s ' ' | perl -pe 's/^ +| +$//g' > $dir/plist2.txt +cat $dictsrc/plist.txt | perl -pe 's/\([[a-z]{2}\)//g' | perl -pe 's// /g' | perl -pe 's/ a I / aI /g' | perl -pe 's/ d Z / dZ /g' | perl -pe 's/ \? / /g' | perl -pe 's/ ([\#]) /\+ /g' | perl -pe 's/([\@n3]) \- /\1\- /g' | perl -pe "s/[\_\:\!\'\,\|2]//g" | perl -pe 's/ \- / /g' | tr -s ' ' | perl -pe 's/^ +| +$//g' > $dictsrc/plist2.txt #Some question marks are not caught above -perl -pe 's/ \? / /g' $dir/plist2.txt > $dir/plist3.txt +perl -pe 's/ \? / /g' $dictsrc/plist2.txt > $dictsrc/plist3.txt # Create lexicon.txt and put it in data/local/dict -paste $dir/wlist.txt $dir/plist3.txt > $dir/lexicon1.txt +paste $dictsrc/wlist.txt $dictsrc/plist3.txt > $dictsrc/lexicon1.txt # Remove entries without transcription -grep -P "^.+\t.+$" $dir/lexicon1.txt > $dir/lexicon2.txt +grep -P "^.+\t.+$" $dictsrc/lexicon1.txt > $dictsrc/lexicon2.txt # Copy pre-made phone table with -cp local/dictsrc/complexphones.txt $dir/nonsilence_phones.txt +cp local/dictsrc/complexphones.txt $dictdir/nonsilence_phones.txt # Add "!SIL SIL" to lexicon.txt -echo -e '!SIL\tSIL' > $dir/lex_first -echo -e '\tSPN' >> $dir/lex_first -cat $dir/lexicon2.txt >> $dir/lex_first -mv $dir/lex_first $dir/lexicon.txt +echo -e '!SIL\tSIL' > $dictsrc/lex_first +echo -e '\tSPN' >> $dictsrc/lex_first +cat $dictsrc/lexicon2.txt >> $dictsrc/lex_first +mv $dictsrc/lex_first $dictdir/lexicon.txt # silence phones, one per line. 
-echo SIL > $dir/silence_phones.txt -echo SIL > $dir/optional_silence.txt - -touch $dir/extra_questions.txt - -# Repeat text preparation on test set, but do not add to dictionary -# Create dir to hold lm files and other non-standard files -testsrc=data/local/testsrc -rm -rf $testsrc -mkdir $testsrc -mv data/test/text1 $testsrc/text1 -python3 local/normalize_transcript_prefixed.py local/norm_dk/numbersUp.tbl $testsrc/text1 $testsrc/onlyids $testsrc/transcripts.am -local/norm_dk/format_text.sh am $testsrc/transcripts.am > $testsrc/onlytext -paste $testsrc/onlyids $testsrc/onlytext > data/test/text -utils/validate_data_dir.sh --no-feat data/test || exit 1; - -# Repeat text preparation on dev set, but do not add to dictionary -# Create dir to hold lm files and other non-standard files -devsrc=data/local/devsrc -rm -rf $devsrc -mkdir $devsrc -mv data/dev/text1 $devsrc/text1 -python3 local/normalize_transcript_prefixed.py local/norm_dk/numbersUp.tbl $devsrc/text1 $devsrc/onlyids $devsrc/transcripts.tmp -local/norm_dk/format_text.sh am $devsrc/transcripts.tmp > $devsrc/onlytext -paste $devsrc/onlyids $devsrc/onlytext > data/dev/text & - -# Also create a file that can be used for reranking using text features -local/norm_dk/format_text.sh lm $devsrc/transcripts.tmp > data/dev/transcripts.txt -sort -u data/dev/transcripts.txt > data/dev/transcripts.uniq - - -utils/validate_data_dir.sh --no-feat data/dev || exit 1; +if [ ! -f $dictdir/silence_phones.txt ]; then + echo SIL > $dictdir/silence_phones.txt +fi +if [ ! -f $dictdir/optional_silence.txt ]; then + echo SIL > $dictdir/optional_silence.txt +fi -## TODO: add cleanup commands +if [ ! -f $dictdir/extra_questions.txt ]; then + touch $dictdir/extra_questions.txt +fi -echo "Normalisation and dictionary preparation succeeded" +echo "Dictionary preparation succeeded" diff --git a/egs/sprakbanken/s5/local/norm_dk/format_text.sh b/egs/sprakbanken/s5/local/norm_dk/format_text.sh index ff85c8cc0ef..abbf975dbdf 100755 --- a/egs/sprakbanken/s5/local/norm_dk/format_text.sh +++ b/egs/sprakbanken/s5/local/norm_dk/format_text.sh @@ -34,8 +34,8 @@ nonum=$tmp/nonum.tmp cat $2 | tr -d '\r' > $src -$dir/expand_abbr_medical.sh $src > $abbr; -$dir/remove_annotation.sh $abbr > $rem; +#$dir/expand_abbr_medical.sh $src > $abbr; +$dir/remove_annotation.sh $src > $rem; if [ $mode != "am" ]; then $dir/sent_split.sh $rem > $line; else @@ -45,10 +45,11 @@ fi $dir/expand_dates.sh $line |\ $dir/format_punct.sh > $num; #python3 $dir/writenumbers.py $dir/numbersUp.tbl $num $nonum; -cat $num | $dir/write_punct.sh | \ +# $dir/write_punct.sh | \ +cat $num | \ perl -pi -e "s/^\n//" | \ -perl -pe 's/ (.{4}.*?)\./ \1/g' | \ -PERLIO=:utf8 perl -pe '$_=uc' +perl -pe 's/ (.{4}.*?)\./ \1/g' +# | PERLIO=:utf8 perl -pe '$_=lc' # Comment this line for debugging wait diff --git a/egs/sprakbanken/s5/local/norm_dk/numbersLow.tbl b/egs/sprakbanken/s5/local/norm_dk/numbersLow.tbl new file mode 100644 index 00000000000..824c0afa3b2 --- /dev/null +++ b/egs/sprakbanken/s5/local/norm_dk/numbersLow.tbl @@ -0,0 +1,265 @@ +¼ en fjerdedel +½ en halv +0 nul +² i anden +enogfirs en og firs +enogfyrre en og fyrre +enoghalvfems en og halvfems +enoghalvfjerds en og halvfjerds +enoghalvtreds en og halvtreds +enogtredive en og tredive +enogtredivte en og tredivte +enogtres en og tres +enogtyvende en og tyvende +femogfirs fem og firs +femogfyrre fem og fyrre +femoghalvfems fem og halvfems +femoghalvfjerds fem og halvfjerds +femoghalvtreds fem og halvtreds +femogtredive fem og tredive +femogtres fem og tres 
+femogtyve fem og tyve +femogtyvende fem og tyvende +fireogfirs fire og firs +fireogfyrre fire og fyrre +fireoghalvfems fire og halvfems +fireoghalvfjerds fire og halvfjerds +fireoghalvtreds fire og halvtreds +fireogtredive fire og tredive +fireogtres fire og tres +fireogtyve fire og tyve +fireogtyvende fire og tyvende +fyrreogtyvende fyrre og tyvende +niogfirs ni og firs +niogfyrre ni og fyrre +nioghalvfems ni og halvfems +nioghalvfjerds ni og halvfjerds +nioghalvtreds ni og halvtreds +niogtredive ni og tredive +niogtres ni og tres +niogtyvende ni og tyvende +niogtyve ni og tyve +otteogfirs otte og firs +otteogfyrre otte og fyrre +otteoghalvfems otte og halvfems +otteoghalvfjerds otte og halvfjerds +otteoghalvtreds otte og halvtreds +otteogtredive otte og tredive +otteogtres otte og tres +otteogtyvende otte og tyvende +otteogtyve otte og tyve +seksogfirs seks og firs +seksogfyrre seks og fyrre +seksoghalvfems seks og halvfems +seksoghalvfjerds seks og halvfjerds +seksoghalvtreds seks og halvtreds +seksogtredive seks og tredive +seksogtres seks og tres +seksogtyvende seks og tyvende +seksogtyve seks og tyve +syvogfirs syv og firs +syvogfyrre syv og fyrre +syvoghalvfems syv og halvfems +syvoghalvfjerds syv og halvfjerds +syvoghalvtreds syv og halvtreds +syvogtredive syv og tredive +syvogtres syv og tres +syvogtyvende syv og tyvende +syvogtyve syv og tyve +toogfirs to og firs +toogfyrre to og fyrre +tooghalvfems to og halvfems +tooghalvfjerds to og halvfjerds +tooghalvtreds to og halvtreds +toogtredive to og tredive +toogtres to og tres +toogtyvende to og tyvende +toogtyve to og tyve +totusindogatten to tusind og atten +totusindogelleve to tusind og elleve +totusindoget to tusind og et +totusindogfemten to tusind og femten +totusindogfem to tusind og fem +totusindogfire to tusind og fire +totusindogfjorten to tusind og fjorten +totusindogni to tusind og ni +totusindognitten to tusind og nitten +totusindogotte to tusind og otte +totusindogseksten to tusind og seksten +totusindogseks to tusind og seks +totusindogsytten to tusind og sytten +totusindogsyv to tusind og syv +totusindogti to tusind og ti +totusindogtolv to tusind og tolv +totusindogto to tusind og to +totusindogtre to tusind og tre +totusindogtretten to tusind og tretten +totusindogtyve to tusind og tyve +treogfirs tre og firs +treogfyrre tre og fyrre +treoghalvfems tre og halvfems +treoghalvfjerds tre og halvfjerds +treoghalvtreds tre og halvtreds +treogtredive tre og tredive +treogtres tre og tres +treogtyvende tre og tyvende +treogtyve tre og tyve +1 en +1. første +2. anden +2 to +3 tre +3. tredje +4 fire +4. fjerde +5 fem +5. femte +6 seks +6. sjette +7 syv +7. syvende +8 otte +8. ottende +9 ni +9. niende +10 ti +10. tiende +11 elleve +11. ellevte +12 tolv +12. tolvte +13 tretten +13. trettende +14 fjorten +14. fjortende +15 femten +15. femtende +16 seksten +16. sekstende +17 sytten +17. syttende +18 atten +18. attende +19 nitten +19. nittende +20 tyve +20. tyvende +21 en og tyve +21. en og tyvende +22 to og tyve +22. to og tyvende +23 tre og tyve +23. tre og tyvende +24 fire og tyve +24. fire og tyvende +25 fem og tyve +25. fem og tyvende +26 seks og tyve +26. seks og tyvende +27 syv og tyve +27. syv og tyvende +28 otte og tyve +28. otte og tyvende +29 ni og tyve +29. ni og tyvende +30 tredive +30. tredivte +31 en og tredive +31. en og tredivte +32 to og tredive +33 tre og tredive +34 fire og tredive +35 fem og tredive +36 seks og tredive +37 syv og tredive +38 otte og tredive +39 ni og tredive +40 fyrre +40. 
fyrre og tyvende +41 en og fyrre +42 to og fyrre +43 tre og fyrre +44 fire og fyrre +45 fem og fyrre +46 seks og fyrre +47 syv og fyrre +48 otte og fyrre +49 ni og fyrre +50 halvtreds +50. halvtredsinds tyvende +51 en og halvtreds +52 to og halvtreds +53 tre og halvtreds +54 fire og halvtreds +55 fem og halvtreds +56 seks og halvtreds +57 syv og halvtreds +58 otte og halvtreds +59 ni og halvtreds +60 tres +60. tresinds tyvende +61 en og tres +62 to og tres +63 tre og tres +64 fire og tres +65 fem og tres +66 seks og tres +67 syv og tres +68 otte og tres +69 ni og tres +70 halvfjerds +70. halvfjerdsinds tyvende +71 en og halvfjerds +72 to og halvfjerds +73 tre og halvfjerds +74 fire og halvfjerds +75 fem og halvfjerds +76 seks og halvfjerds +77 syv og halvfjerds +78 otte og halvfjerds +79 ni og halvfjerds +80 firs +80. firsindstyvende +81 en og firs +82 to og firs +83 tre og firs +84 fire og firs +85 fem og firs +86 seks og firs +87 syv og firs +88 otte og firs +89 ni og firs +90 halvfems +90. halvfemsinds tyvende enogtyve en og tyve +91 en og halvfems +92 to og halvfems +93 tre og halvfems +94 fire og halvfems +95 fem og halvfems +96 seks og halvfems +97 syv og halvfems +98 otte og halvfems +99 ni og halvfems +100 hundrede +1000 tusind +2000 to tusind +2001 to tusind og et +2002 to tusind og to +2003 to tusind og tre +2004 to tusind og fire +2005 to tusind og fem +2006 to tusind og seks +2007 to tusind og syv +2008 to tusind og otte +2009 to tusind og ni +2010 to tusind og ti +2011 to tusind og elleve +2012 to tusind og tolv +2013 to tusind og tretten +2014 to tusind og fjorten +2015 to tusind og femten +2016 to tusind og seksten +2017 to tusind og sytten +2018 to tusind og atten +2019 to tusind og nitten +2020 to tusind og tyve diff --git a/egs/sprakbanken/s5/local/normalize_transcript.py b/egs/sprakbanken/s5/local/normalize_transcript.py index f759a39731d..2374418bee7 100755 --- a/egs/sprakbanken/s5/local/normalize_transcript.py +++ b/egs/sprakbanken/s5/local/normalize_transcript.py @@ -1,9 +1,10 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- import codecs import sys import re import writenumbers - +from string import maketrans ## Global vars @@ -16,7 +17,10 @@ "\t": " " } -t_table = str.maketrans(normdict) +from_chars = ''.join(normdict.keys()) +to_chars = ''.join(normdict.values()) + +#t_table = maketrans(from_chars, to_chars) ## Main @@ -27,10 +31,11 @@ for line in transcript: - normtext1 = line.translate(t_table) - normtext2 = re.sub(r' +', ' ', normtext1.strip()) - normtext3 = writenumbers.normNumber(normtext2, numtable) - outtext.write(normtext3.upper() + "\n") + normtext1 = re.sub(r'[\.,:;\?]', '', line) + normtext2 = re.sub(r'[\t\\]', ' ', normtext1) + normtext3 = re.sub(r' +', ' ', normtext2.strip()) + normtext4 = writenumbers.normNumber(normtext3, numtable) + outtext.write(normtext4) transcript.close() outtext.close() diff --git a/egs/sprakbanken/s5/local/normalize_transcript_prefixed.py b/egs/sprakbanken/s5/local/normalize_transcript_prefixed.py index e934533a393..557606ae205 100755 --- a/egs/sprakbanken/s5/local/normalize_transcript_prefixed.py +++ b/egs/sprakbanken/s5/local/normalize_transcript_prefixed.py @@ -1,6 +1,7 @@ #!/usr/bin/env python ''' # Copyright 2013-2014 Mirsk Digital Aps (Author: Andreas Kirkedal) +# Copyright 2014-2016 Andreas Kirkedal # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -24,16 +25,16 @@ ## Global vars -normdict = {",": " ", - ":": " ", - ";": " ", - "?": " ", - "\\": " ", - "\t": " ", - #".": "" - } +# normdict = {",": " ", +# ":": " ", +# ";": " ", +# "?": " ", +# "\\": " ", +# "\t": " ", +# #".": "" +# } -t_table = str.maketrans(normdict) +# t_table = str.maketrans(normdict) ## Utility function @@ -51,12 +52,13 @@ def getuttid_text(line): for line in textin: utt_id, text = getuttid_text(line) - normtext1 = text.translate(t_table) - normtext2 = re.sub(r' +', ' ', normtext1.strip()) - normtext3 = writenumbers.normNumber(normtext2, numtable) - + normtext1 = re.sub(r'[\.,:;\?]', '', text) + normtext2 = re.sub(r'[\t\\]', ' ', normtext1) + normtext3 = re.sub(r' +', ' ', normtext2.strip()) + normtext4 = writenumbers.normNumber(normtext3, numtable) + outtext.write(normtext4) fid.write(utt_id + "\n") - outtext.write(normtext3) + textin.close() outtext.close() diff --git a/egs/sprakbanken/s5/local/score.sh b/egs/sprakbanken/s5/local/score.sh index abd8149a672..9fcafdc0b5c 100755 --- a/egs/sprakbanken/s5/local/score.sh +++ b/egs/sprakbanken/s5/local/score.sh @@ -1,18 +1,24 @@ #!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal) # Apache 2.0 +# See the script steps/scoring/score_kaldi_cer.sh in case you need to evaluate CER + [ -f ./path.sh ] && . ./path.sh # begin configuration section. cmd=run.pl stage=0 -decode_mbr=true -word_ins_penalty=0.0 +decode_mbr=false +stats=true +beam=6 +word_ins_penalty=0.0,0.5,1.0 min_lmwt=7 max_lmwt=17 +iter=final #end configuration section. +echo "$0 $@" # Print the command line for logging [ -f ./path.sh ] && . ./path.sh . parse_options.sh || exit 1; @@ -37,21 +43,107 @@ for f in $symtab $dir/lat.1.gz $data/text; do [ ! 
-f $f ] && echo "score.sh: no such file $f" && exit 1; done -mkdir -p $dir/scoring/log -cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt +ref_filtering_cmd="cat" +[ -x local/wer_output_filter ] && ref_filtering_cmd="local/wer_output_filter" +[ -x local/wer_ref_filter ] && ref_filtering_cmd="local/wer_ref_filter" +hyp_filtering_cmd="cat" +[ -x local/wer_output_filter ] && hyp_filtering_cmd="local/wer_output_filter" +[ -x local/wer_hyp_filter ] && hyp_filtering_cmd="local/wer_hyp_filter" + + +if $decode_mbr ; then + echo "$0: scoring with MBR, word insertion penalty=$word_ins_penalty" +else + echo "$0: scoring with word insertion penalty=$word_ins_penalty" +fi + + +mkdir -p $dir/scoring_kaldi +cat $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt.txt || exit 1; +if [ $stage -le 0 ]; then + + for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + mkdir -p $dir/scoring_kaldi/penalty_$wip/log + + if $decode_mbr ; then + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \ + acwt=\`perl -e \"print 1.0/LMWT\"\`\; \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ + lattice-prune --beam=$beam ark:- ark:- \| \ + lattice-mbr-decode --word-symbol-table=$symtab \ + ark:- ark,t:- \| \ + utils/int2sym.pl -f 2- $symtab \| \ + $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1; + + else + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab ark:- ark,t:- \| \ + utils/int2sym.pl -f 2- $symtab \| \ + $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1; + fi + + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.LMWT.log \ + cat $dir/scoring_kaldi/penalty_$wip/LMWT.txt \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring_kaldi/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; + + done +fi -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ - lattice-best-path --word-symbol-table=$symtab \ - ark:- ark,t:$dir/scoring/LMWT.tra || exit 1; -# Note: the double level of quoting for the sed command -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - cat $dir/scoring/LMWT.tra \| \ - utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ - compute-wer --text --mode=present \ - ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; + +if [ $stage -le 1 ]; then + + for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + for lmwt in $(seq $min_lmwt $max_lmwt); do + # adding /dev/null to the command list below forces grep to output the filename + grep WER $dir/wer_${lmwt}_${wip} /dev/null + done + done | utils/best_wer.sh >& $dir/scoring_kaldi/best_wer || exit 1 + + best_wer_file=$(awk '{print $NF}' $dir/scoring_kaldi/best_wer) + best_wip=$(echo $best_wer_file | awk -F_ '{print $NF}') + best_lmwt=$(echo $best_wer_file | awk -F_ '{N=NF-1; print $N}') + + if [ -z "$best_lmwt" ]; then + echo "$0: we could not get the details of the best WER from the file $dir/wer_*. Probably something went wrong." 
+    exit 1;
+  fi
+
+  if $stats; then
+    mkdir -p $dir/scoring_kaldi/wer_details
+    echo $best_lmwt > $dir/scoring_kaldi/wer_details/lmwt # record best language model weight
+    echo $best_wip > $dir/scoring_kaldi/wer_details/wip # record best word insertion penalty
+
+    $cmd $dir/scoring_kaldi/log/stats1.log \
+      cat $dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \| \
+      align-text --special-symbol="'***'" ark:$dir/scoring_kaldi/test_filt.txt ark:- ark,t:- \| \
+      utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \| tee $dir/scoring_kaldi/wer_details/per_utt \|\
+      utils/scoring/wer_per_spk_details.pl $data/utt2spk \> $dir/scoring_kaldi/wer_details/per_spk || exit 1;
+
+    $cmd $dir/scoring_kaldi/log/stats2.log \
+      cat $dir/scoring_kaldi/wer_details/per_utt \| \
+      utils/scoring/wer_ops_details.pl --special-symbol "'***'" \| \
+      sort -b -i -k 1,1 -k 4,4rn -k 2,2 -k 3,3 \> $dir/scoring_kaldi/wer_details/ops || exit 1;
+
+    $cmd $dir/scoring_kaldi/log/wer_bootci.log \
+      compute-wer-bootci --mode=present \
+        ark:$dir/scoring_kaldi/test_filt.txt ark:$dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \
+        '>' $dir/scoring_kaldi/wer_details/wer_bootci || exit 1;
+
+  fi
+fi
+
+# If we got here, the scoring was successful.
+# As a small aid to prevent confusion, we remove all wer_{?,??} files;
+# these originate from the previous version of the scoring files.
+# We keep both statements here even though this could lead to confusion about
+# the capabilities of the script (we do not compute CER in this script).
+rm $dir/wer_{?,??} 2>/dev/null
+rm $dir/cer_{?,??} 2>/dev/null
 
 exit 0;
diff --git a/egs/sprakbanken/s5/local/sprak_data_prep.sh b/egs/sprakbanken/s5/local/sprak_data_prep.sh
index c7a1d048a4f..1b2406620f2 100755
--- a/egs/sprakbanken/s5/local/sprak_data_prep.sh
+++ b/egs/sprakbanken/s5/local/sprak_data_prep.sh
@@ -2,6 +2,7 @@
 
 # Copyright 2009-2012  Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
 # Copyright 2013-2014  Mirsk Digital Aps (Author: Andreas Kirkedal)
+# Copyright 2015-2016  Andreas Kirkedal
 # Apache 2.0.
 
@@ -21,12 +22,12 @@ utils=`pwd`/utils
 
 # This recipe currently relies on version 3 because python3 uses utf8 as internal
 # string representation
-if ! which python3 >&/dev/null; then
-  echo "Installing python3 since not on your path."
-  pushd $KALDI_ROOT/tools || exit 1;
-  extras/install_python3.sh || exit 1;
-  popd
-fi
+#if ! which python3 >&/dev/null; then
+#  echo "Installing python3 since not on your path."
+#  pushd $KALDI_ROOT/tools || exit 1;
+#  extras/install_python3.sh || exit 1;
+#  popd
+#fi
 
 if [ ! -d $dir/download ]; then
     mkdir -p $dir/download/0565-1 $dir/download/0565-2
@@ -35,15 +36,15 @@ fi
 echo "Downloading and unpacking sprakbanken to $dir/corpus_processed. This will take a while."
 
 if [ ! -f $dir/download/da.16kHz.0565-1.tar.gz ]; then
-    ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0565-1.tar.gz --directory-prefix=$dir/download ) &
+    ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0565-1.tar.gz --directory-prefix=$dir/download )
 fi
 
 if [ ! -f $dir/download/da.16kHz.0565-2.tar.gz ]; then
-    ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0565-2.tar.gz --directory-prefix=$dir/download ) &
+    ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0565-2.tar.gz --directory-prefix=$dir/download )
 fi
 
-if [ ! -f $dir/download/da.16kHz.0565-1.tar.gz ]; then
-    ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0611.tar.gz --directory-prefix=$dir/download ) &
+if [ ! 
-f $dir/download/da.16kHz.0611.tar.gz ]; then + ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0611.tar.gz --directory-prefix=$dir/download ) fi wait @@ -51,8 +52,8 @@ echo "Corpus files downloaded." if [ ! -d $dir/download/0611 ]; then echo "Unpacking files." - tar -xzf $dir/download/da.16kHz.0565-1.tar.gz -C $dir/download/0565-1 & - tar -xzf $dir/download/da.16kHz.0565-2.tar.gz -C $dir/download/0565-2 & + tar -xzf $dir/download/da.16kHz.0565-1.tar.gz -C $dir/download/0565-1 + tar -xzf $dir/download/da.16kHz.0565-2.tar.gz -C $dir/download/0565-2 tar -xzf $dir/download/da.16kHz.0611.tar.gz -C $dir/download # Note: rename "da 0611 test" to "da_0611_test" for this to work @@ -62,7 +63,7 @@ if [ ! -d $dir/download/0611 ]; then fi -sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +sph2pipe=$(which sph2pipe) || sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe if [ ! -x $sph2pipe ]; then echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; exit 1; @@ -78,27 +79,25 @@ mkdir -p $dir/corpus_processed/training/0565-1 $dir/corpus_processed/training/05 # Create parallel file lists and text files, but keep sound files in the same location to save disk space # Writes the lists to data/local/data (~ 310h) echo "Creating parallel data for training data." -python3 $local/sprak2kaldi.py $dir/download/0565-1 $dir/corpus_processed/training/0565-1 & # ~130h -python3 $local/sprak2kaldi.py $dir/download/0565-2 $dir/corpus_processed/training/0565-2 & # ~115h -python3 $local/sprak2kaldi.py $dir/download/0611/Stasjon05 $dir/corpus_processed/training/0611_Stasjon05 & # ~51h +python $local/sprak2kaldi.py $dir/download/0565-1 $dir/corpus_processed/training/0565-1 # ~130h +python $local/sprak2kaldi.py $dir/download/0565-2 $dir/corpus_processed/training/0565-2 # ~115h +python $local/sprak2kaldi.py $dir/download/0611/Stasjon05 $dir/corpus_processed/training/0611_Stasjon05 # ~51h ( # Ditto dev set (~ 16h) echo "Creating parallel data for test data." rm -rf $dir/corpus_processed/dev03 mkdir -p $dir/corpus_processed/dev03 - python3 $local/sprak2kaldi.py $dir/download/0611/Stasjon03 $dir/corpus_processed/dev03 & -) & + python $local/sprak2kaldi.py $dir/download/0611/Stasjon03 $dir/corpus_processed/dev03 || exit 1; +) ( # Ditto test set (about 9 hours) echo "Creating parallel data for development data." rm -rf $dir/corpus_processed/test06 mkdir -p $dir/corpus_processed/test06 - python3 $local/sprak2kaldi.py $dir/download/0611/Stasjon06 $dir/corpus_processed/test06 || exit 1; -) & - -wait + python $local/sprak2kaldi.py $dir/download/0611/Stasjon06 $dir/corpus_processed/test06 || exit 1; +) # Create the LM training data # Test and dev data is disjoint from training data, so we use those transcripts) @@ -110,10 +109,10 @@ wait ( echo "Writing the LM text to file and normalising." cat $dir/corpus_processed/training/0565-1/txtlist $dir/corpus_processed/training/0565-2/txtlist | while read l; do cat $l; done > $lmdir/lmsents - python3 local/normalize_transcript.py local/norm_dk/numbersUp.tbl $lmdir/lmsents $lmdir/lmsents.norm + python local/normalize_transcript.py local/norm_dk/numbersLow.tbl $lmdir/lmsents $lmdir/lmsents.norm local/norm_dk/format_text.sh lm $lmdir/lmsents.norm > $lmdir/transcripts.txt sort -u $lmdir/transcripts.txt > $lmdir/transcripts.uniq -) & +) # Combine training file lists echo "Combine file lists." 
@@ -131,18 +130,15 @@ cp $dir/corpus_processed/test06/sndlist $dir/testsndfiles # Write wav.scp, utt2spk and text.unnormalised for train, test and dev sets with # Use sph2pipe because the wav files are actually sph files echo "Creating wav.scp, utt2spk and text.unnormalised for train, test and dev" -python3 $local/data_prep.py $dir/traintxtfiles $traindir $dir/trainsndfiles $sph2pipe & -python3 $local/data_prep.py $dir/testtxtfiles $testdir $dir/testsndfiles $sph2pipe & -python3 $local/data_prep.py $dir/devtxtfiles $devdir $dir/devsndfiles $sph2pipe & +python $local/data_prep.py $dir/traintxtfiles $traindir $dir/trainsndfiles $sph2pipe +python $local/data_prep.py $dir/testtxtfiles $testdir $dir/testsndfiles $sph2pipe +python $local/data_prep.py $dir/devtxtfiles $devdir $dir/devsndfiles $sph2pipe -wait # Create the main data sets -local/create_datasets.sh $testdir data/test & -local/create_datasets.sh $devdir data/dev & -local/create_datasets.sh $traindir data/train & - -wait +local/create_datasets.sh $testdir data/test +local/create_datasets.sh $devdir data/dev +local/create_datasets.sh $traindir data/train ## TODO diff --git a/egs/sprakbanken/s5/local/wer_hyp_filter b/egs/sprakbanken/s5/local/wer_hyp_filter new file mode 100755 index 00000000000..8ecbdd3ec04 --- /dev/null +++ b/egs/sprakbanken/s5/local/wer_hyp_filter @@ -0,0 +1,5 @@ +#!/bin/bash + +perl -C -pe 's:::g; s:::g; s:::g' | \ +perl -pe 's/é|è|ë/e/g; s/á|à|ä/a/g; s/ó|ò|ö/o/g; s/ú|ù|ü/u/g; s/É|È|Ë/E/g; s/Ó|Ò|Ö/O/g;' | \ +PERLIO=:utf8 perl -pe '$_=lc' diff --git a/egs/sprakbanken/s5/local/wer_output_filter b/egs/sprakbanken/s5/local/wer_output_filter new file mode 100755 index 00000000000..8ecbdd3ec04 --- /dev/null +++ b/egs/sprakbanken/s5/local/wer_output_filter @@ -0,0 +1,5 @@ +#!/bin/bash + +perl -C -pe 's:::g; s:::g; s:::g' | \ +perl -pe 's/é|è|ë/e/g; s/á|à|ä/a/g; s/ó|ò|ö/o/g; s/ú|ù|ü/u/g; s/É|È|Ë/E/g; s/Ó|Ò|Ö/O/g;' | \ +PERLIO=:utf8 perl -pe '$_=lc' diff --git a/egs/sprakbanken/s5/local/wer_ref_filter b/egs/sprakbanken/s5/local/wer_ref_filter new file mode 100755 index 00000000000..8ecbdd3ec04 --- /dev/null +++ b/egs/sprakbanken/s5/local/wer_ref_filter @@ -0,0 +1,5 @@ +#!/bin/bash + +perl -C -pe 's:::g; s:::g; s:::g' | \ +perl -pe 's/é|è|ë/e/g; s/á|à|ä/a/g; s/ó|ò|ö/o/g; s/ú|ù|ü/u/g; s/É|È|Ë/E/g; s/Ó|Ò|Ö/O/g;' | \ +PERLIO=:utf8 perl -pe '$_=lc' diff --git a/egs/sprakbanken/s5/local/writenumbers.py b/egs/sprakbanken/s5/local/writenumbers.py index 452cd3e7e9c..df3235243d4 100755 --- a/egs/sprakbanken/s5/local/writenumbers.py +++ b/egs/sprakbanken/s5/local/writenumbers.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- ''' # Copyright 2014 Author: Andreas Kirkedal diff --git a/egs/sprakbanken/s5/run.sh b/egs/sprakbanken/s5/run.sh index 34c1f18d964..53fd7b1484e 100755 --- a/egs/sprakbanken/s5/run.sh +++ b/egs/sprakbanken/s5/run.sh @@ -4,198 +4,139 @@ ## This relates to the queue. . ./path.sh # so python3 is on the path if not on the system (we made a link to utils/).a -# This is a shell script, but it's recommended that you run the commands one by -# one by copying and pasting into the shell. - - -# Download the corpus and prepare parallel lists of sound files and text files -# Divide the corpus into train, dev and test sets -local/sprak_data_prep.sh || exit 1; - -# Perform text normalisation, prepare dict folder and LM data transcriptions -# This setup uses previsously prepared data. 
eSpeak must be installed and in PATH to use dict_prep.sh -#local/dict_prep.sh || exit 1; -local/copy_dict.sh || exit 1; - - -utils/prepare_lang.sh data/local/dict "" data/local/lang_tmp data/lang || exit 1; - -# Now make MFCC features. -# mfccdir should be some place with a largish disk where you -# want to store MFCC features. -mfccdir=mfcc - - -# Extract mfccs -# p was added to the rspecifier (scp,p:$logdir/wav.JOB.scp) in make_mfcc.sh because some -# wave files are corrupt -# Will return a warning message because of the corrupt audio files, but compute them anyway -# If this step fails and prints a partial diff, rerun from sprak_data_prep.sh - -steps/make_mfcc.sh --nj 10 --cmd $train_cmd data/test exp/make_mfcc/test mfcc & -steps/make_mfcc.sh --nj 10 --cmd $train_cmd data/dev exp/make_mfcc/dev mfcc & -steps/make_mfcc.sh --nj 10 --cmd $train_cmd data/train exp/make_mfcc/train mfcc || exit 1; -wait - -# Compute cepstral mean and variance normalisation -steps/compute_cmvn_stats.sh data/test exp/make_mfcc/test mfcc & -steps/compute_cmvn_stats.sh data/dev exp/make_mfcc/dev mfcc & -steps/compute_cmvn_stats.sh data/train exp/make_mfcc/train mfcc - -wait - -# Repair data set (remove corrupt data points with corrupt audio) - -utils/fix_data_dir.sh data/test & -utils/fix_data_dir.sh data/dev & -utils/fix_data_dir.sh data/train -wait - -# Train LM with CMUCLMTK -# This setup uses IRSTLM -#local/sprak_train_lm.sh &> data/local/cmuclmtk/lm.log - -# Train LM with irstlm -local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 3 "3g" data/lang data/local/train3_lm &> data/local/3g.log & -local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 4 "4g" data/lang data/local/train4_lm &> data/local/4g.log - -# Make subset with 1k utterances for rapid testing -# Randomly selects 980 utterances from 7 speakers -utils/subset_data_dir.sh --per-spk data/test 140 data/test1k & - -# Now make subset of the training data with the shortest 120k utterances. -utils/subset_data_dir.sh --shortest data/train 120000 data/train_120kshort || exit 1; - -# Train monophone model on short utterances -steps/train_mono.sh --nj 30 --cmd "$train_cmd" \ - data/train_120kshort data/lang exp/mono0a || exit 1; - -# Ensure that LMs are created -wait - -utils/mkgraph.sh data/lang_test_3g exp/mono0a exp/mono0a/graph_3g & -utils/mkgraph.sh data/lang_test_4g exp/mono0a exp/mono0a/graph_4g & - -# Ensure that all graphs are constructed -wait - -steps/decode.sh --nj 7 --cmd "$decode_cmd" \ - exp/mono0a/graph_3g data/test1k exp/mono0a/decode_3g_test1k - -# steps/align_si.sh --boost-silence 1.25 --nj 42 --cmd "$train_cmd" \ -steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - data/train data/lang exp/mono0a exp/mono0a_ali || exit 1; - -# steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \ -steps/train_deltas.sh --cmd "$train_cmd" \ - 2000 10000 data/train data/lang exp/mono0a_ali exp/tri1 || exit 1; - -wait - - -utils/mkgraph.sh data/lang_test_3g exp/tri1 exp/tri1/graph_3g & -utils/mkgraph.sh data/lang_test_4g exp/tri1 exp/tri1/graph_4g || exit 1; - -( -steps/decode.sh --nj 7 --cmd "$decode_cmd" \ - exp/tri1/graph_4g data/test1k exp/tri1/decode_4g_test1k || exit 1; -) & - -( -steps/decode.sh --nj 7 --cmd "$decode_cmd" \ - exp/tri1/graph_3g data/test1k exp/tri1/decode_3g_test1k || exit 1; -) & - -wait - -steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - data/train data/lang exp/tri1 exp/tri1_ali || exit 1; - - -# Train tri2a, which is deltas + delta-deltas. 
-steps/train_deltas.sh --cmd "$train_cmd" \
-    2500 15000 data/train data/lang exp/tri1_ali exp/tri2a || exit 1;
-
-utils/mkgraph.sh data/lang_test_3g exp/tri2a exp/tri2a/graph_3g || exit 1;
-
-steps/decode.sh --nj 7 --cmd "$decode_cmd" \
-  exp/tri2a/graph_3g data/test1k exp/tri2a/decode_3g_test1k || exit 1;
-
-
-steps/train_lda_mllt.sh --cmd "$train_cmd" \
-   --splice-opts "--left-context=5 --right-context=5" \
-   2500 15000 data/train data/lang exp/tri1_ali exp/tri2b || exit 1;
-
-utils/mkgraph.sh data/lang_test_3g exp/tri2b exp/tri2b/graph_3g || exit 1;
-steps/decode.sh --nj 7 --cmd "$decode_cmd" \
-  exp/tri2b/graph_3g data/test1k exp/tri2b/decode_3g_test1k || exit 1;
-
-
-steps/align_si.sh --nj 30 --cmd "$train_cmd" \
-  --use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali || exit 1;
-
-wait
-
-
-# From 2b system, train 3b which is LDA + MLLT + SAT.
-steps/train_sat.sh --cmd "$train_cmd" \
-  2500 15000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1;
-utils/mkgraph.sh data/lang_test_3g exp/tri3b exp/tri3b/graph_3g || exit 1;
-steps/decode_fmllr.sh --nj 7 --cmd "$decode_cmd" \
-  exp/tri3b/graph_3g data/test1k exp/tri3b/decode_3g_test1k || exit 1;
-
-
-# Trying 4-gram language model
-utils/mkgraph.sh data/lang_test_4g exp/tri3b exp/tri3b/graph_4g || exit 1;
-
-steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 7 \
-  exp/tri3b/graph_4g data/test1k exp/tri3b/decode_4g_test1k || exit 1;
-
-# This is commented out for now as it's not important for the main recipe.
-## Train RNN for reranking
-#local/sprak_train_rnnlms.sh data/local/dict data/dev/transcripts.uniq data/local/rnnlms/g_c380_d1k_h100_v130k
-## Consumes a lot of memory! Do not run in parallel
-#local/sprak_run_rnnlms_tri3b.sh data/lang_test_3g data/local/rnnlms/g_c380_d1k_h100_v130k data/test1k exp/tri3b/decode_3g_test1k
-
-
-# From 3b system
-steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
+nj=12
+
+stage=0
+. utils/parse_options.sh
+
+if [ $stage -le 0 ]; then
+  # Download the corpus and prepare parallel lists of sound files and text files
+  # Divide the corpus into train, dev and test sets
+  local/sprak_data_prep.sh || exit 1;
+fi
+
+if [ $stage -le 1 ]; then
+  # Perform text normalisation, prepare dict folder and LM data transcriptions
+  # This setup uses previously prepared data. eSpeak must be installed and in PATH to use dict_prep.sh
+  # local/dict_prep.sh || exit 1;
+  local/copy_dict.sh || exit 1;
+fi
+
+if [ $stage -le 2 ]; then
+  utils/prepare_lang.sh data/local/dict "" data/local/lang_tmp data/lang || exit 1;
+fi
+
+if [ $stage -le 3 ]; then
+  # Extract mfccs
+  # p was added to the rspecifier (scp,p:$logdir/wav.JOB.scp) in make_mfcc.sh because some
+  # wave files are corrupt
+  # Will return a warning message because of the corrupt audio files, but compute them anyway
+  # If this step fails and prints a partial diff, rerun from sprak_data_prep.sh
+  for dataset in train test dev; do
+    steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" data/$dataset || exit 1;
+
+    # Compute cepstral mean and variance normalisation
+    steps/compute_cmvn_stats.sh data/$dataset || exit 1;
+
+    # Repair data set (remove corrupt data points with corrupt audio)
+    utils/fix_data_dir.sh data/$dataset || exit 1;
+
+  done
+  # Make a subset of the training data with the shortest 120k utterances. 
+ utils/subset_data_dir.sh --shortest data/train 120000 data/train_120kshort || exit 1; +fi + +if [ $stage -le 4 ]; then + # Train LM with irstlm + local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 3 "tg" data/lang data/local/train3_lm &> data/local/tg.log || exit 1; + local/train_irstlm.sh data/local/transcript_lm/transcripts.uniq 4 "fg" data/lang data/local/train4_lm &> data/local/fg.log || exit 1; +fi + +if [ $stage -le 5 ]; then + # Train monophone model on short utterances + steps/train_mono.sh --nj $nj --cmd "$train_cmd" \ + data/train_120kshort data/lang exp/mono0a || exit 1; + utils/mkgraph.sh --mono data/lang_test_tg exp/mono0a exp/mono0a/graph_tg || exit 1; + steps/decode.sh --nj 12 --cmd "$decode_cmd" \ + exp/mono0a/graph_tg data/dev exp/mono0a/decode_tg_dev || exit 1; +fi + +if [ $stage -le 6 ]; then + # Train tri1 (delta+delta-delta) + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/train data/lang exp/mono0a exp/mono0a_ali || exit 1; + steps/train_deltas.sh --cmd "$train_cmd" \ + 3000 40000 data/train data/lang exp/mono0a_ali exp/tri1 || exit 1; + + # Decode dev set with both LMs + utils/mkgraph.sh data/lang_test_tg exp/tri1 exp/tri1/graph_tg || exit 1; + utils/mkgraph.sh data/lang_test_fg exp/tri1 exp/tri1/graph_fg || exit 1; + steps/decode.sh --nj 12 --cmd "$decode_cmd" \ + exp/tri1/graph_fg data/dev exp/tri1/decode_fg_dev || exit 1; + steps/decode.sh --nj 12 --cmd "$decode_cmd" \ + exp/tri1/graph_tg data/dev exp/tri1/decode_tg_dev || exit 1; +fi + +if [ $stage -le 7 ]; then + # Train tri2a (delta + delta-delta) + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/train data/lang exp/tri1 exp/tri1_ali || exit 1; + steps/train_deltas.sh --cmd "$train_cmd" \ + 5000 60000 data/train data/lang exp/tri1_ali exp/tri2a || exit 1; + utils/mkgraph.sh data/lang_test_tg exp/tri2a exp/tri2a/graph_tg || exit 1; + steps/decode.sh --nj 12 --cmd "$decode_cmd" \ + exp/tri2a/graph_tg data/dev exp/tri2a/decode_tg_dev || exit 1; +fi + +if [ $stage -le 8 ]; then + # Train tri2b (LDA+MLLT) + steps/align_si.sh --nj $nj --cmd "$train_cmd" \ + data/train data/lang exp/tri2a exp/tri2a_ali || exit 1; + steps/train_lda_mllt.sh --cmd "$train_cmd" \ + --splice-opts "--left-context=5 --right-context=5" \ + 6500 75000 data/train data/lang exp/tri2a_ali exp/tri2b || exit 1; + utils/mkgraph.sh data/lang_test_tg exp/tri2b exp/tri2b/graph_tg || exit 1; + steps/decode.sh --nj 12 --cmd "$decode_cmd" \ + exp/tri2b/graph_tg data/dev exp/tri2b/decode_tg_dev || exit 1; +fi + +if [ $stage -le 9 ]; then + # From 2b system, train 3b which is LDA + MLLT + SAT. 
+  steps/align_si.sh --nj $nj --cmd "$train_cmd" \
+    --use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali || exit 1;
+  steps/train_sat.sh --cmd "$train_cmd" \
+    7500 100000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1;
+
+  # Decode dev with 4gram and 3gram LMs
+  utils/mkgraph.sh data/lang_test_tg exp/tri3b exp/tri3b/graph_tg || exit 1;
+  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 12 \
+    exp/tri3b/graph_tg data/dev exp/tri3b/decode_tg_dev || exit 1;
+  utils/mkgraph.sh data/lang_test_fg exp/tri3b exp/tri3b/graph_fg || exit 1;
+  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 12 \
+    exp/tri3b/graph_fg data/dev exp/tri3b/decode_fg_dev || exit 1;
+
+  # Decode test with 4gram and 3gram LMs
+  # there are fewer speakers (n=7) and decoding usually ends up waiting
+  # for a single job so we use --num-threads 2 to speed up
+  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 7 --num-threads 2 \
+    exp/tri3b/graph_tg data/test exp/tri3b/decode_tg_test || exit 1;
+  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 7 --num-threads 2 \
+    exp/tri3b/graph_fg data/test exp/tri3b/decode_fg_test || exit 1;
+fi
+
+if [ $stage -le 10 ]; then
+# Alignment used to train nnets and sgmms
+steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
   data/train data/lang exp/tri3b exp/tri3b_ali || exit 1;
+fi
 
-# From 3b system, train another SAT system (tri4a) with all the si284 data.
-
-steps/train_sat.sh --cmd "$train_cmd" \
-  4200 40000 data/train data/lang exp/tri3b_ali exp/tri4a || exit 1;
-
-utils/mkgraph.sh data/lang_test_3g exp/tri4a exp/tri4a/graph_3g || exit 1;
-steps/decode_fmllr.sh --nj 7 --cmd "$decode_cmd" \
-  exp/tri4a/graph_3g data/test1k exp/tri4a/decode_3g_test1k || exit 1;
-
-
-steps/train_quick.sh --cmd "$train_cmd" \
-  4200 40000 data/train data/lang exp/tri3b_ali exp/tri4b || exit 1;
-
-(
-  utils/mkgraph.sh data/lang_test_3g exp/tri4b exp/tri4b/graph_3g || exit 1;
-  steps/decode_fmllr.sh --nj 7 --cmd "$decode_cmd" \
-    exp/tri4b/graph_3g data/test1k exp/tri4b/decode_3g_test1k || exit 1;
-) &
-
- utils/mkgraph.sh data/lang_test_4g exp/tri4b exp/tri4b/graph_4g || exit 1;
- steps/decode_fmllr.sh --nj 7 --cmd "$decode_cmd" \
-   exp/tri4b/graph_4g data/test1k exp/tri4b/decode_4g_test1k || exit 1;
-
-wait
-
-# alignment used to train nnets and sgmms
-steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
-  data/train data/lang exp/tri4b exp/tri4b_ali || exit 1;
+##TODO: Add nnet3 and chain setups
 
 ## Works
-local/sprak_run_nnet_cpu.sh 3g test1k
+#local/sprak_run_nnet_cpu.sh tg dev
 
 ## Works
-local/sprak_run_sgmm2.sh test1k
+#local/sprak_run_sgmm2.sh dev
 
 
 # Getting results [see RESULTS file]

From 83f205ab42f819710f2205d47d3d6b9a2387ef4c Mon Sep 17 00:00:00 2001
From: Dan Povey
Date: Wed, 14 Dec 2016 17:08:13 -0800
Subject: [PATCH 122/530] Getting shortcut compilation to the point where it's testable (test failing though)

---
 src/nnet3/nnet-common.h          |   6 +-
 src/nnet3/nnet-optimize-test.cc  |  78 ++++++++++++++----
 src/nnet3/nnet-optimize-utils.cc | 124 ++++++++++++++++++++++++++++
 src/nnet3/nnet-optimize-utils.h  |   6 +-
 src/nnet3/nnet-optimize.cc       | 137 +++++++++++++++++++++----------
 src/nnet3/nnet-optimize.h        |  36 ++++++--
 6 files changed, 316 insertions(+), 71 deletions(-)

diff --git a/src/nnet3/nnet-common.h b/src/nnet3/nnet-common.h
index e6e3abe705e..f76166c0758 100644
--- a/src/nnet3/nnet-common.h
+++ b/src/nnet3/nnet-common.h
@@ -55,9 +55,9 @@ struct Index {
   bool operator < (const Index &a) const {
     if (t < a.t) { return true; }
     else if (t > a.t) { return false; }
-    else if (n < a.n) { return true; }
-    else 
if (n > a.n) { return false; } - else return (x < a.x); + else if (x < a.x) { return true; } + else if (x > a.x) { return false; } + else return (n < a.n); } Index operator + (const Index &other) const { return Index(n+other.n, t+other.t, x+other.x); diff --git a/src/nnet3/nnet-optimize-test.cc b/src/nnet3/nnet-optimize-test.cc index 40f8d824a39..1a8a00e3abf 100644 --- a/src/nnet3/nnet-optimize-test.cc +++ b/src/nnet3/nnet-optimize-test.cc @@ -27,9 +27,12 @@ namespace kaldi { namespace nnet3 { -// Run the test wothout optimizations and with optimizations specified by the -// parameter. Only print warnings; we'll fail the whole test later. -static bool UnitTestNnetOptimizeWithOptions(NnetOptimizeOptions opt_config) { +// Run the test without optimizations and with optimizations specified by the +// configs (the optimized version is done with class CachingOptimizingCompiler). +// Only print warnings; we'll fail the whole test later. +static bool UnitTestNnetOptimizeWithOptions(NnetOptimizeOptions opt_config, + CachingOptimizingCompilerOptions compiler_config) { + //opt_config.convert_addition = false; //opt_config.remove_assignments = false; //opt_config.move_sizing_commands = false; @@ -60,7 +63,7 @@ static bool UnitTestNnetOptimizeWithOptions(NnetOptimizeOptions opt_config) { { std::ostringstream os; computation.Print(os, nnet); - KALDI_LOG << "Generated computation is: " << os.str(); + KALDI_LOG << "Generated computation with no optimization or shortcut is: " << os.str(); } CheckComputationOptions check_config; // we can do the rewrite check since it's before optimization. @@ -68,12 +71,11 @@ static bool UnitTestNnetOptimizeWithOptions(NnetOptimizeOptions opt_config) { ComputationChecker checker(check_config, nnet, computation); checker.Check(); - NnetComputation computation_opt(computation); + CachingOptimizingCompiler opt_compiler(nnet, opt_config, compiler_config); + + const NnetComputation &computation_opt = *opt_compiler.Compile(request); { - Optimize(opt_config, nnet, - MaxOutputTimeInRequest(request), - &computation_opt); std::ostringstream os; computation_opt.Print(os, nnet); KALDI_LOG << "Optimized computation is: " << os.str(); @@ -84,7 +86,8 @@ static bool UnitTestNnetOptimizeWithOptions(NnetOptimizeOptions opt_config) { compute_opts.debug = true; computation.ComputeCudaIndexes(); - computation_opt.ComputeCudaIndexes(); + // computation_opt has already had this function called. + Nnet nnet_to_update(nnet); // copy of the nnet that we update... needed to // test the consolidation of backprop commands, // otherwise the optimized and non-optimized @@ -179,6 +182,8 @@ static bool UnitTestNnetOptimizeWithOptions(NnetOptimizeOptions opt_config) { // the outputs are the same. static void UnitTestNnetOptimize() { NnetOptimizeOptions optimize_all; + CachingOptimizingCompilerOptions compiler_all; + // randomly sometimes set min_deriv and max_deriv to small/large values, // which will cause some of the LimitDerivativeTimes() code to be called // (without really changing anything). @@ -187,44 +192,83 @@ static void UnitTestNnetOptimize() { // this is useful for debugging as it removes nans: // optimize_all.initialize_undefined = false; - bool success = UnitTestNnetOptimizeWithOptions(optimize_all); + bool success = UnitTestNnetOptimizeWithOptions(optimize_all, + compiler_all); if (success) return; // Test failed with full optimization. Slowly retry with various // optimizations switched off. 
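Stepping back to the nnet-common.h change above: with 'n' now compared last in Index::operator<, Indexes that share the same (t, x) sort adjacently with n increasing, i.e. 'n' varies fastest in a sorted list. Below is a minimal, self-contained sketch of the resulting order -- illustrative only, not Kaldi code; the struct is pared down to its three fields:

#include <algorithm>
#include <cstdio>
#include <vector>

// Pared-down stand-in for nnet3's Index; only the fields and the
// revised comparator matter for this illustration.
struct MiniIndex {
  int n, t, x;
  bool operator < (const MiniIndex &a) const {
    if (t < a.t) { return true; }
    else if (t > a.t) { return false; }
    else if (x < a.x) { return true; }
    else if (x > a.x) { return false; }
    else { return n < a.n; }  // 'n' is compared last.
  }
};

int main() {
  std::vector<MiniIndex> v = { {1, 0, 0}, {0, 1, 0}, {0, 0, 0}, {1, 1, 0} };
  std::sort(v.begin(), v.end());
  // Prints (n,t,x): (0,0,0) (1,0,0) (0,1,0) (1,1,0).  Within each (t,x)
  // block the n values appear consecutively -- the regular "n varies
  // fastest" layout that the shortcut-compilation code looks for.
  for (const MiniIndex &i : v)
    std::printf("(%d,%d,%d) ", i.n, i.t, i.x);
  std::printf("\n");
  return 0;
}

The test below then retries with individual optimizations switched off, one at a time: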
NnetOptimizeOptions optimize = optimize_all; - optimize.propagate_in_place = false; - bool succ_no_propagate_in_place = UnitTestNnetOptimizeWithOptions(optimize); + CachingOptimizingCompilerOptions compiler = compiler_all; + + compiler.use_shortcut = false; + bool succ_no_shortcut = UnitTestNnetOptimizeWithOptions(optimize, + compiler); + compiler = compiler_all; + + + optimize.propagate_in_place = false; + bool succ_no_propagate_in_place = UnitTestNnetOptimizeWithOptions(optimize, + compiler); optimize = optimize_all; + optimize.backprop_in_place = false; - bool succ_no_backprop_in_place = UnitTestNnetOptimizeWithOptions(optimize); + bool succ_no_backprop_in_place = UnitTestNnetOptimizeWithOptions(optimize, + compiler); + optimize = optimize_all; + optimize.optimize_row_ops = false; + bool succ_no_row_ops = UnitTestNnetOptimizeWithOptions(optimize, + compiler); optimize = optimize_all; - optimize.remove_assignments = false; - bool succ_no_remove_assignments = UnitTestNnetOptimizeWithOptions(optimize); + optimize.convert_addition = false; + bool succ_no_convert_addition = UnitTestNnetOptimizeWithOptions(optimize, + compiler); optimize = optimize_all; + + optimize.remove_assignments = false; + bool succ_no_remove_assignments = UnitTestNnetOptimizeWithOptions(optimize, + compiler); + optimize = optimize_all; + optimize.initialize_undefined = false; - bool succ_no_initialize_undefined = UnitTestNnetOptimizeWithOptions(optimize); + bool succ_no_initialize_undefined = UnitTestNnetOptimizeWithOptions(optimize, + compiler); + optimize = optimize_all; + optimize.allocate_from_other = false; + bool succ_no_allocate_from_other = UnitTestNnetOptimizeWithOptions(optimize, + compiler); optimize = optimize_all; + optimize.move_sizing_commands = false; - bool succ_no_move_sizing_commands = UnitTestNnetOptimizeWithOptions(optimize); + bool succ_no_move_sizing_commands = UnitTestNnetOptimizeWithOptions(optimize, + compiler); + optimize = optimize_all; #define KALDI_SUCCFAIL(b) ((b) ? "SUCCESS" : "FAILURE") KALDI_ERR << "Test failed with all optimizations enabled. Retried test with the " << "following optimizations turned off:" + << "\n use_shortcut ... " << KALDI_SUCCFAIL(succ_no_shortcut) << "\n propagate_in_place ... " << KALDI_SUCCFAIL(succ_no_propagate_in_place) << "\n backprop_in_place ... " << KALDI_SUCCFAIL(succ_no_backprop_in_place) + << "\n optimize_row_ops ... " << KALDI_SUCCFAIL(succ_no_row_ops) + << "\n convert_addition ... " << KALDI_SUCCFAIL(succ_no_convert_addition) << "\n remove_assignments ... " << KALDI_SUCCFAIL(succ_no_remove_assignments) << "\n initialize_undefined ... " << KALDI_SUCCFAIL(succ_no_initialize_undefined) + << "\n allocate_from_other ... " << KALDI_SUCCFAIL(succ_no_allocate_from_other) << "\n move_sizing_commands ... " << KALDI_SUCCFAIL(succ_no_move_sizing_commands); #undef KALDI_SUCCFAIL } + + + + } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc index 62bda3a17e1..de25b8bcabb 100644 --- a/src/nnet3/nnet-optimize-utils.cc +++ b/src/nnet3/nnet-optimize-utils.cc @@ -2635,6 +2635,7 @@ void ComputationExpander::InitFastInfo() { // 'n' value be zero. 
KALDI_ASSERT(debug_info.cindexes[0].second.n == 0); bool is_fast = (debug_info.cindexes[1].second.n == 1); + n_fast_[m] = is_fast; bool do_check = (RandInt(0, 2) == 0); if (do_check) { @@ -2983,6 +2984,129 @@ void ComputationExpander::GetNewLocationInfo( } } + +void ExpandComputation(const Nnet &nnet, + const MiscComputationInfo &misc_info, + const NnetComputation &computation, + bool need_debug_info, + int32 num_n_values, + NnetComputation *expanded_computation) { + ComputationExpander expander(nnet, misc_info, computation, + need_debug_info, num_n_values, + expanded_computation); + expander.Expand(); +} + + + +// This helper function is used in RequestIsDecomposable(); you can work out +// what it does, and why, from the documentation of RequestIsDecomposable() in +// the header. +static bool IoSpecificationIsDecomposable(const IoSpecification &io_spec, + IoSpecification *mini_io_spec, + int32 *num_n_values_out) { + mini_io_spec->name = io_spec.name; + mini_io_spec->has_deriv = io_spec.has_deriv; + const std::vector &indexes = io_spec.indexes; + KALDI_ASSERT(!indexes.empty() && "Empty Indexes in computation request"); + // For a computation to be decomposable, the 'n' values need to vary from 0 to + // N-1 for some N > 2, and they need to be in some kind of regular order with + // suitable repetition-- either with the 'n' values varying the 'fastest', or + // the 'slowest' of all the indexes. + if (indexes[0].n != 0 || indexes.back().n < 2) { + return false; + } + int32 num_n_values = indexes.back().n + 1, + size = indexes.size(); + *num_n_values_out = num_n_values; + if (size % num_n_values != 0) + return false; + bool n_fast = (indexes[1].n == 1); + // if 'n_fast' is true, then the n index varies the fastest (stride == 1), + // otherwise it varies the slowest of any index. We require that it be one of + // these two options, otherwise we declare the computation to be + // non-decomposable. + + mini_io_spec->indexes.resize((size / num_n_values) * 2); + if (n_fast) { + // 'block_size' is the size of blocks with the same x,t values, which are + // expected to have n values 0, 1, ... num_n_values - 1. + // of course each block is of size num_n_values. + int32 num_blocks = size / num_n_values; + const Index *indexes_ptr = &(indexes[0]); + Index *indexes_out = &(mini_io_spec->indexes[0]); + for (int32 block = 0; block < num_blocks; block++) { + *(indexes_out++) = indexes_ptr[0]; // for n == 0 + *(indexes_out++) = indexes_ptr[1]; // for n == 1. + + // we expect all the indexes in this block to have the same x and t + // values, but n values increasing from 0 to num_n_values - 1. + int32 t = indexes_ptr->t, x = indexes_ptr->x; + + for (int32 n = 0; n < num_n_values; n++, indexes_ptr++) { + if (indexes_ptr->n != n || indexes_ptr->t != t || indexes_ptr->x != x) + return false; + } + } + } else { + // 'n' varies the slowest. + int32 block_size = size / num_n_values; + mini_io_spec->indexes.clear(); + mini_io_spec->indexes.insert(mini_io_spec->indexes.end(), + indexes.begin(), + indexes.begin() + 2 * block_size); + + // now verify that it has the expected structure... 
+ for (int32 i = 0; i < block_size; i++) { + const Index *indexes_ptr = &(indexes[i]); + int32 t = indexes_ptr->t, x = indexes_ptr->x; + for (int32 n = 0; n < num_n_values; n++, indexes_ptr += block_size) { + if (indexes_ptr->n != n || indexes_ptr->t != t || indexes_ptr->x != x) + return false; + } + } + } + return true; +} + +bool RequestIsDecomposable(const ComputationRequest &request, + ComputationRequest *mini_request, + int32 *num_n_values) { + size_t num_inputs = request.inputs.size(), + num_outputs = request.outputs.size(); + mini_request->inputs.resize(num_inputs); + mini_request->outputs.resize(num_outputs); + mini_request->need_model_derivative = request.need_model_derivative; + mini_request->store_component_stats = request.store_component_stats; + mini_request->misc_info = request.misc_info; + + KALDI_ASSERT(num_inputs != 0 && num_outputs != 0); + for (size_t i = 0; i < num_inputs; i++) { + int32 this_num_n_values = 0; + if (!IoSpecificationIsDecomposable(request.inputs[i], + &(mini_request->inputs[i]), + &this_num_n_values)) + return false; + if (i == 0) { + *num_n_values = this_num_n_values; + } else { + if (this_num_n_values != *num_n_values) + return false; // .. which would be odd. + } + } + for (size_t i = 0; i < num_outputs; i++) { + int32 this_num_n_values = 0; + if (!IoSpecificationIsDecomposable(request.outputs[i], + &(mini_request->outputs[i]), + &this_num_n_values)) + return false; + if (this_num_n_values != *num_n_values) + return false; // .. which would be odd. + } + return true; +} + + class ComputationLoopedOptimizer { public: ComputationLoopedOptimizer(const Nnet &nnet, diff --git a/src/nnet3/nnet-optimize-utils.h b/src/nnet3/nnet-optimize-utils.h index 9977ca8952a..aec8c21a368 100644 --- a/src/nnet3/nnet-optimize-utils.h +++ b/src/nnet3/nnet-optimize-utils.h @@ -379,9 +379,9 @@ void LimitDerivativeTimes(const Nnet &nnet, reason to the order of the t and x values; the regularity on 'n' is all that we care about. */ -bool ComputationIsDecomposable(const ComputationRequest &request, - ComputationRequest *mini_request, - int32 *num_n_values); // TODO: implement this. 
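To make concrete the index structure that the renamed RequestIsDecomposable() (declared just below) checks for, here is a hedged, standalone sketch of the "n varies fastest" case and of how the mini-request keeps only the n == 0 and n == 1 members of each block. MiniIndex and both helper names are invented for illustration and are not Kaldi API:

#include <cassert>
#include <vector>

struct MiniIndex { int n, t, x; };  // stand-in for nnet3's Index

// Build an "n varies fastest" index list: for each t, the n values
// 0 .. num_n-1 appear consecutively (x fixed at 0 for simplicity).
std::vector<MiniIndex> NFastIndexes(int num_n, const std::vector<int> &ts) {
  std::vector<MiniIndex> ans;
  for (int t : ts)
    for (int n = 0; n < num_n; n++)
      ans.push_back({n, t, 0});
  return ans;
}

// The mini-request keeps just the n == 0 and n == 1 members of each
// (t, x) block, mirroring what the real code does in the n-fast branch.
std::vector<MiniIndex> MiniRequestIndexes(const std::vector<MiniIndex> &in,
                                          int num_n) {
  assert(num_n > 2 && in.size() % num_n == 0);
  std::vector<MiniIndex> ans;
  for (size_t i = 0; i < in.size(); i += num_n) {
    ans.push_back(in[i]);      // the n == 0 member of this block
    ans.push_back(in[i + 1]);  // the n == 1 member
  }
  return ans;
}

int main() {
  // 8 indexes with num_n = 4 shrink to 4 indexes with num_n = 2; the
  // compiled mini-computation is later re-expanded to all 4 'n' values.
  std::vector<MiniIndex> full = NFastIndexes(4, {0, 1});
  std::vector<MiniIndex> mini = MiniRequestIndexes(full, 4);
  assert(mini.size() == 4 && mini[1].n == 1 && mini[2].t == 1);
  return 0;
}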
+bool RequestIsDecomposable(const ComputationRequest &request, + ComputationRequest *mini_request, + int32 *num_n_values); /** diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc index c0c03a13ab5..c2cee31bbcc 100644 --- a/src/nnet3/nnet-optimize.cc +++ b/src/nnet3/nnet-optimize.cc @@ -575,7 +575,7 @@ size_t ComputationRequestHasher::IoSpecificationToInt(const IoSpecification& spe } void CachingOptimizingCompiler::UpdateCache(const ComputationRequest *request, - NnetComputation *computation) { + const NnetComputation *computation) { if (computation_cache_.size() == config_.cache_capacity) { // full, locate the least-recently-accessed request const CacheType::iterator it = @@ -647,58 +647,109 @@ CachingOptimizingCompiler::~CachingOptimizingCompiler() { const NnetComputation* CachingOptimizingCompiler::Compile( const ComputationRequest &in_request) { - NnetComputation *computation; - ComputationRequest *request; // find computation in the cache CacheType::iterator cit = computation_cache_.find(&in_request); if (cit == computation_cache_.end()) { - // if not found, compile and update cache - request = new ComputationRequest; - *request = in_request; - Compiler compiler(*request, nnet_); - CompilerOptions opts; - computation = new NnetComputation; - compiler.CreateComputation(opts, computation); - - int32 verbose_cutoff = 4; - if (GetVerboseLevel() >= verbose_cutoff) { - std::ostringstream os1; - request->Print(os1); - KALDI_LOG << "Computation request is " << os1.str(); - std::ostringstream os2; - computation->Print(os2, nnet_); - KALDI_LOG << "Generated computation is: " << os2.str(); - } - { // some checking. - CheckComputationOptions check_config; - // we can do the rewrite check since it's before optimization. - check_config.check_rewrite = true; - ComputationChecker checker(check_config, nnet_, *computation); - checker.Check(); - } - Optimize(opt_config_, nnet_, - MaxOutputTimeInRequest(*request), - computation); - if (GetVerboseLevel() >= verbose_cutoff) { - std::ostringstream os; - computation->Print(os, nnet_); - KALDI_LOG << "Optimized computation is: " << os.str(); - } - { // check the computation again. - CheckComputationOptions check_config; - ComputationChecker checker(check_config, nnet_, *computation); - checker.Check(); - } - computation->ComputeCudaIndexes(); - UpdateCache(request, computation); + return CompileAndCache(in_request); } else { // if found, update access queue - computation = cit->second.first; + const NnetComputation *computation = cit->second.first; UpdateAccessQueue(cit); + return computation; } +} + +const NnetComputation* CachingOptimizingCompiler::CompileAndCache( + const ComputationRequest &in_request) { + // we need to make a copy of ComputationRequest, because it's stored + // as the key in the cache, and we need to own the pointer. + ComputationRequest *request = new ComputationRequest(in_request); + + const NnetComputation *computation = CompileViaShortcut(*request); + if (computation == NULL) + computation = CompileNoShortcut(*request); + UpdateCache(request, computation); + return computation; +} + + +const NnetComputation* CachingOptimizingCompiler::CompileNoShortcut( + const ComputationRequest &request) { + + Compiler compiler(request, nnet_); + // note: 'opts' only contains 'output_debug_info', which is true by default. + // There may be situations where we'd prefer not to keep it, for speed. 
+  CompilerOptions opts;
+  NnetComputation *computation = new NnetComputation;
+  compiler.CreateComputation(opts, computation);
+
+  int32 verbose_cutoff = 4;
+  if (GetVerboseLevel() >= verbose_cutoff) {
+    std::ostringstream os1;
+    request.Print(os1);
+    KALDI_LOG << "Computation request is " << os1.str();
+    std::ostringstream os2;
+    computation->Print(os2, nnet_);
+    KALDI_LOG << "Generated computation is: " << os2.str();
+  }
+  { // some checking.  Note: there may be a time when we might
+    // prefer not to do this checking.
+    CheckComputationOptions check_config;
+    // we can do the rewrite check since it's before optimization.
+    check_config.check_rewrite = true;
+    ComputationChecker checker(check_config, nnet_, *computation);
+    checker.Check();
+  }
+  Optimize(opt_config_, nnet_,
+           MaxOutputTimeInRequest(request),
+           computation);
+  if (GetVerboseLevel() >= verbose_cutoff) {
+    std::ostringstream os;
+    computation->Print(os, nnet_);
+    KALDI_LOG << "Optimized computation is: " << os.str();
+  }
+  { // check the computation again.
+    CheckComputationOptions check_config;
+    ComputationChecker checker(check_config, nnet_, *computation);
+    checker.Check();
+  }
+  computation->ComputeCudaIndexes();
   return computation;
 }
 
+const NnetComputation* CachingOptimizingCompiler::CompileViaShortcut(
+    const ComputationRequest &request) {
+  if (!config_.use_shortcut)
+    return NULL;
+
+  int32 num_n_values;
+  ComputationRequest mini_request;
+  if (!RequestIsDecomposable(request, &mini_request, &num_n_values))
+    return NULL;
+
+  // by invoking Compile() on the mini request, we go through the same
+  // caching process as for any externally requested computation.
+  // note: this pointer is not being 'given to us'... it's owned in
+  // the cache.
+  const NnetComputation *mini_computation = Compile(mini_request);
+
+  // note: by default we always create debug_info, even in regular compilation.
+  // (e.g. it defaults to true in CompilerOptions).  If it really seems to be a
+  // significant overhead, we can revisit this at some point in future.
+  bool need_debug_info = true;
+
+
+  NnetComputation *ans = new NnetComputation();
+
+  ExpandComputation(nnet_, request.misc_info, *mini_computation,
+                    need_debug_info, num_n_values, ans);
+
+  return ans;
+}
+
+
+
 /// Split the computation up into segments bounded by kNoOperationMarker.  For
 /// each segment, a pair of command-indexes (start, end) is output to the vector
 /// 'segments', so the commands in the segment (not including
diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h
index 0df50b329a9..1ca776d4ee6 100644
--- a/src/nnet3/nnet-optimize.h
+++ b/src/nnet3/nnet-optimize.h
@@ -182,7 +182,6 @@ struct ComputationRequestPtrEqual {
 
 struct CachingOptimizingCompilerOptions {
   bool use_shortcut;
-  int32 write_cache;
   int32 cache_capacity;
 
   CachingOptimizingCompilerOptions():
@@ -229,6 +228,32 @@ class CachingOptimizingCompiler {
   void ReadCache(std::istream &is, bool binary);
   void WriteCache(std::ostream &os, bool binary) const;
  private:
+  // This function, called from Compile(), is called when a ComputationRequest
+  // has been determined not to have already been cached.  It otherwise has the
+  // same interface as Compile(), but assumes that there is nothing cached for
+  // this computation as yet.  It compiles the computation and takes care of
+  // caching it. 
+ const NnetComputation* CompileAndCache(const ComputationRequest &request); + + + // This function, called from CompileAndCache(), tries to compile the + // ComputationRequest 'request' via 'shortcut' compilation; if this is + // possible, it returns a pointer to a newly allocated computation that it has + // compiled this way (note: this computation will not yet have been placed in + // the computation cache). If this is not possible for some reason + // (e.g. shortcut compilation is disabled in the config; or the computation + // request was not decomposable because of too few n values or irregular or + // unexpected structure), this function returns NULL and you should compile + // via CompileNoShortcut. + const NnetComputation* CompileViaShortcut(const ComputationRequest &request); + + // This function, called from CompileAndCache(), tries to compile the + // ComputationRequest 'request' via the regular (not shortcut) compilation + // process; it returns a pointer to a newly allocated computation that it has + // compiled this way (note: this computation will not yet have been placed in + // the computation cache). + const NnetComputation* CompileNoShortcut(const ComputationRequest &request); + const Nnet &nnet_; CachingOptimizingCompilerOptions config_; NnetOptimizeOptions opt_config_; @@ -245,9 +270,10 @@ class CachingOptimizingCompiler { // Map from computation-request to pair of (computation, and position in // access_queue_). Used for fast lookup of previously compiled computations. // All pointers are owned here. - typedef unordered_map, ComputationRequestHasher, - ComputationRequestPtrEqual> CacheType; + typedef unordered_map, + ComputationRequestHasher, + ComputationRequestPtrEqual> CacheType; CacheType computation_cache_; // This function updates the computation cache. It is called within Compile(). @@ -255,7 +281,7 @@ class CachingOptimizingCompiler { // the queue, and purges the least-recently-accessed request from the queue and // the cache if the capacity is reached. void UpdateCache(const ComputationRequest *request, - NnetComputation *computation); + const NnetComputation *computation); // This function updates the recently accessed queue. void UpdateAccessQueue(CacheType::iterator &cit); }; From d30c2978fde64ff974acc88499d21a64aba1ca43 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 15 Dec 2016 03:31:20 -0500 Subject: [PATCH 123/530] Fix various bugs in shortcut compilation; add further testing code --- src/nnet3/nnet-derivative-test.cc | 46 ++--- src/nnet3/nnet-optimize-test.cc | 280 +++++++++++++++--------------- src/nnet3/nnet-optimize-utils.cc | 86 ++++----- src/nnet3/nnet-optimize.cc | 2 + 4 files changed, 204 insertions(+), 210 deletions(-) diff --git a/src/nnet3/nnet-derivative-test.cc b/src/nnet3/nnet-derivative-test.cc index 5dbc8a126d1..4289b577a25 100644 --- a/src/nnet3/nnet-derivative-test.cc +++ b/src/nnet3/nnet-derivative-test.cc @@ -95,7 +95,7 @@ void UnitTestNnetModelDerivatives() { //gen_config.allow_nonlinearity = false; //gen_config.allow_recursion = false; //gen_config.allow_final_nonlinearity = true; - bool allow_optimization = true; + bool limit_deriv_times = (RandInt(0, 2) == 0); std::vector configs; @@ -118,44 +118,23 @@ void UnitTestNnetModelDerivatives() { // whether input-derivatives are required or not does not matter, // so leave it as it is in that regard. 
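The cache plumbing declared in nnet-optimize.h above (computation_cache_ plus the access queue, maintained by UpdateCache() and UpdateAccessQueue()) is a standard least-recently-used arrangement. A minimal standalone sketch of the same idea follows; the class and member names here are invented for illustration and are not Kaldi's, and the value type is reduced to int:

#include <cassert>
#include <iterator>
#include <list>
#include <string>
#include <unordered_map>
#include <utility>

// Minimal LRU sketch: 'queue_' holds keys from least- to most-recently
// used; 'map_' stores each value together with the key's position in
// 'queue_', so lookup, promotion and eviction are all O(1).
class LruCache {
 public:
  explicit LruCache(size_t capacity): capacity_(capacity) {}

  // Inserts a (key, value) pair, evicting the least-recently-used entry
  // when full; assumes 'key' is not already present, which matches how
  // the compiler only inserts after a cache miss.
  void Put(const std::string &key, int value) {
    if (map_.size() == capacity_) {
      map_.erase(queue_.front());
      queue_.pop_front();
    }
    queue_.push_back(key);
    map_[key] = std::make_pair(value, std::prev(queue_.end()));
  }

  // Returns a pointer to the value and promotes the key to most-recent,
  // or returns NULL if the key is absent.
  const int *Get(const std::string &key) {
    auto it = map_.find(key);
    if (it == map_.end()) return NULL;
    queue_.splice(queue_.end(), queue_, it->second.second);
    return &(it->second.first);
  }

 private:
  size_t capacity_;
  std::list<std::string> queue_;
  std::unordered_map<std::string,
      std::pair<int, std::list<std::string>::iterator> > map_;
};

int main() {
  LruCache cache(2);
  cache.Put("a", 1);
  cache.Put("b", 2);
  assert(cache.Get("a") != NULL);   // "a" becomes most-recent.
  cache.Put("c", 3);                // evicts "b", now least-recent.
  assert(cache.Get("b") == NULL && *cache.Get("c") == 3);
  return 0;
}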
-  NnetComputation computation;
-  Compiler compiler(request, nnet);
-
-  CompilerOptions opts;
-  compiler.CreateComputation(opts, &computation);
-
-  {
-    std::ostringstream os;
-    computation.Print(os, nnet);
-    KALDI_LOG << "Generated computation is: " << os.str();
+  NnetOptimizeOptions optimize_opts;
+  CachingOptimizingCompilerOptions compiler_opts;
+  if (limit_deriv_times) {
+    SetDerivTimesOptions(request, &optimize_opts);
   }
-  CheckComputationOptions check_config;
-  // we can do the rewrite check since it's before optimization.
-  check_config.check_rewrite = true;
-  ComputationChecker checker(check_config, nnet, computation);
-  checker.Check();
 
-  if (RandInt(0, 3) != 0 && allow_optimization) {
-    NnetOptimizeOptions opt_config;
-    if (limit_deriv_times)
-      SetDerivTimesOptions(request, &opt_config);
+  CachingOptimizingCompiler compiler(nnet, optimize_opts,
+                                     compiler_opts);
 
-    Optimize(opt_config, nnet,
-             MaxOutputTimeInRequest(request),
-             &computation);
+  const NnetComputation &computation = *(compiler.Compile(request));
+
+  {
     std::ostringstream os;
     computation.Print(os, nnet);
     KALDI_LOG << "Optimized computation is: " << os.str();
-    check_config.check_rewrite = false;
-    ComputationChecker checker_opt(check_config, nnet, computation);
-    checker_opt.Check();
   }
 
-  NnetComputeOptions compute_opts;
-  if (RandInt(0, 1) == 0)
-    compute_opts.debug = true;
-  computation.ComputeCudaIndexes();
-
-
   Nnet nnet_deriv(nnet);
   bool is_gradient = true;
   SetZero(is_gradient, &nnet_deriv);  // forces "simple" update and unit
@@ -179,6 +158,11 @@ void UnitTestNnetModelDerivatives() {
                                  nnet.OutputDim("output"));
     output_deriv.SetRandn();
 
+
+    NnetComputeOptions compute_opts;
+    if (RandInt(0, 1) == 0)
+      compute_opts.debug = true;
+
     // pass 0 is the forward pass with the un-perturbed model.
     // Other passes are with various differently-perturbed versions of
     // the model.
diff --git a/src/nnet3/nnet-optimize-test.cc b/src/nnet3/nnet-optimize-test.cc
index 1a8a00e3abf..0654683aa9c 100644
--- a/src/nnet3/nnet-optimize-test.cc
+++ b/src/nnet3/nnet-optimize-test.cc
@@ -30,7 +30,8 @@ namespace nnet3 {
 // Run the test without optimizations and with optimizations specified by the
 // configs (the optimized version is done with class CachingOptimizingCompiler).
 // Only print warnings; we'll fail the whole test later.
-static bool UnitTestNnetOptimizeWithOptions(NnetOptimizeOptions opt_config,
+static bool UnitTestNnetOptimizeWithOptions(int32 srand_seed,
+                                            NnetOptimizeOptions opt_config,
                                             CachingOptimizingCompilerOptions compiler_config) {
 
   //opt_config.convert_addition = false;
@@ -38,149 +39,150 @@ static bool UnitTestNnetOptimizeWithOptions(NnetOptimizeOptions opt_config,
   //opt_config.move_sizing_commands = false;
   //opt_config.allocate_from_other = false;
 
-  srand(0);  // Every run must be deterministic.
-  for (int32 n = 0; n < 40; n++) {
-    struct NnetGenerationOptions gen_config;
-
-    std::vector<std::string> configs;
-    GenerateConfigSequence(gen_config, &configs);
-    Nnet nnet;
-    for (size_t j = 0; j < configs.size(); j++) {
-      KALDI_LOG << "Input config[" << j << "] is: " << configs[j];
-      std::istringstream is(configs[j]);
-      nnet.ReadConfig(is);
-    }
+  srand(srand_seed);  // so that we can compare between different optimization types
+                      // with the randomly generated network staying the same. 
- ComputationRequest request; - std::vector > inputs; - ComputeExampleComputationRequestSimple(nnet, &request, &inputs); + struct NnetGenerationOptions gen_config; - NnetComputation computation; - Compiler compiler(request, nnet); + std::vector configs; + GenerateConfigSequence(gen_config, &configs); + Nnet nnet; + for (size_t j = 0; j < configs.size(); j++) { + KALDI_LOG << "Input config[" << j << "] is: " << configs[j]; + std::istringstream is(configs[j]); + nnet.ReadConfig(is); + } - CompilerOptions opts; - compiler.CreateComputation(opts, &computation); - { - std::ostringstream os; - computation.Print(os, nnet); - KALDI_LOG << "Generated computation with no optimization or shortcut is: " << os.str(); - } - CheckComputationOptions check_config; - // we can do the rewrite check since it's before optimization. - check_config.check_rewrite = true; - ComputationChecker checker(check_config, nnet, computation); - checker.Check(); + ComputationRequest request; + std::vector > inputs; + ComputeExampleComputationRequestSimple(nnet, &request, &inputs); - CachingOptimizingCompiler opt_compiler(nnet, opt_config, compiler_config); + NnetComputation computation; + Compiler compiler(request, nnet); - const NnetComputation &computation_opt = *opt_compiler.Compile(request); + CompilerOptions opts; + compiler.CreateComputation(opts, &computation); + { + std::ostringstream os; + computation.Print(os, nnet); + KALDI_LOG << "Generated computation with no optimization or shortcut is: " << os.str(); + } + CheckComputationOptions check_config; + // we can do the rewrite check since it's before optimization. + check_config.check_rewrite = true; + ComputationChecker checker(check_config, nnet, computation); + checker.Check(); - { - std::ostringstream os; - computation_opt.Print(os, nnet); - KALDI_LOG << "Optimized computation is: " << os.str(); - } + CachingOptimizingCompiler opt_compiler(nnet, opt_config, compiler_config); - NnetComputeOptions compute_opts; - if (RandInt(0, 1) == 0) - compute_opts.debug = true; - - computation.ComputeCudaIndexes(); - // computation_opt has already had this function called. - - Nnet nnet_to_update(nnet); // copy of the nnet that we update... needed to - // test the consolidation of backprop commands, - // otherwise the optimized and non-optimized - // comptuations differ. - bool is_gradient = true; // with natural gradient, the consolidation would - // affect the final model params -> test just the - // gradient. - SetZero(is_gradient, &nnet_to_update); - - NnetComputer computer(compute_opts, - computation, - nnet, - &nnet_to_update); - - Nnet nnet_opt(nnet); // copy of the nnet for the optimized computation. - // necessary in case backprop changes parameters. - Nnet nnet_opt_to_update(nnet_opt); - SetZero(is_gradient, &nnet_opt_to_update); - - // NnetComputer for the optimized version of the computation. - NnetComputer computer_opt(compute_opts, - computation_opt, - nnet_opt, - &nnet_opt_to_update); - - // provide the input to the computations. 
- for (size_t i = 0; i < request.inputs.size(); i++) { - CuMatrix temp(inputs[i]); - KALDI_LOG << "Input sum is " << temp.Sum(); - computer.AcceptInput(request.inputs[i].name, &temp); - CuMatrix temp2(inputs[i]); - computer_opt.AcceptInput(request.inputs[i].name, &temp2); - } - KALDI_LOG << "Running non-optimized forward computation"; - computer.Run(); - KALDI_LOG << "Running optimized forward computation"; - computer_opt.Run(); + const NnetComputation &computation_opt = *opt_compiler.Compile(request); - const CuMatrixBase &output(computer.GetOutput("output")); - KALDI_LOG << "Output sum (not optimized) is " << output.Sum(); - const CuMatrixBase &output_opt(computer_opt.GetOutput("output")); - KALDI_LOG << "Output sum (optimized) is " << output_opt.Sum(); - if (!ApproxEqual(output, output_opt)) { - KALDI_WARN << "Non-optimized and optimized versions of the computation give " - << "different outputs."; - return false; - } + { + std::ostringstream os; + computation_opt.Print(os, nnet); + KALDI_LOG << "Optimized computation is: " << os.str(); + } + + NnetComputeOptions compute_opts; + if (RandInt(0, 1) == 0) + compute_opts.debug = true; + + computation.ComputeCudaIndexes(); + // computation_opt has already had this function called. + + Nnet nnet_to_update(nnet); // copy of the nnet that we update... needed to + // test the consolidation of backprop commands, + // otherwise the optimized and non-optimized + // comptuations differ. + bool is_gradient = true; // with natural gradient, the consolidation would + // affect the final model params -> test just the + // gradient. + SetZero(is_gradient, &nnet_to_update); + + NnetComputer computer(compute_opts, + computation, + nnet, + &nnet_to_update); + + Nnet nnet_opt(nnet); // copy of the nnet for the optimized computation. + // necessary in case backprop changes parameters. + Nnet nnet_opt_to_update(nnet_opt); + SetZero(is_gradient, &nnet_opt_to_update); + + // NnetComputer for the optimized version of the computation. + NnetComputer computer_opt(compute_opts, + computation_opt, + nnet_opt, + &nnet_opt_to_update); + + // provide the input to the computations. 
+ for (size_t i = 0; i < request.inputs.size(); i++) { + CuMatrix temp(inputs[i]); + KALDI_LOG << "Input sum is " << temp.Sum(); + computer.AcceptInput(request.inputs[i].name, &temp); + CuMatrix temp2(inputs[i]); + computer_opt.AcceptInput(request.inputs[i].name, &temp2); + } + KALDI_LOG << "Running non-optimized forward computation"; + computer.Run(); + KALDI_LOG << "Running optimized forward computation"; + computer_opt.Run(); + + const CuMatrixBase &output(computer.GetOutput("output")); + KALDI_LOG << "Output sum (not optimized) is " << output.Sum(); + const CuMatrixBase &output_opt(computer_opt.GetOutput("output")); + KALDI_LOG << "Output sum (optimized) is " << output_opt.Sum(); + if (!ApproxEqual(output, output_opt)) { + KALDI_WARN << "Non-optimized and optimized versions of the computation give " + << "different outputs."; + return false; + } + + CuMatrix output_deriv(output.NumRows(), output.NumCols()); + output_deriv.SetRandn(); + CuMatrix output_deriv_opt(output_deriv); - CuMatrix output_deriv(output.NumRows(), output.NumCols()); - output_deriv.SetRandn(); - CuMatrix output_deriv_opt(output_deriv); - - if (request.outputs[0].has_deriv) { - computer.AcceptInput("output", &output_deriv); - computer_opt.AcceptInput("output", &output_deriv_opt); - - KALDI_LOG << "Running non-optimized backward computation"; - computer.Run(); - KALDI_LOG << "Running optimized backward computation"; - computer_opt.Run(); - for (size_t i = 0; i < request.inputs.size(); i++) { - if (request.inputs[i].has_deriv) { - const CuMatrixBase &in_deriv = - computer.GetOutput(request.inputs[i].name); - const CuMatrixBase &in_deriv_opt = - computer_opt.GetOutput(request.inputs[i].name); - KALDI_LOG << "Input-deriv sum for input '" << request.inputs[i].name - << "' (non-optimized) is " << in_deriv.Sum(); - KALDI_LOG << "Input-deriv sum for input '" << request.inputs[i].name - << "' (optimized) is " << in_deriv_opt.Sum(); - if (!ApproxEqual(in_deriv, in_deriv_opt)) { - KALDI_WARN << "Non-optimized and optimized versions of the " - << "computation give different input-derivs."; - return false; - } + if (request.outputs[0].has_deriv) { + computer.AcceptInput("output", &output_deriv); + computer_opt.AcceptInput("output", &output_deriv_opt); + + KALDI_LOG << "Running non-optimized backward computation"; + computer.Run(); + KALDI_LOG << "Running optimized backward computation"; + computer_opt.Run(); + for (size_t i = 0; i < request.inputs.size(); i++) { + if (request.inputs[i].has_deriv) { + const CuMatrixBase &in_deriv = + computer.GetOutput(request.inputs[i].name); + const CuMatrixBase &in_deriv_opt = + computer_opt.GetOutput(request.inputs[i].name); + KALDI_LOG << "Input-deriv sum for input '" << request.inputs[i].name + << "' (non-optimized) is " << in_deriv.Sum(); + KALDI_LOG << "Input-deriv sum for input '" << request.inputs[i].name + << "' (optimized) is " << in_deriv_opt.Sum(); + if (!ApproxEqual(in_deriv, in_deriv_opt)) { + KALDI_WARN << "Non-optimized and optimized versions of the " + << "computation give different input-derivs."; + return false; } } } + } - if (!NnetParametersAreIdentical(nnet_to_update, - nnet_opt_to_update, 1.0e-05)) { - KALDI_WARN << "Neural networks differ after training, between " - << "optimized and non-optimized computation."; - return false; - } + if (!NnetParametersAreIdentical(nnet_to_update, + nnet_opt_to_update, 1.0e-05)) { + KALDI_WARN << "Neural networks differ after training, between " + << "optimized and non-optimized computation."; + return false; + } else { + return true; } - 
return true; } // This test runs the computation with and without optimization, and checks that // the outputs are the same. -static void UnitTestNnetOptimize() { +static void UnitTestNnetOptimizeInternal(int32 srand_seed) { NnetOptimizeOptions optimize_all; CachingOptimizingCompilerOptions compiler_all; @@ -192,7 +194,7 @@ static void UnitTestNnetOptimize() { // this is useful for debugging as it removes nans: // optimize_all.initialize_undefined = false; - bool success = UnitTestNnetOptimizeWithOptions(optimize_all, + bool success = UnitTestNnetOptimizeWithOptions(srand_seed, optimize_all, compiler_all); if (success) return; @@ -204,48 +206,48 @@ static void UnitTestNnetOptimize() { compiler.use_shortcut = false; - bool succ_no_shortcut = UnitTestNnetOptimizeWithOptions(optimize, + bool succ_no_shortcut = UnitTestNnetOptimizeWithOptions(srand_seed, optimize, compiler); compiler = compiler_all; optimize.propagate_in_place = false; - bool succ_no_propagate_in_place = UnitTestNnetOptimizeWithOptions(optimize, + bool succ_no_propagate_in_place = UnitTestNnetOptimizeWithOptions(srand_seed, optimize, compiler); optimize = optimize_all; optimize.backprop_in_place = false; - bool succ_no_backprop_in_place = UnitTestNnetOptimizeWithOptions(optimize, + bool succ_no_backprop_in_place = UnitTestNnetOptimizeWithOptions(srand_seed, optimize, compiler); optimize = optimize_all; optimize.optimize_row_ops = false; - bool succ_no_row_ops = UnitTestNnetOptimizeWithOptions(optimize, + bool succ_no_row_ops = UnitTestNnetOptimizeWithOptions(srand_seed, optimize, compiler); optimize = optimize_all; optimize.convert_addition = false; - bool succ_no_convert_addition = UnitTestNnetOptimizeWithOptions(optimize, + bool succ_no_convert_addition = UnitTestNnetOptimizeWithOptions(srand_seed, optimize, compiler); optimize = optimize_all; optimize.remove_assignments = false; - bool succ_no_remove_assignments = UnitTestNnetOptimizeWithOptions(optimize, + bool succ_no_remove_assignments = UnitTestNnetOptimizeWithOptions(srand_seed, optimize, compiler); optimize = optimize_all; optimize.initialize_undefined = false; - bool succ_no_initialize_undefined = UnitTestNnetOptimizeWithOptions(optimize, + bool succ_no_initialize_undefined = UnitTestNnetOptimizeWithOptions(srand_seed, optimize, compiler); optimize = optimize_all; optimize.allocate_from_other = false; - bool succ_no_allocate_from_other = UnitTestNnetOptimizeWithOptions(optimize, + bool succ_no_allocate_from_other = UnitTestNnetOptimizeWithOptions(srand_seed, optimize, compiler); optimize = optimize_all; optimize.move_sizing_commands = false; - bool succ_no_move_sizing_commands = UnitTestNnetOptimizeWithOptions(optimize, + bool succ_no_move_sizing_commands = UnitTestNnetOptimizeWithOptions(srand_seed, optimize, compiler); optimize = optimize_all; @@ -265,7 +267,13 @@ static void UnitTestNnetOptimize() { #undef KALDI_SUCCFAIL } - +static void UnitTestNnetOptimize() { + for (int32 srand_seed = 0; srand_seed < 40; srand_seed++) { + KALDI_LOG << "About to run UnitTestNnetOptimizeInternal with srand_seed = " + << srand_seed; + UnitTestNnetOptimizeInternal(srand_seed); + } +} diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc index de25b8bcabb..41f3acb3916 100644 --- a/src/nnet3/nnet-optimize-utils.cc +++ b/src/nnet3/nnet-optimize-utils.cc @@ -2293,9 +2293,9 @@ class ComputationExpander { // This function assumes that ComputeSubmatrixInfo() has already // been called. 
// Note: it returns true if the index 'old_row_index' into submatrix - // indexed 'old_submat_index' corresponds to an Index with n=0; otherwise + // indexed 'submat_index' corresponds to an Index with n=0; otherwise // it returns false and does not set the output values. - bool GetNewSubmatLocationInfo(int32 old_submat_index, + bool GetNewSubmatLocationInfo(int32 submat_index, int32 old_row_index, int32 *new_row_index, int32 *new_n_stride) const; @@ -2395,24 +2395,26 @@ void ComputationExpander::ExpandRowsCommand( // 's1' and submat2 is the submatrix referred to in 's2'. // 'indexes' has the same size as the num-rows of submat1, and the values // in the vector are row-indexes into s2. - const std::vector &old_indexes = computation_.indexes[c_in.arg3]; + int32 old_arg3 = c_out->arg3; c_out->arg3 = expanded_computation_->indexes.size(); expanded_computation_->indexes.push_back(std::vector()); std::vector &new_indexes = expanded_computation_->indexes.back(); + const std::vector &old_indexes = computation_.indexes[old_arg3]; int32 old_size = old_indexes.size(), num_n_values = num_n_values_, - new_size = expanded_computation_->submatrices[s1].num_rows; + new_s1_size = expanded_computation_->submatrices[s1].num_rows, + new_s2_size = expanded_computation_->submatrices[s2].num_rows; KALDI_ASSERT(old_size % 2 == 0 && old_size == computation_.submatrices[s1].num_rows); - new_indexes.resize(new_size, -1); + new_indexes.resize(new_s1_size, -1); for (int32 i1 = 0; i1 < old_size; i1++) { int32 new_i1_n0, new_n_stride1; if (GetNewSubmatLocationInfo(s1, i1, &new_i1_n0, &new_n_stride1)) { // GetNewSubmatLocationInfo() returns true if this corresponds to // a Cindex with n == 0. - int32 i2 = old_indexes[i1]; + int32 i2 = old_indexes[i1]; // note: i2 is the row index into submatrix s2. int32 new_i2_n0, new_n_stride2; if (i2 < 0) { // if i2 is -1, we'll just fill any relevant positions in // 'new_indexes' with -1's. @@ -2422,9 +2424,11 @@ void ComputationExpander::ExpandRowsCommand( KALDI_ASSERT(ans); // source should also be for n==0, because we don't // (or at least shouldn't) create computations that // mix up the 'n' values - for (int32 n = 0; n < num_n_values; n++) { - int32 new_i1 = new_i1_n0 + n * new_n_stride1, - new_i2 = new_i2_n0 + new_n_stride2; + + int32 new_i1 = new_i1_n0, new_i2 = new_i2_n0; + for (int32 n = 0; n < num_n_values; + ++n, new_i1 += new_n_stride1, new_i2 += new_n_stride2) { + KALDI_ASSERT(new_i1 < new_s1_size && new_i2 < new_s2_size); new_indexes[new_i1] = new_i2; } } @@ -2443,23 +2447,24 @@ void ComputationExpander::ExpandRowsMultiCommand( num_rows_old = computation_.submatrices[s1].num_rows, num_rows_new = expanded_computation_->submatrices[s1].num_rows; - const std::vector > &old_indexes_multi = - computation_.indexes_multi[c_in.arg2]; - // old_indexes_multi is a vector that has the same size as the num-rows - // of submatrix s1. It contains pairs that are either (-1, -1), or - // pairs (submatrix-index, row-index) referring to other submatrices - // in the computation. 
- - KALDI_ASSERT(static_cast(old_indexes_multi.size()) == num_rows_old); KALDI_ASSERT(num_rows_old % 2 == 0); int32 num_n_values = num_n_values_; - + int32 old_arg2 = c_out->arg2; c_out->arg2 = expanded_computation_->indexes_multi.size(); expanded_computation_->indexes_multi.push_back( std::vector >()); std::vector > &new_indexes_multi = expanded_computation_->indexes_multi.back(); + const std::vector > &old_indexes_multi = + computation_.indexes_multi[old_arg2]; + // old_indexes_multi is a vector that has the same size as the num-rows + // of submatrix s1. It contains pairs that are either (-1, -1), or + // pairs (submatrix-index, row-index) referring to other submatrices + // in the computation. + + KALDI_ASSERT(static_cast(old_indexes_multi.size()) == num_rows_old); + new_indexes_multi.resize(num_rows_new, std::pair(-1, -1)); @@ -2508,23 +2513,25 @@ void ComputationExpander::ExpandRowRangesCommand( num_rows_new = expanded_computation_->submatrices[s1].num_rows; KALDI_ASSERT(static_cast(c_in.arg3) < computation_.indexes_ranges.size()); - const std::vector > &old_indexes_ranges = - computation_.indexes_ranges[c_in.arg3]; - // old_indexes_ranges is a vector that has the same size as the num-rows of - // submatrix s1. It contains pairs that are either two copies of the same - // value (in practice the pair (-1, -1)), or pairs (begin-row-index, - // end-row-index) representing the (begin,end) of a range in submatrix s2. - // Note: end-row-index is one past the end of the range, as for C++ iterators. - - KALDI_ASSERT(static_cast(old_indexes_ranges.size()) == num_rows_old); KALDI_ASSERT(num_rows_old % 2 == 0); int32 num_n_values = num_n_values_; + + int32 old_arg3 = c_out->arg3; c_out->arg3 = expanded_computation_->indexes_ranges.size(); expanded_computation_->indexes_ranges.push_back( std::vector >()); std::vector > &new_indexes_ranges = expanded_computation_->indexes_ranges.back(); + const std::vector > &old_indexes_ranges = + computation_.indexes_ranges[old_arg3]; + // old_indexes_ranges is a vector that has the same size as the num-rows of + // submatrix s1. It contains pairs that are either two copies of the same + // value (in practice the pair (-1, -1)), or pairs (begin-row-index, + // end-row-index) representing the (begin,end) of a range in submatrix s2. + // Note: end-row-index is one past the end of the range, as for C++ iterators. + + KALDI_ASSERT(static_cast(old_indexes_ranges.size()) == num_rows_old); new_indexes_ranges.resize(num_rows_new, std::pair(-1, -1)); @@ -2815,10 +2822,6 @@ void ComputationExpander::ComputePrecomputedIndexes() { int32 num_commands = computation_.commands.size(), num_precomputed_indexes = computation_.component_precomputed_indexes.size(); - if (num_precomputed_indexes == 1) - return; // Nothing to compute. Note: element zero of - // component_precomputed_indexes is reserved for NULL. - std::vector need_backprop(num_precomputed_indexes, false); std::vector component_index(num_precomputed_indexes, -1); @@ -2860,8 +2863,8 @@ void ComputationExpander::ComputePrecomputedIndexes() { // the n indexes consist of the set (0, 1), and the computation we're // creating has more distinct n indexes than that. 
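// To make the n-stride arithmetic used throughout ComputationExpander
// concrete, here is a minimal self-contained sketch; the function name and
// the concrete numbers are illustrative only, not part of this patch.  It
// mirrors the inner loop of ExpandRowsCommand() above: given the new
// row-index of the n == 0 copy and the new n-stride, the copies for the
// remaining n values sit at a fixed stride.

```c++
#include <cassert>
#include <vector>

std::vector<int> ExpandToAllNValues(int new_i_n0, int new_n_stride,
                                    int num_n_values) {
  std::vector<int> rows;
  rows.reserve(num_n_values);
  for (int n = 0, row = new_i_n0; n < num_n_values;
       ++n, row += new_n_stride)
    rows.push_back(row);  // row-index of the copy for this n value.
  return rows;
}

int main() {
  // n == 0 copy at new row 3, n-stride 10, 4 n-values -> rows 3, 13, 23, 33.
  std::vector<int> rows = ExpandToAllNValues(3, 10, 4);
  assert(rows.size() == 4 && rows[3] == 33);
  return 0;
}
```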
   std::vector<Index> input_indexes, output_indexes;
-  ExpandIndexes(old_info.input_indexes, &new_info.input_indexes);
-  ExpandIndexes(old_info.output_indexes, &new_info.output_indexes);
+  ExpandIndexes(old_info.input_indexes, &input_indexes);
+  ExpandIndexes(old_info.output_indexes, &output_indexes);
   KALDI_ASSERT(component_index[p] >= 0);
   const Component *component = nnet_.GetComponent(component_index[p]);
   ComponentPrecomputedIndexes *expanded_precomputed_indexes =
@@ -2877,18 +2880,19 @@
 
 bool ComputationExpander::GetNewSubmatLocationInfo(
-    int32 old_submat_index, int32 old_row_index,
+    int32 submat_index, int32 old_row_index,
     int32 *new_row_index, int32 *new_n_stride) const {
-  int32 matrix_index = computation_.submatrices[old_submat_index].matrix_index,
-     row_offset = computation_.submatrices[old_submat_index].row_offset;
+  int32 matrix_index = computation_.submatrices[submat_index].matrix_index,
+     old_row_offset = computation_.submatrices[submat_index].row_offset,
+     new_row_offset = expanded_computation_->submatrices[submat_index].row_offset;
   const NnetComputation::MatrixDebugInfo &debug_info_in =
       computation_.matrix_debug_info[matrix_index];
-  if (debug_info_in.cindexes[old_row_index + row_offset].second.n != 0)
+  if (debug_info_in.cindexes[old_row_index + old_row_offset].second.n != 0)
     return false;
-  GetNewMatrixLocationInfo(matrix_index, old_row_index + row_offset,
+  GetNewMatrixLocationInfo(matrix_index, old_row_index + old_row_offset,
                            new_row_index, new_n_stride);
-  *new_row_index -= row_offset;
+  *new_row_index -= new_row_offset;
   return true;
 }
 
@@ -2897,9 +2901,7 @@ void ComputationExpander::GetNewMatrixLocationInfo(
     int32 *new_row_index, int32 *new_n_stride) const {
   bool n_is_fast = n_fast_[old_matrix_index];
   int32 num_rows = computation_.matrices[old_matrix_index].num_rows;
-  int32 n_stride;
   if (n_is_fast) {
-    n_stride = 1;
     // If the n index varies fast for this matrix, then the old row-index
     // should be a multiple of 2 because:
     //  - we assume that the input computation was built for 2 n-values
@@ -2957,9 +2959,7 @@ void ComputationExpander::GetNewLocationInfo(
   int32 num_indexes = indexes.size();
   KALDI_ASSERT(num_indexes > 0 && num_indexes % 2 == 0 &&
               indexes.front().n == 0 && indexes.back().n == 1);
-  int32 n_stride;
   if (is_fast) {
-    n_stride = 1;
     // If the n index varies fast for this matrix, then the old row-index
     // should be a multiple of 2 because:
     //  - we assume that the input computation was built for 2 n-values
diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc
index c2cee31bbcc..6da7699cb93 100644
--- a/src/nnet3/nnet-optimize.cc
+++ b/src/nnet3/nnet-optimize.cc
@@ -745,6 +745,8 @@ const NnetComputation* CachingOptimizingCompiler::CompileViaShortcut(
   ExpandComputation(nnet_, request.misc_info, *mini_computation,
                     need_debug_info, num_n_values, ans);
 
+  ans->ComputeCudaIndexes();
+
   return ans;
 }
 

From 6454ed202bc5348aa22546822d28668b31fca4c9 Mon Sep 17 00:00:00 2001
From: David van Leeuwen 
Date: Thu, 15 Dec 2016 19:45:41 +0100
Subject: [PATCH 124/530] add `mkdir -p $dir` which might not exist yet (#1266)

The next line in the diff
```sh
cp $lang/phones.txt $dir || exit 1;
```
will turn `$dir` into a file rather than a directory, which
causes problems.
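A minimal reproduction of the failure mode (the paths below are made up
for illustration):

```sh
tmp=$(mktemp -d)
echo "<eps> 0" > $tmp/phones.txt
dir=$tmp/online_decoding       # does not exist yet
cp $tmp/phones.txt $dir        # creates a regular *file* named $dir
[ -f "$dir" ] && echo "\$dir became a file"
rm "$dir"
mkdir -p $dir                  # the fix applied by this patch
cp $tmp/phones.txt $dir        # now copies into the directory
[ -d "$dir" ] && echo "\$dir is a directory containing: $(ls $dir)"
rm -r "$tmp"
```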
--- egs/wsj/s5/steps/online/nnet2/prepare_online_decoding.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/egs/wsj/s5/steps/online/nnet2/prepare_online_decoding.sh b/egs/wsj/s5/steps/online/nnet2/prepare_online_decoding.sh index cd18ae21f39..cc63adb3e17 100755 --- a/egs/wsj/s5/steps/online/nnet2/prepare_online_decoding.sh +++ b/egs/wsj/s5/steps/online/nnet2/prepare_online_decoding.sh @@ -77,6 +77,7 @@ if [ ! -z "$iedir" ]; then fi utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1; +mkdir -p $dir cp $lang/phones.txt $dir || exit 1; dir=$(readlink -f $dir) # Convert $dir to an absolute pathname, so that the From 7becd47e2fbfdba7ea86158fca2dbb4c06843c47 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 15 Dec 2016 18:14:09 -0500 Subject: [PATCH 125/530] Small documentation fix --- src/nnet3/nnet-am-decodable-simple.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nnet3/nnet-am-decodable-simple.h b/src/nnet3/nnet-am-decodable-simple.h index e604765e09a..acf0ba8e63a 100644 --- a/src/nnet3/nnet-am-decodable-simple.h +++ b/src/nnet3/nnet-am-decodable-simple.h @@ -72,11 +72,11 @@ struct NnetSimpleComputationOptions { "of the neural net's inherent right context (may be useful in " "recurrent setups"); opts->Register("extra-left-context-initial", &extra_left_context_initial, - "If >0, overrides the --extra-left-context value at the start " - "of an utterance."); + "If >= 0, overrides the --extra-left-context value at the " + "start of an utterance."); opts->Register("extra-right-context-final", &extra_right_context_final, - "If >0, overrides the --extra-right-context value at the end " - "of an utterance."); + "If >= 0, overrides the --extra-right-context value at the " + "end of an utterance."); opts->Register("frame-subsampling-factor", &frame_subsampling_factor, "Required if the frame-rate of the output (e.g. in 'chain' " "models) is less than the frame-rate of the original " From 60f79dca996395433463446075b8c54ecac09c68 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 15 Dec 2016 18:33:16 -0500 Subject: [PATCH 126/530] Remove no-longer-used option --cut-zero-frames from chain supervision-creation code --- egs/wsj/s5/steps/nnet3/chain/get_egs.sh | 4 +--- src/chain/chain-supervision.cc | 21 --------------------- src/chain/chain-supervision.h | 21 --------------------- src/chainbin/nnet3-chain-get-egs.cc | 24 +++++++----------------- 4 files changed, 8 insertions(+), 62 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh index c7263f41698..7b330f8f717 100755 --- a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh @@ -26,8 +26,6 @@ frames_per_eg=25 # number of feature frames example (not counting added contex frames_overlap_per_eg=0 # number of supervised frames of overlap that we aim for per eg. # can be useful to avoid wasted data if you're using --left-deriv-truncate # and --right-deriv-truncate. -cut_zero_frames=-1 # if activated, activates new-style derivative weights.. i'll reorganize - # this if it works well. 
frame_subsampling_factor=3 # frames-per-second of features we train on divided # by frames-per-second at output of chain model alignment_subsampling_factor=3 # frames-per-second of input alignments divided @@ -294,7 +292,7 @@ if [ $stage -le 2 ]; then fi -egs_opts="--left-context=$left_context --right-context=$right_context --num-frames=$frames_per_eg --num-frames-overlap=$frames_overlap_per_eg --frame-subsampling-factor=$frame_subsampling_factor --compress=$compress --cut-zero-frames=$cut_zero_frames" +egs_opts="--left-context=$left_context --right-context=$right_context --num-frames=$frames_per_eg --num-frames-overlap=$frames_overlap_per_eg --frame-subsampling-factor=$frame_subsampling_factor --compress=$compress" [ -z $valid_left_context ] && valid_left_context=$left_context; diff --git a/src/chain/chain-supervision.cc b/src/chain/chain-supervision.cc index aad1320e0a0..b5597b15667 100644 --- a/src/chain/chain-supervision.cc +++ b/src/chain/chain-supervision.cc @@ -804,26 +804,5 @@ void GetWeightsForRanges(int32 range_length, } -void GetWeightsForRangesNew(int32 range_length, - int32 num_frames_zeroed, - const std::vector &range_starts, - std::vector > *weights) { - KALDI_ASSERT(range_length > 0 && num_frames_zeroed * 2 < range_length); - int32 num_ranges = range_starts.size(); - weights->resize(num_ranges); - for (int32 i = 0; i < num_ranges; i++) { - (*weights)[i].Resize(range_length); - (*weights)[i].Set(1.0); - } - if (num_frames_zeroed == 0) - return; - for (int32 i = 1; i < num_ranges; i++) - (*weights)[i].Range(0, num_frames_zeroed).Set(0.0); - for (int32 i = 0; i + 1 < num_ranges; i++) - (*weights)[i].Range(range_length - num_frames_zeroed, - num_frames_zeroed).Set(0.0); -} - - } // namespace chain } // namespace kaldi diff --git a/src/chain/chain-supervision.h b/src/chain/chain-supervision.h index 2dda8baf1e4..a94f68ade90 100644 --- a/src/chain/chain-supervision.h +++ b/src/chain/chain-supervision.h @@ -402,27 +402,6 @@ void GetWeightsForRanges(int32 range_length, std::vector > *weights); -/// This is a newer version of GetWeightsForRanges with a simpler behavior -/// than GetWeightsForRanges and a different purpose. Instead of aiming to -/// create weights that sum to one over the whole file, the purpose is to -/// zero out the derivative weights for a certain number of frames to each -/// side of every 'cut point' in the numerator lattice [by numerator lattice, -/// what I mean is the FST that we automatically generate from the numerator -/// alignment or lattice]. So we don't zero out the weights for the very -/// beginning or very end of each original utterance, just those where -/// we split the utterance into pieces. We believe there is an incentive -/// for the network to produce deletions near the edges, and this aims to fix -/// this problem. -/// range_length is the length of each range of times (so range_starts[0] -/// represents the start of a range of t values of length 'range_length' -/// and so range_starts[1] etc.), and num_frames_zeroed is the number of frames -/// on each side of the cut point on which we are supposed to zero out the -/// derivative. 
-void GetWeightsForRangesNew(int32 range_length, - int32 num_frames_zeroed, - const std::vector &range_starts, - std::vector > *weights); - typedef TableWriter > SupervisionWriter; typedef SequentialTableReader > SequentialSupervisionReader; diff --git a/src/chainbin/nnet3-chain-get-egs.cc b/src/chainbin/nnet3-chain-get-egs.cc index cc463d179da..968a50af889 100644 --- a/src/chainbin/nnet3-chain-get-egs.cc +++ b/src/chainbin/nnet3-chain-get-egs.cc @@ -49,7 +49,6 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst, int32 frames_per_eg, int32 frames_overlap_per_eg, int32 frame_subsampling_factor, - int32 cut_zero_frames, int64 *num_frames_written, int64 *num_egs_written, NnetChainExampleWriter *example_writer) { @@ -86,7 +85,7 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst, return ProcessFile(normalization_fst, feats_new, ivector_feats, supervision, utt_id, compress, left_context, right_context, frames_per_eg, frames_overlap_per_eg, frame_subsampling_factor, - cut_zero_frames, num_frames_written, num_egs_written, + num_frames_written, num_egs_written, example_writer); } @@ -116,15 +115,10 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst, // to the edge are not as accurate as they could be, because when we split we // don't know the correct alphas and betas). std::vector > deriv_weights; - if (cut_zero_frames >= 0) - chain::GetWeightsForRangesNew(frames_per_eg_subsampled, - cut_zero_frames / frame_subsampling_factor, - range_starts_subsampled, - &deriv_weights); - else - chain::GetWeightsForRanges(frames_per_eg_subsampled, - range_starts_subsampled, - &deriv_weights); + + chain::GetWeightsForRanges(frames_per_eg_subsampled, + range_starts_subsampled, + &deriv_weights); if (range_starts_subsampled.empty()) { KALDI_WARN << "No output for utterance " << utt_id @@ -250,10 +244,6 @@ int main(int argc, char *argv[]) { ParseOptions po(usage); po.Register("compress", &compress, "If true, write egs in " "compressed format (recommended)"); - po.Register("cut-zero-frames", &cut_zero_frames, "Number of frames " - "(measured before subsampling) to zero the derivative on each " - "side of a cut point (if set, activates new-style derivative " - "weights)"); po.Register("left-context", &left_context, "Number of frames of left " "context the neural net requires."); po.Register("right-context", &right_context, "Number of frames of right " @@ -276,7 +266,7 @@ int main(int argc, char *argv[]) { "frame-rate of the input"); po.Read(argc, argv); - + srand(srand_seed); if (po.NumArgs() < 3 || po.NumArgs() > 4) { @@ -355,7 +345,7 @@ int main(int argc, char *argv[]) { key, compress, left_context, right_context, num_frames, num_frames_overlap, frame_subsampling_factor, - cut_zero_frames, &num_frames_written, &num_egs_written, + &num_frames_written, &num_egs_written, &example_writer)) num_done++; else From bd499c8557fd0ffb517ad75e782edbf37c5402fe Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 15 Dec 2016 19:41:38 -0500 Subject: [PATCH 127/530] asr_diarization: Restructuring do_segmentation_data_dir.sh --- .../segmentation/do_segmentation_data_dir.sh | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh index 663655eef77..9feb421ccd3 100755 --- a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh +++ b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh @@ -42,15 +42,17 @@ echo $* . 
utils/parse_options.sh -if [ $# -ne 3 ]; then - echo "Usage: $0 " - echo " e.g.: $0 ~/workspace/egs/ami/s5b/data/sdm1/dev data/ami_sdm1_dev exp/nnet3_sad_snr/nnet_tdnn_j_n4" +if [ $# -ne 4 ]; then + echo "Usage: $0 " + echo " e.g.: $0 ~/workspace/egs/ami/s5b/data/sdm1/dev exp/nnet3_sad_snr/nnet_tdnn_j_n4 mfcc_hires_bp data/ami_sdm1_dev" exit 1 fi -src_data_dir=$1 -data_dir=$2 -sad_nnet_dir=$3 +src_data_dir=$1 # The input data directory that needs to be segmented. + # Any segments in that will be ignored. +sad_nnet_dir=$2 # The SAD neural network +mfcc_dir=$3 # The directory to store the features +data_dir=$4 # The output data directory will be ${data_dir}_seg affix=${affix:+_$affix} feat_affix=${feat_affix:+_$feat_affix} @@ -62,8 +64,10 @@ seg_dir=${sad_nnet_dir}/${segmentation_name}${affix}_${data_id}_whole${feat_affi export PATH="$KALDI_ROOT/tools/sph2pipe_v2.5/:$PATH" [ ! -z `which sph2pipe` ] +whole_data_dir=${sad_dir}/${data_id}_whole + if [ $stage -le 0 ]; then - utils/data/convert_data_dir_to_whole.sh $src_data_dir ${data_dir}_whole + utils/data/convert_data_dir_to_whole.sh $src_data_dir ${whole_data_dir} if $do_downsampling; then freq=`cat $mfcc_config | perl -pe 's/\s*#.*//g' | grep "sample-frequency=" | awk -F'=' '{if (NF == 0) print 16000; else print $2}'` @@ -76,18 +80,19 @@ for line in sys.stdin.readlines(): out_line = line.strip() + ' $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |' else: out_line = 'cat {0} {1} | $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |'.format(splits[0], ' '.join(splits[1:])) - print (out_line)" > ${data_dir}_whole/wav.scp + print (out_line)" > ${whole_data_dir}/wav.scp fi - utils/copy_data_dir.sh ${data_dir}_whole ${data_dir}_whole${feat_affix}_hires + utils/copy_data_dir.sh ${whole_data_dir} ${whole_data_dir}${feat_affix}_hires fi -test_data_dir=${data_dir}_whole${feat_affix}_hires +test_data_dir=${whole_data_dir}${feat_affix}_hires if [ $stage -le 1 ]; then steps/make_mfcc.sh --mfcc-config $mfcc_config --nj $reco_nj --cmd "$train_cmd" \ - ${data_dir}_whole${feat_affix}_hires exp/make_hires/${data_id}_whole${feat_affix} mfcc_hires - steps/compute_cmvn_stats.sh ${data_dir}_whole${feat_affix}_hires exp/make_hires/${data_id}_whole${feat_affix} mfcc_hires + ${whole_data_dir}${feat_affix}_hires exp/make_hires/${data_id}_whole${feat_affix} $mfcc_dir + steps/compute_cmvn_stats.sh ${whole_data_dir}${feat_affix}_hires exp/make_hires/${data_id}_whole${feat_affix} $mfcc_dir + utils/fix_data_dir.sh ${whole_data_dir}${feat_affix}_hires fi post_vec=$sad_nnet_dir/post_${output_name}.vec @@ -114,21 +119,23 @@ if [ $stage -le 3 ]; then --min-silence-duration $min_silence_duration \ --min-speech-duration $min_speech_duration \ --segmentation-config $segmentation_config --cmd "$train_cmd" \ - ${test_data_dir} $sad_dir $seg_dir $seg_dir/${data_id}_seg + ${test_data_dir} $sad_dir $seg_dir ${data_dir}_seg fi # Subsegment data directory if [ $stage -le 4 ]; then - rm $seg_dir/${data_id}_seg/feats.scp || true + rm ${data_dir}_seg/feats.scp || true utils/data/get_reco2num_frames.sh ${test_data_dir} - awk '{print $1" "$2}' ${seg_dir}/${data_id}_seg/segments | \ + awk '{print $1" "$2}' ${data_dir}_seg/segments | \ utils/apply_map.pl -f 2 ${test_data_dir}/reco2num_frames > \ - $seg_dir/${data_id}_seg/utt2max_frames + ${data_dir}_seg/utt2max_frames frame_shift_info=`cat $mfcc_config | steps/segmentation/get_frame_shift_info_from_config.pl` utils/data/get_subsegment_feats.sh ${test_data_dir}/feats.scp \ - $frame_shift_info $seg_dir/${data_id}_seg/segments | \ - 
utils/data/fix_subsegmented_feats.pl ${seg_dir}/${data_id}_seg/utt2max_frames > \ - $seg_dir/${data_id}_seg/feats.scp - steps/compute_cmvn_stats.sh --fake $seg_dir/${data_id}_seg + $frame_shift_info ${data_dir}_seg/segments | \ + utils/data/fix_subsegmented_feats.pl ${data_dir}_seg/utt2max_frames > \ + ${data_dir}_seg/feats.scp + steps/compute_cmvn_stats.sh --fake ${data_dir}_seg + + utils/fix_data_dir.sh ${data_dir}_seg fi From eaa31a4bcdaefc65a242def1a7e59d325505f88d Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 15 Dec 2016 19:44:37 -0500 Subject: [PATCH 128/530] asr_diarization: SAD on Aspire --- .../s5/local/multi_condition/get_ctm.sh | 6 +- .../nnet3/prep_test_aspire_segmentation.sh | 230 ++++++++++++++++++ egs/aspire/s5/local/score_aspire.sh | 9 +- 3 files changed, 238 insertions(+), 7 deletions(-) create mode 100755 egs/aspire/s5/local/nnet3/prep_test_aspire_segmentation.sh diff --git a/egs/aspire/s5/local/multi_condition/get_ctm.sh b/egs/aspire/s5/local/multi_condition/get_ctm.sh index f67a1191544..6fc87fec7b0 100755 --- a/egs/aspire/s5/local/multi_condition/get_ctm.sh +++ b/egs/aspire/s5/local/multi_condition/get_ctm.sh @@ -7,7 +7,7 @@ decode_mbr=true filter_ctm_command=cp glm= stm= -window=10 +resolve_overlaps=true overlap=5 [ -f ./path.sh ] && . ./path.sh . parse_options.sh || exit 1; @@ -62,7 +62,9 @@ lattice-align-words-lexicon --output-error-lats=true --output-if-empty=true --ma lattice-to-ctm-conf $frame_shift_opt --decode-mbr=$decode_mbr ark:- $decode_dir/score_$LMWT/penalty_$wip/ctm.overlapping || exit 1; # combine the segment-wise ctm files, while resolving overlaps -python local/multi_condition/resolve_ctm_overlaps.py --overlap $overlap --window-length $window $data_dir/utt2spk $decode_dir/score_$LMWT/penalty_$wip/ctm.overlapping $decode_dir/score_$LMWT/penalty_$wip/ctm.merged || exit 1; +if $resolve_overlaps; then + steps/resolve_ctm_overlaps.py $data_dir/segments $decode_dir/score_$LMWT/penalty_$wip/ctm.overlapping $decode_dir/score_$LMWT/penalty_$wip/ctm.merged || exit 1; +fi merged_ctm=$decode_dir/score_$LMWT/penalty_$wip/ctm.merged cat $merged_ctm | utils/int2sym.pl -f 5 $lang/words.txt | \ diff --git a/egs/aspire/s5/local/nnet3/prep_test_aspire_segmentation.sh b/egs/aspire/s5/local/nnet3/prep_test_aspire_segmentation.sh new file mode 100755 index 00000000000..5f38f6de51f --- /dev/null +++ b/egs/aspire/s5/local/nnet3/prep_test_aspire_segmentation.sh @@ -0,0 +1,230 @@ +#!/bin/bash + +# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016. Apache 2.0. +# This script generates the ctm files for dev_aspire, test_aspire and eval_aspire +# for scoring with ASpIRE scoring server. +# It also provides the WER for dev_aspire data. + +set -e +set -o pipefail +set -u + +# general opts +iter=final +stage=0 +decode_num_jobs=30 +num_jobs=30 +affix= + +# ivector opts +max_count=75 # parameter for extract_ivectors.sh +sub_speaker_frames=6000 +ivector_scale=0.75 +filter_ctm=true +weights_file= +silence_weight=0.00001 + +# decode opts +pass2_decode_opts="--min-active 1000" +lattice_beam=8 +extra_left_context=0 # change for (B)LSTM +extra_right_context=0 # change for BLSTM +frames_per_chunk=50 # change for (B)LSTM +acwt=0.1 # important to change this when using chain models +post_decode_acwt=1.0 # important to change this when using chain models + +. ./cmd.sh +[ -f ./path.sh ] && . ./path.sh +. 
utils/parse_options.sh || exit 1; + +if [ $# -ne 5 ]; then + echo "Usage: $0 [options] " + echo " Options:" + echo " --stage (0|1|2) # start scoring script from part-way through." + echo "e.g.:" + echo "$0 dev_aspire data/lang exp/tri5a/graph_pp exp/nnet3/tdnn" + exit 1; +fi + +data_set=$1 +sad_nnet_dir=$2 +lang=$3 # data/lang +graph=$4 #exp/tri5a/graph_pp +dir=$5 # exp/nnet3/tdnn + +model_affix=`basename $dir` +ivector_dir=exp/nnet3 +ivector_affix=${affix:+_$affix}_chain_${model_affix}_iter$iter +affix=_${affix}_iter${iter} +act_data_set=${data_set} # we will modify the data dir, when segmenting it + # so we will keep track of original data dirfor the glm and stm files + +if [[ "$data_set" =~ "test_aspire" ]]; then + out_file=single_dev_test${affix}_$model_affix.ctm +elif [[ "$data_set" =~ "eval_aspire" ]]; then + out_file=single_eval${affix}_$model_affix.ctm +elif [[ "$data_set" =~ "dev_aspire" ]]; then + # we will just decode the directory without oracle segments file + # as we would like to operate in the actual evaluation condition + out_file=single_dev${affix}_${model_affix}.ctm +else + exit 1 +fi + +if [ $stage -le 1 ]; then + steps/segmentation/do_segmentation_data_dir.sh --reco-nj $num_jobs \ + --mfcc-config conf/mfcc_hires_bp.conf --feat-affix bp \ + --do-downsampling false --extra-left-context 100 --extra-right-context 20 \ + --output-name output-speech --frame-subsampling-factor 6 \ + data/${data_set} $sad_nnet_dir mfcc_hires_bp data/${data_set} + # Output will be in data/${data_set}_seg +fi + +# uniform segmentation script would have created this dataset +# so update that script if you plan to change this variable +segmented_data_set=${data_set}_seg + +if [ $stage -le 2 ]; then + mfccdir=mfcc_reverb + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + date=$(date +'%m_%d_%H_%M') + utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/aspire-$date/s5/$mfccdir/storage $mfccdir/storage + fi + + utils/copy_data_dir.sh data/${segmented_data_set} data/${segmented_data_set}_hires + steps/make_mfcc.sh --nj 30 --cmd "$train_cmd" \ + --mfcc-config conf/mfcc_hires.conf data/${segmented_data_set}_hires \ + exp/make_reverb_hires/${segmented_data_set} $mfccdir + steps/compute_cmvn_stats.sh data/${segmented_data_set}_hires \ + exp/make_reverb_hires/${segmented_data_set} $mfccdir + utils/fix_data_dir.sh data/${segmented_data_set}_hires + utils/validate_data_dir.sh --no-text data/${segmented_data_set}_hires +fi + +decode_dir=$dir/decode_${segmented_data_set}${affix}_pp +false && { +if [ $stage -le 2 ]; then + echo "Extracting i-vectors, stage 1" + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 20 \ + --max-count $max_count \ + data/${segmented_data_set}_hires $ivector_dir/extractor \ + $ivector_dir/ivectors_${segmented_data_set}${ivector_affix}_stage1; + # float comparisons are hard in bash + if [ `bc <<< "$ivector_scale != 1"` -eq 1 ]; then + ivector_scale_affix=_scale$ivector_scale + else + ivector_scale_affix= + fi + + if [ ! 
-z "$ivector_scale_affix" ]; then + echo "$0: Scaling iVectors, stage 1" + srcdir=$ivector_dir/ivectors_${segmented_data_set}${ivector_affix}_stage1 + outdir=$ivector_dir/ivectors_${segmented_data_set}${ivector_affix}${ivector_scale_affix}_stage1 + mkdir -p $outdir + copy-matrix --scale=$ivector_scale scp:$srcdir/ivector_online.scp ark:- | \ + copy-feats --compress=true ark:- ark,scp:$outdir/ivector_online.ark,$outdir/ivector_online.scp; + cp $srcdir/ivector_period $outdir/ivector_period + fi +fi + +# generate the lattices +if [ $stage -le 3 ]; then + echo "Generating lattices, stage 1" + steps/nnet3/decode.sh --nj $decode_num_jobs --cmd "$decode_cmd" --config conf/decode.config \ + --acwt $acwt --post-decode-acwt $post_decode_acwt \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir $ivector_dir/ivectors_${segmented_data_set}${ivector_affix}${ivector_scale_affix}_stage1 \ + --skip-scoring true --iter $iter \ + $graph data/${segmented_data_set}_hires ${decode_dir}_stage1; +fi + +if [ $stage -le 4 ]; then + if $filter_ctm; then + if [ ! -z $weights_file ]; then + echo "$0: Using provided vad weights file $weights_file" + ivector_extractor_input=$weights_file + else + echo "$0 : Generating vad weights file" + ivector_extractor_input=${decode_dir}_stage1/weights${affix}.gz + local/extract_vad_weights.sh --cmd "$decode_cmd" --iter $iter \ + data/${segmented_data_set}_hires $lang \ + ${decode_dir}_stage1 $ivector_extractor_input + fi + else + # just use all the frames + ivector_extractor_input=${decode_dir}_stage1 + fi +fi + +if [ $stage -le 5 ]; then + echo "Extracting i-vectors, stage 2 with input $ivector_extractor_input" + # this does offline decoding, except we estimate the iVectors per + # speaker, excluding silence (based on alignments from a DNN decoding), with a + # different script. This is just to demonstrate that script. + # the --sub-speaker-frames is optional; if provided, it will divide each speaker + # up into "sub-speakers" of at least that many frames... can be useful if + # acoustic conditions drift over time within the speaker's data. + steps/online/nnet2/extract_ivectors.sh --cmd "$train_cmd" --nj 20 \ + --silence-weight $silence_weight \ + --sub-speaker-frames $sub_speaker_frames --max-count $max_count \ + data/${segmented_data_set}_hires $lang $ivector_dir/extractor \ + $ivector_extractor_input $ivector_dir/ivectors_${segmented_data_set}${ivector_affix}; +fi +} + +if [ $stage -le 5 ]; then + echo "Extracting i-vectors, stage 2" + # this does offline decoding, except we estimate the iVectors per + # speaker, excluding silence (based on alignments from a DNN decoding), with a + # different script. This is just to demonstrate that script. + # the --sub-speaker-frames is optional; if provided, it will divide each speaker + # up into "sub-speakers" of at least that many frames... can be useful if + # acoustic conditions drift over time within the speaker's data. 
+ steps/online/nnet2/extract_ivectors.sh --cmd "$train_cmd" --nj 20 \ + --sub-speaker-frames $sub_speaker_frames --max-count $max_count \ + data/${segmented_data_set}_hires $lang $ivector_dir/extractor \ + $ivector_dir/ivectors_${segmented_data_set}${ivector_affix}; +fi + +if [ $stage -le 6 ]; then + echo "Generating lattices, stage 2 with --acwt $acwt" + rm -f ${decode_dir}_tg/.error + steps/nnet3/decode.sh --nj $decode_num_jobs --cmd "$decode_cmd" --config conf/decode.config $pass2_decode_opts \ + --acwt $acwt --post-decode-acwt $post_decode_acwt \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --skip-scoring true --iter $iter --lattice-beam $lattice_beam \ + --online-ivector-dir $ivector_dir/ivectors_${segmented_data_set}${ivector_affix} \ + $graph data/${segmented_data_set}_hires ${decode_dir}_tg || touch ${decode_dir}_tg/.error + [ -f ${decode_dir}_tg/.error ] && echo "$0: Error decoding" && exit 1; +fi + +if [ $stage -le 7 ]; then + echo "Rescoring lattices" + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + --skip-scoring true \ + ${lang}_pp_test{,_fg} data/${segmented_data_set}_hires \ + ${decode_dir}_{tg,fg}; +fi + +decode_dir=${decode_dir}_fg + +if [ $stage -le 8 ]; then + local/score_aspire.sh --cmd "$decode_cmd" \ + --min-lmwt 1 --max-lmwt 20 \ + --word-ins-penalties "0.0,0.25,0.5,0.75,1.0" \ + --ctm-beam 6 \ + --iter $iter \ + --decode-mbr true \ + --resolve-overlaps false \ + --tune-hyper true \ + $lang $decode_dir $act_data_set $segmented_data_set $out_file +fi + +# Two-pass decoding baseline +# %WER 27.8 | 2120 27217 | 78.2 13.6 8.2 6.0 27.8 75.9 | -0.613 | exp/chain/tdnn_7b/decode_dev_aspire_whole_uniformsegmented_win10_over5_v6_200jobs_iterfinal_pp_fg/score_9/penalty_0.0/ctm.filt.filt.sys +# Using automatic segmentation +# %WER 28.2 | 2120 27214 | 76.5 12.4 11.1 4.7 28.2 75.2 | -0.522 | exp/chain/tdnn_7b/decode_dev_aspire_seg_v7_n_stddev_iterfinal_pp_fg/score_10/penalty_0.0/ctm.filt.filt.sys diff --git a/egs/aspire/s5/local/score_aspire.sh b/egs/aspire/s5/local/score_aspire.sh index 3e35b6d3dae..9c08a6c85d1 100755 --- a/egs/aspire/s5/local/score_aspire.sh +++ b/egs/aspire/s5/local/score_aspire.sh @@ -14,10 +14,9 @@ word_ins_penalties=0.0,0.25,0.5,0.75,1.0 default_wip=0.0 ctm_beam=6 decode_mbr=true -window=30 -overlap=5 cmd=run.pl stage=1 +resolve_overlaps=true tune_hyper=true # if true: # if the data set is "dev_aspire" we check for the # best lmwt and word_insertion_penalty, @@ -89,7 +88,7 @@ if $tune_hyper ; then # or use the default values if [ $stage -le 1 ]; then - if [ "$act_data_set" == "dev_aspire" ]; then + if [[ "$act_data_set" =~ "dev_aspire" ]]; then wip_string=$(echo $word_ins_penalties | sed 's/,/ /g') temp_wips=($wip_string) $cmd WIP=1:${#temp_wips[@]} $decode_dir/scoring/log/score.wip.WIP.log \ @@ -98,8 +97,8 @@ if $tune_hyper ; then echo \$wip \&\& \ $cmd LMWT=$min_lmwt:$max_lmwt $decode_dir/scoring/log/score.LMWT.\$wip.log \ local/multi_condition/get_ctm.sh --filter-ctm-command "$filter_ctm_command" \ - --window $window --overlap $overlap \ --beam $ctm_beam --decode-mbr $decode_mbr \ + --resolve-overlaps $resolve_overlaps \ --glm data/${act_data_set}/glm --stm data/${act_data_set}/stm \ LMWT \$wip $lang data/${segmented_data_set}_hires $model $decode_dir || exit 1; @@ -124,7 +123,7 @@ wipfile.close() fi - if [ "$act_data_set" == "test_aspire" ] || [ "$act_data_set" == "eval_aspire" ]; then + if [[ "$act_data_set" =~ "test_aspire" ]] || [[ "$act_data_set" =~ 
"eval_aspire" ]]; then # check for the best values from dev_aspire decodes dev_decode_dir=$(echo $decode_dir|sed "s/test_aspire/dev_aspire_whole/g; s/eval_aspire/dev_aspire_whole/g") if [ -f $dev_decode_dir/scoring/bestLMWT ]; then From c9a8da1b43590251164ad83a07f5d6fe3f7c983b Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 15 Dec 2016 19:45:19 -0500 Subject: [PATCH 129/530] asr_diaization: Changes to run_segmentation_ami based on restructuring --- .../s5/local/segmentation/run_segmentation_ami.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh b/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh index 98ff4210780..f9374aaf55a 100755 --- a/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh +++ b/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh @@ -12,6 +12,8 @@ set -u stage=-1 nnet_dir=exp/nnet3_sad_snr/nnet_tdnn_k_n4 +extra_left_context=100 +extra_right_context=20 . utils/parse_options.sh @@ -99,23 +101,24 @@ if [ $stage -le 6 ]; then rttmSmooth.pl -s 0 \| awk '{ print $2" "$3" "$4" "$5+$4 }' '>' $dir/uem fi -hyp_dir=$nnet_dir/segmentation_ami_sdm1_dev_whole_bp +hyp_dir=${nnet_dir}/segmentation_ami_sdm1_dev_whole_bp/ami_sdm1_dev if [ $stage -le 7 ]; then steps/segmentation/do_segmentation_data_dir.sh --reco-nj 18 \ --mfcc-config conf/mfcc_hires_bp.conf --feat-affix bp --do-downsampling true \ - --extra-left-context 100 --extra-right-context 20 \ + --extra-left-context $extra_left_context --extra-right-context $extra_right_context \ --output-name output-speech --frame-subsampling-factor 6 \ - $src_dir/data/sdm1/dev data/ami_sdm1_dev $nnet_dir + $src_dir/data/sdm1/dev $nnet_dir mfcc_hires_bp $hyp_dir fi +hyp_dir=${hyp_dir}_seg if [ $stage -le 8 ]; then utils/data/get_reco2utt.sh $src_dir/data/sdm1/dev_ihmdata steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \ - $hyp_dir/ami_sdm1_dev_seg/utt2spk \ - $hyp_dir/ami_sdm1_dev_seg/segments \ + $hyp_dir/utt2spk \ + $hyp_dir/segments \ $dir/reco2file_and_channel \ /dev/stdout | spkr2sad.pl > $hyp_dir/sys.rttm fi From 5d0b82808287e53f927a2c1948dde93874c0ab9a Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 15 Dec 2016 19:46:01 -0500 Subject: [PATCH 130/530] Bug fix in basic_layers.py --- egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py index c612af984b1..38ff36622ec 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py @@ -458,7 +458,7 @@ def check_configs(self): "".format(self.config['dim']), self.str()) if self.config['objective-type'] != 'linear' and \ - self.config['objective_type'] != 'quadratic': + self.config['objective-type'] != 'quadratic': raise xparser_error("In output-layer, objective-type has" " invalid value {0}" "".format(self.config['objective-type']), From 93fe5b3399ff7feb7ad7787605294abf8cc3d1cb Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 15 Dec 2016 19:46:17 -0500 Subject: [PATCH 131/530] asr_diarization: Minor fix in get_egs_multiple_targets.py --- egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py b/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py index 16e1f98a019..fa8a68f5c64 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py +++ 
b/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py @@ -14,6 +14,7 @@ import math import glob +sys.path.insert(0, 'steps') import libs.data as data_lib import libs.common as common_lib From 87511da98aceb588420f68aa254e4494a8953af7 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 15 Dec 2016 19:47:04 -0500 Subject: [PATCH 132/530] asr_diarization: Change the way do_corruption_data_dir_overlapped_speech.sh works to non-whole dirs --- ...o_corruption_data_dir_overlapped_speech.sh | 172 +++---------- .../prepare_unsad_overlapped_speech_data.sh | 236 ++++++++++++++++++ 2 files changed, 266 insertions(+), 142 deletions(-) create mode 100755 egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data.sh diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh index 4d532be4353..aa1d9adc3e9 100755 --- a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh @@ -15,7 +15,6 @@ corrupt_only=false # Data options data_dir=data/train_si284 # Excpecting non-whole data directory -speed_perturb=true num_data_reps=5 # Number of corrupted versions snrs="20:10:15:5:0:-5" foreground_snrs="20:10:15:5:0:-5" @@ -80,9 +79,9 @@ fi clean_data_dir=data/${clean_data_id} corrupted_data_dir=data/${corrupted_data_id} noise_data_dir=data/${noise_data_id} -orig_corrupted_data_dir=$corrupted_data_dir +orig_corrupted_data_dir=data/${corrupted_data_id} -if $speed_perturb; then +if false; then if [ $stage -le 2 ]; then for x in $clean_data_dir $corrupted_data_dir $noise_data_dir; do utils/data/perturb_data_dir_speed_3way.sh $x ${x}_sp @@ -96,12 +95,12 @@ if $speed_perturb; then corrupted_data_id=${corrupted_data_id}_sp clean_data_id=${clean_data_id}_sp noise_data_id=${noise_data_id}_sp +fi - if [ $stage -le 3 ]; then - utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 ${corrupted_data_dir} - utils/data/perturb_data_dir_volume.sh --reco2vol ${corrupted_data_dir}/reco2vol ${clean_data_dir} - utils/data/perturb_data_dir_volume.sh --reco2vol ${corrupted_data_dir}/reco2vol ${noise_data_dir} - fi +if [ $stage -le 3 ]; then + utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 ${corrupted_data_dir} + utils/data/perturb_data_dir_volume.sh --reco2vol ${corrupted_data_dir}/reco2vol ${clean_data_dir} + utils/data/perturb_data_dir_volume.sh --reco2vol ${corrupted_data_dir}/reco2vol ${noise_data_dir} fi if $corrupt_only; then @@ -123,24 +122,32 @@ if [ $stage -le 4 ]; then steps/make_mfcc.sh --mfcc-config $mfcc_config \ --cmd "$cmd" --nj $nj \ $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir +else + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix fi -if false; then +exit 0 + if [ $stage -le 5 ]; then - steps/make_mfcc.sh --mfcc-config $energy_config \ + # clean here is the reverberated first-speaker signal + utils/copy_data_dir.sh $clean_data_dir ${clean_data_dir}_$feat_suffix + clean_data_dir=${clean_data_dir}_$feat_suffix + steps/make_mfcc.sh --mfcc-config $mfcc_config \ --cmd "$cmd" --nj $nj \ - $clean_data_dir exp/make_log_energy/${clean_data_id} log_energy_feats + $clean_data_dir exp/make_${feat_suffix}/${clean_data_id} $mfccdir +else + clean_data_dir=${clean_data_dir}_$feat_suffix fi if [ $stage -le 6 ]; then - steps/make_mfcc.sh --mfcc-config $energy_config \ + # noise here is the reverberated second-speaker signal + 
utils/copy_data_dir.sh $noise_data_dir ${noise_data_dir}_$feat_suffix + noise_data_dir=${noise_data_dir}_$feat_suffix + steps/make_mfcc.sh --mfcc-config $mfcc_config \ --cmd "$cmd" --nj $nj \ - $noise_data_dir exp/make_log_energy/${noise_data_id} log_energy_feats -fi - -if [ -z "$reco_vad_dir" ]; then - echo "reco-vad-dir must be provided" - exit 1 + $noise_data_dir exp/make_${feat_suffix}/${noise_data_id} $mfccdir +else + noise_data_dir=${noise_data_dir}_$feat_suffix fi targets_dir=irm_targets @@ -152,134 +159,15 @@ if [ $stage -le 8 ]; then /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$targets_dir/storage $targets_dir/storage fi + # Get SNR targets only for the overlapped speech labels. steps/segmentation/make_snr_targets.sh \ --nj $nj --cmd "$cmd --max-jobs-run $max_jobs_run" \ - --target-type Irm --compress true --apply-exp false \ + --target-type Irm --compress false --apply-exp true \ + --ali-rspecifier "ark,s,cs:cat ${corrupted_data_dir}/sad_seg.scp | segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames scp:- ark:- |" \ + overlapped_speech_labels.scp \ + --silence-phones 0 \ ${clean_data_dir} ${noise_data_dir} ${corrupted_data_dir} \ exp/make_irm_targets/${corrupted_data_id} $targets_dir fi -fi - -# Combine the VAD from the base recording and the VAD from the overlapping segments -# to create per-frame labels of the number of overlapping speech segments -# Unreliable segments are regions where no VAD labels were available for the -# overlapping segments. These can be later removed by setting deriv weights to 0. - -# Data dirs without speed perturbation -overlap_dir=exp/make_overlap_labels/${corrupted_data_id} -unreliable_dir=exp/make_overlap_labels/unreliable_${corrupted_data_id} -overlap_data_dir=$overlap_dir/overlap_data -unreliable_data_dir=$overlap_dir/unreliable_data - -mkdir -p $unreliable_dir - -if [ $stage -le 8 ]; then - cat $utt_vad_dir/sad_seg.scp | \ - steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps "ovlp" \ - | sort -k1,1 > ${corrupted_data_dir}/sad_seg.scp - utils/data/get_utt2num_frames.sh $corrupted_data_dir - utils/split_data.sh ${orig_corrupted_data_dir} $nj - - $cmd JOB=1:$nj $overlap_dir/log/get_overlap_seg.JOB.log \ - segmentation-init-from-overlap-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ - "scp:utils/filter_scp.pl ${orig_corrupted_data_dir}/split${nj}/JOB/utt2spk $corrupted_data_dir/sad_seg.scp |" \ - ark,t:$orig_corrupted_data_dir/overlapped_segments_info.txt \ - scp:$utt_vad_dir/sad_seg.scp ark:- ark:$unreliable_dir/unreliable_seg_speed_unperturbed.JOB.ark \| \ - segmentation-copy --keep-label=1 ark:- ark:- \| \ - segmentation-get-stats --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ - ark:- ark:- ark:/dev/null \| \ - segmentation-init-from-ali ark:- ark:$overlap_dir/overlap_seg_speed_unperturbed.JOB.ark -fi - -exit 1 - -if [ $stage -le 9 ]; then - mkdir -p $overlap_data_dir $unreliable_data_dir - cp $orig_corrupted_data_dir/wav.scp $overlap_data_dir - cp $orig_corrupted_data_dir/wav.scp $unreliable_data_dir - - # Create segments where there is definitely an overlap. - # Assume no more than 10 speakers overlap. 
- $cmd JOB=1:$nj $overlap_dir/log/process_to_segments.JOB.log \ - segmentation-post-process --remove-labels=0:1 \ - ark:$overlap_dir/overlap_seg_speed_unperturbed.JOB.ark ark:- \| \ - segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \ - segmentation-to-segments ark:- ark:$overlap_data_dir/utt2spk.JOB $overlap_data_dir/segments.JOB - - $cmd JOB=1:$nj $overlap_dir/log/get_unreliable_segments.JOB.log \ - segmentation-to-segments --single-speaker \ - ark:$unreliable_dir/unreliable_seg_speed_unperturbed.JOB.ark \ - ark:$unreliable_data_dir/utt2spk.JOB $unreliable_data_dir/segments.JOB - - for n in `seq $nj`; do cat $overlap_data_dir/utt2spk.$n; done > $overlap_data_dir/utt2spk - for n in `seq $nj`; do cat $overlap_data_dir/segments.$n; done > $overlap_data_dir/segments - for n in `seq $nj`; do cat $unreliable_data_dir/utt2spk.$n; done > $unreliable_data_dir/utt2spk - for n in `seq $nj`; do cat $unreliable_data_dir/segments.$n; done > $unreliable_data_dir/segments - - utils/fix_data_dir.sh $overlap_data_dir - utils/fix_data_dir.sh $unreliable_data_dir - - if $speed_perturb; then - utils/data/perturb_data_dir_speed_3way.sh $overlap_data_dir ${overlap_data_dir}_sp - utils/data/perturb_data_dir_speed_3way.sh $unreliable_data_dir ${unreliable_data_dir}_sp - fi -fi - -if $speed_perturb; then - overlap_data_dir=${overlap_data_dir}_sp - unreliable_data_dir=${unreliable_data_dir}_sp -fi - -# make $overlap_labels_dir an absolute pathname. -overlap_labels_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $overlap_labels_dir ${PWD}` - -if [ $stage -le 10 ]; then - utils/split_data.sh ${overlap_data_dir} $nj - - $cmd JOB=1:$nj $overlap_dir/log/get_overlap_speech_labels.JOB.log \ - utils/data/get_reco2utt.sh ${overlap_data_dir}/split${reco_nj}reco/JOB '&&' \ - segmentation-init-from-segments --shift-to-zero=false \ - ${overlap_data_dir}/split${reco_nj}reco/JOB/segments ark:- \| \ - segmentation-combine-segments-to-recordings ark:- ark,t:${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt \ - ark:- \| \ - segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \ - ark,scp:$overlap_labels_dir/overlapped_speech_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapped_speech_${corrupted_data_id}.JOB.scp -fi - -for n in `seq $reco_nj`; do - cat $overlap_labels_dir/overlapped_speech_${corrupted_data_id}.$n.scp -done > ${corrupted_data_dir}/overlapped_speech_labels.scp - -if [ $stage -le 11 ]; then - utils/data/get_reco2utt.sh ${unreliable_data_dir} - - # First convert the unreliable segments into a recording-level segmentation. - # Initialize a segmentation from utt2num_frames and set to 0, the regions - # of unreliable segments. At this stage deriv weights is 1 for all but the - # unreliable segment regions. - # Initialize a segmentation from the VAD labels and retain only the speech segments. - # Intersect this with the deriv weights segmentation from above. At this stage - # deriv weights is 1 for only the regions where base VAD label is 1 and - # the overlapping segment is not unreliable. Convert this to deriv weights. 
- $cmd JOB=1:$reco_nj $unreliable_dir/log/get_deriv_weights.JOB.log\ - segmentation-init-from-segments --shift-to-zero=false \ - "utils/filter_scp.pl -f 2 ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/segments |" ark:- \| \ - segmentation-combine-segments-to-recordings ark:- "ark,t:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/reco2utt |" \ - ark:- \| \ - segmentation-create-subsegments --filter-label=1 --subsegment-label=0 --ignore-missing \ - "ark:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt $corrupted_data_dir/utt2num_frames | segmentation-init-from-lengths ark,t:- ark:- |" \ - ark:- ark:- \| \ - segmentation-intersect-segments --mismatch-label=0 \ - "ark:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt $corrupted_data_dir/sad_seg.scp | segmentation-post-process --remove-labels=0:2:3 scp:- ark:- |" \ - ark:- ark:- \| \ - segmentation-post-process --remove-labels=0 ark:- ark:- \| \ - segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \ - steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ - ark,scp:$overlap_labels_dir/deriv_weights_for_overlapped_speech.JOB.ark,$overlap_labels_dir/deriv_weights_for_overlapped_speech.JOB.scp - - for n in `seq $reco_nj`; do - cat $overlap_labels_dir/deriv_weights_for_overlapped_speech.${n}.scp - done > $corrupted_data_dir/deriv_weights_for_overlapped_speech.scp -fi exit 0 diff --git a/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data.sh b/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data.sh new file mode 100755 index 00000000000..36eb4de2afe --- /dev/null +++ b/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data.sh @@ -0,0 +1,236 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +set -e +set -u +set -o pipefail + +. path.sh + +num_data_reps=5 +nj=40 +cmd=queue.pl +stage=-1 + +. utils/parse_options.sh + +if [ $# -ne 5 ]; then + echo "Usage: $0 " + echo " e.g.: $0 data/fisher_train_100k_sp_75k_hires_bp data/fisher_train_100k_sp_75k/overlapped_segments_info.txt exp/unsad/make_unsad_fisher_train_100k_sp/tri4_ali_fisher_train_100k_sp_vad_fisher_train_100k_sp exp/unsad/make_overlap_labels/fisher_train_100k_sp_75k overlap_labels" + exit 1 +fi + +corrupted_data_dir=$1 +orig_corrupted_data_dir=$2 +utt_vad_dir=$3 +tmpdir=$4 +overlap_labels_dir=$5 + +overlapped_segments_info=$orig_corrupted_data_dir/overlapped_segments_info.txt +corrupted_data_id=`basename $orig_corrupted_data_dir` + +for f in $corrupted_data_dir/feats.scp $overlapped_segments_info $utt_vad_dir/sad_seg.scp; do + [ ! -f $f ] && echo "Could not find file $f" && exit 1 +done + +overlap_dir=$tmpdir/make_overlap_labels_${corrupted_data_id} +unreliable_dir=$tmpdir/unreliable_${corrupted_data_id} + +mkdir -p $unreliable_dir + +# make $overlap_labels_dir an absolute pathname. +overlap_labels_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $overlap_labels_dir ${PWD}` + +# Combine the VAD from the base recording and the VAD from the overlapping segments +# to create per-frame labels of the number of overlapping speech segments +# Unreliable segments are regions where no VAD labels were available for the +# overlapping segments. These can be later removed by setting deriv weights to 0. 
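# The per-frame counting idea, shown in isolation as a hedged sketch: the
# frame-level segments format (<reco> <begin-frame> <end-frame>) and the file
# name segments.frames are made up for illustration; the real pipeline below
# does this with segmentation-get-stats on Kaldi segmentation objects.

```sh
# Count, for every frame of every recording, how many segments cover it.
awk '{ for (f = $2; f < $3; f++) count[$1, f]++ }
     END { for (key in count) { split(key, parts, SUBSEP);
             print parts[1], parts[2], count[key] } }' segments.frames |
  sort -k1,1 -k2,2n
```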
+
+if [ $stage -le 1 ]; then
+  for n in `seq $num_data_reps`; do
+    cat $utt_vad_dir/sad_seg.scp | \
+      awk -v n=$n '{print "ovlp"n"_"$0}'
+  done | sort -k1,1 > ${corrupted_data_dir}/sad_seg.scp
+  utils/data/get_utt2num_frames.sh $corrupted_data_dir
+  utils/split_data.sh ${corrupted_data_dir} $nj
+
+  $cmd JOB=1:$nj $overlap_dir/log/get_overlap_seg.JOB.log \
+    segmentation-init-from-additive-signals-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \
+    --additive-signals-segmentation-rspecifier=scp:$utt_vad_dir/sad_seg.scp \
+    --unreliable-segmentation-wspecifier="ark:| gzip -c > $unreliable_dir/unreliable_seg.JOB.gz" \
+    "scp:utils/filter_scp.pl ${corrupted_data_dir}/split${nj}/JOB/utt2spk $corrupted_data_dir/sad_seg.scp |" \
+    ark,t:$orig_corrupted_data_dir/overlapped_segments_info.txt ark:- \| \
+    segmentation-copy --keep-label=1 ark:- ark:- \| \
+    segmentation-get-stats --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \
+    ark:- ark:- ark:/dev/null \| \
+    segmentation-init-from-ali ark:- "ark:| gzip -c > $overlap_dir/overlap_seg.JOB.gz"
+fi
+
+if [ $stage -le 2 ]; then
+  $cmd JOB=1:$nj $overlap_dir/log/get_overlapped_speech_labels.JOB.log \
+    gunzip -c $overlap_dir/overlap_seg.JOB.gz \| \
+    segmentation-post-process --remove-labels=0:1 ark:- ark:- \| \
+    segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \
+    ark,scp:$overlap_labels_dir/overlapped_speech_labels_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapped_speech_labels_${corrupted_data_id}.JOB.scp
+
+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/overlapped_speech_labels_${corrupted_data_id}.$n.scp
+  done > ${corrupted_data_dir}/overlapped_speech_labels.scp
+fi
+
+if [ $stage -le 3 ]; then
+  # First convert the unreliable segments into a segmentation.
+  # Initialize a segmentation from utt2num_frames and set to 0 the regions
+  # of unreliable segments. At this stage deriv weights is 1 for all but the
+  # unreliable segment regions.
+  # Initialize a segmentation from the overlap labels and retain regions where
+  # there is speech from at least one speaker.
+  # Intersect this with the deriv weights segmentation from above.
+  # At this stage deriv weights is 1 for only the regions where there is
+  # at least one speaker and the overlapping segment is not unreliable.
+  # Convert this to deriv weights. 
+  $cmd JOB=1:$nj $unreliable_dir/log/get_deriv_weights_for_overlapped_speech.JOB.log \
+    utils/filter_scp.pl $corrupted_data_dir/split$nj/JOB/utt2spk $corrupted_data_dir/utt2num_frames \| \
+    segmentation-init-from-lengths ark,t:- ark:- \| \
+    segmentation-create-subsegments --filter-label=1 --subsegment-label=0 --ignore-missing \
+    ark:- "ark,s,cs:gunzip -c $unreliable_dir/unreliable_seg.JOB.gz | segmentation-to-segments ark:- - | segmentation-init-from-segments - ark:- |" ark:- \| \
+    segmentation-intersect-segments --mismatch-label=0 \
+    "ark:gunzip -c $overlap_dir/overlap_seg.JOB.gz | segmentation-post-process --remove-labels=0 --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- |" \
+    ark,s,cs:- ark:- \| segmentation-post-process --remove-labels=0 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \
+    steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \
+    ark,scp:$overlap_labels_dir/deriv_weights_for_overlapped_speech_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/deriv_weights_for_overlapped_speech_${corrupted_data_id}.JOB.scp

+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/deriv_weights_for_overlapped_speech_$corrupted_data_id.${n}.scp
+  done > $corrupted_data_dir/deriv_weights_for_overlapped_speech.scp
+fi
+
+if [ $stage -le 4 ]; then
+  # Get only the first speaker's labels as speech_feat, since we are not sure
+  # of the energy levels of the other speakers.
+  $cmd JOB=1:$nj $overlap_dir/log/get_first_speaker_labels.JOB.log \
+    gunzip -c $overlap_dir/overlap_seg.JOB.gz \| \
+    segmentation-post-process --remove-labels=0 --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \
+    steps/segmentation/convert_ali_to_vec.pl \| \
+    vector-to-feat ark:- \
+    ark,scp:$overlap_labels_dir/speech_feat_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/speech_feat_${corrupted_data_id}.JOB.scp
+
+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/speech_feat_${corrupted_data_id}.$n.scp
+  done > ${corrupted_data_dir}/speech_feat.scp
+fi
+
+if [ $stage -le 5 ]; then
+  $cmd JOB=1:$nj $unreliable_dir/log/get_deriv_weights.JOB.log \
+    utils/filter_scp.pl $corrupted_data_dir/split$nj/JOB/utt2spk $corrupted_data_dir/utt2num_frames \| \
+    segmentation-init-from-lengths ark,t:- ark:- \| \
+    segmentation-create-subsegments --filter-label=1 --subsegment-label=0 --ignore-missing \
+    ark:- "ark,s,cs:gunzip -c $unreliable_dir/unreliable_seg.JOB.gz | segmentation-to-segments ark:- - | segmentation-init-from-segments - ark:- |" ark:- \| \
+    segmentation-post-process --remove-labels=0 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \
+    steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \
+    ark,scp:$overlap_labels_dir/deriv_weights_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/deriv_weights_${corrupted_data_id}.JOB.scp
+
+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/deriv_weights_$corrupted_data_id.${n}.scp
+  done > $corrupted_data_dir/deriv_weights.scp
+fi
+
+exit 0
+
+####exit 1
+####
+####if [ $stage -le 9 ]; then
+####  mkdir -p $overlap_data_dir $unreliable_data_dir
+####  cp $orig_corrupted_data_dir/wav.scp $overlap_data_dir
+####  cp $orig_corrupted_data_dir/wav.scp $unreliable_data_dir
+####
+####  # Create segments where there is definitely an overlap.
+####  # Assume no more than 10 speakers overlap.
+#### $cmd JOB=1:$nj $overlap_dir/log/process_to_segments.JOB.log \ +#### segmentation-post-process --remove-labels=0:1 \ +#### ark:$overlap_dir/overlap_seg_speed_unperturbed.JOB.ark ark:- \| \ +#### segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \ +#### segmentation-to-segments ark:- ark:$overlap_data_dir/utt2spk.JOB $overlap_data_dir/segments.JOB +#### +#### $cmd JOB=1:$nj $overlap_dir/log/get_unreliable_segments.JOB.log \ +#### segmentation-to-segments --single-speaker \ +#### ark:$unreliable_dir/unreliable_seg_speed_unperturbed.JOB.ark \ +#### ark:$unreliable_data_dir/utt2spk.JOB $unreliable_data_dir/segments.JOB +#### +#### for n in `seq $nj`; do cat $overlap_data_dir/utt2spk.$n; done > $overlap_data_dir/utt2spk +#### for n in `seq $nj`; do cat $overlap_data_dir/segments.$n; done > $overlap_data_dir/segments +#### for n in `seq $nj`; do cat $unreliable_data_dir/utt2spk.$n; done > $unreliable_data_dir/utt2spk +#### for n in `seq $nj`; do cat $unreliable_data_dir/segments.$n; done > $unreliable_data_dir/segments +#### +#### utils/fix_data_dir.sh $overlap_data_dir +#### utils/fix_data_dir.sh $unreliable_data_dir +#### +#### if $speed_perturb; then +#### utils/data/perturb_data_dir_speed_3way.sh $overlap_data_dir ${overlap_data_dir}_sp +#### utils/data/perturb_data_dir_speed_3way.sh $unreliable_data_dir ${unreliable_data_dir}_sp +#### fi +####fi +#### +####if $speed_perturb; then +#### overlap_data_dir=${overlap_data_dir}_sp +#### unreliable_data_dir=${unreliable_data_dir}_sp +####fi +#### +##### make $overlap_labels_dir an absolute pathname. +####overlap_labels_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $overlap_labels_dir ${PWD}` +#### +####if [ $stage -le 10 ]; then +#### utils/split_data.sh ${overlap_data_dir} $nj +#### +#### $cmd JOB=1:$nj $overlap_dir/log/get_overlap_speech_labels.JOB.log \ +#### utils/data/get_reco2utt.sh ${overlap_data_dir}/split${reco_nj}reco/JOB '&&' \ +#### segmentation-init-from-segments --shift-to-zero=false \ +#### ${overlap_data_dir}/split${reco_nj}reco/JOB/segments ark:- \| \ +#### segmentation-combine-segments-to-recordings ark:- ark,t:${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt \ +#### ark:- \| \ +#### segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \ +#### ark,scp:$overlap_labels_dir/overlapped_speech_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapped_speech_${corrupted_data_id}.JOB.scp +####fi +#### +####for n in `seq $reco_nj`; do +#### cat $overlap_labels_dir/overlapped_speech_${corrupted_data_id}.$n.scp +####done > ${corrupted_data_dir}/overlapped_speech_labels.scp +#### +####if [ $stage -le 11 ]; then +#### utils/data/get_reco2utt.sh ${unreliable_data_dir} +#### +#### # First convert the unreliable segments into a recording-level segmentation. +#### # Initialize a segmentation from utt2num_frames and set to 0, the regions +#### # of unreliable segments. At this stage deriv weights is 1 for all but the +#### # unreliable segment regions. +#### # Initialize a segmentation from the VAD labels and retain only the speech segments. +#### # Intersect this with the deriv weights segmentation from above. At this stage +#### # deriv weights is 1 for only the regions where base VAD label is 1 and +#### # the overlapping segment is not unreliable. Convert this to deriv weights. 
+#### $cmd JOB=1:$reco_nj $unreliable_dir/log/get_deriv_weights.JOB.log\ +#### segmentation-init-from-segments --shift-to-zero=false \ +#### "utils/filter_scp.pl -f 2 ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/segments |" ark:- \| \ +#### segmentation-combine-segments-to-recordings ark:- "ark,t:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/reco2utt |" \ +#### ark:- \| \ +#### segmentation-create-subsegments --filter-label=1 --subsegment-label=0 --ignore-missing \ +#### "ark:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt $corrupted_data_dir/utt2num_frames | segmentation-init-from-lengths ark,t:- ark:- |" \ +#### ark:- ark:- \| \ +#### segmentation-intersect-segments --mismatch-label=0 \ +#### "ark:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt $corrupted_data_dir/sad_seg.scp | segmentation-post-process --remove-labels=0:2:3 scp:- ark:- |" \ +#### ark:- ark:- \| \ +#### segmentation-post-process --remove-labels=0 ark:- ark:- \| \ +#### segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \ +#### steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ +#### ark,scp:$overlap_labels_dir/deriv_weights_for_overlapped_speech.JOB.ark,$overlap_labels_dir/deriv_weights_for_overlapped_speech.JOB.scp +#### +#### for n in `seq $reco_nj`; do +#### cat $overlap_labels_dir/deriv_weights_for_overlapped_speech.${n}.scp +#### done > $corrupted_data_dir/deriv_weights_for_overlapped_speech.scp +####fi +#### +####exit 0 From 687ce5d5a3d9e14184de31bae56ab2d205905df1 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 16 Dec 2016 11:55:34 -0800 Subject: [PATCH 133/530] A couple of fixes in chain model training: fix a bug in compilation (leads to crash in rare circumstances); discard partial minibatches when merging egs, which was always the intention. 
(#1261) --- src/chainbin/nnet3-chain-merge-egs.cc | 10 +++- src/nnet3/nnet-compile-utils.cc | 79 +++++++++++++++++++-------- src/nnet3/nnet-compile.cc | 22 +++----- src/nnet3bin/nnet3-merge-egs.cc | 15 +++-- 4 files changed, 78 insertions(+), 48 deletions(-) diff --git a/src/chainbin/nnet3-chain-merge-egs.cc b/src/chainbin/nnet3-chain-merge-egs.cc index 45dca4051f3..543ad3aa049 100644 --- a/src/chainbin/nnet3-chain-merge-egs.cc +++ b/src/chainbin/nnet3-chain-merge-egs.cc @@ -43,12 +43,17 @@ int main(int argc, char *argv[]) { bool compress = false; int32 minibatch_size = 64; + bool discard_partial_minibatches = true; ParseOptions po(usage); po.Register("minibatch-size", &minibatch_size, "Target size of minibatches " "when merging (see also --measure-output-frames)"); po.Register("compress", &compress, "If true, compress the output examples " "(not recommended unless you are writing to disk"); + po.Register("discard-partial-minibatches", &discard_partial_minibatches, + "discard any partial minibatches of 'uneven' size that may be " + "encountered at the end; 'true' is recommended, to avoid " + "incurring compilation costs."); po.Read(argc, argv); @@ -79,7 +84,8 @@ int main(int argc, char *argv[]) { example_reader.Next(); num_read++; - if (minibatch_ready || (example_reader.Done() && !examples.empty())) { + if (minibatch_ready || (!discard_partial_minibatches && + (example_reader.Done() && !examples.empty()))) { NnetChainExample merged_eg; MergeChainExamples(compress, &examples, &merged_eg); std::ostringstream ostr; @@ -97,5 +103,3 @@ int main(int argc, char *argv[]) { return -1; } } - - diff --git a/src/nnet3/nnet-compile-utils.cc b/src/nnet3/nnet-compile-utils.cc index 3ff7c6fe3d8..fa23d4d305a 100644 --- a/src/nnet3/nnet-compile-utils.cc +++ b/src/nnet3/nnet-compile-utils.cc @@ -434,33 +434,62 @@ void EnsureContiguousProperty( } } -// Function to split a list of pairs into vector of lists of unique pairs + + +/** + This function splits a vector of pairs into a list of vectors of pairs. + [note: by 'vector' we mean something that has a meaningful index that we care + about; by 'list' we mean a collection of elements to be iterated over, without + (in this case) meaningful indexes or even order. + + @param [in] list A vector of pairs; these pairs should be either (-1,-1) + or (a,b) for a >= 0, b >= 0. At least one element of 'list' + must be different from (-1,-1). + @param [out] split_lists A list, in arbitrary order, of vectors of pairs. + It has the following relationship with 'list': + - Size: for each j, split_lists[j].size() == list.size(). + - Contents must match input: For each i: + - If list[i] == (-1, -1), then + split_lists[j][i] == (-1, -1) for all j. + - If list[i] != (-1, -1), then + split_lists[j][i] == (-1, -1) for *all but one* j, and + for the remaining j, split_lists[j][i] == list[i]. + - Uniqueness: for no j should split_lists[j] contain + any duplicate elements (except the pair (-1,-1), which is + allowed to exist in duplicate form). + To satisfy the above conditions, this function will create + as many lists in split_lists (i.e. as many j values) as the + number of times that the most frequent pair in 'list' + repeats other than the pair (-1,-1), e.g. if the pair + (10,11) appears 4 times in 'list' and that is the most, + split_lists->size() == 4. 
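+
+   A small worked example (added for illustration; it follows from the
+   conditions above): if
+     list = ( (10,11), (-1,-1), (10,11), (3,4) ),
+   then one valid output is
+     split_lists = ( ( (10,11), (-1,-1), (-1,-1), (3,4) ),
+                     ( (-1,-1), (-1,-1), (10,11), (-1,-1) ) ),
+   with two lists, because the most frequent pair (10,11) appears twice.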
+*/
 void SplitPairList(std::vector<std::pair<int32, int32> >& list,
                    std::vector<std::vector<std::pair<int32, int32> > >* split_lists) {
   split_lists->clear();
+  typedef unordered_map<std::pair<int32, int32>,
+      int32, PairHasher<int32> > MapType;
+  // this maps a pair not equal to -1,-1, to the number of times we've already seen it.
+  MapType pair_to_count;
+  int32 cur_num_lists = 0;
+
   for (int32 i = 0; i < list.size(); i++) {
-    // searching for the new pair in the new_split_lists
-    bool added_pair = false;
-    if ( list[i].first == -1)
+    if (list[i].first == -1)
       continue;
-    for (int32 j = 0; j < split_lists->size(); j++)  {
-      std::vector<std::pair<int32, int32> >::const_iterator iter
-          = std::find_if((*split_lists)[j].begin(),
-                         (*split_lists)[j].end(),
-                         PairIsEqualComparator(list[i]));
-      if (iter == (*split_lists)[j].end())  {
-        // this pair is not in this list
-        (*split_lists)[j][i] = list[i];
-        added_pair = true;
-        break;
-      }
-    }
-    if (!added_pair)  {
-      std::vector<std::pair<int32, int32> > list_of_pairs(list.size(),
-                                                          std::make_pair(-1, -1));
-      list_of_pairs[i] = list[i];
-      split_lists->push_back(list_of_pairs);
+    MapType::iterator iter = pair_to_count.find(list[i]);
+    int32 this_count;
+    if (iter == pair_to_count.end())
+      pair_to_count[list[i]] = this_count = 1;
+    else
+      this_count = (++iter->second);
+    if (this_count > cur_num_lists) {
+      KALDI_ASSERT(this_count == cur_num_lists + 1);
+      split_lists->resize(this_count);
+      split_lists->back().resize(list.size(),
+                                 std::pair<int32, int32>(-1, -1));
+      cur_num_lists++;
     }
+    (*split_lists)[this_count-1][i] = list[i];
   }
   if (split_lists->size() == 0)
     KALDI_ERR << "Input list has just dummy pairs";
@@ -477,8 +506,12 @@ void SplitLocationsBackward(
     std::vector<int32> second_values;
     if (ConvertToIndexes(split_lists_intermediate[i],
                          &first_value, &second_values)) {
-      // the .first values are the same
-      if (first_value == -1) continue;  // don't output anything for this.
+      // the .first values in split_lists_intermediate[i] are all the same (or
+      // equal to -1).
+      if (first_value == -1) {
+        // all the .first values were equal to -1.  this is like a NULL marker.
+        continue;
+      }
       std::vector<std::vector<int32> > second_values_split;
       EnsureContiguousProperty(second_values, &second_values_split);
       if (second_values_split.size() == 1) {
diff --git a/src/nnet3/nnet-compile.cc b/src/nnet3/nnet-compile.cc
index cee9e8f9bd7..42ca5d7a83e 100644
--- a/src/nnet3/nnet-compile.cc
+++ b/src/nnet3/nnet-compile.cc
@@ -613,22 +613,16 @@ void Compiler::DoBackwardComputationFromSubmatLocations(
   // trickier to implement efficiently on the GPU, there may be cases
   // which we will refuse to implement backprop for if we get here.
-  int32 num_rows = submat_locations.size();
-  std::vector<std::pair<int32, int32> >::const_iterator
-      iter = submat_locations.begin(), end = submat_locations.end();
-  int32 first_submat = iter->first;
-  for (++iter; iter != end; ++iter)
-    if (iter->first != first_submat)
-      break;
-  bool all_same_submatrix = (iter == end);
-  if (all_same_submatrix) {
-    int32 input_deriv_submatrix_index = first_submat;
-    std::vector<int32> indexes(num_rows);
-    for (int32 i = 0; i < num_rows; i++)
-      indexes[i] = submat_locations[i].second;
+
+
+  int32 first_value;
+  std::vector<int32> second_values;
+  if (ConvertToIndexes(submat_locations, &first_value,
+                       &second_values)) {
+    int32 input_deriv_submatrix_index = first_value;
     DoBackwardComputationFromIndexes(deriv_submatrix_index,
                                      input_deriv_submatrix_index,
-                                     indexes,
+                                     second_values,
                                      computation);
     return;
   } else {
diff --git a/src/nnet3bin/nnet3-merge-egs.cc b/src/nnet3bin/nnet3-merge-egs.cc
index 8627671f53a..6438653a802 100644
--- a/src/nnet3bin/nnet3-merge-egs.cc
+++ b/src/nnet3bin/nnet3-merge-egs.cc
@@ -58,7 +58,7 @@ int main(int argc, char *argv[]) {
         "e.g.\n"
         "nnet3-merge-egs --minibatch-size=512 ark:1.egs ark:- | nnet3-train-simple ... \n"
         "See also nnet3-copy-egs\n";
-
+
     bool compress = false;
     int32 minibatch_size = 512;
     bool measure_output_frames = true;
@@ -74,9 +74,10 @@ int main(int argc, char *argv[]) {
     po.Register("compress", &compress, "If true, compress the output examples "
                 "(not recommended unless you are writing to disk)");
     po.Register("discard-partial-minibatches", &discard_partial_minibatches,
-                "discard any partial minibatches of 'uneven' size that may be "
-                "encountered at the end.");
-
+                "discard any partial minibatches of 'uneven' size that may be "
+                "encountered at the end; 'true' is recommended, to avoid "
+                "incurring compilation costs.");
+
     po.Read(argc, argv);

     if (po.NumArgs() != 2) {
@@ -89,12 +90,12 @@ int main(int argc, char *argv[]) {

     SequentialNnetExampleReader example_reader(examples_rspecifier);
     NnetExampleWriter example_writer(examples_wspecifier);
-
+
     std::vector<NnetExample> examples;
     examples.reserve(minibatch_size);

     int32 cur_num_output_frames = 0;
-
+
     int64 num_read = 0, num_written = 0;
     while (!example_reader.Done()) {
       const NnetExample &cur_eg = example_reader.Value();
@@ -130,5 +131,3 @@ int main(int argc, char *argv[]) {
     return -1;
   }
 }
-
-
From 37bffd27667196cbe31c2910e0887cac2de8e9e4 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Fri, 16 Dec 2016 17:59:29 -0500
Subject: [PATCH 134/530] Some draft code, on the way to changing egs-extraction code to allow different-sized egs, and different begin/end l/r context

---
 src/chainbin/nnet3-chain-get-egs.cc |  4 +-
 src/nnet3/nnet-example-utils.h      | 93 ++++++++++++++++++++++++++++-
 src/nnet3bin/nnet3-get-egs.cc       | 32 +++++-----
 3 files changed, 109 insertions(+), 20 deletions(-)

diff --git a/src/chainbin/nnet3-chain-get-egs.cc b/src/chainbin/nnet3-chain-get-egs.cc
index 968a50af889..6f77a3c208b 100644
--- a/src/chainbin/nnet3-chain-get-egs.cc
+++ b/src/chainbin/nnet3-chain-get-egs.cc
@@ -252,9 +252,7 @@ int main(int argc, char *argv[]) {
                 "that each example contains.  Will be rounded up to a multiple "
                 "of --frame-subsampling-factor.");
     po.Register("num-frames-overlap", &num_frames_overlap, "Number of frames of "
-                "overlap between each example (could be useful in conjunction "
-                "--min-deriv-time and --max-deriv-time, to avoid wasting data). "
" - "Each time we shift by --num-frames minus --num-frames-overlap."); + "overlap between each example."); po.Register("ivectors", &ivector_rspecifier, "Rspecifier of ivector " "features, as a matrix."); po.Register("srand", &srand_seed, "Seed for random number generator " diff --git a/src/nnet3/nnet-example-utils.h b/src/nnet3/nnet-example-utils.h index 6ebffcf1d50..c0f76f3bf21 100644 --- a/src/nnet3/nnet-example-utils.h +++ b/src/nnet3/nnet-example-utils.h @@ -71,8 +71,94 @@ void WriteVectorAsChar(std::ostream &os, // Reads data written by WriteVectorAsChar. void ReadVectorAsChar(std::istream &is, - bool binary, - Vector *vec); + bool binary, + Vector *vec); + + +// Warning: after reading in the values from the command line +// (Register() and then then po.Read()), you should then call ComputeDerived() +// to set up the 'derived values' (parses 'num_frames_str'). +struct ExampleExtractionConfig { + int32 left_context; + int32 right_context; + int32 left_context_initial; + int32 right_context_final; + int32 num_frames_overlap; + std::string num_frames_str; + + + // The following parameters are derived parameters, computed by + // ComputeDerived(). + int32 num_frames; // the 'principal' number of frames + std::vector num_frames_alternative; + + ExampleExtractionConfig(): + left_context(0), right_context(0), + left_context_initial(-1), right_context_initial(-1), + num_frames_overlap(0), + num_frames_str("1"), num_frames(-1) { } + + /// This function decodes 'num_frames_str' into 'num_frames' and 'num_frames_alternatives', + /// and ensures that 'num_frames', and the members of num_frames_alternatives' are + /// multiples of 'frame_subsampling_factor'. + /// + void ComputeDerived(); + + void Register(OptionsItf *po) { + po->Register("left-context", &left_context, "Number of frames of left " + "context of input features that are added to each " + "example"); + po->Register("right-context", &right_context, "Number of frames of right " + "context of input features that are added to each " + "example"); + po->Register("left-context-initial", &left_context, "Number of frames " + "of left context of input features that are added to each " + "example at the start of the utterance (if <0, this " + "defaults to the same as --left-context)"); + po->Register("right-context-final", &right_context, "Number of frames " + "of right context of input features that are added to each " + "example at the end of the utterance (if <0, this " + "defaults to the same as --right-context)"); + po->Register("right-context", &right_context, "Number of frames of right " + "context of input features that are added to each " + "example"); + po->Register("num-frames", &num_frames_str, "Number of frames with labels " + "that each example contains (i.e. the left and right context " + "are to be added to this). May just be an integer (e.g. " + "--num-frames=8), or an principal value followed by " + "alternative values to be used at most once for each utterance " + "to deal with odd-sized input, e.g. --num-frames=40,25,50 means " + "that most of the time the number of frames will be 40, but to " + "deal with odd-sized inputs we may also generate egs with these " + "other sizes. 
+                 "closest multiple of --frame-subsampling-factor.");
+    po->Register("num-frames-overlap", &num_frames_overlap, "Number of frames of "
+                 "overlap between adjacent examples (advisory, will not be "
+                 "exactly enforced)");
+    po->Register("frame-subsampling-factor", &frame_subsampling_factor, "Used "
+                 "if the frame-rate of the output labels in the generated "
+                 "examples will be less than the frame-rate at the input");
+  }
+};
+
+
+
+// (Draft interface; the exact output argument of ComputeExampleTimeInfo is
+// still to be finalized.)
+struct ExampleTimeInfo;  // defined below.
+
+void ComputeExampleTimeInfo(const ExampleExtractionConfig &config,
+                            int32 num_frames_in_utt,
+                            std::vector<ExampleTimeInfo> *time_info);
+
+void SplitIntoRanges(int32 num_frames,
+                     int32 frames_per_range,
+                     std::vector<int32> *range_starts);
+
+
+
+struct ExampleTimeInfo {
+  int32 first_frame;
+  int32 num_frames;
+  int32 left_context;
+  int32 right_context;
+};
+

 // This function rounds up the quantities 'num_frames' and 'num_frames_overlap'
 // to the nearest multiple of the frame_subsampling_factor
@@ -81,6 +167,9 @@ void RoundUpNumFrames(int32 frame_subsampling_factor,
                       int32 *num_frames_overlap);


+
+
+
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3bin/nnet3-get-egs.cc b/src/nnet3bin/nnet3-get-egs.cc
index 75f264f1ceb..897ffad7b48 100644
--- a/src/nnet3bin/nnet3-get-egs.cc
+++ b/src/nnet3bin/nnet3-get-egs.cc
@@ -43,7 +43,7 @@ static void ProcessFile(const MatrixBase<BaseFloat> &feats,
                         int64 *num_egs_written,
                         NnetExampleWriter *example_writer) {
   KALDI_ASSERT(feats.NumRows() == static_cast<int32>(pdf_post.size()));
-
+
   for (int32 t = 0; t < feats.NumRows(); t += frames_per_eg) {

     // actual_frames_per_eg is the number of frames with nonzero
@@ -57,7 +57,7 @@ static void ProcessFile(const MatrixBase<BaseFloat> &feats,

     int32 tot_frames = left_context + frames_per_eg + right_context;

     Matrix<BaseFloat> input_frames(tot_frames, feats.NumCols(), kUndefined);
-
+
     // Set up "input_frames".
     for (int32 j = -left_context; j < frames_per_eg + right_context; j++) {
       int32 t2 = j + t;
@@ -69,7 +69,7 @@ static void ProcessFile(const MatrixBase<BaseFloat> &feats,
     }

     NnetExample eg;
-
+
     // call the regular input "input".
     eg.io.push_back(NnetIo("input", - left_context,
                            input_frames));
@@ -93,10 +93,10 @@ static void ProcessFile(const MatrixBase<BaseFloat> &feats,
       labels[i] = pdf_post[t + i];
     // remaining posteriors for frames are empty.
eg.io.push_back(NnetIo("output", num_pdfs, 0, labels)); - + if (compress) eg.Compress(); - + std::ostringstream os; os << utt_id << "-" << t; @@ -137,30 +137,32 @@ int main(int argc, char *argv[]) { "nnet3-get-egs --num-pdfs=2658 --left-context=12 --right-context=9 --num-frames=8 \"$feats\"\\\n" "\"ark:gunzip -c exp/nnet/ali.1.gz | ali-to-pdf exp/nnet/1.nnet ark:- ark:- | ali-to-post ark:- ark:- |\" \\\n" " ark:- \n"; - + bool compress = true; int32 num_pdfs = -1, left_context = 0, right_context = 0, num_frames = 1, length_tolerance = 100; - + std::string ivector_rspecifier; - + ParseOptions po(usage); po.Register("compress", &compress, "If true, write egs in " "compressed format."); po.Register("num-pdfs", &num_pdfs, "Number of pdfs in the acoustic " "model"); po.Register("left-context", &left_context, "Number of frames of left " - "context the neural net requires."); + "context of input features that are added to each " + "example"); po.Register("right-context", &right_context, "Number of frames of right " - "context the neural net requires."); + "context of input features that are added to each " + "example"); po.Register("num-frames", &num_frames, "Number of frames with labels " "that each example contains."); po.Register("ivectors", &ivector_rspecifier, "Rspecifier of ivector " "features, as a matrix."); po.Register("length-tolerance", &length_tolerance, "Tolerance for " "difference in num-frames between feat and ivector matrices"); - + po.Read(argc, argv); if (po.NumArgs() != 3) { @@ -170,7 +172,7 @@ int main(int argc, char *argv[]) { if (num_pdfs <= 0) KALDI_ERR << "--num-pdfs options is required."; - + std::string feature_rspecifier = po.GetArg(1), pdf_post_rspecifier = po.GetArg(2), @@ -181,10 +183,10 @@ int main(int argc, char *argv[]) { RandomAccessPosteriorReader pdf_post_reader(pdf_post_rspecifier); NnetExampleWriter example_writer(examples_wspecifier); RandomAccessBaseFloatMatrixReader ivector_reader(ivector_rspecifier); - + int32 num_done = 0, num_err = 0; int64 num_frames_written = 0, num_egs_written = 0; - + for (; !feat_reader.Done(); feat_reader.Next()) { std::string key = feat_reader.Key(); const Matrix &feats = feat_reader.Value(); @@ -221,7 +223,7 @@ int main(int argc, char *argv[]) { num_err++; continue; } - + ProcessFile(feats, ivector_feats, pdf_post, key, compress, num_pdfs, left_context, right_context, num_frames, &num_frames_written, &num_egs_written, From e0fc5d1287981cf85092c33ff5b9dd617d439dbf Mon Sep 17 00:00:00 2001 From: LvHang Date: Sun, 18 Dec 2016 01:41:30 -0500 Subject: [PATCH 135/530] create temporary files atomically in utils/mkgraph.sh (#1272) Removes the possibility for crashes due to partial files when scripts are interrupted partway. --- egs/wsj/s5/utils/mkgraph.sh | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/egs/wsj/s5/utils/mkgraph.sh b/egs/wsj/s5/utils/mkgraph.sh index 3305d628f83..42204b85e7d 100755 --- a/egs/wsj/s5/utils/mkgraph.sh +++ b/egs/wsj/s5/utils/mkgraph.sh @@ -79,35 +79,45 @@ P=$(tree-info $tree | grep "central-position" | cut -d' ' -f2) || { echo "Error echo "$0: WARNING: chain models need '--self-loop-scale 1.0'"; mkdir -p $lang/tmp +trap "rm -f $lang/tmp/LG.fst.$$" EXIT HUP INT PIPE TERM # Note: [[ ]] is like [ ] but enables certain extra constructs, e.g. || in # place of -o if [[ ! 
-s $lang/tmp/LG.fst || $lang/tmp/LG.fst -ot $lang/G.fst || \ $lang/tmp/LG.fst -ot $lang/L_disambig.fst ]]; then fsttablecompose $lang/L_disambig.fst $lang/G.fst | fstdeterminizestar --use-log=true | \ fstminimizeencoded | fstpushspecial | \ - fstarcsort --sort_type=ilabel > $lang/tmp/LG.fst || exit 1; + fstarcsort --sort_type=ilabel > $lang/tmp/LG.fst.$$ || exit 1; + mv $lang/tmp/LG.fst.$$ $lang/tmp/LG.fst fstisstochastic $lang/tmp/LG.fst || echo "[info]: LG not stochastic." fi - clg=$lang/tmp/CLG_${N}_${P}.fst - -if [[ ! -s $clg || $clg -ot $lang/tmp/LG.fst ]]; then +clg_tmp=$clg.$$ +ilabels=$lang/tmp/ilabels_${N}_${P} +ilabels_tmp=$ilabels.$$ +trap "rm -f $clg_tmp $ilabels_tmp" EXIT HUP INT PIPE TERM +if [[ ! -s $clg || $clg -ot $lang/tmp/LG.fst \ + || ! -s $ilabels || $ilabels -ot $lang/tmp/LG.fst ]]; then fstcomposecontext --context-size=$N --central-position=$P \ --read-disambig-syms=$lang/phones/disambig.int \ --write-disambig-syms=$lang/tmp/disambig_ilabels_${N}_${P}.int \ - $lang/tmp/ilabels_${N}_${P} < $lang/tmp/LG.fst |\ - fstarcsort --sort_type=ilabel > $clg - fstisstochastic $clg || echo "[info]: CLG not stochastic." + $ilabels_tmp < $lang/tmp/LG.fst |\ + fstarcsort --sort_type=ilabel > $clg_tmp + mv $clg_tmp $clg + mv $ilabels_tmp $ilabels + fstisstochastic $clg || echo "[info]: CLG not stochastic." fi +trap "rm -f $dir/Ha.fst.$$" EXIT HUP INT PIPE TERM if [[ ! -s $dir/Ha.fst || $dir/Ha.fst -ot $model \ || $dir/Ha.fst -ot $lang/tmp/ilabels_${N}_${P} ]]; then make-h-transducer --disambig-syms-out=$dir/disambig_tid.int \ --transition-scale=$tscale $lang/tmp/ilabels_${N}_${P} $tree $model \ - > $dir/Ha.fst || exit 1; + > $dir/Ha.fst.$$ || exit 1; + mv $dir/Ha.fst.$$ $dir/Ha.fst fi +trap "rm -f $dir/HCLGa.fst.$$" EXIT HUP INT PIPE TERM if [[ ! -s $dir/HCLGa.fst || $dir/HCLGa.fst -ot $dir/Ha.fst || \ $dir/HCLGa.fst -ot $clg ]]; then if $remove_oov; then @@ -117,14 +127,16 @@ if [[ ! -s $dir/HCLGa.fst || $dir/HCLGa.fst -ot $dir/Ha.fst || \ fi fsttablecompose $dir/Ha.fst "$clg" | fstdeterminizestar --use-log=true \ | fstrmsymbols $dir/disambig_tid.int | fstrmepslocal | \ - fstminimizeencoded > $dir/HCLGa.fst || exit 1; + fstminimizeencoded > $dir/HCLGa.fst.$$ || exit 1; + mv $dir/HCLGa.fst.$$ $dir/HCLGa.fst fstisstochastic $dir/HCLGa.fst || echo "HCLGa is not stochastic" fi +trap "rm -f $dir/HCLG.fst.$$" EXIT HUP INT PIPE TERM if [[ ! -s $dir/HCLG.fst || $dir/HCLG.fst -ot $dir/HCLGa.fst ]]; then add-self-loops --self-loop-scale=$loopscale --reorder=true \ - $model < $dir/HCLGa.fst > $dir/HCLG.fst || exit 1; - + $model < $dir/HCLGa.fst > $dir/HCLG.fst.$$ || exit 1; + mv $dir/HCLG.fst.$$ $dir/HCLG.fst if [ $tscale == 1.0 -a $loopscale == 1.0 ]; then # No point doing this test if transition-scale not 1, as it is bound to fail. fstisstochastic $dir/HCLG.fst || echo "[info]: final HCLG is not stochastic." 
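A note on the pattern introduced by this commit: each stage writes to a file
suffixed with the shell's PID ($$), installs a trap to delete that file on exit
or interruption, and renames it into place only after the producing command
succeeds.  A minimal sketch of the idiom (illustrative only; 'some-fst-pipeline'
and 'out.fst' are hypothetical names, not from the patch):

  trap "rm -f $dir/out.fst.$$" EXIT HUP INT PIPE TERM
  some-fst-pipeline > $dir/out.fst.$$ || exit 1;
  mv $dir/out.fst.$$ $dir/out.fst
  # Readers either see no $dir/out.fst at all or a complete one: rename within
  # a filesystem is atomic, so a partial file is never visible under the final
  # name.

This is why an interrupted run can no longer leave behind a truncated LG.fst or
HCLG.fst that a later run would mistake for a valid cached result.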
From 3368166cf2af1f6fadcc9899e67b717385d575b7 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Sun, 18 Dec 2016 01:39:54 -0500
Subject: [PATCH 136/530] Bug fix in nnet3 training

Conflicts:
	egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
---
 .../steps/libs/nnet3/train/frame_level_objf/common.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
index 0b0149ece3d..b8a28d2e2bf 100644
--- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
+++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
@@ -98,6 +98,12 @@ def train_new_models(dir, iter, srand, num_jobs,
         cache_write_opt = "--write-cache={dir}/cache.{iter}".format(
             dir=dir, iter=iter+1)

+        minibatch_opts = "--minibatch-size={0}".format(minibatch_size)
+
+        if chunk_level_training:
+            minibatch_opts = "{0} --measure-output-frames=false".format(
+                minibatch_opts)
+
         process_handle = common_lib.run_job(
             """{command} {train_queue_opt} {dir}/log/train.{iter}.{job}.log \
                     nnet3-train {parallel_train_opts} {cache_read_opt} \
@@ -109,8 +115,7 @@ def train_new_models(dir, iter, srand, num_jobs,
                     """ark:{egs_dir}/egs.{archive_index}.ark ark:- |{extra_egs_copy_cmd}"""
                     """nnet3-shuffle-egs --buffer-size={shuffle_buffer_size} """
                     """--srand={srand} ark:- ark:- | """
-                    """nnet3-merge-egs --minibatch-size={minibatch_size} """
-                    """--measure-output-frames=false """
+                    """nnet3-merge-egs {minibatch_opts} """
                     """--discard-partial-minibatches=true ark:- ark:- |" \
                 {dir}/{next_iter}.{job}.raw""".format(
                     command=run_opts.command,
@@ -124,12 +129,12 @@ def train_new_models(dir, iter, srand, num_jobs,
                     frame_opts=(""
                                 if chunk_level_training
                                 else "--frame={0}".format(frame)),
+                    minibatch_opts=minibatch_opts,
                     momentum=momentum, max_param_change=max_param_change,
                     deriv_time_opts=" ".join(deriv_time_opts),
                     raw_model=raw_model_string,
                     context_opts=context_opts, egs_dir=egs_dir,
                     archive_index=archive_index,
                     shuffle_buffer_size=shuffle_buffer_size,
-                    minibatch_size=minibatch_size,
                     extra_egs_copy_cmd=extra_egs_copy_cmd), wait=False)

From a9bdebb314913d90d23bb89d45ad3bfc34dd0a9d Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Sun, 18 Dec 2016 02:04:58 -0500
Subject: [PATCH 137/530] Bug fix in nnet3 training (#1275)

Fixes a bug that would have affected nnet3 (non-chain) TDNN training since PR
#1066 was merged 2 weeks ago.  Would have slowed it down, and affected results
in an unpredictable way.
---
 .../libs/nnet3/train/frame_level_objf/common.py      | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
index 87cae801e90..b4722c1ad9d 100644
--- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
+++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
@@ -84,6 +84,12 @@ def train_new_models(dir, iter, srand, num_jobs,
         cache_write_opt = "--write-cache={dir}/cache.{iter}".format(
             dir=dir, iter=iter+1)

+        minibatch_opts = "--minibatch-size={0}".format(minibatch_size)
+
+        if chunk_level_training:
+            minibatch_opts = "{0} --measure-output-frames=false".format(
+                minibatch_opts)
+
         process_handle = common_lib.run_job(
             """{command} {train_queue_opt} {dir}/log/train.{iter}.{job}.log \
                     nnet3-train {parallel_train_opts} {cache_read_opt} \
@@ -95,8 +101,7 @@ def train_new_models(dir, iter, srand, num_jobs,
                     """ark:{egs_dir}/egs.{archive_index}.ark ark:- |"""
                     """nnet3-shuffle-egs --buffer-size={shuffle_buffer_size} """
                     """--srand={srand} ark:- ark:- | """
-                    """nnet3-merge-egs --minibatch-size={minibatch_size} """
-                    """--measure-output-frames=false """
+                    """nnet3-merge-egs {minibatch_opts} """
                     """--discard-partial-minibatches=true ark:- ark:- |" \
                 {dir}/{next_iter}.{job}.raw""".format(
                     command=run_opts.command,
@@ -110,12 +115,12 @@ def train_new_models(dir, iter, srand, num_jobs,
                     frame_opts=(""
                                 if chunk_level_training
                                 else "--frame={0}".format(frame)),
+                    minibatch_opts=minibatch_opts,
                     momentum=momentum, max_param_change=max_param_change,
                     deriv_time_opts=" ".join(deriv_time_opts),
                     raw_model=raw_model_string,
                     context_opts=context_opts, egs_dir=egs_dir,
                     archive_index=archive_index,
-                    shuffle_buffer_size=shuffle_buffer_size,
-                    minibatch_size=minibatch_size), wait=False)
+                    shuffle_buffer_size=shuffle_buffer_size), wait=False)

     processes.append(process_handle)

From ba58e8e55ed1777969e670980dbbc17c0eef530c Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Sun, 18 Dec 2016 19:37:10 -0800
Subject: [PATCH 138/530] Revert "Bug fix in minibatch-size in DNN CE training"
 (#1276)

---
 .../libs/nnet3/train/frame_level_objf/common.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
index b4722c1ad9d..87cae801e90 100644
--- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
+++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
@@ -84,12 +84,6 @@ def train_new_models(dir, iter, srand, num_jobs,
         cache_write_opt = "--write-cache={dir}/cache.{iter}".format(
             dir=dir, iter=iter+1)

-        minibatch_opts = "--minibatch-size={0}".format(minibatch_size)
-
-        if chunk_level_training:
-            minibatch_opts = "{0} --measure-output-frames=false".format(
-                minibatch_opts)
-
         process_handle = common_lib.run_job(
             """{command} {train_queue_opt} {dir}/log/train.{iter}.{job}.log \
                     nnet3-train {parallel_train_opts} {cache_read_opt} \
@@ -101,7 +95,8 @@ def train_new_models(dir, iter, srand, num_jobs,
                     """ark:{egs_dir}/egs.{archive_index}.ark ark:- |"""
                     """nnet3-shuffle-egs --buffer-size={shuffle_buffer_size} """
                     """--srand={srand} ark:- ark:- | """
-                    """nnet3-merge-egs {minibatch_opts} """
+                    """nnet3-merge-egs --minibatch-size={minibatch_size} """
+                    """--measure-output-frames=false """
                     """--discard-partial-minibatches=true ark:- ark:- |" \
                 {dir}/{next_iter}.{job}.raw""".format(
                     command=run_opts.command,
@@ -115,12 +110,12 @@ def train_new_models(dir, iter, srand, num_jobs,
                     frame_opts=(""
                                 if chunk_level_training
                                 else "--frame={0}".format(frame)),
-                    minibatch_opts=minibatch_opts,
                     momentum=momentum, max_param_change=max_param_change,
                     deriv_time_opts=" ".join(deriv_time_opts),
                     raw_model=raw_model_string,
                     context_opts=context_opts, egs_dir=egs_dir,
                     archive_index=archive_index,
-                    shuffle_buffer_size=shuffle_buffer_size), wait=False)
+                    shuffle_buffer_size=shuffle_buffer_size,
+                    minibatch_size=minibatch_size), wait=False)

     processes.append(process_handle)

From f8afcbe8e9cfdb766dd11d8b42b91dd444423b12 Mon Sep 17 00:00:00 2001
From: Dan Povey
Date: Mon, 19 Dec 2016 12:02:41 -0800
Subject: [PATCH 139/530] Draft of UtteranceSplitter and related code

---
 src/nnet3/nnet-example-utils.cc | 345 ++++++++++++++++++++++++++++++++
 src/nnet3/nnet-example-utils.h  | 136 +++++++++++--
 2 files changed, 466 insertions(+), 15 deletions(-)

diff --git a/src/nnet3/nnet-example-utils.cc b/src/nnet3/nnet-example-utils.cc
index 30f7840f6f8..547c70578ab 100644
--- a/src/nnet3/nnet-example-utils.cc
+++ b/src/nnet3/nnet-example-utils.cc
@@ -286,5 +286,350 @@ void RoundUpNumFrames(int32 frame_subsampling_factor,
 }


+/*
+   This comment describes the idea behind what InitChunkSize() is supposed to do,
+   and how it relates to the purpose of class UtteranceSplitter.
+
+   Class UtteranceSplitter is supposed to tell us, for a given utterance length,
+   what chunk sizes to use.  The chunk sizes it may choose are:
+     - zero or more chunks of the 'principal' size (the first-listed value in
+       num-frames)
+     - at most two chunks of 'alternative' num-frames (any but the first-listed
+       num-frames).
+
+   (and an empty list of chunks is not allowed as a split).  A split is
+   effectively a multiset of chunk-sizes (the order will be randomized by the
+   caller).  We represent it in code as a list of chunk-sizes, represented as a
+   std::vector, which is sorted to get a unique representation without repeats
+   of different orderings.
+
+   The choice of split is determined by a cost-function that depends on the sum
+   of the chunk-sizes in the split and the length of the utterance: the idea is
+   that we want the sum of chunk-sizes in the split to be as close as possible
+   to the utterance length.  The cost-function penalizes the sum of chunk-sizes
+   being smaller than the utterance-length (leading to gaps) twice as much as
+   when the sum of chunk-sizes is larger than the utterance length.  I.e.
+     cost(chunk_size_sum, utt_length) = (chunk_size_sum > utt_length ?
+                                         chunk_size_sum - utt_length :
+                                         2 * (utt_length - chunk_size_sum))
+   [but as a special case, set c to infinity if the largest chunk size in the
+   split is longer than the utterance length; we couldn't, in that case, use
+   this split for this utterance].
+
+   We want to make sure a good variety of combinations of chunk sizes are
+   chosen in case there are ties from the cost function.  For each utterance
+   length we store the set of splits whose costs are within 2
+   of the best cost available for that utterance length.  When asked to find
+   chunks for a particular utterance of that length, we will choose randomly
+   from that pool of splits.
+ */
+void UtteranceSplitter::InitChunkSize() {
+  int32 max_utterance_length = MaxUtteranceLength();
+
+  // The 'splits' vector is a list of possible splits (a split being
+  // a multiset of chunk-sizes, represented as a sorted vector).
+  // The vector 'splits' is itself sorted.
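+  // For example (an added illustration, assuming --num-frames=40,25,50): the
+  // splits would include (25), (40), (50), (25,50), (40,40), (25,40,40),
+  // (40,40,50) and so on, i.e. at most two of the alternative sizes 25 and 50,
+  // plus any number of copies of the principal size 40.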
+  std::vector<std::vector<int32> > splits;
+  InitSplits(&splits);
+
+
+  // Define a split-index 0 <= s < splits.size() as index into the 'splits'
+  // vector, and let a cost c >= 0 represent the mismatch between an
+  // utterance length and the total length of the chunk sizes in a split:
+
+  //  c(chunk_size_sum, utt_length) = (chunk_size_sum > utt_length ?
+  //                                   chunk_size_sum - utt_length :
+  //                                   2 * (utt_length - chunk_size_sum))
+  // [but as a special case, set c to infinity if the largest chunk size in the
+  // split is longer than the utterance length; we couldn't, in that case, use
+  // this split for this utterance].
+
+  // 'costs_for_length[u][s]', indexed by utterance-length u and then split,
+  // contains the cost for utterance-length u and split s.
+
+  std::vector<std::vector<int32> > costs_for_length(
+      max_utterance_length + 1);
+  int32 num_splits = splits.size();
+
+
+  for (int32 u = 0; u <= max_utterance_length; u++)
+    costs_for_length[u].reserve(num_splits);
+
+  for (int32 s = 0; s < num_splits; s++) {
+    const std::vector<int32> &split = splits[s];
+    int32 chunk_size_sum = std::accumulate(split.begin(), split.end(),
+                                           int32(0)),
+        max_chunk_size = *std::max_element(split.begin(), split.end());
+    for (int32 u = 0; u <= max_utterance_length; u++) {
+      // c is the cost for this utterance length and this split.  We penalize
+      // gaps twice as strongly as overlaps, based on the intuition that
+      // completely throwing out frames of data is worse than counting them
+      // twice.  It might be possible to come up with some kind of mathematical
+      // justification for this based on variance of the estimated gradient.
+      int32 c = (chunk_size_sum > u ? chunk_size_sum - u :
+                 2 * (u - chunk_size_sum));
+      if (max_chunk_size > u)
+        c = std::numeric_limits<int32>::max();
+      costs_for_length[u].push_back(c);
+    }
+  }
+
+
+  splits_for_length_.resize(max_utterance_length + 1);
+
+
+  for (int32 u = 0; u <= max_utterance_length; u++) {
+    const std::vector<int32> &costs = costs_for_length[u];
+    int32 min_cost = *std::min_element(costs.begin(), costs.end());
+    if (min_cost == std::numeric_limits<int32>::max()) {
+      // All costs were infinity, because this utterance-length u is shorter
+      // than the smallest chunk-size.  Leave splits_for_length_[u] as empty
+      // for this utterance-length, meaning we will not be able to choose any
+      // split, and such utterances will be discarded.
+      continue;
+    }
+    int32 cost_threshold = 2;  // We will choose pseudo-randomly from splits
+                               // that are within this distance from the best
+                               // cost.
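+    // Worked example (added for illustration): with --num-frames=40,25,50 and
+    // utterance length u = 100, the split (50,50) has chunk-size-sum 100 and
+    // cost 0; (25,40,40) has sum 105 and cost 5 (an overlap of 5); and
+    // (25,25,40) has sum 90 and cost 2 * 10 = 20 (a gap of 10, penalized
+    // doubly).  With this threshold, only (50,50) would be kept for u = 100.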
+    std::vector<int32>::const_iterator iter = costs.begin(), end = costs.end();
+    int32 s = 0;
+    for (; iter != end; ++iter,++s)
+      if (*iter < min_cost + cost_threshold)
+        splits_for_length_[u].push_back(splits[s]);
+  }
+
+  if (GetVerboseLevel() >= 3) {
+    std::ostringstream os;
+    for (int32 u = 0; u <= max_utterance_length; u++) {
+      if (!splits_for_length_[u].empty()) {
+        os << u << "=(";
+        std::vector<std::vector<int32> >::const_iterator
+            iter1 = splits_for_length_[u].begin(),
+            end1 = splits_for_length_[u].end();
+
+        while (iter1 != end1) {
+          std::vector<int32>::const_iterator iter2 = iter1->begin(),
+              end2 = iter1->end();
+          while (iter2 != end2) {
+            os << *iter2;
+            ++iter2;
+            if (iter2 != end2) os << ",";
+          }
+          ++iter1;
+          if (iter1 != end1) os << "/";
+        }
+        os << ")";
+        if (u < max_utterance_length) os << ", ";
+      }
+    }
+    KALDI_VLOG(3) << "Utterance-length-to-splits map is: " << os.str();
+  }
+}
+
+
+void UtteranceSplitter::GetChunkSizesForUtterance(
+    int32 utterance_length, std::vector<int32> *chunk_sizes) const {
+  KALDI_ASSERT(!splits_for_length_.empty());
+  // 'primary_length' is the first-specified num-frames.
+  // It's the only chunk that may be repeated an arbitrary number
+  // of times.
+  int32 primary_length = config_.num_frames[0],
+      max_tabulated_length = splits_for_length_.size() - 1,
+      num_primary_length_repeats = 0;
+
+  KALDI_ASSERT(utterance_length >= 0);
+  while (utterance_length > max_tabulated_length) {
+    utterance_length -= primary_length;
+    num_primary_length_repeats++;
+  }
+  KALDI_ASSERT(utterance_length >= 0);
+  const std::vector<std::vector<int32> > &possible_splits =
+      splits_for_length_[utterance_length];
+  int32 num_possible_splits = possible_splits.size(),
+      randomly_chosen_split = RandInt(0, num_possible_splits - 1);
+  *chunk_sizes = possible_splits[randomly_chosen_split];
+  for (int32 i = 0; i < num_primary_length_repeats; i++)
+    chunk_sizes->push_back(primary_length);
+  // Randomize the order in which the chunks appear.
+  std::random_shuffle(chunk_sizes->begin(),
+                      chunk_sizes->end());
+}
+
+
+int32 UtteranceSplitter::MaxUtteranceLength() const {
+  int32 num_lengths = config_.num_frames.size();
+  KALDI_ASSERT(num_lengths > 0);
+  // 'primary_length' is the first-specified num-frames.
+  // It's the only chunk that may be repeated an arbitrary number
+  // of times.
+  int32 primary_length = config_.num_frames[0],
+      max_length = primary_length;
+  for (int32 i = 0; i < num_lengths; i++) {
+    KALDI_ASSERT(config_.num_frames[i] > 0);
+    max_length = std::max(config_.num_frames[i], max_length);
+  }
+  return 2 * max_length + primary_length;
+}
+
+void UtteranceSplitter::InitSplits(std::vector<std::vector<int32> > *splits) const {
+  // we consider splits whose total length is up to MaxUtteranceLength() +
+  // primary_length.  We can be confident without doing a lot of math, that
+  // multisets above this length will never be chosen for any utterance-length
+  // up to MaxUtteranceLength().
+  int32 primary_length = config_.num_frames[0],
+      length_ceiling = MaxUtteranceLength() + primary_length;
+
+  typedef std::unordered_set<std::vector<int32>, VectorHasher<int32> > SetType;
+
+  SetType splits_set;
+
+  int32 num_lengths = config_.num_frames.size();
+
+  // The splits we allow are: zero to two 'alternate' lengths, plus
+  // an arbitrary number of repeats of the 'primary' length.  The repeats
+  // of the 'primary' length are handled by the inner loop over n.
+  // The zero to two 'alternate' lengths are handled by the loops over
+  // i and j.  i == 0 and j == 0 are special cases; they mean, no
+  // alternate is chosen.
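+  // For example (an added illustration): with config_.num_frames = (40,25,50),
+  // the iteration i = 0, j = 2 starts from vec = (50) and then appends copies
+  // of the principal length, inserting (50), (40,50), (40,40,50), ... into the
+  // set (each sorted before insertion).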
+  for (int32 i = 0; i < num_lengths; i++) {
+    for (int32 j = 0; j < num_lengths; j++) {
+      std::vector<int32> vec;
+      if (i > 0)
+        vec.push_back(config_.num_frames[i]);
+      if (j > 0)
+        vec.push_back(config_.num_frames[j]);
+      for (int32 n = 0;
+           std::accumulate(vec.begin(), vec.end(), int32(0)) <= length_ceiling;
+           ++n, vec.push_back(primary_length)) {
+        std::sort(vec.begin(), vec.end());  // we don't want to treat different
+                                            // orderings of the same values as
+                                            // different, so sort them.
+        if (!vec.empty())  // Don't allow the empty vector as a split.
+          splits_set.insert(vec);
+      }
+    }
+  }
+  for (SetType::const_iterator iter = splits_set.begin();
+       iter != splits_set.end(); ++iter)
+    splits->push_back(*iter);
+  std::sort(splits->begin(), splits->end());  // make the order deterministic,
+                                              // for consistency of output
+                                              // between runs and C libraries.
+}
+
+
+// static
+void UtteranceSplitter::DistributeRandomly(int32 n, std::vector<int32> *vec) {
+  KALDI_ASSERT(!vec->empty());
+  int32 size = vec->size();
+  if (n < 0) {
+    DistributeRandomly(-n, vec);
+    for (int32 i = 0; i < size; i++)
+      (*vec)[i] *= -1;
+    return;
+  }
+  // from this point we know n >= 0.
+  int32 common_part = n / size,
+      remainder = n % size, i;
+  for (i = 0; i < remainder; i++) {
+    (*vec)[i] = common_part + 1;
+  }
+  for (; i < size; i++) {
+    (*vec)[i] = common_part;
+  }
+  std::random_shuffle(vec->begin(), vec->end());
+  KALDI_ASSERT(std::accumulate(vec->begin(), vec->end(), int32(0)) == n);
+}
+
+
+void UtteranceSplitter::GetGapSizes(int32 utterance_length,
+                                    bool enforce_subsampling_factor,
+                                    const std::vector<int32> &chunk_sizes,
+                                    std::vector<int32> *gap_sizes) const {
+  if (chunk_sizes.empty()) {
+    gap_sizes->clear();
+    return;
+  }
+  if (enforce_subsampling_factor && config_.frame_subsampling_factor > 1) {
+    int32 sf = config_.frame_subsampling_factor, size = chunk_sizes.size();
+    int32 utterance_length_reduced = (utterance_length + (sf - 1)) / sf;
+    std::vector<int32> chunk_sizes_reduced(chunk_sizes);
+    for (int32 i = 0; i < size; i++) {
+      KALDI_ASSERT(chunk_sizes[i] % config_.frame_subsampling_factor == 0);
+      chunk_sizes_reduced[i] /= config_.frame_subsampling_factor;
+    }
+    GetGapSizes(utterance_length_reduced, false,
+                chunk_sizes_reduced, gap_sizes);
+    KALDI_ASSERT(gap_sizes->size() == static_cast<size_t>(size));
+    for (int32 i = 0; i < size; i++)
+      (*gap_sizes)[i] *= config_.frame_subsampling_factor;
+    return;
+  }
+  int32 num_chunks = chunk_sizes.size(),
+      total_of_chunk_sizes = std::accumulate(chunk_sizes.begin(),
+                                             chunk_sizes.end(),
+                                             int32(0)),
+      total_gap = utterance_length - total_of_chunk_sizes;
+  gap_sizes->resize(num_chunks);
+
+  if (total_gap < 0) {
+    // there is an overlap.  Overlaps can only go between chunks, not at the
+    // beginning or end of the utterance.
+    if (num_chunks == 1) {
+      // there needs to be an overlap, but there is only one chunk... this means
+      // the chunk-size exceeds the utterance length, which is not allowed.
+      KALDI_ERR << "Chunk size is " << chunk_sizes[0]
+                << " but utterance length is only "
+                << utterance_length;
+    }
+
+    // note the elements of 'overlaps' will be <= 0.
+    std::vector<int32> overlaps(num_chunks - 1);
+    DistributeRandomly(total_gap, &overlaps);
+    (*gap_sizes)[0] = 0;  // no gap before 1st chunk.
+    for (int32 i = 1; i < num_chunks; i++)
+      (*gap_sizes)[i] = overlaps[i-1];
+  } else {
+    // There may be a gap.  Gaps can go at the start or end of the utterance, or
+    // between segments.
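+    // For example (an added illustration): utterance_length = 105 with chunk
+    // sizes (40, 40) gives total_gap = 25, which DistributeRandomly() might
+    // split as (9, 8, 8): 9 frames before the first chunk, 8 between the two
+    // chunks, and 8 after the last chunk (that final gap is implicit).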
+    std::vector<int32> gaps(num_chunks + 1);
+    DistributeRandomly(total_gap, &gaps);
+    // the last element of 'gaps', the one at the end of the utterance, is
+    // implicit and doesn't have to be written to the output.
+    for (int32 i = 0; i < num_chunks; i++)
+      (*gap_sizes)[i] = gaps[i];
+  }
+}
+
+
+void UtteranceSplitter::GetChunksForUtterance(
+    int32 utterance_length,
+    std::vector<ChunkTimeInfo> *chunk_info) const {
+  std::vector<int32> chunk_sizes;
+  GetChunkSizesForUtterance(utterance_length, &chunk_sizes);
+  std::vector<int32> gap_sizes(chunk_sizes.size());
+  GetGapSizes(utterance_length, true, chunk_sizes, &gap_sizes);
+  int32 num_chunks = chunk_sizes.size();
+  chunk_info->resize(num_chunks);
+  int32 t = 0;
+  for (int32 i = 0; i < num_chunks; i++) {
+    t += gap_sizes[i];
+    ChunkTimeInfo &info = (*chunk_info)[i];
+    info.first_frame = t;
+    info.num_frames = chunk_sizes[i];
+    info.left_context = (i == 0 && config_.left_context_initial >= 0 ?
+                         config_.left_context_initial : config_.left_context);
+    info.right_context = (i == num_chunks - 1 && config_.right_context_final >= 0 ?
+                          config_.right_context_final : config_.right_context);
+    t += chunk_sizes[i];
+  }
+  // check that the end of the last chunk doesn't go more than
+  // 'config_.frame_subsampling_factor - 1' frames past the end
+  // of the utterance.  That amount, we treat as rounding error.
+  KALDI_ASSERT(t - utterance_length < config_.frame_subsampling_factor);
+}
+
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/nnet-example-utils.h b/src/nnet3/nnet-example-utils.h
index c0f76f3bf21..d02aa336a10 100644
--- a/src/nnet3/nnet-example-utils.h
+++ b/src/nnet3/nnet-example-utils.h
@@ -89,14 +89,17 @@ struct ExampleExtractionConfig {

   // The following parameters are derived parameters, computed by
   // ComputeDerived().
-  int32 num_frames;  // the 'principal' number of frames
-  std::vector<int32> num_frames_alternative;
+
+  // the first element of the 'num_frames' vector is the 'principal' number of
+  // frames; the remaining elements are alternatives to the principal number of
+  // frames, to be used at most once or twice per file.
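+  // For example (an added illustration): --num-frames=40,25,50 would yield
+  // num_frames = (40, 25, 50), with 40 as the principal chunk size.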
+  std::vector<int32> num_frames;

   ExampleExtractionConfig():
       left_context(0), right_context(0),
       left_context_initial(-1), right_context_final(-1),
       num_frames_overlap(0), frame_subsampling_factor(1),
-      num_frames_str("1"), num_frames(-1) { }
+      num_frames_str("1") { }

   /// This function decodes 'num_frames_str' into 'num_frames' and
   /// 'num_frames_alternative', and ensures that 'num_frames' and the members
   /// of 'num_frames_alternative' are multiples of 'frame_subsampling_factor'.
   ///
   void ComputeDerived();

   void Register(OptionsItf *po) {
     po->Register("left-context", &left_context, "Number of frames of left "
                  "context of input features that are added to each "
                  "example");
     po->Register("right-context", &right_context, "Number of frames of right "
                  "context of input features that are added to each "
                  "example");
-    po->Register("left-context-initial", &left_context, "Number of frames "
-                "of left context of input features that are added to each "
-                "example at the start of the utterance (if <0, this "
+    po->Register("left-context-initial", &left_context_initial, "Number of "
+                "frames of left context of input features that are added to "
+                "each example at the start of the utterance (if <0, this "
                 "defaults to the same as --left-context)");
-    po->Register("right-context-final", &right_context, "Number of frames "
-                "of right context of input features that are added to each "
-                "example at the end of the utterance (if <0, this "
+    po->Register("right-context-final", &right_context_final, "Number of "
+                "frames of right context of input features that are added "
+                "to each example at the end of the utterance (if <0, this "
                 "defaults to the same as --right-context)");
     po->Register("right-context", &right_context, "Number of frames of right "
@@ -143,6 +146,115 @@ struct ExampleExtractionConfig {



+/**
+   struct ChunkTimeInfo is used by class UtteranceSplitter to represent the
+   time information for a single chunk of an utterance.
+ */
+
+struct ChunkTimeInfo {
+  int32 first_frame;
+  int32 num_frames;
+  int32 left_context;
+  int32 right_context;
+};
+
+
+class UtteranceSplitter {
+ public:
+
+  UtteranceSplitter(const ExampleExtractionConfig &config);
+
+
+  // Given an utterance length, this function creates for you a set of
+  // chunks into which to split the utterance.  Note: this is partly
+  // random (it makes use of rand()).
+  void GetChunksForUtterance(int32 utterance_length,
+                             std::vector<ChunkTimeInfo> *chunk_info) const;
+
+
+ private:
+
+
+  void InitChunkSize();
+
+
+  // Used in InitChunkSize(), returns the maximum utterance-length considered
+  // separately in splits_for_length_.  [above this, we'll assume that the
+  // additional length is consumed by multiples of the 'principal' chunk size.]
+  // It returns the primary chunk-size (config_.num_frames[0]) plus twice the
+  // largest of any of the allowed chunk sizes (i.e. the max of
+  // config_.num_frames)
+  int32 MaxUtteranceLength() const;
+
+  // Used in InitChunkSize(), this function outputs the set of allowed
+  // splits, represented as a sorted list of nonempty vectors (each split is a
+  // sorted list of chunk-sizes).
+  void InitSplits(std::vector<std::vector<int32> > *splits) const;
+
+
+  // Used in GetChunksForUtterance, this function selects the list of
+  // chunk-sizes for that utterance (later on, the positions and left/right
+  // context information for the chunks will be added to this).  We don't call
+  // this a 'split', although it's also a list of chunk-sizes, because we
+  // randomize the order in which the chunk sizes appear, whereas for a 'split'
+  // we sort the chunk-sizes because a 'split' is conceptually an
+  // order-independent representation.
+  void GetChunkSizesForUtterance(int32 utterance_length,
+                                 std::vector<int32> *chunk_sizes) const;
+
+
+  // Used in GetChunksForUtterance, this function selects the 'gap sizes'
+  // before each of the chunks.  These 'gap sizes' may be positive (representing
+  // a gap between chunks, or a number of frames at the beginning of the file that
+  // don't correspond to a chunk), or may be negative, corresponding to overlaps
+  // between adjacent chunks.
+  //
+  // If config_.frame_subsampling_factor > 1 and enforce_subsampling_factor is
+  // true, this function will ensure that all elements of 'gap_sizes' are
+  // multiples of config_.frame_subsampling_factor.  (we always enforce this,
+  // but we set it to false inside a recursion when we recurse).  Note: if
+  // config_.frame_subsampling_factor > 1, it's possible for the last chunk to
+  // go over 'utterance_length' by up to config_.frame_subsampling_factor - 1
+  // frames (i.e. it would require that many frames past the utterance end).
+  // This will be dealt with when generating egs, by duplicating the last frame.
+  void GetGapSizes(int32 utterance_length,
+                   bool enforce_subsampling_factor,
+                   const std::vector<int32> &chunk_sizes,
+                   std::vector<int32> *gap_sizes) const;
+
+
+  // this static function, used in GetGapSizes(), writes values to
+  // a vector 'vec' such that the sum of those values equals n.  It
+  // tries to make those values as similar as possible (they will
+  // differ by at most one), and the location of the larger versus
+  // smaller values is random.  n may be negative.  'vec' must be
+  // nonempty.
+  static void DistributeRandomly(int32 n,
+                                 std::vector<int32> *vec);
+
+
+  const ExampleExtractionConfig &config_;
+
+  // The vector 'splits_for_length_' is indexed by the num-frames of a file, and
+  // gives us a list of alternative splits that we can use if the utterance has
+  // that many frames.  For example, if splits_for_length_[100] = ( (25, 40, 40),
+  // (40, 65) ), it means we could either split into chunks of size (25, 40, 40),
+  // i.e. one chunk of size 25 and two chunks of size 40, or into chunks of size
+  // (40, 65) (we'll later randomize the order).  In general these won't add up
+  // to exactly the length of the utterance; we'll have them overlap (or have
+  // small gaps between them) to account for this, and the details of this will
+  // be randomly decided per file.  If splits_for_length_[u] is empty, it means
+  // the utterance was shorter than the smallest possible chunk size, so
+  // we will have to discard the utterance.
+
+  // If an utterance's num-frames is >= splits_for_length_.size(), the way to
+  // find the split to use is to keep subtracting the primary num-frames (==
+  // config_.num_frames[0]) from the utterance length until the resulting
+  // num-frames is < splits_for_length_.size(), and then add the subtracted
+  // number of copies of the primary num-frames.
+  std::vector<std::vector<std::vector<int32> > > splits_for_length_;
+
+
+};
+
+

 void ComputeExampleTimeInfo(const ExampleExtractionConfig &config,
                             int32 num_frames_in_utt,
@@ -152,12 +264,6 @@ void ComputeExampleTimeInfo(const ExampleExtractionConfig &config,



-struct ExampleTimeInfo {
-  int32 first_frame;
-  int32 num_frames;
-  int32 left_context;
-  int32 right_context;
-};


 // This function rounds up the quantities 'num_frames' and 'num_frames_overlap'

From 56f087bdee4602c5c38bbbda8ac2e442d2b90ffb Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Mon, 19 Dec 2016 16:47:06 -0500
Subject: [PATCH 140/530] asr_diarization: Adding multilingual egs

---
 .../allocate_multilingual_examples.py          | 282 ++++++++++++++++++
 .../s5/steps/nnet3/multilingual/get_egs.sh     | 130 ++++++++
 2 files changed, 412 insertions(+)
 create mode 100644 egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py
 create mode 100755 egs/wsj/s5/steps/nnet3/multilingual/get_egs.sh

diff --git a/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py b/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py
new file mode 100644
index 00000000000..cba804b1a66
--- /dev/null
+++ b/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py
@@ -0,0 +1,282 @@
+#!/usr/bin/env python
+
+# This script generates egs.*.scp and the ranges.* files used for generating
+# egs.*.scp for the multilingual setup.
+# Also this script generates outputs.*.scp and weight.*.scp, where each line
+# corresponds to the language-id and weight for the same example in egs.*.scp.
+# weight.*.scp is used to scale the output's posterior during training.
+# ranges.* is generated w.r.t. the frequency distribution of remaining examples
+# in each language.
+#
+# You call this script as (e.g.)
+#
+# allocate_multilingual_examples.py [opts] num-of-languages example-scp-lists multilingual-egs-dir
+#
+# allocate_multilingual_examples.py --num-jobs 10 --samples-per-iter 10000 --minibatch-size 512
+#   --lang2weight exp/multi/lang2weight 2 "exp/lang1/egs.scp exp/lang2/egs.scp"
+#   exp/multi/egs
+#
+# This script outputs specific ranges.* files to the temp directory (exp/multi/egs/temp)
+# that will enable you to create egs.*.scp files for multilingual training.
+# exp/multi/egs/temp/ranges.* contains something like the following:
+# e.g.
+#  lang1 0 0 256
+#  lang2 1 256 256
+#
+# where each line can be interpreted as follows:
+#
+# note that <local-scp-line> is the zero-based line number in egs.scp for
+# that language.
+# num-examples is a multiple of the actual minibatch-size.
+#
+#
+# egs.1.scp is generated using ranges.1 as follows:
+# "num_examples" consecutive examples, starting from line "local-scp-line" of
+# the egs.scp file for language "source-lang", are copied to egs.1.scp.
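As a concrete illustration of that mapping, here is a toy sketch (not part of the script; the four-field reading of a ranges line follows the example above, and the scp contents are fabricated):

```python
lang_scps = {"lang1": ["utt{0} ark:lang1_egs.ark:{0}".format(i)
                       for i in range(500)]}

def expand_ranges_line(line):
    # e.g. "lang1 0 0 256": take 256 consecutive lines of lang1's egs.scp
    # starting at zero-based local line 0.
    lang, local_start, _archive_line, num = line.split()
    start, num = int(local_start), int(num)
    return lang_scps[lang][start:start + num]

print(len(expand_ranges_line("lang1 0 0 256")))   # -> 256
```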
+# +# + +from __future__ import print_function +import re, os, argparse, sys, math, warnings, random, io, imp + +import logging + +sys.path.insert(0, 'steps') +import libs.common as common_lib + +logger = logging.getLogger('libs') +logger.setLevel(logging.INFO) +handler = logging.StreamHandler() +handler.setLevel(logging.INFO) +formatter = logging.Formatter("%(asctime)s [%(filename)s:%(lineno)s - " + "%(funcName)s - %(levelname)s ] %(message)s") +handler.setFormatter(formatter) +logger.addHandler(handler) + + +def GetArgs(): + + parser = argparse.ArgumentParser(description="Writes ranges.*, outputs.* and weights.* files " + "in preparation for dumping egs for multilingual training.", + epilog="Called by steps/nnet3/multilingual/get_egs.sh") + parser.add_argument("--samples-per-iter", type=int, default=40000, + help="The target number of egs in each archive of egs, " + "(prior to merging egs). "); + parser.add_argument("--num-jobs", type=int, default=20, + help="This can be used for better randomness in distributing languages across archives." + ", where egs.job.archive.scp generated randomly and examples are combined " + " across all jobs as eg.archive.scp.") + parser.add_argument("--random-lang", type=str, action=common_lib.StrToBoolAction, + help="If true, the lang-id in ranges.* selected" + " w.r.t frequency distribution of remaining examples in each language," + " otherwise it is selected sequentially.", + default=True, choices = ["false", "true"]) + parser.add_argument("--max-archives", type=int, default=1000, + help="max number of archives used to generate egs.*.scp"); + parser.add_argument("--seed", type=int, default=1, + help="Seed for random number generator") + + parser.add_argument("--minibatch-size", type=int, default=512, + help="The minibatch size used to generate scp files per job. " + "It should be multiple of actual minibatch size."); + + parser.add_argument("--prefix", type=str, default="", + help="Adds a prefix to the range files. This is used to distinguish between the train " + "and diagnostic files.") + + parser.add_argument("--lang2weight", type=str, + help="lang2weight file contains the weight per language to scale output posterior for that language.(format is: " + " )"); +# now the positional arguments + parser.add_argument("num_langs", type=int, + help="num of languages used in multilingual training setup."); + parser.add_argument("egs_scp_lists", type=str, + help="list of egs.scp files per input language." + "e.g. exp/lang1/egs/egs.scp exp/lang2/egs/egs.scp"); + + parser.add_argument("egs_dir", + help="Name of egs directory e.g. exp/multilingual_a/egs"); + + + print(' '.join(sys.argv)) + + args = parser.parse_args() + + return args + + +# Returns a random language number w.r.t +# amount of examples in each language. +# It works based on sampling from a +# discrete distribution, where it returns i +# with prob(i) as (num_egs in lang(i)/ tot_egs). +# tot_egs is sum of lang_len. +def RandomLang(lang_len, tot_egs, random_selection): + assert(tot_egs > 0) + rand_int = random.randint(0, tot_egs - 1) + count = 0 + for l in range(len(lang_len)): + if random_selection: + if rand_int > count and rand_int <= (count + lang_len[l]): + rand_lang = l + break + else: + count += lang_len[l] + else: + if (lang_len[l] > 0): + rand_lang = l + break + assert(rand_lang >= 0 and rand_lang < len(lang_len)) + return rand_lang + +# Read lang2weight file and return lang2weight array +# where lang2weight[i] is weight for language i. 
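RandomLang() above samples a language index in proportion to the number of remaining examples. An equivalent, self-contained cumulative-sum formulation (a sketch rather than the script's code; note that this form also selects correctly when the random draw is 0):

```python
import random

def random_lang(lang_len):
    # Pick i with probability lang_len[i] / sum(lang_len).
    tot = sum(lang_len)
    assert tot > 0
    draw = random.randint(0, tot - 1)
    cum = 0
    for i, n in enumerate(lang_len):
        cum += n
        if draw < cum:          # the draw falls inside language i's share
            return i

counts = [0, 0, 0]
for _ in range(10000):
    counts[random_lang([100, 300, 600])] += 1
print(counts)                   # roughly proportional to 1:3:6
```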
+def ReadLang2weight(lang2w_file):
+  f = open(lang2w_file, "r");
+  if f is None:
+    raise Exception("Error opening lang2weight file " + str(lang2w_file))
+  lang2w = []
+  for line in f:
+    a = line.split()
+    if len(a) != 2:
+      raise Exception("bad line in lang2weight file " + line)
+    lang2w.append(float(a[1]))
+  f.close()
+  return lang2w
+
+# struct to keep the archives corresponding to each job
+class ArchiveToJob():
+  def __init__(self, job_id, archives_for_job):
+    self.job_id = job_id
+    self.archives = archives_for_job
+
+def Main():
+  args = GetArgs()
+  random.seed(args.seed)
+  num_langs = args.num_langs
+  rand_select = args.random_lang
+
+  # read egs.scp for input languages
+  scp_lists = args.egs_scp_lists.split();
+  assert(len(scp_lists) == num_langs);
+
+  scp_files = [open(scp_lists[lang], 'r') for lang in range(num_langs)]
+
+  # computes lang2len, where lang2len[i] shows the number of
+  # examples for language i.
+  lang2len = [0] * num_langs
+  for lang in range(num_langs):
+    lang2len[lang] = sum(1 for line in open(scp_lists[lang]))
+    logger.info("Number of examples for language {0} is {1}".format(lang, lang2len[lang]))
+
+  # If weights are not provided, the scaling weights
+  # are one.
+  if args.lang2weight is None:
+    lang2weight = [ 1.0 ] * num_langs
+  else:
+    lang2weight = ReadLang2weight(args.lang2weight)
+    assert(len(lang2weight) == num_langs)
+
+  if not os.path.exists(args.egs_dir + "/temp"):
+    os.makedirs(args.egs_dir + "/temp")
+
+  num_lang_file = open(args.egs_dir + "/info/" + args.prefix + "num_lang", "w");
+  print("{0}".format(num_langs), file = num_lang_file)
+
+
+  # Each element of all_egs (one per num_archive * num_jobs) is
+  # an array of 3-tuples (lang-id, local-start-egs-line, num-egs)
+  all_egs = []
+  lang_len = lang2len[:]
+  tot_num_egs = sum(lang2len[i] for i in range(len(lang2len))) # total num of egs in all languages
+  num_archives = max(1, min(args.max_archives, tot_num_egs / args.samples_per_iter))
+
+
+  num_arch_file = open(args.egs_dir + "/info/" + args.prefix + "num_archives", "w");
+  print("{0}".format(num_archives), file = num_arch_file)
+  num_arch_file.close()
+
+  this_num_egs_per_archive = tot_num_egs / (num_archives * args.num_jobs) # num of egs per archive
+  for job_index in range(args.num_jobs):
+    for archive_index in range(num_archives):
+      # Temporary scp.job_index.archive_index files store the egs.scp lines corresponding to each archive.
+      logger.debug("Processing archive {0} for job {1}".format(archive_index + 1, job_index + 1))
+      archfile = open(args.egs_dir + "/temp/" + args.prefix + "scp." + str(job_index + 1) + "." + str(archive_index + 1), "w")
+
+      this_egs = [] # this will be an array of 3-tuples (lang-id, start-egs-line, num-egs)
+
+      num_egs = 0
+      while num_egs <= this_num_egs_per_archive:
+        rem_egs = sum(lang_len[i] for i in range(len(lang_len)))
+        if rem_egs > 0:
+          lang_id = RandomLang(lang_len, rem_egs, rand_select)
+          start_egs = lang2len[lang_id] - lang_len[lang_id]
+          this_egs.append((lang_id, start_egs, args.minibatch_size))
+          for scpline in range(args.minibatch_size):
+            print("{0} {1}".format(scp_files[lang_id].readline().splitlines()[0], lang_id), file = archfile)
+
+          lang_len[lang_id] = lang_len[lang_id] - args.minibatch_size
+          num_egs = num_egs + args.minibatch_size;
+          # If the num of remaining egs in each lang is less than minibatch_size,
+          # they are discarded.
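For a sense of the sizes this loop produces, here are worked numbers under assumed totals (all values below are made up for illustration):

```python
# Assumed totals, for illustration only.
tot_num_egs = 1000000
samples_per_iter = 400000
max_archives = 1000
num_jobs = 10

# Mirrors the arithmetic above (the script relies on Python 2 integer division).
num_archives = max(1, min(max_archives, tot_num_egs // samples_per_iter))   # -> 2
egs_per_archive_per_job = tot_num_egs // (num_archives * num_jobs)          # -> 50000
print(num_archives, egs_per_archive_per_job)
```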
+ if lang_len[lang_id] < args.minibatch_size: + lang_len[lang_id] = 0 + logger.debug("Run out of data for language {0}".format(lang_id)) + else: + logger.debug("Run out of data for all languages.") + break + all_egs.append(this_egs) + archfile.close() + + # combine examples across all jobs correspond to each archive. + for archive in range(num_archives): + logger.debug("Processing archive {0} by combining all jobs.".format(archive + 1)) + this_ranges = [] + f = open(args.egs_dir + "/temp/" + args.prefix + "ranges." + str(archive + 1), "w") + o = open(args.egs_dir + "/" + args.prefix + "output." + str(archive + 1), "w") + w = open(args.egs_dir + "/" + args.prefix + "weight." + str(archive + 1), "w") + scp_per_archive_file = open(args.egs_dir + "/" + args.prefix + "egs." + str(archive + 1), "w") + + # check files befor writing. + if f is None: + raise Exception("Error opening file " + args.egs_dir + "/temp/" + args.prefix + "ranges." + str(job + 1)) + if o is None: + raise Exception("Error opening file " + args.egs_dir + "/" + args.prefix + "output." + str(job + 1)) + if w is None: + raise Exception("Error opening file " + args.egs_dir + "/" + args.prefix + "weight." + str(job + 1)) + if scp_per_archive_file is None: + raise Exception("Error opening file " + args.egs_dir + "/" + args.prefix + "egs." + str(archive + 1), "w") + + for job in range(args.num_jobs): + # combine egs.job.archive.scp across all jobs. + scp = args.egs_dir + "/temp/" + args.prefix + "scp." + str(job + 1) + "." + str(archive + 1) + with open(scp, "r") as scpfile: + for line in scpfile: + try: + scp_line = line.splitlines()[0].split() + print("{0} {1}".format(scp_line[0], scp_line[1]), file=scp_per_archive_file) + print("{0} output-{1}".format(scp_line[0], scp_line[2]), file=o) + print("{0} {1}".format(scp_line[0], lang2weight[int(scp_line[2])]), file=w) + except Exception: + logger.error("Failed processing line %s in scp %s", line, + scpfile.name) + raise + os.remove(scp) + + # combine ranges.* across all jobs for archive + for (lang_id, start_eg_line, num_egs) in all_egs[num_archives * job + archive]: + this_ranges.append((lang_id, start_eg_line, num_egs)) + + # write ranges.archive + for (lang_id, start_eg_line, num_egs) in this_ranges: + print("{0} {1} {2}".format(lang_id, start_eg_line, num_egs), file=f) + + scp_per_archive_file.close() + f.close() + o.close() + w.close() + print("allocate_multilingual_examples.py finished generating " + args.prefix + "egs.*.scp and " + args.prefix + "ranges.* and " + args.prefix + "output.*" + args.prefix + "weight.* files") + +if __name__ == "__main__": + Main() diff --git a/egs/wsj/s5/steps/nnet3/multilingual/get_egs.sh b/egs/wsj/s5/steps/nnet3/multilingual/get_egs.sh new file mode 100755 index 00000000000..aa9a911ffb2 --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/multilingual/get_egs.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# +# This script uses separate input egs directory for each language as input, +# to generate egs.*.scp files in multilingual egs directory +# where the scp line points to the original archive for each egs directory. +# $megs/egs.*.scp is randomized w.r.t language id. +# +# Also this script generates egs.JOB.scp, output.JOB.scp and weight.JOB.scp, +# where output file contains language-id for each example +# and weight file contains weights for scaling output posterior +# for each example w.r.t input language. +# + +set -e +set -o pipefail +set -u + +# Begin configuration section. +cmd=run.pl +minibatch_size=512 # multiple of minibatch used during training. 
num_jobs=10              # This can be set to the maximum number of jobs to run in parallel;
+                        # Helps for better randomness across languages
+                        # per archive.
+samples_per_iter=400000 # this is the target number of egs in each archive of egs
+                        # (prior to merging egs). We probably should have called
+                        # it egs_per_iter. This is just a guideline; it will pick
+                        # a number that divides the number of samples in the
+                        # entire data.
+stage=0
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+num_langs=$1
+shift 1
+args=("$@")
+megs_dir=${args[-1]} # multilingual directory
+mkdir -p $megs_dir
+mkdir -p $megs_dir/info
+
+if [ ${#args[@]} != $[$num_langs+1] ]; then
+  echo "$0: Number of input example dirs provided is not compatible with num_langs $num_langs."
+  echo "Usage:$0 [opts] ... "
+  echo "Usage:$0 [opts] 2 exp/lang1/egs exp/lang2/egs exp/multi/egs"
+  exit 1;
+fi
+
+required_files="egs.scp combine.egs.scp train_diagnostic.egs.scp valid_diagnostic.egs.scp"
+train_scp_list=
+train_diagnostic_scp_list=
+valid_diagnostic_scp_list=
+combine_scp_list=
+
+# copy parameters from ${args[0]}/info
+# into the multilingual dir $megs_dir/info
+
+params_to_check="feat_dim ivector_dim left_context right_context frames_per_eg"
+for param in $params_to_check; do
+  cat ${args[0]}/info/$param > $megs_dir/info/$param || exit 1;
+done
+
+for lang in $(seq 0 $[$num_langs-1]);do
+  multi_egs_dir[$lang]=${args[$lang]}
+  echo "arg[$lang] = ${args[$lang]}"
+  for f in $required_files; do
+    if [ ! -f ${multi_egs_dir[$lang]}/$f ]; then
+      echo "$0: no such file ${multi_egs_dir[$lang]}/$f." && exit 1;
+    fi
+  done
+  train_scp_list="$train_scp_list ${args[$lang]}/egs.scp"
+  train_diagnostic_scp_list="$train_diagnostic_scp_list ${args[$lang]}/train_diagnostic.egs.scp"
+  valid_diagnostic_scp_list="$valid_diagnostic_scp_list ${args[$lang]}/valid_diagnostic.egs.scp"
+  combine_scp_list="$combine_scp_list ${args[$lang]}/combine.egs.scp"
+
+  # check that the parameter dimensions are the same in all egs dirs
+  for f in $params_to_check; do
+    f1=`cat $megs_dir/info/$f`;
+    f2=`cat ${multi_egs_dir[$lang]}/info/$f`;
+    if [ $f1 != $f2 ]; then
+      echo "$0: mismatch in dimension for $f parameter in ${multi_egs_dir[$lang]}."
+      exit 1;
+    fi
+  done
+done
+
+if [ $stage -le 0 ]; then
+  echo "$0: allocating multilingual examples for training."
+  # Generate egs.*.scp for multilingual setup.
+  $cmd $megs_dir/log/allocate_multilingual_examples_train.log \
+  python steps/nnet3/multilingual/allocate_multilingual_examples.py \
+      --minibatch-size $minibatch_size \
+      --samples-per-iter $samples_per_iter \
+      $num_langs "$train_scp_list" $megs_dir || exit 1;
+fi
+
+if [ $stage -le 1 ]; then
+  echo "$0: combine combine.egs.scp examples from all langs in $megs_dir/combine.egs.scp."
+  # Generate combine.egs.scp for multilingual setup.
+  $cmd $megs_dir/log/allocate_multilingual_examples_combine.log \
+  python steps/nnet3/multilingual/allocate_multilingual_examples.py \
+      --random-lang false \
+      --max-archives 1 --num-jobs 1 \
+      --minibatch-size $minibatch_size \
+      --prefix "combine." \
+      $num_langs "$combine_scp_list" $megs_dir || exit 1;
+
+  echo "$0: combine train_diagnostic.egs.scp examples from all langs in $megs_dir/train_diagnostic.egs.scp."
+  # Generate train_diagnostic.egs.scp for multilingual setup.
+ $cmd $megs_dir/log/allocate_multilingual_examples_train_diagnostic.log \ + python steps/nnet3/multilingual/allocate_multilingual_examples.py \ + --random-lang false \ + --max-archives 1 --num-jobs 1 \ + --minibatch-size $minibatch_size \ + --prefix "train_diagnostic." \ + $num_langs "$train_diagnostic_scp_list" $megs_dir || exit 1; + + + echo "$0: combine valid_diagnostic.egs.scp examples from all langs in $megs_dir/valid_diagnostic.egs.scp." + # Generate valid_diagnostic.egs.scp for multilingual setup. + $cmd $megs_dir/log/allocate_multilingual_examples_valid_diagnostic.log \ + python steps/nnet3/multilingual/allocate_multilingual_examples.py \ + --random-lang false --max-archives 1 --num-jobs 1\ + --minibatch-size $minibatch_size \ + --prefix "valid_diagnostic." \ + $num_langs "$valid_diagnostic_scp_list" $megs_dir || exit 1; + +fi + From 34e34e9ac35fe750ff5af0cdf2d891e2a9b561b0 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 19 Dec 2016 18:49:44 -0500 Subject: [PATCH 141/530] asr_diarization: Add fake targets to get-egs-multiple-targets --- .../nnet3-get-egs-multiple-targets.cc | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/nnet3bin/nnet3-get-egs-multiple-targets.cc b/src/nnet3bin/nnet3-get-egs-multiple-targets.cc index 49f0dde4af7..2c5fb364309 100644 --- a/src/nnet3bin/nnet3-get-egs-multiple-targets.cc +++ b/src/nnet3bin/nnet3-get-egs-multiple-targets.cc @@ -236,6 +236,7 @@ int main(int argc, char *argv[]) { bool compress_input = true; + bool add_fake_targets= true; int32 input_compress_format = 0; int32 left_context = 0, right_context = 0, num_frames = 1, length_tolerance = 2; @@ -247,6 +248,9 @@ int main(int argc, char *argv[]) { std::string output_names_str; ParseOptions po(usage); + po.Register("add-fake-targets", &add_fake_targets, + "Add fake targets so that " + "all the egs contain the same number of outputs"); po.Register("compress-input", &compress_input, "If true, write egs in " "compressed format."); po.Register("input-compress-format", &input_compress_format, "Format for " @@ -298,7 +302,6 @@ int main(int argc, char *argv[]) { std::vector sparse_targets_readers(num_outputs, static_cast(NULL)); - std::vector compress_targets(1, true); std::vector compress_targets_vector; @@ -360,7 +363,11 @@ int main(int argc, char *argv[]) { std::vector targets_rspecifiers(num_outputs); std::vector deriv_weights_rspecifiers(num_outputs); - + + std::vector > fake_dense_targets(num_outputs); + std::vector > fake_deriv_weights(num_outputs); + std::vector fake_sparse_targets(num_outputs); + for (int32 n = 0; n < num_outputs; n++) { const std::string &targets_rspecifier = po.GetArg(2*n + 2); const std::string &deriv_weights_rspecifier = po.GetArg(2*n + 3); @@ -428,6 +435,16 @@ int main(int argc, char *argv[]) { KALDI_WARN << "No dense targets matrix for key " << key << " in " << "rspecifier " << targets_rspecifiers[n] << " for output " << output_names[n]; + + if (add_fake_targets) { + fake_dense_targets[n].Resize(feats.NumRows(), -output_dims[n]); + dense_targets[n] = &(fake_dense_targets[n]); + + fake_deriv_weights[n].Resize(feats.NumRows()); + deriv_weights[n] = &(fake_deriv_weights[n]); + + num_outputs_found++; + } continue; } const MatrixBase *target_matrix = &(dense_targets_readers[n]->Value(key)); @@ -446,6 +463,12 @@ int main(int argc, char *argv[]) { KALDI_WARN << "No sparse target matrix for key " << key << " in " << "rspecifier " << targets_rspecifiers[n] << " for output " << output_names[n]; + + if (add_fake_targets) { + 
fake_sparse_targets[n].resize(feats.NumRows()); + sparse_targets[n] = &(fake_sparse_targets[n]); + num_outputs_found++; + } continue; } const Posterior *posterior = &(sparse_targets_readers[n]->Value(key)); @@ -499,6 +522,12 @@ int main(int argc, char *argv[]) { continue; } + if (add_fake_targets && num_outputs_found != output_names.size()) { + KALDI_WARN << "Not all outputs found for key " << key; + num_err++; + continue; + } + ProcessFile(feats, ivector_feats, output_names, output_dims, dense_targets, sparse_targets, deriv_weights, key, From 2d4eeeb7f1a465a9c7ca4c0f053ff58bb4ae7ae9 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 19 Dec 2016 19:58:29 -0500 Subject: [PATCH 142/530] asr_diarization: Support scaling of nnet3 egs feats --- src/matrix/sparse-matrix.cc | 28 ++++++++++ src/matrix/sparse-matrix.h | 6 +++ src/nnet3bin/nnet3-copy-egs.cc | 93 ++++++++++++++++++++++++++++++++-- 3 files changed, 124 insertions(+), 3 deletions(-) diff --git a/src/matrix/sparse-matrix.cc b/src/matrix/sparse-matrix.cc index 777819ed677..c5bc868f48e 100644 --- a/src/matrix/sparse-matrix.cc +++ b/src/matrix/sparse-matrix.cc @@ -281,6 +281,14 @@ void SparseVector::Resize(MatrixIndexT dim, dim_ = dim; } +template +void SparseVector::Scale(BaseFloat scale) { + typename std::vector >::iterator it = pairs_.begin(); + for (; it != pairs_.end(); ++it) { + it->second *= scale; + } +} + template MatrixIndexT SparseMatrix::NumRows() const { return rows_.size(); @@ -574,6 +582,14 @@ void SparseMatrix::Resize(MatrixIndexT num_rows, rows_[row].Resize(num_cols, kCopyData); } } + +template +void SparseMatrix::Scale(BaseFloat scale) { + for (typename std::vector >::iterator it = rows_.begin(); + it != rows_.end(); ++it) { + it->Scale(scale); + } +} template void SparseMatrix::AppendSparseMatrixRows( @@ -1053,6 +1069,18 @@ void GeneralMatrix::AddToMat(BaseFloat alpha, MatrixBase *mat, } } + +void GeneralMatrix::Scale(BaseFloat scale) { + if(Type() == kCompressedMatrix) + Uncompress(); + if (Type() == kFullMatrix) { + mat_.Scale(scale); + } else if (Type() == kSparseMatrix) { + smat_.Scale(scale); + } +} + + template Real SparseVector::Max(int32 *index_out) const { KALDI_ASSERT(dim_ > 0 && pairs_.size() <= static_cast(dim_)); diff --git a/src/matrix/sparse-matrix.h b/src/matrix/sparse-matrix.h index 88619da3034..8ad62e0ac51 100644 --- a/src/matrix/sparse-matrix.h +++ b/src/matrix/sparse-matrix.h @@ -98,6 +98,8 @@ class SparseVector { /// Resizes to this dimension. resize_type == kUndefined /// behaves the same as kSetZero. void Resize(MatrixIndexT dim, MatrixResizeType resize_type = kSetZero); + + void Scale(BaseFloat scale); void Write(std::ostream &os, bool binary) const; @@ -196,6 +198,8 @@ class SparseMatrix { void Resize(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type = kSetZero); + void Scale(BaseFloat scale); + // Use the Matrix::CopyFromSmat() function to copy from this to Matrix. Also // see Matrix::AddSmat(). There is not very extensive functionality for // SparseMat just yet (e.g. no matrix multiply); we will add things as needed @@ -286,6 +290,8 @@ class GeneralMatrix { void AddToMat(BaseFloat alpha, CuMatrixBase *cu_mat, MatrixTransposeType trans = kNoTrans) const; + void Scale(BaseFloat alpha); + /// Assignment from regular matrix. 
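The semantics of the new Scale() methods are easy to state: dense storage scales every element, sparse storage scales only the stored (index, value) pairs, and GeneralMatrix dispatches on its type after uncompressing. A pure-Python stand-in for the sparse case (illustrative only, not the Kaldi types):

```python
def scale_sparse_rows(rows, scale):
    # Each row is a list of (index, value) pairs; scaling touches only the
    # stored values, which is all SparseVector::Scale() needs to do.
    return [[(i, v * scale) for (i, v) in row] for row in rows]

posterior = [[(12, 1.0)], [(3, 0.5), (7, 0.5)]]
print(scale_sparse_rows(posterior, 0.25))
# -> [[(12, 0.25)], [(3, 0.125), (7, 0.125)]]
```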
GeneralMatrix &operator= (const MatrixBase &mat); diff --git a/src/nnet3bin/nnet3-copy-egs.cc b/src/nnet3bin/nnet3-copy-egs.cc index 2702ae5fae9..5189ee4046f 100644 --- a/src/nnet3bin/nnet3-copy-egs.cc +++ b/src/nnet3bin/nnet3-copy-egs.cc @@ -28,6 +28,34 @@ namespace kaldi { namespace nnet3 { +// rename io-name of eg w.r.t io_names list e.g. input/input-1,output/output-1 +// 'input' is renamed to input-1 and 'output' renamed to output-1. +void RenameIoNames(const std::string &io_names, + NnetExample *eg_modified) { + std::vector separated_io_names; + SplitStringToVector(io_names, ",", true, &separated_io_names); + int32 num_modified_io = separated_io_names.size(), + io_size = eg_modified->io.size(); + std::vector orig_io_list; + for (int32 io_ind = 0; io_ind < io_size; io_ind++) + orig_io_list.push_back(eg_modified->io[io_ind].name); + + for (int32 ind = 0; ind < num_modified_io; ind++) { + std::vector rename_io_name; + SplitStringToVector(separated_io_names[ind], "/", true, &rename_io_name); + // find the io in eg with specific name and rename it to new name. + + int32 rename_io_ind = + std::find(orig_io_list.begin(), orig_io_list.end(), rename_io_name[0]) - + orig_io_list.begin(); + + if (rename_io_ind >= io_size) + KALDI_ERR << "No io-node with name " << rename_io_name[0] + << "exists in eg."; + eg_modified->io[rename_io_ind].name = rename_io_name[1]; + } +} + bool KeepOutputs(const std::vector &keep_outputs, NnetExample *eg) { std::vector io_new; @@ -330,6 +358,8 @@ int main(int argc, char *argv[]) { // you can set frame to a number to select a single frame with a particular // offset, or to 'random' to select a random single frame. std::string frame_str; + std::string weight_str; + std::string output_str; ParseOptions po(usage); po.Register("random", &random, "If true, will write frames to output " @@ -357,6 +387,16 @@ int main(int argc, char *argv[]) { po.Register("remove-zero-deriv-outputs", &remove_zero_deriv_outputs, "Remove outputs that do not contribute to the objective " "because of zero deriv-weights"); + po.Register("weights", &weight_str, + "Rspecifier maps the output posterior to each example" + "If provided, the supervision weight for output is scaled." + " Scaling supervision weight is the same as scaling to the derivative during training " + " in case of linear objective." + "The default is one, which means we are not applying per-example weights."); + po.Register("outputs", &output_str, + "Rspecifier maps example old output-name to new output-name in example." + " If provided, the NnetIo with name 'output' in each example " + " is renamed to new output name."); po.Read(argc, argv); @@ -370,6 +410,8 @@ int main(int argc, char *argv[]) { std::string examples_rspecifier = po.GetArg(1); SequentialNnetExampleReader example_reader(examples_rspecifier); + RandomAccessTokenReader output_reader(output_str); + RandomAccessBaseFloatReader egs_weight_reader(weight_str); int32 num_outputs = po.NumArgs() - 1; std::vector example_writers(num_outputs); @@ -382,7 +424,7 @@ int main(int argc, char *argv[]) { std::sort(keep_outputs.begin(), keep_outputs.end()); } - int64 num_read = 0, num_written = 0; + int64 num_read = 0, num_written = 0, num_err = 0; for (; !example_reader.Done(); example_reader.Next(), num_read++) { // count is normally 1; could be 0, or possibly >1. 
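A compact Python rendering of what RenameIoNames() does to an example's list of io names (a sketch; the real code raises an error when a requested old name is absent):

```python
def rename_io_names(spec, io_names):
    # spec is like "input/input-1,output/output-1"; names not mentioned
    # in the spec are left unchanged.
    mapping = dict(pair.split("/") for pair in spec.split(","))
    return [mapping.get(name, name) for name in io_names]

print(rename_io_names("output/output-1", ["input", "output"]))
# -> ['input', 'output-1']
```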
int32 count = GetCount(keep_proportion); @@ -399,16 +441,61 @@ int main(int argc, char *argv[]) { frame_shift == 0) { if (remove_zero_deriv_outputs) if (!RemoveZeroDerivOutputs(&eg)) continue; + if (!weight_str.empty()) { + if (!egs_weight_reader.HasKey(key)) { + KALDI_WARN << "No weight for example key " << key; + num_err++; + continue; + } + BaseFloat weight = egs_weight_reader.Value(key); + for (int32 i = 0; i < eg.io.size(); i++) + if (eg.io[i].name.find("output") != std::string::npos) + eg.io[i].features.Scale(weight); + } + if (!output_str.empty()) { + if (!output_reader.HasKey(key)) { + KALDI_WARN << "No new output-name for example key " << key; + num_err++; + continue; + } + std::string new_output_name = output_reader.Value(key); + // rename output io name to $new_output_name. + std::string rename_io_names = "output/" + new_output_name; + RenameIoNames(rename_io_names, &eg); + } example_writers[index]->Write(key, eg); num_written++; } else { // the --frame option or context options were set. NnetExample eg_modified; if (SelectFromExample(eg, frame_str, left_context, right_context, frame_shift, &eg_modified)) { - // this branch of the if statement will almost always be taken (should only - // not be taken for shorter-than-normal egs from the end of a file. if (remove_zero_deriv_outputs) if (!RemoveZeroDerivOutputs(&eg_modified)) continue; + if (!weight_str.empty()) { + // scale the supervision weight for egs + if (!egs_weight_reader.HasKey(key)) { + KALDI_WARN << "No weight for example key " << key; + num_err++; + continue; + } + int32 weight = egs_weight_reader.Value(key); + for (int32 i = 0; i < eg_modified.io.size(); i++) + if (eg_modified.io[i].name.find("output") != std::string::npos) + eg_modified.io[i].features.Scale(weight); + } + if (!output_str.empty()) { + if (!output_reader.HasKey(key)) { + KALDI_WARN << "No new output-name for example key " << key; + num_err++; + continue; + } + std::string new_output_name = output_reader.Value(key); + // rename output io name to $new_output_name. + std::string rename_io_names = "output/" + new_output_name; + RenameIoNames(rename_io_names, &eg_modified); + } + // this branch of the if statement will almost always be taken (should only + // not be taken for shorter-than-normal egs from the end of a file. 
example_writers[index]->Write(key, eg_modified); num_written++; } From c1799f1be12294884b84bbdf5f71e0e5ca40c285 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 19 Dec 2016 19:59:17 -0500 Subject: [PATCH 143/530] asr_diarization: Fix bugs and restructure multiple egs targets source --- src/nnet3/nnet-example-utils.cc | 21 +- .../nnet3-get-egs-multiple-targets.cc | 208 +++++++----------- 2 files changed, 96 insertions(+), 133 deletions(-) diff --git a/src/nnet3/nnet-example-utils.cc b/src/nnet3/nnet-example-utils.cc index 548fb842385..2d9a01550b9 100644 --- a/src/nnet3/nnet-example-utils.cc +++ b/src/nnet3/nnet-example-utils.cc @@ -159,21 +159,22 @@ static void MergeIo(const std::vector &src, } Vector &this_deriv_weights = merged_eg->io[f].deriv_weights; - if (output_deriv_weights[f][0]->Dim() > 0) { - this_deriv_weights.Resize( - merged_eg->io[f].indexes.size(), kUndefined); - KALDI_ASSERT(this_deriv_weights.Dim() == - merged_eg->io[f].features.NumRows()); + this_deriv_weights.Resize( + merged_eg->io[f].indexes.size(), kUndefined); + this_deriv_weights.Set(1.0); + KALDI_ASSERT(this_deriv_weights.Dim() == + merged_eg->io[f].features.NumRows()); - std::vector const*>::const_iterator - it = output_deriv_weights[f].begin(), - end = output_deriv_weights[f].end(); + std::vector const*>::const_iterator + it = output_deriv_weights[f].begin(), + end = output_deriv_weights[f].end(); - for (int32 i = 0, cur_offset = 0; it != end; ++it, i++) { + for (int32 i = 0, cur_offset = 0; it != end; ++it, i++) { + if((*it)->Dim() > 0) { KALDI_ASSERT((*it)->Dim() == output_lists[f][i]->NumRows()); this_deriv_weights.Range(cur_offset, (*it)->Dim()).CopyFromVec(**it); - cur_offset += (*it)->Dim(); } + cur_offset += output_lists[f][i]->NumRows(); } } } diff --git a/src/nnet3bin/nnet3-get-egs-multiple-targets.cc b/src/nnet3bin/nnet3-get-egs-multiple-targets.cc index 2c5fb364309..63ebce5ab0e 100644 --- a/src/nnet3bin/nnet3-get-egs-multiple-targets.cc +++ b/src/nnet3bin/nnet3-get-egs-multiple-targets.cc @@ -44,26 +44,28 @@ bool ToBool(std::string str) { return false; // never reached } -static void ProcessFile(const MatrixBase &feats, - const MatrixBase *ivector_feats, - const std::vector &output_names, - const std::vector &output_dims, - const std::vector* > &dense_target_matrices, - const std::vector &posteriors, - const std::vector* > &deriv_weights, - const std::string &utt_id, - bool compress_input, - int32 input_compress_format, - const std::vector &compress_targets, - const std::vector &targets_compress_formats, - int32 left_context, - int32 right_context, - int32 frames_per_eg, - std::vector *num_frames_written, - std::vector *num_egs_written, - NnetExampleWriter *example_writer) { +static void ProcessFile( + const MatrixBase &feats, + const MatrixBase *ivector_feats, + const std::vector &output_names, + const std::vector &output_dims, + const std::vector* > &dense_target_matrices, + const std::vector &posteriors, + const std::vector* > &deriv_weights, + const std::string &utt_id, + bool compress_input, + int32 input_compress_format, + const std::vector &compress_targets, + const std::vector &targets_compress_formats, + int32 left_context, + int32 right_context, + int32 frames_per_eg, + std::vector *num_frames_written, + std::vector *num_egs_written, + NnetExampleWriter *example_writer) { + KALDI_ASSERT(output_names.size() > 0); - //KALDI_ASSERT(feats.NumRows() == static_cast(targets.NumRows())); + for (int32 t = 0; t < feats.NumRows(); t += frames_per_eg) { int32 tot_frames = left_context + 
frames_per_eg + right_context; @@ -113,16 +115,15 @@ static void ProcessFile(const MatrixBase &feats, // At the end of the file, we pad with the last frame repeated // so that all examples have the same structure (prevents the need // for recompilations). - int32 actual_frames_per_eg = std::min(std::min(frames_per_eg, - feats.NumRows() - t), deriv_weights[n]->Dim() - t); + int32 actual_frames_per_eg = std::min( + std::min(frames_per_eg, feats.NumRows() - t), + deriv_weights[n]->Dim() - t); this_deriv_weights.Resize(frames_per_eg); int32 frames_to_copy = std::min(t + actual_frames_per_eg, deriv_weights[n]->Dim()) - t; - this_deriv_weights.Range(0, frames_to_copy).CopyFromVec(deriv_weights[n]->Range(t, frames_to_copy)); - if (this_deriv_weights.Sum() == 0) { - continue; // Ignore frames that have frame weights 0 - } + this_deriv_weights.Range(0, frames_to_copy).CopyFromVec( + deriv_weights[n]->Range(t, frames_to_copy)); } if (dense_target_matrices[n]) { @@ -133,8 +134,9 @@ static void ProcessFile(const MatrixBase &feats, // At the end of the file, we pad with the last frame repeated // so that all examples have the same structure (prevents the need // for recompilations). - int32 actual_frames_per_eg = std::min(std::min(frames_per_eg, - feats.NumRows() - t), targets.NumRows() - t); + int32 actual_frames_per_eg = std::min( + std::min(frames_per_eg, feats.NumRows() - t), + targets.NumRows() - t); for (int32 i = 0; i < actual_frames_per_eg; i++) { // Copy the i^th row of the target matrix from the (t+i)^th row of the @@ -150,12 +152,14 @@ static void ProcessFile(const MatrixBase &feats, // input targets matrix KALDI_ASSERT(t + actual_frames_per_eg - 1 == targets.NumRows() - 1); SubVector this_target_dest(targets_dest, i); - SubVector this_target_src(targets, t+actual_frames_per_eg-1); + SubVector this_target_src(targets, + t + actual_frames_per_eg - 1); this_target_dest.CopyFromVec(this_target_src); } if (deriv_weights[n]) { - eg.io.push_back(NnetIo(output_names[n], this_deriv_weights, 0, targets_dest)); + eg.io.push_back(NnetIo(output_names[n], this_deriv_weights, + 0, targets_dest)); } else { eg.io.push_back(NnetIo(output_names[n], 0, targets_dest)); } @@ -166,8 +170,9 @@ static void ProcessFile(const MatrixBase &feats, // At the end of the file, we pad with the last frame repeated // so that all examples have the same structure (prevents the need // for recompilations). - int32 actual_frames_per_eg = std::min(std::min(frames_per_eg, - feats.NumRows() - t), static_cast(pdf_post.size()) - t); + int32 actual_frames_per_eg = std::min( + std::min(frames_per_eg, feats.NumRows() - t), + static_cast(pdf_post.size()) - t); Posterior labels(frames_per_eg); for (int32 i = 0; i < actual_frames_per_eg; i++) @@ -175,7 +180,8 @@ static void ProcessFile(const MatrixBase &feats, // remaining posteriors for frames are empty. if (deriv_weights[n]) { - eg.io.push_back(NnetIo(output_names[n], this_deriv_weights, output_dims[n], 0, labels)); + eg.io.push_back(NnetIo(output_names[n], this_deriv_weights, + output_dims[n], 0, labels)); } else { eg.io.push_back(NnetIo(output_names[n], output_dims[n], 0, labels)); } @@ -185,11 +191,13 @@ static void ProcessFile(const MatrixBase &feats, eg.io.back().Compress(targets_compress_formats[n]); num_outputs_added++; - (*num_frames_written)[n] += frames_per_eg; // Actually actual_frames_per_eg, but that depends on the different output. For simplification, frames_per_eg is used. + // Actually actual_frames_per_eg, but that depends on the different + // output. 
For simplification, frames_per_eg is used. + (*num_frames_written)[n] += frames_per_eg; (*num_egs_written)[n] += 1; } - if (num_outputs_added == 0) continue; + if (num_outputs_added != output_names.size()) continue; std::ostringstream os; os << utt_id << "-" << t; @@ -236,7 +244,6 @@ int main(int argc, char *argv[]) { bool compress_input = true; - bool add_fake_targets= true; int32 input_compress_format = 0; int32 left_context = 0, right_context = 0, num_frames = 1, length_tolerance = 2; @@ -248,17 +255,14 @@ int main(int argc, char *argv[]) { std::string output_names_str; ParseOptions po(usage); - po.Register("add-fake-targets", &add_fake_targets, - "Add fake targets so that " - "all the egs contain the same number of outputs"); po.Register("compress-input", &compress_input, "If true, write egs in " "compressed format."); po.Register("input-compress-format", &input_compress_format, "Format for " "compressing input feats e.g. Use 2 for compressing wave"); po.Register("compress-targets", &compress_targets_str, "CSL of whether " "targets must be compressed for each of the outputs"); - po.Register("targets-compress-formats", &targets_compress_formats_str, "Format for " - "compressing all feats in general"); + po.Register("targets-compress-formats", &targets_compress_formats_str, + "Format for compressing all feats in general"); po.Register("left-context", &left_context, "Number of frames of left " "context the neural net requires."); po.Register("right-context", &right_context, "Number of frames of right " @@ -271,11 +275,6 @@ int main(int argc, char *argv[]) { "difference in num-frames between feat and ivector matrices"); po.Register("output-dims", &output_dims_str, "CSL of output node dims"); po.Register("output-names", &output_names_str, "CSL of output node names"); - //po.Register("deriv-weights-rspecifiers", &deriv_weights_rspecifiers_str, - // "CSL of per-frame weights (only binary - 0 or 1) that specifies " - // "whether a frame's gradient must be backpropagated or not. 
" - // "Not specifying this is equivalent to specifying a vector of " - // "all 1s."); po.Read(argc, argv); @@ -295,12 +294,12 @@ int main(int argc, char *argv[]) { int32 num_outputs = (po.NumArgs() - 2) / 2; KALDI_ASSERT(num_outputs > 0); - std::vector deriv_weights_readers(num_outputs, - static_cast(NULL)); - std::vector dense_targets_readers(num_outputs, - static_cast(NULL)); - std::vector sparse_targets_readers(num_outputs, - static_cast(NULL)); + std::vector deriv_weights_readers( + num_outputs, static_cast(NULL)); + std::vector dense_targets_readers( + num_outputs, static_cast(NULL)); + std::vector sparse_targets_readers( + num_outputs, static_cast(NULL)); std::vector compress_targets(1, true); std::vector compress_targets_vector; @@ -338,7 +337,8 @@ int main(int argc, char *argv[]) { } if (targets_compress_formats.size() != num_outputs) { - KALDI_ERR << "Mismatch in length of targets-compress-formats and num-outputs; " + KALDI_ERR << "Mismatch in length of targets-compress-formats " + << " and num-outputs; " << targets_compress_formats.size() << " vs " << num_outputs; } @@ -349,25 +349,9 @@ int main(int argc, char *argv[]) { std::vector output_names(num_outputs); SplitStringToVector(output_names_str, ":,", true, &output_names); - //std::vector deriv_weights_rspecifiers; - //if (!deriv_weights_rspecifiers_str.empty()) { - // std::vector parts; - // SplitStringToVector(deriv_weights_rspecifiers_str, ":,", - // false, &deriv_weights_rspecifiers); - - // if (deriv_weights_rspecifiers.size() != num_outputs) { - // KALDI_ERR << "Expecting the number of deriv-weights-rspecifiers to " - // << "be equal to the number of outputs"; - // } - //} - std::vector targets_rspecifiers(num_outputs); std::vector deriv_weights_rspecifiers(num_outputs); - std::vector > fake_dense_targets(num_outputs); - std::vector > fake_deriv_weights(num_outputs); - std::vector fake_sparse_targets(num_outputs); - for (int32 n = 0; n < num_outputs; n++) { const std::string &targets_rspecifier = po.GetArg(2*n + 2); const std::string &deriv_weights_rspecifier = po.GetArg(2*n + 3); @@ -376,19 +360,24 @@ int main(int argc, char *argv[]) { deriv_weights_rspecifiers[n] = deriv_weights_rspecifier; if (output_dims[n] >= 0) { - sparse_targets_readers[n] = new RandomAccessPosteriorReader(targets_rspecifier); + sparse_targets_readers[n] = new RandomAccessPosteriorReader( + targets_rspecifier); } else { - dense_targets_readers[n] = new RandomAccessBaseFloatMatrixReader(targets_rspecifier); + dense_targets_readers[n] = new RandomAccessBaseFloatMatrixReader( + targets_rspecifier); } if (!deriv_weights_rspecifier.empty()) - deriv_weights_readers[n] = new RandomAccessBaseFloatVectorReader(deriv_weights_rspecifier); + deriv_weights_readers[n] = new RandomAccessBaseFloatVectorReader( + deriv_weights_rspecifier); KALDI_LOG << "output-name=" << output_names[n] << " target-dim=" << output_dims[n] << " targets-rspecifier=\"" << targets_rspecifiers[n] << "\"" - << " deriv-weights-rspecifier=\"" << deriv_weights_rspecifiers[n] << "\"" - << " compress-target=" << (compress_targets[n] ? "true" : "false") + << " deriv-weights-rspecifier=\"" + << deriv_weights_rspecifiers[n] << "\"" + << " compress-target=" + << (compress_targets[n] ? 
"true" : "false") << " target-compress-format=" << targets_compress_formats[n]; } @@ -405,7 +394,6 @@ int main(int argc, char *argv[]) { if (!ivector_rspecifier.empty()) { if (!ivector_reader.HasKey(key)) { KALDI_WARN << "No iVectors for utterance " << key; - num_err++; continue; } else { // this address will be valid until we call HasKey() or Value() @@ -424,9 +412,12 @@ int main(int argc, char *argv[]) { continue; } - std::vector* > dense_targets(num_outputs, static_cast* >(NULL)); - std::vector sparse_targets(num_outputs, static_cast(NULL)); - std::vector* > deriv_weights(num_outputs, static_cast* >(NULL)); + std::vector* > dense_targets( + num_outputs, static_cast* >(NULL)); + std::vector sparse_targets( + num_outputs, static_cast(NULL)); + std::vector* > deriv_weights( + num_outputs, static_cast* >(NULL)); int32 num_outputs_found = 0; for (int32 n = 0; n < num_outputs; n++) { @@ -435,26 +426,16 @@ int main(int argc, char *argv[]) { KALDI_WARN << "No dense targets matrix for key " << key << " in " << "rspecifier " << targets_rspecifiers[n] << " for output " << output_names[n]; - - if (add_fake_targets) { - fake_dense_targets[n].Resize(feats.NumRows(), -output_dims[n]); - dense_targets[n] = &(fake_dense_targets[n]); - - fake_deriv_weights[n].Resize(feats.NumRows()); - deriv_weights[n] = &(fake_deriv_weights[n]); - - num_outputs_found++; - } - continue; + break; } - const MatrixBase *target_matrix = &(dense_targets_readers[n]->Value(key)); + const MatrixBase *target_matrix = &( + dense_targets_readers[n]->Value(key)); if ((target_matrix->NumRows() - feats.NumRows()) > length_tolerance) { KALDI_WARN << "Length difference between feats " << feats.NumRows() << " and target matrix " << target_matrix->NumRows() << "exceeds tolerance " << length_tolerance; - num_err++; - continue; + break; } dense_targets[n] = target_matrix; @@ -463,22 +444,16 @@ int main(int argc, char *argv[]) { KALDI_WARN << "No sparse target matrix for key " << key << " in " << "rspecifier " << targets_rspecifiers[n] << " for output " << output_names[n]; - - if (add_fake_targets) { - fake_sparse_targets[n].resize(feats.NumRows()); - sparse_targets[n] = &(fake_sparse_targets[n]); - num_outputs_found++; - } - continue; + break; } const Posterior *posterior = &(sparse_targets_readers[n]->Value(key)); - if (abs(static_cast(posterior->size()) - feats.NumRows()) > length_tolerance + if (abs(static_cast(posterior->size()) - feats.NumRows()) + > length_tolerance || posterior->size() < feats.NumRows()) { KALDI_WARN << "Posterior has wrong size " << posterior->size() << " versus " << feats.NumRows(); - num_err++; - continue; + break; } sparse_targets[n] = posterior; @@ -489,10 +464,7 @@ int main(int argc, char *argv[]) { KALDI_WARN << "No deriv weights for key " << key << " in " << "rspecifier " << deriv_weights_rspecifiers[n] << " for output " << output_names[n]; - num_err++; - sparse_targets[n] = NULL; - dense_targets[n] = NULL; - continue; + break; } else { // this address will be valid until we call HasKey() or Value() // again. 
@@ -500,29 +472,20 @@ int main(int argc, char *argv[]) { } } - if (deriv_weights[n] && - (abs(feats.NumRows() - deriv_weights[n]->Dim()) > length_tolerance - || deriv_weights[n]->Dim() == 0)) { + if (deriv_weights[n] + && (abs(feats.NumRows() - deriv_weights[n]->Dim()) + > length_tolerance + || deriv_weights[n]->Dim() == 0)) { KALDI_WARN << "Length difference between feats " << feats.NumRows() << " and deriv weights " << deriv_weights[n]->Dim() << " exceeds tolerance " << length_tolerance; - num_err++; - sparse_targets[n] = NULL; - dense_targets[n] = NULL; - deriv_weights[n] = NULL; - continue; + break; } num_outputs_found++; } - if (num_outputs_found == 0) { - KALDI_WARN << "No output found for key " << key; - num_err++; - continue; - } - - if (add_fake_targets && num_outputs_found != output_names.size()) { + if (num_outputs_found != num_outputs) { KALDI_WARN << "Not all outputs found for key " << key; num_err++; continue; @@ -553,7 +516,8 @@ int main(int argc, char *argv[]) { KALDI_LOG << "Finished generating examples, " << "successfully processed " << num_done - << " feature files, wrote at most " << max_num_egs_written << " examples, " + << " feature files, wrote at most " << max_num_egs_written + << " examples, " << " with at most " << max_num_frames_written << " egs in total; " << num_err << " files had errors."; @@ -563,5 +527,3 @@ int main(int argc, char *argv[]) { return -1; } } - - From cfb71e500eaec2b7796277ac5945b537e9bd6777 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 19 Dec 2016 20:00:07 -0500 Subject: [PATCH 144/530] asr_diarization: Minor fixes to get_egs_multiple_targets --- .../steps/nnet3/get_egs_multiple_targets.py | 183 +++++++++++++----- 1 file changed, 132 insertions(+), 51 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py b/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py index fa8a68f5c64..72b0cb4edd3 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py +++ b/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py @@ -22,7 +22,7 @@ logger.setLevel(logging.INFO) handler = logging.StreamHandler() handler.setLevel(logging.INFO) -formatter = logging.Formatter("%(asctime)s [%(filename)s:%(lineno)s - " +formatter = logging.Formatter("%(asctime)s [%(pathname)s:%(lineno)s - " "%(funcName)s - %(levelname)s ] %(message)s") handler.setFormatter(formatter) logger.addHandler(handler) @@ -133,6 +133,9 @@ def get_args(): parser.add_argument("--srand", type=int, default=0, help="Rand seed for nnet3-copy-egs and " "nnet3-shuffle-egs") + parser.add_argument("--generate-egs-scp", type=str, + default=False, action=common_lib.StrToBoolAction, + help="Generate scp files in addition to archives") parser.add_argument("--targets-parameters", type=str, action='append', required=True, dest='targets_para_array', @@ -186,9 +189,10 @@ def check_for_required_files(feat_dir, targets_scps, online_ivector_dir=None): '{0}/cmvn.scp'.format(feat_dir)] if online_ivector_dir is not None: required_files.append('{0}/ivector_online.scp'.format( - online_ivector_dir)) + online_ivector_dir)) required_files.append('{0}/ivector_period'.format( - online_ivector_dir)) + online_ivector_dir)) + required_files.extend(targets_scps) for file in required_files: if not os.path.isfile(file): @@ -229,9 +233,9 @@ def parse_targets_parameters_array(para_array): if not os.path.isfile(t.targets_scp): raise Exception("Expected {0} to exist.".format(t.targets_scp)) - if (t.target_type == "dense"): + if t.target_type == "dense": dim = common_lib.get_feat_dim_from_scp(t.targets_scp) 
- if (t.dim != -1 and t.dim != dim): + if t.dim != -1 and t.dim != dim: raise Exception('Mismatch in --dim provided and feat dim for ' 'file {0}; {1} vs {2}'.format(t.targets_scp, t.dim, dim)) @@ -272,7 +276,10 @@ def sample_utts(feat_dir, num_utts_subset, min_duration, exclude_list=None): for utt in utts2add: sampled_utts.append(utt) - index = index + 1 + else: + logger.info("Skipping utterance %s of length %f", + utt2uniq[utt2durs[index][0]], utt2durs[index][1]) + index = index + 1 num_trials = num_trials + 1 if exclude_list is not None: assert(len(set(exclude_list).intersection(sampled_utts)) == 0) @@ -311,13 +318,13 @@ def get_feat_ivector_strings(dir, feat_dir, split_feat_dir, "{dir}/valid_uttlist {sdir}/JOB/feats.scp | " "apply-cmvn {cmvn} --utt2spk=ark:{sdir}/JOB/utt2spk " "scp:{sdir}/JOB/cmvn.scp scp:- ark:- |".format( - dir=dir, sdir=split_feat_dir, - cmvn=cmvn_opt_string)) + dir=dir, sdir=split_feat_dir, + cmvn=cmvn_opt_string)) valid_feats = ("ark,s,cs:utils/filter_scp.pl {dir}/valid_uttlist " "{fdir}/feats.scp | " "apply-cmvn {cmvn} --utt2spk=ark:{fdir}/utt2spk " "scp:{fdir}/cmvn.scp scp:- ark:- |".format( - dir=dir, fdir=feat_dir, cmvn=cmvn_opt_string)) + dir=dir, fdir=feat_dir, cmvn=cmvn_opt_string)) train_subset_feats = ("ark,s,cs:utils/filter_scp.pl " "{dir}/train_subset_uttlist {fdir}/feats.scp | " "apply-cmvn {cmvn} --utt2spk=ark:{fdir}/utt2spk " @@ -470,7 +477,24 @@ def generate_valid_train_subset_egs(dir, targets_parameters, num_train_egs_combine, num_valid_egs_combine, num_egs_diagnostic, cmd, - num_jobs=1): + num_jobs=1, + generate_egs_scp=False): + + if generate_egs_scp: + valid_combine_output = ("ark,scp:{0}/valid_combine.egs," + "{0}/valid_combine.egs.scp".format(dir)) + valid_diagnostic_output = ("ark,scp:{0}/valid_diagnostic.egs," + "{0}/valid_diagnostic.egs.scp".format(dir)) + train_combine_output = ("ark,scp:{0}/train_combine.egs," + "{0}/train_combine.egs.scp".format(dir)) + train_diagnostic_output = ("ark,scp:{0}/train_diagnostic.egs," + "{0}/train_diagnostic.egs.scp".format(dir)) + else: + valid_combine_output = "ark:{0}/valid_combine.egs".format(dir) + valid_diagnostic_output = "ark:{0}/valid_diagnostic.egs".format(dir) + train_combine_output = "ark:{0}/train_combine.egs".format(dir) + train_diagnostic_output = "ark:{0}/train_diagnostic.egs".format(dir) + wait_pids = [] logger.info("Creating validation and train subset examples.") @@ -481,7 +505,8 @@ def generate_valid_train_subset_egs(dir, targets_parameters, valid_pid = common_lib.run_kaldi_command( """{cmd} JOB=1:{nj} {dir}/log/create_valid_subset.JOB.log \ nnet3-get-egs-multiple-targets {v_iv_opt} {v_egs_opt} "{v_feats}" \ - {targets} ark:{dir}/valid_all.JOB.egs""".format( + {targets} ark,scp:{dir}/valid_all.JOB.egs,""" + """{dir}/valid_all.JOB.egs.scp""".format( cmd=cmd, nj=num_jobs, dir=dir, v_egs_opt=egs_opts['valid_egs_opts'], v_iv_opt=feat_ivector_strings['valid_ivector_opts'], @@ -495,7 +520,8 @@ def generate_valid_train_subset_egs(dir, targets_parameters, train_pid = common_lib.run_kaldi_command( """{cmd} JOB=1:{nj} {dir}/log/create_train_subset.JOB.log \ nnet3-get-egs-multiple-targets {t_iv_opt} {v_egs_opt} "{t_feats}" \ - {targets} ark:{dir}/train_subset_all.JOB.egs""".format( + {targets} ark,scp:{dir}/train_subset_all.JOB.egs,""" + """{dir}/train_subset_all.JOB.egs.scp""".format( cmd=cmd, nj=num_jobs, dir=dir, v_egs_opt=egs_opts['valid_egs_opts'], t_iv_opt=feat_ivector_strings['train_subset_ivector_opts'], @@ -514,50 +540,56 @@ def generate_valid_train_subset_egs(dir, targets_parameters, if 
pid.returncode != 0: raise Exception(stderr) - valid_egs_all = ' '.join(['{dir}/valid_all.{n}.egs'.format(dir=dir, n=n) - for n in range(1, num_jobs + 1)]) - train_subset_egs_all = ' '.join(['{dir}/train_subset_all.{n}.egs'.format( - dir=dir, n=n) - for n in range(1, num_jobs + 1)]) + valid_egs_all = ' '.join( + ['{dir}/valid_all.{n}.egs.scp'.format(dir=dir, n=n) + for n in range(1, num_jobs + 1)]) + train_subset_egs_all = ' '.join( + ['{dir}/train_subset_all.{n}.egs.scp'.format(dir=dir, n=n) + for n in range(1, num_jobs + 1)]) wait_pids = [] logger.info("... Getting subsets of validation examples for diagnostics " " and combination.") pid = common_lib.run_kaldi_command( """{cmd} {dir}/log/create_valid_subset_combine.log \ - cat {valid_egs_all} \| nnet3-subset-egs --n={nve_combine} ark:- \ - ark:{dir}/valid_combine.egs""".format( + cat {valid_egs_all} \| nnet3-subset-egs --n={nve_combine} \ + scp:- {valid_combine_output}""".format( cmd=cmd, dir=dir, valid_egs_all=valid_egs_all, - nve_combine=num_valid_egs_combine), + nve_combine=num_valid_egs_combine, + valid_combine_output=valid_combine_output), wait=False) wait_pids.append(pid) pid = common_lib.run_kaldi_command( """{cmd} {dir}/log/create_valid_subset_diagnostic.log \ - cat {valid_egs_all} \| nnet3-subset-egs --n={ne_diagnostic} ark:- \ - ark:{dir}/valid_diagnostic.egs""".format( + cat {valid_egs_all} \| nnet3-subset-egs --n={ne_diagnostic} \ + scp:- {valid_diagnostic_output}""".format( cmd=cmd, dir=dir, valid_egs_all=valid_egs_all, - ne_diagnostic=num_egs_diagnostic), + ne_diagnostic=num_egs_diagnostic, + valid_diagnostic_output=valid_diagnostic_output), wait=False) wait_pids.append(pid) pid = common_lib.run_kaldi_command( """{cmd} {dir}/log/create_train_subset_combine.log \ cat {train_subset_egs_all} \| \ - nnet3-subset-egs --n={nte_combine} ark:- \ - ark:{dir}/train_combine.egs""".format( + nnet3-subset-egs --n={nte_combine} \ + scp:- {train_combine_output}""".format( cmd=cmd, dir=dir, train_subset_egs_all=train_subset_egs_all, - nte_combine=num_train_egs_combine), + nte_combine=num_train_egs_combine, + train_combine_output=train_combine_output), wait=False) wait_pids.append(pid) pid = common_lib.run_kaldi_command( """{cmd} {dir}/log/create_train_subset_diagnostic.log \ cat {train_subset_egs_all} \| \ - nnet3-subset-egs --n={ne_diagnostic} ark:- \ - ark:{dir}/train_diagnostic.egs""".format( + nnet3-subset-egs --n={ne_diagnostic} \ + scp:- {train_diagnostic_output}""".format( cmd=cmd, dir=dir, train_subset_egs_all=train_subset_egs_all, - ne_diagnostic=num_egs_diagnostic), wait=False) + ne_diagnostic=num_egs_diagnostic, + train_diagnostic_output=train_diagnostic_output), + wait=False) wait_pids.append(pid) for pid in wait_pids: @@ -569,6 +601,14 @@ def generate_valid_train_subset_egs(dir, targets_parameters, """cat {dir}/valid_combine.egs {dir}/train_combine.egs > \ {dir}/combine.egs""".format(dir=dir)) + if generate_egs_scp: + common_lib.run_kaldi_command( + """cat {dir}/valid_combine.egs.scp {dir}/train_combine.egs.scp > \ + {dir}/combine.egs.scp""".format(dir=dir)) + common_lib.run_kaldi_command( + "rm {dir}/valid_combine.egs.scp {dir}/train_combine.egs.scp" + "".format(dir=dir)) + # perform checks for file_name in ('{0}/combine.egs {0}/train_diagnostic.egs ' '{0}/valid_diagnostic.egs'.format(dir).split()): @@ -577,6 +617,7 @@ def generate_valid_train_subset_egs(dir, targets_parameters, # clean-up for x in ('{0}/valid_all.*.egs {0}/train_subset_all.*.egs ' + '{0}/valid_all.*.egs.scp {0}/train_subset_all.*.egs.scp ' 
'{0}/train_combine.egs ' '{0}/valid_combine.egs'.format(dir).split()): for file_name in glob.glob(x): @@ -591,7 +632,8 @@ def generate_training_examples_internal(dir, targets_parameters, feat_dir, samples_per_iter, cmd, srand=0, reduce_frames_per_eg=True, only_shuffle=False, - dry_run=False): + dry_run=False, + generate_egs_scp=False): # The examples will go round-robin to egs_list. Note: we omit the # 'normalization.fst' argument while creating temporary egs: the phase of @@ -605,8 +647,8 @@ def generate_training_examples_internal(dir, targets_parameters, feat_dir, num_archives = (num_frames) / (frames_per_eg * samples_per_iter) + 1 reduced = False - while (reduce_frames_per_eg and frames_per_eg > 1 and - num_frames / ((frames_per_eg-1)*samples_per_iter) == 0): + while (reduce_frames_per_eg and frames_per_eg > 1 + and num_frames / ((frames_per_eg-1)*samples_per_iter) == 0): frames_per_eg -= 1 num_archives = 1 reduced = True @@ -652,9 +694,9 @@ def generate_training_examples_internal(dir, targets_parameters, feat_dir, for y in range(1, num_jobs + 1)]) split_feat_dir = "{0}/split{1}".format(feat_dir, num_jobs) - egs_list = ' '.join(['ark:{dir}/egs_orig.JOB.{ark_num}.ark'.format( - dir=dir, ark_num=x) - for x in range(1, num_archives_intermediate + 1)]) + egs_list = ' '.join( + ['ark:{dir}/egs_orig.JOB.{ark_num}.ark'.format(dir=dir, ark_num=x) + for x in range(1, num_archives_intermediate + 1)]) if not only_shuffle: common_lib.run_kaldi_command( @@ -678,20 +720,43 @@ def generate_training_examples_internal(dir, targets_parameters, feat_dir, if archives_multiple == 1: # there are no intermediate archives so just shuffle egs across # jobs and dump them into a single output + + if generate_egs_scp: + output_archive = ("ark,scp:{dir}/egs.JOB.ark," + "{dir}/egs.JOB.scp".format(dir=dir)) + else: + output_archive = "ark:{dir}/egs.JOB.ark".format(dir=dir) + common_lib.run_kaldi_command( """{cmd} --max-jobs-run {msjr} JOB=1:{nai} \ {dir}/log/shuffle.JOB.log \ nnet3-shuffle-egs --srand=$[JOB+{srand}] \ - "ark:cat {egs_list}|" ark:{dir}/egs.JOB.ark""".format( + "ark:cat {egs_list}|" {output_archive}""".format( cmd=cmd, msjr=num_jobs, nai=num_archives_intermediate, srand=srand, - dir=dir, egs_list=egs_list)) + dir=dir, egs_list=egs_list, + output_archive=output_archive)) + + if generate_egs_scp: + out_egs_handle = open("{0}/egs.scp".format(dir), 'w') + for i in range(1, num_archives_intermediate + 1): + for line in open("{0}/egs.{1}.scp".format(dir, i)): + print (line, file=out_egs_handle) + out_egs_handle.close() else: # there are intermediate archives so we shuffle egs across jobs # and split them into archives_multiple output archives - output_archives = ' '.join(["ark:{dir}/egs.JOB.{ark_num}.ark".format( - dir=dir, ark_num=x) - for x in range(1, archives_multiple + 1)]) + if generate_egs_scp: + output_archives = ' '.join( + ["ark,scp:{dir}/egs.JOB.{ark_num}.ark," + "{dir}/egs.JOB.{ark_num}.scp".format( + dir=dir, ark_num=x) + for x in range(1, archives_multiple + 1)]) + else: + output_archives = ' '.join( + ["ark:{dir}/egs.JOB.{ark_num}.ark".format( + dir=dir, ark_num=x) + for x in range(1, archives_multiple + 1)]) # archives were created as egs.x.y.ark # linking them to egs.i.ark format which is expected by the training # scripts @@ -712,6 +777,14 @@ def generate_training_examples_internal(dir, targets_parameters, feat_dir, nai=num_archives_intermediate, srand=srand, dir=dir, egs_list=egs_list, oarks=output_archives)) + if generate_egs_scp: + out_egs_handle = open("{0}/egs.scp".format(dir), 'w') + 
for i in range(1, num_archives_intermediate + 1): + for j in range(1, archives_multiple + 1): + for line in open("{0}/egs.{1}.{2}.scp".format(dir, i, j)): + print (line, file=out_egs_handle) + out_egs_handle.close() + cleanup(dir, archives_multiple) return {'num_frames': num_frames, 'num_archives': num_archives, @@ -744,7 +817,8 @@ def generate_training_examples(dir, targets_parameters, feat_dir, feat_ivector_strings, egs_opts, frame_shift, frames_per_eg, samples_per_iter, cmd, num_jobs, srand=0, - only_shuffle=False, dry_run=False): + only_shuffle=False, dry_run=False, + generate_egs_scp=False): # generate the training options string with the given chunk_width train_egs_opts = egs_opts['train_egs_opts'] @@ -769,7 +843,8 @@ def generate_training_examples(dir, targets_parameters, feat_dir, samples_per_iter=samples_per_iter, cmd=cmd, srand=srand, only_shuffle=only_shuffle, - dry_run=dry_run) + dry_run=dry_run, + generate_egs_scp=generate_egs_scp) return info @@ -792,13 +867,15 @@ def generate_egs(egs_dir, feat_dir, targets_para_array, cmvn_opts=None, apply_cmvn_sliding=False, compress_input=True, input_compress_format=0, - num_utts_subset=300, + num_utts_subset_train=300, + num_utts_subset_valid=300, num_train_egs_combine=1000, num_valid_egs_combine=0, num_egs_diagnostic=4000, samples_per_iter=400000, num_jobs=6, - srand=0): + srand=0, + generate_egs_scp=False): for directory in '{0}/log {0}/info'.format(egs_dir).split(): create_directory(directory) @@ -817,9 +894,9 @@ def generate_egs(egs_dir, feat_dir, targets_para_array, frame_shift = data_lib.get_frame_shift(feat_dir) min_duration = frames_per_eg * frame_shift - valid_utts = sample_utts(feat_dir, num_utts_subset, min_duration)[0] - train_subset_utts = sample_utts(feat_dir, num_utts_subset, min_duration, - exclude_list=valid_utts)[0] + valid_utts = sample_utts(feat_dir, num_utts_subset_valid, min_duration)[0] + train_subset_utts = sample_utts(feat_dir, num_utts_subset_train, + min_duration, exclude_list=valid_utts)[0] train_utts, train_utts_durs = sample_utts(feat_dir, None, -1, exclude_list=valid_utts) @@ -857,7 +934,8 @@ def generate_egs(egs_dir, feat_dir, targets_para_array, num_valid_egs_combine=num_valid_egs_combine, num_egs_diagnostic=num_egs_diagnostic, cmd=cmd, - num_jobs=num_jobs) + num_jobs=num_jobs, + generate_egs_scp=generate_egs_scp) logger.info("Generating training examples on disk.") info = generate_training_examples( @@ -873,7 +951,8 @@ def generate_egs(egs_dir, feat_dir, targets_para_array, num_jobs=num_jobs, srand=srand, only_shuffle=True if stage > 3 else False, - dry_run=True if stage > 4 else False) + dry_run=True if stage > 4 else False, + generate_egs_scp=generate_egs_scp) info['feat_dim'] = feat_ivector_strings['feat_dim'] info['ivector_dim'] = feat_ivector_strings['ivector_dim'] @@ -898,13 +977,15 @@ def main(): apply_cmvn_sliding=args.apply_cmvn_sliding, compress_input=args.compress_input, input_compress_format=args.input_compress_format, - num_utts_subset=args.num_utts_subset, + num_utts_subset_train=args.num_utts_subset_train, + num_utts_subset_valid=args.num_utts_subset_valid, num_train_egs_combine=args.num_train_egs_combine, num_valid_egs_combine=args.num_valid_egs_combine, num_egs_diagnostic=args.num_egs_diagnostic, samples_per_iter=args.samples_per_iter, num_jobs=args.num_jobs, - srand=args.srand) + srand=args.srand, + generate_egs_scp=args.generate_egs_scp) if __name__ == "__main__": From 54cc83675e91555c8a099a293f00ce1d14165190 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 19 Dec 2016 20:00:55 
-0500 Subject: [PATCH 145/530] asr_diarization: Add objective-scale to xconfig output --- .../steps/libs/nnet3/xconfig/basic_layers.py | 51 +++++++++++++++---- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py index 38ff36622ec..f74137da48b 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py @@ -448,7 +448,8 @@ def set_default_configs(self): 'max-change' : 1.5, 'param-stddev' : 0.0, 'bias-stddev' : 0.0, - 'output-delay' : 0 + 'output-delay' : 0, + 'objective-scale': 1.0 } def check_configs(self): @@ -513,6 +514,7 @@ def get_full_config(self): bias_stddev = self.config['bias-stddev'] output_delay = self.config['output-delay'] max_change = self.config['max-change'] + objective_scale = self.config['objective-scale'] # note: ref.config is used only for getting the left-context and # right-context of the network; @@ -553,6 +555,18 @@ def get_full_config(self): ans.append((config_name, line)) cur_node = '{0}.fixed-scale'.format(self.name) + if objective_scale != 1.0: + line = ('component name={0}.objective-scale' + ' type=ScaleGradientComponent scale={1} dim={2}' + ''.format(self.name, objective_scale, output_dim)) + ans.append((config_name, line)) + + line = ('component-node name={0}.objective-scale' + ' component={0}.objective-scale input={1}' + ''.format(self.name, cur_node)) + ans.append((config_name, line)) + cur_node = '{0}.objective-scale'.format(self.name) + if include_log_softmax: line = ('component name={0}.log-softmax' ' type=LogSoftmaxComponent dim={1}' @@ -611,7 +625,24 @@ def set_default_configs(self): 'max-change' : 0.75, 'self-repair-scale' : 1.0e-05, 'target-rms' : 1.0, - 'ng-affine-options' : ''} + 'ng-affine-options' : '', + 'add-log-stddev' : False } + + def set_derived_configs(self): + output_dim = self.config['dim'] + # If not set, the output-dim defaults to the input-dim. + if output_dim <= 0: + self.config['dim'] = self.descriptors['input']['dim'] + + if self.config['add-log-stddev']: + split_layer_name = self.layer_type.split('-') + assert split_layer_name[-1] == 'layer' + nonlinearities = split_layer_name[:-1] + + for nonlinearity in nonlinearities: + if nonlinearity == "renorm": + output_dim += 1 + self.config['output-dim'] = output_dim def check_configs(self): if self.config['dim'] < 0: @@ -633,12 +664,7 @@ def output_name(self, auxiliary_output=None): return '{0}.{1}'.format(self.name, last_nonlinearity) def output_dim(self, auxiliary_output = None): - output_dim = self.config['dim'] - # If not set, the output-dim defaults to the input-dim. - if output_dim <= 0: - output_dim = self.descriptors['input']['dim'] - return output_dim - + return self.config['output-dim'] def get_full_config(self): ans = [] @@ -668,11 +694,13 @@ def _generate_config(self): return self._add_components(input_desc, input_dim, nonlinearities) def _add_components(self, input_desc, input_dim, nonlinearities): - output_dim = self.output_dim() + output_dim = self.config['dim'] self_repair_scale = self.config['self-repair-scale'] target_rms = self.config['target-rms'] max_change = self.config['max-change'] ng_opt_str = self.config['ng-affine-options'] + add_log_stddev = ("true" if self.config['add-log-stddev'] + else "false") configs = [] # First the affine node. 
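For intuition about what the objective-scale option added above buys in a multitask network: assuming ScaleGradientComponent (as its name and its usage in this patch suggest) passes data through unchanged in the forward pass and multiplies only the backpropagated derivative by the given scale, the error signal reaching the layers shared with other outputs is damped for this output. A toy sketch with hypothetical gradient values, not Kaldi code:

    # Combined gradient at a layer shared by two outputs, when output A's
    # backpropagated derivative is scaled by 0.25; forward activations are
    # untouched, only A's contribution to the shared gradient shrinks.
    def combined_gradient(grad_a, grad_b, scale_a=0.25):
        return scale_a * grad_a + grad_b

    print(combined_gradient(1.0, 1.0))  # 1.25 rather than 2.0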
@@ -718,8 +746,11 @@ def _add_components(self, input_desc, input_dim, nonlinearities): line = ('component name={0}.{1}' ' type=NormalizeComponent dim={2}' ' target-rms={3}' + ' add-log-stddev={4}' ''.format(self.name, nonlinearity, output_dim, - target_rms)) + target_rms, add_log_stddev)) + if self.config['add-log-stddev']: + output_dim += 1 else: raise xparser_error("Unknown nonlinearity type:" From eb04aabe61a8fa12f80ce22dd2dea2f95450577a Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 19 Dec 2016 20:01:39 -0500 Subject: [PATCH 146/530] asr_diarization: Support multitask egs at script level --- egs/wsj/s5/steps/libs/nnet3/train/common.py | 12 ++ .../nnet3/train/frame_level_objf/common.py | 111 +++++++++++++----- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 4 +- 3 files changed, 99 insertions(+), 28 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py index 90ee209a092..7ae44cdffae 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -559,6 +559,18 @@ def __init__(self): action=common_lib.NullstrToNoneAction, help="""String to provide options directly to steps/nnet3/get_egs.sh script""") + self.parser.add_argument("--egs.use-multitask-egs", type=str, + dest='use_multitask_egs', + default=True, choices=["true", "false"], + action=common_lib.StrToBoolAction, + help="""Use mutlitask egs created using + allocate_multilingual_egs.py.""") + self.parser.add_argument("--egs.rename-multitask-outputs", type=str, + dest='rename_multitask_outputs', + default=True, choices=["true", "false"], + action=common_lib.StrToBoolAction, + help="""Rename multitask outputs created using + allocate_multilingual_egs.py.""") # trainer options self.parser.add_argument("--trainer.srand", type=int, dest='srand', diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index b8a28d2e2bf..508445e331e 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -33,7 +33,8 @@ def train_new_models(dir, iter, srand, num_jobs, frames_per_eg=-1, min_deriv_time=None, max_deriv_time=None, min_left_context=None, min_right_context=None, - extra_egs_copy_cmd=""): + extra_egs_copy_cmd="", use_multitask_egs=False, + rename_multitask_outputs=False): """ Called from train_one_iteration(), this model does one iteration of training with 'num_jobs' jobs, and writes files like exp/tdnn_a/24.{1,2,3,..}.raw @@ -98,11 +99,49 @@ def train_new_models(dir, iter, srand, num_jobs, cache_write_opt = "--write-cache={dir}/cache.{iter}".format( dir=dir, iter=iter+1) - minibatch_opts = "--minibatch-size={0}".format(minibatch_size) - - if chunk_level_training: - minibatch_opts = "{0} --measure-output-frames=false".format( - minibatch_size) + if use_multitask_egs: + output_rename_opt = "" + if rename_multitask_outputs: + output_rename_opt = ( + "--output=ark:{egs_dir}" + "/output.{archive_index}".format( + egs_dir=egs_dir, archive_index=archive_index)) + egs_rspecifier = ( + "ark,bg:nnet3-copy-egs {frame_opts} {context_opts} " + "{output_rename_opt} " + "--weights=ark:{egs_dir}/weight.{archive_index} " + "scp:{egs_dir}/egs.{archive_index} ark:- | " + "{extra_egs_copy_cmd}" + "nnet3-merge-egs --minibatch-size={minibatch_size} " + "--measure-output-frames=false " + "--discard-partial-minibatches=true ark:- ark:- | " + "nnet3-shuffle-egs --buffer-size={shuffle_buffer_size} 
" + "ark:- ark:- |".format( + frame_opts=("" if chunk_level_training + else "--frame={0}".format(frame)), + context_opts=context_opts, egs_dir=egs_dir, + output_rename_opt=output_rename_opt, + archive_index=archive_index, + shuffle_buffer_size=shuffle_buffer_size, + extra_egs_copy_cmd=extra_egs_copy_cmd, + minibatch_size=minibatch_size)) + else: + egs_rspecifier = ( + "ark,bg:nnet3-copy-egs {frame_opts} {context_opts} " + "ark:{egs_dir}/egs.{archive_index}.ark ark:- |" + "{extra_egs_copy_cmd}" + "nnet3-shuffle-egs --buffer-size={shuffle_buffer_size} " + "--srand={srand} ark:- ark:- | " + "nnet3-merge-egs --minibatch-size={minibatch_size} " + "--measure-output-frames=false " + "--discard-partial-minibatches=true ark:- ark:- |".format( + frame_opts=("" if chunk_level_training + else "--frame={0}".format(frame)), + context_opts=context_opts, egs_dir=egs_dir, + archive_index=archive_index, + shuffle_buffer_size=shuffle_buffer_size, + extra_egs_copy_cmd=extra_egs_copy_cmd, + minibatch_size=minibatch_size)) process_handle = common_lib.run_job( """{command} {train_queue_opt} {dir}/log/train.{iter}.{job}.log \ @@ -111,12 +150,7 @@ def train_new_models(dir, iter, srand, num_jobs, --momentum={momentum} \ --max-param-change={max_param_change} \ {deriv_time_opts} "{raw_model}" \ - "ark,bg:nnet3-copy-egs {frame_opts} {context_opts} """ - """ark:{egs_dir}/egs.{archive_index}.ark ark:- |{extra_egs_copy_cmd}""" - """nnet3-shuffle-egs --buffer-size={shuffle_buffer_size} """ - """--srand={srand} ark:- ark:- | """ - """nnet3-merge-egs {minibatch_opts} """ - """--discard-partial-minibatches=true ark:- ark:- |" \ + "{egs_rspecifier}" \ {dir}/{next_iter}.{job}.raw""".format( command=run_opts.command, train_queue_opt=run_opts.train_queue_opt, @@ -126,17 +160,10 @@ def train_new_models(dir, iter, srand, num_jobs, parallel_train_opts=run_opts.parallel_train_opts, cache_read_opt=cache_read_opt, cache_write_opt=cache_write_opt, - frame_opts=("" - if chunk_level_training - else "--frame={0}".format(frame)), - minibatch_opts=minibatch_opts, momentum=momentum, max_param_change=max_param_change, deriv_time_opts=" ".join(deriv_time_opts), - raw_model=raw_model_string, context_opts=context_opts, - egs_dir=egs_dir, archive_index=archive_index, - shuffle_buffer_size=shuffle_buffer_size, - extra_egs_copy_cmd=extra_egs_copy_cmd), - wait=False) + raw_model=raw_model_string, + egs_rspecifier=egs_rspecifier), wait=False) processes.append(process_handle) @@ -166,7 +193,8 @@ def train_one_iteration(dir, iter, srand, egs_dir, shrinkage_value=1.0, dropout_proportions=None, get_raw_nnet_from_am=True, background_process_handler=None, - extra_egs_copy_cmd=""): + extra_egs_copy_cmd="", use_multitask_egs=False, + rename_multitask_outputs=False): """ Called from steps/nnet3/train_*.py scripts for one iteration of neural network training @@ -309,7 +337,9 @@ def train_one_iteration(dir, iter, srand, egs_dir, max_deriv_time=max_deriv_time, min_left_context=min_left_context, min_right_context=min_right_context, - extra_egs_copy_cmd=extra_egs_copy_cmd) + extra_egs_copy_cmd=extra_egs_copy_cmd, + use_multitask_egs=use_multitask_egs, + rename_multitask_outputs=rename_multitask_outputs) [models_to_average, best_model] = common_train_lib.get_successful_models( num_jobs, '{0}/log/train.{1}.%.log'.format(dir, iter)) @@ -419,15 +449,22 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, context_opts = "--left-context={lc} --right-context={rc}".format( lc=left_context, rc=right_context) + if 
os.path.isfile("{0}/valid_diagnostic.egs".format(egs_dir)): + valid_diagnostic_egs = "ark:{0}/valid_diagnostic.egs".format(egs_dir) + else: + valid_diagnostic_egs = "scp:{0}/valid_diagnostic.egs.1".format( + egs_dir) + common_lib.run_job( """ {command} {dir}/log/compute_prob_valid.{iter}.log \ nnet3-compute-prob "{model}" \ "ark,bg:nnet3-copy-egs {context_opts} \ - ark:{egs_dir}/valid_diagnostic.egs ark:- |{extra_egs_copy_cmd} \ + {egs_rspecifier} ark:- |{extra_egs_copy_cmd} \ nnet3-merge-egs --minibatch-size={mb_size} ark:- \ ark:- |" """.format(command=run_opts.command, dir=dir, iter=iter, + egs_rspecifier=valid_diagnostic_egs, context_opts=context_opts, mb_size=mb_size, model=model, @@ -435,15 +472,22 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, extra_egs_copy_cmd=extra_egs_copy_cmd), wait=wait, background_process_handler=background_process_handler) + if os.path.isfile("{0}/train_diagnostic.egs".format(egs_dir)): + train_diagnostic_egs = "ark:{0}/train_diagnostic.egs".format(egs_dir) + else: + train_diagnostic_egs = "scp:{0}/train_diagnostic.egs.1".format( + egs_dir) + common_lib.run_job( """{command} {dir}/log/compute_prob_train.{iter}.log \ nnet3-compute-prob "{model}" \ "ark,bg:nnet3-copy-egs {context_opts} \ - ark:{egs_dir}/train_diagnostic.egs ark:- |{extra_egs_copy_cmd} \ + {egs_rspecifier} ark:- | {extra_egs_copy_cmd} \ nnet3-merge-egs --minibatch-size={mb_size} ark:- \ ark:- |" """.format(command=run_opts.command, dir=dir, iter=iter, + egs_rspecifier=train_diagnostic_egs, context_opts=context_opts, mb_size=mb_size, model=model, @@ -468,16 +512,23 @@ def compute_progress(dir, iter, egs_dir, left_context, right_context, context_opts = "--left-context={lc} --right-context={rc}".format( lc=left_context, rc=right_context) + if os.path.isfile("{0}/train_diagnostic.egs".format(egs_dir)): + train_diagnostic_egs = "ark:{0}/train_diagnostic.egs".format(egs_dir) + else: + train_diagnostic_egs = "scp:{0}/train_diagnostic.egs.1".format( + egs_dir) + common_lib.run_job( """{command} {dir}/log/progress.{iter}.log \ nnet3-info "{model}" '&&' \ nnet3-show-progress --use-gpu=no "{prev_model}" "{model}" \ "ark,bg:nnet3-copy-egs {context_opts} \ - ark:{egs_dir}/train_diagnostic.egs ark:- |{extra_egs_copy_cmd} \ + {egs_rspecifier} ark:- |{extra_egs_copy_cmd} \ nnet3-merge-egs --minibatch-size={mb_size} ark:- \ ark:- |" """.format(command=run_opts.command, dir=dir, iter=iter, + egs_rspecifier=train_diagnostic_egs, model=model, context_opts=context_opts, mb_size=mb_size, @@ -532,19 +583,25 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir, context_opts = "--left-context={lc} --right-context={rc}".format( lc=left_context, rc=right_context) + if os.path.isfile("{0}/combine.egs".format(egs_dir)): + combine_egs = "ark:{0}/combine.egs".format(egs_dir) + else: + combine_egs = "scp:{0}/combine.egs.1".format(egs_dir) + common_lib.run_job( """{command} {combine_queue_opt} {dir}/log/combine.log \ nnet3-combine --num-iters=40 \ --enforce-sum-to-one=true --enforce-positive-weights=true \ --verbose=3 {raw_models} \ "ark,bg:nnet3-copy-egs {context_opts} \ - ark:{egs_dir}/combine.egs ark:- |{extra_egs_copy_cmd} \ + {egs_rspecifier} ark:- |{extra_egs_copy_cmd} \ nnet3-merge-egs --measure-output-frames=false \ --minibatch-size={mbsize} ark:- ark:- |" \ "{out_model}" """.format(command=run_opts.command, combine_queue_opt=run_opts.combine_queue_opt, dir=dir, raw_models=" ".join(raw_model_strings), + egs_rspecifier=combine_egs, context_opts=context_opts, mbsize=mbsize, 
out_model=out_model, diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index 4a2424e54f5..2bea66dbcbf 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -466,7 +466,9 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): run_opts=run_opts, get_raw_nnet_from_am=False, background_process_handler=background_process_handler, - extra_egs_copy_cmd=args.extra_egs_copy_cmd) + extra_egs_copy_cmd=args.extra_egs_copy_cmd, + use_multitask_egs=args.use_multitask_egs, + rename_multitask_outputs=args.rename_multitask_outputs) if args.cleanup: # do a clean up everythin but the last 2 models, under certain From 8174b3ce5ec5371c7f491f84aa3ac5b3f3fbfc52 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 19 Dec 2016 20:02:19 -0500 Subject: [PATCH 147/530] asr_diarization: Support multi output training diagnostics correctly --- src/nnet3/nnet-chain-training.cc | 4 ++-- src/nnet3/nnet-training.cc | 5 ++--- src/nnet3/nnet-training.h | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/nnet3/nnet-chain-training.cc b/src/nnet3/nnet-chain-training.cc index d9d43006601..91dc0d8ec19 100644 --- a/src/nnet3/nnet-chain-training.cc +++ b/src/nnet3/nnet-chain-training.cc @@ -137,14 +137,14 @@ void NnetChainTrainer::ProcessOutputs(const NnetChainExample &eg, computer->AcceptOutputDeriv(sup.name, &nnet_output_deriv); objf_info_[sup.name].UpdateStats(sup.name, opts_.nnet_config.print_interval, - num_minibatches_processed_++, tot_weight, tot_objf, tot_l2_term); - + if (use_xent) { xent_deriv.Scale(opts_.chain_config.xent_regularize); computer->AcceptOutputDeriv(xent_name, &xent_deriv); } } + num_minibatches_processed_++; } void NnetChainTrainer::UpdateParamsWithMaxChange() { diff --git a/src/nnet3/nnet-training.cc b/src/nnet3/nnet-training.cc index bdbe244a648..803f3570b1a 100644 --- a/src/nnet3/nnet-training.cc +++ b/src/nnet3/nnet-training.cc @@ -95,10 +95,10 @@ void NnetTrainer::ProcessOutputs(const NnetExample &eg, supply_deriv, computer, &tot_weight, &tot_objf, deriv_weights); objf_info_[io.name].UpdateStats(io.name, config_.print_interval, - num_minibatches_processed_++, tot_weight, tot_objf); } } + num_minibatches_processed_++; } void NnetTrainer::UpdateParamsWithMaxChange() { @@ -226,11 +226,10 @@ void NnetTrainer::PrintMaxChangeStats() const { void ObjectiveFunctionInfo::UpdateStats( const std::string &output_name, int32 minibatches_per_phase, - int32 minibatch_counter, BaseFloat this_minibatch_weight, BaseFloat this_minibatch_tot_objf, BaseFloat this_minibatch_tot_aux_objf) { - int32 phase = minibatch_counter / minibatches_per_phase; + int32 phase = num_minibatches++ / minibatches_per_phase; if (phase != current_phase) { KALDI_ASSERT(phase == current_phase + 1); // or doesn't really make sense. PrintStatsForThisPhase(output_name, minibatches_per_phase); diff --git a/src/nnet3/nnet-training.h b/src/nnet3/nnet-training.h index 7b22bc75211..fefaf9ea122 100644 --- a/src/nnet3/nnet-training.h +++ b/src/nnet3/nnet-training.h @@ -98,6 +98,7 @@ struct NnetTrainerOptions { // Also see struct AccuracyInfo, in nnet-diagnostics.h. 
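The point of this patch's change is that minibatch counting moves out of the trainer loop and into each output's ObjectiveFunctionInfo (the num_minibatches member added just below): with multitask egs, not every output appears in every minibatch, so a single shared counter incremented once per output per minibatch would advance each output's logging phase incorrectly. A sketch of the per-output bookkeeping after the change, mirroring the post-increment in 'num_minibatches++ / minibatches_per_phase' above (illustration only):

    # Each output keeps its own count, so per-output diagnostics stay
    # correct when different minibatches carry different outputs.
    class ObjectiveInfo(object):
        def __init__(self):
            self.num_minibatches = 0
            self.current_phase = 0

        def update_stats(self, minibatches_per_phase):
            phase = self.num_minibatches // minibatches_per_phase
            self.num_minibatches += 1
            if phase != self.current_phase:
                # print stats for the phase that just ended, then advance
                self.current_phase = phase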
struct ObjectiveFunctionInfo { int32 current_phase; + int32 num_minibatches; double tot_weight; double tot_objf; @@ -110,7 +111,7 @@ struct ObjectiveFunctionInfo { double tot_aux_objf_this_phase; ObjectiveFunctionInfo(): - current_phase(0), + current_phase(0), num_minibatches(0), tot_weight(0.0), tot_objf(0.0), tot_aux_objf(0.0), tot_weight_this_phase(0.0), tot_objf_this_phase(0.0), tot_aux_objf_this_phase(0.0) { } @@ -121,7 +122,6 @@ struct ObjectiveFunctionInfo { // control how frequently we print logging messages. void UpdateStats(const std::string &output_name, int32 minibatches_per_phase, - int32 minibatch_counter, BaseFloat this_minibatch_weight, BaseFloat this_minibatch_tot_objf, BaseFloat this_minibatch_tot_aux_objf = 0.0); From 59c9a2d3ef7f89f1776be939063f56f38dce3a93 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 19 Dec 2016 20:02:51 -0500 Subject: [PATCH 148/530] asr_diarization: Add data to libs __init__ --- egs/wsj/s5/steps/libs/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/libs/__init__.py b/egs/wsj/s5/steps/libs/__init__.py index 013c95d0b3f..8f3540643c8 100644 --- a/egs/wsj/s5/steps/libs/__init__.py +++ b/egs/wsj/s5/steps/libs/__init__.py @@ -8,4 +8,4 @@ import common -__all__ = ["common"] +__all__ = ["common", "data"] From 38f2515ba275e3836f8af328600cfce3a51894ee Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 19 Dec 2016 20:03:21 -0500 Subject: [PATCH 149/530] asr_diarization: Adding new overlapped speech recipe --- .../tuning/train_stats_sad_overlap_1a.sh | 51 ++-- .../tuning/train_stats_sad_overlap_1b.sh | 239 ++++++++++++++++++ 2 files changed, 266 insertions(+), 24 deletions(-) create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1b.sh diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh index aae1fd995e0..c8a7c887fef 100644 --- a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh @@ -20,7 +20,7 @@ egs_opts= # Directly passed to get_egs_multiple_targets.py # TDNN options relu_dim=256 -chunk_width=20 # We use chunk training for training TDNN +chunk_width=40 # We use chunk training for training TDNN extra_left_context=100 # Maximum left context in egs apart from TDNN's left context extra_right_context=20 # Maximum right context in egs apart from TDNN's right context @@ -41,28 +41,23 @@ max_param_change=0.2 # Small max-param change for small network extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs # such as removing one of the targets -num_utts_subset_valid=50 # "utts" is actually recording. So this is prettly small. 
-num_utts_subset_train=50 - # target options -train_data_dir=data/train_azteec_whole_sp_corrupted_hires - -snr_scp= -speech_feat_scp= -overlapped_speech_labels_scp= - -deriv_weights_scp= -deriv_weights_for_overlapped_speech_scp= +train_data_dir=data/train_aztec_small_unsad_a +speech_feat_scp=data/train_aztec_small_unsad_a/speech_feat.scp +deriv_weights_scp=data/train_aztec_small_unsad_a/deriv_weights.scp -train_data_dir=data/train_aztec_small_unsad_whole_sad_ovlp_corrupted_sp -speech_feat_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp/speech_feat.scp -deriv_weights_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/deriv_weights.scp +#train_data_dir=data/train_aztec_small_unsad_whole_sad_ovlp_corrupted_sp +#speech_feat_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp/speech_feat.scp +#deriv_weights_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/deriv_weights.scp +#data/train_aztec_small_unsad_whole_all_corrupted_sp_hires_bp +# Only for SAD snr_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp/irm_targets.scp deriv_weights_for_irm_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp/deriv_weights_manual_seg.scp -deriv_weights_for_overlapped_speech_scp= -overlapped_speech_labels_scp= +# Only for overlapped speech detection +deriv_weights_for_overlapped_speech_scp=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp/deriv_weights_for_overlapped_speech.scp +overlapped_speech_labels_scp=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp/overlapped_speech_labels.scp #extra_left_context=79 #extra_right_context=11 @@ -79,6 +74,10 @@ affix=a . ./path.sh . ./utils/parse_options.sh +num_utts=`cat $train_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 
4000 : $n)' $num_utts` + if [ -z "$dir" ]; then dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn fi @@ -109,11 +108,15 @@ if [ $stage -le 3 ]; then stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 - relu-renorm-layer name=tdnn4 dim=256 - output-layer name=output-speech include-log-softmax=true dim=2 - output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective=quadratic - output-layer name=output-overlapped_speech include-log-softmax=true dim=2 + relu-renorm-layer name=pre-final-speech dim=256 input=tdnn3 + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e 'print (1.0/6)'` + + relu-renorm-layer name=pre-final-snr dim=256 input=tdnn3 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print 1.0/$num_snr_bins"` + + relu-renorm-layer name=pre-final-overlapped_speech dim=256 input=tdnn3 + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ --config-dir $dir/configs/ \ @@ -145,7 +148,7 @@ if [ -z "$egs_dir" ]; then --num-utts-subset-valid=$num_utts_subset_valid \ --samples-per-iter=20000 \ --stage=$get_egs_stage \ - --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$snr_scp --deriv-weights-scp=$deriv_weights_scp" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$snr_scp --deriv-weights-scp=$deriv_weights_for_irm_scp" \ --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$speech_feat_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$overlapped_speech_labels_scp --deriv-weights-scp=$deriv_weights_for_overlapped_speech_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ --dir=$dir/egs @@ -155,7 +158,7 @@ fi if [ $stage -le 5 ]; then steps/nnet3/train_raw_rnn.py --stage=$train_stage \ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ - --egs.chunk-width=20 \ + --egs.chunk-width=$chunk_width \ --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ --egs.chunk-left-context=$extra_left_context \ --egs.chunk-right-context=$extra_right_context \ @@ -169,7 +172,7 @@ if [ $stage -le 5 ]; then --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ --trainer.optimization.final-effective-lrate=$final_effective_lrate \ --trainer.optimization.shrink-value=1.0 \ - --trainer.rnn.num-chunk-per-minibatch=64 \ + --trainer.rnn.num-chunk-per-minibatch=128 \ --trainer.deriv-truncate-margin=8 \ --trainer.max-param-change=$max_param_change \ --cmd="$decode_cmd" --nj 40 \ diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1b.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1b.sh new file mode 100644 index 00000000000..888c25295d6 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1b.sh @@ -0,0 +1,239 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. 
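Both the 1a script above and the new 1b script below size their train and valid diagnostic subsets with the same perl one-liner: 0.5% of the utterances (which here are actually whole recordings), capped at 4000. The equivalent logic, for reference:

    # Equivalent of the perl one-liners used in these recipes.
    def num_utts_subset(num_utts, fraction=0.005, cap=4000):
        n = int(num_utts * fraction)
        return cap if n > cap else n

    assert num_utts_subset(100000) == 500
    assert num_utts_subset(2000000) == 4000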
+ +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=40 # We use chunk training for training TDNN +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=1 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=b + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + + relu-renorm-layer name=pre-final-speech dim=256 input=tdnn3 + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e "print ($num_frames_ovlp / $num_frames_sad) ** 0.25"` + + relu-renorm-layer name=pre-final-snr dim=256 input=tdnn3 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_ovlp / $num_frames_sad) ** 0.25) / $num_snr_bins"` + + relu-renorm-layer name=pre-final-overlapped_speech dim=256 input=tdnn3 + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs_speech/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --minibatch-size $[chunk_width * num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + 
${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$speech_feat_scp" \ + --dir=$dir || exit 1 +fi + From 6c9efb6f54aaae4fb4d0274d5c0f3c0e6b4d011f Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 19 Dec 2016 20:04:16 -0500 Subject: [PATCH 150/530] asr_diarization: Add iter option to run_segmentation_ami --- egs/aspire/s5/local/segmentation/run_segmentation_ami.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh b/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh index f9374aaf55a..4b98eec9f43 100755 --- a/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh +++ b/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh @@ -14,6 +14,7 @@ stage=-1 nnet_dir=exp/nnet3_sad_snr/nnet_tdnn_k_n4 extra_left_context=100 extra_right_context=20 +iter=final . utils/parse_options.sh @@ -107,7 +108,7 @@ if [ $stage -le 7 ]; then steps/segmentation/do_segmentation_data_dir.sh --reco-nj 18 \ --mfcc-config conf/mfcc_hires_bp.conf --feat-affix bp --do-downsampling true \ --extra-left-context $extra_left_context --extra-right-context $extra_right_context \ - --output-name output-speech --frame-subsampling-factor 6 \ + --output-name output-speech --frame-subsampling-factor 6 --iter $iter \ $src_dir/data/sdm1/dev $nnet_dir mfcc_hires_bp $hyp_dir fi From 7de8a831d1dc397e4c5ec6e299fe29d140bc2b27 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 19 Dec 2016 20:04:30 -0500 Subject: [PATCH 151/530] asr_diarization: Add iter to aspire segmentation --- egs/aspire/s5/local/nnet3/prep_test_aspire_segmentation.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/egs/aspire/s5/local/nnet3/prep_test_aspire_segmentation.sh b/egs/aspire/s5/local/nnet3/prep_test_aspire_segmentation.sh index 5f38f6de51f..e7f70c0c07f 100755 --- a/egs/aspire/s5/local/nnet3/prep_test_aspire_segmentation.sh +++ b/egs/aspire/s5/local/nnet3/prep_test_aspire_segmentation.sh @@ -16,6 +16,8 @@ decode_num_jobs=30 num_jobs=30 affix= +sad_iter=final + # ivector opts max_count=75 # parameter for extract_ivectors.sh sub_speaker_frames=6000 @@ -73,7 +75,7 @@ fi if [ $stage -le 1 ]; then steps/segmentation/do_segmentation_data_dir.sh --reco-nj $num_jobs \ - --mfcc-config conf/mfcc_hires_bp.conf --feat-affix bp \ + --mfcc-config conf/mfcc_hires_bp.conf --feat-affix bp --iter $sad_iter \ --do-downsampling false --extra-left-context 100 --extra-right-context 20 \ --output-name output-speech --frame-subsampling-factor 6 \ data/${data_set} $sad_nnet_dir mfcc_hires_bp data/${data_set} From 885d17ec728ba891600fb87799088126b2b3110b Mon Sep 17 00:00:00 2001 From: 
Vimal Manohar Date: Mon, 19 Dec 2016 20:05:18 -0500 Subject: [PATCH 152/530] asr_diarization: Optional resolve_ctm overlaps in multicondition get_ctm aspire --- egs/aspire/s5/local/multi_condition/get_ctm.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/egs/aspire/s5/local/multi_condition/get_ctm.sh b/egs/aspire/s5/local/multi_condition/get_ctm.sh index 6fc87fec7b0..67c2c0bd87b 100755 --- a/egs/aspire/s5/local/multi_condition/get_ctm.sh +++ b/egs/aspire/s5/local/multi_condition/get_ctm.sh @@ -64,6 +64,8 @@ lattice-to-ctm-conf $frame_shift_opt --decode-mbr=$decode_mbr ark:- $decode_dir/ # combine the segment-wise ctm files, while resolving overlaps if $resolve_overlaps; then steps/resolve_ctm_overlaps.py $data_dir/segments $decode_dir/score_$LMWT/penalty_$wip/ctm.overlapping $decode_dir/score_$LMWT/penalty_$wip/ctm.merged || exit 1; +else + cp $decode_dir/score_$LMWT/penalty_$wip/ctm.overlapping $decode_dir/score_$LMWT/penalty_$wip/ctm.merged || exit 1; fi merged_ctm=$decode_dir/score_$LMWT/penalty_$wip/ctm.merged From 58d62ab0e9e136c7bdf0a2500447c52f9e73edd9 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 19 Dec 2016 20:05:43 -0500 Subject: [PATCH 153/530] asr_diarization: Adding tuning scripts for music and SAD --- .../tuning/train_stats_sad_music_1d.sh | 184 ++++++++++++++ .../tuning/train_stats_sad_music_1e.sh | 229 ++++++++++++++++++ 2 files changed, 413 insertions(+) create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1d.sh create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1e.sh diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1d.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1d.sh new file mode 100644 index 00000000000..a013fcc49a7 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1d.sh @@ -0,0 +1,184 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=20 # We use chunk training for training TDNN +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +num_utts_subset_valid=50 # "utts" is actually recording. So this is prettly small. 
+num_utts_subset_train=50 + +# target options +train_data_dir=data/train_azteec_whole_sp_corrupted_hires + +speech_feat_scp= +music_labels_scp= + +deriv_weights_scp= + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$train_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-9, tdnn1@-3, tdnn1, tdnn1@3, tdnn2_stats) dim=256 + stats-layer name=tdnn3_stats config=mean+count(-108:9:27:108) + relu-renorm-layer name=tdnn3 input=Append(tdnn2@-27, tdnn2@-9, tdnn2, tdnn2@9, tdnn3_stats) dim=256 + + output-layer name=output-speech include-log-softmax=true dim=2 input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 input=tdnn3 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$train_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=20000 \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$speech_feat_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_labels_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --dir=$dir/egs + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=20 \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + 
--trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=64 \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$train_data_dir \ + --targets-scp="$speech_feat_scp" \ + --dir=$dir || exit 1 +fi + +if [ $stage -le 6 ]; then + $train_cmd JOB=1:100 $dir/log/compute_post_output-speech.JOB.log \ + extract-column "scp:utils/split_scp.pl -j 100 \$[JOB-1] $speech_feat_scp |" ark,t:- \| \ + steps/segmentation/quantize_vector.pl \| \ + ali-to-post ark,t:- ark:- \| \ + weight-post ark:- scp:$deriv_weights_scp ark:- \| \ + post-to-feats --post-dim=2 ark:- ark:- \| \ + matrix-sum-rows ark:- ark:- \| \ + vector-sum ark:- $dir/post_output-speech.vec.JOB + eval vector-sum $dir/post_output-speech.vec.{`seq -s, 100`} $dir/post_output-speech.vec + + $train_cmd JOB=1:100 $dir/log/compute_post_output-music.JOB.log \ + ali-to-post "scp:utils/split_scp.pl -j 100 \$[JOB-1] $music_labels_scp |" ark:- \| \ + post-to-feats --post-dim=2 ark:- ark:- \| \ + matrix-sum-rows ark:- ark:- \| \ + vector-sum ark:- $dir/post_output-music.vec.JOB + eval vector-sum $dir/post_output-music.vec.{`seq -s, 100`} $dir/post_output-music.vec +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1e.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1e.sh new file mode 100644 index 00000000000..703865b8ad5 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1e.sh @@ -0,0 +1,229 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1d, but add add-log-stddev to norm layers. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=20 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=79 # Maximum left context in egs apart from TDNN's left context +extra_right_context=11 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=79 +min_extra_right_context=11 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +num_utts_subset_valid=50 # "utts" is actually recording. So this is prettly small. 
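Since 1e differs from 1d mainly by passing add-log-stddev=true to the renorm layers (see its network config below), it is worth recalling from PATCH 145 above what that flag does to dimensions: NormalizeComponent appends one extra output dimension carrying the log-stddev of its input, so a dim=256 relu-renorm-layer advertises output-dim 257 and the Append() expressions downstream grow accordingly. A sketch of the dim bookkeeping, following the set_derived_configs() change in that patch:

    # Advertised output dim of a relu-renorm-layer under add-log-stddev.
    def output_dim(dim, layer_type="relu-renorm-layer", add_log_stddev=False):
        out = dim
        if add_log_stddev:
            for nonlinearity in layer_type.split("-")[:-1]:  # drop "layer"
                if nonlinearity == "renorm":
                    out += 1  # NormalizeComponent appends the log-stddev
        return out

    assert output_dim(256) == 256
    assert output_dim(256, add_log_stddev=True) == 257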
+num_utts_subset_train=50 + +# target options +train_data_dir=data/train_aztec_small_unsad_whole_all_corrupted_sp_hires_bp + +speech_feat_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/speech_feat.scp +deriv_weights_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/deriv_weights.scp +music_labels_scp=data/train_aztec_small_unsad_whole_music_corrupted_sp_hires_bp/music_labels.scp + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$train_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 add-log-stddev=true + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-9, tdnn1@-3, tdnn1, tdnn1@3, tdnn2_stats) dim=256 add-log-stddev=true + stats-layer name=tdnn3_stats config=mean+count(-108:9:27:108) + relu-renorm-layer name=tdnn3 input=Append(tdnn2@-27, tdnn2@-9, tdnn2, tdnn2@9, tdnn3_stats) dim=256 add-log-stddev=true + + output-layer name=output-speech include-log-softmax=true dim=2 input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 input=tdnn3 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` +speech_data_dir=$dir/`basename $train_data_dir`_speech +music_data_dir=$dir/`basename $train_data_dir`_music + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + + . $dir/configs/vars + + utils/subset_data_dir.sh --utt-list $speech_feat_scp ${train_data_dir} $dir/`basename ${train_data_dir}`_speech + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$speech_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$speech_feat_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + utils/subset_data_dir.sh --utt-list $music_labels_scp ${train_data_dir} $dir/`basename ${train_data_dir}`_music + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_labels_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + steps/nnet3/multilingual/get_egs.sh \ + --minibatch-size $[chunk_width * num_chunk_per_minibatch] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$train_data_dir \ + --targets-scp="$speech_feat_scp" \ + --dir=$dir || exit 1 +fi + +if [ $stage -le 6 ]; then + $train_cmd JOB=1:100 $dir/log/compute_post_output-speech.JOB.log \ + extract-column "scp:utils/split_scp.pl -j 100 \$[JOB-1] $speech_feat_scp |" ark,t:- \| \ + steps/segmentation/quantize_vector.pl \| \ + ali-to-post ark,t:- ark:- \| \ + weight-post ark:- scp:$deriv_weights_scp ark:- \| \ + post-to-feats --post-dim=2 ark:- ark:- \| \ + matrix-sum-rows ark:- ark:- \| \ + vector-sum ark:- $dir/post_output-speech.vec.JOB + eval vector-sum $dir/post_output-speech.vec.{`seq -s, 100`} $dir/post_output-speech.vec + + $train_cmd JOB=1:100 $dir/log/compute_post_output-music.JOB.log \ + ali-to-post "scp:utils/split_scp.pl -j 100 \$[JOB-1] $music_labels_scp |" ark:- \| \ + post-to-feats --post-dim=2 ark:- ark:- \| \ + matrix-sum-rows ark:- ark:- \| \ + vector-sum ark:- $dir/post_output-music.vec.JOB + eval vector-sum 
$dir/post_output-music.vec.{`seq -s, 100`} $dir/post_output-music.vec +fi + From ebdf74078fdb3a425e0be383d705e706d5a57091 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 22 Dec 2016 17:15:37 -0500 Subject: [PATCH 154/530] Several minor fixes/changes: fix to steps/dict/train_g2p.sh; add documentation for fstpushspecial; reduce max-iter from 2000 to 200 in push-special.cc --- egs/wsj/s5/steps/dict/train_g2p.sh | 9 +-- src/doc/graph_recipe_test.dox | 116 ++++++++++++++++------------- src/fstext/push-special.cc | 29 ++++---- 3 files changed, 83 insertions(+), 71 deletions(-) diff --git a/egs/wsj/s5/steps/dict/train_g2p.sh b/egs/wsj/s5/steps/dict/train_g2p.sh index 85e1605afba..2e4df49b71b 100755 --- a/egs/wsj/s5/steps/dict/train_g2p.sh +++ b/egs/wsj/s5/steps/dict/train_g2p.sh @@ -3,14 +3,14 @@ # Copyright 2016 Xiaohui Zhang # Apache 2.0 -# Begin configuration section. +# Begin configuration section. iters=5 stage=0 encoding='utf-8' only_words=true cmd=run.pl # a list of silence phones, like data/local/dict/silence_phones.txt -silence_phones= +silence_phones= # End configuration section. echo "$0 $@" # Print the command line for logging @@ -43,7 +43,7 @@ mkdir -p $wdir/log [ ! -f $lexicon ] && echo "$0: Training lexicon does not exist." && exit 1 # Optionally remove words that are mapped to a single silence phone from the lexicon. -if $only_words && [ -z $silence_phones ]; then +if $only_words && [ ! -z "$silence_phones" ]; then awk 'NR==FNR{a[$1] = 1; next} {s=$2;for(i=3;i<=NF;i++) s=s" "$i;a[$1]=s;if(!(s in a)) print $1" "s}' \ $silence_phones > $wdir/lexicon_onlywords.txt lexicon=$wdir/lexicon_onlywords.txt @@ -63,7 +63,7 @@ if [ $stage -le 0 ]; then fi for i in `seq 0 $(($iters-2))`; do - + echo "Training the G2P model (iter $[$i + 1] )" if [ $stage -le $i ]; then @@ -80,4 +80,3 @@ if [ $stage -le $(($i + 2)) ]; then $cmd $wdir/log/test.log \ g2p.py --encoding $encoding --model $wdir/g2p.model.final --test $lexicon fi - diff --git a/src/doc/graph_recipe_test.dox b/src/doc/graph_recipe_test.dox index 860b91a157c..ead544416bd 100644 --- a/src/doc/graph_recipe_test.dox +++ b/src/doc/graph_recipe_test.dox @@ -25,19 +25,19 @@ namespace kaldi { \page graph_recipe_test Decoding-graph creation recipe (test time) Here we explain our normal graph creation approach step by step, along - with certain data-preparation stages that are related to it. + with certain data-preparation stages that are related to it. Most of the details of this approach are not hardcoded into our tools; we are just explaining how it is currently being done. If this section is confusing, the best remedy is probably to read - "Speech Recognition - with Weighted Finite-State Transducers" by Mohri et al. - Be warned: that paper is quite long, and reading it will take at least a + "Speech Recognition + with Weighted Finite-State Transducers" by Mohri et al. + Be warned: that paper is quite long, and reading it will take at least a few hours for those not already familiar with FSTs. Another good resource is the OpenFst website which will provide more context on things like symbol tables. \section graph_symtab Preparing the initial symbol tables - + We need to prepare the OpenFst symbol tables words.txt and phones.txt. These assign integer id's to all the words and phones in our system. Note that OpenFst reserves symbol zero for epsilon. 
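A symbol table like the ones shown next is a plain two-column text file mapping each symbol string to an integer id; reading one is simply (a minimal sketch):

    # Minimal reader for an OpenFst symbol table such as words.txt or
    # phones.txt: one "<symbol> <integer-id>" pair per line.
    def read_symbol_table(path):
        sym2id = {}
        with open(path) as f:
            for line in f:
                sym, idx = line.split()
                sym2id[sym] = int(idx)
        assert sym2id.get("<eps>", 0) == 0  # OpenFst reserves 0 for epsilon
        return sym2id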
An example of how the @@ -56,7 +56,7 @@ symbol tables look for the WSJ task is: ## tail -2 words.txt }RIGHT-BRACE 123683 #0 123684 -## head data/phones.txt +## head data/phones.txt 0 SIL 1 SPN 2 @@ -65,10 +65,10 @@ AA 4 AA_B 5 \endverbatim The words.txt file contains the single disambiguation symbol "#0" (used for epsilon -on the input of G.fst). This is the last-numbered word in our recipe. Be careful -with this if your +on the input of G.fst). This is the last-numbered word in our recipe. Be careful +with this if your lexicon contains a word "#0". The phones.txt file does not contain disambiguation -symbols but after creating L.fst we will create a file phones_disambig.txt that +symbols but after creating L.fst we will create a file phones_disambig.txt that has the disambiguation symbols in (this is just useful for debugging). \section graph_lexicon Preparing the lexicon L @@ -77,7 +77,7 @@ has the disambiguation symbols in (this is just useful for debugging). Our C++ tools will never interact with this, it will just be used by a script that creates lexicon FST. A small part of our WSJ lexicon is: \verbatim -## head data/lexicon.txt +## head data/lexicon.txt !SIL SIL @@ -93,7 +93,7 @@ they are treated as distinct phones (however, we do handle the tree-building specially for this setup; read about the roots file in \ref tree_building). Notice that we allow words with empty phonetic representations. -This lexicon will be used to create the L.fst used in training (without +This lexicon will be used to create the L.fst used in training (without disambiguation symbols). We also create a lexicon with disambiguation symbols, used in decoding-graph creation. An extract of this file is here: @@ -125,11 +125,11 @@ ZH_S 339 #3 343 \endverbatim The numbers are so high because in this (WSJ) recipe we added -stress and position information to the phones. +stress and position information to the phones. Note that the disambiguation symbols used for the empty words (i.e. \ and \) have to be distinct from those used for the normal -words, so the "normal" disambiguation symbols in this -example start from \#3. +words, so the "normal" disambiguation symbols in this +example start from \#3. The command to convert the lexicon without disambiguation symbols into an FST is: @@ -143,7 +143,7 @@ Here, the script make_lexicon_fst.pl creates the text representation of the FST. The 0.5 is the silence probability (i.e. at the beginning of sentence and after each word, we output silence with probability 0.5; the probability mass assigned to having no silence is -1.0 - 0.5 = 0.5. The rest of the commands in this example +1.0 - 0.5 = 0.5. The rest of the commands in this example relate to converting the FST into compiled form; fstarcsort is necessary because we are going to compose later. @@ -156,9 +156,9 @@ the input symbol is the first phone of that word. It is important both for the efficiency of composition and the effectiveness of minimization that the output symbol should be as early as possible (i.e. at the beginning not the end of the word). At the end of each -word, to handle optional silence, the transition corresponding to -the last phone is in two forms: one to the loop state and one to -the "silence state" which has a transition to the loop state. +word, to handle optional silence, the transition corresponding to +the last phone is in two forms: one to the loop state and one to +the "silence state" which has a transition to the loop state. 
We don't bother putting optional silence after silence words, which we define as words that have just one phone that is the silence phone. @@ -203,7 +203,7 @@ gunzip -c data_prep/lm.arpa.gz | \ arpa2fst --disambig-symbol=#0 \ --read-symbol-table=data/words.txt - data/G.fst \endverbatim -The last command (fstisstochastic) is a diagnostic step (see \ref fst_algo_stochastic). +The last command (fstisstochastic) is a diagnostic step (see \ref fst_algo_stochastic). In one typical example, it prints out the numbers: \verbatim 9.14233e-05 -0.259833 @@ -211,7 +211,7 @@ In one typical example, it prints out the numbers: The first number is small, so it confirms that there is no state that has the probability mass of its arcs plus final-state significantly less than one. The second number is significant, and this means that there are states that -have "too much" probability mass (the numeric values of the weights in the +have "too much" probability mass (the numeric values of the weights in the FSTs can generally be interpreted as negated log probabilities). Having some states with "too much" probability mass is normal for the FST representations of language models with backoff. During later graph creation steps we will @@ -229,7 +229,8 @@ inside Kaldi processes, not at the script level. \verbatim fsttablecompose data/L_disambig.fst data/G.fst | \ fstdeterminizestar --use-log=true | \ - fstminimizeencoded > somedir/LG.fst + fstminimizeencoded | fstpushspecial | \ + fstarcsort --sort-type=ilabel > somedir/LG.fst \endverbatim There are some small differences from OpenFst's algorithms. We use a more efficient composition algorithm (see \ref fst_algo_composition) @@ -238,16 +239,27 @@ inside Kaldi processes, not at the script level. program fstdeterminizestar. The option --use-log=true asks the program to first cast the FST to the log semiring; this preserves stochasticity (in the log semiring); see \ref fst_algo_stochastic. - - We do minimization with the program "fstminimizeencoded". This is mostly the + + We do minimization with the program "fstminimizeencoded". This is mostly the same as the version of OpenFst's minimization algorithm that applies to weighted acceptors; the only change relevant here is that - it avoids pushing weights, hence preserving stochasticity (see \ref fst_algo_minimization + it avoids pushing weights, hence preserving stochasticity (see \ref fst_algo_minimization for details). + The program "fstpushspecial" is similar to OpenFst's "fstpush" program, but if + the weights don't sum to one it ensures that all the states "sum to" the same + value (possibly different from one), rather than trying to push the "extra" + weight to the start or end of the graph. This has the advantage that it + can never fail ("fstpush" can fail or loop for a very long time if the FST "sums to" infinity); + it is also much faster. See push-special.cc for more detailed documentation. + + The "fstarcsort" stage sorts the arcs in a way that will help later composition + operations to be fast. + + \section graph_clg Preparing CLG - To get a transducer whose inputs are context-dependent phones, we need to prepare an FST + To get a transducer whose inputs are context-dependent phones, we need to prepare an FST called CLG, which is equivalent to C o L o G, where L and G are the lexicon and grammar and C represents the phonetic context. For a triphone system, the input symbols of C would be of the form a/b/c (i.e. 
triples of phones), and the output symbols would be single
   windows, and how we generalize to different context sizes.  Firstly, we describe
   how we would create the context FST C if we were to make it by itself and compose
   normally (our scripts do not actually work this way, for efficiency and scalability
-  reasons). 
+  reasons).
 
   \subsection graph_c Making the context transducer
 
@@ -274,7 +286,7 @@ inside Kaldi processes, not at the script level.
   this doesn't represent a phone since (assuming P = 1), the central element
   is \<eps\> which is not a phone.  In this case we let the input symbol of the arc be
   #-1 which is a special symbol we introduce for this purpose (we don't use
-  epsilon here as the standard recipe does, as it can lead to nondeterminizability
+  epsilon here as the standard recipe does, as it can lead to nondeterminizability 
   when there are empty words).
 
   The end-of-utterance case is a little complicated.  The context FST has, on the
@@ -283,7 +295,7 @@ inside Kaldi processes, not at the script level.
   all symbols we need to flush out the last triphone (e.g. a/b/\<eps\>, where \<eps\>
   represents undefined context).  The natural way to do this would be to have a
   transition with a/b/\<eps\> on its input and \<eps\> on its output, from
-  the state a/b to a final state (e.g. b/\<eps\> or a special final state).  But this is 
+  the state a/b to a final state (e.g. b/\<eps\> or a special final state).  But this is
  inefficient for composition, because if it was not the end of the utterance we
  would have to explore such transitions before finding them pruned away.
  Instead we use $ as the end-of-utterance symbol, and make sure it appears once
@@ -292,11 +304,11 @@ inside Kaldi processes, not at the script level.
  avoid the hassle of having to work out how many subsequential symbols to add to LG,
  we just allow it to accept any number of such symbols at the end of utterance.
  This is achieved by the function AddSubsequentialLoop() and the command-line program
-  fstaddsubsequentialloop. 
+  fstaddsubsequentialloop.
 
 
-If we wanted C on its own, we would first need a list of 
-disambiguation symbols; and we would also need to work out an unused symbol id we could use 
+If we wanted C on its own, we would first need a list of
+disambiguation symbols; and we would also need to work out an unused symbol id we could use
 for the subsequential symbol, as follows:
 \verbatim
  grep '#' data/phones_disambig.txt | awk '{print $2}' > $dir/disambig_phones.list
@@ -313,17 +325,17 @@ The program fstmakecontextfst needs the list of phones, a list of disambiguation
 and the identity of the subsequential symbol.  In addition to C.fst, it writes out the file
 "ilabels" that interprets the symbols on the left of C.fst (see \ref tree_ilabel).
 The composition with LG can be done as follows:
-\verbatim 
+\verbatim
 fstaddsubsequentialloop $subseq_sym $dir/LG.fst | \
  fsttablecompose $dir/C.fst - > $dir/CLG.fst
 \endverbatim
 For printing out C.fst and anything using the same symbols
-that index "ilabels", we can make a suitable symbol table using the following 
+that index "ilabels", we can make a suitable symbol table using the following
 command:
 \verbatim
 fstmakecontextsyms data/phones.txt $dir/ilabels > $dir/context_syms.txt
-\endverbatim 
-This command knows about the "ilabels" format (\ref tree_ilabel). 
+\endverbatim
+This command knows about the "ilabels" format (\ref tree_ilabel).
An example random path through the CLG fst (for Resource Management), printed out with this symbol table, is as follows: \verbatim @@ -338,7 +350,7 @@ out with this symbol table, is as follows: 6 7 ay/z/sil 7 8 z/sil/ 8 -\endverbatim +\endverbatim \subsection graph_compose_c Composing with C dynamically @@ -350,7 +362,7 @@ fstcomposecontext --read-disambig-syms=$dir/disambig_phones.list \ --write-disambig-syms=$dir/disambig_ilabels.list \ $dir/ilabels < $dir/LG.fst >$dir/CLG.fst \endverbatim - If we had different context parameters N and P than the defaults (3 and 1), we + If we had different context parameters N and P than the defaults (3 and 1), we would supply extra options to this program. This program writes the file "ilabels" (see \ref tree_ilabel) which interprets the input symbols of CLG.fst. The first few lines of an ilabels file from the Resource @@ -375,29 +387,29 @@ of epsilon, to ensure determinizability. \subsection graph_change_ilabel Reducing the number of context-dependent input symbols - After creating CLG.fst, there is an optional graph creation stage + After creating CLG.fst, there is an optional graph creation stage that can reduce its size. We use the program make-ilabel-transducer, which works out from the decision tree and the HMM topology information, which subsets of context-dependent phones would - correspond to the same compiled graph and can therefore be merged (we pick + correspond to the same compiled graph and can therefore be merged (we pick an arbitrary element of each subset and convert all context windows to that context window). This is a similar concept to HTK's logical-to-physical mapping. The command is: \verbatim make-ilabel-transducer --write-disambig-syms=$dir/disambig_ilabels_remapped.list \ - $dir/ilabels $tree $model $dir/ilabels.remapped > $dir/ilabel_map.fst + $dir/ilabels $tree $model $dir/ilabels.remapped > $dir/ilabel_map.fst \endverbatim This program requires the tree and the model; it outputs a new ilabel_info object called "ilabels.remapped"; this is in the same format as the original "ilabels" file, but has fewer lines. The FST "ilabel_map.fst" is composed with CLG.fst and remaps the labels. After doing this we determinize - and minimize so we can immediately realize any size reductions: + and minimize so we can immediately realize any size reductions: \verbatim fstcompose $dir/ilabel_map.fst $dir/CLG.fst | \ fstdeterminizestar --use-log=true | \ fstminimizeencoded > $dir/CLG2.fst \endverbatim - For typical setups this stage does not actually reduce the graph size + For typical setups this stage does not actually reduce the graph size by very much (5\% to 20\% reduction is typical), and in any case it is only the size of intermediate graph-creation stages that we are reducing by this mechanism. But the savings could become significant @@ -421,24 +433,24 @@ of epsilon, to ensure determinizability. sequences of three arcs. H also has self-loops on the initial state for each of the disambiguation symbols (\#-1, \#0, \#1, \#2, \#3 and so on). - The section of script that makes the H transducer (we call it Ha + The section of script that makes the H transducer (we call it Ha because it lacks self-loops at this point), is: -\verbatim +\verbatim make-h-transducer --disambig-syms-out=$dir/disambig_tstate.list \ --transition-scale=1.0 $dir/ilabels.remapped \ $tree $model > $dir/Ha.fst \endverbatim There is an optional argument to set the transition scale; in our - current training scripts, this scale is 1.0. 
This scale only + current training scripts, this scale is 1.0. This scale only affects the parts of the transitions that do not relate to self-loop probabilities, and in the normal topology (Bakis model) it has no effect at all; see \ref hmm_scale for more explanation. - In addition to the FST, the program also writes a list of + In addition to the FST, the program also writes a list of disambiguation symbols which must be removed later. - + \section graph_hclg Making HCLG - The first step in making the final graph HCLG is to make the + The first step in making the final graph HCLG is to make the HCLG that lacks self-loops. The command in our current script is as follows: \verbatim @@ -457,15 +469,15 @@ make-h-transducer --disambig-syms-out=$dir/disambig_tstate.list \ \section graph_selfloops Adding self-loops to HCLG Adding self-loops to HCLG is done by the following command: -\verbatim +\verbatim add-self-loops --self-loop-scale=0.1 \ --reorder=true $model < $dir/HCLGa.fst > $dir/HCLG.fst \endverbatim See \ref hmm_scale for an explanation of how the self-loop-scale of 0.1 - is applied (note that it also affects the non-self-loop probabilities). + is applied (note that it also affects the non-self-loop probabilities). For an explanation of the "reorder" option, see \ref hmm_reorder; the "reorder" option increases decoding speed but is not compatible with - the \ref decoder_kaldi "kaldi decoder". + the \ref decoder_kaldi "kaldi decoder". The add-self-loops program does not just add self-loops; it may also have to duplicate states and add epsilon transitions in order to ensure that the self-loops can be added in a consistent way. This @@ -476,7 +488,7 @@ make-h-transducer --disambig-syms-out=$dir/disambig_tstate.list \ G.fst, LG.fst, CLG.fst and HCLGa.fst, but not for HCLG.fst. We do not determinize again after the add-self-loops stage; this would fail because we have already removed the disambiguation symbols. Anyway, - this would be slow and we believe that there is nothing further to be gained from + this would be slow and we believe that there is nothing further to be gained from determinizing and minimizing at this point. diff --git a/src/fstext/push-special.cc b/src/fstext/push-special.cc index 16cc9b41af3..ae91cfae54e 100644 --- a/src/fstext/push-special.cc +++ b/src/fstext/push-special.cc @@ -80,7 +80,7 @@ final-probabilities of the WFST; the details are quite obvious, and the equivalence with the original WFST is easy to show. Our algorithm is in practice an order of magnitude faster than the more generic algorithm for conventional weight-pushing of \cite{Mohri:02}, when applied to cyclic WFSTs. - + */ class PushSpecialClass { @@ -95,7 +95,7 @@ class PushSpecialClass { num_states_ = fst_->NumStates(); initial_state_ = fst_->Start(); occ_.resize(num_states_, 1.0 / sqrt(num_states_)); // unit length - + pred_.resize(num_states_); for (StateId s = 0; s < num_states_; s++) { for (ArcIterator > aiter(*fst, s); @@ -132,21 +132,22 @@ class PushSpecialClass { min_sum = std::min(min_sum, sum); max_sum = std::max(max_sum, sum); } - } + } KALDI_VLOG(4) << "min,max is " << min_sum << " " << max_sum; return kaldi::Log(max_sum / min_sum); // In FST world we'll actually // dealing with logs, so the log of the ratio is more suitable // to compare with delta (makes testing the algorithm easier). } - + void Iterate(float delta) { // This is like the power method to find the top eigenvalue of a matrix. 
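     // (Sketch added for clarity, not in the original source: in matrix
     // notation each iteration computes, roughly,
     //    new_occ = 0.1 * occ + M^T occ,    lambda_ ~= occ . new_occ,
     //    occ = new_occ / ||new_occ||,
     // where M is the matrix of transition weights described above; adding
     // 0.1 * occ just shifts all eigenvalues by 0.1 without changing the
     // eigenvectors, so occ_ still converges to the top eigenvector and
     // lambda_ to the top eigenvalue.)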
- // We limit it to 2000 iters max, just in case something unanticipated + // We limit it to 200 iters max, just in case something unanticipated // happens, but we should exit due to the "delta" thing, usually after // several tens of iterations. - int iter; - for (iter = 0; iter < 2000; iter++) { + int iter, max_iter = 200; + + for (iter = 0; iter < max_iter; iter++) { std::vector new_occ(num_states_); // We initialize new_occ to 0.1 * occ. A simpler algorithm would // initialize them to zero, so it's like the pure power method. This is @@ -156,7 +157,7 @@ class PushSpecialClass { // ones) all have the same magnitude. for (int i = 0; i < num_states_; i++) new_occ[i] = 0.1 * occ_[i]; - + for (int i = 0; i < num_states_; i++) { std::vector >::const_iterator iter, end = pred_[i].end(); @@ -181,8 +182,8 @@ class PushSpecialClass { KALDI_WARN << "push-special: finished " << iter << " iterations without converging. Output will be inaccurate."; } - - + + // Modifies the FST weights and the final-prob to take account of these potentials. void ModifyFst() { // First get the potentials as negative-logs, like the values @@ -210,13 +211,13 @@ class PushSpecialClass { StateId initial_state_; std::vector occ_; // the top eigenvector of (matrix of weights) transposed. double lambda_; // our current estimate of the top eigenvalue. - + std::vector > > pred_; // List of transitions // into each state. For the start state, this list consists of the list of // states with final-probs, each with their final prob. - + VectorFst *fst_; - + }; @@ -228,7 +229,7 @@ void PushSpecial(VectorFst *fst, float delta) { // gets done in the initializer. } - + } // end namespace fst. From cddb86103bdde45fcc0d59a347442eb98a32ac38 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 23 Dec 2016 16:29:12 -0500 Subject: [PATCH 155/530] Renaming the fast TDNN+LSTM experiment in swbd/s5c to be in the intended sequence. --- egs/swbd/s5c/local/chain/run_tdnn_lstm.sh | 2 +- .../{run_tdnn_fastlstm_1b.sh => run_tdnn_lstm_1c.sh} | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) rename egs/swbd/s5c/local/chain/tuning/{run_tdnn_fastlstm_1b.sh => run_tdnn_lstm_1c.sh} (95%) diff --git a/egs/swbd/s5c/local/chain/run_tdnn_lstm.sh b/egs/swbd/s5c/local/chain/run_tdnn_lstm.sh index a4fa11e0908..9669251c14a 120000 --- a/egs/swbd/s5c/local/chain/run_tdnn_lstm.sh +++ b/egs/swbd/s5c/local/chain/run_tdnn_lstm.sh @@ -1 +1 @@ -tuning/run_tdnn_lstm_1b.sh \ No newline at end of file +tuning/run_tdnn_lstm_1c.sh \ No newline at end of file diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_fastlstm_1b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh similarity index 95% rename from egs/swbd/s5c/local/chain/tuning/run_tdnn_fastlstm_1b.sh rename to egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh index 88a191a1348..ea34aefe29f 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_fastlstm_1b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -1,9 +1,12 @@ #!/bin/bash -# Unlike 1a this setup interleaves the TDNN and LSTM layers. +# run_tdnn_lstm_1c.sh is like run_tdnn_lstm_1b.sh but using the +# new 'fast-lstm' layer. Results are slightly improved, plus +# it's faster. See PR #1243 on github, and issue #1237. +# This used to be called run_tdnn_fastlstm_1b.sh. 
-#System tdnn_lstm_1a_ld5 tdnn_lstm_1b_ld5 tdnn_fastlstm_1b_ld5 -#WER on train_dev(tg) 13.42 13.00 12.91 +#System tdnn_lstm_1a_ld5 tdnn_lstm_1b_ld5 tdnn_lstm_1c_ld5 +#WER on train_dev(tg) 13.42 13.00 12.91 #WER on train_dev(fg) 12.42 12.03 11.98 #WER on eval2000(tg) 15.7 15.3 15.2 #WER on eval2000(fg) 14.2 13.9 13.8 @@ -19,7 +22,7 @@ stage=12 train_stage=-10 get_egs_stage=-10 speed_perturb=true -dir=exp/chain/tdnn_fastlstm_1b # Note: _sp will get added to this if $speed_perturb == true. +dir=exp/chain/tdnn_lstm_1c # Note: _sp will get added to this if $speed_perturb == true. decode_iter= decode_dir_affix= From 6c4b572052a72df8e4459f285bdccee5e6773965 Mon Sep 17 00:00:00 2001 From: Pavel Denisov Date: Sat, 24 Dec 2016 01:28:54 +0300 Subject: [PATCH 156/530] =?UTF-8?q?Update=20arpa2fst=20command=20in=20Wit?= =?UTF-8?q?=20Zieli=C5=84ski=20tutorial=20(#1273)=20(#1286)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/doc/kaldi_for_dummies.dox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/doc/kaldi_for_dummies.dox b/src/doc/kaldi_for_dummies.dox index 69d561e8bff..afb3295f50e 100644 --- a/src/doc/kaldi_for_dummies.dox +++ b/src/doc/kaldi_for_dummies.dox @@ -519,7 +519,7 @@ echo "===== MAKING G.fst =====" echo lang=data/lang -cat $local/tmp/lm.arpa | arpa2fst - | fstprint | utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt --osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | fstrmepsilon | fstarcsort --sort_type=ilabel > $lang/G.fst +arpa2fst --disambig-symbol=#0 --read-symbol-table=$lang/words.txt $local/tmp/lm.arpa $lang/G.fst echo echo "===== MONO TRAINING =====" From 53a4df9791ff0eca2ce048aad12a9ffd07f9d1db Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Fri, 23 Dec 2016 20:33:10 -0800 Subject: [PATCH 157/530] Refactoring the example-extraction for nnet3, for more flexibility in the num-frames for examples. This code compiles but is not tested. 
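A rough sketch of the intended usage of the new interface (illustrative
only, and untested, like the rest of this commit -- the option value
"150,110,40" and the variable num_input_frames are hypothetical; see
ExampleGenerationConfig and UtteranceSplitter in nnet-example-utils.h
below):

    ExampleGenerationConfig eg_config;
    eg_config.num_frames_str = "150,110,40";  // principal chunk size first.
    eg_config.frame_subsampling_factor = 3;   // e.g. for 'chain' models.
    eg_config.ComputeDerived();  // parses num_frames_str, rounds sizes up.
    UtteranceSplitter utt_splitter(eg_config);

    std::vector<ChunkTimeInfo> chunks;
    utt_splitter.GetChunksForUtterance(num_input_frames, &chunks);
    // Each ChunkTimeInfo contains first_frame, num_frames, the left/right
    // context to use for that chunk, and per-frame output weights.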
--- src/chainbin/nnet3-chain-get-egs.cc | 238 +++++-------- src/nnet3/nnet-chain-example.cc | 2 +- src/nnet3/nnet-chain-example.h | 2 +- src/nnet3/nnet-discriminative-example.cc | 4 +- src/nnet3/nnet-discriminative-example.h | 28 +- src/nnet3/nnet-example-utils.cc | 344 ++++++++++++++----- src/nnet3/nnet-example-utils.h | 121 ++++--- src/nnet3bin/nnet3-discriminative-get-egs.cc | 272 ++++++--------- src/nnet3bin/nnet3-get-egs.cc | 167 +++++---- 9 files changed, 655 insertions(+), 523 deletions(-) diff --git a/src/chainbin/nnet3-chain-get-egs.cc b/src/chainbin/nnet3-chain-get-egs.cc index 6f77a3c208b..2a8f5a1c6ad 100644 --- a/src/chainbin/nnet3-chain-get-egs.cc +++ b/src/chainbin/nnet3-chain-get-egs.cc @@ -41,149 +41,107 @@ namespace nnet3 { static bool ProcessFile(const fst::StdVectorFst &normalization_fst, const MatrixBase &feats, const MatrixBase *ivector_feats, + int32 ivector_period, const chain::Supervision &supervision, const std::string &utt_id, bool compress, - int32 left_context, - int32 right_context, - int32 frames_per_eg, - int32 frames_overlap_per_eg, - int32 frame_subsampling_factor, + const UtteranceSplitter &utt_splitter, int64 *num_frames_written, int64 *num_egs_written, NnetChainExampleWriter *example_writer) { + bool ans = true; KALDI_ASSERT(supervision.num_sequences == 1); - int32 num_feature_frames = feats.NumRows(), - num_output_frames = supervision.frames_per_sequence, - num_feature_frames_subsampled = - (num_feature_frames + frame_subsampling_factor - 1)/ - frame_subsampling_factor; - if (num_output_frames != num_feature_frames_subsampled) { - // we tolerate deviations in the num-frames if they are very small (1 output - // frame). - - if (abs(num_output_frames - num_feature_frames_subsampled) > 1) { - KALDI_ERR << "Mismatch in num-frames: chain supervision has " - << num_output_frames - << " versus features/frame_subsampling_factor = " - << num_feature_frames << " / " << frame_subsampling_factor - << " = " << num_feature_frames_subsampled - << ": check that --frame-subsampling-factor option is set " - << "the same as to chain-get-supervision."; - } - int32 new_num_feature_frames = - num_output_frames * frame_subsampling_factor; - // add a few frames at the end to make it match up. - Matrix feats_new(new_num_feature_frames, feats.NumCols(), - kUndefined); - int32 min_feature_frames = std::min(num_feature_frames, - new_num_feature_frames); - feats_new.RowRange(0, min_feature_frames).CopyFromMat( - feats.RowRange(0, min_feature_frames)); - for (int32 i = num_feature_frames; i < new_num_feature_frames; i++) - feats_new.Row(i).CopyFromVec(feats.Row(num_feature_frames - 1)); - return ProcessFile(normalization_fst, feats_new, ivector_feats, - supervision, utt_id, compress, left_context, right_context, - frames_per_eg, frames_overlap_per_eg, frame_subsampling_factor, - num_frames_written, num_egs_written, - example_writer); - } + int32 num_input_frames = feats.NumRows(), + num_output_frames = supervision.frames_per_sequence; - KALDI_ASSERT(frames_per_eg % frame_subsampling_factor == 0); + if (!utt_splitter.LengthsMatch(utt_id, num_input_frames, num_output_frames)) + return false; // LengthsMatch() will have printed a warning. 
-  int32 frames_per_eg_subsampled = frames_per_eg / frame_subsampling_factor,
-      frames_overlap_subsampled = frames_overlap_per_eg / frame_subsampling_factor,
-      frames_shift_subsampled = frames_per_eg_subsampled - frames_overlap_subsampled;
+  std::vector<ChunkTimeInfo> chunks;
+
+  utt_splitter.GetChunksForUtterance(num_input_frames, &chunks);
 
-  if (num_feature_frames_subsampled < frames_per_eg_subsampled) {
-    KALDI_WARN << "Length of features for utterance " << utt_id
-               << " is less than than the frames_per_eg (after sub-sampling).";
+  if (chunks.empty()) {
+    KALDI_WARN << "Not producing egs for utterance " << utt_id
+               << " because it is too short: "
+               << num_input_frames << " frames.";
     return false;
   }
 
-  // we don't do any padding, as it would be a bit tricky to pad the 'chain' supervision.
-  // Instead we select ranges of frames that fully fit within the file; these
-  // might slightly overlap with each other or have gaps.
-  std::vector<int32> range_starts_subsampled;
-  chain::SplitIntoRanges(num_feature_frames_subsampled -
-                         frames_overlap_subsampled,
-                         frames_shift_subsampled,
-                         &range_starts_subsampled);
-  // The 'deriv_weights' make sure we don't count frames twice, and also ensure
-  // that we tend to avoid having nonzero weights on the derivatives that are
-  // too close to the edge of the corresponding 'range' (these derivatives close
-  // to the edge are not as accurate as they could be, because when we split we
-  // don't know the correct alphas and betas).
-  std::vector<Vector<BaseFloat> > deriv_weights;
-
-  chain::GetWeightsForRanges(frames_per_eg_subsampled,
-                             range_starts_subsampled,
-                             &deriv_weights);
-
-  if (range_starts_subsampled.empty()) {
-    KALDI_WARN << "No output for utterance " << utt_id
-               << " (num-frames=" << num_feature_frames
-               << ") because too short for --frames-per-eg="
-               << frames_per_eg;
-    return false;
-  }
+  int32 frame_subsampling_factor = utt_splitter.Config().frame_subsampling_factor;
+
   chain::SupervisionSplitter splitter(supervision);
-  for (size_t i = 0; i < range_starts_subsampled.size(); i++) {
-    int32 range_start_subsampled = range_starts_subsampled[i],
-        range_start = range_start_subsampled * frame_subsampling_factor;
+
+  for (size_t c = 0; c < chunks.size(); c++) {
+    ChunkTimeInfo &chunk = chunks[c];
+
+    int32 start_frame_subsampled = chunk.first_frame / frame_subsampling_factor,
+        num_frames_subsampled = chunk.num_frames / frame_subsampling_factor;
 
     chain::Supervision supervision_part;
-    splitter.GetFrameRange(range_start_subsampled,
-                           frames_per_eg_subsampled,
+    splitter.GetFrameRange(start_frame_subsampled,
+                           num_frames_subsampled,
                            &supervision_part);
 
     if (normalization_fst.NumStates() > 0 &&
         !AddWeightToSupervisionFst(normalization_fst,
                                    &supervision_part)) {
-      KALDI_WARN << "For utterance " << utt_id << ", frames "
-                 << range_start << " to " << (range_start + frames_per_eg)
+      KALDI_WARN << "For utterance " << utt_id << ", feature frames "
+                 << chunk.first_frame << " to "
+                 << (chunk.first_frame + chunk.num_frames)
                  << ", FST was empty after composing with normalization FST. "
                  << "This should be extremely rare (a few per corpus, at most)";
-      return false;
+      ans = false;
     }
 
     int32 first_frame = 0;  // we shift the time-indexes of all these parts so
                             // that the supervised part starts from frame 0. 
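+    // (Note added for clarity, not in the original patch: the
+    //  'output_weights' of each chunk are set up so that a supervision frame
+    //  covered by k overlapping chunks gets weight 1/k in each of them; see
+    //  UtteranceSplitter::SetOutputWeights() in nnet-example-utils.cc below.)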
+ + SubVector output_weights( + &(chunk.output_weights[0]), + static_cast(chunk.output_weights.size())); + NnetChainSupervision nnet_supervision("output", supervision_part, - deriv_weights[i], - first_frame, frame_subsampling_factor); + output_weights, + first_frame, + frame_subsampling_factor); NnetChainExample nnet_chain_eg; nnet_chain_eg.outputs.resize(1); nnet_chain_eg.outputs[0].Swap(&nnet_supervision); nnet_chain_eg.inputs.resize(ivector_feats != NULL ? 2 : 1); - int32 tot_frames = left_context + frames_per_eg + right_context; - Matrix input_frames(tot_frames, feats.NumCols(), kUndefined); + int32 tot_input_frames = chunk.left_context + chunk.num_frames + + chunk.right_context; - // Set up "input_frames". - for (int32 j = -left_context; j < frames_per_eg + right_context; j++) { - int32 t = range_start + j; - if (t < 0) t = 0; - if (t >= feats.NumRows()) t = feats.NumRows() - 1; - SubVector src(feats, t), - dest(input_frames, j + left_context); + Matrix input_frames(tot_input_frames, feats.NumCols(), + kUndefined); + + int32 start_frame = chunk.first_frame - chunk.left_context; + for (int32 t = start_frame; t < start_frame + tot_input_frames; t++) { + int32 t2 = t; + if (t2 < 0) t2 = 0; + if (t2 >= num_input_frames) t2 = num_input_frames - 1; + int32 j = t - start_frame; + SubVector src(feats, t2), + dest(input_frames, j); dest.CopyFromVec(src); } - NnetIo input_io("input", - left_context, - input_frames); + NnetIo input_io("input", -chunk.left_context, input_frames); nnet_chain_eg.inputs[0].Swap(&input_io); if (ivector_feats != NULL) { // if applicable, add the iVector feature. // choose iVector from a random frame in the chunk - int32 ivector_frame = RandInt(range_start, range_start + frames_per_eg - 1); - KALDI_ASSERT(ivector_feats->NumRows() > 0); - if (ivector_frame >= ivector_feats->NumRows()) - ivector_frame = ivector_feats->NumRows() - 1; + int32 ivector_frame = RandInt(start_frame, + start_frame + num_input_frames - 1), + ivector_frame_subsampled = ivector_frame / ivector_period; + if (ivector_frame_subsampled < 0) + ivector_frame_subsampled = 0; + if (ivector_frame_subsampled >= ivector_feats->NumRows()) + ivector_frame_subsampled = ivector_feats->NumRows() - 1; Matrix ivector(1, ivector_feats->NumCols()); - ivector.Row(0).CopyFromVec(ivector_feats->Row(ivector_frame)); + ivector.Row(0).CopyFromVec(ivector_feats->Row(ivector_frame_subsampled)); NnetIo ivector_io("ivector", 0, ivector); nnet_chain_eg.inputs[1].Swap(&ivector_io); } @@ -192,16 +150,16 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst, nnet_chain_eg.Compress(); std::ostringstream os; - os << utt_id << "-" << range_start; + os << utt_id << "-" << chunk.first_frame; std::string key = os.str(); // key is - - *num_frames_written += frames_per_eg; + *num_frames_written += chunk.num_frames; *num_egs_written += 1; example_writer->Write(key, nnet_chain_eg); } - return true; + return ans; } } // namespace nnet2 @@ -233,35 +191,28 @@ int main(int argc, char *argv[]) { "chain-get-supervision.\n"; bool compress = true; - int32 left_context = 0, right_context = 0, num_frames = 1, - num_frames_overlap = 0, length_tolerance = 100, - cut_zero_frames = -1, - frame_subsampling_factor = 1; + int32 length_tolerance = 100, online_ivector_period = 1; + + ExampleGenerationConfig eg_config; // controls num-frames, + // left/right-context, etc. 
int32 srand_seed = 0; - std::string ivector_rspecifier; + std::string online_ivector_rspecifier; ParseOptions po(usage); po.Register("compress", &compress, "If true, write egs in " - "compressed format (recommended)"); - po.Register("left-context", &left_context, "Number of frames of left " - "context the neural net requires."); - po.Register("right-context", &right_context, "Number of frames of right " - "context the neural net requires."); - po.Register("num-frames", &num_frames, "Number of frames with labels " - "that each example contains. Will be rounded up to a multiple " - "of --frame-subsampling-factor."); - po.Register("num-frames-overlap", &num_frames_overlap, "Number of frames of " - "overlap between each example."); - po.Register("ivectors", &ivector_rspecifier, "Rspecifier of ivector " - "features, as a matrix."); - po.Register("srand", &srand_seed, "Seed for random number generator " - "(only relevant if --pick-random-ivector=true)"); + "compressed format."); + po.Register("ivectors", &online_ivector_rspecifier, "Alias for " + "--online-ivectors option, for back compatibility"); + po.Register("online-ivectors", &online_ivector_rspecifier, "Rspecifier of " + "ivector features, as a matrix."); + po.Register("online-ivector-period", &online_ivector_period, "Number of " + "frames between iVectors in matrices supplied to the " + "--online-ivectors option"); + po.Register("srand", &srand_seed, "Seed for random number generator "); po.Register("length-tolerance", &length_tolerance, "Tolerance for " "difference in num-frames between feat and ivector matrices"); - po.Register("frame-subsampling-factor", &frame_subsampling_factor, "Used " - "if the frame-rate at the output will be less than the " - "frame-rate of the input"); + eg_config.Register(&po); po.Read(argc, argv); @@ -272,12 +223,6 @@ int main(int argc, char *argv[]) { exit(1); } - if (num_frames <= 0 || left_context < 0 || right_context < 0 || - length_tolerance < 0 || frame_subsampling_factor <= 0) - KALDI_ERR << "One of the integer options is out of the allowed range."; - RoundUpNumFrames(frame_subsampling_factor, - &num_frames, &num_frames_overlap); - std::string normalization_fst_rxfilename, feature_rspecifier, @@ -295,6 +240,9 @@ int main(int argc, char *argv[]) { examples_wspecifier = po.GetArg(4); } + eg_config.ComputeDerived(); + UtteranceSplitter utt_splitter(eg_config); + fst::StdVectorFst normalization_fst; if (!normalization_fst_rxfilename.empty()) { ReadFstKaldi(normalization_fst_rxfilename, &normalization_fst); @@ -305,7 +253,8 @@ int main(int argc, char *argv[]) { chain::RandomAccessSupervisionReader supervision_reader( supervision_rspecifier); NnetChainExampleWriter example_writer(examples_wspecifier); - RandomAccessBaseFloatMatrixReader ivector_reader(ivector_rspecifier); + RandomAccessBaseFloatMatrixReader online_ivector_reader( + online_ivector_rspecifier); int32 num_done = 0, num_err = 0; int64 num_frames_written = 0, num_egs_written = 0; @@ -318,31 +267,32 @@ int main(int argc, char *argv[]) { num_err++; } else { const chain::Supervision &supervision = supervision_reader.Value(key); - const Matrix *ivector_feats = NULL; - if (!ivector_rspecifier.empty()) { - if (!ivector_reader.HasKey(key)) { + const Matrix *online_ivector_feats = NULL; + if (!online_ivector_rspecifier.empty()) { + if (!online_ivector_reader.HasKey(key)) { KALDI_WARN << "No iVectors for utterance " << key; num_err++; continue; } else { // this address will be valid until we call HasKey() or Value() // again. 
- ivector_feats = &(ivector_reader.Value(key)); + online_ivector_feats = &(online_ivector_reader.Value(key)); } } - if (ivector_feats != NULL && - (abs(feats.NumRows() - ivector_feats->NumRows()) > length_tolerance - || ivector_feats->NumRows() == 0)) { + if (online_ivector_feats != NULL && + (abs(feats.NumRows() - (online_ivector_feats->NumRows() * + online_ivector_period)) > length_tolerance + || online_ivector_feats->NumRows() == 0)) { KALDI_WARN << "Length difference between feats " << feats.NumRows() - << " and iVectors " << ivector_feats->NumRows() - << " exceeds tolerance " << length_tolerance; + << " and iVectors " << online_ivector_feats->NumRows() + << "exceeds tolerance " << length_tolerance; num_err++; continue; } - if (ProcessFile(normalization_fst, feats, ivector_feats, supervision, - key, compress, - left_context, right_context, num_frames, - num_frames_overlap, frame_subsampling_factor, + + if (ProcessFile(normalization_fst, feats, + online_ivector_feats, online_ivector_period, + supervision, key, compress, utt_splitter, &num_frames_written, &num_egs_written, &example_writer)) num_done++; diff --git a/src/nnet3/nnet-chain-example.cc b/src/nnet3/nnet-chain-example.cc index 74e8be80240..0607543b743 100644 --- a/src/nnet3/nnet-chain-example.cc +++ b/src/nnet3/nnet-chain-example.cc @@ -105,7 +105,7 @@ void NnetChainSupervision::Swap(NnetChainSupervision *other) { NnetChainSupervision::NnetChainSupervision( const std::string &name, const chain::Supervision &supervision, - const Vector &deriv_weights, + const VectorBase &deriv_weights, int32 first_frame, int32 frame_skip): name(name), diff --git a/src/nnet3/nnet-chain-example.h b/src/nnet3/nnet-chain-example.h index 323e73da8da..9be298074a4 100644 --- a/src/nnet3/nnet-chain-example.h +++ b/src/nnet3/nnet-chain-example.h @@ -83,7 +83,7 @@ struct NnetChainSupervision { /// is slower than the input, so in this case it might be 2 or 3. NnetChainSupervision(const std::string &name, const chain::Supervision &supervision, - const Vector &deriv_weights, + const VectorBase &deriv_weights, int32 first_frame, int32 frame_skip); diff --git a/src/nnet3/nnet-discriminative-example.cc b/src/nnet3/nnet-discriminative-example.cc index e9a063e268e..5c02998cbcf 100644 --- a/src/nnet3/nnet-discriminative-example.cc +++ b/src/nnet3/nnet-discriminative-example.cc @@ -91,7 +91,7 @@ NnetDiscriminativeSupervision::NnetDiscriminativeSupervision(const NnetDiscrimin NnetDiscriminativeSupervision::NnetDiscriminativeSupervision( const std::string &name, const discriminative::DiscriminativeSupervision &supervision, - const Vector &deriv_weights, + const VectorBase &deriv_weights, int32 first_frame, int32 frame_skip): name(name), @@ -347,7 +347,7 @@ void GetDiscriminativeComputationRequest(const Nnet &nnet, io_spec.name = name; io_spec.indexes = sup.indexes; io_spec.has_deriv = need_model_derivative; - + if (use_xent_regularization) { size_t cur_size = request->outputs.size(); request->outputs.resize(cur_size + 1); diff --git a/src/nnet3/nnet-discriminative-example.h b/src/nnet3/nnet-discriminative-example.h index b2458b0cdcd..bb60f216a82 100644 --- a/src/nnet3/nnet-discriminative-example.h +++ b/src/nnet3/nnet-discriminative-example.h @@ -42,7 +42,7 @@ struct NnetDiscriminativeSupervision { // the name of the output in the neural net; in simple setups it // will just be "output". std::string name; - + // The indexes that the output corresponds to. The size of this vector will // be equal to supervision.num_sequences * supervision.frames_per_sequence. 
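  // (Illustrative note, added for clarity: with num_sequences = 2 and
  // frames_per_sequence = 3, the expected order is (n=0,t=0), (n=1,t=0),
  // (n=0,t=1), (n=1,t=1), (n=0,t=2), (n=1,t=2) -- i.e. all sequences for
  // each frame index in turn, as in the 'chain' code.)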
// Be careful about the order of these indexes-- it is a little confusing. @@ -52,7 +52,7 @@ struct NnetDiscriminativeSupervision { // This is done to make the code similar that for the 'chain' model. std::vector indexes; - // The supervision object, containing the numerator and denominator + // The supervision object, containing the numerator and denominator // lattices. discriminative::DiscriminativeSupervision supervision; @@ -68,19 +68,19 @@ struct NnetDiscriminativeSupervision { // so it's equivalent to a vector of all ones. This vector is written // to disk compactly as unsigned char. Vector deriv_weights; - + // Use default assignment operator NnetDiscriminativeSupervision() { } // Initialize the object from an object of type discriminative::Supervision, - // and some extra information. + // and some extra information. // Note: you probably want to set 'name' to "output". // 'first_frame' will often be zero but you can choose (just make it // consistent with how you numbered your inputs), and 'frame_skip' would be 1 // in a vanilla setup, but 3 in the case of 'chain' models NnetDiscriminativeSupervision(const std::string &name, const discriminative::DiscriminativeSupervision &supervision, - const Vector &deriv_weights, + const VectorBase &deriv_weights, int32 first_frame, int32 frame_skip); @@ -89,15 +89,15 @@ struct NnetDiscriminativeSupervision { void Write(std::ostream &os, bool binary) const; void Read(std::istream &is, bool binary); - + void Swap(NnetDiscriminativeSupervision *other); void CheckDim() const; - + bool operator == (const NnetDiscriminativeSupervision &other) const; }; -/// NnetDiscriminativeExample is like NnetExample, but specialized for +/// NnetDiscriminativeExample is like NnetExample, but specialized for /// sequence training. struct NnetDiscriminativeExample { @@ -111,7 +111,7 @@ struct NnetDiscriminativeExample { std::vector outputs; void Write(std::ostream &os, bool binary) const; - + void Read(std::istream &is, bool binary); void Swap(NnetDiscriminativeExample *other); @@ -128,10 +128,10 @@ struct NnetDiscriminativeExample { } }; -/** - Appends the given vector of examples (which must be non-empty) into +/** + Appends the given vector of examples (which must be non-empty) into a single output example. - Intended to be used when forming minibatches for neural net training. If + Intended to be used when forming minibatches for neural net training. If 'compress' it compresses the output features (recommended to save disk space). @@ -149,7 +149,7 @@ void MergeDiscriminativeExamples( void MergeSupervision( const std::vector &inputs, - NnetDiscriminativeSupervision *output); + NnetDiscriminativeSupervision *output); /** Shifts the time-index t of everything in the input of "eg" by adding @@ -179,7 +179,7 @@ void ShiftDiscriminativeExampleTimes(int32 frame_shift, void TruncateDerivWeights(int32 truncate, NnetDiscriminativeExample *eg); -/** This function takes a NnetDiscriminativeExample and produces a +/** This function takes a NnetDiscriminativeExample and produces a ComputationRequest. Assumes you don't want the derivatives w.r.t. the inputs; if you do, you can create the ComputationRequest manually. 
Assumes that if diff --git a/src/nnet3/nnet-example-utils.cc b/src/nnet3/nnet-example-utils.cc index 547c70578ab..dc9dedefe43 100644 --- a/src/nnet3/nnet-example-utils.cc +++ b/src/nnet3/nnet-example-utils.cc @@ -21,6 +21,8 @@ #include "nnet3/nnet-example-utils.h" #include "lat/lattice-functions.h" #include "hmm/posterior.h" +#include "util/text-utils.h" +#include namespace kaldi { namespace nnet3 { @@ -282,10 +284,73 @@ void RoundUpNumFrames(int32 frame_subsampling_factor, KALDI_ERR << "--num-frames-overlap=" << (*num_frames_overlap) << " < " << "--num-frames=" << (*num_frames); } +} + +void ExampleGenerationConfig::ComputeDerived() { + if (!SplitStringToIntegers(num_frames_str, ",", false, &num_frames) || + num_frames.empty()) { + KALDI_ERR << "Invalid option (expected comma-separated list of integers): " + << "--num-frames=" << num_frames_str; + } + int32 m = frame_subsampling_factor; + if (m < 1) { + KALDI_ERR << "Invalid value --frame-subsampling-factor=" << m; + } + bool changed = false; + for (size_t i = 0; i < num_frames.size(); i++) { + int32 value = num_frames[i]; + if (value <= 0) { + KALDI_ERR << "Invalid option --num-frames=" << num_frames_str; + } + if (value % m != 0) { + value = m * ((value / m) + 1); + changed = true; + } + num_frames[i] = value; + } + if (changed) { + std::ostringstream rounded_num_frames_str; + for (size_t i = 0; i < num_frames.size(); i++) { + if (i > 0) + rounded_num_frames_str << ','; + rounded_num_frames_str << num_frames[i]; + } + KALDI_LOG << "Rounding up --num-frames=" << num_frames_str + << " to multiples of --frame-subsampling-factor=" << m + << ", to: " << rounded_num_frames_str; + } } +UtteranceSplitter::UtteranceSplitter(const ExampleGenerationConfig &config): + config_(config) { + if (config.num_frames.empty()) { + KALDI_ERR << "You need to call ComputeDerived() on the " + "ExampleGenerationConfig()."; + } + InitSplitForLength(); +} + +float UtteranceSplitter::DefaultDurationOfSplit( + const std::vector &split) const { + if (split.empty()) // not a valid split, but useful to handle this case. + return 0.0; + float principal_num_frames = config_.num_frames[0], + num_frames_overlap = config_.num_frames_overlap; + KALDI_ASSERT(num_frames_overlap < principal_num_frames && + "--num-frames-overlap value is too high"); + float overlap_proportion = num_frames_overlap / principal_num_frames; + float ans = std::accumulate(split.begin(), split.end(), int32(0)); + for (size_t i = 0; i + 1 < split.size(); i++) { + float min_adjacent_chunk_length = std::min(split[i], split[i + 1]), + overlap = overlap_proportion * min_adjacent_chunk_length; + ans -= overlap; + } + KALDI_ASSERT(ans > 0.0); + return ans; +} + /* This comment describes the idea behind what InitChunkSize() is supposed to do, and how it relates to the purpose of class UtteranceSplitter. @@ -293,29 +358,31 @@ void RoundUpNumFrames(int32 frame_subsampling_factor, Class UtteranceSplitter is supposed to tell us, for a given utterance length, what chunk sizes to use. The chunk sizes it may choose are: - zero or more chunks of the 'principal' size (the first-listed value in - num-frames) - - at most two chunks of 'alternative' num-frames (any but the first-listed - num-frames). - - (and an empty list of chunks is not allowed as a split). A split is - effectively a multiset of chunk-sizes (the order will be randomized by the - caller). 
We represent it in code as a list of chunk-sizes, represented as a - std::vector, which is sorted to get a unique representation without repeats of - different orderings. - - The choice of spilt is determined by a cost-function that depends on the sum - of the chunk-sizes in the split and the length of the utterance: the idea is - that we want the sum of chunk-sizes in the split to be as close as possible to - the utterance length. The cost-function penalizes the sum of chunk-sizes - being smaller than the utterance-length (leading to gaps) twice as much as - when the sum of chunk-sizes is larger than the utterance length. I.e. - cost(chunk_size_sum, utt_length) = (chunk_size_sum > utt_length ? - chunk_size_sum - utt_length : - 2 * (utt_length - chunk_size_sum)) + --num-frames option) + - at most two chunks of 'alternative' num-frames (meaning, any but the + first-listed choice in the --num-frames option). + + (note: an empty list of chunks is not allowed as a split). A split is + a list of chunk-sizes in increasing order (we when we actually split the + utterance into chunks, we may, at random, reverse the order. + + The choice of split to use for a given utterance-length is determined as + follows. Firstly, for each split we compute a 'default duration' (see + DefaultDurationOfSplit()... if --num-frames-overlap is zero, this is just the + sum of the chunk sizes). We then use by a cost-function that depends on + default-duration and the length of the utterance: the idea is that these two + should be as close as possible, but penalizing the default-duration being + larger than the utterance-length (which in the normal case of + --num-frames-overlap=0 would lead to gaps between the segments), twice as much + as the other sign of difference. + + Specifically: + cost(default_duration, utt_length) = (default_duration > utt_length ? + default_duration - utt_length : + 2.0 * (utt_length - default_duration)) [but as a special case, set c to infinity if the largest chunk size in the - split is longer than the utterance length; we couldn't, in that case, use - this split for this utterance]. - + split is longer than the utterance length; we couldn't, in that case, use + this split for this utterance]. We want to make sure a good variety of combinations of chunk sizes are chosen in case there are ties from the cost function. For each utterance length @@ -324,11 +391,11 @@ void RoundUpNumFrames(int32 frame_subsampling_factor, chunks for a particular utterance of that length, we will choose randomly from that pool of splits. */ -void UtteranceSplitter::InitChunkSize() { +void UtteranceSplitter::InitSplitForLength() { int32 max_utterance_length = MaxUtteranceLength(); // The 'splits' vector is a list of possible splits (a split being - // a multiset of chunk-sizes, represented as a sorted vector). + // a sorted vector of chunk-sizes). // The vector 'splits' is itself sorted. std::vector > splits; InitSplits(&splits); @@ -338,9 +405,9 @@ void UtteranceSplitter::InitChunkSize() { // vector, and let a cost c >= 0 represent the mismatch between an // utterance length and the total length of the chunk sizes in a split: - // c(chunk_size_sum, utt_length) = (chunk_size_sum > utt_length ? - // chunk_size_sum - utt_length : - // 2 * (utt_length - chunk_size_sum)) + // c(default_duration, utt_length) = (default_duration > utt_length ? 
+ // default_duration - utt_length : + // 2.0 * (utt_length - default_duration)) // [but as a special case, set c to infinity if the largest chunk size in the // split is longer than the utterance length; we couldn't, in that case, use // this split for this utterance]. @@ -348,52 +415,51 @@ void UtteranceSplitter::InitChunkSize() { // 'costs_for_length[u][s]', indexed by utterance-length u and then split, // contains the cost for utterance-length u and split s. - std::vector > costs_for_length( + std::vector > costs_for_length( max_utterance_length + 1); int32 num_splits = splits.size(); - for (int32 u = 0; u <= max_utterance_length; u++) - pairs_for_length[u].reserve(num_splits); + costs_for_length[u].reserve(num_splits); for (int32 s = 0; s < num_splits; s++) { const std::vector &split = splits[s]; - int32 chunk_size_sum = std::accumulate(split.begin(), split.end(), - int32(0)), - max_chunk_size = *std::max_element(split.begin(), split.end()); + float default_duration = DefaultDurationOfSplit(split); + int32 max_chunk_size = *std::max_element(split.begin(), split.end()); for (int32 u = 0; u <= max_utterance_length; u++) { // c is the cost for this utterance length and this split. We penalize // gaps twice as strongly as overlaps, based on the intuition that // completely throwing out frames of data is worse than counting them - // twice. It might be possible to come up with some kind of mathematical - // justification for this based on variance of the estimated gradient. - int32 c = (chunk_size_sum > u ? chunk_size_sum - u : - 2 * (u - chunk_size_sum)); - if (max_chunk_size > u) - c = std::numeric_limits::max(); - pairs_for_length[u].push_back(c); + // twice. + int32 c = (default_duration > float(u) ? default_duration - u : + 2 * (u - default_duration)); + if (u < max_chunk_size) + c = std::numeric_limits::max(); + costs_for_length[u].push_back(c); } } splits_for_length_.resize(max_utterance_length + 1); - for (int32 u = 0; u <= max_utterance_length; u++) { - const std::vector &costs = costs_for_length[u]; - int32 min_cost = std::min_element(costs.begin(), costs.end()); - if (min_cost == std::numeric_limits::max()) { + const std::vector &costs = costs_for_length[u]; + float min_cost = *std::min_element(costs.begin(), costs.end()); + if (min_cost == std::numeric_limits::max()) { // All costs were infinity, becaues this utterance-length u is shorter // than the smallest chunk-size. Leave splits_for_length_[u] as empty // for this utterance-length, meaning we will not be able to choose any // split, and such utterances will be discarded. continue; } - int32 cost_threshold = 2; // We will choose pseudo-randomly from splits - // that are within this distance from the best - // cost. + float cost_threshold = 1.9999; // We will choose pseudo-randomly from splits + // that are within this distance from the + // best cost. Make the threshold just + // slightly less than 2... this will + // hopefully make the behavior more + // deterministic for ties. 
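+    // (Worked example of the cost function above, added for clarity: a
+    //  split with default duration 50 gets cost 50 - 45 = 5 for an
+    //  utterance of length 45 (the chunks overlap), but cost
+    //  2 * (55 - 50) = 10 for an utterance of length 55 (there is a gap):
+    //  gaps are penalized twice as much as overlaps.)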
std::vector possible_splits; - std::vector::const_iterator iter = costs.begin(), end = costs.end(); + std::vector::const_iterator iter = costs.begin(), end = costs.end(); int32 s = 0; for (; iter != end; ++iter,++s) if (*iter < min_cost + cost_threshold) @@ -429,19 +495,45 @@ void UtteranceSplitter::InitChunkSize() { } -void GetChunkSizesForUtterance(int32 utterance_length, - std::vector *chunk_sizes) const { - KALDI_ASSERT(!splits_for_length.empty()); +bool UtteranceSplitter::LengthsMatch(const std::string &utt, + int32 utterance_length, + int32 supervision_length) const { + int32 sf = config_.frame_subsampling_factor, + expected_supervision_length = (utterance_length + sf - 1) / sf; + if (supervision_length == expected_supervision_length) { + return true; + } else { + if (sf == 1) { + KALDI_WARN << "Supervision does not have expected length for utterance " + << utt << ": expected length = " << utterance_length + << ", got " << supervision_length; + } else { + KALDI_WARN << "Supervision does not have expected length for utterance " + << utt << ": expected length = (" << utterance_length + << " + " << sf << " - 1) / " << sf << " = " + << expected_supervision_length + << ", got: " << supervision_length + << " (note: --frame-subsampling-factor=" << sf << ")"; + } + return false; + } +} + + +void UtteranceSplitter::GetChunkSizesForUtterance( + int32 utterance_length, std::vector *chunk_sizes) const { + KALDI_ASSERT(!splits_for_length_.empty()); // 'primary_length' is the first-specified num-frames. // It's the only chunk that may be repeated an arbitrary number // of times. int32 primary_length = config_.num_frames[0], + num_frames_overlap = config_.num_frames_overlap, max_tabulated_length = splits_for_length_.size() - 1, num_primary_length_repeats = 0; - + KALDI_ASSERT(primary_length - num_frames_overlap > 0); KALDI_ASSERT(utterance_length >= 0); while (utterance_length > max_tabulated_length) { - utterance_length -= primary_length; + utterance_length -= (primary_length - num_frames_overlap); num_primary_length_repeats++; } KALDI_ASSERT(utterance_length >= 0); @@ -452,9 +544,11 @@ void GetChunkSizesForUtterance(int32 utterance_length, *chunk_sizes = possible_splits[randomly_chosen_split]; for (int32 i = 0; i < num_primary_length_repeats; i++) chunk_sizes->push_back(primary_length); - // Randomize the order in which the chunks appear. - std::random_shuffle(chunk_sizes->begin(), - chunk_sizes->end()); + + std::sort(chunk_sizes->begin(), chunk_sizes->end()); + if (RandInt(0, 1) == 0) { + std::reverse(chunk_sizes->begin(), chunk_sizes->end()); + } } @@ -474,14 +568,15 @@ int32 UtteranceSplitter::MaxUtteranceLength() const { } void UtteranceSplitter::InitSplits(std::vector > *splits) const { - // we consider splits whose total length is up to MaxUtteranceLength() + - // primary_length. We can be confident without doing a lot of math, that - // multisets above this length will never be chosen for any utterance-length - // up to MaxUtteranceLength(). + // we consider splits whose default duration (as returned by + // DefaultDurationOfSplit()) is up to MaxUtteranceLength() + primary_length. + // We can be confident without doing a lot of math, that splits above this + // length will never be chosen for any utterance-length up to + // MaxUtteranceLength() (which is the maximum we use). 
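+  //
+  // (Illustrative note, added for clarity: with --num-frames=30,20,40 the
+  //  splits considered would include {30}, {30,30}, {20,30}, {20,40} and
+  //  {20,20,30} -- zero or more chunks of the primary size 30, plus at most
+  //  two chunks of the alternative sizes, up to the duration ceiling.)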
   int32 primary_length = config_.num_frames[0],
-      length_ceiling = MaxUtteranceLength() + primary_length;
+      default_duration_ceiling = MaxUtteranceLength() + primary_length;
 
-  typedef std::unordered_set<std::vector<int32>, VectorHasher<int32> > SetType;
+  typedef unordered_set<std::vector<int32>, VectorHasher<int32> > SetType;
 
   SetType splits_set;
 
@@ -490,24 +585,23 @@ void UtteranceSplitter::InitSplits(std::vector<std::vector<int32> > *splits) con
   // The splits we allow are: zero to two 'alternate' lengths, plus
   // an arbitrary number of repeats of the 'primary' length.  The repeats
   // of the 'primary' length are handled by the inner loop over n.
-  // The zero two two 'alternate' lengths are handled by the loops over
+  // The zero to two 'alternate' lengths are handled by the loops over
   // i and j.  i == 0 and j == 0 are special cases; they mean, no
   // alternate is chosen.
   for (int32 i = 0; i < num_lengths; i++) {
-    for (int32 j = 0; j < num_length; j++) {
+    for (int32 j = 0; j < num_lengths; j++) {
       std::vector<int32> vec;
       if (i > 0)
         vec.push_back(config_.num_frames[i]);
       if (j > 0)
         vec.push_back(config_.num_frames[j]);
-      for (int32 n = 0;
-           std::accumulate(vec.begin(), vec.end(), int32(0)) <= length_ceiling;
-           ++n, vec.push_back(primary_length)) {
-        std::sort(vec.begin(), vec.end());  // we don't want to treat different
-                                            // orderings of the same values as
-                                            // different, so sort them.
+      int32 n = 0;
+      while (DefaultDurationOfSplit(vec) <= default_duration_ceiling) {
         if (!vec.empty())  // Don't allow the empty vector as a split.
           splits_set.insert(vec);
+        n++;
+        vec.push_back(primary_length);
+        std::sort(vec.begin(), vec.end());
       }
     }
   }
@@ -521,11 +615,11 @@ void UtteranceSplitter::InitSplits(std::vector<std::vector<int32> > *splits) con
 
 
 // static
-void UtteranceSplitter::DistributeRandomly(int32 n, std::vector<int32> *vec) {
+void UtteranceSplitter::DistributeRandomlyUniform(int32 n, std::vector<int32> *vec) {
   KALDI_ASSERT(!vec->empty());
   int32 size = vec->size();
   if (n < 0) {
-    DistributeRandomly(n, vec);
+    DistributeRandomlyUniform(-n, vec);
     for (int32 i = 0; i < size; i++)
       (*vec)[i] *= -1;
     return;
   }
@@ -544,6 +638,48 @@ void UtteranceSplitter::DistributeRandomly(int32 n, std::vector<int32> *vec) {
 }
 
 
+// static
+void UtteranceSplitter::DistributeRandomly(int32 n,
+                                           const std::vector<int32> &magnitudes,
+                                           std::vector<int32> *vec) {
+  KALDI_ASSERT(!vec->empty() && vec->size() == magnitudes.size());
+  int32 size = vec->size();
+  if (n < 0) {
+    DistributeRandomly(-n, magnitudes, vec);
+    for (int32 i = 0; i < size; i++)
+      (*vec)[i] *= -1;
+    return;
+  }
+  float total_magnitude = std::accumulate(magnitudes.begin(), magnitudes.end(),
+                                          int32(0));
+  KALDI_ASSERT(total_magnitude > 0);
+  // note: 'partial_counts' contains the negative of the partial counts, so
+  // when we sort the larger partial counts come first.
+  std::vector<std::pair<float, int32> > partial_counts;
+  int32 total_count = 0;
+  for (int32 i = 0; i < size; i++) {
+    float this_count = n * float(magnitudes[i]) / total_magnitude;
+    // note: cast of float to int32 rounds towards zero (down, in this
+    // case, since this_count >= 0).
+    int32 this_whole_count = static_cast<int32>(this_count);
+    float this_partial_count = this_count - this_whole_count;
+    (*vec)[i] = this_whole_count;
+    total_count += this_whole_count;
+    partial_counts.push_back(std::pair<float, int32>(-this_partial_count, i));
+  }
+  KALDI_ASSERT(total_count <= n && total_count + size >= n);
+  std::sort(partial_counts.begin(), partial_counts.end());
+  int32 i = 0;
+  // Increment by one the elements of the vector that has the largest partial
+  // count, then the next largest partial count, and so on... until we reach the
+  // desired total-count 'n'. 
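+  // (Worked example, added for clarity: with n = 7 and magnitudes {25, 25},
+  //  the exact shares are 3.5 each; both truncate to 3, so total_count is 6
+  //  and the loop below gives the remaining 1 to the element with the
+  //  largest partial count.)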
+ for(; total_count < n; i++,total_count++) { + (*vec)[partial_counts[i].second]++; + } + KALDI_ASSERT(std::accumulate(vec->begin(), vec->end(), int32(0)) == n); +} + + void UtteranceSplitter::GetGapSizes(int32 utterance_length, bool enforce_subsampling_factor, const std::vector &chunk_sizes, @@ -552,7 +688,7 @@ void UtteranceSplitter::GetGapSizes(int32 utterance_length, gap_sizes->clear(); return; } - if (enforce_subsamping_factor && config_.frame_subsampling_factor > 1) { + if (enforce_subsampling_factor && config_.frame_subsampling_factor > 1) { int32 sf = config_.frame_subsampling_factor, size = chunk_sizes.size(); int32 utterance_length_reduced = (utterance_length + (sf - 1)) / sf; std::vector chunk_sizes_reduced(chunk_sizes); @@ -576,7 +712,9 @@ void UtteranceSplitter::GetGapSizes(int32 utterance_length, if (total_gap < 0) { // there is an overlap. Overlaps can only go between chunks, not at the - // beginning or end of the utterance. + // beginning or end of the utterance. Also, we try to make the length of + // overlap proportional to the size of the smaller of the two chunks + // that the overlap is between. if (num_chunks == 1) { // there needs to be an overlap, but there is only one chunk... this means // the chunk-size exceeds the utterance length, which is not allowed. @@ -586,16 +724,32 @@ void UtteranceSplitter::GetGapSizes(int32 utterance_length, } // note the elements of 'overlaps' will be <= 0. - std::vector overlaps(num_chunks - 1); - DistributeRandomly(total_gap, &num_overlap_locations); + std::vector magnitudes(num_chunks - 1), + overlaps(num_chunks - 1); + // the 'magnitudes' vector will contain the minimum of the lengths of the + // two adjacent chunks between which are are going to consider having an + // overlap. These will be used to assign the overlap proportional to that + // size. + for (int32 i = 0; i + 1 < num_chunks; i++) { + magnitudes[i] = std::min(chunk_sizes[i], chunk_sizes[i + 1]); + } + DistributeRandomly(total_gap, magnitudes, &overlaps); + for (int32 i = 0; i + 1 < num_chunks; i++) { + // If the following condition does not hold, it's possible we + // could get chunk start-times less than zero. I don't believe + // it's possible for this condition to fail, but we're checking + // for it at this level to make debugging easier, just in case. + KALDI_ASSERT(overlaps[i] <= magnitudes[i]); + } + (*gap_sizes)[0] = 0; // no gap before 1st chunk. for (int32 i = 1; i < num_chunks; i++) (*gap_sizes)[i] = overlaps[i-1]; } else { // There may be a gap. Gaps can go at the start or end of the utterance, or - // between segments. + // between segments. We try to distribute the gaps evenly. std::vector gaps(num_chunks + 1); - DistributeRandomly(total_gap, &gaps); + DistributeRandomlyUniform(total_gap, &gaps); // the last element of 'gaps', the one at the end of the utterance, is // implicit and doesn't have to be written to the output. for (int32 i = 0; i < num_chunks; i++) @@ -610,7 +764,7 @@ void UtteranceSplitter::GetChunksForUtterance( std::vector chunk_sizes; GetChunkSizesForUtterance(utterance_length, &chunk_sizes); std::vector gaps(chunk_sizes.size()); - GetGapSizes(utterance_length, true, chunk_sizes, &gap_sizes); + GetGapSizes(utterance_length, true, chunk_sizes, &gaps); int32 num_chunks = chunk_sizes.size(); chunk_info->resize(num_chunks); int32 t = 0; @@ -622,7 +776,7 @@ void UtteranceSplitter::GetChunksForUtterance( info.left_context = (i == 0 && config_.left_context_initial >= 0 ? 
config_.left_context_initial : config_.left_context); info.right_context = (i == 0 && config_.right_context_final >= 0 ? - config_.right_context_final : config_.right_context); + config_.right_context_final : config_.right_context); t += chunk_sizes[i]; } // check that the end of the last chunk doesn't go more than @@ -631,5 +785,35 @@ void UtteranceSplitter::GetChunksForUtterance( KALDI_ASSERT(t - utterance_length < config_.frame_subsampling_factor); } +void UtteranceSplitter::SetOutputWeights( + int32 utterance_length, + std::vector *chunk_info) const { + int32 sf = config_.frame_subsampling_factor; + int32 num_output_frames = (utterance_length + sf - 1) / sf; + // num_output_frames is the number of frames of supervision. 'count[t]' will + // be the number of chunks that this output-frame t appears in. Note: the + // 'first_frame' and 'num_frames' members of ChunkTimeInfo will always be + // multiples of frame_subsampling_factor. + std::vector count(num_output_frames, 0); + int32 num_chunks = chunk_info->size(); + for (int32 i = 0; i < num_chunks; i++) { + ChunkTimeInfo &chunk = (*chunk_info)[i]; + for (int32 t = chunk.first_frame / sf; + t < (chunk.first_frame + chunk.num_frames) / sf; + t++) + count[t]++; + } + for (int32 i = 0; i < num_chunks; i++) { + ChunkTimeInfo &chunk = (*chunk_info)[i]; + chunk.output_weights.resize(chunk.num_frames / sf); + int32 t_start = chunk.first_frame / sf; + for (int32 t = t_start; + t < (chunk.first_frame + chunk.num_frames) / sf; + t++) + chunk.output_weights[t - t_start] = 1.0 / count[t]; + } +} + + } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-example-utils.h b/src/nnet3/nnet-example-utils.h index d02aa336a10..754743d581e 100644 --- a/src/nnet3/nnet-example-utils.h +++ b/src/nnet3/nnet-example-utils.h @@ -78,12 +78,13 @@ void ReadVectorAsChar(std::istream &is, // Warning: after reading in the values from the command line // (Register() and then then po.Read()), you should then call ComputeDerived() // to set up the 'derived values' (parses 'num_frames_str'). -struct ExampleExtractionConfig { +struct ExampleGenerationConfig { int32 left_context; int32 right_context; int32 left_context_initial; int32 right_context_final; int32 num_frames_overlap; + int32 frame_subsampling_factor; std::string num_frames_str; @@ -95,16 +96,14 @@ struct ExampleExtractionConfig { // frames, to be used at most once or twice per file. std::vector num_frames; - ExampleExtractionConfig(): + ExampleGenerationConfig(): left_context(0), right_context(0), - left_context_initial(-1), right_context_initial(-1), - num_frames_overlap(0), + left_context_initial(-1), right_context_final(-1), + num_frames_overlap(0), frame_subsampling_factor(1), num_frames_str("1") { } - /// This function decodes 'num_frames_str' into 'num_frames' and 'num_frames_alternatives', - /// and ensures that 'num_frames', and the members of num_frames_alternatives' are - /// multiples of 'frame_subsampling_factor'. - /// + /// This function decodes 'num_frames_str' into 'num_frames', and ensures that + /// the members of 'num_frames' are multiples of 'frame_subsampling_factor'. void ComputeDerived(); void Register(OptionsItf *po) { @@ -135,19 +134,22 @@ struct ExampleExtractionConfig { "deal with odd-sized inputs we may also generate egs with these " "other sizes. 
All these values will be rounded up to the " "closest multiple of --frame-subsampling-factor."); - po.Register("num-frames-overlap", &num_frames_overlap, "Number of frames of " - "overlap between adjacent examples (advisory, will not be " - "exactly enforced)"); - po.Register("frame-subsampling-factor", &frame_subsampling_factor, "Used " - "if the frame-rate of the output labels in the generated " - "examples will be less than the frame-rate at the input"); + po->Register("num-frames-overlap", &num_frames_overlap, "Number of frames of " + "overlap between adjacent eamples (applies to chunks of size " + "equal to the primary [first-listed] --num-frames value... " + "will be adjusted for different-sized chunks). Advisory; " + "will not be exactly enforced."); + po->Register("frame-subsampling-factor", &frame_subsampling_factor, "Used " + "if the frame-rate of the output labels in the generated " + "examples will be less than the frame-rate at the input"); } }; /** - struct ChunkTimeInfo is used by class Utterane + struct ChunkTimeInfo is used by class UtteranceSplitter to output + information about how we split an utterance into chunks. */ struct ChunkTimeInfo { @@ -155,26 +157,54 @@ struct ChunkTimeInfo { int32 num_frames; int32 left_context; int32 right_context; + // The 'output_weights' member is a vector of length equal to the + // num_frames divided by frame_subsampling_factor from the config. + // It contains values 0 < x <= 1 that represent weightings of + // output-frames. The idea is that if (because of overlaps) a + // frame appears in multiple chunks, we want to downweight it + // so that the total weight remains 1. (Of course, the calling + // code is free to ignore these weights if desired). + std::vector output_weights; }; class UtteranceSplitter { + public: - UtteranceSplitter(const ExampleExtractionConfig &config); + UtteranceSplitter(const ExampleGenerationConfig &config); - // Given an utterance length, this function creates for you a set of - // chunks into which to split the utterance. Note: this is partly - // random (will call srand()). + const ExampleGenerationConfig& Config() const { return config_; } + + // Given an utterance length, this function creates for you a list of chunks + // into which to split the utterance. Note: this is partly random (will call + // srand()). void GetChunksForUtterance(int32 utterance_length, std::vector *chunk_info) const; + // This function returns true if 'supervision_length' (e.g. the length of the + // posterior, lattice or alignment) is what we expect given + // config_.frame_subsampling_factor. If not, it prints a warning (which is + // why the function needs 'utt', and returns false. Note: we round up, so + // writing config_.frame_subsampling_factor as sf, we expect + // supervision_length = (utterance_length + sf - 1) / sf. + bool LengthsMatch(const std::string &utt, + int32 utterance_length, + int32 supervision_length) const; + + private: void InitSplitForLength(); + // This function returns the 'default duration' in frames of a split, which if + // config_.num_frames_overlap is zero is just the sum of chunk sizes in the + // split (i.e. the sum of the vector's elements), but otherwise, we subtract + // the recommended overlap (see code for details). + float DefaultDurationOfSplit(const std::vector &split) const; + // Used in InitSplitForLength(), returns the maximum utterance-length considered // separately in split_for_length_. 
[above this, we'll assume that the additional @@ -220,17 +250,30 @@ class UtteranceSplitter { std::vector *gap_sizes) const; - // this static function, used in GetGapSizes(), writes values to - // a vector 'vec' such the sum of those values equals n. It - // tries to make those values as similar as possible (they will - // differ by at most one), and the location of the larger versus - // smaller values is random. n may be negative. 'vec' must be - // nonempty. + // this static function, used in GetGapSizes(), writes random values to a + // vector 'vec' such the sum of those values equals n (n may be positive or + // negative). It tries to make those values as similar as possible (they will + // differ by at most one), and the location of the larger versus smaller + // values is random. 'vec' must be nonempty. + static void DistributeRandomlyUniform(int32 n, + std::vector *vec); + + // this static function, used in GetGapSizes(), writes values to a vector + // 'vec' such the sum of those values equals n (n may be positive or + // negative). It tries to make those values, as exactly as it can, + // proportional to the values in 'magnitudes', which must be positive. 'vec' + // must be nonempty, and 'magnitudes' must be the same size as 'vec'. static void DistributeRandomly(int32 n, + const std::vector &magnitudes, std::vector *vec); + // This function is responsible for setting the 'output_weights' + // members of the chunks. + void SetOutputWeights(int32 utterance_lengths, + std::vector *chunk_info) const; + - const ExampleExtractionConfig &config_; + const ExampleGenerationConfig &config_; // The vector 'split_for_length_' is indexed by the num-frames of a file, and // gives us a list of alternative splits that we can use if the utternace has @@ -246,34 +289,16 @@ class UtteranceSplitter { // If an utterance's num-frames is >= split_for_length.size(), the way to find // the split to use is to keep subtracting the primary num-frames (== - // config_.num_frames[0]) from the utterance length until the resulting - // num-frames is < split_for_length_.size(), chunks, and then add the subtracted - // number of copies of the primary num-frames. + // config_.num_frames[0]) minus the num-frames-overlap, from the utterance + // length, until the resulting num-frames is < split_for_length_.size(), + // chunks, and then add the subtracted number of copies of the primary + // num-frames to the split. std::vector > > splits_for_length_; }; -void ComputeExampleTimeInfo(const ExampleExtractionConfig &config, - int32 num_frames_in_utt, - - SplitIntoRanges(int32 num_frames, - int32 frames_per_range, - std::vector *range_starts); - - - - - -// This function rounds up the quantities 'num_frames' and 'num_frames_overlap' -// to the nearest multiple of the frame_subsampling_factor -void RoundUpNumFrames(int32 frame_subsampling_factor, - int32 *num_frames, - int32 *num_frames_overlap); - - - } // namespace nnet3 diff --git a/src/nnet3bin/nnet3-discriminative-get-egs.cc b/src/nnet3bin/nnet3-discriminative-get-egs.cc index 786ed609a33..6055dc3d20c 100644 --- a/src/nnet3bin/nnet3-discriminative-get-egs.cc +++ b/src/nnet3bin/nnet3-discriminative-get-egs.cc @@ -32,155 +32,105 @@ namespace kaldi { namespace nnet3 { -/** - This function does all the processing for one utterance, and outputs the - supervision objects to 'example_writer'. 
-*/ - -static bool ProcessFile( - const discriminative::SplitDiscriminativeSupervisionOptions &config, +// This function does all the processing for one utterance, and outputs the +// examples to 'example_writer'. +static bool ProcessFile(const discriminative::SplitDiscriminativeSupervisionOptions &config, const TransitionModel &tmodel, const MatrixBase &feats, const MatrixBase *ivector_feats, + int32 ivector_period, const discriminative::DiscriminativeSupervision &supervision, const std::string &utt_id, bool compress, - int32 left_context, - int32 right_context, - int32 frames_per_eg, - int32 frames_overlap_per_eg, - int32 frame_subsampling_factor, + const UtteranceSplitter &utt_splitter, int64 *num_frames_written, int64 *num_egs_written, NnetDiscriminativeExampleWriter *example_writer) { KALDI_ASSERT(supervision.num_sequences == 1); - int32 num_feature_frames = feats.NumRows(), - num_output_frames = supervision.frames_per_sequence, - num_feature_frames_subsampled = - (num_feature_frames + frame_subsampling_factor - 1)/ - frame_subsampling_factor; - if (num_output_frames != num_feature_frames_subsampled) - KALDI_ERR << "Mismatch in num-frames: discriminative supervision has " - << num_output_frames - << " versus features/frame_subsampling_factor = " - << num_feature_frames << " / " << frame_subsampling_factor - << ": check that --frame-subsampling-factor option is set " - << "the same as to discriminative-get-supervision."; - - KALDI_ASSERT(frames_per_eg % frame_subsampling_factor == 0); - - int32 frames_per_eg_subsampled = frames_per_eg / frame_subsampling_factor, - frames_overlap_subsampled = frames_overlap_per_eg / frame_subsampling_factor, - frames_shift_subsampled = frames_per_eg_subsampled - frames_overlap_subsampled; - - if (frames_per_eg != -1 && num_feature_frames_subsampled < frames_per_eg_subsampled) { - KALDI_WARN << "No output for utterance " << utt_id - << " (num-frames=" << num_feature_frames - << ") because too short for --frames-per-eg=" - << frames_per_eg; - return false; - } + int32 num_input_frames = feats.NumRows(), + num_output_frames = supervision.frames_per_sequence; - // we don't do any padding, as it would be a bit tricky to pad the discriminative training supervision. - // Instead we select ranges of frames that fully fit within the file; these - // might slightly overlap with each other or have gaps. - std::vector range_starts_subsampled; - if (frames_per_eg != -1) { - chain::SplitIntoRanges(num_feature_frames_subsampled - - frames_overlap_subsampled, - frames_shift_subsampled, - &range_starts_subsampled); - } else { - range_starts_subsampled.push_back(0); - } - // The 'deriv_weights' make sure we don't count frames twice, and also ensure - // that we tend to avoid having nonzero weights on the derivatives that are - // too close to the edge of the corresponding 'range' (these derivatives close - // to the edge are not as accurate as they could be, because when we split we - // don't know the correct alphas and betas). 
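In the rewritten binaries, these range-based derivative weights are replaced by the per-frame output_weights that UtteranceSplitter::SetOutputWeights() computes earlier in this patch. The idea fits in a few lines of Python (illustrative names; chunks is a list of (first_frame, num_frames) pairs, both multiples of the subsampling factor sf):

    # Each output frame gets weight 1/count, where count is the number of
    # chunks covering it, so frames in overlapping regions are not counted
    # twice in total.
    def output_weights_for_chunks(num_output_frames, chunks, sf):
        count = [0] * num_output_frames
        for first_frame, num_frames in chunks:
            for t in range(first_frame // sf, (first_frame + num_frames) // sf):
                count[t] += 1
        return [[1.0 / count[t]
                 for t in range(first // sf, (first + num) // sf)]
                for first, num in chunks]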
- std::vector > deriv_weights; - if (frames_per_eg != -1) { - chain::GetWeightsForRanges(frames_per_eg_subsampled, - range_starts_subsampled, - &deriv_weights); - - if (range_starts_subsampled.empty()) { - KALDI_WARN << "No output for utterance " << utt_id - << " (num-frames=" << num_feature_frames - << ") because too short for --frames-per-eg=" - << frames_per_eg; - return false; - } - } else { - deriv_weights.push_back(Vector()); + if (!utt_splitter.LengthsMatch(utt_id, num_input_frames, num_output_frames)) + return false; // LengthsMatch() will have printed a warning. + + std::vector chunks; + + if (chunks.empty()) { + KALDI_WARN << "Not producing egs for utterance " << utt_id + << " because it is too short: " + << num_input_frames << " frames."; } - discriminative::DiscriminativeSupervisionSplitter splitter(config, tmodel, + int32 frame_subsampling_factor = utt_splitter.Config().frame_subsampling_factor; + + utt_splitter.GetChunksForUtterance(num_input_frames, &chunks); + + discriminative::DiscriminativeSupervisionSplitter splitter(config, tmodel, supervision); - for (size_t i = 0; i < range_starts_subsampled.size(); i++) { + for (size_t c = 0; c < chunks.size(); c++) { + ChunkTimeInfo &chunk = chunks[c]; NnetDiscriminativeExample nnet_discriminative_eg; nnet_discriminative_eg.outputs.resize(1); - int32 range_start_subsampled = range_starts_subsampled[i], - range_start = range_start_subsampled * frame_subsampling_factor; - - if (frames_per_eg != -1) { - - discriminative::DiscriminativeSupervision supervision_part; - - splitter.GetFrameRange(range_start_subsampled, - frames_per_eg_subsampled, - (i == 0 ? false : true), - &supervision_part); - - int32 first_frame = 0; // we shift the time-indexes of all these parts so - // that the supervised part starts from frame 0. - NnetDiscriminativeSupervision nnet_supervision("output", supervision_part, - deriv_weights[i], - first_frame, - frame_subsampling_factor); - nnet_discriminative_eg.outputs[0].Swap(&nnet_supervision); - } else { - int32 first_frame = 0; // we shift the time-indexes of all these parts so - // that the supervised part starts from frame 0. - NnetDiscriminativeSupervision nnet_supervision("output", supervision, - deriv_weights[i], - first_frame, - frame_subsampling_factor); - nnet_discriminative_eg.outputs[0].Swap(&nnet_supervision); - } + + int32 start_frame_subsampled = chunk.first_frame / frame_subsampling_factor, + num_frames_subsampled = chunk.num_frames / frame_subsampling_factor; + + discriminative::DiscriminativeSupervision supervision_part; + + splitter.GetFrameRange(start_frame_subsampled, + num_frames_subsampled, + (c == 0 ? false : true), + &supervision_part); + + SubVector output_weights( + &(chunk.output_weights[0]), + static_cast(chunk.output_weights.size())); + + int32 first_frame = 0; // we shift the time-indexes of all these parts so + // that the supervised part starts from frame 0. + NnetDiscriminativeSupervision nnet_supervision("output", supervision_part, + output_weights, + first_frame, + frame_subsampling_factor); + nnet_discriminative_eg.outputs[0].Swap(&nnet_supervision); nnet_discriminative_eg.inputs.resize(ivector_feats != NULL ? 2 : 1); - int32 this_frames_per_eg = frames_per_eg != -1 ? 
frames_per_eg : supervision.frames_per_sequence; - int32 tot_frames = left_context + this_frames_per_eg + right_context; - Matrix input_frames(tot_frames, feats.NumCols(), kUndefined); + int32 tot_input_frames = chunk.left_context + chunk.num_frames + + chunk.right_context; + + Matrix input_frames(tot_input_frames, feats.NumCols(), + kUndefined); - // Set up "input_frames". - for (int32 j = -left_context; j < this_frames_per_eg + right_context; j++) { - int32 t = range_start + j; - if (t < 0) t = 0; - if (t >= feats.NumRows()) t = feats.NumRows() - 1; - SubVector src(feats, t), - dest(input_frames, j + left_context); + int32 start_frame = chunk.first_frame - chunk.left_context; + for (int32 t = start_frame; t < start_frame + tot_input_frames; t++) { + int32 t2 = t; + if (t2 < 0) t2 = 0; + if (t2 >= num_input_frames) t2 = num_input_frames - 1; + int32 j = t - start_frame; + SubVector src(feats, t2), + dest(input_frames, j); dest.CopyFromVec(src); } - NnetIo input_io("input", - left_context, - input_frames); + + NnetIo input_io("input", -chunk.left_context, input_frames); nnet_discriminative_eg.inputs[0].Swap(&input_io); if (ivector_feats != NULL) { // if applicable, add the iVector feature. - // try to get closest frame to middle of window to get - // a representative iVector. - int32 closest_frame = range_start + this_frames_per_eg / 2; - KALDI_ASSERT(ivector_feats->NumRows() > 0); - if (closest_frame >= ivector_feats->NumRows()) - closest_frame = ivector_feats->NumRows() - 1; + // choose iVector from a random frame in the chunk + int32 ivector_frame = RandInt(start_frame, + start_frame + num_input_frames - 1), + ivector_frame_subsampled = ivector_frame / ivector_period; + if (ivector_frame_subsampled < 0) + ivector_frame_subsampled = 0; + if (ivector_frame_subsampled >= ivector_feats->NumRows()) + ivector_frame_subsampled = ivector_feats->NumRows() - 1; Matrix ivector(1, ivector_feats->NumCols()); - ivector.Row(0).CopyFromVec(ivector_feats->Row(closest_frame)); + ivector.Row(0).CopyFromVec(ivector_feats->Row(ivector_frame_subsampled)); NnetIo ivector_io("ivector", 0, ivector); nnet_discriminative_eg.inputs[1].Swap(&ivector_io); } @@ -189,11 +139,11 @@ static bool ProcessFile( nnet_discriminative_eg.Compress(); std::ostringstream os; - os << utt_id << "-" << range_start; + os << utt_id << "-" << chunk.first_frame; std::string key = os.str(); // key is - - *num_frames_written += this_frames_per_eg; + *num_frames_written += chunk.num_frames; *num_egs_written += 1; example_writer->Write(key, nnet_discriminative_eg); @@ -228,35 +178,28 @@ int main(int argc, char *argv[]) { "discriminative-get-supervision.\n"; bool compress = true; - int32 left_context = 0, right_context = 0, num_frames = 1, - num_frames_overlap = 0, length_tolerance = 100, - frame_subsampling_factor = 1; + int32 length_tolerance = 100, online_ivector_period = 1; + + std::string online_ivector_rspecifier; - std::string ivector_rspecifier; + ExampleGenerationConfig eg_config; // controls num-frames, + // left/right-context, etc. 
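One detail of the rewritten ProcessFile() above: the input window is padded by clamping frame indexes at the utterance boundaries, repeating the first or last frame, rather than by zero-filling. A NumPy-flavored sketch, assuming feats is a frames-by-dim array (illustrative names):

    import numpy as np

    # Mirrors the t2-clamping loop above: indexes before the start or past
    # the end of the utterance are replaced by the edge frames.
    def extract_input_window(feats, first_frame, num_frames,
                             left_context, right_context):
        start = first_frame - left_context
        total = left_context + num_frames + right_context
        rows = np.clip(np.arange(start, start + total), 0, len(feats) - 1)
        return feats[rows]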
discriminative::SplitDiscriminativeSupervisionOptions splitter_config; ParseOptions po(usage); po.Register("compress", &compress, "If true, write egs in " "compressed format (recommended)"); - po.Register("left-context", &left_context, "Number of frames of left " - "context the neural net requires."); - po.Register("right-context", &right_context, "Number of frames of right " - "context the neural net requires."); - po.Register("num-frames", &num_frames, "Number of frames with labels " - "that each example contains. Will be rounded up to a multiple " - "of --frame-subsampling-factor."); - po.Register("num-frames-overlap", &num_frames_overlap, "Number of frames of " - "overlap between each example (could be useful in conjunction " - "--min-deriv-time and --max-deriv-time, to avoid wasting data). " - "Each time we shift by --num-frames minus --num-frames-overlap."); - po.Register("ivectors", &ivector_rspecifier, "Rspecifier of ivector " + po.Register("ivectors", &online_ivector_rspecifier, "Alias for --online-ivectors " + "option, for back compatibility"); + po.Register("online-ivectors", &online_ivector_rspecifier, "Rspecifier of ivector " "features, as a matrix."); + po.Register("online-ivector-period", &online_ivector_period, "Number of frames " + "between iVectors in matrices supplied to the --online-ivectors " + "option"); po.Register("length-tolerance", &length_tolerance, "Tolerance for " "difference in num-frames between feat and ivector matrices"); - po.Register("frame-subsampling-factor", &frame_subsampling_factor, "Used " - "if the frame-rate at the output will be less than the " - "frame-rate of the input"); - + eg_config.Register(&po); + ParseOptions splitter_opts("supervision-splitter", &po); splitter_config.Register(&splitter_opts); @@ -267,13 +210,8 @@ int main(int argc, char *argv[]) { exit(1); } - if (left_context < 0 || right_context < 0 || - length_tolerance < 0 || frame_subsampling_factor <= 0) - KALDI_ERR << "One of the integer options is out of the allowed range."; - - if (frame_subsampling_factor != 1) - RoundUpNumFrames(frame_subsampling_factor, - &num_frames, &num_frames_overlap); + eg_config.ComputeDerived(); + UtteranceSplitter utt_splitter(eg_config); std::string model_wxfilename, feature_rspecifier, supervision_rspecifier, @@ -285,7 +223,7 @@ int main(int argc, char *argv[]) { examples_wspecifier = po.GetArg(4); TransitionModel tmodel; - { + { bool binary; Input ki(model_wxfilename, &binary); tmodel.Read(ki.Stream(), binary); @@ -295,7 +233,8 @@ int main(int argc, char *argv[]) { discriminative::RandomAccessDiscriminativeSupervisionReader supervision_reader( supervision_rspecifier); NnetDiscriminativeExampleWriter example_writer(examples_wspecifier); - RandomAccessBaseFloatMatrixReader ivector_reader(ivector_rspecifier); + RandomAccessBaseFloatMatrixReader online_ivector_reader( + online_ivector_rspecifier); int32 num_done = 0, num_err = 0; int64 num_frames_written = 0, num_egs_written = 0; @@ -308,39 +247,35 @@ int main(int argc, char *argv[]) { num_err++; } else { const discriminative::DiscriminativeSupervision &supervision = supervision_reader.Value(key); - const Matrix *ivector_feats = NULL; - if (!ivector_rspecifier.empty()) { - if (!ivector_reader.HasKey(key)) { + const Matrix *online_ivector_feats = NULL; + if (!online_ivector_rspecifier.empty()) { + if (!online_ivector_reader.HasKey(key)) { KALDI_WARN << "No iVectors for utterance " << key; num_err++; continue; } else { // this address will be valid until we call HasKey() or Value() // again. 
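The iVector handling also changes: instead of the frame closest to the middle of the window, a random input frame is chosen and mapped through --online-ivector-period to a row of the iVector matrix. A sketch with illustrative names (note that the C++ above draws the random frame from a range of width num_input_frames, the utterance length, although its comment says to pick "a random frame in the chunk"; the sketch uses the chunk's total input frames):

    import random

    def choose_ivector_row(start_frame, tot_input_frames, ivector_period,
                           num_ivector_rows):
        frame = random.randint(start_frame, start_frame + tot_input_frames - 1)
        row = frame // ivector_period
        return min(max(row, 0), num_ivector_rows - 1)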
- ivector_feats = &(ivector_reader.Value(key)); + online_ivector_feats = &(online_ivector_reader.Value(key)); } } - if (ivector_feats != NULL && - (std::abs(feats.NumRows() - ivector_feats->NumRows()) > length_tolerance - || ivector_feats->NumRows() == 0)) { + if (online_ivector_feats != NULL && + (abs(feats.NumRows() - (online_ivector_feats->NumRows() * + online_ivector_period)) > length_tolerance + || online_ivector_feats->NumRows() == 0)) { KALDI_WARN << "Length difference between feats " << feats.NumRows() - << " and iVectors " << ivector_feats->NumRows() + << " and iVectors " << online_ivector_feats->NumRows() << "exceeds tolerance " << length_tolerance; num_err++; continue; } + if (ProcessFile(splitter_config, tmodel, - feats, ivector_feats, supervision, - key, compress, left_context, right_context, num_frames, - num_frames_overlap, frame_subsampling_factor, + feats, online_ivector_feats, online_ivector_period, + supervision, key, compress, utt_splitter, &num_frames_written, &num_egs_written, - &example_writer)) - num_done++; - else { - KALDI_WARN << "Failed to process utterance into nnet example " - << "for key " << key; - num_err++; - } + &example_writer)) num_done++; + else num_err++; } } @@ -355,4 +290,3 @@ int main(int argc, char *argv[]) { return -1; } } - diff --git a/src/nnet3bin/nnet3-get-egs.cc b/src/nnet3bin/nnet3-get-egs.cc index 897ffad7b48..6b9dacfa03d 100644 --- a/src/nnet3bin/nnet3-get-egs.cc +++ b/src/nnet3bin/nnet3-get-egs.cc @@ -19,12 +19,12 @@ // limitations under the License. #include - #include "base/kaldi-common.h" #include "util/common-utils.h" #include "hmm/transition-model.h" #include "hmm/posterior.h" #include "nnet3/nnet-example.h" +#include "nnet3/nnet-example-utils.h" namespace kaldi { namespace nnet3 { @@ -32,85 +32,118 @@ namespace nnet3 { static void ProcessFile(const MatrixBase &feats, const MatrixBase *ivector_feats, + int32 ivector_period, const Posterior &pdf_post, const std::string &utt_id, bool compress, int32 num_pdfs, - int32 left_context, - int32 right_context, - int32 frames_per_eg, + const UtteranceSplitter &utt_splitter, int64 *num_frames_written, int64 *num_egs_written, NnetExampleWriter *example_writer) { - KALDI_ASSERT(feats.NumRows() == static_cast(pdf_post.size())); + int32 num_input_frames = feats.NumRows(); + if (!utt_splitter.LengthsMatch(utt_id, num_input_frames, + static_cast(pdf_post.size()))) + return; // LengthsMatch() will have printed a warning. + + std::vector chunks; + + if (chunks.empty()) { + KALDI_WARN << "Not producing egs for utterance " << utt_id + << " because it is too short: " + << num_input_frames << " frames."; + } - for (int32 t = 0; t < feats.NumRows(); t += frames_per_eg) { + // 'frame_subsampling_factor' is not used in any recipes at the time of + // writing, this is being supported to unify the code with the 'chain' recipes + // and in case we need it for some reason in future. + int32 frame_subsampling_factor = + utt_splitter.Config().frame_subsampling_factor; - // actual_frames_per_eg is the number of frames with nonzero - // posteriors. At the end of the file we pad with zero posteriors - // so that all examples have the same structure (prevents the need - // for recompilations). 
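In the rewritten code later in this hunk, the zero-padding scheme goes away: each chunk's labels are sliced from the posteriors at the subsampled frame rate, and each entry is scaled by the chunk's output weight. A sketch with illustrative names, where pdf_post is a list, per output frame, of (pdf-id, posterior) pairs:

    # Slice and reweight the posteriors for one chunk.
    def chunk_labels(pdf_post, first_frame, num_frames, sf, output_weights):
        start = first_frame // sf
        labels = []
        for i in range(num_frames // sf):
            w = output_weights[i]  # typically 1.0 / (num chunks covering frame)
            labels.append([(pdf, post * w) for pdf, post in pdf_post[start + i]])
        return labels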
- int32 actual_frames_per_eg = std::min(frames_per_eg, - feats.NumRows() - t); + utt_splitter.GetChunksForUtterance(num_input_frames, &chunks); + for (size_t c = 0; c < chunks.size(); c++) { + const ChunkTimeInfo &chunk = chunks[c]; - int32 tot_frames = left_context + frames_per_eg + right_context; + int32 tot_input_frames = chunk.left_context + chunk.num_frames + + chunk.right_context; - Matrix input_frames(tot_frames, feats.NumCols(), kUndefined); + Matrix input_frames(tot_input_frames, feats.NumCols(), + kUndefined); - // Set up "input_frames". - for (int32 j = -left_context; j < frames_per_eg + right_context; j++) { - int32 t2 = j + t; + int32 start_frame = chunk.first_frame - chunk.left_context; + for (int32 t = start_frame; t < start_frame + tot_input_frames; t++) { + int32 t2 = t; if (t2 < 0) t2 = 0; - if (t2 >= feats.NumRows()) t2 = feats.NumRows() - 1; + if (t2 >= num_input_frames) t2 = num_input_frames - 1; + int32 j = t - start_frame; SubVector src(feats, t2), - dest(input_frames, j + left_context); + dest(input_frames, j); dest.CopyFromVec(src); } NnetExample eg; // call the regular input "input". - eg.io.push_back(NnetIo("input", - left_context, - input_frames)); + eg.io.push_back(NnetIo("input", -chunk.left_context, input_frames)); - // if applicable, add the iVector feature. if (ivector_feats != NULL) { - // try to get closest frame to middle of window to get - // a representative iVector. - int32 closest_frame = t + (actual_frames_per_eg / 2); - KALDI_ASSERT(ivector_feats->NumRows() > 0); - if (closest_frame >= ivector_feats->NumRows()) - closest_frame = ivector_feats->NumRows() - 1; + // if applicable, add the iVector feature. + // choose iVector from a random frame in the chunk + int32 ivector_frame = RandInt(start_frame, + start_frame + num_input_frames - 1), + ivector_frame_subsampled = ivector_frame / ivector_period; + if (ivector_frame_subsampled < 0) + ivector_frame_subsampled = 0; + if (ivector_frame_subsampled >= ivector_feats->NumRows()) + ivector_frame_subsampled = ivector_feats->NumRows() - 1; Matrix ivector(1, ivector_feats->NumCols()); - ivector.Row(0).CopyFromVec(ivector_feats->Row(closest_frame)); + ivector.Row(0).CopyFromVec(ivector_feats->Row(ivector_frame_subsampled)); eg.io.push_back(NnetIo("ivector", 0, ivector)); } - // add the labels. - Posterior labels(frames_per_eg); - for (int32 i = 0; i < actual_frames_per_eg; i++) - labels[i] = pdf_post[t + i]; - // remaining posteriors for frames are empty. - eg.io.push_back(NnetIo("output", num_pdfs, 0, labels)); + // Note: chunk.first_frame and chunk.num_frames will both be + // multiples of frame_subsampling_factor. + int32 start_frame_subsampled = chunk.first_frame / frame_subsampling_factor, + num_frames_subsampled = chunk.num_frames / frame_subsampling_factor; + + KALDI_ASSERT(start_frame_subsampled + num_frames_subsampled - 1 < + static_cast(pdf_post.size())); + + // Note: in all current cases there is no subsampling of output-frames going + // on (--frame-subsampling-factor=1), so you could read + // 'num_frames_subsampled' as just 'num_frames'. + Posterior labels(num_frames_subsampled); + + // TODO: it may be that using these weights is not actually helpful (with + // chain training, it was not), and that setting them all to 1 is better. + // We could add a boolean option to this program to control that; but I + // don't want to add such an option if experiments show that it is not + // helpful. 
+ for (int32 i = 0; i < num_frames_subsampled; i++) { + int32 t = i + start_frame_subsampled; + labels[i] = pdf_post[t]; + for (std::vector >::iterator + iter = labels[i].begin(); iter != labels[i].end(); ++iter) + iter->second *= chunk.output_weights[i]; + } if (compress) eg.Compress(); std::ostringstream os; - os << utt_id << "-" << t; + os << utt_id << "-" << chunk.first_frame; std::string key = os.str(); // key is - - *num_frames_written += actual_frames_per_eg; + *num_frames_written += chunk.num_frames; *num_egs_written += 1; example_writer->Write(key, eg); } } - -} // namespace nnet2 +} // namespace nnet3 } // namespace kaldi int main(int argc, char *argv[]) { @@ -140,28 +173,30 @@ int main(int argc, char *argv[]) { bool compress = true; - int32 num_pdfs = -1, left_context = 0, right_context = 0, - num_frames = 1, length_tolerance = 100; + int32 num_pdfs = -1, length_tolerance = 100, + online_ivector_period = 1; - std::string ivector_rspecifier; + ExampleGenerationConfig eg_config; // controls num-frames, + // left/right-context, etc. + + std::string online_ivector_rspecifier; ParseOptions po(usage); + po.Register("compress", &compress, "If true, write egs in " - "compressed format."); + "compressed format (recommended)."); po.Register("num-pdfs", &num_pdfs, "Number of pdfs in the acoustic " "model"); - po.Register("left-context", &left_context, "Number of frames of left " - "context of input features that are added to each " - "example"); - po.Register("right-context", &right_context, "Number of frames of right " - "context of input features that are added to each " - "example"); - po.Register("num-frames", &num_frames, "Number of frames with labels " - "that each example contains."); - po.Register("ivectors", &ivector_rspecifier, "Rspecifier of ivector " - "features, as a matrix."); + po.Register("ivectors", &online_ivector_rspecifier, "Alias for " + "--online-ivectors option, for back compatibility"); + po.Register("online-ivectors", &online_ivector_rspecifier, "Rspecifier of " + "ivector features, as a matrix."); + po.Register("online-ivector-period", &online_ivector_period, "Number of " + "frames between iVectors in matrices supplied to the " + "--online-ivectors option"); po.Register("length-tolerance", &length_tolerance, "Tolerance for " "difference in num-frames between feat and ivector matrices"); + eg_config.Register(&po); po.Read(argc, argv); @@ -173,6 +208,8 @@ int main(int argc, char *argv[]) { if (num_pdfs <= 0) KALDI_ERR << "--num-pdfs options is required."; + eg_config.ComputeDerived(); + UtteranceSplitter utt_splitter(eg_config); std::string feature_rspecifier = po.GetArg(1), pdf_post_rspecifier = po.GetArg(2), @@ -182,7 +219,8 @@ int main(int argc, char *argv[]) { SequentialBaseFloatMatrixReader feat_reader(feature_rspecifier); RandomAccessPosteriorReader pdf_post_reader(pdf_post_rspecifier); NnetExampleWriter example_writer(examples_wspecifier); - RandomAccessBaseFloatMatrixReader ivector_reader(ivector_rspecifier); + RandomAccessBaseFloatMatrixReader online_ivector_reader( + online_ivector_rspecifier); int32 num_done = 0, num_err = 0; int64 num_frames_written = 0, num_egs_written = 0; @@ -201,31 +239,32 @@ int main(int argc, char *argv[]) { num_err++; continue; } - const Matrix *ivector_feats = NULL; - if (!ivector_rspecifier.empty()) { - if (!ivector_reader.HasKey(key)) { + const Matrix *online_ivector_feats = NULL; + if (!online_ivector_rspecifier.empty()) { + if (!online_ivector_reader.HasKey(key)) { KALDI_WARN << "No iVectors for utterance " << key; num_err++; 
continue; } else { // this address will be valid until we call HasKey() or Value() // again. - ivector_feats = &(ivector_reader.Value(key)); + online_ivector_feats = &(online_ivector_reader.Value(key)); } } - if (ivector_feats != NULL && - (abs(feats.NumRows() - ivector_feats->NumRows()) > length_tolerance - || ivector_feats->NumRows() == 0)) { + if (online_ivector_feats != NULL && + (abs(feats.NumRows() - (online_ivector_feats->NumRows() * + online_ivector_period)) > length_tolerance + || online_ivector_feats->NumRows() == 0)) { KALDI_WARN << "Length difference between feats " << feats.NumRows() - << " and iVectors " << ivector_feats->NumRows() + << " and iVectors " << online_ivector_feats->NumRows() << "exceeds tolerance " << length_tolerance; num_err++; continue; } - ProcessFile(feats, ivector_feats, pdf_post, key, compress, - num_pdfs, left_context, right_context, num_frames, + ProcessFile(feats, online_ivector_feats, online_ivector_period, + pdf_post, key, compress, num_pdfs, utt_splitter, &num_frames_written, &num_egs_written, &example_writer); num_done++; From 1a298b62a62892719ac785b1205faa4ec08beadd Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 25 Dec 2016 16:57:44 -0500 Subject: [PATCH 158/530] Cosmetic fix in table-reading code --- src/util/kaldi-table-inl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/util/kaldi-table-inl.h b/src/util/kaldi-table-inl.h index e63db99f0e1..5359d730b85 100644 --- a/src/util/kaldi-table-inl.h +++ b/src/util/kaldi-table-inl.h @@ -1695,8 +1695,8 @@ class RandomAccessTableReaderScriptImpl: virtual const T& Value(const std::string &key) { if (!HasKeyInternal(key, true)) // true == preload. KALDI_ERR << "Could not get item for key " << key - << ", rspecifier is " << rspecifier_ << "[to ignore this, " - << "add the p, (permissive) option to the rspecifier."; + << ", rspecifier is " << rspecifier_ << " [to ignore this, " + << "add the p, (permissive) option to the rspecifier."; KALDI_ASSERT(key_ == key); if (state_ == kHaveObject) { return holder_.Value(); From 6850798ab730f0c36340ac3553c2f1010e6885d1 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 25 Dec 2016 19:50:09 -0500 Subject: [PATCH 159/530] Minor fixes and refactoring in xconfig code, for easier debugging; fix some bugs in error handling in xconfigs. 
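The error-handling convention this commit moves to: layer classes raise plain RuntimeError, and the parser wraps layer construction so that the offending xconfig line is printed to stderr before the exception propagates (see the parser.py hunk below). A condensed sketch of the pattern, where parse_line and dispatch stand in for xutils.parse_config_line and the config_to_layer map:

    import sys

    def xconfig_line_to_object(config_line, prev_layers, parse_line, dispatch):
        try:
            parsed = parse_line(config_line)  # (first_token, key_to_value) or None
            if parsed is None:
                return None                   # blank line or comment only
            first_token, key_to_value = parsed
            if first_token not in dispatch:
                raise RuntimeError("No such layer type '{0}'".format(first_token))
            return dispatch[first_token](first_token, key_to_value, prev_layers)
        except Exception:
            print("***Exception caught while parsing the following xconfig "
                  "line:\n*** {0}".format(config_line), file=sys.stderr)
            raise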
--- .../steps/libs/nnet3/xconfig/basic_layers.py | 107 ++++++++------ egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py | 43 +++--- egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py | 62 ++++----- egs/wsj/s5/steps/libs/nnet3/xconfig/tdnn.py | 7 +- egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py | 131 ++++++++---------- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 2 +- 6 files changed, 166 insertions(+), 186 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py index 24eea922968..eeacc1fff87 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py @@ -7,10 +7,10 @@ """ from __future__ import print_function -import sys import math +import re +import sys import libs.nnet3.xconfig.utils as xutils -from libs.nnet3.xconfig.utils import XconfigParserError as xparser_error class XconfigLayerBase(object): @@ -33,14 +33,14 @@ def __init__(self, first_token, key_to_value, all_layers): self.layer_type = first_token if not 'name' in key_to_value: - raise xparser_error("Expected 'name' to be specified.", self.str()) + raise RuntimeError("Expected 'name' to be specified.") self.name = key_to_value['name'] if not xutils.is_valid_line_name(self.name): - raise xparser_error("Invalid value: name={0}".format( - key_to_value['name']), self.str()) + raise RuntimeError("Invalid value: name={0}".format( + key_to_value['name'])) for prev_layer in all_layers: if self.name == prev_layer.name: - raise xparser_error("Name '{0}' is used for more than one " + raise RuntimeError("Name '{0}' is used for more than one " "layer.".format(self.name)) # the following, which should be overridden in the child class, sets @@ -66,13 +66,23 @@ def set_configs(self, key_to_value, all_layers): in a more specific way. """ + # First check that there are no keys that don't correspond to any config + # parameter of this layer, and if so, raise an exception with an + # informative message saying what configs are allowed. for key,value in key_to_value.items(): if key != 'name': if not key in self.config: - raise xparser_error("Configuration value {0}={1} was not" - " expected in layer of type {2}" - "".format(key, value, self.layer_type), - self.str()) + configs = ' '.join([ ('{0}->"{1}"'.format(x,y) if isinstance(y, str) + else '{0}->{1}'.format(x,y)) + for x,y in self.config.items() ]) + raise RuntimeError("Configuration value {0}={1} was not " + "expected in layer of type {2}; allowed " + "configs with their defaults: {3}" + .format(key, value, self.layer_type, configs)) + + for key,value in key_to_value.items(): + if key != 'name': + assert key in self.config # we checked above. self.config[key] = xutils.convert_value_to_type(key, type(self.config[key]), value) @@ -82,10 +92,10 @@ def set_configs(self, key_to_value, all_layers): # in self.descriptors[key] for key in self.get_input_descriptor_names(): if not key in self.config: - raise xparser_error("{0}: object of type {1} needs to override" + raise RuntimeError("{0}: object of type {1} needs to override" " get_input_descriptor_names()." - "".format(sys.argv[0], str(type(self))), - self.str()) + "".format(sys.argv[0], str(type(self)))) + descriptor_string = self.config[key] # input string. assert isinstance(descriptor_string, str) desc = self.convert_to_descriptor(descriptor_string, all_layers) @@ -112,9 +122,8 @@ def set_configs(self, key_to_value, all_layers): desc_norm_str2 = desc2.str() # if the following ever fails we'll have to do some debugging. 
if desc_norm_str != desc_norm_str2: - raise xparser_error("Likely code error: '{0}' != '{1}'" - "".format(desc_norm_str, desc_norm_str2), - self.str()) + raise RuntimeError("Likely code error: '{0}' != '{1}'" + "".format(desc_norm_str, desc_norm_str2)) def str(self): """Converts 'this' to a string which could be printed to @@ -123,13 +132,24 @@ def str(self): (so users can see any defaults). """ - ans = '{0} name={1}'.format(self.layer_type, self.name) - ans += ' ' + ' '.join([ '{0}={1}'.format(key, self.config[key]) - for key in sorted(self.config.keys())]) + list_of_entries = [ '{0} name={1}'.format(self.layer_type, self.name) ] + for key, value in sorted(self.config.items()): + if isinstance(value, str) and re.search('=', value): + # the value is a string that contains an '=' sign, so we need to + # enclose it in double-quotes, otherwise we woudldn't be able to + # parse from that output. + if re.search('"', value): + print("Warning: config '{0}={1}' contains both double-quotes " + "and equals sign; it will not be possible to parse it " + "from the file.".format(key, value), file=sys.stderr) + list_of_entries.append('{0}="{1}"'.format(key, value)) + else: + list_of_entries.append('{0}={1}'.format(key, value)) + + return ' '.join(list_of_entries) return ans def __str__(self): - return self.str() @@ -161,8 +181,8 @@ def convert_to_descriptor(self, descriptor_string, all_layers): # note: 'pos' should point to the 'end of string' marker # that terminates 'tokens'. if pos != len(tokens) - 1: - raise xparser_error("Parsing Descriptor, saw junk at end: " + - ' '.join(tokens[pos:-1]), self.str()) + raise RuntimeError("Parsing Descriptor, saw junk at end: " + + ' '.join(tokens[pos:-1])) return descriptor def get_dim_for_descriptor(self, descriptor, all_layers): @@ -299,9 +319,8 @@ def set_default_configs(self): def check_configs(self): if self.config['dim'] <= 0: - raise xparser_error("Dimension of input-layer '{0}'" - "should be positive.".format(self.name), - self.str()) + raise RuntimeError("Dimension of input-layer '{0}'" + "should be positive.".format(self.name)) def get_input_descriptor_names(self): @@ -453,21 +472,19 @@ def set_default_configs(self): def check_configs(self): if self.config['dim'] <= -1: - raise xparser_error("In output-layer, dim has invalid value {0}" - "".format(self.config['dim']), self.str()) + raise RuntimeError("In output-layer, dim has invalid value {0}" + "".format(self.config['dim'])) if self.config['objective-type'] != 'linear' and \ self.config['objective_type'] != 'quadratic': - raise xparser_error("In output-layer, objective-type has" + raise RuntimeError("In output-layer, objective-type has" " invalid value {0}" - "".format(self.config['objective-type']), - self.str()) + "".format(self.config['objective-type'])) if self.config['learning-rate-factor'] <= 0.0: - raise xparser_error("In output-layer, learning-rate-factor has" + raise RuntimeError("In output-layer, learning-rate-factor has" " invalid value {0}" - "".format(self.config['learning-rate-factor']), - self.str()) + "".format(self.config['learning-rate-factor'])) # you cannot access the output of this layer from other layers... see @@ -484,14 +501,14 @@ def output_name(self, auxiliary_outputs = None): # layer and/or the output of the affine layer available as inputs to # other layers, in some circumstances. # we'll implement that when it's needed. 
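The quoting rule added to XconfigLayerBase.str() earlier in this hunk deserves a note: values containing '=' are printed in double-quotes so that the line can be parsed back, and a value containing both a double-quote and '=' cannot be round-tripped, so a warning is printed. An illustrative sketch:

    import re
    import sys

    def format_config_entry(key, value):
        if isinstance(value, str) and re.search('=', value):
            if re.search('"', value):
                print("Warning: config '{0}={1}' contains both double-quotes "
                      "and an equals sign; it cannot be parsed back from the "
                      "printed form.".format(key, value), file=sys.stderr)
            return '{0}="{1}"'.format(key, value)
        return '{0}={1}'.format(key, value)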
- raise xparser_error("Outputs of output-layer may not be used by other" - " layers", self.str()) + raise RuntimeError("Outputs of output-layer may not be used by other" + " layers") def output_dim(self, auxiliary_output = None): # see comment in output_name(). - raise xparser_error("Outputs of output-layer may not be used by other" - " layers", self.str()) + raise RuntimeError("Outputs of output-layer may not be used by other" + " layers") def get_full_config(self): @@ -614,11 +631,13 @@ def set_default_configs(self): def check_configs(self): if self.config['dim'] < 0: - raise xparser_error("dim has invalid value {0}".format(self.config['dim']), self.str()) + raise RuntimeError("dim has invalid value {0}".format(self.config['dim'])) if self.config['self-repair-scale'] < 0.0 or self.config['self-repair-scale'] > 1.0: - raise xparser_error("self-repair-scale has invalid value {0}".format(self.config['self-repair-scale']), self.str()) + raise RuntimeError("self-repair-scale has invalid value {0}" + .format(self.config['self-repair-scale'])) if self.config['target-rms'] < 0.0: - raise xparser_error("target-rms has invalid value {0}".format(self.config['target-rms']), self.str()) + raise RuntimeError("target-rms has invalid value {0}" + .format(self.config['target-rms'])) def output_name(self, auxiliary_output=None): # at a later stage we might want to expose even the pre-nonlinearity @@ -721,8 +740,8 @@ def _add_components(self, input_desc, input_dim, nonlinearities): target_rms)) else: - raise xparser_error("Unknown nonlinearity type:" - "{0}".format(nonlinearity), self.str()) + raise RuntimeError("Unknown nonlinearity type: {0}" + .format(nonlinearity)) configs.append(line) line = ('component-node name={0}.{1}' @@ -763,7 +782,7 @@ def set_default_configs(self): def check_configs(self): if self.config['affine-transform-file'] is None: - raise xparser_error("affine-transform-file must be set.", self.str()) + raise RuntimeError("affine-transform-file must be set.") def output_name(self, auxiliary_output = None): # Fixed affine layer computes only one vector, there are no intermediate @@ -854,7 +873,7 @@ def set_derived_configs(self): def check_configs(self): if self.config['dim'] <= 0: - raise xparser_error("dim specified is invalid".format(self.name, self.layer_type), self.str()) + raise RuntimeError("dim specified is invalid") def output_name(self, auxiliary_output = None): # affine layer computes only one vector, there are no intermediate diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py index 1ac860ffa9c..cbd31ccea64 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py @@ -9,7 +9,6 @@ import re from libs.nnet3.xconfig.basic_layers import XconfigLayerBase -from libs.nnet3.xconfig.utils import XconfigParserError as xparser_error # This class is for lines like @@ -55,11 +54,11 @@ def set_derived_configs(self): def check_configs(self): key = 'cell-dim' if self.config['cell-dim'] <= 0: - raise xparser_error("cell-dim has invalid value {0}.".format(self.config[key]), self.str()) + raise RuntimeError("cell-dim has invalid value {0}.".format(self.config[key])) for key in ['self-repair-scale-nonlinearity']: if self.config[key] < 0.0 or self.config[key] > 1.0: - raise xparser_error("{0} has invalid value {1}.".format(key, self.config[key])) + raise RuntimeError("{0} has invalid value {1}.".format(key, self.config[key])) def auxiliary_outputs(self): return ['c_t'] @@ -70,7 +69,7 @@ def 
output_name(self, auxiliary_output = None): if auxiliary_output in self.auxiliary_outputs(): node_name = auxiliary_output else: - raise xparser_error("Unknown auxiliary output name {0}".format(auxiliary_output), self.str()) + raise RuntimeError("Unknown auxiliary output name {0}".format(auxiliary_output)) return '{0}.{1}'.format(self.name, node_name) @@ -81,7 +80,7 @@ def output_dim(self, auxiliary_output = None): return self.config['cell-dim'] # add code for other auxiliary_outputs here when we decide to expose them else: - raise xparser_error("Unknown auxiliary output name {0}".format(auxiliary_output), self.str()) + raise RuntimeError("Unknown auxiliary output name {0}".format(auxiliary_output)) return self.config['cell-dim'] @@ -233,7 +232,6 @@ def generate_lstm_config(self): # ng-affine-options='' [Additional options used for the full matrices in the LSTM, can be used to do things like set biases to initialize to 1] class XconfigLstmpLayer(XconfigLayerBase): def __init__(self, first_token, key_to_value, prev_names = None): - print first_token assert first_token == "lstmp-layer" XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) @@ -267,19 +265,20 @@ def check_configs(self): for key in ['cell-dim', 'recurrent-projection-dim', 'non-recurrent-projection-dim']: if self.config[key] <= 0: - raise xparser_error("{0} has invalid value {1}.".format( - key, self.config[key]), self.str()) + raise RuntimeError("{0} has invalid value {1}.".format( + key, self.config[key])) if (self.config['recurrent-projection-dim'] + self.config['non-recurrent-projection-dim'] > self.config['cell-dim']): - raise xparser_error("recurrent+non-recurrent projection dim exceeds " - "cell dim: {0}".format(self.str())) + raise RuntimeError("recurrent+non-recurrent projection dim exceeds " + "cell dim.") for key in ['self-repair-scale-nonlinearity']: if self.config[key] < 0.0 or self.config[key] > 1.0: - raise xparser_error("{0} has invalid value {2}.".format(self.layer_type, - key, - self.config[key])) + raise RuntimeError("{0} has invalid value {2}." 
+ .format(self.layer_type, key, + self.config[key])) + def auxiliary_outputs(self): return ['c_t'] @@ -602,7 +601,7 @@ def set_derived_configs(self): def check_configs(self): key = 'cell-dim' if self.config['cell-dim'] <= 0: - raise xparser_error("cell-dim has invalid value {0}.".format(self.config[key]), self.str()) + raise RuntimeError("cell-dim has invalid value {0}.".format(self.config[key])) @@ -616,7 +615,7 @@ def output_name(self, auxiliary_output = None): node_name = 'c' self.c_needed = True else: - raise xparser_error("Unknown auxiliary output name {0}".format(auxiliary_output), self.str()) + raise RuntimeError("Unknown auxiliary output name {0}".format(auxiliary_output)) return '{0}.{1}'.format(self.name, node_name) def output_dim(self, auxiliary_output = None): @@ -626,7 +625,7 @@ def output_dim(self, auxiliary_output = None): return self.config['cell-dim'] # add code for other auxiliary_outputs here when we decide to expose them else: - raise xparser_error("Unknown auxiliary output name {0}".format(auxiliary_output), self.str()) + raise RuntimeError("Unknown auxiliary output name {0}".format(auxiliary_output)) return self.config['cell-dim'] def get_full_config(self): @@ -762,14 +761,14 @@ def check_configs(self): for key in ['cell-dim', 'recurrent-projection-dim', 'non-recurrent-projection-dim']: if self.config[key] <= 0: - raise xparser_error("{0} has invalid value {1}.".format( - key, self.config[key]), self.str()) + raise RuntimeError("{0} has invalid value {1}.".format( + key, self.config[key])) if (self.config['recurrent-projection-dim'] + self.config['non-recurrent-projection-dim'] > self.config['cell-dim']): - raise xparser_error("recurrent+non-recurrent projection dim exceeds " - "cell dim: {0}".format(self.str())) + raise RuntimeError("recurrent+non-recurrent projection dim exceeds " + "cell dim") def auxiliary_outputs(self): @@ -781,7 +780,7 @@ def output_name(self, auxiliary_output = None): if auxiliary_output in self.auxiliary_outputs(): node_name = auxiliary_output else: - raise xparser_error("Unknown auxiliary output name {0}".format(auxiliary_output), self.str()) + raise RuntimeError("Unknown auxiliary output name {0}".format(auxiliary_output)) return '{0}.{1}'.format(self.name, node_name) @@ -792,7 +791,7 @@ def output_dim(self, auxiliary_output = None): return self.config['cell-dim'] # add code for other auxiliary_outputs here when we decide to expose them else: - raise xparser_error("Unknown auxiliary output name {0}".format(auxiliary_output), self.str()) + raise RuntimeError("Unknown auxiliary output name {0}".format(auxiliary_output)) return self.config['recurrent-projection-dim'] + \ self.config['non-recurrent-projection-dim'] diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py index 7ccab2f6c6f..cc786d091ac 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py @@ -5,9 +5,12 @@ """ This module contains the top level xconfig parsing functions. """ +from __future__ import print_function + +import sys import libs.nnet3.xconfig.layers as xlayers import libs.nnet3.xconfig.utils as xutils -from libs.nnet3.xconfig.utils import XconfigParserError as xparser_error + # We have to modify this dictionary when adding new layers @@ -32,35 +35,24 @@ 'fast-lstmp-layer' : xlayers.XconfigFastLstmpLayer } -# Converts a line as parsed by ParseConfigLine() into a first -# token e.g. 'input-layer' and a key->value map, into -# an objet inherited from XconfigLayerBase. 
-# 'prev_names' is a list of previous layer names, it's needed -# to parse things like '[-1]' (meaning: the previous layer) -# when they appear in Desriptors. -def parsed_line_to_xconfig_layer(first_token, key_to_value, prev_names): - - conf_line = first_token + ' ' + ' '.join(['{0}={1}'.format(x,y) for x,y in key_to_value.items()]) - - if not config_to_layer.has_key(first_token): - raise xparser_error("No such layer type.", conf_line) - - try: - return config_to_layer[first_token](first_token, key_to_value, prev_names) - except xparser_error as e: - if e.conf_line is None: - # we want to throw informative errors which point to the xconfig line - e.conf_line = conf_line - raise - -# Uses ParseConfigLine() to turn a config line that has been parsed into -# a first token e.g. 'affine-layer' and a key->value map like { 'dim':'1024', 'name':'affine1' }, -# and then turns this into an object representing that line of the config file. -# 'prev_names' is a list of the names of preceding lines of the +# Turn a config line and a list of previous layers into +# either an object representing that line of the config file; or None +# if the line was empty after removing comments. +# 'prev_layers' is a list of objects corresponding to preceding layers of the # config file. -def config_line_to_object(config_line, prev_names = None): - (first_token, key_to_value) = xutils.parse_config_line(config_line) - return parsed_line_to_xconfig_layer(first_token, key_to_value, prev_names) +def xconfig_line_to_object(config_line, prev_layers = None): + try: + x = xutils.parse_config_line(config_line) + if x is None: + return None + (first_token, key_to_value) = x + if not config_to_layer.has_key(first_token): + raise RuntimeError("No such layer type '{0}'".format(first_token)) + return config_to_layer[first_token](first_token, key_to_value, prev_layers) + except Exception as e: + print("***Exception caught while parsing the following xconfig line:\n" + "*** {0}".format(config_line), file=sys.stderr) + raise e # This function reads an xconfig file and returns it as a list of layers # (usually we use the variable name 'all_layers' elsewhere for this). @@ -77,18 +69,14 @@ def read_xconfig_file(xconfig_filename): line = f.readline() if line == '': break - x = xutils.parse_config_line(line) - if x is None: - continue # line was blank or only comments. - (first_token, key_to_value) = x # the next call will raise an easy-to-understand exception if # it fails. - this_layer = parsed_line_to_xconfig_layer(first_token, - key_to_value, - all_layers) + this_layer = xconfig_line_to_object(line, all_layers) + if this_layer is None: + continue # line was blank after removing comments. 
all_layers.append(this_layer) if len(all_layers) == 0: - raise xparser_error("{0}: xconfig file '{1}' is empty".format( + raise RuntimeError("{0}: xconfig file '{1}' is empty".format( sys.argv[0], xconfig_filename)) f.close() return all_layers diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/tdnn.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/tdnn.py index 21f9db4f5c8..ed7b6f1f53c 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/tdnn.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/tdnn.py @@ -7,7 +7,6 @@ """ import libs.nnet3.xconfig.utils as xutils -from libs.nnet3.xconfig.utils import XconfigParserError as xparser_error from libs.nnet3.xconfig.basic_layers import XconfigBasicLayer from libs.nnet3.xconfig.basic_layers import XconfigLayerBase @@ -35,7 +34,7 @@ def set_default_configs(self): def check_configs(self): if self.config['splice-indexes'] == '': - raise xparser_error("splice-indexes has to be non-empty", self.str()) + raise RuntimeError("splice-indexes must be non-empty") super(XconfigTdnnLayer, self).check_configs() @@ -60,7 +59,7 @@ def get_splice_indexes(self): try: return map(lambda x: int(x), self.config['splice-indexes'].split(",")) except ValueError: - raise xparser_error("Invalid value for splice-indexes.", str(self)) + raise RuntimeError("Invalid value for splice-indexes.") @staticmethod def splice_input(input_desc, input_dim, @@ -106,5 +105,3 @@ def splice_input(input_desc, input_dim, return ["Append({0})".format(", ".join(appended_descriptors)), appended_dimension, configs] - - diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py index 87c9d880089..d88e0176ab5 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py @@ -10,17 +10,6 @@ import sys -class XconfigParserError(RuntimeError): - def __init__(self, error_msg, conf_line=None): - self.conf_line = conf_line - if conf_line is not None: - self.msg = 'While parsing "{c}" :{e}'.format(c=conf_line, e=error_msg) - else: - self.msg = error_msg - - def __str__(self): - return self.msg - # [utility function used in xconfig_layers.py] # Given a list of objects of type XconfigLayerBase ('all_layers'), # including at least the layers preceding 'current_layer' (and maybe @@ -36,8 +25,8 @@ def get_prev_names(all_layers, current_layer): prev_names_set = set() for name in prev_names: if name in prev_names_set: - raise XconfigParserError("{0}: Layer name {1} is used more than once.".format( - sys.argv[0], name), current_layer.str()) + raise RuntimeError("{0}: Layer name {1} is used more than once.".format( + sys.argv[0], name)) prev_names_set.add(name) return prev_names @@ -49,7 +38,7 @@ def split_layer_name(full_layer_name): assert isinstance(full_layer_name, str) split_name = full_layer_name.split('.') if len(split_name) == 0: - raise XconfigParserError("Bad layer name: " + full_layer_name) + raise RuntimeError("Bad layer name: " + full_layer_name) layer_name = split_name[0] if len(split_name) == 1: auxiliary_output = None @@ -73,15 +62,15 @@ def get_dim_from_layer_name(all_layers, current_layer, full_layer_name): break if layer.get_name() == layer_name: if not auxiliary_output in layer.auxiliary_outputs() and auxiliary_output is not None: - raise XconfigParserError("Layer '{0}' has no such auxiliary output: '{1}' ({0}.{1})".format(layer_name, auxiliary_output), layer.str()) + raise RuntimeError("Layer '{0}' has no such auxiliary output: '{1}' ({0}.{1})".format(layer_name, auxiliary_output)) return layer.output_dim(auxiliary_output) # 
No such layer was found. if layer_name in [ layer.get_name() for layer in all_layers ]: - raise XconfigParserError("Layer '{0}' was requested before it appeared in " + raise RuntimeError("Layer '{0}' was requested before it appeared in " "the xconfig file (circular dependencies or out-of-order " "layers".format(layer_name)) else: - raise XconfigParserError("No such layer: '{0}'".format(layer_name)) + raise RuntimeError("No such layer: '{0}'".format(layer_name)) # [utility function used in xconfig_layers.py] @@ -98,16 +87,16 @@ def get_string_from_layer_name(all_layers, current_layer, full_layer_name): break if layer.get_name() == layer_name: if not auxiliary_output in layer.auxiliary_outputs() and auxiliary_output is not None: - raise XconfigParserError("Layer '{0}' has no such auxiliary output: '{1}' ({0}.{1})".format( + raise RuntimeError("Layer '{0}' has no such auxiliary output: '{1}' ({0}.{1})".format( layer_name, auxiliary_output)) return layer.output_name(auxiliary_output) # No such layer was found. if layer_name in [ layer.get_name() for layer in all_layers ]: - raise XconfigParserError("Layer '{0}' was requested before it appeared in " + raise RuntimeError("Layer '{0}' was requested before it appeared in " "the xconfig file (circular dependencies or out-of-order " "layers".format(layer_name)) else: - raise XconfigParserError("No such layer: '{0}'".format(layer_name)) + raise RuntimeError("No such layer: '{0}'".format(layer_name)) # This function, used in converting string values in config lines to @@ -121,19 +110,19 @@ def convert_value_to_type(key, dest_type, string_value): elif string_value == "False" or string_value == "false": return False else: - raise XconfigParserError("Invalid configuration value {0}={1} (expected bool)".format( + raise RuntimeError("Invalid configuration value {0}={1} (expected bool)".format( key, string_value)) elif dest_type == type(int()): try: return int(string_value) except: - raise XconfigParserError("Invalid configuration value {0}={1} (expected int)".format( + raise RuntimeError("Invalid configuration value {0}={1} (expected int)".format( key, string_value)) elif dest_type == type(float()): try: return float(string_value) except: - raise XconfigParserError("Invalid configuration value {0}={1} (expected int)".format( + raise RuntimeError("Invalid configuration value {0}={1} (expected int)".format( key, string_value)) elif dest_type == type(str()): return string_value @@ -183,14 +172,14 @@ def __init__(self, # note: 'pos' should point to the 'end of string' marker # that terminates 'tokens'. if pos != len(tokens) - 1: - raise XconfigParserError("Parsing Descriptor, saw junk at end: " + + raise RuntimeError("Parsing Descriptor, saw junk at end: " + ' '.join(tokens[pos:-1])) # copy members from d. 
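
For reference, the layer-name splitting that get_dim_from_layer_name() and get_string_from_layer_name() rely on reduces to the sketch below (after split_layer_name() in utils.py; the empty-name check is a slight tightening, since split('.') never returns an empty list).

    def split_layer_name(full_layer_name):
        # "lstm1.c" -> ("lstm1", "c"); "lstm1" -> ("lstm1", None).
        parts = full_layer_name.split('.')
        if parts[0] == '':
            raise RuntimeError("Bad layer name: " + full_layer_name)
        if len(parts) == 1:
            return parts[0], None
        return parts[0], '.'.join(parts[1:])
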
self.operator = d.operator self.items = d.items - except XconfigParserError as e: + except RuntimeError as e: traceback.print_tb(sys.exc_info()[2]) - raise XconfigParserError("Error parsing Descriptor '{0}', specific error was: {1}".format( + raise RuntimeError("Error parsing Descriptor '{0}', specific error was: {1}".format( descriptor_string, repr(e))) # This is like the str() function, but it uses the layer_to_string function @@ -239,8 +228,8 @@ def dim(self, layer_to_dim): for desc in self.items[1:]: next_dim = desc.dim(layer_to_dim) if next_dim != dim: - raise XparserError("In descriptor {0}, different fields have different " - "dimensions: {1} != {2}".format(self.str(), dim, next_dim)) + raise RuntimeError("In descriptor {0}, different fields have different " + "dimensions: {1} != {2}".format(self.str(), dim, next_dim)) return dim elif self.operator in [ 'Offset', 'Round', 'ReplaceIndex' ]: # for these operators, only the 1st arg is relevant. @@ -248,7 +237,7 @@ def dim(self, layer_to_dim): elif self.operator == 'Append': return sum([ x.dim(layer_to_dim) for x in self.items]) else: - raise XconfigParserError("Unknown operator {0}".format(self.operator)) + raise RuntimeError("Unknown operator {0}".format(self.operator)) @@ -256,7 +245,7 @@ def dim(self, layer_to_dim): # exception if not. def expect_token(expected_item, seen_item, what_parsing): if seen_item != expected_item: - raise XconfigParserError("parsing {0}, expected '{1}' but got '{2}'".format( + raise RuntimeError("parsing {0}, expected '{1}' but got '{2}'".format( what_parsing, expected_item, seen_item)) # returns true if 'name' is valid as the name of a line (input, layer or output); @@ -298,18 +287,18 @@ def parse_new_descriptor(tokens, pos, prev_names): pos += 1 d.items.append(t_offset) except: - raise XconfigParserError("Parsing Offset(), expected integer, got " + tokens[pos]) + raise RuntimeError("Parsing Offset(), expected integer, got " + tokens[pos]) if tokens[pos] == ')': return (d, pos + 1) elif tokens[pos] != ',': - raise XconfigParserError("Parsing Offset(), expected ')' or ',', got " + tokens[pos]) + raise RuntimeError("Parsing Offset(), expected ')' or ',', got " + tokens[pos]) pos += 1 try: x_offset = int(tokens[pos]) pos += 1 d.items.append(x_offset) except: - raise XconfigParserError("Parsing Offset(), expected integer, got " + tokens[pos]) + raise RuntimeError("Parsing Offset(), expected integer, got " + tokens[pos]) expect_token(')', tokens[pos], 'Offset()') pos += 1 elif first_token in [ 'Append', 'Sum', 'Switch', 'Failover', 'IfDefined' ]: @@ -317,15 +306,15 @@ def parse_new_descriptor(tokens, pos, prev_names): if tokens[pos] == ')': # check num-items is correct for some special cases. if first_token == 'Failover' and len(d.items) != 2: - raise XconfigParserError("Parsing Failover(), expected 2 items but got {0}".format(len(d.items))) + raise RuntimeError("Parsing Failover(), expected 2 items but got {0}".format(len(d.items))) if first_token == 'IfDefined' and len(d.items) != 1: - raise XconfigParserError("Parsing IfDefined(), expected 1 item but got {0}".format(len(d.items))) + raise RuntimeError("Parsing IfDefined(), expected 1 item but got {0}".format(len(d.items))) pos += 1 break elif tokens[pos] == ',': pos += 1 # consume the comma. 
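
The dimension rules that Descriptor.dim() applies can be summarized operator by operator: Append sums the dimensions of its arguments, Offset/Round/ReplaceIndex inherit the dimension of their first argument, and the remaining operators require all arguments to agree. A sketch of that rule table over (operator, child-dims) pairs, separate from the Descriptor class itself (names here are illustrative):

    def descriptor_dim(operator, child_dims):
        # child_dims: dimensions of the descriptor's parsed arguments.
        if operator == 'Append':
            return sum(child_dims)
        elif operator in ['Offset', 'Round', 'ReplaceIndex']:
            # only the first argument carries the output dimension.
            return child_dims[0]
        elif operator in ['Sum', 'Failover', 'Switch', 'IfDefined', None]:
            if len(set(child_dims)) != 1:
                raise RuntimeError("different fields have different dimensions: "
                                   "{0}".format(child_dims))
            return child_dims[0]
        else:
            raise RuntimeError("Unknown operator {0}".format(operator))
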
else: - raise XconfigParserError("Parsing Append(), expected ')' or ',', got " + tokens[pos]) + raise RuntimeError("Parsing Append(), expected ')' or ',', got " + tokens[pos]) (desc, pos) = parse_new_descriptor(tokens, pos, prev_names) d.items.append(desc) @@ -338,7 +327,7 @@ def parse_new_descriptor(tokens, pos, prev_names): pos += 1 d.items.append(t_modulus) except: - raise XconfigParserError("Parsing Offset(), expected integer, got " + tokens[pos]) + raise RuntimeError("Parsing Offset(), expected integer, got " + tokens[pos]) expect_token(')', tokens[pos], 'Round()') pos += 1 elif first_token == 'ReplaceIndex': @@ -348,7 +337,7 @@ def parse_new_descriptor(tokens, pos, prev_names): d.items.append(tokens[pos]) pos += 1 else: - raise XconfigParserError("Parsing ReplaceIndex(), expected 'x' or 't', got " + + raise RuntimeError("Parsing ReplaceIndex(), expected 'x' or 't', got " + tokens[pos]) expect_token(',', tokens[pos], 'ReplaceIndex()') pos += 1 @@ -357,13 +346,13 @@ def parse_new_descriptor(tokens, pos, prev_names): pos += 1 d.items.append(new_value) except: - raise XconfigParserError("Parsing Offset(), expected integer, got " + tokens[pos]) + raise RuntimeError("Parsing Offset(), expected integer, got " + tokens[pos]) expect_token(')', tokens[pos], 'ReplaceIndex()') pos += 1 else: - raise XconfigParserError("code error") + raise RuntimeError("code error") elif first_token in [ 'end of string', '(', ')', ',', '@' ]: - raise XconfigParserError("Expected descriptor, got " + first_token) + raise RuntimeError("Expected descriptor, got " + first_token) elif is_valid_line_name(first_token) or first_token == '[': # This section parses a raw input/layer/output name, e.g. "affine2" # (which must start with an alphabetic character or underscore), @@ -381,7 +370,7 @@ def parse_new_descriptor(tokens, pos, prev_names): offset_t = int(tokens[pos]) pos += 1 except: - raise XconfigParserError("Parse error parsing {0}@{1}".format( + raise RuntimeError("Parse error parsing {0}@{1}".format( first_token, tokens[pos])) if offset_t != 0: inner_d = d @@ -398,11 +387,11 @@ def parse_new_descriptor(tokens, pos, prev_names): try: offset_t = int(first_token) except: - raise XconfigParserError("Parsing descriptor, expected descriptor but got " + + raise RuntimeError("Parsing descriptor, expected descriptor but got " + first_token) assert isinstance(prev_names, list) if len(prev_names) < 1: - raise XconfigParserError("Parsing descriptor, could not interpret '{0}' because " + raise RuntimeError("Parsing descriptor, could not interpret '{0}' because " "there is no previous layer".format(first_token)) d.operator = None # the layer name is the name of the most recent layer. @@ -433,10 +422,10 @@ def replace_bracket_expressions_in_descriptor(descriptor_string, f = fields[i] i += 1 if f == ']': - raise XconfigParserError("Unmatched ']' in descriptor") + raise RuntimeError("Unmatched ']' in descriptor") elif f == '[': if i + 2 >= len(fields): - raise XconfigParserError("Error tokenizing string '{0}': '[' found too close " + raise RuntimeError("Error tokenizing string '{0}': '[' found too close " "to the end of the descriptor.".format(descriptor_string)) assert isinstance(prev_names, list) try: @@ -444,7 +433,7 @@ def replace_bracket_expressions_in_descriptor(descriptor_string, assert offset < 0 and -offset <= len(prev_names) i += 2 # consume the int and the ']'. 
except: - raise XconfigParserError("Error tokenizing string '{0}': expression [{1}] has an " + raise RuntimeError("Error tokenizing string '{0}': expression [{1}] has an " "invalid or out of range offset.".format(descriptor_string, fields[i])) this_field = prev_names[offset] out_fields.append(this_field) @@ -484,7 +473,8 @@ def tokenize_descriptor(descriptor_string, # (first_token, fields), as (string, dict) e.g. in this case # ('affine-layer', {'name':'affine1', 'input':'Append(-3, 0, 3)" # Note: spaces are allowed in the field names but = signs are -# disallowed, which is why it's possible to parse them. +# disallowed, except when quoted with double quotes, +# which is why it's possible to parse them. # This function also removes comments (anything after '#'). # As a special case, this function will return None if the line # is empty after removing spaces. @@ -493,8 +483,16 @@ def parse_config_line(orig_config_line): # note: splitting on '#' will always give at least one field... python # treats splitting on space as a special case that may give zero fields. config_line = orig_config_line.split('#')[0] - if re.match('[^a-zA-Z0-9\.\-\(\)_\s"]', config_line) is not None: - raise XconfigParserError("Xconfig line has unknown characters.", config_line) + # Note: this set of allowed characters may have to be expanded in future. + x = re.search('[^a-zA-Z0-9\.\-\(\)_=,/\s"]', config_line) + if x is not None: + bad_char = x.group(0) + if bad_char == "'": + raise RuntimeError("Xconfig line has disallowed character ' (use " + "double quotes for strings containing = signs)") + else: + raise RuntimeError("Xconfig line has disallowed character: {0}" + .format(bad_char)) # Now split on space; later we may splice things back together. fields=config_line.split() @@ -503,8 +501,8 @@ def parse_config_line(orig_config_line): first_token = fields[0] # if first_token does not look like 'foo-bar' or 'foo-bar2', then die. if re.match('^[a-z][-a-z0-9]+$', first_token) is None: - raise XconfigParserError("Error parsing config line (first field doesn't look right): {0}".format( - orig_config_line)) + raise RuntimeError("Error parsing config line (first field doesn't look right).") + # get rid of the first field which we put in 'first_token'. 
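
The quote handling in parse_config_line() that follows pairs up double-quote positions so that values containing '=' or spaces survive tokenization. A simplified sketch using a single regular expression; unlike the real parser, which splices values back together at key= boundaries, this assumes unquoted values contain no spaces:

    import re

    def parse_key_values(rest_of_line):
        # Matches key="quoted value" first, then bare key=value;
        # quoted values may contain '=' signs and spaces.
        if rest_of_line.count('"') % 2 != 0:
            raise RuntimeError("Double-quotes should occur in pairs")
        pattern = r'([-a-zA-Z0-9_]+)=(?:"([^"]*)"|(\S+))'
        ans = {}
        for m in re.finditer(pattern, rest_of_line):
            key = m.group(1)
            if key in ans:
                raise RuntimeError("multiply defined variable " + key)
            ans[key] = m.group(2) if m.group(2) is not None else m.group(3)
        return ans
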
fields = fields[1:] @@ -512,7 +510,7 @@ def parse_config_line(orig_config_line): # rest of the line can be of the form 'a=1 b=" x=1 y=2 " c=Append( i1, i2)' positions = map(lambda x: x.start(), re.finditer('"', rest_of_line)) if not len(positions) % 2 == 0: - raise XconfigParserError('"s should occur in pairs', config_line) + raise RuntimeError("Double-quotes should occur in pairs") # add the " enclosed strings and corresponding keys to the dict # and remove them from the rest_of_line @@ -535,42 +533,21 @@ def parse_config_line(orig_config_line): ans_dict = dict() other_fields = re.split(r'\s*([-a-zA-Z0-9_]*)=', rest_of_line) if not (other_fields[0] == '' and len(other_fields) % 2 == 1): - raise XconfigParserError("Could not parse config line: " + orig_config_line) + raise RuntimeError("Could not parse config line."); fields += other_fields[1:] num_variables = len(fields) / 2 for i in range(num_variables): var_name = fields[i * 2] var_value = fields[i * 2 + 1] if re.match(r'[a-zA-Z_]', var_name) is None: - raise XconfigParserError("Expected variable name '{0}' to start with alphabetic character or _, " + raise RuntimeError("Expected variable name '{0}' to start with alphabetic character or _, " "in config line {1}".format(var_name, orig_config_line)) if var_name in ans_dict: - raise XconfigParserError("Config line has multiply defined variable {0}: {1}".format( + raise RuntimeError("Config line has multiply defined variable {0}: {1}".format( var_name, orig_config_line)) ans_dict[var_name] = var_value return (first_token, ans_dict) -# Reads a config file and returns a list of objects, where each object -# represents one line of the file. -def read_config_file(filename): - try: - f = open(filename, "r") - except XconfigParserError as e: - raise XconfigParserError("Error reading config file {0}: {1}".format( - filename, repr(e))) - ans = [] - prev_names = [] - while True: - line = f.readline() - if line == '': - break - x = parse_config_line(line) - if x is None: - continue # blank line - (first_token, key_to_value) = x - layer_object = config_line_to_object(first_token, key_to_value, prev_names) - ans.append(layer_object) - prev_names.append(layer_object.get_name()) def test_library(): tokenize_test = lambda x: tokenize_descriptor(x)[:-1] # remove 'end of string' diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index c55dae18b19..d3abb82c92c 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -105,7 +105,7 @@ def write_expanded_xconfig_files(config_dir, all_layers): '# It contains the same content as ./xconfig but it was parsed,\n' '# default config values were set, \n' '# and Descriptors (input=xxx) were normalized.\n' - '# See also ./xconfig.expanded.1\n\n', + '# See also ./xconfig.expanded.1\n', file=xconfig_file_out) for layer in all_layers: From 88e1e75faffac08ca4fb0c2101146281da2a3d57 Mon Sep 17 00:00:00 2001 From: Feiteng Li Date: Mon, 26 Dec 2016 15:29:25 +0800 Subject: [PATCH 160/530] Fix raw nnet3 training (#1288) --- egs/wsj/s5/steps/nnet3/get_egs_targets.sh | 7 ++++--- egs/wsj/s5/steps/nnet3/train_raw_dnn.py | 18 +++++++++++------- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 18 +++++++++++------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh index 7fbc24858b5..309c89cf99d 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh +++ 
b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh @@ -59,6 +59,7 @@ stage=0 nj=6 # This should be set to the maximum number of jobs you are # comfortable to run in parallel; you can increase it if your disk # speed is greater and you have more machines. +srand=0 online_ivector_dir= # can be used if we are including speaker information as iVectors. cmvn_opts= # can be used for specifying CMVN options, if feature type is not lda (if lda, # it doesn't make sense to use different options than were used as input to the @@ -349,7 +350,7 @@ if [ $stage -le 4 ]; then $get_egs_program \ $ivector_opt $egs_opts --num-frames=$frames_per_eg "$feats" "$targets" \ ark:- \| \ - nnet3-copy-egs --random=true --srand=JOB ark:- $egs_list || exit 1; + nnet3-copy-egs --random=true --srand=\$[JOB+$srand] ark:- $egs_list || exit 1; fi if [ $stage -le 5 ]; then @@ -365,7 +366,7 @@ if [ $stage -le 5 ]; then if [ $archives_multiple == 1 ]; then # normal case. $cmd --max-jobs-run $nj JOB=1:$num_archives_intermediate $dir/log/shuffle.JOB.log \ - nnet3-shuffle-egs --srand=JOB "ark:cat $egs_list|" ark:$dir/egs.JOB.ark || exit 1; + nnet3-shuffle-egs --srand=\$[JOB+$srand] "ark:cat $egs_list|" ark:$dir/egs.JOB.ark || exit 1; else # we need to shuffle the 'intermediate archives' and then split into the # final archives. we create soft links to manage this splitting, because @@ -381,7 +382,7 @@ if [ $stage -le 5 ]; then done done $cmd --max-jobs-run $nj JOB=1:$num_archives_intermediate $dir/log/shuffle.JOB.log \ - nnet3-shuffle-egs --srand=JOB "ark:cat $egs_list|" ark:- \| \ + nnet3-shuffle-egs --srand=\$[JOB+$srand] "ark:cat $egs_list|" ark:- \| \ nnet3-copy-egs ark:- $output_archives || exit 1; fi diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index b67ba8792a8..b8fe4a25384 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -200,20 +200,24 @@ def train(args, run_opts, background_process_handler): if args.use_dense_targets: target_type = "dense" + try: + num_targets = int(variables['num_targets']) + if (common_lib.get_feat_dim_from_scp(args.targets_scp) + != num_targets): + raise Exception("Mismatch between num-targets provided to " + "script vs configs") + except KeyError as e: + num_targets = -1 + else: + target_type = "sparse" try: num_targets = int(variables['num_targets']) except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined " "in {1}".format( str(e), '{0}/configs'.format(args.dir))) - if (common_lib.get_feat_dim_from_scp(args.targets_scp) - != num_targets): - raise Exception("Mismatch between num-targets provided to " - "script vs configs") - else: - target_type = "sparse" - train_lib.raw_model.generate_egs_from_targets( + train_lib.raw_model.generate_egs_using_targets( data=args.feat_dir, targets_scp=args.targets_scp, egs_dir=default_egs_dir, left_context=left_context, right_context=right_context, diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index 29df61ab546..8366eccc993 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -279,20 +279,24 @@ def train(args, run_opts, background_process_handler): if args.use_dense_targets: target_type = "dense" + try: + num_targets = int(variables['num_targets']) + if (common_lib.get_feat_dim_from_scp(args.targets_scp) + != num_targets): + raise Exception("Mismatch between num-targets provided to " + "script vs configs") + except KeyError as e: + num_targets = -1 + 
else: + target_type = "sparse" try: num_targets = int(variables['num_targets']) except KeyError as e: raise Exception("KeyError {0}: Variables need to be defined " "in {1}".format( str(e), '{0}/configs'.format(args.dir))) - if (common_lib.get_feat_dim_from_scp(args.targets_scp) - != num_targets): - raise Exception("Mismatch between num-targets provided to " - "script vs configs") - else: - target_type = "sparse" - train_lib.raw_model.generate_egs_from_targets( + train_lib.raw_model.generate_egs_using_targets( data=args.feat_dir, targets_scp=args.targets_scp, egs_dir=default_egs_dir, left_context=left_context, right_context=right_context, From 92563eac2dd197557265d8f18e1bda41593eccae Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Mon, 26 Dec 2016 23:06:41 -0800 Subject: [PATCH 161/530] Some partial work towards getting the new egs-merging process working (currently only for regular nnet3 egs) --- src/nnet3/nnet-common.cc | 30 +++ src/nnet3/nnet-common.h | 7 +- src/nnet3/nnet-computation.cc | 10 +- src/nnet3/nnet-computation.h | 10 +- src/nnet3/nnet-example-utils.cc | 357 ++++++++++++++++++++++++++++++++ src/nnet3/nnet-example-utils.h | 193 +++++++++++++++++ src/nnet3/nnet-example.cc | 48 +++++ src/nnet3/nnet-example.h | 48 +++++ src/nnet3/nnet-optimize.cc | 51 ++--- src/nnet3/nnet-optimize.h | 11 +- src/nnet3bin/nnet3-merge-egs.cc | 8 +- src/util/stl-utils.h | 12 +- src/util/timer.h | 28 --- 13 files changed, 729 insertions(+), 84 deletions(-) delete mode 100644 src/util/timer.h diff --git a/src/nnet3/nnet-common.cc b/src/nnet3/nnet-common.cc index cc3e8d2c79c..04132bc42c4 100644 --- a/src/nnet3/nnet-common.cc +++ b/src/nnet3/nnet-common.cc @@ -364,6 +364,36 @@ size_t CindexVectorHasher::operator () ( return ans; } +size_t IndexVectorHasher::operator () ( + const std::vector &index_vector) const { + size_t n1 = 15, n2 = 10; // n1 and n2 are used to extract only a subset of + // elements to hash; this makes the hasher faster by + // skipping over more elements. Setting n1 large or + // n2 to 1 would make the hasher consider all + // elements. + // all long-ish numbers appearing below are randomly chosen primes. + size_t ans = 1433 + 34949 * index_vector.size(); + std::vector::const_iterator iter = index_vector.begin(), + end = index_vector.end(), med = end; + if (med > iter + n1) + med = iter + n1; + + for (; iter != med; ++iter) { + ans += iter->n * 1619; + ans += iter->t * 15649; + ans += iter->x * 89809; + } + // after the first n1 values, look only at every n2'th value. this makes the + // hashing much faster, and in the kinds of structures that we actually deal + // with, we shouldn't get unnecessary hash collisions as a result of this + // optimization. + for (; iter < end; iter += n2) { + ans += iter->n * 1619; + ans += iter->t * 15649; + ans += iter->x * 89809; + } + return ans; +} std::ostream &operator << (std::ostream &ostream, const Index &index) { return ostream << '(' << index.n << ' ' << index.t << ' ' << index.x << ')'; diff --git a/src/nnet3/nnet-common.h b/src/nnet3/nnet-common.h index f76166c0758..cb5d8c3b944 100644 --- a/src/nnet3/nnet-common.h +++ b/src/nnet3/nnet-common.h @@ -107,11 +107,16 @@ struct CindexHasher { size_t operator () (const Cindex &cindex) const; }; - struct CindexVectorHasher { size_t operator () (const std::vector &cindex_vector) const; }; +// Note: because IndexVectorHasher is used in some things where we really need +// it to be fast, it doesn't look at all the indexes, just most of them. 
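
The subsampling trick in IndexVectorHasher, transcribed into a Python sketch: hash the length and the first n1 entries, then only every n2'th entry thereafter, so that very long index vectors stay cheap to hash. The multipliers mirror the C++ constants and are otherwise arbitrary primes.

    def subsampled_hash(indexes, n1=15, n2=10):
        # 'indexes' is a sequence of (n, t, x) integer triples.
        ans = 1433 + 34949 * len(indexes)
        for n, t, x in indexes[:n1]:
            ans += n * 1619 + t * 15649 + x * 89809
        # after the first n1 entries, look only at every n2'th entry.
        for n, t, x in indexes[n1::n2]:
            ans += n * 1619 + t * 15649 + x * 89809
        return ans
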
+struct IndexVectorHasher {
+  size_t operator () (const std::vector<Index> &index_vector) const;
+};
+
 
 // this will only be used for pretty-printing.
diff --git a/src/nnet3/nnet-computation.cc b/src/nnet3/nnet-computation.cc
index d5e6e1654a6..55cf23883ea 100644
--- a/src/nnet3/nnet-computation.cc
+++ b/src/nnet3/nnet-computation.cc
@@ -1129,7 +1129,15 @@ void NnetComputation::GetWholeSubmatrices(
   }
 }
 
-
+size_t IoSpecificationHasher::operator () (
+    const IoSpecification &io_spec) const {
+  StringHasher string_hasher;
+  IndexVectorHasher indexes_hasher;
+  // 4261 was chosen at random from a list of primes.
+  return string_hasher(io_spec.name) +
+      indexes_hasher(io_spec.indexes) +
+      (io_spec.has_deriv ? 4261 : 0);
+}
 
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/nnet-computation.h b/src/nnet3/nnet-computation.h
index fd8cb06d06b..c7972da2102 100644
--- a/src/nnet3/nnet-computation.h
+++ b/src/nnet3/nnet-computation.h
@@ -65,8 +65,10 @@ struct MiscComputationInfo {
 // produce.  For inputs, the name should correspond to an input or component
 // node name in the nnet (components are allowed so context can be provided in
 // recurrent setups); for outputs, the name should be an output node name in the
-// Nnet.  In the normal case there will just be one input and one output, and
-// the indexes will vary only in the t index, with the others all identical.
+// Nnet.
+// note: this structure is used to represent egs both before and after merging
+// into minibatches; if this merging has been done, the indexes will vary in
+// the 'n' dimension.
 struct IoSpecification {
   std::string name;
   std::vector<Index> indexes;
@@ -97,6 +99,10 @@ struct IoSpecification {
   bool operator== (const IoSpecification &other) const;
 };
 
+struct IoSpecificationHasher {
+  size_t operator () (const IoSpecification &io_spec) const;
+};
+
 
 // struct ComputationRequest is whatever we need in addition to the
 // network itself in order to create the structure of a computation.  The most
diff --git a/src/nnet3/nnet-example-utils.cc b/src/nnet3/nnet-example-utils.cc
index dc9dedefe43..e88eff71e77 100644
--- a/src/nnet3/nnet-example-utils.cc
+++ b/src/nnet3/nnet-example-utils.cc
@@ -23,6 +23,7 @@
 #include "hmm/posterior.h"
 #include "util/text-utils.h"
 #include <numeric>
+#include <iomanip>
 
 namespace kaldi {
 namespace nnet3 {
@@ -814,6 +815,362 @@ void UtteranceSplitter::SetOutputWeights(
   }
 }
 
+// static
+bool ExampleMergingConfig::ParseIntSet(const std::string &str,
+                                       ExampleMergingConfig::IntSet *int_set) {
+  std::vector<std::string> split_str;
+  SplitStringToVector(str, ",", false, &split_str);
+  if (split_str.empty())
+    return false;
+  int_set->largest_size = 0;
+  int_set->ranges.resize(split_str.size());
+  for (size_t i = 0; i < split_str.size(); i++) {
+    std::vector<int32> split_range;
+    // note: because we split on '-', it's not possible to
+    // get negative values in 'split_range'.
+    SplitStringToIntegers(split_str[i], "-", false, &split_range);
+    if (split_range.size() < 1 || split_range.size() > 2 ||
+        split_range[0] > split_range.back())
+      return false;
+    int_set->ranges[i].first = split_range[0];
+    int_set->ranges[i].second = split_range.back();
+    int_set->largest_size = std::max(int_set->largest_size,
+                                     split_range.back());
+  }
+  return true;
+}
+
+void ExampleMergingConfig::ComputeDerived() {
+  if (measure_output_frames != "deprecated") {
+    KALDI_WARN << "The --measure-output-frames option is deprecated "
+        "and will be ignored.";
+  }
+  if (discard_partial_minibatches != "deprecated") {
+    KALDI_WARN << "The --discard-partial-minibatches option is deprecated "
+        "and will be ignored.";
+  }
+  std::vector<std::string> minibatch_size_split;
+  SplitStringToVector(minibatch_size, "/", false, &minibatch_size_split);
+  if (minibatch_size_split.empty()) {
+    KALDI_ERR << "Invalid option --minibatch-size=" << minibatch_size;
+  }
+
+  rules.resize(minibatch_size_split.size());
+  for (size_t i = 0; i < minibatch_size_split.size(); i++) {
+    int32 &minibatch_size = rules[i].first;
+    IntSet &int_set = rules[i].second;
+    // 'this_rule' will be either something like "256" or like "64-128,256"
+    // (but these two only if minibatch_size_split.size() == 1), or something with
+    // an example-size specified, like "256=64-128,256"
+    std::string &this_rule = minibatch_size_split[i];
+    if (this_rule.find('=') != std::string::npos) {
+      std::vector<std::string> rule_split;  // split on '='
+      SplitStringToVector(this_rule, "=", false, &rule_split);
+      if (rule_split.size() != 2) {
+        KALDI_ERR << "Could not parse option --minibatch-size="
+                  << minibatch_size;
+      }
+      if (!ConvertStringToInteger(rule_split[0], &minibatch_size) ||
+          !ParseIntSet(rule_split[1], &int_set))
+        KALDI_ERR << "Could not parse option --minibatch-size="
+                  << minibatch_size;
+
+    } else {
+      if (minibatch_size_split.size() != 1) {
+        KALDI_ERR << "Could not parse option --minibatch-size="
+                  << minibatch_size << " (all rules must have "
+                  << "minibatch-size specified if >1 rule)";
+      }
+      minibatch_size = 0;
+      if (!ParseIntSet(this_rule, &int_set))
+        KALDI_ERR << "Could not parse option --minibatch-size="
+                  << minibatch_size;
+    }
+  }
+  {
+    // check that no size is repeated.
+    std::vector<int32> all_sizes(minibatch_size_split.size());
+    for (size_t i = 0; i < minibatch_size_split.size(); i++)
+      all_sizes[i] = rules[i].first;
+    std::sort(all_sizes.begin(), all_sizes.end());
+    if (!IsSortedAndUniq(all_sizes)) {
+      KALDI_ERR << "Invalid --minibatch-size=" << minibatch_size
+                << " (repeated example-sizes)";
+    }
+  }
+}
+
+int32 ExampleMergingConfig::MinibatchSize(int32 size_of_eg,
+                                          int32 num_available_egs,
+                                          bool input_ended) const {
+  KALDI_ASSERT(num_available_egs > 0 && size_of_eg > 0);
+  int32 num_rules = rules.size();
+  if (num_rules == 0)
+    KALDI_ERR << "You need to call ComputeDerived() before calling "
+        "MinibatchSize().";
+  int32 min_distance = std::numeric_limits<int32>::max(),
+      closest_rule_index = 0;
+  for (int32 i = 0; i < num_rules; i++) {
+    int32 distance = std::abs(size_of_eg - rules[i].first);
+    if (distance < min_distance) {
+      min_distance = distance;
+      closest_rule_index = i;
+    }
+  }
+  if (!input_ended) {
+    // until the input ends, we can only use the largest available
+    // minibatch-size (otherwise, we could expect more later).
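
How a --minibatch-size string such as "128=64-128,256/256=32-64,128" behaves end to end, as a hedged Python sketch of the same rule parsing and selection (not the Kaldi API; rules here are simply (eg-size, ranges) pairs):

    def parse_ranges(ranges_str):
        # "64-128,256" -> [(64, 128), (256, 256)]
        ranges = []
        for r in ranges_str.split(','):
            parts = [int(p) for p in r.split('-')]
            ranges.append((parts[0], parts[-1]))
        return ranges

    def parse_rules(spec):
        # "128=64-128,256/256=32-64,128" -> [(128, [...]), (256, [...])]
        return [(int(rule.split('=')[0]), parse_ranges(rule.split('=')[1]))
                for rule in spec.split('/')]

    def choose_minibatch_size(rules, size_of_eg, num_available, input_ended):
        # pick the rule whose eg-size is closest to this eg's size.
        eg_size, ranges = min(rules, key=lambda rule: abs(size_of_eg - rule[0]))
        if not input_ended:
            # before the input ends, only the largest allowed size may be used.
            largest = max(hi for lo, hi in ranges)
            return largest if largest <= num_available else 0
        # afterwards: largest allowed value that is <= num_available, else 0,
        # matching IntSet::LargestValueInRange().
        candidates = [min(hi, num_available) for lo, hi in ranges
                      if lo <= num_available]
        return max(candidates) if candidates else 0

For example, with the rules above and input still arriving, an eg of size 100 maps to the 128 rule and only minibatches of 256 are produced; once the input ends, 90 leftover egs would be flushed as a minibatch of 90 (inside the 64-128 range).
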
+    int32 largest_size = rules[closest_rule_index].second.largest_size;
+    if (largest_size <= num_available_egs)
+      return largest_size;
+    else
+      return 0;
+  } else {
+    int32 s = rules[closest_rule_index].second.LargestValueInRange(
+        num_available_egs);
+    KALDI_ASSERT(s <= num_available_egs);
+    return s;
+  }
+}
+
+
+void ExampleSizeStats::WroteExample(int32 example_size,
+                                    size_t structure_hash,
+                                    int32 minibatch_size) {
+  std::pair<int32, size_t> p(example_size, structure_hash);
+
+
+  unordered_map<int32, int32> &h = stats_[p].minibatch_to_num_written;
+  unordered_map<int32, int32>::iterator iter = h.find(minibatch_size);
+  if (iter == h.end())
+    h[minibatch_size] = 1;
+  else
+    iter->second += 1;
+}
+
+void ExampleSizeStats::DiscardedExamples(int32 example_size,
+                                         size_t structure_hash,
+                                         int32 num_discarded) {
+  std::pair<int32, size_t> p(example_size, structure_hash);
+  stats_[p].num_discarded += num_discarded;
+}
+
+
+void ExampleSizeStats::PrintStats() const {
+  PrintAggregateStats();
+  PrintSpecificStats();
+}
+
+void ExampleSizeStats::PrintAggregateStats() const {
+  // First print some aggregate stats.
+  int64 num_distinct_egs_types = 0,  // number of distinct types of input egs
+                                     // (differing in size or structure).
+      total_discarded_egs = 0,  // total number of discarded egs.
+      total_discarded_egs_size = 0,  // total number of discarded egs each
+                                     // multiplied by size of that eg
+      total_non_discarded_egs = 0,  // total over all minibatches written, of
+                                    // minibatch-size, equals number of input egs
+                                    // that were not discarded.
+      total_non_discarded_egs_size = 0,  // total over all minibatches of
+                                         // size-of-eg * minibatch-size.
+      num_minibatches = 0,  // total number of minibatches
+      num_distinct_minibatch_types = 0;  // total number of combinations of
+                                         // (type-of-eg, number of distinct
+                                         // minibatch-sizes for that eg-type);
+                                         // reflects the number of times we have
+                                         // to compile.
+
+  StatsType::const_iterator eg_iter = stats_.begin(), eg_end = stats_.end();
+
+  for (; eg_iter != eg_end; ++eg_iter) {
+    int32 eg_size = eg_iter->first.first;
+    const StatsForExampleSize &stats = eg_iter->second;
+    num_distinct_egs_types++;
+    total_discarded_egs += stats.num_discarded;
+    total_discarded_egs_size += stats.num_discarded * eg_size;
+
+    unordered_map<int32, int32>::const_iterator
+        mb_iter = stats.minibatch_to_num_written.begin(),
+        mb_end = stats.minibatch_to_num_written.end();
+    for (; mb_iter != mb_end; ++mb_iter) {
+      int32 mb_size = mb_iter->first,
+          num_written = mb_iter->second;
+      num_distinct_minibatch_types++;
+      num_minibatches += num_written;
+      total_non_discarded_egs += num_written * mb_size;
+      total_non_discarded_egs_size += num_written * mb_size * eg_size;
+    }
+  }
+  // the averages are written as integers- we don't really need more precision
+  // than that.
+  int64 total_input_egs = total_discarded_egs + total_non_discarded_egs,
+      total_input_egs_size =
+      total_discarded_egs_size + total_non_discarded_egs_size;
+
+  float avg_input_egs_size = total_input_egs_size * 1.0 / total_input_egs;
+  float percent_discarded = total_discarded_egs * 100.0 / total_input_egs;
+  // note: by minibatch size we mean the number of egs per minibatch, it
+  // does not take note of the size of the input egs.
+  float avg_minibatch_size = total_non_discarded_egs * 1.0 / num_minibatches;
+
+  std::ostringstream os;
+  os << std::setprecision(4);
+  os << "Processed " << total_input_egs
+     << " egs of avg. size " << avg_input_egs_size
+     << " into " << num_minibatches << " minibatches, discarding "
+     << percent_discarded << "% of egs. Avg minibatch size was "
+     << avg_minibatch_size << ", distinct types of egs/minibatches "
+     << "was " << num_distinct_egs_types << "/"
+     << num_distinct_minibatch_types;
+  KALDI_LOG << os.str();
+}
+
+void ExampleSizeStats::PrintSpecificStats() const {
+  KALDI_LOG << "Merged specific eg types as follows [format: <eg-size1>="
+      "{<mb-size1>-><num-written1>,<mb-size2>-><num-written2>.../d=<num-discarded>}"
+      ",<eg-size2>={...},... (note,egs-size == number of input "
+      "frames including context).";
+  std::ostringstream os;
+
+  // copy from unordered map to map to get sorting, for consistent output.
+  typedef std::map<std::pair<int32, size_t>, StatsForExampleSize> SortedMapType;
+
+  SortedMapType stats;
+  stats.insert(stats_.begin(), stats_.end());
+  SortedMapType::const_iterator eg_iter = stats.begin(), eg_end = stats.end();
+  for (; eg_iter != eg_end; ++eg_iter) {
+    int32 eg_size = eg_iter->first.first;
+    if (eg_iter != stats.begin())
+      os << ",";
+    os << eg_size << "={";
+    const StatsForExampleSize &stats = eg_iter->second;
+    unordered_map<int32, int32>::const_iterator
+        mb_iter = stats.minibatch_to_num_written.begin(),
+        mb_end = stats.minibatch_to_num_written.end();
+    for (; mb_iter != mb_end; ++mb_iter) {
+      int32 mb_size = mb_iter->first,
+          num_written = mb_iter->second;
+      if (mb_iter != stats.minibatch_to_num_written.begin())
+        os << ",";
+      os << mb_size << "->" << num_written;
+    }
+    os << ",d=" << stats.num_discarded << "}";
+  }
+  KALDI_LOG << os.str();
+}
+
+
+
+int32 GetNnetExampleSize(const NnetExample &a) {
+  int32 ans = 0;
+  for (size_t i = 0; i < a.io.size(); i++) {
+    int32 s = a.io[i].indexes.size();
+    if (s > ans)
+      ans = s;
+  }
+  return ans;
+}
+
+ExampleMerger::ExampleMerger(const ExampleMergingConfig &config,
+                             NnetExampleWriter *writer):
+    finished_(false), num_egs_written_(0),
+    config_(config), writer_(writer) { }
+
+
+void ExampleMerger::AcceptExample(NnetExample *eg) {
+  KALDI_ASSERT(!finished_);
+  // If an eg with the same structure as 'eg' is already a key in the
+  // map, it won't be replaced, but if it's new it will be made
+  // the key.  Also we remove the key before making the vector empty.
+  // This way we ensure that the eg in the key is always the first
+  // element of the vector.
+  std::vector<NnetExample*> &vec = eg_to_egs_[eg];
+  vec.push_back(eg);
+  int32 eg_size = GetNnetExampleSize(*eg),
+      num_available = vec.size();
+  bool input_ended = false;
+  int32 minibatch_size = config_.MinibatchSize(eg_size, num_available,
                                               input_ended);
+  if (minibatch_size != 0) {  // we need to write out a merged eg.
+    KALDI_ASSERT(minibatch_size == num_available);
+
+    std::vector<NnetExample*> vec_copy(vec);
+    eg_to_egs_.erase(eg);
+
+    // MergeExamples() expects a vector of NnetExample, not of pointers,
+    // so use swap to create that without doing any real work.
+    std::vector<NnetExample> egs_to_merge(minibatch_size);
+    for (int32 i = 0; i < minibatch_size; i++) {
+      egs_to_merge[i].Swap(vec_copy[i]);
+      delete vec_copy[i];  // we owned those pointers.
+    }
+    WriteMinibatch(egs_to_merge);
+  }
+}
+
+void ExampleMerger::WriteMinibatch(const std::vector<NnetExample> &egs) {
+  KALDI_ASSERT(!egs.empty());
+  int32 eg_size = GetNnetExampleSize(egs[0]);
+  NnetExampleStructureHasher eg_hasher;
+  size_t structure_hash = eg_hasher(egs[0]);
+  int32 minibatch_size = egs.size();
+  stats_.WroteExample(eg_size, structure_hash, minibatch_size);
+  NnetExample merged_eg;
+  MergeExamples(egs, config_.compress, &merged_eg);
+  std::ostringstream key;
+  key << "merged-" << (num_egs_written_++) << "-" << minibatch_size;
+  writer_->Write(key.str(), merged_eg);
+}
+
+void ExampleMerger::Finish() {
+  if (finished_) return;  // already finished.
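
The shape of AcceptExample() in miniature: bucket incoming egs in a map keyed by structure, and flush a bucket the moment the config says a full minibatch is available. A schematic Python sketch, with structure_key and size_fn standing in for the structure hasher/comparator and ExampleMergingConfig.MinibatchSize(); these names are illustrative, not the Kaldi API.

    class Merger(object):
        def __init__(self, size_fn, write_minibatch):
            self.buckets = {}            # structure key -> pending egs
            self.size_fn = size_fn       # cf. ExampleMergingConfig.MinibatchSize
            self.write_minibatch = write_minibatch

        def accept(self, eg, structure_key, eg_size):
            bucket = self.buckets.setdefault(structure_key, [])
            bucket.append(eg)
            n = self.size_fn(eg_size, len(bucket), input_ended=False)
            if n:                        # a full minibatch is ready.
                assert n == len(bucket)
                del self.buckets[structure_key]
                self.write_minibatch(bucket)
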
+ finished_ = true; + + // we'll convert the map eg_to_egs_ to a vector of vectors to avoid + // iterator invalidation problems. + std::vector > all_egs; + all_egs.reserve(eg_to_egs_.size()); + + MapType::iterator iter = eg_to_egs_.begin(), end = eg_to_egs_.end(); + for (; iter != end; ++iter) + all_egs.push_back(iter->second); + eg_to_egs_.clear(); + + for (size_t i = 0; i < all_egs.size(); i++) { + int32 minibatch_size; + std::vector &vec = all_egs[i]; + KALDI_ASSERT(!vec.empty()); + int32 eg_size = GetNnetExampleSize(*(vec[0])); + bool input_ended = true; + while (!vec.empty() && + (minibatch_size = config_.MinibatchSize(eg_size, vec.size(), + input_ended)) != 0) { + // MergeExamples() expects a vector of NnetExample, not of pointers, + // so use swap to create that without doing any real work. + std::vector egs_to_merge(minibatch_size); + for (int32 i = 0; i < minibatch_size; i++) { + egs_to_merge[i].Swap(vec[i]); + delete vec[i]; // we owned those pointers. + } + vec.erase(vec.begin(), vec.begin() + minibatch_size); + WriteMinibatch(egs_to_merge); + } + if (!vec.empty()) { + int32 eg_size = GetNnetExampleSize(*(vec[0])); + NnetExampleStructureHasher eg_hasher; + size_t structure_hash = eg_hasher(*(vec[0])); + int32 num_discarded = vec.size(); + stats_.DiscardedExamples(eg_size, structure_hash, num_discarded); + for (int32 i = 0; i < num_discarded; i++) + delete vec[i]; + vec.clear(); + } + } + + + +} + } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-example-utils.h b/src/nnet3/nnet-example-utils.h index 754743d581e..75a47772fda 100644 --- a/src/nnet3/nnet-example-utils.h +++ b/src/nnet3/nnet-example-utils.h @@ -23,6 +23,7 @@ #include "nnet3/nnet-example.h" #include "nnet3/nnet-computation.h" #include "nnet3/nnet-compute.h" +#include "util/kaldi-table.h" namespace kaldi { namespace nnet3 { @@ -294,11 +295,203 @@ class UtteranceSplitter { // chunks, and then add the subtracted number of copies of the primary // num-frames to the split. std::vector > > splits_for_length_; +}; + + +class ExampleMergingConfig { +public: + // The following configuration values are registered on the command line. + bool compress; + std::string measure_output_frames; // for back-compatibility, not used. + std::string minibatch_size; + std::string discard_partial_minibatches; // for back-compatibility, not used. + + ExampleMergingConfig(): compress(false), + measure_output_frames("deprecated"), + minibatch_size("256"), + discard_partial_minibatches("deprecated") { } + + void Register(OptionsItf *po) { + po->Register("compress", &compress, "If true, compress the output examples " + "(not recommended unless you are writing to disk)"); + po->Register("measure-output-frames", &measure_output_frames, "This " + "value will be ignored (included for back-compatibility)"); + po->Register("discard-partial-minibatches", &discard_partial_minibatches, + "This value will be ignored (included for back-compatibility)"); + po->Register("minibatch-size", &minibatch_size, + "String controlling the minibatch size. May be just an integer, " + "meaning a fixed minibatch size (e.g. --minibatch-size=128). " + "May be a list of ranges and values, e.g. --minibatch-size=32,64 " + "or --minibatch-size=16-32,64,128. All minibatches will be of " + "the largest size until the end of the input is reached; " + "then, increasingly smaller sizes will be allowed. Only egs " + "with the same structure (e.g num-frames) are merged. 
You may " + "specify different minibatch sizes for different sizes of eg " + "(defined as the maximum number of Indexes on any input), in " + "the format " + "--minibatch-size='eg_size1=mb_sizes1/eg_size2=mb_sizes2', e.g. " + "--minibatch-size=128=64-128,256/256=32-64,128. Egs are given " + "minibatch-sizes based on the specified eg-size closest to " + "their actual size."); + } + + + // this function computes the derived (private) parameters; it must be called + // after the command-line parameters are read and before MinibatchSize() is + // called. + void ComputeDerived(); + + /// This function tells you what minibatch size should be used for this eg. + + /// @param [in] size_of_eg The "size" of the eg, as obtained by + /// GetNnetExampleSize() or a similar function (up + /// to the caller). + /// @param [in] num_available_egs The number of egs of this size that are + /// currently available; should be >0. The + /// value returned will be <= this value, possibly + /// zero. + /// @param [in] input_ended True if the input has ended, false otherwise. + /// This is important because before the input has + /// ended, we will only batch egs into the largest + /// possible minibatch size among the range allowed + /// for that size of eg. + /// @return Returns the minibatch size to use in this + /// situation, as specified by the configuration. + int32 MinibatchSize(int32 size_of_eg, + int32 num_available_egs, + bool input_ended) const; + private: + // struct IntSet is a representation of something like 16-32,64, which is a + // nonempty list of either nonnegative integers or ranges of nonnegative + // integers. Conceptually it represents a set of nonnegative integers. + struct IntSet { + // largest_size is the largest integer in any of the ranges (64 in this + // example). + int32 largest_size; + // e.g. would contain ((16,32), (64,64)) in this example. + std::vector > ranges; + // Returns the largest value in any range (i.e. in the set of + // integers that this struct represents), that is <= max_value, + // or 0 if there is no value in any range that is <= max_value. + // In this example, this function would return the following: + // 128->64, 64->64, 63->32, 31->31, 16->16, 15->0, 0->0 + int32 LargestValueInRange(int32 max_value) const; + }; + static bool ParseIntSet(const std::string &str, IntSet *int_set); + + // 'rules' is derived from the configuration values above by ComputeDerived(), + // and are not set directly on the command line. 'rules' is a list of pairs + // (eg-size, int-set-of-minibatch-sizes); If no explicit eg-sizes were + // specified on the command line (i.e. there was no '=' sign in the + // --minibatch-size option), then we just set the int32 to 0. + std::vector > rules; }; +/// This function returns the 'size' of a nnet-example as defined for purposes +/// of merging egs, which is defined as the largest number of Indexes in any of +/// the inputs or outputs of the example. +int32 GetNnetExampleSize(const NnetExample &a); + + + + + +/// This class is responsible for storing, and displaying in log messages, +/// statistics about how examples of different sizes (c.f. GetNnetExampleSize()) +/// were merged into minibatches, and how many examples were left over and +/// discarded. +class ExampleSizeStats { + public: + + /// Users call this function to inform this class that one minibatch has been + /// written aggregating 'minibatch_size' separate examples of original size + /// 'example_size' (e.g. as determined by GetNnetExampleSize(), but the caller + /// does that. 
+ /// The 'structure_hash' is provided so that this class can distinguish + /// between egs that have the same size but different structure. In the + /// extremely unlikely eventuality that there is a hash collision, it will + /// cause misleading stats to be printed out. + void WroteExample(int32 example_size, size_t structure_hash, + int32 minibatch_size); + + /// Users call this function to inform this class that after processing all + /// the data, for examples of original size 'example_size', 'num_discarded' + /// examples could not be put into a minibatch and were discarded. + void DiscardedExamples(int32 example_size, size_t structure_hash, + int32 num_discarded); + + /// Calling this will cause a log message with information about the + /// examples to be printed. + void PrintStats() const; + + private: + // this struct stores the stats for examples of a particular size and + // structure. + struct StatsForExampleSize { + int32 num_discarded; + // maps from minibatch-size (i.e. number of egs that were + // aggregated into that minibatch), to the number of such + // minibatches written. + unordered_map minibatch_to_num_written; + StatsForExampleSize(): num_discarded(0) { } + }; + + + typedef unordered_map, StatsForExampleSize, + PairHasher > StatsType; + + // this maps from a pair (example_size, structure_hash) to to the stats for + // examples with those characteristics. + StatsType stats_; + + void PrintAggregateStats() const; + void PrintSpecificStats() const; + +}; + + +/// This class is responsible for arranging examples in groups +/// that have the same strucure (i.e. the same input and output +/// indexes), and outputting them in suitable minibatches +/// as defined by ExampleMergingConfig. +class ExampleMerger { + ExampleMerger(const ExampleMergingConfig &config, + NnetExampleWriter *writer); + + // This function accepts an example, and if possible, writes a merged example + // out. The ownership of the pointer 'a' is transferred to this class when + // you call this function. + void AcceptExample(NnetExample *a); + + // This function announces to the class that the input has finished, so it + // should flush out any smaller-sizes minibatches, as dictated by the config. + // This will be called in the destructor, but you can call it explicitly when + // all the input is done if you want to. + // It also prints the stats. + void Finish(); + + ~ExampleMerger() { Finish(); }; + private: + // called by Finish() and AcceptExample(). Merges, updates the + // stats, and writes. + void WriteMinibatch(const std::vector &egs); + + bool finished_; + int32 num_egs_written_; + const ExampleMergingConfig &config_; + NnetExampleWriter *writer_; + ExampleSizeStats stats_; + + // Note: the "key" into the egs is the first element of the vector. + typedef unordered_map, + NnetExampleStructureHasher, + NnetExampleStructureCompare> MapType; + MapType eg_to_egs_; +}; + } // namespace nnet3 diff --git a/src/nnet3/nnet-example.cc b/src/nnet3/nnet-example.cc index 9a34258e0ee..3e87ebba3f5 100644 --- a/src/nnet3/nnet-example.cc +++ b/src/nnet3/nnet-example.cc @@ -122,5 +122,53 @@ void NnetExample::Compress() { iter->features.Compress(); } + +size_t NnetIoStructureHasher::operator () ( + const NnetIo &io) const { + StringHasher string_hasher; + IndexVectorHasher indexes_hasher; + + // numbers appearing here were taken at random from a list of primes. 
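
The hash-combining recipe these hashers share, multiply the running value by a prime per field and add the field's own hash, as a small Python sketch (the primes are arbitrary, as the comments say):

    def combine_hashes(field_hashes, multiplier=19157, seed=35099):
        # cf. NnetExampleStructureHasher: ans = ans * p + hash(field), per field.
        ans = seed * len(field_hashes)
        for h in field_hashes:
            ans = ans * multiplier + h
        return ans & 0xFFFFFFFFFFFFFFFF  # keep within 64-bit range like size_t
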
+ size_t ans = string_hasher(io.name) + + indexes_hasher(io.indexes) + + 19249 * io.features.NumRows() + + 14731 * io.features.NumCols(); + return ans; +} + + +bool NnetIoStructureCompare::operator () ( + const NnetIo &a, const NnetIo &b) const { + return a.name == b.name && + a.features.NumRows() == b.features.NumRows() && + a.features.NumCols() == b.features.NumCols() && + a.indexes == b.indexes; +} + + +size_t NnetExampleStructureHasher::operator () ( + const NnetExample &eg) const { + // these numbers were chosen at random from a list of primes. + NnetIoStructureHasher io_hasher; + size_t size = eg.io.size(), ans = size * 35099; + for (size_t i = 0; i < size; i++) + ans = ans * 19157 + io_hasher(eg.io[i]); + return ans; +} + +bool NnetExampleStructureCompare::operator () (const NnetExample &a, + const NnetExample &b) const { + NnetIoStructureCompare io_compare; + if (a.io.size() != b.io.size()) + return false; + size_t size = a.io.size(); + for (size_t i = 0; i < size; i++) + if (!io_compare(a.io[i], b.io[i])) + return false; + return true; +} + + + } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-example.h b/src/nnet3/nnet-example.h index 1df7cd1e78e..f08754a2bd3 100644 --- a/src/nnet3/nnet-example.h +++ b/src/nnet3/nnet-example.h @@ -75,6 +75,22 @@ struct NnetIo { }; +/// This hashing object hashes just the structural aspects of the NnetIo object +/// (name, indexes, feature dimension) without looking at the value of features. +/// It will be used in combining egs into batches of all similar structure. +struct NnetIoStructureHasher { + size_t operator () (const NnetIo &a) const; +}; +/// This comparison object compares just the structural aspects of the NnetIo +/// object (name, indexes, feature dimension) without looking at the value of +/// features. It will be used in combining egs into batches of all similar +/// structure. +struct NnetIoStructureCompare { + bool operator () (const NnetIo &a, + const NnetIo &b) const; +}; + + /// NnetExample is the input data and corresponding label (or labels) for one or /// more frames of input, used for standard cross-entropy training of neural @@ -104,6 +120,38 @@ struct NnetExample { }; +/// This hashing object hashes just the structural aspects of the NnetExample +/// without looking at the value of the features. It will be used in combining +/// egs into batches of all similar structure. Note: the hash value is +/// sensitive to the order in which the NnetIo elements (input and outputs) +/// appear, even though the merging is capable of dealing with +/// differently-ordered inputs and outputs (e.g. "input" appearing before +/// vs. after "ivector" or "output"). We don't think anyone would ever have to +/// deal with differently-ordered, but otherwise identical, egs in practice so +/// we don't bother making the hashing function independent of this order. +struct NnetExampleStructureHasher { + size_t operator () (const NnetExample &eg) const; + // We also provide a version of this that works from pointers. + size_t operator () (const NnetExample *eg) const { return (*this)(*eg); } +}; + + +/// This comparator object compares just the structural aspects of the +/// NnetExample without looking at the value of the features. Like +/// NnetExampleStructureHasher, it is sensitive to the order in which the +/// differently-named NnetIo elements appear. This hashing object will be used +/// in combining egs into batches of all similar structure. 
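
These hasher/comparator pairs exist so that egs can key a hash map by structure alone, ignoring feature values. The Python analogue is simply to build a hashable structure key per eg; a sketch, where the tuple fields follow what NnetIoStructureHasher looks at but this is not the real class:

    def structure_key(eg_io_list):
        # eg_io_list: list of (name, num_rows, num_cols, indexes) tuples,
        # mirroring the fields the structural hasher and comparator use.
        return tuple((name, num_rows, num_cols, tuple(indexes))
                     for name, num_rows, num_cols, indexes in eg_io_list)
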
+struct NnetExampleStructureCompare { + bool operator () (const NnetExample &a, + const NnetExample &b) const; + // We also provide a version of this that works from pointers. + bool operator () (const NnetExample *a, + const NnetExample *b) const { return (*this)(*a, *b); } + +}; + + + typedef TableWriter > NnetExampleWriter; typedef SequentialTableReader > SequentialNnetExampleReader; typedef RandomAccessTableReader > RandomAccessNnetExampleReader; diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc index 6da7699cb93..54ebf17edc7 100644 --- a/src/nnet3/nnet-optimize.cc +++ b/src/nnet3/nnet-optimize.cc @@ -20,6 +20,7 @@ #include "nnet3/nnet-optimize.h" #include "nnet3/nnet-optimize-utils.h" +#include "base/timer.h" namespace kaldi { namespace nnet3 { @@ -532,45 +533,16 @@ void Optimize(const NnetOptimizeOptions &config, // of inputs and outputs size_t ComputationRequestHasher::operator() (const ComputationRequest *cr) const { size_t ans = 0; + size_t p1 = 4111, p2 = 26951; + IoSpecificationHasher io_hasher; std::vector::const_iterator itr = cr->inputs.begin(), end = cr->inputs.end(); - for (; itr != end; ++itr) { - ans += IoSpecificationToInt(*itr); - } + for (; itr != end; ++itr) + ans = ans * p1 + io_hasher(*itr); itr = cr->outputs.begin(); end = cr->outputs.end(); - for (; itr != end; ++itr) { - ans += IoSpecificationToInt(*itr); - } - return ans; -} - -size_t ComputationRequestHasher::IoSpecificationToInt(const IoSpecification& spec) const { - size_t ans; - size_t n = 19; // this value is used to extract only a subset of elements to hash; - // it makes the hasher faster. - StringHasher string_hasher; - ans = string_hasher(spec.name); - std::vector::const_iterator iter = spec.indexes.begin(), - end = spec.indexes.end(), - med = end; - if (med > iter + n) - med = iter + n; - - for (; iter != med; ++iter) { - ans += iter->n * 1619; - ans += iter->t * 15649; - ans += iter->x * 89809; - } - // after the first 'n' values, look only at every n'th value. this makes the - // hashing much faster, and in the kinds of structures that we actually deal - // with, we shouldn't get unnecessary hash collisions as a result of this - // optimization. - for (; iter < end; iter += n) { - ans += iter->n * 1619; - ans += iter->t * 15649; - ans += iter->x * 89809; - } + for (; itr != end; ++itr) + ans = ans * p2 + io_hasher(*itr); return ans; } @@ -643,20 +615,25 @@ CachingOptimizingCompiler::~CachingOptimizingCompiler() { delete itr->first; delete itr->second.first; } + KALDI_LOG << seconds_taken_ << " seconds taken in nnet3 compilation"; } const NnetComputation* CachingOptimizingCompiler::Compile( const ComputationRequest &in_request) { + Timer timer; + const NnetComputation *ans; // find computation in the cache CacheType::iterator cit = computation_cache_.find(&in_request); if (cit == computation_cache_.end()) { - return CompileAndCache(in_request); + ans = CompileAndCache(in_request); } else { // if found, update access queue const NnetComputation *computation = cit->second.first; UpdateAccessQueue(cit); - return computation; + ans = computation; } + seconds_taken_ += timer.Elapsed(); + return ans; } const NnetComputation* CachingOptimizingCompiler::CompileAndCache( diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h index 1ca776d4ee6..ab0721e802a 100644 --- a/src/nnet3/nnet-optimize.h +++ b/src/nnet3/nnet-optimize.h @@ -164,9 +164,6 @@ void Optimize(const NnetOptimizeOptions &config, // and output IoSpecifications vectors. 
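
The timing instrumentation added to CachingOptimizingCompiler, accumulating wall-clock seconds around each Compile() call and reporting the total at teardown, has roughly this shape. A Python sketch under simplifying assumptions: compile_fn is hypothetical, requests are assumed hashable, and the LRU bookkeeping of the real cache is omitted.

    import time

    class CachingCompiler(object):
        def __init__(self, compile_fn):
            self.compile_fn = compile_fn
            self.cache = {}              # request -> compiled computation
            self.seconds_taken = 0.0

        def compile(self, request):
            start = time.time()
            try:
                if request not in self.cache:
                    self.cache[request] = self.compile_fn(request)
                return self.cache[request]
            finally:
                self.seconds_taken += time.time() - start

        def report(self):
            # the C++ version logs this from the destructor.
            print("{0} seconds taken in compilation".format(self.seconds_taken))
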
struct ComputationRequestHasher { size_t operator()(const ComputationRequest *cr) const; - private: - size_t IoSpecificationToInt(const IoSpecification& spec) const; - static const int kPrime = 7853; }; // Equality function for ComputationRequest pointer @@ -210,14 +207,15 @@ class CachingOptimizingCompiler { CachingOptimizingCompiler(const Nnet &nnet, const CachingOptimizingCompilerOptions config = CachingOptimizingCompilerOptions()): - nnet_(nnet), config_(config) { } + nnet_(nnet), config_(config), seconds_taken_(0.0) { } /// Note: nnet is retained as a const reference but opt_config is copied. CachingOptimizingCompiler(const Nnet &nnet, const NnetOptimizeOptions &opt_config, const CachingOptimizingCompilerOptions config = CachingOptimizingCompilerOptions()): - nnet_(nnet), config_(config), opt_config_(opt_config) { } + nnet_(nnet), config_(config), opt_config_(opt_config), + seconds_taken_(0.0) { } ~CachingOptimizingCompiler(); /// Does the compilation and returns a const pointer to @@ -276,6 +274,9 @@ class CachingOptimizingCompiler { ComputationRequestPtrEqual> CacheType; CacheType computation_cache_; + // time spent in compilation-- for diagnostic messages + double seconds_taken_; + // This function updates the computation cache. It is called within Compile(). // It takes ownership of the pointers. It inserts the request at the end of // the queue, and purges the least-recently-accessed request from the queue and diff --git a/src/nnet3bin/nnet3-merge-egs.cc b/src/nnet3bin/nnet3-merge-egs.cc index 8627671f53a..48ba2986512 100644 --- a/src/nnet3bin/nnet3-merge-egs.cc +++ b/src/nnet3bin/nnet3-merge-egs.cc @@ -58,7 +58,7 @@ int main(int argc, char *argv[]) { "e.g.\n" "nnet3-merge-egs --minibatch-size=512 ark:1.egs ark:- | nnet3-train-simple ... \n" "See also nnet3-copy-egs\n"; - + bool compress = false; int32 minibatch_size = 512; bool measure_output_frames = true; @@ -76,7 +76,7 @@ int main(int argc, char *argv[]) { po.Register("discard-partial-minibatches", &discard_partial_minibatches, "discard any partial minibatches of 'uneven' size that may be " "encountered at the end."); - + po.Read(argc, argv); if (po.NumArgs() != 2) { @@ -89,12 +89,12 @@ int main(int argc, char *argv[]) { SequentialNnetExampleReader example_reader(examples_rspecifier); NnetExampleWriter example_writer(examples_wspecifier); - + std::vector examples; examples.reserve(minibatch_size); int32 cur_num_output_frames = 0; - + int64 num_read = 0, num_written = 0; while (!example_reader.Done()) { const NnetExample &cur_eg = example_reader.Value(); diff --git a/src/util/stl-utils.h b/src/util/stl-utils.h index d37e4d2d203..b5f8f246d95 100644 --- a/src/util/stl-utils.h +++ b/src/util/stl-utils.h @@ -245,16 +245,16 @@ struct VectorHasher { // hashing function for vector. }; /// A hashing function-object for pairs of ints -template +template struct PairHasher { // hashing function for pair - size_t operator()(const std::pair &x) const { - return x.first + x.second * kPrime; + size_t operator()(const std::pair &x) const { + // 7853 was chosen at random from a list of primes. + return x.first + x.second * 7853; } PairHasher() { // Check we're instantiated with an integer type. 
- KALDI_ASSERT_IS_INTEGER_TYPE(Int); + KALDI_ASSERT_IS_INTEGER_TYPE(Int1); + KALDI_ASSERT_IS_INTEGER_TYPE(Int2); } - private: - static const int kPrime = 7853; }; diff --git a/src/util/timer.h b/src/util/timer.h deleted file mode 100644 index 3b92b48b603..00000000000 --- a/src/util/timer.h +++ /dev/null @@ -1,28 +0,0 @@ -// util/timer.h - -// Copyright 2014 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -// We are temporarily leaving this file to forward #includes to -// base-timer.h. Its use is deprecated; you should directrly -// #include base/timer.h -#ifndef KALDI_UTIL_TIMER_H_ -#define KALDI_UTIL_TIMER_H_ -#pragma message warning: please do not include util/timer.h, \ - include base/timer.h(it has been moved) -#include "base/timer.h" -#endif // KALDI_UTIL_TIMER_H_ From e5e03a0c86af8d10dbc3ad96961c012065d643df Mon Sep 17 00:00:00 2001 From: "Radagast (jf)" Date: Wed, 28 Dec 2016 06:37:49 +0100 Subject: [PATCH 162/530] nnet3: Remove double return ans introduced in 6850798ab. (#1291) --- egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py index eeacc1fff87..3726eebeb6e 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py @@ -147,7 +147,7 @@ def str(self): list_of_entries.append('{0}={1}'.format(key, value)) return ' '.join(list_of_entries) - return ans + def __str__(self): return self.str() From be2b640cb19e206ac3a5e43452bbcf21a7313aef Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Wed, 28 Dec 2016 13:08:09 -0800 Subject: [PATCH 163/530] Finish upgrades to eg-merging code for chain and discriminative examples. Compiles but not tested. --- src/chainbin/nnet3-chain-merge-egs.cc | 43 ++--- src/nnet3/nnet-chain-example.cc | 161 ++++++++++++++++- src/nnet3/nnet-chain-example.h | 80 +++++++++ src/nnet3/nnet-discriminative-example.cc | 166 +++++++++++++++++- src/nnet3/nnet-discriminative-example.h | 84 ++++++++- src/nnet3/nnet-example-utils.cc | 20 ++- src/nnet3/nnet-example-utils.h | 4 + .../nnet3-discriminative-merge-egs.cc | 43 ++--- src/nnet3bin/nnet3-merge-egs.cc | 57 ++---- src/util/kaldi-holder-inl.h | 1 + 10 files changed, 532 insertions(+), 127 deletions(-) diff --git a/src/chainbin/nnet3-chain-merge-egs.cc b/src/chainbin/nnet3-chain-merge-egs.cc index 45dca4051f3..9c91f997e7a 100644 --- a/src/chainbin/nnet3-chain-merge-egs.cc +++ b/src/chainbin/nnet3-chain-merge-egs.cc @@ -41,14 +41,12 @@ int main(int argc, char *argv[]) { "nnet3-chain-merge-egs --minibatch-size=128 ark:1.cegs ark:- | nnet3-chain-train-simple ... 
\n" "See also nnet3-chain-copy-egs\n"; - bool compress = false; - int32 minibatch_size = 64; - + ExampleMergingConfig merging_config; + merging_config.minibatch_size = 64; // change the default for this + // program.. anyway it will usually be + // set on the command line. ParseOptions po(usage); - po.Register("minibatch-size", &minibatch_size, "Target size of minibatches " - "when merging (see also --measure-output-frames)"); - po.Register("compress", &compress, "If true, compress the output examples " - "(not recommended unless you are writing to disk"); + merging_config.Register(&po); po.Read(argc, argv); @@ -63,35 +61,14 @@ int main(int argc, char *argv[]) { SequentialNnetChainExampleReader example_reader(examples_rspecifier); NnetChainExampleWriter example_writer(examples_wspecifier); - std::vector examples; - examples.reserve(minibatch_size); - - int64 num_read = 0, num_written = 0; + ChainExampleMerger merger(merging_config, &example_writer); while (!example_reader.Done()) { const NnetChainExample &cur_eg = example_reader.Value(); - examples.resize(examples.size() + 1); - examples.back() = cur_eg; - - bool minibatch_ready = - static_cast(examples.size()) >= minibatch_size; - - // Do Next() now, so we can test example_reader.Done() below . - example_reader.Next(); - num_read++; - - if (minibatch_ready || (example_reader.Done() && !examples.empty())) { - NnetChainExample merged_eg; - MergeChainExamples(compress, &examples, &merged_eg); - std::ostringstream ostr; - ostr << "merged-" << num_written; - num_written++; - std::string output_key = ostr.str(); - example_writer.Write(output_key, merged_eg); - examples.clear(); - } + merger.AcceptExample(new NnetChainExample(cur_eg)); } - KALDI_LOG << "Merged " << num_read << " egs to " << num_written << '.'; - return (num_written != 0 ? 0 : 1); + // the merger itself prints the necessary diagnostics. + merger.Finish(); + return merger.ExitStatus(); } catch(const std::exception &e) { std::cerr << e.what() << '\n'; return -1; diff --git a/src/nnet3/nnet-chain-example.cc b/src/nnet3/nnet-chain-example.cc index 0607543b743..b1c6e60de47 100644 --- a/src/nnet3/nnet-chain-example.cc +++ b/src/nnet3/nnet-chain-example.cc @@ -207,8 +207,8 @@ static void MergeSupervision( std::vector output_supervision; bool compactify = true; AppendSupervision(input_supervision, - compactify, - &output_supervision); + compactify, + &output_supervision); if (output_supervision.size() != 1) KALDI_ERR << "Failed to merge 'chain' examples-- inconsistent lengths " << "or weights?"; @@ -300,7 +300,7 @@ void TruncateDerivWeights(int32 truncate, deriv_weights.Set(1.0); } int32 num_sequences = supervision.supervision.num_sequences, - frames_per_sequence = supervision.supervision.frames_per_sequence; + frames_per_sequence = supervision.supervision.frames_per_sequence; KALDI_ASSERT(2 * truncate < frames_per_sequence); for (int32 t = 0; t < truncate; t++) for (int32 s = 0; s < num_sequences; s++) @@ -421,5 +421,160 @@ void ShiftChainExampleTimes(int32 frame_shift, } } + +size_t NnetChainExampleStructureHasher::operator () ( + const NnetChainExample &eg) const { + // these numbers were chosen at random from a list of primes. 
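// Illustrative sketch (not part of this patch): the hasher whose body follows
// looks only at the structure of the eg -- the NnetIo layout of the inputs
// and the names/indexes of the outputs -- never at the feature values.
// Roughly:
//
//   NnetChainExample e1, e2;          // identical input/output indexes,
//                                     // different feature matrices
//   NnetChainExampleStructureHasher h;
//   // h(e1) == h(e2): egs that need the same compiled computation hash
//   // together, which is what lets the merger batch them into a minibatch.
//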
+  NnetIoStructureHasher io_hasher;
+  size_t size = eg.inputs.size(), ans = size * 35099;
+  for (size_t i = 0; i < size; i++)
+    ans = ans * 19157 + io_hasher(eg.inputs[i]);
+  for (size_t i = 0; i < eg.outputs.size(); i++) {
+    const NnetChainSupervision &sup = eg.outputs[i];
+    StringHasher string_hasher;
+    IndexVectorHasher indexes_hasher;
+    ans = ans * 17957 +
+        string_hasher(sup.name) + indexes_hasher(sup.indexes);
+  }
+  return ans;
+}
+
+bool NnetChainExampleStructureCompare::operator () (
+    const NnetChainExample &a,
+    const NnetChainExample &b) const {
+  NnetIoStructureCompare io_compare;
+  if (a.inputs.size() != b.inputs.size() ||
+      a.outputs.size() != b.outputs.size())
+    return false;
+  size_t size = a.inputs.size();
+  for (size_t i = 0; i < size; i++)
+    if (!io_compare(a.inputs[i], b.inputs[i]))
+      return false;
+  size = a.outputs.size();
+  for (size_t i = 0; i < size; i++)
+    if (a.outputs[i].name != b.outputs[i].name ||
+        a.outputs[i].indexes != b.outputs[i].indexes)
+      return false;
+  return true;
+}
+
+
+int32 GetNnetChainExampleSize(const NnetChainExample &a) {
+  int32 ans = 0;
+  for (size_t i = 0; i < a.inputs.size(); i++) {
+    int32 s = a.inputs[i].indexes.size();
+    if (s > ans)
+      ans = s;
+  }
+  for (size_t i = 0; i < a.outputs.size(); i++) {
+    int32 s = a.outputs[i].indexes.size();
+    if (s > ans)
+      ans = s;
+  }
+  return ans;
+}
+
+
+ChainExampleMerger::ChainExampleMerger(const ExampleMergingConfig &config,
+                                       NnetChainExampleWriter *writer):
+    finished_(false), num_egs_written_(0),
+    config_(config), writer_(writer) { }
+
+
+void ChainExampleMerger::AcceptExample(NnetChainExample *eg) {
+  KALDI_ASSERT(!finished_);
+  // If an eg with the same structure as 'eg' is already a key in the
+  // map, it won't be replaced, but if it's new it will be made
+  // the key.  Also we remove the key before making the vector empty.
+  // This way we ensure that the eg in the key is always the first
+  // element of the vector.
+  std::vector<NnetChainExample*> &vec = eg_to_egs_[eg];
+  vec.push_back(eg);
+  int32 eg_size = GetNnetChainExampleSize(*eg),
+      num_available = vec.size();
+  bool input_ended = false;
+  int32 minibatch_size = config_.MinibatchSize(eg_size, num_available,
+                                               input_ended);
+  if (minibatch_size != 0) {  // we need to write out a merged eg.
+    KALDI_ASSERT(minibatch_size == num_available);
+
+    std::vector<NnetChainExample*> vec_copy(vec);
+    eg_to_egs_.erase(eg);  // note: this invalidates the reference 'vec',
+                           // which is why we made 'vec_copy' first.
+
+    // MergeChainExamples() expects a vector of NnetChainExample, not of pointers,
+    // so use swap to create that without doing any real work.
+    std::vector<NnetChainExample> egs_to_merge(minibatch_size);
+    for (int32 i = 0; i < minibatch_size; i++) {
+      egs_to_merge[i].Swap(vec_copy[i]);
+      delete vec_copy[i];  // we owned those pointers.
+    }
+    WriteMinibatch(&egs_to_merge);
+  }
+}
+
+void ChainExampleMerger::WriteMinibatch(
+    std::vector<NnetChainExample> *egs) {
+  KALDI_ASSERT(!egs->empty());
+  int32 eg_size = GetNnetChainExampleSize((*egs)[0]);
+  NnetChainExampleStructureHasher eg_hasher;
+  size_t structure_hash = eg_hasher((*egs)[0]);
+  int32 minibatch_size = egs->size();
+  stats_.WroteExample(eg_size, structure_hash, minibatch_size);
+  NnetChainExample merged_eg;
+  MergeChainExamples(config_.compress, egs, &merged_eg);
+  std::ostringstream key;
+  key << "merged-" << (num_egs_written_++) << "-" << minibatch_size;
+  writer_->Write(key.str(), merged_eg);
+}
+
+void ChainExampleMerger::Finish() {
+  if (finished_) return;  // already finished.
+  finished_ = true;
+
+  // we'll convert the map eg_to_egs_ to a vector of vectors to avoid
+  // iterator invalidation problems.
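// Sketch of the hazard being avoided (not part of this patch): erasing from
// an unordered_map invalidates iterators to the erased element, so a loop
// like the following would be unsafe if the body ever erased entries:
//
//   MapType::iterator iter = eg_to_egs_.begin();
//   for (; iter != eg_to_egs_.end(); ++iter) {
//     ...
//     eg_to_egs_.erase(iter);   // 'iter' is now invalid; ++iter is undefined
//   }
//
// Copying the values into 'all_egs' below and then clearing the map means no
// map iterator is live while the egs are being consumed.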
+  std::vector<std::vector<NnetChainExample*> > all_egs;
+  all_egs.reserve(eg_to_egs_.size());
+
+  MapType::iterator iter = eg_to_egs_.begin(), end = eg_to_egs_.end();
+  for (; iter != end; ++iter)
+    all_egs.push_back(iter->second);
+  eg_to_egs_.clear();
+
+  for (size_t i = 0; i < all_egs.size(); i++) {
+    int32 minibatch_size;
+    std::vector<NnetChainExample*> &vec = all_egs[i];
+    KALDI_ASSERT(!vec.empty());
+    int32 eg_size = GetNnetChainExampleSize(*(vec[0]));
+    bool input_ended = true;
+    while (!vec.empty() &&
+           (minibatch_size = config_.MinibatchSize(eg_size, vec.size(),
+                                                   input_ended)) != 0) {
+      // MergeChainExamples() expects a vector of
+      // NnetChainExample, not of pointers, so use swap to create that
+      // without doing any real work.
+      std::vector<NnetChainExample> egs_to_merge(minibatch_size);
+      for (int32 i = 0; i < minibatch_size; i++) {
+        egs_to_merge[i].Swap(vec[i]);
+        delete vec[i];  // we owned those pointers.
+      }
+      vec.erase(vec.begin(), vec.begin() + minibatch_size);
+      WriteMinibatch(&egs_to_merge);
+    }
+    if (!vec.empty()) {
+      int32 eg_size = GetNnetChainExampleSize(*(vec[0]));
+      NnetChainExampleStructureHasher eg_hasher;
+      size_t structure_hash = eg_hasher(*(vec[0]));
+      int32 num_discarded = vec.size();
+      stats_.DiscardedExamples(eg_size, structure_hash, num_discarded);
+      for (int32 i = 0; i < num_discarded; i++)
+        delete vec[i];
+      vec.clear();
+    }
+  }
+}
+
+
+
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/nnet-chain-example.h b/src/nnet3/nnet-chain-example.h
index 9be298074a4..87b2de77897 100644
--- a/src/nnet3/nnet-chain-example.h
+++ b/src/nnet3/nnet-chain-example.h
@@ -25,6 +25,7 @@
 #include "hmm/posterior.h"
 #include "util/table-types.h"
 #include "nnet3/nnet-example.h"
+#include "nnet3/nnet-example-utils.h"
 #include "chain/chain-supervision.h"

 namespace kaldi {
@@ -130,6 +131,31 @@ struct NnetChainExample {
   }
 };

+/// This hashing object hashes just the structural aspects of the NnetExample
+/// without looking at the value of the features.  It will be used in combining
+/// egs into batches of all similar structure.
+struct NnetChainExampleStructureHasher {
+  size_t operator () (const NnetChainExample &eg) const;
+  // We also provide a version of this that works from pointers.
+  size_t operator () (const NnetChainExample *eg) const {
+    return (*this)(*eg);
+  }
+};
+
+
+/// This comparator object compares just the structural aspects of the
+/// NnetChainExample without looking at the value of the features.
+struct NnetChainExampleStructureCompare {
+  bool operator () (const NnetChainExample &a,
+                    const NnetChainExample &b) const;
+  // We also provide a version of this that works from pointers.
+  bool operator () (const NnetChainExample *a,
+                    const NnetChainExample *b) const {
+    return (*this)(*a, *b);
+  }
+};
+
+

 /// This function merges a list of NnetChainExample objects into a single one--
 /// intended to be used when forming minibatches for neural net training.  If
@@ -200,6 +226,60 @@ typedef TableWriter > NnetChainExampleWrite
 typedef SequentialTableReader<KaldiObjectHolder<NnetChainExample > > SequentialNnetChainExampleReader;
 typedef RandomAccessTableReader<KaldiObjectHolder<NnetChainExample > > RandomAccessNnetChainExampleReader;
+
+/// This function returns the 'size' of a chain example as defined for purposes
+/// of merging egs, which is defined as the largest number of Indexes in any of
+/// the inputs or outputs of the example.
+int32 GetNnetChainExampleSize(const NnetChainExample &a);
+
+
+/// This class is responsible for arranging examples in groups that have the
+/// same structure (i.e. the same input and output indexes), and outputting them
+/// in suitable minibatches as defined by ExampleMergingConfig.
+class ChainExampleMerger {
+ public:
+  ChainExampleMerger(const ExampleMergingConfig &config,
+                     NnetChainExampleWriter *writer);
+
+  // This function accepts an example, and if possible, writes a merged example
+  // out.  The ownership of the pointer 'a' is transferred to this class when
+  // you call this function.
+  void AcceptExample(NnetChainExample *a);
+
+  // This function announces to the class that the input has finished, so it
+  // should flush out any smaller-sized minibatches, as dictated by the config.
+  // This will be called in the destructor, but you can call it explicitly when
+  // all the input is done if you want to.
+  // It also prints the stats.
+  void Finish();
+
+  // returns a suitable exit status for a program.
+  bool ExitStatus() { return num_egs_written_ > 0; }
+
+  ~ChainExampleMerger() { Finish(); }
+ private:
+  // called by Finish() and AcceptExample().  Merges, updates the stats, and
+  // writes.  The 'egs' is non-const only because the egs are temporarily
+  // changed inside MergeChainExamples().  The pointer 'egs' is still owned
+  // by the caller.
+  void WriteMinibatch(std::vector<NnetChainExample> *egs);
+
+  bool finished_;
+  int32 num_egs_written_;
+  const ExampleMergingConfig &config_;
+  NnetChainExampleWriter *writer_;
+  ExampleSizeStats stats_;
+
+  // Note: the "key" into the egs is the first element of the vector.
+  typedef unordered_map<NnetChainExample*, std::vector<NnetChainExample*>,
+                        NnetChainExampleStructureHasher,
+                        NnetChainExampleStructureCompare> MapType;
+  MapType eg_to_egs_;
+};
+
+
+
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/nnet-discriminative-example.cc b/src/nnet3/nnet-discriminative-example.cc
index 5c02998cbcf..debc91b96c9 100644
--- a/src/nnet3/nnet-discriminative-example.cc
+++ b/src/nnet3/nnet-discriminative-example.cc
@@ -249,13 +249,15 @@ void MergeSupervision(
 }


-void MergeDiscriminativeExamples(bool compress,
-                                 std::vector<NnetDiscriminativeExample> *input,
-                                 NnetDiscriminativeExample *output) {
+void MergeDiscriminativeExamples(
+    bool compress,
+    std::vector<NnetDiscriminativeExample> *input,
+    NnetDiscriminativeExample *output) {
   int32 num_examples = input->size();
   KALDI_ASSERT(num_examples > 0);
-  // we temporarily make the input-features in 'input' look like regular NnetExamples,
-  // so that we can recycle the MergeExamples() function.
+  // we temporarily make the input-features in 'input' look like regular
+  // NnetExamples, so that we can recycle the
+  // MergeExamples() function.
   std::vector<NnetExample> eg_inputs(num_examples);
   for (int32 i = 0; i < num_examples; i++)
     eg_inputs[i].io.swap((*input)[i].inputs);
@@ -414,6 +416,160 @@ void ShiftDiscriminativeExampleTimes(int32 frame_shift,
   }
 }

+size_t NnetDiscriminativeExampleStructureHasher::operator () (
+    const NnetDiscriminativeExample &eg) const {
+  // these numbers were chosen at random from a list of primes.
+  NnetIoStructureHasher io_hasher;
+  size_t size = eg.inputs.size(), ans = size * 35099;
+  for (size_t i = 0; i < size; i++)
+    ans = ans * 19157 + io_hasher(eg.inputs[i]);
+  for (size_t i = 0; i < eg.outputs.size(); i++) {
+    const NnetDiscriminativeSupervision &sup = eg.outputs[i];
+    StringHasher string_hasher;
+    IndexVectorHasher indexes_hasher;
+    ans = ans * 17957 +
+        string_hasher(sup.name) + indexes_hasher(sup.indexes);
+  }
+  return ans;
+}
+
+bool NnetDiscriminativeExampleStructureCompare::operator () (
+    const NnetDiscriminativeExample &a,
+    const NnetDiscriminativeExample &b) const {
+  NnetIoStructureCompare io_compare;
+  if (a.inputs.size() != b.inputs.size() ||
+      a.outputs.size() != b.outputs.size())
+    return false;
+  size_t size = a.inputs.size();
+  for (size_t i = 0; i < size; i++)
+    if (!io_compare(a.inputs[i], b.inputs[i]))
+      return false;
+  size = a.outputs.size();
+  for (size_t i = 0; i < size; i++)
+    if (a.outputs[i].name != b.outputs[i].name ||
+        a.outputs[i].indexes != b.outputs[i].indexes)
+      return false;
+  return true;
+}
+
+
+int32 GetNnetDiscriminativeExampleSize(const NnetDiscriminativeExample &a) {
+  int32 ans = 0;
+  for (size_t i = 0; i < a.inputs.size(); i++) {
+    int32 s = a.inputs[i].indexes.size();
+    if (s > ans)
+      ans = s;
+  }
+  for (size_t i = 0; i < a.outputs.size(); i++) {
+    int32 s = a.outputs[i].indexes.size();
+    if (s > ans)
+      ans = s;
+  }
+  return ans;
+}
+
+
+DiscriminativeExampleMerger::DiscriminativeExampleMerger(const ExampleMergingConfig &config,
+                                                         NnetDiscriminativeExampleWriter *writer):
+    finished_(false), num_egs_written_(0),
+    config_(config), writer_(writer) { }
+
+
+void DiscriminativeExampleMerger::AcceptExample(NnetDiscriminativeExample *eg) {
+  KALDI_ASSERT(!finished_);
+  // If an eg with the same structure as 'eg' is already a key in the
+  // map, it won't be replaced, but if it's new it will be made
+  // the key.  Also we remove the key before making the vector empty.
+  // This way we ensure that the eg in the key is always the first
+  // element of the vector.
+  std::vector<NnetDiscriminativeExample*> &vec = eg_to_egs_[eg];
+  vec.push_back(eg);
+  int32 eg_size = GetNnetDiscriminativeExampleSize(*eg),
+      num_available = vec.size();
+  bool input_ended = false;
+  int32 minibatch_size = config_.MinibatchSize(eg_size, num_available,
+                                               input_ended);
+  if (minibatch_size != 0) {  // we need to write out a merged eg.
+    KALDI_ASSERT(minibatch_size == num_available);
+
+    std::vector<NnetDiscriminativeExample*> vec_copy(vec);
+    eg_to_egs_.erase(eg);  // note: this invalidates the reference 'vec',
+                           // which is why we made 'vec_copy' first.
+
+    // MergeDiscriminativeExamples() expects a vector of NnetDiscriminativeExample, not of pointers,
+    // so use swap to create that without doing any real work.
+    std::vector<NnetDiscriminativeExample> egs_to_merge(minibatch_size);
+    for (int32 i = 0; i < minibatch_size; i++) {
+      egs_to_merge[i].Swap(vec_copy[i]);
+      delete vec_copy[i];  // we owned those pointers.
+ } + WriteMinibatch(&egs_to_merge); + } +} + +void DiscriminativeExampleMerger::WriteMinibatch( + std::vector *egs) { + KALDI_ASSERT(!egs->empty()); + int32 eg_size = GetNnetDiscriminativeExampleSize((*egs)[0]); + NnetDiscriminativeExampleStructureHasher eg_hasher; + size_t structure_hash = eg_hasher((*egs)[0]); + int32 minibatch_size = egs->size(); + stats_.WroteExample(eg_size, structure_hash, minibatch_size); + NnetDiscriminativeExample merged_eg; + MergeDiscriminativeExamples(config_.compress, egs, &merged_eg); + std::ostringstream key; + key << "merged-" << (num_egs_written_++) << "-" << minibatch_size; + writer_->Write(key.str(), merged_eg); +} + +void DiscriminativeExampleMerger::Finish() { + if (finished_) return; // already finished. + finished_ = true; + + // we'll convert the map eg_to_egs_ to a vector of vectors to avoid + // iterator invalidation problems. + std::vector > all_egs; + all_egs.reserve(eg_to_egs_.size()); + + MapType::iterator iter = eg_to_egs_.begin(), end = eg_to_egs_.end(); + for (; iter != end; ++iter) + all_egs.push_back(iter->second); + eg_to_egs_.clear(); + + for (size_t i = 0; i < all_egs.size(); i++) { + int32 minibatch_size; + std::vector &vec = all_egs[i]; + KALDI_ASSERT(!vec.empty()); + int32 eg_size = GetNnetDiscriminativeExampleSize(*(vec[0])); + bool input_ended = true; + while (!vec.empty() && + (minibatch_size = config_.MinibatchSize(eg_size, vec.size(), + input_ended)) != 0) { + // MergeDiscriminativeExamples() expects a vector of + // NnetDiscriminativeExample, not of pointers, so use swap to create that + // without doing any real work. + std::vector egs_to_merge(minibatch_size); + for (int32 i = 0; i < minibatch_size; i++) { + egs_to_merge[i].Swap(vec[i]); + delete vec[i]; // we owned those pointers. + } + vec.erase(vec.begin(), vec.begin() + minibatch_size); + WriteMinibatch(&egs_to_merge); + } + if (!vec.empty()) { + int32 eg_size = GetNnetDiscriminativeExampleSize(*(vec[0])); + NnetDiscriminativeExampleStructureHasher eg_hasher; + size_t structure_hash = eg_hasher(*(vec[0])); + int32 num_discarded = vec.size(); + stats_.DiscardedExamples(eg_size, structure_hash, num_discarded); + for (int32 i = 0; i < num_discarded; i++) + delete vec[i]; + vec.clear(); + } + } +} + + + } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-discriminative-example.h b/src/nnet3/nnet-discriminative-example.h index bb60f216a82..3a170e6bbd6 100644 --- a/src/nnet3/nnet-discriminative-example.h +++ b/src/nnet3/nnet-discriminative-example.h @@ -26,6 +26,7 @@ #include "util/table-types.h" #include "nnet3/discriminative-supervision.h" #include "nnet3/nnet-example.h" +#include "nnet3/nnet-example-utils.h" #include "hmm/posterior.h" #include "hmm/transition-model.h" @@ -128,6 +129,32 @@ struct NnetDiscriminativeExample { } }; + +/// This hashing object hashes just the structural aspects of the NnetExample +/// without looking at the value of the features. It will be used in combining +/// egs into batches of all similar structure. +struct NnetDiscriminativeExampleStructureHasher { + size_t operator () (const NnetDiscriminativeExample &eg) const; + // We also provide a version of this that works from pointers. + size_t operator () (const NnetDiscriminativeExample *eg) const { + return (*this)(*eg); + } +}; + + +/// This comparator object compares just the structural aspects of the +/// NnetDiscriminativeExample without looking at the value of the features. 
+struct NnetDiscriminativeExampleStructureCompare {
+  bool operator () (const NnetDiscriminativeExample &a,
+                    const NnetDiscriminativeExample &b) const;
+  // We also provide a version of this that works from pointers.
+  bool operator () (const NnetDiscriminativeExample *a,
+                    const NnetDiscriminativeExample *b) const {
+    return (*this)(*a, *b);
+  }
+};
+
+
 /**
    Appends the given vector of examples (which must be non-empty) into
    a single output example.
@@ -140,13 +167,12 @@ struct NnetDiscriminativeExample {
    MergeExamples() routine while avoiding having to rewrite code.
 */
 void MergeDiscriminativeExamples(
-    bool compress, std::vector<NnetDiscriminativeExample> *input,
+    bool compress,
+    std::vector<NnetDiscriminativeExample> *input,
     NnetDiscriminativeExample *output);

 // called from MergeDiscriminativeExamples, this function merges the Supervision
 // objects into one.  Requires (and checks) that they all have the same name.
-
 void MergeSupervision(
     const std::vector<const NnetDiscriminativeSupervision*> &inputs,
     NnetDiscriminativeSupervision *output);
@@ -194,11 +220,63 @@ void GetDiscriminativeComputationRequest(const Nnet &nnet,
                                bool use_xent_derivative,
                                ComputationRequest *computation_request);

-
 typedef TableWriter<KaldiObjectHolder<NnetDiscriminativeExample > > NnetDiscriminativeExampleWriter;
 typedef SequentialTableReader<KaldiObjectHolder<NnetDiscriminativeExample > > SequentialNnetDiscriminativeExampleReader;
 typedef RandomAccessTableReader<KaldiObjectHolder<NnetDiscriminativeExample > > RandomAccessNnetDiscriminativeExampleReader;
+
+/// This function returns the 'size' of a discriminative example as defined for
+/// purposes of merging egs, which is defined as the largest number of Indexes
+/// in any of the inputs or outputs of the example.
+int32 GetNnetDiscriminativeExampleSize(const NnetDiscriminativeExample &a);
+
+
+/// This class is responsible for arranging examples in groups that have the
+/// same structure (i.e. the same input and output indexes), and outputting them
+/// in suitable minibatches as defined by ExampleMergingConfig.
+class DiscriminativeExampleMerger {
+ public:
+  DiscriminativeExampleMerger(const ExampleMergingConfig &config,
+                              NnetDiscriminativeExampleWriter *writer);
+
+  // This function accepts an example, and if possible, writes a merged example
+  // out.  The ownership of the pointer 'a' is transferred to this class when
+  // you call this function.
+  void AcceptExample(NnetDiscriminativeExample *a);
+
+  // This function announces to the class that the input has finished, so it
+  // should flush out any smaller-sized minibatches, as dictated by the config.
+  // This will be called in the destructor, but you can call it explicitly when
+  // all the input is done if you want to.
+  // It also prints the stats.
+  void Finish();
+
+  // returns a suitable exit status for a program.
+  bool ExitStatus() { return num_egs_written_ > 0; }
+
+  ~DiscriminativeExampleMerger() { Finish(); }
+ private:
+  // called by Finish() and AcceptExample().  Merges, updates the stats, and
+  // writes.  The 'egs' is non-const only because the egs are temporarily
+  // changed inside MergeDiscriminativeExamples().  The pointer 'egs' is still
+  // owned by the caller.
+  void WriteMinibatch(std::vector<NnetDiscriminativeExample> *egs);
+
+  bool finished_;
+  int32 num_egs_written_;
+  const ExampleMergingConfig &config_;
+  NnetDiscriminativeExampleWriter *writer_;
+  ExampleSizeStats stats_;
+
+  // Note: the "key" into the egs is the first element of the vector.
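// Illustrative sketch (not part of this patch): because the map below uses
// the structural hasher/comparator defined above, two different pointers act
// as the "same" key whenever the egs they point to have identical structure,
// e.g. (names here are illustrative):
//
//   NnetDiscriminativeExample *e1 = ..., *e2 = ...;  // same indexes/layout,
//                                                    // different feature data
//   eg_to_egs_[e1].push_back(e1);  // e1 becomes the key for this structure
//   eg_to_egs_[e2].push_back(e2);  // appends to e1's bucket; no new key
//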
+ typedef unordered_map, + NnetDiscriminativeExampleStructureHasher, + NnetDiscriminativeExampleStructureCompare> MapType; + MapType eg_to_egs_; +}; + + } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-example-utils.cc b/src/nnet3/nnet-example-utils.cc index e88eff71e77..77395759d8d 100644 --- a/src/nnet3/nnet-example-utils.cc +++ b/src/nnet3/nnet-example-utils.cc @@ -815,6 +815,23 @@ void UtteranceSplitter::SetOutputWeights( } } +int32 ExampleMergingConfig::IntSet::LargestValueInRange(int32 max_value) const { + KALDI_ASSERT(!ranges.empty()); + int32 ans = 0, num_ranges = ranges.size(); + for (int32 i = 0; i < num_ranges; i++) { + int32 possible_ans = 0; + if (max_value >= ranges[i].first) { + if (max_value >= ranges[i].second) + possible_ans = ranges[i].second; + else + possible_ans = max_value; + } + if (possible_ans > ans) + ans = possible_ans; + } + return ans; +} + // static bool ExampleMergingConfig::ParseIntSet(const std::string &str, ExampleMergingConfig::IntSet *int_set) { @@ -1166,9 +1183,6 @@ void ExampleMerger::Finish() { vec.clear(); } } - - - } diff --git a/src/nnet3/nnet-example-utils.h b/src/nnet3/nnet-example-utils.h index 75a47772fda..46d6906ff99 100644 --- a/src/nnet3/nnet-example-utils.h +++ b/src/nnet3/nnet-example-utils.h @@ -458,6 +458,7 @@ class ExampleSizeStats { /// indexes), and outputting them in suitable minibatches /// as defined by ExampleMergingConfig. class ExampleMerger { + public: ExampleMerger(const ExampleMergingConfig &config, NnetExampleWriter *writer); @@ -473,6 +474,9 @@ class ExampleMerger { // It also prints the stats. void Finish(); + // returns a suitable exit status for a program. + bool ExitStatus() { return num_egs_written_ > 0; } + ~ExampleMerger() { Finish(); }; private: // called by Finish() and AcceptExample(). Merges, updates the diff --git a/src/nnet3bin/nnet3-discriminative-merge-egs.cc b/src/nnet3bin/nnet3-discriminative-merge-egs.cc index 5c386bd40b3..0edf960fdf9 100644 --- a/src/nnet3bin/nnet3-discriminative-merge-egs.cc +++ b/src/nnet3bin/nnet3-discriminative-merge-egs.cc @@ -41,14 +41,12 @@ int main(int argc, char *argv[]) { "nnet3-discriminative-merge-egs --minibatch-size=128 ark:1.degs ark:- | nnet3-discriminative-train ... \n" "See also nnet3-discriminative-copy-egs\n"; - bool compress = false; - int32 minibatch_size = 64; - + ExampleMergingConfig merging_config; + merging_config.minibatch_size = 64; // change the default for this + // program.. anyway it will usually be + // set on the command line. 
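// Worked example (not part of this patch): assuming the parsed ranges
// {[32,64], [128,128]} (e.g. from a --minibatch-size option naming sizes
// 32-64 and 128), the IntSet::LargestValueInRange() code shown earlier in
// this patch gives:
//
//   LargestValueInRange(200) == 128  // 200 caps both ranges; 128 is largest
//   LargestValueInRange(100) == 64   // 100 < 128, so only 32-64 qualifies
//   LargestValueInRange(16)  == 0    // below every range: nothing fits
//
// which is how the mergers decide what to flush when the input ends.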
ParseOptions po(usage); - po.Register("minibatch-size", &minibatch_size, "Target size of minibatches " - "when merging (see also --measure-output-frames)"); - po.Register("compress", &compress, "If true, compress the output examples " - "(not recommended unless you are writing to disk"); + merging_config.Register(&po); po.Read(argc, argv); @@ -63,35 +61,14 @@ int main(int argc, char *argv[]) { SequentialNnetDiscriminativeExampleReader example_reader(examples_rspecifier); NnetDiscriminativeExampleWriter example_writer(examples_wspecifier); - std::vector examples; - examples.reserve(minibatch_size); - - int64 num_read = 0, num_written = 0; + DiscriminativeExampleMerger merger(merging_config, &example_writer); while (!example_reader.Done()) { const NnetDiscriminativeExample &cur_eg = example_reader.Value(); - examples.resize(examples.size() + 1); - examples.back() = cur_eg; - - bool minibatch_ready = - static_cast(examples.size()) >= minibatch_size; - - // Do Next() now, so we can test example_reader.Done() below . - example_reader.Next(); - num_read++; - - if (minibatch_ready || (example_reader.Done() && !examples.empty())) { - NnetDiscriminativeExample merged_eg; - MergeDiscriminativeExamples(compress, &examples, &merged_eg); - std::ostringstream ostr; - ostr << "merged-" << num_written; - num_written++; - std::string output_key = ostr.str(); - example_writer.Write(output_key, merged_eg); - examples.clear(); - } + merger.AcceptExample(new NnetDiscriminativeExample(cur_eg)); } - KALDI_LOG << "Merged " << num_read << " egs to " << num_written << '.'; - return (num_written != 0 ? 0 : 1); + // the merger itself prints the necessary diagnostics. + merger.Finish(); + return merger.ExitStatus(); } catch(const std::exception &e) { std::cerr << e.what() << '\n'; return -1; diff --git a/src/nnet3bin/nnet3-merge-egs.cc b/src/nnet3bin/nnet3-merge-egs.cc index 48ba2986512..33a65d140f2 100644 --- a/src/nnet3bin/nnet3-merge-egs.cc +++ b/src/nnet3bin/nnet3-merge-egs.cc @@ -59,23 +59,10 @@ int main(int argc, char *argv[]) { "nnet3-merge-egs --minibatch-size=512 ark:1.egs ark:- | nnet3-train-simple ... 
\n" "See also nnet3-copy-egs\n"; - bool compress = false; - int32 minibatch_size = 512; - bool measure_output_frames = true; - bool discard_partial_minibatches = false; - ParseOptions po(usage); - po.Register("minibatch-size", &minibatch_size, "Target size of minibatches " - "when merging (see also --measure-output-frames)"); - po.Register("measure-output-frames", &measure_output_frames, "If true, " - "--minibatch-size is a target number of total output frames; if " - "false, --minibatch-size is the number of input examples to " - "merge."); - po.Register("compress", &compress, "If true, compress the output examples " - "(not recommended unless you are writing to disk)"); - po.Register("discard-partial-minibatches", &discard_partial_minibatches, - "discard any partial minibatches of 'uneven' size that may be " - "encountered at the end."); + + ExampleMergingConfig merging_config; + merging_config.Register(&po); po.Read(argc, argv); @@ -87,44 +74,20 @@ int main(int argc, char *argv[]) { std::string examples_rspecifier = po.GetArg(1), examples_wspecifier = po.GetArg(2); + merging_config.ComputeDerived(); + SequentialNnetExampleReader example_reader(examples_rspecifier); NnetExampleWriter example_writer(examples_wspecifier); - std::vector examples; - examples.reserve(minibatch_size); - - int32 cur_num_output_frames = 0; + ExampleMerger merger(merging_config, &example_writer); - int64 num_read = 0, num_written = 0; while (!example_reader.Done()) { const NnetExample &cur_eg = example_reader.Value(); - examples.resize(examples.size() + 1); - examples.back() = cur_eg; - cur_num_output_frames += NumOutputIndexes(cur_eg); - bool minibatch_ready = - (measure_output_frames ? - cur_num_output_frames >= minibatch_size : - static_cast(examples.size()) >= minibatch_size); - - // Do Next() now, so we can test example_reader.Done() below . - example_reader.Next(); - num_read++; - - if (minibatch_ready || (!discard_partial_minibatches && - (example_reader.Done() && !examples.empty()))) { - NnetExample merged_eg; - MergeExamples(examples, compress, &merged_eg); - std::ostringstream ostr; - ostr << "merged-" << num_written; - num_written++; - std::string output_key = ostr.str(); - example_writer.Write(output_key, merged_eg); - examples.clear(); - cur_num_output_frames = 0; - } + merger.AcceptExample(new NnetExample(cur_eg)); } - KALDI_LOG << "Merged " << num_read << " egs to " << num_written << '.'; - return (num_written != 0 ? 0 : 1); + // the merger itself prints the necessary diagnostics. + merger.Finish(); + return merger.ExitStatus(); } catch(const std::exception &e) { std::cerr << e.what() << '\n'; return -1; diff --git a/src/util/kaldi-holder-inl.h b/src/util/kaldi-holder-inl.h index 4297af9a2e2..5768d4c6b03 100644 --- a/src/util/kaldi-holder-inl.h +++ b/src/util/kaldi-holder-inl.h @@ -97,6 +97,7 @@ template class KaldiObjectHolder { } void Swap(KaldiObjectHolder *other) { + // the t_ values are pointers so this is a shallow swap. 
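// Illustrative remark (not part of this patch): since only the two T*
// pointers are exchanged, Swap() is O(1) however large the held objects
// are, e.g.:
//
//   KaldiObjectHolder<Matrix<BaseFloat> > a, b;  // each holds a Matrix*
//   a.Swap(&b);   // exchanges the pointers; no matrix data is copied
//
// The new eg-merging code relies on the same shallow-swap idea when it
// Swap()s egs into the vector it passes to the merging functions.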
std::swap(t_, other->t_); } From 8a7918f132c0385c9e922126ecad9c3aa48b5706 Mon Sep 17 00:00:00 2001 From: Xingyu Na Date: Thu, 29 Dec 2016 22:02:40 +0800 Subject: [PATCH 164/530] cleanup-gale-mandarin-scoring (#1281) * cleanup-gale-mandarin-scoring * link gale_mandarin scoring script * add wer filters --- egs/gale_mandarin/s5/local/score.sh | 37 +-------- egs/gale_mandarin/s5/local/score_basic.sh | 83 ------------------- egs/gale_mandarin/s5/local/score_sclite.sh | 69 --------------- .../s5/local/score_sclite_conf.sh | 72 ---------------- egs/gale_mandarin/s5/local/wer_hyp_filter | 19 +++++ egs/gale_mandarin/s5/local/wer_ref_filter | 19 +++++ 6 files changed, 39 insertions(+), 260 deletions(-) mode change 100755 => 120000 egs/gale_mandarin/s5/local/score.sh delete mode 100755 egs/gale_mandarin/s5/local/score_basic.sh delete mode 100755 egs/gale_mandarin/s5/local/score_sclite.sh delete mode 100755 egs/gale_mandarin/s5/local/score_sclite_conf.sh create mode 100755 egs/gale_mandarin/s5/local/wer_hyp_filter create mode 100755 egs/gale_mandarin/s5/local/wer_ref_filter diff --git a/egs/gale_mandarin/s5/local/score.sh b/egs/gale_mandarin/s5/local/score.sh deleted file mode 100755 index 96b1e12a5f6..00000000000 --- a/egs/gale_mandarin/s5/local/score.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -orig_args= -for x in "$@"; do orig_args="$orig_args '$x'"; done - -# begin configuration section. we include all the options that score_sclite.sh or -# score_basic.sh might need, or parse_options.sh will die. -cmd=run.pl -stage=0 -min_lmwt=7 -max_lmwt=17 -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score.sh [options] " && exit; - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 - -if [ -f $data/stm ]; then # use sclite scoring. - echo "$data/stm exists: using local/score_sclite.sh" - eval local/score_sclite.sh $orig_args -else - echo "$data/stm does not exist: using local/score_basic.sh" - eval local/score_basic.sh $orig_args -fi diff --git a/egs/gale_mandarin/s5/local/score.sh b/egs/gale_mandarin/s5/local/score.sh new file mode 120000 index 00000000000..df664a0f1f1 --- /dev/null +++ b/egs/gale_mandarin/s5/local/score.sh @@ -0,0 +1 @@ +../steps/scoring/score_kaldi_cer.sh \ No newline at end of file diff --git a/egs/gale_mandarin/s5/local/score_basic.sh b/egs/gale_mandarin/s5/local/score_basic.sh deleted file mode 100755 index 171701820b7..00000000000 --- a/egs/gale_mandarin/s5/local/score_basic.sh +++ /dev/null @@ -1,83 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -# begin configuration section. -cmd=run.pl -min_lmwt=7 -max_lmwt=17 -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score_basic.sh [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 
- echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -dir=$3 - -model=$dir/../final.mdl # assume model one level up from decoding dir. - -hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl -[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; -hubdir=`dirname $hubscr` - -for f in $data/text $lang/words.txt $dir/lat.1.gz; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. eval2000 - -mkdir -p $dir/scoring/log - - -function filter_text { - perl -e 'foreach $w (@ARGV) { $bad{$w} = 1; } - while() { @A = split(" ", $_); $id = shift @A; print "$id "; - foreach $a (@A) { if (!defined $bad{$a}) { print "$a "; }} print "\n"; }' \ - '[NOISE]' '[LAUGHTER]' '[VOCALIZED-NOISE]' '' '%HESITATION' -} - -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ - lattice-best-path --lm-scale=LMWT --word-symbol-table=$lang/words.txt \ - "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/LMWT.tra || exit 1; - -for lmwt in `seq $min_lmwt $max_lmwt`; do - utils/int2sym.pl -f 2- $lang/words.txt <$dir/scoring/$lmwt.tra | \ - filter_text > $dir/scoring/$lmwt.txt || exit 1; -done - -filter_text <$data/text >$dir/scoring/text.filt - -unset LC_ALL -#for character error rate -cat $dir/scoring/text.filt | awk '{ print $1}' > $dir/scoring/utt_id -cat $dir/scoring/text.filt | awk '{{for (i = 2; i <= NF; i++) printf(" %s", $i);} printf("\n"); }' | sed -e 's/\(\S\)/\1 /g' > $dir/scoring/utt_tra -paste $dir/scoring/utt_id $dir/scoring/utt_tra > $dir/scoring/char.filt - -for lmwt in `seq $min_lmwt $max_lmwt`; do - cat $dir/scoring/$lmwt.txt | awk '{ print $1}' > $dir/scoring/utt_id - cat $dir/scoring/$lmwt.txt | awk '{{for (i = 2; i <= NF; i++) printf(" %s", $i);} printf("\n"); }' | sed -e 's/\(\S\)/\1 /g' > $dir/scoring/utt_tra - paste $dir/scoring/utt_id $dir/scoring/utt_tra > $dir/scoring/${lmwt}.char -done - -rm $dir/scoring/utt_tra $dir/scoring/utt_id - -export LC_ALL=C - -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - compute-wer --text --mode=present \ - ark:$dir/scoring/text.filt ark:$dir/scoring/LMWT.txt ">&" $dir/wer_LMWT || exit 1; - -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.cer.log \ - compute-wer --text --mode=present \ - ark:$dir/scoring/char.filt ark:$dir/scoring/LMWT.char ">&" $dir/cer_LMWT || exit 1; - -exit 0 diff --git a/egs/gale_mandarin/s5/local/score_sclite.sh b/egs/gale_mandarin/s5/local/score_sclite.sh deleted file mode 100755 index e7fcd8ad07a..00000000000 --- a/egs/gale_mandarin/s5/local/score_sclite.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -# begin configuration section. -cmd=run.pl -stage=0 -min_lmwt=7 -max_lmwt=17 -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score_sclite.sh [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." 
- echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -dir=$3 - -model=$dir/../final.mdl # assume model one level up from decoding dir. - -hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl -[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; -hubdir=`dirname $hubscr` - -for f in $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. eval2000 - -mkdir -p $dir/scoring/log - -if [ $stage -le 0 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - mkdir -p $dir/score_LMWT/ '&&' \ - lattice-1best --lm-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ - nbest-to-ctm ark:- - \| \ - utils/int2sym.pl -f 5 $lang/words.txt \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; -fi - -if [ $stage -le 1 ]; then -# Remove some stuff we don't want to score, from the ctm. - for x in $dir/score_*/$name.ctm; do - cp $x $dir/tmpf; - cat $dir/tmpf | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ - grep -v -E '|%HESITATION' > $x; - done -fi - -if [ $stage -le 2 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - cp $data/stm $dir/score_LMWT/ '&&' \ - $hubscr -p $hubdir -V -l english -h hub5 -g $data/glm -r $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm || exit 1; -fi - -exit 0 diff --git a/egs/gale_mandarin/s5/local/score_sclite_conf.sh b/egs/gale_mandarin/s5/local/score_sclite_conf.sh deleted file mode 100755 index a6a2759629d..00000000000 --- a/egs/gale_mandarin/s5/local/score_sclite_conf.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -# begin configuration section. -cmd=run.pl -stage=0 -decode_mbr=true -min_lmwt=7 -max_lmwt=17 -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score_sclite_conf.sh [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -dir=$3 - -model=$dir/../final.mdl # assume model one level up from decoding dir. - -hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl -[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; -hubdir=`dirname $hubscr` - -for f in $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. eval2000 - -mkdir -p $dir/scoring/log - -if [ $stage -le 0 ]; then - # the escaping gets a bit crazy here, sorry... 
- $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - mkdir -p $dir/score_LMWT/ '&&' \ - ACWT=\`perl -e \"print 1.0/LMWT\;\"\` '&&' \ - lattice-align-words $lang/phones/word_boundary.int $model "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-to-ctm-conf --decode-mbr=$decode_mbr --acoustic-scale=\$ACWT ark:- - \| \ - utils/int2sym.pl -f 5 $lang/words.txt \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; -fi - -if [ $stage -le 1 ]; then -# Remove some stuff we don't want to score, from the ctm. - for x in $dir/score_*/$name.ctm; do - cp $x $dir/tmpf; - cat $dir/tmpf | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ - grep -v -E '|%HESITATION' > $x; - done -fi - -if [ $stage -le 2 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - cp $data/stm $dir/score_LMWT/ '&&' \ - $hubscr -p $hubdir -V -l english -h hub5 -g $data/glm -r $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm || exit 1; -fi - -exit 0 diff --git a/egs/gale_mandarin/s5/local/wer_hyp_filter b/egs/gale_mandarin/s5/local/wer_hyp_filter new file mode 100755 index 00000000000..a1bfdb57efc --- /dev/null +++ b/egs/gale_mandarin/s5/local/wer_hyp_filter @@ -0,0 +1,19 @@ +#!/usr/bin/env perl + +@filters=('[NOISE]','[LAUGHTER]','[VOCALIZED-NOISE]','','%HESITATION'); + +foreach $w (@filters) { + $bad{$w} = 1; +} + +while() { + @A = split(" ", $_); + $id = shift @A; + print "$id "; + foreach $a (@A) { + if (!defined $bad{$a}) { + print "$a "; + } + } + print "\n"; +} diff --git a/egs/gale_mandarin/s5/local/wer_ref_filter b/egs/gale_mandarin/s5/local/wer_ref_filter new file mode 100755 index 00000000000..a1bfdb57efc --- /dev/null +++ b/egs/gale_mandarin/s5/local/wer_ref_filter @@ -0,0 +1,19 @@ +#!/usr/bin/env perl + +@filters=('[NOISE]','[LAUGHTER]','[VOCALIZED-NOISE]','','%HESITATION'); + +foreach $w (@filters) { + $bad{$w} = 1; +} + +while() { + @A = split(" ", $_); + $id = shift @A; + print "$id "; + foreach $a (@A) { + if (!defined $bad{$a}) { + print "$a "; + } + } + print "\n"; +} From aa3a9e5f4399ffe16616cb236965ab479a5b4ec0 Mon Sep 17 00:00:00 2001 From: Hossein Hadian Date: Thu, 29 Dec 2016 15:13:21 -0500 Subject: [PATCH 165/530] [egs] Add a new -3,3 layer to nnet3/TDNN script in swbd/s5c (#1287) --- .../s5c/local/nnet3/compare_wer_general.sh | 48 +++++++ egs/swbd/s5c/local/nnet3/run_tdnn.sh | 2 +- egs/swbd/s5c/local/nnet3/tuning/run_tdnn_c.sh | 128 ++++++++++++++++++ egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh | 128 ++++++++++++++++++ 4 files changed, 305 insertions(+), 1 deletion(-) create mode 100644 egs/swbd/s5c/local/nnet3/compare_wer_general.sh create mode 100644 egs/swbd/s5c/local/nnet3/tuning/run_tdnn_c.sh create mode 100644 egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh diff --git a/egs/swbd/s5c/local/nnet3/compare_wer_general.sh b/egs/swbd/s5c/local/nnet3/compare_wer_general.sh new file mode 100644 index 00000000000..11742173120 --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/compare_wer_general.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +echo -n "# System " +for x in $*; do printf "% 10s" $x; done +echo + +echo -n "# WER on train_dev(tg) " +for x in $*; do + wer=$(grep WER exp/nnet3/${x}_sp/decode_train_dev_hires_sw1_tg/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# WER on train_dev(fg) " +for x in $*; do + wer=$(grep WER exp/nnet3/${x}_sp/decode_train_dev_hires_sw1_fsh_fg/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# WER on 
eval2000(tg) " +for x in $*; do + wer=$(grep Sum exp/nnet3/${x}_sp/decode_eval2000_hires_sw1_tg/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# WER on eval2000(fg) " +for x in $*; do + wer=$(grep Sum exp/nnet3/${x}_sp/decode_eval2000_hires_sw1_fsh_fg/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer +done +echo + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep log-likelihood exp/nnet3/${x}_sp/log/compute_prob_train.combined.log | awk '{print $8}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep log-likelihood exp/nnet3/${x}_sp/log/compute_prob_valid.combined.log | awk '{print $8}') + printf "% 10s" $prob +done +echo + diff --git a/egs/swbd/s5c/local/nnet3/run_tdnn.sh b/egs/swbd/s5c/local/nnet3/run_tdnn.sh index 8189d3c0dba..95976e17d5b 120000 --- a/egs/swbd/s5c/local/nnet3/run_tdnn.sh +++ b/egs/swbd/s5c/local/nnet3/run_tdnn.sh @@ -1 +1 @@ -tuning/run_tdnn_b.sh \ No newline at end of file +tuning/run_tdnn_d.sh \ No newline at end of file diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_c.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_c.sh new file mode 100644 index 00000000000..58312d0057b --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_c.sh @@ -0,0 +1,128 @@ +#!/bin/bash + +# c is as a, but uses xconfig. + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +# System tdnn_a tdnn_c +# WER on train_dev(tg) 17.41 17.37 +# WER on train_dev(fg) 16.03 15.94 +# WER on eval2000(tg) 19.7 20.0 +# WER on eval2000(fg) 17.9 18.2 +# Final train prob -1.43675 -1.43781 +# Final valid prob -1.57486 -1.56895 + + +stage=0 +affix= +train_stage=-10 +has_fisher=true +speed_perturb=true +common_egs_dir= +reporting_email= +remove_egs=true + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,2) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-7,2) dim=1024 + relu-renorm-layer name=tdnn5 dim=1024 + + output-layer name=output input=tdnn5 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 10 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $ali_dir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +graph_dir=exp/tri4/graph_sw1_tg +if [ $stage -le 11 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh new file mode 100644 index 00000000000..df02fec38fd --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh @@ -0,0 +1,128 @@ +#!/bin/bash + +# d is as c, but with one extra layer. + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +# System tdnn_c tdnn_d +# WER on train_dev(tg) 17.37 16.72 +# WER on train_dev(fg) 15.94 15.31 +# WER on eval2000(tg) 20.0 19.2 +# WER on eval2000(fg) 18.2 17.8 +# Final train prob -1.43781 -1.22859 +# Final valid prob -1.56895 -1.354 + +stage=0 +affix= +train_stage=-10 +has_fisher=true +speed_perturb=true +common_egs_dir= +reporting_email= +remove_egs=true + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,2) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-7,2) dim=1024 + relu-renorm-layer name=tdnn6 dim=1024 + + output-layer name=output input=tdnn6 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 10 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $ali_dir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +graph_dir=exp/tri4/graph_sw1_tg +if [ $stage -le 11 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + From 48239bc4affa8dd0ffb38afe48530334ba1d1856 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Thu, 29 Dec 2016 21:22:07 +0100 Subject: [PATCH 166/530] [tools] correct python package dependency for Debian (thanks: Alexander Gorodetski) (#1293) --- tools/extras/check_dependencies.sh | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tools/extras/check_dependencies.sh b/tools/extras/check_dependencies.sh index b192cce668d..f45402e810e 100755 --- a/tools/extras/check_dependencies.sh +++ b/tools/extras/check_dependencies.sh @@ -12,6 +12,8 @@ function add_packages { opensuse_packages="$opensuse_packages $3"; } +status=0 + if ! which which >&/dev/null; then echo "$0: which is not installed." 
add_packages which debianutils which @@ -55,18 +57,24 @@ if which python >&/dev/null ; then if which python2.7 >&/dev/null || which python2 >&/dev/null ; then echo "$0: python 2.7 is not the default python. You should either make it" echo "$0: default or create an bash alias for kaldi scripts to run correctly" + status=1 else echo "$0: python 2.7 is not installed" - add_packages python2.7 python2.7 python2.7 + add_packages python2.7 python python2.7 fi fi else - echo "$0: python 2.7 is not installed" - add_packages python2.7 python2.7 python2.7 + if which python2.7 >&/dev/null || which python2 >&/dev/null ; then + echo "$0: python 2.7 is not the default python. You should either make it" + echo "$0: default or create an bash alias for kaldi scripts to run correctly" + status=1 + else + echo "$0: python is not installed (we need python 2.7)" + add_packages python2.7 python python2.7 + fi fi printed=false -status=0 if which apt-get >&/dev/null && ! which zypper >/dev/null; then # if we're using apt-get [but we're not OpenSuse, which uses zypper as the From 1245df5c90e4da405830a049532676a054ac20e7 Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Thu, 29 Dec 2016 15:51:26 -0800 Subject: [PATCH 167/530] [scripts][nnet3] Remove valid-{left,right}-context and priors-{left,right}-context from scripts, making them the same as regular context; use regular chunk-width for validation egs, not 1. --- .../s5/local/chain/run_tdnn_discriminative.sh | 32 +++++++------- .../tuning/run_blstm_6h_discriminative.sh | 32 +++++++------- .../tuning/run_tdnn_6h_discriminative.sh | 30 ++++++------- .../nnet3/train/chain_objf/acoustic_model.py | 9 ---- .../train/frame_level_objf/acoustic_model.py | 5 --- .../nnet3/train/frame_level_objf/raw_model.py | 5 --- egs/wsj/s5/steps/nnet3/chain/get_egs.sh | 29 +++++-------- egs/wsj/s5/steps/nnet3/chain/train_tdnn.sh | 4 ++ egs/wsj/s5/steps/nnet3/get_egs.sh | 28 +++++-------- .../s5/steps/nnet3/get_egs_discriminative.sh | 40 +++++------------- egs/wsj/s5/steps/nnet3/get_egs_targets.sh | 42 ++++++++----------- egs/wsj/s5/steps/nnet3/lstm/train.sh | 5 ++- egs/wsj/s5/steps/nnet3/tdnn/train.sh | 3 ++ egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh | 3 ++ egs/wsj/s5/steps/nnet3/train_dnn.py | 1 - egs/wsj/s5/steps/nnet3/train_raw_dnn.py | 1 - egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 2 - egs/wsj/s5/steps/nnet3/train_rnn.py | 2 - egs/wsj/s5/steps/nnet3/train_tdnn.sh | 3 ++ 19 files changed, 105 insertions(+), 171 deletions(-) diff --git a/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh b/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh index bda883f16c2..f4d40884058 100755 --- a/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh +++ b/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh @@ -10,7 +10,7 @@ set -e # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, # since the lattice generation runs in about real-time, so takes of the order of # 1000 hours of CPU time. -# +# stage=0 @@ -59,8 +59,8 @@ decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we deci if $use_gpu; then if ! 
cuda-compiled; then - cat </dev/null || true data_dirs= - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do steps/shift_feats.sh --cmd "$train_cmd --max-jobs-run 40" --nj 350 \ $x $train_data_dir exp/shift_hires mfcc_hires utils/fix_data_dir.sh ${train_data_dir}_fs$x @@ -103,7 +103,7 @@ if [ $frame_subsampling_factor -ne 1 ]; then awk -v nfs=$x '{print "fs"nfs"-"$0}' $train_ivector_dir/ivector_online.scp >> ${train_ivector_dir}_fs/ivector_online.scp done utils/combine_data.sh ${train_data_dir}_fs $data_dirs - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do rm -r ${train_data_dir}_fs$x done fi @@ -112,9 +112,9 @@ if [ $frame_subsampling_factor -ne 1 ]; then affix=_fs fi - + rm ${train_ivector_dir}_fs/ivector_online.scp 2>/dev/null || true -for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do +for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do awk -v nfs=$x '{print "fs"nfs"-"$0}' $train_ivector_dir/ivector_online.scp >> ${train_ivector_dir}_fs/ivector_online.scp done train_ivector_dir=${train_ivector_dir}_fs @@ -133,7 +133,7 @@ fi if [ -z "$lats_dir" ]; then lats_dir=${srcdir}_denlats${affix} if [ $stage -le 2 ]; then - nj=50 + nj=50 # this doesn't really affect anything strongly, except the num-jobs for one of # the phases of get_egs_discriminative.sh below. num_threads_denlats=6 @@ -147,16 +147,13 @@ if [ -z "$lats_dir" ]; then fi fi -model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` -model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` +model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` +model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` left_context=$[model_left_context + extra_left_context] right_context=$[model_right_context + extra_right_context] -valid_left_context=$[valid_left_context + frames_per_eg] -valid_right_context=$[valid_right_context + frames_per_eg] - -cmvn_opts=`cat $srcdir/cmvn_opts` +cmvn_opts=`cat $srcdir/cmvn_opts` if [ -z "$degs_dir" ]; then degs_dir=${srcdir}_degs${affix} @@ -176,8 +173,7 @@ if [ -z "$degs_dir" ]; then --adjust-priors false --acwt 1.0 \ --online-ivector-dir $train_ivector_dir \ --left-context $left_context --right-context $right_context \ - --valid-left-context $valid_left_context --valid-right-context $valid_right_context \ - --priors-left-context $valid_left_context --priors-right-context $valid_right_context $frame_subsampling_opt \ + $frame_subsampling_opt \ --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \ $train_data_dir $lang ${srcdir}_ali${affix} $lats_dir $srcdir/final.mdl $degs_dir ; fi @@ -202,7 +198,7 @@ if [ $stage -le 5 ]; then ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` iter=epoch$[x*frame_subsampling_factor] - + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --acwt 1.0 --post-decode-acwt 10.0 \ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ @@ -219,7 +215,7 @@ if [ $stage -le 5 ]; then done done wait - [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && 
exit 1 fi if [ $stage -le 6 ] && $cleanup; then diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6h_discriminative.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6h_discriminative.sh index b0264c17d8b..e3884df8711 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6h_discriminative.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6h_discriminative.sh @@ -8,7 +8,7 @@ set -e # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, # since the lattice generation runs in about real-time, so takes of the order of # 1000 hours of CPU time. -# +# . cmd.sh @@ -52,7 +52,7 @@ effective_learning_rate=0.000000125 max_param_change=1 num_jobs_nnet=4 num_epochs=4 -regularization_opts="--xent-regularize=0.1 --l2-regularize=0.00005" # Applicable for providing --xent-regularize and --l2-regularize options +regularization_opts="--xent-regularize=0.1 --l2-regularize=0.00005" # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 ## Decode options @@ -60,8 +60,8 @@ decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we deci if $use_gpu; then if ! cuda-compiled; then - cat </dev/null || true data_dirs= - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do steps/shift_feats.sh --cmd "$train_cmd --max-jobs-run 40" --nj 350 \ $x $train_data_dir exp/shift_hires/ mfcc_hires utils/fix_data_dir.sh ${train_data_dir}_fs$x @@ -110,7 +110,7 @@ if [ $frame_subsampling_factor -ne 1 ]; then awk -v nfs=$x '{print "fs"nfs"-"$0}' $online_ivector_dir/ivector_online.scp >> ${online_ivector_dir}_fs/ivector_online.scp done utils/combine_data.sh ${train_data_dir}_fs $data_dirs - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do rm -r ${train_data_dir}_fs$x done fi @@ -119,9 +119,9 @@ if [ $frame_subsampling_factor -ne 1 ]; then affix=_fs fi - + rm ${online_ivector_dir}_fs/ivector_online.scp 2>/dev/null || true -for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do +for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do awk -v nfs=$x '{print "fs"nfs"-"$0}' $online_ivector_dir/ivector_online.scp >> ${online_ivector_dir}_fs/ivector_online.scp done online_ivector_dir=${online_ivector_dir}_fs @@ -140,7 +140,7 @@ fi if [ -z "$lats_dir" ]; then lats_dir=${srcdir}_denlats${affix} if [ $stage -le 2 ]; then - nj=50 + nj=50 # this doesn't really affect anything strongly, except the num-jobs for one of # the phases of get_egs_discriminative.sh below. 
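# A quick worked example (editor's illustration, not part of the patch) for
# the frame-shifting loop used above: with frame_subsampling_factor=3,
# $[frame_subsampling_factor/2] is 1 (integer division), so
for x in `seq -1 1`; do echo "fs$x-"; done   # prints fs-1-, fs0-, fs1-
# which matches the fs<N>- prefixes the awk command prepends to each
# utterance-id, keeping the shifted copies of the data distinct.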
num_threads_denlats=6 @@ -154,16 +154,13 @@ if [ -z "$lats_dir" ]; then fi fi -model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` -model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` +model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` +model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` left_context=$[model_left_context + extra_left_context] right_context=$[model_right_context + extra_right_context] -valid_left_context=$[valid_left_context + frames_per_eg] -valid_right_context=$[valid_right_context + frames_per_eg] - -cmvn_opts=`cat $srcdir/cmvn_opts` +cmvn_opts=`cat $srcdir/cmvn_opts` if [ -z "$degs_dir" ]; then degs_dir=${srcdir}_degs${affix} @@ -183,8 +180,7 @@ if [ -z "$degs_dir" ]; then --adjust-priors false --acwt 1.0 \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ - --valid-left-context $valid_left_context --valid-right-context $valid_right_context \ - --priors-left-context $valid_left_context --priors-right-context $valid_right_context $frame_subsampling_opt \ + $frame_subsampling_opt \ --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \ $train_data_dir $lang ${srcdir}_ali${affix} $lats_dir $srcdir/final.mdl $degs_dir ; fi @@ -210,7 +206,7 @@ if [ $stage -le 5 ]; then ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` iter=epoch$x.adj - + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --acwt 1.0 --post-decode-acwt 10.0 \ --online-ivector-dir exp/nnet3/ivectors_${decode_set} $context_opts \ diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh index 85afa7bf9ca..9a7c4ca2859 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh @@ -85,7 +85,7 @@ effective_learning_rate=0.000000125 max_param_change=1 num_jobs_nnet=4 num_epochs=4 -regularization_opts="--xent-regularize=0.1 --l2-regularize=0.00005" # Applicable for providing --xent-regularize and --l2-regularize options +regularization_opts="--xent-regularize=0.1 --l2-regularize=0.00005" # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 ## Decode options @@ -93,8 +93,8 @@ decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we deci if $use_gpu; then if ! 
cuda-compiled; then - cat </dev/null || true data_dirs= - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do steps/shift_feats.sh --cmd "$train_cmd --max-jobs-run 40" --nj 350 \ $x $train_data_dir exp/shift_hires/ mfcc_hires utils/fix_data_dir.sh ${train_data_dir}_fs$x @@ -137,7 +137,7 @@ if [ $frame_subsampling_factor -ne 1 ]; then awk -v nfs=$x '{print "fs"nfs"-"$0}' $online_ivector_dir/ivector_online.scp >> ${online_ivector_dir}_fs/ivector_online.scp done utils/combine_data.sh ${train_data_dir}_fs $data_dirs - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do rm -r ${train_data_dir}_fs$x done fi @@ -146,9 +146,9 @@ if [ $frame_subsampling_factor -ne 1 ]; then affix=_fs fi - + rm ${online_ivector_dir}_fs/ivector_online.scp 2>/dev/null || true -for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do +for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do awk -v nfs=$x '{print "fs"nfs"-"$0}' $online_ivector_dir/ivector_online.scp >> ${online_ivector_dir}_fs/ivector_online.scp done online_ivector_dir=${online_ivector_dir}_fs @@ -167,7 +167,7 @@ fi if [ -z "$lats_dir" ]; then lats_dir=${srcdir}_denlats${affix} if [ $stage -le 2 ]; then - nj=50 + nj=50 # this doesn't really affect anything strongly, except the num-jobs for one of # the phases of get_egs_discriminative.sh below. num_threads_denlats=6 @@ -181,16 +181,13 @@ if [ -z "$lats_dir" ]; then fi fi -model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` -model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` +model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` +model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` left_context=$[model_left_context + extra_left_context] right_context=$[model_right_context + extra_right_context] -valid_left_context=$[valid_left_context + frames_per_eg] -valid_right_context=$[valid_right_context + frames_per_eg] - -cmvn_opts=`cat $srcdir/cmvn_opts` +cmvn_opts=`cat $srcdir/cmvn_opts` if [ -z "$degs_dir" ]; then degs_dir=${srcdir}_degs${affix} @@ -210,8 +207,7 @@ if [ -z "$degs_dir" ]; then --adjust-priors false --acwt 1.0 \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ - --valid-left-context $valid_left_context --valid-right-context $valid_right_context \ - --priors-left-context $valid_left_context --priors-right-context $valid_right_context $frame_subsampling_opt \ + $frame_subsampling_opt \ --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \ $train_data_dir $lang ${srcdir}_ali${affix} $lats_dir $srcdir/final.mdl $degs_dir ; fi @@ -237,7 +233,7 @@ if [ $stage -le 5 ]; then ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` iter=epoch$x.adj - + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --acwt 1.0 --post-decode-acwt 10.0 \ --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py index 0c871f07c2e..b9f3c386cbe 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py 
+++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py @@ -52,7 +52,6 @@ def create_denominator_fst(dir, tree_dir, run_opts): def generate_chain_egs(dir, data, lat_dir, egs_dir, left_context, right_context, run_opts, stage=0, - valid_left_context=None, valid_right_context=None, left_tolerance=None, right_tolerance=None, frame_subsampling_factor=3, alignment_subsampling_factor=3, @@ -72,8 +71,6 @@ def generate_chain_egs(dir, data, lat_dir, egs_dir, --transform-dir "{transform_dir}" \ --online-ivector-dir "{ivector_dir}" \ --left-context {left_context} --right-context {right_context} \ - --valid-left-context '{valid_left_context}' \ - --valid-right-context '{valid_right_context}' \ --left-tolerance '{left_tolerance}' \ --right-tolerance '{right_tolerance}' \ --frame-subsampling-factor {frame_subsampling_factor} \ @@ -93,12 +90,6 @@ def generate_chain_egs(dir, data, lat_dir, egs_dir, if online_ivector_dir is not None else ''), left_context=left_context, right_context=right_context, - valid_left_context=(valid_left_context - if valid_left_context is not None - else ''), - valid_right_context=(valid_right_context - if valid_right_context is not None - else ''), left_tolerance=(left_tolerance if left_tolerance is not None else ''), diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/acoustic_model.py index 1360f669f41..f3104f93089 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/acoustic_model.py @@ -20,7 +20,6 @@ def generate_egs(data, alidir, egs_dir, left_context, right_context, - valid_left_context, valid_right_context, run_opts, stage=0, feat_type='raw', online_ivector_dir=None, samples_per_iter=20000, frames_per_eg=20, srand=0, @@ -40,8 +39,6 @@ def generate_egs(data, alidir, egs_dir, --transform-dir "{transform_dir}" \ --online-ivector-dir "{ivector_dir}" \ --left-context {left_context} --right-context {right_context} \ - --valid-left-context {valid_left_context} \ - --valid-right-context {valid_right_context} \ --stage {stage} \ --samples-per-iter {samples_per_iter} \ --frames-per-eg {frames_per_eg} \ @@ -57,8 +54,6 @@ def generate_egs(data, alidir, egs_dir, if online_ivector_dir is not None else ''), left_context=left_context, right_context=right_context, - valid_left_context=valid_left_context, - valid_right_context=valid_right_context, stage=stage, samples_per_iter=samples_per_iter, frames_per_eg=frames_per_eg, srand=srand, data=data, alidir=alidir, egs_dir=egs_dir, diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/raw_model.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/raw_model.py index 58240dd2f1b..0fe8e3d4927 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/raw_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/raw_model.py @@ -19,7 +19,6 @@ def generate_egs_using_targets(data, targets_scp, egs_dir, left_context, right_context, - valid_left_context, valid_right_context, run_opts, stage=0, feat_type='raw', online_ivector_dir=None, target_type='dense', num_targets=-1, @@ -56,8 +55,6 @@ def generate_egs_using_targets(data, targets_scp, egs_dir, --transform-dir "{transform_dir}" \ --online-ivector-dir "{ivector_dir}" \ --left-context {left_context} --right-context {right_context} \ - --valid-left-context {valid_left_context} \ - --valid-right-context {valid_right_context} \ --stage {stage} \ --samples-per-iter {samples_per_iter} \ --frames-per-eg 
{frames_per_eg} \ @@ -75,8 +72,6 @@ def generate_egs_using_targets(data, targets_scp, egs_dir, if online_ivector_dir is not None else ''), left_context=left_context, right_context=right_context, - valid_left_context=valid_left_context, - valid_right_context=valid_right_context, stage=stage, samples_per_iter=samples_per_iter, frames_per_eg=frames_per_eg, srand=srand, num_targets=num_targets, diff --git a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh index 7b330f8f717..49ec694a19d 100755 --- a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh @@ -33,10 +33,6 @@ alignment_subsampling_factor=3 # frames-per-second of input alignments divided left_context=4 # amount of left-context per eg (i.e. extra frames of input features # not present in the output supervision). right_context=4 # amount of right-context per eg. -valid_left_context= # amount of left_context for validation egs, typically used in - # recurrent architectures to ensure matched condition with - # training egs -valid_right_context= # amount of right_context for validation egs compress=true # set this to false to disable compression (e.g. if you want to see whether # results are affected). @@ -292,20 +288,14 @@ if [ $stage -le 2 ]; then fi -egs_opts="--left-context=$left_context --right-context=$right_context --num-frames=$frames_per_eg --num-frames-overlap=$frames_overlap_per_eg --frame-subsampling-factor=$frame_subsampling_factor --compress=$compress" +egs_opts="--left-context=$left_context --right-context=$right_context --num-frames=$frames_per_eg --frame-subsampling-factor=$frame_subsampling_factor --compress=$compress" - -[ -z $valid_left_context ] && valid_left_context=$left_context; -[ -z $valid_right_context ] && valid_right_context=$right_context; -# don't do the overlap thing for the validation data. -valid_egs_opts="--left-context=$valid_left_context --right-context=$valid_right_context --num-frames=$frames_per_eg --frame-subsampling-factor=$frame_subsampling_factor --compress=$compress" - -ctc_supervision_all_opts="--lattice-input=true --frame-subsampling-factor=$alignment_subsampling_factor" +chain_supervision_all_opts="--lattice-input=true --frame-subsampling-factor=$alignment_subsampling_factor" [ ! -z $right_tolerance ] && \ - ctc_supervision_all_opts="$ctc_supervision_all_opts --right-tolerance=$right_tolerance" + chain_supervision_all_opts="$chain_supervision_all_opts --right-tolerance=$right_tolerance" [ ! 
-z $left_tolerance ] && \ - ctc_supervision_all_opts="$ctc_supervision_all_opts --left-tolerance=$left_tolerance" + chain_supervision_all_opts="$chain_supervision_all_opts --left-tolerance=$left_tolerance" echo $left_context > $dir/info/left_context echo $right_context > $dir/info/right_context @@ -320,17 +310,17 @@ if [ $stage -le 3 ]; then $cmd $dir/log/create_valid_subset.log \ lattice-align-phones --replace-output-symbols=true $latdir/final.mdl scp:$dir/lat_special.scp ark:- \| \ - chain-get-supervision $ctc_supervision_all_opts $chaindir/tree $chaindir/0.trans_mdl \ + chain-get-supervision $chain_supervision_all_opts $chaindir/tree $chaindir/0.trans_mdl \ ark:- ark:- \| \ nnet3-chain-get-egs $valid_ivector_opt --srand=$srand \ - $valid_egs_opts $chaindir/normalization.fst \ + $egs_opts $chaindir/normalization.fst \ "$valid_feats" ark,s,cs:- "ark:$dir/valid_all.cegs" || touch $dir/.error & $cmd $dir/log/create_train_subset.log \ lattice-align-phones --replace-output-symbols=true $latdir/final.mdl scp:$dir/lat_special.scp ark:- \| \ - chain-get-supervision $ctc_supervision_all_opts \ + chain-get-supervision $chain_supervision_all_opts \ $chaindir/tree $chaindir/0.trans_mdl ark:- ark:- \| \ nnet3-chain-get-egs $train_subset_ivector_opt --srand=$srand \ - $valid_egs_opts $chaindir/normalization.fst \ + $egs_opts $chaindir/normalization.fst \ "$train_subset_feats" ark,s,cs:- "ark:$dir/train_subset_all.cegs" || touch $dir/.error & wait; [ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1 @@ -379,9 +369,10 @@ if [ $stage -le 4 ]; then $cmd JOB=1:$nj $dir/log/get_egs.JOB.log \ utils/filter_scp.pl $sdata/JOB/utt2spk $dir/lat.scp \| \ lattice-align-phones --replace-output-symbols=true $latdir/final.mdl scp:- ark:- \| \ - chain-get-supervision $ctc_supervision_all_opts \ + chain-get-supervision $chain_supervision_all_opts \ $chaindir/tree $chaindir/0.trans_mdl ark:- ark:- \| \ nnet3-chain-get-egs $ivector_opt --srand=\$[JOB+$srand] $egs_opts \ + --num-frames-overlap=$frames_overlap_per_eg \ "$feats" ark,s,cs:- ark:- \| \ nnet3-chain-copy-egs --random=true --srand=\$[JOB+$srand] ark:- $egs_list || exit 1; fi diff --git a/egs/wsj/s5/steps/nnet3/chain/train_tdnn.sh b/egs/wsj/s5/steps/nnet3/chain/train_tdnn.sh index a8211c5fbc5..ada92e66ff4 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train_tdnn.sh +++ b/egs/wsj/s5/steps/nnet3/chain/train_tdnn.sh @@ -1,5 +1,7 @@ #!/bin/bash +# THIS SCRIPT IS DEPRECATED, see ./train.py + # note, TDNN is the same as what we used to call multisplice. # This version of the script, nnet3/chain/train_tdnn.sh, is for 'chain' systems. @@ -102,6 +104,8 @@ right_deriv_truncate= # number of time-steps to avoid using the deriv of, on th trap 'for pid in $(jobs -pr); do kill -TERM $pid; done' INT QUIT TERM + +echo "$0: THIS SCRIPT IS DEPRECATED" echo "$0 $@" # Print the command line for logging if [ -f path.sh ]; then . ./path.sh; fi diff --git a/egs/wsj/s5/steps/nnet3/get_egs.sh b/egs/wsj/s5/steps/nnet3/get_egs.sh index 79bfc25fff6..9992285baaa 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs.sh @@ -22,10 +22,6 @@ frames_per_eg=8 # number of frames of labels per example. more->less disk spa left_context=4 # amount of left-context per eg (i.e. extra frames of input features # not present in the output supervision). right_context=4 # amount of right-context per eg. 
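# Rough sketch (editor's illustration, assuming the conventions used earlier
# in this patch series): the per-eg context set here is normally derived from
# the model's own context plus any extra context the experiment asks for, e.g.
#   model_left_context=$(nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}')
#   left_context=$[model_left_context + extra_left_context]
# After this patch the same left/right context is used for training,
# validation and prior-computation egs alike, which is the point of removing
# the valid-* and priors-* variants below.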
-valid_left_context= # amount of left_context for validation egs, typically used in - # recurrent architectures to ensure matched condition with - # training egs -valid_right_context= # amount of right_context for validation egs compress=true # set this to false to disable compression (e.g. if you want to see whether # results are affected). @@ -37,8 +33,8 @@ reduce_frames_per_eg=true # If true, this script may reduce the frames_per_eg num_utts_subset=300 # number of utterances in validation and training # subsets used for shrinkage and diagnostics. num_valid_frames_combine=0 # #valid frames for combination weights at the very end. -num_train_frames_combine=10000 # # train frames for the above. -num_frames_diagnostic=4000 # number of frames for "compute_prob" jobs +num_train_frames_combine=60000 # # train frames for the above. +num_frames_diagnostic=10000 # number of frames for "compute_prob" jobs samples_per_iter=400000 # this is the target number of egs in each archive of egs # (prior to merging egs). We probably should have called # it egs_per_iter. This is just a guideline; it will pick @@ -266,11 +262,7 @@ if [ $stage -le 2 ]; then copy-int-vector ark:- ark,scp:$dir/ali.ark,$dir/ali.scp || exit 1; fi -egs_opts="--left-context=$left_context --right-context=$right_context --compress=$compress" - -[ -z $valid_left_context ] && valid_left_context=$left_context; -[ -z $valid_right_context ] && valid_right_context=$right_context; -valid_egs_opts="--left-context=$valid_left_context --right-context=$valid_right_context --compress=$compress" +egs_opts="--left-context=$left_context --right-context=$right_context --compress=$compress --num-frames=$frames_per_eg" echo $left_context > $dir/info/left_context echo $right_context > $dir/info/right_context @@ -284,28 +276,28 @@ if [ $stage -le 3 ]; then <$dir/ali.scp >$dir/ali_special.scp $cmd $dir/log/create_valid_subset.log \ - nnet3-get-egs --num-pdfs=$num_pdfs $valid_ivector_opt $valid_egs_opts "$valid_feats" \ + nnet3-get-egs --num-pdfs=$num_pdfs $valid_ivector_opt $egs_opts "$valid_feats" \ "ark,s,cs:ali-to-pdf $alidir/final.mdl scp:$dir/ali_special.scp ark:- | ali-to-post ark:- ark:- |" \ "ark:$dir/valid_all.egs" || touch $dir/.error & $cmd $dir/log/create_train_subset.log \ - nnet3-get-egs --num-pdfs=$num_pdfs $train_subset_ivector_opt $valid_egs_opts "$train_subset_feats" \ + nnet3-get-egs --num-pdfs=$num_pdfs $train_subset_ivector_opt $egs_opts "$train_subset_feats" \ "ark,s,cs:ali-to-pdf $alidir/final.mdl scp:$dir/ali_special.scp ark:- | ali-to-post ark:- ark:- |" \ "ark:$dir/train_subset_all.egs" || touch $dir/.error & wait; [ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1 echo "... Getting subsets of validation examples for diagnostics and combination." 
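# Worked example (editor's illustration, not part of the patch): the combine
# and diagnostic subset sizes above are specified in frames, while
# nnet3-subset-egs --n= counts whole egs, hence the divisions by
# $frames_per_eg used below; with the new defaults,
#   echo $[60000/8]    # num_train_frames_combine/frames_per_eg -> 7500 egs
#   echo $[10000/8]    # num_frames_diagnostic/frames_per_eg    -> 1250 egs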
$cmd $dir/log/create_valid_subset_combine.log \ - nnet3-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs \ + nnet3-subset-egs --n=$[$num_valid_frames_combine/$frames_per_eg] ark:$dir/valid_all.egs \ ark:$dir/valid_combine.egs || touch $dir/.error & $cmd $dir/log/create_valid_subset_diagnostic.log \ - nnet3-subset-egs --n=$num_frames_diagnostic ark:$dir/valid_all.egs \ + nnet3-subset-egs --n=$[$num_frames_diagnostic/$frames_per_eg] ark:$dir/valid_all.egs \ ark:$dir/valid_diagnostic.egs || touch $dir/.error & $cmd $dir/log/create_train_subset_combine.log \ - nnet3-subset-egs --n=$num_train_frames_combine ark:$dir/train_subset_all.egs \ + nnet3-subset-egs --n=$[$num_train_frames_combine/$frames_per_eg] ark:$dir/train_subset_all.egs \ ark:$dir/train_combine.egs || touch $dir/.error & $cmd $dir/log/create_train_subset_diagnostic.log \ - nnet3-subset-egs --n=$num_frames_diagnostic ark:$dir/train_subset_all.egs \ + nnet3-subset-egs --n=$[$num_frames_diagnostic/$frames_per_eg] ark:$dir/train_subset_all.egs \ ark:$dir/train_diagnostic.egs || touch $dir/.error & wait sleep 5 # wait for file system to sync. @@ -328,7 +320,7 @@ if [ $stage -le 4 ]; then echo "$0: Generating training examples on disk" # The examples will go round-robin to egs_list. $cmd JOB=1:$nj $dir/log/get_egs.JOB.log \ - nnet3-get-egs --num-pdfs=$num_pdfs $ivector_opt $egs_opts --num-frames=$frames_per_eg "$feats" \ + nnet3-get-egs --num-pdfs=$num_pdfs $ivector_opt $egs_opts "$feats" \ "ark,s,cs:filter_scp.pl $sdata/JOB/utt2spk $dir/ali.scp | ali-to-pdf $alidir/final.mdl scp:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \ nnet3-copy-egs --random=true --srand=\$[JOB+$srand] ark:- $egs_list || exit 1; fi diff --git a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh index f9a8d677e09..1d3470c49b6 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh @@ -21,13 +21,7 @@ frame_subsampling_factor=1 # ratio between input and output frame-rate of nnet. left_context=4 # amount of left-context per eg (i.e. extra frames of input features # not present in the output supervision). right_context=4 # amount of right-context per eg. -valid_left_context= # amount of left_context for validation egs, typically used in - # recurrent architectures to ensure matched condition with - # training egs -valid_right_context= # amount of right_context for validation egs adjust_priors=true -priors_left_context= # amount of left_context for priors egs -priors_right_context= # amount of right_context for priors egs compress=true # set this to false to disable compression (e.g. if you want to see whether # results are affected). num_utts_subset=80 # number of utterances in validation and training @@ -54,7 +48,9 @@ cmvn_opts= # can be used for specifying CMVN options, if feature type is not ld # it doesn't make sense to use different options than were used as input to the # LDA transform). This is used to turn off CMVN in the online-nnet experiments. -num_priors_subset=100 +num_priors_subset=1000 # number of utterances used to calibrate the per-state + # priors. Note: these don't have to be held out from + # the training data. num_archives_priors=10 # End configuration section. 
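# Background note (an editor's assumption based on how priors are handled
# elsewhere in nnet3, not stated in this patch): the priors egs are only used
# to average the trained network's output posteriors so that the per-pdf
# priors stored in the final model can be recalibrated; no parameters are
# updated from them, which is why, as the comment above says, these
# utterances don't have to be held out from the training data.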
@@ -279,37 +275,22 @@ fi splitter_opts="--supervision-splitter.determinize=$determinize --supervision-splitter.minimize=$minimize --supervision-splitter.remove_output_symbols=$remove_output_symbols --supervision-splitter.remove_epsilons=$remove_epsilons --supervision-splitter.collapse-transition-ids=$collapse_transition_ids --supervision-splitter.acoustic-scale=$acwt" -[ -z $valid_left_context ] && valid_left_context=$left_context; -[ -z $valid_right_context ] && valid_right_context=$right_context; - -[ -z $priors_left_context ] && priors_left_context=$left_context; -[ -z $priors_right_context ] && priors_right_context=$right_context; - left_context=$[left_context+frame_subsampling_factor/2] right_context=$[right_context+frame_subsampling_factor/2] -egs_opts="--left-context=$left_context --right-context=$right_context --num-frames=$frames_per_eg --num-frames-overlap=$frames_overlap_per_eg --frame-subsampling-factor=$frame_subsampling_factor --compress=$compress $splitter_opts" - -valid_left_context=$[valid_left_context+frame_subsampling_factor/2] -valid_right_context=$[valid_right_context+frame_subsampling_factor/2] +egs_opts="--left-context=$left_context --right-context=$right_context --num-frames=$frames_per_eg --compress=$compress --frame-subsampling-factor=$frame_subsampling_factor $splitter_opts" -# don't do the overlap thing for the validation data. -valid_egs_opts="--left-context=$valid_left_context --right-context=$valid_right_context --num-frames=$frames_per_eg --frame-subsampling-factor=$frame_subsampling_factor --compress=$compress $splitter_opts" - -priors_left_context=$[priors_left_context+frame_subsampling_factor/2] -priors_right_context=$[priors_right_context+frame_subsampling_factor/2] - -# don't do the overlap thing for the priors computation data. -priors_egs_opts="--left-context=$priors_left_context --right-context=$priors_right_context --num-frames=1 --compress=$compress" +# don't do the overlap thing for the priors computation data-- but do use the +# same num-frames for the eg, which would be much more efficient in case it's a +# recurrent model and has a lot of frames of context. In any case we're not +# doing SGD so there is no benefit in having short chunks. 
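# Quick arithmetic illustration (editor's note, not part of the patch) for
# the context bump a few lines up: with frame_subsampling_factor=3,
#   echo $[4+3/2]   # left_context=4 plus integer 3/2 -> 5
# i.e. one extra frame of context on each side, presumably to cover the
# half-frame offsets introduced by frame subsampling.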
+priors_egs_opts="--left-context=$left_context --right-context=$right_context --num-frames=$frames_per_eg --compress=$compress" supervision_all_opts="--frame-subsampling-factor=$frame_subsampling_factor" echo $left_context > $dir/info/left_context echo $right_context > $dir/info/right_context -echo $priors_left_context > $dir/info/priors_left_context -echo $priors_right_context > $dir/info/priors_right_context - echo $frame_subsampling_factor > $dir/info/frame_subsampling_factor @@ -368,7 +349,7 @@ if [ $stage -le 4 ]; then $cmd $dir/log/create_valid_subset.log \ discriminative-get-supervision $supervision_all_opts \ scp:$dir/ali_special.scp scp:$dir/lat_special.scp ark:- \| \ - nnet3-discriminative-get-egs $valid_ivector_opt $valid_egs_opts \ + nnet3-discriminative-get-egs $valid_ivector_opt $egs_opts \ $dir/final.mdl "$valid_feats" ark,s,cs:- "ark:$dir/valid_diagnostic.degs" || touch $dir/.error & $cmd $dir/log/create_train_subset.log \ @@ -405,6 +386,7 @@ if [ $stage -le 5 ]; then "scp:utils/filter_scp.pl $sdata/JOB/utt2spk $dir/ali.scp |" \ "ark,s,cs:gunzip -c $denlatdir/lat.JOB.gz |" ark:- \| \ nnet3-discriminative-get-egs $ivector_opt $egs_opts \ + --num-frames-overlap=$frames_overlap_per_eg \ $dir/final.mdl "$feats" ark,s,cs:- ark:- \| \ nnet3-discriminative-copy-egs --random=true --srand=JOB ark:- $degs_list || exit 1; fi diff --git a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh index 309c89cf99d..5221916e5c0 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh @@ -1,11 +1,11 @@ #!/bin/bash -# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey). # 2015-2016 Vimal Manohar # Apache 2.0. # This script is similar to steps/nnet3/get_egs.sh but used -# when getting general targets (not from alignment directory) for raw nnet +# when getting general targets (not from alignment directory) for raw nnet # # This script, which will generally be called from other neural-net training # scripts, extracts the training examples used to train the neural net (and also @@ -21,7 +21,7 @@ # Begin configuration section. cmd=run.pl feat_type=raw # set it to 'lda' to use LDA features. -target_type=sparse # dense to have dense targets, +target_type=sparse # dense to have dense targets, # sparse to have posteriors targets num_targets= # required for target-type=sparse with raw nnet frames_per_eg=8 # number of frames of labels per example. more->less disk space and @@ -30,10 +30,6 @@ frames_per_eg=8 # number of frames of labels per example. more->less disk spa left_context=4 # amount of left-context per eg (i.e. extra frames of input features # not present in the output supervision). right_context=4 # amount of right-context per eg. -valid_left_context= # amount of left_context for validation egs, typically used in - # recurrent architectures to ensure matched condition with - # training egs -valid_right_context= # amount of right_context for validation egs compress=true # set this to false to disable compression (e.g. if you want to see whether # results are affected). @@ -45,15 +41,15 @@ reduce_frames_per_eg=true # If true, this script may reduce the frames_per_eg num_utts_subset=300 # number of utterances in validation and training # subsets used for shrinkage and diagnostics. num_valid_frames_combine=0 # #valid frames for combination weights at the very end. -num_train_frames_combine=10000 # # train frames for the above. 
-num_frames_diagnostic=4000 # number of frames for "compute_prob" jobs +num_train_frames_combine=60000 # # train frames for the above. +num_frames_diagnostic=10000 # number of frames for "compute_prob" jobs samples_per_iter=400000 # this is the target number of egs in each archive of egs # (prior to merging egs). We probably should have called # it egs_per_iter. This is just a guideline; it will pick # a number that divides the number of samples in the # entire data. -transform_dir= +transform_dir= stage=0 nj=6 # This should be set to the maximum number of jobs you are @@ -254,11 +250,7 @@ if [ -e $dir/storage ]; then done fi -egs_opts="--left-context=$left_context --right-context=$right_context --compress=$compress" - -[ -z $valid_left_context ] && valid_left_context=$left_context; -[ -z $valid_right_context ] && valid_right_context=$right_context; -valid_egs_opts="--left-context=$valid_left_context --right-context=$valid_right_context --compress=$compress" +egs_opts="--left-context=$left_context --right-context=$right_context --compress=$compress --num-frames=$frames_per_eg" echo $left_context > $dir/info/left_context echo $right_context > $dir/info/right_context @@ -274,12 +266,12 @@ if [ $target_type == "dense" ]; then fi if [ -z "$num_targets" ]; then - echo "$0: num-targets is not set" + echo "$0: num-targets is not set" exit 1 fi case $target_type in - "dense") + "dense") get_egs_program="nnet3-get-egs-dense-targets --num-targets=$num_targets" targets="ark:utils/filter_scp.pl --exclude $dir/valid_uttlist $targets_scp_split | copy-feats scp:- ark:- |" @@ -289,7 +281,7 @@ case $target_type in "sparse") get_egs_program="nnet3-get-egs --num-pdfs=$num_targets" targets="ark:utils/filter_scp.pl --exclude $dir/valid_uttlist $targets_scp_split | ali-to-post scp:- ark:- |" - valid_targets="ark:utils/filter_scp.pl $dir/valid_uttlist $targets_scp | ali-to-post scp:- ark:- |" + valid_targets="ark:utils/filter_scp.pl $dir/valid_uttlist $targets_scp | ali-to-post scp:- ark:- |" train_subset_targets="ark:utils/filter_scp.pl $dir/train_subset_uttlist $targets_scp | ali-to-post scp:- ark:- |" ;; default) @@ -302,29 +294,29 @@ if [ $stage -le 3 ]; then rm -f $dir/.error 2>/dev/null $cmd $dir/log/create_valid_subset.log \ $get_egs_program \ - $valid_ivector_opt $valid_egs_opts "$valid_feats" \ + $valid_ivector_opt $egs_opts "$valid_feats" \ "$valid_targets" \ "ark:$dir/valid_all.egs" || touch $dir/.error & $cmd $dir/log/create_train_subset.log \ $get_egs_program \ - $train_subset_ivector_opt $valid_egs_opts "$train_subset_feats" \ + $train_subset_ivector_opt $egs_opts "$train_subset_feats" \ "$train_subset_targets" \ "ark:$dir/train_subset_all.egs" || touch $dir/.error & wait; [ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1 echo "... Getting subsets of validation examples for diagnostics and combination." 
$cmd $dir/log/create_valid_subset_combine.log \ - nnet3-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs \ + nnet3-subset-egs --n=$[$num_valid_frames_combine/$frames_per_eg] ark:$dir/valid_all.egs \ ark:$dir/valid_combine.egs || touch $dir/.error & $cmd $dir/log/create_valid_subset_diagnostic.log \ - nnet3-subset-egs --n=$num_frames_diagnostic ark:$dir/valid_all.egs \ + nnet3-subset-egs --n=$[$num_frames_diagnostic/$frames_per_eg] ark:$dir/valid_all.egs \ ark:$dir/valid_diagnostic.egs || touch $dir/.error & $cmd $dir/log/create_train_subset_combine.log \ - nnet3-subset-egs --n=$num_train_frames_combine ark:$dir/train_subset_all.egs \ + nnet3-subset-egs --n=$[$num_train_frames_combine/$frames_per_eg] ark:$dir/train_subset_all.egs \ ark:$dir/train_combine.egs || touch $dir/.error & $cmd $dir/log/create_train_subset_diagnostic.log \ - nnet3-subset-egs --n=$num_frames_diagnostic ark:$dir/train_subset_all.egs \ + nnet3-subset-egs --n=$[$num_frames_diagnostic/$frames_per_eg] ark:$dir/train_subset_all.egs \ ark:$dir/train_diagnostic.egs || touch $dir/.error & wait sleep 5 # wait for file system to sync. @@ -348,7 +340,7 @@ if [ $stage -le 4 ]; then # The examples will go round-robin to egs_list. $cmd JOB=1:$nj $dir/log/get_egs.JOB.log \ $get_egs_program \ - $ivector_opt $egs_opts --num-frames=$frames_per_eg "$feats" "$targets" \ + $ivector_opt $egs_opts "$feats" "$targets" \ ark:- \| \ nnet3-copy-egs --random=true --srand=\$[JOB+$srand] ark:- $egs_list || exit 1; fi diff --git a/egs/wsj/s5/steps/nnet3/lstm/train.sh b/egs/wsj/s5/steps/nnet3/lstm/train.sh index 5be69aacff0..3f9b7bccb06 100755 --- a/egs/wsj/s5/steps/nnet3/lstm/train.sh +++ b/egs/wsj/s5/steps/nnet3/lstm/train.sh @@ -1,5 +1,7 @@ #!/bin/bash +# THIS SCRIPT IS DEPRECATED, see ../train_rnn.py + # Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey). # 2013 Xiaohui Zhang # 2013 Guoguo Chen @@ -116,6 +118,7 @@ rand_prune=4.0 # speeds up LDA. trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM +echo "$0: THIS SCRIPT IS DEPRECATED" echo "$0 $@" # Print the command line for logging if [ -f path.sh ]; then . ./path.sh; fi @@ -298,8 +301,6 @@ if [ $stage -le -4 ] && [ -z "$egs_dir" ]; then extra_opts+=(--transform-dir $transform_dir) extra_opts+=(--left-context $left_context) extra_opts+=(--right-context $right_context) - extra_opts+=(--valid-left-context $((chunk_width + left_context))) - extra_opts+=(--valid-right-context $((chunk_width + right_context))) # Note: in RNNs we process sequences of labels rather than single label per sample echo "$0: calling get_egs.sh" diff --git a/egs/wsj/s5/steps/nnet3/tdnn/train.sh b/egs/wsj/s5/steps/nnet3/tdnn/train.sh index 49eeabcd9a8..dfe02931758 100755 --- a/egs/wsj/s5/steps/nnet3/tdnn/train.sh +++ b/egs/wsj/s5/steps/nnet3/tdnn/train.sh @@ -1,5 +1,7 @@ #!/bin/bash +# THIS SCRIPT IS DEPRECATED, see ../train_dnn.py + # note, TDNN is the same as what we used to call multisplice. # Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey). @@ -83,6 +85,7 @@ subset_dim=0 trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM +echo "$0: THIS SCRIPT IS DEPRECATED" echo "$0 $@" # Print the command line for logging if [ -f path.sh ]; then . 
./path.sh; fi diff --git a/egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh b/egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh index 6fe772f7e0d..560c6d2625a 100755 --- a/egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh +++ b/egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh @@ -1,5 +1,7 @@ #!/bin/bash +# THIS SCRIPT IS DEPRECATED, see ../train_raw_dnn.py + # note, TDNN is the same as what we used to call multisplice. # Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey). @@ -69,6 +71,7 @@ dense_targets=true # Use dense targets instead of sparse targets trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM +echo "$0: THIS SCRIPT IS DEPRECATED" echo "$0 $@" # Print the command line for logging if [ -f path.sh ]; then . ./path.sh; fi diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index 83170ea1e8e..03de8ae2298 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -209,7 +209,6 @@ def train(args, run_opts, background_process_handler): train_lib.acoustic_model.generate_egs( data=args.feat_dir, alidir=args.ali_dir, egs_dir=default_egs_dir, left_context=left_context, right_context=right_context, - valid_left_context=left_context, valid_right_context=right_context, run_opts=run_opts, frames_per_eg=args.frames_per_eg, srand=args.srand, diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index b8fe4a25384..83881d04e33 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -221,7 +221,6 @@ def train(args, run_opts, background_process_handler): data=args.feat_dir, targets_scp=args.targets_scp, egs_dir=default_egs_dir, left_context=left_context, right_context=right_context, - valid_left_context=left_context, valid_right_context=right_context, run_opts=run_opts, frames_per_eg=args.frames_per_eg, srand=args.srand, diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index 8366eccc993..a56368bb30f 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -300,8 +300,6 @@ def train(args, run_opts, background_process_handler): data=args.feat_dir, targets_scp=args.targets_scp, egs_dir=default_egs_dir, left_context=left_context, right_context=right_context, - valid_left_context=left_context + args.chunk_width, - valid_right_context=right_context + args.chunk_width, run_opts=run_opts, frames_per_eg=args.chunk_width, srand=args.srand, diff --git a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index 482c9a8ee03..cd242237f37 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -286,8 +286,6 @@ def train(args, run_opts, background_process_handler): train_lib.acoustic_model.generate_egs( data=args.feat_dir, alidir=args.ali_dir, egs_dir=default_egs_dir, left_context=left_context, right_context=right_context, - valid_left_context=left_context + args.chunk_width, - valid_right_context=right_context + args.chunk_width, run_opts=run_opts, frames_per_eg=args.chunk_width, srand=args.srand, diff --git a/egs/wsj/s5/steps/nnet3/train_tdnn.sh b/egs/wsj/s5/steps/nnet3/train_tdnn.sh index fb7a5a38f49..37540e488c2 100755 --- a/egs/wsj/s5/steps/nnet3/train_tdnn.sh +++ b/egs/wsj/s5/steps/nnet3/train_tdnn.sh @@ -1,5 +1,7 @@ #!/bin/bash +# THIS SCRIPT IS DEPRECATED, see ./train_dnn.py + # note, TDNN is the same as what we used to call multisplice. 
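# Side note (editor's illustration, not part of the patch) on the trap line
# that appears in these training scripts:
#   trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM
# `jobs -pr` lists the PIDs of the script's still-running background jobs, so
# an interrupted run kills its own parallel children instead of leaving them
# behind.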
# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey). @@ -81,6 +83,7 @@ frames_per_eg=8 # to be passed on to get_egs.sh trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM +echo "$0: THIS SCRIPT IS DEPRECATED" echo "$0 $@" # Print the command line for logging if [ -f path.sh ]; then . ./path.sh; fi From d99cbe213ea07c94636dd0101774613e121aaded Mon Sep 17 00:00:00 2001 From: "Nickolay V. Shmyrev" Date: Fri, 30 Dec 2016 04:32:49 +0300 Subject: [PATCH 168/530] [scripts] fix utils/lang/make_unk_lm.sh: all_nonsil_phones also required when pocolm is false (#1295) --- egs/wsj/s5/utils/lang/make_unk_lm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/utils/lang/make_unk_lm.sh b/egs/wsj/s5/utils/lang/make_unk_lm.sh index b46ab128b93..2564c53ad4d 100755 --- a/egs/wsj/s5/utils/lang/make_unk_lm.sh +++ b/egs/wsj/s5/utils/lang/make_unk_lm.sh @@ -141,6 +141,7 @@ awk -v dir=$dir -v ff=$first_phone_field \ { ok=1; for (n=ff; n<=NF; n++) { if ($n in sil) ok=0; } if (ok && NF>=ff) { for (n=ff;n<=NF;n++) printf("%s ",$n); print ""; } else { print("make_unk_lm.sh: info: not including dict line: ", $0) >"/dev/stderr" }}' <$src_dict >$dir/training.txt +cat $dir/training.txt | awk '{for(n=1;n<=NF;n++) seen[$n]=1; } END{for (k in seen) print k;}' > $dir/all_nonsil_phones num_dict_lines=$(wc -l <$src_dict) num_train_lines=$(wc -l < $dir/training.txt) @@ -180,7 +181,6 @@ if $use_pocolm; then cat $dir/training.txt | awk -v h=$heldout_ratio '{if(NR%h == 0) print; }' > $dir/pocolm/text/dev.txt cat $dir/training.txt | awk -v h=$heldout_ratio '{if(NR%h != 0) print; }' > $dir/pocolm/text/train.txt - cat $dir/training.txt | awk '{for(n=1;n<=NF;n++) seen[$n]=1; } END{for (k in seen) print k;}' > $dir/all_nonsil_phones # the following options are because we expect the amount of data to be small, # all the data subsampling isn't really needed and will increase the chance of From 078753bfa490f832a3a9b2fa4ca6efeb0723ec1c Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Thu, 29 Dec 2016 18:23:19 -0800 Subject: [PATCH 169/530] Removing the --reduce-frames-per-eg option as a simplification prior to changes to frames-per-eg code --- egs/wsj/s5/steps/nnet3/chain/get_egs.sh | 3 +-- egs/wsj/s5/steps/nnet3/get_egs.sh | 20 ++++-------------- .../s5/steps/nnet3/get_egs_discriminative.sh | 1 - egs/wsj/s5/steps/nnet3/get_egs_targets.sh | 21 ++++--------------- 4 files changed, 9 insertions(+), 36 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh index 49ec694a19d..9018c2e2472 100755 --- a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh @@ -21,8 +21,7 @@ cmd=run.pl feat_type=raw # set it to 'lda' to use LDA features. frames_per_eg=25 # number of feature frames example (not counting added context). # more->less disk space and less time preparing egs, but more - # I/O during training. note: the script may reduce this if - # reduce_frames_per_eg is true. + # I/O during training. frames_overlap_per_eg=0 # number of supervised frames of overlap that we aim for per eg. # can be useful to avoid wasted data if you're using --left-deriv-truncate # and --right-deriv-truncate. diff --git a/egs/wsj/s5/steps/nnet3/get_egs.sh b/egs/wsj/s5/steps/nnet3/get_egs.sh index 9992285baaa..5da6b6e0228 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs.sh @@ -18,18 +18,12 @@ cmd=run.pl feat_type=raw # set it to 'lda' to use LDA features. 
 frames_per_eg=8 # number of frames of labels per example. more->less disk space and
                 # less time preparing egs, but more I/O during training.
-                # note: the script may reduce this if reduce_frames_per_eg is true.
 left_context=4 # amount of left-context per eg (i.e. extra frames of input features
                # not present in the output supervision).
 right_context=4 # amount of right-context per eg.
 
 compress=true # set this to false to disable compression (e.g. if you want to see whether
               # results are affected).
 
-reduce_frames_per_eg=true # If true, this script may reduce the frames_per_eg
-                          # if there is only one archive and even with the
-                          # reduced frames_per_eg, the number of
-                          # samples_per_iter that would result is less than or
-                          # equal to the user-specified value.
 num_utts_subset=300 # number of utterances in validation and training
                     # subsets used for shrinkage and diagnostics.
 num_valid_frames_combine=0 # #valid frames for combination weights at the very end.
@@ -206,17 +200,11 @@ else
   feat_dim=$(cat $dir/info/feat_dim) || exit 1;
 fi
 
 # the + 1 is to round up, not down... we assume it doesn't divide exactly.
 num_archives=$[$num_frames/($frames_per_eg*$samples_per_iter)+1]
-# (for small data)- while reduce_frames_per_eg == true and the number of
-# archives is 1 and would still be 1 if we reduced frames_per_eg by 1, reduce it
-# by 1.
-reduced=false
-while $reduce_frames_per_eg && [ $frames_per_eg -gt 1 ] && \
-  [ $[$num_frames/(($frames_per_eg-1)*$samples_per_iter)] -eq 0 ]; do
-  frames_per_eg=$[$frames_per_eg-1]
-  num_archives=1
-  reduced=true
-done
-$reduced && echo "$0: reduced frames_per_eg to $frames_per_eg because amount of data is small."
+if [ $num_archives -eq 1 ]; then
+  echo "*** $0: warning: the --frames-per-eg is too large to generate one archive with"
+  echo "*** as many as --samples-per-iter egs in it. Consider reducing --frames-per-eg."
+  sleep 4
+fi
 
 # We may have to first create a smaller number of larger archives, with number
 # $num_archives_intermediate, if $num_archives is more than the maximum number
diff --git a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh
index 1d3470c49b6..243a98360e8 100755
--- a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh
+++ b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh
@@ -12,7 +12,6 @@ cmd=run.pl
 feat_type=raw # set it to 'lda' to use LDA features.
 frames_per_eg=150 # number of frames of labels per example. more->less disk space and
                   # less time preparing egs, but more I/O during training.
-                  # note: the script may reduce this if reduce_frames_per_eg is true.
 frames_overlap_per_eg=30 # number of supervised frames of overlap that we aim for per eg.
                          # can be useful to avoid wasted data if you're using --left-deriv-truncate
                          # and --right-deriv-truncate.
diff --git a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh
index 5221916e5c0..46cf6eb0c20 100755
--- a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh
+++ b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh
@@ -26,18 +26,11 @@ target_type=sparse # dense to have dense targets,
 num_targets= # required for target-type=sparse with raw nnet
 frames_per_eg=8 # number of frames of labels per example. more->less disk space and
                 # less time preparing egs, but more I/O during training.
-                # note: the script may reduce this if reduce_frames_per_eg is true.
 left_context=4 # amount of left-context per eg (i.e. extra frames of input features
                # not present in the output supervision).
 right_context=4 # amount of right-context per eg.
 compress=true # set this to false to disable compression (e.g. if you want to see whether
               # results are affected).
-
-reduce_frames_per_eg=true # If true, this script may reduce the frames_per_eg
-                          # if there is only one archive and even with the
-                          # reduced frames_per_eg, the number of
-                          # samples_per_iter that would result is less than or
-                          # equal to the user-specified value.
 num_utts_subset=300 # number of utterances in validation and training
                     # subsets used for shrinkage and diagnostics.
 num_valid_frames_combine=0 # #valid frames for combination weights at the very end.
@@ -200,17 +193,11 @@ else
   feat_dim=$(cat $dir/info/feat_dim) || exit 1;
 fi
 
 # the + 1 is to round up, not down... we assume it doesn't divide exactly.
 num_archives=$[$num_frames/($frames_per_eg*$samples_per_iter)+1]
-# (for small data)- while reduce_frames_per_eg == true and the number of
-# archives is 1 and would still be 1 if we reduced frames_per_eg by 1, reduce it
-# by 1.
-reduced=false
-while $reduce_frames_per_eg && [ $frames_per_eg -gt 1 ] && \
-  [ $[$num_frames/(($frames_per_eg-1)*$samples_per_iter)] -eq 0 ]; do
-  frames_per_eg=$[$frames_per_eg-1]
-  num_archives=1
-  reduced=true
-done
-$reduced && echo "$0: reduced frames_per_eg to $frames_per_eg because amount of data is small."
+if [ $num_archives -eq 1 ]; then
+  echo "*** $0: warning: the --frames-per-eg is too large to generate one archive with"
+  echo "*** as many as --samples-per-iter egs in it. Consider reducing --frames-per-eg."
+  sleep 4
+fi
 
 # We may have to first create a smaller number of larger archives, with number
 # $num_archives_intermediate, if $num_archives is more than the maximum number

From 9b29d2a901f43fbe160f45ca76065d39e3da4771 Mon Sep 17 00:00:00 2001
From: Dan Povey
Date: Thu, 29 Dec 2016 19:41:07 -0800
Subject: [PATCH 170/530] Modify scripts to use --max-deriv-time-relative
 option in place of --max-deriv-time (removes need to know num-frames)

---
 .../nnet3/train/chain_objf/acoustic_model.py  | 12 ++++-----
 .../nnet3/train/frame_level_objf/common.py    | 12 ++++-----
 egs/wsj/s5/steps/nnet3/chain/train.py         |  8 +++---
 egs/wsj/s5/steps/nnet3/get_egs.sh             | 26 ++++++++++++++-----
 egs/wsj/s5/steps/nnet3/get_egs_targets.sh     | 26 ++++++++++++++-----
 egs/wsj/s5/steps/nnet3/train_raw_rnn.py       |  8 +++---
 egs/wsj/s5/steps/nnet3/train_rnn.py           |  8 +++---
 7 files changed, 64 insertions(+), 36 deletions(-)

diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py
index b9f3c386cbe..42e04cba77c 100644
--- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py
+++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py
@@ -108,7 +108,7 @@ def train_new_models(dir, iter, srand, num_jobs,
                      num_archives_processed, num_archives,
                      raw_model_string, egs_dir,
                      left_context, right_context, apply_deriv_weights,
-                     min_deriv_time, max_deriv_time,
+                     min_deriv_time, max_deriv_time_relative,
                      l2_regularize, xent_regularize, leaky_hmm_coefficient,
                      momentum, max_param_change,
                      shuffle_buffer_size, num_chunk_per_minibatch,
@@ -130,9 +130,9 @@ def train_new_models(dir, iter, srand, num_jobs,
     if min_deriv_time is not None:
         deriv_time_opts.append("--optimization.min-deriv-time={0}".format(
             min_deriv_time))
-    if max_deriv_time is not None:
-        deriv_time_opts.append("--optimization.max-deriv-time={0}".format(
-            int(max_deriv_time)))
+    if max_deriv_time_relative is not None:
+        deriv_time_opts.append("--optimization.max-deriv-time-relative={0}".format(
+            int(max_deriv_time_relative)))
 
     processes = []
     for job in range(1, num_jobs+1):
@@ -209,7 
+209,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, num_hidden_layers, add_layers_period, left_context, right_context, apply_deriv_weights, min_deriv_time, - max_deriv_time, + max_deriv_time_relative, l2_regularize, xent_regularize, leaky_hmm_coefficient, momentum, max_param_change, shuffle_buffer_size, @@ -301,7 +301,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, left_context=left_context, right_context=right_context, apply_deriv_weights=apply_deriv_weights, min_deriv_time=min_deriv_time, - max_deriv_time=max_deriv_time, + max_deriv_time_relative=max_deriv_time_relative, l2_regularize=l2_regularize, xent_regularize=xent_regularize, leaky_hmm_coefficient=leaky_hmm_coefficient, diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index 87cae801e90..1afa532e914 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -30,7 +30,7 @@ def train_new_models(dir, iter, srand, num_jobs, shuffle_buffer_size, minibatch_size, cache_read_opt, run_opts, frames_per_eg=-1, - min_deriv_time=None, max_deriv_time=None): + min_deriv_time=None, max_deriv_time_relative=None): """ Called from train_one_iteration(), this model does one iteration of training with 'num_jobs' jobs, and writes files like exp/tdnn_a/24.{1,2,3,..}.raw @@ -59,9 +59,9 @@ def train_new_models(dir, iter, srand, num_jobs, if min_deriv_time is not None: deriv_time_opts.append("--optimization.min-deriv-time={0}".format( min_deriv_time)) - if max_deriv_time is not None: - deriv_time_opts.append("--optimization.max-deriv-time={0}".format( - max_deriv_time)) + if max_deriv_time_relative is not None: + deriv_time_opts.append("--optimization.max-deriv-time-relative={0}".format( + max_deriv_time_relative)) context_opts = "--left-context={0} --right-context={1}".format( left_context, right_context) @@ -140,7 +140,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, momentum, max_param_change, shuffle_buffer_size, run_opts, cv_minibatch_size=256, frames_per_eg=-1, - min_deriv_time=None, max_deriv_time=None, + min_deriv_time=None, max_deriv_time_relative=None, shrinkage_value=1.0, get_raw_nnet_from_am=True, background_process_handler=None): @@ -276,7 +276,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, cache_read_opt=cache_read_opt, run_opts=run_opts, frames_per_eg=frames_per_eg, min_deriv_time=min_deriv_time, - max_deriv_time=max_deriv_time) + max_deriv_time_relative=max_deriv_time_relative) [models_to_average, best_model] = common_train_lib.get_successful_models( num_jobs, '{0}/log/train.{1}.%.log'.format(dir, iter)) diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index 0254589be85..f93b7f8f68e 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -393,11 +393,11 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): args.final_effective_lrate) min_deriv_time = None - max_deriv_time = None + max_deriv_time_relative = None if args.deriv_truncate_margin is not None: min_deriv_time = -args.deriv_truncate_margin - model_left_context - max_deriv_time = (args.chunk_width - 1 + args.deriv_truncate_margin - + model_right_context) + max_deriv_time_relative = \ + args.deriv_truncate_margin + model_right_context logger.info("Training will run for {0} epochs = " "{1} iterations".format(args.num_epochs, num_iters)) @@ -444,7 +444,7 @@ def learning_rate(iter, 
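# Worked example (editor's illustration with assumed values, not part of the
# patch) for the deriv-time change above: with deriv_truncate_margin=8,
# model_left_context=40, model_right_context=0 and chunk_width=20,
#   min_deriv_time          = -8 - 40 = -48
#   max_deriv_time_relative =  8 + 0  =   8
# whereas the old absolute value was chunk_width - 1 + margin + right_context
# = 20 - 1 + 8 + 0 = 27; i.e. the relative form is just the absolute one
# minus (chunk_width - 1), which is why the chunk width / num-frames no
# longer needs to be known when setting the option.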
current_num_jobs, num_archives_processed): right_context=right_context, apply_deriv_weights=args.apply_deriv_weights, min_deriv_time=min_deriv_time, - max_deriv_time=max_deriv_time, + max_deriv_time_relative=max_deriv_time_relative, l2_regularize=args.l2_regularize, xent_regularize=args.xent_regularize, leaky_hmm_coefficient=args.leaky_hmm_coefficient, diff --git a/egs/wsj/s5/steps/nnet3/get_egs.sh b/egs/wsj/s5/steps/nnet3/get_egs.sh index 5da6b6e0228..e442dce9032 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs.sh @@ -18,6 +18,9 @@ cmd=run.pl feat_type=raw # set it to 'lda' to use LDA features. frames_per_eg=8 # number of frames of labels per example. more->less disk space and # less time preparing egs, but more I/O during training. + # Note: may in general be a comma-separated string of alternative + # durations (more useful when using large chunks, e.g. for BLSTMs); + # the first one (the principal num-frames) is preferred. left_context=4 # amount of left-context per eg (i.e. extra frames of input features # not present in the output supervision). right_context=4 # amount of right-context per eg. @@ -66,6 +69,11 @@ if [ $# != 3 ]; then echo " --feat-type # (raw is the default). The feature type you want" echo " # to use as input to the neural net." echo " --frames-per-eg # number of frames per eg on disk" + echo " # May be either a single number or a comma-separated list" + echo " # of alternatives (useful when training LSTMs, where the" + echo " # frames-per-eg is the chunk size, to get variety of chunk" + echo " # sizes). The first in the list is preferred and is used" + echo " # when working out the number of archives etc." echo " --left-context # Number of frames on left side to append for feature input" echo " --right-context # Number of frames on right side to append for feature input" echo " --num-frames-diagnostic <#frames;4000> # Number of frames used in computing (train,valid) diagnostics" @@ -198,8 +206,14 @@ else feat_dim=$(cat $dir/info/feat_dim) || exit 1; fi + +# the first field in frames_per_eg (which is a comma-separated list of numbers) +# is the 'principal' frames-per-eg, and for purposes of working out the number +# of archives we assume that this will be the average number of frames per eg. +frames_per_eg_principal=$(echo $frames_per_eg | cut -d, -f1) + # the + 1 is to round up, not down... we assume it doesn't divide exactly. -num_archives=$[$num_frames/($frames_per_eg*$samples_per_iter)+1] +num_archives=$[$num_frames/($frames_per_eg_principal*$samples_per_iter)+1] if [ $num_archives -eq 1 ]; then echo "*** $0: warning: the --frames-per-eg is too large to generate one archive with" echo "*** as many as --samples-per-iter egs in it. Consider reducing --frames-per-eg." @@ -222,7 +236,7 @@ num_archives=$[$archives_multiple*$num_archives_intermediate] echo $num_archives >$dir/info/num_archives echo $frames_per_eg >$dir/info/frames_per_eg # Work out the number of egs per archive -egs_per_archive=$[$num_frames/($frames_per_eg*$num_archives)] +egs_per_archive=$[$num_frames/($frames_per_eg_principal*$num_archives)] ! [ $egs_per_archive -le $samples_per_iter ] && \ echo "$0: script error: egs_per_archive=$egs_per_archive not <= samples_per_iter=$samples_per_iter" \ && exit 1; @@ -275,17 +289,17 @@ if [ $stage -le 3 ]; then [ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1 echo "... Getting subsets of validation examples for diagnostics and combination." 
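# Note on the --n values below (an illustrative aside, not part of the
# original script): nnet3-subset-egs counts egs rather than frames, so each
# frame budget is divided by the principal frames-per-eg. For example, with
# the default --num-frames-diagnostic of 4000 and frames_per_eg_principal=8,
# the diagnostic subsets get --n=500, i.e. roughly 4000 frames of data.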
$cmd $dir/log/create_valid_subset_combine.log \ - nnet3-subset-egs --n=$[$num_valid_frames_combine/$frames_per_eg] ark:$dir/valid_all.egs \ + nnet3-subset-egs --n=$[$num_valid_frames_combine/$frames_per_eg_principal] ark:$dir/valid_all.egs \ ark:$dir/valid_combine.egs || touch $dir/.error & $cmd $dir/log/create_valid_subset_diagnostic.log \ - nnet3-subset-egs --n=$[$num_frames_diagnostic/$frames_per_eg] ark:$dir/valid_all.egs \ + nnet3-subset-egs --n=$[$num_frames_diagnostic/$frames_per_eg_principal] ark:$dir/valid_all.egs \ ark:$dir/valid_diagnostic.egs || touch $dir/.error & $cmd $dir/log/create_train_subset_combine.log \ - nnet3-subset-egs --n=$[$num_train_frames_combine/$frames_per_eg] ark:$dir/train_subset_all.egs \ + nnet3-subset-egs --n=$[$num_train_frames_combine/$frames_per_eg_principal] ark:$dir/train_subset_all.egs \ ark:$dir/train_combine.egs || touch $dir/.error & $cmd $dir/log/create_train_subset_diagnostic.log \ - nnet3-subset-egs --n=$[$num_frames_diagnostic/$frames_per_eg] ark:$dir/train_subset_all.egs \ + nnet3-subset-egs --n=$[$num_frames_diagnostic/$frames_per_eg_principal] ark:$dir/train_subset_all.egs \ ark:$dir/train_diagnostic.egs || touch $dir/.error & wait sleep 5 # wait for file system to sync. diff --git a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh index 46cf6eb0c20..eeac84db969 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh @@ -26,6 +26,9 @@ target_type=sparse # dense to have dense targets, num_targets= # required for target-type=sparse with raw nnet frames_per_eg=8 # number of frames of labels per example. more->less disk space and # less time preparing egs, but more I/O during training. + # Note: may in general be a comma-separated string of alternative + # durations (more useful when using large chunks, e.g. for BLSTMs); + # the first one (the principal num-frames) is preferred. left_context=4 # amount of left-context per eg (i.e. extra frames of input features # not present in the output supervision). right_context=4 # amount of right-context per eg. @@ -73,6 +76,11 @@ if [ $# != 3 ]; then echo " --feat-type # (raw is the default). The feature type you want" echo " # to use as input to the neural net." echo " --frames-per-eg # number of frames per eg on disk" + echo " # May be either a single number or a comma-separated list" + echo " # of alternatives (useful when training LSTMs, where the" + echo " # frames-per-eg is the chunk size, to get variety of chunk" + echo " # sizes). The first in the list is preferred and is used" + echo " # when working out the number of archives etc." echo " --left-context # Number of frames on left side to append for feature input" echo " --right-context # Number of frames on right side to append for feature input" echo " --num-frames-diagnostic <#frames;4000> # Number of frames used in computing (train,valid) diagnostics" @@ -191,8 +199,14 @@ else feat_dim=$(cat $dir/info/feat_dim) || exit 1; fi + +# the first field in frames_per_eg (which is a comma-separated list of numbers) +# is the 'principal' frames-per-eg, and for purposes of working out the number +# of archives we assume that this will be the average number of frames per eg. +frames_per_eg_principal=$(echo $frames_per_eg | cut -d, -f1) + # the + 1 is to round up, not down... we assume it doesn't divide exactly. 
-num_archives=$[$num_frames/($frames_per_eg*$samples_per_iter)+1] +num_archives=$[$num_frames/($frames_per_eg_principal*$samples_per_iter)+1] if [ $num_archives -eq 1 ]; then echo "*** $0: warning: the --frames-per-eg is too large to generate one archive with" echo "*** as many as --samples-per-iter egs in it. Consider reducing --frames-per-eg." @@ -215,7 +229,7 @@ num_archives=$[$archives_multiple*$num_archives_intermediate] echo $num_archives >$dir/info/num_archives echo $frames_per_eg >$dir/info/frames_per_eg # Work out the number of egs per archive -egs_per_archive=$[$num_frames/($frames_per_eg*$num_archives)] +egs_per_archive=$[$num_frames/($frames_per_eg_principal*$num_archives)] ! [ $egs_per_archive -le $samples_per_iter ] && \ echo "$0: script error: egs_per_archive=$egs_per_archive not <= samples_per_iter=$samples_per_iter" \ && exit 1; @@ -293,17 +307,17 @@ if [ $stage -le 3 ]; then [ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1 echo "... Getting subsets of validation examples for diagnostics and combination." $cmd $dir/log/create_valid_subset_combine.log \ - nnet3-subset-egs --n=$[$num_valid_frames_combine/$frames_per_eg] ark:$dir/valid_all.egs \ + nnet3-subset-egs --n=$[$num_valid_frames_combine/$frames_per_eg_principal] ark:$dir/valid_all.egs \ ark:$dir/valid_combine.egs || touch $dir/.error & $cmd $dir/log/create_valid_subset_diagnostic.log \ - nnet3-subset-egs --n=$[$num_frames_diagnostic/$frames_per_eg] ark:$dir/valid_all.egs \ + nnet3-subset-egs --n=$[$num_frames_diagnostic/$frames_per_eg_principal] ark:$dir/valid_all.egs \ ark:$dir/valid_diagnostic.egs || touch $dir/.error & $cmd $dir/log/create_train_subset_combine.log \ - nnet3-subset-egs --n=$[$num_train_frames_combine/$frames_per_eg] ark:$dir/train_subset_all.egs \ + nnet3-subset-egs --n=$[$num_train_frames_combine/$frames_per_eg_principal] ark:$dir/train_subset_all.egs \ ark:$dir/train_combine.egs || touch $dir/.error & $cmd $dir/log/create_train_subset_diagnostic.log \ - nnet3-subset-egs --n=$[$num_frames_diagnostic/$frames_per_eg] ark:$dir/train_subset_all.egs \ + nnet3-subset-egs --n=$[$num_frames_diagnostic/$frames_per_eg_principal] ark:$dir/train_subset_all.egs \ ark:$dir/train_diagnostic.egs || touch $dir/.error & wait sleep 5 # wait for file system to sync. 
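To make the archive arithmetic above concrete, here is a small Python transcription of how both egs scripts now derive their archive count from the principal frames-per-eg. The function name and the numbers are invented for illustration; nothing here is read from a real data directory.

def num_archives_for(num_frames, frames_per_eg, samples_per_iter):
    # frames_per_eg may be a comma-separated list like '150,110,90'; only the
    # first ('principal') entry enters the arithmetic, on the assumption that
    # it approximates the average number of frames per eg.
    frames_per_eg_principal = int(frames_per_eg.split(",")[0])
    # the + 1 rounds up, mirroring num_archives=$[num/(feg*spi)+1] above.
    return num_frames // (frames_per_eg_principal * samples_per_iter) + 1

print(num_archives_for(18000000, "150,110,90", 20000))  # prints 7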
diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index a56368bb30f..e80ab93497b 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -367,11 +367,11 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): args.final_effective_lrate) min_deriv_time = None - max_deriv_time = None + max_deriv_time_relative = None if args.deriv_truncate_margin is not None: min_deriv_time = -args.deriv_truncate_margin - model_left_context - max_deriv_time = (args.chunk_width - 1 + args.deriv_truncate_margin - + model_right_context) + max_deriv_time_relative = \ + args.deriv_truncate_margin + model_right_context logger.info("Training will run for {0} epochs = " "{1} iterations".format(args.num_epochs, num_iters)) @@ -419,7 +419,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): left_context=left_context, right_context=right_context, min_deriv_time=min_deriv_time, - max_deriv_time=max_deriv_time, + max_deriv_time_relative=max_deriv_time_relative, momentum=args.momentum, max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, diff --git a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index cd242237f37..fbdb84b5963 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -362,11 +362,11 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): args.final_effective_lrate) min_deriv_time = None - max_deriv_time = None + max_deriv_time_relative = None if args.deriv_truncate_margin is not None: min_deriv_time = -args.deriv_truncate_margin - model_left_context - max_deriv_time = (args.chunk_width - 1 + args.deriv_truncate_margin - + model_right_context) + max_deriv_time_relative = \ + args.deriv_truncate_margin + model_right_context logger.info("Training will run for {0} epochs = " "{1} iterations".format(args.num_epochs, num_iters)) @@ -413,7 +413,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): left_context=left_context, right_context=right_context, min_deriv_time=min_deriv_time, - max_deriv_time=max_deriv_time, + max_deriv_time_relative=max_deriv_time_relative, momentum=args.momentum, max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, From daa2962867ef5ffd0cd9f9a44780b526d269cac6 Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Thu, 29 Dec 2016 22:12:17 -0800 Subject: [PATCH 171/530] Modify nnet3 python scripts to accept comma-separated alternatives in --egs.chunk-width options; remove deprecated option --num-bptt-steps. 
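The comma-separated chunk-width convention is easiest to see in isolation. The toy code below mirrors the validate_chunk_width()/principal_chunk_width() helpers this patch adds to steps/libs/nnet3/train/common.py; it is a standalone sketch for illustration, not the patch itself.

def principal_chunk_width(chunk_width):
    # '20,30,16' lists alternative chunk widths; the first entry is the
    # 'principal' width, used when working out numbers of archives etc.
    widths = [int(w) for w in chunk_width.split(",")]
    assert all(w >= 1 for w in widths)  # mirrors validate_chunk_width()
    return widths[0]

assert principal_chunk_width("20,30,16") == 20
assert principal_chunk_width("150") == 150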
--- egs/ami/s5b/local/nnet3/run_lstm.sh | 1 - egs/hkust/s5/local/nnet3/run_lstm.sh | 6 +-- .../nnet3/train/chain_objf/acoustic_model.py | 6 +-- egs/wsj/s5/steps/libs/nnet3/train/common.py | 35 ++++++++++++++-- .../train/frame_level_objf/acoustic_model.py | 6 +-- .../nnet3/train/frame_level_objf/common.py | 14 ++----- .../nnet3/train/frame_level_objf/raw_model.py | 6 +-- egs/wsj/s5/steps/nnet3/chain/train.py | 22 +++++----- egs/wsj/s5/steps/nnet3/train_dnn.py | 6 +-- egs/wsj/s5/steps/nnet3/train_raw_dnn.py | 6 +-- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 40 +++++++------------ egs/wsj/s5/steps/nnet3/train_rnn.py | 39 +++++++----------- 12 files changed, 93 insertions(+), 94 deletions(-) diff --git a/egs/ami/s5b/local/nnet3/run_lstm.sh b/egs/ami/s5b/local/nnet3/run_lstm.sh index c5583e2d0ef..ef5bfb36259 100755 --- a/egs/ami/s5b/local/nnet3/run_lstm.sh +++ b/egs/ami/s5b/local/nnet3/run_lstm.sh @@ -195,7 +195,6 @@ if [ $stage -le 13 ]; then --trainer.optimization.shrink-value 0.99 \ --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ --trainer.optimization.momentum=$momentum \ - --trainer.rnn.num-bptt-steps 30 \ --egs.chunk-width=$chunk_width \ --egs.chunk-left-context=$chunk_left_context \ --egs.chunk-right-context=$chunk_right_context \ diff --git a/egs/hkust/s5/local/nnet3/run_lstm.sh b/egs/hkust/s5/local/nnet3/run_lstm.sh index 7529147c948..f79ad02b543 100755 --- a/egs/hkust/s5/local/nnet3/run_lstm.sh +++ b/egs/hkust/s5/local/nnet3/run_lstm.sh @@ -42,7 +42,6 @@ shrink=0.98 momentum=0.5 adaptive_shrink=true num_chunk_per_minibatch=100 -num_bptt_steps=20 samples_per_iter=20000 remove_egs=true @@ -60,8 +59,8 @@ frames_per_chunk= . ./utils/parse_options.sh if ! cuda-compiled; then - cat < 0, then it implies frame-level training, which is applicable for DNN training. If it is > 0, then each parallel SGE job created, a different frame numbered 0..frames_per_eg-1 is used. - min_deriv_time: Applicable for RNN training. A default value of None - implies a min_deriv_time of 0 is used. During RNN training, its - value is set to chunk_width - num_bptt_steps in the training - script. """ chunk_level_training = False if frames_per_eg > 0 else True @@ -147,16 +143,12 @@ def train_one_iteration(dir, iter, srand, egs_dir, """ Called from steps/nnet3/train_*.py scripts for one iteration of neural network training - Args: + Selected args: frames_per_eg: The default value -1 implies chunk_level_training, which is particularly applicable to RNN training. If it is > 0, then it implies frame-level training, which is applicable for DNN training. If it is > 0, then each parallel SGE job created, a different frame numbered 0..frames_per_eg-1 is used. - min_deriv_time: Applicable for RNN training. A default value of None - implies a min_deriv_time of 0 is used. During RNN training, its - value is set to chunk_width - num_bptt_steps in the training - script. shrinkage_value: If value is 1.0, no shrinkage is done; otherwise parameter values are scaled by this value. 
get_raw_nnet_from_am: If True, then the network is read and stored as @@ -480,7 +472,7 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir, if chunk_width is not None: # this is an RNN model - mbsize = int(1024.0/(chunk_width)) + mbsize = int(1024.0/(common_train_lib.principal_chunk_width(chunk_width))) else: mbsize = 1024 diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/raw_model.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/raw_model.py index 0fe8e3d4927..3f6e85e3644 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/raw_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/raw_model.py @@ -22,7 +22,7 @@ def generate_egs_using_targets(data, targets_scp, egs_dir, run_opts, stage=0, feat_type='raw', online_ivector_dir=None, target_type='dense', num_targets=-1, - samples_per_iter=20000, frames_per_eg=20, + samples_per_iter=20000, frames_per_eg_str="20", srand=0, egs_opts=None, cmvn_opts=None, transform_dir=None): """ Wrapper for calling steps/nnet3/get_egs_targets.sh @@ -57,7 +57,7 @@ def generate_egs_using_targets(data, targets_scp, egs_dir, --left-context {left_context} --right-context {right_context} \ --stage {stage} \ --samples-per-iter {samples_per_iter} \ - --frames-per-eg {frames_per_eg} \ + --frames-per-eg {frames_per_eg_str} \ --srand {srand} \ --target-type {target_type} \ --num-targets {num_targets} \ @@ -73,7 +73,7 @@ def generate_egs_using_targets(data, targets_scp, egs_dir, else ''), left_context=left_context, right_context=right_context, stage=stage, samples_per_iter=samples_per_iter, - frames_per_eg=frames_per_eg, srand=srand, + frames_per_eg_str=frames_per_eg_str, srand=srand, num_targets=num_targets, data=data, targets_scp=targets_scp, target_type=target_type, diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index f93b7f8f68e..015ea57592e 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -51,11 +51,13 @@ def get_args(): parents=[common_train_lib.CommonParser().parser]) # egs extraction options - parser.add_argument("--egs.chunk-width", type=int, dest='chunk_width', - default=150, - help="""Number of output labels in each example. - Caution: if you double this you should halve - --trainer.samples-per-iter.""") + parser.add_argument("--egs.chunk-width", type=str, dest='chunk_width', + default="20", + help="""Number of frames per chunk in the examples + used to train the RNN. Caution: if you double this you + should halve --trainer.samples-per-iter. 
May be + a comma-separated list of alternatives: first width + is the 'principal' chunk-width, used preferentially""") # chain options parser.add_argument("--chain.lm-opts", type=str, dest='lm_opts', @@ -183,8 +185,8 @@ def process_args(args): """ Process the options got from get_args() """ - if args.chunk_width < 1: - raise Exception("--egs.chunk-width should have a minimum value of 1") + if not common_train_lib.validate_chunk_width(args.chunk_width): + raise Exception("--egs.chunk-width has an invalid value"); if args.chunk_left_context < 0: raise Exception("--egs.chunk-left-context should be non-negative") @@ -325,7 +327,7 @@ def train(args, run_opts, background_process_handler): right_tolerance=args.right_tolerance, frame_subsampling_factor=args.frame_subsampling_factor, alignment_subsampling_factor=args.alignment_subsampling_factor, - frames_per_eg=args.chunk_width, + frames_per_eg_str=args.chunk_width, srand=args.srand, egs_opts=args.egs_opts, cmvn_opts=args.cmvn_opts, @@ -340,10 +342,10 @@ def train(args, run_opts, background_process_handler): egs_dir = args.egs_dir [egs_left_context, egs_right_context, - frames_per_eg, num_archives] = ( + frames_per_eg_str, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, egs_left_context, egs_right_context)) - assert(args.chunk_width == frames_per_eg) + assert(args.chunk_width == frames_per_eg_str) num_archives_expanded = num_archives * args.frame_subsampling_factor if (args.num_jobs_final > num_archives_expanded): diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index 03de8ae2298..8e0fe312306 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -210,7 +210,7 @@ def train(args, run_opts, background_process_handler): data=args.feat_dir, alidir=args.ali_dir, egs_dir=default_egs_dir, left_context=left_context, right_context=right_context, run_opts=run_opts, - frames_per_eg=args.frames_per_eg, + frames_per_eg_str=str(args.frames_per_eg), srand=args.srand, egs_opts=args.egs_opts, cmvn_opts=args.cmvn_opts, @@ -225,10 +225,10 @@ def train(args, run_opts, background_process_handler): egs_dir = args.egs_dir [egs_left_context, egs_right_context, - frames_per_eg, num_archives] = ( + frames_per_eg_str, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, left_context, right_context)) - assert(args.frames_per_eg == frames_per_eg) + assert(str(args.frames_per_eg) == frames_per_eg_str) if (args.num_jobs_final > num_archives): raise Exception('num_jobs_final cannot exceed the number of archives ' diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index 83881d04e33..11dcdc5c5a8 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -222,7 +222,7 @@ def train(args, run_opts, background_process_handler): egs_dir=default_egs_dir, left_context=left_context, right_context=right_context, run_opts=run_opts, - frames_per_eg=args.frames_per_eg, + frames_per_eg_str=str(args.frames_per_eg), srand=args.srand, egs_opts=args.egs_opts, cmvn_opts=args.cmvn_opts, @@ -239,10 +239,10 @@ def train(args, run_opts, background_process_handler): egs_dir = args.egs_dir [egs_left_context, egs_right_context, - frames_per_eg, num_archives] = ( + frames_per_eg_str, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, left_context, right_context)) - assert(args.frames_per_eg == frames_per_eg) + assert(str(args.frames_per_eg) == 
frames_per_eg_str) if (args.num_jobs_final > num_archives): raise Exception('num_jobs_final cannot exceed the number of archives ' diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index e80ab93497b..802db47f0fe 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -58,12 +58,13 @@ def get_args(): parents=[common_train_lib.CommonParser().parser]) # egs extraction options - parser.add_argument("--egs.chunk-width", type=int, dest='chunk_width', - default=20, - help="""Number of output labels in the sequence - used to train an LSTM. - Caution: if you double this you should halve - --trainer.samples-per-iter.""") + parser.add_argument("--egs.chunk-width", type=str, dest='chunk_width', + default="20", + help="""Number of frames per chunk in the examples + used to train the RNN. Caution: if you double this you + should halve --trainer.samples-per-iter. May be + a comma-separated list of alternatives: first width + is the 'principal' chunk-width, used preferentially""") parser.add_argument("--egs.chunk-left-context", type=int, dest='chunk_left_context', default=40, help="""Number of left steps used in the estimation of @@ -120,9 +121,6 @@ def get_args(): dest='num_chunk_per_minibatch', default=100, help="Number of sequences to be processed in " "parallel every minibatch") - parser.add_argument("--trainer.rnn.num-bptt-steps", type=int, - dest='num_bptt_steps', default=None, - help="""Deprecated. Kept for back compatibility.""") parser.add_argument("--trainer.deriv-truncate-margin", type=int, dest='deriv_truncate_margin', default=8, help="""Margin (in input frames) around the 'required' @@ -161,8 +159,8 @@ def process_args(args): """ Process the options got from get_args() """ - if args.chunk_width < 1: - raise Exception("--egs.chunk-width should have a minimum value of 1") + if not common_train_lib.validate_chunk_width(args.chunk_width): + raise Exception("--egs.chunk-width has an invalid value"); if args.chunk_left_context < 0: raise Exception("--egs.chunk-left-context should be non-negative") @@ -170,17 +168,6 @@ def process_args(args): if args.chunk_right_context < 0: raise Exception("--egs.chunk-right-context should be non-negative") - if args.num_bptt_steps is not None: - # -2 is used to compensate for the splicing of the input frame, - # assuming that splicing spans from -2 to 2 - args.deriv_truncate_margin = args.num_bptt_steps - args.chunk_width - 2 - logger.warning( - "--trainer.rnn.num-bptt-steps (deprecated) is set by user, and " - "--trainer.deriv-truncate-margin is set to (num-bptt-steps - " - "chunk-width - 2) = {0}. 
We recommend using the option " - "--trainer.deriv-truncate-margin.".format( - args.deriv_truncate_margin)) - if (not os.path.exists(args.dir) or not os.path.exists(args.dir+"/configs")): raise Exception("This scripts expects {0} to exist and have a configs " @@ -301,7 +288,7 @@ def train(args, run_opts, background_process_handler): egs_dir=default_egs_dir, left_context=left_context, right_context=right_context, run_opts=run_opts, - frames_per_eg=args.chunk_width, + frames_per_eg_str=args.chunk_width, srand=args.srand, egs_opts=args.egs_opts, cmvn_opts=args.cmvn_opts, @@ -318,10 +305,13 @@ def train(args, run_opts, background_process_handler): egs_dir = args.egs_dir [egs_left_context, egs_right_context, - frames_per_eg, num_archives] = ( + frames_per_eg_str, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, left_context, right_context)) - assert(args.chunk_width == frames_per_eg) + if args.chunk_width != frames_per_eg_str: + raise Exception("mismatch between --egs.chunk-width and the frames_per_eg " + "in the egs dir {0} vs {1}".format(args.chunk_width, + frames_per_eg_str)) if (args.num_jobs_final > num_archives): raise Exception('num_jobs_final cannot exceed the number of archives ' diff --git a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index fbdb84b5963..efa5b47845c 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -59,12 +59,13 @@ def get_args(): parents=[common_train_lib.CommonParser().parser]) # egs extraction options - parser.add_argument("--egs.chunk-width", type=int, dest='chunk_width', - default=20, - help="""Number of output labels in the sequence - used to train an LSTM. - Caution: if you double this you should halve - --trainer.samples-per-iter.""") + parser.add_argument("--egs.chunk-width", type=str, dest='chunk_width', + default="20", + help="""Number of frames per chunk in the examples + used to train the RNN. Caution: if you double this you + should halve --trainer.samples-per-iter. May be + a comma-separated list of alternatives: first width + is the 'principal' chunk-width, used preferentially""") parser.add_argument("--egs.chunk-left-context", type=int, dest='chunk_left_context', default=40, help="""Number of left steps used in the estimation of @@ -119,9 +120,6 @@ def get_args(): dest='num_chunk_per_minibatch', default=100, help="Number of sequences to be processed in " "parallel every minibatch") - parser.add_argument("--trainer.rnn.num-bptt-steps", type=int, - dest='num_bptt_steps', default=None, - help="""Deprecated.
Kept for back compatibility.""") parser.add_argument("--trainer.deriv-truncate-margin", type=int, dest='deriv_truncate_margin', default=8, help="""Margin (in input frames) around the 'required' @@ -157,8 +155,8 @@ def process_args(args): """ Process the options got from get_args() """ - if args.chunk_width < 1: - raise Exception("--egs.chunk-width should have a minimum value of 1") + if not common_train_lib.validate_chunk_width(args.chunk_width): + raise Exception("--egs.chunk-width has an invalid value"); if args.chunk_left_context < 0: raise Exception("--egs.chunk-left-context should be non-negative") @@ -166,17 +164,6 @@ def process_args(args): if args.chunk_right_context < 0: raise Exception("--egs.chunk-right-context should be non-negative") - if args.num_bptt_steps is not None: - # -2 is used to compensate for the splicing of the input frame, - # assuming that splicing spans from -2 to 2 - args.deriv_truncate_margin = args.num_bptt_steps - args.chunk_width - 2 - logger.warning( - "--trainer.rnn.num-bptt-steps (deprecated) is set by user, and " - "--trainer.deriv-truncate-margin is set to (num-bptt-steps - " - "chunk-width - 2) = {0}. We recommend using the option " - "--trainer.deriv-truncate-margin.".format( - args.deriv_truncate_margin)) - if (not os.path.exists(args.dir) or not os.path.exists(args.dir+"/configs")): raise Exception("This scripts expects {0} to exist and have a configs " @@ -287,7 +274,7 @@ def train(args, run_opts, background_process_handler): data=args.feat_dir, alidir=args.ali_dir, egs_dir=default_egs_dir, left_context=left_context, right_context=right_context, run_opts=run_opts, - frames_per_eg=args.chunk_width, + frames_per_eg_str=args.chunk_width, srand=args.srand, egs_opts=args.egs_opts, cmvn_opts=args.cmvn_opts, @@ -302,10 +289,12 @@ def train(args, run_opts, background_process_handler): egs_dir = args.egs_dir [egs_left_context, egs_right_context, - frames_per_eg, num_archives] = ( + frames_per_eg_str, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, left_context, right_context)) - assert(args.chunk_width == frames_per_eg) + if args.chunk_width != frames_per_eg_str: + raise Exception("mismatch between --egs.chunk-width and the frames_per_eg " + "in the egs dir {0} vs {1}".format(args.chunk_width, frames_per_eg_str)) if (args.num_jobs_final > num_archives): raise Exception('num_jobs_final cannot exceed the number of archives ' From b82dadbd86008f271539ee3a14e1f8bad9076ede Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Thu, 29 Dec 2016 23:45:58 -0800 Subject: [PATCH 172/530] Modifying nnet3 scripts to accept more general form of minibatch-size strings (rules, not just ints.)
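The rule format is easiest to see with a toy interpreter. The sketch below only approximates the matching semantics documented in validate_minibatch_size_str() further down (the real selection happens inside the C++ binary nnet3-merge-egs, whose exact behavior is not reproduced here), and the function name is invented for illustration.

def size_spec_for_eg_length(minibatch_size_str, eg_length):
    # '128=64-128/256=32,64' means: egs whose length is closest to 128
    # frames use minibatch sizes in the range 64-128; egs closest to 256
    # frames use size 32 or 64.  A plain string like '256' applies to all egs.
    if "=" not in minibatch_size_str:
        return minibatch_size_str
    rules = [r.split("=") for r in minibatch_size_str.split("/")]
    best = min(rules, key=lambda r: abs(int(r[0]) - eg_length))
    return best[1]

assert size_spec_for_eg_length("128=64-128/256=32,64", 140) == "64-128"
assert size_spec_for_eg_length("128=64-128/256=32,64", 300) == "32,64"
assert size_spec_for_eg_length("256", 300) == "256"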
--- .../nnet3/train/chain_objf/acoustic_model.py | 17 +-- egs/wsj/s5/steps/libs/nnet3/train/common.py | 109 ++++++++++++++++-- .../nnet3/train/frame_level_objf/common.py | 36 +++--- egs/wsj/s5/steps/nnet3/chain/train.py | 19 ++- egs/wsj/s5/steps/nnet3/train_dnn.py | 14 ++- egs/wsj/s5/steps/nnet3/train_raw_dnn.py | 14 ++- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 30 +++-- egs/wsj/s5/steps/nnet3/train_rnn.py | 30 +++-- 8 files changed, 202 insertions(+), 67 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py index d0afa127844..573c157c884 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py @@ -111,7 +111,7 @@ def train_new_models(dir, iter, srand, num_jobs, min_deriv_time, max_deriv_time_relative, l2_regularize, xent_regularize, leaky_hmm_coefficient, momentum, max_param_change, - shuffle_buffer_size, num_chunk_per_minibatch, + shuffle_buffer_size, num_chunk_per_minibatch_str, frame_subsampling_factor, truncate_deriv_weights, cache_io_opts, run_opts): """ @@ -184,7 +184,7 @@ def train_new_models(dir, iter, srand, num_jobs, egs_dir=egs_dir, archive_index=archive_index, buf_size=shuffle_buffer_size, cache_io_opts=cur_cache_io_opts, - num_chunk_per_mb=num_chunk_per_minibatch), + num_chunk_per_mb=num_chunk_per_minibatch_str), wait=False) processes.append(process_handle) @@ -205,7 +205,7 @@ def train_new_models(dir, iter, srand, num_jobs, def train_one_iteration(dir, iter, srand, egs_dir, num_jobs, num_archives_processed, num_archives, learning_rate, shrinkage_value, - num_chunk_per_minibatch, + num_chunk_per_minibatch_str, num_hidden_layers, add_layers_period, left_context, right_context, apply_deriv_weights, min_deriv_time, @@ -282,7 +282,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, iter=iter) if do_average: - cur_num_chunk_per_minibatch = num_chunk_per_minibatch + cur_num_chunk_per_minibatch_str = num_chunk_per_minibatch_str cur_max_param_change = max_param_change else: # on iteration zero or when we just added a layer, use a smaller @@ -290,7 +290,8 @@ def train_one_iteration(dir, iter, srand, egs_dir, # the jobs): the model-averaging isn't always helpful when the model is # changing too fast (i.e. it can worsen the objective function), and # the smaller minibatch size will help to keep the update stable. 
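# (Illustration, not part of the patch: halve_minibatch_size_str, added to
# common.py below, maps '128=64-128/256=32,64' to '128=32-64/256=16,32' and
# a plain '512' to '256'.)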
- cur_num_chunk_per_minibatch = num_chunk_per_minibatch / 2 + cur_num_chunk_per_minibatch_str = common_train_lib.halve_minibatch_size_str( + num_chunk_per_minibatch_str) cur_max_param_change = float(max_param_change) / math.sqrt(2) train_new_models(dir=dir, iter=iter, srand=srand, num_jobs=num_jobs, @@ -308,7 +309,7 @@ momentum=momentum, max_param_change=cur_max_param_change, shuffle_buffer_size=shuffle_buffer_size, - num_chunk_per_minibatch=cur_num_chunk_per_minibatch, + num_chunk_per_minibatch_str=cur_num_chunk_per_minibatch_str, frame_subsampling_factor=frame_subsampling_factor, truncate_deriv_weights=truncate_deriv_weights, cache_io_opts=cache_io_opts, run_opts=run_opts) @@ -496,7 +497,7 @@ def compute_progress(dir, iter, run_opts, wait=False, background_process_handler=background_process_handler) -def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch, +def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_str, egs_dir, left_context, right_context, leaky_hmm_coefficient, l2_regularize, xent_regularize, run_opts, background_process_handler=None): @@ -538,7 +539,7 @@ def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch, lc=left_context, rc=right_context, l2=l2_regularize, leaky=leaky_hmm_coefficient, dir=dir, raw_models=" ".join(raw_model_strings), - num_chunk_per_mb=num_chunk_per_minibatch, + num_chunk_per_mb=num_chunk_per_minibatch_str, num_iters=num_iters, egs_dir=egs_dir)) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py index 336b204abab..b064f517fc0 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -140,18 +140,17 @@ def validate_chunk_width(chunk_width): Expected to be a string representing either an integer, like '20', or a comma-separated list of integers like '20,30,16'""" if not isinstance(chunk_width, str): - return false + return False a = chunk_width.split(","); - if len(a) == 0: - return false + assert len(a) != 0 # would be code error for elem in a: try: i = int(elem) if i < 1: - return false + return False except: - return false - return true + return False + return True def principal_chunk_width(chunk_width): @@ -161,6 +160,91 @@ def principal_chunk_width(chunk_width): raise Exception("Invalid chunk-width {0}".format(chunk_width)) return int(chunk_width.split(",")[0]) + +def validate_minibatch_size_str(minibatch_size_str): + """Validate a minibatch-size string (returns bool). + A minibatch-size string might either be an integer, like '256' + or a rule like '128=64-128/256=32,64', whose format + is: eg-length1=size-range1/eg-length2=size-range2/.... + where the size-range is a comma-separated list of either integers + or ranges. An arbitrary eg will be mapped to the size-range + for the closest of the listed eg-lengths (the eg-length is defined + as the number of input frames, including context frames).""" + if not isinstance(minibatch_size_str, str): + return False + a = minibatch_size_str.split("/") + assert len(a) != 0 # would be code error + + for elem in a: + b = elem.split('=') + # We expect b to have length 2 in the normal case. + if len(b) != 2: + # one-element 'b' is OK if len(a) is 1 (so there is only + # one choice)... this would mean somebody just gave "25" + # or something like that for the minibatch size.
+ if len(a) == 1 and len(b) == 1: + try: + mb_size = int(b[0]) + return mb_size > 0 + except: + return False + else: + return False + # check that the thing before the '=' sign is a positive integer + try: + i = int(b[0]) + if i <= 0: + return False + except: + return False # not an integer at all. + # check the thing after the '=' sign is a comma-separated list of ranges + ranges = b[1].split(",") + assert len(ranges) > 0 + for range in ranges: + # a range may be either e.g. '64', or '128-256' + try: + c = [ int(x) for x in range.split("-") ] + except: + return False + if len(c) == 1: + if c[0] <= 0: + return False + elif len(c) == 2: + if c[0] <= 0 or c[1] < c[0]: + return False + else: + return False + return True + + +def halve_minibatch_size_str(minibatch_size_str): + """Halve a minibatch-size string, as would be validated by + validate_minibatch_size_str (see docs for that). This halves + all the integer elements of minibatch_size_str that represent minibatch + sizes (as opposed to chunk-lengths) and that are >1.""" + + if not validate_minibatch_size_str(minibatch_size_str): + raise Exception("Invalid minibatch-size string '{0}'".format(minibatch_size_str)) + + a = minibatch_size_str.split("/") + ans = [] + for elem in a: + b = elem.split('=') + # We expect b to have length 2 in the normal case. + if len(b) == 1: + mb_size = int(b[0]) + ans.append(str(max(1, mb_size / 2))) + else: + assert len(b) == 2 + ranges_out = [] + ranges = b[1].split(',') + for range in ranges: + c = [ str(max(1, int(x)/2)) for x in range.split('-') ] + ranges_out.append('-'.join(c)) + ans.append('{0}={1}'.format(b[0], ','.join(ranges_out))) + return '/'.join(ans) + + def copy_egs_properties_to_exp_dir(egs_dir, dir): try: for file in ['cmvn_opts', 'splice_opts', 'final.mat']: @@ -218,7 +302,7 @@ def verify_egs_dir(egs_dir, feat_dim, ivector_dim, frames_per_eg_str = open('{0}/info/frames_per_eg'.format( egs_dir)).readline().rstrip() - if (!validate_chunk_width(frames_per_eg_str)): + if not validate_chunk_width(frames_per_eg_str): raise Exception("Invalid frames_per_eg in directory {0}/info".format( egs_dir)) num_archives = int(open('{0}/info/num_archives'.format( @@ -412,6 +496,13 @@ def remove_model(nnet_dir, iter, num_iters, models_to_combine=None, os.remove(file_name) + +def self_test(): + assert halve_minibatch_size_str('64') == '32' + assert halve_minibatch_size_str('1') == '1' + assert halve_minibatch_size_str('128=64/256=40,80-100') == '128=32/256=20,40-50' + assert validate_chunk_width('64') + assert validate_chunk_width('64,25,128') + class CommonParser: """Parser for parsing common options related to nnet3 training.
@@ -622,3 +713,7 @@ def __init__(self): help="""Polling frequency in seconds at which the background process handler checks for errors in the processes.""") + + +if __name__ == '__main__': + self_test() diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index a6f09f8b2ce..c1c95b4748f 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -27,7 +27,7 @@ def train_new_models(dir, iter, srand, num_jobs, raw_model_string, egs_dir, left_context, right_context, momentum, max_param_change, - shuffle_buffer_size, minibatch_size, + shuffle_buffer_size, minibatch_size_str, cache_read_opt, run_opts, frames_per_eg=-1, min_deriv_time=None, max_deriv_time_relative=None): @@ -91,7 +91,7 @@ def train_new_models(dir, iter, srand, num_jobs, """ark:{egs_dir}/egs.{archive_index}.ark ark:- |""" """nnet3-shuffle-egs --buffer-size={shuffle_buffer_size} """ """--srand={srand} ark:- ark:- | """ - """nnet3-merge-egs --minibatch-size={minibatch_size} """ + """nnet3-merge-egs --minibatch-size={minibatch_size_str} """ """--measure-output-frames=false """ """--discard-partial-minibatches=true ark:- ark:- |" \ {dir}/{next_iter}.{job}.raw""".format( @@ -111,7 +111,7 @@ def train_new_models(dir, iter, srand, num_jobs, raw_model=raw_model_string, context_opts=context_opts, egs_dir=egs_dir, archive_index=archive_index, shuffle_buffer_size=shuffle_buffer_size, - minibatch_size=minibatch_size), wait=False) + minibatch_size_str=minibatch_size_str), wait=False) processes.append(process_handle) @@ -130,12 +130,12 @@ def train_new_models(dir, iter, srand, num_jobs, def train_one_iteration(dir, iter, srand, egs_dir, num_jobs, num_archives_processed, num_archives, - learning_rate, minibatch_size, + learning_rate, minibatch_size_str, num_hidden_layers, add_layers_period, left_context, right_context, momentum, max_param_change, shuffle_buffer_size, run_opts, - cv_minibatch_size=256, frames_per_eg=-1, + cv_minibatch_size_str='256', frames_per_eg=-1, min_deriv_time=None, max_deriv_time_relative=None, shrinkage_value=1.0, get_raw_nnet_from_am=True, @@ -182,7 +182,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, dir=dir, iter=iter, egs_dir=egs_dir, left_context=left_context, right_context=right_context, run_opts=run_opts, - mb_size=cv_minibatch_size, + minibatch_size_str=cv_minibatch_size_str, get_raw_nnet_from_am=get_raw_nnet_from_am, wait=False, background_process_handler=background_process_handler) @@ -192,7 +192,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, left_context=left_context, right_context=right_context, run_opts=run_opts, - mb_size=cv_minibatch_size, wait=False, + minibatch_size_str=cv_minibatch_size_str, wait=False, get_raw_nnet_from_am=get_raw_nnet_from_am, background_process_handler=background_process_handler) @@ -241,7 +241,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, lr=learning_rate, dir=dir, iter=iter)) if do_average: - cur_minibatch_size = minibatch_size + cur_minibatch_size_str = minibatch_size_str cur_max_param_change = max_param_change else: # on iteration zero or when we just added a layer, use a smaller @@ -249,7 +249,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, # the jobs): the model-averaging isn't always helpful when the model is # changing too fast (i.e. it can worsen the objective function), and # the smaller minibatch size will help to keep the update stable. 
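# (Clarifying note: only the training minibatch string gets halved here;
# the cv_minibatch_size_str used by the diagnostic jobs above is passed
# through unchanged.)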
- cur_minibatch_size = minibatch_size / 2 + cur_minibatch_size_str = common_train_lib.halve_minibatch_size_str(minibatch_size_str) cur_max_param_change = float(max_param_change) / math.sqrt(2) try: @@ -264,7 +264,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, left_context=left_context, right_context=right_context, momentum=momentum, max_param_change=cur_max_param_change, shuffle_buffer_size=shuffle_buffer_size, - minibatch_size=cur_minibatch_size, + minibatch_size_str=cur_minibatch_size_str, cache_read_opt=cache_read_opt, run_opts=run_opts, frames_per_eg=frames_per_eg, min_deriv_time=min_deriv_time, @@ -365,7 +365,7 @@ def compute_preconditioning_matrix(dir, egs_dir, num_lda_jobs, run_opts, def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, - right_context, run_opts, mb_size=256, + right_context, run_opts, minibatch_size_str='256', wait=False, background_process_handler=None, get_raw_nnet_from_am=True): if get_raw_nnet_from_am: @@ -382,12 +382,12 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, nnet3-compute-prob "{model}" \ "ark,bg:nnet3-copy-egs {context_opts} \ ark:{egs_dir}/valid_diagnostic.egs ark:- | \ - nnet3-merge-egs --minibatch-size={mb_size} ark:- \ + nnet3-merge-egs --minibatch-size={minibatch_size_str} ark:- \ ark:- |" """.format(command=run_opts.command, dir=dir, iter=iter, context_opts=context_opts, - mb_size=mb_size, + minibatch_size_str=minibatch_size_str, model=model, egs_dir=egs_dir), wait=wait, background_process_handler=background_process_handler) @@ -397,19 +397,19 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, nnet3-compute-prob "{model}" \ "ark,bg:nnet3-copy-egs {context_opts} \ ark:{egs_dir}/train_diagnostic.egs ark:- | \ - nnet3-merge-egs --minibatch-size={mb_size} ark:- \ + nnet3-merge-egs --minibatch-size={minibatch_size_str} ark:- \ ark:- |" """.format(command=run_opts.command, dir=dir, iter=iter, context_opts=context_opts, - mb_size=mb_size, + minibatch_size_str=minibatch_size_str, model=model, egs_dir=egs_dir), wait=wait, background_process_handler=background_process_handler) def compute_progress(dir, iter, egs_dir, left_context, right_context, - run_opts, mb_size=256, + run_opts, minibatch_size_str=256, background_process_handler=None, wait=False, get_raw_nnet_from_am=True): if get_raw_nnet_from_am: @@ -429,13 +429,13 @@ def compute_progress(dir, iter, egs_dir, left_context, right_context, nnet3-show-progress --use-gpu=no "{prev_model}" "{model}" \ "ark,bg:nnet3-copy-egs {context_opts} \ ark:{egs_dir}/train_diagnostic.egs ark:- | \ - nnet3-merge-egs --minibatch-size={mb_size} ark:- \ + nnet3-merge-egs --minibatch-size={minibatch_size_str} ark:- \ ark:- |" """.format(command=run_opts.command, dir=dir, iter=iter, model=model, context_opts=context_opts, - mb_size=mb_size, + minibatch_size_str=minibatch_size_str, prev_model=prev_model, egs_dir=egs_dir), wait=wait, background_process_handler=background_process_handler) diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index 015ea57592e..4f2d7105d2a 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -115,10 +115,14 @@ def get_args(): [input] frames per job. This option is passed to get_egs.sh. 
Aim for about a minute of training time""") + + parser.add_argument("--trainer.num-chunk-per-minibatch", type=str, dest='num_chunk_per_minibatch', default='128', help="""Number of sequences to be processed in parallel every minibatch. May be a more general rule as accepted by the --minibatch-size option of nnet3-merge-egs; run that program without args to see the format.""") # Parameters for the optimization parser.add_argument("--trainer.optimization.initial-effective-lrate", @@ -188,6 +192,9 @@ def process_args(args): if not common_train_lib.validate_chunk_width(args.chunk_width): raise Exception("--egs.chunk-width has an invalid value"); + if not common_train_lib.validate_minibatch_size_str(args.num_chunk_per_minibatch): + raise Exception("--trainer.num-chunk-per-minibatch has an invalid value"); + if args.chunk_left_context < 0: raise Exception("--egs.chunk-left-context should be non-negative") @@ -439,7 +446,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), shrinkage_value=shrinkage_value, - num_chunk_per_minibatch=args.num_chunk_per_minibatch, + num_chunk_per_minibatch_str=args.num_chunk_per_minibatch, num_hidden_layers=num_hidden_layers, add_layers_period=args.add_layers_period, left_context=left_context, @@ -484,7 +491,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): chain_lib.combine_models( dir=args.dir, num_iters=num_iters, models_to_combine=models_to_combine, - num_chunk_per_minibatch=args.num_chunk_per_minibatch, + num_chunk_per_minibatch_str=args.num_chunk_per_minibatch, egs_dir=egs_dir, left_context=left_context, right_context=right_context, leaky_hmm_coefficient=args.leaky_hmm_coefficient, diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index 8e0fe312306..3270d114503 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -66,9 +66,12 @@ def get_args(): # Parameters for the optimization parser.add_argument("--trainer.optimization.minibatch-size", - type=float, dest='minibatch_size', default=512, - help="Size of the minibatch used to compute the " - "gradient") + type=str, dest='minibatch_size', default='512', + help="""Size of the minibatch used in SGD training + (argument to nnet3-merge-egs); may be a more general + rule as accepted by the --minibatch-size option of + nnet3-merge-egs; run that program without args to see + the format.""") # General options parser.add_argument("--feat-dir", type=str, required=True, @@ -100,6 +103,9 @@ def process_args(args): if args.frames_per_eg < 1: raise Exception("--egs.frames-per-eg should have a minimum value of 1") + if not common_train_lib.validate_minibatch_size_str(args.minibatch_size): + raise Exception("--trainer.optimization.minibatch-size has an invalid value"); + if (not os.path.exists(args.dir) or not os.path.exists(args.dir+"/configs")): raise Exception("This scripts expects {0} to exist and have a configs " @@ -311,7 +317,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), - minibatch_size=args.minibatch_size, + minibatch_size_str=args.minibatch_size, frames_per_eg=args.frames_per_eg,
num_hidden_layers=num_hidden_layers, add_layers_period=args.add_layers_period, diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index 11dcdc5c5a8..b853d77cb27 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -65,9 +65,12 @@ def get_args(): # Parameters for the optimization parser.add_argument("--trainer.optimization.minibatch-size", - type=float, dest='minibatch_size', default=512, - help="Size of the minibatch used to compute the " - "gradient") + type=str, dest='minibatch_size', default='512', + help="""Size of the minibatch used in SGD training + (argument to nnet3-merge-egs); may be a more general + rule as accepted by the --minibatch-size option of + nnet3-merge-egs; run that program without args to see + the format.""") # General options parser.add_argument("--nj", type=int, default=4, @@ -102,6 +105,9 @@ def process_args(args): if args.frames_per_eg < 1: raise Exception("--egs.frames-per-eg should have a minimum value of 1") + if not common_train_lib.validate_minibatch_size_str(args.minibatch_size): + raise Exception("--trainer.optimization.minibatch-size has an invalid value"); + if (not os.path.exists(args.dir) or not os.path.exists(args.dir+"/configs")): raise Exception("This scripts expects {0} to exist and have a configs " @@ -314,7 +320,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), - minibatch_size=args.minibatch_size, + minibatch_size_str=args.minibatch_size, frames_per_eg=args.frames_per_eg, num_hidden_layers=num_hidden_layers, add_layers_period=args.add_layers_period, diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index 802db47f0fe..fe0839d9ac0 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -110,17 +110,21 @@ def get_args(): measured by steps/nnet3/get_saturation.pl) exceeds this threshold we scale the parameter matrices with the shrink-value.""") - parser.add_argument("--trainer.optimization.cv-minibatch-size", type=int, - dest='cv_minibatch_size', default=256, + parser.add_argument("--trainer.optimization.cv-minibatch-size", type=str, + dest='cv_minibatch_size', default='256', help="""Size of the minibatch to be used in diagnostic jobs (use smaller value for BLSTMs to control memory - usage)""") - + usage). May be a more general rule as accepted by the + --minibatch-size option of nnet3-merge-egs; run that + program without args to see the format.""") # RNN specific trainer options - parser.add_argument("--trainer.rnn.num-chunk-per-minibatch", type=int, - dest='num_chunk_per_minibatch', default=100, - help="Number of sequences to be processed in " - "parallel every minibatch") + parser.add_argument("--trainer.rnn.num-chunk-per-minibatch", type=str, + dest='num_chunk_per_minibatch', default='100', + help="""Number of sequences to be processed in + parallel every minibatch. 
May be a more general + rule as accepted by the --minibatch-size option of + nnet3-merge-egs; run that program without args to see + the format.""") parser.add_argument("--trainer.deriv-truncate-margin", type=int, dest='deriv_truncate_margin', default=8, help="""Margin (in input frames) around the 'required' @@ -162,6 +166,12 @@ def process_args(args): if not common_train_lib.validate_chunk_width(args.chunk_width): raise Exception("--egs.chunk-width has an invalid value"); + if not common_train_lib.validate_minibatch_size_str(args.num_chunk_per_minibatch): + raise Exception("--trainer.rnn.num-chunk-per-minibatch has an invalid value"); + + if not common_train_lib.validate_minibatch_size_str(args.cv_minibatch_size): + raise Exception("--trainer.optimization.cv-minibatch-size has an invalid value"); + if args.chunk_left_context < 0: raise Exception("--egs.chunk-left-context should be non-negative") @@ -403,7 +413,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), shrinkage_value=shrinkage_value, - minibatch_size=args.num_chunk_per_minibatch, + minibatch_size_str=args.num_chunk_per_minibatch, num_hidden_layers=num_hidden_layers, add_layers_period=args.add_layers_period, left_context=left_context, @@ -413,7 +423,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): momentum=args.momentum, max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, - cv_minibatch_size=args.cv_minibatch_size, + cv_minibatch_size_str=args.cv_minibatch_size, run_opts=run_opts, get_raw_nnet_from_am=False, background_process_handler=background_process_handler) diff --git a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index efa5b47845c..324e7ba3357 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -109,17 +109,21 @@ def get_args(): measured by steps/nnet3/get_saturation.pl) exceeds this threshold we scale the parameter matrices with the shrink-value.""") - parser.add_argument("--trainer.optimization.cv-minibatch-size", type=int, - dest='cv_minibatch_size', default=256, + parser.add_argument("--trainer.optimization.cv-minibatch-size", type=str, + dest='cv_minibatch_size', default='256', help="""Size of the minibatch to be used in diagnostic jobs (use smaller value for BLSTMs to control memory - usage)""") - + usage). May be a more general rule as accepted by the + --minibatch-size option of nnet3-merge-egs; run that + program without args to see the format.""") # RNN specific trainer options - parser.add_argument("--trainer.rnn.num-chunk-per-minibatch", type=int, - dest='num_chunk_per_minibatch', default=100, - help="Number of sequences to be processed in " - "parallel every minibatch") + parser.add_argument("--trainer.rnn.num-chunk-per-minibatch", type=str, + dest='num_chunk_per_minibatch', default='100', + help="""Number of sequences to be processed in + parallel every minibatch. 
May be a more general + rule as accepted by the --minibatch-size option of + nnet3-merge-egs; run that program without args to see + the format.""") parser.add_argument("--trainer.deriv-truncate-margin", type=int, dest='deriv_truncate_margin', default=8, help="""Margin (in input frames) around the 'required' @@ -158,6 +162,12 @@ def process_args(args): if not common_train_lib.validate_chunk_width(args.chunk_width): raise Exception("--egs.chunk-width has an invalid value"); + if not common_train_lib.validate_minibatch_size_str(args.num_chunk_per_minibatch): + raise Exception("--trainer.rnn.num-chunk-per-minibatch has an invalid value"); + + if not common_train_lib.validate_minibatch_size_str(args.cv_minibatch_size): + raise Exception("--trainer.optimization.cv-minibatch-size has an invalid value"); + if args.chunk_left_context < 0: raise Exception("--egs.chunk-left-context should be non-negative") @@ -396,7 +406,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), shrinkage_value=shrinkage_value, - minibatch_size=args.num_chunk_per_minibatch, + minibatch_size_str=args.num_chunk_per_minibatch, num_hidden_layers=num_hidden_layers, add_layers_period=args.add_layers_period, left_context=left_context, @@ -406,7 +416,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): momentum=args.momentum, max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, - cv_minibatch_size=args.cv_minibatch_size, + cv_minibatch_size_str=args.cv_minibatch_size, run_opts=run_opts, background_process_handler=background_process_handler) From ae480248716b37b185c84f525159a4592f27d6a8 Mon Sep 17 00:00:00 2001 From: Pavel Denisov Date: Sat, 31 Dec 2016 00:31:07 +0300 Subject: [PATCH 173/530] =?UTF-8?q?[doc]=20Update=20a=20couple=20of=20mino?= =?UTF-8?q?r=20things=20in=20Wit=20Zieli=C5=84ski=20tutorial=20(#1296)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/doc/kaldi_for_dummies.dox | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/doc/kaldi_for_dummies.dox b/src/doc/kaldi_for_dummies.dox index afb3295f50e..75a58011b1d 100644 --- a/src/doc/kaldi_for_dummies.dox +++ b/src/doc/kaldi_for_dummies.dox @@ -348,8 +348,8 @@ directories. You may find such links in, for example, This script will help you to get decoding results.

Task

-From \c kaldi-trunk/egs/voxforge/local copy the script \c score.sh into -exactly same location in your project (\c kaldi-trunk/egs/digits/local). +From \c kaldi-trunk/egs/voxforge/s5/local copy the script \c score.sh into +similar location in your project (\c kaldi-trunk/egs/digits/local). \subsection kaldi_for_dummies_srilm SRILM installation @@ -418,8 +418,8 @@ export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI # Defining audio data directory (modify it for your installation directory!) export DATA_ROOT="/home/{user}/kaldi-trunk/egs/digits/digits_audio" -# Variable that stores path to MITLM library -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/tools/mitlm-svn/lib +# Enable SRILM +source $KALDI_ROOT/tools/env.sh # Variable needed for proper data sorting export LC_ALL=C From 350fc490d8a128abe309927b137c4c0f6cd110ae Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Fri, 30 Dec 2016 13:54:38 -0800 Subject: [PATCH 174/530] Reverting a couple previous changes to local scripts which turned out to be unnecessary --- egs/ami/s5b/local/nnet3/run_lstm.sh | 1 + egs/hkust/s5/local/nnet3/run_lstm.sh | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/egs/ami/s5b/local/nnet3/run_lstm.sh b/egs/ami/s5b/local/nnet3/run_lstm.sh index ef5bfb36259..c5583e2d0ef 100755 --- a/egs/ami/s5b/local/nnet3/run_lstm.sh +++ b/egs/ami/s5b/local/nnet3/run_lstm.sh @@ -195,6 +195,7 @@ if [ $stage -le 13 ]; then --trainer.optimization.shrink-value 0.99 \ --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ --trainer.optimization.momentum=$momentum \ + --trainer.rnn.num-bptt-steps 30 \ --egs.chunk-width=$chunk_width \ --egs.chunk-left-context=$chunk_left_context \ --egs.chunk-right-context=$chunk_right_context \ diff --git a/egs/hkust/s5/local/nnet3/run_lstm.sh b/egs/hkust/s5/local/nnet3/run_lstm.sh index f79ad02b543..7529147c948 100755 --- a/egs/hkust/s5/local/nnet3/run_lstm.sh +++ b/egs/hkust/s5/local/nnet3/run_lstm.sh @@ -42,6 +42,7 @@ shrink=0.98 momentum=0.5 adaptive_shrink=true num_chunk_per_minibatch=100 +num_bptt_steps=20 samples_per_iter=20000 remove_egs=true @@ -59,8 +60,8 @@ frames_per_chunk= . ./utils/parse_options.sh if ! 
cuda-compiled; then - cat < Date: Fri, 30 Dec 2016 17:09:37 -0800 Subject: [PATCH 175/530] Change nnet3 python scripts to support {left-right}-context-{initial,final} options --- .../nnet3/train/chain_objf/acoustic_model.py | 11 ++- egs/wsj/s5/steps/libs/nnet3/train/common.py | 67 ++++++++++++++++--- .../train/frame_level_objf/acoustic_model.py | 11 ++- .../nnet3/train/frame_level_objf/raw_model.py | 11 ++- egs/wsj/s5/steps/nnet3/chain/get_egs.sh | 17 ++++- egs/wsj/s5/steps/nnet3/chain/train.py | 18 ++++- egs/wsj/s5/steps/nnet3/get_egs.sh | 17 ++++- .../s5/steps/nnet3/get_egs_discriminative.sh | 24 +++++++ egs/wsj/s5/steps/nnet3/get_egs_targets.sh | 15 ++++- egs/wsj/s5/steps/nnet3/lstm/make_configs.py | 2 + egs/wsj/s5/steps/nnet3/make_jesus_configs.py | 2 + egs/wsj/s5/steps/nnet3/make_tdnn_configs.py | 3 +- egs/wsj/s5/steps/nnet3/tdnn/make_configs.py | 2 + egs/wsj/s5/steps/nnet3/train_dnn.py | 9 ++- egs/wsj/s5/steps/nnet3/train_raw_dnn.py | 6 +- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 16 +++-- egs/wsj/s5/steps/nnet3/train_rnn.py | 22 +++--- 17 files changed, 203 insertions(+), 50 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py index 573c157c884..3266af13519 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py @@ -53,6 +53,7 @@ def generate_chain_egs(dir, data, lat_dir, egs_dir, left_context, right_context, run_opts, stage=0, left_tolerance=None, right_tolerance=None, + left_context_initial=-1, right_context_final=-1, frame_subsampling_factor=3, alignment_subsampling_factor=3, feat_type='raw', online_ivector_dir=None, @@ -70,7 +71,10 @@ def generate_chain_egs(dir, data, lat_dir, egs_dir, --feat-type {feat_type} \ --transform-dir "{transform_dir}" \ --online-ivector-dir "{ivector_dir}" \ - --left-context {left_context} --right-context {right_context} \ + --left-context {left_context} \ + --right-context {right_context} \ + --left-context-initial {left_context_initial} \ + --right-context-final {right_context_final} \ --left-tolerance '{left_tolerance}' \ --right-tolerance '{right_tolerance}' \ --frame-subsampling-factor {frame_subsampling_factor} \ @@ -89,7 +93,10 @@ def generate_chain_egs(dir, data, lat_dir, egs_dir, ivector_dir=(online_ivector_dir if online_ivector_dir is not None else ''), - left_context=left_context, right_context=right_context, + left_context=left_context, + right_context=right_context, + left_context_initial=left_context_initial, + right_context_final=right_context_final, left_tolerance=(left_tolerance if left_tolerance is not None else ''), diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py index b064f517fc0..60bd2e69d5a 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -281,7 +281,8 @@ def parse_generic_config_vars_file(var_file): def verify_egs_dir(egs_dir, feat_dim, ivector_dim, - left_context, right_context): + left_context, right_context, + left_context_initial=-1, right_context_final=-1): try: egs_feat_dim = int(open('{0}/info/feat_dim'.format( egs_dir)).readline()) @@ -291,6 +292,17 @@ def verify_egs_dir(egs_dir, feat_dim, ivector_dim, egs_dir)).readline()) egs_right_context = int(open('{0}/info/right_context'.format( egs_dir)).readline()) + try: + egs_left_context_initial = int(open('{0}/info/left_context_initial'.format( + egs_dir)).readline()) + 
except: # older scripts didn't write this, treat it as -1 in that case. + egs_left_context_initial = -1 + try: + egs_right_context_final = int(open('{0}/info/right_context_final'.format( + egs_dir)).readline()) + except: # older scripts didn't write this, treat it as -1 in that case. + egs_right_context_final = -1 + if (feat_dim != egs_feat_dim) or (ivector_dim != egs_ivector_dim): raise Exception("There is mismatch between featdim/ivector_dim of " "the current experiment and the provided " @@ -298,7 +310,26 @@ def verify_egs_dir(egs_dir, feat_dim, ivector_dim, if (egs_left_context < left_context or egs_right_context < right_context): - raise Exception('The egs have insufficient context') + raise Exception('The egs have insufficient (l,r) context ({0},{1}) ' + 'versus expected ({2},{3})'.format( + egs_left_context, egs_right_context, + left_context, right_context)) + + # the condition on the initial/final context is an equality condition, + # not an inequality condition, as there is no mechanism to 'correct' the + # context (by subtracting context) while copying the egs, like there is + # for the regular left-right context. If the user is determined to use + # previously dumped egs, they may be able to slightly adjust the + # --egs.chunk-left-context-initial and --egs.chunk-right-context-final + # options to make things matched up. [note: the model l/r context gets + # added in, so you have to correct for changes in that.] + if (egs_left_context_initial != left_context_initial or + egs_right_context_final != right_context_final): + raise Exception('The egs have incorrect initial/final (l,r) context ' + '({0},{1}) versus expected ({2},{3}). See code from ' + 'where this exception was raised for more info'.format( + egs_left_context_initial, egs_right_context_final, + left_context_initial, right_context_final)) frames_per_eg_str = open('{0}/info/frames_per_eg'.format( egs_dir)).readline().rstrip() @@ -512,9 +543,10 @@ class CommonParser: in steps/nnet3/train*.py and steps/nnet3/chain/train.py """ - parser = argparse.ArgumentParser(add_help=False) + parser = argparse.ArgumentParser(add_help=False, + default_chunk_left_context=0) - def __init__(self): + def __init__(self, include_chunk_context = True): # feat options self.parser.add_argument("--feat.online-ivector-dir", type=str, dest='online_ivector_dir', default=None, @@ -527,22 +559,39 @@ def __init__(self): help="A string specifying '--norm-means' " "and '--norm-vars' values") - # egs extraction options - self.parser.add_argument("--egs.chunk-left-context", type=int, - dest='chunk_left_context', default=0, - help="""Number of additional frames of input + # egs extraction options. there is no point adding the chunk context + # option for non-RNNs (by which we mean basic TDNN-type topologies), as + # it wouldn't affect anything, so we disable them if we know in advance + # that we're not supporting RNN-type topologies (as in train_dnn.py). + if include_chunk_context: + self.parser.add_argument("--egs.chunk-left-context", type=int, + dest='chunk_left_context', + default=default_chunk_left_context, + help="""Number of additional frames of input to the left of the input chunk. This extra context will be used in the estimation of RNN state before prediction of the first label. 
In the case of FF-DNN this extra context will be used to allow for frame-shifts""") - self.parser.add_argument("--egs.chunk-right-context", type=int, + self.parser.add_argument("--egs.chunk-right-context", type=int, dest='chunk_right_context', default=0, help="""Number of additional frames of input to the right of the input chunk. This extra context will be used in the estimation of bidirectional RNN state before prediction of the first label.""") + self.parser.add_argument("--egs.chunk-left-context-initial", type=int, + dest='chunk_left_context_initial', default=-1, + help="""Number of additional frames of input + to the left of the *first* input chunk extracted + from an utterance. If negative, defaults to + the same as --egs.chunk-left-context""") + self.parser.add_argument("--egs.chunk-right-context-final", type=int, + dest='chunk_right_context_final', default=-1, + help="""Number of additional frames of input + to the right of the *last* input chunk extracted + from an utterance. If negative, defaults to the + same as --egs.chunk-right-context""") self.parser.add_argument("--egs.transform_dir", type=str, dest='transform_dir', default=None, action=common_lib.NullstrToNoneAction, diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/acoustic_model.py index ca3d36082fb..47265a19dba 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/acoustic_model.py @@ -21,6 +21,7 @@ def generate_egs(data, alidir, egs_dir, left_context, right_context, run_opts, stage=0, + left_context_initial=-1, right_context_final=-1, feat_type='raw', online_ivector_dir=None, samples_per_iter=20000, frames_per_eg_str="20", srand=0, egs_opts=None, cmvn_opts=None, transform_dir=None): @@ -38,7 +39,10 @@ def generate_egs(data, alidir, egs_dir, --feat-type {feat_type} \ --transform-dir "{transform_dir}" \ --online-ivector-dir "{ivector_dir}" \ - --left-context {left_context} --right-context {right_context} \ + --left-context {left_context} \ + --right-context {right_context} \ + --left-context-initial {left_context_initial} \ + --right-context-final {right_context_final} \ --stage {stage} \ --samples-per-iter {samples_per_iter} \ --frames-per-eg {frames_per_eg_str} \ @@ -53,7 +57,10 @@ def generate_egs(data, alidir, egs_dir, ivector_dir=(online_ivector_dir if online_ivector_dir is not None else ''), - left_context=left_context, right_context=right_context, + left_context=left_context, + right_context=right_context, + left_context_initial=left_context_initial, + right_context_final=right_context_final, stage=stage, samples_per_iter=samples_per_iter, frames_per_eg_str=frames_per_eg_str, srand=srand, data=data, alidir=alidir, egs_dir=egs_dir, diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/raw_model.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/raw_model.py index 3f6e85e3644..037abeb1dd8 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/raw_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/raw_model.py @@ -20,6 +20,7 @@ def generate_egs_using_targets(data, targets_scp, egs_dir, left_context, right_context, run_opts, stage=0, + left_context_initial=-1, right_context_final=-1, feat_type='raw', online_ivector_dir=None, target_type='dense', num_targets=-1, samples_per_iter=20000, frames_per_eg_str="20", @@ -54,7 +55,10 @@ def generate_egs_using_targets(data, targets_scp, egs_dir, --feat-type {feat_type} \ 
--transform-dir "{transform_dir}" \ --online-ivector-dir "{ivector_dir}" \ - --left-context {left_context} --right-context {right_context} \ + --left-context {left_context} \ + --right-context {right_context} \ + --left-context-initial {left_context_initial} \ + --right-context-final {right_context_final} \ --stage {stage} \ --samples-per-iter {samples_per_iter} \ --frames-per-eg {frames_per_eg_str} \ @@ -71,7 +75,10 @@ def generate_egs_using_targets(data, targets_scp, egs_dir, ivector_dir=(online_ivector_dir if online_ivector_dir is not None else ''), - left_context=left_context, right_context=right_context, + left_context=left_context, + right_context=right_context, + left_context_initial=left_context_initial, + right_context_final=right_context_final, stage=stage, samples_per_iter=samples_per_iter, frames_per_eg_str=frames_per_eg_str, srand=srand, num_targets=num_targets, diff --git a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh index 9018c2e2472..94bf322a514 100755 --- a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh @@ -32,6 +32,8 @@ alignment_subsampling_factor=3 # frames-per-second of input alignments divided left_context=4 # amount of left-context per eg (i.e. extra frames of input features # not present in the output supervision). right_context=4 # amount of right-context per eg. +left_context_initial=-1 # if >=0, left-context for first chunk of an utterance +right_context_final=-1 # if >=0, right-context for last chunk of an utterance compress=true # set this to false to disable compression (e.g. if you want to see whether # results are affected). @@ -90,8 +92,10 @@ if [ $# != 4 ]; then echo " --frame-subsampling-factor # factor by which num-frames at nnet output is reduced " echo " --frames-per-eg # number of supervised frames per eg on disk" echo " --frames-overlap-per-eg # number of supervised frames of overlap between egs" - echo " --left-context # Number of frames on left side to append for feature input" - echo " --right-context # Number of frames on right side to append for feature input" + echo " --left-context # Number of frames on left side to append for feature input" + echo " --right-context # Number of frames on right side to append for feature input" + echo " --left-context-initial # If >= 0, left-context for first chunk of an utterance" + echo " --right-context-final # If >= 0, right-context for last chunk of an utterance" echo " --num-egs-diagnostic <#frames;4000> # Number of egs used in computing (train,valid) diagnostics" echo " --num-valid-egs-combine <#frames;10000> # Number of egss used in getting combination weights at the" echo " # very end." @@ -264,7 +268,9 @@ echo $egs_per_archive > $dir/info/egs_per_archive echo "$0: creating $num_archives archives, each with $egs_per_archive egs, with" echo "$0: $frames_per_eg labels per example, and (left,right) context = ($left_context,$right_context)" - +if [ $left_context_initial -ge 0 ] || [ $right_context_final -ge 0 ]; then + echo "$0: ... 
and (left-context-initial,right-context-final) = ($left_context_initial,$right_context_final)" +fi if [ -e $dir/storage ]; then @@ -288,6 +294,9 @@ fi egs_opts="--left-context=$left_context --right-context=$right_context --num-frames=$frames_per_eg --frame-subsampling-factor=$frame_subsampling_factor --compress=$compress" +[ $left_context_initial -ge 0 ] && egs_opts="$egs_opts --left-context-initial=$left_context_initial" +[ $right_context_final -ge 0 ] && egs_opts="$egs_opts --right-context-final=$right_context_final" + chain_supervision_all_opts="--lattice-input=true --frame-subsampling-factor=$alignment_subsampling_factor" [ ! -z $right_tolerance ] && \ @@ -298,6 +307,8 @@ chain_supervision_all_opts="--lattice-input=true --frame-subsampling-factor=$ali echo $left_context > $dir/info/left_context echo $right_context > $dir/info/right_context +echo $left_context_initial > $dir/info/left_context_initial +echo $right_context_final > $dir/info/right_context_final if [ $stage -le 3 ]; then echo "$0: Getting validation and training subset examples." diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index 4f2d7105d2a..635a7eec753 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -294,6 +294,10 @@ def train(args, run_opts, background_process_handler): left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context + left_context_initial = (args.chunk_left_context_initial + model_left_context if + args.chunk_left_context_initial >= 0 else -1) + right_context_final = (args.chunk_right_context_final + model_right_context if + args.chunk_right_context_final >= 0 else -1) # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. 
This first config just does any initial splicing that we do; @@ -317,8 +321,12 @@ def train(args, run_opts, background_process_handler): {dir}/init.raw""".format(command=run_opts.command, dir=args.dir)) - egs_left_context = left_context + args.frame_subsampling_factor/2 - egs_right_context = right_context + args.frame_subsampling_factor/2 + egs_left_context = left_context + args.frame_subsampling_factor / 2 + egs_right_context = right_context + args.frame_subsampling_factor / 2 + egs_left_context_initial = (left_context_initial + args.frame_subsampling_factor / 2 if + left_context_initial >= 0 else -1) + egs_right_context_final = (right_context_final + args.frame_subsampling_factor / 2 if + right_context_final >= 0 else -1) default_egs_dir = '{0}/egs'.format(args.dir) if (args.stage <= -3) and args.egs_dir is None: @@ -329,6 +337,8 @@ def train(args, run_opts, background_process_handler): lat_dir=args.lat_dir, egs_dir=default_egs_dir, left_context=egs_left_context, right_context=egs_right_context, + left_context_initial=egs_left_context_initial, + right_context_final=egs_right_context_final, run_opts=run_opts, left_tolerance=args.left_tolerance, right_tolerance=args.right_tolerance, @@ -351,7 +361,9 @@ def train(args, run_opts, background_process_handler): [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = ( common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, - egs_left_context, egs_right_context)) + egs_left_context, egs_right_context, + egs_left_context_initial, + egs_right_context_final) assert(args.chunk_width == frames_per_eg_str) num_archives_expanded = num_archives * args.frame_subsampling_factor diff --git a/egs/wsj/s5/steps/nnet3/get_egs.sh b/egs/wsj/s5/steps/nnet3/get_egs.sh index e442dce9032..6622f3632f7 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs.sh @@ -24,6 +24,8 @@ frames_per_eg=8 # number of frames of labels per example. more->less disk spa left_context=4 # amount of left-context per eg (i.e. extra frames of input features # not present in the output supervision). right_context=4 # amount of right-context per eg. +left_context_initial=-1 # if >=0, left-context for first chunk of an utterance +right_context_final=-1 # if >=0, right-context for last chunk of an utterance compress=true # set this to false to disable compression (e.g. if you want to see whether # results are affected). @@ -74,8 +76,10 @@ if [ $# != 3 ]; then echo " # frames-per-eg is the chunk size, to get variety of chunk" echo " # sizes). The first in the list is preferred and is used" echo " # when working out the number of archives etc." - echo " --left-context # Number of frames on left side to append for feature input" - echo " --right-context # Number of frames on right side to append for feature input" + echo " --left-context # Number of frames on left side to append for feature input" + echo " --right-context # Number of frames on right side to append for feature input" + echo " --left-context-initial # If >= 0, left-context for first chunk of an utterance" + echo " --right-context-final # If >= 0, right-context for last chunk of an utterance" echo " --num-frames-diagnostic <#frames;4000> # Number of frames used in computing (train,valid) diagnostics" echo " --num-valid-frames-combine <#frames;10000> # Number of frames used in getting combination weights at the" echo " # very end." 
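An editorial aside, since the same arithmetic recurs in several of these diffs: the context actually dumped into each eg is the model's own left/right context, plus the user-specified chunk context, plus frame-subsampling-factor/2 frames of padding so that frame-shifted copies of the egs still contain every input frame the model needs; the initial/final variants use -1 as a sentinel for "same as the regular context". A minimal Python sketch of that logic (the function and argument names here are illustrative, not part of the scripts):

```python
def egs_context(model_left, model_right, chunk_left, chunk_right,
                chunk_left_initial=-1, chunk_right_final=-1,
                frame_subsampling_factor=1):
    """Illustration of the context arithmetic in these scripts (not
    library code): -1 means 'no special value, use regular context'."""
    pad = frame_subsampling_factor // 2   # room for frame-shifted copies
    left = chunk_left + model_left + pad
    right = chunk_right + model_right + pad
    left_initial = (chunk_left_initial + model_left + pad
                    if chunk_left_initial >= 0 else -1)
    right_final = (chunk_right_final + model_right + pad
                   if chunk_right_final >= 0 else -1)
    return left, right, left_initial, right_final

# e.g. model (l,r) context (16, 12), RNN-style chunk-left-context 40,
# frame-subsampling-factor 3 (as in 'chain' setups):
print(egs_context(16, 12, 40, 0, frame_subsampling_factor=3))
# -> (57, 13, -1, -1)
```

Note the asymmetry this explains in verify_egs_dir() above: surplus regular context in a dumped egs dir can be trimmed when copying egs, so an inequality check suffices, while the initial/final context has no trimming mechanism and must match exactly.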
@@ -245,6 +249,9 @@ echo $egs_per_archive > $dir/info/egs_per_archive echo "$0: creating $num_archives archives, each with $egs_per_archive egs, with" echo "$0: $frames_per_eg labels per example, and (left,right) context = ($left_context,$right_context)" +if [ $left_context_initial -ge 0 ] || [ $right_context_final -ge 0 ]; then + echo "$0: ... and (left-context-initial,right-context-final) = ($left_context_initial,$right_context_final)" +fi @@ -265,9 +272,15 @@ if [ $stage -le 2 ]; then fi egs_opts="--left-context=$left_context --right-context=$right_context --compress=$compress --num-frames=$frames_per_eg" +[ $left_context_initial -ge 0 ] && egs_opts="$egs_opts --left-context-initial=$left_context_initial" +[ $right_context_final -ge 0 ] && egs_opts="$egs_opts --right-context-final=$right_context_final" echo $left_context > $dir/info/left_context echo $right_context > $dir/info/right_context +echo $left_context_initial > $dir/info/left_context_initial +echo $right_context_final > $dir/info/right_context_final + + num_pdfs=$(tree-info --print-args=false $alidir/tree | grep num-pdfs | awk '{print $2}') if [ $stage -le 3 ]; then echo "$0: Getting validation and training subset examples." diff --git a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh index 243a98360e8..d746c895c18 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh @@ -20,6 +20,8 @@ frame_subsampling_factor=1 # ratio between input and output frame-rate of nnet. left_context=4 # amount of left-context per eg (i.e. extra frames of input features # not present in the output supervision). right_context=4 # amount of right-context per eg. +left_context_initial=-1 # if >=0, left-context for first chunk of an utterance +right_context_final=-1 # if >=0, right-context for last chunk of an utterance adjust_priors=true compress=true # set this to false to disable compression (e.g. if you want to see whether # results are affected). @@ -75,6 +77,10 @@ if [ $# != 6 ]; then echo " # the middle." echo " --online-ivector-dir # Directory for online-estimated iVectors, used in the" echo " # online-neural-net setup." + echo " --left-context # Number of frames on left side to append for feature input" + echo " --right-context # Number of frames on right side to append for feature input" + echo " --left-context-initial # If >= 0, left-context for first chunk of an utterance" + echo " --right-context-final # If >= 0, right-context for last chunk of an utterance" exit 1; fi @@ -250,6 +256,9 @@ echo $egs_per_archive > $dir/info/egs_per_archive echo "$0: creating $num_archives archives, each with $egs_per_archive egs, with" echo "$0: $frames_per_eg labels per example, and (left,right) context = ($left_context,$right_context)" +if [ $left_context_initial -ge 0 ] || [ $right_context_final -ge 0 ]; then + echo "$0: ... 
and (left-context-initial,right-context-final) = ($left_context_initial,$right_context_final)" +fi if [ -e $dir/storage ]; then @@ -274,21 +283,36 @@ fi splitter_opts="--supervision-splitter.determinize=$determinize --supervision-splitter.minimize=$minimize --supervision-splitter.remove_output_symbols=$remove_output_symbols --supervision-splitter.remove_epsilons=$remove_epsilons --supervision-splitter.collapse-transition-ids=$collapse_transition_ids --supervision-splitter.acoustic-scale=$acwt" + +# If frame_subsampling_factor > 0, we will later be shifting the egs slightly to +# the left or right as part of training, so we see (e.g.) all shifts of the data +# modulo 3... we need to extend the l/r context slightly to account for this, to +# ensure we see the entire context that the model requires. left_context=$[left_context+frame_subsampling_factor/2] right_context=$[right_context+frame_subsampling_factor/2] +[ $left_context_initial -ge 0 ] && left_context_initial=$[left_context_initial+frame_subsampling_factor/2] +[ $right_context_final -ge 0 ] && right_context_final=$[right_context_final+frame_subsampling_factor/2] egs_opts="--left-context=$left_context --right-context=$right_context --num-frames=$frames_per_eg --compress=$compress --frame-subsampling-factor=$frame_subsampling_factor $splitter_opts" +[ $left_context_initial -ge 0 ] && egs_opts="$egs_opts --left-context-initial=$left_context_initial" +[ $right_context_final -ge 0 ] && egs_opts="$egs_opts --right-context-final=$right_context_final" + # don't do the overlap thing for the priors computation data-- but do use the # same num-frames for the eg, which would be much more efficient in case it's a # recurrent model and has a lot of frames of context. In any case we're not # doing SGD so there is no benefit in having short chunks. priors_egs_opts="--left-context=$left_context --right-context=$right_context --num-frames=$frames_per_eg --compress=$compress" +[ $left_context_initial -ge 0 ] && priors_egs_opts="$priors_egs_opts --left-context-initial=$left_context_initial" +[ $right_context_final -ge 0 ] && priors_egs_opts="$priors_egs_opts --right-context-final=$right_context_final" + supervision_all_opts="--frame-subsampling-factor=$frame_subsampling_factor" echo $left_context > $dir/info/left_context echo $right_context > $dir/info/right_context +echo $left_context_initial > $dir/info/left_context_initial +echo $right_context_final > $dir/info/right_context_final echo $frame_subsampling_factor > $dir/info/frame_subsampling_factor diff --git a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh index eeac84db969..7bd8fa5f983 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh @@ -32,6 +32,8 @@ frames_per_eg=8 # number of frames of labels per example. more->less disk spa left_context=4 # amount of left-context per eg (i.e. extra frames of input features # not present in the output supervision). right_context=4 # amount of right-context per eg. +left_context_initial=-1 # if >=0, left-context for first chunk of an utterance +right_context_final=-1 # if >=0, right-context for last chunk of an utterance compress=true # set this to false to disable compression (e.g. if you want to see whether # results are affected). num_utts_subset=300 # number of utterances in validation and training @@ -81,8 +83,10 @@ if [ $# != 3 ]; then echo " # frames-per-eg is the chunk size, to get variety of chunk" echo " # sizes). 
The first in the list is preferred and is used" echo " # when working out the number of archives etc." - echo " --left-context # Number of frames on left side to append for feature input" - echo " --right-context # Number of frames on right side to append for feature input" + echo " --left-context # Number of frames on left side to append for feature input" + echo " --right-context # Number of frames on right side to append for feature input" + echo " --left-context-initial # If >= 0, left-context for first chunk of an utterance" + echo " --right-context-final # If >= 0, right-context for last chunk of an utterance" echo " --num-frames-diagnostic <#frames;4000> # Number of frames used in computing (train,valid) diagnostics" echo " --num-valid-frames-combine <#frames;10000> # Number of frames used in getting combination weights at the" echo " # very end." @@ -238,6 +242,9 @@ echo $egs_per_archive > $dir/info/egs_per_archive echo "$0: creating $num_archives archives, each with $egs_per_archive egs, with" echo "$0: $frames_per_eg labels per example, and (left,right) context = ($left_context,$right_context)" +if [ $left_context_initial -ge 0 ] || [ $right_context_final -ge 0 ]; then + echo "$0: ... and (left-context-initial,right-context-final) = ($left_context_initial,$right_context_final)" +fi @@ -252,9 +259,13 @@ if [ -e $dir/storage ]; then fi egs_opts="--left-context=$left_context --right-context=$right_context --compress=$compress --num-frames=$frames_per_eg" +[ $left_context_initial -ge 0 ] && egs_opts="$egs_opts --left-context-initial=$left_context_initial" +[ $right_context_final -ge 0 ] && egs_opts="$egs_opts --right-context-final=$right_context_final" echo $left_context > $dir/info/left_context echo $right_context > $dir/info/right_context +echo $left_context_initial > $dir/info/left_context_initial +echo $right_context_final > $dir/info/right_context_final for n in `seq $nj`; do utils/filter_scp.pl $sdata/$n/utt2spk $targets_scp > $dir/targets.$n.scp diff --git a/egs/wsj/s5/steps/nnet3/lstm/make_configs.py b/egs/wsj/s5/steps/nnet3/lstm/make_configs.py index 205b6034fad..b80a8d4045b 100755 --- a/egs/wsj/s5/steps/nnet3/lstm/make_configs.py +++ b/egs/wsj/s5/steps/nnet3/lstm/make_configs.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +# This script is deprecated, please use ../xconfig_to_configs.py + from __future__ import print_function import os import argparse diff --git a/egs/wsj/s5/steps/nnet3/make_jesus_configs.py b/egs/wsj/s5/steps/nnet3/make_jesus_configs.py index 7f3aba2328c..b442ce9715b 100755 --- a/egs/wsj/s5/steps/nnet3/make_jesus_configs.py +++ b/egs/wsj/s5/steps/nnet3/make_jesus_configs.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +# This script is deprecated, please use ./xconfig_to_configs.py + # tdnn or RNN with 'jesus layer' # inputs to jesus layer: diff --git a/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py b/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py index 8403c273a9d..d88355befae 100644 --- a/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py +++ b/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py @@ -1,11 +1,12 @@ #!/usr/bin/env python +# This script is deprecated, please use ../xconfig_to_configs.py + # we're using python 3.x style print but want it to work in python 2.x, from __future__ import print_function import re, os, argparse, sys, math, warnings - parser = argparse.ArgumentParser(description="Writes config files and variables " "for TDNNs creation and training", epilog="See steps/nnet3/train_tdnn.sh for example."); diff --git a/egs/wsj/s5/steps/nnet3/tdnn/make_configs.py 
b/egs/wsj/s5/steps/nnet3/tdnn/make_configs.py index 48c13a1236c..5445b16e165 100755 --- a/egs/wsj/s5/steps/nnet3/tdnn/make_configs.py +++ b/egs/wsj/s5/steps/nnet3/tdnn/make_configs.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +# This script is deprecated, please use ../xconfig_to_configs.py + # we're using python 3.x style print but want it to work in python 2.x, from __future__ import print_function import os diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index 3270d114503..b5ed26499a4 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -36,19 +36,18 @@ def get_args(): """ Get args from stdin. - We add compulsary arguments as named arguments for readability + We add compulsory arguments as named arguments for readability The common options are defined in the object libs.nnet3.train.common.CommonParser.parser. See steps/libs/nnet3/train/common.py """ - parser = argparse.ArgumentParser( description="""Trains a feed forward DNN acoustic model using the cross-entropy objective. DNNs include simple DNNs, TDNNs and CNNs.""", formatter_class=argparse.ArgumentDefaultsHelpFormatter, conflict_handler='resolve', - parents=[common_train_lib.CommonParser().parser]) + parents=[common_train_lib.CommonParser(include_chunk_context = False).parser]) # egs extraction options parser.add_argument("--egs.frames-per-eg", type=int, dest='frames_per_eg', @@ -191,8 +190,8 @@ def train(args, run_opts, background_process_handler): raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) - left_context = args.chunk_left_context + model_left_context - right_context = args.chunk_right_context + model_right_context + left_context = model_left_context + right_context = model_right_context # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. This first config just does any initial splicing that we do; diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index b853d77cb27..a26e0aa75cf 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -47,7 +47,7 @@ def get_args(): DNNs include simple DNNs, TDNNs and CNNs.""", formatter_class=argparse.ArgumentDefaultsHelpFormatter, conflict_handler='resolve', - parents=[common_train_lib.CommonParser().parser]) + parents=[common_train_lib.CommonParser(include_chunk_context = False).parser]) # egs extraction options parser.add_argument("--egs.frames-per-eg", type=int, dest='frames_per_eg', @@ -184,8 +184,8 @@ def train(args, run_opts, background_process_handler): raise Exception("KeyError {0}: Variables need to be defined in " "{1}".format(str(e), '{0}/configs'.format(args.dir))) - left_context = args.chunk_left_context + model_left_context - right_context = args.chunk_right_context + model_right_context + left_context = model_left_context + right_context = model_right_context # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. This first config just does any initial splicing that we do; diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index fe0839d9ac0..d0fb33a54c5 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -55,7 +55,7 @@ def get_args(): 3. 
RNNs can also be trained with state preservation training""", formatter_class=argparse.ArgumentDefaultsHelpFormatter, conflict_handler='resolve', - parents=[common_train_lib.CommonParser().parser]) + parents=[common_train_lib.CommonParser(default_chunk_left_context=40).parser]) # egs extraction options parser.add_argument("--egs.chunk-width", type=str, dest='chunk_width', @@ -65,11 +65,6 @@ def get_args(): should halve --trainer.samples-per-iter. May be a comma-separated list of alternatives: first width is the 'principal' chunk-width, used preferentially""") - parser.add_argument("--egs.chunk-left-context", type=int, - dest='chunk_left_context', default=40, - help="""Number of left steps used in the estimation of - LSTM state before prediction of the first label. - Overrides the default value in CommonParser""") # trainer options parser.add_argument("--trainer.samples-per-iter", type=int, @@ -256,6 +251,10 @@ def train(args, run_opts, background_process_handler): left_context = args.chunk_left_context + model_left_context right_context = args.chunk_right_context + model_right_context + left_context_initial = (args.chunk_left_context_initial + model_left_context if + args.chunk_left_context_initial >= 0 else -1) + right_context_final = (args.chunk_right_context_final + model_right_context if + args.chunk_right_context_final >= 0 else -1) # Initialize as "raw" nnet, prior to training the LDA-like preconditioning # matrix. This first config just does any initial splicing that we do; @@ -296,7 +295,10 @@ def train(args, run_opts, background_process_handler): train_lib.raw_model.generate_egs_using_targets( data=args.feat_dir, targets_scp=args.targets_scp, egs_dir=default_egs_dir, - left_context=left_context, right_context=right_context, + left_context=left_context, + right_context=right_context, + left_context_initial=left_context_initial, + right_context_final=right_context_final, run_opts=run_opts, frames_per_eg_str=args.chunk_width, srand=args.srand, diff --git a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index 324e7ba3357..2211e826088 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -56,7 +56,7 @@ def get_args(): 3. RNNs can also be trained with state preservation training""", formatter_class=argparse.ArgumentDefaultsHelpFormatter, conflict_handler='resolve', - parents=[common_train_lib.CommonParser().parser]) + parents=[common_train_lib.CommonParser(default_chunk_left_context = 40).parser]) # egs extraction options parser.add_argument("--egs.chunk-width", type=str, dest='chunk_width', @@ -66,11 +66,6 @@ def get_args(): should halve --trainer.samples-per-iter. 
                        May be a comma-separated list of alternatives: first width
                        is the 'principal' chunk-width, used preferentially""")
-    parser.add_argument("--egs.chunk-left-context", type=int,
-                        dest='chunk_left_context', default=40,
-                        help="""Number of left steps used in the estimation of
-                        LSTM state before prediction of the first label""")
-
     parser.add_argument("--trainer.samples-per-iter", type=int,
                         dest='samples_per_iter', default=20000,
                         help="""This is really the number of egs in each
@@ -261,6 +256,10 @@ def train(args, run_opts, background_process_handler):

     left_context = args.chunk_left_context + model_left_context
     right_context = args.chunk_right_context + model_right_context
+    left_context_initial = (args.chunk_left_context_initial + model_left_context if
+                            args.chunk_left_context_initial >= 0 else -1)
+    right_context_final = (args.chunk_right_context_final + model_right_context if
+                           args.chunk_right_context_final >= 0 else -1)

     # Initialize as "raw" nnet, prior to training the LDA-like preconditioning
     # matrix.  This first config just does any initial splicing that we do;
@@ -281,8 +280,12 @@ def train(args, run_opts, background_process_handler):
         logger.info("Generating egs")

         train_lib.acoustic_model.generate_egs(
-            data=args.feat_dir, alidir=args.ali_dir, egs_dir=default_egs_dir,
-            left_context=left_context, right_context=right_context,
+            data=args.feat_dir, alidir=args.ali_dir,
+            egs_dir=default_egs_dir,
+            left_context=left_context,
+            right_context=right_context,
+            left_context_initial=left_context_initial,
+            right_context_final=right_context_final,
             run_opts=run_opts,
             frames_per_eg_str=args.chunk_width,
             srand=args.srand,
@@ -301,7 +304,8 @@ def train(args, run_opts, background_process_handler):

     [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = (
         common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim,
-                                        left_context, right_context))
+                                        left_context, right_context,
+                                        left_context_initial, right_context_final))
     if args.chunk_width != frames_per_eg_str:
         raise Exception("mismatch between --egs.chunk-width and the frames_per_eg "
                         "in the egs dir {0} vs {1}".format(args.chunk_width, frames_per_eg_str))

From b50f7f629e9afc1ff9d01c1885a00efd5bc61697 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Sat, 31 Dec 2016 00:07:29 -0500
Subject: [PATCH 176/530] Add diagnostics to UtteranceSplitter; Various fixes.
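Before the diff itself: the diagnostics this patch adds reduce to a histogram of chunk sizes plus a count of input frames shared by adjacent chunks, printed when the UtteranceSplitter is destroyed. A rough Python paraphrase of the accumulation logic (the class and member names only mirror the C++; this is a sketch, not the Kaldi code):

```python
class SplitterStats(object):
    """Illustrative paraphrase of UtteranceSplitter's diagnostics:
    per-chunk-size counts plus overlap between adjacent chunks."""
    def __init__(self):
        self.total_num_utterances = 0
        self.total_input_frames = 0
        self.total_frames_overlap = 0
        self.total_frames_in_chunks = 0
        self.chunk_size_to_count = {}

    def acc_stats_for_utterance(self, utterance_length, chunks):
        # each chunk is a (first_frame, num_frames) pair
        self.total_num_utterances += 1
        self.total_input_frames += utterance_length
        for i, (first, num) in enumerate(chunks):
            if i > 0:
                prev_end = chunks[i - 1][0] + chunks[i - 1][1]
                if prev_end > first:  # adjacent chunks share input frames
                    self.total_frames_overlap += prev_end - first
            self.chunk_size_to_count[num] = \
                self.chunk_size_to_count.get(num, 0) + 1
            self.total_frames_in_chunks += num

    def summary(self):
        pct = (100.0 * self.total_frames_overlap /
               max(self.total_frames_in_chunks, 1))
        return "overlap between adjacent chunks: {0:.2f}%".format(pct)

stats = SplitterStats()
stats.acc_stats_for_utterance(38, [(0, 20), (18, 20)])  # 2 frames shared
print(stats.summary())  # overlap between adjacent chunks: 5.00%
```

Keeping the counters inside the splitter (rather than as loose num_frames_written/num_egs_written locals in each binary, as before) is what lets the diff below delete the bookkeeping arguments from every ProcessFile() signature.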
--- egs/wsj/s5/steps/libs/nnet3/train/common.py | 7 +- egs/wsj/s5/steps/nnet3/chain/train.py | 2 +- src/chainbin/nnet3-chain-get-egs.cc | 53 ++--- src/nnet3/nnet-chain-example.h | 2 +- src/nnet3/nnet-discriminative-example.h | 2 +- src/nnet3/nnet-example-utils.cc | 75 +++++- src/nnet3/nnet-example-utils.h | 35 ++- src/nnet3bin/nnet3-discriminative-get-egs.cc | 48 ++-- src/nnet3bin/nnet3-get-egs-dense-targets.cc | 228 ++++++++++--------- src/nnet3bin/nnet3-get-egs.cc | 44 ++-- 10 files changed, 286 insertions(+), 210 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py index 60bd2e69d5a..1edaf3972cb 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -543,10 +543,11 @@ class CommonParser: in steps/nnet3/train*.py and steps/nnet3/chain/train.py """ - parser = argparse.ArgumentParser(add_help=False, - default_chunk_left_context=0) + parser = argparse.ArgumentParser(add_help=False) - def __init__(self, include_chunk_context = True): + def __init__(self, + include_chunk_context = True, + default_chunk_left_context=0): # feat options self.parser.add_argument("--feat.online-ivector-dir", type=str, dest='online_ivector_dir', default=None, diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index 635a7eec753..ba661847561 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -363,7 +363,7 @@ def train(args, run_opts, background_process_handler): common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, egs_left_context, egs_right_context, egs_left_context_initial, - egs_right_context_final) + egs_right_context_final)) assert(args.chunk_width == frames_per_eg_str) num_archives_expanded = num_archives * args.frame_subsampling_factor diff --git a/src/chainbin/nnet3-chain-get-egs.cc b/src/chainbin/nnet3-chain-get-egs.cc index 2a8f5a1c6ad..bf1e87d2452 100644 --- a/src/chainbin/nnet3-chain-get-egs.cc +++ b/src/chainbin/nnet3-chain-get-egs.cc @@ -45,20 +45,19 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst, const chain::Supervision &supervision, const std::string &utt_id, bool compress, - const UtteranceSplitter &utt_splitter, - int64 *num_frames_written, - int64 *num_egs_written, + UtteranceSplitter *utt_splitter, NnetChainExampleWriter *example_writer) { - bool ans = true; KALDI_ASSERT(supervision.num_sequences == 1); int32 num_input_frames = feats.NumRows(), num_output_frames = supervision.frames_per_sequence; - if (!utt_splitter.LengthsMatch(utt_id, num_input_frames, num_output_frames)) + if (!utt_splitter->LengthsMatch(utt_id, num_input_frames, num_output_frames)) return false; // LengthsMatch() will have printed a warning. 
std::vector chunks; + utt_splitter->GetChunksForUtterance(num_input_frames, &chunks); + if (chunks.empty()) { KALDI_WARN << "Not producing egs for utterance " << utt_id << " because it is too short: " @@ -66,11 +65,9 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst, return false; } - int32 frame_subsampling_factor = utt_splitter.Config().frame_subsampling_factor; - - utt_splitter.GetChunksForUtterance(num_input_frames, &chunks); + int32 frame_subsampling_factor = utt_splitter->Config().frame_subsampling_factor; - chain::SupervisionSplitter splitter(supervision); + chain::SupervisionSplitter sup_splitter(supervision); for (size_t c = 0; c < chunks.size(); c++) { ChunkTimeInfo &chunk = chunks[c]; @@ -79,9 +76,9 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst, num_frames_subsampled = chunk.num_frames / frame_subsampling_factor; chain::Supervision supervision_part; - splitter.GetFrameRange(start_frame_subsampled, - num_frames_subsampled, - &supervision_part); + sup_splitter.GetFrameRange(start_frame_subsampled, + num_frames_subsampled, + &supervision_part); if (normalization_fst.NumStates() > 0 && !AddWeightToSupervisionFst(normalization_fst, @@ -91,7 +88,6 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst, << (chunk.first_frame + chunk.num_frames) << ", FST was empty after composing with normalization FST. " << "This should be extremely rare (a few per corpus, at most)"; - ans = false; } int32 first_frame = 0; // we shift the time-indexes of all these parts so @@ -154,12 +150,9 @@ static bool ProcessFile(const fst::StdVectorFst &normalization_fst, std::string key = os.str(); // key is - - *num_frames_written += chunk.num_frames; - *num_egs_written += 1; - example_writer->Write(key, nnet_chain_eg); } - return ans; + return true; } } // namespace nnet2 @@ -256,8 +249,7 @@ int main(int argc, char *argv[]) { RandomAccessBaseFloatMatrixReader online_ivector_reader( online_ivector_rspecifier); - int32 num_done = 0, num_err = 0; - int64 num_frames_written = 0, num_egs_written = 0; + int32 num_err = 0; for (; !feat_reader.Done(); feat_reader.Next()) { std::string key = feat_reader.Key(); @@ -290,23 +282,18 @@ int main(int argc, char *argv[]) { continue; } - if (ProcessFile(normalization_fst, feats, - online_ivector_feats, online_ivector_period, - supervision, key, compress, utt_splitter, - &num_frames_written, &num_egs_written, - &example_writer)) - num_done++; - else + if (!ProcessFile(normalization_fst, feats, + online_ivector_feats, online_ivector_period, + supervision, key, compress, + &utt_splitter, &example_writer)) num_err++; } } - - KALDI_LOG << "Finished generating nnet3-chain examples, " - << "successfully processed " << num_done - << " feature files, wrote " << num_egs_written << " examples, " - << " with " << num_frames_written << " frames in total; " - << num_err << " files had errors."; - return (num_egs_written == 0 || num_err > num_done ? 1 : 0); + if (num_err > 0) + KALDI_WARN << num_err << " utterances had errors and could " + "not be processed."; + // utt_splitter prints stats in its destructor. 
+ return utt_splitter.ExitStatus(); } catch(const std::exception &e) { std::cerr << e.what() << '\n'; return -1; diff --git a/src/nnet3/nnet-chain-example.h b/src/nnet3/nnet-chain-example.h index 87b2de77897..24e68116193 100644 --- a/src/nnet3/nnet-chain-example.h +++ b/src/nnet3/nnet-chain-example.h @@ -268,7 +268,7 @@ class ChainExampleMerger { int32 num_egs_written_; const ExampleMergingConfig &config_; NnetChainExampleWriter *writer_; - ExampleSizeStats stats_; + ExampleMergingStats stats_; // Note: the "key" into the egs is the first element of the vector. typedef unordered_map 1) { + std::ostringstream os; + os << std::setprecision(4); + for (std::map::iterator iter = chunk_size_to_count_.begin(); + iter != chunk_size_to_count_.end(); ++iter) { + int32 chunk_size = iter->first, + num_frames = chunk_size * iter->second; + float percent_of_total = num_frames * 100.0 / total_frames_in_chunks_; + if (iter != chunk_size_to_count_.begin()) os << ", "; + os << chunk_size << " = " << percent_of_total << "%"; + } + KALDI_LOG << "Output frames are distributed among chunk-sizes as follows: " + << os.str(); + } +} + float UtteranceSplitter::DefaultDurationOfSplit( const std::vector &split) const { if (split.empty()) // not a valid split, but useful to handle this case. @@ -761,7 +795,7 @@ void UtteranceSplitter::GetGapSizes(int32 utterance_length, void UtteranceSplitter::GetChunksForUtterance( int32 utterance_length, - std::vector *chunk_info) const { + std::vector *chunk_info) { std::vector chunk_sizes; GetChunkSizesForUtterance(utterance_length, &chunk_sizes); std::vector gaps(chunk_sizes.size()); @@ -780,12 +814,39 @@ void UtteranceSplitter::GetChunksForUtterance( config_.right_context_final : config_.right_context); t += chunk_sizes[i]; } + AccStatsForUtterance(utterance_length, *chunk_info); // check that the end of the last chunk doesn't go more than // 'config_.frame_subsampling_factor - 1' frames past the end // of the utterance. That amount, we treat as rounding error. 
KALDI_ASSERT(t - utterance_length < config_.frame_subsampling_factor); } +void UtteranceSplitter::AccStatsForUtterance( + int32 utterance_length, + const std::vector &chunk_info) { + total_num_utterances_ += 1; + total_input_frames_ += utterance_length; + + for (size_t c = 0; c < chunk_info.size(); c++) { + int32 chunk_size = chunk_info[c].num_frames; + if (c > 0) { + int32 last_chunk_end = chunk_info[c-1].first_frame + + chunk_info[c-1].num_frames; + if (last_chunk_end > chunk_info[c].first_frame) + total_frames_overlap_ += last_chunk_end - chunk_info[c].first_frame; + } + std::map::iterator iter = chunk_size_to_count_.find( + chunk_size); + if (iter == chunk_size_to_count_.end()) + chunk_size_to_count_[chunk_size] = 1; + else + iter->second++; + total_num_chunks_ += 1; + total_frames_in_chunks_ += chunk_size; + } +} + + void UtteranceSplitter::SetOutputWeights( int32 utterance_length, std::vector *chunk_info) const { @@ -951,7 +1012,7 @@ int32 ExampleMergingConfig::MinibatchSize(int32 size_of_eg, } -void ExampleSizeStats::WroteExample(int32 example_size, +void ExampleMergingStats::WroteExample(int32 example_size, size_t structure_hash, int32 minibatch_size) { std::pair p(example_size, structure_hash); @@ -965,7 +1026,7 @@ void ExampleSizeStats::WroteExample(int32 example_size, iter->second += 1; } -void ExampleSizeStats::DiscardedExamples(int32 example_size, +void ExampleMergingStats::DiscardedExamples(int32 example_size, size_t structure_hash, int32 num_discarded) { std::pair p(example_size, structure_hash); @@ -973,12 +1034,12 @@ void ExampleSizeStats::DiscardedExamples(int32 example_size, } -void ExampleSizeStats::PrintStats() const { +void ExampleMergingStats::PrintStats() const { PrintAggregateStats(); PrintSpecificStats(); } -void ExampleSizeStats::PrintAggregateStats() const { +void ExampleMergingStats::PrintAggregateStats() const { // First print some aggregate stats. int64 num_distinct_egs_types = 0, // number of distinct types of input egs // (differing in size or structure). @@ -1042,7 +1103,7 @@ void ExampleSizeStats::PrintAggregateStats() const { KALDI_LOG << os.str(); } -void ExampleSizeStats::PrintSpecificStats() const { +void ExampleMergingStats::PrintSpecificStats() const { KALDI_LOG << "Merged specific eg types as follows [format: =" "{->,->.../d=}" ",={...},... (note,egs-size == number of input " diff --git a/src/nnet3/nnet-example-utils.h b/src/nnet3/nnet-example-utils.h index 46d6906ff99..66624f69004 100644 --- a/src/nnet3/nnet-example-utils.h +++ b/src/nnet3/nnet-example-utils.h @@ -180,8 +180,9 @@ class UtteranceSplitter { // Given an utterance length, this function creates for you a list of chunks // into which to split the utterance. Note: this is partly random (will call // srand()). + // Accumulates some stats which will be printed out in the destructor. void GetChunksForUtterance(int32 utterance_length, - std::vector *chunk_info) const; + std::vector *chunk_info); // This function returns true if 'supervision_length' (e.g. the length of the @@ -194,6 +195,9 @@ class UtteranceSplitter { int32 utterance_length, int32 supervision_length) const; + ~UtteranceSplitter(); + + int32 ExitStatus() { return (total_frames_in_chunks_ > 0); } private: @@ -250,7 +254,6 @@ class UtteranceSplitter { const std::vector &chunk_sizes, std::vector *gap_sizes) const; - // this static function, used in GetGapSizes(), writes random values to a // vector 'vec' such the sum of those values equals n (n may be positive or // negative). 
It tries to make those values as similar as possible (they will @@ -270,9 +273,13 @@ class UtteranceSplitter { // This function is responsible for setting the 'output_weights' // members of the chunks. - void SetOutputWeights(int32 utterance_lengths, + void SetOutputWeights(int32 utterance_length, std::vector *chunk_info) const; + // Accumulate stats for diagnostics. + void AccStatsForUtterance(int32 utterance_length, + const std::vector &chunk_info); + const ExampleGenerationConfig &config_; @@ -295,6 +302,21 @@ class UtteranceSplitter { // chunks, and then add the subtracted number of copies of the primary // num-frames to the split. std::vector > > splits_for_length_; + + // Below are stats used for diagnostics. + int32 total_num_utterances_; // total input utterances. + int64 total_input_frames_; // total num-frames over all utterances (before + // splitting) + int64 total_frames_overlap_; // total number of frames that overlap between + // adjacent egs. + int64 total_num_chunks_; + int64 total_frames_in_chunks_; // total of chunk-size times count of that + // chunk. equals the num-frames in all the + // output chunks, added up. + std::map chunk_size_to_count_; // for each chunk size, gives + // the number of chunks with + // that size. + }; @@ -403,9 +425,8 @@ int32 GetNnetExampleSize(const NnetExample &a); /// statistics about how examples of different sizes (c.f. GetNnetExampleSize()) /// were merged into minibatches, and how many examples were left over and /// discarded. -class ExampleSizeStats { +class ExampleMergingStats { public: - /// Users call this function to inform this class that one minibatch has been /// written aggregating 'minibatch_size' separate examples of original size /// 'example_size' (e.g. as determined by GetNnetExampleSize(), but the caller @@ -475,7 +496,7 @@ class ExampleMerger { void Finish(); // returns a suitable exit status for a program. - bool ExitStatus() { return num_egs_written_ > 0; } + bool ExitStatus() { return num_egs_written_ > 0 ? 0 : 1; } ~ExampleMerger() { Finish(); }; private: @@ -487,7 +508,7 @@ class ExampleMerger { int32 num_egs_written_; const ExampleMergingConfig &config_; NnetExampleWriter *writer_; - ExampleSizeStats stats_; + ExampleMergingStats stats_; // Note: the "key" into the egs is the first element of the vector. typedef unordered_map, diff --git a/src/nnet3bin/nnet3-discriminative-get-egs.cc b/src/nnet3bin/nnet3-discriminative-get-egs.cc index 6055dc3d20c..070a88b331d 100644 --- a/src/nnet3bin/nnet3-discriminative-get-egs.cc +++ b/src/nnet3bin/nnet3-discriminative-get-egs.cc @@ -34,6 +34,8 @@ namespace nnet3 { // This function does all the processing for one utterance, and outputs the // examples to 'example_writer'. 
+// returns true if we got as far as calling GetChunksForUtterance() +// [in which case stats will be accumulated by class UtteranceSplitter] static bool ProcessFile(const discriminative::SplitDiscriminativeSupervisionOptions &config, const TransitionModel &tmodel, const MatrixBase &feats, @@ -42,28 +44,26 @@ static bool ProcessFile(const discriminative::SplitDiscriminativeSupervisionOpti const discriminative::DiscriminativeSupervision &supervision, const std::string &utt_id, bool compress, - const UtteranceSplitter &utt_splitter, - int64 *num_frames_written, - int64 *num_egs_written, + UtteranceSplitter *utt_splitter, NnetDiscriminativeExampleWriter *example_writer) { KALDI_ASSERT(supervision.num_sequences == 1); int32 num_input_frames = feats.NumRows(), num_output_frames = supervision.frames_per_sequence; - if (!utt_splitter.LengthsMatch(utt_id, num_input_frames, num_output_frames)) + if (!utt_splitter->LengthsMatch(utt_id, num_input_frames, num_output_frames)) return false; // LengthsMatch() will have printed a warning. std::vector chunks; + utt_splitter->GetChunksForUtterance(num_input_frames, &chunks); + if (chunks.empty()) { KALDI_WARN << "Not producing egs for utterance " << utt_id << " because it is too short: " << num_input_frames << " frames."; } - int32 frame_subsampling_factor = utt_splitter.Config().frame_subsampling_factor; - - utt_splitter.GetChunksForUtterance(num_input_frames, &chunks); + int32 frame_subsampling_factor = utt_splitter->Config().frame_subsampling_factor; discriminative::DiscriminativeSupervisionSplitter splitter(config, tmodel, supervision); @@ -143,9 +143,6 @@ static bool ProcessFile(const discriminative::SplitDiscriminativeSupervisionOpti std::string key = os.str(); // key is - - *num_frames_written += chunk.num_frames; - *num_egs_written += 1; - example_writer->Write(key, nnet_discriminative_eg); } return true; @@ -187,6 +184,8 @@ int main(int argc, char *argv[]) { discriminative::SplitDiscriminativeSupervisionOptions splitter_config; ParseOptions po(usage); + + eg_config.Register(&po); po.Register("compress", &compress, "If true, write egs in " "compressed format (recommended)"); po.Register("ivectors", &online_ivector_rspecifier, "Alias for --online-ivectors " @@ -198,7 +197,7 @@ int main(int argc, char *argv[]) { "option"); po.Register("length-tolerance", &length_tolerance, "Tolerance for " "difference in num-frames between feat and ivector matrices"); - eg_config.Register(&po); + ParseOptions splitter_opts("supervision-splitter", &po); splitter_config.Register(&splitter_opts); @@ -236,8 +235,7 @@ int main(int argc, char *argv[]) { RandomAccessBaseFloatMatrixReader online_ivector_reader( online_ivector_rspecifier); - int32 num_done = 0, num_err = 0; - int64 num_frames_written = 0, num_egs_written = 0; + int32 num_err = 0; for (; !feat_reader.Done(); feat_reader.Next()) { std::string key = feat_reader.Key(); @@ -269,22 +267,18 @@ int main(int argc, char *argv[]) { num_err++; continue; } - - if (ProcessFile(splitter_config, tmodel, - feats, online_ivector_feats, online_ivector_period, - supervision, key, compress, utt_splitter, - &num_frames_written, &num_egs_written, - &example_writer)) num_done++; - else num_err++; + if (!ProcessFile(splitter_config, tmodel, + feats, online_ivector_feats, online_ivector_period, + supervision, key, compress, + &utt_splitter, &example_writer)) + num_err++; } } - - KALDI_LOG << "Finished generating nnet3-discriminative examples, " - << "successfully processed " << num_done - << " feature files, wrote " << num_egs_written 
<< " examples, " - << " with " << num_frames_written << " frames in total; " - << num_err << " files had errors."; - return (num_egs_written == 0 || num_err > num_done ? 1 : 0); + if (num_err > 0) + KALDI_WARN << num_err << " utterances had errors and could " + "not be processed."; + // utt_splitter prints diagnostics. + return utt_splitter.ExitStatus(); } catch(const std::exception &e) { std::cerr << e.what() << '\n'; return -1; diff --git a/src/nnet3bin/nnet3-get-egs-dense-targets.cc b/src/nnet3bin/nnet3-get-egs-dense-targets.cc index 23bf8922a5b..ddcf5f23555 100644 --- a/src/nnet3bin/nnet3-get-egs-dense-targets.cc +++ b/src/nnet3bin/nnet3-get-egs-dense-targets.cc @@ -25,6 +25,7 @@ #include "hmm/transition-model.h" #include "hmm/posterior.h" #include "nnet3/nnet-example.h" +#include "nnet3/nnet-example-utils.h" namespace kaldi { namespace nnet3 { @@ -32,101 +33,121 @@ namespace nnet3 { static void ProcessFile(const MatrixBase &feats, const MatrixBase *ivector_feats, + int32 ivector_period, const MatrixBase &targets, const std::string &utt_id, bool compress, int32 num_targets, - int32 left_context, - int32 right_context, - int32 frames_per_eg, - int64 *num_frames_written, - int64 *num_egs_written, + UtteranceSplitter *utt_splitter, NnetExampleWriter *example_writer) { - KALDI_ASSERT(feats.NumRows() == static_cast(targets.NumRows())); - - for (int32 t = 0; t < feats.NumRows(); t += frames_per_eg) { - - // actual_frames_per_eg is the number of frames with actual targets. - // At the end of the file, we pad with the last frame repeated - // so that all examples have the same structure (prevents the need - // for recompilations). - // TODO: We might need to ignore the end of the file. - int32 actual_frames_per_eg = std::min(frames_per_eg, - feats.NumRows() - t); - - - int32 tot_frames = left_context + frames_per_eg + right_context; - - Matrix input_frames(tot_frames, feats.NumCols()); - - // Set up "input_frames". - for (int32 j = -left_context; j < frames_per_eg + right_context; j++) { - int32 t2 = j + t; + int32 num_input_frames = feats.NumRows(); + if (!utt_splitter->LengthsMatch(utt_id, num_input_frames, + targets.NumRows())) { + if (targets.NumRows() == 0) + return; + // normally we wouldn't process such an utterance but there may be + // situations when a small disagreement is acceptable. + KALDI_WARN << " .. processing this utterance anyway."; + } + KALDI_ASSERT(num_targets < 0 || targets.NumCols() == num_targets); + + std::vector chunks; + + utt_splitter->GetChunksForUtterance(num_input_frames, &chunks); + + if (chunks.empty()) { + KALDI_WARN << "Not producing egs for utterance " << utt_id + << " because it is too short: " + << num_input_frames << " frames."; + return; + } + + // 'frame_subsampling_factor' is not used in any recipes at the time of + // writing, this is being supported to unify the code with the 'chain' recipes + // and in case we need it for some reason in future. 
+ int32 frame_subsampling_factor = + utt_splitter->Config().frame_subsampling_factor; + + for (size_t c = 0; c < chunks.size(); c++) { + const ChunkTimeInfo &chunk = chunks[c]; + + int32 tot_input_frames = chunk.left_context + chunk.num_frames + + chunk.right_context; + + Matrix input_frames(tot_input_frames, feats.NumCols(), + kUndefined); + + int32 start_frame = chunk.first_frame - chunk.left_context; + for (int32 t = start_frame; t < start_frame + tot_input_frames; t++) { + int32 t2 = t; if (t2 < 0) t2 = 0; - if (t2 >= feats.NumRows()) t2 = feats.NumRows() - 1; + if (t2 >= num_input_frames) t2 = num_input_frames - 1; + int32 j = t - start_frame; SubVector src(feats, t2), - dest(input_frames, j + left_context); + dest(input_frames, j); dest.CopyFromVec(src); } NnetExample eg; - + // call the regular input "input". - eg.io.push_back(NnetIo("input", - left_context, - input_frames)); + eg.io.push_back(NnetIo("input", -chunk.left_context, input_frames)); - // if applicable, add the iVector feature. if (ivector_feats != NULL) { - // try to get closest frame to middle of window to get - // a representative iVector. - int32 closest_frame = t + (actual_frames_per_eg / 2); - KALDI_ASSERT(ivector_feats->NumRows() > 0); - if (closest_frame >= ivector_feats->NumRows()) - closest_frame = ivector_feats->NumRows() - 1; + // if applicable, add the iVector feature. + // choose iVector from a random frame in the chunk + int32 ivector_frame = RandInt(start_frame, + start_frame + num_input_frames - 1), + ivector_frame_subsampled = ivector_frame / ivector_period; + if (ivector_frame_subsampled < 0) + ivector_frame_subsampled = 0; + if (ivector_frame_subsampled >= ivector_feats->NumRows()) + ivector_frame_subsampled = ivector_feats->NumRows() - 1; Matrix ivector(1, ivector_feats->NumCols()); - ivector.Row(0).CopyFromVec(ivector_feats->Row(closest_frame)); + ivector.Row(0).CopyFromVec(ivector_feats->Row(ivector_frame_subsampled)); eg.io.push_back(NnetIo("ivector", 0, ivector)); } + // Note: chunk.first_frame and chunk.num_frames will both be + // multiples of frame_subsampling_factor. + // We expect frame_subsampling_factor to usually be 1 for now. + int32 start_frame_subsampled = chunk.first_frame / frame_subsampling_factor, + num_frames_subsampled = chunk.num_frames / frame_subsampling_factor; + + KALDI_ASSERT(start_frame_subsampled + num_frames_subsampled - 1 < + targets.NumRows()); + + // add the labels. 
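The input-frame loop above clamps out-of-range times to the first/last frame of the utterance, i.e. edge frames are replicated into the context. A self-contained C++ sketch of the same idea (std::vector of ints stands in for Kaldi's Matrix; names are illustrative):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Build the input window for one chunk: rows outside
    // [0, num_input_frames) are clamped, replicating the first/last frame.
    std::vector<int> MakeInputWindow(const std::vector<int> &frames,
                                     int first_frame, int left_context,
                                     int num_frames, int right_context) {
      int num_input_frames = frames.size();
      int start = first_frame - left_context;
      int total = left_context + num_frames + right_context;
      std::vector<int> window(total);
      for (int t = start; t < start + total; t++) {
        int t2 = std::min(std::max(t, 0), num_input_frames - 1);  // clamp
        window[t - start] = frames[t2];
      }
      return window;
    }

    int main() {
      std::vector<int> frames = {10, 11, 12, 13, 14};
      // Chunk starting at frame 0 with 2 frames of left context: frame 0
      // gets replicated twice on the left.
      std::vector<int> w = MakeInputWindow(frames, 0, 2, 3, 1);
      for (int v : w) printf("%d ", v);  // prints: 10 10 10 11 12 13
      printf("\n");
      return 0;
    }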
- Matrix targets_dest(frames_per_eg, targets.NumCols()); - for (int32 i = 0; i < actual_frames_per_eg; i++) { + Matrix targets_part(num_frames_subsampled, targets.NumCols()); + for (int32 i = 0; i < num_frames_subsampled; i++) { // Copy the i^th row of the target matrix from the (t+i)^th row of the // input targets matrix - SubVector this_target_dest(targets_dest, i); - SubVector this_target_src(targets, t+i); - this_target_dest.CopyFromVec(this_target_src); - } - - // Copy the last frame's target to the padded frames - for (int32 i = actual_frames_per_eg; i < frames_per_eg; i++) { - // Copy the i^th row of the target matrix from the last row of the - // input targets matrix - KALDI_ASSERT(t + actual_frames_per_eg - 1 == feats.NumRows() - 1); - SubVector this_target_dest(targets_dest, i); - SubVector this_target_src(targets, t+actual_frames_per_eg-1); + int32 t = i + start_frame_subsampled; + if (t >= targets.NumRows()) + t = targets.NumRows() - 1; + SubVector this_target_dest(targets_part, i); + SubVector this_target_src(targets, t); this_target_dest.CopyFromVec(this_target_src); - } + } // push this created targets matrix into the eg - eg.io.push_back(NnetIo("output", 0, targets_dest)); - + eg.io.push_back(NnetIo("output", 0, targets_part)); + if (compress) eg.Compress(); - + std::ostringstream os; - os << utt_id << "-" << t; + os << utt_id << "-" << chunk.first_frame; std::string key = os.str(); // key is - - *num_frames_written += actual_frames_per_eg; - *num_egs_written += 1; - example_writer->Write(key, eg); } } + } // namespace nnet2 } // namespace kaldi @@ -152,29 +173,31 @@ int main(int argc, char *argv[]) { "--right-context=9 --num-frames=8 \"$feats\" \\\n" "\"ark:copy-matrix ark:exp/snrs/snr.1.ark ark:- |\"\n" " ark:- \n"; - + bool compress = true; - int32 num_targets = -1, left_context = 0, right_context = 0, - num_frames = 1, length_tolerance = 100; - - std::string ivector_rspecifier; - + int32 num_targets = -1, length_tolerance = 100, online_ivector_period = 1; + ExampleGenerationConfig eg_config; // controls num-frames, + // left/right-context, etc. 
+ + std::string online_ivector_rspecifier; ParseOptions po(usage); + + eg_config.Register(&po); po.Register("compress", &compress, "If true, write egs in " "compressed format."); - po.Register("num-targets", &num_targets, "Number of targets for the neural network"); - po.Register("left-context", &left_context, "Number of frames of left " - "context the neural net requires."); - po.Register("right-context", &right_context, "Number of frames of right " - "context the neural net requires."); - po.Register("num-frames", &num_frames, "Number of frames with labels " - "that each example contains."); - po.Register("ivectors", &ivector_rspecifier, "Rspecifier of ivector " - "features, as matrix."); + po.Register("num-targets", &num_targets, "Output dimension in egs, " + "only used to check targets have correct dim if supplied."); + po.Register("ivectors", &online_ivector_rspecifier, "Alias for " + "--online-ivectors option, for back compatibility"); + po.Register("online-ivectors", &online_ivector_rspecifier, "Rspecifier of " + "ivector features, as a matrix."); + po.Register("online-ivector-period", &online_ivector_period, "Number of " + "frames between iVectors in matrices supplied to the " + "--online-ivectors option"); po.Register("length-tolerance", &length_tolerance, "Tolerance for " "difference in num-frames between feat and ivector matrices"); - + po.Read(argc, argv); if (po.NumArgs() != 3) { @@ -182,6 +205,9 @@ int main(int argc, char *argv[]) { exit(1); } + eg_config.ComputeDerived(); + UtteranceSplitter utt_splitter(eg_config); + if (num_targets <= 0) KALDI_ERR << "--num-targets options is required."; @@ -193,11 +219,10 @@ int main(int argc, char *argv[]) { SequentialBaseFloatMatrixReader feat_reader(feature_rspecifier); RandomAccessBaseFloatMatrixReader matrix_reader(matrix_rspecifier); NnetExampleWriter example_writer(examples_wspecifier); - RandomAccessBaseFloatMatrixReader ivector_reader(ivector_rspecifier); - - int32 num_done = 0, num_err = 0; - int64 num_frames_written = 0, num_egs_written = 0; - + RandomAccessBaseFloatMatrixReader online_ivector_reader(online_ivector_rspecifier); + + int32 num_err = 0; + for (; !feat_reader.Done(); feat_reader.Next()) { std::string key = feat_reader.Key(); const Matrix &feats = feat_reader.Value(); @@ -207,52 +232,47 @@ int main(int argc, char *argv[]) { } else { const Matrix &target_matrix = matrix_reader.Value(key); if (target_matrix.NumRows() != feats.NumRows()) { - KALDI_WARN << "Target matrix has wrong size " + KALDI_WARN << "Target matrix has wrong size " << target_matrix.NumRows() << " versus " << feats.NumRows(); num_err++; continue; } - const Matrix *ivector_feats = NULL; - if (!ivector_rspecifier.empty()) { - if (!ivector_reader.HasKey(key)) { + const Matrix *online_ivector_feats = NULL; + if (!online_ivector_rspecifier.empty()) { + if (!online_ivector_reader.HasKey(key)) { KALDI_WARN << "No iVectors for utterance " << key; num_err++; continue; } else { // this address will be valid until we call HasKey() or Value() // again. 
- ivector_feats = &(ivector_reader.Value(key)); + online_ivector_feats = &(online_ivector_reader.Value(key)); } } - if (ivector_feats != NULL && - (abs(feats.NumRows() - ivector_feats->NumRows()) > length_tolerance - || ivector_feats->NumRows() == 0)) { + if (online_ivector_feats != NULL && + (abs(feats.NumRows() - online_ivector_feats->NumRows()) > length_tolerance + || online_ivector_feats->NumRows() == 0)) { KALDI_WARN << "Length difference between feats " << feats.NumRows() - << " and iVectors " << ivector_feats->NumRows() + << " and iVectors " << online_ivector_feats->NumRows() << "exceeds tolerance " << length_tolerance; num_err++; continue; } - - ProcessFile(feats, ivector_feats, target_matrix, key, compress, - num_targets, left_context, right_context, num_frames, - &num_frames_written, &num_egs_written, - &example_writer); - num_done++; + + ProcessFile(feats, online_ivector_feats, online_ivector_period, + target_matrix, key, compress, num_targets, + &utt_splitter, &example_writer); } } - - KALDI_LOG << "Finished generating examples, " - << "successfully processed " << num_done - << " feature files, wrote " << num_egs_written << " examples, " - << " with " << num_frames_written << " egs in total; " - << num_err << " files had errors."; - return (num_egs_written == 0 || num_err > num_done ? 1 : 0); + if (num_err > 0) + KALDI_WARN << num_err << " utterances had errors and could " + "not be processed."; + // utt_splitter prints stats in its destructor. + return utt_splitter.ExitStatus(); } catch(const std::exception &e) { std::cerr << e.what() << '\n'; return -1; } } - diff --git a/src/nnet3bin/nnet3-get-egs.cc b/src/nnet3bin/nnet3-get-egs.cc index 6b9dacfa03d..562684c30ab 100644 --- a/src/nnet3bin/nnet3-get-egs.cc +++ b/src/nnet3bin/nnet3-get-egs.cc @@ -30,24 +30,24 @@ namespace kaldi { namespace nnet3 { -static void ProcessFile(const MatrixBase &feats, +static bool ProcessFile(const MatrixBase &feats, const MatrixBase *ivector_feats, int32 ivector_period, const Posterior &pdf_post, const std::string &utt_id, bool compress, int32 num_pdfs, - const UtteranceSplitter &utt_splitter, - int64 *num_frames_written, - int64 *num_egs_written, + UtteranceSplitter *utt_splitter, NnetExampleWriter *example_writer) { int32 num_input_frames = feats.NumRows(); - if (!utt_splitter.LengthsMatch(utt_id, num_input_frames, + if (!utt_splitter->LengthsMatch(utt_id, num_input_frames, static_cast(pdf_post.size()))) - return; // LengthsMatch() will have printed a warning. + return false; // LengthsMatch() will have printed a warning. std::vector chunks; + utt_splitter->GetChunksForUtterance(num_input_frames, &chunks); + if (chunks.empty()) { KALDI_WARN << "Not producing egs for utterance " << utt_id << " because it is too short: " @@ -58,9 +58,7 @@ static void ProcessFile(const MatrixBase &feats, // writing, this is being supported to unify the code with the 'chain' recipes // and in case we need it for some reason in future. 
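For reference, a hedged standalone sketch of the two iVector-related conventions used above: the frame-to-row mapping implied by --online-ivector-period, and the --length-tolerance check (illustrative helper names, not the Kaldi API):

    #include <algorithm>
    #include <cstdio>
    #include <cstdlib>

    // An --online-ivectors matrix stores one row per 'ivector_period' input
    // frames, so a frame index maps to row frame / ivector_period, clamped
    // to the matrix.
    int IvectorRow(int frame, int ivector_period, int num_ivector_rows) {
      int row = frame / ivector_period;
      return std::min(std::max(row, 0), num_ivector_rows - 1);
    }

    // The feats/iVector matrices only need to agree on length up to
    // --length-tolerance rows, since they may come from different pipelines.
    bool LengthsCompatible(int feat_rows, int ivector_rows, int tolerance) {
      return ivector_rows > 0 && std::abs(feat_rows - ivector_rows) <= tolerance;
    }

    int main() {
      printf("row %d\n", IvectorRow(499, 10, 50));                  // row 49
      printf("compatible %d\n", LengthsCompatible(500, 498, 100));  // 1
      return 0;
    }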
int32 frame_subsampling_factor = - utt_splitter.Config().frame_subsampling_factor; - - utt_splitter.GetChunksForUtterance(num_input_frames, &chunks); + utt_splitter->Config().frame_subsampling_factor; for (size_t c = 0; c < chunks.size(); c++) { const ChunkTimeInfo &chunk = chunks[c]; @@ -136,11 +134,9 @@ static void ProcessFile(const MatrixBase &feats, std::string key = os.str(); // key is - - *num_frames_written += chunk.num_frames; - *num_egs_written += 1; - example_writer->Write(key, eg); } + return true; } } // namespace nnet3 @@ -222,8 +218,7 @@ int main(int argc, char *argv[]) { RandomAccessBaseFloatMatrixReader online_ivector_reader( online_ivector_rspecifier); - int32 num_done = 0, num_err = 0; - int64 num_frames_written = 0, num_egs_written = 0; + int32 num_err = 0; for (; !feat_reader.Done(); feat_reader.Next()) { std::string key = feat_reader.Key(); @@ -263,20 +258,17 @@ int main(int argc, char *argv[]) { continue; } - ProcessFile(feats, online_ivector_feats, online_ivector_period, - pdf_post, key, compress, num_pdfs, utt_splitter, - &num_frames_written, &num_egs_written, - &example_writer); - num_done++; + if (!ProcessFile(feats, online_ivector_feats, online_ivector_period, + pdf_post, key, compress, num_pdfs, + &utt_splitter, &example_writer)) + num_err++; } } - - KALDI_LOG << "Finished generating examples, " - << "successfully processed " << num_done - << " feature files, wrote " << num_egs_written << " examples, " - << " with " << num_frames_written << " egs in total; " - << num_err << " files had errors."; - return (num_egs_written == 0 || num_err > num_done ? 1 : 0); + if (num_err > 0) + KALDI_WARN << num_err << " utterances had errors and could " + "not be processed."; + // utt_splitter prints stats in its destructor. + return utt_splitter.ExitStatus(); } catch(const std::exception &e) { std::cerr << e.what() << '\n'; return -1; From 93598d2a8a9c96d05d36519f2d5999145cd433cb Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 1 Jan 2017 16:33:38 -0500 Subject: [PATCH 177/530] Various code and script fixes --- .../nnet3/train/chain_objf/acoustic_model.py | 6 +- egs/wsj/s5/steps/libs/nnet3/train/common.py | 93 ++++++++++++------- .../nnet3/train/frame_level_objf/common.py | 2 +- src/chainbin/nnet3-chain-copy-egs.cc | 33 +++++-- src/chainbin/nnet3-chain-merge-egs.cc | 10 +- src/nnet3/nnet-chain-example.cc | 5 +- src/nnet3/nnet-chain-example.h | 8 +- src/nnet3/nnet-chain-training.cc | 3 +- src/nnet3/nnet-component-itf.h | 2 +- src/nnet3/nnet-discriminative-example.cc | 6 +- src/nnet3/nnet-discriminative-example.h | 9 +- src/nnet3/nnet-example-utils.cc | 36 +++---- src/nnet3/nnet-example-utils.h | 31 ++++--- src/nnet3/nnet-nnet.h | 4 +- src/nnet3/nnet-training.cc | 6 +- src/nnet3/nnet-training.h | 5 +- src/nnet3bin/nnet3-copy-egs.cc | 28 +++--- .../nnet3-discriminative-merge-egs.cc | 12 +-- src/nnet3bin/nnet3-get-egs-dense-targets.cc | 3 - src/nnet3bin/nnet3-merge-egs.cc | 2 +- 20 files changed, 172 insertions(+), 132 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py index 3266af13519..3e375a1b863 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py @@ -298,7 +298,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, # changing too fast (i.e. it can worsen the objective function), and # the smaller minibatch size will help to keep the update stable. 
        cur_num_chunk_per_minibatch_str = common_train_lib.halve_minibatch_size_str(
-            cur_num_chunk_per_minibatch_str)
+            num_chunk_per_minibatch_str)
         cur_max_param_change = float(max_param_change) / math.sqrt(2)
 
     train_new_models(dir=dir, iter=iter, srand=srand, num_jobs=num_jobs,
@@ -460,7 +460,7 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context,
                    "nnet3-am-copy --raw=true {model} - |" {dir}/den.fst \
                    "ark,bg:nnet3-chain-copy-egs --left-context={lc} \
                        --right-context={rc} ark:{egs_dir}/valid_diagnostic.cegs \
-                        ark:- | nnet3-chain-merge-egs ark:- ark:- |" \
+                        ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \
                """.format(command=run_opts.command,
                           dir=dir, iter=iter, model=model,
                           lc=left_context, rc=right_context,
                           l2=l2_regularize, leaky=leaky_hmm_coefficient,
@@ -475,7 +475,7 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context,
                    "nnet3-am-copy --raw=true {model} - |" {dir}/den.fst \
                    "ark,bg:nnet3-chain-copy-egs --left-context={lc} \
                        --right-context={rc} ark:{egs_dir}/train_diagnostic.cegs \
-                        ark:- | nnet3-chain-merge-egs ark:- ark:- |" \
+                        ark:- | nnet3-chain-merge-egs --minibatch-size=1:64 ark:- ark:- |" \
                """.format(command=run_opts.command,
                           dir=dir, iter=iter, model=model,
                           lc=left_context, rc=right_context,
                           l2=l2_regularize, leaky=leaky_hmm_coefficient,
diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py
index 1edaf3972cb..af8e9793f0a 100644
--- a/egs/wsj/s5/steps/libs/nnet3/train/common.py
+++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py
@@ -161,13 +161,42 @@ def principal_chunk_width(chunk_width):
     return int(chunk_width.split(",")[0])
 
 
+def validate_range_str(range_str):
+    """Helper function used inside validate_minibatch_size_str().
+    Returns true if range_str is a comma-separated list of
+    positive integers and ranges of integers, like '128',
+    '128,256', or '64:128,256'."""
+    if not isinstance(range_str, str):
+        return False
+    ranges = range_str.split(",")
+    assert len(ranges) > 0
+    for r in ranges:
+        # a range may be either e.g. '64', or '128:256'
+        try:
+            c = [ int(x) for x in r.split(":") ]
+        except:
+            return False
+        # c should be either e.g. [ 128 ], or [64,128].
+        if len(c) == 1:
+            if c[0] <= 0:
+                return False
+        elif len(c) == 2:
+            if c[0] <= 0 or c[1] < c[0]:
+                return False
+        else:
+            return False
+    return True
+
+
 def validate_minibatch_size_str(minibatch_size_str):
     """Validate a minibatch-size string (returns bool).
-    A minibatch-size string might either be an integer, like '256'
-    or a rule like '128=64-128/256=32,64', whose format
+    A minibatch-size string might either be an integer, like '256',
+    a comma-separated set of integers or ranges like '128,256' or
+    '64:128,256', or a rule like '128=64:128/256=32,64', whose format
     is: eg-length1=size-range1/eg-length2=size-range2/....
-    where the size-range is a comma-separated list of either integers
-    or ranges.  An arbitrary eg will be mapped to the size-range
+    where a size-range is a comma-separated list of either integers like '16'
+    or ranges like '16:32'.  An arbitrary eg will be mapped to the size-range
     for the closest of the listed eg-lengths (the eg-length is defined
     as the number of input frames, including context frames)."""
     if not isinstance(minibatch_size_str, str):
@@ -183,11 +212,7 @@ def validate_minibatch_size_str(minibatch_size_str):
             # one choice)... this would mean somebody just gave "25"
             # or something like that for the minibatch size.
            if len(a) == 1 and len(b) == 1:
-                try:
-                    mb_size = int(b[0])
-                    return mb_size > 0
-                except:
-                    return False
+                return validate_range_str(elem)
             else:
                 return False
         # check that the thing before the '=' sign is a positive integer
@@ -197,26 +222,29 @@ def validate_minibatch_size_str(minibatch_size_str):
                 return False
         except:
             return False  # not an integer at all.
-        # check the thing after the '=' sign is a comma-separated list of ranges
-        ranges = b[1].split(",")
-        assert len(ranges) > 0
-        for range in ranges:
-            # a range may be either e.g. '64', or '128-256'
-            try:
-                c = [ int(x) for x in range.split("-") ]
-            except:
-                return False
-            if len(c) == 1:
-                if c[0] <= 0:
-                    return False
-            elif len(c) == 2:
-                if c[0] <= 0 or c[1] < c[0]:
-                    return False
-            else:
-                return False
+
+        if not validate_range_str(b[1]):
+            return False
     return True
 
 
+def halve_range_str(range_str):
+    """Helper function used inside halve_minibatch_size_str().
+    Returns half of a range [but converting resulting zeros to
+    ones], e.g. '16'->'8', '16,32'->'8,16', '64:128'->'32:64'.
+    Assumes range_str has already been validated by
+    validate_range_str(), so it is a comma-separated list of
+    positive integers and ranges like '128' or '64:128,256'."""
+
+    ranges = range_str.split(",")
+    halved_ranges = []
+    for r in ranges:
+        # a range may be either e.g. '64', or '128:256'
+        c = [ str(max(1, int(x)/2)) for x in r.split(":") ]
+        halved_ranges.append(":".join(c))
+    return ','.join(halved_ranges)
+
+
 def halve_minibatch_size_str(minibatch_size_str):
     """Halve a minibatch-size string, as would be validated by
     validate_minibatch_size_str (see docs for that).  This halves
@@ -232,16 +260,10 @@ def halve_minibatch_size_str(minibatch_size_str):
         b = elem.split('=')
         # We expect b to have length 2 in the normal case.
         if len(b) == 1:
-            mb_size = int(b[0])
-            ans.append(str(max(1, mb_size / 2)))
+            return halve_range_str(elem)
         else:
             assert len(b) == 2
-            ranges_out = []
-            ranges = b[1].split(',')
-            for range in ranges:
-                c = [ str(max(1, int(x)/2)) for x in range.split('-') ]
-                ranges_out.append('-'.join(c))
-            ans.append('{0}={1}'.format(b[0], ','.join(ranges_out)))
+            ans.append('{0}={1}'.format(b[0], halve_range_str(b[1])))
     return '/'.join(ans)
 
 
@@ -529,8 +551,9 @@ def remove_model(nnet_dir, iter, num_iters, models_to_combine=None,
 
 def self_test():
     assert halve_minibatch_size_str('64') == '32'
+    assert halve_minibatch_size_str('64,16:32') == '32,8:16'
     assert halve_minibatch_size_str('1') == '1'
-    assert halve_minibatch_size_str('128=64/256=40,80-100') == '128=32/256=20,40-50'
+    assert halve_minibatch_size_str('128=64/256=40,80:100') == '128=32/256=20,40:50'
     assert validate_chunk_width('64')
     assert validate_chunk_width('64,25,128')
 
diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
index c1c95b4748f..0826c9f0468 100644
--- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
+++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
@@ -365,7 +365,7 @@ def compute_preconditioning_matrix(dir, egs_dir, num_lda_jobs, run_opts,
 
 def compute_train_cv_probabilities(dir, iter, egs_dir, left_context,
-                                   right_context, run_opts, minibatch_size_str='256',
+                                   right_context, run_opts, minibatch_size_str='1:256',
                                    wait=False, background_process_handler=None,
                                    get_raw_nnet_from_am=True):
     if get_raw_nnet_from_am:
diff --git a/src/chainbin/nnet3-chain-copy-egs.cc b/src/chainbin/nnet3-chain-copy-egs.cc
index b0c963595a1..1396932252a 100644
--- a/src/chainbin/nnet3-chain-copy-egs.cc
+++ b/src/chainbin/nnet3-chain-copy-egs.cc
@@ -201,6 +201,7 @@ void ModifyChainExampleContext(const NnetChainExample &eg,
                                int32 right_context,
                                const int32 frame_subsampling_factor,
                                NnetChainExample *eg_out) {
+  static bool warned_left = false, warned_right = false;
   int32 min_input_t, max_input_t,
       min_output_t, max_output_t;
   if (!ContainsSingleExample(eg, &min_input_t, &max_input_t,
@@ -208,19 +209,31 @@ void ModifyChainExampleContext(const NnetChainExample &eg,
     KALDI_ERR << "Too late to perform frame selection/context reduction on "
               << "these examples (already merged?)";
   if (left_context != -1) {
-    if (min_input_t > min_output_t - left_context)
-      KALDI_ERR << "You requested --left-context=" << left_context
-                << ", but example only has left-context of "
-                << (min_output_t - min_input_t);
+    int32 observed_left_context = min_output_t - min_input_t;
+    if (!warned_left && observed_left_context < left_context) {
+      warned_left = true;
+      KALDI_WARN << "You requested --left-context=" << left_context
+                 << ", but example only has left-context of "
+                 << observed_left_context
+                 << " (will warn only once; this may be harmless if "
+                 "using any --*left-context-initial options)";
+    }
     min_input_t = std::max(min_input_t, min_output_t - left_context);
   }
   if (right_context != -1) {
-    if (max_input_t < max_output_t + right_context + frame_subsampling_factor - 1)
-      KALDI_ERR << "You requested --right-context=" << right_context
-                << ", but example only has right-context of "
-                << (max_input_t - max_output_t - frame_subsampling_factor + 1);
-    max_input_t = std::min(max_input_t, max_output_t + right_context
-                           + frame_subsampling_factor - 1);
+    int32 observed_right_context = max_input_t - max_output_t;
+
+    if (right_context != -1) {
+      if (!warned_right && observed_right_context < right_context) {
+        warned_right = true;
+        KALDI_WARN << "You requested --right-context=" << right_context
+                   << ", but example only has right-context of "
+                   << observed_right_context
+                   << " (will warn only once; this may be harmless if "
+                   "using any --*right-context-final options)";
+      }
+      max_input_t = std::min(max_input_t, max_output_t + right_context);
+    }
   }
   FilterExample(eg, min_input_t, max_input_t,
diff --git a/src/chainbin/nnet3-chain-merge-egs.cc b/src/chainbin/nnet3-chain-merge-egs.cc
index ff94598f7eb..a3686d2fc30 100644
--- a/src/chainbin/nnet3-chain-merge-egs.cc
+++ b/src/chainbin/nnet3-chain-merge-egs.cc
@@ -41,10 +41,9 @@ int main(int argc, char *argv[]) {
         "nnet3-chain-merge-egs --minibatch-size=128 ark:1.cegs ark:- | nnet3-chain-train-simple ... \n"
         "See also nnet3-chain-copy-egs\n";
 
-    ExampleMergingConfig merging_config;
-    merging_config.minibatch_size = 64; // change the default for this
-                                        // program.. anyway it will usually be
-                                        // set on the command line.
+
+    ExampleMergingConfig merging_config("64"); // 64 is default minibatch size.
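The context-reduction logic in nnet3-chain-copy-egs.cc above can be summarized by the following self-contained sketch (illustrative, not the actual Kaldi code): trim the input range to the requested context, warning only once if less context is available than requested:

    #include <algorithm>
    #include <cstdio>

    // Trim the input range so at most 'left_context' frames remain before
    // the first output frame; warn only the first time an example has less
    // context than requested.
    void TrimLeftContext(int min_output_t, int left_context, int *min_input_t) {
      static bool warned = false;
      int observed_left_context = min_output_t - *min_input_t;
      if (!warned && observed_left_context < left_context) {
        warned = true;
        fprintf(stderr, "requested left-context %d but example only has %d "
                "(warning once)\n", left_context, observed_left_context);
      }
      *min_input_t = std::max(*min_input_t, min_output_t - left_context);
    }

    int main() {
      int min_input_t = -13;                 // 13 frames of context available
      TrimLeftContext(0, 10, &min_input_t);  // extra context is trimmed
      printf("min_input_t = %d\n", min_input_t);  // -10
      min_input_t = -4;                      // only 4 frames available
      TrimLeftContext(0, 10, &min_input_t);  // warns; keeps -4
      printf("min_input_t = %d\n", min_input_t);
      return 0;
    }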
+ ParseOptions po(usage); merging_config.Register(&po); @@ -61,8 +60,9 @@ int main(int argc, char *argv[]) { SequentialNnetChainExampleReader example_reader(examples_rspecifier); NnetChainExampleWriter example_writer(examples_wspecifier); + merging_config.ComputeDerived(); ChainExampleMerger merger(merging_config, &example_writer); - while (!example_reader.Done()) { + for (; !example_reader.Done(); example_reader.Next()) { const NnetChainExample &cur_eg = example_reader.Value(); merger.AcceptExample(new NnetChainExample(cur_eg)); } diff --git a/src/nnet3/nnet-chain-example.cc b/src/nnet3/nnet-chain-example.cc index b1c6e60de47..4f9cb4b92b8 100644 --- a/src/nnet3/nnet-chain-example.cc +++ b/src/nnet3/nnet-chain-example.cc @@ -505,8 +505,8 @@ void ChainExampleMerger::AcceptExample(NnetChainExample *eg) { // so use swap to create that without doing any real work. std::vector egs_to_merge(minibatch_size); for (int32 i = 0; i < minibatch_size; i++) { - egs_to_merge[i].Swap(vec[i]); - delete vec[i]; // we owned those pointers. + egs_to_merge[i].Swap(vec_copy[i]); + delete vec_copy[i]; // we owned those pointers. } WriteMinibatch(&egs_to_merge); } @@ -572,6 +572,7 @@ void ChainExampleMerger::Finish() { vec.clear(); } } + stats_.PrintStats(); } diff --git a/src/nnet3/nnet-chain-example.h b/src/nnet3/nnet-chain-example.h index 24e68116193..ac782a92805 100644 --- a/src/nnet3/nnet-chain-example.h +++ b/src/nnet3/nnet-chain-example.h @@ -247,14 +247,14 @@ class ChainExampleMerger { void AcceptExample(NnetChainExample *a); // This function announces to the class that the input has finished, so it - // should flush out any smaller-sizes minibatches, as dictated by the config. + // should flush out any smaller-sized minibatches, as dictated by the config. // This will be called in the destructor, but you can call it explicitly when - // all the input is done if you want to. - // It also prints the stats. + // all the input is done if you want to; it won't repeat anything if called + // twice. It also prints the stats. void Finish(); // returns a suitable exit status for a program. - bool ExitStatus() { return num_egs_written_ > 0; } + int32 ExitStatus() { Finish(); return (num_egs_written_ > 0 ? 0 : 1); } ~ChainExampleMerger() { Finish(); }; private: diff --git a/src/nnet3/nnet-chain-training.cc b/src/nnet3/nnet-chain-training.cc index bfc67db17be..1e293f588ae 100644 --- a/src/nnet3/nnet-chain-training.cc +++ b/src/nnet3/nnet-chain-training.cc @@ -30,7 +30,8 @@ NnetChainTrainer::NnetChainTrainer(const NnetChainTrainingOptions &opts, opts_(opts), den_graph_(den_fst, nnet->OutputDim("output")), nnet_(nnet), - compiler_(*nnet, opts_.nnet_config.optimize_config), + compiler_(*nnet, opts_.nnet_config.optimize_config, + opts_.nnet_config.compiler_config), num_minibatches_processed_(0) { if (opts.nnet_config.zero_component_stats) ZeroComponentStats(nnet); diff --git a/src/nnet3/nnet-component-itf.h b/src/nnet3/nnet-component-itf.h index e5974b46f46..9dc372340be 100644 --- a/src/nnet3/nnet-component-itf.h +++ b/src/nnet3/nnet-component-itf.h @@ -355,7 +355,7 @@ class RandomComponent: public Component { // This function is required in testing code and in other places we need // consistency in the random number generation (e.g. when optimizing // validation-set performance), but check where else we call srand(). You'll - // need to call srand as well as making this call. + // need to call srand prior to making this call. 
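The AcceptExample()/WriteMinibatch() changes above rely on a swap-based hand-off to avoid deep-copying examples; a minimal standalone sketch of that ownership pattern (Example is a stand-in for NnetChainExample and friends, not the Kaldi class):

    #include <cstdio>
    #include <vector>

    // The merger owns heap-allocated examples; before writing a minibatch it
    // swaps their contents into a local vector of values (an O(1) exchange
    // of internal buffers, no deep copy), then frees the now-empty originals.
    struct Example {
      std::vector<float> data;
      void Swap(Example *other) { data.swap(other->data); }
    };

    int main() {
      std::vector<Example*> vec_copy;
      for (int i = 0; i < 3; i++)
        vec_copy.push_back(new Example{{1.0f * i}});

      std::vector<Example> egs_to_merge(vec_copy.size());
      for (size_t i = 0; i < vec_copy.size(); i++) {
        egs_to_merge[i].Swap(vec_copy[i]);  // buffer swap, no copy
        delete vec_copy[i];                 // we owned those pointers
      }
      printf("merged %zu examples\n", egs_to_merge.size());
      return 0;
    }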
void ResetGenerator() { random_generator_.SeedGpu(); } protected: CuRand random_generator_; diff --git a/src/nnet3/nnet-discriminative-example.cc b/src/nnet3/nnet-discriminative-example.cc index debc91b96c9..a7330e772a3 100644 --- a/src/nnet3/nnet-discriminative-example.cc +++ b/src/nnet3/nnet-discriminative-example.cc @@ -499,8 +499,8 @@ void DiscriminativeExampleMerger::AcceptExample(NnetDiscriminativeExample *eg) { // so use swap to create that without doing any real work. std::vector egs_to_merge(minibatch_size); for (int32 i = 0; i < minibatch_size; i++) { - egs_to_merge[i].Swap(vec[i]); - delete vec[i]; // we owned those pointers. + egs_to_merge[i].Swap(vec_copy[i]); + delete vec_copy[i]; // we owned those pointers. } WriteMinibatch(&egs_to_merge); } @@ -566,10 +566,10 @@ void DiscriminativeExampleMerger::Finish() { vec.clear(); } } + stats_.PrintStats(); } } // namespace nnet3 } // namespace kaldi - diff --git a/src/nnet3/nnet-discriminative-example.h b/src/nnet3/nnet-discriminative-example.h index 048ee32c4e8..9d9bba0c906 100644 --- a/src/nnet3/nnet-discriminative-example.h +++ b/src/nnet3/nnet-discriminative-example.h @@ -245,14 +245,14 @@ class DiscriminativeExampleMerger { void AcceptExample(NnetDiscriminativeExample *a); // This function announces to the class that the input has finished, so it - // should flush out any smaller-sizes minibatches, as dictated by the config. + // should flush out any smaller-sized minibatches, as dictated by the config. // This will be called in the destructor, but you can call it explicitly when - // all the input is done if you want to. - // It also prints the stats. + // all the input is done if you want to; it won't repeat anything if called + // twice. It also prints the stats. void Finish(); // returns a suitable exit status for a program. - bool ExitStatus() { return num_egs_written_ > 0; } + int32 ExitStatus() { Finish(); return (num_egs_written_ > 0 ? 0 : 1); } ~DiscriminativeExampleMerger() { Finish(); }; private: @@ -281,4 +281,3 @@ class DiscriminativeExampleMerger { } // namespace kaldi #endif // KALDI_NNET3_NNET_DISCRIMINATIVE_EXAMPLE_H_ - diff --git a/src/nnet3/nnet-example-utils.cc b/src/nnet3/nnet-example-utils.cc index 574ff6fbfae..28578de42fb 100644 --- a/src/nnet3/nnet-example-utils.cc +++ b/src/nnet3/nnet-example-utils.cc @@ -466,10 +466,12 @@ void UtteranceSplitter::InitSplitForLength() { // gaps twice as strongly as overlaps, based on the intuition that // completely throwing out frames of data is worse than counting them // twice. - int32 c = (default_duration > float(u) ? default_duration - u : - 2 * (u - default_duration)); - if (u < max_chunk_size) + float c = (default_duration > float(u) ? 
default_duration - float(u) : + 2.0 * (u - default_duration)); + if (u < max_chunk_size) // can't fit the largest of the chunks in this + // utterance c = std::numeric_limits::max(); + KALDI_ASSERT(c >= 0); costs_for_length[u].push_back(c); } } @@ -574,6 +576,10 @@ void UtteranceSplitter::GetChunkSizesForUtterance( KALDI_ASSERT(utterance_length >= 0); const std::vector > &possible_splits = splits_for_length_[utterance_length]; + if (possible_splits.empty()) { + chunk_sizes->clear(); + return; + } int32 num_possible_splits = possible_splits.size(), randomly_chosen_split = RandInt(0, num_possible_splits - 1); *chunk_sizes = possible_splits[randomly_chosen_split]; @@ -693,7 +699,7 @@ void UtteranceSplitter::DistributeRandomly(int32 n, std::vector > partial_counts; int32 total_count = 0; for (int32 i = 0; i < size; i++) { - float this_count = float(n) / total_magnitude; + float this_count = n * float(magnitudes[i]) / total_magnitude; // note: cast of float to int32 rounds towards zero (down, in this // case, since this_count >= 0). int32 this_whole_count = static_cast(this_count), @@ -904,11 +910,9 @@ bool ExampleMergingConfig::ParseIntSet(const std::string &str, int_set->ranges.resize(split_str.size()); for (size_t i = 0; i < split_str.size(); i++) { std::vector split_range; - // note: because we split on '-', it't not possible to - // get negative values in 'split_range'. - SplitStringToIntegers(str, "-", false, &split_range); + SplitStringToIntegers(split_str[i], ":", false, &split_range); if (split_range.size() < 1 || split_range.size() > 2 || - split_range[0] > split_range[1]) + split_range[0] > split_range.back() || split_range[0] <= 0) return false; int_set->ranges[i].first = split_range[0]; int_set->ranges[i].second = split_range.back(); @@ -935,7 +939,7 @@ void ExampleMergingConfig::ComputeDerived() { rules.resize(minibatch_size_split.size()); for (size_t i = 0; i < minibatch_size_split.size(); i++) { - int32 &minibatch_size = rules[i].first; + int32 &eg_size = rules[i].first; IntSet &int_set = rules[i].second; // 'this_rule' will be either something like "256" or like "64-128,256" // (but these two only if minibatch_size_split.size() == 1, or something with @@ -948,7 +952,7 @@ void ExampleMergingConfig::ComputeDerived() { KALDI_ERR << "Could not parse option --minibatch-size=" << minibatch_size; } - if (!ConvertStringToInteger(rule_split[0], &minibatch_size) || + if (!ConvertStringToInteger(rule_split[0], &eg_size) || !ParseIntSet(rule_split[1], &int_set)) KALDI_ERR << "Could not parse option --minibatch-size=" << minibatch_size; @@ -957,9 +961,8 @@ void ExampleMergingConfig::ComputeDerived() { if (minibatch_size_split.size() != 1) { KALDI_ERR << "Could not parse option --minibatch-size=" << minibatch_size << " (all rules must have " - << "minibatch-size specified if >1 rule)"; + << "eg-size specified if >1 rule)"; } - minibatch_size = 0; if (!ParseIntSet(this_rule, &int_set)) KALDI_ERR << "Could not parse option --minibatch-size=" << minibatch_size; @@ -1035,8 +1038,8 @@ void ExampleMergingStats::DiscardedExamples(int32 example_size, void ExampleMergingStats::PrintStats() const { - PrintAggregateStats(); PrintSpecificStats(); + PrintAggregateStats(); } void ExampleMergingStats::PrintAggregateStats() const { @@ -1097,7 +1100,7 @@ void ExampleMergingStats::PrintAggregateStats() const { << " egs of avg. size " << avg_input_egs_size << " into " << num_minibatches << " minibatches, discarding " << percent_discarded << "% of egs. 
Avg minibatch size was " - << avg_minibatch_size << ", distinct types of egs/minibatches " + << avg_minibatch_size << ", #distinct types of egs/minibatches " << "was " << num_distinct_egs_types << "/" << num_distinct_minibatch_types; KALDI_LOG << os.str(); @@ -1179,8 +1182,8 @@ void ExampleMerger::AcceptExample(NnetExample *eg) { // so use swap to create that without doing any real work. std::vector egs_to_merge(minibatch_size); for (int32 i = 0; i < minibatch_size; i++) { - egs_to_merge[i].Swap(vec[i]); - delete vec[i]; // we owned those pointers. + egs_to_merge[i].Swap(vec_copy[i]); + delete vec_copy[i]; // we owned those pointers. } WriteMinibatch(egs_to_merge); } @@ -1244,6 +1247,7 @@ void ExampleMerger::Finish() { vec.clear(); } } + stats_.PrintStats(); } diff --git a/src/nnet3/nnet-example-utils.h b/src/nnet3/nnet-example-utils.h index 66624f69004..021e91959e3 100644 --- a/src/nnet3/nnet-example-utils.h +++ b/src/nnet3/nnet-example-utils.h @@ -197,7 +197,7 @@ class UtteranceSplitter { ~UtteranceSplitter(); - int32 ExitStatus() { return (total_frames_in_chunks_ > 0); } + int32 ExitStatus() { return (total_frames_in_chunks_ > 0 ? 0 : 1); } private: @@ -283,7 +283,7 @@ class UtteranceSplitter { const ExampleGenerationConfig &config_; - // The vector 'split_for_length_' is indexed by the num-frames of a file, and + // The vector 'splits_for_length_' is indexed by the num-frames of a file, and // gives us a list of alternative splits that we can use if the utternace has // that many frames. For example, if split_for_length[100] = ( (25, 40, 40), // (40, 65) ), it means we could either split as chunks of size (25, 40, 40) @@ -328,10 +328,11 @@ class ExampleMergingConfig { std::string minibatch_size; std::string discard_partial_minibatches; // for back-compatibility, not used. - ExampleMergingConfig(): compress(false), - measure_output_frames("deprecated"), - minibatch_size("256"), - discard_partial_minibatches("deprecated") { } + ExampleMergingConfig(const char *default_minibatch_size = "256"): + compress(false), + measure_output_frames("deprecated"), + minibatch_size(default_minibatch_size), + discard_partial_minibatches("deprecated") { } void Register(OptionsItf *po) { po->Register("compress", &compress, "If true, compress the output examples " @@ -344,7 +345,7 @@ class ExampleMergingConfig { "String controlling the minibatch size. May be just an integer, " "meaning a fixed minibatch size (e.g. --minibatch-size=128). " "May be a list of ranges and values, e.g. --minibatch-size=32,64 " - "or --minibatch-size=16-32,64,128. All minibatches will be of " + "or --minibatch-size=16:32,64,128. All minibatches will be of " "the largest size until the end of the input is reached; " "then, increasingly smaller sizes will be allowed. Only egs " "with the same structure (e.g num-frames) are merged. You may " @@ -352,7 +353,7 @@ class ExampleMergingConfig { "(defined as the maximum number of Indexes on any input), in " "the format " "--minibatch-size='eg_size1=mb_sizes1/eg_size2=mb_sizes2', e.g. " - "--minibatch-size=128=64-128,256/256=32-64,128. Egs are given " + "--minibatch-size=128=64:128,256/256=32:64,128. Egs are given " "minibatch-sizes based on the specified eg-size closest to " "their actual size."); } @@ -385,9 +386,9 @@ class ExampleMergingConfig { private: - // struct IntSet is a representation of something like 16-32,64, which is a - // nonempty list of either nonnegative integers or ranges of nonnegative - // integers. Conceptually it represents a set of nonnegative integers. 
+ // struct IntSet is a representation of something like 16:32,64, which is a + // nonempty list of either positive integers or ranges of positive integers. + // Conceptually it represents a set of positive integers. struct IntSet { // largest_size is the largest integer in any of the ranges (64 in this // example). @@ -489,14 +490,14 @@ class ExampleMerger { void AcceptExample(NnetExample *a); // This function announces to the class that the input has finished, so it - // should flush out any smaller-sizes minibatches, as dictated by the config. + // should flush out any smaller-sized minibatches, as dictated by the config. // This will be called in the destructor, but you can call it explicitly when - // all the input is done if you want to. - // It also prints the stats. + // all the input is done if you want to; it won't repeat anything if called + // twice. It also prints the stats. void Finish(); // returns a suitable exit status for a program. - bool ExitStatus() { return num_egs_written_ > 0 ? 0 : 1; } + int32 ExitStatus() { Finish(); return (num_egs_written_ > 0 ? 0 : 1); } ~ExampleMerger() { Finish(); }; private: diff --git a/src/nnet3/nnet-nnet.h b/src/nnet3/nnet-nnet.h index 19cfb3949ad..0e6918de18d 100644 --- a/src/nnet3/nnet-nnet.h +++ b/src/nnet3/nnet-nnet.h @@ -247,8 +247,8 @@ class Nnet { void RemoveSomeNodes(const std::vector &nodes_to_remove); void ResetGenerators(); // resets random-number generators for all - // random components. You must also set srand() for this to be - // effective. + // random components. You must call srand() prior to this call, for this to + // be effective. // This function outputs to "config_lines" the lines of a config file. If you diff --git a/src/nnet3/nnet-training.cc b/src/nnet3/nnet-training.cc index 2378c607ebf..9e534256e3f 100644 --- a/src/nnet3/nnet-training.cc +++ b/src/nnet3/nnet-training.cc @@ -28,7 +28,7 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config, Nnet *nnet): config_(config), nnet_(nnet), - compiler_(*nnet, config_.optimize_config), + compiler_(*nnet, config_.optimize_config, config_.compiler_config), num_minibatches_processed_(0) { if (config.zero_component_stats) ZeroComponentStats(nnet); @@ -39,7 +39,7 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config, // natural-gradient updates. SetZero(is_gradient, delta_nnet_); const int32 num_updatable = NumUpdatableComponents(*delta_nnet_); - num_max_change_per_component_applied_.resize(num_updatable, 0); + num_max_change_per_component_applied_.resize(num_updatable, 0); num_max_change_global_applied_ = 0; if (config_.read_cache != "") { @@ -167,7 +167,7 @@ void NnetTrainer::UpdateParamsWithMaxChange() { << " / " << num_updatable << " Updatable Components." << "(smallest factor=" << min_scale << " on " << component_name_with_min_scale - << " with max-change=" << max_change_with_min_scale <<"). "; + << " with max-change=" << max_change_with_min_scale <<"). 
"; if (param_delta > config_.max_param_change) ostr << "Global max-change factor was " << config_.max_param_change / param_delta diff --git a/src/nnet3/nnet-training.h b/src/nnet3/nnet-training.h index 70c90267c66..55d3e02ea67 100644 --- a/src/nnet3/nnet-training.h +++ b/src/nnet3/nnet-training.h @@ -42,6 +42,7 @@ struct NnetTrainerOptions { BaseFloat max_param_change; NnetOptimizeOptions optimize_config; NnetComputeOptions compute_config; + CachingOptimizingCompilerOptions compiler_config; NnetTrainerOptions(): zero_component_stats(true), store_component_stats(true), @@ -79,8 +80,8 @@ struct NnetTrainerOptions { // register the optimization options with the prefix "optimization". ParseOptions optimization_opts("optimization", opts); optimize_config.Register(&optimization_opts); - - + ParseOptions compiler_opts("compiler", opts); + compiler_config.Register(&compiler_opts); // register the compute options with the prefix "computation". ParseOptions compute_opts("computation", opts); compute_config.Register(&compute_opts); diff --git a/src/nnet3bin/nnet3-copy-egs.cc b/src/nnet3bin/nnet3-copy-egs.cc index efb51f51910..42413114af3 100644 --- a/src/nnet3bin/nnet3-copy-egs.cc +++ b/src/nnet3bin/nnet3-copy-egs.cc @@ -191,6 +191,7 @@ bool SelectFromExample(const NnetExample &eg, int32 right_context, int32 frame_shift, NnetExample *eg_out) { + static bool warned_left = false, warned_right = false; int32 min_input_t, max_input_t, min_output_t, max_output_t; if (!ContainsSingleExample(eg, &min_input_t, &max_input_t, @@ -214,21 +215,26 @@ bool SelectFromExample(const NnetExample &eg, min_output_t = max_output_t = frame; } } - // There may come a time when we want to remove or make it possible to disable - // the error messages below. The std::max and std::min expressions may seem - // unnecessary but are intended to make life easier if and when we do that. 
   if (left_context != -1) {
-    if (min_input_t > min_output_t - left_context)
-      KALDI_ERR << "You requested --left-context=" << left_context
-                << ", but example only has left-context of "
-                << (min_output_t - min_input_t);
+    if (!warned_left && min_input_t > min_output_t - left_context) {
+      warned_left = true;
+      KALDI_WARN << "You requested --left-context=" << left_context
+                 << ", but example only has left-context of "
+                 << (min_output_t - min_input_t)
+                 << " (will warn only once; this may be harmless if "
+                 "using any --*left-context-initial options)";
+    }
     min_input_t = std::max(min_input_t, min_output_t - left_context);
   }
   if (right_context != -1) {
-    if (max_input_t < max_output_t + right_context)
-      KALDI_ERR << "You requested --right-context=" << right_context
+    if (!warned_right && max_input_t < max_output_t + right_context) {
+      warned_right = true;
+      KALDI_WARN << "You requested --right-context=" << right_context
                 << ", but example only has right-context of "
-                << (max_input_t - max_output_t);
+                << (max_input_t - max_output_t)
+                << " (will warn only once; this may be harmless if "
+                "using any --*right-context-final options)";
+    }
     max_input_t = std::min(max_input_t, max_output_t + right_context);
   }
   FilterExample(eg,
@@ -357,5 +363,3 @@ int main(int argc, char *argv[]) {
     return -1;
   }
 }
-
-
diff --git a/src/nnet3bin/nnet3-discriminative-merge-egs.cc b/src/nnet3bin/nnet3-discriminative-merge-egs.cc
index 0edf960fdf9..bc4cdfb2941 100644
--- a/src/nnet3bin/nnet3-discriminative-merge-egs.cc
+++ b/src/nnet3bin/nnet3-discriminative-merge-egs.cc
@@ -41,10 +41,8 @@ int main(int argc, char *argv[]) {
         "nnet3-discriminative-merge-egs --minibatch-size=128 ark:1.degs ark:- | nnet3-discriminative-train ... \n"
         "See also nnet3-discriminative-copy-egs\n";
 
-    ExampleMergingConfig merging_config;
-    merging_config.minibatch_size = 64; // change the default for this
-                                        // program.. anyway it will usually be
-                                        // set on the command line.
+    ExampleMergingConfig merging_config("64"); // 64 is default minibatch size.
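The minibatch-size strings accepted after this patch series ('64', '16:32,64', etc.) can be parsed as in the following standalone sketch (a simplified stand-in for ExampleMergingConfig::ParseIntSet(), not the actual implementation):

    #include <cstdio>
    #include <sstream>
    #include <string>
    #include <utility>
    #include <vector>

    // Parse a string such as "16:32,64" into [min,max] pairs: each
    // comma-separated piece is either a single positive integer or a
    // colon-separated range.
    bool ParseRanges(const std::string &str,
                     std::vector<std::pair<int, int> > *ranges) {
      std::stringstream ss(str);
      std::string piece;
      while (std::getline(ss, piece, ',')) {
        int lo, hi;
        if (piece.find(':') == std::string::npos) {
          if (sscanf(piece.c_str(), "%d", &lo) != 1) return false;
          hi = lo;
        } else {
          if (sscanf(piece.c_str(), "%d:%d", &lo, &hi) != 2) return false;
        }
        if (lo <= 0 || hi < lo) return false;
        ranges->push_back(std::make_pair(lo, hi));
      }
      return !ranges->empty();
    }

    int main() {
      std::vector<std::pair<int, int> > ranges;
      if (ParseRanges("16:32,64", &ranges))
        for (size_t i = 0; i < ranges.size(); i++)
          printf("[%d, %d]\n", ranges[i].first, ranges[i].second);
      return 0;
    }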
+ ParseOptions po(usage); merging_config.Register(&po); @@ -61,8 +59,9 @@ int main(int argc, char *argv[]) { SequentialNnetDiscriminativeExampleReader example_reader(examples_rspecifier); NnetDiscriminativeExampleWriter example_writer(examples_wspecifier); + merging_config.ComputeDerived(); DiscriminativeExampleMerger merger(merging_config, &example_writer); - while (!example_reader.Done()) { + for (; !example_reader.Done(); example_reader.Next()) { const NnetDiscriminativeExample &cur_eg = example_reader.Value(); merger.AcceptExample(new NnetDiscriminativeExample(cur_eg)); } @@ -74,6 +73,3 @@ int main(int argc, char *argv[]) { return -1; } } - - - diff --git a/src/nnet3bin/nnet3-get-egs-dense-targets.cc b/src/nnet3bin/nnet3-get-egs-dense-targets.cc index ddcf5f23555..54d607466b5 100644 --- a/src/nnet3bin/nnet3-get-egs-dense-targets.cc +++ b/src/nnet3bin/nnet3-get-egs-dense-targets.cc @@ -208,9 +208,6 @@ int main(int argc, char *argv[]) { eg_config.ComputeDerived(); UtteranceSplitter utt_splitter(eg_config); - if (num_targets <= 0) - KALDI_ERR << "--num-targets options is required."; - std::string feature_rspecifier = po.GetArg(1), matrix_rspecifier = po.GetArg(2), examples_wspecifier = po.GetArg(3); diff --git a/src/nnet3bin/nnet3-merge-egs.cc b/src/nnet3bin/nnet3-merge-egs.cc index 0d823c449b1..db4508a2835 100644 --- a/src/nnet3bin/nnet3-merge-egs.cc +++ b/src/nnet3bin/nnet3-merge-egs.cc @@ -81,7 +81,7 @@ int main(int argc, char *argv[]) { ExampleMerger merger(merging_config, &example_writer); - while (!example_reader.Done()) { + for (; !example_reader.Done(); example_reader.Next()) { const NnetExample &cur_eg = example_reader.Value(); merger.AcceptExample(new NnetExample(cur_eg)); } From 82fe0a999455a5c4f3d92dd3df20b33ce770c099 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 1 Jan 2017 17:47:55 -0500 Subject: [PATCH 178/530] Change how descriptors behave in nnet3 (Issue #1125) --- src/nnet3/nnet-common.h | 6 ++++++ src/nnet3/nnet-descriptor.cc | 23 ++++++++++++----------- src/nnet3/nnet-descriptor.h | 2 +- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/nnet3/nnet-common.h b/src/nnet3/nnet-common.h index cb5d8c3b944..9134e2545de 100644 --- a/src/nnet3/nnet-common.h +++ b/src/nnet3/nnet-common.h @@ -62,6 +62,12 @@ struct Index { Index operator + (const Index &other) const { return Index(n+other.n, t+other.t, x+other.x); } + Index &operator += (const Index &other) { + n += other.n; + t += other.t; + x += other.x; + return *this; + } void Write(std::ostream &os, bool binary) const; diff --git a/src/nnet3/nnet-descriptor.cc b/src/nnet3/nnet-descriptor.cc index 162a55b8149..d02bc49a5af 100644 --- a/src/nnet3/nnet-descriptor.cc +++ b/src/nnet3/nnet-descriptor.cc @@ -107,9 +107,9 @@ void OffsetForwardingDescriptor::GetNodeDependencies( } Cindex OffsetForwardingDescriptor::MapToInput(const Index &ind) const { - Cindex answer = src_->MapToInput(ind); - answer.second = answer.second + offset_; - return answer; + Index ind_mod(ind); + ind_mod += offset_; + return src_->MapToInput(ind_mod); } @@ -173,12 +173,13 @@ void RoundingForwardingDescriptor::GetNodeDependencies( Cindex RoundingForwardingDescriptor::MapToInput(const Index &ind) const { KALDI_ASSERT(t_modulus_ >= 1); - Cindex ans = src_->MapToInput(ind); - int32 mod = ans.second.t % t_modulus_; + Index ind_mod(ind); + // unfortunately doing "mathematical" modulus is a bit painful in C. 
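As an aside on the comment above, 'mathematical' (floor) modulus can be isolated into a small helper; a standalone sketch (FloorMod is an illustrative name, not code from the patch):

    #include <cassert>

    // Floor modulus: always in [0, m), unlike C/C++ '%' for negative operands.
    int FloorMod(int t, int m) {
      int r = t % m;  // e.g. -7 % 3 == -1 in C/C++
      if (r < 0) r += m;
      return r;
    }

    int main() {
      assert(FloorMod(7, 3) == 1);
      assert(FloorMod(-7, 3) == 2);
      // Round(., 3) maps t to t - FloorMod(t, 3), the next-lowest multiple
      // of 3, e.g. -7 -> -9.
      assert(-7 - FloorMod(-7, 3) == -9);
      return 0;
    }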
+ int32 mod = ind_mod.t % t_modulus_; if (mod < 0) mod += t_modulus_; - ans.second.t -= mod; - return ans; + ind_mod.t -= mod; + return src_->MapToInput(ind_mod); } ForwardingDescriptor *RoundingForwardingDescriptor::Copy() const { @@ -199,15 +200,15 @@ void ReplaceIndexForwardingDescriptor::GetNodeDependencies( } Cindex ReplaceIndexForwardingDescriptor::MapToInput(const Index &ind) const { - Cindex ans = src_->MapToInput(ind); + Index ind_mod(ind); switch (variable_name_) { - case kT: ans.second.t = value_; break; - case kX: ans.second.x = value_; break; + case kT: ind_mod.t = value_; break; + case kX: ind_mod.x = value_; break; default: // kN or any other value is not allowed (doesn't make sense // to change the minibatch index in this way). KALDI_ERR << "Invalid variable name"; } - return ans; + return src_->MapToInput(ind_mod); } ForwardingDescriptor *ReplaceIndexForwardingDescriptor::Copy() const { diff --git a/src/nnet3/nnet-descriptor.h b/src/nnet3/nnet-descriptor.h index 93650e84307..e2d2c41772d 100644 --- a/src/nnet3/nnet-descriptor.h +++ b/src/nnet3/nnet-descriptor.h @@ -70,7 +70,7 @@ namespace nnet3 { ;; arguments ::= Switch(, [, ...]) ;; For use in clockwork RNNs or similar, Round() rounds the time-index t of the -;; requested Index to the next-lowest multiple of the integer +;; requested Index to the next-lowest multiple of the integer , ;; and evaluates the input argument for the resulting Index. ::= Round(, ) ;; is an integer ;; ReplaceIndex replaces some (t or x) in the requested Index From 977805cd53112acb0158180fee7f267cb81aa898 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 1 Jan 2017 20:18:30 -0500 Subject: [PATCH 179/530] Various code fixes and additional diagnostics --- src/chainbin/nnet3-chain-compute-prob.cc | 2 - src/nnet3/nnet-chain-diagnostics.cc | 2 +- src/nnet3/nnet-diagnostics.cc | 2 +- src/nnet3/nnet-diagnostics.h | 5 +- src/nnet3/nnet-optimize.cc | 104 ++++++++++++++++++----- src/nnet3/nnet-optimize.h | 42 +++++---- 6 files changed, 116 insertions(+), 41 deletions(-) diff --git a/src/chainbin/nnet3-chain-compute-prob.cc b/src/chainbin/nnet3-chain-compute-prob.cc index 7f9d688777a..830f1e8cee4 100644 --- a/src/chainbin/nnet3-chain-compute-prob.cc +++ b/src/chainbin/nnet3-chain-compute-prob.cc @@ -84,5 +84,3 @@ int main(int argc, char *argv[]) { return -1; } } - - diff --git a/src/nnet3/nnet-chain-diagnostics.cc b/src/nnet3/nnet-chain-diagnostics.cc index b6b39816337..54d73a6ead3 100644 --- a/src/nnet3/nnet-chain-diagnostics.cc +++ b/src/nnet3/nnet-chain-diagnostics.cc @@ -32,7 +32,7 @@ NnetChainComputeProb::NnetChainComputeProb( chain_config_(chain_config), den_graph_(den_fst, nnet.OutputDim("output")), nnet_(nnet), - compiler_(nnet, nnet_config_.optimize_config), + compiler_(nnet, nnet_config_.optimize_config, nnet_config_.compiler_config), deriv_nnet_(NULL), num_minibatches_processed_(0) { if (nnet_config_.compute_deriv) { diff --git a/src/nnet3/nnet-diagnostics.cc b/src/nnet3/nnet-diagnostics.cc index e7adeffeb09..d7de17682da 100644 --- a/src/nnet3/nnet-diagnostics.cc +++ b/src/nnet3/nnet-diagnostics.cc @@ -28,7 +28,7 @@ NnetComputeProb::NnetComputeProb(const NnetComputeProbOptions &config, config_(config), nnet_(nnet), deriv_nnet_(NULL), - compiler_(nnet), + compiler_(nnet, config_.optimize_config, config_.compiler_config), num_minibatches_processed_(0) { if (config_.compute_deriv) { deriv_nnet_ = new Nnet(nnet_); diff --git a/src/nnet3/nnet-diagnostics.h b/src/nnet3/nnet-diagnostics.h index 298548857dd..fd2ceb1df9e 100644 --- 
a/src/nnet3/nnet-diagnostics.h +++ b/src/nnet3/nnet-diagnostics.h @@ -46,6 +46,7 @@ struct NnetComputeProbOptions { bool compute_accuracy; NnetOptimizeOptions optimize_config; NnetComputeOptions compute_config; + CachingOptimizingCompilerOptions compiler_config; NnetComputeProbOptions(): debug_computation(false), compute_deriv(false), @@ -60,7 +61,9 @@ struct NnetComputeProbOptions { // register the optimization options with the prefix "optimization". ParseOptions optimization_opts("optimization", opts); optimize_config.Register(&optimization_opts); - + // register the compiler options with the prefix "compiler". + ParseOptions compiler_opts("compiler", opts); + compiler_config.Register(&compiler_opts); // register the compute options with the prefix "computation". ParseOptions compute_opts("computation", opts); compute_config.Register(&compute_opts); diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc index 54ebf17edc7..f024d68aed7 100644 --- a/src/nnet3/nnet-optimize.cc +++ b/src/nnet3/nnet-optimize.cc @@ -18,6 +18,7 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. +#include #include "nnet3/nnet-optimize.h" #include "nnet3/nnet-optimize-utils.h" #include "base/timer.h" @@ -546,6 +547,24 @@ size_t ComputationRequestHasher::operator() (const ComputationRequest *cr) const return ans; } + +CachingOptimizingCompiler::CachingOptimizingCompiler( + const Nnet &nnet, + const CachingOptimizingCompilerOptions config): + nnet_(nnet), config_(config), + seconds_taken_total_(0.0), seconds_taken_compile_(0.0), + seconds_taken_optimize_(0.0), seconds_taken_expand_(0.0), + seconds_taken_check_(0.0), seconds_taken_indexes_(0.0) { } + +CachingOptimizingCompiler::CachingOptimizingCompiler( + const Nnet &nnet, + const NnetOptimizeOptions &opt_config, + const CachingOptimizingCompilerOptions config): + nnet_(nnet), config_(config), opt_config_(opt_config), + seconds_taken_total_(0.0), seconds_taken_compile_(0.0), + seconds_taken_optimize_(0.0), seconds_taken_expand_(0.0), + seconds_taken_check_(0.0), seconds_taken_indexes_(0.0) { } + void CachingOptimizingCompiler::UpdateCache(const ComputationRequest *request, const NnetComputation *computation) { if (computation_cache_.size() == config_.cache_capacity) { @@ -615,12 +634,33 @@ CachingOptimizingCompiler::~CachingOptimizingCompiler() { delete itr->first; delete itr->second.first; } - KALDI_LOG << seconds_taken_ << " seconds taken in nnet3 compilation"; + std::ostringstream os; + double seconds_taken_misc = seconds_taken_total_ - seconds_taken_compile_ + - seconds_taken_optimize_ - seconds_taken_expand_ + - seconds_taken_check_ - seconds_taken_indexes_; + os << std::setprecision(3) << seconds_taken_total_ + << " seconds taken in nnet3 compilation total (breakdown: " + << seconds_taken_compile_ << " compilation, " + << seconds_taken_optimize_ << " optimization, " + << seconds_taken_expand_ << " shortcut expansion, " + << seconds_taken_check_ << " checking, " + << seconds_taken_indexes_ << " computing indexes, " + << seconds_taken_misc << " misc.)"; + KALDI_LOG << os.str(); + // note: the leftover amount is misc things like hashing and == comparisons on + // computation-requests, and calling RequestIsDecomposable(). 
} const NnetComputation* CachingOptimizingCompiler::Compile( const ComputationRequest &in_request) { Timer timer; + const NnetComputation *ans = CompileInternal(in_request); + seconds_taken_total_ += timer.Elapsed(); + return ans; +} + +const NnetComputation* CachingOptimizingCompiler::CompileInternal( + const ComputationRequest &in_request) { const NnetComputation *ans; // find computation in the cache CacheType::iterator cit = computation_cache_.find(&in_request); @@ -632,7 +672,6 @@ const NnetComputation* CachingOptimizingCompiler::Compile( UpdateAccessQueue(cit); ans = computation; } - seconds_taken_ += timer.Elapsed(); return ans; } @@ -658,7 +697,12 @@ const NnetComputation* CachingOptimizingCompiler::CompileNoShortcut( // There may be situations where we'd prefer not to keep it, for speed. CompilerOptions opts; NnetComputation *computation = new NnetComputation; - compiler.CreateComputation(opts, computation); + + { + Timer timer; + compiler.CreateComputation(opts, computation); + seconds_taken_compile_ += timer.Elapsed(); + } int32 verbose_cutoff = 4; if (GetVerboseLevel() >= verbose_cutoff) { @@ -669,28 +713,43 @@ const NnetComputation* CachingOptimizingCompiler::CompileNoShortcut( computation->Print(os2, nnet_); KALDI_LOG << "Generated computation is: " << os2.str(); } - { // some checking. Note: there may be a time when we might - // prefer not do to this checking. + { // some checking. Note: there may come a time when we might + // prefer to disable this checking. + Timer timer; CheckComputationOptions check_config; // we can do the rewrite check since it's before optimization. check_config.check_rewrite = true; ComputationChecker checker(check_config, nnet_, *computation); checker.Check(); + seconds_taken_check_ += timer.Elapsed(); } - Optimize(opt_config_, nnet_, - MaxOutputTimeInRequest(request), - computation); + + { + Timer timer; + Optimize(opt_config_, nnet_, + MaxOutputTimeInRequest(request), + computation); + seconds_taken_optimize_ += timer.Elapsed(); + } + + if (GetVerboseLevel() >= verbose_cutoff) { std::ostringstream os; computation->Print(os, nnet_); KALDI_LOG << "Optimized computation is: " << os.str(); } { // check the computation again. + Timer timer; CheckComputationOptions check_config; ComputationChecker checker(check_config, nnet_, *computation); checker.Check(); + seconds_taken_check_ += timer.Elapsed(); + } + { + Timer timer; + computation->ComputeCudaIndexes(); + seconds_taken_indexes_ += timer.Elapsed(); } - computation->ComputeCudaIndexes(); return computation; } @@ -705,11 +764,12 @@ const NnetComputation* CachingOptimizingCompiler::CompileViaShortcut( if (!RequestIsDecomposable(request, &mini_request, &num_n_values)) return NULL; - // by invoking Compile() on the mini request, we go through the same - // caching process as for any externally requested computation. - // note: this pointer is not being 'given to us'... it's owned in - // the cache. - const NnetComputation *mini_computation = Compile(mini_request); + // By invoking CompileInternal() on the mini request, we go through the same + // caching process as for any externally requested computation. [the only + // difference from Compile() is that it doesn't call the timer code; this + // avoids double-counting the time taken.] This pointer will not have to be + // deleted by this function; it's owned by the class, in the cache. + const NnetComputation *mini_computation = CompileInternal(mini_request); // note: by default we always create debug_info, even in regular compilation. // (e.g. 
it defaults to true in CompilerOptions). If it really seems to be a @@ -719,11 +779,17 @@ const NnetComputation* CachingOptimizingCompiler::CompileViaShortcut( NnetComputation *ans = new NnetComputation(); - ExpandComputation(nnet_, request.misc_info, *mini_computation, - need_debug_info, num_n_values, ans); - - ans->ComputeCudaIndexes(); - + { + Timer timer; + ExpandComputation(nnet_, request.misc_info, *mini_computation, + need_debug_info, num_n_values, ans); + seconds_taken_expand_ += timer.Elapsed(); + } + { + Timer timer; + ans->ComputeCudaIndexes(); + seconds_taken_indexes_ += timer.Elapsed(); + } return ans; } diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h index ab0721e802a..bbe5269c982 100644 --- a/src/nnet3/nnet-optimize.h +++ b/src/nnet3/nnet-optimize.h @@ -206,16 +206,13 @@ class CachingOptimizingCompiler { public: CachingOptimizingCompiler(const Nnet &nnet, const CachingOptimizingCompilerOptions config = - CachingOptimizingCompilerOptions()): - nnet_(nnet), config_(config), seconds_taken_(0.0) { } + CachingOptimizingCompilerOptions()); /// Note: nnet is retained as a const reference but opt_config is copied. CachingOptimizingCompiler(const Nnet &nnet, const NnetOptimizeOptions &opt_config, const CachingOptimizingCompilerOptions config = - CachingOptimizingCompilerOptions()): - nnet_(nnet), config_(config), opt_config_(opt_config), - seconds_taken_(0.0) { } + CachingOptimizingCompilerOptions()); ~CachingOptimizingCompiler(); /// Does the compilation and returns a const pointer to @@ -226,11 +223,17 @@ class CachingOptimizingCompiler { void ReadCache(std::istream &is, bool binary); void WriteCache(std::ostream &os, bool binary) const; private: - // This function, called from Compile(), is called when a ComputationRequest - // has been determined not to have already been cached. It otherwise has the - // same interface as Compile(), but assumes that there is nothing cached for - // this computation as yet. It compiles the computation and takes care of - // caching it. + + // This function just implements the work of Compile(); it's made a separate + // function for the convenience of the timer code, to avoid it being called + // twice (we also call this function directly from inside the class). + const NnetComputation* CompileInternal(const ComputationRequest &request); + + // This function, called from CompileInternal(), is called when a + // ComputationRequest has been determined not to have already been cached. It + // otherwise has the same interface as CompileInternal(), but assumes that + // there is nothing cached for this computation as yet. It compiles the + // computation and takes care of caching it. const NnetComputation* CompileAndCache(const ComputationRequest &request); @@ -274,13 +277,18 @@ class CachingOptimizingCompiler { ComputationRequestPtrEqual> CacheType; CacheType computation_cache_; - // time spent in compilation-- for diagnostic messages - double seconds_taken_; - - // This function updates the computation cache. It is called within Compile(). - // It takes ownership of the pointers. It inserts the request at the end of - // the queue, and purges the least-recently-accessed request from the queue and - // the cache if the capacity is reached. 
+ // seconds spent in various phases of compilation-- for diagnostic messages + double seconds_taken_total_; + double seconds_taken_compile_; + double seconds_taken_optimize_; + double seconds_taken_expand_; + double seconds_taken_check_; + double seconds_taken_indexes_; + + // This function updates the computation cache. It is called within + // CompileInternal(). It takes ownership of the pointers. It inserts the + // request at the end of the queue, and purges the least-recently-accessed + // request from the queue and the cache if the capacity is reached. void UpdateCache(const ComputationRequest *request, const NnetComputation *computation); // This function updates the recently accessed queue. From 5d1f524730843f1cf5442335ee255b2ecb9a16f5 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 2 Jan 2017 15:48:28 -0500 Subject: [PATCH 180/530] [scripts] Remove temporary files in get_egs_discriminative.sh (#1300) --- egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh index f9a8d677e09..c3baa5dbbc8 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh @@ -445,10 +445,13 @@ fi if [ $stage -le 7 ]; then echo "$0: removing temporary archives" - ( - cd $dir - for f in $(ls -l . | grep 'degs_orig' | awk '{ X=NF-1; Y=NF-2; if ($X == "->") print $Y, $NF; }'); do rm $f; done - ) + for x in $(seq $nj); do + for y in $(seq $num_archives_intermediate); do + file=$dir/degs_orig.$x.$y.ark + [ -L $file ] && rm $(readlink -f $file) + rm $file + done + done if [ $archives_multiple -gt 1 ]; then # there are some extra soft links that we should delete. 
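[Editor's sketch, not part of the patch.] The cleanup loop just added walks the degs_orig.JOB.ARCHIVE.ark names directly instead of parsing ls output, and when an archive is a soft link it deletes the linked data first, so the disk space is actually reclaimed. A rough Python equivalent of that idiom (directory layout and counts taken from the script above; the function name is invented):

    import os

    def remove_degs_archives(degs_dir, nj, num_archives_intermediate):
        # delete archives that may be symlinks onto other filesystems:
        # remove the link target first (assumed to exist), then the link
        # itself (or the plain file, if it was never a link).
        for x in range(1, nj + 1):
            for y in range(1, num_archives_intermediate + 1):
                path = os.path.join(degs_dir, "degs_orig.%d.%d.ark" % (x, y))
                if os.path.islink(path):
                    os.remove(os.path.realpath(path))
                if os.path.lexists(path):
                    os.remove(path)

The extra soft links that remain when archives_multiple is greater than one are then removed the same way: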
for f in $dir/degs.*.*.ark; do rm $f; done From cefc844d041165c55b3f36060ef4467ad3596b9e Mon Sep 17 00:00:00 2001 From: Xingyu Na Date: Tue, 3 Jan 2017 04:59:07 +0800 Subject: [PATCH 181/530] [egs][scripts] Add pitch recipe for nnet2/3 online setup (hkust) (#1280) --- egs/hkust/README.txt | 3 - egs/hkust/s5/RESULTS | 43 ++-- egs/hkust/s5/conf/mfcc_hires.conf | 10 + egs/hkust/s5/conf/online_cmvn.conf | 1 + egs/hkust/s5/conf/online_pitch.conf | 1 + egs/hkust/s5/conf/pitch.conf | 1 + egs/hkust/s5/local/chain/run_tdnn.sh | 211 ++++++++++++++++++ egs/hkust/s5/local/create_oov_char_lexicon.pl | 4 +- egs/hkust/s5/local/hkust_data_prep.sh | 97 +++----- egs/hkust/s5/local/hkust_format_data.sh | 2 +- egs/hkust/s5/local/hkust_prepare_dict.sh | 159 +++++++------ egs/hkust/s5/local/hkust_segment.py | 5 +- egs/hkust/s5/local/hkust_train_lms.sh | 37 +-- .../s5/local/nnet3/run_ivector_common.sh | 68 +++--- egs/hkust/s5/local/nnet3/run_tdnn.sh | 125 ++++++----- egs/hkust/s5/local/online/run_nnet2_common.sh | 108 +++++++++ egs/hkust/s5/local/online/run_nnet2_ms.sh | 108 +++++++++ egs/hkust/s5/local/run_discriminative.sh | 31 +++ egs/hkust/s5/local/run_sgmm.sh | 22 ++ egs/hkust/s5/local/score.sh | 38 +--- egs/hkust/s5/local/score_basic.sh | 107 --------- egs/hkust/s5/local/score_sclite.sh | 70 ------ egs/hkust/s5/local/score_sclite_conf.sh | 72 ------ egs/hkust/s5/local/wer_hyp_filter | 19 ++ egs/hkust/s5/local/wer_ref_filter | 19 ++ egs/hkust/s5/path.sh | 2 +- egs/hkust/s5/run.sh | 83 +++---- egs/wsj/s5/steps/select_feats.sh | 3 + egs/wsj/s5/utils/data/limit_feature_dim.sh | 48 ++++ tools/extras/install_kaldi_lm.sh | 34 +++ tools/extras/install_mmseg.sh | 68 ++++++ 31 files changed, 979 insertions(+), 620 deletions(-) create mode 100644 egs/hkust/s5/conf/mfcc_hires.conf create mode 100644 egs/hkust/s5/conf/online_cmvn.conf create mode 100644 egs/hkust/s5/conf/online_pitch.conf create mode 100644 egs/hkust/s5/conf/pitch.conf create mode 100755 egs/hkust/s5/local/chain/run_tdnn.sh mode change 100644 => 100755 egs/hkust/s5/local/create_oov_char_lexicon.pl create mode 100755 egs/hkust/s5/local/online/run_nnet2_common.sh create mode 100755 egs/hkust/s5/local/online/run_nnet2_ms.sh create mode 100755 egs/hkust/s5/local/run_discriminative.sh create mode 100755 egs/hkust/s5/local/run_sgmm.sh mode change 100755 => 120000 egs/hkust/s5/local/score.sh delete mode 100755 egs/hkust/s5/local/score_basic.sh delete mode 100755 egs/hkust/s5/local/score_sclite.sh delete mode 100755 egs/hkust/s5/local/score_sclite_conf.sh create mode 100755 egs/hkust/s5/local/wer_hyp_filter create mode 100755 egs/hkust/s5/local/wer_ref_filter create mode 100755 egs/wsj/s5/utils/data/limit_feature_dim.sh create mode 100755 tools/extras/install_kaldi_lm.sh create mode 100755 tools/extras/install_mmseg.sh diff --git a/egs/hkust/README.txt b/egs/hkust/README.txt index 5dbde98b539..752ae0e0897 100644 --- a/egs/hkust/README.txt +++ b/egs/hkust/README.txt @@ -6,6 +6,3 @@ LDC2005S15 : http://www.ldc.upenn.edu/Catalog/catalogEntry.jsp?catalogId=LDC2005 LDC2005T32 : http://www.ldc.upenn.edu/Catalog/catalogEntry.jsp?catalogId=LDC2005T32 s5: The experiments here were based on the above corpus - - - diff --git a/egs/hkust/s5/RESULTS b/egs/hkust/s5/RESULTS index 3c4933bbbb0..6886d21f975 100644 --- a/egs/hkust/s5/RESULTS +++ b/egs/hkust/s5/RESULTS @@ -1,15 +1,30 @@ # for x in exp/*/decode; do [ -d $x ] && grep WER $x/cer_* | utils/best_wer.sh; done -%WER 80.89 [ 45422 / 56154, 1530 ins, 11018 del, 32874 sub ] exp/mono0a/decode/cer_9 -%WER 60.01 [ 33698 / 
56154, 2528 ins, 5961 del, 25209 sub ] exp/tri1/decode/cer_12 -%WER 59.68 [ 33514 / 56154, 2574 ins, 5752 del, 25188 sub ] exp/tri2/decode/cer_12 -%WER 57.25 [ 32148 / 56154, 2484 ins, 5811 del, 23853 sub ] exp/tri3a/decode/cer_13 -%WER 53.47 [ 30026 / 56154, 2789 ins, 5115 del, 22122 sub ] exp/tri4a/decode/cer_13 -%WER 49.72 [ 27921 / 56154, 2833 ins, 4568 del, 20520 sub ] exp/tri5a/decode/cer_13 -%WER 43.95 [ 24681 / 56154, 2106 ins, 3890 del, 18685 sub ] exp/tri5a_mmi_b0.1/decode/cer_10 -%WER 44.60 [ 25044 / 56154, 2121 ins, 4040 del, 18883 sub ] exp/tri5a_mpe/decode/cer_11 -%WER 43.81 [ 24602 / 56154, 2843 ins, 3751 del, 18008 sub ] exp/sgmm2_5a/decode/cer_10 -exp/tri5a_mce/decode/cer_11:%WER 44.74 [ 25125 / 56154, 2112 ins, 4108 del, 18905 sub ] +%WER 80.72 [ 45327 / 56154, 1609 ins, 10856 del, 32862 sub ] exp/mono0a/decode/cer_9 +%WER 58.86 [ 33054 / 56154, 2651 ins, 6240 del, 24163 sub ] exp/tri1/decode/cer_13 +%WER 58.32 [ 32748 / 56154, 2491 ins, 6279 del, 23978 sub ] exp/tri2/decode/cer_14 +%WER 56.49 [ 31719 / 56154, 2601 ins, 5979 del, 23139 sub ] exp/tri3a/decode/cer_13 +%WER 51.75 [ 29060 / 56154, 2879 ins, 5088 del, 21093 sub ] exp/tri4a/decode/cer_13 +%WER 47.36 [ 26596 / 56154, 2740 ins, 4577 del, 19279 sub ] exp/tri5a/decode/cer_13 +%WER 42.55 [ 23894 / 56154, 1877 ins, 4437 del, 17580 sub ] exp/tri5a_mpe/decode/cer_13 +%WER 42.19 [ 23693 / 56154, 2138 ins, 3871 del, 17684 sub ] exp/tri5a_mmi_b0.1/decode/cer_10 +%WER 41.11 [ 23086 / 56154, 2863 ins, 3608 del, 16615 sub ] exp/sgmm2_5a/decode/cer_10 +# nnet2 online results +%WER 38.32 [ 21518 / 56154, 2344 ins, 4273 del, 14901 sub ] exp/nnet2_online/nnet_ms/decode/cer_12 +%WER 38.01 [ 21345 / 56154, 2555 ins, 4173 del, 14617 sub ] exp/nnet2_online/nnet_ms_online/decode/cer_12 +%WER 37.10 [ 20832 / 56154, 2399 ins, 3936 del, 14497 sub ] exp/nnet2_online/nnet_ms_online/decode_per_utt/cer_12 + +# nnet3 online results +%WER 32.77 [ 18400 / 56154, 1971 ins, 3525 del, 12904 sub ] exp/nnet3/tdnn_sp/decode/cer_10 +%WER 33.02 [ 18540 / 56154, 2335 ins, 3251 del, 12954 sub ] exp/nnet3/tdnn_sp_online/decode/cer_9 +%WER 34.01 [ 19098 / 56154, 2195 ins, 3482 del, 13421 sub ] exp/nnet3/tdnn_sp_online/decode_per_utt/cer_10 + +# chain online results +%WER 28.24 [ 15858 / 56154, 1454 ins, 3415 del, 10989 sub ] exp/chain/tdnn_7h_sp/decode/cer_10 +%WER 28.16 [ 15812 / 56154, 1648 ins, 2824 del, 11340 sub ] exp/chain/tdnn_7h_sp_online/decode/cer_9 +%WER 29.55 [ 16594 / 56154, 1547 ins, 3437 del, 11610 sub ] exp/chain/tdnn_7h_sp_online/decode_per_utt/cer_10 + +## results before adding pitch # nnet1 results exp/dnn5b_pretrain-dbn_dnn/decode/cer_10:%WER 39.42 [ 22134 / 56154, 2507 ins, 3730 del, 15897 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode/cer_11:%WER 36.50 [ 20499 / 56154, 1915 ins, 3312 del, 15272 sub ] @@ -18,11 +33,11 @@ exp/cnn5c/decode/cer_10:%WER 40.13 [ 22536 / 56154, 2329 ins, 3962 del, 16245 su exp/cnn5c_pretrain-dbn_dnn/decode/cer_10:%WER 38.80 [ 21790 / 56154, 2470 ins, 3582 del, 15738 sub ] exp/lstm5e/decode/cer_10:%WER 37.61 [ 21121 / 56154, 1829 ins, 3941 del, 15351 sub ] -# nnet2 results +# nnet2 mfcc results exp/nnet2_5d/decode/cer_10:%WER 38.59 [ 21669 / 56154, 2498 ins, 3581 del, 15590 sub ] -# ConvNet with 2 convolutional layers and 2 ReLU layers +# ConvNet using fbank, with 2 convolutional layers and 2 ReLU layers exp/nnet2_convnet/decode/cer_10:%WER 41.19 [ 23129 / 56154, 2599 ins, 3782 del, 16748 sub ] -# nnet3 results (using speed perturbed data) +# nnet3 mfcc results (using speed perturbed data) 
exp/nnet3/tdnn_sp/decode_dev/cer_10:%WER 33.79 [ 18977 / 56154, 2027 ins, 3485 del, 13465 sub ] -exp/nnet3/lstm_sp_ld5/decode_dev/cer_9:%WER 33.51 [ 18815 / 56154, 1813 ins, 3249 del, 13753 sub ] \ No newline at end of file +exp/nnet3/lstm_sp_ld5/decode_dev/cer_9:%WER 33.51 [ 18815 / 56154, 1813 ins, 3249 del, 13753 sub ] diff --git a/egs/hkust/s5/conf/mfcc_hires.conf b/egs/hkust/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..d870ab04c38 --- /dev/null +++ b/egs/hkust/s5/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--sample-frequency=8000 # Switchboard is sampled at 8kHz +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=40 # low cutoff frequency for mel bins +--high-freq=-200 # high cutoff frequently, relative to Nyquist of 4000 (=3800) diff --git a/egs/hkust/s5/conf/online_cmvn.conf b/egs/hkust/s5/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/hkust/s5/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/hkust/s5/conf/online_pitch.conf b/egs/hkust/s5/conf/online_pitch.conf new file mode 100644 index 00000000000..926bcfca92a --- /dev/null +++ b/egs/hkust/s5/conf/online_pitch.conf @@ -0,0 +1 @@ +--sample-frequency=8000 diff --git a/egs/hkust/s5/conf/pitch.conf b/egs/hkust/s5/conf/pitch.conf new file mode 100644 index 00000000000..926bcfca92a --- /dev/null +++ b/egs/hkust/s5/conf/pitch.conf @@ -0,0 +1 @@ +--sample-frequency=8000 diff --git a/egs/hkust/s5/local/chain/run_tdnn.sh b/egs/hkust/s5/local/chain/run_tdnn.sh new file mode 100755 index 00000000000..4829e9736ca --- /dev/null +++ b/egs/hkust/s5/local/chain/run_tdnn.sh @@ -0,0 +1,211 @@ +#!/bin/bash + +# This script is based on tun_tdnn_7h.sh in swbd chain recipe. + +set -e + +# configs for 'chain' +affix= +stage=12 +train_stage=-10 +get_egs_stage=-10 +dir=exp/chain/tdnn_7h # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= + +# training options +num_epochs=4 +initial_effective_lrate=0.001 +final_effective_lrate=0.0001 +leftmost_questions_truncate=-1 +max_param_change=2.0 +final_layer_normalize_target=0.5 +num_jobs_initial=2 +num_jobs_final=12 +minibatch_size=128 +frames_per_eg=150 +remove_egs=true +common_egs_dir= +xent_regularize=0.1 + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. 
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 5000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=43 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=625 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=625 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=625 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=625 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=625 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=625 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn6 dim=625 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=625 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/hkust-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_eg \ + --trainer.num-chunk-per-minibatch $minibatch_size \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.num-jobs-initial $num_jobs_initial \ + --trainer.optimization.num-jobs-final $num_jobs_final \ + --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ + --trainer.optimization.final-effective-lrate $final_effective_lrate \ + --trainer.max-param-change $max_param_change \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri5a_sp_lats \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph +fi + +graph_dir=$dir/graph +if [ $stage -le 15 ]; then + iter_opts= + if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 10 --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3/ivectors_dev \ + $graph_dir data/dev_hires $dir/decode || exit 1; +fi + +if [ $stage -le 16 ]; then + steps/online/nnet3/prepare_online_decoding.sh --mfcc-config conf/mfcc_hires.conf \ + --add-pitch true \ + data/lang exp/nnet2_online/extractor "$dir" ${dir}_online || exit 1; +fi + +if [ $stage -le 17 ]; then + # do the actual online decoding with iVectors, carrying info forward from + # previous utterances of the same speaker. + steps/online/nnet3/decode.sh --config conf/decode.config \ + --cmd "$decode_cmd" --nj 10 --acwt 1.0 --post-decode-acwt 10.0 \ + "$graph_dir" data/dev_hires \ + ${dir}_online/decode || exit 1; +fi + +if [ $stage -le 18 ]; then + # this version of the decoding treats each utterance separately + # without carrying forward speaker information. + steps/online/nnet3/decode.sh --config conf/decode.config \ + --cmd "$decode_cmd" --nj 10 --per-utt true --acwt 1.0 --post-decode-acwt 10.0 \ + "$graph_dir" data/dev_hires \ + ${dir}_online/decode_per_utt || exit 1; +fi diff --git a/egs/hkust/s5/local/create_oov_char_lexicon.pl b/egs/hkust/s5/local/create_oov_char_lexicon.pl old mode 100644 new mode 100755 index aaf5d3bcb9b..0c146c9a123 --- a/egs/hkust/s5/local/create_oov_char_lexicon.pl +++ b/egs/hkust/s5/local/create_oov_char_lexicon.pl @@ -1,5 +1,5 @@ -#!/usr/bin/perl -# Copyright 2016 LeSpeech (Author: Xingyu Na) +#!/usr/bin/env perl +# Copyright 2016 Alibaba Robotics Corp. (Author: Xingyu Na) # # A script for char-based Chinese OOV lexicon generation. 
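[Editor's sketch, not a transcription of the perl script.] The idea of create_oov_char_lexicon.pl is: pronounce an out-of-vocabulary Chinese word by looking up each of its characters in a character-to-pinyin dictionary and concatenating the results. The Python below is illustrative only; the file formats, function names, and the one-pronunciation-per-character simplification are assumptions (the real script also carries "/"-separated alternative pronunciations per character):

    def load_char_dict(path):
        # assumed format: one "<char> <pinyin>" pair per line
        char2pron = {}
        with open(path, encoding="utf-8") as f:
            for line in f:
                fields = line.split()
                if len(fields) == 2:
                    char2pron[fields[0]] = fields[1]
        return char2pron

    def oov_lexicon(oov_words_path, char2pron):
        # yield a pronunciation for each OOV word whose characters are all known
        with open(oov_words_path, encoding="utf-8") as f:
            for line in f:
                word = line.strip()
                if word and all(ch in char2pron for ch in word):
                    yield word, [char2pron[ch] for ch in word]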
# diff --git a/egs/hkust/s5/local/hkust_data_prep.sh b/egs/hkust/s5/local/hkust_data_prep.sh index 9fb6fe07cbb..07f3c9677d8 100755 --- a/egs/hkust/s5/local/hkust_data_prep.sh +++ b/egs/hkust/s5/local/hkust_data_prep.sh @@ -1,98 +1,73 @@ #!/bin/bash - -. path.sh +. ./path.sh || exit 1; if [ $# != 2 ]; then - echo "Usage: hkust_data_prep.sh AUDIO_PATH TEXT_PATH" - exit 1; + echo "Usage: $0 " + echo " $0 /export/corpora/LDC03S04 /export/corpora/LDC03T19" + exit 1; fi -HKUST_AUDIO_DIR=$1 -HKUST_TEXT_DIR=$2 +hkust_audio_dir=$1 +hkust_text_dir=$2 train_dir=data/local/train dev_dir=data/local/dev - -case 0 in #goto here - 1) -;; #here: -esac - mkdir -p $train_dir mkdir -p $dev_dir #data directory check -if [ ! -d $HKUST_AUDIO_DIR ] || [ ! -d $HKUST_TEXT_DIR ]; then - echo "Error: run.sh requires two directory arguments" +if [ ! -d $hkust_audio_dir ] || [ ! -d $hkust_text_dir ]; then + echo "Error: $0 requires two directory arguments" exit 1; fi #find sph audio file for train dev resp. -find $HKUST_AUDIO_DIR -iname "*.sph" | grep -i "audio/train" > $train_dir/sph.flist -find $HKUST_AUDIO_DIR -iname "*.sph" | grep -i "audio/dev" > $dev_dir/sph.flist +find $hkust_audio_dir -iname "*.sph" | grep -i "audio/train" > $train_dir/sph.flist || exit 1; +find $hkust_audio_dir -iname "*.sph" | grep -i "audio/dev" > $dev_dir/sph.flist || exit 1; n=`cat $train_dir/sph.flist $dev_dir/sph.flist | wc -l` [ $n -ne 897 ] && \ echo Warning: expected 897 data data files, found $n - #Transcriptions preparation #collect all trans, convert encodings to utf-8, -find $HKUST_TEXT_DIR -iname "*.txt" | grep -i "trans/train" | xargs cat |\ +find $hkust_text_dir -iname "*.txt" | grep -i "trans/train" | xargs cat |\ iconv -f GBK -t utf-8 - | perl -e ' while () { @A = split(" ", $_); if (@A <= 1) { next; } - if ($A[0] eq "#") { $utt_id = $A[1]; } + if ($A[0] eq "#") { $utt_id = $A[1]; } if (@A >= 3) { - $A[2] =~ s:^([AB])\:$:$1:; - printf "%s-%s-%06.0f-%06.0f", $utt_id, $A[2], 100*$A[0] + 0.5, 100*$A[1] + 0.5; - for($n = 3; $n < @A; $n++) { print " $A[$n]" }; - print "\n"; + $A[2] =~ s:^([AB])\:$:$1:; + printf "%s-%s-%06.0f-%06.0f", $utt_id, $A[2], 100*$A[0] + 0.5, 100*$A[1] + 0.5; + for($n = 3; $n < @A; $n++) { print " $A[$n]" }; + print "\n"; } } - ' | sort -k1 > $train_dir/transcripts.txt + ' | sort -k1 > $train_dir/transcripts.txt || exit 1; -find $HKUST_TEXT_DIR -iname "*.txt" | grep -i "trans/dev" | xargs cat |\ +find $hkust_text_dir -iname "*.txt" | grep -i "trans/dev" | xargs cat |\ iconv -f GBK -t utf-8 - | perl -e ' while () { @A = split(" ", $_); if (@A <= 1) { next; } - if ($A[0] eq "#") { $utt_id = $A[1]; } + if ($A[0] eq "#") { $utt_id = $A[1]; } if (@A >= 3) { - $A[2] =~ s:^([AB])\:$:$1:; - printf "%s-%s-%06.0f-%06.0f", $utt_id, $A[2], 100*$A[0] + 0.5, 100*$A[1] + 0.5; - for($n = 3; $n < @A; $n++) { print " $A[$n]" }; - print "\n"; + $A[2] =~ s:^([AB])\:$:$1:; + printf "%s-%s-%06.0f-%06.0f", $utt_id, $A[2], 100*$A[0] + 0.5, 100*$A[1] + 0.5; + for($n = 3; $n < @A; $n++) { print " $A[$n]" }; + print "\n"; } } - ' | sort -k1 > $dev_dir/transcripts.txt - + ' | sort -k1 > $dev_dir/transcripts.txt || exit 1; - -#transcripts normalization and segmentation +#transcripts normalization and segmentation #(this needs external tools), -#Download and configure segment tools -pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'` -export PYTHONPATH=$PYTHONPATH:`pwd`/tools/mmseg-1.3.0/lib/python${pyver}/site-packages -if [ ! 
-d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then - echo "--- Downloading mmseg-1.3.0 ..." - echo "NOTE: it assumes that you have Python, Setuptools installed on your system!" - wget -P tools http://pypi.python.org/packages/source/m/mmseg/mmseg-1.3.0.tar.gz - tar xf tools/mmseg-1.3.0.tar.gz -C tools - cd tools/mmseg-1.3.0 - mkdir -p lib/python${pyver}/site-packages - python setup.py build - python setup.py install --prefix=. - cd ../.. - if [ ! -d tools/mmseg-1.3.0/lib/python${pyver}/site-packages ]; then - echo "mmseg is not found - installation failed?" - exit 1 - fi -fi +python -c "import mmseg" 2>/dev/null || \ + (echo "mmseg is not found. Checkout tools/extra/install_mmseg.sh" && exit 1;) cat $train_dir/transcripts.txt |\ sed -e 's// /g' |\ @@ -101,7 +76,7 @@ cat $train_dir/transcripts.txt |\ sed -e 's/((\([^)]\{0,\}\)))/\1/g' |\ local/hkust_normalize.pl |\ python local/hkust_segment.py |\ - awk '{if (NF > 1) print $0;}' > $train_dir/text + awk '{if (NF > 1) print $0;}' > $train_dir/text || exit 1; cat $dev_dir/transcripts.txt |\ sed -e 's// /g' |\ @@ -110,11 +85,11 @@ cat $dev_dir/transcripts.txt |\ sed -e 's/((\([^)]\{0,\}\)))/\1/g' |\ local/hkust_normalize.pl |\ python local/hkust_segment.py |\ - awk '{if (NF > 1) print $0;}' > $dev_dir/text + awk '{if (NF > 1) print $0;}' > $dev_dir/text || exit 1; # some data is corrupted. Delete them cat $train_dir/text | grep -v 20040527_210939_A901153_B901154-A-035691-035691 | egrep -v "A:|B:" > tmp -mv tmp $train_dir/text +mv tmp $train_dir/text || exit 1; #Make segment files from transcript #segments file format is: utt-id side-id start-time end-time, e.g.: @@ -129,16 +104,14 @@ awk '{ segment=$1; split(segment,S,"-"); side=S[2]; audioname=S[1];startf=S[3];e print segment " " audioname "-" side " " startf/100 " " endf/100}' <$dev_dir/text > $dev_dir/segments awk '{name = $0; gsub(".sph$","",name); gsub(".*/","",name); print(name " " $0)}' $dev_dir/sph.flist > $dev_dir/sph.scp - - sph2pipe=`cd ../../..; echo $PWD/tools/sph2pipe_v2.5/sph2pipe` [ ! -f $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; -cat $train_dir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); +cat $train_dir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);}' | \ sort > $train_dir/wav.scp || exit 1; -cat $dev_dir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); +cat $dev_dir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);}' | \ sort > $dev_dir/wav.scp || exit 1; #side A - channel 1, side B - channel 2 @@ -162,6 +135,6 @@ cat $train_dir/utt2spk | sort -k 2 | utils/utt2spk_to_spk2utt.pl > $train_dir/sp cat $dev_dir/segments | awk '{spk=substr($1,1,33); print $1 " " spk}' > $dev_dir/utt2spk || exit 1; cat $dev_dir/utt2spk | sort -k 2 | utils/utt2spk_to_spk2utt.pl > $dev_dir/spk2utt || exit 1; -echo HKUST data preparation succeeded - -exit 1; +echo "$0: HKUST data preparation succeeded" + +exit; diff --git a/egs/hkust/s5/local/hkust_format_data.sh b/egs/hkust/s5/local/hkust_format_data.sh index 33cf8fa22ef..7fc9b701f49 100755 --- a/egs/hkust/s5/local/hkust_format_data.sh +++ b/egs/hkust/s5/local/hkust_format_data.sh @@ -1,7 +1,7 @@ #!/bin/bash # -if [ -f path.sh ]; then . path.sh; fi +if [ -f ./path.sh ]; then . 
./path.sh; fi silprob=0.5 mkdir -p data/lang_test data/train data/dev
diff --git a/egs/hkust/s5/local/hkust_prepare_dict.sh b/egs/hkust/s5/local/hkust_prepare_dict.sh
index 49d2f8feff8..5cd864c52cc 100755
--- a/egs/hkust/s5/local/hkust_prepare_dict.sh
+++ b/egs/hkust/s5/local/hkust_prepare_dict.sh
@@ -2,74 +2,59 @@
 # Copyright 2016 LeSpeech (Author: Xingyu Na)
 
 # prepare dictionary for HKUST
-# it is done for English and Chinese separately,
+# it is done for English and Chinese separately,
 # For English, we use the CMU dictionary, and Sequitur G2P
 # for OOVs, while the whole English phone set is converted to the Chinese
 # phone set at the end. For Chinese, we use an online dictionary;
 # for OOV, we just produce pronunciations using character mapping.
-
-. path.sh
-[ $# != 0 ] && echo "Usage: local/hkust_prepare_dict.sh" && exit 1;
+. ./path.sh
+
+[ $# != 0 ] && echo "Usage: $0" && exit 1;
 
 train_dir=data/local/train
 dev_dir=data/local/dev
 dict_dir=data/local/dict
 mkdir -p $dict_dir
 mkdir -p $dict_dir/lexicon-{en,ch}
-
+
 # extract full vocabulary
 cat $train_dir/text $dev_dir/text | awk '{for (i = 2; i <= NF; i++) print $i}' |\
   sed -e 's/ /\n/g' | sort -u | grep -v '\[LAUGHTER\]' | grep -v '\[NOISE\]' |\
-  grep -v '\[VOCALIZED-NOISE\]' > $dict_dir/words.txt
+  grep -v '\[VOCALIZED-NOISE\]' > $dict_dir/words.txt || exit 1;
 
 # split into English and Chinese
-cat $dict_dir/words.txt | grep '[a-zA-Z]' > $dict_dir/lexicon-en/words-en.txt
-cat $dict_dir/words.txt | grep -v '[a-zA-Z]' > $dict_dir/lexicon-ch/words-ch.txt
+cat $dict_dir/words.txt | grep '[a-zA-Z]' > $dict_dir/lexicon-en/words-en.txt || exit 1;
+cat $dict_dir/words.txt | grep -v '[a-zA-Z]' > $dict_dir/lexicon-ch/words-ch.txt || exit 1;
 
-##### produce pronunciations for english
+##### produce pronunciations for english
 if [ ! -f $dict_dir/cmudict/cmudict.0.7a ]; then
   echo "--- Downloading CMU dictionary ..."
   svn co -r 13068 https://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict \
     $dict_dir/cmudict || exit 1;
 fi
 
+# format cmudict
 echo "--- Stripping stress and pronunciation variant markers from cmudict ..."
 perl $dict_dir/cmudict/scripts/make_baseform.pl \
   $dict_dir/cmudict/cmudict.0.7a /dev/stdout |\
-  sed -e 's:^\([^\s(]\+\)([0-9]\+)\(\s\+\)\(.*\):\1\2\3:' > $dict_dir/cmudict/cmudict-plain.txt
+  sed -e 's:^\([^\s(]\+\)([0-9]\+)\(\s\+\)\(.*\):\1\2\3:' > $dict_dir/cmudict/cmudict-plain.txt || exit 1;
 
+# extract in-vocab lexicon and oov words
 echo "--- Searching for English OOV words ..."
-gawk 'NR==FNR{words[$1]; next;} !($1 in words)' \
+awk 'NR==FNR{words[$1]; next;} !($1 in words)' \
   $dict_dir/cmudict/cmudict-plain.txt $dict_dir/lexicon-en/words-en.txt |\
-  egrep -v '<.?s>' > $dict_dir/lexicon-en/words-en-oov.txt
+  egrep -v '<.?s>' > $dict_dir/lexicon-en/words-en-oov.txt || exit 1;
 
-gawk 'NR==FNR{words[$1]; next;} ($1 in words)' \
+awk 'NR==FNR{words[$1]; next;} ($1 in words)' \
   $dict_dir/lexicon-en/words-en.txt $dict_dir/cmudict/cmudict-plain.txt |\
-  egrep -v '<.?s>' > $dict_dir/lexicon-en/lexicon-en-iv.txt
+  egrep -v '<.?s>' > $dict_dir/lexicon-en/lexicon-en-iv.txt || exit 1;
 
 wc -l $dict_dir/lexicon-en/words-en-oov.txt
 wc -l $dict_dir/lexicon-en/lexicon-en-iv.txt
 
-pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'`
-export PYTHONPATH=$PYTHONPATH:`pwd`/tools/g2p/lib/python${pyver}/site-packages
-if [ ! -f tools/g2p/lib/python${pyver}/site-packages/g2p.py ]; then
-  echo "--- Downloading Sequitur G2P ..."
-  echo "NOTE: it assumes that you have Python, NumPy and SWIG installed on your system!"
- wget -P tools http://www-i6.informatik.rwth-aachen.de/web/Software/g2p-r1668.tar.gz - tar xf tools/g2p-r1668.tar.gz -C tools - cd tools/g2p - echo '#include ' >> Utility.hh # won't compile on my system w/o this "patch" - python setup.py build - python setup.py install --prefix=. - cd ../.. - if [ ! -f tools/g2p/lib/python${pyver}/site-packages/g2p.py ]; then - echo "Sequitur G2P is not found - installation failed?" - exit 1 - fi -fi - +# setup g2p and generate oov lexicon if [ ! -f conf/g2p_model ]; then echo "--- Downloading a pre-trained Sequitur G2P model ..." wget http://sourceforge.net/projects/kaldi/files/sequitur-model4 -O conf/g2p_model @@ -80,24 +65,31 @@ if [ ! -f conf/g2p_model ]; then fi echo "--- Preparing pronunciations for OOV words ..." -python tools/g2p/lib/python${pyver}/site-packages/g2p.py \ - --model=conf/g2p_model --apply $dict_dir/lexicon-en/words-en-oov.txt > $dict_dir/lexicon-en/lexicon-en-oov.txt +g2p=`which g2p.py` +if [ ! -x $g2p ]; then + echo "g2p.py is not found. Checkout tools/extras/install_sequitur.sh." + exit 1 +fi +g2p.py --model=conf/g2p_model --apply $dict_dir/lexicon-en/words-en-oov.txt \ + > $dict_dir/lexicon-en/lexicon-en-oov.txt || exit 1; +# merge in-vocab and oov lexicon cat $dict_dir/lexicon-en/lexicon-en-oov.txt $dict_dir/lexicon-en/lexicon-en-iv.txt |\ - sort > $dict_dir/lexicon-en/lexicon-en-phn.txt + sort > $dict_dir/lexicon-en/lexicon-en-phn.txt || exit 1; +# convert cmu phoneme to pinyin phonenme mkdir $dict_dir/map -cat conf/cmu2pinyin | awk '{print $1;}' | sort -u > $dict_dir/map/cmu +cat conf/cmu2pinyin | awk '{print $1;}' | sort -u > $dict_dir/map/cmu || exit 1; cat conf/pinyin2cmu | awk -v cmu=$dict_dir/map/cmu \ 'BEGIN{while((getline $dict_dir/map/cmu-used + {for (i = 2; i <=NF; i++) if (dict[$i]) print $i;}' | sort -u > $dict_dir/map/cmu-used || exit 1; cat $dict_dir/map/cmu | awk -v cmu=$dict_dir/map/cmu-used \ 'BEGIN{while((getline $dict_dir/map/cmu-not-used + {if (!dict[$1]) print $1;}' > $dict_dir/map/cmu-not-used || exit 1; -gawk 'NR==FNR{words[$1]; next;} ($1 in words)' \ +awk 'NR==FNR{words[$1]; next;} ($1 in words)' \ $dict_dir/map/cmu-not-used conf/cmu2pinyin |\ - egrep -v '<.?s>' > $dict_dir/map/cmu-py + egrep -v '<.?s>' > $dict_dir/map/cmu-py || exit 1; cat $dict_dir/map/cmu-py | \ perl -e ' @@ -116,9 +108,9 @@ cat $dict_dir/map/cmu-py | \ push(@entry, $W); for($i = 0; $i < @A; $i++) { push(@entry, @{$py2ph{$A[$i]}}); } print "@entry"; - print "\n"; - } -' conf/pinyin2cmu > $dict_dir/map/cmu-cmu + print "\n"; + } +' conf/pinyin2cmu > $dict_dir/map/cmu-cmu || exit 1; cat $dict_dir/lexicon-en/lexicon-en-phn.txt | \ perl -e ' @@ -135,45 +127,46 @@ cat $dict_dir/lexicon-en/lexicon-en-phn.txt | \ @entry = (); $W = shift(@A); push(@entry, $W); - for($i = 0; $i < @A; $i++) { + for($i = 0; $i < @A; $i++) { if (exists $py2ph{$A[$i]}) { push(@entry, @{$py2ph{$A[$i]}}); } else {push(@entry, $A[$i])}; } print "@entry"; - print "\n"; + print "\n"; } -' $dict_dir/map/cmu-cmu > $dict_dir/lexicon-en/lexicon-en.txt +' $dict_dir/map/cmu-cmu > $dict_dir/lexicon-en/lexicon-en.txt || exit 1; -##### produce pronunciations for chinese +##### produce pronunciations for chinese if [ ! 
-f $dict_dir/cedict/cedict_1_0_ts_utf-8_mdbg.txt ]; then + echo "------------- Downloading cedit dictionary ---------------" mkdir -p $dict_dir/cedict - wget -P $dict_dir/cedict http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz + wget -P $dict_dir/cedict http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz gunzip $dict_dir/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz fi cat $dict_dir/cedict/cedict_1_0_ts_utf-8_mdbg.txt | grep -v '#' | awk -F '/' '{print $1}' |\ - perl -e ' + perl -e ' while () { @A = split(" ", $_); print $A[1]; for($n = 2; $n < @A; $n++) { - $A[$n] =~ s:\[?([a-zA-Z0-9\:]+)\]?:$1:; - $tmp = uc($A[$n]); + $A[$n] =~ s:\[?([a-zA-Z0-9\:]+)\]?:$1:; + $tmp = uc($A[$n]); print " $tmp"; } print "\n"; } - ' | sort -k1 > $dict_dir/cedict/ch-dict.txt + ' | sort -k1 > $dict_dir/cedict/ch-dict.txt || exit 1; echo "--- Searching for Chinese OOV words ..." -gawk 'NR==FNR{words[$1]; next;} !($1 in words)' \ +awk 'NR==FNR{words[$1]; next;} !($1 in words)' \ $dict_dir/cedict/ch-dict.txt $dict_dir/lexicon-ch/words-ch.txt |\ - egrep -v '<.?s>' > $dict_dir/lexicon-ch/words-ch-oov.txt + egrep -v '<.?s>' > $dict_dir/lexicon-ch/words-ch-oov.txt || exit 1; -gawk 'NR==FNR{words[$1]; next;} ($1 in words)' \ +awk 'NR==FNR{words[$1]; next;} ($1 in words)' \ $dict_dir/lexicon-ch/words-ch.txt $dict_dir/cedict/ch-dict.txt |\ - egrep -v '<.?s>' > $dict_dir/lexicon-ch/lexicon-ch-iv.txt + egrep -v '<.?s>' > $dict_dir/lexicon-ch/lexicon-ch-iv.txt || exit 1; wc -l $dict_dir/lexicon-ch/words-ch-oov.txt wc -l $dict_dir/lexicon-ch/lexicon-ch-iv.txt @@ -187,10 +180,10 @@ cat $dict_dir/cedict/ch-dict.txt |\ while () { @A = split(" ", $_); $word_len = length($A[0]); - $proun_len = @A - 1 ; + $proun_len = @A - 1 ; if ($word_len == $proun_len) {print $_;} } - ' > $dict_dir/cedict/ch-dict-1.txt + ' > $dict_dir/cedict/ch-dict-1.txt || exit 1; # extract chars cat $dict_dir/cedict/ch-dict-1.txt | awk '{print $1}' |\ @@ -203,12 +196,14 @@ cat $dict_dir/cedict/ch-dict-1.txt | awk '{print $1}' |\ print "$_\n"; } } - ' | grep -v '^$' > $dict_dir/lexicon-ch/ch-char.txt + ' | grep -v '^$' > $dict_dir/lexicon-ch/ch-char.txt || exit 1; # extract individual pinyins -cat $dict_dir/cedict/ch-dict-1.txt | awk '{for(i=2; i<=NF; i++) print $i}' | sed -e 's/ /\n/g' > $dict_dir/lexicon-ch/ch-char-pinyin.txt +cat $dict_dir/cedict/ch-dict-1.txt |\ + awk '{for(i=2; i<=NF; i++) print $i}' |\ + sed -e 's/ /\n/g' > $dict_dir/lexicon-ch/ch-char-pinyin.txt || exit 1; -# first make sure number of characters and pinyins +# first make sure number of characters and pinyins # are equal, so that a char-based dictionary can # be composed. 
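[Editor's sketch with invented names, using in-memory lists instead of files.] In other words: zip the character column file and the pinyin column file into a dictionary, refuse to proceed on a length mismatch, and collapse repeated characters into a single entry whose alternative pronunciations are joined with "/", as the perl further below does:

    def build_char_dict(chars, pinyins):
        # refuse to zip the two column files if their lengths disagree
        if len(chars) != len(pinyins):
            raise ValueError("%d chars but %d pinyins; cannot compose dictionary"
                             % (len(chars), len(pinyins)))
        merged = {}
        for ch, py in sorted(set(zip(chars, pinyins))):   # like paste | sort -u
            merged.setdefault(ch, []).append(py)
        # one entry per character, alternatives joined with "/"
        return {ch: "/".join(prons) for ch, prons in merged.items()}

    # build_char_dict(["好", "好", "你"], ["HAO3", "HAO4", "NI3"])
    #   -> {"好": "HAO3/HAO4", "你": "NI3"}

The script's own length check follows: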
 nchars=`wc -l < $dict_dir/lexicon-ch/ch-char.txt`
@@ -218,12 +213,13 @@
 if [ $nchars -ne $npinyin ]; then
   exit 1
 fi
 
-paste $dict_dir/lexicon-ch/ch-char.txt $dict_dir/lexicon-ch/ch-char-pinyin.txt | sort -u > $dict_dir/lexicon-ch/ch-char-dict.txt
+paste $dict_dir/lexicon-ch/ch-char.txt $dict_dir/lexicon-ch/ch-char-pinyin.txt |\
+  sort -u > $dict_dir/lexicon-ch/ch-char-dict.txt || exit 1;
 
 # create a multiple pronunciation dictionary
 cat $dict_dir/lexicon-ch/ch-char-dict.txt |\
   perl -e '
-  my $prev = "";
+  my $prev = "";
   my $out_line = "";
   while (<STDIN>) {
     @A = split(" ", $_);
@@ -232,14 +228,15 @@ cat $dict_dir/lexicon-ch/ch-char-dict.txt |\
     #print length($prev);
     if (length($prev) == 0) { $out_line = $_; chomp($out_line);}
     if (length($prev)>0 && $cur ne $prev) { print $out_line; print "\n"; $out_line = $_; chomp($out_line);}
-    if (length($prev)>0 && $cur eq $prev) { $out_line = $out_line."/"."$cur_py";}
+    if (length($prev)>0 && $cur eq $prev) { $out_line = $out_line."/"."$cur_py";}
     $prev = $cur;
   }
-  print $out_line;
-  ' > $dict_dir/lexicon-ch/ch-char-dict-mp.txt
+  print $out_line;
+  ' > $dict_dir/lexicon-ch/ch-char-dict-mp.txt || exit 1;
 
 # get lexicon for Chinese OOV words
-perl local/create_oov_char_lexicon.pl $dict_dir/lexicon-ch/ch-char-dict-mp.txt $dict_dir/lexicon-ch/words-ch-oov.txt > $dict_dir/lexicon-ch/lexicon-ch-oov.txt
+local/create_oov_char_lexicon.pl $dict_dir/lexicon-ch/ch-char-dict-mp.txt \
+  $dict_dir/lexicon-ch/words-ch-oov.txt > $dict_dir/lexicon-ch/lexicon-ch-oov.txt || exit 1;
 
 # separate multiple prons for Chinese OOV lexicon
 cat $dict_dir/lexicon-ch/lexicon-ch-oov.txt |\
@@ -249,8 +246,8 @@ cat $dict_dir/lexicon-ch/lexicon-ch-oov.txt |\
   while (<STDIN>) {
     @A = split(" ", $_);
     @entry = ();
-    push(@entry, $A[0]);
-    for($i = 1; $i < @A; $i++ ) {
+    push(@entry, $A[0]);
+    for($i = 1; $i < @A; $i++ ) {
       @py = split("/", $A[$i]);
       @entry1 = @entry;
       @entry = ();
@@ -258,27 +255,27 @@ cat $dict_dir/lexicon-ch/lexicon-ch-oov.txt |\
       for ($k = 0; $k < @py; $k++) {
         $tmp = $entry1[$j]." ".$py[$k];
         push(@entry, $tmp);
-      }
-    }
+      }
+    }
   }
   for ($i = 0; $i < @entry; $i++) {
-    print $entry[$i];
+    print $entry[$i];
     print "\n";
-  }
+  }
 }
-' > $dict_dir/lexicon-ch/lexicon-ch-oov-mp.txt
+' > $dict_dir/lexicon-ch/lexicon-ch-oov-mp.txt || exit 1;
 
 # compose IV and OOV lexicons for Chinese
 cat $dict_dir/lexicon-ch/lexicon-ch-oov-mp.txt $dict_dir/lexicon-ch/lexicon-ch-iv.txt |\
-  awk '{if (NF > 1 && $2 ~ /[A-Za-z0-9]+/) print $0;}' > $dict_dir/lexicon-ch/lexicon-ch.txt
+  awk '{if (NF > 1 && $2 ~ /[A-Za-z0-9]+/) print $0;}' > $dict_dir/lexicon-ch/lexicon-ch.txt || exit 1;
 
 # convert Chinese pinyin to CMU format
 cat $dict_dir/lexicon-ch/lexicon-ch.txt | sed -e 's/U:/V/g' | sed -e 's/ R\([0-9]\)/ ER\1/g'|\
-  utils/pinyin_map.pl conf/pinyin2cmu > $dict_dir/lexicon-ch/lexicon-ch-cmu.txt
+  utils/pinyin_map.pl conf/pinyin2cmu > $dict_dir/lexicon-ch/lexicon-ch-cmu.txt || exit 1;
 
 # combine English and Chinese lexicons
 cat $dict_dir/lexicon-en/lexicon-en.txt $dict_dir/lexicon-ch/lexicon-ch-cmu.txt |\
-  sort -u > $dict_dir/lexicon1.txt
+  sort -u > $dict_dir/lexicon1.txt || exit 1;
 
 cat $dict_dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}'| \
   sort -u |\
@@ -287,8 +284,8 @@ cat $dict_dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}'| \
   while (<STDIN>) {
     $phone = $_;
     chomp($phone);
-    chomp($_);
-    $phone =~ s:([A-Z]+)[0-9]:$1:;
+    chomp($_);
+    $phone =~ s:([A-Z]+)[0-9]:$1:;
     if (exists $ph_cl{$phone}) { push(@{$ph_cl{$phone}}, $_) }
     else { $ph_cl{$phone} = [$_]; }
   }
@@ -314,4 +311,6 @@ cat $dict_dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ",
  echo ' SPN' ) | \
   cat - $dict_dir/lexicon1.txt > $dict_dir/lexicon.txt || exit 1;
 
-exit 1;
+echo "$0: HKUST dict preparation succeeded"
+
+exit;
diff --git a/egs/hkust/s5/local/hkust_segment.py b/egs/hkust/s5/local/hkust_segment.py
index dff335fc10a..ba5ffc053d5 100755
--- a/egs/hkust/s5/local/hkust_segment.py
+++ b/egs/hkust/s5/local/hkust_segment.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
 #coding:utf-8
-#!/usr/bin/env python
+
+from __future__ import print_function
 import sys
 from mmseg import seg_txt
 for line in sys.stdin:
   blks = str.split(line)
@@ -12,4 +13,4 @@
       continue
     for j in seg_txt(blks[i]):
       out_line += " " + j
-  print out_line
+  print(out_line)
diff --git a/egs/hkust/s5/local/hkust_train_lms.sh b/egs/hkust/s5/local/hkust_train_lms.sh
index 4362bdd708f..d6d0b2aa0bc 100755
--- a/egs/hkust/s5/local/hkust_train_lms.sh
+++ b/egs/hkust/s5/local/hkust_train_lms.sh
@@ -5,44 +5,29 @@
 
 text=data/local/train/text
-lexicon=data/local/dict/lexicon.txt
+lexicon=data/local/dict/lexicon.txt
 for f in "$text" "$lexicon"; do [ ! -f $f ] && echo "$0: No such file $f" && exit 1; done
 
 # This script takes no arguments. It assumes you have already run
-# swbd_p1_data_prep.sh.
+# swbd_p1_data_prep.sh.
 # It takes as input the files
 #data/local/train/text
 #data/local/dict/lexicon.txt
 
 dir=data/local/lm
 mkdir -p $dir
 export LC_ALL=C # You'll get errors about things being not sorted, if you
-# have a different locale.
-export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
-( # First make sure the kaldi_lm toolkit is installed.
- cd ../../../tools || exit 1;
- if [ -d kaldi_lm ]; then
-   echo Not installing the kaldi_lm toolkit since it is already there.
- else
-   echo Downloading and installing the kaldi_lm tools
-   if [ !
-f kaldi_lm.tar.gz ]; then - wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; - fi - tar -xvzf kaldi_lm.tar.gz || exit 1; - cd kaldi_lm - make || exit 1; - echo Done making the kaldi_lm tools - fi -) || exit 1; - -mkdir -p $dir +kaldi_lm=`which train_lm.sh` +if [ ! -x $kaldi_lm ]; then + echo "train_lm.sh is not found. Checkout tools/extra/install_kaldi_lm.sh" + exit 1 +fi cleantext=$dir/text.no_oov -cat $text | awk -v lex=$lexicon 'BEGIN{while((getline0){ seen[$1]=1; } } +cat $text | awk -v lex=$lexicon 'BEGIN{while((getline0){ seen[$1]=1; } } {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ");} } printf("\n");}' \ > $cleantext || exit 1; @@ -73,7 +58,7 @@ train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1; # Perplexity over 128254.000000 words is 90.446690 # note: output is -# data/local/lm/3gram-mincount/lm_unpruned.gz +# data/local/lm/3gram-mincount/lm_unpruned.gz exit 0 @@ -94,10 +79,10 @@ cat $dir/word_map | awk '{print $1}' | cat - <(echo ""; echo "" ) > $sdir ngram-count -text $sdir/train -order 3 -limit-vocab -vocab $sdir/wordlist -unk \ -map-unk "" -kndiscount -interpolate -lm $sdir/srilm.o3g.kn.gz -ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout +ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout # 0 zeroprobs, logprob= -250954 ppl= 90.5091 ppl1= 132.482 # Note: perplexity SRILM gives to Kaldi-LM model is same as kaldi-lm reports above. # Difference in WSJ must have been due to different treatment of . -ngram -lm $dir/3gram-mincount/lm_unpruned.gz -ppl $sdir/heldout +ngram -lm $dir/3gram-mincount/lm_unpruned.gz -ppl $sdir/heldout # 0 zeroprobs, logprob= -250913 ppl= 90.4439 ppl1= 132.379 diff --git a/egs/hkust/s5/local/nnet3/run_ivector_common.sh b/egs/hkust/s5/local/nnet3/run_ivector_common.sh index 046f723ca1e..bbdb5796c22 100755 --- a/egs/hkust/s5/local/nnet3/run_ivector_common.sh +++ b/egs/hkust/s5/local/nnet3/run_ivector_common.sh @@ -1,5 +1,7 @@ #!/bin/bash +# This script is modified based on swbd/s5c/local/nnet3/run_ivector_common.sh + # this script contains some common (shared) parts of the run_nnet*.sh scripts. . cmd.sh @@ -7,31 +9,25 @@ stage=0 num_threads_ubm=32 -speed_perturb=true -use_sat_alignments=true +ivector_extractor= set -e . cmd.sh . ./path.sh . ./utils/parse_options.sh -if [ "$use_sat_alignments" == "true" ] ; then - gmm_dir=exp/tri5a - align_script=steps/align_fmllr.sh -else - gmm_dir=exp/tri3a - align_script=steps/align_si.sh -fi +gmm_dir=exp/tri5a +align_script=steps/align_fmllr.sh -if [ $stage -le 1 ]; then - # Create high-resolution MFCC features (with 40 cepstra instead of 13). +if [ $stage -le 1 ] && [ -z $ivector_extractor ]; then + # Create high-resolution MFCC features (with 40 cepstra instead of 13) with pitch. # this shows how you can split across multiple file-systems. we'll split the # MFCC dir across multiple locations. You might want to be careful here, if you # have multiple copies of Kaldi checked out and run the same recipe, not to let # them overwrite each other. mfccdir=mfcc_hires if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $mfccdir/storage ]; then - utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/hkust-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/hkust-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage fi for datadir in train dev; do @@ -51,38 +47,42 @@ for line in sys.stdin.readlines(): mv $dir/wav.scp_scaled $dir/wav.scp fi - steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ + steps/make_mfcc_pitch_online.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ --cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; + + # make MFCC data dir without pitch to extract iVector + utils/data/limit_feature_dim.sh 0:39 data/${datadir}_hires data/${datadir}_hires_nopitch || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires_nopitch exp/make_hires/$datadir $mfccdir || exit 1; done fi -if [ $stage -le 2 ]; then +if [ $stage -le 2 ] && [ -z $ivector_extractor ]; then # Train a system just for its LDA+MLLT transform. We use --num-iters 13 # because after we get the transform (12th iter is the last), any further # training is pointless. steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ --realign-iters "" \ --splice-opts "--left-context=3 --right-context=3" \ - 5000 10000 data/train_hires data/lang \ + 5000 10000 data/train_hires_nopitch data/lang \ ${gmm_dir}_ali exp/nnet3/tri5 fi -if [ $stage -le 3 ]; then +if [ $stage -le 3 ] && [ -z $ivector_extractor ]; then steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 \ --num-frames 700000 \ - --num-threads $num_threads_ubm \ data/train_hires 512 exp/nnet3/tri5 exp/nnet3/diag_ubm fi -if [ $stage -le 4 ]; then +if [ $stage -le 4 ] && [ -z $ivector_extractor ]; then # iVector extractors can in general be sensitive to the amount of data, but # this one has a fairly small dim (defaults to 100) steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ data/train_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1; + ivector_extractor=exp/nnet3/extractor fi -if [ $stage -le 5 ] && [ "$speed_perturb" == "true" ]; then +if [ $stage -le 5 ]; then # Although the nnet will be trained by high resolution data, # we still have to perturbe the normal data to get the alignment # _sp stands for speed-perturbed @@ -94,7 +94,7 @@ if [ $stage -le 5 ] && [ "$speed_perturb" == "true" ]; then mfccdir=mfcc_perturbed for x in train_sp; do - steps/make_mfcc.sh --cmd "$train_cmd" --nj 70 \ + steps/make_mfcc_pitch_online.sh --cmd "$train_cmd" --nj 70 \ data/$x exp/make_mfcc/$x $mfccdir || exit 1; steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1; done @@ -103,28 +103,31 @@ if [ $stage -le 5 ] && [ "$speed_perturb" == "true" ]; then $align_script --nj 30 --cmd "$train_cmd" \ data/train_sp data/lang $gmm_dir ${gmm_dir}_sp_ali || exit 1 - # Now perturb the high resolution daa + # Now perturb the high resolution data utils/copy_data_dir.sh data/train_sp data/train_sp_hires mfccdir=mfcc_perturbed_hires for x in train_sp_hires; do - steps/make_mfcc.sh --cmd "$train_cmd" --nj 70 --mfcc-config conf/mfcc_hires.conf \ + steps/make_mfcc_pitch_online.sh --cmd "$train_cmd" --nj 70 --mfcc-config conf/mfcc_hires.conf \ data/$x exp/make_hires/$x $mfccdir || exit 1; steps/compute_cmvn_stats.sh data/$x exp/make_hires/$x $mfccdir || exit 1; + # create MFCC data dir without pitch to extract iVector + 
utils/data/limit_feature_dim.sh 0:39 data/$x data/${x}_nopitch || exit 1; + steps/compute_cmvn_stats.sh data/${x}_nopitch exp/make_hires/$x $mfccdir || exit 1; done utils/fix_data_dir.sh data/train_sp_hires fi -if [ "$speed_perturb" == "true" ]; then - train_set=train_sp -else - train_set=train +train_set=train_sp +if [ -z $ivector_extractor ]; then + echo "iVector extractor is not found!" + exit 1; fi if [ $stage -le 6 ]; then rm -f exp/nnet3/.error 2>/dev/null - ivectordir=exp/nnet3/ivectors_${train_set}_hires + ivectordir=exp/nnet3/ivectors_${train_set} if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $ivectordir/storage ]; then - utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/hkust-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/hkust-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage fi # We extract iVectors on all the train data, which will be what we train the # system on. With --utts-per-spk-max 2, the script. pairs the utterances @@ -133,11 +136,10 @@ if [ $stage -le 6 ]; then # having a larger number of speakers is helpful for generalization, and to # handle per-utterance decoding well (iVector starts at zero). - steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_hires_max2 + steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires_nopitch data/${train_set}_hires_nopitch_max2 steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ - data/${train_set}_hires_max2 \ - exp/nnet3/extractor \ - exp/nnet3/ivectors_${train_set}_hires \ + data/${train_set}_hires_nopitch_max2 \ + $ivector_extractor $ivectordir \ || touch exp/nnet3/.error [ -f exp/nnet3/.error ] && echo "$0: error extracting iVectors." && exit 1; fi @@ -145,7 +147,7 @@ fi if [ $stage -le 7 ]; then rm -f exp/nnet3/.error 2>/dev/null steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 8 \ - data/dev_hires exp/nnet3/extractor exp/nnet3/ivectors_dev || touch exp/nnet3/.error & + data/dev_hires_nopitch $ivector_extractor exp/nnet3/ivectors_dev || touch exp/nnet3/.error & wait [ -f exp/nnet3/.error ] && echo "$0: error extracting iVectors." && exit 1; fi diff --git a/egs/hkust/s5/local/nnet3/run_tdnn.sh b/egs/hkust/s5/local/nnet3/run_tdnn.sh index 11f12ccf394..35bcc7d7512 100755 --- a/egs/hkust/s5/local/nnet3/run_tdnn.sh +++ b/egs/hkust/s5/local/nnet3/run_tdnn.sh @@ -1,5 +1,7 @@ #!/bin/bash +# This script is based on swbd/s5c/local/nnet3/run_tdnn.sh + # this is the standard "tdnn" system, built in nnet3; it's what we use to # call multi-splice. @@ -10,9 +12,7 @@ set -e stage=0 train_stage=-10 -use_sat_alignments=true affix= -speed_perturb=true common_egs_dir= # training options @@ -33,81 +33,102 @@ use_ivectors=true . ./utils/parse_options.sh if ! 
cuda-compiled; then - cat < $data_dir/wav.scp_scaled || exit 1; + mv $data_dir/wav.scp_scaled $data_dir/wav.scp + steps/make_mfcc_pitch_online.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/train_scaled_hires exp/make_hires/train_scaled $mfccdir; + steps/compute_cmvn_stats.sh data/train_scaled_hires exp/make_hires/train_scaled $mfccdir; + + # we need these features for the run_nnet2_ms.sh + steps/make_mfcc_pitch_online.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/train_hires exp/make_hires/train $mfccdir; + steps/compute_cmvn_stats.sh data/train_hires exp/make_hires/train $mfccdir; + + # Remove the small number of utterances that couldn't be extracted for some + # reason (e.g. too short; no such file). + utils/fix_data_dir.sh data/train_scaled_hires; + utils/fix_data_dir.sh data/train_hires; + + # Create MFCC+pitchs for the dev set + utils/copy_data_dir.sh data/dev data/dev_hires + steps/make_mfcc_pitch_online.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \ + data/dev_hires exp/make_hires/dev $mfccdir; + steps/compute_cmvn_stats.sh data/dev_hires exp/make_hires/dev $mfccdir; + utils/fix_data_dir.sh data/dev_hires # remove segments with problems + + # Take the MFCCs for training iVector extractors + utils/data/limit_feature_dim.sh 0:39 data/train_scaled_hires data/train_scaled_hires_nopitch || exit 1; + steps/compute_cmvn_stats.sh data/train_scaled_hires_nopitch exp/make_hires/train $mfccdir || exit 1; + utils/data/limit_feature_dim.sh 0:39 data/train_hires data/train_hires_nopitch || exit 1; + steps/compute_cmvn_stats.sh data/train_hires_nopitch exp/make_hires/train $mfccdir || exit 1; + utils/data/limit_feature_dim.sh 0:39 data/dev_hires data/dev_hires_nopitch || exit 1; + steps/compute_cmvn_stats.sh data/dev_hires_nopitch exp/make_hires/dev $mfccdir || exit 1; + + # Take the first 30k utterances (about 1/5th of the data) this will be used + # for the diagubm training + utils/subset_data_dir.sh --first data/train_scaled_hires_nopitch 30000 data/train_scaled_hires_30k + + # create a 100k subset for the lda+mllt training + utils/subset_data_dir.sh --first data/train_scaled_hires_nopitch 100000 data/train_scaled_hires_100k; +fi + +if [ $stage -le 2 ]; then + # We need to build a small system just because we need the LDA+MLLT transform + # to train the diag-UBM on top of. We use --num-iters 13 because after we get + # the transform (12th iter is the last), any further training is pointless. + # this decision is based on fisher_english + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ + --splice-opts "--left-context=3 --right-context=3" \ + 5500 90000 data/train_scaled_hires_100k \ + data/lang exp/tri2_ali_100k exp/nnet2_online/tri3b +fi + +if [ $stage -le 3 ]; then + # To train a diagonal UBM we don't need very much data, so use the smallest subset. + steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \ + data/train_scaled_hires_30k 512 exp/nnet2_online/tri3b exp/nnet2_online/diag_ubm +fi + +if [ $stage -le 4 ]; then + # iVector extractors can be sensitive to the amount of data, but this one has a + # fairly small dim (defaults to 100) so we don't use all of it, we use just the + # 100k subset (just under half the data). 
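[Editor's sketch, not a transcription of copy_data_dir.sh.] The next stage, like run_ivector_common.sh earlier, runs copy_data_dir.sh with --utts-per-spk-max 2 before extracting iVectors: each speaker's utterances are regrouped into pseudo-speakers of at most two, which gives training many more "speakers" and better matches per-utterance decoding, where the iVector starts from zero. The regrouping amounts to the following (the pseudo-speaker naming scheme is invented):

    def split_speakers(spk2utt, utts_per_spk_max=2):
        # split each speaker's utterance list into groups of at most
        # utts_per_spk_max, each group becoming a new pseudo-speaker
        new_spk2utt = {}
        for spk, utts in sorted(spk2utt.items()):
            for i in range(0, len(utts), utts_per_spk_max):
                pseudo = "%s-%d" % (spk, i // utts_per_spk_max)
                new_spk2utt[pseudo] = utts[i:i + utts_per_spk_max]
        return new_spk2utt

    # split_speakers({"A": ["a1", "a2", "a3"]})
    #   -> {"A-0": ["a1", "a2"], "A-1": ["a3"]}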
+ steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ + data/train_scaled_hires_100k exp/nnet2_online/diag_ubm exp/nnet2_online/extractor || exit 1; +fi + +if [ $stage -le 5 ]; then + # We extract iVectors on all the train_nodup data, which will be what we + # train the system on. + + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). + steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/train_hires_nopitch data/train_hires_nopitch_max2 + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ + data/train_hires_nopitch_max2 exp/nnet2_online/extractor exp/nnet2_online/ivectors_train || exit 1; +fi + + +exit 0; diff --git a/egs/hkust/s5/local/online/run_nnet2_ms.sh b/egs/hkust/s5/local/online/run_nnet2_ms.sh new file mode 100755 index 00000000000..b935d86fa90 --- /dev/null +++ b/egs/hkust/s5/local/online/run_nnet2_ms.sh @@ -0,0 +1,108 @@ +#!/bin/bash + +. ./cmd.sh +set -e +stage=1 +train_stage=-10 +use_gpu=true +splice_indexes="layer0/-2:-1:0:1:2 layer1/-1:2 layer2/-3:3 layer3/-7:2 layer4/-3:3" +common_egs_dir= +dir=exp/nnet2_online/nnet_ms + +. ./path.sh +. ./utils/parse_options.sh + +if $use_gpu; then + if ! cuda-compiled; then + cat < " && exit; - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 - -if [ -f $data/stm ]; then # use sclite scoring. - echo "$data/stm exists: using local/score_sclite.sh" - eval local/score_sclite.sh $orig_args -else - echo "$data/stm does not exist: using local/score_basic.sh" - eval local/score_basic.sh $orig_args -fi diff --git a/egs/hkust/s5/local/score.sh b/egs/hkust/s5/local/score.sh new file mode 120000 index 00000000000..df664a0f1f1 --- /dev/null +++ b/egs/hkust/s5/local/score.sh @@ -0,0 +1 @@ +../steps/scoring/score_kaldi_cer.sh \ No newline at end of file diff --git a/egs/hkust/s5/local/score_basic.sh b/egs/hkust/s5/local/score_basic.sh deleted file mode 100755 index e54537654be..00000000000 --- a/egs/hkust/s5/local/score_basic.sh +++ /dev/null @@ -1,107 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. -# 2016 LeSpeech (Author: Xingyu Na) - -# begin configuration section. -cmd=run.pl -min_lmwt=7 -max_lmwt=17 -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score_basic.sh [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -dir=$3 - -model=$dir/../final.mdl # assume model one level up from decoding dir. - -hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl -[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; -hubdir=`dirname $hubscr` - -for f in $data/text $lang/words.txt $dir/lat.1.gz; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. 
eval2000 - -mkdir -p $dir/scoring/log - - -function filter_text { - perl -e 'foreach $w (@ARGV) { $bad{$w} = 1; } - while() { @A = split(" ", $_); $id = shift @A; print "$id "; - foreach $a (@A) { if (!defined $bad{$a}) { print "$a "; }} print "\n"; }' \ - '[NOISE]' '[LAUGHTER]' '[VOCALIZED-NOISE]' '' '%HESITATION' -} - -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ - lattice-best-path --lm-scale=LMWT --word-symbol-table=$lang/words.txt \ - "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/LMWT.tra || exit 1; - -for lmwt in `seq $min_lmwt $max_lmwt`; do - utils/int2sym.pl -f 2- $lang/words.txt <$dir/scoring/$lmwt.tra | \ - filter_text > $dir/scoring/$lmwt.txt || exit 1; -done - -filter_text <$data/text >$dir/scoring/text.filt - -#for character error rate -cat $dir/scoring/text.filt | awk '{ print $1}' > $dir/scoring/utt_id -cat $dir/scoring/text.filt | awk '{{for (i = 2; i <= NF; i++) printf(" %s", $i);} printf("\n"); }' |\ - perl -e ' - use encoding utf8; - while () { - @words = split(" ", $_); - foreach (@words) { - @chars = split("", $_); - foreach (@chars) { - print "$_ "; - } - } - print "\n"; - } - ' > $dir/scoring/utt_tra -paste $dir/scoring/utt_id $dir/scoring/utt_tra > $dir/scoring/char.filt - -for lmwt in `seq $min_lmwt $max_lmwt`; do - cat $dir/scoring/$lmwt.txt | awk '{ print $1}' > $dir/scoring/utt_id - cat $dir/scoring/$lmwt.txt | awk '{{for (i = 2; i <= NF; i++) printf(" %s", $i);} printf("\n"); }' |\ - perl -e ' - use encoding utf8; - while () { - @words = split(" ", $_); - foreach (@words) { - @chars = split("", $_); - foreach (@chars) { - print "$_ "; - } - } - print "\n"; - } - ' > $dir/scoring/utt_tra - paste $dir/scoring/utt_id $dir/scoring/utt_tra > $dir/scoring/${lmwt}.char -done - -rm $dir/scoring/utt_tra $dir/scoring/utt_id - -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - compute-wer --text --mode=present \ - ark:$dir/scoring/text.filt ark:$dir/scoring/LMWT.txt ">&" $dir/wer_LMWT || exit 1; - -$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.cer.log \ - compute-wer --text --mode=present \ - ark:$dir/scoring/char.filt ark:$dir/scoring/LMWT.char ">&" $dir/cer_LMWT || exit 1; - -exit 0 diff --git a/egs/hkust/s5/local/score_sclite.sh b/egs/hkust/s5/local/score_sclite.sh deleted file mode 100755 index c1062440494..00000000000 --- a/egs/hkust/s5/local/score_sclite.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -# begin configuration section. -cmd=run.pl -stage=0 -min_lmwt=7 -max_lmwt=17 -iter=final -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score_sclite.sh [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -dir=$3 - -model=$dir/../${iter}.mdl # assume model one level up from decoding dir. - -hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl -[ ! 
-f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; -hubdir=`dirname $hubscr` - -for f in $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. eval2000 - -mkdir -p $dir/scoring/log - -if [ $stage -le 0 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - mkdir -p $dir/score_LMWT/ '&&' \ - lattice-1best --lm-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ - nbest-to-ctm ark:- - \| \ - utils/int2sym.pl -f 5 $lang/words.txt \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; -fi - -if [ $stage -le 1 ]; then -# Remove some stuff we don't want to score, from the ctm. - for x in $dir/score_*/$name.ctm; do - cp $x $dir/tmpf; - cat $dir/tmpf | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ - grep -v -E '|%HESITATION' > $x; - done -fi - -if [ $stage -le 2 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - cp $data/stm $dir/score_LMWT/ '&&' \ - $hubscr -p $hubdir -V -l english -h hub5 -g $data/glm -r $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm || exit 1; -fi - -exit 0 diff --git a/egs/hkust/s5/local/score_sclite_conf.sh b/egs/hkust/s5/local/score_sclite_conf.sh deleted file mode 100755 index a6a2759629d..00000000000 --- a/egs/hkust/s5/local/score_sclite_conf.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -# begin configuration section. -cmd=run.pl -stage=0 -decode_mbr=true -min_lmwt=7 -max_lmwt=17 -#end configuration section. - -[ -f ./path.sh ] && . ./path.sh -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: local/score_sclite_conf.sh [--cmd (run.pl|queue.pl...)] " - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1|2) # start scoring script from part-way through." - echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." - echo " --min_lmwt # minumum LM-weight for lattice rescoring " - echo " --max_lmwt # maximum LM-weight for lattice rescoring " - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -dir=$3 - -model=$dir/../final.mdl # assume model one level up from decoding dir. - -hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl -[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; -hubdir=`dirname $hubscr` - -for f in $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. eval2000 - -mkdir -p $dir/scoring/log - -if [ $stage -le 0 ]; then - # the escaping gets a bit crazy here, sorry... 
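(As a note on the scoring scripts being deleted here: the CER preparation that score_basic.sh implemented with an inline Perl loop amounts to exploding every word of a transcript into single characters while keeping the utterance id. A rough Python equivalent is sketched below; it assumes UTF-8 text on stdin, which is an assumption, not part of the deleted script.)

import sys

def words_to_chars(line):
    # Keep the utterance id, then split every remaining word into characters.
    fields = line.split()
    utt_id, words = fields[0], fields[1:]
    chars = [c for w in words for c in w]
    return utt_id + " " + " ".join(chars)

for line in sys.stdin:
    if line.strip():
        print(words_to_chars(line))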
- $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - mkdir -p $dir/score_LMWT/ '&&' \ - ACWT=\`perl -e \"print 1.0/LMWT\;\"\` '&&' \ - lattice-align-words $lang/phones/word_boundary.int $model "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-to-ctm-conf --decode-mbr=$decode_mbr --acoustic-scale=\$ACWT ark:- - \| \ - utils/int2sym.pl -f 5 $lang/words.txt \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; -fi - -if [ $stage -le 1 ]; then -# Remove some stuff we don't want to score, from the ctm. - for x in $dir/score_*/$name.ctm; do - cp $x $dir/tmpf; - cat $dir/tmpf | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ - grep -v -E '|%HESITATION' > $x; - done -fi - -if [ $stage -le 2 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ - cp $data/stm $dir/score_LMWT/ '&&' \ - $hubscr -p $hubdir -V -l english -h hub5 -g $data/glm -r $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm || exit 1; -fi - -exit 0 diff --git a/egs/hkust/s5/local/wer_hyp_filter b/egs/hkust/s5/local/wer_hyp_filter new file mode 100755 index 00000000000..a1bfdb57efc --- /dev/null +++ b/egs/hkust/s5/local/wer_hyp_filter @@ -0,0 +1,19 @@ +#!/usr/bin/env perl + +@filters=('[NOISE]','[LAUGHTER]','[VOCALIZED-NOISE]','','%HESITATION'); + +foreach $w (@filters) { + $bad{$w} = 1; +} + +while() { + @A = split(" ", $_); + $id = shift @A; + print "$id "; + foreach $a (@A) { + if (!defined $bad{$a}) { + print "$a "; + } + } + print "\n"; +} diff --git a/egs/hkust/s5/local/wer_ref_filter b/egs/hkust/s5/local/wer_ref_filter new file mode 100755 index 00000000000..a1bfdb57efc --- /dev/null +++ b/egs/hkust/s5/local/wer_ref_filter @@ -0,0 +1,19 @@ +#!/usr/bin/env perl + +@filters=('[NOISE]','[LAUGHTER]','[VOCALIZED-NOISE]','','%HESITATION'); + +foreach $w (@filters) { + $bad{$w} = 1; +} + +while() { + @A = split(" ", $_); + $id = shift @A; + print "$id "; + foreach $a (@A) { + if (!defined $bad{$a}) { + print "$a "; + } + } + print "\n"; +} diff --git a/egs/hkust/s5/path.sh b/egs/hkust/s5/path.sh index 5adfbeec7c2..2d17b17a84a 100755 --- a/egs/hkust/s5/path.sh +++ b/egs/hkust/s5/path.sh @@ -1,5 +1,5 @@ export KALDI_ROOT=`pwd`/../../.. -#export KALDI_ROOT=/home/dpovey/kaldi-trunk-test +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh diff --git a/egs/hkust/s5/run.sh b/egs/hkust/s5/run.sh index bdd3e7797e8..0129dd45df2 100755 --- a/egs/hkust/s5/run.sh +++ b/egs/hkust/s5/run.sh @@ -1,9 +1,9 @@ #!/bin/bash -# Copyright 2012 Chao Weng +# Copyright 2012 Chao Weng +# 2016 Alibaba Robotics Corp. (Author: Xingyu Na) # Apache 2.0 -#exit 1; # This is a shell script, but it's recommended that you run the commands one by # one by copying and pasting into the shell. # Caution: some of the graph creation steps use quite a bit of memory, so you @@ -11,16 +11,13 @@ . 
cmd.sh -# Data Preparation, +# Data Preparation, local/hkust_data_prep.sh /export/corpora/LDC/LDC2005S15/ /export/corpora/LDC/LDC2005T32/ # Lexicon Preparation, -local/hkust_prepare_dict.sh +local/hkust_prepare_dict.sh || exit 1; - - - -# Phone Sets, questions, L compilation +# Phone Sets, questions, L compilation utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang # LM training @@ -29,35 +26,27 @@ local/hkust_train_lms.sh # G compilation, check LG composition local/hkust_format_data.sh -# Now make MFCC features. +# Now make MFCC plus pitch features. # mfccdir should be some place with a largish disk where you # want to store MFCC features. mfccdir=mfcc -for x in train dev; do - steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/$x exp/make_mfcc/$x $mfccdir || exit 1; +for x in train dev; do + steps/make_mfcc_pitch_online.sh --cmd "$train_cmd" --nj 10 data/$x exp/make_mfcc/$x $mfccdir || exit 1; steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1; done # after this, the next command will remove the small number of utterances # that couldn't be extracted for some reason (e.g. too short; no such file). utils/fix_data_dir.sh data/train || exit 1; +utils/subset_data_dir.sh --first data/train 100000 data/train_100k || exit 1; steps/train_mono.sh --cmd "$train_cmd" --nj 10 \ data/train data/lang exp/mono0a || exit 1; - # Monophone decoding utils/mkgraph.sh data/lang_test exp/mono0a exp/mono0a/graph || exit 1 -# note: local/decode.sh calls the command line once for each -# test, and afterwards averages the WERs into (in this case -# exp/mono/decode/ - - - steps/decode.sh --cmd "$decode_cmd" --config conf/decode.config --nj 10 \ exp/mono0a/graph data/dev exp/mono0a/decode - - # Get alignments from monophone system. steps/align_si.sh --cmd "$train_cmd" --nj 10 \ data/train data/lang exp/mono0a exp/mono_ali || exit 1; @@ -71,8 +60,6 @@ utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph || exit 1; steps/decode.sh --cmd "$decode_cmd" --config conf/decode.config --nj 10 \ exp/tri1/graph data/dev exp/tri1/decode - - # align tri1 steps/align_si.sh --cmd "$train_cmd" --nj 10 \ data/train data/lang exp/tri1 exp/tri1_ali || exit 1; @@ -91,7 +78,10 @@ steps/decode.sh --cmd "$decode_cmd" --config conf/decode.config --nj 10 \ steps/align_si.sh --cmd "$train_cmd" --nj 10 \ data/train data/lang exp/tri2 exp/tri2_ali || exit 1; -# Train tri3a, which is LDA+MLLT, +steps/align_si.sh --cmd "$train_cmd" --nj 10 \ + data/train_100k data/lang exp/tri2 exp/tri2_ali_100k || exit 1; + +# Train tri3a, which is LDA+MLLT, steps/train_lda_mllt.sh --cmd "$train_cmd" \ 2500 20000 data/train data/lang exp/tri2_ali exp/tri3a || exit 1; @@ -123,51 +113,28 @@ utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph || exit 1; steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 10 --config conf/decode.config \ exp/tri5a/graph data/dev exp/tri5a/decode || exit 1; - -# MMI starting from system in tri5a. Use the same data (100k_nodup). -# Later we'll use all of it. 
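(The "getting results" loop that run.sh now ends with, shown further down, relies on utils/best_wer.sh to pick the best LM weight. In spirit it does something like the sketch below; the cer_* path pattern and the "%WER" line format are assumptions about the score files, not guaranteed by this patch.)

import glob
import re

def best_error_rate(decode_dir):
    # Scan the per-LMWT score files of one decode directory and return the
    # lowest error rate together with the file it came from.
    best = None
    for path in glob.glob(decode_dir + "/cer_*"):
        for line in open(path):
            m = re.search(r"%WER (\d+\.\d+)", line)
            if m:
                wer = float(m.group(1))
                if best is None or wer < best[0]:
                    best = (wer, path)
    return best  # (error rate, file), or None if nothing matched

print(best_error_rate("exp/tri5a/decode"))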
steps/align_fmllr.sh --cmd "$train_cmd" --nj 10 \ data/train data/lang exp/tri5a exp/tri5a_ali || exit 1; -steps/make_denlats.sh --cmd "$train_cmd" --nj 10 --transform-dir exp/tri5a_ali \ - --config conf/decode.config \ - data/train data/lang exp/tri5a exp/tri5a_denlats || exit 1; -steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 \ - data/train data/lang exp/tri5a_ali exp/tri5a_denlats exp/tri5a_mmi_b0.1 || exit 1; -steps/decode.sh --cmd "$decode_cmd" --nj 10 --config conf/decode.config \ - --transform-dir exp/tri5a/decode \ - exp/tri5a/graph data/dev exp/tri5a_mmi_b0.1/decode || exit 1 ; - -# Do MPE. -steps/train_mpe.sh --cmd "$train_cmd" data/train data/lang exp/tri5a_ali exp/tri5a_denlats exp/tri5a_mpe || exit 1; - -steps/decode.sh --cmd "$decode_cmd" --nj 10 --config conf/decode.config \ - --transform-dir exp/tri5a/decode \ - exp/tri5a/graph data/dev exp/tri5a_mpe/decode || exit 1 ; +# discriminative training +# local/run_discriminative.sh # SGMM system [sgmm5a] -steps/train_ubm.sh --cmd "$train_cmd" \ - 900 data/train data/lang exp/tri5a_ali exp/ubm5a || exit 1; +# local/run_sgmm.sh -steps/train_sgmm2.sh --cmd "$train_cmd" \ - 14000 35000 data/train data/lang exp/tri5a_ali \ - exp/ubm5a/final.ubm exp/sgmm2_5a || exit 1; +# nnet1 dnn +# local/nnet/run_dnn.sh -utils/mkgraph.sh data/lang_test exp/sgmm2_5a exp/sgmm2_5a/graph || exit 1; -steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --config conf/decode.config \ - --transform-dir exp/tri5a/decode \ - exp/sgmm2_5a/graph data/dev exp/sgmm2_5a/decode || exit 1; +# online nnet2 +local/online/run_nnet2_ms.sh -# nnet1 dnn -local/nnet/run_dnn.sh +# online nnet3 +local/nnet3/run_tdnn.sh -# nnet2 -local/nnet2/run_5d.sh -local/nnet2/run_convnet.sh +# online chain +local/chain/run_tdnn.sh # getting results (see RESULTS file) -for x in exp/*/decode; do [ -d $x ] && grep Sum $x/score_*/*.sys | utils/best_wer.sh; done 2>/dev/null for x in exp/*/decode; do [ -d $x ] && grep WER $x/cer_* | utils/best_wer.sh; done 2>/dev/null -exit 1; - +exit 0; diff --git a/egs/wsj/s5/steps/select_feats.sh b/egs/wsj/s5/steps/select_feats.sh index 072dd3194cf..e77c9b53630 100755 --- a/egs/wsj/s5/steps/select_feats.sh +++ b/egs/wsj/s5/steps/select_feats.sh @@ -2,6 +2,9 @@ # Copyright 2014 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0 + +# This script is deprecated. Use utils/data/limit_feature_dim.sh. + # This script selects some specified dimensions of the features in the # input data directory. diff --git a/egs/wsj/s5/utils/data/limit_feature_dim.sh b/egs/wsj/s5/utils/data/limit_feature_dim.sh new file mode 100755 index 00000000000..4e64e68d7c7 --- /dev/null +++ b/egs/wsj/s5/utils/data/limit_feature_dim.sh @@ -0,0 +1,48 @@ +#!/bin/bash +77;20003;0c +# Copyright 2016 Alibaba Robotics Corp. (author: Xingyu Na) +# Apache 2.0 + +# The script creates a new data directory by selecting a specified +# dimension range of the features in the source directory. + +. utils/parse_options.sh + +if [ $# != 3 ]; then + echo "Usage: " + echo " $0 " + echo "The script creates a new data directory by selecting a specified" + echo "dimension range of the features in the source directory." + echo "e.g.:" + echo " $0 0:39 data/train_hires_pitch data/train_hires" + exit 1; +fi + +feat_dim_range=$1 +srcdir=$2 +destdir=$3 + +if [ "$destdir" == "$srcdir" ]; then + echo "$0: this script requires and to be different." + exit 1 +fi + +if [ ! 
-f $srcdir/feats.scp ]; then + echo "$0: no such file $srcdir/feats.scp" + exit 1; +fi + +mkdir -p $destdir +utils/copy_data_dir.sh $srcdir $destdir + +if [ -f $destdir/cmvn.scp ]; then + rm $destdir/cmvn.scp + echo "$0: warning: removing $destdir/cmvn.cp, you will have to regenerate it from the features." +fi + +rm $destdir/feats.scp +sed 's/$/\[:,'${feat_dim_range}'\]/' $srcdir/feats.scp | \ + utils/data/normalize_data_range.pl > $destdir/feats.scp + +[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" +utils/validate_data_dir.sh $validate_opts $destdir diff --git a/tools/extras/install_kaldi_lm.sh b/tools/extras/install_kaldi_lm.sh new file mode 100755 index 00000000000..9b5fd23a9c3 --- /dev/null +++ b/tools/extras/install_kaldi_lm.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# The script downloads and installs kaldi_lm + +set -e + +# Make sure we are in the tools/ directory. +if [ `basename $PWD` == extras ]; then + cd .. +fi + +! [ `basename $PWD` == tools ] && \ + echo "You must call this script from the tools/ directory" && exit 1; + +echo "Installing kaldi_lm" + +if [ ! -d "kaldi_lm" ]; then + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; + tar -xvzf kaldi_lm.tar.gz || exit 1; +fi + +cd kaldi_lm +make || exit 1; +cd .. + +( + set +u + + wd=`pwd` + echo "export PATH=\$PATH:$wd/kaldi_lm" +) >> env.sh + +echo >&2 "Installation of kaldi_lm finished successfully" +echo >&2 "Please source tools/env.sh in your path.sh to enable it" diff --git a/tools/extras/install_mmseg.sh b/tools/extras/install_mmseg.sh new file mode 100755 index 00000000000..b931b93674f --- /dev/null +++ b/tools/extras/install_mmseg.sh @@ -0,0 +1,68 @@ +#!/bin/bash +set -e + + +# Make sure we are in the tools/ directory. +if [ `basename $PWD` == extras ]; then + cd .. +fi + +! [ `basename $PWD` == tools ] && \ + echo "You must call this script from the tools/ directory" && exit 1; + + +# Install python-devel package if not already available +# first, makes sure distutils.sysconfig usable +if ! $(python -c "import distutils.sysconfig" &> /dev/null); then + echo "$0: WARNING: python library distutils.sysconfig not usable, this is necessary to figure out the path of Python.h." >&2 + echo "Proceeding with installation." >&2 +else + # get include path for this python version + INCLUDE_PY=$(python -c "from distutils import sysconfig as s; print s.get_config_vars()['INCLUDEPY']") + if [ ! -f "${INCLUDE_PY}/Python.h" ]; then + echo "$0 : ERROR: python-devel/python-dev not installed" >&2 + if which yum >&/dev/null; then + # this is a red-hat system + echo "$0: we recommend that you run (our best guess):" + echo " sudo yum install python-devel" + fi + if which apt-get >&/dev/null; then + # this is a debian system + echo "$0: we recommend that you run (our best guess):" + echo " sudo apt-get install python-dev" + fi + exit 1 + fi +fi + +if [ -d ./mmseg-1.3.0 ] ; then + echo >&2 "$0: Warning: old installation of mmseg found. You should manually" + echo >&2 " delete the directory tools/mmseg and " + echo >&2 " edit the file tools/env.sh and remove manually all references to it" +fi + +if [ ! 
-d ./mmseg-1.3.0 ] ; then + wget http://pypi.python.org/packages/source/m/mmseg/mmseg-1.3.0.tar.gz + tar xf mmseg-1.3.0.tar.gz +fi + +pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'` +export PYTHONPATH=$PYTHONPATH:`pwd`/mmseg-1.3.0/lib/python${pyver}/site-packages +cd mmseg-1.3.0 +mkdir -p lib/python${pyver}/site-packages +python setup.py build +python setup.py install --prefix `pwd` +cd ../ + +( + set +u + pyver=`python --version 2>&1 | sed -e 's:.*\([2-3]\.[0-9]\+\).*:\1:g'` + wd=`pwd` + + [ -f ./env.sh ] && . ./env.sh + + echo "export PYTHONPATH=\$PYTHONPATH:$wd/mmseg-1.3.0/lib/python${pyver}/site-packages" +) >> env.sh + +echo >&2 "Installation of mmseg finished successfully" +echo >&2 "Please source tools/env.sh in your path.sh to enable it" From 61d6f1ecd7befc6c0cfb36ad14588287583c81cb Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 2 Jan 2017 18:30:53 -0500 Subject: [PATCH 182/530] segmentation: Modify segmentation codes --- src/segmenter/segment.h | 6 + src/segmenter/segmentation-utils.cc | 42 +++--- src/segmenter/segmentation-utils.h | 4 +- src/segmenter/segmentation.cc | 56 +++++--- src/segmenterbin/Makefile | 3 +- .../class-counts-per-frame-to-labels.cc | 115 +++++++++++++++ .../segmentation-combine-segments.cc | 17 ++- src/segmenterbin/segmentation-copy.cc | 54 +++++-- src/segmenterbin/segmentation-get-stats.cc | 34 +++-- ...ntation-init-from-additive-signals-info.cc | 48 +++---- .../segmentation-init-from-ali.cc | 8 +- .../segmentation-merge-recordings.cc | 3 +- src/segmenterbin/segmentation-to-rttm.cc | 133 +++++++++++------- 13 files changed, 371 insertions(+), 152 deletions(-) create mode 100644 src/segmenterbin/class-counts-per-frame-to-labels.cc diff --git a/src/segmenter/segment.h b/src/segmenter/segment.h index 1657affc875..b54b5367c73 100644 --- a/src/segmenter/segment.h +++ b/src/segmenter/segment.h @@ -48,6 +48,12 @@ struct Segment { static size_t SizeInBytes() { return (sizeof(int32) + sizeof(int32) + sizeof(int32)); } + + void Reset() { + start_frame = -1; + end_frame = -1; + class_id = -1; + } }; /** diff --git a/src/segmenter/segmentation-utils.cc b/src/segmenter/segmentation-utils.cc index 3adc178d66d..c69d7ff3397 100644 --- a/src/segmenter/segmentation-utils.cc +++ b/src/segmenter/segmentation-utils.cc @@ -46,14 +46,26 @@ void MergeLabels(const std::vector &merge_labels, void RelabelSegmentsUsingMap(const unordered_map &label_map, Segmentation *segmentation) { + int32 default_label = -1; + unordered_map::const_iterator it = label_map.find(-1); + if (it != label_map.end()) { + default_label = it->second; + KALDI_ASSERT(default_label != -1); + } + for (SegmentList::iterator it = segmentation->Begin(); it != segmentation->End(); ++it) { unordered_map::const_iterator map_it = label_map.find( it->Label()); - if (map_it == label_map.end()) - KALDI_ERR << "Could not find label " << it->Label() << " in label map."; - - it->SetLabel(map_it->second); + if (map_it == label_map.end()) { + if (default_label == -1) + KALDI_ERR << "Could not find label " << it->Label() + << " in label map."; + else + it->SetLabel(default_label); + } else { + it->SetLabel(map_it->second); + } } } @@ -294,7 +306,7 @@ void IntersectSegmentationAndAlignment(const Segmentation &in_segmentation, it != in_segmentation.End(); ++it) { Segmentation filter_segmentation; InsertFromAlignment(alignment, it->start_frame, - std::min(it->end_frame + 1, + std::min(it->end_frame + 1, static_cast(alignment.size())), 0, &filter_segmentation, NULL); @@ -444,7 +456,7 @@ void 
WidenSegments(int32 label, int32 length, Segmentation *segmentation) { // overlaps the current segment. So remove the current segment. it = segmentation->Erase(it); // So that we can increment in the for loop - --it; // TODO(Vimal): This is buggy. + --it; // TODO(Vimal): This is buggy. } else if (prev_it->end_frame >= it->start_frame) { // The extended previous segment in Line (1) reduces the length of // this segment. @@ -539,7 +551,7 @@ bool ConvertToAlignment(const Segmentation &segmentation, for (; it != segmentation.End(); ++it) { if (length != -1 && it->end_frame >= length + tolerance) { KALDI_WARN << "End frame (" << it->end_frame << ") " - << ">= length (" << length + << ">= length (" << length << ") + tolerance (" << tolerance << ")." << "Conversion failed."; return false; @@ -565,7 +577,7 @@ int32 InsertFromAlignment(const std::vector &alignment, int32 start, int32 end, int32 start_time_offset, Segmentation *segmentation, - std::vector *frame_counts_per_class) { + std::map *frame_counts_per_class) { KALDI_ASSERT(segmentation); if (end <= start) return 0; // nothing to insert @@ -593,12 +605,8 @@ int32 InsertFromAlignment(const std::vector &alignment, i-1 + start_time_offset, state); num_segments++; - if (frame_counts_per_class && state > 0) { - if (frame_counts_per_class->size() <= state) { - frame_counts_per_class->resize(state + 1, 0); - } + if (frame_counts_per_class) (*frame_counts_per_class)[state] += i - start_frame; - } } start_frame = i; state = alignment[i]; @@ -609,12 +617,8 @@ int32 InsertFromAlignment(const std::vector &alignment, segmentation->EmplaceBack(start_frame + start_time_offset, end-1 + start_time_offset, state); num_segments++; - if (frame_counts_per_class && state > 0) { - if (frame_counts_per_class->size() <= state) { - frame_counts_per_class->resize(state + 1, 0); - } + if (frame_counts_per_class) (*frame_counts_per_class)[state] += end - start_frame; - } #ifdef KALDI_PARANOID segmentation->Check(); @@ -637,7 +641,7 @@ int32 InsertFromSegmentation( for (SegmentList::const_iterator it = in_segmentation.Begin(); it != in_segmentation.End(); ++it) { out_segmentation->EmplaceBack(it->start_frame + start_time_offset, - it->end_frame + start_time_offset, + it->end_frame + start_time_offset, it->Label()); num_segments++; if (frame_counts_per_class) { diff --git a/src/segmenter/segmentation-utils.h b/src/segmenter/segmentation-utils.h index 9401722ccb7..30136ab0a5a 100644 --- a/src/segmenter/segmentation-utils.h +++ b/src/segmenter/segmentation-utils.h @@ -265,7 +265,7 @@ int32 InsertFromAlignment(const std::vector &alignment, int32 start, int32 end, int32 start_time_offset, Segmentation *segmentation, - std::vector *frame_counts_per_class = NULL); + std::map *frame_counts_per_class = NULL); /** * Insert segments from in_segmentation, but shift them by @@ -291,7 +291,7 @@ void ExtendSegmentation(const Segmentation &in_segmentation, bool sort, /** * This function is used to get per-frame count of number of classes. * The output is in the format of a vector of maps. - * class_counts_per_frame: A pointer to a vector of maps use to get the output. + * class_counts_per_frame: A pointer to a vector of maps used to get the output. * The size of the vector is the number of frames. 
* For each frame, there is a map from the "class_id" * to the number of segments where the label the diff --git a/src/segmenter/segmentation.cc b/src/segmenter/segmentation.cc index fb83ed5476b..01f8b0e8057 100644 --- a/src/segmenter/segmentation.cc +++ b/src/segmenter/segmentation.cc @@ -85,22 +85,36 @@ void Segmentation::Read(std::istream &is, bool binary) { } dim_ = segmentssz; } else { - if (int c = is.peek() != static_cast('[')) { - KALDI_ERR << "Segmentation::Read: expected to see [, saw " - << static_cast(c) << ", at file position " << is.tellg(); + Segment seg; + while (1) { + int i = is.peek(); + if (i == -1) { + KALDI_ERR << "Unexpected EOF"; + } else if (static_cast(i) == '\n') { + if (seg.start_frame != -1) { + KALDI_ERR << "No semicolon before newline (wrong format)"; + } else { + is.get(); + break; + } + } else if (std::isspace(i)) { + is.get(); + } else if (static_cast(i) == ';') { + if (seg.start_frame != -1) { + segments_.push_back(seg); + dim_++; + seg.Reset(); + } else { + is.get(); + KALDI_ASSERT(static_cast(is.peek()) == '\n'); + is.get(); + break; + } + is.get(); + } else { + seg.Read(is, false); + } } - is.get(); // consume the '[' - is >> std::ws; - while (is.peek() != static_cast(']')) { - KALDI_ASSERT(!is.eof()); - Segment seg; - seg.Read(is, binary); - segments_.push_back(seg); - dim_++; - is >> std::ws; - } - is.get(); - KALDI_ASSERT(!is.eof()); } #ifdef KALDI_PARANOID Check(); @@ -126,12 +140,14 @@ void Segmentation::Write(std::ostream &os, bool binary) const { it->Write(os, binary); } } else { - os << "[ "; + if (Dim() == 0) { + os << ";"; + } for (; it != End(); ++it) { it->Write(os, binary); - os << std::endl; + os << "; "; } - os << "]" << std::endl; + os << std::endl; } } @@ -175,8 +191,8 @@ void Segmentation::GenRandomSegmentation(int32 max_length, int32 st = 0; int32 end = 0; - while (st > max_length) { - int32 segment_length = RandInt(0, max_segment_length); + while (st < max_length) { + int32 segment_length = RandInt(1, max_segment_length); end = st + segment_length - 1; diff --git a/src/segmenterbin/Makefile b/src/segmenterbin/Makefile index 1f0efe71181..22a74e70551 100644 --- a/src/segmenterbin/Makefile +++ b/src/segmenterbin/Makefile @@ -16,7 +16,8 @@ BINFILES = segmentation-copy segmentation-get-stats \ segmentation-combine-segments-to-recordings \ segmentation-create-overlapped-subsegments \ segmentation-intersect-segments \ - segmentation-init-from-additive-signals-info #\ + segmentation-init-from-additive-signals-info \ + class-counts-per-frame-to-labels#\ gmm-acc-pdf-stats-segmentation \ gmm-est-segmentation gmm-update-segmentation \ segmentation-init-from-diarization \ diff --git a/src/segmenterbin/class-counts-per-frame-to-labels.cc b/src/segmenterbin/class-counts-per-frame-to-labels.cc new file mode 100644 index 00000000000..85676794e95 --- /dev/null +++ b/src/segmenterbin/class-counts-per-frame-to-labels.cc @@ -0,0 +1,115 @@ +// segmenterbin/class-counts-per-frame-to-labels.cc + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "hmm/posterior.h" + + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + + const char *usage = + "Converts class-counts-per-frame in the format of vectors of vectors of " + "integers into labels for overlapping SAD.\n" + "If there is a junk-label in the classes in the frame, then the label " + "for the frame is set to the junk-label no matter what other labels " + "are present.\n" + "If there is only a 0 (silence) in the classes in the frame, then the " + "label for the frame is set to 0.\n" + "If there is only one non-zero non-junk class, then the label is set " + "to 1.\n" + "Otherwise, the label is set to 2 (overlapping speakers)\n" + "\n" + "Usage: class-counts-per-frame-to-labels [options] " + " \n"; + + int32 junk_label = -1; + ParseOptions po(usage); + + po.Register("junk-label", &junk_label, + "The label used for segments that are junk. If a frame has " + "a junk label, it will be considered junk segment, no matter " + "what other labels the frame contains. Also frames with no " + "classes seen are labeled junk."); + + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string in_fn = po.GetArg(1), + out_fn = po.GetArg(2); + + int num_done = 0; + Int32VectorWriter writer(out_fn); + SequentialPosteriorReader reader(in_fn); + for (; !reader.Done(); reader.Next(), num_done++) { + const Posterior &class_counts_per_frame = reader.Value(); + std::vector labels(class_counts_per_frame.size(), junk_label); + + for (size_t i = 0; i < class_counts_per_frame.size(); i++) { + const std::vector > &class_counts = + class_counts_per_frame[i]; + + if (class_counts.size() == 0) { + labels[i] = junk_label; + } else { + bool silence_found = false; + std::vector >::const_iterator it = + class_counts.begin(); + int32 class_counts_in_frame = 0; + for (; it != class_counts.end(); ++it) { + KALDI_ASSERT(it->second > 0); + if (it->first == 0) { + silence_found = true; + } else { + class_counts_in_frame += static_cast(it->second); + if (it->first == junk_label) { + labels[i] = junk_label; + break; + } + } + } + + if (class_counts_in_frame == 0) { + KALDI_ASSERT(silence_found); + labels[i] = 0; + } else if (class_counts_in_frame == 1) { + labels[i] = 1; + } else { + labels[i] = 2; + } + } + } + writer.Write(reader.Key(), labels); + } + KALDI_LOG << "Copied " << num_done << " items."; + return (num_done != 0 ? 
0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + + + diff --git a/src/segmenterbin/segmentation-combine-segments.cc b/src/segmenterbin/segmentation-combine-segments.cc index 7034a8a1734..09b789a0921 100644 --- a/src/segmenterbin/segmentation-combine-segments.cc +++ b/src/segmenterbin/segmentation-combine-segments.cc @@ -44,8 +44,16 @@ int main(int argc, char *argv[]) { "segmentation-merge, segmentatin-merge-recordings, " "segmentation-post-process --merge-adjacent-segments\n"; + bool include_missing = false; + ParseOptions po(usage); + po.Register("include-missing-utt-level-segmentations", &include_missing, + "If true, then the segmentations missing in " + "utt-level-segmentation-rspecifier is included in the " + "final output with the label taken from the " + "kaldi-segments-segmentation-rspecifier"); + po.Read(argc, argv); if (po.NumArgs() != 4) { @@ -96,12 +104,17 @@ int main(int argc, char *argv[]) { if (!utt_segmentation_reader.HasKey(*it)) { KALDI_WARN << "Could not find utterance " << *it << " in " << "segmentation " << utt_segmentation_rspecifier; - num_err++; + if (!include_missing) { + num_err++; + } else { + out_segmentation.PushBack(segment); + num_segments++; + } continue; } + const Segmentation &utt_segmentation = utt_segmentation_reader.Value(*it); - num_segments += InsertFromSegmentation(utt_segmentation, segment.start_frame, false, &out_segmentation, NULL); diff --git a/src/segmenterbin/segmentation-copy.cc b/src/segmenterbin/segmentation-copy.cc index 26d0f47682d..e3384170805 100644 --- a/src/segmenterbin/segmentation-copy.cc +++ b/src/segmenterbin/segmentation-copy.cc @@ -31,9 +31,8 @@ int main(int argc, char *argv[]) { "Copy segmentation or archives of segmentation.\n" "If label-map is supplied, then apply the mapping to the labels \n" "when copying.\n" - "If utt2label-rspecifier is supplied, then ignore the \n" - "original labels, and map all the segments of an utterance using \n" - "the supplied utt2label map.\n" + "If utt2label-map-rspecifier is supplied, then an utterance-specific " + "mapping is applied on the original labels\n" "\n" "Usage: segmentation-copy [options] " "\n" @@ -44,7 +43,7 @@ int main(int argc, char *argv[]) { " e.g.: segmentation-copy --binary=false foo -\n"; bool binary = true; - std::string label_map_rxfilename, utt2label_rspecifier; + std::string label_map_rxfilename, utt2label_map_rspecifier; std::string include_rxfilename, exclude_rxfilename; int32 keep_label = -1; BaseFloat frame_subsampling_factor = 1; @@ -58,8 +57,13 @@ int main(int argc, char *argv[]) { "File with mapping from old to new labels"); po.Register("frame-subsampling-factor", &frame_subsampling_factor, "Change frame rate by this factor"); - po.Register("utt2label-rspecifier", &utt2label_rspecifier, - "Mapping for each utterance to an integer label"); + po.Register("utt2label-map-rspecifier", &utt2label_map_rspecifier, + "Utterance-specific mapping from old to new labels. " + "The first column is the utterance id. The next columns are " + "pairs :. If is -1, then " + "that represents the default label map. i.e. 
Any old label " + "for which the mapping is not defined, will be mapped to the " + "label corresponding to old-label -1."); po.Register("keep-label", &keep_label, "If supplied, only segments of this label are written out"); po.Register("include", &include_rxfilename, @@ -162,8 +166,8 @@ int main(int argc, char *argv[]) { ScaleFrameShift(frame_subsampling_factor, &segmentation); } - if (!utt2label_rspecifier.empty()) - KALDI_ERR << "It makes no sense to specify utt2label-rspecifier " + if (!utt2label_map_rspecifier.empty()) + KALDI_ERR << "It makes no sense to specify utt2label-map-rspecifier " << "when not reading segmentation archives."; Output ko(segmentation_out_fn, binary); @@ -172,7 +176,8 @@ int main(int argc, char *argv[]) { KALDI_LOG << "Copied segmentation to " << segmentation_out_fn; return 0; } else { - RandomAccessInt32Reader utt2label_reader(utt2label_rspecifier); + RandomAccessTokenVectorReader utt2label_map_reader( + utt2label_map_rspecifier); SegmentationWriter writer(segmentation_out_fn); SequentialSegmentationReader reader(segmentation_in_fn); @@ -190,24 +195,43 @@ int main(int argc, char *argv[]) { if (label_map_rxfilename.empty() && frame_subsampling_factor == 1.0 && - utt2label_rspecifier.empty() && + utt2label_map_rspecifier.empty() && keep_label == -1) { writer.Write(key, reader.Value()); } else { Segmentation segmentation = reader.Value(); if (!label_map_rxfilename.empty()) RelabelSegmentsUsingMap(label_map, &segmentation); - if (!utt2label_rspecifier.empty()) { - if (!utt2label_reader.HasKey(key)) { + + if (!utt2label_map_rspecifier.empty()) { + if (!utt2label_map_reader.HasKey(key)) { KALDI_WARN << "Utterance " << key - << " not found in utt2label map " - << utt2label_rspecifier; + << " not found in utt2label_map " + << utt2label_map_rspecifier; num_err++; continue; } - RelabelAllSegments(utt2label_reader.Value(key), &segmentation); + unordered_map utt_label_map; + + const std::vector &utt_label_map_vec = + utt2label_map_reader.Value(key); + std::vector::const_iterator it = + utt_label_map_vec.begin(); + + for (; it != utt_label_map_vec.end(); ++it) { + std::vector vec; + SplitStringToFloats(*it, ":", false, &vec); + if (vec.size() != 2) { + KALDI_ERR << "Invalid utt-label-map " << *it; + } + utt_label_map[static_cast(vec[0])] = + static_cast(vec[1]); + } + + RelabelSegmentsUsingMap(utt_label_map, &segmentation); } + if (keep_label != -1) KeepSegments(keep_label, &segmentation); diff --git a/src/segmenterbin/segmentation-get-stats.cc b/src/segmenterbin/segmentation-get-stats.cc index b25d6913f06..1e39bafec44 100644 --- a/src/segmenterbin/segmentation-get-stats.cc +++ b/src/segmenterbin/segmentation-get-stats.cc @@ -17,7 +17,9 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. 
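(The utt2label-map handling that segmentation-copy gains above boils down to the following; a Python sketch of the same "old:new" pair format, where an entry with old label -1 supplies the default mapping.)

def parse_label_map(tokens):
    # Each token is an "old:new" pair, e.g. ["0:0", "1:2", "-1:3"].
    label_map = {}
    for tok in tokens:
        old, new = (int(x) for x in tok.split(":"))
        label_map[old] = new
    return label_map

def relabel(labels, label_map):
    default = label_map.get(-1)  # the -1 entry acts as the fallback mapping
    out = []
    for l in labels:
        if l in label_map:
            out.append(label_map[l])
        elif default is not None:
            out.append(default)
        else:
            raise KeyError("no mapping for label %d and no default" % l)
    return out

# e.g. relabel([0, 1, 5], parse_label_map(["0:0", "1:2", "-1:3"])) -> [0, 2, 3]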
+#include #include "base/kaldi-common.h" +#include "hmm/posterior.h" #include "util/common-utils.h" #include "segmenter/segmentation-utils.h" @@ -33,9 +35,10 @@ int main(int argc, char *argv[]) { " num-classes: Number of distinct classes common to this frame\n" "\n" "Usage: segmentation-get-stats [options] " - " \n" + " " + "\n" " e.g.: segmentation-get-stats ark:1.seg ark:/dev/null " - "ark:num_classes.ark\n"; + "ark:num_classes.ark ark:/dev/null\n"; ParseOptions po(usage); @@ -51,20 +54,23 @@ int main(int argc, char *argv[]) { po.Read(argc, argv); - if (po.NumArgs() != 3) { + if (po.NumArgs() != 4) { po.PrintUsage(); exit(1); } std::string segmentation_rspecifier = po.GetArg(1), num_overlaps_wspecifier = po.GetArg(2), - num_classes_wspecifier = po.GetArg(3); + num_classes_wspecifier = po.GetArg(3), + class_counts_per_frame_wspecifier = po.GetArg(4); int64 num_done = 0, num_err = 0; SequentialSegmentationReader reader(segmentation_rspecifier); Int32VectorWriter num_overlaps_writer(num_overlaps_wspecifier); Int32VectorWriter num_classes_writer(num_classes_wspecifier); + PosteriorWriter class_counts_per_frame_writer( + class_counts_per_frame_wspecifier); RandomAccessInt32Reader lengths_reader(lengths_rspecifier); @@ -82,34 +88,42 @@ int main(int argc, char *argv[]) { length = lengths_reader.Value(key); } - std::vector > class_counts_per_frame; + std::vector > class_counts_map_per_frame; if (!GetClassCountsPerFrame(segmentation, length, length_tolerance, - &class_counts_per_frame)) { + &class_counts_map_per_frame)) { KALDI_WARN << "Failed getting stats for key " << key; num_err++; continue; } if (length == -1) - length = class_counts_per_frame.size(); + length = class_counts_map_per_frame.size(); std::vector num_classes_per_frame(length, 0); std::vector num_overlaps_per_frame(length, 0); + Posterior class_counts_per_frame(length, + std::vector >()); - for (int32 i = 0; i < class_counts_per_frame.size(); i++) { - std::map &class_counts = class_counts_per_frame[i]; + for (int32 i = 0; i < class_counts_map_per_frame.size(); i++) { + std::map &class_counts = class_counts_map_per_frame[i]; for (std::map::const_iterator it = class_counts.begin(); it != class_counts.end(); ++it) { - if (it->second > 0) + if (it->second > 0) { num_classes_per_frame[i]++; + class_counts_per_frame[i].push_back( + std::make_pair(it->first, it->second)); + } num_overlaps_per_frame[i] += it->second; } + std::sort(class_counts_per_frame[i].begin(), + class_counts_per_frame[i].end()); } num_classes_writer.Write(key, num_classes_per_frame); num_overlaps_writer.Write(key, num_overlaps_per_frame); + class_counts_per_frame_writer.Write(key, class_counts_per_frame); num_done++; } diff --git a/src/segmenterbin/segmentation-init-from-additive-signals-info.cc b/src/segmenterbin/segmentation-init-from-additive-signals-info.cc index 139048ac17b..ccddb4c2b60 100644 --- a/src/segmenterbin/segmentation-init-from-additive-signals-info.cc +++ b/src/segmenterbin/segmentation-init-from-additive-signals-info.cc @@ -35,9 +35,9 @@ int main(int argc, char *argv[]) { "ark:reco_segmentation.ark ark,t:overlapped_segments_info.txt ark:-\n"; BaseFloat frame_shift = 0.01; + int32 junk_label = -1; std::string lengths_rspecifier; std::string additive_signals_segmentation_rspecifier; - std::string unreliable_segmentation_wspecifier; ParseOptions po(usage); @@ -49,10 +49,9 @@ int main(int argc, char *argv[]) { &additive_signals_segmentation_rspecifier, "Archive of segmentation of the additive signal which will used " "instead of an all 1 segmentation"); 
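(For intuition, the three per-frame statistics that segmentation-get-stats writes can be sketched in Python as follows; the {class: count} dictionaries stand in for what GetClassCountsPerFrame produces.)

def frame_stats(class_counts_per_frame):
    # For each frame: number of distinct classes, total number of overlapping
    # segments, and a posterior-style list of (class, count) pairs sorted by
    # class id, mirroring the three writers in segmentation-get-stats.
    num_classes, num_overlaps, posteriors = [], [], []
    for counts in class_counts_per_frame:
        active = sorted((c, n) for c, n in counts.items() if n > 0)
        num_classes.append(len(active))
        num_overlaps.append(sum(counts.values()))
        posteriors.append(active)
    return num_classes, num_overlaps, posteriors

# Two speakers overlap on the second frame:
print(frame_stats([{1: 1}, {1: 1, 2: 1}, {0: 1}]))
# -> ([1, 2, 1], [1, 2, 1], [[(1, 1)], [(1, 1), (2, 1)], [(0, 1)]])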
- po.Register("unreliable-segmentation-wspecifier", - &unreliable_segmentation_wspecifier, - "Applicable when additive-signals-segmentation-rspecifier is " - "provided and some utterances in it are missing"); + po.Register("junk-label", &junk_label, + "If specified, then unreliable regions are labeled with this " + "label"); po.Read(argc, argv); @@ -70,7 +69,6 @@ int main(int argc, char *argv[]) { SegmentationWriter writer(segmentation_wspecifier); RandomAccessSegmentationReader additive_signals_segmentation_reader(additive_signals_segmentation_rspecifier); - SegmentationWriter unreliable_writer(unreliable_segmentation_wspecifier); RandomAccessInt32Reader lengths_reader(lengths_rspecifier); @@ -84,18 +82,20 @@ int main(int argc, char *argv[]) { num_missing++; continue; } - const std::vector &additive_signals_info = additive_signals_info_reader.Value(key); + const std::vector &additive_signals_info = + additive_signals_info_reader.Value(key); Segmentation segmentation(reco_segmentation_reader.Value()); - Segmentation unreliable_segmentation; for (size_t i = 0; i < additive_signals_info.size(); i++) { std::vector parts; SplitStringToVector(additive_signals_info[i], ",:", false, &parts); if (parts.size() != 3) { - KALDI_ERR << "Invalid format of overlap info " << additive_signals_info[i] - << "for key " << key << " in " << additive_signals_info_rspecifier; + KALDI_ERR << "Invalid format of overlap info " + << additive_signals_info[i] + << "for key " << key << " in " + << additive_signals_info_rspecifier; } const std::string &utt_id = parts[0]; double start_time; @@ -110,17 +110,22 @@ int main(int argc, char *argv[]) { << "segmentation " << additive_signals_segmentation_rspecifier; if (duration < 0) { KALDI_ERR << "duration < 0 for utt_id " << utt_id << " in " - << "additive_signals_info " << additive_signals_info_rspecifier - << "; additive-signals-segmentation must be provided in such a case"; + << "additive_signals_info " + << additive_signals_info_rspecifier + << "; additive-signals-segmentation must be provided " + << "in such a case"; } num_err++; - unreliable_segmentation.EmplaceBack(start_frame, start_frame + duration - 1, 0); + int32 length = round(duration / frame_shift); + segmentation.EmplaceBack(start_frame, start_frame + length - 1, + junk_label); continue; // Treated as non-overlapping even though there // is overlap } - InsertFromSegmentation(additive_signals_segmentation_reader.Value(utt_id), - start_frame, false, &segmentation); + InsertFromSegmentation( + additive_signals_segmentation_reader.Value(utt_id), + start_frame, false, &segmentation); } Sort(&segmentation); @@ -134,19 +139,6 @@ int main(int argc, char *argv[]) { } writer.Write(key, segmentation); - if (!unreliable_segmentation_wspecifier.empty()) { - Sort(&unreliable_segmentation); - if (!lengths_rspecifier.empty()) { - if (!lengths_reader.HasKey(key)) { - KALDI_WARN << "Could not find length for the recording " << key - << "in " << lengths_rspecifier; - continue; - } - TruncateToLength(lengths_reader.Value(key), &unreliable_segmentation); - } - unreliable_writer.Write(key, unreliable_segmentation); - } - num_done++; } diff --git a/src/segmenterbin/segmentation-init-from-ali.cc b/src/segmenterbin/segmentation-init-from-ali.cc index a98a54368c9..452ff56c2d8 100644 --- a/src/segmenterbin/segmentation-init-from-ali.cc +++ b/src/segmenterbin/segmentation-init-from-ali.cc @@ -54,7 +54,7 @@ int main(int argc, char *argv[]) { int64 num_segments = 0; int64 num_err = 0; - std::vector frame_counts_per_class; + std::map 
frame_counts_per_class;

     SequentialInt32VectorReader alignment_reader(ali_rspecifier);

@@ -80,7 +80,11 @@ int main(int argc, char *argv[]) {
               << "wrote " << num_segmentations << " segmentations "
               << "with a total of " << num_segments << " segments.";
     KALDI_LOG << "Number of frames for the different classes: ";
-    WriteIntegerVector(KALDI_LOG, false, frame_counts_per_class);
+
+    std::map<int32, int64>::const_iterator it = frame_counts_per_class.begin();
+    for (; it != frame_counts_per_class.end(); ++it) {
+      KALDI_LOG << it->first << " " << it->second << " ; ";
+    }

     return ((num_done > 0 && num_err < num_done) ? 0 : 1);
   } catch(const std::exception &e) {
diff --git a/src/segmenterbin/segmentation-merge-recordings.cc b/src/segmenterbin/segmentation-merge-recordings.cc
index 85b5108be29..dccd82b0595 100644
--- a/src/segmenterbin/segmentation-merge-recordings.cc
+++ b/src/segmenterbin/segmentation-merge-recordings.cc
@@ -92,7 +92,8 @@ int main(int argc, char *argv[]) {
               << "created overall " << num_segments << " segments; "
               << "failed to merge " << num_err << " old segmentations";
-    return (num_new_segmentations > 0 && num_err < num_old_segmentations / 2);
+    return (num_new_segmentations > 0 && num_err < num_old_segmentations / 2 ?
+            0 : 1);
   } catch(const std::exception &e) {
     std::cerr << e.what();
     return -1;
diff --git a/src/segmenterbin/segmentation-to-rttm.cc b/src/segmenterbin/segmentation-to-rttm.cc
index 6ffd1a8b1e8..8f22d78f3bc 100644
--- a/src/segmenterbin/segmentation-to-rttm.cc
+++ b/src/segmenterbin/segmentation-to-rttm.cc
@@ -17,6 +17,7 @@
 // See the Apache 2 License for the specific language governing permissions and
 // limitations under the License.

+#include
 #include "base/kaldi-common.h"
 #include "util/common-utils.h"
 #include "segmenter/segmentation.h"
@@ -40,42 +41,60 @@ namespace segmenter {
  * The function returns the largest class_id that it encounters.
 **/
-int32 WriteRttm(const Segmentation &segmentation,
-                std::ostream &os, const std::string &file_id,
-                const std::string &channel,
-                BaseFloat frame_shift, BaseFloat start_time,
-                bool map_to_speech_and_sil) {
+void WriteRttm(const Segmentation &segmentation,
+               const std::string &file_id,
+               const std::string &channel,
+               BaseFloat frame_shift, BaseFloat start_time,
+               bool map_to_speech_and_sil,
+               int32 no_score_label, std::ostream &os) {
   SegmentList::const_iterator it = segmentation.Begin();
-  int32 largest_class = 0;
+
+  unordered_map<int32, bool> classes_map;
+  std::vector<int32> classes_vec;
+
   for (; it != segmentation.End(); ++it) {
+    if (no_score_label > 0 && it->Label() == no_score_label) {
+      os << "NOSCORE " << file_id << " " << channel << " "
+         << it->start_frame * frame_shift + start_time << " "
+         << (it->Length()) * frame_shift << " <NA> <NA> <NA> <NA>\n";
+      continue;
+    }
     os << "SPEAKER " << file_id << " " << channel << " "
-       << it->start_frame * frame_shift + start_time << " "
+       << it->start_frame * frame_shift + start_time << " "
        << (it->Length()) * frame_shift << " <NA> <NA> ";
     if (map_to_speech_and_sil) {
       switch (it->Label()) {
-        case 1:
-          os << "SPEECH ";
+        case 0:
+          os << "SILENCE ";
           break;
         default:
-          os << "SILENCE ";
+          os << "SPEECH ";
           break;
       }
-      largest_class = 1;
     } else {
       if (it->Label() >= 0) {
         os << it->Label() << " ";
-        if (it->Label() > largest_class)
-          largest_class = it->Label();
+        if (classes_map.count(it->Label()) == 0) {
+          classes_map[it->Label()] = true;
+          classes_vec.push_back(it->Label());
+        }
       }
     }
     os << "<NA>" << std::endl;
-  }
-  return largest_class;
-}
+  }

-}
+  if (!map_to_speech_and_sil) {
+    for (std::vector<int32>::const_iterator it = classes_vec.begin();
+         it != classes_vec.end(); ++it) {
+      os << "SPKR-INFO " << file_id << " " << channel
+         << " <NA> <NA> <NA> unknown " << *it << " <NA>\n";
+    }
+  }
 }

 }  // namespace segmenter
 }  // namespace kaldi

 int main(int argc, char *argv[]) {
   try {
     using namespace kaldi;
     using namespace segmenter;

     const char *usage =
         "Convert segmentation into RTTM\n"
         "\n"
-        "Usage: segmentation-to-rttm [options] \n"
+        "Usage: segmentation-to-rttm [options] "
+        "<segmentation-rspecifier> <rttm-out-wxfilename>\n"
         " e.g.: segmentation-to-rttm ark:1.seg -\n";
-
+    bool map_to_speech_and_sil = true;
+    int32 no_score_label = -1;
     BaseFloat frame_shift = 0.01;
     std::string segments_rxfilename;
     std::string reco2file_and_channel_rxfilename;

     ParseOptions po(usage);
-
+
     po.Register("frame-shift", &frame_shift, "Frame shift in seconds");
     po.Register("segments", &segments_rxfilename, "Segments file");
-    po.Register("reco2file-and-channel", &reco2file_and_channel_rxfilename, "reco2file_and_channel file");
-    po.Register("map-to-speech-and-sil", &map_to_speech_and_sil, "Map all classes to SPEECH and SILENCE");
+    po.Register("reco2file-and-channel", &reco2file_and_channel_rxfilename,
+                "reco2file_and_channel file");
+    po.Register("map-to-speech-and-sil", &map_to_speech_and_sil,
+                "Map all classes other than 0 to SPEECH");
+    po.Register("no-score-label", &no_score_label,
+                "If specified, then a NOSCORE region is added to RTTM "
+                "when this label occurs in the segmentation.");

     po.Read(argc, argv);

     if (po.NumArgs() != 2) {
       po.PrintUsage();
       exit(1);
     }
-
+
     unordered_map<std::string, std::string> utt2file;
     unordered_map<std::string, BaseFloat> utt2start_time;
     if (!segments_rxfilename.empty()) {
-      Input ki(segments_rxfilename);  // no binary argument: never binary.
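(A hedged Python sketch of the RTTM lines the rewritten WriteRttm above emits: NOSCORE regions for the junk label, SPEAKER regions otherwise. The <NA> filler fields follow the usual RTTM column convention, which is an assumption about the exact strings; times are frames scaled by the frame shift.)

def segment_to_rttm(file_id, channel, start_frame, num_frames, label,
                    frame_shift=0.01, no_score_label=-1):
    tbeg = start_frame * frame_shift
    tdur = num_frames * frame_shift
    if no_score_label > 0 and label == no_score_label:
        # Unreliable region: scoring tools should skip it.
        return "NOSCORE %s %s %.2f %.2f <NA> <NA> <NA> <NA>" % (
            file_id, channel, tbeg, tdur)
    return "SPEAKER %s %s %.2f %.2f <NA> <NA> %s <NA>" % (
        file_id, channel, tbeg, tdur, label)

print(segment_to_rttm("sw02001", "1", 100, 250, "SPEECH"))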
      int32 i = 0;
       std::string line;
       /* read each line from segments file */
       while (std::getline(ki.Stream(), line)) {
         std::vector<std::string> split_line;
         // Split the line by space or tab and check the number of fields in each
-        // line. There must be 4 fields--segment name, recording wav file name,
-        // start time, end time; 5th field (channel info) is optional.
+        // line. There must be 4 fields--segment name, recording wav file
+        // name, start time, end time; 5th field (channel info) is optional.
         SplitStringToVector(line, " \t\r", true, &split_line);
         if (split_line.size() != 4 && split_line.size() != 5) {
           KALDI_WARN << "Invalid line in segments file: " << line;
@@ -128,7 +154,7 @@ int main(int argc, char *argv[]) {
             utterance = split_line[1],
             start_str = split_line[2],
             end_str = split_line[3];
-
+
         // Convert the start time and end time to real from string. Segment is
         // ignored if start or end time cannot be converted to real.
         double start, end;
@@ -143,15 +169,18 @@ int main(int argc, char *argv[]) {
         // start time must not be negative; start time must not be greater than
         // end time, except if end time is -1
         if (start < 0 || end <= 0 || start >= end) {
-          KALDI_WARN << "Invalid line in segments file [empty or invalid segment]: "
-                     << line;
+          KALDI_WARN << "Invalid line in segments file "
+                     << "[empty or invalid segment]: "
+                     << line;
           continue;
         }
         int32 channel = -1;  // means channel info is unspecified.
-        // if each line has 5 elements then 5th element must be channel identifier
-        if(split_line.size() == 5) {
+        // if each line has 5 elements then 5th element must be channel
+        // identifier
+        if (split_line.size() == 5) {
           if (!ConvertStringToInteger(split_line[4], &channel) || channel < 0) {
-            KALDI_WARN << "Invalid line in segments file [bad channel]: " << line;
+            KALDI_WARN << "Invalid line in segments file "
+                       << "[bad channel]: " << line;
             continue;
           }
         }
@@ -163,10 +192,12 @@ int main(int argc, char *argv[]) {
       KALDI_LOG << "Read " << i << " lines from " << segments_rxfilename;
     }

-    unordered_map<std::string, std::pair<std::string, std::string>, StringHasher> reco2file_and_channel;
+    unordered_map<std::string, std::pair<std::string, std::string>,
+                  StringHasher> reco2file_and_channel;

     if (!reco2file_and_channel_rxfilename.empty()) {
-      Input ki(reco2file_and_channel_rxfilename);  // no binary argument: never binary.
+      // no binary argument: never binary.
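(The segments-file validation above can be summarized compactly; this Python sketch mirrors the same rules: 4 or 5 whitespace-separated fields, a non-negative start strictly before the end, and an optional non-negative integer channel in the fifth field.)

def parse_segments_line(line):
    fields = line.split()
    if len(fields) not in (4, 5):
        raise ValueError("expected 4 or 5 fields: " + line)
    seg_id, reco_id = fields[0], fields[1]
    start, end = float(fields[2]), float(fields[3])
    if start < 0 or end <= 0 or start >= end:
        raise ValueError("empty or invalid segment: " + line)
    if len(fields) == 5:
        channel = int(fields[4])
        if channel < 0:
            raise ValueError("bad channel: " + line)
    else:
        channel = -1  # channel info unspecified
    return seg_id, reco_id, start, end, channel

print(parse_segments_line("utt1 sw02001 0.50 2.75"))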
+ Input ki(reco2file_and_channel_rxfilename); int32 i = 0; std::string line; @@ -183,11 +214,13 @@ int main(int argc, char *argv[]) { const std::string &file_id = split_line[1]; const std::string &channel = split_line[2]; - reco2file_and_channel.insert(std::make_pair(reco_id, std::make_pair(file_id, channel))); + reco2file_and_channel.insert( + std::make_pair(reco_id, std::make_pair(file_id, channel))); i++; } - KALDI_LOG << "Read " << i << " lines from " << reco2file_and_channel_rxfilename; + KALDI_LOG << "Read " << i << " lines from " + << reco2file_and_channel_rxfilename; } unordered_set seen_files; @@ -196,18 +229,18 @@ int main(int argc, char *argv[]) { rttm_out_wxfilename = po.GetArg(2); int64 num_done = 0, num_err = 0; - + Output ko(rttm_out_wxfilename, false); SequentialSegmentationReader reader(segmentation_rspecifier); for (; !reader.Done(); reader.Next(), num_done++) { Segmentation segmentation(reader.Value()); const std::string &key = reader.Key(); - std::string reco_id = key; + std::string reco_id = key; BaseFloat start_time = 0.0; if (!segments_rxfilename.empty()) { if (utt2file.count(key) == 0 || utt2start_time.count(key) == 0) - KALDI_ERR << "Could not find key " << key << " in segments " + KALDI_ERR << "Could not find key " << key << " in segments " << segments_rxfilename; KALDI_ASSERT(utt2file.count(key) > 0 && utt2start_time.count(key) > 0); reco_id = utt2file[key]; @@ -216,8 +249,8 @@ int main(int argc, char *argv[]) { std::string file_id, channel; if (!reco2file_and_channel_rxfilename.empty()) { - if (reco2file_and_channel.count(reco_id) == 0) - KALDI_ERR << "Could not find recording " << reco_id + if (reco2file_and_channel.count(reco_id) == 0) + KALDI_ERR << "Could not find recording " << reco_id << " in " << reco2file_and_channel_rxfilename; file_id = reco2file_and_channel[reco_id].first; channel = reco2file_and_channel[reco_id].second; @@ -226,18 +259,18 @@ int main(int argc, char *argv[]) { channel = "1"; } - int32 largest_class = WriteRttm(segmentation, ko.Stream(), file_id, channel, frame_shift, start_time, map_to_speech_and_sil); + WriteRttm(segmentation, file_id, + channel, frame_shift, start_time, + map_to_speech_and_sil, no_score_label, ko.Stream()); if (map_to_speech_and_sil) { if (seen_files.count(reco_id) == 0) { - ko.Stream() << "SPKR-INFO " << file_id << " " << channel << " unknown SILENCE \n"; - ko.Stream() << "SPKR-INFO " << file_id << " " << channel << " unknown SPEECH \n"; + ko.Stream() << "SPKR-INFO " << file_id << " " << channel + << " unknown SILENCE \n"; + ko.Stream() << "SPKR-INFO " << file_id << " " << channel + << " unknown SPEECH \n"; seen_files.insert(reco_id); } - } else { - for (int32 i = 0; i < largest_class; i++) { - ko.Stream() << "SPKR-INFO " << file_id << " " << channel << " unknown " << i << " \n"; - } } } @@ -249,7 +282,3 @@ int main(int argc, char *argv[]) { return -1; } } - - - - From 5ac90c8df123f58eab8236484b9fdbdfe3bdfa38 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 2 Jan 2017 18:31:35 -0500 Subject: [PATCH 183/530] asr_diarization: Support objective type in basic_layers --- egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py index f74137da48b..be2776c90b8 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py @@ -582,7 +582,9 @@ def get_full_config(self): if output_delay != 0: 
cur_node = 'Offset({0}, {1})'.format(cur_node, output_delay) - line = ('output-node name={0} input={1}'.format(self.name, cur_node)) + line = ('output-node name={0} input={1} ' + 'objective={2}'.format( + self.name, cur_node, objective_type)) ans.append((config_name, line)) return ans From 6e3889b83c8ad7a455561f37875d8b61d95abc2f Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 2 Jan 2017 18:32:17 -0500 Subject: [PATCH 184/530] asr_diarization: Update multilingual egs creation --- .../nnet3/multilingual/allocate_multilingual_examples.py | 8 +++++++- egs/wsj/s5/steps/nnet3/multilingual/get_egs.sh | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py b/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py index cba804b1a66..9bc6da53705 100644 --- a/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py +++ b/egs/wsj/s5/steps/nnet3/multilingual/allocate_multilingual_examples.py @@ -214,7 +214,13 @@ def Main(): start_egs = lang2len[lang_id] - lang_len[lang_id] this_egs.append((lang_id, start_egs, args.minibatch_size)) for scpline in range(args.minibatch_size): - print("{0} {1}".format(scp_files[lang_id].readline().splitlines()[0], lang_id), file = archfile) + lines = scp_files[lang_id].readline().splitlines() + try: + print("{0} {1}".format(lines[0], lang_id), file=archfile) + except Exception: + logger.error("Failure to read from file %s, got %s", + scp_files[lang_id].name, lines) + raise lang_len[lang_id] = lang_len[lang_id] - args.minibatch_size num_egs = num_egs + args.minibatch_size; diff --git a/egs/wsj/s5/steps/nnet3/multilingual/get_egs.sh b/egs/wsj/s5/steps/nnet3/multilingual/get_egs.sh index aa9a911ffb2..58ef965de3e 100755 --- a/egs/wsj/s5/steps/nnet3/multilingual/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/multilingual/get_egs.sh @@ -18,6 +18,7 @@ set -u # Begin configuration section. cmd=run.pl minibatch_size=512 # multiple of minibatch used during training. +minibatch_size= num_jobs=10 # This can be set to max number of jobs to run in parallel; # Helps for better randomness across languages # per archive. @@ -85,6 +86,8 @@ for lang in $(seq 0 $[$num_langs-1]);do done done +cp ${multi_egs_dir[$lang]}/cmvn_opts $megs_dir + if [ $stage -le 0 ]; then echo "$0: allocating multilingual examples for training." # Generate egs.*.scp for multilingual setup. 
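(The allocation scheme in allocate_multilingual_examples.py keeps every minibatch language-homogeneous: each minibatch is drawn whole from a single language's egs.scp, and every emitted line is tagged with the language id; the failure case logged in the patch above is a short read near the end of one language's list. A simplified Python sketch follows; the random language choice here is an assumption, and the real script also records (lang, start, size) ranges for later bookkeeping.)

import random

def allocate(scp_lines_per_lang, minibatch_size):
    remaining = [list(lines) for lines in scp_lines_per_lang]
    out = []
    while True:
        # Only languages with a full minibatch left are candidates.
        candidates = [i for i, l in enumerate(remaining)
                      if len(l) >= minibatch_size]
        if not candidates:
            break
        lang = random.choice(candidates)
        for _ in range(minibatch_size):
            out.append("{0} {1}".format(remaining[lang].pop(0), lang))
    return out

print(allocate([["en-1 ark:a:0", "en-2 ark:a:100"], ["zh-1 ark:b:0"]], 2))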
From 3d10480b573d724cf990eb58dbd10cb1b191b442 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 2 Jan 2017 18:33:01 -0500 Subject: [PATCH 185/530] asr_diarization: Add per-dim accuracy to diagnostics --- egs/wsj/s5/steps/libs/nnet3/train/common.py | 16 +- .../nnet3/train/frame_level_objf/common.py | 166 ++++++++++-------- src/nnet3/nnet-diagnostics.cc | 74 ++++++-- src/nnet3/nnet-diagnostics.h | 15 +- 4 files changed, 176 insertions(+), 95 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py index 7ae44cdffae..503c3ba622d 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -197,9 +197,10 @@ def verify_egs_dir(egs_dir, feat_dim, ivector_dim, return [egs_left_context, egs_right_context, frames_per_eg, num_archives] - except (IOError, ValueError) as e: - raise Exception("The egs dir {0} has missing or " - "malformed files: {1}".format(egs_dir, e.strerr)) + except (IOError, ValueError): + logger.error("The egs dir {0} has missing or " + "malformed files.".format(egs_dir)) + raise def compute_presoftmax_prior_scale(dir, alidir, num_jobs, run_opts, @@ -561,7 +562,7 @@ def __init__(self): to steps/nnet3/get_egs.sh script""") self.parser.add_argument("--egs.use-multitask-egs", type=str, dest='use_multitask_egs', - default=True, choices=["true", "false"], + default=False, choices=["true", "false"], action=common_lib.StrToBoolAction, help="""Use mutlitask egs created using allocate_multilingual_egs.py.""") @@ -683,6 +684,13 @@ def __init__(self): lstm*=0,0.2,0'. More general should precede less general patterns, as they are applied sequentially.""") + self.parser.add_argument("--trainer.compute-per-dim-accuracy", + dest='compute_per_dim_accuracy', + type=str, choices=['true', 'false'], + default=False, + action=common_lib.StrToBoolAction, + help="Compute train and validation " + "accuracy per-dim") # General options self.parser.add_argument("--stage", type=int, default=-4, diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index 508445e331e..9c8b5d0ee95 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -115,13 +115,14 @@ def train_new_models(dir, iter, srand, num_jobs, "nnet3-merge-egs --minibatch-size={minibatch_size} " "--measure-output-frames=false " "--discard-partial-minibatches=true ark:- ark:- | " - "nnet3-shuffle-egs --buffer-size={shuffle_buffer_size} " + "nnet3-shuffle-egs " + "--buffer-size={shuffle_buffer_size} --srand={srand} " "ark:- ark:- |".format( frame_opts=("" if chunk_level_training else "--frame={0}".format(frame)), context_opts=context_opts, egs_dir=egs_dir, output_rename_opt=output_rename_opt, - archive_index=archive_index, + archive_index=archive_index, srand=iter + srand, shuffle_buffer_size=shuffle_buffer_size, extra_egs_copy_cmd=extra_egs_copy_cmd, minibatch_size=minibatch_size)) @@ -138,7 +139,7 @@ def train_new_models(dir, iter, srand, num_jobs, frame_opts=("" if chunk_level_training else "--frame={0}".format(frame)), context_opts=context_opts, egs_dir=egs_dir, - archive_index=archive_index, + archive_index=archive_index, srand=iter + srand, shuffle_buffer_size=shuffle_buffer_size, extra_egs_copy_cmd=extra_egs_copy_cmd, minibatch_size=minibatch_size)) @@ -154,9 +155,7 @@ def train_new_models(dir, iter, srand, num_jobs, {dir}/{next_iter}.{job}.raw""".format( 
command=run_opts.command, train_queue_opt=run_opts.train_queue_opt, - dir=dir, iter=iter, srand=iter + srand, - next_iter=iter + 1, - job=job, + dir=dir, iter=iter, next_iter=iter + 1, job=job, parallel_train_opts=run_opts.parallel_train_opts, cache_read_opt=cache_read_opt, cache_write_opt=cache_write_opt, @@ -194,7 +193,8 @@ def train_one_iteration(dir, iter, srand, egs_dir, get_raw_nnet_from_am=True, background_process_handler=None, extra_egs_copy_cmd="", use_multitask_egs=False, - rename_multitask_outputs=False): + rename_multitask_outputs=False, + compute_per_dim_accuracy=False): """ Called from steps/nnet3/train_*.py scripts for one iteration of neural network training @@ -223,9 +223,10 @@ def train_one_iteration(dir, iter, srand, egs_dir, if os.path.exists('{0}/srand'.format(dir)): try: saved_srand = int(open('{0}/srand'.format(dir)).readline().strip()) - except (IOError, ValueError) as e: - raise Exception("Exception while reading the random seed " - "for training: {0}".format(e.str())) + except (IOError, ValueError): + logger.error("Exception while reading the random seed " + "for training.") + raise if srand != saved_srand: logger.warning("The random seed provided to this iteration " "(srand={0}) is different from the one saved last " @@ -244,7 +245,8 @@ def train_one_iteration(dir, iter, srand, egs_dir, mb_size=cv_minibatch_size, get_raw_nnet_from_am=get_raw_nnet_from_am, wait=False, background_process_handler=background_process_handler, - extra_egs_copy_cmd=extra_egs_copy_cmd) + extra_egs_copy_cmd=extra_egs_copy_cmd, + compute_per_dim_accuracy=compute_per_dim_accuracy) if iter > 0: # Runs in the background @@ -395,25 +397,25 @@ def compute_preconditioning_matrix(dir, egs_dir, num_lda_jobs, run_opts, # Write stats with the same format as stats for LDA. common_lib.run_job( - """{command} JOB=1:{num_lda_jobs} {dir}/log/get_lda_stats.JOB.log \ - nnet3-acc-lda-stats --rand-prune={rand_prune} \ - {dir}/init.raw "ark:{egs_dir}/egs.JOB.ark" \ - {dir}/JOB.lda_stats""".format( - command=run_opts.command, - num_lda_jobs=num_lda_jobs, - dir=dir, - egs_dir=egs_dir, - rand_prune=rand_prune)) + """{command} JOB=1:{num_lda_jobs} {dir}/log/get_lda_stats.JOB.log """ + """ nnet3-acc-lda-stats --rand-prune={rand_prune}""" + """ {dir}/init.raw "ark:{egs_dir}/egs.JOB.ark" """ + """ {dir}/JOB.lda_stats""".format( + command=run_opts.command, + num_lda_jobs=num_lda_jobs, + dir=dir, + egs_dir=egs_dir, + rand_prune=rand_prune)) # the above command would have generated dir/{1..num_lda_jobs}.lda_stats lda_stat_files = map(lambda x: '{0}/{1}.lda_stats'.format(dir, x), range(1, num_lda_jobs + 1)) common_lib.run_job( - """{command} {dir}/log/sum_transform_stats.log \ - sum-lda-accs {dir}/lda_stats {lda_stat_files}""".format( - command=run_opts.command, - dir=dir, lda_stat_files=" ".join(lda_stat_files))) + "{command} {dir}/log/sum_transform_stats.log " + "sum-lda-accs {dir}/lda_stats {lda_stat_files}".format( + command=run_opts.command, + dir=dir, lda_stat_files=" ".join(lda_stat_files))) for file in lda_stat_files: try: @@ -426,11 +428,11 @@ def compute_preconditioning_matrix(dir, egs_dir, num_lda_jobs, run_opts, # variant of an LDA transform but without dimensionality reduction. 
common_lib.run_job( - """{command} {dir}/log/get_transform.log \ - nnet-get-feature-transform {lda_opts} {dir}/lda.mat \ - {dir}/lda_stats""".format( - command=run_opts.command, dir=dir, - lda_opts=lda_opts if lda_opts is not None else "")) + "{command} {dir}/log/get_transform.log" + " nnet-get-feature-transform {lda_opts} {dir}/lda.mat" + " {dir}/lda_stats".format( + command=run_opts.command, dir=dir, + lda_opts=lda_opts if lda_opts is not None else "")) common_lib.force_symlink("../lda.mat", "{0}/configs/lda.mat".format(dir)) @@ -439,7 +441,8 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, right_context, run_opts, mb_size=256, wait=False, background_process_handler=None, get_raw_nnet_from_am=True, - extra_egs_copy_cmd=""): + extra_egs_copy_cmd="", + compute_per_dim_accuracy=False): if get_raw_nnet_from_am: model = "nnet3-am-copy --raw=true {dir}/{iter}.mdl - |".format( dir=dir, iter=iter) @@ -455,21 +458,26 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, valid_diagnostic_egs = "scp:{0}/valid_diagnostic.egs.1".format( egs_dir) + opts = [] + if compute_per_dim_accuracy: + opts.append("--compute-per-dim-accuracy") + common_lib.run_job( - """ {command} {dir}/log/compute_prob_valid.{iter}.log \ - nnet3-compute-prob "{model}" \ - "ark,bg:nnet3-copy-egs {context_opts} \ - {egs_rspecifier} ark:- |{extra_egs_copy_cmd} \ - nnet3-merge-egs --minibatch-size={mb_size} ark:- \ - ark:- |" """.format(command=run_opts.command, - dir=dir, - iter=iter, - egs_rspecifier=valid_diagnostic_egs, - context_opts=context_opts, - mb_size=mb_size, - model=model, - egs_dir=egs_dir, - extra_egs_copy_cmd=extra_egs_copy_cmd), + """{command} {dir}/log/compute_prob_valid.{iter}.log""" + """ nnet3-compute-prob {opts} "{model}" """ + """ "ark,bg:nnet3-copy-egs {context_opts}""" + """ {egs_rspecifier} ark:- |{extra_egs_copy_cmd}""" + """ nnet3-merge-egs --minibatch-size={mb_size} ark:-""" + """ ark:- |" """.format(command=run_opts.command, + opts=' '.join(opts), + dir=dir, + iter=iter, + egs_rspecifier=valid_diagnostic_egs, + context_opts=context_opts, + mb_size=mb_size, + model=model, + egs_dir=egs_dir, + extra_egs_copy_cmd=extra_egs_copy_cmd), wait=wait, background_process_handler=background_process_handler) if os.path.isfile("{0}/train_diagnostic.egs".format(egs_dir)): @@ -479,20 +487,21 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, egs_dir) common_lib.run_job( - """{command} {dir}/log/compute_prob_train.{iter}.log \ - nnet3-compute-prob "{model}" \ - "ark,bg:nnet3-copy-egs {context_opts} \ - {egs_rspecifier} ark:- | {extra_egs_copy_cmd} \ - nnet3-merge-egs --minibatch-size={mb_size} ark:- \ - ark:- |" """.format(command=run_opts.command, - dir=dir, - iter=iter, - egs_rspecifier=train_diagnostic_egs, - context_opts=context_opts, - mb_size=mb_size, - model=model, - egs_dir=egs_dir, - extra_egs_copy_cmd=extra_egs_copy_cmd), + """{command} {dir}/log/compute_prob_train.{iter}.log""" + """ nnet3-compute-prob {opts} "{model}" """ + """ "ark,bg:nnet3-copy-egs {context_opts}""" + """ {egs_rspecifier} ark:- | {extra_egs_copy_cmd}""" + """ nnet3-merge-egs --minibatch-size={mb_size} ark:-""" + """ ark:- |" """.format(command=run_opts.command, + opts=' '.join(opts), + dir=dir, + iter=iter, + egs_rspecifier=train_diagnostic_egs, + context_opts=context_opts, + mb_size=mb_size, + model=model, + egs_dir=egs_dir, + extra_egs_copy_cmd=extra_egs_copy_cmd), wait=wait, background_process_handler=background_process_handler) @@ -519,30 +528,29 @@ def compute_progress(dir, 
iter, egs_dir, left_context, right_context, egs_dir) common_lib.run_job( - """{command} {dir}/log/progress.{iter}.log \ - nnet3-info "{model}" '&&' \ - nnet3-show-progress --use-gpu=no "{prev_model}" "{model}" \ - "ark,bg:nnet3-copy-egs {context_opts} \ - {egs_rspecifier} ark:- |{extra_egs_copy_cmd} \ - nnet3-merge-egs --minibatch-size={mb_size} ark:- \ - ark:- |" """.format(command=run_opts.command, - dir=dir, - iter=iter, - egs_rspecifier=train_diagnostic_egs, - model=model, - context_opts=context_opts, - mb_size=mb_size, - prev_model=prev_model, - egs_dir=egs_dir, - extra_egs_copy_cmd=extra_egs_copy_cmd), - wait=wait, background_process_handler=background_process_handler) + """{command} {dir}/log/progress.{iter}.log nnet3-info "{model}" """ + """ '&&' nnet3-show-progress --use-gpu=no "{prev_model}" "{model}" """ + """ "ark,bg:nnet3-copy-egs {context_opts}""" + """ {egs_rspecifier} ark:- |{extra_egs_copy_cmd}""" + """ nnet3-merge-egs --minibatch-size={mb_size} ark:-""" + """ ark:- |" """.format(command=run_opts.command, + dir=dir, + iter=iter, + egs_rspecifier=train_diagnostic_egs, + model=model, + context_opts=context_opts, + mb_size=mb_size, + prev_model=prev_model, + egs_dir=egs_dir, + extra_egs_copy_cmd=extra_egs_copy_cmd), + wait=wait, background_process_handler=background_process_handler) def combine_models(dir, num_iters, models_to_combine, egs_dir, left_context, right_context, run_opts, background_process_handler=None, chunk_width=None, get_raw_nnet_from_am=True, - extra_egs_copy_cmd=""): + extra_egs_copy_cmd="", compute_per_dim_accuracy=False): """ Function to do model combination In the nnet3 setup, the logic @@ -617,7 +625,8 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir, left_context=left_context, right_context=right_context, run_opts=run_opts, wait=False, background_process_handler=background_process_handler, - extra_egs_copy_cmd=extra_egs_copy_cmd) + extra_egs_copy_cmd=extra_egs_copy_cmd, + compute_per_dim_accuracy=compute_per_dim_accuracy) else: compute_train_cv_probabilities( dir=dir, iter='final', egs_dir=egs_dir, @@ -625,7 +634,8 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir, run_opts=run_opts, wait=False, background_process_handler=background_process_handler, get_raw_nnet_from_am=False, - extra_egs_copy_cmd=extra_egs_copy_cmd) + extra_egs_copy_cmd=extra_egs_copy_cmd, + compute_per_dim_accuracy=compute_per_dim_accuracy) def get_realign_iters(realign_times, num_iters, diff --git a/src/nnet3/nnet-diagnostics.cc b/src/nnet3/nnet-diagnostics.cc index 64abe8a0578..d00dd31b245 100644 --- a/src/nnet3/nnet-diagnostics.cc +++ b/src/nnet3/nnet-diagnostics.cc @@ -108,22 +108,33 @@ void NnetComputeProb::ProcessOutputs(const NnetExample &eg, } if (config_.compute_accuracy) { BaseFloat tot_weight, tot_accuracy; + PerDimObjectiveInfo &totals = accuracy_info_[io.name]; + + if (config_.compute_per_dim_accuracy && + totals.tot_objective_vec.Dim() == 0) { + totals.tot_objective_vec.Resize(output.NumCols()); + totals.tot_weight_vec.Resize(output.NumCols()); + } + ComputeAccuracy(io.features, output, - &tot_weight, &tot_accuracy, deriv_weights); - SimpleObjectiveInfo &totals = accuracy_info_[io.name]; + &tot_weight, &tot_accuracy, deriv_weights, + config_.compute_per_dim_accuracy ? + &totals.tot_weight_vec : NULL, + config_.compute_per_dim_accuracy ? 
+ &totals.tot_objective_vec : NULL); totals.tot_weight += tot_weight; totals.tot_objective += tot_accuracy; } - num_minibatches_processed_++; } } + num_minibatches_processed_++; } bool NnetComputeProb::PrintTotalStats() const { bool ans = false; - unordered_map::const_iterator - iter, end; { // First print regular objectives + unordered_map::const_iterator iter, end; iter = objf_info_.begin(); end = objf_info_.end(); for (; iter != end; ++iter) { @@ -141,15 +152,34 @@ bool NnetComputeProb::PrintTotalStats() const { ans = true; } } - { // now print accuracies. + { + unordered_map::const_iterator iter, end; + // now print accuracies. iter = accuracy_info_.begin(); end = accuracy_info_.end(); for (; iter != end; ++iter) { const std::string &name = iter->first; - const SimpleObjectiveInfo &info = iter->second; + const PerDimObjectiveInfo &info = iter->second; KALDI_LOG << "Overall accuracy for '" << name << "' is " << (info.tot_objective / info.tot_weight) << " per frame" << ", over " << info.tot_weight << " frames."; + + if (info.tot_weight_vec.Dim() > 0) { + Vector accuracy_vec(info.tot_weight_vec.Dim()); + for (size_t j = 0; j < info.tot_weight_vec.Dim(); j++) { + if (info.tot_weight_vec(j) != 0) { + accuracy_vec(j) = info.tot_objective_vec(j) + / info.tot_weight_vec(j); + } else { + accuracy_vec(j) = -1.0; + } + } + + KALDI_LOG << "Overall per-dim accuracy vector for '" << name + << "' is " << accuracy_vec << " per frame" + << ", over " << info.tot_weight << " frames."; + } // don't bother changing ans; the loop over the regular objective should // already have set it to true if we got any data. } @@ -161,12 +191,19 @@ void ComputeAccuracy(const GeneralMatrix &supervision, const CuMatrixBase &nnet_output, BaseFloat *tot_weight_out, BaseFloat *tot_accuracy_out, - const Vector *deriv_weights) { + const Vector *deriv_weights, + Vector *tot_weight_vec, + Vector *tot_accuracy_vec) { int32 num_rows = nnet_output.NumRows(), num_cols = nnet_output.NumCols(); KALDI_ASSERT(supervision.NumRows() == num_rows && supervision.NumCols() == num_cols); + if (tot_accuracy_vec || tot_weight_vec) + KALDI_ASSERT(tot_accuracy_vec && tot_weight_vec && + tot_accuracy_vec->Dim() == num_cols && + tot_weight_vec->Dim() == num_cols); + CuArray best_index(num_rows); nnet_output.FindRowMaxId(&best_index); std::vector best_index_cpu; @@ -192,8 +229,13 @@ void ComputeAccuracy(const GeneralMatrix &supervision, if (deriv_weights) row_sum *= (*deriv_weights)(r); tot_weight += row_sum; - if (best_index == best_index_cpu[r]) + if (tot_weight_vec) + (*tot_weight_vec)(best_index) += row_sum; + if (best_index == best_index_cpu[r]) { tot_accuracy += row_sum; + if (tot_accuracy_vec) + (*tot_accuracy_vec)(best_index) += row_sum; + } } break; } @@ -208,8 +250,13 @@ void ComputeAccuracy(const GeneralMatrix &supervision, if (deriv_weights) row_sum *= (*deriv_weights)(r); tot_weight += row_sum; - if (best_index == best_index_cpu[r]) + if (tot_weight_vec) + (*tot_weight_vec)(best_index) += row_sum; + if (best_index == best_index_cpu[r]) { tot_accuracy += row_sum; + if (tot_accuracy_vec) + (*tot_accuracy_vec)(best_index) += row_sum; + } } break; } @@ -224,8 +271,13 @@ void ComputeAccuracy(const GeneralMatrix &supervision, row_sum *= (*deriv_weights)(r); KALDI_ASSERT(best_index < num_cols); tot_weight += row_sum; - if (best_index == best_index_cpu[r]) + if (tot_weight_vec) + (*tot_weight_vec)(best_index) += row_sum; + if (best_index == best_index_cpu[r]) { tot_accuracy += row_sum; + if (tot_accuracy_vec) + (*tot_accuracy_vec)(best_index) 
+= row_sum;
+      }
     }
     break;
   }
diff --git a/src/nnet3/nnet-diagnostics.h b/src/nnet3/nnet-diagnostics.h
index 59f0cd16f47..a333f0ac6fe 100644
--- a/src/nnet3/nnet-diagnostics.h
+++ b/src/nnet3/nnet-diagnostics.h
@@ -38,11 +38,17 @@ struct SimpleObjectiveInfo {
       tot_objective(0.0) { }
 };
 
+struct PerDimObjectiveInfo : SimpleObjectiveInfo {
+  Vector<BaseFloat> tot_weight_vec;
+  Vector<BaseFloat> tot_objective_vec;
+
+  PerDimObjectiveInfo(): SimpleObjectiveInfo() { }
+};
 
 struct NnetComputeProbOptions {
   bool debug_computation;
   bool compute_deriv;
   bool compute_accuracy;
+  bool compute_per_dim_accuracy;
   bool apply_deriv_weights;
 
   NnetOptimizeOptions optimize_config;
@@ -51,6 +57,7 @@ struct NnetComputeProbOptions {
       debug_computation(false),
       compute_deriv(false),
       compute_accuracy(true),
+      compute_per_dim_accuracy(false),
       apply_deriv_weights(true) { }
   void Register(OptionsItf *opts) {
     // compute_deriv is not included in the command line options
@@ -59,6 +66,8 @@ struct NnetComputeProbOptions {
                    "debug for the actual computation (very verbose!)");
     opts->Register("compute-accuracy", &compute_accuracy, "If true, compute "
                    "accuracy values as well as objective functions");
+    opts->Register("compute-per-dim-accuracy", &compute_per_dim_accuracy,
+                   "If true, compute accuracy values per-dim");
     opts->Register("apply-deriv-weights", &apply_deriv_weights,
                    "Apply per-frame deriv weights");
 
@@ -128,7 +137,7 @@ class NnetComputeProb {
 
   unordered_map<std::string, SimpleObjectiveInfo, StringHasher> objf_info_;
 
-  unordered_map<std::string, SimpleObjectiveInfo, StringHasher> accuracy_info_;
+  unordered_map<std::string, PerDimObjectiveInfo, StringHasher> accuracy_info_;
 };
 
 
@@ -164,7 +173,9 @@ void ComputeAccuracy(const GeneralMatrix &supervision,
                      const CuMatrixBase<BaseFloat> &nnet_output,
                      BaseFloat *tot_weight,
                      BaseFloat *tot_accuracy,
-                     const Vector<BaseFloat> *deriv_weights = NULL);
+                     const Vector<BaseFloat> *deriv_weights = NULL,
+                     Vector<BaseFloat> *tot_weight_vec = NULL,
+                     Vector<BaseFloat> *tot_accuracy_vec = NULL);
 
 
 } // namespace nnet3

From a5d78816f06aa279417feebef92ccacab33ba13d Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Mon, 2 Jan 2017 18:34:16 -0500
Subject: [PATCH 186/530] asr_diarization: Minor bug fix in
 ../egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py

---
 egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py b/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py
index 72b0cb4edd3..8e6f1442c7a 100755
--- a/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py
+++ b/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py
@@ -741,7 +741,7 @@ def generate_training_examples_internal(dir, targets_parameters, feat_dir,
         out_egs_handle = open("{0}/egs.scp".format(dir), 'w')
         for i in range(1, num_archives_intermediate + 1):
             for line in open("{0}/egs.{1}.scp".format(dir, i)):
-                print (line, file=out_egs_handle)
+                print (line.strip(), file=out_egs_handle)
 
         out_egs_handle.close()
     else:
         # there are intermediate archives so we shuffle egs across jobs
@@ -782,7 +782,7 @@ def generate_training_examples_internal(dir, targets_parameters, feat_dir,
     for i in range(1, num_archives_intermediate + 1):
         for j in range(1, archives_multiple + 1):
             for line in open("{0}/egs.{1}.{2}.scp".format(dir, i, j)):
-                print (line, file=out_egs_handle)
+                print (line.strip(), file=out_egs_handle)
 
     out_egs_handle.close()
 
     cleanup(dir, archives_multiple)

From 894279b07c2e026913555dd8db5b58545aaba406 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Mon, 2 Jan 2017 18:35:03 -0500
Subject: [PATCH 187/530] asr_diarization: Some deep restructuring to decode
 and segmentation

---
 .../segmentation/decode_sad_to_segments.sh    |  7 ++-
.../segmentation/do_segmentation_data_dir.sh | 51 +++++++++---------- .../internal/convert_ali_to_vad.sh | 2 +- .../post_process_sad_to_subsegments.sh | 21 ++++++-- 4 files changed, 45 insertions(+), 36 deletions(-) mode change 100644 => 100755 egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh diff --git a/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh b/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh index 8f4ed60dfda..de8ab0d90e8 100755 --- a/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh +++ b/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh @@ -88,10 +88,9 @@ if [ $stage -le 6 ]; then 1 0 2 1 EOF - steps/segmentation/post_process_sad_to_segments.sh \ - --phone2sad-map $lang/phone2sad_map \ - --ali-suffix "" --segmentation-config $segmentation_config \ + steps/segmentation/post_process_sad_to_subsegments.sh \ + --segmentation-config $segmentation_config \ --frame-subsampling-factor $frame_subsampling_factor \ - $data $lang $dir $dir $out_data + $data $lang/phone2sad_map $dir $dir $out_data fi diff --git a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh index 9feb421ccd3..c1e690af366 100755 --- a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh +++ b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh @@ -36,7 +36,10 @@ do_downsampling=false # Segmentation configs min_silence_duration=30 min_speech_duration=30 +sil_prior=0.5 +speech_prior=0.5 segmentation_config=conf/segmentation_speech.conf +convert_data_dir_to_whole=true echo $* @@ -66,33 +69,27 @@ export PATH="$KALDI_ROOT/tools/sph2pipe_v2.5/:$PATH" whole_data_dir=${sad_dir}/${data_id}_whole -if [ $stage -le 0 ]; then - utils/data/convert_data_dir_to_whole.sh $src_data_dir ${whole_data_dir} - - if $do_downsampling; then - freq=`cat $mfcc_config | perl -pe 's/\s*#.*//g' | grep "sample-frequency=" | awk -F'=' '{if (NF == 0) print 16000; else print $2}'` - sox=`which sox` - - cat $src_data_dir/wav.scp | python -c "import sys -for line in sys.stdin.readlines(): - splits = line.strip().split() - if splits[-1] == '|': - out_line = line.strip() + ' $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |' - else: - out_line = 'cat {0} {1} | $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |'.format(splits[0], ' '.join(splits[1:])) - print (out_line)" > ${whole_data_dir}/wav.scp - fi - - utils/copy_data_dir.sh ${whole_data_dir} ${whole_data_dir}${feat_affix}_hires -fi +if $convert_data_dir_to_whole; then + if [ $stage -le 0 ]; then + utils/data/convert_data_dir_to_whole.sh $src_data_dir ${whole_data_dir} + + if $do_downsampling; then + freq=`cat $mfcc_config | perl -pe 's/\s*#.*//g' | grep "sample-frequency=" | awk -F'=' '{if (NF == 0) print 16000; else print $2}'` + utils/data/downsample_data_dir.sh $freq $whole_data_dir + fi -test_data_dir=${whole_data_dir}${feat_affix}_hires + utils/copy_data_dir.sh ${whole_data_dir} ${whole_data_dir}${feat_affix}_hires + fi -if [ $stage -le 1 ]; then - steps/make_mfcc.sh --mfcc-config $mfcc_config --nj $reco_nj --cmd "$train_cmd" \ - ${whole_data_dir}${feat_affix}_hires exp/make_hires/${data_id}_whole${feat_affix} $mfcc_dir - steps/compute_cmvn_stats.sh ${whole_data_dir}${feat_affix}_hires exp/make_hires/${data_id}_whole${feat_affix} $mfcc_dir - utils/fix_data_dir.sh ${whole_data_dir}${feat_affix}_hires + if [ $stage -le 1 ]; then + steps/make_mfcc.sh --mfcc-config $mfcc_config --nj $reco_nj --cmd "$train_cmd" \ + ${whole_data_dir}${feat_affix}_hires 
exp/make_hires/${data_id}_whole${feat_affix} $mfcc_dir + steps/compute_cmvn_stats.sh ${whole_data_dir}${feat_affix}_hires exp/make_hires/${data_id}_whole${feat_affix} $mfcc_dir + utils/fix_data_dir.sh ${whole_data_dir}${feat_affix}_hires + fi + test_data_dir=${whole_data_dir}${feat_affix}_hires +else + test_data_dir=$src_data_dir fi post_vec=$sad_nnet_dir/post_${output_name}.vec @@ -118,6 +115,8 @@ if [ $stage -le 3 ]; then --frame-subsampling-factor $frame_subsampling_factor \ --min-silence-duration $min_silence_duration \ --min-speech-duration $min_speech_duration \ + --sil-prior $sil_prior \ + --speech-prior $speech_prior \ --segmentation-config $segmentation_config --cmd "$train_cmd" \ ${test_data_dir} $sad_dir $seg_dir ${data_dir}_seg fi @@ -125,7 +124,7 @@ fi # Subsegment data directory if [ $stage -le 4 ]; then rm ${data_dir}_seg/feats.scp || true - utils/data/get_reco2num_frames.sh ${test_data_dir} + utils/data/get_reco2num_frames.sh --cmd "$train_cmd" --nj $reco_nj ${test_data_dir} awk '{print $1" "$2}' ${data_dir}_seg/segments | \ utils/apply_map.pl -f 2 ${test_data_dir}/reco2num_frames > \ ${data_dir}_seg/utt2max_frames diff --git a/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh b/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh index 353e6d4664e..234b5020797 100755 --- a/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh +++ b/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh @@ -13,7 +13,7 @@ frame_subsampling_factor=1 . parse_options.sh -if [ $# -ne 4 ]; then +if [ $# -ne 3 ]; then echo "This script converts the alignment in the alignment directory " echo "to speech activity segments based on the provided phone-map." echo "Usage: $0 exp/tri3_ali data/lang/phones/sad.map exp/tri3_ali_vad" diff --git a/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh b/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh old mode 100644 new mode 100755 index 8cfcaa40cda..0ca6b3dd126 --- a/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh +++ b/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh @@ -12,6 +12,7 @@ stage=-10 segmentation_config=conf/segmentation.conf nj=18 +frame_subsampling_factor=1 frame_shift=0.01 . utils/parse_options.sh @@ -39,26 +40,36 @@ if [ $stage -le 0 ]; then $cmd JOB=1:$nj $dir/log/segmentation.JOB.log \ segmentation-init-from-ali \ "ark:gunzip -c $vad_dir/ali.JOB.gz |" ark:- \| \ - segmentation-copy --label-map=$phone2sad_map ark:- \ + segmentation-copy --label-map=$phone2sad_map \ + --frame-subsampling-factor=$frame_subsampling_factor ark:- \ "ark:| gzip -c > $dir/orig_segmentation.JOB.gz" fi echo $nj > $dir/num_jobs +# Create a temporary directory into which we can create the new segments +# file. 
 if [ $stage -le 1 ]; then
   rm -r $segmented_data_dir || true
   utils/data/convert_data_dir_to_whole.sh $data_dir $segmented_data_dir || exit 1
   rm $segmented_data_dir/text || true
 fi
 
-steps/segmentation/internal/post_process_segments.sh \
-  --stage $stage --cmd "$cmd" \
-  --config $segmentation_config --frame-shift $frame_shift \
-  $data_dir $dir $segmented_data_dir
+if [ $stage -le 2 ]; then
+  steps/segmentation/internal/post_process_segments.sh \
+    --stage $stage --cmd "$cmd" \
+    --config $segmentation_config --frame-shift $frame_shift \
+    $data_dir $dir $segmented_data_dir
+fi
 
 mv $segmented_data_dir/segments $segmented_data_dir/sub_segments
 utils/data/subsegment_data_dir.sh $data_dir $segmented_data_dir/sub_segments $segmented_data_dir
 
+utils/data/get_reco2num_frames.sh ${data_dir}
+mv $segmented_data_dir/feats.scp $segmented_data_dir/feats.scp.tmp
+cat $segmented_data_dir/segments | utils/apply_map.pl -f 2 $data_dir/reco2num_frames > $segmented_data_dir/utt2max_frames
+cat $segmented_data_dir/feats.scp.tmp | utils/data/fix_subsegmented_feats.pl $segmented_data_dir/utt2max_frames > $segmented_data_dir/feats.scp
+
 utils/utt2spk_to_spk2utt.pl $segmented_data_dir/utt2spk > $segmented_data_dir/spk2utt || exit 1
 utils/fix_data_dir.sh $segmented_data_dir

From 73eb9431d868336f1fff761b8fc33fcf2f310b9b Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Mon, 2 Jan 2017 18:35:36 -0500
Subject: [PATCH 188/530] asr_diarization: Bug fix in get_reco2num_frames.sh

---
 egs/wsj/s5/utils/data/get_reco2num_frames.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/wsj/s5/utils/data/get_reco2num_frames.sh b/egs/wsj/s5/utils/data/get_reco2num_frames.sh
index 03ab7b40616..8df5afdb156 100755
--- a/egs/wsj/s5/utils/data/get_reco2num_frames.sh
+++ b/egs/wsj/s5/utils/data/get_reco2num_frames.sh
@@ -20,7 +20,7 @@ if [ -f $data/reco2num_frames ]; then
   exit 0;
 fi
 
-utils/data/get_reco2dur.sh $data
+utils/data/get_reco2dur.sh --cmd "$cmd" --nj $nj $data
 
 awk -v fs=$frame_shift -v fovlp=$frame_overlap \
   '{print $1" "int( ($2 - fovlp) / fs)}' $data/reco2dur > $data/reco2num_frames

From 017774350ff1b4b5af05c6139a7474dedff0f6f3 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Mon, 2 Jan 2017 18:35:56 -0500
Subject: [PATCH 189/530] asr_diarization: Relax some errors in
 normalize_data_range

---
 egs/wsj/s5/utils/data/normalize_data_range.pl | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/egs/wsj/s5/utils/data/normalize_data_range.pl b/egs/wsj/s5/utils/data/normalize_data_range.pl
index f7936d98a31..a7a144fd82e 100755
--- a/egs/wsj/s5/utils/data/normalize_data_range.pl
+++ b/egs/wsj/s5/utils/data/normalize_data_range.pl
@@ -45,14 +45,13 @@ sub combine_ranges {
   # though they are supported at the C++ level.
   if ($start1 eq "" || $start2 eq "" || $end1 eq "" || $end2 == "") {
     chop $line;
-    print("normalize_data_range.pl: could not make sense of line $line\n");
+    print STDERR ("normalize_data_range.pl: could not make sense of line $line\n");
     exit(1)
   }
   if ($start1 + $end2 > $end1) {
     chop $line;
-    print("normalize_data_range.pl: could not make sense of line $line " .
+    print STDERR ("normalize_data_range.pl: could not make sense of line $line " .
           "[second $row_or_column range too large vs first range, $start1 + $end2 > $end1]\n");
-    exit(1);
   }
   return ($start2+$start1, $end2+$start1);
 }
@@ -72,7 +71,7 @@
     # sometimes in scp files, we use the command concat-feats to splice together
     # two feature matrices.  Handling this correctly is complicated and we don't
     # anticipate needing it, so we just refuse to process this type of data.
-    print "normalize_data_range.pl: this script cannot [yet] normalize the data ranges " .
+    print STDERR "normalize_data_range.pl: this script cannot [yet] normalize the data ranges " .
       "if concat-feats was in the input data\n";
     exit(1);
   }

From a638ccad32b7baaedddb5c984e61ac938db0d136 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Mon, 2 Jan 2017 18:37:34 -0500
Subject: [PATCH 190/530] asr_diarization: more tuning scripts for music
 detection

---
 .../tuning/train_stats_sad_music_1a.sh        |   0
 .../tuning/train_stats_sad_music_1b.sh        |   0
 .../tuning/train_stats_sad_music_1c.sh        |   0
 .../tuning/train_stats_sad_music_1d.sh        |   0
 .../tuning/train_stats_sad_music_1e.sh        |   0
 .../tuning/train_stats_sad_music_1f.sh        | 227 +++++++++++++++++
 .../tuning/train_stats_sad_music_1g.sh        | 234 ++++++++++++++++++
 7 files changed, 461 insertions(+)
 mode change 100644 => 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1a.sh
 mode change 100644 => 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1b.sh
 mode change 100644 => 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1c.sh
 mode change 100644 => 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1d.sh
 mode change 100644 => 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1e.sh
 create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1f.sh
 create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1g.sh

diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1a.sh
old mode 100644
new mode 100755
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1b.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1b.sh
old mode 100644
new mode 100755
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1c.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1c.sh
old mode 100644
new mode 100755
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1d.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1d.sh
old mode 100644
new mode 100755
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1e.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1e.sh
old mode 100644
new mode 100755
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1f.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1f.sh
new file mode 100755
index 00000000000..0afdd0072ac
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1f.sh
@@ -0,0 +1,227 @@
+#!/bin/bash
+
+# This is a script to train a time-delay neural network for speech activity detection (SAD) and
+# music-id using statistic pooling component for long-context information.
+# This script is the same as 1e, but removes the stats component.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+ +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=20 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=79 # Maximum left context in egs apart from TDNN's left context +extra_right_context=11 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +num_utts_subset_valid=50 # "utts" is actually recording. So this is prettly small. +num_utts_subset_train=50 + +# target options +train_data_dir=data/train_aztec_small_unsad_whole_all_corrupted_sp_hires_bp + +speech_feat_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/speech_feat.scp +deriv_weights_scp=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400/deriv_weights.scp +music_labels_scp=data/train_aztec_small_unsad_whole_music_corrupted_sp_hires_bp/music_labels.scp + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$train_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-9, tdnn1@-3, tdnn1, tdnn1@3) dim=256 add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(tdnn2@-27, tdnn2@-9, tdnn2, tdnn2@9) dim=256 add-log-stddev=true + + output-layer name=output-speech include-log-softmax=true dim=2 input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 input=tdnn3 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` +speech_data_dir=$dir/`basename $train_data_dir`_speech +music_data_dir=$dir/`basename $train_data_dir`_music + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + + . 
$dir/configs/vars + + utils/subset_data_dir.sh --utt-list $speech_feat_scp ${train_data_dir} $dir/`basename ${train_data_dir}`_speech + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$speech_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$speech_feat_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + utils/subset_data_dir.sh --utt-list $music_labels_scp ${train_data_dir} $dir/`basename ${train_data_dir}`_music + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_labels_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + steps/nnet3/multilingual/get_egs.sh \ + --minibatch-size $[chunk_width * num_chunk_per_minibatch] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + 
--cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$train_data_dir \
+    --targets-scp="$speech_feat_scp" \
+    --dir=$dir || exit 1
+fi
+
+if [ $stage -le 6 ]; then
+  $train_cmd JOB=1:100 $dir/log/compute_post_output-speech.JOB.log \
+    extract-column "scp:utils/split_scp.pl -j 100 \$[JOB-1] $speech_feat_scp |" ark,t:- \| \
+    steps/segmentation/quantize_vector.pl \| \
+    ali-to-post ark,t:- ark:- \| \
+    weight-post ark:- scp:$deriv_weights_scp ark:- \| \
+    post-to-feats --post-dim=2 ark:- ark:- \| \
+    matrix-sum-rows ark:- ark:- \| \
+    vector-sum ark:- $dir/post_output-speech.vec.JOB
+  eval vector-sum $dir/post_output-speech.vec.{`seq -s, 100`} $dir/post_output-speech.vec
+
+  $train_cmd JOB=1:100 $dir/log/compute_post_output-music.JOB.log \
+    ali-to-post "scp:utils/split_scp.pl -j 100 \$[JOB-1] $music_labels_scp |" ark:- \| \
+    post-to-feats --post-dim=2 ark:- ark:- \| \
+    matrix-sum-rows ark:- ark:- \| \
+    vector-sum ark:- $dir/post_output-music.vec.JOB
+  eval vector-sum $dir/post_output-music.vec.{`seq -s, 100`} $dir/post_output-music.vec
+fi
+
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1g.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1g.sh
new file mode 100755
index 00000000000..e411b94c893
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_1g.sh
@@ -0,0 +1,234 @@
+#!/bin/bash
+
+# This is a script to train a time-delay neural network for speech activity detection (SAD) and
+# music-id using statistic pooling component for long-context information.
+# This script is the same as 1e, but removes the stats component in the 3rd layer.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts=   # Directly passed to get_egs_multiple_targets.py
+
+# TDNN options
+relu_dim=256
+chunk_width=20 # We use chunk training for training TDNN
+num_chunk_per_minibatch=64
+
+extra_left_context=79 # Maximum left context in egs apart from TDNN's left context
+extra_right_context=11 # Maximum right context in egs apart from TDNN's right context
+
+# We randomly select an extra {left,right} context for each job between
+# min_extra_*_context and extra_*_context so that the network can get used
+# to different contexts used to compute statistics.
+min_extra_left_context=20
+min_extra_right_context=0
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2 # Small max-param change for small network
+extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs
+                    # such as removing one of the targets
+
+sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400
+music_data_dir=data/train_aztec_unsad_whole_music_corrupted_sp_hires_bp
+
+extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |"
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=a
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l`
+num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ?
4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 add-log-stddev=true + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-9, tdnn1@-3, tdnn1, tdnn1@3, tdnn2_stats) dim=256 add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(tdnn2@-27, tdnn2@-9, tdnn2, tdnn2@9) dim=256 add-log-stddev=true + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_music / $num_frames_sad) ** 0.25) / $num_snr_bins"` input=tdnn3 + output-layer name=output-speech include-log-softmax=true dim=2 input=tdnn3 objective-scale=`perl -e "print (($num_frames_music / $num_frames_sad) ** 0.25)"` + output-layer name=output-music include-log-softmax=true dim=2 input=tdnn3 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_labels_scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi + From 47bf4fd5aa4ee972a7195f904cfca6eb9fed7141 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 2 Jan 2017 18:38:01 -0500 Subject: [PATCH 191/530] asr_diarization: Add more tuning scripts for sad overlap --- .../tuning/train_lstm_overlapping_sad_1b.sh | 262 +++++++++++++++++ .../tuning/train_lstm_stats_overlap_1a.sh | 202 +++++++++++++ .../tuning/train_lstm_stats_sad_overlap_1a.sh | 259 ++++++++++++++++ .../train_lstm_stats_sad_overlap_ami_1a.sh | 192 ++++++++++++ .../train_lstm_stats_sad_overlap_ami_1b.sh | 192 ++++++++++++ .../tuning/train_rnn_overlap_1a.sh | 184 ++++++++++++ .../tuning/train_rnn_overlap_1b.sh | 184 ++++++++++++ .../tuning/train_stats_overlap_1f.sh | 200 +++++++++++++ .../tuning/train_stats_overlap_1g.sh | 202 +++++++++++++ .../tuning/train_stats_overlap_1h.sh | 202 +++++++++++++ .../tuning/train_stats_overlap_1i.sh | 202 +++++++++++++ .../tuning/train_stats_sad_overlap_1a.sh | 0 .../tuning/train_stats_sad_overlap_1b.sh | 53 ++-- 
.../tuning/train_stats_sad_overlap_1c.sh | 239 +++++++++++++++ .../tuning/train_stats_sad_overlap_1d.sh | 262 +++++++++++++++++ .../tuning/train_stats_sad_overlap_1f.sh | 272 +++++++++++++++++ .../tuning/train_stats_sad_overlap_1g.sh | 275 +++++++++++++++++ .../tuning/train_stats_sad_overlap_1h.sh | 276 ++++++++++++++++++ 18 files changed, 3632 insertions(+), 26 deletions(-) create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_lstm_overlapping_sad_1b.sh create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_overlap_1a.sh create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_1a.sh create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1a.sh create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1b.sh create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1a.sh create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1b.sh create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1f.sh create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1g.sh create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1h.sh create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1i.sh mode change 100644 => 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh mode change 100644 => 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1b.sh create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1c.sh create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1d.sh create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1f.sh create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1g.sh create mode 100755 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1h.sh diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_overlapping_sad_1b.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_overlapping_sad_1b.sh new file mode 100755 index 00000000000..a634060b317 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_overlapping_sad_1b.sh @@ -0,0 +1,262 @@ +#!/bin/bash + +# This is a script to train a LSTM for overlapped speech activity detection +# and SAD. This uses a larger LSTM-TDNN architecture and trains on +# ternary overlapping SAD labels. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. 
+ +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=80 # Maximum left context in egs apart from TDNN's left context +extra_right_context=40 # Maximum right context in egs apart from TDNN's right context + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +ovlp_sad_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $ovlp_sad_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-2,-1,0,1,2) + + relu-renorm-layer name=tdnn1 input=Append(input@-2, input@-1, input, input@1, input@2) dim=256 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=256 + fast-lstmp-layer name=lstm1 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=256 + fast-lstmp-layer name=lstm2 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-6 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=$speech_scale input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.05 + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print $speech_scale / $num_snr_bins"` input=lstm2 max-change=0.75 learning-rate-factor=0.5 + + output-layer name=output-overlapping_sad include-log-softmax=true dim=3 objective-scale=$ovlp_scale input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapping_sad.txt max-change=0.75 learning-rate-factor=0.02 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{01,02,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{01,02,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_sad_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapping_sad --target-type=sparse --dim=3 --targets-scp=$ovlp_sad_data_dir/overlapping_sad_labels_fixed.scp --deriv-weights-scp=$ovlp_sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + 
--trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_overlap_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_overlap_1a.sh new file mode 100755 index 00000000000..adc4fc81c08 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_overlap_1a.sh @@ -0,0 +1,202 @@ +#!/bin/bash + +# This is a script to train an LSTM-TDNN for overlapped speech activity detection +# using a statistics pooling component for long-context information. + +# This script is similar to 1f but adds max-change=0.75 and learning-rate-factor=0.02 to the final affine, +# and changes relu-dim to 512. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=f + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_ovlp/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if !
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=512 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=512 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=512 + relu-renorm-layer name=tdnn4 dim=512 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.02 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_ovlp + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + 
--trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$ovlp_data_dir \ + --targets-scp="$ovlp_data_dir/overlapped_spech_labels.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_1a.sh new file mode 100755 index 00000000000..52a15686d28 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_1a.sh @@ -0,0 +1,259 @@ +#!/bin/bash + +# This is a script to train a LSTM for overlapped speech activity detection +# and SAD. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 # Maximum left context in egs apart from TDNN's left context +extra_right_context=0 # Maximum right context in egs apart from TDNN's right context + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-2,-1,0,1,2) + + relu-renorm-layer name=tdnn1 input=Append(input@-2, input@-1, input, input@1, input@2) dim=512 + lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=512 + lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-6 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=$speech_scale input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.05 + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print $speech_scale / $num_snr_bins"` input=lstm2 max-change=0.75 learning-rate-factor=0.5 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 objective-scale=$ovlp_scale input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.02 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{01,02,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{01,02,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1a.sh new file mode 100644 index 00000000000..d003f746c4b --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1a.sh @@ -0,0 +1,192 @@ +#!/bin/bash + +# This is a script to train a LSTM for overlapped speech activity detection +# and SAD. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. 
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=128 + +extra_left_context=40 # Maximum left context in egs apart from TDNN's left context +extra_right_context=0 # Maximum right context in egs apart from TDNN's right context + +# training options +num_epochs=8 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +data_dir=data/ami_sdm1_train_whole_hires_bp +labels_scp=exp/sad_ami_sdm1_train/ref/overlapping_sad_labels.scp +deriv_weights_scp=exp/sad_ami_sdm1_train/ref/deriv_weights_for_overlapping_sad.scp + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); $n = ($n > 4000 ? 4000 : $n); print ($n < 6 ? 6 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); $n = ($n > 4000 ? 4000 : $n); print ($n < 6 ? 6 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_ovlp_sad_ami/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$data_dir/feats.scp -` name=input + output name=output-temp input=Append(-2,-1,0,1,2) + + relu-renorm-layer name=tdnn1 input=Append(input@-2, input@-1, input, input@1, input@2) dim=256 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=256 + lstmp-layer name=lstm1 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6) dim=256 + lstmp-layer name=lstm2 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-6 + + output-layer name=output-overlapping_sad include-log-softmax=true dim=3 input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapping_sad.txt learning-rate-factor=0.05 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-overlapping_sad new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_overlapping_sad + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_overlapping_sad/storage ]; then + utils/create_split_dir.pl \ + /export/b{01,02,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_overlapping_sad/storage $dir/egs_overlapping_sad/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-overlapping_sad --target-type=sparse --dim=3 --targets-scp=$labels_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"ali-to-post scp:- ark: |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_overlapping_sad + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=false --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$data_dir \ + --targets-scp="$labels_scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1b.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1b.sh new file mode 100644 index 00000000000..3aa4f28f99a --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_stats_sad_overlap_ami_1b.sh @@ -0,0 +1,192 @@ +#!/bin/bash + +# This is a script to train a LSTM for overlapped speech activity detection +# and SAD. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. 
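A note on the directory naming seen in all of these scripts: dir=$dir${affix:+_$affix} uses bash's ':+' (use-alternate-value) expansion, so an unset or empty affix leaves dir untouched while a non-empty one is appended with an underscore. The same idiom is what makes ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} pass that option to train_raw_rnn.py only when a copy command is actually configured. A minimal sketch:

  dir=exp/nnet3_ovlp_sad_ami/nnet_lstm
  affix=b
  echo $dir${affix:+_$affix}   # exp/nnet3_ovlp_sad_ami/nnet_lstm_b
  affix=
  echo $dir${affix:+_$affix}   # exp/nnet3_ovlp_sad_ami/nnet_lstm (expansion is empty)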
+ +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=128 + +extra_left_context=40 # Maximum left context in egs apart from TDNN's left context +extra_right_context=0 # Maximum right context in egs apart from TDNN's right context + +# training options +num_epochs=8 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +data_dir=data/ami_sdm1_train_whole_hires_bp +labels_scp=exp/sad_ami_sdm1_train/ref/overlapping_sad_labels.scp +deriv_weights_scp=exp/sad_ami_sdm1_train/ref/deriv_weights_for_overlapping_sad.scp + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=a + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); $n = ($n > 4000 ? 4000 : $n); print ($n < 6 ? 6 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); $n = ($n > 4000 ? 4000 : $n); print ($n < 6 ? 6 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_ovlp_sad_ami/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$data_dir/feats.scp -` name=input + output name=output-temp input=Append(-2,-1,0,1,2) + + relu-renorm-layer name=tdnn1 input=Append(input@-2, input@-1, input, input@1, input@2) dim=256 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=256 + lstmp-layer name=lstm1 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6) dim=256 + lstmp-layer name=lstm2 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-6 + + output-layer name=output-overlapping_sad include-log-softmax=true dim=3 input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapping_sad.txt learning-rate-factor=0.05 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-overlapping_sad new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_overlapping_sad + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_overlapping_sad/storage ]; then + utils/create_split_dir.pl \ + /export/b{01,02,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_overlapping_sad/storage $dir/egs_overlapping_sad/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-overlapping_sad --target-type=sparse --dim=3 --targets-scp=$labels_scp --deriv-weights-scp=$deriv_weights_scp --scp2ark-cmd=\"ali-to-post scp:- ark: |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_overlapping_sad + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=false --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$data_dir \ + --targets-scp="$labels_scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1a.sh new file mode 100755 index 00000000000..e63c5d8a063 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1a.sh @@ -0,0 +1,184 @@ +#!/bin/bash + +# This is a script to train a lstm for overlapped speech activity detection. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 # Maximum left context in egs apart from TDNN's left context +extra_right_context=0 # Maximum right context in egs apart from TDNN's right context + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=f + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +num_utts=`cat $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_ovlp/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-2,-1,0,1,2) + + relu-renorm-layer name=tdnn1 dim=256 input=Append(input@-2, input@-1, input, input@1, input@2) + lstmp-layer name=lstm1 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=256 + lstmp-layer name=lstm2 cell-dim=256 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-6 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.02 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_ovlp + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$ovlp_data_dir \ + --targets-scp="$ovlp_data_dir/overlapped_spech_labels.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1b.sh b/egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1b.sh new file mode 100755 index 00000000000..15235882f90 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_rnn_overlap_1b.sh @@ -0,0 +1,184 @@ +#!/bin/bash + +# This is a script to train a LSTM for overlapped speech activity detection. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. 
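The egs context options in these scripts use bash's old-style $[ ... ] arithmetic expansion, which is equivalent to the modern $(( ... )) form; the total context handed to the egs dumper is the model's own context (read from $dir/configs/vars) plus the configured extra context. A sketch with an illustrative value for model_left_context, which really comes from the sourced vars file:

  model_left_context=30      # illustrative; actually set by '. $dir/configs/vars'
  extra_left_context=40
  echo $[model_left_context + extra_left_context]     # 70
  echo $((model_left_context + extra_left_context))   # 70, the preferred modern form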
+ +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 # Maximum left context in egs apart from TDNN's left context +extra_right_context=0 # Maximum right context in egs apart from TDNN's right context + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=b + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_ovlp/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-2,-1,0,1,2) + + relu-renorm-layer name=tdnn1 dim=512 input=Append(input@-2, input@-1, input, input@1, input@2) + lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=512 + lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-6 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=lstm2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.02 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_ovlp + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$ovlp_data_dir \ + --targets-scp="$ovlp_data_dir/overlapped_spech_labels.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1f.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1f.sh new file mode 100755 index 00000000000..2201f9fd8d1 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1f.sh @@ -0,0 +1,200 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. 
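The min_extra_*_context / extra_*_context pairs defined in the stats-pooling scripts below exist because the trainer varies the amount of chunk context from job to job, so the statistics layers see different context widths during training. Conceptually it draws a value between the minimum and maximum; the following is an illustrative sketch only, not the selection code train_raw_rnn.py actually uses:

  min_extra_left_context=20
  extra_left_context=100
  # draw a per-job left context uniformly in [min, max] (illustrative)
  span=$((extra_left_context - min_extra_left_context + 1))
  job_left_context=$((min_extra_left_context + RANDOM % span))
  echo $job_left_context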
+ +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=f + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_ovlp/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + relu-renorm-layer name=tdnn4 dim=256 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_ovlp + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$ovlp_data_dir \ + --targets-scp="$ovlp_data_dir/overlapped_spech_labels.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1g.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1g.sh new file mode 100755 index 00000000000..81febb5fa09 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1g.sh @@ -0,0 +1,202 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. + +# This scripts is similar to 1f but adds max-change=0.75 and learning-rate-factor=0.1 to the final affine. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. 
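The storage setup inside the stage guards spreads egs archives across several CLSP grid disks: the brace list is expanded by bash before utils/create_split_dir.pl runs, and the perl script then (roughly speaking) creates the listed directories and distributes numbered storage subdirectories over them via symlinks. A sketch of just the expansion, with a shortened path for readability:

  echo /export/b{03,04}/$USER/kaldi-data/storage
  # -> /export/b03/$USER/kaldi-data/storage /export/b04/$USER/kaldi-data/storage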
+ +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=f + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_ovlp/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + relu-renorm-layer name=tdnn4 dim=256 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.1 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_ovlp + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$ovlp_data_dir \ + --targets-scp="$ovlp_data_dir/overlapped_spech_labels.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1h.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1h.sh new file mode 100755 index 00000000000..adc4fc81c08 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1h.sh @@ -0,0 +1,202 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. + +# This scripts is similar to 1f but adds max-change=0.75 and learning-rate-factor=0.02 to the final affine. +# And changed relu-dim to 512. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. 
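samples_per_iter in these scripts targets roughly 400k frames of data per training iteration, so the number of chunks per iteration scales inversely with the chunk width:

  chunk_width=40
  samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`
  echo $samples_per_iter   # 10000 chunks * 40 frames = 400k frames
  # the chunk_width=20 scripts above get 20000 from the same formula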
+ +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=f + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_ovlp/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=512 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=512 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=512 + relu-renorm-layer name=tdnn4 dim=512 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.02 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_ovlp + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$ovlp_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=$samples_per_iter \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \
+      --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \
+      --generate-egs-scp=true \
+      --dir=$dir/egs_ovlp
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  steps/nnet3/train_raw_rnn.py --stage=$train_stage \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --egs.chunk-width=$chunk_width \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.min-chunk-left-context=$min_extra_left_context \
+    --trainer.min-chunk-right-context=$min_extra_right_context \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$ovlp_data_dir \
+    --targets-scp="$ovlp_data_dir/overlapped_speech_labels.scp" \
+    --dir=$dir || exit 1
+fi
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1i.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1i.sh
new file mode 100755
index 00000000000..dcd11ad2aa6
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_overlap_1i.sh
@@ -0,0 +1,202 @@
+#!/bin/bash

+# This is a script to train a time-delay neural network for overlapped speech activity detection
+# using statistic pooling component for long-context information.
+
+# This script is similar to 1f but adds max-change=0.75 and learning-rate-factor=0.02 to the final affine.
+# It is also similar to 1g, but moves the stats pooling to a higher layer and changes the splicing from -9 to -12.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
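A note on the ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} idiom in the train_raw_rnn.py calls above: the ${var:+word} expansion emits the option only when the variable is non-empty, so leaving the filter unset simply drops the option from the command line. A minimal stand-alone illustration (hypothetical option name, not part of these scripts):

    opt=""
    echo train.py ${opt:+--extra-copy-cmd="$opt"}   # prints: train.py
    opt="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |"
    echo train.py ${opt:+--extra-copy-cmd="$opt"}   # prints the option with the whole pipeline attached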
+ +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=90 # Maximum left context in egs apart from TDNN's left context +extra_right_context=15 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-overlapped_speech ark:- ark:- |" # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=f + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_ovlp/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$ovlp_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=512 + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1) dim=512 + stats-layer name=tdnn3_stats config=mean+count(-96:6:12:96) + relu-renorm-layer name=tdnn3 input=Append(tdnn2@-12,tdnn2,tdnn2@6, tdnn3_stats) dim=512 + relu-renorm-layer name=tdnn4 dim=512 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.02 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-overlapped_speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_ovlp + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$ovlp_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=$samples_per_iter \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \
+      --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \
+      --generate-egs-scp=true \
+      --dir=$dir/egs_ovlp
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  steps/nnet3/train_raw_rnn.py --stage=$train_stage \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --egs.chunk-width=$chunk_width \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.min-chunk-left-context=$min_extra_left_context \
+    --trainer.min-chunk-right-context=$min_extra_right_context \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$ovlp_data_dir \
+    --targets-scp="$ovlp_data_dir/overlapped_speech_labels.scp" \
+    --dir=$dir || exit 1
+fi
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1a.sh
old mode 100644
new mode 100755
diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1b.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1b.sh
old mode 100644
new mode 100755
index 888c25295d6..b562a83f6c3
--- a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1b.sh
+++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1b.sh
@@ -1,10 +1,10 @@
 #!/bin/bash

-# This is a script to train a time-delay neural network for overlapped speech activity detection 
+# This is a script to train a time-delay neural network for overlapped speech activity detection
 # using statistic pooling component for long-context information.

 set -o pipefail
-set -e 
+set -e
 set -u

 . 
cmd.sh @@ -21,17 +21,19 @@ egs_opts= # Directly passed to get_egs_multiple_targets.py # TDNN options relu_dim=256 chunk_width=40 # We use chunk training for training TDNN -extra_left_context=100 # Maximum left context in egs apart from TDNN's left context -extra_right_context=20 # Maximum right context in egs apart from TDNN's right context +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context # We randomly select an extra {left,right} context for each job between # min_extra_*_context and extra_*_context so that the network can get used # to different contexts used to compute statistics. -min_extra_left_context=20 +min_extra_left_context=20 min_extra_right_context=0 # training options -num_epochs=1 +num_epochs=2 initial_effective_lrate=0.0003 final_effective_lrate=0.00003 num_jobs_initial=3 @@ -44,7 +46,7 @@ extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp -#extra_left_context=79 +#extra_left_context=79 #extra_right_context=11 egs_dir= @@ -70,13 +72,13 @@ fi dir=$dir${affix:+_$affix} if ! cuda-compiled; then - cat < $dir/configs/network.xconfig input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input output name=output-temp input=Append(-3,-2,-1,0,1,2,3) - + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + relu-renorm-layer name=tdnn4 dim=256 - relu-renorm-layer name=pre-final-speech dim=256 input=tdnn3 - output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e "print ($num_frames_ovlp / $num_frames_sad) ** 0.25"` + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e "print ($num_frames_ovlp / $num_frames_sad) ** 0.25"` input=tdnn4 - relu-renorm-layer name=pre-final-snr dim=256 input=tdnn3 - output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_ovlp / $num_frames_sad) ** 0.25) / $num_snr_bins"` + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_ovlp / $num_frames_sad) ** 0.25) / $num_snr_bins"` input=tdnn4 - relu-renorm-layer name=pre-final-overlapped_speech dim=256 input=tdnn3 - output-layer name=output-overlapped_speech include-log-softmax=true dim=2 + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ --config-dir $dir/configs/ \ --nnet-edits="rename-node old-name=output-speech new-name=output" - + cat <> $dir/configs/vars add_lda=false EOF @@ -148,11 +148,11 @@ if [ -z "$egs_dir" ]; then if [ $stage -le 2 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs_speech/storage ]; then utils/create_split_dir.pl \ - /export/b{03,04,05,06}/$USER/kaldi-data/egs_speech/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage fi - + . $dir/configs/vars - + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ $egs_opts \ --feat.dir="$sad_data_dir" \ @@ -177,7 +177,7 @@ if [ -z "$egs_dir" ]; then fi . $dir/configs/vars - + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ $egs_opts \ --feat.dir="$ovlp_data_dir" \ @@ -196,10 +196,11 @@ if [ -z "$egs_dir" ]; then fi if [ $stage -le 4 ]; then - # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use # the same egs with a different num_chunk_per_minibatch steps/nnet3/multilingual/get_egs.sh \ - --minibatch-size $[chunk_width * num_chunk_per_minibatch * 4] \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ --samples-per-iter $samples_per_iter \ 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi fi @@ -233,7 +234,7 @@ if [ $stage -le 5 ]; then --use-gpu=true \ --use-dense-targets=false \ --feat-dir=$sad_data_dir \ - --targets-scp="$speech_feat_scp" \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ --dir=$dir || exit 1 fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1c.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1c.sh new file mode 100755 index 00000000000..7041b0b3e9b --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1c.sh @@ -0,0 +1,239 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=b + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + relu-renorm-layer name=tdnn4 dim=256 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e "print ($num_frames_ovlp / $num_frames_sad) ** 0.25"` input=tdnn4 + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_ovlp / $num_frames_sad) ** 0.25) / $num_snr_bins"` input=tdnn4 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1d.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1d.sh new file mode 100755 index 00000000000..a361435baa1 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1d.sh @@ -0,0 +1,262 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. + +set -o pipefail +set -e +set -u + +. 
cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=d + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + relu-renorm-layer name=tdnn4 dim=256 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e "print ($num_frames_ovlp / $num_frames_sad) ** 0.25"` input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_ovlp / $num_frames_sad) ** 0.25) / $num_snr_bins"` input=tdnn4 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + 
${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1f.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1f.sh new file mode 100755 index 00000000000..7048c40f62b --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1f.sh @@ -0,0 +1,272 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=d + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + relu-renorm-layer name=tdnn4 dim=256 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=$speech_scale input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print $speech_scale / $num_snr_bins"` input=tdnn4 max-change=0.75 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 objective-scale=$ovlp_scale input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1g.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1g.sh new file mode 100755 index 00000000000..72e26b5347b --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1g.sh @@ -0,0 +1,275 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. 
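The objective-scale values on the multitask outputs in the 1b-1d variants above are the fourth root of the ratio of overlap-egs frames to SAD-egs frames, which softens the imbalance between the two egs sources; this variant uses the precomputed $speech_scale and $ovlp_scale values instead. A worked check of the fourth-root expression with made-up counts (the real num_frames_* values come from the egs directories):

    num_frames_sad=16000000; num_frames_ovlp=1000000
    perl -e "print ($num_frames_ovlp / $num_frames_sad) ** 0.25"   # (1/16)**0.25 = 0.5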
+ +# This script is same as 1e but adds max-change=0.75 for snr and overlapped_speech outputs +# and learning rate factor 0.1 for the final affine components. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +relu_dim=256 +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=g + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=256 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=256 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=256 + relu-renorm-layer name=tdnn4 dim=256 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=$speech_scale input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print $speech_scale / $num_snr_bins"` input=tdnn4 max-change=0.75 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 objective-scale=$ovlp_scale input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.1 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1h.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1h.sh new file mode 100755 index 00000000000..fb1616b9ac7 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_overlap_1h.sh @@ -0,0 +1,276 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for overlapped speech activity detection +# using statistic pooling component for long-context information. 
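As the comment in the multilingual get_egs.sh step above notes, the merged egs are written with a minibatch size of num_chunk_per_minibatch * 4, leaving room to re-split the same egs later under a different num_chunk_per_minibatch. With the default of 64 used by these scripts that works out to (a simple check, not script output):

    num_chunk_per_minibatch=64
    echo $[num_chunk_per_minibatch * 4]   # 256 chunks per merged minibatch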
+ +# This script is same as 1e but adds max-change=0.75 for snr and overlapped_speech outputs +# and learning rate factor 0.01 for the final affine components. +# Decreased learning rate factor of overlapped speech to 0.025 and 0.05 for speech. +# Changed relu-dim to 512 + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +# TDNN options +chunk_width=40 # We use chunk training for training TDNN +num_chunk_per_minibatch=64 + +extra_left_context=100 # Maximum left context in egs apart from TDNN's left context +extra_right_context=20 # Maximum right context in egs apart from TDNN's right context + +# We randomly select an extra {left,right} context for each job between +# min_extra_*_context and extra_*_context so that the network can get used +# to different contexts used to compute statistics. +min_extra_left_context=20 +min_extra_right_context=0 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +ovlp_data_dir=data/train_aztec_unsad_seg_ovlp_corrupted_hires_bp + +#extra_left_context=79 +#extra_right_context=11 + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=g + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $ovlp_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_ovlp_snr/nnet_tdnn +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + output name=output-temp input=Append(-3,-2,-1,0,1,2,3) + + relu-renorm-layer name=tdnn1 input=Append(input@-3, input@-2, input@-1, input, input@1, input@2, input@3) dim=512 + stats-layer name=tdnn2_stats config=mean+count(-99:3:9:99) + relu-renorm-layer name=tdnn2 input=Append(tdnn1@-6, tdnn1, tdnn2_stats) dim=512 + relu-renorm-layer name=tdnn3 input=Append(-9,0,3) dim=512 + relu-renorm-layer name=tdnn4 dim=512 + + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=$speech_scale input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.05 + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print $speech_scale / $num_snr_bins"` input=tdnn4 max-change=0.75 learning-rate-factor=0.5 + + output-layer name=output-overlapped_speech include-log-softmax=true dim=2 objective-scale=$ovlp_scale input=tdnn4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-overlapped_speech.txt max-change=0.75 learning-rate-factor=0.025 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_ovlp/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_ovlp/storage $dir/egs_ovlp/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$ovlp_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/speech_feat.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\"" \ + --targets-parameters="--output-name=output-overlapped_speech --target-type=sparse --dim=2 --targets-scp=$ovlp_data_dir/overlapped_speech_labels_fixed.scp --deriv-weights-scp=$ovlp_data_dir/deriv_weights_for_overlapped_speech.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\"" \ + --generate-egs-scp=true \ + --dir=$dir/egs_ovlp + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_ovlp $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.min-chunk-left-context=$min_extra_left_context \ + --trainer.min-chunk-right-context=$min_extra_right_context \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi From e738191624bce6d8a79de7e410121a59ae89af1b Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 2 Jan 2017 18:38:28 -0500 Subject: [PATCH 192/530] asr_diarization: Modify overlapping sad recipe for AMI --- ...o_corruption_data_dir_overlapped_speech.sh | 38 ++- .../prepare_unsad_overlapped_speech_data.sh | 67 ++++- ...are_unsad_overlapped_speech_data_simple.sh | 156 ++++++++++ .../segmentation/run_segmentation_ami.sh | 270 ++++++++++++++++-- 4 files changed, 494 insertions(+), 37 deletions(-) create mode 100755 
egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data_simple.sh

diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh
index aa1d9adc3e9..991bec96308 100755
--- a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh
+++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_overlapped_speech.sh
@@ -126,8 +126,44 @@ else
   corrupted_data_dir=${corrupted_data_dir}_$feat_suffix
 fi
 
-exit 0
+if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d log_energy/storage ]; then
+  utils/create_split_dir.pl \
+    /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/log_energy/storage log_energy/storage
+fi
+
+if [ $stage -le 5 ]; then
+  utils/copy_data_dir.sh $clean_data_dir ${clean_data_dir}_log_energy
+  steps/make_mfcc.sh --mfcc-config conf/log_energy.conf \
+    --cmd "$cmd" --nj $nj ${clean_data_dir}_log_energy \
+    exp/make_log_energy/${clean_data_id} log_energy
+fi
+
+if [ $stage -le 6 ]; then
+  utils/copy_data_dir.sh $noise_data_dir ${noise_data_dir}_log_energy
+  steps/make_mfcc.sh --mfcc-config conf/log_energy.conf \
+    --cmd "$cmd" --nj $nj ${noise_data_dir}_log_energy \
+    exp/make_log_energy/${noise_data_id} log_energy
+fi
+
+targets_dir=log_snr
+if [ $stage -le 7 ]; then
+  mkdir -p exp/make_log_snr/${corrupted_data_id}
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $targets_dir/storage ]; then
+    utils/create_split_dir.pl \
+      /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$targets_dir/storage $targets_dir/storage
+  fi
+
+  # Get log-SNR targets
+  steps/segmentation/make_snr_targets.sh \
+    --nj $nj --cmd "$cmd" \
+    --target-type Snr --compress false \
+    ${clean_data_dir}_log_energy ${noise_data_dir}_log_energy ${corrupted_data_dir} \
+    exp/make_log_snr/${corrupted_data_id} $targets_dir
+fi
+
+exit 0
+
 if [ $stage -le 5 ]; then
   # clean here is the reverberated first-speaker signal
   utils/copy_data_dir.sh $clean_data_dir ${clean_data_dir}_$feat_suffix

diff --git a/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data.sh b/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data.sh
index 36eb4de2afe..6d21859d7fe 100755
--- a/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data.sh
+++ b/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data.sh
@@ -12,13 +12,14 @@ set -o pipefail
 num_data_reps=5
 nj=40
 cmd=queue.pl
+snr_db_threshold=10
 stage=-1
 
 . utils/parse_options.sh
 
 if [ $# -ne 5 ]; then
   echo "Usage: $0 "
-  echo " e.g.: $0 data/fisher_train_100k_sp_75k_hires_bp data/fisher_train_100k_sp_75k/overlapped_segments_info.txt exp/unsad/make_unsad_fisher_train_100k_sp/tri4_ali_fisher_train_100k_sp_vad_fisher_train_100k_sp exp/unsad/make_overlap_labels/fisher_train_100k_sp_75k overlap_labels"
+  echo " e.g.: $0 data/fisher_train_100k_sp_75k_seg_ovlp_corrupted_hires_bp data/fisher_train_100k_sp_75k_seg_ovlp_corrupted exp/unsad/make_unsad_fisher_train_100k/tri4a_ali_fisher_train_100k_sp_vad_fisher_train_100k_sp exp/unsad overlap_labels"
   exit 1
 fi
 
@@ -56,6 +57,15 @@ if [ $stage -le 1 ]; then
   utils/data/get_utt2num_frames.sh $corrupted_data_dir
   utils/split_data.sh ${corrupted_data_dir} $nj
 
+  # 1) segmentation-init-from-additive-signals-info converts the information
+  # written out by steps/data/make_corrupted_data_dir.py in overlapped_segments_info.txt
+  # into segments.
It then adds those segments to the
+  # segments already present ($corrupted_data_dir/sad_seg.scp).
+  # 2) Retain only the speech segments (label 1) from these.
+  # 3) Convert this to overlap stats using segmentation-get-stats, which
+  # writes for each frame the number of overlapping segments.
+  # 4) Convert this per-frame "alignment" information to segmentation
+  # ($overlap_dir/overlap_seg.*.gz).
   $cmd JOB=1:$nj $overlap_dir/log/get_overlap_seg.JOB.log \
     segmentation-init-from-additive-signals-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \
     --additive-signals-segmentation-rspecifier=scp:$utt_vad_dir/sad_seg.scp \
@@ -69,6 +79,8 @@ fi
 
 if [ $stage -le 2 ]; then
+  # Retain labels >= 2, i.e. regions where more than one speaker overlaps.
+  # Write this out in alignment format as "overlapped_speech_labels".
   $cmd JOB=1:$nj $overlap_dir/log/get_overlapped_speech_labels.JOB.log \
     gunzip -c $overlap_dir/overlap_seg.JOB.gz \| \
     segmentation-post-process --remove-labels=0:1 ark:- ark:- \| \
@@ -82,13 +94,15 @@ fi
 
 if [ $stage -le 3 ]; then
-  # First convert the unreliable segments into a segmentation.
-  # Initialize a segmentation from utt2num_frames and set to 0, the regions
-  # of unreliable segments. At this stage deriv weights is 1 for all but the
-  # unreliable segment regions.
-  # Initialize a segmentation from the overlap labels and retain regions where
-  # there is speech from at least one speaker.
-  # Intersect this with the deriv weights segmentation from above.
+  # 1) Initialize a segmentation where all the frames have label 1 using
+  # segmentation-init-from-lengths.
+  # 2) Use the program segmentation-create-subsegments to set to 0
+  # the regions of unreliable segments read from unreliable_seg.*.gz.
+  # This is the initial deriv weights. At this stage deriv weights is 1 for all
+  # but the unreliable segment regions.
+  # 3) Initialize a segmentation from the overlap labels (overlap_seg.*.gz)
+  # and retain regions where there is speech from at least one speaker.
+  # 4) Intersect this with the deriv weights segmentation from above.
   # At this stage deriv weights is 1 for only the regions where there is
   # at least one speaker and the overlapping segment is not unreliable.
   # Convert this to deriv weights.
@@ -110,8 +124,8 @@ fi
 
 if [ $stage -le 4 ]; then
-  # Get only first speaker labels as speech_feat as we are not sure of the energy levels of the other speaker.
-  $cmd JOB=1:$nj $overlap_dir/log/get_first_speaker_labels.JOB.log \
+  # Find regions where there is at least one speaker speaking.
+  $cmd JOB=1:$nj $overlap_dir/log/get_speech_labels.JOB.log \
     gunzip -c $overlap_dir/overlap_seg.JOB.gz \| \
     segmentation-post-process --remove-labels=0 --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \
     segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \
@@ -125,6 +139,8 @@ fi
 
 if [ $stage -le 5 ]; then
+  # Deriv weights for the speech / non-speech labels are 1 everywhere except
+  # the unreliable regions.
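+  # Illustration with a hypothetical 10-frame utterance (made-up numbers,
+  # not from this recipe's data): if frames 5-7 fall in an unreliable
+  # overlapped region, the deriv weights vector written below would be
+  #   ovlp1_utt1  [ 1 1 1 1 0 0 0 1 1 1 ]
+  # i.e. those frames are simply masked out of the gradient during training.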
  $cmd JOB=1:$nj $unreliable_dir/log/get_deriv_weights.JOB.log \
     utils/filter_scp.pl $corrupted_data_dir/split$nj/JOB/utt2spk $corrupted_data_dir/utt2num_frames \| \
     segmentation-init-from-lengths ark,t:- ark:- \| \
@@ -139,6 +155,37 @@ fi
     cat $overlap_labels_dir/deriv_weights_$corrupted_data_id.${n}.scp
   done > $corrupted_data_dir/deriv_weights.scp
 fi
+
+# Convert the dB threshold to the natural-log domain: SNR_dB / 10 * ln(10)
+# (e.g. 10 dB -> about 2.303), assuming log_snr.scp stores natural-log SNRs.
+snr_threshold=`perl -e "print $snr_db_threshold / 10.0 * log(10.0)"`
+
+cat <<EOF > $overlap_dir/invert_labels.map
+0 1
+1 0
+EOF
+
+if [ $stage -le 6 ]; then
+  if [ ! -f $corrupted_data_dir/log_snr.scp ]; then
+    echo "$0: Could not find $corrupted_data_dir/log_snr.scp. Run local/segmentation/do_corruption_data_dir_overlapped_speech.sh."
+    exit 1
+  fi
+
+  $cmd JOB=1:$nj $overlap_dir/log/fix_overlapped_speech_labels.JOB.log \
+    copy-matrix --apply-power=1 \
+    "scp:utils/filter_scp.pl $corrupted_data_dir/split$nj/JOB/utt2spk $corrupted_data_dir/log_snr.scp |" \
+    ark:- \| extract-column ark:- ark,t:- \| \
+    steps/segmentation/quantize_vector.pl $snr_threshold \| \
+    segmentation-init-from-ali ark,t:- ark:- \| \
+    segmentation-copy --label-map=$overlap_dir/invert_labels.map ark:- ark:- \| \
+    segmentation-intersect-segments --mismatch-label=1000 \
+    "ark:utils/filter_scp.pl $corrupted_data_dir/split$nj/JOB/utt2spk $corrupted_data_dir/overlapped_speech_labels.scp | segmentation-init-from-ali scp:- ark:- | segmentation-copy --keep-label=1 ark:- ark:- |" ark:- ark:- \| \
+    segmentation-copy --keep-label=1 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \
+    ark:- ark,scp:$overlap_labels_dir/overlapped_speech_labels_fixed_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapped_speech_labels_fixed_${corrupted_data_id}.JOB.scp
+
+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/overlapped_speech_labels_fixed_${corrupted_data_id}.$n.scp
+  done > $corrupted_data_dir/overlapped_speech_labels_fixed.scp
+fi
 
 exit 0

diff --git a/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data_simple.sh b/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data_simple.sh
new file mode 100755
index 00000000000..73f2abca566
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data_simple.sh
@@ -0,0 +1,156 @@
+#! /bin/bash
+
+# Copyright 2016 Vimal Manohar
+# Apache 2.0
+
+set -e
+set -u
+set -o pipefail
+
+. path.sh
+
+num_data_reps=5
+nj=40
+cmd=queue.pl
+snr_db_threshold=10
+stage=-1
+
+. utils/parse_options.sh
+
+if [ $# -ne 5 ]; then
+  echo "Usage: $0 <corrupted-data-dir> <orig-corrupted-data-dir> <utt-vad-dir> <tmp-dir> <overlap-labels-dir>"
+  echo " e.g.: $0 data/fisher_train_100k_sp_75k_seg_ovlp_corrupted_hires_bp data/fisher_train_100k_sp_75k_seg_ovlp_corrupted exp/unsad/make_unsad_fisher_train_100k/tri4a_ali_fisher_train_100k_sp_vad_fisher_train_100k_sp exp/unsad overlapping_sad_labels"
+  exit 1
+fi
+
+corrupted_data_dir=$1
+orig_corrupted_data_dir=$2
+utt_vad_dir=$3
+tmpdir=$4
+overlap_labels_dir=$5
+
+overlapped_segments_info=$orig_corrupted_data_dir/overlapped_segments_info.txt
+corrupted_data_id=`basename $orig_corrupted_data_dir`
+
+for f in $corrupted_data_dir/feats.scp $overlapped_segments_info $utt_vad_dir/sad_seg.scp; do
+  [ ! -f $f ] && echo "Could not find file $f" && exit 1
+done
+
+overlap_dir=$tmpdir/make_overlapping_sad_labels_${corrupted_data_id}
+
+# make $overlap_labels_dir an absolute pathname.
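+# (A hypothetical example: a relative "overlap_labels" run from
+# /home/user/egs/aspire/s5 becomes "/home/user/egs/aspire/s5/overlap_labels".
+# Absolute paths matter because the scp files written below embed the ark
+# paths and may be read from other directories.)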
+overlap_labels_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $overlap_labels_dir ${PWD}`
+mkdir -p $overlap_labels_dir
+
+# Combine the VAD from the base recording and the VAD from the overlapping segments
+# to create per-frame labels of the number of overlapping speech segments.
+# Unreliable segments are regions where no VAD labels were available for the
+# overlapping segments. These can be later removed by setting deriv weights to 0.
+
+if [ $stage -le 1 ]; then
+  for n in `seq $num_data_reps`; do
+    cat $utt_vad_dir/sad_seg.scp | \
+      awk -v n=$n '{print "ovlp"n"_"$0}'
+  done | sort -k1,1 > ${corrupted_data_dir}/sad_seg.scp
+  utils/data/get_utt2num_frames.sh $corrupted_data_dir
+  utils/split_data.sh ${corrupted_data_dir} $nj
+
+  # 1) segmentation-init-from-additive-signals-info converts the information
+  # written out by steps/data/make_corrupted_data_dir.py in overlapped_segments_info.txt
+  # into segments. It then adds those segments to the
+  # segments already present ($corrupted_data_dir/sad_seg.scp).
+  # 2) Retain only the speech segments (label 1) from these.
+  # 3) Convert this to overlap stats using segmentation-get-stats, which
+  # writes for each frame the number of overlapping segments.
+  # 4) Convert this per-frame "alignment" information to segmentation
+  # ($overlap_dir/overlap_sad_seg.*.gz).
+  $cmd JOB=1:$nj $overlap_dir/log/get_overlapping_sad_seg.JOB.log \
+    segmentation-init-from-additive-signals-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \
+    --junk-label=10000 \
+    --additive-signals-segmentation-rspecifier=scp:$utt_vad_dir/sad_seg.scp \
+    "scp:utils/filter_scp.pl ${corrupted_data_dir}/split${nj}/JOB/utt2spk $corrupted_data_dir/sad_seg.scp |" \
+    ark,t:$orig_corrupted_data_dir/overlapped_segments_info.txt ark:- \| \
+    segmentation-get-stats --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \
+    ark:- ark:/dev/null ark:/dev/null ark:- \| \
+    classes-per-frame-to-labels --junk-label=10000 ark:- ark:- \| \
+    segmentation-init-from-ali ark:- \
+    "ark:| gzip -c > $overlap_dir/overlap_sad_seg.JOB.gz"
+fi
+
+if [ $stage -le 2 ]; then
+  # Labels >= 2 mark regions where more than one speaker overlaps; call these
+  # overlapping speech. Label 1 is a single speaker and label 0 is silence.
+  # Write this out in alignment format as "overlapping_sad_labels".
+  $cmd JOB=1:$nj $overlap_dir/log/get_overlapping_sad_labels.JOB.log \
+    gunzip -c $overlap_dir/overlap_sad_seg.JOB.gz \| \
+    segmentation-post-process --remove-labels=10000 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \
+    ark,scp:$overlap_labels_dir/overlapping_sad_labels_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapping_sad_labels_${corrupted_data_id}.JOB.scp
+
+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/overlapping_sad_labels_${corrupted_data_id}.$n.scp
+  done > ${corrupted_data_dir}/overlapping_sad_labels.scp
+fi
+
+if [ $stage -le 3 ]; then
+  # Find regions where there is at least one speaker speaking.
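+  # Sketch of the label flow on a hypothetical frame sequence (made-up counts):
+  #   per-frame speaker counts:         0 0 1 2 2 1 0
+  #   after merging labels 1:2 into 1:  0 0 1 1 1 1 0
+  # so the speech_feat written below is 1 wherever at least one speaker is active.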
+  $cmd JOB=1:$nj $overlap_dir/log/get_speech_feat.JOB.log \
+    gunzip -c $overlap_dir/overlap_sad_seg.JOB.gz \| \
+    segmentation-post-process --remove-labels=10000 ark:- ark:- \| \
+    segmentation-post-process --merge-labels=1:2 --merge-dst-label=1 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \
+    steps/segmentation/convert_ali_to_vec.pl \| \
+    vector-to-feat ark:- \
+    ark,scp:$overlap_labels_dir/speech_feat_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/speech_feat_${corrupted_data_id}.JOB.scp
+
+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/speech_feat_${corrupted_data_id}.$n.scp
+  done > ${corrupted_data_dir}/speech_feat.scp
+fi
+
+if [ $stage -le 4 ]; then
+  # Deriv weights are 1 everywhere except the
+  # unreliable regions.
+  $cmd JOB=1:$nj $overlap_dir/log/get_deriv_weights.JOB.log \
+    gunzip -c $overlap_dir/overlap_sad_seg.JOB.gz \| \
+    segmentation-post-process --merge-labels=0:1:2 --merge-dst-label=1 ark:- ark:- \| \
+    segmentation-post-process --merge-labels=10000 --merge-dst-label=0 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \
+    steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \
+    ark,scp:$overlap_labels_dir/deriv_weights_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/deriv_weights_${corrupted_data_id}.JOB.scp
+
+  for n in `seq $nj`; do
+    cat $overlap_labels_dir/deriv_weights_$corrupted_data_id.${n}.scp
+  done > $corrupted_data_dir/deriv_weights.scp
+fi
+
+snr_threshold=`perl -e "print $snr_db_threshold / 10.0 * log(10.0)"`
+
+cat <<EOF > $overlap_dir/invert_labels.map
+0 2
+1 1
+EOF
+
+if [ $stage -le 5 ]; then
+  if [ ! -f $corrupted_data_dir/log_snr.scp ]; then
+    echo "$0: Could not find $corrupted_data_dir/log_snr.scp. Run local/segmentation/do_corruption_data_dir_overlapped_speech.sh."
+ exit 1 + fi + + $cmd JOB=1:$nj $overlap_dir/log/fix_overlapping_sad_labels.JOB.log \ + copy-matrix --apply-power=1 \ + "scp:utils/filter_scp.pl $corrupted_data_dir/split$nj/JOB/utt2spk $corrupted_data_dir/log_snr.scp |" \ + ark:- \| extract-column ark:- ark,t:- \| \ + steps/segmentation/quantize_vector.pl $snr_threshold \| \ + segmentation-init-from-ali ark,t:- ark:- \| \ + segmentation-copy --label-map=$overlap_dir/invert_labels.map ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=1 --subsegment-label=1 \ + "ark:utils/filter_scp.pl $corrupted_data_dir/split$nj/JOB/utt2spk $corrupted_data_dir/overlapping_sad_labels.scp | segmentation-init-from-ali scp:- ark:- |" ark:- ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ + ark:- ark,scp:$overlap_labels_dir/overlapping_sad_labels_fixed_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapping_sad_labels_fixed_${corrupted_data_id}.JOB.scp + + for n in `seq $nj`; do + cat $overlap_labels_dir/overlapping_sad_labels_fixed_${corrupted_data_id}.$n.scp + done > $corrupted_data_dir/overlapping_sad_labels_fixed.scp +fi + +exit 0 diff --git a/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh b/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh index 4b98eec9f43..733c6aa53fe 100755 --- a/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh +++ b/egs/aspire/s5/local/segmentation/run_segmentation_ami.sh @@ -14,8 +14,16 @@ stage=-1 nnet_dir=exp/nnet3_sad_snr/nnet_tdnn_k_n4 extra_left_context=100 extra_right_context=20 +task=SAD iter=final +segmentation_stage=-1 +sil_prior=0.7 +speech_prior=0.3 +min_silence_duration=30 +min_speech_duration=10 +frame_subsampling_factor=3 + . utils/parse_options.sh export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH @@ -77,58 +85,268 @@ echo "A 1" > $dir/channel_map cat $src_dir/data/sdm1/dev/reco2file_and_channel | \ utils/apply_map.pl -f 3 $dir/channel_map > $dir/reco2file_and_channel +cat $src_dir/data/sdm1/dev_ihmdata/reco2utt | \ + awk 'BEGIN{i=1} {print $1" "i; i++;}' > \ + $src_dir/data/sdm1/dev_ihmdata/reco.txt + if [ $stage -le 5 ]; then + utils/data/get_reco2num_frames.sh --frame-shift 0.01 --frame-overlap 0.015 \ + --cmd queue.pl --nj 18 \ + $src_dir/data/sdm1/dev + + # Get a filter that selects only regions within the manual segments. + $train_cmd $dir/log/get_manual_segments_regions.log \ + segmentation-init-from-segments --shift-to-zero=false $src_dir/data/sdm1/dev/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- ark,t:$src_dir/data/sdm1/dev/reco2utt ark:- \| \ + segmentation-create-subsegments --filter-label=1 --subsegment-label=1 \ + "ark:segmentation-init-from-lengths --label=0 ark,t:$src_dir/data/sdm1/dev/reco2num_frames ark:- |" ark:- ark,t:- \| \ + perl -ane '$F[3] = 10000; $F[$#F-1] = 10000; print join(" ", @F) . 
"\n";' \| \ + segmentation-post-process --merge-labels=0:1 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-post-process --merge-labels=10000 --merge-dst-label=0 --merge-adjacent-segments \ + --max-intersegment-length=10000 ark,t:- \ + "ark:| gzip -c > $dir/manual_segments_regions.seg.gz" +fi + +if [ $stage -le 6 ]; then # Reference RTTM where SPEECH frames are obtainted by combining IHM VAD alignments - $train_cmd $dir/log/get_ref_rttm.log \ + $train_cmd $dir/log/get_ref_spk_seg.log \ segmentation-combine-segments scp:$dir/sad_seg.scp \ "ark:segmentation-init-from-segments --shift-to-zero=false $src_dir/data/sdm1/dev_ihmdata/segments ark:- |" \ ark,t:$src_dir/data/sdm1/dev_ihmdata/reco2utt ark:- \| \ + segmentation-copy --keep-label=1 ark:- ark:- \| \ + segmentation-copy --utt2label-rspecifier=ark,t:$src_dir/data/sdm1/dev_ihmdata/reco.txt \ + ark:- ark:- \| \ segmentation-merge-recordings \ "ark,t:utils/utt2spk_to_spk2utt.pl $src_dir/data/sdm1/dev_ihmdata/ihm2sdm_reco |" \ - ark:- ark:- \| \ - segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ - ark:- $dir/ref.rttm + ark:- "ark:| gzip -c > $dir/ref_spk_seg.gz" fi - -if [ $stage -le 6 ]; then - # Get an UEM which evaluates only on the manual segments. - $train_cmd $dir/log/get_uem.log \ - segmentation-init-from-segments --shift-to-zero=false $src_dir/data/sdm1/dev/segments ark:- \| \ - segmentation-combine-segments-to-recordings ark:- ark,t:$src_dir/data/sdm1/dev/reco2utt ark:- \| \ - segmentation-post-process --remove-labels=0 --merge-adjacent-segments \ - --max-intersegment-length=10000 ark:- ark:- \| \ + +if [ $stage -le 7 ]; then + # To get the actual RTTM, we need to add no-score + $train_cmd $dir/log/get_ref_rttm.log \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/dev/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 \ + --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ - ark:- - \| grep SPEECH \| grep SPEAKER \| \ - rttmSmooth.pl -s 0 \| awk '{ print $2" "$3" "$4" "$5+$4 }' '>' $dir/uem + --no-score-label=10000 ark:- $dir/ref.rttm + + # Get RTTM for overlapped speech detection with 3 classes + # 0 -> SILENCE, 1 -> SINGLE_SPEAKER, 2 -> OVERLAP + $train_cmd $dir/log/get_overlapping_rttm.log \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/dev/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 \ + --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm --map-to-speech-and-sil=false 
--reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=10000 ark:- $dir/overlapping_speech_ref.rttm fi +if [ $stage -le 8 ]; then + # Get a filter that selects only regions of speech + $train_cmd $dir/log/get_speech_filter.log \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/dev/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \ + ark:/dev/null ark:- \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=0 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 \ + ark:- "ark:| gzip -c > $dir/manual_segments_speech_regions.seg.gz" +fi + hyp_dir=${nnet_dir}/segmentation_ami_sdm1_dev_whole_bp/ami_sdm1_dev -if [ $stage -le 7 ]; then +if [ $stage -le 9 ]; then steps/segmentation/do_segmentation_data_dir.sh --reco-nj 18 \ --mfcc-config conf/mfcc_hires_bp.conf --feat-affix bp --do-downsampling true \ --extra-left-context $extra_left_context --extra-right-context $extra_right_context \ - --output-name output-speech --frame-subsampling-factor 6 --iter $iter \ + --output-name output-speech --frame-subsampling-factor $frame_subsampling_factor --iter $iter \ + --stage $segmentation_stage \ $src_dir/data/sdm1/dev $nnet_dir mfcc_hires_bp $hyp_dir fi +sad_dir=${nnet_dir}/sad_ami_sdm1_dev_whole_bp/ hyp_dir=${hyp_dir}_seg -if [ $stage -le 8 ]; then +if [ $stage -le 10 ]; then utils/data/get_reco2utt.sh $src_dir/data/sdm1/dev_ihmdata - - steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \ - $hyp_dir/utt2spk \ - $hyp_dir/segments \ - $dir/reco2file_and_channel \ - /dev/stdout | spkr2sad.pl > $hyp_dir/sys.rttm + utils/data/get_reco2utt.sh $hyp_dir + + segmentation-init-from-segments --shift-to-zero=false $hyp_dir/segments ark:- | \ + segmentation-combine-segments-to-recordings ark:- ark,t:$hyp_dir/reco2utt ark:- | \ + segmentation-to-ali --length-tolerance=48 --lengths-rspecifier=ark,t:$src_dir/data/sdm1/dev/reco2num_frames \ + ark:- ark:- | \ + segmentation-init-from-ali ark:- ark:- | \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel ark:- $hyp_dir/sys.rttm + + #steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \ + # $hyp_dir/utt2spk \ + # $hyp_dir/segments \ + # $dir/reco2file_and_channel \ + # /dev/stdout | spkr2sad.pl > $hyp_dir/sys.rttm fi -if [ $stage -le 9 ]; then - md-eval.pl -s <(cat $hyp_dir/sys.rttm | grep speech | rttmSmooth.pl -s 0) \ - -r <(cat $dir/ref.rttm | grep SPEECH | rttmSmooth.pl -s 0 ) \ +if [ $stage -le 11 ]; then + cat < $likes_dir/log_likes.JOB.gz" + cp $sad_dir/num_jobs $likes_dir + fi + else + if [ $stage -le 12 ]; then + steps/segmentation/do_segmentation_data_dir_generic.sh --reco-nj 18 \ + --mfcc-config conf/mfcc_hires_bp.conf --feat-affix bp --do-downsampling true \ + --extra-left-context $extra_left_context --extra-right-context $extra_right_context \ + --segmentation-config conf/segmentation_ovlp.conf \ + --output-name output-overlapping_sad \ + --min-durations 30:10:10 --priors 0.5:0.35:0.15 \ + --sad-name ovlp_sad --segmentation-name segmentation_ovlp_sad \ + --frame-subsampling-factor $frame_subsampling_factor --iter $iter \ + --stage $segmentation_stage \ + $src_dir/data/sdm1/dev $nnet_dir mfcc_hires_bp $hyp_dir + fi + + 
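+  # What follows (stages 14-18) is a small hand-built decoding pipeline for
+  # the 3-class output (reading the options above, phone 1 = silence,
+  # 2 = single speaker, 3 = overlap): prepare_sad_lang.py makes an HMM
+  # topology with one phone per class and per-class minimum durations,
+  # word2prior/G.fst encode the class priors, and decode_sad.sh runs Viterbi
+  # decoding on the log-likelihoods to get per-frame class labels.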
+    likes_dir=${nnet_dir}/ovlp_sad_ami_sdm1_dev_whole_bp/
+  fi
+
+  hyp_dir=${hyp_dir}_seg
+  mkdir -p $hyp_dir
+
+  seg_dir=${nnet_dir}/segmentation_ovlp_sad_ami_sdm1_dev_whole_bp/
+  lang=${seg_dir}/lang
+
+  if [ $stage -le 14 ]; then
+    mkdir -p $lang
+    steps/segmentation/internal/prepare_sad_lang.py \
+      --phone-transition-parameters="--phone-list=1 --min-duration=10 --end-transition-probability=0.1" \
+      --phone-transition-parameters="--phone-list=2 --min-duration=3 --end-transition-probability=0.1" \
+      --phone-transition-parameters="--phone-list=3 --min-duration=3 --end-transition-probability=0.1" $lang
+    cp $lang/phones.txt $lang/words.txt
+
+    feat_dim=2  # dummy. We don't need this.
+    $train_cmd $seg_dir/log/create_transition_model.log gmm-init-mono \
+      $lang/topo $feat_dim - $seg_dir/tree \| \
+      copy-transition-model --binary=false - $seg_dir/trans.mdl || exit 1
+fi
+
+  if [ $stage -le 15 ]; then
+
+    cat > $lang/word2prior < $lang/G.fst
+fi
+
+  if [ $stage -le 16 ]; then
+    $train_cmd $seg_dir/log/make_vad_graph.log \
+      steps/segmentation/internal/make_sad_graph.sh --iter trans \
+      $lang $seg_dir $seg_dir/graph_test || exit 1
+  fi
+
+  if [ $stage -le 17 ]; then
+    steps/segmentation/decode_sad.sh \
+      --acwt 1 --beam 10 --max-active 7000 \
+      $seg_dir/graph_test $likes_dir $seg_dir
+  fi
+
+  if [ $stage -le 18 ]; then
+    cat <<EOF > $hyp_dir/labels_map
+1 0
+2 1
+3 2
+EOF
+    gunzip -c $seg_dir/ali.*.gz | \
+      segmentation-init-from-ali ark:- ark:- | \
+      segmentation-copy --frame-subsampling-factor=$frame_subsampling_factor \
+      --label-map=$hyp_dir/labels_map ark:- ark:- | \
+      segmentation-to-rttm --map-to-speech-and-sil=false \
+      --reco2file-and-channel=$dir/reco2file_and_channel ark:- $hyp_dir/sys.rttm
+  fi
+  # Get RTTM for overlapped speech detection with 3 classes
+  # 0 -> SILENCE, 1 -> SINGLE_SPEAKER, 2 -> OVERLAP
+  $train_cmd $dir/log/get_overlapping_rttm.log \
+    segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/dev/reco2num_frames \
+    "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0 ark:- ark:- |" \
+    ark:/dev/null ark:- \| \
+    segmentation-init-from-ali ark:- ark:- \| \
+    segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 ark:- ark:- \| \
+    segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \
+    ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \
+    segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \
+    segmentation-to-rttm --map-to-speech-and-sil=false --reco2file-and-channel=$dir/reco2file_and_channel \
+    --no-score-label=10000 ark:- $dir/overlapping_speech_ref.rttm
+
+  if [ $stage -le 19 ]; then
+    cat < Date: Mon, 2 Jan 2017 18:40:24 -0500
Subject: [PATCH 193/530] asr_diarization: Fisher+ Babel SAD recipe
---
 .../prepare_babel_data_overlapped_speech.sh   | 112 +++++++++++++++++
 .../prepare_fisher_data_overlapped_speech.sh  | 113 ++++++++++++++++++
 .../s5/local/segmentation/run_fisher_babel.sh |   2 +
 3 files changed, 227 insertions(+)
 create mode 100644 egs/aspire/s5/local/segmentation/prepare_babel_data_overlapped_speech.sh
 create mode 100644 egs/aspire/s5/local/segmentation/prepare_fisher_data_overlapped_speech.sh
 create mode 100644 egs/aspire/s5/local/segmentation/run_fisher_babel.sh

diff --git a/egs/aspire/s5/local/segmentation/prepare_babel_data_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/prepare_babel_data_overlapped_speech.sh
new file mode 100644
index 00000000000..2136f42f322
--- /dev/null
+++ 
b/egs/aspire/s5/local/segmentation/prepare_babel_data_overlapped_speech.sh
@@ -0,0 +1,112 @@
+#! /bin/bash
+
+# Copyright 2016 Vimal Manohar
+# Apache 2.0.
+
+# This script prepares Babel data for training speech activity detection,
+# music detection, and overlapped speech detection systems.
+
+. path.sh
+. cmd.sh
+
+set -e
+set -o pipefail
+set -u
+
+lang_id=assamese
+subset=25  # Number of recordings to keep before speed perturbation and corruption
+utt_subset=30000  # Number of utterances to keep after speed perturbation for adding overlapped-speech
+
+# All the paths below can be modified to any absolute path.
+ROOT_DIR=/home/vimal/workspace_waveform/egs/babel/s5c_assamese/
+
+. utils/parse_options.sh
+
+if [ $# -ne 0 ]; then
+  echo "Usage: $0"
+  echo "This script is to serve as an example recipe."
+  echo "Edit the script to change variables if needed."
+  exit 1
+fi
+
+dir=exp/unsad/make_unsad_babel_${lang_id}_train  # Work dir
+
+# The original data directory which will be converted to a whole (recording-level) directory.
+train_data_dir=$ROOT_DIR/data/train
+
+model_dir=$ROOT_DIR/exp/tri4  # Model directory used for decoding
+sat_model_dir=$ROOT_DIR/exp/tri5  # Model directory used for getting alignments
+lang=$ROOT_DIR/data/lang  # Language directory
+lang_test=$ROOT_DIR/data/lang  # Language directory used to build graph
+
+# Hard code the mapping from phones to SAD labels
+# 0 for silence, 1 for speech, 2 for noise, 3 for unk
+cat <<EOF > $dir/babel_sad.map
+ 3
+_B 3
+_E 3
+_I 3
+_S 3
+ 2
+_B 2
+_E 2
+_I 2
+_S 2
+ 2
+_B 2
+_E 2
+_I 2
+_S 2
+SIL 0
+SIL_B 0
+SIL_E 0
+SIL_I 0
+SIL_S 0
+EOF
+
+# Expecting the user to have done run.sh to have $model_dir,
+# $sat_model_dir, $lang, $lang_test, $train_data_dir
+local/segmentation/prepare_unsad_data.sh \
+  --sad-map $dir/babel_sad.map \
+  --config-dir $ROOT_DIR/conf \
+  --reco-nj 40 --nj 100 --cmd "$train_cmd" \
+  --sat-model $sat_model_dir \
+  --lang-test $lang_test \
+  $train_data_dir $lang $model_dir $dir
+
+orig_data_dir=${train_data_dir}_sp
+
+data_dir=${train_data_dir}_whole
+
+if [ ! -z $subset ]; then
+  # Work on a subset
+  utils/subset_data_dir.sh ${data_dir} $subset \
+    ${data_dir}_$subset
+  data_dir=${data_dir}_$subset
+fi
+
+reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp
+
+# Add noise from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir.sh \
+  --data-dir $data_dir \
+  --reco-vad-dir $reco_vad_dir \
+  --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
+
+# Add music from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir_music.sh \
+  --data-dir $data_dir \
+  --reco-vad-dir $reco_vad_dir \
+  --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
+
+if [ ! 
-z $utt_subset ]; then
+  utils/subset_data_dir.sh ${orig_data_dir} $utt_subset \
+    ${orig_data_dir}_`echo $utt_subset | perl -pe 's/000$/k/'`
+  orig_data_dir=${orig_data_dir}_`echo $utt_subset | perl -pe 's/000$/k/'`
+fi
+
+# Add overlapping speech from $orig_data_dir/segments and create a new data directory
+utt_vad_dir=$dir/`basename $sat_model_dir`_ali_`basename $train_data_dir`_sp_vad_`basename $train_data_dir`_sp
+local/segmentation/do_corruption_data_dir_overlapped_speech.sh \
+  --data-dir ${orig_data_dir} \
+  --utt-vad-dir $utt_vad_dir

diff --git a/egs/aspire/s5/local/segmentation/prepare_fisher_data_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/prepare_fisher_data_overlapped_speech.sh
new file mode 100644
index 00000000000..79a03fa9e9d
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/prepare_fisher_data_overlapped_speech.sh
@@ -0,0 +1,113 @@
+#! /bin/bash
+
+# Copyright 2016 Vimal Manohar
+# Apache 2.0.
+
+# This script prepares Fisher data for training speech activity detection,
+# music detection, and overlapped speech detection systems.
+
+. path.sh
+. cmd.sh
+
+if [ $# -ne 0 ]; then
+  echo "Usage: $0"
+  echo "This script is to serve as an example recipe."
+  echo "Edit the script to change variables if needed."
+  exit 1
+fi
+
+dir=exp/unsad/make_unsad_fisher_train_100k  # Work dir
+subset=60  # Number of recordings to keep before speed perturbation and corruption
+utt_subset=75000  # Number of utterances to keep after speed perturbation for adding overlapped-speech
+
+# All the paths below can be modified to any absolute path.
+
+# The original data directory which will be converted to a whole (recording-level) directory.
+train_data_dir=data/fisher_train_100k
+
+model_dir=exp/tri3a  # Model directory used for decoding
+sat_model_dir=exp/tri4a  # Model directory used for getting alignments
+lang=data/lang  # Language directory
+lang_test=data/lang_test  # Language directory used to build graph
+
+# Hard code the mapping from phones to SAD labels
+# 0 for silence, 1 for speech, 2 for noise, 3 for unk
+cat <<EOF > $dir/fisher_sad.map
+sil 0
+sil_B 0
+sil_E 0
+sil_I 0
+sil_S 0
+laughter 2
+laughter_B 2
+laughter_E 2
+laughter_I 2
+laughter_S 2
+noise 2
+noise_B 2
+noise_E 2
+noise_I 2
+noise_S 2
+oov 3
+oov_B 3
+oov_E 3
+oov_I 3
+oov_S 3
+EOF
+
+# Expecting the user to have done run.sh to have $model_dir,
+# $sat_model_dir, $lang, $lang_test, $train_data_dir
+local/segmentation/prepare_unsad_data.sh \
+  --sad-map $dir/fisher_sad.map \
+  --config-dir conf \
+  --reco-nj 40 --nj 100 --cmd "$train_cmd" \
+  --sat-model $sat_model_dir \
+  --lang-test $lang_test \
+  $train_data_dir $lang $model_dir $dir
+
+orig_data_dir=${train_data_dir}_sp
+
+data_dir=${train_data_dir}_whole
+
+if [ ! -z $subset ]; then
+  # Work on a subset
+  utils/subset_data_dir.sh ${data_dir} $subset \
+    ${data_dir}_$subset
+  data_dir=${data_dir}_$subset
+fi
+
+reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp
+
+# Add noise from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir.sh \
+  --num-data-reps 5 \
+  --data-dir $data_dir \
+  --reco-vad-dir $reco_vad_dir \
+  --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
+
+# Add music from MUSAN corpus to data directory and create a new data directory
+local/segmentation/do_corruption_data_dir_music.sh \
+  --num-data-reps 5 \
+  --data-dir $data_dir \
+  --reco-vad-dir $reco_vad_dir \
+  --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf
+
+if [ ! 
-z $utt_subset ]; then
+  utils/subset_data_dir.sh ${orig_data_dir} $utt_subset \
+    ${orig_data_dir}_`echo $utt_subset | perl -pe 's/000$/k/'`
+  orig_data_dir=${orig_data_dir}_`echo $utt_subset | perl -pe 's/000$/k/'`
+fi
+
+# Add overlapping speech from $orig_data_dir/segments and create a new data directory
+utt_vad_dir=$dir/`basename $sat_model_dir`_ali_`basename $train_data_dir`_sp_vad_`basename $train_data_dir`_sp
+local/segmentation/do_corruption_data_dir_overlapped_speech.sh \
+  --nj 40 --cmd queue.pl \
+  --num-data-reps 1 \
+  --data-dir ${orig_data_dir} \
+  --utt-vad-dir $utt_vad_dir
+
+local/segmentation/prepare_unsad_overlapped_speech_labels.sh \
+  --num-data-reps 1 --nj 40 --cmd queue.pl \
+  ${orig_data_dir}_ovlp_corrupted_hires_bp \
+  ${orig_data_dir}_ovlp_corrupted/overlapped_segments_info.txt \
+  $utt_vad_dir exp/make_overlap_labels overlap_labels

diff --git a/egs/aspire/s5/local/segmentation/run_fisher_babel.sh b/egs/aspire/s5/local/segmentation/run_fisher_babel.sh
new file mode 100644
index 00000000000..bdf6d3585f7
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/run_fisher_babel.sh
@@ -0,0 +1,2 @@
+
+utils/combine_data.sh
From d16de41d4b0ccf28f114947bd05b80c5004e88fb Mon Sep 17 00:00:00 2001
From: Vimal Manohar 
Date: Mon, 2 Jan 2017 18:40:54 -0500
Subject: [PATCH 194/530] asr_diarization: Prepare labels for AMI
---
 .../s5/local/segmentation/prepare_ami.sh      | 213 ++++++++++++++++++
 1 file changed, 213 insertions(+)
 create mode 100755 egs/aspire/s5/local/segmentation/prepare_ami.sh

diff --git a/egs/aspire/s5/local/segmentation/prepare_ami.sh b/egs/aspire/s5/local/segmentation/prepare_ami.sh
new file mode 100755
index 00000000000..38ed9559c89
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/prepare_ami.sh
@@ -0,0 +1,213 @@
+#! /bin/bash
+
+# Copyright 2016 Vimal Manohar
+# Apache 2.0.
+
+. cmd.sh
+. path.sh
+
+set -e
+set -o pipefail
+set -u
+
+stage=-1
+
+dataset=dev
+nj=18
+
+. utils/parse_options.sh
+
+export PATH=$KALDI_ROOT/tools/sctk/bin:$PATH
+
+src_dir=/export/a09/vmanoha1/workspace_asr_diarization/egs/ami/s5b  # AMI src_dir
+dir=exp/sad_ami_sdm1_${dataset}/ref
+
+mkdir -p $dir
+
+# Expecting user to have done run.sh to run the AMI recipe in $src_dir for
+# both sdm and ihm microphone conditions
+
+if [ $stage -le 1 ]; then
+  (
+  cd $src_dir
+  local/prepare_parallel_train_data.sh --train-set ${dataset} sdm1
+
+  awk '{print $1" "$2}' $src_dir/data/ihm/${dataset}/segments > \
+    $src_dir/data/ihm/${dataset}/utt2reco
+  awk '{print $1" "$2}' $src_dir/data/sdm1/${dataset}/segments > \
+    $src_dir/data/sdm1/${dataset}/utt2reco
+
+  cat $src_dir/data/sdm1/${dataset}_ihmdata/ihmutt2utt | \
+    utils/filter_scp.pl -f 1 $src_dir/data/ihm/${dataset}/utt2reco | \
+    utils/apply_map.pl -f 1 $src_dir/data/ihm/${dataset}/utt2reco | \
+    utils/filter_scp.pl -f 2 $src_dir/data/sdm1/${dataset}/utt2reco | \
+    utils/apply_map.pl -f 2 $src_dir/data/sdm1/${dataset}/utt2reco | \
+    sort -u > $src_dir/data/sdm1/${dataset}_ihmdata/ihm2sdm_reco
+  )
+fi
+
+[ ! -s $src_dir/data/sdm1/${dataset}_ihmdata/ihm2sdm_reco ] && echo "Empty $src_dir/data/sdm1/${dataset}_ihmdata/ihm2sdm_reco!" 
&& exit 1
+
+phone_map=$dir/phone_map
+if [ $stage -le 2 ]; then
+  (
+  cd $src_dir
+  utils/data/get_reco2utt.sh $src_dir/data/sdm1/${dataset}
+
+  steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" \
+    data/sdm1/${dataset}_ihmdata exp/sdm1/make_mfcc mfcc_sdm1
+  steps/compute_cmvn_stats.sh \
+    data/sdm1/${dataset}_ihmdata exp/sdm1/make_mfcc mfcc_sdm1
+  utils/fix_data_dir.sh data/sdm1/${dataset}_ihmdata
+  )
+
+  steps/segmentation/get_sad_map.py \
+    $src_dir/data/lang | utils/sym2int.pl -f 1 $src_dir/data/lang/phones.txt > \
+    $phone_map
+fi
+
+if [ $stage -le 3 ]; then
+  # Expecting user to have run local/run_cleanup_segmentation.sh in $src_dir
+  (
+  cd $src_dir
+  steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
+    data/sdm1/${dataset}_ihmdata data/lang \
+    exp/ihm/tri3_cleaned \
+    exp/sdm1/tri3_cleaned_${dataset}_ihmdata
+  )
+fi
+
+if [ $stage -le 4 ]; then
+  steps/segmentation/internal/convert_ali_to_vad.sh --cmd "$train_cmd" \
+    $src_dir/exp/sdm1/tri3_cleaned_${dataset}_ihmdata $phone_map $dir
+fi
+
+echo "A 1" > $dir/channel_map
+cat $src_dir/data/sdm1/${dataset}/reco2file_and_channel | \
+  utils/apply_map.pl -f 3 $dir/channel_map > $dir/reco2file_and_channel
+
+utils/data/get_reco2utt.sh $src_dir/data/sdm1/${dataset}_ihmdata
+cat $src_dir/data/sdm1/${dataset}_ihmdata/reco2utt | \
+  awk 'BEGIN{i=1} {print $1" "i; i++;}' > \
+  $src_dir/data/sdm1/${dataset}_ihmdata/reco.txt
+
+if [ $stage -le 5 ]; then
+  # Reference RTTM where SPEECH frames are obtained by combining IHM VAD alignments
+  cat $src_dir/data/sdm1/${dataset}_ihmdata/reco.txt | \
+    awk '{print $1" 1:"$2" 10000:10000 0:0"}' > $dir/ref_spk2label_map
+
+  $train_cmd $dir/log/get_ref_spk_seg.log \
+    segmentation-combine-segments --include-missing-utt-level-segmentations scp:$dir/sad_seg.scp \
+    "ark:segmentation-init-from-segments --segment-label=10000 --shift-to-zero=false $src_dir/data/sdm1/${dataset}_ihmdata/segments ark:- |" \
+    ark,t:$src_dir/data/sdm1/${dataset}_ihmdata/reco2utt ark:- \| \
+    segmentation-copy --utt2label-map-rspecifier=ark,t:$dir/ref_spk2label_map \
+    ark:- ark:- \| \
+    segmentation-merge-recordings \
+    "ark,t:utils/utt2spk_to_spk2utt.pl $src_dir/data/sdm1/${dataset}_ihmdata/ihm2sdm_reco |" \
+    ark:- "ark:| gzip -c > $dir/ref_spk_seg.gz"
+fi
+
+if [ $stage -le 6 ]; then
+  utils/data/get_reco2num_frames.sh --frame-shift 0.01 --frame-overlap 0.015 \
+    --cmd queue.pl --nj $nj \
+    $src_dir/data/sdm1/${dataset}
+
+  # Get a filter that selects only regions within the manual segments.
+  $train_cmd $dir/log/get_manual_segments_regions.log \
+    segmentation-init-from-segments --shift-to-zero=false $src_dir/data/sdm1/${dataset}/segments ark:- \| \
+    segmentation-combine-segments-to-recordings ark:- ark,t:$src_dir/data/sdm1/${dataset}/reco2utt ark:- \| \
+    segmentation-create-subsegments --filter-label=1 --subsegment-label=1 \
+    "ark:segmentation-init-from-lengths --label=0 ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames ark:- |" ark:- ark,t:- \| \
+    perl -ane '$F[3] = 10000; $F[$#F-1] = 10000; print join(" ", @F) . 
"\n";' \| \ + segmentation-create-subsegments --filter-label=10000 --subsegment-label=10000 \ + ark,t:- "ark:gunzip -c $dir/ref_spk_seg.gz |" ark:- \| \ + segmentation-post-process --merge-labels=0:1 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-post-process --merge-labels=10000 --merge-dst-label=0 --merge-adjacent-segments \ + --max-intersegment-length=10000 ark,t:- \ + "ark:| gzip -c > $dir/manual_segments_regions.seg.gz" +fi + +if [ $stage -le 7 ]; then + # To get the actual RTTM, we need to add no-score + $train_cmd $dir/log/get_ref_rttm.log \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0:10000 ark:- ark:- |" \ + ark:/dev/null ark:- \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 \ + --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=10000 ark:- $dir/ref.rttm +fi + + +if [ $stage -le 8 ]; then + # Get RTTM for overlapped speech detection with 3 classes + # 0 -> SILENCE, 1 -> SINGLE_SPEAKER, 2 -> OVERLAP + $train_cmd $dir/log/get_overlapping_rttm.log \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0:10000 ark:- ark:- |" \ + ark:/dev/null ark:- \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 \ + --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ + segmentation-to-rttm --map-to-speech-and-sil=false --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=10000 ark:- $dir/overlapping_speech_ref.rttm +fi + +if [ $stage -le 9 ]; then + # Get a filter that selects only regions of speech + $train_cmd $dir/log/get_speech_filter.log \ + segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \ + "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0:10000 ark:- ark:- |" \ + ark:/dev/null ark:- \| \ + segmentation-init-from-ali ark:- ark:- \| \ + segmentation-post-process --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-create-subsegments --filter-label=0 --subsegment-label=0 \ + ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ + segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 \ + ark:- "ark:| gzip -c > $dir/manual_segments_speech_regions.seg.gz" +fi + +# make $dir an absolute pathname. 
+dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $dir ${PWD}`
+
+if [ $stage -le 10 ]; then
+  $train_cmd $dir/log/get_overlapping_sad.log \
+    segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \
+    "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0:10000 ark:- ark:- |" \
+    ark:/dev/null ark:- \| \
+    segmentation-init-from-ali ark:- ark:- \| \
+    segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 \
+    --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \
+    segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \
+    segmentation-to-ali ark:- ark,scp:$dir/overlapping_sad_labels.ark,$dir/overlapping_sad_labels.scp
+
+  $train_cmd $dir/log/get_deriv_weights_for_overlapping_sad.log \
+    segmentation-to-ali "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark,t:- \| \
+    steps/segmentation/convert_ali_to_vec.pl \| \
+    copy-vector ark,t:- ark,scp:$dir/deriv_weights_for_overlapping_sad.ark,$dir/deriv_weights_for_overlapping_sad.scp
+fi
+
+if false && [ $stage -le 11 ]; then
+  utils/data/convert_data_dir_to_whole.sh \
+    $src_dir/data/sdm1/${dataset} data/ami_sdm1_${dataset}_whole
+  utils/fix_data_dir.sh \
+    data/ami_sdm1_${dataset}_whole
+  utils/copy_data_dir.sh \
+    data/ami_sdm1_${dataset}_whole data/ami_sdm1_${dataset}_whole_hires_bp
+  utils/data/downsample_data_dir.sh 8000 data/ami_sdm1_${dataset}_whole_hires_bp
+
+  steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_bp.conf --nj $nj \
+    data/ami_sdm1_${dataset}_whole_hires_bp exp/make_hires_bp mfcc_hires_bp
+  steps/compute_cmvn_stats.sh --fake \
+    data/ami_sdm1_${dataset}_whole_hires_bp exp/make_hires_bp mfcc_hires_bp
+  utils/fix_data_dir.sh \
+    data/ami_sdm1_${dataset}_whole_hires_bp
+fi
From 5ac841e33cb47a46f88b66f813e2f1961bea7134 Mon Sep 17 00:00:00 2001
From: Vimal Manohar 
Date: Mon, 2 Jan 2017 18:41:29 -0500
Subject: [PATCH 195/530] asr_diarization: segmentation configs
---
 egs/aspire/s5/conf/segmentation_music.conf  | 14 ++++++++++++++
 egs/aspire/s5/conf/segmentation_ovlp.conf   | 14 ++++++++++++++
 egs/aspire/s5/conf/segmentation_speech.conf | 14 ++++++++++++++
 3 files changed, 42 insertions(+)
 create mode 100644 egs/aspire/s5/conf/segmentation_music.conf
 create mode 100644 egs/aspire/s5/conf/segmentation_ovlp.conf
 create mode 100644 egs/aspire/s5/conf/segmentation_speech.conf

diff --git a/egs/aspire/s5/conf/segmentation_music.conf b/egs/aspire/s5/conf/segmentation_music.conf
new file mode 100644
index 00000000000..28b5feaf5d5
--- /dev/null
+++ b/egs/aspire/s5/conf/segmentation_music.conf
@@ -0,0 +1,14 @@
+# General segmentation options
+pad_length=-1   # Pad speech segments by this many frames on either side
+max_blend_length=-1   # Maximum duration of speech that will be removed as part
+                      # of smoothing process. This is only if there are no other
+                      # speech segments nearby.
+max_intersegment_length=0   # Merge nearby speech segments if the silence
+                            # between them is less than this many frames.
+post_pad_length=-1   # Pad speech segments by this many frames on either side
+                     # after the merging process using max_intersegment_length
+max_segment_length=1000   # Segments that are longer than this are split into
+                          # overlapping frames.
+overlap_length=250   # Overlapping frames when segments are split.
+                     # See the above option.
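+# (Illustration, assuming the usual reading of the two options above: a
+# hypothetical 2500-frame segment would be cut into three pieces of at most
+# 1000 frames, consecutive pieces sharing 250 frames, rather than emitted
+# as one long segment.)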
+min_silence_length=100000 # Min silence length at which to split very long segments diff --git a/egs/aspire/s5/conf/segmentation_ovlp.conf b/egs/aspire/s5/conf/segmentation_ovlp.conf new file mode 100644 index 00000000000..28b5feaf5d5 --- /dev/null +++ b/egs/aspire/s5/conf/segmentation_ovlp.conf @@ -0,0 +1,14 @@ +# General segmentation options +pad_length=-1 # Pad speech segments by this many frames on either side +max_blend_length=-1 # Maximum duration of speech that will be removed as part + # of smoothing process. This is only if there are no other + # speech segments nearby. +max_intersegment_length=0 # Merge nearby speech segments if the silence + # between them is less than this many frames. +post_pad_length=-1 # Pad speech segments by this many frames on either side + # after the merging process using max_intersegment_length +max_segment_length=1000 # Segments that are longer than this are split into + # overlapping frames. +overlap_length=250 # Overlapping frames when segments are split. + # See the above option. +min_silence_length=100000 # Min silence length at which to split very long segments diff --git a/egs/aspire/s5/conf/segmentation_speech.conf b/egs/aspire/s5/conf/segmentation_speech.conf new file mode 100644 index 00000000000..c4c75b212fc --- /dev/null +++ b/egs/aspire/s5/conf/segmentation_speech.conf @@ -0,0 +1,14 @@ +# General segmentation options +pad_length=20 # Pad speech segments by this many frames on either side +max_relabel_length=10 # Maximum duration of speech that will be removed as part + # of smoothing process. This is only if there are no other + # speech segments nearby. +max_intersegment_length=30 # Merge nearby speech segments if the silence + # between them is less than this many frames. +post_pad_length=10 # Pad speech segments by this many frames on either side + # after the merging process using max_intersegment_length +max_segment_length=1000 # Segments that are longer than this are split into + # overlapping frames. +overlap_length=250 # Overlapping frames when segments are split. + # See the above option. +min_silence_length=20 # Min silence length at which to split very long segments From dc9d3c065a842869b8407be0243b98def63c6d43 Mon Sep 17 00:00:00 2001 From: Xingyu Na Date: Tue, 3 Jan 2017 12:02:59 +0800 Subject: [PATCH 196/530] [egs] fix naming of directories in fisher_swbd/s5/ in chain scripts (#1303) --- egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh | 8 ++++---- egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh index d9b11f9fb21..59e31299bc6 100644 --- a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh +++ b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh @@ -38,8 +38,8 @@ fi dir=$dir${affix:+_$affix} train_set=train_nodup_sp -ali_dir=exp/tri5a_ali_nodup -treedir=exp/chain/tri6_tree_11000 +ali_dir=exp/tri5a_ali_nodup_sp +treedir=exp/chain/tri6_tree lang=data/lang_chain # The iVector-extraction and feature-dumping parts are the same as the standard @@ -54,7 +54,7 @@ if [ $stage -le 9 ]; then # use the same num-jobs as the alignments nj=$(cat $ali_dir/num_jobs) || exit 1; steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/$train_set \ - data/lang exp/tri5a exp/tri5a_lats_nodup_sp + data/lang exp/tri5a exp/tri5a_lats_nodup_sp || exit 1; rm exp/tri5a_lats_nodup_sp/fsts.*.gz # save space fi @@ -75,7 +75,7 @@ if [ $stage -le 11 ]; then # Build a tree using our new topology. 
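   # (Note, in this and the following tdnn script: 11000 is the requested
   # number of tree leaves, and --frame-subsampling-factor 3 matches the
   # reduced output frame rate of the chain model.)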
steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ --leftmost-questions-truncate -1 \ - --cmd "$train_cmd" 11000 data/$train_set $lang $ali_dir $treedir + --cmd "$train_cmd" 11000 data/$train_set $lang $ali_dir $treedir || exit 1; fi if [ $stage -le 12 ]; then diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh index ba2962e927a..f21fb307c92 100644 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh @@ -34,8 +34,8 @@ fi dir=${dir}${affix:+_$affix} train_set=train_nodup_sp -ali_dir=exp/tri5a_ali_nodup -treedir=exp/chain/tri6_tree_11000 +ali_dir=exp/tri5a_ali_nodup_sp +treedir=exp/chain/tri6_tree lang=data/lang_chain @@ -51,7 +51,7 @@ if [ $stage -le 9 ]; then # use the same num-jobs as the alignments nj=$(cat $ali_dir/num_jobs) || exit 1; steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/$train_set \ - data/lang exp/tri5a exp/tri5a_lats_nodup_sp + data/lang exp/tri5a exp/tri5a_lats_nodup_sp || exit 1; rm exp/tri5a_lats_nodup_sp/fsts.*.gz # save space fi @@ -72,7 +72,7 @@ if [ $stage -le 11 ]; then # Build a tree using our new topology. steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ --leftmost-questions-truncate -1 \ - --cmd "$train_cmd" 11000 data/$train_set $lang $ali_dir $treedir + --cmd "$train_cmd" 11000 data/$train_set $lang $ali_dir $treedir || exit 1; fi if [ $stage -le 12 ]; then From 5c74b2c6b87651ce145d2638d1406a093d0b997f Mon Sep 17 00:00:00 2001 From: Xingyu Na Date: Tue, 3 Jan 2017 13:28:18 +0800 Subject: [PATCH 197/530] fix: chain do not need sp align (#1304) --- egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh | 7 ++++--- egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh | 8 +++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh index 59e31299bc6..b4b830bd57b 100644 --- a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh +++ b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh @@ -37,8 +37,9 @@ EOF fi dir=$dir${affix:+_$affix} +build_tree_train_set=train_nodup train_set=train_nodup_sp -ali_dir=exp/tri5a_ali_nodup_sp +build_tree_ali_dir=exp/tri5a_ali treedir=exp/chain/tri6_tree lang=data/lang_chain @@ -52,7 +53,7 @@ local/nnet3/run_ivector_common.sh --stage $stage \ if [ $stage -le 9 ]; then # Get the alignments as lattices (gives the CTC training more freedom). # use the same num-jobs as the alignments - nj=$(cat $ali_dir/num_jobs) || exit 1; + nj=$(cat $build_tree_ali_dir/num_jobs) || exit 1; steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/$train_set \ data/lang exp/tri5a exp/tri5a_lats_nodup_sp || exit 1; rm exp/tri5a_lats_nodup_sp/fsts.*.gz # save space @@ -75,7 +76,7 @@ if [ $stage -le 11 ]; then # Build a tree using our new topology. 
steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ --leftmost-questions-truncate -1 \ - --cmd "$train_cmd" 11000 data/$train_set $lang $ali_dir $treedir || exit 1; + --cmd "$train_cmd" 11000 data/$build_tree_train_set $lang $build_tree_ali_dir $treedir || exit 1; fi if [ $stage -le 12 ]; then diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh index f21fb307c92..cbf969e9ad2 100644 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh @@ -33,8 +33,10 @@ EOF fi dir=${dir}${affix:+_$affix} + train_set=train_nodup_sp -ali_dir=exp/tri5a_ali_nodup_sp +build_tree_train_set=train_nodup +build_tree_ali_dir=exp/tri5a_ali treedir=exp/chain/tri6_tree lang=data/lang_chain @@ -49,7 +51,7 @@ local/nnet3/run_ivector_common.sh --stage $stage \ if [ $stage -le 9 ]; then # Get the alignments as lattices (gives the chain training more freedom). # use the same num-jobs as the alignments - nj=$(cat $ali_dir/num_jobs) || exit 1; + nj=$(cat $build_tree_ali_dir/num_jobs) || exit 1; steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/$train_set \ data/lang exp/tri5a exp/tri5a_lats_nodup_sp || exit 1; rm exp/tri5a_lats_nodup_sp/fsts.*.gz # save space @@ -72,7 +74,7 @@ if [ $stage -le 11 ]; then # Build a tree using our new topology. steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ --leftmost-questions-truncate -1 \ - --cmd "$train_cmd" 11000 data/$train_set $lang $ali_dir $treedir || exit 1; + --cmd "$train_cmd" 11000 data/$build_tree_train_set $lang $build_tree_ali_dir $treedir || exit 1; fi if [ $stage -le 12 ]; then From b34e19277b0966a0819d37f5e7487ad86bb84416 Mon Sep 17 00:00:00 2001 From: Xingyu Na Date: Tue, 3 Jan 2017 16:33:36 +0800 Subject: [PATCH 198/530] [src] turn tabs in source code into spaces, per google style guide (#1305) --- src/chainbin/nnet3-chain-merge-egs.cc | 2 +- src/cudamatrix/cu-kernels.cu | 4 +- src/cudamatrix/cu-matrix-speed-test.cc | 29 +++--- src/cudamatrix/cu-matrix-test.cc | 20 ++-- src/cudamatrix/cu-matrix.cc | 52 +++++----- src/cudamatrix/cu-matrix.h | 19 ++-- src/cudamatrix/cu-vector.cc | 14 +-- src/cudamatrix/cublas-wrappers.h | 130 ++++++++++++++----------- src/fstbin/fstmakecontextsyms.cc | 19 ++-- src/fstbin/fstrmsymbols.cc | 14 +-- src/ivector/ivector-extractor.h | 13 ++- src/lm/arpa-file-parser-test.cc | 16 +-- src/matrix/kaldi-matrix.cc | 5 +- src/nnet2/nnet-component.cc | 110 ++++++++++----------- src/nnet3/nnet-general-component.cc | 6 +- src/nnet3/nnet-simple-component.cc | 21 ++-- src/nnet3/nnet-simple-component.h | 6 +- src/nnet3bin/nnet3-merge-egs.cc | 2 +- src/util/basic-filebuf.h | 2 +- 19 files changed, 251 insertions(+), 233 deletions(-) diff --git a/src/chainbin/nnet3-chain-merge-egs.cc b/src/chainbin/nnet3-chain-merge-egs.cc index 543ad3aa049..249b5cec0c0 100644 --- a/src/chainbin/nnet3-chain-merge-egs.cc +++ b/src/chainbin/nnet3-chain-merge-egs.cc @@ -85,7 +85,7 @@ int main(int argc, char *argv[]) { num_read++; if (minibatch_ready || (!discard_partial_minibatches && - (example_reader.Done() && !examples.empty()))) { + (example_reader.Done() && !examples.empty()))) { NnetChainExample merged_eg; MergeChainExamples(compress, &examples, &merged_eg); std::ostringstream ostr; diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu index 795b4321413..505c6f7f67f 100644 --- a/src/cudamatrix/cu-kernels.cu +++ b/src/cudamatrix/cu-kernels.cu @@ -41,7 +41,7 @@ static Real _sum_reduce(Real buffer[]) { __syncthreads(); // 
perform tree-based reduction (sum) while (nTotalThreads > 1) { - int32_cuda halfPoint = ((1 + nTotalThreads) >> 1); // divide by two + int32_cuda halfPoint = ((1 + nTotalThreads) >> 1); // divide by two // only the first half of the threads will be active. if (threadIdx.x >= halfPoint) { // was < // Get the shared value stored by another thread @@ -52,7 +52,7 @@ static Real _sum_reduce(Real buffer[]) { buffer[threadIdx.x - halfPoint] += temp; } __syncthreads(); - nTotalThreads = ((1 + nTotalThreads) >> 1); // divide by two. + nTotalThreads = ((1 + nTotalThreads) >> 1); // divide by two. } // the result return buffer[0]; diff --git a/src/cudamatrix/cu-matrix-speed-test.cc b/src/cudamatrix/cu-matrix-speed-test.cc index 12b274dc4dc..37257522fa8 100644 --- a/src/cudamatrix/cu-matrix-speed-test.cc +++ b/src/cudamatrix/cu-matrix-speed-test.cc @@ -164,8 +164,8 @@ template void TestCuMatrixTransposeCross(int32 dim) { AssertEqual(ref, Mf); } -template void TestCuMatrixAddMat(int32 dim, - int32 num_row_blocks, int32 num_col_blocks) { +template void TestCuMatrixAddMat(int32 dim, int32 num_row_blocks, + int32 num_col_blocks) { BaseFloat time_in_secs = 0.025; CuMatrix A(dim, dim), B(dim * num_row_blocks, dim * num_col_blocks); A.SetRandn(); @@ -181,14 +181,15 @@ template void TestCuMatrixAddMat(int32 dim, } BaseFloat fdim = dim; BaseFloat gflops = (fdim * fdim * num_row_blocks * num_col_blocks * iter) - / (tim.Elapsed() * 1.0e+09); + / (tim.Elapsed() * 1.0e+09); KALDI_LOG << "For CuMatrix::AddMat" << NameOf() << ", for dim = " - << dim << "numRowBlocks = "<< num_row_blocks << "numColBlocks = " - << num_col_blocks << ", speed was " << gflops << " gigaflops."; + << dim << "numRowBlocks = "<< num_row_blocks << "numColBlocks = " + << num_col_blocks << ", speed was " << gflops << " gigaflops."; } template void TestCuMatrixAddMatBlocks(int32 dim, - int32 num_row_blocks, int32 num_col_blocks) { + int32 num_row_blocks, + int32 num_col_blocks) { BaseFloat time_in_secs = 0.025; CuMatrix A(dim, dim), B(dim * num_row_blocks, dim * num_col_blocks); A.SetRandn(); @@ -200,10 +201,10 @@ template void TestCuMatrixAddMatBlocks(int32 dim, } BaseFloat fdim = dim; BaseFloat gflops = (fdim * fdim * num_row_blocks * num_col_blocks * iter) - / (tim.Elapsed() * 1.0e+09); + / (tim.Elapsed() * 1.0e+09); KALDI_LOG << "For CuMatrix::AddMatBlocks" << NameOf() << ", for dim = " - << dim << ", numRowBlocks = "<< num_row_blocks << ", numColBlocks = " - << num_col_blocks << ", speed was " << gflops << " gigaflops."; + << dim << ", numRowBlocks = "<< num_row_blocks << ", numColBlocks = " + << num_col_blocks << ", speed was " << gflops << " gigaflops."; } template void TestCuMatrixMatMat(int32 dim) { @@ -235,18 +236,18 @@ template void TestCuMatrixMatMatBatched(int32 dim, int32 batchCou a[i]->SetRandn(); b[i]->SetRandn(); A.push_back(new CuSubMatrix(*(a[i]), 0, a[i]->NumRows(), 0, - a[i]->NumCols())); + a[i]->NumCols())); B.push_back(new CuSubMatrix(*(b[i]), 0, b[i]->NumRows(), 0, - b[i]->NumCols())); + b[i]->NumCols())); C.push_back(new CuSubMatrix(*(c[i]), 0, c[i]->NumRows(), 0, - c[i]->NumCols())); + c[i]->NumCols())); } BaseFloat time_in_secs = 0.025; Timer tim; int32 iter = 0; for (;tim.Elapsed() < time_in_secs; iter++) { AddMatMatBatched(static_cast(1.0), C, A, kNoTrans, B, kNoTrans, - static_cast(0.0)); + static_cast(0.0)); } for (int32 i = 0; i< batchCount; i++) { delete a[i]; delete b[i]; delete c[i]; @@ -256,7 +257,7 @@ template void TestCuMatrixMatMatBatched(int32 dim, int32 batchCou BaseFloat fdim = dim; BaseFloat gflops = 
(fdim * fdim * fdim * iter * batchCount) / (tim.Elapsed() * 1.0e+09); KALDI_LOG << "For CuMatrix::AddMatMatBatched" << NameOf() << ", for dim = " << dim - << ", batchSize = " << batchCount << ", speed was " << gflops << " gigaflops."; + << ", batchSize = " << batchCount << ", speed was " << gflops << " gigaflops."; } template void TestCuMatrixAddDiagVecMat(int32 dim, MatrixTransposeType trans) { diff --git a/src/cudamatrix/cu-matrix-test.cc b/src/cudamatrix/cu-matrix-test.cc index da587e450e3..72abace138d 100644 --- a/src/cudamatrix/cu-matrix-test.cc +++ b/src/cudamatrix/cu-matrix-test.cc @@ -1435,13 +1435,13 @@ static void UnitTestCuMatrixAddMatMatBatched() { Ha[i]->SetRandn(); Hb[i]->SetRandn(); HA.push_back(new SubMatrix(*(Ha[i]), 0, Ha[i]->NumRows(), 0, - Ha[i]->NumCols())); + Ha[i]->NumCols())); HB.push_back(new SubMatrix(*(Hb[i]), 0, Hb[i]->NumRows(), 0, - Hb[i]->NumCols())); + Hb[i]->NumCols())); HC1.push_back(new SubMatrix(*(Hc1[i]), 0, Hc1[i]->NumRows(), 0, - Hc1[i]->NumCols())); + Hc1[i]->NumCols())); HC2.push_back(new SubMatrix(*(Hc2[i]), 0, Hc2[i]->NumRows(), 0, - Hc2[i]->NumCols())); + Hc2[i]->NumCols())); // first create a CuMatrix intance and then creat a CuSubMatrix instance from that Da[i] = new CuMatrix(200, 100); @@ -1451,19 +1451,19 @@ static void UnitTestCuMatrixAddMatMatBatched() { Da[i]->CopyFromMat(*(Ha[i])); Db[i]->CopyFromMat(*(Hb[i])); DA.push_back(new CuSubMatrix(*(Da[i]), 0, Da[i]->NumRows(), 0, - Da[i]->NumCols())); + Da[i]->NumCols())); DB.push_back(new CuSubMatrix(*(Db[i]), 0, Db[i]->NumRows(), 0, - Db[i]->NumCols())); + Db[i]->NumCols())); DC1.push_back(new CuSubMatrix(*(Dc1[i]), 0, Dc1[i]->NumRows(), 0, - Dc1[i]->NumCols())); + Dc1[i]->NumCols())); DC2.push_back(new CuSubMatrix(*(Dc2[i]), 0, Dc2[i]->NumRows(), 0, - Dc2[i]->NumCols())); + Dc2[i]->NumCols())); } AddMatMatBatched(static_cast(0.5f), DC1, DA, kNoTrans, DB, kNoTrans, - static_cast(0.0f)); + static_cast(0.0f)); AddMatMatBatched(static_cast(0.5f), DC2, DA, kTrans, DB, kTrans, - static_cast(0.0f)); + static_cast(0.0f)); // used to store results from DC1 and DC2 for equality check Matrix Hca1(200,200); diff --git a/src/cudamatrix/cu-matrix.cc b/src/cudamatrix/cu-matrix.cc index f16b7f0bf52..c5f41d5a944 100644 --- a/src/cudamatrix/cu-matrix.cc +++ b/src/cudamatrix/cu-matrix.cc @@ -942,7 +942,7 @@ void CuMatrixBase::AddMat(Real alpha, const CuMatrixBase& A, template void CuMatrixBase::AddMatBlocks(Real alpha, const CuMatrixBase &A, - MatrixTransposeType transA) { + MatrixTransposeType transA) { if (num_rows_ == 0 || num_cols_ == 0) return; int32 num_row_blocks, num_col_blocks; if (transA == kNoTrans) { @@ -961,8 +961,8 @@ void CuMatrixBase::AddMatBlocks(Real alpha, const CuMatrixBase &A, GetBlockSizesForSimpleMatrixOperation(NumRows(), NumCols(), &dimGrid, &dimBlock); cuda_add_mat_blocks(dimGrid, dimBlock, alpha, A.data_, num_row_blocks, - num_col_blocks, data_, Dim(), A.Stride(), - (transA == kTrans ? 1 : 0)); + num_col_blocks, data_, Dim(), A.Stride(), + (transA == kTrans ? 
1 : 0)); CU_SAFE_CALL(cudaGetLastError()); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); @@ -980,7 +980,7 @@ void CuMatrixBase::AddMatBlocks(Real alpha, const CuMatrixBase &A, for (int32 i = 0; i < num_row_blocks; i++) { for (int32 j = 0; j < num_col_blocks; j++) { Mat().AddMat(alpha, SubMatrix(A.Mat(), i * nr, nr, j * nc, nc), - transA); + transA); } } } @@ -1097,10 +1097,10 @@ void CuMatrixBase::AddMatMat( if (CuDevice::Instantiate().Enabled()) { Timer tim; CU_SAFE_CALL(cublas_gemm(GetCublasHandle(), - (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), - (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), - m, n, k, alpha, B.data_, B.Stride(), - A.data_, A.Stride(), beta, data_, Stride())); + (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), + (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), + m, n, k, alpha, B.data_, B.Stride(), + A.data_, A.Stride(), beta, data_, Stride())); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else @@ -1151,8 +1151,8 @@ void CuMatrixBase::SymAddMat2( cublasOperation_t trans = (transA == kTrans ? CUBLAS_OP_N : CUBLAS_OP_T); MatrixIndexT A_other_dim = (transA == kNoTrans ? A.num_cols_ : A.num_rows_); CU_SAFE_CALL(cublas_syrk(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, trans, - num_rows_, A_other_dim, alpha, A.Data(), - A.Stride(), beta, this->data_, this->stride_)); + num_rows_, A_other_dim, alpha, A.Data(), A.Stride(), + beta, this->data_, this->stride_)); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else @@ -1977,9 +1977,10 @@ double TraceMatMat(const CuMatrixBase &A, template void AddMatMatBatched(const Real alpha, std::vector* > &C, - const std::vector* > &A, MatrixTransposeType transA, - const std::vector* > &B, MatrixTransposeType transB, - const Real beta) { + const std::vector* > &A, + MatrixTransposeType transA, + const std::vector* > &B, + MatrixTransposeType transB, const Real beta) { KALDI_ASSERT(A.size() == B.size() && B.size() == C.size()); int32 size = A.size(); @@ -2032,11 +2033,12 @@ void AddMatMatBatched(const Real alpha, std::vector* > &C, CU_SAFE_CALL(cudaMemcpy(device_abc_array, host_abc_array, 3*size*sizeof(Real*), cudaMemcpyHostToDevice)); CU_SAFE_CALL(cublas_gemmBatched(GetCublasHandle(), - (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), - (transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), - m, n, k, alpha, device_b_array, B[0]->Stride(), - device_a_array, A[0]->Stride(), beta, - device_c_array, C[0]->Stride(), size)); + (transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N), + (transA==kTrans? 
CUBLAS_OP_T:CUBLAS_OP_N), + m, n, k, alpha, device_b_array, + B[0]->Stride(), device_a_array, + A[0]->Stride(), beta, device_c_array, + C[0]->Stride(), size)); CuDevice::Instantiate().Free(device_abc_array); delete[] host_abc_array; @@ -2053,15 +2055,17 @@ void AddMatMatBatched(const Real alpha, std::vector* > &C, template void AddMatMatBatched(const float alpha, std::vector* > &C, - const std::vector* > &A, MatrixTransposeType transA, - const std::vector* > &B, MatrixTransposeType transB, - const float beta); + const std::vector* > &A, + MatrixTransposeType transA, + const std::vector* > &B, + MatrixTransposeType transB, const float beta); template void AddMatMatBatched(const double alpha, std::vector* > &C, - const std::vector* > &A, MatrixTransposeType transA, - const std::vector* > &B, MatrixTransposeType transB, - const double beta); + const std::vector* > &A, + MatrixTransposeType transA, + const std::vector* > &B, + MatrixTransposeType transB, const double beta); template void CuMatrixBase::CopyRowsFromVec(const CuVectorBase &v) { diff --git a/src/cudamatrix/cu-matrix.h b/src/cudamatrix/cu-matrix.h index 38a6c25071b..4601080ad37 100644 --- a/src/cudamatrix/cu-matrix.h +++ b/src/cudamatrix/cu-matrix.h @@ -51,9 +51,11 @@ Real TraceMatMat(const CuMatrixBase &A, const CuMatrixBase &B, /// C[i] = alpha * A[i](^T)*B[i](^T) + beta * C[i]. template void AddMatMatBatched(const Real alpha, std::vector* > &C, - const std::vector* > &A, MatrixTransposeType transA, - const std::vector* > &B, MatrixTransposeType transB, - const Real beta); + const std::vector* > &A, + MatrixTransposeType transA, + const std::vector* > &B, + MatrixTransposeType transB, + const Real beta); /** * Matrix for CUDA computing. @@ -182,10 +184,13 @@ class CuMatrixBase { const CuSparseMatrix &B, MatrixTransposeType trans); - friend void AddMatMatBatched(const Real alpha, std::vector* > &C, - const std::vector* > &A, MatrixTransposeType transA, - const std::vector* > &B, MatrixTransposeType transB, - const Real beta); + friend void AddMatMatBatched(const Real alpha, + std::vector* > &C, + const std::vector* > &A, + MatrixTransposeType transA, + const std::vector* > &B, + MatrixTransposeType transB, + const Real beta); /// Adds "value" to the diagonal elements of the matrix. The matrix /// *this does not have to be square. diff --git a/src/cudamatrix/cu-vector.cc b/src/cudamatrix/cu-vector.cc index c91a49ca2e4..1a0eefa7019 100644 --- a/src/cudamatrix/cu-vector.cc +++ b/src/cudamatrix/cu-vector.cc @@ -50,7 +50,7 @@ Real VecVec(const CuVectorBase &a, if (CuDevice::Instantiate().Enabled()) { Timer tim; CU_SAFE_CALL(cublas_dot(GetCublasHandle(), a.Dim(), a.Data(), 1, b.Data(), - 1, &result)); + 1, &result)); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else #endif @@ -444,9 +444,9 @@ void CuVectorBase::AddMatVec(const Real alpha, // Everything is backwards in CuBlas. We need to reverse rows, columns, // transpose-ness. CU_SAFE_CALL(cublas_gemv(GetCublasHandle(), - (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T), - M.NumCols(), M.NumRows(), alpha, M.Data(), - M.Stride(), v.Data(), 1, beta, data_, 1)); + (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T), + M.NumCols(), M.NumRows(), alpha, M.Data(), + M.Stride(), v.Data(), 1, beta, data_, 1)); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else @@ -471,7 +471,7 @@ void CuVectorBase::AddSpVec(const Real alpha, // Note: in our opinion the CuSpMatrix represents a lower-triangular matrix, but // in CUBLAS, for some stupid reason, everything is reversed. 
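    // A worked example of the reversal (an illustration, not from the
    // original source): a 3x3 symmetric matrix packed lower-triangular in
    // row-major order,
    //   [ a      ]
    //   [ b c    ]
    //   [ d e f  ]   packed as {a, b, c, d, e, f},
    // reads, when the same six values are interpreted as an upper-triangular
    // matrix packed column-by-column, as (0,0)=a, (0,1)=b, (1,1)=c, (0,2)=d,
    // (1,2)=e, (2,2)=f -- i.e. the transpose, which for a symmetric matrix
    // is the same matrix; hence CUBLAS_FILL_MODE_UPPER in the call below.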
CU_SAFE_CALL(cublas_spmv(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, Dim(), - alpha, M.Data(), v.Data(), 1, beta, data_, 1)); + alpha, M.Data(), v.Data(), 1, beta, data_, 1)); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else @@ -636,7 +636,7 @@ void CuVectorBase::MulTp(const CuTpMatrix &M, const MatrixTransposeT if (dim_ == 0) return; Timer tim; cublas_tpmv(GetCublasHandle(), (trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T), - M.NumRows(), M.Data(), data_, 1); + M.NumRows(), M.Data(), data_, 1); CuDevice::Instantiate().AccuProfile("CuVectorBase::MulTp", tim.Elapsed()); } else #endif @@ -1081,7 +1081,7 @@ void CuVectorBase::CopyDiagFromMat(const CuMatrix &M) { KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols())); Timer tim; CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, M.Data(), M.Stride() + 1, - data_, 1)); + data_, 1)); CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else diff --git a/src/cudamatrix/cublas-wrappers.h b/src/cudamatrix/cublas-wrappers.h index 69a591240a5..b8ea7c8b2c6 100644 --- a/src/cudamatrix/cublas-wrappers.h +++ b/src/cudamatrix/cublas-wrappers.h @@ -25,79 +25,89 @@ namespace kaldi { #if HAVE_CUDA == 1 -inline cublasStatus_t cublas_gemm(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n,int k, float alpha, - const float *A, int lda, const float *B, int ldb, float beta, - float *C, int ldc) { +inline cublasStatus_t cublas_gemm( + cublasHandle_t handle, cublasOperation_t transa, + cublasOperation_t transb, int m, int n,int k, float alpha, + const float *A, int lda, const float *B, int ldb, float beta, + float *C, int ldc) { return cublasSgemm_v2(handle,transa,transb,m,n,k,&alpha,A,lda,B,ldb,&beta,C,ldc); } -inline cublasStatus_t cublas_gemm(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n,int k, double alpha, - const double *A, int lda, const double *B, int ldb, double beta, - double *C, int ldc) { +inline cublasStatus_t cublas_gemm( + cublasHandle_t handle, cublasOperation_t transa, + cublasOperation_t transb, int m, int n,int k, double alpha, + const double *A, int lda, const double *B, int ldb, double beta, + double *C, int ldc) { return cublasDgemm_v2(handle,transa,transb,m,n,k,&alpha,A,lda,B,ldb,&beta,C,ldc); } -inline cublasStatus_t cublas_ger(cublasHandle_t handle, int m, int n, float alpha, - const float *x, int incx, const float *y, int incy, float *A, int lda ) { +inline cublasStatus_t cublas_ger( + cublasHandle_t handle, int m, int n, float alpha, + const float *x, int incx, const float *y, int incy, float *A, int lda ) { return cublasSger_v2(handle,m,n,&alpha,x,incx,y,incy,A,lda); } inline cublasStatus_t cublas_ger(cublasHandle_t handle, int m, int n, double alpha, const double *x, int incx, const double *y, int incy, double *A, int lda ) { return cublasDger_v2(handle,m,n,&alpha,x,incx,y,incy,A,lda); } -inline cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n, int k, float alpha, - const float *A[], int lda, const float *B[], int ldb, float beta, - float *C[], int ldc, int batchCount) { +inline cublasStatus_t cublas_gemmBatched( + cublasHandle_t handle, cublasOperation_t transa, + cublasOperation_t transb, int m, int n, int k, float alpha, + const float *A[], int lda, const float *B[], int ldb, float beta, + float *C[], int ldc, int batchCount) { return cublasSgemmBatched(handle, transa, transb, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc, batchCount); } -inline 
cublasStatus_t cublas_gemmBatched(cublasHandle_t handle, cublasOperation_t transa, - cublasOperation_t transb, int m, int n, int k, double alpha, - const double *A[], int lda, const double *B[], int ldb, double beta, - double *C[], int ldc, int batchCount) { +inline cublasStatus_t cublas_gemmBatched( + cublasHandle_t handle, cublasOperation_t transa, + cublasOperation_t transb, int m, int n, int k, double alpha, + const double *A[], int lda, const double *B[], int ldb, double beta, + double *C[], int ldc, int batchCount) { return cublasDgemmBatched(handle, transa, transb, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc, batchCount); } -inline cublasStatus_t cublas_trsm(cublasHandle_t handle, int m, int n, float alpha, - const float* A, int lda, float* B, int ldb) { +inline cublasStatus_t cublas_trsm(cublasHandle_t handle, int m, int n, + float alpha, const float* A, int lda, + float* B, int ldb) { return cublasStrsm_v2(handle,CUBLAS_SIDE_LEFT,CUBLAS_FILL_MODE_UPPER,CUBLAS_OP_N,CUBLAS_DIAG_NON_UNIT,m,n,&alpha,A,lda,B,ldb); } -inline cublasStatus_t cublas_trsm(cublasHandle_t handle, int m, int n, double alpha, - const double* A, int lda, double* B, int ldb) { +inline cublasStatus_t cublas_trsm(cublasHandle_t handle, int m, int n, + double alpha, const double* A, int lda, + double* B, int ldb) { return cublasDtrsm_v2(handle,CUBLAS_SIDE_LEFT,CUBLAS_FILL_MODE_UPPER,CUBLAS_OP_N,CUBLAS_DIAG_NON_UNIT,m,n,&alpha,A,lda,B,ldb); } -inline cublasStatus_t cublas_syrk(cublasHandle_t handle, cublasFillMode_t uplo, - cublasOperation_t trans, int n, int k, float alpha, - const float *A, int lda, float beta, float *C, int ldc) { +inline cublasStatus_t cublas_syrk( + cublasHandle_t handle, cublasFillMode_t uplo, + cublasOperation_t trans, int n, int k, float alpha, + const float *A, int lda, float beta, float *C, int ldc) { return cublasSsyrk_v2(handle,uplo,trans,n,k,&alpha,A,lda,&beta,C,ldc); } -inline cublasStatus_t cublas_syrk(cublasHandle_t handle, cublasFillMode_t uplo, - cublasOperation_t trans, int n, int k, double alpha, - const double *A, int lda, double beta, double *C, int ldc) { +inline cublasStatus_t cublas_syrk( + cublasHandle_t handle, cublasFillMode_t uplo, + cublasOperation_t trans, int n, int k, double alpha, + const double *A, int lda, double beta, double *C, int ldc) { return cublasDsyrk_v2(handle,uplo,trans,n,k,&alpha,A,lda,&beta,C,ldc); } inline cublasStatus_t cublas_dot(cublasHandle_t handle, int n, const float *x, - int incx, const float *y, int incy, float *result) { + int incx, const float *y, int incy, + float *result) { return cublasSdot_v2(handle, n, x, incx, y, incy, result); } inline cublasStatus_t cublas_dot(cublasHandle_t handle, int n, const double *x, - int incx, const double *y, int incy, double *result) { + int incx, const double *y, int incy, + double *result) { return cublasDdot_v2(handle, n, x, incx, y, incy, result); } inline cublasStatus_t cublas_asum(cublasHandle_t handle, int n, const float* x, - int incx, float *result) { + int incx, float *result) { return cublasSasum_v2(handle, n, x, incx, result); } inline cublasStatus_t cublas_asum(cublasHandle_t handle, int n, const double* x, - int incx, double *result) { + int incx, double *result) { return cublasDasum_v2(handle, n, x, incx, result); } inline cublasStatus_t cublas_nrm2(cublasHandle_t handle, int n, const float* x, - int incx, float *result) { + int incx, float *result) { return cublasSnrm2_v2(handle, n, x, incx, result); - } inline cublasStatus_t cublas_nrm2(cublasHandle_t handle, int n, const double* x, - 
int incx, double *result) { + int incx, double *result) { return cublasDnrm2_v2(handle, n, x, incx, result); } inline cudaError_t cublas_copy(cublasHandle_t handle, int n, const float* x, @@ -115,49 +125,53 @@ inline cudaError_t cublas_copy(cublasHandle_t handle, int n, const double* x, return cudaGetLastError(); } inline cublasStatus_t cublas_copy(cublasHandle_t handle, int n, const float* x, - int incx, float* y, int incy) { + int incx, float* y, int incy) { return cublasScopy_v2(handle,n,x,incx,y,incy); } inline cublasStatus_t cublas_copy(cublasHandle_t handle, int n, const double* x, - int incx, double* y, int incy) { + int incx, double* y, int incy) { return cublasDcopy_v2(handle,n,x,incx,y,incy); } inline cublasStatus_t cublas_scal(cublasHandle_t handle, int n, float alpha, - float* mat, int incx) { + float* mat, int incx) { return cublasSscal_v2(handle, n, &alpha, mat, incx); } inline cublasStatus_t cublas_scal(cublasHandle_t handle, int n, double alpha, - double* mat, int incx) { + double* mat, int incx) { return cublasDscal_v2(handle, n, &alpha, mat, incx); } inline cublasStatus_t cublas_axpy(cublasHandle_t handle, int n, float alpha, - const float* x, int incx, float* y, int incy) { + const float* x, int incx, float* y, int incy) { return cublasSaxpy_v2(handle, n, &alpha, x, incx, y, incy); } inline cublasStatus_t cublas_axpy(cublasHandle_t handle, int n, double alpha, - const double* x, int incx, double* y, int incy) { + const double* x, int incx, double* y, int incy) { return cublasDaxpy_v2(handle, n, &alpha, x, incx, y, incy); } -inline cublasStatus_t cublas_gemv(cublasHandle_t handle, cublasOperation_t trans, - int m, int n, float alpha, const float* A, int lda, const float* x, - int incx, float beta, float* y, int incy) { +inline cublasStatus_t cublas_gemv( + cublasHandle_t handle, cublasOperation_t trans, + int m, int n, float alpha, const float* A, int lda, const float* x, + int incx, float beta, float* y, int incy) { return cublasSgemv_v2(handle,trans,m,n,&alpha,A,lda,x,incx,&beta,y,incy); } -inline cublasStatus_t cublas_gemv(cublasHandle_t handle, cublasOperation_t trans, - int m, int n, double alpha, const double* A, int lda, const double* x, - int incx, double beta, double* y, int incy) { +inline cublasStatus_t cublas_gemv( + cublasHandle_t handle, cublasOperation_t trans, + int m, int n, double alpha, const double* A, int lda, const double* x, + int incx, double beta, double* y, int incy) { return cublasDgemv_v2(handle,trans,m,n,&alpha,A,lda,x,incx,&beta,y,incy); } -inline cublasStatus_t cublas_spmv(cublasHandle_t handle, cublasFillMode_t uplo, - int n, float alpha, const float *AP, const float *x, int incx, - float beta, float *y, int incy) { +inline cublasStatus_t cublas_spmv( + cublasHandle_t handle, cublasFillMode_t uplo, + int n, float alpha, const float *AP, const float *x, int incx, + float beta, float *y, int incy) { return cublasSspmv_v2(handle, uplo, n, &alpha, AP, x, incx, &beta, y, incy); } -inline cublasStatus_t cublas_spmv(cublasHandle_t handle, cublasFillMode_t uplo, - int n, double alpha, const double *AP, const double *x, int incx, - double beta, double *y, int incy) { +inline cublasStatus_t cublas_spmv( + cublasHandle_t handle, cublasFillMode_t uplo, + int n, double alpha, const double *AP, const double *x, int incx, + double beta, double *y, int incy) { return cublasDspmv_v2(handle, uplo, n, &alpha, AP, x, incx, &beta, y, incy); } @@ -167,20 +181,22 @@ inline cublasStatus_t cublas_spmv(cublasHandle_t handle, cublasFillMode_t uplo, // row-by-row, but 
CUDA views the same layout as upper-triangular, // column-by-column. inline cublasStatus_t cublas_tpmv(cublasHandle_t handle, cublasOperation_t trans, - int n, const float* Ap, float* x, int incx) { + int n, const float* Ap, float* x, int incx) { return cublasStpmv_v2(handle, CUBLAS_FILL_MODE_UPPER, trans, CUBLAS_DIAG_NON_UNIT, n, Ap, x, incx); } inline cublasStatus_t cublas_tpmv(cublasHandle_t handle, cublasOperation_t trans, - int n, const double* Ap, double* x,int incx) { + int n, const double* Ap, double* x,int incx) { return cublasDtpmv_v2(handle, CUBLAS_FILL_MODE_UPPER, trans, CUBLAS_DIAG_NON_UNIT, n, Ap, x, incx); } inline cublasStatus_t cublas_spr(cublasHandle_t handle, cublasFillMode_t uplo, - int n, float alpha, const float *x, int incx, float *AP) { + int n, float alpha, const float *x, int incx, + float *AP) { return cublasSspr_v2(handle, uplo, n, &alpha, x, incx, AP); } inline cublasStatus_t cublas_spr(cublasHandle_t handle, cublasFillMode_t uplo, - int n, double alpha, const double *x, int incx, double *AP) { + int n, double alpha, const double *x, int incx, + double *AP) { return cublasDspr_v2(handle, uplo, n, &alpha, x, incx, AP); } diff --git a/src/fstbin/fstmakecontextsyms.cc b/src/fstbin/fstmakecontextsyms.cc index 9a12c7c05cd..e3c7d279053 100644 --- a/src/fstbin/fstmakecontextsyms.cc +++ b/src/fstbin/fstmakecontextsyms.cc @@ -35,14 +35,14 @@ fstrandgen C.fst | fstprint --isymbols=context_syms.txt --osymbols=phones.txt Example output: -0 1 #0 #0 -1 2 #-1 a -2 3 /a/a a -3 4 a/a/a a -4 5 #0 #0 -5 6 a/a/b b -6 7 a/b/ #$ -7 8 #1 #1 +0 1 #0 #0 +1 2 #-1 a +2 3 /a/a a +3 4 a/a/a a +4 5 #0 #0 +5 6 a/a/b b +6 7 a/b/ #$ +7 8 #1 #1 8 */ @@ -67,7 +67,7 @@ int main(int argc, char *argv[]) { po.Register("initial-disambig", &initial_disambig, "Name for special disambiguation symbol that occurs at start " "of context-dependent phone sequences"); - + po.Read(argc, argv); if (po.NumArgs() < 2 || po.NumArgs() > 3) { @@ -115,4 +115,3 @@ int main(int argc, char *argv[]) { return -1; } } - diff --git a/src/fstbin/fstrmsymbols.cc b/src/fstbin/fstrmsymbols.cc index 75f5ab18654..e713d9e149f 100644 --- a/src/fstbin/fstrmsymbols.cc +++ b/src/fstbin/fstrmsymbols.cc @@ -167,26 +167,26 @@ int main(int argc, char *argv[]) { ( echo "0 0 1 1"; echo " 0 0 3 2"; echo "0 0"; ) | fstcompile | fstrmsymbols "echo 3; echo 4|" | fstprint # should produce: - # 0 0 1 1 - # 0 0 0 2 + # 0 0 1 1 + # 0 0 0 2 # 0 ( echo "0 0 1 1"; echo " 0 0 3 2"; echo "0 0"; ) | fstcompile | fstrmsymbols --apply-to-output=true "echo 2; echo 3|" | fstprint # should produce: - # 0 0 1 1 - # 0 0 3 0 + # 0 0 1 1 + # 0 0 3 0 # 0 ( echo "0 0 1 1"; echo " 0 0 3 2"; echo "0 0"; ) | fstcompile | fstrmsymbols --remove-arcs=true "echo 3; echo 4|" | fstprint # should produce: - # 0 0 1 1 + # 0 0 1 1 # 0 ( echo "0 0 1 1"; echo " 0 0 3 2"; echo "0 0"; ) | fstcompile | fstrmsymbols --penalty=2 "echo 3; echo 4; echo 5|" | fstprint # should produce: - # 0 0 1 1 - # 0 0 3 2 2 + # 0 0 1 1 + # 0 0 3 2 2 # 0 */ diff --git a/src/ivector/ivector-extractor.h b/src/ivector/ivector-extractor.h index 1a9f01027b5..9ae5cf8e046 100644 --- a/src/ivector/ivector-extractor.h +++ b/src/ivector/ivector-extractor.h @@ -367,12 +367,12 @@ class OnlineIvectorEstimationStats { // Override the default assignment operator inline OnlineIvectorEstimationStats &operator=(const OnlineIvectorEstimationStats &other) { - this->prior_offset_ = other.prior_offset_; - this->max_count_ = other.max_count_; - this->num_frames_ = other.num_frames_; - 
this->quadratic_term_=other.quadratic_term_; - this->linear_term_=other.linear_term_; - return *this; + this->prior_offset_ = other.prior_offset_; + this->max_count_ = other.max_count_; + this->num_frames_ = other.num_frames_; + this->quadratic_term_=other.quadratic_term_; + this->linear_term_=other.linear_term_; + return *this; } protected: @@ -689,4 +689,3 @@ class IvectorExtractorStats { #endif - diff --git a/src/lm/arpa-file-parser-test.cc b/src/lm/arpa-file-parser-test.cc index 5b5421873c4..be69ddc6bf2 100644 --- a/src/lm/arpa-file-parser-test.cc +++ b/src/lm/arpa-file-parser-test.cc @@ -220,18 +220,18 @@ ngram 2=2\n\ ngram 3=2\n\ \n\ \\1-grams: \n\ --5.2 a -3.3\n\ --3.4 \xCE\xB2\n\ -0.0 -2.5\n\ --4.3 \n\ +-5.2 a -3.3\n\ +-3.4 \xCE\xB2\n\ +0.0 -2.5\n\ +-4.3 \n\ \n\ \\2-grams:\t\n\ --1.5 a \xCE\xB2 -3.2\n\ --1.3 a -4.2\n\ +-1.5 a \xCE\xB2 -3.2\n\ +-1.3 a -4.2\n\ \n\ \\3-grams:\n\ --0.3 a \xCE\xB2\n\ --0.2 a \n\ +-0.3 a \xCE\xB2\n\ +-0.2 a \n\ \\end\\"; // Symbol table that is created with predefined test symbols, "a" but no "b". diff --git a/src/matrix/kaldi-matrix.cc b/src/matrix/kaldi-matrix.cc index 34003e8a550..523af1d70ec 100644 --- a/src/matrix/kaldi-matrix.cc +++ b/src/matrix/kaldi-matrix.cc @@ -1443,7 +1443,7 @@ void Matrix::Read(std::istream & is, bool binary, bool add) { delete cur_row; for (size_t i = 0; i < data.size(); i++) if(data[i] != NULL) - delete data[i]; + delete data[i]; // and then go on to "bad" below, where we print error. } bad: @@ -2643,7 +2643,7 @@ void MatrixBase::AddCols(const MatrixBase &src, const MatrixIndexT *index_ptr = &(indices[0]); for (MatrixIndexT c = 0; c < num_cols; c++, index_ptr++) { if (*index_ptr >= 0) - this_data[c] += src_data[*index_ptr]; + this_data[c] += src_data[*index_ptr]; } } } @@ -2859,4 +2859,3 @@ template class SubMatrix; template class SubMatrix; } // namespace kaldi - diff --git a/src/nnet2/nnet-component.cc b/src/nnet2/nnet-component.cc index f807529159e..27ce3111b74 100644 --- a/src/nnet2/nnet-component.cc +++ b/src/nnet2/nnet-component.cc @@ -447,7 +447,7 @@ void MaxoutComponent::Propagate(const ChunkInfo &in_info, in_info.CheckSize(in); out_info.CheckSize(*out); KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks()); - out->GroupMax(in); + out->GroupMax(in); } void MaxoutComponent::Backprop(const ChunkInfo &, // in_info, @@ -455,7 +455,7 @@ void MaxoutComponent::Backprop(const ChunkInfo &, // in_info, const CuMatrixBase &in_value, const CuMatrixBase &out_value, const CuMatrixBase &out_deriv, - Component *to_update, + Component *to_update, CuMatrix *in_deriv) const { in_deriv->Resize(in_value.NumRows(), in_value.NumCols(), kSetZero); in_deriv->GroupMaxDeriv(in_value, out_value); @@ -518,7 +518,7 @@ void PnormComponent::Propagate(const ChunkInfo &in_info, in_info.CheckSize(in); out_info.CheckSize(*out); KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks()); - + out->GroupPnorm(in, p_); } @@ -527,7 +527,7 @@ void PnormComponent::Backprop(const ChunkInfo &, // in_info, const CuMatrixBase &in_value, const CuMatrixBase &out_value, const CuMatrixBase &out_deriv, - Component *to_update, + Component *to_update, // may be identical to "this". CuMatrix *in_deriv) const { in_deriv->Resize(in_value.NumRows(), in_value.NumCols(), kSetZero); @@ -602,7 +602,7 @@ void NormalizeComponent::Backprop(const ChunkInfo &, // in_info, const CuMatrixBase &in_value, const CuMatrixBase &out_value, const CuMatrixBase &out_deriv, - Component *to_update, + Component *to_update, // may be identical to "this". 
CuMatrix *in_deriv) const { in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols()); @@ -629,7 +629,7 @@ void SigmoidComponent::Propagate(const ChunkInfo &in_info, in_info.CheckSize(in); out_info.CheckSize(*out); KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks()); - + out->Sigmoid(in); } @@ -668,7 +668,7 @@ void TanhComponent::Propagate(const ChunkInfo &in_info, // Apply tanh function to each element of the output... // the tanh function may be written as -1 + ( 2 / (1 + e^{-2 x})), // which is a scaled and shifted sigmoid. - + in_info.CheckSize(in); out_info.CheckSize(*out); KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks()); @@ -731,7 +731,7 @@ void PowerComponent::Propagate(const ChunkInfo &in_info, in_info.CheckSize(in); out_info.CheckSize(*out); KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks()); - + // Apply power operation to each element of the input... out->CopyFromMat(in); out->ApplyPowAbs(power_); @@ -918,7 +918,7 @@ void SoftmaxComponent::Propagate(const ChunkInfo &in_info, in_info.CheckSize(in); out_info.CheckSize(*out); KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks()); - + // Apply softmax function to each row of the output... // for that row, we do // x_i = exp(x_i) / sum_j exp(x_j). @@ -967,7 +967,7 @@ void LogSoftmaxComponent::Propagate(const ChunkInfo &in_info, in_info.CheckSize(in); out_info.CheckSize(*out); KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks()); - + // Applies log softmax function to each row of the output. For each row, we do // x_i = x_i - log(sum_j exp(x_j)) out->ApplyLogSoftMaxPerRow(in); @@ -1173,7 +1173,7 @@ void AffineComponent::Propagate(const ChunkInfo &in_info, in_info.CheckSize(in); out_info.CheckSize(*out); KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks()); - + // No need for asserts as they'll happen within the matrix operations. out->CopyRowsFromVec(bias_params_); // copies bias_params_ to each row // of *out. 
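// (A minimal sketch of the affine propagation that the hunk above comes from,
// illustrative only and with hypothetical dimensions; it computes
// out = b + in * W^T:
//   CuMatrix<BaseFloat> out(num_frames, output_dim);
//   out.CopyRowsFromVec(bias_params_);  // every row of out becomes the bias b
//   out.AddMatMat(1.0, in, kNoTrans, linear_params_, kTrans, 1.0);
// where linear_params_ is W with shape [output_dim, input_dim].)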
@@ -2351,7 +2351,7 @@ void PermuteComponent::Propagate(const ChunkInfo &in_info, in_info.CheckSize(in); out_info.CheckSize(*out); KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks()); - + std::vector reverse_reorder(reorder_.size()); for (size_t i = 0; i < reorder_.size(); i++) reverse_reorder[reorder_[i]] = i; @@ -2458,7 +2458,7 @@ void SumGroupComponent::Propagate(const ChunkInfo &in_info, in_info.CheckSize(in); out_info.CheckSize(*out); KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks()); - + out->SumColumnRanges(in, indexes_); } @@ -2535,7 +2535,7 @@ int32 ChunkInfo::GetIndex(int32 offset) const { KALDI_ASSERT((offset <= last_offset_) && (offset >= first_offset_)); return offset - first_offset_; } else { - std::vector::const_iterator iter = + std::vector::const_iterator iter = std::lower_bound(offsets_.begin(), offsets_.end(), offset); // make sure offset is present in the vector KALDI_ASSERT(iter != offsets_.end() && *iter == offset); @@ -2593,7 +2593,7 @@ void SpliceComponent::Propagate(const ChunkInfo &in_info, const CuMatrixBase &in, CuMatrixBase *out) const { - // Check the inputs are correct and resize output + // Check the inputs are correct and resize output in_info.Check(); out_info.Check(); in_info.CheckSize(in); @@ -2622,7 +2622,7 @@ void SpliceComponent::Propagate(const ChunkInfo &in_info, for (int32 chunk = 0; chunk < in_info.NumChunks(); chunk++) { if (chunk == 0) { - // this branch could be used for all chunks in the matrix, + // this branch could be used for all chunks in the matrix, // but is restricted to chunk 0 for efficiency reasons for (int32 c = 0; c < num_splice; c++) { for (int32 out_index = 0; out_index < out_chunk_size; out_index++) { @@ -2706,7 +2706,7 @@ void SpliceComponent::Backprop(const ChunkInfo &in_info, // row of "in" we copy the last part of each row of "out" from (this part is // not subject to splicing, it's assumed constant for each frame of "input". std::vector const_indexes(const_dim == 0 ? 0 : in_deriv->NumRows(), -1); - + for (int32 c = 0; c < indexes.size(); c++) indexes[c].resize(in_deriv->NumRows(), -1); // set to -1 by default, // this gets interpreted by the CopyRows() code @@ -2728,7 +2728,7 @@ void SpliceComponent::Backprop(const ChunkInfo &in_info, for (int32 c = 0; c < num_splice; c++) { for (int32 in_index = 0; in_index < in_chunk_size; in_index++) { int32 last_value = indexes[c][(chunk-1) * in_chunk_size + in_index]; - indexes[c][chunk * in_chunk_size + in_index] = + indexes[c][chunk * in_chunk_size + in_index] = (last_value == -1 ? 
-1 : last_value + out_chunk_size); } } @@ -3078,7 +3078,7 @@ void DctComponent::Propagate(const ChunkInfo &in_info, out_info.CheckSize(*out); KALDI_ASSERT(num_rows == out_info.NumRows()); KALDI_ASSERT(num_chunks * dct_keep_dim == out_info.NumCols()); - + CuMatrix in_tmp; if (reorder_) { in_tmp = in; @@ -3221,7 +3221,7 @@ void FixedLinearComponent::Propagate(const ChunkInfo &in_info, in_info.CheckSize(in); out_info.CheckSize(*out); KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks()); - + out->AddMatMat(1.0, in, kNoTrans, mat_, kTrans, 0.0); } @@ -3306,7 +3306,7 @@ void FixedAffineComponent::Propagate(const ChunkInfo &in_info, in_info.CheckSize(in); out_info.CheckSize(*out); KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks()); - + out->AddMatMat(1.0, in, kNoTrans, linear_params_, kTrans, 0.0); out->AddVecToRows(1.0, bias_params_); } @@ -3745,9 +3745,9 @@ void Convolutional1dComponent::Resize(int32 input_dim, int32 output_dim) { // display information about component std::string Convolutional1dComponent::Info() const { std::stringstream stream; - BaseFloat filter_params_size = static_cast(filter_params_.NumRows()) + BaseFloat filter_params_size = static_cast(filter_params_.NumRows()) * static_cast(filter_params_.NumCols()); - BaseFloat filter_stddev = + BaseFloat filter_stddev = std::sqrt(TraceMatMat(filter_params_, filter_params_, kTrans) / filter_params_size), bias_stddev = std::sqrt(VecVec(bias_params_, bias_params_) / @@ -3851,7 +3851,7 @@ void Convolutional1dComponent::Propagate(const ChunkInfo &in_info, */ CuMatrix patches(num_frames, filter_dim * num_patches, kUndefined); // column_map is indexed by the column-index of "patches", - // and the value is the corresponding column-index of "in". + // and the value is the corresponding column-index of "in". std::vector column_map(filter_dim * num_patches); // build-up a column selection map @@ -3877,25 +3877,24 @@ void Convolutional1dComponent::Propagate(const ChunkInfo &in_info, std::vector* > tgt_batch, patch_batch, filter_params_batch; CuSubMatrix* filter_params_elem = new CuSubMatrix( - filter_params_, 0, filter_params_.NumRows(), 0, - filter_params_.NumCols()); - + filter_params_, 0, filter_params_.NumRows(), 0, filter_params_.NumCols()); + // form batch in vector container for (int32 p = 0; p < num_patches; p++) { - // form batch in vector container. for filter_params_batch, all elements + // form batch in vector container. 
for filter_params_batch, all elements // point to the same copy filter_params_elem tgt_batch.push_back(new CuSubMatrix(out->ColRange(p * num_filters, - num_filters))); - patch_batch.push_back(new CuSubMatrix(patches.ColRange(p * filter_dim, - filter_dim))); + num_filters))); + patch_batch.push_back(new CuSubMatrix( + patches.ColRange(p * filter_dim, filter_dim))); filter_params_batch.push_back(filter_params_elem); tgt_batch[p]->AddVecToRows(1.0, bias_params_, 0.0); // add bias } - + // apply all filters - AddMatMatBatched(1.0, tgt_batch, patch_batch, kNoTrans, filter_params_batch, - kTrans, 1.0); + AddMatMatBatched(1.0, tgt_batch, patch_batch, kNoTrans, + filter_params_batch, kTrans, 1.0); // release memory delete filter_params_elem; @@ -4004,25 +4003,24 @@ void Convolutional1dComponent::Backprop(const ChunkInfo &in_info, // backpropagate to vector of matrices // (corresponding to position of a filter) // - std::vector* > patch_deriv_batch, out_deriv_batch, - filter_params_batch; + std::vector* > patch_deriv_batch, out_deriv_batch, + filter_params_batch; CuSubMatrix* filter_params_elem = new CuSubMatrix( - filter_params_, 0, filter_params_.NumRows(), 0, - filter_params_.NumCols()); + filter_params_, 0, filter_params_.NumRows(), 0, filter_params_.NumCols()); // form batch in vector container for (int32 p = 0; p < num_patches; p++) { - // form batch in vector container. for filter_params_batch, all elements + // form batch in vector container. for filter_params_batch, all elements // point to the same copy filter_params_elem patch_deriv_batch.push_back(new CuSubMatrix(patches_deriv.ColRange( - p * filter_dim, filter_dim))); + p * filter_dim, filter_dim))); out_deriv_batch.push_back(new CuSubMatrix(out_deriv.ColRange( - p * num_filters, num_filters))); - filter_params_batch.push_back(filter_params_elem); + p * num_filters, num_filters))); + filter_params_batch.push_back(filter_params_elem); } - AddMatMatBatched(1.0, patch_deriv_batch, out_deriv_batch, kNoTrans, - filter_params_batch, kNoTrans, 0.0); + AddMatMatBatched(1.0, patch_deriv_batch, out_deriv_batch, kNoTrans, + filter_params_batch, kNoTrans, 0.0); // release memory delete filter_params_elem; @@ -4217,27 +4215,27 @@ void Convolutional1dComponent::Update(const CuMatrixBase &in_value, // use all the patches // - // create a single large matrix holding the smaller matrices + // create a single large matrix holding the smaller matrices // from the vector container filters_grad_batch along the rows CuMatrix filters_grad_blocks_batch( - num_patches * filters_grad.NumRows(), filters_grad.NumCols()); + num_patches * filters_grad.NumRows(), filters_grad.NumCols()); - std::vector* > filters_grad_batch, diff_patch_batch, - patch_batch; + std::vector* > filters_grad_batch, diff_patch_batch, + patch_batch; for (int32 p = 0; p < num_patches; p++) { // form batch in vector container filters_grad_batch.push_back(new CuSubMatrix( - filters_grad_blocks_batch.RowRange( - p * filters_grad.NumRows(), - filters_grad.NumRows()))); + filters_grad_blocks_batch.RowRange( + p * filters_grad.NumRows(), + filters_grad.NumRows()))); diff_patch_batch.push_back(new CuSubMatrix(out_deriv.ColRange( - p * num_filters, num_filters))); + p * num_filters, num_filters))); patch_batch.push_back(new CuSubMatrix(patches.ColRange( - p * filter_dim, filter_dim))); + p * filter_dim, filter_dim))); } - AddMatMatBatched(1.0, filters_grad_batch, diff_patch_batch, kTrans, patch_batch, - kNoTrans, 1.0); + AddMatMatBatched(1.0, filters_grad_batch, diff_patch_batch, + kTrans, 
patch_batch, kNoTrans, 1.0); // add the row blocks together to filters_grad filters_grad.AddMatBlocks(1.0, filters_grad_blocks_batch); @@ -4254,7 +4252,7 @@ void Convolutional1dComponent::Update(const CuMatrixBase &in_value, for (int32 p = 0; p < num_patches; p++) { delete filters_grad_batch[p]; delete diff_patch_batch[p]; - delete patch_batch[p]; + delete patch_batch[p]; } // diff --git a/src/nnet3/nnet-general-component.cc b/src/nnet3/nnet-general-component.cc index b1a2d9327f8..c899f592af9 100644 --- a/src/nnet3/nnet-general-component.cc +++ b/src/nnet3/nnet-general-component.cc @@ -556,7 +556,7 @@ void StatisticsPoolingComponent::InitFromConfig(ConfigLine *cfl) { if (cfl->HasUnusedValues()) KALDI_ERR << "Could not process these elements in initializer: " - << cfl->UnusedValues(); + << cfl->UnusedValues(); // do some basic checks here but Check() will check more completely. if (!ok || input_dim_ <= 0 || left_context_ + right_context_ <= 0 || num_log_count_features_ < 0) @@ -1007,7 +1007,7 @@ void BackpropTruncationComponent::InitFromConfig(ConfigLine *cfl) { zeroing_interval, recurrence_interval); } -// virtual +// virtual Component* BackpropTruncationComponent::Copy() const { BackpropTruncationComponent *ans = new BackpropTruncationComponent(); ans->dim_ = dim_; @@ -1130,7 +1130,7 @@ void BackpropTruncationComponent::Backprop(const std::string &debug_info, zeroing_scales_vec.Add(1.0); // now the element of zeroing_scales_vec is 0.0 if we want to zero its // corresponding sample's gradient, and 1.0 otherwise - + // combines clipping_scales and zeroing_scales and applies combined_scales // to in_deriv all at once CuVector combined_scales(clipping_scales); diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc index 58908a0fe09..0c036eb7f99 100644 --- a/src/nnet3/nnet-simple-component.cc +++ b/src/nnet3/nnet-simple-component.cc @@ -1514,7 +1514,7 @@ void RepeatedAffineComponent::InitFromConfig(ConfigLine *cfl) { num_repeats, param_stddev, bias_mean, bias_stddev); if (cfl->HasUnusedValues()) KALDI_ERR << "Could not process these elements in initializer: " - << cfl->UnusedValues(); + << cfl->UnusedValues(); if (!ok) KALDI_ERR << "Bad initializer " << cfl->WholeLine(); } @@ -3615,7 +3615,7 @@ void ConvolutionComponent::InitFromConfig(ConfigLine *cfl) { } if (cfl->HasUnusedValues()) KALDI_ERR << "Could not process these elements in initializer: " - << cfl->UnusedValues(); + << cfl->UnusedValues(); if (!ok) KALDI_ERR << "Bad initializer " << cfl->WholeLine(); } @@ -3704,8 +3704,7 @@ void ConvolutionComponent::Propagate(const ComponentPrecomputedIndexes *indexes, kUndefined); InputToInputPatches(in, &patches); CuSubMatrix* filter_params_elem = new CuSubMatrix( - filter_params_, 0, filter_params_.NumRows(), 0, - filter_params_.NumCols()); + filter_params_, 0, filter_params_.NumRows(), 0, filter_params_.NumCols()); std::vector* > tgt_batch, patch_batch, filter_params_batch; @@ -3859,10 +3858,9 @@ void ConvolutionComponent::Backprop(const std::string &debug_info, kSetZero); std::vector* > patch_deriv_batch, out_deriv_batch, - filter_params_batch; + filter_params_batch; CuSubMatrix* filter_params_elem = new CuSubMatrix( - filter_params_, 0, filter_params_.NumRows(), 0, - filter_params_.NumCols()); + filter_params_, 0, filter_params_.NumRows(), 0, filter_params_.NumCols()); for (int32 x_step = 0; x_step < num_x_steps; x_step++) { for (int32 y_step = 0; y_step < num_y_steps; y_step++) { @@ -3939,9 +3937,8 @@ void ConvolutionComponent::Update(const std::string 
&debug_info, for (int32 y_step = 0; y_step < num_y_steps; y_step++) { int32 patch_number = x_step * num_y_steps + y_step; filters_grad_batch.push_back(new CuSubMatrix( - filters_grad_blocks_batch.RowRange( - patch_number * filters_grad.NumRows(), - filters_grad.NumRows()))); + filters_grad_blocks_batch.RowRange( + patch_number * filters_grad.NumRows(), filters_grad.NumRows()))); input_patch_batch.push_back(new CuSubMatrix( input_patches.ColRange(patch_number * filter_dim, filter_dim))); @@ -4413,7 +4410,7 @@ void PermuteComponent::InitFromConfig(ConfigLine *cfl) { << column_map_str; if (cfl->HasUnusedValues()) KALDI_ERR << "Could not process these elements in initializer: " - << cfl->UnusedValues(); + << cfl->UnusedValues(); if (!ok) KALDI_ERR << "Invalid initializer for layer of type " << Type() << ": \"" << cfl->WholeLine() << "\""; @@ -5311,7 +5308,7 @@ void LstmNonlinearityComponent::InitFromConfig(ConfigLine *cfl) { if (cfl->HasUnusedValues()) KALDI_ERR << "Could not process these elements in initializer: " - << cfl->UnusedValues(); + << cfl->UnusedValues(); if (!ok) KALDI_ERR << "Invalid initializer for layer of type " << Type() << ": \"" << cfl->WholeLine() << "\""; diff --git a/src/nnet3/nnet-simple-component.h b/src/nnet3/nnet-simple-component.h index f09a989759a..1106fdc3246 100644 --- a/src/nnet3/nnet-simple-component.h +++ b/src/nnet3/nnet-simple-component.h @@ -1543,7 +1543,7 @@ class ConvolutionComponent: public UpdatableComponent { virtual std::string Type() const { return "ConvolutionComponent"; } virtual int32 Properties() const { return kSimpleComponent|kUpdatableComponent|kBackpropNeedsInput| - kBackpropAdds|kPropagateAdds; + kBackpropAdds|kPropagateAdds; } virtual void Propagate(const ComponentPrecomputedIndexes *indexes, @@ -1600,7 +1600,7 @@ class ConvolutionComponent: public UpdatableComponent { void Resize(int32 input_dim, int32 output_dim); void Update(const std::string &debug_info, - const CuMatrixBase &in_value, + const CuMatrixBase &in_value, const CuMatrixBase &out_deriv); @@ -1872,7 +1872,7 @@ class MaxpoolingComponent: public Component { virtual std::string Type() const { return "MaxpoolingComponent"; } virtual int32 Properties() const { return kSimpleComponent|kBackpropNeedsInput|kBackpropNeedsOutput| - kBackpropAdds; + kBackpropAdds; } virtual void Propagate(const ComponentPrecomputedIndexes *indexes, diff --git a/src/nnet3bin/nnet3-merge-egs.cc b/src/nnet3bin/nnet3-merge-egs.cc index 6438653a802..0e40de8aeae 100644 --- a/src/nnet3bin/nnet3-merge-egs.cc +++ b/src/nnet3bin/nnet3-merge-egs.cc @@ -112,7 +112,7 @@ int main(int argc, char *argv[]) { num_read++; if (minibatch_ready || (!discard_partial_minibatches && - (example_reader.Done() && !examples.empty()))) { + (example_reader.Done() && !examples.empty()))) { NnetExample merged_eg; MergeExamples(examples, compress, &merged_eg); std::ostringstream ostr; diff --git a/src/util/basic-filebuf.h b/src/util/basic-filebuf.h index 1ea1fa8daa2..51cf12f406a 100644 --- a/src/util/basic-filebuf.h +++ b/src/util/basic-filebuf.h @@ -246,7 +246,7 @@ basic_filebuf::swap(basic_filebuf& rhs) { reinterpret_cast(_M_extbuf_min) + n, reinterpret_cast(_M_extbuf_min) + e); } else if (this->pbase() == - reinterpret_cast(rhs._M_extbuf_min)) { + reinterpret_cast(rhs._M_extbuf_min)) { ptrdiff_t n = this->pptr() - this->pbase(); ptrdiff_t e = this->epptr() - this->pbase(); this->setp(reinterpret_cast(_M_extbuf_min), From f51c984a24c769037e81671308b74efa33db264a Mon Sep 17 00:00:00 2001 From: Xingyu Na Date: Wed, 4 Jan 2017 10:06:30 
+0800
Subject: [PATCH 199/530] [egs] add missing files in egs/fisher_swbd/s5, thanks @Stanley (#1313)

---
 egs/fisher_swbd/s5/conf/mfcc_hires.conf              | 10 ++++++++++
 egs/fisher_swbd/s5/conf/online_cmvn.conf             |  1 +
 egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh |  2 +-
 3 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 egs/fisher_swbd/s5/conf/mfcc_hires.conf
 create mode 100644 egs/fisher_swbd/s5/conf/online_cmvn.conf

diff --git a/egs/fisher_swbd/s5/conf/mfcc_hires.conf b/egs/fisher_swbd/s5/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..d870ab04c38
--- /dev/null
+++ b/egs/fisher_swbd/s5/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false   # use average of log energy, not energy.
+--sample-frequency=8000 # Switchboard is sampled at 8kHz
+--num-mel-bins=40     # similar to Google's setup.
+--num-ceps=40     # there is no dimensionality reduction.
+--low-freq=40    # low cutoff frequency for mel bins
+--high-freq=-200 # high cutoff frequency, relative to Nyquist of 4000 (=3800)
diff --git a/egs/fisher_swbd/s5/conf/online_cmvn.conf b/egs/fisher_swbd/s5/conf/online_cmvn.conf
new file mode 100644
index 00000000000..7748a4a4dd3
--- /dev/null
+++ b/egs/fisher_swbd/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh b/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh
index 4d083d61d0e..43cdd2cef65 100644
--- a/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/fisher_swbd/s5/local/nnet3/run_ivector_common.sh
@@ -92,7 +92,7 @@ for line in sys.stdin.readlines():
   # Take the first 30k utterances (about 1/8th of the data) this will be used
   # for the diagubm training
   utils/subset_data_dir.sh --first data/${train_set}_hires 30000 data/${train_set}_30k_hires
-  local/remove_dup_utts.sh 200 data/${train_set}_30k_hires data/${train_set}_30k_nodup_hires # 33hr
+  utils/data/remove_dup_utts.sh 200 data/${train_set}_30k_hires data/${train_set}_30k_nodup_hires # 33hr
 fi

 # ivector extractor training

From f9895446d01a0350d1b3756a0337ec03c7ecd206 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Wed, 4 Jan 2017 20:10:44 -0500
Subject: [PATCH 200/530] Various unrelated fixes to nnet3 code.
---
 src/nnet3/nnet-derivative-test.cc          |  2 +-
 src/nnet3/nnet-optimize-utils.cc           | 34 ++++++----
 src/nnet3/nnet-optimize.cc                 | 73 +++++++++++-----------
 src/nnet3/nnet-optimize.h                  |  5 ++
 src/nnet3bin/nnet3-latgen-faster-looped.cc | 14 +++--
 src/nnet3bin/nnet3-latgen-faster.cc        | 14 +++--
 6 files changed, 84 insertions(+), 58 deletions(-)

diff --git a/src/nnet3/nnet-derivative-test.cc b/src/nnet3/nnet-derivative-test.cc
index 4289b577a25..1f9e61e2b2a 100644
--- a/src/nnet3/nnet-derivative-test.cc
+++ b/src/nnet3/nnet-derivative-test.cc
@@ -416,8 +416,8 @@ void UnitTestNnetInputDerivatives() {
 int main() {
   using namespace kaldi;
   using namespace kaldi::nnet3;
-  // SetVerboseLevel(4);

+  // SetVerboseLevel(4);
   for (kaldi::int32 loop = 0; loop < 2; loop++) {
 #if HAVE_CUDA == 1
diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc
index 41f3acb3916..adcd5fe22f0 100644
--- a/src/nnet3/nnet-optimize-utils.cc
+++ b/src/nnet3/nnet-optimize-utils.cc
@@ -1984,14 +1984,14 @@ static void FindNumLeadingAndTrailingNegatives(const std::vector<int32> &vec,
   // at least one nonnegative number.
   while (*ptr2 < 0)
     ptr2--;
-  KALDI_ASSERT(ptr2 != begin);  // would be code error.
+  KALDI_ASSERT(ptr2 >= begin);  // or would be code error.
   *num_trailing_negatives = end - 1 - ptr2;
 }

 // This function, called from SnipRowOps, is called when it encounters commands
-// of type kCopyRows or kAddRows; it modifies such commands when the indexes
-// have leading or trailing -1's, to make them operate on a smaller submatrix.
-// It returns true if it made a change, and false otherwise.
+// of type kAddRows; it modifies such commands when the indexes have leading or
+// trailing -1's, to make them operate on a smaller submatrix.  It returns
+// true if it made a change, and false otherwise.
 static bool SnipSingleRowOp(NnetComputation *computation,
                             int32 command_index) {
   NnetComputation::Command &c = computation->commands[command_index];
@@ -2010,12 +2010,16 @@ static bool SnipSingleRowOp(NnetComputation *computation,
   std::vector<int32> new_indexes(indexes.begin() + num_leading_negatives,
                                  indexes.begin() + num_leading_negatives +
                                  new_num_rows);
+  KALDI_ASSERT(new_indexes.back() >= 0);  // TEMP
   c.arg3 = computation->indexes.size();
   computation->indexes.push_back(std::vector<int32>());
   computation->indexes.back().swap(new_indexes);
   c.arg1 = computation->NewSubMatrix(c.arg1,
                                      num_leading_negatives, new_num_rows,
                                      0, -1);
+  if (new_num_rows == 15) {
+    KALDI_LOG << "HERE";  // TEMP
+  }
   return true;  // made a change.
 }

@@ -2059,9 +2063,9 @@ static void FindNumLeadingAndTrailingNegatives(
 // This function, called from SnipRowOps, is called when it encounters commands
-// of type kAddRowsMulti, kAddToRowsMulti, kCopyRowsMulti or kCopyToRowsMulti;
-// have leading or trailing (-1,-1) pairs, to make them operate on a smaller
-// submatrix.  It returns true if it made a change, and false otherwise.
+// of type kAddRowsMulti, kAddToRowsMulti, or kCopyToRowsMulti whose indexes
+// have leading or trailing (-1,-1) pairs, to make them operate on a smaller
+// submatrix.  It returns true if it made a change, and false otherwise.
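// A worked example (an illustration, not part of the commit): if the
// (submatrix, row) index pairs are {(-1,-1), (-1,-1), (2,10), (-1,-1), (3,8),
// (-1,-1)}, then num_leading_negatives is 2 and num_trailing_negatives is 1,
// so the command is changed to operate on a 3-row submatrix covering the
// middle pairs {(2,10), (-1,-1), (3,8)}; interior (-1,-1) pairs must stay,
// since only the leading and trailing edges can be trimmed by shifting the
// submatrix boundaries.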
 static bool SnipMultiRowOp(NnetComputation *computation,
                            int32 command_index) {
   NnetComputation::Command &c = computation->commands[command_index];
@@ -2093,7 +2097,7 @@ static bool SnipMultiRowOp(NnetComputation *computation,

 /*
-  This function, used in SnipRangeRowOp(), finds the number of leading, and
+  This function, used in SnipRangeRowOp(), finds the number of leading and
   trailing values in a vector of pairs of integers, that are the same (i.e.
   pairs of the form (x, x) for any x.  [This is how we represent an empty
   range, which is a kind of no-op, in commands of kCopyRowRanges or
@@ -2172,14 +2176,19 @@ bool SnipRowOps(NnetComputation *computation) {
     // non-const because we'll be changing it.
     NnetComputation::Command &c = computation->commands[command_index];

+    // note: we can't do the snipping for commands of type kCopyRows and
+    // kCopyRowsMulti, because the -1's aren't a pure no-op; they have the
+    // meaning of setting the destination value to zero, so we can't prune
+    // them away.
+
     switch (c.command_type) {
-      case kCopyRows: case kAddRows: {
+      case kAddRows: {
         if (SnipSingleRowOp(computation, command_index))
           ans = true;
         break;
       }
       case kAddRowsMulti: case kAddToRowsMulti:
-      case kCopyRowsMulti: case kCopyToRowsMulti: {
+      case kCopyToRowsMulti: {
         if (SnipMultiRowOp(computation, command_index))
           ans = true;
         break;
@@ -2405,8 +2414,9 @@ void ComputationExpander::ExpandRowsCommand(
       num_n_values = num_n_values_,
       new_s1_size = expanded_computation_->submatrices[s1].num_rows,
       new_s2_size = expanded_computation_->submatrices[s2].num_rows;
-  KALDI_ASSERT(old_size % 2 == 0 &&
-               old_size == computation_.submatrices[s1].num_rows);
+
+  KALDI_ASSERT(old_size == computation_.submatrices[s1].num_rows);
+
   new_indexes.resize(new_s1_size, -1);

   for (int32 i1 = 0; i1 < old_size; i1++) {
diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc
index f024d68aed7..fcb0568dd5c 100644
--- a/src/nnet3/nnet-optimize.cc
+++ b/src/nnet3/nnet-optimize.cc
@@ -437,7 +437,9 @@ void Optimize(const NnetOptimizeOptions &config,
   if (GetVerboseLevel() >= 4)
     CheckComputation(nnet, *computation, true);

-  { // Call LimitDerivativeTimes().
+  { // Call LimitDerivativeTimes(); it's important that this
+    // should come before other optimizations (search for "insist" in
+    // nnet-optimize-utils.cc for the reasons).
     // this will do nothing unless --min-deriv-time or --max-deriv-time
     // or --max-deriv-time-relative was set.
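    // A worked example of the 'relative' option (an illustration; the
    // numbers are invented): with --max-deriv-time-relative=20 and a
    // computation request whose 'output' node indexes reach t = 50, the
    // effective max_deriv_time becomes 50 + 20 = 70, so derivatives are not
    // backpropagated for frames with t > 70.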
int32 max_deriv_time = config.max_deriv_time; @@ -448,18 +450,18 @@ void Optimize(const NnetOptimizeOptions &config, max_deriv_time, computation); } - if (GetVerboseLevel() >= 4) + if (GetVerboseLevel() >= 3) CheckComputation(nnet, *computation, true); if (config.optimize && config.consolidate_model_update) ConsolidateModelUpdate(nnet, computation); - if (GetVerboseLevel() >= 4) + if (GetVerboseLevel() >= 3) CheckComputation(nnet, *computation, true); if (config.optimize && config.convert_addition) { ConvertAdditionToAssignment(nnet, computation); - if (GetVerboseLevel() >= 4) + if (GetVerboseLevel() >= 3) CheckComputation(nnet, *computation, true); } @@ -467,20 +469,19 @@ void Optimize(const NnetOptimizeOptions &config, (config.remove_assignments || config.backprop_in_place || config.propagate_in_place)) { VariableMergingOptimization(config, nnet, computation); - if (GetVerboseLevel() >= 4) + if (GetVerboseLevel() >= 3) CheckComputation(nnet, *computation, false); } - if (config.optimize && config.optimize_row_ops) { - if (ReplaceRowWithMatrixOps(computation)) { - // if anything was changed... - - // We have to call RenumberComputation() to get rid of any removed - // indexes... actually this could be a little wasteful, but unfortunately - // it doesn't seem like we'd otherwise be doing any renumbering past this - // point. + if (config.optimize && (config.snip_row_ops || config.optimize_row_ops)) { + bool must_renumber = false; + if (config.snip_row_ops && SnipRowOps(computation)) + must_renumber = true; + if (config.optimize_row_ops && ReplaceRowWithMatrixOps(computation)) + must_renumber = true; + if (must_renumber) { RenumberComputation(computation); - if (GetVerboseLevel() >= 4) + if (GetVerboseLevel() >= 3) CheckComputation(nnet, *computation, false); } } @@ -488,13 +489,13 @@ void Optimize(const NnetOptimizeOptions &config, if (config.optimize && config.initialize_undefined) { RemoveUnnecessaryZeroing(nnet, computation); - if (GetVerboseLevel() >= 4) - CheckComputation(nnet, *computation, false); + if (GetVerboseLevel() >= 3) + CheckComputation(nnet, *computation, false); } if (config.optimize && config.move_sizing_commands) { MoveSizingCommands(nnet, computation); - if (GetVerboseLevel() >= 4) + if (GetVerboseLevel() >= 3) CheckComputation(nnet, *computation, false); } @@ -503,7 +504,7 @@ void Optimize(const NnetOptimizeOptions &config, // because it's necessary for looped computation to run. if (config.optimize_looped_computation){ OptimizeLoopedComputation(nnet, computation); - if (GetVerboseLevel() >= 4) + if (GetVerboseLevel() >= 3) CheckComputation(nnet, *computation, false); } @@ -513,7 +514,7 @@ void Optimize(const NnetOptimizeOptions &config, // would be correct in that case, as written. In any case the performance // benefit is tiny. 
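    // (An illustrative sketch of what the call below does; the command
    // numbers are invented: if command 10 is "deallocate m5" and a later
    // command 14 is "allocate m8" with the same dimensions, the pair can be
    // merged so that m8 simply takes over m5's memory, saving a free and an
    // allocation -- consistent with why it is skipped for the looped case,
    // as noted above.)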
RemoveUnnecessaryAllocation(nnet, computation); - if (GetVerboseLevel() >= 4) + if (GetVerboseLevel() >= 3) CheckComputation(nnet, *computation, false); } @@ -526,7 +527,7 @@ void Optimize(const NnetOptimizeOptions &config, if (config.optimize_looped_computation) FixGotoLabel(computation); - if (GetVerboseLevel() >= 4) + if (GetVerboseLevel() >= 3) CheckComputation(nnet, *computation, false); } @@ -634,21 +635,23 @@ CachingOptimizingCompiler::~CachingOptimizingCompiler() { delete itr->first; delete itr->second.first; } - std::ostringstream os; - double seconds_taken_misc = seconds_taken_total_ - seconds_taken_compile_ - - seconds_taken_optimize_ - seconds_taken_expand_ - - seconds_taken_check_ - seconds_taken_indexes_; - os << std::setprecision(3) << seconds_taken_total_ - << " seconds taken in nnet3 compilation total (breakdown: " - << seconds_taken_compile_ << " compilation, " - << seconds_taken_optimize_ << " optimization, " - << seconds_taken_expand_ << " shortcut expansion, " - << seconds_taken_check_ << " checking, " - << seconds_taken_indexes_ << " computing indexes, " - << seconds_taken_misc << " misc.)"; - KALDI_LOG << os.str(); - // note: the leftover amount is misc things like hashing and == comparisons on - // computation-requests, and calling RequestIsDecomposable(). + if (seconds_taken_total_ > 0.0) { + std::ostringstream os; + double seconds_taken_misc = seconds_taken_total_ - seconds_taken_compile_ + - seconds_taken_optimize_ - seconds_taken_expand_ + - seconds_taken_check_ - seconds_taken_indexes_; + os << std::setprecision(3) << seconds_taken_total_ + << " seconds taken in nnet3 compilation total (breakdown: " + << seconds_taken_compile_ << " compilation, " + << seconds_taken_optimize_ << " optimization, " + << seconds_taken_expand_ << " shortcut expansion, " + << seconds_taken_check_ << " checking, " + << seconds_taken_indexes_ << " computing indexes, " + << seconds_taken_misc << " misc.)"; + KALDI_LOG << os.str(); + // note: the leftover amount is misc things like hashing and == comparisons on + // computation-requests, and calling RequestIsDecomposable(). + } } const NnetComputation* CachingOptimizingCompiler::Compile( diff --git a/src/nnet3/nnet-optimize.h b/src/nnet3/nnet-optimize.h index bbe5269c982..538dde2bbc1 100644 --- a/src/nnet3/nnet-optimize.h +++ b/src/nnet3/nnet-optimize.h @@ -48,6 +48,7 @@ struct NnetOptimizeOptions { int32 min_deriv_time; int32 max_deriv_time; int32 max_deriv_time_relative; + bool snip_row_ops; // optimize_looped_computation is a 'hidden config' not available from // the command line; it's set to true to enable the optimization for // looped computation that turns a linear computation into a loop. @@ -69,6 +70,7 @@ struct NnetOptimizeOptions { min_deriv_time(std::numeric_limits<int32>::min()), max_deriv_time(std::numeric_limits<int32>::max()), max_deriv_time_relative(std::numeric_limits<int32>::max()), + snip_row_ops(true), optimize_looped_computation(false) { } void Register(OptionsItf *opts) { @@ -118,6 +120,9 @@ struct NnetOptimizeOptions { "variable.
If set, it is equivalent to setting the " "--max-deriv-time to this value plus the largest 't' value " "in any 'output' node of the computation request."); + opts->Register("snip-row-ops", &snip_row_ops, "Set this to false to " + "disable an optimization that reduces the size of certain " + "per-row operations"); } void Read(std::istream &is, bool binary); void Write(std::ostream &os, bool binary) const; diff --git a/src/nnet3bin/nnet3-latgen-faster-looped.cc b/src/nnet3bin/nnet3-latgen-faster-looped.cc index 9ad20fd8764..6e6f5af4410 100644 --- a/src/nnet3bin/nnet3-latgen-faster-looped.cc +++ b/src/nnet3bin/nnet3-latgen-faster-looped.cc @@ -183,7 +183,7 @@ int main(int argc, char *argv[]) { &lattice_writer, &like)) { tot_like += like; - frame_count += features.NumRows(); + frame_count += nnet_decodable.NumFramesReady(); num_success++; } else num_fail++; } @@ -241,20 +241,24 @@ int main(int argc, char *argv[]) { &alignment_writer, &words_writer, &compact_lattice_writer, &lattice_writer, &like)) { tot_like += like; - frame_count += features.NumRows(); + frame_count += nnet_decodable.NumFramesReady(); num_success++; } else num_fail++; } } + kaldi::int64 input_frame_count = + frame_count * decodable_opts.frame_subsampling_factor; + double elapsed = timer.Elapsed(); KALDI_LOG << "Time taken "<< elapsed << "s: real-time factor assuming 100 frames/sec is " - << (elapsed*100.0/frame_count); + << (elapsed * 100.0 / input_frame_count); KALDI_LOG << "Done " << num_success << " utterances, failed for " << num_fail; - KALDI_LOG << "Overall log-likelihood per frame is " << (tot_like/frame_count) << " over " - << frame_count<<" frames."; + KALDI_LOG << "Overall log-likelihood per frame is " + << (tot_like / frame_count) << " over " + << frame_count <<" frames."; delete word_syms; if (num_success != 0) return 0; diff --git a/src/nnet3bin/nnet3-latgen-faster.cc b/src/nnet3bin/nnet3-latgen-faster.cc index 5a090acb5b5..6bd5cd7c453 100644 --- a/src/nnet3bin/nnet3-latgen-faster.cc +++ b/src/nnet3bin/nnet3-latgen-faster.cc @@ -177,7 +177,7 @@ int main(int argc, char *argv[]) { &lattice_writer, &like)) { tot_like += like; - frame_count += features.NumRows(); + frame_count += nnet_decodable.NumFramesReady(); num_success++; } else num_fail++; } @@ -236,20 +236,24 @@ int main(int argc, char *argv[]) { &alignment_writer, &words_writer, &compact_lattice_writer, &lattice_writer, &like)) { tot_like += like; - frame_count += features.NumRows(); + frame_count += nnet_decodable.NumFramesReady(); num_success++; } else num_fail++; } } + kaldi::int64 input_frame_count = + frame_count * decodable_opts.frame_subsampling_factor; + double elapsed = timer.Elapsed(); KALDI_LOG << "Time taken "<< elapsed << "s: real-time factor assuming 100 frames/sec is " - << (elapsed*100.0/frame_count); + << (elapsed * 100.0 / input_frame_count); KALDI_LOG << "Done " << num_success << " utterances, failed for " << num_fail; - KALDI_LOG << "Overall log-likelihood per frame is " << (tot_like/frame_count) << " over " - << frame_count<<" frames."; + KALDI_LOG << "Overall log-likelihood per frame is " + << (tot_like / frame_count) << " over " + << frame_count << " frames."; delete word_syms; if (num_success != 0) return 0; From b8db0d3fdccd93781549752f0138df08c3ab4e94 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 5 Jan 2017 00:40:46 -0500 Subject: [PATCH 201/530] Add code and scripts for LSTM with bounded activation --- egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py | 178 +++++------------- egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py | 1 - 
src/nnet3/nnet-general-component.cc | 43 +++-- src/nnet3/nnet-general-component.h | 17 +- src/nnet3/nnet-test-utils.cc | 3 + 5 files changed, 93 insertions(+), 149 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py index cbd31ccea64..11db977f7a2 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py @@ -56,6 +56,9 @@ def check_configs(self): if self.config['cell-dim'] <= 0: raise RuntimeError("cell-dim has invalid value {0}.".format(self.config[key])) + if self.config['delay'] == 0: + raise RuntimeError("delay cannot be zero") + for key in ['self-repair-scale-nonlinearity']: if self.config[key] < 0.0 or self.config[key] > 1.0: raise RuntimeError("{0} has invalid value {1}.".format(key, self.config[key])) @@ -268,6 +271,9 @@ def check_configs(self): raise RuntimeError("{0} has invalid value {1}.".format( key, self.config[key])) + if self.config['delay'] == 0: + raise RuntimeError("delay cannot be zero") + if (self.config['recurrent-projection-dim'] + self.config['non-recurrent-projection-dim'] > self.config['cell-dim']): @@ -427,127 +433,6 @@ def generate_lstm_config(self): return configs -# Same as the LSTMP layer except that the matrix multiplications are combined -# we probably keep only version after experimentation. One year old experiments -# show that this version is slightly worse and might require some tuning -class XconfigLstmpcLayer(XconfigLstmpLayer): - def __init__(self, first_token, key_to_value, prev_names = None): - assert first_token == "lstmpc-layer" - XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) - - # convenience function to generate the LSTM config - def generate_lstm_config(self): - # assign some variables to reduce verbosity - name = self.name - # in the below code we will just call descriptor_strings as descriptors for conciseness - input_dim = self.descriptors['input']['dim'] - input_descriptor = self.descriptors['input']['final-string'] - cell_dim = self.config['cell-dim'] - rec_proj_dim = self.config['recurrent-projection-dim'] - nonrec_proj_dim = self.config['non-recurrent-projection-dim'] - delay = self.config['delay'] - - repair_nonlin = self.config['self-repair-scale-nonlinearity'] - repair_nonlin_str = "self-repair-scale={0:.10f}".format(repair_nonlin) if repair_nonlin is not None else '' - bptrunc_str = ("clipping-threshold={0}" - " zeroing-threshold={1}" - " zeroing-interval={2}" - " recurrence-interval={3}" - "".format(self.config['clipping-threshold'], - self.config['zeroing-threshold'], - self.config['zeroing-interval'], - abs(delay))) - affine_str = self.config['ng-affine-options'] - # Natural gradient per element scale parameters - # TODO: decide if we want to keep exposing these options - if re.search('param-mean', ng_per_element_scale_options) is None and \ - re.search('param-stddev', ng_per_element_scale_options) is None: - ng_per_element_scale_options += " param-mean=0.0 param-stddev=1.0 " - pes_str = ng_per_element_scale_options - - configs = [] - # naming convention - # <layer-name>.W_<outputname>.<input_name> e.g.
Lstm1.W_i.xr for matrix providing output to gate i and operating on an appended vector [x,r] - configs.append("### Begin LTSM layer '{0}'".format(name)) - configs.append("# Full W_ifoc* matrix") - configs.append("component name={0}.W_ifoc.xr type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, input_dim + rec_proj_dim, 4*cell_dim, affine_str)) - configs.append("# note : the cell outputs pass through a diagonal matrix") - - # we will not combine the diagonal matrix operations as one of these has a different delay - configs.append("# note : the cell outputs pass through a diagonal matrix") - configs.append("component name={0}.w_i.c type=NaturalGradientPerElementScaleComponent dim={1} {2}".format(name, cell_dim, pes_str)) - configs.append("component name={0}.w_f.c type=NaturalGradientPerElementScaleComponent dim={1} {2}".format(name, cell_dim, pes_str)) - configs.append("component name={0}.w_o.c type=NaturalGradientPerElementScaleComponent dim={1} {2}".format(name, cell_dim, pes_str)) - - configs.append("# Defining the non-linearities") - configs.append("component name={0}.i type=SigmoidComponent dim={1} {2}".format(name, cell_dim, repair_nonlin_str)) - configs.append("component name={0}.f type=SigmoidComponent dim={1} {2}".format(name, cell_dim, repair_nonlin_str)) - configs.append("component name={0}.o type=SigmoidComponent dim={1} {2}".format(name, cell_dim, repair_nonlin_str)) - configs.append("component name={0}.g type=TanhComponent dim={1} {2}".format(name, cell_dim, repair_nonlin_str)) - configs.append("component name={0}.h type=TanhComponent dim={1} {2}".format(name, cell_dim, repair_nonlin_str)) - - configs.append("# Defining the components for other cell computations") - configs.append("component name={0}.c1 type=ElementwiseProductComponent input-dim={1} output-dim={2}".format(name, 2 * cell_dim, cell_dim)) - configs.append("component name={0}.c2 type=ElementwiseProductComponent input-dim={1} output-dim={2}".format(name, 2 * cell_dim, cell_dim)) - configs.append("component name={0}.m type=ElementwiseProductComponent input-dim={1} output-dim={2}".format(name, 2 * cell_dim, cell_dim)) - configs.append("component name={0}.c type=BackpropTruncationComponent dim={1} {2}".format(name, cell_dim, bptrunc_str)) - - # c1_t and c2_t defined below - configs.append("component-node name={0}.c_t component={0}.c input=Sum({0}.c1_t, {0}.c2_t)".format(name)) - delayed_c_t_descriptor = "IfDefined(Offset({0}.c_t, {1}))".format(name, delay) - rec_connection = '{0}.rp_t'.format(name) - - component_nodes.append("component-node name={0}.ifoc_t component={0}.W_ifoc.xr input=Append({1}, IfDefined(Offset({0}_{2}, {3})))".format(name, input_descriptor, recurrent_connection, lstm_delay)) - - - offset = 0 - component_nodes.append("# i_t") - component_nodes.append("dim-range-node name={0}.i1_t input-node={0}.ifoc_t dim-offset={1} dim={2}".format(name, offset, cell_dim)) - offset += cell_dim - component_nodes.append("component-node name={0}.i2_t component={0}.w_i.cinput={1}".format(name, delayed_c_t_descriptor)) - component_nodes.append("component-node name={0}.i_t component={0}.i input=Sum({0}.i1_t, {0}.i2_t)".format(name)) - - component_nodes.append("# f_t") - component_nodes.append("dim-range-node name={0}.f1_t input-node={0}.ifoc_t dim-offset={1} dim={2}".format(name, offset, cell_dim)) - offset += cell_dim - component_nodes.append("component-node name={0}.f2_t component={0}.w_f.c input={1}".format(name, delayed_c_t_descriptor)) - component_nodes.append("component-node 
name={0}.f_t component={0}.f input=Sum({0}.f1_t, {0}.f2_t)".format(name)) - - component_nodes.append("# o_t") - component_nodes.append("dim-range-node name={0}.o1_t input-node={0}.ifoc_t dim-offset={1} dim={2}".format(name, offset, cell_dim)) - offset += cell_dim - component_nodes.append("component-node name={0}.o2_t component={0}.w_o.c input={0}.c_t".format(name)) - component_nodes.append("component-node name={0}.o_t component={0}.o input=Sum({0}.o1_t, {0}.o2_t)".format(name)) - - component_nodes.append("# h_t") - component_nodes.append("component-node name={0}.h_t component={0}.h input={0}.c_t".format(name)) - - component_nodes.append("# g_t") - component_nodes.append("dim-range-node name={0}.g1_t input-node={0}.ifoc_t dim-offset={1} dim={2}".format(name, offset, cell_dim)) - offset += cell_dim - component_nodes.append("component-node name={0}.g_t component={0}.g input={0}.g1_t".format(name)) - - - configs.append("# parts of c_t") - configs.append("component-node name={0}.c1_t component={0}.c1 input=Append({0}.f_t, {1})".format(name, delayed_c_t_descriptor)) - configs.append("component-node name={0}.c2_t component={0}.c2 input=Append({0}.i_t, {0}.g_t)".format(name)) - - configs.append("# m_t") - configs.append("component-node name={0}.m_t component={0}.m input=Append({0}.o_t, {0}.h_t)".format(name)) - - # add the recurrent connections - configs.append("# projection matrices : Wrm and Wpm") - configs.append("component name={0}.W_rp.m type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, cell_dim, rec_proj_dim + nonrec_proj_dim, affine_str)) - configs.append("component name={0}.r type=BackpropTruncationComponent dim={1} {2}".format(name, rec_proj_dim, bptrunc_str)) - - configs.append("# r_t and p_t : rp_t will be the output") - configs.append("component-node name={0}.rp_t component={0}.W_rp.m input={0}.m_t".format(name)) - configs.append("dim-range-node name={0}.r_t_preclip input-node={0}.rp_t dim-offset=0 dim={1}".format(name, rec_proj_dim)) - configs.append("component-node name={0}.r_t component={0}.r input={0}.r_t_preclip".format(name)) - configs.append("### End LTSM layer '{0}'".format(name)) - - return configs - # This class is for lines like # 'fast-lstm-layer name=lstm1 input=[-1] delay=-3' @@ -571,6 +456,12 @@ def generate_lstm_config(self): # lstm-nonlinearity-options=' max-change=0.75 ' [Options string to pass into the LSTM nonlinearity component.] # ng-affine-options=' max-change=1.5 ' [Additional options used for the full matrices in the LSTM, can be used to # do things like set biases to initialize to 1] +# max-cell-value=-1 [If >0, an approximate maximum on the contents of the cell (c_t); +# enforced by putting a scaling factor of +# recurrence_scale = 1 - abs(delay)/max_cell_value +# on the recurrence, i.e. the term c_{t-1} in the LSTM equations. +# E.g. setting this to 50 means the activations can't get bigger +# than about 50.] class XconfigFastLstmLayer(XconfigLayerBase): def __init__(self, first_token, key_to_value, prev_names = None): assert first_token == "fast-lstm-layer" @@ -590,7 +481,8 @@ def set_default_configs(self): 'lstm-nonlinearity-options' : ' max-change=0.75', # the affine layer contains 4 of our old layers -> use a # larger max-change than the normal value of 0.75. - 'ng-affine-options' : ' max-change=1.5' + 'ng-affine-options' : ' max-change=1.5', + 'max-cell-value': -1.0 } self.c_needed = False # keep track of whether the 'c' output is needed. 
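As a quick sanity check of the max-cell-value mechanism documented above, here is a minimal runnable sketch of the scale computation the layer code performs; the helper name and the sample values are invented for illustration and are not part of the patch:

def recurrence_scale(delay, max_cell_value):
    # Scale placed on the c_{t-1} term of the LSTM recurrence; a
    # max-cell-value of -1 means the feature is disabled (scale 1.0).
    if max_cell_value < 0:
        return 1.0
    scale = 1.0 - abs(delay) / float(max_cell_value)
    assert scale > 0, "max-cell-value is too small for this delay"
    return scale

print(recurrence_scale(-1, 50.0))   # 0.98
print(recurrence_scale(-3, 50.0))   # 0.94
print(recurrence_scale(-3, -1.0))   # 1.0, i.e. no scaling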
@@ -602,6 +494,8 @@ def check_configs(self): key = 'cell-dim' if self.config['cell-dim'] <= 0: raise RuntimeError("cell-dim has invalid value {0}.".format(self.config[key])) + if self.config['delay'] == 0: + raise RuntimeError("delay cannot be zero") @@ -649,17 +543,23 @@ def generate_lstm_config(self): input_descriptor = self.descriptors['input']['final-string'] cell_dim = self.config['cell-dim'] delay = self.config['delay'] + affine_str = self.config['ng-affine-options'] + max_cell_value = self.config['max-cell-value'] + # we expect max_cell_value to be either -1, or large, like 10 or 50. + recurrence_scale = (1.0 if max_cell_value < 0 else + 1.0 - (abs(delay) / max_cell_value)) + assert recurrence_scale > 0 # or user may have set max-cell-value much + # too small. + lstm_str = self.config['lstm-nonlinearity-options'] bptrunc_str = ("clipping-threshold={0}" " zeroing-threshold={1}" " zeroing-interval={2}" " recurrence-interval={3}" + " scale={4}" "".format(self.config['clipping-threshold'], self.config['zeroing-threshold'], self.config['zeroing-interval'], - abs(delay))) - affine_str = self.config['ng-affine-options'] - lstm_str = self.config['lstm-nonlinearity-options'] - + abs(delay), recurrence_scale)) configs = [] @@ -722,6 +622,12 @@ def generate_lstm_config(self): # lstm-nonlinearity-options=' max-change=0.75 ' [Options string to pass into the LSTM nonlinearity component.] # ng-affine-options=' max-change=1.5 ' [Additional options used for the full matrices in the LSTM, can be used to # do things like set biases to initialize to 1] +# max-cell-value=-1 [If >0, an approximate maximum on the contents of the cell (c_t); +# enforced by putting a scaling factor of +# recurrence_scale = 1 - abs(delay)/max_cell_value +# on the recurrence, i.e. the term c_{t-1} in the LSTM equations. +# E.g. setting this to 50 means the activations can't get bigger +# than about 50.] class XconfigFastLstmpLayer(XconfigLayerBase): def __init__(self, first_token, key_to_value, prev_names = None): assert first_token == "fast-lstmp-layer" @@ -742,6 +648,7 @@ def set_default_configs(self): # the affine layer contains 4 of our old layers -> use a # larger max-change than the normal value of 0.75. 'ng-affine-options' : ' max-change=1.5', + 'max-cell-value': -1.0, 'zeroing-interval' : 20, 'zeroing-threshold' : 15.0 } @@ -763,7 +670,8 @@ def check_configs(self): if self.config[key] <= 0: raise RuntimeError("{0} has invalid value {1}.".format( key, self.config[key])) - + if self.config['delay'] == 0: + raise RuntimeError("delay cannot be zero") if (self.config['recurrent-projection-dim'] + self.config['non-recurrent-projection-dim'] > self.config['cell-dim']): @@ -815,20 +723,28 @@ def generate_lstm_config(self): input_dim = self.descriptors['input']['dim'] input_descriptor = self.descriptors['input']['final-string'] cell_dim = self.config['cell-dim'] + delay = self.config['delay'] rec_proj_dim = self.config['recurrent-projection-dim'] nonrec_proj_dim = self.config['non-recurrent-projection-dim'] - delay = self.config['delay'] + affine_str = self.config['ng-affine-options'] + max_cell_value = self.config['max-cell-value'] + # we expect max_cell_value to be either -1, or large, like 10 or 50. + recurrence_scale = (1.0 if max_cell_value < 0 else + 1.0 - (abs(delay) / max_cell_value)) + assert recurrence_scale > 0 # or user may have set max-cell-value much + # too small. 
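        # (Illustration with invented values: delay=-3 and max-cell-value=50
        # give recurrence_scale = 1 - 3/50 = 0.94, so the bptrunc_str
        # assembled below would end in "... recurrence-interval=3 scale=0.94".)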
+ bptrunc_str = ("clipping-threshold={0}" " zeroing-threshold={1}" " zeroing-interval={2}" " recurrence-interval={3}" + " scale={4}" "".format(self.config['clipping-threshold'], self.config['zeroing-threshold'], self.config['zeroing-interval'], - abs(delay))) - affine_str = self.config['ng-affine-options'] - lstm_str = self.config['lstm-nonlinearity-options'] + abs(delay), recurrence_scale)) + lstm_str = self.config['lstm-nonlinearity-options'] configs = [] diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py index cc786d091ac..89458c65152 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py @@ -30,7 +30,6 @@ 'affine-layer' : xlayers.XconfigAffineLayer, 'lstm-layer' : xlayers.XconfigLstmLayer, 'lstmp-layer' : xlayers.XconfigLstmpLayer, - 'lstmpc-layer' : xlayers.XconfigLstmpcLayer, 'fast-lstm-layer' : xlayers.XconfigFastLstmLayer, 'fast-lstmp-layer' : xlayers.XconfigFastLstmpLayer } diff --git a/src/nnet3/nnet-general-component.cc b/src/nnet3/nnet-general-component.cc index b1a2d9327f8..9772c31b13b 100644 --- a/src/nnet3/nnet-general-component.cc +++ b/src/nnet3/nnet-general-component.cc @@ -887,7 +887,15 @@ void BackpropTruncationComponent::Read(std::istream &is, bool binary) { ExpectOneOrTwoTokens(is, binary, "<BackpropTruncationComponent>", "<Dim>"); ReadBasicType(is, binary, &dim_); - ExpectToken(is, binary, "<ClippingThreshold>"); + std::string tok; + ReadToken(is, binary, &tok); + if (tok == "<Scale>") { + ReadBasicType(is, binary, &scale_); + ReadToken(is, binary, &tok); + } else { + scale_ = 1.0; + } + KALDI_ASSERT(tok == "<ClippingThreshold>"); ReadBasicType(is, binary, &clipping_threshold_); ExpectToken(is, binary, "<ZeroingThreshold>"); ReadBasicType(is, binary, &zeroing_threshold_); @@ -911,6 +919,8 @@ void BackpropTruncationComponent::Write(std::ostream &os, bool binary) const { WriteToken(os, binary, "<BackpropTruncationComponent>"); WriteToken(os, binary, "<Dim>"); WriteBasicType(os, binary, dim_); + WriteToken(os, binary, "<Scale>"); + WriteBasicType(os, binary, scale_); WriteToken(os, binary, "<ClippingThreshold>"); WriteBasicType(os, binary, clipping_threshold_); WriteToken(os, binary, "<ZeroingThreshold>"); WriteBasicType(os, binary, zeroing_threshold_); @@ -957,6 +967,7 @@ void BackpropTruncationComponentPrecomputedIndexes::Read(std::istream &istream, std::string BackpropTruncationComponent::Info() const { std::ostringstream stream; stream << Type() << ", dim=" << dim_ + << ", scale=" << scale_ << ", clipping-threshold=" << clipping_threshold_ << ", clipped-proportion=" << (count_ > 0.0 ?
num_clipped_ / count_ : 0) @@ -969,14 +980,15 @@ std::string BackpropTruncationComponent::Info() const { return stream.str(); } -void BackpropTruncationComponent::Init(int32 dim, - BaseFloat clipping_threshold, - BaseFloat zeroing_threshold, - int32 zeroing_interval, - int32 recurrence_interval) { +void BackpropTruncationComponent::Init( + int32 dim, BaseFloat scale, BaseFloat clipping_threshold, + BaseFloat zeroing_threshold, int32 zeroing_interval, + int32 recurrence_interval) { KALDI_ASSERT(clipping_threshold >= 0 && zeroing_threshold >= 0 && - zeroing_interval > 0 && recurrence_interval > 0 && dim > 0); + scale > 0.0 && zeroing_interval > 0 && + recurrence_interval > 0 && dim > 0); dim_ = dim; + scale_ = scale; clipping_threshold_ = clipping_threshold; zeroing_threshold_ = zeroing_threshold; zeroing_interval_ = zeroing_interval; @@ -991,9 +1003,11 @@ void BackpropTruncationComponent::Init(int32 dim, void BackpropTruncationComponent::InitFromConfig(ConfigLine *cfl) { int32 dim = 0; bool ok = cfl->GetValue("dim", &dim); - BaseFloat clipping_threshold = 30.0; - BaseFloat zeroing_threshold = 15.0; + BaseFloat scale = 1.0, + clipping_threshold = 30.0, + zeroing_threshold = 15.0; int32 zeroing_interval = 20, recurrence_interval = 1; + cfl->GetValue("scale", &scale); cfl->GetValue("clipping-threshold", &clipping_threshold); cfl->GetValue("zeroing-threshold", &zeroing_threshold); cfl->GetValue("zeroing-interval", &zeroing_interval); @@ -1003,14 +1017,15 @@ void BackpropTruncationComponent::InitFromConfig(ConfigLine *cfl) { recurrence_interval < 1 || dim <= 0) KALDI_ERR << "Invalid initializer for layer of type " << Type() << ": \"" << cfl->WholeLine() << "\""; - Init(dim, clipping_threshold, zeroing_threshold, + Init(dim, scale, clipping_threshold, zeroing_threshold, zeroing_interval, recurrence_interval); } -// virtual +// virtual Component* BackpropTruncationComponent::Copy() const { BackpropTruncationComponent *ans = new BackpropTruncationComponent(); ans->dim_ = dim_; + ans->scale_ = scale_; ans->clipping_threshold_ = clipping_threshold_; ans->zeroing_threshold_ = zeroing_threshold_; ans->zeroing_interval_ = zeroing_interval_; @@ -1064,6 +1079,8 @@ void BackpropTruncationComponent::Propagate( const CuMatrixBase<BaseFloat> &in, CuMatrixBase<BaseFloat> *out) const { out->CopyFromMat(in); + if (scale_ != 1.0) + out->Scale(scale_); } // virtual @@ -1082,6 +1099,8 @@ void BackpropTruncationComponent::Backprop(const std::string &debug_info, // the following statement will do nothing if in_deriv and out_deriv have same // memory.
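// (A short derivation of why the same factor appears in the backprop,
// implied by the code above: Propagate() computed out = scale_ * in, so the
// chain rule gives d(objf)/d(in) = scale_ * d(objf)/d(out) -- hence the
// Scale(scale_) applied to in_deriv just below.)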
in_deriv->CopyFromMat(out_deriv); + if (scale_ != 1.0) + in_deriv->Scale(scale_); BackpropTruncationComponent *to_update = dynamic_cast<BackpropTruncationComponent*>(to_update_in); @@ -1130,7 +1149,7 @@ void BackpropTruncationComponent::Backprop(const std::string &debug_info, zeroing_scales_vec.Add(1.0); // now the element of zeroing_scales_vec is 0.0 if we want to zero its // corresponding sample's gradient, and 1.0 otherwise - + // combines clipping_scales and zeroing_scales and applies combined_scales // to in_deriv all at once CuVector<BaseFloat> combined_scales(clipping_scales); diff --git a/src/nnet3/nnet-general-component.h b/src/nnet3/nnet-general-component.h index 93a46eaedbf..f389d019522 100644 --- a/src/nnet3/nnet-general-component.h +++ b/src/nnet3/nnet-general-component.h @@ -441,28 +441,29 @@ class StatisticsPoolingComponentPrecomputedIndexes: }; // BackpropTruncationComponent zeroes out the gradients every certain number -// of frames, as well as having gradient-clipping functionality as +// of frames, as well as having gradient-clipping functionality as // ClipGradientComponent. // This component will be used to prevent gradient explosion problem in // recurrent neural networks class BackpropTruncationComponent: public Component { public: BackpropTruncationComponent(int32 dim, + BaseFloat scale, BaseFloat clipping_threshold, BaseFloat zeroing_threshold, int32 zeroing_interval, int32 recurrence_interval) { - Init(dim, clipping_threshold, zeroing_threshold, + Init(dim, scale, clipping_threshold, zeroing_threshold, zeroing_interval, recurrence_interval);} - BackpropTruncationComponent(): dim_(0), clipping_threshold_(-1), + BackpropTruncationComponent(): dim_(0), scale_(1.0), clipping_threshold_(-1), zeroing_threshold_(-1), zeroing_interval_(0), recurrence_interval_(0), num_clipped_(0), num_zeroed_(0), count_(0), count_zeroing_boundaries_(0) { } virtual int32 InputDim() const { return dim_; } virtual int32 OutputDim() const { return dim_; } virtual void InitFromConfig(ConfigLine *cfl); - void Init(int32 dim, BaseFloat clipping_threshold, + void Init(int32 dim, BaseFloat scale, BaseFloat clipping_threshold, BaseFloat zeroing_threshold, int32 zeroing_interval, int32 recurrence_interval); @@ -505,7 +506,13 @@ class BackpropTruncationComponent: public Component { private: // input/output dimension int32 dim_; - + + // Scale that is applied in the forward propagation (and of course in the + // backprop, to match). Expected to normally be 1, but setting this to other + // values (e.g. slightly less than 1) can be used to produce variants of + // LSTMs where the activations are bounded.
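+ // (A reasoning sketch for the 'bounded activations' claim, under the
+ // assumption that the additive per-step input to the cell is bounded by 1
+ // in magnitude, as for an LSTM where |i_t * g_t| <= 1: with delay -1 the
+ // xconfig code sets scale = 1 - 1/M for max-cell-value=M, and if
+ // c_t = scale * c_{t-1} + u_t with |u_t| <= 1, then |c_t| <= 1/(1-scale) = M.)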
+ BaseFloat scale_; + // threshold (e.g., 30) to be used for clipping corresponds to max-row-norm BaseFloat clipping_threshold_; diff --git a/src/nnet3/nnet-test-utils.cc b/src/nnet3/nnet-test-utils.cc index 15130f5669a..98f49511adf 100644 --- a/src/nnet3/nnet-test-utils.cc +++ b/src/nnet3/nnet-test-utils.cc @@ -543,6 +543,7 @@ void GenerateConfigSequenceLstmWithTruncation( int32 clipping_threshold = RandInt(6, 50), zeroing_threshold = RandInt(1, 5), zeroing_interval = RandInt(1, 5) * 10; + BaseFloat scale = 0.8 + 0.1*RandInt(0,3); os << "input-node name=input dim=" << input_dim << std::endl; @@ -616,12 +617,14 @@ void GenerateConfigSequenceLstmWithTruncation( << " output-dim=" << cell_dim << std::endl; os << "component name=c type=BackpropTruncationComponent dim=" << cell_dim + << " scale=" << scale << " clipping-threshold=" << clipping_threshold << " zeroing-threshold=" << zeroing_threshold << " zeroing-interval=" << zeroing_interval << " recurrence-interval=1" << std::endl; os << "component name=r type=BackpropTruncationComponent dim=" << projection_dim + << " scale=" << scale << " clipping-threshold=" << clipping_threshold << " zeroing-threshold=" << zeroing_threshold << " zeroing-interval=" << zeroing_interval From cf252baaffce4482a0a10ec926603bad3ffa6130 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 5 Jan 2017 00:41:23 -0500 Subject: [PATCH 202/530] Add steps/nnet3/decode_looped.sh --- egs/wsj/s5/steps/nnet3/decode.sh | 2 - egs/wsj/s5/steps/nnet3/decode_looped.sh | 193 ++++++++++++++++++++++++ 2 files changed, 193 insertions(+), 2 deletions(-) create mode 100755 egs/wsj/s5/steps/nnet3/decode_looped.sh diff --git a/egs/wsj/s5/steps/nnet3/decode.sh b/egs/wsj/s5/steps/nnet3/decode.sh index 10ac29e1c59..b97e7f415d7 100755 --- a/egs/wsj/s5/steps/nnet3/decode.sh +++ b/egs/wsj/s5/steps/nnet3/decode.sh @@ -23,7 +23,6 @@ ivector_scale=1.0 lattice_beam=8.0 # Beam we use in lattice generation. iter=final num_threads=1 # if >1, will use gmm-latgen-faster-parallel -parallel_opts= # ignored now. scoring_opts= skip_diagnostics=false skip_scoring=false @@ -56,7 +55,6 @@ if [ $# -ne 3 ]; then echo " --iter <iter> # Iteration of model to decode; default is final." echo " --scoring-opts <string> # options to local/score.sh" echo " --num-threads <n> # number of threads to use, default 1." - echo " --parallel-opts <opts> # e.g. '--num-threads 4' if you supply --num-threads 4" exit 1; fi diff --git a/egs/wsj/s5/steps/nnet3/decode_looped.sh b/egs/wsj/s5/steps/nnet3/decode_looped.sh new file mode 100755 index 00000000000..8850045c9a3 --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/decode_looped.sh @@ -0,0 +1,193 @@ +#!/bin/bash + +# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey). +# Apache 2.0. + + +# This is like decode.sh except it uses "looped" decoding. This is an nnet3 +# mechanism for reusing previously computed activations when we evaluate the +# neural net for successive chunks of data. It is applicable to TDNNs and LSTMs +# and similar forward-recurrent topologies, but not to backward-recurrent +# topologies like BLSTMs. Be careful because the script itself does not have a +# way to figure out what kind of topology you are using. +# +# Also be aware that this decoding mechanism means that you have effectively +# unlimited context within the utterance. Unless your models were trained (at +# least partly) on quite large chunk-sizes, e.g.
100 or more (although the +# longer the BLSTM recurrence the larger chunk-size you'd need in training), +# there is a possibility that this effectively infinite left-context will cause +# a mismatch with the training condition. Also, for recurrent topologies, you may want to make sure +# that the --extra-left-context-initial matches the --egs.chunk-left-context-initial +# that you trained with. [Note: if not specified during training, it defaults to +# the same as the regular --extra-left-context.] + +# This script does decoding with a neural-net. If the neural net was built on +# top of fMLLR transforms from a conventional system, you should provide the +# --transform-dir option. + +# Begin configuration section. +stage=1 +transform_dir= # dir to find fMLLR transforms. +nj=4 # number of decoding jobs. If --transform-dir set, must match that number! +acwt=0.1 # Just a default value, used for adaptation and beam-pruning. +post_decode_acwt=1.0 # can be used in 'chain' systems to scale acoustics by 10 so the + # regular scoring script works. +cmd=run.pl +beam=15.0 +frames_per_chunk=50 +max_active=7000 +min_active=200 +ivector_scale=1.0 +lattice_beam=8.0 # Beam we use in lattice generation. +iter=final +scoring_opts= +skip_diagnostics=false +skip_scoring=false +extra_left_context_initial=0 +feat_type= +online_ivector_dir= +minimize=false +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: $0 [options] <graph-dir> <data-dir> <decode-dir>" + echo "e.g.: steps/nnet3/decode_looped.sh --nj 8 \\" + echo "--online-ivector-dir exp/nnet2_online/ivectors_test_eval92 \\" + echo " exp/tri4b/graph_bg data/test_eval92_hires $dir/decode_bg_eval92" + echo "main options (for others, see top of script file)" + echo " --transform-dir <decoding-dir> # directory of previous decoding" + echo " # where we can find transforms for SAT systems." + echo " --config <config-file> # config containing options" + echo " --nj <nj> # number of parallel jobs" + echo " --cmd <cmd> # Command to run in parallel with" + echo " --beam <beam> # Decoding beam; default 15.0" + echo " --iter <iter> # Iteration of model to decode; default is final." + echo " --scoring-opts <string> # options to local/score.sh" + exit 1; +fi + +graphdir=$1 +data=$2 +dir=$3 +srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. +model=$srcdir/$iter.mdl + + +[ ! -z "$online_ivector_dir" ] && \ + extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period" + +for f in $graphdir/HCLG.fst $data/feats.scp $model $extra_files; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +sdata=$data/split$nj; +cmvn_opts=`cat $srcdir/cmvn_opts` || exit 1; + +mkdir -p $dir/log +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +echo $nj > $dir/num_jobs + + +## Set up features.
+if [ -z "$feat_type" ]; then + if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=raw; fi + echo "$0: feature type is $feat_type" +fi + +splice_opts=`cat $srcdir/splice_opts 2>/dev/null` + +case $feat_type in + raw) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |";; + lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -s $transform_dir/num_jobs ] && \ + echo "$0: expected $transform_dir/num_jobs to contain the number of jobs." && exit 1; + nj_orig=$(cat $transform_dir/num_jobs) + + if [ $feat_type == "raw" ]; then trans=raw_trans; + else trans=trans; fi + if [ $feat_type == "lda" ] && \ + ! cmp $transform_dir/../final.mat $srcdir/final.mat && \ + ! cmp $transform_dir/final.mat $srcdir/final.mat; then + echo "$0: LDA transforms differ between $srcdir and $transform_dir" + exit 1; + fi + if [ ! -f $transform_dir/$trans.1 ]; then + echo "$0: expected $transform_dir/$trans.1 to exist (--transform-dir option)" + exit 1; + fi + if [ $nj -ne $nj_orig ]; then + # Copy the transforms into an archive with an index. + for n in $(seq $nj_orig); do cat $transform_dir/$trans.$n; done | \ + copy-feats ark:- ark,scp:$dir/$trans.ark,$dir/$trans.scp || exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk scp:$dir/$trans.scp ark:- ark:- |" + else + # number of jobs matches with alignment dir. + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/$trans.JOB ark:- ark:- |" + fi +elif grep 'transform-feats --utt2spk' $srcdir/log/train.1.log >&/dev/null; then + echo "$0: **WARNING**: you seem to be using a neural net system trained with transforms," + echo " but you are not providing the --transform-dir option in test time." +fi +## + +if [ ! -z "$online_ivector_dir" ]; then + ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; + ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" +fi + +if [ "$post_decode_acwt" == 1.0 ]; then + lat_wspecifier="ark:|gzip -c >$dir/lat.JOB.gz" +else + lat_wspecifier="ark:|lattice-scale --acoustic-scale=$post_decode_acwt ark:- ark:- | gzip -c >$dir/lat.JOB.gz" +fi + +frame_subsampling_opt= +if [ -f $srcdir/frame_subsampling_factor ]; then + # e.g. for 'chain' systems + frame_subsampling_opt="--frame-subsampling-factor=$(cat $srcdir/frame_subsampling_factor)" +fi + +if [ $stage -le 1 ]; then + $cmd JOB=1:$nj $dir/log/decode.JOB.log \ + nnet3-latgen-faster-looped $ivector_opts $frame_subsampling_opt \ + --frames-per-chunk=$frames_per_chunk \ + --extra-left-context-initial=$extra_left_context_initial \ + --minimize=$minimize --max-active=$max_active --min-active=$min_active --beam=$beam \ + --lattice-beam=$lattice_beam --acoustic-scale=$acwt --allow-partial=true \ + --word-symbol-table=$graphdir/words.txt "$model" \ + $graphdir/HCLG.fst "$feats" "$lat_wspecifier" || exit 1; +fi + + +if [ $stage -le 2 ]; then + if ! $skip_diagnostics ; then + [ ! 
-z $iter ] && iter_opt="--iter $iter" + steps/diagnostic/analyze_lats.sh --cmd "$cmd" $iter_opt $graphdir $dir + fi +fi + + +# The output of this script is the files "lat.*.gz"-- we'll rescore this at +# different acoustic scales to get the final output. +if [ $stage -le 3 ]; then + if ! $skip_scoring ; then + [ ! -x local/score.sh ] && \ + echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; + echo "score best paths" + [ "$iter" != "final" ] && iter_opt="--iter $iter" + local/score.sh $iter_opt $scoring_opts --cmd "$cmd" $data $graphdir $dir + echo "score confidence and timing with sclite" + fi +fi +echo "Decoding done." +exit 0; From c9f74ee320b0b599ae5acbaf99dfcdbbfb9ed3ca Mon Sep 17 00:00:00 2001 From: david-ryan-snyder Date: Thu, 5 Jan 2017 15:00:01 -0500 Subject: [PATCH 203/530] [src] fix to nnet3 ConstantFunctionComponent so that LR is set correctly (#1311) note: affects no currently-checked-in recipes. --- src/nnet3/nnet-general-component.h | 10 +++++----- src/nnet3/nnet-simple-component.cc | 7 ++++--- src/nnet3/nnet-test-utils.cc | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/nnet3/nnet-general-component.h b/src/nnet3/nnet-general-component.h index 93a46eaedbf..9750852544e 100644 --- a/src/nnet3/nnet-general-component.h +++ b/src/nnet3/nnet-general-component.h @@ -156,9 +156,9 @@ class DistributeComponentPrecomputedIndexes: StatisticsPoolingComponent to extract moving-average mean and standard-deviation statistics. - StatisticsExtractionExomponent designed to extract statistics-- 0th-order, + StatisticsExtractionComponent is designed to extract statistics-- 0th-order, 1st-order and optionally diagonal 2nd-order stats-- from small groups of - frames, such as 10 frame. The statistics will then be further processed by + frames, such as 10 frames. The statistics will then be further processed by StatisticsPoolingComponent to compute moving-average means and (if configured) standard deviations. The reason for the two-component way of doing this is efficiency, particularly in the graph-compilation phase. (Otherwise there @@ -185,7 +185,7 @@ class DistributeComponentPrecomputedIndexes: An output of this component will be 'computable' any time at least one of the corresponding inputs is computable. - In all cases the first dimension of the output will be a count (between 1 and + In all cases the first dimension of the output will be a count (between 1 and 10 inclusive in this example). If include-variance=false, then the output dimension will be input-dim + 1. and the output dimensions >0 will be 1st-order statistics (sums of the input). If include-variance=true, then the @@ -441,7 +441,7 @@ class StatisticsPoolingComponentPrecomputedIndexes: }; // BackpropTruncationComponent zeroes out the gradients every certain number -// of frames, as well as having gradient-clipping functionality as +// of frames, as well as having gradient-clipping functionality as // ClipGradientComponent. 
// This component will be used to prevent gradient explosion problem in // recurrent neural networks @@ -505,7 +505,7 @@ class BackpropTruncationComponent: public Component { private: // input/output dimension int32 dim_; - + // threshold (e.g., 30) to be used for clipping corresponds to max-row-norm BaseFloat clipping_threshold_; diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc index 0c036eb7f99..90f52a11aa5 100644 --- a/src/nnet3/nnet-simple-component.cc +++ b/src/nnet3/nnet-simple-component.cc @@ -2342,12 +2342,13 @@ std::string ConstantFunctionComponent::Info() const { } ConstantFunctionComponent::ConstantFunctionComponent(): - input_dim_(-1), is_updatable_(true), use_natural_gradient_(true) { } + UpdatableComponent(), input_dim_(-1), is_updatable_(true), + use_natural_gradient_(true) { } ConstantFunctionComponent::ConstantFunctionComponent( const ConstantFunctionComponent &other): - input_dim_(other.input_dim_), output_(other.output_), - is_updatable_(other.is_updatable_), + UpdatableComponent(other), input_dim_(other.input_dim_), + output_(other.output_), is_updatable_(other.is_updatable_), use_natural_gradient_(other.use_natural_gradient_), preconditioner_(other.preconditioner_) { } diff --git a/src/nnet3/nnet-test-utils.cc b/src/nnet3/nnet-test-utils.cc index 170ea51ca8f..37ce355c788 100644 --- a/src/nnet3/nnet-test-utils.cc +++ b/src/nnet3/nnet-test-utils.cc @@ -1105,7 +1105,7 @@ static void GenerateRandomComponentConfig(std::string *component_type, std::string *config) { int32 n = RandInt(0, 30); - BaseFloat learning_rate = 0.001 * RandInt(1, 3); + BaseFloat learning_rate = 0.001 * RandInt(1, 100); std::ostringstream os; switch(n) { From 08c0db9a46664fa917e5b46107f1457d9ac00641 Mon Sep 17 00:00:00 2001 From: Jinyi Yang Date: Thu, 5 Jan 2017 15:02:03 -0500 Subject: [PATCH 204/530] [scripts] Extend queue.pl to trap signals and kill queue job as it dies (issue #861) (#1309) --- egs/wsj/s5/utils/queue.pl | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/egs/wsj/s5/utils/queue.pl b/egs/wsj/s5/utils/queue.pl index 424b07ff612..69188ec074a 100755 --- a/egs/wsj/s5/utils/queue.pl +++ b/egs/wsj/s5/utils/queue.pl @@ -65,8 +65,8 @@ my $jobname; my $jobstart; my $jobend; - my $array_job = 0; +my $sge_job_id; sub print_usage() { print STDERR @@ -90,6 +90,13 @@ () exit 1; } +sub caught_signal { + if ( defined $sge_job_id ) { # Signal trapped after submitting jobs + system ("qdel $sge_job_id"); + die "Caught a signal: $! , deleting SGE task: $sge_job_id and exiting\n"; + } +} + if (@ARGV < 2) { print_usage(); } @@ -179,6 +186,9 @@ () # A more detailed description of the ways the options would be handled is at # the top of this file. +$SIG{INT} = \&caught_signal; +$SIG{TERM} = \&caught_signal; + my $opened_config_file = 1; open CONFIG, "<$config" or $opened_config_file = 0; @@ -418,7 +428,6 @@ () } } -my $sge_job_id; if (! $sync) { # We're not submitting with -sync y, so we # need to wait for the jobs to finish. We wait for the # sync-files we "touched" in the script to exist. From c160a9883a9e9af58c641b676f8525603f5f55b3 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 5 Jan 2017 18:34:26 -0800 Subject: [PATCH 205/530] [build] Adding versioning mechanism to Kaldi. 
This is version 5.0.0 (#1306) --- .gitignore | 2 + src/.version | 1 + src/base/Makefile | 16 ++++++- src/base/get_version.sh | 93 +++++++++++++++++++++++++++++++++++++++ src/base/kaldi-error.cc | 15 ++++--- src/util/parse-options.cc | 8 ++-- 6 files changed, 121 insertions(+), 14 deletions(-) create mode 100644 src/.version create mode 100755 src/base/get_version.sh diff --git a/.gitignore b/.gitignore index 998ad136e81..b0784cc2c0c 100644 --- a/.gitignore +++ b/.gitignore @@ -57,6 +57,8 @@ GSYMS *.manifest # /src/ +/src/.short_version +/src/base/version.h /src/doc/table/ /src/doc/tools.dox /src/htdocs/ diff --git a/src/.version b/src/.version new file mode 100644 index 00000000000..819e07a2243 --- /dev/null +++ b/src/.version @@ -0,0 +1 @@ +5.0 diff --git a/src/base/Makefile b/src/base/Makefile index 8db3b86d021..583c6badcf2 100644 --- a/src/base/Makefile +++ b/src/base/Makefile @@ -1,3 +1,16 @@ +# Whenever make is run in this directory, call ./get_version.sh as the +# first thing. This script regenerates ./version.h if necessary, e.g. +# if it does not already exist or if the version number has changed. +LOG := $(shell ./get_version.sh; echo " $$?") +ifneq ($(strip $(LOG)), 0) + RC := $(lastword $(LOG)) + OUT := $(wordlist 1,$(shell echo $$(($(words $(LOG))-1))),$(LOG)) + ifeq ($(RC),0) + $(info $(OUT)) + else + $(error $(OUT)) + endif +endif all: @@ -9,7 +22,6 @@ OBJFILES = kaldi-math.o kaldi-error.o io-funcs.o kaldi-utils.o LIBNAME = kaldi-base -ADDLIBS = +ADDLIBS = include ../makefiles/default_rules.mk - diff --git a/src/base/get_version.sh b/src/base/get_version.sh new file mode 100755 index 00000000000..4c1e8059059 --- /dev/null +++ b/src/base/get_version.sh @@ -0,0 +1,93 @@ +#!/usr/bin/env bash + +# Copyright 2017 University of Southern California (Author: Dogan Can) + +# See ../../COPYING for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + +# Kaldi versioning is loosely based on the semantic versioning scheme. This +# script tries to work out the version string from the partial version number +# specified in src/.version along with the recent git history. By convention +# src/.version specifies the first two components (MAJOR.MINOR) of the version +# number. The third component (PATCH) is determined by counting how many +# commits there are that are newer than the last commit modifying +# src/.version. If there are uncommitted changes in the src/ directory, then +# the version string is extended with a suffix (~N) specifying the number of +# files with uncommitted changes. The last component of the version string is +# the abbreviated hash of the HEAD commit. If git history is not available or +# if the file src/.short_version exists, then the version string defaults to +# the number specified in src/.version. + +set -e + +# Change working directory to the directory where this script is located.
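# (A worked example of the scheme described above, with an invented history:
# if src/.version contains "5.0", 123 commits are newer than the last commit
# that modified it, 2 files under src/ have uncommitted changes, and the
# abbreviated HEAD hash is 89a1, the version string comes out as
# "5.0.123~2-89a1".)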
+cd "$(dirname ${BASH_SOURCE[0]})" + +# Read the partial version number specified in the first line of src/.version. +version=$(head -1 ../.version) + +# Empty version number is not allowed. +if [ -z "$version" ]; then + version="?" +fi + +if [ -e ../.short_version ]; then + echo "$0: File src/.short_version exists." + echo "$0: Stopping the construction of full version number from git history." +elif [[ $version != +([0-9]).+([0-9]) ]]; then + echo "$0: The version number \"$version\" specified in src/.version is not" \ + "in MAJOR.MINOR format." + echo "$0: Stopping the construction of full version number from git history." +elif ! which git >&/dev/null; then + echo "$0: Git is not installed." + echo "$0: Using the version number \"$version\" specified in src/.version." +elif [ "$(git rev-parse --is-inside-work-tree 2>/dev/null)" != true ]; then + echo "$0: Git history is not available." + echo "$0: Using the version number \"$version\" specified in src/.version." +else + # Figure out patch number. + version_commit=$(git log -1 --pretty=oneline ../.version | cut -f 1 -d ' ') + patch_number=$(git rev-list ${version_commit}..HEAD | wc -l) + version="$version.$patch_number" + + # Check for uncommitted changes in src/. + uncommitted_changes=$(git diff-index HEAD .. | wc -l) + if [ $uncommitted_changes -gt 0 ]; then + # Add suffix ~N if there are N files in src/ with uncommitted changes + version="$version~$uncommitted_changes" + fi + + # Figure out HEAD commit SHA-1. + head_commit=$(git log -1 --pretty=oneline | cut -f 1 -d ' ') + head_commit_short=$(git log -1 --oneline --abbrev=4 | cut -f 1 -d ' ') + version="$version-${head_commit_short}" +fi + +# Write version info to a temporary file. +temp=$(mktemp) +trap 'rm -f "$temp"' EXIT +echo "// This file was automatically created by ./get_version.sh." > $temp +echo "// It is only included by ./kaldi-error.cc." >> $temp +echo "#define KALDI_VERSION \"$version\"" >> $temp +if [ -n "$head_commit" ]; then + echo "#define KALDI_GIT_HEAD \"$head_commit\"" >> $temp +fi + +# Overwrite ./version.h with the temporary file if they are different. +if ! cmp -s $temp version.h; then + cp $temp version.h + chmod 644 version.h +fi diff --git a/src/base/kaldi-error.cc b/src/base/kaldi-error.cc index 62f26df4c98..f2ce1edf37d 100644 --- a/src/base/kaldi-error.cc +++ b/src/base/kaldi-error.cc @@ -31,6 +31,7 @@ #include "base/kaldi-common.h" #include "base/kaldi-error.h" +#include "base/version.h" namespace kaldi { @@ -40,14 +41,13 @@ int32 g_kaldi_verbose_level = 0; const char *g_program_name = NULL; static LogHandler g_log_handler = NULL; -// If the program name was set (g_program_name != ""), the function -// GetProgramName returns the program name (without the path) followed by a -// colon, e.g. "gmm-align:". Otherwise it returns the empty string "". +// If the program name was set (g_program_name != ""), GetProgramName +// returns the program name (without the path), e.g. "gmm-align". +// Otherwise it returns the empty string "". const char *GetProgramName() { return g_program_name == NULL ? 
"" : g_program_name; } - /***** HELPER FUNCTIONS *****/ // Given a filename like "/a/b/c/d/e/f.cc", GetShortFileName @@ -184,12 +184,13 @@ void MessageLogger::HandleMessage(const LogMessageEnvelope &envelope, header << "ASSERTION_FAILED ("; break; default: - abort(); // coding errror (unknown 'severity'), + abort(); // coding error (unknown 'severity'), } } // fill the other info from the envelope, - header << GetProgramName() << envelope.func << "():" - << envelope.file << ':' << envelope.line << ")"; + header << GetProgramName() << "[" KALDI_VERSION "]" << ':' + << envelope.func << "():" << envelope.file << ':' << envelope.line + << ")"; // Printing the message, if (envelope.severity >= LogMessageEnvelope::kWarning) { diff --git a/src/util/parse-options.cc b/src/util/parse-options.cc index 0fde0501d4c..bf8999c4956 100644 --- a/src/util/parse-options.cc +++ b/src/util/parse-options.cc @@ -314,9 +314,8 @@ int ParseOptions::Read(int argc, const char *const argv[]) { std::string key, value; int i; if (argc > 0) { - // set global "const char*" g_program_name - // name of the program (followed by ':') - // so it can print it out in error messages; + // set global "const char*" g_program_name (name of the program) + // so it can be printed out in error messages; // it's useful because often the stderr of different programs will // be mixed together in the same log file. #ifdef _MSC_VER @@ -328,9 +327,8 @@ int ParseOptions::Read(int argc, const char *const argv[]) { c = argv[0]; else c++; - char *program_name = new char[strlen(c)+2]; + char *program_name = new char[strlen(c)+1]; strcpy(program_name, c); - strcat(program_name, ":"); delete [] g_program_name; g_program_name = program_name; } From 53e33dc4b2c7ffa7d76bceff12ac7d08966820fd Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Fri, 6 Jan 2017 15:01:00 -0500 Subject: [PATCH 206/530] [scripts] nnet3 scripts: minor bug fixes in error-handling code (#1321) --- egs/wsj/s5/steps/libs/nnet3/train/common.py | 18 ++++++++++-------- .../nnet3/train/frame_level_objf/common.py | 15 +++++++++------ .../s5/steps/nnet3/report/generate_plots.py | 3 +-- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py index dc24b37fdee..b4b7c56b8d9 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -142,8 +142,9 @@ def copy_egs_properties_to_exp_dir(egs_dir, dir): if os.path.isfile(file_name): shutil.copy2(file_name, dir) except IOError: - raise Exception("Error while trying to copy egs " - "property files to {dir}".format(dir=dir)) + logger.error("Error while trying to copy egs " + "property files to {dir}".format(dir=dir)) + raise def parse_generic_config_vars_file(var_file): @@ -197,9 +198,10 @@ def verify_egs_dir(egs_dir, feat_dim, ivector_dim, return [egs_left_context, egs_right_context, frames_per_eg, num_archives] - except (IOError, ValueError) as e: - raise Exception("The egs dir {0} has missing or " - "malformed files: {1}".format(egs_dir, e.strerr)) + except (IOError, ValueError): + logger.error("The egs dir {0} has missing or " + "malformed files.".format(egs_dir)) + raise def compute_presoftmax_prior_scale(dir, alidir, num_jobs, run_opts, @@ -362,9 +364,9 @@ def clean_nnet_dir(nnet_dir, num_iters, egs_dir, remove_model(nnet_dir, iter, num_iters, None, preserve_model_interval, get_raw_nnet_from_am=get_raw_nnet_from_am) - except (IOError, OSError) as err: - logger.warning("Error while cleaning up the 
nnet directory") - raise err + except (IOError, OSError): + logger.error("Error while cleaning up the nnet directory") + raise def remove_model(nnet_dir, iter, num_iters, models_to_combine=None, diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index 87cae801e90..a12f8fb3944 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -172,9 +172,10 @@ def train_one_iteration(dir, iter, srand, egs_dir, if os.path.exists('{0}/srand'.format(dir)): try: saved_srand = int(open('{0}/srand'.format(dir)).readline().strip()) - except (IOError, ValueError) as e: - raise Exception("Exception while reading the random seed " - "for training: {0}".format(e.str())) + except (IOError, ValueError): + logger.error("Exception while reading the random seed " + "for training") + raise if srand != saved_srand: logger.warning("The random seed provided to this iteration " "(srand={0}) is different from the one saved last " @@ -306,7 +307,8 @@ def train_one_iteration(dir, iter, srand, egs_dir, for i in range(1, num_jobs + 1): os.remove("{0}/{1}.{2}.raw".format(dir, iter + 1, i)) except OSError: - raise Exception("Error while trying to delete the raw models") + logger.error("Error while trying to delete the raw models") + raise if get_raw_nnet_from_am: new_model = "{0}/{1}.mdl".format(dir, iter + 1) @@ -356,8 +358,9 @@ def compute_preconditioning_matrix(dir, egs_dir, num_lda_jobs, run_opts, try: os.remove(file) except OSError: - raise Exception("There was error while trying to remove " - "lda stat files.") + logger.error("There was error while trying to remove " + "lda stat files.") + raise # this computes a fixed affine transform computed in the way we described # in Appendix C.6 of http://arxiv.org/pdf/1410.7455v6.pdf; it's a scaled # variant of an LDA transform but without dimensionality reduction. diff --git a/egs/wsj/s5/steps/nnet3/report/generate_plots.py b/egs/wsj/s5/steps/nnet3/report/generate_plots.py index b06cfc03e5c..0a558b91ae2 100755 --- a/egs/wsj/s5/steps/nnet3/report/generate_plots.py +++ b/egs/wsj/s5/steps/nnet3/report/generate_plots.py @@ -134,8 +134,7 @@ def compile(self): "-output-directory={0} {1}".format(dir_name, latex_file)) except Exception as e: logger.warning("There was an error compiling the latex file {0}, " - "please do it manually: {1}".format(latex_file, - e.errstr)) + "please do it manually: {1}".format(latex_file, e)) return False return True From e1150a46304db7da1e6d425fe6024df9313bade3 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 6 Jan 2017 15:19:54 -0500 Subject: [PATCH 207/530] Updating egs-generation scripts to use new-style options for iVectors --- egs/wsj/s5/steps/nnet3/align.sh | 2 -- egs/wsj/s5/steps/nnet3/chain/get_egs.sh | 12 +++++------- egs/wsj/s5/steps/nnet3/get_egs.sh | 12 +++++------- egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh | 16 +++++++--------- egs/wsj/s5/steps/nnet3/get_egs_targets.sh | 13 +++++-------- 5 files changed, 22 insertions(+), 33 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/align.sh b/egs/wsj/s5/steps/nnet3/align.sh index 9befe16164f..713ecc128da 100755 --- a/egs/wsj/s5/steps/nnet3/align.sh +++ b/egs/wsj/s5/steps/nnet3/align.sh @@ -124,7 +124,6 @@ fi ivector_opts= if [ ! -z "$online_ivector_dir" ]; then ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; - # note: subsample-feats, with negative n, will repeat each feature -n times. 
ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" fi @@ -153,4 +152,3 @@ $cmd $queue_opt JOB=1:$nj $dir/log/align.JOB.log \ steps/diagnostic/analyze_alignments.sh --cmd "$cmd" $lang $dir echo "$0: done aligning data." - diff --git a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh index 94bf322a514..76c77a38c46 100755 --- a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh @@ -216,11 +216,9 @@ if [ ! -z "$online_ivector_dir" ]; then ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; echo $ivector_dim > $dir/info/ivector_dim ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; - - ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $sdata/JOB/utt2spk $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" - valid_ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" - train_subset_ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" + ivector_opts="--online-ivectors=$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" else + ivector_opts="" echo 0 >$dir/info/ivector_dim fi @@ -322,14 +320,14 @@ if [ $stage -le 3 ]; then lattice-align-phones --replace-output-symbols=true $latdir/final.mdl scp:$dir/lat_special.scp ark:- \| \ chain-get-supervision $chain_supervision_all_opts $chaindir/tree $chaindir/0.trans_mdl \ ark:- ark:- \| \ - nnet3-chain-get-egs $valid_ivector_opt --srand=$srand \ + nnet3-chain-get-egs $ivector_opts --srand=$srand \ $egs_opts $chaindir/normalization.fst \ "$valid_feats" ark,s,cs:- "ark:$dir/valid_all.cegs" || touch $dir/.error & $cmd $dir/log/create_train_subset.log \ lattice-align-phones --replace-output-symbols=true $latdir/final.mdl scp:$dir/lat_special.scp ark:- \| \ chain-get-supervision $chain_supervision_all_opts \ $chaindir/tree $chaindir/0.trans_mdl ark:- ark:- \| \ - nnet3-chain-get-egs $train_subset_ivector_opt --srand=$srand \ + nnet3-chain-get-egs $ivector_opts --srand=$srand \ $egs_opts $chaindir/normalization.fst \ "$train_subset_feats" ark,s,cs:- "ark:$dir/train_subset_all.cegs" || touch $dir/.error & wait; @@ -381,7 +379,7 @@ if [ $stage -le 4 ]; then lattice-align-phones --replace-output-symbols=true $latdir/final.mdl scp:- ark:- \| \ chain-get-supervision $chain_supervision_all_opts \ $chaindir/tree $chaindir/0.trans_mdl ark:- ark:- \| \ - nnet3-chain-get-egs $ivector_opt --srand=\$[JOB+$srand] $egs_opts \ + nnet3-chain-get-egs $ivector_opts --srand=\$[JOB+$srand] $egs_opts \ --num-frames-overlap=$frames_overlap_per_eg \ "$feats" ark,s,cs:- ark:- \| \ nnet3-chain-copy-egs --random=true --srand=\$[JOB+$srand] ark:- $egs_list || exit 1; diff --git a/egs/wsj/s5/steps/nnet3/get_egs.sh b/egs/wsj/s5/steps/nnet3/get_egs.sh index 6622f3632f7..330f4d8c7d3 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs.sh @@ -189,11 +189,9 @@ if [ ! 
-z "$online_ivector_dir" ]; then ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; echo $ivector_dim > $dir/info/ivector_dim ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; - - ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $sdata/JOB/utt2spk $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" - valid_ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" - train_subset_ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" + ivector_opts="--online-ivectors=$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" else + ivector_opts="" echo 0 >$dir/info/ivector_dim fi @@ -291,11 +289,11 @@ if [ $stage -le 3 ]; then <$dir/ali.scp >$dir/ali_special.scp $cmd $dir/log/create_valid_subset.log \ - nnet3-get-egs --num-pdfs=$num_pdfs $valid_ivector_opt $egs_opts "$valid_feats" \ + nnet3-get-egs --num-pdfs=$num_pdfs $ivector_opts $egs_opts "$valid_feats" \ "ark,s,cs:ali-to-pdf $alidir/final.mdl scp:$dir/ali_special.scp ark:- | ali-to-post ark:- ark:- |" \ "ark:$dir/valid_all.egs" || touch $dir/.error & $cmd $dir/log/create_train_subset.log \ - nnet3-get-egs --num-pdfs=$num_pdfs $train_subset_ivector_opt $egs_opts "$train_subset_feats" \ + nnet3-get-egs --num-pdfs=$num_pdfs $ivector_opts $egs_opts "$train_subset_feats" \ "ark,s,cs:ali-to-pdf $alidir/final.mdl scp:$dir/ali_special.scp ark:- | ali-to-post ark:- ark:- |" \ "ark:$dir/train_subset_all.egs" || touch $dir/.error & wait; @@ -335,7 +333,7 @@ if [ $stage -le 4 ]; then echo "$0: Generating training examples on disk" # The examples will go round-robin to egs_list. $cmd JOB=1:$nj $dir/log/get_egs.JOB.log \ - nnet3-get-egs --num-pdfs=$num_pdfs $ivector_opt $egs_opts "$feats" \ + nnet3-get-egs --num-pdfs=$num_pdfs $ivector_opts $egs_opts "$feats" \ "ark,s,cs:filter_scp.pl $sdata/JOB/utt2spk $dir/ali.scp | ali-to-pdf $alidir/final.mdl scp:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \ nnet3-copy-egs --random=true --srand=\$[JOB+$srand] ark:- $egs_list || exit 1; fi diff --git a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh index d746c895c18..02636b1630a 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh @@ -207,11 +207,9 @@ if [ ! 
-z $online_ivector_dir ]; then ivector_period=$(cat $online_ivector_dir/ivector_period) ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; echo $ivector_dim >$dir/info/ivector_dim - - ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $sdata/JOB/utt2spk $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" - valid_ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" - train_subset_ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" - priors_ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $dir/priors_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" + ivector_opts="--online-ivectors=$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" +else + ivector_opts="" fi if [ $stage -le 2 ]; then @@ -345,7 +343,7 @@ fi num_pdfs=`am-info $alidir/final.mdl | grep pdfs | awk '{print $NF}' 2>/dev/null` || exit 1 $cmd $dir/log/create_priors_subset.log \ - nnet3-get-egs --num-pdfs=$num_pdfs $priors_ivector_opt $priors_egs_opts "$priors_feats" \ + nnet3-get-egs --num-pdfs=$num_pdfs $ivector_opts $priors_egs_opts "$priors_feats" \ "$prior_ali_rspecifier ali-to-post ark:- ark:- |" \ ark:- \| nnet3-copy-egs ark:- $priors_egs_list || \ { touch $dir/.error; echo "Error in creating priors subset. See $dir/log/create_priors_subset.log"; exit 1; } @@ -372,13 +370,13 @@ if [ $stage -le 4 ]; then $cmd $dir/log/create_valid_subset.log \ discriminative-get-supervision $supervision_all_opts \ scp:$dir/ali_special.scp scp:$dir/lat_special.scp ark:- \| \ - nnet3-discriminative-get-egs $valid_ivector_opt $egs_opts \ + nnet3-discriminative-get-egs $ivector_opts $egs_opts \ $dir/final.mdl "$valid_feats" ark,s,cs:- "ark:$dir/valid_diagnostic.degs" || touch $dir/.error & $cmd $dir/log/create_train_subset.log \ discriminative-get-supervision $supervision_all_opts \ scp:$dir/ali_special.scp scp:$dir/lat_special.scp ark:- \| \ - nnet3-discriminative-get-egs $train_subset_ivector_opt $egs_opts \ + nnet3-discriminative-get-egs $ivector_opts $egs_opts \ $dir/final.mdl "$train_subset_feats" ark,s,cs:- "ark:$dir/train_diagnostic.degs" || touch $dir/.error & wait; [ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1 @@ -408,7 +406,7 @@ if [ $stage -le 5 ]; then discriminative-get-supervision $supervision_all_opts \ "scp:utils/filter_scp.pl $sdata/JOB/utt2spk $dir/ali.scp |" \ "ark,s,cs:gunzip -c $denlatdir/lat.JOB.gz |" ark:- \| \ - nnet3-discriminative-get-egs $ivector_opt $egs_opts \ + nnet3-discriminative-get-egs $ivector_opts $egs_opts \ --num-frames-overlap=$frames_overlap_per_eg \ $dir/final.mdl "$feats" ark,s,cs:- ark:- \| \ nnet3-discriminative-copy-egs --random=true --srand=JOB ark:- $degs_list || exit 1; diff --git a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh index 7bd8fa5f983..04830a4bc05 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh @@ -182,11 +182,9 @@ if [ ! 
-z "$online_ivector_dir" ]; then ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; echo $ivector_dim > $dir/info/ivector_dim ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; - - ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $sdata/JOB/utt2spk $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" - valid_ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" - train_subset_ivector_opt="--ivectors='ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |'" + ivector_opts="--online-ivectors=$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" else + ivector_opts="" echo 0 >$dir/info/ivector_dim fi @@ -306,12 +304,12 @@ if [ $stage -le 3 ]; then rm -f $dir/.error 2>/dev/null $cmd $dir/log/create_valid_subset.log \ $get_egs_program \ - $valid_ivector_opt $egs_opts "$valid_feats" \ + $ivector_opts $egs_opts "$valid_feats" \ "$valid_targets" \ "ark:$dir/valid_all.egs" || touch $dir/.error & $cmd $dir/log/create_train_subset.log \ $get_egs_program \ - $train_subset_ivector_opt $egs_opts "$train_subset_feats" \ + $ivector_opts $egs_opts "$train_subset_feats" \ "$train_subset_targets" \ "ark:$dir/train_subset_all.egs" || touch $dir/.error & wait; @@ -352,7 +350,7 @@ if [ $stage -le 4 ]; then # The examples will go round-robin to egs_list. $cmd JOB=1:$nj $dir/log/get_egs.JOB.log \ $get_egs_program \ - $ivector_opt $egs_opts "$feats" "$targets" \ + $ivector_opts $egs_opts "$feats" "$targets" \ ark:- \| \ nnet3-copy-egs --random=true --srand=\$[JOB+$srand] ark:- $egs_list || exit 1; fi @@ -411,4 +409,3 @@ if [ $stage -le 6 ]; then fi echo "$0: Finished preparing training examples" - From e5497f9a694c0778a737ec5bb7ecdd1bb4d0e09e Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 6 Jan 2017 15:21:09 -0500 Subject: [PATCH 208/530] Updating SetZero() to also zero stats. --- src/nnet3/nnet-utils.cc | 7 +++++++ src/nnet3/nnet-utils.h | 8 +++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc index dbe676de1ef..02b92c19a40 100644 --- a/src/nnet3/nnet-utils.cc +++ b/src/nnet3/nnet-utils.cc @@ -172,10 +172,17 @@ void SetZero(bool is_gradient, Nnet *nnet) { for (int32 c = 0; c < nnet->NumComponents(); c++) { Component *comp = nnet->GetComponent(c); + NonlinearComponent *nc = dynamic_cast(comp); if (comp->Properties() & kUpdatableComponent) { UpdatableComponent *u_comp = dynamic_cast(comp); KALDI_ASSERT(u_comp != NULL); u_comp->SetZero(is_gradient); + } else if (nc != NULL) { + nc->ZeroStats(); + } else { + // Scale(0.0) is called as a backup; currently it should never + // do anything useful for any component type. + comp->Scale(0.0); } } } diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h index c0bdc7f86c8..4d17b65c401 100644 --- a/src/nnet3/nnet-utils.h +++ b/src/nnet3/nnet-utils.h @@ -54,7 +54,13 @@ int32 NumOutputNodes(const Nnet &nnet); int32 NumInputNodes(const Nnet &nnet); /// Calls SetZero (with the given is_gradient parameter) on all updatable -/// components of the nnet. +/// components of the nnet; calls ZeroComponentStats on all other components +/// that inherit from NonlinearComponent; and (just in case) calls Scale(0.0) on +/// all other components. 
+/// It's the same as ScaleNnet(0.0, nnet) except that if is_gradient is true it +/// can set the is_gradient_ flag on updatable components [to force simple +/// update]; and unlike ScaleNnet(0.0, nnet) it will get rid of NaNs that have +/// crept into the parameters or stats. void SetZero(bool is_gradient, Nnet *nnet); From 3ab45051a1bf6c5621365e7568925e17eb0d517a Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 6 Jan 2017 15:27:24 -0500 Subject: [PATCH 209/530] tedlium script changes; renaming max-cell-value to decay-time in LSTM scripts --- .../s5_r2/local/chain/compare_wer_general.sh | 50 +-- egs/tedlium/s5_r2/local/chain/run_tdnn_d.sh | 198 +++++++++++ .../s5_r2/local/chain/tuning/run_tdnn_1c.sh | 282 ++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1b.sh | 317 ++++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1c.sh | 279 +++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1d.sh | 312 +++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1e.sh | 314 +++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1f.sh | 314 +++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1g.sh | 316 +++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1h.sh | 316 +++++++++++++++++ egs/tedlium/s5_r2/run.sh | 2 +- egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py | 54 +-- 12 files changed, 2704 insertions(+), 50 deletions(-) create mode 100755 egs/tedlium/s5_r2/local/chain/run_tdnn_d.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh diff --git a/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh b/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh index aebbd66349a..b8988fc8d1a 100755 --- a/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh +++ b/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh @@ -1,38 +1,37 @@ #!/bin/bash + echo $0 $* +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi + echo -n "System " for x in $*; do printf "% 10s" " $(basename $x)"; done echo -echo -n "WER on dev(orig) " -for x in $*; do - wer=$(grep Sum $x/decode_dev/score*/*ys | utils/best_wer.sh | awk '{print $2}') - printf "% 10s" $wer -done -echo - -echo -n "WER on dev(rescored)" -for x in $*; do - wer=$(grep Sum $x/decode_dev_rescore/score*/*ys | utils/best_wer.sh | awk '{print $2}') - printf "% 10s" $wer -done -echo - -echo -n "WER on test(orig) " -for x in $*; do - wer=$(grep Sum $x/decode_test/score*/*ys | utils/best_wer.sh | awk '{print $2}') - printf "% 10s" $wer -done -echo +dirnames=(dev dev_rescore test test_rescore) +strings=("WER on dev(orig) " "WER on dev(rescored) " "WER on test(orig) " "WER on test(rescored)") -echo -n "WER on test(rescored)" -for x in $*; do - wer=$(grep Sum $x/decode_test_rescore/score*/*ys | utils/best_wer.sh | awk '{print $2}') - printf "% 10s" $wer +for n in 0 1 2 3; do + echo -n "${strings[$n]}" + for x in $*; do + wer=$(grep Sum $x/decode_${dirnames[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n " 
[looped:] " + for x in $*; do + wer=$(grep Sum $x/decode_looped_${dirnames[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi done -echo echo -n "Final train prob " @@ -61,4 +60,5 @@ for x in $*; do prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done + echo diff --git a/egs/tedlium/s5_r2/local/chain/run_tdnn_d.sh b/egs/tedlium/s5_r2/local/chain/run_tdnn_d.sh new file mode 100755 index 00000000000..9e795316352 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/run_tdnn_d.sh @@ -0,0 +1,198 @@ +#!/bin/bash + +# by default, with cleanup: +# local/chain/run_tdnn.sh + +# without cleanup: +# local/chain/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run the corresponding non-chain nnet3 system +# (local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=d #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale-nonlinearity 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 550 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then
+    utils/create_split_dir.pl \
+     /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+  fi
+
+  steps/nnet3/chain/train.py --stage $train_stage \
+    --cmd "$decode_cmd" \
+    --feat.online-ivector-dir $train_ivector_dir \
+    --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
+    --chain.xent-regularize 0.1 \
+    --chain.leaky-hmm-coefficient 0.1 \
+    --chain.l2-regularize 0.00005 \
+    --chain.apply-deriv-weights false \
+    --chain.lm-opts="--num-extra-lm-states=2000" \
+    --egs.dir "$common_egs_dir" \
+    --egs.opts "--frames-overlap-per-eg 0" \
+    --egs.chunk-width 150 \
+    --trainer.num-chunk-per-minibatch 128 \
+    --trainer.frames-per-iter 1500000 \
+    --trainer.num-epochs 4 \
+    --trainer.optimization.num-jobs-initial 2 \
+    --trainer.optimization.num-jobs-final 12 \
+    --trainer.optimization.initial-effective-lrate 0.001 \
+    --trainer.optimization.final-effective-lrate 0.0001 \
+    --trainer.max-param-change 2.0 \
+    --cleanup.remove-egs true \
+    --feat-dir $train_data_dir \
+    --tree-dir $tree_dir \
+    --lat-dir $lat_dir \
+    --dir $dir
+fi
+
+
+
+if [ $stage -le 19 ]; then
+  # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
+  # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
+  # the lang directory.
+  utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang $dir $dir/graph
+fi
+
+if [ $stage -le 20 ]; then
+  rm $dir/.error 2>/dev/null || true
+  for dset in dev test; do
+      (
+      steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \
+          --acwt 1.0 --post-decode-acwt 10.0 \
+          --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+          --scoring-opts "--min-lmwt 5 " \
+         $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1;
+      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+        data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1
+      ) || touch $dir/.error &
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+exit 0
diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh
new file mode 100755
index 00000000000..111a68d9878
--- /dev/null
+++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh
@@ -0,0 +1,282 @@
+#!/bin/bash

+# run_tdnn_1c.sh is like run_tdnn_1b.sh but changing chunk-width from 150 to
+# '140,110,160', and
+# --trainer.num-chunk-per-minibatch from 128 to 128,64

+# run_tdnn_1b.sh is like run_tdnn_1a.sh but upgrading to xconfig-based
+# config generation.

+# Results (11/29/2016, note, this build is before the upgrade of the LM
+# done in Nov 2016):
+# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_sp_bi exp/chain_cleaned/tdnn1b_sp_bi
+# System                   tdnn_sp_bi  tdnn1b_sp_bi
+# WER on dev(orig)               10.3          10.2
+# WER on dev(rescored)            9.8           9.6
+# WER on test(orig)               9.8           9.7
+# WER on test(rescored)           9.3           9.2
+# Final train prob            -0.0918       -0.0928
+# Final valid prob            -0.1190       -0.1178
+# Final train prob (xent)     -1.3572       -1.4666
+# Final valid prob (xent)     -1.4415       -1.5473


+## how you run this (note: this assumes that the run_tdnn.sh soft link points here;
+## otherwise call it directly in its location).
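[A rough gloss on the ranged values mentioned in the header above; the corresponding flags appear verbatim in the train.py call further down this script:

  --egs.chunk-width '140,110,160' \
  --trainer.num-chunk-per-minibatch '128,64' \

The first value in each list is the principal one; as far as I understand, the alternatives let the egs generation pick chunk widths that better fit utterance lengths, and let the trainer fall back to 64-chunk minibatches where 128 chunks are not available.]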
+# by default, with cleanup: +# local/chain/run_tdnn.sh + +# without cleanup: +# local/chain/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run the corresponding non-chain nnet3 system +# (local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# This script is like run_tdnn_1a.sh except it uses an xconfig-based mechanism +# to get the configuration. + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1c #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+    exit 1;
+  fi
+  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+      --context-opts "--context-width=2 --central-position=1" \
+      --leftmost-questions-truncate -1 \
+      --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+if [ $stage -le 17 ]; then
+  mkdir -p $dir
+
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-renorm-layer name=tdnn1 dim=450
+  relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450
+  relu-renorm-layer name=tdnn3 input=Append(-1,0,1,2) dim=450
+  relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450
+  relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450
+  relu-renorm-layer name=tdnn6 input=Append(-6,-3,0) dim=450
+
+  ## adding the layers for chain branch
+  relu-renorm-layer name=prefinal-chain input=tdnn6 dim=450 target-rms=0.5
+  output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5
+
+  # adding the layers for xent branch
+  # This block prints the configs for a separate output that will be
+  # trained with a cross-entropy objective in the 'chain' models... this
+  # has the effect of regularizing the hidden parts of the model.  we use
+  # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+  # 0.5 / args.xent_regularize is suitable as it means the xent
+  # final-layer learns at a rate independent of the regularization
+  # constant; and the 0.5 was tuned so as to make the relative progress
+  # similar in the xent and regular final layers.
+  relu-renorm-layer name=prefinal-xent input=tdnn6 dim=450 target-rms=0.5
+  output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+
+fi
+
+if [ $stage -le 18 ]; then
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width '140,110,160' \ + --trainer.num-chunk-per-minibatch '128,64' \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh new file mode 100755 index 00000000000..5149e5a54e8 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -0,0 +1,317 @@ +#!/bin/bash + +# this is as run_tdnn_lstm_1a.sh, but changing +# frames_per_chunk 150 to 140,100,160 +# and --trainer.num-chunk-per-minibatch from 128 to 128,64 +# and adding +# --egs.chunk-left-context-initial=0 +# and --egs.chunk-right-context-final=0 + + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1b #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+    exit 1;
+  fi
+  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+      --context-opts "--context-width=2 --central-position=1" \
+      --leftmost-questions-truncate -1 \
+      --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+
+if [ $stage -le 17 ]; then
+  mkdir -p $dir
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-renorm-layer name=tdnn1 dim=512
+  relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1)
+  fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3
+  relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3
+  relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3
+
+  ## adding the layers for chain branch
+  output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
+
+  # adding the layers for xent branch
+  # This block prints the configs for a separate output that will be
+  # trained with a cross-entropy objective in the 'chain' models... this
+  # has the effect of regularizing the hidden parts of the model.  we use
+  # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+  # 0.5 / args.xent_regularize is suitable as it means the xent
+  # final-layer learns at a rate independent of the regularization
+  # constant; and the 0.5 was tuned so as to make the relative progress
+  # similar in the xent and regular final layers.
+  output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 18 ]; then
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh new file mode 100755 index 00000000000..bb3c5b1a942 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -0,0 +1,279 @@ +#!/bin/bash + +# 1c is as 1b, but adding the option --slow-start true. [since removed; it +# takes half the param change from the first two minibatches of each +# job]. The difference is probably just random noise. + + +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_lstm1b_sp_bi exp/chain_cleaned/tdnn_lstm1c_sp_bi +# System tdnn_lstm1b_sp_bi tdnn_lstm1c_sp_bi +# WER on dev(orig) 9.1 8.9 +# WER on dev(rescored) 8.4 8.2 +# WER on test(orig) 8.9 8.9 +# WER on test(rescored) 8.4 8.5 +# Final train prob -0.0621 -0.0620 +# Final valid prob -0.0799 -0.0811 +# Final train prob (xent) -0.8300 -0.8117 +# Final valid prob (xent) -0.9500 -0.9448 + + + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1c #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. 
+common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs  # you can set this to use previously dumped egs.
+
+# End configuration section.
+echo "$0 $@"  # Print the command line for logging
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+  cat <data/lang_chain/topo
+  fi
+fi
+
+if [ $stage -le 15 ]; then
+  # Get the alignments as lattices (gives the chain training more freedom).
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 16 ]; then
+  # Build a tree using our new topology.  We know we have alignments for the
+  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+  # those.
+  if [ -f $tree_dir/final.mdl ]; then
+    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+    exit 1;
+  fi
+  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+      --context-opts "--context-width=2 --central-position=1" \
+      --leftmost-questions-truncate -1 \
+      --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+
+if [ $stage -le 17 ]; then
+  mkdir -p $dir
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-renorm-layer name=tdnn1 dim=512
+  relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1)
+  fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3
+  relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3
+  relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3
+
+  ## adding the layers for chain branch
+  output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
+
+  # adding the layers for xent branch
+  # This block prints the configs for a separate output that will be
+  # trained with a cross-entropy objective in the 'chain' models... this
+  # has the effect of regularizing the hidden parts of the model.  we use
+  # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+  # 0.5 / args.xent_regularize is suitable as it means the xent
+  # final-layer learns at a rate independent of the regularization
+  # constant; and the 0.5 was tuned so as to make the relative progress
+  # similar in the xent and regular final layers.
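+  # (worked example of the formula above: with xent_regularize=0.1 as set near
+  # the top of this script, learning_rate_factor comes out to 0.5 / 0.1 = 5.0,
+  # which is the value plugged into the output-xent layer on the next line.)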
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.slow-start true \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh new file mode 100755 index 00000000000..28ca16d939c --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -0,0 +1,312 @@ +#!/bin/bash + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. 
note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1d #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). 
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 16 ]; then
+  # Build a tree using our new topology.  We know we have alignments for the
+  # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
+  # those.
+  if [ -f $tree_dir/final.mdl ]; then
+    echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
+    exit 1;
+  fi
+  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
+      --context-opts "--context-width=2 --central-position=1" \
+      --leftmost-questions-truncate -1 \
+      --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
+fi
+
+
+if [ $stage -le 17 ]; then
+  mkdir -p $dir
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
+  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-renorm-layer name=tdnn1 dim=512
+  relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1)
+  fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=40 delay=-3
+  relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=40 delay=-3
+  relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3)
+  relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3)
+  fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=40 delay=-3
+
+  ## adding the layers for chain branch
+  output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5
+
+  # adding the layers for xent branch
+  # This block prints the configs for a separate output that will be
+  # trained with a cross-entropy objective in the 'chain' models... this
+  # has the effect of regularizing the hidden parts of the model.  we use
+  # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+  # 0.5 / args.xent_regularize is suitable as it means the xent
+  # final-layer learns at a rate independent of the regularization
+  # constant; and the 0.5 was tuned so as to make the relative progress
+  # similar in the xent and regular final layers.
+  output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
+fi
+
+
+if [ $stage -le 18 ]; then
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
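+  # (editor's note, illustrative only: because looped decoding carries the
+  # recurrent state across chunks, --frames-per-chunk only affects speed and
+  # latency; e.g. a value of 50 here would be expected to give the same WER
+  # as the 30 used below.)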
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh new file mode 100755 index 00000000000..5bfdc68fa3f --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -0,0 +1,314 @@ +#!/bin/bash + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1e #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh new file mode 100755 index 00000000000..ed778713907 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -0,0 +1,314 @@ +#!/bin/bash + +# 1f is as 1b, but increasing decay-time from 40 to 80. [see also 1e, at 20.] + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1f #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=80 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=80 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=80 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh new file mode 100755 index 00000000000..bbc17c77aea --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh @@ -0,0 +1,316 @@ +#!/bin/bash + +####################### +# 1g is as 1e, but reducing decay-time further from 20 to 10. +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1g #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=10 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=10 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=10 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+  rm $dir/.error 2>/dev/null || true
+  for dset in dev test; do
+      (
+      steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \
+          --acwt 1.0 --post-decode-acwt 10.0 \
+          --extra-left-context-initial $extra_left_context_initial \
+          --frames-per-chunk 30 \
+          --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+          --scoring-opts "--min-lmwt 5 " \
+         $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1;
+      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+        data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1
+      ) || touch $dir/.error &
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+
+
+exit 0
diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh
new file mode 100755
index 00000000000..8ffd43f27bc
--- /dev/null
+++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh
@@ -0,0 +1,316 @@
+#!/bin/bash
+
+#######################
+# 1h is as 1e, but increasing decay-time from 20 to 30.
+# 1e is as 1b, but reducing decay-time from 40 to 20.
+
+# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it
+# uses egs from 1b, remember to remove that before I commit.
+
+# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi
+# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091)
+
+# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below
+# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had
+# better results. Note: these results are not with the updated LM (the LM data-prep
+# for this setup was changed in Nov 2016 but this was with an older directory).
+#
+# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi
+# System                    lstm1d_sp_bi  lstm1e_sp_bi  tdnn_lstm1a_sp_bi
+# WER on dev(orig)              10.3          10.7           9.7
+# WER on dev(rescored)           9.8          10.1           9.3
+# WER on test(orig)              9.7           9.8           9.1
+# WER on test(rescored)          9.2           9.4           8.7
+# Final train prob           -0.0812       -0.0862       -0.0625
+# Final valid prob           -0.1049       -0.1047       -0.0910
+# Final train prob (xent)    -1.1334       -1.1763       -0.8518
+# Final valid prob (xent)    -1.2263       -1.2427       -0.9972
+
+## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here;
+## otherwise call it directly in its location).
+# by default, with cleanup:
+# local/chain/run_tdnn_lstm.sh
+
+# without cleanup:
+# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" &
+
+# note, if you have already run one of the non-chain nnet3 systems
+# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14.
+
+# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly
+# standard LSTM, except that some TDNN layers were added in between the
+# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but
+# this isn't exactly copied from there.
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1h #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=30 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=30 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=30 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/run.sh b/egs/tedlium/s5_r2/run.sh index 19bc92a738c..754cec0494d 100755 --- a/egs/tedlium/s5_r2/run.sh +++ b/egs/tedlium/s5_r2/run.sh @@ -185,7 +185,7 @@ fi if [ $stage -le 17 ]; then # This will only work if you have GPUs on your system (and note that it requires # you to have the queue set up the right way... see kaldi-asr.org/doc/queue.html) - local/chain/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" + local/chain/run_tdnn.sh fi # The nnet3 TDNN recipe: diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py index 11db977f7a2..85a23936514 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py @@ -456,12 +456,15 @@ def generate_lstm_config(self): # lstm-nonlinearity-options=' max-change=0.75 ' [Options string to pass into the LSTM nonlinearity component.] # ng-affine-options=' max-change=1.5 ' [Additional options used for the full matrices in the LSTM, can be used to # do things like set biases to initialize to 1] -# max-cell-value=-1 [If >0, an approximate maximum on the contents of the cell (c_t); -# enforced by putting a scaling factor of -# recurrence_scale = 1 - abs(delay)/max_cell_value -# on the recurrence, i.e. the term c_{t-1} in the LSTM equations. -# E.g. setting this to 50 means the activations can't get bigger -# than about 50.] +# decay-time=-1 [If >0, an approximate maximum on how many frames +# can be remembered via summation into the cell +# contents c_t; enforced by putting a scaling factor +# of recurrence_scale = 1 - abs(delay)/decay_time on +# the recurrence, i.e. the term c_{t-1} in the LSTM +# equations. E.g. setting this to 20 means no more +# than about 20 frames' worth of history, +# i.e. history since about t = t-20, can be +# accumulated in c_t.] class XconfigFastLstmLayer(XconfigLayerBase): def __init__(self, first_token, key_to_value, prev_names = None): assert first_token == "fast-lstm-layer" @@ -482,7 +485,7 @@ def set_default_configs(self): # the affine layer contains 4 of our old layers -> use a # larger max-change than the normal value of 0.75. 'ng-affine-options' : ' max-change=1.5', - 'max-cell-value': -1.0 + 'decay-time': -1.0 } self.c_needed = False # keep track of whether the 'c' output is needed. @@ -544,11 +547,11 @@ def generate_lstm_config(self): cell_dim = self.config['cell-dim'] delay = self.config['delay'] affine_str = self.config['ng-affine-options'] - max_cell_value = self.config['max-cell-value'] - # we expect max_cell_value to be either -1, or large, like 10 or 50. 
- recurrence_scale = (1.0 if max_cell_value < 0 else - 1.0 - (abs(delay) / max_cell_value)) - assert recurrence_scale > 0 # or user may have set max-cell-value much + decay_time = self.config['decay-time'] + # we expect decay_time to be either -1, or large, like 10 or 50. + recurrence_scale = (1.0 if decay_time < 0 else + 1.0 - (abs(delay) / decay_time)) + assert recurrence_scale > 0 # or user may have set decay-time much # too small. lstm_str = self.config['lstm-nonlinearity-options'] bptrunc_str = ("clipping-threshold={0}" @@ -622,12 +625,15 @@ def generate_lstm_config(self): # lstm-nonlinearity-options=' max-change=0.75 ' [Options string to pass into the LSTM nonlinearity component.] # ng-affine-options=' max-change=1.5 ' [Additional options used for the full matrices in the LSTM, can be used to # do things like set biases to initialize to 1] -# max-cell-value=-1 [If >0, an approximate maximum on the contents of the cell (c_t); -# enforced by putting a scaling factor of -# recurrence_scale = 1 - abs(delay)/max_cell_value -# on the recurrence, i.e. the term c_{t-1} in the LSTM equations. -# E.g. setting this to 50 means the activations can't get bigger -# than about 50.] +# decay-time=-1 [If >0, an approximate maximum on how many frames +# can be remembered via summation into the cell +# contents c_t; enforced by putting a scaling factor +# of recurrence_scale = 1 - abs(delay)/decay_time on +# the recurrence, i.e. the term c_{t-1} in the LSTM +# equations. E.g. setting this to 20 means no more +# than about 20 frames' worth of history, +# i.e. history since about t = t-20, can be +# accumulated in c_t.] class XconfigFastLstmpLayer(XconfigLayerBase): def __init__(self, first_token, key_to_value, prev_names = None): assert first_token == "fast-lstmp-layer" @@ -648,7 +654,7 @@ def set_default_configs(self): # the affine layer contains 4 of our old layers -> use a # larger max-change than the normal value of 0.75. 'ng-affine-options' : ' max-change=1.5', - 'max-cell-value': -1.0, + 'decay-time': -1.0, 'zeroing-interval' : 20, 'zeroing-threshold' : 15.0 } @@ -727,11 +733,11 @@ def generate_lstm_config(self): rec_proj_dim = self.config['recurrent-projection-dim'] nonrec_proj_dim = self.config['non-recurrent-projection-dim'] affine_str = self.config['ng-affine-options'] - max_cell_value = self.config['max-cell-value'] - # we expect max_cell_value to be either -1, or large, like 10 or 50. - recurrence_scale = (1.0 if max_cell_value < 0 else - 1.0 - (abs(delay) / max_cell_value)) - assert recurrence_scale > 0 # or user may have set max-cell-value much + decay_time = self.config['decay-time'] + # we expect decay_time to be either -1, or large, like 10 or 50. + recurrence_scale = (1.0 if decay_time < 0 else + 1.0 - (abs(delay) / decay_time)) + assert recurrence_scale > 0 # or user may have set decay-time much # too small. 
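# --- editor's aside, not part of the patch: a quick numeric check of the
# recurrence-scale formula above, assuming delay=-3 (as in the tedlium
# tuning scripts earlier in this series) and the decay-time values those
# scripts sweep (10, 20, 30, 40, 80). ---
delay = -3
for decay_time in [10.0, 20.0, 30.0, 40.0, 80.0]:
    recurrence_scale = (1.0 if decay_time < 0 else
                        1.0 - (abs(delay) / decay_time))
    # e.g. decay-time=20 -> 0.85, decay-time=80 -> 0.9625; the assert above
    # only holds if decay_time exceeds abs(delay), i.e. more than 3 frames here.
    assert recurrence_scale > 0
    print(decay_time, recurrence_scale)
# --- end editor's aside ---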
bptrunc_str = ("clipping-threshold={0}" From f15be6387b0a656a7d91ff7de76bf57a3db2ee5a Mon Sep 17 00:00:00 2001 From: Karel Vesely Date: Fri, 6 Jan 2017 21:48:36 +0100 Subject: [PATCH 210/530] [scripts] minor updates of scripts in nnet1 (#1318) --- egs/wsj/s5/steps/conf/append_eval_to_ctm.py | 21 ++++++++++++++++----- egs/wsj/s5/steps/nnet/align.sh | 8 +++++--- egs/wsj/s5/steps/nnet/make_bn_feats.sh | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/egs/wsj/s5/steps/conf/append_eval_to_ctm.py b/egs/wsj/s5/steps/conf/append_eval_to_ctm.py index 3a35f5a9281..f8e2aad891d 100755 --- a/egs/wsj/s5/steps/conf/append_eval_to_ctm.py +++ b/egs/wsj/s5/steps/conf/append_eval_to_ctm.py @@ -38,11 +38,11 @@ assert(tag == 'op') hyp_vec = hyp_vec.split() op_vec = op_vec.split() - # Fill create eval vector with symbols 'C', 'S', 'I', + # Fill create eval vector with symbols 'C', 'S', 'I' assert(utt not in eval_vec) eval_vec[utt] = [] for op,hyp in zip(op_vec, hyp_vec): - if hyp != '': eval_vec[utt].append(op) + if op != 'D': eval_vec[utt].append((op,hyp)) # Load the 'ctm' into dictionary, ctm = dict() @@ -56,9 +56,20 @@ ctm_eval = [] for utt,ctm_part in ctm.iteritems(): ctm_part.sort(key = operator.itemgetter(2)) # Sort by 'beg' time, - # extending the 'tuple' by '+': - merged = [ tup + (evl,) for tup,evl in zip(ctm_part,eval_vec[utt]) ] - ctm_eval.extend(merged) + try: + # merging 'tuples' by '+', the record has format: + # (utt, ch, beg, dur, ctm_wrd, conf, op, hyp_wrd) + merged = [ ctm_tup + evl_tup for ctm_tup,evl_tup in zip(ctm_part,eval_vec[utt]) ] + # check, + for j in range(len(merged)): + hyp_wrd = merged[j][-1] + ctm_wrd = merged[j][-4] + assert hyp_wrd == ctm_wrd, "We failed with words: hyp_wrd %s, ctm_wrd %s" % (hyp_wrd,ctm_wrd) # Check that words in 'ctm' and 'utt_stats' match! + merged[j] = merged[j][:-1] # dropping the 'hyp_wrd' (the last element of tuple), + # append, + ctm_eval.extend(merged) + except KeyError: + print 'Missing key', utt, 'in the word-evaluation stats from scoring' # Sort again, ctm_eval.sort(key = operator.itemgetter(0,1,2)) diff --git a/egs/wsj/s5/steps/nnet/align.sh b/egs/wsj/s5/steps/nnet/align.sh index 55c00fca9fb..146cf4659aa 100755 --- a/egs/wsj/s5/steps/nnet/align.sh +++ b/egs/wsj/s5/steps/nnet/align.sh @@ -15,6 +15,7 @@ beam=10 retry_beam=40 nnet_forward_opts="--no-softmax=true --prior-scale=1.0" ivector= # rx-specifier with i-vectors (ark-with-vectors), +text= # (optional) transcipts we align to, align_to_lats=false # optionally produce alignment in lattice format lats_decode_opts="--acoustic-scale=0.1 --beam=20 --lattice_beam=10" @@ -62,9 +63,10 @@ feature_transform=$srcdir/final.feature_transform model=$dir/final.mdl # Check that files exist -for f in $sdata/1/feats.scp $sdata/1/text $lang/L.fst $nnet $model $feature_transform $class_frame_counts; do +for f in $sdata/1/feats.scp $lang/L.fst $nnet $model $feature_transform $class_frame_counts; do [ ! -f $f ] && echo "$0: missing file $f" && exit 1; done +[ -z "$text" -a ! 
-f $sdata/1/text ] && echo "$0: missing file $sdata/1/text" && exit 1 # PREPARE FEATURE EXTRACTION PIPELINE @@ -113,11 +115,11 @@ echo "$0: aligning data '$data' using nnet/model '$srcdir', putting alignments i # Map oovs in reference transcription, oov=`cat $lang/oov.int` || exit 1; -tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; +[ -z "$text" ] && text=$sdata/JOB/text +tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $text |"; # We could just use align-mapped in the next line, but it's less efficient as it compiles the # training graphs one by one. if [ $stage -le 0 ]; then - train_graphs="ark:compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/final.mdl $lang/L.fst '$tra' ark:- |" $cmd JOB=1:$nj $dir/log/align.JOB.log \ compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \ align-compiled-mapped $scale_opts --beam=$beam --retry-beam=$retry_beam $dir/final.mdl ark:- \ diff --git a/egs/wsj/s5/steps/nnet/make_bn_feats.sh b/egs/wsj/s5/steps/nnet/make_bn_feats.sh index 83a2a5fc159..1da96f796e6 100755 --- a/egs/wsj/s5/steps/nnet/make_bn_feats.sh +++ b/egs/wsj/s5/steps/nnet/make_bn_feats.sh @@ -44,7 +44,7 @@ bnfeadir=$5 # copy the dataset metadata from srcdata. mkdir -p $data $logdir $bnfeadir || exit 1; -utils/copy_data_dir.sh $srcdata $data; rm $data/{feats,cmvn}.scp 2>/dev/null +utils/copy_data_dir.sh $srcdata $data; rm -f $data/{feats,cmvn}.scp 2>/dev/null # make $bnfeadir an absolute pathname. [ '/' != ${bnfeadir:0:1} ] && bnfeadir=$PWD/$bnfeadir From 36c8a7b08daf43c947354f55e98573df55c9fe0c Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 6 Jan 2017 17:55:27 -0500 Subject: [PATCH 211/530] Add timing info to looped compilation --- src/nnet3/nnet-compile-looped.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/nnet3/nnet-compile-looped.cc b/src/nnet3/nnet-compile-looped.cc index 62f29762580..1237ba6ce1e 100644 --- a/src/nnet3/nnet-compile-looped.cc +++ b/src/nnet3/nnet-compile-looped.cc @@ -268,6 +268,7 @@ static bool CompileLoopedInternal( const ComputationRequest &request3, int32 num_requests, NnetComputation *computation) { + KALDI_ASSERT(num_requests >= 3); std::vector extra_requests(num_requests - 3); const ComputationRequest *prev_request = &request2; @@ -312,11 +313,15 @@ void CompileLooped(const Nnet &nnet, int32 num_requests1 = 5, factor = 2, max_requests = 100, num_requests; + Timer timer; + for (num_requests = num_requests1; num_requests <= max_requests; num_requests *= factor) { if (CompileLoopedInternal(nnet, optimize_opts, request1, request2, request3, num_requests, computation)) { + KALDI_LOG << "Spent " << timer.Elapsed() + << " seconds in looped nnet3 compilation."; return; } else { KALDI_VLOG(2) << "Looped compilation failed with " From 735b2b1490c41c3ca107ee349ff9416170c298eb Mon Sep 17 00:00:00 2001 From: "Z.
Stanley Guan" Date: Fri, 6 Jan 2017 19:54:48 -0500 Subject: [PATCH 212/530] [egs] minor fix in fisher_swbd/s5/local/chain/run_blstm_6h.sh (#1320) --- egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh index b4b830bd57b..70a4cb23583 100644 --- a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh +++ b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh @@ -107,8 +107,6 @@ if [ $stage -le 13 ]; then /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage fi - touch $dir/egs/.nodelete # keep egs around when that run dies. - steps/nnet3/chain/train.py --stage $train_stage \ --cmd "$decode_cmd" \ --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ From a4b209159367cb9766953b4fb54e385daaa7ee62 Mon Sep 17 00:00:00 2001 From: Tom Ko Date: Sun, 8 Jan 2017 04:37:25 +0800 Subject: [PATCH 213/530] added reverberation based augmentation recipe for TDNN+LSTM model on AMI; a fix to aspire recipe (#1314) * added reverberated TDNN+LSTM recipe on AMI; a fix to aspire recipe * added script version number --- egs/ami/s5b/RESULTS_ihm | 9 +- egs/ami/s5b/RESULTS_mdm | 9 +- egs/ami/s5b/RESULTS_sdm | 14 +- .../local/chain/multi_condition/run_tdnn.sh | 6 +- .../chain/multi_condition/run_tdnn_lstm.sh | 1 + .../tuning/run_tdnn_lstm_1a.sh | 334 ++++++++++++++++++ egs/ami/s5b/local/chain/run_tdnn_lstm.sh | 1 + .../s5/local/chain/tuning/run_tdnn_7b.sh | 5 +- 8 files changed, 368 insertions(+), 11 deletions(-) create mode 120000 egs/ami/s5b/local/chain/multi_condition/run_tdnn_lstm.sh create mode 100755 egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh create mode 120000 egs/ami/s5b/local/chain/run_tdnn_lstm.sh diff --git a/egs/ami/s5b/RESULTS_ihm b/egs/ami/s5b/RESULTS_ihm index 484115c3243..44234fc3fd9 100644 --- a/egs/ami/s5b/RESULTS_ihm +++ b/egs/ami/s5b/RESULTS_ihm @@ -69,7 +69,7 @@ for d in exp/ihm/chain/tdnn_sp_bi/decode_*; do grep Sum $d/*sc*/*ys | utils/best %WER 22.4 | 13098 94476 | 80.4 10.4 9.2 2.8 22.4 54.6 | 0.069 | exp/ihm/chain/tdnn_sp_bi/decode_dev/ascore_10/dev_hires.ctm.filt.sys %WER 22.5 | 12643 89974 | 80.0 12.1 7.9 2.6 22.5 52.8 | 0.157 | exp/ihm/chain/tdnn_sp_bi/decode_eval/ascore_10/eval_hires.ctm.filt.sys -# local/chain/multi_condition/run_tdnn.sh --mic ihm & +# local/chain/multi_condition/run_tdnn.sh --mic ihm # cleanup + chain TDNN model + IHM reverberated data # for d in exp/ihm/chain_cleaned_rvb/tdnn_sp_bi/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done %WER 21.5 | 13098 94486 | 81.8 11.0 7.2 3.3 21.5 54.6 | 0.090 | exp/ihm/chain_cleaned_rvb/tdnn_sp_rvb_bi/decode_dev/ascore_10/dev_hires.ctm.filt.sys @@ -80,3 +80,10 @@ for d in exp/ihm/chain/tdnn_sp_bi/decode_*; do grep Sum $d/*sc*/*ys | utils/best # cleanup + chain TDNN+LSTM model %WER 20.8 | 13098 94489 | 82.0 10.0 8.0 2.8 20.8 53.2 | -0.096 | exp/ihm/chain_cleaned/tdnn_lstm1i_sp_bi_ld5/decode_dev/ascore_11/dev_hires.ctm.filt.sys %WER 20.7 | 12643 89980 | 81.7 11.5 6.8 2.5 20.7 51.8 | 0.015 | exp/ihm/chain_cleaned/tdnn_lstm1i_sp_bi_ld5/decode_eval/ascore_11/eval_hires.ctm.filt.sys + + +# local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh --mic ihm +# cleanup + chain TDNN+LSTM model + IHM reverberated data +%WER 19.4 | 13098 94479 | 83.8 10.0 6.1 3.2 19.4 51.8 | -0.168 | exp/ihm/chain_cleaned_rvb/tdnn_lstm1i_sp_rvb_bi/decode_dev/ascore_10/dev_hires.ctm.filt.sys +%WER 19.3 | 12643 89977 | 83.3 11.0 5.7 2.6 19.3 49.6 
| -0.046 | exp/ihm/chain_cleaned_rvb/tdnn_lstm1i_sp_rvb_bi/decode_eval/ascore_10/eval_hires.ctm.filt.sys + diff --git a/egs/ami/s5b/RESULTS_mdm b/egs/ami/s5b/RESULTS_mdm index 80eb152fc5d..f27da5773ac 100644 --- a/egs/ami/s5b/RESULTS_mdm +++ b/egs/ami/s5b/RESULTS_mdm @@ -86,6 +86,13 @@ # local/chain/tuning/run_tdnn_lstm_1i.sh --mic mdm8 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned # cleanup + chain TDNN+LSTM model, MDM audio and alignments from IHM data -# *** best system *** %WER 34.6 | 15116 94508 | 69.6 17.6 12.9 4.1 34.6 62.3 | 0.687 | exp/mdm8/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/decode_dev/ascore_9/dev_hires_o4.ctm.filt.sys %WER 37.1 | 14343 90002 | 66.3 18.8 14.9 3.4 37.1 62.3 | 0.659 | exp/mdm8/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys + + +# local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh --mic mdm8 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned +# cleanup + chain TDNN+LSTM model, MDM original + IHM reverberated data, alignments from IHM data +# *** best system *** +%WER 31.8 | 14488 94497 | 71.8 15.4 12.8 3.5 31.8 62.7 | 0.698 | exp/mdm8/chain_cleaned_rvb/tdnn_lstm1i_sp_rvb_bi_ihmali/decode_dev/ascore_10/dev_hires_o4.ctm.filt.sys +%WER 34.2 | 13832 89953 | 68.8 16.5 14.7 3.0 34.2 62.4 | 0.669 | exp/mdm8/chain_cleaned_rvb/tdnn_lstm1i_sp_rvb_bi_ihmali/decode_eval/ascore_10/eval_hires_o4.ctm.filt.sys + diff --git a/egs/ami/s5b/RESULTS_sdm b/egs/ami/s5b/RESULTS_sdm index 5ff1f934a3f..05b68e5e780 100644 --- a/egs/ami/s5b/RESULTS_sdm +++ b/egs/ami/s5b/RESULTS_sdm @@ -79,16 +79,22 @@ %WER 40.7 | 14549 94520 | 63.6 21.4 15.0 4.3 40.7 66.2 | 0.617 | exp/sdm1/chain/tdnn_sp_bi_ihmali/decode_dev/ascore_8/dev_hires_o4.ctm.filt.sys %WER 45.1 | 13296 89971 | 59.1 23.4 17.6 4.2 45.1 69.5 | 0.591 | exp/sdm1/chain/tdnn_sp_bi_ihmali/decode_eval/ascore_8/eval_hires_o4.ctm.filt.sys -# local/chain/multi_condition/run_tdnn.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned & +# local/chain/multi_condition/run_tdnn.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned # cleanup + chain TDNN model, SDM original + IHM reverberated data, alignments from ihm data. -# *** best system *** # for d in exp/sdm1/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done %WER 38.6 | 14760 94502 | 65.3 19.3 15.4 3.9 38.6 64.9 | 0.599 | exp/sdm1/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali/decode_dev/ascore_9/dev_hires_o4.ctm.filt.sys %WER 42.7 | 14070 89982 | 60.9 21.0 18.0 3.6 42.7 64.5 | 0.571 | exp/sdm1/chain_cleaned_rvb/tdnn_sp_rvb_bi_ihmali/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys # local/chain/tuning/run_tdnn_lstm_1i.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned -# cleanup + chain TDNN model, SDM audio + alignments from ihm data. -# *** best system *** +# cleanup + chain TDNN+LSTM model, SDM audio + alignments from ihm data. %WER 37.6 | 15122 94495 | 66.1 18.7 15.1 3.7 37.6 63.2 | 0.646 | exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/decode_dev/ascore_10/dev_hires_o4.ctm.filt.sys %WER 40.9 | 13807 89961 | 62.4 20.0 17.6 3.3 40.9 65.7 | 0.612 | exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/decode_eval/ascore_10/eval_hires_o4.ctm.filt.sys + + +# local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned +# cleanup + chain TDNN+LSTM model, SDM original + IHM reverberated data, alignments from ihm data. 
+# *** best system *** +%WER 34.0 | 14455 94497 | 69.8 17.7 12.5 3.8 34.0 63.9 | 0.675 | exp/sdm1/chain_cleaned_rvb/tdnn_lstm1i_sp_rvb_bi_ihmali/decode_dev/ascore_10/dev_hires_o4.ctm.filt.sys +%WER 37.5 | 13261 89982 | 65.9 19.3 14.7 3.5 37.5 66.2 | 0.642 | exp/sdm1/chain_cleaned_rvb/tdnn_lstm1i_sp_rvb_bi_ihmali/decode_eval/ascore_10/eval_hires_o4.ctm.filt.sys + diff --git a/egs/ami/s5b/local/chain/multi_condition/run_tdnn.sh b/egs/ami/s5b/local/chain/multi_condition/run_tdnn.sh index 617336236ed..0f6abaf94c1 100755 --- a/egs/ami/s5b/local/chain/multi_condition/run_tdnn.sh +++ b/egs/ami/s5b/local/chain/multi_condition/run_tdnn.sh @@ -1,9 +1,9 @@ #!/bin/bash # This is a chain-training script with TDNN neural networks. -# This script is based on local/chain/run_tdnn.sh, but adding +# This script is based on local/chain/tuning/run_tdnn_1a.sh, but adding # the reverberated IHM data into the train set. -# This script obtains better results on both IHM and SDM tasks. +# This script obtains better results on IHM, SDM and MDM tasks. # Please see RESULTS_* for examples of command lines invoking this script. @@ -20,7 +20,7 @@ stage=1 mic=ihm nj=30 min_seg_len=1.55 -use_ihm_ali=true +use_ihm_ali=false train_set=train_cleaned gmm=tri3_cleaned # the gmm for the target data ihm_gmm=tri3_cleaned # the gmm for the IHM system (if --use-ihm-ali true). diff --git a/egs/ami/s5b/local/chain/multi_condition/run_tdnn_lstm.sh b/egs/ami/s5b/local/chain/multi_condition/run_tdnn_lstm.sh new file mode 120000 index 00000000000..8e647598556 --- /dev/null +++ b/egs/ami/s5b/local/chain/multi_condition/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a.sh \ No newline at end of file diff --git a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh new file mode 100755 index 00000000000..2869049843f --- /dev/null +++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh @@ -0,0 +1,334 @@ +#!/bin/bash + +# This is a chain-training script with TDNN+LSTM neural networks. +# This script is based on local/chain/tuning/run_tdnn_lstm_1i.sh, but adding +# the reverberated IHM data into the train set. +# This script obtains better results on IHM, SDM and MDM tasks. + +# Please see RESULTS_* for examples of command lines invoking this script. + +# local/chain/multi_condition/run_tdnn_lstm.sh --mic ihm --train-set train_cleaned --gmm tri3_cleaned & +# local/chain/multi_condition/run_tdnn_lstm.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned & +# local/chain/multi_condition/run_tdnn_lstm.sh --mic mdm8 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned & + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3_cleaned # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +num_data_reps=1 + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1i #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. 
+common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! $use_ihm_ali; then + [ "$mic" != "ihm" ] && \ + echo "$0: you cannot specify --use-ihm-ali false if the microphone is not ihm." && \ + exit 1; +else + [ "$mic" == "ihm" ] && \ + echo "$0: you must specify --use-ihm-ali false if the microphone is ihm." && \ + exit 1; +fi + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $original_lat_dir + rm $original_lat_dir/fsts.*.gz # save space + + lat_dir_ihmdata=exp/ihm/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats + + mkdir -p $lat_dir/temp/ + mkdir -p $lat_dir/temp2/ + lattice-copy "ark:gunzip -c $original_lat_dir/lat.*.gz |" ark,scp:$lat_dir/temp/lats.ark,$lat_dir/temp/lats.scp + lattice-copy "ark:gunzip -c $lat_dir_ihmdata/lat.*.gz |" ark,scp:$lat_dir/temp2/lats.ark,$lat_dir/temp2/lats.scp + + # copy the lattices for the reverberated data + rm -f $lat_dir/temp/combined_lats.scp + touch $lat_dir/temp/combined_lats.scp + cat $lat_dir/temp/lats.scp >> $lat_dir/temp/combined_lats.scp + for i in `seq 1 $num_data_reps`; do + cat $lat_dir/temp2/lats.scp | sed -e "s/^/rev${i}_/" >> $lat_dir/temp/combined_lats.scp + done + sort -u $lat_dir/temp/combined_lats.scp > $lat_dir/temp/combined_lats_sorted.scp + + lattice-copy scp:$lat_dir/temp/combined_lats_sorted.scp "ark:|gzip -c >$lat_dir/lat.1.gz" || exit 1; + echo "1" > $lat_dir/num_jobs + + # copy other files from original lattice dir + for f in cmvn_opts final.mdl splice_opts tree; do + cp $original_lat_dir/$f $lat_dir/$f + done +fi + + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat <<EOF > $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}${rvb_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/run_tdnn_lstm.sh b/egs/ami/s5b/local/chain/run_tdnn_lstm.sh new file mode 120000 index 00000000000..43145248fbd --- /dev/null +++ b/egs/ami/s5b/local/chain/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1i.sh \ No newline at end of file diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh index d91d016a1f4..4bddb3e5955 100755 --- a/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh +++ b/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh @@ -100,6 +100,7 @@ if [ $stage -le 9 ]; then data/train data/train_temp_for_lats utils/data/combine_short_segments.sh \ data/train_temp_for_lats $min_seg_len data/train_min${min_seg_len} + steps/compute_cmvn_stats.sh data/train_min${min_seg_len} || exit 1; fi if [ $stage -le 10 ]; then @@ -112,8 +113,8 @@ if [ $stage -le 10 ]; then rm -f $lat_dir/fsts.*.gz # save space rvb_lat_dir=exp/tri5a_rvb_min${min_seg_len}_lats - #mkdir -p $rvb_lat_dir/temp/ - #lattice-copy "ark:gunzip -c $lat_dir/lat.*.gz |" ark,scp:$rvb_lat_dir/temp/lats.ark,$rvb_lat_dir/temp/lats.scp + mkdir -p 
$rvb_lat_dir/temp/ + lattice-copy "ark:gunzip -c $lat_dir/lat.*.gz |" ark,scp:$rvb_lat_dir/temp/lats.ark,$rvb_lat_dir/temp/lats.scp # copy the lattices for the reverberated data rm -f $rvb_lat_dir/temp/combined_lats.scp From db4e85bce8b9ed87c3c73c3d5cf37c9add5b2779 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 7 Jan 2017 15:41:00 -0500 Subject: [PATCH 214/530] Various bug fixes in scripts and code --- .../nnet3/train/frame_level_objf/common.py | 20 +++++-------- egs/wsj/s5/steps/nnet3/chain/get_egs.sh | 2 +- egs/wsj/s5/steps/nnet3/get_egs.sh | 11 ++++--- .../s5/steps/nnet3/get_egs_discriminative.sh | 14 ++++----- egs/wsj/s5/steps/nnet3/get_egs_targets.sh | 9 ++++-- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 11 ------- egs/wsj/s5/steps/nnet3/train_rnn.py | 14 ++------- egs/wsj/s5/utils/queue.pl | 9 +++--- src/nnet3/nnet-compute.cc | 29 +++++++++---------- src/nnet3/nnet-example-utils.cc | 1 + src/nnet3/nnet-example-utils.h | 3 -- src/nnet3bin/nnet3-get-egs.cc | 2 ++ 12 files changed, 52 insertions(+), 73 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index 11bde7ebce1..280cd5ffff8 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -134,8 +134,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, num_hidden_layers, add_layers_period, left_context, right_context, momentum, max_param_change, shuffle_buffer_size, - run_opts, - cv_minibatch_size_str='256', frames_per_eg=-1, + run_opts, frames_per_eg=-1, min_deriv_time=None, max_deriv_time_relative=None, shrinkage_value=1.0, get_raw_nnet_from_am=True, @@ -183,7 +182,6 @@ def train_one_iteration(dir, iter, srand, egs_dir, dir=dir, iter=iter, egs_dir=egs_dir, left_context=left_context, right_context=right_context, run_opts=run_opts, - minibatch_size_str=cv_minibatch_size_str, get_raw_nnet_from_am=get_raw_nnet_from_am, wait=False, background_process_handler=background_process_handler) @@ -193,7 +191,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, left_context=left_context, right_context=right_context, run_opts=run_opts, - minibatch_size_str=cv_minibatch_size_str, wait=False, + wait=False, get_raw_nnet_from_am=get_raw_nnet_from_am, background_process_handler=background_process_handler) @@ -368,7 +366,7 @@ def compute_preconditioning_matrix(dir, egs_dir, num_lda_jobs, run_opts, def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, - right_context, run_opts, minibatch_size_str='1:256', + right_context, run_opts, wait=False, background_process_handler=None, get_raw_nnet_from_am=True): if get_raw_nnet_from_am: @@ -385,12 +383,11 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, nnet3-compute-prob "{model}" \ "ark,bg:nnet3-copy-egs {context_opts} \ ark:{egs_dir}/valid_diagnostic.egs ark:- | \ - nnet3-merge-egs --minibatch-size={minibatch_size_str} ark:- \ + nnet3-merge-egs --minibatch-size=1:64 ark:- \ ark:- |" """.format(command=run_opts.command, dir=dir, iter=iter, context_opts=context_opts, - minibatch_size_str=minibatch_size_str, model=model, egs_dir=egs_dir), wait=wait, background_process_handler=background_process_handler) @@ -400,20 +397,18 @@ def compute_train_cv_probabilities(dir, iter, egs_dir, left_context, nnet3-compute-prob "{model}" \ "ark,bg:nnet3-copy-egs {context_opts} \ ark:{egs_dir}/train_diagnostic.egs ark:- | \ - nnet3-merge-egs --minibatch-size={minibatch_size_str} ark:- \ + 
nnet3-merge-egs --minibatch-size=1:64 ark:- \ ark:- |" """.format(command=run_opts.command, dir=dir, iter=iter, context_opts=context_opts, - minibatch_size_str=minibatch_size_str, model=model, egs_dir=egs_dir), wait=wait, background_process_handler=background_process_handler) def compute_progress(dir, iter, egs_dir, left_context, right_context, - run_opts, minibatch_size_str=256, - background_process_handler=None, wait=False, + run_opts, background_process_handler=None, wait=False, get_raw_nnet_from_am=True): if get_raw_nnet_from_am: prev_model = "nnet3-am-copy --raw=true {0}/{1}.mdl - |".format( @@ -432,13 +427,12 @@ def compute_progress(dir, iter, egs_dir, left_context, right_context, nnet3-show-progress --use-gpu=no "{prev_model}" "{model}" \ "ark,bg:nnet3-copy-egs {context_opts} \ ark:{egs_dir}/train_diagnostic.egs ark:- | \ - nnet3-merge-egs --minibatch-size={minibatch_size_str} ark:- \ + nnet3-merge-egs --minibatch-size=1:64 ark:- \ ark:- |" """.format(command=run_opts.command, dir=dir, iter=iter, model=model, context_opts=context_opts, - minibatch_size_str=minibatch_size_str, prev_model=prev_model, egs_dir=egs_dir), wait=wait, background_process_handler=background_process_handler) diff --git a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh index 76c77a38c46..3ca2fc84627 100755 --- a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh @@ -216,7 +216,7 @@ if [ ! -z "$online_ivector_dir" ]; then ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; echo $ivector_dim > $dir/info/ivector_dim ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; - ivector_opts="--online-ivectors=$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" + ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" else ivector_opts="" echo 0 >$dir/info/ivector_dim diff --git a/egs/wsj/s5/steps/nnet3/get_egs.sh b/egs/wsj/s5/steps/nnet3/get_egs.sh index 330f4d8c7d3..27877680982 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs.sh @@ -189,7 +189,7 @@ if [ ! -z "$online_ivector_dir" ]; then ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; echo $ivector_dim > $dir/info/ivector_dim ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; - ivector_opts="--online-ivectors=$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" + ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" else ivector_opts="" echo 0 >$dir/info/ivector_dim @@ -201,8 +201,11 @@ if [ $stage -le 1 ]; then echo $num_frames > $dir/info/num_frames echo "$0: working out feature dim" feats_one="$(echo $feats | sed s/JOB/1/g)" - feat_dim=$(feat-to-dim "$feats_one" -) || exit 1; - echo $feat_dim > $dir/info/feat_dim + if feat_dim=$(feat-to-dim "$feats_one" - 2>/dev/null); then + echo $feat_dim > $dir/info/feat_dim + else # run without redirection to show the error. + feat-to-dim "$feats_one" -; exit 1 + fi else num_frames=$(cat $dir/info/num_frames) || exit 1; feat_dim=$(cat $dir/info/feat_dim) || exit 1; @@ -220,7 +223,7 @@ if [ $num_archives -eq 1 ]; then echo "*** $0: warning: the --frames-per-eg is too large to generate one archive with" echo "*** as many as --samples-per-iter egs in it. Consider reducing --frames-per-eg." 
sleep 4 -done +fi # We may have to first create a smaller number of larger archives, with number # $num_archives_intermediate, if $num_archives is more than the maximum number diff --git a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh index 017fd12acee..fd616160632 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh @@ -207,7 +207,7 @@ if [ ! -z $online_ivector_dir ]; then ivector_period=$(cat $online_ivector_dir/ivector_period) ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; echo $ivector_dim >$dir/info/ivector_dim - ivector_opts="--online-ivectors=$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" + ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" else ivector_opts="" fi @@ -217,12 +217,12 @@ if [ $stage -le 2 ]; then num_frames=$(steps/nnet2/get_num_frames.sh $data) echo $num_frames > $dir/info/num_frames echo "$0: working out feature dim" - feats_one="$(echo $feats | sed s/JOB/1/g)" - feat_dim=$(feat-to-dim "$feats_one" -) || exit 1; - echo $feat_dim > $dir/info/feat_dim -else - num_frames=$(cat $dir/info/num_frames) || exit 1; - feat_dim=$(cat $dir/info/feat_dim) || exit 1; + feats_one="$(echo $feats | sed s:JOB:1:g)" + if feat_dim=$(feat-to-dim "$feats_one" - 2>/dev/null); then + echo $feat_dim > $dir/info/feat_dim + else # run without stderr redirection to show the error. + feat-to-dim "$feats_one" -; exit 1 + fi fi # Working out total number of archives. Add one on the assumption the diff --git a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh index 04830a4bc05..4af10e2dde1 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh @@ -182,7 +182,7 @@ if [ ! -z "$online_ivector_dir" ]; then ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; echo $ivector_dim > $dir/info/ivector_dim ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; - ivector_opts="--online-ivectors=$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" + ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" else ivector_opts="" echo 0 >$dir/info/ivector_dim @@ -194,8 +194,11 @@ if [ $stage -le 1 ]; then echo $num_frames > $dir/info/num_frames echo "$0: working out feature dim" feats_one="$(echo $feats | sed s:JOB:1:g)" - feat_dim=$(feat-to-dim "$feats_one" -) || exit 1; - echo $feat_dim > $dir/info/feat_dim + if feat_dim=$(feat-to-dim "$feats_one" - 2>/dev/null); then + echo $feat_dim > $dir/info/feat_dim + else # run without stderr redirection to show the error. 
+ feat-to-dim "$feats_one" -; exit 1 + fi else num_frames=$(cat $dir/info/num_frames) || exit 1; feat_dim=$(cat $dir/info/feat_dim) || exit 1; diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index d0fb33a54c5..0380e92ae1e 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -105,13 +105,6 @@ def get_args(): measured by steps/nnet3/get_saturation.pl) exceeds this threshold we scale the parameter matrices with the shrink-value.""") - parser.add_argument("--trainer.optimization.cv-minibatch-size", type=str, - dest='cv_minibatch_size', default='256', - help="""Size of the minibatch to be used in diagnostic - jobs (use smaller value for BLSTMs to control memory - usage). May be a more general rule as accepted by the - --minibatch-size option of nnet3-merge-egs; run that - program without args to see the format.""") # RNN specific trainer options parser.add_argument("--trainer.rnn.num-chunk-per-minibatch", type=str, dest='num_chunk_per_minibatch', default='100', @@ -164,9 +157,6 @@ def process_args(args): if not common_train_lib.validate_minibatch_size_str(args.num_chunk_per_minibatch): raise Exception("--trainer.rnn.num-chunk-per-minibatch has an invalid value"); - if not common_train_lib.validate_minibatch_size_str(args.cv_minibatch_size): - raise Exception("--trainer.optimization.cv-minibatch-size has an invalid value"); - if args.chunk_left_context < 0: raise Exception("--egs.chunk-left-context should be non-negative") @@ -425,7 +415,6 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): momentum=args.momentum, max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, - cv_minibatch_size_str=args.cv_minibatch_size, run_opts=run_opts, get_raw_nnet_from_am=False, background_process_handler=background_process_handler) diff --git a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index 2211e826088..5aad4d972ba 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -104,13 +104,6 @@ def get_args(): measured by steps/nnet3/get_saturation.pl) exceeds this threshold we scale the parameter matrices with the shrink-value.""") - parser.add_argument("--trainer.optimization.cv-minibatch-size", type=str, - dest='cv_minibatch_size', default='256', - help="""Size of the minibatch to be used in diagnostic - jobs (use smaller value for BLSTMs to control memory - usage). 
May be a more general rule as accepted by the - --minibatch-size option of nnet3-merge-egs; run that - program without args to see the format.""") # RNN specific trainer options parser.add_argument("--trainer.rnn.num-chunk-per-minibatch", type=str, dest='num_chunk_per_minibatch', default='100', @@ -160,9 +153,6 @@ def process_args(args): if not common_train_lib.validate_minibatch_size_str(args.num_chunk_per_minibatch): raise Exception("--trainer.rnn.num-chunk-per-minibatch has an invalid value"); - if not common_train_lib.validate_minibatch_size_str(args.cv_minibatch_size): - raise Exception("--trainer.optimization.cv-minibatch-size has an invalid value"); - if args.chunk_left_context < 0: raise Exception("--egs.chunk-left-context should be non-negative") @@ -308,7 +298,8 @@ def train(args, run_opts, background_process_handler): left_context_initial, right_context_final)) if args.chunk_width != frames_per_eg_str: raise Exception("mismatch between --egs.chunk-width and the frames_per_eg " - "in the egs dir {0} vs {1}".(args.chunk_width, frames_per_eg_str)) + "in the egs dir {0} vs {1}".format(args.chunk_width, + frames_per_eg_str)) if (args.num_jobs_final > num_archives): raise Exception('num_jobs_final cannot exceed the number of archives ' @@ -420,7 +411,6 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): momentum=args.momentum, max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, - cv_minibatch_size_str=args.cv_minibatch_size, run_opts=run_opts, background_process_handler=background_process_handler) diff --git a/egs/wsj/s5/utils/queue.pl b/egs/wsj/s5/utils/queue.pl index 69188ec074a..10fd3b1a885 100755 --- a/egs/wsj/s5/utils/queue.pl +++ b/egs/wsj/s5/utils/queue.pl @@ -91,10 +91,11 @@ () } sub caught_signal { - if ( defined $sge_job_id ) { # Signal trapped after submitting jobs - system ("qdel $sge_job_id"); - die "Caught a signal: $! , deleting SGE task: $sge_job_id and exiting\n"; - } + if ( defined $sge_job_id ) { # Signal trapped after submitting jobs + my $signal = $!; + system ("qdel $sge_job_id"); + die "Caught a signal: $signal , deleting SGE task: $sge_job_id and exiting\n"; + } } if (@ARGV < 2) { diff --git a/src/nnet3/nnet-compute.cc b/src/nnet3/nnet-compute.cc index d01327c8265..abda3646417 100644 --- a/src/nnet3/nnet-compute.cc +++ b/src/nnet3/nnet-compute.cc @@ -453,24 +453,19 @@ void NnetComputer::CheckNoPendingIo() { pending_commands_.push_back(program_counter_); program_counter_++; } - while (!pending_commands_.empty()) { + for (size_t i = 0; i < pending_commands_.size(); i++) { // the order here doesn't really matter; we go from back to front // as it's more efficient, not that efficiency really matters here. - int32 last_command = pending_commands_.back(); - if (c[last_command].command_type == kProvideOutput) { - // we can ignore that we didn't provide output to the user. - KALDI_VLOG(3) << "Output to node '" << nnet_.GetNodeName(c[last_command].arg2) - << "' was available but not used."; - pending_commands_.pop_back(); - } else { + int32 command = pending_commands_[i]; + if (c[command].command_type == kAcceptInput) { // we can't ignore if we needed input from the user that hasn't been // provided. 
- KALDI_ASSERT(c[last_command].command_type == kAcceptInput); - int32 node = c[last_command].arg2; - KALDI_ERR << "Cannot run computation because we did not get input for node '" + int32 node = c[command].arg2; + KALDI_ERR << "Cannot run computation-- we did not get input for node '" << nnet_.GetNodeName(node) << "'"; } } + pending_commands_.clear(); } int32 NnetComputer::GetIoMatrixIndex(const std::string &node_name, bool is_output) { @@ -481,9 +476,9 @@ int32 NnetComputer::GetIoMatrixIndex(const std::string &node_name, bool is_outpu // first make sure all the I/O commands that we immediately expect, are listed // in 'pending_commands_'. while (program_counter_ < static_cast(computation_.commands.size()) && - (c[program_counter_].command_type == kAcceptInput || - c[program_counter_].command_type == kProvideOutput || - c[program_counter_].command_type == kNoOperationMarker)) { + ((c[program_counter_].command_type == kAcceptInput || + c[program_counter_].command_type == kProvideOutput || + c[program_counter_].command_type == kNoOperationMarker))) { if (c[program_counter_].command_type != kNoOperationMarker) pending_commands_.push_back(program_counter_); program_counter_++; @@ -495,7 +490,11 @@ int32 NnetComputer::GetIoMatrixIndex(const std::string &node_name, bool is_outpu int32 this_submatrix_index = c[command].arg1, this_node_index = c[command].arg2; if (this_command_is_output == is_output && node_index == this_node_index) { - pending_commands_.erase(pending_commands_.begin() + i); + if (!is_output) { + pending_commands_.erase(pending_commands_.begin() + i); + // don't erase the command for outputs, as that would prevent things + // from being output twice, which is an unnecessary restriction. + } if (!(computation_.IsWholeMatrix(this_submatrix_index))) KALDI_ERR << "Getting input or output that is not a whole matrix " << "(probably some optimization code needs to be changed)"; diff --git a/src/nnet3/nnet-example-utils.cc b/src/nnet3/nnet-example-utils.cc index 28578de42fb..7c3743c3a7f 100644 --- a/src/nnet3/nnet-example-utils.cc +++ b/src/nnet3/nnet-example-utils.cc @@ -820,6 +820,7 @@ void UtteranceSplitter::GetChunksForUtterance( config_.right_context_final : config_.right_context); t += chunk_sizes[i]; } + SetOutputWeights(utterance_length, chunk_info); AccStatsForUtterance(utterance_length, *chunk_info); // check that the end of the last chunk doesn't go more than // 'config_.frame_subsampling_factor - 1' frames past the end diff --git a/src/nnet3/nnet-example-utils.h b/src/nnet3/nnet-example-utils.h index 021e91959e3..d1eb85b6d11 100644 --- a/src/nnet3/nnet-example-utils.h +++ b/src/nnet3/nnet-example-utils.h @@ -122,9 +122,6 @@ struct ExampleGenerationConfig { "frames of right context of input features that are added " "to each example at the end of the utterance (if <0, this " "defaults to the same as --right-context)"); - po->Register("right-context", &right_context, "Number of frames of right " - "context of input features that are added to each " - "example"); po->Register("num-frames", &num_frames_str, "Number of frames with labels " "that each example contains (i.e. the left and right context " "are to be added to this). May just be an integer (e.g. 
" diff --git a/src/nnet3bin/nnet3-get-egs.cc b/src/nnet3bin/nnet3-get-egs.cc index 562684c30ab..03623f02a07 100644 --- a/src/nnet3bin/nnet3-get-egs.cc +++ b/src/nnet3bin/nnet3-get-egs.cc @@ -126,6 +126,8 @@ static bool ProcessFile(const MatrixBase &feats, iter->second *= chunk.output_weights[i]; } + eg.io.push_back(NnetIo("output", num_pdfs, 0, labels)); + if (compress) eg.Compress(); From 5cdbd1879747d40d29f5099fd51270355102b92b Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 7 Jan 2017 12:48:51 -0800 Subject: [PATCH 215/530] [build] Makefile changes: print time for tests; dash-compatibility change (#1322) --- src/makefiles/default_rules.mk | 10 +++++++--- tools/Makefile | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/makefiles/default_rules.mk b/src/makefiles/default_rules.mk index a329ee86706..91da11a2aad 100644 --- a/src/makefiles/default_rules.mk +++ b/src/makefiles/default_rules.mk @@ -78,16 +78,20 @@ test: test_compile @{ result=0; \ for x in $(TESTFILES); do \ printf "Running $$x ..."; \ + timestamp1=$$(date +"%s"); \ ./$$x >$$x.testlog 2>&1; \ - if [ $$? -ne 0 ]; then \ - echo "... FAIL $$x"; \ + ret=$$? \ + timestamp2=$$(date +"%s"); \ + time_taken=$$[timestamp2-timestamp1]; \ + if [ $$ret -ne 0 ]; then \ + echo " $${time_taken}s... FAIL $$x"; \ result=1; \ if [ -n "$TRAVIS" ] && [ -f core ] && command -v gdb >/dev/null 2>&1; then \ gdb $$x core -ex "thread apply all bt" -batch >>$$x.testlog 2>&1; \ rm -rf core; \ fi; \ else \ - echo "... SUCCESS"; \ + echo " $${time_taken}s... SUCCESS"; \ rm -f $$x.testlog; \ fi; \ done; \ diff --git a/tools/Makefile b/tools/Makefile index 8fd0c7a02a9..9fdc35da402 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -86,7 +86,7 @@ else ifeq ($(OS),Windows_NT) cd openfst-$(OPENFST_VERSION)/; ./configure --prefix=`pwd` --enable-static --enable-shared --enable-far --enable-ngram-fsts CXX=$(CXX) CXXFLAGS="$(CXXFLAGS) -O -Wa,-mbig-obj" LDFLAGS="$(LDFLAGS)" LIBS="-ldl" else # ppc64le needs the newsted config.guess to be correctly indentified - [ "$(shell uname -p)" == "ppc64le" ] && wget -O openfst-$(OPENFST_VERSION)/config.guess \ + [ "$(shell uname -p)" = "ppc64le" ] && wget -O openfst-$(OPENFST_VERSION)/config.guess \ "http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD" || \ echo "config.guess unchanged" cd openfst-$(OPENFST_VERSION)/; ./configure --prefix=`pwd` --enable-static --enable-shared --enable-far --enable-ngram-fsts CXX=$(CXX) CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" LIBS="-ldl" From 27aaec62211c385a5df58893b6a4e5f6b9055f77 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 8 Jan 2017 15:08:52 -0500 Subject: [PATCH 216/530] Add ConstantComponent [better alternative to ConstantFunctionComponent, now deprecated] and test it. 
--- egs/tedlium/s5_r2/local/nnet3/compare_wer.sh | 51 +++++ src/nnet3/nnet-analyze.cc | 6 +- src/nnet3/nnet-compile.cc | 37 +++- src/nnet3/nnet-component-itf.cc | 2 + src/nnet3/nnet-computation-graph.cc | 8 +- src/nnet3/nnet-general-component.cc | 190 +++++++++++++++++++ src/nnet3/nnet-general-component.h | 92 +++++++++ src/nnet3/nnet-simple-component.h | 5 +- src/nnet3/nnet-test-utils.cc | 13 +- 9 files changed, 389 insertions(+), 15 deletions(-) create mode 100755 egs/tedlium/s5_r2/local/nnet3/compare_wer.sh diff --git a/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh b/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh new file mode 100755 index 00000000000..012ea702427 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh @@ -0,0 +1,51 @@ +#!/bin/bash + + +echo $0 $* + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi + +echo -n "System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +dirnames=(dev dev_rescore test test_rescore) +strings=("WER on dev(orig) " "WER on dev(rescored) " "WER on test(orig) " "WER on test(rescored)") + +for n in 0 1 2 3; do + echo -n "${strings[$n]}" + for x in $*; do + wer=$(grep Sum $x/decode_${dirnames[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n " [looped:] " + for x in $*; do + wer=$(grep Sum $x/decode_looped_${dirnames[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +echo -n "Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo diff --git a/src/nnet3/nnet-analyze.cc b/src/nnet3/nnet-analyze.cc index d76624b93a1..e53b1742e06 100644 --- a/src/nnet3/nnet-analyze.cc +++ b/src/nnet3/nnet-analyze.cc @@ -725,10 +725,10 @@ void ComputationChecker::CheckComputationIndexes() const { // note: input may be the empty matrix (in unusual circumstances, for non-simple // components). if (c.arg3 < 0 || c.arg3 >= num_submatrices || - (c.arg3 == 0 && !(properties & kSimpleComponent)) || + (c.arg3 == 0 && (properties & kSimpleComponent)) || c.arg4 < 1 || c.arg4 >= num_submatrices) - KALDI_ERR << "Sub-matrix indexes out of range."; - if (submatrices[c.arg3].num_cols != component->InputDim()) + KALDI_ERR << "Sub-matrix indexes out of range."; + if (c.arg3 > 0 && submatrices[c.arg3].num_cols != component->InputDim()) KALDI_ERR << "Input-dim mismatch."; if (submatrices[c.arg4].num_cols != component->OutputDim()) KALDI_ERR << "Input-dim mismatch."; diff --git a/src/nnet3/nnet-compile.cc b/src/nnet3/nnet-compile.cc index 729d07a46b3..4bc20cd1cee 100644 --- a/src/nnet3/nnet-compile.cc +++ b/src/nnet3/nnet-compile.cc @@ -51,6 +51,7 @@ void Compiler::CreateComputation(const CompilerOptions &opts, NnetComputation *computation) { computation->Clear(); ComputationGraphBuilder builder(nnet_, &graph_); + // note: there are only >1 segments in a 'looped' computation. for (size_t segment = 0; segment < requests_.size(); segment++) { builder.Compute(*(requests_[segment])); if (!builder.AllOutputsAreComputable()) { @@ -59,18 +60,23 @@ void Compiler::CreateComputation(const CompilerOptions &opts, } builder.Prune(); } - // see function declaration's comment for meaning of "phases". 
+ // see function declaration's comment for more on the meaning of "phases" (a + // phase will later be decomposed into one or more steps). for each segment + // s, phases_per_segment[s] is a list of phases; each phase is a list of + // cindex_ids. std::vector > > phases_per_segment; ComputeComputationPhases(nnet_, graph_, &phases_per_segment); std::vector > steps; steps.reserve(1000); // maps each step to the segment in which it appears. in the normal case - // (non-online computation), a vector of all zeros. + // (non-looped computation), a vector of all zeros. std::vector step_to_segment; { + // note: this class will output to 'steps' and to 'cindex_id_to_location_'. + // it may incidentally change 'graph_' by adding a few cindexes. ComputationStepsComputer steps_computer(nnet_, &graph_, &steps, &cindex_id_to_location_); @@ -80,7 +86,8 @@ void Compiler::CreateComputation(const CompilerOptions &opts, while (step_to_segment.size() < steps.size()) step_to_segment.push_back(segment); - // save memory, by deleting the phases we just consumed. + // save memory, by deleting the phases we just consumed. the + // following two lines just exist to save memory. std::vector > temp; phases_per_segment[segment].swap(temp); } @@ -286,10 +293,23 @@ void Compiler::CreateStepInfo( for (int32 row_index = 0; row_index < num_ids; row_index++) this_info.output_indexes[row_index] = graph_.cindexes[this_info.output_cindex_ids[row_index]].second; - KALDI_ASSERT(num_ids > 0); - // node id's of all Cindexes are the same, so just use first one. - this_info.node_index = - graph_.cindexes[this_info.output_cindex_ids.front()].first; + if (num_ids > 0) { + // node id's of all Cindexes are the same, so just use first one. + this_info.node_index = + graph_.cindexes[this_info.output_cindex_ids.front()].first; + } else { + // it's possible to have an empty step if it's the component-input step of + // a GeneralComponent that does not always have dependencies, such as the + // ConstantFunctionComponent. This is just a kind of placeholder; it will + // generate no commands. The next command works because the next + // step will be the propagate for that Component, whose node-index is one + // more than the component-input node. + KALDI_ASSERT((step+1) < by_step->size() && !(*by_step)[step+1].empty()); + this_info.node_index = + graph_.cindexes[(*by_step)[step+1][0]].first - 1; + KALDI_ASSERT(this_info.node_index >= 0); + continue; // we don't need to do anything else for this step. + } const NetworkNode &node = nnet_.GetNode(this_info.node_index); int32 num_rows = num_ids, num_cols = node.Dim(nnet_); @@ -1077,7 +1097,8 @@ void Compiler::OutputDebugInfo(NnetComputation *computation) const { computation->matrix_debug_info.resize(num_matrices); for (int32 step = 0; step < num_steps; step++) { const StepInfo &step_info = steps_[step]; - KALDI_ASSERT(step_info.value != 0); + if (step_info.value == 0) + continue; // e.g. input step for ConstantComponent. 
if (!computation->IsWholeMatrix(step_info.value)) continue; int32 value_matrix = computation->submatrices[step_info.value].matrix_index; diff --git a/src/nnet3/nnet-component-itf.cc b/src/nnet3/nnet-component-itf.cc index 00dd802e091..23a8662a0d5 100644 --- a/src/nnet3/nnet-component-itf.cc +++ b/src/nnet3/nnet-component-itf.cc @@ -143,6 +143,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) { ans = new StatisticsPoolingComponent(); } else if (component_type == "ConstantFunctionComponent") { ans = new ConstantFunctionComponent(); + } else if (component_type == "ConstantComponent") { + ans = new ConstantComponent(); } else if (component_type == "DropoutComponent") { ans = new DropoutComponent(); } else if (component_type == "BackpropTruncationComponent") { diff --git a/src/nnet3/nnet-computation-graph.cc b/src/nnet3/nnet-computation-graph.cc index 1761dd1b775..7c20f3ae711 100644 --- a/src/nnet3/nnet-computation-graph.cc +++ b/src/nnet3/nnet-computation-graph.cc @@ -1596,6 +1596,12 @@ void ComputationStepsComputer::ProcessInputOrOutputStep( int32 ComputationStepsComputer::AddStep(const std::vector &cindexes, bool add_if_absent) { + // note: we can't assert that cindexes is nonempty, because it's possible for + // input steps for GeneralComponents to be empty if they require no input + // indexes; and because the compiler code expects component steps to be + // preceded by component-input steps, we can't just omit these empty steps. + // [note: a component-input step is about preparing the input for a component's + // propagation.] int32 step_index = steps_->size(); steps_->push_back(std::vector()); std::vector &step = steps_->back(); // vector of cindex_id. @@ -1639,7 +1645,6 @@ int32 ComputationStepsComputer::AddStep(const std::vector &cindexes, int32 ComputationStepsComputer::AddStep(std::vector *cindex_ids) { int32 step_index = steps_->size(); - KALDI_ASSERT(!cindex_ids->empty()); steps_->push_back(std::vector()); steps_->back().swap(*cindex_ids); std::vector::const_iterator iter = steps_->back().begin(), @@ -1769,6 +1774,7 @@ void ComputationStepsComputer::ProcessComponentStep( int32 c = *set_iter; input_step.push_back(graph_->cindexes[c]); } + // sort the input cindexes. 
std::sort(input_step.begin(), input_step.end()); diff --git a/src/nnet3/nnet-general-component.cc b/src/nnet3/nnet-general-component.cc index b6acbfd59bf..e3ef684edfd 100644 --- a/src/nnet3/nnet-general-component.cc +++ b/src/nnet3/nnet-general-component.cc @@ -1185,5 +1185,195 @@ void BackpropTruncationComponent::Add(BaseFloat alpha, num_zeroed_ += alpha * other->num_zeroed_; } + +std::string ConstantComponent::Info() const { + std::ostringstream stream; + stream << UpdatableComponent::Info() + << ", " << Type() + << ", output-dim=" << OutputDim() + << ", is-updatable=" << std::boolalpha << is_updatable_ + << ", use-natural-gradient=" << std::boolalpha + << use_natural_gradient_; + PrintParameterStats(stream, "output", output_, true); + return stream.str(); +} + +ConstantComponent::ConstantComponent(): + UpdatableComponent(), is_updatable_(true), + use_natural_gradient_(true) { } + +ConstantComponent::ConstantComponent( + const ConstantComponent &other): + UpdatableComponent(other), output_(other.output_), + is_updatable_(other.is_updatable_), + use_natural_gradient_(other.use_natural_gradient_), + preconditioner_(other.preconditioner_) { } + +void ConstantComponent::Propagate( + const ComponentPrecomputedIndexes *indexes, + const CuMatrixBase &in, + CuMatrixBase *out) const { + out->CopyRowsFromVec(output_); +} + +void ConstantComponent::Backprop( + const std::string &debug_info, + const ComponentPrecomputedIndexes *indexes, + const CuMatrixBase &, // in_value + const CuMatrixBase &, // out_value + const CuMatrixBase &out_deriv, + Component *to_update_in, + CuMatrixBase *in_deriv) const { + // we don't update in_deriv, since we set the flag + // kBackpropAdds, and the output doesn't depend on the + // input, so the input-derivative is zero. + if (to_update_in) { + ConstantComponent *to_update = + dynamic_cast(to_update_in); + if (to_update->is_updatable_) { + // only do the update if the is_updatable_ flag is set. 
+ KALDI_ASSERT(to_update && to_update->is_updatable_); + if (to_update->use_natural_gradient_ && !to_update->is_gradient_) { + CuMatrix out_deriv_copy(out_deriv); + BaseFloat scale = 1.0; + to_update->preconditioner_.PreconditionDirections(&out_deriv_copy, + NULL, &scale); + to_update->output_.AddRowSumMat(scale * to_update->learning_rate_, + out_deriv_copy); + } else { + to_update->output_.AddRowSumMat(to_update->learning_rate_, + out_deriv); + } + } + } +} + +void ConstantComponent::Read(std::istream &is, bool binary) { + std::string token; + ReadToken(is, binary, &token); + if (token == "<ConstantComponent>") { + ReadToken(is, binary, &token); + } + if (token == "<LearningRateFactor>") { + ReadBasicType(is, binary, &learning_rate_factor_); + ReadToken(is, binary, &token); + } else { + learning_rate_factor_ = 1.0; + } + if (token == "<IsGradient>") { + ReadBasicType(is, binary, &is_gradient_); + ReadToken(is, binary, &token); + } else { + is_gradient_ = false; + } + if (token == "<MaxChange>") { + ReadBasicType(is, binary, &max_change_); + ReadToken(is, binary, &token); + } else { + max_change_ = 0.0; + } + if (token == "<LearningRate>") { + ReadBasicType(is, binary, &learning_rate_); + ReadToken(is, binary, &token); + } else { + learning_rate_ = 0.001; + } + if (token != "<Output>") { + KALDI_ERR << "Expected token <Output>, got " << token; + } + output_.Read(is, binary); + ExpectToken(is, binary, "<IsUpdatable>"); + ReadBasicType(is, binary, &is_updatable_); + ExpectToken(is, binary, "<UseNaturalGradient>"); + ReadBasicType(is, binary, &use_natural_gradient_); + ExpectToken(is, binary, "</ConstantComponent>"); +} + +void ConstantComponent::Write(std::ostream &os, bool binary) const { + WriteUpdatableCommon(os, binary); // Write the opening tag and learning rate + WriteToken(os, binary, "<Output>"); + output_.Write(os, binary); + WriteToken(os, binary, "<IsUpdatable>"); + WriteBasicType(os, binary, is_updatable_); + WriteToken(os, binary, "<UseNaturalGradient>"); + WriteBasicType(os, binary, use_natural_gradient_); + WriteToken(os, binary, "</ConstantComponent>"); +} + +Component* ConstantComponent::Copy() const { + return new ConstantComponent(*this); +} + +void ConstantComponent::Scale(BaseFloat scale) { + if (is_updatable_) + output_.Scale(scale); +} + +void ConstantComponent::Add(BaseFloat alpha, const Component &other_in) { + if (is_updatable_) { + const ConstantComponent *other = + dynamic_cast(&other_in); + KALDI_ASSERT(other != NULL); + output_.AddVec(alpha, other->output_); + } +} + +void ConstantComponent::SetZero(bool treat_as_gradient) { + if (treat_as_gradient) { + SetActualLearningRate(1.0); + is_gradient_ = true; + } + output_.SetZero(); +} + +void ConstantComponent::PerturbParams(BaseFloat stddev) { + CuVector temp_output(output_.Dim(), kUndefined); + temp_output.SetRandn(); + output_.AddVec(stddev, temp_output); +} + +BaseFloat ConstantComponent::DotProduct( + const UpdatableComponent &other_in) const { + KALDI_ASSERT(is_updatable_); + const ConstantComponent *other = + dynamic_cast(&other_in); + KALDI_ASSERT(other != NULL); + return VecVec(output_, other->output_); +} + +void ConstantComponent::InitFromConfig(ConfigLine *cfl) { + int32 output_dim = 0; + InitLearningRatesFromConfig(cfl); + bool ok = cfl->GetValue("output-dim", &output_dim); + cfl->GetValue("is-updatable", &is_updatable_); + cfl->GetValue("use-natural-gradient", &use_natural_gradient_); + BaseFloat output_mean = 0.0, output_stddev = 0.0; + cfl->GetValue("output-mean", &output_mean); + cfl->GetValue("output-stddev", &output_stddev); + if (!ok || cfl->HasUnusedValues() || output_dim <= 0) { + KALDI_ERR << "Bad initializer " << cfl->WholeLine(); + } + Vector output(output_dim); + output.SetRandn(); +
+  output.Scale(output_stddev);
+  output.Add(output_mean);
+  output_ = output;
+}
+
+int32 ConstantComponent::NumParameters() const {
+  KALDI_ASSERT(is_updatable_);
+  return output_.Dim();
+}
+
+void ConstantComponent::Vectorize(VectorBase<BaseFloat> *params) const {
+  params->CopyFromVec(output_);
+}
+
+void ConstantComponent::UnVectorize(const VectorBase<BaseFloat> &params) {
+  output_.CopyFromVec(params);
+}
+
+
+
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/nnet-general-component.h b/src/nnet3/nnet-general-component.h
index 3fc88076fc2..6e5e542fa13 100644
--- a/src/nnet3/nnet-general-component.h
+++ b/src/nnet3/nnet-general-component.h
@@ -580,6 +580,98 @@ class BackpropTruncationComponentPrecomputedIndexes:
   }
 };
 
+
+// ConstantComponent returns a constant value for all requested
+// indexes, and it has no dependencies on any input.
+// It's like a ConstantFunctionComponent, but done the "right"
+// way without requiring an unnecessary input.
+// It is optionally trainable, and optionally you can use natural
+// gradient.
+class ConstantComponent: public UpdatableComponent {
+ public:
+  // actually this component requires no inputs; this value
+  // is really a don't-care.
+  virtual int32 InputDim() const { return output_.Dim(); }
+
+  virtual int32 OutputDim() const { return output_.Dim(); }
+
+  virtual std::string Info() const;
+
+  // possible parameter values with their defaults:
+  // is-updatable=true use-natural-gradient=true output-dim=-1
+  // output-mean=0 output-stddev=0
+  virtual void InitFromConfig(ConfigLine *cfl);
+
+  ConstantComponent();
+
+  ConstantComponent(const ConstantComponent &other);
+
+  virtual std::string Type() const { return "ConstantComponent"; }
+  virtual int32 Properties() const {
+    return
+        (is_updatable_ ? kUpdatableComponent|kLinearInParameters : 0);
+  }
+  virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
+                         const CuMatrixBase<BaseFloat> &in,
+                         CuMatrixBase<BaseFloat> *out) const;
+  virtual void Backprop(const std::string &debug_info,
+                        const ComponentPrecomputedIndexes *indexes,
+                        const CuMatrixBase<BaseFloat> &, // in_value
+                        const CuMatrixBase<BaseFloat> &, // out_value
+                        const CuMatrixBase<BaseFloat> &out_deriv,
+                        Component *to_update,
+                        CuMatrixBase<BaseFloat> *in_deriv) const;
+
+  virtual void Read(std::istream &is, bool binary);
+  virtual void Write(std::ostream &os, bool binary) const;
+
+  virtual Component* Copy() const;
+
+  // Some functions that are only to be reimplemented for GeneralComponents.
+  virtual void GetInputIndexes(const MiscComputationInfo &misc_info,
+                               const Index &output_index,
+                               std::vector<Index> *desired_indexes) const {
+    desired_indexes->clear();  // requires no inputs.
+  }
+
+  // This function returns true if at least one of the input indexes used to
+  // compute this output index is computable.
+  // it's simple because this component requires no inputs.
+  virtual bool IsComputable(const MiscComputationInfo &misc_info,
+                            const Index &output_index,
+                            const IndexSet &input_index_set,
+                            std::vector<Index> *used_inputs) const {
+    if (used_inputs) used_inputs->clear();
+    return true;
+  }
+
+  // Some functions from base-class UpdatableComponent.
+  virtual void Scale(BaseFloat scale);
+  virtual void Add(BaseFloat alpha, const Component &other);
+  virtual void SetZero(bool treat_as_gradient);
+  virtual void PerturbParams(BaseFloat stddev);
+  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
+  virtual int32 NumParameters() const;
+  virtual void Vectorize(VectorBase<BaseFloat> *params) const;
+  virtual void UnVectorize(const VectorBase<BaseFloat> &params);
+ private:
+
+  // the output value-- a vector.
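+  // (Propagate() copies this vector into every requested output row.)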
+  CuVector<BaseFloat> output_;
+
+  bool is_updatable_;
+  // if true, and if updatable, do natural-gradient update.
+  bool use_natural_gradient_;
+  OnlineNaturalGradient preconditioner_;
+
+  const ConstantComponent &operator
+      = (const ConstantComponent &other); // Disallow.
+};
+
+
+
+
 } // namespace nnet3
 } // namespace kaldi
diff --git a/src/nnet3/nnet-simple-component.h b/src/nnet3/nnet-simple-component.h
index 1106fdc3246..f64e38925a3 100644
--- a/src/nnet3/nnet-simple-component.h
+++ b/src/nnet3/nnet-simple-component.h
@@ -1324,8 +1324,9 @@ class PerElementOffsetComponent: public UpdatableComponent {
 // i.e. its output does not depend on its input.  It is the same as
 // an affine component with the linear term fixed at zero.
 // It is optionally trainable, and optionally you can use natural
-// gradient.  The input is required only because the framework
-// requires components to have an input.
+// gradient.  The input is required only because it's more convenient
+// to make SimpleComponents [but see ConstantComponent, which requires
+// no inputs].
 class ConstantFunctionComponent: public UpdatableComponent {
  public:
   virtual int32 InputDim() const { return input_dim_; }
diff --git a/src/nnet3/nnet-test-utils.cc b/src/nnet3/nnet-test-utils.cc
index 7ab46d1003e..18131aaa213 100644
--- a/src/nnet3/nnet-test-utils.cc
+++ b/src/nnet3/nnet-test-utils.cc
@@ -388,6 +388,11 @@ void GenerateConfigSequenceLstm(
   os << "input-node name=input dim=" << input_dim << std::endl;
 
+  // trainable cell value for start/end of file.
+  os << "component name=c0 type=ConstantComponent"
+     << " output-dim=" << cell_dim << std::endl;
+
+
   // Parameter Definitions W*(* replaced by - to have valid names)
   // Input gate control : Wi* matrices
   os << "component name=Wi-xr type=NaturalGradientAffineComponent"
@@ -467,7 +472,13 @@ void GenerateConfigSequenceLstm(
   }
   std::string spliced_input = temp_string_stream.str();
 
-  std::string c_tminus1 = "Sum(IfDefined(Offset(c1_t, -1)), IfDefined(Offset( c2_t, -1)))";
+  std::string c_tminus1 = "Sum(Failover(Offset(c1_t, -1), c0), IfDefined(Offset( c2_t, -1)))";
+
+
+  // c0.  note: the input is never used, as the component requires
+  // no input indexes; we just write the node itself as its input
+  // to keep the structures happy.
+  os << "component-node name=c0 component=c0 input=c0\n";
 
   // i_t
   os << "component-node name=i1 component=Wi-xr input=Append("
From e09e92f87dc7fc1ea13bcabe135c2a38692bd082 Mon Sep 17 00:00:00 2001
From: Xingyu Na
Date: Mon, 9 Jan 2017 15:21:38 +0800
Subject: [PATCH 217/530] [egs] minor fixes for mandarin recipes (#1325)

---
 egs/gale_mandarin/s5/run.sh          | 4 +---
 egs/hkust/s5/local/nnet3/run_tdnn.sh | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/egs/gale_mandarin/s5/run.sh b/egs/gale_mandarin/s5/run.sh
index 74e69e9d12a..fe9fdbdd483 100755
--- a/egs/gale_mandarin/s5/run.sh
+++ b/egs/gale_mandarin/s5/run.sh
@@ -54,7 +54,7 @@ mfccdir=mfcc
 # spread the mfccs over various machines, as this data-set is quite large.
 if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
-  mfcc=$(basename mfccdir) # in case was absolute pathname (unlikely), get basename.
+  mfcc=$(basename $mfccdir) # in case was absolute pathname (unlikely), get basename.
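+  # e.g. with mfccdir=mfcc, the create_split_dir.pl call below puts the
+  # actual storage under /export/b{05,06,07,08} and points mfcc/storage at it.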
   utils/create_split_dir.pl /export/b{05,06,07,08}/$USER/kaldi-data/egs/gale_mandarin/s5/$mfcc/storage \
     $mfccdir/storage
 fi
@@ -203,5 +203,3 @@ local/split_wer_per_corpus.sh $galeData >> RESULTS
 echo training succedded
 exit 0
-
-
diff --git a/egs/hkust/s5/local/nnet3/run_tdnn.sh b/egs/hkust/s5/local/nnet3/run_tdnn.sh
index 35bcc7d7512..30611446ee4 100755
--- a/egs/hkust/s5/local/nnet3/run_tdnn.sh
+++ b/egs/hkust/s5/local/nnet3/run_tdnn.sh
@@ -66,7 +66,7 @@ fi
 if [ $stage -le 9 ]; then
   if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
     utils/create_split_dir.pl \
-      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_swbd-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/hkust-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
   fi
 
   steps/nnet3/train_dnn.py --stage=$train_stage \
From 722df9ae6e00baba5e0c227d3f5ecf3aa408e12f Mon Sep 17 00:00:00 2001
From: Dan Povey
Date: Mon, 9 Jan 2017 12:31:43 -0800
Subject: [PATCH 218/530] [build] Fix dash problems [set shell to /bin/bash in Makefile]

---
 src/makefiles/default_rules.mk | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/makefiles/default_rules.mk b/src/makefiles/default_rules.mk
index 91da11a2aad..17f122622f1 100644
--- a/src/makefiles/default_rules.mk
+++ b/src/makefiles/default_rules.mk
@@ -1,3 +1,6 @@
+
+SHELL := /bin/bash
+
 ifeq ($(KALDI_FLAVOR), dynamic)
   ifeq ($(shell uname), Darwin)
     XLDLIBS := $(LDLIBS)
From f646952b3f038e5dacb62171b0ded1ea5a0421c8 Mon Sep 17 00:00:00 2001
From: Dogan Can
Date: Mon, 9 Jan 2017 14:23:21 -0800
Subject: [PATCH 219/530] [build] src/base/get_version.sh: change regexp syntax
 to support older bash versions. (#1327)

---
 src/base/get_version.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/base/get_version.sh b/src/base/get_version.sh
index 4c1e8059059..94efd41b631 100755
--- a/src/base/get_version.sh
+++ b/src/base/get_version.sh
@@ -39,15 +39,10 @@ cd "$(dirname ${BASH_SOURCE[0]})"
 # Read the partial version number specified in the first line of src/.version.
 version=$(head -1 ../.version)
 
-# Empty version number is not allowed.
-if [ -z "$version" ]; then
-  version="?"
-fi
-
 if [ -e ../.short_version ]; then
   echo "$0: File src/.short_version exists."
   echo "$0: Stopping the construction of full version number from git history."
-elif [[ $version != +([0-9]).+([0-9]) ]]; then
+elif ! [[ $version =~ ^[0-9][0-9]*\.[0-9][0-9]*$ ]]; then
   echo "$0: The version number \"$version\" specified in src/.version is not" \
        "in MAJOR.MINOR format."
   echo "$0: Stopping the construction of full version number from git history."
@@ -76,6 +71,11 @@ else
     version="$version-${head_commit_short}"
 fi
 
+# Empty version number is not allowed.
+if [ -z "$version" ]; then
+  version="?"
+fi
+
 # Write version info to a temporary file.
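 # (the trap set just below removes the temp file on exit, so an interrupted
 # run cannot leave stray temporaries behind.)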
temp=$(mktemp) trap 'rm -f "$temp"' EXIT From 5f15d253ba23c1a9f84649a50d1c830603458355 Mon Sep 17 00:00:00 2001 From: Skanda Koppula Date: Tue, 10 Jan 2017 15:45:33 -0500 Subject: [PATCH 220/530] [egs] cosmetic; small typo fix in sre08 i-vector DNN extraction (#1330) --- egs/sre08/v1/sid/train_ivector_extractor_dnn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh b/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh index 5e5881e358b..64579735376 100755 --- a/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh +++ b/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh @@ -119,7 +119,7 @@ if [ $stage -le -2 ]; then $dir/final.ubm $dir/0.ie || exit 1; fi -# Do Gaussian selection and posterior extracion +# Do Gaussian selection and posterior extraction if [ $stage -le -1 ]; then echo $nj_full > $dir/num_jobs From 69b616d4a516db195203c9cf9e5c1c6feb98cbbc Mon Sep 17 00:00:00 2001 From: LvHang Date: Wed, 11 Jan 2017 00:56:15 -0500 Subject: [PATCH 221/530] [src,egs,scripts] Remove sinusoid detection code and old SGMM code/scripts (#1329) --- egs/aurora4/s5/local/run_sgmm.sh | 113 - egs/babel/s5/local/decode_helper.sh | 9 - egs/babel/s5b/local/decode_helper.sh | 9 - egs/babel/s5c/local/decode_helper.sh | 9 - egs/gp/s1/local/gp_train_multi_sgmm_deltas.sh | 359 --- egs/gp/s1/path.sh | 2 +- egs/gp/s1/steps/decode_sgmm_deltas.sh | 162 -- egs/gp/s1/steps/train_sgmm_deltas.sh | 270 --- egs/gp/s5/path.sh | 2 +- egs/gp/s5/run.sh | 6 +- egs/lre07/v2/path.sh | 2 +- egs/rm/s5/local/run_pitch.sh | 1 - egs/rm/s5/local/run_sgmm.sh | 39 - egs/rm/s5/run.sh | 3 +- egs/sprakbanken/s5/local/run_sgmm.sh | 112 - egs/swbd/s5/local/run_sgmm.sh | 38 - egs/swbd/s5/run.sh | 1 - egs/swbd/s5/run_edin.sh | 2 +- egs/swbd/s5b/local/run_sgmm.sh | 38 - egs/vystadial_cz/online_demo/path.sh | 2 +- egs/wsj/s5/local/run_sgmm.sh | 112 - egs/wsj/s5/run.sh | 3 - egs/wsj/s5/steps/align_sgmm.sh | 198 -- egs/wsj/s5/steps/align_sgmm2.sh | 4 +- egs/wsj/s5/steps/decode_sgmm.sh | 266 -- egs/wsj/s5/steps/decode_sgmm2_fromlats.sh | 2 +- egs/wsj/s5/steps/decode_sgmm2_rescore.sh | 4 +- egs/wsj/s5/steps/decode_sgmm_fromlats.sh | 277 --- egs/wsj/s5/steps/decode_sgmm_rescore.sh | 108 - egs/wsj/s5/steps/make_denlats_sgmm.sh | 189 -- egs/wsj/s5/steps/tandem/align_sgmm.sh | 236 -- egs/wsj/s5/steps/tandem/decode_sgmm.sh | 303 --- egs/wsj/s5/steps/tandem/make_denlats_sgmm.sh | 199 -- egs/wsj/s5/steps/tandem/train_mmi_sgmm.sh | 193 -- egs/wsj/s5/steps/tandem/train_sgmm.sh | 315 --- egs/wsj/s5/steps/train_mmi_sgmm.sh | 156 -- egs/wsj/s5/steps/train_sgmm.sh | 280 --- src/Doxyfile | 4 +- src/Makefile | 19 +- src/decoder/Makefile | 2 +- src/doc/kaldi_for_dummies.dox | 2 +- src/feat/Makefile | 7 +- src/feat/sinusoid-detection-test.cc | 452 ---- src/feat/sinusoid-detection.cc | 945 -------- src/feat/sinusoid-detection.h | 436 ---- src/featbin/Makefile | 4 +- src/featbin/detect-sinusoids.cc | 113 - src/sgmm/Makefile | 18 - src/sgmm/am-sgmm-test.cc | 278 --- src/sgmm/am-sgmm.cc | 1395 ----------- src/sgmm/am-sgmm.h | 420 ---- src/sgmm/decodable-am-sgmm.cc | 72 - src/sgmm/decodable-am-sgmm.h | 119 - src/sgmm/estimate-am-sgmm-ebw.cc | 654 ----- src/sgmm/estimate-am-sgmm-ebw.h | 217 -- src/sgmm/estimate-am-sgmm-multi-test.cc | 154 -- src/sgmm/estimate-am-sgmm-multi.cc | 746 ------ src/sgmm/estimate-am-sgmm-multi.h | 146 -- src/sgmm/estimate-am-sgmm-test.cc | 161 -- src/sgmm/estimate-am-sgmm.cc | 2135 ----------------- src/sgmm/estimate-am-sgmm.h | 475 ---- src/sgmm/fmllr-sgmm-test.cc | 233 -- 
src/sgmm/fmllr-sgmm.cc | 554 ----- src/sgmm/fmllr-sgmm.h | 192 -- src/sgmm/sgmm-clusterable.cc | 280 --- src/sgmm/sgmm-clusterable.h | 112 - src/sgmmbin/Makefile | 31 - src/sgmmbin/init-ubm.cc | 95 - src/sgmmbin/sgmm-acc-fmllrbasis-ali.cc | 216 -- src/sgmmbin/sgmm-acc-stats-ali.cc | 191 -- src/sgmmbin/sgmm-acc-stats-gpost.cc | 174 -- src/sgmmbin/sgmm-acc-stats.cc | 211 -- src/sgmmbin/sgmm-acc-stats2.cc | 217 -- src/sgmmbin/sgmm-acc-tree-stats.cc | 185 -- src/sgmmbin/sgmm-align-compiled.cc | 179 -- src/sgmmbin/sgmm-build-tree.cc | 201 -- src/sgmmbin/sgmm-calc-distances.cc | 74 - src/sgmmbin/sgmm-cluster-phones.cc | 148 -- src/sgmmbin/sgmm-comp-prexform.cc | 84 - src/sgmmbin/sgmm-copy.cc | 74 - src/sgmmbin/sgmm-decode-faster.cc | 218 -- src/sgmmbin/sgmm-est-ebw.cc | 118 - src/sgmmbin/sgmm-est-fmllr-gpost.cc | 261 -- src/sgmmbin/sgmm-est-fmllr.cc | 318 --- src/sgmmbin/sgmm-est-fmllrbasis.cc | 93 - src/sgmmbin/sgmm-est-multi.cc | 233 -- src/sgmmbin/sgmm-est-spkvecs-gpost.cc | 223 -- src/sgmmbin/sgmm-est-spkvecs.cc | 257 -- src/sgmmbin/sgmm-est.cc | 172 -- src/sgmmbin/sgmm-gselect.cc | 125 - src/sgmmbin/sgmm-info.cc | 110 - src/sgmmbin/sgmm-init-from-tree-stats.cc | 147 -- src/sgmmbin/sgmm-init.cc | 111 - src/sgmmbin/sgmm-latgen-faster.cc | 271 --- src/sgmmbin/sgmm-latgen-simple.cc | 232 -- src/sgmmbin/sgmm-mixup.cc | 145 -- src/sgmmbin/sgmm-normalize.cc | 85 - src/sgmmbin/sgmm-post-to-gpost.cc | 190 -- src/sgmmbin/sgmm-rescore-lattice.cc | 165 -- src/sgmmbin/sgmm-sum-accs.cc | 69 - src/sgmmbin/sgmm-sum-tree-stats.cc | 100 - src/sgmmbin/sgmm-write-ubm.cc | 71 - src/tree/clusterable-classes.h | 4 - 103 files changed, 32 insertions(+), 20135 deletions(-) delete mode 100755 egs/aurora4/s5/local/run_sgmm.sh delete mode 100755 egs/gp/s1/local/gp_train_multi_sgmm_deltas.sh delete mode 100755 egs/gp/s1/steps/decode_sgmm_deltas.sh delete mode 100755 egs/gp/s1/steps/train_sgmm_deltas.sh delete mode 100755 egs/rm/s5/local/run_sgmm.sh delete mode 100755 egs/sprakbanken/s5/local/run_sgmm.sh delete mode 100755 egs/swbd/s5/local/run_sgmm.sh delete mode 100755 egs/swbd/s5b/local/run_sgmm.sh delete mode 100755 egs/wsj/s5/local/run_sgmm.sh delete mode 100755 egs/wsj/s5/steps/align_sgmm.sh delete mode 100755 egs/wsj/s5/steps/decode_sgmm.sh delete mode 100755 egs/wsj/s5/steps/decode_sgmm_fromlats.sh delete mode 100755 egs/wsj/s5/steps/decode_sgmm_rescore.sh delete mode 100755 egs/wsj/s5/steps/make_denlats_sgmm.sh delete mode 100755 egs/wsj/s5/steps/tandem/align_sgmm.sh delete mode 100755 egs/wsj/s5/steps/tandem/decode_sgmm.sh delete mode 100755 egs/wsj/s5/steps/tandem/make_denlats_sgmm.sh delete mode 100755 egs/wsj/s5/steps/tandem/train_mmi_sgmm.sh delete mode 100755 egs/wsj/s5/steps/tandem/train_sgmm.sh delete mode 100755 egs/wsj/s5/steps/train_mmi_sgmm.sh delete mode 100755 egs/wsj/s5/steps/train_sgmm.sh delete mode 100644 src/feat/sinusoid-detection-test.cc delete mode 100644 src/feat/sinusoid-detection.cc delete mode 100644 src/feat/sinusoid-detection.h delete mode 100644 src/featbin/detect-sinusoids.cc delete mode 100644 src/sgmm/Makefile delete mode 100644 src/sgmm/am-sgmm-test.cc delete mode 100644 src/sgmm/am-sgmm.cc delete mode 100644 src/sgmm/am-sgmm.h delete mode 100644 src/sgmm/decodable-am-sgmm.cc delete mode 100644 src/sgmm/decodable-am-sgmm.h delete mode 100644 src/sgmm/estimate-am-sgmm-ebw.cc delete mode 100644 src/sgmm/estimate-am-sgmm-ebw.h delete mode 100644 src/sgmm/estimate-am-sgmm-multi-test.cc delete mode 100644 src/sgmm/estimate-am-sgmm-multi.cc delete mode 100644 
src/sgmm/estimate-am-sgmm-multi.h delete mode 100644 src/sgmm/estimate-am-sgmm-test.cc delete mode 100644 src/sgmm/estimate-am-sgmm.cc delete mode 100644 src/sgmm/estimate-am-sgmm.h delete mode 100644 src/sgmm/fmllr-sgmm-test.cc delete mode 100644 src/sgmm/fmllr-sgmm.cc delete mode 100644 src/sgmm/fmllr-sgmm.h delete mode 100644 src/sgmm/sgmm-clusterable.cc delete mode 100644 src/sgmm/sgmm-clusterable.h delete mode 100644 src/sgmmbin/Makefile delete mode 100644 src/sgmmbin/init-ubm.cc delete mode 100644 src/sgmmbin/sgmm-acc-fmllrbasis-ali.cc delete mode 100644 src/sgmmbin/sgmm-acc-stats-ali.cc delete mode 100644 src/sgmmbin/sgmm-acc-stats-gpost.cc delete mode 100644 src/sgmmbin/sgmm-acc-stats.cc delete mode 100644 src/sgmmbin/sgmm-acc-stats2.cc delete mode 100644 src/sgmmbin/sgmm-acc-tree-stats.cc delete mode 100644 src/sgmmbin/sgmm-align-compiled.cc delete mode 100644 src/sgmmbin/sgmm-build-tree.cc delete mode 100644 src/sgmmbin/sgmm-calc-distances.cc delete mode 100644 src/sgmmbin/sgmm-cluster-phones.cc delete mode 100644 src/sgmmbin/sgmm-comp-prexform.cc delete mode 100644 src/sgmmbin/sgmm-copy.cc delete mode 100644 src/sgmmbin/sgmm-decode-faster.cc delete mode 100644 src/sgmmbin/sgmm-est-ebw.cc delete mode 100644 src/sgmmbin/sgmm-est-fmllr-gpost.cc delete mode 100644 src/sgmmbin/sgmm-est-fmllr.cc delete mode 100644 src/sgmmbin/sgmm-est-fmllrbasis.cc delete mode 100644 src/sgmmbin/sgmm-est-multi.cc delete mode 100644 src/sgmmbin/sgmm-est-spkvecs-gpost.cc delete mode 100644 src/sgmmbin/sgmm-est-spkvecs.cc delete mode 100644 src/sgmmbin/sgmm-est.cc delete mode 100644 src/sgmmbin/sgmm-gselect.cc delete mode 100644 src/sgmmbin/sgmm-info.cc delete mode 100644 src/sgmmbin/sgmm-init-from-tree-stats.cc delete mode 100644 src/sgmmbin/sgmm-init.cc delete mode 100644 src/sgmmbin/sgmm-latgen-faster.cc delete mode 100644 src/sgmmbin/sgmm-latgen-simple.cc delete mode 100644 src/sgmmbin/sgmm-mixup.cc delete mode 100644 src/sgmmbin/sgmm-normalize.cc delete mode 100644 src/sgmmbin/sgmm-post-to-gpost.cc delete mode 100644 src/sgmmbin/sgmm-rescore-lattice.cc delete mode 100644 src/sgmmbin/sgmm-sum-accs.cc delete mode 100644 src/sgmmbin/sgmm-sum-tree-stats.cc delete mode 100644 src/sgmmbin/sgmm-write-ubm.cc diff --git a/egs/aurora4/s5/local/run_sgmm.sh b/egs/aurora4/s5/local/run_sgmm.sh deleted file mode 100755 index 62be4d83774..00000000000 --- a/egs/aurora4/s5/local/run_sgmm.sh +++ /dev/null @@ -1,113 +0,0 @@ -#!/bin/bash - -# This script is invoked from ../run.sh -# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity. - -. cmd.sh - -# SGMM system on si84 data [sgmm5a]. Note: the system we aligned from used the si284 data for -# training, but this shouldn't have much effect. 
- -( - steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ - data/train_si84 data/lang exp/tri4b exp/tri4b_ali_si84 || exit 1; - - steps/train_ubm.sh --cmd "$train_cmd" \ - 400 data/train_si84 data/lang exp/tri4b_ali_si84 exp/ubm5a || exit 1; - - steps/train_sgmm.sh --cmd "$train_cmd" \ - 3500 10000 data/train_si84 data/lang exp/tri4b_ali_si84 \ - exp/ubm5b/final.ubm exp/sgmm5a || exit 1; - - ( - utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5a exp/sgmm5a/graph_tgpr - steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ - exp/sgmm5a/graph_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 - ) & - - steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si84 \ - --use-graphs true --use-gselect true data/train_si84 data/lang exp/sgmm5a exp/sgmm5a_ali_si84 || exit 1; - steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 \ - data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 - - steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \ - data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1 - - for iter in 1 2 3 4; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \ - exp/sgmm5a_mmi_b0.1/decode_tgpr_dev93_it$iter & - done - - steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \ - --update-opts "--cov-min-value=0.9" data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1_m0.9 - - for iter in 1 2 3 4; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \ - exp/sgmm5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it$iter & - done - -) & - - -( -# The next commands are the same thing on all the si284 data. 
- -# SGMM system on the si284 data [sgmm5b] - steps/train_ubm.sh --cmd "$train_cmd" \ - 600 data/train_si284 data/lang exp/tri4b_ali_si284 exp/ubm5b || exit 1; - - steps/train_sgmm.sh --cmd "$train_cmd" \ - 5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \ - exp/ubm5b/final.ubm exp/sgmm5b || exit 1; - - ( - utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5b exp/sgmm5b/graph_tgpr - steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ - exp/sgmm5b/graph_tgpr data/test_dev93 exp/sgmm5b/decode_tgpr_dev93 - steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \ - exp/sgmm5b/graph_tgpr data/test_eval92 exp/sgmm5b/decode_tgpr_eval92 - - utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm5b exp/sgmm5b/graph_bd_tgpr || exit 1; - steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \ - exp/sgmm5b/graph_bd_tgpr data/test_dev93 exp/sgmm5b/decode_bd_tgpr_dev93 - steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \ - exp/sgmm5b/graph_bd_tgpr data/test_eval92 exp/sgmm5b/decode_bd_tgpr_eval92 - ) & - - steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si284 \ - --use-graphs true --use-gselect true data/train_si284 data/lang exp/sgmm5b exp/sgmm5b_ali_si284 - - steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \ - data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 - - steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 --boost 0.1 \ - data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 exp/sgmm5b_mmi_b0.1 - - for iter in 1 2 3 4; do - for test in dev93 eval92; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_tgpr_${test} data/lang_test_tgpr data/test_${test} exp/sgmm5b/decode_tgpr_${test} \ - exp/sgmm5b_mmi_b0.1/decode_tgpr_${test}_it$iter & - - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_bd_tgpr_${test} data/lang_test_bd_tgpr data/test_${test} exp/sgmm5b/decode_bd_tgpr_${test} \ - exp/sgmm5b_mmi_b0.1/decode_bd_tgpr_${test}_it$iter & - done - done -) & - - - -# Train quinphone SGMM system. - -steps/train_sgmm.sh --cmd "$train_cmd" \ - --context-opts "--context-width=5 --central-position=2" \ - 5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \ - exp/ubm5b/final.ubm exp/sgmm5c || exit 1; - -# Decode from lattices in exp/sgmm5a/decode_tgpr_dev93. 
-steps/decode_sgmm_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ - data/test_dev93 data/lang_test_tgpr exp/sgmm5a/decode_tgpr_dev93 exp/sgmm5c/decode_tgpr_dev93 - diff --git a/egs/babel/s5/local/decode_helper.sh b/egs/babel/s5/local/decode_helper.sh index 3be49854038..59b2fdad3c9 100755 --- a/egs/babel/s5/local/decode_helper.sh +++ b/egs/babel/s5/local/decode_helper.sh @@ -18,15 +18,6 @@ elif [ "$1" == "FMLLR" ]; then utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \ $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1 -elif [ "$1" == "SGMM" ]; then - utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 - - steps/decode_sgmm.sh --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR \ - $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1; - - steps/decode_sgmm.sh --use-fmllr true --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR\ - $MODELDIR/graph $DEVDIR $MODELDIR/decode_fmllr || exit 1; - fi diff --git a/egs/babel/s5b/local/decode_helper.sh b/egs/babel/s5b/local/decode_helper.sh index 3be49854038..59b2fdad3c9 100755 --- a/egs/babel/s5b/local/decode_helper.sh +++ b/egs/babel/s5b/local/decode_helper.sh @@ -18,15 +18,6 @@ elif [ "$1" == "FMLLR" ]; then utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \ $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1 -elif [ "$1" == "SGMM" ]; then - utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 - - steps/decode_sgmm.sh --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR \ - $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1; - - steps/decode_sgmm.sh --use-fmllr true --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR\ - $MODELDIR/graph $DEVDIR $MODELDIR/decode_fmllr || exit 1; - fi diff --git a/egs/babel/s5c/local/decode_helper.sh b/egs/babel/s5c/local/decode_helper.sh index 3be49854038..59b2fdad3c9 100755 --- a/egs/babel/s5c/local/decode_helper.sh +++ b/egs/babel/s5c/local/decode_helper.sh @@ -18,15 +18,6 @@ elif [ "$1" == "FMLLR" ]; then utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \ $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1 -elif [ "$1" == "SGMM" ]; then - utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 - - steps/decode_sgmm.sh --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR \ - $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1; - - steps/decode_sgmm.sh --use-fmllr true --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR\ - $MODELDIR/graph $DEVDIR $MODELDIR/decode_fmllr || exit 1; - fi diff --git a/egs/gp/s1/local/gp_train_multi_sgmm_deltas.sh b/egs/gp/s1/local/gp_train_multi_sgmm_deltas.sh deleted file mode 100755 index dfe1f211d6c..00000000000 --- a/egs/gp/s1/local/gp_train_multi_sgmm_deltas.sh +++ /dev/null @@ -1,359 +0,0 @@ -#!/bin/bash -u - -# Copyright 2012 Arnab Ghoshal -# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This is Subspace Gaussian Mixture Model (SGMM) training-- -# see "The subspace Gaussian mixture model--A structured model for speech recognition" -# by D. Povey et al, Computer Speech and Language, 2011. - -function error_exit () { - echo -e "$@" >&2; exit 1; -} - -function readint () { - local retval=${1/#*=/}; # In case --switch=ARG format was used -# retval=${retval#0*} # Strip any leading 0's - [[ "$retval" =~ ^-?[0-9][0-9]*$ ]] \ - || error_exit "Argument \"$retval\" not an integer." - echo $retval -} - -function est_alimodel () { -# If we have speaker vectors, we need an alignment model. This function gets -# the Gaussian-level alignments with the speaker vectors but accumulates stats -# without any speaker vectors; we re-estimate M, w, c and S to get a model -# that's compatible with not having speaker vectors. Note that the transitions -# are not updated since the decoding graph will be shared with the normal model. - local lx=$1 - for L in $LANGUAGES; do - wdir=$dir/$L - local lspkdim=`sgmm-info $wdir/$lx.mdl | grep speaker | awk '{print $NF}'` - if [ "$lspkdim" -le 0 ]; then - echo "est_alimodel: No speaker space in model '$wdir/$lx.mdl'. Returning." - return - fi - done - - local y=0; - local lflags=MwcS # First time don't update v - while [ $y -lt $numiters_alimdl ]; do - [ $y -gt 0 ] && lflags=vMwcS - echo "Pass $y of building alignment model, flags = '$lflags'" - local lmulti_est_opts='' # model, acc, model-out, occs-out tuples - for L in $LANGUAGES; do - ( - data=data/$L/train - lang=data/$L/lang - wdir=$dir/$L - local cur_alimdl=$wdir/tmp$y.alimdl - [ $y -eq 0 ] && cur_alimdl=$wdir/$lx.mdl - feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk ark:$wdir/TASK_ID.cmvn scp:$data/split$nj/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |" - gselect_opt="--gselect=ark,s,cs:gunzip -c $wdir/TASK_ID.gselect.gz|" - spkvecs_opt="--spk-vecs=ark:$wdir/TASK_ID.vecs" - - submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/acc_ali${lx}_$y.TASK_ID.log \ - $sjopts ali-to-post "ark:gunzip -c $wdir/TASK_ID.ali.gz|" ark:- \| \ - sgmm-post-to-gpost $spkvecs_opt "$gselect_opt" \ - --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk $wdir/$lx.mdl \ - "$feats" ark,s,cs:- ark:- \| \ - sgmm-acc-stats-gpost --update-flags=$lflags $cur_alimdl "$feats" \ - ark,s,cs:- $wdir/$y.TASK_ID.aliacc \ - || { touch $dir/err; \ - error_exit "$L; Align model iter $y: Error accumulating stats"; } - - # Summing accs is quite fast; run locally - sgmm-sum-accs $wdir/sum.aliacc $wdir/$y.*.aliacc || \ - { touch $dir/err; \ - error_exit "$L; Align model iter $y: Error summing stats"; } - )& # Accumulate in parallel for different languages - wdir=$dir/$L - local cur_alimdl=$wdir/tmp$y.alimdl - [ $y -eq 0 ] && cur_alimdl=$wdir/$lx.mdl - lmulti_est_opts="$lmulti_est_opts $cur_alimdl $wdir/sum.aliacc $wdir/tmp$[$y+1].alimdl $wdir/tmp$[$y+1].occs" - done - wait - - submit_jobs.sh "$qcmd" --log=$dir/log/update_ali.$y.log $sjopts \ - sgmm-est-multi --update-flags=$lflags --remove-speaker-space=true \ - $lmulti_est_opts \ - || 
error_exit "Error estimating alignment models on iter $y"; - - rm -f $dir/??/$y.*.aliacc $dir/??/sum.aliacc || exit 1; - [ $y -gt 0 ] && rm $dir/??/tmp$y.{alimdl,occs} - y=$[$y+1] - done - - for L in $LANGUAGES; do - mv $dir/$L/tmp$y.alimdl $dir/$L/$lx.alimdl - done -} - -nj=4 # Default number of jobs -stage=-5 # Default starting stage (start with tree building) -qcmd="" # Options for the submit_jobs.sh script -sjopts="" # Options for the submit_jobs.sh script -LANGUAGES='GE PO SP SW' # Languages processed - -PROG=`basename $0`; -usage="Usage: $PROG [options] \n -e.g.: $PROG 40 39 exp/ubm3c/final.ubm exp/sgmm3c\n\n -Options:\n - --help\t\tPrint this message and exit\n - --lang STR\tList of languages to process (default = '$LANGUAGES')\n - --num-jobs INT\tNumber of parallel jobs to run (default=$nj).\n - --qcmd STR\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n - --sjopts STR\tOptions for the 'submit_jobs.sh' script\n - --stage INT\tStarting stage (e.g. -4 for SGMM init; 2 for iter 2; default=$stage)\n -"; - -echo "$PROG $@" -while [ $# -gt 0 ]; do - case "${1# *}" in # ${1# *} strips any leading spaces from the arguments - --help) echo -e $usage; exit 0 ;; - --lang) LANGUAGES="$2"; shift 2 ;; - --num-jobs) - shift; nj=`readint $1`; - [ $nj -lt 1 ] && error_exit "--num-jobs arg '$nj' not positive."; - shift ;; - --qcmd) - shift; qcmd=" --qcmd=${1}"; shift ;; - --sjopts) - shift; sjopts="$1"; shift ;; - --stage) - shift; stage=`readint $1`; shift ;; - -*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;; - *) break ;; # end of options: interpreted as num-leaves - esac -done - -if [ $# != 4 ]; then - error_exit $usage; -fi - -[ -f path.sh ] && . path.sh - -# This is SGMM with speaker vectors, on top of LDA+[something] features. -# Any speaker-specific transforms are obtained from the alignment directory. -# To be run from .. - -phndim=$1 -spkdim=$2 -ubm=$3 -dir=$4 - -[ -f $ubm ] || error_exit "UBM file '$ubm' does not exist" -mkdir -p $dir/log || error_exit "Cannot create '$dir/log'" - -# (1): Model initialization; training graph and initial alignment generation. 
-for L in $LANGUAGES; do -( - data=data/$L/train - lang=data/$L/lang - alidir=exp/$L/tri2a_ali - wdir=$dir/$L - oov_sym=`cat $lang/oov.txt` - mkdir -p $wdir/log || error_exit "Cannot create working directory '$wdir'" - - # Initialize the model (removed the --spk-space-dim option) - if [ $stage -le -5 ]; then - echo "$L: Initializing model" - submit_jobs.sh "$qcmd" --log=$wdir/log/init_sgmm.log $sjopts \ - sgmm-init --phn-space-dim=$phndim $lang/topo $wdir/tree $ubm \ - $wdir/0.mdl || { touch $dir/err; error_exit "$L: SGMM init failed."; } - fi - - # Make training graphs - if [ $stage -le -4 ]; then - echo "$L: Compiling training graphs" - submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/mkgraphs.TASK_ID.log \ - $sjopts compile-train-graphs $wdir/tree $wdir/0.mdl $lang/L.fst \ - "ark:sym2int.pl --map-oov '$oov_sym' --ignore-first-field $lang/words.txt < $data/split$nj/TASK_ID/text |" \ - "ark:|gzip -c >$wdir/TASK_ID.fsts.gz" \ - || { touch $dir/err; error_exit "$L: Error compiling training graphs"; } - fi - - if [ $stage -le -3 ]; then - echo "$L: Converting alignments" - submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/convert.TASK_ID.log \ - $sjopts convert-ali $alidir/final.mdl $wdir/0.mdl $wdir/tree \ - "ark:gunzip -c $alidir/TASK_ID.ali.gz|" \ - "ark:|gzip -c >$wdir/TASK_ID.ali.gz" \ - || { touch $dir/err; error_exit "$L: Convert alignment failed."; } - fi - - if [ $stage -le -2 ]; then - echo "$L: Computing cepstral mean and variance statistics" - submit_jobs.sh "$qcmd" --njobs=$nj $sjopts --log=$wdir/log/cmvn.TASK_ID.log \ - compute-cmvn-stats --spk2utt=ark:$data/split$nj/TASK_ID/spk2utt \ - scp:$data/split$nj/TASK_ID/feats.scp ark:$wdir/TASK_ID.cmvn \ - || { touch $dir/err; error_exit "$L: Computing CMN/CVN stats failed."; } - fi - - feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk ark:$wdir/TASK_ID.cmvn scp:$data/split$nj/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |" - - if [ $stage -le -1 ]; then - echo "$L: Doing Gaussian selection" - submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/gselectTASK_ID.log \ - $sjopts sgmm-gselect $wdir/0.mdl "$feats" "ark,t:|gzip -c > $wdir/TASK_ID.gselect.gz" \ - || { touch $dir/err; error_exit "$L: Error doing Gaussian selection"; } - fi -)& # Run the language-specific initializations in parallel -done -wait -[ -f $dir/err ] && { rm $dir/err; error_exit "Error initializing models."; } - -# Language independent constants -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" -numiters_alimdl=3 # Number of iterations for estimating alignment model. -incsub_interval=8 # increase substates every 8 iterations -# total substates after each such increment -total_substates=( 5000 7000 9000 12000 16000 20000 25000 30000 35000 40000 ) -# For a given number of substates, iterate for $incsub_interval iterations -numiters=$[(${#total_substates[@]}+1)*$incsub_interval] -realign_interval=4 # realign every 4 iterations -spkvec_start=8 # use speaker subspace *after* 8 iterations -spkvec_interval=2 # reestimate the speaker vectors every 2 iterations -randprune=0.1 - -# Initially don't have speaker vectors, but change this after we estimate them. -spkvecs_gen=0 - -x=0 -while [ $x -lt $numiters ]; do - if [ $x -eq 0 ]; then - flags=v # On first iter, don't update M or N. - elif [ $spkdim -gt 0 -a $[$x%2] -eq 0 -a $x -gt $spkvec_start ]; then - # Update N on odd iterations after 1st spkvec iter, if we have spk-space. - flags=NwSvct - else # Else update M but not N. 
- flags=MwSvct - fi - - if [ $stage -le $x ]; then - echo "Pass $x: update flags = '$flags' " - multi_est_opts='' # Will contain model, acc, model-out, occs-out tuples - for L in $LANGUAGES; do - ( - data=data/$L/train - lang=data/$L/lang - wdir=$dir/$L - feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk ark:$wdir/TASK_ID.cmvn scp:$data/split$nj/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |" - gselect_opt="--gselect=ark,s,cs:gunzip -c $wdir/TASK_ID.gselect.gz|" - if [ $spkdim -gt 0 -a $spkvecs_gen -eq 1 ]; then - spkvecs_opt="--spk-vecs=ark:$wdir/TASK_ID.vecs" - else - spkvecs_opt='' - fi - silphonelist=`cat $lang/silphones.csl` -# numsubstates=`cat $wdir/numleaves` # Initial #-substates. - - if [ $[$x%$realign_interval] -eq 0 -a $x -gt 0 ]; then - echo "$L; iter $x: Aligning data" - submit_jobs.sh "$qcmd" $sjopts --log=$wdir/log/align.$x.TASK_ID.log \ - --njobs=$nj sgmm-align-compiled $spkvecs_opt $scale_opts \ - "$gselect_opt" --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk \ - --beam=8 --retry-beam=40 $wdir/$x.mdl \ - "ark:gunzip -c $wdir/TASK_ID.fsts.gz|" "$feats" \ - "ark:|gzip -c >$wdir/TASK_ID.ali.gz" || \ - { touch $dir/err; error_exit "$L, it $x: Error realigning data"; } - fi - - if [ $spkdim -gt 0 -a $x -gt $spkvec_start \ - -a $[$x%$spkvec_interval] -eq 0 ]; then - echo "$L; iter $x: Computing speaker vectors" - submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/spkvecs.$x.TASK_ID.log \ - $sjopts ali-to-post "ark:gunzip -c $wdir/TASK_ID.ali.gz|" ark:- \| \ - weight-silence-post 0.01 $silphonelist $wdir/$x.mdl ark:- ark:- \| \ - sgmm-est-spkvecs --spk2utt=ark:$data/split$nj/TASK_ID/spk2utt \ - $spkvecs_opt "$gselect_opt" --rand-prune=$randprune $wdir/$x.mdl \ - "$feats" ark,s,cs:- ark:$wdir/tmpTASK_ID.vecs || \ - { touch $dir/err; error_exit "$L, it $x: Error computing spkvecs"; } - for n in `seq 1 $nj`; do - mv $wdir/tmp${n}.vecs $wdir/${n}.vecs; - done - spkvecs_gen=1 - fi - - submit_jobs.sh "$qcmd" --njobs=$nj --log=$wdir/log/acc.$x.TASK_ID.log \ - $sjopts sgmm-acc-stats --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk \ - --update-flags=$flags --rand-prune=$randprune $spkvecs_opt \ - "$gselect_opt" $wdir/$x.mdl "$feats" \ - "ark,s,cs:ali-to-post 'ark:gunzip -c $wdir/TASK_ID.ali.gz|' ark:-|" \ - $wdir/$x.TASK_ID.acc || \ - { touch $dir/err; error_exit "$L, it $x: Error accumulating stats"; } - - # Summing accs is quite fast; run locally - sgmm-sum-accs $wdir/sum.acc $wdir/$x.*.acc || \ - { touch $dir/err; error_exit "$L, it $x: Error summing stats"; } - ) & # Accumulate in parallel for different languages - wdir=$dir/$L - multi_est_opts="$multi_est_opts $wdir/$x.mdl $wdir/sum.acc $wdir/$[$x+1].mdl $wdir/$[$x+1].occs" - done - wait - [ -f $dir/err ] && \ - { rm $dir/err; error_exit "Iter $x: Error in accumulation"; } - - add_dim_opts='' - if [ $x -eq $spkvec_start ]; then - add_dim_opts="--increase-spk-dim=$spkdim --increase-phn-dim=$phndim" - elif [ $x -eq $[$spkvec_start*2] ]; then - add_dim_opts="--increase-spk-dim=$spkdim --increase-phn-dim=$phndim" - fi - split_opts='' - if [ $[$x%$incsub_interval] -eq 1 -a $x -gt 1 ]; then - index=$[($x/$incsub_interval)-1] - numsubstates=${total_substates[$index]} - split_opts="--split-substates=$numsubstates" - fi - - submit_jobs.sh "$qcmd" --log=$dir/log/update.$x.log $sjopts \ - sgmm-est-multi --update-flags=$flags $split_opts $add_dim_opts \ - $multi_est_opts || error_exit "Error in pass $x estimation." 
- - # If using speaker vectors, estimate alignment model without spkvecs - if [ $[$x%$incsub_interval] -eq 0 -a $x -gt 0 ]; then - chmod -w $dir/??/$x.mdl $dir/??/$x.occs # Preserve for scoring - [ $spkdim -gt 0 ] && est_alimodel $x; - else - rm -f $dir/??/$x.mdl $dir/??/$x.occs - fi - rm -f $dir/??/$x.*.acc $dir/??/sum.acc - fi # End of current stage - x=$[$x+1]; -done - -for L in $LANGUAGES; do - ( - wdir=$dir/$L - rm -f $wdir/final.mdl $wdir/final.occs; - chmod -w $wdir/$x.mdl $wdir/$x.occs # Preserve for scoring - ln -s $wdir/$x.mdl $wdir/final.mdl; - ln -s $wdir/$x.occs $wdir/final.occs; - # If using speaker vectors, estimate alignment model without spkvecs - [ $spkdim -gt 0 ] && est_alimodel $wdir/$x.mdl; - rm -f $wdir/final.alimdl; - ln -sf $wdir/$x.alimdl $wdir/final.alimdl; - - # Print out summary of the warning messages. - for x in $wdir/log/*.log; do - n=`grep WARNING $x | wc -l`; - if [ $n -ne 0 ]; then echo "$n warnings in $x"; fi; - done - ) -done - -echo Done diff --git a/egs/gp/s1/path.sh b/egs/gp/s1/path.sh index a38149ac899..cee9bacbde9 100644 --- a/egs/gp/s1/path.sh +++ b/egs/gp/s1/path.sh @@ -7,7 +7,7 @@ KALDIROOT=/exports/home/aghoshal/kaldi/trunk KALDISRC=$KALDIROOT/src KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin -KALDIBIN=$KALDIBIN:$KALDISRC/sgmmbin:$KALDISRC/lm +KALDIBIN=$KALDIBIN:$KALDISRC/sgmm2bin:$KALDISRC/lm FSTBIN=$KALDIROOT/tools/openfst/bin LMBIN=$KALDIROOT/tools/irstlm/bin diff --git a/egs/gp/s1/steps/decode_sgmm_deltas.sh b/egs/gp/s1/steps/decode_sgmm_deltas.sh deleted file mode 100755 index 0e15ef5aef5..00000000000 --- a/egs/gp/s1/steps/decode_sgmm_deltas.sh +++ /dev/null @@ -1,162 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Arnab Ghoshal -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# Decoding script for SGMM using standard MFCC/PLP + delta + acceleration -# features. - -# assumes you are using speaker vectors [for no vectors, see -# decode_sgmm_novec_lda_etc.sh, if it exists already]. -# if this includes speaker-specific transforms, you have to provide an "old" -# decoding directory where the transforms are located. The data decoded in -# that directory must be split up in the same way as the current directory. - -function error_exit () { - echo -e "$@" >&2; exit 1; -} - -function file_exists () { - [ -f $1 ] || error_exit "$PROG: no such file '$1'" -} - -function readposint () { # Strictly speaking, reading non-negative integers - local retval=${1/#*=/}; # In case --switch=ARG format was used - [[ "$retval" =~ ^[0-9]*$ ]] \ - || error_exit "Argument \"$retval\" not a non-negative integer." 
-  echo $retval
-}
-
-beam=13.0
-nj=1       # Default total number of jobs
-jobid=0    # Default job number
-qcmd=""    # Options for the submit_jobs.sh script
-sjopts=""  # Options for the submit_jobs.sh script
-use_spkvecs=''  # Not expecting a model with speaker vectors, by default.
-
-PROG=`basename $0`;
-usage="Usage: $PROG [options] <graph-dir> <data-dir> <decode-dir> [<old-decode-dir>]\n
-e.g.: $PROG -j 10 0 exp/sgmm3c/graph_tgpr data/test_dev93 exp/sgmm3c/decode_dev93_tgpr exp/tri2b/decode_dev93_tgpr\n\n
-Options:\n
-  --help\t\tPrint this message and exit.\n
-  --beam FLOAT\tDecoding beam (default=$beam).\n
-  -j INT INT\tNumber of parallel jobs to run (default=$nj) and current jobid.\n
-  --qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n
-  --sjopts STRING\tOptions for the 'submit_jobs.sh' script.\n
-  --with-spkvecs\tModel has speaker vectors; do 2-pass decoding.\n
-";
-
-while [ $# -gt 0 ]; do
-  case "${1# *}" in  # ${1# *} strips any leading spaces from the arguments
-    --help) echo -e $usage; exit 0 ;;
-    --beam) beam=$2; shift 2 ;;
-    -j) nj=`readposint $2`; jobid=`readposint $3`; shift 3 ;;
-    --qcmd) qcmd=" --qcmd=${2}"; shift 2 ;;
-    --sjopts) sjopts="$2"; shift 2 ;;
-    --with-spkvecs) use_spkvecs=1; shift ;;
-    -*)  echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;;
-    *)   break ;;   # end of options: interpreted as num-leaves
-  esac
-done
-
-if [ $# -lt 3 -o $# -gt 4 ]; then
-  error_exit $usage;
-fi
-
-[ -f path.sh ] && . path.sh
-
-graphdir=$1
-data=$2
-dir=$3
-transdir=$4
-acwt=0.1   # Just a default value, used for adaptation and beam-pruning..
-
-srcdir=`dirname $dir`;  # Assume model directory one level up from decoding directory.
-
-mkdir -p $dir
-
-if [ $nj -gt 1 ]; then
-  mydata=$data/split$nj/$jobid
-else
-  mydata=$data
-fi
-
-requirements="$mydata/feats.scp $srcdir/final.mdl $graphdir/HCLG.fst"
-[ -z "$use_spkvecs" ] || requirements=$requirements" $srcdir/final.alimdl"
-for f in $requirements; do
-  file_exists $f
-done
-
-if [ ! -z "$transdir" ]; then  # "$transdir" nonempty..
-  file_exists $transdir/$n.trans
-fi
-
-feats="ark:compute-cmvn-stats --spk2utt=ark:$mydata/spk2utt scp:$mydata/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$mydata/utt2spk ark:- scp:$mydata/feats.scp ark:- | add-deltas ark:- ark:- |"
-
-[ ! -z "$transdir" ] && feats="$feats transform-feats --utt2spk=ark:$mydata/utt2spk ark:$transdir/$jobid.trans ark:- ark:- |"
-
-
-# Do Gaussian selection, since we'll have two decoding passes and don't want to
-# redo this.  Note: it doesn't make a difference if we use final.mdl or
-# final.alimdl, they have the same UBM.
-sgmm-gselect $srcdir/final.mdl "$feats" "ark:|gzip -c >$dir/$jobid.gselect.gz" \
-  2>$dir/gselect$jobid.log \
-  || error_exit "Error in Gaussian selection.";
-gselect_opt="--gselect=ark:gunzip -c $dir/$jobid.gselect.gz|"
-
-target_lat="$dir/lat.$jobid.gz"
-[ -z "$use_spkvecs" ] || target_lat="$dir/pre_lat.$jobid.gz"
-align_model="$srcdir/final.mdl"
-[ -z "$use_spkvecs" ] || align_model="$srcdir/final.alimdl"
-
-# Generate a state-level lattice for rescoring, with the alignment model and no
-# speaker vectors.
-
-sgmm-latgen-faster --max-active=7000 --beam=$beam --lattice-beam=6.0 \
-  --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \
-  --word-symbol-table=$graphdir/words.txt "$gselect_opt" $align_model \
-  $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $target_lat" \
-  2> $dir/decode_pass1.$jobid.log \
-  || error_exit "Error in 1st-pass decoding.";
-
-# Do a second pass "decoding" if using speaker vectors.
-z "$use_spkvecs" ]; then - silphonelist=`cat $graphdir/silphones.csl` || exit 1 - ( lattice-determinize --acoustic-scale=$acwt --prune=true --beam=4.0 \ - "ark:gunzip -c $dir/pre_lat.$jobid.gz|" ark:- \ - | lattice-to-post --acoustic-scale=$acwt ark:- ark:- \ - | weight-silence-post 0.0 $silphonelist $srcdir/final.alimdl ark:- ark:- \ - | sgmm-post-to-gpost "$gselect_opt" $srcdir/final.alimdl "$feats" ark:- \ - ark:- \ - | sgmm-est-spkvecs-gpost --spk2utt=ark:$mydata/spk2utt $srcdir/final.mdl \ - "$feats" ark:- "ark:$dir/$jobid.vecs" - ) 2> $dir/vecs.$jobid.log \ - || error_exit "Error estimating speaker vectors."; - - # Now rescore the state-level lattices with the adapted features and the - # corresponding model. Prune and determinize the lattices to limit their size. - - sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$mydata/utt2spk \ - --spk-vecs=ark:$dir/$jobid.vecs $srcdir/final.mdl \ - "ark:gunzip -c $dir/pre_lat.$jobid.gz|" "$feats" \ - "ark:|lattice-determinize --acoustic-scale=$acwt --prune=true --beam=6.0 ark:- ark:- | gzip -c > $dir/lat.$jobid.gz" \ - 2>$dir/rescore.$jobid.log \ - || error_exit "Error in 2nd-pass rescoring."; - - rm $dir/pre_lat.$jobid.gz - # The top-level decoding script rescores "lat.$jobid.gz" to get final output. -fi - diff --git a/egs/gp/s1/steps/train_sgmm_deltas.sh b/egs/gp/s1/steps/train_sgmm_deltas.sh deleted file mode 100755 index e68a1757308..00000000000 --- a/egs/gp/s1/steps/train_sgmm_deltas.sh +++ /dev/null @@ -1,270 +0,0 @@ -#!/bin/bash - -# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This is Subspace Gaussian Mixture Model (SGMM) training-- -# see "The subspace Gaussian mixture model--A structured model for speech recognition" -# by D. Povey et al, Computer Speech and Language, 2011. - -function error_exit () { - echo -e "$@" >&2; exit 1; -} - -function readint () { - local retval=${1/#*=/}; # In case --switch=ARG format was used - retval=${retval#0*} # Strip any leading 0's - [[ "$retval" =~ ^-?[1-9][0-9]*$ ]] \ - || error_exit "Argument \"$retval\" not an integer." - echo $retval -} - -nj=4 # Default number of jobs -stage=-4 # Default starting stage (start with tree building) -qcmd="" # Options for the submit_jobs.sh script -sjopts="" # Options for the submit_jobs.sh script - -PROG=`basename $0`; -usage="Usage: $PROG [options] \n -e.g.: $PROG 10000 40 39 data/train data/lang exp/tri2a_ali exp/ubm3c/final.ubm exp/sgmm3c\n\n -Options:\n - --help\t\tPrint this message and exit\n - --num-jobs INT\tNumber of parallel jobs to run (default=$nj).\n - --qcmd STRING\tCommand for submitting a job to a grid engine (e.g. qsub) including switches.\n - --sjopts STRING\tOptions for the 'submit_jobs.sh' script\n - --stage INT\tStarting stage (e.g. 
-4 for SGMM init; 2 for iter 2; default=$stage)\n -"; - -while [ $# -gt 0 ]; do - case "${1# *}" in # ${1# *} strips any leading spaces from the arguments - --help) echo -e $usage; exit 0 ;; - --num-jobs) - shift; nj=`readint $1`; - [ $nj -lt 1 ] && error_exit "--num-jobs arg '$nj' not positive."; - shift ;; - --qcmd) - shift; qcmd=" --qcmd=${1}"; shift ;; - --sjopts) - shift; sjopts="$1"; shift ;; - --stage) - shift; stage=`readint $1`; shift ;; - -*) echo "Unknown argument: $1, exiting"; echo -e $usage; exit 1 ;; - *) break ;; # end of options: interpreted as num-leaves - esac -done - -if [ $# != 8 ]; then - error_exit $usage; -fi - -[ -f path.sh ] && . path.sh - -# This is SGMM with speaker vectors, on top of LDA+[something] features. -# Any speaker-specific transforms are obtained from the alignment directory. -# To be run from .. - -totsubstates=$1 -phndim=$2 -spkdim=$3 -data=$4 -lang=$5 -alidir=$6 -ubm=$7 -dir=$8 - -mkdir -p $dir || exit 1; - -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" - -numiters=25 # Total number of iterations -numiters_alimdl=3 # Number of iterations for estimating alignment model. -maxiterinc=15 # Last iter to increase #substates on. -realign_iters="5 10 15"; -spkvec_iters="5 8 12 17" -add_dim_iters="6 8 10 12"; # Iters on which to increase phn dim and/or spk dim, - # if necessary, In most cases, either none of these or only the first of these - # will have any effect (we increase in increments of [feature dim]) - -oov_sym=`cat $lang/oov.txt` -silphonelist=`cat $lang/silphones.csl` - -numsubstates=`cat $dir/numleaves` # Initial #-substates. -# per-iter increment for #substates -incsubstates=$[($totsubstates-$numsubstates)/$maxiterinc] - -# Initially don't have speaker vectors, but change this after we estimate them. -spkvecs_opt= -gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/TASK_ID.gselect.gz|" - -randprune=0.1 -mkdir -p $dir/log - -featspart="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk ark:$alidir/TASK_ID.cmvn scp:$data/split$nj/TASK_ID/feats.scp ark:- | add-deltas ark:- ark:- |" - -if [ ! -f $ubm ]; then - echo "No UBM in $ubm" - exit 1; -fi - -if [ $stage -le -4 ]; then - submit_jobs.sh "$qcmd" --log=$dir/log/init_sgmm.log $sjopts \ - sgmm-init --phn-space-dim=$phndim --spk-space-dim=$spkdim $lang/topo \ - $dir/tree $ubm $dir/0.mdl || error_exit "SGMM init failed." -fi - -if [ $stage -le -3 ]; then -# Make training graphs (this is split in $nj parts). - echo "Compiling training graphs" - submit_jobs.sh "$qcmd" --njobs=$nj --log=$dir/log/compile_graphsTASK_ID.log \ - $sjopts compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst \ - "ark:sym2int.pl --map-oov '$oov_sym' --ignore-first-field $lang/words.txt < $data/split$nj/TASK_ID/text |" \ - "ark:|gzip -c >$dir/TASK_ID.fsts.gz" \ - || error_exit "Error compiling training graphs" -fi - -if [ $stage -le -2 ]; then - echo "Doing Gaussian selection" - submit_jobs.sh "$qcmd" --njobs=$nj --log=$dir/log/gselectTASK_ID.log \ - $sjopts sgmm-gselect $dir/0.mdl "$featspart" "ark,t:|gzip -c > $dir/TASK_ID.gselect.gz" \ - || error_exit "Error doing Gaussian selection" -fi - - -if [ $stage -le -1 ]; then - echo "Converting alignments" # don't bother parallelizing; very fast. 
- for n in `seq 1 $nj`; do - convert-ali $alidir/final.mdl $dir/0.mdl $dir/tree \ - "ark:gunzip -c $alidir/$n.ali.gz|" "ark:|gzip -c >$dir/$n.ali.gz" \ - 2>$dir/log/convert.$n.log - done -fi - -x=0 -while [ $x -lt $numiters ]; do - if [ $x -eq 0 ]; then - flags=vwcSt # On first iter, don't update M or N. - elif [ $spkdim -gt 0 -a $[$x%2] -eq 1 -a \ - $x -ge `echo $spkvec_iters | awk '{print $1}'` ]; then - # Update N on odd iterations after 1st spkvec iter, if we have spk-space. - flags=vNwcSt - else # Else update M but not N. - flags=vMwcSt - fi - - if [ $stage -le $x ]; then - echo "Pass $x: update flags = '$flags' " - if echo $realign_iters | grep -w $x >/dev/null; then - echo "Aligning data" - submit_jobs.sh "$qcmd" --njobs=$nj --log=$dir/log/align.$x.TASK_ID.log \ - $sjopts sgmm-align-compiled $spkvecs_opt $scale_opts "$gselect_opt" \ - --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk --beam=8 --retry-beam=40 \ - $dir/$x.mdl "ark:gunzip -c $dir/TASK_ID.fsts.gz|" "$featspart" \ - "ark:|gzip -c >$dir/TASK_ID.ali.gz" \ - || error_exit "Error realigning data on iter $x" - fi - - if [ $spkdim -gt 0 ] && echo $spkvec_iters | grep -w $x >/dev/null; then - submit_jobs.sh "$qcmd" --njobs=$nj --log=$dir/log/spkvecs.$x.TASK_ID.log \ - $sjopts ali-to-post "ark:gunzip -c $dir/TASK_ID.ali.gz|" ark:- \| \ - weight-silence-post 0.01 $silphonelist $dir/$x.mdl ark:- ark:- \| \ - sgmm-est-spkvecs --spk2utt=ark:$data/split$nj/TASK_ID/spk2utt \ - $spkvecs_opt "$gselect_opt" --rand-prune=$randprune $dir/$x.mdl \ - "$featspart" ark,s,cs:- ark:$dir/tmpTASK_ID.vecs \ - || error_exit "Error computing speaker vectors on iter $x" - for n in `seq 1 $nj`; do - mv $dir/tmp${n}.vecs $dir/${n}.vecs; - done - spkvecs_opt="--spk-vecs=ark:$dir/TASK_ID.vecs" - fi - - submit_jobs.sh "$qcmd" --njobs=$nj --log=$dir/log/acc.$x.TASK_ID.log \ - $sjopts sgmm-acc-stats --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk \ - --update-flags=$flags --rand-prune=$randprune $spkvecs_opt \ - "$gselect_opt" $dir/$x.mdl "$featspart" \ - "ark,s,cs:ali-to-post 'ark:gunzip -c $dir/TASK_ID.ali.gz|' ark:-|" \ - $dir/$x.TASK_ID.acc || error_exit "Error accumulating stats on iter $x" - - add_dim_opts= - if echo $add_dim_iters | grep -w $x >/dev/null; then - add_dim_opts="--increase-phn-dim=$phndim --increase-spk-dim=$spkdim" - fi - - submit_jobs.sh "$qcmd" --log=$dir/log/update.$x.log $sjopts \ - sgmm-est --update-flags=$flags --split-substates=$numsubstates \ - $add_dim_opts --write-occs=$dir/$[$x+1].occs $dir/$x.mdl \ - "sgmm-sum-accs - $dir/$x.*.acc|" $dir/$[$x+1].mdl \ - || error_exit "Error in pass $x estimation." - - rm -f $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs - fi - - if [ $x -lt $maxiterinc ]; then - numsubstates=$[$numsubstates+$incsubstates] - fi - x=$[$x+1]; -done - -( cd $dir; rm final.mdl final.occs 2>/dev/null; - ln -s $x.mdl final.mdl; - ln -s $x.occs final.occs ) - -if [ $spkdim -gt 0 ]; then - # If we have speaker vectors, we need an alignment model. - # The point of this last phase of accumulation is to get Gaussian-level - # alignments with the speaker vectors but accumulate stats without - # any speaker vectors; we re-estimate M, w, c and S to get a model - # that's compatible with not having speaker vectors. - - # We do this for a few iters, in this recipe. - cur_alimdl=$dir/$x.mdl - y=0; - while [ $y -lt $numiters_alimdl ]; do - echo "Pass $y of building alignment model" - if [ $y -eq 0 ]; then - flags=MwcS # First time don't update v... - else - flags=vMwcS # don't update transitions-- will probably share graph with normal model. 
- fi - - if [ $stage -le $[$y+100] ]; then - submit_jobs.sh "$qcmd" --njobs=$nj --log=$dir/log/acc_ali.$y.TASK_ID.log \ - $sjopts ali-to-post "ark:gunzip -c $dir/TASK_ID.ali.gz|" ark:- \| \ - sgmm-post-to-gpost $spkvecs_opt "$gselect_opt" \ - --utt2spk=ark:$data/split$nj/TASK_ID/utt2spk $dir/$x.mdl \ - "$featspart" ark,s,cs:- ark:- \| \ - sgmm-acc-stats-gpost --update-flags=$flags $cur_alimdl "$featspart" \ - ark,s,cs:- $dir/$y.TASK_ID.aliacc \ - || error_exit "Error accumulating stats for alignment model on iter $y" - - submit_jobs.sh "$qcmd" --log=$dir/log/update_ali.$y.log $sjopts \ - sgmm-est --update-flags=$flags --remove-speaker-space=true \ - $cur_alimdl "sgmm-sum-accs - $dir/$y.*.aliacc|" $dir/$[$y+1].alimdl \ - || error_exit "Error estimating alignment model on iter $y"; - rm $dir/$y.*.aliacc || exit 1; - [ $y -gt 0 ] && rm $dir/$y.alimdl - fi - cur_alimdl=$dir/$[$y+1].alimdl - y=$[$y+1] - done - (cd $dir; rm final.alimdl 2>/dev/null; ln -s $y.alimdl final.alimdl ) -fi - -# Print out summary of the warning messages. -for x in $dir/log/*.log; do - n=`grep WARNING $x | wc -l`; - if [ $n -ne 0 ]; then echo $n warnings in $x; fi; -done - -echo Done diff --git a/egs/gp/s5/path.sh b/egs/gp/s5/path.sh index af75fa50c1b..e9f7a8337bc 100644 --- a/egs/gp/s5/path.sh +++ b/egs/gp/s5/path.sh @@ -9,7 +9,7 @@ KALDI_ROOT=/homes/eva/q/qghoshal/src/kaldi/trunk KALDISRC=$KALDI_ROOT/src KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin -KALDIBIN=$KALDIBIN:$KALDISRC/sgmmbin:$KALDISRC/lm +KALDIBIN=$KALDIBIN:$KALDISRC/sgmm2bin:$KALDISRC/lm FSTBIN=$KALDI_ROOT/tools/openfst/bin LMBIN=$KALDI_ROOT/tools/irstlm/bin diff --git a/egs/gp/s5/run.sh b/egs/gp/s5/run.sh index e563bdff0d1..8054d02988d 100755 --- a/egs/gp/s5/run.sh +++ b/egs/gp/s5/run.sh @@ -347,12 +347,12 @@ for L in $GP_LANGUAGES; do num_states=$(grep "^$L" conf/sgmm.conf | cut -f2) num_substates=$(grep "^$L" conf/sgmm.conf | cut -f3) mkdir -p exp/$L/sgmm2a - steps/train_sgmm.sh --cmd "$train_cmd" --cluster-thresh 100 --spk-dim 0 \ + steps/train_sgmm2.sh --cmd "$train_cmd" --cluster-thresh 100 --spk-dim 0 \ $num_states $num_substates data/$L/train data/$L/lang exp/$L/tri1_ali \ exp/$L/ubm2a/final.ubm exp/$L/sgmm2a >& exp/$L/sgmm2a/train.log mkdir -p exp/$L/sgmm2b - steps/train_sgmm.sh --cmd "$train_cmd" --cluster-thresh 100 \ + steps/train_sgmm2.sh --cmd "$train_cmd" --cluster-thresh 100 \ $num_states $num_gauss data/$L/train data/$L/lang exp/$L/tri1_ali \ exp/$L/ubm2a/final.ubm exp/$L/sgmm2b >& exp/$L/sgmm2b/train.log ) & @@ -370,7 +370,7 @@ for L in $GP_LANGUAGES; do $highmem_cmd $graph_dir/mkgraph.log \ utils/mkgraph.sh data/$L/lang_test_${lm_suffix} exp/$L/$sgmm $graph_dir - steps/decode_sgmm.sh --nj 5 --cmd "$decode_cmd" $graph_dir data/$L/dev \ + steps/decode_sgmm2.sh --nj 5 --cmd "$decode_cmd" $graph_dir data/$L/dev \ exp/$L/$sgmm/decode_dev_${lm_suffix} ) & done # loop over LMs diff --git a/egs/lre07/v2/path.sh b/egs/lre07/v2/path.sh index 7cf73af8c53..d55f970d1fb 100755 --- a/egs/lre07/v2/path.sh +++ b/egs/lre07/v2/path.sh @@ -1,3 +1,3 @@ export KALDI_ROOT=$(cd ../../..; pwd) -export 
PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH export LC_ALL=C diff --git a/egs/rm/s5/local/run_pitch.sh b/egs/rm/s5/local/run_pitch.sh index ed17b628f47..7ff2bd975e1 100755 --- a/egs/rm/s5/local/run_pitch.sh +++ b/egs/rm/s5/local/run_pitch.sh @@ -208,7 +208,6 @@ done # local/run_raw_fmllr.sh # You don't have to run all 3 of the below, e.g. you can just run the run_sgmm2.sh -#local/run_sgmm.sh local/run_sgmm2.sh #local/run_sgmm2x.sh diff --git a/egs/rm/s5/local/run_sgmm.sh b/egs/rm/s5/local/run_sgmm.sh deleted file mode 100755 index 3a9ce297ada..00000000000 --- a/egs/rm/s5/local/run_sgmm.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -. cmd.sh - -## SGMM on top of LDA+MLLT+SAT features. -if [ ! -f exp/ubm4a/final.mdl ]; then - steps/train_ubm.sh --silence-weight 0.5 --cmd "$train_cmd" 400 data/train data/lang exp/tri3b_ali exp/ubm4a || exit 1; -fi -steps/train_sgmm.sh --cmd "$train_cmd" 2500 7500 data/train data/lang exp/tri3b_ali exp/ubm4a/final.ubm exp/sgmm4a || exit 1; - -utils/mkgraph.sh data/lang exp/sgmm4a exp/sgmm4a/graph || exit 1; - -steps/decode_sgmm.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \ - --transform-dir exp/tri3b/decode exp/sgmm4a/graph data/test exp/sgmm4a/decode || exit 1; - -steps/decode_sgmm.sh --use-fmllr true --config conf/decode.config --nj 20 --cmd "$decode_cmd" \ - --transform-dir exp/tri3b/decode exp/sgmm4a/graph data/test exp/sgmm4a/decode_fmllr || exit 1; - - # Now we'll align the SGMM system to prepare for discriminative training. - steps/align_sgmm.sh --nj 8 --cmd "$train_cmd" --transform-dir exp/tri3b \ - --use-graphs true --use-gselect true data/train data/lang exp/sgmm4a exp/sgmm4a_ali || exit 1; - steps/make_denlats_sgmm.sh --nj 8 --sub-split 20 --cmd "$decode_cmd" --transform-dir exp/tri3b \ - data/train data/lang exp/sgmm4a_ali exp/sgmm4a_denlats - steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri3b --boost 0.2 \ - data/train data/lang exp/sgmm4a_ali exp/sgmm4a_denlats exp/sgmm4a_mmi_b0.2 - - for iter in 1 2 3 4; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri3b/decode data/lang data/test exp/sgmm4a/decode exp/sgmm4a_mmi_b0.2/decode_it$iter & - done - -wait -steps/decode_combine.sh data/test data/lang exp/tri1/decode exp/tri2a/decode exp/combine_1_2a/decode || exit 1; -steps/decode_combine.sh data/test data/lang exp/sgmm4a/decode exp/tri3b_mmi/decode exp/combine_4a_3b/decode || exit 1; -# combining the sgmm run and the best MMI+fMMI run. 
-steps/decode_combine.sh data/test data/lang exp/sgmm4a/decode exp/tri3b_fmmi_c/decode_it5 exp/combine_4a_3b_fmmic5/decode || exit 1;
-
-steps/decode_combine.sh data/test data/lang exp/sgmm4a_mmi_b0.2/decode_it4 exp/tri3b_fmmi_c/decode_it5 exp/combine_4a_mmi_3b_fmmic5/decode || exit 1;
-
diff --git a/egs/rm/s5/run.sh b/egs/rm/s5/run.sh
index 00bac326a80..aa838ceda89 100755
--- a/egs/rm/s5/run.sh
+++ b/egs/rm/s5/run.sh
@@ -233,8 +233,7 @@ done
 
 # local/run_raw_fmllr.sh
 
-# You don't have to run all 3 of the below, e.g. you can just run the run_sgmm2.sh
-#local/run_sgmm.sh
+# You don't have to run both of the below, e.g. you can just run run_sgmm2.sh
 local/run_sgmm2.sh
 #local/run_sgmm2x.sh
 
diff --git a/egs/sprakbanken/s5/local/run_sgmm.sh b/egs/sprakbanken/s5/local/run_sgmm.sh
deleted file mode 100755
index 27d8449896f..00000000000
--- a/egs/sprakbanken/s5/local/run_sgmm.sh
+++ /dev/null
@@ -1,112 +0,0 @@
-#!/bin/bash
-
-# This script is invoked from ../run.sh
-# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity.
-
-. cmd.sh
-
-# SGMM system on si84 data [sgmm5a]. Note: the system we aligned from used the si284 data for
-# training, but this shouldn't have much effect.
-
-(
-  steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
-    data/train_si84 data/lang exp/tri4b exp/tri4b_ali_si84 || exit 1;
-
-  steps/train_ubm.sh --cmd "$train_cmd" \
-    400 data/train_si84 data/lang exp/tri4b_ali_si84 exp/ubm5a || exit 1;
-
-  steps/train_sgmm.sh --cmd "$train_cmd" \
-    3500 10000 data/train_si84 data/lang exp/tri4b_ali_si84 \
-    exp/ubm5a/final.ubm exp/sgmm5a || exit 1;
-
-  (
-    utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5a exp/sgmm5a/graph_tgpr
-    steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
-      exp/sgmm5a/graph_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93
-  ) &
-
-  steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si84 \
-    --use-graphs true --use-gselect true data/train_si84 data/lang exp/sgmm5a exp/sgmm5a_ali_si84 || exit 1;
-  steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 \
-    data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84
-
-  steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \
-    data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1
-
-  for iter in 1 2 3 4; do
-    steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
-      --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \
-      exp/sgmm5a_mmi_b0.1/decode_tgpr_dev93_it$iter &
-  done
-
-  steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \
-    --update-opts "--cov-min-value=0.9" data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1_m0.9
-
-  for iter in 1 2 3 4; do
-    steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \
-      --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \
-      exp/sgmm5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it$iter &
-  done
-
-) &
-
-
-(
-# The next commands are the same thing on all the si284 data.
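
The si284 block that follows is structurally identical to the si84 block above; only the model sizes and directory names change. Side by side, with the values taken from the two blocks:

# subset   UBM Gaussians   SGMM states/substates   directories
# si84     400             3500 / 10000            exp/ubm5a, exp/sgmm5a
# si284    600             5500 / 25000            exp/ubm5b, exp/sgmm5b
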
- -# SGMM system on the si284 data [sgmm5b] - steps/train_ubm.sh --cmd "$train_cmd" \ - 600 data/train_si284 data/lang exp/tri4b_ali_si284 exp/ubm5b || exit 1; - - steps/train_sgmm.sh --cmd "$train_cmd" \ - 5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \ - exp/ubm5b/final.ubm exp/sgmm5b || exit 1; - - ( - utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5b exp/sgmm5b/graph_tgpr - steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ - exp/sgmm5b/graph_tgpr data/test_dev93 exp/sgmm5b/decode_tgpr_dev93 - steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \ - exp/sgmm5b/graph_tgpr data/test_eval92 exp/sgmm5b/decode_tgpr_eval92 - - utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm5b exp/sgmm5b/graph_bd_tgpr || exit 1; - steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \ - exp/sgmm5b/graph_bd_tgpr data/test_dev93 exp/sgmm5b/decode_bd_tgpr_dev93 - steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \ - exp/sgmm5b/graph_bd_tgpr data/test_eval92 exp/sgmm5b/decode_bd_tgpr_eval92 - ) & - - steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si284 \ - --use-graphs true --use-gselect true data/train_si284 data/lang exp/sgmm5b exp/sgmm5b_ali_si284 - - steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \ - data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 - - steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 --boost 0.1 \ - data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 exp/sgmm5b_mmi_b0.1 - - for iter in 1 2 3 4; do - for test in dev93 eval92; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_tgpr_${test} data/lang_test_tgpr data/test_${test} exp/sgmm5b/decode_tgpr_${test} \ - exp/sgmm5b_mmi_b0.1/decode_tgpr_${test}_it$iter & - - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_bd_tgpr_${test} data/lang_test_bd_tgpr data/test_${test} exp/sgmm5b/decode_bd_tgpr_${test} \ - exp/sgmm5b_mmi_b0.1/decode_bd_tgpr_${test}_it$iter & - done - done -) & - - - -# Train quinphone SGMM system. - -steps/train_sgmm.sh --cmd "$train_cmd" \ - --context-opts "--context-width=5 --central-position=2" \ - 5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \ - exp/ubm5b/final.ubm exp/sgmm5c || exit 1; - -# Decode from lattices in exp/sgmm5a/decode_tgpr_dev93. -steps/decode_sgmm_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ - data/test_dev93 data/lang_test_tgpr exp/sgmm5a/decode_tgpr_dev93 exp/sgmm5c/decode_tgpr_dev93 diff --git a/egs/swbd/s5/local/run_sgmm.sh b/egs/swbd/s5/local/run_sgmm.sh deleted file mode 100755 index da9af425fd8..00000000000 --- a/egs/swbd/s5/local/run_sgmm.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -. cmd.sh - - -# Build a SGMM system on just the 100k_nodup data, on top of LDA+MLLT+SAT. -if [ ! 
-f exp/ubm5a/final.ubm ]; then - steps/train_ubm.sh --cmd "$train_cmd" 700 data/train_100k_nodup data/lang \ - exp/tri4a_ali_100k_nodup exp/ubm5a || exit 1; -fi -steps/train_sgmm.sh --cmd "$train_cmd" \ - 4500 40000 data/train_100k_nodup data/lang exp/tri4a_ali_100k_nodup \ - exp/ubm5a/final.ubm exp/sgmm5a || exit 1; - -utils/mkgraph.sh data/lang_test exp/sgmm5a exp/sgmm5a/graph || exit 1; - -steps/decode_sgmm.sh --cmd "$decode_cmd" --config conf/decode.config \ - --nj 30 --transform-dir exp/tri4a/decode_eval2000 \ - exp/sgmm5a/graph data/eval2000 exp/sgmm5a/decode_eval2000 - - # Now discriminatively train the SGMM system on 100k_nodup data. -steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4a_ali_100k_nodup \ - --use-graphs true --use-gselect true data/train_100k_nodup data/lang exp/sgmm5a exp/sgmm5a_ali_100k_nodup - - # Took the beam down to 10 to get acceptable decoding speed. -steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --beam 9.0 --lattice-beam 6 --cmd "$decode_cmd" \ - --transform-dir exp/tri4a_ali_100k_nodup \ - data/train_100k_nodup data/lang exp/sgmm5a_ali_100k_nodup exp/sgmm5a_denlats_100k_nodup - -steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4a_ali_100k_nodup --boost 0.1 \ - data/train_100k_nodup data/lang exp/sgmm5a_ali_100k_nodup exp/sgmm5a_denlats_100k_nodup exp/sgmm5a_mmi_b0.1 - -for iter in 1 2 3 4; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4a/decode_eval2000 data/lang_test data/eval2000 exp/sgmm5a/decode_eval2000 \ - exp/sgmm5a_mmi_b0.1/decode_eval2000_it$iter & -done - diff --git a/egs/swbd/s5/run.sh b/egs/swbd/s5/run.sh index 7286938b290..d61b818fe1b 100755 --- a/egs/swbd/s5/run.sh +++ b/egs/swbd/s5/run.sh @@ -161,7 +161,6 @@ steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ -#local/run_sgmm.sh local/run_sgmm2.sh # Building a larger SAT system. diff --git a/egs/swbd/s5/run_edin.sh b/egs/swbd/s5/run_edin.sh index 5778d017529..8aff7e40c66 100755 --- a/egs/swbd/s5/run_edin.sh +++ b/egs/swbd/s5/run_edin.sh @@ -340,7 +340,7 @@ done # TODO(arnab): add SGMM and hybrid -# local/run_sgmm.sh +# local/run_sgmm2.sh # # Recipe with DNN system on top of fMLLR features # local/run_hybrid.sh diff --git a/egs/swbd/s5b/local/run_sgmm.sh b/egs/swbd/s5b/local/run_sgmm.sh deleted file mode 100755 index da9af425fd8..00000000000 --- a/egs/swbd/s5b/local/run_sgmm.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -. cmd.sh - - -# Build a SGMM system on just the 100k_nodup data, on top of LDA+MLLT+SAT. -if [ ! -f exp/ubm5a/final.ubm ]; then - steps/train_ubm.sh --cmd "$train_cmd" 700 data/train_100k_nodup data/lang \ - exp/tri4a_ali_100k_nodup exp/ubm5a || exit 1; -fi -steps/train_sgmm.sh --cmd "$train_cmd" \ - 4500 40000 data/train_100k_nodup data/lang exp/tri4a_ali_100k_nodup \ - exp/ubm5a/final.ubm exp/sgmm5a || exit 1; - -utils/mkgraph.sh data/lang_test exp/sgmm5a exp/sgmm5a/graph || exit 1; - -steps/decode_sgmm.sh --cmd "$decode_cmd" --config conf/decode.config \ - --nj 30 --transform-dir exp/tri4a/decode_eval2000 \ - exp/sgmm5a/graph data/eval2000 exp/sgmm5a/decode_eval2000 - - # Now discriminatively train the SGMM system on 100k_nodup data. -steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4a_ali_100k_nodup \ - --use-graphs true --use-gselect true data/train_100k_nodup data/lang exp/sgmm5a exp/sgmm5a_ali_100k_nodup - - # Took the beam down to 10 to get acceptable decoding speed. 
-steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --beam 9.0 --lattice-beam 6 --cmd "$decode_cmd" \ - --transform-dir exp/tri4a_ali_100k_nodup \ - data/train_100k_nodup data/lang exp/sgmm5a_ali_100k_nodup exp/sgmm5a_denlats_100k_nodup - -steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4a_ali_100k_nodup --boost 0.1 \ - data/train_100k_nodup data/lang exp/sgmm5a_ali_100k_nodup exp/sgmm5a_denlats_100k_nodup exp/sgmm5a_mmi_b0.1 - -for iter in 1 2 3 4; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4a/decode_eval2000 data/lang_test data/eval2000 exp/sgmm5a/decode_eval2000 \ - exp/sgmm5a_mmi_b0.1/decode_eval2000_it$iter & -done - diff --git a/egs/vystadial_cz/online_demo/path.sh b/egs/vystadial_cz/online_demo/path.sh index e582fdc47e8..f54d95d60a8 100755 --- a/egs/vystadial_cz/online_demo/path.sh +++ b/egs/vystadial_cz/online_demo/path.sh @@ -29,7 +29,7 @@ SILENCE=models/silence.csl kaldisrc=`pwd`/../../../src openfst=`pwd`/../../../tools/openfst/ -export PATH=$kaldisrc/bin:$kaldisrc/fgmmbin:$kaldisrc/gmmbin:$kaldisrc/nnetbin:$kaldisrc/sgmm2bin:$kaldisrc/featbin:$kaldisrc/fstbin:$kaldisrc/latbin:$kaldisrc/onlinebin:$kaldisrc/sgmmbin:$kaldisrc/onl-rec:$openfst/bin:"$PATH" +export PATH=$kaldisrc/bin:$kaldisrc/fgmmbin:$kaldisrc/gmmbin:$kaldisrc/nnetbin:$kaldisrc/sgmm2bin:$kaldisrc/featbin:$kaldisrc/fstbin:$kaldisrc/latbin:$kaldisrc/onlinebin:$kaldisrc/onl-rec:$openfst/bin:"$PATH" export LD_LIBRARY_PATH=$kaldisrc/onl-rec:$kaldisrc/pykaldi/kaldi:$openfst/lib:$openfst/lib/fst:$LD_LIBRARY_PATH export PYTHONPATH=$kaldisrc/pykaldi:$kaldisrc/pykaldi/pyfst:$PYTHONPATH diff --git a/egs/wsj/s5/local/run_sgmm.sh b/egs/wsj/s5/local/run_sgmm.sh deleted file mode 100755 index 27d8449896f..00000000000 --- a/egs/wsj/s5/local/run_sgmm.sh +++ /dev/null @@ -1,112 +0,0 @@ -#!/bin/bash - -# This script is invoked from ../run.sh -# It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity. - -. cmd.sh - -# SGMM system on si84 data [sgmm5a]. Note: the system we aligned from used the si284 data for -# training, but this shouldn't have much effect. 
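
This deleted wsj script is the canonical copy of the recipe (the sprakbanken version above is identical, per the matching index hash 27d8449896f). Its core reduces to a five-step skeleton; a minimal sketch, with generic directory names standing in for the si84/si284 variants:

# Minimal skeleton of the recipe being removed (directory names illustrative):
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" data/train data/lang exp/tri4b exp/tri4b_ali
steps/train_ubm.sh --cmd "$train_cmd" 400 data/train data/lang exp/tri4b_ali exp/ubm
steps/train_sgmm.sh --cmd "$train_cmd" 3500 10000 data/train data/lang exp/tri4b_ali \
  exp/ubm/final.ubm exp/sgmm
utils/mkgraph.sh data/lang_test exp/sgmm exp/sgmm/graph
steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" exp/sgmm/graph data/test exp/sgmm/decode
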
- -( - steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ - data/train_si84 data/lang exp/tri4b exp/tri4b_ali_si84 || exit 1; - - steps/train_ubm.sh --cmd "$train_cmd" \ - 400 data/train_si84 data/lang exp/tri4b_ali_si84 exp/ubm5a || exit 1; - - steps/train_sgmm.sh --cmd "$train_cmd" \ - 3500 10000 data/train_si84 data/lang exp/tri4b_ali_si84 \ - exp/ubm5a/final.ubm exp/sgmm5a || exit 1; - - ( - utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5a exp/sgmm5a/graph_tgpr - steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ - exp/sgmm5a/graph_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 - ) & - - steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si84 \ - --use-graphs true --use-gselect true data/train_si84 data/lang exp/sgmm5a exp/sgmm5a_ali_si84 || exit 1; - steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 \ - data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 - - steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \ - data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1 - - for iter in 1 2 3 4; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \ - exp/sgmm5a_mmi_b0.1/decode_tgpr_dev93_it$iter & - done - - steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si84 --boost 0.1 \ - --update-opts "--cov-min-value=0.9" data/train_si84 data/lang exp/sgmm5a_ali_si84 exp/sgmm5a_denlats_si84 exp/sgmm5a_mmi_b0.1_m0.9 - - for iter in 1 2 3 4; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_tgpr_dev93 data/lang_test_tgpr data/test_dev93 exp/sgmm5a/decode_tgpr_dev93 \ - exp/sgmm5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it$iter & - done - -) & - - -( -# The next commands are the same thing on all the si284 data. 
- -# SGMM system on the si284 data [sgmm5b] - steps/train_ubm.sh --cmd "$train_cmd" \ - 600 data/train_si284 data/lang exp/tri4b_ali_si284 exp/ubm5b || exit 1; - - steps/train_sgmm.sh --cmd "$train_cmd" \ - 5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \ - exp/ubm5b/final.ubm exp/sgmm5b || exit 1; - - ( - utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5b exp/sgmm5b/graph_tgpr - steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ - exp/sgmm5b/graph_tgpr data/test_dev93 exp/sgmm5b/decode_tgpr_dev93 - steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \ - exp/sgmm5b/graph_tgpr data/test_eval92 exp/sgmm5b/decode_tgpr_eval92 - - utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm5b exp/sgmm5b/graph_bd_tgpr || exit 1; - steps/decode_sgmm.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \ - exp/sgmm5b/graph_bd_tgpr data/test_dev93 exp/sgmm5b/decode_bd_tgpr_dev93 - steps/decode_sgmm.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \ - exp/sgmm5b/graph_bd_tgpr data/test_eval92 exp/sgmm5b/decode_bd_tgpr_eval92 - ) & - - steps/align_sgmm.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si284 \ - --use-graphs true --use-gselect true data/train_si284 data/lang exp/sgmm5b exp/sgmm5b_ali_si284 - - steps/make_denlats_sgmm.sh --nj 30 --sub-split 30 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \ - data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 - - steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 --boost 0.1 \ - data/train_si284 data/lang exp/sgmm5b_ali_si284 exp/sgmm5b_denlats_si284 exp/sgmm5b_mmi_b0.1 - - for iter in 1 2 3 4; do - for test in dev93 eval92; do - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_tgpr_${test} data/lang_test_tgpr data/test_${test} exp/sgmm5b/decode_tgpr_${test} \ - exp/sgmm5b_mmi_b0.1/decode_tgpr_${test}_it$iter & - - steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4b/decode_bd_tgpr_${test} data/lang_test_bd_tgpr data/test_${test} exp/sgmm5b/decode_bd_tgpr_${test} \ - exp/sgmm5b_mmi_b0.1/decode_bd_tgpr_${test}_it$iter & - done - done -) & - - - -# Train quinphone SGMM system. - -steps/train_sgmm.sh --cmd "$train_cmd" \ - --context-opts "--context-width=5 --central-position=2" \ - 5500 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \ - exp/ubm5b/final.ubm exp/sgmm5c || exit 1; - -# Decode from lattices in exp/sgmm5a/decode_tgpr_dev93. -steps/decode_sgmm_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \ - data/test_dev93 data/lang_test_tgpr exp/sgmm5a/decode_tgpr_dev93 exp/sgmm5c/decode_tgpr_dev93 diff --git a/egs/wsj/s5/run.sh b/egs/wsj/s5/run.sh index ca13c1704f2..fb004117658 100755 --- a/egs/wsj/s5/run.sh +++ b/egs/wsj/s5/run.sh @@ -361,9 +361,6 @@ local/run_mmi_tri4b.sh #local/run_nnet2.sh -## Segregated some SGMM builds into a separate file. 
-#local/run_sgmm.sh - # You probably want to run the sgmm2 recipe as it's generally a bit better: local/run_sgmm2.sh diff --git a/egs/wsj/s5/steps/align_sgmm.sh b/egs/wsj/s5/steps/align_sgmm.sh deleted file mode 100755 index 782157f5ebe..00000000000 --- a/egs/wsj/s5/steps/align_sgmm.sh +++ /dev/null @@ -1,198 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - -# Computes training alignments and (if needed) speaker-vectors, given an -# SGMM system. If the system is built on top of SAT, you should supply -# transforms with the --transform-dir option. - -# If you supply the --use-graphs option, it will use the training -# graphs from the source directory. - -# Begin configuration section. -stage=0 -nj=4 -cmd=run.pl -use_graphs=false # use graphs from srcdir -use_gselect=false # use gselect info from srcdir [regardless, we use - # Gaussian-selection info, we might have to compute it though.] -gselect=15 # Number of Gaussian-selection indices for SGMMs. -# Begin configuration. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" -beam=10 -retry_beam=40 -transform_dir= # directory to find fMLLR transforms in. -# End configuration options. - -echo "$0 $@" # Print the command line for logging - -[ -f path.sh ] && . ./path.sh # source the path. -. parse_options.sh || exit 1; - -if [ $# != 4 ]; then - echo "usage: steps/align_sgmm.sh " - echo "e.g.: steps/align_sgmm.sh --transform-dir exp/tri3b data/train data/lang \\" - echo " exp/sgmm4a exp/sgmm5a_ali" - echo "main options (for others, see top of script file)" - echo " --config # config containing options" - echo " --nj # number of parallel jobs" - echo " --use-graphs true # use graphs in src-dir" - echo " --transform-dir # directory to find fMLLR transforms" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - exit 1; -fi - -data=$1 -lang=$2 -srcdir=$3 -dir=$4 - -oov=`cat $lang/oov.int` || exit 1; -silphonelist=`cat $lang/phones/silence.csl` || exit 1; -splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. -cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null` -sdata=$data/split$nj - -mkdir -p $dir/log -cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options. -cp $srcdir/cmvn_opts $dir 2>/dev/null # cmn/cmvn option. -echo $nj > $dir/num_jobs -[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; - -utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1; -cp $lang/phones.txt $dir || exit 1; - -cp $srcdir/{tree,final.mdl} $dir || exit 1; -[ -f $srcdir/final.alimdl ] && cp $srcdir/final.alimdl $dir -cp $srcdir/final.occs $dir; - -## Set up features. -if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi -echo "$0: feature type is $feat_type" - -case $feat_type in - delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; - lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" - cp $srcdir/final.mat $dir - ;; - *) echo "Invalid feature type $feat_type" && exit 1; -esac -if [ ! -z "$transform_dir" ]; then - echo "$0: using transforms from $transform_dir" - [ ! 
-f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; - [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ - && echo "$0: #jobs mismatch with transform-dir." && exit 1; - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" -elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then - echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," - echo " but you are not providing the --transform-dir option during alignment." -fi -## - -## Set up model and alignment model. -mdl=$srcdir/final.mdl -if [ -f $srcdir/final.alimdl ]; then - alimdl=$srcdir/final.alimdl -else - alimdl=$srcdir/final.mdl -fi -[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1; - -## Work out where we're getting the graphs from. -if $use_graphs; then - [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ - echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1; - [ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1; - graphdir=$srcdir - ln.pl $srcdir/fsts.*.gz $dir -else - graphdir=$dir - if [ $stage -le 0 ]; then - echo "$0: compiling training graphs" - tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; - $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ - compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ - "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; - fi -fi - -## Work out where we're getting the Gaussian-selection info from -if $use_gselect; then - [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ - echo "$0: you specified --use-gselect true, but #jobs mismatch." && exit 1; - [ ! -f $srcdir/gselect.1.gz ] && echo "No gselect info in $srcdir" && exit 1; - graphdir=$srcdir - gselect_opt="--gselect=ark,s,cs:gunzip -c $srcdir/gselect.JOB.gz|" - ln.pl $srcdir/gselect.*.gz $dir -else - graphdir=$dir - if [ $stage -le 1 ]; then - echo "$0: computing Gaussian-selection info" - # Note: doesn't matter whether we use $alimdl or $mdl, they will - # have the same gselect info. - $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ - sgmm-gselect --full-gmm-nbest=$gselect $alimdl \ - "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; - fi - gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" -fi - - -if [ $alimdl == $mdl ]; then - # Speaker-independent decoding-- just one pass. Not normal. - T=`sgmm-info $mdl | grep 'speaker vector space' | awk '{print $NF}'` || exit 1; - [ "$T" -ne 0 ] && echo "No alignment model, yet speaker vector space nonempty" && exit 1; - - if [ $stage -le 2 ]; then - echo "$0: aligning data in $data using model $mdl (no speaker-vectors)" - $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ - sgmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam $alimdl \ - "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; - fi - echo "$0: done aligning data." - exit 0; -fi - -# Continue with system with speaker vectors. 
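
The speaker-vector branch that follows (stages 2 through 5) runs four passes over the data; in outline, with file names as in the script:

# pass 1: sgmm-align-compiled with final.alimdl, no vectors        -> pre_ali.JOB.gz
# vecs 1: posteriors from pre_ali -> sgmm-est-spkvecs-gpost        -> pre_vecs.JOB
# vecs 2: re-estimate with --spk-vecs=pre_vecs (sgmm-est-spkvecs)  -> vecs.JOB
# pass 2: sgmm-align-compiled --spk-vecs=vecs with final.mdl       -> ali.JOB.gz
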
-if [ $stage -le 2 ]; then - echo "$0: aligning data in $data using model $alimdl" - $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ - sgmm-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam $alimdl \ - "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1; -fi - -if [ $stage -le 3 ]; then - echo "$0: computing speaker vectors (1st pass)" - $cmd JOB=1:$nj $dir/log/spk_vecs1.JOB.log \ - ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ - weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ - sgmm-post-to-gpost "$gselect_opt" $alimdl "$feats" ark:- ark:- \| \ - sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ - $mdl "$feats" ark,s,cs:- ark:$dir/pre_vecs.JOB || exit 1; -fi - -if [ $stage -le 4 ]; then - echo "$0: computing speaker vectors (2nd pass)" - $cmd JOB=1:$nj $dir/log/spk_vecs2.JOB.log \ - ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ - weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ - sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \ - --spk-vecs=ark:$dir/pre_vecs.JOB $mdl "$feats" ark,s,cs:- ark:$dir/vecs.JOB || exit 1; - rm $dir/pre_vecs.* -fi - -if [ $stage -le 5 ]; then - echo "$0: doing final alignment." - $cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \ - sgmm-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam \ - --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ - $mdl "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; -fi - -rm $dir/pre_ali.*.gz - -echo "$0: done aligning data." - -utils/summarize_warnings.pl $dir/log - -exit 0; diff --git a/egs/wsj/s5/steps/align_sgmm2.sh b/egs/wsj/s5/steps/align_sgmm2.sh index 8f68a2f7a08..d2f829f7e3e 100755 --- a/egs/wsj/s5/steps/align_sgmm2.sh +++ b/egs/wsj/s5/steps/align_sgmm2.sh @@ -30,8 +30,8 @@ echo "$0 $@" # Print the command line for logging . parse_options.sh || exit 1; if [ $# != 4 ]; then - echo "usage: steps/align_sgmm.sh " - echo "e.g.: steps/align_sgmm.sh --transform-dir exp/tri3b data/train data/lang \\" + echo "usage: steps/align_sgmm2.sh " + echo "e.g.: steps/align_sgmm2.sh --transform-dir exp/tri3b data/train data/lang \\" echo " exp/sgmm4a exp/sgmm5a_ali" echo "main options (for others, see top of script file)" echo " --config # config containing options" diff --git a/egs/wsj/s5/steps/decode_sgmm.sh b/egs/wsj/s5/steps/decode_sgmm.sh deleted file mode 100755 index 2faf2c10e0f..00000000000 --- a/egs/wsj/s5/steps/decode_sgmm.sh +++ /dev/null @@ -1,266 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. - -# This script does decoding with an SGMM system, with speaker vectors. -# If the SGMM system was -# built on top of fMLLR transforms from a conventional system, you should -# provide the --transform-dir option. - -# Begin configuration section. -stage=1 -alignment_model= -transform_dir= # dir to find fMLLR transforms. -nj=4 # number of decoding jobs. -acwt=0.1 # Just a default value, used for adaptation and beam-pruning.. -cmd=run.pl -beam=15.0 -gselect=15 # Number of Gaussian-selection indices for SGMMs. [Note: - # the first_pass_gselect variable is used for the 1st pass of - # decoding and can be tighter. -first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in - # the 1st pass of decoding (lattice generation). 
-max_active=7000 - -#WARNING: This option is renamed lattice_beam (it was renamed to follow the naming -# in the other scripts -lattice_beam=6.0 # Beam we use in lattice generation. -vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for - # speaker-vector computation. Can be quite tight (actually we could - # probably just do best-path. -use_fmllr=false -fmllr_iters=10 -fmllr_min_count=1000 -skip_scoring=false -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -[ -f ./path.sh ] && . ./path.sh; # source the path. -. parse_options.sh || exit 1; - -if [ $# -ne 3 ]; then - echo "Usage: steps/decode_sgmm.sh [options] " - echo " e.g.: steps/decode_sgmm.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" - echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr" - echo "main options (for others, see top of script file)" - echo " --transform-dir # directory of previous decoding" - echo " # where we can find transforms for SAT systems." - echo " --alignment-model # Model for the first-pass decoding." - echo " --config # config containing options" - echo " --nj # number of parallel jobs" - echo " --cmd # Command to run in parallel with" - echo " --beam # Decoding beam; default 13.0" - exit 1; -fi - -graphdir=$1 -data=$2 -dir=$3 -srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. - -for f in $graphdir/HCLG.fst $data/feats.scp $srcdir/final.mdl; do - [ ! -f $f ] && echo "$0: no such file $f" && exit 1; -done - -sdata=$data/split$nj; -silphonelist=`cat $graphdir/phones/silence.csl` || exit 1 -splice_opts=`cat $srcdir/splice_opts 2>/dev/null` -cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null` -gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" -gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |" - -mkdir -p $dir/log -[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; -echo $nj > $dir/num_jobs - - -## Set up features. -if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi -echo "$0: feature type is $feat_type" - -case $feat_type in - delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; - lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" - ;; - *) echo "$0: invalid feature type $feat_type" && exit 1; -esac -if [ ! -z "$transform_dir" ]; then - echo "$0: using transforms from $transform_dir" - [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; - [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ - && echo "$0: #jobs mismatch with transform-dir." && exit 1; - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" -elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then - echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," - echo " but you are not providing the --transform-dir option in test time." -fi -## - -## Calculate FMLLR pre-transforms if needed. We are doing this here since this -## step is requried by models both with and without speaker vectors -if $use_fmllr; then - if [ ! 
-f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then - echo "$0: computing pre-transform for fMLLR computation." - sgmm-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1; - fi -fi - -## Save Gaussian-selection info to disk. -# Note: we can use final.mdl regardless of whether there is an alignment model-- -# they use the same UBM. -if [ $stage -le 1 ]; then - $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ - sgmm-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \ - "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; -fi - -## Work out name of alignment model. ## -if [ -z "$alignment_model" ]; then - if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl; - else alignment_model=$srcdir/final.mdl; fi -fi -[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1; - -# Generate state-level lattice which we can rescore. This is done with the -# alignment model and no speaker-vectors. -if [ $stage -le 2 ]; then - if [ -f "$graphdir/num_pdfs" ]; then - [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $alignment_model | grep pdfs | awk '{print $NF}'` ] || \ - { echo "Mismatch in number of pdfs with $alignment_model"; exit 1; } - fi - $cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \ - sgmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ - --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \ - --word-symbol-table=$graphdir/words.txt "$gselect_opt_1stpass" $alignment_model \ - $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1; -fi - -## Check if the model has speaker vectors -spkdim=`sgmm-info $srcdir/final.mdl | grep 'speaker vector' | awk '{print $NF}'` - -if [ $spkdim -gt 0 ]; then ### For models with speaker vectors: - -# Estimate speaker vectors (1st pass). Prune before determinizing -# because determinization can take a while on un-pruned lattices. -# Note: the sgmm-post-to-gpost stage is necessary because we have -# a separate alignment-model and final model, otherwise we'd skip it -# and use sgmm-est-spkvecs. - if [ $stage -le 3 ]; then - $cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $alignment_model ark:- ark:- \| \ - sgmm-post-to-gpost "$gselect_opt" $alignment_model "$feats" ark:- ark:- \| \ - sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ - $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1; - fi - -# Estimate speaker vectors (2nd pass). Since we already have spk vectors, -# at this point we need to rescore the lattice to get the correct posteriors. 
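
The rescoring step matters here because pre_lat.*.gz was generated by the unadapted alignment model, so posteriors read directly off it would not match the adapted model; sgmm-rescore-lattice first replaces the acoustic scores under the first-pass vectors. The stage-4 pipeline below, in outline:

# pre_lat | sgmm-rescore-lattice --spk-vecs=pre_vecs   (recompute acoustic scores)
#         | lattice-prune | lattice-determinize-pruned | lattice-to-post
#         | weight-silence-post 0.0 $silphonelist      (zero out silence frames)
#         | sgmm-est-spkvecs --spk-vecs=pre_vecs       -> vecs.JOB (refined vectors)
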
- if [ $stage -le 4 ]; then - $cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - sgmm-rescore-lattice --speedup=true --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ - "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ - sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \ - $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1; - fi - rm $dir/pre_vecs.* - - if $use_fmllr; then - # Estimate fMLLR transforms (note: these may be on top of any - # fMLLR transforms estimated with the baseline GMM system. - if [ $stage -le 5 ]; then # compute fMLLR transforms. - echo "$0: computing fMLLR transforms." - $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - sgmm-rescore-lattice --speedup=true --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ - "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ - sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \ - --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ - $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; - fi - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" - fi - -# Now rescore the state-level lattices with the adapted features and the -# corresponding model. Prune and determinize the lattices to limit -# their size. - if [ $stage -le 6 ]; then - $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ - sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ - $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ - "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; - fi - rm $dir/pre_lat.*.gz - -else ### For models without speaker vectors: - - if $use_fmllr; then - # Estimate fMLLR transforms (note: these may be on top of any - # fMLLR transforms estimated with the baseline GMM system. - if [ $stage -le 5 ]; then # compute fMLLR transforms. - echo "$0: computing fMLLR transforms." 
- $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - sgmm-rescore-lattice --speedup=true --utt2spk=ark:$sdata/JOB/utt2spk \ - "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ - sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \ - --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ - $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; - fi - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" - fi - -# Now rescore the state-level lattices with the adapted features and the -# corresponding model. Prune and determinize the lattices to limit -# their size. - if [ $stage -le 6 ] && $use_fmllr; then - $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ - sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk \ - $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ - "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; - rm $dir/pre_lat.*.gz - else # If no adaptation needed, determinize the lattice. - $cmd JOB=1:$nj $dir/log/determinize.JOB.log \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam \ - "ark:gunzip -c $dir/pre_lat.JOB.gz|" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; - rm $dir/pre_lat.*.gz - fi - -fi - -if [ $stage -le 7 ]; then - steps/diagnostic/analyze_lats.sh --cmd "$cmd" $graphdir $dir -fi - -if [ $stage -le 8 ]; then - if ! $skip_scoring ; then - [ ! -x local/score.sh ] && \ - echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; - echo "score best paths" - local/score.sh --cmd "$cmd" $data $graphdir $dir || - { echo "$0: Scoring failed. (ignore by '--skip-scoring true')"; exit 1; } - #echo "score confidence and timing with sclite" - #local/score_sclite_conf.sh --cmd "$cmd" --language turkish $data $graphdir $dir - fi -fi -echo "Decoding done." -exit 0; diff --git a/egs/wsj/s5/steps/decode_sgmm2_fromlats.sh b/egs/wsj/s5/steps/decode_sgmm2_fromlats.sh index 7a3a4f6bd48..c84e597192e 100755 --- a/egs/wsj/s5/steps/decode_sgmm2_fromlats.sh +++ b/egs/wsj/s5/steps/decode_sgmm2_fromlats.sh @@ -43,7 +43,7 @@ echo "$0 $@" # Print the command line for logging . parse_options.sh || exit 1; if [ $# -ne 4 ]; then - echo "Usage: steps/decode_sgmm_fromlats.sh [options] " + echo "Usage: steps/decode_sgmm2_fromlats.sh [options] " echo "" echo "main options (for others, see top of script file)" echo " --transform-dir # directory of previous decoding" diff --git a/egs/wsj/s5/steps/decode_sgmm2_rescore.sh b/egs/wsj/s5/steps/decode_sgmm2_rescore.sh index a37a47350d7..c258ad00067 100755 --- a/egs/wsj/s5/steps/decode_sgmm2_rescore.sh +++ b/egs/wsj/s5/steps/decode_sgmm2_rescore.sh @@ -26,8 +26,8 @@ echo "$0 $@" # Print the command line for logging . 
parse_options.sh || exit 1; if [ $# -ne 4 ]; then - echo "Usage: steps/decode_sgmm_rescore.sh [options] " - echo " e.g.: steps/decode_sgmm_rescore.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" + echo "Usage: steps/decode_sgmm2_rescore.sh [options] " + echo " e.g.: steps/decode_sgmm2_rescore.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr exp/sgmm3a_mmi/decode_dev93_tgpr" echo "main options (for others, see top of script file)" echo " --transform-dir # directory of previous decoding" diff --git a/egs/wsj/s5/steps/decode_sgmm_fromlats.sh b/egs/wsj/s5/steps/decode_sgmm_fromlats.sh deleted file mode 100755 index bb1dacd113f..00000000000 --- a/egs/wsj/s5/steps/decode_sgmm_fromlats.sh +++ /dev/null @@ -1,277 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. - -# This script does decoding with an SGMM system, with speaker vectors. -# If the SGMM system was -# built on top of fMLLR transforms from a conventional system, you should -# provide the --transform-dir option. -# This script does not use a decoding graph, but instead you provide -# a previous decoding directory with lattices in it. This script will only -# make use of the word sequences in the lattices; it limits the decoding -# to those sequences. You should also provide a "lang" directory from -# which this script will use the G.fst and L.fst. - -# Begin configuration section. -stage=1 -alignment_model= -transform_dir= # dir to find fMLLR transforms. -acwt=0.08333 # Just a default value, used for adaptation and beam-pruning.. -batch_size=75 # Limits memory blowup in compile-train-graphs-fsts -cmd=run.pl -beam=20.0 -gselect=15 # Number of Gaussian-selection indices for SGMMs. [Note: - # the first_pass_gselect variable is used for the 1st pass of - # decoding and can be tighter. -first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in - # the 1st pass of decoding (lattice generation). -max_active=7000 - -#WARNING: This option is renamed lattice_beam (it was renamed to follow the naming -# in the other scripts -lattice_beam=8.0 # Beam we use in lattice generation. -vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for - # speaker-vector computation. Can be quite tight (actually we could - # probably just do best-path. -use_fmllr=false -fmllr_iters=10 -fmllr_min_count=1000 -scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" -skip_scoring=false -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -[ -f ./path.sh ] && . ./path.sh; # source the path. -. parse_options.sh || exit 1; - -if [ $# -ne 4 ]; then - echo "Usage: steps/decode_sgmm_fromlats.sh [options] " - echo "" - echo "main options (for others, see top of script file)" - echo " --transform-dir # directory of previous decoding" - echo " # where we can find transforms for SAT systems." - echo " --alignment-model # Model for the first-pass decoding." - echo " --config # config containing options" - echo " --cmd # Command to run in parallel with" - echo " --beam # Decoding beam; default 13.0" - exit 1; -fi - -data=$1 -lang=$2 -olddir=$3 -dir=$4 -srcdir=`dirname $dir` - -for f in $data/feats.scp $lang/G.fst $lang/L_disambig.fst $lang/phones/disambig.int \ - $srcdir/final.mdl $srcdir/tree $olddir/lat.1.gz; do - [ ! 
-f $f ] && echo "$0: no such file $f" && exit 1; -done - -nj=`cat $olddir/num_jobs` || exit 1; -sdata=$data/split$nj; -silphonelist=`cat $lang/phones/silence.csl` || exit 1 -splice_opts=`cat $srcdir/splice_opts 2>/dev/null` -cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null` -gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" -gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |" - -mkdir -p $dir/log -[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; -echo $nj > $dir/num_jobs - - -## Set up features - -if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi -echo "$0: feature type is $feat_type" -if [ -z "$transform_dir" ] && [ -f $olddir/trans.1 ]; then - transform_dir=$olddir -fi - -case $feat_type in - delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; - lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" - ;; - *) echo "$0: invalid feature type $feat_type" && exit 1; -esac -if [ ! -z "$transform_dir" ]; then - echo "$0: using transforms from $transform_dir" - [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; - [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ - && echo "$0: #jobs mismatch with transform-dir." && exit 1; - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" -elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then - echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," - echo " but you are not providing the --transform-dir option in test time." -fi - -## Calculate FMLLR pre-transforms if needed. We are doing this here since this -## step is requried by models both with and without speaker vectors -if $use_fmllr; then - if [ ! -f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then - echo "$0: computing pre-transform for fMLLR computation." - sgmm-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1; - fi -fi - -## Save Gaussian-selection info to disk. -# Note: we can use final.mdl regardless of whether there is an alignment model-- -# they use the same UBM. -if [ $stage -le 1 ]; then - $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ - sgmm-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \ - "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; -fi - -## Work out name of alignment model. ## -if [ -z "$alignment_model" ]; then - if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl; - else alignment_model=$srcdir/final.mdl; fi -fi -[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1; - -# Generate state-level lattice which we can rescore. This is done with the -# alignment model and no speaker-vectors. 
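
The stage-2 pipeline below is the heart of the "fromlats" approach: instead of decoding against a precompiled HCLG graph, it rebuilds a small per-utterance graph covering only the word sequences found in the old lattices. Step by step, as a reading aid:

# lattice-to-fst              old lattices -> word-sequence FSTs (scores dropped)
# fsttablecompose with G.fst  re-attach grammar scores to those word sequences
# fstdeterminizestar          merge duplicate paths
# compile-train-graphs-fsts   expand words to HMM-state graphs via L_disambig.fst and the tree
# sgmm-latgen-faster          decode, constrained to exactly those word sequences
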
-if [ $stage -le 2 ]; then - $cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \ - lattice-to-fst "ark:gunzip -c $olddir/lat.JOB.gz|" ark:- \| \ - fsttablecompose "fstproject --project_output=true $lang/G.fst | fstarcsort |" ark:- ark:- \| \ - fstdeterminizestar ark:- ark:- \| \ - compile-train-graphs-fsts --read-disambig-syms=$lang/phones/disambig.int \ - --batch-size=$batch_size $scale_opts \ - $srcdir/tree $srcdir/final.mdl $lang/L_disambig.fst ark:- ark:- \| \ - sgmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ - --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \ - --word-symbol-table=$lang/words.txt "$gselect_opt_1stpass" $alignment_model \ - "ark:-" "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1; -fi - -## Check if the model has speaker vectors -spkdim=`sgmm-info $srcdir/final.mdl | grep 'speaker vector' | awk '{print $NF}'` - -if [ $spkdim -gt 0 ]; then ### For models with speaker vectors: - -# Estimate speaker vectors (1st pass). Prune before determinizing -# because determinization can take a while on un-pruned lattices. -# Note: the sgmm-post-to-gpost stage is necessary because we have -# a separate alignment-model and final model, otherwise we'd skip it -# and use sgmm-est-spkvecs. - if [ $stage -le 3 ]; then - $cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $alignment_model ark:- ark:- \| \ - sgmm-post-to-gpost "$gselect_opt" $alignment_model "$feats" ark:- ark:- \| \ - sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \ - $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1; - fi - -# Estimate speaker vectors (2nd pass). Since we already have spk vectors, -# at this point we need to rescore the lattice to get the correct posteriors. - if [ $stage -le 4 ]; then - $cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - sgmm-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ - "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ - sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \ - $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1; - fi - rm $dir/pre_vecs.* - - if $use_fmllr; then - # Estimate fMLLR transforms (note: these may be on top of any - # fMLLR transforms estimated with the baseline GMM system. - if [ $stage -le 5 ]; then # compute fMLLR transforms. - echo "$0: computing fMLLR transforms." 
- $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - sgmm-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \ - "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ - sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \ - --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ - $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; - fi - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" - fi - -# Now rescore the state-level lattices with the adapted features and the -# corresponding model. Prune and determinize the lattices to limit -# their size. - if [ $stage -le 6 ]; then - $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ - sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ - $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ - "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; - fi - rm $dir/pre_lat.*.gz - -else ### For models without speaker vectors: - - if $use_fmllr; then - # Estimate fMLLR transforms (note: these may be on top of any - # fMLLR transforms estimated with the baseline GMM system. - if [ $stage -le 5 ]; then # compute fMLLR transforms. - echo "$0: computing fMLLR transforms." - $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - sgmm-rescore-lattice --utt2spk=ark:$sdata/JOB/utt2spk \ - "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ - sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \ - --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ - $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; - fi - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" - fi - -# Now rescore the state-level lattices with the adapted features and the -# corresponding model. Prune and determinize the lattices to limit -# their size. - if [ $stage -le 6 ] && $use_fmllr; then - $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ - sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk \ - $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ - "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; - rm $dir/pre_lat.*.gz - else # Already done with decoding if no adaptation needed. - for n in `seq 1 $nj`; do - mv $dir/pre_lat.${n}.gz $dir/lat.${n}.gz - done - fi - -fi - -# The output of this script is the files "lat.*.gz"-- we'll rescore this at -# different acoustic scales to get the final output. - - -if [ $stage -le 7 ]; then - if ! $skip_scoring ; then - [ ! -x local/score.sh ] && \ - echo "Not scoring because local/score.sh does not exist or not executable." 
&& exit 1; - echo "score best paths" - local/score.sh --cmd "$cmd" $data $lang $dir || - { echo "$0: Scoring failed. (ignore by '--skip-scoring true')"; exit 1; } - # echo "score confidence and timing with sclite" - # local/score_sclite_conf.sh --cmd "$cmd" --language turkish $data $lang $dir - fi -fi -echo "Decoding done." -exit 0; diff --git a/egs/wsj/s5/steps/decode_sgmm_rescore.sh b/egs/wsj/s5/steps/decode_sgmm_rescore.sh deleted file mode 100755 index 398c8931e7f..00000000000 --- a/egs/wsj/s5/steps/decode_sgmm_rescore.sh +++ /dev/null @@ -1,108 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. - -# This script does decoding with an SGMM system, by rescoring lattices -# generated from a previous SGMM system. The directory with the lattices -# is assumed to contain speaker vectors, if used. Basically it rescores -# the lattices one final time, using the same setup as the final decoding -# pass of the source dir. The assumption is that the model may have -# been discriminatively trained. - -# If the system was built on top of fMLLR transforms from a conventional system, -# you should provide the --transform-dir option. - -# Begin configuration section. -transform_dir= # dir to find fMLLR transforms. -cmd=run.pl -iter=final -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -[ -f ./path.sh ] && . ./path.sh; # source the path. -. parse_options.sh || exit 1; - -if [ $# -ne 4 ]; then - echo "Usage: steps/decode_sgmm_rescore.sh [options] " - echo " e.g.: steps/decode_sgmm_rescore.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\" - echo " exp/sgmm3a/graph_tgpr data/test_dev93 exp/sgmm3a/decode_dev93_tgpr exp/sgmm3a_mmi/decode_dev93_tgpr" - echo "main options (for others, see top of script file)" - echo " --transform-dir # directory of previous decoding" - echo " # where we can find transforms for SAT systems." - echo " --config # config containing options" - echo " --cmd # Command to run in parallel with" - echo " --iter # iteration of model to use (default: final)" - exit 1; -fi - -graphdir=$1 -data=$2 -olddir=$3 -dir=$4 -srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory. - -for f in $graphdir/words.txt $data/feats.scp $olddir/lat.1.gz $olddir/gselect.1.gz \ - $srcdir/$iter.mdl; do - [ ! -f $f ] && echo "$0: no such file $f" && exit 1; -done - -nj=`cat $olddir/num_jobs` || exit 1; -sdata=$data/split$nj; -gselect_opt="--gselect=ark,s,cs:gunzip -c $olddir/gselect.JOB.gz|" -splice_opts=`cat $srcdir/splice_opts 2>/dev/null` -cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null` - -mkdir -p $dir/log -[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; -echo $nj > $dir/num_jobs - -if [ -f $olddir/vecs.1 ]; then - echo "$0: using speaker vectors from $olddir" - spkvecs_opt="--spk-vecs=ark:$olddir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" -else - echo "$0: no speaker vectors found." - spkvecs_opt= -fi - - -## Set up features. 
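-# The features must match those used when the input lattices were generated
-# (same CMVN, splicing/LDA and, for SAT systems, the same fMLLR transforms,
-# via --transform-dir), since we are only rescoring the old lattices.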
-if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi -echo "$0: feature type is $feat_type" - -case $feat_type in - delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; - lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" - ;; - *) echo "$0: invalid feature type $feat_type" && exit 1; -esac -if [ ! -z "$transform_dir" ]; then - echo "$0: using transforms from $transform_dir" - [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; - [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ - && echo "$0: #jobs mismatch with transform-dir." && exit 1; - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" -elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then - echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," - echo " but you are not providing the --transform-dir option in test time." -fi - -if [ -f $olddir/trans.1 ]; then - echo "$0: using (in addition to any previous transforms) transforms from $olddir" - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$olddir/trans.JOB ark:- ark:- |" -fi -## - -# Rescore the state-level lattices with the model provided. Just -# one command in this script. -echo "$0: rescoring lattices with SGMM model in $srcdir/$iter.mdl" -$cmd JOB=1:$nj $dir/log/rescore.JOB.log \ - sgmm-rescore-lattice "$gselect_opt" $spkvecs_opt \ - $srcdir/$iter.mdl "ark:gunzip -c $olddir/lat.JOB.gz|" "$feats" \ - "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; - -[ ! -x local/score.sh ] && \ - echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; -local/score.sh --cmd "$cmd" $data $graphdir $dir - -exit 0; diff --git a/egs/wsj/s5/steps/make_denlats_sgmm.sh b/egs/wsj/s5/steps/make_denlats_sgmm.sh deleted file mode 100755 index fbd59378c9c..00000000000 --- a/egs/wsj/s5/steps/make_denlats_sgmm.sh +++ /dev/null @@ -1,189 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# 2014 Guoguo Chen - -# Create denominator lattices for MMI/MPE training, with SGMM models. If the -# features have fMLLR transforms you have to supply the --transform-dir option. -# It gets any speaker vectors from the "alignment dir" ($alidir). Note: this is -# possibly a slight mismatch because the speaker vectors come from supervised -# adaptation. - -# Begin configuration section. -nj=4 -cmd=run.pl -sub_split=1 -beam=13.0 -lattice_beam=7.0 -acwt=0.1 -max_active=5000 -transform_dir= -max_mem=20000000 # This will stop the processes getting too large. -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -[ -f ./path.sh ] && . ./path.sh; # source the path. -. parse_options.sh || exit 1; - -if [ $# != 4 ]; then - echo "Usage: steps/make_denlats_sgmm.sh [options] " - echo " e.g.: steps/make_denlats_sgmm.sh data/train data/lang exp/sgmm4a_ali exp/sgmm4a_denlats" - echo "Works for (delta|lda) features, and (with --transform-dir option) such features" - echo " plus transforms." 
- echo "" - echo "Main options (for others, see top of script file)" - echo " --config # config containing options" - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - echo " --sub-split # e.g. 40; use this for " - echo " # large databases so your jobs will be smaller and" - echo " # will (individually) finish reasonably soon." - echo " --transform-dir # directory to find fMLLR transforms." - exit 1; -fi - -data=$1 -lang=$2 -alidir=$3 # could also be $srcdir, but only if no vectors supplied. -dir=$4 - -sdata=$data/split$nj -splice_opts=`cat $alidir/splice_opts 2>/dev/null` -cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null` -mkdir -p $dir/log -[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; -echo $nj > $dir/num_jobs - -utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; - -oov=`cat $lang/oov.int` || exit 1; - -mkdir -p $dir - -cp -RH $lang $dir/ - -# Compute grammar FST which corresponds to unigram decoding graph. -new_lang="$dir/"$(basename "$lang") -echo "$0: Making unigram grammar FST in $new_lang" -cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ - awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ - utils/make_unigram_grammar.pl | fstcompile | fstarcsort --sort_type=ilabel > $new_lang/G.fst \ - || exit 1; - -# mkgraph.sh expects a whole directory "lang", so put everything in one directory... -# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and -# final.mdl from $alidir; the output HCLG.fst goes in $dir/graph. - -echo "$0: Compiling decoding graph in $dir/dengraph" -if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $srcdir/final.mdl ]; then - echo "$0: Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation." -else - utils/mkgraph.sh $new_lang $alidir $dir/dengraph || exit 1; -fi - -if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi -echo "$0: feature type is $feat_type" - -case $feat_type in - delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; - lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |" - cp $alidir/final.mat $dir - ;; - *) echo "$0: Invalid feature type $feat_type" && exit 1; -esac - -if [ ! -z "$transform_dir" ]; then # add transforms to features... - echo "$0: using fMLLR transforms from $transform_dir" - [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist." - [ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \ - && echo "$0: mismatch in number of jobs with $transform_dir" && exit 1; - [ -f $alidir/final.mat ] && ! 
cmp $transform_dir/final.mat $alidir/final.mat && \ - echo "$0: LDA transforms differ between $alidir and $transform_dir" - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |" -else - echo "$0: Assuming you don't have a SAT system, since no --transform-dir option supplied " -fi - -if [ -f $alidir/gselect.1.gz ]; then - gselect_opt="--gselect=ark,s,cs:gunzip -c $alidir/gselect.JOB.gz|" -else - echo "$0: no such file $alidir/gselect.1.gz" && exit 1; -fi - -if [ -f $alidir/vecs.1 ]; then - spkvecs_opt="--spk-vecs=ark:$alidir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" -else - if [ -f $alidir/final.alimdl ]; then - echo "$0: You seem to have an SGMM system with speaker vectors," - echo "yet we can't find speaker vectors. Perhaps you supplied" - echo "the model director instead of the alignment directory?" - exit 1; - fi -fi - -# if this job is interrupted by the user, we want any background jobs to be -# killed too. -cleanup() { - local pids=$(jobs -pr) - [ -n "$pids" ] && kill $pids -} -trap "cleanup" INT QUIT TERM EXIT - -if [ $sub_split -eq 1 ]; then - $cmd JOB=1:$nj $dir/log/decode_den.JOB.log \ - sgmm-latgen-faster $spkvecs_opt "$gselect_opt" --beam=$beam \ - --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ - --max-mem=$max_mem --max-active=$max_active \ - --word-symbol-table=$lang/words.txt $alidir/final.mdl \ - $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; -else - # each job from 1 to $nj is split into multiple pieces (sub-split), and we aim - # to have at most two jobs running at each time. The idea is that if we have - # stragglers from one job, we can be processing another one at the same time. - rm $dir/.error 2>/dev/null - - prev_pid= - for n in `seq $[nj+1]`; do - if [ $n -gt $nj ]; then - this_pid= - elif [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $alidir/final.mdl ]; then - echo "$0: Not processing subset $n as already done (delete $dir/.done.$n if not)"; - this_pid= - else - sdata2=$data/split$nj/$n/split${sub_split}utt; - split_data.sh --per-utt $sdata/$n $sub_split || exit 1; - mkdir -p $dir/log/$n - mkdir -p $dir/part - feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split${sub_split}utt/JOB/:g` - spkvecs_opt_subset=`echo $spkvecs_opt | sed "s/JOB/$n/g"` - gselect_opt_subset=`echo $gselect_opt | sed "s/JOB/$n/g"` - $cmd JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \ - sgmm-latgen-faster $spkvecs_opt_subset "$gselect_opt_subset" \ - --beam=$beam --lattice-beam=$lattice_beam \ - --acoustic-scale=$acwt --max-mem=$max_mem --max-active=$max_active \ - --word-symbol-table=$lang/words.txt $alidir/final.mdl \ - $dir/dengraph/HCLG.fst "$feats_subset" \ - "ark:|gzip -c >$dir/lat.$n.JOB.gz" || touch $dir/.error & - this_pid=$! - fi - if [ ! -z "$prev_pid" ]; then # Wait for the previous job to merge lattices. - wait $prev_pid - [ -f $dir/.error ] && \ - echo "$0: error generating denominator lattices" && exit 1; - rm $dir/.merge_error 2>/dev/null - echo "$0: Merging archives for data subset $prev_n" - for k in `seq $sub_split`; do - gunzip -c $dir/lat.$prev_n.$k.gz || touch $dir/.merge_error; - done | gzip -c > $dir/lat.$prev_n.gz || touch $dir/.merge_error; - [ -f $dir/.merge_error ] && \ - echo "$0: Merging lattices for subset $prev_n failed" && exit 1; - rm $dir/lat.$prev_n.*.gz - touch $dir/.done.$prev_n - fi - prev_n=$n - prev_pid=$this_pid - done -fi - - -echo "$0: done generating denominator lattices with SGMMs." 
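The sub-split branch above deliberately keeps at most two batches of jobs in flight: while subset n decodes in the background, subset n-1 is being merged. A minimal, self-contained sketch of that throttling pattern, where process_subset is a hypothetical stand-in for the decode-and-merge work:

#!/bin/bash
# Sketch only: run units of work in the background, always overlapping the
# current unit with the finalization of the previous one.
process_subset() { sleep 1; echo "subset $1 decoded"; }  # placeholder work
nj=4
prev_pid=; prev_n=
for n in $(seq $((nj + 1))); do
  if [ "$n" -gt "$nj" ]; then
    this_pid=            # past the last subset: nothing new to start.
  else
    process_subset "$n" &
    this_pid=$!
  fi
  if [ -n "$prev_pid" ]; then
    wait "$prev_pid"                         # let subset n-1 finish...
    echo "merging output of subset $prev_n"  # ...then finalize it.
  fi
  prev_n=$n
  prev_pid=$this_pid
done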
diff --git a/egs/wsj/s5/steps/tandem/align_sgmm.sh b/egs/wsj/s5/steps/tandem/align_sgmm.sh deleted file mode 100755 index bb3ba79bd9f..00000000000 --- a/egs/wsj/s5/steps/tandem/align_sgmm.sh +++ /dev/null @@ -1,236 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) -# Korbinian Riedhammer -# Apache 2.0 - -# Computes training alignments and (if needed) speaker-vectors, given an -# SGMM system. If the system is built on top of SAT, you should supply -# transforms with the --transform-dir option. - -# If you supply the --use-graphs option, it will use the training -# graphs from the source directory. - -# Begin configuration section. -stage=0 -nj=4 -cmd=run.pl -use_graphs=false # use graphs from srcdir -use_gselect=false # use gselect info from srcdir [regardless, we use - # Gaussian-selection info, we might have to compute it though.] -gselect=15 # Number of Gaussian-selection indices for SGMMs. -# Begin configuration. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" -beam=10 -retry_beam=40 -transform_dir= # directory to find fMLLR transforms in. -# End configuration options. - -echo "$0 $@" # Print the command line for logging - -[ -f path.sh ] && . ./path.sh # source the path. -. parse_options.sh || exit 1; - -if [ $# != 5 ]; then - echo "usage: steps/tandem/align_sgmm.sh " - echo "e.g.: steps/tandem/align_sgmm.sh --transform-dir exp/tri3b data1/train data1/lang \\" - echo " exp/sgmm4a exp/sgmm5a_ali" - echo "main options (for others, see top of script file)" - echo " --config # config containing options" - echo " --nj # number of parallel jobs" - echo " --use-graphs true # use graphs in src-dir" - echo " --transform-dir # directory to find fMLLR transforms" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - exit 1; -fi - -data1=$1 -data2=$2 -lang=$3 -srcdir=$4 -dir=$5 - -oov=`cat $lang/oov.int` || exit 1; -silphonelist=`cat $lang/phones/silence.csl` || exit 1; - -mkdir -p $dir/log -echo $nj > $dir/num_jobs - -utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1; -cp $lang/phones.txt $dir || exit 1; - -## Set up features. - -sdata1=$data1/split$nj -sdata2=$data2/split$nj -[[ -d $sdata1 && $data1/feats.scp -ot $sdata1 ]] || split_data.sh $data1 $nj || exit 1; -[[ -d $sdata2 && $data2/feats.scp -ot $sdata2 ]] || split_data.sh $data2 $nj || exit 1; - -cp $srcdir/{tree,final.mdl} $dir || exit 1; -[ -f $srcdir/final.alimdl ] && cp $srcdir/final.alimdl $dir -cp $srcdir/final.occs $dir; - -## Set up features. -splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. 
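-# normft2 is a true/false flag saved by the tandem training scripts; it says
-# whether CMVN should be applied to the second (bottleneck/posterior) stream.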
-normft2=`cat $srcdir/normft2 2>/dev/null` - -if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi - -case $feat_type in - delta) - echo "$0: feature type is $feat_type" - ;; - lda) - echo "$0: feature type is $feat_type" - cp $srcdir/{lda,final}.mat $dir/ || exit 1; - ;; - *) echo "$0: invalid feature type $feat_type" && exit 1; -esac - -# set up feature stream 1; this are usually spectral features, so we will add -# deltas or splice them -feats1="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata1/JOB/utt2spk scp:$sdata1/JOB/cmvn.scp scp:$sdata1/JOB/feats.scp ark:- |" - -if [ "$feat_type" == "delta" ]; then - feats1="$feats1 add-deltas ark:- ark:- |" -elif [ "$feat_type" == "lda" ]; then - feats1="$feats1 splice-feats $splice_opts ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |" -fi - -# set up feature stream 2; this are usually bottleneck or posterior features, -# which may be normalized if desired -feats2="scp:$sdata2/JOB/feats.scp" - -if [ "$normft2" == "true" ]; then - feats2="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata2/JOB/utt2spk scp:$sdata2/JOB/cmvn.scp $feats2 ark:- |" -fi - -# assemble tandem features -feats="ark,s,cs:paste-feats '$feats1' '$feats2' ark:- |" - -# add transformation, if applicable -if [ "$feat_type" == "lda" ]; then - feats="$feats transform-feats $dir/final.mat ark:- ark:- |" -fi - -# splicing/normalization options -cp $srcdir/{splice_opts,normft2,tandem} $dir 2>/dev/null - -if [ ! -z "$transform_dir" ]; then - echo "$0: using transforms from $transform_dir" - [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1; - [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \ - && echo "$0: #jobs mismatch with transform-dir." && exit 1; - feats="$feats transform-feats --utt2spk=ark:$sdata1/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" -elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then - echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms," - echo " but you are not providing the --transform-dir option during alignment." -fi -## - -## Set up model and alignment model. -mdl=$srcdir/final.mdl -if [ -f $srcdir/final.alimdl ]; then - alimdl=$srcdir/final.alimdl -else - alimdl=$srcdir/final.mdl -fi -[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1; - -## Work out where we're getting the graphs from. -if $use_graphs; then - [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ - echo "$0: you specified --use-graphs true, but #jobs mismatch." && exit 1; - [ ! -f $srcdir/fsts.1.gz ] && echo "No graphs in $srcdir" && exit 1; - graphdir=$srcdir - ln.pl $srcdir/fsts.*.gz $dir -else - graphdir=$dir - if [ $stage -le 0 ]; then - echo "$0: compiling training graphs" - tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata1/JOB/text|"; - $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ - compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ - "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; - fi -fi - -## Work out where we're getting the Gaussian-selection info from -if $use_gselect; then - [ "$nj" != "`cat $srcdir/num_jobs`" ] && \ - echo "$0: you specified --use-gselect true, but #jobs mismatch." && exit 1; - [ ! 
-f $srcdir/gselect.1.gz ] && echo "No gselect info in $srcdir" && exit 1; - graphdir=$srcdir - gselect_opt="--gselect=ark:gunzip -c $srcdir/gselect.JOB.gz|" - ln.pl $srcdir/gselect.*.gz $dir -else - graphdir=$dir - if [ $stage -le 1 ]; then - echo "$0: computing Gaussian-selection info" - # Note: doesn't matter whether we use $alimdl or $mdl, they will - # have the same gselect info. - $cmd JOB=1:$nj $dir/log/gselect.JOB.log \ - sgmm-gselect --full-gmm-nbest=$gselect $alimdl \ - "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1; - fi - gselect_opt="--gselect=ark:gunzip -c $dir/gselect.JOB.gz|" -fi - - -if [ $alimdl == $mdl ]; then - # Speaker-independent decoding-- just one pass. Not normal. - T=`sgmm-info $mdl | grep 'speaker vector space' | awk '{print $NF}'` || exit 1; - [ "$T" -ne 0 ] && echo "No alignment model, yet speaker vector space nonempty" && exit 1; - - if [ $stage -le 2 ]; then - echo "$0: aligning data in $data using model $mdl (no speaker-vectors)" - $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ - sgmm-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam $alimdl \ - "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; - fi - echo "$0: done aligning data." - exit 0; -fi - -# Continue with system with speaker vectors. -if [ $stage -le 2 ]; then - echo "$0: aligning data in $data using model $alimdl" - $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ - sgmm-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam $alimdl \ - "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1; -fi - -if [ $stage -le 3 ]; then - echo "$0: computing speaker vectors (1st pass)" - $cmd JOB=1:$nj $dir/log/spk_vecs1.JOB.log \ - ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ - weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ - sgmm-post-to-gpost "$gselect_opt" $alimdl "$feats" ark:- ark:- \| \ - sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata1/JOB/spk2utt \ - $mdl "$feats" ark,s,cs:- ark:$dir/pre_vecs.JOB || exit 1; -fi - -if [ $stage -le 4 ]; then - echo "$0: computing speaker vectors (2nd pass)" - $cmd JOB=1:$nj $dir/log/spk_vecs2.JOB.log \ - ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:- \| \ - weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \ - sgmm-est-spkvecs --spk2utt=ark:$sdata1/JOB/spk2utt "$gselect_opt" \ - --spk-vecs=ark:$dir/pre_vecs.JOB $mdl "$feats" ark,s,cs:- ark:$dir/vecs.JOB || exit 1; - rm $dir/pre_vecs.* -fi - -if [ $stage -le 5 ]; then - echo "$0: doing final alignment." - $cmd JOB=1:$nj $dir/log/align_pass2.JOB.log \ - sgmm-align-compiled $scale_opts "$gselect_opt" --beam=$beam --retry-beam=$retry_beam \ - --utt2spk=ark:$sdata1/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ - $mdl "ark:gunzip -c $graphdir/fsts.JOB.gz|" "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; -fi - -rm $dir/pre_ali.*.gz - -echo "$0: done aligning data." - -utils/summarize_warnings.pl $dir/log - -exit 0; diff --git a/egs/wsj/s5/steps/tandem/decode_sgmm.sh b/egs/wsj/s5/steps/tandem/decode_sgmm.sh deleted file mode 100755 index c980bf13f4f..00000000000 --- a/egs/wsj/s5/steps/tandem/decode_sgmm.sh +++ /dev/null @@ -1,303 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# Korbinian Riedhammer - -# This script does decoding with an SGMM system, with speaker vectors. 
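-# It first generates state-level lattices with the speaker-independent
-# alignment model, then estimates speaker vectors (and optionally fMLLR
-# transforms), and finally rescores the lattices with the adapted model.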
-# If the SGMM system was
-# built on top of fMLLR transforms from a conventional system, you should
-# provide the --transform-dir option.
-
-# Begin configuration section.
-stage=1
-alignment_model=
-transform_dir= # dir to find fMLLR transforms.
-nj=4 # number of decoding jobs.
-acwt=0.1 # Just a default value, used for adaptation and beam-pruning.
-cmd=run.pl
-beam=15.0
-gselect=15 # Number of Gaussian-selection indices for SGMMs. [Note:
- # the first_pass_gselect variable is used for the 1st pass of
- # decoding and can be tighter.]
-first_pass_gselect=3 # Use a smaller number of Gaussian-selection indices in
- # the 1st pass of decoding (lattice generation).
-max_active=7000
-
-# WARNING: This option was renamed lattice_beam (it was renamed to follow the
-# naming in the other scripts).
-lattice_beam=8.0 # Beam we use in lattice generation.
-vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for
- # speaker-vector computation. Can be quite tight (actually we could
- # probably just do best-path).
-use_fmllr=false
-fmllr_iters=10
-fmllr_min_count=1000
-skip_scoring=false
-# End configuration section.
-
-echo "$0 $@" # Print the command line for logging
-
-[ -f ./path.sh ] && . ./path.sh; # source the path.
-. parse_options.sh || exit 1;
-
-if [ $# -ne 4 ]; then
- echo "Usage: steps/tandem/decode_sgmm.sh [options] <graph-dir> <data1-dir> <data2-dir> <decode-dir>"
- echo " e.g.: steps/tandem/decode_sgmm.sh --transform-dir exp/tri3b/decode_dev93_tgpr \\"
- echo " exp/sgmm3a/graph_tgpr {mfcc,bottleneck}/data/test_dev93 exp/sgmm3a/decode_dev93_tgpr"
- echo "main options (for others, see top of script file)"
- echo " --transform-dir <decoding-dir> # directory of previous decoding"
- echo " # where we can find transforms for SAT systems."
- echo " --alignment-model <ali-mdl> # Model for the first-pass decoding."
- echo " --config <config-file> # config containing options"
- echo " --nj <nj> # number of parallel jobs"
- echo " --cmd <cmd> # Command to run in parallel with"
- echo " --beam <beam> # Decoding beam; default 13.0"
- exit 1;
-fi
-
-graphdir=$1
-data1=$2
-data2=$3
-dir=$4
-srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
-
-for f in $graphdir/HCLG.fst $data1/feats.scp $data2/feats.scp $srcdir/final.mdl; do
- [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
-done
-
-silphonelist=`cat $graphdir/phones/silence.csl` || exit 1
-gselect_opt="--gselect=ark:gunzip -c $dir/gselect.JOB.gz|"
-gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |"
-
-mkdir -p $dir/log
-echo $nj > $dir/num_jobs
-
-sdata1=$data1/split$nj;
-sdata2=$data2/split$nj;
-[[ -d $sdata1 && $data1/feats.scp -ot $sdata1 ]] || split_data.sh $data1 $nj || exit 1;
-[[ -d $sdata2 && $data2/feats.scp -ot $sdata2 ]] || split_data.sh $data2 $nj || exit 1;
-
-
-## Set up features.
-
-splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
-normft2=`cat $srcdir/normft2 2>/dev/null`
-
-if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
-
-case $feat_type in
- delta)
- echo "$0: feature type is $feat_type"
- ;;
- lda)
- echo "$0: feature type is $feat_type"
- cp $srcdir/{lda,final}.mat $dir/
- ;;
- *) echo "$0: invalid feature type $feat_type" && exit 1;
-esac
-
-# set up feature stream 1; these are usually spectral features, so we will add
-# deltas or splice them
-feats1="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata1/JOB/utt2spk scp:$sdata1/JOB/cmvn.scp scp:$sdata1/JOB/feats.scp ark:- |"
-
-if [ "$feat_type" == "delta" ]; then
- feats1="$feats1 add-deltas ark:- ark:- |"
-elif [ "$feat_type" == "lda" ]; then
- feats1="$feats1 splice-feats $splice_opts ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |"
-fi
-
-# set up feature stream 2; these are usually bottleneck or posterior features,
-# which may be normalized if desired
-feats2="scp:$sdata2/JOB/feats.scp"
-
-if [ "$normft2" == "true" ]; then
- echo "Using cmvn for feats2"
- feats2="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata2/JOB/utt2spk scp:$sdata2/JOB/cmvn.scp $feats2 ark:- |"
-fi
-
-# assemble tandem features
-feats="ark,s,cs:paste-feats '$feats1' '$feats2' ark:- |"
-
-# add transformation, if applicable
-if [ "$feat_type" == "lda" ]; then
- feats="$feats transform-feats $dir/final.mat ark:- ark:- |"
-fi
-
-# splicing/normalization options
-cp $srcdir/{splice_opts,normft2,tandem} $dir 2>/dev/null
-
-if [ ! -z "$transform_dir" ]; then
- echo "$0: using transforms from $transform_dir"
- [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
- [ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
- && echo "$0: #jobs mismatch with transform-dir." && exit 1;
- feats="$feats transform-feats --utt2spk=ark:$sdata1/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
-elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
- echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
- echo " but you are not providing the --transform-dir option at test time."
-fi
-##
-
-
-## Calculate FMLLR pre-transforms if needed. We are doing this here since this
-## step is required by models both with and without speaker vectors
-if $use_fmllr; then
- if [ ! -f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; then
- echo "$0: computing pre-transform for fMLLR computation."
- sgmm-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1;
- fi
-fi
-
-## Save Gaussian-selection info to disk.
-# Note: we can use final.mdl regardless of whether there is an alignment model--
-# they use the same UBM.
-if [ $stage -le 1 ]; then
- $cmd JOB=1:$nj $dir/log/gselect.JOB.log \
- sgmm-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \
- "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
-fi
-
-## Work out name of alignment model. ##
-if [ -z "$alignment_model" ]; then
- if [ -f "$srcdir/final.alimdl" ]; then alignment_model=$srcdir/final.alimdl;
- else alignment_model=$srcdir/final.mdl; fi
-fi
-[ ! -f "$alignment_model" ] && echo "$0: no alignment model $alignment_model " && exit 1;
-
-# Generate state-level lattice which we can rescore. This is done with the
-# alignment model and no speaker-vectors.
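-# We use --determinize-lattice=false and a reduced number of
-# Gaussian-selection indices ($first_pass_gselect) here; the state-level
-# lattices are pruned and determinized later, once the adapted acoustic
-# scores are in place.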
-if [ $stage -le 2 ]; then - $cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \ - sgmm-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ - --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \ - --word-symbol-table=$graphdir/words.txt "$gselect_opt_1stpass" $alignment_model \ - $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1; -fi - -## Check if the model has speaker vectors -spkdim=`sgmm-info $srcdir/final.mdl | grep 'speaker vector' | awk '{print $NF}'` - -if [ $spkdim -gt 0 ]; then ### For models with speaker vectors: - -# Estimate speaker vectors (1st pass). Prune before determinizing -# because determinization can take a while on un-pruned lattices. -# Note: the sgmm-post-to-gpost stage is necessary because we have -# a separate alignment-model and final model, otherwise we'd skip it -# and use sgmm-est-spkvecs. - if [ $stage -le 3 ]; then - $cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $alignment_model ark:- ark:- \| \ - sgmm-post-to-gpost "$gselect_opt" $alignment_model "$feats" ark:- ark:- \| \ - sgmm-est-spkvecs-gpost --spk2utt=ark:$sdata1/JOB/spk2utt \ - $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1; - fi - -# Estimate speaker vectors (2nd pass). Since we already have spk vectors, -# at this point we need to rescore the lattice to get the correct posteriors. - if [ $stage -le 4 ]; then - $cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - sgmm-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata1/JOB/utt2spk \ - "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ - sgmm-est-spkvecs --spk2utt=ark:$sdata1/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \ - $srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1; - fi - rm $dir/pre_vecs.* - - if $use_fmllr; then - # Estimate fMLLR transforms (note: these may be on top of any - # fMLLR transforms estimated with the baseline GMM system. - if [ $stage -le 5 ]; then # compute fMLLR transforms. - echo "$0: computing fMLLR transforms." 
- $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - sgmm-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata1/JOB/utt2spk \ - "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ - sgmm-est-fmllr --spk2utt=ark:$sdata1/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \ - --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ - $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; - fi - feats="$feats transform-feats --utt2spk=ark:$sdata1/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" - fi - -# Now rescore the state-level lattices with the adapted features and the -# corresponding model. Prune and determinize the lattices to limit -# their size. - if [ $stage -le 6 ]; then - $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ - sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata1/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \ - $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ - "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; - fi - rm $dir/pre_lat.*.gz - -else ### For models without speaker vectors: - - if $use_fmllr; then - # Estimate fMLLR transforms (note: these may be on top of any - # fMLLR transforms estimated with the baseline GMM system. - if [ $stage -le 5 ]; then # compute fMLLR transforms. - echo "$0: computing fMLLR transforms." - $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \ - gunzip -c $dir/pre_lat.JOB.gz \| \ - sgmm-rescore-lattice --utt2spk=ark:$sdata1/JOB/utt2spk \ - "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \ - lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \ - sgmm-est-fmllr --spk2utt=ark:$sdata1/JOB/spk2utt "$gselect_opt" \ - --fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \ - $srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1; - fi - feats="$feats transform-feats --utt2spk=ark:$sdata1/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" - fi - -# Now rescore the state-level lattices with the adapted features and the -# corresponding model. Prune and determinize the lattices to limit -# their size. - if [ $stage -le 6 ] && $use_fmllr; then - $cmd JOB=1:$nj $dir/log/rescore.JOB.log \ - sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata1/JOB/utt2spk \ - $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \ - lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \ - "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1; - rm $dir/pre_lat.*.gz - else # Already done with decoding if no adaptation needed. - for n in `seq 1 $nj`; do - mv $dir/pre_lat.${n}.gz $dir/lat.${n}.gz - done - fi - -fi - -# The output of this script is the files "lat.*.gz"-- we'll rescore this at -# different acoustic scales to get the final output. - - -if [ $stage -le 7 ]; then - if ! $skip_scoring ; then - [ ! -x local/score.sh ] && \ - echo "Not scoring because local/score.sh does not exist or not executable." 
&& exit 1; - echo "score best paths" - local/score.sh --cmd "$cmd" $data $graphdir $dir || - { echo "$0: Scoring failed. (ignore by '--skip-scoring true')"; exit 1; } - # echo "score confidence and timing with sclite" - # local/score_sclite_conf.sh --cmd "$cmd" --language turkish $data $graphdir $dir - fi -fi -echo "Decoding done." -exit 0; diff --git a/egs/wsj/s5/steps/tandem/make_denlats_sgmm.sh b/egs/wsj/s5/steps/tandem/make_denlats_sgmm.sh deleted file mode 100755 index 6ee4609fb48..00000000000 --- a/egs/wsj/s5/steps/tandem/make_denlats_sgmm.sh +++ /dev/null @@ -1,199 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# Korbinian Riedhammer - -# Create denominator lattices for MMI/MPE training, with SGMM models. If the -# features have fMLLR transforms you have to supply the --transform-dir option. -# It gets any speaker vectors from the "alignment dir" ($srcdir). Note: this is -# possibly a slight mismatch because the speaker vectors come from supervised -# adaptation. - -# Begin configuration section. -nj=4 -cmd=run.pl -sub_split=1 -beam=13.0 -lattice_beam=7.0 -acwt=0.1 -max_active=5000 -transform_dir= -max_mem=20000000 # This will stop the processes getting too large. -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -[ -f ./path.sh ] && . ./path.sh; # source the path. -. parse_options.sh || exit 1; - -if [ $# != 5 ]; then - echo "Usage: steps/tandem/make_denlats_sgmm.sh [options] " - echo " e.g.: steps/tandem/make_denlats_sgmm.sh {mfcc,bottleneck}/data/train data/lang exp/sgmm4a_ali exp/sgmm4a_denlats" - echo "Works for (delta|lda) features, and (with --transform-dir option) such features" - echo " plus transforms." - echo "" - echo "Main options (for others, see top of script file)" - echo " --config # config containing options" - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - echo " --sub-split # e.g. 40; use this for " - echo " # large databases so your jobs will be smaller and" - echo " # will (individually) finish reasonably soon." - echo " --transform-dir # directory to find fMLLR transforms." - exit 1; -fi - -data1=$1 -data2=$2 -lang=$3 -srcdir=$4 # could also be $srcdir, but only if no vectors supplied. -dir=$5 - -splice_opts=`cat $srcdir/splice_opts 2>/dev/null` -normft2=`cat $srcdir/normft2 2>/dev/null` -mkdir -p $dir/log - -utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1; - -sdata1=$data1/split$nj -sdata2=$data2/split$nj -[[ -d $sdata1 && $data1/feats.scp -ot $sdata1 ]] || split_data.sh $data1 $nj || exit 1; -[[ -d $sdata2 && $data2/feats.scp -ot $sdata2 ]] || split_data.sh $data2 $nj || exit 1; - -echo $nj > $dir/num_jobs - -oov=`cat $lang/oov.int` || exit 1; - -mkdir -p $dir - -cp -r $lang $dir/ - -# Compute grammar FST which corresponds to unigram decoding graph. - -cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ - awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ - utils/make_unigram_grammar.pl | fstcompile > $dir/lang/G.fst \ - || exit 1; - -# mkgraph.sh expects a whole directory "lang", so put everything in one directory... -# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and -# final.mdl from $srcdir; the output HCLG.fst goes in $dir/graph. - -if [ -s $dir/dengraph/HCLG.fst ]; then - echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation." 
-else
- utils/mkgraph.sh $dir/lang $srcdir $dir/dengraph || exit 1;
-fi
-
-# Set up features
-if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
-
-case $feat_type in
- delta)
- echo "$0: feature type is $feat_type"
- ;;
- lda)
- echo "$0: feature type is $feat_type"
- cp $srcdir/{lda,final}.mat $dir/ || exit 1;
- ;;
- *) echo "$0: invalid feature type $feat_type" && exit 1;
-esac
-
-# set up feature stream 1; these are usually spectral features, so we will add
-# deltas or splice them
-feats1="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata1/JOB/utt2spk scp:$sdata1/JOB/cmvn.scp scp:$sdata1/JOB/feats.scp ark:- |"
-
-if [ "$feat_type" == "delta" ]; then
- feats1="$feats1 add-deltas ark:- ark:- |"
-elif [ "$feat_type" == "lda" ]; then
- feats1="$feats1 splice-feats $splice_opts ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |"
-fi
-
-# set up feature stream 2; these are usually bottleneck or posterior features,
-# which may be normalized if desired
-feats2="scp:$sdata2/JOB/feats.scp"
-
-if [ "$normft2" == "true" ]; then
- feats2="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata2/JOB/utt2spk scp:$sdata2/JOB/cmvn.scp $feats2 ark:- |"
-fi
-
-# assemble tandem features
-feats="ark,s,cs:paste-feats '$feats1' '$feats2' ark:- |"
-
-# add transformation, if applicable
-if [ "$feat_type" == "lda" ]; then
- feats="$feats transform-feats $dir/final.mat ark:- ark:- |"
-fi
-
-# splicing/normalization options
-cp $srcdir/{splice_opts,normft2,tandem} $dir 2>/dev/null
-
-
-if [ ! -z "$transform_dir" ]; then # add transforms to features...
- echo "$0: using fMLLR transforms from $transform_dir"
- [ ! -f $transform_dir/trans.1 ] && echo "Expected $transform_dir/trans.1 to exist."
- [ "`cat $transform_dir/num_jobs`" -ne "$nj" ] \
- && echo "$0: mismatch in number of jobs with $transform_dir" && exit 1;
- [ -f $srcdir/final.mat ] && ! cmp $transform_dir/final.mat $srcdir/final.mat && \
- echo "$0: LDA transforms differ between $srcdir and $transform_dir"
- feats="$feats transform-feats --utt2spk=ark:$sdata1/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
-else
- echo "Assuming you don't have a SAT system, since no --transform-dir option supplied "
-fi
-
-if [ -f $srcdir/gselect.1.gz ]; then
- gselect_opt="--gselect=ark:gunzip -c $srcdir/gselect.JOB.gz|"
-else
- echo "$0: no such file $srcdir/gselect.1.gz" && exit 1;
-fi
-
-if [ -f $srcdir/vecs.1 ]; then
- spkvecs_opt="--spk-vecs=ark:$srcdir/vecs.JOB --utt2spk=ark:$sdata1/JOB/utt2spk"
-else
- if [ -f $srcdir/final.alimdl ]; then
- echo "You seem to have an SGMM system with speaker vectors,"
- echo "yet we can't find speaker vectors. Perhaps you supplied"
- echo "the model directory instead of the alignment directory?"
- exit 1; - fi -fi - -if [ $sub_split -eq 1 ]; then - $cmd JOB=1:$nj $dir/log/decode_den.JOB.log \ - sgmm-latgen-faster $spkvecs_opt "$gselect_opt" --beam=$beam \ - --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ - --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ - $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; -else - for n in `seq $nj`; do - if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $srcdir/final.mdl ]; then - echo "Not processing subset $n as already done (delete $dir/.done.$n if not)"; - else - ssdata1=$data1/split$nj/$n/split${sub_split}utt; - split_data.sh --per-utt $sdata1/$n $sub_split || exit 1; - ssdata2=$data2/split$nj/$n/split${sub_split}utt; - split_data.sh --per-utt $sdata2/$n $sub_split || exit 1; - mkdir -p $dir/log/$n - mkdir -p $dir/part - feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split${sub_split}utt/JOB/:g` - spkvecs_opt_subset=`echo $spkvecs_opt | sed "s/JOB/$n/g"` - gselect_opt_subset=`echo $gselect_opt | sed "s/JOB/$n/g"` - $cmd JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \ - sgmm-latgen-faster $spkvecs_opt_subset "$gselect_opt_subset" \ - --beam=$beam --lattice-beam=$lattice_beam \ - --acoustic-scale=$acwt --max-mem=$max_mem --max-active=$max_active \ - --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ - $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1; - echo Merging archives for data subset $n - rm $dir/.error 2>/dev/null; - for k in `seq $sub_split`; do - gunzip -c $dir/lat.$n.$k.gz || touch $dir/.error; - done | gzip -c > $dir/lat.$n.gz || touch $dir/.error; - [ -f $dir/.error ] && echo Merging lattices for subset $n failed && exit 1; - rm $dir/lat.$n.*.gz - touch $dir/.done.$n - fi - done -fi - - -echo "$0: done generating denominator lattices with SGMMs." diff --git a/egs/wsj/s5/steps/tandem/train_mmi_sgmm.sh b/egs/wsj/s5/steps/tandem/train_mmi_sgmm.sh deleted file mode 100755 index 3077fbceef3..00000000000 --- a/egs/wsj/s5/steps/tandem/train_mmi_sgmm.sh +++ /dev/null @@ -1,193 +0,0 @@ -#!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# Korbinian Riedhammer - -# MMI training (or optionally boosted MMI, if you give the --boost option), -# for SGMMs. 4 iterations (by default) of Extended Baum-Welch update. -# -# Begin configuration section. -cmd=run.pl -num_iters=4 -boost=0.0 -cancel=true # if true, cancel num and den counts on each frame. -acwt=0.1 -stage=0 - -update_opts= -transform_dir= -# End configuration section - -echo "$0 $@" # Print the command line for logging - -[ -f ./path.sh ] && . ./path.sh; # source the path. -. parse_options.sh || exit 1; - -if [ $# -ne 6 ]; then - echo "Usage: steps/tandem/train_mmi_sgmm.sh " - echo " e.g.: steps/tandem/train_mmi_sgmm.sh {mfcc,bottleneck}/data1/train_si84 data1/lang exp/tri2b_ali_si84 exp/tri2b_denlats_si84 exp/tri2b_mmi" - echo "Main options (for others, see top of script file)" - echo " --boost # (e.g. 0.1), for boosted MMI. (default 0)" - echo " --cancel (true|false) # cancel stats (true by default)" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - echo " --config # config containing options" - echo " --stage # stage to do partial re-run from." - echo " --transform-dir # directory to find fMLLR transforms." 
- exit 1; -fi - -data1=$1 -data2=$2 -lang=$3 -alidir=$4 -denlatdir=$5 -dir=$6 -mkdir -p $dir/log - -utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; -cp $lang/phones.txt $dir || exit 1; - -for f in $data1/feats.scp $data2/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.1.gz; do - [ ! -f $f ] && echo "$0: no such file $f" && exit 1; -done -nj=`cat $alidir/num_jobs` || exit 1; -[ "$nj" -ne "`cat $denlatdir/num_jobs`" ] && \ - echo "$alidir and $denlatdir have different num-jobs" && exit 1; - -mkdir -p $dir/log -echo $nj > $dir/num_jobs - -cp $alidir/{final.mdl,tree} $dir -silphonelist=`cat $lang/phones/silence.csl` || exit 1; - -# Set up features - -sdata1=$data1/split$nj -sdata2=$data2/split$nj -[[ -d $sdata1 && $data1/feats.scp -ot $sdata1 ]] || split_data.sh $data1 $nj || exit 1; -[[ -d $sdata2 && $data2/feats.scp -ot $sdata2 ]] || split_data.sh $data2 $nj || exit 1; - -splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options. -normft2=`cat $alidir/normft2 2>/dev/null` - -if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi - -case $feat_type in - delta) - echo "$0: feature type is $feat_type" - ;; - lda) - echo "$0: feature type is $feat_type" - cp $alidir/{lda,final}.mat $dir/ || exit 1; - ;; - *) echo "$0: invalid feature type $feat_type" && exit 1; -esac - -# set up feature stream 1; this are usually spectral features, so we will add -# deltas or splice them -feats1="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata1/JOB/utt2spk scp:$sdata1/JOB/cmvn.scp scp:$sdata1/JOB/feats.scp ark:- |" - -if [ "$feat_type" == "delta" ]; then - feats1="$feats1 add-deltas ark:- ark:- |" -elif [ "$feat_type" == "lda" ]; then - feats1="$feats1 splice-feats $splice_opts ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |" -fi - -# set up feature stream 2; this are usually bottleneck or posterior features, -# which may be normalized if desired -feats2="scp:$sdata2/JOB/feats.scp" - -if [ "$normft2" == "true" ]; then - feats2="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata2/JOB/utt2spk scp:$sdata2/JOB/cmvn.scp $feats2 ark:- |" -fi - -# assemble tandem features -feats="ark,s,cs:paste-feats '$feats1' '$feats2' ark:- |" - -# add transformation, if applicable -if [ "$feat_type" == "lda" ]; then - feats="$feats transform-feats $dir/final.mat ark:- ark:- |" -fi - -# splicing/normalization options -cp $alidir/{splice_opts,normft2,tandem} $dir 2>/dev/null - - -if [ ! -z "$transform_dir" ]; then - echo "$0: using transforms from $transform_dir" - [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" \ - && exit 1; - feats="$feats transform-feats --utt2spk=ark:$sdata1/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" -else - echo "$0: no fMLLR transforms." -fi - -if [ -f $alidir/vecs.1 ]; then - echo "$0: using speaker vectors from $alidir" - spkvecs_opt="--spk-vecs=ark:$alidir/vecs.JOB --utt2spk=ark:$sdata1/JOB/utt2spk" -else - echo "$0: no speaker vectors." 
- spkvecs_opt= -fi - -if [ -f $alidir/gselect.1.gz ]; then - echo "$0: using Gaussian-selection info from $alidir" - gselect_opt="--gselect=ark:gunzip -c $alidir/gselect.JOB.gz|" -else - echo "$0: error: no Gaussian-selection info found" && exit 1; -fi - -lats="ark:gunzip -c $denlatdir/lat.JOB.gz|" -if [[ "$boost" != "0.0" && "$boost" != 0 ]]; then - lats="$lats lattice-boost-ali --b=$boost --silence-phones=$silphonelist $alidir/final.mdl ark:- 'ark,s,cs:gunzip -c $alidir/ali.JOB.gz|' ark:- |" -fi - - -cur_mdl=$alidir/final.mdl -x=0 -while [ $x -lt $num_iters ]; do - echo "Iteration $x of MMI training" - # Note: the num and den states are accumulated at the same time, so we - # can cancel them per frame. - if [ $stage -le $x ]; then - $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \ - sgmm-rescore-lattice "$gselect_opt" $spkvecs_opt $cur_mdl "$lats" "$feats" ark:- \| \ - lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ - sum-post --merge=$cancel --scale1=-1 \ - ark:- "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-post ark:- ark:- |" ark:- \| \ - sgmm-acc-stats2 "$gselect_opt" $spkvecs_opt $cur_mdl "$feats" ark,s,cs:- \ - $dir/num_acc.$x.JOB.acc $dir/den_acc.$x.JOB.acc || exit 1; - - n=`echo $dir/{num,den}_acc.$x.*.acc | wc -w`; - [ "$n" -ne $[$nj*2] ] && \ - echo "Wrong number of MMI accumulators $n versus 2*$nj" && exit 1; - $cmd $dir/log/den_acc_sum.$x.log \ - sgmm-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || exit 1; - rm $dir/den_acc.$x.*.acc - $cmd $dir/log/num_acc_sum.$x.log \ - sgmm-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1; - rm $dir/num_acc.$x.*.acc - - $cmd $dir/log/update.$x.log \ - sgmm-est-ebw $update_opts $cur_mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1; - fi - cur_mdl=$dir/$[$x+1].mdl - - - # Some diagnostics: the objective function progress and auxiliary-function - # improvement. Note: this code is same as in train_mmi.sh - tail -n 50 $dir/log/acc.$x.*.log | perl -e '$acwt=shift @ARGV; while() { if(m/gmm-acc-stats2.+Overall weighted acoustic likelihood per frame was (\S+) over (\S+) frames/) { $tot_aclike += $1*$2; $tot_frames1 += $2; } if(m|lattice-to-post.+Overall average log-like/frame is (\S+) over (\S+) frames. Average acoustic like/frame is (\S+)|) { $tot_den_lat_like += $1*$2; $tot_frames2 += $2; $tot_den_aclike += $3*$2; } } if (abs($tot_frames1 - $tot_frames2) > 0.01*($tot_frames1 + $tot_frames2)) { print STDERR "Frame-counts disagree $tot_frames1 versus $tot_frames2\n"; } $tot_den_lat_like /= $tot_frames2; $tot_den_aclike /= $tot_frames2; $tot_aclike *= ($acwt / $tot_frames1); $num_like = $tot_aclike + $tot_den_aclike; $per_frame_objf = $num_like - $tot_den_lat_like; print "$per_frame_objf $tot_frames1\n"; ' $acwt > $dir/tmpf - objf=`cat $dir/tmpf | awk '{print $1}'`; - nf=`cat $dir/tmpf | awk '{print $2}'`; - rm $dir/tmpf - impr=`grep -w Overall $dir/log/update.$x.log | awk '{x += $10*$12;} END{print x;}'` - impr=`perl -e "print ($impr*$acwt/$nf);"` # We multiply by acwt, and divide by $nf which is the "real" number of frames. 
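- # To summarize the diagnostics: the perl snippet pulls the numerator
- # acoustic log-likelihood out of the acc logs and the denominator lattice
- # log-likelihood out of the lattice-to-post logs, and prints their
- # (acoustic-scaled) per-frame difference, i.e. the MMI objective; $impr is
- # the auxiliary-function improvement reported in the sgmm-est-ebw log.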
- echo "Iteration $x: objf was $objf, MMI auxf change was $impr" | tee $dir/objf.$x.log - x=$[$x+1] -done - -echo "MMI training finished" - -rm $dir/final.mdl 2>/dev/null -ln -s $x.mdl $dir/final.mdl - -exit 0; diff --git a/egs/wsj/s5/steps/tandem/train_sgmm.sh b/egs/wsj/s5/steps/tandem/train_sgmm.sh deleted file mode 100755 index 48f392141a1..00000000000 --- a/egs/wsj/s5/steps/tandem/train_sgmm.sh +++ /dev/null @@ -1,315 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. -# Korbinian Riedhammer - -# SGMM training, with speaker vectors. This script would normally be called on -# top of fMLLR features obtained from a conventional system, but it also works -# on top of any type of speaker-independent features (based on -# deltas+delta-deltas or LDA+MLLT). For more info on SGMMs, see the paper "The -# subspace Gaussian mixture model--A structured model for speech recognition". -# (Computer Speech and Language, 2011). - -# Begin configuration section. -nj=4 -cmd=run.pl -stage=-6 -context_opts= # e.g. set it to "--context-width=5 --central-position=2" for a -# quinphone system. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" -num_iters=25 # Total number of iterations -num_iters_alimdl=3 # Number of iterations for estimating alignment model. -max_iter_inc=15 # Last iter to increase #substates on. -realign_iters="5 10 15"; # Iters to realign on. -spkvec_iters="5 8 12 17" # Iters to estimate speaker vectors on. -increase_dim_iters="6 8"; # Iters on which to increase phn dim and/or spk dim; - # rarely necessary, and if it is, only the 1st will normally be necessary. -rand_prune=0.1 # Randomized-pruning parameter for posteriors, to speed up training. -phn_dim= # You can use this to set the phonetic subspace dim. [default: feat-dim+1] -spk_dim= # You can use this to set the speaker subspace dim. [default: feat-dim] -power=0.2 # Exponent for number of gaussians according to occurrence counts -beam=8 -retry_beam=40 -cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves -normft2=true -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -. parse_options.sh || exit 1; - - -if [ $# != 8 ]; then - echo "Usage: steps/tandem/train_sgmm.sh " - echo " e.g.: steps/tandem/train_sgmm.sh 3500 10000 {mfcc,bottleneck},data/train_si84 data/lang \\" - echo " exp/tri3b_ali_si84 exp/ubm4a/final.ubm exp/sgmm4a" - echo "main options (for others, see top of script file)" - echo " --config # config containing options" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - echo " --silence-weight # weight for silence (e.g. 0.5 or 0.0)" - echo " --num-iters <#iters> # Number of iterations of E-M" - exit 1; -fi - - -num_leaves=$1 -totsubstates=$2 -data1=$3 -data2=$4 -lang=$5 -alidir=$6 -ubm=$7 -dir=$8 - -# Check some files. -for f in $data1/feats.scp $data2/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $ubm; do - [ ! -f $f ] && echo "$0: no such file $f" && exit 1; -done - - -# Set some variables. -oov=`cat $lang/oov.int` -silphonelist=`cat $lang/phones/silence.csl` -numsubstates=$num_leaves # Initial #-substates. -incsubstates=$[($totsubstates-$numsubstates)/$max_iter_inc] # per-iter increment for #substates -feat_dim=`gmm-info $alidir/final.mdl 2>/dev/null | awk '/feature dimension/{print $NF}'` || exit 1; -[ $feat_dim -eq $feat_dim ] || exit 1; # make sure it's numeric. 
-[ -z $phn_dim ] && phn_dim=$[$feat_dim+1] -[ -z $spk_dim ] && spk_dim=$feat_dim -nj=`cat $alidir/num_jobs` || exit 1; - -mkdir -p $dir/log -echo $nj > $dir/num_jobs - -utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; -cp $lang/phones.txt $dir || exit 1; - -sdata1=$data1/split$nj; -sdata2=$data2/split$nj; -[[ -d $sdata1 && $data1/feats.scp -ot $sdata1 ]] || split_data.sh $data1 $nj || exit 1; -[[ -d $sdata2 && $data2/feats.scp -ot $sdata2 ]] || split_data.sh $data2 $nj || exit 1; - -spkvecs_opt= # Empty option for now, until we estimate the speaker vectors. -gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" - -## Set up features. -splice_opts=`cat $alidir/splice_opts 2>/dev/null` # frame-splicing options. -normft2=`cat $alidir/normft2 2>/dev/null` - -if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi - -case $feat_type in - delta) - echo "$0: feature type is $feat_type" - ;; - lda) - echo "$0: feature type is $feat_type" - cp $alidir/{lda,final}.mat $dir/ || exit 1; - ;; - *) echo "$0: invalid feature type $feat_type" && exit 1; -esac - -# set up feature stream 1; this are usually spectral features, so we will add -# deltas or splice them -feats1="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata1/JOB/utt2spk scp:$sdata1/JOB/cmvn.scp scp:$sdata1/JOB/feats.scp ark:- |" - -if [ "$feat_type" == "delta" ]; then - feats1="$feats1 add-deltas ark:- ark:- |" -elif [ "$feat_type" == "lda" ]; then - feats1="$feats1 splice-feats $splice_opts ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |" -fi - -# set up feature stream 2; this are usually bottleneck or posterior features, -# which may be normalized if desired -feats2="scp:$sdata2/JOB/feats.scp" - -if [ "$normft2" == "true" ]; then - feats2="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata2/JOB/utt2spk scp:$sdata2/JOB/cmvn.scp $feats2 ark:- |" -fi - -# assemble tandem features -feats="ark,s,cs:paste-feats '$feats1' '$feats2' ark:- |" - -# add transformation, if applicable -if [ "$feat_type" == "lda" ]; then - feats="$feats transform-feats $dir/final.mat ark:- ark:- |" -fi - -# splicing/normalization options -cp $alidir/{splice_opts,normft2,tandem} $dir 2>/dev/null - -if [ -f $alidir/trans.1 ]; then - echo "$0: using transforms from $alidir" - feats="$feats transform-feats --utt2spk=ark:$sdata1/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |" -fi -## - - -if [ $stage -le -6 ]; then - echo "$0: accumulating tree stats" - $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \ - acc-tree-stats --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \ - "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1; - [ "`ls $dir/*.treeacc | wc -w`" -ne "$nj" ] && echo "$0: Wrong #tree-stats" && exit 1; - sum-tree-stats $dir/treeacc $dir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1; - rm $dir/*.treeacc -fi - -if [ $stage -le -5 ]; then - echo "$0: Getting questions for tree clustering." - # preparing questions, roots file... 
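- # cluster-phones derives data-driven questions (sets of phones) from the
- # tree stats; the hand-written extra_questions are appended, and
- # compile-questions converts the result into the form build-tree expects.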
- cluster-phones $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1;
- cat $lang/phones/extra_questions.int >> $dir/questions.int
- compile-questions $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1;
-
- echo "$0: Building the tree"
- $cmd $dir/log/build_tree.log \
- build-tree --verbose=1 --max-leaves=$num_leaves \
- --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \
- $dir/questions.qst $lang/topo $dir/tree || exit 1;
-fi
-
-if [ $stage -le -4 ]; then
- echo "$0: Initializing the model"
- # Note: if phn_dim > feat_dim+1 or spk_dim > feat_dim, these dims
- # will be truncated on initialization.
- $cmd $dir/log/init_sgmm.log \
- sgmm-init --phn-space-dim=$phn_dim --spk-space-dim=$spk_dim $lang/topo \
- $dir/tree $ubm $dir/0.mdl || exit 1;
-fi
-
-if [ $stage -le -3 ]; then
- echo "$0: doing Gaussian selection"
- $cmd JOB=1:$nj $dir/log/gselect.JOB.log \
- sgmm-gselect $dir/0.mdl "$feats" \
- "ark,t:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
-fi
-
-if [ $stage -le -2 ]; then
- echo "$0: compiling training graphs"
- text="ark:sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $sdata1/JOB/text|"
- $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
- compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/0.mdl $lang/L.fst \
- "$text" "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
-fi
-
-if [ $stage -le -1 ]; then
- echo "$0: Converting alignments"
- $cmd JOB=1:$nj $dir/log/convert_ali.JOB.log \
- convert-ali $alidir/final.mdl $dir/0.mdl $dir/tree "ark:gunzip -c $alidir/ali.JOB.gz|" \
- "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
-fi
-
-x=0
-while [ $x -lt $num_iters ]; do
- echo "$0: training pass $x ... "
- if echo $realign_iters | grep -w $x >/dev/null && [ $stage -le $x ]; then
- echo "$0: re-aligning data"
- $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \
- sgmm-align-compiled $spkvecs_opt $scale_opts "$gselect_opt" \
- --utt2spk=ark:$sdata1/JOB/utt2spk --beam=$beam --retry-beam=$retry_beam \
- $dir/$x.mdl "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \
- "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
- fi
- if [ $spk_dim -gt 0 ] && echo $spkvec_iters | grep -w $x >/dev/null; then
- if [ $stage -le $x ]; then
- $cmd JOB=1:$nj $dir/log/spkvecs.$x.JOB.log \
- ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \
- weight-silence-post 0.01 $silphonelist $dir/$x.mdl ark:- ark:- \| \
- sgmm-est-spkvecs --rand-prune=$rand_prune --spk2utt=ark:$sdata1/JOB/spk2utt \
- $spkvecs_opt "$gselect_opt" $dir/$x.mdl "$feats" ark,s,cs:- \
- ark:$dir/tmp_vecs.JOB '&&' mv $dir/tmp_vecs.JOB $dir/vecs.JOB || exit 1;
- fi
- spkvecs_opt="--spk-vecs=ark:$dir/vecs.JOB"
- fi
- if [ $x -eq 0 ]; then
- flags=vwcSt # on the first iteration, don't update projections M or N
- elif [ $spk_dim -gt 0 -a $[$x%2] -eq 1 -a $x -ge `echo $spkvec_iters | awk '{print $1}'` ]; then
- # Update N if we have speaker-vector space and x is odd,
- # and we've already updated the speaker vectors...
- flags=vNwcSt
- else
- # otherwise update M.
- flags=vMwcSt
- fi
-
- if [ $stage -le $x ]; then
- $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
- sgmm-acc-stats $spkvecs_opt --utt2spk=ark:$sdata1/JOB/utt2spk \
- --update-flags=$flags "$gselect_opt" --rand-prune=$rand_prune \
- $dir/$x.mdl "$feats" "ark,s,cs:gunzip -c $dir/ali.JOB.gz | ali-to-post ark:- ark:-|" \
- $dir/$x.JOB.acc || exit 1;
- fi
-
- # The next option is needed if the user specifies a phone or speaker sub-space
- # dimension that's higher than the "normal" one.
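- # (With the default phn_dim=feat_dim+1 and spk_dim=feat_dim these options
- # have no effect. If spk_dim is raised above feat_dim, the speaker vectors
- # already on disk must be resized as well, which is what the copy-vector
- # call below does.)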
-  increase_dim_opts=
-  if echo $increase_dim_iters | grep -w $x >/dev/null; then
-    increase_dim_opts="--increase-phn-dim=$phn_dim --increase-spk-dim=$spk_dim"
-    # Note: the command below might have a null effect on some iterations.
-    if [ $spk_dim -gt $feat_dim ]; then
-      $cmd JOB=1:$nj $dir/log/copy_vecs.$x.JOB.log \
-        copy-vector --print-args=false --change-dim=$spk_dim \
-          ark:$dir/vecs.JOB ark:$dir/vecs_tmp.JOB '&&' \
-        mv $dir/vecs_tmp.JOB $dir/vecs.JOB || exit 1;
-    fi
-  fi
-
-  if [ $stage -le $x ]; then
-    $cmd $dir/log/update.$x.log \
-      sgmm-est --update-flags=$flags --split-substates=$numsubstates $increase_dim_opts \
-        --power=$power --write-occs=$dir/$[$x+1].occs $dir/$x.mdl "sgmm-sum-accs - $dir/$x.*.acc|" \
-        $dir/$[$x+1].mdl || exit 1;
-    rm $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs 2>/dev/null
-  fi
-
-  if [ $x -lt $max_iter_inc ]; then
-    numsubstates=$[$numsubstates+$incsubstates]
-  fi
-  x=$[$x+1];
-done
-
-rm $dir/final.mdl $dir/final.occs 2>/dev/null
-ln -s $x.mdl $dir/final.mdl
-ln -s $x.occs $dir/final.occs
-
-if [ $spk_dim -gt 0 ]; then
-  # We need to create an "alignment model" that's been trained
-  # without the speaker vectors, to do the first-pass decoding with
-  # at test time.
-
-  # We do this for a few iters, in this recipe.
-  final_mdl=$dir/$x.mdl
-  cur_alimdl=$dir/$x.mdl
-  while [ $x -lt $[$num_iters+$num_iters_alimdl] ]; do
-    echo "$0: building alignment model (pass $x)"
-    if [ $x -eq $num_iters ]; then # 1st pass of building alimdl.
-      flags=MwcS # don't update v the first time.  Note-- we never update transitions.
-                 # they wouldn't change anyway as we use the same alignment as previously.
-    else
-      flags=vMwcS
-    fi
-    if [ $stage -le $x ]; then
-      $cmd JOB=1:$nj $dir/log/acc_ali.$x.JOB.log \
-        ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \
-        sgmm-post-to-gpost $spkvecs_opt "$gselect_opt" \
-          --utt2spk=ark:$sdata1/JOB/utt2spk $final_mdl "$feats" ark,s,cs:- ark:- \| \
-        sgmm-acc-stats-gpost --rand-prune=$rand_prune --update-flags=$flags \
-          $cur_alimdl "$feats" ark,s,cs:- $dir/$x.JOB.aliacc || exit 1;
-      $cmd $dir/log/update_ali.$x.log \
-        sgmm-est --update-flags=$flags --remove-speaker-space=true --power=$power $cur_alimdl \
-          "sgmm-sum-accs - $dir/$x.*.aliacc|" $dir/$[$x+1].alimdl || exit 1;
-      rm $dir/$x.*.aliacc || exit 1;
-      [ $x -gt $num_iters ] && rm $dir/$x.alimdl
-    fi
-    cur_alimdl=$dir/$[$x+1].alimdl
-    x=$[$x+1]
-  done
-  rm $dir/final.alimdl 2>/dev/null
-  ln -s $x.alimdl $dir/final.alimdl
-fi
-
-utils/summarize_warnings.pl $dir/log
-
-echo Done
diff --git a/egs/wsj/s5/steps/train_mmi_sgmm.sh b/egs/wsj/s5/steps/train_mmi_sgmm.sh
deleted file mode 100755
index cb0700e92fc..00000000000
--- a/egs/wsj/s5/steps/train_mmi_sgmm.sh
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/bin/bash
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
-
-# MMI training (or optionally boosted MMI, if you give the --boost option),
-# for SGMMs.  4 iterations (by default) of Extended Baum-Welch update.
-#
-# Begin configuration section.
-cmd=run.pl
-num_iters=4
-boost=0.0
-cancel=true # if true, cancel num and den counts on each frame.
-acwt=0.1
-stage=0
-
-update_opts=
-transform_dir=
-# End configuration section
-
-echo "$0 $@"  # Print the command line for logging
-
-[ -f ./path.sh ] && . ./path.sh; # source the path.
-. 
parse_options.sh || exit 1; - -if [ $# -ne 5 ]; then - echo "Usage: steps/train_mmi_sgmm.sh " - echo " e.g.: steps/train_mmi_sgmm.sh data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri2b_denlats_si84 exp/tri2b_mmi" - echo "Main options (for others, see top of script file)" - echo " --boost # (e.g. 0.1), for boosted MMI. (default 0)" - echo " --cancel (true|false) # cancel stats (true by default)" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - echo " --config # config containing options" - echo " --stage # stage to do partial re-run from." - echo " --transform-dir # directory to find fMLLR transforms." - exit 1; -fi - -data=$1 -lang=$2 -alidir=$3 -denlatdir=$4 -dir=$5 -mkdir -p $dir/log - -utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; -cp $lang/phones.txt $dir || exit 1; - -for f in $data/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.1.gz; do - [ ! -f $f ] && echo "$0: no such file $f" && exit 1; -done -nj=`cat $alidir/num_jobs` || exit 1; -[ "$nj" -ne "`cat $denlatdir/num_jobs`" ] && \ - echo "$alidir and $denlatdir have different num-jobs" && exit 1; - -sdata=$data/split$nj -splice_opts=`cat $alidir/splice_opts 2>/dev/null` -cmvn_opts=`cat $alidir/cmvn_opts 2>/dev/null` -mkdir -p $dir/log -[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; -cp $alidir/splice_opts $dir 2>/dev/null -cp $alidir/cmvn_opts $dir 2>/dev/null # cmn/cmvn option. -echo $nj > $dir/num_jobs - -cp $alidir/tree $dir -cp $alidir/final.mdl $dir/0.mdl -cp $alidir/final.alimdl $dir - -silphonelist=`cat $lang/phones/silence.csl` || exit 1; - -# Set up features - -if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi -echo "$0: feature type is $feat_type" - -case $feat_type in - delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; - lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |" - cp $alidir/final.mat $dir - ;; - *) echo "Invalid feature type $feat_type" && exit 1; -esac - -if [ ! -z "$transform_dir" ]; then - echo "$0: using transforms from $transform_dir" - [ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" \ - && exit 1; - feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |" -else - echo "$0: no fMLLR transforms." -fi - -if [ -f $alidir/vecs.1 ]; then - echo "$0: using speaker vectors from $alidir" - spkvecs_opt="--spk-vecs=ark:$alidir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk" -else - echo "$0: no speaker vectors." - spkvecs_opt= -fi - -if [ -f $alidir/gselect.1.gz ]; then - echo "$0: using Gaussian-selection info from $alidir" - gselect_opt="--gselect=ark,s,cs:gunzip -c $alidir/gselect.JOB.gz|" -else - echo "$0: error: no Gaussian-selection info found" && exit 1; -fi - -lats="ark:gunzip -c $denlatdir/lat.JOB.gz|" -if [[ "$boost" != "0.0" && "$boost" != 0 ]]; then - lats="$lats lattice-boost-ali --b=$boost --silence-phones=$silphonelist $alidir/final.mdl ark:- 'ark,s,cs:gunzip -c $alidir/ali.JOB.gz|' ark:- |" -fi - -x=0 -while [ $x -lt $num_iters ]; do - echo "Iteration $x of MMI training" - # Note: the num and den states are accumulated at the same time, so we - # can cancel them per frame. 
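  # (For reference, the objective maximized here is MMI, or boosted MMI when
  # --boost b > 0: per utterance u with reference W_u and acoustic scale k,
  #   F = sum_u log [ p(X_u|W_u)^k P(W_u) / sum_W p(X_u|W)^k P(W) e^(-b A(W,W_u)) ]
  # where A(W, W_u) is the frame accuracy of hypothesis W.  The
  # lattice-boost-ali stage above applies the e^(-b A) weighting to the
  # denominator lattice, and sum-post --scale1=-1 negates the denominator
  # posteriors before merging them with the numerator alignment posteriors,
  # so that num and den stats falling on the same frame cancel when
  # --cancel=true.)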
-  if [ $stage -le $x ]; then
-    $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
-      sgmm-rescore-lattice --speedup=true "$gselect_opt" $spkvecs_opt $dir/$x.mdl "$lats" "$feats" ark:- \| \
-      lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
-      sum-post --merge=$cancel --scale1=-1 \
-        ark:- "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-post ark:- ark:- |" ark:- \| \
-      sgmm-acc-stats2 "$gselect_opt" $spkvecs_opt $dir/$x.mdl "$feats" ark,s,cs:- \
-        $dir/num_acc.$x.JOB.acc $dir/den_acc.$x.JOB.acc || exit 1;
-
-    n=`echo $dir/{num,den}_acc.$x.*.acc | wc -w`;
-    [ "$n" -ne $[$nj*2] ] && \
-      echo "Wrong number of MMI accumulators $n versus 2*$nj" && exit 1;
-    $cmd $dir/log/den_acc_sum.$x.log \
-      sgmm-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || exit 1;
-    rm $dir/den_acc.$x.*.acc
-    $cmd $dir/log/num_acc_sum.$x.log \
-      sgmm-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1;
-    rm $dir/num_acc.$x.*.acc
-
-    $cmd $dir/log/update.$x.log \
-      sgmm-est-ebw $update_opts $dir/$x.mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1;
-  fi
-
-  # Some diagnostics: the objective function progress and auxiliary-function
-  # improvement.  Note: this code is the same as in train_mmi.sh
-  tail -n 50 $dir/log/acc.$x.*.log | perl -e '$acwt=shift @ARGV; while(<STDIN>) { if(m/gmm-acc-stats2.+Overall weighted acoustic likelihood per frame was (\S+) over (\S+) frames/) { $tot_aclike += $1*$2; $tot_frames1 += $2; } if(m|lattice-to-post.+Overall average log-like/frame is (\S+) over (\S+) frames.  Average acoustic like/frame is (\S+)|) { $tot_den_lat_like += $1*$2; $tot_frames2 += $2; $tot_den_aclike += $3*$2; } } if (abs($tot_frames1 - $tot_frames2) > 0.01*($tot_frames1 + $tot_frames2)) { print STDERR "Frame-counts disagree $tot_frames1 versus $tot_frames2\n"; } $tot_den_lat_like /= $tot_frames2; $tot_den_aclike /= $tot_frames2; $tot_aclike *= ($acwt / $tot_frames1); $num_like = $tot_aclike + $tot_den_aclike; $per_frame_objf = $num_like - $tot_den_lat_like; print "$per_frame_objf $tot_frames1\n"; ' $acwt > $dir/tmpf
-  objf=`cat $dir/tmpf | awk '{print $1}'`;
-  nf=`cat $dir/tmpf | awk '{print $2}'`;
-  rm $dir/tmpf
-  impr=`grep -w Overall $dir/log/update.$x.log | awk '{x += $10*$12;} END{print x;}'`
-  impr=`perl -e "print ($impr*$acwt/$nf);"` # We multiply by acwt, and divide by $nf which is the "real" number of frames.
-  echo "Iteration $x: objf was $objf, MMI auxf change was $impr" | tee $dir/objf.$x.log
-  x=$[$x+1]
-done
-
-echo "MMI training finished"
-
-rm $dir/final.mdl 2>/dev/null
-ln -s $x.mdl $dir/final.mdl
-
-exit 0;
diff --git a/egs/wsj/s5/steps/train_sgmm.sh b/egs/wsj/s5/steps/train_sgmm.sh
deleted file mode 100755
index 0d372be2d84..00000000000
--- a/egs/wsj/s5/steps/train_sgmm.sh
+++ /dev/null
@@ -1,280 +0,0 @@
-#!/bin/bash
-
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
-
-# SGMM training, with speaker vectors.  This script would normally be called on
-# top of fMLLR features obtained from a conventional system, but it also works
-# on top of any type of speaker-independent features (based on
-# deltas+delta-deltas or LDA+MLLT).  For more info on SGMMs, see the paper "The
-# subspace Gaussian mixture model--A structured model for speech recognition".
-# (Computer Speech and Language, 2011).
-
-# Begin configuration section.
-nj=4
-cmd=run.pl
-stage=-6
-context_opts=  # e.g. set it to "--context-width=5 --central-position=2" for a
-# quinphone system.
-scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" -num_iters=25 # Total number of iterations -num_iters_alimdl=3 # Number of iterations for estimating alignment model. -max_iter_inc=15 # Last iter to increase #substates on. -realign_iters="5 10 15"; # Iters to realign on. -spkvec_iters="5 8 12 17" # Iters to estimate speaker vectors on. -increase_dim_iters="6 8"; # Iters on which to increase phn dim and/or spk dim; - # rarely necessary, and if it is, only the 1st will normally be necessary. -rand_prune=0.1 # Randomized-pruning parameter for posteriors, to speed up training. -phn_dim= # You can use this to set the phonetic subspace dim. [default: feat-dim+1] -spk_dim= # You can use this to set the speaker subspace dim. [default: feat-dim] -power=0.25 # Exponent for number of gaussians according to occurrence counts -beam=8 -retry_beam=40 -cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -. parse_options.sh || exit 1; - - -if [ $# != 7 ]; then - echo "Usage: steps/train_sgmm.sh " - echo " e.g.: steps/train_sgmm.sh 3500 10000 data/train_si84 data/lang \\" - echo " exp/tri3b_ali_si84 exp/ubm4a/final.ubm exp/sgmm4a" - echo "main options (for others, see top of script file)" - echo " --config # config containing options" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - echo " --silence-weight # weight for silence (e.g. 0.5 or 0.0)" - echo " --num-iters <#iters> # Number of iterations of E-M" - exit 1; -fi - - -num_leaves=$1 -totsubstates=$2 -data=$3 -lang=$4 -alidir=$5 -ubm=$6 -dir=$7 - -# Check some files. -for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $ubm; do - [ ! -f $f ] && echo "$0: no such file $f" && exit 1; -done - - -# Set some variables. -oov=`cat $lang/oov.int` -silphonelist=`cat $lang/phones/silence.csl` -numsubstates=$num_leaves # Initial #-substates. -incsubstates=$[($totsubstates-$numsubstates)/$max_iter_inc] # per-iter increment for #substates -feat_dim=`gmm-info $alidir/final.mdl 2>/dev/null | awk '/feature dimension/{print $NF}'` || exit 1; -[ $feat_dim -eq $feat_dim ] || exit 1; # make sure it's numeric. -[ -z $phn_dim ] && phn_dim=$[$feat_dim+1] -[ -z $spk_dim ] && spk_dim=$feat_dim -nj=`cat $alidir/num_jobs` || exit 1; -ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1; - -mkdir -p $dir/log -echo $nj > $dir/num_jobs -sdata=$data/split$nj; -splice_opts=`cat $alidir/splice_opts 2>/dev/null` -cmvn_opts=`cat $alidir/cmvn_opts 2>/dev/null` -cp $alidir/splice_opts $dir 2>/dev/null -cp $alidir/cmvn_opts $dir 2>/dev/null # cmn/cmvn option. - -utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; -cp $lang/phones.txt $dir || exit 1; - -[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; - -spkvecs_opt= # Empty option for now, until we estimate the speaker vectors. -gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|" - -## Set up features. 
-if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
-echo "$0: feature type is $feat_type"
-
-case $feat_type in
-  delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
-  lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
-    cp $alidir/final.mat $dir
-    ;;
-  *) echo "$0: invalid feature type $feat_type" && exit 1;
-esac
-if [ -f $alidir/trans.1 ]; then
-  echo "$0: using transforms from $alidir"
-  feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |"
-fi
-##
-
-
-if [ $stage -le -6 ]; then
-  echo "$0: accumulating tree stats"
-  $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \
-    acc-tree-stats --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \
-    "ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1;
-  [ "`ls $dir/*.treeacc | wc -w`" -ne "$nj" ] && echo "$0: Wrong #tree-stats" && exit 1;
-  sum-tree-stats $dir/treeacc $dir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1;
-  rm $dir/*.treeacc
-fi
-
-if [ $stage -le -5 ]; then
-  echo "$0: Getting questions for tree clustering."
-  # preparing questions, roots file...
-  cluster-phones $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1;
-  cat $lang/phones/extra_questions.int >> $dir/questions.int
-  compile-questions $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1;
-
-  echo "$0: Building the tree"
-  $cmd $dir/log/build_tree.log \
-    build-tree --verbose=1 --max-leaves=$num_leaves \
-      --cluster-thresh=$cluster_thresh $dir/treeacc $lang/phones/roots.int \
-      $dir/questions.qst $lang/topo $dir/tree || exit 1;
-fi
-
-if [ $stage -le -4 ]; then
-  echo "$0: Initializing the model"
-  # Note: if phn_dim > feat_dim+1 or spk_dim > feat_dim, these dims
-  # will be truncated on initialization.
-  $cmd $dir/log/init_sgmm.log \
-    sgmm-init --phn-space-dim=$phn_dim --spk-space-dim=$spk_dim $lang/topo \
-      $dir/tree $ubm $dir/0.mdl || exit 1;
-fi
-
-if [ $stage -le -3 ]; then
-  echo "$0: doing Gaussian selection"
-  $cmd JOB=1:$nj $dir/log/gselect.JOB.log \
-    sgmm-gselect $dir/0.mdl "$feats" \
-      "ark,t:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
-fi
-
-if [ $stage -le -2 ]; then
-  echo "$0: compiling training graphs"
-  text="ark:sym2int.pl --map-oov $oov -f 2- $lang/words.txt < $sdata/JOB/text|"
-  $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
-    compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/0.mdl $lang/L.fst \
-      "$text" "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
-fi
-
-if [ $stage -le -1 ]; then
-  echo "$0: Converting alignments"
-  $cmd JOB=1:$nj $dir/log/convert_ali.JOB.log \
-    convert-ali $alidir/final.mdl $dir/0.mdl $dir/tree "ark:gunzip -c $alidir/ali.JOB.gz|" \
-      "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
-fi
-
-x=0
-while [ $x -lt $num_iters ]; do
-  echo "$0: training pass $x ... "
-  if echo $realign_iters | grep -w $x >/dev/null && [ $stage -le $x ]; then
-    echo "$0: re-aligning data"
-    $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \
-      sgmm-align-compiled $spkvecs_opt $scale_opts "$gselect_opt" \
-        --utt2spk=ark:$sdata/JOB/utt2spk --beam=$beam --retry-beam=$retry_beam \
-        $dir/$x.mdl "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \
-        "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
-  fi
-  if [ $spk_dim -gt 0 ] && echo $spkvec_iters | grep -w $x >/dev/null; then
-    if [ $stage -le $x ]; then
-      $cmd JOB=1:$nj $dir/log/spkvecs.$x.JOB.log \
-        ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \
-        weight-silence-post 0.01 $silphonelist $dir/$x.mdl ark:- ark:- \| \
-        sgmm-est-spkvecs --rand-prune=$rand_prune --spk2utt=ark:$sdata/JOB/spk2utt \
-          $spkvecs_opt "$gselect_opt" $dir/$x.mdl "$feats" ark,s,cs:- \
-          ark:$dir/tmp_vecs.JOB '&&' mv $dir/tmp_vecs.JOB $dir/vecs.JOB || exit 1;
-    fi
-    spkvecs_opt[$n]="--spk-vecs=ark:$dir/vecs.JOB"
-  fi
-  if [ $x -eq 0 ]; then
-    flags=vwcSt # on the first iteration, don't update projections M or N
-  elif [ $spk_dim -gt 0 -a $[$x%2] -eq 1 -a $x -ge `echo $spkvec_iters | awk '{print $1}'` ]; then
-    # Update N if we have speaker-vector space and x is odd,
-    # and we've already updated the speaker vectors...
-    flags=vNwcSt
-  else
-    # otherwise update M.
-    flags=vMwcSt
-  fi
-
-  if [ $stage -le $x ]; then
-    $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \
-      sgmm-acc-stats $spkvecs_opt --utt2spk=ark:$sdata/JOB/utt2spk \
-        --update-flags=$flags "$gselect_opt" --rand-prune=$rand_prune \
-        $dir/$x.mdl "$feats" "ark,s,cs:gunzip -c $dir/ali.JOB.gz | ali-to-post ark:- ark:-|" \
-        $dir/$x.JOB.acc || exit 1;
-  fi
-
-  # The next option is needed if the user specifies a phone or speaker sub-space
-  # dimension that's higher than the "normal" one.
-  increase_dim_opts=
-  if echo $increase_dim_iters | grep -w $x >/dev/null; then
-    increase_dim_opts="--increase-phn-dim=$phn_dim --increase-spk-dim=$spk_dim"
-    # Note: the command below might have a null effect on some iterations.
-    if [ $spk_dim -gt $feat_dim ]; then
-      $cmd JOB=1:$nj $dir/log/copy_vecs.$x.JOB.log \
-        copy-vector --print-args=false --change-dim=$spk_dim \
-          ark:$dir/vecs.JOB ark:$dir/vecs_tmp.JOB '&&' \
-        mv $dir/vecs_tmp.JOB $dir/vecs.JOB || exit 1;
-    fi
-  fi
-
-  if [ $stage -le $x ]; then
-    $cmd $dir/log/update.$x.log \
-      sgmm-est --update-flags=$flags --split-substates=$numsubstates $increase_dim_opts \
-        --power=$power --write-occs=$dir/$[$x+1].occs $dir/$x.mdl "sgmm-sum-accs - $dir/$x.*.acc|" \
-        $dir/$[$x+1].mdl || exit 1;
-    rm $dir/$x.mdl $dir/$x.*.acc $dir/$x.occs 2>/dev/null
-  fi
-
-  if [ $x -lt $max_iter_inc ]; then
-    numsubstates=$[$numsubstates+$incsubstates]
-  fi
-  x=$[$x+1];
-done
-
-rm $dir/final.mdl $dir/final.occs 2>/dev/null
-ln -s $x.mdl $dir/final.mdl
-ln -s $x.occs $dir/final.occs
-
-if [ $spk_dim -gt 0 ]; then
-  # We need to create an "alignment model" that's been trained
-  # without the speaker vectors, to do the first-pass decoding with
-  # at test time.
-
-  # We do this for a few iters, in this recipe.
-  final_mdl=$dir/$x.mdl
-  cur_alimdl=$dir/$x.mdl
-  while [ $x -lt $[$num_iters+$num_iters_alimdl] ]; do
-    echo "$0: building alignment model (pass $x)"
-    if [ $x -eq $num_iters ]; then # 1st pass of building alimdl.
-      flags=MwcS # don't update v the first time.  Note-- we never update transitions.
-                 # they wouldn't change anyway as we use the same alignment as previously.
- else - flags=vMwcS - fi - if [ $stage -le $x ]; then - $cmd JOB=1:$nj $dir/log/acc_ali.$x.JOB.log \ - ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ - sgmm-post-to-gpost $spkvecs_opt "$gselect_opt" \ - --utt2spk=ark:$sdata/JOB/utt2spk $final_mdl "$feats" ark,s,cs:- ark:- \| \ - sgmm-acc-stats-gpost --rand-prune=$rand_prune --update-flags=$flags \ - $cur_alimdl "$feats" ark,s,cs:- $dir/$x.JOB.aliacc || exit 1; - $cmd $dir/log/update_ali.$x.log \ - sgmm-est --update-flags=$flags --remove-speaker-space=true --power=$power $cur_alimdl \ - "sgmm-sum-accs - $dir/$x.*.aliacc|" $dir/$[$x+1].alimdl || exit 1; - rm $dir/$x.*.aliacc || exit 1; - [ $x -gt $num_iters ] && rm $dir/$x.alimdl - fi - cur_alimdl=$dir/$[$x+1].alimdl - x=$[$x+1] - done - rm $dir/final.alimdl 2>/dev/null - ln -s $x.alimdl $dir/final.alimdl -fi - -utils/summarize_warnings.pl $dir/log - -echo Done diff --git a/src/Doxyfile b/src/Doxyfile index f5e874be3ad..bf2dc5197e2 100644 --- a/src/Doxyfile +++ b/src/Doxyfile @@ -453,9 +453,9 @@ WARN_LOGFILE = # the lines after "doc itf" are copied from SUBDIRS in the Makefile. INPUT = doc itf \ - base matrix util feat tree thread gmm transform sgmm \ + base matrix util feat tree thread gmm transform \ fstext hmm lm decoder lat cudamatrix nnet \ - bin fstbin gmmbin fgmmbin sgmmbin featbin \ + bin fstbin gmmbin fgmmbin featbin \ nnetbin latbin sgmm2 sgmm2bin nnet2 nnet2bin nnet3 nnet3bin \ kwsbin ivector ivectorbin diff --git a/src/Makefile b/src/Makefile index 9905be869a0..8bc18b254e9 100644 --- a/src/Makefile +++ b/src/Makefile @@ -5,15 +5,15 @@ SHELL := /bin/bash -SUBDIRS = base matrix util feat tree thread gmm transform sgmm \ +SUBDIRS = base matrix util feat tree thread gmm transform \ fstext hmm lm decoder lat kws cudamatrix nnet \ - bin fstbin gmmbin fgmmbin sgmmbin featbin \ + bin fstbin gmmbin fgmmbin featbin \ nnetbin latbin sgmm2 sgmm2bin nnet2 nnet3 chain nnet3bin nnet2bin kwsbin \ ivector ivectorbin online2 online2bin lmbin chainbin -MEMTESTDIRS = base matrix util feat tree thread gmm transform sgmm \ +MEMTESTDIRS = base matrix util feat tree thread gmm transform \ fstext hmm lm decoder lat nnet kws chain \ - bin fstbin gmmbin fgmmbin sgmmbin featbin \ + bin fstbin gmmbin fgmmbin featbin \ nnetbin latbin sgmm2 nnet2 nnet3 nnet2bin nnet3bin sgmm2bin kwsbin \ ivector ivectorbin online2 online2bin lmbin @@ -153,8 +153,8 @@ $(EXT_SUBDIRS) : mklibdir # this is necessary for correct parallel compilation #1)The tools depend on all the libraries -bin fstbin gmmbin fgmmbin sgmmbin sgmm2bin featbin nnetbin nnet2bin nnet3bin chainbin latbin ivectorbin lmbin kwsbin online2bin: \ - base matrix util feat tree thread gmm transform sgmm sgmm2 fstext hmm \ +bin fstbin gmmbin fgmmbin sgmm2bin featbin nnetbin nnet2bin nnet3bin chainbin latbin ivectorbin lmbin kwsbin online2bin: \ + base matrix util feat tree thread gmm transform sgmm2 fstext hmm \ lm decoder lat cudamatrix nnet nnet2 nnet3 ivector chain kws online2 #2)The libraries have inter-dependencies @@ -166,12 +166,11 @@ feat: base matrix util gmm transform tree thread tree: base util thread matrix gmm: base util matrix tree thread transform: base util matrix gmm tree thread -sgmm: base util matrix gmm tree transform thread hmm sgmm2: base util matrix gmm tree transform thread hmm fstext: base util thread matrix tree hmm: base tree matrix util thread lm: base util thread matrix fstext -decoder: base util thread matrix gmm sgmm hmm tree transform lat +decoder: base util thread matrix gmm hmm tree transform lat lat: base util thread 
hmm tree matrix cudamatrix: base util thread matrix nnet: base util hmm tree thread matrix cudamatrix @@ -180,8 +179,8 @@ nnet3: base util matrix thread lat gmm hmm tree transform cudamatrix chain fstex chain: lat hmm tree fstext matrix cudamatrix util thread base ivector: base util matrix thread transform tree gmm #3)Dependencies for optional parts of Kaldi -onlinebin: base matrix util feat tree gmm transform sgmm sgmm2 fstext hmm lm decoder lat cudamatrix nnet nnet2 online thread -# python-kaldi-decoding: base matrix util feat tree thread gmm transform sgmm sgmm2 fstext hmm decoder lat online +onlinebin: base matrix util feat tree gmm transform sgmm2 fstext hmm lm decoder lat cudamatrix nnet nnet2 online thread +# python-kaldi-decoding: base matrix util feat tree thread gmm transform sgmm2 fstext hmm decoder lat online online: decoder gmm transform feat matrix util base lat hmm thread tree online2: decoder gmm transform feat matrix util base lat hmm thread tree ivector cudamatrix nnet2 nnet3 chain kws: base util thread hmm tree matrix lat diff --git a/src/decoder/Makefile b/src/decoder/Makefile index fe489d1cb3f..93db701cb7a 100644 --- a/src/decoder/Makefile +++ b/src/decoder/Makefile @@ -11,7 +11,7 @@ OBJFILES = training-graph-compiler.o lattice-simple-decoder.o lattice-faster-dec LIBNAME = kaldi-decoder -ADDLIBS = ../lat/kaldi-lat.a ../sgmm/kaldi-sgmm.a ../hmm/kaldi-hmm.a \ +ADDLIBS = ../lat/kaldi-lat.a ../hmm/kaldi-hmm.a \ ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \ ../tree/kaldi-tree.a ../util/kaldi-util.a ../thread/kaldi-thread.a \ ../matrix/kaldi-matrix.a ../base/kaldi-base.a diff --git a/src/doc/kaldi_for_dummies.dox b/src/doc/kaldi_for_dummies.dox index 75a58011b1d..49c9fb69e42 100644 --- a/src/doc/kaldi_for_dummies.dox +++ b/src/doc/kaldi_for_dummies.dox @@ -413,7 +413,7 @@ b.) \c path.sh
export KALDI_ROOT=`pwd`/../.. # Setting paths to useful tools -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$PWD:$PATH # Defining audio data directory (modify it for your installation directory!) export DATA_ROOT="/home/{user}/kaldi-trunk/egs/digits/digits_audio" diff --git a/src/feat/Makefile b/src/feat/Makefile index 71a34192347..e987de55b38 100644 --- a/src/feat/Makefile +++ b/src/feat/Makefile @@ -6,13 +6,12 @@ include ../kaldi.mk TESTFILES = feature-mfcc-test feature-plp-test feature-fbank-test \ feature-functions-test pitch-functions-test feature-sdc-test \ - resample-test online-feature-test sinusoid-detection-test \ - signal-test + resample-test online-feature-test signal-test OBJFILES = feature-functions.o feature-mfcc.o feature-plp.o feature-fbank.o \ feature-spectrogram.o mel-computations.o wave-reader.o \ - pitch-functions.o resample.o online-feature.o sinusoid-detection.o \ - signal.o feature-window.o + pitch-functions.o resample.o online-feature.o signal.o \ + feature-window.o LIBNAME = kaldi-feat diff --git a/src/feat/sinusoid-detection-test.cc b/src/feat/sinusoid-detection-test.cc deleted file mode 100644 index 68148b44ccf..00000000000 --- a/src/feat/sinusoid-detection-test.cc +++ /dev/null @@ -1,452 +0,0 @@ -// feat/sinusoid-detection-test.cc - -// Copyright 2015 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include - -#include "base/kaldi-math.h" -#include "feat/sinusoid-detection.h" - - -namespace kaldi { - -// this function is used for testing AddSinusoid. -void AddSinusoidSimple(BaseFloat samp_freq, - const Sinusoid &sinusoid, - VectorBase *signal) { - for (int32 i = 0; i < signal->Dim(); i++) - (*signal)(i) += sinusoid.amplitude * - cos(M_2PI * sinusoid.freq / samp_freq * i + sinusoid.phase); -} - -void UnitTestAddSinusoid() { - BaseFloat samp_freq = 560.1; - int32 length = 511; - Vector orig(length); - orig.SetRandn(); - Vector orig2(orig); - Sinusoid sinusoid(49.20, 2.111, 1.5); - - AddSinusoid(samp_freq, sinusoid, &orig); - AddSinusoidSimple(samp_freq, sinusoid, &orig2); - AssertEqual(orig, orig2); -} - - - -void UnitTestQuadraticMaximizeEqualSpaced() { - for (int32 n = 0; n < 50; n++) { - - // Let the cubic function be y = a x^2 + b x + c, and let - // y0,y1,y2 be its values evaluated at x = [0, 1, 2]; we - // want it evaluated at arbitrary x. 
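// (For reference: the fit used here passes a quadratic through (0, y0),
// (1, y1) and (2, y2), giving c = y0, a = (y2 - 2 y1 + y0) / 2 and
// b = y1 - a - c, with the vertex at x = -b / (2a); the assertions in this
// test just check that the returned maximum dominates the curve at the
// sampled points.)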
- - BaseFloat a = -0.5 + RandUniform(), b = -0.5 + RandUniform(), c = -0.5 + RandUniform(); - BaseFloat y[3]; - for (int32 i = 0; i < 3; i++) { - BaseFloat x = i; - y[i] = a * x * x + b * x + c; - } - BaseFloat x_max, y_max; - SinusoidDetector::QuadraticMaximizeEqualSpaced(y[0], y[1], y[2], &x_max, &y_max); - - for (int32 m = 0; m <= 10; m++) { - BaseFloat x_test = 0.1 * m; - BaseFloat y_test = a * x_test * x_test + b * x_test + c; - KALDI_ASSERT(y_test <= y_max + 1.0e-05); - } - } -} - -void UnitTestQuadraticMaximize() { - for (int32 n = 0; n < 50; n++) { - - // Let the cubic function be y = a x^2 + b x + c, and let - // y0,y1,y2 be its values evaluated at x = [0, 1, 2]; we - // want it evaluated at arbitrary x. - - BaseFloat a = -0.5 + RandUniform(), b = -0.5 + RandUniform(), c = -0.5 + RandUniform(), - x = 0.1 + RandUniform() * 0.98; - BaseFloat y[3]; - for (int32 i = 0; i < 3; i++) { - BaseFloat this_x; - if (i == 0) { this_x = 0.0; } - else if (i == 1) { this_x = x; } - else { this_x = 1.0; } - y[i] = a * this_x * this_x + b * this_x + c; - } - BaseFloat x_max, y_max; - SinusoidDetector::QuadraticMaximize(x, y[0], y[1], y[2], &x_max, &y_max); - - for (int32 m = 0; m <= 10; m++) { - BaseFloat x_test = 0.1 * m; - BaseFloat y_test = a * x_test * x_test + b * x_test + c; - if (n < 100 && m == 5) { - KALDI_VLOG(2) << "Checking y_test <= y_max: " - << y_test << " <= " << y_max << " [x_max = " - << x_max << "]"; - KALDI_ASSERT(y_test <= y_max + 1.0e-05); - } - } - } -} - - -void UnitTestSinusoidDetector() { - BaseFloat samp_freq = 4000 + (rand() % 2000); - int32 num_samp = 128 + rand() % 400; - SinusoidDetector detector(samp_freq, num_samp); - - for (int32 i = 0; i < 40; i++) { - - Vector signal(num_samp); - - // Sinusoid ref_sinusoid(1.3, 312.5, M_PI * 0.0); - // Sinusoid ref_sinusoid(1.3, 324.125, M_PI * 0.5); - - BaseFloat nyquist = samp_freq * 0.5; - BaseFloat freq = nyquist * RandUniform(); - BaseFloat amplitude = RandUniform(); - BaseFloat phase = M_2PI * RandUniform(); - - Sinusoid ref_sinusoid(amplitude, freq, phase); - - AddSinusoid(samp_freq, ref_sinusoid, &signal); - - - BaseFloat orig_energy = VecVec(signal, signal); - KALDI_LOG << "Real frequency is " << freq << ", amplitude " - << amplitude << ", phase " << phase << ", samp-freq " - << samp_freq; - KALDI_LOG << "Total energy of signal (with sinusoid) is " << orig_energy; - - Sinusoid sinusoid; - BaseFloat min_energy = 0.0; - BaseFloat energy = detector.DetectSinusoid(min_energy, - signal, &sinusoid); - - Vector new_signal(signal); - sinusoid.phase += M_PI; // Reverse the phase. - AddSinusoid(samp_freq, sinusoid, &new_signal); - BaseFloat delta_energy = VecVec(signal, signal) - - VecVec(new_signal, new_signal); - KALDI_LOG << "Projected delta energy = " << energy - << " and observed was " << delta_energy; - - BaseFloat remaining_energy = VecVec(new_signal, new_signal); - if (remaining_energy > 0.01 * orig_energy) { - KALDI_WARN << "Energy remaining is " << remaining_energy - << " vs. original " << orig_energy; - BaseFloat relative_freq = freq / nyquist; - BaseFloat inv_num_samp = 1.0 / num_samp; - // We only tolerate this kind of error for very ridiculous frequency, - // close to zero or the Nyquist. - KALDI_ASSERT(relative_freq < inv_num_samp || - relative_freq > 1.0 - inv_num_samp); - } - } -} - -// as UnitTestSinusoidDetector(), but doing it in noisy signals. 
-void UnitTestSinusoidDetectorNoisy() { - BaseFloat samp_freq = 4000 + (rand() % 2000); - int32 num_samp = 128 + rand() % 400; - SinusoidDetector detector(samp_freq, num_samp); - - for (int32 i = 0; i < 40; i++) { - - Vector signal(num_samp); - - signal.SetRandn(); - - BaseFloat rand_energy = VecVec(signal, signal); - - // Sinusoid ref_sinusoid(1.3, 312.5, M_PI * 0.0); - // Sinusoid ref_sinusoid(1.3, 324.125, M_PI * 0.5); - - BaseFloat nyquist = samp_freq * 0.5; - BaseFloat freq = nyquist * RandUniform(); - BaseFloat amplitude = 10.0 * RandUniform(); - BaseFloat phase = M_2PI * RandUniform(); - - Sinusoid ref_sinusoid(amplitude, freq, phase); - - AddSinusoid(samp_freq, ref_sinusoid, &signal); - - BaseFloat tot_energy = VecVec(signal, signal); - - KALDI_LOG << "Real frequency is " << freq << ", amplitude " - << amplitude << ", phase " << phase << ", samp-freq " - << samp_freq; - KALDI_LOG << "Total energy of signal (with noise + sinusoid) is " << tot_energy; - - Sinusoid sinusoid; - BaseFloat min_energy = 0.0; - BaseFloat energy = detector.DetectSinusoid(min_energy, - signal, &sinusoid); - - Vector new_signal(signal); - sinusoid.phase += M_PI; // reverse the phase. - AddSinusoid(samp_freq, sinusoid, &new_signal); - BaseFloat delta_energy = VecVec(signal, signal) - - VecVec(new_signal, new_signal); - KALDI_LOG << "Projected delta energy = " << energy - << " and observed was " << delta_energy; - - BaseFloat min_energy_diff = 0.99 * (tot_energy - rand_energy); - - if (delta_energy < min_energy_diff) { - KALDI_WARN << "Energy reduction is " << delta_energy - << " vs. expected " << (tot_energy - rand_energy); - BaseFloat relative_freq = freq / nyquist; - BaseFloat inv_num_samp = 1.0 / num_samp; - // We only tolerate this kind of error for very ridiculous frequency, - // close to zero or the Nyquist. - KALDI_ASSERT(relative_freq < inv_num_samp || - relative_freq > 1.0 - inv_num_samp); - } - } -} - - -void AddFreqToSignal(BaseFloat base_freq, - BaseFloat samp_freq, - BaseFloat tolerance, - BaseFloat gain, - VectorBase *signal) { - BaseFloat error_scale = (2 * RandUniform() - 1) * tolerance; - BaseFloat freq = base_freq * (1.0 + error_scale); - KALDI_VLOG(3) << "base-freq = " << base_freq << ", factor = " << error_scale; - for (int32 i = 0; i < signal->Dim(); i++) - (*signal)(i) += gain * sin(i * 2.0 * 3.14159 * freq / samp_freq); -} - - -void GenerateDtmfTestCase( - BaseFloat sampling_rate, - Vector *signal, - std::vector *ref_output) { - // the "ref_output" should correlate with the first of each run of frames with the same label. - - BaseFloat min_duration_secs = 0.04; // min duration of dtmf or non-tone segments. - BaseFloat min_dialtone_duration_secs = 0.1; - BaseFloat frequency_tolerance = 0.035; - BaseFloat dialtone_frequency_tolerance = 0.4 * (440.0 - 425.0) / 440.0; - - int32 num_events = 2 * (5 + rand() % 5) + 1; // odd number. - int32 tot_signal_dim = 0; - - ref_output->resize(num_events); - std::vector > all_signals(num_events); - for (int32 i = 0; i < num_events; i++) { - MultiSinusoidDetectorOutput &this_output = (*ref_output)[i]; - Vector &this_signal = all_signals[i]; - BaseFloat duration_secs = min_duration_secs * (1 + rand() % 3); - int32 num_samp = sampling_rate * duration_secs; - tot_signal_dim += num_samp; - - this_signal.Resize(num_samp); - this_signal.SetRandn(); - - if (i % 2 == 0); // do nothing; - else if (rand() % 2 == 0 && duration_secs >= min_dialtone_duration_secs) { - // dialtone. 
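      // (For reference: 350 Hz and 440 Hz are the two components of the
      // North American dial tone and 425 Hz is the usual European dial tone,
      // while the DTMF digits below are standard two-tone pairs drawn from
      // the row frequencies {697, 770, 852, 941} Hz and the column
      // frequencies {1209, 1336, 1477, 1633} Hz.)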
- BaseFloat freq; - if (rand() % 3 == 0) { freq = 350; } - else if (rand() % 2 == 0) { freq = 440; } - else { freq = 425; } - BaseFloat gain = 10.0 * (1.0 + rand() % 2); - AddFreqToSignal(freq, sampling_rate, dialtone_frequency_tolerance, - gain, &(this_signal)); - this_output.freq1 = freq; - } else { - // dtmf. use a subset of tones as examples. - BaseFloat freq1, freq2; - char c; - if (rand() % 4 == 0) { - c = '8'; freq1 = 852; freq2 = 1336; - } else if (rand() % 3 == 0) { - c = '0'; freq1 = 941; freq2 = 1336; - } else if (rand() % 2 == 0) { - c = '#'; freq1 = 941; freq2 = 1477; - } else { - c = '1'; freq1 = 697; freq2 = 1209; - } - BaseFloat base_gain = 10.0 * (1.0 + (rand() % 3)), - gain_factor = 1.0 + 0.1 * (-2 + rand() % 5), - gain1 = base_gain, gain2 = gain_factor * base_gain; - AddFreqToSignal(freq1, sampling_rate, frequency_tolerance, gain1, - &(this_signal)); - AddFreqToSignal(freq2, sampling_rate, frequency_tolerance, gain2, - &(this_signal)); - this_output.freq1 = freq1; - this_output.freq2 = freq2; - } - } - signal->Resize(tot_signal_dim); - int32 signal_offset = 0; - for (int32 i = 0; i < num_events; i++) { - int32 this_dim = all_signals[i].Dim(); - signal->Range(signal_offset, this_dim).CopyFromVec(all_signals[i]); - signal_offset += this_dim; - } -} - - -/* - -// Just a basic test to check that it produces output. - -void UnitTestToneDetection() { - BaseFloat samp_freq = (rand() % 2) == 0 ? 8000 : 16000; - ToneDetectionConfig config; - - int32 num_frames = 100 + (rand() % 100); - int32 frame_length = static_cast(samp_freq * config.frame_length_secs); - - int32 num_samples = frame_length * num_frames + rand() % frame_length; - Vector signal(num_samples); - signal.SetRandn(); - - ToneDetector tone_detector(config, samp_freq); - - int32 signal_offset = 0; - - std::vector tone_detector_output; - - while (signal_offset < num_samples) { - int32 signal_remaining = num_samples - signal_offset, - chunk_size = std::min((rand() % 200) + 100, - signal_remaining); - SubVector signal_part(signal, signal_offset, chunk_size); - tone_detector.AcceptWaveform(signal_part); - signal_offset += chunk_size; - - if (signal_offset == num_samples) - tone_detector.WaveformFinished(); - while (!tone_detector.Done() && - (rand() % 2 == 0 || signal_offset == num_samples)) { - ToneDetectorOutput *output = new ToneDetectorOutput(); - tone_detector.GetNextFrame(output); - tone_detector_output.push_back(output); - } - } - KALDI_ASSERT(signal_offset == num_samples); - - Vector signal2(signal.Dim()); - signal_offset = 0; - for (int32 i = 0; i < tone_detector_output.size(); i++) { - ToneDetectorOutput *output = tone_detector_output[i]; - signal2.Range(signal_offset, - output->signal.Dim()).CopyFromVec(output->signal); - signal_offset += output->signal.Dim(); - if (output->frame_type != 'n') { - KALDI_ERR << "Frame " << i << " badly classified, should be 'n', is: " - << output->frame_type; - } - delete output; - } - KALDI_ASSERT(signal_offset == num_samples && - signal.ApproxEqual(signal2, 1.0e-10)); - -} - -std::ostringstream & operator << (std::ostringstream &ostr, - const ToneDetectorOutput &output) { - ostr << output.frame_type; - if (output.frame_type == 'd') - ostr << output.dialtone_freq; - ostr << ' '; - return ostr; -} - -*/ - - -// This version of the unit-test generates a signal that has tones in it, and -// runs the detection on that signal. -void UnitTestToneDetection2() { - BaseFloat samp_freq = (rand() % 2) == 0 ? 
8000 : 16000; - Vector signal; - std::vector ref_output; - GenerateDtmfTestCase(samp_freq, &signal, &ref_output); - - MultiSinusoidDetectorConfig config; - - int32 num_samples = signal.Dim(); - KALDI_ASSERT(num_samples > 0); - - MultiSinusoidDetector multi_sinusoid_detector(config, samp_freq); - - int32 signal_offset = 0; - - std::vector multi_sinusoid_detector_output; - - while (signal_offset < num_samples) { - int32 signal_remaining = num_samples - signal_offset, - chunk_size = std::min((rand() % 200) + 100, - signal_remaining); - SubVector signal_part(signal, signal_offset, chunk_size); - multi_sinusoid_detector.AcceptWaveform(signal_part); - signal_offset += chunk_size; - - if (signal_offset == num_samples) - multi_sinusoid_detector.WaveformFinished(); - while (!multi_sinusoid_detector.Done() && - (rand() % 2 == 0 || signal_offset == num_samples)) { - MultiSinusoidDetectorOutput *output = new MultiSinusoidDetectorOutput(); - multi_sinusoid_detector.GetNextFrame(output); - multi_sinusoid_detector_output.push_back(output); - } - } - KALDI_ASSERT(signal_offset == num_samples); - - // std::ostringstream str_ref, str_hyp; - //for (size_t i = 0; i < ref_output.size(); i++) - // str_ref << ref_output[i]; - - - for (size_t i = 0; i < multi_sinusoid_detector_output.size(); i++) { - MultiSinusoidDetectorOutput *output = multi_sinusoid_detector_output[i]; - KALDI_LOG << "tot-energy = " << output->tot_energy - << ", freq1 " << output->freq1 << ", energy1 " << output->energy1 - << ", freq2 " << output->freq2 << ", energy2 " << output->energy2; - delete output; - } -} - - - -} // namespace kaldi - -int main() { - using namespace kaldi; - - SetVerboseLevel(4); - - UnitTestToneDetection2(); - UnitTestAddSinusoid(); - UnitTestQuadraticMaximizeEqualSpaced(); - UnitTestQuadraticMaximize(); - for (int32 i = 0; i < 10; i++) { - UnitTestSinusoidDetector(); - UnitTestSinusoidDetectorNoisy(); - } - -} diff --git a/src/feat/sinusoid-detection.cc b/src/feat/sinusoid-detection.cc deleted file mode 100644 index bf6b0b9e4fe..00000000000 --- a/src/feat/sinusoid-detection.cc +++ /dev/null @@ -1,945 +0,0 @@ -// feat/sinusoid-detection.cc - -// Copyright 2015 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include "feat/sinusoid-detection.h" -#include "matrix/matrix-functions.h" -#include "feat/resample.h" - -namespace kaldi { - - - -// This function adds the given sinusoid to the signal, as: -// (*signal)(t) += amplitude * cos(2 pi freq/samp_freq t + phase). -void AddSinusoid(BaseFloat samp_freq, - const Sinusoid &sinusoid, - VectorBase *signal) { - // treat "factor" as a complex variable equal to exp(i * 2 pi freq / samp_freq); it's - // the factor by which we multiply on each frame. 
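// (For reference, this is the standard phasor recurrence for sinusoid
// synthesis: with z_t = amplitude * exp(i (phase + 2 pi freq t / samp_freq)),
// each step is z_{t+1} = z_t * factor where factor = exp(i 2 pi freq /
// samp_freq), so one sample costs a single complex multiply; restarting from
// the closed form every batch_size samples keeps rounding error from
// accumulating.)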
-  BaseFloat factor_real = cos(M_2PI * sinusoid.freq / samp_freq),
-      factor_im = sin(M_2PI * sinusoid.freq / samp_freq);
-  BaseFloat *signal_data = signal->Data();
-  int32 dim = signal->Dim(), batch_size = 100;
-  // process frames in batches of size "batch_size", after which we recompute
-  // the starting point to prevent loss of accuracy due to drift.
-  for (int32 b = 0; b * batch_size < dim; b++) {
-    int32 t_offset = b * batch_size,
-        t_end = std::min(dim, t_offset + batch_size);
-    double phase = sinusoid.phase + M_2PI * t_offset * sinusoid.freq / samp_freq;
-    // treat x as a complex variable which initially is equal to amplitude * exp(i * phase),
-    // but which gets multiplied by "factor" on each frame.
-    BaseFloat x_real = sinusoid.amplitude * cos(phase),
-        x_im = sinusoid.amplitude * sin(phase);
-    for (int32 t = t_offset; t < t_end; t++) {
-      signal_data[t] += x_real;
-      ComplexMul(factor_real, factor_im, &x_real, &x_im);  // x *= factor.
-    }
-  }
-}
-
-
-// static
-void SinusoidDetector::QuadraticMaximizeEqualSpaced(
-    BaseFloat y0, BaseFloat y1, BaseFloat y2,
-    BaseFloat *x_max, BaseFloat *y_max) {
-  // Let the function be y = a x^2 + b x + c, and
-  // suppose we have the values of y(0), y(1) and y(2).
-  // We have y0 = c, y1 = a + b + c, and y2 = 4a + 2b + c,
-  // so c = y0.
-  // Also, y2 - 2 y1 = 2a - c, so
-  // a = (y2 - 2 y1 + c) / 2, and
-  // b = y1 - a - c.
-  BaseFloat c = y0, a = (y2 - 2 * y1 + c) / 2, b = y1 - a - c;
-  if (a >= 0) {
-    // The maximum of the function will occur at one of the end points.
-    if (y0 > y2) {
-      *x_max = 0;
-      *y_max = y0;
-    } else {
-      *x_max = 2;
-      *y_max = y2;
-    }
-  } else {
-    // derivative y' = 2a x + b.  y' = 0 at x = -b / 2 a.
-    BaseFloat x = -b / (2.0 * a);
-    if (x <= 0.0) {
-      *x_max = 0;
-      *y_max = y0;
-    } else if (x >= 2.0) {
-      *x_max = 2;
-      *y_max = y2;
-    } else {
-      *x_max = x;
-      *y_max = a * x * x + b * x + c;
-    }
-  }
-}
-
-// static
-void SinusoidDetector::QuadraticMaximize(
-    BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
-    BaseFloat *x_max, BaseFloat *y_max) {
-  // Let the function be y = a x^2 + b x + c, and
-  // suppose we have the values of y(0), y(x1) and y(1),
-  // where 0 < x1 < 1.
-  // We have y0 = c, y1 = x1^2 a + x1 b + c, and y2 = a + b + c,
-  // so c = y0.
-  // Also, x1.y2 - y1 = a (x1 - x1^2) + (x1 - 1) c, so
-  // a = ( (x1 y2 - y1) - (x1 - 1) c) / (x1 - x1^2), and
-  // b = y2 - a - c.
-  BaseFloat c = y0,
-      a = (x1 * y2 - y1 - (x1 - 1.0) * c) / (x1 - x1*x1),
-      b = y2 - a - c;
-
-  // TODO: remove these lines.
-  AssertEqual(y1, a * x1 * x1 + b * x1 + c);
-  AssertEqual(y2, a + b + c);
-
-  if (a >= 0) {
-    // The maximum of the function will occur at one of the end points.
-    if (y0 > y2) {
-      *x_max = 0;
-      *y_max = y0;
-    } else {
-      *x_max = 1.0;
-      *y_max = y2;
-    }
-  } else {
-    // derivative y' = 2a x + b.  y' = 0 at x = -b / 2 a.
-    BaseFloat x = -b / (2.0 * a);
-    if (x <= 0.0) {
-      *x_max = 0.0;
-      *y_max = y0;
-    } else if (x >= 1.0) {
-      *x_max = 1.0;
-      *y_max = y2;
-    } else {
-      *x_max = x;
-      *y_max = a * x * x + b * x + c;
-    }
-  }
-}
-
-//static
-BaseFloat SinusoidDetector::QuadraticInterpolate(
-    BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
-    BaseFloat x) {
-  // Let the function be y = a x^2 + b x + c, and
-  // suppose we have the values of y(0), y(x1) and y(1),
-  // where 0 < x1 < 1.
-  // We have y0 = c, y1 = x1^2 a + x1 b + c, and y2 = a + b + c,
-  // so c = y0.
-  // Also, x1.y2 - y1 = a (x1 - x1^2) + (x1 - 1) c, so
-  // a = ( (x1 y2 - y1) - (x1 - 1) c) / (x1 - x1^2), and
-  // b = y2 - a - c.
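  // (A quick check of the algebra above: substituting back in gives
  // y(0) = c = y0, y(x1) = a x1^2 + b x1 + c = y1 and y(1) = a + b + c = y2,
  // so the parabola interpolates all three sample points as intended.)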
- KALDI_ASSERT(x1 >= 0.0 && x1 <= 1.0); - if (x1 == 0.0) return y0; - else if (x1 == 1.0) return y2; - - BaseFloat c = y0, - a = (x1 * y2 - y1 - (x1 - 1.0) * c) / (x1 - x1*x1), - b = y2 - a - c; - return a * x * x + b * x + c; -} - -// This function does -// (*cos)(t) = cos(2 pi t freq / samp_freq) -// (*sin)(t) = sin(2 pi t freq / samp_freq) -//static -void SinusoidDetector::CreateCosAndSin(BaseFloat samp_freq, - BaseFloat freq, - VectorBase *cos_vec, - VectorBase *sin_vec) { - int32 dim = cos_vec->Dim(), batch_size = 100; - KALDI_ASSERT(dim == sin_vec->Dim()); - BaseFloat *cos_data = cos_vec->Data(), *sin_data = sin_vec->Data(); - BaseFloat factor_real = cos(M_2PI * freq / samp_freq), - factor_im = sin(M_2PI * freq / samp_freq); - - // process frames in batches of size "batch_size", after which we recompute - // the starting point to prevent loss of accuracy due to drift. - for (int32 b = 0; b * batch_size < dim; b++) { - int32 t_offset = b * batch_size, - t_end = std::min(dim, t_offset + batch_size); - double phase = M_2PI * t_offset * freq / samp_freq; - // treat x as a complex variable which initially is equal to amplitude * exp(i * phase), - // but which gets multiplied by "factor" on each frame. - BaseFloat x_real = cos(phase), x_im = sin(phase); - for (int32 t = t_offset; t < t_end; t++) { - cos_data[t] = x_real; - sin_data[t] = x_im; - ComplexMul(factor_real, factor_im, &x_real, &x_im); // x *= factor. - } - } -} - -SinusoidDetector::SinusoidDetector(BaseFloat samp_freq, - int32 num_samp): - samp_freq_(samp_freq), - num_samples_(num_samp), - num_samples_padded_(RoundUpToNearestPowerOfTwo(num_samp)), - fft_(num_samples_padded_), - factor1_(3.1), - factor2_(1.42) { - ComputeCoefficients(); -} - -void SinusoidDetector::SelfTest( - const VectorBase &signal, - const std::vector &info, - BaseFloat final_freq, - BaseFloat final_energy) { - int32 num_bins = num_samples_padded_ * 2 + 1; - - - { - BaseFloat cutoff = 0.0; - for (int32 k = 0; k <= num_bins; k += 4) - cutoff = std::max(cutoff, info[k].energy); - BaseFloat energy_upper_bound = factor1_ * cutoff; - if (final_energy > energy_upper_bound) { - KALDI_WARN << "Self-testing failed [factor1]: " - << final_energy << " > " << energy_upper_bound - << ", num-samples is " << num_samples_ - << ", freq/nyquist = " - << (final_freq / (samp_freq_ * 0.5)) - << "- would require factor1 >= " - << (final_energy / cutoff); - } - } - { - BaseFloat cutoff = 0.0; - for (int32 k = 0; k <= num_bins; k += 2) - if (info[k].valid) - cutoff = std::max(cutoff, info[k].energy); - BaseFloat energy_upper_bound = factor2_ * cutoff; - if (final_energy > energy_upper_bound) { - KALDI_WARN << "Self-testing failed [factor2]: " - << final_energy << " > " << energy_upper_bound - << ", num-samples is " << num_samples_ - << ", freq/nyquist = " - << (final_freq / (samp_freq_ * 0.5)) - << "- would require factor2 >= " - << (final_energy / cutoff); - - } - } - -} - - -BaseFloat SinusoidDetector::OptimizeFrequency( - const std::vector &info, - int32 *bin_out, - BaseFloat *offset_out) const { - - BaseFloat max_energy = 0.0; - *bin_out = -1; - int32 max_freq = num_samples_padded_ * 2; - - // For each bin, we consider the frequency range [bin, bin+1, bin+2], - // and if we have info for all those bins, do a quadratic interpolation to - // find the maximum within the range. - for (int32 bin = 0; bin + 2 <= max_freq; bin++) { - if (info[bin].valid && info[bin+1].valid && info[bin+2].valid) { - // First handle the left side of the bin. 
- BaseFloat best_x, best_y; - QuadraticMaximizeEqualSpaced(info[bin].energy, info[bin+1].energy, - info[bin+2].energy, &best_x, &best_y); - if (best_y > max_energy) { - max_energy = best_y; - if (best_x <= 1.0) { - *bin_out = bin; - *offset_out = best_x; - } else { - *bin_out = bin + 1; - *offset_out = best_x - 1; - } - } - } - } - return max_energy; -} - - -BaseFloat SinusoidDetector::DetectSinusoid( - BaseFloat min_energy, - const VectorBase &signal, - Sinusoid *sinusoid) { - if (signal(0) == 0.0 && signal.Norm(2.0) == 0.0) - return 0.0; - KALDI_ASSERT(signal.Dim() == num_samples_); - Vector fft(num_samples_padded_); - fft.Range(0, num_samples_).CopyFromVec(signal); - bool forward = true; - fft_.Compute(fft.Data(), forward); - - std::vector info; - ComputeCoarseInfo(fft, &info); - // we now have info for the "coarse" bins. - - // each element b of "bins" will be a multiple of 4: it's possible - // that the best frequency is in the range [b, b+4] - std::vector bins; - FindCandidateBins(min_energy, info, &bins); - - if (bins.empty()) - return 0.0; // not enough energy in signal. - - for (size_t i = 0; i < bins.size(); i++) { - int32 bin = bins[i]; - ComputeBinInfo(signal, bin, &(info[bin])); - } - - std::vector bins2; - FindCandidateBins2(min_energy, info, &bins2); - - for (size_t i = 0; i < bins2.size(); i++) { - int32 bin = bins2[i]; - ComputeBinInfo(signal, bin, &(info[bin])); - } - - // compute energy for the predicted-optimum point, which will usually be - // between bins, with an offset. - int32 bin; - BaseFloat offset; - - BaseFloat opt_energy = OptimizeFrequency(info, &bin, &offset); - - if (opt_energy == 0.0) - return 0.0; - - BaseFloat max_freq = (bin + offset) * samp_freq_ / (num_samples_padded_ * 4); - - KALDI_VLOG(4) << "Best frequency based on interpolation is " - << max_freq << ", best energy is " - << opt_energy << ", bin is " << bin; - - OptimizedInfo final_info; - - FineOptimizeFrequency(signal, bin, offset, &info, &final_info); - - // the following while loop will rarely be accessed. - while (final_info.offset == 0.0 && bin > 0) { - bin--; - FineOptimizeFrequency(signal, bin, 1.0, &info, &final_info); - } - - // the following while loop will rarely be accessed. - while (final_info.offset == 1.0 && bin < num_samples_padded_ * 2) { - bin++; - FineOptimizeFrequency(signal, bin, 0.0, &info, &final_info); - } - - if (bin <= 1 || bin >= num_samples_padded_ * 2 - 2) { - // If we're in the lowest or next-to-lowest bin, or the highest or - // next-to-highest allowed bin (note, "bin" here is a range, and it can - // never have the value num_samples_padded_ * 2), we tend to get more - // estimation error than usual, so do another round of optimization. 
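    // (A worked example of the bin <-> frequency mapping used throughout,
    // assuming samp_freq_ = 8000 and num_samples_padded_ = 512: freq =
    // (bin + offset) * 8000 / 2048, i.e. about 3.9 Hz per bin, with bins
    // 0 .. 1024 spanning 0 Hz up to the 4000 Hz Nyquist.)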
- FineOptimizeFrequency(signal, bin, final_info.offset, &info, &final_info); - } - - BaseFloat final_freq = (final_info.bin + final_info.offset) * samp_freq_ / (num_samples_padded_ * 4); - KALDI_VLOG(4) << "Final optimized info is: freq " << final_freq - << ", cos coeff " << final_info.cos_coeff << ", sin coeff " - << final_info.sin_coeff << ", energy " << final_info.energy; - - if (GetVerboseLevel() > 1) - SelfTest(signal, info, final_freq, final_info.energy); - - if (final_info.energy >= min_energy) { - sinusoid->amplitude = std::sqrt(final_info.cos_coeff * final_info.cos_coeff - + final_info.sin_coeff * final_info.sin_coeff); - sinusoid->freq = final_freq; - sinusoid->phase = -std::atan2(final_info.sin_coeff, final_info.cos_coeff); - KALDI_VLOG(4) << "Phase is " << sinusoid->phase << ", amplitude is " - << sinusoid->amplitude << ", freq is " << sinusoid->freq; - return final_info.energy; - } else { - return 0.0; - } -} - - -/* - This function computes, the original FFT bins, the amount of energy in - the signal that can be explained by a sinusoid at the corresponding frequency. - - Let f be the continuous-valued frequency. - - Define the vector C_f as - C_f = [ c_0, c_1 ... c_n ] where c_k = cos(2 pi k f / samp_freq). [obviously this notation depends on f]. - and S_f the same thing with sin in place of cos. - - Let the signal, as a vector, be V. - We want to maximize the (positive) energy-difference: - ||V||^2 - || V - c C_f - s S_f ||^2 - where c and s are the coefficients of C_f and S_f. - This quantity can be expanded as follows, where . means dot product. - \delta E = -c^2 C_f.C_f - s^2 S_f.S_f - 2 c s C_f.S_f + 2 c V.C_f + 2 s V.S_f. - which can be written as follows, where . means dot-product and ' means transpose: - \delta E = 2 [c s] v - [c s] M [c s]' - where M = [ C_f.C_f, C_f.S_f, C_f.S_f, S_f.S_f ], - and v = [V.C_f, V.S_f]. - If M is invertible (i.e. for nonzero frequencies), this is maximized by - [c s] = M^-1 v - giving us the value. - \delta E = v' M^{-1} v. - We'll compute the inverse of M in advance, inside ComputeCoefficients(), using - the formula [a b;c d]^-1 = 1/(ad - bc) [d -b; -c a] For zero frequency and at the - Nyquist, M has the value [ a 0; 0 0 ], and we have the same type of expression - limited to the first dim of v, i.e. Minv = [ a^{-1} 0; 0 0 ], a kind of pseudo-inverse. - */ - -void SinusoidDetector::ComputeCoarseInfo( - const Vector &fft, - std::vector *info) const { - info->resize(num_samples_padded_ * 2 + 1); // 4 times resolution of FFT itself. - - const BaseFloat *fft_data = fft.Data(); - - int32 num_bins = num_samples_padded_ / 2 + 1; - for (int32 k = 0; k < num_bins; k++) { - BaseFloat real, im; - if (k == 0) { - real = fft_data[0]; - im = 0.0; - } else if (k == num_samples_padded_ / 2) { - real = fft_data[1]; - im = 0.0; - } else { - real = fft_data[k * 2]; - im = fft_data[k * 2 + 1]; - } - // v1 and v2 are the two components of the vector v in the math above. - BaseFloat v1 = real, v2 = -im; - // Minv_'s row indexes correspond to frequencies with 4 times more - // resolution than the FFT bins. - const BaseFloat *Minv_data = Minv_.RowData(k * 4); - // The Matrix M^{-1} is of the form [a b; b d] - BaseFloat a = Minv_data[0], b = Minv_data[1], d = Minv_data[2]; - // compute \delta E = v' M^{-1} v. 
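    // (For reference: with v = [v1 v2]' and M^{-1} = [a b; b d], the
    // quadratic form v' M^{-1} v expands to a v1^2 + d v2^2 + 2 b v1 v2,
    // which is exactly the expression computed below.)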
- BaseFloat delta_e = v1 * v1 * a + v2 * v2 * d + 2 * v1 * v2 * b; - InfoForBin &this_info = (*info)[k * 4]; - this_info.valid = true; - this_info.cos_dot = real; - this_info.sin_dot = -im; - this_info.energy = delta_e; - } -} - - -void SinusoidDetector::ComputeCoefficients() { - int32 num_samp = num_samples_; - int32 num_freq = num_samples_padded_ * 2 + 1; - cos_.Resize(num_freq, num_samp); - sin_.Resize(num_freq, num_samp); - - Vector cc(num_freq), cs(num_freq); - for (int32 k = 0; k < num_freq; k++) { - BaseFloat freq = k * samp_freq_ / (num_samples_padded_ * 4); - SubVector c(cos_, k), s(sin_, k); - CreateCosAndSin(samp_freq_, freq, &c, &s); - cc(k) = VecVec(c, c); - cs(k) = VecVec(c, s); - } - - M_.Resize(num_freq, 3, kUndefined); - Minv_.Resize(num_freq, 3, kUndefined); - - for (int32 k = 0; k < num_freq; k++) { - // Let the matrix M be [ a b; b d ]. [we don't write c because c == b]. - // We want to compute Minv_. - BaseFloat a = cc(k), b = cs(k), d = num_samples_ - a; - M_(k, 0) = a; - M_(k, 1) = b; - M_(k, 2) = d; - if (k == 0 || k == num_freq - 1) { - // this is a special case; it's not really the inverse of M but it will - - // give us the expression we want; it's like an inverse in just one dimension. - Minv_(k, 0) = 1.0 / a; - Minv_(k, 1) = 0.0; - Minv_(k, 2) = 0.0; - } else { - BaseFloat inv_det = 1.0 / (a * d - b * b); - // check for NaN and inf. - KALDI_ASSERT(inv_det == inv_det && inv_det - inv_det == 0.0); - // use: [a b;c d]^-1 = 1/(ad - bc) [d -b; -c a], special case where c = b. - BaseFloat inv_a = d * inv_det, inv_b = -b * inv_det, inv_d = a * inv_det; - Minv_(k, 0) = inv_a; - Minv_(k, 1) = inv_b; - Minv_(k, 2) = inv_d; - } - } -} - - -// Does fine optimization of the frequency within this bin; returns the -// final energy, the optimized frequency, and the cos and sin coefficients. -void SinusoidDetector::FineOptimizeFrequency( - const VectorBase &signal, - int32 bin, - BaseFloat bin_offset, - std::vector *info_in, - OptimizedInfo *opt_info) const { - std::vector &info = *info_in; - if (!info[bin].valid) ComputeBinInfo(signal, bin, &(info[bin])); - if (!info[bin+1].valid) ComputeBinInfo(signal, bin+1, &(info[bin+1])); - - const BaseFloat epsilon = 0.02, delta = 0.001; - - // If the offset is very close to the edges of the bin, move it - // closer to the center. Otherwise we may have problems with the - // steps below. The initial offset is only used as a starting point - // anyway, so this won't affect the final value much. - if (bin_offset < epsilon) - bin_offset = epsilon; - if (bin_offset > 1.0 - epsilon) - bin_offset = 1.0 - epsilon; - KALDI_VLOG(4) << "Initial bin offset = " << bin_offset << ", bin = " << bin; - - // create cos and sin waves of the specified frequency. - BaseFloat freq = (bin + bin_offset) * samp_freq_ / (num_samples_padded_ * 4); - Vector c(num_samples_, kUndefined), s(num_samples_, kUndefined); - CreateCosAndSin(samp_freq_, freq, &c, &s); - - // these a, b and d values are the elements of the M matrix at this frequency - // "freq", i.e. the matrix M_f [ a b; b d ]. This will be invertible because - // we have ensured that the frequency is not too close to zero or the Nyquist. 
-  BaseFloat a = VecVec(c, c), b = VecVec(c, s), d = num_samples_ - a;
-  BaseFloat inv_det = 1.0 / (a * d - b * b);
-  BaseFloat inv_a = d * inv_det, inv_b = -b * inv_det, inv_d = a * inv_det;
-
-  BaseFloat v1 = VecVec(c, signal), v2 = VecVec(s, signal);
-
-  BaseFloat delta_e = v1 * v1 * inv_a + v2 * v2 * inv_d + 2 * v1 * v2 * inv_b;
-
-  KALDI_VLOG(4) << "Actual energy-change at frequency " << freq << " is "
-                << delta_e;
-  // "freq" is a frequency somewhere in the middle of the bin.
-
-  BaseFloat final_offset, final_energy;
-  QuadraticMaximize(bin_offset, info[bin].energy, delta_e, info[bin+1].energy,
-                    &final_offset, &final_energy);
-
-  KALDI_VLOG(4) << "After further optimizing, offset was " << final_offset
-                << " giving freq "
-                << ((bin+final_offset) * samp_freq_ / (num_samples_padded_*4))
-                << ", with energy " << final_energy;
-
-  // Use interpolation (using a quadratic function) to get the entries of the M
-  // matrix at the final, tuned frequency.  Interpolation on M is better than on
-  // M^{-1}, as its elements are much better behaved as the frequency varies.
-  const BaseFloat *M_left_data = M_.RowData(bin),
-      *M_right_data = M_.RowData(bin + 1);
-
-  BaseFloat a_interp = QuadraticInterpolate(bin_offset, M_left_data[0], a, M_right_data[0],
-                                            final_offset);
-  BaseFloat b_interp = QuadraticInterpolate(bin_offset, M_left_data[1], b, M_right_data[1],
-                                            final_offset);
-  BaseFloat d_interp = QuadraticInterpolate(bin_offset, M_left_data[2], d, M_right_data[2],
-                                            final_offset);
-
-  // Now get the inverse of the M matrix at the final point.
-  BaseFloat a_inv_interp, b_inv_interp, d_inv_interp;
-
-  if ((bin == 0 && final_offset < delta) ||
-      (bin == num_samples_padded_ * 2 && final_offset > 1.0 - delta)) {
-    // If we're extremely close to zero or the Nyquist, we'll have trouble
-    // inverting M; just invert in the 1st dimension (we only have a cos
-    // component).
-    a_inv_interp = 1.0 / a_interp;
-    b_inv_interp = 0.0;
-    d_inv_interp = 0.0;
-  } else {
-    BaseFloat inv_det = 1.0 / (a_interp * d_interp - b_interp * b_interp);
-    // check for NaN and inf.
-    KALDI_ASSERT(inv_det == inv_det && inv_det - inv_det == 0.0);
-    // use: [a b; c d]^{-1} = 1/(ad - bc) [d -b; -c a], special case where c == b.
-    a_inv_interp = d_interp * inv_det;
-    b_inv_interp = -b_interp * inv_det;
-    d_inv_interp = a_interp * inv_det;
-  }
-
-  BaseFloat v1_interp = QuadraticInterpolate(bin_offset, info[bin].cos_dot, v1,
-                                             info[bin+1].cos_dot, final_offset);
-  BaseFloat v2_interp = QuadraticInterpolate(bin_offset, info[bin].sin_dot, v2,
-                                             info[bin+1].sin_dot, final_offset);
-
-  opt_info->bin = bin;
-  opt_info->offset = final_offset;
-  // Recompute the energy-reduction using the more accurate interpolated values
-  // of v1 and v2 (the dot-products of the cos and sin with the signal), and
-  // of M.
-  opt_info->energy = v1_interp * v1_interp * a_inv_interp +
-      v2_interp * v2_interp * d_inv_interp +
-      2 * v1_interp * v2_interp * b_inv_interp;
-  // Compute the coefficients of the cos and sin in the optimal sinusoid, as
-  // M^{-1} v.
-  opt_info->cos_coeff = a_inv_interp * v1_interp + b_inv_interp * v2_interp;
-  opt_info->sin_coeff = b_inv_interp * v1_interp + d_inv_interp * v2_interp;
-}
-
-void SinusoidDetector::FindCandidateBins(
-    BaseFloat min_energy,
-    const std::vector<InfoForBin> &info,
-    std::vector<int32> *bins) const {
-
-  int32 max_bin = num_samples_padded_ * 2;
-
-  BaseFloat cutoff = min_energy;
-  for (int32 k = 0; k <= max_bin; k += 4) {
-    KALDI_ASSERT(info[k].valid);
-    cutoff = std::max(cutoff, info[k].energy);
-  }
-
-  for (int32 k = 0; k < max_bin; k += 4) {
-    BaseFloat energy_upper_bound =
-        factor1_ * std::max(info[k].energy,
-                            info[k+4].energy);
-    if (energy_upper_bound >= cutoff)
-      bins->push_back(k + 2);
-  }
-}
-
-
-void SinusoidDetector::FindCandidateBins2(
-    BaseFloat min_energy,
-    const std::vector<InfoForBin> &info,
-    std::vector<int32> *bins2) const {
-
-  int32 max_bin = num_samples_padded_ * 2;
-
-  BaseFloat cutoff = min_energy;
-  for (int32 k = 0; k <= max_bin; k += 2) {
-    if (info[k].valid)
-      cutoff = std::max(cutoff, info[k].energy);
-  }
-
-  for (int32 k = 0; k < max_bin; k += 2) {
-    if (info[k].valid && info[k+2].valid) {
-      BaseFloat energy_upper_bound =
-          factor2_ * std::max(info[k].energy,
-                              info[k+2].energy);
-      if (energy_upper_bound >= cutoff)
-        bins2->push_back(k + 1);
-    }
-  }
-}
-
-
-void SinusoidDetector::ComputeBinInfo(
-    const VectorBase<BaseFloat> &signal,
-    int32 bin,
-    InfoForBin *info) const {
-  KALDI_ASSERT(!info->valid);  // otherwise we wasted time.
-  info->valid = true;
-  BaseFloat v1 = info->cos_dot = VecVec(cos_.Row(bin), signal);
-  BaseFloat v2 = info->sin_dot = VecVec(sin_.Row(bin), signal);
-  const BaseFloat *Minv_data = Minv_.RowData(bin);
-  BaseFloat a = Minv_data[0], b = Minv_data[1], d = Minv_data[2];
-  // compute \delta E = v' M^{-1} v.
-  BaseFloat delta_e = v1 * v1 * a + v2 * v2 * d + 2 * v1 * v2 * b;
-  info->energy = delta_e;
-}
-
-
-MultiSinusoidDetector::MultiSinusoidDetector(
-    const MultiSinusoidDetectorConfig &config,
-    int32 sampling_freq):
-    config_(config),
-    sample_freq_(sampling_freq),
-    samples_per_frame_subsampled_(0.001 * config.frame_length_ms *
-                                  static_cast<BaseFloat>(config.subsample_freq)),
-    waveform_finished_(false),
-    samples_consumed_(0),
-    resampler_(sampling_freq, config.subsample_freq,
-               config.subsample_filter_cutoff, config.subsample_filter_zeros),
-    detector_(config.subsample_freq, samples_per_frame_subsampled_) {
-  config.Check();
-}
-
-
-void MultiSinusoidDetector::Reset() {
-  waveform_finished_ = false;
-  samples_consumed_ = 0;
-  while (!subsampled_signal_.empty()) {
-    delete subsampled_signal_.front();
-    subsampled_signal_.pop_front();
-  }
-  resampler_.Reset();
-}
-
-void MultiSinusoidDetector::WaveformFinished() {
-  KALDI_ASSERT(!waveform_finished_ &&
-               "WaveformFinished() called twice.");
-
-  Vector<BaseFloat> empty_waveform;
-  subsampled_signal_.push_back(new Vector<BaseFloat>());
-  bool flush = true;
-  resampler_.Resample(empty_waveform, flush,
-                      subsampled_signal_.back());
-  waveform_finished_ = true;
-  if (subsampled_signal_.back()->Dim() == 0) {
-    delete subsampled_signal_.back();
-    subsampled_signal_.pop_back();
-  }
-}
-
-void MultiSinusoidDetector::AcceptWaveform(
-    const VectorBase<BaseFloat> &waveform) {
-  subsampled_signal_.push_back(new Vector<BaseFloat>());
-  bool flush = false;
-  resampler_.Resample(waveform, flush,
-                      subsampled_signal_.back());
-  if (subsampled_signal_.back()->Dim() == 0) {
-    delete subsampled_signal_.back();
-    subsampled_signal_.pop_back();
-  }
-}
-
-int32 MultiSinusoidDetector::NumSubsampledSamplesReady(int32 max_samp) const {
-  KALDI_ASSERT(samples_consumed_ >= 0 &&
-               ((subsampled_signal_.empty() &&
-                 samples_consumed_ == 0) ||
-                (!subsampled_signal_.empty() && samples_consumed_ <
-                 subsampled_signal_[0]->Dim())));
-
-  int32 ans = -samples_consumed_;
-  for (size_t i = 0; i < subsampled_signal_.size(); i++) {
-    ans += subsampled_signal_[i]->Dim();
-    if (ans > max_samp) break;
-  }
-  KALDI_ASSERT(ans >= 0);
-  return std::min(ans, max_samp);
-}
-
-bool MultiSinusoidDetector::Done() const {
-  int32 samp_ready = NumSubsampledSamplesReady(samples_per_frame_subsampled_);
-  if ((samp_ready >= samples_per_frame_subsampled_ && !waveform_finished_) ||
-      (samp_ready > 0 && waveform_finished_))
-    return false;
-  else
-    return true;
-}
-
-void MultiSinusoidDetector::GetNextFrameOfSignal(Vector<BaseFloat> *frame) {
-  frame->Resize(samples_per_frame_subsampled_, kUndefined);
-
-  int32 sample_offset = 0,
-      samples_needed = samples_per_frame_subsampled_;
-  while (samples_needed > 0 &&
-         !subsampled_signal_.empty()) {
-    Vector<BaseFloat> *src = subsampled_signal_.front();
-    int32 num_samples_avail = src->Dim() - samples_consumed_;
-    KALDI_ASSERT(num_samples_avail > 0);
-    int32 chunk_size = std::min(num_samples_avail,
-                                samples_needed);
-    frame->Range(sample_offset, chunk_size).CopyFromVec(
-        src->Range(samples_consumed_, chunk_size));
-    sample_offset += chunk_size;
-    samples_needed -= chunk_size;
-    samples_consumed_ += chunk_size;
-    if (samples_consumed_ == src->Dim()) {
-      samples_consumed_ = 0;
-      delete src;
-      subsampled_signal_.pop_front();
-    }
-  }
-  if (samples_needed > 0) {
-    KALDI_ASSERT(waveform_finished_ && sample_offset > 0);  // otherwise, code error.
-    frame->Range(sample_offset, samples_needed).SetZero();
-  }
-}
-
-
-void MultiSinusoidDetector::GetNextFrame(MultiSinusoidDetectorOutput *output) {
-  Vector<BaseFloat> frame;
-  GetNextFrameOfSignal(&frame);
-  // Mean subtraction.
-  frame.Add(-1.0 * frame.Sum() / frame.Dim());
-  *output = MultiSinusoidDetectorOutput();  // reset to default.
-
-  BaseFloat signal_energy = VecVec(frame, frame);
-  output->tot_energy = signal_energy / frame.Dim();
-  if (signal_energy == 0.0) return;
-
-  // min_energy1 is the lowest energy we might care about.
-  BaseFloat min_energy1 = signal_energy *
-      std::min(config_.two_freq_min_total_energy * 0.5,
-               config_.one_freq_min_energy);
-
-  Sinusoid sinusoid1;
-  BaseFloat energy1 = detector_.DetectSinusoid(min_energy1,
-                                               frame,
-                                               &sinusoid1);
-
-  if (energy1 == 0.0) return;  // Nothing detected.
-
-  // we only care about the 2nd sinusoid if
-  // energy1 + energy2 >= signal_energy * two_freq_min_total_energy,
-  // and energy2 >= signal_energy * two_freq_min_energy.
-  BaseFloat min_energy2 =
-      std::max(signal_energy * config_.two_freq_min_energy,
-               signal_energy * config_.two_freq_min_total_energy
-               - energy1);
-
-  BaseFloat energy2;
-  Sinusoid sinusoid2;
-
-  // If there is enough energy left in the signal that we could
-  // possibly detect a sinusoid of energy at least min_energy2...
-  if (min_energy2 <= signal_energy - energy1) {
-    sinusoid1.phase += M_PI;  // reverse the phase.
-    AddSinusoid(config_.subsample_freq, sinusoid1, &frame);
-
-    energy2 = detector_.DetectSinusoid(min_energy2,
-                                       frame,
-                                       &sinusoid2);
-
-    if (energy2 > energy1) {
-      // The following is just for our information, so we are aware
-      // when the sinusoid detection gives us the non-optimal sinusoid
-      // first.
-      BaseFloat factor = energy2 / energy1;
-      KALDI_VLOG(2) << "Second sinusoid greater than first by a factor of "
-                    << factor << ". (This means sinusoid detection is not "
-                    << "working ideally).";
-    }
-
-    if (DetectedTwoFrequency(signal_energy,
-                             sinusoid1, energy1,
-                             sinusoid2, energy2,
-                             output))
-      return;
-  } else {
-    energy2 = 0.0;
-  }
-  // We don't need the return status of the following; we just return anyway.
-  DetectedOneFrequency(signal_energy,
-                       sinusoid1, energy1,
-                       sinusoid2, energy2,
-                       output);
-}
-
-// returns true if we detected an acceptable two-frequency tone.
-bool MultiSinusoidDetector::DetectedTwoFrequency(
-    BaseFloat signal_energy,
-    const Sinusoid &sinusoid1,
-    BaseFloat energy1,
-    const Sinusoid &sinusoid2,
-    BaseFloat energy2,
-    MultiSinusoidDetectorOutput *output) {
-
-  if (energy1 + energy2 >= signal_energy *
-      config_.two_freq_min_total_energy &&
-      std::min(energy1, energy2) >= signal_energy *
-      config_.two_freq_min_energy &&
-      std::min(sinusoid1.freq, sinusoid2.freq) >= config_.min_freq &&
-      std::max(sinusoid1.freq, sinusoid2.freq) <= config_.max_freq) {
-    output->freq1 = sinusoid1.freq;
-    output->energy1 = energy1 / signal_energy;
-    output->freq2 = sinusoid2.freq;
-    output->energy2 = energy2 / signal_energy;
-    if (output->freq1 > output->freq2) {
-      std::swap(output->freq1, output->freq2);
-      std::swap(output->energy1, output->energy2);
-    }
-    return true;
-  } else {
-    return false;
-  }
-}
-
-
-// returns true if we detected an acceptable one-frequency tone.
-bool MultiSinusoidDetector::DetectedOneFrequency(
-    BaseFloat signal_energy,
-    const Sinusoid &sinusoid1,
-    BaseFloat energy1,
-    const Sinusoid &sinusoid2,
-    BaseFloat energy2,
-    MultiSinusoidDetectorOutput *output) {
-  // If sinusoid detection were performing exactly to spec, we could assume
-  // energy1 >= energy2, but we don't assume this as it's not guaranteed.
-  if (energy1 > energy2 && energy1 > signal_energy *
-      config_.one_freq_min_energy &&
-      sinusoid1.freq >= config_.min_freq &&
-      sinusoid1.freq <= config_.max_freq) {
-    output->freq1 = sinusoid1.freq;
-    output->energy1 = energy1 / signal_energy;
-    output->freq2 = 0.0;
-    output->energy2 = 0.0;
-    return true;
-  } else if (energy2 > energy1 && energy2 > signal_energy *
-             config_.one_freq_min_energy &&
-             sinusoid2.freq >= config_.min_freq &&
-             sinusoid2.freq <= config_.max_freq) {
-    output->freq1 = sinusoid2.freq;
-    output->energy1 = energy2 / signal_energy;
-    output->freq2 = 0.0;
-    output->energy2 = 0.0;
-    return true;
-  } else {
-    return false;
-  }
-}
-
-
-void DetectSinusoids(const VectorBase<BaseFloat> &signal,
-                     MultiSinusoidDetector *detector,
-                     Matrix<BaseFloat> *output) {
-  std::vector<MultiSinusoidDetectorOutput> output_vec;
-  detector->AcceptWaveform(signal);
-  detector->WaveformFinished();
-
-  int32 safety_margin = 10, approx_num_frames = safety_margin +
-      (signal.Dim() / (detector->SamplingFrequency() *
-                       detector->FrameShiftSecs()));
-  output_vec.reserve(approx_num_frames);
-  while (!detector->Done()) {
-    output_vec.resize(output_vec.size() + 1);
-    detector->GetNextFrame(&(output_vec.back()));
-  }
-  detector->Reset();
-  if (output_vec.empty()) {
-    output->Resize(0, 0);
-  } else {
-    output->Resize(output_vec.size(), 5, kUndefined);
-    for (int32 i = 0; i < output->NumRows(); i++) {
-      BaseFloat *row_data = output->RowData(i);
-      MultiSinusoidDetectorOutput &this_output = output_vec[i];
-      row_data[0] = this_output.tot_energy;
-      row_data[1] = this_output.freq1;
-      row_data[2] = this_output.energy1;
-      row_data[3] = this_output.freq2;
-      row_data[4] = this_output.energy2;
-    }
-  }
-}
-
-
-}  // namespace kaldi
-
diff --git a/src/feat/sinusoid-detection.h b/src/feat/sinusoid-detection.h
deleted file mode 100644
index f6addc0b530..00000000000
--- a/src/feat/sinusoid-detection.h
+++ /dev/null
@@ -1,436 +0,0 @@
-// feat/sinusoid-detection.h
-
-// Copyright 2015  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_FEAT_SINUSOID_DETECTION_H_
-#define KALDI_FEAT_SINUSOID_DETECTION_H_
-
-
-#include "base/kaldi-error.h"
-#include "matrix/matrix-lib.h"
-#include "util/common-utils.h"
-#include "feat/resample.h"
-#include <deque>
-
-namespace kaldi {
-/// @addtogroup feat FeatureExtraction
-/// @{
-
-
-struct Sinusoid {
-  // this structure is used to represent a sinusoid of the form
-  //   amplitude * cos(2 pi freq t + phase)
-  // in the SinusoidDetector code.
-  BaseFloat amplitude;
-  BaseFloat freq;
-  BaseFloat phase;
-  Sinusoid(BaseFloat a, BaseFloat f, BaseFloat p):
-      amplitude(a), freq(f), phase(p) { }
-  Sinusoid() {}
-};
-
-
-// This function adds the given sinusoid to the signal, as:
-//   (*signal)(t) += amplitude * cos(2 pi freq/samp_freq t + phase).
-void AddSinusoid(BaseFloat samp_freq,
-                 const Sinusoid &sinusoid,
-                 VectorBase<BaseFloat> *signal);
-
-
-class SinusoidDetector {
- public:
-  SinusoidDetector(BaseFloat samp_freq,
-                   int32 num_samp);
-
-  // Detect the dominant sinusoid component in the signal, as long as the
-  // energy-reduction of the signal from subtracting that sinusoid would be >=
-  // "min_energy_change", and return that energy reduction; or return zero if
-  // no candidate was found.
-  // This is non-const because the FFT class has a temporary buffer.
-  BaseFloat DetectSinusoid(BaseFloat min_energy_change,
-                           const VectorBase<BaseFloat> &signal,
-                           Sinusoid *sinusoid);
-
-  // This function does quadratic interpolation for a function that is known at
-  // three equally spaced points [x0 x1 x2] = [0 1 2], and we want the x-value
-  // and corresponding y-value at the maximum of the function within the range
-  // 0 <= x <= 2.  It's public for testing reasons.
-  static void QuadraticMaximizeEqualSpaced(
-      BaseFloat y0, BaseFloat y1, BaseFloat y2,
-      BaseFloat *x, BaseFloat *y);
-
-  // This function does quadratic interpolation for a function that is known at
-  // three points x0, x1 and x2 with x0 = 0, 0 < x1 < 1 and x2 = 1, where we
-  // want the x-value and corresponding y-value at the maximum of the function
-  // within the range 0 <= x <= 1.  It's public for testing reasons.
-  static void QuadraticMaximize(
-      BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
-      BaseFloat *x, BaseFloat *y);
-
-  // This function does quadratic interpolation for a function that is known at
-  // three points x0, x1 and x2 with x0 = 0, 0 <= x1 <= 1 and x2 = 1, where
-  // we want the value at a specific value x.  The corresponding y-value is
-  // returned.
-  static BaseFloat QuadraticInterpolate(
-      BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
-      BaseFloat x);
-
- private:
-  BaseFloat samp_freq_;
-  int32 num_samples_;
-  int32 num_samples_padded_;  // Number of samples, after zero-padding to a power of 2.
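  // A sketch of the derivation behind QuadraticMaximizeEqualSpaced() above
  // (illustrative only; the actual implementation also has to handle the
  // degenerate and boundary cases).  Fit y = a x^2 + b x + c through
  // (0, y0), (1, y1), (2, y2):
  //   c = y0,  a = (y0 - 2 y1 + y2) / 2,  b = (-3 y0 + 4 y1 - y2) / 2.
  // If a < 0 the parabola has an interior maximum at
  //   x* = -b / (2a) = (3 y0 - 4 y1 + y2) / (2 (y0 - 2 y1 + y2)),
  // clamped to [0, 2]; otherwise the maximum is at one of the endpoints
  // x = 0 or x = 2.  For example, y0 = 0, y1 = 1, y2 = 0 gives x* = 1, as
  // expected by symmetry.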
-  SplitRadixRealFft<BaseFloat> fft_;  // Object used to compute FFT of padded_signal_.
-
-  BaseFloat factor1_;  // When we search the range between two FFT bins, we
-                       // assume that the maximum energy-reduction within the
-                       // range may be greater than the maximum of the
-                       // energy-reductions at either side, by at most
-                       // "factor1", with factor1 > 1.0.  The analysis is quite
-                       // hard so we determine this factor empirically.  Making
-                       // this as small as possible helps us avoid searching too
-                       // many bins.
-
-  BaseFloat factor2_;  // As factor1, but for searches within a half-fft-bin
-                       // range.  Again determined empirically.  After that we
-                       // use quadratic interpolation to find the maximum energy.
-
-  // This matrix, of dimension (num_samples_padded_ * 2 + 1) by
-  // num_samples_, has in each row a different frequency of cosine wave.
-  Matrix<BaseFloat> cos_;
-  // This matrix, of dimension (num_samples_padded_ * 2 + 1) by
-  // num_samples_, has in each row a different frequency of sine wave.
-  Matrix<BaseFloat> sin_;
-
-  // M_ is a precomputed matrix of dimension (num_samples_padded_ * 2 + 1) by 3,
-  // containing the values a, b and c of a symmetric matrix [ a b; b c ].  There
-  // is one of these matrices for each frequency, sampled at one quarter the
-  // spacing of the FFT bins.  There is a long comment next to the definition of
-  // ComputeCoefficients that describes this.
-  Matrix<BaseFloat> M_;
-
-  // Minv_ contains coefficients in the same format as M_, but holding the
-  // corresponding coefficients of the inverse matrix.  There is a long comment
-  // next to the definition of ComputeCoefficients that describes this.
-  Matrix<BaseFloat> Minv_;
-
-
-  struct InfoForBin {
-    bool valid;
-    BaseFloat cos_dot;  // dot product of signal with cosine at this bin's frequency
-    BaseFloat sin_dot;  // dot product of signal with sine at this bin's frequency
-    BaseFloat energy;   // energy-reduction obtainable at this frequency.
-    InfoForBin(): valid(false) { }
-  };
-
-  // Info after fine optimization within a bin.
-  struct OptimizedInfo {
-    int32 bin;
-    BaseFloat offset;
-    BaseFloat energy;
-    BaseFloat cos_coeff;
-    BaseFloat sin_coeff;
-  };
-
-  // Compute the coefficients and energies at the original FFT bins (every
-  // fourth entry in "info").
-  void ComputeCoarseInfo(const Vector<BaseFloat> &fft,
-                         std::vector<InfoForBin> *info) const;
-
-  // After the coarse-level info is computed using ComputeCoarseInfo, finds a
-  // set of intermediate bin indexes to compute, that are the midpoints of
-  // coarse-level bins.
-  void FindCandidateBins(BaseFloat min_energy,
-                         const std::vector<InfoForBin> &info,
-                         std::vector<int32> *bins) const;
-
-  void FindCandidateBins2(BaseFloat min_energy,
-                          const std::vector<InfoForBin> &info,
-                          std::vector<int32> *bins) const;
-
-  void ComputeBinInfo(const VectorBase<BaseFloat> &signal,
-                      int32 bin, InfoForBin *info) const;
-
-  // For each bin b such that we have valid "info" data for bins b, b+1 and b+2,
-  // does quadratic interpolation to find the maximum predicted energy in the
-  // range [b, b+2].  The location of the maximum predicted energy is output to
-  // "bin_out" and "offset_out", and the corresponding predicted energy is
-  // returned.
-  //
-  // Note: if there are two different frequencies with similar maximum energies
-  // (e.g. within a factor of probably around 1.2 or so), the fact that
-  // OptimizeFrequency only returns one maximum may potentially lead to the
-  // smaller maximum being output.  We could have modified this to output
-  // multiple different maxima, which could have been more accurate in terms of
-  // being guaranteed to output the best maximum, but this probably wouldn't
-  // have a measurable impact on our application so we haven't bothered.
-  BaseFloat OptimizeFrequency(
-      const std::vector<InfoForBin> &info,
-      int32 *bin_out,
-      BaseFloat *offset_out) const;
-
-  // This function does
-  //   (*cos)(t) = cos(2 pi t freq / samp_freq)
-  //   (*sin)(t) = sin(2 pi t freq / samp_freq)
-  static void CreateCosAndSin(BaseFloat samp_freq,
-                              BaseFloat freq,
-                              VectorBase<BaseFloat> *cos,
-                              VectorBase<BaseFloat> *sin);
-
-  // Do fine optimization of the frequency within a bin, given a reasonable
-  // approximate position within it based on interpolation (which should be
-  // close to the optimum).
-  void FineOptimizeFrequency(
-      const VectorBase<BaseFloat> &signal,
-      int32 bin,
-      BaseFloat offset,
-      std::vector<InfoForBin> *info,
-      OptimizedInfo *opt_info) const;
-
-  // Computes the coefficients cos_, sin_, and Minv_.
-  void ComputeCoefficients();
-
-  // Calls some self-testing code that prints warnings if
-  // some of our assumptions were wrong.
-  void SelfTest(const VectorBase<BaseFloat> &signal,
-                const std::vector<InfoForBin> &info,
-                BaseFloat final_freq,
-                BaseFloat final_energy);
-
-};
-
-
-
-/**
-   This configuration class is for the frame-by-frame detection of
-   cases where there are one or two sinusoids that can explain
-   a lot of the energy in the signal.
-*/
-struct MultiSinusoidDetectorConfig {
-
-  // frame length in milliseconds
-  BaseFloat frame_length_ms;
-  // frame shift in milliseconds
-  BaseFloat frame_shift_ms;
-
-  // Proportion of the total energy of the signal that the quieter of
-  // the two sinusoids must comprise, in order to be counted, if two
-  // sinusoids are detected.
-  BaseFloat two_freq_min_energy;
-
-  // Proportion of the total energy of the signal that both sinusoids (if
-  // two are detected) must comprise, in order to be output.
-  BaseFloat two_freq_min_total_energy;
-
-  // Proportion of the total energy of the signal that a single sinusoid
-  // must comprise, in order to be output, if we are considering
-  // reporting a single sinusoid.  Note: detection of two sinusoids
-  // will take precedence over detection of a single sinusoid.
-  BaseFloat one_freq_min_energy;
-
-  // Lower end of the frequency range that we consider; frequencies outside
-  // this range are not candidates to appear in the detected output.
-  BaseFloat min_freq;
-  // Upper end of the frequency range that we consider; see min_freq.
-  BaseFloat max_freq;
-
-  // Frequency to which we subsample the signal before processing it.
-  // Must be an integer because of how the LinearResample code works.
-  int32 subsample_freq;
-
-  // Filter cut-off frequency used in sub-sampling.
-  BaseFloat subsample_filter_cutoff;
-
-  // the following is not critical and is not exported to the
-  // command line.
-  int32 subsample_filter_zeros;
-
-  MultiSinusoidDetectorConfig():
-      frame_length_ms(20), frame_shift_ms(10),
-      two_freq_min_energy(0.2), two_freq_min_total_energy(0.6),
-      one_freq_min_energy(0.75), min_freq(300.0),
-      max_freq(1800.0), subsample_freq(4000),
-      subsample_filter_cutoff(1900.0), subsample_filter_zeros(5) {}
-
-  void Register(OptionsItf *opts) {
-    opts->Register("frame-length", &frame_length_ms,
-                   "Frame length in milliseconds");
-    opts->Register("frame-shift", &frame_shift_ms,
-                   "Frame shift in milliseconds");
-    opts->Register("two-freq-min-energy", &two_freq_min_energy,
-                   "For detecting two-frequency tones, minimum energy that "
-                   "the quieter frequency must have (relative to total "
-                   "energy of frame)");
-    opts->Register("two-freq-min-total-energy", &two_freq_min_total_energy,
-                   "For detecting two-frequency tones, minimum energy that "
-                   "the two frequencies together must have (relative to total "
-                   "energy of frame)");
-    opts->Register("one-freq-min-energy", &one_freq_min_energy, "For detecting "
-                   "single-frequency tones, minimum energy that the frequency "
-                   "must have relative to total energy of frame");
-    opts->Register("min-freq", &min_freq, "Minimum frequency of sinusoid that "
-                   "will be detected");
-    opts->Register("max-freq", &max_freq, "Maximum frequency of sinusoid that "
-                   "will be detected");
-    opts->Register("subsample-freq", &subsample_freq, "Frequency at which "
-                   "we subsample the signal");
-    opts->Register("subsample-filter-cutoff", &subsample_filter_cutoff, "Filter "
-                   "cut-off frequency used in subsampling");
-  }
-  void Check() const {
-    KALDI_ASSERT(frame_length_ms > 0 && frame_length_ms >= frame_shift_ms &&
-                 min_freq > 0 && max_freq > min_freq &&
-                 subsample_filter_cutoff > max_freq &&
-                 subsample_freq/2 > subsample_filter_cutoff &&
-                 subsample_filter_zeros > 2 &&
-                 subsample_filter_cutoff > 0.25 * subsample_freq &&
-                 two_freq_min_total_energy > two_freq_min_energy &&
-                 two_freq_min_energy <= 0.5 * two_freq_min_total_energy);
-    BaseFloat samples_per_frame_shift =
-        frame_shift_ms * 0.001 * subsample_freq;
-    // The following assert ensures that the frame-shift is an exact
-    // number of samples, so that the locations of the frames
-    // don't gradually drift out of sync.
-    KALDI_ASSERT(fabs(samples_per_frame_shift -
-                      static_cast<int32>(samples_per_frame_shift)) <
-                 0.001);
-  }
-};
-
-struct MultiSinusoidDetectorOutput {
-  BaseFloat tot_energy;  // Total energy per sample of this frame (sum-square of
-                         // signal divided by number of samples); this is after
-                         // downsampling and mean subtraction.
-  BaseFloat freq1;    // Lower frequency detected, or 0 if none detected.
-  BaseFloat energy1;  // Energy of lower frequency divided by total energy, or 0
-                      // if none detected.
-  BaseFloat freq2;    // Higher frequency detected, or 0 if zero or one
-                      // frequencies detected.
-  BaseFloat energy2;  // Energy of higher frequency divided by total energy, or 0
-                      // if zero or one frequencies detected.
-  MultiSinusoidDetectorOutput(): tot_energy(0.0), freq1(0.0),
-                                 energy1(0.0), freq2(0.0), energy2(0.0) { }
-};
-
-
-class MultiSinusoidDetector {
- public:
-
-  // Initialize sinusoid detector.  Sampling frequency must be an integer.
-  MultiSinusoidDetector(const MultiSinusoidDetectorConfig &config,
-                        int32 sampling_freq);
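  // Typical streaming usage of this class would look something like the
  // following (a sketch; for a complete example see DetectSinusoids() in
  // sinusoid-detection.cc):
  //
  //   MultiSinusoidDetectorConfig config;
  //   MultiSinusoidDetector detector(config, 8000);  // 8000 = sampling rate
  //   detector.AcceptWaveform(chunk);   // possibly called many times
  //   detector.WaveformFinished();      // no more input; flush pending data
  //   while (!detector.Done()) {
  //     MultiSinusoidDetectorOutput output;
  //     detector.GetNextFrame(&output);
  //     // ... use output.freq1, output.energy1, etc. ...
  //   }
  //   detector.Reset();  // before processing another waveform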
-  /// This is how the class accepts its input.  You can put the waveform in
-  /// piece by piece, if it's an online application.
-  void AcceptWaveform(const VectorBase<BaseFloat> &waveform);
-
-  /// The user calls this to announce to the class that the waveform has ended;
-  /// this forces any pending data to be flushed.
-  void WaveformFinished();
-
-  /// Resets the state of the class so you can start processing another waveform.
-  void Reset();
-
-  /// This returns true if the class currently has no more data ready to output.
-  bool Done() const;
-
-  /// Outputs the next frame of output to "output", which must be non-NULL.
-  /// It is an error to call this if Done() has returned true, or has not been
-  /// checked.
-  void GetNextFrame(MultiSinusoidDetectorOutput *output);
-
-  BaseFloat FrameShiftSecs() const { return 0.001 * config_.frame_shift_ms; }
-
-  BaseFloat SamplingFrequency() const { return sample_freq_; }
-
- private:
-  // Gets the next frame of subsampled signal, and consumes the appropriate
-  // amount of stored data.  It is an error to call this if Done() returned
-  // true.
-  void GetNextFrameOfSignal(Vector<BaseFloat> *frame);
-
-  // returns true and sets freq1, freq2, energy1 and energy2 in "output" if we
-  // successfully detected an acceptable two-frequency tone.
-  bool DetectedTwoFrequency(BaseFloat signal_energy,
-                            const Sinusoid &sinusoid1,
-                            BaseFloat energy1,
-                            const Sinusoid &sinusoid2,
-                            BaseFloat energy2,
-                            MultiSinusoidDetectorOutput *output);
-
-  // returns true and sets freq1, freq2, energy1 and energy2 in "output" if we
-  // successfully detected an acceptable one-frequency tone.
-  bool DetectedOneFrequency(BaseFloat signal_energy,
-                            const Sinusoid &sinusoid1,
-                            BaseFloat energy1,
-                            const Sinusoid &sinusoid2,
-                            BaseFloat energy2,
-                            MultiSinusoidDetectorOutput *output);
-
-  // Returns std::min(max_samp, sum-of-samples-in-subsampled_signal_).
-  // (the std::min is for efficiency, so we don't have to visit the
-  // whole list).
-  int32 NumSubsampledSamplesReady(int32 max_samp) const;
-
-  MultiSinusoidDetectorConfig config_;
-  int32 sample_freq_;
-  int32 samples_per_frame_subsampled_;  // (samples per frame at the subsampled
-                                        // rate).
-
-  // True if the user has called WaveformFinished().
-  bool waveform_finished_;
-
-  // Pieces of the subsampled signal that are awaiting processing.
-  // Normally there will be just one element here, but if someone calls
-  // AcceptWaveform multiple times before getting output, there could
-  // be more elements.  All of these pieces are nonempty.
-  std::deque<Vector<BaseFloat>*> subsampled_signal_;
-
-  // stores the number of samples consumed from the first member of
-  // subsampled_signal_.  We will always have samples_consumed_ >= 0 and either
-  // (subsampled_signal_.empty() && samples_consumed_ == 0) or
-  // samples_consumed_ < subsampled_signal_[0]->Dim().
-  int32 samples_consumed_;
-
-  // This object is used to subsample the signal.
-  LinearResample resampler_;
-
-  // This object is used to detect sinusoids in the subsampled
-  // frames.
-  SinusoidDetector detector_;
-};
-
-// Detect sinusoids.  The signal should be sampled at detector->SamplingFrequency().
-void DetectSinusoids(const VectorBase<BaseFloat> &signal,
-                     MultiSinusoidDetector *detector,
-                     Matrix<BaseFloat> *output);
-
-
-/// @} End of "addtogroup feat"
-}  // namespace kaldi
-#endif  // KALDI_FEAT_SINUSOID_DETECTION_H_
diff --git a/src/featbin/Makefile b/src/featbin/Makefile
index dc2bea215d8..c51867b7d4c 100644
--- a/src/featbin/Makefile
+++ b/src/featbin/Makefile
@@ -14,8 +14,8 @@ BINFILES = compute-mfcc-feats compute-plp-feats compute-fbank-feats \
    apply-cmvn-sliding compute-cmvn-stats-two-channel compute-kaldi-pitch-feats \
    process-kaldi-pitch-feats compare-feats wav-to-duration add-deltas-sdc \
    compute-and-process-kaldi-pitch-feats modify-cmvn-stats wav-copy \
-   wav-reverberate append-vector-to-feats detect-sinusoids shift-feats \
-   concat-feats append-post-to-feats post-to-feats
+   wav-reverberate append-vector-to-feats shift-feats concat-feats \
+   append-post-to-feats post-to-feats
 
 OBJFILES =
diff --git a/src/featbin/detect-sinusoids.cc b/src/featbin/detect-sinusoids.cc
deleted file mode 100644
index 6c104d5ab5f..00000000000
--- a/src/featbin/detect-sinusoids.cc
+++ /dev/null
@@ -1,113 +0,0 @@
-// featbin/detect-sinusoids.cc
-
-// Copyright 2015  Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#include "base/kaldi-common.h"
-#include "util/common-utils.h"
-#include "feat/sinusoid-detection.h"
-#include "feat/wave-reader.h"
-
-
-int main(int argc, char *argv[]) {
-  try {
-    using namespace kaldi;
-    const char *usage =
-        "Detect sinusoids (one or two at a time) in waveform input and output\n"
-        "frame-by-frame information on their frequencies and energies.  Useful\n"
-        "as part of DTMF and dialtone detection.  Output is an archive of\n"
-        "matrices; for each file, there is a row per frame, containing\n"
-        "<tot-energy> <freq1> <energy1> <freq2> <energy2>,\n"
-        "where the frequencies and energies may be zero if no sufficiently\n"
-        "dominant sinusoid(s) was/were detected.  If two frequencies were\n"
-        "detected, frequency1 < frequency2.  See the options for more detail\n"
-        "on configuration.\n"
-        "\n"
-        "Usage: detect-sinusoids [options] <wav-rspecifier> <matrix-wspecifier>\n"
-        "e.g.: detect-sinusoids scp:wav.scp ark,t:sinusoids.ark\n";
-
-    ParseOptions po(usage);
-    MultiSinusoidDetectorConfig config;
-
-    config.Register(&po);
-
-    po.Read(argc, argv);
-
-    if (po.NumArgs() != 2) {
-      po.PrintUsage();
-      exit(1);
-    }
-
-    std::string wav_rspecifier = po.GetArg(1),
-        matrix_wspecifier = po.GetArg(2);
-
-    int32 num_done = 0, num_err = 0;
-
-    SequentialTableReader<WaveHolder> wav_reader(wav_rspecifier);
-    BaseFloatMatrixWriter matrix_writer(matrix_wspecifier);
-
-    MultiSinusoidDetector *detector = NULL;
-
-    for (; !wav_reader.Done(); wav_reader.Next()) {
-      const WaveData &wav_data = wav_reader.Value();
-      const Matrix<BaseFloat> &data = wav_data.Data();
-      BaseFloat samp_freq = wav_data.SampFreq();
-      int32 num_channels = data.NumRows();
-      if (num_channels != 1) {
-        KALDI_WARN << "detect-sinusoids requires data with one "
-                   << "channel.  Recording " << wav_reader.Key() << " has "
-                   << num_channels << ".  First select one channel of your "
-                   << "data (e.g. using sox)";
-        num_err++;
-        continue;
-      }
-      if (samp_freq < config.subsample_freq) {
-        KALDI_WARN << "Sampling frequency of data " << wav_reader.Key()
-                   << " is too low: " << samp_freq << " < "
-                   << config.subsample_freq << ".  Reduce --subsample-freq "
-                   << "if you want to run on this data.";
-        num_err++;
-        continue;
-      }
-
-      if (detector == NULL ||
-          samp_freq != detector->SamplingFrequency()) {
-        delete detector;
-        detector = new MultiSinusoidDetector(config, samp_freq);
-      }
-
-      Matrix<BaseFloat> output;
-      DetectSinusoids(data.Row(0), detector, &output);
-
-      if (output.NumRows() == 0) {
-        KALDI_WARN << "No output for " << wav_reader.Key();
-        num_err++;
-      } else {
-        matrix_writer.Write(wav_reader.Key(), output);
-        num_done++;
-      }
-    }
-    delete detector;
-    KALDI_LOG << "Detected sinusoids in " << num_done << " wave files, "
-              << num_err << " with errors.";
-    return (num_done != 0 ? 0 : 1);
-  } catch(const std::exception &e) {
-    std::cerr << e.what();
-    return -1;
-  }
-}
-
diff --git a/src/sgmm/Makefile b/src/sgmm/Makefile
deleted file mode 100644
index 26996a13116..00000000000
--- a/src/sgmm/Makefile
+++ /dev/null
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "gmm/model-test-common.h" -#include "sgmm/am-sgmm.h" -#include "util/kaldi-io.h" - -using kaldi::AmSgmm; -using kaldi::int32; -using kaldi::BaseFloat; -namespace ut = kaldi::unittest; - -// Tests the initialization routines: InitializeFromFullGmm(), CopyFromSgmm() -// and CopyGlobalsInitVecs(). -void TestSgmmInit(const AmSgmm &sgmm) { - using namespace kaldi; - int32 dim = sgmm.FeatureDim(); - kaldi::SgmmGselectConfig config; - config.full_gmm_nbest = std::min(config.full_gmm_nbest, sgmm.NumGauss()); - - kaldi::Vector feat(dim); - for (int32 d = 0; d < dim; d++) { - feat(d) = kaldi::RandGauss(); - } - kaldi::SgmmPerFrameDerivedVars frame_vars; - frame_vars.Resize(sgmm.NumGauss(), sgmm.FeatureDim(), - sgmm.PhoneSpaceDim()); - - std::vector gselect; - sgmm.GaussianSelection(config, feat, &gselect); - SgmmPerSpkDerivedVars empty; - SgmmPerFrameDerivedVars per_frame; - sgmm.ComputePerFrameVars(feat, gselect, empty, 0.0, &per_frame); - BaseFloat loglike = sgmm.LogLikelihood(per_frame, 0); - - // First, test the CopyFromSgmm() method: - AmSgmm *sgmm1 = new AmSgmm(); - sgmm1->CopyFromSgmm(sgmm, true); - sgmm1->GaussianSelection(config, feat, &gselect); - sgmm1->ComputePerFrameVars(feat, gselect, empty, 0.0, &per_frame); - BaseFloat loglike1 = sgmm1->LogLikelihood(per_frame, 0); - kaldi::AssertEqual(loglike, loglike1, 1e-4); - delete sgmm1; - - AmSgmm *sgmm2 = new AmSgmm(); - sgmm2->CopyFromSgmm(sgmm, false); - sgmm2->ComputeNormalizers(); - sgmm2->GaussianSelection(config, feat, &gselect); - sgmm2->ComputePerFrameVars(feat, gselect, empty, 0.0, &per_frame); - BaseFloat loglike2 = sgmm2->LogLikelihood(per_frame, 0); - kaldi::AssertEqual(loglike, loglike2, 1e-4); - delete sgmm2; - - // Next, initialize using the UBM from the current model - AmSgmm *sgmm3 = new AmSgmm(); - sgmm3->InitializeFromFullGmm(sgmm.full_ubm(), sgmm.NumPdfs(), - sgmm.PhoneSpaceDim(), sgmm.SpkSpaceDim()); - sgmm3->ComputeNormalizers(); - sgmm3->GaussianSelection(config, feat, &gselect); - sgmm3->ComputePerFrameVars(feat, gselect, empty, 0.0, &per_frame); - BaseFloat loglike3 = sgmm3->LogLikelihood(per_frame, 0); - kaldi::AssertEqual(loglike, loglike3, 1e-4); - delete sgmm3; - - // Finally, copy the global parameters from the current model - AmSgmm *sgmm4 = new AmSgmm(); - sgmm4->CopyGlobalsInitVecs(sgmm, sgmm.PhoneSpaceDim(), sgmm.SpkSpaceDim(), - sgmm.NumPdfs()); - sgmm4->ComputeNormalizers(); - sgmm4->GaussianSelection(config, feat, &gselect); - sgmm4->ComputePerFrameVars(feat, gselect, empty, 0.0, &per_frame); - BaseFloat loglike4 = sgmm4->LogLikelihood(per_frame, 0); - kaldi::AssertEqual(loglike, loglike4, 1e-4); - delete sgmm4; -} - -// Tests the Read() and Write() methods, in both binary and ASCII mode, as well -// as Check(), and methods in likelihood computations. 
-void TestSgmmIO(const AmSgmm &sgmm) { - using namespace kaldi; - int32 dim = sgmm.FeatureDim(); - kaldi::SgmmGselectConfig config; - config.full_gmm_nbest = std::min(config.full_gmm_nbest, sgmm.NumGauss()); - - kaldi::Vector feat(dim); - for (int32 d = 0; d < dim; d++) { - feat(d) = kaldi::RandGauss(); - } - kaldi::SgmmPerFrameDerivedVars frame_vars; - frame_vars.Resize(sgmm.NumGauss(), sgmm.FeatureDim(), - sgmm.PhoneSpaceDim()); - - std::vector gselect; - sgmm.GaussianSelection(config, feat, &gselect); - SgmmPerSpkDerivedVars empty; - SgmmPerFrameDerivedVars per_frame; - sgmm.ComputePerFrameVars(feat, gselect, empty, 0.0, &per_frame); - BaseFloat loglike = sgmm.LogLikelihood(per_frame, 0); - - // First, non-binary write - sgmm.Write(kaldi::Output("tmpf", false).Stream(), false, - kaldi::kSgmmWriteAll); - - bool binary_in; - AmSgmm *sgmm1 = new AmSgmm(); - // Non-binary read - kaldi::Input ki1("tmpf", &binary_in); - sgmm1->Read(ki1.Stream(), binary_in); - sgmm1->Check(true); - sgmm1->GaussianSelection(config, feat, &gselect); - sgmm1->ComputePerFrameVars(feat, gselect, empty, 0.0, &per_frame); - - BaseFloat loglike1 = sgmm1->LogLikelihood(per_frame, 0); - kaldi::AssertEqual(loglike, loglike1, 1e-4); - - // Next, binary write - sgmm1->Write(kaldi::Output("tmpfb", true).Stream(), true, - kaldi::kSgmmWriteAll); - delete sgmm1; - - AmSgmm *sgmm2 = new AmSgmm(); - // Binary read - kaldi::Input ki2("tmpfb", &binary_in); - sgmm2->Read(ki2.Stream(), binary_in); - sgmm2->Check(true); - sgmm2->GaussianSelection(config, feat, &gselect); - sgmm2->ComputePerFrameVars(feat, gselect, empty, 0.0, &per_frame); - BaseFloat loglike2 = sgmm2->LogLikelihood(per_frame, 0); - kaldi::AssertEqual(loglike, loglike2, 1e-4); - delete sgmm2; - - unlink("tmpf"); - unlink("tmpfb"); -} - -void TestSgmmSubstates(const AmSgmm &sgmm) { - using namespace kaldi; - int32 target_substates = 2 * sgmm.NumPdfs(); - kaldi::Vector occs(sgmm.NumPdfs()); - for (int32 i = 0; i < occs.Dim(); i++) - occs(i) = std::fabs(kaldi::RandGauss()) * (kaldi::RandUniform()+1); - AmSgmm *sgmm1 = new AmSgmm(); - sgmm1->CopyFromSgmm(sgmm, false); - sgmm1->SplitSubstates(occs, target_substates, 0.01, 0.2, 1000); - sgmm1->ComputeNormalizers(); - sgmm1->Check(true); - int32 dim = sgmm.FeatureDim(); - kaldi::SgmmGselectConfig config; - config.full_gmm_nbest = std::min(config.full_gmm_nbest, sgmm.NumGauss()); - kaldi::Vector feat(dim); - for (int32 d = 0; d < dim; d++) { - feat(d) = kaldi::RandGauss(); - } - - std::vector gselect; - sgmm.GaussianSelection(config, feat, &gselect); - - SgmmPerSpkDerivedVars empty; - SgmmPerFrameDerivedVars per_frame; - sgmm.ComputePerFrameVars(feat, gselect, empty, 0.0, &per_frame); - BaseFloat loglike = sgmm.LogLikelihood(per_frame, 0); - - sgmm1->GaussianSelection(config, feat, &gselect); - sgmm1->ComputePerFrameVars(feat, gselect, empty, 0.0, &per_frame); - BaseFloat loglike1 = sgmm1->LogLikelihood(per_frame, 0); - kaldi::AssertEqual(loglike, loglike1, 1e-2); - - delete sgmm1; -} - -void TestSgmmIncreaseDim(const AmSgmm &sgmm) { - using namespace kaldi; - int32 target_phn_dim = static_cast(1.5 * sgmm.PhoneSpaceDim()); - int32 target_spk_dim = sgmm.PhoneSpaceDim() - 1; - - int32 dim = sgmm.FeatureDim(); - kaldi::SgmmGselectConfig config; - config.full_gmm_nbest = std::min(config.full_gmm_nbest, sgmm.NumGauss()); - kaldi::Vector feat(dim); - for (int32 d = 0; d < dim; d++) { - feat(d) = kaldi::RandGauss(); - } - kaldi::SgmmPerFrameDerivedVars frame_vars; - - std::vector gselect; - sgmm.GaussianSelection(config, feat, 
&gselect); - SgmmPerSpkDerivedVars empty; - SgmmPerFrameDerivedVars per_frame; - sgmm.ComputePerFrameVars(feat, gselect, empty, 0.0, &per_frame); - BaseFloat loglike = sgmm.LogLikelihood(per_frame, 0); - - kaldi::Matrix norm_xform; - kaldi::ComputeFeatureNormalizer(sgmm.full_ubm(), &norm_xform); - AmSgmm *sgmm1 = new AmSgmm(); - sgmm1->CopyFromSgmm(sgmm, false); - sgmm1->Check(true); - sgmm1->IncreasePhoneSpaceDim(target_phn_dim, norm_xform); - sgmm1->ComputeNormalizers(); - sgmm1->Check(true); - - - sgmm1->GaussianSelection(config, feat, &gselect); - sgmm1->ComputePerFrameVars(feat, gselect, empty, 0.0, &per_frame); - BaseFloat loglike1 = sgmm1->LogLikelihood(per_frame, 0); - kaldi::AssertEqual(loglike, loglike1, 1e-4); - - sgmm1->IncreaseSpkSpaceDim(target_spk_dim, norm_xform); - sgmm1->Check(true); - sgmm1->GaussianSelection(config, feat, &gselect); - sgmm1->ComputePerFrameVars(feat, gselect, empty, 0.0, &per_frame); - BaseFloat loglike2 = sgmm1->LogLikelihood(per_frame, 0); - kaldi::AssertEqual(loglike, loglike2, 1e-4); - delete sgmm1; -} - -void TestSgmmPreXform(const AmSgmm &sgmm) { - kaldi::Matrix xform, inv_xform; - kaldi::Vector diag_scatter; - kaldi::Vector occs(sgmm.NumPdfs()); - occs.Set(100); - sgmm.ComputeFmllrPreXform(occs, &xform, &inv_xform, &diag_scatter); - int32 dim = xform.NumRows(); - kaldi::SubMatrix a_pre(xform, 0, dim, 0, dim), - a_inv(inv_xform, 0, dim, 0, dim); - kaldi::Vector b_pre(dim), b_inv(dim); - b_pre.CopyColFromMat(xform, dim); - b_inv.CopyColFromMat(inv_xform, dim); - kaldi::Matrix res_mat(dim, dim, kaldi::kSetZero); - res_mat.AddMatMat(1.0, a_pre, kaldi::kNoTrans, a_inv, kaldi::kNoTrans, 0.0); - KALDI_ASSERT(res_mat.IsUnit(1.0e-6)); - kaldi::Vector res_vec(dim, kaldi::kSetZero); - res_vec.AddMatVec(1.0, a_inv, kaldi::kNoTrans, b_pre, 0.0); - res_vec.AddVec(1.0, b_inv); - KALDI_ASSERT(res_vec.IsZero(1.0e-6)); -} - -void UnitTestSgmm() { - size_t dim = 1 + kaldi::RandInt(0, 9); // random dimension of the gmm - size_t num_comp = 3 + kaldi::RandInt(0, 9); // random number of mixtures; - // make sure it's more than one or we get errors initializing the SGMM. - kaldi::FullGmm full_gmm; - ut::InitRandFullGmm(dim, num_comp, &full_gmm); - - size_t num_states = 1; - AmSgmm sgmm; - kaldi::SgmmGselectConfig config; - sgmm.InitializeFromFullGmm(full_gmm, num_states, dim+1, 0); - sgmm.ComputeNormalizers(); - TestSgmmInit(sgmm); - TestSgmmIO(sgmm); - TestSgmmSubstates(sgmm); - TestSgmmIncreaseDim(sgmm); - TestSgmmPreXform(sgmm); -} - -int main() { - for (int i = 0; i < 10; i++) - UnitTestSgmm(); - std::cout << "Test OK.\n"; - return 0; -} diff --git a/src/sgmm/am-sgmm.cc b/src/sgmm/am-sgmm.cc deleted file mode 100644 index 1cd7c6a2b62..00000000000 --- a/src/sgmm/am-sgmm.cc +++ /dev/null @@ -1,1395 +0,0 @@ -// sgmm/am-sgmm.cc - -// Copyright 2009-2011 Microsoft Corporation; Lukas Burget; -// Saarland University (Author: Arnab Ghoshal); -// Ondrej Glembek; Yanmin Qian; -// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey) -// Liang Lu; Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include "sgmm/am-sgmm.h" -#include "thread/kaldi-thread.h" - -namespace kaldi { -using std::vector; - -// This function needs to be added because std::generate is complaining -// about RandGauss(), which takes an optional arguments. -static inline float _RandGauss() -{ - return RandGauss(); -} - -void AmSgmm::Read(std::istream &in_stream, bool binary) { - int32 num_states, feat_dim, num_gauss; - std::string token; - - ExpectToken(in_stream, binary, ""); - ExpectToken(in_stream, binary, ""); - ReadBasicType(in_stream, binary, &num_states); - ExpectToken(in_stream, binary, ""); - ReadBasicType(in_stream, binary, &feat_dim); - KALDI_ASSERT(num_states > 0 && feat_dim > 0); - - ReadToken(in_stream, binary, &token); - - while (token != "") { - if (token == "") { - diag_ubm_.Read(in_stream, binary); - } else if (token == "") { - full_ubm_.Read(in_stream, binary); - } else if (token == "") { - ExpectToken(in_stream, binary, ""); - ReadBasicType(in_stream, binary, &num_gauss); - SigmaInv_.resize(num_gauss); - for (int32 i = 0; i < num_gauss; i++) { - SigmaInv_[i].Read(in_stream, binary); - } - } else if (token == "") { - ExpectToken(in_stream, binary, ""); - ReadBasicType(in_stream, binary, &num_gauss); - M_.resize(num_gauss); - for (int32 i = 0; i < num_gauss; i++) { - M_[i].Read(in_stream, binary); - } - } else if (token == "") { - ExpectToken(in_stream, binary, ""); - ReadBasicType(in_stream, binary, &num_gauss); - N_.resize(num_gauss); - for (int32 i = 0; i < num_gauss; i++) { - N_[i].Read(in_stream, binary); - } - } else if (token == "") { - w_.Read(in_stream, binary); - } else if (token == "") { - v_.resize(num_states); - for (int32 j = 0; j < num_states; j++) { - v_[j].Read(in_stream, binary); - } - } else if (token == "") { - c_.resize(num_states); - for (int32 j = 0; j < num_states; j++) { - c_[j].Read(in_stream, binary); - } - } else if (token == "") { - n_.resize(num_states); - for (int32 j = 0; j < num_states; j++) { - n_[j].Read(in_stream, binary); - } - // The following are the Gaussian prior parameters for MAP adaptation of M - // They may be moved to somewhere else eventually. 
- } else if (token == "") { - ExpectToken(in_stream, binary, ""); - ReadBasicType(in_stream, binary, &num_gauss); - M_prior_.resize(num_gauss); - for (int32 i = 0; i < num_gauss; i++) { - M_prior_[i].Read(in_stream, binary); - } - } else if (token == "") { - row_cov_inv_.Read(in_stream, binary); - } else if (token == "") { - col_cov_inv_.Read(in_stream, binary); - } else { - KALDI_ERR << "Unexpected token '" << token << "' in model file "; - } - ReadToken(in_stream, binary, &token); - } - - if (n_.empty()) { - ComputeNormalizers(); - } -} - -void AmSgmm::Write(std::ostream &out_stream, bool binary, - SgmmWriteFlagsType write_params) const { - int32 num_states = NumPdfs(), - feat_dim = FeatureDim(), - num_gauss = NumGauss(); - - WriteToken(out_stream, binary, ""); - if (!binary) out_stream << "\n"; - WriteToken(out_stream, binary, ""); - WriteBasicType(out_stream, binary, num_states); - WriteToken(out_stream, binary, ""); - WriteBasicType(out_stream, binary, feat_dim); - if (!binary) out_stream << "\n"; - - if (write_params & kSgmmBackgroundGmms) { - WriteToken(out_stream, binary, ""); - diag_ubm_.Write(out_stream, binary); - WriteToken(out_stream, binary, ""); - full_ubm_.Write(out_stream, binary); - } - - if (write_params & kSgmmGlobalParams) { - WriteToken(out_stream, binary, ""); - WriteToken(out_stream, binary, ""); - WriteBasicType(out_stream, binary, num_gauss); - if (!binary) out_stream << "\n"; - for (int32 i = 0; i < num_gauss; i++) { - SigmaInv_[i].Write(out_stream, binary); - } - WriteToken(out_stream, binary, ""); - WriteToken(out_stream, binary, ""); - WriteBasicType(out_stream, binary, num_gauss); - if (!binary) out_stream << "\n"; - for (int32 i = 0; i < num_gauss; i++) { - M_[i].Write(out_stream, binary); - } - if (N_.size() != 0) { - WriteToken(out_stream, binary, ""); - WriteToken(out_stream, binary, ""); - WriteBasicType(out_stream, binary, num_gauss); - if (!binary) out_stream << "\n"; - for (int32 i = 0; i < num_gauss; i++) { - N_[i].Write(out_stream, binary); - } - } - WriteToken(out_stream, binary, ""); - w_.Write(out_stream, binary); - - // The following are the Gaussian prior parameters for MAP adaptation of M. - // They may be moved to somewhere else eventually. 
- if (M_prior_.size() != 0) { - WriteToken(out_stream, binary, ""); - WriteToken(out_stream, binary, ""); - WriteBasicType(out_stream, binary, num_gauss); - if (!binary) out_stream << "\n"; - for (int32 i = 0; i < num_gauss; i++) { - M_prior_[i].Write(out_stream, binary); - } - - KALDI_ASSERT(row_cov_inv_.NumRows() != 0 && - "Empty row covariance for MAP prior"); - WriteToken(out_stream, binary, ""); - if (!binary) out_stream << "\n"; - row_cov_inv_.Write(out_stream, binary); - - KALDI_ASSERT(col_cov_inv_.NumRows() != 0 && - "Empty column covariance for MAP prior"); - WriteToken(out_stream, binary, ""); - if (!binary) out_stream << "\n"; - col_cov_inv_.Write(out_stream, binary); - } - // end priors for MAP adaptation - } - - if (write_params & kSgmmStateParams) { - WriteToken(out_stream, binary, ""); - for (int32 j = 0; j < num_states; j++) { - v_[j].Write(out_stream, binary); - } - WriteToken(out_stream, binary, ""); - for (int32 j = 0; j < num_states; j++) { - c_[j].Write(out_stream, binary); - } - } - - if (write_params & kSgmmNormalizers) { - WriteToken(out_stream, binary, ""); - if (n_.empty()) - KALDI_WARN << "Not writing normalizers since they are not present."; - else - for (int32 j = 0; j < num_states; j++) - n_[j].Write(out_stream, binary); - } - - WriteToken(out_stream, binary, ""); -} - -void AmSgmm::Check(bool show_properties) { - int32 num_states = NumPdfs(), - num_gauss = NumGauss(), - feat_dim = FeatureDim(), - phn_dim = PhoneSpaceDim(), - spk_dim = SpkSpaceDim(); - - if (show_properties) - KALDI_LOG << "AmSgmm: #states = " << num_states << ", #Gaussians = " - << num_gauss << ", feature dim = " << feat_dim - << ", phone-space dim =" << phn_dim - << ", speaker-space dim =" << spk_dim; - KALDI_ASSERT(num_states > 0 && num_gauss > 0 && feat_dim > 0 && phn_dim > 0); - - std::ostringstream debug_str; - - // First check the diagonal-covariance UBM. - KALDI_ASSERT(diag_ubm_.NumGauss() == num_gauss); - KALDI_ASSERT(diag_ubm_.Dim() == feat_dim); - - // Check the full-covariance UBM. - KALDI_ASSERT(full_ubm_.NumGauss() == num_gauss); - KALDI_ASSERT(full_ubm_.Dim() == feat_dim); - - // Check the globally-shared covariance matrices. - KALDI_ASSERT(SigmaInv_.size() == static_cast(num_gauss)); - for (int32 i = 0; i < num_gauss; i++) { - KALDI_ASSERT(SigmaInv_[i].NumRows() == feat_dim && - SigmaInv_[i](0, 0) > 0.0); // or it wouldn't be +ve definite. - } - - KALDI_ASSERT(M_.size() == static_cast(num_gauss)); - for (int32 i = 0; i < num_gauss; i++) { - KALDI_ASSERT(M_[i].NumRows() == feat_dim && M_[i].NumCols() == phn_dim); - } - - KALDI_ASSERT(w_.NumRows() == num_gauss && w_.NumCols() == phn_dim); - - { // check v, c. - KALDI_ASSERT(v_.size() == static_cast(num_states) && - c_.size() == static_cast(num_states)); - int32 nSubstatesTot = 0; - for (int32 j = 0; j < num_states; j++) { - int32 M_j = NumSubstates(j); - nSubstatesTot += M_j; - KALDI_ASSERT(M_j > 0 && v_[j].NumRows() == M_j && - c_[j].Dim() == M_j && v_[j].NumCols() == phn_dim); - } - debug_str << "Substates: "<< (nSubstatesTot) << ". "; - } - - // check n. - if (n_.size() == 0) { - debug_str << "Normalizers: no. "; - } else { - debug_str << "Normalizers: yes. 
"; - KALDI_ASSERT(n_.size() == static_cast(num_states)); - for (int32 j = 0; j < num_states; j++) { - KALDI_ASSERT(n_[j].NumRows() == num_gauss && - n_[j].NumCols() == NumSubstates(j)); - } - } - - if (show_properties) - KALDI_LOG << "Subspace GMM model properties: " << debug_str.str(); -} - -void AmSgmm::InitializeFromFullGmm(const FullGmm &full_gmm, - int32 num_states, - int32 phn_subspace_dim, - int32 spk_subspace_dim) { - full_ubm_.CopyFromFullGmm(full_gmm); - diag_ubm_.CopyFromFullGmm(full_gmm); - if (phn_subspace_dim < 1 || phn_subspace_dim > full_gmm.Dim() + 1) { - KALDI_WARN << "Initial phone-subspace dimension must be in [1, " - << full_gmm.Dim() + 1 << "]. Changing from " << phn_subspace_dim - << " to " << full_gmm.Dim() + 1; - phn_subspace_dim = full_gmm.Dim() + 1; - } - if (spk_subspace_dim < 0 || spk_subspace_dim > full_gmm.Dim()) { - KALDI_WARN << "Initial spk-subspace dimension must be in [1, " - << full_gmm.Dim() << "]. Changing from " << spk_subspace_dim - << " to " << full_gmm.Dim(); - spk_subspace_dim = full_gmm.Dim(); - } - w_.Resize(0, 0); - N_.clear(); - c_.clear(); - v_.clear(); - SigmaInv_.clear(); - - KALDI_LOG << "Initializing model"; - Matrix norm_xform; - ComputeFeatureNormalizer(full_gmm, &norm_xform); - InitializeMw(phn_subspace_dim, norm_xform); - if (spk_subspace_dim > 0) InitializeN(spk_subspace_dim, norm_xform); - InitializeVecs(num_states); - KALDI_LOG << "Initializing variances"; - InitializeCovars(); -} - -void AmSgmm::CopyFromSgmm(const AmSgmm &other, - bool copy_normalizers) { - KALDI_LOG << "Copying AmSgmm"; - - // Copy background GMMs - diag_ubm_.CopyFromDiagGmm(other.diag_ubm_); - full_ubm_.CopyFromFullGmm(other.full_ubm_); - - // Copy global params - SigmaInv_ = other.SigmaInv_; - M_ = other.M_; - w_ = other.w_; - N_ = other.N_; - - // Copy state-specific params, but only copy normalizers if requested. - v_ = other.v_; - c_ = other.c_; - if (copy_normalizers) n_ = other.n_; - - KALDI_LOG << "Done."; -} - -void AmSgmm::CopyGlobalsInitVecs(const AmSgmm &other, - int32 phn_subspace_dim, - int32 spk_subspace_dim, - int32 num_pdfs) { - if (phn_subspace_dim < 1 || phn_subspace_dim > other.PhoneSpaceDim()) { - KALDI_WARN << "Initial phone-subspace dimension must be in [1, " - << other.PhoneSpaceDim() << "]. Changing from " << phn_subspace_dim - << " to " << other.PhoneSpaceDim(); - phn_subspace_dim = other.PhoneSpaceDim(); - } - if (spk_subspace_dim < 0 || spk_subspace_dim > other.SpkSpaceDim()) { - KALDI_WARN << "Initial spk-subspace dimension must be in [1, " - << other.SpkSpaceDim() << "]. 
Changing from " << spk_subspace_dim - << " to " << other.SpkSpaceDim(); - spk_subspace_dim = other.SpkSpaceDim(); - } - - KALDI_LOG << "Initializing model"; - - // Copy background GMMs - diag_ubm_.CopyFromDiagGmm(other.diag_ubm_); - full_ubm_.CopyFromFullGmm(other.full_ubm_); - - // Copy global params - SigmaInv_ = other.SigmaInv_; - int32 num_gauss = diag_ubm_.NumGauss(), - data_dim = other.FeatureDim(); - M_.resize(num_gauss); - w_.Resize(num_gauss, phn_subspace_dim); - for (int32 i = 0; i < num_gauss; i++) { - M_[i].Resize(data_dim, phn_subspace_dim); - M_[i].CopyFromMat(other.M_[i].Range(0, data_dim, 0, phn_subspace_dim), - kNoTrans); - } - w_.CopyFromMat(other.w_.Range(0, num_gauss, 0, phn_subspace_dim), kNoTrans); - - if (spk_subspace_dim > 0) { - N_.resize(num_gauss); - for (int32 i = 0; i < num_gauss; i++) { - N_[i].Resize(data_dim, spk_subspace_dim); - N_[i].CopyFromMat(other.N_[i].Range(0, data_dim, 0, spk_subspace_dim), - kNoTrans); - } - } else { - N_.clear(); - } - InitializeVecs(num_pdfs); -} - - -void AmSgmm::ComputePerFrameVars(const VectorBase &data, - const std::vector &gselect, - const SgmmPerSpkDerivedVars &spk_vars, - BaseFloat logdet_s, - SgmmPerFrameDerivedVars *per_frame_vars) const { - KALDI_ASSERT(!n_.empty() && "ComputeNormalizers() must be called."); - - if (per_frame_vars->NeedsResizing(gselect.size(), - FeatureDim(), - PhoneSpaceDim())) - per_frame_vars->Resize(gselect.size(), FeatureDim(), PhoneSpaceDim()); - - per_frame_vars->gselect = gselect; - per_frame_vars->xt.CopyFromVec(data); - - for (int32 ki = 0, last = gselect.size(); ki < last; ki++) { - int32 i = gselect[ki]; - per_frame_vars->xti.Row(ki).CopyFromVec(per_frame_vars->xt); - if (spk_vars.v_s.Dim() != 0) - per_frame_vars->xti.Row(ki).AddVec(-1.0, spk_vars.o_s.Row(i)); - } - Vector SigmaInv_xt(FeatureDim()); - for (int32 ki = 0, last = gselect.size(); ki < last; ki++) { - int32 i = gselect[ki]; - SigmaInv_xt.AddSpVec(1.0, SigmaInv_[i], per_frame_vars->xti.Row(ki), 0.0); - // Eq (35): z_{i}(t) = M_{i}^{T} \Sigma_{i}^{-1} x_{i}(t) - per_frame_vars->zti.Row(ki).AddMatVec(1.0, M_[i], kTrans, SigmaInv_xt, 0.0); - // Eq.(36): n_{i}(t) = -0.5 x_{i}^{T} \Sigma_{i}^{-1} x_{i}(t) - per_frame_vars->nti(ki) = -0.5 * VecVec(per_frame_vars->xti.Row(ki), - SigmaInv_xt) + logdet_s; - } -} - -BaseFloat AmSgmm::LogLikelihood(const SgmmPerFrameDerivedVars &per_frame_vars, - int32 j, BaseFloat log_prune) const { - KALDI_ASSERT(j < NumPdfs()); - const vector &gselect = per_frame_vars.gselect; - - - // Eq.(37): log p(x(t), m, i|j) [indexed by j, ki] - // Although the extra memory allocation of storing this as a - // matrix might seem unnecessary, we save time in the LogSumExp() - // via more effective pruning. - Matrix logp_x(gselect.size(), NumSubstates(j)); - - for (int32 ki = 0, last = gselect.size(); ki < last; ki++) { - SubVector logp_xi(logp_x, ki); - int32 i = gselect[ki]; - // for all substates, compute z_{i}^T v_{jm} - logp_xi.AddMatVec(1.0, v_[j], kNoTrans, per_frame_vars.zti.Row(ki), 0.0); - logp_xi.AddVec(1.0, n_[j].Row(i)); // for all substates, add n_{jim} - logp_xi.Add(per_frame_vars.nti(ki)); // for all substates, add n_{i}(t) - } - // Eq. 
(38): log p(x(t)|j) = log \sum_{m, i} p(x(t), m, i|j) - return logp_x.LogSumExp(log_prune); -} - -BaseFloat -AmSgmm::ComponentPosteriors(const SgmmPerFrameDerivedVars &per_frame_vars, - int32 j, - Matrix *post) const { - KALDI_ASSERT(j < NumPdfs()); - if (post == NULL) KALDI_ERR << "NULL pointer passed as return argument."; - const vector &gselect = per_frame_vars.gselect; - int32 num_gselect = gselect.size(); - post->Resize(num_gselect, NumSubstates(j)); - - // Eq.(37): log p(x(t), m, i|j) = z_{i}^T v_{jm} (for all substates) - post->AddMatMat(1.0, per_frame_vars.zti, kNoTrans, v_[j], kTrans, 0.0); - for (int32 ki = 0; ki < num_gselect; ki++) { - int32 i = gselect[ki]; - // Eq. (37): log p(x(t), m, i|j) += n_{jim} + n_{i}(t) (for all substates) - post->Row(ki).AddVec(1.0, n_[j].Row(i)); - post->Row(ki).Add(per_frame_vars.nti(ki)); - } - - // Eq. (38): log p(x(t)|j) = log \sum_{m, i} p(x(t), m, i|j) - return post->ApplySoftMax(); -} - -struct SubstateCounter { - SubstateCounter(int32 j, int32 num_substates, BaseFloat occ) - : state_index(j), num_substates(num_substates), occupancy(occ) {} - - int32 state_index; - int32 num_substates; - BaseFloat occupancy; - - bool operator < (const SubstateCounter &r) const { - return occupancy/num_substates < r.occupancy/r.num_substates; - } -}; - -void AmSgmm::SplitSubstates(const Vector &state_occupancies, - int32 target_nsubstates, BaseFloat perturb, - BaseFloat power, BaseFloat max_cond) { - // power == p in document. target_nsubstates == T in document. - KALDI_ASSERT(state_occupancies.Dim() == NumPdfs()); - int32 tot_n_substates_old = 0; - int32 phn_dim = PhoneSpaceDim(); - std::priority_queue substate_counts; - vector< SpMatrix > H_i; - SpMatrix sqrt_H_sm; - Vector rand_vec(phn_dim), v_shift(phn_dim); - - for (int32 j = 0; j < NumPdfs(); j++) { - BaseFloat gamma_p = pow(state_occupancies(j), power); - substate_counts.push(SubstateCounter(j, NumSubstates(j), gamma_p)); - tot_n_substates_old += NumSubstates(j); - } - if (target_nsubstates <= tot_n_substates_old || tot_n_substates_old == 0) { - KALDI_WARN << "Cannot split from " << (tot_n_substates_old) << - " to " << (target_nsubstates) << " substates."; - return; - } - - ComputeH(&H_i); // set up that array. 
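-  // Outline of the perturbation machinery set up next (a sketch of the code
-  // below, with H_i = M_i^T \Sigma_i^{-1} M_i from ComputeH() above):
-  // ComputeSmoothingTermsFromModel() averages the H_i, weighted by expected
-  // per-Gaussian counts, into sqrt_H_sm, and the ApplyPow(-0.5) that follows
-  // turns that into H_sm^{-1/2}.  Each split then draws rand_vec ~ N(0, I)
-  // and forms
-  //   v_shift = perturb * H_sm^{-1/2} * rand_vec,
-  // moving the two copies of v_{jm} to v_{jm} + v_shift and v_{jm} - v_shift,
-  // i.e. a random step whose scale is matched to the model's own metric on
-  // the phonetic subspace.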
- ComputeSmoothingTermsFromModel(H_i, state_occupancies, &sqrt_H_sm, max_cond); - H_i.clear(); - sqrt_H_sm.ApplyPow(-0.5); - - for (int32 n_states = tot_n_substates_old; - n_states < target_nsubstates; n_states++) { - SubstateCounter state_to_split = substate_counts.top(); - substate_counts.pop(); - state_to_split.num_substates++; - substate_counts.push(state_to_split); - } - - while (!substate_counts.empty()) { - int32 j = substate_counts.top().state_index; - int32 tgt_n_substates_j = substate_counts.top().num_substates; - int32 n_substates_j = NumSubstates(j); - substate_counts.pop(); - - if (n_substates_j == tgt_n_substates_j) continue; - - // Resize v[j] and c[j] to fit new substates - Matrix tmp_v_j(v_[j]); - v_[j].Resize(tgt_n_substates_j, phn_dim); - v_[j].Range(0, n_substates_j, 0, phn_dim).CopyFromMat(tmp_v_j); - tmp_v_j.Resize(0, 0); - - Vector tmp_c_j(c_[j]); - c_[j].Resize(tgt_n_substates_j); - c_[j].Range(0, n_substates_j).CopyFromVec(tmp_c_j); - tmp_c_j.Resize(0); - - // Keep splitting substates until obtaining the desired number - for (; n_substates_j < tgt_n_substates_j; n_substates_j++) { - int32 split_substate = std::max_element(c_[j].Data(), c_[j].Data() - + n_substates_j) - c_[j].Data(); - - // c_{jkm} := c_{jmk}' := c_{jkm} / 2 - c_[j](split_substate) = c_[j](n_substates_j) = c_[j](split_substate) / 2; - - // v_{jkm} := +/- split_perturb * H_k^{(sm)}^{-0.5} * rand_vec - std::generate(rand_vec.Data(), rand_vec.Data() + rand_vec.Dim(), - _RandGauss); - v_shift.AddSpVec(perturb, sqrt_H_sm, rand_vec, 0.0); - v_[j].Row(n_substates_j).CopyFromVec(v_[j].Row(split_substate)); - v_[j].Row(n_substates_j).AddVec(1.0, v_shift); - v_[j].Row(split_substate).AddVec((-1.0), v_shift); - } - } - KALDI_LOG << "Getting rid of normalizers as they will no longer be valid"; - - n_.clear(); - KALDI_LOG << "Split " << (tot_n_substates_old) << " substates to " - << (target_nsubstates); -} - -void AmSgmm::IncreasePhoneSpaceDim(int32 target_dim, - const Matrix &norm_xform) { - KALDI_ASSERT(!M_.empty()); - int32 initial_dim = PhoneSpaceDim(), - feat_dim = FeatureDim(); - KALDI_ASSERT(norm_xform.NumRows() == feat_dim); - - if (target_dim < initial_dim) - KALDI_ERR << "You asked to increase phn dim to a value lower than the " - << " current dimension, " << target_dim << " < " << initial_dim; - - if (target_dim > initial_dim + feat_dim) { - KALDI_WARN << "Cannot increase phone subspace dimensionality from " - << initial_dim << " to " << target_dim << ", increasing to " - << initial_dim + feat_dim; - target_dim = initial_dim + feat_dim; - } - - if (initial_dim < target_dim) { - Matrix tmp_M(feat_dim, initial_dim); - for (int32 i = 0; i < NumGauss(); i++) { - tmp_M.CopyFromMat(M_[i]); - M_[i].Resize(feat_dim, target_dim); - M_[i].Range(0, feat_dim, 0, tmp_M.NumCols()).CopyFromMat(tmp_M); - M_[i].Range(0, feat_dim, tmp_M.NumCols(), - target_dim - tmp_M.NumCols()).CopyFromMat(norm_xform.Range(0, - feat_dim, 0, target_dim-tmp_M.NumCols())); - } - Matrix tmp_w = w_; - w_.Resize(tmp_w.NumRows(), target_dim); - w_.Range(0, tmp_w.NumRows(), 0, tmp_w.NumCols()).CopyFromMat(tmp_w); - - for (int32 j = 0; j < NumPdfs(); j++) { - // Resize v[j] - Matrix tmp_v_j = v_[j]; - v_[j].Resize(tmp_v_j.NumRows(), target_dim); - v_[j].Range(0, tmp_v_j.NumRows(), 0, tmp_v_j.NumCols()).CopyFromMat( - tmp_v_j); - } - KALDI_LOG << "Phone subspace dimensionality increased from " << - initial_dim << " to " << target_dim; - } else { - KALDI_LOG << "Phone subspace dimensionality unchanged, since target " << - "dimension (" << 
target_dim << ") <= initial dimansion (" << - initial_dim << ")"; - } -} - -void AmSgmm::IncreaseSpkSpaceDim(int32 target_dim, - const Matrix &norm_xform) { - int32 initial_dim = SpkSpaceDim(), - feat_dim = FeatureDim(); - KALDI_ASSERT(norm_xform.NumRows() == feat_dim); - - if (N_.size() == 0) - N_.resize(NumGauss()); - - if (target_dim < initial_dim) - KALDI_ERR << "You asked to increase spk dim to a value lower than the " - << " current dimension, " << target_dim << " < " << initial_dim; - - if (target_dim > initial_dim + feat_dim) { - KALDI_WARN << "Cannot increase speaker subspace dimensionality from " - << initial_dim << " to " << target_dim << ", increasing to " - << initial_dim + feat_dim; - target_dim = initial_dim + feat_dim; - } - - if (initial_dim < target_dim) { - int32 dim_change = target_dim - initial_dim; - Matrix tmp_N((initial_dim != 0) ? feat_dim : 0, - initial_dim); - for (int32 i = 0; i < NumGauss(); i++) { - if (initial_dim != 0) tmp_N.CopyFromMat(N_[i]); - N_[i].Resize(feat_dim, target_dim); - if (initial_dim != 0) { - N_[i].Range(0, feat_dim, 0, tmp_N.NumCols()).CopyFromMat(tmp_N); - } - N_[i].Range(0, feat_dim, tmp_N.NumCols(), dim_change).CopyFromMat( - norm_xform.Range(0, feat_dim, 0, dim_change)); - } - KALDI_LOG << "Speaker subspace dimensionality increased from " << - initial_dim << " to " << target_dim; - } else { - KALDI_LOG << "Speaker subspace dimensionality unchanged, since target " << - "dimension (" << target_dim << ") <= initial dimansion (" << - initial_dim << ")"; - } -} - -void AmSgmm::ComputeDerivedVars() { - if (n_.empty()) { - ComputeNormalizers(); - } - if (diag_ubm_.NumGauss() != full_ubm_.NumGauss() - || diag_ubm_.Dim() != full_ubm_.Dim()) { - diag_ubm_.CopyFromFullGmm(full_ubm_); - } -} - -class ComputeNormalizersClass: public MultiThreadable { // For multi-threaded. - public: - ComputeNormalizersClass(AmSgmm *am_sgmm, - int32 *entropy_count_ptr, - double *entropy_sum_ptr): - am_sgmm_(am_sgmm), entropy_count_ptr_(entropy_count_ptr), - entropy_sum_ptr_(entropy_sum_ptr), entropy_count_(0), - entropy_sum_(0.0) { } - - ~ComputeNormalizersClass() { - *entropy_count_ptr_ += entropy_count_; - *entropy_sum_ptr_ += entropy_sum_; - } - - inline void operator() () { - // Note: give them local copy of the sums we're computing, - // which will be propagated to original pointer in the destructor. - am_sgmm_->ComputeNormalizersInternal(num_threads_, thread_id_, - &entropy_count_, - &entropy_sum_); - } - private: - ComputeNormalizersClass() { } // Disallow empty constructor. - AmSgmm *am_sgmm_; - int32 *entropy_count_ptr_; - double *entropy_sum_ptr_; - int32 entropy_count_; - double entropy_sum_; - -}; - -void AmSgmm::ComputeNormalizers() { - KALDI_LOG << "Computing normalizers"; - n_.resize(NumPdfs()); - int32 entropy_count = 0; - double entropy_sum = 0.0; - ComputeNormalizersClass c(this, &entropy_count, &entropy_sum); - RunMultiThreaded(c); - - KALDI_LOG << "Entropy of weights in substates is " - << (entropy_sum / entropy_count) << " over " << entropy_count - << " substates, equivalent to perplexity of " - << (Exp(entropy_sum /entropy_count)); - KALDI_LOG << "Done computing normalizers"; -} - - -void AmSgmm::ComputeNormalizersInternal(int32 num_threads, int32 thread, - int32 *entropy_count, - double *entropy_sum) { - - BaseFloat DLog2pi = FeatureDim() * Log(2 * M_PI); - Vector log_det_Sigma(NumGauss()); - - for (int32 i = 0; i < NumGauss(); i++) { - try { - log_det_Sigma(i) = - SigmaInv_[i].LogPosDefDet(); - } catch(...) 
{ - if (thread == 0) // just for one thread, print errors [else, duplicates] - KALDI_WARN << "Covariance is not positive definite, setting to unit"; - SigmaInv_[i].SetUnit(); - log_det_Sigma(i) = 0.0; - } - } - - - int block_size = (NumPdfs() + num_threads-1) / num_threads; - int j_start = thread * block_size, j_end = std::min(NumPdfs(), j_start + block_size); - - for (int32 j = j_start; j < j_end; j++) { - Matrix log_w_jm(NumSubstates(j), NumGauss()); - n_[j].Resize(NumGauss(), NumSubstates(j)); - Matrix mu_jmi(NumSubstates(j), FeatureDim()); - Matrix SigmaInv_mu(NumSubstates(j), FeatureDim()); - - // (in logs): w_jm = softmax([w_{k1}^T ... w_{kD}^T] * v_{jkm}) eq.(7) - log_w_jm.AddMatMat(1.0, v_[j], kNoTrans, w_, kTrans, 0.0); - for (int32 m = 0; m < NumSubstates(j); m++) { - log_w_jm.Row(m).Add(-1.0 * log_w_jm.Row(m).LogSumExp()); - { // DIAGNOSTIC CODE - (*entropy_count)++; - for (int32 i = 0; i < NumGauss(); i++) { - (*entropy_sum) -= log_w_jm(m, i) * Exp(log_w_jm(m, i)); - } - } - } - - for (int32 i = 0; i < NumGauss(); i++) { - // mu_jmi = M_{i} * v_{jm} - mu_jmi.AddMatMat(1.0, v_[j], kNoTrans, M_[i], kTrans, 0.0); - SigmaInv_mu.AddMatSp(1.0, mu_jmi, kNoTrans, SigmaInv_[i], 0.0); - - for (int32 m = 0; m < NumSubstates(j); m++) { - // mu_{jmi} * \Sigma_{i}^{-1} * mu_{jmi} - BaseFloat mu_SigmaInv_mu = VecVec(mu_jmi.Row(m), SigmaInv_mu.Row(m)); - BaseFloat logc = Log(c_[j](m)); - - // Suggestion: Both mu_jmi and SigmaInv_mu could - // have been computed at once for i, - // if M[i] was concatenated to single matrix over i indices - - // eq.(31) - n_[j](i, m) = logc + log_w_jm(m, i) - 0.5 * (log_det_Sigma(i) + DLog2pi - + mu_SigmaInv_mu); - { // Mainly diagnostic code. Not necessary. - BaseFloat tmp = n_[j](i, m); - if (!KALDI_ISFINITE(tmp)) { // NaN or inf - KALDI_LOG << "Warning: normalizer for j = " << j << ", m = " << m - << ", i = " << i << " is infinite or NaN " << tmp << "= " - << (logc) << "+" << (log_w_jm(m, i)) << "+" << (-0.5 * - log_det_Sigma(i)) << "+" << (-0.5 * DLog2pi) - << "+" << (mu_SigmaInv_mu) << ", setting to finite."; - n_[j](i, m) = -1.0e+40; // future work(arnab): get rid of magic number - } - } - } - } - } -} - - -void AmSgmm::ComputeNormalizersNormalized( - const std::vector< std::vector > &normalize_sets) { - { // Check sets in normalize_sets are disjoint and cover all Gaussians. - std::set all; - for (int32 i = 0; i < normalize_sets.size(); i++) - for (int32 j = 0; static_cast(j) < normalize_sets[i].size(); j++) { - int32 n = normalize_sets[i][j]; - KALDI_ASSERT(all.count(n) == 0 && n >= 0 && n < NumGauss()); - all.insert(n); - } - KALDI_ASSERT(all.size() == NumGauss()); - } - - KALDI_LOG << "Computing normalizers [normalized]"; - BaseFloat DLog2pi = FeatureDim() * Log(2 * M_PI); - Vector mu_jmi(FeatureDim()); - Vector SigmaInv_mu(FeatureDim()); - Vector log_det_Sigma(NumGauss()); - - for (int32 i = 0; i < NumGauss(); i++) { - try { - log_det_Sigma(i) = - SigmaInv_[i].LogPosDefDet(); - } catch(...) { - KALDI_WARN << "Covariance is not positive definite, setting to unit"; - SigmaInv_[i].SetUnit(); - log_det_Sigma(i) = 0.0; - } - } - - n_.resize(NumPdfs()); - for (int32 j = 0; j < NumPdfs(); j++) { - Vector log_w_jm(NumGauss()); - - n_[j].Resize(NumGauss(), NumSubstates(j)); - for (int32 m = 0; m < NumSubstates(j); m++) { - BaseFloat logc = Log(c_[j](m)); - - // (in logs): w_jm = softmax([w_{k1}^T ... 
w_{kD}^T] * v_{jkm}) eq.(7)
-      log_w_jm.AddMatVec(1.0, w_, kNoTrans, v_[j].Row(m), 0.0);
-      log_w_jm.Add((-1.0) * log_w_jm.LogSumExp());
-
-      for (int32 n = 0; n < normalize_sets.size(); n++) {
-        const std::vector &this_set(normalize_sets[n]);
-        double sum = 0.0;
-        for (int32 p = 0; p < this_set.size(); p++)
-          sum += Exp(log_w_jm(this_set[p]));
-        double offset = -Log(sum);  // add "offset" to normalize weights.
-        for (int32 p = 0; p < this_set.size(); p++)
-          log_w_jm(this_set[p]) += offset;
-      }
-
-      for (int32 i = 0; i < NumGauss(); i++) {
-        // mu_jmi = M_{i} * v_{jm}
-        mu_jmi.AddMatVec(1.0, M_[i], kNoTrans, v_[j].Row(m), 0.0);
-
-        // mu_{jmi} * \Sigma_{i}^{-1} * mu_{jmi}
-        SigmaInv_mu.AddSpVec(1.0, SigmaInv_[i], mu_jmi, 0.0);
-        BaseFloat mu_SigmaInv_mu = VecVec(mu_jmi, SigmaInv_mu);
-
-        // Suggestion: Both mu_jmi and SigmaInv_mu could
-        // have been computed at once for i,
-        // if M[i] was concatenated to single matrix over i indices
-
-        // eq.(31)
-        n_[j](i, m) = logc + log_w_jm(i) - 0.5 * (log_det_Sigma(i) + DLog2pi
-            + mu_SigmaInv_mu);
-        {  // Mainly diagnostic code. Not necessary.
-          BaseFloat tmp = n_[j](i, m);
-          if (!KALDI_ISFINITE(tmp)) {  // NaN or inf
-            KALDI_LOG << "Warning: normalizer for j = " << j << ", m = " << m
-                      << ", i = " << i << " is infinite or NaN " << tmp << "= "
-                      << (logc) << "+" << (log_w_jm(i)) << "+" << (-0.5 *
-                          log_det_Sigma(i)) << "+" << (-0.5 * DLog2pi)
-                      << "+" << (mu_SigmaInv_mu) << ", setting to finite.";
-            n_[j](i, m) = -1.0e+40;  // future work(arnab): get rid of magic number
-          }
-        }
-      }
-    }
-  }
-
-  KALDI_LOG << "Done computing normalizers (normalized over subsets)";
-}
-
-
-void AmSgmm::ComputeFmllrPreXform(const Vector &state_occs,
-                                  Matrix *xform, Matrix *inv_xform,
-                                  Vector *diag_mean_scatter) const {
-  int32 num_states = NumPdfs(),
-      num_gauss = NumGauss(),
-      dim = FeatureDim();
-  KALDI_ASSERT(state_occs.Dim() == num_states);
-
-  BaseFloat total_occ = state_occs.Sum();
-
-  // Degenerate case: unlikely to ever happen.
-  if (total_occ == 0) {
-    KALDI_WARN << "Zero probability (computing transform). Using unit "
-               << "pre-transform";
-    xform->Resize(dim, dim + 1, kUndefined);
-    xform->SetUnit();
-    inv_xform->Resize(dim, dim + 1, kUndefined);
-    inv_xform->SetUnit();
-    diag_mean_scatter->Resize(dim, kSetZero);
-    return;
-  }
-
-  // Convert state occupancies to posteriors; Eq. (B.1)
-  Vector state_posteriors(state_occs);
-  state_posteriors.Scale(1/total_occ);
-
-  Vector mu_jmi(dim), global_mean(dim);
-  SpMatrix within_class_covar(dim), between_class_covar(dim);
-  Vector gauss_weight(num_gauss);  // weights for within-class vars.
-  Vector w_jm(num_gauss);
-  BaseFloat substate_weight;
-  for (int32 j = 0; j < num_states; j++) {
-    for (int32 m = 0; m < NumSubstates(j); m++) {
-      // Eq. (7): w_jm = softmax([w_{1}^T ... w_{D}^T] * v_{jm})
-      w_jm.AddMatVec(1.0, w_, kNoTrans, v_[j].Row(m), 0.0);
-      w_jm.ApplySoftMax();
-
-      for (int32 i = 0; i < num_gauss; i++) {
-        substate_weight = state_posteriors(j) * c_[j](m) * w_jm(i);
-        mu_jmi.AddMatVec(1.0, M_[i], kNoTrans, v_[j].Row(m), 0.0);  // Eq. (6)
-        // Eq. (B.3): \mu_avg = \sum_{jmi} p(j) c_{jm} w_{jmi} \mu_{jmi}
-        global_mean.AddVec(substate_weight, mu_jmi);
-        // \Sigma_B = \sum_{jmi} p(j) c_{jm} w_{jmi} \mu_{jmi} \mu_{jmi}^T
-        between_class_covar.AddVec2(substate_weight, mu_jmi);  // Eq. (B.4)
-        gauss_weight(i) += substate_weight;
-      }
-    }
-  }
-  between_class_covar.AddVec2(-1.0, global_mean);  // Eq. (B.4)
-
-  for (int32 i = 0; i < num_gauss; i++) {
-    SpMatrix Sigma(SigmaInv_[i]);
-    Sigma.InvertDouble();
-    // Eq.
(B.2): \Sigma_W = \sum_{jmi} p(j) c_{jm} w_{jmi} \Sigma_i - within_class_covar.AddSp(gauss_weight(i), Sigma); - } - - TpMatrix tmpL(dim); - Matrix tmpLInvFull(dim, dim); - tmpL.Cholesky(within_class_covar); // \Sigma_W = L L^T - tmpL.InvertDouble(); // L^{-1} - tmpLInvFull.CopyFromTp(tmpL); // get as full matrix. - - // B := L^{-1} * \Sigma_B * L^{-T} - SpMatrix tmpB(dim); - tmpB.AddMat2Sp(1.0, tmpLInvFull, kNoTrans, between_class_covar, 0.0); - - Matrix U(dim, dim); - diag_mean_scatter->Resize(dim); - xform->Resize(dim, dim + 1); - inv_xform->Resize(dim, dim + 1); - - tmpB.Eig(diag_mean_scatter, &U); // Eq. (B.5): B = U D V^T - int32 n; - if ((n = diag_mean_scatter->ApplyFloor(1.0e-04)) != 0) - KALDI_WARN << "Floored " << n << " elements of the mean-scatter matrix."; - - // Eq. (B.6): A_{pre} = U^T * L^{-1} - SubMatrix Apre(*xform, 0, dim, 0, dim); - Apre.AddMatMat(1.0, U, kTrans, tmpLInvFull, kNoTrans, 0.0); - -#ifdef KALDI_PARANOID - { - SpMatrix tmp(dim); - tmp.AddMat2Sp(1.0, Apre, kNoTrans, within_class_covar, 0.0); - KALDI_ASSERT(tmp.IsUnit(0.01)); - } - { - SpMatrix tmp(dim); - tmp.AddMat2Sp(1.0, Apre, kNoTrans, between_class_covar, 0.0); - KALDI_ASSERT(tmp.IsDiagonal(0.01)); - } -#endif - - // Eq. (B.7): b_{pre} = - A_{pre} \mu_{avg} - Vector b_pre(dim); - b_pre.AddMatVec(-1.0, Apre, kNoTrans, global_mean, 0.0); - for (int32 r = 0; r < dim; r++) { - xform->Row(r)(dim) = b_pre(r); // W_{pre} = [ A_{pre}, b_{pre} ] - } - - // Eq. (B.8) & (B.9): W_{inv} = [ A_{pre}^{-1}, \mu_{avg} ] - inv_xform->CopyFromMat(*xform); - inv_xform->Range(0, dim, 0, dim).InvertDouble(); - for (int32 r = 0; r < dim; r++) - inv_xform->Row(r)(dim) = global_mean(r); -} // End of ComputePreXform() - -template -void AmSgmm::GetNtransSigmaInv(vector< Matrix > *out) const { - KALDI_ASSERT(SpkSpaceDim() > 0 && - "Cannot compute N^{T} \\Sigma_{i}^{-1} without speaker projections."); - out->resize(NumGauss()); - Matrix tmpcov(FeatureDim(), FeatureDim()); - Matrix tmp_n(FeatureDim(), SpkSpaceDim()); - for (int32 i = 0; i < NumGauss(); i++) { - tmpcov.CopyFromSp(SigmaInv_[i]); - tmp_n.CopyFromMat(N_[i]); - (*out)[i].Resize(SpkSpaceDim(), FeatureDim()); - (*out)[i].AddMatMat(1.0, tmp_n, kTrans, tmpcov, kNoTrans, 0.0); - } -} - -// Instantiate the above template. -template -void AmSgmm::GetNtransSigmaInv(vector< Matrix > *out) const; -template -void AmSgmm::GetNtransSigmaInv(vector< Matrix > *out) const; - -/////////////////////////////////////////////////////////////////////////////// - -template -void AmSgmm::ComputeH(std::vector< SpMatrix > *H_i) const { - KALDI_ASSERT(NumGauss() != 0); - (*H_i).resize(NumGauss()); - SpMatrix H_i_tmp(PhoneSpaceDim()); - for (int32 i = 0; i < NumGauss(); i++) { - (*H_i)[i].Resize(PhoneSpaceDim()); - H_i_tmp.AddMat2Sp(1.0, M_[i], kTrans, SigmaInv_[i], 0.0); - (*H_i)[i].CopyFromSp(H_i_tmp); - } -} - -// Instantiate the template. 
-template -void AmSgmm::ComputeH(std::vector< SpMatrix > *H_i) const; -template -void AmSgmm::ComputeH(std::vector< SpMatrix > *H_i) const; - - -// Initializes the matrices M_{i} and w_i -void AmSgmm::InitializeMw(int32 phn_subspace_dim, - const Matrix &norm_xform) { - int32 ddim = full_ubm_.Dim(); - KALDI_ASSERT(phn_subspace_dim <= ddim + 1); - KALDI_ASSERT(phn_subspace_dim <= norm_xform.NumCols() + 1); - KALDI_ASSERT(ddim <= norm_xform.NumRows()); - - Vector mean(ddim); - int32 num_gauss = full_ubm_.NumGauss(); - w_.Resize(num_gauss, phn_subspace_dim); - M_.resize(num_gauss); - for (int32 i = 0; i < num_gauss; i++) { - full_ubm_.GetComponentMean(i, &mean); - Matrix &thisM(M_[i]); - thisM.Resize(ddim, phn_subspace_dim); - // Eq. (27): M_{i} = [ \bar{\mu}_{i} (J)_{1:D, 1:(S-1)}] - thisM.CopyColFromVec(mean, 0); - thisM.Range(0, ddim, 1, phn_subspace_dim-1).CopyFromMat( - norm_xform.Range(0, ddim, 0, phn_subspace_dim-1), kNoTrans); - } -} - -// Initializes the matrices N_{i} -void AmSgmm::InitializeN(int32 spk_subspace_dim, - const Matrix &norm_xform) { - int32 ddim = full_ubm_.Dim(); - KALDI_ASSERT(spk_subspace_dim <= ddim); - KALDI_ASSERT(spk_subspace_dim <= norm_xform.NumCols()); - KALDI_ASSERT(ddim <= norm_xform.NumRows()); - - int32 num_gauss = full_ubm_.NumGauss(); - N_.resize(num_gauss); - for (int32 i = 0; i < num_gauss; i++) { - N_[i].Resize(ddim, spk_subspace_dim); - // Eq. (28): N_{i} = [ (J)_{1:D, 1:T)}] - N_[i].CopyFromMat(norm_xform.Range(0, ddim, 0, spk_subspace_dim), kNoTrans); - } -} - -// Initializes the vectors v_{jm} -void AmSgmm::InitializeVecs(int32 num_states) { - KALDI_ASSERT(num_states >= 0); - int32 phn_subspace_dim = PhoneSpaceDim(); - KALDI_ASSERT(phn_subspace_dim > 0 && "Initialize M and w first."); - - v_.resize(num_states); - c_.resize(num_states); - for (int32 j = 0; j < num_states; j++) { - v_[j].Resize(1, phn_subspace_dim); - c_[j].Resize(1); - v_[j](0, 0) = 1.0; // Eq. (26): v_{j1} = [1 0 0 ... 0] - c_[j](0) = 1.0; // Eq. (25): c_{j1} = 1.0 - } -} - -// Initializes the within-class vars Sigma_{ki} -void AmSgmm::InitializeCovars() { - std::vector< SpMatrix > &inv_covars(full_ubm_.inv_covars()); - int32 num_gauss = full_ubm_.NumGauss(); - int32 dim = full_ubm_.Dim(); - SigmaInv_.resize(num_gauss); - for (int32 i = 0; i < num_gauss; i++) { - SigmaInv_[i].Resize(dim); - SigmaInv_[i].CopyFromSp(inv_covars[i]); - } -} - -// Compute the "smoothing" matrices from expected counts given the model. -void AmSgmm::ComputeSmoothingTermsFromModel( - const std::vector< SpMatrix > &H, - const Vector &state_occupancies, SpMatrix *H_sm, - BaseFloat max_cond) const { - int32 num_gauss = NumGauss(); - BaseFloat tot_sum = 0.0; - KALDI_ASSERT(state_occupancies.Dim() == NumPdfs()); - Vector w_jm(num_gauss); - H_sm->Resize(PhoneSpaceDim()); - H_sm->SetZero(); - Vector gamma_i(num_gauss); - gamma_i.SetZero(); - for (int32 j = 0; j < NumPdfs(); j++) { - int32 M_j = NumSubstates(j); - KALDI_ASSERT(M_j > 0); - for (int32 m = 0; m < M_j; m++) { - w_jm.AddMatVec(1.0, w_, kNoTrans, v_[j].Row(m), 0.0); - w_jm.ApplySoftMax(); - gamma_i.AddVec(state_occupancies(j) * c_[j](m), w_jm); - } - } - BaseFloat sum = 0.0; - for (int32 i = 0; i < num_gauss; i++) { - if (gamma_i(i) > 0) { - H_sm->AddSp(gamma_i(i), H[i]); - sum += gamma_i(i); - } - } - if (sum == 0.0) { - KALDI_WARN << "Sum of counts is zero. "; - // set to unit matrix--arbitrary non-singular matrix.. won't ever matter. 
- H_sm->SetUnit(); - } else { - H_sm->Scale(1.0 / sum); - int32 tmp = H_sm->LimitCondDouble(max_cond); - if (tmp > 0) { - KALDI_WARN << "Limited " << (tmp) << " eigenvalues of H_sm"; - } - } - tot_sum += sum; - - KALDI_LOG << "ComputeSmoothingTermsFromModel: total count is " << tot_sum; -} - -void ComputeFeatureNormalizer(const FullGmm &gmm, Matrix *xform) { - int32 dim = gmm.Dim(); - int32 num_gauss = gmm.NumGauss(); - SpMatrix within_class_covar(dim); - SpMatrix between_class_covar(dim); - Vector global_mean(dim); - - // Accumulate LDA statistics from the GMM parameters. - { - BaseFloat total_weight = 0.0; - Vector tmp_weight(num_gauss); - Matrix tmp_means; - std::vector< SpMatrix > tmp_covars; - tmp_weight.CopyFromVec(gmm.weights()); - gmm.GetCovarsAndMeans(&tmp_covars, &tmp_means); - for (int32 i = 0; i < num_gauss; i++) { - BaseFloat w_i = tmp_weight(i); - total_weight += w_i; - within_class_covar.AddSp(w_i, tmp_covars[i]); - between_class_covar.AddVec2(w_i, tmp_means.Row(i)); - global_mean.AddVec(w_i, tmp_means.Row(i)); - } - KALDI_ASSERT(total_weight > 0); - if (fabs(total_weight - 1.0) > 0.001) { - KALDI_WARN << "Total weight across the GMMs is " << (total_weight) - << ", renormalizing."; - global_mean.Scale(1.0 / total_weight); - within_class_covar.Scale(1.0 / total_weight); - between_class_covar.Scale(1.0 / total_weight); - } - between_class_covar.AddVec2(-1.0, global_mean); - } - - TpMatrix chol(dim); - chol.Cholesky(within_class_covar); // Sigma_W = L L^T - TpMatrix chol_inv(chol); - chol_inv.InvertDouble(); - Matrix chol_full(dim, dim); - chol_full.CopyFromTp(chol_inv); - SpMatrix LBL(dim); - // LBL = L^{-1} \Sigma_B L^{-T} - LBL.AddMat2Sp(1.0, chol_full, kNoTrans, between_class_covar, 0.0); - Vector Dvec(dim); - Matrix U(dim, dim); - LBL.Eig(&Dvec, &U); - SortSvd(&Dvec, &U); - - xform->Resize(dim, dim); - chol_full.CopyFromTp(chol); - // T := L U, eq (23) - xform->AddMatMat(1.0, chol_full, kNoTrans, U, kNoTrans, 0.0); - -#ifdef KALDI_PARANOID - Matrix inv_xform(*xform); - inv_xform.InvertDouble(); - { // Check that T*within_class_covar*T' = I. - Matrix wc_covar_full(dim, dim), tmp(dim, dim); - wc_covar_full.CopyFromSp(within_class_covar); - tmp.AddMatMat(1.0, inv_xform, kNoTrans, wc_covar_full, kNoTrans, 0.0); - wc_covar_full.AddMatMat(1.0, tmp, kNoTrans, inv_xform, kTrans, 0.0); - KALDI_ASSERT(wc_covar_full.IsUnit(0.01)); - } - { // Check that T*between_class_covar*T' = diagonal. - Matrix bc_covar_full(dim, dim), tmp(dim, dim); - bc_covar_full.CopyFromSp(between_class_covar); - tmp.AddMatMat(1.0, inv_xform, kNoTrans, bc_covar_full, kNoTrans, 0.0); - bc_covar_full.AddMatMat(1.0, tmp, kNoTrans, inv_xform, kTrans, 0.0); - KALDI_ASSERT(bc_covar_full.IsDiagonal(0.01)); - } -#endif -} - -void AmSgmm::ComputePerSpkDerivedVars(SgmmPerSpkDerivedVars *vars) const { - KALDI_ASSERT(vars != NULL); - if (vars->v_s.Dim() != 0) { - KALDI_ASSERT(vars->v_s.Dim() == SpkSpaceDim()); - vars->o_s.Resize(NumGauss(), FeatureDim()); - int32 num_gauss = NumGauss(); - for (int32 i = 0; i < num_gauss; i++) { - // Eqn. 
(32): o_i^{(s)} = N_i v^{(s)} - vars->o_s.Row(i).AddMatVec(1.0, N_[i], kNoTrans, vars->v_s, 0.0); - } - } else { - vars->o_s.Resize(0, 0); - } -} - -BaseFloat AmSgmm::GaussianSelection(const SgmmGselectConfig &config, - const VectorBase &data, - std::vector *gselect) const { - KALDI_ASSERT(diag_ubm_.NumGauss() != 0 && - diag_ubm_.NumGauss() == full_ubm_.NumGauss() && - diag_ubm_.Dim() == data.Dim()); - KALDI_ASSERT(config.diag_gmm_nbest > 0 && config.full_gmm_nbest > 0 && - config.full_gmm_nbest < config.diag_gmm_nbest); - int32 num_gauss = diag_ubm_.NumGauss(); - - std::vector< std::pair > pruned_pairs; - if (config.diag_gmm_nbest < num_gauss) { - Vector loglikes(num_gauss); - diag_ubm_.LogLikelihoods(data, &loglikes); - Vector loglikes_copy(loglikes); - BaseFloat *ptr = loglikes_copy.Data(); - std::nth_element(ptr, ptr+num_gauss-config.diag_gmm_nbest, ptr+num_gauss); - BaseFloat thresh = ptr[num_gauss-config.diag_gmm_nbest]; - for (int32 g = 0; g < num_gauss; g++) - if (loglikes(g) >= thresh) // met threshold for diagonal phase. - pruned_pairs.push_back( - std::make_pair(full_ubm_.ComponentLogLikelihood(data, g), g)); - } else { - Vector loglikes(num_gauss); - full_ubm_.LogLikelihoods(data, &loglikes); - for (int32 g = 0; g < num_gauss; g++) - pruned_pairs.push_back(std::make_pair(loglikes(g), g)); - } - KALDI_ASSERT(!pruned_pairs.empty()); - if (pruned_pairs.size() > static_cast(config.full_gmm_nbest)) { - std::nth_element(pruned_pairs.begin(), - pruned_pairs.end() - config.full_gmm_nbest, - pruned_pairs.end()); - pruned_pairs.erase(pruned_pairs.begin(), - pruned_pairs.end() - config.full_gmm_nbest); - } - Vector loglikes_tmp(pruned_pairs.size()); // for return value. - KALDI_ASSERT(gselect != NULL); - gselect->resize(pruned_pairs.size()); - // Make sure pruned Gaussians appear from best to worst. - std::sort(pruned_pairs.begin(), pruned_pairs.end(), - std::greater< std::pair >()); - for (size_t i = 0; i < pruned_pairs.size(); i++) { - loglikes_tmp(i) = pruned_pairs[i].first; - (*gselect)[i] = pruned_pairs[i].second; - } - return loglikes_tmp.LogSumExp(); -} - -BaseFloat AmSgmm::GaussianSelectionPreselect(const SgmmGselectConfig &config, - const VectorBase &data, - const std::vector &preselect, - std::vector *gselect) const { - KALDI_ASSERT(IsSortedAndUniq(preselect) && !preselect.empty()); - KALDI_ASSERT(diag_ubm_.NumGauss() != 0 && - diag_ubm_.NumGauss() == full_ubm_.NumGauss() && - diag_ubm_.Dim() == data.Dim()); - - int32 num_preselect = preselect.size(); - - KALDI_ASSERT(config.diag_gmm_nbest > 0 && config.full_gmm_nbest > 0 && - config.full_gmm_nbest < num_preselect); - - std::vector > pruned_pairs; - if (config.diag_gmm_nbest < num_preselect) { - Vector loglikes(num_preselect); - diag_ubm_.LogLikelihoodsPreselect(data, preselect, &loglikes); - Vector loglikes_copy(loglikes); - BaseFloat *ptr = loglikes_copy.Data(); - std::nth_element(ptr, ptr+num_preselect-config.diag_gmm_nbest, - ptr+num_preselect); - BaseFloat thresh = ptr[num_preselect-config.diag_gmm_nbest]; - for (int32 p = 0; p < num_preselect; p++) { - if (loglikes(p) >= thresh) { // met threshold for diagonal phase. 
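-        // (Reading of this function, in outline: selection is two-stage.
-        // The cheap diagonal UBM scores every preselected Gaussian;
-        // nth_element() above finds, in O(n) without a full sort, the
-        // threshold that keeps the diag_gmm_nbest best; only those
-        // survivors are rescored here by the expensive full-covariance
-        // UBM, and the final pruning to full_gmm_nbest happens below.)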
- int32 g = preselect[p]; - pruned_pairs.push_back( - std::make_pair(full_ubm_.ComponentLogLikelihood(data, g), g)); - } - } - } else { - for (int32 p = 0; p < num_preselect; p++) { - int32 g = preselect[p]; - pruned_pairs.push_back( - std::make_pair(full_ubm_.ComponentLogLikelihood(data, g), g)); - } - } - KALDI_ASSERT(!pruned_pairs.empty()); - if (pruned_pairs.size() > static_cast(config.full_gmm_nbest)) { - std::nth_element(pruned_pairs.begin(), - pruned_pairs.end() - config.full_gmm_nbest, - pruned_pairs.end()); - pruned_pairs.erase(pruned_pairs.begin(), - pruned_pairs.end() - config.full_gmm_nbest); - } - // Make sure pruned Gaussians appear from best to worst. - std::sort(pruned_pairs.begin(), pruned_pairs.end(), - std::greater >()); - Vector loglikes_tmp(pruned_pairs.size()); // for return value. - KALDI_ASSERT(gselect != NULL); - gselect->resize(pruned_pairs.size()); - for (size_t i = 0; i < pruned_pairs.size(); i++) { - loglikes_tmp(i) = pruned_pairs[i].first; - (*gselect)[i] = pruned_pairs[i].second; - } - return loglikes_tmp.LogSumExp(); -} - - - -void SgmmGauPost::Write(std::ostream &os, bool binary) const { - WriteToken(os, binary, ""); - int32 T = this->size(); - WriteBasicType(os, binary, T); - for (int32 t = 0; t < T; t++) { - WriteToken(os, binary, ""); - WriteIntegerVector(os, binary, (*this)[t].gselect); - WriteToken(os, binary, ""); - WriteIntegerVector(os, binary, (*this)[t].tids); - KALDI_ASSERT((*this)[t].tids.size() == (*this)[t].posteriors.size()); - for (size_t i = 0; i < (*this)[t].posteriors.size(); i++) { - (*this)[t].posteriors[i].Write(os, binary); - } - } - WriteToken(os, binary, ""); -} - -void SgmmGauPost::Read(std::istream &is, bool binary) { - ExpectToken(is, binary, ""); - int32 T; - ReadBasicType(is, binary, &T); - KALDI_ASSERT(T >= 0); - this->resize(T); - for (int32 t = 0; t < T; t++) { - ExpectToken(is, binary, ""); - ReadIntegerVector(is, binary, &((*this)[t].gselect)); - ExpectToken(is, binary, ""); - ReadIntegerVector(is, binary, &((*this)[t].tids)); - size_t sz = (*this)[t].tids.size(); - (*this)[t].posteriors.resize(sz); - for (size_t i = 0; i < sz; i++) - (*this)[t].posteriors[i].Read(is, binary); - } - ExpectToken(is, binary, ""); -} - - -void AmSgmmFunctions::ComputeDistances(const AmSgmm &model, - const Vector &state_occs, - MatrixBase *dists) { - int32 num_states = model.NumPdfs(), - phn_space_dim = model.PhoneSpaceDim(), - num_gauss = model.NumGauss(); - KALDI_ASSERT(dists != NULL && dists->NumRows() == num_states - && dists->NumCols() == num_states); - Vector prior(state_occs); - KALDI_ASSERT(prior.Sum() != 0.0); - prior.Scale(1.0 / prior.Sum()); // Normalize. - SpMatrix H(phn_space_dim); // The same as H_sm in some other code. 
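-  // Sketch of what the loop below accumulates: the prior-weighted metric
-  //   H = \sum_i prior(i) * M_i^T \Sigma_i^{-1} M_i,
-  // after which the distance reported for a pair of single-substate states
-  // is the quadratic form
-  //   d(j1, j2) = (v_{j1} - v_{j2})^T H (v_{j1} - v_{j2}),
-  // the approximated K-L divergence mentioned in the header's documentation
-  // for ComputeDistances().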
- for (int32 i = 0; i < num_gauss; i++) { - SpMatrix Hi(phn_space_dim); - Hi.AddMat2Sp(1.0, model.M_[i], kTrans, model.SigmaInv_[i], 0.0); - H.AddSp(prior(i), Hi); - } - bool warned = false; - for (int32 j1 = 0; j1 < num_states; ++j1) { - if (model.NumSubstates(j1) != 1 && !warned) { - KALDI_WARN << "ComputeDistances() can only give meaningful output if you " - << "have one substate per state."; - warned = true; - } - for (int32 j2 = 0; j2 <= j1; ++j2) { - Vector v_diff(model.v_[j1].Row(0)); - v_diff.AddVec(-1.0, model.v_[j2].Row(0)); - (*dists)(j1, j2) = (*dists)(j2, j1) = VecSpVec(v_diff, H, v_diff); - } - } -} - -} // namespace kaldi diff --git a/src/sgmm/am-sgmm.h b/src/sgmm/am-sgmm.h deleted file mode 100644 index 229b1b4811f..00000000000 --- a/src/sgmm/am-sgmm.h +++ /dev/null @@ -1,420 +0,0 @@ -// sgmm/am-sgmm.h - -// Copyright 2009-2011 Microsoft Corporation; Lukas Burget; -// Saarland University (Author: Arnab Ghoshal); -// Ondrej Glembek; Yanmin Qian; -// Copyright 2012-2013 Johns Hopkins University (author: Daniel Povey) -// Liang Lu; Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_SGMM_AM_SGMM_H_ -#define KALDI_SGMM_AM_SGMM_H_ - -#include - -#include "base/kaldi-common.h" -#include "matrix/matrix-lib.h" -#include "gmm/model-common.h" -#include "gmm/diag-gmm.h" -#include "gmm/full-gmm.h" -#include "itf/options-itf.h" -#include "util/table-types.h" - -namespace kaldi { - -struct SgmmGselectConfig { - /// Number of highest-scoring full-covariance Gaussians per frame. - int32 full_gmm_nbest; - /// Number of highest-scoring diagonal-covariance Gaussians per frame. - int32 diag_gmm_nbest; - - SgmmGselectConfig() { - full_gmm_nbest = 15; - diag_gmm_nbest = 50; - } - - void Register(OptionsItf *opts) { - opts->Register("full-gmm-nbest", &full_gmm_nbest, "Number of highest-scoring" - " full-covariance Gaussians selected per frame."); - opts->Register("diag-gmm-nbest", &diag_gmm_nbest, "Number of highest-scoring" - " diagonal-covariance Gaussians selected per frame."); - } -}; - -/** \struct SgmmPerFrameDerivedVars - * Holds the per-frame precomputed quantities x(t), x_{i}(t), z_{i}(t), and - * n_{i}(t) (cf. Eq. (33)-(36)) for the SGMM, as well as the cached Gaussian - * selection records. 
- */ -struct SgmmPerFrameDerivedVars { - std::vector gselect; - Vector xt; ///< x'(t), FMLLR-adapted, dim = [D], eq.(33) - Matrix xti; ///< x_{i}(t) = x'(t) - o_i(s): dim = [I][D], eq.(34) - Matrix zti; ///< z_{i}(t), dim = [I][S], eq.(35) - Vector nti; ///< n_{i}(t), dim = [I], eq.(36) - - SgmmPerFrameDerivedVars() : xt(0), xti(0, 0), zti(0, 0), nti(0) {} - void Resize(int32 ngauss, int32 feat_dim, int32 phn_dim) { - xt.Resize(feat_dim); - xti.Resize(ngauss, feat_dim); - zti.Resize(ngauss, phn_dim); - nti.Resize(ngauss); - } - bool IsEmpty() const { - return (xt.Dim() == 0 || xti.NumRows() == 0 || zti.NumRows() == 0 - || nti.Dim() == 0); - } - bool NeedsResizing(int32 ngauss, int32 feat_dim, int32 phn_dim) const { - /* if (xt.Dim() != feat_dim) - KALDI_LOG << "xt dim = " << xt.Dim() << ", feat dim = " << feat_dim; - if (xti.NumRows() != ngauss || xti.NumCols() != feat_dim) - KALDI_LOG << "xti size = " << xti.NumRows() << ", " << xti.NumCols() - << "; ngauss = " << ngauss << ", feat dim = " << feat_dim; - if (zti.NumRows() != ngauss || zti.NumCols() != phn_dim) - KALDI_LOG << "zti size = " << zti.NumRows() << ", " << zti.NumCols() - << "; ngauss = " << ngauss << "; phn dim = " << phn_dim; - if (nti.Dim() != ngauss) - KALDI_LOG << "nti dim = " << nti.Dim() << ", ngauss = " << ngauss; - */ - return (xt.Dim() != feat_dim || xti.NumRows() != ngauss - || xti.NumCols() != feat_dim || zti.NumRows() != ngauss - || zti.NumCols() != phn_dim || nti.Dim() != ngauss); - } -}; - - -struct SgmmPerSpkDerivedVars { - // To set this up, call ComputePerSpkDerivedVars from the sgmm object. - void Clear() { - v_s.Resize(0); - o_s.Resize(0, 0); - } - Vector v_s; ///< Speaker adaptation vector v_^{(s)}. Dim is [T] - Matrix o_s; ///< Per-speaker offsets o_{i}. Dimension is [I][D] -}; - - -/** \class AmSgmm - * Class for definition of the subspace Gmm acoustic model - */ -class AmSgmm { - public: - AmSgmm() {} - void Read(std::istream &rIn, bool binary); - void Write(std::ostream &out, bool binary, - SgmmWriteFlagsType write_params) const; - - /// Checks the various components for correct sizes. With wrong sizes, - /// assertion failure occurs. When the argument is set to true, dimensions of - /// the various components are printed. - void Check(bool show_properties = true); - - /// Initializes the SGMM parameters from a full-covariance UBM. - void InitializeFromFullGmm(const FullGmm &gmm, int32 num_states, - int32 phn_subspace_dim, int32 spk_subspace_dim); - - /// Used to copy models (useful in update) - void CopyFromSgmm(const AmSgmm &other, bool copy_normalizers); - - /// Copies the global parameters from the supplied model, but sets - /// the state vectors to zero. Supports reducing the phonetic - /// and speaker subspace dimensions. - void CopyGlobalsInitVecs(const AmSgmm &other, int32 phn_subspace_dim, - int32 spk_subspace_dim, int32 num_pdfs); - - /// Computes the top-scoring Gaussian indices (used for pruning of later - /// stages of computation). Returns frame log-likelihood given selected - /// Gaussians from full UBM. - BaseFloat GaussianSelection(const SgmmGselectConfig &config, - const VectorBase &data, - std::vector *gselect) const; - - /// As GaussianSelection, but limiting it to a provided list of - /// preselected Gaussians (e.g. for gender dependency). - /// The list "preselect" must be sorted and uniq. 
- BaseFloat GaussianSelectionPreselect(const SgmmGselectConfig &config, - const VectorBase &data, - const std::vector &preselect, - std::vector *gselect) const; - - /// This needs to be called with each new frame of data, prior to accumulation - /// or likelihood evaluation: it computes various pre-computed quantities. The - /// 'logdet_s' term is the log determinant of the FMLLR transform, or 0.0 if - /// no FMLLR is used or it's single-class fMLLR applied in the feature - /// extraction, and we're not keeping track of it here. - void ComputePerFrameVars(const VectorBase &data, - const std::vector &gselect, - const SgmmPerSpkDerivedVars &spk_vars, - BaseFloat logdet_s, - SgmmPerFrameDerivedVars *per_frame_vars) const; - - /// Computes the per-speaker derived vars; assumes vars->v_s is already - /// set up. - void ComputePerSpkDerivedVars(SgmmPerSpkDerivedVars *vars) const; - - /// This does a likelihood computation for a given state using the - /// top-scoring Gaussian components (in per_frame_vars). If the - /// log_prune parameter is nonzero (e.g. 5.0), the LogSumExp() stage is - /// pruned, which is a significant speedup... smaller values are faster. - BaseFloat LogLikelihood(const SgmmPerFrameDerivedVars &per_frame_vars, - int32 state_index, BaseFloat log_prune = 0.0) const; - - /// Similar to LogLikelihood() function above, but also computes the posterior - /// probabilities for the top-scoring Gaussian components and all substates. - BaseFloat ComponentPosteriors(const SgmmPerFrameDerivedVars &per_frame_vars, - int32 state, Matrix *post) const; - - /// Increases the total number of substates based on the state occupancies. - void SplitSubstates(const Vector &state_occupancies, - int32 target_nsubstates, - BaseFloat perturb, - BaseFloat power, - BaseFloat cond); - - /// Functions for increasing the phonetic and speaker space dimensions. - /// The argument norm_xform is a LDA-like feature normalizing transform, - /// computed by the ComputeFeatureNormalizer function. - void IncreasePhoneSpaceDim(int32 target_dim, - const Matrix &norm_xform); - void IncreaseSpkSpaceDim(int32 target_dim, - const Matrix &norm_xform); - - /// Computes (and initializes if necessary) derived vars... - /// for now this is just the normalizers "n" and the diagonal UBM. - void ComputeDerivedVars(); - - /// Computes the data-independent terms in the log-likelihood computation - /// for each Gaussian component and all substates. Eq. (31) - void ComputeNormalizers(); - - /// Computes the normalizers, while normalizing the weights to one - /// among each of the sets in "normalize_sets": these sets should - /// be disjoint and their union should be all the indices 0 ... I-1. - void ComputeNormalizersNormalized( - const std::vector< std::vector > &normalize_sets); - - /// Computes the LDA-like pre-transform and its inverse as well as the - /// eigenvalues of the scatter of the means used in FMLLR estimation. - void ComputeFmllrPreXform(const Vector &state_occs, - Matrix *xform, - Matrix *inv_xform, - Vector *diag_mean_scatter) const; - - /// Various model dimensions. - int32 NumPdfs() const { return c_.size(); } - int32 NumSubstates(int32 j) const { return c_[j].Dim(); } - int32 NumGauss() const { return M_.size(); } - int32 PhoneSpaceDim() const { return w_.NumCols(); } - int32 SpkSpaceDim() const { return (N_.size() > 0) ? 
N_[0].NumCols() : 0; }
-  int32 FeatureDim() const { return M_[0].NumRows(); }
-
-  void RemoveSpeakerSpace() { N_.clear(); }
-
-  /// Accessors
-  const FullGmm & full_ubm() const { return full_ubm_; }
-  const DiagGmm & diag_ubm() const { return diag_ubm_; }
-
-  const Matrix& StateVectors(int32 state_index) const {
-    return v_[state_index];
-  }
-  const SpMatrix& GetInvCovars(int32 gauss_index) const {
-    return SigmaInv_[gauss_index];
-  }
-  const Matrix& GetPhoneProjection(int32 gauss_index) const {
-    return M_[gauss_index];
-  }
-
-  /// Templated accessors (used to accumulate in different precision)
-  template
-  void GetInvCovars(int32 gauss_index, SpMatrix *out) const;
-
-  template
-  void GetSubstateMean(int32 j, int32 m, int32 i,
-                       VectorBase *mean_out) const;
-
-  template
-  void GetSubstateSpeakerMean(int32 state, int32 substate, int32 gauss,
-                              const SgmmPerSpkDerivedVars &spk,
-                              VectorBase *mean_out) const;
-
-  template
-  void GetVarScaledSubstateSpeakerMean(int32 state, int32 substate,
-                                       int32 gauss,
-                                       const SgmmPerSpkDerivedVars &spk,
-                                       VectorBase *mean_out) const;
-
-  template
-  void GetNtransSigmaInv(std::vector< Matrix > *out) const;
-
-  /// Computes quantities H = M_i^T Sigma_i^{-1} M_i.
-  template
-  void ComputeH(std::vector< SpMatrix > *H_i) const;
-
- protected:
-  friend class ComputeNormalizersClass;
- private:
-  /// Compute a subset of normalizers; used in multi-threaded implementation.
-  void ComputeNormalizersInternal(int32 num_threads, int32 thread,
-                                  int32 *entropy_count, double *entropy_sum);
-
-
-  /// Initializes the matrices M_ and w_
-  void InitializeMw(int32 phn_subspace_dim,
-                    const Matrix &norm_xform);
-  /// Initializes the matrices N_
-  void InitializeN(int32 spk_subspace_dim, const Matrix &norm_xform);
-  void InitializeVecs(int32 num_states);  ///< Initializes the state-vectors.
-  void InitializeCovars();  ///< initializes the within-class covariances.
-
-  void ComputeSmoothingTermsFromModel(
-      const std::vector< SpMatrix > &H,
-      const Vector &state_occupancies, SpMatrix *H_sm,
-      BaseFloat max_cond) const;
-
- private:
-  /// These contain the "background" model associated with the subspace GMM.
-  DiagGmm diag_ubm_;
-  FullGmm full_ubm_;
-
-  /// Globally shared parameters of the subspace GMM.
-  /// The various quantities are: I = number of Gaussians, D = data dimension,
-  /// S = phonetic subspace dimension, T = speaker subspace dimension,
-  /// J = number of states, M_{j} = number of substates of state j.
-
-  /// Inverse within-class (full) covariances; dim is [I][D][D].
-  std::vector< SpMatrix > SigmaInv_;
-  /// Phonetic-subspace projections. Dimension is [I][D][S]
-  std::vector< Matrix > M_;
-  /// Speaker-subspace projections. Dimension is [I][D][T]
-  std::vector< Matrix > N_;
-  /// Weight projection vectors. Dimension is [I][S]
-  Matrix w_;
-
-  /// The parameters in a particular SGMM state.
-
-  /// v_{jm}, per-state phonetic-subspace vectors. Dimension is [J][M_{j}][S].
-  std::vector< Matrix > v_;
-  /// c_{jm}, mixture weights. Dimension is [J][M_{j}]
-  std::vector< Vector > c_;
-  /// n_{jim}, per-Gaussian normalizer. Dimension is [J][I][M_{j}]
-  std::vector< Matrix > n_;
-
-  // Priors for MAP adaptation of M -- keeping them here for now but they may
-  // be moved somewhere else eventually.
-  // These are parameters of a matrix-variate normal distribution. The means are
-  // the unadapted M_i, and we have 2 separate covariance matrices for the rows
-  // and columns of M.
- std::vector< Matrix > M_prior_; // Matrix-variate Gaussian mean - SpMatrix row_cov_inv_; - SpMatrix col_cov_inv_; - - KALDI_DISALLOW_COPY_AND_ASSIGN(AmSgmm); - friend class EbwAmSgmmUpdater; - friend class MleAmSgmmUpdater; - friend class MleSgmmSpeakerAccs; - friend class AmSgmmFunctions; // misc functions that need access. - friend class MleAmSgmmUpdaterMulti; -}; - -template -inline void AmSgmm::GetInvCovars(int32 gauss_index, - SpMatrix *out) const { - out->Resize(SigmaInv_[gauss_index].NumRows(), kUndefined); - out->CopyFromSp(SigmaInv_[gauss_index]); -} - -template -inline void AmSgmm::GetSubstateMean(int32 j, int32 m, int32 i, - VectorBase *mean_out) const { - KALDI_ASSERT(mean_out != NULL); - KALDI_ASSERT(j < NumPdfs() && m < NumSubstates(j) && i < NumGauss()); - KALDI_ASSERT(mean_out->Dim() == FeatureDim()); - Vector mean_tmp(FeatureDim()); - mean_tmp.AddMatVec(1.0, M_[i], kNoTrans, v_[j].Row(m), 0.0); - mean_out->CopyFromVec(mean_tmp); -} - - -template -inline void AmSgmm::GetSubstateSpeakerMean(int32 j, int32 m, int32 i, - const SgmmPerSpkDerivedVars &spk, - VectorBase *mean_out) const { - GetSubstateMean(j, m, i, mean_out); - if (spk.v_s.Dim() != 0) // have speaker adaptation... - mean_out->AddVec(1.0, spk.o_s.Row(i)); -} - -template -void AmSgmm::GetVarScaledSubstateSpeakerMean(int32 j, int32 m, int32 i, - const SgmmPerSpkDerivedVars &spk, - VectorBase *mean_out) const { - Vector tmp_mean(mean_out->Dim()), tmp_mean2(mean_out->Dim()); - GetSubstateSpeakerMean(j, m, i, spk, &tmp_mean); - tmp_mean2.AddSpVec(1.0, SigmaInv_[i], tmp_mean, 0.0); - mean_out->CopyFromVec(tmp_mean2); -} - - -/// Computes the inverse of an LDA transform (without dimensionality reduction) -/// The computed transform is used in initializing the phonetic and speaker -/// subspaces, as well as while increasing the dimensions of those spaces. -void ComputeFeatureNormalizer(const FullGmm &gmm, Matrix *xform); - - -/// This is the entry for a single time. -struct SgmmGauPostElement { - // Need gselect info here, since "posteriors" is relative to this set of - // selected Gaussians. - std::vector gselect; - std::vector tids; // transition-ids for each entry in "posteriors" - std::vector > posteriors; -}; - - -/// indexed by time. -class SgmmGauPost: public std::vector { - public: - // Add the standard Kaldi Read and Write routines so - // we can use KaldiObjectHolder with this type. - explicit SgmmGauPost(size_t i) : std::vector(i) {} - SgmmGauPost() {} - void Write(std::ostream &os, bool binary) const; - void Read(std::istream &is, bool binary); -}; - -typedef KaldiObjectHolder SgmmGauPostHolder; -typedef RandomAccessTableReader RandomAccessSgmmGauPostReader; -typedef SequentialTableReader SequentialSgmmGauPostReader; -typedef TableWriter SgmmGauPostWriter; - -/// Class for misc functions that need access to SGMM private variables. -class AmSgmmFunctions { - public: - /// Computes matrix of approximated K-L divergences, - /// of size [#states x #states], as described in - /// "State-Level Data Borrowing for Low-Resource Speech Recognition based on - /// Subspace GMMs", by Yanmin Qian et. al, Interspeech 2011. - /// Model must have one substate per state. 
- static void ComputeDistances(const AmSgmm &model, - const Vector &state_occs, - MatrixBase *dists); -}; - -} // namespace kaldi - - -#endif // KALDI_SGMM_AM_SGMM_H_ diff --git a/src/sgmm/decodable-am-sgmm.cc b/src/sgmm/decodable-am-sgmm.cc deleted file mode 100644 index a654d557781..00000000000 --- a/src/sgmm/decodable-am-sgmm.cc +++ /dev/null @@ -1,72 +0,0 @@ -// sgmm/decodable-am-sgmm.cc - -// Copyright 2009-2011 Saarland University; Lukas Burget - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -using std::vector; - -#include "sgmm/decodable-am-sgmm.h" - -namespace kaldi { - -BaseFloat DecodableAmSgmm::LogLikelihoodZeroBased(int32 frame, int32 pdf_id) { - KALDI_ASSERT(frame >= 0 && frame < NumFramesReady()); - KALDI_ASSERT(pdf_id >= 0 && pdf_id < NumIndices()); - - if (log_like_cache_[pdf_id].hit_time == frame) { - return log_like_cache_[pdf_id].log_like; // return cached value, if found - } - - const VectorBase &data = feature_matrix_.Row(frame); - // check if everything is in order - if (acoustic_model_.FeatureDim() != data.Dim()) { - KALDI_ERR << "Dim mismatch: data dim = " << data.Dim() - << "vs. model dim = " << acoustic_model_.FeatureDim(); - } - - if (frame != previous_frame_) { // Per-frame precomputation for SGMM. - if (gselect_all_.empty()) - acoustic_model_.GaussianSelection(sgmm_config_, data, &gselect_); - else { - KALDI_ASSERT(frame < gselect_all_.size()); - gselect_ = gselect_all_[frame]; - } - acoustic_model_.ComputePerFrameVars(data, gselect_, spk_, - 0.0 /*FMLLR logdet*/, &per_frame_vars_); - previous_frame_ = frame; - } - - BaseFloat loglike = acoustic_model_.LogLikelihood(per_frame_vars_, pdf_id, - log_prune_); - if (KALDI_ISNAN(loglike) || KALDI_ISINF(loglike)) - KALDI_ERR << "Invalid answer (overflow or invalid variances/features?)"; - log_like_cache_[pdf_id].log_like = loglike; - log_like_cache_[pdf_id].hit_time = frame; - return loglike; -} - -void DecodableAmSgmm::ResetLogLikeCache() { - if (log_like_cache_.size() != acoustic_model_.NumPdfs()) { - log_like_cache_.resize(acoustic_model_.NumPdfs()); - } - vector::iterator it = log_like_cache_.begin(), - end = log_like_cache_.end(); - for (; it != end; ++it) { it->hit_time = -1; } -} - -} // namespace kaldi diff --git a/src/sgmm/decodable-am-sgmm.h b/src/sgmm/decodable-am-sgmm.h deleted file mode 100644 index f5f21732a3a..00000000000 --- a/src/sgmm/decodable-am-sgmm.h +++ /dev/null @@ -1,119 +0,0 @@ -// sgmm/decodable-am-sgmm.h - -// Copyright 2009-2011 Saarland University; Microsoft Corporation; -// Lukas Burget - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_SGMM_DECODABLE_AM_SGMM_H_ -#define KALDI_SGMM_DECODABLE_AM_SGMM_H_ - -#include - -#include "base/kaldi-common.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "itf/decodable-itf.h" - -namespace kaldi { - -class DecodableAmSgmm : public DecodableInterface { - public: - DecodableAmSgmm(const SgmmGselectConfig &opts, - const AmSgmm &am, - const SgmmPerSpkDerivedVars &spk, // may be empty - const TransitionModel &tm, - const Matrix &feats, - const std::vector > &gselect_all, - BaseFloat log_prune): // gselect_all may be empty - acoustic_model_(am), sgmm_config_(opts), spk_(spk), - trans_model_(tm), feature_matrix_(feats), - gselect_all_(gselect_all), previous_frame_(-1), - log_prune_(log_prune) { - ResetLogLikeCache(); - } - - // Note, frames are numbered from zero, but transition indices are 1-based! - // This is for compatibility with OpenFST. - virtual BaseFloat LogLikelihood(int32 frame, int32 tid) { - return LogLikelihoodZeroBased(frame, trans_model_.TransitionIdToPdf(tid)); - } - int32 NumFramesReady() const { return feature_matrix_.NumRows(); } - virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); } - - virtual bool IsLastFrame(int32 frame) const { - KALDI_ASSERT(frame < NumFramesReady()); - return (frame == NumFramesReady() - 1); - } - - protected: - void ResetLogLikeCache(); - virtual BaseFloat LogLikelihoodZeroBased(int32 frame, int32 pdf_id); - - const AmSgmm &acoustic_model_; - const SgmmGselectConfig &sgmm_config_; - const SgmmPerSpkDerivedVars &spk_; - const TransitionModel &trans_model_; ///< for tid to pdf mapping - const Matrix &feature_matrix_; - const std::vector > gselect_all_; ///< if nonempty, - ///< precomputed gaussian indices. - int32 previous_frame_; - BaseFloat log_prune_; - - /// Defines a cache record for a state - struct LikelihoodCacheRecord { - BaseFloat log_like; ///< Cache value - int32 hit_time; ///< Frame for which this value is relevant - }; - - /// Cached per-frame quantities used in SGMM likelihood computation. - std::vector log_like_cache_; - std::vector gselect_; - SgmmPerFrameDerivedVars per_frame_vars_; - - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableAmSgmm); -}; - -class DecodableAmSgmmScaled : public DecodableAmSgmm { - public: - DecodableAmSgmmScaled(const SgmmGselectConfig &opts, - const AmSgmm &am, - const SgmmPerSpkDerivedVars &spk, // may be empty - const TransitionModel &tm, - const Matrix &feats, - const std::vector > &gselect_all, - // gselect_all may be empty - BaseFloat log_prune, - BaseFloat scale) - : DecodableAmSgmm(opts, am, spk, tm, feats, gselect_all, log_prune), - scale_(scale) {} - - // Note, frames are numbered from zero but transition-ids from one. 
- virtual BaseFloat LogLikelihood(int32 frame, int32 tid) { - return LogLikelihoodZeroBased(frame, trans_model_.TransitionIdToPdf(tid)) - * scale_; - } - - private: - BaseFloat scale_; - KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableAmSgmmScaled); -}; - - -} // namespace kaldi - -#endif // KALDI_SGMM_DECODABLE_AM_SGMM_H_ diff --git a/src/sgmm/estimate-am-sgmm-ebw.cc b/src/sgmm/estimate-am-sgmm-ebw.cc deleted file mode 100644 index 74b79694ec8..00000000000 --- a/src/sgmm/estimate-am-sgmm-ebw.cc +++ /dev/null @@ -1,654 +0,0 @@ -// sgmm/estimate-am-sgmm-ebw.cc - -// Copyright 2012 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "sgmm/estimate-am-sgmm-ebw.h" -#include "thread/kaldi-thread.h" -using std::vector; - -namespace kaldi { - -void EbwAmSgmmUpdater::Update(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - AmSgmm *model, - SgmmUpdateFlagsType flags, - BaseFloat *auxf_change_out, - BaseFloat *count_out) { - - KALDI_ASSERT((flags & (kSgmmPhoneVectors | kSgmmPhoneProjections | - kSgmmPhoneWeightProjections | kSgmmCovarianceMatrix | - kSgmmSubstateWeights | kSgmmSpeakerProjections)) != 0); - - // Various quantities need to be computed at the start, before we - // change any of the model parameters. - std::vector< SpMatrix > Q_num, Q_den, H, S_means; - - if (flags & kSgmmPhoneProjections) { - MleAmSgmmUpdater::ComputeQ(num_accs, *model, &Q_num); - MleAmSgmmUpdater::ComputeQ(den_accs, *model, &Q_den); - } - if (flags & kSgmmCovarianceMatrix) { // compute the difference between - // the num and den S_means matrices... this is what we will need. 
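-    // (In other words -- a reading of this branch, not extra logic: only the
-    // numerator-minus-denominator scatter enters the EBW covariance update in
-    // UpdateVars() below, so S_means[i].AddSp(-1.0, S_means_tmp[i]) folds the
-    // subtraction in once, instead of carrying both sets of matrices around.)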
- MleAmSgmmUpdater::ComputeSMeans(num_accs, *model, &S_means); - std::vector< SpMatrix > S_means_tmp; - MleAmSgmmUpdater::ComputeSMeans(den_accs, *model, &S_means_tmp); - for (size_t i = 0; i < S_means.size(); i++) - S_means[i].AddSp(-1.0, S_means_tmp[i]); - } - if (flags & (kSgmmPhoneVectors | kSgmmPhoneWeightProjections)) - model->ComputeH(&H); - - BaseFloat tot_impr = 0.0; - - if (flags & kSgmmPhoneVectors) - tot_impr += UpdatePhoneVectors(num_accs, den_accs, model, H); - - if (flags & kSgmmPhoneProjections) - tot_impr += UpdateM(num_accs, den_accs, Q_num, Q_den, model); - - if (flags & kSgmmPhoneWeightProjections) - tot_impr += UpdateWParallel(num_accs, den_accs, model); - - if (flags & kSgmmCovarianceMatrix) - tot_impr += UpdateVars(num_accs, den_accs, S_means, model); - - if (flags & kSgmmSubstateWeights) - tot_impr += UpdateSubstateWeights(num_accs, den_accs, model); - - if (flags & kSgmmSpeakerProjections) - tot_impr += UpdateN(num_accs, den_accs, model); - - - if (auxf_change_out) *auxf_change_out = tot_impr * num_accs.total_frames_; - if (count_out) *count_out = num_accs.total_frames_; - - if (fabs(num_accs.total_frames_ - den_accs.total_frames_) > - 0.01*(num_accs.total_frames_ + den_accs.total_frames_)) - KALDI_WARN << "Num and den frame counts differ, " - << num_accs.total_frames_ << " vs. " << den_accs.total_frames_; - - BaseFloat like_diff = num_accs.total_like_ - den_accs.total_like_; - - KALDI_LOG << "***Averaged differenced likelihood per frame is " - << (like_diff/num_accs.total_frames_) - << " over " << (num_accs.total_frames_) << " frames."; - KALDI_LOG << "***Note: for this to be at all meaningful, if you use " - << "\"canceled\" stats you will have to renormalize this over " - << "the \"real\" frame count."; - - model->ComputeNormalizers(); -} - - -class EbwUpdatePhoneVectorsClass: public MultiThreadable { // For multi-threaded. - public: - EbwUpdatePhoneVectorsClass(const EbwAmSgmmUpdater *updater, - const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - AmSgmm *model, - const std::vector > &H, - double *auxf_impr): - updater_(updater), num_accs_(num_accs), den_accs_(den_accs), - model_(model), H_(H), auxf_impr_ptr_(auxf_impr), auxf_impr_(0.0) { } - - ~EbwUpdatePhoneVectorsClass() { - *auxf_impr_ptr_ += auxf_impr_; - } - - inline void operator() () { - // Note: give them local copy of the sums we're computing, - // which will be propagated to the total sums in the destructor. - updater_->UpdatePhoneVectorsInternal(num_accs_, den_accs_, model_, H_, - &auxf_impr_, num_threads_, thread_id_); - } - private: - const EbwAmSgmmUpdater *updater_; - const MleAmSgmmAccs &num_accs_; - const MleAmSgmmAccs &den_accs_; - AmSgmm *model_; - const std::vector > &H_; - double *auxf_impr_ptr_; - double auxf_impr_; -}; - - -void EbwAmSgmmUpdater::ComputePhoneVecStats( - const MleAmSgmmAccs &accs, - const AmSgmm &model, - const std::vector > &H, - int32 j, - int32 m, - const Vector &w_jm, - double gamma_jm, - Vector *g_jm, - SpMatrix *H_jm) { - g_jm->CopyFromVec(accs.y_[j].Row(m)); - for (int32 i = 0; i < accs.num_gaussians_; i++) { - double gamma_jmi = accs.gamma_[j](m, i); - double quadratic_term = std::max(gamma_jmi, gamma_jm * w_jm(i)); - double scalar = gamma_jmi - gamma_jm * w_jm(i) + quadratic_term - * VecVec(model.w_.Row(i), model.v_[j].Row(m)); - g_jm->AddVec(scalar, model.w_.Row(i)); - if (gamma_jmi != 0.0) - H_jm->AddSp(gamma_jmi, H[i]); // The most important term.. 
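-    // A note on the max() above (cf. the weight-update discussion in the
-    // SGMM paper): using quadratic_term = max(gamma_jmi, gamma_jm * w_jm(i))
-    // rather than gamma_jmi alone over-estimates the curvature of the weight
-    // term; e.g. with gamma_jmi = 2.0, gamma_jm = 10.0, w_jm(i) = 0.5 the
-    // coefficient put on w_i w_i^T below is 5.0, not 2.0, which keeps the
-    // quadratic auxiliary function a conservative (safe) approximation.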
-    if (quadratic_term > 1.0e-10)
-      H_jm->AddVec2(static_cast<BaseFloat>(quadratic_term), model.w_.Row(i));
-  }
-}
-
-
-// Runs the phone-vectors update for a subset of states (called
-// multi-threaded).
-void EbwAmSgmmUpdater::UpdatePhoneVectorsInternal(
-    const MleAmSgmmAccs &num_accs,
-    const MleAmSgmmAccs &den_accs,
-    AmSgmm *model,
-    const std::vector< SpMatrix<double> > &H,
-    double *auxf_impr,
-    int32 num_threads,
-    int32 thread_id) const {
-
-  int32 block_size = (num_accs.num_states_ + (num_threads-1)) / num_threads,
-      j_start = block_size * thread_id,
-      j_end = std::min(num_accs.num_states_, j_start + block_size);
-
-  int32 S = num_accs.phn_space_dim_, I = num_accs.num_gaussians_;
-
-  for (int32 j = j_start; j < j_end; j++) {
-    double num_state_count = 0.0,
-        state_auxf_impr = 0.0;
-    Vector<double> w_jm(I);
-    for (int32 m = 0; m < model->NumSubstates(j); m++) {
-      double gamma_jm_num = num_accs.gamma_[j].Row(m).Sum();
-      double gamma_jm_den = den_accs.gamma_[j].Row(m).Sum();
-      num_state_count += gamma_jm_num;
-      Vector<double> g_jm_num(S);  // computed using eq. 58 of the SGMM paper [numerator stats]
-      SpMatrix<double> H_jm_num(S);  // computed using eq. 59 of the SGMM paper [numerator stats]
-      Vector<double> g_jm_den(S);  // same, but for denominator stats.
-      SpMatrix<double> H_jm_den(S);
-
-      // Compute the weights for this sub-state.
-      // w_jm = softmax([w_{k1}^T ... w_{kD}^T] * v_{jkm})   eq. (7)
-      w_jm.AddMatVec(1.0, Matrix<double>(model->w_), kNoTrans,
-                     Vector<double>(model->v_[j].Row(m)), 0.0);
-      w_jm.ApplySoftMax();
-
-      ComputePhoneVecStats(num_accs, *model, H, j, m, w_jm, gamma_jm_num,
-                           &g_jm_num, &H_jm_num);
-      ComputePhoneVecStats(den_accs, *model, H, j, m, w_jm, gamma_jm_den,
-                           &g_jm_den, &H_jm_den);
-
-      Vector<double> v_jm(model->v_[j].Row(m));
-      Vector<double> local_derivative(S);  // difference of the derivatives of
-      // the numerator and denominator objective functions.
-      local_derivative.AddVec(1.0, g_jm_num);
-      local_derivative.AddSpVec(-1.0, H_jm_num, v_jm, 1.0);
-      local_derivative.AddVec(-1.0, g_jm_den);
-      local_derivative.AddSpVec(-1.0 * -1.0, H_jm_den, v_jm, 1.0);
-
-      SpMatrix<double> quadratic_term(H_jm_num);
-      quadratic_term.AddSp(1.0, H_jm_den);
-      double substate_count = 1.0e-10 + gamma_jm_num + gamma_jm_den;
-      quadratic_term.Scale((substate_count + options_.tau_v) / substate_count);
-      quadratic_term.Scale(1.0 / (options_.lrate_v + 1.0e-10));
-
-      Vector<double> delta_v_jm(S);
-
-      SolverOptions opts;
-      opts.name = "v";
-      opts.K = options_.max_cond;
-      opts.eps = options_.epsilon;
-
-      double auxf_impr =
-          ((gamma_jm_num + gamma_jm_den == 0) ?
0.0 : - SolveQuadraticProblem(quadratic_term, local_derivative, - opts, &delta_v_jm)); - - v_jm.AddVec(1.0, delta_v_jm); - model->v_[j].Row(m).CopyFromVec(v_jm); - state_auxf_impr += auxf_impr; - } - - *auxf_impr += state_auxf_impr; - if (j < 10 && thread_id == 0) { - KALDI_LOG << "Objf impr for state j = " << j << " is " - << (state_auxf_impr / (num_state_count + 1.0e-10)) - << " over " << num_state_count << " frames"; - } - } -} - -double EbwAmSgmmUpdater::UpdatePhoneVectors(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - AmSgmm *model, - const vector< SpMatrix > &H) const { - KALDI_LOG << "Updating phone vectors."; - - double count = 0.0, auxf_impr = 0.0; - - int32 J = num_accs.num_states_; - for (int32 j = 0; j < J; j++) count += num_accs.gamma_[j].Sum(); - - EbwUpdatePhoneVectorsClass c(this, num_accs, den_accs, model, H, &auxf_impr); - RunMultiThreaded(c); - - auxf_impr /= count; - - KALDI_LOG << "**Overall auxf improvement for v is " << auxf_impr - << " over " << count << " frames"; - return auxf_impr; -} - - -double EbwAmSgmmUpdater::UpdateM(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - const std::vector< SpMatrix > &Q_num, - const std::vector< SpMatrix > &Q_den, - AmSgmm *model) const { - int32 S = model->PhoneSpaceDim(), - D = model->FeatureDim(), - I = model->NumGauss(); - - Vector num_count_vec(I), den_count_vec(I), impr_vec(I); - for (int32 j = 0; j < num_accs.num_states_; j++) { - num_count_vec.AddRowSumMat(1.0, num_accs.gamma_[j]); - den_count_vec.AddRowSumMat(1.0, den_accs.gamma_[j]); - } - - for (int32 i = 0; i < I; i++) { - double gamma_i_num = num_count_vec(i), gamma_i_den = den_count_vec(i); - - if (gamma_i_num + gamma_i_den == 0.0) { - KALDI_WARN << "Not updating phonetic basis for i = " << i - << " because count is zero. "; - continue; - } - - Matrix Mi(model->M_[i]); - Matrix L(D, S); // this is something like the Y quantity, which - // represents the linear term in the objf on M-- except that we make it the local - // derivative about the current value, instead of the derivative around zero. - // But it's not exactly the derivative w.r.t. M, due to the factor of Sigma_i. - // The auxiliary function is Q(x) = tr(M^T P Y) - 0.5 tr(P M Q M^T), - // where P is Y^{-1}. The quantity L we define here will be Y - M Q, - // and you can think of this as like the local derivative, except there is - // a term P in there. - L.AddMat(1.0, num_accs.Y_[i]); - L.AddMatSp(-1.0, Mi, kNoTrans, Q_num[i], 1.0); - L.AddMat(-1.0, den_accs.Y_[i]); - L.AddMatSp(-1.0*-1.0, Mi, kNoTrans, Q_den[i], 1.0); - - SpMatrix Q(S); // This is a combination of the Q's for the numerator and denominator. - Q.AddSp(1.0, Q_num[i]); - Q.AddSp(1.0, Q_den[i]); - - double state_count = 1.0e-10 + gamma_i_num + gamma_i_den; // the count - // represented by the quadratic part of the stats. 
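    // (A sketch of the damping applied by the two Scale() calls below;
    // "effective_Q" is an illustrative name, not a variable in this file:
    //
    //   effective_Q = Q * ((state_count + tau_M) / state_count) / lrate_M
    //
    // so low-count Gaussians get a proportionally larger quadratic term and
    // hence a smaller, safer step, while lrate_M < 1 shrinks every step.)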
- Q.Scale( (state_count + options_.tau_M) / state_count ); - Q.Scale( 1.0 / (options_.lrate_M + 1.0e-10) ); - - SolverOptions opts; - opts.name = "M"; - opts.K = options_.max_cond; - opts.eps = options_.epsilon; - - Matrix deltaM(D, S); - double impr = - SolveQuadraticMatrixProblem(Q, L, - SpMatrix(model->SigmaInv_[i]), - opts, &deltaM); - - impr_vec(i) = impr; - Mi.AddMat(1.0, deltaM); - model->M_[i].CopyFromMat(Mi); - if (i < 10 || impr / state_count > 3.0) { - KALDI_LOG << "Objf impr for projection M for i = " << i << ", is " - << (impr/(gamma_i_num + 1.0e-20)) << " over " << gamma_i_num - << " frames"; - } - } - BaseFloat tot_count = num_count_vec.Sum(), tot_impr = impr_vec.Sum(); - - tot_impr /= (tot_count + 1.0e-20); - KALDI_LOG << "Overall auxiliary function improvement for model projections " - << "M is " << tot_impr << " over " << tot_count << " frames"; - - KALDI_VLOG(1) << "Updating M: num-count is " << num_count_vec; - KALDI_VLOG(1) << "Updating M: den-count is " << den_count_vec; - KALDI_VLOG(1) << "Updating M: objf-impr is " << impr_vec; - - return tot_impr; -} - - -// Note: we do just one iteration of the weight-projection update here. The -// weak-sense auxiliary functions used don't really make sense if we do it for -// multiple iterations. It would be possible to use a similar auxiliary -// function to the one on my (D. Povey)'s thesis for the Gaussian mixture -// weights, which would make sense for multiple iterations, but this would be a -// bit more complex to implement and probably would not give much improvement -// over this approach. -double EbwAmSgmmUpdater::UpdateWParallel(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - AmSgmm *model) { - KALDI_LOG << "Updating weight projections"; - - int32 I = num_accs.num_gaussians_, S = num_accs.phn_space_dim_; - - Matrix g_i_num(I, S), g_i_den(I, S); - - // View F_i_{num,den} as vectors of SpMatrix [i.e. symmetric matrices, - // linearized into vectors] - Matrix F_i_num(I, (S*(S+1))/2), F_i_den(I, (S*(S+1))/2); - - Vector num_count_vec(I), den_count_vec(I), impr_vec(I); - for (int32 j = 0; j < num_accs.num_states_; j++) { - num_count_vec.AddRowSumMat(1.0, num_accs.gamma_[j]); - den_count_vec.AddRowSumMat(1.0, den_accs.gamma_[j]); - } - - // Get the F_i and g_i quantities-- this is done in parallel (multi-core), - // using the same code we use in the ML update [except we get it for - // numerator and denominator separately.] - Matrix w(model->w_); - { - double garbage; - UpdateWParallelClass c_num(num_accs, *model, w, &F_i_num, &g_i_num, &garbage); - RunMultiThreaded(c_num); - } - { - double garbage; - UpdateWParallelClass c_den(den_accs, *model, w, &F_i_den, &g_i_den, &garbage); - RunMultiThreaded(c_den); - } - - for (int32 i = 0; i < I; i++) { - - // auxf was originally formulated in terms of the change in w (i.e. the - // g quantities are the local derivatives), so there is less hassle than - // with some of the other updates, in changing it to be discriminative. - // we essentially just difference the linear terms and add the quadratic - // terms. - - Vector derivative(g_i_num.Row(i)); - derivative.AddVec(-1.0, g_i_den.Row(i)); - // F_i_num quadratic_term is a bit like the negated 2nd derivative - // of the numerator stats-- actually it's not the actual 2nd deriv, - // but an upper bound on it. - SpMatrix quadratic_term(S), tmp_F(S); - quadratic_term.CopyFromVec(F_i_num.Row(i)); - tmp_F.CopyFromVec(F_i_den.Row(i)); // tmp_F is used for Vector->SpMatrix conversion. 
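    // (Layout note: F_i_num and F_i_den store each S x S symmetric matrix
    // packed into a row of length S*(S+1)/2; the CopyFromVec() calls above
    // unpack that triangular layout back into SpMatrix form so the two
    // quadratic terms can be summed below.)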
- quadratic_term.AddSp(1.0, tmp_F); - - double state_count = num_count_vec(i) + den_count_vec(i); - - quadratic_term.Scale((state_count + options_.tau_w) / (state_count + 1.0e-10)); - quadratic_term.Scale(1.0 / (options_.lrate_w + 1.0e-10) ); - - Vector delta_w(S); - - SolverOptions opts; - opts.name = "w"; - opts.K = options_.max_cond; - opts.eps = options_.epsilon; - - double objf_impr = - SolveQuadraticProblem(quadratic_term, derivative, opts, &delta_w); - - impr_vec(i) = objf_impr; - if (i < 10 || objf_impr / (num_count_vec(i) + 1.0e-10) > 2.0) { - KALDI_LOG << "Predicted objf impr for w per frame is " - << (objf_impr / (num_count_vec(i) + 1.0e-10)) - << " over " << num_count_vec(i) << " frames."; - } - model->w_.Row(i).AddVec(1.0, delta_w); - } - KALDI_VLOG(1) << "Updating w: numerator count is " << num_count_vec; - KALDI_VLOG(1) << "Updating w: denominator count is " << den_count_vec; - KALDI_VLOG(1) << "Updating w: objf-impr is " << impr_vec; - - double tot_num_count = num_count_vec.Sum(), tot_impr = impr_vec.Sum(); - tot_impr /= tot_num_count; - - KALDI_LOG << "**Overall objf impr for w per frame is " - << tot_impr << " over " << tot_num_count - << " frames."; - return tot_impr; -} - - -double EbwAmSgmmUpdater::UpdateN(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - AmSgmm *model) const { - if (num_accs.spk_space_dim_ == 0 || num_accs.R_.size() == 0 || - num_accs.Z_.size() == 0) { - KALDI_ERR << "Speaker subspace dim is zero or no stats accumulated"; - } - - int32 I = num_accs.num_gaussians_, D = num_accs.feature_dim_, - T = num_accs.spk_space_dim_; - - Vector num_count_vec(I), den_count_vec(I), impr_vec(I); - for (int32 j = 0; j < num_accs.num_states_; j++) { - num_count_vec.AddRowSumMat(1.0, num_accs.gamma_[j]); - den_count_vec.AddRowSumMat(1.0, den_accs.gamma_[j]); - } - - for (int32 i = 0; i < I; i++) { - double gamma_i_num = num_count_vec(i), gamma_i_den = den_count_vec(i); - if (gamma_i_num + gamma_i_den == 0.0) { - KALDI_WARN << "Not updating speaker basis for i = " << i - << " because count is zero. "; - continue; - } - Matrix Ni(model->N_[i]); - // See comment near declaration of L in UpdateM(). This update is the - // same, but change M->N, Y->Z and Q->R. - - Matrix L(D, T); - L.AddMat(1.0, num_accs.Z_[i]); - L.AddMatSp(-1.0, Ni, kNoTrans, num_accs.R_[i], 1.0); - L.AddMat(-1.0, den_accs.Z_[i]); - L.AddMatSp(-1.0*-1.0, Ni, kNoTrans, den_accs.R_[i], 1.0); - - SpMatrix R(T); // combination of the numerator and denominator R's. - R.AddSp(1.0, num_accs.R_[i]); - R.AddSp(1.0, den_accs.R_[i]); - - double state_count = 1.0e-10 + gamma_i_num + gamma_i_den; // the count - // represented by the quadratic part of the stats. 
- R.Scale( (state_count + options_.tau_N) / state_count ); - R.Scale( 1.0 / (options_.lrate_N + 1.0e-10) ); - - Matrix deltaN(D, T); - - SolverOptions opts; - opts.name = "M"; - opts.K = options_.max_cond; - opts.eps = options_.epsilon; - - double impr = - SolveQuadraticMatrixProblem(R, L, - SpMatrix(model->SigmaInv_[i]), - opts, &deltaN); - - impr_vec(i) = impr; - Ni.AddMat(1.0, deltaN); - model->N_[i].CopyFromMat(Ni); - if (i < 10 || impr / (state_count+1.0e-20) > 3.0) { - KALDI_LOG << "Objf impr for spk projection N for i = " << (i) - << ", is " << (impr / (gamma_i_num + 1.0e-20)) << " over " - << gamma_i_num << " frames"; - } - } - - KALDI_VLOG(1) << "Updating N: numerator count is " << num_count_vec; - KALDI_VLOG(1) << "Updating N: denominator count is " << den_count_vec; - KALDI_VLOG(1) << "Updating N: objf-impr is " << impr_vec; - - double tot_count = num_count_vec.Sum(), tot_impr = impr_vec.Sum(); - tot_impr /= (tot_count + 1.0e-20); - KALDI_LOG << "**Overall auxf impr for N is " << tot_impr - << " over " << tot_count << " frames"; - return tot_impr; -} - -double EbwAmSgmmUpdater::UpdateVars(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - const std::vector< SpMatrix > &S_means, - AmSgmm *model) const { - // Note: S_means contains not only the quantity S_means in the paper, - // but also has a term - (Y_i M_i^T + M_i Y_i^T). Plus, it is differenced - // between numerator and denominator. We don't calculate it here, - // because it had to be computed with the original model, before we - // changed the M quantities. - int32 I = num_accs.num_gaussians_; - KALDI_ASSERT(S_means.size() == I); - - Vector num_count_vec(I), den_count_vec(I), impr_vec(I); - for (int32 j = 0; j < num_accs.num_states_; j++) { - num_count_vec.AddRowSumMat(1.0, num_accs.gamma_[j]); - den_count_vec.AddRowSumMat(1.0, den_accs.gamma_[j]); - } - - for (int32 i = 0; i < I; i++) { - double num_count = num_count_vec(i), den_count = den_count_vec(i); - - SpMatrix SigmaStats(S_means[i]); - SigmaStats.AddSp(1.0, num_accs.S_[i]); - SigmaStats.AddSp(-1.0, den_accs.S_[i]); - // SigmaStats now contain the stats for estimating Sigma (as in the main SGMM paper), - // differenced between num and den. - SpMatrix SigmaInvOld(model->SigmaInv_[i]), SigmaOld(model->SigmaInv_[i]); - SigmaOld.Invert(); - double count = num_count - den_count; - KALDI_ASSERT(options_.lrate_Sigma <= 1.0); - double inv_lrate = 1.0 / options_.lrate_Sigma; - // These formulas assure that the objective function behaves in - // a roughly symmetric way w.r.t. num and den counts. - double E_den = 1.0 + inv_lrate, E_num = inv_lrate - 1.0; - - double smoothing_count = - (options_.tau_Sigma * inv_lrate) + // multiply tau_Sigma by inverse-lrate - (E_den * den_count) + // for compatibility with other updates. - (E_num * num_count) + - 1.0e-10; - SigmaStats.AddSp(smoothing_count, SigmaOld); - count += smoothing_count; - SigmaStats.Scale(1.0 / count); - SpMatrix SigmaInv(SigmaStats); // before floor and ceiling. Currently sigma, - // not its inverse. - bool verbose = false; - int n_floor = SigmaInv.ApplyFloor(SigmaOld, options_.cov_min_value, verbose); - SigmaInv.Invert(); // make it inverse variance. - int n_ceiling = SigmaInv.ApplyFloor(SigmaInvOld, options_.cov_min_value, verbose); - - // this auxf_change. 
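    // (Written out, the quantity computed below is the standard Gaussian
    // auxiliary-function change for replacing SigmaInvOld by SigmaInv,
    // evaluated on the averaged stats SigmaStats:
    //
    //   auxf_change = -0.5 * count * ( tr(SigmaInv * S) - tr(SigmaInvOld * S)
    //                                  - logdet(SigmaInv) + logdet(SigmaInvOld) )
    //
    // with S = SigmaStats.)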
- double auxf_change = -0.5 * count *(TraceSpSp(SigmaInv, SigmaStats) - - TraceSpSp(SigmaInvOld, SigmaStats) - - SigmaInv.LogDet() - + SigmaInvOld.LogDet()); - - model->SigmaInv_[i].CopyFromSp(SigmaInv); - impr_vec(i) = auxf_change; - if (i < 10 || auxf_change / (num_count+den_count+1.0e-10) > 2.0 - || n_floor+n_ceiling > 0) { - KALDI_LOG << "Updating variance: Auxf change per frame for Gaussian " - << i << " is " << (auxf_change / num_count) << " over " - << num_count << " frames " << "(den count was " << den_count - << "), #floor,ceil was " << n_floor << ", " << n_ceiling; - } - } - KALDI_VLOG(1) << "Updating Sigma: numerator count is " << num_count_vec; - KALDI_VLOG(1) << "Updating Sigma: denominator count is " << den_count_vec; - KALDI_VLOG(1) << "Updating Sigma: objf-impr is " << impr_vec; - - double tot_count = num_count_vec.Sum(), tot_impr = impr_vec.Sum(); - tot_impr /= tot_count+1.0e-20; - KALDI_LOG << "**Overall auxf impr for Sigma is " << tot_impr - << " over " << tot_count << " frames"; - return tot_impr; -} - - -double EbwAmSgmmUpdater::UpdateSubstateWeights( - const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - AmSgmm *model) { - KALDI_LOG << "Updating substate mixture weights"; - - double tot_count = 0.0, tot_impr = 0.0; - for (int32 j = 0; j < num_accs.num_states_; j++) { - int32 M = model->NumSubstates(j); - Vector num_occs(M), den_occs(M), - orig_weights(model->c_[j]), weights(model->c_[j]); - - for (int32 m = 0; m < M; m++) { - num_occs(m) = num_accs.gamma_[j].Row(m).Sum() - + options_.tau_c * weights(m); - den_occs(m) = den_accs.gamma_[j].Row(m).Sum(); - } - - if (weights.Dim() > 1) { - double begin_auxf = 0.0, end_auxf = 0.0; - for (int32 m = 0; m < M; m++) { // see eq. 4.32, Dan Povey's PhD thesis. - begin_auxf += num_occs(m) * log (weights(m)) - - den_occs(m) * weights(m) / orig_weights(m); - } - for (int32 iter = 0; iter < 50; iter++) { - Vector k_jm(M); - double max_m = 0.0; - for (int32 m = 0; m < M; m++) - max_m = std::max(max_m, den_occs(m)/orig_weights(m)); - for (int32 m = 0; m < M; m++) - k_jm(m) = max_m - den_occs(m)/orig_weights(m); - for (int32 m = 0; m < M; m++) - weights(m) = num_occs(m) + k_jm(m)*weights(m); - weights.Scale(1.0 / weights.Sum()); - } - for (int32 m = 0; m < M; m++) - weights(m) = std::max(weights(m), - static_cast(options_.min_substate_weight)); - weights.Scale(1.0 / weights.Sum()); // renormalize. - - for (int32 m = 0; m < M; m++) { - end_auxf += num_occs(m) * log (weights(m)) - - den_occs(m) * weights(m) / orig_weights(m); - } - tot_impr += end_auxf - begin_auxf; - double this_impr = ((end_auxf - begin_auxf) / num_occs.Sum()); - if (j < 10 || this_impr > 0.5) { - KALDI_LOG << "Updating substate weights: auxf impr for state " << j - << " is " << this_impr << " per frame over " << num_occs.Sum() - << " frames (den count is " << den_occs.Sum() << ")"; - } - } - model->c_[j].CopyFromVec(weights); - tot_count += den_occs.Sum(); // Note: num and den occs should be the - // same, except num occs are smoothed, so this is what we want. 
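    // (The 50-iteration loop above is the usual EBW-style fixed-point update
    // for mixture weights: per iteration, with k_m >= 0 chosen as
    // max_m'(den_m' / w_m'^{orig}) - den_m / w_m^{orig},
    //
    //   w_m <- (num_m + k_m * w_m) / sum_m'' (num_m'' + k_m'' * w_m''),
    //
    // which is designed to increase
    // sum_m (num_m * log w_m - den_m * w_m / w_m^{orig}).)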
- } - - tot_impr /= (tot_count + 1.0e-20); - - KALDI_LOG << "**Overall auxf impr for c is " << tot_impr - << " over " << tot_count << " frames"; - return tot_impr; -} - -} // namespace kaldi diff --git a/src/sgmm/estimate-am-sgmm-ebw.h b/src/sgmm/estimate-am-sgmm-ebw.h deleted file mode 100644 index d437dbe06a0..00000000000 --- a/src/sgmm/estimate-am-sgmm-ebw.h +++ /dev/null @@ -1,217 +0,0 @@ -// sgmm/estimate-am-sgmm-ebw.h - -// Copyright 2012 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_SGMM_ESTIMATE_AM_SGMM_EBW_H_ -#define KALDI_SGMM_ESTIMATE_AM_SGMM_EBW_H_ 1 - -#include -#include - -#include "gmm/model-common.h" -#include "itf/options-itf.h" -#include "sgmm/estimate-am-sgmm.h" - -namespace kaldi { - -/** - This header implements a form of Extended Baum-Welch training for SGMMs. - If you are confused by this comment, see Dan Povey's thesis for an explanation of - Extended Baum-Welch. - A note on the EBW (Extended Baum-Welch) updates for the SGMMs... In general there is - a parameter-specific value D that is similar to the D in EBW for GMMs. The value of - D is generally set to: - E * (denominator-count for that parameter) + tau-value for that parameter - where the tau-values are user-specified parameters that are specific to the type of - the parameter (e.g. phonetic vector, subspace projection, etc.). Things are a bit - more complex for this update than for GMMs, because it's not just a question of picking - a tau-value for smoothing: there is sometimes a scatter-matrix of some kind (e.g. - an outer product of vectors, or something) that defines a quadratic objective function - that we'll add as smoothing. We have to pick where to get this scatter-matrix from. - We feel that it's appropriate for the "E" part of the D to get its scatter-matrix from - denominator stats, and the tau part of the D to get half its scatter-matrix from the - both the numerator and denominator stats, assigned a weight proportional to how much - stats there were. When you see the auxiliary function written out, it's clear why this - makes sense. 
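  A worked example of the rule above, with purely illustrative numbers: for a
  parameter with denominator count 200, tau-value 50 and E = 2, the damping
  constant would be D = E * 200 + 50 = 450; the larger the denominator count
  or the tau-value, the stronger the pull back toward the smoothing
  scatter-matrix and the smaller the update step.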
-
- */
-
-struct EbwAmSgmmOptions {
-  BaseFloat tau_v; ///< Smoothing constant for updates of sub-state vectors v_{jm}
-  BaseFloat lrate_v; ///< Learning rate used in updating v-- default 0.5
-  BaseFloat tau_M; ///< Smoothing constant for the M quantities (phone-subspace projections)
-  BaseFloat lrate_M; ///< Learning rate used in updating M-- default 0.5
-  BaseFloat tau_N; ///< Smoothing constant for the N quantities (speaker-subspace projections)
-  BaseFloat lrate_N; ///< Learning rate used in updating N-- default 0.5
-  BaseFloat tau_c; ///< Tau value for smoothing substate weights (c)
-  BaseFloat tau_w; ///< Tau value for smoothing update of weight projections (w)
-  BaseFloat lrate_w; ///< Learning rate used in updating w-- default 1.0
-  BaseFloat tau_Sigma; ///< Tau value for smoothing covariance-matrices Sigma.
-  BaseFloat lrate_Sigma; ///< Learning rate used in updating Sigma-- default 0.5
-  BaseFloat min_substate_weight; ///< Minimum allowed weight in a sub-state.
-
-  BaseFloat cov_min_value; ///< E.g. 0.5-- the maximum any eigenvalue of a covariance
-  /// is allowed to change. [this is the minimum; the maximum is the inverse of this,
-  /// i.e. 2.0 in this case. For example, 0.9 would constrain the covariance quite tightly,
-  /// 0.1 would be a loose setting.]
-
-  BaseFloat max_cond; ///< large value used in SolveQuadraticProblem.
-  BaseFloat epsilon; ///< very small value used in SolveQuadraticProblem; workaround
-  /// for an issue in some implementations of SVD.
-
-  EbwAmSgmmOptions() {
-    tau_v = 50.0;
-    lrate_v = 0.5;
-    tau_M = 500.0;
-    lrate_M = 0.5;
-    tau_N = 500.0;
-    lrate_N = 0.5;
-    tau_c = 10.0;
-    tau_w = 50.0;
-    lrate_w = 1.0;
-    tau_Sigma = 500.0;
-    lrate_Sigma = 0.5;
-
-    min_substate_weight = 1.0e-05;
-    cov_min_value = 0.5;
-
-    max_cond = 1.0e+05;
-    epsilon = 1.0e-40;
-  }
-
-  void Register(OptionsItf *opts) {
-    std::string module = "EbwAmSgmmOptions: ";
-    opts->Register("tau-v", &tau_v, module+
-                   "Smoothing constant for phone vector estimation.");
-    opts->Register("lrate-v", &lrate_v, module+
-                   "Learning rate constant for phone vector estimation.");
-    opts->Register("tau-m", &tau_M, module+
-                   "Smoothing constant for estimation of phonetic-subspace projections (M).");
-    opts->Register("lrate-m", &lrate_M, module+
-                   "Learning rate constant for phonetic-subspace projections.");
-    opts->Register("tau-n", &tau_N, module+
-                   "Smoothing constant for estimation of speaker-subspace projections (N).");
-    opts->Register("lrate-n", &lrate_N, module+
-                   "Learning rate constant for speaker-subspace projections.");
-    opts->Register("tau-c", &tau_c, module+
-                   "Smoothing constant for estimation of substate weights (c)");
-    opts->Register("tau-w", &tau_w, module+
-                   "Smoothing constant for estimation of weight projections (w)");
-    opts->Register("lrate-w", &lrate_w, module+
-                   "Learning rate constant for weight-projections");
-    opts->Register("tau-sigma", &tau_Sigma, module+
-                   "Smoothing constant for estimation of within-class covariances (Sigma)");
-    opts->Register("lrate-sigma", &lrate_Sigma, module+
-                   "Constant that controls speed of learning for variances (larger->slower)");
-    opts->Register("cov-min-value", &cov_min_value, module+
-                   "Minimum value that an eigenvalue of the updated covariance matrix can take, "
-                   "relative to its old value (maximum is inverse of this.)");
-    opts->Register("min-substate-weight", &min_substate_weight, module+
-                   "Floor for weights of sub-states.");
-    opts->Register("max-cond", &max_cond, module+
-                   "Value used in handling singular matrices during update.");
opts->Register("epsilon", &max_cond, module+ - "Value used in handling singular matrices during update."); - } -}; - - -/** \class EbwAmSgmmUpdater - * Contains the functions needed to update the SGMM parameters. - */ -class EbwAmSgmmUpdater { - public: - explicit EbwAmSgmmUpdater(const EbwAmSgmmOptions &options): - options_(options) {} - - void Update(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - AmSgmm *model, - SgmmUpdateFlagsType flags, - BaseFloat *auxf_change_out, - BaseFloat *count_out); - - protected: - // The following two classes relate to multi-core parallelization of some - // phases of the update. - friend class EbwUpdateWParallelClass; - friend class EbwUpdatePhoneVectorsClass; - private: - EbwAmSgmmOptions options_; - - Vector gamma_j_; ///< State occupancies - - double UpdatePhoneVectors(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - AmSgmm *model, - const std::vector< SpMatrix > &H) const; - - // Called from UpdatePhoneVectors; updates a subset of states - // (relates to multi-threading). - void UpdatePhoneVectorsInternal(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - AmSgmm *model, - const std::vector > &H, - double *auxf_impr, - int32 num_threads, - int32 thread_id) const; - // Called from UpdatePhoneVectorsInternal - static void ComputePhoneVecStats(const MleAmSgmmAccs &accs, - const AmSgmm &model, - const std::vector > &H, - int32 j, - int32 m, - const Vector &w_jm, - double gamma_jm, - Vector *g_jm, - SpMatrix *H_jm); - - double UpdateM(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - const std::vector< SpMatrix > &Q_num, - const std::vector< SpMatrix > &Q_den, - AmSgmm *model) const; - - double UpdateN(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - AmSgmm *model) const; - - double UpdateVars(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - const std::vector< SpMatrix > &S_means, - AmSgmm *model) const; - - /// Note: in the discriminative case we do just one iteration of - /// updating the w quantities. - double UpdateWParallel(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - AmSgmm *model); - - double UpdateSubstateWeights(const MleAmSgmmAccs &num_accs, - const MleAmSgmmAccs &den_accs, - AmSgmm *model); - - KALDI_DISALLOW_COPY_AND_ASSIGN(EbwAmSgmmUpdater); - EbwAmSgmmUpdater() {} // Prevent unconfigured updater. -}; - - -} // namespace kaldi - - -#endif // KALDI_SGMM_ESTIMATE_AM_SGMM_EBW_H_ diff --git a/src/sgmm/estimate-am-sgmm-multi-test.cc b/src/sgmm/estimate-am-sgmm-multi-test.cc deleted file mode 100644 index 883934a3ce0..00000000000 --- a/src/sgmm/estimate-am-sgmm-multi-test.cc +++ /dev/null @@ -1,154 +0,0 @@ -// sgmm/estimate-am-sgmm-multi-test.cc - -// Copyright 2009-2012 Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
-
-#include "gmm/model-test-common.h"
-#include "sgmm/am-sgmm.h"
-#include "sgmm/estimate-am-sgmm.h"
-#include "sgmm/estimate-am-sgmm-multi.h"
-#include "util/kaldi-io.h"
-#include "base/kaldi-math.h"
-
-using kaldi::AmSgmm;
-using kaldi::MleAmSgmmAccs;
-using kaldi::BaseFloat;
-using kaldi::Exp;
-
-namespace ut = kaldi::unittest;
-
-// Tests the MleAmSgmmUpdaterMulti (and MleAmSgmmGlobalAccs) classes.
-void TestMultiSgmmEst(const std::vector<AmSgmm*> &models,
-                      const std::vector< kaldi::Matrix<BaseFloat> > &feats,
-                      kaldi::SgmmUpdateFlagsType flags) {
-  using namespace kaldi;
-  typedef kaldi::int32 int32;
-
-  int32 num_gauss = models[0]->NumGauss(),
-      feat_dim = models[0]->FeatureDim(),
-      phn_dim = models[0]->PhoneSpaceDim(),
-      spk_dim = models[0]->SpkSpaceDim(),
-      num_models = models.size();
-  SgmmPerFrameDerivedVars frame_vars;
-  SgmmPerSpkDerivedVars spk_vars;
-  spk_vars.v_s.Resize(spk_dim);
-  spk_vars.v_s.SetRandn();
-  SgmmGselectConfig sgmm_config;
-  frame_vars.Resize(num_gauss, feat_dim, phn_dim);
-  sgmm_config.full_gmm_nbest = std::min(sgmm_config.full_gmm_nbest, num_gauss);
-
-  std::vector<MleAmSgmmAccs*> accs(num_models);
-  BaseFloat loglike = 0.0;
-  for (int32 i = 0; i < num_models; ++i) {
-    MleAmSgmmAccs *acc = new MleAmSgmmAccs(*models[i], flags);
-    models[i]->ComputePerSpkDerivedVars(&spk_vars);
-    for (int32 f = 0; f < feats[i].NumRows(); ++f) {
-      std::vector<int32> gselect;
-      models[i]->GaussianSelection(sgmm_config, feats[i].Row(f), &gselect);
-      models[i]->ComputePerFrameVars(feats[i].Row(f), gselect, spk_vars, 0.0,
-                                     &frame_vars);
-      loglike += acc->Accumulate(*models[i], frame_vars, spk_vars.v_s, 0, 1.0,
-                                 flags);
-    }
-    acc->CommitStatsForSpk(*models[i], spk_vars.v_s);
-    accs[i] = acc;
-  }
-
-  std::vector<AmSgmm*> new_models(num_models);
-  kaldi::MleAmSgmmOptions update_opts;
-  for (int32 i = 0; i < num_models; ++i) {
-    AmSgmm *sgmm1 = new AmSgmm();
-    sgmm1->CopyFromSgmm(*models[i], false);
-    new_models[i] = sgmm1;
-  }
-
-  // The updater class stores the global parameters; it is OK to initialize it
-  // with any of the models, since they are assumed to share those parameters.
- kaldi::MleAmSgmmUpdaterMulti updater(*models[0], update_opts); - updater.Update(accs, new_models, flags); - - BaseFloat loglike1 = 0.0; - for (int32 i = 0; i < num_models; ++i) { - new_models[i]->ComputePerSpkDerivedVars(&spk_vars); - for (int32 f = 0; f < feats[i].NumRows(); ++f) { - std::vector gselect; - new_models[i]->GaussianSelection(sgmm_config, feats[i].Row(f), &gselect); - new_models[i]->ComputePerFrameVars(feats[i].Row(f), gselect, spk_vars, 0.0, - &frame_vars); - loglike1 += new_models[i]->LogLikelihood(frame_vars, 0); - } - } - KALDI_LOG << "LL = " << loglike << "; LL1 = " << loglike1; - - KALDI_ASSERT(loglike1 >= loglike - (std::abs(loglike1)+std::abs(loglike))*1.0e-06); - - DeletePointers(&accs); - DeletePointers(&new_models); -} - -void UnitTestEstimateSgmm() { - int32 dim = 2 + kaldi::RandInt(0, 9); // random dimension of the gmm - int32 num_comp = 2 + kaldi::RandInt(0, 9); // random mixture size - kaldi::FullGmm full_gmm; - ut::InitRandFullGmm(dim, num_comp, &full_gmm); - - int32 num_states = 1; - int32 num_models = kaldi::RandInt(2, 9); - std::vector models(num_models); - for (int32 i =0; i < num_models; ++i) { - AmSgmm* sgmm = new AmSgmm(); - sgmm->InitializeFromFullGmm(full_gmm, num_states, dim+1, dim); - sgmm->ComputeNormalizers(); - models[i] = sgmm; - } - - std::vector< kaldi::Matrix > feats(num_models); - for (int32 i = 0; i < num_models; ++i) { - // First, generate random means and variances - int32 num_feat_comp = num_comp + kaldi::RandInt(-num_comp/2, num_comp/2); - kaldi::Matrix means(num_feat_comp, dim), - vars(num_feat_comp, dim); - for (int32 m = 0; m < num_feat_comp; ++m) { - for (int32 d= 0; d < dim; d++) { - means(m, d) = kaldi::RandGauss(); - vars(m, d) = Exp(kaldi::RandGauss()) + 1e-2; - } - } - // Now generate random features with those means and variances. - feats[i].Resize(num_feat_comp * 200, dim); - for (int32 m = 0; m < num_feat_comp; ++m) { - kaldi::SubMatrix tmp(feats[i], m*200, 200, 0, dim); - ut::RandDiagGaussFeatures(200, means.Row(m), vars.Row(m), &tmp); - } - } - kaldi::SgmmUpdateFlagsType flags = kaldi::kSgmmAll; - TestMultiSgmmEst(models, feats, flags); - flags = (kaldi::kSgmmPhoneProjections | kaldi::kSgmmPhoneWeightProjections | - kaldi::kSgmmCovarianceMatrix); - TestMultiSgmmEst(models, feats, flags); - flags = (kaldi::kSgmmSpeakerProjections | kaldi::kSgmmCovarianceMatrix | - kaldi::kSgmmPhoneVectors); - TestMultiSgmmEst(models, feats, flags); - kaldi::DeletePointers(&models); -} - -int main() { - for (int i = 0; i < 10; ++i) - UnitTestEstimateSgmm(); - std::cout << "Test OK.\n"; - return 0; -} diff --git a/src/sgmm/estimate-am-sgmm-multi.cc b/src/sgmm/estimate-am-sgmm-multi.cc deleted file mode 100644 index 38d517b55ff..00000000000 --- a/src/sgmm/estimate-am-sgmm-multi.cc +++ /dev/null @@ -1,746 +0,0 @@ -// sgmm/estimate-am-sgmm-multi.cc - -// Copyright 2012 Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include -using std::string; -#include -using std::vector; - -#include "sgmm/am-sgmm.h" -#include "sgmm/estimate-am-sgmm-multi.h" -#include "thread/kaldi-thread.h" - -namespace kaldi { - -void MleAmSgmmGlobalAccs::ResizeAccumulators(const AmSgmm &model, - SgmmUpdateFlagsType flags) { - num_gaussians_ = model.NumGauss(); - feature_dim_ = model.FeatureDim(); - phn_space_dim_ = model.PhoneSpaceDim(); - spk_space_dim_ = model.SpkSpaceDim(); - - if (flags & (kSgmmPhoneProjections | kSgmmCovarianceMatrix)) { - Y_.resize(num_gaussians_); - Q_.resize(num_gaussians_); - for (int32 i = 0; i < num_gaussians_; ++i) { - Y_[i].Resize(feature_dim_, phn_space_dim_, kSetZero); - Q_[i].Resize(phn_space_dim_, kSetZero); - } - } else { - Y_.clear(); - Q_.clear(); - } - - if (flags & kSgmmCovarianceMatrix) { - S_.resize(num_gaussians_); - S_means_.resize(num_gaussians_); - for (int32 i = 0; i < num_gaussians_; i++) { - S_[i].Resize(feature_dim_, kSetZero); - S_means_[i].Resize(feature_dim_, kSetZero); - } - } else { - S_.clear(); - } - - if (flags & kSgmmSpeakerProjections) { - if (spk_space_dim_ == 0) { - KALDI_ERR << "Cannot set up accumulators for speaker projections " - << "because speaker subspace has not been set up"; - } - Z_.resize(num_gaussians_); - R_.resize(num_gaussians_); - for (int32 i = 0; i < num_gaussians_; ++i) { - Z_[i].Resize(feature_dim_, spk_space_dim_, kSetZero); - R_[i].Resize(spk_space_dim_, kSetZero); - } - } else { - Z_.clear(); - R_.clear(); - } - - gamma_i_.Resize(num_gaussians_, kSetZero); -} - -void MleAmSgmmGlobalAccs::ZeroAccumulators(SgmmUpdateFlagsType flags) { - if (flags & (kSgmmPhoneProjections | kSgmmCovarianceMatrix)) { - for (int32 i = 0, end = Y_.size(); i < end; ++i) - Y_[i].SetZero(); - } - if (flags & kSgmmCovarianceMatrix) { - for (int32 i = 0, end = S_.size(); i < end; ++i) { - S_[i].SetZero(); - S_means_[i].SetZero(); - } - } - - if (flags & kSgmmSpeakerProjections) { - for (int32 i = 0, end = Z_.size(); i < end; ++i) { - Z_[i].SetZero(); - R_[i].SetZero(); - } - } - gamma_i_.SetZero(); -} - -void MleAmSgmmGlobalAccs::AddAccumulators(const AmSgmm &model, - const MleAmSgmmAccs &accs, - SgmmUpdateFlagsType flags) { - total_frames_ += accs.total_frames_; - total_like_ += accs.total_like_; - for (int32 i = 0; i < num_gaussians_; ++i) { - if (flags & (kSgmmPhoneProjections | kSgmmCovarianceMatrix)) { - Y_[i].AddMat(1.0, accs.Y_[i], kNoTrans); - } - if (flags & kSgmmSpeakerProjections) { - Z_[i].AddMat(1.0, accs.Z_[i], kNoTrans); - R_[i].AddSp(1.0, accs.R_[i]); - } - if (flags & kSgmmCovarianceMatrix) - S_[i].AddSp(1.0, accs.S_[i]); - } - - // gamma_i - for (int32 j = 0; j < model.NumPdfs(); ++j) { - for (int32 m = 0; m < model.NumSubstates(j); ++m) { - gamma_i_.AddVec(1.0, accs.gamma_[j].Row(m)); - } - } - - // Compute the Q_i quantities (Eq. 64). - if (flags & kSgmmPhoneProjections) { - for (int32 i = 0; i < num_gaussians_; ++i) { - for (int32 j = 0; j < accs.num_states_; ++j) { - const Matrix &state_vec(model.StateVectors(j)); - for (int32 m = 0; m < model.NumSubstates(j); ++m) { - if (accs.gamma_[j](m, i) > 0.0) { - Q_[i].AddVec2(static_cast(accs.gamma_[j](m, i)), - state_vec.Row(m)); - } - } - } - } - } - - // Compute the S_i^{(means)} quantities (Eq. 74). 
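    // (Spelled out, Eq. (74) as implemented below computes, for each i,
    //
    //   S_i^{(means)} = sum_{j,m} gamma_{jmi} * mu_{jmi} * mu_{jmi}^T
    //                   - (Y_i M_i^T + M_i Y_i^T),
    //
    // with the -(Y M^T + M Y^T) part formed first in YM_MY.)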
- if (flags & kSgmmCovarianceMatrix) { - Matrix YM_MY(feature_dim_, feature_dim_); - SpMatrix tmp_S_means(feature_dim_); - Vector mu_jmi(feature_dim_); - for (int32 i = 0; i < num_gaussians_; ++i) { - // YM_MY = - (Y_{i} M_{i}^T) - Matrix M(model.GetPhoneProjection(i)); - YM_MY.AddMatMat(-1.0, accs.Y_[i], kNoTrans, M, kTrans, 0.0); - // Add its own transpose: YM_MY = - (Y_{i} M_{i}^T + M_{i} Y_{i}^T) - { - Matrix M(YM_MY, kTrans); - YM_MY.AddMat(1.0, M); - } - tmp_S_means.CopyFromMat(YM_MY); // Sigma_{i} = -(YM' + MY') - - for (int32 j = 0; j < accs.num_states_; ++j) { - for (int32 m = 0; m < model.NumSubstates(j); ++m) { - // Sigma_{i} += gamma_{jmi} * mu_{jmi}*mu_{jmi}^T - model.GetSubstateMean(j, m, i, &mu_jmi); - tmp_S_means.AddVec2(static_cast(accs.gamma_[j](m, i)), mu_jmi); - } - } - S_means_[i].AddSp(1.0, tmp_S_means); - KALDI_ASSERT(1.0 / S_means_[i](0, 0) != 0.0); - } - } -} - -BaseFloat MleAmSgmmUpdaterMulti::UpdateGlobals(const MleAmSgmmGlobalAccs &accs, - SgmmUpdateFlagsType flags) { - BaseFloat tot_impr = 0.0; - if (flags & kSgmmPhoneProjections) { - tot_impr += UpdateM(accs); - } - if (flags & kSgmmCovarianceMatrix) { - tot_impr += UpdateVars(accs); - } - if (flags & kSgmmSpeakerProjections) { - tot_impr += UpdateN(accs); - if (update_options_.renormalize_N) - KALDI_WARN << "Not renormalizing N"; - } - - KALDI_LOG << "**Total auxf improvement for phone projections & covariances is " - << (tot_impr) << " over " << accs.total_frames_ << " frames."; - return tot_impr; -} - -void MleAmSgmmUpdaterMulti::Update(const std::vector &accs, - const std::vector &models, - SgmmUpdateFlagsType flags) { - KALDI_ASSERT((flags & (kSgmmPhoneVectors | kSgmmPhoneProjections | - kSgmmPhoneWeightProjections | kSgmmCovarianceMatrix | - kSgmmSubstateWeights | kSgmmSpeakerProjections)) != 0); - if (accs.size() != models.size()) { - KALDI_ERR << "Found " << accs.size() << " accs and " << models.size() - << " models. 
Must have same number of models and accs."; - } - - SgmmUpdateFlagsType global_flags = (flags & (kSgmmPhoneProjections | - kSgmmPhoneWeightProjections | - kSgmmSpeakerProjections | - kSgmmCovarianceMatrix)); - SgmmUpdateFlagsType state_spec_flags = (flags & ~global_flags); - MleAmSgmmGlobalAccs glob_accs; - BaseFloat tot_impr = 0.0; - int32 num_models = models.size(); - - std::vector< SpMatrix > H; - if (update_options_.renormalize_V) - models[0]->ComputeH(&H); - - if (global_flags != 0) { // expected operating case - glob_accs.ResizeAccumulators(*models[0], global_flags); - for (int32 i = 0; i < num_models; ++i) { - glob_accs.AddAccumulators(*models[i], *accs[i], global_flags); - } - UpdateGlobals(glob_accs, global_flags); - - // Weight projection needs access to all models - if (global_flags & kSgmmPhoneWeightProjections) { - if (update_options_.use_sequential_weight_update) - KALDI_ERR << "Sequential weight update not implemented, using parallel"; -// tot_impr += UpdateWSequential(accs, model); -// } else { - tot_impr += UpdateWParallel(accs, models); -// } - } - } else { // Shouldn't be using this class without updating global params - KALDI_WARN << "Using MleAmSgmmUpdaterMulti class without updating global " - << " parameters."; - } - - // Update the state-specific parameters: phone vectors & substate weights - if (state_spec_flags != 0) { - MleAmSgmmOptions state_spec_opts = update_options_; - state_spec_opts.renormalize_V = false; - state_spec_opts.renormalize_N = false; - - MleAmSgmmUpdater sgmm_updater(state_spec_opts); - for (int32 i = 0; i < num_models; ++i) - tot_impr += sgmm_updater.Update(*accs[i], models[i], state_spec_flags); - } - - - if (update_options_.renormalize_V && (global_flags != 0)) { - SpMatrix H_sm; - this->ComputeSmoothingTerms(glob_accs, H, &H_sm); - RenormalizeV(H_sm, models); - } - - KALDI_LOG << "**Total auxf improvement, combining all parameters, over " - << "all model is " << tot_impr << " per frame."; - - // The following is just for diagnostics - double total_frames = 0, total_like = 0; - for (int32 i = 0; i < num_models; ++i) { - total_frames += accs[i]->TotalFrames(); - total_like += accs[i]->TotalLike(); - } - KALDI_LOG << "***Total data likelihood, over all models, is " - << (total_like/total_frames) << " over " << total_frames - << " frames."; - - // Now, copy the global parameters to the models - for (int32 i = 0; i < num_models; ++i) { - if ((flags & kSgmmPhoneProjections) || update_options_.renormalize_V) - models[i]->M_ = global_M_; - if (flags & kSgmmCovarianceMatrix) - models[i]->SigmaInv_ = global_SigmaInv_; - if ((flags & kSgmmSpeakerProjections) || update_options_.renormalize_N) - models[i]->N_ = global_N_; - if ((flags & kSgmmPhoneWeightProjections) || update_options_.renormalize_V) - models[i]->w_ = global_w_; - models[i]->ComputeNormalizers(); // So that the models are ready to use. - } -} - -// Compute H^{(sm)}, the "smoothing" matrices. -void MleAmSgmmUpdaterMulti::ComputeSmoothingTerms( - const MleAmSgmmGlobalAccs &accs, - const std::vector< SpMatrix > &H, - SpMatrix *H_sm) const { - KALDI_ASSERT(H_sm != NULL); - H_sm->Resize(PhoneSpaceDim()); - - double sum = 0.0; - for (int32 i = 0; i < NumGauss(); ++i) { - if (accs.gamma_i_(i) > 0) { - H_sm->AddSp(accs.gamma_i_(i), H[i]); - sum += accs.gamma_i_(i); - } - } - - if (sum == 0.0) { - KALDI_WARN << "Sum of counts is zero. 
Smoothing matrix set to unit"; - H_sm->SetUnit(); // arbitrary non-singular matrix - } else { - H_sm->Scale(1.0 / sum); - int32 tmp = H_sm->LimitCondDouble(update_options_.max_cond_H_sm); - if (tmp > 0) { - KALDI_WARN << "Limited " << tmp << " eigenvalues of H_sm."; - } - } -} - -double MleAmSgmmUpdaterMulti::UpdateM(const MleAmSgmmGlobalAccs &accs) { - double totcount = 0.0, tot_like_impr = 0.0; - for (int32 i = 0; i < accs.num_gaussians_; ++i) { - if (accs.gamma_i_(i) < accs.feature_dim_) { - KALDI_WARN << "For component " << i << ": not updating M due to very " - << "small count (=" << accs.gamma_i_(i) << ")."; - continue; - } - - - SolverOptions opts; - opts.name = "M"; - opts.K = update_options_.max_cond; - opts.eps = update_options_.epsilon; - - Matrix Mi(global_M_[i]); - double impr = - SolveQuadraticMatrixProblem(accs.Q_[i], accs.Y_[i], - SpMatrix(global_SigmaInv_[i]), - opts, &Mi); - global_M_[i].CopyFromMat(Mi); - - if (i % 50 == 0) { - KALDI_VLOG(2) << "Objf impr for projection M for i = " << i << ", is " - << (impr/(accs.gamma_i_(i) + 1.0e-20)) << " over " - << accs.gamma_i_(i) << " frames"; - } - totcount += accs.gamma_i_(i); - tot_like_impr += impr; - } - tot_like_impr /= (totcount + 1.0e-20); - KALDI_LOG << "Overall objective function improvement for model projections " - << "M is " << tot_like_impr << " over " << totcount << " frames"; - return tot_like_impr; -} - -double MleAmSgmmUpdaterMulti::UpdateN(const MleAmSgmmGlobalAccs &accs) { - double totcount = 0.0, tot_like_impr = 0.0; - if (accs.spk_space_dim_ == 0 || accs.R_.size() == 0 || accs.Z_.size() == 0) { - KALDI_ERR << "Speaker subspace dim is zero or no stats accumulated"; - } - - for (int32 i = 0; i < accs.num_gaussians_; ++i) { - if (accs.gamma_i_(i) < 2 * accs.spk_space_dim_) { - KALDI_WARN << "Not updating speaker basis for i = " << (i) - << " because count is too small " << (accs.gamma_i_(i)); - continue; - } - - SolverOptions opts; - opts.name = "N"; - opts.K = update_options_.max_cond; - opts.eps = update_options_.epsilon; - - Matrix Ni(global_N_[i]); - double impr = - SolveQuadraticMatrixProblem(accs.R_[i], accs.Z_[i], - SpMatrix(global_SigmaInv_[i]), - opts, &Ni); - global_N_[i].CopyFromMat(Ni); - if (i < 10) { - KALDI_LOG << "Objf impr for spk projection N for i = " << (i) - << ", is " << (impr / (accs.gamma_i_(i) + 1.0e-20)) << " over " - << (accs.gamma_i_(i)) << " frames"; - } - totcount += accs.gamma_i_(i); - tot_like_impr += impr; - } - - tot_like_impr /= (totcount+1.0e-20); - KALDI_LOG << "**Overall objf impr for N is " << tot_like_impr << " over " - << totcount << " frames"; - return tot_like_impr; -} - - -double MleAmSgmmUpdaterMulti::UpdateVars(const MleAmSgmmGlobalAccs &accs) { - SpMatrix Sigma_i(accs.feature_dim_), Sigma_i_ml(accs.feature_dim_); - double tot_objf_impr = 0.0, tot_t = 0.0; - SpMatrix covfloor(accs.feature_dim_); - Vector objf_improv(accs.num_gaussians_); - - // First pass over all (shared) Gaussian components to calculate the - // ML estimate of the covariances, and the total covariance for flooring. - for (int32 i = 0; i < accs.num_gaussians_; ++i) { - // Eq. (75): Sigma_{i}^{ml} = 1/gamma_{i} [S_{i} + S_{i}^{(means)} - ... - // Y_{i} M_{i}^T - M_{i} Y_{i}^T] - // Note the S_means_ already contains the Y_{i} M_{i}^T terms. - Sigma_i_ml.CopyFromSp(accs.S_means_[i]); - Sigma_i_ml.AddSp(1.0, accs.S_[i]); - covfloor.AddSp(1.0, Sigma_i_ml); - // inverting small values e.g. 4.41745328e-40 seems to generate inf, - // although would be fixed up later. 
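    // (Hence the guard below: the raw scatter is divided by gamma_i only when
    // the count is safely nonzero; otherwise the ML estimate falls back to the
    // identity, to be fixed up by the flooring further down.)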
- if (accs.gamma_i_(i) > 1.0e-20) { - Sigma_i_ml.Scale(1 / (accs.gamma_i_(i) + 1.0e-20)); - } else { - Sigma_i_ml.SetUnit(); - } - KALDI_ASSERT(1.0 / Sigma_i_ml(0, 0) != 0.0); - // Eq. (76): Compute the objective function with the old parameter values - objf_improv(i) = global_SigmaInv_[i].LogPosDefDet() - - TraceSpSp(SpMatrix(global_SigmaInv_[i]), Sigma_i_ml); - - global_SigmaInv_[i].CopyFromSp(Sigma_i_ml); // inverted in the next loop. - } - - // Compute the covariance floor. - if (accs.gamma_i_.Sum() == 0) { // If no count, use identity. - KALDI_WARN << "Updating variances: zero counts. Setting floor to unit."; - covfloor.SetUnit(); - } else { // else, use the global average covariance. - covfloor.Scale(update_options_.cov_floor / accs.gamma_i_.Sum()); - int32 tmp; - if ((tmp = covfloor.LimitCondDouble(update_options_.max_cond)) != 0) { - KALDI_WARN << "Covariance flooring matrix is poorly conditioned. Fixed " - << "up " << (tmp) << " eigenvalues."; - } - } - - if (update_options_.cov_diag_ratio > 1000) { - KALDI_LOG << "Assuming you want to build a diagonal system since " - << "cov_diag_ratio is large: making diagonal covFloor."; - for (int32 i = 0; i < covfloor.NumRows(); i++) - for (int32 j = 0; j < i; j++) - covfloor(i, j) = 0.0; - } - - // Second pass over all (shared) Gaussian components to calculate the - // floored estimate of the covariances, and update the model. - for (int32 i = 0; i < accs.num_gaussians_; ++i) { - Sigma_i.CopyFromSp(global_SigmaInv_[i]); - Sigma_i_ml.CopyFromSp(Sigma_i); - // In case of insufficient counts, make the covariance matrix diagonal. - // cov_diag_ratio is 2 by default, set to very large to always get diag-cov - if (accs.gamma_i_(i) < update_options_.cov_diag_ratio * accs.feature_dim_) { - KALDI_WARN << "For Gaussian component " << i << ": Too low count " - << accs.gamma_i_(i) << " for covariance matrix estimation. " - << "Setting to diagonal"; - for (int32 d = 0; d < accs.feature_dim_; d++) - for (int32 e = 0; e < d; e++) - Sigma_i(d, e) = 0.0; // SpMatrix, can only set lower traingular part - - int floored = Sigma_i.ApplyFloor(covfloor); - if (floored > 0) { - KALDI_WARN << "For Gaussian component " << i << ": Floored " << floored - << " covariance eigenvalues."; - } - global_SigmaInv_[i].CopyFromSp(Sigma_i); - global_SigmaInv_[i].InvertDouble(); - } else { // Updating the full covariance matrix. - try { - int floored = Sigma_i.ApplyFloor(covfloor); - if (floored > 0) { - KALDI_WARN << "For Gaussian component " << i << ": Floored " - << floored << " covariance eigenvalues."; - } - global_SigmaInv_[i].CopyFromSp(Sigma_i); - global_SigmaInv_[i].InvertDouble(); - - objf_improv(i) += Sigma_i.LogPosDefDet() + - TraceSpSp(SpMatrix(global_SigmaInv_[i]), Sigma_i_ml); - objf_improv(i) *= (-0.5 * accs.gamma_i_(i)); // Eq. (76) - - tot_objf_impr += objf_improv(i); - tot_t += accs.gamma_i_(i); - if (i < 5) { - KALDI_VLOG(2) << "objf impr from variance update =" << objf_improv(i) - / (accs.gamma_i_(i) + 1.0e-20) << " over " << (accs.gamma_i_(i)) - << " frames for i = " << (i); - } - } catch(...) { - KALDI_WARN << "Updating within-class covariance matrix i = " << (i) - << ", numerical problem"; - // This is a catch-all thing in case of unanticipated errors, but - // flooring should prevent this occurring for the most part. - global_SigmaInv_[i].SetUnit(); // Set to unit. 
- } - } - } - KALDI_LOG << "**Overall objf impr for variance update = " - << (tot_objf_impr / (tot_t+ 1.0e-20)) - << " over " << (tot_t) << " frames"; - return tot_objf_impr / (tot_t + 1.0e-20); -} - - -// The parallel weight update, in the paper. -double MleAmSgmmUpdaterMulti::UpdateWParallel( - const std::vector &accs, - const std::vector &models) { - KALDI_LOG << "Updating weight projections"; - - int32 phn_dim = models[0]->PhoneSpaceDim(), - num_gauss = models[0]->NumGauss(), - num_models = models.size(); - SpMatrix v_vT(phn_dim); - // tot_like_{after, before} are totals over multiple iterations, - // not valid likelihoods. but difference is valid (when divided by tot_count). - double tot_predicted_like_impr = 0.0, tot_like_before = 0.0, - tot_like_after = 0.0, tot_count = 0.0; - - Vector w_jm(num_gauss); - Matrix g_i(num_gauss, phn_dim); - std::vector< SpMatrix > F_i(num_gauss); - - Matrix w(global_w_); - for (int iter = 0; iter < update_options_.weight_projections_iters; iter++) { - for (int32 i = 0; i < num_gauss; ++i) { - F_i[i].Resize(phn_dim, kSetZero); - } - double k_like_before = 0.0, k_count = 0.0; - g_i.SetZero(); - - // Unlike in the report the inner most loop is over Gaussians, where - // per-gaussian statistics are accumulated. This is more memory demanding - // but more computationally efficient, as outer product v_{jvm} v_{jvm}^T - // is computed only once for all gaussians. - - for (int32 mdl_idx = 0; mdl_idx < num_models; ++mdl_idx) { - std::vector< Matrix > gamma(accs[mdl_idx]->GetOccs()); - for (int32 j = 0; j < models[mdl_idx]->NumPdfs(); j++) { - for (int32 m = 0; m < models[mdl_idx]->NumSubstates(j); m++) { - double gamma_jm = gamma[j].Row(m).Sum(); - k_count += gamma_jm; - - // w_jm = softmax([w_{k1}^T ... w_{kD}^T] * v_{jkm}) eq.(7) - w_jm.AddMatVec(1.0, w, kNoTrans, - Vector(models[mdl_idx]->v_[j].Row(m)), 0.0); - w_jm.Add((-1.0) * w_jm.LogSumExp()); - k_like_before += VecVec(w_jm, gamma[j].Row(m)); - w_jm.ApplyExp(); - v_vT.SetZero(); - // v_vT := v_{jkm} v_{jkm}^T - v_vT.AddVec2(1.0, models[mdl_idx]->v_[j].Row(m)); - - for (int32 i = 0; i < num_gauss; i++) { - // Suggestion: g_jkm can be computed more efficiently - // using the Vector/Matrix routines for all i at once - // linear term around cur value. - double linear_term = gamma[j](m, i) - gamma_jm * w_jm(i); - double quadratic_term = std::max(gamma[j](m, i), gamma_jm * w_jm(i)); - g_i.Row(i).AddVec(linear_term, models[mdl_idx]->v_[j].Row(m)); - // Now I am calling this F_i in the document. [dan] - F_i[i].AddSp(quadratic_term, v_vT); - } - } // loop over substates - } // loop over states - } // loop over model/acc pairs - - Matrix w_orig(w); - double k_predicted_like_impr = 0.0, k_like_after = 0.0; - double min_step = 0.001, step_size; - - SolverOptions opts; - opts.name = "w"; - opts.K = update_options_.max_cond; - opts.eps = update_options_.epsilon; - - for (step_size = 1.0; step_size >= min_step; step_size /= 2) { - k_predicted_like_impr = 0.0; - k_like_after = 0.0; - - for (int32 i = 0; i < num_gauss; i++) { - // auxf is formulated in terms of change in w. - Vector delta_w(phn_dim); - // returns objf impr with step_size = 1, - // but it may not be 1 so we recalculate it. - SolveQuadraticProblem(F_i[i], g_i.Row(i), opts, &delta_w); - - delta_w.Scale(step_size); - double predicted_impr = VecVec(delta_w, g_i.Row(i)) - - 0.5 * VecSpVec(delta_w, F_i[i], delta_w); - - // should never be negative because - // we checked inside SolveQuadraticProblem. 
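    // (Context for the assert below: predicted_impr is the quadratic-model
    // estimate g^T d - 0.5 d^T F d of the gain from step d = delta_w; if the
    // actual likelihood recomputed afterwards decreases instead, the
    // enclosing loop halves step_size -- down to min_step -- and retries with
    // a rescaled step.)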
- KALDI_ASSERT(predicted_impr >= -1.0e-05); - - if (i < 10) { - KALDI_LOG << "Predicted objf impr for w (not per frame), iter = " << - (iter) << ", i = " << (i) << " is " << (predicted_impr); - } - k_predicted_like_impr += predicted_impr; - w.Row(i).AddVec(1.0, delta_w); - } - - for (int32 mdl_idx = 0; mdl_idx < num_models; ++mdl_idx) { - std::vector< Matrix > gamma(accs[mdl_idx]->GetOccs()); - for (int32 j = 0; j < models[mdl_idx]->NumPdfs(); j++) { - for (int32 m = 0; m < models[mdl_idx]->NumSubstates(j); m++) { - w_jm.AddMatVec(1.0, w, kNoTrans, - Vector(models[mdl_idx]->v_[j].Row(m)), 0.0); - w_jm.Add((-1.0) * w_jm.LogSumExp()); - k_like_after += VecVec(w_jm, gamma[j].Row(m)); - } - } - } - KALDI_VLOG(2) << "For iteration " << (iter) << ", updating w gives " - << "predicted per-frame like impr " - << (k_predicted_like_impr / k_count) << ", actual " - << ((k_like_after - k_like_before) / k_count) << ", over " - << (k_count) << " frames"; - if (k_like_after < k_like_before) { - w.CopyFromMat(w_orig); // Undo what we computed. - if (fabs(k_like_after - k_like_before) / k_count < 1.0e-05) { - k_like_after = k_like_before; - KALDI_WARN << "Not updating weights as not increasing auxf and " - << "probably due to numerical issues (since small change)."; - break; - } else { - KALDI_WARN << "Halving step size for weights as likelihood did " - << "not increase"; - } - } else { - break; - } - } - if (step_size < min_step) { - // Undo any step as we have no confidence that this is right. - w.CopyFromMat(w_orig); - } else { - if (iter == 0) { - tot_count += k_count; - } - tot_predicted_like_impr += k_predicted_like_impr; - tot_like_after += k_like_after; - tot_like_before += k_like_before; - } - } - - global_w_.CopyFromMat(w); - - tot_predicted_like_impr /= tot_count; - tot_like_after = (tot_like_after - tot_like_before) / tot_count; - KALDI_LOG << "**Overall objf impr for w is " << tot_predicted_like_impr - << ", actual " << tot_like_after << ", over " - << tot_count << " frames"; - return tot_like_after; -} - -void MleAmSgmmUpdaterMulti::RenormalizeV(const SpMatrix &H_sm, - const vector &models) { - int32 phn_dim = PhoneSpaceDim(), - feat_dim = FeatureDim(), - num_models = models.size(); - SpMatrix Sigma(phn_dim); - int32 count = 0; - for (int32 mdl = 0; mdl < num_models; ++mdl) { - for (int32 j = 0; j < models[mdl]->NumPdfs(); ++j) { - for (int32 m = 0; m < models[mdl]->NumSubstates(j); ++m) { - count++; - Sigma.AddVec2(static_cast(1.0), models[mdl]->v_[j].Row(m)); - } - } - } - Sigma.Scale(1.0 / count); - int32 fixed_eigs = Sigma.LimitCondDouble(update_options_.max_cond); - if (fixed_eigs != 0) { - KALDI_WARN << "Scatter of vectors v is poorly conditioned. Fixed up " - << fixed_eigs << " eigenvalues."; - } - KALDI_LOG << "Eigenvalues of scatter of vectors v is : "; - Sigma.PrintEigs("Sigma"); - if (!Sigma.IsPosDef()) { - KALDI_LOG << "Not renormalizing v because scatter is not positive definite" - << " -- maybe first iter?"; - return; - } - - // Want to make variance of v unit and H_sm (like precision matrix) diagonal. - TpMatrix L(phn_dim); - L.Cholesky(Sigma); - TpMatrix LInv(L); - LInv.Invert(); - - Matrix tmpL(phn_dim, phn_dim); - tmpL.CopyFromTp(L); - - SpMatrix H_sm_proj(phn_dim); - H_sm_proj.AddMat2Sp(1.0, tmpL, kTrans, H_sm, 0.0); - // H_sm_proj := L^{T} * H_sm * L. - // This is right because we would transform the vectors themselves - // by L^{-1}, and H_sm is like the inverse of the vectors, - // so it's {L^{-1}}^{-T} = L^T. 
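    // (The general rule being used: if vectors transform as v' = T v, then
    // anything that pairs linearly with v -- such as the rows of w_ -- must
    // transform as w' = T^{-T} w so that w^T v is preserved, and quadratic
    // forms on v, like H_sm, transform as H' = T^{-T} H_sm T^{-1}.)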
- - Matrix U(phn_dim, phn_dim); - Vector eigs(phn_dim); - H_sm_proj.SymPosSemiDefEig(&eigs, &U, 1.0); // 1.0 means no checking +ve def -> faster - KALDI_LOG << "Note on the next diagnostic: the first number is generally not " - << "that meaningful as it relates to the static offset"; - H_sm_proj.PrintEigs("H_sm_proj (Significance of dims in vector space.. note)"); - - // Transform on vectors is U^T L^{-1}. - // Why? Because transform on H_sm is T =U^T L^T - // and we want T^{-T} by normal rules of vector/covector and we - // have (U^T L^T)^{-T} = (L U)^{-1} = U^T L^{-1}. - Matrix Trans(phn_dim, phn_dim); // T^{-T} - Matrix tmpLInv(phn_dim, phn_dim); - tmpLInv.CopyFromTp(LInv); - Trans.AddMatMat(1.0, U, kTrans, tmpLInv, kNoTrans, 0.0); - Matrix TransInv(Trans); - TransInv.Invert(); // T in above... - -#ifdef KALDI_PARANOID - { - SpMatrix H_sm_tmp(phn_dim); - H_sm_tmp.AddMat2Sp(1.0, TransInv, kTrans, H_sm, 0.0); - KALDI_ASSERT(H_sm_tmp.IsDiagonal(0.1)); - } - { - SpMatrix Sigma_tmp(phn_dim); - Sigma_tmp.AddMat2Sp(1.0, Trans, kNoTrans, Sigma, 0.0); - KALDI_ASSERT(Sigma_tmp.IsUnit(0.1)); - } -#endif - - for (int32 mdl = 0; mdl < num_models; ++mdl) { - for (int32 j = 0; j < models[mdl]->NumPdfs(); ++j) { - for (int32 m = 0; m < models[mdl]->NumSubstates(j); ++m) { - Vector tmp(phn_dim); - tmp.AddMatVec(1.0, Trans, kNoTrans, Vector(models[mdl]->v_[j].Row(m)), 0.0); - models[mdl]->v_[j].Row(m).CopyFromVec(tmp); - } - } - } - for (int32 i = 0; i < NumGauss(); ++i) { - Vector tmp(phn_dim); - tmp.AddMatVec(1.0, TransInv, kTrans, Vector(global_w_.Row(i)), 0.0); - global_w_.Row(i).CopyFromVec(tmp); - - Matrix tmpM(feat_dim, phn_dim); - // Multiplying on right not left so must not transpose TransInv. - tmpM.AddMatMat(1.0, Matrix(global_M_[i]), kNoTrans, - TransInv, kNoTrans, 0.0); - global_M_[i].CopyFromMat(tmpM); - } - KALDI_LOG << "Renormalized subspace."; -} - -} // namespace kaldi diff --git a/src/sgmm/estimate-am-sgmm-multi.h b/src/sgmm/estimate-am-sgmm-multi.h deleted file mode 100644 index 50eb28650b7..00000000000 --- a/src/sgmm/estimate-am-sgmm-multi.h +++ /dev/null @@ -1,146 +0,0 @@ -// sgmm/estimate-am-sgmm-multi.h - -// Copyright 2012 Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_SGMM_ESTIMATE_AM_SGMM_MULTI_H_ -#define KALDI_SGMM_ESTIMATE_AM_SGMM_MULTI_H_ 1 - -#include -#include - -#include "sgmm/am-sgmm.h" -#include "sgmm/estimate-am-sgmm.h" -#include "gmm/model-common.h" - -namespace kaldi { - -/** \class MleAmSgmmGlobalAccs - * Class for the accumulators associated with SGMM global parameters (e.g. - * phonetic-, weight- and speaker-projections; and covariances). This is - * used when the global parameters are updated using stats from multiple - * models. 
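  A minimal usage sketch (illustrative only; it mirrors what
  MleAmSgmmUpdaterMulti::Update() does with this class):

    MleAmSgmmGlobalAccs glob_accs;
    glob_accs.ResizeAccumulators(*models[0], flags);
    for (size_t i = 0; i < models.size(); i++)
      glob_accs.AddAccumulators(*models[i], *accs[i], flags);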
- */
-class MleAmSgmmGlobalAccs {
- public:
-  explicit MleAmSgmmGlobalAccs()
-      : feature_dim_(0), phn_space_dim_(0), spk_space_dim_(0),
-        num_gaussians_(0), total_frames_(0.0), total_like_(0.0) {}
-
-  /// Resizes the accumulators to the correct sizes given the model. The flags
-  /// argument controls which accumulators to resize.
-  void ResizeAccumulators(const AmSgmm &model, SgmmUpdateFlagsType flags);
-
-  /// Set the accumulators specified by the flags argument to zero.
-  void ZeroAccumulators(SgmmUpdateFlagsType flags);
-
-  /// Add another accumulator object.
-  void AddAccumulators(const AmSgmm &model, const MleAmSgmmAccs &acc,
-                       SgmmUpdateFlagsType flags);
-
-  int32 FeatureDim() const { return feature_dim_; }
-  int32 PhoneSpaceDim() const { return phn_space_dim_; }
-  int32 NumGauss() const { return num_gaussians_; }
-
- private:
-  /// The stats which are not tied to any state.
-  /// Stats Y_{i} for phonetic-subspace projections M; Dim is [I][D][S].
-  std::vector< Matrix<double> > Y_;
-  /// Stats Z_{i} for speaker-subspace projections N. Dim is [I][D][T].
-  std::vector< Matrix<double> > Z_;
-  /// R_{i}, quadratic term for speaker subspace estimation. Dim is [I][T][T].
-  std::vector< SpMatrix<double> > R_;
-  /// S_{i}^{-}, scatter of adapted feature vectors x_{i}(t). Dim is [I][D][D].
-  std::vector< SpMatrix<double> > S_;
-  /// Total occupancies gamma_i for each Gaussian. Dim is [I].
-  Vector<double> gamma_i_;
-
-  /// Q_{i}, quadratic term for phonetic subspace estimation. Dim is [I][S][S].
-  std::vector< SpMatrix<double> > Q_;
-  /// Eq (74): S_{i}^{(means)}, scatter of substate mean vectors for estimating
-  /// the shared covariance matrices. Dimension is [I][D][D].
-  std::vector< SpMatrix<double> > S_means_;
-
-  /// Dimensionality of various subspaces.
-  int32 feature_dim_, phn_space_dim_, spk_space_dim_;
-  int32 num_gaussians_;  ///< Other model specifications
-
-  double total_frames_, total_like_;
-
-  KALDI_DISALLOW_COPY_AND_ASSIGN(MleAmSgmmGlobalAccs);
-  friend class MleAmSgmmUpdaterMulti;
-};
-
-
-/** \class MleAmSgmmUpdaterMulti
- *  Contains the functions needed to update the parameters for multiple SGMMs
- *  whose global parameters are tied.
- */
-class MleAmSgmmUpdaterMulti {
- public:
-  explicit MleAmSgmmUpdaterMulti(const AmSgmm &model,
-                                 const MleAmSgmmOptions &options)
-      : update_options_(options), global_SigmaInv_(model.SigmaInv_),
-        global_M_(model.M_), global_N_(model.N_), global_w_(model.w_) {}
-
-  void Update(const std::vector<MleAmSgmmAccs*> &accs,
-              const std::vector<AmSgmm*> &models,
-              SgmmUpdateFlagsType flags);
-
-  /// Various model dimensions.
-  int32 NumGauss() const { return global_M_.size(); }
-  int32 PhoneSpaceDim() const { return global_w_.NumCols(); }
-  int32 SpkSpaceDim() const {
-    return (global_N_.size() > 0) ? global_N_[0].NumCols() : 0;
-  }
-  int32 FeatureDim() const { return global_M_[0].NumRows(); }
-
- private:
-  MleAmSgmmOptions update_options_;
-
-  /// SGMM global parameters that will be updated together and copied to the
-  /// different models:
-  std::vector< SpMatrix<BaseFloat> > global_SigmaInv_;
-  std::vector< Matrix<BaseFloat> > global_M_;
-  std::vector< Matrix<BaseFloat> > global_N_;
-  Matrix<BaseFloat> global_w_;
-
-  BaseFloat UpdateGlobals(const MleAmSgmmGlobalAccs &glob_accs,
-                          SgmmUpdateFlagsType flags);
-
-  double UpdateM(const MleAmSgmmGlobalAccs &accs);
-  double UpdateN(const MleAmSgmmGlobalAccs &accs);
-  double UpdateVars(const MleAmSgmmGlobalAccs &accs);
-  double UpdateWParallel(const std::vector<MleAmSgmmAccs*> &accs,
-                         const std::vector<AmSgmm*> &models);
-//  double UpdateWSequential(const std::vector<MleAmSgmmAccs*> &accs,
-//                           const std::vector<AmSgmm*> &models);
-
-  void ComputeSmoothingTerms(const MleAmSgmmGlobalAccs &accs,
-                             const std::vector< SpMatrix<double> > &H,
-                             SpMatrix<double> *H_sm) const;
-  void RenormalizeV(const SpMatrix<double> &H_sm,
-                    const std::vector<AmSgmm*> &models);
-
-  KALDI_DISALLOW_COPY_AND_ASSIGN(MleAmSgmmUpdaterMulti);
-  MleAmSgmmUpdaterMulti() {}  // Prevent unconfigured updater.
-};
-
-}  // namespace kaldi
-
-
-#endif  // KALDI_SGMM_ESTIMATE_AM_SGMM_MULTI_H_
diff --git a/src/sgmm/estimate-am-sgmm-test.cc b/src/sgmm/estimate-am-sgmm-test.cc
deleted file mode 100644
index a671b7fcb74..00000000000
--- a/src/sgmm/estimate-am-sgmm-test.cc
+++ /dev/null
@@ -1,161 +0,0 @@
-// sgmm/estimate-am-sgmm-test.cc
-
-// Copyright 2009-2011  Saarland University
-// Author:  Arnab Ghoshal
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#include "gmm/model-test-common.h"
-#include "sgmm/am-sgmm.h"
-#include "sgmm/estimate-am-sgmm.h"
-#include "util/kaldi-io.h"
-#include "base/kaldi-math.h"
-
-using kaldi::AmSgmm;
-using kaldi::MleAmSgmmAccs;
-using kaldi::BaseFloat;
-using kaldi::Exp;
-
-namespace ut = kaldi::unittest;
-
-// Tests the Read() and Write() methods for the accumulators, in both binary
-// and ASCII mode, as well as Check().
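The comment above describes a write-then-read round trip; the idiom can be sketched in isolation. In this sketch ToyAccs is an invented stand-in for MleAmSgmmAccs (not a Kaldi type), and both ASCII and binary modes are exercised, in the same order as the test below:

#include <cassert>
#include <istream>
#include <ostream>
#include <sstream>

struct ToyAccs {
  double total_like = 0.0;
  void Write(std::ostream &os, bool binary) const {
    if (binary)
      os.write(reinterpret_cast<const char*>(&total_like), sizeof(total_like));
    else
      os << total_like << '\n';
  }
  void Read(std::istream &is, bool binary) {
    if (binary)
      is.read(reinterpret_cast<char*>(&total_like), sizeof(total_like));
    else
      is >> total_like;
  }
};

int main() {
  ToyAccs a;
  a.total_like = 42.5;
  for (bool binary : {false, true}) {  // non-binary first, as in the test below
    std::stringstream ss;
    a.Write(ss, binary);
    ToyAccs b;
    b.Read(ss, binary);
    assert(b.total_like == a.total_like);  // the round trip must be lossless
  }
  return 0;
}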
-void TestUpdateAndAccsIO(const AmSgmm &sgmm, - const kaldi::Matrix &feats) { - using namespace kaldi; - typedef kaldi::int32 int32; - - kaldi::SgmmUpdateFlagsType flags = kaldi::kSgmmAll; - kaldi::SgmmPerFrameDerivedVars frame_vars; - kaldi::SgmmPerSpkDerivedVars empty; - frame_vars.Resize(sgmm.NumGauss(), sgmm.FeatureDim(), - sgmm.PhoneSpaceDim()); - kaldi::SgmmGselectConfig sgmm_config; - sgmm_config.full_gmm_nbest = std::min(sgmm_config.full_gmm_nbest, - sgmm.NumGauss()); - MleAmSgmmAccs accs(sgmm, flags); - BaseFloat loglike = 0.0; - Vector empty_spk; - for (int32 i = 0; i < feats.NumRows(); i++) { - std::vector gselect; - sgmm.GaussianSelection(sgmm_config, feats.Row(i), &gselect); - sgmm.ComputePerFrameVars(feats.Row(i), gselect, empty, 0.0, &frame_vars); - loglike += accs.Accumulate(sgmm, frame_vars, empty_spk, 0, 1.0, flags); - } - accs.CommitStatsForSpk(sgmm, empty_spk); - - kaldi::MleAmSgmmOptions update_opts; - update_opts.check_v = (Rand()%2 == 0); - AmSgmm *sgmm1 = new AmSgmm(); - sgmm1->CopyFromSgmm(sgmm, false); - kaldi::MleAmSgmmUpdater updater(update_opts); - updater.Update(accs, sgmm1, flags); - std::vector gselect; - - sgmm1->GaussianSelection(sgmm_config, feats.Row(0), &gselect); - sgmm1->ComputePerFrameVars(feats.Row(0), gselect, empty, 0.0, &frame_vars); - BaseFloat loglike1 = sgmm1->LogLikelihood(frame_vars, 0); - delete sgmm1; - - // First, non-binary write - accs.Write(kaldi::Output("tmpf", false).Stream(), false); - bool binary_in; - MleAmSgmmAccs *accs1 = new MleAmSgmmAccs(); - // Non-binary read - kaldi::Input ki1("tmpf", &binary_in); - accs1->Read(ki1.Stream(), binary_in, false); - accs1->Check(sgmm, true); - AmSgmm *sgmm2 = new AmSgmm(); - sgmm2->CopyFromSgmm(sgmm, false); - updater.Update(*accs1, sgmm2, flags); - - sgmm2->GaussianSelection(sgmm_config, feats.Row(0), &gselect); - sgmm2->ComputePerFrameVars(feats.Row(0), gselect, empty, 0.0, &frame_vars); - BaseFloat loglike2 = sgmm2->LogLikelihood(frame_vars, 0); - kaldi::AssertEqual(loglike1, loglike2, 1e-4); - delete accs1; - - // Next, binary write - accs.Write(kaldi::Output("tmpfb", true).Stream(), true); - MleAmSgmmAccs *accs2 = new MleAmSgmmAccs(); - // Binary read - kaldi::Input ki2("tmpfb", &binary_in); - accs2->Read(ki2.Stream(), binary_in, false); - accs2->Check(sgmm, true); - AmSgmm *sgmm3 = new AmSgmm(); - sgmm3->CopyFromSgmm(sgmm, false); - updater.Update(*accs2, sgmm3, flags); - sgmm3->GaussianSelection(sgmm_config, feats.Row(0), &gselect); - sgmm3->ComputePerFrameVars(feats.Row(0), gselect, empty, 0.0, &frame_vars); - BaseFloat loglike3 = sgmm3->LogLikelihood(frame_vars, 0); - kaldi::AssertEqual(loglike1, loglike3, 1e-6); - - // Testing the MAP update of M - update_opts.tau_map_M = 100; - update_opts.full_col_cov = (RandUniform() > 0.5)? true : false; - update_opts.full_row_cov = (RandUniform() > 0.5)? 
true : false; - kaldi::MleAmSgmmUpdater updater_map(update_opts); - BaseFloat impr = updater_map.Update(*accs2, sgmm3, flags); - KALDI_ASSERT(impr >= 0); - - delete accs2; - delete sgmm2; - delete sgmm3; - - unlink("tmpf"); - unlink("tmpfb"); -} - -void UnitTestEstimateSgmm() { - int32 dim = 1 + kaldi::RandInt(0, 9); // random dimension of the gmm - int32 num_comp = 2 + kaldi::RandInt(0, 9); // random mixture size - kaldi::FullGmm full_gmm; - ut::InitRandFullGmm(dim, num_comp, &full_gmm); - - int32 num_states = 1; - AmSgmm sgmm; - kaldi::SgmmGselectConfig config; - sgmm.InitializeFromFullGmm(full_gmm, num_states, dim+1, dim); - sgmm.ComputeNormalizers(); - - kaldi::Matrix feats; - - { // First, generate random means and variances - int32 num_feat_comp = num_comp + kaldi::RandInt(-num_comp/2, num_comp/2); - kaldi::Matrix means(num_feat_comp, dim), - vars(num_feat_comp, dim); - for (int32 m = 0; m < num_feat_comp; m++) { - for (int32 d= 0; d < dim; d++) { - means(m, d) = kaldi::RandGauss(); - vars(m, d) = Exp(kaldi::RandGauss()) + 1e-2; - } - } - // Now generate random features with those means and variances. - feats.Resize(num_feat_comp * 200, dim); - for (int32 m = 0; m < num_feat_comp; m++) { - kaldi::SubMatrix tmp(feats, m*200, 200, 0, dim); - ut::RandDiagGaussFeatures(200, means.Row(m), vars.Row(m), &tmp); - } - } - TestUpdateAndAccsIO(sgmm, feats); -} - -int main() { - for (int i = 0; i < 10; i++) - UnitTestEstimateSgmm(); - std::cout << "Test OK.\n"; - return 0; -} diff --git a/src/sgmm/estimate-am-sgmm.cc b/src/sgmm/estimate-am-sgmm.cc deleted file mode 100644 index 1e95e6b281c..00000000000 --- a/src/sgmm/estimate-am-sgmm.cc +++ /dev/null @@ -1,2135 +0,0 @@ -// sgmm/estimate-am-sgmm.cc - -// Copyright 2009-2011 Microsoft Corporation; Lukas Burget; -// Saarland University (Author: Arnab Ghoshal); -// Ondrej Glembek; Yanmin Qian; -// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey) -// Liang Lu; Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- - -#include "sgmm/am-sgmm.h" -#include "sgmm/estimate-am-sgmm.h" -#include "thread/kaldi-thread.h" - -namespace kaldi { -using std::string; -using std::vector; - -void MleAmSgmmAccs::Write(std::ostream &out_stream, bool binary) const { - uint32 tmp_uint32; - - WriteToken(out_stream, binary, ""); - - WriteToken(out_stream, binary, ""); - tmp_uint32 = static_cast(num_states_); - WriteBasicType(out_stream, binary, tmp_uint32); - WriteToken(out_stream, binary, ""); - tmp_uint32 = static_cast(num_gaussians_); - WriteBasicType(out_stream, binary, tmp_uint32); - WriteToken(out_stream, binary, ""); - tmp_uint32 = static_cast(feature_dim_); - WriteBasicType(out_stream, binary, tmp_uint32); - WriteToken(out_stream, binary, ""); - tmp_uint32 = static_cast(phn_space_dim_); - WriteBasicType(out_stream, binary, tmp_uint32); - WriteToken(out_stream, binary, ""); - tmp_uint32 = static_cast(spk_space_dim_); - WriteBasicType(out_stream, binary, tmp_uint32); - if (!binary) out_stream << "\n"; - - if (Y_.size() != 0) { - KALDI_ASSERT(gamma_.size() != 0); - WriteToken(out_stream, binary, ""); - for (int32 i = 0; i < num_gaussians_; i++) { - Y_[i].Write(out_stream, binary); - } - } - if (Z_.size() != 0) { - KALDI_ASSERT(R_.size() != 0); - WriteToken(out_stream, binary, ""); - for (int32 i = 0; i < num_gaussians_; i++) { - Z_[i].Write(out_stream, binary); - } - WriteToken(out_stream, binary, ""); - for (int32 i = 0; i < num_gaussians_; i++) { - R_[i].Write(out_stream, binary); - } - } - if (S_.size() != 0) { - KALDI_ASSERT(gamma_.size() != 0); - WriteToken(out_stream, binary, ""); - for (int32 i = 0; i < num_gaussians_; i++) { - S_[i].Write(out_stream, binary); - } - } - if (y_.size() != 0) { - KALDI_ASSERT(gamma_.size() != 0); - WriteToken(out_stream, binary, ""); - for (int32 j = 0; j < num_states_; j++) { - y_[j].Write(out_stream, binary); - } - } - if (gamma_.size() != 0) { - WriteToken(out_stream, binary, ""); - for (int32 j = 0; j < num_states_; j++) { - gamma_[j].Write(out_stream, binary); - } - } - WriteToken(out_stream, binary, ""); - WriteBasicType(out_stream, binary, total_like_); - - WriteToken(out_stream, binary, ""); - WriteBasicType(out_stream, binary, total_frames_); - - WriteToken(out_stream, binary, ""); -} - -void MleAmSgmmAccs::Read(std::istream &in_stream, bool binary, - bool add) { - uint32 tmp_uint32; - string token; - - ExpectToken(in_stream, binary, ""); - - ExpectToken(in_stream, binary, ""); - ReadBasicType(in_stream, binary, &tmp_uint32); - num_states_ = static_cast(tmp_uint32); - ExpectToken(in_stream, binary, ""); - ReadBasicType(in_stream, binary, &tmp_uint32); - num_gaussians_ = static_cast(tmp_uint32); - ExpectToken(in_stream, binary, ""); - ReadBasicType(in_stream, binary, &tmp_uint32); - feature_dim_ = static_cast(tmp_uint32); - ExpectToken(in_stream, binary, ""); - ReadBasicType(in_stream, binary, &tmp_uint32); - phn_space_dim_ = static_cast(tmp_uint32); - ExpectToken(in_stream, binary, ""); - ReadBasicType(in_stream, binary, &tmp_uint32); - spk_space_dim_ = static_cast(tmp_uint32); - - ReadToken(in_stream, binary, &token); - - while (token != "") { - if (token == "") { - Y_.resize(num_gaussians_); - for (size_t i = 0; i < Y_.size(); i++) { - Y_[i].Read(in_stream, binary, add); - } - } else if (token == "") { - Z_.resize(num_gaussians_); - for (size_t i = 0; i < Z_.size(); i++) { - Z_[i].Read(in_stream, binary, add); - } - } else if (token == "") { - R_.resize(num_gaussians_); - if (gamma_s_.Dim() == 0) gamma_s_.Resize(num_gaussians_); - for (size_t i = 0; i < R_.size(); 
i++) { - R_[i].Read(in_stream, binary, add); - } - } else if (token == "") { - S_.resize(num_gaussians_); - for (size_t i = 0; i < S_.size(); i++) { - S_[i].Read(in_stream, binary, add); - } - } else if (token == "") { - y_.resize(num_states_); - for (int32 j = 0; j < num_states_; j++) { - y_[j].Read(in_stream, binary, add); - } - } else if (token == "") { - gamma_.resize(num_states_); - for (int32 j = 0; j < num_states_; j++) { - gamma_[j].Read(in_stream, binary, add); - } - // Don't read gamma_s, it's just a temporary variable and - // not part of the permanent (non-speaker-specific) accs. - } else if (token == "") { - double total_like; - ReadBasicType(in_stream, binary, &total_like); - if (add) - total_like_ += total_like; - else - total_like_ = total_like; - } else if (token == "") { - double total_frames; - ReadBasicType(in_stream, binary, &total_frames); - if (add) - total_frames_ += total_frames; - else - total_frames_ = total_frames; - } else { - KALDI_ERR << "Unexpected token '" << token << "' in model file "; - } - ReadToken(in_stream, binary, &token); - } -} - -void MleAmSgmmAccs::Check(const AmSgmm &model, - bool show_properties) const { - if (show_properties) { - KALDI_LOG << "SgmmPdfModel: J = " << num_states_ << ", D = " << - feature_dim_ << ", S = " << phn_space_dim_ << ", T = " << - spk_space_dim_ << ", I = " << num_gaussians_; - } - KALDI_ASSERT(num_states_ == model.NumPdfs() && num_states_ > 0); - KALDI_ASSERT(num_gaussians_ == model.NumGauss() && num_gaussians_ > 0); - KALDI_ASSERT(feature_dim_ == model.FeatureDim() && feature_dim_ > 0); - KALDI_ASSERT(phn_space_dim_ == model.PhoneSpaceDim() && phn_space_dim_ > 0); - KALDI_ASSERT(spk_space_dim_ == model.SpkSpaceDim()); - - std::ostringstream debug_str; - - if (Y_.size() == 0) { - debug_str << "Y: no. "; - } else { - KALDI_ASSERT(gamma_.size() != 0); - KALDI_ASSERT(Y_.size() == static_cast(num_gaussians_)); - bool nz = false; - for (int32 i = 0; i < num_gaussians_; i++) { - KALDI_ASSERT(Y_[i].NumRows() == feature_dim_ && - Y_[i].NumCols() == phn_space_dim_); - if (!nz && Y_[i](0, 0) != 0) { nz = true; } - } - debug_str << "Y: yes, " << string(nz ? "nonzero. " : "zero. "); - } - - if (Z_.size() == 0) { - KALDI_ASSERT(R_.size() == 0); - debug_str << "Z, R: no. "; - } else { - KALDI_ASSERT(gamma_s_.Dim() == num_gaussians_); - KALDI_ASSERT(Z_.size() == static_cast(num_gaussians_)); - KALDI_ASSERT(R_.size() == static_cast(num_gaussians_)); - bool Z_nz = false, R_nz = false; - for (int32 i = 0; i < num_gaussians_; i++) { - KALDI_ASSERT(Z_[i].NumRows() == feature_dim_ && - Z_[i].NumCols() == spk_space_dim_); - KALDI_ASSERT(R_[i].NumRows() == spk_space_dim_); - if (!Z_nz && Z_[i](0, 0) != 0) { Z_nz = true; } - if (!R_nz && R_[i](0, 0) != 0) { R_nz = true; } - } - bool gamma_s_nz = !gamma_s_.IsZero(); - debug_str << "Z: yes, " << string(Z_nz ? "nonzero. " : "zero. "); - debug_str << "R: yes, " << string(R_nz ? "nonzero. " : "zero. "); - debug_str << "gamma_s: yes, " << string(gamma_s_nz ? "nonzero. " : "zero. "); - } - - if (S_.size() == 0) { - debug_str << "S: no. "; - } else { - KALDI_ASSERT(gamma_.size() != 0); - bool S_nz = false; - KALDI_ASSERT(S_.size() == static_cast(num_gaussians_)); - for (int32 i = 0; i < num_gaussians_; i++) { - KALDI_ASSERT(S_[i].NumRows() == feature_dim_); - if (!S_nz && S_[i](0, 0) != 0) { S_nz = true; } - } - debug_str << "S: yes, " << string(S_nz ? "nonzero. " : "zero. "); - } - - if (y_.size() == 0) { - debug_str << "y: no. 
"; - } else { - KALDI_ASSERT(gamma_.size() != 0); - bool nz = false; - KALDI_ASSERT(y_.size() == static_cast(num_states_)); - for (int32 j = 0; j < num_states_; j++) { - KALDI_ASSERT(y_[j].NumRows() == model.NumSubstates(j)); - KALDI_ASSERT(y_[j].NumCols() == phn_space_dim_); - if (!nz && y_[j](0, 0) != 0) { nz = true; } - } - debug_str << "y: yes, " << string(nz ? "nonzero. " : "zero. "); - } - - if (gamma_.size() == 0) { - debug_str << "gamma: no. "; - } else { - debug_str << "gamma: yes. "; - bool nz = false; - KALDI_ASSERT(gamma_.size() == static_cast(num_states_)); - for (int32 j = 0; j < num_states_; j++) { - KALDI_ASSERT(gamma_[j].NumRows() == model.NumSubstates(j) && - gamma_[j].NumCols() == num_gaussians_); - // Just test the first substate for nonzero, else it would take too long. - if (!nz && gamma_[j].Row(0).Norm(1.0) != 0) { nz = true; } - } - debug_str << "gamma: yes, " << string(nz ? "nonzero. " : "zero. "); - } - - if (show_properties) - KALDI_LOG << "Subspace GMM model properties: " << debug_str.str(); -} - -void MleAmSgmmAccs::ResizeAccumulators(const AmSgmm &model, - SgmmUpdateFlagsType flags) { - num_states_ = model.NumPdfs(); - num_gaussians_ = model.NumGauss(); - feature_dim_ = model.FeatureDim(); - phn_space_dim_ = model.PhoneSpaceDim(); - spk_space_dim_ = model.SpkSpaceDim(); - - if (flags & (kSgmmPhoneProjections | kSgmmCovarianceMatrix)) { - Y_.resize(num_gaussians_); - for (int32 i = 0; i < num_gaussians_; i++) { - Y_[i].Resize(feature_dim_, phn_space_dim_); - } - } else { - Y_.clear(); - } - - if (flags & kSgmmSpeakerProjections) { - if (spk_space_dim_ == 0) { - KALDI_ERR << "Cannot set up accumulators for speaker projections " - << "because speaker subspace has not been set up"; - } - gamma_s_.Resize(num_gaussians_); - Z_.resize(num_gaussians_); - R_.resize(num_gaussians_); - for (int32 i = 0; i < num_gaussians_; i++) { - Z_[i].Resize(feature_dim_, spk_space_dim_); - R_[i].Resize(spk_space_dim_); - } - } else { - gamma_s_.Resize(0); - Z_.clear(); - R_.clear(); - } - - if (flags & kSgmmCovarianceMatrix) { - S_.resize(num_gaussians_); - for (int32 i = 0; i < num_gaussians_; i++) { - S_[i].Resize(feature_dim_); - } - } else { - S_.clear(); - } - - if (flags & (kSgmmPhoneVectors | kSgmmPhoneWeightProjections | - kSgmmCovarianceMatrix | kSgmmSubstateWeights | - kSgmmPhoneProjections)) { - gamma_.resize(num_states_); - total_frames_ = total_like_ = 0; - for (int32 j = 0; j < num_states_; j++) { - gamma_[j].Resize(model.NumSubstates(j), num_gaussians_); - } - } else { - gamma_.clear(); - total_frames_ = total_like_ = 0; - } - - if (flags & kSgmmPhoneVectors) { - y_.resize(num_states_); - for (int32 j = 0; j < num_states_; j++) { - y_[j].Resize(model.NumSubstates(j), phn_space_dim_); - } - } else { - y_.clear(); - } -} - -BaseFloat MleAmSgmmAccs::Accumulate(const AmSgmm &model, - const SgmmPerFrameDerivedVars &frame_vars, - const VectorBase &v_s, // may be empty - int32 j, BaseFloat weight, - SgmmUpdateFlagsType flags) { - // Calculate Gaussian posteriors and collect statistics - Matrix posteriors; - BaseFloat log_like = model.ComponentPosteriors(frame_vars, j, &posteriors); - posteriors.Scale(weight); - BaseFloat count = AccumulateFromPosteriors(model, frame_vars, posteriors, - v_s, j, flags); - // Note: total_frames_ is incremented in AccumulateFromPosteriors(). 
- total_like_ += count * log_like; - return log_like; -} - - -BaseFloat MleAmSgmmAccs::AccumulateFromPosteriors( - const AmSgmm &model, - const SgmmPerFrameDerivedVars &frame_vars, - const Matrix &posteriors, - const VectorBase &v_s, // may be empty - int32 j, - SgmmUpdateFlagsType flags) { - double tot_count = 0.0; - const vector &gselect = frame_vars.gselect; - // Intermediate variables - Vector gammat(gselect.size()); - Vector xt_jmi(feature_dim_), mu_jmi(feature_dim_), - zt_jmi(spk_space_dim_); - - int32 num_substates = model.NumSubstates(j); - for (int32 ki = 0; ki < static_cast(gselect.size()); ki++) { - int32 i = gselect[ki]; - - for (int32 m = 0; m < num_substates; m++) { - // Eq. (39): gamma_{jmi}(t) = p (j, m, i|t) - BaseFloat gammat_jmi = RandPrune(posteriors(ki, m), rand_prune_); - - // Accumulate statistics for non-zero gaussian posterior - if (gammat_jmi != 0.0) { - tot_count += gammat_jmi; - if (flags & (kSgmmPhoneVectors | kSgmmPhoneWeightProjections | - kSgmmCovarianceMatrix | kSgmmSubstateWeights | - kSgmmPhoneProjections)) { - // Eq. (40): gamma_{jmi} = \sum_t gamma_{jmi}(t) - gamma_[j](m, i) += gammat_jmi; - } - - if (flags & kSgmmPhoneVectors) { - // Eq. (41): y_{jm} = \sum_{t, i} \gamma_{jmi}(t) z_{i}(t) - // Suggestion: move this out of the loop over m - y_[j].Row(m).AddVec(gammat_jmi, frame_vars.zti.Row(ki)); - } - - if (flags & (kSgmmPhoneProjections | kSgmmCovarianceMatrix)) { - // Eq. (42): Y_{i} = \sum_{t, j, m} \gamma_{jmi}(t) x_{i}(t) v_{jm}^T - Y_[i].AddVecVec(gammat_jmi, frame_vars.xti.Row(ki), - model.StateVectors(j).Row(m)); - } - - if (flags & kSgmmCovarianceMatrix) - gammat(ki) += gammat_jmi; - - // Accumulate for speaker projections - if (flags & kSgmmSpeakerProjections) { - KALDI_ASSERT(spk_space_dim_ > 0); - // Eq. (43): x_{jmi}(t) = x_k(t) - M{i} v_{jm} - model.GetSubstateMean(j, m, i, &mu_jmi); - xt_jmi.CopyFromVec(frame_vars.xt); - xt_jmi.AddVec(-1.0, mu_jmi); - // Eq. (44): Z_{i} = \sum_{t, j, m} \gamma_{jmi}(t) x_{jmi}(t) v^{s}' - if (v_s.Dim() != 0) // interpret empty v_s as zero. - Z_[i].AddVecVec(gammat_jmi, xt_jmi, v_s); - // Eq. (49): \gamma_{i}^{(s)} = \sum_{t\in\Tau(s), j, m} gamma_{jmi} - // Will be used when you call CommitStatsForSpk(), to update R_. - gamma_s_(i) += gammat_jmi; - } - } // non-zero posteriors - } // loop over substates - } // loop over selected Gaussians - - if (flags & kSgmmCovarianceMatrix) { - for (int32 ki = 0; ki < static_cast(gselect.size()); ki++) { - int32 i = gselect[ki]; - // Eq. 
(47): S_{i} = \sum_{t, j, m} \gamma_{jmi}(t) x_{i}(t) x_{i}(t)^T - if (gammat(ki) != 0.0) - S_[i].AddVec2(gammat(ki), frame_vars.xti.Row(ki)); - } - } - total_frames_ += tot_count; - return tot_count; -} - -void MleAmSgmmAccs::CommitStatsForSpk(const AmSgmm &model, - const VectorBase &v_s) { - if (v_s.Dim() != 0 && spk_space_dim_ > 0 && gamma_s_.Dim() != 0) { - if (!v_s.IsZero()) - for (int32 i = 0; i < num_gaussians_; i++) - // Accumulate Statistics R_{ki} - if (gamma_s_(i) != 0.0) - R_[i].AddVec2(static_cast(gamma_s_(i)), v_s); - } - gamma_s_.SetZero(); -} - -void MleAmSgmmAccs::GetStateOccupancies(Vector *occs) const { - occs->Resize(gamma_.size()); - for (int32 j = 0, end = gamma_.size(); j < end; j++) { - (*occs)(j) = gamma_[j].Sum(); - } -} - -BaseFloat MleAmSgmmUpdater::Update(const MleAmSgmmAccs &accs, - AmSgmm *model, - SgmmUpdateFlagsType flags) { - KALDI_ASSERT((flags & (kSgmmPhoneVectors | kSgmmPhoneProjections | - kSgmmPhoneWeightProjections | kSgmmCovarianceMatrix | - kSgmmSubstateWeights | kSgmmSpeakerProjections)) != 0); - - if (flags & kSgmmPhoneProjections) - ComputeQ(accs, *model, &Q_); - if (flags & kSgmmCovarianceMatrix) - ComputeSMeans(accs, *model, &S_means_); - - // quantities used in both vector and weights updates... - vector< SpMatrix > H; - // "smoothing" matrices, weighted sums of above. - SpMatrix H_sm; - Vector y_sm; // "smoothing" vectors - if ((flags & (kSgmmPhoneVectors | kSgmmPhoneWeightProjections)) - || update_options_.renormalize_V) { - model->ComputeH(&H); - ComputeSmoothingTerms(accs, *model, H, &H_sm, - (flags & kSgmmPhoneVectors) ? &y_sm : NULL); - } - - BaseFloat tot_impr = 0.0; - - if (flags & kSgmmPhoneVectors) { - if (update_options_.check_v) { - KALDI_ASSERT(update_options_.tau_vec == 0 && - "You cannot combine the check-v and tau-vec options."); - tot_impr += UpdatePhoneVectorsChecked(accs, model, H); - } else { - tot_impr += UpdatePhoneVectors(accs, model, H, H_sm, y_sm); - } - } - if (flags & kSgmmPhoneProjections) { - if (update_options_.tau_map_M > 0.0) - tot_impr += MapUpdateM(accs, model); // MAP adaptation of M - else - tot_impr += UpdateM(accs, model); - } - - if (flags & kSgmmPhoneWeightProjections) { - if (update_options_.use_sequential_weight_update) { - tot_impr += UpdateWSequential(accs, model); - } else { - tot_impr += UpdateWParallel(accs, model); - } - } - if (flags & kSgmmCovarianceMatrix) - tot_impr += UpdateVars(accs, model); - if (flags & kSgmmSubstateWeights) - tot_impr += UpdateSubstateWeights(accs, model); - if (flags & kSgmmSpeakerProjections) { - tot_impr += UpdateN(accs, model); - if (update_options_.renormalize_N) - RenormalizeN(accs, model); // if you renormalize N you have to - // alter any speaker vectors you're keeping around, as well. - } - - if (update_options_.renormalize_V) - RenormalizeV(accs, model, H_sm); - - KALDI_LOG << "*Overall auxf improvement, combining all parameters, is " - << tot_impr; - - KALDI_LOG << "***Overall data likelihood is " - << (accs.total_like_/accs.total_frames_) - << " over " << (accs.total_frames_) << " frames."; - - model->ComputeNormalizers(); // So that the model is ready to use. - return tot_impr; -} - -// Compute the Q_{i} (Eq. 
64) -void MleAmSgmmUpdater::ComputeQ(const MleAmSgmmAccs &accs, - const AmSgmm &model, - std::vector< SpMatrix > *Q) { - Q->resize(accs.num_gaussians_); - for (int32 i = 0; i < accs.num_gaussians_; i++) { - (*Q)[i].Resize(accs.phn_space_dim_); - for (int32 j = 0; j < accs.num_states_; j++) { - for (int32 m = 0; m < model.NumSubstates(j); m++) { - if (accs.gamma_[j](m, i) > 0.0) { - (*Q)[i].AddVec2(static_cast(accs.gamma_[j](m, i)), - model.v_[j].Row(m)); - } - } - } - } -} - -// Compute the S_i^{(means)} quantities (Eq. 74). -// Note: we seem to have also included in this variable -// the term - (Y_i M_I^T + M_i Y_i^T). -void MleAmSgmmUpdater::ComputeSMeans(const MleAmSgmmAccs &accs, - const AmSgmm &model, - std::vector< SpMatrix > *S_means) { - S_means->resize(accs.num_gaussians_); - Matrix YM_MY(accs.feature_dim_, accs.feature_dim_); - Vector mu_jmi(accs.feature_dim_); - for (int32 i = 0; i < accs.num_gaussians_; i++) { - // YM_MY = - (Y_{i} M_{i}^T) - YM_MY.AddMatMat(-1.0, accs.Y_[i], kNoTrans, - Matrix(model.M_[i]), kTrans, 0.0); - // Add its own transpose: YM_MY = - (Y_{i} M_{i}^T + M_{i} Y_{i}^T) - { - Matrix M(YM_MY, kTrans); - YM_MY.AddMat(1.0, M); - } - (*S_means)[i].Resize(accs.feature_dim_, kUndefined); - (*S_means)[i].CopyFromMat(YM_MY); // Sigma_{i} = -(YM' + MY') - - for (int32 j = 0; j < accs.num_states_; j++) { - for (int32 m = 0; m < model.NumSubstates(j); m++) { - if (accs.gamma_[j](m, i) != 0.0) { - // Sigma_{i} += gamma_{jmi} * mu_{jmi}*mu_{jmi}^T - mu_jmi.AddMatVec(1.0, model.M_[i], kNoTrans, model.v_[j].Row(m), 0.0); - (*S_means)[i].AddVec2(static_cast(accs.gamma_[j](m, i)), mu_jmi); - } - } - } - KALDI_ASSERT(1.0 / (*S_means)[i](0, 0) != 0.0); - } -} - -// Compute H^{(sm)}, the "smoothing" matrices. -void MleAmSgmmUpdater::ComputeSmoothingTerms(const MleAmSgmmAccs &accs, - const AmSgmm &model, - const vector > &H, - SpMatrix *H_sm, - Vector *y_sm) const { - KALDI_ASSERT(H_sm != NULL); - H_sm->Resize(accs.phn_space_dim_); - if (y_sm != NULL) y_sm->Resize(accs.phn_space_dim_); - Vector gamma_i(accs.num_gaussians_); - - for (int32 j = 0; j < accs.num_states_; j++) { - for (int32 m = 0, end = model.NumSubstates(j); m < end; m++) { - gamma_i.AddVec(1.0, accs.gamma_[j].Row(m)); - if (y_sm != NULL) (*y_sm).AddVec(1.0, accs.y_[j].Row(m)); - } - } - - double sum = 0.0; - for (int32 i = 0; i < accs.num_gaussians_; i++) { - if (gamma_i(i) > 0) { - H_sm->AddSp(gamma_i(i), H[i]); - sum += gamma_i(i); - } - } - - if (sum == 0.0) { - KALDI_WARN << "Sum of counts is zero. Smoothing matrix set to unit" - << string((y_sm != NULL)? " & smoothing vector set to 0." : "."); - H_sm->SetUnit(); // arbitrary non-singular matrix - } else { - if (y_sm != NULL) { - (*y_sm).Scale(1.0 / sum); - KALDI_VLOG(3) << "y_sm is " << (*y_sm); - } - H_sm->Scale(1.0 / sum); - Matrix H_sm_old(*H_sm); - int32 tmp = H_sm->LimitCondDouble(update_options_.max_cond_H_sm); - if (tmp > 0) { - KALDI_WARN << "Limited " << tmp << " eigenvalues of H_sm."; - if (update_options_.fixup_H_sm && y_sm != NULL) { - Vector avgVec(accs.phn_space_dim_); - SpMatrix HInv(H_sm_old); - HInv.Invert(); - avgVec.AddSpVec(1.0, HInv, (*y_sm), 0.0); - (*y_sm).AddSpVec(1.0, (*H_sm), avgVec, 0.0); - KALDI_VLOG(3) << "y_sm [fixed up] is " << (*y_sm); - } - } - } -} - - -class UpdatePhoneVectorsClass: public MultiThreadable { // For multi-threaded. 
- public: - UpdatePhoneVectorsClass(const MleAmSgmmUpdater &updater, - const MleAmSgmmAccs &accs, - AmSgmm *model, - const std::vector > &H, - const SpMatrix &H_sm, - const Vector &y_sm, - double *auxf_impr, - double *like_impr): - updater_(updater), accs_(accs), model_(model), - H_(H), H_sm_(H_sm), y_sm_(y_sm), auxf_impr_ptr_(auxf_impr), - auxf_impr_(0.0), like_impr_ptr_(like_impr), like_impr_(0.0) { } - - ~UpdatePhoneVectorsClass() { - *auxf_impr_ptr_ += auxf_impr_; - *like_impr_ptr_ += like_impr_; - } - - inline void operator() () { - // Note: give them local copy of the sums we're computing, - // which will be propagated to the total sums in the destructor. - updater_.UpdatePhoneVectorsInternal(accs_, model_, H_, H_sm_, y_sm_, - &auxf_impr_, &like_impr_, - num_threads_, thread_id_); - } - private: - const MleAmSgmmUpdater &updater_; - const MleAmSgmmAccs &accs_; - AmSgmm *model_; - const std::vector > &H_; - const SpMatrix &H_sm_; - const Vector &y_sm_; - double *auxf_impr_ptr_; - double auxf_impr_; - double *like_impr_ptr_; - double like_impr_; -}; - - -// Runs the phone vectors update for a subset of states (called -// multi-threaded). -void MleAmSgmmUpdater::UpdatePhoneVectorsInternal( - const MleAmSgmmAccs &accs, - AmSgmm *model, - const std::vector > &H, - const SpMatrix &H_sm, - const Vector &y_sm, - double *auxf_impr, - double *like_impr, - int32 num_threads, - int32 thread_id) const { - - int32 block_size = (accs.num_states_ + (num_threads-1)) / num_threads, - j_start = block_size * thread_id, - j_end = std::min(accs.num_states_, j_start + block_size); - - for (int32 j = j_start; j < j_end; j++) { - double state_count = 0.0, state_auxf_impr = 0.0, state_like_impr = 0.0; - Vector w_jm(accs.num_gaussians_); - for (int32 m = 0; m < model->NumSubstates(j); m++) { - double gamma_jm = accs.gamma_[j].Row(m).Sum(); - state_count += gamma_jm; - Vector g_jm(accs.phn_space_dim_); // computed using eq. 58 - SpMatrix H_jm(accs.phn_space_dim_); // computed using eq. 59 - // First compute normal H_jm. - - // need weights for this ... - // w_jm = softmax([w_{k1}^T ... w_{kD}^T] * v_{jkm}) eq.(7) - w_jm.AddMatVec(1.0, Matrix(model->w_), kNoTrans, - Vector(model->v_[j].Row(m)), 0.0); - w_jm.ApplySoftMax(); - g_jm.CopyFromVec(accs.y_[j].Row(m)); - - for (int32 i = 0; i < accs.num_gaussians_; i++) { - double gamma_jmi = accs.gamma_[j](m, i); - double quadratic_term = std::max(gamma_jmi, gamma_jm * w_jm(i)); - double scalar = gamma_jmi - gamma_jm * w_jm(i) + quadratic_term - * VecVec(model->w_.Row(i), model->v_[j].Row(m)); - g_jm.AddVec(scalar, model->w_.Row(i)); - if (gamma_jmi != 0.0) { - H_jm.AddSp(gamma_jmi, H[i]); // The most important term.. - } - if (quadratic_term > 1.0e-10) { - H_jm.AddVec2(static_cast(quadratic_term), model->w_.Row(i)); - } - } - SpMatrix H_jm_dash(H_jm); // with ad-hoc smoothing term. - Vector g_jm_dash(g_jm); // with ad-hoc smoothing term. - - // H_jm_dash = H_jm + (smoothing term) - H_jm_dash.AddSp(update_options_.tau_vec, H_sm); - // g_jm_dash.BlasGemv(update_options_.mTauVec, H_sm, kNoTrans, e_1, 1.0); - // g_jm_dash = g_jm + (smoothing term) - g_jm_dash.AddVec(update_options_.tau_vec, y_sm); - - // if (gamma_jm == 0) continue; - // no, we still want to update even with zero count. 
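The weights w_jm used throughout this function come from a softmax of w * v_jm (ApplySoftMax() above, or equivalently Add(-LogSumExp()) in log space). The numerically safe version of that operation, shifting by the maximum before exponentiating, looks like this as a std-only sketch:

#include <algorithm>
#include <cmath>
#include <vector>

void Softmax(std::vector<double> *v) {
  double mx = *std::max_element(v->begin(), v->end());
  double sum = 0.0;
  for (double &x : *v) { x = std::exp(x - mx); sum += x; }  // no overflow
  for (double &x : *v) x /= sum;  // normalize to a probability vector
}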
-#ifdef KALDI_PARANOID - if (update_options_.tau_vec > 0) - KALDI_ASSERT(H_jm_dash.IsPosDef()); -#endif - Vector vhat_jm(model->v_[j].Row(m)); - SolverOptions opts; - opts.name = "v"; - opts.K = update_options_.max_cond; - opts.eps = update_options_.epsilon; - double objf_impr_with_prior = - SolveQuadraticProblem(H_jm_dash, - g_jm_dash, - opts, - &vhat_jm); - - SpMatrix H_jm_flt(H_jm); - - double objf_impr_noprior = - (VecVec(vhat_jm, g_jm) - - 0.5 * VecSpVec(vhat_jm, H_jm, vhat_jm)) - - (VecVec(model->v_[j].Row(m), g_jm) - - 0.5 * VecSpVec(model->v_[j].Row(m), H_jm_flt, model->v_[j].Row(m))); - model->v_[j].Row(m).CopyFromVec(vhat_jm); - if (j < 3 && m < 2 && thread_id == 0) { - KALDI_LOG << "Objf impr for j = " << (j) << " m = " << (m) << " is " - << (objf_impr_with_prior / (gamma_jm + 1.0e-20)) - << " (with ad-hoc prior) " - << (objf_impr_noprior / (gamma_jm + 1.0e-20)) - << " (no prior) over " << (gamma_jm) << " frames"; - } - state_auxf_impr += objf_impr_with_prior; - state_like_impr += objf_impr_noprior; - } - - *auxf_impr += state_auxf_impr; - *like_impr += state_like_impr; - if (j < 10 && thread_id == 0) { - KALDI_LOG << "Objf impr for state j = " << (j) << " is " - << (state_auxf_impr / (state_count + 1.0e-20)) - << " (with ad-hoc prior) " - << (state_like_impr / (state_count + 1.0e-20)) - << " (no prior) over " << (state_count) << " frames"; - } - } -} - -double MleAmSgmmUpdater::UpdatePhoneVectors(const MleAmSgmmAccs &accs, - AmSgmm *model, - const vector< SpMatrix > &H, - const SpMatrix &H_sm, - const Vector &y_sm) { - KALDI_LOG << "Updating phone vectors"; - - double count = 0.0, auxf_impr = 0.0, like_impr = 0.0; // sum over all states - - for (int32 j = 0; j < accs.num_states_; j++) count += accs.gamma_[j].Sum(); - - UpdatePhoneVectorsClass c(*this, accs, model, H, H_sm, y_sm, - &auxf_impr, &like_impr); - RunMultiThreaded(c); - - auxf_impr /= (count + 1.0e-20); - like_impr /= (count + 1.0e-20); - KALDI_LOG << "**Overall objf impr for v is " << auxf_impr - << "(with ad-hoc prior) " << like_impr << " (no prior) over " - << (count) << " frames"; - // Choosing to return actual likelihood impr here. - return like_impr; -} - - -/** - This is as UpdatePhoneVectors but does not support smoothing terms or - parallelization. However, it does compute the auxiliary function - after doing the update, and backtracks if it did not increase (due - to the weight terms, increase is not mathematically guaranteed). */ - -double MleAmSgmmUpdater::UpdatePhoneVectorsChecked(const MleAmSgmmAccs &accs, - AmSgmm *model, - const vector< SpMatrix > &H) { - KALDI_LOG << "Updating phone vectors (and checking auxiliary function)"; - - double tot_count = 0.0, tot_objf_impr = 0.0, tot_auxf_impr = 0.0; // sum over all states - - for (int32 j = 0; j < accs.num_states_; j++) { - for (int32 m = 0; m < model->NumSubstates(j); m++) { - double gamma_jm = accs.gamma_[j].Row(m).Sum(); - SpMatrix X_jm(accs.phn_space_dim_); // = \sum_i \gamma_{jmi} H_i - - for (int32 i = 0; i < accs.num_gaussians_; i++) { - double gamma_jmi = accs.gamma_[j](m, i); - if (gamma_jmi != 0.0) - X_jm.AddSp(gamma_jmi, H[i]); - } - - Vector v_jm_orig(model->v_[j].Row(m)), - v_jm(v_jm_orig); - - double exact_objf_start = 0.0, exact_objf = 0.0, auxf_impr = 0.0; - int32 backtrack_iter, max_backtrack = 10; - for (backtrack_iter = 0; backtrack_iter < max_backtrack; backtrack_iter++) { - // w_jm = softmax([w_{k1}^T ... 
w_{kD}^T] * v_{jkm}) eq.(7) - Vector w_jm(accs.num_gaussians_); - w_jm.AddMatVec(1.0, Matrix(model->w_), kNoTrans, - v_jm, 0.0); - w_jm.Add(-w_jm.LogSumExp()); // it is now log w_jm - - exact_objf = VecVec(w_jm, accs.gamma_[j].Row(m)) - + VecVec(v_jm, accs.y_[j].Row(m)) - -0.5 * VecSpVec(v_jm, X_jm, v_jm); - - if (backtrack_iter == 0.0) { - exact_objf_start = exact_objf; - } else { - if (exact_objf >= exact_objf_start) { - break; // terminate backtracking. - } else { - KALDI_LOG << "Backtracking computation of v_jm for j = " << j - << " and m = " << m << " because objf changed by " - << (exact_objf-exact_objf_start) << " [vs. predicted:] " - << auxf_impr; - v_jm.AddVec(1.0, v_jm_orig); - v_jm.Scale(0.5); - } - } - - if (backtrack_iter == 0) { // computing updated value. - w_jm.ApplyExp(); // it is now w_jm - SpMatrix H_jm(X_jm); - Vector g_jm(accs.y_[j].Row(m)); - for (int32 i = 0; i < accs.num_gaussians_; i++) { - double gamma_jmi = accs.gamma_[j](m, i); - double quadratic_term = std::max(gamma_jmi, gamma_jm * w_jm(i)); - double scalar = gamma_jmi - gamma_jm * w_jm(i) + quadratic_term - * VecVec(model->w_.Row(i), model->v_[j].Row(m)); - g_jm.AddVec(scalar, model->w_.Row(i)); - if (quadratic_term > 1.0e-10) { - H_jm.AddVec2(static_cast(quadratic_term), model->w_.Row(i)); - } - } - SolverOptions opts; - opts.name = "v"; - opts.K = update_options_.max_cond; - opts.eps = update_options_.epsilon; - auxf_impr = SolveQuadraticProblem(H_jm, g_jm, opts, &v_jm); - } - } - double objf_impr = exact_objf - exact_objf_start; - tot_count += gamma_jm; - tot_objf_impr += objf_impr; - tot_auxf_impr += auxf_impr; - if (backtrack_iter == max_backtrack) { - KALDI_WARN << "Backtracked " << max_backtrack << " times [not updating]"; - } else { - model->v_[j].Row(m).CopyFromVec(v_jm); - } - - if (j < 3 && m < 2) { - KALDI_LOG << "Objf impr for j = " << (j) << " m = " << (m) << " is " - << objf_impr << " vs. quadratic auxf impr (before backtrack) " - << auxf_impr; - } - } - } - - tot_objf_impr /= (tot_count + 1.0e-20); - tot_auxf_impr /= (tot_count + 1.0e-20); - KALDI_LOG << "**Overall objf impr for v is " << tot_objf_impr - << " (auxf impr before backtracking:) " << tot_auxf_impr - << " over " << tot_count << " frames"; - // Choosing to return actual likelihood impr here. - return tot_objf_impr; -} - - - -class UpdatePhoneVectorsCheckedFromClusterableClass: public MultiThreadable { // For multi-threaded. - public: - UpdatePhoneVectorsCheckedFromClusterableClass( - MleAmSgmmUpdater *updater, - const std::vector &stats, - const std::vector > &H, - AmSgmm *model, - double *count, - double *like_impr): - updater_(updater), stats_(stats), H_(H), model_(model), - count_ptr_(count), count_(0.0), - like_impr_ptr_(like_impr), like_impr_(0.0) - { } - - ~UpdatePhoneVectorsCheckedFromClusterableClass() { - *count_ptr_ += count_; - *like_impr_ptr_ += like_impr_; - } - - inline void operator() () { - // Note: give them local copy of the sums we're computing, - // which will be propagated to the total sums in the destructor. 
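The comment above is the heart of the MultiThreadable pattern: each worker owns private partial sums, and only its destructor, which runs after the threads have been joined, folds them into the shared totals, so no locking is needed. A sketch of the same idea with plain std::thread (Kaldi's RunMultiThreaded() handles the spawning and thread_id bookkeeping instead):

#include <functional>
#include <thread>

struct Worker {
  double *total;       // shared total; only touched in the destructor
  double local = 0.0;  // thread-private partial sum
  void operator()(int begin, int end) {
    for (int j = begin; j < end; ++j) local += j;  // stand-in for real work
  }
  ~Worker() { *total += local; }  // runs after join(), so no race
};

double SumParallel(int n) {
  double total = 0.0;
  {
    Worker w1{&total}, w2{&total};
    std::thread t1(std::ref(w1), 0, n / 2), t2(std::ref(w2), n / 2, n);
    t1.join();
    t2.join();
  }  // w1 and w2 are destroyed here, after both threads have been joined
  return total;
}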
- updater_->UpdatePhoneVectorsCheckedFromClusterableInternal( - stats_, H_, model_, &count_, &like_impr_, num_threads_, thread_id_); - } - private: - MleAmSgmmUpdater *updater_; - const std::vector &stats_; - const std::vector > &H_; - AmSgmm *model_; - double *count_ptr_; - double count_; - double *like_impr_ptr_; - double like_impr_; -}; - - -double MleAmSgmmUpdater::UpdatePhoneVectorsCheckedFromClusterable( - const std::vector &stats, - const vector< SpMatrix > &H, - AmSgmm *model) { - KALDI_LOG << "Updating phone vectors using stats from Clusterable class " - "(and checking auxiliary function)"; - double count = 0.0, like_impr = 0.0; - - UpdatePhoneVectorsCheckedFromClusterableClass c(this, stats, H, model, - &count, &like_impr); - RunMultiThreaded(c); - - KALDI_LOG << "**Overall objf impr for v is " << (like_impr / count) - << " over " << count << " frames."; - - return like_impr / count; -} - - -void MleAmSgmmUpdater::UpdatePhoneVectorsCheckedFromClusterableInternal( - const std::vector &stats, - const vector< SpMatrix > &H, - AmSgmm *model, - double *count_ptr, - double *like_impr_ptr, - int32 num_threads, - int32 thread_id) { - - int32 block_size = (model->NumPdfs() + (num_threads-1)) / num_threads, - j_start = block_size * thread_id, - j_end = std::min(model->NumPdfs(), j_start + block_size); - - double tot_count = 0.0, tot_objf_impr = 0.0, tot_auxf_impr = 0.0; // sum over all states - - KALDI_ASSERT(model->NumPdfs() == static_cast(stats.size())); - int32 num_gauss = model->NumGauss(); - for (int32 j = j_start; j < j_end; j++) { - KALDI_ASSERT(model->NumSubstates(j) == 1 && - "This function only works if there is 1 substate per state."); - int32 m = 0; // sub-state index. - const Vector &gamma = stats[j]->gamma(); - const Vector &y = stats[j]->y(); - - double gamma_jm = gamma.Sum(); - SpMatrix X_jm(model->PhoneSpaceDim()); // = \sum_i \gamma_{jmi} H_i - - for (int32 i = 0; i < num_gauss; i++) { - double gamma_jmi = gamma(i); - if (gamma_jmi != 0.0) - X_jm.AddSp(gamma_jmi, H[i]); - } - - Vector v_jm_orig(model->v_[j].Row(m)), - v_jm(v_jm_orig); - - double exact_objf_start = 0.0, exact_objf = 0.0, auxf_impr = 0.0; - int32 backtrack_iter, max_backtrack = 10; - for (backtrack_iter = 0; backtrack_iter < max_backtrack; backtrack_iter++) { - // w_jm = softmax([w_{k1}^T ... w_{kD}^T] * v_{jkm}) eq.(7) - Vector w_jm(num_gauss); - w_jm.AddMatVec(1.0, Matrix(model->w_), kNoTrans, - v_jm, 0.0); - w_jm.Add(-w_jm.LogSumExp()); // it is now log w_jm - - exact_objf = VecVec(w_jm, gamma) - + VecVec(v_jm, y) - -0.5 * VecSpVec(v_jm, X_jm, v_jm); - - if (backtrack_iter == 0.0) { - exact_objf_start = exact_objf; - } else { - if (exact_objf >= exact_objf_start) { - break; // terminate backtracking. - } else { - KALDI_LOG << "Backtracking computation of v_jm for j = " << j - << " and m = " << m << " because objf changed by " - << (exact_objf-exact_objf_start) << " [vs. predicted:] " - << auxf_impr; - v_jm.AddVec(1.0, v_jm_orig); - v_jm.Scale(0.5); - } - } - - if (backtrack_iter == 0) { // computing updated value. - w_jm.ApplyExp(); // it is now w_jm - SpMatrix weight_2nd_deriv(model->PhoneSpaceDim()); // actually - // negatived 2nd derivative. - Vector num_deriv(model->PhoneSpaceDim()); - Vector den_deriv(model->PhoneSpaceDim()); - - // We modify the optimization to use the exact 2nd derivative. - // Because we do checking and backtracking, the loss of - // natural stability is OK. 
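"Checking and backtracking" here means: take the proposed step, evaluate the exact objective, and if it got worse move the candidate halfway back toward the starting point (the AddVec/Scale pair above implements the halving). A one-dimensional std-only sketch of the control flow:

double ImproveWithBacktracking(double v, double (*objf)(double),
                               double step, int max_backtrack = 10) {
  const double v0 = v, f0 = objf(v0);
  v = v0 + step;  // proposed update (stands in for the quadratic solve)
  for (int k = 0; k < max_backtrack; ++k) {
    if (objf(v) >= f0) return v;  // objective did not decrease: accept
    v = 0.5 * (v + v0);           // halve the step, as the code above does
  }
  return v0;  // never improved: keep the original value
}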
- for (int32 i = 0; i < num_gauss; i++) { - double gamma_jmi = gamma(i); - SubVector wi(model->w_, i); - num_deriv.AddVec(gamma_jmi, wi); - double scalar = gamma_jm * w_jm(i); // expected count. - den_deriv.AddVec(scalar, wi); - if (scalar > 1.0e-10) // if-statement is a speedup - weight_2nd_deriv.AddVec2(static_cast(scalar), wi); - } - Vector total_linear_term(y); - total_linear_term.AddVec(1.0, num_deriv); - total_linear_term.AddVec(-1.0, den_deriv); - if (gamma_jm > 0.0) - weight_2nd_deriv.AddVec2(-1.0/gamma_jm, den_deriv); - - total_linear_term.AddSpVec(1.0, weight_2nd_deriv, v_jm, 1.0); - // we want the derivatives around zero, not around the current point. - // Correction for this. - - SpMatrix total_quadratic_term(weight_2nd_deriv); - total_quadratic_term.AddSp(1.0, X_jm); - - SolverOptions opts; - opts.name = "v"; - opts.K = update_options_.max_cond; - opts.eps = update_options_.epsilon; - auxf_impr = SolveQuadraticProblem(total_quadratic_term, - total_linear_term, opts, &v_jm); - } - } - double objf_impr = exact_objf - exact_objf_start; - tot_count += gamma_jm; - tot_objf_impr += objf_impr; - tot_auxf_impr += auxf_impr; - if (backtrack_iter == max_backtrack) { - KALDI_WARN << "Backtracked " << max_backtrack << " times [not updating]"; - } else { - model->v_[j].Row(m).CopyFromVec(v_jm); - } - if (j < 3) { - KALDI_LOG << "Objf impr for j = " << (j) << " m = " << (m) << " is " - << objf_impr << " vs. quadratic auxf impr (before backtrack) " - << auxf_impr; - } - } - - *like_impr_ptr = tot_objf_impr; - *count_ptr = tot_count; - - tot_objf_impr /= (tot_count + 1.0e-20); - tot_auxf_impr /= (tot_count + 1.0e-20); - - if (j_start == 0) - KALDI_LOG << "**For first batch: objf impr for v is " << tot_objf_impr - << " (auxf impr before backtracking:) " << tot_auxf_impr - << " over " << tot_count << " frames"; -} - - -void MleAmSgmmUpdater::RenormalizeV(const MleAmSgmmAccs &accs, - AmSgmm *model, - const SpMatrix &H_sm) { - SpMatrix Sigma(accs.phn_space_dim_); - int32 count = 0; - for (int32 j = 0; j < accs.num_states_; j++) { - for (int32 m = 0; m < model->NumSubstates(j); m++) { - count++; - Sigma.AddVec2(static_cast(1.0), model->v_[j].Row(m)); - } - } - Sigma.Scale(1.0 / count); - int32 fixed_eigs = Sigma.LimitCondDouble(update_options_.max_cond); - if (fixed_eigs != 0) { - KALDI_WARN << "Scatter of vectors v is poorly conditioned. Fixed up " - << fixed_eigs << " eigenvalues."; - } - KALDI_LOG << "Eigenvalues of scatter of vectors v is : "; - Sigma.PrintEigs("Sigma"); - if (!Sigma.IsPosDef()) { - KALDI_LOG << "Not renormalizing v because scatter is not positive definite" - << " -- maybe first iter?"; - return; - } - - // Want to make variance of v unit and H_sm (like precision matrix) diagonal. - TpMatrix L(accs.phn_space_dim_); - L.Cholesky(Sigma); - TpMatrix LInv(L); - LInv.Invert(); - - Matrix tmpL(accs.phn_space_dim_, accs.phn_space_dim_); - tmpL.CopyFromTp(L); - - SpMatrix H_sm_proj(accs.phn_space_dim_); - H_sm_proj.AddMat2Sp(1.0, tmpL, kTrans, H_sm, 0.0); - // H_sm_proj := L^{T} * H_sm * L. - // This is right because we would transform the vectors themselves - // by L^{-1}, and H_sm is like the inverse of the vectors, - // so it's {L^{-1}}^{-T} = L^T. 
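For reference, the reason the Cholesky factor whitens the vectors: with Sigma = L L^T, the transformed vectors v~ = L^{-1} v have scatter

    L^{-1} Sigma L^{-T} = L^{-1} (L L^T) L^{-T} = I,

and any orthogonal transform U^T applied afterwards keeps the scatter equal to I, which is why the eigenvector rotation computed next can additionally diagonalize H_sm without undoing the normalization.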
- - Matrix U(accs.phn_space_dim_, accs.phn_space_dim_); - Vector eigs(accs.phn_space_dim_); - H_sm_proj.SymPosSemiDefEig(&eigs, &U, 1.0); // 1.0 means no checking +ve def -> faster - KALDI_LOG << "Note on the next diagnostic: the first number is generally not " - << "that meaningful as it relates to the static offset"; - H_sm_proj.PrintEigs("H_sm_proj (Significance of dims in vector space.. note)"); - - // Transform on vectors is U^T L^{-1}. - // Why? Because transform on H_sm is T =U^T L^T - // and we want T^{-T} by normal rules of vector/covector and we - // have (U^T L^T)^{-T} = (L U)^{-1} = U^T L^{-1}. - Matrix Trans(accs.phn_space_dim_, accs.phn_space_dim_); // T^{-T} - Matrix tmpLInv(accs.phn_space_dim_, accs.phn_space_dim_); - tmpLInv.CopyFromTp(LInv); - Trans.AddMatMat(1.0, U, kTrans, tmpLInv, kNoTrans, 0.0); - Matrix TransInv(Trans); - TransInv.Invert(); // T in above... - -#ifdef KALDI_PARANOID - { - SpMatrix H_sm_tmp(accs.phn_space_dim_); - H_sm_tmp.AddMat2Sp(1.0, TransInv, kTrans, H_sm, 0.0); - KALDI_ASSERT(H_sm_tmp.IsDiagonal(0.1)); - } - { - SpMatrix Sigma_tmp(accs.phn_space_dim_); - Sigma_tmp.AddMat2Sp(1.0, Trans, kNoTrans, Sigma, 0.0); - KALDI_ASSERT(Sigma_tmp.IsUnit(0.1)); - } -#endif - - for (int32 j = 0; j < accs.num_states_; j++) { - for (int32 m = 0; m < model->NumSubstates(j); m++) { - Vector tmp(accs.phn_space_dim_); - tmp.AddMatVec(1.0, Trans, kNoTrans, Vector(model->v_[j].Row(m)), 0.0); - model->v_[j].Row(m).CopyFromVec(tmp); - } - } - for (int32 i = 0; i < accs.num_gaussians_; i++) { - Vector tmp(accs.phn_space_dim_); - tmp.AddMatVec(1.0, TransInv, kTrans, Vector(model->w_.Row(i)), 0.0); - model->w_.Row(i).CopyFromVec(tmp); - - Matrix tmpM(accs.feature_dim_, accs.phn_space_dim_); - // Multiplying on right not left so must not transpose TransInv. - tmpM.AddMatMat(1.0, Matrix(model->M_[i]), kNoTrans, - TransInv, kNoTrans, 0.0); - model->M_[i].CopyFromMat(tmpM); - } - KALDI_LOG << "Renormalized subspace."; -} - -double MleAmSgmmUpdater::UpdateM(const MleAmSgmmAccs &accs, - AmSgmm *model) { - double tot_count = 0.0, tot_like_impr = 0.0; - for (int32 i = 0; i < accs.num_gaussians_; i++) { - double gamma_i = 0.0; - for (int32 j = 0; j < accs.num_states_; j++) - for (int32 m = 0; m < model->NumSubstates(j); m++) - gamma_i += accs.gamma_[j](m, i); - - if (gamma_i < accs.feature_dim_) { - KALDI_WARN << "For component " << i << ": not updating M due to very " - << "small count (=" << gamma_i << ")."; - continue; - } - - SolverOptions opts; - opts.name = "M"; - opts.K = update_options_.max_cond; - opts.eps = update_options_.epsilon; - - Matrix Mi(model->M_[i]); - double impr = SolveQuadraticMatrixProblem(Q_[i], accs.Y_[i], - SpMatrix(model->SigmaInv_[i]), - opts, &Mi); - model->M_[i].CopyFromMat(Mi); - - if (i < 10) { - KALDI_VLOG(2) << "Objf impr for projection M for i = " << i << ", is " - << (impr/(gamma_i + 1.0e-20)) << " over " << gamma_i - << " frames"; - } - tot_count += gamma_i; - tot_like_impr += impr; - } - tot_like_impr /= (tot_count + 1.0e-20); - KALDI_LOG << "Overall objective function improvement for model projections " - << "M is " << tot_like_impr << " over " << tot_count << " frames"; - return tot_like_impr; -} - -// Estimate the parameters of a Gaussian prior over the M matrices. There are -// as many mean matrices as UBM size and two covariance matrices for the rows -// of M and columns of M. The prior means M_i are fixed to the unadapted values. -// This is what was done in Lu, et al. 
"Maximum a posteriori adaptation of -// subspace Gaussian mixture models for cross-lingual speech recognition", -// ICASSP 2012. -void MleAmSgmmUpdater::ComputeMPrior(AmSgmm *model) { - KALDI_ASSERT(update_options_.map_M_prior_iters > 0); - int32 Ddim = model->FeatureDim(); - int32 Sdim = model->PhoneSpaceDim(); - int32 nGaussians = model->NumGauss(); - - // inverse variance of the columns of M: dim is # of rows - model->col_cov_inv_.Resize(Ddim); - // inverse covariance of the rows of M: dim is # of columns - model->row_cov_inv_.Resize(Sdim); - - model->col_cov_inv_.SetUnit(); - model->row_cov_inv_.SetUnit(); - - if (model->M_prior_.size() == 0) { - model->M_prior_.resize(nGaussians); - for (int32 i = 0; i < nGaussians; i++) { - model->M_prior_[i].Resize(Ddim, Sdim); - model->M_prior_[i].CopyFromMat(model->M_[i]); // We initialize Mpri as this - } - } - - if (update_options_.full_col_cov || update_options_.full_row_cov) { - Matrix avg_M(Ddim, Sdim); // average of the Gaussian prior means - for (int32 i = 0; i < nGaussians; i++) - avg_M.AddMat(1.0, Matrix(model->M_prior_[i])); - avg_M.Scale(1.0 / nGaussians); - - Matrix MDiff(Ddim, Sdim); - for (int32 iter = 0; iter < update_options_.map_M_prior_iters; iter++) { - { // diagnostic block. - double prior_like = -0.5 * nGaussians * (Ddim * Sdim * Log(2 * M_PI) - + Sdim * (-model->row_cov_inv_.LogPosDefDet()) - + Ddim * (-model->col_cov_inv_.LogPosDefDet())); - for (int32 i = 0; i < nGaussians; i++) { - MDiff.CopyFromMat(Matrix(model->M_prior_[i])); - MDiff.AddMat(-1.0, avg_M); // MDiff = M_{i} - avg(M) - SpMatrix tmp(Ddim); - // tmp = MDiff.Omega_r^{-1}*MDiff^T. - tmp.AddMat2Sp(1.0, MDiff, kNoTrans, - SpMatrix(model->row_cov_inv_), 0.0); - prior_like -= 0.5 * TraceSpSp(tmp, SpMatrix(model->col_cov_inv_)); - } - KALDI_LOG << "Before iteration " << iter - << " of updating prior over M, log like per dimension modeled is " - << prior_like / (nGaussians * Ddim * Sdim); - } - - // First estimate the column covariances (\Omega_r in paper) - if (update_options_.full_col_cov) { - size_t limited; - model->col_cov_inv_.SetZero(); - for (int32 i = 0; i < nGaussians; i++) { - MDiff.CopyFromMat(Matrix(model->M_prior_[i])); - MDiff.AddMat(-1.0, avg_M); // MDiff = M_{i} - avg(M) - // Omega_r += 1/(D*I) * Mdiff * Omega_c^{-1} * Mdiff^T - model->col_cov_inv_.AddMat2Sp(1.0 / (Ddim * nGaussians), - Matrix(MDiff), kNoTrans, - model->row_cov_inv_, 1.0); - } - model->col_cov_inv_.PrintEigs("col_cov"); - limited = model->col_cov_inv_.LimitCond(update_options_.max_cond, - true /*invert the matrix*/); - if (limited != 0) { - KALDI_LOG << "Computing column covariances for M: limited " << limited - << " singular values, max condition is " - << update_options_.max_cond; - } - } - - // Now estimate the row covariances (\Omega_c in paper) - if (update_options_.full_row_cov) { - size_t limited; - model->row_cov_inv_.SetZero(); - for (int32 i = 0; i < nGaussians; i++) { - MDiff.CopyFromMat(Matrix(model->M_prior_[i])); - MDiff.AddMat(-1.0, avg_M); // MDiff = M_{i} - avg(M) - // Omega_c += 1/(S*I) * Mdiff^T * Omega_r^{-1} * Mdiff. 
- model->row_cov_inv_.AddMat2Sp(1.0 / (Sdim * nGaussians), - Matrix(MDiff), kTrans, - model->col_cov_inv_, 1.0); - } - model->row_cov_inv_.PrintEigs("row_cov"); - limited = model->row_cov_inv_.LimitCond(update_options_.max_cond, - true /*invert the matrix*/); - if (limited != 0) { - KALDI_LOG << "Computing row covariances for M: limited " << limited - << " singular values, max condition is " - << update_options_.max_cond; - } - } - } // end iterations - } -} - - -// MAP adaptation of M with a matrix-variate Gaussian prior -double MleAmSgmmUpdater::MapUpdateM(const MleAmSgmmAccs &accs, AmSgmm *model) { - int32 Ddim = model->FeatureDim(); - int32 Sdim = model->PhoneSpaceDim(); - int32 nGaussians = model->NumGauss(); - - KALDI_LOG << "Prior smoothing parameter: Tau = " << update_options_.tau_map_M; - if (model->M_prior_.size() == 0 || model->col_cov_inv_.NumRows() == 0 - || model->row_cov_inv_.NumRows() == 0) { - KALDI_LOG << "Computing the prior first"; - ComputeMPrior(model); - } - - Matrix G(Ddim, Sdim); - // \tau \Omega_c^{-1} avg(M) \Omega_r^{-1}, depends on Gaussian index - Matrix prior_term_i(Ddim, Sdim); - SpMatrix P2(model->col_cov_inv_); - SpMatrix Q2(model->row_cov_inv_); - Q2.Scale(update_options_.tau_map_M); - - double totcount = 0.0, tot_like_impr = 0.0; - for (int32 i = 0; i < nGaussians; ++i) { - double gamma_i = 0.0; - for (int32 j = 0; j < accs.num_states_; ++j) - for (int32 m = 0; m < model->NumSubstates(j); ++m) - gamma_i += accs.gamma_[j](m, i); - - if (gamma_i < accs.feature_dim_) { - KALDI_WARN << "For component " << i << ": not updating M due to very " - << "small count (=" << gamma_i << ")."; - continue; - } - - Matrix tmp(Ddim, Sdim, kSetZero); - tmp.AddSpMat(1.0, SpMatrix(model->col_cov_inv_), - Matrix(model->M_prior_[i]), kNoTrans, 0.0); - prior_term_i.AddMatSp(update_options_.tau_map_M, tmp, kNoTrans, - SpMatrix(model->row_cov_inv_), 0.0); - - Matrix SigmaY(Ddim, Sdim, kSetZero); - SigmaY.AddSpMat(1.0, SpMatrix(model->SigmaInv_[i]), accs.Y_[i], - kNoTrans, 0.0); - G.CopyFromMat(SigmaY); // G = \Sigma_{i}^{-1} Y_{i} - G.AddMat(1.0, prior_term_i); // G += \tau \Omega_c^{-1} avg(M) \Omega_r^{-1} - SpMatrix P1(model->SigmaInv_[i]); - Matrix Mi(model->M_[i]); - - SolverOptions opts; - opts.name = "M"; - opts.K = update_options_.max_cond; - opts.eps = update_options_.epsilon; - - double impr = SolveDoubleQuadraticMatrixProblem(G, P1, P2, Q_[i], Q2, opts, &Mi); - model->M_[i].CopyFromMat(Mi); - if (i < 10) { - KALDI_LOG << "Objf impr for projection M for i = " << i << ", is " - << (impr / (gamma_i + 1.0e-20)) << " over " << gamma_i - << " frames"; - } - totcount += gamma_i; - tot_like_impr += impr; - } - tot_like_impr /= (totcount + 1.0e-20); - KALDI_LOG << "Overall objective function improvement for model projections " - << "M is " << tot_like_impr << " over " << totcount << " frames"; - return tot_like_impr; -} - - -/// This function gets stats used inside UpdateWParallel, where it accumulates -/// the F_i and g_i quantities. Note: F_i is viewed as a vector of SpMatrix -/// (one for each i); each row of F_i is viewed as an SpMatrix even though -/// it's stored as a vector.... -/// Note: w is just a double-precision copy of the matrix model->w_ - -// static -void MleAmSgmmUpdater::UpdateWParallelGetStats(const MleAmSgmmAccs &accs, - const AmSgmm &model, - const Matrix &w, - Matrix *F_i, - Matrix *g_i, - double *tot_like, - int32 num_threads, - int32 thread_id) { - - // Accumulate stats from a block of states (this gets called in parallel). 
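The layout trick described in the comment above (each row of F_i holds one SpMatrix) relies on a symmetric S x S matrix having only S*(S+1)/2 free entries, which fit in one row of an ordinary matrix in packed-triangular order. A minimal stand-alone illustration (not Kaldi's SpMatrix):

#include <utility>
#include <vector>

struct PackedSym {
  int dim;
  std::vector<double> data;  // holds dim * (dim + 1) / 2 entries
  explicit PackedSym(int d) : dim(d), data(d * (d + 1) / 2, 0.0) {}
  // Entry (r, c) with r >= c lives at offset r*(r+1)/2 + c.
  double &at(int r, int c) {
    if (r < c) std::swap(r, c);  // symmetry: (r, c) and (c, r) coincide
    return data[r * (r + 1) / 2 + c];
  }
};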
- int32 block_size = (accs.num_states_ + (num_threads-1)) / num_threads, - j_start = block_size * thread_id, - j_end = std::min(accs.num_states_, j_start + block_size); - - // Unlike in the report the inner most loop is over Gaussians, where - // per-gaussian statistics are accumulated. This is more memory demanding - // but more computationally efficient, as outer product v_{jvm} v_{jvm}^T - // is computed only once for all gaussians. - - SpMatrix v_vT(accs.phn_space_dim_); - - for (int32 j = j_start; j < j_end; j++) { - int32 num_substates = model.NumSubstates(j); - Matrix w_jm(num_substates, accs.num_gaussians_); - // The linear term and quadratic term for each Gaussian-- two scalars - // for each Gaussian, they appear in the accumulation formulas. - Matrix linear_term(num_substates, accs.num_gaussians_); - Matrix quadratic_term(num_substates, accs.num_gaussians_); - Matrix v_vT_m(num_substates, - (accs.phn_space_dim_*(accs.phn_space_dim_+1))/2); - - // w_jm = softmax([w_{k1}^T ... w_{kD}^T] * v_{jkm}) eq.(7) - Matrix v_j_double(model.v_[j]); - w_jm.AddMatMat(1.0, v_j_double, kNoTrans, w, kTrans, 0.0); - - for (int32 m = 0; m < model.NumSubstates(j); m++) { - double gamma_jm = accs.gamma_[j].Row(m).Sum(); - - w_jm.Row(m).Add(-1.0 * w_jm.Row(m).LogSumExp()); - *tot_like += VecVec(w_jm.Row(m), accs.gamma_[j].Row(m)); - w_jm.Row(m).ApplyExp(); - v_vT.SetZero(); - // v_vT := v_{jkm} v_{jkm}^T - v_vT.AddVec2(static_cast(1.0), v_j_double.Row(m)); - v_vT_m.Row(m).CopyFromPacked(v_vT); // a bit wasteful, but does not dominate. - - for (int32 i = 0; i < accs.num_gaussians_; i++) { - // Suggestion: g_jkm can be computed more efficiently - // using the Vector/Matrix routines for all i at once - // linear term around cur value. - linear_term(m, i) = accs.gamma_[j](m, i) - gamma_jm * w_jm(m, i); - quadratic_term(m, i) = std::max(accs.gamma_[j](m, i), - gamma_jm * w_jm(m, i)); - } - } // loop over substates - g_i->AddMatMat(1.0, linear_term, kTrans, v_j_double, kNoTrans, 1.0); - F_i->AddMatMat(1.0, quadratic_term, kTrans, v_vT_m, kNoTrans, 1.0); - } // loop over states -} - -// The parallel weight update, in the paper. -double MleAmSgmmUpdater::UpdateWParallel(const MleAmSgmmAccs &accs, - AmSgmm *model) { - KALDI_LOG << "Updating weight projections"; - - // tot_like_{after, before} are totals over multiple iterations, - // not valid likelihoods. but difference is valid (when divided by tot_count). - double tot_predicted_like_impr = 0.0, tot_like_before = 0.0, - tot_like_after = 0.0; - - Matrix g_i(accs.num_gaussians_, accs.phn_space_dim_); - // View F_i as a vector of SpMatrix. - Matrix F_i(accs.num_gaussians_, - (accs.phn_space_dim_*(accs.phn_space_dim_+1))/2); - - Matrix w(model->w_); - double tot_count = 0.0; - for (int32 j = 0; j < accs.num_states_; j++) tot_count += accs.gamma_[j].Sum(); - - for (int iter = 0; iter < update_options_.weight_projections_iters; iter++) { - F_i.SetZero(); - g_i.SetZero(); - double k_like_before = 0.0; - - UpdateWParallelClass c(accs, *model, w, &F_i, &g_i, &k_like_before); - RunMultiThreaded(c); - - Matrix w_orig(w); - double k_predicted_like_impr = 0.0, k_like_after = 0.0; - double min_step = 0.001, step_size; - for (step_size = 1.0; step_size >= min_step; step_size /= 2) { - k_predicted_like_impr = 0.0; - k_like_after = 0.0; - - SolverOptions opts; - opts.name = "w"; - opts.K = update_options_.max_cond; - opts.eps = update_options_.epsilon; - - for (int32 i = 0; i < accs.num_gaussians_; i++) { - // auxf is formulated in terms of change in w. 
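The accumulation above defines, for each Gaussian index i, a quadratic auxiliary function in the change delta_w = w_i_new - w_i:

    Q(delta_w) = g_i^T delta_w - 1/2 delta_w^T F_i delta_w,

with (summing over states j and substates m)

    g_i = sum_{j,m} (gamma_{jmi} - gamma_{jm} w_{jmi}) v_{jm},
    F_i = sum_{j,m} max(gamma_{jmi}, gamma_{jm} w_{jmi}) v_{jm} v_{jm}^T,

where w_{jmi} is the current post-softmax weight. Taking the max of the observed and expected counts makes F_i at least as large as the true negated curvature term, which is why the predicted improvement checked just below should be non-negative up to numerical error.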
- Vector delta_w(accs.phn_space_dim_); - // returns objf impr with step_size = 1, - // but it may not be 1 so we recalculate it. - SpMatrix this_F_i(accs.phn_space_dim_); - this_F_i.CopyFromVec(F_i.Row(i)); - SolveQuadraticProblem(this_F_i, g_i.Row(i), opts, &delta_w); - - delta_w.Scale(step_size); - double predicted_impr = VecVec(delta_w, g_i.Row(i)) - - 0.5 * VecSpVec(delta_w, this_F_i, delta_w); - - // should never be negative because - // we checked inside SolveQuadraticProblem. - KALDI_ASSERT(predicted_impr >= -1.0e-05); - - if (i < 10) { - KALDI_LOG << "Predicted objf impr for w (not per frame), iter = " << - (iter) << ", i = " << (i) << " is " << (predicted_impr); - } - k_predicted_like_impr += predicted_impr; - w.Row(i).AddVec(1.0, delta_w); - } - Vector w_jm_vec(accs.num_gaussians_); - for (int32 j = 0; j < accs.num_states_; j++) { - for (int32 m = 0; m < model->NumSubstates(j); m++) { - w_jm_vec.AddMatVec(1.0, w, kNoTrans, Vector(model->v_[j].Row(m)), 0.0); - w_jm_vec.Add((-1.0) * w_jm_vec.LogSumExp()); - k_like_after += VecVec(w_jm_vec, accs.gamma_[j].Row(m)); - } - } - KALDI_VLOG(2) << "For iteration " << (iter) << ", updating w gives " - << "predicted per-frame like impr " - << (k_predicted_like_impr / tot_count) << ", actual " - << ((k_like_after - k_like_before) / tot_count) << ", over " - << (tot_count) << " frames"; - if (k_like_after < k_like_before) { - w.CopyFromMat(w_orig); // Undo what we computed. - if (fabs(k_like_after - k_like_before) / tot_count < 1.0e-05) { - k_like_after = k_like_before; - KALDI_WARN << "Not updating weights as not increasing auxf and " - << "probably due to numerical issues (since small change)."; - break; - } else { - KALDI_WARN << "Halving step size for weights as likelihood did " - << "not increase"; - } - } else { - break; - } - } - if (step_size < min_step) { - // Undo any step as we have no confidence that this is right. - w.CopyFromMat(w_orig); - } else { - tot_predicted_like_impr += k_predicted_like_impr; - tot_like_after += k_like_after; - tot_like_before += k_like_before; - } - } - - model->w_.CopyFromMat(w); - - tot_predicted_like_impr /= tot_count; - tot_like_after = (tot_like_after - tot_like_before) / tot_count; - KALDI_LOG << "**Overall objf impr for w is " << tot_predicted_like_impr - << ", actual " << tot_like_after << ", over " - << tot_count << " frames"; - return tot_like_after; -} - -double MleAmSgmmUpdater::UpdateWSequential( - const MleAmSgmmAccs &accs, AmSgmm *model) { - // Sequential version, in paper. - /* This is the approach for the weight projections that - * I originally implemented, in which we test the auxiliary function - improvement for each i that we update. This requires some - careful bookkeeping. It means that we need to store the - total of the un-normalized weights for each j, m. */ - - KALDI_LOG << "Updating weight projections [original approach, checking each" - << "Gaussian component]."; - - SpMatrix v_vT(accs.phn_space_dim_); - // tot_like_{after, before} are totals over multiple iterations, - // not valid likelihoods... - // but difference is valid (when divided by tot_count). - double tot_delta_predicted = 0.0, tot_delta_observed = 0.0, - tot_count = 0.0; - - Vector w_jm(accs.num_gaussians_); - Vector g_i(accs.phn_space_dim_); - SpMatrix F_i(accs.phn_space_dim_); - - double k_count = 0.0; - // Total count in each substate. 
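-  // i.e. gamma_jm[j](m) = \sum_i gamma_{jmi}, with k_count accumulating the
-  // grand total over all states and substates.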
-  std::vector< Vector<double> > gamma_jm(accs.num_states_);
-  for (int32 j = 0; j < accs.num_states_; j++) {  // Initialize gamma_jm
-    gamma_jm[j].Resize(model->NumSubstates(j));
-    for (int32 m = 0; m < model->NumSubstates(j); m++) {
-      k_count += (gamma_jm[j](m) = accs.gamma_[j].Row(m).Sum());
-    }
-  }
-
-  Matrix<double> w(model->w_);
-
-  for (int iter = 0; iter < update_options_.weight_projections_iters; iter++) {
-    double k_delta_predicted = 0.0, k_delta_observed = 0.0;
-
-    // log total of un-normalized weights for each j, m
-    std::vector< Vector<double> > weight_tots(accs.num_states_);
-
-    // Initialize weight_tots
-    for (int32 j = 0; j < accs.num_states_; j++) {
-      weight_tots[j].Resize(model->NumSubstates(j));
-      for (int32 m = 0; m < model->NumSubstates(j); m++) {
-        w_jm.AddMatVec(1.0, w, kNoTrans, Vector<double>(model->v_[j].Row(m)), 0.0);
-        weight_tots[j](m) = w_jm.LogSumExp();
-      }
-    }
-
-    for (int32 i = 0; i < accs.num_gaussians_; i++) {
-      F_i.SetZero();
-      g_i.SetZero();
-      SubVector<double> w_i = w.Row(i);
-
-      for (int32 j = 0; j < accs.num_states_; j++) {
-        for (int32 m = 0; m < model->NumSubstates(j); m++) {
-          double this_unnormalized_weight = VecVec(w_i, model->v_[j].Row(m));
-          double normalizer = weight_tots[j](m);
-          double this_log_w = this_unnormalized_weight - normalizer,
-              this_w = Exp(this_log_w),
-              substate_count = gamma_jm[j](m),
-              this_count = accs.gamma_[j](m, i);
-
-          double linear_term = this_count - substate_count * this_w;
-          double quadratic_term = std::max(this_count, substate_count * this_w);
-
-          g_i.AddVec(linear_term, model->v_[j].Row(m));
-          // should not ever be zero, but check anyway.
-          if (quadratic_term != 0.0)
-            F_i.AddVec2(static_cast<double>(quadratic_term), model->v_[j].Row(m));
-        }
-      }
-
-      SolverOptions opts;
-      opts.name = "w";
-      opts.K = update_options_.max_cond;
-      opts.eps = update_options_.epsilon;
-
-      // auxf is formulated in terms of change in w.
-      Vector<double> delta_w(accs.phn_space_dim_);
-      // returns objf impr with step_size = 1,
-      // but it may not be 1 so we recalculate it.
-      SolveQuadraticProblem(F_i,
-                            g_i,
-                            opts,
-                            &delta_w);
-
-      try {  // In case we have a problem in LogSub.
-        double step_size, min_step = 0.0001;
-        for (step_size = 1.0; step_size >= min_step; step_size /= 2) {
-          Vector<double> new_w_i(w_i);
-          // copy it in case we do not commit this change.
-          std::vector< Vector<double> > new_weight_tots(weight_tots);
-          new_w_i.AddVec(step_size, delta_w);
-          double predicted_impr = step_size * VecVec(delta_w, g_i) -
-              0.5 * step_size * step_size * VecSpVec(delta_w, F_i, delta_w);
-          if (predicted_impr < -0.1) {
-            KALDI_WARN << "Negative predicted auxf improvement " <<
-                (predicted_impr) << ", not updating this Gaussian " <<
-                "(either numerical problems or a code mistake).";
-            break;
-          }
-          // Now compute observed objf change.
-          double observed_impr = 0.0, this_tot_count = 0.0;
-
-          for (int32 j = 0; j < accs.num_states_; j++) {
-            for (int32 m = 0; m < model->NumSubstates(j); m++) {
-              double old_unnorm_weight = VecVec(w_i, model->v_[j].Row(m)),
-                  new_unnorm_weight = VecVec(new_w_i, model->v_[j].Row(m)),
-                  substate_count = gamma_jm[j](m),
-                  this_count = accs.gamma_[j](m, i);
-              this_tot_count += this_count;
-              observed_impr += this_count *  // from numerator.
- (new_unnorm_weight - old_unnorm_weight); - double old_normalizer = new_weight_tots[j](m), delta; - if (new_unnorm_weight > old_unnorm_weight) { - delta = LogAdd(0, LogSub(new_unnorm_weight - old_normalizer, - old_unnorm_weight - old_normalizer)); - } else { - delta = LogSub(0, LogSub(old_unnorm_weight - old_normalizer, - new_unnorm_weight - old_normalizer)); - // The if-statement above is equivalent to: - // delta = LogAdd(LogSub(0, - // old_unnorm_weight-old_normalizer), - // new_unnorm_weight-old_normalizer) - // but has better behaviour numerically. - } - observed_impr -= substate_count * delta; - new_weight_tots[j](m) += delta; - } - } - if (observed_impr < 0.0) { // failed, so we reduce step size. - KALDI_LOG << "Updating weights, for i = " << (i) << ", predicted " - "auxf: " << (predicted_impr/(this_tot_count + 1.0e-20)) - << ", observed " << observed_impr/(this_tot_count + 1.0e-20) - << " over " << this_tot_count << " frames. Reducing step size " - << "to " << (step_size/2); - if (predicted_impr / (this_tot_count + 1.0e-20) < 1.0e-07) { - KALDI_WARN << "Not updating this weight vector as auxf decreased" - << " probably due to numerical issues (since small change)."; - break; - } - } else { - if (i < 10) - KALDI_LOG << "Updating weights, for i = " << (i) - << ", auxf change per frame is" << ": predicted " << - (predicted_impr /(this_tot_count + 1.0e-20)) << ", observed " - << (observed_impr / (this_tot_count + 1.0e-20)) - << " over " << (this_tot_count) << " frames."; - - k_delta_predicted += predicted_impr; - k_delta_observed += observed_impr; - w.Row(i).CopyFromVec(new_w_i); - weight_tots = new_weight_tots; // Copy over normalizers. - break; - } - } - } catch(...) { - KALDI_LOG << "Warning: weight update for i = " << i - << " failed, possible numerical problem."; - } - } - KALDI_LOG << "For iteration " << iter << ", updating w gives predicted " - << "per-frame like impr " << (k_delta_predicted / k_count) << - ", observed " << (k_delta_observed / k_count) << ", over " << (k_count) - << " frames"; - if (iter == 0) tot_count += k_count; - tot_delta_predicted += k_delta_predicted; - tot_delta_observed += k_delta_observed; - } - - model->w_.CopyFromMat(w); - - tot_delta_observed /= tot_count; - tot_delta_predicted /= tot_count; - KALDI_LOG << "**Overall objf impr for w is " << tot_delta_predicted - << ", observed " << tot_delta_observed << ", over " - << tot_count << " frames"; - return tot_delta_observed; -} - -double MleAmSgmmUpdater::UpdateN(const MleAmSgmmAccs &accs, - AmSgmm *model) { - double tot_count = 0.0, tot_like_impr = 0.0; - if (accs.spk_space_dim_ == 0 || accs.R_.size() == 0 || accs.Z_.size() == 0) { - KALDI_ERR << "Speaker subspace dim is zero or no stats accumulated"; - } - - Vector gamma_i(accs.num_gaussians_); - for (int32 j = 0; j < accs.num_states_; j++) { - for (int32 m = 0; m < model->NumSubstates(j); m++) { - gamma_i.AddVec(1.0, accs.gamma_[j].Row(m)); - } - } - - SolverOptions opts; - opts.name = "N"; - opts.K = update_options_.max_cond; - opts.eps = update_options_.epsilon; - - for (int32 i = 0; i < accs.num_gaussians_; i++) { - if (gamma_i(i) < 2 * accs.spk_space_dim_) { - KALDI_WARN << "Not updating speaker basis for i = " << (i) - << " because count is too small " << (gamma_i(i)); - continue; - } - Matrix Ni(model->N_[i]); - double impr = - SolveQuadraticMatrixProblem(accs.R_[i], accs.Z_[i], - SpMatrix(model->SigmaInv_[i]), - opts, &Ni); - model->N_[i].CopyFromMat(Ni); - if (i < 10) { - KALDI_LOG << "Objf impr for spk projection N for i = " << (i) - << ", 
is " << (impr / (gamma_i(i) + 1.0e-20)) << " over " - << (gamma_i(i)) << " frames"; - } - tot_count += gamma_i(i); - tot_like_impr += impr; - } - - tot_like_impr /= (tot_count+1.0e-20); - KALDI_LOG << "**Overall objf impr for N is " << tot_like_impr << " over " - << tot_count << " frames"; - return tot_like_impr; -} - -void MleAmSgmmUpdater::RenormalizeN( - const MleAmSgmmAccs &accs, AmSgmm *model) { - KALDI_ASSERT(accs.R_.size() != 0); - Vector gamma_i(accs.num_gaussians_); - for (int32 j = 0; j < accs.num_states_; j++) { - for (int32 m = 0; m < model->NumSubstates(j); m++) { - gamma_i.AddVec(1.0, accs.gamma_[j].Row(m)); - } - } - double tot_count = gamma_i.Sum(); - if (tot_count == 0) { - KALDI_WARN << "Not renormalizing N, since there are no counts."; - return; - } - - SpMatrix RTot(accs.spk_space_dim_); - // for (int32 i = 0; i < accs.num_gaussians_; i++) { - // RTot.AddSp(1.0, accs.R_[i]); - // } - for (int32 i = 0; i < accs.num_gaussians_; i++) { - RTot.AddSp(gamma_i(i), accs.R_[i]); - } - RTot.Scale(1.0 / tot_count); - Matrix U(accs.spk_space_dim_, accs.spk_space_dim_); - Vector eigs(accs.spk_space_dim_); - RTot.SymPosSemiDefEig(&eigs, &U); - KALDI_LOG << "Renormalizing N, eigs are: " << (eigs); - Vector sqrteigs(accs.spk_space_dim_); - for (int32 t = 0; t < accs.spk_space_dim_; t++) { - sqrteigs(t) = sqrt(eigs(t)); - } - // e.g. diag(eigs)^{-0.5} * U' * RTot * U * diag(eigs)^{-0.5} = 1 - // But inverse transpose of this transformation needs to take place on R, - // i.e. not (on left: diag(eigs)^{-0.5} * U') - // but: (inverse it: U . diag(eigs)^{0.5}, - // transpose it: diag(eigs)^{0.5} U^T. Need to do this on the right to N - // (because N has the spk vecs on the right), so N := N U diag(eigs)^{0.5} - U.MulColsVec(sqrteigs); - Matrix Ntmp(accs.feature_dim_, accs.spk_space_dim_); - for (int32 i = 0; i < accs.num_gaussians_; i++) { - Ntmp.AddMatMat(1.0, Matrix(model->N_[i]), kNoTrans, U, kNoTrans, 0.0); - model->N_[i].CopyFromMat(Ntmp); - } -} - - -double MleAmSgmmUpdater::UpdateVars(const MleAmSgmmAccs &accs, - AmSgmm *model) { - KALDI_ASSERT(S_means_.size() == static_cast(accs.num_gaussians_) && - "Must call PreComputeStats before updating the covariances."); - SpMatrix Sigma_i(accs.feature_dim_), Sigma_i_ml(accs.feature_dim_); - double tot_objf_impr = 0.0, tot_t = 0.0; - SpMatrix covfloor(accs.feature_dim_); - Vector gamma_vec(accs.num_gaussians_); - Vector objf_improv(accs.num_gaussians_); - - // First pass over all (shared) Gaussian components to calculate the - // ML estimate of the covariances, and the total covariance for flooring. - for (int32 i = 0; i < accs.num_gaussians_; i++) { - double gamma_i = 0; - for (int32 j = 0; j < accs.num_states_; j++) - for (int32 m = 0, end = model->NumSubstates(j); m < end; m++) - gamma_i += accs.gamma_[j](m, i); - - // Eq. (75): Sigma_{i}^{ml} = 1/gamma_{i} [S_{i} + S_{i}^{(means)} - ... - // Y_{i} M_{i}^T - M_{i} Y_{i}^T] - // Note the S_means_ already contains the Y_{i} M_{i}^T terms. - Sigma_i_ml.CopyFromSp(S_means_[i]); - Sigma_i_ml.AddSp(1.0, accs.S_[i]); - - gamma_vec(i) = gamma_i; - covfloor.AddSp(1.0, Sigma_i_ml); - // inverting small values e.g. 4.41745328e-40 seems to generate inf, - // although would be fixed up later. - if (gamma_i > 1.0e-20) { - Sigma_i_ml.Scale(1 / (gamma_i + 1.0e-20)); - } else { - Sigma_i_ml.SetUnit(); - } - KALDI_ASSERT(1.0 / Sigma_i_ml(0, 0) != 0.0); - // Eq. 
(76): Compute the objective function with the old parameter values
-    objf_improv(i) = model->SigmaInv_[i].LogPosDefDet() -
-        TraceSpSp(SpMatrix<double>(model->SigmaInv_[i]), Sigma_i_ml);
-
-    model->SigmaInv_[i].CopyFromSp(Sigma_i_ml);  // inverted in the next loop.
-  }
-
-  // Compute the covariance floor.
-  if (gamma_vec.Sum() == 0) {  // If no count, use identity.
-    KALDI_WARN << "Updating variances: zero counts. Setting floor to unit.";
-    covfloor.SetUnit();
-  } else {  // else, use the global average covariance.
-    covfloor.Scale(update_options_.cov_floor / gamma_vec.Sum());
-    int32 tmp;
-    if ((tmp = covfloor.LimitCondDouble(update_options_.max_cond)) != 0) {
-      KALDI_WARN << "Covariance flooring matrix is poorly conditioned. Fixed "
-                 << "up " << (tmp) << " eigenvalues.";
-    }
-  }
-
-  if (update_options_.cov_diag_ratio > 1000) {
-    KALDI_LOG << "Assuming you want to build a diagonal system since "
-              << "cov_diag_ratio is large: making diagonal covFloor.";
-    for (int32 i = 0; i < covfloor.NumRows(); i++)
-      for (int32 j = 0; j < i; j++)
-        covfloor(i, j) = 0.0;
-  }
-
-  // Second pass over all (shared) Gaussian components to calculate the
-  // floored estimate of the covariances, and update the model.
-  for (int32 i = 0; i < accs.num_gaussians_; i++) {
-    Sigma_i.CopyFromSp(model->SigmaInv_[i]);
-    Sigma_i_ml.CopyFromSp(Sigma_i);
-    // In case of insufficient counts, make the covariance matrix diagonal.
-    // cov_diag_ratio is 2 by default, set to very large to always get diag-cov
-    if (gamma_vec(i) < update_options_.cov_diag_ratio * accs.feature_dim_) {
-      KALDI_WARN << "For Gaussian component " << i << ": Too low count "
-                 << gamma_vec(i) << " for covariance matrix estimation. Setting to "
-                 << "diagonal";
-      for (int32 d = 0; d < accs.feature_dim_; d++)
-        for (int32 e = 0; e < d; e++)
-          Sigma_i(d, e) = 0.0;  // SpMatrix, can only set lower triangular part
-
-      int floored = Sigma_i.ApplyFloor(covfloor);
-      if (floored > 0) {
-        KALDI_WARN << "For Gaussian component " << i << ": Floored " << floored
-                   << " covariance eigenvalues.";
-      }
-      model->SigmaInv_[i].CopyFromSp(Sigma_i);
-      model->SigmaInv_[i].InvertDouble();
-    } else {  // Updating the full covariance matrix.
-      try {
-        int floored = Sigma_i.ApplyFloor(covfloor);
-        if (floored > 0) {
-          KALDI_WARN << "For Gaussian component " << i << ": Floored "
-                     << floored << " covariance eigenvalues.";
-        }
-        model->SigmaInv_[i].CopyFromSp(Sigma_i);
-        model->SigmaInv_[i].InvertDouble();
-
-        objf_improv(i) += Sigma_i.LogPosDefDet() +
-            TraceSpSp(SpMatrix<double>(model->SigmaInv_[i]), Sigma_i_ml);
-        objf_improv(i) *= (-0.5 * gamma_vec(i));  // Eq. (76)
-
-        tot_objf_impr += objf_improv(i);
-        tot_t += gamma_vec(i);
-        if (i < 5) {
-          KALDI_VLOG(2) << "objf impr from variance update =" << objf_improv(i)
-              / (gamma_vec(i) + 1.0e-20) << " over " << (gamma_vec(i))
-              << " frames for i = " << (i);
-        }
-      } catch(...) {
-        KALDI_WARN << "Updating within-class covariance matrix i = " << (i)
-                   << ", numerical problem";
-        // This is a catch-all thing in case of unanticipated errors, but
-        // flooring should prevent this occurring for the most part.
-        model->SigmaInv_[i].SetUnit();  // Set to unit.
- } - } - } - KALDI_LOG << "**Overall objf impr for variance update = " - << (tot_objf_impr / (tot_t+ 1.0e-20)) - << " over " << (tot_t) << " frames"; - return tot_objf_impr / (tot_t + 1.0e-20); -} - - -double MleAmSgmmUpdater::UpdateSubstateWeights( - const MleAmSgmmAccs &accs, AmSgmm *model) { - KALDI_LOG << "Updating substate mixture weights"; - // Also set the vector gamma_j which is a cache of the state occupancies. - gamma_j_.Resize(accs.num_states_); - - double tot_gamma = 0.0, objf_impr = 0.0; - for (int32 j = 0; j < accs.num_states_; j++) { - double gamma_j_sm = 0.0; - int32 num_substates = model->NumSubstates(j); - Vector occs(num_substates), - smoothed_occs(num_substates); - for (int32 m = 0; m < num_substates; m++) { - occs(m) = accs.gamma_[j].Row(m).Sum(); // \sum_i gamma_{jmi} - gamma_j_(j) += occs(m); // actual state occupancy. - smoothed_occs(m) = occs(m) + update_options_.tau_c; - gamma_j_sm += smoothed_occs(m); // smoothed state occupancy for update. - } - - for (int32 m = 0; m < num_substates; m++) { - double cur_weight = model->c_[j](m); - if (cur_weight <= 0) { - KALDI_WARN << "Zero or negative weight, flooring"; - cur_weight = 1.0e-10; // future work(arnab): remove magic numbers - } - model->c_[j](m) = smoothed_occs(m) / gamma_j_sm; - objf_impr += Log(model->c_[j](m) / cur_weight) * occs(m); - } - tot_gamma += gamma_j_(j); - } - objf_impr /= (tot_gamma + 1.0e-20); - KALDI_LOG << "**Overall objf impr for c is " << objf_impr << ", over " - << tot_gamma << " frames."; - return objf_impr; -} - - -MleSgmmSpeakerAccs::MleSgmmSpeakerAccs(const AmSgmm &model, BaseFloat prune) - : rand_prune_(prune) { - KALDI_ASSERT(model.SpkSpaceDim() != 0); - H_spk_.resize(model.NumGauss()); - for (int32 i = 0; i < model.NumGauss(); i++) { - // Eq. (82): H_{i}^{spk} = N_{i}^T \Sigma_{i}^{-1} N_{i} - H_spk_[i].Resize(model.SpkSpaceDim()); - H_spk_[i].AddMat2Sp(1.0, Matrix(model.N_[i]), - kTrans, SpMatrix(model.SigmaInv_[i]), 0.0); - } - - model.GetNtransSigmaInv(&NtransSigmaInv_); - - gamma_s_.Resize(model.NumGauss()); - y_s_.Resize(model.SpkSpaceDim()); -} - -void MleSgmmSpeakerAccs::Clear() { - y_s_.SetZero(); - gamma_s_.SetZero(); -} - - -BaseFloat -MleSgmmSpeakerAccs::Accumulate(const AmSgmm &model, - const SgmmPerFrameDerivedVars &frame_vars, - int32 j, - BaseFloat weight) { - // Calculate Gaussian posteriors and collect statistics - Matrix posteriors; - BaseFloat log_like = model.ComponentPosteriors(frame_vars, j, &posteriors); - posteriors.Scale(weight); - AccumulateFromPosteriors(model, frame_vars, posteriors, j); - return log_like; -} - -BaseFloat -MleSgmmSpeakerAccs::AccumulateFromPosteriors(const AmSgmm &model, - const SgmmPerFrameDerivedVars &frame_vars, - const Matrix &posteriors, - int32 j) { - double tot_count = 0.0; - int32 feature_dim = model.FeatureDim(), - spk_space_dim = model.SpkSpaceDim(); - KALDI_ASSERT(spk_space_dim != 0); - const vector &gselect = frame_vars.gselect; - - // Intermediate variables - Vector xt_jmi(feature_dim), mu_jmi(feature_dim), - zt_jmi(spk_space_dim); - int32 num_substates = model.NumSubstates(j); - for (int32 ki = 0; ki < static_cast(gselect.size()); ki++) { - int32 i = gselect[ki]; - for (int32 m = 0; m < num_substates; m++) { - // Eq. (39): gamma_{jmi}(t) = p (j, m, i|t) - BaseFloat gammat_jmi = RandPrune(posteriors(ki, m), rand_prune_); - if (gammat_jmi != 0.0) { - tot_count += gammat_jmi; - model.GetSubstateMean(j, m, i, &mu_jmi); - xt_jmi.CopyFromVec(frame_vars.xt); - xt_jmi.AddVec(-1.0, mu_jmi); - // Eq. 
(48): z_{jmi}(t) = N_{i}^{T} \Sigma_{i}^{-1} x_{jmi}(t)
-        zt_jmi.AddMatVec(1.0, NtransSigmaInv_[i], kNoTrans, xt_jmi, 0.0);
-        // Eq. (49): \gamma_{i}^{(s)} = \sum_{t\in\Tau(s), j, m} gamma_{jmi}
-        gamma_s_(i) += gammat_jmi;
-        // Eq. (50): y^{(s)} = \sum_{t, j, m, i} gamma_{jmi}(t) z_{jmi}(t)
-        y_s_.AddVec(gammat_jmi, zt_jmi);
-      }
-    }
-  }
-  return tot_count;
-}
-
-void MleSgmmSpeakerAccs::Update(BaseFloat min_count,
-                                Vector<BaseFloat> *v_s,
-                                BaseFloat *objf_impr_out,
-                                BaseFloat *count_out) {
-  double tot_gamma = gamma_s_.Sum();
-  KALDI_ASSERT(y_s_.Dim() != 0);
-  int32 T = y_s_.Dim();  // speaker-subspace dim.
-  int32 num_gauss = gamma_s_.Dim();
-  if (v_s->Dim() != T) v_s->Resize(T);  // will set it to zero.
-
-  if (tot_gamma < min_count) {
-    KALDI_WARN << "Updating speaker vectors, count is " << tot_gamma
-               << " < " << min_count << "; not updating.";
-    if (objf_impr_out) *objf_impr_out = 0.0;
-    if (count_out) *count_out = 0.0;
-    return;
-  }
-
-  // Eq. (84): H^{(s)} = \sum_{i} \gamma_{i}(s) H_{i}^{spk}
-  SpMatrix<double> H_s(T);
-
-  for (int32 i = 0; i < num_gauss; i++)
-    H_s.AddSp(gamma_s_(i), H_spk_[i]);
-
-
-  // Don't make these options to SolveQuadraticProblem configurable...
-  // they really don't make a difference at all unless the matrix in
-  // question is singular, which wouldn't happen in this case.
-  Vector<double> v_s_dbl(*v_s);
-  double tot_objf_impr =
-      SolveQuadraticProblem(H_s, y_s_, SolverOptions("v_s"), &v_s_dbl);
-  v_s->CopyFromVec(v_s_dbl);
-
-  KALDI_LOG << "*Objf impr for speaker vector is " << (tot_objf_impr / tot_gamma)
-            << " over " << (tot_gamma) << " frames.";
-
-  if (objf_impr_out) *objf_impr_out = tot_objf_impr;
-  if (count_out) *count_out = tot_gamma;
-}
-
-
-MleAmSgmmAccs::~MleAmSgmmAccs() {
-  if (gamma_s_.Sum() != 0.0)
-    KALDI_ERR << "In destructor of MleAmSgmmAccs: detected that you forgot to "
-        "call CommitStatsForSpk()";
-}
-
-
-}  // namespace kaldi
diff --git a/src/sgmm/estimate-am-sgmm.h b/src/sgmm/estimate-am-sgmm.h
deleted file mode 100644
index c5c499dcd7d..00000000000
--- a/src/sgmm/estimate-am-sgmm.h
+++ /dev/null
@@ -1,475 +0,0 @@
-// sgmm/estimate-am-sgmm.h
-
-// Copyright 2009-2011  Microsoft Corporation;  Lukas Burget;
-//                      Saarland University (Author: Arnab Ghoshal);
-//                      Ondrej Glembek;  Yanmin Qian;
-// Copyright 2012-2013  Johns Hopkins University (Author: Daniel Povey)
-//                      Liang Lu;  Arnab Ghoshal
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_SGMM_ESTIMATE_AM_SGMM_H_
-#define KALDI_SGMM_ESTIMATE_AM_SGMM_H_ 1
-
-#include <string>
-#include <vector>
-
-#include "sgmm/am-sgmm.h"
-#include "gmm/model-common.h"
-#include "itf/options-itf.h"
-#include "sgmm/sgmm-clusterable.h"
-#include "thread/kaldi-thread.h"  // for MultiThreadable
-
-namespace kaldi {
-
-/** \struct MleAmSgmmOptions
- *  Configuration variables needed in the SGMM estimation process.
- */
-struct MleAmSgmmOptions {
-  /// Configuration Parameters. 
See initialization code for more comments. - BaseFloat tau_vec; ///< Amount of smoothing for v_{jm} update - BaseFloat tau_c; ///< Tau value for smoothing substate weights (c) - /// Floor covariance matrices Sigma_i to this times average cov. - BaseFloat cov_floor; - /// ratio to dim below which we use diagonal. default 2, set to inf for diag. - BaseFloat cov_diag_ratio; - /// Max on condition of matrices in update beyond which we do not update. - /// Should probably be related to numerical properties of machine - /// or BaseFloat type. - BaseFloat max_cond; - /// Limits condition of smoothing matrices H_sm (e.g. 100). - /// Only really important on 1st iter if using priors. - BaseFloat max_cond_H_sm; - /// Fix for the smoothing approach, necessary if max_cond_H_sm != inf - /// note: only has an effect if tau_vec != 0. - bool fixup_H_sm; - /// Set check_v to true if you want to use the "checking" version of the update - /// for the v's, in which it checks the "real" objective function value and - /// backtracks if necessary; - bool check_v; - - bool renormalize_V; // Renormalize the phonetic space. - bool renormalize_N; // Renormalize the speaker space. - - /// Number of iters when re-estimating weight projections "w". - int weight_projections_iters; - /// The "sequential" weight update that checks each i in turn. - /// (if false, uses the "parallel" one). - bool use_sequential_weight_update; - - BaseFloat epsilon; ///< very small value used to prevent SVD crashing. - - BaseFloat tau_map_M; ///< For MAP update of the phonetic subspace M - int map_M_prior_iters; ///< num of iterations to update the prior of M - bool full_row_cov; ///< Estimate row covariance instead of using I - bool full_col_cov; ///< Estimate col covariance instead of using I - - MleAmSgmmOptions() { - // tau value used in smoothing vector re-estimation (if no prior used). - tau_vec = 0.0; - tau_c = 5.0; - cov_floor = 0.025; - cov_diag_ratio = 2.0; // set to very large to get diagonal-cov models. - max_cond = 1.0e+05; - epsilon = 1.0e-40; - max_cond_H_sm = 1.0e+05; // only for diagnostics in normal situations. - fixup_H_sm = true; - check_v = false; // for back-compat. - renormalize_V = true; - renormalize_N = false; // default to false since will invalidate spk vectors - // on disk. 
- weight_projections_iters = 3; - use_sequential_weight_update = false; - - map_M_prior_iters = 5; - tau_map_M = 0.0; // No MAP update by default (~500-1000 depending on prior) - full_row_cov = false; - full_col_cov = false; - } - - void Register(OptionsItf *opts) { - std::string module = "MleAmSgmmOptions: "; - opts->Register("tau-vec", &tau_vec, module+ - "Smoothing for phone vector estimation."); - opts->Register("tau-c", &tau_c, module+ - "Smoothing for substate weights estimation."); - opts->Register("cov-floor", &cov_floor, module+ - "Covariance floor (fraction of average covariance)."); - opts->Register("cov-diag-ratio", &cov_diag_ratio, module+ - "Minimum occ/dim ratio below which use diagonal covariances."); - opts->Register("max-cond", &max_cond, module+"Maximum condition number beyond" - " which matrices are not updated."); - opts->Register("weight-projections-iters", &weight_projections_iters, module+ - "Number for iterations for weight projection estimation."); - opts->Register("renormalize-v", &renormalize_V, module+"If true, renormalize " - "the phonetic-subspace vectors to have meaningful sizes."); - opts->Register("check-v", &check_v, module+"If true, check real auxf " - "improvement in update of v and backtrack if needed " - "(not compatible with smoothing v)"); - opts->Register("renormalize-n", &renormalize_N, module+"If true, renormalize " - "the speaker subspace to have meaningful sizes."); - - opts->Register("tau-map-M", &tau_map_M, module+"Smoothing for MAP estimate " - "of M (0 means ML update)."); - opts->Register("map-M-prior-iters", &map_M_prior_iters, module+ - "Number of iterations to estimate prior covariances for M."); - opts->Register("full-row-cov", &full_row_cov, module+ - "Estimate row covariance instead of using I."); - opts->Register("full-col-cov", &full_col_cov, module+ - "Estimate column covariance instead of using I."); - } -}; - -/** \class MleAmSgmmAccs - * Class for the accumulators associated with the SGMM parameters except - * speaker vectors. - */ -class MleAmSgmmAccs { - public: - explicit MleAmSgmmAccs(BaseFloat rand_prune = 1.0e-05) - : total_frames_(0.0), total_like_(0.0), feature_dim_(0), - phn_space_dim_(0), spk_space_dim_(0), num_gaussians_(0), - num_states_(0), rand_prune_(rand_prune) {} - - MleAmSgmmAccs(const AmSgmm &model, SgmmUpdateFlagsType flags, - BaseFloat rand_prune = 1.0e-05) - : total_frames_(0.0), total_like_(0.0), rand_prune_(rand_prune) { - ResizeAccumulators(model, flags); - } - - ~MleAmSgmmAccs(); - - void Read(std::istream &in_stream, bool binary, bool add); - void Write(std::ostream &out_stream, bool binary) const; - - /// Checks the various accumulators for correct sizes given a model. With - /// wrong sizes, assertion failure occurs. When the show_properties argument - /// is set to true, dimensions and presence/absence of the various - /// accumulators are printed. For use when accumulators are read from file. - void Check(const AmSgmm &model, bool show_properties = true) const; - - /// Resizes the accumulators to the correct sizes given the model. The flags - /// argument control which accumulators to resize. - void ResizeAccumulators(const AmSgmm &model, SgmmUpdateFlagsType flags); - - /// Returns likelihood. - BaseFloat Accumulate(const AmSgmm &model, - const SgmmPerFrameDerivedVars &frame_vars, - const VectorBase &v_s, // spk-vec, may be empty - int32 state_index, BaseFloat weight, - SgmmUpdateFlagsType flags); - - /// Returns count accumulated (may differ from posteriors.Sum() - /// due to weight pruning). 
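-  /// (The pruning is randomized via RandPrune(), which zeroes small
-  /// posteriors in a way that preserves their expected value; this is
-  /// why the returned count can differ from posteriors.Sum().)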
- BaseFloat AccumulateFromPosteriors(const AmSgmm &model, - const SgmmPerFrameDerivedVars &frame_vars, - const Matrix &posteriors, - const VectorBase &v_s, // may be empty - int32 state_index, - SgmmUpdateFlagsType flags); - - /// Accumulates global stats for the current speaker (if applicable). - /// If flags contains kSgmmSpeakerProjections (N), must call - /// this after finishing the speaker's data. - void CommitStatsForSpk(const AmSgmm &model, - const VectorBase &v_s); - - /// Accessors - void GetStateOccupancies(Vector *occs) const; - const std::vector< Matrix >& GetOccs() const { - return gamma_; - } - int32 FeatureDim() const { return feature_dim_; } - int32 PhoneSpaceDim() const { return phn_space_dim_; } - int32 NumStates() const { return num_states_; } - int32 NumGauss() const { return num_gaussians_; } - double TotalFrames() const { return total_frames_; } - double TotalLike() const { return total_like_; } - - private: - /// The stats which are not tied to any state. - /// Stats Y_{i} for phonetic-subspace projections M; Dim is [I][D][S]. - std::vector< Matrix > Y_; - /// Stats Z_{i} for speaker-subspace projections N. Dim is [I][D][T]. - std::vector< Matrix > Z_; - /// R_{i}, quadratic term for speaker subspace estimation. Dim is [I][T][T] - std::vector< SpMatrix > R_; - /// S_{i}^{-}, scatter of adapted feature vectors x_{i}(t). Dim is [I][D][D]. - std::vector< SpMatrix > S_; - - /// The SGMM state specific stats. - /// Statistics y_{jm} for state vectors v_{jm}. dimension is [J][M_{j}[S]. - std::vector< Matrix > y_; - /// Gaussian occupancies gamma_{jmi} for each substate. Dim is [J][M_{j}][I]. - std::vector< Matrix > gamma_; - - /// gamma_{i}^{(s)}. Per-speaker counts for each Gaussian. Dimension is [I] - /// Needed for stats R_. - Vector gamma_s_; - - double total_frames_, total_like_; - - /// Dimensionality of various subspaces - int32 feature_dim_, phn_space_dim_, spk_space_dim_; - int32 num_gaussians_, num_states_; ///< Other model specifications - - BaseFloat rand_prune_; - - KALDI_DISALLOW_COPY_AND_ASSIGN(MleAmSgmmAccs); - friend class MleAmSgmmUpdater; - friend class EbwAmSgmmUpdater; - friend class MleAmSgmmGlobalAccs; -}; - -/** \class MleAmSgmmUpdater - * Contains the functions needed to update the SGMM parameters. - */ -class MleAmSgmmUpdater { - public: - explicit MleAmSgmmUpdater(const MleAmSgmmOptions &options) - : update_options_(options) {} - void Reconfigure(const MleAmSgmmOptions &options) { - update_options_ = options; - } - - /// Main update function: Computes some overall stats, does parameter updates - /// and returns the total improvement of the different auxiliary functions. - BaseFloat Update(const MleAmSgmmAccs &accs, - AmSgmm *model, - SgmmUpdateFlagsType flags); - - /// This function is like UpdatePhoneVectorsChecked, which supports - /// objective-function checking and backtracking but no smoothing term, but it - /// takes as input the stats used in SGMM-based tree clustering-- this is used - /// in initializing an SGMM from the tree stats. It's not part of the - /// normal recipe. - double UpdatePhoneVectorsCheckedFromClusterable( - const std::vector &stats, - const std::vector > &H, - AmSgmm *model); - - protected: - friend class UpdateWParallelClass; - friend class UpdatePhoneVectorsClass; - friend class UpdatePhoneVectorsCheckedFromClusterableClass; - friend class EbwEstimateAmSgmm; - - /// Compute the Q_i quantities (Eq. 64). 
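-  /// (Roughly, Q_i accumulates \sum_{j,m} \gamma_{jmi} v_{jm} v_{jm}^T;
-  /// this is the quadratic term used when updating the projections M_i.
-  /// See the referenced equation for the exact definition.)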
-  static void ComputeQ(const MleAmSgmmAccs &accs,
-                       const AmSgmm &model,
-                       std::vector< SpMatrix<double> > *Q);
-
-  /// Compute the S_means quantities, minus sum: (Y_i M_i^T + M_i Y_i^T).
-  static void ComputeSMeans(const MleAmSgmmAccs &accs,
-                            const AmSgmm &model,
-                            std::vector< SpMatrix<double> > *S_means);
-  friend class EbwAmSgmmUpdater;
- private:
-  MleAmSgmmOptions update_options_;
-  /// Q_{i}, quadratic term for phonetic subspace estimation. Dim is [I][S][S]
-  std::vector< SpMatrix<double> > Q_;
-
-  /// Eq (74): S_{i}^{(means)}, scatter of substate mean vectors for estimating
-  /// the shared covariance matrices. [Actually this variable contains also the
-  /// term -(Y_i M_i^T + M_i Y_i^T).] Dimension is [I][D][D].
-  std::vector< SpMatrix<double> > S_means_;
-
-  Vector<double> gamma_j_;  ///< State occupancies
-
-
-  void ComputeSmoothingTerms(const MleAmSgmmAccs &accs,
-                             const AmSgmm &model,
-                             const std::vector< SpMatrix<double> > &H,
-                             SpMatrix<double> *H_sm,
-                             Vector<double> *y_sm) const;
-
-  // UpdatePhoneVectors function that allows smoothing terms (but
-  // no checking of proper auxiliary function RE weights)
-  double UpdatePhoneVectors(const MleAmSgmmAccs &accs,
-                            AmSgmm *model,
-                            const std::vector< SpMatrix<double> > &H,
-                            const SpMatrix<double> &H_sm,
-                            const Vector<double> &y_sm);
-
-
-  // Called from UpdatePhoneVectors; updates a subset of states
-  // (relates to multi-threading).
-  void UpdatePhoneVectorsInternal(const MleAmSgmmAccs &accs,
-                                  AmSgmm *model,
-                                  const std::vector< SpMatrix<double> > &H,
-                                  const SpMatrix<double> &H_sm,
-                                  const Vector<double> &y_sm,
-                                  double *auxf_impr,
-                                  double *like_impr,
-                                  int32 num_threads,
-                                  int32 thread_id) const;
-
-  // UpdatePhoneVectors function that does not support smoothing
-  // terms, but allows checking of objective-function improvement,
-  // and backtracking.
-  double UpdatePhoneVectorsChecked(const MleAmSgmmAccs &accs,
-                                   AmSgmm *model,
-                                   const std::vector< SpMatrix<double> > &H);
-
-  // Called (indirectly) from UpdatePhoneVectorsCheckedFromClusterable()
-  void UpdatePhoneVectorsCheckedFromClusterableInternal(
-      const std::vector<SgmmClusterable*> &stats,
-      const std::vector< SpMatrix<double> > &H,
-      AmSgmm *model,
-      double *count_ptr,
-      double *like_impr_ptr,
-      int32 num_threads,
-      int32 thread_id);
-
-  double UpdateM(const MleAmSgmmAccs &accs, AmSgmm *model);
-
-  void RenormalizeV(const MleAmSgmmAccs &accs, AmSgmm *model,
-                    const SpMatrix<double> &H_sm);
-  double UpdateN(const MleAmSgmmAccs &accs, AmSgmm *model);
-  void RenormalizeN(const MleAmSgmmAccs &accs, AmSgmm *model);
-  double UpdateVars(const MleAmSgmmAccs &accs, AmSgmm *model);
-  double UpdateWParallel(const MleAmSgmmAccs &accs, AmSgmm *model);
-
-  /// Called, multithreaded, inside UpdateWParallel
-  static
-  void UpdateWParallelGetStats(const MleAmSgmmAccs &accs,
-                               const AmSgmm &model,
-                               const Matrix<double> &w,
-                               Matrix<double> *F_i,
-                               Matrix<double> *g_i,
-                               double *tot_like,
-                               int32 num_threads,
-                               int32 thread_id);
-
-  double UpdateWSequential(const MleAmSgmmAccs &accs,
-                           AmSgmm *model);
-  double UpdateSubstateWeights(const MleAmSgmmAccs &accs,
-                               AmSgmm *model);
-
-  void ComputeMPrior(AmSgmm *model);  // TODO(arnab): Maybe make this static?
-  double MapUpdateM(const MleAmSgmmAccs &accs, AmSgmm *model);
-
-  KALDI_DISALLOW_COPY_AND_ASSIGN(MleAmSgmmUpdater);
-  MleAmSgmmUpdater() {}  // Prevent unconfigured updater.
-};
-
-
-/** \class MleSgmmSpeakerAccs
- *  Class for the accumulators required to update the speaker
- *  vectors v_s. 
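- *  (The speaker vector enters the model through the speaker projections N_i:
- *  roughly, the speaker-adapted substate mean becomes \mu_{jmi} + N_i v^{(s)},
- *  which is why H_spk_ below is built from the N_i.)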
- * Note: if you have multiple speakers you will want to initialize - * this just once and call Clear() after you're done with each speaker, - * rather than creating a new object for each speaker, since the - * initialization function does nontrivial work. - */ - -class MleSgmmSpeakerAccs { - public: - /// Initialize the object. Error if speaker subspace not set up. - MleSgmmSpeakerAccs(const AmSgmm &model, BaseFloat rand_prune_ = 1.0e-05); - - /// Clear the statistics. - void Clear(); - - /// Accumulate statistics. Returns per-frame log-likelihood. - BaseFloat Accumulate(const AmSgmm &model, - const SgmmPerFrameDerivedVars &frame_vars, - int32 state_index, BaseFloat weight); - - /// Accumulate statistics, given posteriors. Returns total - /// count accumulated, which may differ from posteriors.Sum() - /// due to randomized pruning. - BaseFloat AccumulateFromPosteriors(const AmSgmm &model, - const SgmmPerFrameDerivedVars &frame_vars, - const Matrix &posteriors, - int32 state_index); - - /// Update speaker vector. If v_s was empty, will assume it started as zero - /// and will resize it to the speaker-subspace size. - void Update(BaseFloat min_count, // e.g. 100 - Vector *v_s, - BaseFloat *objf_impr_out, - BaseFloat *count_out); - - private: - /// Statistics for speaker adaptation (vectors), stored per-speaker. - /// Per-speaker stats for vectors, y^{(s)}. Dimension [T]. - Vector y_s_; - /// gamma_{i}^{(s)}. Per-speaker counts for each Gaussian. Dimension is [I] - Vector gamma_s_; - - /// The following variable does not change per speaker. - /// Eq. (82): H_{i}^{spk} = N_{i}^T \Sigma_{i}^{-1} N_{i} - std::vector< SpMatrix > H_spk_; - - /// N_i^T \Sigma_{i}^{-1}. Needed for y^{(s)} - std::vector< Matrix > NtransSigmaInv_; - - /// small constant to randomly prune tiny posteriors - BaseFloat rand_prune_; -}; - -// This class, used in multi-core implementation of the updates of the "w_i" -// quantities, was previously in estimate-am-sgmm.cc, but is being moved to the -// header so it can be used in estimate-am-sgmm-ebw.cc. It is responsible for -// computing, in parallel, the F_i and g_i quantities used in the updates of -// w_i. -class UpdateWParallelClass: public MultiThreadable { - public: - UpdateWParallelClass(const MleAmSgmmAccs &accs, - const AmSgmm &model, - const Matrix &w, - Matrix *F_i, - Matrix *g_i, - double *tot_like): - accs_(accs), model_(model), w_(w), - F_i_ptr_(F_i), g_i_ptr_(g_i), tot_like_ptr_(tot_like) { - tot_like_ = 0.0; - F_i_.Resize(F_i->NumRows(), F_i->NumCols()); - g_i_.Resize(g_i->NumRows(), g_i->NumCols()); - } - - ~UpdateWParallelClass() { - F_i_ptr_->AddMat(1.0, F_i_, kNoTrans); - g_i_ptr_->AddMat(1.0, g_i_, kNoTrans); - *tot_like_ptr_ += tot_like_; - } - - inline void operator() () { - // Note: give them local copy of the sums we're computing, - // which will be propagated to the total sums in the destructor. 
- MleAmSgmmUpdater::UpdateWParallelGetStats(accs_, model_, w_, - &F_i_, &g_i_, &tot_like_, - num_threads_, thread_id_); - } - private: - // MleAmSgmmUpdater *updater_; - const MleAmSgmmAccs &accs_; - const AmSgmm &model_; - const Matrix &w_; - Matrix *F_i_ptr_; - Matrix *g_i_ptr_; - Matrix F_i_; - Matrix g_i_; - double *tot_like_ptr_; - double tot_like_; -}; - - -} // namespace kaldi - - -#endif // KALDI_SGMM_ESTIMATE_AM_SGMM_H_ diff --git a/src/sgmm/fmllr-sgmm-test.cc b/src/sgmm/fmllr-sgmm-test.cc deleted file mode 100644 index c9239d5740c..00000000000 --- a/src/sgmm/fmllr-sgmm-test.cc +++ /dev/null @@ -1,233 +0,0 @@ -// sgmm/fmllr-sgmm-test.cc - -// Copyright 2009-2011 Saarland University -// Author: Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "base/kaldi-math.h" -#include "gmm/model-test-common.h" -#include "sgmm/am-sgmm.h" -#include "sgmm/fmllr-sgmm.h" -#include "util/kaldi-io.h" - -using kaldi::AmSgmm; -using kaldi::int32; -using kaldi::BaseFloat; -using kaldi::Vector; -using kaldi::Matrix; -using kaldi::Exp; - -namespace ut = kaldi::unittest; - -void ApplyFmllrXform(const kaldi::VectorBase &in, - const Matrix &xf, - Vector *out) { - int32 dim = in.Dim(); - KALDI_ASSERT(xf.NumRows() == dim && xf.NumCols() == dim + 1); - Vector tmp(dim + 1); - tmp.Range(0, dim).CopyFromVec(in); - tmp(dim) = 1.0; - out->Resize(dim, kaldi::kSetZero); - out->AddMatVec(1.0, xf, kaldi::kNoTrans, tmp, 0.0); -} - -// Tests the Read() and Write() methods for the accumulators, in both binary -// and ASCII mode, as well as Check(). 
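-// The test below is a round-trip: accumulate stats, write them to disk, read
-// them back into a fresh accumulator, estimate a transform from each copy,
-// and check that the resulting log-likelihoods agree.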
-void TestSgmmFmllrAccsIO(const AmSgmm &sgmm, - const kaldi::Matrix &feats) { - KALDI_LOG << "Test IO start."; - using namespace kaldi; - int32 dim = sgmm.FeatureDim(); - kaldi::SgmmPerFrameDerivedVars frame_vars; - kaldi::SgmmPerSpkDerivedVars empty; - kaldi::SgmmFmllrGlobalParams fmllr_globals; - kaldi::SgmmGselectConfig sgmm_config; - - frame_vars.Resize(sgmm.NumGauss(), dim, sgmm.PhoneSpaceDim()); - sgmm_config.full_gmm_nbest = std::min(sgmm_config.full_gmm_nbest, - sgmm.NumGauss()); - kaldi::Vector occs(sgmm.NumPdfs()); - occs.Set(feats.NumRows()); - sgmm.ComputeFmllrPreXform(occs, &fmllr_globals.pre_xform_, - &fmllr_globals.inv_xform_, - &fmllr_globals.mean_scatter_); - if (fmllr_globals.mean_scatter_.Min() == 0.0) { - KALDI_WARN << "Global covariances low rank!"; - KALDI_WARN << "Diag-scatter = " << fmllr_globals.mean_scatter_; - return; - } - -// std::cout << "Pre-Xform = " << fmllr_globals.pre_xform_; -// std::cout << "Inv-Xform = " << fmllr_globals.inv_xform_; - - FmllrSgmmAccs accs; - accs.Init(sgmm.FeatureDim(), sgmm.NumGauss()); - BaseFloat loglike = 0.0; - Vector empty_spk; - std::vector gselect; - for (int32 i = 0; i < feats.NumRows(); i++) { - sgmm.GaussianSelection(sgmm_config, feats.Row(i), &gselect); - sgmm.ComputePerFrameVars(feats.Row(i), gselect, empty, 0.0, &frame_vars); - loglike += accs.Accumulate(sgmm, empty, feats.Row(i), frame_vars, 0, 1.0); - } - - kaldi::SgmmFmllrConfig update_opts; -// update_opts.fmllr_min_count = 100; - kaldi::Matrix xform_mat(dim, dim+1); - xform_mat.SetUnit(); - BaseFloat frames, impr; - accs.Update(sgmm, fmllr_globals, update_opts, &xform_mat, &frames, &impr); - - Vector xformed_feat(dim); - ApplyFmllrXform(feats.Row(0), xform_mat, &xformed_feat); - sgmm.GaussianSelection(sgmm_config, xformed_feat, &gselect); - sgmm.ComputePerFrameVars(xformed_feat, gselect, empty, 0.0, &frame_vars); - BaseFloat loglike1 = sgmm.LogLikelihood(frame_vars, 0); - - bool binary_in; - // First, non-binary write - KALDI_LOG << "Test ASCII IO."; - accs.Write(kaldi::Output("tmpf", false).Stream(), false); - FmllrSgmmAccs *accs1 = new FmllrSgmmAccs(); - // Non-binary read - kaldi::Input ki1("tmpf", &binary_in); - accs1->Read(ki1.Stream(), binary_in, false); - xform_mat.SetUnit(); - accs1->Update(sgmm, fmllr_globals, update_opts, &xform_mat, NULL, NULL); - ApplyFmllrXform(feats.Row(0), xform_mat, &xformed_feat); - sgmm.GaussianSelection(sgmm_config, xformed_feat, &gselect); - sgmm.ComputePerFrameVars(xformed_feat, gselect, empty, 0.0, &frame_vars); - BaseFloat loglike2 = sgmm.LogLikelihood(frame_vars, 0); - std::cout << "LL1 = " << loglike1 << ", LL2 = " << loglike2 << std::endl; - kaldi::AssertEqual(loglike1, loglike2, 1e-2); - delete accs1; - - // Next, binary write - KALDI_LOG << "Test Binary IO."; - accs.Write(kaldi::Output("tmpfb", true).Stream(), true); - FmllrSgmmAccs *accs2 = new FmllrSgmmAccs(); - // Binary read - kaldi::Input ki2("tmpfb", &binary_in); - accs2->Read(ki2.Stream(), binary_in, false); - xform_mat.SetUnit(); - accs2->Update(sgmm, fmllr_globals, update_opts, &xform_mat, NULL, NULL); - ApplyFmllrXform(feats.Row(0), xform_mat, &xformed_feat); - sgmm.GaussianSelection(sgmm_config, xformed_feat, &gselect); - sgmm.ComputePerFrameVars(xformed_feat, gselect, empty, 0.0, &frame_vars); - BaseFloat loglike3 = sgmm.LogLikelihood(frame_vars, 0); - std::cout << "LL1 = " << loglike1 << ", LL3 = " << loglike3 << std::endl; - kaldi::AssertEqual(loglike1, loglike3, 1e-4); - delete accs2; - KALDI_LOG << "Test IO end."; - - unlink("tmpf"); - unlink("tmpfb"); -} - 
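-// Tests basis fMLLR estimation: accumulates gradient-scatter stats from
-// random features, estimates the fMLLR bases with EstimateSgmmFmllrSubspace(),
-// then runs a basis-based update of the transform.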
-void TestSgmmFmllrSubspace(const AmSgmm &sgmm, - const kaldi::Matrix &feats) { - KALDI_LOG << "Test Subspace start."; - using namespace kaldi; - int32 dim = sgmm.FeatureDim(); - kaldi::SgmmPerFrameDerivedVars frame_vars; - kaldi::SgmmPerSpkDerivedVars empty; - kaldi::SgmmFmllrGlobalParams fmllr_globals; - kaldi::SgmmGselectConfig sgmm_config; - - frame_vars.Resize(sgmm.NumGauss(), dim, sgmm.PhoneSpaceDim()); - sgmm_config.full_gmm_nbest = std::min(sgmm_config.full_gmm_nbest, - sgmm.NumGauss()); - kaldi::Vector occs(sgmm.NumPdfs()); - occs.Set(feats.NumRows()); - sgmm.ComputeFmllrPreXform(occs, &fmllr_globals.pre_xform_, - &fmllr_globals.inv_xform_, - &fmllr_globals.mean_scatter_); - if (fmllr_globals.mean_scatter_.Min() == 0.0) { - KALDI_WARN << "Global covariances low rank!"; - KALDI_WARN << "Diag-scatter = " << fmllr_globals.mean_scatter_; - return; - } - - FmllrSgmmAccs accs; - accs.Init(sgmm.FeatureDim(), sgmm.NumGauss()); - BaseFloat loglike = 0.0; - Vector empty_spk; - std::vector gselect; - for (int32 i = 0; i < feats.NumRows(); i++) { - sgmm.GaussianSelection(sgmm_config, feats.Row(i), &gselect); - sgmm.ComputePerFrameVars(feats.Row(i), gselect, empty, 0.0, &frame_vars); - loglike += accs.Accumulate(sgmm, empty, feats.Row(i), frame_vars, 0, 1.0); - } - - SpMatrix grad_scatter(dim * (dim+1)); - accs.AccumulateForFmllrSubspace(sgmm, fmllr_globals, &grad_scatter); - kaldi::SgmmFmllrConfig update_opts; - EstimateSgmmFmllrSubspace(grad_scatter, update_opts.num_fmllr_bases, dim, - &fmllr_globals); -// update_opts.fmllr_min_count = 100; - kaldi::Matrix xform_mat(dim, dim+1); - xform_mat.SetUnit(); - accs.Update(sgmm, fmllr_globals, update_opts, &xform_mat, NULL, NULL); - KALDI_LOG << "Test Subspace end."; -} - -void TestSgmmFmllr() { - // srand(time(NULL)); - int32 dim = 1 + kaldi::RandInt(0, 9); // random dimension of the gmm - int32 num_comp = 2 + kaldi::RandInt(0, 9); // random number of mixtures - kaldi::FullGmm full_gmm; - ut::InitRandFullGmm(dim, num_comp, &full_gmm); - - int32 num_states = 1; - AmSgmm sgmm; - kaldi::SgmmGselectConfig config; - sgmm.InitializeFromFullGmm(full_gmm, num_states, dim+1, dim); - sgmm.ComputeNormalizers(); - - kaldi::Matrix feats; - - { // First, generate random means and variances - int32 num_feat_comp = num_comp + kaldi::RandInt(-num_comp/2, num_comp/2); - kaldi::Matrix means(num_feat_comp, dim), - vars(num_feat_comp, dim); - for (int32 m = 0; m < num_feat_comp; m++) { - for (int32 d= 0; d < dim; d++) { - means(m, d) = kaldi::RandGauss(); - vars(m, d) = Exp(kaldi::RandGauss()) + 1e-2; - } - } - // Now generate random features with those means and variances. 
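-    // (200 frames are drawn per component, so feats ends up with
-    // num_feat_comp * 200 rows of dimension dim.)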
- feats.Resize(num_feat_comp * 200, dim); - for (int32 m = 0; m < num_feat_comp; m++) { - kaldi::SubMatrix tmp(feats, m*200, 200, 0, dim); - ut::RandDiagGaussFeatures(200, means.Row(m), vars.Row(m), &tmp); - } - } - TestSgmmFmllrAccsIO(sgmm, feats); - TestSgmmFmllrSubspace(sgmm, feats); -} - -int main() { - std::srand(1000); - kaldi::g_kaldi_verbose_level = 5; - for (int i = 0; i < 10; i++) - TestSgmmFmllr(); - std::cout << "Test OK.\n"; - return 0; -} diff --git a/src/sgmm/fmllr-sgmm.cc b/src/sgmm/fmllr-sgmm.cc deleted file mode 100644 index b1f87f9a967..00000000000 --- a/src/sgmm/fmllr-sgmm.cc +++ /dev/null @@ -1,554 +0,0 @@ -// sgmm/fmllr-sgmm.cc - -// Copyright 2009-2011 Saarland University (author: Arnab Ghoshal) -// 2012 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -using std::vector; - -#include "sgmm/fmllr-sgmm.h" -#include "util/parse-options.h" - -namespace kaldi { - -static void ApplyPreXformToGradient(const SgmmFmllrGlobalParams &globals, - const Matrix &gradient_in, - Matrix *gradient_out) { - // Eq. (B.14): P' = A_{inv}^T P {W_{pre}^+}^T - int32 dim = gradient_in.NumRows(); - Matrix Wpre_plus(dim + 1, dim + 1, kSetZero); - Wpre_plus.Range(0, dim, 0, dim + 1).CopyFromMat(globals.pre_xform_); - Wpre_plus(dim, dim) = 1; - SubMatrix Ainv(globals.inv_xform_, 0, dim, 0, dim); - Matrix AinvP(dim, dim + 1, kUndefined); - AinvP.AddMatMat(1.0, Ainv, kTrans, gradient_in, kNoTrans, 0.0); - gradient_out->AddMatMat(1.0, AinvP, kNoTrans, Wpre_plus, kTrans, 0.0); -} - -static void ApplyInvPreXformToChange(const SgmmFmllrGlobalParams &globals, - const Matrix &delta_in, - Matrix *delta_out) { - // Eq. (B.25): \Delta = A_{inv} \Delta' W_{pre}^+ - int32 dim = delta_in.NumRows(); - Matrix Wpre_plus(dim + 1, dim + 1, kSetZero); - Wpre_plus.Range(0, dim, 0, dim + 1).CopyFromMat(globals.pre_xform_); - Wpre_plus(dim, dim) = 1; - SubMatrix Ainv(globals.inv_xform_, 0, dim, 0, dim); - Matrix AinvD(dim, dim + 1, kUndefined); - AinvD.AddMatMat(1.0, Ainv, kNoTrans, delta_in, kNoTrans, 0.0); - delta_out->AddMatMat(1.0, AinvD, kNoTrans, Wpre_plus, kNoTrans, 0.0); -} - -static void ApplyHessianXformToGradient(const SgmmFmllrGlobalParams &globals, - const Matrix &gradient_in, - Matrix *gradient_out) { - int32 dim = gradient_in.NumRows(); - const Vector &D = globals.mean_scatter_; - if (D.Min() <= 0.0) - KALDI_ERR << "Cannot estimate FMLLR: mean scatter has 0 eigenvalues."; - for (int32 r = 0; r < dim; r++) { - for (int32 c = 0; c < r; c++) { - // Eq. (B.15) - (*gradient_out)(r, c) = gradient_in(r, c) / std::sqrt(1 + D(c)); - // Eq. (B.16) - (*gradient_out)(c, r) = gradient_in(c, r) / std::sqrt(1 + D(r) - - 1 / (1 + D(c))) - gradient_in(r, c) / ((1 + D(c)) * - std::sqrt(1 + D(r) - 1 / (1 + D(c)))); - } - // Eq. 
(B.17) & (B.18) - (*gradient_out)(r, r) = gradient_in(r, r) / std::sqrt(2 + D(r)); - (*gradient_out)(r, dim) = gradient_in(r, dim); - } -} - -static void ApplyInvHessianXformToChange(const SgmmFmllrGlobalParams &globals, - const Matrix &delta_in, - Matrix *delta_out) { - int32 dim = delta_in.NumRows(); - const Vector &D = globals.mean_scatter_; - if (D.Min() <= 0.0) - KALDI_ERR << "Cannot estimate FMLLR: mean scatter has 0 eigenvalues."; - for (int32 r = 0; r < dim; r++) { - for (int32 c = 0; c < r; c++) { - // Eq. (B.21) - (*delta_out)(r, c) = delta_in(r, c) / std::sqrt(1 + D(c)) - - delta_in(c, r) / ((1 + D(c)) * std::sqrt(1 + D(r) - 1 / (1 + D(c)))); - // Eq. (B.22) - (*delta_out)(c, r) = delta_in(c, r) / std::sqrt(1 + D(r) - 1/ (1 + D(c))); - } - // Eq. (B.23) & (B.24) - (*delta_out)(r, r) = delta_in(r, r) / std::sqrt(2 + D(r)); - (*delta_out)(r, dim) = delta_in(r, dim); - } -} - - -void SgmmFmllrGlobalParams::Write(std::ostream &out, bool binary) const { - WriteToken(out, binary, ""); - WriteToken(out, binary, ""); - pre_xform_.Write(out, binary); - WriteToken(out, binary, ""); - inv_xform_.Write(out, binary); - WriteToken(out, binary, ""); - mean_scatter_.Write(out, binary); - if (fmllr_bases_.size() != 0) { - WriteToken(out, binary, ""); - uint32 tmp = static_cast(fmllr_bases_.size()); - WriteBasicType(out, binary, tmp); - for (uint32 i = 0; i < tmp; i++) { - fmllr_bases_[i].Write(out, binary); - } - } - WriteToken(out, binary, ""); -} - -void SgmmFmllrGlobalParams::Read(std::istream &in, bool binary) { - ExpectToken(in, binary, ""); - ExpectToken(in, binary, ""); - pre_xform_.Read(in, binary); - ExpectToken(in, binary, ""); - inv_xform_.Read(in, binary); - ExpectToken(in, binary, ""); - mean_scatter_.Read(in, binary); - std::string token; - ReadToken(in, binary, &token); - if (token == "") { - uint32 tmp; - ReadBasicType(in, binary, &tmp); - fmllr_bases_.resize(tmp); - for (uint32 i = 0; i < tmp; i++) { - fmllr_bases_[i].Read(in, binary); - } - } else { - if (token != "") - KALDI_ERR << "Unexpected token '" << token << "' found."; - } -} - - -void FmllrSgmmAccs::Init(int32 dim, int32 num_gaussians) { - if (dim == 0) { // empty stats - dim_ = 0; // non-zero dimension is meaningless in empty stats - stats_.Init(0, 0); // clear the stats - } else { - dim_ = dim; - stats_.Init(dim, num_gaussians); - } -} - -BaseFloat FmllrSgmmAccs::Accumulate(const AmSgmm &model, - const SgmmPerSpkDerivedVars &spk, - const VectorBase &data, - const SgmmPerFrameDerivedVars &frame_vars, - int32 pdf_index, BaseFloat weight) { - // Calulate Gaussian posteriors and collect statistics - Matrix posteriors; - BaseFloat log_like = model.ComponentPosteriors(frame_vars, pdf_index, - &posteriors); - posteriors.Scale(weight); - AccumulateFromPosteriors(model, spk, data, frame_vars.gselect, posteriors, - pdf_index); - return log_like; -} - -void -FmllrSgmmAccs::AccumulateFromPosteriors(const AmSgmm &model, - const SgmmPerSpkDerivedVars &spk, - const VectorBase &data, - const vector &gselect, - const Matrix &posteriors, - int32 pdf_index) { - Vector var_scaled_mean(dim_), extended_data(dim_+1); - extended_data.Range(0, dim_).CopyFromVec(data); - extended_data(dim_) = 1.0; - SpMatrix scatter(dim_+1, kSetZero); - scatter.AddVec2(1.0, extended_data); - - for (int32 ki = 0, ki_max = gselect.size(); ki < ki_max; ki++) { - int32 i = gselect[ki]; - - for (int32 m = 0; m < model.NumSubstates(pdf_index); m++) { - // posterior gamma_{jkmi}(t) eq.(39) - BaseFloat gammat_jmi = posteriors(ki, m); - - // Accumulate statistics for 
non-zero gaussian posterior - if (gammat_jmi > 0.0) { - stats_.beta_ += gammat_jmi; - model.GetVarScaledSubstateSpeakerMean(pdf_index, m, i, spk, - &var_scaled_mean); - // Eq. (52): K += \gamma_{jmi} \Sigma_{i}^{-1} \mu_{jmi}^{(s)} x^{+T} - stats_.K_.AddVecVec(gammat_jmi, var_scaled_mean, extended_data); - // Eq. (53): G_{i} += \gamma_{jmi} x^{+} x^{+T} - stats_.G_[i].AddSp(gammat_jmi, scatter); - } // non-zero posteriors - } // loop over substates - } // loop over selected Gaussians -} - -void FmllrSgmmAccs::AccumulateForFmllrSubspace(const AmSgmm &sgmm, - const SgmmFmllrGlobalParams &globals, SpMatrix *grad_scatter) { - if (stats_.beta_ <= 0.0) { - KALDI_WARN << "Not committing any stats since no stats accumulated."; - return; - } - int32 dim = sgmm.FeatureDim(); - Matrix xform(dim, dim + 1, kUndefined); - xform.SetUnit(); - Matrix grad(dim, dim + 1, kSetZero); - this->FmllrObjGradient(sgmm, xform, &grad, NULL); - Matrix pre_xformed_grad(dim, dim + 1, kSetZero); - ApplyPreXformToGradient(globals, grad, &pre_xformed_grad); - Matrix hess_xformed_grad(dim, dim + 1, kSetZero); - ApplyHessianXformToGradient(globals, pre_xformed_grad, &hess_xformed_grad); - Vector grad_vec(dim * (dim + 1)); - grad_vec.CopyRowsFromMat(hess_xformed_grad); - grad_vec.Scale(1 / std::sqrt(stats_.beta_)); - grad_scatter->AddVec2(1.0, grad_vec); - KALDI_LOG << "Frame counts for when committing fMLLR subspace stats are " - << stats_.beta_; -} - - -BaseFloat FmllrSgmmAccs::FmllrObjGradient(const AmSgmm &sgmm, - const Matrix &xform, - Matrix *grad_out, - Matrix *G_out) const { - int32 dim = sgmm.FeatureDim(), - num_gauss = sgmm.NumGauss(); - KALDI_ASSERT(stats_.G_.size() == static_cast(num_gauss)); - Matrix xform_d(xform); - SubMatrix A(xform_d, 0, dim, 0, dim); - Matrix xform_g(dim, dim + 1), total_g(dim, dim + 1); - SpMatrix inv_covar(dim); - double obj = stats_.beta_ * A.LogDet() + - TraceMatMat(xform_d, stats_.K_, kTrans); - for (int32 i = 0; i < num_gauss; i++) { - sgmm.GetInvCovars(i, &inv_covar); - xform_g.AddMatSp(1.0, xform_d, kNoTrans, stats_.G_[i], 0.0); - total_g.AddSpMat(1.0, inv_covar, xform_g, kNoTrans, 1.0); - } - obj -= 0.5 * TraceMatMat(xform_d, total_g, kTrans); - if (G_out != NULL) G_out->CopyFromMat(total_g); - - // Compute the gradient: P = \beta [(A^{-1})^{T} , 0] + K - S - if (grad_out != NULL) { - Matrix grad_d(dim, dim + 1, kSetZero); - grad_d.Range(0, dim, 0, dim).CopyFromMat(A); - grad_d.Range(0, dim, 0, dim).InvertDouble(); - grad_d.Range(0, dim, 0, dim).Transpose(); - grad_d.Scale(stats_.beta_); - grad_d.AddMat(-1.0, total_g, kNoTrans); - grad_d.AddMat(1.0, stats_.K_, kNoTrans); - grad_out->CopyFromMat(grad_d); - } - - return obj; -} - - -void FmllrSgmmAccs::Write(std::ostream &out, bool binary) const { - WriteToken(out, binary, ""); - WriteToken(out, binary, ""); - WriteBasicType(out, binary, dim_); - WriteToken(out, binary, ""); - stats_.Write(out, binary); - WriteToken(out, binary, ""); -} - -void FmllrSgmmAccs::Read(std::istream &in, bool binary, bool add) { - ExpectToken(in, binary, ""); - ExpectToken(in, binary, ""); - ReadBasicType(in, binary, &dim_); - KALDI_ASSERT(dim_ > 0); - ExpectToken(in, binary, ""); - stats_.Read(in, binary, add); - ExpectToken(in, binary, ""); -} - - -static BaseFloat CalcFmllrStepSize(const AffineXformStats &stats, - const AmSgmm &sgmm, - const MatrixBase &Delta, - const MatrixBase &A, - const Matrix &G, - int32 max_iters) { - int32 dim = sgmm.FeatureDim(); - Matrix Delta_d(Delta); - Matrix G_d(G); - SubMatrix Delta_C(Delta_d, 0, dim, 0, dim); - - // Eq. 
(B.28): m = tr(\Delta K^T) - tr(\Delta S^T)
-  BaseFloat m = TraceMatMat(Delta_d, stats.K_, kTrans) -
-      TraceMatMat(Delta_d, G_d, kTrans);
-  // Eq. (B.29): n = \sum_i tr(\Delta \Sigma_{i}^{-1} \Delta S_{i})
-  BaseFloat n = 0;
-  SpMatrix<double> inv_covar;
-  for (int32 i = 0, num_gauss = sgmm.NumGauss(); i < num_gauss; i++) {
-    sgmm.GetInvCovars(i, &inv_covar);
-    n += TraceMatSpMatSp(Delta_d, kTrans, inv_covar, Delta_d, kNoTrans,
-                         stats.G_[i]);
-  }
-
-  BaseFloat step_size = 0.0;
-  // initialize just to get rid of compile errors.
-  BaseFloat obj_step_old, obj_step_new = 0.0;
-  Matrix<double> new_A(dim, dim);
-  Matrix<double> B(dim, dim);
-  for (int32 iter_step = 0; iter_step < max_iters; iter_step++) {
-    if (iter_step == 0) {
-      obj_step_old = stats.beta_ * A.LogDet();  // Q_0 = \beta * log det(A)
-    } else {
-      obj_step_old = obj_step_new;
-    }
-
-    // Eq. (B.30); B = (A + k\Delta^{-C})^{-1} \Delta^{-C}
-    new_A.CopyFromMat(A);
-    new_A.AddMat(step_size, Delta_C, kNoTrans);
-    new_A.InvertDouble();
-    B.AddMatMat(1.0, new_A, kNoTrans, Delta_C, kNoTrans, 0.0);
-
-    BaseFloat d = m - step_size * n + stats.beta_ * TraceMat(B);
-    BaseFloat d2 = -n - stats.beta_ * TraceMatMat(B, B, kNoTrans);
-    if (std::fabs(d / d2) < 0.000001) { break; }  // converged
-
-    BaseFloat step_size_change = -(d / d2);
-    step_size += step_size_change;  // Eq. (B.33)
-
-    // Halve the step size whenever the auxiliary function decreases.
-    do {
-      new_A.CopyFromMat(A);
-      new_A.AddMat(step_size, Delta_C, kNoTrans);
-      BaseFloat logdet = new_A.LogDet();
-      obj_step_new = stats.beta_ * logdet + step_size * m -
-          0.5 * step_size * step_size * n;
-
-      if (obj_step_new - obj_step_old < -0.001) {
-        KALDI_WARN << "Objective function decreased (" << obj_step_old << "->"
-                   << obj_step_new << "). Halving step size change ("
-                   << step_size << " -> " << (step_size - (step_size_change/2))
-                   << ")";
-        step_size_change /= 2;
-        step_size -= step_size_change;  // take away half of our step
-      }  // Facing numeric precision issues. Compute in double?
-    } while (obj_step_new - obj_step_old < -0.001 && step_size_change > 1e-05);
-  }
-  return step_size;
-}
-
-
-bool FmllrSgmmAccs::Update(const AmSgmm &sgmm,
-                           const SgmmFmllrGlobalParams &globals,
-                           const SgmmFmllrConfig &opts,
-                           Matrix<BaseFloat> *out_xform,
-                           BaseFloat *frame_count, BaseFloat *auxf_out) const {
-  BaseFloat auxf_improv = 0.0, logdet = 0.0;
-  KALDI_ASSERT(out_xform->NumRows() == dim_ && out_xform->NumCols() == dim_+1);
-  BaseFloat mincount = (globals.HasBasis() ?
-      std::min(opts.fmllr_min_count_basis, opts.fmllr_min_count_full) :
-      opts.fmllr_min_count);
-  bool using_subspace = (globals.HasBasis() ?
-      (stats_.beta_ < opts.fmllr_min_count_full) : false);
-
-  if (globals.IsEmpty())
-    KALDI_ERR << "Must set up pre-transforms before estimating FMLLR.";
-
-  KALDI_VLOG(1) << "Mincount = " << mincount << "; Basis: "
-                << std::string(globals.HasBasis()? "yes; " : "no; ")
-                << "Using subspace: " << std::string(using_subspace? "yes; "
-                    : "no; ");
-
-  int32 num_bases = 0;
-  if (using_subspace) {
-    KALDI_ASSERT(globals.fmllr_bases_.size() != 0);
-    int32 max_bases = std::min(static_cast<int32>(globals.fmllr_bases_.size()),
-                               opts.num_fmllr_bases);
-    num_bases = (opts.bases_occ_scale <= 0.0)? max_bases :
-        std::min(max_bases, static_cast<int32>(std::floor(opts.bases_occ_scale
-                                                          * stats_.beta_)));
-    KALDI_VLOG(1) << "Have " << stats_.beta_ << " frames for speaker: Using "
-                  << num_bases << " fMLLR bases.";
-  }
-
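The solver above (CalcFmllrStepSize) is a safeguarded Newton iteration on the
scalar surrogate objective Q(k) = beta * log det(A + k Delta^{-C}) + k*m - 0.5*k^2*n,
whose first and second derivatives are the quantities d and d2 computed in the
loop. A minimal standalone sketch of the same scheme, with hypothetical
callables q/q1/q2 standing in for Q and its derivatives (illustrative only,
not Kaldi code):

    #include <cmath>
    #include <functional>

    // Maximize a smooth concave 1-D objective q by Newton's method, halving
    // the step whenever the objective decreases (cf. Eqs. B.30-B.33).
    double NewtonWithHalving(const std::function<double(double)> &q,
                             const std::function<double(double)> &q1,  // dQ/dk
                             const std::function<double(double)> &q2,  // d2Q/dk2
                             int max_iters) {
      double k = 0.0, q_old = q(k);
      for (int iter = 0; iter < max_iters; iter++) {
        double d = q1(k), d2 = q2(k);
        if (std::fabs(d / d2) < 1.0e-06) break;  // converged
        double change = -(d / d2);               // Newton step
        k += change;
        // Safeguard: retreat by half the remaining step while q decreases.
        while (q(k) - q_old < -0.001 && std::fabs(change) > 1.0e-05) {
          change /= 2;
          k -= change;
        }
        q_old = q(k);
      }
      return k;
    }

The halving loop is what keeps an over-aggressive Newton step from leaving the
region where the auxiliary function improves, mirroring the KALDI_WARN branch
above.

-  // initialization just to get rid of compile errors.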
- BaseFloat auxf_old = 0, auxf_new = 0; - if (frame_count != NULL) *frame_count = stats_.beta_; - - // If occupancy is greater than the min count, update the transform - if (stats_.beta_ >= mincount) { - for (int32 iter = 0; iter < opts.fmllr_iters; iter++) { - Matrix grad(dim_, dim_ + 1, kSetZero); - Matrix G(dim_, dim_ + 1, kSetZero); - auxf_new = this->FmllrObjGradient(sgmm, *out_xform, &grad, &G); - - // For diagnostic purposes - KALDI_VLOG(3) << "Iter " << iter << ": Auxiliary function = " - << (auxf_new / stats_.beta_) << " per frame over " << stats_.beta_ - << " frames"; - - if (iter > 0) { - // For diagnostic purposes - KALDI_VLOG(2) << "Iter " << iter << ": Auxiliary function improvement: " - << ((auxf_new - auxf_old) / stats_.beta_) << " per frame over " - << (stats_.beta_) << " frames"; - auxf_improv += auxf_new - auxf_old; - } - - Matrix pre_xformed_grad(dim_, dim_ + 1, kSetZero); - ApplyPreXformToGradient(globals, grad, &pre_xformed_grad); -// std::cout << "Pre-X Grad = " << pre_xformed_grad << std::endl; - - // Transform P_sk with the Hessian - Matrix hess_xformed_grad(dim_, dim_ + 1, kSetZero); - ApplyHessianXformToGradient(globals, pre_xformed_grad, - &hess_xformed_grad); -// std::cout << "Hess-X Grad = " << hess_xformed_grad << std::endl; - - // Update the actual FMLLR transform matrices - Matrix hess_xformed_delta(dim_, dim_ + 1, kUndefined); - if (using_subspace) { - // Note that in this case we can simply store the speaker-specific - // coefficients for each of the basis matrices. The current - // implementation stores the computed transform to simplify the code! - hess_xformed_delta.SetZero(); - for (int32 b = 0; b < num_bases; b++) { // Eq (B.20) - hess_xformed_delta.AddMat(TraceMatMat(globals.fmllr_bases_[b], - hess_xformed_grad, kTrans), - globals.fmllr_bases_[b], kNoTrans); - } - hess_xformed_delta.Scale(1 / stats_.beta_); - } else { - hess_xformed_delta.CopyFromMat(hess_xformed_grad); - hess_xformed_delta.Scale(1 / stats_.beta_); // Eq. (B.19) - } - -// std::cout << "Hess-X Delta = " << hess_xformed_delta << std::endl; - - // Transform Delta with the Hessian - Matrix pre_xformed_delta(dim_, dim_ + 1, kSetZero); - ApplyInvHessianXformToChange(globals, hess_xformed_delta, - &pre_xformed_delta); - - // Apply inverse pre-transform to Delta - Matrix delta(dim_, dim_ + 1, kSetZero); - ApplyInvPreXformToChange(globals, pre_xformed_delta, &delta); - -#ifdef KALDI_PARANOID - // Check whether co-ordinate transformation is correct. - { - BaseFloat tr1 = TraceMatMat(delta, grad, kTrans); - BaseFloat tr2 = TraceMatMat(pre_xformed_delta, pre_xformed_grad, - kTrans); - BaseFloat tr3 = TraceMatMat(hess_xformed_delta, hess_xformed_grad, - kTrans); - AssertEqual(tr1, tr2, 1e-5); - AssertEqual(tr2, tr3, 1e-5); - } -#endif - - // Calculate the optimal step size - SubMatrix A(*out_xform, 0, dim_, 0, dim_); - BaseFloat step_size = CalcFmllrStepSize(stats_, sgmm, delta, A, G, - opts.fmllr_iters); - - // Update: W <-- W + k \Delta Eq. 
(B.34) - out_xform->AddMat(step_size, delta, kNoTrans); - auxf_old = auxf_new; - - // Check the objective function change for the last iteration - if (iter == opts.fmllr_iters - 1) { - auxf_new = this->FmllrObjGradient(sgmm, *out_xform, NULL, NULL); - logdet = A.LogDet(); - // SubMatrix A points to the memory location of out_xform, and so will - // contain the updated value - - KALDI_VLOG(2) << "Iter " << iter << ": Auxiliary function improvement: " - << ((auxf_new - auxf_old) / stats_.beta_) << " per frame over " - << (stats_.beta_) << " frames"; - auxf_improv += auxf_new - auxf_old; - } - } - if (auxf_out != NULL) *auxf_out = auxf_improv; - auxf_improv /= (stats_.beta_ + 1.0e-10); - - KALDI_LOG << "Auxiliary function improvement for FMLLR = " << auxf_improv - << " per frame over " << stats_.beta_ << " frames. Log-determinant = " - << logdet; - return true; - } else { - KALDI_ASSERT(stats_.beta_ < mincount); -// std::cerr.precision(10); -// std::cerr.setf(std::ios::fixed,std::ios::floatfield); - KALDI_WARN << "Not updating FMLLR because count is " << stats_.beta_ - << " < " << (mincount); - if (auxf_out != NULL) *auxf_out = 0.0; - return false; - } // Do not use the transform if it does not have enough counts - KALDI_ASSERT(false); // Should never be reached. -} - -void EstimateSgmmFmllrSubspace(const SpMatrix &fmllr_grad_scatter, - int32 num_fmllr_bases, int32 feat_dim, - SgmmFmllrGlobalParams *globals, double min_eig) { - KALDI_ASSERT(num_fmllr_bases > 0 && feat_dim > 0); - if (num_fmllr_bases > feat_dim * (feat_dim + 1)) { - num_fmllr_bases = feat_dim * (feat_dim + 1); - KALDI_WARN << "Limiting number of fMLLR bases to be the same as transform " - << "dimension."; - } - - vector< Matrix > &fmllr_bases(globals->fmllr_bases_); - - Vector s(fmllr_grad_scatter.NumRows()); - Matrix U(fmllr_grad_scatter.NumRows(), - fmllr_grad_scatter.NumRows()); - try { - fmllr_grad_scatter.Eig(&s, &U); - SortSvd(&s, &U); // in case was not exactly sorted. - KALDI_VLOG(1) << "Eigenvalues (max 200) of CMLLR scatter are: " - << (SubVector(s, 0, - std::min(static_cast(200), - s.Dim()))); - -// for (int32 b = 2; b < num_fmllr_bases; b++) { -// if (s(b) < min_eig) { -// num_fmllr_bases = b; -// KALDI_WARN << "Limiting number of fMLLR bases to " << num_fmllr_bases -// << " because of small eigenvalues."; -// break; -// } -// } - - U.Transpose(); // Now the rows of U correspond to the basis vectors. - fmllr_bases.resize(num_fmllr_bases); - for (int32 b = 0; b < num_fmllr_bases; b++) { - fmllr_bases[b].Resize(feat_dim, feat_dim + 1, kSetZero); - fmllr_bases[b].CopyRowsFromVec(U.Row(b)); - } - KALDI_LOG << "Estimated " << num_fmllr_bases << " fMLLR basis matrices."; - } catch(const std::exception &e) { - KALDI_WARN << "Not estimating FMLLR bases because of a thrown exception:\n" - << e.what(); - fmllr_bases.resize(0); - } -} // End of EstimateSgmmFmllrSubspace - - -} // namespace kaldi - diff --git a/src/sgmm/fmllr-sgmm.h b/src/sgmm/fmllr-sgmm.h deleted file mode 100644 index 832093e39ad..00000000000 --- a/src/sgmm/fmllr-sgmm.h +++ /dev/null @@ -1,192 +0,0 @@ -// sgmm/fmllr-sgmm.h - -// Copyright 2009-2011 Saarland University -// Author: Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_SGMM_FMLLR_SGMM_H_ -#define KALDI_SGMM_FMLLR_SGMM_H_ - -#include -#include - -#include "base/kaldi-common.h" -#include "sgmm/am-sgmm.h" -#include "transform/transform-common.h" -#include "util/kaldi-table.h" -#include "util/kaldi-holder.h" -#include "itf/options-itf.h" - -namespace kaldi { - -/** \struct SgmmFmllrConfig - * Configuration variables needed in the estimation of FMLLR for SGMMs. - */ -struct SgmmFmllrConfig { - int32 fmllr_iters; ///< Number of iterations in FMLLR estimation. - int32 step_iters; ///< Iterations to find optimal FMLLR step size. - /// Minimum occupancy count to estimate FMLLR using basis matrices. - BaseFloat fmllr_min_count_basis; - /// Minimum occupancy count to estimate FMLLR without basis matrices. - BaseFloat fmllr_min_count; - /// Minimum occupancy count to stop using FMLLR bases and switch to - /// regular FMLLR estimation. - BaseFloat fmllr_min_count_full; - /// Number of basis matrices to use for FMLLR estimation. Can only *reduce* - /// the number of bases present. Overridden by the 'bases_occ_scale' option. - int32 num_fmllr_bases; - /// Scale per-speaker count to determine number of CMLLR bases. - BaseFloat bases_occ_scale; - - SgmmFmllrConfig() { - fmllr_iters = 5; - step_iters = 10; - fmllr_min_count_basis = 100.0; - fmllr_min_count = 1000.0; - fmllr_min_count_full = 5000.0; - num_fmllr_bases = 50; - bases_occ_scale = 0.2; - } - - void Register(OptionsItf *opts); -}; - -inline void SgmmFmllrConfig::Register(OptionsItf *opts) { - std::string module = "SgmmFmllrConfig: "; - opts->Register("fmllr-iters", &fmllr_iters, module+ - "Number of iterations in FMLLR estimation."); - opts->Register("fmllr-step-iters", &step_iters, module+ - "Number of iterations to find optimal FMLLR step size."); - opts->Register("fmllr-min-count-bases", &fmllr_min_count_basis, module+ - "Minimum occupancy count to estimate FMLLR using basis matrices."); - opts->Register("fmllr-min-count", &fmllr_min_count, module+ - "Minimum occupancy count to estimate FMLLR (without bases)."); - opts->Register("fmllr-min-count-full", &fmllr_min_count_full, module+ - "Minimum occupancy count to stop using basis matrices for FMLLR."); - opts->Register("fmllr-num-bases", &num_fmllr_bases, module+ - "Number of FMLLR basis matrices."); - opts->Register("fmllr-bases-occ-scale", &bases_occ_scale, module+ - "Scale per-speaker count to determine number of CMLLR bases."); -} - - -/** \class SgmmFmllrGlobalParams - * Global adaptation parameters. - */ -class SgmmFmllrGlobalParams { - public: - void Init(const AmSgmm &sgmm, const Vector &state_occs); - void Write(std::ostream &out_stream, bool binary) const; - void Read(std::istream &in_stream, bool binary); - bool IsEmpty() const { - return (pre_xform_.NumRows() == 0 || inv_xform_.NumRows() == 0 || - mean_scatter_.Dim() == 0); - } - bool HasBasis() const { return fmllr_bases_.size() != 0; } - - /// Pre-transform matrix. Dim is [D][D+1]. - Matrix pre_xform_; - /// Inverse of pre-transform. Dim is [D][D+1]. 
- Matrix inv_xform_; - /// Diagonal of mean-scatter matrix. Dim is [D] - Vector mean_scatter_; - /// \tilde{W}_b. [b][d][d], dim is [B][D][D+1]. - std::vector< Matrix > fmllr_bases_; -}; - -inline void SgmmFmllrGlobalParams::Init(const AmSgmm &sgmm, - const Vector &state_occs) { - sgmm.ComputeFmllrPreXform(state_occs, &pre_xform_, &inv_xform_, - &mean_scatter_); -} - -/** \class FmllrSgmmAccs - * Class for computing the accumulators needed for the maximum-likelihood - * estimate of FMLLR transforms for a subspace GMM acoustic model. - */ -class FmllrSgmmAccs { - public: - FmllrSgmmAccs() : dim_(-1) {} - ~FmllrSgmmAccs() {} - - void Init(int32 dim, int32 num_gaussians); - void SetZero() { stats_.SetZero(); } - - void Write(std::ostream &out_stream, bool binary) const; - void Read(std::istream &in_stream, bool binary, bool add); - - /// Accumulation routine that computes the Gaussian posteriors and calls - /// the AccumulateFromPosteriors function with the computed posteriors. - /// The 'data' argument is not FMLLR-transformed and is needed in addition - /// to the the 'frame_vars' since the latter only contains a copy of the - /// transformed feature vector. - BaseFloat Accumulate(const AmSgmm &sgmm, - const SgmmPerSpkDerivedVars &spk, - const VectorBase &data, - const SgmmPerFrameDerivedVars &frame_vars, - int32 state_index, BaseFloat weight); - - void AccumulateFromPosteriors(const AmSgmm &sgmm, - const SgmmPerSpkDerivedVars &spk, - const VectorBase &data, - const std::vector &gauss_select, - const Matrix &posteriors, - int32 state_index); - - void AccumulateForFmllrSubspace(const AmSgmm &sgmm, - const SgmmFmllrGlobalParams &fmllr_globals, - SpMatrix *grad_scatter); - - BaseFloat FmllrObjGradient(const AmSgmm &sgmm, - const Matrix &xform, - Matrix *grad_out, - Matrix *G_out) const; - - /// Computes the FMLLR transform from the accumulated stats, using the - /// pre-transforms in fmllr_globals. Expects the transform matrix out_xform - /// to be initialized to the correct size. Returns true if the transform was - /// updated (i.e. had enough counts). - bool Update(const AmSgmm &model, - const SgmmFmllrGlobalParams &fmllr_globals, - const SgmmFmllrConfig &opts, Matrix *out_xform, - BaseFloat *frame_count, BaseFloat *auxf_improv) const; - - /// Accessors - int32 Dim() const { return dim_; } - const AffineXformStats &stats() const { return stats_; } - - private: - AffineXformStats stats_; ///< Accumulated stats - int32 dim_; ///< Dimension of feature vectors - - // Cannot have copy constructor and assigment operator - KALDI_DISALLOW_COPY_AND_ASSIGN(FmllrSgmmAccs); -}; - -/// Computes the fMLLR basis matrices given the scatter of the vectorized -/// gradients (eq: B.10). The result is stored in 'fmllr_globals'. -/// The actual number of bases may be less than 'num_fmllr_bases' depending -/// on the feature dimension and number of eigenvalues greater than 'min_eig'. 
-void EstimateSgmmFmllrSubspace(const SpMatrix &fmllr_grad_scatter, - int32 num_fmllr_bases, int32 feat_dim, - SgmmFmllrGlobalParams *fmllr_globals, - double min_eig = 0.0); - -} // namespace kaldi - -#endif // KALDI_SGMM_FMLLR_SGMM_H_ diff --git a/src/sgmm/sgmm-clusterable.cc b/src/sgmm/sgmm-clusterable.cc deleted file mode 100644 index f49f4f993f2..00000000000 --- a/src/sgmm/sgmm-clusterable.cc +++ /dev/null @@ -1,280 +0,0 @@ -// sgmm/sgmm-clusterable.cc - -// Copyright 2012 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include "sgmm/sgmm-clusterable.h" -#include "hmm/hmm-utils.h" - -namespace kaldi { - -void SgmmClusterable::Accumulate( - const SgmmPerFrameDerivedVars &per_frame_vars, - int32 j, // state index in original SGMM. - BaseFloat weight) { - Matrix post; - KALDI_ASSERT(weight >= 0.0); // Doesn't make sense to use negative weights here. - // Compute Gaussian-level posteriors. - // Note: "post" is indexed by Gaussian-selection index. - sgmm_.ComponentPosteriors(per_frame_vars, j, &post); - if (weight != 1.0) post.Scale(weight); - const std::vector &gselect = per_frame_vars.gselect; - for (int32 ki = 0; ki < gselect.size(); ki++) { - int32 i = gselect[ki]; - BaseFloat gamma = 0.0; // Sum the weight over all the vectors (index m) in - // the state. In sensible cases there should be just one vector per state - // at the point where we do this, though. - for (int32 m = 0; m < post.NumCols(); m++) gamma += post(ki, m); - gamma_(i) += gamma; - y_.AddVec(gamma, per_frame_vars.zti.Row(ki)); - } - // Invalidate my_H_, if present, since it's not efficient to - // keep it updated during accumulation. - if (my_H_.NumRows() != 0) - my_H_.Resize(0); -} - -BaseFloat SgmmClusterable::Objf() const { - // Objective function consists of the expected log-likelihood of - // a weight (assuming we estimate the weights directly as parameters - // instead of the whole subspace thing on the weights), plus - // the auxiliary function improvement we would get from estimating - // the state vector v_j starting from zero. Note: zero is an - // arbitrary starting point-- we could use any value as long as - // we were consistent. - KALDI_ASSERT(static_cast(H_.size()) == sgmm_.NumGauss()); - if (my_H_.NumRows() == 0.0) { - SgmmClusterable *s = static_cast(this->Copy()); // will - // set up my_H_, which we need. - BaseFloat ans = s->Objf(); - delete s; - return ans; - } - double ans = 0.0; - double tot_gamma = gamma_.Sum(), tot_gamma2 = 0.0; - if (tot_gamma == 0.0) return 0.0; - int32 I = gamma_.Dim(); - - for (int32 i = 0; i < I; i++) { - double gamma = gamma_(i); - if (gamma > 0.0) { // Note: should not be negative-- if it is, due to - double prob = gamma / tot_gamma; - if (prob > 0.0) { // Note: prob could be zero due to underflow-- this - // happened! 
[we can get tiny values due to floating-point roundoff
-        // while subtracting clusterable objects].
-        ans += gamma * Log(gamma / tot_gamma);
-      }
-    }
-    tot_gamma2 += gamma;
-  }
-  if (tot_gamma2 == 0.0)
-    return 0.0;  // No positive elements... maybe small negative ones were from
-                 // round off.
-
-  // objf improvement is y^T H^{-1} y.
-  // We'll try to compute this using Cholesky first, which is more
-  // efficient; if this fails or appears to lead to big values,
-  // we'll back off to a more robust SVD-based implementation.
-  try {
-    TpMatrix<double> C(my_H_.NumRows());
-    C.Cholesky(my_H_);
-    C.Invert();
-    for (int32 i = 0; i < C.NumRows(); i++)
-      if (fabs(C(i, i)) > 100.0) {
-        KALDI_VLOG(3) << "Condition-number probably bad: element is "
-                      << C(i, i);
-        throw std::runtime_error("Bad condition number");  // back off to SVD.
-      }
-    // Note: assuming things are well preconditioned, the elements
-    // C(i,i) should be of the rough magnitude 1/sqrt(count).
-    Vector<double> yC(C.NumRows());
-    // Note: if we decompose H = C C^T, then the line below
-    // does yC = C^{-1} y. Note: we are computing the inner
-    // product y^T H^{-1} y. H^{-1} = C^{-T} C^{-1}, so
-    // y^T H^{-1} y = y^T C^{-T} C^{-1} y = yC^T yC.
-    yC.AddTpVec(1.0, C, kNoTrans, y_, 0.0);
-    ans += 0.5 * VecVec(yC, yC);
-  } catch (...) {  // Cholesky threw, or we detected bad condition.
-    // we'll do this using an SVD-based implementation that will
-    // deal with non-invertible matrices.
-    KALDI_VLOG(3) << "Backing off to SVD-based objective computation.";
-    Vector<double> v(y_.Dim());  // Initialized automatically to zero.
-    ans += SolveQuadraticProblem(my_H_, y_, SolverOptions(), &v);  // The objective function
-    // change from estimating this vector.
-  }
-  return ans;
-}
-
-void SgmmClusterable::SetZero() {
-  gamma_.SetZero();
-  y_.SetZero();
-  my_H_.SetZero();  // Should work even if empty.
-}
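The try-block above relies on the identity: if H = C C^T with C lower
triangular (Cholesky), then y^T H^{-1} y = y^T C^{-T} C^{-1} y = ||C^{-1} y||^2;
the code then halves it, since the auxiliary-function improvement is
0.5 * y^T H^{-1} y. A self-contained sketch of that computation on plain
arrays, using forward substitution instead of the explicit C.Invert() call
(assumes H is positive definite; illustrative only):

    #include <cmath>
    #include <stdexcept>
    #include <vector>

    // Returns y^T H^{-1} y for a symmetric positive-definite n x n matrix H.
    double QuadraticFormInv(const std::vector<std::vector<double> > &H,
                            const std::vector<double> &y) {
      size_t n = y.size();
      std::vector<std::vector<double> > C(n, std::vector<double>(n, 0.0));
      for (size_t i = 0; i < n; i++) {    // Cholesky factorization H = C C^T.
        for (size_t j = 0; j <= i; j++) {
          double sum = H[i][j];
          for (size_t k = 0; k < j; k++) sum -= C[i][k] * C[j][k];
          if (i == j) {
            if (sum <= 0.0) throw std::runtime_error("H not positive definite");
            C[i][i] = std::sqrt(sum);
          } else {
            C[i][j] = sum / C[j][j];
          }
        }
      }
      double ans = 0.0;
      std::vector<double> z(n);           // Solve C z = y by forward
      for (size_t i = 0; i < n; i++) {    // substitution; the answer is z^T z.
        double sum = y[i];
        for (size_t k = 0; k < i; k++) sum -= C[i][k] * z[k];
        z[i] = sum / C[i][i];
        ans += z[i] * z[i];
      }
      return ans;
    }

-
-void SgmmClusterable::Add(const Clusterable &other_in) {
-  const SgmmClusterable *other =
-      static_cast<const SgmmClusterable*>(&other_in);
-  gamma_.AddVec(1.0, other->gamma_);
-  y_.AddVec(1.0, other->y_);
-  if (!H_.empty()) {  // we need to compute my_H_.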
- if (my_H_.NumRows() != 0 && other->my_H_.NumRows() != 0) - my_H_.AddSp(1.0, other->my_H_); - else { - my_H_.Resize(0); - ComputeH(); - } - } -} - -void SgmmClusterable::Sub(const Clusterable &other_in) { - const SgmmClusterable *other = - static_cast(&other_in); - gamma_.AddVec(-1.0, other->gamma_); - y_.AddVec(-1.0, other->y_); - if (!H_.empty()) { - if (my_H_.NumRows() != 0 && other->my_H_.NumRows() != 0) - my_H_.AddSp(-1.0, other->my_H_); - else { - my_H_.Resize(0); - ComputeH(); - } - } -} - -BaseFloat SgmmClusterable::Normalizer() const { - return gamma_.Sum(); -} - -Clusterable *SgmmClusterable::Copy() const { - SgmmClusterable *ans = new SgmmClusterable(sgmm_, H_); - ans->gamma_.CopyFromVec(gamma_); - ans->y_.CopyFromVec(y_); - if (!H_.empty()) { - if (my_H_.NumRows() == 0.0) ans->ComputeH(); - else { - ans->my_H_.Resize(my_H_.NumRows()); - ans->my_H_.CopyFromSp(my_H_); - } - } - return ans; -} - -void SgmmClusterable::Scale(BaseFloat f) { - KALDI_ASSERT(f >= 0.0); - gamma_.Scale(f); - y_.Scale(f); - if (my_H_.NumRows() != 0) my_H_.Scale(f); -} - -void SgmmClusterable::Write(std::ostream &os, bool binary) const { - gamma_.Write(os, binary); - y_.Write(os, binary); -} - -Clusterable *SgmmClusterable::ReadNew(std::istream &is, bool binary) const { - SgmmClusterable *ans = new SgmmClusterable(sgmm_, H_); - ans->gamma_.Read(is, binary); - ans->y_.Read(is, binary); - if (!H_.empty()) ans->ComputeH(); - return ans; -} - - -bool AccumulateSgmmTreeStats(const TransitionModel &trans_model, - const AmSgmm &am_sgmm, - const std::vector > &H, - int N, // context window size. - int P, // central position. - const std::vector &ci_phones, // must be sorted - const std::vector &alignment, - const std::vector > &gselect, - const SgmmPerSpkDerivedVars &per_spk_vars, - const Matrix &features, - std::map *stats) { - KALDI_ASSERT(IsSortedAndUniq(ci_phones)); - std::vector > split_alignment; - bool ans = SplitToPhones(trans_model, alignment, &split_alignment); - if (!ans) { - KALDI_WARN << "AccumulateTreeStats: bad alignment."; - return false; - } - int t = 0; - SgmmPerFrameDerivedVars per_frame_vars; - - KALDI_ASSERT(features.NumRows() == static_cast(alignment.size()) - && alignment.size() == gselect.size()); - for (int i = -N; i < static_cast(split_alignment.size()); i++) { - // consider window starting at i, only if i+P is within - // list of phones. - if (i + P >= 0 && i + P < static_cast(split_alignment.size())) { - int32 central_phone = trans_model.TransitionIdToPhone(split_alignment[i+P][0]); - bool is_ctx_dep = ! std::binary_search(ci_phones.begin(), - ci_phones.end(), - central_phone); - EventType evec; - for (int j = 0; j < N; j++) { - int phone; - if (i + j >= 0 && i + j < static_cast(split_alignment.size())) - phone = trans_model.TransitionIdToPhone(split_alignment[i+j][0]); - else - phone = 0; // ContextDependency class uses 0 to mean "out of window". - - if (is_ctx_dep || j == P) - evec.push_back(std::make_pair(static_cast(j), static_cast(phone))); - } - for (int j = 0; j < static_cast(split_alignment[i+P].size());j++) { - // for central phone of this window... - EventType evec_more(evec); - int32 pdf_id = trans_model.TransitionIdToPdf(split_alignment[i+P][j]), - pdf_class = trans_model.TransitionIdToPdfClass(split_alignment[i+P][j]); - // pdf_id represents the acoustic state in the current model. - // pdf_class will normally by 0, 1 or 2 for a 3-state HMM. 
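For readers unfamiliar with the tree-statistics format used in this loop: an
EventType is a list of (key, value) pairs sorted by key, where keys 0..N-1
carry the phones of the context window and a special pdf-class key carries the
HMM state. A minimal sketch of assembling one such event (the value -1 for the
pdf-class key is an assumption mirroring Kaldi's kPdfClass constant):

    #include <algorithm>
    #include <utility>
    #include <vector>

    typedef std::vector<std::pair<int, int> > EventType;
    const int kPdfClassKey = -1;  // assumed value of kPdfClass

    // Build the event for a triphone window {left, center, right} in a given
    // HMM state (pdf_class); keys must end up sorted, which is what the
    // std::sort below in the real code also ensures.
    EventType MakeEvent(int left, int center, int right, int pdf_class) {
      EventType evec;
      evec.push_back(std::make_pair(0, left));    // window position 0
      evec.push_back(std::make_pair(1, center));  // central position P == 1
      evec.push_back(std::make_pair(2, right));   // window position 2
      evec.push_back(std::make_pair(kPdfClassKey, pdf_class));
      std::sort(evec.begin(), evec.end());
      return evec;
    }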
- - std::pair pr(kPdfClass, pdf_class); - evec_more.push_back(pr); - std::sort(evec_more.begin(), evec_more.end()); // these must be sorted! - if (stats->count(evec_more) == 0) - (*stats)[evec_more] = new SgmmClusterable(am_sgmm, H); - - am_sgmm.ComputePerFrameVars(features.Row(t), gselect[t], per_spk_vars, 0.0, - &per_frame_vars); - BaseFloat weight = 1.0; // weight is one, since we have alignment. - (*stats)[evec_more]->Accumulate(per_frame_vars, pdf_id, weight); - t++; - } - } - } - KALDI_ASSERT(t == static_cast(alignment.size())); - return true; -} - -void SgmmClusterable::ComputeH() { - // We're computing my_H_, as a weighted sum of H_, with gamma_ as the - // weights. - KALDI_ASSERT(!H_.empty() && my_H_.NumRows() == 0); // Invalid to call this if H_ empty, - // or my_H_ already set up. - my_H_.Resize(H_[0].NumRows()); // will initialize to zero. - KALDI_ASSERT(static_cast(H_.size()) == gamma_.Dim()); - for (int32 i = 0; i < gamma_.Dim(); i++) { - double gamma = gamma_(i); - if (gamma > 0.0) my_H_.AddSp(gamma, H_[i]); - } -} - - -} // end namespace kaldi diff --git a/src/sgmm/sgmm-clusterable.h b/src/sgmm/sgmm-clusterable.h deleted file mode 100644 index 9a44fce1512..00000000000 --- a/src/sgmm/sgmm-clusterable.h +++ /dev/null @@ -1,112 +0,0 @@ -// sgmm/sgmm-clusterable.h - -// Copyright 2012 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_SGMM_SGMM_CLUSTERABLE_H_ -#define KALDI_SGMM_SGMM_CLUSTERABLE_H_ - -#include -#include - -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "itf/clusterable-itf.h" - -namespace kaldi { - -/// This header defines an object that can be used to create decision -/// trees using a form of SGMM statistics. It is analogous to the -/// GaussClusterable object, but uses the SGMM. The auxiliary function -/// it uses is related to the normal SGMM auxiliary function, but for -/// efficiency it uses a simpler model on the weights, which is equivalent -/// to assuming the weights w_{ji} [there no index m since we assume one -/// mixture per state!] are directly estimated using ML, instead of being -/// computed from v_j and w_i as in the actual SGMM. - -class SgmmClusterable: public Clusterable { - public: - SgmmClusterable(const AmSgmm &sgmm, - const std::vector< SpMatrix > &H): // H can be empty vector - // at initialization. Used to cache something from the model. - sgmm_(sgmm), - H_(H), - gamma_(sgmm.NumGauss()), - y_(sgmm.PhoneSpaceDim()) { } - virtual std::string Type() const { return "sgmm"; } - - /// compare with the Accumulate function of MleAmSgmmAccs - /// Note: the pdf-index j, relating to the original SGMM - /// in sgmm_, is only needed to select the right vector to - /// compute Gaussian-level alignments with. 
- void Accumulate(const SgmmPerFrameDerivedVars &frame_vars, - int32 j, - BaseFloat weight); - - virtual BaseFloat Objf() const; - virtual void SetZero(); - virtual void Add(const Clusterable &other_in); - virtual void Sub(const Clusterable &other_in); - virtual BaseFloat Normalizer() const; - virtual Clusterable *Copy() const; - virtual void Scale(BaseFloat f); - virtual void Write(std::ostream &os, bool binary) const; - virtual Clusterable *ReadNew(std::istream &is, bool binary) const; - virtual ~SgmmClusterable() {} - - const Vector &gamma () const { return gamma_; } - const Vector &y() const { return y_; } - private: - void ComputeH(); // Compute the quantity my_H_, from gamma_ and H_. - - const AmSgmm &sgmm_; // Reference to the SGMM object, needed to compute - // objective functions. - const std::vector< SpMatrix > &H_; // Reference to a vector of SpMatrix which - // should have been computed from the model using ComputeH(). Needed for Objf() function. - Vector gamma_; // Occupation counts for each Gaussian index. Comparable - // to the gamma_{jmi} statistics in the SGMM paper. - Vector y_; // Statistics comparable to the y_{jm} statistics in the SGMM - // paper. - - SpMatrix my_H_; // This quantity is a weighted sum over the H quantities, - // weighted by gamma_(i). It's only nonempty if the H_ matrix is nonempty. - // This quantity is never written to disk; it is to be viewed as a kind of - // cache, present only for purposes of fast objective-function computation. -}; - - -/// Comparable to AccumulateTreeStats, but this version -/// accumulates stats of type SgmmClusterable. Returns -/// true on success. -bool AccumulateSgmmTreeStats(const TransitionModel &trans_model, - const AmSgmm &am_sgmm, - const std::vector > &H, // this is a ref. to temp. - // storage needed in the clusterable class... can be empty - // during accumulation as it doesn't call Objf(). - int N, // context window size. - int P, // central position. 
- const std::vector &ci_phones, // must be sorted - const std::vector &alignment, - const std::vector > &gselect, - const SgmmPerSpkDerivedVars &per_spk_vars, - const Matrix &features, - std::map *stats); - - -} // end namespace kaldi - -#endif // KALDI_SGMM_SGMM_CLUSTERABLE_H_ diff --git a/src/sgmmbin/Makefile b/src/sgmmbin/Makefile deleted file mode 100644 index 556001910e1..00000000000 --- a/src/sgmmbin/Makefile +++ /dev/null @@ -1,31 +0,0 @@ - -all: -EXTRA_CXXFLAGS = -Wno-sign-compare -include ../kaldi.mk - -BINFILES = init-ubm sgmm-align-compiled sgmm-acc-stats-ali \ - sgmm-sum-accs sgmm-est sgmm-decode-faster sgmm-init sgmm-gselect \ - sgmm-est-fmllr sgmm-acc-stats sgmm-est-spkvecs sgmm-post-to-gpost \ - sgmm-acc-stats-gpost sgmm-est-spkvecs-gpost sgmm-comp-prexform \ - sgmm-est-fmllr-gpost sgmm-acc-fmllrbasis-ali sgmm-est-fmllrbasis \ - sgmm-calc-distances sgmm-normalize sgmm-latgen-simple \ - sgmm-latgen-faster sgmm-rescore-lattice sgmm-copy sgmm-write-ubm \ - sgmm-mixup sgmm-info sgmm-acc-tree-stats sgmm-sum-tree-stats \ - sgmm-build-tree sgmm-cluster-phones sgmm-init-from-tree-stats \ - sgmm-est-ebw sgmm-acc-stats2 sgmm-est-multi - -OBJFILES = - - - -TESTFILES = - - -ADDLIBS = ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a \ - ../fstext/kaldi-fstext.a ../sgmm/kaldi-sgmm.a ../hmm/kaldi-hmm.a \ - ../feat/kaldi-feat.a ../transform/kaldi-transform.a \ - ../gmm/kaldi-gmm.a ../tree/kaldi-tree.a ../util/kaldi-util.a \ - ../thread/kaldi-thread.a ../matrix/kaldi-matrix.a \ - ../base/kaldi-base.a - -include ../makefiles/default_rules.mk diff --git a/src/sgmmbin/init-ubm.cc b/src/sgmmbin/init-ubm.cc deleted file mode 100644 index 3a0d398b7f6..00000000000 --- a/src/sgmmbin/init-ubm.cc +++ /dev/null @@ -1,95 +0,0 @@ -// sgmmbin/init-ubm.cc - -// Copyright 2009-2011 Saarland University -// Author: Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
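The init-ubm tool that begins here clusters the Gaussians of a diagonal-GMM
acoustic model down to a single UBM (see the ClusterGaussiansToUbm call
below). The moment-matching step behind merging diagonal Gaussians into one
full-covariance Gaussian can be sketched as follows (plain C++, a hypothetical
helper, not the actual Kaldi implementation, which also performs
occupancy-weighted bottom-up clustering):

    #include <vector>

    // Merge diagonal Gaussians (weights w summing to 1, means mu, diagonal
    // variances var) into one full-covariance Gaussian by moment matching:
    // mean = sum_i w_i mu_i,
    // cov  = sum_i w_i (diag(var_i) + mu_i mu_i^T) - mean mean^T.
    void MergeDiagGaussians(const std::vector<double> &w,
                            const std::vector<std::vector<double> > &mu,
                            const std::vector<std::vector<double> > &var,
                            std::vector<double> *mean_out,
                            std::vector<std::vector<double> > *cov_out) {
      size_t num_gauss = w.size(), dim = mu[0].size();
      mean_out->assign(dim, 0.0);
      for (size_t i = 0; i < num_gauss; i++)
        for (size_t d = 0; d < dim; d++)
          (*mean_out)[d] += w[i] * mu[i][d];
      cov_out->assign(dim, std::vector<double>(dim, 0.0));
      for (size_t i = 0; i < num_gauss; i++) {
        for (size_t r = 0; r < dim; r++) {
          (*cov_out)[r][r] += w[i] * var[i][r];  // diagonal part of E[x x^T]
          for (size_t c = 0; c < dim; c++)
            (*cov_out)[r][c] += w[i] * mu[i][r] * mu[i][c];
        }
      }
      for (size_t r = 0; r < dim; r++)           // subtract mean mean^T
        for (size_t c = 0; c < dim; c++)
          (*cov_out)[r][c] -= (*mean_out)[r] * (*mean_out)[c];
    }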
- -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "util/kaldi-io.h" -#include "gmm/diag-gmm.h" -#include "gmm/full-gmm.h" -#include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" - - -int main(int argc, char *argv[]) { - try { - typedef kaldi::int32 int32; - typedef kaldi::BaseFloat BaseFloat; - - const char *usage = - "Cluster the Gaussians in a diagonal-GMM acoustic model\n" - "to a single full-covariance or diagonal-covariance GMM.\n" - "Usage: init-ubm [options] \n"; - - bool binary_write = true, fullcov_ubm = true; - kaldi::ParseOptions po(usage); - po.Register("binary", &binary_write, "Write output in binary mode"); - po.Register("fullcov-ubm", &fullcov_ubm, "Write out full covariance UBM."); - kaldi::UbmClusteringOptions ubm_opts; - ubm_opts.Register(&po); - - po.Read(argc, argv); - - if (po.NumArgs() != 3) { - po.PrintUsage(); - exit(1); - } - ubm_opts.Check(); - - std::string model_in_filename = po.GetArg(1), - occs_in_filename = po.GetArg(2), - gmm_out_filename = po.GetArg(3); - - kaldi::AmDiagGmm am_gmm; - kaldi::TransitionModel trans_model; - { - bool binary_read; - kaldi::Input ki(model_in_filename, &binary_read); - trans_model.Read(ki.Stream(), binary_read); - am_gmm.Read(ki.Stream(), binary_read); - } - - kaldi::Vector state_occs; - state_occs.Resize(am_gmm.NumPdfs()); - { - bool binary_read; - kaldi::Input ki(occs_in_filename, &binary_read); - state_occs.Read(ki.Stream(), binary_read); - } - - kaldi::DiagGmm ubm; - ClusterGaussiansToUbm(am_gmm, state_occs, ubm_opts, &ubm); - if (fullcov_ubm) { - kaldi::FullGmm full_ubm; - full_ubm.CopyFromDiagGmm(ubm); - kaldi::Output ko(gmm_out_filename, binary_write); - full_ubm.Write(ko.Stream(), binary_write); - } else { - kaldi::Output ko(gmm_out_filename, binary_write); - ubm.Write(ko.Stream(), binary_write); - } - - KALDI_LOG << "Written UBM to " << gmm_out_filename; - } catch(const std::exception &e) { - std::cerr << e.what() << '\n'; - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-acc-fmllrbasis-ali.cc b/src/sgmmbin/sgmm-acc-fmllrbasis-ali.cc deleted file mode 100644 index 3c33e47dde2..00000000000 --- a/src/sgmmbin/sgmm-acc-fmllrbasis-ali.cc +++ /dev/null @@ -1,216 +0,0 @@ -// sgmmbin/sgmm-acc-fmllrbasis-ali.cc - -// Copyright 2009-2011 Saarland University -// Author: Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
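This tool accumulates the statistics from which the fMLLR basis matrices are
estimated: for each speaker s with frame count beta_s, the vectorized,
Hessian-normalized gradient is scaled by 1/sqrt(beta_s) and added to a scatter
matrix by a rank-1 update (see AccumulateForFmllrSubspace earlier in this
patch); the leading eigenvectors of the scatter later become the bases. The
accumulation step, sketched on plain vectors rather than Kaldi's SpMatrix
(illustrative only):

    #include <cmath>
    #include <vector>

    // scatter += g g^T with g = grad_vec / sqrt(beta); one call per speaker.
    void AccumulateGradScatter(const std::vector<double> &grad_vec,
                               double beta,  // per-speaker frame count
                               std::vector<std::vector<double> > *scatter) {
      if (beta <= 0.0) return;  // no stats accumulated for this speaker
      size_t n = grad_vec.size();
      double scale = 1.0 / std::sqrt(beta);
      for (size_t r = 0; r < n; r++)
        for (size_t c = 0; c < n; c++)
          (*scatter)[r][c] += (scale * grad_vec[r]) * (scale * grad_vec[c]);
    }

The 1/sqrt(beta_s) normalization keeps heavily seen speakers from dominating
the scatter, so its eigenvectors reflect per-speaker gradient directions
rather than raw frame counts.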
- -#include - -#include "util/common-utils.h" -#include "hmm/transition-model.h" -#include "sgmm/am-sgmm.h" -#include "sgmm/fmllr-sgmm.h" - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - const char *usage = - "Accumulate stats for FMLLR bases training.\n" - "Usage: sgmm-acc-fmllrbasis-ali [options] " - " \n" - "e.g.: sgmm-acc-fmllrbasis-ali 1.mdl scp:train.scp ark:1.ali 1.acc\n"; - - ParseOptions po(usage); - bool binary_write = true; - std::string gselect_rspecifier, spkvecs_rspecifier, silphones_str; - BaseFloat sil_weight = 0.0; - kaldi::SgmmGselectConfig sgmm_opts; - po.Register("binary", &binary_write, "Write output in binary mode"); - po.Register("gselect", &gselect_rspecifier, - "Precomputed Gaussian indices (rspecifier)"); - po.Register("spk-vecs", &spkvecs_rspecifier, - "Speaker vectors to use during aligment (rspecifier)"); - po.Register("sil-phone-list", &silphones_str, - "Colon-separated list of phones (to weigh differently)"); - po.Register("sil-weight", &sil_weight, "Weight for \"silence\" phones."); - sgmm_opts.Register(&po); - po.Read(argc, argv); - - if (po.NumArgs() != 5) { - po.PrintUsage(); - exit(1); - } - - std::string model_filename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - alignments_rspecifier = po.GetArg(3), - spk2utt_rspecifier = po.GetArg(4), - accs_wxfilename = po.GetArg(5); - - typedef kaldi::int32 int32; - - AmSgmm am_sgmm; - TransitionModel trans_model; - SgmmFmllrGlobalParams fmllr_globals; - { - bool binary; - Input ki(model_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - fmllr_globals.Read(ki.Stream(), binary); - } - - SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - RandomAccessInt32VectorReader alignments_reader(alignments_rspecifier); - - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - - RandomAccessBaseFloatVectorReader spkvecs_reader(spkvecs_rspecifier); - - std::vector silence_phones; - if (!SplitStringToIntegers(silphones_str, ":", false, &silence_phones)) { - KALDI_ERR << "Silence-phones string has wrong format " - << silphones_str; - } - ConstIntegerSet silence_set(silence_phones); // faster lookup. 
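The silence handling just set up is applied per frame further below: frames
whose phone is in the silence set get weight --sil-weight, and are skipped
entirely when that weight is zero. The same pattern in a self-contained sketch
(std::set standing in for Kaldi's ConstIntegerSet, and ParsePhoneSet a
hypothetical stand-in for SplitStringToIntegers):

    #include <set>
    #include <sstream>
    #include <string>

    // Parse a colon-separated phone list such as "1:2:15" into a lookup set.
    std::set<int> ParsePhoneSet(const std::string &spec) {
      std::set<int> phones;
      std::stringstream ss(spec);
      std::string item;
      while (std::getline(ss, item, ':'))
        if (!item.empty()) phones.insert(std::stoi(item));
      return phones;
    }

    // Per-frame weight: 1.0 for speech, sil_weight for silence phones;
    // the caller treats a weight of 0.0 as "skip this frame".
    double FrameWeight(const std::set<int> &silence_set, int phone,
                       double sil_weight) {
      return silence_set.count(phone) ? sil_weight : 1.0;
    }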
- - - kaldi::SgmmPerFrameDerivedVars per_frame_vars; - SpMatrix fmllr_grad_scatter; - int32 dim = am_sgmm.FeatureDim(); - fmllr_grad_scatter.Resize(dim * (dim + 1), kSetZero); - FmllrSgmmAccs spk_stats; - spk_stats.Init(dim, am_sgmm.NumGauss()); - - double tot_like = 0.0, tot_t = 0.0; - int32 num_done = 0, num_no_alignment = 0, num_other_error = 0; - - for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { - spk_stats.SetZero(); - string spk = spk2utt_reader.Key(); - const std::vector &uttlist = spk2utt_reader.Value(); - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(spk)) { - spk_vars.v_s = spkvecs_reader.Value(spk); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << spk; - num_other_error++; - continue; - } - } // else spk_vars is "empty" - - for (size_t i = 0; i < uttlist.size(); i++) { - std::string utt = uttlist[i]; - if (!alignments_reader.HasKey(utt)) { - num_no_alignment++; - continue; - } - const std::vector &alignment = alignments_reader.Value(utt); - - if (!feature_reader.HasKey(utt)) { - KALDI_WARN << "Did not find features for utterance " << utt; - num_other_error++; - continue; - } - const Matrix &feats = feature_reader.Value(utt); - - if (alignment.size() != feats.NumRows()) { - KALDI_WARN << "Alignments has wrong size "<< (alignment.size()) << - " vs. "<< (feats.NumRows()); - num_other_error++; - continue; - } - - bool have_gselect = false; - if (gselect_reader.IsOpen()) { - if (gselect_reader.HasKey(utt)) { - have_gselect = (gselect_reader.Value(utt).size() == feats.NumRows()); - if (!have_gselect) - KALDI_WARN << "Gaussian-selection info available for utterance " - << utt << " has wrong size."; - } else { - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt; - } - } - - const std::vector > *gselect = - (have_gselect ? &gselect_reader.Value(utt) : NULL); - double file_like = 0.0, file_t = 0.0; - - - for (size_t i = 0; i < alignment.size(); i++) { - int32 tid = alignment[i]; // transition identifier. - int32 pdf_id = trans_model.TransitionIdToPdf(tid), - phone = trans_model.TransitionIdToPhone(tid); - BaseFloat weight = 1.0; - if (silence_set.count(phone) != 0) { // is a silence. 
- if (sil_weight > 0.0) - weight = sil_weight; - else - continue; - } - - std::vector this_gselect; - if (gselect != NULL) - this_gselect = (*gselect)[i]; - else - am_sgmm.GaussianSelection(sgmm_opts, feats.Row(i), &this_gselect); - am_sgmm.ComputePerFrameVars(feats.Row(i), this_gselect, spk_vars, 0.0, - &per_frame_vars); - file_like += - spk_stats.Accumulate(am_sgmm, spk_vars, feats.Row(i), - per_frame_vars, pdf_id, weight); - file_t += weight; - } // end looping over all the frames in the utterance - KALDI_VLOG(1) << "Average likelihood for utterance " << utt << " is " - << (file_like/file_t) << " over " << file_t << " frames"; - tot_like += file_like; - tot_t += file_t; - num_done++; - if (num_done % 20 == 0) - KALDI_VLOG(1) << "After " << num_done << " utterances: Average " - << "likelihood per frame = " << (tot_like/tot_t) - << ", over " << tot_t << " frames"; - } // end looping over all utterance for a given speaker - spk_stats.AccumulateForFmllrSubspace(am_sgmm, fmllr_globals, &fmllr_grad_scatter); - } // end looping over all speakers - - KALDI_LOG << "Done " << num_done << " files, " << num_no_alignment - << " with no alignments, " << num_other_error - << " with other errors."; - - KALDI_LOG << "Overall likelihood per frame frame = " << (tot_like/tot_t) - << " over " << tot_t << " frames."; - - { - Output ko(accs_wxfilename, binary_write); - fmllr_grad_scatter.Write(ko.Stream(), binary_write); - KALDI_LOG << "Written accs to: " << accs_wxfilename; - } - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-acc-stats-ali.cc b/src/sgmmbin/sgmm-acc-stats-ali.cc deleted file mode 100644 index 99371fea829..00000000000 --- a/src/sgmmbin/sgmm-acc-stats-ali.cc +++ /dev/null @@ -1,191 +0,0 @@ -// sgmmbin/sgmm-acc-stats-ali.cc - -// Copyright 2009-2012 Saarland University (author: Arnab Ghoshal); -// Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
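The --update-flags option registered below (here and in the following tools)
selects which SGMM parameter groups receive statistics; the string is turned
into a bitmask by StringToSgmmUpdateFlags. A sketch of that mapping with
hypothetical enum values (the real flag constants live in Kaldi's sgmm
headers):

    #include <string>

    enum SgmmUpdateFlags {           // hypothetical bit assignments
      kPhoneVectors       = 0x01,    // 'v'
      kPhoneProjections   = 0x02,    // 'M'
      kSpeakerProjections = 0x04,    // 'N'
      kWeightProjections  = 0x08,    // 'w'
      kSubstateWeights    = 0x10,    // 'c'
      kCovarianceMatrix   = 0x20,    // 'S'
      kTransitions        = 0x40     // 't'
    };

    int ParseUpdateFlags(const std::string &s) {  // e.g. "vMNwcSt"
      int flags = 0;
      for (size_t i = 0; i < s.size(); i++) {
        switch (s[i]) {
          case 'v': flags |= kPhoneVectors;       break;
          case 'M': flags |= kPhoneProjections;   break;
          case 'N': flags |= kSpeakerProjections; break;
          case 'w': flags |= kWeightProjections;  break;
          case 'c': flags |= kSubstateWeights;    break;
          case 'S': flags |= kCovarianceMatrix;   break;
          case 't': flags |= kTransitions;        break;
          default:  break;  // the real parser reports an error here
        }
      }
      return flags;
    }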
- - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "sgmm/estimate-am-sgmm.h" - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - const char *usage = - "Accumulate stats for SGMM training.\n" - "Usage: sgmm-acc-stats-ali [options] " - " \n" - "e.g.: sgmm-acc-stats-ali 1.mdl 1.ali scp:train.scp ark:1.ali 1.acc\n"; - - ParseOptions po(usage); - bool binary = true; - std::string gselect_rspecifier, spkvecs_rspecifier, utt2spk_rspecifier; - std::string update_flags_str = "vMNwcSt"; - BaseFloat rand_prune = 1.0e-05; - kaldi::SgmmGselectConfig sgmm_opts; - po.Register("binary", &binary, "Write output in binary mode"); - po.Register("gselect", &gselect_rspecifier, "Precomputed Gaussian indices (rspecifier)"); - po.Register("spk-vecs", &spkvecs_rspecifier, "Speaker vectors (rspecifier)"); - po.Register("utt2spk", &utt2spk_rspecifier, - "rspecifier for utterance to speaker map"); - po.Register("rand-prune", &rand_prune, "Randomized pruning threshold for posteriors"); - po.Register("update-flags", &update_flags_str, "Which SGMM parameters to update: subset of vMNwcS."); - sgmm_opts.Register(&po); - po.Read(argc, argv); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - kaldi::SgmmUpdateFlagsType acc_flags = StringToSgmmUpdateFlags(update_flags_str); - - std::string model_filename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - alignments_rspecifier = po.GetArg(3), - accs_wxfilename = po.GetArg(4); - - using namespace kaldi; - typedef kaldi::int32 int32; - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - Vector transition_accs; - if (acc_flags & kaldi::kSgmmTransitions) - trans_model.InitStats(&transition_accs); - MleAmSgmmAccs sgmm_accs(rand_prune); - sgmm_accs.ResizeAccumulators(am_sgmm, acc_flags); - - double tot_like = 0.0; - kaldi::int64 tot_t = 0; - - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - RandomAccessInt32VectorReader alignments_reader(alignments_rspecifier); - - RandomAccessInt32VectorVectorReader gselect_reader; - if (!gselect_rspecifier.empty() && !gselect_reader.Open(gselect_rspecifier)) - KALDI_ERR << "Unable to open stream for gaussian-selection indices"; - - RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier, - utt2spk_rspecifier); - - kaldi::SgmmPerFrameDerivedVars per_frame_vars; - - int32 num_done = 0, num_no_alignment = 0, num_other_error = 0; - for (; !feature_reader.Done(); feature_reader.Next()) { - std::string utt = feature_reader.Key(); - if (!alignments_reader.HasKey(utt)) { - num_no_alignment++; - } else { - const Matrix &mat = feature_reader.Value(); - const std::vector &alignment = alignments_reader.Value(utt); - - bool have_gselect = !gselect_rspecifier.empty() - && gselect_reader.HasKey(utt) - && gselect_reader.Value(utt).size() == mat.NumRows(); - if (!gselect_rspecifier.empty() && !have_gselect) - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt << " (or wrong size)\n"; - std::vector > empty_gselect; - const std::vector > *gselect = - (have_gselect ? 
&gselect_reader.Value(utt) : &empty_gselect); - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt; - num_other_error++; - continue; - } - } // else spk_vars is "empty" - - if (alignment.size() != mat.NumRows()) { - KALDI_WARN << "Alignments has wrong size "<< (alignment.size()) << - " vs. "<< (mat.NumRows()); - num_other_error++; - continue; - } - - num_done++; - BaseFloat tot_like_this_file = 0.0; - - for (size_t i = 0; i < alignment.size(); i++) { - int32 tid = alignment[i], // transition identifier. - pdf_id = trans_model.TransitionIdToPdf(tid); - if (acc_flags & kaldi::kSgmmTransitions) - trans_model.Accumulate(1.0, tid, &transition_accs); - std::vector this_gselect; - if (!gselect->empty()) this_gselect = (*gselect)[i]; - else am_sgmm.GaussianSelection(sgmm_opts, mat.Row(i), &this_gselect); - am_sgmm.ComputePerFrameVars(mat.Row(i), this_gselect, spk_vars, 0.0, - &per_frame_vars); - tot_like_this_file += sgmm_accs.Accumulate(am_sgmm, per_frame_vars, - spk_vars.v_s, pdf_id, 1.0, - acc_flags); - } - - sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars.v_s); // no harm doing it per utterance. - - KALDI_VLOG(2) << "Average like for this file is " - << (tot_like_this_file/alignment.size()) << " over " - << alignment.size() <<" frames."; - tot_like += tot_like_this_file; - tot_t += alignment.size(); - if (num_done % 50 == 0) { - KALDI_LOG << "Processed " << num_done << " utterances; for utterance " - << utt << " avg. like is " - << (tot_like_this_file/alignment.size()) - << " over " << alignment.size() <<" frames."; - } - } - } - KALDI_LOG << "Overall like per frame (Gaussian only) = " - << (tot_like/tot_t) << " over " << tot_t << " frames."; - - KALDI_LOG << "Done " << num_done << " files, " << num_no_alignment - << " with no alignments, " << num_other_error - << " with other errors."; - - { - Output ko(accs_wxfilename, binary); - // TODO(arnab): Ideally, we shouldn't be writing transition accs if not - // asked for, but that will complicate reading later. To be fixed? - transition_accs.Write(ko.Stream(), binary); - sgmm_accs.Write(ko.Stream(), binary); - } - KALDI_LOG << "Written accs."; - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-acc-stats-gpost.cc b/src/sgmmbin/sgmm-acc-stats-gpost.cc deleted file mode 100644 index 844afff4360..00000000000 --- a/src/sgmmbin/sgmm-acc-stats-gpost.cc +++ /dev/null @@ -1,174 +0,0 @@ -// sgmmbin/sgmm-acc-stats-gpost.cc - -// Copyright 2009-2012 Saarland University (Author: Arnab Ghoshal) -// Microsoft Corporation; -// Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
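sgmm-acc-stats-gpost, which begins here, consumes precomputed Gaussian-level
posteriors: per frame, the selected Gaussian indices plus, for each
transition-id active on that frame, posterior weights over (substate,
selected-Gaussian) pairs. The shape of that input can be sketched as follows
(an approximation of the SgmmGauPost layout; the real element stores a matrix
per transition-id, flattened here to a vector):

    #include <vector>

    struct GauPostFrame {                      // one frame of posteriors
      std::vector<int> gselect;                // selected Gaussian indices
      std::vector<int> tids;                   // active transition-ids
      std::vector<std::vector<double> > posteriors;  // one row per tid
    };
    typedef std::vector<GauPostFrame> GauPost;  // one entry per frame

    // Total occupancy of tids[j] on a frame: used below as the weight for
    // the transition accumulator (gpost[i].posteriors[j].Sum() in the tool).
    double TidWeight(const GauPostFrame &f, size_t j) {
      double sum = 0.0;
      for (size_t k = 0; k < f.posteriors[j].size(); k++)
        sum += f.posteriors[j][k];
      return sum;
    }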
- - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "sgmm/estimate-am-sgmm.h" - - - - -int main(int argc, char *argv[]) { - using namespace kaldi; - try { - const char *usage = - "Accumulate stats for SGMM training, given Gaussian-level posteriors\n" - "Usage: sgmm-acc-stats-gpost [options] " - " \n" - "e.g.: sgmm-acc-stats-gpost 1.mdl 1.ali scp:train.scp ark, s, cs:- 1.acc\n"; - - ParseOptions po(usage); - bool binary = true; - std::string spkvecs_rspecifier, utt2spk_rspecifier; - std::string update_flags_str = "vMNwcSt"; - BaseFloat rand_prune = 1.0e-05; - - po.Register("binary", &binary, "Write output in binary mode"); - po.Register("spk-vecs", &spkvecs_rspecifier, "Speaker vectors (rspecifier)"); - po.Register("utt2spk", &utt2spk_rspecifier, - "rspecifier for utterance to speaker map"); - po.Register("rand-prune", &rand_prune, "Pruning threshold for posteriors"); - po.Register("update-flags", &update_flags_str, "Which SGMM parameters to update: subset of vMNwcS."); - po.Read(argc, argv); - - kaldi::SgmmUpdateFlagsType acc_flags = StringToSgmmUpdateFlags(update_flags_str); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - std::string model_filename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - gpost_rspecifier = po.GetArg(3), - accs_wxfilename = po.GetArg(4); - - using namespace kaldi; - typedef kaldi::int32 int32; - - // Initialize the readers before the model, as this can avoid - // crashes on systems with low virtual memory. - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - RandomAccessSgmmGauPostReader gpost_reader(gpost_rspecifier); - RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier, - utt2spk_rspecifier); - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - Vector transition_accs; - if (acc_flags & kaldi::kSgmmTransitions) - trans_model.InitStats(&transition_accs); - MleAmSgmmAccs sgmm_accs(rand_prune); - sgmm_accs.ResizeAccumulators(am_sgmm, acc_flags); - - double tot_t = 0.0; - kaldi::SgmmPerFrameDerivedVars per_frame_vars; - - int32 num_done = 0, num_no_posterior = 0, num_other_error = 0; - for (; !feature_reader.Done(); feature_reader.Next()) { - std::string utt = feature_reader.Key(); - if (!gpost_reader.HasKey(utt)) { - num_no_posterior++; - } else { - const Matrix &mat = feature_reader.Value(); - const SgmmGauPost &gpost = gpost_reader.Value(utt); - - if (gpost.size() != mat.NumRows()) { - KALDI_WARN << "Alignments has wrong size "<< (gpost.size()) << - " vs. "<< (mat.NumRows()); - num_other_error++; - continue; - } - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt; - num_other_error++; - continue; - } - } // else spk_vars is "empty" - - num_done++; - BaseFloat tot_weight = 0.0; - - for (size_t i = 0; i < gpost.size(); i++) { - const std::vector &gselect = gpost[i].gselect; - am_sgmm.ComputePerFrameVars(mat.Row(i), gselect, spk_vars, 0.0, - &per_frame_vars); - - for (size_t j = 0; j < gpost[i].tids.size(); j++) { - int32 tid = gpost[i].tids[j], // transition identifier. 
- pdf_id = trans_model.TransitionIdToPdf(tid); - - BaseFloat weight = gpost[i].posteriors[j].Sum(); - if (acc_flags & kaldi::kSgmmTransitions) - trans_model.Accumulate(weight, tid, &transition_accs); - sgmm_accs.AccumulateFromPosteriors(am_sgmm, per_frame_vars, - gpost[i].posteriors[j], - spk_vars.v_s, - pdf_id, acc_flags); - tot_weight += weight; - } - } - - sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars.v_s); // no harm doing it per utterance. - - tot_t += tot_weight; - if (num_done % 50 == 0) - KALDI_LOG << "Processed " << num_done << " utterances"; - } - } - KALDI_LOG << "Overall number of frames is " << tot_t; - - KALDI_LOG << "Done " << num_done << " files, " << num_no_posterior - << " with no posteriors, " << num_other_error - << " with other errors."; - - { - Output ko(accs_wxfilename, binary); - // TODO(arnab): Ideally, we shouldn't be writing transition accs if not - // asked for, but that will complicate reading later. To be fixed? - transition_accs.Write(ko.Stream(), binary); - sgmm_accs.Write(ko.Stream(), binary); - } - KALDI_LOG << "Written accs."; - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-acc-stats.cc b/src/sgmmbin/sgmm-acc-stats.cc deleted file mode 100644 index 7ea3a8b13be..00000000000 --- a/src/sgmmbin/sgmm-acc-stats.cc +++ /dev/null @@ -1,211 +0,0 @@ -// sgmmbin/sgmm-acc-stats.cc - -// Copyright 2009-2011 Saarland University (Author: Arnab Ghoshal), -// 2014 Guoguo Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
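sgmm-acc-stats, which begins here, works from transition-level posteriors and,
as shown further below, first maps them to pdf-level posteriors with
ConvertPosteriorToPdfs. The essence of that conversion, as a sketch
(tid_to_pdf stands in for TransitionModel::TransitionIdToPdf; like the real
code, weights of transition-ids sharing a pdf-id are summed):

    #include <map>
    #include <utility>
    #include <vector>

    typedef std::vector<std::vector<std::pair<int, float> > > Posterior;

    // Per frame, convert (transition-id, weight) pairs to (pdf-id, weight)
    // pairs, merging entries that map to the same pdf-id.
    Posterior ToPdfPosterior(const Posterior &post, int (*tid_to_pdf)(int)) {
      Posterior out(post.size());
      for (size_t t = 0; t < post.size(); t++) {
        std::map<int, float> acc;
        for (size_t j = 0; j < post[t].size(); j++)
          acc[tid_to_pdf(post[t][j].first)] += post[t][j].second;
        out[t].assign(acc.begin(), acc.end());
      }
      return out;
    }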
- - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "sgmm/estimate-am-sgmm.h" -#include "hmm/posterior.h" - - -int main(int argc, char *argv[]) { - using namespace kaldi; - try { - const char *usage = - "Accumulate stats for SGMM training.\n" - "Usage: sgmm-acc-stats [options] " - " \n" - "e.g.: sgmm-acc-stats 1.mdl 1.ali scp:train.scp 'ark:ali-to-post 1.ali ark:-|' 1.acc\n"; - - ParseOptions po(usage); - bool binary = true; - std::string gselect_rspecifier, spkvecs_rspecifier, utt2spk_rspecifier; - std::string update_flags_str = "vMNwcSt"; - BaseFloat rand_prune = 1.0e-05; - SgmmGselectConfig sgmm_opts; - po.Register("binary", &binary, "Write output in binary mode"); - po.Register("gselect", &gselect_rspecifier, "Precomputed Gaussian indices (rspecifier)"); - po.Register("spk-vecs", &spkvecs_rspecifier, "Speaker vectors (rspecifier)"); - po.Register("utt2spk", &utt2spk_rspecifier, - "rspecifier for utterance to speaker map"); - po.Register("rand-prune", &rand_prune, "Pruning threshold for posteriors"); - po.Register("update-flags", &update_flags_str, "Which SGMM parameters to accumulate " - "stats for: subset of vMNwcS."); - sgmm_opts.Register(&po); - - po.Read(argc, argv); - - kaldi::SgmmUpdateFlagsType acc_flags = StringToSgmmUpdateFlags(update_flags_str); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - std::string model_filename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - posteriors_rspecifier = po.GetArg(3), - accs_wxfilename = po.GetArg(4); - - using namespace kaldi; - typedef kaldi::int32 int32; - - // Initialize the readers before the model, as the model can - // be large, and we don't want to call fork() after reading it if - // virtual memory may be low. - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier); - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier, - utt2spk_rspecifier); - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - Vector transition_accs; - if (acc_flags & kaldi::kSgmmTransitions) - trans_model.InitStats(&transition_accs); - MleAmSgmmAccs sgmm_accs(rand_prune); - sgmm_accs.ResizeAccumulators(am_sgmm, acc_flags); - - double tot_like = 0.0; - double tot_t = 0; - - kaldi::SgmmPerFrameDerivedVars per_frame_vars; - - int32 num_done = 0, num_no_posterior = 0, num_other_error = 0; - for (; !feature_reader.Done(); feature_reader.Next()) { - std::string utt = feature_reader.Key(); - if (!posteriors_reader.HasKey(utt)) { - num_no_posterior++; - } else { - const Matrix &mat = feature_reader.Value(); - const Posterior &posterior = posteriors_reader.Value(utt); - - bool have_gselect = !gselect_rspecifier.empty() - && gselect_reader.HasKey(utt) - && gselect_reader.Value(utt).size() == mat.NumRows(); - if (!gselect_rspecifier.empty() && !have_gselect) - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt << " (or wrong size)"; - std::vector > empty_gselect; - const std::vector > *gselect = - (have_gselect ? &gselect_reader.Value(utt) : &empty_gselect); - - if (posterior.size() != mat.NumRows()) { - KALDI_WARN << "Alignments has wrong size "<< (posterior.size()) << - " vs. 
"<< (mat.NumRows()); - num_other_error++; - continue; - } - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt; - num_other_error++; - continue; - } - } // else spk_vars is "empty" - - num_done++; - BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0; - - Posterior pdf_posterior; - ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior); - for (size_t i = 0; i < posterior.size(); i++) { - if (posterior[i].empty()) - continue; - std::vector this_gselect; - if (!gselect->empty()) this_gselect = (*gselect)[i]; - else am_sgmm.GaussianSelection(sgmm_opts, mat.Row(i), &this_gselect); - am_sgmm.ComputePerFrameVars(mat.Row(i), this_gselect, spk_vars, 0.0, - &per_frame_vars); - - // Accumulates for SGMM. - for (size_t j = 0; j < pdf_posterior[i].size(); j++) { - int32 pdf_id = pdf_posterior[i][j].first; - BaseFloat weight = pdf_posterior[i][j].second; - tot_like_this_file += sgmm_accs.Accumulate(am_sgmm, per_frame_vars, - spk_vars.v_s, pdf_id, - weight, acc_flags) - * weight; - tot_weight += weight; - } - - // Accumulates for transitions. - for (size_t j = 0; j < posterior[i].size(); j++) { - if (acc_flags & kaldi::kSgmmTransitions) { - int32 tid = posterior[i][j].first; - BaseFloat weight = posterior[i][j].second; - trans_model.Accumulate(weight, tid, &transition_accs); - } - } - } - - sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars.v_s); // no harm doing it per utterance. - - KALDI_VLOG(2) << "Average like for this file is " - << (tot_like_this_file/tot_weight) << " over " - << tot_weight <<" frames."; - tot_like += tot_like_this_file; - tot_t += tot_weight; - if (num_done % 50 == 0) { - KALDI_LOG << "Processed " << num_done << " utterances; for utterance " - << utt << " avg. like is " - << (tot_like_this_file/tot_weight) - << " over " << tot_weight <<" frames."; - } - } - } - KALDI_LOG << "Overall like per frame (Gaussian only) = " - << (tot_like/tot_t) << " over " << tot_t << " frames."; - - KALDI_LOG << "Done " << num_done << " files, " << num_no_posterior - << " with no posteriors, " << num_other_error - << " with other errors."; - - { - Output ko(accs_wxfilename, binary); - // TODO(arnab): Ideally, we shouldn't be writing transition accs if not - // asked for, but that will complicate reading later. To be fixed? - transition_accs.Write(ko.Stream(), binary); - sgmm_accs.Write(ko.Stream(), binary); - } - KALDI_LOG << "Written accs."; - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-acc-stats2.cc b/src/sgmmbin/sgmm-acc-stats2.cc deleted file mode 100644 index 2f835b727d1..00000000000 --- a/src/sgmmbin/sgmm-acc-stats2.cc +++ /dev/null @@ -1,217 +0,0 @@ -// sgmmbin/sgmm-acc-stats2.cc - -// Copyright 2009-2012 Saarland University (Author: Arnab Ghoshal), -// Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "sgmm/estimate-am-sgmm.h" -#include "hmm/posterior.h" - - -int main(int argc, char *argv[]) { - using namespace kaldi; - try { - const char *usage = - "Accumulate numerator and denominator stats for discriminative training\n" - "of SGMMs (input is posteriors of mixed sign)\n" - "Usage: sgmm-acc-stats2 [options] " - " \n" - "e.g.: sgmm-acc-stats2 1.mdl 1.ali scp:train.scp ark:1.posts num.acc den.acc\n"; - - ParseOptions po(usage); - bool binary = true; - std::string gselect_rspecifier, spkvecs_rspecifier, utt2spk_rspecifier; - std::string update_flags_str = "vMNwcSt"; - BaseFloat rand_prune = 1.0e-05; - SgmmGselectConfig sgmm_opts; - po.Register("binary", &binary, "Write output in binary mode"); - po.Register("gselect", &gselect_rspecifier, "Precomputed Gaussian indices (rspecifier)"); - po.Register("spk-vecs", &spkvecs_rspecifier, "Speaker vectors (rspecifier)"); - po.Register("utt2spk", &utt2spk_rspecifier, - "rspecifier for utterance to speaker map"); - po.Register("rand-prune", &rand_prune, "Pruning threshold for posteriors"); - po.Register("update-flags", &update_flags_str, "Which SGMM parameters to accumulate " - "stats for: subset of vMNwcS."); - sgmm_opts.Register(&po); - - po.Read(argc, argv); - - kaldi::SgmmUpdateFlagsType acc_flags = StringToSgmmUpdateFlags(update_flags_str); - - if (po.NumArgs() != 5) { - po.PrintUsage(); - exit(1); - } - - std::string model_filename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - posteriors_rspecifier = po.GetArg(3), - num_accs_wxfilename = po.GetArg(4), - den_accs_wxfilename = po.GetArg(5); - - - using namespace kaldi; - typedef kaldi::int32 int32; - typedef kaldi::int64 int64; - - // Initialize the readers before the model, as the model can - // be large, and we don't want to call fork() after reading it if - // virtual memory may be low. 
- SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier); - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier, - utt2spk_rspecifier); - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - Vector num_transition_accs, den_transition_accs; - if (acc_flags & kaldi::kSgmmTransitions) { - trans_model.InitStats(&num_transition_accs); - trans_model.InitStats(&den_transition_accs); - } - MleAmSgmmAccs num_sgmm_accs(rand_prune), den_sgmm_accs(rand_prune); - num_sgmm_accs.ResizeAccumulators(am_sgmm, acc_flags); - den_sgmm_accs.ResizeAccumulators(am_sgmm, acc_flags); - - double tot_like = 0.0, tot_weight = 0.0, tot_abs_weight = 0.0; - int64 tot_frames = 0; - - kaldi::SgmmPerFrameDerivedVars per_frame_vars; - - int32 num_done = 0, num_no_posterior = 0, num_other_error = 0; - for (; !feature_reader.Done(); feature_reader.Next()) { - std::string utt = feature_reader.Key(); - if (!posteriors_reader.HasKey(utt)) { - num_no_posterior++; - } else { - const Matrix &mat = feature_reader.Value(); - const Posterior &posterior = posteriors_reader.Value(utt); - - bool have_gselect = !gselect_rspecifier.empty() - && gselect_reader.HasKey(utt) - && gselect_reader.Value(utt).size() == mat.NumRows(); - if (!gselect_rspecifier.empty() && !have_gselect) - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt << " (or wrong size)"; - std::vector > empty_gselect; - const std::vector > *gselect = - (have_gselect ? &gselect_reader.Value(utt) : &empty_gselect); - - if (posterior.size() != mat.NumRows()) { - KALDI_WARN << "Alignments has wrong size "<< (posterior.size()) << - " vs. "<< (mat.NumRows()); - num_other_error++; - continue; - } - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt; - num_other_error++; - continue; - } - } // else spk_vars is "empty" - - num_done++; - BaseFloat tot_like_this_file = 0.0, tot_weight_this_file = 0.0, - tot_abs_weight_this_file = 0.0; - - for (size_t i = 0; i < posterior.size(); i++) { - std::vector this_gselect; - if (!gselect->empty()) this_gselect = (*gselect)[i]; - else am_sgmm.GaussianSelection(sgmm_opts, mat.Row(i), &this_gselect); - am_sgmm.ComputePerFrameVars(mat.Row(i), this_gselect, spk_vars, 0.0, - &per_frame_vars); - - for (size_t j = 0; j < posterior[i].size(); j++) { - int32 tid = posterior[i][j].first, // transition identifier. - pdf_id = trans_model.TransitionIdToPdf(tid); - BaseFloat weight = posterior[i][j].second, - abs_weight = std::abs(weight); - - if (acc_flags & kaldi::kSgmmTransitions) { - trans_model.Accumulate(abs_weight, tid, weight > 0 ? - &num_transition_accs : &den_transition_accs); - } - tot_like_this_file += - (weight > 0 ? num_sgmm_accs : den_sgmm_accs).Accumulate( - am_sgmm, per_frame_vars, spk_vars.v_s, pdf_id, - abs_weight, acc_flags) - * weight; - tot_weight_this_file += weight; - tot_abs_weight_this_file += abs_weight; - } - } - num_sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars.v_s); // no harm doing it per utterance. 
- den_sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars.v_s); - - tot_like += tot_like_this_file; - tot_weight += tot_weight_this_file; - tot_abs_weight += tot_abs_weight_this_file; - tot_frames += posterior.size(); - if (num_done % 50 == 0) - KALDI_LOG << "Processed " << num_done << " utterances."; - } - } - KALDI_LOG << "Overall weighted acoustic likelihood per frame was " - << (tot_like/tot_frames) << " over " << tot_frames << " frames; " - << "average weight per frame is " << (tot_weight/tot_frames) - << ", average abs(weight) per frame is " - << (tot_abs_weight/tot_frames); - - KALDI_LOG << "Done " << num_done << " files, " << num_no_posterior - << " with no posteriors, " << num_other_error - << " with other errors."; - - { - Output ko(num_accs_wxfilename, binary); - // TODO(arnab): Ideally, we shouldn't be writing transition accs if not - // asked for, but that will complicate reading later. To be fixed? - num_transition_accs.Write(ko.Stream(), binary); - num_sgmm_accs.Write(ko.Stream(), binary); - } - { - Output ko(den_accs_wxfilename, binary); - den_transition_accs.Write(ko.Stream(), binary); - den_sgmm_accs.Write(ko.Stream(), binary); - } - KALDI_LOG << "Written accs."; - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-acc-tree-stats.cc b/src/sgmmbin/sgmm-acc-tree-stats.cc deleted file mode 100644 index a63a4ae6f5f..00000000000 --- a/src/sgmmbin/sgmm-acc-tree-stats.cc +++ /dev/null @@ -1,185 +0,0 @@ -// sgmmbin/sgmm-acc-tree-stats.cc - -// Copyright 2012 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
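
sgmm-acc-stats2.cc, removed just above, takes posteriors of mixed sign and keeps two accumulators: positive weights feed the numerator stats and negative weights the denominator stats, each receiving the absolute weight. A standalone C++ sketch of that routing, with hypothetical names and no Kaldi dependency:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    struct Accumulator {
      double count;
      Accumulator() : count(0.0) {}
      void Accumulate(double abs_weight) { count += abs_weight; }
    };

    int main() {
      const double posteriors[] = {0.9, -0.4, 0.6, -0.1};  // mixed sign
      Accumulator num_accs, den_accs;
      double tot_weight = 0.0, tot_abs_weight = 0.0;
      for (size_t j = 0; j < sizeof(posteriors) / sizeof(double); j++) {
        double weight = posteriors[j], abs_weight = std::fabs(weight);
        // Sign selects the accumulator; magnitude is what gets accumulated.
        (weight > 0 ? num_accs : den_accs).Accumulate(abs_weight);
        tot_weight += weight;
        tot_abs_weight += abs_weight;
      }
      std::printf("num %.1f den %.1f net %.1f abs %.1f\n",
                  num_accs.count, den_accs.count, tot_weight, tot_abs_weight);
      return 0;
    }

Accumulating abs(weight) on both sides is what lets a single mixed-sign posterior stream drive a discriminative numerator/denominator update.
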
- -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "tree/context-dep.h" -#include "tree/build-tree-utils.h" -#include "sgmm/sgmm-clusterable.h" -#include "hmm/transition-model.h" - -int main(int argc, char *argv[]) { - using namespace kaldi; - typedef kaldi::int32 int32; - try { - const char *usage = - "Accumulate statistics for decision tree training.\n" - "This version accumulates statistics in the form of state-specific " - "SGMM stats; you need to use the program sgmm-build-tree to build " - "the tree (and sgmm-sum-tree-accs to sum the stats).\n" - "Usage: sgmm-acc-tree-stats [options] sgmm-model-in features-rspecifier " - "alignments-rspecifier [tree-accs-out]\n" - "e.g.: sgmm-acc-tree-stats --ci-phones=48:49 1.mdl scp:train.scp ark:1.ali 1.tacc\n"; - - ParseOptions po(usage); - bool binary = true; - std::string gselect_rspecifier, spkvecs_rspecifier, utt2spk_rspecifier; - string ci_phones_str; - int N = 3, P = 1; - SgmmGselectConfig sgmm_opts; - po.Register("binary", &binary, "Write output in binary mode"); - po.Register("gselect", &gselect_rspecifier, "Precomputed Gaussian indices (rspecifier)"); - po.Register("spk-vecs", &spkvecs_rspecifier, "Speaker vectors (rspecifier)"); - po.Register("utt2spk", &utt2spk_rspecifier, - "rspecifier for utterance to speaker map"); - po.Register("ci-phones", &ci_phones_str, "Colon-separated list of integer " - "indices of context-independent phones."); - po.Register("context-width", &N, "Context window size."); - po.Register("central-position", &P, - "Central context-window position (zero-based)"); - sgmm_opts.Register(&po); - - po.Read(argc, argv); - - if (po.NumArgs() < 3 || po.NumArgs() > 4) { - po.PrintUsage(); - exit(1); - } - - std::string sgmm_filename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - alignment_rspecifier = po.GetArg(3), - accs_wxfilename = po.GetOptArg(4); - - std::vector ci_phones; - if (ci_phones_str != "") { - SplitStringToIntegers(ci_phones_str, ":", false, &ci_phones); - std::sort(ci_phones.begin(), ci_phones.end()); - if (!IsSortedAndUniq(ci_phones) || ci_phones[0] == 0) { - KALDI_ERR << "Invalid set of ci_phones: " << ci_phones_str; - } - } - - TransitionModel trans_model; - AmSgmm am_sgmm; - std::vector > H; // Not initialized in this program-- not needed - // as we don't call Objf() from stats. 
- { - bool binary; - Input ki(sgmm_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - if (gselect_rspecifier.empty()) - KALDI_ERR << "--gselect option is required."; - - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - RandomAccessInt32VectorReader alignment_reader(alignment_rspecifier); - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier, - utt2spk_rspecifier); - - std::map tree_stats; - - int num_done = 0, num_err = 0; - - for (; !feature_reader.Done(); feature_reader.Next()) { - std::string utt = feature_reader.Key(); - if (!alignment_reader.HasKey(utt)) { - num_err++; - } else { - const Matrix &mat = feature_reader.Value(); - const std::vector &alignment = alignment_reader.Value(utt); - - if (!gselect_reader.HasKey(utt) || - - gselect_reader.Value(utt).size() != mat.NumRows()) { - KALDI_WARN << "No gselect information for utterance " << utt - << " (or wrong size)"; - num_err++; - continue; - } - - const std::vector > &gselect = - gselect_reader.Value(utt); - - if (alignment.size() != mat.NumRows()) { - KALDI_WARN << "Alignments has wrong size "<< (alignment.size())<<" vs. "<< (mat.NumRows()); - num_err++; - continue; - } - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt; - } - } // else spk_vars is "empty" - - - // The work gets done here. - if (!AccumulateSgmmTreeStats(trans_model, - am_sgmm, - H, - N, P, - ci_phones, - alignment, - gselect, - spk_vars, - mat, - &tree_stats)) { - num_err++; - } else { - num_done++; - if (num_done % 1000 == 0) - KALDI_LOG << "Processed " << num_done << " utterances."; - } - } - } - - BuildTreeStatsType stats; // Converting from a map to a vector of pairs. - - for (std::map::const_iterator iter = tree_stats.begin(); - iter != tree_stats.end(); - iter++ ) { - stats.push_back(std::make_pair(iter->first, static_cast(iter->second))); - } - tree_stats.clear(); - - { - Output ko(accs_wxfilename, binary); - WriteBuildTreeStats(ko.Stream(), binary, stats); - } - KALDI_LOG << "Accumulated stats for " << num_done << " files, " - << num_err << " failed."; - KALDI_LOG << "Number of separate stats (context-dependent states) is " - << stats.size(); - DeleteBuildTreeStats(&stats); - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-align-compiled.cc b/src/sgmmbin/sgmm-align-compiled.cc deleted file mode 100644 index feeffe78840..00000000000 --- a/src/sgmmbin/sgmm-align-compiled.cc +++ /dev/null @@ -1,179 +0,0 @@ -// sgmmbin/sgmm-align-compiled.cc - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "hmm/hmm-utils.h" -#include "fstext/fstext-lib.h" -#include "decoder/decoder-wrappers.h" -#include "decoder/training-graph-compiler.h" -#include "sgmm/decodable-am-sgmm.h" -#include "lat/kaldi-lattice.h" // for {Compact}LatticeArc - - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - using fst::SymbolTable; - using fst::VectorFst; - using fst::StdArc; - - const char *usage = - "Align features given [SGMM-based] models.\n" - "Usage: sgmm-align-compiled [options] model-in graphs-rspecifier " - "feature-rspecifier alignments-wspecifier\n" - "e.g.: sgmm-align-compiled 1.mdl ark:graphs.fsts scp:train.scp ark:1.ali\n"; - - ParseOptions po(usage); - bool binary = true; - AlignConfig align_config; - BaseFloat acoustic_scale = 1.0; - BaseFloat transition_scale = 1.0; - BaseFloat self_loop_scale = 1.0; - BaseFloat log_prune = 5.0; - - std::string gselect_rspecifier, spkvecs_rspecifier, utt2spk_rspecifier; - SgmmGselectConfig sgmm_opts; - - align_config.Register(&po); - po.Register("binary", &binary, "Write output in binary mode"); - po.Register("log-prune", &log_prune, "Pruning beam used to reduce number " - "of exp() evaluations."); - po.Register("spk-vecs", &spkvecs_rspecifier, "Speaker vectors (rspecifier)"); - po.Register("utt2spk", &utt2spk_rspecifier, - "rspecifier for utterance to speaker map"); - po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic " - "likelihoods"); - po.Register("transition-scale", &transition_scale, "Scaling factor for " - "some transition probabilities [see also self-loop-scale]."); - po.Register("self-loop-scale", &self_loop_scale, "Scaling factor for " - "self-loop versus non-self-loop probability mass [controls " - "most transition probabilities.]"); - po.Register("gselect", &gselect_rspecifier, "Precomputed Gaussian indices " - "(rspecifier)"); - sgmm_opts.Register(&po); - - po.Read(argc, argv); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - std::string model_in_filename = po.GetArg(1), - fst_rspecifier = po.GetArg(2), - feature_rspecifier = po.GetArg(3), - alignment_wspecifier = po.GetArg(4); - - TransitionModel trans_model; - AmSgmm am_sgmm; - { - bool binary; - Input ki(model_in_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - SequentialTableReader fst_reader(fst_rspecifier); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - - RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier, - utt2spk_rspecifier); - - Int32VectorWriter alignment_writer(alignment_wspecifier); - - int32 num_done = 0, num_err = 0, num_retry = 0; - double tot_like = 0.0; - kaldi::int64 frame_count = 0; - - for (; !fst_reader.Done(); fst_reader.Next()) { - std::string utt = fst_reader.Key(); - if (!feature_reader.HasKey(utt)) { - KALDI_WARN << "No features found for utterance " << utt; - num_err++; - continue; - } - VectorFst decode_fst(fst_reader.Value()); - // stops copy-on-write of the fst by deleting the fst inside the reader, - // since we're about to mutate the fst by adding transition probs. 
- fst_reader.FreeCurrent(); - - const Matrix &features = feature_reader.Value(utt); - if (features.NumRows() == 0) { - KALDI_WARN << "Empty features for utterance " << utt; - num_err++; - continue; - } - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt; - num_err++; - continue; - } - } // else spk_vars is "empty" - - bool have_gselect = !gselect_rspecifier.empty() - && gselect_reader.HasKey(utt) - && gselect_reader.Value(utt).size() == features.NumRows(); - if (!gselect_rspecifier.empty() && !have_gselect) - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt << " (or wrong size)"; - std::vector > empty_gselect; - const std::vector > *gselect = - (have_gselect ? &gselect_reader.Value(utt) : &empty_gselect); - - { // Add transition-probs to the FST. - std::vector disambig_syms; // empty. - AddTransitionProbs(trans_model, disambig_syms, - transition_scale, self_loop_scale, - &decode_fst); - } - - DecodableAmSgmmScaled sgmm_decodable(sgmm_opts, am_sgmm, spk_vars, trans_model, - features, *gselect, log_prune, acoustic_scale); - - AlignUtteranceWrapper(align_config, utt, - acoustic_scale, &decode_fst, &sgmm_decodable, - &alignment_writer, NULL, - &num_done, &num_err, &num_retry, - &tot_like, &frame_count); - } - - KALDI_LOG << "Overall log-likelihood per frame is " << (tot_like/frame_count) - << " over " << frame_count<< " frames."; - KALDI_LOG << "Retried " << num_retry << " out of " - << (num_done + num_err) << " utterances."; - KALDI_LOG << "Done " << num_done << ", errors on " << num_err; - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-build-tree.cc b/src/sgmmbin/sgmm-build-tree.cc deleted file mode 100644 index de63e60f56f..00000000000 --- a/src/sgmmbin/sgmm-build-tree.cc +++ /dev/null @@ -1,201 +0,0 @@ -// sgmmbin/sgmm-build-tree.cc - -// Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
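
sgmm-build-tree.cc, whose removal begins above, hands its --thresh and --max-leaves options to Kaldi's BuildTree(). Below is a toy, self-contained illustration of the stopping rule those options imply; the candidate gains and all names are made up, and the real splitting logic lives inside BuildTree().

    #include <cstdio>
    #include <vector>

    int main() {
      // Pretend candidate splits, already sorted by gain (best first).
      std::vector<double> gains;
      gains.push_back(900.0); gains.push_back(450.0);
      gains.push_back(310.0); gains.push_back(120.0);

      const double thresh = 300.0;  // mirrors the --thresh default below
      const int max_leaves = 3;     // 0 would mean "no leaf limit"
      int num_leaves = 1;           // start from a single root leaf
      for (size_t i = 0; i < gains.size(); i++) {
        if (gains[i] < thresh) break;                           // gain too small
        if (max_leaves > 0 && num_leaves >= max_leaves) break;  // budget spent
        num_leaves++;  // each accepted split turns one leaf into two
      }
      std::printf("kept %d leaves\n", num_leaves);
      return 0;
    }
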
- - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "hmm/hmm-topology.h" -#include "tree/context-dep.h" -#include "tree/build-tree.h" -#include "tree/build-tree-utils.h" -#include "sgmm/sgmm-clusterable.h" -#include "sgmm/estimate-am-sgmm.h" -#include "util/text-utils.h" - - -int main(int argc, char *argv[]) { - using namespace kaldi; - try { - using namespace kaldi; - typedef kaldi::int32 int32; - - const char *usage = - "Train decision tree\n" - "Usage: sgmm-build-tree [options] " - " []\n" - "e.g.: sgmm-build-tree 0.sgmm streeacc roots.txt 1.qst tree\n"; - - bool binary = true; - int32 P = 1, N = 3; - - BaseFloat thresh = 300.0; - BaseFloat cluster_thresh = -1.0; // negative means use smallest split in splitting phase as thresh. - int32 max_leaves = 0; - std::string occs_out_filename; - - ParseOptions po(usage); - po.Register("binary", &binary, "Write output in binary mode"); - po.Register("context-width", &N, "Context window size [must match " - "acc-tree-stats]"); - po.Register("central-position", &P, "Central position in context window " - "[must match acc-tree-stats]"); - po.Register("max-leaves", &max_leaves, "Maximum number of leaves to be " - "used in tree-buliding (if positive)"); - po.Register("thresh", &thresh, "Log-likelihood change threshold for " - "tree-building"); - po.Register("cluster-thresh", &cluster_thresh, "Log-likelihood change " - "threshold for clustering after tree-building"); - - po.Read(argc, argv); - - if (po.NumArgs() != 5) { - po.PrintUsage(); - exit(1); - } - - std::string sgmm_filename = po.GetArg(1), - stats_filename = po.GetArg(2), - roots_filename = po.GetArg(3), - questions_filename = po.GetArg(4), - tree_out_filename = po.GetArg(5); - - // Following 2 variables derived from roots file. - // phone_sets is sets of phones that share their roots. - // Just one phone each for normal systems. - std::vector > phone_sets; - std::vector is_shared_root; - std::vector is_split_root; - { - Input ki(roots_filename.c_str()); - ReadRootsFile(ki.Stream(), &phone_sets, &is_shared_root, &is_split_root); - } - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(sgmm_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - const HmmTopology &topo = trans_model.GetTopo(); - std::vector > H; - am_sgmm.ComputeH(&H); - - BuildTreeStatsType stats; - { - bool binary_in; - SgmmClusterable sc(am_sgmm, H); // dummy stats needed to provide - // type info, and access to am_sgmm and H. - Input ki(stats_filename, &binary_in); - ReadBuildTreeStats(ki.Stream(), binary_in, sc, &stats); - } - KALDI_LOG << "Number of separate statistics is " << stats.size(); - - Questions qo; - { - bool binary_in; - try { - Input ki(questions_filename, &binary_in); - qo.Read(ki.Stream(), binary_in); - } catch (const std::exception &e) { - KALDI_ERR << "Error reading questions file "< phone2num_pdf_classes; - topo.GetPhoneToNumPdfClasses(&phone2num_pdf_classes); - - EventMap *to_pdf = NULL; - - //////// Build the tree. //////////// - - to_pdf = BuildTree(qo, - phone_sets, - phone2num_pdf_classes, - is_shared_root, - is_split_root, - stats, - thresh, - max_leaves, - cluster_thresh, - P); - - { // This block is to warn about low counts. 
- std::vector split_stats; - SplitStatsByMap(stats, *to_pdf, - &split_stats); - for (size_t i = 0; i < split_stats.size(); i++) - if (SumNormalizer(split_stats[i]) < 100.0) - KALDI_VLOG(1) << "For pdf-id " << i << ", low count " - << SumNormalizer(split_stats[i]); - } - - ContextDependency ctx_dep(N, P, to_pdf); // takes ownership - // of pointer "to_pdf", so set it NULL. - to_pdf = NULL; - - WriteKaldiObject(ctx_dep, tree_out_filename, binary); - - { // This block is just doing some checks. - - std::vector all_phones; - for (size_t i = 0; i < phone_sets.size(); i++) - all_phones.insert(all_phones.end(), - phone_sets[i].begin(), phone_sets[i].end()); - SortAndUniq(&all_phones); - if (all_phones != topo.GetPhones()) { - std::ostringstream ss; - WriteIntegerVector(ss, false, all_phones); - ss << " vs. "; - WriteIntegerVector(ss, false, topo.GetPhones()); - KALDI_WARN << "Mismatch between phone sets provided in roots file, and those in topology: " << ss.str(); - } - std::vector seen_phones; - PossibleValues(P, stats, &seen_phones); // get phones seen in the data. - - std::vector unseen_phones; // diagnostic. - for (size_t i = 0; i < all_phones.size(); i++) - if (!std::binary_search(seen_phones.begin(), seen_phones.end(), all_phones[i])) - unseen_phones.push_back(all_phones[i]); - for (size_t i = 0; i < seen_phones.size(); i++) - if (!std::binary_search(all_phones.begin(), all_phones.end(), seen_phones[i])) - KALDI_ERR << "Phone " << (seen_phones[i]) - << " appears in stats but is not listed in roots file."; - if (!unseen_phones.empty()) { - std::ostringstream ss; - for (size_t i = 0; i < unseen_phones.size(); i++) - ss << unseen_phones[i] << ' '; - // Note, unseen phones is just a warning as in certain kinds of - // systems, this can be OK (e.g. where phone encodes position and - // stress information). - KALDI_WARN << "Saw no stats for following phones: " << ss.str(); - } - } - - KALDI_LOG << "Wrote tree"; - - DeleteBuildTreeStats(&stats); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/src/sgmmbin/sgmm-calc-distances.cc b/src/sgmmbin/sgmm-calc-distances.cc deleted file mode 100644 index a621b6217b4..00000000000 --- a/src/sgmmbin/sgmm-calc-distances.cc +++ /dev/null @@ -1,74 +0,0 @@ -// sgmmbin/sgmm-calc-distances.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
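
sgmm-calc-distances.cc, whose removal begins here, fills a symmetric NumPdfs x NumPdfs matrix of approximated K-L divergences via AmSgmmFunctions::ComputeDistances(). As a self-contained stand-in for the SGMM computation, this sketch symmetrises the closed-form K-L divergence between one-dimensional Gaussians; all values are illustrative.

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // KL(N(m0, s0^2) || N(m1, s1^2)) for one-dimensional Gaussians.
    double KlGauss(double m0, double s0, double m1, double s1) {
      return std::log(s1 / s0) +
          (s0 * s0 + (m0 - m1) * (m0 - m1)) / (2.0 * s1 * s1) - 0.5;
    }

    int main() {
      const double means[] = {0.0, 1.0, 4.0}, sigmas[] = {1.0, 1.5, 1.0};
      const int n = 3;
      std::vector<std::vector<double> > dists(n, std::vector<double>(n, 0.0));
      for (int i = 0; i < n; i++)
        for (int j = i + 1; j < n; j++)
          dists[i][j] = dists[j][i] =  // symmetrised KL, so the matrix is symmetric
              0.5 * (KlGauss(means[i], sigmas[i], means[j], sigmas[j]) +
                     KlGauss(means[j], sigmas[j], means[i], sigmas[i]));
      std::printf("d(0,2) = %.3f\n", dists[0][2]);
      return 0;
    }
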
- -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" - - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - - const char *usage = - "Compute matrix of approximated K-L divergences between states\n" - "Only works properly if a single substate per state.\n" - "Usage: sgmm-calc-distances [options] model-in occs-in distances-out\n"; - - bool binary = true; - ParseOptions po(usage); - po.Register("binary", &binary, "Write output in binary mode"); - po.Read(argc, argv); - - if (po.NumArgs() != 3) { - po.PrintUsage(); - exit(1); - } - - std::string model_in_filename = po.GetArg(1), - occs_in_filename = po.GetArg(2), - distances_out_filename = po.GetArg(3); - - - AmSgmm am_sgmm; - { - bool binary; - Input ki(model_in_filename, &binary); - TransitionModel trans_model; - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - Vector occs; - ReadKaldiObject(occs_in_filename, &occs); - - Matrix dists(am_sgmm.NumPdfs(), am_sgmm.NumPdfs()); - AmSgmmFunctions::ComputeDistances(am_sgmm, occs, &dists); - - Output ko(distances_out_filename, binary); - dists.Write(ko.Stream(), binary); - - KALDI_LOG << "Wrote distances to " << distances_out_filename; - } catch(const std::exception &e) { - std::cerr << e.what() << '\n'; - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-cluster-phones.cc b/src/sgmmbin/sgmm-cluster-phones.cc deleted file mode 100644 index fce3d43e113..00000000000 --- a/src/sgmmbin/sgmm-cluster-phones.cc +++ /dev/null @@ -1,148 +0,0 @@ -// sgmmbin/sgmm-cluster-phones.cc - -// Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "tree/context-dep.h" -#include "tree/build-tree.h" -#include "tree/build-tree-utils.h" -#include "tree/context-dep.h" -#include "sgmm/sgmm-clusterable.h" -#include "hmm/transition-model.h" -#include "util/text-utils.h" - - - -int main(int argc, char *argv[]) { - using namespace kaldi; - try { - using namespace kaldi; - typedef kaldi::int32 int32; - - const char *usage = - "Cluster phones (or sets of phones) into sets for various purposes\n" - "Usage: sgmm-cluster-phones [options] \n" - "e.g.: sgmm-cluster-phones 0.sgmm 1.tacc phonesets.txt questions.txt\n"; - // Format of phonesets.txt is e.g. - // 1 - // 2 3 4 - // 5 6 - // ... - // Format of questions.txt output is similar, but with more lines (and the same phone - // may appear on multiple lines). - - // bool binary = true; - int32 P = 1, N = 3; // Note: N does not matter. - std::string pdf_class_list_str = "1"; // 1 is just the central position of 3. 
- std::string mode = "questions"; - int32 num_classes = -1; - - ParseOptions po(usage); - // po.Register("binary", &binary, "Write output in binary mode"); - po.Register("central-position", &P, "Central position in context window [must match acc-tree-stats]"); - po.Register("context-width", &N, "Does not have any effect-- included for scripting convenience."); - po.Register("pdf-class-list", &pdf_class_list_str, "Colon-separated list of HMM positions to consider [Default = 1: just central position for 3-state models]."); - po.Register("mode", &mode, "Mode of operation: \"questions\"->sets suitable for decision trees; \"k-means\"->k-means algorithm, output k classes (set num-classes options)\n"); - po.Register("num-classes", &num_classes, "For k-means mode, number of classes."); - - - po.Read(argc, argv); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - std::string sgmm_rxfilename = po.GetArg(1), - stats_rxfilename = po.GetArg(2), - phone_sets_rxfilename = po.GetArg(3), - phone_sets_wxfilename = po.GetArg(4); - - AmSgmm am_sgmm; - { - TransitionModel trans_model; - bool binary_in; - Input ki(sgmm_rxfilename, &binary_in); - trans_model.Read(ki.Stream(), binary_in); - am_sgmm.Read(ki.Stream(), binary_in); - } - std::vector > H; - am_sgmm.ComputeH(&H); - - BuildTreeStatsType stats; - { // Read tree stats. - bool binary_in; - SgmmClusterable sc(am_sgmm, H); // dummy needed to provide type and sgmm ref. - Input ki(stats_rxfilename, &binary_in); - ReadBuildTreeStats(ki.Stream(), binary_in, sc, &stats); - } - KALDI_LOG << "Number of separate states in stats is " - << stats.size(); - - std::vector pdf_class_list; - if (!SplitStringToIntegers(pdf_class_list_str, ":", false, &pdf_class_list) - || pdf_class_list.empty()) { - KALDI_ERR << "Invalid pdf-class-list string [expecting colon-separated list of integers]: " - << pdf_class_list_str; - } - - std::vector > phone_sets; - if (!ReadIntegerVectorVectorSimple(phone_sets_rxfilename, &phone_sets)) - KALDI_ERR << "Could not read phone sets from " - << PrintableRxfilename(phone_sets_rxfilename); - - if (phone_sets.size() == 0) - KALDI_ERR << "No phone sets in phone sets file "; - - std::vector > phone_sets_out; - - if (mode == "questions") { - if (num_classes != -1) - KALDI_ERR << "num-classes option is not (currently) compatible " - "with \"questions\" mode."; - AutomaticallyObtainQuestions(stats, - phone_sets, - pdf_class_list, - P, - &phone_sets_out); - } else if (mode == "k-means") { - if (num_classes <= 1 || - static_cast(num_classes) > phone_sets.size()) - KALDI_ERR << "num-classes invalid: num_classes is " << num_classes - << ", number of phone sets is " << phone_sets.size(); - KMeansClusterPhones(stats, - phone_sets, - pdf_class_list, - P, - num_classes, - &phone_sets_out); - } - - if (!WriteIntegerVectorVectorSimple(phone_sets_wxfilename, phone_sets_out)) - KALDI_ERR << "Error writing questions to " - << PrintableWxfilename(phone_sets_wxfilename); - else - KALDI_LOG << "Wrote questions to "< \n"; - - bool binary = true; - kaldi::ParseOptions po(usage); - po.Register("binary", &binary, "Write output in binary mode"); - po.Read(argc, argv); - - if (po.NumArgs() < 3) { - po.PrintUsage(); - exit(1); - } - - std::string sgmm_in_filename = po.GetArg(1), - occs_filename = po.GetArg(2), - sgmm_out_filename = po.GetArg(3); - - kaldi::AmSgmm sgmm_in; - kaldi::TransitionModel trans_model; - { - bool binary_read; - kaldi::Input ki(sgmm_in_filename, &binary_read); - trans_model.Read(ki.Stream(), binary_read); - sgmm_in.Read(ki.Stream(), 
binary_read); - } - - kaldi::Vector occs; - { - bool binary_read; - kaldi::Input ki(occs_filename, &binary_read); - occs.Read(ki.Stream(), binary_read); - } - - kaldi::SgmmFmllrGlobalParams fmllr_globals; - sgmm_in.ComputeFmllrPreXform(occs, &fmllr_globals.pre_xform_, - &fmllr_globals.inv_xform_, - &fmllr_globals.mean_scatter_); - - { - kaldi::Output ko(sgmm_out_filename, binary); - trans_model.Write(ko.Stream(), binary); - sgmm_in.Write(ko.Stream(), binary, kaldi::kSgmmWriteAll); - fmllr_globals.Write(ko.Stream(), binary); - } - - KALDI_LOG << "Written model to " << sgmm_out_filename; - } catch(const std::exception &e) { - std::cerr << e.what() << '\n'; - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-copy.cc b/src/sgmmbin/sgmm-copy.cc deleted file mode 100644 index df1f960ed95..00000000000 --- a/src/sgmmbin/sgmm-copy.cc +++ /dev/null @@ -1,74 +0,0 @@ -// sgmmbin/sgmm-copy.cc - -// Copyright 2009-2012 Microsoft Corporation -// Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include "base/kaldi-common.h" -#include "util/common-utils.h" - -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - const char *usage = - "Copy SGMM (possibly changing binary/text format)\n" - "Usage: sgmm-copy [options] \n" - "e.g.: sgmm-copy --binary=false 1.mdl 1_text.mdl\n"; - - bool binary_write = true; - - ParseOptions po(usage); - po.Register("binary", &binary_write, "Write output in binary mode"); - - po.Read(argc, argv); - if (po.NumArgs() != 2) { - po.PrintUsage(); - exit(1); - } - std::string model_in_filename = po.GetArg(1), - model_out_filename = po.GetArg(2); - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_in_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - { - Output ko(model_out_filename, binary_write); - trans_model.Write(ko.Stream(), binary_write); - am_sgmm.Write(ko.Stream(), binary_write, kSgmmWriteAll); - } - - - KALDI_LOG << "Written model to " << model_out_filename; - return 0; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-decode-faster.cc b/src/sgmmbin/sgmm-decode-faster.cc deleted file mode 100644 index b20808e144e..00000000000 --- a/src/sgmmbin/sgmm-decode-faster.cc +++ /dev/null @@ -1,218 +0,0 @@ -// sgmmbin/sgmm-decode-faster.cc - -// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -using std::string; - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "fstext/fstext-lib.h" -#include "decoder/faster-decoder.h" -#include "sgmm/decodable-am-sgmm.h" -#include "base/timer.h" -#include "lat/kaldi-lattice.h" // for {Compact}LatticeArc - - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - using fst::SymbolTable; - using fst::VectorFst; - using fst::StdArc; - - const char *usage = - "Decode features using SGMM-based model.\n" - "Usage: sgmm-decode-faster [options] " - " [alignments-wspecifier]\n"; - ParseOptions po(usage); - bool allow_partial = true; - BaseFloat acoustic_scale = 0.1; - BaseFloat log_prune = 5.0; - string word_syms_filename, gselect_rspecifier, spkvecs_rspecifier, - utt2spk_rspecifier; - - FasterDecoderOptions decoder_opts; - decoder_opts.Register(&po, true); // true == include obscure settings. - kaldi::SgmmGselectConfig sgmm_opts; - sgmm_opts.Register(&po); - - po.Register("acoustic-scale", &acoustic_scale, - "Scaling factor for acoustic likelihoods"); - po.Register("log-prune", &log_prune, - "Pruning beam used to reduce number of exp() evaluations."); - po.Register("word-symbol-table", &word_syms_filename, - "Symbol table for words [for debug output]"); - po.Register("gselect", &gselect_rspecifier, - "rspecifier for precomputed per-frame Gaussian indices."); - po.Register("spk-vecs", &spkvecs_rspecifier, - "rspecifier for speaker vectors"); - po.Register("utt2spk", &utt2spk_rspecifier, - "rspecifier for utterance to speaker map"); - po.Register("allow-partial", &allow_partial, - "Produce output even when final state was not reached"); - po.Read(argc, argv); - - if (po.NumArgs() < 4 || po.NumArgs() > 5) { - po.PrintUsage(); - exit(1); - } - - std::string model_in_filename = po.GetArg(1), - fst_in_filename = po.GetArg(2), - feature_rspecifier = po.GetArg(3), - words_wspecifier = po.GetArg(4), - alignment_wspecifier = po.GetOptArg(5); - - TransitionModel trans_model; - kaldi::AmSgmm am_sgmm; - { - bool binary; - Input ki(model_in_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - Int32VectorWriter words_writer(words_wspecifier); - Int32VectorWriter alignment_writer(alignment_wspecifier); - - fst::SymbolTable *word_syms = NULL; - if (word_syms_filename != "") - if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename))) - KALDI_ERR << "Could not read symbol table from file " - << word_syms_filename; - - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - - RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier, - utt2spk_rspecifier); - - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - - // It's important that we initialize decode_fst after feature_reader, as it - // can prevent crashes on systems installed without enough virtual memory. 
- // It has to do with what happens on UNIX systems if you call fork() on a - // large process: the page-table entries are duplicated, which requires a - // lot of virtual memory. - VectorFst *decode_fst = fst::ReadFstKaldi(fst_in_filename); - - BaseFloat tot_like = 0.0; - kaldi::int64 frame_count = 0; - int num_success = 0, num_fail = 0; - FasterDecoder decoder(*decode_fst, decoder_opts); - - Timer timer; - const std::vector > empty_gselect; - - for (; !feature_reader.Done(); feature_reader.Next()) { - string utt = feature_reader.Key(); - Matrix features(feature_reader.Value()); - feature_reader.FreeCurrent(); - if (features.NumRows() == 0) { - KALDI_WARN << "Zero-length utterance: " << utt; - num_fail++; - continue; - } - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt; - num_fail++; - continue; - } - } // else spk_vars is "empty" - - bool has_gselect = false; - if (gselect_reader.IsOpen()) { - has_gselect = gselect_reader.HasKey(utt) - && gselect_reader.Value(utt).size() == features.NumRows(); - if (!has_gselect) - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt << " (or wrong size)"; - } - const std::vector > *gselect = - (has_gselect ? &gselect_reader.Value(utt) : &empty_gselect); - - DecodableAmSgmmScaled sgmm_decodable(sgmm_opts, am_sgmm, spk_vars, - trans_model, features, *gselect, - log_prune, acoustic_scale); - decoder.Decode(&sgmm_decodable); - - VectorFst decoded; // linear FST. - - if ( (allow_partial || decoder.ReachedFinal()) - && decoder.GetBestPath(&decoded) ) { - if (!decoder.ReachedFinal()) - KALDI_WARN << "Decoder did not reach end-state, " - << "outputting partial traceback since --allow-partial=true"; - num_success++; - std::vector alignment; - std::vector words; - LatticeWeight weight; - frame_count += features.NumRows(); - - GetLinearSymbolSequence(decoded, &alignment, &words, &weight); - - words_writer.Write(utt, words); - if (alignment_writer.IsOpen()) - alignment_writer.Write(utt, alignment); - if (word_syms != NULL) { - std::cerr << utt << ' '; - for (size_t i = 0; i < words.size(); i++) { - std::string s = word_syms->Find(words[i]); - if (s == "") - KALDI_ERR << "Word-id " << words[i] << " not in symbol table."; - std::cerr << s << ' '; - } - std::cerr << '\n'; - } - BaseFloat like = -weight.Value1() -weight.Value2(); - tot_like += like; - KALDI_LOG << "Log-like per frame for utterance " << utt << " is " - << (like / features.NumRows()) << " over " - << features.NumRows() << " frames."; - } else { - num_fail++; - KALDI_WARN << "Did not successfully decode utterance " << utt - << ", len = " << features.NumRows(); - } - } - double elapsed = timer.Elapsed(); - KALDI_LOG << "Time taken [excluding initialization] "<< elapsed - << "s: real-time factor assuming 100 frames/sec is " - << (elapsed*100.0/frame_count); - KALDI_LOG << "Done " << num_success << " utterances, failed for " - << num_fail; - KALDI_LOG << "Overall log-likelihood per frame = " << (tot_like/frame_count) - << " over " << frame_count << " frames."; - - delete word_syms; - delete decode_fst; - return (num_success != 0 ? 
0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/src/sgmmbin/sgmm-est-ebw.cc b/src/sgmmbin/sgmm-est-ebw.cc deleted file mode 100644 index 71c7255c238..00000000000 --- a/src/sgmmbin/sgmm-est-ebw.cc +++ /dev/null @@ -1,118 +0,0 @@ -// sgmmbin/sgmm-est-ebw.cc - -// Copyright 2012 Johns Hopkins Univerity (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "thread/kaldi-thread.h" -#include "hmm/transition-model.h" -#include "sgmm/estimate-am-sgmm-ebw.h" - - -int main(int argc, char *argv[]) { - using namespace kaldi; - typedef kaldi::int32 int32; - using std::string; - try { - const char *usage = - "Estimate SGMM model parameters discriminatively using Extended\n" - "Baum-Welch style of update\n" - "Usage: sgmm-est-ebw [options] \n"; - - - string update_flags_str = "vMNwcSt"; - bool binary_write = true; - string write_flags_str = "gsnu"; - EbwAmSgmmOptions opts; - - - ParseOptions po(usage); - po.Register("binary", &binary_write, "Write output in binary mode"); - po.Register("update-flags", &update_flags_str, "Which SGMM parameters to " - "update: subset of vMNwcSt."); - po.Register("write-flags", &write_flags_str, "Which SGMM parameters to " - "write: subset of gsnu"); - po.Register("num-threads", &g_num_threads, "Number of threads to use in " - "weight update and normalizer computation"); - opts.Register(&po); - - po.Read(argc, argv); - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - string model_in_filename = po.GetArg(1), - num_stats_filename = po.GetArg(2), - den_stats_filename = po.GetArg(3), - model_out_filename = po.GetArg(4); - - SgmmUpdateFlagsType update_flags = StringToSgmmUpdateFlags(update_flags_str); - SgmmWriteFlagsType write_flags = StringToSgmmWriteFlags(write_flags_str); - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_in_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - MleAmSgmmAccs sgmm_num_accs; - { - bool binary; - Vector transition_accs; // won't be used. - Input ki(num_stats_filename, &binary); - transition_accs.Read(ki.Stream(), binary); - sgmm_num_accs.Read(ki.Stream(), binary, false); // false == add; doesn't matter. - } - MleAmSgmmAccs sgmm_den_accs; - { - bool binary; - Vector transition_accs; // won't be used. - Input ki(den_stats_filename, &binary); - transition_accs.Read(ki.Stream(), binary); - sgmm_den_accs.Read(ki.Stream(), binary, false); // false == add; doesn't matter. - } - - sgmm_num_accs.Check(am_sgmm, true); // Will check consistency and print some diagnostics. - sgmm_den_accs.Check(am_sgmm, true); // Will check consistency and print some diagnostics. - - { // Update SGMM. 
- BaseFloat auxf_impr, count; - kaldi::EbwAmSgmmUpdater sgmm_updater(opts); - sgmm_updater.Update(sgmm_num_accs, sgmm_den_accs, &am_sgmm, - update_flags, &auxf_impr, &count); - KALDI_LOG << "Overall auxf impr/frame from SGMM update is " << (auxf_impr/count) - << " over " << count << " frames."; - } - - { - Output ko(model_out_filename, binary_write); - trans_model.Write(ko.Stream(), binary_write); - am_sgmm.Write(ko.Stream(), binary_write, write_flags); - } - - KALDI_LOG << "Wrote model to " << model_out_filename; - return 0; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/src/sgmmbin/sgmm-est-fmllr-gpost.cc b/src/sgmmbin/sgmm-est-fmllr-gpost.cc deleted file mode 100644 index 1190c6d5b73..00000000000 --- a/src/sgmmbin/sgmm-est-fmllr-gpost.cc +++ /dev/null @@ -1,261 +0,0 @@ -// sgmmbin/sgmm-est-fmllr-gpost.cc - -// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -using std::string; -#include -using std::vector; - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "sgmm/fmllr-sgmm.h" -#include "hmm/transition-model.h" - -namespace kaldi { - -void AccumulateForUtterance(const Matrix &feats, - const SgmmGauPost &gpost, - const TransitionModel &trans_model, - const AmSgmm &am_sgmm, - const SgmmPerSpkDerivedVars &spk_vars, - BaseFloat logdet, - FmllrSgmmAccs *spk_stats) { -// kaldi::SgmmPerFrameDerivedVars per_frame_vars; - - for (size_t i = 0; i < gpost.size(); i++) { -// am_sgmm.ComputePerFrameVars(feats.Row(i), gpost[i].gselect, spk_vars, -// logdet, &per_frame_vars); - - for (size_t j = 0; j < gpost[i].tids.size(); j++) { - int32 pdf_id = trans_model.TransitionIdToPdf(gpost[i].tids[j]); - spk_stats->AccumulateFromPosteriors(am_sgmm, spk_vars, feats.Row(i), - gpost[i].gselect, - gpost[i].posteriors[j], pdf_id); - } - } -} - -} // end namespace kaldi - -int main(int argc, char *argv[]) { - try { - typedef kaldi::int32 int32; - using namespace kaldi; - const char *usage = - "Estimate FMLLR transform for SGMMs, either per utterance or for the " - "supplied set of speakers (with spk2utt option).\n" - "Reads Gaussian-level posteriors. 
Writes to a table of matrices.\n" - "Usage: sgmm-est-fmllr-gpost [options] " - " \n"; - - ParseOptions po(usage); - string spk2utt_rspecifier, spkvecs_rspecifier, fmllr_rspecifier; - BaseFloat min_count = 100; - SgmmFmllrConfig fmllr_opts; - - po.Register("spk2utt", &spk2utt_rspecifier, - "File to read speaker to utterance-list map from."); - po.Register("spkvec-min-count", &min_count, - "Minimum count needed to estimate speaker vectors"); - po.Register("spk-vecs", &spkvecs_rspecifier, - "Speaker vectors to use during aligment (rspecifier)"); - po.Register("input-fmllr", &fmllr_rspecifier, - "Initial FMLLR transform per speaker (rspecifier)"); - fmllr_opts.Register(&po); - po.Read(argc, argv); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - string model_rxfilename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - gpost_rspecifier = po.GetArg(3), - fmllr_wspecifier = po.GetArg(4); - - TransitionModel trans_model; - AmSgmm am_sgmm; - SgmmFmllrGlobalParams fmllr_globals; - { - bool binary; - Input ki(model_rxfilename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - fmllr_globals.Read(ki.Stream(), binary); - } - - RandomAccessSgmmGauPostReader gpost_reader(gpost_rspecifier); - - RandomAccessBaseFloatVectorReader spkvecs_reader(spkvecs_rspecifier); - - RandomAccessBaseFloatMatrixReader fmllr_reader(fmllr_rspecifier); - - BaseFloatMatrixWriter fmllr_writer(fmllr_wspecifier); - - int32 dim = am_sgmm.FeatureDim(); - FmllrSgmmAccs spk_stats; - spk_stats.Init(dim, am_sgmm.NumGauss()); - Matrix fmllr_xform(dim, dim + 1); - BaseFloat logdet = 0.0; - double tot_impr = 0.0, tot_t = 0.0; - int32 num_done = 0, num_no_gpost = 0, num_other_error = 0; - std::vector > empty_gselect; - - if (!spk2utt_rspecifier.empty()) { // per-speaker adaptation - SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - - for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { - spk_stats.SetZero(); - string spk = spk2utt_reader.Key(); - const vector &uttlist = spk2utt_reader.Value(); - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(spk)) { - spk_vars.v_s = spkvecs_reader.Value(spk); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << spk; - num_other_error++; - continue; - } - } // else spk_vars is "empty" - - if (fmllr_reader.IsOpen()) { - if (fmllr_reader.HasKey(spk)) { - fmllr_xform.CopyFromMat(fmllr_reader.Value(spk)); - logdet = fmllr_xform.Range(0, dim, 0, dim).LogDet(); - } else { - KALDI_WARN << "Cannot find FMLLR transform for " << spk; - fmllr_xform.SetUnit(); - logdet = 0.0; - } - } else { - fmllr_xform.SetUnit(); - logdet = 0.0; - } - - for (size_t i = 0; i < uttlist.size(); i++) { - std::string utt = uttlist[i]; - if (!feature_reader.HasKey(utt)) { - KALDI_WARN << "Did not find features for utterance " << utt; - continue; - } - if (!gpost_reader.HasKey(utt)) { - KALDI_WARN << "Did not find posteriors for utterance " << utt; - num_no_gpost++; - continue; - } - const Matrix &feats = feature_reader.Value(utt); - const SgmmGauPost &gpost = gpost_reader.Value(utt); - if (static_cast(gpost.size()) != feats.NumRows()) { - KALDI_WARN << "gpost vector has wrong size " << (gpost.size()) - << " vs. 
" << (feats.NumRows()); - num_other_error++; - continue; - } - - AccumulateForUtterance(feats, gpost, trans_model, am_sgmm, spk_vars, - logdet, &spk_stats); - num_done++; - } // end looping over all utterances of the current speaker - - BaseFloat impr, spk_frame_count; - // Compute the FMLLR transform and write it out. - spk_stats.Update(am_sgmm, fmllr_globals, fmllr_opts, &fmllr_xform, - &spk_frame_count, &impr); - fmllr_writer.Write(spk, fmllr_xform); - tot_impr += impr; - tot_t += spk_frame_count; - } // end looping over speakers - } else { // per-utterance adaptation - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !feature_reader.Done(); feature_reader.Next()) { - string utt = feature_reader.Key(); - if (!gpost_reader.HasKey(utt)) { - KALDI_WARN << "Did not find posts for utterance " - << utt; - num_no_gpost++; - continue; - } - const Matrix &feats = feature_reader.Value(); - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt; - num_other_error++; - continue; - } - } // else spk_vars is "empty" - - if (fmllr_reader.IsOpen()) { - if (fmllr_reader.HasKey(utt)) { - fmllr_xform.CopyFromMat(fmllr_reader.Value(utt)); - logdet = fmllr_xform.Range(0, dim, 0, dim).LogDet(); - } else { - KALDI_WARN << "Cannot find FMLLR transform for " << utt; - fmllr_xform.SetUnit(); - logdet = 0.0; - } - } else { - fmllr_xform.SetUnit(); - logdet = 0.0; - } - - const SgmmGauPost &gpost = gpost_reader.Value(utt); - - if (static_cast(gpost.size()) != feats.NumRows()) { - KALDI_WARN << "gpost has wrong size " << (gpost.size()) - << " vs. " << (feats.NumRows()); - num_other_error++; - continue; - } - spk_stats.SetZero(); - AccumulateForUtterance(feats, gpost, trans_model, am_sgmm, spk_vars, - logdet, &spk_stats); - num_done++; - - BaseFloat impr, spk_frame_count; - // Compute the FMLLR transform and write it out. - spk_stats.Update(am_sgmm, fmllr_globals, fmllr_opts, &fmllr_xform, - &spk_frame_count, &impr); - fmllr_writer.Write(utt, fmllr_xform); - tot_impr += impr; - tot_t += spk_frame_count; - } - } - - KALDI_LOG << "Done " << num_done << " files, " << num_no_gpost - << " with no gposts, " << num_other_error << " with other errors."; - KALDI_LOG << "Num frames " << tot_t << ", auxf impr per frame is " - << (tot_impr / tot_t); - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - diff --git a/src/sgmmbin/sgmm-est-fmllr.cc b/src/sgmmbin/sgmm-est-fmllr.cc deleted file mode 100644 index 2ad2c8d62cf..00000000000 --- a/src/sgmmbin/sgmm-est-fmllr.cc +++ /dev/null @@ -1,318 +0,0 @@ -// sgmmbin/sgmm-est-fmllr.cc - -// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -using std::string; -#include -using std::vector; - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "sgmm/fmllr-sgmm.h" -#include "hmm/transition-model.h" -#include "hmm/posterior.h" - -namespace kaldi { - -void AccumulateForUtterance(const Matrix &feats, - const Matrix &transformed_feats, // if already fMLLR - const std::vector > &gselect, - const SgmmGselectConfig &sgmm_config, - const Posterior &post, - const TransitionModel &trans_model, - const AmSgmm &am_sgmm, - const SgmmPerSpkDerivedVars &spk_vars, - BaseFloat logdet, - FmllrSgmmAccs *spk_stats) { - kaldi::SgmmPerFrameDerivedVars per_frame_vars; - - Posterior pdf_post; - ConvertPosteriorToPdfs(trans_model, post, &pdf_post); - for (size_t t = 0; t < post.size(); t++) { - std::vector this_gselect; - if (!gselect.empty()) { - KALDI_ASSERT(t < gselect.size()); - this_gselect = gselect[t]; - } else { - am_sgmm.GaussianSelection(sgmm_config, feats.Row(t), &this_gselect); - } - // per-frame vars only used for computing posteriors... use the - // transformed feats for this, if available. - am_sgmm.ComputePerFrameVars(transformed_feats.Row(t), this_gselect, spk_vars, - 0.0 /*fMLLR logdet*/, &per_frame_vars); - - - for (size_t j = 0; j < pdf_post[t].size(); j++) { - int32 pdf_id = pdf_post[t][j].first; - Matrix posteriors; - am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id, - &posteriors); - posteriors.Scale(pdf_post[t][j].second); - spk_stats->AccumulateFromPosteriors(am_sgmm, spk_vars, feats.Row(t), - this_gselect, - posteriors, pdf_id); - } - } -} - -} // end namespace kaldi - -int main(int argc, char *argv[]) { - try { - typedef kaldi::int32 int32; - using namespace kaldi; - const char *usage = - "Estimate FMLLR transform for SGMMs, either per utterance or for the " - "supplied set of speakers (with spk2utt option).\n" - "Reads state-level posteriors. 
Writes to a table of matrices.\n" - "Usage: sgmm-est-fmllr [options] " - " \n"; - - ParseOptions po(usage); - string spk2utt_rspecifier, spkvecs_rspecifier, fmllr_rspecifier, - gselect_rspecifier; - BaseFloat min_count = 100; - SgmmFmllrConfig fmllr_opts; - SgmmGselectConfig sgmm_opts; - - po.Register("spk2utt", &spk2utt_rspecifier, - "File to read speaker to utterance-list map from."); - po.Register("spkvec-min-count", &min_count, - "Minimum count needed to estimate speaker vectors"); - po.Register("spk-vecs", &spkvecs_rspecifier, - "Speaker vectors to use during aligment (rspecifier)"); - po.Register("input-fmllr", &fmllr_rspecifier, - "Initial FMLLR transform per speaker (rspecifier)"); - po.Register("gselect", &gselect_rspecifier, - "Precomputed Gaussian indices (rspecifier)"); - fmllr_opts.Register(&po); - sgmm_opts.Register(&po); - - po.Read(argc, argv); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - string model_rxfilename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - post_rspecifier = po.GetArg(3), - fmllr_wspecifier = po.GetArg(4); - - TransitionModel trans_model; - AmSgmm am_sgmm; - SgmmFmllrGlobalParams fmllr_globals; - { - bool binary; - Input ki(model_rxfilename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - fmllr_globals.Read(ki.Stream(), binary); - } - - RandomAccessPosteriorReader post_reader(post_rspecifier); - RandomAccessBaseFloatVectorReader spkvecs_reader(spkvecs_rspecifier); - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - RandomAccessBaseFloatMatrixReader fmllr_reader(fmllr_rspecifier); - - BaseFloatMatrixWriter fmllr_writer(fmllr_wspecifier); - - int32 dim = am_sgmm.FeatureDim(); - FmllrSgmmAccs spk_stats; - spk_stats.Init(dim, am_sgmm.NumGauss()); - Matrix fmllr_xform(dim, dim + 1); - BaseFloat logdet = 0.0; - double tot_impr = 0.0, tot_t = 0.0; - int32 num_done = 0, num_err = 0; - std::vector > empty_gselect; - - if (!spk2utt_rspecifier.empty()) { // per-speaker adaptation - SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - - for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { - spk_stats.SetZero(); - string spk = spk2utt_reader.Key(); - const vector &uttlist = spk2utt_reader.Value(); - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(spk)) { - spk_vars.v_s = spkvecs_reader.Value(spk); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << spk; - num_err++; - continue; - } - } // else spk_vars is "empty" - - if (fmllr_reader.IsOpen()) { - if (fmllr_reader.HasKey(spk)) { - fmllr_xform.CopyFromMat(fmllr_reader.Value(spk)); - logdet = fmllr_xform.Range(0, dim, 0, dim).LogDet(); - } else { - KALDI_WARN << "Cannot find FMLLR transform for " << spk; - fmllr_xform.SetUnit(); - logdet = 0.0; - } - } else { - fmllr_xform.SetUnit(); - logdet = 0.0; - } - - for (size_t i = 0; i < uttlist.size(); i++) { - std::string utt = uttlist[i]; - if (!feature_reader.HasKey(utt)) { - KALDI_WARN << "Did not find features for utterance " << utt; - num_err++; - continue; - } - if (!post_reader.HasKey(utt)) { - KALDI_WARN << "Did not find posteriors for utterance " << utt; - num_err++; - continue; - } - const Matrix &feats = feature_reader.Value(utt); - const Posterior &post = post_reader.Value(utt); - if (static_cast(post.size()) != feats.NumRows()) { - KALDI_WARN << "posterior vector has wrong size " 
<< (post.size()) - << " vs. " << (feats.NumRows()); - num_err++; - continue; - } - - bool have_gselect = !gselect_rspecifier.empty() - && gselect_reader.HasKey(utt) - && gselect_reader.Value(utt).size() == feats.NumRows(); - if (!gselect_rspecifier.empty() && !have_gselect) - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt << " (or wrong size)"; - const std::vector > *gselect = - (have_gselect ? &gselect_reader.Value(utt) : &empty_gselect); - - Matrix transformed_feats(feats); - for (int32 r = 0; r < transformed_feats.NumRows(); r++) { - SubVector row(transformed_feats, r); - ApplyAffineTransform(fmllr_xform, &row); - } - AccumulateForUtterance(feats, transformed_feats, *gselect, sgmm_opts, - post, trans_model, am_sgmm, spk_vars, - logdet, &spk_stats); - num_done++; - } // end looping over all utterances of the current speaker - - BaseFloat impr, spk_frame_count; - // Compute the FMLLR transform and write it out. - spk_stats.Update(am_sgmm, fmllr_globals, fmllr_opts, &fmllr_xform, - &spk_frame_count, &impr); - fmllr_writer.Write(spk, fmllr_xform); - tot_impr += impr; - tot_t += spk_frame_count; - } // end looping over speakers - } else { // per-utterance adaptation - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !feature_reader.Done(); feature_reader.Next()) { - string utt = feature_reader.Key(); - if (!post_reader.HasKey(utt)) { - KALDI_WARN << "Did not find posts for utterance " - << utt; - num_err++; - continue; - } - const Matrix &feats = feature_reader.Value(); - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt; - num_err++; - continue; - } - } // else spk_vars is "empty" - - if (fmllr_reader.IsOpen()) { - if (fmllr_reader.HasKey(utt)) { - fmllr_xform.CopyFromMat(fmllr_reader.Value(utt)); - logdet = fmllr_xform.Range(0, dim, 0, dim).LogDet(); - } else { - KALDI_WARN << "Cannot find FMLLR transform for " << utt; - fmllr_xform.SetUnit(); - logdet = 0.0; - } - } else { - fmllr_xform.SetUnit(); - logdet = 0.0; - } - - const Posterior &post = post_reader.Value(utt); - - if (static_cast(post.size()) != feats.NumRows()) { - KALDI_WARN << "post has wrong size " << (post.size()) - << " vs. " << (feats.NumRows()); - num_err++; - continue; - } - spk_stats.SetZero(); - - Matrix transformed_feats(feats); - for (int32 r = 0; r < transformed_feats.NumRows(); r++) { - SubVector row(transformed_feats, r); - ApplyAffineTransform(fmllr_xform, &row); - } - bool have_gselect = !gselect_rspecifier.empty() - && gselect_reader.HasKey(utt) - && gselect_reader.Value(utt).size() == feats.NumRows(); - if (!gselect_rspecifier.empty() && !have_gselect) - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt << " (or wrong size)"; - const std::vector > *gselect = - (have_gselect ? &gselect_reader.Value(utt) : &empty_gselect); - - AccumulateForUtterance(feats, transformed_feats, *gselect, sgmm_opts, - post, trans_model, am_sgmm, spk_vars, - logdet, &spk_stats); - num_done++; - - BaseFloat impr, spk_frame_count; - // Compute the FMLLR transform and write it out. 
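// Roughly, going by the calls here: Update() turns the FmllrSgmmAccs
// accumulated for this utterance into a transform estimate, and returns in
// 'impr' and 'spk_frame_count' the auxiliary-function improvement and frame
// count that feed the tot_impr / tot_t totals logged at the end of main().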
- spk_stats.Update(am_sgmm, fmllr_globals, fmllr_opts, &fmllr_xform, - &spk_frame_count, &impr); - fmllr_writer.Write(utt, fmllr_xform); - tot_impr += impr; - tot_t += spk_frame_count; - } - } - - KALDI_LOG << "Done " << num_done << " files, " << num_err << " with errors."; - KALDI_LOG << "Overall auxf impr per frame is " << (tot_impr / tot_t) - << " per frame, over " << tot_t << " frames."; - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - diff --git a/src/sgmmbin/sgmm-est-fmllrbasis.cc b/src/sgmmbin/sgmm-est-fmllrbasis.cc deleted file mode 100644 index 155d4ed7a1b..00000000000 --- a/src/sgmmbin/sgmm-est-fmllrbasis.cc +++ /dev/null @@ -1,93 +0,0 @@ -// sgmmbin/sgmm-est-fmllrbasis.cc - -// Copyright 2009-2011 Saarland University -// Author: Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "util/common-utils.h" -#include "matrix/matrix-lib.h" -#include "hmm/transition-model.h" -#include "sgmm/am-sgmm.h" -#include "sgmm/fmllr-sgmm.h" - -int main(int argc, char *argv[]) { - try { - typedef kaldi::int32 int32; - - const char *usage = - "Sum multiple accumulated stats files for SGMM training.\n" - "Usage: sgmm-est-fmllrbasis [options] " - " [stats-in2 ...]\n"; - - bool binary = true; - int32 num_bases = 50; - kaldi::ParseOptions po(usage); - po.Register("binary", &binary, "Write output in binary mode."); - po.Register("num-bases", &num_bases, - "Number of fMLLR basis matrices to estimate."); - po.Read(argc, argv); - - if (po.NumArgs() < 3) { - po.PrintUsage(); - exit(1); - } - - std::string model_in_filename = po.GetArg(1), - model_out_filename = po.GetArg(2); - - kaldi::AmSgmm am_sgmm; - kaldi::TransitionModel trans_model; - kaldi::SgmmFmllrGlobalParams fmllr_globals; - { - bool binary_read; - kaldi::Input ki(model_in_filename, &binary_read); - trans_model.Read(ki.Stream(), binary_read); - am_sgmm.Read(ki.Stream(), binary_read); - fmllr_globals.Read(ki.Stream(), binary_read); - } - - kaldi::SpMatrix fmllr_grad_scatter; - int32 dim = am_sgmm.FeatureDim(); - fmllr_grad_scatter.Resize(dim * (dim + 1), kaldi::kSetZero); - - for (int i = 3, max = po.NumArgs(); i <= max; i++) { - std::string stats_in_filename = po.GetArg(i); - bool binary_read; - kaldi::Input ki(stats_in_filename, &binary_read); - fmllr_grad_scatter.Read(ki.Stream(), binary_read, - true /* add read values */); - } - - kaldi::EstimateSgmmFmllrSubspace(fmllr_grad_scatter, num_bases, dim, - &fmllr_globals); - - // Write out the accs - { - kaldi::Output ko(model_out_filename, binary); - trans_model.Write(ko.Stream(), binary); - am_sgmm.Write(ko.Stream(), binary, kaldi::kSgmmWriteAll); - fmllr_globals.Write(ko.Stream(), binary); - } - - KALDI_LOG << "Written model to " << model_out_filename; - } catch(const std::exception &e) { - std::cerr << e.what() << '\n'; - return -1; - } -} - - diff --git 
a/src/sgmmbin/sgmm-est-multi.cc b/src/sgmmbin/sgmm-est-multi.cc deleted file mode 100644 index 845714b7708..00000000000 --- a/src/sgmmbin/sgmm-est-multi.cc +++ /dev/null @@ -1,233 +0,0 @@ -// sgmmbin/sgmm-est-multi.cc - -// Copyright 2009-2012 Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include "base/kaldi-common.h" -#include "util/common-utils.h" - -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "sgmm/estimate-am-sgmm.h" -#include "sgmm/estimate-am-sgmm-multi.h" - -int main(int argc, char *argv[]) { - using namespace kaldi; - // Memory for these will be freed in the catch block in case of exceptions. - std::vector sgmms_in; - std::vector sgmm_accs_in; - std::vector trans_models_in; - - try { - typedef kaldi::int32 int32; - const char *usage = - "Estimate multiple SGMM models from corresponding stats, such that the" - " global parameters\n(phone-, speaker-, and weight-projections and " - "covariances) are tied across models.\n" - "Usage: sgmm-est-multi [options] [ " - " ...]\n"; - - bool binary_write = true; - std::string update_flags_str = "vMNwcSt"; - std::string write_flags_str = "gsnu"; - kaldi::MleTransitionUpdateConfig tcfg; - kaldi::MleAmSgmmOptions sgmm_opts; - std::string split_substates = ""; // Space-seperated list of #substates - std::vector split_substates_int; // The above string split on space - int32 increase_phn_dim = 0; - int32 increase_spk_dim = 0; - bool remove_speaker_space = false; - BaseFloat perturb_factor = 0.01; - BaseFloat power = 0.2; - BaseFloat max_cond = 100; - - ParseOptions po(usage); - po.Register("binary", &binary_write, "Write output in binary mode"); - // The split-substates option also takes a single integer: the same number - // of substates for all models. 
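// Concretely (matching the SplitStringToIntegers handling further down):
// a single value, e.g. --split-substates="5000", is replicated as the target
// for every model, while a space-separated list must supply exactly one
// target per model or the program exits with KALDI_ERR.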
- po.Register("split-substates", &split_substates, "Space-separated string " - "with target number of substates for each model."); - po.Register("increase-phn-dim", &increase_phn_dim, "Increase phone-space " - "dimension as far as allowed towards this target."); - po.Register("increase-spk-dim", &increase_spk_dim, "Increase speaker-space " - "dimension as far as allowed towards this target."); - po.Register("remove-speaker-space", &remove_speaker_space, - "Remove speaker-specific projections N"); - po.Register("power", &power, "Exponent for substate occupancies used while " - "splitting substates."); - po.Register("perturb-factor", &perturb_factor, "Perturbation factor for " - "state vectors while splitting substates."); - po.Register("max-cond-split", &max_cond, "Max condition number of smoothing " - "matrix used in substate splitting."); - po.Register("update-flags", &update_flags_str, "Which SGMM parameters to " - "update: subset of vMNwcSt."); - po.Register("write-flags", &write_flags_str, "Which SGMM parameters to " - "write: subset of gsnu"); - tcfg.Register(&po); - sgmm_opts.Register(&po); - - po.Read(argc, argv); - if (po.NumArgs() <= 0 || (po.NumArgs() % 4 != 0)) { - po.PrintUsage(); - exit(1); - } - // How many 4-tuples of model, stats, output model, output occs - int32 num_models = po.NumArgs()/4; - sgmms_in.resize(num_models, NULL); - sgmm_accs_in.resize(num_models, NULL); - trans_models_in.resize(num_models, NULL); - - if (!split_substates.empty()) { - SplitStringToIntegers(split_substates, " ", true /*omit empty strings*/, - &split_substates_int); - if (split_substates_int.size() == 1) { // Same #substates for all models - int32 tmp_int = split_substates_int[0]; - split_substates_int.resize(num_models, tmp_int); - } - if (split_substates_int.size() != num_models) { - KALDI_ERR << "Found " << split_substates_int.size() << " splitting " - << "targets; expecting 1 or " << num_models; - } - } - - SgmmUpdateFlagsType update_flags = StringToSgmmUpdateFlags(update_flags_str); - SgmmWriteFlagsType write_flags = StringToSgmmWriteFlags(write_flags_str); - - std::vector model_out_filenames(num_models); - std::vector occs_out_filenames(num_models); - int32 phn_dim, spk_dim, num_gauss, feat_dim; - - for (int i = 0; i < num_models; ++i) { - std::string model_in_filename = po.GetArg(i*4+1), - stats_filename = po.GetArg(i*4+2); - model_out_filenames[i] = po.GetArg(i*4+3); - occs_out_filenames[i] = po.GetArg(i*4+4); - - AmSgmm *am_sgmm = new AmSgmm(); - TransitionModel *trans_model = new TransitionModel(); - { - bool binary; - Input ki(model_in_filename, &binary); - trans_model->Read(ki.Stream(), binary); - am_sgmm->Read(ki.Stream(), binary); - } - if (i == 0) { - phn_dim = am_sgmm->PhoneSpaceDim(); - spk_dim = am_sgmm->SpkSpaceDim(); - num_gauss = am_sgmm->NumGauss(); - feat_dim = am_sgmm->FeatureDim(); - } else { - if (am_sgmm->PhoneSpaceDim() != phn_dim) { - KALDI_ERR << "File '" << model_in_filename << "': mismatched " - << "phone-space dim: expecting " << phn_dim << ", found " - << am_sgmm->PhoneSpaceDim(); - } - if (am_sgmm->SpkSpaceDim() != spk_dim) { - KALDI_ERR << "File '" << model_in_filename << "': mismatched " - << "speaker-space dim: expecting " << spk_dim << ", found " - << am_sgmm->SpkSpaceDim(); - } - if (am_sgmm->NumGauss() != num_gauss) { - KALDI_ERR << "File '" << model_in_filename << "': mismatched UBM " - << "size: expecting " << num_gauss << ", found " - << am_sgmm->NumGauss(); - } - if (am_sgmm->FeatureDim() != feat_dim) { - KALDI_ERR << "File '" << model_in_filename << 
"': mismatched feature " - << "dim: expecting " << feat_dim << ", found " - << am_sgmm->FeatureDim(); - } - } - sgmms_in[i] = am_sgmm; - trans_models_in[i] = trans_model; - - Vector transition_accs; - MleAmSgmmAccs *sgmm_accs = new MleAmSgmmAccs(); - { - bool binary; - Input ki(stats_filename, &binary); - transition_accs.Read(ki.Stream(), binary); - sgmm_accs->Read(ki.Stream(), binary, false); - } - // Check consistency and print some diagnostics. - sgmm_accs->Check(*am_sgmm, true); - sgmm_accs_in[i] = sgmm_accs; - - if (update_flags & kSgmmTransitions) { // Update transition model. - BaseFloat objf_impr, count; - KALDI_LOG << "Updating transitions for model: " << model_in_filename; - trans_model->MleUpdate(transition_accs, tcfg, &objf_impr, &count); - KALDI_LOG << "Transition model update: average " << (objf_impr/count) - << " log-like improvement per frame over " << (count) - << " frames"; - } - } - - { // Update all the SGMMs together. - kaldi::MleAmSgmmUpdaterMulti multi_sgmm_updater(*sgmms_in[0], sgmm_opts); - multi_sgmm_updater.Update(sgmm_accs_in, sgmms_in, update_flags); - } - - for (int i = 0; i < num_models; ++i) { - Vector state_occs; - sgmm_accs_in[i]->GetStateOccupancies(&state_occs); - - if (!split_substates.empty()) { - sgmms_in[i]->SplitSubstates(state_occs, split_substates_int[i], perturb_factor, - power, max_cond); - sgmms_in[i]->ComputeDerivedVars(); // recompute normalizers... - } - - { - kaldi::Output ko(occs_out_filenames[i], false /* no binary write */); - state_occs.Write(ko.Stream(), false /* no binary write */); - } - - if (increase_phn_dim != 0 || increase_spk_dim != 0) { - // Feature normalizing transform matrix used to initialize the new columns - // of the phonetic- or speaker-space projection matrices. - kaldi::Matrix norm_xform; - ComputeFeatureNormalizer(sgmms_in[i]->full_ubm(), &norm_xform); - if (increase_phn_dim != 0) - sgmms_in[i]->IncreasePhoneSpaceDim(increase_phn_dim, norm_xform); - if (increase_spk_dim != 0) - sgmms_in[i]->IncreaseSpkSpaceDim(increase_spk_dim, norm_xform); - } - if (remove_speaker_space) { - KALDI_LOG << "Removing speaker space (projections N_)"; - sgmms_in[i]->RemoveSpeakerSpace(); - } - - { - Output ko(model_out_filenames[i], binary_write); - trans_models_in[i]->Write(ko.Stream(), binary_write); - sgmms_in[i]->Write(ko.Stream(), binary_write, write_flags); - KALDI_LOG << "Written model to " << model_out_filenames[i]; - } - } - return 0; - } catch(const std::exception& e) { - kaldi::DeletePointers(&sgmms_in); - kaldi::DeletePointers(&sgmm_accs_in); - kaldi::DeletePointers(&trans_models_in); - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-est-spkvecs-gpost.cc b/src/sgmmbin/sgmm-est-spkvecs-gpost.cc deleted file mode 100644 index 5f4e9078673..00000000000 --- a/src/sgmmbin/sgmm-est-spkvecs-gpost.cc +++ /dev/null @@ -1,223 +0,0 @@ -// sgmmbin/sgmm-est-spkvecs-gpost.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. 
-// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -using std::string; -#include -using std::vector; - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "sgmm/estimate-am-sgmm.h" -#include "hmm/transition-model.h" - -namespace kaldi { - -void AccumulateForUtterance(const Matrix &feats, - const SgmmGauPost &gpost, - const TransitionModel &trans_model, - const AmSgmm &am_sgmm, - const SgmmPerSpkDerivedVars &spk_vars, - MleSgmmSpeakerAccs *spk_stats) { - kaldi::SgmmPerFrameDerivedVars per_frame_vars; - - for (size_t i = 0; i < gpost.size(); i++) { - am_sgmm.ComputePerFrameVars(feats.Row(i), - gpost[i].gselect, spk_vars, 0.0, - &per_frame_vars); - - for (size_t j = 0; j < gpost[i].tids.size(); j++) { - int32 pdf_id = trans_model.TransitionIdToPdf(gpost[i].tids[j]); - spk_stats->AccumulateFromPosteriors(am_sgmm, per_frame_vars, - gpost[i].posteriors[j], pdf_id); - } - } -} - -} // end namespace kaldi - -int main(int argc, char *argv[]) { - try { - typedef kaldi::int32 int32; - using namespace kaldi; - const char *usage = - "Estimate SGMM speaker vectors, either per utterance or for the " - "supplied set of speakers (with spk2utt option).\n" - "Reads Gaussian-level posteriors. Writes to a table of vectors.\n" - "Usage: sgmm-est-spkvecs-gpost [options] " - " \n"; - - ParseOptions po(usage); - string spk2utt_rspecifier, spkvecs_rspecifier; - BaseFloat min_count = 100; - BaseFloat rand_prune = 1.0e-05; - - po.Register("spk2utt", &spk2utt_rspecifier, - "File to read speaker to utterance-list map from."); - po.Register("spkvec-min-count", &min_count, - "Minimum count needed to estimate speaker vectors"); - po.Register("rand-prune", &rand_prune, "Randomized pruning parameter for posteriors (more->faster)."); - po.Register("spk-vecs", &spkvecs_rspecifier, "Speaker vectors to use during aligment (rspecifier)"); - po.Read(argc, argv); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - string model_rxfilename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - gpost_rspecifier = po.GetArg(3), - vecs_wspecifier = po.GetArg(4); - - TransitionModel trans_model; - AmSgmm am_sgmm; - { - bool binary; - Input ki(model_rxfilename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - MleSgmmSpeakerAccs spk_stats(am_sgmm, rand_prune); - - RandomAccessSgmmGauPostReader gpost_reader(gpost_rspecifier); - - RandomAccessBaseFloatVectorReader spkvecs_reader(spkvecs_rspecifier); - - BaseFloatVectorWriter vecs_writer(vecs_wspecifier); - - double tot_impr = 0.0, tot_t = 0.0; - int32 num_done = 0, num_no_gpost = 0, num_other_error = 0; - - if (!spk2utt_rspecifier.empty()) { // per-speaker adaptation - SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - - for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { - spk_stats.Clear(); - string spk = spk2utt_reader.Key(); - const vector &uttlist = spk2utt_reader.Value(); - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(spk)) { - spk_vars.v_s = spkvecs_reader.Value(spk); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << spk; - } - } // else spk_vars is "empty" - - for (size_t i = 0; i < uttlist.size(); i++) { - std::string utt = uttlist[i]; - if (!feature_reader.HasKey(utt)) { - KALDI_WARN << "Did not find 
features for utterance " << utt; - continue; - } - if (!gpost_reader.HasKey(utt)) { - KALDI_WARN << "Did not find posteriors for utterance " << utt; - num_no_gpost++; - continue; - } - const Matrix &feats = feature_reader.Value(utt); - const SgmmGauPost &gpost = gpost_reader.Value(utt); - if (static_cast(gpost.size()) != feats.NumRows()) { - KALDI_WARN << "gpost vector has wrong size " << (gpost.size()) - << " vs. " << (feats.NumRows()); - num_other_error++; - continue; - } - - AccumulateForUtterance(feats, gpost, trans_model, am_sgmm, spk_vars, &spk_stats); - num_done++; - } // end looping over all utterances of the current speaker - - BaseFloat impr, spk_tot_t; - { // Compute the spk_vec and write it out. - Vector spk_vec(am_sgmm.SpkSpaceDim(), kSetZero); - if (spk_vars.v_s.Dim() != 0) spk_vec.CopyFromVec(spk_vars.v_s); - spk_stats.Update(min_count, &spk_vec, &impr, &spk_tot_t); - vecs_writer.Write(spk, spk_vec); - } - KALDI_LOG << "For speaker " << spk << ", auxf-impr from speaker vector is " - << (impr/spk_tot_t) << ", over " << spk_tot_t << " frames.\n"; - tot_impr += impr; - tot_t += spk_tot_t; - } // end looping over speakers - } else { // per-utterance adaptation - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !feature_reader.Done(); feature_reader.Next()) { - string utt = feature_reader.Key(); - if (!gpost_reader.HasKey(utt)) { - KALDI_WARN << "Did not find posts for utterance " - << utt; - num_no_gpost++; - continue; - } - const Matrix &feats = feature_reader.Value(); - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt; - } - } // else spk_vars is "empty" - const SgmmGauPost &gpost = gpost_reader.Value(utt); - - if (static_cast(gpost.size()) != feats.NumRows()) { - KALDI_WARN << "gpost has wrong size " << (gpost.size()) - << " vs. " << (feats.NumRows()); - num_other_error++; - continue; - } - num_done++; - - spk_stats.Clear(); - - AccumulateForUtterance(feats, gpost, trans_model, am_sgmm, spk_vars, &spk_stats); - - BaseFloat impr, utt_tot_t; - { // Compute the spk_vec and write it out. - Vector spk_vec(am_sgmm.SpkSpaceDim(), kSetZero); - if (spk_vars.v_s.Dim() != 0) spk_vec.CopyFromVec(spk_vars.v_s); - spk_stats.Update(min_count, &spk_vec, &impr, &utt_tot_t); - vecs_writer.Write(utt, spk_vec); - } - KALDI_LOG << "For utterance " << utt << ", auxf-impr from speaker vectors is " - << (impr/utt_tot_t) << ", over " << utt_tot_t << " frames."; - tot_impr += impr; - tot_t += utt_tot_t; - } - } - - KALDI_LOG << "Done " << num_done << " files, " << num_no_gpost - << " with no gposts, " << num_other_error << " with other errors."; - KALDI_LOG << "Overall auxf impr per frame is " << (tot_impr / tot_t) - << " over " << tot_t << " frames."; - return (num_done != 0 ? 
0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - diff --git a/src/sgmmbin/sgmm-est-spkvecs.cc b/src/sgmmbin/sgmm-est-spkvecs.cc deleted file mode 100644 index c71897d13c5..00000000000 --- a/src/sgmmbin/sgmm-est-spkvecs.cc +++ /dev/null @@ -1,257 +0,0 @@ -// sgmmbin/sgmm-est-spkvecs.cc - -// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -using std::string; -#include -using std::vector; - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "sgmm/estimate-am-sgmm.h" -#include "hmm/transition-model.h" -#include "hmm/posterior.h" - -namespace kaldi { - -void AccumulateForUtterance(const Matrix &feats, - const Posterior &post, - const TransitionModel &trans_model, - const AmSgmm &am_sgmm, - const SgmmGselectConfig &gselect_opts, - const vector< vector > &gselect, - const SgmmPerSpkDerivedVars &spk_vars, - MleSgmmSpeakerAccs *spk_stats) { - kaldi::SgmmPerFrameDerivedVars per_frame_vars; - - Posterior pdf_post; - ConvertPosteriorToPdfs(trans_model, post, &pdf_post); - for (size_t i = 0; i < post.size(); i++) { - std::vector this_gselect; - if (!gselect.empty()) - this_gselect = gselect[i]; - else - am_sgmm.GaussianSelection(gselect_opts, feats.Row(i), &this_gselect); - am_sgmm.ComputePerFrameVars(feats.Row(i), this_gselect, spk_vars, 0.0, &per_frame_vars); - - for (size_t j = 0; j < pdf_post[i].size(); j++) { - int32 pdf_id = pdf_post[i][j].first; - spk_stats->Accumulate(am_sgmm, per_frame_vars, pdf_id, pdf_post[i][j].second); - } - } -} - -} // end namespace kaldi - -int main(int argc, char *argv[]) { - try { - typedef kaldi::int32 int32; - using namespace kaldi; - const char *usage = - "Estimate SGMM speaker vectors, either per utterance or for the " - "supplied set of speakers (with spk2utt option).\n" - "Reads Gaussian-level posteriors. 
Writes to a table of vectors.\n" - "Usage: sgmm-est-spkvecs [options] " - " \n"; - - ParseOptions po(usage); - string gselect_rspecifier, spk2utt_rspecifier, spkvecs_rspecifier; - BaseFloat min_count = 100; - BaseFloat rand_prune = 1.0e-05; - SgmmGselectConfig gselect_opts; - - gselect_opts.Register(&po); - po.Register("gselect", &gselect_rspecifier, - "File to read precomputed per-frame Gaussian indices from."); - po.Register("spk2utt", &spk2utt_rspecifier, - "File to read speaker to utterance-list map from."); - po.Register("spkvec-min-count", &min_count, - "Minimum count needed to estimate speaker vectors"); - po.Register("rand-prune", &rand_prune, "Pruning threshold for posteriors"); - po.Register("spk-vecs", &spkvecs_rspecifier, "Speaker vectors to use during aligment (rspecifier)"); - po.Read(argc, argv); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - string model_rxfilename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - post_rspecifier = po.GetArg(3), - vecs_wspecifier = po.GetArg(4); - - TransitionModel trans_model; - AmSgmm am_sgmm; - { - bool binary; - Input ki(model_rxfilename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - MleSgmmSpeakerAccs spk_stats(am_sgmm, rand_prune); - - RandomAccessPosteriorReader post_reader(post_rspecifier); - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - - RandomAccessBaseFloatVectorReader spkvecs_reader(spkvecs_rspecifier); - - BaseFloatVectorWriter vecs_writer(vecs_wspecifier); - - double tot_impr = 0.0, tot_t = 0.0; - int32 num_done = 0, num_no_post = 0, num_other_error = 0; - std::vector > empty_gselect; - - if (!spk2utt_rspecifier.empty()) { // per-speaker adaptation - SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - - for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { - spk_stats.Clear(); - string spk = spk2utt_reader.Key(); - const vector &uttlist = spk2utt_reader.Value(); - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(spk)) { - spk_vars.v_s = spkvecs_reader.Value(spk); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << spk; - } - } // else spk_vars is "empty" - - for (size_t i = 0; i < uttlist.size(); i++) { - std::string utt = uttlist[i]; - if (!feature_reader.HasKey(utt)) { - KALDI_WARN << "Did not find features for utterance " << utt; - continue; - } - if (!post_reader.HasKey(utt)) { - KALDI_WARN << "Did not find posteriors for utterance " << utt; - num_no_post++; - continue; - } - const Matrix &feats = feature_reader.Value(utt); - const Posterior &post = post_reader.Value(utt); - if (static_cast(post.size()) != feats.NumRows()) { - KALDI_WARN << "Posterior vector has wrong size " << (post.size()) - << " vs. " << (feats.NumRows()); - num_other_error++; - continue; - } - bool has_gselect = false; - if (gselect_reader.IsOpen()) { - has_gselect = gselect_reader.HasKey(utt) - && gselect_reader.Value(utt).size() == feats.NumRows(); - if (!has_gselect) - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt << " (or wrong size)"; - } - const std::vector > *gselect = - (has_gselect ? 
&gselect_reader.Value(utt) : &empty_gselect); - - AccumulateForUtterance(feats, post, trans_model, am_sgmm, gselect_opts, *gselect, spk_vars, &spk_stats); - num_done++; - } // end looping over all utterances of the current speaker - - BaseFloat impr, spk_tot_t; - { // Compute the spk_vec and write it out. - Vector spk_vec(am_sgmm.SpkSpaceDim(), kSetZero); - if (spk_vars.v_s.Dim() != 0) spk_vec.CopyFromVec(spk_vars.v_s); - spk_stats.Update(min_count, &spk_vec, &impr, &spk_tot_t); - vecs_writer.Write(spk, spk_vec); - } - KALDI_LOG << "For speaker " << spk << ", auxf-impr from speaker vector is " - << (impr/spk_tot_t) << ", over " << spk_tot_t << " frames."; - tot_impr += impr; - tot_t += spk_tot_t; - } // end looping over speakers - } else { // per-utterance adaptation - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !feature_reader.Done(); feature_reader.Next()) { - string utt = feature_reader.Key(); - if (!post_reader.HasKey(utt)) { - KALDI_WARN << "Did not find posts for utterance " - << utt; - num_no_post++; - continue; - } - const Matrix &feats = feature_reader.Value(); - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt; - } - } // else spk_vars is "empty" - const Posterior &post = post_reader.Value(utt); - - if (static_cast(post.size()) != feats.NumRows()) { - KALDI_WARN << "Posterior has wrong size " << (post.size()) - << " vs. " << (feats.NumRows()); - num_other_error++; - continue; - } - num_done++; - - spk_stats.Clear(); - bool has_gselect = false; - if (gselect_reader.IsOpen()) { - has_gselect = gselect_reader.HasKey(utt) - && gselect_reader.Value(utt).size() == feats.NumRows(); - if (!has_gselect) - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt << " (or wrong size)"; - } - const std::vector > *gselect = - (has_gselect ? &gselect_reader.Value(utt) : &empty_gselect); - - AccumulateForUtterance(feats, post, trans_model, am_sgmm, gselect_opts, *gselect, spk_vars, &spk_stats); - - BaseFloat impr, utt_tot_t; - { // Compute the spk_vec and write it out. - Vector spk_vec(am_sgmm.SpkSpaceDim(), kSetZero); - if (spk_vars.v_s.Dim() != 0) spk_vec.CopyFromVec(spk_vars.v_s); - spk_stats.Update(min_count, &spk_vec, &impr, &utt_tot_t); - vecs_writer.Write(utt, spk_vec); - } - KALDI_LOG << "For utterance " << utt << ", auxf-impr from speaker vectors is " - << (impr/utt_tot_t) << ", over " << utt_tot_t << " frames."; - tot_impr += impr; - tot_t += utt_tot_t; - } - } - - KALDI_LOG << "Overall auxf impr per frame is " - << (tot_impr / tot_t) << " over " << tot_t << " frames."; - KALDI_LOG << "Done " << num_done << " files, " << num_no_post - << " with no posts, " << num_other_error << " with other errors."; - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - diff --git a/src/sgmmbin/sgmm-est.cc b/src/sgmmbin/sgmm-est.cc deleted file mode 100644 index fdb0bc36125..00000000000 --- a/src/sgmmbin/sgmm-est.cc +++ /dev/null @@ -1,172 +0,0 @@ -// sgmmbin/sgmm-est.cc - -// Copyright 2009-2011 Saarland University (Author: Arnab Ghoshal) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "thread/kaldi-thread.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "sgmm/estimate-am-sgmm.h" - - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - const char *usage = - "Estimate SGMM model parameters from accumulated stats.\n" - "Usage: sgmm-est [options] \n"; - - bool binary_write = true; - std::string update_flags_str = "vMNwcSt"; - std::string write_flags_str = "gsnu"; - kaldi::MleTransitionUpdateConfig tcfg; - kaldi::MleAmSgmmOptions sgmm_opts; - int32 split_substates = 0; - int32 increase_phn_dim = 0; - int32 increase_spk_dim = 0; - bool remove_speaker_space = false; - BaseFloat perturb_factor = 0.01; - BaseFloat power = 0.2; - BaseFloat max_cond = 100; - std::string occs_out_filename; - - ParseOptions po(usage); - po.Register("binary", &binary_write, "Write output in binary mode"); - po.Register("split-substates", &split_substates, "Increase number of " - "substates to this overall target."); - po.Register("increase-phn-dim", &increase_phn_dim, "Increase phone-space " - "dimension as far as allowed towards this target."); - po.Register("increase-spk-dim", &increase_spk_dim, "Increase speaker-space " - "dimension as far as allowed towards this target."); - po.Register("remove-speaker-space", &remove_speaker_space, "Remove speaker-specific " - "projections N"); - po.Register("power", &power, "Exponent for substate occupancies used while " - "splitting substates."); - po.Register("perturb-factor", &perturb_factor, "Perturbation factor for " - "state vectors while splitting substates."); - po.Register("max-cond-split", &max_cond, "Max condition number of smoothing " - "matrix used in substate splitting."); - po.Register("write-occs", &occs_out_filename, "File to write pdf " - "occupantion counts to."); - po.Register("update-flags", &update_flags_str, "Which SGMM parameters to " - "update: subset of vMNwcSt."); - po.Register("write-flags", &write_flags_str, "Which SGMM parameters to " - "write: subset of gsnu"); - po.Register("num-threads", &g_num_threads, "Number of threads to use in " - "weight update and normalizer computation"); - tcfg.Register(&po); - sgmm_opts.Register(&po); - - po.Read(argc, argv); - if (po.NumArgs() != 3) { - po.PrintUsage(); - exit(1); - } - std::string model_in_filename = po.GetArg(1), - stats_filename = po.GetArg(2), - model_out_filename = po.GetArg(3); - - kaldi::SgmmUpdateFlagsType update_flags = - StringToSgmmUpdateFlags(update_flags_str); - kaldi::SgmmWriteFlagsType write_flags = - StringToSgmmWriteFlags(write_flags_str); - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_in_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - Vector transition_accs; - MleAmSgmmAccs sgmm_accs; - { - bool binary; - Input ki(stats_filename, &binary); - transition_accs.Read(ki.Stream(), binary); - sgmm_accs.Read(ki.Stream(), binary, true); // true == add; 
doesn't matter here. - } - - if (update_flags & kSgmmTransitions) { // Update transition model. - BaseFloat objf_impr, count; - trans_model.MleUpdate(transition_accs, tcfg, &objf_impr, &count); - KALDI_LOG << "Transition model update: Overall " << (objf_impr/count) - << " log-like improvement per frame over " << (count) - << " frames."; - } - - sgmm_accs.Check(am_sgmm, true); // Will check consistency and print some diagnostics. - - { // Do the update. - kaldi::MleAmSgmmUpdater updater(sgmm_opts); - updater.Update(sgmm_accs, &am_sgmm, update_flags); - } - - if (split_substates != 0 || !occs_out_filename.empty()) { // get state occs - Vector pdf_occs; - sgmm_accs.GetStateOccupancies(&pdf_occs); - - if (split_substates != 0) { - am_sgmm.SplitSubstates(pdf_occs, split_substates, perturb_factor, - power, max_cond); - am_sgmm.ComputeDerivedVars(); // recompute normalizers... - } - - if (!occs_out_filename.empty()) { - bool binary_write = false; - kaldi::Output ko(occs_out_filename, binary_write); - pdf_occs.Write(ko.Stream(), binary_write); - } - } - - if (increase_phn_dim != 0 || increase_spk_dim != 0) { - // Feature normalizing transform matrix used to initialize the new columns - // of the phonetic- or speaker-space projection matrices. - kaldi::Matrix norm_xform; - ComputeFeatureNormalizer(am_sgmm.full_ubm(), &norm_xform); - if (increase_phn_dim != 0) - am_sgmm.IncreasePhoneSpaceDim(increase_phn_dim, norm_xform); - if (increase_spk_dim != 0) - am_sgmm.IncreaseSpkSpaceDim(increase_spk_dim, norm_xform); - } - if (remove_speaker_space) { - KALDI_LOG << "Removing speaker space (projections N_)"; - am_sgmm.RemoveSpeakerSpace(); - } - - { - Output ko(model_out_filename, binary_write); - trans_model.Write(ko.Stream(), binary_write); - am_sgmm.Write(ko.Stream(), binary_write, write_flags); - } - - - KALDI_LOG << "Written model to " << model_out_filename; - return 0; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-gselect.cc b/src/sgmmbin/sgmm-gselect.cc deleted file mode 100644 index 7234406f9ce..00000000000 --- a/src/sgmmbin/sgmm-gselect.cc +++ /dev/null @@ -1,125 +0,0 @@ -// sgmmbin/sgmm-gselect.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
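// A minimal sketch of the selection loop in main() below, assuming an
// 'am_sgmm' and 'sgmm_opts' already set up as there; GaussianSelection()
// fills one index vector per frame and returns that frame's UBM
// log-likelihood, which main() averages for its periodic log lines.
#include "sgmm/am-sgmm.h"

static double GselectAllFrames(const kaldi::AmSgmm &am_sgmm,
                               const kaldi::SgmmGselectConfig &sgmm_opts,
                               const kaldi::Matrix<kaldi::BaseFloat> &mat,
                               std::vector<std::vector<kaldi::int32> > *gselect) {
  gselect->resize(mat.NumRows());  // one Gaussian-index vector per frame
  double tot_like = 0.0;
  for (kaldi::int32 i = 0; i < mat.NumRows(); i++)
    tot_like += am_sgmm.GaussianSelection(sgmm_opts, mat.Row(i),
                                          &((*gselect)[i]));
  return tot_like;  // divide by mat.NumRows() for the per-frame figure
}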
- - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - const char *usage = - "Precompute Gaussian indices for SGMM training " - "Usage: sgmm-gselect [options] \n" - "e.g.: sgmm-gselect 1.sgmm \"ark:feature-command |\" ark:1.gs\n" - "Note: you can do the same thing by combining the programs sgmm-write-ubm, fgmm-global-to-gmm,\n" - "gmm-gselect and fgmm-gselect\n"; - - ParseOptions po(usage); - kaldi::SgmmGselectConfig sgmm_opts; - std::string preselect_rspecifier; - std::string likelihood_wspecifier; - po.Register("preselect", &preselect_rspecifier, "Rspecifier for sets of Gaussians to " - "limit gselect to (e.g. for gender dependent systems)"); - po.Register("write-likes", &likelihood_wspecifier, "Wspecifier for likelihoods per " - "utterance"); - sgmm_opts.Register(&po); - po.Read(argc, argv); - - if (po.NumArgs() != 3) { - po.PrintUsage(); - exit(1); - } - - std::string model_filename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - gselect_wspecifier = po.GetArg(3); - - using namespace kaldi; - typedef kaldi::int32 int32; - - AmSgmm am_sgmm; - { - bool binary; - Input ki(model_filename, &binary); - TransitionModel trans_model; - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - double tot_like = 0.0; - kaldi::int64 tot_t = 0; - - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - Int32VectorVectorWriter gselect_writer(gselect_wspecifier); - BaseFloatWriter likelihood_writer(likelihood_wspecifier); - RandomAccessInt32VectorReader preselect_reader(preselect_rspecifier); - - int32 num_done = 0, num_err = 0; - for (; !feature_reader.Done(); feature_reader.Next()) { - int32 tot_t_this_file = 0; double tot_like_this_file = 0; - std::string utt = feature_reader.Key(); - const Matrix &mat = feature_reader.Value(); - std::vector > gselect_vec(mat.NumRows()); - tot_t_this_file += mat.NumRows(); - if(preselect_rspecifier != "") { // e.g. gender dependent. 
- if (!preselect_reader.HasKey(utt)) { - KALDI_WARN << "No preselect information for utterance " << utt; - num_err++; - continue; - } - const std::vector &preselect = preselect_reader.Value(utt); - KALDI_ASSERT(!preselect.empty()); - for (int32 i = 0; i < mat.NumRows(); i++) - tot_like_this_file += - am_sgmm.GaussianSelectionPreselect(sgmm_opts, mat.Row(i), - preselect, &(gselect_vec[i])); - } else { - for (int32 i = 0; i < mat.NumRows(); i++) - tot_like_this_file += am_sgmm.GaussianSelection(sgmm_opts, mat.Row(i), &(gselect_vec[i])); - } - gselect_writer.Write(utt, gselect_vec); - if (num_done % 10 == 0) - KALDI_LOG << "For " << num_done << "'th file, average UBM likelihood over " - << tot_t_this_file << " frames is " - << (tot_like_this_file/tot_t_this_file); - tot_t += tot_t_this_file; - tot_like += tot_like_this_file; - - if(likelihood_wspecifier != "") - likelihood_writer.Write(utt, tot_like_this_file); - num_done++; - } - - KALDI_LOG << "Done " << num_done << " files, " << num_err - << " with errors, average UBM log-likelihood is " - << (tot_like/tot_t) << " over " << tot_t << " frames."; - - - if (num_done != 0) return 0; - else return 1; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-info.cc b/src/sgmmbin/sgmm-info.cc deleted file mode 100644 index c5e5dc70686..00000000000 --- a/src/sgmmbin/sgmm-info.cc +++ /dev/null @@ -1,110 +0,0 @@ -// sgmmbin/sgmm-info.cc - -// Copyright 2012 Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "base/kaldi-common.h" -#include "util/common-utils.h" - -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" - - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - const char *usage = - "Print various information about an SGMM.\n" - "Usage: sgmm-info [options] [model-in2 ... 
]\n"; - - bool sgmm_detailed = false; - bool trans_detailed = false; - - ParseOptions po(usage); - po.Register("sgmm-detailed", &sgmm_detailed, - "Print detailed information about substates."); - po.Register("trans-detailed", &trans_detailed, - "Print detailed information about transition model."); - - po.Read(argc, argv); - if (po.NumArgs() < 1) { - po.PrintUsage(); - exit(1); - } - - for (int i = 1, max = po.NumArgs(); i <= max; ++i) { - std::string model_in_filename = po.GetArg(i); - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_in_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - { - using namespace std; - cout.setf(ios::left); - cout << "\nModel file: " << model_in_filename << endl; - cout << " SGMM information:\n" - << setw(40) << " # of HMM states" << am_sgmm.NumPdfs() << endl - << setw(40) << " # of Gaussians per state" << am_sgmm.NumGauss() << endl - << setw(40) << " Dimension of phone vector space" - << am_sgmm.PhoneSpaceDim() << endl - << setw(40) << " Dimension of speaker vector space" - << am_sgmm.SpkSpaceDim() << endl - << setw(40) << " Dimension of feature vectors" - << am_sgmm.FeatureDim() << endl; - int32 total_substates = 0; - for (int32 j = 0; j < am_sgmm.NumPdfs(); j++) { - total_substates += am_sgmm.NumSubstates(j); - if (sgmm_detailed) { - cout << " # of substates for state " << setw(13) << j - << am_sgmm.NumSubstates(j) << endl; - } - } - cout << setw(40) << " Total # of substates " << total_substates << endl; - - cout << "\nTransition model information:\n" - << setw(40) << " # of HMM states" << trans_model.NumPdfs() << endl - << setw(40) << " # of transition states" - << trans_model.NumTransitionStates() << endl; - int32 total_indices = 0; - for (int32 s = 0; s < trans_model.NumTransitionStates(); s++) { - total_indices += trans_model.NumTransitionIndices(s); - if (trans_detailed) { - cout << " # of transition ids for state " << setw(8) << s - << trans_model.NumTransitionIndices(s) << endl; - } - } - cout << setw(40) << " Total # of transition ids " << total_indices - << endl; - } - } - - return 0; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-init-from-tree-stats.cc b/src/sgmmbin/sgmm-init-from-tree-stats.cc deleted file mode 100644 index 0802507c126..00000000000 --- a/src/sgmmbin/sgmm-init-from-tree-stats.cc +++ /dev/null @@ -1,147 +0,0 @@ -// sgmmbin/sgmm-init-from-tree-stats.cc - -// Copyright 2012 Arnab Ghoshal Johns Hopkins University (Author: Daniel Povey) -// Copyright 2009-2011 Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "util/common-utils.h" -#include "gmm/am-diag-gmm.h" -#include "sgmm/am-sgmm.h" -#include "sgmm/sgmm-clusterable.h" -#include "sgmm/estimate-am-sgmm.h" -#include "hmm/transition-model.h" -#include "tree/context-dep.h" -#include "tree/build-tree-utils.h" - - - -namespace kaldi { -void InitAndOutputSgmm(const HmmTopology &topo, - const AmSgmm &am_sgmm, - const ContextDependency &ctx_dep, - const std::vector > &H, - const BuildTreeStatsType &stats, - const std::string &sgmm_wxfilename, - bool binary) { - int32 num_pdfs = ctx_dep.NumPdfs(); - AmSgmm am_sgmm_out; - am_sgmm_out.CopyGlobalsInitVecs(am_sgmm, am_sgmm.PhoneSpaceDim(), - am_sgmm.SpkSpaceDim(), num_pdfs); - MleAmSgmmOptions opts; // Use default options; we can change this later - // if we need to use any non-default options. - MleAmSgmmUpdater updater(opts); - - std::vector split_stats; - SplitStatsByMap(stats, ctx_dep.ToPdfMap(), &split_stats); - // Make sure each leaf has stats. - for (size_t i = 0; i < split_stats.size(); i++) - KALDI_ASSERT(! split_stats[i].empty() && "Tree has leaves with no stats." - " Modify your roots file as necessary to fix this."); - std::vector summed_stats; - SumStatsVec(split_stats, &summed_stats); - - std::vector &summed_sgmm_stats = - *(reinterpret_cast*> (&summed_stats)); - - for (int32 iter = 0; iter < 5; iter++) { // Update for - // several iterations; we're starting from zero so we won't - // converge exactly on the first iteration. - updater.UpdatePhoneVectorsCheckedFromClusterable(summed_sgmm_stats, - H, - &am_sgmm_out); - } - DeletePointers(&summed_stats); - - TransitionModel trans_model_out(ctx_dep, topo); - { - Output ko(sgmm_wxfilename, binary); - am_sgmm_out.ComputeNormalizers(); - trans_model_out.Write(ko.Stream(), binary); - am_sgmm_out.Write(ko.Stream(), binary, kSgmmWriteAll); - } -} - -} - - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - - const char *usage = - "Initialize an SGMM from a previously built SGMM, a tree, \n" - "and SGMM-type tree stats\n" - "Usage: sgmm-init-from-tree-stats [options] \n"; - - bool binary = true; - kaldi::ParseOptions po(usage); - po.Register("binary", &binary, "Write output in binary mode"); - - po.Read(argc, argv); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - std::string sgmm_in_filename = po.GetArg(1), - tree_in_filename = po.GetArg(2), - tree_stats_filename = po.GetArg(3), - sgmm_out_filename = po.GetArg(4); - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(sgmm_in_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - const HmmTopology &topo = trans_model.GetTopo(); - std::vector > H; - am_sgmm.ComputeH(&H); - - ContextDependency ctx_dep; - { - bool binary_in; - Input ki(tree_in_filename.c_str(), &binary_in); - ctx_dep.Read(ki.Stream(), binary_in); - } - - BuildTreeStatsType stats; - { - bool binary_in; - SgmmClusterable sc(am_sgmm, H); // dummy stats needed to provide - // type info, and access to am_sgmm and H. 
- Input ki(tree_stats_filename, &binary_in); - ReadBuildTreeStats(ki.Stream(), binary_in, sc, &stats); - } - KALDI_LOG << "Number of separate statistics is " << stats.size(); - - InitAndOutputSgmm(topo, am_sgmm, ctx_dep, H, stats, - sgmm_out_filename, binary); - - KALDI_LOG << "Written model to " << sgmm_out_filename; - } catch(const std::exception &e) { - std::cerr << e.what() << '\n'; - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-init.cc b/src/sgmmbin/sgmm-init.cc deleted file mode 100644 index f90ca3e5650..00000000000 --- a/src/sgmmbin/sgmm-init.cc +++ /dev/null @@ -1,111 +0,0 @@ -// sgmmbin/sgmm-init.cc - -// Copyright 2012 Arnab Ghoshal -// Copyright 2009-2011 Saarland University (Author: Arnab Ghoshal) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "util/common-utils.h" -#include "gmm/am-diag-gmm.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "tree/context-dep.h" - - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - - const char *usage = - "Initialize an SGMM from a trained full-covariance UBM and a specified" - " model topology.\n" - "Usage: sgmm-init [options] \n" - "The argument can be a UBM (the default case) or another\n" - "SGMM (if the --init-from-sgmm flag is used).\n"; - - bool binary = true, init_from_sgmm = false; - int32 phn_space_dim = 0, spk_space_dim = 0; - kaldi::ParseOptions po(usage); - po.Register("binary", &binary, "Write output in binary mode"); - po.Register("phn-space-dim", &phn_space_dim, "Phonetic space dimension."); - po.Register("spk-space-dim", &spk_space_dim, "Speaker space dimension."); - po.Register("init-from-sgmm", &init_from_sgmm, - "Initialize from another SGMM (instead of a UBM)."); - - po.Read(argc, argv); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - std::string topo_in_filename = po.GetArg(1), - tree_in_filename = po.GetArg(2), - init_model_filename = po.GetArg(3), - sgmm_out_filename = po.GetArg(4); - - ContextDependency ctx_dep; - { - bool binary_in; - Input ki(tree_in_filename.c_str(), &binary_in); - ctx_dep.Read(ki.Stream(), binary_in); - } - - - HmmTopology topo; - ReadKaldiObject(topo_in_filename, &topo); - - TransitionModel trans_model(ctx_dep, topo); - - kaldi::AmSgmm sgmm; - if (init_from_sgmm) { - kaldi::AmSgmm init_sgmm; - { - bool binary_read; - TransitionModel tmp_trans; - kaldi::Input ki(init_model_filename, &binary_read); - tmp_trans.Read(ki.Stream(), binary_read); - init_sgmm.Read(ki.Stream(), binary_read); - } - sgmm.CopyGlobalsInitVecs(init_sgmm, phn_space_dim, spk_space_dim, - trans_model.NumPdfs()); - } else { - kaldi::FullGmm ubm; - { - bool binary_read; - kaldi::Input ki(init_model_filename, &binary_read); - ubm.Read(ki.Stream(), binary_read); - } - sgmm.InitializeFromFullGmm(ubm, trans_model.NumPdfs(), phn_space_dim, - spk_space_dim); - } - sgmm.ComputeNormalizers(); 
- - { - kaldi::Output ko(sgmm_out_filename, binary); - trans_model.Write(ko.Stream(), binary); - sgmm.Write(ko.Stream(), binary, kaldi::kSgmmWriteAll); - } - - KALDI_LOG << "Written model to " << sgmm_out_filename; - } catch(const std::exception &e) { - std::cerr << e.what() << '\n'; - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-latgen-faster.cc b/src/sgmmbin/sgmm-latgen-faster.cc deleted file mode 100644 index 3162b1f72f9..00000000000 --- a/src/sgmmbin/sgmm-latgen-faster.cc +++ /dev/null @@ -1,271 +0,0 @@ -// sgmmbin/sgmm-latgen-faster.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation; -// Johns Hopkins University (author: Daniel Povey) -// 2014 Guoguo Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -using std::string; - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "fstext/fstext-lib.h" -#include "decoder/decoder-wrappers.h" -#include "sgmm/decodable-am-sgmm.h" -#include "base/timer.h" - -namespace kaldi { - -// the reference arguments at the beginning are not const as the style guide -// requires, but are best viewed as inputs. -bool ProcessUtterance(LatticeFasterDecoder &decoder, - const AmSgmm &am_sgmm, - const TransitionModel &trans_model, - const SgmmGselectConfig &sgmm_opts, - double log_prune, - double acoustic_scale, - const Matrix &features, - RandomAccessInt32VectorVectorReader &gselect_reader, - RandomAccessBaseFloatVectorReaderMapped &spkvecs_reader, - const fst::SymbolTable *word_syms, - const std::string &utt, - bool determinize, - bool allow_partial, - Int32VectorWriter *alignments_writer, - Int32VectorWriter *words_writer, - CompactLatticeWriter *compact_lattice_writer, - LatticeWriter *lattice_writer, - double *like_ptr) { // puts utterance's like in like_ptr on success. - using fst::VectorFst; - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt << ", not decoding this utterance"; - return false; // We could use zero, but probably the user would want to know about this - // (this would normally be a script error or some kind of failure). - } - } - bool has_gselect = false; - if (gselect_reader.IsOpen()) { - has_gselect = gselect_reader.HasKey(utt) - && gselect_reader.Value(utt).size() == features.NumRows(); - if (!has_gselect) - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt << " (or wrong size)"; - } - std::vector > empty_gselect; - const std::vector > *gselect = - (has_gselect ? 
&gselect_reader.Value(utt) : &empty_gselect); - DecodableAmSgmmScaled sgmm_decodable(sgmm_opts, am_sgmm, spk_vars, - trans_model, features, *gselect, - log_prune, acoustic_scale); - - return DecodeUtteranceLatticeFaster( - decoder, sgmm_decodable, trans_model, word_syms, utt, acoustic_scale, - determinize, allow_partial, alignments_writer, words_writer, - compact_lattice_writer, lattice_writer, like_ptr); -} - -} // end namespace kaldi - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - using fst::SymbolTable; - using fst::VectorFst; - using fst::StdArc; - - const char *usage = - "Decode features using SGMM-based model.\n" - "Usage: sgmm-latgen-faster [options] (|) " - " [ [] ]\n"; - ParseOptions po(usage); - BaseFloat acoustic_scale = 0.1; - bool allow_partial = false; - BaseFloat log_prune = 5.0; - string word_syms_filename, gselect_rspecifier, spkvecs_rspecifier, - utt2spk_rspecifier; - - LatticeFasterDecoderConfig decoder_opts; - SgmmGselectConfig sgmm_opts; - decoder_opts.Register(&po); - sgmm_opts.Register(&po); - - po.Register("acoustic-scale", &acoustic_scale, - "Scaling factor for acoustic likelihoods"); - po.Register("log-prune", &log_prune, - "Pruning beam used to reduce number of exp() evaluations."); - po.Register("word-symbol-table", &word_syms_filename, - "Symbol table for words [for debug output]"); - po.Register("allow-partial", &allow_partial, - "Produce output even when final state was not reached"); - po.Register("gselect", &gselect_rspecifier, - "rspecifier for precomputed per-frame Gaussian indices."); - po.Register("spk-vecs", &spkvecs_rspecifier, - "rspecifier for speaker vectors"); - po.Register("utt2spk", &utt2spk_rspecifier, - "rspecifier for utterance to speaker map"); - po.Read(argc, argv); - - if (po.NumArgs() < 4 || po.NumArgs() > 6) { - po.PrintUsage(); - exit(1); - } - - std::string model_in_filename = po.GetArg(1), - fst_in_str = po.GetArg(2), - feature_rspecifier = po.GetArg(3), - lattice_wspecifier = po.GetArg(4), - words_wspecifier = po.GetOptArg(5), - alignment_wspecifier = po.GetOptArg(6); - - TransitionModel trans_model; - kaldi::AmSgmm am_sgmm; - { - bool binary; - Input ki(model_in_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - CompactLatticeWriter compact_lattice_writer; - LatticeWriter lattice_writer; - bool determinize = decoder_opts.determinize_lattice; - if (! (determinize ? compact_lattice_writer.Open(lattice_wspecifier) - : lattice_writer.Open(lattice_wspecifier))) - KALDI_ERR << "Could not open table for writing lattices: " - << lattice_wspecifier; - - Int32VectorWriter words_writer(words_wspecifier); - - Int32VectorWriter alignment_writer(alignment_wspecifier); - - fst::SymbolTable *word_syms = NULL; - if (word_syms_filename != "") - if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename))) - KALDI_ERR << "Could not read symbol table from file " - << word_syms_filename; - - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier, - utt2spk_rspecifier); - - - BaseFloat tot_like = 0.0; - kaldi::int64 frame_count = 0; - int num_success = 0, num_fail = 0; - - Timer timer; - - if (ClassifyRspecifier(fst_in_str, NULL, NULL) == kNoRspecifier) { // a single FST. 
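// The construction order in this branch is load-bearing, as the original comment
// below explains: the table reader is created before the potentially very large
// decoding graph is loaded, presumably because readers may fork() (e.g. to read
// from a pipe), and fork() on a large process duplicates its page-table entries,
// which can exhaust virtual memory. A hedged summary of the safe ordering (the
// code below is the authoritative version):
//
//   SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); // may fork()
//   VectorFst<StdArc> *decode_fst = fst::ReadFstKaldi(fst_in_str);      // large; load after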
- SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - // It's important that we initialize decode_fst after feature_reader, as it - // can prevent crashes on systems installed without enough virtual memory. - // It has to do with what happens on UNIX systems if you call fork() on a - // large process: the page-table entries are duplicated, which requires a - // lot of virtual memory. - VectorFst *decode_fst = fst::ReadFstKaldi(fst_in_str); - timer.Reset(); // exclude graph loading time. - - { - LatticeFasterDecoder decoder(*decode_fst, decoder_opts); - - const std::vector > empty_gselect; - - for (; !feature_reader.Done(); feature_reader.Next()) { - string utt = feature_reader.Key(); - const Matrix &features(feature_reader.Value()); - if (features.NumRows() == 0) { - KALDI_WARN << "Zero-length utterance: " << utt; - num_fail++; - continue; - } - double like; - if (ProcessUtterance(decoder, am_sgmm, trans_model, sgmm_opts, log_prune, acoustic_scale, - features, gselect_reader, spkvecs_reader, word_syms, - utt, determinize, allow_partial, - &alignment_writer, &words_writer, &compact_lattice_writer, - &lattice_writer, &like)) { - tot_like += like; - frame_count += features.NumRows(); - KALDI_LOG << "Log-like per frame for utterance " << utt << " is " - << (like / features.NumRows()) << " over " - << features.NumRows() << " frames."; - num_success++; - } else { num_fail++; } - } - } - delete decode_fst; // only safe to do this after decoder goes out of scope. - } else { // We have different FSTs for different utterances. - SequentialTableReader fst_reader(fst_in_str); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !fst_reader.Done(); fst_reader.Next()) { - std::string utt = fst_reader.Key(); - if (!feature_reader.HasKey(utt)) { - KALDI_WARN << "Not decoding utterance " << utt - << " because no features available."; - num_fail++; - continue; - } - const Matrix &features = feature_reader.Value(utt); - if (features.NumRows() == 0) { - KALDI_WARN << "Zero-length utterance: " << utt; - num_fail++; - continue; - } - LatticeFasterDecoder decoder(fst_reader.Value(), decoder_opts); - double like; - if (ProcessUtterance(decoder, am_sgmm, trans_model, sgmm_opts, log_prune, acoustic_scale, - features, gselect_reader, spkvecs_reader, word_syms, - utt, determinize, allow_partial, - &alignment_writer, &words_writer, &compact_lattice_writer, - &lattice_writer, &like)) { - tot_like += like; - frame_count += features.NumRows(); - KALDI_LOG << "Log-like per frame for utterance " << utt << " is " - << (like / features.NumRows()) << " over " - << features.NumRows() << " frames."; - num_success++; - } else { num_fail++; } - } - } - double elapsed = timer.Elapsed(); - KALDI_LOG << "Time taken [excluding initialization] "<< elapsed - << "s: real-time factor assuming 100 frames/sec is " - << (elapsed*100.0/frame_count); - KALDI_LOG << "Done " << num_success << " utterances, failed for " - << num_fail; - KALDI_LOG << "Overall log-likelihood per frame = " << (tot_like/frame_count) - << " over " << frame_count << " frames."; - - delete word_syms; - return (num_success != 0 ? 
0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-latgen-simple.cc b/src/sgmmbin/sgmm-latgen-simple.cc deleted file mode 100644 index 69e272ba9c6..00000000000 --- a/src/sgmmbin/sgmm-latgen-simple.cc +++ /dev/null @@ -1,232 +0,0 @@ -// sgmmbin/sgmm-latgen-simple.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -using std::string; - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "fstext/fstext-lib.h" -#include "decoder/decoder-wrappers.h" -#include "sgmm/decodable-am-sgmm.h" -#include "base/timer.h" - -namespace kaldi { - -// the reference arguments at the beginning are not const as the style guide -// requires, but are best viewed as inputs. -bool ProcessUtterance(LatticeSimpleDecoder &decoder, - const AmSgmm &am_sgmm, - const TransitionModel &trans_model, - const SgmmGselectConfig &sgmm_opts, - double log_prune, - double acoustic_scale, - const Matrix &features, - RandomAccessInt32VectorVectorReader &gselect_reader, - RandomAccessBaseFloatVectorReaderMapped &spkvecs_reader, - const fst::SymbolTable *word_syms, - const std::string &utt, - bool determinize, - bool allow_partial, - Int32VectorWriter *alignments_writer, - Int32VectorWriter *words_writer, - CompactLatticeWriter *compact_lattice_writer, - LatticeWriter *lattice_writer, - double *like_ptr) { // puts utterance's like in like_ptr on success. - using fst::VectorFst; - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt << ", not decoding this utterance"; - return false; // We could use zero, but probably the user would want to know about this - // (this would normally be a script error or some kind of failure). - } - } - bool has_gselect = false; - if (gselect_reader.IsOpen()) { - has_gselect = gselect_reader.HasKey(utt) - && gselect_reader.Value(utt).size() == features.NumRows(); - if (!has_gselect) - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt << " (or wrong size)"; - } - std::vector > empty_gselect; - const std::vector > *gselect = - (has_gselect ? 
&gselect_reader.Value(utt) : &empty_gselect); - DecodableAmSgmmScaled sgmm_decodable(sgmm_opts, am_sgmm, spk_vars, - trans_model, features, *gselect, - log_prune, acoustic_scale); - - return DecodeUtteranceLatticeSimple( - decoder, sgmm_decodable, trans_model, word_syms, utt, acoustic_scale, - determinize, allow_partial, alignments_writer, words_writer, - compact_lattice_writer, lattice_writer, like_ptr); -} - -} // end namespace kaldi - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - using fst::SymbolTable; - using fst::VectorFst; - using fst::StdArc; - - const char *usage = - "Decode features using SGMM-based model.\n" - "Usage: sgmm-latgen-simple [options] " - " [ [] ]\n"; - ParseOptions po(usage); - BaseFloat acoustic_scale = 0.1; - bool allow_partial = false; - BaseFloat log_prune = 5.0; - string word_syms_filename, gselect_rspecifier, spkvecs_rspecifier, - utt2spk_rspecifier; - - LatticeSimpleDecoderConfig decoder_opts; - SgmmGselectConfig sgmm_opts; - decoder_opts.Register(&po); - sgmm_opts.Register(&po); - - po.Register("acoustic-scale", &acoustic_scale, - "Scaling factor for acoustic likelihoods"); - po.Register("log-prune", &log_prune, - "Pruning beam used to reduce number of exp() evaluations."); - po.Register("word-symbol-table", &word_syms_filename, - "Symbol table for words [for debug output]"); - po.Register("allow-partial", &allow_partial, - "Produce output even when final state was not reached"); - po.Register("gselect", &gselect_rspecifier, - "rspecifier for precomputed per-frame Gaussian indices."); - po.Register("spk-vecs", &spkvecs_rspecifier, - "rspecifier for speaker vectors"); - po.Register("utt2spk", &utt2spk_rspecifier, - "rspecifier for utterance to speaker map"); - po.Read(argc, argv); - - if (po.NumArgs() < 4 || po.NumArgs() > 6) { - po.PrintUsage(); - exit(1); - } - - std::string model_in_filename = po.GetArg(1), - fst_in_filename = po.GetArg(2), - feature_rspecifier = po.GetArg(3), - lattice_wspecifier = po.GetArg(4), - words_wspecifier = po.GetOptArg(5), - alignment_wspecifier = po.GetOptArg(6); - - TransitionModel trans_model; - kaldi::AmSgmm am_sgmm; - { - bool binary; - Input ki(model_in_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - CompactLatticeWriter compact_lattice_writer; - LatticeWriter lattice_writer; - bool determinize = decoder_opts.determinize_lattice; - if (! (determinize ? compact_lattice_writer.Open(lattice_wspecifier) - : lattice_writer.Open(lattice_wspecifier))) - KALDI_ERR << "Could not open table for writing lattices: " - << lattice_wspecifier; - - Int32VectorWriter words_writer(words_wspecifier); - - Int32VectorWriter alignment_writer(alignment_wspecifier); - - fst::SymbolTable *word_syms = NULL; - if (word_syms_filename != "") - if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename))) - KALDI_ERR << "Could not read symbol table from file " - << word_syms_filename; - - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - - RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier, - utt2spk_rspecifier); - - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - - // It's important that we initialize decode_fst after feature_reader, as it - // can prevent crashes on systems installed without enough virtual memory. 
- // It has to do with what happens on UNIX systems if you call fork() on a - // large process: the page-table entries are duplicated, which requires a - // lot of virtual memory. - VectorFst *decode_fst = fst::ReadFstKaldi(fst_in_filename); - - BaseFloat tot_like = 0.0; - kaldi::int64 frame_count = 0; - int num_success = 0, num_fail = 0; - LatticeSimpleDecoder decoder(*decode_fst, decoder_opts); - - Timer timer; - - for (; !feature_reader.Done(); feature_reader.Next()) { - string utt = feature_reader.Key(); - Matrix features(feature_reader.Value()); - feature_reader.FreeCurrent(); - if (features.NumRows() == 0) { - KALDI_WARN << "Zero-length utterance: " << utt; - num_fail++; - continue; - } - double like; - if (ProcessUtterance(decoder, am_sgmm, trans_model, sgmm_opts, log_prune, - acoustic_scale, features, gselect_reader, - spkvecs_reader, word_syms, utt, determinize, - allow_partial, &alignment_writer, &words_writer, - &compact_lattice_writer, &lattice_writer, &like)) { - tot_like += like; - frame_count += features.NumRows(); - KALDI_LOG << "Log-like per frame for utterance " << utt << " is " - << (like / features.NumRows()) << " over " - << features.NumRows() << " frames."; - num_success++; - } else num_fail++; - } - double elapsed = timer.Elapsed(); - KALDI_LOG << "Time taken [excluding initialization] "<< elapsed - << "s: real-time factor assuming 100 frames/sec is " - << (elapsed*100.0/frame_count); - KALDI_LOG << "Done " << num_success << " utterances, failed for " - << num_fail; - KALDI_LOG << "Overall log-likelihood per frame = " << (tot_like/frame_count) - << " over " << frame_count << " frames."; - - delete word_syms; - delete decode_fst; - return (num_success != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-mixup.cc b/src/sgmmbin/sgmm-mixup.cc deleted file mode 100644 index 36731587317..00000000000 --- a/src/sgmmbin/sgmm-mixup.cc +++ /dev/null @@ -1,145 +0,0 @@ -// sgmmbin/sgmm-mixup.cc - -// Copyright 2009-2011 Saarland University -// Author: Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include "base/kaldi-common.h" -#include "util/common-utils.h" - -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "sgmm/estimate-am-sgmm.h" - - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - const char *usage = - "Increase number of sub-states or dimensions in SGMM\n" - "Usage: sgmm-mixup [options] \n" - "E.g. of mixing up:\n" - " sgmm-mixup --read-occs=1.occs --num-substates=10000 1.mdl 2.mdl\n" - "E.g. of increasing phonetic dim:\n" - " sgmm-mixup --increase-phn-dim=50 1.mdl 2.mdl\n" - "E.g. of increasing speaker dim:\n" - " sgmm-mixup --increase-spk-dim=50 1.mdl 2.mdl\n" - "E.g. 
of removing speaker space:\n" - " sgmm-mixup --remove-speaker-space 1.mdl 2.mdl\n" - "These modes may be combined.\n"; - - bool binary_write = true; - std::string write_flags_str = "gsnu"; - int32 split_substates = 0; - int32 increase_phn_dim = 0; - int32 increase_spk_dim = 0; - bool remove_speaker_space = false; - BaseFloat perturb_factor = 0.01; - BaseFloat power = 0.2; - BaseFloat max_cond = 100; - std::string occs_in_filename; - - ParseOptions po(usage); - po.Register("binary", &binary_write, "Write output in binary mode"); - po.Register("split-substates", &split_substates, "Increase number of " - "substates to this overall target."); - po.Register("increase-phn-dim", &increase_phn_dim, "Increase phone-space " - "dimension as far as allowed towards this target."); - po.Register("increase-spk-dim", &increase_spk_dim, "Increase speaker-space " - "dimension as far as allowed towards this target."); - po.Register("remove-speaker-space", &remove_speaker_space, "Remove speaker-specific " - "projections N"); - po.Register("power", &power, "Exponent for substate occupancies used while " - "splitting substates."); - po.Register("perturb-factor", &perturb_factor, "Perturbation factor for " - "state vectors while splitting substates."); - po.Register("max-cond-split", &max_cond, "Max condition number of smoothing " - "matrix used in substate splitting."); - po.Register("write-flags", &write_flags_str, "Which SGMM parameters to " - "write: subset of gsnu"); - po.Register("read-occs", &occs_in_filename, "Read occupancies from this file " - "(required for mixing up)"); - - po.Read(argc, argv); - if (po.NumArgs() != 2) { - po.PrintUsage(); - exit(1); - } - std::string model_in_filename = po.GetArg(1), - model_out_filename = po.GetArg(2); - - kaldi::SgmmWriteFlagsType write_flags = - StringToSgmmWriteFlags(write_flags_str); - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_in_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - if (split_substates != 0) { - if (occs_in_filename.empty()) - KALDI_ERR << "The --split-substates option requires the --read-occs option"; - - Vector state_occs; - { - bool binary_in; - kaldi::Input ki(occs_in_filename, &binary_in); - state_occs.Read(ki.Stream(), binary_in); - } - - am_sgmm.SplitSubstates(state_occs, split_substates, perturb_factor, - power, max_cond); - am_sgmm.ComputeDerivedVars(); // recompute normalizers... - } - - if (increase_phn_dim != 0 || increase_spk_dim != 0) { - // Feature normalizing transform matrix used to initialize the new columns - // of the phonetic- or speaker-space projection matrices. 
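// As the comment above says, newly added phonetic-/speaker-space columns are
// seeded from a feature-normalizing transform computed from the full-covariance
// UBM (presumably so the added directions are sensibly scaled rather than
// arbitrary). Schematic flow, restating only the calls that appear just below:
//
//   kaldi::Matrix<BaseFloat> norm_xform;
//   ComputeFeatureNormalizer(am_sgmm.full_ubm(), &norm_xform);
//   if (increase_phn_dim != 0) am_sgmm.IncreasePhoneSpaceDim(increase_phn_dim, norm_xform);
//   if (increase_spk_dim != 0) am_sgmm.IncreaseSpkSpaceDim(increase_spk_dim, norm_xform);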
- kaldi::Matrix norm_xform; - ComputeFeatureNormalizer(am_sgmm.full_ubm(), &norm_xform); - if (increase_phn_dim != 0) - am_sgmm.IncreasePhoneSpaceDim(increase_phn_dim, norm_xform); - if (increase_spk_dim != 0) - am_sgmm.IncreaseSpkSpaceDim(increase_spk_dim, norm_xform); - } - - if (remove_speaker_space) { - KALDI_LOG << "Removing speaker space (projections N_)"; - am_sgmm.RemoveSpeakerSpace(); - } - - { - Output ko(model_out_filename, binary_write); - trans_model.Write(ko.Stream(), binary_write); - am_sgmm.Write(ko.Stream(), binary_write, write_flags); - } - - KALDI_LOG << "Written model to " << model_out_filename; - return 0; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-normalize.cc b/src/sgmmbin/sgmm-normalize.cc deleted file mode 100644 index c41141207dc..00000000000 --- a/src/sgmmbin/sgmm-normalize.cc +++ /dev/null @@ -1,85 +0,0 @@ -// sgmmbin/sgmm-normalize.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include "base/kaldi-common.h" -#include "util/common-utils.h" - -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - const char *usage = - "Renormalize SGMM so that within certain subsets of UBM Gaussians (typically \n" - "corresponding to gender), probabilities sum to one; write it out, including\n" - "normalizers." 
- "Note: gaussians-rspecifier will normally be \"ark:foo\" where foo looks like\n" - " m 0 1 2 3 4 5\n" - " f 6 7 8 9 10\n" - "Usage: sgmm-normalize [options] \n"; - - bool binary_write = true; - - ParseOptions po(usage); - po.Register("binary", &binary_write, "Write output in binary mode"); - - po.Read(argc, argv); - if (po.NumArgs() != 3) { - po.PrintUsage(); - exit(1); - } - std::string model_in_filename = po.GetArg(1), - gaussians_rspecifier = po.GetArg(2), - model_out_filename = po.GetArg(3); - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_in_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - std::vector > norm_sets; - SequentialInt32VectorReader vec_reader(gaussians_rspecifier); - for (;!vec_reader.Done(); vec_reader.Next()) - norm_sets.push_back(vec_reader.Value()); - - am_sgmm.ComputeNormalizersNormalized(norm_sets); - - { - Output ko(model_out_filename, binary_write); - trans_model.Write(ko.Stream(), binary_write); - am_sgmm.Write(ko.Stream(), binary_write, kSgmmWriteAll); - } - - - KALDI_LOG << "Written model to " << model_out_filename; - return 0; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-post-to-gpost.cc b/src/sgmmbin/sgmm-post-to-gpost.cc deleted file mode 100644 index 9395b04fe74..00000000000 --- a/src/sgmmbin/sgmm-post-to-gpost.cc +++ /dev/null @@ -1,190 +0,0 @@ -// sgmmbin/sgmm-post-to-gpost.cc - -// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -// 2014 Guoguo Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "sgmm/estimate-am-sgmm.h" -#include "hmm/posterior.h" - - -int main(int argc, char *argv[]) { - using namespace kaldi; - try { - const char *usage = - "Convert posteriors to Gaussian-level posteriors for SGMM training.\n" - "Usage: sgmm-post-to-gpost [options] " - " \n" - "e.g.: sgmm-post-to-gpost 1.mdl 1.ali scp:train.scp 'ark:ali-to-post ark:1.ali ark:-|' ark:-"; - - ParseOptions po(usage); - std::string gselect_rspecifier, spkvecs_rspecifier, utt2spk_rspecifier; - SgmmGselectConfig sgmm_opts; - po.Register("gselect", &gselect_rspecifier, "Precomputed Gaussian indices (rspecifier)"); - po.Register("spk-vecs", &spkvecs_rspecifier, "Speaker vectors (rspecifier)"); - po.Register("utt2spk", &utt2spk_rspecifier, - "rspecifier for utterance to speaker map"); - sgmm_opts.Register(&po); - po.Read(argc, argv); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - std::string model_filename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - posteriors_rspecifier = po.GetArg(3), - gpost_wspecifier = po.GetArg(4); - - using namespace kaldi; - typedef kaldi::int32 int32; - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - double tot_like = 0.0; - kaldi::int64 tot_t = 0; - - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier); - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier, - utt2spk_rspecifier); - - SgmmPerFrameDerivedVars per_frame_vars; - - SgmmGauPostWriter gpost_writer(gpost_wspecifier); - - int32 num_done = 0, num_no_posterior = 0, num_other_error = 0; - for (; !feature_reader.Done(); feature_reader.Next()) { - std::string utt = feature_reader.Key(); - if (!posteriors_reader.HasKey(utt)) { - num_no_posterior++; - } else { - const Matrix &mat = feature_reader.Value(); - Posterior posterior = posteriors_reader.Value(utt); - - bool have_gselect = !gselect_rspecifier.empty() - && gselect_reader.HasKey(utt) - && gselect_reader.Value(utt).size() == mat.NumRows(); - if (!gselect_rspecifier.empty() && !have_gselect) - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt << " (or wrong size)"; - std::vector > empty_gselect; - const std::vector > *gselect = - (have_gselect ? &gselect_reader.Value(utt) : &empty_gselect); - - if (posterior.size() != mat.NumRows()) { - KALDI_WARN << "Alignments has wrong size "<< (posterior.size()) << - " vs. "<< (mat.NumRows()); - num_other_error++; - continue; - } - - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt; - num_other_error++; - continue; - } - } // else spk_vars is "empty" - - num_done++; - BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0; - - SgmmGauPost gpost(posterior.size()); // posterior.size() == T. 
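// The Gaussian-posterior loop below uses a small cache to avoid repeated work:
// posteriors are first sorted by pdf-id (SortPosteriorByPdfs), so when several
// transition-ids on one frame share a pdf-id, am_sgmm.ComponentPosteriors() is
// called only once and its result reused. Condensed, illustrative form of that
// idea (the real loop follows):
//
//   if (pdf_id != prev_pdf_id) {   // first occurrence of this pdf on this frame
//     prev_like = am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id,
//                                             &prev_posterior);
//     prev_pdf_id = pdf_id;
//   }
//   gpost[i].posteriors[j] = prev_posterior;   // reuse cached posteriors,
//   gpost[i].posteriors[j].Scale(weight);      // then scale by this tid's weight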
- - SortPosteriorByPdfs(trans_model, &posterior); - int32 prev_pdf_id = -1; - BaseFloat prev_like = 0; - Matrix prev_posterior; - for (size_t i = 0; i < posterior.size(); i++) { - - std::vector this_gselect; - if (!gselect->empty()) this_gselect = (*gselect)[i]; - else am_sgmm.GaussianSelection(sgmm_opts, mat.Row(i), &this_gselect); - am_sgmm.ComputePerFrameVars(mat.Row(i), this_gselect, spk_vars, 0.0, &per_frame_vars); - - gpost[i].gselect = this_gselect; - gpost[i].tids.resize(posterior[i].size()); - gpost[i].posteriors.resize(posterior[i].size()); - - prev_pdf_id = -1; // Only cache for the same frame. - for (size_t j = 0; j < posterior[i].size(); j++) { - int32 tid = posterior[i][j].first, // transition identifier. - pdf_id = trans_model.TransitionIdToPdf(tid); - BaseFloat weight = posterior[i][j].second; - gpost[i].tids[j] = tid; - - if (pdf_id != prev_pdf_id) { - // First time see this pdf-id for this frame, update the cached - // variables. - prev_pdf_id = pdf_id; - prev_like = am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id, - &prev_posterior); - } - - gpost[i].posteriors[j] = prev_posterior; - tot_like_this_file += prev_like * weight; - tot_weight += weight; - gpost[i].posteriors[j].Scale(weight); - } - } - - KALDI_LOG << "Average like for this file is " - << (tot_like_this_file/posterior.size()) << " over " - << posterior.size() <<" frames."; - tot_like += tot_like_this_file; - tot_t += posterior.size(); - if (num_done % 10 == 0) - KALDI_LOG << "Avg like per frame so far is " - << (tot_like/tot_t); - gpost_writer.Write(utt, gpost); - } - } - - KALDI_LOG << "Overall like per frame (Gaussian only) = " - << (tot_like/tot_t) << " over " << tot_t << " frames."; - - KALDI_LOG << "Done " << num_done << " files, " << num_no_posterior - << " with no posteriors, " << num_other_error - << " with other errors."; - - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-rescore-lattice.cc b/src/sgmmbin/sgmm-rescore-lattice.cc deleted file mode 100644 index 6ad50415182..00000000000 --- a/src/sgmmbin/sgmm-rescore-lattice.cc +++ /dev/null @@ -1,165 +0,0 @@ -// sgmmbin/sgmm-rescore-lattice.cc - -// Copyright 2009-2011 Saarland University (Author: Arnab Ghoshal) -// Cisco Systems (Author: Neha Agrawal) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
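// The --speedup option registered below trades exactness for time on frames where
// the lattice carries only a single pdf-id: per its help text, such frames are
// only sometimes (randomly) evaluated, and the computed likelihoods are scaled up
// so corpus-level diagnostics are preserved in expectation. A hedged sketch of
// that idea -- the sampling helper here is hypothetical; the real logic lives in
// RescoreCompactLatticeSpeedup():
//
//   BaseFloat speedup_factor = 100.0;             // value used in this tool
//   if (WithProbability(1.0 / speedup_factor))    // hypothetical sampling helper
//     like = speedup_factor * decodable.LogLikelihood(frame, tid);
//   else
//     like = 0.0;                                 // skipped frame: no contribution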
- -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "util/stl-utils.h" -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" -#include "fstext/fstext-lib.h" -#include "lat/kaldi-lattice.h" -#include "lat/lattice-functions.h" -#include "sgmm/decodable-am-sgmm.h" - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - typedef kaldi::int64 int64; - using fst::SymbolTable; - using fst::VectorFst; - using fst::StdArc; - - const char *usage = - "Replace the acoustic scores on a lattice using a new model.\n" - "Usage: sgmm-rescore-lattice [options] " - " \n" - " e.g.: sgmm-rescore-lattice 1.mdl ark:1.lats scp:trn.scp ark:2.lats\n"; - - kaldi::BaseFloat old_acoustic_scale = 0.0; - bool speedup = false; - BaseFloat log_prune = 5.0; - std::string gselect_rspecifier, spkvecs_rspecifier, utt2spk_rspecifier; - SgmmGselectConfig sgmm_opts; - kaldi::ParseOptions po(usage); - po.Register("old-acoustic-scale", &old_acoustic_scale, - "Add the current acoustic scores with some scale."); - po.Register("log-prune", &log_prune, - "Pruning beam used to reduce number of exp() evaluations."); - po.Register("spk-vecs", &spkvecs_rspecifier, "Speaker vectors (rspecifier)"); - po.Register("utt2spk", &utt2spk_rspecifier, - "rspecifier for utterance to speaker map"); - po.Register("gselect", &gselect_rspecifier, - "Precomputed Gaussian indices (rspecifier)"); - po.Register("speedup", &speedup, - "If true, enable a faster version of the computation that " - "saves times when there is only one pdf-id on a single frame " - "by only sometimes (randomly) computing the probabilities, and " - "then scaling them up to preserve corpus-level diagnostics."); - - sgmm_opts.Register(&po); - - po.Read(argc, argv); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - std::string model_filename = po.GetArg(1), - lats_rspecifier = po.GetArg(2), - feature_rspecifier = po.GetArg(3), - lats_wspecifier = po.GetArg(4); - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier, - utt2spk_rspecifier); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - // Read as regular lattice - SequentialCompactLatticeReader compact_lattice_reader(lats_rspecifier); - // Write as compact lattice. - CompactLatticeWriter compact_lattice_writer(lats_wspecifier); - - int32 num_done = 0, num_err = 0; - for (; !compact_lattice_reader.Done(); compact_lattice_reader.Next()) { - std::string utt = compact_lattice_reader.Key(); - if (!feature_reader.HasKey(utt)) { - KALDI_WARN << "No feature found for utterance " << utt << ". 
Skipping"; - num_err++; - continue; - } - - CompactLattice clat = compact_lattice_reader.Value(); - compact_lattice_reader.FreeCurrent(); - if (old_acoustic_scale != 1.0) - fst::ScaleLattice(fst::AcousticLatticeScale(old_acoustic_scale), &clat); - - const Matrix &feats = feature_reader.Value(utt); - - // Get speaker vectors - SgmmPerSpkDerivedVars spk_vars; - if (spkvecs_reader.IsOpen()) { - if (spkvecs_reader.HasKey(utt)) { - spk_vars.v_s = spkvecs_reader.Value(utt); - am_sgmm.ComputePerSpkDerivedVars(&spk_vars); - } else { - KALDI_WARN << "Cannot find speaker vector for " << utt; - num_err++; - continue; - } - } // else spk_vars is "empty" - - bool have_gselect = !gselect_rspecifier.empty() - && gselect_reader.HasKey(utt) - && gselect_reader.Value(utt).size() == feats.NumRows(); - if (!gselect_rspecifier.empty() && !have_gselect) - KALDI_WARN << "No Gaussian-selection info available for utterance " - << utt << " (or wrong size)"; - std::vector > empty_gselect; - const std::vector > *gselect = - (have_gselect ? &gselect_reader.Value(utt) : &empty_gselect); - - DecodableAmSgmm sgmm_decodable(sgmm_opts, am_sgmm, spk_vars, - trans_model, feats, *gselect, - log_prune); - - if (!speedup) { - if (kaldi::RescoreCompactLattice(&sgmm_decodable, &clat)) { - compact_lattice_writer.Write(utt, clat); - num_done++; - } else num_err++; - } else { - BaseFloat speedup_factor = 100.0; - if (kaldi::RescoreCompactLatticeSpeedup(trans_model, speedup_factor, - &sgmm_decodable, - &clat)) { - compact_lattice_writer.Write(utt, clat); - num_done++; - } else num_err++; - } - } - - KALDI_LOG << "Done " << num_done << " lattices, errors on " - << num_err; - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/src/sgmmbin/sgmm-sum-accs.cc b/src/sgmmbin/sgmm-sum-accs.cc deleted file mode 100644 index 8562536d9cf..00000000000 --- a/src/sgmmbin/sgmm-sum-accs.cc +++ /dev/null @@ -1,69 +0,0 @@ -// sgmmbin/sgmm-sum-accs.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "util/common-utils.h" -#include "sgmm/estimate-am-sgmm.h" -#include "hmm/transition-model.h" - - -int main(int argc, char *argv[]) { - try { - typedef kaldi::int32 int32; - - const char *usage = - "Sum multiple accumulated stats files for SGMM training.\n" - "Usage: sgmm-sum-accs [options] stats-out stats-in1 stats-in2 ...\n"; - - bool binary = true; - kaldi::ParseOptions po(usage); - po.Register("binary", &binary, "Write output in binary mode"); - po.Read(argc, argv); - - if (po.NumArgs() < 2) { - po.PrintUsage(); - exit(1); - } - - std::string stats_out_filename = po.GetArg(1); - kaldi::Vector transition_accs; - kaldi::MleAmSgmmAccs sgmm_accs; - - for (int i = 2, max = po.NumArgs(); i <= max; i++) { - std::string stats_in_filename = po.GetArg(i); - bool binary_read; - kaldi::Input ki(stats_in_filename, &binary_read); - transition_accs.Read(ki.Stream(), binary_read, true /* add values */); - sgmm_accs.Read(ki.Stream(), binary_read, true /* add values */); - } - - // Write out the accs - { - kaldi::Output ko(stats_out_filename, binary); - transition_accs.Write(ko.Stream(), binary); - sgmm_accs.Write(ko.Stream(), binary); - } - - KALDI_LOG << "Written stats to " << stats_out_filename; - } catch(const std::exception &e) { - std::cerr << e.what() << '\n'; - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-sum-tree-stats.cc b/src/sgmmbin/sgmm-sum-tree-stats.cc deleted file mode 100644 index a1eae2a0bcf..00000000000 --- a/src/sgmmbin/sgmm-sum-tree-stats.cc +++ /dev/null @@ -1,100 +0,0 @@ -// sgmmbin/sgmm-sum-tree-stats.cc - -// Copyright 2012 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "tree/context-dep.h" -#include "tree/build-tree-utils.h" -#include "sgmm/sgmm-clusterable.h" - - -int main(int argc, char *argv[]) { - using namespace kaldi; - typedef kaldi::int32 int32; - try { - const char *usage = - "Sum SGMM-type statistics used for phonetic decision tree building.\n" - "Usage: sgmm-sum-tree-stats [options] tree-accs-out trea-accs-in1 tree-accs-in2 ...\n" - "e.g.: sgmm-sum-tree-stats treeacc 1.streeacc 2.streeacc 3.streeacc\n"; - - ParseOptions po(usage); - bool binary = true; - - po.Register("binary", &binary, "Write output in binary mode"); - po.Read(argc, argv); - - if (po.NumArgs() < 2) { - po.PrintUsage(); - exit(1); - } - - std::string treeacc_wxfilename = po.GetArg(1); - - std::map tree_stats; - - AmSgmm am_sgmm; // dummy variable needed to initialize stats. - std::vector > H; // also needed to initialize stats, - // but never accessed in this program. 
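// The summation loop that follows merges tree statistics from all input files
// through a map keyed by EventType: the first Clusterable* seen for an event is
// stored and takes ownership; later ones are Add()ed into it and deleted.
// Skeleton of the merge step, restating the logic below:
//
//   std::map<EventType, Clusterable*>::iterator map_iter = tree_stats.find(e);
//   if (map_iter == tree_stats.end()) {
//     tree_stats[e] = c;              // first stats for this event: keep pointer
//   } else {
//     map_iter->second->Add(*c);      // merge into the existing stats
//     delete c;                       // duplicate no longer needed
//   }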
- - // typedef std::vector > BuildTreeStatsType; - for (int32 arg = 2; arg <= po.NumArgs(); arg++) { - std::string treeacc_rxfilename = po.GetArg(arg); - bool binary_in; - Input ki(treeacc_rxfilename, &binary_in); - BuildTreeStatsType stats_array; - SgmmClusterable example(am_sgmm, H); // Needed for its type information. - ReadBuildTreeStats(ki.Stream(), binary_in, example, &stats_array); - for (BuildTreeStatsType::iterator iter = stats_array.begin(); - iter != stats_array.end(); ++iter) { - EventType e = iter->first; - Clusterable *c = iter->second; - std::map::iterator map_iter = tree_stats.find(e); - if (map_iter == tree_stats.end()) { // Not already present. - tree_stats[e] = c; - } else { - map_iter->second->Add(*c); - delete c; - } - } - } - - BuildTreeStatsType stats; // all the stats, in vectorized form. - - for (std::map::const_iterator iter = tree_stats.begin(); - iter != tree_stats.end(); - iter++ ) { - stats.push_back(std::make_pair(iter->first, iter->second)); - } - tree_stats.clear(); - - { - Output ko(treeacc_wxfilename, binary); - WriteBuildTreeStats(ko.Stream(), binary, stats); - } - KALDI_LOG << "Wrote summed sgmm-treeaaccs: number of separate objects was " - << stats.size(); - DeleteBuildTreeStats(&stats); - return (stats.size() != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/sgmmbin/sgmm-write-ubm.cc b/src/sgmmbin/sgmm-write-ubm.cc deleted file mode 100644 index 3f994f11a03..00000000000 --- a/src/sgmmbin/sgmm-write-ubm.cc +++ /dev/null @@ -1,71 +0,0 @@ -// sgmmbin/sgmm-write-ubm.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- - -#include "base/kaldi-common.h" -#include "util/common-utils.h" - -#include "sgmm/am-sgmm.h" -#include "hmm/transition-model.h" - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - const char *usage = - "Write out the full-covariance UBM of the SGMM\n" - "Usage: sgmm-write-ubm [options] \n" - "e.g.: sgmm-write-ubm 1.mdl 1.ubm\n"; - - bool binary_write = true; - - ParseOptions po(usage); - po.Register("binary", &binary_write, "Write output in binary mode"); - - po.Read(argc, argv); - if (po.NumArgs() != 2) { - po.PrintUsage(); - exit(1); - } - std::string model_in_filename = po.GetArg(1), - ubm_out_filename = po.GetArg(2); - - AmSgmm am_sgmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_in_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_sgmm.Read(ki.Stream(), binary); - } - - { - Output ko(ubm_out_filename, binary_write); - am_sgmm.full_ubm().Write(ko.Stream(), binary_write); - } - - KALDI_LOG << "Written UBM to " << ubm_out_filename; - return 0; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/tree/clusterable-classes.h b/src/tree/clusterable-classes.h index 817d0c65bc3..d19e17f6b68 100644 --- a/src/tree/clusterable-classes.h +++ b/src/tree/clusterable-classes.h @@ -27,10 +27,6 @@ namespace kaldi { -// Note: see sgmm/sgmm-clusterable.h for an SGMM-based clusterable -// class. We didn't include it here, to avoid adding an extra -// dependency to this directory. - /// \addtogroup clustering_group /// @{ From cd06802704718e86f1c4daf1d532728812dc6e60 Mon Sep 17 00:00:00 2001 From: Yiming Wang Date: Wed, 11 Jan 2017 02:33:30 -0500 Subject: [PATCH 222/530] [src] nnet3: fixed bug (LstmNonlinearityComponent::ZeroStats() was not implemented) (#1302) --- src/nnet3/nnet-general-component.cc | 2 ++ src/nnet3/nnet-simple-component.cc | 7 +++++++ src/nnet3/nnet-simple-component.h | 1 + 3 files changed, 10 insertions(+) diff --git a/src/nnet3/nnet-general-component.cc b/src/nnet3/nnet-general-component.cc index c899f592af9..160ff1d089e 100644 --- a/src/nnet3/nnet-general-component.cc +++ b/src/nnet3/nnet-general-component.cc @@ -19,6 +19,7 @@ #include #include +#include #include "nnet3/nnet-general-component.h" #include "nnet3/nnet-computation-graph.h" #include "nnet3/nnet-parse.h" @@ -957,6 +958,7 @@ void BackpropTruncationComponentPrecomputedIndexes::Read(std::istream &istream, std::string BackpropTruncationComponent::Info() const { std::ostringstream stream; stream << Type() << ", dim=" << dim_ + << ", count=" << std::setprecision(3) << count_ << std::setprecision(6) << ", clipping-threshold=" << clipping_threshold_ << ", clipped-proportion=" << (count_ > 0.0 ? 
num_clipped_ / count_ : 0) diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc index 90f52a11aa5..741ead92c6a 100644 --- a/src/nnet3/nnet-simple-component.cc +++ b/src/nnet3/nnet-simple-component.cc @@ -5108,6 +5108,13 @@ Component* LstmNonlinearityComponent::Copy() const { return new LstmNonlinearityComponent(*this); } +void LstmNonlinearityComponent::ZeroStats() { + value_sum_.SetZero(); + deriv_sum_.SetZero(); + self_repair_total_.SetZero(); + count_ = 0.0; +} + void LstmNonlinearityComponent::Scale(BaseFloat scale) { params_.Scale(scale); value_sum_.Scale(scale); diff --git a/src/nnet3/nnet-simple-component.h b/src/nnet3/nnet-simple-component.h index 1106fdc3246..44f487b49b9 100644 --- a/src/nnet3/nnet-simple-component.h +++ b/src/nnet3/nnet-simple-component.h @@ -1744,6 +1744,7 @@ class LstmNonlinearityComponent: public UpdatableComponent { virtual int32 NumParameters() const; virtual void Vectorize(VectorBase *params) const; virtual void UnVectorize(const VectorBase ¶ms); + virtual void ZeroStats(); // Some functions that are specific to this class: explicit LstmNonlinearityComponent( From 748a4a9233bb7b8bf9e6b95be7a0e5d9ae4ecfc9 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 11 Jan 2017 14:48:04 -0500 Subject: [PATCH 223/530] [src] Restore init-ubm.cc to sgmm2bin (was deleted when sgmmbin deleted) --- src/sgmm2bin/Makefile | 4 +- src/sgmm2bin/init-ubm.cc | 95 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 src/sgmm2bin/init-ubm.cc diff --git a/src/sgmm2bin/Makefile b/src/sgmm2bin/Makefile index fb8b3e45832..d8db40d5b33 100644 --- a/src/sgmm2bin/Makefile +++ b/src/sgmm2bin/Makefile @@ -8,7 +8,7 @@ BINFILES = sgmm2-init sgmm2-gselect sgmm2-acc-stats sgmm2-est sgmm2-sum-accs \ sgmm2-acc-stats-gpost sgmm2-latgen-faster sgmm2-est-spkvecs-gpost \ sgmm2-rescore-lattice sgmm2-copy sgmm2-info sgmm2-est-ebw \ sgmm2-acc-stats2 sgmm2-comp-prexform sgmm2-est-fmllr sgmm2-project \ - sgmm2-latgen-faster-parallel + sgmm2-latgen-faster-parallel init-ubm OBJFILES = @@ -22,6 +22,6 @@ ADDLIBS = ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a \ ../feat/kaldi-feat.a ../transform/kaldi-transform.a \ ../gmm/kaldi-gmm.a ../tree/kaldi-tree.a ../util/kaldi-util.a \ ../thread/kaldi-thread.a ../matrix/kaldi-matrix.a \ - ../base/kaldi-base.a + ../base/kaldi-base.a include ../makefiles/default_rules.mk diff --git a/src/sgmm2bin/init-ubm.cc b/src/sgmm2bin/init-ubm.cc new file mode 100644 index 00000000000..3a0d398b7f6 --- /dev/null +++ b/src/sgmm2bin/init-ubm.cc @@ -0,0 +1,95 @@ +// sgmmbin/init-ubm.cc + +// Copyright 2009-2011 Saarland University +// Author: Arnab Ghoshal + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
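// init-ubm (the restored tool below) produces a single UBM by clustering all the
// Gaussians of a diagonal-covariance acoustic model with ClusterGaussiansToUbm(),
// weighting pdfs by the supplied occupancy vector; with --fullcov-ubm=true the
// diagonal result is copied into a FullGmm before being written. A plausible
// invocation (the file names are hypothetical):
//
//   init-ubm --fullcov-ubm=true final.mdl final.occs final.ubm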
+ +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "util/kaldi-io.h" +#include "gmm/diag-gmm.h" +#include "gmm/full-gmm.h" +#include "gmm/am-diag-gmm.h" +#include "hmm/transition-model.h" + + +int main(int argc, char *argv[]) { + try { + typedef kaldi::int32 int32; + typedef kaldi::BaseFloat BaseFloat; + + const char *usage = + "Cluster the Gaussians in a diagonal-GMM acoustic model\n" + "to a single full-covariance or diagonal-covariance GMM.\n" + "Usage: init-ubm [options] \n"; + + bool binary_write = true, fullcov_ubm = true; + kaldi::ParseOptions po(usage); + po.Register("binary", &binary_write, "Write output in binary mode"); + po.Register("fullcov-ubm", &fullcov_ubm, "Write out full covariance UBM."); + kaldi::UbmClusteringOptions ubm_opts; + ubm_opts.Register(&po); + + po.Read(argc, argv); + + if (po.NumArgs() != 3) { + po.PrintUsage(); + exit(1); + } + ubm_opts.Check(); + + std::string model_in_filename = po.GetArg(1), + occs_in_filename = po.GetArg(2), + gmm_out_filename = po.GetArg(3); + + kaldi::AmDiagGmm am_gmm; + kaldi::TransitionModel trans_model; + { + bool binary_read; + kaldi::Input ki(model_in_filename, &binary_read); + trans_model.Read(ki.Stream(), binary_read); + am_gmm.Read(ki.Stream(), binary_read); + } + + kaldi::Vector state_occs; + state_occs.Resize(am_gmm.NumPdfs()); + { + bool binary_read; + kaldi::Input ki(occs_in_filename, &binary_read); + state_occs.Read(ki.Stream(), binary_read); + } + + kaldi::DiagGmm ubm; + ClusterGaussiansToUbm(am_gmm, state_occs, ubm_opts, &ubm); + if (fullcov_ubm) { + kaldi::FullGmm full_ubm; + full_ubm.CopyFromDiagGmm(ubm); + kaldi::Output ko(gmm_out_filename, binary_write); + full_ubm.Write(ko.Stream(), binary_write); + } else { + kaldi::Output ko(gmm_out_filename, binary_write); + ubm.Write(ko.Stream(), binary_write); + } + + KALDI_LOG << "Written UBM to " << gmm_out_filename; + } catch(const std::exception &e) { + std::cerr << e.what() << '\n'; + return -1; + } +} + + From 95f0feca1d73b9ae2fc84dfe585c9f2aaf6457fc Mon Sep 17 00:00:00 2001 From: Karel Vesely Date: Wed, 11 Jan 2017 20:51:50 +0100 Subject: [PATCH 224/530] [src] nnet1: replacing 'bc' by 'awk' in steps/nnet/train_scheduler.sh (#1333) --- egs/wsj/s5/steps/nnet/train_scheduler.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/egs/wsj/s5/steps/nnet/train_scheduler.sh b/egs/wsj/s5/steps/nnet/train_scheduler.sh index e2499b17274..48bca4e3813 100755 --- a/egs/wsj/s5/steps/nnet/train_scheduler.sh +++ b/egs/wsj/s5/steps/nnet/train_scheduler.sh @@ -136,7 +136,7 @@ for iter in $(seq -w $max_iters); do # accept or reject? loss_prev=$loss - if [ 1 == $(bc <<< "$loss_new < $loss") -o $iter -le $keep_lr_iters -o $iter -le $min_iters ]; then + if [ 1 == $(awk "BEGIN{print($loss_new < $loss ? 1:0);}") -o $iter -le $keep_lr_iters -o $iter -le $min_iters ]; then # accepting: the loss was better, or we had fixed learn-rate, or we had fixed epoch-number, loss=$loss_new mlp_best=$dir/nnet/${mlp_base}_iter${iter}_learnrate${learn_rate}_tr$(printf "%.4f" $tr_loss)_cv$(printf "%.4f" $loss_new) @@ -159,8 +159,8 @@ for iter in $(seq -w $max_iters); do [ $iter -le $keep_lr_iters ] && continue # stopping criterion, - rel_impr=$(bc <<< "scale=10; ($loss_prev-$loss)/$loss_prev") - if [ 1 == $halving -a 1 == $(bc <<< "$rel_impr < $end_halving_impr") ]; then + rel_impr=$(awk "BEGIN{print(($loss_prev-$loss)/$loss_prev);}") + if [ 1 == $halving -a 1 == $(awk "BEGIN{print($rel_impr < $end_halving_impr ? 
1:0);}") ]; then if [ $iter -le $min_iters ]; then echo we were supposed to finish, but we continue as min_iters : $min_iters continue @@ -170,7 +170,7 @@ for iter in $(seq -w $max_iters); do fi # start learning-rate fade-out when improvement is low, - if [ 1 == $(bc <<< "$rel_impr < $start_halving_impr") ]; then + if [ 1 == $(awk "BEGIN{print($rel_impr < $start_halving_impr ? 1:0);}") ]; then halving=1 echo $halving >$dir/.halving fi From 728b303dfe0b23316ebe1359fc6e12ea0ceaaea5 Mon Sep 17 00:00:00 2001 From: hainan-xv Date: Wed, 11 Jan 2017 23:02:08 -0500 Subject: [PATCH 225/530] [scrips] fix bug in lmrescore_rnnlm_lat.sh when using G.carpa (#1334) --- egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh b/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh index a669f5bc3d5..75b08bc4779 100755 --- a/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh +++ b/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh @@ -79,7 +79,7 @@ if [ "$oldlm" == "$oldlang/G.fst" ]; then else $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ lattice-lmrescore-const-arpa --lm-scale=$oldlm_weight \ - "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm_command" ark:- \| \ + "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm" ark:- \| \ lattice-lmrescore-rnnlm --lm-scale=$weight \ --max-ngram-order=$max_ngram_order ark:$rnnlm_dir/unk.probs \ $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \ From 42291a66f96ecebd85d15feeec91548f07ba1028 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 12 Jan 2017 01:11:37 -0500 Subject: [PATCH 226/530] [doc,egs] Fix path from lm to lmbin in kaldi for dummies and various egs/*/*/path.sh --- egs/babel/s5/path.sh | 5 ++--- egs/babel/s5b/path.sh | 5 ++--- egs/babel/s5c/path.sh | 5 ++--- egs/gp/s1/path.sh | 5 ++--- egs/gp/s5/path.sh | 7 +++---- egs/lre07/v2/path.sh | 2 +- src/doc/kaldi_for_dummies.dox | 2 +- 7 files changed, 13 insertions(+), 18 deletions(-) diff --git a/egs/babel/s5/path.sh b/egs/babel/s5/path.sh index 498423857fd..a45a39d1f6a 100755 --- a/egs/babel/s5/path.sh +++ b/egs/babel/s5/path.sh @@ -1,6 +1,5 @@ export KALDI_ROOT=`pwd`/../../.. [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh -. /export/babel/data/software/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +. /export/babel/data/software/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export LC_ALL=C - diff --git a/egs/babel/s5b/path.sh b/egs/babel/s5b/path.sh index c8fdbad6ff7..2d7dba09015 100755 --- a/egs/babel/s5b/path.sh +++ b/egs/babel/s5b/path.sh @@ -1,5 +1,4 @@ export KALDI_ROOT=`pwd`/../../.. -. 
/export/babel/data/software/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +. /export/babel/data/software/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export LC_ALL=C - diff --git a/egs/babel/s5c/path.sh b/egs/babel/s5c/path.sh index c8fdbad6ff7..2d7dba09015 100755 --- a/egs/babel/s5c/path.sh +++ b/egs/babel/s5c/path.sh @@ -1,5 +1,4 @@ export KALDI_ROOT=`pwd`/../../.. -. /export/babel/data/software/env.sh -export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +. /export/babel/data/software/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export LC_ALL=C - diff --git a/egs/gp/s1/path.sh b/egs/gp/s1/path.sh index cee9bacbde9..8a3b9a84d98 100644 --- a/egs/gp/s1/path.sh +++ b/egs/gp/s1/path.sh @@ -5,9 +5,9 @@ KALDIROOT=/exports/home/aghoshal/kaldi/trunk [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh KALDISRC=$KALDIROOT/src -KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin +KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin -KALDIBIN=$KALDIBIN:$KALDISRC/sgmm2bin:$KALDISRC/lm +KALDIBIN=$KALDIBIN:$KALDISRC/sgmm2bin:$KALDISRC/lmbin FSTBIN=$KALDIROOT/tools/openfst/bin LMBIN=$KALDIROOT/tools/irstlm/bin @@ -34,4 +34,3 @@ export LC_ALL=C # Site-specific configs: [ `hostname -y` == ecdf ] && { . path_ed.sh; } - diff --git a/egs/gp/s5/path.sh b/egs/gp/s5/path.sh index e9f7a8337bc..fcf365ec8b6 100644 --- a/egs/gp/s5/path.sh +++ b/egs/gp/s5/path.sh @@ -7,9 +7,9 @@ KALDI_ROOT=/homes/eva/q/qghoshal/src/kaldi/trunk [ -f $KALDI_ROOT/tools/env.sh ] && . 
$KALDI_ROOT/tools/env.sh KALDISRC=$KALDI_ROOT/src -KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin +KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin -KALDIBIN=$KALDIBIN:$KALDISRC/sgmm2bin:$KALDISRC/lm +KALDIBIN=$KALDIBIN:$KALDISRC/sgmm2bin:$KALDISRC/lmbin FSTBIN=$KALDI_ROOT/tools/openfst/bin LMBIN=$KALDI_ROOT/tools/irstlm/bin @@ -25,10 +25,9 @@ SCRIPTS=$kaldi_local:$kaldi_utils:$kaldi_steps export PATH=$PATH:$KALDIBIN:$FSTBIN:$LMBIN:$SCRIPTS -# If the correct version of shorten and sox are not on the path, +# If the correct version of shorten and sox are not on the path, # the following will be set by local/gp_check_tools.sh SHORTEN_BIN= # e.g. $PWD/tools/shorten-3.6.1/bin SOX_BIN= # e.g. $PWD/tools/sox-14.3.2/bin - diff --git a/egs/lre07/v2/path.sh b/egs/lre07/v2/path.sh index d55f970d1fb..161fc200300 100755 --- a/egs/lre07/v2/path.sh +++ b/egs/lre07/v2/path.sh @@ -1,3 +1,3 @@ export KALDI_ROOT=$(cd ../../..; pwd) -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH export LC_ALL=C diff --git a/src/doc/kaldi_for_dummies.dox b/src/doc/kaldi_for_dummies.dox index 49c9fb69e42..9afe831ecc4 100644 --- a/src/doc/kaldi_for_dummies.dox +++ b/src/doc/kaldi_for_dummies.dox @@ -413,7 +413,7 @@ b.) \c path.sh
export KALDI_ROOT=`pwd`/../.. # Setting paths to useful tools -export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$PWD:$PATH +export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$PWD:$PATH # Defining audio data directory (modify it for your installation directory!) export DATA_ROOT="/home/{user}/kaldi-trunk/egs/digits/digits_audio" From 421485ee954dc697c643f0a18f0d5647042cbbbe Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 12 Jan 2017 15:56:40 -0500 Subject: [PATCH 227/530] [src] nnet3: Remove deprecated options which are no longer used (#1335) --- src/nnet3/nnet-utils.cc | 73 ----------------------------------- src/nnet3/nnet-utils.h | 22 ----------- src/nnet3bin/nnet3-am-copy.cc | 8 ---- 3 files changed, 103 deletions(-) diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc index d65193d9a54..d09c18b6ada 100644 --- a/src/nnet3/nnet-utils.cc +++ b/src/nnet3/nnet-utils.cc @@ -250,22 +250,6 @@ void ZeroComponentStats(Nnet *nnet) { } } -void ScaleLearningRate(BaseFloat learning_rate_scale, - Nnet *nnet) { - for (int32 c = 0; c < nnet->NumComponents(); c++) { - Component *comp = nnet->GetComponent(c); - if (comp->Properties() & kUpdatableComponent) { - // For now all updatable components inherit from class UpdatableComponent. - // If that changes in future, we will change this code. - UpdatableComponent *uc = dynamic_cast(comp); - if (uc == NULL) - KALDI_ERR << "Updatable component does not inherit from class " - "UpdatableComponent; change this code."; - uc->SetActualLearningRate(uc->LearningRate() * learning_rate_scale); - } - } -} - void SetLearningRate(BaseFloat learning_rate, Nnet *nnet) { for (int32 c = 0; c < nnet->NumComponents(); c++) { @@ -282,63 +266,6 @@ void SetLearningRate(BaseFloat learning_rate, } } -void SetLearningRates(const Vector &learning_rates, - Nnet *nnet) { - int32 i = 0; - for (int32 c = 0; c < nnet->NumComponents(); c++) { - Component *comp = nnet->GetComponent(c); - if (comp->Properties() & kUpdatableComponent) { - // For now all updatable components inherit from class UpdatableComponent. - // If that changes in future, we will change this code. - UpdatableComponent *uc = dynamic_cast(comp); - if (uc == NULL) - KALDI_ERR << "Updatable component does not inherit from class " - "UpdatableComponent; change this code."; - KALDI_ASSERT(i < learning_rates.Dim()); - uc->SetActualLearningRate(learning_rates(i++)); - } - } - KALDI_ASSERT(i == learning_rates.Dim()); -} - -void GetLearningRates(const Nnet &nnet, - Vector *learning_rates) { - learning_rates->Resize(NumUpdatableComponents(nnet)); - int32 i = 0; - for (int32 c = 0; c < nnet.NumComponents(); c++) { - const Component *comp = nnet.GetComponent(c); - if (comp->Properties() & kUpdatableComponent) { - // For now all updatable components inherit from class UpdatableComponent. - // If that changes in future, we will change this code. 
- const UpdatableComponent *uc = dynamic_cast(comp); - if (uc == NULL) - KALDI_ERR << "Updatable component does not inherit from class " - "UpdatableComponent; change this code."; - (*learning_rates)(i++) = uc->LearningRate(); - } - } - KALDI_ASSERT(i == learning_rates->Dim()); -} - -void ScaleNnetComponents(const Vector &scale_factors, - Nnet *nnet) { - int32 i = 0; - for (int32 c = 0; c < nnet->NumComponents(); c++) { - Component *comp = nnet->GetComponent(c); - if (comp->Properties() & kUpdatableComponent) { - // For now all updatable components inherit from class UpdatableComponent. - // If that changes in future, we will change this code. - UpdatableComponent *uc = dynamic_cast(comp); - if (uc == NULL) - KALDI_ERR << "Updatable component does not inherit from class " - "UpdatableComponent; change this code."; - KALDI_ASSERT(i < scale_factors.Dim()); - uc->Scale(scale_factors(i++)); - } - } - KALDI_ASSERT(i == scale_factors.Dim()); -} - void ScaleNnet(BaseFloat scale, Nnet *nnet) { if (scale == 1.0) return; else if (scale == 0.0) { diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h index 1e0dcefd703..9cbfa87a800 100644 --- a/src/nnet3/nnet-utils.h +++ b/src/nnet3/nnet-utils.h @@ -116,31 +116,9 @@ void ComputeSimpleNnetContext(const Nnet &nnet, void SetLearningRate(BaseFloat learning_rate, Nnet *nnet); -/// Scales the actual learning rate for all the components in the nnet -/// by this factor -void ScaleLearningRate(BaseFloat learning_rate_scale, - Nnet *nnet); - -/// Sets the actual learning rates for all the updatable components in the -/// neural net to the values in 'learning_rates' vector -/// (one for each updatable component). -void SetLearningRates(const Vector &learning_rates, - Nnet *nnet); - -/// Get the learning rates for all the updatable components in the neural net -/// (the output must have dim equal to the number of updatable components). -void GetLearningRates(const Nnet &nnet, - Vector *learning_rates); - /// Scales the nnet parameters and stats by this scale. void ScaleNnet(BaseFloat scale, Nnet *nnet); -/// Scales the parameters of each of the updatable components. -/// Here, scales is a vector of size equal to the number of updatable -/// components -void ScaleNnetComponents(const Vector &scales, - Nnet *nnet); - /// Does *dest += alpha * src (affects nnet parameters and /// stored stats). 
void AddNnet(const Nnet &src, BaseFloat alpha, Nnet *dest); diff --git a/src/nnet3bin/nnet3-am-copy.cc b/src/nnet3bin/nnet3-am-copy.cc index 4851f839dcb..7aa0e4a32c0 100644 --- a/src/nnet3bin/nnet3-am-copy.cc +++ b/src/nnet3bin/nnet3-am-copy.cc @@ -80,9 +80,6 @@ int main(int argc, char *argv[]) { po.Register("learning-rate", &learning_rate, "If supplied, all the learning rates of updatable components" " are set to this value."); - po.Register("learning-rate-scale", &learning_rate_scale, - "Scales the learning rate of updatable components by this " - "factor"); po.Register("scale", &scale, "The parameter matrices are scaled" " by the specified value."); @@ -124,11 +121,6 @@ int main(int argc, char *argv[]) { if (learning_rate >= 0) SetLearningRate(learning_rate, &(am_nnet.GetNnet())); - KALDI_ASSERT(learning_rate_scale >= 0.0); - - if (learning_rate_scale != 1.0) - ScaleLearningRate(learning_rate_scale, &(am_nnet.GetNnet())); - if (!edits_config.empty()) { Input ki(edits_config); ReadEditConfig(ki.Stream(), &(am_nnet.GetNnet())); From 847035e7b02937a5f363ffe88dc029ed0358fa97 Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Fri, 13 Jan 2017 11:28:10 -0800 Subject: [PATCH 228/530] [egs] Remove some unused scripts --- egs/babel/s5b/local/make_pitch.sh | 307 ------------------------------ egs/babel/s5c/local/make_pitch.sh | 307 ------------------------------ 2 files changed, 614 deletions(-) delete mode 100755 egs/babel/s5b/local/make_pitch.sh delete mode 100755 egs/babel/s5c/local/make_pitch.sh diff --git a/egs/babel/s5b/local/make_pitch.sh b/egs/babel/s5b/local/make_pitch.sh deleted file mode 100755 index 107016d78a9..00000000000 --- a/egs/babel/s5b/local/make_pitch.sh +++ /dev/null @@ -1,307 +0,0 @@ -#!/bin/bash - -# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Bagher BabaAli -# Apache 2.0 -# To be run from .. (one directory up from here) -# This makes two-dimension p(voicing) and pitch features for some data/ directory. - -# Begin configuration section. -nj=4 -cmd=run.pl -stage=0 -pitch_config= -interpolate_pitch_opts= -process_pitch_opts= -cleanup=true -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -. parse_options.sh || exit 1; - -if [ $# != 3 ]; then - echo "Usage: make_pitch.sh [options] "; - echo "Makes two dimensional [p(voicing), pitch] features, based on SAcC pitch" - echo "extractor followed by some normalization and smoothing" - echo "E.g.: make_pitch.sh data/train_pitch exp/make_pitch_train plp/" - echo "Options: " - echo " --pitch-config # config passed to compute-pitch-feats " - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - exit 1; -fi - -data=$1 -expdir=$2 -pitchdir=$3 - -# make $pitchdir an absolute pathname. -pitchdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $pitchdir ${PWD}` -# make $expdir an absolute pathname. -expdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $expdir ${PWD}` - -# use "name" as part of name of the archive. -name=`basename $data` - -mkdir -p $pitchdir || exit 1; -mkdir -p $expdir/log || exit 1; - -scp=$data/wav.scp - -[ ! -s $KALDI_ROOT ] && KALDI_ROOT=../../.. - -( # this is for back compatiblity: - cd $KALDI_ROOT/tools - if [ -d sacc ] && [ ! -d pitch_trackers/sacc ]; then - echo "Linking sacc directory to new location." - mkdir -p pitch_trackers - cd pitch_trackers - ln -s ../sacc .. 
- fi -) - -sacc_dir=$KALDI_ROOT/tools/pitch_trackers/sacc/SAcC_GLNXA64/ -# make $sacc_dir an absolute pathname. -sacc_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $sacc_dir ${PWD}` - -sacc_script=$sacc_dir/run_SAcC.sh -sacc_config=$sacc_dir/conf/Babelnet_sr8k_bpo6_sb24_k10.config - -if [ ! -f $sacc_script ]; then - echo "*Expecting the script $sacc_script to exist" - echo "*cd to $KALDI_ROOT/tools/, and run extras/install_sacc.sh" - echo "*Re-run this script when it is installed." - exit 1; -fi - -required="$scp $pitch_config $sacc_config" - -for f in $required; do - if [ ! -f $f ]; then - echo "make_pitch.sh: no such file $f" - exit 1; - fi -done - -# note: in general, the double-parenthesis construct in bash "((" is "C-style -# syntax" where we can get rid of the $ for variable names, and omit spaces. -# The "for" loop in this style is a special construct. - -basename=`basename $data` -wavdir=$pitchdir/temp_wav_$basename -mkdir -p $wavdir - -if [ -f $data/segments ] || grep '|' $data/wav.scp >/dev/null; then - wav_scp=$expdir/wav.scp - cat $data/segments | awk -v dir=$wavdir '{key=$1; printf("%s %s/%s.wav\n", key, dir, key);}' \ - > $wav_scp || exit 1; - - if [ -f $data/segments ]; then - echo "$0 [info]: segments file exists: creating temporary wav files in $wavdir" - segments=$data/segments - else - # create a fake segments file that takes the whole file; this is an easy way - # to copy to static wav files. Note: probably this has not been tested. - cat $data/wav.scp | awk '{print $1, $1, 0.0, -1.0}' > $expdir/fake_segments - segments=$expdir/fake_segments - fi - if [ $stage -le 0 ]; then - echo "Extracting wav-file segments (or just converting to wav format)" - $cmd $expdir/log/extract-segments.log \ - extract-segments scp:$data/wav.scp $segments scp:$wav_scp || exit 1; - fi -else - echo "No segments file exists, and wav scp is plain: using wav files as input." - wav_scp=$data/wav.scp -fi - -wav_checked_scp=$expdir/wav_checked.scp -cat $wav_scp | \ - perl -ane '@A=split; if (-f $A[1]) { print; }' >$wav_checked_scp -nl_orig=`cat $wav_scp | wc -l` -nl_new=`cat $wav_checked_scp | wc -l` - -echo "After removing non-existent files, number of utterances decreased from $nl_orig to $nl_new"; -[ $nl_new -eq 0 ] && exit 1; - -# now $wav_scp is an scp file for the per-utterance wav files. - -# Split up the wav files into multiple lists. -split_wavs="" -for ((n=1; n<=nj; n++)); do - split_wavs="$split_wavs $expdir/split_wavs.$n.scp" -done -utils/split_scp.pl $wav_checked_scp $split_wavs || exit 1; - -# For each wav file, create corresponding temporary pitch file, in the -# format the SAcC outputs: [ 0 frame pitch p(voicing) ] -temp_pitchdir=$pitchdir/temp_pitch_$basename -mkdir -p $temp_pitchdir - -for ((n=1; n<=nj; n++)); do - mkdir -p $temp_pitchdir/$n - cat $expdir/split_wavs.$n.scp | awk -v pdir=$temp_pitchdir -v n=$n \ - '{key=$1; wavfile=$2; printf("%s,%s/%s/%s.pitch\n", wavfile, pdir, n, key);}' \ - > $expdir/sacc_flist.$n || exit 1 -done - -if [ $stage -le 1 ]; then - # Need to do this in director $sacc_dir as some of the things in its config - # are relative pathnames. - $cmd JOB=1:$nj $d/$expdir/log/sacc.JOB.log \ - cd $sacc_dir '&&' $sacc_script $expdir/sacc_flist.JOB $sacc_config || exit 1; -fi - -# I don't want to put a separate script in svn just for this, so creating a temporary -# script file in the experimental directory. Quotes around 'EOF' disable any -# interpretation in the here-doc. 
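# (A minimal illustration of that quoting rule, independent of this script:
#
#   cat <<EOF      # unquoted delimiter: the shell expands $HOME below
#   $HOME
#   EOF
#
#   cat <<'EOF'    # quoted delimiter: $HOME is emitted literally
#   $HOME
#   EOF
#
# Quoting the delimiter is what lets the awk and perl one-liners in the
# here-doc below reach the generated convert.sh verbatim.)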
-cat <<'EOF' > $expdir/convert.sh -#!/bin/bash -sacc_flist=$1 -scpfile=$2 -[ $# -ne 2 ] && echo "Usage: convert.sh " && exit 1; - -for f in `cat $sacc_flist | cut -d, -f2`; do - g=`echo $f | sed s:.pitch$:.mat:` - if [ -f $f ]; then - cat $f | awk 'BEGIN{printf("[ "); } {print $4, $3;} END{ print "]"; }' > $g - rm $f - fi -done -cat $sacc_flist | cut -d, -f2 | \ - perl -ane 'm:/([^/]+)\.pitch$: || die "Bad line $_"; $key=$1; s/\.pitch$/\.mat/; print "$key $_";' > $scpfile -EOF -chmod +x $expdir/convert.sh - -if [ $stage -le 2 ]; then - echo "Converting format from .pitch to .mat (kaldi-readable format)" - $cmd JOB=1:$nj $expdir/log/convert.JOB.log \ - $expdir/convert.sh $expdir/sacc_flist.JOB $expdir/mat.scp.JOB || exit 1; -fi - -if [ $stage -le 3 ]; then - echo "Doing final processing (interpolation, smoothing, etc.) on pitch features" - $cmd JOB=1:$nj $expdir/log/process.JOB.log \ - interpolate-pitch $interpolate_pitch_opts scp:$expdir/mat.scp.JOB ark:- \| \ - process-pitch-feats $process_pitch_opts ark:- \ - ark,scp:$pitchdir/${basename}_pitch.JOB.ark,$pitchdir/${basename}_pitch.JOB.scp || exit 1; -fi - -echo "Creating $data/feats.scp" -for ((n=1; n<=nj; n++)); do cat $pitchdir/${basename}_pitch.$n.scp; done > $data/feats.scp - -if $cleanup; then - echo "Removing temporary files" - rm -r $wavdir $temp_pitchdir -fi - -echo "Finished extracting pitch features for $basename" - -debug=~/temp2.m -echo "A = [" > $debug -copy-feats scp:$data/feats.scp ark,t:- | grep -v ']' | grep -v '\[' | awk '{if (NF == 2) { print; }}' | head -n 200000 \ - >> $debug - -cat <<'EOF' >>$debug -]; -pov = A(:, 1); -pitch = A(:, 2); -subplot(2, 2, 1); -hist(pov, 30); -legend('pov') -subplot(2, 2, 2); -hist(pitch, 30); -legend('pitch') - -len=size(pov, 1); -povD = pov(1:len-1) - pov(2:len); -subplot(2, 2, 3); -hist(povD, 30); -legend('delta-pov') - -pitchD = pitch(1:len-1) - pitch(2:len); -pitchD = max(pitchD, -0.05); -pitchD = min(pitchD, 0.05); -subplot(2, 2, 4); -hist(pitchD, 50); -legend('delta-pitch'); - -print -deps 'C.eps' -EOF - -exit 0; - - -# Here's - -#copy-feats scp:plp/train_pitch_pitch.10.scp ark,t:- | grep -v ']' | grep -v '\[' | awk '{if (NF == 2) { print; }}' | head -n 200000 > ~/temp2.m - -# -### data goes here. -#]; - - - -# rm $expdir/.error 2>/dev/null - -# # for ((n=1; n<=nj; n++)); do -# # mkdir -p "$expdir/$n" -# # done - -# # $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# # extract-segments scp:$scp $expdir/segments.JOB ark:- \| \ -# # compute-pitch-feats --verbose=2 --config=$pitch_config ark:- \ -# # ark,scp:$pitchdir/raw_pitch_$name.JOB.ark,$pitchdir/raw_pitch_$name.JOB.scp \ -# # `pwd`/$expdir/JOB || exit 1; - -# $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# extract-segments scp:$scp $expdir/segments.JOB ark:- \| \ -# local/SAcC.sh $expdir/wav.JOB.scp $pitchdir $name.JOB || exit 1; - -# else -# echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." 
-# split_scps="" -# for ((n=1; n<=nj; n++)); do -# split_scps="$split_scps $expdir/wav.$n.scp" -# done - -# utils/split_scp.pl $scp $split_scps || exit 1; - -# # for ((n=1; n<=nj; n++)); do -# # mkdir -p "$expdir/$n" -# # done - -# # $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# # compute-pitch-feats --verbose=2 --config=$pitch_config scp:$expdir/wav.JOB.scp \ -# # ark,scp:$pitchdir/raw_pitch_$name.JOB.ark,$pitchdir/raw_pitch_$name.JOB.scp \ -# # $expdir/JOB || exit 1; - -# pushd $sacc_dir -# $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# cd $sacclocal/SAcC.sh $expdir/wav.JOB.scp $pitchdir $name.JOB || exit 1; -# fi - - -# if [ -f $expdir/.error.$name ]; then -# echo "Error producing pitch features for $name:" -# tail $expdir/make_pitch.*.log -# exit 1; -# fi - -# # concatenate the .scp files together. -# for ((n=1; n<=nj; n++)); do -# cat $pitchdir/raw_pitch_$name.$n.scp >> $data/pitchs.scp || exit 1; -# done > $data/pitchs.scp - -# rm $expdir/wav.*.scp $expdir/segments.* 2>/dev/null - -# nf=`cat $data/pitchs.scp | wc -l` -# nu=`cat $data/utt2spk | wc -l` -# if [ $nf -ne $nu ]; then -# echo "It seems not all of the feature files were successfully ($nf != $nu);" -# echo "consider using utils/fix_data_dir.sh $data" -# fi - -# echo "Succeeded creating PITCH features for $name" diff --git a/egs/babel/s5c/local/make_pitch.sh b/egs/babel/s5c/local/make_pitch.sh deleted file mode 100755 index 107016d78a9..00000000000 --- a/egs/babel/s5c/local/make_pitch.sh +++ /dev/null @@ -1,307 +0,0 @@ -#!/bin/bash - -# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey) -# Bagher BabaAli -# Apache 2.0 -# To be run from .. (one directory up from here) -# This makes two-dimension p(voicing) and pitch features for some data/ directory. - -# Begin configuration section. -nj=4 -cmd=run.pl -stage=0 -pitch_config= -interpolate_pitch_opts= -process_pitch_opts= -cleanup=true -# End configuration section. - -echo "$0 $@" # Print the command line for logging - -if [ -f path.sh ]; then . ./path.sh; fi -. parse_options.sh || exit 1; - -if [ $# != 3 ]; then - echo "Usage: make_pitch.sh [options] "; - echo "Makes two dimensional [p(voicing), pitch] features, based on SAcC pitch" - echo "extractor followed by some normalization and smoothing" - echo "E.g.: make_pitch.sh data/train_pitch exp/make_pitch_train plp/" - echo "Options: " - echo " --pitch-config # config passed to compute-pitch-feats " - echo " --nj # number of parallel jobs" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - exit 1; -fi - -data=$1 -expdir=$2 -pitchdir=$3 - -# make $pitchdir an absolute pathname. -pitchdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $pitchdir ${PWD}` -# make $expdir an absolute pathname. -expdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $expdir ${PWD}` - -# use "name" as part of name of the archive. -name=`basename $data` - -mkdir -p $pitchdir || exit 1; -mkdir -p $expdir/log || exit 1; - -scp=$data/wav.scp - -[ ! -s $KALDI_ROOT ] && KALDI_ROOT=../../.. - -( # this is for back compatiblity: - cd $KALDI_ROOT/tools - if [ -d sacc ] && [ ! -d pitch_trackers/sacc ]; then - echo "Linking sacc directory to new location." - mkdir -p pitch_trackers - cd pitch_trackers - ln -s ../sacc .. - fi -) - -sacc_dir=$KALDI_ROOT/tools/pitch_trackers/sacc/SAcC_GLNXA64/ -# make $sacc_dir an absolute pathname. 
-sacc_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $sacc_dir ${PWD}` - -sacc_script=$sacc_dir/run_SAcC.sh -sacc_config=$sacc_dir/conf/Babelnet_sr8k_bpo6_sb24_k10.config - -if [ ! -f $sacc_script ]; then - echo "*Expecting the script $sacc_script to exist" - echo "*cd to $KALDI_ROOT/tools/, and run extras/install_sacc.sh" - echo "*Re-run this script when it is installed." - exit 1; -fi - -required="$scp $pitch_config $sacc_config" - -for f in $required; do - if [ ! -f $f ]; then - echo "make_pitch.sh: no such file $f" - exit 1; - fi -done - -# note: in general, the double-parenthesis construct in bash "((" is "C-style -# syntax" where we can get rid of the $ for variable names, and omit spaces. -# The "for" loop in this style is a special construct. - -basename=`basename $data` -wavdir=$pitchdir/temp_wav_$basename -mkdir -p $wavdir - -if [ -f $data/segments ] || grep '|' $data/wav.scp >/dev/null; then - wav_scp=$expdir/wav.scp - cat $data/segments | awk -v dir=$wavdir '{key=$1; printf("%s %s/%s.wav\n", key, dir, key);}' \ - > $wav_scp || exit 1; - - if [ -f $data/segments ]; then - echo "$0 [info]: segments file exists: creating temporary wav files in $wavdir" - segments=$data/segments - else - # create a fake segments file that takes the whole file; this is an easy way - # to copy to static wav files. Note: probably this has not been tested. - cat $data/wav.scp | awk '{print $1, $1, 0.0, -1.0}' > $expdir/fake_segments - segments=$expdir/fake_segments - fi - if [ $stage -le 0 ]; then - echo "Extracting wav-file segments (or just converting to wav format)" - $cmd $expdir/log/extract-segments.log \ - extract-segments scp:$data/wav.scp $segments scp:$wav_scp || exit 1; - fi -else - echo "No segments file exists, and wav scp is plain: using wav files as input." - wav_scp=$data/wav.scp -fi - -wav_checked_scp=$expdir/wav_checked.scp -cat $wav_scp | \ - perl -ane '@A=split; if (-f $A[1]) { print; }' >$wav_checked_scp -nl_orig=`cat $wav_scp | wc -l` -nl_new=`cat $wav_checked_scp | wc -l` - -echo "After removing non-existent files, number of utterances decreased from $nl_orig to $nl_new"; -[ $nl_new -eq 0 ] && exit 1; - -# now $wav_scp is an scp file for the per-utterance wav files. - -# Split up the wav files into multiple lists. -split_wavs="" -for ((n=1; n<=nj; n++)); do - split_wavs="$split_wavs $expdir/split_wavs.$n.scp" -done -utils/split_scp.pl $wav_checked_scp $split_wavs || exit 1; - -# For each wav file, create corresponding temporary pitch file, in the -# format the SAcC outputs: [ 0 frame pitch p(voicing) ] -temp_pitchdir=$pitchdir/temp_pitch_$basename -mkdir -p $temp_pitchdir - -for ((n=1; n<=nj; n++)); do - mkdir -p $temp_pitchdir/$n - cat $expdir/split_wavs.$n.scp | awk -v pdir=$temp_pitchdir -v n=$n \ - '{key=$1; wavfile=$2; printf("%s,%s/%s/%s.pitch\n", wavfile, pdir, n, key);}' \ - > $expdir/sacc_flist.$n || exit 1 -done - -if [ $stage -le 1 ]; then - # Need to do this in director $sacc_dir as some of the things in its config - # are relative pathnames. - $cmd JOB=1:$nj $d/$expdir/log/sacc.JOB.log \ - cd $sacc_dir '&&' $sacc_script $expdir/sacc_flist.JOB $sacc_config || exit 1; -fi - -# I don't want to put a separate script in svn just for this, so creating a temporary -# script file in the experimental directory. Quotes around 'EOF' disable any -# interpretation in the here-doc. 
-cat <<'EOF' > $expdir/convert.sh -#!/bin/bash -sacc_flist=$1 -scpfile=$2 -[ $# -ne 2 ] && echo "Usage: convert.sh " && exit 1; - -for f in `cat $sacc_flist | cut -d, -f2`; do - g=`echo $f | sed s:.pitch$:.mat:` - if [ -f $f ]; then - cat $f | awk 'BEGIN{printf("[ "); } {print $4, $3;} END{ print "]"; }' > $g - rm $f - fi -done -cat $sacc_flist | cut -d, -f2 | \ - perl -ane 'm:/([^/]+)\.pitch$: || die "Bad line $_"; $key=$1; s/\.pitch$/\.mat/; print "$key $_";' > $scpfile -EOF -chmod +x $expdir/convert.sh - -if [ $stage -le 2 ]; then - echo "Converting format from .pitch to .mat (kaldi-readable format)" - $cmd JOB=1:$nj $expdir/log/convert.JOB.log \ - $expdir/convert.sh $expdir/sacc_flist.JOB $expdir/mat.scp.JOB || exit 1; -fi - -if [ $stage -le 3 ]; then - echo "Doing final processing (interpolation, smoothing, etc.) on pitch features" - $cmd JOB=1:$nj $expdir/log/process.JOB.log \ - interpolate-pitch $interpolate_pitch_opts scp:$expdir/mat.scp.JOB ark:- \| \ - process-pitch-feats $process_pitch_opts ark:- \ - ark,scp:$pitchdir/${basename}_pitch.JOB.ark,$pitchdir/${basename}_pitch.JOB.scp || exit 1; -fi - -echo "Creating $data/feats.scp" -for ((n=1; n<=nj; n++)); do cat $pitchdir/${basename}_pitch.$n.scp; done > $data/feats.scp - -if $cleanup; then - echo "Removing temporary files" - rm -r $wavdir $temp_pitchdir -fi - -echo "Finished extracting pitch features for $basename" - -debug=~/temp2.m -echo "A = [" > $debug -copy-feats scp:$data/feats.scp ark,t:- | grep -v ']' | grep -v '\[' | awk '{if (NF == 2) { print; }}' | head -n 200000 \ - >> $debug - -cat <<'EOF' >>$debug -]; -pov = A(:, 1); -pitch = A(:, 2); -subplot(2, 2, 1); -hist(pov, 30); -legend('pov') -subplot(2, 2, 2); -hist(pitch, 30); -legend('pitch') - -len=size(pov, 1); -povD = pov(1:len-1) - pov(2:len); -subplot(2, 2, 3); -hist(povD, 30); -legend('delta-pov') - -pitchD = pitch(1:len-1) - pitch(2:len); -pitchD = max(pitchD, -0.05); -pitchD = min(pitchD, 0.05); -subplot(2, 2, 4); -hist(pitchD, 50); -legend('delta-pitch'); - -print -deps 'C.eps' -EOF - -exit 0; - - -# Here's - -#copy-feats scp:plp/train_pitch_pitch.10.scp ark,t:- | grep -v ']' | grep -v '\[' | awk '{if (NF == 2) { print; }}' | head -n 200000 > ~/temp2.m - -# -### data goes here. -#]; - - - -# rm $expdir/.error 2>/dev/null - -# # for ((n=1; n<=nj; n++)); do -# # mkdir -p "$expdir/$n" -# # done - -# # $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# # extract-segments scp:$scp $expdir/segments.JOB ark:- \| \ -# # compute-pitch-feats --verbose=2 --config=$pitch_config ark:- \ -# # ark,scp:$pitchdir/raw_pitch_$name.JOB.ark,$pitchdir/raw_pitch_$name.JOB.scp \ -# # `pwd`/$expdir/JOB || exit 1; - -# $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# extract-segments scp:$scp $expdir/segments.JOB ark:- \| \ -# local/SAcC.sh $expdir/wav.JOB.scp $pitchdir $name.JOB || exit 1; - -# else -# echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." 
-# split_scps="" -# for ((n=1; n<=nj; n++)); do -# split_scps="$split_scps $expdir/wav.$n.scp" -# done - -# utils/split_scp.pl $scp $split_scps || exit 1; - -# # for ((n=1; n<=nj; n++)); do -# # mkdir -p "$expdir/$n" -# # done - -# # $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# # compute-pitch-feats --verbose=2 --config=$pitch_config scp:$expdir/wav.JOB.scp \ -# # ark,scp:$pitchdir/raw_pitch_$name.JOB.ark,$pitchdir/raw_pitch_$name.JOB.scp \ -# # $expdir/JOB || exit 1; - -# pushd $sacc_dir -# $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ -# cd $sacclocal/SAcC.sh $expdir/wav.JOB.scp $pitchdir $name.JOB || exit 1; -# fi - - -# if [ -f $expdir/.error.$name ]; then -# echo "Error producing pitch features for $name:" -# tail $expdir/make_pitch.*.log -# exit 1; -# fi - -# # concatenate the .scp files together. -# for ((n=1; n<=nj; n++)); do -# cat $pitchdir/raw_pitch_$name.$n.scp >> $data/pitchs.scp || exit 1; -# done > $data/pitchs.scp - -# rm $expdir/wav.*.scp $expdir/segments.* 2>/dev/null - -# nf=`cat $data/pitchs.scp | wc -l` -# nu=`cat $data/utt2spk | wc -l` -# if [ $nf -ne $nu ]; then -# echo "It seems not all of the feature files were successfully ($nf != $nu);" -# echo "consider using utils/fix_data_dir.sh $data" -# fi - -# echo "Succeeded creating PITCH features for $name" From ea25438dc3e82ca2dfadac085219c724bb4583e7 Mon Sep 17 00:00:00 2001 From: Tom Jorquera Date: Fri, 13 Jan 2017 20:29:19 +0100 Subject: [PATCH 229/530] [build] Fix return code of tools/extras/install_sacc.sh (#1337) --- tools/extras/install_sacc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/extras/install_sacc.sh b/tools/extras/install_sacc.sh index ff78506500d..4c55a76a6a0 100755 --- a/tools/extras/install_sacc.sh +++ b/tools/extras/install_sacc.sh @@ -71,4 +71,4 @@ cd SAcC_GLNXA64 && echo "**Error testing SAcC-- something went wrong." && exit 1; echo "Test succeeded." -exit 1; +exit 0; From e1e7bbf5c677f4b64206cd21cac560877e3594f8 Mon Sep 17 00:00:00 2001 From: pegahgh Date: Sat, 14 Jan 2017 00:56:06 -0500 Subject: [PATCH 230/530] [scripts] nnet3: fix to xconfig parsing to enable e.g. "input@-1" in xconfig descriptors (#1338) --- egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py index d88e0176ab5..3d958568717 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py @@ -484,7 +484,7 @@ def parse_config_line(orig_config_line): # treats splitting on space as a special case that may give zero fields. config_line = orig_config_line.split('#')[0] # Note: this set of allowed characters may have to be expanded in future. - x = re.search('[^a-zA-Z0-9\.\-\(\)_=,/\s"]', config_line) + x = re.search('[^a-zA-Z0-9\.\-\(\)@_=,/\s"]', config_line) if x is not None: bad_char = x.group(0) if bad_char == "'": From 18dddc2a64b4bd39adaebd7a497f01c32d25cb88 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 14 Jan 2017 18:58:40 -0500 Subject: [PATCH 231/530] Various refactoring of discriminative training; other fixes. 
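The main change is a new script, steps/nnet3/get_degs.sh, which decodes and
dumps discriminative examples in a single pipeline so that the non-compact
lattices never have to be written to disk. Alongside it, the
--truncate-deriv-weights options are removed from the chain and
discriminative training scripts, and supervision creation is folded into
nnet3-discriminative-get-egs (discriminative-get-supervision.cc is deleted).
A sketch of how the new script is meant to be called, following its own
usage message (the --cmd and --nj values here are illustrative):

  steps/nnet3/get_degs.sh --cmd "$decode_cmd" --nj 200 \
    data/train data/lang exp/nnet3/tdnn_a exp/nnet3/tdnn_a_ali \
    exp/nnet3/tdnn_a_degs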
--- .../s5/local/nnet3/run_tdnn_discriminative.sh | 30 +- .../s5/local/chain/run_tdnn_discriminative.sh | 8 +- .../s5/local/nnet3/run_tdnn_discriminative.sh | 30 +- .../tuning/run_blstm_6h_discriminative.sh | 8 +- .../tuning/run_tdnn_6h_discriminative.sh | 8 +- .../local/nnet3/run_blstm_discriminative.sh | 8 +- .../local/nnet3/run_tdnn_discriminative.sh | 30 +- .../s5/local/nnet3/run_tdnn_discriminative.sh | 30 +- egs/tedlium/s5_r2/local/nnet3/compare_wer.sh | 4 +- .../nnet3/tuning/run_tdnn_lstm_1b_disc.sh | 187 +++++++ .../s5/local/nnet3/run_lstm_discriminative.sh | 26 +- .../s5/local/nnet3/run_tdnn_discriminative.sh | 26 +- egs/wsj/s5/steps/nnet3/align.sh | 5 +- egs/wsj/s5/steps/nnet3/chain/train.py | 5 - egs/wsj/s5/steps/nnet3/chain/train_tdnn.sh | 4 +- egs/wsj/s5/steps/nnet3/get_degs.sh | 499 ++++++++++++++++++ egs/wsj/s5/steps/nnet3/get_egs.sh | 2 +- .../s5/steps/nnet3/get_egs_discriminative.sh | 45 +- egs/wsj/s5/steps/nnet3/make_denlats.sh | 5 +- .../s5/steps/nnet3/train_discriminative.sh | 43 +- egs/wsj/s5/utils/filter_scps.pl | 3 +- egs/wsj/s5/utils/split_data.sh | 15 +- src/chainbin/nnet3-chain-copy-egs.cc | 8 +- src/nnet3/discriminative-supervision.cc | 89 ++-- src/nnet3/discriminative-supervision.h | 90 ++-- src/nnet3/nnet-am-decodable-simple.cc | 4 +- src/nnet3/nnet-am-decodable-simple.h | 5 +- src/nnet3/nnet-chain-example.cc | 22 - src/nnet3/nnet-chain-example.h | 9 - src/nnet3/nnet-discriminative-example.cc | 21 - src/nnet3/nnet-discriminative-example.h | 9 - src/nnet3bin/Makefile | 2 +- .../discriminative-get-supervision.cc | 100 ---- src/nnet3bin/nnet3-align-compiled.cc | 11 +- src/nnet3bin/nnet3-discriminative-copy-egs.cc | 9 +- src/nnet3bin/nnet3-discriminative-get-egs.cc | 53 +- 36 files changed, 958 insertions(+), 495 deletions(-) create mode 100755 egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh create mode 100755 egs/wsj/s5/steps/nnet3/get_degs.sh delete mode 100644 src/nnet3bin/discriminative-get-supervision.cc diff --git a/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh index 4afa867503a..aa2a845d6a8 100644 --- a/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh @@ -8,7 +8,7 @@ set -o pipefail # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, # since the lattice generation runs in about real-time, so takes of the order of # 1000 hours of CPU time. -# +# . ./cmd.sh @@ -38,16 +38,15 @@ dir=${srcdir}_${criterion} ## Egs options frames_per_eg=150 frames_overlap_per_eg=30 -truncate_deriv_weights=10 ## Nnet training options effective_learning_rate=0.00000125 max_param_change=1 num_jobs_nnet=4 num_epochs=2 -regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options +regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true # May need to be set to false +adjust_priors=true # May need to be set to false # because it does not help in some setups modify_learning_rates=true last_layer_factor=0.1 @@ -57,8 +56,8 @@ decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we deci if $use_gpu; then if ! 
cuda-compiled; then - cat < $dir/num_jobs -sdata=$data/split$nj -[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; +sdata=$data/split${nj}utt +[[ -d $sdata && $data/feats.scp -ot $sdata ]] || \ + split_data.sh --per-utt $data $nj || exit 1; if $use_gpu; then queue_opt="--gpu 1" diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index ba661847561..9dd04f45d71 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -86,11 +86,6 @@ def get_args(): action=common_lib.StrToBoolAction, choices=["true", "false"], help="") - parser.add_argument("--chain.truncate-deriv-weights", type=float, - dest='truncate_deriv_weights', default=0, - help="""Can be used to set to zero the weights of - derivs from frames near the edges. (counts subsampled - frames)""") parser.add_argument("--chain.frame-subsampling-factor", type=int, dest='frame_subsampling_factor', default=3, help="ratio of frames-per-second of features we " diff --git a/egs/wsj/s5/steps/nnet3/chain/train_tdnn.sh b/egs/wsj/s5/steps/nnet3/chain/train_tdnn.sh index ada92e66ff4..cb1d7d1c357 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train_tdnn.sh +++ b/egs/wsj/s5/steps/nnet3/chain/train_tdnn.sh @@ -20,8 +20,6 @@ num_epochs=10 # Number of epochs of training; # Be careful with this: we actually go over the data # num-epochs * frame-subsampling-factor times, due to # using different data-shifts. -truncate_deriv_weights=0 # can be used to set to zero the weights of derivs from frames - # near the edges. (counts subsampled frames). apply_deriv_weights=true initial_effective_lrate=0.0002 final_effective_lrate=0.00002 @@ -530,7 +528,7 @@ while [ $x -lt $num_iters ]; do $this_cache_io_opts $parallel_train_opts $deriv_time_opts \ --max-param-change=$this_max_param_change \ --print-interval=10 "$mdl" $dir/den.fst \ - "ark,bg:nnet3-chain-copy-egs --truncate-deriv-weights=$truncate_deriv_weights --frame-shift=$frame_shift ark:$egs_dir/cegs.$archive.ark ark:- | nnet3-chain-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-chain-merge-egs --minibatch-size=$this_minibatch_size ark:- ark:- |" \ + "ark,bg:nnet3-chain-copy-egs --frame-shift=$frame_shift ark:$egs_dir/cegs.$archive.ark ark:- | nnet3-chain-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-chain-merge-egs --minibatch-size=$this_minibatch_size ark:- ark:- |" \ $dir/$[$x+1].$n.raw || touch $dir/.error & done wait diff --git a/egs/wsj/s5/steps/nnet3/get_degs.sh b/egs/wsj/s5/steps/nnet3/get_degs.sh new file mode 100755 index 00000000000..cc3ab5c4b13 --- /dev/null +++ b/egs/wsj/s5/steps/nnet3/get_degs.sh @@ -0,0 +1,499 @@ +#!/bin/bash + +# Copyright 2012-2016 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. +# Copyright 2014-2015 Vimal Manohar + +# Decodes denlats and dumps egs for discriminative training, in one script +# (avoids writing the non-compact lattices to disk, which can use a lot of disk +# space). + + +# Begin configuration section. +cmd=run.pl +max_copy_jobs=5 # Limit disk I/O + +# feature options +feat_type=raw # set it to 'lda' to use LDA features. +transform_dir= # If this is a SAT system, directory for transforms +online_ivector_dir= + +# example splitting and context options +frames_per_eg=150 # number of frames of labels per example. + # Note: may in general be a comma-separated string of alternative + # durations; the first one (the principal num-frames) is preferred. 
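# (For instance, under the comma-separated convention just described,
#  frames_per_eg=150,110,40 would mean: prefer 150-frame chunks but fall back
#  to 110- or 40-frame chunks for utterance remainders; only the first,
#  "principal" value enters the archive-size arithmetic further down.)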
+frames_overlap_per_eg=30 # number of supervised frames of overlap that we aim for per eg. + # can be useful to avoid wasted data if you're using --left-deriv-truncate + # and --right-deriv-truncate. +looped=false # Set to true to enable looped decoding [can + # be a bit faster, for forward-recurrent models like LSTMs.] + +# .. these context options also affect decoding. +extra_left_context=0 # amount of left-context per eg, past what is required by the model + # (only useful for recurrent networks like LSTMs/BLSTMs) +extra_right_context=0 # amount of right-context per eg, past what is required by the model + # (only useful for backwards-recurrent networks like BLSTMs) +extra_left_context_initial=-1 # if >= 0, the --extra-left-context to use at + # the start of utterances. Recommend 0 if you + # used 0 for the baseline DNN training; if <0, + # defaults to same as extra_left_context +extra_right_context_final=-1 # if >= 0, the --extra-right-context to use at + # the end of utterances. Recommend 0 if you + # used 0 for the baseline DNN training; if <0, + # defaults to same as extra_left_context + +compress=true # set this to false to disable lossy compression of features + # dumped with egs (e.g. if you want to see whether results are + # affected). + +num_utts_subset=80 # number of utterances in validation and training + # subsets used for diagnostics. +num_egs_subset=800 # number of egs (maximum) for the validation and training + # subsets used for diagnostics. +frames_per_iter=400000 # each iteration of training, see this many frames + # per job. This is just a guideline; it will pick a number + # that divides the number of samples in the entire data. +cleanup=true + +stage=0 +nj=200 + +# By default this script uses final.mdl in , this configures it. +iter=final + + +# decoding-graph option +self_loop_scale=0.1 # for decoding graph.. should be 1.0 for chain models. + +# options relating to decoding. +frames_per_chunk_decoding=150 +beam=13.0 +lattice_beam=7.0 +acwt=0.1 +max_active=5000 +min_active=200 +max_mem=20000000 # This will stop the processes getting too large. +# This is in bytes, but not "real" bytes-- you have to multiply +# by something like 5 or 10 to get real bytes (not sure why so large) +num_threads=1 + +# affects whether we invoke lattice-determinize-non-compact after decoding +# discriminative-get-supervision. +determinize_before_split=true + + +# End configuration section. + + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + + +if [ $# != 5 ]; then + echo "Usage: $0 [opts] " + echo " e.g.: $0 data/train data/lang exp/nnet3/tdnn_a exp/nnet3/tdnn_a_ali exp/nnet3/tdnn_a_degs" + echo "" + echo "For options, see top of script file. Standard options:" + echo " --config # config file containing options" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs (probably would be good to add -tc 5 or so if using" + echo " # GridEngine (to avoid excessive NFS traffic)." + echo " --stage # Used to run a partially-completed training process from somewhere in" + echo " # the middle." + echo " --online-ivector-dir # Directory for online-estimated iVectors, used in the" + echo " # online-neural-net setup." + echo " --nj # number of jobs to submit to the queue." + echo " --num-threads # number of threads per decoding job" + exit 1; +fi + +data=$1 +lang=$2 +srcdir=$3 +alidir=$4 +dir=$5 + + +extra_files= +[ ! 
-z $online_ivector_dir ] && \ + extra_files="$extra_files $online_ivector_dir/ivector_period $online_ivector_dir/ivector_online.scp" +[ "$feat_type" = "lda" ] && \ + extra_files="$extra_files $srcdir/final.mat" +[ ! -z $transform_dir ] && \ + extra_files="$extra_files $transform_dir/trans.1 $transform_dir/num_jobs" + +# Check some files. +for f in $data/feats.scp $lang/L.fst $srcdir/${iter}.mdl $srcdir/tree \ + $srcdir/cmvn_opts $alidir/ali.1.gz $alidir/num_jobs $extra_files; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +mkdir -p $dir/log $dir/info || exit 1; + +utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1; +utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; +cp $lang/phones.txt $dir || exit 1; + + + +utils/split_data.sh --per-utt $data $nj +sdata=$data/split${nj}utt + + +## Set up features. +if [ -z "$feat_type" ]; then + if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=raw; fi +fi +echo "$0: feature type is $feat_type" + + +cmvn_opts=$(cat $srcdir/cmvn_opts) || exit 1 + +case $feat_type in + delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; + raw) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |" + ;; + lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" + cp $srcdir/final.mat $dir + ;; + *) echo "Invalid feature type $feat_type" && exit 1; +esac + +if [ ! -z "$transform_dir" ]; then + echo "$0: using transforms from $transform_dir" + [ ! -s $transform_dir/num_jobs ] && \ + echo "$0: expected $transform_dir/num_jobs to contain the number of jobs." && exit 1; + nj_orig=$(cat $transform_dir/num_jobs) + + if [ $feat_type == "raw" ]; then trans=raw_trans; + else trans=trans; fi + if [ $feat_type == "lda" ] && ! cmp $transform_dir/final.mat $srcdir/final.mat; then + echo "$0: LDA transforms differ between $srcdir and $transform_dir" + exit 1; + fi + if [ ! -f $transform_dir/$trans.1 ]; then + echo "$0: expected $transform_dir/$trans.1 to exist (--transform-dir option)" + exit 1; + fi + if [ $nj -ne $nj_orig ]; then + # Copy the transforms into an archive with an index. + for n in $(seq $nj_orig); do cat $transform_dir/$trans.$n; done | \ + copy-feats ark:- ark,scp:$dir/$trans.ark,$dir/$trans.scp || exit 1; + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk scp:$dir/$trans.scp ark:- ark:- |" + else + # number of jobs matches with alignment dir. + feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/$trans.JOB ark:- ark:- |" + fi +fi + + +## set iVector options +if [ ! -z "$online_ivector_dir" ]; then + online_ivector_period=$(cat $online_ivector_dir/ivector_period) + ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$online_ivector_period" +fi + +## set frame-subsampling-factor option and copy file +if [ -f $srcdir/frame_subsampling_factor ]; then + frame_subsampling_factor=$(cat $srcdir/frame_subsampling_factor) || exit 1 + # e.g. for 'chain' systems + frame_subsampling_opt="--frame-subsampling-factor=$frame_subsampling_factor" + cp $srcdir/frame_subsampling_factor $dir +else + frame_subsampling_factor=1 +fi + + +## Make the decoding graph. 
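# (Stage 0 below estimates a unigram G.fst from the training transcripts and
#  then compiles the decoding graph with utils/mkgraph.sh; as noted where
#  self_loop_scale is declared above, chain models would want
#  --self-loop-scale 1.0 here rather than the 0.1 default.)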
+if [ $stage -le 0 ]; then + new_lang="$dir/"$(basename "$lang") + rm -r $new_lang 2>/dev/null + cp -rH $lang $dir + echo "$0: Making unigram grammar FST in $new_lang" + oov=$(cat data/lang/oov.txt) + cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ + awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ + utils/make_unigram_grammar.pl | fstcompile | fstarcsort --sort_type=ilabel > $new_lang/G.fst \ + || exit 1; + + utils/mkgraph.sh --self-loop-scale $self_loop_scale $new_lang $srcdir $dir/dengraph || exit 1; +fi + +# copy alignments into ark,scp format which allows us to use different num-jobs +# from the alignment, and is also convenient for getting priors. +if [ $stage -le 1 ]; then + echo "$0: Copying input alignments" + nj_ali=$(cat $alidir/num_jobs) + alis=$(for n in $(seq $nj_ali); do echo -n "$alidir/ali.$n.gz "; done) + $cmd $dir/log/copy_alignments.log \ + copy-int-vector "ark:gunzip -c $alis|" \ + ark,scp:$dir/ali.ark,$dir/ali.scp || exit 1; +fi + +[ -f $dir/ali.scp ] || { echo "$0: expected $dir/ali.scp to exist"; exit 1; } + +if [ $stage -le 2 ]; then + echo "$0: working out number of frames of training data" + num_frames=$(steps/nnet2/get_num_frames.sh $data) + echo $num_frames > $dir/info/num_frames + echo "$0: working out feature dim" + feats_one="$(echo $feats | sed s:JOB:1:g)" + if feat_dim=$(feat-to-dim "$feats_one" - 2>/dev/null); then + echo $feat_dim > $dir/info/feat_dim + else # run without stderr redirection to show the error. + feat-to-dim "$feats_one" -; exit 1 + fi +fi + +# copy the model to the degs directory. +cp $srcdir/${iter}.mdl $dir/final.mdl || exit 1 + +# Create some info in $dir/info + +# Work out total number of archives. Add one on the assumption the +# num-frames won't divide exactly, and we want to round up. +num_archives=$[num_frames/frames_per_iter+1] + +echo $num_archives >$dir/info/num_archives +echo $frame_subsampling_factor >$dir/info/frame_subsampling_factor + +# the first field in frames_per_eg (which is a comma-separated list of numbers) +# is the 'principal' frames-per-eg, and for purposes of working out the number +# of archives we assume that this will be the average number of frames per eg. +frames_per_eg_principal=$(echo $frames_per_eg | cut -d, -f1) + +if [ -e $dir/storage ]; then + # Make soft links to storage directories, if distributing this way.. See + # utils/create_split_dir.pl. 
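# (Typically the caller would have set up $dir/storage beforehand with
#  something like the following -- the /export disk names are illustrative:
#
#    utils/create_split_dir.pl /export/b0{1,2}/$USER/degs/storage $dir/storage
#
#  after which the create_data_link.pl calls below point each archive file at
#  a round-robin choice among those disks.)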
+ echo "$0: creating data links" + utils/create_data_link.pl $(for x in $(seq $num_archives); do echo $dir/degs.$x.ark; done) + utils/create_data_link.pl $(for x in $(seq $num_archives); do echo $dir/degs.$x.scp; done) + utils/create_data_link.pl $(for y in $(seq $nj); do echo $dir/degs_orig.$y.ark; done) + utils/create_data_link.pl $(for y in $(seq $nj); do echo $dir/degs_orig.$y.scp; done) + utils/create_data_link.pl $(for y in $(seq $nj); do echo $dir/degs_orig_filtered.$y.scp; done) +fi + + +extra_context_opts="--extra-left-context=$extra_left_context --extra-right-context=$extra_right_context --extra-left-context-initial=$extra_left_context_initial --extra-right-context-final=$extra_right_context_final" + +# work out absolute context opts, --left-context and so on [need model context] +model_left_context=$(nnet3-am-info $srcdir/${iter}.mdl | grep "^left-context:" | awk '{print $2}') +model_right_context=$(nnet3-am-info $srcdir/${iter}.mdl | grep "^right-context:" | awk '{print $2}') +left_context=$[model_left_context+extra_left_context+frame_subsampling_factor/2] +right_context=$[model_right_context+extra_right_context+frame_subsampling_factor/2] +context_opts="--left-context=$left_context --right-context=$right_context" +if [ $extra_left_context_initial -ge 0 ]; then + left_context_initial=$[model_left_context+extra_left_context_initial+frame_subsampling_factor/2] + context_opts="$context_opts --left-context-initial=$left_context_initial" +fi +if [ $extra_right_context_final -ge 0 ]; then + right_context_final=$[model_right_context+extra_right_context_final+frame_subsampling_factor/2] + context_opts="$context_opts --right-context-final=$right_context_final" +fi + +## +if [ $num_threads -eq 1 ]; then + if $looped; then + decoder="nnet3-latgen-faster-looped" + [ $extra_left_context_initial -ge 0 ] && \ + decoder="$decoder --extra-left-context-initial=$extra_left_context_initial" + else + decoder="nnet3-latgen-faster $extra_context_opts" + fi + threads_cmd_opt= +else + $looped && { echo "$0: --num-threads must be one if you use looped decoding"; exit 1; } + threads_cmd_opt="--num-threads $num_threads" + decoder="nnet3-latgen-faster-parallel --num-threads=$num_threads $extra_context_opts" + true +fi + +# set the command to determinize lattices, if specified. +if $determinize_before_split; then + lattice_determinize_cmd="lattice-determinize-non-compact --acoustic-scale=$acwt --max-mem=$max_mem --minimize=true --prune --beam=$lattice_beam ark:- ark:-" +else + lattice_determinize_cmd="cat" +fi + +if [ $stage -le 3 ]; then + echo "$0: decoding and dumping egs" + $cmd $threads_cmd_opt JOB=1:$nj $dir/log/decode_and_get_egs.JOB.log \ + $decoder \ + $ivector_opts $frame_subsampling_opt \ + --frames-per-chunk=$frames_per_chunk_decoding \ + --determinize-lattice=false \ + --max-active=$max_active --min-active=$min_active --beam=$beam \ + --lattice-beam=$lattice_beam --acoustic-scale=$acwt --allow-partial=false \ + --word-symbol-table=$lang/words.txt $dir/final.mdl \ + $dir/dengraph/HCLG.fst "$feats" ark:- \| \ + $lattice_determinize_cmd \| \ + nnet3-discriminative-get-egs --acoustic-scale=$acwt --compress=$compress \ + --num-frames=$frames_per_eg --num-frames-overlap=$frames_overlap_per_eg \ + $ivector_opts $context_opts \ + $dir/final.mdl "$feats" "ark,s,cs:-" \ + "scp:utils/filter_scp.pl $sdata/JOB/utt2spk $dir/ali.scp |" \ + ark,scp:$dir/degs_orig.JOB.ark,$dir/degs_orig.JOB.scp || exit 1 +fi + + +if [ $stage -le 4 ]; then + echo "$0: getting validation utterances." 
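# (These held-out utterances are excluded from the training archives further
#  down -- their egs are grep'ed out of degs_orig.*.scp -- and are written
#  instead to valid_diagnostic.*, which training uses for diagnostics.)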
+ + ## Get list of validation utterances. + awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \ + > $dir/valid_uttlist || exit 1; + + if [ -f $data/utt2uniq ]; then # this matters if you use data augmentation. + echo "File $data/utt2uniq exists, so augmenting valid_uttlist to" + echo "include all perturbed versions of the same 'real' utterances." + mv $dir/valid_uttlist $dir/valid_uttlist.tmp + utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt + cat $dir/valid_uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \ + sort | uniq | utils/apply_map.pl $dir/uniq2utt | \ + awk '{for(n=1;n<=NF;n++) print $n;}' | sort > $dir/valid_uttlist + rm $dir/uniq2utt $dir/valid_uttlist.tmp + fi + + # the following awk statement turns 'foo123' into something like + # '^foo123-[0-9]\+ ' which is a grep expression that matches the lines in the + # .scp file that correspond to an utterance in valid_uttlist. + cat $dir/valid_uttlist | awk '{printf("^%s-[0-9]\\+ \n", $1);}' \ + >$dir/valid_uttlist.regexps || exit 1 + + # remove the validation utterances from deg_orig.*.scp to produce + # degs_orig_filtered.*.scp. + # note: the '||' true is in case the grep returns nonzero status for + # some splits, because they were all validation utterances. + $cmd JOB=1:$nj $dir/log/filter_and_shuffle.JOB.log \ + grep -v -f $dir/valid_uttlist.regexps $dir/degs_orig.JOB.scp '>' \ + $dir/degs_orig_filtered.JOB.scp '||' true || exit 1 + + # extract just the validation utterances from deg_orig.*.scp to produce + # degs_valid.*.scp. + $cmd JOB=1:$nj $dir/log/extract_validation_egs.JOB.log \ + grep -f $dir/valid_uttlist.regexps $dir/degs_orig.JOB.scp '>' \ + $dir/degs_valid.JOB.scp '||' true || exit 1 + + for j in $(seq $nj); do + cat $dir/degs_valid.$j.scp; rm $dir/degs_valid.$j.scp; + done | utils/shuffle_list.pl | head -n$num_utts_subset >$dir/valid_diagnostic.scp || exit 1 + + [ -s $dir/valid_diagnostic.scp ] || { echo "$0: error getting validation egs"; exit 1; } +fi + + +# read 'mof' as max_open_filehandles. +# When splitting up the scp files, we don't want to have to hold too many +# files open at once. +mof=$(ulimit -n) || exit 1 +# the next step helps work around inconsistency between different machines on a +# cluster. It's unlikely that the allowed number of open filehandles would ever +# be less than 256. +if [ $mof -gt 256 ]; then mof=256; fi +# allocate mof minus 3 for the max allowed outputs, because of +# stdin,stderr,stdout. this will normally come to 253. We'll do a two-stage +# splitting if the needed number of scp files is larger than this. +num_groups=$[(num_archives+(mof-3)-1)/(mof-3)] +group_size=$[(num_archives+num_groups-1)/num_groups] +if [ $num_groups -gt 1 ]; then + new_num_archives=$[group_size*num_groups] + [ $new_num_archives -ne $num_archives ] && \ + echo "$0: rounding up num-archives from $num_archives to $new_num_archives for easier splitting" + echo $new_num_archives >$dir/info/num_archives +fi + + +# function/pseudo-command to randomly shuffle input lines using a small buffer size +function shuffle { + perl -e ' use List::Util qw(shuffle); srand(0); + $bufsz=1000; @A = (); while() { push @A, $_; if (@A == $bufsz) { + $n=int(rand()*$bufsz); print $A[$n]; $A[$n] = $A[$bufsz-1]; pop @A; }} + @A = shuffle(@A); print @A; ' + } +# funtion/pseudo-command to put input lines round robin to command line args. 
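# (To illustrate the behaviour of round_robin as defined below: with two
#  output files,
#    printf '%s\n' a b c d | round_robin one.scp two.scp
#  leaves lines a and c in one.scp and lines b and d in two.scp, i.e. input
#  line i goes to output i mod N; stage 5 uses this to deal the shuffled scp
#  lines out across the archives.)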
+# function/pseudo-command to put input lines round robin to command line args
+# (there is a worked example near the end of this script).
+function round_robin {
+  perl -e '@F=(); foreach $a (@ARGV) { my $f; open($f, ">$a") || die "opening file $a"; push @F, $f; }
+    $N=@F; $N>0||die "No output files"; $n=0;
+    while (<STDIN>) { $fh=$F[$n%$N]; $n++; print $fh $_ || die "error printing"; } ' $*
+}
+
+
+if [ $stage -le 5 ]; then
+  echo "$0: rearranging scp files"
+
+  if [ $num_groups -eq 1 ]; then
+    # output directly to the archive files.
+    outputs=$(for n in $(seq $num_archives); do echo $dir/degs.$n.scp; done)
+  else
+    # output to intermediate 'group' files.
+    outputs=$(for g in $(seq $num_groups); do echo $dir/degs_group.$g.scp; done)
+  fi
+
+  # We can't use UNIX's split command because of compatibility issues (BSD
+  # version very different from GNU version), so we use 'round_robin' which is
+  # a bash function that calls an inline perl script.
+  for j in $(seq $nj); do cat $dir/degs_orig_filtered.$j.scp; done | \
+    shuffle | round_robin $outputs || exit 1
+
+  if [ $num_groups -gt 1 ]; then
+    for g in $(seq $num_groups); do
+      first=$[1+group_size*(g-1)]
+      last=$[group_size*g]
+      outputs=$(for n in $(seq $first $last); do echo $dir/degs.$n.scp; done)
+      cat $dir/degs_group.$g.scp | shuffle | round_robin $outputs
+    done
+  fi
+fi
+
+if [ $stage -le 6 ]; then
+  echo "$0: getting train-subset scp"
+  # get train_diagnostic.scp by taking the top and tail of the degs files [quicker
+  # than cat'ing all the files, random shuffling and head]
+
+  nl=$[$num_egs_subset/$num_archives + 1]
+
+  # use utils/shuffle_list.pl because it provides a complete shuffle (ok since
+  # the amount of data is small). note: shuf is not available on mac by
+  # default.
+  for n in $(seq $num_archives); do
+    head -n$nl $dir/degs.$n.scp; tail -n$nl $dir/degs.$n.scp
+  done | utils/shuffle_list.pl | head -n$num_utts_subset >$dir/train_diagnostic.scp
+  [ -s $dir/train_diagnostic.scp ] || { echo "$0: error getting train_diagnostic.scp"; exit 1; }
+fi
+
+if [ $stage -le 7 ]; then
+  echo "$0: creating final archives"
+  $cmd --max-jobs-run "$max_copy_jobs" \
+    JOB=1:$num_archives $dir/log/copy_archives.JOB.log \
+    nnet3-discriminative-copy-egs scp:$dir/degs.JOB.scp ark:$dir/degs.JOB.ark || exit 1
+
+  run.pl $dir/log/copy_train_subset.log \
+    nnet3-discriminative-copy-egs scp:$dir/train_diagnostic.scp \
+    ark:$dir/train_diagnostic.ark || exit 1
+
+  run.pl $dir/log/copy_valid_subset.log \
+    nnet3-discriminative-copy-egs scp:$dir/valid_diagnostic.scp \
+    ark:$dir/valid_diagnostic.ark || exit 1
+fi
+
+if [ $stage -le 10 ] && $cleanup; then
+  echo "$0: cleaning up temporary files."
+  for j in $(seq $nj); do
+    for f in $dir/degs_orig.$j.{ark,scp} $dir/degs_orig_filtered.$j.scp; do
+      [ -L $f ] && rm $(readlink -f $f); rm $f
+    done
+  done
+  rm $dir/degs_group.*.scp $dir/valid_diagnostic.scp $dir/train_diagnostic.scp 2>/dev/null
+  rm $dir/ali.ark $dir/ali.scp 2>/dev/null
+  for n in $(seq $num_archives); do
+    for f in $dir/degs.$n.scp; do
+      [ -L $f ] && rm $(readlink -f $f); rm $f
+    done
+  done
+fi
+
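+# For illustration (hypothetical file names, not executed by this script):
+# 'round_robin' above distributes stdin lines cyclically over its arguments, so
+#   seq 1 10 | round_robin /tmp/a.scp /tmp/b.scp /tmp/c.scp
+# would put lines 1,4,7,10 in /tmp/a.scp, lines 2,5,8 in /tmp/b.scp and
+# lines 3,6,9 in /tmp/c.scp.
+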
+
+echo "$0: Finished decoding and preparing training examples"
+exit 0
diff --git a/egs/wsj/s5/steps/nnet3/get_egs.sh b/egs/wsj/s5/steps/nnet3/get_egs.sh
index 27877680982..cb7ea0ac73c 100755
--- a/egs/wsj/s5/steps/nnet3/get_egs.sh
+++ b/egs/wsj/s5/steps/nnet3/get_egs.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+# Copyright 2012-2016 Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
 #
 # This script, which will generally be called from other neural-net training
 # scripts, extracts the training examples used to train the neural net (and also
diff --git a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh
index fd616160632..377c49fc5cb 100755
--- a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh
+++ b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh
@@ -1,8 +1,11 @@
 #!/bin/bash
 
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+# Copyright 2012-2016  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
 # Copyright 2014-2015  Vimal Manohar
 
+# Note: you may find it more convenient to use the newer script get_degs.sh, which
+# combines decoding and example-creation in one step without writing lattices.
+
 # This script dumps examples for MPE or MMI or state-level minimum Bayes risk (sMBR)
 # training of neural nets.
 # Criteria supported are mpe, smbr and mmi
 
@@ -12,6 +15,8 @@ cmd=run.pl
 feat_type=raw  # set it to 'lda' to use LDA features.
 frames_per_eg=150 # number of frames of labels per example.  more->less disk space and
                   # less time preparing egs, but more I/O during training.
+                  # Note: may in general be a comma-separated string of alternative
+                  # durations; the first one (the principal num-frames) is preferred.
 frames_overlap_per_eg=30 # number of supervised frames of overlap that we aim for per eg.
                   # can be useful to avoid wasted data if you're using --left-deriv-truncate
                   # and --right-deriv-truncate.
@@ -32,11 +37,6 @@ frames_per_iter=400000 # each iteration of training, see this many frames
                        # per job.  This is just a guideline; it will pick a number
                        # that divides the number of samples in the entire data.
 
-determinize=true
-minimize=true
-remove_output_symbols=true
-remove_epsilons=true
-collapse_transition_ids=true
 acwt=0.1
 
 stage=0
@@ -225,7 +225,7 @@ if [ $stage -le 2 ]; then
   fi
 fi
 
-# Working out total number of archives. Add one on the assumption the
+# Work out total number of archives. Add one on the assumption the
 # num-frames won't divide exactly, and we want to round up.
 num_archives=$[$num_frames/$frames_per_iter+1]
 
@@ -244,8 +244,14 @@ num_archives=$[$archives_multiple*$num_archives_intermediate] || exit 1;
 echo $num_archives >$dir/info/num_archives
 echo $frames_per_eg >$dir/info/frames_per_eg
 
+
+# the first field in frames_per_eg (which is a comma-separated list of numbers)
+# is the 'principal' frames-per-eg, and for purposes of working out the number
+# of archives we assume that this will be the average number of frames per eg.
+frames_per_eg_principal=$(echo $frames_per_eg | cut -d, -f1)
+
 # Work out the number of egs per archive
-egs_per_archive=$[$num_frames/($frames_per_eg*$num_archives)] || exit 1;
+egs_per_archive=$[$num_frames/($frames_per_eg_principal*$num_archives)] || exit 1;
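+# For example (hypothetical values, for illustration only): with
+# frames_per_eg=150,110,90 the principal frames-per-eg is 150, so with
+# num_frames=15000000 and num_archives=25 this would give
+# egs_per_archive=$[15000000/(150*25)]=4000.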
 ! [ $egs_per_archive -le $frames_per_iter ] && \
   echo "$0: script error: egs_per_archive=$egs_per_archive not <= frames_per_iter=$frames_per_iter" \
   && exit 1;
@@ -279,7 +285,6 @@ if [ $stage -le 3 ]; then
   for id in $(seq $nj); do cat $dir/lat_special.$id.scp; done > $dir/lat_special.scp
 fi
 
-splitter_opts="--supervision-splitter.determinize=$determinize --supervision-splitter.minimize=$minimize --supervision-splitter.remove_output_symbols=$remove_output_symbols --supervision-splitter.remove_epsilons=$remove_epsilons --supervision-splitter.collapse-transition-ids=$collapse_transition_ids --supervision-splitter.acoustic-scale=$acwt"
 
 # If frame_subsampling_factor > 0, we will later be shifting the egs slightly to
@@ -291,7 +296,7 @@ right_context=$[right_context+frame_subsampling_factor/2]
 [ $left_context_initial -ge 0 ] && left_context_initial=$[left_context_initial+frame_subsampling_factor/2]
 [ $right_context_final -ge 0 ] && right_context_final=$[right_context_final+frame_subsampling_factor/2]
 
-egs_opts="--left-context=$left_context --right-context=$right_context --num-frames=$frames_per_eg --compress=$compress --frame-subsampling-factor=$frame_subsampling_factor $splitter_opts"
+egs_opts="--left-context=$left_context --right-context=$right_context --num-frames=$frames_per_eg --compress=$compress --frame-subsampling-factor=$frame_subsampling_factor --acoustic-scale=$acwt"
 [ $left_context_initial -ge 0 ] && egs_opts="$egs_opts --left-context-initial=$left_context_initial"
 [ $right_context_final -ge 0 ] && egs_opts="$egs_opts --right-context-final=$right_context_final"
@@ -305,8 +310,6 @@ priors_egs_opts="--left-context=$left_context --right-context=$right_context --n
 [ $right_context_final -ge 0 ] && priors_egs_opts="$priors_egs_opts --right-context-final=$right_context_final"
 
-supervision_all_opts="--frame-subsampling-factor=$frame_subsampling_factor"
-
 echo $left_context > $dir/info/left_context
 echo $right_context > $dir/info/right_context
 echo $left_context_initial > $dir/info/left_context_initial
@@ -368,16 +371,14 @@
     <$dir/ali.scp >$dir/ali_special.scp
 
   $cmd $dir/log/create_valid_subset.log \
-    discriminative-get-supervision $supervision_all_opts \
-    scp:$dir/ali_special.scp scp:$dir/lat_special.scp ark:- \| \
     nnet3-discriminative-get-egs $ivector_opts $egs_opts \
-    $dir/final.mdl "$valid_feats" ark,s,cs:- "ark:$dir/valid_diagnostic.degs" || touch $dir/.error &
+    $dir/final.mdl "$valid_feats" scp:$dir/lat_special.scp \
+    scp:$dir/ali_special.scp "ark:$dir/valid_diagnostic.degs" || touch $dir/.error &
   $cmd $dir/log/create_train_subset.log \
-    discriminative-get-supervision $supervision_all_opts \
-    scp:$dir/ali_special.scp scp:$dir/lat_special.scp ark:- \| \
     nnet3-discriminative-get-egs $ivector_opts $egs_opts \
-    $dir/final.mdl "$train_subset_feats" ark,s,cs:- "ark:$dir/train_diagnostic.degs" || touch $dir/.error &
+    $dir/final.mdl "$train_subset_feats" scp:$dir/lat_special.scp \
+    scp:$dir/ali_special.scp "ark:$dir/train_diagnostic.degs" || touch $dir/.error &
   wait;
   [ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1
   echo "... Getting subsets of validation examples for diagnostics and combination."
@@ -403,12 +404,10 @@
   # files is the product of 'nj' by 'num_archives_intermediate', which might be
   # quite large.
   $cmd --max-jobs-run $max_jobs_run JOB=1:$nj $dir/log/get_egs.JOB.log \
-    discriminative-get-supervision $supervision_all_opts \
-    "scp:utils/filter_scp.pl $sdata/JOB/utt2spk $dir/ali.scp |" \
-    "ark,s,cs:gunzip -c $denlatdir/lat.JOB.gz |" ark:- \| \
     nnet3-discriminative-get-egs $ivector_opts $egs_opts \
-    --num-frames-overlap=$frames_overlap_per_eg \
-    $dir/final.mdl "$feats" ark,s,cs:- ark:- \| \
+    --num-frames-overlap=$frames_overlap_per_eg \
+    $dir/final.mdl "$feats" "ark,s,cs:gunzip -c $denlatdir/lat.JOB.gz |" \
+    "scp:utils/filter_scp.pl $sdata/JOB/utt2spk $dir/ali.scp |" ark:- \| \
     nnet3-discriminative-copy-egs --random=true --srand=JOB ark:- $degs_list || exit 1;
 fi
 
diff --git a/egs/wsj/s5/steps/nnet3/make_denlats.sh b/egs/wsj/s5/steps/nnet3/make_denlats.sh
index 7bc8dbd8c08..d1591c0b1de 100755
--- a/egs/wsj/s5/steps/nnet3/make_denlats.sh
+++ b/egs/wsj/s5/steps/nnet3/make_denlats.sh
@@ -22,7 +22,7 @@ transform_dir=
 max_mem=20000000 # This will stop the processes getting too large.
 # This is in bytes, but not "real" bytes-- you have to multiply
 # by something like 5 or 10 to get real bytes (not sure why so large)
-num_threads=1 # Fixed to 1 for now
+num_threads=1  # number of threads of decoder [only applicable if not looped, for now]
 online_ivector_dir=
 determinize=true
 minimize=false
@@ -174,7 +174,7 @@ fi
 
 lattice_determinize_cmd=
 if $determinize; then
-  lattice_determinize_cmd="lattice-determinize-non-compact --acoustic-scale=$acwt --max-mem=$max_mem --minimize=$minimize --prune --beam=$beam ark:- ark:- |"
+  lattice_determinize_cmd="lattice-determinize-non-compact --acoustic-scale=$acwt --max-mem=$max_mem --minimize=$minimize --prune --beam=$lattice_beam ark:- ark:- |"
 fi
 
 if [ $sub_split -eq 1 ]; then
@@ -248,4 +248,3 @@ fi
 
 echo "$0: done generating denominator lattices."
-
diff --git a/egs/wsj/s5/steps/nnet3/train_discriminative.sh b/egs/wsj/s5/steps/nnet3/train_discriminative.sh
index b0bf2a2aad6..fb75e7b0aab 100755
--- a/egs/wsj/s5/steps/nnet3/train_discriminative.sh
+++ b/egs/wsj/s5/steps/nnet3/train_discriminative.sh
@@ -17,8 +17,6 @@ num_epochs=4 # Number of epochs of training;
                 # num-epochs * frame-subsampling-factor times, due to
                 # using different data-shifts.
 use_gpu=true
-truncate_deriv_weights=0 # can be used to set to zero the weights of derivs from frames
-                         # near the edges. (counts subsampled frames).
 apply_deriv_weights=true
 use_frame_shift=false
 run_diagnostics=true
@@ -50,7 +48,8 @@ shuffle_buffer_size=1000 # This "buffer_size" variable controls randomization of
 
 stage=-3
 
-adjust_priors=true
+adjust_priors=true  # If true then it will also re-adjust the priors after training.
+
 num_threads=16  # this is the default but you may want to change it, e.g. to 1 if
                 # using GPUs.
@@ -59,8 +58,9 @@ keep_model_iters=1
 remove_egs=false
 
 src_model=  # will default to $degs_dir/final.mdl
-left_deriv_truncate=   # number of time-steps to avoid using the deriv of, on the left.
-right_deriv_truncate=  # number of time-steps to avoid using the deriv of, on the right.
+
+min_deriv_time=0
+max_deriv_time_relative=0
 
 # End configuration section.
 
@@ -71,7 +71,7 @@ if [ -f path.sh ]; then . ./path.sh; fi
 
 if [ $# != 2 ]; then
-  echo "Usage: $0 [opts] <degs-dir> <nnet-dir>"
+  echo "Usage: $0 [opts] <degs-dir> <exp-dir>"
   echo " e.g.: $0 exp/nnet3/tdnn_sp_degs exp/nnet3/tdnn_sp_smbr"
   echo ""
   echo "Main options (for others, see top of script file)"
@@ -109,12 +109,18 @@ dir=$2
 [ -z "$src_model" ] && src_model=$degs_dir/final.mdl
 
 # Check some files.
-for f in $degs_dir/degs.1.ark $degs_dir/info/{num_archives,silence.csl,frames_per_eg,egs_per_archive} $src_model; do +for f in $degs_dir/degs.1.ark $degs_dir/info/{num_archives,silence.csl,frame_subsampling_factor} $src_model; do [ ! -f $f ] && echo "$0: no such file $f" && exit 1; done mkdir -p $dir/log || exit 1; + +model_left_context=$(nnet3-am-info $src_model | grep "^left-context:" | awk '{print $2}') +model_right_context=$(nnet3-am-info $src_model | grep "^right-context:" | awk '{print $2}') + + + # copy some things for f in splice_opts cmvn_opts tree final.mat; do if [ -f $degs_dir/$f ]; then @@ -129,7 +135,6 @@ if $adjust_priors; then num_archives_priors=`cat $degs_dir/info/num_archives_priors` || exit 1 fi -frames_per_eg=$(cat $degs_dir/info/frames_per_eg) || { echo "error: no such file $degs_dir/info/frames_per_eg"; exit 1; } num_archives=$(cat $degs_dir/info/num_archives) || exit 1; frame_subsampling_factor=$(cat $degs_dir/info/frame_subsampling_factor) @@ -201,12 +206,7 @@ fi rm $dir/.error 2>/dev/null -x=0 - -deriv_time_opts= -[ ! -z "$left_deriv_truncate" ] && deriv_time_opts="--optimization.min-deriv-time=$left_deriv_truncate" -[ ! -z "$right_deriv_truncate" ] && \ - deriv_time_opts="$deriv_time_opts --optimization.max-deriv-time=$((frames_per_eg - right_deriv_truncate))" +x=0 while [ $x -lt $num_iters ]; do if [ $stage -le $x ]; then @@ -229,7 +229,7 @@ while [ $x -lt $num_iters ]; do $dir/$x.mdl \ ark:$degs_dir/train_diagnostic.degs & fi - + if [ $x -gt 0 ]; then $cmd $dir/log/progress.$x.log \ nnet3-show-progress --use-gpu=no "nnet3-am-copy --raw=true $dir/$[$x-1].mdl - |" "nnet3-am-copy --raw=true $dir/$x.mdl - |" \ @@ -239,9 +239,9 @@ while [ $x -lt $num_iters ]; do echo "Training neural net (pass $x)" - + cache_read_opt="--read-cache=$dir/cache.$x" - + ( # this sub-shell is so that when we "wait" below, # we only wait for the training jobs that we just spawned, # not the diagnostic jobs that we spawned above. @@ -253,7 +253,7 @@ while [ $x -lt $num_iters ]; do k=$[$num_archives_processed + $n - 1]; # k is a zero-based index that we'll derive # the other indexes from. archive=$[($k%$num_archives)+1]; # work out the 1-based archive index. - + if [ $n -eq 1 ]; then # an option for writing cache (storing pairs of nnet-computations and # computation-requests) during training. 
@@ -282,14 +282,16 @@ while [ $x -lt $num_iters ]; do $cmd $train_queue_opt $dir/log/train.$x.$n.log \ nnet3-discriminative-train $cache_read_opt $cache_write_opt \ --apply-deriv-weights=$apply_deriv_weights \ - $parallel_train_opts $deriv_time_opts \ + --optimization.min-deriv-time=-$model_left_context \ + --optimization.max-deriv-time-relative=$model_right_context \ + $parallel_train_opts \ --max-param-change=$this_max_param_change \ --silence-phones=$silphonelist \ --criterion=$criterion --drop-frames=$drop_frames \ --one-silence-class=$one_silence_class \ --boost=$boost --acoustic-scale=$acoustic_scale $regularization_opts \ $dir/$x.mdl \ - "ark:nnet3-discriminative-copy-egs --frame-shift=$frame_shift --truncate-deriv-weights=$truncate_deriv_weights ark:$degs_dir/degs.$archive.ark ark:- | nnet3-discriminative-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:- | nnet3-discriminative-merge-egs --minibatch-size=$minibatch_size ark:- ark:- |" \ + "ark:nnet3-discriminative-copy-egs --frame-shift=$frame_shift ark:$degs_dir/degs.$archive.ark ark:- | nnet3-discriminative-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:- | nnet3-discriminative-merge-egs --minibatch-size=$minibatch_size ark:- ark:- |" \ $dir/$[$x+1].$n.raw || touch $dir/.error & done wait @@ -378,4 +380,3 @@ if $cleanup; then fi done fi - diff --git a/egs/wsj/s5/utils/filter_scps.pl b/egs/wsj/s5/utils/filter_scps.pl index 0d9e0fe4837..07e59d6ba80 100755 --- a/egs/wsj/s5/utils/filter_scps.pl +++ b/egs/wsj/s5/utils/filter_scps.pl @@ -165,6 +165,5 @@ print STDERR "filter_scps.pl: warning: some input lines did not get output\n"; } if ($warn_multiply_covered && $print_warnings) { - print STDERR "filter_scps.pl: warning: some input lines were output to multiple files\n"; + print STDERR "filter_scps.pl: warning: some input lines were output to multiple files [OK if splitting per utt]\n"; } - diff --git a/egs/wsj/s5/utils/split_data.sh b/egs/wsj/s5/utils/split_data.sh index e44a4ab6359..ab0dbbf35c7 100755 --- a/egs/wsj/s5/utils/split_data.sh +++ b/egs/wsj/s5/utils/split_data.sh @@ -41,6 +41,14 @@ if ! [ "$numsplit" -gt 0 ]; then exit 1; fi +if $split_per_spk; then + warning_opt= +else + # suppress warnings from filter_scps.pl about 'some input lines were output + # to multiple files'. + warning_opt="--no-warn" +fi + n=0; feats="" wavs="" @@ -124,9 +132,6 @@ done # split some things that are indexed by speaker for f in spk2gender spk2warp cmvn.scp; do if [ -f $data/$f ]; then - ! $split_per_spk && warning_opt="--no-warn" - # suppress warnings from filter_scps.pl about 'some input lines were output - # to multiple files', which is expected in this case. utils/filter_scps.pl $warning_opt JOB=1:$numsplit \ $data/split${numsplit}${utt}/JOB/spk2utt $data/$f $data/split${numsplit}${utt}/JOB/$f || exit 1; fi @@ -140,12 +145,12 @@ if [ -f $data/segments ]; then awk '{print $2;}' $dsn/segments | sort | uniq > $dsn/tmp.reco # recording-ids. 
done if [ -f $data/reco2file_and_channel ]; then - utils/filter_scps.pl JOB=1:$numsplit \ + utils/filter_scps.pl $warning_opt JOB=1:$numsplit \ $data/split${numsplit}${utt}/JOB/tmp.reco $data/reco2file_and_channel \ $data/split${numsplit}${utt}/JOB/reco2file_and_channel || exit 1 fi if [ -f $data/wav.scp ]; then - utils/filter_scps.pl JOB=1:$numsplit \ + utils/filter_scps.pl $warning_opt JOB=1:$numsplit \ $data/split${numsplit}${utt}/JOB/tmp.reco $data/wav.scp \ $data/split${numsplit}${utt}/JOB/wav.scp || exit 1 fi diff --git a/src/chainbin/nnet3-chain-copy-egs.cc b/src/chainbin/nnet3-chain-copy-egs.cc index 1396932252a..fddaa6c9952 100644 --- a/src/chainbin/nnet3-chain-copy-egs.cc +++ b/src/chainbin/nnet3-chain-copy-egs.cc @@ -265,7 +265,6 @@ int main(int argc, char *argv[]) { bool random = false; int32 srand_seed = 0; int32 frame_shift = 0; - int32 truncate_deriv_weights = 0; int32 frame_subsampling_factor = -1; BaseFloat keep_proportion = 1.0; int32 left_context = -1, right_context = -1; @@ -282,9 +281,6 @@ int main(int argc, char *argv[]) { "in the supervision data (excluding iVector data) - useful in " "augmenting data. Note, the outputs will remain at the closest " "exact multiples of the frame subsampling factor"); - po.Register("truncate-deriv-weights", &truncate_deriv_weights, - "If nonzero, the number of initial/final subsample frames that " - "will have their derivatives' weights set to zero."); po.Register("left-context", &left_context, "Can be used to truncate the " "feature left-context that we output."); po.Register("right-context", &right_context, "Can be used to truncate the " @@ -320,7 +316,7 @@ int main(int argc, char *argv[]) { // count is normally 1; could be 0, or possibly >1. int32 count = GetCount(keep_proportion); std::string key = example_reader.Key(); - if (frame_shift == 0 && truncate_deriv_weights == 0 && + if (frame_shift == 0 && left_context == -1 && right_context == -1) { const NnetChainExample &eg = example_reader.Value(); for (int32 c = 0; c < count; c++) { @@ -338,8 +334,6 @@ int main(int argc, char *argv[]) { frame_subsampling_factor, &eg_out); else eg_out.Swap(&eg); - if (truncate_deriv_weights != 0) - TruncateDerivWeights(truncate_deriv_weights, &eg_out); for (int32 c = 0; c < count; c++) { int32 index = (random ? 
Rand() : num_written) % num_outputs; example_writers[index]->Write(key, eg_out); diff --git a/src/nnet3/discriminative-supervision.cc b/src/nnet3/discriminative-supervision.cc index 223257e5a5f..94a165f4c50 100644 --- a/src/nnet3/discriminative-supervision.cc +++ b/src/nnet3/discriminative-supervision.cc @@ -24,14 +24,11 @@ namespace kaldi { namespace discriminative { -void DiscriminativeSupervisionOptions::Check() const { - KALDI_ASSERT(frame_subsampling_factor > 0); -} DiscriminativeSupervision::DiscriminativeSupervision( const DiscriminativeSupervision &other): weight(other.weight), num_sequences(other.num_sequences), - frames_per_sequence(other.frames_per_sequence), + frames_per_sequence(other.frames_per_sequence), num_ali(other.num_ali), den_lat(other.den_lat) { } void DiscriminativeSupervision::Swap(DiscriminativeSupervision *other) { @@ -44,7 +41,7 @@ void DiscriminativeSupervision::Swap(DiscriminativeSupervision *other) { bool DiscriminativeSupervision::operator == ( const DiscriminativeSupervision &other) const { - return ( weight == other.weight && + return ( weight == other.weight && num_sequences == other.num_sequences && frames_per_sequence == other.frames_per_sequence && num_ali == other.num_ali && @@ -61,14 +58,14 @@ void DiscriminativeSupervision::Write(std::ostream &os, bool binary) const { WriteBasicType(os, binary, frames_per_sequence); KALDI_ASSERT(frames_per_sequence > 0 && num_sequences > 0); - + WriteToken(os, binary, ""); WriteIntegerVector(os, binary, num_ali); WriteToken(os, binary, ""); if (!WriteLattice(os, binary, den_lat)) { // We can't return error status from this function so we - // throw an exception. + // throw an exception. KALDI_ERR << "Error writing denominator lattice to stream"; } @@ -83,9 +80,9 @@ void DiscriminativeSupervision::Read(std::istream &is, bool binary) { ReadBasicType(is, binary, &num_sequences); ExpectToken(is, binary, ""); ReadBasicType(is, binary, &frames_per_sequence); - KALDI_ASSERT(frames_per_sequence > 0 && + KALDI_ASSERT(frames_per_sequence > 0 && num_sequences > 0); - + ExpectToken(is, binary, ""); ReadIntegerVector(is, binary, &num_ali); @@ -94,7 +91,7 @@ void DiscriminativeSupervision::Read(std::istream &is, bool binary) { Lattice *lat = NULL; if (!ReadLattice(is, binary, &lat) || lat == NULL) { // We can't return error status from this function so we - // throw an exception. + // throw an exception. KALDI_ERR << "Error reading Lattice from stream"; } den_lat = *lat; @@ -106,7 +103,7 @@ void DiscriminativeSupervision::Read(std::istream &is, bool binary) { } bool DiscriminativeSupervision::Initialize(const std::vector &num_ali, - const Lattice &den_lat, + const Lattice &den_lat, BaseFloat weight) { if (num_ali.size() == 0) return false; if (den_lat.NumStates() == 0) return false; @@ -126,7 +123,7 @@ bool DiscriminativeSupervision::Initialize(const std::vector &num_ali, void DiscriminativeSupervision::Check() const { int32 num_frames_subsampled = num_ali.size(); - KALDI_ASSERT(num_frames_subsampled == + KALDI_ASSERT(num_frames_subsampled == num_sequences * frames_per_sequence); { @@ -150,14 +147,14 @@ DiscriminativeSupervisionSplitter::DiscriminativeSupervisionSplitter( den_lat_ = supervision_.den_lat; PrepareLattice(&den_lat_, &den_lat_scores_); - + int32 num_states = den_lat_.NumStates(), num_frames = supervision_.frames_per_sequence * supervision_.num_sequences; KALDI_ASSERT(num_states > 0); int32 start_state = den_lat_.Start(); // Lattice should be top-sorted and connected, so start-state must be 0. 
KALDI_ASSERT(start_state == 0 && "Expecting start-state to be 0"); - + KALDI_ASSERT(num_states == den_lat_scores_.state_times.size()); KALDI_ASSERT(den_lat_scores_.state_times[start_state] == 0); KALDI_ASSERT(den_lat_scores_.state_times.back() == num_frames); @@ -193,7 +190,7 @@ void DiscriminativeSupervisionSplitter::CollapseTransitionIds( pdf_to_tid[t][pdf] = arc.ilabel; } } - } + } } void DiscriminativeSupervisionSplitter::LatticeInfo::Check() const { @@ -204,9 +201,9 @@ void DiscriminativeSupervisionSplitter::LatticeInfo::Check() const { // Check that the states are ordered in increasing order of state_times. // This must be true since the states are in breadth-first search order. KALDI_ASSERT(IsSorted(state_times)); -} +} -void DiscriminativeSupervisionSplitter::GetFrameRange(int32 begin_frame, int32 num_frames, bool normalize, +void DiscriminativeSupervisionSplitter::GetFrameRange(int32 begin_frame, int32 num_frames, bool normalize, DiscriminativeSupervision *out_supervision) const { int32 end_frame = begin_frame + num_frames; // Note: end_frame is not included in the range of frames that the @@ -224,7 +221,7 @@ void DiscriminativeSupervisionSplitter::GetFrameRange(int32 begin_frame, int32 n std::copy(supervision_.num_ali.begin() + begin_frame, supervision_.num_ali.begin() + end_frame, std::back_inserter(out_supervision->num_ali)); - + out_supervision->num_sequences = 1; out_supervision->weight = supervision_.weight; out_supervision->frames_per_sequence = num_frames; @@ -239,19 +236,19 @@ void DiscriminativeSupervisionSplitter::CreateRangeLattice( typedef Lattice::StateId StateId; const std::vector &state_times = scores.state_times; - - // Some checks to ensure the lattice and scores are prepared properly + + // Some checks to ensure the lattice and scores are prepared properly KALDI_ASSERT(state_times.size() == in_lat.NumStates()); if (!in_lat.Properties(fst::kTopSorted, true)) KALDI_ERR << "Input lattice must be topologically sorted."; std::vector::const_iterator begin_iter = std::lower_bound(state_times.begin(), state_times.end(), begin_frame), - end_iter = std::lower_bound(begin_iter, + end_iter = std::lower_bound(begin_iter, state_times.end(), end_frame); KALDI_ASSERT(*begin_iter == begin_frame && - (begin_iter == state_times.begin() || + (begin_iter == state_times.begin() || begin_iter[-1] < begin_frame)); // even if end_frame == supervision_.num_frames, there should be a state with // that frame index. @@ -267,10 +264,10 @@ void DiscriminativeSupervisionSplitter::CreateRangeLattice( // Add special start state StateId start_state = out_lat->AddState(); out_lat->SetStart(start_state); - + for (StateId i = begin_state; i < end_state; i++) out_lat->AddState(); - + // Add the special final-state. StateId final_state = out_lat->AddState(); out_lat->SetFinal(final_state, LatticeWeight::One()); @@ -280,10 +277,10 @@ void DiscriminativeSupervisionSplitter::CreateRangeLattice( if (state_times[state] == begin_frame) { // we'd like to make this an initial state, but OpenFst doesn't allow // multiple initial states. Instead we add an epsilon transition to it - // from our actual initial state. The weight on this + // from our actual initial state. The weight on this // transition is the forward probability of the said 'initial state' LatticeWeight weight = LatticeWeight::One(); - weight.SetValue1((normalize ? scores.beta[0] : 0.0) - scores.alpha[state]); + weight.SetValue1((normalize ? 
scores.beta[0] : 0.0) - scores.alpha[state]); // Add negative of the forward log-probability to the graph cost score, // since the acoustic scores would be changed later. // Assuming that the lattice is scaled with appropriate acoustic @@ -294,29 +291,29 @@ void DiscriminativeSupervisionSplitter::CreateRangeLattice( // Note: Doing a forward-backward on this split must result in a total // score of 0 because of the normalization. - out_lat->AddArc(start_state, + out_lat->AddArc(start_state, LatticeArc(0, 0, weight, output_state)); } else { KALDI_ASSERT(scores.state_times[state] < end_frame); } - for (fst::ArcIterator aiter(in_lat, state); + for (fst::ArcIterator aiter(in_lat, state); !aiter.Done(); aiter.Next()) { const LatticeArc &arc = aiter.Value(); StateId nextstate = arc.nextstate; if (nextstate >= end_state) { // A transition to any state outside the range becomes a transition to - // our special final-state. - // The weight is just the negative of the backward log-probability + + // our special final-state. + // The weight is just the negative of the backward log-probability + // the arc cost. We again normalize with the total lattice score. LatticeWeight weight; //KALDI_ASSERT(scores.beta[state] < 0); - weight.SetValue1(arc.weight.Value1() - scores.beta[nextstate]); + weight.SetValue1(arc.weight.Value1() - scores.beta[nextstate]); weight.SetValue2(arc.weight.Value2()); // Add negative of the backward log-probability to the LM score, since // the acoustic scores would be changed later. // Note: We don't normalize here because that is already done with the // initial cost. - + out_lat->AddArc(output_state, LatticeArc(arc.ilabel, arc.olabel, weight, final_state)); } else { @@ -350,28 +347,28 @@ void DiscriminativeSupervisionSplitter::CreateRangeLattice( } } - fst::TopSort(out_lat); + fst::TopSort(out_lat); std::vector state_times_tmp; KALDI_ASSERT(LatticeStateTimes(*out_lat, &state_times_tmp) == end_frame - begin_frame); // Remove the acoustic scale that was previously added - if (config_.supervision_config.acoustic_scale != 1.0) { + if (config_.acoustic_scale != 1.0) { fst::ScaleLattice(fst::AcousticLatticeScale( - 1 / config_.supervision_config.acoustic_scale), out_lat); + 1 / config_.acoustic_scale), out_lat); } } void DiscriminativeSupervisionSplitter::PrepareLattice( Lattice *lat, LatticeInfo *scores) const { - // Scale the lattice to appropriate acoustic scale. It is important to - // ensure this is equal to the acoustic scale used while training. This is - // because, on splitting lattices, the initial and final costs are added + // Scale the lattice to appropriate acoustic scale. It is important to + // ensure this is equal to the acoustic scale used while training. This is + // because, on splitting lattices, the initial and final costs are added // into the graph cost. - KALDI_ASSERT(config_.supervision_config.acoustic_scale != 0.0); - if (config_.supervision_config.acoustic_scale != 1.0) + KALDI_ASSERT(config_.acoustic_scale != 0.0); + if (config_.acoustic_scale != 1.0) fst::ScaleLattice(fst::AcousticLatticeScale( - config_.supervision_config.acoustic_scale), lat); + config_.acoustic_scale), lat); LatticeStateTimes(*lat, &(scores->state_times)); int32 num_states = lat->NumStates(); @@ -383,7 +380,7 @@ void DiscriminativeSupervisionSplitter::PrepareLattice( // Order the states based on the state times. This is stronger than just // topological sort. This is required by the lattice splitting code. 
std::sort(state_time_indexes.begin(), state_time_indexes.end()); - + std::vector state_order(num_states); for (int32 s = 0; s < num_states; s++) { state_order[state_time_indexes[s].second] = s; @@ -396,9 +393,9 @@ void DiscriminativeSupervisionSplitter::PrepareLattice( void DiscriminativeSupervisionSplitter::ComputeLatticeScores(const Lattice &lat, LatticeInfo *scores) const { LatticeStateTimes(lat, &(scores->state_times)); - ComputeLatticeAlphasAndBetas(lat, false, + ComputeLatticeAlphasAndBetas(lat, false, &(scores->alpha), &(scores->beta)); - scores->Check(); + scores->Check(); // This check will fail if the lattice is not breadth-first search sorted } @@ -427,7 +424,7 @@ void AppendSupervision(const std::vector &inpu fst::Concat(&output_supervision->back().den_lat, src.den_lat); output_supervision->back().num_ali.insert( - output_supervision->back().num_ali.end(), + output_supervision->back().num_ali.end(), src.num_ali.begin(), src.num_ali.end()); output_supervision->back().num_sequences++; @@ -448,5 +445,5 @@ void AppendSupervision(const std::vector &inpu } } -} // namespace discriminative +} // namespace discriminative } // namespace kaldi diff --git a/src/nnet3/discriminative-supervision.h b/src/nnet3/discriminative-supervision.h index c5cdc7a4107..d4c7ee3756e 100644 --- a/src/nnet3/discriminative-supervision.h +++ b/src/nnet3/discriminative-supervision.h @@ -29,37 +29,21 @@ namespace kaldi { namespace discriminative { -struct DiscriminativeSupervisionOptions { - int32 frame_subsampling_factor; - BaseFloat acoustic_scale; - - DiscriminativeSupervisionOptions(): frame_subsampling_factor(1), acoustic_scale(0.1) { } - - void Register(OptionsItf *opts) { - opts->Register("frame-subsampling-factor", &frame_subsampling_factor, "Used " - "if the frame-rate for the model will be less than the " - "frame-rate of the original alignment. Applied after " - "left-tolerance and right-tolerance are applied (so they are " - "in terms of the original num-frames."); - opts->Register("acoustic-scale", &acoustic_scale, - "Scaling factor for acoustic likelihoods"); - } - - void Check() const; -}; struct SplitDiscriminativeSupervisionOptions { + int32 frame_subsampling_factor; bool remove_output_symbols; bool collapse_transition_ids; bool remove_epsilons; bool determinize; bool minimize; // we'll push and minimize if this is true. 
- DiscriminativeSupervisionOptions supervision_config; - + BaseFloat acoustic_scale; + SplitDiscriminativeSupervisionOptions() : - remove_output_symbols(false), collapse_transition_ids(false), - remove_epsilons(false), determinize(false), - minimize(false) { } + frame_subsampling_factor(1), + remove_output_symbols(true), collapse_transition_ids(true), + remove_epsilons(true), determinize(true), + minimize(true), acoustic_scale(0.1) { } void Register(OptionsItf *opts) { opts->Register("collapse-transition-ids", &collapse_transition_ids, @@ -76,7 +60,12 @@ struct SplitDiscriminativeSupervisionOptions { "lattices (as Lattice) after splitting and possibly minimize"); opts->Register("minimize", &minimize, "If true, we push and " "minimize lattices (as Lattice) after splitting"); - supervision_config.Register(opts); + opts->Register("acoustic-scale", &acoustic_scale, + "Scaling factor for acoustic likelihoods (should match the " + "value used in discriminative-get-supervision)"); + opts->Register("frame-subsampling-factor", &frame_subsampling_factor, "Used " + "if the frame-rate for the model will be less than the " + "frame-rate of the original alignment."); } }; @@ -86,13 +75,13 @@ struct SplitDiscriminativeSupervisionOptions { */ // struct DiscriminativeSupervision is the fully-processed information for -// a whole utterance or (after splitting) part of an utterance. +// a whole utterance or (after splitting) part of an utterance. struct DiscriminativeSupervision { // The weight we assign to this example; // this will typically be one, but we include it - // for the sake of generality. - BaseFloat weight; - + // for the sake of generality. + BaseFloat weight; + // num_sequences will be 1 if you create a DiscriminativeSupervision object from a single // lattice or alignment, but if you combine multiple DiscriminativeSupervision objects // the 'num_sequences' is the number of objects that were combined (the @@ -104,20 +93,20 @@ struct DiscriminativeSupervision { // Technically this information is redundant with the lattices, but it's convenient // to have it separately. int32 frames_per_sequence; - + // The numerator alignment // Usually obtained by aligning the reference text with the seed neural // network model; can be the best path of generated lattice in the case of // semi-supervised training. std::vector num_ali; - + // Note: any acoustic // likelihoods in the lattices will be // recomputed at the time we train. - - // The denominator lattice. - Lattice den_lat; - + + // The denominator lattice. + Lattice den_lat; + DiscriminativeSupervision(): weight(1.0), num_sequences(1), frames_per_sequence(-1) { } @@ -128,7 +117,7 @@ struct DiscriminativeSupervision { // and denominator lattice. The supervision object is used for sequence // discriminative training. // Topologically sorts the lattice after copying to the supervision object. - // Returns false when alignment or lattice is empty + // Returns false when alignment or lattice is empty bool Initialize(const std::vector &alignment, const Lattice &lat, BaseFloat weight); @@ -136,13 +125,13 @@ struct DiscriminativeSupervision { void Swap(DiscriminativeSupervision *other); bool operator == (const DiscriminativeSupervision &other) const; - + // This function checks that this supervision object satifsies some // of the properties we expect of it, and calls KALDI_ERR if not. 
void Check() const; - - inline int32 NumFrames() const { - return num_sequences * frames_per_sequence; + + inline int32 NumFrames() const { + return num_sequences * frames_per_sequence; } void Write(std::ostream &os, bool binary) const; @@ -156,30 +145,30 @@ class DiscriminativeSupervisionSplitter { public: typedef fst::ArcTpl LatticeArc; typedef fst::VectorFst Lattice; - + DiscriminativeSupervisionSplitter( const SplitDiscriminativeSupervisionOptions &config, const TransitionModel &tmodel, const DiscriminativeSupervision &supervision); - // A structure used to store the forward and backward scores + // A structure used to store the forward and backward scores // and state times of a lattice struct LatticeInfo { - // These values are stored in log. + // These values are stored in log. std::vector alpha; std::vector beta; std::vector state_times; void Check() const; }; - - // Extracts a frame range of the supervision into 'supervision'. + + // Extracts a frame range of the supervision into 'supervision'. void GetFrameRange(int32 begin_frame, int32 frames_per_sequence, bool normalize, DiscriminativeSupervision *supervision) const; // Get the acoustic scaled denominator lattice out for debugging purposes - inline const Lattice& DenLat() const { return den_lat_; } + inline const Lattice& DenLat() const { return den_lat_; } private: @@ -187,7 +176,7 @@ class DiscriminativeSupervisionSplitter { // assuming that the corresponding state-range that we need to // include, begin_state <= s < end_state has been included. // (note: the output lattice will also have two special initial and final - // states). + // states). // Also does post-processing (RmEpsilon, Determinize, // TopSort on the result). See code for details. void CreateRangeLattice(const Lattice &in_lat, @@ -201,7 +190,7 @@ class DiscriminativeSupervisionSplitter { // Transition model is used by the function // CollapseTransitionIds() const TransitionModel &tmodel_; - + // A reference to the supervision object that we will be splitting const DiscriminativeSupervision &supervision_; @@ -216,7 +205,7 @@ class DiscriminativeSupervisionSplitter { // Function to compute lattice scores for a lattice void ComputeLatticeScores(const Lattice &lat, LatticeInfo *scores) const; - // Prepare lattice : + // Prepare lattice : // 1) Order states in breadth-first search order // 2) Compute states times, which must be a strictly non-decreasing vector // 3) Compute lattice alpha and beta scores @@ -225,7 +214,7 @@ class DiscriminativeSupervisionSplitter { // Modifies the transition-ids on lat_ so that on each frame, there is just // one with any given pdf-id. This allows us to determinize and minimize // more completely. 
- void CollapseTransitionIds(const std::vector &state_times, + void CollapseTransitionIds(const std::vector &state_times, Lattice *lat) const; }; @@ -241,9 +230,6 @@ void AppendSupervision(const std::vector &inpu bool compactify, std::vector *output_supervision); -typedef TableWriter > DiscriminativeSupervisionWriter; -typedef SequentialTableReader > SequentialDiscriminativeSupervisionReader; -typedef RandomAccessTableReader > RandomAccessDiscriminativeSupervisionReader; } // namespace discriminative } // namespace kaldi diff --git a/src/nnet3/nnet-am-decodable-simple.cc b/src/nnet3/nnet-am-decodable-simple.cc index 9d2176965b1..35b1506336e 100644 --- a/src/nnet3/nnet-am-decodable-simple.cc +++ b/src/nnet3/nnet-am-decodable-simple.cc @@ -64,7 +64,7 @@ DecodableAmNnetSimple::DecodableAmNnetSimple( const MatrixBase *online_ivectors, int32 online_ivector_period, CachingOptimizingCompiler *compiler): - compiler_(am_nnet.GetNnet(), opts.optimize_config), + compiler_(am_nnet.GetNnet(), opts.optimize_config, opts.compiler_config), decodable_nnet_(opts, am_nnet.GetNnet(), am_nnet.Priors(), feats, compiler != NULL ? compiler : &compiler_, ivector, online_ivectors, @@ -318,7 +318,7 @@ DecodableAmNnetSimpleParallel::DecodableAmNnetSimpleParallel( const VectorBase *ivector, const MatrixBase *online_ivectors, int32 online_ivector_period): - compiler_(am_nnet.GetNnet(), opts.optimize_config), + compiler_(am_nnet.GetNnet(), opts.optimize_config, opts.compiler_config), trans_model_(trans_model), feats_copy_(NULL), ivector_copy_(NULL), diff --git a/src/nnet3/nnet-am-decodable-simple.h b/src/nnet3/nnet-am-decodable-simple.h index acf0ba8e63a..6b382fbe033 100644 --- a/src/nnet3/nnet-am-decodable-simple.h +++ b/src/nnet3/nnet-am-decodable-simple.h @@ -51,6 +51,7 @@ struct NnetSimpleComputationOptions { bool debug_computation; NnetOptimizeOptions optimize_config; NnetComputeOptions compute_config; + CachingOptimizingCompilerOptions compiler_config; NnetSimpleComputationOptions(): extra_left_context(0), @@ -60,7 +61,9 @@ struct NnetSimpleComputationOptions { frame_subsampling_factor(1), frames_per_chunk(50), acoustic_scale(0.1), - debug_computation(false) { } + debug_computation(false) { + compiler_config.cache_capacity += frames_per_chunk; + } void Register(OptionsItf *opts) { opts->Register("extra-left-context", &extra_left_context, diff --git a/src/nnet3/nnet-chain-example.cc b/src/nnet3/nnet-chain-example.cc index 4f9cb4b92b8..005107a097c 100644 --- a/src/nnet3/nnet-chain-example.cc +++ b/src/nnet3/nnet-chain-example.cc @@ -290,28 +290,6 @@ void MergeChainExamples(bool compress, } } -void TruncateDerivWeights(int32 truncate, - NnetChainExample *eg) { - for (size_t i = 0; i < eg->outputs.size(); i++) { - NnetChainSupervision &supervision = eg->outputs[i]; - Vector &deriv_weights = supervision.deriv_weights; - if (deriv_weights.Dim() == 0) { - deriv_weights.Resize(supervision.indexes.size()); - deriv_weights.Set(1.0); - } - int32 num_sequences = supervision.supervision.num_sequences, - frames_per_sequence = supervision.supervision.frames_per_sequence; - KALDI_ASSERT(2 * truncate < frames_per_sequence); - for (int32 t = 0; t < truncate; t++) - for (int32 s = 0; s < num_sequences; s++) - deriv_weights(t * num_sequences + s) = 0.0; - for (int32 t = frames_per_sequence - truncate; - t < frames_per_sequence; t++) - for (int32 s = 0; s < num_sequences; s++) - deriv_weights(t * num_sequences + s) = 0.0; - } -} - void GetChainComputationRequest(const Nnet &nnet, const NnetChainExample &eg, bool need_model_derivative, 
diff --git a/src/nnet3/nnet-chain-example.h b/src/nnet3/nnet-chain-example.h index ac782a92805..7a024f3bfcd 100644 --- a/src/nnet3/nnet-chain-example.h +++ b/src/nnet3/nnet-chain-example.h @@ -189,15 +189,6 @@ void ShiftChainExampleTimes(int32 frame_shift, const std::vector &exclude_names, NnetChainExample *eg); -/** - This sets to zero any elements of 'egs->outputs[*].deriv_weights' that correspond - to frames within the first or last 'truncate' frames of the sequence (e.g. you could - set 'truncate=5' to set zero deriv-weight for the first and last 5 frames of the - sequence). - */ -void TruncateDerivWeights(int32 truncate, - NnetChainExample *eg); - /** This function takes a NnetChainExample and produces a ComputationRequest. Assumes you don't want the derivatives w.r.t. the inputs; if you do, you can create the ComputationRequest manually. Assumes that if diff --git a/src/nnet3/nnet-discriminative-example.cc b/src/nnet3/nnet-discriminative-example.cc index a7330e772a3..aa7eb48ea04 100644 --- a/src/nnet3/nnet-discriminative-example.cc +++ b/src/nnet3/nnet-discriminative-example.cc @@ -285,27 +285,6 @@ void MergeDiscriminativeExamples( } } -void TruncateDerivWeights(int32 truncate, - NnetDiscriminativeExample *eg) { - for (size_t i = 0; i < eg->outputs.size(); i++) { - NnetDiscriminativeSupervision &supervision = eg->outputs[i]; - Vector &deriv_weights = supervision.deriv_weights; - if (deriv_weights.Dim() == 0) { - deriv_weights.Resize(supervision.indexes.size()); - deriv_weights.Set(1.0); - } - int32 num_sequences = supervision.supervision.num_sequences, - frames_per_sequence = supervision.supervision.frames_per_sequence; - KALDI_ASSERT(2 * truncate < frames_per_sequence); - for (int32 t = 0; t < truncate; t++) - for (int32 s = 0; s < num_sequences; s++) - deriv_weights(t * num_sequences + s) = 0.0; - for (int32 t = frames_per_sequence - truncate; - t < frames_per_sequence; t++) - for (int32 s = 0; s < num_sequences; s++) - deriv_weights(t * num_sequences + s) = 0.0; - } -} void GetDiscriminativeComputationRequest(const Nnet &nnet, const NnetDiscriminativeExample &eg, diff --git a/src/nnet3/nnet-discriminative-example.h b/src/nnet3/nnet-discriminative-example.h index 9d9bba0c906..ba1cac7ffbe 100644 --- a/src/nnet3/nnet-discriminative-example.h +++ b/src/nnet3/nnet-discriminative-example.h @@ -196,15 +196,6 @@ void ShiftDiscriminativeExampleTimes(int32 frame_shift, const std::vector &exclude_names, NnetDiscriminativeExample *eg); -/** - This sets to zero any elements of 'egs->outputs[*].deriv_weights' that correspond - to frames within the first or last 'truncate' frames of the sequence (e.g. you could - set 'truncate=5' to set zero deriv-weight for the first and last 5 frames of the - sequence). - */ -void TruncateDerivWeights(int32 truncate, - NnetDiscriminativeExample *eg); - /** This function takes a NnetDiscriminativeExample and produces a ComputationRequest. Assumes you don't want the derivatives w.r.t. 
the inputs; if you do, you diff --git a/src/nnet3bin/Makefile b/src/nnet3bin/Makefile index fd576404f1d..2bae1dcdc43 100644 --- a/src/nnet3bin/Makefile +++ b/src/nnet3bin/Makefile @@ -16,7 +16,7 @@ BINFILES = nnet3-init nnet3-info nnet3-get-egs nnet3-copy-egs nnet3-subset-egs \ nnet3-discriminative-get-egs nnet3-discriminative-copy-egs \ nnet3-discriminative-merge-egs nnet3-discriminative-shuffle-egs \ nnet3-discriminative-compute-objf nnet3-discriminative-train \ - discriminative-get-supervision nnet3-discriminative-subset-egs \ + nnet3-discriminative-subset-egs \ nnet3-discriminative-compute-from-egs nnet3-latgen-faster-looped OBJFILES = diff --git a/src/nnet3bin/discriminative-get-supervision.cc b/src/nnet3bin/discriminative-get-supervision.cc deleted file mode 100644 index 32d66c1c55a..00000000000 --- a/src/nnet3bin/discriminative-get-supervision.cc +++ /dev/null @@ -1,100 +0,0 @@ -// nnet3bin/discriminative-get-supervision.cc - -// Copyright 2015 Johns Hopkins University (author: Daniel Povey) -// Copyright 2014-2015 Vimal Manohar - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "nnet3/discriminative-supervision.h" - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - using namespace kaldi::discriminative; - typedef kaldi::int32 int32; - typedef kaldi::int64 int64; - - const char *usage = - "Get a discriminative training supervision object for each file of training data.\n" - "This will normally be piped into nnet3-discriminative-get-egs, where it\n" - "will be split up into pieces and combined with the features.\n" - "Usage: discriminative-get-supervision [options] \\\n" - " \n"; - - DiscriminativeSupervisionOptions sup_opts; - - ParseOptions po(usage); - - sup_opts.Register(&po); - - po.Read(argc, argv); - - if (po.NumArgs() != 3) { - po.PrintUsage(); - exit(1); - } - - std::string num_ali_rspecifier = po.GetArg(1), - den_lat_rspecifier = po.GetArg(2), - supervision_wspecifier = po.GetArg(3); - - DiscriminativeSupervisionWriter supervision_writer(supervision_wspecifier); - RandomAccessLatticeReader den_lat_reader(den_lat_rspecifier); - SequentialInt32VectorReader ali_reader(num_ali_rspecifier); - - int32 num_utts_done = 0, num_utts_error = 0; - - for (; !ali_reader.Done(); ali_reader.Next()) { - const std::string &key = ali_reader.Key(); - const std::vector &num_ali = ali_reader.Value(); - - if (!den_lat_reader.HasKey(key)) { - KALDI_WARN << "Could not find denominator lattice for utterance " - << key; - num_utts_error++; - continue; - } - - const Lattice &den_lat = den_lat_reader.Value(key); - - DiscriminativeSupervision supervision; - - if (!supervision.Initialize(num_ali, den_lat, 1.0)) { - KALDI_WARN << "Failed to convert lattice to supervision " - << "for utterance " << key; - num_utts_error++; - continue; - } - - 
supervision_writer.Write(key, supervision); - - num_utts_done++; - } - - KALDI_LOG << "Generated discriminative supervision information for " - << num_utts_done << " utterances, errors on " - << num_utts_error; - return (num_utts_done > num_utts_error ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what() << '\n'; - return -1; - } -} - diff --git a/src/nnet3bin/nnet3-align-compiled.cc b/src/nnet3bin/nnet3-align-compiled.cc index 790c0938fdf..bab5d16f370 100644 --- a/src/nnet3bin/nnet3-align-compiled.cc +++ b/src/nnet3bin/nnet3-align-compiled.cc @@ -63,7 +63,7 @@ int main(int argc, char *argv[]) { int32 online_ivector_period = 0; align_config.Register(&po); decodable_opts.Register(&po); - + po.Register("use-gpu", &use_gpu, "yes|no|optional|wait, only has effect if compiled with CUDA"); po.Register("transition-scale", &transition_scale, @@ -101,6 +101,7 @@ int main(int argc, char *argv[]) { double tot_like = 0.0; kaldi::int64 frame_count = 0; + { TransitionModel trans_model; AmNnetSimple am_nnet; @@ -110,6 +111,10 @@ int main(int argc, char *argv[]) { trans_model.Read(ki.Stream(), binary); am_nnet.Read(ki.Stream(), binary); } + // this compiler object allows caching of computations across + // different utterances. + CachingOptimizingCompiler compiler(am_nnet.GetNnet(), + decodable_opts.optimize_config); RandomAccessBaseFloatMatrixReader online_ivector_reader( online_ivector_rspecifier); @@ -173,7 +178,7 @@ int main(int argc, char *argv[]) { DecodableAmNnetSimple nnet_decodable( decodable_opts, trans_model, am_nnet, features, ivector, online_ivectors, - online_ivector_period); + online_ivector_period, &compiler); AlignUtteranceWrapper(align_config, utt, decodable_opts.acoustic_scale, @@ -199,5 +204,3 @@ int main(int argc, char *argv[]) { return -1; } } - - diff --git a/src/nnet3bin/nnet3-discriminative-copy-egs.cc b/src/nnet3bin/nnet3-discriminative-copy-egs.cc index 831484ebb11..17dc2ee4e13 100644 --- a/src/nnet3bin/nnet3-discriminative-copy-egs.cc +++ b/src/nnet3bin/nnet3-discriminative-copy-egs.cc @@ -58,7 +58,6 @@ int main(int argc, char *argv[]) { bool random = false; int32 srand_seed = 0; int32 frame_shift = 0; - int32 truncate_deriv_weights = 0; BaseFloat keep_proportion = 1.0; ParseOptions po(usage); @@ -74,9 +73,6 @@ int main(int argc, char *argv[]) { "in the supervision data (excluding iVector data) - useful in " "augmenting data. Note, the outputs will remain at the closest " "exact multiples of the frame subsampling factor"); - po.Register("truncate-deriv-weights", &truncate_deriv_weights, - "If nonzero, the number of initial/final subsample frames that " - "will have their derivatives' weights set to zero."); po.Read(argc, argv); @@ -106,7 +102,7 @@ int main(int argc, char *argv[]) { // count is normally 1; could be 0, or possibly >1. int32 count = GetCount(keep_proportion); std::string key = example_reader.Key(); - if (frame_shift == 0 && truncate_deriv_weights == 0) { + if (frame_shift == 0) { const NnetDiscriminativeExample &eg = example_reader.Value(); for (int32 c = 0; c < count; c++) { int32 index = (random ? Rand() : num_written) % num_outputs; @@ -117,8 +113,6 @@ int main(int argc, char *argv[]) { NnetDiscriminativeExample eg = example_reader.Value(); if (frame_shift != 0) ShiftDiscriminativeExampleTimes(frame_shift, exclude_names, &eg); - if (truncate_deriv_weights != 0) - TruncateDerivWeights(truncate_deriv_weights, &eg); for (int32 c = 0; c < count; c++) { int32 index = (random ? 
Rand() : num_written) % num_outputs; example_writers[index]->Write(key, eg); @@ -136,4 +130,3 @@ int main(int argc, char *argv[]) { return -1; } } - diff --git a/src/nnet3bin/nnet3-discriminative-get-egs.cc b/src/nnet3bin/nnet3-discriminative-get-egs.cc index 070a88b331d..4a31876532f 100644 --- a/src/nnet3bin/nnet3-discriminative-get-egs.cc +++ b/src/nnet3bin/nnet3-discriminative-get-egs.cc @@ -161,18 +161,15 @@ int main(int argc, char *argv[]) { const char *usage = "Get frame-by-frame examples of data for nnet3+sequence neural network\n" - "training. This involves breaking up utterances into pieces of a\n" - "fixed size. Input will come from discriminative-get-supervision.\n" + "training. This involves breaking up utterances into pieces of sizes\n" + "determined by the --num-frames option.\n" "\n" "Usage: nnet3-discriminative-get-egs [options] " - " \n" + " \n" "\n" "An example [where $feats expands to the actual features]:\n" - "discriminative-get-supervision [args] | \\\n" - " nnet3-discriminative-get-egs --left-context=25 --right-context=9 --num-frames=20 \\\n" - " \"$feats\" ark,s,cs:- ark:degs.1.ark\n" - "Note: the --frame-subsampling-factor option must be the same as given to\n" - "discriminative-get-supervision.\n"; + " nnet3-discriminative-get-egs --left-context=25 --right-context=9 --num-frames=150,100,90 \\\n" + " \"$feats\" \"ark,s,cs:gunzip -c lat.1.gz\" scp:ali.scp ark:degs.1.ark\n"; bool compress = true; int32 length_tolerance = 100, online_ivector_period = 1; @@ -198,13 +195,11 @@ int main(int argc, char *argv[]) { po.Register("length-tolerance", &length_tolerance, "Tolerance for " "difference in num-frames between feat and ivector matrices"); - - ParseOptions splitter_opts("supervision-splitter", &po); - splitter_config.Register(&splitter_opts); + splitter_config.Register(&po); po.Read(argc, argv); - if (po.NumArgs() != 4) { + if (po.NumArgs() != 5) { po.PrintUsage(); exit(1); } @@ -212,14 +207,12 @@ int main(int argc, char *argv[]) { eg_config.ComputeDerived(); UtteranceSplitter utt_splitter(eg_config); - std::string model_wxfilename, feature_rspecifier, - supervision_rspecifier, - examples_wspecifier; + std::string model_wxfilename = po.GetArg(1), + feature_rspecifier = po.GetArg(2), + den_lat_rspecifier = po.GetArg(3), + num_ali_rspecifier = po.GetArg(4), + examples_wspecifier = po.GetArg(5); - model_wxfilename = po.GetArg(1); - feature_rspecifier = po.GetArg(2); - supervision_rspecifier = po.GetArg(3); - examples_wspecifier = po.GetArg(4); TransitionModel tmodel; { @@ -229,8 +222,8 @@ int main(int argc, char *argv[]) { } SequentialBaseFloatMatrixReader feat_reader(feature_rspecifier); - discriminative::RandomAccessDiscriminativeSupervisionReader supervision_reader( - supervision_rspecifier); + RandomAccessLatticeReader den_lat_reader(den_lat_rspecifier); + RandomAccessInt32VectorReader ali_reader(num_ali_rspecifier); NnetDiscriminativeExampleWriter example_writer(examples_wspecifier); RandomAccessBaseFloatMatrixReader online_ivector_reader( online_ivector_rspecifier); @@ -240,11 +233,23 @@ int main(int argc, char *argv[]) { for (; !feat_reader.Done(); feat_reader.Next()) { std::string key = feat_reader.Key(); const Matrix &feats = feat_reader.Value(); - if (!supervision_reader.HasKey(key)) { - KALDI_WARN << "No supervision for key " << key; + if (!den_lat_reader.HasKey(key)) { + KALDI_WARN << "No denominator lattice for key " << key; + num_err++; + } else if (!ali_reader.HasKey(key)) { + KALDI_WARN << "No numerator alignment for key " << key; num_err++; } else { - 
const discriminative::DiscriminativeSupervision &supervision = supervision_reader.Value(key); + discriminative::DiscriminativeSupervision supervision; + if (!supervision.Initialize(ali_reader.Value(key), + den_lat_reader.Value(key), + 1.0)) { + KALDI_WARN << "Failed to convert lattice to supervision " + << "for utterance " << key; + num_err++; + continue; + } + const Matrix *online_ivector_feats = NULL; if (!online_ivector_rspecifier.empty()) { if (!online_ivector_reader.HasKey(key)) { From f546f0f895e108be36398e6c77408c720a379f0e Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 14 Jan 2017 21:17:47 -0500 Subject: [PATCH 232/530] Removing option --modify-learning-rates from example nnet3 discriminative training scripts --- .../s5/local/nnet3/run_tdnn_discriminative.sh | 3 +-- .../s5/local/nnet3/run_tdnn_discriminative.sh | 3 +-- egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh | 3 +-- egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh | 3 +-- egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh | 3 +-- .../s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh | 11 +++++------ egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh | 3 +-- egs/wsj/s5/local/nnet3/run_tdnn_discriminative.sh | 3 +-- egs/wsj/s5/steps/nnet3/train_discriminative.sh | 1 - 9 files changed, 12 insertions(+), 21 deletions(-) diff --git a/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh index aa2a845d6a8..dfaf8f90da3 100644 --- a/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh @@ -48,7 +48,6 @@ regularization_opts= # Applicable for providing --xent-regularize and - minibatch_size=64 adjust_priors=true # May need to be set to false # because it does not help in some setups -modify_learning_rates=true last_layer_factor=0.1 ## Decode options @@ -145,7 +144,7 @@ if [ $stage -le 4 ]; then --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ --adjust-priors $adjust_priors \ - --modify-learning-rates $modify_learning_rates --last-layer-factor $last_layer_factor \ + --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh index 51caba2bc98..cf26cac406a 100755 --- a/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh @@ -55,7 +55,6 @@ regularization_opts= # Applicable for providing --xent-regularize and - minibatch_size=64 adjust_priors=true # May need to be set to false # because it does not help in some setups -modify_learning_rates=true last_layer_factor=0.1 ## Decode options @@ -152,7 +151,7 @@ if [ $stage -le 4 ]; then --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ --adjust-priors $adjust_priors \ - --modify-learning-rates $modify_learning_rates --last-layer-factor $last_layer_factor \ + --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh b/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh index 2f5badba26c..fbf6d64aefa 100755 --- a/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh +++ b/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh @@ -54,7 +54,6 @@ regularization_opts= # Applicable for providing --xent-regularize and - minibatch_size=64 adjust_priors=true # May need to be set to 
false # because it does not help in some setups -modify_learning_rates=true last_layer_factor=0.1 ## Decode options @@ -157,7 +156,7 @@ if [ $stage -le 4 ]; then --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ --adjust-priors $adjust_priors \ - --modify-learning-rates $modify_learning_rates --last-layer-factor $last_layer_factor \ + --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh b/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh index 91bcaf06ccb..255f1d49882 100755 --- a/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh @@ -48,7 +48,6 @@ regularization_opts= # Applicable for providing --xent-regularize and - minibatch_size=64 adjust_priors=true # May need to be set to false # because it does not help in some setups -modify_learning_rates=true last_layer_factor=0.1 ## Decode options @@ -145,7 +144,7 @@ if [ $stage -le 4 ]; then --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ --adjust-priors $adjust_priors \ - --modify-learning-rates $modify_learning_rates --last-layer-factor $last_layer_factor \ + --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh index 0c5e05556ad..805d38b4e88 100755 --- a/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh @@ -62,7 +62,6 @@ regularization_opts= # Applicable for providing --xent-regularize and - minibatch_size=64 adjust_priors=true # May need to be set to false # because it does not help in some setups -modify_learning_rates=true last_layer_factor=0.1 ## Decode options @@ -156,7 +155,7 @@ if [ $stage -le 4 ]; then --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ --adjust-priors $adjust_priors \ - --modify-learning-rates $modify_learning_rates --last-layer-factor $last_layer_factor \ + --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh index b1f7e6f8c93..9641ce16e21 100755 --- a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh @@ -73,9 +73,10 @@ num_jobs_nnet=4 num_epochs=4 regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 # we may have to reduce this. -adjust_priors=true # May need to be set to false - # because it does not help in some setups -modify_learning_rates=true +adjust_priors=false # Note: this option will eventually be removed and + # the script will do it automatically but write to + # a different filename + last_layer_factor=0.1 # prevent the final layer from learning too fast; # this can be a problem. 
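For reference, after this patch every one of these wrapper scripts ends up invoking the trainer as sketched here (abridged from the '+' sides of the hunks above and below; the variables are the script options defined earlier in each file):

    steps/nnet3/train_discriminative.sh --cmd "$decode_cmd" \
      --stage $train_stage \
      --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \
      --regularization-opts "$regularization_opts" \
      --adjust-priors $adjust_priors \
      --last-layer-factor $last_layer_factor \
      ${degs_dir} $dir
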
@@ -139,8 +140,6 @@ if [ -z "$degs_dir" ]; then fi fi -exit 0 # TODO: remove this - if [ $stage -le 3 ]; then steps/nnet3/train_discriminative.sh --cmd "$decode_cmd" \ --stage $train_stage \ @@ -150,7 +149,7 @@ if [ $stage -le 3 ]; then --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ --adjust-priors $adjust_priors \ - --modify-learning-rates $modify_learning_rates --last-layer-factor $last_layer_factor \ + --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh b/egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh index b91208a0fe6..3fffd59426c 100755 --- a/egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh +++ b/egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh @@ -55,7 +55,6 @@ num_epochs=4 regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 adjust_priors=true -modify_learning_rates=true last_layer_factor=0.1 ## Decode options @@ -160,7 +159,7 @@ if [ $stage -le 4 ]; then --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ --adjust-priors $adjust_priors \ - --modify-learning-rates $modify_learning_rates --last-layer-factor $last_layer_factor \ + --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/wsj/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/wsj/s5/local/nnet3/run_tdnn_discriminative.sh index 45bb36ea85c..b84688f574c 100755 --- a/egs/wsj/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/wsj/s5/local/nnet3/run_tdnn_discriminative.sh @@ -47,7 +47,6 @@ num_epochs=4 regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 adjust_priors=true -modify_learning_rates=true last_layer_factor=0.1 ## Decode options @@ -144,7 +143,7 @@ if [ $stage -le 4 ]; then --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ --adjust-priors $adjust_priors \ - --modify-learning-rates $modify_learning_rates --last-layer-factor $last_layer_factor \ + --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/wsj/s5/steps/nnet3/train_discriminative.sh b/egs/wsj/s5/steps/nnet3/train_discriminative.sh index fb75e7b0aab..8d7484aa889 100755 --- a/egs/wsj/s5/steps/nnet3/train_discriminative.sh +++ b/egs/wsj/s5/steps/nnet3/train_discriminative.sh @@ -37,7 +37,6 @@ num_jobs_nnet=4 # Number of neural net jobs to run in parallel. Note: this # versa). regularization_opts= minibatch_size=64 # This is the number of examples rather than the number of output frames. -modify_learning_rates=false # [deprecated] last_layer_factor=1.0 # relates to modify-learning-rates [deprecated] shuffle_buffer_size=1000 # This "buffer_size" variable controls randomization of the samples # on each iter. 
You could set it to 0 or to a large value for complete

From 1dabfa54ce4ac0413b8313e74a4b0c7a76010212 Mon Sep 17 00:00:00 2001
From: Ke Li
Date: Sat, 14 Jan 2017 23:20:25 -0500
Subject: [PATCH 233/530] [src] cosmetic changes to rnnlm-related code (#1283)

---
 src/lm/arpa-file-parser-test.cc |   2 +-
 src/lm/arpa-file-parser.h       |   8 +-
 src/lm/arpa-lm-compiler-test.cc |   5 +-
 src/lm/const-arpa-lm.cc         |   2 +-
 src/lm/mikolov-rnnlm-lib.cc     | 262 +++++++++++++++++++-------------
 src/lm/mikolov-rnnlm-lib.h      |   2 +-
 6 files changed, 165 insertions(+), 116 deletions(-)

diff --git a/src/lm/arpa-file-parser-test.cc b/src/lm/arpa-file-parser-test.cc
index be69ddc6bf2..51af0bea2bf 100644
--- a/src/lm/arpa-file-parser-test.cc
+++ b/src/lm/arpa-file-parser-test.cc
@@ -149,7 +149,7 @@ void TestableArpaFileParser::Validate(
   //                             expect_ngrams.array, CompareNgrams);
   // if (mpos.first != ngrams_.end())
   //   KALDI_ERR << "Mismatch at index " << mpos.first - ngrams_.begin();
-  //TODO:auto above requres C++11, and I cannot spell out the type!!!
+  // TODO: auto above requires C++11, and I cannot spell out the type!!!
   KALDI_ASSERT(std::equal(ngrams_.begin(), ngrams_.end(),
                           expect_ngrams.array, CompareNgrams));
 }
diff --git a/src/lm/arpa-file-parser.h b/src/lm/arpa-file-parser.h
index fc7c83deb30..8c4ada5692d 100644
--- a/src/lm/arpa-file-parser.h
+++ b/src/lm/arpa-file-parser.h
@@ -21,11 +21,11 @@
 #ifndef KALDI_LM_ARPA_FILE_PARSER_H_
 #define KALDI_LM_ARPA_FILE_PARSER_H_
 
+#include
+
 #include <string>
 #include <vector>
 
-#include
-
 #include "base/kaldi-types.h"
 #include "itf/options-itf.h"
 
@@ -38,7 +38,7 @@ struct ArpaParseOptions {
   enum OovHandling {
     kRaiseError,     ///< Abort on OOV words
     kAddToSymbols,   ///< Add novel words to the symbol table.
-    kReplaceWithUnk, ///< Replace OOV words with <unk>.
+    kReplaceWithUnk, ///< Replace OOV words with <unk>.
     kSkipNGram       ///< Skip n-gram with OOV word and continue.
   };
 
@@ -59,7 +59,7 @@ struct ArpaParseOptions {
   int32 eos_symbol;  ///< Symbol for </s>
, Required non-epsilon. int32 unk_symbol; ///< Symbol for , Required for kReplaceWithUnk. OovHandling oov_handling; ///< How to handle OOV words in the file. - int32 max_warnings; ///< Maximum warnings to report, <0 unlimited. + int32 max_warnings; ///< Maximum warnings to report, <0 unlimited. }; /** diff --git a/src/lm/arpa-lm-compiler-test.cc b/src/lm/arpa-lm-compiler-test.cc index 8242b6fc266..8d2e953304e 100644 --- a/src/lm/arpa-lm-compiler-test.cc +++ b/src/lm/arpa-lm-compiler-test.cc @@ -33,7 +33,7 @@ namespace kaldi { enum { kEps = 0, kDisambig, - kBos,kEos, + kBos, kEos, }; // Number of random sentences for coverage test. @@ -227,8 +227,7 @@ int main(int argc, char *argv[]) { if (ok) { KALDI_LOG << "All tests passed"; return 0; - } - else { + } else { KALDI_WARN << "Test FAILED"; return 1; } diff --git a/src/lm/const-arpa-lm.cc b/src/lm/const-arpa-lm.cc index bb1517c8875..8c848d245a9 100644 --- a/src/lm/const-arpa-lm.cc +++ b/src/lm/const-arpa-lm.cc @@ -176,7 +176,7 @@ class LmState { // auxiliary class LmState above. class ConstArpaLmBuilder : public ArpaFileParser { public: - ConstArpaLmBuilder(ArpaParseOptions options) + explicit ConstArpaLmBuilder(ArpaParseOptions options) : ArpaFileParser(options, NULL) { ngram_order_ = 0; num_words_ = 0; diff --git a/src/lm/mikolov-rnnlm-lib.cc b/src/lm/mikolov-rnnlm-lib.cc index d1afd666539..b1abb29dee7 100644 --- a/src/lm/mikolov-rnnlm-lib.cc +++ b/src/lm/mikolov-rnnlm-lib.cc @@ -66,7 +66,7 @@ static union { int j, i; } n; } d2i; -#define EXP_A (1048576/M_LN2) +#define EXP_A (1048576 / M_LN2) #define EXP_C 60801 #define FAST_EXP(y) (d2i.n.i = EXP_A * (y) + (1072693248 - EXP_C), d2i.d) @@ -147,7 +147,7 @@ CRnnLM::CRnnLM() { srand(rand_seed); vocab_hash_size = 100000000; - vocab_hash = (int *)calloc(vocab_hash_size, sizeof(int)); + vocab_hash = reinterpret_cast(calloc(vocab_hash_size, sizeof(int))); } CRnnLM::~CRnnLM() { @@ -178,7 +178,9 @@ CRnnLM::~CRnnLM() { free(syn1b); if (syncb != NULL) free(syncb); - for (i = 0; i < class_size; i++) free(class_words[i]); + for (i = 0; i < class_size; i++) { + free(class_words[i]); + } free(class_max_cn); free(class_cn); free(class_words); @@ -195,7 +197,7 @@ CRnnLM::~CRnnLM() { } real CRnnLM::random(real min, real max) { - return rand()/(real)RAND_MAX*(max-min)+min; + return rand() / (real)RAND_MAX * (max - min) + min; } void CRnnLM::setRnnLMFile(const std::string &str) { @@ -222,7 +224,7 @@ void CRnnLM::readWord(char *word, FILE *fin) { } if (ch == '\n') { - strcpy(word, (char *)"
"); + strcpy(word, const_cast("
")); return; } else { continue; @@ -244,7 +246,9 @@ int CRnnLM::getWordHash(const char *word) { unsigned int hash, a; hash = 0; - for (a = 0; a < strlen(word); a++) hash = hash * 237 + word[a]; + for (a = 0; a < strlen(word); a++) { + hash = hash * 237 + word[a]; + } hash = hash % vocab_hash_size; return hash; @@ -275,8 +279,9 @@ void CRnnLM::sortVocab() { for (a = 1; a < vocab_size; a++) { max = a; - for (b = a + 1; b < vocab_size; b++) + for (b = a + 1; b < vocab_size; b++) { if (vocab[max].cn < vocab[b].cn) max = b; + } swap = vocab[max]; vocab[max] = vocab[a]; @@ -307,24 +312,30 @@ void CRnnLM::saveWeights() { // saves current weights and unit activations neu2b[a].er = neu2[a].er; } - for (b = 0; b < layer1_size; b++) + for (b = 0; b < layer1_size; b++) { for (a = 0; a < layer0_size; a++) { syn0b[a + b * layer0_size].weight = syn0[a + b * layer0_size].weight; } + } if (layerc_size > 0) { - for (b = 0; b < layerc_size; b++) for (a = 0; a < layer1_size; a++) { - syn1b[a + b * layer1_size].weight = syn1[a + b * layer1_size].weight; + for (b = 0; b < layerc_size; b++) { + for (a = 0; a < layer1_size; a++) { + syn1b[a + b * layer1_size].weight = syn1[a + b * layer1_size].weight; + } } - for (b = 0; b < layer2_size; b++) for (a = 0; a < layerc_size; a++) { - syncb[a + b * layerc_size].weight = sync[a + b * layerc_size].weight; + for (b = 0; b < layer2_size; b++) { + for (a = 0; a < layerc_size; a++) { + syncb[a + b * layerc_size].weight = sync[a + b * layerc_size].weight; + } } } else { - for (b = 0; b < layer2_size; b++) + for (b = 0; b < layer2_size; b++) { for (a = 0; a < layer1_size; a++) { syn1b[a + b * layer1_size].weight = syn1[a + b * layer1_size].weight; } + } } // for (a = 0; a < direct_size; a++) syn_db[a].weight = syn_d[a].weight; @@ -364,12 +375,14 @@ void CRnnLM::initNet() { exit(1); } - syn_d = (direct_t *)calloc((long long)direct_size, sizeof(direct_t)); + syn_d = + reinterpret_cast(calloc(static_cast(direct_size), + sizeof(direct_t))); if (syn_d == NULL) { printf("Memory allocation for direct" - " connections failed (requested %lld bytes)\n", - (long long)direct_size * (long long)sizeof(direct_t)); + " connections failed (requested %ld bytes)\n", + static_cast(direct_size) * static_cast(sizeof(direct_t))); exit(1); } @@ -418,40 +431,49 @@ void CRnnLM::initNet() { neu2[a].er = 0; } - for (b = 0; b < layer1_size; b++) + for (b = 0; b < layer1_size; b++) { for (a = 0; a < layer0_size; a++) { syn0[a + b * layer0_size].weight = random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1); } + } if (layerc_size > 0) { - for (b = 0; b < layerc_size; b++) + for (b = 0; b < layerc_size; b++) { for (a = 0; a < layer1_size; a++) { syn1[a + b * layer1_size].weight = random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1); } + } - for (b = 0; b < layer2_size; b++) + for (b = 0; b < layer2_size; b++) { for (a = 0; a < layerc_size; a++) { sync[a + b * layerc_size].weight = random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1); } + } } else { - for (b = 0; b < layer2_size; b++) + for (b = 0; b < layer2_size; b++) { for (a = 0; a < layer1_size; a++) { syn1[a + b * layer1_size].weight = random(-0.1, 0.1) + random(-0.1, 0.1) + random(-0.1, 0.1); } + } } - long long aa; - for (aa = 0; aa < direct_size; aa++) syn_d[aa] = 0; + int64 aa; + for (aa = 0; aa < direct_size; aa++) { + syn_d[aa] = 0; + } if (bptt > 0) { - bptt_history = (int *)calloc((bptt + bptt_block + 10), sizeof(int)); - for (a = 0; a < bptt + bptt_block; a++) bptt_history[a] = -1; - bptt_hidden = (neuron *)calloc((bptt + 
bptt_block + 1) * layer1_size, - sizeof(neuron)); + bptt_history = reinterpret_cast(calloc((bptt + bptt_block + 10), + sizeof(int))); + for (a = 0; a < bptt + bptt_block; a++) { + bptt_history[a] = -1; + } + bptt_hidden = reinterpret_cast(calloc( + (bptt + bptt_block + 1) * layer1_size, sizeof(neuron))); for (a = 0; a < (bptt + bptt_block) * layer1_size; a++) { bptt_hidden[a].ac = 0; bptt_hidden[a].er = 0; @@ -475,11 +497,13 @@ void CRnnLM::initNet() { b = 0; if (old_classes) { // old classes - for (i = 0; i < vocab_size; i++) b += vocab[i].cn; for (i = 0; i < vocab_size; i++) { - df+= vocab[i].cn / (double)b; + b += vocab[i].cn; + } + for (i = 0; i < vocab_size; i++) { + df += vocab[i].cn / static_cast(b); if (df > 1) df = 1; - if (df > (a + 1) / (double)class_size) { + if (df > (a + 1) / static_cast(class_size)) { vocab[i].class_index = a; if (a < class_size - 1) a++; } else { @@ -487,14 +511,18 @@ void CRnnLM::initNet() { } } } else { // new classes - for (i = 0; i < vocab_size; i++) b += vocab[i].cn; - for (i = 0; i < vocab_size; i++) dd += sqrt(vocab[i].cn / (double)b); for (i = 0; i < vocab_size; i++) { - df += sqrt(vocab[i].cn / (double)b) / dd; + b += vocab[i].cn; + } + for (i = 0; i < vocab_size; i++) { + dd += sqrt(vocab[i].cn / static_cast(b)); + } + for (i = 0; i < vocab_size; i++) { + df += sqrt(vocab[i].cn / static_cast(b)) / dd; if (df > 1) df = 1; - if (df > (a + 1) / (double)class_size) { + if (df > (a + 1) / static_cast(class_size)) { vocab[i].class_index = a; - if (a < class_size-1) a++; + if (a < class_size - 1) a++; } else { vocab[i].class_index = a; } @@ -504,14 +532,14 @@ void CRnnLM::initNet() { // allocate auxiliary class variables (for faster search when // normalizing probability at output layer) - class_words = (int **)calloc(class_size, sizeof(int *)); - class_cn = (int *)calloc(class_size, sizeof(int)); - class_max_cn = (int *)calloc(class_size, sizeof(int)); + class_words = reinterpret_cast(calloc(class_size, sizeof(int *))); + class_cn = reinterpret_cast(calloc(class_size, sizeof(int))); + class_max_cn = reinterpret_cast(calloc(class_size, sizeof(int))); for (i = 0; i < class_size; i++) { class_cn[i] = 0; class_max_cn[i] = 10; - class_words[i] = (int *)calloc(class_max_cn[i], sizeof(int)); + class_words[i] = reinterpret_cast(calloc(class_max_cn[i], sizeof(int))); } for (i = 0; i < vocab_size; i++) { @@ -520,8 +548,8 @@ void CRnnLM::initNet() { class_cn[cl]++; if (class_cn[cl] + 2 >= class_max_cn[cl]) { class_max_cn[cl] += 10; - class_words[cl] = (int *)realloc(class_words[cl], - class_max_cn[cl] * sizeof(int)); + class_words[cl] = reinterpret_cast(realloc(class_words[cl], + class_max_cn[cl] * sizeof(int))); } } } @@ -593,7 +621,7 @@ void CRnnLM::restoreNet() { // will read whole network structure fscanf(fi, "%d", &layer2_size); if (ver > 5) { goToDelimiter(':', fi); - fscanf(fi, "%lld", &direct_size); + fscanf(fi, "%ld", &direct_size); } if (ver > 6) { goToDelimiter(':', fi); @@ -732,14 +760,14 @@ void CRnnLM::restoreNet() { // will read whole network structure } if (filetype == TEXT) { goToDelimiter(':', fi); // direct conenctions - long long aa; + int64 aa; for (aa = 0; aa < direct_size; aa++) { fscanf(fi, "%lf", &d); syn_d[aa] = d; } } if (filetype == BINARY) { - long long aa; + int64 aa; for (aa = 0; aa < direct_size; aa++) { fread(&fl, 4, 1, fi); syn_d[aa] = fl; @@ -765,15 +793,20 @@ void CRnnLM::netReset() { // cleans hidden layer activation + bptt history copyHiddenLayerToInput(); if (bptt > 0) { - for (a = 1; a < bptt + bptt_block; a++) 
bptt_history[a] = 0; - for (a = bptt + bptt_block-1; a > 1; a--) + for (a = 1; a < bptt + bptt_block; a++) { + bptt_history[a] = 0; + } + for (a = bptt + bptt_block - 1; a > 1; a--) { for (b = 0; b < layer1_size; b++) { bptt_hidden[a * layer1_size + b].ac = 0; bptt_hidden[a * layer1_size + b].er = 0; + } } } - for (a = 0; a < MAX_NGRAM_ORDER; a++) history[a] = 0; + for (a = 0; a < MAX_NGRAM_ORDER; a++) { + history[a] = 0; + } } void CRnnLM::matrixXvector(struct neuron *dest, struct neuron *srcvec, @@ -796,35 +829,35 @@ void CRnnLM::matrixXvector(struct neuron *dest, struct neuron *srcvec, val8 = 0; for (a = from2; a < to2; a++) { - val1 += srcvec[a].ac * srcmatrix[a+(b*8+from+0)*matrix_width].weight; - val2 += srcvec[a].ac * srcmatrix[a+(b*8+from+1)*matrix_width].weight; - val3 += srcvec[a].ac * srcmatrix[a+(b*8+from+2)*matrix_width].weight; - val4 += srcvec[a].ac * srcmatrix[a+(b*8+from+3)*matrix_width].weight; - - val5 += srcvec[a].ac * srcmatrix[a+(b*8+from+4)*matrix_width].weight; - val6 += srcvec[a].ac * srcmatrix[a+(b*8+from+5)*matrix_width].weight; - val7 += srcvec[a].ac * srcmatrix[a+(b*8+from+6)*matrix_width].weight; - val8 += srcvec[a].ac * srcmatrix[a+(b*8+from+7)*matrix_width].weight; + val1 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 0) * matrix_width].weight; + val2 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 1) * matrix_width].weight; + val3 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 2) * matrix_width].weight; + val4 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 3) * matrix_width].weight; + + val5 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 4) * matrix_width].weight; + val6 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 5) * matrix_width].weight; + val7 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 6) * matrix_width].weight; + val8 += srcvec[a].ac * srcmatrix[a + (b * 8 + from + 7) * matrix_width].weight; } - dest[b*8+from+0].ac += val1; - dest[b*8+from+1].ac += val2; - dest[b*8+from+2].ac += val3; - dest[b*8+from+3].ac += val4; - - dest[b*8+from+4].ac += val5; - dest[b*8+from+5].ac += val6; - dest[b*8+from+6].ac += val7; - dest[b*8+from+7].ac += val8; + dest[b * 8 + from + 0].ac += val1; + dest[b * 8 + from + 1].ac += val2; + dest[b * 8 + from + 2].ac += val3; + dest[b * 8 + from + 3].ac += val4; + + dest[b * 8 + from + 4].ac += val5; + dest[b * 8 + from + 5].ac += val6; + dest[b * 8 + from + 6].ac += val7; + dest[b * 8 + from + 7].ac += val8; } - for (b = b*8; b < to-from; b++) { + for (b = b * 8; b < to - from; b++) { for (a = from2; a < to2; a++) { dest[b+from].ac += - srcvec[a].ac * srcmatrix[a+(b+from)*matrix_width].weight; + srcvec[a].ac * srcmatrix[a + (b + from) * matrix_width].weight; } } } else { // er mod - for (a = 0; a < (to2-from2)/8; a++) { + for (a = 0; a < (to2 - from2) / 8; a++) { val1 = 0; val2 = 0; val3 = 0; @@ -836,25 +869,25 @@ void CRnnLM::matrixXvector(struct neuron *dest, struct neuron *srcvec, val8 = 0; for (b = from; b < to; b++) { - val1 += srcvec[b].er * srcmatrix[a*8+from2+0+b*matrix_width].weight; - val2 += srcvec[b].er * srcmatrix[a*8+from2+1+b*matrix_width].weight; - val3 += srcvec[b].er * srcmatrix[a*8+from2+2+b*matrix_width].weight; - val4 += srcvec[b].er * srcmatrix[a*8+from2+3+b*matrix_width].weight; - - val5 += srcvec[b].er * srcmatrix[a*8+from2+4+b*matrix_width].weight; - val6 += srcvec[b].er * srcmatrix[a*8+from2+5+b*matrix_width].weight; - val7 += srcvec[b].er * srcmatrix[a*8+from2+6+b*matrix_width].weight; - val8 += srcvec[b].er * srcmatrix[a*8+from2+7+b*matrix_width].weight; + val1 += srcvec[b].er * 
srcmatrix[a * 8 + from2 + 0 + b * matrix_width].weight; + val2 += srcvec[b].er * srcmatrix[a * 8 + from2 + 1 + b * matrix_width].weight; + val3 += srcvec[b].er * srcmatrix[a * 8 + from2 + 2 + b * matrix_width].weight; + val4 += srcvec[b].er * srcmatrix[a * 8 + from2 + 3 + b * matrix_width].weight; + + val5 += srcvec[b].er * srcmatrix[a * 8 + from2 + 4 + b * matrix_width].weight; + val6 += srcvec[b].er * srcmatrix[a * 8 + from2 + 5 + b * matrix_width].weight; + val7 += srcvec[b].er * srcmatrix[a * 8 + from2 + 6 + b * matrix_width].weight; + val8 += srcvec[b].er * srcmatrix[a * 8 + from2 + 7 + b * matrix_width].weight; } - dest[a*8+from2+0].er += val1; - dest[a*8+from2+1].er += val2; - dest[a*8+from2+2].er += val3; - dest[a*8+from2+3].er += val4; - - dest[a*8+from2+4].er += val5; - dest[a*8+from2+5].er += val6; - dest[a*8+from2+6].er += val7; - dest[a*8+from2+7].er += val8; + dest[a * 8 + from2 + 0].er += val1; + dest[a * 8 + from2 + 1].er += val2; + dest[a * 8 + from2 + 2].er += val3; + dest[a * 8 + from2 + 3].er += val4; + + dest[a * 8 + from2 + 4].er += val5; + dest[a * 8 + from2 + 5].er += val6; + dest[a * 8 + from2 + 6].er += val7; + dest[a * 8 + from2 + 7].er += val8; } for (a = a * 8; a < to2 - from2; a++) { @@ -899,15 +932,19 @@ void CRnnLM::computeNet(int last_word, int word) { if (last_word != -1) neu0[last_word].ac = 1; // propagate 0->1 - for (a = 0; a < layer1_size; a++) neu1[a].ac = 0; - for (a = 0; a < layerc_size; a++) neuc[a].ac = 0; + for (a = 0; a < layer1_size; a++) { + neu1[a].ac = 0; + } + for (a = 0; a < layerc_size; a++) { + neuc[a].ac = 0; + } matrixXvector(neu1, neu0, syn0, layer0_size, 0, layer1_size, - layer0_size-layer1_size, layer0_size, 0); + layer0_size - layer1_size, layer0_size, 0); for (b = 0; b < layer1_size; b++) { a = last_word; - if (a != -1) neu1[b].ac += neu0[a].ac * syn0[a+b*layer0_size].weight; + if (a != -1) neu1[b].ac += neu0[a].ac * syn0[a + b * layer0_size].weight; } // activate 1 --sigmoid @@ -931,7 +968,9 @@ void CRnnLM::computeNet(int last_word, int word) { } // 1->2 class - for (b = vocab_size; b < layer2_size; b++) neu2[b].ac = 0; + for (b = vocab_size; b < layer2_size; b++) { + neu2[b].ac = 0; + } if (layerc_size > 0) { matrixXvector(neu2, neuc, sync, layerc_size, @@ -943,20 +982,23 @@ void CRnnLM::computeNet(int last_word, int word) { // apply direct connections to classes if (direct_size > 0) { - unsigned long long hash[MAX_NGRAM_ORDER]; + uint64 hash[MAX_NGRAM_ORDER]; // this will hold pointers to syn_d that contains hash parameters - for (a = 0; a < direct_order; a++) hash[a] = 0; + for (a = 0; a < direct_order; a++) { + hash[a] = 0; + } for (a = 0; a < direct_order; a++) { b = 0; - if (a > 0) if (history[a-1] == -1) break; + if (a > 0) if (history[a - 1] == -1) break; // if OOV was in history, do not use this N-gram feature and higher orders - hash[a] = PRIMES[0]*PRIMES[1]; + hash[a] = PRIMES[0] * PRIMES[1]; - for (b = 1; b <= a; b++) + for (b = 1; b <= a; b++) { hash[a] += PRIMES[(a * PRIMES[b] + b) % PRIMES_SIZE] - * (unsigned long long)(history[b - 1] + 1); + * static_cast(history[b - 1] + 1); + } // update hash value based on words from the history hash[a] = hash[a] % (direct_size / 2); @@ -965,7 +1007,7 @@ void CRnnLM::computeNet(int last_word, int word) { } for (a = vocab_size; a < layer2_size; a++) { - for (b = 0; b < direct_order; b++) + for (b = 0; b < direct_order; b++) { if (hash[b]) { neu2[a].ac += syn_d[hash[b]]; // apply current parameter and move to the next one @@ -974,6 +1016,7 @@ void CRnnLM::computeNet(int 
last_word, int word) { } else { break; } + } } } @@ -986,7 +1029,9 @@ void CRnnLM::computeNet(int last_word, int word) { sum+= val; neu2[a].ac = val; } - for (a = vocab_size; a < layer2_size; a++) neu2[a].ac /= sum; + for (a = vocab_size; a < layer2_size; a++) { + neu2[a].ac /= sum; + } // output layer activations now sum exactly to 1 if (gen > 0) return; // if we generate words, we don't know what current word @@ -996,8 +1041,9 @@ void CRnnLM::computeNet(int last_word, int word) { // 1->2 word if (word != -1) { - for (c = 0; c < class_cn[vocab[word].class_index]; c++) + for (c = 0; c < class_cn[vocab[word].class_index]; c++) { neu2[class_words[vocab[word].class_index][c]].ac = 0; + } if (layerc_size > 0) { matrixXvector(neu2, neuc, sync, layerc_size, class_words[vocab[word].class_index][0], @@ -1015,20 +1061,23 @@ void CRnnLM::computeNet(int last_word, int word) { // apply direct connections to words if (word != -1) if (direct_size > 0) { - unsigned long long hash[MAX_NGRAM_ORDER]; + uint64 hash[MAX_NGRAM_ORDER]; - for (a = 0; a < direct_order; a++) hash[a] = 0; + for (a = 0; a < direct_order; a++) { + hash[a] = 0; + } for (a = 0; a < direct_order; a++) { b = 0; - if (a > 0) if (history[a-1] == -1) break; + if (a > 0) if (history[a - 1] == -1) break; hash[a] = PRIMES[0] * PRIMES[1] * - (unsigned long long)(vocab[word].class_index+1); + static_cast(vocab[word].class_index + 1); - for (b = 1; b <= a; b++) + for (b = 1; b <= a; b++) { hash[a] += PRIMES[(a * PRIMES[b] + b) % PRIMES_SIZE] - * (unsigned long long)(history[b - 1] + 1); + * static_cast(history[b - 1] + 1); + } hash[a] = (hash[a] % (direct_size / 2)) + (direct_size) / 2; } @@ -1056,8 +1105,9 @@ void CRnnLM::computeNet(int last_word, int word) { sum+= val; neu2[a].ac = val; } - for (c = 0; c < class_cn[vocab[word].class_index]; c++) + for (c = 0; c < class_cn[vocab[word].class_index]; c++) { neu2[class_words[vocab[word].class_index][c]].ac /= sum; + } } } diff --git a/src/lm/mikolov-rnnlm-lib.h b/src/lm/mikolov-rnnlm-lib.h index 36d88a0a5d0..fb9c340416b 100644 --- a/src/lm/mikolov-rnnlm-lib.h +++ b/src/lm/mikolov-rnnlm-lib.h @@ -143,7 +143,7 @@ class CRnnLM { int layerc_size; int layer2_size; - long long direct_size; + int64 direct_size; int direct_order; int history[MAX_NGRAM_ORDER]; From 7f99ea4a5b30fce828da6a23ba0fc0415aa43e2f Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 15 Jan 2017 16:15:36 -0500 Subject: [PATCH 234/530] Various script updates/fixes for discriminative training scripts; fix issue that Alexander Gorodetski pointed out on list RE a warning. --- .../local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh | 16 +++++----------- egs/wsj/s5/steps/nnet3/chain/get_egs.sh | 7 +++++-- egs/wsj/s5/steps/nnet3/get_egs.sh | 14 ++++++++++---- egs/wsj/s5/steps/nnet3/make_denlats.sh | 7 +++++-- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh index 9641ce16e21..97ed72ba429 100755 --- a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh @@ -17,7 +17,7 @@ # $0 --train-set train --gmm tri3 --nnet3-affix "" & - +set -e set -uo pipefail stage=1 @@ -27,20 +27,16 @@ use_gpu=true # for training cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like denlats, # alignments and degs). degs_dir= # set this to use preexisting degs. 
-# nj=400 # have a high number of jobs because this could take a while, and we might -# # have some stragglers. -nj=30 +nj=400 # have a high number of jobs because this could take a while, and we might + # have some stragglers. . ./cmd.sh . ./path.sh . ./utils/parse_options.sh srcdir=exp/nnet3_cleaned/tdnn_lstm1b_sp -#train_data_dir=data/train_cleaned_sp_hires_comb -#online_ivector_dir=exp/nnet3_cleaned/ivectors_train_cleaned_sp_hires_comb - -train_data_dir=data/dev_hires -online_ivector_dir=exp/nnet3_cleaned/ivectors_dev_hires +train_data_dir=data/train_cleaned_sp_hires_comb +online_ivector_dir=exp/nnet3_cleaned/ivectors_train_cleaned_sp_hires_comb ## Objective options criterion=smbr @@ -109,7 +105,6 @@ if [ $stage -le 1 ]; then steps/nnet3/align.sh --cmd "$decode_cmd" --use-gpu false \ --frames-per-chunk $frames_per_chunk_decoding \ --extra-left-context $extra_left_context --extra-right-context $extra_right_context \ - --looped $looped \ --extra-left-context-initial 0 --extra-right-context-final 0 \ --online-ivector-dir $online_ivector_dir \ --nj $nj $train_data_dir data/lang $srcdir ${srcdir}_ali ; @@ -133,7 +128,6 @@ if [ -z "$degs_dir" ]; then --extra-left-context-initial 0 --extra-right-context-final 0 \ --frames-per-chunk-decoding "$frames_per_chunk_decoding" \ --stage $get_egs_stage \ - --adjust-priors $adjust_priors \ --online-ivector-dir $online_ivector_dir \ --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \ $train_data_dir data/lang ${srcdir} ${srcdir}_ali ${srcdir}_degs || exit 1 diff --git a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh index 3ca2fc84627..0b1ddd1fbc7 100755 --- a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh @@ -313,18 +313,21 @@ if [ $stage -le 3 ]; then rm $dir/.error 2>/dev/null echo "$0: ... extracting validation and training-subset alignments." + # do the filtering just once, as lat.scp may be long. utils/filter_scp.pl <(cat $dir/valid_uttlist $dir/train_subset_uttlist) \ <$dir/lat.scp >$dir/lat_special.scp $cmd $dir/log/create_valid_subset.log \ - lattice-align-phones --replace-output-symbols=true $latdir/final.mdl scp:$dir/lat_special.scp ark:- \| \ + utils/filter_scp.pl $dir/valid_uttlist $dir/lat_special.scp \| \ + lattice-align-phones --replace-output-symbols=true $latdir/final.mdl scp:- ark:- \| \ chain-get-supervision $chain_supervision_all_opts $chaindir/tree $chaindir/0.trans_mdl \ ark:- ark:- \| \ nnet3-chain-get-egs $ivector_opts --srand=$srand \ $egs_opts $chaindir/normalization.fst \ "$valid_feats" ark,s,cs:- "ark:$dir/valid_all.cegs" || touch $dir/.error & $cmd $dir/log/create_train_subset.log \ - lattice-align-phones --replace-output-symbols=true $latdir/final.mdl scp:$dir/lat_special.scp ark:- \| \ + utils/filter_scp.pl $dir/train_subset_uttlist $dir/lat_special.scp \| \ + lattice-align-phones --replace-output-symbols=true $latdir/final.mdl scp:- ark:- \| \ chain-get-supervision $chain_supervision_all_opts \ $chaindir/tree $chaindir/0.trans_mdl ark:- ark:- \| \ nnet3-chain-get-egs $ivector_opts --srand=$srand \ diff --git a/egs/wsj/s5/steps/nnet3/get_egs.sh b/egs/wsj/s5/steps/nnet3/get_egs.sh index cb7ea0ac73c..c47522fec7a 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs.sh @@ -288,17 +288,23 @@ if [ $stage -le 3 ]; then rm $dir/.error 2>/dev/null echo "$0: ... extracting validation and training-subset alignments." + + # do the filtering just once, as ali.scp may be long. 
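+  # (ali_special.scp then holds only the validation and train-subset
+  # utterances; each job below re-filters it with one uttlist and pipes the
+  # alignments through ali-to-pdf and ali-to-post into nnet3-get-egs.)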
utils/filter_scp.pl <(cat $dir/valid_uttlist $dir/train_subset_uttlist) \ <$dir/ali.scp >$dir/ali_special.scp $cmd $dir/log/create_valid_subset.log \ + utils/filter_scp.pl $dir/valid_uttlist $dir/ali_special.scp \| \ + ali-to-pdf $alidir/final.mdl scp:- ark:- \| \ + ali-to-post ark:- ark:- \| \ nnet3-get-egs --num-pdfs=$num_pdfs $ivector_opts $egs_opts "$valid_feats" \ - "ark,s,cs:ali-to-pdf $alidir/final.mdl scp:$dir/ali_special.scp ark:- | ali-to-post ark:- ark:- |" \ - "ark:$dir/valid_all.egs" || touch $dir/.error & + ark,s,cs:- "ark:$dir/valid_all.egs" || touch $dir/.error & $cmd $dir/log/create_train_subset.log \ + utils/filter_scp.pl $dir/train_subset_uttlist $dir/ali_special.scp \| \ + ali-to-pdf $alidir/final.mdl scp:- ark:- \| \ + ali-to-post ark:- ark:- \| \ nnet3-get-egs --num-pdfs=$num_pdfs $ivector_opts $egs_opts "$train_subset_feats" \ - "ark,s,cs:ali-to-pdf $alidir/final.mdl scp:$dir/ali_special.scp ark:- | ali-to-post ark:- ark:- |" \ - "ark:$dir/train_subset_all.egs" || touch $dir/.error & + ark,s,cs:- "ark:$dir/train_subset_all.egs" || touch $dir/.error & wait; [ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1 echo "... Getting subsets of validation examples for diagnostics and combination." diff --git a/egs/wsj/s5/steps/nnet3/make_denlats.sh b/egs/wsj/s5/steps/nnet3/make_denlats.sh index d1591c0b1de..b9bb9bfd2a1 100755 --- a/egs/wsj/s5/steps/nnet3/make_denlats.sh +++ b/egs/wsj/s5/steps/nnet3/make_denlats.sh @@ -3,9 +3,12 @@ # 2014-2015 Vimal Manohar # Apache 2.0. -# Create denominator lattices for MMI/MPE training. +# Create denominator lattices for MMI/MPE training [deprecated]. # This version uses the neural-net models (version 3, i.e. the nnet3 code). # Creates its output in $dir/lat.*.gz +# Note: the more recent discriminative training scripts will not use this +# script at all, they'll use get_degs.sh which combines the decoding +# and egs-dumping into one script (to save disk space and disk I/O). # Begin configuration section. nj=4 @@ -174,7 +177,7 @@ fi lattice_determinize_cmd= if $determinize; then - lattice_determinize_cmd="lattice-determinize-non-compact --acoustic-scale=$acwt --max-mem=$max_mem --minimize=$minimize --prune --beam=$lattice_beam ark:- ark:- |" + lattice_determinize_cmd="lattice-determinize-non-compact --acoustic-scale=$acwt --max-mem=$max_mem --minimize=$minimize --prune=true --beam=$lattice_beam ark:- ark:- |" fi if [ $sub_split -eq 1 ]; then From d023245022a7cf5d26ba3239d126257217c67d3e Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 15 Jan 2017 16:59:06 -0500 Subject: [PATCH 235/530] Fix minor bugs --- egs/wsj/s5/steps/nnet3/get_degs.sh | 2 +- src/nnet3/discriminative-supervision.h | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/get_degs.sh b/egs/wsj/s5/steps/nnet3/get_degs.sh index cc3ab5c4b13..74e936e29e0 100755 --- a/egs/wsj/s5/steps/nnet3/get_degs.sh +++ b/egs/wsj/s5/steps/nnet3/get_degs.sh @@ -310,7 +310,7 @@ fi # set the command to determinize lattices, if specified. 
if $determinize_before_split; then - lattice_determinize_cmd="lattice-determinize-non-compact --acoustic-scale=$acwt --max-mem=$max_mem --minimize=true --prune --beam=$lattice_beam ark:- ark:-" + lattice_determinize_cmd="lattice-determinize-non-compact --acoustic-scale=$acwt --max-mem=$max_mem --minimize=true --prune=true --beam=$lattice_beam ark:- ark:-" else lattice_determinize_cmd="cat" fi diff --git a/src/nnet3/discriminative-supervision.h b/src/nnet3/discriminative-supervision.h index d4c7ee3756e..a9d58d120f5 100644 --- a/src/nnet3/discriminative-supervision.h +++ b/src/nnet3/discriminative-supervision.h @@ -40,7 +40,6 @@ struct SplitDiscriminativeSupervisionOptions { BaseFloat acoustic_scale; SplitDiscriminativeSupervisionOptions() : - frame_subsampling_factor(1), remove_output_symbols(true), collapse_transition_ids(true), remove_epsilons(true), determinize(true), minimize(true), acoustic_scale(0.1) { } @@ -63,9 +62,6 @@ struct SplitDiscriminativeSupervisionOptions { opts->Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods (should match the " "value used in discriminative-get-supervision)"); - opts->Register("frame-subsampling-factor", &frame_subsampling_factor, "Used " - "if the frame-rate for the model will be less than the " - "frame-rate of the original alignment."); } }; From 6c27f6b089ce2bf00809168a6a0f46b26f97a1a0 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 16 Jan 2017 03:03:33 -0500 Subject: [PATCH 236/530] Some bug fixes to I/O code for nnet3 --- src/nnet3/nnet-analyze.cc | 8 ++++++++ src/nnet3/nnet-common.cc | 20 +++++++++++++------- src/nnet3/nnet-computation.cc | 12 +++++++++++- src/nnet3/nnet-compute.cc | 12 ++++++------ src/nnet3/nnet-discriminative-training.cc | 9 +++++---- src/nnet3/nnet-optimize.cc | 7 +++++++ src/nnet3/nnet-training.cc | 14 +++++++------- src/nnet3/nnet-utils.cc | 4 ++-- 8 files changed, 59 insertions(+), 27 deletions(-) diff --git a/src/nnet3/nnet-analyze.cc b/src/nnet3/nnet-analyze.cc index e53b1742e06..c5fedf0240b 100644 --- a/src/nnet3/nnet-analyze.cc +++ b/src/nnet3/nnet-analyze.cc @@ -954,6 +954,14 @@ void ComputationChecker::CheckComputationDebugInfo() const { static_cast(computation_.matrices[i].num_rows)) KALDI_ERR << "Debug info for matrix m" << i << " has wrong num-rows."; + std::vector::const_iterator + iter = computation_.matrix_debug_info[i].cindexes.begin(), + end = computation_.matrix_debug_info[i].cindexes.end(); + for (; iter != end; ++iter) { + if (iter->second.n < 0) { + KALDI_ERR << "Negative n index in debug info"; + } + } } } diff --git a/src/nnet3/nnet-common.cc b/src/nnet3/nnet-common.cc index 04132bc42c4..906217c3561 100644 --- a/src/nnet3/nnet-common.cc +++ b/src/nnet3/nnet-common.cc @@ -162,16 +162,21 @@ static void WriteCindexVectorElementBinary( if (i == 0 || node_index != vec[i-1].first) { // '|' into ranges that each have all the same node name, like: // [node_1: index_1 index_2] [node_2: index_3 index_4] + // Caution: '|' is character 124 so we have to avoid that + // character in places where it might be confused with + // this separator. os.put('|'); WriteBasicType(os, binary, node_index); } if (i == 0) { + // we don't need to be concerned about reserving space for character 124 + // ('|') here, since (wastefully) '|' is always printed for i == 0. if (index.n == 0 && index.x == 0 && std::abs(index.t) < 125) { // handle this common case in one character. 
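      // (values 125 and 126 are used below to encode index.n, and 127 escapes
      // to the general case, so only |t| < 125 can be stored in this byte.)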
      os.put(static_cast<char>(index.t));
     } else if (index.t == 0 && index.x == 0 &&
-               std::abs(index.n) < 2) {
+               (index.n == 0 || index.n == 1)) {
       // handle this common case in one character.
       os.put(static_cast<char>(index.n + 125));
     } else {  // handle the general case less efficiently.
@@ -186,10 +191,10 @@ static void WriteCindexVectorElementBinary(
                std::abs(index.t - last_index.t) < 124) {
       signed char c = index.t - last_index.t;
       os.put(c);
+      // note: we have to reserve character 124 ('|') for when 'n' changes.
     } else if (index.t == last_index.t && index.x == last_index.x &&
-               std::abs(index.n - last_index.n) < 2) {
-      signed char c = index.n - last_index.n;
-      os.put(c + 125);
+               (index.n == last_index.n || index.n == last_index.n + 1)) {
+      os.put(125 + index.n - last_index.n);
     } else {  // handle the general case less efficiently.
       os.put(127);
       WriteBasicType(os, binary, index.n);
@@ -213,15 +218,16 @@ static void ReadCindexVectorElementBinary(
     is.get();
     ReadBasicType(is, binary, &((*vec)[i].first));
   } else {
+    KALDI_ASSERT(i != 0);
     (*vec)[i].first = (*vec)[i-1].first;
   }
   signed char c = is.get();
   if (i == 0) {
-    if (std::abs(int(c)) < 124) {
+    if (std::abs(int(c)) < 125) {
       index.n = 0;
       index.t = c;
       index.x = 0;
-    } else if (std::abs(int(c)) < 127) {
+    } else if (c == 125 || c == 126) {
       index.n = c - 125;
       index.t = 0;
       index.x = 0;
@@ -239,7 +245,7 @@ static void ReadCindexVectorElementBinary(
       index.n = last_index.n;
       index.t = last_index.t + c;
       index.x = last_index.x;
-    } else if (std::abs(int(c)) < 127) {
+    } else if (c == 125 || c == 126) {
       index.n = last_index.n + c - 125;
       index.t = last_index.t;
       index.x = last_index.x;
diff --git a/src/nnet3/nnet-computation.cc b/src/nnet3/nnet-computation.cc
index 55cf23883ea..819538d37f8 100644
--- a/src/nnet3/nnet-computation.cc
+++ b/src/nnet3/nnet-computation.cc
@@ -146,7 +146,15 @@ void NnetComputation::MatrixInfo::Read(std::istream &is, bool binary) {
   ReadBasicType(is, binary, &num_rows);
   ExpectToken(is, binary, "<NumCols>");
   ReadBasicType(is, binary, &num_cols);
-  ExpectToken(is, binary, "</MatrixInfo>");
+  std::string tok;
+  ReadToken(is, binary, &tok);
+  if (tok == "</MatrixInfo>") {
+    stride_type = kDefaultStride;
+  } else {
+    KALDI_ASSERT(tok == "<StrideEqualNumCols>");
+    stride_type = kStrideEqualNumCols;
+    ExpectToken(is, binary, "</MatrixInfo>");
+  }
 }
 
 void NnetComputation::MatrixInfo::Write(std::ostream &os, bool binary) const {
@@ -156,6 +164,8 @@ void NnetComputation::MatrixInfo::Write(std::ostream &os, bool binary) const {
   WriteBasicType(os, binary, num_rows);
   WriteToken(os, binary, "<NumCols>");
   WriteBasicType(os, binary, num_cols);
+  if (stride_type != kDefaultStride)
+    WriteToken(os, binary, "<StrideEqualNumCols>");
   if (!binary) os << std::endl;
   WriteToken(os, binary, "</MatrixInfo>");
   if (!binary) os << std::endl;
diff --git a/src/nnet3/nnet-compute.cc b/src/nnet3/nnet-compute.cc
index abda3646417..f15b2883989 100644
--- a/src/nnet3/nnet-compute.cc
+++ b/src/nnet3/nnet-compute.cc
@@ -153,16 +153,16 @@ void NnetComputer::ExecuteCommand() {
     case kAllocMatrixZeroed:
       m1 = computation_.submatrices[c.arg1].matrix_index;
       matrices_[m1].Resize(computation_.matrices[m1].num_rows,
-                          computation_.matrices[m1].num_cols,
-                          kSetZero,
-                          computation_.matrices[m1].stride_type);
+                           computation_.matrices[m1].num_cols,
+                           kSetZero,
+                           computation_.matrices[m1].stride_type);
       break;
     case kAllocMatrixUndefined:
       m1 = computation_.submatrices[c.arg1].matrix_index;
       matrices_[m1].Resize(computation_.matrices[m1].num_rows,
-                          computation_.matrices[m1].num_cols,
-                          kUndefined,
computation_.matrices[m1].stride_type); break; case kDeallocMatrix: m1 = computation_.submatrices[c.arg1].matrix_index; diff --git a/src/nnet3/nnet-discriminative-training.cc b/src/nnet3/nnet-discriminative-training.cc index 472c5658a61..15c91d5c23b 100644 --- a/src/nnet3/nnet-discriminative-training.cc +++ b/src/nnet3/nnet-discriminative-training.cc @@ -48,10 +48,12 @@ NnetDiscriminativeTrainer::NnetDiscriminativeTrainer( } if (opts.nnet_config.read_cache != "") { bool binary; - try { - Input ki(opts.nnet_config.read_cache, &binary); + Input ki; + if (ki.Open(opts.nnet_config.read_cache, &binary)) { compiler_.ReadCache(ki.Stream(), binary); - } catch (...) { + KALDI_LOG << "Read computation cache from " + << opts.nnet_config.read_cache; + } else { KALDI_WARN << "Could not open cached computation. " "Probably this is the first training iteration."; } @@ -259,4 +261,3 @@ NnetDiscriminativeTrainer::~NnetDiscriminativeTrainer() { } // namespace nnet3 } // namespace kaldi - diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc index fcb0568dd5c..30b5f57feb7 100644 --- a/src/nnet3/nnet-optimize.cc +++ b/src/nnet3/nnet-optimize.cc @@ -603,6 +603,13 @@ void CachingOptimizingCompiler::ReadCache(std::istream &is, bool binary) { request->Read(is, binary); NnetComputation *computation = new NnetComputation(); computation->Read(is, binary); + if (GetVerboseLevel() >= 3) { + Timer timer; + CheckComputationOptions check_config; + ComputationChecker checker(check_config, nnet_, *computation); + checker.Check(); + seconds_taken_check_ += timer.Elapsed(); + } UpdateCache(request, computation); } } diff --git a/src/nnet3/nnet-training.cc b/src/nnet3/nnet-training.cc index 9e534256e3f..9757452058e 100644 --- a/src/nnet3/nnet-training.cc +++ b/src/nnet3/nnet-training.cc @@ -44,11 +44,11 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config, if (config_.read_cache != "") { bool binary; - try { - Input ki(config_.read_cache, &binary); + Input ki; + if (ki.Open(config_.read_cache, &binary)) { compiler_.ReadCache(ki.Stream(), binary); KALDI_LOG << "Read computation cache from " << config_.read_cache; - } catch (...) { + } else { KALDI_WARN << "Could not open cached computation. " "Probably this is the first training iteration."; } @@ -164,14 +164,14 @@ void NnetTrainer::UpdateParamsWithMaxChange() { if (min_scale < 1.0) ostr << "Per-component max-change active on " << num_max_change_per_component_applied_per_minibatch - << " / " << num_updatable << " Updatable Components." - << "(smallest factor=" << min_scale << " on " + << " / " << num_updatable << " updatable Components; " + << "smallest factor=" << min_scale << " on " << component_name_with_min_scale - << " with max-change=" << max_change_with_min_scale <<"). 
"; + << " with max-change=" << max_change_with_min_scale << '.'; if (param_delta > config_.max_param_change) ostr << "Global max-change factor was " << config_.max_param_change / param_delta - << " with max-change=" << config_.max_param_change << "."; + << " with max-change=" << config_.max_param_change << '.'; KALDI_LOG << ostr.str(); } // applies both of the max-change scalings all at once, component by component diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc index 5e60b3143b9..468c0e893a4 100644 --- a/src/nnet3/nnet-utils.cc +++ b/src/nnet3/nnet-utils.cc @@ -71,10 +71,10 @@ void EvaluateComputationRequest( ComputationGraphBuilder builder(nnet, &graph); builder.Compute(request); builder.GetComputableInfo(is_computable); - if (GetVerboseLevel() >= 2) { + if (GetVerboseLevel() >= 4) { std::ostringstream graph_pretty; graph.Print(graph_pretty, nnet.GetNodeNames()); - KALDI_VLOG(3) << "Graph is " << graph_pretty.str(); + KALDI_VLOG(4) << "Graph is " << graph_pretty.str(); } } From 4d2c78e235c17c35d2176d298d8f2da84f864046 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 16 Jan 2017 03:03:33 -0500 Subject: [PATCH 237/530] Some bug fixes to I/O code for nnet3 Conflicts: src/nnet3/nnet-compute.cc src/nnet3/nnet-training.cc src/nnet3/nnet-utils.cc --- src/nnet3/nnet-analyze.cc | 8 ++++++++ src/nnet3/nnet-common.cc | 20 +++++++++++++------- src/nnet3/nnet-computation.cc | 12 +++++++++++- src/nnet3/nnet-discriminative-training.cc | 9 +++++---- src/nnet3/nnet-optimize.cc | 5 +++++ src/nnet3/nnet-training.cc | 22 +++++++++++----------- src/nnet3/nnet-utils.cc | 4 ++-- 7 files changed, 55 insertions(+), 25 deletions(-) diff --git a/src/nnet3/nnet-analyze.cc b/src/nnet3/nnet-analyze.cc index 29ff2f01fb1..3f04732848c 100644 --- a/src/nnet3/nnet-analyze.cc +++ b/src/nnet3/nnet-analyze.cc @@ -969,6 +969,14 @@ void ComputationChecker::CheckComputationDebugInfo() const { static_cast(computation_.matrices[i].num_rows)) KALDI_ERR << "Debug info for matrix m" << i << " has wrong num-rows."; + std::vector::const_iterator + iter = computation_.matrix_debug_info[i].cindexes.begin(), + end = computation_.matrix_debug_info[i].cindexes.end(); + for (; iter != end; ++iter) { + if (iter->second.n < 0) { + KALDI_ERR << "Negative n index in debug info"; + } + } } } diff --git a/src/nnet3/nnet-common.cc b/src/nnet3/nnet-common.cc index 9df01d4f048..918055df62d 100644 --- a/src/nnet3/nnet-common.cc +++ b/src/nnet3/nnet-common.cc @@ -162,16 +162,21 @@ static void WriteCindexVectorElementBinary( if (i == 0 || node_index != vec[i-1].first) { // '|' into ranges that each have all the same node name, like: // [node_1: index_1 index_2] [node_2: index_3 index_4] + // Caution: '|' is character 124 so we have to avoid that + // character in places where it might be confused with + // this separator. os.put('|'); WriteBasicType(os, binary, node_index); } if (i == 0) { + // we don't need to be concerned about reserving space for character 124 + // ('|') here, since (wastefully) '|' is always printed for i == 0. if (index.n == 0 && index.x == 0 && std::abs(index.t) < 125) { // handle this common case in one character. os.put(static_cast(index.t)); } else if (index.t == 0 && index.x == 0 && - std::abs(index.n) < 2) { + (index.n == 0 || index.n == 1)) { // handle this common case in one character. os.put(static_cast(index.n + 125)); } else { // handle the general case less efficiently. 
@@ -186,10 +191,10 @@ static void WriteCindexVectorElementBinary( std::abs(index.t - last_index.t) < 124) { signed char c = index.t - last_index.t; os.put(c); + // note: we have to reserve character 124 ('|') for when 'n' changes. } else if (index.t == last_index.t && index.x == last_index.x && - std::abs(index.n - last_index.n) < 2) { - signed char c = index.n - last_index.n; - os.put(c + 125); + (index.n == last_index.n || index.n == last_index.n + 1)) { + os.put(125 + index.n - last_index.n); } else { // handle the general case less efficiently. os.put(127); WriteBasicType(os, binary, index.n); @@ -213,15 +218,16 @@ static void ReadCindexVectorElementBinary( is.get(); ReadBasicType(is, binary, &((*vec)[i].first)); } else { + KALDI_ASSERT(i != 0); (*vec)[i].first = (*vec)[i-1].first; } signed char c = is.get(); if (i == 0) { - if (std::abs(int(c)) < 124) { + if (std::abs(int(c)) < 125) { index.n = 0; index.t = c; index.x = 0; - } else if (std::abs(int(c)) < 127) { + } else if (c == 125 || c == 126) { index.n = c - 125; index.t = 0; index.x = 0; @@ -239,7 +245,7 @@ static void ReadCindexVectorElementBinary( index.n = last_index.n; index.t = last_index.t + c; index.x = last_index.x; - } else if (std::abs(int(c)) < 127) { + } else if (c == 125 || c == 126) { index.n = last_index.n + c - 125; index.t = last_index.t; index.x = last_index.x; diff --git a/src/nnet3/nnet-computation.cc b/src/nnet3/nnet-computation.cc index 5c0e8911037..8eb60b91969 100644 --- a/src/nnet3/nnet-computation.cc +++ b/src/nnet3/nnet-computation.cc @@ -147,7 +147,15 @@ void NnetComputation::MatrixInfo::Read(std::istream &is, bool binary) { ReadBasicType(is, binary, &num_rows); ExpectToken(is, binary, ""); ReadBasicType(is, binary, &num_cols); - ExpectToken(is, binary, ""); + std::string tok; + ReadToken(is, binary, &tok); + if (tok == "") { + stride_type = kDefaultStride; + } else { + KALDI_ASSERT(tok == ""); + stride_type = kStrideEqualNumCols; + ExpectToken(is, binary, ""); + } } void NnetComputation::MatrixInfo::Write(std::ostream &os, bool binary) const { @@ -157,6 +165,8 @@ void NnetComputation::MatrixInfo::Write(std::ostream &os, bool binary) const { WriteBasicType(os, binary, num_rows); WriteToken(os, binary, ""); WriteBasicType(os, binary, num_cols); + if (stride_type != kDefaultStride) + WriteToken(os, binary, ""); if (!binary) os << std::endl; WriteToken(os, binary, ""); if (!binary) os << std::endl; diff --git a/src/nnet3/nnet-discriminative-training.cc b/src/nnet3/nnet-discriminative-training.cc index e4f6bf9d463..865056f3569 100644 --- a/src/nnet3/nnet-discriminative-training.cc +++ b/src/nnet3/nnet-discriminative-training.cc @@ -48,10 +48,12 @@ NnetDiscriminativeTrainer::NnetDiscriminativeTrainer( } if (opts.nnet_config.read_cache != "") { bool binary; - try { - Input ki(opts.nnet_config.read_cache, &binary); + Input ki; + if (ki.Open(opts.nnet_config.read_cache, &binary)) { compiler_.ReadCache(ki.Stream(), binary); - } catch (...) { + KALDI_LOG << "Read computation cache from " + << opts.nnet_config.read_cache; + } else { KALDI_WARN << "Could not open cached computation. 
" "Probably this is the first training iteration."; } @@ -259,4 +261,3 @@ NnetDiscriminativeTrainer::~NnetDiscriminativeTrainer() { } // namespace nnet3 } // namespace kaldi - diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc index 08a28e22025..aa586acde21 100644 --- a/src/nnet3/nnet-optimize.cc +++ b/src/nnet3/nnet-optimize.cc @@ -527,6 +527,11 @@ void CachingOptimizingCompiler::ReadCache(std::istream &is, bool binary) { request->Read(is, binary); NnetComputation *computation = new NnetComputation(); computation->Read(is, binary); + if (GetVerboseLevel() >= 3) { + CheckComputationOptions check_config; + ComputationChecker checker(check_config, nnet_, *computation); + checker.Check(); + } UpdateCache(request, computation); } } diff --git a/src/nnet3/nnet-training.cc b/src/nnet3/nnet-training.cc index 87d64e27871..ef12f0c89d7 100644 --- a/src/nnet3/nnet-training.cc +++ b/src/nnet3/nnet-training.cc @@ -39,20 +39,20 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config, // natural-gradient updates. SetZero(is_gradient, delta_nnet_); const int32 num_updatable = NumUpdatableComponents(*delta_nnet_); - num_max_change_per_component_applied_.resize(num_updatable, 0); + num_max_change_per_component_applied_.resize(num_updatable, 0); num_max_change_global_applied_ = 0; if (config_.read_cache != "") { bool binary; - try { - Input ki(config_.read_cache, &binary); + Input ki; + if (ki.Open(config_.read_cache, &binary)) { compiler_.ReadCache(ki.Stream(), binary); KALDI_LOG << "Read computation cache from " << config_.read_cache; - } catch (...) { + } else { KALDI_WARN << "Could not open cached computation. " "Probably this is the first training iteration."; } - } + } } @@ -164,14 +164,14 @@ void NnetTrainer::UpdateParamsWithMaxChange() { if (min_scale < 1.0) ostr << "Per-component max-change active on " << num_max_change_per_component_applied_per_minibatch - << " / " << num_updatable << " Updatable Components." - << "(smallest factor=" << min_scale << " on " + << " / " << num_updatable << " updatable Components; " + << "smallest factor=" << min_scale << " on " << component_name_with_min_scale - << " with max-change=" << max_change_with_min_scale <<"). 
"; + << " with max-change=" << max_change_with_min_scale << '.'; if (param_delta > config_.max_param_change) ostr << "Global max-change factor was " << config_.max_param_change / param_delta - << " with max-change=" << config_.max_param_change << "."; + << " with max-change=" << config_.max_param_change << '.'; KALDI_LOG << ostr.str(); } // applies both of the max-change scalings all at once, component by component @@ -276,7 +276,7 @@ bool ObjectiveFunctionInfo::PrintTotalStats(const std::string &name) const { << (tot_objf / tot_weight) << " over " << tot_weight << " frames."; } else { KALDI_LOG << "Overall average objective function for '" << name << "' is " - << objf << " + " << aux_objf << " = " << sum_objf + << objf << " + " << aux_objf << " = " << sum_objf << " over " << tot_weight << " frames."; } KALDI_LOG << "[this line is to be parsed by a script:] " @@ -290,7 +290,7 @@ NnetTrainer::~NnetTrainer() { Output ko(config_.write_cache, config_.binary_write_cache); compiler_.WriteCache(ko.Stream(), config_.binary_write_cache); KALDI_LOG << "Wrote computation cache to " << config_.write_cache; - } + } delete delta_nnet_; } diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc index d09c18b6ada..07d12684967 100644 --- a/src/nnet3/nnet-utils.cc +++ b/src/nnet3/nnet-utils.cc @@ -71,10 +71,10 @@ void EvaluateComputationRequest( ComputationGraphBuilder builder(nnet, request, &graph); builder.Compute(); builder.GetComputableInfo(is_computable); - if (GetVerboseLevel() >= 2) { + if (GetVerboseLevel() >= 4) { std::ostringstream graph_pretty; graph.Print(graph_pretty, nnet.GetNodeNames()); - KALDI_VLOG(2) << "Graph is " << graph_pretty.str(); + KALDI_VLOG(4) << "Graph is " << graph_pretty.str(); } } From baa5bf48b9947e1c62d59892cfb8c4c650c7b74d Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 16 Jan 2017 13:39:38 -0500 Subject: [PATCH 238/530] asr_diarization: Support per-utt gmm global --- src/gmmbin/gmm-global-copy.cc | 44 ++- src/gmmbin/gmm-global-get-post.cc | 77 +++-- .../gmm-global-init-models-from-feats.cc | 291 ++++++++++++++++++ 3 files changed, 385 insertions(+), 27 deletions(-) create mode 100644 src/gmmbin/gmm-global-init-models-from-feats.cc diff --git a/src/gmmbin/gmm-global-copy.cc b/src/gmmbin/gmm-global-copy.cc index af31b03aa9a..b850cdced51 100644 --- a/src/gmmbin/gmm-global-copy.cc +++ b/src/gmmbin/gmm-global-copy.cc @@ -29,11 +29,13 @@ int main(int argc, char *argv[]) { const char *usage = "Copy a diagonal-covariance GMM\n" "Usage: gmm-global-copy [options] \n" + " or gmm-global-copy [options] \n" "e.g.: gmm-global-copy --binary=false 1.model - | less"; bool binary_write = true; ParseOptions po(usage); - po.Register("binary", &binary_write, "Write output in binary mode"); + po.Register("binary", &binary_write, + "Write in binary mode (only relevant if output is a wxfilename)"); po.Read(argc, argv); @@ -45,15 +47,39 @@ int main(int argc, char *argv[]) { std::string model_in_filename = po.GetArg(1), model_out_filename = po.GetArg(2); - DiagGmm gmm; - { - bool binary_read; - Input ki(model_in_filename, &binary_read); - gmm.Read(ki.Stream(), binary_read); - } - WriteKaldiObject(gmm, model_out_filename, binary_write); + // all these "fn"'s are either rspecifiers or filenames. 
+
+    bool in_is_rspecifier =
+        (ClassifyRspecifier(model_in_filename, NULL, NULL)
+         != kNoRspecifier),
+        out_is_wspecifier =
+        (ClassifyWspecifier(model_out_filename, NULL, NULL, NULL)
+         != kNoWspecifier);
+
+    if (in_is_rspecifier != out_is_wspecifier)
+      KALDI_ERR << "Cannot mix archives with regular files (copying gmm models)";
 
-    KALDI_LOG << "Written model to " << model_out_filename;
+    if (!in_is_rspecifier) {
+      DiagGmm gmm;
+      {
+        bool binary_read;
+        Input ki(model_in_filename, &binary_read);
+        gmm.Read(ki.Stream(), binary_read);
+      }
+      WriteKaldiObject(gmm, model_out_filename, binary_write);
+
+      KALDI_LOG << "Written model to " << model_out_filename;
+    } else {
+      SequentialDiagGmmReader gmm_reader(model_in_filename);
+      DiagGmmWriter gmm_writer(model_out_filename);
+
+      int32 num_done = 0;
+      for (; !gmm_reader.Done(); gmm_reader.Next(), num_done++) {
+        gmm_writer.Write(gmm_reader.Key(), gmm_reader.Value());
+      }
+
+      KALDI_LOG << "Wrote " << num_done << " GMM models to " << model_out_filename;
+    }
   } catch(const std::exception &e) {
     std::cerr << e.what() << '\n';
     return -1;
diff --git a/src/gmmbin/gmm-global-get-post.cc b/src/gmmbin/gmm-global-get-post.cc
index b364c33cab4..2092d1348f0 100644
--- a/src/gmmbin/gmm-global-get-post.cc
+++ b/src/gmmbin/gmm-global-get-post.cc
@@ -36,35 +36,51 @@ int main(int argc, char *argv[]) {
         " (e.g. in training UBMs, SGMMs, tied-mixture systems)\n"
         " For each frame, gives a list of the n best Gaussian indices,\n"
         " sorted from best to worst.\n"
-        "Usage: gmm-global-get-post [options] <model-in> <feature-rspecifier> <post-wspecifier>\n"
-        "e.g.: gmm-global-get-post --n=20 1.gmm \"ark:feature-command |\" \"ark,t:|gzip -c >post.1.gz\"\n";
+        "Usage: gmm-global-get-post [options] <model-in|model-rspecifier> <feature-rspecifier> <post-wspecifier> [<frame-loglikes-wspecifier>]\n"
+        "e.g.: gmm-global-get-post --n=20 1.gmm \"ark:feature-command |\" \"ark,t:|gzip -c >post.1.gz\"\n"
+        " or : gmm-global-get-post --n=20 ark:1.gmm \"ark:feature-command |\" \"ark,t:|gzip -c >post.1.gz\"\n";
 
     ParseOptions po(usage);
 
     int32 num_post = 50;
     BaseFloat min_post = 0.0;
+    std::string utt2spk_rspecifier;
+
     po.Register("n", &num_post, "Number of Gaussians to keep per frame\n");
     po.Register("min-post", &min_post, "Minimum posterior we will output "
                 "before pruning and renormalizing (e.g. 0.01)");
+    po.Register("utt2spk", &utt2spk_rspecifier,
+                "rspecifier for utterance to speaker map");
 
     po.Read(argc, argv);
 
-    if (po.NumArgs() != 3) {
+    if (po.NumArgs() < 3 || po.NumArgs() > 4) {
       po.PrintUsage();
       exit(1);
     }
 
-    std::string model_filename = po.GetArg(1),
+    std::string model_in_filename = po.GetArg(1),
         feature_rspecifier = po.GetArg(2),
-        post_wspecifier = po.GetArg(3);
+        post_wspecifier = po.GetArg(3),
+        frame_loglikes_wspecifier = po.GetOptArg(4);
 
-    DiagGmm gmm;
-    ReadKaldiObject(model_filename, &gmm);
+    RandomAccessDiagGmmReaderMapped *gmm_reader = NULL;
+    DiagGmm *gmm = NULL;
+
     KALDI_ASSERT(num_post > 0);
     KALDI_ASSERT(min_post < 1.0);
-    int32 num_gauss = gmm.NumGauss();
-    if (num_post > num_gauss) {
-      KALDI_WARN << "You asked for " << num_post << " Gaussians but GMM "
-                 << "only has " << num_gauss << ", returning this many. ";
-      num_post = num_gauss;
+
+    if (ClassifyRspecifier(model_in_filename, NULL, NULL)
+        != kNoRspecifier) {  // reading models from a Table.
+      gmm_reader = new RandomAccessDiagGmmReaderMapped(model_in_filename,
+                                                       utt2spk_rspecifier);
+    } else {
+      gmm = new DiagGmm();
+      ReadKaldiObject(model_in_filename, gmm);
+      int32 num_gauss = gmm->NumGauss();
+      if (num_post > num_gauss) {
+        KALDI_WARN << "You asked for " << num_post << " Gaussians but GMM "
+                   << "only has " << num_gauss << ", returning this many.";
+        num_post = num_gauss;
+      }
     }
 
     double tot_like = 0.0;
@@ -72,10 +88,11 @@ int main(int argc, char *argv[]) {
 
     SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
     PosteriorWriter post_writer(post_wspecifier);
+    BaseFloatVectorWriter likes_writer(frame_loglikes_wspecifier);
 
     int32 num_done = 0, num_err = 0;
 
     for (; !feature_reader.Done(); feature_reader.Next()) {
-      std::string utt = feature_reader.Key();
+      const std::string &utt = feature_reader.Key();
       const Matrix<BaseFloat> &feats = feature_reader.Value();
       int32 T = feats.NumRows();
       if (T == 0) {
@@ -83,9 +100,20 @@ int main(int argc, char *argv[]) {
         num_err++;
         continue;
       }
-      if (feats.NumCols() != gmm.Dim()) {
+
+      if (gmm_reader) {
+        if (!gmm_reader->HasKey(utt)) {
+          KALDI_WARN << "Could not find GMM for utterance " << utt;
+          num_err++;
+          continue;
+        }
+        // the reader retains ownership of the object.
+        gmm = const_cast<DiagGmm*>(&(gmm_reader->Value(utt)));
+      }
+
+      if (feats.NumCols() != gmm->Dim()) {
         KALDI_WARN << "Dimension mismatch for utterance " << utt
-                   << ": got " << feats.NumCols() << ", expected " << gmm.Dim();
+                   << ": got " << feats.NumCols() << ", expected "
+                   << gmm->Dim();
         num_err++;
         continue;
       }
 
       Matrix<BaseFloat> loglikes;
-      gmm.LogLikelihoods(feats, &loglikes);
+      gmm->LogLikelihoods(feats, &loglikes);
+
+      Vector<BaseFloat> frame_loglikes;
+      if (!frame_loglikes_wspecifier.empty()) frame_loglikes.Resize(T);
 
       Posterior post(T);
       double log_like_this_file = 0.0;
       for (int32 t = 0; t < T; t++) {
-        log_like_this_file +=
-            VectorToPosteriorEntry(loglikes.Row(t), num_post,
+        double log_like_this_frame =
+            VectorToPosteriorEntry(loglikes.Row(t),
+                                   std::min(num_post, gmm->NumGauss()),
                                    min_post, &(post[t]));
+        if (!frame_loglikes_wspecifier.empty())
+          frame_loglikes(t) = log_like_this_frame;
+        log_like_this_file += log_like_this_frame;
       }
       KALDI_VLOG(1) << "Processed utterance " << utt << ", average likelihood "
                     << (log_like_this_file / T) << " over " << T << " frames";
       tot_like += log_like_this_file;
       tot_t += T;
 
       post_writer.Write(utt, post);
+      if (!frame_loglikes_wspecifier.empty())
+        likes_writer.Write(utt, frame_loglikes);
+
       num_done++;
     }
+
+    if (gmm_reader == NULL)
+      delete gmm;  // when reading from a table, the reader owns the GMMs.
+    delete gmm_reader;
 
     KALDI_LOG << "Done " << num_done << " files, " << num_err
               << " with errors, average UBM log-likelihood is "
diff --git a/src/gmmbin/gmm-global-init-models-from-feats.cc b/src/gmmbin/gmm-global-init-models-from-feats.cc
new file mode 100644
index 00000000000..486ba5af27b
--- /dev/null
+++ b/src/gmmbin/gmm-global-init-models-from-feats.cc
@@ -0,0 +1,291 @@
+// gmmbin/gmm-global-init-models-from-feats.cc
+
+// Copyright 2013  Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+ + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "gmm/model-common.h" +#include "gmm/full-gmm.h" +#include "gmm/diag-gmm.h" +#include "gmm/mle-full-gmm.h" + +namespace kaldi { + +// We initialize the GMM parameters by setting the variance to the global +// variance of the features, and the means to distinct randomly chosen frames. +void InitGmmFromRandomFrames(const MatrixBase &feats, DiagGmm *gmm) { + int32 num_gauss = gmm->NumGauss(), num_frames = feats.NumRows(), + dim = feats.NumCols(); + KALDI_ASSERT(num_frames >= 10 * num_gauss && "Too few frames to train on"); + Vector mean(dim), var(dim); + for (int32 i = 0; i < num_frames; i++) { + mean.AddVec(1.0 / num_frames, feats.Row(i)); + var.AddVec2(1.0 / num_frames, feats.Row(i)); + } + var.AddVec2(-1.0, mean); + if (var.Max() <= 0.0) + KALDI_ERR << "Features do not have positive variance " << var; + + DiagGmmNormal gmm_normal(*gmm); + + std::set used_frames; + for (int32 g = 0; g < num_gauss; g++) { + int32 random_frame = RandInt(0, num_frames - 1); + while (used_frames.count(random_frame) != 0) + random_frame = RandInt(0, num_frames - 1); + used_frames.insert(random_frame); + gmm_normal.weights_(g) = 1.0 / num_gauss; + gmm_normal.means_.Row(g).CopyFromVec(feats.Row(random_frame)); + gmm_normal.vars_.Row(g).CopyFromVec(var); + } + gmm->CopyFromNormal(gmm_normal); + gmm->ComputeGconsts(); +} + +void TrainOneIter(const MatrixBase &feats, + const MleDiagGmmOptions &gmm_opts, + int32 iter, + int32 num_threads, + DiagGmm *gmm) { + AccumDiagGmm gmm_acc(*gmm, kGmmAll); + + Vector frame_weights(feats.NumRows(), kUndefined); + frame_weights.Set(1.0); + + double tot_like; + tot_like = gmm_acc.AccumulateFromDiagMultiThreaded(*gmm, feats, frame_weights, + num_threads); + + KALDI_LOG << "Likelihood per frame on iteration " << iter + << " was " << (tot_like / feats.NumRows()) << " over " + << feats.NumRows() << " frames."; + + BaseFloat objf_change, count; + MleDiagGmmUpdate(gmm_opts, gmm_acc, kGmmAll, gmm, &objf_change, &count); + + KALDI_LOG << "Objective-function change on iteration " << iter << " was " + << (objf_change / count) << " over " << count << " frames."; +} + +void TrainGmm(const MatrixBase &feats, + const MleDiagGmmOptions &gmm_opts, + int32 num_gauss, int32 num_gauss_init, int32 num_iters, + int32 num_threads, DiagGmm *gmm) { + KALDI_LOG << "Initializing GMM means from random frames to " + << num_gauss_init << " Gaussians."; + InitGmmFromRandomFrames(feats, gmm); + + // we'll increase the #Gaussians by splitting, + // till halfway through training. 
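+  // (For example, with num_gauss = 100, num_gauss_init = 25 and
+  // num_iters = 50, gauss_inc = (100 - 25) / 25 = 3, so we split by
+  // 3 Gaussians per iteration and reach 100 around iteration 25.)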
+  int32 cur_num_gauss = num_gauss_init,
+      gauss_inc = (num_gauss - num_gauss_init) / (num_iters / 2);
+
+  for (int32 iter = 0; iter < num_iters; iter++) {
+    TrainOneIter(feats, gmm_opts, iter, num_threads, gmm);
+
+    int32 next_num_gauss = std::min(num_gauss, cur_num_gauss + gauss_inc);
+    if (next_num_gauss > gmm->NumGauss()) {
+      KALDI_LOG << "Splitting to " << next_num_gauss << " Gaussians.";
+      gmm->Split(next_num_gauss, 0.1);
+      cur_num_gauss = next_num_gauss;
+    }
+  }
+}
+
+}  // namespace kaldi
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+
+    const char *usage =
+        "This program initializes a single diagonal GMM and does multiple iterations of\n"
+        "training from features stored in memory.\n"
+        "Usage:  gmm-global-init-models-from-feats [options] <feature-rspecifier> <model-wspecifier>\n"
+        "e.g.:  gmm-global-init-models-from-feats scp:train.scp ark:1.ark\n";
+
+    ParseOptions po(usage);
+    MleDiagGmmOptions gmm_opts;
+
+    bool binary = true;
+    int32 num_gauss = 100;
+    int32 num_gauss_init = 0;
+    int32 num_iters = 50;
+    int32 num_frames = 200000;
+    int32 srand_seed = 0;
+    int32 num_threads = 4;
+    std::string spk2utt_rspecifier;
+
+    po.Register("binary", &binary, "Write output in binary mode");
+    po.Register("num-gauss", &num_gauss, "Number of Gaussians in the model");
+    po.Register("num-gauss-init", &num_gauss_init, "Number of Gaussians in "
+                "the model initially (if nonzero and less than num_gauss, "
+                "we'll do mixture splitting)");
+    po.Register("num-iters", &num_iters, "Number of iterations of training");
+    po.Register("num-frames", &num_frames, "Number of feature vectors to store in "
+                "memory and train on (randomly chosen from the input features)");
+    po.Register("srand", &srand_seed, "Seed for random number generator");
+    po.Register("num-threads", &num_threads, "Number of threads used for "
+                "statistics accumulation");
+    po.Register("spk2utt-rspecifier", &spk2utt_rspecifier,
+                "If specified, estimates models per-speaker");
+
+    gmm_opts.Register(&po);
+
+    po.Read(argc, argv);
+
+    srand(srand_seed);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    if (num_gauss_init <= 0 || num_gauss_init > num_gauss)
+      num_gauss_init = num_gauss;
+
+    std::string feature_rspecifier = po.GetArg(1),
+        model_wspecifier = po.GetArg(2);
+
+    DiagGmmWriter gmm_writer(model_wspecifier);
+
+    KALDI_ASSERT(num_frames > 0);
+
+    KALDI_LOG << "Reading features (will keep " << num_frames << " frames "
+              << "per utterance).";
+
+    int32 dim = 0;
+
+    if (spk2utt_rspecifier.empty()) {  // no spk2utt map: one GMM per utterance.
+      SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
+      for (; !feature_reader.Done(); feature_reader.Next()) {
+        const Matrix<BaseFloat> &this_feats = feature_reader.Value();
+        if (dim == 0) {
+          dim = this_feats.NumCols();
+        } else if (this_feats.NumCols() != dim) {
+          KALDI_ERR << "Features have inconsistent dims "
+                    << this_feats.NumCols() << " vs. " << dim
" << dim + << " (current utt is) " << feature_reader.Key(); + } + + Matrix feats(num_frames, dim); + int64 num_read = 0; + + for (int32 t = 0; t < this_feats.NumRows(); t++) { + num_read++; + if (num_read <= num_frames) { + feats.Row(num_read - 1).CopyFromVec(this_feats.Row(t)); + } else { + BaseFloat keep_prob = num_frames / static_cast(num_read); + if (WithProb(keep_prob)) { // With probability "keep_prob" + feats.Row(RandInt(0, num_frames - 1)).CopyFromVec(this_feats.Row(t)); + } + } + } + + if (num_read < num_frames) { + KALDI_WARN << "For utterance " << feature_reader.Key() << ", " + << "number of frames read " << num_read << " was less than " + << "target number " << num_frames << ", using all we read."; + feats.Resize(num_read, dim, kCopyData); + } else { + BaseFloat percent = num_frames * 100.0 / num_read; + KALDI_LOG << "For utterance " << feature_reader.Key() << ", " + << "kept " << num_frames << " out of " << num_read + << " input frames = " << percent << "%."; + } + + DiagGmm gmm(num_gauss_init, dim); + TrainGmm(feats, gmm_opts, num_gauss, num_gauss_init, num_iters, + num_threads, &gmm); + + gmm_writer.Write(feature_reader.Key(), gmm); + } + KALDI_LOG << "Done initializing GMMs."; + } else { + SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); + RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); + + int32 num_err = 0; + for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { + Matrix feats; + int64 num_read = 0; + + const std::vector &uttlist = spk2utt_reader.Value(); + + for (std::vector::const_iterator it = uttlist.begin(); + it != uttlist.end(); ++it) { + if (!feature_reader.HasKey(*it)) { + KALDI_WARN << "Could not find features for utterance " << *it; + num_err++; + } + + const Matrix &this_feats = feature_reader.Value(*it); + if (dim == 0) { + dim = this_feats.NumCols(); + feats.Resize(num_frames, dim); + } else if (this_feats.NumCols() != dim) { + KALDI_ERR << "Features have inconsistent dims " + << this_feats.NumCols() << " vs. " << dim + << " (current utt is) " << *it; + } + + for (int32 t = 0; t < this_feats.NumRows(); t++) { + num_read++; + if (num_read <= num_frames) { + feats.Row(num_read - 1).CopyFromVec(this_feats.Row(t)); + } else { + BaseFloat keep_prob = num_frames / static_cast(num_read); + if (WithProb(keep_prob)) { // With probability "keep_prob" + feats.Row(RandInt(0, num_frames - 1)).CopyFromVec(this_feats.Row(t)); + } + } + } + } + + if (num_read < num_frames) { + KALDI_WARN << "For speaker " << spk2utt_reader.Key() << ", " + << "number of frames read " << num_read << " was less than " + << "target number " << num_frames << ", using all we read."; + feats.Resize(num_read, dim, kCopyData); + } else { + BaseFloat percent = num_frames * 100.0 / num_read; + KALDI_LOG << "For spekear " << spk2utt_reader.Key() << ", " + << "kept " << num_frames << " out of " << num_read + << " input frames = " << percent << "%."; + } + + DiagGmm gmm(num_gauss_init, dim); + TrainGmm(feats, gmm_opts, num_gauss, num_gauss_init, num_iters, + num_threads, &gmm); + + gmm_writer.Write(spk2utt_reader.Key(), gmm); + } + + KALDI_LOG << "Done initializing GMMs. 
From cbbf120862dc23f424abfd3c4a8e0b44e26cead3 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Mon, 16 Jan 2017 14:01:12 -0500
Subject: [PATCH 239/530] Discriminative-training script fixes

---
 .../local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh |  6 ++++--
 egs/wsj/s5/steps/nnet3/get_degs.sh              | 14 ++++++++++----
 egs/wsj/s5/steps/nnet3/train_discriminative.sh  |  4 ++--
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh
index 97ed72ba429..bfebf708aad 100755
--- a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh
+++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh
@@ -68,7 +68,8 @@ max_param_change=1
 num_jobs_nnet=4
 num_epochs=4
 regularization_opts=   # Applicable for providing --xent-regularize and --l2-regularize options
-minibatch_size=64  # we may have to reduce this.
+minibatch_size="300=32,16/150=64,32" # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up);
+                                     # if chunk size is closer to 150, use minibatch size of 64 (or 32 for mop-up).
 adjust_priors=false  # Note: this option will eventually be removed and
                      # the script will do it automatically but write to
                      # a different filename
@@ -135,11 +136,12 @@ if [ -z "$degs_dir" ]; then
 fi

 if [ $stage -le 3 ]; then
+  [ -z "$degs_dir" ] && degs_dir=${srcdir}_degs
   steps/nnet3/train_discriminative.sh --cmd "$decode_cmd" \
     --stage $train_stage \
     --effective-lrate $effective_learning_rate --max-param-change $max_param_change \
     --criterion $criterion --drop-frames true \
-    --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \
+    --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size "$minibatch_size" \
     --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \
     --regularization-opts "$regularization_opts" \
     --adjust-priors $adjust_priors \
diff --git a/egs/wsj/s5/steps/nnet3/get_degs.sh b/egs/wsj/s5/steps/nnet3/get_degs.sh
index 74e936e29e0..9fbaf73d82c 100755
--- a/egs/wsj/s5/steps/nnet3/get_degs.sh
+++ b/egs/wsj/s5/steps/nnet3/get_degs.sh
@@ -123,7 +123,7 @@ extra_files=
 extra_files="$extra_files $transform_dir/trans.1 $transform_dir/num_jobs"

 # Check some files.
-for f in $data/feats.scp $lang/L.fst $srcdir/${iter}.mdl $srcdir/tree \
+for f in $data/feats.scp $lang/L.fst $lang/phones/silence.csl $srcdir/${iter}.mdl $srcdir/tree \
     $srcdir/cmvn_opts $alidir/ali.1.gz $alidir/num_jobs $extra_files; do
   [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
 done
@@ -243,6 +243,11 @@ if [ $stage -le 2 ]; then
   else # run without stderr redirection to show the error.
     feat-to-dim "$feats_one" -; exit 1
   fi
+else
+  num_frames=$(cat $dir/info/num_frames)
+fi
+if ! [ "$num_frames" -gt 0 ]; then
+  echo "$0: bad num-frames=$num_frames"; exit 1
 fi

 # copy the model to the degs directory.
@@ -256,6 +261,7 @@ num_archives=$[num_frames/frames_per_iter+1]

 echo $num_archives >$dir/info/num_archives
 echo $frame_subsampling_factor >$dir/info/frame_subsampling_factor
+cp $lang/phones/silence.csl $dir/info/

 # the first field in frames_per_eg (which is a comma-separated list of numbers)
 # is the 'principal' frames-per-eg, and for purposes of working out the number
@@ -409,7 +415,7 @@ function shuffle {
   $bufsz=1000; @A = (); while() { push @A, $_; if (@A == $bufsz) { $n=int(rand()*$bufsz); print $A[$n]; $A[$n] = $A[$bufsz-1]; pop @A; }}
   @A = shuffle(@A); print @A; '
- }
+}

 # function/pseudo-command to put input lines round robin to command line args.
 function round_robin {
   perl -e '@F=(); foreach $a (@ARGV) { my $f; open($f, ">$a") || die "opening file $a"; push @F, $f; }
@@ -469,11 +475,11 @@ if [ $stage -le 7 ]; then

   run.pl $dir/log/copy_train_subset.log \
     nnet3-discriminative-copy-egs scp:$dir/train_diagnostic.scp \
-      ark:$dir/train_diagnostic.ark || exit 1
+      ark:$dir/train_diagnostic.degs || exit 1

   run.pl $dir/log/copy_valid_subset.log \
     nnet3-discriminative-copy-egs scp:$dir/valid_diagnostic.scp \
-      ark:$dir/valid_diagnostic.ark || exit 1
+      ark:$dir/valid_diagnostic.degs || exit 1
 fi

 if [ $stage -le 10 ] && $cleanup; then
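Two details worth noting here: the new `if ! [ "$num_frames" -gt 0 ]` guard
above uses an outer negation deliberately, so the script also aborts when
$num_frames is empty or non-numeric (the test itself then fails), not only
when it is zero or negative; and the `ark,bg:` prefix that replaces `ark:`
in the train_discriminative.sh hunk below asks the Kaldi table code to read
the examples ahead on a background thread, overlapping the egs pipeline with
the GPU computation.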
diff --git a/egs/wsj/s5/steps/nnet3/train_discriminative.sh b/egs/wsj/s5/steps/nnet3/train_discriminative.sh
index 8d7484aa889..139e9ba7505 100755
--- a/egs/wsj/s5/steps/nnet3/train_discriminative.sh
+++ b/egs/wsj/s5/steps/nnet3/train_discriminative.sh
@@ -70,7 +70,7 @@ if [ -f path.sh ]; then . ./path.sh; fi


 if [ $# != 2 ]; then
-  echo "Usage: $0 [opts] "
+  echo "Usage: $0 [opts] "
   echo " e.g.: $0 exp/nnet3/tdnn_sp_degs exp/nnet3/tdnn_sp_smbr"
   echo ""
   echo "Main options (for others, see top of script file)"
@@ -290,7 +290,7 @@ while [ $x -lt $num_iters ]; do
         --one-silence-class=$one_silence_class \
         --boost=$boost --acoustic-scale=$acoustic_scale $regularization_opts \
         $dir/$x.mdl \
-        "ark:nnet3-discriminative-copy-egs --frame-shift=$frame_shift ark:$degs_dir/degs.$archive.ark ark:- | nnet3-discriminative-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:- | nnet3-discriminative-merge-egs --minibatch-size=$minibatch_size ark:- ark:- |" \
+        "ark,bg:nnet3-discriminative-copy-egs --frame-shift=$frame_shift ark:$degs_dir/degs.$archive.ark ark:- | nnet3-discriminative-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:- | nnet3-discriminative-merge-egs --minibatch-size=$minibatch_size ark:- ark:- |" \
         $dir/$[$x+1].$n.raw || touch $dir/.error &
     done
     wait

From 6c0a012d581d49fd8721234786476c2649fa9cc1 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Mon, 16 Jan 2017 19:36:33 -0500
Subject: [PATCH 240/530] Remove checking code which would fail if code updated
 in middle of training run.
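Background for the hunk below: the compilation cache is written at the end of
one training job and read back by the next, so if the nnet3 code (and with it
the checking rules) changes between the two, re-running the checker on the
cached computations aborts the run; hence the check is simply dropped.  A
more defensive variant -- only a sketch, with a made-up kCacheVersion
constant, not something this patch introduces -- would stamp the cache and
fall back to recompiling on a mismatch:

    static const int32 kCacheVersion = 1;  // hypothetical; bump on format changes.

    void WriteCacheHeader(std::ostream &os, bool binary) {
      WriteToken(os, binary, "<CacheVersion>");
      WriteBasicType(os, binary, kCacheVersion);
    }

    bool CacheIsCurrent(std::istream &is, bool binary) {
      ExpectToken(is, binary, "<CacheVersion>");
      int32 version;
      ReadBasicType(is, binary, &version);
      return version == kCacheVersion;  // if false: ignore cache, recompile.
    }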
--- src/nnet3/nnet-optimize.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc index aa586acde21..08a28e22025 100644 --- a/src/nnet3/nnet-optimize.cc +++ b/src/nnet3/nnet-optimize.cc @@ -527,11 +527,6 @@ void CachingOptimizingCompiler::ReadCache(std::istream &is, bool binary) { request->Read(is, binary); NnetComputation *computation = new NnetComputation(); computation->Read(is, binary); - if (GetVerboseLevel() >= 3) { - CheckComputationOptions check_config; - ComputationChecker checker(check_config, nnet_, *computation); - checker.Check(); - } UpdateCache(request, computation); } } From f7b2fe75488bae3ff07f302bdd29c76c89c2250a Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 16 Jan 2017 21:40:01 -0500 Subject: [PATCH 241/530] Bug fix (resolve failing test) --- src/nnet3/nnet-computation.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nnet3/nnet-computation.cc b/src/nnet3/nnet-computation.cc index 8eb60b91969..ba56f5080e8 100644 --- a/src/nnet3/nnet-computation.cc +++ b/src/nnet3/nnet-computation.cc @@ -152,7 +152,7 @@ void NnetComputation::MatrixInfo::Read(std::istream &is, bool binary) { if (tok == "") { stride_type = kDefaultStride; } else { - KALDI_ASSERT(tok == ""); + KALDI_ASSERT(tok == ""); stride_type = kStrideEqualNumCols; ExpectToken(is, binary, ""); } @@ -242,7 +242,7 @@ void NnetComputation::Command::Read(std::istream &is, bool binary) { command_type = static_cast(command_type_int); } else { std::string command_type_str; - getline(is, command_type_str); + getline(is, command_type_str); if (command_type_str == "kAllocMatrixZeroed") { command_type = kAllocMatrixZeroed; } else if (command_type_str == "kAllocMatrixUndefined") { @@ -700,7 +700,7 @@ void NnetComputation::Read(std::istream &is, bool binary) { std::vector component_precomputed_indexes_tmp; for (size_t c = 0; c < num_component_precomputed_indexes; c++) { bool is_null; // a boolean indicating whether the pointer should be NULL. - ReadBasicType(is, binary, &is_null); + ReadBasicType(is, binary, &is_null); if (!is_null) { ComponentPrecomputedIndexes* p = ComponentPrecomputedIndexes::ReadNew(is, binary); component_precomputed_indexes_tmp.push_back(p); @@ -796,7 +796,7 @@ void NnetComputation::Write(std::ostream &os, bool binary) const { for (size_t c = 0; c < submatrices.size(); c++) { submatrices[c].Write(os, binary); } - + if (!binary) os << std::endl; WriteToken(os, binary, ""); WriteBasicType(os, binary, component_precomputed_indexes.size()); From e435334b4b7bb9eaf28764f37bb44a825da320fc Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 16 Jan 2017 21:48:36 -0500 Subject: [PATCH 242/530] Add scripts for discriminative training of TDNNs on swbd. Small bug fix; various minor script improvements/fixes. 
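For orientation in the data-prep diffs that follow: each .sph file holds a
two-channel conversation, and the awk fragments expand it into two wav.scp
entries, one per side, e.g. "sw02001-A $sph2pipe -f wav -p -c 1 .../sw02001.sph |"
(side A is channel 1, side B is channel 2), matching the -A/-B suffixed
utterance ids used in the segments and utt2spk files.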
--- egs/swbd/s5c/local/eval1997_data_prep.sh | 23 ++- egs/swbd/s5c/local/eval2000_data_prep.sh | 18 +- egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh | 3 +- .../s5c/local/nnet3/tuning/run_tdnn_d_disc.sh | 172 ++++++++++++++++++ egs/swbd/s5c/local/swbd1_prepare_dict.sh | 7 +- .../nnet3/tuning/run_tdnn_lstm_1b_disc.sh | 48 ++--- src/nnet3/nnet-computation.cc | 2 +- src/nnet3bin/nnet3-am-copy.cc | 1 - 8 files changed, 216 insertions(+), 58 deletions(-) create mode 100755 egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh diff --git a/egs/swbd/s5c/local/eval1997_data_prep.sh b/egs/swbd/s5c/local/eval1997_data_prep.sh index f49ac551192..e29da13deee 100755 --- a/egs/swbd/s5c/local/eval1997_data_prep.sh +++ b/egs/swbd/s5c/local/eval1997_data_prep.sh @@ -5,13 +5,13 @@ # To be run from one directory above this script. -# The input is a directory name containing the 1997 Hub5 english evaluation +# The input is a directory name containing the 1997 Hub5 english evaluation # test set and transcripts, which is LDC2002S10 # e.g. see # http://www.ldc.upenn.edu/Catalog/CatalogEntry.jsp?catalogId=LDC2002S10 # # It is assumed that the transcripts are in a subdirectory called transcr -# However, we download the STM from NIST site: +# However, we download the STM from NIST site: # ftp://jaguar.ncsl.nist.gov/lvcsr/mar97/eval/hub5e97.english.980618.stm if [ $# -ne 1 ]; then @@ -26,7 +26,7 @@ sdir=$1 [ ! -d $sdir/transcr ] \ && echo Expecting directory $sdir/transcr to be present && exit 1; -. path.sh +. path.sh dir=data/local/eval1997 mkdir -p $dir @@ -40,7 +40,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1; awk -v sph2pipe=$sph2pipe '{ - printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); + printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2); }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1; #side A - channel 1, side B - channel 2 @@ -49,8 +49,8 @@ awk -v sph2pipe=$sph2pipe '{ # segments file format is: utt-id side-id start-time end-time, e.g.: # sw02001-A_000098-001156 sw02001-A 0.98 11.56 pem=$sdir/speech/97_hub5e.pem -[ ! -f $pem ] && echo "No such file $pem" && exit 1; -# pem file has lines like: +[ ! -f $pem ] && echo "$0: No such file $pem" && exit 1; +# pem file has lines like: # en_4156 A unknown_speaker 301.85 302.48 # There is one line in the 97_hub5e.pem with an extra : on the channel # sw_10022 B: unknown_speaker 281.21 284.37 -- the : is removed @@ -64,7 +64,7 @@ grep -v ';;' $pem | sed -e 's?:??g' \ printf "%s %s %.2f %.2f\n", utt, spk, start, end; }' \ | sort -u > $dir/segments - + # Download the STM and GLM files: ( cd $dir rm -f stm glm @@ -78,9 +78,9 @@ grep -v ';;' $pem | sed -e 's?:??g' \ # stm file has lines like: -# en_4042 A en_4042_A 227.71 232.26 BEANS RIGHT THAT IS WHY I SAID BEANS -# One of the segments (sw_10022-B_028120-028437) is removed since it is not -# scored and does not show up in the pem file. +# en_4042 A en_4042_A 227.71 232.26 BEANS RIGHT THAT IS WHY I SAID BEANS +# One of the segments (sw_10022-B_028120-028437) is removed since it is not +# scored and does not show up in the pem file. grep -v ';;' $dir/hub5e97.english.980618.stm \ | awk '{ spk=$1"-"$2; @@ -96,7 +96,7 @@ grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text # create an utt2spk file that assumes each conversation side is # a separate speaker. 
-awk '{print $1,$2;}' $dir/segments > $dir/utt2spk +awk '{print $1,$2;}' $dir/segments > $dir/utt2spk utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt # cp $dir/segments $dir/segments.tmp @@ -116,4 +116,3 @@ done echo Data preparation and formatting completed for Eval 2000 echo "(but not MFCC extraction)" - diff --git a/egs/swbd/s5c/local/eval2000_data_prep.sh b/egs/swbd/s5c/local/eval2000_data_prep.sh index 8d7e1f7ed6e..4c34061a120 100755 --- a/egs/swbd/s5c/local/eval2000_data_prep.sh +++ b/egs/swbd/s5c/local/eval2000_data_prep.sh @@ -1,11 +1,11 @@ #!/bin/bash -# Hub-5 Eval 2000 data preparation +# Hub-5 Eval 2000 data preparation # Author: Arnab Ghoshal (Jan 2013) # To be run from one directory above this script. -# The input is two directory names (possibly the same) containing the +# The input is two directory names (possibly the same) containing the # 2000 Hub5 english evaluation test set and transcripts, which are # respectively: LDC2002S09 LDC2002T43 # e.g. see @@ -35,7 +35,7 @@ tdir=$2 [ ! -d $tdir/reference ] \ && echo Expecting directory $tdir/reference to be present && exit 1; -. path.sh +. path.sh dir=data/local/eval2000 mkdir -p $dir @@ -49,7 +49,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1; awk -v sph2pipe=$sph2pipe '{ - printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); + printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2); }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1; #side A - channel 1, side B - channel 2 @@ -58,8 +58,8 @@ awk -v sph2pipe=$sph2pipe '{ # segments file format is: utt-id side-id start-time end-time, e.g.: # sw02001-A_000098-001156 sw02001-A 0.98 11.56 pem=$sdir/english/hub5e_00.pem -[ ! -f $pem ] && echo "No such file $pem" && exit 1; -# pem file has lines like: +[ ! -f $pem ] && echo "$0: No such file $pem" && exit 1; +# pem file has lines like: # en_4156 A unknown_speaker 301.85 302.48 # we ignore the warnings below for now, although they seem to indicate some problems @@ -72,7 +72,7 @@ grep -v ';;' $pem \ | sort -u | local/extend_segments.pl 0.1 > $dir/segments # stm file has lines like: -# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER +# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER # TODO(arnab): We should really be lowercasing this since the Edinburgh # recipe uses lowercase. This is not used in the actual scoring. grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \ @@ -94,10 +94,10 @@ cp $tdir/reference/en20000405_hub5.glm $dir/glm echo "Segments from pem file and stm file do not match." && exit 1; grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text - + # create an utt2spk file that assumes each conversation side is # a separate speaker. 
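 # (e.g. the segments line "sw02001-A_000098-001156 sw02001-A 0.98 11.56"
 # yields the utt2spk entry "sw02001-A_000098-001156 sw02001-A".)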
-awk '{print $1,$2;}' $dir/segments > $dir/utt2spk
+awk '{print $1,$2;}' $dir/segments > $dir/utt2spk
 utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt

 # cp $dir/segments $dir/segments.tmp
diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh
index df02fec38fd..ec80972cf2d 100644
--- a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh
+++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh
@@ -71,7 +71,7 @@ if [ $stage -le 9 ]; then
   relu-renorm-layer name=tdnn4 input=Append(-3,3) dim=1024
   relu-renorm-layer name=tdnn5 input=Append(-7,2) dim=1024
   relu-renorm-layer name=tdnn6 dim=1024
-  
+
   output-layer name=output input=tdnn6 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec

 EOF
@@ -125,4 +125,3 @@ if [ $stage -le 11 ]; then
 fi

 wait;
 exit 0;
-
diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh
new file mode 100755
index 00000000000..715a93ea49d
--- /dev/null
+++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+
+# This script does discriminative training on top of the CE nnet3 system
+# from run_tdnn_d.  To simplify things, this assumes you are using the "speed-perturbed" data
+# (--speed_perturb true, which is the default) in the baseline run_tdnn_d.sh script.
+#
+# note: this relies on having a cluster that has plenty of CPUs as well as GPUs,
+# since the lattice generation runs in about real-time, so takes of the order of
+# 1000 hours of CPU time.
+
+
+set -e
+set -uo pipefail
+
+stage=1
+train_stage=-10 # can be used to start training in the middle.
+get_egs_stage=0
+use_gpu=true  # for training
+cleanup=false  # run with --cleanup true --stage 6 to clean up (remove large things like
+               # alignments and degs).
+degs_dir=  # set this to use preexisting degs.
+nj=400  # have a high number of jobs because this could take a while, and we might
+        # have some stragglers.
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+graph_dir=exp/tri4/graph_sw1_tg
+srcdir=exp/nnet3/tdnn_d_sp
+train_data_dir=data/train_nodup_sp_hires
+online_ivector_dir=exp/nnet3/ivectors_train_nodup_sp_hires
+
+
+## Objective options
+criterion=smbr
+one_silence_class=true
+
+# you can set --disc-affix if you run different configurations, e.g. --disc-affix "_b"
+disc_affix=
+
+dir=${srcdir}_${criterion}${disc_affix}
+
+## Egs options.  Give quite a few choices of chunk length,
+## so it can split utterances without much gap or overlap.
+frames_per_eg=300,280,150,120,100
+frames_overlap_per_eg=0
+frames_per_chunk_decoding=200
+## these context options should match the training condition. (chunk_left_context,
+## chunk_right_context)
+## We set --extra-left-context-initial 0 and --extra-right-context-final 0
+## directly in the script below, but this should also match the training condition.
+## Note: extra-left-context and extra-right-context are 0 because this is a TDNN,
+## it's not a recurrent model like an LSTM or BLSTM.
+extra_left_context=0
+extra_right_context=0
+
+
+## Nnet training options
+effective_learning_rate=0.0000125
+max_param_change=1
+num_jobs_nnet=4
+num_epochs=3
+regularization_opts=   # Applicable for providing --xent-regularize and --l2-regularize options,
+                       # in chain models.
+minibatch_size="300=32,16/150=64,32" # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up);
+                                     # if chunk size is closer to 150, use minibatch size of 64 (or 32 for mop-up).
+ +last_layer_factor=0.1 # prevent the final layer from learning too fast; + # this can be a problem. + +## Decode options +decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. + +if $use_gpu; then + if ! cuda-compiled; then + cat < WOLMANIZED # Also, mispronounced words, e.g. @@ -90,4 +90,3 @@ ln -sf lexicon5.txt lexicon.txt # This is the final lexicon. popd >&/dev/null rm $dir/lexiconp.txt 2>/dev/null echo Prepared input dictionary and phone-sets for Switchboard phase 1. - diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh index bfebf708aad..4fd74a71647 100755 --- a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh @@ -9,13 +9,6 @@ # since the lattice generation runs in about real-time, so takes of the order of # 1000 hours of CPU time. -# how to run this (where $0 is the name of this script) -# by default, with the "cleaned" data: -# $0 - -# without the "cleaned" data: -# $0 --train-set train --gmm tri3 --nnet3-affix "" & - set -e set -uo pipefail @@ -24,7 +17,7 @@ stage=1 train_stage=-10 # can be used to start training in the middle. get_egs_stage=0 use_gpu=true # for training -cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like denlats, +cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like # alignments and degs). degs_dir= # set this to use preexisting degs. nj=400 # have a high number of jobs because this could take a while, and we might @@ -34,6 +27,7 @@ nj=400 # have a high number of jobs because this could take a while, and we migh . ./path.sh . ./utils/parse_options.sh +graph_dir=exp/tri3_cleaned/graph srcdir=exp/nnet3_cleaned/tdnn_lstm1b_sp train_data_dir=data/train_cleaned_sp_hires_comb online_ivector_dir=exp/nnet3_cleaned/ivectors_train_cleaned_sp_hires_comb @@ -58,7 +52,6 @@ frames_per_chunk_decoding=200 ## directly in the script below, but this should also match the training condition. extra_left_context=40 extra_right_context=0 -looped=true # affects alignments; because it's an LSTM, would be false for pure TDNNs or BLSTMs. @@ -67,12 +60,10 @@ effective_learning_rate=0.0000125 max_param_change=1 num_jobs_nnet=4 num_epochs=4 -regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options +regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options, + # in chain models. minibatch_size="300=32,16/150=64,32" # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up); # if chunk size is closer to 150, use mini atch size of 64 (or 32 for mop-up). -adjust_priors=false # Note: this option will eventually be removed and - # the script will do it automatically but write to - # a different filename last_layer_factor=0.1 # prevent the final layer from learning too fast; # this can be a problem. @@ -117,7 +108,7 @@ if [ -z "$degs_dir" ]; then if [ $stage -le 2 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d ${srcdir}_degs/storage ]; then utils/create_split_dir.pl \ - /export/b{09,10,11,12}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/${srcdir}_degs/storage ${srcdir}_degs/storage + /export/b{09,10,11,12}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/${srcdir}_degs/storage ${srcdir}_degs/storage fi if [ -d ${srcdir}_degs/storage ]; then max_copy_jobs=10; else max_copy_jobs=5; fi @@ -144,37 +135,36 @@ if [ $stage -le 3 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size "$minibatch_size" \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ - --adjust-priors $adjust_priors \ --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi -graph_dir=exp/tri3/graph -if [ $stage -le 5 ]; then +if [ $stage -le 4 ]; then for x in `seq $decode_start_epoch $num_epochs`; do for decode_set in dev test; do - ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - iter=epoch$x.adj + for iter in epoch$x epoch${x}_adj; do - steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ - --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${iter:+_$iter} || exit 1; + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_${iter} || exit 1; - steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ - data/lang_test data/lang_rescore data/${decode_set}_hires \ - $dir/decode_${decode_set}${iter:+_$iter} \ - $dir/decode_${decode_set}${iter:+_$iter}_rescore || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_test data/lang_rescore data/${decode_set}_hires \ + $dir/decode_${decode_set}_${iter} \ + $dir/decode_${decode_set}_${iter}_rescore || exit 1; ) & done done fi wait; -if [ $stage -le 6 ] && $cleanup; then +if [ $stage -le 5 ] && $cleanup; then # if you run with "--cleanup true --stage 6" you can clean up. - rm ${lats_dir}/lat.*.gz || true - rm ${srcdir}_ali/ali.*.gz || true + # actually, keep the alignments in case we need them later.. they're slow to + # create, and quite big. + # rm ${srcdir}_ali/ali.*.gz || true + steps/nnet2/remove_egs.sh ${srcdir}_degs || true fi diff --git a/src/nnet3/nnet-computation.cc b/src/nnet3/nnet-computation.cc index 819538d37f8..5be1b7def94 100644 --- a/src/nnet3/nnet-computation.cc +++ b/src/nnet3/nnet-computation.cc @@ -151,7 +151,7 @@ void NnetComputation::MatrixInfo::Read(std::istream &is, bool binary) { if (tok == "") { stride_type = kDefaultStride; } else { - KALDI_ASSERT(tok == ""); + KALDI_ASSERT(tok == ""); stride_type = kStrideEqualNumCols; ExpectToken(is, binary, ""); } diff --git a/src/nnet3bin/nnet3-am-copy.cc b/src/nnet3bin/nnet3-am-copy.cc index 7aa0e4a32c0..5f697356dbf 100644 --- a/src/nnet3bin/nnet3-am-copy.cc +++ b/src/nnet3bin/nnet3-am-copy.cc @@ -47,7 +47,6 @@ int main(int argc, char *argv[]) { bool binary_write = true, raw = false; BaseFloat learning_rate = -1; - BaseFloat learning_rate_scale = 1; std::string set_raw_nnet = ""; bool convert_repeated_to_block = false; BaseFloat scale = 1.0; From d498cdb2c34a8fe80bf2ea6319067feafa700041 Mon Sep 17 00:00:00 2001 From: Yiming Wang Date: Tue, 17 Jan 2017 02:25:06 -0500 Subject: [PATCH 243/530] [src]: Removing SetZero() function in nnet3, adding SetAsGradient(). 
(#1343) --- src/nnet3/nnet-chain-diagnostics.cc | 8 +- src/nnet3/nnet-chain-training.cc | 6 +- src/nnet3/nnet-component-itf.h | 9 +- src/nnet3/nnet-component-test.cc | 14 +- src/nnet3/nnet-derivative-test.cc | 4 +- src/nnet3/nnet-diagnostics.cc | 8 +- src/nnet3/nnet-discriminative-diagnostics.cc | 8 +- src/nnet3/nnet-discriminative-training.cc | 6 +- src/nnet3/nnet-general-component.cc | 17 +- src/nnet3/nnet-general-component.h | 1 - src/nnet3/nnet-optimize-test.cc | 11 +- src/nnet3/nnet-simple-component.cc | 175 ++++++++----------- src/nnet3/nnet-simple-component.h | 10 +- src/nnet3/nnet-training.cc | 6 +- src/nnet3/nnet-utils.cc | 34 ++-- src/nnet3/nnet-utils.h | 4 + 16 files changed, 133 insertions(+), 188 deletions(-) diff --git a/src/nnet3/nnet-chain-diagnostics.cc b/src/nnet3/nnet-chain-diagnostics.cc index 54d73a6ead3..76abc5ce154 100644 --- a/src/nnet3/nnet-chain-diagnostics.cc +++ b/src/nnet3/nnet-chain-diagnostics.cc @@ -37,8 +37,8 @@ NnetChainComputeProb::NnetChainComputeProb( num_minibatches_processed_(0) { if (nnet_config_.compute_deriv) { deriv_nnet_ = new Nnet(nnet_); - bool is_gradient = true; // force simple update - SetZero(is_gradient, deriv_nnet_); + ScaleNnet(0.0, deriv_nnet_); + SetNnetAsGradient(deriv_nnet_); // force simple update } } @@ -56,8 +56,8 @@ void NnetChainComputeProb::Reset() { num_minibatches_processed_ = 0; objf_info_.clear(); if (deriv_nnet_) { - bool is_gradient = true; - SetZero(is_gradient, deriv_nnet_); + ScaleNnet(0.0, deriv_nnet_); + SetNnetAsGradient(deriv_nnet_); } } diff --git a/src/nnet3/nnet-chain-training.cc b/src/nnet3/nnet-chain-training.cc index 1e293f588ae..4f63ba8304c 100644 --- a/src/nnet3/nnet-chain-training.cc +++ b/src/nnet3/nnet-chain-training.cc @@ -38,9 +38,7 @@ NnetChainTrainer::NnetChainTrainer(const NnetChainTrainingOptions &opts, KALDI_ASSERT(opts.nnet_config.momentum >= 0.0 && opts.nnet_config.max_param_change >= 0.0); delta_nnet_ = nnet_->Copy(); - bool is_gradient = false; // setting this to true would disable the - // natural-gradient updates. - SetZero(is_gradient, delta_nnet_); + ScaleNnet(0.0, delta_nnet_); const int32 num_updatable = NumUpdatableComponents(*delta_nnet_); num_max_change_per_component_applied_.resize(num_updatable, 0); num_max_change_global_applied_ = 0; @@ -201,7 +199,7 @@ void NnetChainTrainer::UpdateParamsWithMaxChange() { if (param_delta > nnet_config.max_param_change) { if (param_delta - param_delta != 0.0) { KALDI_WARN << "Infinite parameter change, will not apply."; - SetZero(false, delta_nnet_); + ScaleNnet(0.0, delta_nnet_); } else { scale *= nnet_config.max_param_change / param_delta; num_max_change_global_applied_++; diff --git a/src/nnet3/nnet-component-itf.h b/src/nnet3/nnet-component-itf.h index 9dc372340be..c1732fc9b25 100644 --- a/src/nnet3/nnet-component-itf.h +++ b/src/nnet3/nnet-component-itf.h @@ -375,11 +375,6 @@ class UpdatableComponent: public Component { learning_rate_factor_(other.learning_rate_factor_), is_gradient_(other.is_gradient_), max_change_(other.max_change_) { } - /// \brief Sets parameters to zero, and if treat_as_gradient is true, - /// sets is_gradient_ to true and sets learning_rate_ to 1, ignoring - /// learning_rate_factor_. - virtual void SetZero(bool treat_as_gradient) = 0; - UpdatableComponent(): learning_rate_(0.001), learning_rate_factor_(1.0), is_gradient_(false), max_change_(0.0) { } @@ -403,6 +398,10 @@ class UpdatableComponent: public Component { /// Sets the learning rate directly, bypassing learning_rate_factor_. 
virtual void SetActualLearningRate(BaseFloat lrate) { learning_rate_ = lrate; } + /// \brief Sets is_gradient_ to true and sets learning_rate_ to 1, ignoring + /// learning_rate_factor_. + virtual void SetAsGradient() { learning_rate_ = 1.0; is_gradient_ = true; } + /// Gets the learning rate of gradient descent. Note: if you call /// SetLearningRate(x), and learning_rate_factor_ != 1.0, /// a different value than x will returned. diff --git a/src/nnet3/nnet-component-test.cc b/src/nnet3/nnet-component-test.cc index 1cb96563b77..288179b2ffe 100644 --- a/src/nnet3/nnet-component-test.cc +++ b/src/nnet3/nnet-component-test.cc @@ -108,7 +108,7 @@ void TestNnetComponentVectorizeUnVectorize(Component *c) { UpdatableComponent *uc = dynamic_cast(c); KALDI_ASSERT(uc != NULL); UpdatableComponent *uc2 = dynamic_cast(uc->Copy()); - uc2->SetZero(false); + uc2->Scale(0.0); Vector params(uc2->NumParameters()); uc2->Vectorize(¶ms); KALDI_ASSERT(params.Min()==0.0 && params.Sum()==0.0); @@ -146,14 +146,14 @@ void TestNnetComponentUpdatable(Component *c) { } if(!(uc->Properties() & kUpdatableComponent)){ // testing that if it declares itself as non-updatable, - // Scale() and Add() and SetZero() have no effect. + // Scale() and Add() have no effect. KALDI_ASSERT(uc->NumParameters() == 0); KALDI_ASSERT(uc->DotProduct(*uc) == 0); UpdatableComponent *uc2 = dynamic_cast(uc->Copy()); uc2->Scale(7.0); uc2->Add(3.0, *uc); KALDI_ASSERT(StringsApproxEqual(uc2->Info(), uc->Info())); - uc->SetZero(false); + uc->Scale(0.0); KALDI_ASSERT(StringsApproxEqual(uc2->Info(), uc->Info())); delete uc2; } else { @@ -179,13 +179,13 @@ void TestNnetComponentUpdatable(Component *c) { uc3->Scale(0.5); KALDI_ASSERT(uc2->Info() == uc3->Info()); - // testing that SetZero() works the same whether done on the vectorized + // testing that Scale(0.0) works the same whether done on the vectorized // paramters or via SetZero(), and that unvectorizing something that's been // zeroed gives us zero parameters. uc2->Vectorize(&vec2); vec2.SetZero(); uc2->UnVectorize(vec2); - uc3->SetZero(false); + uc3->Scale(0.0); uc3->Vectorize(&vec2); KALDI_ASSERT(uc2->Info() == uc3->Info() && VecVec(vec2, vec2) == 0.0); @@ -422,8 +422,8 @@ bool TestSimpleComponentModelDerivative(const Component &c, UpdatableComponent *uc_copy = dynamic_cast(c_copy); KALDI_ASSERT(uc != NULL && uc_copy != NULL); if (test_derivative) { - bool is_gradient = true; - uc_copy->SetZero(is_gradient); + uc_copy->Scale(0.0); + uc_copy->SetAsGradient(); } CuMatrix input_deriv(num_rows, input_dim, diff --git a/src/nnet3/nnet-derivative-test.cc b/src/nnet3/nnet-derivative-test.cc index 1f9e61e2b2a..f76377a544c 100644 --- a/src/nnet3/nnet-derivative-test.cc +++ b/src/nnet3/nnet-derivative-test.cc @@ -136,8 +136,8 @@ void UnitTestNnetModelDerivatives() { } Nnet nnet_deriv(nnet); - bool is_gradient = true; - SetZero(is_gradient, &nnet_deriv); // forces "simple" update and unit + ScaleNnet(0.0, &nnet_deriv); + SetNnetAsGradient(&nnet_deriv); // forces "simple" update and unit // learning rate. int32 num_directions = 4; // must be >= 1. 
Best if it's >1, will reduce diff --git a/src/nnet3/nnet-diagnostics.cc b/src/nnet3/nnet-diagnostics.cc index d7de17682da..302e2cbfa50 100644 --- a/src/nnet3/nnet-diagnostics.cc +++ b/src/nnet3/nnet-diagnostics.cc @@ -32,8 +32,8 @@ NnetComputeProb::NnetComputeProb(const NnetComputeProbOptions &config, num_minibatches_processed_(0) { if (config_.compute_deriv) { deriv_nnet_ = new Nnet(nnet_); - bool is_gradient = true; // force simple update - SetZero(is_gradient, deriv_nnet_); + ScaleNnet(0.0, deriv_nnet_); + SetNnetAsGradient(deriv_nnet_); // force simple update } } @@ -52,8 +52,8 @@ void NnetComputeProb::Reset() { objf_info_.clear(); accuracy_info_.clear(); if (deriv_nnet_) { - bool is_gradient = true; - SetZero(is_gradient, deriv_nnet_); + ScaleNnet(0.0, deriv_nnet_); + SetNnetAsGradient(deriv_nnet_); } } diff --git a/src/nnet3/nnet-discriminative-diagnostics.cc b/src/nnet3/nnet-discriminative-diagnostics.cc index 417a6fa05ac..f23af549d72 100644 --- a/src/nnet3/nnet-discriminative-diagnostics.cc +++ b/src/nnet3/nnet-discriminative-diagnostics.cc @@ -42,8 +42,8 @@ NnetDiscriminativeComputeObjf::NnetDiscriminativeComputeObjf( log_priors_.ApplyLog(); if (nnet_config_.compute_deriv) { deriv_nnet_ = new Nnet(nnet_); - bool is_gradient = true; // force simple update - SetZero(is_gradient, deriv_nnet_); + ScaleNnet(0.0, deriv_nnet_); + SetNnetAsGradient(deriv_nnet_); // force simple update } } @@ -61,8 +61,8 @@ void NnetDiscriminativeComputeObjf::Reset() { num_minibatches_processed_ = 0; objf_info_.clear(); if (deriv_nnet_) { - bool is_gradient = true; - SetZero(is_gradient, deriv_nnet_); + ScaleNnet(0.0, deriv_nnet_); + SetNnetAsGradient(deriv_nnet_); } } diff --git a/src/nnet3/nnet-discriminative-training.cc b/src/nnet3/nnet-discriminative-training.cc index 15c91d5c23b..5ef1675c5ca 100644 --- a/src/nnet3/nnet-discriminative-training.cc +++ b/src/nnet3/nnet-discriminative-training.cc @@ -42,9 +42,7 @@ NnetDiscriminativeTrainer::NnetDiscriminativeTrainer( KALDI_ASSERT(opts.nnet_config.momentum >= 0.0 && opts.nnet_config.max_param_change >= 0.0); delta_nnet_ = nnet_->Copy(); - bool is_gradient = false; // setting this to true would disable the - // natural-gradient updates. 
- SetZero(is_gradient, delta_nnet_); + ScaleNnet(0.0, delta_nnet_); } if (opts.nnet_config.read_cache != "") { bool binary; @@ -92,7 +90,7 @@ void NnetDiscriminativeTrainer::Train(const NnetDiscriminativeExample &eg) { if (param_delta > nnet_config.max_param_change) { if (param_delta - param_delta != 0.0) { KALDI_WARN << "Infinite parameter change, will not apply."; - SetZero(false, delta_nnet_); + ScaleNnet(0.0, delta_nnet_); } else { scale *= nnet_config.max_param_change / param_delta; KALDI_LOG << "Parameter change too big: " << param_delta << " > " diff --git a/src/nnet3/nnet-general-component.cc b/src/nnet3/nnet-general-component.cc index 50dbb652d95..926ebd9b07d 100644 --- a/src/nnet3/nnet-general-component.cc +++ b/src/nnet3/nnet-general-component.cc @@ -1307,8 +1307,13 @@ Component* ConstantComponent::Copy() const { } void ConstantComponent::Scale(BaseFloat scale) { - if (is_updatable_) - output_.Scale(scale); + if (is_updatable_) { + if (scale == 0.0) { + output_.SetZero(); + } else { + output_.Scale(scale); + } + } } void ConstantComponent::Add(BaseFloat alpha, const Component &other_in) { @@ -1320,14 +1325,6 @@ void ConstantComponent::Add(BaseFloat alpha, const Component &other_in) { } } -void ConstantComponent::SetZero(bool treat_as_gradient) { - if (treat_as_gradient) { - SetActualLearningRate(1.0); - is_gradient_ = true; - } - output_.SetZero(); -} - void ConstantComponent::PerturbParams(BaseFloat stddev) { CuVector temp_output(output_.Dim(), kUndefined); temp_output.SetRandn(); diff --git a/src/nnet3/nnet-general-component.h b/src/nnet3/nnet-general-component.h index 6e5e542fa13..b945edf4475 100644 --- a/src/nnet3/nnet-general-component.h +++ b/src/nnet3/nnet-general-component.h @@ -648,7 +648,6 @@ class ConstantComponent: public UpdatableComponent { // Some functions from base-class UpdatableComponent. virtual void Scale(BaseFloat scale); virtual void Add(BaseFloat alpha, const Component &other); - virtual void SetZero(bool treat_as_gradient); virtual void PerturbParams(BaseFloat stddev); virtual BaseFloat DotProduct(const UpdatableComponent &other) const; virtual int32 NumParameters() const; diff --git a/src/nnet3/nnet-optimize-test.cc b/src/nnet3/nnet-optimize-test.cc index 0654683aa9c..0044ee05c51 100644 --- a/src/nnet3/nnet-optimize-test.cc +++ b/src/nnet3/nnet-optimize-test.cc @@ -94,10 +94,10 @@ static bool UnitTestNnetOptimizeWithOptions(int32 srand_seed, // test the consolidation of backprop commands, // otherwise the optimized and non-optimized // comptuations differ. - bool is_gradient = true; // with natural gradient, the consolidation would - // affect the final model params -> test just the - // gradient. - SetZero(is_gradient, &nnet_to_update); + ScaleNnet(0.0, &nnet_to_update); + // with natural gradient, the consolidation would affect the final model + // params -> test just the gradient. + SetNnetAsGradient(&nnet_to_update); NnetComputer computer(compute_opts, computation, @@ -107,7 +107,8 @@ static bool UnitTestNnetOptimizeWithOptions(int32 srand_seed, Nnet nnet_opt(nnet); // copy of the nnet for the optimized computation. // necessary in case backprop changes parameters. Nnet nnet_opt_to_update(nnet_opt); - SetZero(is_gradient, &nnet_opt_to_update); + ScaleNnet(0.0, &nnet_opt_to_update); + SetNnetAsGradient(&nnet_opt_to_update); // NnetComputer for the optimized version of the computation. 
NnetComputer computer_opt(compute_opts, diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc index 8b2834e31e0..994cc465471 100644 --- a/src/nnet3/nnet-simple-component.cc +++ b/src/nnet3/nnet-simple-component.cc @@ -1136,8 +1136,14 @@ void RectifiedLinearComponent::StoreStats( } void AffineComponent::Scale(BaseFloat scale) { - linear_params_.Scale(scale); - bias_params_.Scale(scale); + if (scale == 0.0) { + // If scale == 0.0 we call SetZero() which will get rid of NaN's and inf's. + linear_params_.SetZero(); + bias_params_.SetZero(); + } else { + linear_params_.Scale(scale); + bias_params_.Scale(scale); + } } void AffineComponent::Resize(int32 input_dim, int32 output_dim) { @@ -1169,17 +1175,6 @@ AffineComponent::AffineComponent(const CuMatrixBase &linear_params, bias_params.Dim() != 0); } - - -void AffineComponent::SetZero(bool treat_as_gradient) { - if (treat_as_gradient) { - SetActualLearningRate(1.0); - is_gradient_ = true; - } - linear_params_.SetZero(); - bias_params_.SetZero(); -} - void AffineComponent::SetParams(const VectorBase &bias, const MatrixBase &linear) { bias_params_ = bias; @@ -1425,8 +1420,13 @@ RepeatedAffineComponent::RepeatedAffineComponent(const RepeatedAffineComponent & void RepeatedAffineComponent::Scale(BaseFloat scale) { - linear_params_.Scale(scale); - bias_params_.Scale(scale); + if (scale == 0.0) { + linear_params_.SetZero(); + bias_params_.SetZero(); + } else { + linear_params_.Scale(scale); + bias_params_.Scale(scale); + } } void RepeatedAffineComponent::Add(BaseFloat alpha, const Component &other_in) { @@ -1437,15 +1437,6 @@ void RepeatedAffineComponent::Add(BaseFloat alpha, const Component &other_in) { bias_params_.AddVec(alpha, other->bias_params_); } -void RepeatedAffineComponent::SetZero(bool treat_as_gradient) { - if (treat_as_gradient) { - SetActualLearningRate(1.0); - is_gradient_ = true; - } - linear_params_.SetZero(); - bias_params_.SetZero(); -} - void RepeatedAffineComponent::PerturbParams(BaseFloat stddev){ CuMatrix temp_linear_params(linear_params_); temp_linear_params.SetRandn(); @@ -1932,8 +1923,13 @@ void BlockAffineComponent::Backprop(const std::string &debug_info, } void BlockAffineComponent::Scale(BaseFloat scale) { - linear_params_.Scale(scale); - bias_params_.Scale(scale); + if (scale == 0.0) { + linear_params_.SetZero(); + bias_params_.SetZero(); + } else { + linear_params_.Scale(scale); + bias_params_.Scale(scale); + } } void BlockAffineComponent::Add(BaseFloat alpha, const Component &other_in) { @@ -1944,15 +1940,6 @@ void BlockAffineComponent::Add(BaseFloat alpha, const Component &other_in) { bias_params_.AddVec(alpha, other->bias_params_); } -void BlockAffineComponent::SetZero(bool treat_as_gradient) { - if (treat_as_gradient) { - SetActualLearningRate(1.0); - is_gradient_ = true; - } - linear_params_.SetZero(); - bias_params_.SetZero(); -} - void BlockAffineComponent::PerturbParams(BaseFloat stddev) { CuMatrix temp_linear_params(linear_params_); temp_linear_params.SetRandn(); @@ -2017,7 +2004,11 @@ void BlockAffineComponent::UnVectorize(const VectorBase ¶ms) { } void PerElementScaleComponent::Scale(BaseFloat scale) { - scales_.Scale(scale); + if (scale == 0.0) { + scales_.SetZero(); + } else { + scales_.Scale(scale); + } } void PerElementScaleComponent::Add(BaseFloat alpha, @@ -2033,14 +2024,6 @@ PerElementScaleComponent::PerElementScaleComponent( UpdatableComponent(component), scales_(component.scales_) { } -void PerElementScaleComponent::SetZero(bool treat_as_gradient) { - if 
(treat_as_gradient) { - SetActualLearningRate(1.0); - is_gradient_ = true; - } - scales_.SetZero(); -} - void PerElementScaleComponent::PerturbParams(BaseFloat stddev) { CuVector temp_scales(scales_.Dim(), kUndefined); temp_scales.SetRandn(); @@ -2180,7 +2163,11 @@ void PerElementScaleComponent::UnVectorize( } void PerElementOffsetComponent::Scale(BaseFloat scale) { - offsets_.Scale(scale); + if (scale == 0.0) { + offsets_.SetZero(); + } else { + offsets_.Scale(scale); + } } @@ -2197,14 +2184,6 @@ PerElementOffsetComponent::PerElementOffsetComponent( UpdatableComponent(component), offsets_(component.offsets_) { } -void PerElementOffsetComponent::SetZero(bool treat_as_gradient) { - if (treat_as_gradient) { - SetActualLearningRate(1.0); - is_gradient_ = true; - } - offsets_.SetZero(); -} - void PerElementOffsetComponent::PerturbParams(BaseFloat stddev) { CuVector temp_offsets(offsets_.Dim(), kUndefined); temp_offsets.SetRandn(); @@ -2448,8 +2427,13 @@ Component* ConstantFunctionComponent::Copy() const { } void ConstantFunctionComponent::Scale(BaseFloat scale) { - if (is_updatable_) - output_.Scale(scale); + if (is_updatable_) { + if (scale == 0.0) { + output_.SetZero(); + } else { + output_.Scale(scale); + } + } } void ConstantFunctionComponent::Add(BaseFloat alpha, const Component &other_in) { @@ -2461,14 +2445,6 @@ void ConstantFunctionComponent::Add(BaseFloat alpha, const Component &other_in) } } -void ConstantFunctionComponent::SetZero(bool treat_as_gradient) { - if (treat_as_gradient) { - SetActualLearningRate(1.0); - is_gradient_ = true; - } - output_.SetZero(); -} - void ConstantFunctionComponent::PerturbParams(BaseFloat stddev) { CuVector temp_output(output_.Dim(), kUndefined); temp_output.SetRandn(); @@ -3734,8 +3710,13 @@ void ConvolutionComponent::Propagate(const ComponentPrecomputedIndexes *indexes, // scale the parameters void ConvolutionComponent::Scale(BaseFloat scale) { - filter_params_.Scale(scale); - bias_params_.Scale(scale); + if (scale == 0.0) { + filter_params_.SetZero(); + bias_params_.SetZero(); + } else { + filter_params_.Scale(scale); + bias_params_.Scale(scale); + } } // add another convolution component @@ -3974,15 +3955,6 @@ void ConvolutionComponent::Update(const std::string &debug_info, bias_params_.AddVec(learning_rate_, bias_grad); } -void ConvolutionComponent::SetZero(bool treat_as_gradient) { - if (treat_as_gradient) { - SetActualLearningRate(1.0); - is_gradient_ = true; - } - filter_params_.SetZero(); - bias_params_.SetZero(); -} - void ConvolutionComponent::Read(std::istream &is, bool binary) { ReadUpdatableCommon(is, binary); // Read opening tag and learning rate. ExpectToken(is, binary, ""); @@ -4794,18 +4766,6 @@ void CompositeComponent::Add(BaseFloat alpha, const Component &other_in) { components_[i]->Add(alpha, *(other->components_[i])); } -// virtual -void CompositeComponent::SetZero(bool treat_as_gradient) { - KALDI_ASSERT(this->IsUpdatable()); // or should not be called. - for (size_t i = 0; i < components_.size(); i++) { - if (components_[i]->Properties() & kUpdatableComponent) { - UpdatableComponent *uc = - dynamic_cast(components_[i]); - uc->SetZero(treat_as_gradient); - } - } -} - // virtual void CompositeComponent::PerturbParams(BaseFloat stddev) { KALDI_ASSERT(this->IsUpdatable()); // or should not be called. @@ -4846,6 +4806,19 @@ void CompositeComponent::SetActualLearningRate(BaseFloat lrate) { } } +// virtual +void CompositeComponent::SetAsGradient() { + KALDI_ASSERT(this->IsUpdatable()); // or should not be called. 
+ UpdatableComponent::SetAsGradient(); + for (size_t i = 0; i < components_.size(); i++) { + if (components_[i]->Properties() & kUpdatableComponent) { + UpdatableComponent *uc = + dynamic_cast(components_[i]); + uc->SetAsGradient(); + } + } +} + // virtual int32 CompositeComponent::NumParameters() const { KALDI_ASSERT(this->IsUpdatable()); // or should not be called. @@ -5116,11 +5089,19 @@ void LstmNonlinearityComponent::ZeroStats() { } void LstmNonlinearityComponent::Scale(BaseFloat scale) { - params_.Scale(scale); - value_sum_.Scale(scale); - deriv_sum_.Scale(scale); - self_repair_total_.Scale(scale); - count_ *= scale; + if (scale == 0.0) { + params_.SetZero(); + value_sum_.SetZero(); + deriv_sum_.SetZero(); + self_repair_total_.SetZero(); + count_ = 0.0; + } else { + params_.Scale(scale); + value_sum_.Scale(scale); + deriv_sum_.Scale(scale); + self_repair_total_.Scale(scale); + count_ *= scale; + } } void LstmNonlinearityComponent::Add(BaseFloat alpha, @@ -5135,18 +5116,6 @@ void LstmNonlinearityComponent::Add(BaseFloat alpha, count_ += alpha * other->count_; } -void LstmNonlinearityComponent::SetZero(bool treat_as_gradient) { - if (treat_as_gradient) { - SetActualLearningRate(1.0); - is_gradient_ = true; - } - params_.SetZero(); - value_sum_.SetZero(); - deriv_sum_.SetZero(); - self_repair_total_.SetZero(); - count_ = 0.0; -} - void LstmNonlinearityComponent::PerturbParams(BaseFloat stddev) { CuMatrix temp_params(params_.NumRows(), params_.NumCols()); temp_params.SetRandn(); diff --git a/src/nnet3/nnet-simple-component.h b/src/nnet3/nnet-simple-component.h index e94e8530f2b..9ffedb87bd3 100644 --- a/src/nnet3/nnet-simple-component.h +++ b/src/nnet3/nnet-simple-component.h @@ -413,7 +413,6 @@ class AffineComponent: public UpdatableComponent { // Some functions from base-class UpdatableComponent. virtual void Scale(BaseFloat scale); virtual void Add(BaseFloat alpha, const Component &other); - virtual void SetZero(bool treat_as_gradient); virtual void PerturbParams(BaseFloat stddev); virtual BaseFloat DotProduct(const UpdatableComponent &other) const; virtual int32 NumParameters() const; @@ -514,7 +513,6 @@ class BlockAffineComponent : public UpdatableComponent { // Functions from base-class UpdatableComponent. virtual void Scale(BaseFloat scale); virtual void Add(BaseFloat alpha, const Component &other); - virtual void SetZero(bool treat_as_gradient); virtual void PerturbParams(BaseFloat stddev); virtual BaseFloat DotProduct(const UpdatableComponent &other) const; virtual int32 NumParameters() const; @@ -577,7 +575,6 @@ class RepeatedAffineComponent: public UpdatableComponent { // Some functions from base-class UpdatableComponent. virtual void Scale(BaseFloat scale); virtual void Add(BaseFloat alpha, const Component &other); - virtual void SetZero(bool treat_as_gradient); virtual void PerturbParams(BaseFloat stddev); virtual BaseFloat DotProduct(const UpdatableComponent &other) const; virtual int32 NumParameters() const; @@ -1227,7 +1224,6 @@ class PerElementScaleComponent: public UpdatableComponent { // Some functions from base-class UpdatableComponent. virtual void Scale(BaseFloat scale); virtual void Add(BaseFloat alpha, const Component &other); - virtual void SetZero(bool treat_as_gradient); virtual void PerturbParams(BaseFloat stddev); virtual BaseFloat DotProduct(const UpdatableComponent &other) const; virtual int32 NumParameters() const; @@ -1299,7 +1295,6 @@ class PerElementOffsetComponent: public UpdatableComponent { // Some functions from base-class UpdatableComponent. 
virtual void Scale(BaseFloat scale); virtual void Add(BaseFloat alpha, const Component &other); - virtual void SetZero(bool treat_as_gradient); virtual void PerturbParams(BaseFloat stddev); virtual BaseFloat DotProduct(const UpdatableComponent &other) const; virtual int32 NumParameters() const; @@ -1368,7 +1363,6 @@ class ConstantFunctionComponent: public UpdatableComponent { // Some functions from base-class UpdatableComponent. virtual void Scale(BaseFloat scale); virtual void Add(BaseFloat alpha, const Component &other); - virtual void SetZero(bool treat_as_gradient); virtual void PerturbParams(BaseFloat stddev); virtual BaseFloat DotProduct(const UpdatableComponent &other) const; virtual int32 NumParameters() const; @@ -1571,7 +1565,6 @@ class ConvolutionComponent: public UpdatableComponent { // Some functions from base-class UpdatableComponent. virtual void Scale(BaseFloat scale); virtual void Add(BaseFloat alpha, const Component &other); - virtual void SetZero(bool treat_as_gradient); virtual void PerturbParams(BaseFloat stddev); virtual BaseFloat DotProduct(const UpdatableComponent &other) const; virtual int32 NumParameters() const; @@ -1739,7 +1732,6 @@ class LstmNonlinearityComponent: public UpdatableComponent { // Some functions from base-class UpdatableComponent. virtual void Scale(BaseFloat scale); virtual void Add(BaseFloat alpha, const Component &other); - virtual void SetZero(bool treat_as_gradient); virtual void PerturbParams(BaseFloat stddev); virtual BaseFloat DotProduct(const UpdatableComponent &other) const; virtual int32 NumParameters() const; @@ -1995,9 +1987,9 @@ class CompositeComponent: public UpdatableComponent { // Some functions from base-class UpdatableComponent. virtual void SetUnderlyingLearningRate(BaseFloat lrate); virtual void SetActualLearningRate(BaseFloat lrate); + virtual void SetAsGradient(); virtual void Scale(BaseFloat scale); virtual void Add(BaseFloat alpha, const Component &other); - virtual void SetZero(bool treat_as_gradient); virtual void PerturbParams(BaseFloat stddev); virtual BaseFloat DotProduct(const UpdatableComponent &other) const; virtual int32 NumParameters() const; diff --git a/src/nnet3/nnet-training.cc b/src/nnet3/nnet-training.cc index 9757452058e..6bac172b5bd 100644 --- a/src/nnet3/nnet-training.cc +++ b/src/nnet3/nnet-training.cc @@ -35,9 +35,7 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config, KALDI_ASSERT(config.momentum >= 0.0 && config.max_param_change >= 0.0); delta_nnet_ = nnet_->Copy(); - bool is_gradient = false; // setting this to true would disable the - // natural-gradient updates. 
-  SetZero(is_gradient, delta_nnet_);
+  ScaleNnet(0.0, delta_nnet_);
   const int32 num_updatable = NumUpdatableComponents(*delta_nnet_);
   num_max_change_per_component_applied_.resize(num_updatable, 0);
   num_max_change_global_applied_ = 0;
@@ -150,7 +148,7 @@ void NnetTrainer::UpdateParamsWithMaxChange() {
   if (param_delta > config_.max_param_change) {
     if (param_delta - param_delta != 0.0) {
       KALDI_WARN << "Infinite parameter change, will not apply.";
-      SetZero(false, delta_nnet_);
+      ScaleNnet(0.0, delta_nnet_);
     } else {
       scale *= config_.max_param_change / param_delta;
       num_max_change_global_applied_++;
diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc
index 468c0e893a4..865fdcd7c0a 100644
--- a/src/nnet3/nnet-utils.cc
+++ b/src/nnet3/nnet-utils.cc
@@ -168,25 +168,6 @@ void ComputeSimpleNnetContext(const Nnet &nnet,
       *std::max_element(right_contexts.begin(), right_contexts.end());
 }
 
-void SetZero(bool is_gradient,
-             Nnet *nnet) {
-  for (int32 c = 0; c < nnet->NumComponents(); c++) {
-    Component *comp = nnet->GetComponent(c);
-    NonlinearComponent *nc = dynamic_cast<NonlinearComponent*>(comp);
-    if (comp->Properties() & kUpdatableComponent) {
-      UpdatableComponent *u_comp = dynamic_cast<UpdatableComponent*>(comp);
-      KALDI_ASSERT(u_comp != NULL);
-      u_comp->SetZero(is_gradient);
-    } else if (nc != NULL) {
-      nc->ZeroStats();
-    } else {
-      // Scale(0.0) is called as a backup; currently it should never
-      // do anything useful for any component type.
-      comp->Scale(0.0);
-    }
-  }
-}
-
 void PerturbParams(BaseFloat stddev,
                    Nnet *nnet) {
   for (int32 c = 0; c < nnet->NumComponents(); c++) {
@@ -280,11 +261,20 @@ void SetLearningRate(BaseFloat learning_rate,
   }
 }
 
+void SetNnetAsGradient(Nnet *nnet) {
+  for (int32 c = 0; c < nnet->NumComponents(); c++) {
+    Component *comp = nnet->GetComponent(c);
+    if (comp->Properties() & kUpdatableComponent) {
+      UpdatableComponent *u_comp = dynamic_cast<UpdatableComponent*>(comp);
+      KALDI_ASSERT(u_comp != NULL);
+      u_comp->SetAsGradient();
+    }
+  }
+}
+
 void ScaleNnet(BaseFloat scale, Nnet *nnet) {
   if (scale == 1.0) return;
-  else if (scale == 0.0) {
-    SetZero(false, nnet);
-  } else {
+  else {
     for (int32 c = 0; c < nnet->NumComponents(); c++) {
       Component *comp = nnet->GetComponent(c);
       comp->Scale(scale);
diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h
index de181defc60..2bda0c623b6 100644
--- a/src/nnet3/nnet-utils.h
+++ b/src/nnet3/nnet-utils.h
@@ -125,6 +125,10 @@ void SetLearningRate(BaseFloat learning_rate,
 /// Scales the nnet parameters and stats by this scale.
 void ScaleNnet(BaseFloat scale, Nnet *nnet);
 
+/// Sets nnet as gradient by setting is_gradient_ to true and
+/// learning_rate_ to 1 for each UpdatableComponent in nnet.
+void SetNnetAsGradient(Nnet *nnet);
+
 /// Does *dest += alpha * src (affects nnet parameters and
 /// stored stats).
 void AddNnet(const Nnet &src, BaseFloat alpha, Nnet *dest);

From d3787c1869c4eaf5a5c47b30b13e7257e3264b1d Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Tue, 17 Jan 2017 13:41:24 -0500
Subject: [PATCH 244/530] [src]: Minor updates to sequence training and
 adjusting priors.
(#1345) --- egs/wsj/s5/steps/nnet3/adjust_priors.sh | 17 ++-- .../s5/steps/nnet3/train_discriminative.sh | 77 ++++++++----------- 2 files changed, 37 insertions(+), 57 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/adjust_priors.sh b/egs/wsj/s5/steps/nnet3/adjust_priors.sh index 60d377f18e8..5a0d8454781 100755 --- a/egs/wsj/s5/steps/nnet3/adjust_priors.sh +++ b/egs/wsj/s5/steps/nnet3/adjust_priors.sh @@ -21,6 +21,7 @@ egs_type=egs # Compute from $egs_type.*.ark in $egs_dir use_raw_nnet=false # If raw nnet, the averaged posterior is computed # and stored in post.$iter.vec; but there is no # adjusting of priors +minibatch_size=256 iter=final . utils/parse_options.sh @@ -59,20 +60,16 @@ fi rm -f $dir/post.$iter.*.vec 2>/dev/null -left_context=`cat $egs_dir/info/left_context` || exit 1 -right_context=`cat $egs_dir/info/right_context` || exit 1 - -context_opts="--left-context=$left_context --right-context=$right_context" - num_archives=$(cat $egs_dir/info/num_archives) || { echo "error: no such file $egs_dir/info/frames_per_eg"; exit 1; } -if [ $num_jobs_compute_prior -gt $num_archives ]; then egs_part=1; -else egs_part=JOB; fi +if [ $num_jobs_compute_prior -gt $num_archives ]; then + num_jobs_compute_prior=$num_archives +fi if [ $egs_type != "degs" ]; then $cmd JOB=1:$num_jobs_compute_prior $prior_queue_opt $dir/log/get_post.$iter.JOB.log \ nnet3-copy-egs ark:$egs_dir/$egs_type.$egs_part.ark ark:- \| \ nnet3-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \ - nnet3-merge-egs ark:- ark:- \| \ + nnet3-merge-egs --minibatch-size=$minibatch_size ark:- ark:- \| \ nnet3-compute-from-egs $prior_gpu_opt --apply-exp=true \ "$model" ark:- ark:- \| \ matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$iter.JOB.vec || exit 1; @@ -80,7 +77,7 @@ else $cmd JOB=1:$num_jobs_compute_prior $prior_queue_opt $dir/log/get_post.$iter.JOB.log \ nnet3-discriminative-copy-egs ark:$egs_dir/$egs_type.$egs_part.ark ark:- \| \ nnet3-discriminative-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \ - nnet3-discriminative-merge-egs ark:- ark:- \| \ + nnet3-discriminative-merge-egs --minibatch-size=$minibatch_size ark:- ark:- \| \ nnet3-compute-from-degs $prior_gpu_opt --apply-exp=true \ "$model" ark:- ark:- \| \ matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$iter.JOB.vec || exit 1; @@ -94,7 +91,7 @@ $cmd $dir/log/vector_sum.$iter.log \ if ! $use_raw_nnet; then run.pl $dir/log/adjust_priors.$iter.log \ - nnet3-am-adjust-priors $dir/$iter.mdl $dir/post.$iter.vec $dir/$iter.adj.mdl + nnet3-am-adjust-priors $dir/$iter.mdl $dir/post.$iter.vec $dir/${iter}_adj.mdl fi rm -f $dir/post.$iter.*.vec; diff --git a/egs/wsj/s5/steps/nnet3/train_discriminative.sh b/egs/wsj/s5/steps/nnet3/train_discriminative.sh index 139e9ba7505..05203ff5166 100755 --- a/egs/wsj/s5/steps/nnet3/train_discriminative.sh +++ b/egs/wsj/s5/steps/nnet3/train_discriminative.sh @@ -47,16 +47,15 @@ shuffle_buffer_size=1000 # This "buffer_size" variable controls randomization of stage=-3 -adjust_priors=true # If true then it will - num_threads=16 # this is the default but you may want to change it, e.g. to 1 if # using GPUs. 
cleanup=true -keep_model_iters=1 +keep_model_iters=100 remove_egs=false src_model= # will default to $degs_dir/final.mdl +num_jobs_compute_prior=10 min_deriv_time=0 max_deriv_time_relative=0 @@ -129,11 +128,6 @@ done silphonelist=`cat $degs_dir/info/silence.csl` || exit 1; -num_archives_priors=0 -if $adjust_priors; then - num_archives_priors=`cat $degs_dir/info/num_archives_priors` || exit 1 -fi - num_archives=$(cat $degs_dir/info/num_archives) || exit 1; frame_subsampling_factor=$(cat $degs_dir/info/frame_subsampling_factor) @@ -200,6 +194,8 @@ if [ $stage -le -1 ]; then $cmd $dir/log/convert.log \ nnet3-am-copy --learning-rate=$learning_rate "$src_model" $dir/0.mdl || exit 1; + + ln -sf 0.mdl $dir/epoch0.mdl fi @@ -307,28 +303,11 @@ while [ $x -lt $num_iters ]; do nnet3-am-copy --set-raw-nnet=- $dir/$x.mdl $dir/$[$x+1].mdl || exit 1; rm $nnets_list - - if [ ! -z "${iter_to_epoch[$x]}" ]; then - e=${iter_to_epoch[$x]} - ln -sf $x.mdl $dir/epoch$e.mdl - fi - - if $adjust_priors && [ ! -z "${iter_to_epoch[$x]}" ]; then - if [ ! -f $degs_dir/priors_egs.1.ark ]; then - echo "$0: Expecting $degs_dir/priors_egs.1.ark to exist since --adjust-priors was true." - echo "$0: Run this script with --adjust-priors false to not adjust priors" - exit 1 - fi - ( - e=${iter_to_epoch[$x]} - rm $dir/.error 2> /dev/null - - steps/nnet3/adjust_priors.sh --egs-type priors_egs \ - --num-jobs-compute-prior $num_archives_priors \ - --cmd "$cmd" --use-gpu false \ - --use-raw-nnet false --iter epoch$e $dir $degs_dir \ - || { touch $dir/.error; echo "Error in adjusting priors. See $dir/log/adjust_priors.epoch$e.log"; exit 1; } - ) & + [ ! -f $dir/$[$x+1].mdl ] && echo "$0: Did not create $dir/$[$x+1].mdl" && exit 1; + if [ -f $dir/$[$x-1].mdl ] && $cleanup && \ + [ $[($x-1)%$keep_model_iters] -ne 0 ] && \ + [ -z "${iter_to_epoch[$[$x-1]]}" ]; then + rm $dir/$[$x-1].mdl fi [ -f $dir/.error ] && { echo "Found $dir/.error. Error on iteration $x"; exit 1; } @@ -337,28 +316,27 @@ while [ $x -lt $num_iters ]; do rm $dir/cache.$x 2>/dev/null || true x=$[$x+1] num_archives_processed=$[num_archives_processed+num_jobs_nnet] -done -rm $dir/final.mdl 2>/dev/null -cp $dir/$x.mdl $dir/final.mdl -ln -sf final.mdl $dir/epoch$num_epochs_expanded.mdl + if [ $stage -le $x ] && [ ! -z "${iter_to_epoch[$x]}" ]; then + e=${iter_to_epoch[$x]} + ln -sf $x.mdl $dir/epoch$e.mdl -if $adjust_priors && [ $stage -le $num_iters ]; then - if [ ! -f $degs_dir/priors_egs.1.ark ]; then - echo "$0: Expecting $degs_dir/priors_egs.1.ark to exist since --adjust-priors was true." - echo "$0: Run this script with --adjust-priors false to not adjust priors" - exit 1 - fi + ( + rm $dir/.error 2> /dev/null - steps/nnet3/adjust_priors.sh --egs-type priors_egs \ - --num-jobs-compute-prior $num_archives_priors \ - --cmd "$cmd $prior_queue_opt" --use-gpu false \ - --use-raw-nnet false --iter epoch$num_epochs_expanded \ - $dir $degs_dir || exit 1 -fi + steps/nnet3/adjust_priors.sh --egs-type degs \ + --num-jobs-compute-prior $num_jobs_compute_prior \ + --cmd "$cmd" --use-gpu false \ + --minibatch-size $minibatch_size \ + --use-raw-nnet false --iter epoch$e $dir $degs_dir \ + || { touch $dir/.error; echo "Error in adjusting priors. See $dir/log/adjust_priors.epoch$e.log"; exit 1; } + ) & + fi -echo Done +done +rm $dir/final.mdl 2>/dev/null +cp $dir/$x.mdl $dir/final.mdl # function to remove egs that might be soft links. 
remove () { for x in $*; do [ -L $x ] && rm $(readlink -f $x); rm $x; done } @@ -379,3 +357,8 @@ if $cleanup; then fi done fi + +wait +[ -f $dir/.error ] && { echo "Found $dir/.error."; exit 1; } + +echo Done && exit 0 From 390aff4e5e176720d812b69f7440758744c8e6ac Mon Sep 17 00:00:00 2001 From: nanaca Date: Thu, 19 Jan 2017 04:32:35 +0900 Subject: [PATCH 245/530] [egs] Fix typo in egs/csj/s5/local/csj_run_rnnlm.sh (#1351) --- egs/csj/s5/local/csj_run_rnnlm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/csj/s5/local/csj_run_rnnlm.sh b/egs/csj/s5/local/csj_run_rnnlm.sh index bf3976b8a1f..5c6cd4343f6 100755 --- a/egs/csj/s5/local/csj_run_rnnlm.sh +++ b/egs/csj/s5/local/csj_run_rnnlm.sh @@ -44,7 +44,7 @@ local/csj_train_rnnlms.sh --dict-suffix "_nosp" \ echo h500 Begin local/csj_train_rnnlms.sh --dict-suffix "_nosp" \ --hidden 500 --nwords 10000 --class 200 \ - --direct 0 data/local/rnnlm.h400 + --direct 0 data/local/rnnlm.h500 #SKIP From 62fe2c7fdbf0dd05ecb62959781c408e7d6cfbcc Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 18 Jan 2017 16:17:48 -0500 Subject: [PATCH 246/530] Miscellaneous minor bug-fixes --- .../s5/local/nnet3/run_tdnn_discriminative.sh | 2 +- egs/fisher_swbd/s5/local/rt03_data_prep.sh | 20 ++-- .../s5/local/nnet3/run_tdnn_discriminative.sh | 2 +- egs/multi_en/s5/local/rt03_data_prep.sh | 18 +-- egs/swbd/README.txt | 6 +- .../local/nnet3/run_blstm_discriminative.sh | 2 +- .../s5c/local/nnet3/run_ivector_common.sh | 7 +- .../local/nnet3/run_tdnn_discriminative.sh | 2 +- egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh | 0 egs/swbd/s5c/local/rt03_data_prep.sh | 20 ++-- egs/swbd/s5c/run.sh | 7 +- .../s5_r2/local/chain/tuning/run_tdnn_1c.sh | 15 ++- egs/tedlium/s5_r2/local/nnet3/compare_wer.sh | 18 ++- egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh | 108 ------------------ .../nnet3/tuning/run_tdnn_lstm_1b_disc.sh | 5 +- .../nnet3/train/chain_objf/acoustic_model.py | 32 ++++-- egs/wsj/s5/steps/nnet3/adjust_priors.sh | 34 +++--- egs/wsj/s5/steps/nnet3/chain/train.py | 1 - egs/wsj/s5/steps/nnet3/get_degs.sh | 2 + .../s5/steps/nnet3/train_discriminative.sh | 2 +- egs/wsj/s5/utils/filter_scps.pl | 3 +- src/nnet3/discriminative-training.cc | 5 +- 22 files changed, 124 insertions(+), 187 deletions(-) mode change 100755 => 100644 egs/swbd/s5c/local/nnet3/run_ivector_common.sh mode change 100644 => 100755 egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh delete mode 100755 egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh diff --git a/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh index dfaf8f90da3..7dc82ad34d1 100644 --- a/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh @@ -154,7 +154,7 @@ if [ $stage -le 5 ]; then for decode_set in eval2000 rt03; do ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - iter=epoch$x.adj + iter=epoch${x}_adj steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ diff --git a/egs/fisher_swbd/s5/local/rt03_data_prep.sh b/egs/fisher_swbd/s5/local/rt03_data_prep.sh index a18637a6a16..d565b2b4b1a 100755 --- a/egs/fisher_swbd/s5/local/rt03_data_prep.sh +++ b/egs/fisher_swbd/s5/local/rt03_data_prep.sh @@ -1,6 +1,6 @@ #!/bin/bash -# RT-03 data preparation (conversational telephone speech part only) +# RT-03 data preparation (conversational telephone speech part only) # Adapted from Arnab Ghoshal's script for 
Hub-5 Eval 2000 by Peng Qi
 
 # To be run from one directory above this script.
@@ -8,7 +8,8 @@
 # Expects the standard directory layout for RT-03
 
 if [ $# -ne 1 ]; then
-  echo "Usage: "`basename $0`" <rt03-dir>"
+  echo "Usage: $0 <rt03-dir>"
+  echo "e.g.: $0 /export/corpora/LDC/LDC2007S10"
   echo "See comments in the script for more details"
   exit 1
 fi
@@ -19,7 +20,7 @@ sdir=$1
 [ ! -d $sdir/data/references/eval03/english/cts ] \
   && echo Expecting directory $tdir/data/references/eval03/english/cts to be present && exit 1;
 
-. path.sh 
+. path.sh
 
 dir=data/local/rt03
 mkdir -p $dir
@@ -37,7 +38,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
   && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1;
 
 awk -v sph2pipe=$sph2pipe '{
-  printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); 
+  printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2);
   printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);
 }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1;
 #side A - channel 1, side B - channel 2
@@ -47,7 +48,7 @@ awk -v sph2pipe=$sph2pipe '{
 # sw02001-A_000098-001156 sw02001-A 0.98 11.56
 #pem=$sdir/english/hub5e_00.pem
 #[ ! -f $pem ] && echo "No such file $pem" && exit 1;
-# pem file has lines like: 
+# pem file has lines like:
 # en_4156 A unknown_speaker 301.85 302.48
 
 #grep -v ';;' $pem \
@@ -59,7 +60,7 @@ cat $tdir/*.stm | grep -v ';;' | grep -v inter_segment_gap \
   | sort -u > $dir/segments
 
 # stm file has lines like:
-# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER 
+# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER
 # TODO(arnab): We should really be lowercasing this since the Edinburgh
 # recipe uses lowercase. This is not used in the actual scoring.
 #grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \
@@ -77,7 +78,7 @@ cat $tdir/*.stm | \
   grep -v inter_segment_gap | \
   awk '{ printf $1; if ($1==";;") printf(" %s",$2); else printf(($2==1)?" A":" B"); for(n=3;n<=NF;n++) printf(" %s", $n); print ""; }'\
-  > $dir/stm
+  > $dir/stm
 #$tdir/reference/hub5e00.english.000405.stm > $dir/stm
 
 cp $rtroot/data/trans_rules/en20030506.glm $dir/glm
@@ -87,10 +88,10 @@ cp $rtroot/data/trans_rules/en20030506.glm $dir/glm
   echo "Segments from pem file and stm file do not match." && exit 1;
 
 grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text
- 
+
 # create an utt2spk file that assumes each conversation side is
 # a separate speaker.
-awk '{print $1,$2;}' $dir/segments > $dir/utt2spk 
+awk '{print $1,$2;}' $dir/segments > $dir/utt2spk
 utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt
 
 # cp $dir/segments $dir/segments.tmp
@@ -110,4 +111,3 @@ done
 
 echo Data preparation and formatting completed for RT-03
 echo "(but not MFCC extraction)"
-
diff --git a/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh
index cf26cac406a..365d01cc85d 100755
--- a/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh
+++ b/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh
@@ -161,7 +161,7 @@ if [ $stage -le 5 ]; then
   for decode_set in test_clean test_other dev_clean dev_other; do
       (
       num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
-      iter=epoch$x.adj
+      iter=epoch${x}_adj
 
       steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \
          --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \
diff --git a/egs/multi_en/s5/local/rt03_data_prep.sh b/egs/multi_en/s5/local/rt03_data_prep.sh
index 84955f0ed50..aa1e2ba4cc2 100755
--- a/egs/multi_en/s5/local/rt03_data_prep.sh
+++ b/egs/multi_en/s5/local/rt03_data_prep.sh
@@ -8,7 +8,7 @@
 #   - Modified paths to match multi_en naming conventions
 ###########################################################################################
 
-# RT-03 data preparation (conversational telephone speech part only) 
+# RT-03 data preparation (conversational telephone speech part only)
 # Adapted from Arnab Ghoshal's script for Hub-5 Eval 2000 by Peng Qi
 
 # To be run from one directory above this script.
@@ -16,7 +16,8 @@
 # Expects the standard directory layout for RT-03
 
 if [ $# -ne 1 ]; then
-  echo "Usage: "`basename $0`" <rt03-dir>"
+  echo "Usage: $0 <rt03-dir>"
+  echo "e.g.: $0 /export/corpora/LDC/LDC2007S10"
   echo "See comments in the script for more details"
   exit 1
 fi
@@ -45,7 +46,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
   && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1;
 
 awk -v sph2pipe=$sph2pipe '{
-  printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); 
+  printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2);
   printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);
 }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1;
 #side A - channel 1, side B - channel 2
@@ -55,7 +56,7 @@ awk -v sph2pipe=$sph2pipe '{
 # sw02001-A_000098-001156 sw02001-A 0.98 11.56
 #pem=$sdir/english/hub5e_00.pem
 #[ ! -f $pem ] && echo "No such file $pem" && exit 1;
-# pem file has lines like: 
+# pem file has lines like:
 # en_4156 A unknown_speaker 301.85 302.48
 
 #grep -v ';;' $pem \
@@ -67,7 +68,7 @@ cat $tdir/*.stm | grep -v ';;' | grep -v inter_segment_gap \
   | sort -u > $dir/segments
 
 # stm file has lines like:
-# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER 
+# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER
 # TODO(arnab): We should really be lowercasing this since the Edinburgh
 # recipe uses lowercase. This is not used in the actual scoring.
 #grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \
@@ -85,7 +86,7 @@ cat $tdir/*.stm | \
   grep -v inter_segment_gap | \
   awk '{ printf $1; if ($1==";;") printf(" %s",$2); else printf(($2==1)?" A":" B"); for(n=3;n<=NF;n++) printf(" %s", $n); print ""; }'\
-  > $dir/stm
+  > $dir/stm
 #$tdir/reference/hub5e00.english.000405.stm > $dir/stm
 
 cp $rtroot/data/trans_rules/en20030506.glm $dir/glm
@@ -95,10 +96,10 @@ cp $rtroot/data/trans_rules/en20030506.glm $dir/glm
   echo "Segments from pem file and stm file do not match."
&& exit 1; grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text - + # create an utt2spk file that assumes each conversation side is # a separate speaker. -awk '{print $1,$2;}' $dir/segments > $dir/utt2spk +awk '{print $1,$2;}' $dir/segments > $dir/utt2spk utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt # cp $dir/segments $dir/segments.tmp @@ -118,4 +119,3 @@ done echo Data preparation and formatting completed for RT-03 echo "(but not MFCC extraction)" - diff --git a/egs/swbd/README.txt b/egs/swbd/README.txt index fc61a4c3060..1da570274e4 100644 --- a/egs/swbd/README.txt +++ b/egs/swbd/README.txt @@ -10,11 +10,14 @@ About the Switchboard corpus We are using the eval2000 a.k.a. hub5'00 evaluation data. The acoustics are LDC2002S09 and the text is LDC2002T43. + We are also using the RT'03 test set, available as LDC2007S10. Note: not + all parts of the recipe test with this. + About the Fisher corpus for language modeling We use Fisher English training speech transcripts for language modeling, if they are available. The catalog number for part 1 transcripts is LDC2004T19, - and LDC2005T19 for part 2. + and LDC2005T19 for part 2. Each subdirectory of this directory contains the scripts for a sequence of experiments. @@ -24,4 +27,3 @@ scripts for a sequence of experiments. s5b: This is (somewhat less) out of date, please see s5c s5c: This is the current recipe. - diff --git a/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh b/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh index fbf6d64aefa..349fd246022 100755 --- a/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh +++ b/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh @@ -166,7 +166,7 @@ if [ $stage -le 5 ]; then for decode_set in train_dev eval2000; do ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - iter=epoch$x.adj + iter=epoch${x}_adj steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --online-ivector-dir exp/nnet3/ivectors_${decode_set} $context_opts \ diff --git a/egs/swbd/s5c/local/nnet3/run_ivector_common.sh b/egs/swbd/s5c/local/nnet3/run_ivector_common.sh old mode 100755 new mode 100644 index 109396ed72e..894de5e58f9 --- a/egs/swbd/s5c/local/nnet3/run_ivector_common.sh +++ b/egs/swbd/s5c/local/nnet3/run_ivector_common.sh @@ -13,6 +13,9 @@ speed_perturb=true mkdir -p nnet3 # perturbed data preparation train_set=train_nodup + +if [ -e data/rt03 ]; then maybe_rt03=rt03; else maybe_rt03= ; fi + if [ "$speed_perturb" == "true" ]; then if [ $stage -le 1 ]; then #Although the nnet will be trained by high resolution data, we still have to perturbe the normal data to get the alignment @@ -81,7 +84,7 @@ for line in sys.stdin.readlines(): utils/fix_data_dir.sh data/${dataset}_hires; done - for dataset in eval2000 train_dev rt03; do + for dataset in eval2000 train_dev $maybe_rt03; do # Create MFCCs for the eval set utils/copy_data_dir.sh data/$dataset data/${dataset}_hires steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \ @@ -133,7 +136,7 @@ if [ $stage -le 8 ]; then steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ data/${train_set}_max2_hires exp/nnet3/extractor exp/nnet3/ivectors_$train_set || exit 1; - for data_set in eval2000 train_dev rt03; do + for data_set in eval2000 train_dev $maybe_rt03; do steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ data/${data_set}_hires exp/nnet3/extractor exp/nnet3/ivectors_$data_set || exit 1; done diff --git 
a/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh b/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh
index 255f1d49882..ceef60d0656 100755
--- a/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh
+++ b/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh
@@ -154,7 +154,7 @@ if [ $stage -le 5 ]; then
   for decode_set in train_dev eval2000; do
       (
       num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
-      iter=epoch$x.adj
+      iter=epoch${x}_adj
 
       steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \
          --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh
old mode 100644
new mode 100755
diff --git a/egs/swbd/s5c/local/rt03_data_prep.sh b/egs/swbd/s5c/local/rt03_data_prep.sh
index a18637a6a16..d565b2b4b1a 100755
--- a/egs/swbd/s5c/local/rt03_data_prep.sh
+++ b/egs/swbd/s5c/local/rt03_data_prep.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# RT-03 data preparation (conversational telephone speech part only) 
+# RT-03 data preparation (conversational telephone speech part only)
 # Adapted from Arnab Ghoshal's script for Hub-5 Eval 2000 by Peng Qi
 
 # To be run from one directory above this script.
@@ -8,7 +8,8 @@
 # Expects the standard directory layout for RT-03
 
 if [ $# -ne 1 ]; then
-  echo "Usage: "`basename $0`" <rt03-dir>"
+  echo "Usage: $0 <rt03-dir>"
+  echo "e.g.: $0 /export/corpora/LDC/LDC2007S10"
   echo "See comments in the script for more details"
   exit 1
 fi
@@ -19,7 +20,7 @@ sdir=$1
 [ ! -d $sdir/data/references/eval03/english/cts ] \
   && echo Expecting directory $tdir/data/references/eval03/english/cts to be present && exit 1;
 
-. path.sh 
+. path.sh
 
 dir=data/local/rt03
 mkdir -p $dir
@@ -37,7 +38,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
   && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1;
 
 awk -v sph2pipe=$sph2pipe '{
-  printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); 
+  printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2);
   printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);
 }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1;
 #side A - channel 1, side B - channel 2
@@ -47,7 +48,7 @@ awk -v sph2pipe=$sph2pipe '{
 # sw02001-A_000098-001156 sw02001-A 0.98 11.56
 #pem=$sdir/english/hub5e_00.pem
 #[ ! -f $pem ] && echo "No such file $pem" && exit 1;
-# pem file has lines like: 
+# pem file has lines like:
 # en_4156 A unknown_speaker 301.85 302.48
 
 #grep -v ';;' $pem \
@@ -59,7 +60,7 @@ cat $tdir/*.stm | grep -v ';;' | grep -v inter_segment_gap \
   | sort -u > $dir/segments
 
 # stm file has lines like:
-# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER 
+# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER
 # TODO(arnab): We should really be lowercasing this since the Edinburgh
 # recipe uses lowercase. This is not used in the actual scoring.
 #grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \
@@ -77,7 +78,7 @@ cat $tdir/*.stm | \
   grep -v inter_segment_gap | \
   awk '{ printf $1; if ($1==";;") printf(" %s",$2); else printf(($2==1)?" A":" B"); for(n=3;n<=NF;n++) printf(" %s", $n); print ""; }'\
-  > $dir/stm
+  > $dir/stm
 #$tdir/reference/hub5e00.english.000405.stm > $dir/stm
 
 cp $rtroot/data/trans_rules/en20030506.glm $dir/glm
@@ -87,10 +88,10 @@ cp $rtroot/data/trans_rules/en20030506.glm $dir/glm
   echo "Segments from pem file and stm file do not match." && exit 1;
 
 grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text
- 
+
 # create an utt2spk file that assumes each conversation side is
 # a separate speaker.
-awk '{print $1,$2;}' $dir/segments > $dir/utt2spk +awk '{print $1,$2;}' $dir/segments > $dir/utt2spk utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt # cp $dir/segments $dir/segments.tmp @@ -110,4 +111,3 @@ done echo Data preparation and formatting completed for RT-03 echo "(but not MFCC extraction)" - diff --git a/egs/swbd/s5c/run.sh b/egs/swbd/s5c/run.sh index 0eafe73d046..8b08419007d 100755 --- a/egs/swbd/s5c/run.sh +++ b/egs/swbd/s5c/run.sh @@ -72,11 +72,16 @@ fi # local/eval2000_data_prep.sh /home/dpovey/data/LDC2002S09/hub5e_00 /home/dpovey/data/LDC2002T43 local/eval2000_data_prep.sh /export/corpora2/LDC/LDC2002S09/hub5e_00 /export/corpora2/LDC/LDC2002T43 +# prepare the rt03 data. Note: this isn't 100% necessary for this +# recipe, not all parts actually test using rt03. +local/rt03_data_prep.sh /export/corpora/LDC/LDC2007S10 + # Now make MFCC features. # mfccdir should be some place with a largish disk where you # want to store MFCC features. +if [ -e data/rt03 ]; then maybe_rt03=rt03; else maybe_rt03= ; fi mfccdir=mfcc -for x in train eval2000; do +for x in train eval2000 $maybe_rt03; do steps/make_mfcc.sh --nj 50 --cmd "$train_cmd" \ data/$x exp/make_mfcc/$x $mfccdir steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh index 111a68d9878..f7a18b4bfcf 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh @@ -2,7 +2,20 @@ # run_tdnn_1c.sh is like run_tdnn_1b.sh but changing chunk-width from 150 to # '140,110,160', and -# and --trainer.num-chunk-per-minibatch from 128 to 128,64 +# and --trainer.num-chunk-per-minibatch from 128 to 128,64. +# Not better; if anything a little worse. But could possibly be noise. + +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn1b_sp_bi exp/chain_cleaned/tdnn1c_sp_bi +# System tdnn1b_sp_bi tdnn1c_sp_bi +# WER on dev(orig) 9.4 9.8 +# WER on dev(rescored) 8.8 9.0 +# WER on test(orig) 9.6 9.7 +# WER on test(rescored) 9.0 9.2 +# Final train prob -0.0870 -0.0942 +# Final valid prob -0.1147 -0.1108 +# Final train prob (xent) -1.4014 -1.4227 +# Final valid prob (xent) -1.5634 -1.4884 + # run_tdnn_1b.sh is like run_tdnn_1a.sh but upgrading to xconfig-based # config generation. 
diff --git a/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh b/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh index 6aff556c142..cff39def83b 100755 --- a/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh +++ b/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh @@ -36,14 +36,28 @@ done echo -n "Final train prob " for x in $*; do - prob=$(grep Overall $x/log/compute_prob_train.combined.log | awk '{printf("%.4f", $8)}') + prob=$(grep Overall $x/log/compute_prob_train.combined.log | grep log-like | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo echo -n "Final valid prob " for x in $*; do - prob=$(grep Overall $x/log/compute_prob_valid.combined.log | awk '{printf("%.4f", $8)}') + prob=$(grep Overall $x/log/compute_prob_valid.combined.log | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "Final train acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.combined.log | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "Final valid acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.combined.log | grep accuracy | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh deleted file mode 100755 index 91ba913c183..00000000000 --- a/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh +++ /dev/null @@ -1,108 +0,0 @@ -#!/bin/bash - -# This is the standard "tdnn" system, built in nnet3; this script -# is the version that's meant to run with data-cleanup, that doesn't -# support parallel alignments. - - -# by default, with cleanup: -# local/nnet3/run_tdnn.sh - -# without cleanup: -# local/nnet3/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & - - -set -e -o pipefail -u - -# First the options that are passed through to run_ivector_common.sh -# (some of which are also used in this script directly). -stage=0 -nj=30 -decode_nj=30 -min_seg_len=1.55 -train_set=train_cleaned -gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it - # should have alignments for the specified training data. -num_threads_ubm=32 -nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned -tdnn_affix= #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. - -# Options which are not passed through to run_ivector_common.sh -train_stage=-10 -splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0" -remove_egs=true -relu_dim=850 -num_epochs=3 - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! 
cuda-compiled; then - cat </dev/null - for dset in dev test; do - ( - steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ - --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ - ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 - steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ - data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 - ) || touch $dir/.error & - done - wait - [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 -fi - - -exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh index 4fd74a71647..11bb733333d 100755 --- a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh @@ -144,9 +144,9 @@ if [ $stage -le 4 ]; then for decode_set in dev test; do num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` for iter in epoch$x epoch${x}_adj; do - + ( steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ - --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + --online-ivector-dir exp/nnet3_cleaned/ivectors_${decode_set}_hires \ $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_${iter} || exit 1; steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -154,6 +154,7 @@ if [ $stage -le 4 ]; then $dir/decode_${decode_set}_${iter} \ $dir/decode_${decode_set}_${iter}_rescore || exit 1; ) & + done done done fi diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py index 3e375a1b863..69eb0f52e3b 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py @@ -26,15 +26,26 @@ def create_phone_lm(dir, tree_dir, run_opts, lm_opts=None): This method trains a phone LM for chain training using the alignments in "tree_dir" """ + try: + f = open(tree_dir + "/num_jobs", 'r') + num_ali_jobs = int(f.readline()) + assert num_ali_jobs > 0 + except: + raise Exception("""There was an error getting the number of alignment + jobs from {0}/num_jobs""".format(tree_dir)) + + alignments=' '.join(['{0}/ali.{1}.gz'.format(tree_dir, job) + for job in range(1, num_ali_jobs + 1)]) + common_lib.run_job( """{command} {dir}/log/make_phone_lm.log \ - chain-est-phone-lm {lm_opts} \ - "ark:gunzip -c {tree_dir}/ali.*.gz | \ - ali-to-phones {tree_dir}/final.mdl ark:- ark:- |" \ - {dir}/phone_lm.fst""".format( - command=run_opts.command, dir=dir, - lm_opts=lm_opts if lm_opts is not None else '', - tree_dir=tree_dir)) + gunzip -c {alignments} \| \ + ali-to-phones {tree_dir}/final.mdl ark:- ark:- \| \ + chain-est-phone-lm {lm_opts} ark:- {dir}/phone_lm.fst""".format( + command=run_opts.command, dir=dir, + alignments=alignments, + lm_opts=lm_opts if lm_opts is not None else '', + tree_dir=tree_dir)) def create_denominator_fst(dir, tree_dir, run_opts): @@ -119,7 +130,7 @@ def train_new_models(dir, iter, srand, num_jobs, l2_regularize, xent_regularize, leaky_hmm_coefficient, momentum, max_param_change, shuffle_buffer_size, num_chunk_per_minibatch_str, - frame_subsampling_factor, truncate_deriv_weights, + frame_subsampling_factor, cache_io_opts, run_opts): """ Called from train_one_iteration(), this method trains new models @@ -168,7 +179,6 @@ def train_new_models(dir, iter, srand, num_jobs, "{raw_model}" 
{dir}/den.fst \ "ark,bg:nnet3-chain-copy-egs \ --left-context={lc} --right-context={rc} \ - --truncate-deriv-weights={trunc_deriv} \ --frame-shift={fr_shft} \ ark:{egs_dir}/cegs.{archive_index}.ark ark:- | \ nnet3-chain-shuffle-egs --buffer-size={buf_size} \ @@ -181,7 +191,6 @@ def train_new_models(dir, iter, srand, num_jobs, next_iter=iter + 1, job=job, deriv_time_opts=" ".join(deriv_time_opts), lc=left_context, rc=right_context, - trunc_deriv=truncate_deriv_weights, app_deriv_wts=apply_deriv_weights, fr_shft=frame_shift, l2=l2_regularize, xent_reg=xent_regularize, leaky=leaky_hmm_coefficient, @@ -220,7 +229,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, l2_regularize, xent_regularize, leaky_hmm_coefficient, momentum, max_param_change, shuffle_buffer_size, - frame_subsampling_factor, truncate_deriv_weights, + frame_subsampling_factor, run_opts, background_process_handler=None): """ Called from steps/nnet3/chain/train.py for one iteration for neural network training with LF-MMI objective @@ -318,7 +327,6 @@ def train_one_iteration(dir, iter, srand, egs_dir, shuffle_buffer_size=shuffle_buffer_size, num_chunk_per_minibatch_str=cur_num_chunk_per_minibatch_str, frame_subsampling_factor=frame_subsampling_factor, - truncate_deriv_weights=truncate_deriv_weights, cache_io_opts=cache_io_opts, run_opts=run_opts) [models_to_average, best_model] = common_train_lib.get_successful_models( diff --git a/egs/wsj/s5/steps/nnet3/adjust_priors.sh b/egs/wsj/s5/steps/nnet3/adjust_priors.sh index 5a0d8454781..e8adb408590 100755 --- a/egs/wsj/s5/steps/nnet3/adjust_priors.sh +++ b/egs/wsj/s5/steps/nnet3/adjust_priors.sh @@ -2,12 +2,12 @@ . path.sh -# This script computes the DNN output averaged over a small subset of +# This script computes the DNN output averaged over a small subset of # training egs and stores it in post.$iter.vec. -# This is used for the purpose of adjusting the nnet priors. -# When --use-raw-nnet is false, then the computed priors is added into the -# nnet model; hence the term adjust priors. -# When --use-raw-nnet is true, the computed priors is not added into the +# This is used for the purpose of adjusting the nnet priors. +# When --use-raw-nnet is false, then the computed priors is added into the +# nnet model; hence the term adjust priors. +# When --use-raw-nnet is true, the computed priors is not added into the # nnet model and left in the file post.$iter.vec. cmd=run.pl @@ -16,9 +16,9 @@ num_jobs_compute_prior=10 # these are single-threaded, run on CPU. use_gpu=false # if true, we run on GPU. egs_type=egs # Compute from $egs_type.*.ark in $egs_dir # If --egs-type is degs, then the program - # nnet3-discriminative-compute-from-egs is used + # nnet3-discriminative-compute-from-egs is used # instead of nnet3-compute-from-egs. -use_raw_nnet=false # If raw nnet, the averaged posterior is computed +use_raw_nnet=false # If raw nnet, the averaged posterior is computed # and stored in post.$iter.vec; but there is no # adjusting of priors minibatch_size=256 @@ -45,43 +45,42 @@ else prior_queue_opt="" fi -for f in $egs_dir/$egs_type.1.ark $egs_dir/info/num_archives; do +for f in $egs_dir/$egs_type.1.ark $egs_dir/info/num_archives; do if [ ! 
-f $f ]; then - echo "$f not found" - exit 1 + echo "$f not found" + exit 1 fi done if $use_raw_nnet; then model=$dir/$iter.raw -else +else model="nnet3-am-copy --raw=true $dir/$iter.mdl - |" fi rm -f $dir/post.$iter.*.vec 2>/dev/null num_archives=$(cat $egs_dir/info/num_archives) || { echo "error: no such file $egs_dir/info/frames_per_eg"; exit 1; } -if [ $num_jobs_compute_prior -gt $num_archives ]; then +if [ $num_jobs_compute_prior -gt $num_archives ]; then num_jobs_compute_prior=$num_archives fi if [ $egs_type != "degs" ]; then $cmd JOB=1:$num_jobs_compute_prior $prior_queue_opt $dir/log/get_post.$iter.JOB.log \ - nnet3-copy-egs ark:$egs_dir/$egs_type.$egs_part.ark ark:- \| \ + nnet3-copy-egs ark:$egs_dir/$egs_type.$JOB.ark ark:- \| \ nnet3-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \ nnet3-merge-egs --minibatch-size=$minibatch_size ark:- ark:- \| \ nnet3-compute-from-egs $prior_gpu_opt --apply-exp=true \ "$model" ark:- ark:- \| \ matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$iter.JOB.vec || exit 1; -else +else $cmd JOB=1:$num_jobs_compute_prior $prior_queue_opt $dir/log/get_post.$iter.JOB.log \ - nnet3-discriminative-copy-egs ark:$egs_dir/$egs_type.$egs_part.ark ark:- \| \ + nnet3-discriminative-copy-egs ark:$egs_dir/degs.JOB.ark ark:- \| \ nnet3-discriminative-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \ nnet3-discriminative-merge-egs --minibatch-size=$minibatch_size ark:- ark:- \| \ - nnet3-compute-from-degs $prior_gpu_opt --apply-exp=true \ + nnet3-discriminative-compute-from-egs $prior_gpu_opt --apply-exp=true \ "$model" ark:- ark:- \| \ matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$iter.JOB.vec || exit 1; - fi sleep 3; # make sure there is time for $dir/post.$iter.*.vec to appear. @@ -95,4 +94,3 @@ if ! $use_raw_nnet; then fi rm -f $dir/post.$iter.*.vec; - diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index 9dd04f45d71..5f1b341048a 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -468,7 +468,6 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): max_param_change=args.max_param_change, shuffle_buffer_size=args.shuffle_buffer_size, frame_subsampling_factor=args.frame_subsampling_factor, - truncate_deriv_weights=args.truncate_deriv_weights, run_opts=run_opts, background_process_handler=background_process_handler) diff --git a/egs/wsj/s5/steps/nnet3/get_degs.sh b/egs/wsj/s5/steps/nnet3/get_degs.sh index 9fbaf73d82c..65704fe9894 100755 --- a/egs/wsj/s5/steps/nnet3/get_degs.sh +++ b/egs/wsj/s5/steps/nnet3/get_degs.sh @@ -159,6 +159,8 @@ case $feat_type in *) echo "Invalid feature type $feat_type" && exit 1; esac +cp $srcdir/{splice_opts,cmvn_opts} $dir 2>/dev/null || true + if [ ! -z "$transform_dir" ]; then echo "$0: using transforms from $transform_dir" [ ! -s $transform_dir/num_jobs ] && \ diff --git a/egs/wsj/s5/steps/nnet3/train_discriminative.sh b/egs/wsj/s5/steps/nnet3/train_discriminative.sh index 05203ff5166..eb1a616e9de 100755 --- a/egs/wsj/s5/steps/nnet3/train_discriminative.sh +++ b/egs/wsj/s5/steps/nnet3/train_discriminative.sh @@ -329,7 +329,7 @@ while [ $x -lt $num_iters ]; do --cmd "$cmd" --use-gpu false \ --minibatch-size $minibatch_size \ --use-raw-nnet false --iter epoch$e $dir $degs_dir \ - || { touch $dir/.error; echo "Error in adjusting priors. See $dir/log/adjust_priors.epoch$e.log"; exit 1; } + || { touch $dir/.error; echo "Error in adjusting priors. 
See errors above."; exit 1; } ) & fi diff --git a/egs/wsj/s5/utils/filter_scps.pl b/egs/wsj/s5/utils/filter_scps.pl index 07e59d6ba80..418f8f73e1b 100755 --- a/egs/wsj/s5/utils/filter_scps.pl +++ b/egs/wsj/s5/utils/filter_scps.pl @@ -165,5 +165,6 @@ print STDERR "filter_scps.pl: warning: some input lines did not get output\n"; } if ($warn_multiply_covered && $print_warnings) { - print STDERR "filter_scps.pl: warning: some input lines were output to multiple files [OK if splitting per utt]\n"; + print STDERR "filter_scps.pl: warning: some input lines were output to multiple files [OK if splitting per utt] " . + join(" ", @ARGV) . "\n"; } diff --git a/src/nnet3/discriminative-training.cc b/src/nnet3/discriminative-training.cc index 438a01aafd9..4a32236c9ff 100644 --- a/src/nnet3/discriminative-training.cc +++ b/src/nnet3/discriminative-training.cc @@ -594,7 +594,7 @@ void DiscriminativeObjectiveInfo::Print(const std::string &criterion, } else if (criterion == "mpfe") { double avg_gradients = (tot_num_count + tot_den_count) / tot_t_weighted; double objf = tot_objf / tot_t_weighted; - KALDI_LOG << "Average modulus of MPFE gradients is " << avg_gradients + KALDI_LOG << "Average num+den count of MPFE stats is " << avg_gradients << " per frame, over " << tot_t_weighted << " frames"; KALDI_LOG << "MPFE objective function is " << objf @@ -602,7 +602,7 @@ void DiscriminativeObjectiveInfo::Print(const std::string &criterion, } else if (criterion == "smbr") { double avg_gradients = (tot_num_count + tot_den_count) / tot_t_weighted; double objf = tot_objf / tot_t_weighted; - KALDI_LOG << "Average modulus of SMBR gradients is " << avg_gradients + KALDI_LOG << "Average num+den count of SMBR stats is " << avg_gradients << " per frame, over " << tot_t_weighted << " frames"; KALDI_LOG << "SMBR objective function is " << objf @@ -642,4 +642,3 @@ void DiscriminativeObjectiveInfo::PrintAvgGradientForPdf(int32 pdf_id) const { } // namespace discriminative } // namespace kaldi - From 13559c5448087f96d387382696d6defd68a13afa Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 1 Dec 2016 12:40:14 -0800 Subject: [PATCH 247/530] Update tools/Makefile to support OpenFst-1.5.4. --- tools/Makefile | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/Makefile b/tools/Makefile index 9fdc35da402..0f5af6c7452 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -4,10 +4,11 @@ CXX = g++ # CXX = clang++ # Uncomment this line to build with Clang. CC = gcc # used for sph2pipe - OPENFST_VERSION = 1.3.4 # Uncomment the next line to build with OpenFst-1.4.1. # OPENFST_VERSION = 1.4.1 +# Uncomment the next line to build with OpenFst-1.5.4. +# OPENFST_VERSION = 1.5.4 # Note: OpenFst >= 1.4 requires C++11 support, hence you will need to use a # relatively recent C++ compiler, e.g. gcc >= 4.6, clang >= 3.0. @@ -20,11 +21,11 @@ ifeq ($(OPENFST_VERSION), 1.3.4) CXXFLAGS += -stdlib=libstdc++ LDFLAGS += -stdlib=libstdc++ endif +else ifeq ($(OPENFST_VERSION), 1.4.1) +else ifeq ($(OPENFST_VERSION), 1.5.4) else - ifneq ($(OPENFST_VERSION), 1.4.1) $(error OpenFst version $(OPENFST_VERSION) is not supported. \ - Supported versions: 1.3.4, 1.4.1) - endif + Supported versions: 1.3.4, 1.4.1, 1.5.4) endif all: check_required_programs sph2pipe atlas sclite openfst @@ -92,12 +93,14 @@ else cd openfst-$(OPENFST_VERSION)/; ./configure --prefix=`pwd` --enable-static --enable-shared --enable-far --enable-ngram-fsts CXX=$(CXX) CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" LIBS="-ldl" endif -# patches for openfst. 
openfst_gcc41up.patch is a patch for openfst to \ -# support multi-threads when compile with g++ (gcc) version above 4.1 +# patches for openfst. openfst_gcc41up.patch is a patch for openfst to +# support multi-threading when compiling with gcc >= 4.1. openfst-$(OPENFST_VERSION)/.patched: | openfst-$(OPENFST_VERSION) +ifneq ($(OPENFST_VERSION), 1.5.4) cd openfst-$(OPENFST_VERSION)/; \ patch -p1 -N < ../extras/openfst-$(OPENFST_VERSION).patch; $(CXX) -dumpversion | awk '{if(NR==1 && $$1>"4.1") print "cd openfst-$(OPENFST_VERSION)/src/include/fst; patch -c -p0 -N < ../../../../extras/openfst_gcc41up.patch"}' | sh - +endif touch $@ openfst-$(OPENFST_VERSION): openfst-$(OPENFST_VERSION).tar.gz From 80ccb9bef199a56232e67adb086de044342a7857 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 1 Dec 2016 13:26:49 -0800 Subject: [PATCH 248/530] Update src/configure to support OpenFst-1.5.4. --- src/configure | 40 +++++++++++++++++------------------ src/makefiles/darwin_10_10.mk | 3 ++- src/makefiles/darwin_10_11.mk | 3 ++- src/makefiles/darwin_10_12.mk | 3 ++- src/makefiles/darwin_10_9.mk | 3 ++- 5 files changed, 28 insertions(+), 24 deletions(-) diff --git a/src/configure b/src/configure index d4122f1808e..ae5f8ddb2df 100755 --- a/src/configure +++ b/src/configure @@ -852,20 +852,6 @@ if [ ! -f makefiles/common.mk ]; then failure makefiles/common.mk not found fi - -echo "Checking OpenFST library in $FSTROOT ..." -if [ ! -f $FSTROOT/include/fst/fst.h ]; then - failure "Could not find file $FSTROOT/include/fst/fst.h: - you may not have installed OpenFst. See ../tools/INSTALL" -fi -echo Checking OpenFst library was patched. -if ! grep "multiple repeated" $FSTROOT/include/fst/minimize.h >/dev/null; then - echo "** ERROR **" - echo "** $FSTROOT/include/fst/minimize.h seems not to be patched:" - echo "patch not applied? FST tools will not work in our recipe." - exit 1; -fi - # back up the old one in case we modified it if [ -f kaldi.mk ]; then echo "Backing up kaldi.mk to kaldi.mk.bak" @@ -882,15 +868,29 @@ fi echo "CONFIGURE_VERSION := $CONFIGURE_VERSION" >> kaldi.mk echo "FSTROOT = $FSTROOT" >> kaldi.mk -# Check installed OpenFst version and add C++11 flags if OpenFst >= 1.4 +echo "Checking OpenFST library in $FSTROOT ..." +if [ ! -f $FSTROOT/include/fst/fst.h ]; then + failure "Could not find file $FSTROOT/include/fst/fst.h: + you may not have installed OpenFst. See ../tools/INSTALL" +fi + OPENFST_VER="${OPENFST_VER:-`grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::'`}" +echo "Adding flags necessary for compiling against OpenFst-$OPENFST_VER ..." echo "OPENFST_VER = $OPENFST_VER" >> kaldi.mk OPENFST_VER_NUM=`echo $OPENFST_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d"` +echo "EXTRA_CXXFLAGS += -DOPENFST_VER=$OPENFST_VER_NUM" >> kaldi.mk if [ $OPENFST_VER_NUM -ge 10400 ]; then - echo "OPENFST_GE_10400 = 1" >> kaldi.mk - echo "EXTRA_CXXFLAGS += -DHAVE_OPENFST_GE_10400 -std=c++0x" >> kaldi.mk -else - echo "OPENFST_GE_10400 = 0" >> kaldi.mk + echo "EXTRA_CXXFLAGS += -std=c++0x" >> kaldi.mk +fi + +if [ $OPENFST_VER_NUM -lt 10500 ]; then + echo "Checking if OpenFst library was patched ..." + if ! grep "multiple repeated" $FSTROOT/include/fst/minimize.h >/dev/null; then + echo "** ERROR **" + echo "** $FSTROOT/include/fst/minimize.h seems not to be patched:" + echo "patch not applied? FST tools will not work in our recipe." + exit 1; + fi fi # Most of the OS-specific steps below will append to kaldi.mk @@ -900,7 +900,6 @@ echo "Doing OS specific configurations ..." # which crashes on Darwin. 
Also the linear algebra libraries on Macs are # used differently (through the Accelerate framework) than on Linux. if [ "`uname`" == "Darwin" ]; then - $use_cuda && configure_cuda echo "On Darwin: checking for Accelerate framework ..." if [ ! -e /System/Library/Frameworks/Accelerate.framework ]; then failure "Need the Accelerate.framework to compile on Darwin." @@ -946,6 +945,7 @@ if [ "`uname`" == "Darwin" ]; then else failure "OS X version '$osx_ver' not supported" fi + $use_cuda && configure_cuda echo "Configuration succeeded for platform Darwin." exit_success; fi diff --git a/src/makefiles/darwin_10_10.mk b/src/makefiles/darwin_10_10.mk index dcb35b0c59e..77d82708b1e 100644 --- a/src/makefiles/darwin_10_10.mk +++ b/src/makefiles/darwin_10_10.mk @@ -33,7 +33,8 @@ COMPILER = $(shell $(CXX) -v 2>&1 ) ifeq ($(findstring clang,$(COMPILER)),clang) CXXFLAGS += -Wno-mismatched-tags # Link with libstdc++ if we are building against OpenFst < 1.4 - ifneq ("$(OPENFST_GE_10400)","1") + OPENFST_VER_NUM := $(shell echo $(OPENFST_VER) | sed 's/\./ /g' | xargs printf "%d%02d%02d") + ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10400)","1") CXXFLAGS += -stdlib=libstdc++ LDFLAGS += -stdlib=libstdc++ endif diff --git a/src/makefiles/darwin_10_11.mk b/src/makefiles/darwin_10_11.mk index 73cd006735e..c3b11a49cfc 100644 --- a/src/makefiles/darwin_10_11.mk +++ b/src/makefiles/darwin_10_11.mk @@ -33,7 +33,8 @@ COMPILER = $(shell $(CXX) -v 2>&1 ) ifeq ($(findstring clang,$(COMPILER)),clang) CXXFLAGS += -Wno-mismatched-tags # Link with libstdc++ if we are building against OpenFst < 1.4 - ifneq ("$(OPENFST_GE_10400)","1") + OPENFST_VER_NUM := $(shell echo $(OPENFST_VER) | sed 's/\./ /g' | xargs printf "%d%02d%02d") + ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10400)","1") CXXFLAGS += -stdlib=libstdc++ LDFLAGS += -stdlib=libstdc++ endif diff --git a/src/makefiles/darwin_10_12.mk b/src/makefiles/darwin_10_12.mk index 68f50f01d51..46e05cc3427 100644 --- a/src/makefiles/darwin_10_12.mk +++ b/src/makefiles/darwin_10_12.mk @@ -33,7 +33,8 @@ COMPILER = $(shell $(CXX) -v 2>&1 ) ifeq ($(findstring clang,$(COMPILER)),clang) CXXFLAGS += -Wno-mismatched-tags # Link with libstdc++ if we are building against OpenFst < 1.4 - ifneq ("$(OPENFST_GE_10400)","1") + OPENFST_VER_NUM := $(shell echo $(OPENFST_VER) | sed 's/\./ /g' | xargs printf "%d%02d%02d") + ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10400)","1") CXXFLAGS += -stdlib=libstdc++ LDFLAGS += -stdlib=libstdc++ endif diff --git a/src/makefiles/darwin_10_9.mk b/src/makefiles/darwin_10_9.mk index 0069372c8ef..f3e8817503e 100644 --- a/src/makefiles/darwin_10_9.mk +++ b/src/makefiles/darwin_10_9.mk @@ -33,7 +33,8 @@ COMPILER = $(shell $(CXX) -v 2>&1 ) ifeq ($(findstring clang,$(COMPILER)),clang) CXXFLAGS += -Wno-mismatched-tags # Link with libstdc++ if we are building against OpenFst < 1.4 - ifneq ("$(OPENFST_GE_10400)","1") + OPENFST_VER_NUM := $(shell echo $(OPENFST_VER) | sed 's/\./ /g' | xargs printf "%d%02d%02d") + ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10400)","1") CXXFLAGS += -stdlib=libstdc++ LDFLAGS += -stdlib=libstdc++ endif From d01acbf286bbb684e96bbbe199f20f8c2736c07f Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 1 Dec 2016 13:29:57 -0800 Subject: [PATCH 249/530] Fix unqualified std::vector occurrences. 
---
 src/decoder/nbest-decoder.h | 18 +++++++-------
 src/kws/kws-functions.cc | 16 ++++++-------
 src/kws/kws-functions.h | 6 ++---
 src/kwsbin/lattice-to-kws-index.cc | 18 +++++++-------
 src/lat/lattice-functions.h | 9 ++++---
 src/lat/sausages.cc | 30 ++++++++++++------------
 src/latbin/lattice-oracle.cc | 6 ++---
 src/latbin/lattice-rescore-mapped.cc | 12 +++++-----
 src/latbin/nbest-to-linear.cc | 12 +++++-----
 src/nnet2bin/nnet-am-average.cc | 7 +++---
 src/nnet2bin/nnet-normalize-stddev.cc | 14 +++++------
 src/nnetbin/nnet-train-mmi-sequential.cc | 2 +-
 src/nnetbin/nnet-train-mpe-sequential.cc | 2 +-
 src/online2/online-ivector-feature.cc | 4 ++--
 14 files changed, 77 insertions(+), 79 deletions(-)

diff --git a/src/decoder/nbest-decoder.h b/src/decoder/nbest-decoder.h
index 8db071d6591..daecc84e7b2 100644
--- a/src/decoder/nbest-decoder.h
+++ b/src/decoder/nbest-decoder.h
@@ -179,7 +179,7 @@ class NBestDecoder {
         continue;  // skip that token
       }
       LatticeWeight path_w(lmscore, amscore);
-      CompactLatticeWeight path_weight(path_w, vector<int32>());
+      CompactLatticeWeight path_weight(path_w, std::vector<int32>());
       std::vector arcs_reverse;  // reverse order output arcs
 
       // outer loop for word tokens
@@ -230,8 +230,8 @@ class NBestDecoder {
   //   ShortestPath(fst, &fst_one);
   //   ConvertLattice(fst_one, fst_out, true);
   //   return true;
-  // } 
-  
+  // }
+
  private:
 
   // TokenStore is a store of linked tokens with its own allocator
@@ -388,7 +388,7 @@ class NBestDecoder {
         return tok2;
       }
     }
-    
+
     inline bool CombineN(Elem *head, Token *new_tok) {  // n-best version
       if (!new_tok) return false;
       Elem *e = head;
@@ -435,7 +435,7 @@ class NBestDecoder {
     }
 
     inline Token* Advance(Token *source, Arc &arc, int32 frame, BaseFloat cutoff) {
-      // compute new weight 
+      // compute new weight
       Weight w = Times(source->c, arc.weight);
       Weight amscore = Weight::One();
       if (arc.ilabel > 0) {  // emitting arc
@@ -446,7 +446,7 @@ class NBestDecoder {
       if (w.Value() > cutoff) {  // prune
         return NULL;
       }
-      // create new token 
+      // create new token
       Token *tok;
       if (arc.olabel > 0) {  // create new token
         // find or create corresponding Token
@@ -593,10 +593,10 @@ class NBestDecoder {
       // KALDI_ASSERT(state == tok->arc_.nextstate);
       for (fst::ArcIterator<fst::Fst<Arc> > aiter(fst_, state); !aiter.Done(); aiter.Next()) {
-        // for all a in A(state) 
+        // for all a in A(state)
         Arc arc = aiter.Value();
         if (arc.ilabel != 0) {  // propagate only emitting
-          Token *new_tok = 
+          Token *new_tok =
              token_store_.Advance(tok, arc, frame, next_weight_cutoff);
           if (new_tok) {
             Elem *e_found = toks_.Find(arc.nextstate);
@@ -637,7 +637,7 @@ class NBestDecoder {
       queue_.erase(queue_.begin());
       Elem *elem = toks_.Find(state);  // would segfault if state not
       // in toks_ but this can't happen.
-      
+
       // we have to pop all tokens with the same state
       // this may create some unneccessary repetitions, since only the new token
       // needs to be forwarded, but I don't know yet how to solve this
diff --git a/src/kws/kws-functions.cc b/src/kws/kws-functions.cc
index 26645ee92cb..f6b6367d82b 100644
--- a/src/kws/kws-functions.cc
+++ b/src/kws/kws-functions.cc
@@ -38,12 +38,12 @@ bool CompareInterval(const Interval &i1,
 }
 
 bool ClusterLattice(CompactLattice *clat,
-                    const vector<int32> &state_times) {
+                    const std::vector<int32> &state_times) {
   using namespace fst;
   typedef CompactLattice::StateId StateId;
 
   // Hashmap to store the cluster heads.
- unordered_map > head; + unordered_map > head; // Step 1: Iterate over the lattice to get the arcs StateId max_id = 0; @@ -69,11 +69,11 @@ bool ClusterLattice(CompactLattice *clat, // the cluster heads is to take the first one as a cluster head; then go // till we find the next one that doesn't overlap in time with the current // cluster head, and so on. - unordered_map >::iterator iter; + unordered_map >::iterator iter; for (iter = head.begin(); iter != head.end(); ++iter) { // For this ilabel, sort all the arcs on time, from first to last. sort(iter->second.begin(), iter->second.end(), CompareInterval); - vector tmp; + std::vector tmp; tmp.push_back(iter->second[0]); for (int32 i = 1; i < iter->second.size(); i++) { if (tmp.back().End() <= iter->second[i].Start()) @@ -145,7 +145,7 @@ class CompactLatticeToKwsProductFstMapper { bool CreateFactorTransducer(const CompactLattice &clat, - const vector &state_times, + const std::vector &state_times, int32 utterance_id, KwsProductFst *factor_transducer) { using namespace fst; @@ -153,8 +153,8 @@ bool CreateFactorTransducer(const CompactLattice &clat, // We first compute the alphas and betas bool success = false; - vector alpha; - vector beta; + std::vector alpha; + std::vector beta; success = ComputeCompactLatticeAlphas(clat, &alpha); success = success && ComputeCompactLatticeBetas(clat, &beta); if (!success) @@ -248,7 +248,7 @@ bool CreateFactorTransducer(const CompactLattice &clat, } void RemoveLongSilences(int32 max_silence_frames, - const vector &state_times, + const std::vector &state_times, KwsProductFst *factor_transducer) { using namespace fst; typedef KwsProductArc::StateId StateId; diff --git a/src/kws/kws-functions.h b/src/kws/kws-functions.h index e13e99f38ae..9d6424fb2b0 100644 --- a/src/kws/kws-functions.h +++ b/src/kws/kws-functions.h @@ -62,7 +62,7 @@ bool CompareInterval(const Interval &i1, // It puts disambiguating symbols in the olabels, leaving the words on the // ilabels. bool ClusterLattice(CompactLattice *clat, - const vector &state_times); + const std::vector &state_times); // This function contains two steps: weight pushing and factor generation. The // original ShortestDistance() is not very efficient, so we do the weight @@ -70,7 +70,7 @@ bool ClusterLattice(CompactLattice *clat, // factor generation step expand the lattice to the LXTXT' semiring, with // additional start state and end state (and corresponding arcs) added. bool CreateFactorTransducer(const CompactLattice &clat, - const vector &state_times, + const std::vector &state_times, int32 utterance_id, KwsProductFst *factor_transducer); @@ -81,7 +81,7 @@ bool CreateFactorTransducer(const CompactLattice &clat, // step, so the "search area" is limited to the original arcs before factor // generation. 
void RemoveLongSilences(int32 max_silence_frames, - const vector &state_times, + const std::vector &state_times, KwsProductFst *factor_transducer); // Do the factor merging part: encode input and output, and apply weighted diff --git a/src/kwsbin/lattice-to-kws-index.cc b/src/kwsbin/lattice-to-kws-index.cc index c635fe63736..b5ec577dc6d 100644 --- a/src/kwsbin/lattice-to-kws-index.cc +++ b/src/kwsbin/lattice-to-kws-index.cc @@ -110,10 +110,10 @@ int main(int argc, char *argv[]) { n_fail++; continue; } - } + } // Get the alignments - vector state_times; + std::vector state_times; CompactLatticeStateTimes(clat, &state_times); // Cluster the arcs in the CompactLattice, write the cluster_id on the @@ -145,9 +145,9 @@ int main(int argc, char *argv[]) { EnsureEpsilonProperty(&clat); fst::TopSort(&clat); // We have to recompute the state times because they will have changed. - CompactLatticeStateTimes(clat, &state_times); + CompactLatticeStateTimes(clat, &state_times); } - + // Generate factor transducer // CreateFactorTransducer() corresponds to the "Factor Generation" part of // Dogan and Murat's paper. But we also move the weight pushing step to @@ -158,7 +158,7 @@ int main(int argc, char *argv[]) { success = CreateFactorTransducer(clat, state_times, utterance_id, &factor_transducer); if (!success) { KALDI_WARN << "Cannot generate factor transducer for lattice " << key; - n_fail++; + n_fail++; } MaybeDoSanityCheck(factor_transducer); @@ -178,7 +178,7 @@ int main(int argc, char *argv[]) { DoFactorMerging(&factor_transducer, &index_transducer); MaybeDoSanityCheck(index_transducer); - + // Do factor disambiguation. It corresponds to the "Factor Disambiguation" // step in Dogan and Murat's paper. KALDI_VLOG(1) << "Doing factor disambiguation..."; @@ -191,10 +191,10 @@ int main(int argc, char *argv[]) { KALDI_VLOG(1) << "Optimizing factor transducer..."; OptimizeFactorTransducer(&index_transducer, max_states, allow_partial); - MaybeDoSanityCheck(index_transducer); - + MaybeDoSanityCheck(index_transducer); + // Write result - index_writer.Write(key, index_transducer); + index_writer.Write(key, index_transducer); n_done++; } diff --git a/src/lat/lattice-functions.h b/src/lat/lattice-functions.h index c58b2ec32b8..c95af70d7eb 100644 --- a/src/lat/lattice-functions.h +++ b/src/lat/lattice-functions.h @@ -67,12 +67,12 @@ BaseFloat LatticeForwardBackward(const Lattice &lat, // the CompactLattice lattice format. Also we only need the alpha in the forward // path, not the posteriors. bool ComputeCompactLatticeAlphas(const CompactLattice &lat, - vector *alpha); + std::vector *alpha); // A sibling of the function CompactLatticeAlphas()... We compute the beta from // the backward path here. bool ComputeCompactLatticeBetas(const CompactLattice &lat, - vector *beta); + std::vector *beta); // Computes (normal or Viterbi) alphas and betas; returns (total-prob, or @@ -82,8 +82,8 @@ bool ComputeCompactLatticeBetas(const CompactLattice &lat, template double ComputeLatticeAlphasAndBetas(const LatticeType &lat, bool viterbi, - vector *alpha, - vector *beta); + std::vector *alpha, + std::vector *beta); /// Topologically sort the compact lattice if not already topologically sorted. @@ -321,4 +321,3 @@ void ComposeCompactLatticeDeterministic( } // namespace kaldi #endif // KALDI_LAT_LATTICE_FUNCTIONS_H_ - diff --git a/src/lat/sausages.cc b/src/lat/sausages.cc index 53678efe844..e6fd0b61dd9 100644 --- a/src/lat/sausages.cc +++ b/src/lat/sausages.cc @@ -25,7 +25,7 @@ namespace kaldi { // this is Figure 6 in the paper. 
void MinimumBayesRisk::MbrDecode() { - + for (size_t counter = 0; ; counter++) { NormalizeEps(&R_); AccStats(); // writes to gamma_ @@ -33,13 +33,13 @@ void MinimumBayesRisk::MbrDecode() { one_best_times_.clear(); one_best_confidences_.clear(); - + // Caution: q in the line below is (q-1) in the algorithm // in the paper; both R_ and gamma_ are indexed by q-1. for (size_t q = 0; q < R_.size(); q++) { - if (do_mbr_) { // This loop updates R_ [indexed same as gamma_]. + if (do_mbr_) { // This loop updates R_ [indexed same as gamma_]. // gamma_[i] is sorted in reverse order so most likely one is first. - const vector > &this_gamma = gamma_[q]; + const std::vector > &this_gamma = gamma_[q]; double old_gamma = 0, new_gamma = this_gamma[0].second; int32 rq = R_[q], rhat = this_gamma[0].first; // rq: old word, rhat: new. for (size_t j = 0; j < this_gamma.size(); j++) @@ -71,7 +71,7 @@ void MinimumBayesRisk::MbrDecode() { struct Int32IsZero { bool operator() (int32 i) { return (i == 0); } }; -// static +// static void MinimumBayesRisk::RemoveEps(std::vector *vec) { Int32IsZero pred; vec->erase(std::remove_if (vec->begin(), vec->end(), pred), @@ -96,7 +96,7 @@ double MinimumBayesRisk::EditDistance(int32 N, int32 Q, Vector &alpha_dash_arc) { alpha(1) = 0.0; // = log(1). Line 5. alpha_dash(1, 0) = 0.0; // Line 5. - for (int32 q = 1; q <= Q; q++) + for (int32 q = 1; q <= Q; q++) alpha_dash(1, q) = alpha_dash(1, q-1) + l(0, r(q)); // Line 7. for (int32 n = 2; n <= N; n++) { double alpha_n = kLogZeroDouble; @@ -132,7 +132,7 @@ double MinimumBayesRisk::EditDistance(int32 N, int32 Q, // Figure 5 in the paper. void MinimumBayesRisk::AccStats() { using std::map; - + int32 N = static_cast(pre_.size()) - 1, Q = static_cast(R_.size()); @@ -141,8 +141,8 @@ void MinimumBayesRisk::AccStats() { Vector alpha_dash_arc(Q+1); // index 0...Q Matrix beta_dash(N+1, Q+1); // index (1...N, 0...Q) Vector beta_dash_arc(Q+1); // index 0...Q - vector b_arc(Q+1); // integer in {1,2,3}; index 1...Q - vector > gamma(Q+1); // temp. form of gamma. + std::vector b_arc(Q+1); // integer in {1,2,3}; index 1...Q + std::vector > gamma(Q+1); // temp. form of gamma. // index 1...Q [word] -> occ. // The tau arrays below are the sums over words of the tau_b @@ -151,7 +151,7 @@ void MinimumBayesRisk::AccStats() { // the sausage bins, not specifically for the 1-best output. Vector tau_b(Q+1), tau_e(Q+1); - double Ltmp = EditDistance(N, Q, alpha, alpha_dash, alpha_dash_arc); + double Ltmp = EditDistance(N, Q, alpha, alpha_dash, alpha_dash_arc); if (L_ != 0 && Ltmp > L_) { // L_ != 0 is to rule out 1st iter. KALDI_WARN << "Edit distance increased: " << Ltmp << " > " << L_; @@ -262,7 +262,7 @@ void MinimumBayesRisk::AccStats() { double avg = 0.5 * (times_[q-2].second + times_[q-1].first); times_[q-2].second = times_[q-1].first = avg; } - } + } } void MinimumBayesRisk::PrepareLatticeAndInitStats(CompactLattice *clat) { @@ -271,7 +271,7 @@ void MinimumBayesRisk::PrepareLatticeAndInitStats(CompactLattice *clat) { CreateSuperFinal(clat); // Add super-final state to clat... this is // one of the requirements of the MBR algorithm, as mentioned in the // paper (i.e. just one final state). - + // Topologically sort the lattice, if not already sorted. kaldi::uint64 props = clat->Properties(fst::kFstProperties, false); if (!(props & fst::kTopSorted)) { @@ -283,7 +283,7 @@ void MinimumBayesRisk::PrepareLatticeAndInitStats(CompactLattice *clat) { state_times_.push_back(0); // we'll convert to 1-based numbering. 
for (size_t i = state_times_.size()-1; i > 0; i--) state_times_[i] = state_times_[i-1]; - + // Now we convert the information in "clat" into a special internal // format (pre_, post_ and arcs_) which allows us to access the // arcs preceding any given state. @@ -343,9 +343,9 @@ MinimumBayesRisk::MinimumBayesRisk(const CompactLattice &clat_in, bool do_mbr): L_ = 0.0; // Set current edit-distance to 0 [just so we know // when we're on the 1st iter.] } - + MbrDecode(); - + } MinimumBayesRisk::MinimumBayesRisk(const CompactLattice &clat_in, diff --git a/src/latbin/lattice-oracle.cc b/src/latbin/lattice-oracle.cc index 799a7f6ce67..80c4e3e05d4 100644 --- a/src/latbin/lattice-oracle.cc +++ b/src/latbin/lattice-oracle.cc @@ -67,7 +67,7 @@ void MapWildCards(const LabelSet &wildcards, fst::StdVectorFst *ofst) { LabelSet::const_iterator it = wildcards.find(arc.ilabel); if (it != wildcards.end()) { KALDI_VLOG(4) << "MapWildCards: mapping symbol " << arc.ilabel - << " to epsilon" << endl; + << " to epsilon" << std::endl; arc.ilabel = 0; } it = wildcards.find(arc.olabel); @@ -173,7 +173,7 @@ void CountErrors(const fst::StdVectorFst &fst, bool CheckFst(const fst::StdVectorFst &fst, string name, string key) { #ifdef DEBUG StateId numstates = fst.NumStates(); - cerr << " " << name << " has " < lattice_fst; diff --git a/src/latbin/lattice-rescore-mapped.cc b/src/latbin/lattice-rescore-mapped.cc index 4dd8dfd875c..9dcc63219ee 100644 --- a/src/latbin/lattice-rescore-mapped.cc +++ b/src/latbin/lattice-rescore-mapped.cc @@ -1,7 +1,7 @@ // latbin/lattice-rescore-mapped.cc // Copyright 2009-2012 Saarland University (author: Arnab Ghoshal) -// Johns Hopkins University (author: Daniel Povey) +// Johns Hopkins University (author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // @@ -91,7 +91,7 @@ int main(int argc, char *argv[]) { "Usage: lattice-rescore-mapped [options] " " \n" " e.g.: nnet-logprob [args] .. | lattice-rescore-mapped final.mdl ark:1.lats ark:- ark:2.lats\n"; - + kaldi::BaseFloat old_acoustic_scale = 0.0; kaldi::ParseOptions po(usage); po.Register("old-acoustic-scale", &old_acoustic_scale, @@ -116,12 +116,12 @@ int main(int argc, char *argv[]) { trans_model.Read(ki.Stream(), binary); // Ignore what follows it in the model. } - + RandomAccessBaseFloatMatrixReader loglike_reader(loglike_rspecifier); // Read as regular lattice SequentialLatticeReader lattice_reader(lats_rspecifier); // Write as compact lattice. 
- CompactLatticeWriter compact_lattice_writer(lats_wspecifier); + CompactLatticeWriter compact_lattice_writer(lats_wspecifier); int32 num_done = 0, num_err = 0; int64 num_frames = 0; @@ -144,7 +144,7 @@ int main(int argc, char *argv[]) { KALDI_ERR << "Cycles detected in lattice."; } - vector state_times; + std::vector state_times; int32 max_time = kaldi::LatticeStateTimes(lat, &state_times); const Matrix &log_likes = loglike_reader.Value(key); if (log_likes.NumRows() != max_time) { @@ -154,7 +154,7 @@ int main(int argc, char *argv[]) { num_err++; continue; } - + kaldi::LatticeAcousticRescore(trans_model, log_likes, state_times, &lat); CompactLattice clat_out; diff --git a/src/latbin/nbest-to-linear.cc b/src/latbin/nbest-to-linear.cc index 79da978e086..6b3fe5e1d01 100644 --- a/src/latbin/nbest-to-linear.cc +++ b/src/latbin/nbest-to-linear.cc @@ -40,7 +40,7 @@ int main(int argc, char *argv[]) { "[ [ []]]\n" " e.g.: lattice-to-nbest --n=10 ark:1.lats ark:- | \\\n" " nbest-to-linear ark:1.lats ark,t:1.ali ark,t:1.tra\n"; - + ParseOptions po(usage); po.Read(argc, argv); @@ -62,17 +62,17 @@ int main(int argc, char *argv[]) { Int32VectorWriter trans_writer(trans_wspecifier); BaseFloatWriter lm_cost_writer(lm_cost_wspecifier); BaseFloatWriter ac_cost_writer(ac_cost_wspecifier); - + int32 n_done = 0, n_err = 0; - + for (; !lattice_reader.Done(); lattice_reader.Next()) { std::string key = lattice_reader.Key(); Lattice lat = lattice_reader.Value(); - vector ilabels; - vector olabels; + std::vector ilabels; + std::vector olabels; LatticeWeight weight; - + if (!GetLinearSymbolSequence(lat, &ilabels, &olabels, &weight)) { KALDI_WARN << "Lattice/nbest for key " << key << " had wrong format: " "note, this program expects input with one path, e.g. from " diff --git a/src/nnet2bin/nnet-am-average.cc b/src/nnet2bin/nnet-am-average.cc index 0fa00f05995..d35375f44f2 100644 --- a/src/nnet2bin/nnet-am-average.cc +++ b/src/nnet2bin/nnet-am-average.cc @@ -29,7 +29,7 @@ namespace kaldi { void GetWeights(const std::string &weights_str, int32 num_inputs, - vector *weights) { + std::vector *weights) { KALDI_ASSERT(num_inputs >= 1); if (!weights_str.empty()) { SplitStringToFloats(weights_str, ":", true, weights); @@ -169,7 +169,7 @@ int main(int argc, char *argv[]) { int32 num_inputs = po.NumArgs() - 1; - vector model_weights; + std::vector model_weights; GetWeights(weights_str, num_inputs, &model_weights); int32 c_begin = 0, @@ -179,7 +179,7 @@ int main(int argc, char *argv[]) { KALDI_ASSERT(c_end != -1 && "Network has no updatable components."); int32 last_layer_idx = am_nnet1.GetNnet().NumComponents(); - vector skip_layers = GetSkipLayers(skip_layers_str, + std::vector skip_layers = GetSkipLayers(skip_layers_str, 0, last_layer_idx); @@ -257,4 +257,3 @@ int main(int argc, char *argv[]) { return -1; } } - diff --git a/src/nnet2bin/nnet-normalize-stddev.cc b/src/nnet2bin/nnet-normalize-stddev.cc index 29e3cf8fb80..b23faef5fc1 100644 --- a/src/nnet2bin/nnet-normalize-stddev.cc +++ b/src/nnet2bin/nnet-normalize-stddev.cc @@ -47,13 +47,13 @@ int main(int argc, char *argv[]) { bool binary_write = true; BaseFloat stddev = 1.0; std::string reference_model_filename; - + ParseOptions po(usage); po.Register("binary", &binary_write, "Write output in binary mode"); po.Register("stddev-from", &reference_model_filename, "Reference model"); po.Register("stddev", &stddev, "Target standard deviation that we normalize " "to (note: is overridden by --stddev-from option, if supplied)"); - + po.Read(argc, argv); if (po.NumArgs() != 2) { 
@@ -77,7 +77,7 @@ int main(int argc, char *argv[]) { // Works out the layers that we would like to normalize: any affine or block // affine layers that are followed by pnorm and then renormalize layers. - vector identified_components; + std::vector identified_components; for (int32 c = 0; c < am_nnet.GetNnet().NumComponents() - 2; c++) { // Checks if the current layer is an affine layer or block affine layer. // Also includes PreconditionedAffineComponent and @@ -89,13 +89,13 @@ int main(int argc, char *argv[]) { dynamic_cast(component); if (ac == NULL && bac == NULL) continue; - + // Checks if the next layer is a pnorm layer. component = &(am_nnet.GetNnet().GetComponent(c + 1)); PnormComponent *pc = dynamic_cast(component); if (pc == NULL) continue; - + // Checks if the layer after the pnorm layer is a NormalizeComponent // or a PowerComponent followed by a NormalizeComponent component = &(am_nnet.GetNnet().GetComponent(c + 2)); @@ -126,7 +126,7 @@ int main(int argc, char *argv[]) { } BaseFloat ref_stddev = 0.0; - + // Normalizes the identified layers. for (int32 c = 0; c < identified_components.size(); c++) { ref_stddev = stddev; @@ -150,7 +150,7 @@ int main(int argc, char *argv[]) { KALDI_ASSERT(uc != NULL); Vector params(uc->GetParameterDim()); uc->Vectorize(¶ms); - BaseFloat params_average = params.Sum() + BaseFloat params_average = params.Sum() / static_cast(params.Dim()); params.Add(-1.0 * params_average); BaseFloat params_stddev = sqrt(VecVec(params, params) diff --git a/src/nnetbin/nnet-train-mmi-sequential.cc b/src/nnetbin/nnet-train-mmi-sequential.cc index 02a94ff3979..2554d64287a 100644 --- a/src/nnetbin/nnet-train-mmi-sequential.cc +++ b/src/nnetbin/nnet-train-mmi-sequential.cc @@ -272,7 +272,7 @@ int main(int argc, char *argv[]) { } } // get the lattice length and times of states, - vector state_times; + std::vector state_times; int32 max_time = kaldi::LatticeStateTimes(den_lat, &state_times); // check duration of den. lattice, if (max_time != mat.NumRows()) { diff --git a/src/nnetbin/nnet-train-mpe-sequential.cc b/src/nnetbin/nnet-train-mpe-sequential.cc index 76b4110ca28..2ba14527142 100644 --- a/src/nnetbin/nnet-train-mpe-sequential.cc +++ b/src/nnetbin/nnet-train-mpe-sequential.cc @@ -276,7 +276,7 @@ int main(int argc, char *argv[]) { } } // get the lattice length and times of states - vector state_times; + std::vector state_times; int32 max_time = kaldi::LatticeStateTimes(den_lat, &state_times); // check for temporal length of denominator lattices if (max_time != mat.NumRows()) { diff --git a/src/online2/online-ivector-feature.cc b/src/online2/online-ivector-feature.cc index fcdab88408e..cdfc5948571 100644 --- a/src/online2/online-ivector-feature.cc +++ b/src/online2/online-ivector-feature.cc @@ -387,7 +387,7 @@ OnlineSilenceWeighting::OnlineSilenceWeighting( const OnlineSilenceWeightingConfig &config): trans_model_(trans_model), config_(config), num_frames_output_and_correct_(0) { - vector silence_phones; + std::vector silence_phones; SplitStringToIntegers(config.silence_phones_str, ":,", false, &silence_phones); for (size_t i = 0; i < silence_phones.size(); i++) @@ -514,7 +514,7 @@ void OnlineSilenceWeighting::GetDeltaWeights( frames_out = static_cast(frame_info_.size()) - begin_frame; // frames_out is the number of frames we will output. 
KALDI_ASSERT(frames_out >= 0); - vector frame_weight(frames_out, 1.0); + std::vector frame_weight(frames_out, 1.0); // we will frame_weight to the value silence_weight for silence frames and for // transition-ids that repeat with duration > max_state_duration. Frames newer // than the most recent traceback will get a weight equal to the weight for the From 4a8aaa8e37d2452397f03dcf4e82493597c61939 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 1 Dec 2016 14:11:26 -0800 Subject: [PATCH 250/530] Update fstext to support OpenFst-1.5.4. OpenFst-1.5 replaces internal custom reference counting (RefCounter) with C++11 smart pointers. This commit adds conditional compilation directives to fstext headers to do the same when compiling against OpenFst-1.5. --- src/fstext/context-fst-inl.h | 6 ++++++ src/fstext/context-fst.h | 14 +++++++++++--- src/fstext/table-matcher.h | 26 +++++++++++++++++++------- src/fstext/trivial-factor-weight.h | 25 +++++++++++++++++++------ 4 files changed, 55 insertions(+), 16 deletions(-) diff --git a/src/fstext/context-fst-inl.h b/src/fstext/context-fst-inl.h index 5127e7ae584..9472e611f77 100644 --- a/src/fstext/context-fst-inl.h +++ b/src/fstext/context-fst-inl.h @@ -360,14 +360,20 @@ void ContextFstImpl::Expand(StateId s) { // expands arcs only [not template ContextFst::ContextFst(const ContextFst &fst, bool reset) { if (reset) { +#ifdef HAVE_OPENFST_GE_10500 + impl_ = std::make_shared >(*(fst.impl_)); +#else impl_ = new ContextFstImpl(*(fst.impl_)); // Copy constructor of ContextFstImpl. // Main use of calling with reset = true is to free up memory // (e.g. then you could delete original one). Might be useful in transcription // expansion during training. +#endif } else { impl_ = fst.impl_; +#ifndef HAVE_OPENFST_GE_10500 impl_->IncrRefCount(); +#endif } } diff --git a/src/fstext/context-fst.h b/src/fstext/context-fst.h index 15cb0ef9fdb..0f2fe6c817d 100644 --- a/src/fstext/context-fst.h +++ b/src/fstext/context-fst.h @@ -244,7 +244,9 @@ class ContextFst : public Fst { ContextFst(const ContextFst &fst, bool reset = false); - virtual ~ContextFst() { if (!impl_->DecrRefCount()) delete impl_; } +#ifndef HAVE_OPENFST_GE_10500 + virtual ~ContextFst() { if (!impl_->DecrRefCount()) delete impl_; } +#endif virtual StateId Start() const { return impl_->Start(); } @@ -307,13 +309,19 @@ class ContextFst : public Fst { friend class CacheStateIterator >; // so it can see impl_. private: +#ifdef HAVE_OPENFST_GE_10500 + std::shared_ptr > impl_; // protected so CacheStateIterator + ContextFstImpl *GetImpl() const { return impl_.get(); } +#else ContextFstImpl *impl_; // protected so CacheStateIterator // Makes visible to friends. ContextFstImpl *GetImpl() const { return impl_; } - // would be: ImplToFst >::GetImpl(); - // but need to convert to using the ImplToFst stuff. + // would be: ImplToFst >::GetImpl(); + // but need to convert to using the ImplToFst stuff. void operator = (const ContextFstImpl &fst); // disallow +#endif + }; /// Useful utility function for writing these vectors to disk. diff --git a/src/fstext/table-matcher.h b/src/fstext/table-matcher.h index aed821a8725..da23c83a546 100644 --- a/src/fstext/table-matcher.h +++ b/src/fstext/table-matcher.h @@ -86,7 +86,9 @@ class TableMatcherImpl : public MatcherBase { virtual const FST &GetFst() const { return *fst_; } virtual ~TableMatcherImpl() { +#ifndef HAVE_OPENFST_GE_10500 assert(RefCount() == 0); +#endif vector *const empty = ((vector*)(NULL)) + 1; // special marker. 
for (size_t i = 0; i < tables_.size(); i++) { if (tables_[i] != NULL && tables_[i] != empty) @@ -219,6 +221,7 @@ class TableMatcherImpl : public MatcherBase { virtual uint64 Properties(uint64 props) const { return props; } // simple matcher that does // not change its FST, so properties are properties of FST it is applied to +#ifndef HAVE_OPENFST_GE_10500 int RefCount() const { return ref_count_.count(); } @@ -230,8 +233,11 @@ class TableMatcherImpl : public MatcherBase { int DecrRefCount() { return ref_count_.Decr(); } +#endif private: +#ifndef HAVE_OPENFST_GE_10500 RefCounter ref_count_; // Reference count +#endif virtual void SetState_(StateId s) { SetState(s); } virtual bool Find_(Label label) { return Find(label); } @@ -263,22 +269,26 @@ class TableMatcher : public MatcherBase { typedef StateId ArcId; // Use this type to store arc offsets [it's actually size_t // in the Seek function of ArcIterator, but StateId should be big enough]. typedef typename Arc::Weight Weight; + typedef TableMatcherImpl I; TableMatcher(const FST &fst, MatchType match_type, const TableMatcherOptions &opts = TableMatcherOptions()): - impl_(new TableMatcherImpl(fst, match_type, opts)) { } - + impl_(new I(fst, match_type, opts)) { } TableMatcher(const TableMatcher &matcher, bool safe): impl_(matcher.impl_) { - impl_->IncrRefCount(); +#ifndef HAVE_OPENFST_GE_10500 + impl_->IncrRefCount(); +#endif } virtual const FST &GetFst() const { return impl_->GetFst(); } +#ifndef HAVE_OPENFST_GE_10500 virtual ~TableMatcher() { if (!impl_->DecrRefCount()) delete impl_; } +#endif virtual MatchType Type(bool test) const { return impl_->Type(test); } @@ -301,7 +311,11 @@ class TableMatcher : public MatcherBase { virtual uint64 Properties(uint64 props) const { return impl_->Properties(props); } // simple matcher that does // not change its FST, so properties are properties of FST it is applied to private: - TableMatcherImpl *impl_; +#ifdef HAVE_OPENFST_GE_10500 + std::shared_ptr impl_; +#else + I *impl_; +#endif virtual void SetState_(StateId s) { impl_->SetState(s); } virtual bool Find_(Label label) { return impl_->Find(label); } @@ -339,7 +353,7 @@ void TableCompose(const Fst &ifst1, const Fst &ifst2, *ofst = ComposeFst(ifst1, ifst2, impl_opts); } else { assert(opts.table_match_type == MATCH_INPUT) ; - // ComposeFstImplOptions templated on matcher for fst1, matcher for fst2. + // ComposeFstImplOptions templated on matcher for fst1, matcher for fst2. ComposeFstImplOptions, TableMatcher > impl_opts(nopts); impl_opts.matcher2 = new TableMatcher(ifst2, MATCH_INPUT, opts); *ofst = ComposeFst(ifst1, ifst2, impl_opts); @@ -388,5 +402,3 @@ void TableCompose(const Fst &ifst1, const Fst &ifst2, } // end namespace fst #endif - - diff --git a/src/fstext/trivial-factor-weight.h b/src/fstext/trivial-factor-weight.h index 109ba75ce10..b8afa757b39 100644 --- a/src/fstext/trivial-factor-weight.h +++ b/src/fstext/trivial-factor-weight.h @@ -353,10 +353,18 @@ class TrivialFactorWeightFst : public ImplToFst< TrivialFactorWeightFstImpl Impl; TrivialFactorWeightFst(const Fst &fst) +#ifdef HAVE_OPENFST_GE_10500 + : ImplToFst(std::make_shared(fst, TrivialFactorWeightOptions())) {} +#else : ImplToFst(new Impl(fst, TrivialFactorWeightOptions())) {} +#endif TrivialFactorWeightFst(const Fst &fst, const TrivialFactorWeightOptions &opts) +#ifdef HAVE_OPENFST_GE_10500 + : ImplToFst(std::make_shared(fst, opts)) {} +#else : ImplToFst(new Impl(fst, opts)) {} +#endif // See Fst<>::Copy() for doc. 
TrivialFactorWeightFst(const TrivialFactorWeightFst &fst, bool copy) @@ -370,12 +378,18 @@ class TrivialFactorWeightFst : public ImplToFst< TrivialFactorWeightFstImpl *data) const; virtual void InitArcIterator(StateId s, ArcIteratorData *data) const { - GetImpl()->InitArcIterator(s, data); + GetMutableImpl()->InitArcIterator(s, data); } private: // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst::GetImpl(); } +#ifdef HAVE_OPENFST_GE_10500 + using ImplToFst::GetImpl; + using ImplToFst::GetMutableImpl; +#else + const Impl *GetImpl() const { return ImplToFst::GetImpl(); } + Impl *GetMutableImpl() const { return ImplToFst::GetImpl(); } +#endif void operator=(const TrivialFactorWeightFst &fst); // Disallow }; @@ -387,7 +401,7 @@ class StateIterator< TrivialFactorWeightFst > : public CacheStateIterator< TrivialFactorWeightFst > { public: explicit StateIterator(const TrivialFactorWeightFst &fst) - : CacheStateIterator< TrivialFactorWeightFst >(fst, fst.GetImpl()) {} + : CacheStateIterator< TrivialFactorWeightFst >(fst, fst.GetMutableImpl()) {} }; @@ -399,9 +413,9 @@ class ArcIterator< TrivialFactorWeightFst > typedef typename A::StateId StateId; ArcIterator(const TrivialFactorWeightFst &fst, StateId s) - : CacheArcIterator< TrivialFactorWeightFst >(fst.GetImpl(), s) { + : CacheArcIterator< TrivialFactorWeightFst >(fst.GetMutableImpl(), s) { if (!fst.GetImpl()->HasArcs(s)) - fst.GetImpl()->Expand(s); + fst.GetMutableImpl()->Expand(s); } private: @@ -420,4 +434,3 @@ void TrivialFactorWeightFst::InitStateIterator(StateIteratorData *data) } // namespace fst #endif - From 43993b6b288495d8e221ae16e7daa50bd64b3d61 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 1 Dec 2016 16:01:55 -0800 Subject: [PATCH 251/530] Add support for API changes in OpenFst-1.5. 
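
The subtle piece is the weight handling: the diffs below wrap the
result of ScaleTupleWeight() in an explicit Weight(...) construction,
since under OpenFst-1.5 it no longer converts implicitly to the arc
weight type. In sketch form, simplified from the ScaleLattice hunk
('aiter' being the usual mutable arc iterator):

    Arc arc = aiter.Value();
    // explicit conversion back to the arc's weight type:
    arc.weight = Weight(ScaleTupleWeight(arc.weight, scale));
    aiter.SetValue(arc);

The MapFst construction sites follow a similar pattern: OpenFst-1.5
expects a MapFstOptions wrapping the CacheOptions, where older
releases took the CacheOptions directly.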
--- src/fstext/lattice-utils-inl.h | 13 +++++++++---- src/fstext/lattice-weight.h | 2 +- src/lat/arctic-weight.h | 8 ++++---- src/latbin/lattice-compose.cc | 7 ++++++- src/latbin/lattice-lmrescore.cc | 7 ++++++- 5 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/fstext/lattice-utils-inl.h b/src/fstext/lattice-utils-inl.h index a3f603aa274..0f19b2b9513 100644 --- a/src/fstext/lattice-utils-inl.h +++ b/src/fstext/lattice-utils-inl.h @@ -209,12 +209,12 @@ void ScaleLattice( !aiter.Done(); aiter.Next()) { Arc arc = aiter.Value(); - arc.weight = ScaleTupleWeight(arc.weight, scale); + arc.weight = Weight(ScaleTupleWeight(arc.weight, scale)); aiter.SetValue(arc); } Weight final_weight = fst->Final(s); if (final_weight != Weight::Zero()) - fst->SetFinal(s, ScaleTupleWeight(final_weight, scale)); + fst->SetFinal(s, Weight(ScaleTupleWeight(final_weight, scale))); } } @@ -267,10 +267,15 @@ void ConvertFstToLattice( const ExpandedFst > &ifst, MutableFst > > *ofst) { int32 num_states_cache = 50000; - CacheOptions cache_opts(true, num_states_cache); +#ifdef HAVE_OPENFST_GE_10500 + fst::CacheOptions cache_opts(true, num_states_cache); + fst::MapFstOptions mapfst_opts(cache_opts); +#else + fst::CacheOptions mapfst_opts(true, num_states_cache); +#endif StdToLatticeMapper mapper; MapFst >, - StdToLatticeMapper > map_fst(ifst, mapper, cache_opts); + StdToLatticeMapper > map_fst(ifst, mapper, mapfst_opts); *ofst = map_fst; } diff --git a/src/fstext/lattice-weight.h b/src/fstext/lattice-weight.h index 8453b9c5670..3a03733cb3d 100644 --- a/src/fstext/lattice-weight.h +++ b/src/fstext/lattice-weight.h @@ -748,7 +748,7 @@ inline CompactLatticeWeightTpl ScaleTupleWeight( const CompactLatticeWeightTpl &w, const vector > &scale) { return CompactLatticeWeightTpl( - ScaleTupleWeight(w.Weight(), scale), w.String()); + Weight(ScaleTupleWeight(w.Weight(), scale)), w.String()); } /** Define some ConvertLatticeWeight functions that are used in various lattice diff --git a/src/lat/arctic-weight.h b/src/lat/arctic-weight.h index 7806cec96d1..2b308f44e65 100644 --- a/src/lat/arctic-weight.h +++ b/src/lat/arctic-weight.h @@ -27,8 +27,8 @@ namespace fst { // Arctic semiring: (max, +, inf, 0) // We define the Arctic semiring T' = (R \cup {-inf, +inf}, max, +, -inf, 0). -// The term "Arctic" came from Keith Kintzley (kintzley@jhu.edu), as opposite -// to the Tropical semiring. +// The term "Arctic" came from Keith Kintzley (kintzley@jhu.edu), as opposite +// to the Tropical semiring. 
template class ArcticWeightTpl : public FloatWeightTpl { public: @@ -49,7 +49,7 @@ class ArcticWeightTpl : public FloatWeightTpl { return ArcticWeightTpl(0.0F); } static const string &Type() { - static const string type = "arctic" + + static const string type = string("arctic") + FloatWeightTpl::GetPrecisionString(); return type; } @@ -57,7 +57,7 @@ class ArcticWeightTpl : public FloatWeightTpl { static ArcticWeightTpl NoWeight() { return ArcticWeightTpl(numeric_limits::infinity()); } - + bool Member() const { // First part fails for IEEE NaN return Value() == Value() && Value() != numeric_limits::infinity(); diff --git a/src/latbin/lattice-compose.cc b/src/latbin/lattice-compose.cc index 5feb958a6a1..2d1415eede5 100644 --- a/src/latbin/lattice-compose.cc +++ b/src/latbin/lattice-compose.cc @@ -85,10 +85,15 @@ int main(int argc, char *argv[]) { if (phi_label > 0) PropagateFinal(phi_label, fst2); +#ifdef HAVE_OPENFST_GE_10500 fst::CacheOptions cache_opts(true, num_states_cache); + fst::MapFstOptions mapfst_opts(cache_opts); +#else + fst::CacheOptions mapfst_opts(true, num_states_cache); +#endif fst::StdToLatticeMapper mapper; fst::MapFst > - mapped_fst2(*fst2, mapper, cache_opts); + mapped_fst2(*fst2, mapper, mapfst_opts); for (; !lattice_reader1.Done(); lattice_reader1.Next()) { std::string key = lattice_reader1.Key(); KALDI_VLOG(1) << "Processing lattice for key " << key; diff --git a/src/latbin/lattice-lmrescore.cc b/src/latbin/lattice-lmrescore.cc index b8f1067e607..10de27c43fc 100644 --- a/src/latbin/lattice-lmrescore.cc +++ b/src/latbin/lattice-lmrescore.cc @@ -74,10 +74,15 @@ int main(int argc, char *argv[]) { // mapped_fst is the LM fst interpreted using the LatticeWeight semiring, // with all the cost on the first member of the pair (since it's a graph // weight). +#ifdef HAVE_OPENFST_GE_10500 fst::CacheOptions cache_opts(true, num_states_cache); + fst::MapFstOptions mapfst_opts(cache_opts); +#else + fst::CacheOptions mapfst_opts(true, num_states_cache); +#endif fst::StdToLatticeMapper mapper; fst::MapFst > - lm_fst(*std_lm_fst, mapper, cache_opts); + lm_fst(*std_lm_fst, mapper, mapfst_opts); delete std_lm_fst; // The next fifteen or so lines are a kind of optimization and From 2d2d73808d0f4ab9c4feb849200110186f00d2df Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Fri, 2 Dec 2016 15:48:24 -0800 Subject: [PATCH 252/530] Rework OpenFst related preprocessor conditionals. 
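
A single numeric OPENFST_VER macro replaces the per-release
HAVE_OPENFST_GE_* defines, so supporting a new release no longer
means minting a new feature macro. The version packs as
major*10000 + minor*100 + patch (the %d%02d%02d recipe already used
in the darwin makefiles), e.g. 1.4.1 -> 10401 and 1.5.4 -> 10504,
and the guards become ordinary integer comparisons. Sketch of the
resulting pattern, mirroring the table-matcher.h hunk below (I is
the impl typedef from that header):

    #if OPENFST_VER >= 10500
      // OpenFst-1.5 code path: smart pointers, no RefCounter.
      std::shared_ptr<I> impl_;
    #else
      // Pre-1.5 code path: intrusive reference counting.
      I *impl_;
    #endif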
--- src/bin/phones-to-prons.cc | 2 +- src/fstext/context-fst-inl.h | 19 ++++---- src/fstext/context-fst-test.cc | 6 +-- src/fstext/context-fst.h | 12 +++--- src/fstext/determinize-lattice-test.cc | 12 +++--- src/fstext/determinize-star-test.cc | 50 +++++++++++----------- src/fstext/factor-test.cc | 4 +- src/fstext/fstext-utils-test.cc | 6 +-- src/fstext/kaldi-fst-io-inl.h | 2 +- src/fstext/lattice-utils-inl.h | 2 +- src/fstext/lattice-utils-test.cc | 8 ++-- src/fstext/pre-determinize-test.cc | 16 +++---- src/fstext/prune-special-test.cc | 6 +-- src/fstext/push-special-test.cc | 4 +- src/fstext/remove-eps-local-test.cc | 8 ++-- src/fstext/table-matcher-test.cc | 16 +++---- src/fstext/table-matcher.h | 17 +++++--- src/fstext/trivial-factor-weight-test.cc | 20 ++++----- src/fstext/trivial-factor-weight.h | 10 ++--- src/lat/determinize-lattice-pruned-test.cc | 12 +++--- src/lat/kaldi-lattice.cc | 4 +- src/lat/push-lattice-test.cc | 4 +- src/latbin/lattice-compose.cc | 2 +- src/latbin/lattice-lmrescore.cc | 2 +- 24 files changed, 127 insertions(+), 117 deletions(-) diff --git a/src/bin/phones-to-prons.cc b/src/bin/phones-to-prons.cc index f9b9291a90b..33a821ce6ab 100644 --- a/src/bin/phones-to-prons.cc +++ b/src/bin/phones-to-prons.cc @@ -170,7 +170,7 @@ int main(int argc, char *argv[]) { << "not reach end-state, or mismatched lexicon.)"; if (g_kaldi_verbose_level >= 2) { KALDI_LOG << "phn2word FST is below:"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 fst::FstPrinter fstprinter(phn2word, NULL, NULL, NULL, false, true, "\t"); #else fst::FstPrinter fstprinter(phn2word, NULL, NULL, NULL, false, true); diff --git a/src/fstext/context-fst-inl.h b/src/fstext/context-fst-inl.h index 9472e611f77..204c8b92c1f 100644 --- a/src/fstext/context-fst-inl.h +++ b/src/fstext/context-fst-inl.h @@ -42,7 +42,8 @@ typename ContextFstImpl::StateId if (iter == state_map_.end()) { // Not already in map. StateId this_state_id = (StateId)state_seqs_.size(); //This check is not needed with OpenFst >= 1.4 -#ifndef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 +#else StateId this_state_id_check = CacheImpl::AddState(); // goes back to VectorFstBaseImpl, inherited via CacheFst assert(this_state_id == this_state_id_check); @@ -325,7 +326,7 @@ void ContextFstImpl::Expand(StateId s) { // expands arcs only [not // We just try adding all possible symbols on the output side. 
Arc arc; if (this->CreateArc(s, subsequential_symbol_, &arc)) { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 this->PushArc(s, arc); #else this->AddArc(s, arc); @@ -335,7 +336,7 @@ void ContextFstImpl::Expand(StateId s) { // expands arcs only [not iter != phone_syms_.end(); ++iter) { Label phone = *iter; if (this->CreateArc(s, phone, &arc)) { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 this->PushArc(s, arc); #else this->AddArc(s, arc); @@ -346,7 +347,7 @@ void ContextFstImpl::Expand(StateId s) { // expands arcs only [not iter != disambig_syms_.end(); ++iter) { Label disambig_sym = *iter; if (this->CreateArc(s, disambig_sym, &arc)) { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 this->PushArc(s, arc); #else this->AddArc(s, arc); @@ -359,22 +360,24 @@ void ContextFstImpl::Expand(StateId s) { // expands arcs only [not template ContextFst::ContextFst(const ContextFst &fst, bool reset) { +#if OPENFST_VER >= 10500 if (reset) { -#ifdef HAVE_OPENFST_GE_10500 impl_ = std::make_shared >(*(fst.impl_)); + } else { + impl_ = fst.impl_; + } #else + if (reset) { impl_ = new ContextFstImpl(*(fst.impl_)); // Copy constructor of ContextFstImpl. // Main use of calling with reset = true is to free up memory // (e.g. then you could delete original one). Might be useful in transcription // expansion during training. -#endif } else { impl_ = fst.impl_; -#ifndef HAVE_OPENFST_GE_10500 impl_->IncrRefCount(); -#endif } +#endif } diff --git a/src/fstext/context-fst-test.cc b/src/fstext/context-fst-test.cc index 53c774f829a..72b50da1339 100644 --- a/src/fstext/context-fst-test.cc +++ b/src/fstext/context-fst-test.cc @@ -192,7 +192,7 @@ template static void TestContextFst(bool verbose, bool use_matcher) { } if (verbose) { // Try to print the fst. -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(cfst, cfst.InputSymbols(), cfst.OutputSymbols(), NULL, false, true, "\t"); #else FstPrinter fstprinter(cfst, cfst.InputSymbols(), cfst.OutputSymbols(), NULL, false, true); @@ -211,7 +211,7 @@ template static void TestContextFst(bool verbose, bool use_matcher) { if (verbose) { std::cout << "Sequence FST is:\n"; { // Try to print the fst. -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*f, f->InputSymbols(), f->OutputSymbols(), NULL, false, true, "\t"); #else FstPrinter fstprinter(*f, f->InputSymbols(), f->OutputSymbols(), NULL, false, true); @@ -257,7 +257,7 @@ template static void TestContextFst(bool verbose, bool use_matcher) { if (verbose) { std::cout << "Composed FST is:\n"; { // Try to print the fst. -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst_composed, fst_composed.InputSymbols(), fst_composed.OutputSymbols(), NULL, false, true, "\t"); #else diff --git a/src/fstext/context-fst.h b/src/fstext/context-fst.h index 0f2fe6c817d..2d13e944f0a 100644 --- a/src/fstext/context-fst.h +++ b/src/fstext/context-fst.h @@ -94,7 +94,7 @@ class ContextFstImpl : public CacheImpl { typedef typename Arc::Weight Weight; typedef typename Arc::StateId StateId; typedef typename Arc::Label Label; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 typedef DefaultCacheStore Store; typedef typename Store::State State; #endif @@ -216,7 +216,8 @@ class ContextFst : public Fst { public: friend class ArcIterator< ContextFst >; friend class StateIterator< ContextFst >; -#ifndef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 +#else // We have to supply the default template argument below to work around a // Visual Studio bug. 
friend class CacheArcIterator< ContextFst, @@ -226,7 +227,7 @@ class ContextFst : public Fst { typedef typename Arc::Weight Weight; typedef typename Arc::Label Label; typedef typename Arc::StateId StateId; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 typedef DefaultCacheStore Store; typedef typename Store::State State; #else @@ -244,7 +245,8 @@ class ContextFst : public Fst { ContextFst(const ContextFst &fst, bool reset = false); -#ifndef HAVE_OPENFST_GE_10500 +#if OPENFST_VER >= 10500 +#else virtual ~ContextFst() { if (!impl_->DecrRefCount()) delete impl_; } #endif @@ -309,7 +311,7 @@ class ContextFst : public Fst { friend class CacheStateIterator >; // so it can see impl_. private: -#ifdef HAVE_OPENFST_GE_10500 +#if OPENFST_VER >= 10500 std::shared_ptr > impl_; // protected so CacheStateIterator ContextFstImpl *GetImpl() const { return impl_.get(); } #else diff --git a/src/fstext/determinize-lattice-test.cc b/src/fstext/determinize-lattice-test.cc index a12e368ea86..42122c6e193 100644 --- a/src/fstext/determinize-lattice-test.cc +++ b/src/fstext/determinize-lattice-test.cc @@ -91,7 +91,7 @@ template void TestDeterminizeLattice() { VectorFst *fst = RandFst(); std::cout << "FST before lattice-determinizing is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); @@ -107,7 +107,7 @@ template void TestDeterminizeLattice() { throw std::runtime_error("could not determinize"); std::cout << "FST after lattice-determinizing is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(det_fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(det_fst, NULL, NULL, NULL, false, true); @@ -122,7 +122,7 @@ template void TestDeterminizeLattice() { ConvertLattice(*fst, &compact_fst, false); std::cout << "Compact FST is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(compact_fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(compact_fst, NULL, NULL, NULL, false, true); @@ -137,7 +137,7 @@ template void TestDeterminizeLattice() { std::cout << "Compact version of determinized FST is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(compact_det_fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(compact_det_fst, NULL, NULL, NULL, false, true); @@ -162,7 +162,7 @@ template void TestDeterminizeLattice2() { VectorFst *fst = RandFst(opts); std::cout << "FST before lattice-determinizing is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); @@ -173,7 +173,7 @@ template void TestDeterminizeLattice2() { DeterminizeLattice(*fst, &ofst); std::cout << "FST after lattice-determinizing is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true); diff --git a/src/fstext/determinize-star-test.cc b/src/fstext/determinize-star-test.cc index d6aaaa4e024..f308d8460d8 100644 --- a/src/fstext/determinize-star-test.cc +++ b/src/fstext/determinize-star-test.cc @@ -37,7 +37,7 @@ template void TestDeterminizeGeneral() { VectorFst *fst = RandFst(); std::cout << "FST before determinizing is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 
FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); @@ -49,7 +49,7 @@ template void TestDeterminizeGeneral() { DeterminizeStar >(*fst, &ofst, kDelta, NULL, max_states); std::cout << "FST after determinizing is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true); @@ -108,7 +108,7 @@ template void TestDeterminize() { std::cout <<" printing before trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -120,7 +120,7 @@ template void TestDeterminize() { std::cout <<" printing after trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -137,7 +137,7 @@ template void TestDeterminize() { std::cout <<" printing after predeterminization\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -157,7 +157,7 @@ template void TestDeterminize() { std::cout <<" printing after epsilon removal\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -180,7 +180,7 @@ template void TestDeterminize() { { std::cout <<" printing after determinization [baseline]\n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst_orig, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst_orig, sptr, sptr, NULL, false, true); @@ -191,7 +191,7 @@ template void TestDeterminize() { { std::cout <<" printing after determinization [star]\n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true); @@ -205,7 +205,7 @@ template void TestDeterminize() { int64 num_removed = DeleteISymbols(&ofst_star, extra_syms); std::cout <<" printing after removing "<= 10400 FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true); @@ -277,7 +277,7 @@ template void TestPush() { std::cout <<" printing before trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -289,7 +289,7 @@ template void TestPush() { std::cout <<" printing after trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -310,7 +310,7 @@ template void TestPush() { std::cout <<" printing after pushing\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst_pushed, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(fst_pushed, sptr, sptr, NULL, false, true); @@ -367,7 +367,7 @@ template void TestMinimize() { std::cout <<" printing before trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if 
OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -379,7 +379,7 @@ template void TestMinimize() { std::cout <<" printing after trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -396,7 +396,7 @@ template void TestMinimize() { std::cout <<" printing after predeterminization\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -416,7 +416,7 @@ template void TestMinimize() { std::cout <<" printing after epsilon removal\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -433,7 +433,7 @@ template void TestMinimize() { } { std::cout <<" printing after determinization [baseline]\n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst_orig, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst_orig, sptr, sptr, NULL, false, true); @@ -449,7 +449,7 @@ template void TestMinimize() { DeterminizeStar(*fst, &gallic_fst); { std::cout <<" printing after determinization by DeterminizeStar [in gallic]\n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true); @@ -463,7 +463,7 @@ template void TestMinimize() { { std::cout <<" printing after pushing weights [in gallic]\n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true); @@ -476,7 +476,7 @@ template void TestMinimize() { Minimize(&gallic_fst); { std::cout <<" printing after minimization [in gallic]\n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true); @@ -485,7 +485,7 @@ template void TestMinimize() { } printf("Converting gallic back to regular [my approach]\n"); -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 TrivialFactorWeightFst< GallicArc, GallicFactor > fwfst(gallic_fst); #else @@ -494,7 +494,7 @@ template void TestMinimize() { #endif { std::cout <<" printing factor-weight FST\n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter > fstprinter(fwfst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter > fstprinter(fwfst, sptr, sptr, NULL, false, true); @@ -502,7 +502,7 @@ template void TestMinimize() { fstprinter.Print(&std::cout, "standard output"); } -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 Map(fwfst, &ofst_star, FromGallicMapper()); #else Map(fwfst, &ofst_star, FromGallicMapper()); @@ -510,7 +510,7 @@ template void TestMinimize() { { std::cout <<" printing after converting back to regular FST\n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true); @@ -527,7 +527,7 @@ template void TestMinimize() { int64 num_removed = 
DeleteISymbols(&ofst_star, extra_syms); std::cout <<" printing after removing "<= 10400 FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true); diff --git a/src/fstext/factor-test.cc b/src/fstext/factor-test.cc index 9416f6fa4a4..1d446796b05 100644 --- a/src/fstext/factor-test.cc +++ b/src/fstext/factor-test.cc @@ -78,7 +78,7 @@ template static void TestFactor() { std::cout <<" printing before trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(fst, sptr, sptr, NULL, false, true); @@ -90,7 +90,7 @@ template static void TestFactor() { std::cout <<" printing after trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(fst, sptr, sptr, NULL, false, true); diff --git a/src/fstext/fstext-utils-test.cc b/src/fstext/fstext-utils-test.cc index 7f63d83186b..494935d3622 100644 --- a/src/fstext/fstext-utils-test.cc +++ b/src/fstext/fstext-utils-test.cc @@ -146,7 +146,7 @@ template void TestSafeDeterminizeWrapper() { // also tests SafeDete std::cout <<" printing before trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -158,7 +158,7 @@ template void TestSafeDeterminizeWrapper() { // also tests SafeDete std::cout <<" printing after trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -376,7 +376,7 @@ void TestEqualAlign() { template void Print(const Fst &fst, std::string message) { std::cout << message << "\n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(fst, NULL, NULL, NULL, false, true); diff --git a/src/fstext/kaldi-fst-io-inl.h b/src/fstext/kaldi-fst-io-inl.h index 9185295bee6..58895449c72 100644 --- a/src/fstext/kaldi-fst-io-inl.h +++ b/src/fstext/kaldi-fst-io-inl.h @@ -42,7 +42,7 @@ void WriteFstKaldi(std::ostream &os, bool binary, // appear on its own line. 
os << '\n'; bool acceptor = false, write_one = false; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter printer(t, t.InputSymbols(), t.OutputSymbols(), NULL, acceptor, write_one, "\t"); #else diff --git a/src/fstext/lattice-utils-inl.h b/src/fstext/lattice-utils-inl.h index 0f19b2b9513..f15e8d2cc57 100644 --- a/src/fstext/lattice-utils-inl.h +++ b/src/fstext/lattice-utils-inl.h @@ -267,7 +267,7 @@ void ConvertFstToLattice( const ExpandedFst > &ifst, MutableFst > > *ofst) { int32 num_states_cache = 50000; -#ifdef HAVE_OPENFST_GE_10500 +#if OPENFST_VER >= 10500 fst::CacheOptions cache_opts(true, num_states_cache); fst::MapFstOptions mapfst_opts(cache_opts); #else diff --git a/src/fstext/lattice-utils-test.cc b/src/fstext/lattice-utils-test.cc index dc062343298..51df0ce8364 100644 --- a/src/fstext/lattice-utils-test.cc +++ b/src/fstext/lattice-utils-test.cc @@ -30,7 +30,7 @@ template void TestConvert(bool invert) { VectorFst *fst = RandFst(); std::cout << "FST before converting to compact-arc is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); @@ -42,7 +42,7 @@ template void TestConvert(bool invert) { std::cout << "FST after converting is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true); @@ -53,7 +53,7 @@ template void TestConvert(bool invert) { ConvertLattice(ofst, &origfst, invert); std::cout << "FST after back conversion is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(origfst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(origfst, NULL, NULL, NULL, false, true); @@ -78,7 +78,7 @@ template void TestShortestPath() { std::cout << "Testing shortest path\n"; std::cout << "FST before converting to compact-arc is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); diff --git a/src/fstext/pre-determinize-test.cc b/src/fstext/pre-determinize-test.cc index 8694267407b..774507b0792 100644 --- a/src/fstext/pre-determinize-test.cc +++ b/src/fstext/pre-determinize-test.cc @@ -69,7 +69,7 @@ template void TestPreDeterminize() { std::cout <<" printing before trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -81,7 +81,7 @@ template void TestPreDeterminize() { std::cout <<" printing after trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -99,7 +99,7 @@ template void TestPreDeterminize() { std::cout <<" printing after predeterminization\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -119,7 +119,7 @@ template void TestPreDeterminize() { std::cout <<" printing after epsilon removal\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, 
NULL, false, true); @@ -133,7 +133,7 @@ template void TestPreDeterminize() { Determinize(*fst, &ofst, opts); std::cout <<" printing after determinization\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst, sptr, sptr, NULL, false, true); @@ -144,7 +144,7 @@ template void TestPreDeterminize() { int64 num_removed = DeleteISymbols(&ofst, extra_syms); std::cout <<" printing after removing "<= 10400 FstPrinter fstprinter(ofst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst, sptr, sptr, NULL, false, true); @@ -200,7 +200,7 @@ template void TestAddSelfLoops() { } std::cout <<" printing before adding self-loops\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, ilabels, olabels, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, ilabels, olabels, NULL, false, true); @@ -223,7 +223,7 @@ template void TestAddSelfLoops() { std::cout <<" printing after adding self-loops\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, ilabels, olabels, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, ilabels, olabels, NULL, false, true); diff --git a/src/fstext/prune-special-test.cc b/src/fstext/prune-special-test.cc index e879a7593ac..cb55edca6cc 100644 --- a/src/fstext/prune-special-test.cc +++ b/src/fstext/prune-special-test.cc @@ -37,7 +37,7 @@ static void TestPruneSpecial() { float beam = 0.55; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*ifst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(*ifst, NULL, NULL, NULL, false, true); @@ -50,7 +50,7 @@ static void TestPruneSpecial() { VectorFst ofst1; PruneSpecial(*ifst, &ofst1, beam); { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst1, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst1, NULL, NULL, NULL, false, true); @@ -63,7 +63,7 @@ static void TestPruneSpecial() { VectorFst ofst2; Prune(*ifst, &ofst2, beam); { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst2, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst2, NULL, NULL, NULL, false, true); diff --git a/src/fstext/push-special-test.cc b/src/fstext/push-special-test.cc index 7f8ccbe92db..0106492e887 100644 --- a/src/fstext/push-special-test.cc +++ b/src/fstext/push-special-test.cc @@ -37,7 +37,7 @@ static void TestPushSpecial() { VectorFst *fst = RandFst(); { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); @@ -59,7 +59,7 @@ static void TestPushSpecial() { { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst_copy, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(fst_copy, NULL, NULL, NULL, false, true); diff --git a/src/fstext/remove-eps-local-test.cc b/src/fstext/remove-eps-local-test.cc index 676ba82025c..2c6c6f8d97f 100644 --- a/src/fstext/remove-eps-local-test.cc +++ b/src/fstext/remove-eps-local-test.cc @@ -82,7 +82,7 @@ template static void TestRemoveEpsLocal() { std::cout <<" printing after trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(fst, sptr, sptr, NULL, false, true); @@ -99,7 +99,7 @@ template static void 
TestRemoveEpsLocal() { { std::cout << "copy1 = \n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst_copy1, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(fst_copy1, sptr, sptr, NULL, false, true); @@ -148,7 +148,7 @@ static void TestRemoveEpsLocalSpecial() { #endif { std::cout << "logfst = \n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*logfst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(*logfst, NULL, NULL, NULL, false, true); @@ -167,7 +167,7 @@ static void TestRemoveEpsLocalSpecial() { { std::cout << "logfst2 = \n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(logfst2, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(logfst2, NULL, NULL, NULL, false, true); diff --git a/src/fstext/table-matcher-test.cc b/src/fstext/table-matcher-test.cc index b9e8a864454..0124fff4147 100644 --- a/src/fstext/table-matcher-test.cc +++ b/src/fstext/table-matcher-test.cc @@ -63,7 +63,7 @@ template void TestTableMatcher(bool connect, bool left) { std::cout <<"Table-Composed FST\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(composed, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(composed, NULL, NULL, NULL, false, true); @@ -73,7 +73,7 @@ template void TestTableMatcher(bool connect, bool left) { std::cout <<" Baseline-Composed FST\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(composed_baseline, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(composed_baseline, NULL, NULL, NULL, false, true); @@ -86,7 +86,7 @@ template void TestTableMatcher(bool connect, bool left) { Difference(composed, composed_baseline, &diff1); std::cout <<" Diff1 (composed - baseline) \n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(diff1, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(diff1, NULL, NULL, NULL, false, true); @@ -99,7 +99,7 @@ template void TestTableMatcher(bool connect, bool left) { Difference(composed_baseline, composed, &diff2); std::cout <<" Diff2 (baseline - composed) \n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(diff2, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(diff2, NULL, NULL, NULL, false, true); @@ -164,7 +164,7 @@ template void TestTableMatcherCacheLeft(bool connect) { Difference(composed, composed_baseline, &diff1); std::cout <<" Diff1 (composed - baseline) \n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(diff1, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(diff1, NULL, NULL, NULL, false, true); @@ -177,7 +177,7 @@ template void TestTableMatcherCacheLeft(bool connect) { Difference(composed_baseline, composed, &diff2); std::cout <<" Diff2 (baseline - composed) \n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(diff2, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(diff2, NULL, NULL, NULL, false, true); @@ -242,7 +242,7 @@ template void TestTableMatcherCacheRight(bool connect) { Difference(composed, composed_baseline, &diff1); std::cout <<" Diff1 (composed - baseline) \n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(diff1, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(diff1, NULL, NULL, NULL, false, true); @@ -255,7 +255,7 @@ template void 
TestTableMatcherCacheRight(bool connect) { Difference(composed_baseline, composed, &diff2); std::cout <<" Diff2 (baseline - composed) \n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(diff2, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(diff2, NULL, NULL, NULL, false, true); diff --git a/src/fstext/table-matcher.h b/src/fstext/table-matcher.h index da23c83a546..1a1b35d8c68 100644 --- a/src/fstext/table-matcher.h +++ b/src/fstext/table-matcher.h @@ -86,7 +86,8 @@ class TableMatcherImpl : public MatcherBase { virtual const FST &GetFst() const { return *fst_; } virtual ~TableMatcherImpl() { -#ifndef HAVE_OPENFST_GE_10500 +#if OPENFST_VER >= 10500 +#else assert(RefCount() == 0); #endif vector *const empty = ((vector*)(NULL)) + 1; // special marker. @@ -221,7 +222,8 @@ class TableMatcherImpl : public MatcherBase { virtual uint64 Properties(uint64 props) const { return props; } // simple matcher that does // not change its FST, so properties are properties of FST it is applied to -#ifndef HAVE_OPENFST_GE_10500 +#if OPENFST_VER >= 10500 +#else int RefCount() const { return ref_count_.count(); } @@ -235,7 +237,8 @@ class TableMatcherImpl : public MatcherBase { } #endif private: -#ifndef HAVE_OPENFST_GE_10500 +#if OPENFST_VER >= 10500 +#else RefCounter ref_count_; // Reference count #endif @@ -277,14 +280,16 @@ class TableMatcher : public MatcherBase { TableMatcher(const TableMatcher &matcher, bool safe): impl_(matcher.impl_) { -#ifndef HAVE_OPENFST_GE_10500 +#if OPENFST_VER >= 10500 +#else impl_->IncrRefCount(); #endif } virtual const FST &GetFst() const { return impl_->GetFst(); } -#ifndef HAVE_OPENFST_GE_10500 +#if OPENFST_VER >= 10500 +#else virtual ~TableMatcher() { if (!impl_->DecrRefCount()) delete impl_; } @@ -311,7 +316,7 @@ class TableMatcher : public MatcherBase { virtual uint64 Properties(uint64 props) const { return impl_->Properties(props); } // simple matcher that does // not change its FST, so properties are properties of FST it is applied to private: -#ifdef HAVE_OPENFST_GE_10500 +#if OPENFST_VER >= 10500 std::shared_ptr impl_; #else I *impl_; diff --git a/src/fstext/trivial-factor-weight-test.cc b/src/fstext/trivial-factor-weight-test.cc index af3f4a3de89..fcf34b6834e 100644 --- a/src/fstext/trivial-factor-weight-test.cc +++ b/src/fstext/trivial-factor-weight-test.cc @@ -70,7 +70,7 @@ template void TestFactor() { std::cout <<" printing before trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -82,7 +82,7 @@ template void TestFactor() { std::cout <<" printing after trimming\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -97,7 +97,7 @@ template void TestFactor() { std::cout <<" printing after predeterminization\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -117,7 +117,7 @@ template void TestFactor() { std::cout <<" printing after double-epsilon removal\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); @@ -140,7 +140,7 @@ template void 
TestFactor() { { std::cout <<" printing gallic FST\n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true); @@ -152,7 +152,7 @@ template void TestFactor() { // Map(ofst_star, &gallic_fst, ToGallicMapper()); printf("Converting gallic back to regular\n"); -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 TrivialFactorWeightFst< GallicArc, GallicFactor > fwfst(gallic_fst); #else @@ -161,7 +161,7 @@ template void TestFactor() { #endif { std::cout <<" printing factor-weight FST\n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter > fstprinter(fwfst, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter > fstprinter(fwfst, sptr, sptr, NULL, false, true); @@ -169,7 +169,7 @@ template void TestFactor() { fstprinter.Print(&std::cout, "standard output"); } -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 Map(fwfst, &ofst_star, FromGallicMapper()); #else Map(fwfst, &ofst_star, FromGallicMapper()); @@ -177,7 +177,7 @@ template void TestFactor() { { std::cout <<" printing after converting back to regular FST\n"; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true); @@ -187,7 +187,7 @@ template void TestFactor() { VectorFst > new_gallic_fst; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 Map(ofst_star, &new_gallic_fst, ToGallicMapper()); #else Map(ofst_star, &new_gallic_fst, ToGallicMapper()); diff --git a/src/fstext/trivial-factor-weight.h b/src/fstext/trivial-factor-weight.h index b8afa757b39..3e42dd287db 100644 --- a/src/fstext/trivial-factor-weight.h +++ b/src/fstext/trivial-factor-weight.h @@ -117,7 +117,7 @@ class TrivialFactorWeightFstImpl typedef typename A::StateId StateId; typedef F FactorIterator; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 typedef DefaultCacheStore Store; typedef typename Store::State State; #endif @@ -344,7 +344,7 @@ class TrivialFactorWeightFst : public ImplToFst< TrivialFactorWeightFstImpl= 10400 typedef DefaultCacheStore Store; typedef typename Store::State State; #else @@ -353,14 +353,14 @@ class TrivialFactorWeightFst : public ImplToFst< TrivialFactorWeightFstImpl Impl; TrivialFactorWeightFst(const Fst &fst) -#ifdef HAVE_OPENFST_GE_10500 +#if OPENFST_VER >= 10500 : ImplToFst(std::make_shared(fst, TrivialFactorWeightOptions())) {} #else : ImplToFst(new Impl(fst, TrivialFactorWeightOptions())) {} #endif TrivialFactorWeightFst(const Fst &fst, const TrivialFactorWeightOptions &opts) -#ifdef HAVE_OPENFST_GE_10500 +#if OPENFST_VER >= 10500 : ImplToFst(std::make_shared(fst, opts)) {} #else : ImplToFst(new Impl(fst, opts)) {} @@ -383,7 +383,7 @@ class TrivialFactorWeightFst : public ImplToFst< TrivialFactorWeightFstImpl= 10500 using ImplToFst::GetImpl; using ImplToFst::GetMutableImpl; #else diff --git a/src/lat/determinize-lattice-pruned-test.cc b/src/lat/determinize-lattice-pruned-test.cc index d5f22454017..c932e3c95de 100644 --- a/src/lat/determinize-lattice-pruned-test.cc +++ b/src/lat/determinize-lattice-pruned-test.cc @@ -62,7 +62,7 @@ template void TestDeterminizeLatticePruned() { std::cout << "FST before lattice-determinizing is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); @@ -79,7 
+79,7 @@ template void TestDeterminizeLatticePruned() { std::cout << "FST after lattice-determinizing is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(det_fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(det_fst, NULL, NULL, NULL, false, true); @@ -100,7 +100,7 @@ template void TestDeterminizeLatticePruned() { ConvertLattice(pruned_fst, &compact_pruned_fst, false); std::cout << "Compact pruned FST is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(compact_pruned_fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(compact_pruned_fst, NULL, NULL, NULL, false, true); @@ -111,7 +111,7 @@ template void TestDeterminizeLatticePruned() { std::cout << "Compact version of determinized FST is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(compact_pruned_det_fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(compact_pruned_det_fst, NULL, NULL, NULL, false, true); @@ -138,7 +138,7 @@ template void TestDeterminizeLatticePruned2() { VectorFst *fst = RandPairFst(opts); std::cout << "FST before lattice-determinizing is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); @@ -149,7 +149,7 @@ template void TestDeterminizeLatticePruned2() { DeterminizeLatticePruned(*fst, 10.0, &ofst); std::cout << "FST after lattice-determinizing is:\n"; { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true, "\t"); #else FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true); diff --git a/src/lat/kaldi-lattice.cc b/src/lat/kaldi-lattice.cc index ee58e64704d..b44b12a5a23 100644 --- a/src/lat/kaldi-lattice.cc +++ b/src/lat/kaldi-lattice.cc @@ -75,7 +75,7 @@ bool WriteCompactLattice(std::ostream &os, bool binary, // on its own line. os << '\n'; bool acceptor = true, write_one = false; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 fst::FstPrinter printer(t, t.InputSymbols(), t.OutputSymbols(), NULL, acceptor, write_one, "\t"); @@ -406,7 +406,7 @@ bool WriteLattice(std::ostream &os, bool binary, const Lattice &t) { // on its own line. 
os << '\n'; bool acceptor = false, write_one = false; -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 fst::FstPrinter printer(t, t.InputSymbols(), t.OutputSymbols(), NULL, acceptor, write_one, "\t"); diff --git a/src/lat/push-lattice-test.cc b/src/lat/push-lattice-test.cc index e1f99bcb31f..ecd60501888 100644 --- a/src/lat/push-lattice-test.cc +++ b/src/lat/push-lattice-test.cc @@ -90,7 +90,7 @@ void TestPushCompactLatticeWeights() { } if (!ApproxEqual(sum, LatticeWeight::One())) { { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 fst::FstPrinter printer(clat2, NULL, NULL, NULL, true, true, "\t"); #else @@ -100,7 +100,7 @@ void TestPushCompactLatticeWeights() { printer.Print(&std::cerr, ""); } { -#ifdef HAVE_OPENFST_GE_10400 +#if OPENFST_VER >= 10400 fst::FstPrinter printer(*clat, NULL, NULL, NULL, true, true, "\t"); #else diff --git a/src/latbin/lattice-compose.cc b/src/latbin/lattice-compose.cc index 2d1415eede5..365be941a85 100644 --- a/src/latbin/lattice-compose.cc +++ b/src/latbin/lattice-compose.cc @@ -85,7 +85,7 @@ int main(int argc, char *argv[]) { if (phi_label > 0) PropagateFinal(phi_label, fst2); -#ifdef HAVE_OPENFST_GE_10500 +#if OPENFST_VER >= 10500 fst::CacheOptions cache_opts(true, num_states_cache); fst::MapFstOptions mapfst_opts(cache_opts); #else diff --git a/src/latbin/lattice-lmrescore.cc b/src/latbin/lattice-lmrescore.cc index 10de27c43fc..d60d5fe93e5 100644 --- a/src/latbin/lattice-lmrescore.cc +++ b/src/latbin/lattice-lmrescore.cc @@ -74,7 +74,7 @@ int main(int argc, char *argv[]) { // mapped_fst is the LM fst interpreted using the LatticeWeight semiring, // with all the cost on the first member of the pair (since it's a graph // weight). -#ifdef HAVE_OPENFST_GE_10500 +#if OPENFST_VER >= 10500 fst::CacheOptions cache_opts(true, num_states_cache); fst::MapFstOptions mapfst_opts(cache_opts); #else From d28544ef1b1d4d5b453e10c09d0412f44d76d753 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Fri, 2 Dec 2016 17:34:44 -0800 Subject: [PATCH 253/530] Increment configure version --- src/configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/configure b/src/configure index ae5f8ddb2df..cb9736a45ad 100755 --- a/src/configure +++ b/src/configure @@ -25,7 +25,7 @@ #This should be incremented after every significant change of the configure script #I.e. after each change that affects the kaldi.mk or the build system as a whole -CONFIGURE_VERSION=4 +CONFIGURE_VERSION=5 function rel2abs { if [ ! -z "$1" ]; then From 22b1de0dd760f05277472e7581f54ab4ae866fbb Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Sat, 3 Dec 2016 22:08:52 -0800 Subject: [PATCH 254/530] Fix Minimize() calls for OpenFst >= 1.5. --- src/kwsbin/kws-search.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/kwsbin/kws-search.cc b/src/kwsbin/kws-search.cc index 467a2ab1ccd..1359e165240 100644 --- a/src/kwsbin/kws-search.cc +++ b/src/kwsbin/kws-search.cc @@ -199,7 +199,11 @@ int main(int argc, char *argv[]) { Map(keyword, &keyword_fst, VectorFstToKwsLexicographicFstMapper()); Compose(keyword_fst, index, &result_fst); Project(&result_fst, PROJECT_OUTPUT); +#if OPENFST_VER >= 10500 + Minimize(&result_fst, (KwsLexicographicFst *) nullptr, kDelta, true); +#else Minimize(&result_fst); +#endif ShortestPath(result_fst, &result_fst, n_best); RmEpsilon(&result_fst); From 1abf0a7898edec816b1db3a70b341e5e5e4700bd Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Mon, 5 Dec 2016 14:49:40 -0800 Subject: [PATCH 255/530] Remove OpenFst-related compiler flags in src/configure.
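configure still records OPENFST_VER in kaldi.mk and now adds -std=c++0x unconditionally; the packed OPENFST_VER_NUM remains computed in src/Makefile for the minimum-version check, so only the -DOPENFST_VER compile-time define goes away here. For reference, the packing is printf "%d%02d%02d" over the dot-separated fields, i.e. major*10000 + minor*100 + patch, so "1.5.3" becomes 10503 and plain integer comparisons stand in for the old HAVE_OPENFST_GE_* macros. A minimal standalone sketch of the encoding (illustrative only; PackOpenFstVersion is a made-up helper, not code in this tree):

    #include <cassert>

    // Pack a dot-separated OpenFst version the same way the build system
    // does with printf "%d%02d%02d": major*10000 + minor*100 + patch.
    static int PackOpenFstVersion(int major, int minor, int patch) {
      return major * 10000 + minor * 100 + patch;
    }

    int main() {
      assert(PackOpenFstVersion(1, 4, 0) == 10400);  // threshold of the 1.4 guards
      assert(PackOpenFstVersion(1, 5, 3) == 10503);  // so OpenFst 1.5.3 ...
      assert(PackOpenFstVersion(1, 5, 3) >= 10500);  // ... passes '#if OPENFST_VER >= 10500'
      return 0;
    }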
--- src/configure | 65 ++++++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 40 deletions(-) diff --git a/src/configure b/src/configure index cb9736a45ad..736689dc868 100755 --- a/src/configure +++ b/src/configure @@ -223,9 +223,9 @@ function check_for_slow_expf { make -f Makefile.slow_expf 1>/dev/null ./exp-test if [ $? -eq 1 ]; then - echo "*** WARNING: expf() seems to be slower than exp() on your machine. This is a known bug in old versions of glibc. Please consider updating glibc. ***" - echo "*** Kaldi will be configured to use exp() instead of expf() in base/kaldi-math.h Exp() routine for single-precision floats. ***" - echo "CXXFLAGS += -DKALDI_NO_EXPF" >> ../kaldi.mk + echo "*** WARNING: expf() seems to be slower than exp() on your machine. This is a known bug in old versions of glibc. Please consider updating glibc. ***" + echo "*** Kaldi will be configured to use exp() instead of expf() in base/kaldi-math.h Exp() routine for single-precision floats. ***" + echo "CXXFLAGS += -DKALDI_NO_EXPF" >> ../kaldi.mk fi cd .. } @@ -849,7 +849,7 @@ function linux_configure_dynamic { echo "Configuring ..." if [ ! -f makefiles/common.mk ]; then - failure makefiles/common.mk not found + failure makefiles/common.mk not found fi # back up the old one in case we modified it @@ -861,37 +861,22 @@ fi printf "# This file was generated using the following command:\n# $cmd_line\n\n" > kaldi.mk cat makefiles/common.mk >> kaldi.mk if $dynamic_kaldi ; then -KALDILIBDIR=`pwd`/lib -echo "KALDI_FLAVOR := dynamic" >> kaldi.mk -echo "KALDILIBDIR := $KALDILIBDIR" >> kaldi.mk + KALDILIBDIR=`pwd`/lib + echo "KALDI_FLAVOR := dynamic" >> kaldi.mk + echo "KALDILIBDIR := $KALDILIBDIR" >> kaldi.mk fi echo "CONFIGURE_VERSION := $CONFIGURE_VERSION" >> kaldi.mk echo "FSTROOT = $FSTROOT" >> kaldi.mk echo "Checking OpenFST library in $FSTROOT ..." if [ ! -f $FSTROOT/include/fst/fst.h ]; then - failure "Could not find file $FSTROOT/include/fst/fst.h: - you may not have installed OpenFst. See ../tools/INSTALL" + failure "Could not find file $FSTROOT/include/fst/fst.h: + you may not have installed OpenFst. See ../tools/INSTALL" fi OPENFST_VER="${OPENFST_VER:-`grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::'`}" -echo "Adding flags necessary for compiling against OpenFst-$OPENFST_VER ..." echo "OPENFST_VER = $OPENFST_VER" >> kaldi.mk -OPENFST_VER_NUM=`echo $OPENFST_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d"` -echo "EXTRA_CXXFLAGS += -DOPENFST_VER=$OPENFST_VER_NUM" >> kaldi.mk -if [ $OPENFST_VER_NUM -ge 10400 ]; then - echo "EXTRA_CXXFLAGS += -std=c++0x" >> kaldi.mk -fi - -if [ $OPENFST_VER_NUM -lt 10500 ]; then - echo "Checking if OpenFst library was patched ..." - if ! grep "multiple repeated" $FSTROOT/include/fst/minimize.h >/dev/null; then - echo "** ERROR **" - echo "** $FSTROOT/include/fst/minimize.h seems not to be patched:" - echo "patch not applied? FST tools will not work in our recipe." - exit 1; - fi -fi +echo "CXXFLAGS += -std=c++0x" >> kaldi.mk # Most of the OS-specific steps below will append to kaldi.mk echo "Doing OS specific configurations ..." @@ -951,26 +936,26 @@ if [ "`uname`" == "Darwin" ]; then fi if [ "`uname -o`" == "Cygwin" ]; then - echo "On Cygwin: checking for linear algebra libraries ..." - if [ ! -f ../tools/CLAPACK/clapack.h ]; then - failure "could not find file ../tools/CLAPACK/clapack.h" - fi - if [ ! 
-f /usr/lib/lapack/cygblas-0.dll ]; then - failure "please first install package liblapack0" - fi - cat makefiles/cygwin.mk >> kaldi.mk - echo "Configuration succeeded for platform cygwin" - exit_success; + echo "On Cygwin: checking for linear algebra libraries ..." + if [ ! -f ../tools/CLAPACK/clapack.h ]; then + failure "could not find file ../tools/CLAPACK/clapack.h" + fi + if [ ! -f /usr/lib/lapack/cygblas-0.dll ]; then + failure "please first install package liblapack0" + fi + cat makefiles/cygwin.mk >> kaldi.mk + echo "Configuration succeeded for platform cygwin" + exit_success; fi if [ "`uname`" == "Linux" ]; then if $static_fst ; then - OPENFSTLIBS="$FSTROOT/lib/libfst.a" - fst_type='a' + OPENFSTLIBS="$FSTROOT/lib/libfst.a" + fst_type='a' else - OPENFSTLIBS="-L${FSTROOT}/lib -lfst" - OPENFSTLDFLAGS="-Wl,-rpath=${FSTROOT}/lib" - fst_type='so' + OPENFSTLIBS="-L${FSTROOT}/lib -lfst" + OPENFSTLDFLAGS="-Wl,-rpath=${FSTROOT}/lib" + fst_type='so' fi if [ ! -f "$FSTROOT/lib/libfst.${fst_type}" ]; then failure "Static=[$static_fst] OpenFST library not found: See ../tools/INSTALL" From b23f7205c9b522f1255acc90e98f802e6ec3b0e7 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Mon, 5 Dec 2016 14:53:14 -0800 Subject: [PATCH 256/530] Update src/Makefile to enforce OpenFst >= 1.5.3. OpenFst-1.5.3 adds support for minimization of non-deterministic FSTs over idempotent semirings which is a feature used throughout Kaldi. Along with the requirement for a C++ compiler with C++11 support, we are also removing support for older OpenFst releases so that we can build against an un-patched OpenFst installation. --- src/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Makefile b/src/Makefile index 8bc18b254e9..cecc8ca5170 100644 --- a/src/Makefile +++ b/src/Makefile @@ -97,8 +97,8 @@ endif # Note: OPENFST_VER is determined by configure and added to kaldi.mk OPENFST_VER_NUM := $(shell echo $(OPENFST_VER) | sed 's/\./ /g' | xargs printf "%d%02d%02d") test_dependencies: -ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10302)","1") - $(error OpenFst $(OPENFST_VER) is not supported. You now need OpenFst >= 1.3.2.) +ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10503)","1") + $(error OpenFst $(OPENFST_VER) is not supported. You now need OpenFst >= 1.5.3.) endif check_portaudio: @@ -184,4 +184,3 @@ onlinebin: base matrix util feat tree gmm transform sgmm2 fstext hmm lm decoder online: decoder gmm transform feat matrix util base lat hmm thread tree online2: decoder gmm transform feat matrix util base lat hmm thread tree ivector cudamatrix nnet2 nnet3 chain kws: base util thread hmm tree matrix lat - From bf2ed0af830fc70fca320e88bd700da3e4ff06ca Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Mon, 5 Dec 2016 17:06:18 -0800 Subject: [PATCH 257/530] Remove code for supporting OpenFst < 1.5.3. This commit removes all code that has to do with supporting old OpenFst releases. It also makes some updates to TableMatcher, ContextFst and TrivialFactorWeight. 
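A note on the TableMatcher change: the implementation object is now held in a std::shared_ptr, following the OpenFst 1.5 style, which is what lets the hand-rolled RefCounter, the refcount assertion in the destructor, and the manual delete-on-last-copy logic go away; copies of the matcher simply share one impl, freed with the last copy. A minimal standalone sketch of the idiom (illustrative only; Impl and Matcher below are stand-ins, not the real TableMatcherImpl/TableMatcher):

    #include <cstdio>
    #include <memory>

    struct Impl {  // stand-in for TableMatcherImpl
      ~Impl() { std::printf("impl freed exactly once\n"); }
    };

    class Matcher {  // stand-in for TableMatcher
     public:
      Matcher() : impl_(std::make_shared<Impl>()) {}
      // The implicitly generated copy constructor copies the shared_ptr, so
      // every copy shares the same Impl and no IncrRefCount()/DecrRefCount()
      // bookkeeping is needed; the Impl dies with the last copy.
     private:
      std::shared_ptr<Impl> impl_;
    };

    int main() {
      Matcher a;
      Matcher b(a);  // shares a's Impl
      return 0;      // the destructor message prints exactly once, at exit
    }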
--- src/bin/phones-to-prons.cc | 6 - src/fstext/context-fst-inl.h | 49 ++------ src/fstext/context-fst-test.cc | 13 -- src/fstext/context-fst.h | 139 +++++---------------- src/fstext/determinize-lattice-test.cc | 36 +----- src/fstext/determinize-star-test.cc | 103 +-------------- src/fstext/factor-test.cc | 8 -- src/fstext/fstext-utils-test.cc | 14 --- src/fstext/kaldi-fst-io-inl.h | 5 - src/fstext/lattice-utils-inl.h | 4 - src/fstext/lattice-utils-test.cc | 38 ++---- src/fstext/pre-determinize-test.cc | 38 +----- src/fstext/prune-special-test.cc | 12 -- src/fstext/push-special-test.cc | 12 +- src/fstext/remove-eps-local-test.cc | 19 +-- src/fstext/table-matcher-test.cc | 36 +----- src/fstext/table-matcher.h | 59 ++------- src/fstext/trivial-factor-weight-test.cc | 47 +------ src/fstext/trivial-factor-weight.h | 62 ++------- src/kwsbin/kws-search.cc | 4 - src/lat/determinize-lattice-pruned-test.cc | 36 +----- src/lat/kaldi-lattice.cc | 20 +-- src/lat/push-lattice-test.cc | 10 -- src/latbin/lattice-compose.cc | 4 - src/latbin/lattice-lmrescore.cc | 4 - 25 files changed, 104 insertions(+), 674 deletions(-) diff --git a/src/bin/phones-to-prons.cc b/src/bin/phones-to-prons.cc index 33a821ce6ab..6e3cf7a4651 100644 --- a/src/bin/phones-to-prons.cc +++ b/src/bin/phones-to-prons.cc @@ -170,11 +170,7 @@ int main(int argc, char *argv[]) { << "not reach end-state, or mismatched lexicon.)"; if (g_kaldi_verbose_level >= 2) { KALDI_LOG << "phn2word FST is below:"; -#if OPENFST_VER >= 10400 fst::FstPrinter fstprinter(phn2word, NULL, NULL, NULL, false, true, "\t"); -#else - fst::FstPrinter fstprinter(phn2word, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cerr, "standard error"); KALDI_LOG << "phone sequence is: "; for (size_t i = 0; i < phones.size(); i++) @@ -219,5 +215,3 @@ int main(int argc, char *argv[]) { return -1; } } - - diff --git a/src/fstext/context-fst-inl.h b/src/fstext/context-fst-inl.h index 204c8b92c1f..4427863d887 100644 --- a/src/fstext/context-fst-inl.h +++ b/src/fstext/context-fst-inl.h @@ -41,13 +41,6 @@ typename ContextFstImpl::StateId VectorToStateIter iter = state_map_.find(seq); if (iter == state_map_.end()) { // Not already in map. StateId this_state_id = (StateId)state_seqs_.size(); - //This check is not needed with OpenFst >= 1.4 -#if OPENFST_VER >= 10400 -#else - StateId this_state_id_check = CacheImpl::AddState(); - // goes back to VectorFstBaseImpl, inherited via CacheFst - assert(this_state_id == this_state_id_check); -#endif state_seqs_.push_back(seq); state_map_[seq] = this_state_id; return this_state_id; @@ -326,60 +319,34 @@ void ContextFstImpl::Expand(StateId s) { // expands arcs only [not // We just try adding all possible symbols on the output side. Arc arc; if (this->CreateArc(s, subsequential_symbol_, &arc)) { -#if OPENFST_VER >= 10400 this->PushArc(s, arc); -#else - this->AddArc(s, arc); -#endif } for (typename kaldi::ConstIntegerSet > : public CacheStateIterator< ContextFst > { public: explicit StateIterator(const ContextFst &fst) - : CacheStateIterator< ContextFst >(fst, fst.GetImpl()) {} + : CacheStateIterator< ContextFst >(fst, fst.GetMutableImpl()) {} }; @@ -369,13 +301,10 @@ class ArcIterator< ContextFst > typedef typename A::StateId StateId; ArcIterator(const ContextFst &fst, StateId s) - : CacheArcIterator< ContextFst >(fst.GetImpl(), s) { + : CacheArcIterator< ContextFst >(fst.GetMutableImpl(), s) { if (!fst.GetImpl()->HasArcs(s)) // arcs not already computed. 
- fst.GetImpl()->Expand(s); + fst.GetMutableImpl()->Expand(s); } - - private: - DISALLOW_COPY_AND_ASSIGN(ArcIterator); }; template inline diff --git a/src/fstext/determinize-lattice-test.cc b/src/fstext/determinize-lattice-test.cc index 42122c6e193..7359fa1354d 100644 --- a/src/fstext/determinize-lattice-test.cc +++ b/src/fstext/determinize-lattice-test.cc @@ -75,7 +75,7 @@ template void TestDeterminizeLattice() { typedef typename Arc::Weight Weight; typedef int32 Int; typedef ArcTpl > CompactArc; - + for(int i = 0; i < 100; i++) { RandFstOptions opts; opts.n_states = 4; @@ -84,34 +84,26 @@ template void TestDeterminizeLattice() { opts.allow_empty = false; opts.weight_multiplier = 0.5; // impt for the randomly generated weights // to be exactly representable in float, - // or this test fails because numerical differences can cause symmetry in + // or this test fails because numerical differences can cause symmetry in // weights to be broken, which causes the wrong path to be chosen as far // as the string part is concerned. - + VectorFst *fst = RandFst(); std::cout << "FST before lattice-determinizing is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } VectorFst det_fst; try { DeterminizeLatticeOptions lat_opts; lat_opts.max_mem = 100; - + if (!DeterminizeLattice(*fst, &det_fst, lat_opts, NULL)) throw std::runtime_error("could not determinize"); std::cout << "FST after lattice-determinizing is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(det_fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(det_fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } assert(det_fst.Properties(kIDeterministic, true) & kIDeterministic); @@ -122,11 +114,7 @@ template void TestDeterminizeLattice() { ConvertLattice(*fst, &compact_fst, false); std::cout << "Compact FST is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(compact_fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(compact_fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } if (kaldi::Rand() % 2 == 1) @@ -134,17 +122,13 @@ template void TestDeterminizeLattice() { else if (!DeterminizeLattice(*fst, &compact_det_fst, lat_opts, NULL)) throw std::runtime_error("could not determinize"); - + std::cout << "Compact version of determinized FST is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(compact_det_fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(compact_det_fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } - + assert(RandEquivalent(compact_det_fst, compact_fst, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length, max*/)); } catch (...) 
{ std::cout << "Failed to lattice-determinize this FST (probably not determinizable)\n"; @@ -162,22 +146,14 @@ template void TestDeterminizeLattice2() { VectorFst *fst = RandFst(opts); std::cout << "FST before lattice-determinizing is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } VectorFst ofst; DeterminizeLattice(*fst, &ofst); std::cout << "FST after lattice-determinizing is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } delete fst; diff --git a/src/fstext/determinize-star-test.cc b/src/fstext/determinize-star-test.cc index f308d8460d8..ee150f0c024 100644 --- a/src/fstext/determinize-star-test.cc +++ b/src/fstext/determinize-star-test.cc @@ -37,11 +37,7 @@ template void TestDeterminizeGeneral() { VectorFst *fst = RandFst(); std::cout << "FST before determinizing is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } VectorFst ofst; @@ -49,14 +45,10 @@ template void TestDeterminizeGeneral() { DeterminizeStar >(*fst, &ofst, kDelta, NULL, max_states); std::cout << "FST after determinizing is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } - assert(RandEquivalent(*fst, ofst, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length, max*/)); + assert(RandEquivalent(*fst, ofst, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length, max*/)); } catch (...) { std::cout << "Failed to determinize *this FST (probably not determinizable)\n"; } @@ -108,11 +100,7 @@ template void TestDeterminize() { std::cout <<" printing before trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } // Trim resulting FST. 
@@ -120,11 +108,7 @@ template void TestDeterminize() { std::cout <<" printing after trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -137,11 +121,7 @@ template void TestDeterminize() { std::cout <<" printing after predeterminization\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -157,11 +137,7 @@ template void TestDeterminize() { std::cout <<" printing after epsilon removal\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } VectorFst ofst_orig; @@ -180,22 +156,14 @@ template void TestDeterminize() { { std::cout <<" printing after determinization [baseline]\n"; -#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst_orig, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst_orig, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); assert(ofst_orig.Properties(kIDeterministic, true) == kIDeterministic); } { std::cout <<" printing after determinization [star]\n"; -#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); assert(ofst_star.Properties(kIDeterministic, true) == kIDeterministic); } @@ -205,11 +173,7 @@ template void TestDeterminize() { int64 num_removed = DeleteISymbols(&ofst_star, extra_syms); std::cout <<" printing after removing "<= 10400 FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -277,11 +241,7 @@ template void TestPush() { std::cout <<" printing before trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } // Trim resulting FST. @@ -289,11 +249,7 @@ template void TestPush() { std::cout <<" printing after trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -310,11 +266,7 @@ template void TestPush() { std::cout <<" printing after pushing\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst_pushed, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(fst_pushed, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -367,11 +319,7 @@ template void TestMinimize() { std::cout <<" printing before trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } // Trim resulting FST. 
@@ -379,11 +327,7 @@ template void TestMinimize() { std::cout <<" printing after trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -396,11 +340,7 @@ template void TestMinimize() { std::cout <<" printing after predeterminization\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -416,11 +356,7 @@ template void TestMinimize() { std::cout <<" printing after epsilon removal\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } VectorFst ofst_orig; @@ -433,11 +369,7 @@ template void TestMinimize() { } { std::cout <<" printing after determinization [baseline]\n"; -#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst_orig, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst_orig, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -449,11 +381,7 @@ template void TestMinimize() { DeterminizeStar(*fst, &gallic_fst); { std::cout <<" printing after determinization by DeterminizeStar [in gallic]\n"; -#if OPENFST_VER >= 10400 FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -463,11 +391,7 @@ template void TestMinimize() { { std::cout <<" printing after pushing weights [in gallic]\n"; -#if OPENFST_VER >= 10400 FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -476,45 +400,24 @@ template void TestMinimize() { Minimize(&gallic_fst); { std::cout <<" printing after minimization [in gallic]\n"; -#if OPENFST_VER >= 10400 FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } printf("Converting gallic back to regular [my approach]\n"); -#if OPENFST_VER >= 10400 TrivialFactorWeightFst< GallicArc, GallicFactor > fwfst(gallic_fst); -#else - TrivialFactorWeightFst< GallicArc, GallicFactor > fwfst(gallic_fst); -#endif { std::cout <<" printing factor-weight FST\n"; -#if OPENFST_VER >= 10400 FstPrinter > fstprinter(fwfst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter > fstprinter(fwfst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } -#if OPENFST_VER >= 10400 Map(fwfst, &ofst_star, FromGallicMapper()); -#else - Map(fwfst, &ofst_star, FromGallicMapper()); -#endif { std::cout <<" printing after converting back to regular FST\n"; -#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -527,11 +430,7 @@ template void TestMinimize() { int64 num_removed = DeleteISymbols(&ofst_star, extra_syms); std::cout <<" printing after removing 
"<= 10400 FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } diff --git a/src/fstext/factor-test.cc b/src/fstext/factor-test.cc index 1d446796b05..cb021ab4643 100644 --- a/src/fstext/factor-test.cc +++ b/src/fstext/factor-test.cc @@ -78,11 +78,7 @@ template static void TestFactor() { std::cout <<" printing before trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } // Trim resulting FST. @@ -90,11 +86,7 @@ template static void TestFactor() { std::cout <<" printing after trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } diff --git a/src/fstext/fstext-utils-test.cc b/src/fstext/fstext-utils-test.cc index 494935d3622..2802a84cca6 100644 --- a/src/fstext/fstext-utils-test.cc +++ b/src/fstext/fstext-utils-test.cc @@ -146,11 +146,7 @@ template void TestSafeDeterminizeWrapper() { // also tests SafeDete std::cout <<" printing before trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } // Trim resulting FST. @@ -158,11 +154,7 @@ template void TestSafeDeterminizeWrapper() { // also tests SafeDete std::cout <<" printing after trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -376,11 +368,7 @@ void TestEqualAlign() { template void Print(const Fst &fst, std::string message) { std::cout << message << "\n"; -#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -451,5 +439,3 @@ int main() { fst::TestRemoveUselessArcs(); } } - - diff --git a/src/fstext/kaldi-fst-io-inl.h b/src/fstext/kaldi-fst-io-inl.h index 58895449c72..b6bae4b9dc9 100644 --- a/src/fstext/kaldi-fst-io-inl.h +++ b/src/fstext/kaldi-fst-io-inl.h @@ -42,13 +42,8 @@ void WriteFstKaldi(std::ostream &os, bool binary, // appear on its own line. 
os << '\n'; bool acceptor = false, write_one = false; -#if OPENFST_VER >= 10400 FstPrinter printer(t, t.InputSymbols(), t.OutputSymbols(), NULL, acceptor, write_one, "\t"); -#else - FstPrinter printer(t, t.InputSymbols(), t.OutputSymbols(), - NULL, acceptor, write_one); -#endif printer.Print(&os, ""); if (os.fail()) KALDI_ERR << "Stream failure detected writing FST to stream"; diff --git a/src/fstext/lattice-utils-inl.h b/src/fstext/lattice-utils-inl.h index f15e8d2cc57..5bb40e3efa3 100644 --- a/src/fstext/lattice-utils-inl.h +++ b/src/fstext/lattice-utils-inl.h @@ -267,12 +267,8 @@ void ConvertFstToLattice( const ExpandedFst > &ifst, MutableFst > > *ofst) { int32 num_states_cache = 50000; -#if OPENFST_VER >= 10500 fst::CacheOptions cache_opts(true, num_states_cache); fst::MapFstOptions mapfst_opts(cache_opts); -#else - fst::CacheOptions mapfst_opts(true, num_states_cache); -#endif StdToLatticeMapper mapper; MapFst >, StdToLatticeMapper > map_fst(ifst, mapper, mapfst_opts); diff --git a/src/fstext/lattice-utils-test.cc b/src/fstext/lattice-utils-test.cc index 51df0ce8364..e74caef4aa2 100644 --- a/src/fstext/lattice-utils-test.cc +++ b/src/fstext/lattice-utils-test.cc @@ -30,11 +30,7 @@ template void TestConvert(bool invert) { VectorFst *fst = RandFst(); std::cout << "FST before converting to compact-arc is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } VectorFst ofst; @@ -42,25 +38,17 @@ template void TestConvert(bool invert) { std::cout << "FST after converting is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } VectorFst origfst; ConvertLattice(ofst, &origfst, invert); std::cout << "FST after back conversion is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(origfst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(origfst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } - + assert(RandEquivalent(*fst, origfst, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length-- max?*/)); delete fst; } @@ -78,11 +66,7 @@ template void TestShortestPath() { std::cout << "Testing shortest path\n"; std::cout << "FST before converting to compact-arc is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } VectorFst cfst; @@ -96,8 +80,8 @@ template void TestShortestPath() { ShortestPath(*fst, &nbest_fst_2, 3); VectorFst nbest_fst_1b; ShortestPath(nbest_fst_2, &nbest_fst_1b, 1); - - + + assert(ApproxEqual(ShortestDistance(nbest_fst_1), ShortestDistance(nbest_fst_1b))); @@ -112,7 +96,7 @@ template void TestShortestPath() { ShortestPath(cfst, &nbest_fst_2, 3); VectorFst nbest_fst_1b; ShortestPath(nbest_fst_2, &nbest_fst_1b, 1); - + assert(ApproxEqual(ShortestDistance(nbest_fst_1), ShortestDistance(nbest_fst_1b))); // since semiring is idempotent, this should succeed too. 
@@ -122,7 +106,7 @@ template void TestShortestPath() { delete fst; } - } + } } @@ -132,7 +116,7 @@ template void TestConvert2() { typedef ArcTpl > ArcD; typedef ArcTpl, Int> > CArcF; typedef ArcTpl, Int> > CArcD; - + for(int i = 0; i < 2; i++) { { VectorFst *fst1 = RandPairFst(); @@ -197,7 +181,7 @@ template void TestConvert2() { assert(RandEquivalent(*fst1, fst2, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length-- max?*/)); delete fst1; } - + { VectorFst *fst1 = RandPairFst(); VectorFst cfst1; @@ -209,7 +193,7 @@ template void TestConvert2() { } } } - + // use TestConvertPair when the Weight can be constructed from // a pair of floats. @@ -239,7 +223,7 @@ template void TestConvertPair(bool invert) { fstprinter.Print(&std::cout, "standard output"); }*/ - assert(RandEquivalent(*fst, origfst, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length-- max?*/)); + assert(RandEquivalent(*fst, origfst, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length-- max?*/)); delete fst; } } @@ -268,7 +252,7 @@ template void TestScalePair(bool invert) { scale2[1][0] = -0.25; } - + typedef ArcTpl Arc; typedef ArcTpl > CompactArc; for(int i = 0; i < 2; i++) { @@ -331,7 +315,7 @@ int main() { } { typedef LatticeWeightTpl LatticeWeight; - TestShortestPath(); + TestShortestPath(); TestConvert2(); for(int i = 0; i < 2; i++) { bool invert = (i % 2); diff --git a/src/fstext/pre-determinize-test.cc b/src/fstext/pre-determinize-test.cc index 774507b0792..bea8120e0e5 100644 --- a/src/fstext/pre-determinize-test.cc +++ b/src/fstext/pre-determinize-test.cc @@ -36,12 +36,12 @@ template void TestPreDeterminize() { int n_syms = 2 + kaldi::Rand() % 5, n_states = 3 + kaldi::Rand() % 10, n_arcs = 5 + kaldi::Rand() % 30, n_final = 1 + kaldi::Rand()%3; // Up to 2 unique symbols. cout << "Testing pre-determinize with "< all_syms; // including epsilon. // Put symbols in the symbol table from 1..n_syms-1. for (size_t i = 0;i < (size_t)n_syms;i++) all_syms.push_back(i); - + // Create states. vector all_states; for (size_t i = 0;i < (size_t)n_states;i++) { @@ -69,11 +69,7 @@ template void TestPreDeterminize() { std::cout <<" printing before trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } // Trim resulting FST. 
@@ -81,11 +77,7 @@ template void TestPreDeterminize() { std::cout <<" printing after trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -99,11 +91,7 @@ template void TestPreDeterminize() { std::cout <<" printing after predeterminization\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -119,11 +107,7 @@ template void TestPreDeterminize() { std::cout <<" printing after epsilon removal\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -133,22 +117,14 @@ template void TestPreDeterminize() { Determinize(*fst, &ofst, opts); std::cout <<" printing after determinization\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } int64 num_removed = DeleteISymbols(&ofst, extra_syms); std::cout <<" printing after removing "<= 10400 FstPrinter fstprinter(ofst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -200,11 +176,7 @@ template void TestAddSelfLoops() { } std::cout <<" printing before adding self-loops\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, ilabels, olabels, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, ilabels, olabels, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -223,11 +195,7 @@ template void TestAddSelfLoops() { std::cout <<" printing after adding self-loops\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, ilabels, olabels, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, ilabels, olabels, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -247,5 +215,3 @@ int main() { fst::TestAddSelfLoops(); } } - - diff --git a/src/fstext/prune-special-test.cc b/src/fstext/prune-special-test.cc index cb55edca6cc..2da002d980e 100644 --- a/src/fstext/prune-special-test.cc +++ b/src/fstext/prune-special-test.cc @@ -37,11 +37,7 @@ static void TestPruneSpecial() { float beam = 0.55; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*ifst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*ifst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); std::cout << endl; } @@ -50,11 +46,7 @@ static void TestPruneSpecial() { VectorFst ofst1; PruneSpecial(*ifst, &ofst1, beam); { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst1, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst1, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); std::cout << endl; } @@ -63,11 +55,7 @@ static void TestPruneSpecial() { VectorFst ofst2; Prune(*ifst, &ofst2, beam); { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst2, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst2, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, 
"standard output"); std::cout << endl; } diff --git a/src/fstext/push-special-test.cc b/src/fstext/push-special-test.cc index 0106492e887..557b43d3062 100644 --- a/src/fstext/push-special-test.cc +++ b/src/fstext/push-special-test.cc @@ -37,14 +37,10 @@ static void TestPushSpecial() { VectorFst *fst = RandFst(); { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } - + VectorFst fst_copy(*fst); float delta = kDelta; @@ -59,11 +55,7 @@ static void TestPushSpecial() { { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst_copy, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(fst_copy, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } KALDI_LOG << "Min value is " << min.Value() << ", max value is " << max.Value(); @@ -71,7 +63,7 @@ static void TestPushSpecial() { // below, should be <= delta but different pieces of code compute this in this // part vs. push-special, so the roundoff may be different. KALDI_ASSERT(std::abs(min.Value() - max.Value()) <= 1.2 * delta); - + KALDI_ASSERT(RandEquivalent(*fst, fst_copy, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length-- max?*/)); delete fst; diff --git a/src/fstext/remove-eps-local-test.cc b/src/fstext/remove-eps-local-test.cc index 2c6c6f8d97f..af8b890cca8 100644 --- a/src/fstext/remove-eps-local-test.cc +++ b/src/fstext/remove-eps-local-test.cc @@ -82,11 +82,7 @@ template static void TestRemoveEpsLocal() { std::cout <<" printing after trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -99,11 +95,7 @@ template static void TestRemoveEpsLocal() { { std::cout << "copy1 = \n"; -#if OPENFST_VER >= 10400 FstPrinter fstprinter(fst_copy1, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(fst_copy1, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -126,7 +118,7 @@ static void TestRemoveEpsLocalSpecial() { typedef LogArc::StateId StateId; typedef LogArc Arc; VectorFst *logfst = RandFst(); - + { // Make the FST stochastic. 
for (StateId s = 0; s < logfst->NumStates(); s++) { Weight w = logfst->Final(s); @@ -148,11 +140,7 @@ static void TestRemoveEpsLocalSpecial() { #endif { std::cout << "logfst = \n"; -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*logfst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*logfst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -167,11 +155,7 @@ static void TestRemoveEpsLocalSpecial() { { std::cout << "logfst2 = \n"; -#if OPENFST_VER >= 10400 FstPrinter fstprinter(logfst2, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(logfst2, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } if (ApproxEqual(ShortestDistance(*logfst), ShortestDistance(logfst2))) { @@ -192,4 +176,3 @@ int main() { TestRemoveEpsLocalSpecial(); } } - diff --git a/src/fstext/table-matcher-test.cc b/src/fstext/table-matcher-test.cc index 0124fff4147..2d39fe957dd 100644 --- a/src/fstext/table-matcher-test.cc +++ b/src/fstext/table-matcher-test.cc @@ -63,21 +63,13 @@ template void TestTableMatcher(bool connect, bool left) { std::cout <<"Table-Composed FST\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(composed, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(composed, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } std::cout <<" Baseline-Composed FST\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(composed_baseline, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(composed_baseline, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -86,11 +78,7 @@ template void TestTableMatcher(bool connect, bool left) { Difference(composed, composed_baseline, &diff1); std::cout <<" Diff1 (composed - baseline) \n"; { -#if OPENFST_VER >= 10400 - FstPrinter fstprinter(diff1, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(diff1, NULL, NULL, NULL, false, true); -#endif + FstPrinter fstprinter(diff1, NULL, NULL, NULL, false, true, "\t"); fstprinter.Print(&std::cout, "standard output"); } @@ -99,11 +87,7 @@ template void TestTableMatcher(bool connect, bool left) { Difference(composed_baseline, composed, &diff2); std::cout <<" Diff2 (baseline - composed) \n"; { -#if OPENFST_VER >= 10400 - FstPrinter fstprinter(diff2, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(diff2, NULL, NULL, NULL, false, true); -#endif + FstPrinter fstprinter(diff2, NULL, NULL, NULL, false, true, "\t"); fstprinter.Print(&std::cout, "standard output"); } @@ -164,11 +148,7 @@ template void TestTableMatcherCacheLeft(bool connect) { Difference(composed, composed_baseline, &diff1); std::cout <<" Diff1 (composed - baseline) \n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(diff1, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(diff1, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -177,11 +157,7 @@ template void TestTableMatcherCacheLeft(bool connect) { Difference(composed_baseline, composed, &diff2); std::cout <<" Diff2 (baseline - composed) \n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(diff2, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(diff2, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -242,11 +218,7 @@ template void TestTableMatcherCacheRight(bool connect) { Difference(composed, composed_baseline, &diff1); 
std::cout <<" Diff1 (composed - baseline) \n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(diff1, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(diff1, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -255,11 +227,7 @@ template void TestTableMatcherCacheRight(bool connect) { Difference(composed_baseline, composed, &diff2); std::cout <<" Diff2 (baseline - composed) \n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(diff2, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(diff2, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } diff --git a/src/fstext/table-matcher.h b/src/fstext/table-matcher.h index 1a1b35d8c68..792fe98fe83 100644 --- a/src/fstext/table-matcher.h +++ b/src/fstext/table-matcher.h @@ -86,10 +86,6 @@ class TableMatcherImpl : public MatcherBase { virtual const FST &GetFst() const { return *fst_; } virtual ~TableMatcherImpl() { -#if OPENFST_VER >= 10500 -#else - assert(RefCount() == 0); -#endif vector *const empty = ((vector*)(NULL)) + 1; // special marker. for (size_t i = 0; i < tables_.size(); i++) { if (tables_[i] != NULL && tables_[i] != empty) @@ -222,26 +218,7 @@ class TableMatcherImpl : public MatcherBase { virtual uint64 Properties(uint64 props) const { return props; } // simple matcher that does // not change its FST, so properties are properties of FST it is applied to -#if OPENFST_VER >= 10500 -#else - int RefCount() const { - return ref_count_.count(); - } - - int IncrRefCount() { - return ref_count_.Incr(); - } - - int DecrRefCount() { - return ref_count_.Decr(); - } -#endif private: -#if OPENFST_VER >= 10500 -#else - RefCounter ref_count_; // Reference count -#endif - virtual void SetState_(StateId s) { SetState(s); } virtual bool Find_(Label label) { return Find(label); } virtual bool Done_() const { return Done(); } @@ -272,29 +249,22 @@ class TableMatcher : public MatcherBase { typedef StateId ArcId; // Use this type to store arc offsets [it's actually size_t // in the Seek function of ArcIterator, but StateId should be big enough]. 
   typedef typename Arc::Weight Weight;
-  typedef TableMatcherImpl<F> I;
+  typedef TableMatcherImpl<F> Impl;

   TableMatcher(const FST &fst, MatchType match_type,
-               const TableMatcherOptions &opts = TableMatcherOptions()):
-      impl_(new I(fst, match_type, opts)) { }
-
-  TableMatcher(const TableMatcher<F> &matcher, bool safe):
-      impl_(matcher.impl_) {
-#if OPENFST_VER >= 10500
-#else
-    impl_->IncrRefCount();
-#endif
+               const TableMatcherOptions &opts = TableMatcherOptions())
+      : impl_(std::make_shared<Impl>(fst, match_type, opts)) { }
+
+  TableMatcher(const TableMatcher<F> &matcher,
+               bool safe = false)
+      : impl_(matcher.impl_) {
+    if (safe == true) {
+      KALDI_ERR << "TableMatcher: Safe copy not supported";
+    }
   }

   virtual const FST &GetFst() const { return impl_->GetFst(); }

-#if OPENFST_VER >= 10500
-#else
-  virtual ~TableMatcher() {
-    if (!impl_->DecrRefCount()) delete impl_;
-  }
-#endif
-
   virtual MatchType Type(bool test) const { return impl_->Type(test); }

   void SetState(StateId s) { return impl_->SetState(s); }
@@ -316,18 +286,15 @@ class TableMatcher : public MatcherBase<typename F::Arc> {
   virtual uint64 Properties(uint64 props) const { return impl_->Properties(props); }
   // simple matcher that does
   // not change its FST, so properties are properties of FST it is applied to
 private:
-#if OPENFST_VER >= 10500
-  std::shared_ptr<I> impl_;
-#else
-  I *impl_;
-#endif
+  std::shared_ptr<Impl> impl_;

   virtual void SetState_(StateId s) { impl_->SetState(s); }
   virtual bool Find_(Label label) { return impl_->Find(label); }
   virtual bool Done_() const { return impl_->Done(); }
   virtual const Arc& Value_() const { return impl_->Value(); }
   virtual void Next_() { impl_->Next(); }
-  DISALLOW_COPY_AND_ASSIGN(TableMatcher);
+
+  TableMatcher &operator=(const TableMatcher &) = delete;
 };

 struct TableComposeOptions: public TableMatcherOptions {
diff --git a/src/fstext/trivial-factor-weight-test.cc b/src/fstext/trivial-factor-weight-test.cc
index fcf34b6834e..46b6aaf46fb 100644
--- a/src/fstext/trivial-factor-weight-test.cc
+++ b/src/fstext/trivial-factor-weight-test.cc
@@ -70,11 +70,7 @@ template<class Arc> void TestFactor() {

   std::cout <<" printing before trimming\n";
   {
-#if OPENFST_VER >= 10400
     FstPrinter<Arc> fstprinter(*fst, sptr, sptr, NULL, false, true, "\t");
-#else
-    FstPrinter<Arc> fstprinter(*fst, sptr, sptr, NULL, false, true);
-#endif
     fstprinter.Print(&std::cout, "standard output");
   }
   // Trim resulting FST.
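[Note: the table-matcher.h hunk above swaps OpenFst's pre-1.5 manual RefCounter (IncrRefCount/DecrRefCount plus a hand-written destructor) for std::shared_ptr. A minimal sketch of the copy semantics this buys, with illustrative class names rather than the actual TableMatcher API:

#include <iostream>
#include <memory>

class MatcherImpl {
 public:
  explicit MatcherImpl(int state) : state_(state) {}
  int state() const { return state_; }
 private:
  int state_;
};

class Matcher {
 public:
  explicit Matcher(int state) : impl_(std::make_shared<MatcherImpl>(state)) {}
  // Copies share the impl; the last surviving copy destroys it. No explicit
  // destructor or ref-count bookkeeping is needed anymore.
  Matcher(const Matcher &other) : impl_(other.impl_) {}
  Matcher &operator=(const Matcher &) = delete;
  long use_count() const { return impl_.use_count(); }
 private:
  std::shared_ptr<MatcherImpl> impl_;
};

int main() {
  Matcher m1(42);
  Matcher m2(m1);                       // shares the impl, no deep copy
  std::cout << m1.use_count() << "\n";  // prints 2
  return 0;
}
]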
@@ -82,11 +78,7 @@ template void TestFactor() { std::cout <<" printing after trimming\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -97,11 +89,7 @@ template void TestFactor() { std::cout <<" printing after predeterminization\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -117,15 +105,11 @@ template void TestFactor() { std::cout <<" printing after double-epsilon removal\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } VectorFst ofst_star; - + { printf("Converting to Gallic semiring"); VectorFst > gallic_fst; @@ -140,58 +124,33 @@ template void TestFactor() { { std::cout <<" printing gallic FST\n"; -#if OPENFST_VER >= 10400 FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter > fstprinter(gallic_fst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } // Map(ofst_star, &gallic_fst, ToGallicMapper()); - + printf("Converting gallic back to regular\n"); -#if OPENFST_VER >= 10400 TrivialFactorWeightFst< GallicArc, GallicFactor > fwfst(gallic_fst); -#else - TrivialFactorWeightFst< GallicArc, GallicFactor > fwfst(gallic_fst); -#endif { std::cout <<" printing factor-weight FST\n"; -#if OPENFST_VER >= 10400 FstPrinter > fstprinter(fwfst, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter > fstprinter(fwfst, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } -#if OPENFST_VER >= 10400 Map(fwfst, &ofst_star, FromGallicMapper()); -#else - Map(fwfst, &ofst_star, FromGallicMapper()); -#endif { std::cout <<" printing after converting back to regular FST\n"; -#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst_star, sptr, sptr, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } VectorFst > new_gallic_fst; -#if OPENFST_VER >= 10400 Map(ofst_star, &new_gallic_fst, ToGallicMapper()); -#else - Map(ofst_star, &new_gallic_fst, ToGallicMapper()); -#endif assert(RandEquivalent(gallic_fst, new_gallic_fst, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length-- max?*/)); @@ -251,5 +210,3 @@ int main() { fst::TestFactor(); } } - - diff --git a/src/fstext/trivial-factor-weight.h b/src/fstext/trivial-factor-weight.h index 3e42dd287db..f17ba4e2187 100644 --- a/src/fstext/trivial-factor-weight.h +++ b/src/fstext/trivial-factor-weight.h @@ -52,17 +52,8 @@ // This has the advantage that it always works, for any input (also I just // prefer this approach). 
-#ifdef _MSC_VER
 #include <unordered_map>
 using std::unordered_map;
-#elif __cplusplus > 199711L || defined(__GXX_EXPERIMENTAL_CXX0X__)
-#include <unordered_map>
-using std::unordered_map;
-#else
-#include <tr1/unordered_map>
-using std::tr1::unordered_map;
-#endif
-
 #include
 #include
@@ -117,10 +108,8 @@ class TrivialFactorWeightFstImpl
   typedef typename A::StateId StateId;
   typedef F FactorIterator;

-#if OPENFST_VER >= 10400
   typedef DefaultCacheStore<A> Store;
   typedef typename Store::State State;
-#endif

   struct Element {
     Element() {}
@@ -157,10 +146,6 @@ class TrivialFactorWeightFstImpl
     SetOutputSymbols(impl.OutputSymbols());
   }

-  ~TrivialFactorWeightFstImpl() {
-    delete fst_;
-  }
-
   StateId Start() {
     if (!HasStart()) {
       StateId s = fst_->Start();
@@ -307,7 +292,7 @@ class TrivialFactorWeightFstImpl
   typedef unordered_map<Element, StateId, ElementKey, ElementEqual> ElementMap;

-  const Fst<A> *fst_;
+  std::unique_ptr<const Fst<A>> fst_;
   float delta_;
   uint32 mode_;         // factoring arc and/or final weights
   Label extra_ilabel_;  // ilabel of arc created when factoring final w's
@@ -315,11 +300,10 @@ class TrivialFactorWeightFstImpl
   vector<Element> elements_;  // mapping Fst state to Elements
   ElementMap element_map_;    // mapping Elements to Fst state

-  void operator = (const TrivialFactorWeightFstImpl<A, F> &);  // disallow
 };

-/// FactorWeightFst takes as template parameter a FactorIterator as
+/// TrivialFactorWeightFst takes as template parameter a FactorIterator as
 /// defined above. The result of weight factoring is a transducer
 /// equivalent to the input whose path weights have been factored
 /// according to the FactorIterator. States and transitions will be
@@ -344,54 +328,36 @@ class TrivialFactorWeightFst : public ImplToFst< TrivialFactorWeightFstImpl<A, F> > {
-#if OPENFST_VER >= 10400
   typedef DefaultCacheStore<A> Store;
   typedef typename Store::State State;
-#else
-  typedef CacheState<A> State;
-#endif
   typedef TrivialFactorWeightFstImpl<A, F> Impl;

-  TrivialFactorWeightFst(const Fst<A> &fst)
-#if OPENFST_VER >= 10500
+  explicit TrivialFactorWeightFst(const Fst<A> &fst)
       : ImplToFst<Impl>(std::make_shared<Impl>(fst, TrivialFactorWeightOptions<A>())) {}
-#else
-      : ImplToFst<Impl>(new Impl(fst, TrivialFactorWeightOptions<A>())) {}
-#endif

   TrivialFactorWeightFst(const Fst<A> &fst, const TrivialFactorWeightOptions<A> &opts)
-#if OPENFST_VER >= 10500
       : ImplToFst<Impl>(std::make_shared<Impl>(fst, opts)) {}
-#else
-      : ImplToFst<Impl>(new Impl(fst, opts)) {}
-#endif

   // See Fst<>::Copy() for doc.
   TrivialFactorWeightFst(const TrivialFactorWeightFst<A, F> &fst, bool copy)
       : ImplToFst<Impl>(fst, copy) {}

   // Get a copy of this TrivialFactorWeightFst. See Fst<>::Copy() for further doc.
-  virtual TrivialFactorWeightFst<A, F> *Copy(bool copy = false) const {
+  TrivialFactorWeightFst<A, F> *Copy(bool copy = false) const override {
     return new TrivialFactorWeightFst<A, F>(*this, copy);
   }

-  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+  inline void InitStateIterator(StateIteratorData<A> *data) const override;

-  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+  void InitArcIterator(StateId s, ArcIteratorData<A> *data) const override {
     GetMutableImpl()->InitArcIterator(s, data);
   }

 private:
   // Makes visible to friends.
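[Note: the hunk above replaces a raw "const Fst<A> *fst_" member plus a hand-written "delete fst_" destructor with std::unique_ptr ownership. A sketch of the pattern under simplified, assumed types (Fst, VectorFst, and FactorImpl here are stand-ins, not the real classes):

#include <memory>

struct Fst {
  virtual ~Fst() {}
  virtual Fst *Copy() const = 0;
};

struct VectorFst : Fst {
  Fst *Copy() const override { return new VectorFst(*this); }
};

class FactorImpl {
 public:
  explicit FactorImpl(const Fst &fst) : fst_(fst.Copy()) {}
  // No "~FactorImpl() { delete fst_; }" needed: the unique_ptr frees the
  // copy exactly once, even if construction later throws.
 private:
  std::unique_ptr<const Fst> fst_;
};

int main() {
  VectorFst f;
  FactorImpl impl(f);  // impl owns its private copy of f
  return 0;
}
]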
-#if OPENFST_VER >= 10500
   using ImplToFst<Impl>::GetImpl;
   using ImplToFst<Impl>::GetMutableImpl;
-#else
-  const Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
-  Impl *GetMutableImpl() const { return ImplToFst<Impl>::GetImpl(); }
-#endif

-  void operator=(const TrivialFactorWeightFst<A, F> &fst);  // Disallow
+  TrivialFactorWeightFst &operator=(const TrivialFactorWeightFst &fst) = delete;
 };

@@ -413,18 +379,14 @@ class ArcIterator< TrivialFactorWeightFst<A, F> >
   typedef typename A::StateId StateId;

   ArcIterator(const TrivialFactorWeightFst<A, F> &fst, StateId s)
-      : CacheArcIterator< TrivialFactorWeightFst<A, F> >(fst.GetMutableImpl(), s) {
-    if (!fst.GetImpl()->HasArcs(s))
-      fst.GetMutableImpl()->Expand(s);
+      : CacheArcIterator< TrivialFactorWeightFst<A, F>>(fst.GetMutableImpl(), s) {
+    if (!fst.GetImpl()->HasArcs(s)) fst.GetMutableImpl()->Expand(s);
   }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
 };

-template <class A, class F> inline
-void TrivialFactorWeightFst<A, F>::InitStateIterator(StateIteratorData<A> *data) const
-{
+template <class A, class F>
+inline void TrivialFactorWeightFst<A, F>::InitStateIterator(
+    StateIteratorData<A> *data) const {
   data->base = new StateIterator< TrivialFactorWeightFst<A, F> >(*this);
 }

diff --git a/src/kwsbin/kws-search.cc b/src/kwsbin/kws-search.cc
index 1359e165240..c5f6edd525d 100644
--- a/src/kwsbin/kws-search.cc
+++ b/src/kwsbin/kws-search.cc
@@ -199,11 +199,7 @@ int main(int argc, char *argv[]) {
       Map(keyword, &keyword_fst, VectorFstToKwsLexicographicFstMapper());
       Compose(keyword_fst, index, &result_fst);
       Project(&result_fst, PROJECT_OUTPUT);
-#if OPENFST_VER >= 10500
       Minimize(&result_fst, (KwsLexicographicFst *) nullptr, kDelta, true);
-#else
-      Minimize(&result_fst);
-#endif
       ShortestPath(result_fst, &result_fst, n_best);
       RmEpsilon(&result_fst);

diff --git a/src/lat/determinize-lattice-pruned-test.cc b/src/lat/determinize-lattice-pruned-test.cc
index c932e3c95de..f6684f0b5b5 100644
--- a/src/lat/determinize-lattice-pruned-test.cc
+++ b/src/lat/determinize-lattice-pruned-test.cc
@@ -37,7 +37,7 @@ template<class Arc> void TestDeterminizeLatticePruned() {
   typedef kaldi::int32 Int;
   typedef typename Arc::Weight Weight;
   typedef ArcTpl<CompactLatticeWeightTpl<Weight, Int> > CompactArc;
-
+
   for(int i = 0; i < 100; i++) {
     RandFstOptions opts;
     opts.n_states = 4;
@@ -47,10 +47,10 @@ template<class Arc> void TestDeterminizeLatticePruned() {
     opts.weight_multiplier = 0.5;  // impt for the randomly generated weights
     opts.acyclic = true;  // to be exactly representable in float,
-    // or this test fails because numerical differences can cause symmetry in
+    // or this test fails because numerical differences can cause symmetry in
     // weights to be broken, which causes the wrong path to be chosen as far
     // as the string part is concerned.
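[Note: several hunks above trade the old DISALLOW_COPY_AND_ASSIGN(Foo) macro for C++11 deleted members. The difference in a self-contained sketch (Foo is illustrative): deletion is per-member, so copying can stay allowed while assignment is forbidden, and the error surfaces at the call site rather than as a link failure.

class Foo {
 public:
  Foo() {}
  Foo(const Foo &) = default;            // copying stays allowed...
  Foo &operator=(const Foo &) = delete;  // ...while assignment is forbidden
};

int main() {
  Foo a;
  Foo b(a);   // ok: copy constructor kept
  // b = a;   // would not compile: operator= is deleted
  return 0;
}
]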
- + VectorFst *fst = RandPairFst(opts); bool sorted = TopSort(fst); @@ -59,14 +59,10 @@ template void TestDeterminizeLatticePruned() { ILabelCompare ilabel_comp; if (kaldi::Rand() % 2 == 0) ArcSort(fst, ilabel_comp); - + std::cout << "FST before lattice-determinizing is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } VectorFst det_fst; @@ -79,11 +75,7 @@ template void TestDeterminizeLatticePruned() { std::cout << "FST after lattice-determinizing is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(det_fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(det_fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } KALDI_ASSERT(det_fst.Properties(kIDeterministic, true) & kIDeterministic); @@ -95,27 +87,19 @@ template void TestDeterminizeLatticePruned() { VectorFst pruned_fst(*fst); if (pruned_fst.NumStates() != 0) kaldi::PruneLattice(10.0, &pruned_fst); - + VectorFst compact_pruned_fst, compact_pruned_det_fst; ConvertLattice(pruned_fst, &compact_pruned_fst, false); std::cout << "Compact pruned FST is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(compact_pruned_fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(compact_pruned_fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } ConvertLattice(det_fst, &compact_pruned_det_fst, false); - + std::cout << "Compact version of determinized FST is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(compact_pruned_det_fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(compact_pruned_det_fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } @@ -138,22 +122,14 @@ template void TestDeterminizeLatticePruned2() { VectorFst *fst = RandPairFst(opts); std::cout << "FST before lattice-determinizing is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(*fst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } VectorFst ofst; DeterminizeLatticePruned(*fst, 10.0, &ofst); std::cout << "FST after lattice-determinizing is:\n"; { -#if OPENFST_VER >= 10400 FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true, "\t"); -#else - FstPrinter fstprinter(ofst, NULL, NULL, NULL, false, true); -#endif fstprinter.Print(&std::cout, "standard output"); } delete fst; diff --git a/src/lat/kaldi-lattice.cc b/src/lat/kaldi-lattice.cc index b44b12a5a23..744cc538462 100644 --- a/src/lat/kaldi-lattice.cc +++ b/src/lat/kaldi-lattice.cc @@ -75,15 +75,9 @@ bool WriteCompactLattice(std::ostream &os, bool binary, // on its own line. 
os << '\n'; bool acceptor = true, write_one = false; -#if OPENFST_VER >= 10400 fst::FstPrinter printer(t, t.InputSymbols(), t.OutputSymbols(), NULL, acceptor, write_one, "\t"); -#else - fst::FstPrinter printer(t, t.InputSymbols(), - t.OutputSymbols(), - NULL, acceptor, write_one); -#endif printer.Print(&os, ""); if (os.fail()) KALDI_WARN << "Stream failure detected."; @@ -131,7 +125,7 @@ class LatticeReader { if (col.size() > 5) { KALDI_WARN << "Reading lattice: bad line in FST: " << line; delete fst; - delete cfst; + delete cfst; return PairT(static_cast(NULL), static_cast(NULL)); } @@ -168,7 +162,7 @@ class LatticeReader { else fst->SetFinal(s, w); break; case 3: // 3 columns not ok for Lattice format; it's not an acceptor. - ok = false; + ok = false; break; case 4: ok = ConvertStringToInteger(col[1], &arc.nextstate) && @@ -253,7 +247,7 @@ class LatticeReader { SplitStringToVector(line, separator.c_str(), true, &col); if (col.empty()) break; } - return PairT(static_cast(NULL), + return PairT(static_cast(NULL), static_cast(NULL)); } } @@ -406,15 +400,9 @@ bool WriteLattice(std::ostream &os, bool binary, const Lattice &t) { // on its own line. os << '\n'; bool acceptor = false, write_one = false; -#if OPENFST_VER >= 10400 fst::FstPrinter printer(t, t.InputSymbols(), t.OutputSymbols(), NULL, acceptor, write_one, "\t"); -#else - fst::FstPrinter printer(t, t.InputSymbols(), - t.OutputSymbols(), - NULL, acceptor, write_one); -#endif printer.Print(&os, ""); if (os.fail()) KALDI_WARN << "Stream failure detected."; @@ -511,7 +499,7 @@ bool LatticeHolder::Read(std::istream &is) { } else { return ReadLattice(is, true, &t_); } -} +} diff --git a/src/lat/push-lattice-test.cc b/src/lat/push-lattice-test.cc index ecd60501888..cc9ae827a86 100644 --- a/src/lat/push-lattice-test.cc +++ b/src/lat/push-lattice-test.cc @@ -90,23 +90,13 @@ void TestPushCompactLatticeWeights() { } if (!ApproxEqual(sum, LatticeWeight::One())) { { -#if OPENFST_VER >= 10400 fst::FstPrinter printer(clat2, NULL, NULL, NULL, true, true, "\t"); -#else - fst::FstPrinter printer(clat2, NULL, NULL, - NULL, true, true); -#endif printer.Print(&std::cerr, ""); } { -#if OPENFST_VER >= 10400 fst::FstPrinter printer(*clat, NULL, NULL, NULL, true, true, "\t"); -#else - fst::FstPrinter printer(*clat, NULL, NULL, - NULL, true, true); -#endif printer.Print(&std::cerr, ""); } KALDI_ERR << "Bad lattice being pushed."; diff --git a/src/latbin/lattice-compose.cc b/src/latbin/lattice-compose.cc index 365be941a85..b9b261f7d36 100644 --- a/src/latbin/lattice-compose.cc +++ b/src/latbin/lattice-compose.cc @@ -85,12 +85,8 @@ int main(int argc, char *argv[]) { if (phi_label > 0) PropagateFinal(phi_label, fst2); -#if OPENFST_VER >= 10500 fst::CacheOptions cache_opts(true, num_states_cache); fst::MapFstOptions mapfst_opts(cache_opts); -#else - fst::CacheOptions mapfst_opts(true, num_states_cache); -#endif fst::StdToLatticeMapper mapper; fst::MapFst > mapped_fst2(*fst2, mapper, mapfst_opts); diff --git a/src/latbin/lattice-lmrescore.cc b/src/latbin/lattice-lmrescore.cc index d60d5fe93e5..2e5406f75de 100644 --- a/src/latbin/lattice-lmrescore.cc +++ b/src/latbin/lattice-lmrescore.cc @@ -74,12 +74,8 @@ int main(int argc, char *argv[]) { // mapped_fst is the LM fst interpreted using the LatticeWeight semiring, // with all the cost on the first member of the pair (since it's a graph // weight). 
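[Note: the LatticeReader code above dispatches on the number of whitespace-separated fields in each text-form FST line. A simplified sketch of that dispatch (not the real reader, which also parses the integers and weights and tracks error state):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
  // For a Lattice (a transducer) the valid layouts are: 1 field (final
  // state), 2 (final state + weight), 4 (arc: src dst ilabel olabel),
  // 5 (arc with weight). 3 fields would be an acceptor arc, rejected here.
  std::string line = "0 1 7 8 3.5,2.1";
  std::istringstream iss(line);
  std::vector<std::string> col;
  for (std::string tok; iss >> tok; ) col.push_back(tok);
  switch (col.size()) {
    case 1: std::cout << "final state " << col[0] << "\n"; break;
    case 2: std::cout << "final state " << col[0] << " with weight\n"; break;
    case 3: std::cout << "bad line: 3 columns not ok for Lattice\n"; break;
    case 4:
    case 5: std::cout << "arc " << col[0] << " -> " << col[1] << "\n"; break;
    default: std::cout << "bad line\n"; break;
  }
  return 0;
}
]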
-#if OPENFST_VER >= 10500 fst::CacheOptions cache_opts(true, num_states_cache); fst::MapFstOptions mapfst_opts(cache_opts); -#else - fst::CacheOptions mapfst_opts(true, num_states_cache); -#endif fst::StdToLatticeMapper mapper; fst::MapFst > lm_fst(*std_lm_fst, mapper, mapfst_opts); From ab7feb015cca4fac3ea13b3e795bd0e9a607a912 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Tue, 6 Dec 2016 00:16:07 -0800 Subject: [PATCH 258/530] Remove obsolete OpenFst version checks in Darwin makefiles. --- src/makefiles/darwin_10_10.mk | 12 +++--------- src/makefiles/darwin_10_11.mk | 12 +++--------- src/makefiles/darwin_10_12.mk | 12 +++--------- src/makefiles/darwin_10_9.mk | 12 +++--------- 4 files changed, 12 insertions(+), 36 deletions(-) diff --git a/src/makefiles/darwin_10_10.mk b/src/makefiles/darwin_10_10.mk index 77d82708b1e..aeff69d4953 100644 --- a/src/makefiles/darwin_10_10.mk +++ b/src/makefiles/darwin_10_10.mk @@ -1,7 +1,7 @@ # makefiles/darwin_10_10.mk contains Darwin-specific rules for OS X 10.10.* ifndef FSTROOT -$(error FSTROOT not defined.) + $(error FSTROOT not defined.) endif DOUBLE_PRECISION = 0 @@ -17,7 +17,7 @@ CXXFLAGS += -msse -msse2 -Wall -I.. \ ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC + CXXFLAGS += -fPIC endif LDFLAGS = -g @@ -32,16 +32,10 @@ AR = ar COMPILER = $(shell $(CXX) -v 2>&1 ) ifeq ($(findstring clang,$(COMPILER)),clang) CXXFLAGS += -Wno-mismatched-tags - # Link with libstdc++ if we are building against OpenFst < 1.4 - OPENFST_VER_NUM := $(shell echo $(OPENFST_VER) | sed 's/\./ /g' | xargs printf "%d%02d%02d") - ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10400)","1") - CXXFLAGS += -stdlib=libstdc++ - LDFLAGS += -stdlib=libstdc++ - endif endif # We need to tell recent versions of g++ to allow vector conversions without # an explicit cast provided the vectors are of the same size. ifeq ($(findstring GCC,$(COMPILER)),GCC) - CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs + CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs endif diff --git a/src/makefiles/darwin_10_11.mk b/src/makefiles/darwin_10_11.mk index c3b11a49cfc..40ee3adf6d0 100644 --- a/src/makefiles/darwin_10_11.mk +++ b/src/makefiles/darwin_10_11.mk @@ -1,7 +1,7 @@ # makefiles/darwin_10_11.mk contains Darwin-specific rules for OS X 10.11.* ifndef FSTROOT -$(error FSTROOT not defined.) + $(error FSTROOT not defined.) endif DOUBLE_PRECISION = 0 @@ -17,7 +17,7 @@ CXXFLAGS += -msse -msse2 -Wall -I.. \ ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC + CXXFLAGS += -fPIC endif LDFLAGS = -g @@ -32,16 +32,10 @@ AR = ar COMPILER = $(shell $(CXX) -v 2>&1 ) ifeq ($(findstring clang,$(COMPILER)),clang) CXXFLAGS += -Wno-mismatched-tags - # Link with libstdc++ if we are building against OpenFst < 1.4 - OPENFST_VER_NUM := $(shell echo $(OPENFST_VER) | sed 's/\./ /g' | xargs printf "%d%02d%02d") - ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10400)","1") - CXXFLAGS += -stdlib=libstdc++ - LDFLAGS += -stdlib=libstdc++ - endif endif # We need to tell recent versions of g++ to allow vector conversions without # an explicit cast provided the vectors are of the same size. 
ifeq ($(findstring GCC,$(COMPILER)),GCC) - CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs + CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs endif diff --git a/src/makefiles/darwin_10_12.mk b/src/makefiles/darwin_10_12.mk index 46e05cc3427..10acd2d8577 100644 --- a/src/makefiles/darwin_10_12.mk +++ b/src/makefiles/darwin_10_12.mk @@ -1,7 +1,7 @@ # makefiles/darwin_10_12.mk contains Darwin-specific rules for OS X 10.12.* ifndef FSTROOT -$(error FSTROOT not defined.) + $(error FSTROOT not defined.) endif DOUBLE_PRECISION = 0 @@ -17,7 +17,7 @@ CXXFLAGS += -msse -msse2 -Wall -I.. \ ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC + CXXFLAGS += -fPIC endif LDFLAGS = -g @@ -32,16 +32,10 @@ AR = ar COMPILER = $(shell $(CXX) -v 2>&1 ) ifeq ($(findstring clang,$(COMPILER)),clang) CXXFLAGS += -Wno-mismatched-tags - # Link with libstdc++ if we are building against OpenFst < 1.4 - OPENFST_VER_NUM := $(shell echo $(OPENFST_VER) | sed 's/\./ /g' | xargs printf "%d%02d%02d") - ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10400)","1") - CXXFLAGS += -stdlib=libstdc++ - LDFLAGS += -stdlib=libstdc++ - endif endif # We need to tell recent versions of g++ to allow vector conversions without # an explicit cast provided the vectors are of the same size. ifeq ($(findstring GCC,$(COMPILER)),GCC) - CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs + CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs endif diff --git a/src/makefiles/darwin_10_9.mk b/src/makefiles/darwin_10_9.mk index f3e8817503e..ede1712e155 100644 --- a/src/makefiles/darwin_10_9.mk +++ b/src/makefiles/darwin_10_9.mk @@ -1,7 +1,7 @@ # makefiles/darwin_10_9.mk contains Darwin-specific rules for OS X 10.9.* ifndef FSTROOT -$(error FSTROOT not defined.) + $(error FSTROOT not defined.) endif DOUBLE_PRECISION = 0 @@ -17,7 +17,7 @@ CXXFLAGS += -msse -msse2 -Wall -I.. \ ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC + CXXFLAGS += -fPIC endif LDFLAGS = -g @@ -32,16 +32,10 @@ AR = ar COMPILER = $(shell $(CXX) -v 2>&1 ) ifeq ($(findstring clang,$(COMPILER)),clang) CXXFLAGS += -Wno-mismatched-tags - # Link with libstdc++ if we are building against OpenFst < 1.4 - OPENFST_VER_NUM := $(shell echo $(OPENFST_VER) | sed 's/\./ /g' | xargs printf "%d%02d%02d") - ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10400)","1") - CXXFLAGS += -stdlib=libstdc++ - LDFLAGS += -stdlib=libstdc++ - endif endif # We need to tell recent versions of g++ to allow vector conversions without # an explicit cast provided the vectors are of the same size. 
ifeq ($(findstring GCC,$(COMPILER)),GCC) - CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs + CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs endif From 1fd8e06e2a2c647eb64035706b21773d7a097357 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Tue, 6 Dec 2016 00:37:15 -0800 Subject: [PATCH 259/530] Remove support for OpenFst < 1.5.3 from tools/Makefile --- tools/INSTALL | 6 +- tools/Makefile | 40 +-- tools/extras/openfst-1.3.4.patch | 383 ------------------------ tools/extras/openfst-1.4.1.patch | 153 ---------- tools/extras/openfst_gcc41up.patch | 28 -- tools/extras/openfstwin-1.3.4.patch | 431 ---------------------------- 6 files changed, 10 insertions(+), 1031 deletions(-) delete mode 100644 tools/extras/openfst-1.3.4.patch delete mode 100644 tools/extras/openfst-1.4.1.patch delete mode 100644 tools/extras/openfst_gcc41up.patch delete mode 100644 tools/extras/openfstwin-1.3.4.patch diff --git a/tools/INSTALL b/tools/INSTALL index b13d45826bd..0678e2c8815 100644 --- a/tools/INSTALL +++ b/tools/INSTALL @@ -18,11 +18,9 @@ build by supplying the "-j" option to make, e.g. to use 4 CPUs: make -j 4 -By default, Kaldi builds against OpenFst-1.3.4. If you want to build against -OpenFst-1.4, edit the Makefile in this folder. Note that this change requires -a relatively new compiler with C++11 support, e.g. gcc >= 4.6, clang >= 3.0. +Kaldi builds against OpenFst >= 1.5.3 which requires a relatively new compiler +with C++11 support, e.g. gcc >= 4.6, clang >= 3.0. In extras/, there are also various scripts to install extra bits and pieces that are used by individual example scripts. If an example script needs you to run one of those scripts, it will tell you what to do. - diff --git a/tools/Makefile b/tools/Makefile index 0f5af6c7452..b6687ad1540 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -4,28 +4,14 @@ CXX = g++ # CXX = clang++ # Uncomment this line to build with Clang. CC = gcc # used for sph2pipe -OPENFST_VERSION = 1.3.4 -# Uncomment the next line to build with OpenFst-1.4.1. -# OPENFST_VERSION = 1.4.1 -# Uncomment the next line to build with OpenFst-1.5.4. -# OPENFST_VERSION = 1.5.4 -# Note: OpenFst >= 1.4 requires C++11 support, hence you will need to use a +# Note: OpenFst >= 1.5.3 requires C++11 support, hence you will need to use a # relatively recent C++ compiler, e.g. gcc >= 4.6, clang >= 3.0. +OPENFST_VERSION = 1.5.4 -# On Mac OS 10.9+, clang defaults to the new c++ standard library libc++. -# Since OpenFst-1.3 uses stuff from the tr1 namespace, we need to tell clang -# to use libstdc++ instead. -ifeq ($(OPENFST_VERSION), 1.3.4) - COMPILER = $(shell $(CXX) -v 2>&1 ) - ifeq ($(findstring clang,$(COMPILER)),clang) - CXXFLAGS += -stdlib=libstdc++ - LDFLAGS += -stdlib=libstdc++ - endif -else ifeq ($(OPENFST_VERSION), 1.4.1) -else ifeq ($(OPENFST_VERSION), 1.5.4) -else - $(error OpenFst version $(OPENFST_VERSION) is not supported. \ - Supported versions: 1.3.4, 1.4.1, 1.5.4) +OPENFST_VER_NUM := $(shell echo $(OPENFST_VERSION) | sed 's/\./ /g' | xargs printf "%d%02d%02d") +ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10503)","1") + $(error OpenFst-$(OPENFST_VERSION) is not supported. \ + Supported versions: >= 1.5.3) endif all: check_required_programs sph2pipe atlas sclite openfst @@ -78,7 +64,7 @@ openfst-$(OPENFST_VERSION)/lib: | openfst-$(OPENFST_VERSION)/Makefile # Add the -O flag to CXXFLAGS on cygwin as it can fix the compilation error # "file too big". 
-openfst-$(OPENFST_VERSION)/Makefile: openfst-$(OPENFST_VERSION)/.patched | check_required_programs +openfst-$(OPENFST_VERSION)/Makefile: openfst-$(OPENFST_VERSION) | check_required_programs # Note: OSTYPE path is probably dead for latest cygwin64 (installed on 2016/11/11). ifeq ($(OSTYPE),cygwin) cd openfst-$(OPENFST_VERSION)/; ./configure --prefix=`pwd` --enable-static --enable-shared --enable-far --enable-ngram-fsts CXX=$(CXX) CXXFLAGS="$(CXXFLAGS) -O -Wa,-mbig-obj" LDFLAGS="$(LDFLAGS)" LIBS="-ldl" @@ -93,16 +79,6 @@ else cd openfst-$(OPENFST_VERSION)/; ./configure --prefix=`pwd` --enable-static --enable-shared --enable-far --enable-ngram-fsts CXX=$(CXX) CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" LIBS="-ldl" endif -# patches for openfst. openfst_gcc41up.patch is a patch for openfst to -# support multi-threading when compiling with gcc >= 4.1. -openfst-$(OPENFST_VERSION)/.patched: | openfst-$(OPENFST_VERSION) -ifneq ($(OPENFST_VERSION), 1.5.4) - cd openfst-$(OPENFST_VERSION)/; \ - patch -p1 -N < ../extras/openfst-$(OPENFST_VERSION).patch; - $(CXX) -dumpversion | awk '{if(NR==1 && $$1>"4.1") print "cd openfst-$(OPENFST_VERSION)/src/include/fst; patch -c -p0 -N < ../../../../extras/openfst_gcc41up.patch"}' | sh - -endif - touch $@ - openfst-$(OPENFST_VERSION): openfst-$(OPENFST_VERSION).tar.gz tar xozf openfst-$(OPENFST_VERSION).tar.gz @@ -167,7 +143,7 @@ fortran_opt = $(shell gcc -v 2>&1 | perl -e '$$x = join(" ", ); if($$x =~ # note: you can uncomment the line that has USE_THREAD=1 and comment the line -# that has USE_THREADE=0 if you want Open Blas to use multiple threads. then +# that has USE_THREAD=0 if you want Open Blas to use multiple threads. then # you could set, for example, OPENBLAS_NUM_THREADS=2 in your path.sh so that the # runtime knows how many threads to use. Note: if you ever get the error # "Program is Terminated. Because you tried to allocate too many memory diff --git a/tools/extras/openfst-1.3.4.patch b/tools/extras/openfst-1.3.4.patch deleted file mode 100644 index bae0d6d7114..00000000000 --- a/tools/extras/openfst-1.3.4.patch +++ /dev/null @@ -1,383 +0,0 @@ ---- a/src/include/fst/interval-set.h -+++ b/src/include/fst/interval-set.h -@@ -37,38 +37,38 @@ template - class IntervalSet { - public: - struct Interval { -- T begin; -- T end; -+ T begin_; -+ T end_; - -- Interval() : begin(-1), end(-1) {} -+ Interval() : begin_(-1), end_(-1) {} - -- Interval(T b, T e) : begin(b), end(e) {} -+ Interval(T b, T e) : begin_(b), end_(e) {} - - bool operator<(const Interval &i) const { -- return begin < i.begin || (begin == i.begin && end > i.end); -+ return begin_ < i.begin_ || (begin_ == i.begin_ && end_ > i.end_); - } - - bool operator==(const Interval &i) const { -- return begin == i.begin && end == i.end; -+ return begin_ == i.begin_ && end_ == i.end_; - } - - bool operator!=(const Interval &i) const { -- return begin != i.begin || end != i.end; -+ return begin_ != i.begin_ || end_ != i.end_; - } - - istream &Read(istream &strm) { - T n; - ReadType(strm, &n); -- begin = n; -+ begin_ = n; - ReadType(strm, &n); -- end = n; -+ end_ = n; - return strm; - } - - ostream &Write(ostream &strm) const { -- T n = begin; -+ T n = begin_; - WriteType(strm, n); -- n = end; -+ n = end_; - WriteType(strm, n); - return strm; - } -@@ -108,7 +108,7 @@ class IntervalSet { - lower_bound(intervals_.begin(), intervals_.end(), interval); - if (lb == intervals_.begin()) - return false; -- return (--lb)->end > value; -+ return (--lb)->end_ > value; - } - - // Requires intervals be normalized. 
-@@ -123,7 +123,7 @@ class IntervalSet { - - bool Singleton() const { - return intervals_.size() == 1 && -- intervals_[0].begin + 1 == intervals_[0].end; -+ intervals_[0].begin_ + 1 == intervals_[0].end_; - } - - -@@ -178,17 +178,17 @@ void IntervalSet::Normalize() { - T size = 0; - for (T i = 0; i < intervals_.size(); ++i) { - Interval &inti = intervals_[i]; -- if (inti.begin == inti.end) -+ if (inti.begin_ == inti.end_) - continue; - for (T j = i + 1; j < intervals_.size(); ++j) { - Interval &intj = intervals_[j]; -- if (intj.begin > inti.end) -+ if (intj.begin_ > inti.end_) - break; -- if (intj.end > inti.end) -- inti.end = intj.end; -+ if (intj.end_ > inti.end_) -+ inti.end_ = intj.end_; - ++i; - } -- count_ += inti.end - inti.begin; -+ count_ += inti.end_ - inti.begin_; - intervals_[size++] = inti; - } - intervals_.resize(size); -@@ -208,17 +208,17 @@ void IntervalSet::Intersect(const IntervalSet &iset, - oset->count_ = 0; - - while (it1 != intervals_.end() && it2 != iintervals->end()) { -- if (it1->end <= it2->begin) { -+ if (it1->end_ <= it2->begin_) { - ++it1; -- } else if (it2->end <= it1->begin) { -+ } else if (it2->end_ <= it1->begin_) { - ++it2; - } else { - Interval interval; -- interval.begin = max(it1->begin, it2->begin); -- interval.end = min(it1->end, it2->end); -+ interval.begin_ = max(it1->begin_, it2->begin_); -+ interval.end_ = min(it1->end_, it2->end_); - ointervals->push_back(interval); -- oset->count_ += interval.end - interval.begin; -- if (it1->end < it2->end) -+ oset->count_ += interval.end_ - interval.begin_; -+ if (it1->end_ < it2->end_) - ++it1; - else - ++it2; -@@ -235,21 +235,21 @@ void IntervalSet::Complement(T maxval, IntervalSet *oset) const { - oset->count_ = 0; - - Interval interval; -- interval.begin = 0; -+ interval.begin_ = 0; - for (typename vector::const_iterator it = intervals_.begin(); - it != intervals_.end(); - ++it) { -- interval.end = min(it->begin, maxval); -- if (interval.begin < interval.end) { -+ interval.end_ = min(it->begin_, maxval); -+ if (interval.begin_ < interval.end_) { - ointervals->push_back(interval); -- oset->count_ += interval.end - interval.begin; -+ oset->count_ += interval.end_ - interval.begin_; - } -- interval.begin = it->end; -+ interval.begin_ = it->end_; - } -- interval.end = maxval; -- if (interval.begin < interval.end) { -+ interval.end_ = maxval; -+ if (interval.begin_ < interval.end_) { - ointervals->push_back(interval); -- oset->count_ += interval.end - interval.begin; -+ oset->count_ += interval.end_ - interval.begin_; - } - } - -@@ -263,7 +263,7 @@ void IntervalSet::Difference(const IntervalSet &iset, - oset->count_ = 0; - } else { - IntervalSet cset; -- iset.Complement(intervals_.back().end, &cset); -+ iset.Complement(intervals_.back().end_, &cset); - Intersect(cset, oset); - } - } -@@ -277,9 +277,9 @@ bool IntervalSet::Overlaps(const IntervalSet &iset) const { - typename vector::const_iterator it2 = intervals->begin(); - - while (it1 != intervals_.end() && it2 != intervals->end()) { -- if (it1->end <= it2->begin) { -+ if (it1->end_ <= it2->begin_) { - ++it1; -- } else if (it2->end <= it1->begin) { -+ } else if (it2->end_ <= it1->begin_) { - ++it2; - } else { - return true; -@@ -300,21 +300,21 @@ bool IntervalSet::StrictlyOverlaps(const IntervalSet &iset) const { - bool overlap = false; // point in both intervals_ and intervals - - while (it1 != intervals_.end() && it2 != intervals->end()) { -- if (it1->end <= it2->begin) { // no overlap - it1 first -+ if (it1->end_ <= it2->begin_) { // no overlap - it1 first 
- only1 = true; - ++it1; -- } else if (it2->end <= it1->begin) { // no overlap - it2 first -+ } else if (it2->end_ <= it1->begin_) { // no overlap - it2 first - only2 = true; - ++it2; -- } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals -+ } else if (it2->begin_ == it1->begin_ && it2->end_ == it1->end_) { // equals - overlap = true; - ++it1; - ++it2; -- } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2 -+ } else if (it2->begin_ <= it1->begin_ && it2->end_ >= it1->end_) { // 1 c 2 - only2 = true; - overlap = true; - ++it1; -- } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1 -+ } else if (it1->begin_ <= it2->begin_ && it1->end_ >= it2->end_) { // 2 c 1 - only1 = true; - overlap = true; - ++it2; -@@ -346,11 +346,11 @@ bool IntervalSet::Contains(const IntervalSet &iset) const { - typename vector::const_iterator it2 = intervals->begin(); - - while (it1 != intervals_.end() && it2 != intervals->end()) { -- if (it1->end <= it2->begin) { // no overlap - it1 first -+ if (it1->end_ <= it2->begin_) { // no overlap - it1 first - ++it1; -- } else if (it2->begin < it1->begin || it2->end > it1->end) { // no C -+ } else if (it2->begin_ < it1->begin_ || it2->end_ > it1->end_) { // no C - return false; -- } else if (it2->end == it1->end) { -+ } else if (it2->end_ == it1->end_) { - ++it1; - ++it2; - } else { -@@ -370,7 +370,7 @@ ostream &operator<<(ostream &strm, const IntervalSet &s) { - ++it) { - if (it != intervals->begin()) - strm << ","; -- strm << "[" << it->begin << "," << it->end << ")"; -+ strm << "[" << it->begin_ << "," << it->end_ << ")"; - } - strm << "}"; - return strm; ---- a/src/include/fst/minimize.h -+++ b/src/include/fst/minimize.h -@@ -134,7 +134,14 @@ class CyclicMinimizer { - typedef typename A::Weight Weight; - typedef ReverseArc RevA; - -- CyclicMinimizer(const ExpandedFst& fst) { -+ CyclicMinimizer(const ExpandedFst& fst): -+ // tell the Partition data-member to expect multiple repeated -+ // calls to SplitOn with the same element if we are non-deterministic. -+ P_(fst.Properties(kIDeterministic, true) == 0) { -+ if(fst.Properties(kIDeterministic, true) == 0) -+ CHECK(Weight::Properties() & kIdempotent); // this minimization -+ // algorithm for non-deterministic FSTs can only work with idempotent -+ // semirings. - Initialize(fst); - Compute(fst); - } -@@ -315,7 +322,13 @@ class AcyclicMinimizer { - typedef typename A::StateId ClassId; - typedef typename A::Weight Weight; - -- AcyclicMinimizer(const ExpandedFst& fst) { -+ AcyclicMinimizer(const ExpandedFst& fst): -+ // tell the Partition data-member to expect multiple repeated -+ // calls to SplitOn with the same element if we are non-deterministic. -+ partition_(fst.Properties(kIDeterministic, true) == 0) { -+ if(fst.Properties(kIDeterministic, true) == 0) -+ CHECK(Weight::Properties() & kIdempotent); // minimization for -+ // non-deterministic FSTs can only work with idempotent semirings. 
- Initialize(fst); - Refine(fst); - } -@@ -531,13 +544,7 @@ template - void Minimize(MutableFst* fst, - MutableFst* sfst = 0, - float delta = kDelta) { -- uint64 props = fst->Properties(kAcceptor | kIDeterministic| -- kWeighted | kUnweighted, true); -- if (!(props & kIDeterministic)) { -- FSTERROR() << "FST is not deterministic"; -- fst->SetProperties(kError, kError); -- return; -- } -+ uint64 props = fst->Properties(kAcceptor | kWeighted | kUnweighted, true); - - if (!(props & kAcceptor)) { // weighted transducer - VectorFst< GallicArc > gfst; ---- a/src/include/fst/partition.h -+++ b/src/include/fst/partition.h -@@ -43,8 +43,8 @@ class Partition { - friend class PartitionIterator; - - struct Element { -- Element() : value(0), next(0), prev(0) {} -- Element(T v) : value(v), next(0), prev(0) {} -+ Element() : value(0), next(0), prev(0) {} -+ Element(T v) : value(v), next(0), prev(0) {} - - T value; - Element* next; -@@ -52,9 +52,11 @@ class Partition { - }; - - public: -- Partition() {} -+ Partition(bool allow_repeated_split): -+ allow_repeated_split_(allow_repeated_split) {} - -- Partition(T num_states) { -+ Partition(bool allow_repeated_split, T num_states): -+ allow_repeated_split_(allow_repeated_split) { - Initialize(num_states); - } - -@@ -137,16 +139,16 @@ class Partition { - if (class_size_[class_id] == 1) return; - - // first time class is split -- if (split_size_[class_id] == 0) -+ if (split_size_[class_id] == 0) { - visited_classes_.push_back(class_id); -- -+ class_split_[class_id] = classes_[class_id]; -+ } - // increment size of split (set of element at head of chain) - split_size_[class_id]++; - - // update split point -- if (class_split_[class_id] == 0) -- class_split_[class_id] = classes_[class_id]; -- if (class_split_[class_id] == elements_[element_id]) -+ if (class_split_[class_id] != 0 -+ && class_split_[class_id] == elements_[element_id]) - class_split_[class_id] = elements_[element_id]->next; - - // move to head of chain in same class -@@ -157,24 +159,31 @@ class Partition { - // class indices of the newly created class. Returns the new_class id - // or -1 if no new class was created. - T SplitRefine(T class_id) { -+ -+ Element* split_el = class_split_[class_id]; - // only split if necessary -- if (class_size_[class_id] == split_size_[class_id]) { -- class_split_[class_id] = 0; -+ //if (class_size_[class_id] == split_size_[class_id]) { -+ if(split_el == NULL) { // we split on everything... - split_size_[class_id] = 0; - return -1; - } else { -- - T new_class = AddClass(); -+ -+ if(allow_repeated_split_) { // split_size_ is possibly -+ // inaccurate, so work it out exactly. 
-+ size_t split_count; Element *e; -+ for(split_count=0,e=classes_[class_id]; -+ e != split_el; split_count++, e=e->next); -+ split_size_[class_id] = split_count; -+ } - size_t remainder = class_size_[class_id] - split_size_[class_id]; - if (remainder < split_size_[class_id]) { // add smaller -- Element* split_el = class_split_[class_id]; - classes_[new_class] = split_el; -+ split_el->prev->next = 0; -+ split_el->prev = 0; - class_size_[class_id] = split_size_[class_id]; - class_size_[new_class] = remainder; -- split_el->prev->next = 0; -- split_el->prev = 0; - } else { -- Element* split_el = class_split_[class_id]; - classes_[new_class] = classes_[class_id]; - class_size_[class_id] = remainder; - class_size_[new_class] = split_size_[class_id]; -@@ -245,10 +254,16 @@ class Partition { - vector class_size_; - - // size of split for each class -+ // in the nondeterministic case, split_size_ is actually an upper -+ // bound on the size of split for each class. - vector split_size_; - - // set of visited classes to be used in split refine - vector visited_classes_; -+ -+ // true if input fst was deterministic: we can make -+ // certain assumptions in this case that speed up the algorithm. -+ bool allow_repeated_split_; - }; - - ---- a/src/script/text-io.cc -+++ b/src/script/text-io.cc -@@ -84,7 +84,7 @@ bool WritePotentials(const string& filename, - if (!*strm) - LOG(ERROR) << "WritePotentials: Write failed: " - << (filename.empty() ? "standard output" : filename); -- bool ret = *strm; -+ bool ret = !strm->fail(); - if (strm != &cout) - delete strm; - return ret; diff --git a/tools/extras/openfst-1.4.1.patch b/tools/extras/openfst-1.4.1.patch deleted file mode 100644 index 5889191d1a0..00000000000 --- a/tools/extras/openfst-1.4.1.patch +++ /dev/null @@ -1,153 +0,0 @@ ---- a/src/include/fst/minimize.h -+++ b/src/include/fst/minimize.h -@@ -134,7 +134,14 @@ class CyclicMinimizer { - typedef typename A::Weight Weight; - typedef ReverseArc RevA; - -- CyclicMinimizer(const ExpandedFst& fst) { -+ CyclicMinimizer(const ExpandedFst& fst): -+ // tell the Partition data-member to expect multiple repeated -+ // calls to SplitOn with the same element if we are non-deterministic. -+ P_(fst.Properties(kIDeterministic, true) == 0) { -+ if(fst.Properties(kIDeterministic, true) == 0) -+ CHECK(Weight::Properties() & kIdempotent); // this minimization -+ // algorithm for non-deterministic FSTs can only work with idempotent -+ // semirings. - Initialize(fst); - Compute(fst); - } -@@ -315,7 +322,13 @@ class AcyclicMinimizer { - typedef typename A::StateId ClassId; - typedef typename A::Weight Weight; - -- AcyclicMinimizer(const ExpandedFst& fst) { -+ AcyclicMinimizer(const ExpandedFst& fst): -+ // tell the Partition data-member to expect multiple repeated -+ // calls to SplitOn with the same element if we are non-deterministic. -+ partition_(fst.Properties(kIDeterministic, true) == 0) { -+ if(fst.Properties(kIDeterministic, true) == 0) -+ CHECK(Weight::Properties() & kIdempotent); // minimization for -+ // non-deterministic FSTs can only work with idempotent semirings. 
- Initialize(fst); - Refine(fst); - } -@@ -531,13 +544,7 @@ template - void Minimize(MutableFst* fst, - MutableFst* sfst = 0, - float delta = kDelta) { -- uint64 props = fst->Properties(kAcceptor | kIDeterministic| -- kWeighted | kUnweighted, true); -- if (!(props & kIDeterministic)) { -- FSTERROR() << "FST is not deterministic"; -- fst->SetProperties(kError, kError); -- return; -- } -+ uint64 props = fst->Properties(kAcceptor | kWeighted | kUnweighted, true); - - if (!(props & kAcceptor)) { // weighted transducer - VectorFst< GallicArc > gfst; ---- a/src/include/fst/partition.h -+++ b/src/include/fst/partition.h -@@ -43,8 +43,8 @@ class Partition { - friend class PartitionIterator; - - struct Element { -- Element() : value(0), next(0), prev(0) {} -- Element(T v) : value(v), next(0), prev(0) {} -+ Element() : value(0), next(0), prev(0) {} -+ Element(T v) : value(v), next(0), prev(0) {} - - T value; - Element* next; -@@ -52,9 +52,11 @@ class Partition { - }; - - public: -- Partition() {} -+ Partition(bool allow_repeated_split): -+ allow_repeated_split_(allow_repeated_split) {} - -- Partition(T num_states) { -+ Partition(bool allow_repeated_split, T num_states): -+ allow_repeated_split_(allow_repeated_split) { - Initialize(num_states); - } - -@@ -137,16 +139,16 @@ class Partition { - if (class_size_[class_id] == 1) return; - - // first time class is split -- if (split_size_[class_id] == 0) -+ if (split_size_[class_id] == 0) { - visited_classes_.push_back(class_id); -- -+ class_split_[class_id] = classes_[class_id]; -+ } - // increment size of split (set of element at head of chain) - split_size_[class_id]++; - - // update split point -- if (class_split_[class_id] == 0) -- class_split_[class_id] = classes_[class_id]; -- if (class_split_[class_id] == elements_[element_id]) -+ if (class_split_[class_id] != 0 -+ && class_split_[class_id] == elements_[element_id]) - class_split_[class_id] = elements_[element_id]->next; - - // move to head of chain in same class -@@ -157,24 +159,31 @@ class Partition { - // class indices of the newly created class. Returns the new_class id - // or -1 if no new class was created. - T SplitRefine(T class_id) { -+ -+ Element* split_el = class_split_[class_id]; - // only split if necessary -- if (class_size_[class_id] == split_size_[class_id]) { -- class_split_[class_id] = 0; -+ //if (class_size_[class_id] == split_size_[class_id]) { -+ if(split_el == NULL) { // we split on everything... - split_size_[class_id] = 0; - return -1; - } else { -- - T new_class = AddClass(); -+ -+ if(allow_repeated_split_) { // split_size_ is possibly -+ // inaccurate, so work it out exactly. 
-+ size_t split_count; Element *e; -+ for(split_count=0,e=classes_[class_id]; -+ e != split_el; split_count++, e=e->next); -+ split_size_[class_id] = split_count; -+ } - size_t remainder = class_size_[class_id] - split_size_[class_id]; - if (remainder < split_size_[class_id]) { // add smaller -- Element* split_el = class_split_[class_id]; - classes_[new_class] = split_el; -+ split_el->prev->next = 0; -+ split_el->prev = 0; - class_size_[class_id] = split_size_[class_id]; - class_size_[new_class] = remainder; -- split_el->prev->next = 0; -- split_el->prev = 0; - } else { -- Element* split_el = class_split_[class_id]; - classes_[new_class] = classes_[class_id]; - class_size_[class_id] = remainder; - class_size_[new_class] = split_size_[class_id]; -@@ -245,10 +254,16 @@ class Partition { - vector class_size_; - - // size of split for each class -+ // in the nondeterministic case, split_size_ is actually an upper -+ // bound on the size of split for each class. - vector split_size_; - - // set of visited classes to be used in split refine - vector visited_classes_; -+ -+ // true if input fst was deterministic: we can make -+ // certain assumptions in this case that speed up the algorithm. -+ bool allow_repeated_split_; - }; - - diff --git a/tools/extras/openfst_gcc41up.patch b/tools/extras/openfst_gcc41up.patch deleted file mode 100644 index 2a47c9b9bd0..00000000000 --- a/tools/extras/openfst_gcc41up.patch +++ /dev/null @@ -1,28 +0,0 @@ -*** lock.h -*************** -*** 78,85 **** - RefCounter() : count_(1) {} - - int count() const { return count_; } -! int Incr() const { return ++count_; } -! int Decr() const { return --count_; } - - private: - mutable int count_; ---- 78,93 ---- - RefCounter() : count_(1) {} - - int count() const { return count_; } -! -! // below lines are modifications of openfst for multi-thrads support, -! // from tools/extras/openfst_gcc41up.patch, applied by tools/Makefile, -! // applicable to gcc 4.1 or above -! // int Incr() const { return ++count_; } -! // int Decr() const { return --count_; } -! -! int Incr() const { return __sync_add_and_fetch(&count_, 1); } -! int Decr() const { return __sync_sub_and_fetch(&count_, 1); } -! 
// end modifications - - private: - mutable int count_; diff --git a/tools/extras/openfstwin-1.3.4.patch b/tools/extras/openfstwin-1.3.4.patch deleted file mode 100644 index 9e624f4db32..00000000000 --- a/tools/extras/openfstwin-1.3.4.patch +++ /dev/null @@ -1,431 +0,0 @@ -diff --git a/src/include/fst/compat.h b/src/include/fst/compat.h -index 00e2dba..ff8bacc 100644 ---- a/src/include/fst/compat.h -+++ b/src/include/fst/compat.h -@@ -23,7 +23,9 @@ - #ifdef _MSC_VER //AddedPD - #include - typedef SSIZE_T ssize_t; -+#if _MSC_VER < 1900 //AddedYT -- Visual Studio 2016 already has snprintf - #define snprintf _snprintf -+#endif // _MSC_VER < 1900 - #define strtoll _strtoi64 - #ifndef OPENFSTEXPORT - #ifdef _DEBUG -@@ -37,7 +39,7 @@ typedef SSIZE_T ssize_t; - #pragma comment (lib, "openfst64.lib") - #else - #pragma comment (lib, "openfst.lib") -- #endif -+ #endif - #endif - #endif - #else -diff --git a/src/include/fst/interval-set.h b/src/include/fst/interval-set.h -index c4362f2..58cad44 100644 ---- a/src/include/fst/interval-set.h -+++ b/src/include/fst/interval-set.h -@@ -37,38 +37,38 @@ template - class IntervalSet { - public: - struct Interval { -- T begin; -- T end; -+ T begin_; -+ T end_; - -- Interval() : begin(-1), end(-1) {} -+ Interval() : begin_(-1), end_(-1) {} - -- Interval(T b, T e) : begin(b), end(e) {} -+ Interval(T b, T e) : begin_(b), end_(e) {} - - bool operator<(const Interval &i) const { -- return begin < i.begin || (begin == i.begin && end > i.end); -+ return begin_ < i.begin_ || (begin_ == i.begin_ && end_ > i.end_); - } - - bool operator==(const Interval &i) const { -- return begin == i.begin && end == i.end; -+ return begin_ == i.begin_ && end_ == i.end_; - } - - bool operator!=(const Interval &i) const { -- return begin != i.begin || end != i.end; -+ return begin_ != i.begin_ || end_ != i.end_; - } - - istream &Read(istream &strm) { - T n; - ReadType(strm, &n); -- begin = n; -+ begin_ = n; - ReadType(strm, &n); -- end = n; -+ end_ = n; - return strm; - } - - ostream &Write(ostream &strm) const { -- T n = begin; -+ T n = begin_; - WriteType(strm, n); -- n = end; -+ n = end_; - WriteType(strm, n); - return strm; - } -@@ -108,7 +108,7 @@ class IntervalSet { - lower_bound(intervals_.begin(), intervals_.end(), interval); - if (lb == intervals_.begin()) - return false; -- return (--lb)->end > value; -+ return (--lb)->end_ > value; - } - - // Requires intervals be normalized. 
-@@ -123,7 +123,7 @@ class IntervalSet { - - bool Singleton() const { - return intervals_.size() == 1 && -- intervals_[0].begin + 1 == intervals_[0].end; -+ intervals_[0].begin_ + 1 == intervals_[0].end_; - } - - -@@ -178,17 +178,17 @@ void IntervalSet::Normalize() { - T size = 0; - for (T i = 0; i < intervals_.size(); ++i) { - Interval &inti = intervals_[i]; -- if (inti.begin == inti.end) -+ if (inti.begin_ == inti.end_) - continue; - for (T j = i + 1; j < intervals_.size(); ++j) { - Interval &intj = intervals_[j]; -- if (intj.begin > inti.end) -+ if (intj.begin_ > inti.end_) - break; -- if (intj.end > inti.end) -- inti.end = intj.end; -+ if (intj.end_ > inti.end_) -+ inti.end_ = intj.end_; - ++i; - } -- count_ += inti.end - inti.begin; -+ count_ += inti.end_ - inti.begin_; - intervals_[size++] = inti; - } - intervals_.resize(size); -@@ -208,17 +208,17 @@ void IntervalSet::Intersect(const IntervalSet &iset, - oset->count_ = 0; - - while (it1 != intervals_.end() && it2 != iintervals->end()) { -- if (it1->end <= it2->begin) { -+ if (it1->end_ <= it2->begin_) { - ++it1; -- } else if (it2->end <= it1->begin) { -+ } else if (it2->end_ <= it1->begin_) { - ++it2; - } else { - Interval interval; -- interval.begin = max(it1->begin, it2->begin); -- interval.end = min(it1->end, it2->end); -+ interval.begin_ = max(it1->begin_, it2->begin_); -+ interval.end_ = min(it1->end_, it2->end_); - ointervals->push_back(interval); -- oset->count_ += interval.end - interval.begin; -- if (it1->end < it2->end) -+ oset->count_ += interval.end_ - interval.begin_; -+ if (it1->end_ < it2->end_) - ++it1; - else - ++it2; -@@ -235,21 +235,21 @@ void IntervalSet::Complement(T maxval, IntervalSet *oset) const { - oset->count_ = 0; - - Interval interval; -- interval.begin = 0; -+ interval.begin_ = 0; - for (typename vector::const_iterator it = intervals_.begin(); - it != intervals_.end(); - ++it) { -- interval.end = min(it->begin, maxval); -- if (interval.begin < interval.end) { -+ interval.end_ = min(it->begin_, maxval); -+ if (interval.begin_ < interval.end_) { - ointervals->push_back(interval); -- oset->count_ += interval.end - interval.begin; -+ oset->count_ += interval.end_ - interval.begin_; - } -- interval.begin = it->end; -+ interval.begin_ = it->end_; - } -- interval.end = maxval; -- if (interval.begin < interval.end) { -+ interval.end_ = maxval; -+ if (interval.begin_ < interval.end_) { - ointervals->push_back(interval); -- oset->count_ += interval.end - interval.begin; -+ oset->count_ += interval.end_ - interval.begin_; - } - } - -@@ -263,7 +263,7 @@ void IntervalSet::Difference(const IntervalSet &iset, - oset->count_ = 0; - } else { - IntervalSet cset; -- iset.Complement(intervals_.back().end, &cset); -+ iset.Complement(intervals_.back().end_, &cset); - Intersect(cset, oset); - } - } -@@ -277,9 +277,9 @@ bool IntervalSet::Overlaps(const IntervalSet &iset) const { - typename vector::const_iterator it2 = intervals->begin(); - - while (it1 != intervals_.end() && it2 != intervals->end()) { -- if (it1->end <= it2->begin) { -+ if (it1->end_ <= it2->begin_) { - ++it1; -- } else if (it2->end <= it1->begin) { -+ } else if (it2->end_ <= it1->begin_) { - ++it2; - } else { - return true; -@@ -300,21 +300,21 @@ bool IntervalSet::StrictlyOverlaps(const IntervalSet &iset) const { - bool overlap = false; // point in both intervals_ and intervals - - while (it1 != intervals_.end() && it2 != intervals->end()) { -- if (it1->end <= it2->begin) { // no overlap - it1 first -+ if (it1->end_ <= it2->begin_) { // no overlap - it1 first 
- only1 = true; - ++it1; -- } else if (it2->end <= it1->begin) { // no overlap - it2 first -+ } else if (it2->end_ <= it1->begin_) { // no overlap - it2 first - only2 = true; - ++it2; -- } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals -+ } else if (it2->begin_ == it1->begin_ && it2->end_ == it1->end_) { // equals - overlap = true; - ++it1; - ++it2; -- } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2 -+ } else if (it2->begin_ <= it1->begin_ && it2->end_ >= it1->end_) { // 1 c 2 - only2 = true; - overlap = true; - ++it1; -- } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1 -+ } else if (it1->begin_ <= it2->begin_ && it1->end_ >= it2->end_) { // 2 c 1 - only1 = true; - overlap = true; - ++it2; -@@ -346,11 +346,11 @@ bool IntervalSet::Contains(const IntervalSet &iset) const { - typename vector::const_iterator it2 = intervals->begin(); - - while (it1 != intervals_.end() && it2 != intervals->end()) { -- if (it1->end <= it2->begin) { // no overlap - it1 first -+ if (it1->end_ <= it2->begin_) { // no overlap - it1 first - ++it1; -- } else if (it2->begin < it1->begin || it2->end > it1->end) { // no C -+ } else if (it2->begin_ < it1->begin_ || it2->end_ > it1->end_) { // no C - return false; -- } else if (it2->end == it1->end) { -+ } else if (it2->end_ == it1->end_) { - ++it1; - ++it2; - } else { -@@ -370,7 +370,7 @@ ostream &operator<<(ostream &strm, const IntervalSet &s) { - ++it) { - if (it != intervals->begin()) - strm << ","; -- strm << "[" << it->begin << "," << it->end << ")"; -+ strm << "[" << it->begin_ << "," << it->end_ << ")"; - } - strm << "}"; - return strm; -diff --git a/src/include/fst/label-reachable.h b/src/include/fst/label-reachable.h -index a7c3360..491ef7d 100644 ---- a/src/include/fst/label-reachable.h -+++ b/src/include/fst/label-reachable.h -@@ -359,9 +359,9 @@ class LabelReachable { - iiter = intervals->begin(); - iiter != intervals->end(); ++iiter) { - begin_low = LowerBound(aiter, end_low, aiter_end, -- aiter_input, iiter->begin); -+ aiter_input, iiter->begin_); - end_low = LowerBound(aiter, begin_low, aiter_end, -- aiter_input, iiter->end); -+ aiter_input, iiter->end_); - if (end_low - begin_low > 0) { - if (reach_begin_ < 0) - reach_begin_ = begin_low; -diff --git a/src/include/fst/minimize.h b/src/include/fst/minimize.h -index 3fbe3ba..6e9dd3d 100644 ---- a/src/include/fst/minimize.h -+++ b/src/include/fst/minimize.h -@@ -134,7 +134,14 @@ class CyclicMinimizer { - typedef typename A::Weight Weight; - typedef ReverseArc RevA; - -- CyclicMinimizer(const ExpandedFst& fst) { -+ CyclicMinimizer(const ExpandedFst& fst): -+ // tell the Partition data-member to expect multiple repeated -+ // calls to SplitOn with the same element if we are non-deterministic. -+ P_(fst.Properties(kIDeterministic, true) == 0) { -+ if(fst.Properties(kIDeterministic, true) == 0) -+ CHECK(Weight::Properties() & kIdempotent); // this minimization -+ // algorithm for non-deterministic FSTs can only work with idempotent -+ // semirings. - Initialize(fst); - Compute(fst); - } -@@ -315,7 +322,13 @@ class AcyclicMinimizer { - typedef typename A::StateId ClassId; - typedef typename A::Weight Weight; - -- AcyclicMinimizer(const ExpandedFst& fst) { -+ AcyclicMinimizer(const ExpandedFst& fst): -+ // tell the Partition data-member to expect multiple repeated -+ // calls to SplitOn with the same element if we are non-deterministic. 
-+ partition_(fst.Properties(kIDeterministic, true) == 0) { -+ if(fst.Properties(kIDeterministic, true) == 0) -+ CHECK(Weight::Properties() & kIdempotent); // minimization for -+ // non-deterministic FSTs can only work with idempotent semirings. - Initialize(fst); - Refine(fst); - } -@@ -531,13 +544,7 @@ template - void Minimize(MutableFst* fst, - MutableFst* sfst = 0, - float delta = kDelta) { -- uint64 props = fst->Properties(kAcceptor | kIDeterministic| -- kWeighted | kUnweighted, true); -- if (!(props & kIDeterministic)) { -- FSTERROR() << "FST is not deterministic"; -- fst->SetProperties(kError, kError); -- return; -- } -+ uint64 props = fst->Properties(kAcceptor | kWeighted | kUnweighted, true); - - if (!(props & kAcceptor)) { // weighted transducer - VectorFst< GallicArc > gfst; -diff --git a/src/include/fst/partition.h b/src/include/fst/partition.h -index dcee67b..40b849a 100644 ---- a/src/include/fst/partition.h -+++ b/src/include/fst/partition.h -@@ -43,8 +43,8 @@ class Partition { - friend class PartitionIterator; - - struct Element { -- Element() : value(0), next(0), prev(0) {} -- Element(T v) : value(v), next(0), prev(0) {} -+ Element() : value(0), next(0), prev(0) {} -+ Element(T v) : value(v), next(0), prev(0) {} - - T value; - Element* next; -@@ -52,9 +52,11 @@ class Partition { - }; - - public: -- Partition() {} -+ Partition(bool allow_repeated_split): -+ allow_repeated_split_(allow_repeated_split) {} - -- Partition(T num_states) { -+ Partition(bool allow_repeated_split, T num_states): -+ allow_repeated_split_(allow_repeated_split) { - Initialize(num_states); - } - -@@ -137,16 +139,16 @@ class Partition { - if (class_size_[class_id] == 1) return; - - // first time class is split -- if (split_size_[class_id] == 0) -+ if (split_size_[class_id] == 0) { - visited_classes_.push_back(class_id); -- -+ class_split_[class_id] = classes_[class_id]; -+ } - // increment size of split (set of element at head of chain) - split_size_[class_id]++; -- -+ - // update split point -- if (class_split_[class_id] == 0) -- class_split_[class_id] = classes_[class_id]; -- if (class_split_[class_id] == elements_[element_id]) -+ if (class_split_[class_id] != 0 -+ && class_split_[class_id] == elements_[element_id]) - class_split_[class_id] = elements_[element_id]->next; - - // move to head of chain in same class -@@ -157,24 +159,31 @@ class Partition { - // class indices of the newly created class. Returns the new_class id - // or -1 if no new class was created. - T SplitRefine(T class_id) { -+ -+ Element* split_el = class_split_[class_id]; - // only split if necessary -- if (class_size_[class_id] == split_size_[class_id]) { -- class_split_[class_id] = 0; -+ //if (class_size_[class_id] == split_size_[class_id]) { -+ if(split_el == NULL) { // we split on everything... - split_size_[class_id] = 0; - return -1; - } else { -- - T new_class = AddClass(); -+ -+ if(allow_repeated_split_) { // split_size_ is possibly -+ // inaccurate, so work it out exactly. 
-+ size_t split_count; Element *e; -+ for(split_count=0,e=classes_[class_id]; -+ e != split_el; split_count++, e=e->next); -+ split_size_[class_id] = split_count; -+ } - size_t remainder = class_size_[class_id] - split_size_[class_id]; - if (remainder < split_size_[class_id]) { // add smaller -- Element* split_el = class_split_[class_id]; - classes_[new_class] = split_el; -- class_size_[class_id] = split_size_[class_id]; -- class_size_[new_class] = remainder; - split_el->prev->next = 0; - split_el->prev = 0; -+ class_size_[class_id] = split_size_[class_id]; -+ class_size_[new_class] = remainder; - } else { -- Element* split_el = class_split_[class_id]; - classes_[new_class] = classes_[class_id]; - class_size_[class_id] = remainder; - class_size_[new_class] = split_size_[class_id]; -@@ -245,10 +254,16 @@ class Partition { - vector class_size_; - - // size of split for each class -+ // in the nondeterministic case, split_size_ is actually an upper -+ // bound on the size of split for each class. - vector split_size_; - - // set of visited classes to be used in split refine - vector visited_classes_; -+ -+ // true if input fst was deterministic: we can make -+ // certain assumptions in this case that speed up the algorithm. -+ bool allow_repeated_split_; - }; - - -diff --git a/src/include/fst/state-reachable.h b/src/include/fst/state-reachable.h -index 6d0c971..1da922e 100644 ---- a/src/include/fst/state-reachable.h -+++ b/src/include/fst/state-reachable.h -@@ -112,7 +112,7 @@ class IntervalReachVisitor { - void FinishState(StateId s, StateId p, const A *arc) { - if (index_ >= 0 && fst_.Final(s) != Weight::Zero()) { - vector *intervals = (*isets_)[s].Intervals(); -- (*intervals)[0].end = index_; // Update tree interval end -+ (*intervals)[0].end_ = index_; // Update tree interval end - } - (*isets_)[s].Normalize(); - if (p != kNoStateId) From 9d0b8a2426d03b3a0e321f33dfcaf80b4854e739 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Tue, 6 Dec 2016 12:03:07 -0800 Subject: [PATCH 260/530] Fix tools/Makefile to resolve travis failure. --- tools/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/Makefile b/tools/Makefile index b6687ad1540..787a69e90f5 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -56,8 +56,7 @@ openfst: openfst_compiled openfst-$(OPENFST_VERSION)/lib .PHONY: openfst_compiled openfst_compiled: openfst-$(OPENFST_VERSION)/Makefile - cd openfst-$(OPENFST_VERSION)/ && \ - $(MAKE) install + $(MAKE) -C openfst-$(OPENFST_VERSION) install MAKEOVERRIDES= openfst-$(OPENFST_VERSION)/lib: | openfst-$(OPENFST_VERSION)/Makefile -cd openfst-$(OPENFST_VERSION) && [ -d lib64 ] && [ ! 
-d lib ] && ln -s lib64 lib From f06506b65bd7131ce421b62181a7175b0559295c Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Tue, 6 Dec 2016 15:54:15 -0800 Subject: [PATCH 261/530] Fix src/configure and add C++11 flag to makefiles/*.mk --- src/configure | 1 - src/makefiles/cygwin.mk | 3 +-- src/makefiles/darwin_10_10.mk | 19 +++++++++---------- src/makefiles/darwin_10_11.mk | 19 +++++++++---------- src/makefiles/darwin_10_12.mk | 19 +++++++++---------- src/makefiles/darwin_10_5.mk | 9 ++++----- src/makefiles/darwin_10_6.mk | 9 ++++----- src/makefiles/darwin_10_7.mk | 9 ++++----- src/makefiles/darwin_10_8.mk | 9 ++++----- src/makefiles/darwin_10_9.mk | 19 +++++++++---------- src/makefiles/linux_atlas.mk | 4 ++-- src/makefiles/linux_atlas_arm.mk | 4 ++-- src/makefiles/linux_clapack.mk | 4 ++-- src/makefiles/linux_clapack_arm.mk | 4 ++-- src/makefiles/linux_openblas.mk | 9 ++++----- src/makefiles/linux_openblas_arm.mk | 9 ++++----- src/makefiles/linux_x86_64_mkl.mk | 6 +++--- 17 files changed, 72 insertions(+), 84 deletions(-) diff --git a/src/configure b/src/configure index 736689dc868..d3e9d63760f 100755 --- a/src/configure +++ b/src/configure @@ -876,7 +876,6 @@ fi OPENFST_VER="${OPENFST_VER:-`grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::'`}" echo "OPENFST_VER = $OPENFST_VER" >> kaldi.mk -echo "CXXFLAGS += -std=c++0x" >> kaldi.mk # Most of the OS-specific steps below will append to kaldi.mk echo "Doing OS specific configurations ..." diff --git a/src/makefiles/cygwin.mk b/src/makefiles/cygwin.mk index 6da982e20a4..e8f926ab986 100644 --- a/src/makefiles/cygwin.mk +++ b/src/makefiles/cygwin.mk @@ -10,7 +10,7 @@ CXXFLAGS = -msse -msse2 -Wall -I.. -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ -I ../../tools/CLAPACK/ \ -I $(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ + -std=c++0x $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) @@ -24,4 +24,3 @@ CXX = g++ CC = g++ RANLIB = ranlib AR = ar - diff --git a/src/makefiles/darwin_10_10.mk b/src/makefiles/darwin_10_10.mk index aeff69d4953..498180c6f99 100644 --- a/src/makefiles/darwin_10_10.mk +++ b/src/makefiles/darwin_10_10.mk @@ -5,14 +5,13 @@ ifndef FSTROOT endif DOUBLE_PRECISION = 0 -CXXFLAGS += -msse -msse2 -Wall -I.. \ - -pthread \ +CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ + -std=c++0x $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID @@ -20,13 +19,6 @@ ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif -LDFLAGS = -g -LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate -CXX = g++ -CC = $(CXX) -RANLIB = ranlib -AR = ar - # Add no-mismatched-tags flag to suppress the annoying clang warnings # that are perfectly valid per spec. COMPILER = $(shell $(CXX) -v 2>&1 ) @@ -39,3 +31,10 @@ endif ifeq ($(findstring GCC,$(COMPILER)),GCC) CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs endif + +LDFLAGS = -g +LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate +CXX = g++ +CC = $(CXX) +RANLIB = ranlib +AR = ar diff --git a/src/makefiles/darwin_10_11.mk b/src/makefiles/darwin_10_11.mk index 40ee3adf6d0..a2bd5ad028a 100644 --- a/src/makefiles/darwin_10_11.mk +++ b/src/makefiles/darwin_10_11.mk @@ -5,14 +5,13 @@ ifndef FSTROOT endif DOUBLE_PRECISION = 0 -CXXFLAGS += -msse -msse2 -Wall -I.. 
\ - -pthread \ +CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) -Wno-unused-local-typedef \ + -std=c++0x $(EXTRA_CXXFLAGS) -Wno-unused-local-typedef \ -g # -O0 -DKALDI_PARANOID @@ -20,13 +19,6 @@ ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif -LDFLAGS = -g -LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate -CXX = g++ -CC = $(CXX) -RANLIB = ranlib -AR = ar - # Add no-mismatched-tags flag to suppress the annoying clang warnings # that are perfectly valid per spec. COMPILER = $(shell $(CXX) -v 2>&1 ) @@ -39,3 +31,10 @@ endif ifeq ($(findstring GCC,$(COMPILER)),GCC) CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs endif + +LDFLAGS = -g +LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate +CXX = g++ +CC = $(CXX) +RANLIB = ranlib +AR = ar diff --git a/src/makefiles/darwin_10_12.mk b/src/makefiles/darwin_10_12.mk index 10acd2d8577..946788a3db0 100644 --- a/src/makefiles/darwin_10_12.mk +++ b/src/makefiles/darwin_10_12.mk @@ -5,14 +5,13 @@ ifndef FSTROOT endif DOUBLE_PRECISION = 0 -CXXFLAGS += -msse -msse2 -Wall -I.. \ - -pthread \ +CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) -Wno-unused-local-typedef \ + -std=c++0x $(EXTRA_CXXFLAGS) -Wno-unused-local-typedef \ -g # -O0 -DKALDI_PARANOID @@ -20,13 +19,6 @@ ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif -LDFLAGS = -g -LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate -CXX = g++ -CC = $(CXX) -RANLIB = ranlib -AR = ar - # Add no-mismatched-tags flag to suppress the annoying clang warnings # that are perfectly valid per spec. COMPILER = $(shell $(CXX) -v 2>&1 ) @@ -39,3 +31,10 @@ endif ifeq ($(findstring GCC,$(COMPILER)),GCC) CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs endif + +LDFLAGS = -g +LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate +CXX = g++ +CC = $(CXX) +RANLIB = ranlib +AR = ar diff --git a/src/makefiles/darwin_10_5.mk b/src/makefiles/darwin_10_5.mk index 5a1353b3893..6f3e6605226 100644 --- a/src/makefiles/darwin_10_5.mk +++ b/src/makefiles/darwin_10_5.mk @@ -1,22 +1,21 @@ # makefiles/darwin_10_5.mk contains Darwin-specific rules for OS X 10.5.* ifndef FSTROOT -$(error FSTROOT not defined.) + $(error FSTROOT not defined.) endif DOUBLE_PRECISION = 0 -CXXFLAGS += -msse -msse2 -Wall -I.. \ - -pthread \ +CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ + -std=c++0x $(EXTRA_CXXFLAGS) \ -gdwarf-2 # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC + CXXFLAGS += -fPIC endif LDFLAGS = -gdwarf-2 diff --git a/src/makefiles/darwin_10_6.mk b/src/makefiles/darwin_10_6.mk index 50883335a9d..10398326126 100644 --- a/src/makefiles/darwin_10_6.mk +++ b/src/makefiles/darwin_10_6.mk @@ -1,22 +1,21 @@ # makefiles/darwin_10_6.mk contains Darwin-specific rules for OS X 10.6.* ifndef FSTROOT -$(error FSTROOT not defined.) + $(error FSTROOT not defined.) endif DOUBLE_PRECISION = 0 -CXXFLAGS += -msse -msse2 -Wall -I.. 
\ - -pthread \ +CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -rdynamic \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ + -std=c++0x $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC + CXXFLAGS += -fPIC endif LDFLAGS = -g -rdynamic diff --git a/src/makefiles/darwin_10_7.mk b/src/makefiles/darwin_10_7.mk index ad5a153f5a9..fd491a91968 100644 --- a/src/makefiles/darwin_10_7.mk +++ b/src/makefiles/darwin_10_7.mk @@ -1,23 +1,22 @@ # makefiles/darwin_10_6.mk contains Darwin-specific rules for OS X 10.7.* ifndef FSTROOT -$(error FSTROOT not defined.) + $(error FSTROOT not defined.) endif DOUBLE_PRECISION = 0 -CXXFLAGS += -msse -msse2 -Wall -I.. \ - -pthread \ +CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -rdynamic \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ + -std=c++0x $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC + CXXFLAGS += -fPIC endif LDFLAGS = -g -rdynamic diff --git a/src/makefiles/darwin_10_8.mk b/src/makefiles/darwin_10_8.mk index c89aea0f44f..54203882c5f 100644 --- a/src/makefiles/darwin_10_8.mk +++ b/src/makefiles/darwin_10_8.mk @@ -1,23 +1,22 @@ # makefiles/darwin_10_8.mk contains Darwin-specific rules for OS X 10.8.* ifndef FSTROOT -$(error FSTROOT not defined.) + $(error FSTROOT not defined.) endif DOUBLE_PRECISION = 0 -CXXFLAGS += -msse -msse2 -Wall -I.. \ - -pthread \ +CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -rdynamic \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ + -std=c++0x $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC + CXXFLAGS += -fPIC endif LDFLAGS = -g -rdynamic diff --git a/src/makefiles/darwin_10_9.mk b/src/makefiles/darwin_10_9.mk index ede1712e155..c0d2adfd97f 100644 --- a/src/makefiles/darwin_10_9.mk +++ b/src/makefiles/darwin_10_9.mk @@ -5,14 +5,13 @@ ifndef FSTROOT endif DOUBLE_PRECISION = 0 -CXXFLAGS += -msse -msse2 -Wall -I.. \ - -pthread \ +CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -Wno-sign-compare -Winit-self \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ + -std=c++0x $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID @@ -20,13 +19,6 @@ ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif -LDFLAGS = -g -LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate -CXX = g++ -CC = $(CXX) -RANLIB = ranlib -AR = ar - # Add no-mismatched-tags flag to suppress the annoying clang warnings # that are perfectly valid per spec. COMPILER = $(shell $(CXX) -v 2>&1 ) @@ -39,3 +31,10 @@ endif ifeq ($(findstring GCC,$(COMPILER)),GCC) CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs endif + +LDFLAGS = -g +LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate +CXX = g++ +CC = $(CXX) +RANLIB = ranlib +AR = ar diff --git a/src/makefiles/linux_atlas.mk b/src/makefiles/linux_atlas.mk index a0b757ed39a..9cf05d18b8d 100644 --- a/src/makefiles/linux_atlas.mk +++ b/src/makefiles/linux_atlas.mk @@ -21,8 +21,8 @@ CXXFLAGS = -msse -msse2 -Wall -I.. 
\ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_ATLAS -I$(ATLASINC) \ -I$(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID + -std=c++0x $(EXTRA_CXXFLAGS) \ + -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC diff --git a/src/makefiles/linux_atlas_arm.mk b/src/makefiles/linux_atlas_arm.mk index 07d9e9f3385..07d3b7f5278 100644 --- a/src/makefiles/linux_atlas_arm.mk +++ b/src/makefiles/linux_atlas_arm.mk @@ -21,8 +21,8 @@ CXXFLAGS = -ftree-vectorize -mfloat-abi=hard -mfpu=neon -Wall -I.. \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_ATLAS -I$(ATLASINC) \ -I$(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID + -std=c++0x $(EXTRA_CXXFLAGS) \ + -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC diff --git a/src/makefiles/linux_clapack.mk b/src/makefiles/linux_clapack.mk index 83ec0ddce82..de2f1b85aa2 100644 --- a/src/makefiles/linux_clapack.mk +++ b/src/makefiles/linux_clapack.mk @@ -7,8 +7,8 @@ CXXFLAGS = -msse -Wall -I.. -pthread \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_CLAPACK -I ../../tools/CLAPACK \ -I ../../tools/openfst/include \ - $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID + -std=c++0x $(EXTRA_CXXFLAGS) \ + -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC diff --git a/src/makefiles/linux_clapack_arm.mk b/src/makefiles/linux_clapack_arm.mk index 94e6ee25bf1..6c20c8734c9 100644 --- a/src/makefiles/linux_clapack_arm.mk +++ b/src/makefiles/linux_clapack_arm.mk @@ -7,8 +7,8 @@ CXXFLAGS = -ftree-vectorize -mfloat-abi=hard -mfpu=neon -Wall -I.. -pthread \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_CLAPACK -I ../../tools/CLAPACK \ -I ../../tools/openfst/include \ - $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID + -std=c++0x $(EXTRA_CXXFLAGS) \ + -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC diff --git a/src/makefiles/linux_openblas.mk b/src/makefiles/linux_openblas.mk index 7a4e2687664..307945222a7 100644 --- a/src/makefiles/linux_openblas.mk +++ b/src/makefiles/linux_openblas.mk @@ -14,22 +14,21 @@ endif DOUBLE_PRECISION = 0 -CXXFLAGS = -msse -msse2 -Wall -I.. \ - -pthread \ +CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_OPENBLAS -I $(OPENBLASROOT)/include \ -I $(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID + -std=c++0x $(EXTRA_CXXFLAGS) \ + -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif LDFLAGS = -rdynamic $(OPENFSTLDFLAGS) -LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl +LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl CC = g++ CXX = g++ AR = ar diff --git a/src/makefiles/linux_openblas_arm.mk b/src/makefiles/linux_openblas_arm.mk index e4c18e6b4d4..ec9dbd544f9 100644 --- a/src/makefiles/linux_openblas_arm.mk +++ b/src/makefiles/linux_openblas_arm.mk @@ -14,22 +14,21 @@ endif DOUBLE_PRECISION = 0 -CXXFLAGS = -ftree-vectorize -mfloat-abi=hard -mfpu=neon -Wall -I.. \ - -pthread \ +CXXFLAGS = -ftree-vectorize -mfloat-abi=hard -mfpu=neon -Wall -I.. 
-pthread \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_OPENBLAS -I $(OPENBLASROOT)/include \ -I $(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID + -std=c++0x $(EXTRA_CXXFLAGS) \ + -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif LDFLAGS = -rdynamic $(OPENFSTLDFLAGS) -LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl +LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl CC = g++ CXX = g++ AR = ar diff --git a/src/makefiles/linux_x86_64_mkl.mk b/src/makefiles/linux_x86_64_mkl.mk index 7186f4bbb88..20ac2fac5df 100644 --- a/src/makefiles/linux_x86_64_mkl.mk +++ b/src/makefiles/linux_x86_64_mkl.mk @@ -1,9 +1,9 @@ # You have to make sure MKLROOT and (optionally) MKLLIB is set -# We have tested Kaldi with MKL version 10.2 on Linux/GCC and Intel(R) 64 +# We have tested Kaldi with MKL version 10.2 on Linux/GCC and Intel(R) 64 # architecture (also referred to as x86_64) with LP64 interface layer. -# The linking flags for MKL will be very different depending on the OS, +# The linking flags for MKL will be very different depending on the OS, # architecture, compiler, etc. used. The correct flags can be obtained from # http://software.intel.com/en-us/articles/intel-mkl-link-line-advisor/ # Use the options obtained from this website to manually configure for other @@ -26,7 +26,7 @@ CXXFLAGS = -m64 -msse -msse2 -pthread -Wall -I.. \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_MKL -I$(MKLROOT)/include \ -I$(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ + -std=c++0x $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) From 153c61496f61af67ecdc8a8b6191d35e2755b804 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Wed, 7 Dec 2016 00:07:20 -0800 Subject: [PATCH 262/530] Add check for compiler with C++11 support --- src/fstext/table-matcher.h | 2 +- src/makefiles/cygwin.mk | 2 +- src/makefiles/darwin_10_10.mk | 2 +- src/makefiles/darwin_10_11.mk | 2 +- src/makefiles/darwin_10_12.mk | 2 +- src/makefiles/darwin_10_5.mk | 2 +- src/makefiles/darwin_10_6.mk | 2 +- src/makefiles/darwin_10_7.mk | 2 +- src/makefiles/darwin_10_8.mk | 2 +- src/makefiles/darwin_10_9.mk | 2 +- src/makefiles/linux_atlas.mk | 2 +- src/makefiles/linux_atlas_arm.mk | 2 +- src/makefiles/linux_clapack.mk | 2 +- src/makefiles/linux_clapack_arm.mk | 2 +- src/makefiles/linux_openblas.mk | 2 +- src/makefiles/linux_openblas_arm.mk | 2 +- src/makefiles/linux_x86_64_mkl.mk | 2 +- tools/Makefile | 9 +++++---- tools/extras/check_dependencies.sh | 11 ++++++++++- 19 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/fstext/table-matcher.h b/src/fstext/table-matcher.h index 792fe98fe83..3e704879fb9 100644 --- a/src/fstext/table-matcher.h +++ b/src/fstext/table-matcher.h @@ -259,7 +259,7 @@ class TableMatcher : public MatcherBase { bool safe = false) : impl_(matcher.impl_) { if (safe == true) { - KALDI_ERR << "TableMatcher: Safe copy not supported"; + LOG(FATAL) << "TableMatcher: Safe copy not supported"; } } diff --git a/src/makefiles/cygwin.mk b/src/makefiles/cygwin.mk index e8f926ab986..c6871e6802d 100644 --- a/src/makefiles/cygwin.mk +++ b/src/makefiles/cygwin.mk @@ -10,7 +10,7 @@ CXXFLAGS = -msse -msse2 -Wall -I.. 
-DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ -I ../../tools/CLAPACK/ \ -I $(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) diff --git a/src/makefiles/darwin_10_10.mk b/src/makefiles/darwin_10_10.mk index 498180c6f99..c6d75dc69ae 100644 --- a/src/makefiles/darwin_10_10.mk +++ b/src/makefiles/darwin_10_10.mk @@ -11,7 +11,7 @@ CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID diff --git a/src/makefiles/darwin_10_11.mk b/src/makefiles/darwin_10_11.mk index a2bd5ad028a..b0eba615a49 100644 --- a/src/makefiles/darwin_10_11.mk +++ b/src/makefiles/darwin_10_11.mk @@ -11,7 +11,7 @@ CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) -Wno-unused-local-typedef \ + -std=c++11 $(EXTRA_CXXFLAGS) -Wno-unused-local-typedef \ -g # -O0 -DKALDI_PARANOID diff --git a/src/makefiles/darwin_10_12.mk b/src/makefiles/darwin_10_12.mk index 946788a3db0..8721a33b304 100644 --- a/src/makefiles/darwin_10_12.mk +++ b/src/makefiles/darwin_10_12.mk @@ -11,7 +11,7 @@ CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) -Wno-unused-local-typedef \ + -std=c++11 $(EXTRA_CXXFLAGS) -Wno-unused-local-typedef \ -g # -O0 -DKALDI_PARANOID diff --git a/src/makefiles/darwin_10_5.mk b/src/makefiles/darwin_10_5.mk index 6f3e6605226..ae9f59a6f86 100644 --- a/src/makefiles/darwin_10_5.mk +++ b/src/makefiles/darwin_10_5.mk @@ -11,7 +11,7 @@ CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -gdwarf-2 # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) diff --git a/src/makefiles/darwin_10_6.mk b/src/makefiles/darwin_10_6.mk index 10398326126..880fff9973a 100644 --- a/src/makefiles/darwin_10_6.mk +++ b/src/makefiles/darwin_10_6.mk @@ -11,7 +11,7 @@ CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -rdynamic \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) diff --git a/src/makefiles/darwin_10_7.mk b/src/makefiles/darwin_10_7.mk index fd491a91968..6cdb7181f96 100644 --- a/src/makefiles/darwin_10_7.mk +++ b/src/makefiles/darwin_10_7.mk @@ -11,7 +11,7 @@ CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -rdynamic \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID diff --git a/src/makefiles/darwin_10_8.mk b/src/makefiles/darwin_10_8.mk index 54203882c5f..8aa305c5c94 100644 --- a/src/makefiles/darwin_10_8.mk +++ b/src/makefiles/darwin_10_8.mk @@ -11,7 +11,7 @@ CXXFLAGS = -msse -msse2 -Wall -I.. 
-pthread \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -rdynamic \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID diff --git a/src/makefiles/darwin_10_9.mk b/src/makefiles/darwin_10_9.mk index c0d2adfd97f..ee3090f1036 100644 --- a/src/makefiles/darwin_10_9.mk +++ b/src/makefiles/darwin_10_9.mk @@ -11,7 +11,7 @@ CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ -DHAVE_CLAPACK \ -I$(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID diff --git a/src/makefiles/linux_atlas.mk b/src/makefiles/linux_atlas.mk index 9cf05d18b8d..d985344f479 100644 --- a/src/makefiles/linux_atlas.mk +++ b/src/makefiles/linux_atlas.mk @@ -21,7 +21,7 @@ CXXFLAGS = -msse -msse2 -Wall -I.. \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_ATLAS -I$(ATLASINC) \ -I$(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) diff --git a/src/makefiles/linux_atlas_arm.mk b/src/makefiles/linux_atlas_arm.mk index 07d3b7f5278..3359ea5e626 100644 --- a/src/makefiles/linux_atlas_arm.mk +++ b/src/makefiles/linux_atlas_arm.mk @@ -21,7 +21,7 @@ CXXFLAGS = -ftree-vectorize -mfloat-abi=hard -mfpu=neon -Wall -I.. \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_ATLAS -I$(ATLASINC) \ -I$(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) diff --git a/src/makefiles/linux_clapack.mk b/src/makefiles/linux_clapack.mk index de2f1b85aa2..d9cd6163ceb 100644 --- a/src/makefiles/linux_clapack.mk +++ b/src/makefiles/linux_clapack.mk @@ -7,7 +7,7 @@ CXXFLAGS = -msse -Wall -I.. -pthread \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_CLAPACK -I ../../tools/CLAPACK \ -I ../../tools/openfst/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) diff --git a/src/makefiles/linux_clapack_arm.mk b/src/makefiles/linux_clapack_arm.mk index 6c20c8734c9..f155248862c 100644 --- a/src/makefiles/linux_clapack_arm.mk +++ b/src/makefiles/linux_clapack_arm.mk @@ -7,7 +7,7 @@ CXXFLAGS = -ftree-vectorize -mfloat-abi=hard -mfpu=neon -Wall -I.. -pthread \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_CLAPACK -I ../../tools/CLAPACK \ -I ../../tools/openfst/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) diff --git a/src/makefiles/linux_openblas.mk b/src/makefiles/linux_openblas.mk index 307945222a7..2d09bc2bcfc 100644 --- a/src/makefiles/linux_openblas.mk +++ b/src/makefiles/linux_openblas.mk @@ -20,7 +20,7 @@ CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_OPENBLAS -I $(OPENBLASROOT)/include \ -I $(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) diff --git a/src/makefiles/linux_openblas_arm.mk b/src/makefiles/linux_openblas_arm.mk index ec9dbd544f9..3a72d96308f 100644 --- a/src/makefiles/linux_openblas_arm.mk +++ b/src/makefiles/linux_openblas_arm.mk @@ -20,7 +20,7 @@ CXXFLAGS = -ftree-vectorize -mfloat-abi=hard -mfpu=neon -Wall -I.. 
-pthread \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_OPENBLAS -I $(OPENBLASROOT)/include \ -I $(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) diff --git a/src/makefiles/linux_x86_64_mkl.mk b/src/makefiles/linux_x86_64_mkl.mk index 20ac2fac5df..7e9c13e6ac0 100644 --- a/src/makefiles/linux_x86_64_mkl.mk +++ b/src/makefiles/linux_x86_64_mkl.mk @@ -26,7 +26,7 @@ CXXFLAGS = -m64 -msse -msse2 -pthread -Wall -I.. \ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ -DHAVE_MKL -I$(MKLROOT)/include \ -I$(FSTROOT)/include \ - -std=c++0x $(EXTRA_CXXFLAGS) \ + -std=c++11 $(EXTRA_CXXFLAGS) \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) diff --git a/tools/Makefile b/tools/Makefile index 787a69e90f5..eb62da22c4e 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,11 +1,12 @@ # SHELL += -x CXX = g++ -# CXX = clang++ # Uncomment this line to build with Clang. -CC = gcc # used for sph2pipe +CC = gcc # used for sph2pipe +# CXX = clang++ # Uncomment these lines +# CC = clang # to build with Clang. -# Note: OpenFst >= 1.5.3 requires C++11 support, hence you will need to use a -# relatively recent C++ compiler, e.g. gcc >= 4.6, clang >= 3.0. +# Note: OpenFst >= 1.5.3 and Kaldi require a relatively recent C++ compiler +# with C++11 support, e.g. g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3. OPENFST_VERSION = 1.5.4 OPENFST_VER_NUM := $(shell echo $(OPENFST_VERSION) | sed 's/\./ /g' | xargs printf "%d%02d%02d") diff --git a/tools/extras/check_dependencies.sh b/tools/extras/check_dependencies.sh index f45402e810e..c1b4912c8d9 100755 --- a/tools/extras/check_dependencies.sh +++ b/tools/extras/check_dependencies.sh @@ -21,7 +21,16 @@ fi if ! which g++ >&/dev/null; then echo "$0: g++ is not installed." - add_packages gcc-c++ g++ gcc-c++ + echo " You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3." + # add_packages gcc-c++ g++ gcc-c++ +elif [[ $(g++ -v 2>&1) == *"GCC"* ]]; then + GCC_VER=$(g++ -dumpversion) + GCC_VER_NUM=$(echo $GCC_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d") + if [ $GCC_VER_NUM -lt 40700 ]; then + echo "$0: System default g++ ($GCC_VER) does not support C++11." + echo " You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3." + # add_packages gcc-c++ g++ gcc-c++ + fi fi if ! echo "#include " | gcc -E - >&/dev/null; then From ef79b1bd1c3957056744593d30185ddfd0fcd022 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Wed, 7 Dec 2016 00:38:31 -0800 Subject: [PATCH 263/530] Update installation instructions. --- src/INSTALL | 29 ++++++++++++++++++++--------- tools/INSTALL | 10 +++++++--- tools/Makefile | 6 +++--- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/INSTALL b/src/INSTALL index 3f7a01928ba..8decefe71c2 100644 --- a/src/INSTALL +++ b/src/INSTALL @@ -6,14 +6,25 @@ compilation, see ../windows/INSTALL. You must first have completed the installation steps in ../tools/INSTALL (compiling OpenFst; getting ATLAS and CLAPACK headers). -The installation instructions are: -./configure --shared -make depend -make - -Note that "make" takes a long time; you can speed it up by running make -in parallel if you have multiple CPUs, for instance - make depend -j 8 - make -j 8 +The installation instructions are + + ./configure --shared + make depend + make + +Note that "make" takes a long time. You can speed it up by running make +in parallel if you have multiple CPUs, e.g. 
to use 8 CPUs + + make depend -j 8 + make -j 8 + +Kaldi requires a relatively recent C++ compiler with C++11 support, +e.g. g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3. If your system +default compiler does not support C++11, you can specify a C++11 compliant +compiler by setting the CXX environment variable, e.g. + + make depend CXX=g++-4.8 + make CXX=g++-4.8 + For more information, see documentation at http://kaldi-asr.org/doc/ and click on "The build process (how Kaldi is compiled)". diff --git a/tools/INSTALL b/tools/INSTALL index 0678e2c8815..7e5549294c8 100644 --- a/tools/INSTALL +++ b/tools/INSTALL @@ -14,12 +14,16 @@ Then run make If you have multiple CPUs and want to speed things up, you can do a parallel -build by supplying the "-j" option to make, e.g. to use 4 CPUs: +build by supplying the "-j" option to make, e.g. to use 4 CPUs make -j 4 -Kaldi builds against OpenFst >= 1.5.3 which requires a relatively new compiler -with C++11 support, e.g. gcc >= 4.6, clang >= 3.0. +OpenFst requires a relatively recent C++ compiler with C++11 support, +e.g. g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3. If your system +default compiler does not support C++11, you can specify a C++11 compliant +compiler by setting the CXX environment variable, e.g. + + make CXX=g++-4.8 In extras/, there are also various scripts to install extra bits and pieces that are used by individual example scripts. If an example script needs you to run diff --git a/tools/Makefile b/tools/Makefile index eb62da22c4e..f6fe7a45db8 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -5,9 +5,9 @@ CC = gcc # used for sph2pipe # CXX = clang++ # Uncomment these lines # CC = clang # to build with Clang. -# Note: OpenFst >= 1.5.3 and Kaldi require a relatively recent C++ compiler -# with C++11 support, e.g. g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3. -OPENFST_VERSION = 1.5.4 +# Note: OpenFst requires a relatively recent C++ compiler with C++11 support, +# e.g. g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3. +OPENFST_VERSION = 1.5.4 # Supported versions: >= 1.5.3 OPENFST_VER_NUM := $(shell echo $(OPENFST_VERSION) | sed 's/\./ /g' | xargs printf "%d%02d%02d") ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10503)","1") From 0f378d87c0c38a614c87b54584835b6a1b89a059 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Wed, 7 Dec 2016 00:54:54 -0800 Subject: [PATCH 264/530] Remove a comment in tools/Makefile to resolve the build problem. --- tools/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/Makefile b/tools/Makefile index f6fe7a45db8..772f8c18398 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -7,7 +7,7 @@ CC = gcc # used for sph2pipe # Note: OpenFst requires a relatively recent C++ compiler with C++11 support, # e.g. g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3. -OPENFST_VERSION = 1.5.4 # Supported versions: >= 1.5.3 +OPENFST_VERSION = 1.5.4 OPENFST_VER_NUM := $(shell echo $(OPENFST_VERSION) | sed 's/\./ /g' | xargs printf "%d%02d%02d") ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10503)","1") From c15d5c7a0a0b29694ba8d68eeece46911f2c4c2e Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Wed, 7 Dec 2016 22:18:29 -0800 Subject: [PATCH 265/530] Add C++11 compliant compiler check and update installation instructions. 
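
The compiler checks added below compare dotted version strings by flattening
them into fixed-width integers, the same idiom the script already uses for
GCC_VER_NUM and CLANG_VER_NUM. For reference, a minimal sketch of that idiom
(the version_num helper is illustrative only and is not part of this patch):

    #!/usr/bin/env bash
    # Turn "MAJOR.MINOR.PATCH" into a comparable integer, e.g. 4.7.0 -> 40700.
    # Assumes numeric components; a missing trailing component prints as 00.
    version_num() {
      echo "$1" | sed 's/\./ /g' | xargs printf "%d%02d%02d"
    }
    [ "$(version_num 4.6.4)" -lt 40700 ] && echo "too old"  # 40604: prints "too old"
    [ "$(version_num 4.10.1)" -lt 40700 ] || echo "ok"      # 41001: prints "ok"

The zero padding is what keeps, say, g++ 4.10 from comparing as older than
4.7, which a naive string comparison of "4.10" and "4.7" would get wrong.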
--- tools/INSTALL | 30 ++++++++++--------- tools/extras/check_dependencies.sh | 48 ++++++++++++++++++++++-------- 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/tools/INSTALL b/tools/INSTALL index 7e5549294c8..1ca33f9c515 100644 --- a/tools/INSTALL +++ b/tools/INSTALL @@ -1,30 +1,32 @@ - -To install the most important prerequisites for Kaldi: - - first do +To check the prerequisites for Kaldi, first run extras/check_dependencies.sh -to see if there are any system-level installations or modifications you need to do. -Check the output carefully: there are some things that will make your life a lot -easier if you fix them at this stage. +and see if there are any system-level installations you need to do. Check the +output carefully. There are some things that will make your life a lot easier +if you fix them at this stage. If your system default C++ compiler is not +supported, you can do the check with another compiler by setting the CXX +environment variable, e.g. + + CXX=g++-4.8 extras/check_dependencies.sh Then run make +which by default will install ATLAS headers, OpenFst, SCTK and sph2pipe. +OpenFst requires a relatively recent C++ compiler with C++11 support, e.g. +g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3. If your system default +compiler does not have adequate support for C++11, you can specify a C++11 +compliant compiler as a command argument, e.g. + + make CXX=g++-4.8 + If you have multiple CPUs and want to speed things up, you can do a parallel build by supplying the "-j" option to make, e.g. to use 4 CPUs make -j 4 -OpenFst requires a relatively recent C++ compiler with C++11 support, -e.g. g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3. If your system -default compiler does not support C++11, you can specify a C++11 compliant -compiler by setting the CXX environment variable, e.g. - - make CXX=g++-4.8 - In extras/, there are also various scripts to install extra bits and pieces that are used by individual example scripts. If an example script needs you to run one of those scripts, it will tell you what to do. diff --git a/tools/extras/check_dependencies.sh b/tools/extras/check_dependencies.sh index c1b4912c8d9..3c26fd53e82 100755 --- a/tools/extras/check_dependencies.sh +++ b/tools/extras/check_dependencies.sh @@ -1,4 +1,7 @@ -#!/bin/bash +#!/usr/bin/env bash + +CXX=${CXX:-g++} +status=0 # at some point we could try to add packages for Cywgin or macports(?) to this # script. @@ -19,17 +22,36 @@ if ! which which >&/dev/null; then add_packages which debianutils which fi -if ! which g++ >&/dev/null; then - echo "$0: g++ is not installed." - echo " You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3." - # add_packages gcc-c++ g++ gcc-c++ -elif [[ $(g++ -v 2>&1) == *"GCC"* ]]; then - GCC_VER=$(g++ -dumpversion) - GCC_VER_NUM=$(echo $GCC_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d") - if [ $GCC_VER_NUM -lt 40700 ]; then - echo "$0: System default g++ ($GCC_VER) does not support C++11." - echo " You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3." - # add_packages gcc-c++ g++ gcc-c++ +if ! which $CXX >&/dev/null; then + echo "$0: $CXX is not installed." + echo "$0: You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3." + status=1 +else + COMPILER_VER_INFO=$($CXX --version 2>/dev/null) + if [[ $COMPILER_VER_INFO == *"g++"* ]]; then + GCC_VER=$($CXX -dumpversion) + GCC_VER_NUM=$(echo $GCC_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d") + if [ $GCC_VER_NUM -lt 40700 ]; then + echo "$0: $CXX (g++-$GCC_VER) is not supported." 
+ echo "$0: You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3." + status=1 + fi + elif [[ $COMPILER_VER_INFO == *"Apple"* ]]; then + CLANG_VER=$(echo $COMPILER_VER_INFO | grep version | sed "s/.*version \([0-9\.]*\).*/\1/") + CLANG_VER_NUM=$(echo $COMPILER_VER_INFO | grep version | sed "s/.*clang-\([0-9]*\).*/\1/") + if [ $CLANG_VER_NUM -lt 500 ]; then + echo "$0: $CXX (Apple clang-$CLANG_VER) is not supported." + echo "$0: You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3." + status=1 + fi + elif [[ $COMPILER_VER_INFO == *"LLVM"* ]]; then + CLANG_VER=$(echo $COMPILER_VER_INFO | grep version | sed "s/.*version \([0-9\.]*\).*/\1/") + CLANG_VER_NUM=$(echo $CLANG_VER | sed 's/\./ /g' | xargs printf "%d%02d") + if [ $CLANG_VER_NUM -lt 303 ]; then + echo "$0: $CXX (LLVM clang-$CLANG_VER) is not supported." + echo "$0: You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3." + status=1 + fi fi fi @@ -141,7 +163,7 @@ fi if [ ! -z "$debian_packages" ]; then # If the list of packages to be installed is nonempty, # we'll exit with error status. Check this outside of - # hecking for yum or apt-get, as we want it to exit with + # checking for yum or apt-get, as we want it to exit with # error even if we're not on Debian or red hat. status=1 fi From 7acc3a6bd84777cb125ead48133b381566ec28ed Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 15 Dec 2016 15:56:01 -0800 Subject: [PATCH 266/530] Refactor makefiles/*.mk --- src/makefiles/common.mk | 35 ++++++++++++++----- src/makefiles/cuda_32bit.mk | 5 ++- src/makefiles/cuda_64bit.mk | 5 +-- src/makefiles/cygwin.mk | 28 +++------------ src/makefiles/darwin.mk | 17 +++++++++ src/makefiles/darwin_10_10.mk | 40 --------------------- src/makefiles/darwin_10_11.mk | 40 --------------------- src/makefiles/darwin_10_12.mk | 40 --------------------- src/makefiles/darwin_10_5.mk | 26 -------------- src/makefiles/darwin_10_6.mk | 26 -------------- src/makefiles/darwin_10_7.mk | 27 --------------- src/makefiles/darwin_10_8.mk | 27 --------------- src/makefiles/darwin_10_9.mk | 40 --------------------- src/makefiles/default_rules.mk | 54 ++++++++++++++++------------- src/makefiles/linux_atlas.mk | 32 +++-------------- src/makefiles/linux_atlas_arm.mk | 32 +++-------------- src/makefiles/linux_clapack.mk | 27 ++++----------- src/makefiles/linux_clapack_arm.mk | 27 ++++----------- src/makefiles/linux_openblas.mk | 32 ++++------------- src/makefiles/linux_openblas_arm.mk | 32 ++++------------- src/makefiles/linux_x86_64_mkl.mk | 30 +++------------- 21 files changed, 122 insertions(+), 500 deletions(-) create mode 100644 src/makefiles/darwin.mk delete mode 100644 src/makefiles/darwin_10_10.mk delete mode 100644 src/makefiles/darwin_10_11.mk delete mode 100644 src/makefiles/darwin_10_12.mk delete mode 100644 src/makefiles/darwin_10_5.mk delete mode 100644 src/makefiles/darwin_10_6.mk delete mode 100644 src/makefiles/darwin_10_7.mk delete mode 100644 src/makefiles/darwin_10_8.mk delete mode 100644 src/makefiles/darwin_10_9.mk diff --git a/src/makefiles/common.mk b/src/makefiles/common.mk index 3a464ea99a1..93f6d98c471 100644 --- a/src/makefiles/common.mk +++ b/src/makefiles/common.mk @@ -1,13 +1,30 @@ -# Rules that enable valgrind debugging ("make valgrind") +# Platform independent settings -valgrind: .valgrind +ifndef FSTROOT +$(error FSTROOT not defined.) +endif -.valgrind: - echo -n > valgrind.out - for x in $(TESTFILES); do echo $$x>>valgrind.out; valgrind ./$$x >/dev/null 2>> valgrind.out; done - ! 
( grep 'ERROR SUMMARY' valgrind.out | grep -v '0 errors' ) - ! ( grep 'definitely lost' valgrind.out | grep -v -w 0 ) - rm valgrind.out - touch .valgrind +ifndef DOUBLE_PRECISION +$(error DOUBLE_PRECISION not defined.) +endif +ifndef OPENFSTLIBS +$(error OPENFSTLIBS not defined.) +endif +CXXFLAGS = -std=c++11 -I.. -I$(FSTROOT)/include \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ + $(EXTRA_CXXFLAGS) \ + -g # -O0 -DKALDI_PARANOID + +ifeq ($(KALDI_FLAVOR), dynamic) +CXXFLAGS += -fPIC +endif + +LDFLAGS = $(OPENFSTLDFLAGS) $(EXTRA_LDFLAGS) +LDLIBS = $(OPENFSTLIBS) -lm -lpthread -ldl $(EXTRA_LDLIBS) + +RANLIB = ranlib +AR = ar +AS = as diff --git a/src/makefiles/cuda_32bit.mk b/src/makefiles/cuda_32bit.mk index 38d810acaa8..4c72451fed8 100644 --- a/src/makefiles/cuda_32bit.mk +++ b/src/makefiles/cuda_32bit.mk @@ -1,8 +1,11 @@ +ifndef CUDATKDIR +$(error CUDATKDIR not defined.) +endif + ifndef DOUBLE_PRECISION $(error DOUBLE_PRECISION not defined.) endif - CUDA_INCLUDE= -I$(CUDATKDIR)/include CUDA_FLAGS = -g -Xcompiler -fPIC --verbose --machine 32 -DHAVE_CUDA \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) diff --git a/src/makefiles/cuda_64bit.mk b/src/makefiles/cuda_64bit.mk index fc11c034d78..691fda6135b 100644 --- a/src/makefiles/cuda_64bit.mk +++ b/src/makefiles/cuda_64bit.mk @@ -1,13 +1,14 @@ +ifndef CUDATKDIR +$(error CUDATKDIR not defined.) +endif ifndef DOUBLE_PRECISION $(error DOUBLE_PRECISION not defined.) endif - CUDA_INCLUDE= -I$(CUDATKDIR)/include CUDA_FLAGS = -g -Xcompiler -fPIC --verbose --machine 64 -DHAVE_CUDA \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) CXXFLAGS += -DHAVE_CUDA -I$(CUDATKDIR)/include CUDA_LDFLAGS += -L$(CUDATKDIR)/lib64 -Wl,-rpath,$(CUDATKDIR)/lib64 CUDA_LDLIBS += -lcublas -lcudart -lcurand #LDLIBS : The libs are loaded later than static libs in implicit rule - diff --git a/src/makefiles/cygwin.mk b/src/makefiles/cygwin.mk index c6871e6802d..48f07e901cf 100644 --- a/src/makefiles/cygwin.mk +++ b/src/makefiles/cygwin.mk @@ -1,26 +1,6 @@ -# makefiles/kaldi.mk.cygwin contains Cygwin-specific rules +# Cygwin settings -ifndef FSTROOT -$(error FSTROOT not defined.) -endif +CXXFLAGS += -msse -msse2 -DHAVE_CLAPACK -I ../../tools/CLAPACK/ -DOUBLE_PRECISION = 0 -CXXFLAGS = -msse -msse2 -Wall -I.. -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -DHAVE_CLAPACK -I ../../tools/CLAPACK/ \ - -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ - -I ../../tools/CLAPACK/ \ - -I $(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID - -ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC -endif - -LDFLAGS = -g --enable-auto-import -LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -L/usr/lib/lapack \ - --enable-auto-import -lcyglapack-0 -lcygblas-0 -lm -lpthread -CXX = g++ -CC = g++ -RANLIB = ranlib -AR = ar +LDFLAGS += -g --enable-auto-import -L/usr/lib/lapack +LDLIBS += -lcyglapack-0 -lcygblas-0 diff --git a/src/makefiles/darwin.mk b/src/makefiles/darwin.mk new file mode 100644 index 00000000000..62bc30c6136 --- /dev/null +++ b/src/makefiles/darwin.mk @@ -0,0 +1,17 @@ +# Darwin (macOS) settings + +CXXFLAGS += -msse -msse2 -pthread \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_CLAPACK + +# Compiler specific flags +COMPILER = $(shell $(CXX) -v 2>&1) +ifeq ($(findstring clang,$(COMPILER)),clang) +# Suppress annoying clang warnings that are perfectly valid per spec. 
+CXXFLAGS += -Wno-mismatched-tags +else ifeq ($(findstring GCC,$(COMPILER)),GCC) +# Allow implicit conversions between vectors. +CXXFLAGS += -flax-vector-conversions +endif + +LDFLAGS += -g +LDLIBS += -framework Accelerate diff --git a/src/makefiles/darwin_10_10.mk b/src/makefiles/darwin_10_10.mk deleted file mode 100644 index c6d75dc69ae..00000000000 --- a/src/makefiles/darwin_10_10.mk +++ /dev/null @@ -1,40 +0,0 @@ -# makefiles/darwin_10_10.mk contains Darwin-specific rules for OS X 10.10.* - -ifndef FSTROOT - $(error FSTROOT not defined.) -endif - -DOUBLE_PRECISION = 0 -CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Winit-self \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ - -DHAVE_CLAPACK \ - -I$(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID - - -ifeq ($(KALDI_FLAVOR), dynamic) - CXXFLAGS += -fPIC -endif - -# Add no-mismatched-tags flag to suppress the annoying clang warnings -# that are perfectly valid per spec. -COMPILER = $(shell $(CXX) -v 2>&1 ) -ifeq ($(findstring clang,$(COMPILER)),clang) - CXXFLAGS += -Wno-mismatched-tags -endif - -# We need to tell recent versions of g++ to allow vector conversions without -# an explicit cast provided the vectors are of the same size. -ifeq ($(findstring GCC,$(COMPILER)),GCC) - CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs -endif - -LDFLAGS = -g -LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate -CXX = g++ -CC = $(CXX) -RANLIB = ranlib -AR = ar diff --git a/src/makefiles/darwin_10_11.mk b/src/makefiles/darwin_10_11.mk deleted file mode 100644 index b0eba615a49..00000000000 --- a/src/makefiles/darwin_10_11.mk +++ /dev/null @@ -1,40 +0,0 @@ -# makefiles/darwin_10_11.mk contains Darwin-specific rules for OS X 10.11.* - -ifndef FSTROOT - $(error FSTROOT not defined.) -endif - -DOUBLE_PRECISION = 0 -CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Winit-self \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ - -DHAVE_CLAPACK \ - -I$(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) -Wno-unused-local-typedef \ - -g # -O0 -DKALDI_PARANOID - - -ifeq ($(KALDI_FLAVOR), dynamic) - CXXFLAGS += -fPIC -endif - -# Add no-mismatched-tags flag to suppress the annoying clang warnings -# that are perfectly valid per spec. -COMPILER = $(shell $(CXX) -v 2>&1 ) -ifeq ($(findstring clang,$(COMPILER)),clang) - CXXFLAGS += -Wno-mismatched-tags -endif - -# We need to tell recent versions of g++ to allow vector conversions without -# an explicit cast provided the vectors are of the same size. -ifeq ($(findstring GCC,$(COMPILER)),GCC) - CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs -endif - -LDFLAGS = -g -LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate -CXX = g++ -CC = $(CXX) -RANLIB = ranlib -AR = ar diff --git a/src/makefiles/darwin_10_12.mk b/src/makefiles/darwin_10_12.mk deleted file mode 100644 index 8721a33b304..00000000000 --- a/src/makefiles/darwin_10_12.mk +++ /dev/null @@ -1,40 +0,0 @@ -# makefiles/darwin_10_12.mk contains Darwin-specific rules for OS X 10.12.* - -ifndef FSTROOT - $(error FSTROOT not defined.) -endif - -DOUBLE_PRECISION = 0 -CXXFLAGS = -msse -msse2 -Wall -I.. 
-pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Winit-self \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ - -DHAVE_CLAPACK \ - -I$(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) -Wno-unused-local-typedef \ - -g # -O0 -DKALDI_PARANOID - - -ifeq ($(KALDI_FLAVOR), dynamic) - CXXFLAGS += -fPIC -endif - -# Add no-mismatched-tags flag to suppress the annoying clang warnings -# that are perfectly valid per spec. -COMPILER = $(shell $(CXX) -v 2>&1 ) -ifeq ($(findstring clang,$(COMPILER)),clang) - CXXFLAGS += -Wno-mismatched-tags -endif - -# We need to tell recent versions of g++ to allow vector conversions without -# an explicit cast provided the vectors are of the same size. -ifeq ($(findstring GCC,$(COMPILER)),GCC) - CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs -endif - -LDFLAGS = -g -LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate -CXX = g++ -CC = $(CXX) -RANLIB = ranlib -AR = ar diff --git a/src/makefiles/darwin_10_5.mk b/src/makefiles/darwin_10_5.mk deleted file mode 100644 index ae9f59a6f86..00000000000 --- a/src/makefiles/darwin_10_5.mk +++ /dev/null @@ -1,26 +0,0 @@ -# makefiles/darwin_10_5.mk contains Darwin-specific rules for OS X 10.5.* - -ifndef FSTROOT - $(error FSTROOT not defined.) -endif - -DOUBLE_PRECISION = 0 -CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Winit-self \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ - -DHAVE_CLAPACK \ - -I$(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -gdwarf-2 # -O0 -DKALDI_PARANOID - -ifeq ($(KALDI_FLAVOR), dynamic) - CXXFLAGS += -fPIC -endif - -LDFLAGS = -gdwarf-2 -LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate -CXX = g++-4 -CC = g++-4 -RANLIB = ranlib -AR = ar diff --git a/src/makefiles/darwin_10_6.mk b/src/makefiles/darwin_10_6.mk deleted file mode 100644 index 880fff9973a..00000000000 --- a/src/makefiles/darwin_10_6.mk +++ /dev/null @@ -1,26 +0,0 @@ -# makefiles/darwin_10_6.mk contains Darwin-specific rules for OS X 10.6.* - -ifndef FSTROOT - $(error FSTROOT not defined.) -endif - -DOUBLE_PRECISION = 0 -CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Winit-self \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -rdynamic \ - -DHAVE_CLAPACK \ - -I$(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID - -ifeq ($(KALDI_FLAVOR), dynamic) - CXXFLAGS += -fPIC -endif - -LDFLAGS = -g -rdynamic -LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate -CXX = g++ -CC = g++ -RANLIB = ranlib -AR = ar diff --git a/src/makefiles/darwin_10_7.mk b/src/makefiles/darwin_10_7.mk deleted file mode 100644 index 6cdb7181f96..00000000000 --- a/src/makefiles/darwin_10_7.mk +++ /dev/null @@ -1,27 +0,0 @@ -# makefiles/darwin_10_6.mk contains Darwin-specific rules for OS X 10.7.* - -ifndef FSTROOT - $(error FSTROOT not defined.) -endif - -DOUBLE_PRECISION = 0 -CXXFLAGS = -msse -msse2 -Wall -I.. 
-pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Winit-self \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -rdynamic \ - -DHAVE_CLAPACK \ - -I$(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID - - -ifeq ($(KALDI_FLAVOR), dynamic) - CXXFLAGS += -fPIC -endif - -LDFLAGS = -g -rdynamic -LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate -CXX = g++ -CC = g++ -RANLIB = ranlib -AR = ar diff --git a/src/makefiles/darwin_10_8.mk b/src/makefiles/darwin_10_8.mk deleted file mode 100644 index 8aa305c5c94..00000000000 --- a/src/makefiles/darwin_10_8.mk +++ /dev/null @@ -1,27 +0,0 @@ -# makefiles/darwin_10_8.mk contains Darwin-specific rules for OS X 10.8.* - -ifndef FSTROOT - $(error FSTROOT not defined.) -endif - -DOUBLE_PRECISION = 0 -CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Winit-self \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -rdynamic \ - -DHAVE_CLAPACK \ - -I$(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID - - -ifeq ($(KALDI_FLAVOR), dynamic) - CXXFLAGS += -fPIC -endif - -LDFLAGS = -g -rdynamic -LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate -CXX = g++ -CC = g++ -RANLIB = ranlib -AR = ar diff --git a/src/makefiles/darwin_10_9.mk b/src/makefiles/darwin_10_9.mk deleted file mode 100644 index ee3090f1036..00000000000 --- a/src/makefiles/darwin_10_9.mk +++ /dev/null @@ -1,40 +0,0 @@ -# makefiles/darwin_10_9.mk contains Darwin-specific rules for OS X 10.9.* - -ifndef FSTROOT - $(error FSTROOT not defined.) -endif - -DOUBLE_PRECISION = 0 -CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Winit-self \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ - -DHAVE_CLAPACK \ - -I$(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID - - -ifeq ($(KALDI_FLAVOR), dynamic) - CXXFLAGS += -fPIC -endif - -# Add no-mismatched-tags flag to suppress the annoying clang warnings -# that are perfectly valid per spec. -COMPILER = $(shell $(CXX) -v 2>&1 ) -ifeq ($(findstring clang,$(COMPILER)),clang) - CXXFLAGS += -Wno-mismatched-tags -endif - -# We need to tell recent versions of g++ to allow vector conversions without -# an explicit cast provided the vectors are of the same size. -ifeq ($(findstring GCC,$(COMPILER)),GCC) - CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs -endif - -LDFLAGS = -g -LDLIBS = $(EXTRA_LDLIBS) $(FSTROOT)/lib/libfst.a -ldl -lm -lpthread -framework Accelerate -CXX = g++ -CC = $(CXX) -RANLIB = ranlib -AR = ar diff --git a/src/makefiles/default_rules.mk b/src/makefiles/default_rules.mk index 17f122622f1..fda52521186 100644 --- a/src/makefiles/default_rules.mk +++ b/src/makefiles/default_rules.mk @@ -3,28 +3,20 @@ SHELL := /bin/bash ifeq ($(KALDI_FLAVOR), dynamic) ifeq ($(shell uname), Darwin) - XLDLIBS := $(LDLIBS) ifdef LIBNAME LIBFILE = lib$(LIBNAME).dylib - #LDLIBS += -l$(LIBNAME) endif - LDFLAGS += -L$(KALDILIBDIR) -Wl,-rpath -Wl,$(KALDILIBDIR) - XDEPENDS = $(foreach dep,$(ADDLIBS), $(dir $(dep))/lib$(notdir $(basename $(dep))).dylib ) - XLDLIBS += $(foreach dep,$(ADDLIBS), -l$(notdir $(basename $(dep))) ) - else - ifeq ($(shell uname), Linux) - ifdef LIBNAME - LIBFILE = lib$(LIBNAME).so - #LDLIBS += -l$(LIBNAME) - endif - LDFLAGS += -Wl,-rpath=$(shell readlink -f $(KALDILIBDIR)) -L. 
- LDFLAGS += $(foreach dep,$(ADDLIBS), -L$(dir $(dep)) ) - XDEPENDS = $(foreach dep,$(ADDLIBS), $(dir $(dep))/lib$(notdir $(basename $(dep))).so ) - else # Platform not supported - $(error Dynamic libraries not supported on this platform. Run configure with --static flag. ) + LDFLAGS += -Wl,-rpath -Wl,$(KALDILIBDIR) + XDEPENDS = $(foreach dep,$(ADDLIBS), $(dir $(dep))lib$(notdir $(basename $(dep))).dylib) + else ifeq ($(shell uname), Linux) + ifdef LIBNAME + LIBFILE = lib$(LIBNAME).so endif + LDFLAGS += -Wl,-rpath=$(shell readlink -f $(KALDILIBDIR)) + XDEPENDS = $(foreach dep,$(ADDLIBS), $(dir $(dep))lib$(notdir $(basename $(dep))).so) + else # Platform not supported + $(error Dynamic libraries not supported on this platform. Run configure with --static flag.) endif - LDLIBS += $(foreach dep,$(ADDLIBS), -l$(notdir $(basename $(dep))) ) else ifdef LIBNAME LIBFILE = $(LIBNAME).a @@ -39,24 +31,24 @@ $(LIBFILE): $(OBJFILES) $(RANLIB) $(LIBNAME).a ifeq ($(KALDI_FLAVOR), dynamic) ifeq ($(shell uname), Darwin) - $(CXX) -dynamiclib -o $@ -install_name @rpath/$@ -framework Accelerate $(LDFLAGS) $(XLDLIBS) $(OBJFILES) $(LDLIBS) + $(CXX) -dynamiclib -o $@ -install_name @rpath/$@ $(LDFLAGS) $(OBJFILES) $(XDEPENDS) $(LDLIBS) rm -f $(KALDILIBDIR)/$@; ln -s $(shell pwd)/$@ $(KALDILIBDIR)/$@ -else -ifeq ($(shell uname), Linux) +else ifeq ($(shell uname), Linux) # Building shared library from static (static was compiled with -fPIC) $(CXX) -shared -o $@ -Wl,--no-undefined -Wl,--as-needed -Wl,-soname=$@,--whole-archive $(LIBNAME).a -Wl,--no-whole-archive $(LDFLAGS) $(XDEPENDS) $(LDLIBS) rm -f $(KALDILIBDIR)/$@; ln -s $(shell pwd)/$@ $(KALDILIBDIR)/$@ - #cp $@ $(KALDILIBDIR) else # Platform not supported - $(error Dynamic libraries not supported on this platform. Run configure with --static flag. ) -endif + $(error Dynamic libraries not supported on this platform. Run configure with --static flag.) endif endif +# By default (GNU) make uses the C compiler $(CC) for linking object files even +# if they were compiled from a C++ source. Below redefinition forces make to +# use the C++ compiler $(CXX) instead. +LINK.o = $(CXX) $(LDFLAGS) $(TARGET_ARCH) $(BINFILES): $(LIBFILE) $(XDEPENDS) - # Rule below would expand to, e.g.: # ../base/kaldi-base.a: # make -c ../base kaldi-base.a @@ -100,8 +92,20 @@ test: test_compile done; \ exit $$result; } -.valgrind: $(BINFILES) $(TESTFILES) +# Rules that enable valgrind debugging ("make valgrind") + +valgrind: .valgrind +.valgrind: $(TESTFILES) + echo -n > valgrind.out + for x in $(TESTFILES); do \ + echo $$x >>valgrind.out; \ + valgrind ./$$x >/dev/null 2>> valgrind.out; \ + done + ! ( grep 'ERROR SUMMARY' valgrind.out | grep -v '0 errors' ) + ! ( grep 'definitely lost' valgrind.out | grep -v -w 0 ) + rm valgrind.out + touch .valgrind depend: -$(CXX) -M $(CXXFLAGS) *.cc > .depend.mk diff --git a/src/makefiles/linux_atlas.mk b/src/makefiles/linux_atlas.mk index d985344f479..1f366727821 100644 --- a/src/makefiles/linux_atlas.mk +++ b/src/makefiles/linux_atlas.mk @@ -1,8 +1,4 @@ -# You have to make sure ATLASLIBS is set... - -ifndef FSTROOT -$(error FSTROOT not defined.) -endif +# ATLAS specific Linux settings ifndef ATLASINC $(error ATLASINC not defined.) @@ -12,26 +8,8 @@ ifndef ATLASLIBS $(error ATLASLIBS not defined.) endif +CXXFLAGS += -msse -msse2 -pthread -rdynamic \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) -DOUBLE_PRECISION = 0 -CXXFLAGS = -msse -msse2 -Wall -I.. 
\ - -pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ - -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ - -DHAVE_ATLAS -I$(ATLASINC) \ - -I$(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID - -ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC -endif - -LDFLAGS = -rdynamic $(OPENFSTLDFLAGS) -LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl -CC = g++ -CXX = g++ -AR = ar -AS = as -RANLIB = ranlib +LDFLAGS += -rdynamic +LDLIBS += $(ATLASLIBS) diff --git a/src/makefiles/linux_atlas_arm.mk b/src/makefiles/linux_atlas_arm.mk index 3359ea5e626..5f62f82d297 100644 --- a/src/makefiles/linux_atlas_arm.mk +++ b/src/makefiles/linux_atlas_arm.mk @@ -1,8 +1,4 @@ -# You have to make sure ATLASLIBS is set... - -ifndef FSTROOT -$(error FSTROOT not defined.) -endif +# ATLAS specific Linux ARM settings ifndef ATLASINC $(error ATLASINC not defined.) @@ -12,26 +8,8 @@ ifndef ATLASLIBS $(error ATLASLIBS not defined.) endif +CXXFLAGS += -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) -DOUBLE_PRECISION = 0 -CXXFLAGS = -ftree-vectorize -mfloat-abi=hard -mfpu=neon -Wall -I.. \ - -pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ - -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ - -DHAVE_ATLAS -I$(ATLASINC) \ - -I$(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID - -ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC -endif - -LDFLAGS = -rdynamic $(OPENFSTLDFLAGS) -LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl -CC = g++ -CXX = g++ -AR = ar -AS = as -RANLIB = ranlib +LDFLAGS += -rdynamic +LDLIBS += $(ATLASLIBS) diff --git a/src/makefiles/linux_clapack.mk b/src/makefiles/linux_clapack.mk index d9cd6163ceb..4d733bb207c 100644 --- a/src/makefiles/linux_clapack.mk +++ b/src/makefiles/linux_clapack.mk @@ -1,23 +1,8 @@ -# You have to make sure CLAPACKLIBS is set... +# CLAPACK specific Linux settings -DOUBLE_PRECISION = 0 -CXXFLAGS = -msse -Wall -I.. -pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) -msse2 \ - -Wno-sign-compare -Wno-unused-local-typedefs \ - -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ - -DHAVE_CLAPACK -I ../../tools/CLAPACK \ - -I ../../tools/openfst/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID +CXXFLAGS += -msse -msse2 -pthread -rdynamic \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ + -DHAVE_CLAPACK -I ../../tools/CLAPACK -ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC -endif - -LDFLAGS = -rdynamic $(OPENFSTLDFLAGS) -LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl -CC = g++ -CXX = g++ -AR = ar -AS = as -RANLIB = ranlib +LDFLAGS += -rdynamic +LDLIBS += $(ATLASLIBS) diff --git a/src/makefiles/linux_clapack_arm.mk b/src/makefiles/linux_clapack_arm.mk index f155248862c..7d3119a08c9 100644 --- a/src/makefiles/linux_clapack_arm.mk +++ b/src/makefiles/linux_clapack_arm.mk @@ -1,23 +1,8 @@ -# You have to make sure CLAPACKLIBS is set... +# CLAPACK specific Linux ARM settings -DOUBLE_PRECISION = 0 -CXXFLAGS = -ftree-vectorize -mfloat-abi=hard -mfpu=neon -Wall -I.. 
-pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Wno-unused-local-typedefs \ - -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ - -DHAVE_CLAPACK -I ../../tools/CLAPACK \ - -I ../../tools/openfst/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID +CXXFLAGS += -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ + -DHAVE_CLAPACK -I ../../tools/CLAPACK -ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC -endif - -LDFLAGS = -rdynamic $(OPENFSTLDFLAGS) -LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl -CC = g++ -CXX = g++ -AR = ar -AS = as -RANLIB = ranlib +LDFLAGS += -rdynamic +LDLIBS += $(ATLASLIBS) diff --git a/src/makefiles/linux_openblas.mk b/src/makefiles/linux_openblas.mk index 2d09bc2bcfc..8636b43e38e 100644 --- a/src/makefiles/linux_openblas.mk +++ b/src/makefiles/linux_openblas.mk @@ -1,8 +1,4 @@ -# You have to make sure FSTROOT,OPENBLASROOT,OPENBLASLIBS are set... - -ifndef FSTROOT -$(error FSTROOT not defined.) -endif +# OpenBLAS specific Linux settings ifndef OPENBLASLIBS $(error OPENBLASLIBS not defined.) @@ -12,25 +8,9 @@ ifndef OPENBLASROOT $(error OPENBLASROOT not defined.) endif +CXXFLAGS += -msse -msse2 -pthread -rdynamic \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ + -DHAVE_OPENBLAS -I $(OPENBLASROOT)/include -DOUBLE_PRECISION = 0 -CXXFLAGS = -msse -msse2 -Wall -I.. -pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ - -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ - -DHAVE_OPENBLAS -I $(OPENBLASROOT)/include \ - -I $(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID - -ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC -endif - -LDFLAGS = -rdynamic $(OPENFSTLDFLAGS) -LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl -CC = g++ -CXX = g++ -AR = ar -AS = as -RANLIB = ranlib +LDFLAGS += -rdynamic +LDLIBS += $(OPENBLASLIBS) diff --git a/src/makefiles/linux_openblas_arm.mk b/src/makefiles/linux_openblas_arm.mk index 3a72d96308f..682d62b5154 100644 --- a/src/makefiles/linux_openblas_arm.mk +++ b/src/makefiles/linux_openblas_arm.mk @@ -1,8 +1,4 @@ -# You have to make sure FSTROOT,OPENBLASROOT,OPENBLASLIBS are set... - -ifndef FSTROOT -$(error FSTROOT not defined.) -endif +# OpenBLAS specific Linux ARM settings ifndef OPENBLASLIBS $(error OPENBLASLIBS not defined.) @@ -12,25 +8,9 @@ ifndef OPENBLASROOT $(error OPENBLASROOT not defined.) endif +CXXFLAGS += -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ + -DHAVE_OPENBLAS -I $(OPENBLASROOT)/include -DOUBLE_PRECISION = 0 -CXXFLAGS = -ftree-vectorize -mfloat-abi=hard -mfpu=neon -Wall -I.. 
-pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ - -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ - -DHAVE_OPENBLAS -I $(OPENBLASROOT)/include \ - -I $(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID - -ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC -endif - -LDFLAGS = -rdynamic $(OPENFSTLDFLAGS) -LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl -CC = g++ -CXX = g++ -AR = ar -AS = as -RANLIB = ranlib +LDFLAGS += -rdynamic +LDLIBS += $(OPENBLASLIBS) diff --git a/src/makefiles/linux_x86_64_mkl.mk b/src/makefiles/linux_x86_64_mkl.mk index 7e9c13e6ac0..5e93d393b3e 100644 --- a/src/makefiles/linux_x86_64_mkl.mk +++ b/src/makefiles/linux_x86_64_mkl.mk @@ -1,4 +1,4 @@ -# You have to make sure MKLROOT and (optionally) MKLLIB is set +# MKL specific Linux settings # We have tested Kaldi with MKL version 10.2 on Linux/GCC and Intel(R) 64 # architecture (also referred to as x86_64) with LP64 interface layer. @@ -13,25 +13,10 @@ ifndef MKLROOT $(error MKLROOT not defined.) endif -ifndef FSTROOT -$(error FSTROOT not defined.) -endif - MKLLIB ?= $(MKLROOT)/lib/em64t -DOUBLE_PRECISION = 0 -CXXFLAGS = -m64 -msse -msse2 -pthread -Wall -I.. \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ - -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ - -DHAVE_MKL -I$(MKLROOT)/include \ - -I$(FSTROOT)/include \ - -std=c++11 $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID - -ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC -endif +CXXFLAGS += -m64 -msse -msse2 -pthread -rdynamic \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_MKL -I$(MKLROOT)/include ## Use the following for STATIC LINKING of the SEQUENTIAL version of MKL MKL_STA_SEQ = $(MKLLIB)/libmkl_solver_lp64_sequential.a -Wl,--start-group \ @@ -53,10 +38,5 @@ MKL_DYN_MUL = -L$(MKLLIB) -lmkl_solver_lp64 -Wl,--start-group -lmkl_intel_lp64 \ # MKLFLAGS = $(MKL_DYN_MUL) -LDFLAGS = -rdynamic $(OPENFSTLDFLAGS) -LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(MKLFLAGS) -lm -lpthread -ldl -CC = g++ -CXX = g++ -AR = ar -AS = as -RANLIB = ranlib +LDFLAGS += -rdynamic +LDLIBS += $(MKLFLAGS) From 9790d4b6d04852d7ec4f7e0fea31ab0ef3171885 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 15 Dec 2016 15:56:50 -0800 Subject: [PATCH 267/530] Clean up configure script. --- src/configure | 478 +++++++++++++++++++++++++++----------------------- 1 file changed, 255 insertions(+), 223 deletions(-) diff --git a/src/configure b/src/configure index d3e9d63760f..a5b4e34b100 100755 --- a/src/configure +++ b/src/configure @@ -1,11 +1,11 @@ #!/bin/bash -# + # This configure script is hand-generated, not auto-generated. # It creates the file kaldi.mk, which is %included by the Makefiles # in the subdirectories. -# The file kaldi.mk is editable by hand-- for example, you may want to +# The file kaldi.mk is editable by hand -- for example, you may want to # remove the options -g -O0 -DKALDI_PARANOID, or edit the -# -DKALDI_DOUBLE_PRECISION option (to be 1 not 0), +# DOUBLE_PRECISION variable (to be 1 not 0). # Example command lines: @@ -23,10 +23,61 @@ # ./configure --use-cuda=no # disable CUDA detection (will build cpu-only # # version of kaldi even on CUDA-enabled machine -#This should be incremented after every significant change of the configure script -#I.e. 
after each change that affects the kaldi.mk or the build system as whole
+# This should be incremented after any significant change to the configure
+# script, i.e. any change affecting kaldi.mk or the build system as a whole.
 CONFIGURE_VERSION=5
 
+if ! [ -x "$PWD/configure" ]; then
+  echo 'You must run "configure" from the src/ directory.'
+  exit 1
+fi
+
+function usage {
+  cat <<EOF
+Usage: ./configure [OPTION]...
+
+Following environment variables can be used to override the defaults:
+  CXX         C++ compiler [default=g++]
+  CXXFLAGS    Additional C++ compiler flags, e.g. -I<include-dir>
+  LDFLAGS     Additional linker flags, e.g. -L<lib-dir>
+  LDLIBS      Additional libraries to pass to the linker, e.g. -l<library>
+
+EOF
+}
+
 function rel2abs {
   if [ ! -z "$1" ]; then
     local retval=`cd $1 2>/dev/null && pwd || exit 1`
@@ -50,42 +101,106 @@ function is_set {
   fi
 }
 
+function failure {
+  echo "***configure failed: $* ***" >&2
+  if [ -f kaldi.mk ]; then rm kaldi.mk; fi
+  exit 1;
+}
 
+function check_exists {
+  if [ ! -f $1 ]; then failure "$1 not found."; fi
+}
 
-## First do some checks.  These verify that all the things are
-## here that should be here.
-if ! [ -x "$PWD/configure" ]; then
-  echo 'You must run "configure" from the src/ directory.'
-  exit 1
-fi
+function check_compiler {
+  COMPILER=$1
+  if ! which $COMPILER >&/dev/null; then
+    failure "$COMPILER is not installed.
+             You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3."
+  else
+    COMPILER_VER_INFO=$($COMPILER --version 2>/dev/null)
+    if [[ $COMPILER_VER_INFO == *"g++"* ]]; then
+      GCC_VER=$($COMPILER -dumpversion)
+      GCC_VER_NUM=$(echo $GCC_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d")
+      if [ $GCC_VER_NUM -lt 40700 ]; then
+        failure "$COMPILER (g++-$GCC_VER) is not supported.
+                 You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3."
+      elif [ $GCC_VER_NUM == 40801 ] || [ $GCC_VER_NUM == 40802 ]; then
+        failure "$COMPILER (g++-$GCC_VER) is not supported.
+                 GCC 4.8.1 and 4.8.2 have a bug in the implementation of
+                 the nth_element algorithm provided by the standard library.
+                 This will cause Kaldi to crash (make test would fail).
+                 Please use another C++ compiler with C++11 support.
+                 You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3."
+      fi
+    elif [[ $COMPILER_VER_INFO == *"Apple"* ]]; then
+      CLANG_VER=$(echo $COMPILER_VER_INFO | grep version | sed "s/.*version \([0-9\.]*\).*/\1/")
+      CLANG_VER_NUM=$(echo $COMPILER_VER_INFO | grep version | sed "s/.*clang-\([0-9]*\).*/\1/")
+      if [ $CLANG_VER_NUM -lt 500 ]; then
+        failure "$COMPILER (Apple clang-$CLANG_VER) is not supported.
+                 You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3."
+      fi
+    elif [[ $COMPILER_VER_INFO == *"LLVM"* ]]; then
+      CLANG_VER=$(echo $COMPILER_VER_INFO | grep version | sed "s/.*version \([0-9\.]*\).*/\1/")
+      CLANG_VER_NUM=$(echo $CLANG_VER | sed 's/\./ /g' | xargs printf "%d%02d")
+      if [ $CLANG_VER_NUM -lt 303 ]; then
+        failure "$COMPILER (LLVM clang-$CLANG_VER) is not supported.
+                 You need g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3."
+      fi
+    fi
+  fi
+}
 
+function check_for_slow_expf {
+  cd probe
+  rm -f exp-test
+  make -f Makefile.slow_expf 1>/dev/null
+  ./exp-test
+  if [ $? -eq 1 ]; then
+    echo "*** WARNING: expf() seems to be slower than exp() on your machine. This is a known bug in old versions of glibc. Please consider updating glibc. ***"
+    echo "*** Kaldi will be configured to use exp() instead of expf() in base/kaldi-math.h Exp() routine for single-precision floats. ***"
+    echo "CXXFLAGS += -DKALDI_NO_EXPF" >> ../kaldi.mk
+  fi
+  cd ..
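+  # (Descriptive note: exp-test exits with status 1 when expf() is measured
+  # to be slower than exp(); the flag appended above then makes the
+  # single-precision Exp() wrapper in base/kaldi-math.h call exp() instead.)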
+} + +function check_library { + local libpath=$1 + local libname=$2 + local libext=$3 + local full_libname="$libpath/$libname.$libext" + ##echo "Testing $full_libname" >&2 + test -f "$full_libname" && return ; + return 1 +} -# Avoid using any variables that are set in the shell. +# If configuration sets any of these variables, we will switch the external +# math library. Here we unset them so that we can check later. unset MKLROOT unset CLAPACKROOT unset OPENBLASROOT unset MKLLIBDIR -function usage { - echo 'Usage: ./configure [--static|--shared] [--threaded-atlas={yes|no}] [--atlas-root=ATLASROOT] [--fst-root=FSTROOT] - [--openblas-root=OPENBLASROOOT] [--clapack-root=CLAPACKROOT] [--mkl-root=MKLROOT] [--mkl-libdir=MKLLIBDIR] - [--omp-libdir=OMPDIR] [--static-fst={yes|no}] [--static-math={yes|no}] [--threaded-math={yes|no}] [--mathlib=ATLAS|MKL|CLAPACK|OPENBLAS] - [--use-cuda={yes|no}] [--cudatk-dir=CUDATKDIR][--mkl-threading=sequential|iomp|tbb|gomp]'; -} +# These environment variables are OK. +CXX=${CXX:-g++} +ENV_CXXFLAGS=$CXXFLAGS +ENV_LDFLAGS=$LDFLAGS +ENV_LDLIBS=$LDLIBS -threaded_atlas=false # By default, use the un-threaded version of ATLAS. -threaded_math=${threaded_atlas} -static_math=false -static_fst=false -use_cuda=true +# Default configuration dynamic_kaldi=false +use_cuda=true +static_fst=false +static_math=false +threaded_atlas=false mkl_threading=sequential +double_precision=false + +MATHLIB='ATLAS' +ATLASROOT=`rel2abs ../tools/ATLAS/` +FSTROOT=`rel2abs ../tools/openfst` -cmd_line="$0 $@" # Save the command line to include in kaldi.mk +# Save the command line to include in kaldi.mk +cmd_line="$0 $@" while [ $# -gt 0 ]; do @@ -104,6 +219,12 @@ do static_math=false; static_fst=false; shift ;; + --double-precision=yes) + double_precision=true; + shift ;; + --double-precision=no) + double_precision=false; + shift ;; --atlas-root=*) ATLASROOT=`read_dirname $1`; shift ;; @@ -115,12 +236,10 @@ do shift ;; --threaded-math=yes) threaded_atlas=true; - threaded_math=true; mkl_threading=iomp shift ;; --threaded-math=no) threaded_atlas=false; - threaded_math=false; mkl_threading=sequential shift ;; --use-cuda=yes) @@ -143,13 +262,11 @@ do shift ;; --mkl-threading=sequential) threaded_atlas=false; - threaded_math=false; mkl_threading=sequential; shift ;; --mkl-threading=*) mkl_threading=`expr "X$1" : '[^=]*=\(.*\)'`; threaded_atlas=true; - threaded_math=true; shift ;; --fst-root=*) FSTROOT=`read_dirname $1`; @@ -172,7 +289,7 @@ do --speex-libdir=*) SPEEXLIBDIR=`read_dirname $1`; shift ;; - --speex-includedir=*) + --speex-incdir=*) SPEEXINCLUDEDIR=`read_dirname $1`; shift ;; --omp-libdir=*) @@ -188,78 +305,18 @@ do esac done -# the idea here is that if you change the configuration options from using +# The idea here is that if you change the configuration options from using # CUDA to not using it, or vice versa, we want to recompile all parts of the -# code that may use a GPU. Touching this file is a way to force this. +# code that may use a GPU. Touching this file is a way to force this. touch cudamatrix/cu-common.h 2>/dev/null -function failure { - echo "***configure failed: $* ***" >&2 - if [ -f kaldi.mk ]; then rm kaldi.mk; fi - exit 1; -} - -function check_exists { - if [ ! 
-f $1 ]; then failure "$1 not found."; fi -} - -function check_for_bad_gcc { - if which gcc >&/dev/null; then # gcc is on the path - gcc_version=$(gcc -dumpspecs 2>&1 | grep -A1 -F '*version:' | grep -v version) - if [ "$gcc_version" == "4.8.2" ] || [ "$gcc_version" == "4.8.1" ]; then - echo "*** WARNING: your version of gcc seems to be 4.8.1 or 4.8.2. ***" - echo "*** These versions of gcc has a bug in nth_element ***" - echo "*** in its implementation of the standard library ***" - echo "*** This will cause Kaldi to crash (make test ***" - echo "*** would fail). Please either upgrade or downgrade gcc. ***" - exit 1 - fi - fi -} - -function check_for_slow_expf { - cd probe - rm -f exp-test - make -f Makefile.slow_expf 1>/dev/null - ./exp-test - if [ $? -eq 1 ]; then - echo "*** WARNING: expf() seems to be slower than exp() on your machine. This is a known bug in old versions of glibc. Please consider updating glibc. ***" - echo "*** Kaldi will be configured to use exp() instead of expf() in base/kaldi-math.h Exp() routine for single-precision floats. ***" - echo "CXXFLAGS += -DKALDI_NO_EXPF" >> ../kaldi.mk - fi - cd .. -} - - -function exit_success { - check_for_bad_gcc; - check_for_slow_expf; - echo "SUCCESS" - exit 0; -} - +# If one of these variables is set, switch the external math library. +is_set $MKLLIBDIR && echo "Configuring KALDI to use MKL" && export MATHLIB="MKL" +is_set $MKLROOT && echo "Configuring KALDI to use MKL"&& export MATHLIB="MKL" +is_set $CLAPACKROOT && echo "Configuring KALDI to use CLAPACK"&& export MATHLIB="CLAPACK" +is_set $OPENBLASROOT && echo "Configuring KALDI to use OPENBLAS"&& export MATHLIB="OPENBLAS" - -function check_library { - local libpath=$1 - local libname=$2 - local libext=$3 - local full_libname="$libpath/$libname.$libext" - ##echo "Testing $full_libname" >&2 - test -f "$full_libname" && return ; - return 1 -} - - - -#Check if at least one of these variables is set -#If yes, we want to switch to using the MKL -is_set $MKLLIBDIR && echo "Force-configuring KALDI to use MKL" && export MATHLIB="MKL" -is_set $MKLROOT && echo "Force-configuring KALDI to use MKL"&& export MATHLIB="MKL" -is_set $CLAPACKROOT && echo "Force-configuring KALDI to use CLAPACK"&& export MATHLIB="CLAPACK" -is_set $OPENBLASROOT && echo "Force-configuring KALDI to use OPENBLAS"&& export MATHLIB="OPENBLAS" - -#MKL functions +# MKL functions function linux_configure_mkllibdir { local mklroot=$1 @@ -278,7 +335,6 @@ function linux_configure_mkl_includes { failure "Could not find the MKL include directory" } - function linux_configure_mkl_libraries { local mkllibdir=$1 local static=$2 @@ -414,13 +470,13 @@ function linux_configure_mkl_threading { echo "$OMP_LINK_LINE" } -## -## CUDA is used only in selected directories including src/cudamatrix, src/nnet* -## and src/chain*. It is used to accelerate the neural network training, the -## rest of kaldi runs on CPUs. -## + +# CUDA is used only in selected directories including src/cudamatrix, src/nnet* +# and src/chain*. It is used to accelerate the neural network training. +# The rest of Kaldi runs on CPUs. + function configure_cuda { - #check for CUDA toolkit in the system + # Check for CUDA toolkit in the system if [ ! 
-d "$CUDATKDIR" ]; then for base in /Developer/NVIDIA/CUDA-6.0 /usr/local/share/cuda /usr/local/cuda /pkgs_local/cuda-3.2/ /opt/nvidia_cuda/cuda-6.0/ /usr/; do if [ -f $base/bin/nvcc ]; then @@ -476,7 +532,7 @@ function configure_cuda { } function linux_configure_speex { - #check whether the user has called tools/extras/install_speex.sh or not + # Check whether the user has called tools/extras/install_speex.sh or not [ ! -z "$SPEEXROOT" ] || SPEEXROOT=`pwd`/../tools/speex [ ! -z "$SPEEXLIBDIR" ] || SPEEXLIBDIR="$SPEEXROOT"/lib [ ! -z "$SPEEXINCLUDEDIR" ] || SPEEXINCLUDEDIR="$SPEEXROOT"/include @@ -513,17 +569,7 @@ function linux_configure_speex { fi } -function fix_cxx_flag { - CXXCOMPILER=`grep "CXX = " kaldi.mk | awk '{print $3}'` - if [ $CXXCOMPILER=="g++" ]; then - $CXXCOMPILER -dumpversion | \ - awk '{if(NR==1 && $1<"4.4") print "sed \"s/-Wno-unused-local-typedefs//g\" \ - kaldi.mk > tmpf; mv tmpf kaldi.mk; "}' | sh - - fi -} - -function linux_atlas_failure { # function we use when we couldn't find - # ATLAS libs. +function linux_atlas_failure { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = [somewhere]/liblapack.a [somewhere]/libcblas.a [somewhere]/libatlas.a [somewhere]/libf77blas.a $ATLASLIBDIR >> kaldi.mk if [[ "`uname -m`" == arm* ]]; then @@ -533,7 +579,6 @@ function linux_atlas_failure { # function we use when we couldn't find else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag echo "** $* ***" echo "** ERROR **" echo "** Configure cannot proceed automatically." @@ -590,11 +635,9 @@ function linux_configure_debian_ubuntu { else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag echo "Successfully configured for Debian/Ubuntu Linux [dynamic libraries] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda linux_configure_speex - exit_success; } function linux_configure_debian_ubuntu3 { @@ -615,11 +658,9 @@ function linux_configure_debian_ubuntu3 { else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag echo "Successfully configured for Debian/Ubuntu Linux [dynamic libraries] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda linux_configure_speex - exit_success; } function linux_configure_debian7 { @@ -643,11 +684,9 @@ function linux_configure_debian7 { else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag echo "Successfully configured for Debian 7 [dynamic libraries] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda linux_configure_speex - exit_success; } function linux_configure_redhat { @@ -668,10 +707,8 @@ function linux_configure_redhat { else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag echo "Successfully configured for red hat [dynamic libraries] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda - exit_success; } function linux_configure_redhat_fat { @@ -695,13 +732,10 @@ function linux_configure_redhat_fat { else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag echo "Successfully configured for red hat [dynamic libraries, fat] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda - exit_success; } - function linux_configure_static { if $threaded_atlas; then pt=pt; else pt=""; fi @@ -754,11 +788,9 @@ function linux_configure_static { else cat makefiles/linux_atlas.mk >> kaldi.mk fi - fix_cxx_flag $use_cuda && configure_cuda linux_configure_speex echo "Successfully configured for Linux [static libraries] with ATLASLIBS =$ATLASLIBS" - exit_success; } function linux_check_dynamic { @@ -839,43 +871,71 @@ function linux_configure_dynamic { else cat makefiles/linux_atlas.mk >> 
kaldi.mk fi - fix_cxx_flag $use_cuda && configure_cuda linux_configure_speex echo "Successfully configured for Linux [dynamic libraries] with ATLASLIBS =$ATLASLIBS" - exit_success; } echo "Configuring ..." -if [ ! -f makefiles/common.mk ]; then - failure makefiles/common.mk not found -fi - -# back up the old one in case we modified it +# Back up the old kaldi.mk in case we modified it if [ -f kaldi.mk ]; then echo "Backing up kaldi.mk to kaldi.mk.bak" cp kaldi.mk kaldi.mk.bak fi +echo "Checking compiler $CXX ..." +check_compiler $CXX + printf "# This file was generated using the following command:\n# $cmd_line\n\n" > kaldi.mk -cat makefiles/common.mk >> kaldi.mk +echo "CONFIGURE_VERSION := $CONFIGURE_VERSION" >> kaldi.mk +echo >> kaldi.mk + +echo "# Configuration" >> kaldi.mk +echo >> kaldi.mk if $dynamic_kaldi ; then KALDILIBDIR=`pwd`/lib echo "KALDI_FLAVOR := dynamic" >> kaldi.mk echo "KALDILIBDIR := $KALDILIBDIR" >> kaldi.mk fi -echo "CONFIGURE_VERSION := $CONFIGURE_VERSION" >> kaldi.mk -echo "FSTROOT = $FSTROOT" >> kaldi.mk - -echo "Checking OpenFST library in $FSTROOT ..." +if $double_precision; then + echo "DOUBLE_PRECISION = 1" >> kaldi.mk +else + echo "DOUBLE_PRECISION = 0" >> kaldi.mk +fi +echo "Checking OpenFst library in $FSTROOT ..." if [ ! -f $FSTROOT/include/fst/fst.h ]; then failure "Could not find file $FSTROOT/include/fst/fst.h: - you may not have installed OpenFst. See ../tools/INSTALL" + you may not have installed OpenFst. See ../tools/INSTALL" fi - +echo "FSTROOT = $FSTROOT" >> kaldi.mk OPENFST_VER="${OPENFST_VER:-`grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::'`}" echo "OPENFST_VER = $OPENFST_VER" >> kaldi.mk +if $static_fst ; then + OPENFSTLIBS="$FSTROOT/lib/libfst.a" +else + if [ "`uname`" == "Darwin" ]; then + OPENFSTLIBS="$FSTROOT/lib/libfst.dylib" + OPENFSTLDFLAGS="-Wl,-rpath -Wl,${FSTROOT}/lib" + elif ["`uname`" == "Linux" ]; then + OPENFSTLIBS="$FSTROOT/lib/libfst.so" + OPENFSTLDFLAGS="-Wl,-rpath=${FSTROOT}/lib" + else + failure "Dynamic libraries not supported on this platform. + Run configure with --static --static-fst=no flag." + fi +fi +if [ ! -f "$OPENFSTLIBS" ]; then + failure "Static=[$static_fst] OpenFST library not found: See ../tools/INSTALL" +fi +echo "OPENFSTLIBS = $OPENFSTLIBS" >> kaldi.mk +echo "OPENFSTLDFLAGS = $OPENFSTLDFLAGS" >> kaldi.mk +echo "CXX = $CXX" >> kaldi.mk +echo >> kaldi.mk + +# Add platform independent settings +cat makefiles/common.mk >> kaldi.mk +echo >> kaldi.mk # Most of the OS-specific steps below will append to kaldi.mk echo "Doing OS specific configurations ..." @@ -884,58 +944,34 @@ echo "Doing OS specific configurations ..." # which crashes on Darwin. Also the linear algebra libraries on Macs are # used differently (through the Accelerate framework) than on Linux. if [ "`uname`" == "Darwin" ]; then - echo "On Darwin: checking for Accelerate framework ..." + echo "On Darwin: Checking for Accelerate framework ..." if [ ! -e /System/Library/Frameworks/Accelerate.framework ]; then - failure "Need the Accelerate.framework to compile on Darwin." + failure "Need the Accelerate framework to compile on Darwin." fi - if [ ! -f $FSTROOT/lib/libfst.a ]; then - failure "Static OpenFST library not found: See ../tools/INSTALL" + OSX_VER=`sw_vers | grep ProductVersion | awk '{print $2}' | awk '{split($0,a,"."); print a[1] "." a[2]; }'` + OSX_VER_NUM=$(echo $OSX_VER | sed 's/\./ /g' | xargs printf "%d%02d") + echo "Configuring for OS X version $OSX_VER ..." 
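+  # (Descriptive note: the printf above packs a version like "10.11" into
+  # the integer 1011, so releases can be compared numerically below.)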
+ if [ $OSX_VER_NUM -ge 1005 ]; then + cat makefiles/darwin.mk >> kaldi.mk + else + failure "Mac OS X version '$OSX_VER' is not supported." fi - # posix_memalign and gcc -rdynamic options not present on OS X 10.5.* - osx_ver=`sw_vers | grep ProductVersion | awk '{print $2}' | awk '{split($0,a,"."); print a[1] "." a[2]; }'` - echo "Configuring for OS X version $osx_ver ..." - if [ "$osx_ver" == "10.5" ]; then - check_exists makefiles/darwin_10_5.mk - cat makefiles/darwin_10_5.mk >> kaldi.mk - elif [ "$osx_ver" == "10.6" ]; then - check_exists makefiles/darwin_10_6.mk - cat makefiles/darwin_10_6.mk >> kaldi.mk - elif [ "$osx_ver" == "10.7" ]; then - check_exists makefiles/darwin_10_7.mk - cat makefiles/darwin_10_7.mk >> kaldi.mk - elif [ "$osx_ver" == "10.8" ]; then - check_exists makefiles/darwin_10_8.mk - cat makefiles/darwin_10_8.mk >> kaldi.mk - elif [ "$osx_ver" == "10.9" ]; then - check_exists makefiles/darwin_10_9.mk - cat makefiles/darwin_10_9.mk >> kaldi.mk - elif [ "$osx_ver" == "10.10" ]; then - check_exists makefiles/darwin_10_10.mk - cat makefiles/darwin_10_10.mk >> kaldi.mk - elif [ "$osx_ver" == "10.11" ]; then - check_exists makefiles/darwin_10_11.mk - cat makefiles/darwin_10_11.mk >> kaldi.mk + + if [ $OSX_VER_NUM == 1011 ]; then echo "**BAD WARNING**: You are using OS X El Capitan. Some versions of this OS" echo "**BAD WARNING**: have a bug in the BLAS implementation that affects Kaldi." echo "**BAD WARNING**: After compiling, cd to matrix/ and type 'make test'. The" echo "**BAD WARNING**: test will fail if the problem exists in your version. " echo "**BAD WARNING**: Eventually this issue will be fixed by system updates from" - echo "**BAD WARNING** Apple. Unexplained crashes with reports of NaNs will" - echo "**BAD WARNING** be caused by this bug, but some recipes will (sometimes) work." + echo "**BAD WARNING**: Apple. Unexplained crashes with reports of NaNs will" + echo "**BAD WARNING**: be caused by this bug, but some recipes will (sometimes) work." sleep 1; echo -n .; sleep 1; echo -n .; sleep 1; echo . - elif [ "$osx_ver" == "10.12" ]; then - check_exists makefiles/darwin_10_12.mk - cat makefiles/darwin_10_12.mk >> kaldi.mk - else - failure "OS X version '$osx_ver' not supported" fi + echo "Successfully configured for Darwin with Accelerate framework." $use_cuda && configure_cuda - echo "Configuration succeeded for platform Darwin." - exit_success; -fi -if [ "`uname -o`" == "Cygwin" ]; then - echo "On Cygwin: checking for linear algebra libraries ..." +elif [ "`uname -o`" == "Cygwin" ]; then + echo "On Cygwin: Checking for linear algebra libraries ..." if [ ! -f ../tools/CLAPACK/clapack.h ]; then failure "could not find file ../tools/CLAPACK/clapack.h" fi @@ -943,25 +979,9 @@ if [ "`uname -o`" == "Cygwin" ]; then failure "please first install package liblapack0" fi cat makefiles/cygwin.mk >> kaldi.mk - echo "Configuration succeeded for platform cygwin" - exit_success; -fi - -if [ "`uname`" == "Linux" ]; then - if $static_fst ; then - OPENFSTLIBS="$FSTROOT/lib/libfst.a" - fst_type='a' - else - OPENFSTLIBS="-L${FSTROOT}/lib -lfst" - OPENFSTLDFLAGS="-Wl,-rpath=${FSTROOT}/lib" - fst_type='so' - fi - if [ ! -f "$FSTROOT/lib/libfst.${fst_type}" ]; then - failure "Static=[$static_fst] OpenFST library not found: See ../tools/INSTALL" - fi - echo OPENFSTLIBS = $OPENFSTLIBS >> kaldi.mk - echo OPENFSTLDFLAGS = $OPENFSTLDFLAGS >> kaldi.mk + echo "Successfully configured for Cygwin with CLAPACK." 
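+
+# (Descriptive note: each platform branch below selects a linear algebra
+# flavor -- ATLAS, MKL, CLAPACK or OpenBLAS on Linux -- and appends the
+# matching makefiles/*.mk fragment to kaldi.mk.)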
+elif [ "`uname`" == "Linux" ]; then echo "On Linux: Checking for linear algebra header files ..." if [ "$MATHLIB" == "ATLAS" ]; then if [ ! -f $ATLASROOT/include/cblas.h ] || [ ! -f $ATLASROOT/include/clapack.h ] ; then @@ -992,7 +1012,7 @@ if [ "`uname`" == "Linux" ]; then linux_configure_redhat || \ linux_configure_redhat_fat 64 || \ linux_configure_redhat_fat || \ - linux_atlas_failure "Failed to configure ATLAS lbiraries"; + linux_atlas_failure "Failed to configure ATLAS libraries"; else # Prefer dynamic to static math. linux_configure_debian_ubuntu3 || \ @@ -1005,7 +1025,7 @@ if [ "`uname`" == "Linux" ]; then linux_configure_redhat || \ linux_configure_redhat_fat 64 || \ linux_configure_redhat_fat || \ - linux_atlas_failure "Failed to configure ATLAS lbiraries"; + linux_atlas_failure "Failed to configure ATLAS libraries"; fi elif [ "$MATHLIB" == "MKL" ]; then @@ -1052,26 +1072,23 @@ if [ "`uname`" == "Linux" ]; then fi check_exists makefiles/linux_x86_64_mkl.mk cat makefiles/linux_x86_64_mkl.mk >> kaldi.mk - fix_cxx_flag echo "MKLFLAGS = ${MKL_LINK_LINE} ${THREADING_LINE} $EXTRA_LIBS " >> kaldi.mk - + echo "Successfully configured for Linux with MKL libs from $MKLROOT" $use_cuda && configure_cuda linux_configure_speex - echo "Successfully configured for Linux with MKL libs from $MKLROOT" - exit_success; elif [ "$MATHLIB" == "CLAPACK" ]; then if [ -z "$CLAPACKROOT" ]; then failure "Must specify the location of CLAPACK with --clapack-root option (and it must exist)" fi if [ ! -f ../tools/CLAPACK/clapack.h ]; then - failure could not find file ../tools/CLAPACK/clapack.h + failure "could not find file ../tools/CLAPACK/clapack.h" fi if [ ! -d "$CLAPACKROOT" ]; then failure "The directory $CLAPACKROOT does not exist" fi # Also check for cblas.h and f2c.h - echo "Using CLAPACK as the linear algebra library." + echo "Using CLAPACK libs from $CLAPACKROOT as the linear algebra library." if [ ! -f makefiles/linux_clapack.mk ]; then failure "makefiles/linux_clapack.mk not found." fi @@ -1080,12 +1097,11 @@ if [ "`uname`" == "Linux" ]; then else cat makefiles/linux_clapack.mk >> kaldi.mk fi - fix_cxx_flag - echo "Warning (CLAPACK): this part of the configure process is not properly tested and will not work." + echo "Warning (CLAPACK): this part of the configure process is not properly tested and may not work." + echo "Successfully configured for Linux with CLAPACK libs from $CLAPACKROOT" $use_cuda && configure_cuda linux_configure_speex - echo "Successfully configured for Linux with CLAPACK libs from $CLAPACKROOT" - exit_success; + elif [ "$MATHLIB" == "OPENBLAS" ]; then OPENBLASROOT=`rel2abs "$OPENBLASROOT"` if [ -z "$OPENBLASROOT" ]; then @@ -1094,7 +1110,7 @@ if [ "`uname`" == "Linux" ]; then if [ ! -f $OPENBLASROOT/lib/libopenblas.so ]; then failure "Expected to find the file $OPENBLASROOT/lib/libopenblas.so" fi - echo "Your math library seems to be OpenBLAS. Configuring appropriately." + echo "Your math library seems to be OpenBLAS from $OPENBLASROOT. Configuring appropriately." if $static_math; then echo "Configuring static OpenBlas since --static-math=yes" OPENBLASLIBS="$OPENBLASROOT/lib/libopenblas.a -lgfortran" @@ -1111,14 +1127,30 @@ if [ "`uname`" == "Linux" ]; then else cat makefiles/linux_openblas.mk >> kaldi.mk fi - fix_cxx_flag + echo "Successfully configured for Linux with OpenBLAS from $OPENBLASROOT" $use_cuda && configure_cuda linux_configure_speex - echo "Successfully configured OpenBLAS from $OPENBLASROOT." 
- exit_success; + else failure "Unsupported linear algebra library '$MATHLIB'" fi +else + failure "Could not detect the platform or we have not yet worked out the + appropriate configuration for this platform. + Please contact the developers." fi -failure Could not detect platform or we have not yet worked out the appropriate configuration for this platform. Please contact the developers. +# Append the flags set by environment variables last so they can be used +# to override the automatically generated configuration. +echo >> kaldi.mk +echo "# Environment settings" >> kaldi.mk +echo >> kaldi.mk +if [ -n "$ENV_CXXFLAGS" ]; then echo "CXXFLAGS += $ENV_CXXFLAGS" >> kaldi.mk; fi +if [ -n "$ENV_LDFLAGS" ]; then echo "LDFLAGS += $ENV_LDFLAGS" >> kaldi.mk; fi +if [ -n "$ENV_LDLIBS" ]; then echo "LDLIBS += $ENV_LDLIBS" >> kaldi.mk; fi + +# We check for slow exp implementation just before we exit. This check uses +# and possibly modifies the kaldi.mk file that we just generated. +check_for_slow_expf; +echo "SUCCESS" +exit 0; From de68b94ff6829acd8cd68c39e2e14cd631920472 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 15 Dec 2016 15:57:14 -0800 Subject: [PATCH 268/530] Update travis script. --- tools/extras/travis_script.sh | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index a857f538edd..2b8652a1f25 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -45,23 +45,18 @@ then exit 0; fi -# Prepare make command fragments. +# Prepare environment variables CF="$CFLAGS -g $(addsw -I $INCDIRS)" LDF="$LDFLAGS $(addsw -L $LIBDIRS)" -CCC="$(mtoken CC $CXX) $(mtoken CXX $CXX)" +CCC="$(mtoken CXX $CXX)" runvx cd tools runvx make openfst $CCC CXXFLAGS="$CF" -j$MAXPAR cd .. runvx cd src -runvx ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root=$XROOT/usr +runvx $CCC CXXFLAGS="$CF" LDFLAGS="$LDF" ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root=$XROOT/usr +runvx make all -j$MAXPAR +runvx make test -k -make_kaldi() { - runvx make "$@" $CCC EXTRA_CXXFLAGS="$CF" EXTRA_LDLIBS="$LDF" -} - -#make_kaldi mklibdir base matrix -j$MAXPAR -#make_kaldi matrix/test - -make_kaldi all -j$MAXPAR -make_kaldi test -k +#runvx make mklibdir base matrix -j$MAXPAR +#runvx make matrix/test From f77a33aa3f554d1f091a4d9932dc4ec9efb1d969 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 15 Dec 2016 15:58:31 -0800 Subject: [PATCH 269/530] Initialize a few variables to silence compiler warnings. --- src/nnet/nnet-utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nnet/nnet-utils.h b/src/nnet/nnet-utils.h index 869bb174f23..8b1afbbed3b 100644 --- a/src/nnet/nnet-utils.h +++ b/src/nnet/nnet-utils.h @@ -243,7 +243,7 @@ inline void BuildIntegerVector(const std::vector >& in, // loop over records, for (int32 i = 0; i < in.size(); i++) { // process i'th record, - int32 beg, end, step; + int32 beg = 0, end = 0, step = 1; switch (in[i].size()) { case 1: beg = in[i][0]; From 3981c272e786f138c137d61c728cf5a1435f6949 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 15 Dec 2016 16:11:12 -0800 Subject: [PATCH 270/530] Fix spacing error in configure. 
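
In Bash, '[' is an ordinary command, so it must be separated from its
arguments by whitespace. A minimal illustration of the bug this fixes
(hypothetical snippet, not taken from the script):

    if ["`uname`" == "Linux" ]; then :; fi   # error: "[Linux: command not found"
    if [ "`uname`" == "Linux" ]; then :; fi  # correct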
--- src/configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/configure b/src/configure index a5b4e34b100..b92729319a7 100755 --- a/src/configure +++ b/src/configure @@ -917,7 +917,7 @@ else if [ "`uname`" == "Darwin" ]; then OPENFSTLIBS="$FSTROOT/lib/libfst.dylib" OPENFSTLDFLAGS="-Wl,-rpath -Wl,${FSTROOT}/lib" - elif ["`uname`" == "Linux" ]; then + elif [ "`uname`" == "Linux" ]; then OPENFSTLIBS="$FSTROOT/lib/libfst.so" OPENFSTLDFLAGS="-Wl,-rpath=${FSTROOT}/lib" else From ec66a31b4f8f47b6bde35de4afb20bbd496f892a Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 15 Dec 2016 16:55:12 -0800 Subject: [PATCH 271/530] Fix travis script. --- tools/extras/travis_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index 2b8652a1f25..d347d5dfd58 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -17,7 +17,7 @@ TESTABLE_DIRS="src/" # Run verbose (run and echo) and exit if failed. runvx() { echo "\$ $@" - "$@" || exit 1 + eval "$@" || exit 1 } # $(addsw -L foo bar) => "-Lfoo -Lbar". From e185e2f5843edebfe10e45591bcacb9a7a2c2960 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 15 Dec 2016 17:21:58 -0800 Subject: [PATCH 272/530] Yet another fix for the travis script. --- tools/extras/travis_script.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index d347d5dfd58..b0acca7e4cf 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -17,7 +17,7 @@ TESTABLE_DIRS="src/" # Run verbose (run and echo) and exit if failed. runvx() { echo "\$ $@" - eval "$@" || exit 1 + "$@" || exit 1 } # $(addsw -L foo bar) => "-Lfoo -Lbar". @@ -54,7 +54,9 @@ runvx cd tools runvx make openfst $CCC CXXFLAGS="$CF" -j$MAXPAR cd .. runvx cd src -runvx $CCC CXXFLAGS="$CF" LDFLAGS="$LDF" ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root=$XROOT/usr +# runvx does not work when we have environment variables as prefix +echo "$CCC CXXFLAGS=$CF LDFLAGS=$LDF ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root=$XROOT/usr" +$CCC CXXFLAGS="$CF" LDFLAGS="$LDF" ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root="$XROOT/usr" || exit 1 runvx make all -j$MAXPAR runvx make test -k From 8bccbe061e0dbb1526a5801ffc569b761c8162aa Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 15 Dec 2016 17:35:45 -0800 Subject: [PATCH 273/530] One more fix to travis script. --- tools/extras/travis_script.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index b0acca7e4cf..97dec920b44 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -55,8 +55,8 @@ runvx make openfst $CCC CXXFLAGS="$CF" -j$MAXPAR cd .. 
runvx cd src # runvx does not work when we have environment variables as prefix -echo "$CCC CXXFLAGS=$CF LDFLAGS=$LDF ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root=$XROOT/usr" -$CCC CXXFLAGS="$CF" LDFLAGS="$LDF" ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root="$XROOT/usr" || exit 1 +echo "CXX=$CXX CXXFLAGS=$CF LDFLAGS=$LDF ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root=$XROOT/usr" +CXX="$CXX" CXXFLAGS="$CF" LDFLAGS="$LDF" ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root="$XROOT/usr" || exit 1 runvx make all -j$MAXPAR runvx make test -k From 35ea13afe389bd1e5c97d9154d2f1e7f22394879 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 15 Dec 2016 20:59:01 -0800 Subject: [PATCH 274/530] Quote environment variables defined in travis script to resolve the build error. --- tools/extras/travis_script.sh | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index 97dec920b44..0164e8532ab 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -17,7 +17,7 @@ TESTABLE_DIRS="src/" # Run verbose (run and echo) and exit if failed. runvx() { echo "\$ $@" - "$@" || exit 1 + eval "$@" || exit 1 } # $(addsw -L foo bar) => "-Lfoo -Lbar". @@ -46,17 +46,15 @@ then fi # Prepare environment variables -CF="$CFLAGS -g $(addsw -I $INCDIRS)" -LDF="$LDFLAGS $(addsw -L $LIBDIRS)" -CCC="$(mtoken CXX $CXX)" +CF="\"$CFLAGS -g $(addsw -I $INCDIRS)\"" +LDF="\"$LDFLAGS $(addsw -L $LIBDIRS)\"" +CCC="\"$(mtoken CXX $CXX)\"" runvx cd tools -runvx make openfst $CCC CXXFLAGS="$CF" -j$MAXPAR +runvx make openfst "$CCC" CXXFLAGS="$CF" -j$MAXPAR cd .. runvx cd src -# runvx does not work when we have environment variables as prefix -echo "CXX=$CXX CXXFLAGS=$CF LDFLAGS=$LDF ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root=$XROOT/usr" -CXX="$CXX" CXXFLAGS="$CF" LDFLAGS="$LDF" ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root="$XROOT/usr" || exit 1 +runvx "$CCC" CXXFLAGS="$CF" LDFLAGS="$LDF" ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root="$XROOT/usr" runvx make all -j$MAXPAR runvx make test -k From 41869ea3e84ea62551be0a567c5358c11d95272d Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 15 Dec 2016 21:24:21 -0800 Subject: [PATCH 275/530] Fix the quoting in mtoken function defined in travis script. --- tools/extras/travis_script.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index 0164e8532ab..15e284f66a6 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -28,7 +28,7 @@ addsw() { } # $(mtoken CXX gcc) => "CXX=gcc"; # $(mtoken CXX ) => "". -mtoken() { echo ${2+$1=$2}; } +mtoken() { echo ${2+$1=\"$2\"}; } # Print machine info and environment. runvx uname -a @@ -48,7 +48,7 @@ fi # Prepare environment variables CF="\"$CFLAGS -g $(addsw -I $INCDIRS)\"" LDF="\"$LDFLAGS $(addsw -L $LIBDIRS)\"" -CCC="\"$(mtoken CXX $CXX)\"" +CCC="$(mtoken CXX "$CXX")" runvx cd tools runvx make openfst "$CCC" CXXFLAGS="$CF" -j$MAXPAR From 3f0fa2a6113ad9da491311100bc2680af2a0d42d Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 15 Dec 2016 22:12:51 -0800 Subject: [PATCH 276/530] Run tests in parallel to avoid the travis timeout. 
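
With GNU make, -j$MAXPAR lets up to $MAXPAR test recipes run at once while
-k keeps going past individual failures. Roughly what the updated line runs,
e.g. (illustrative, assuming MAXPAR=4):

    make test -k -j4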
--- tools/extras/travis_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index 15e284f66a6..5aefdd3e543 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -56,7 +56,7 @@ cd .. runvx cd src runvx "$CCC" CXXFLAGS="$CF" LDFLAGS="$LDF" ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root="$XROOT/usr" runvx make all -j$MAXPAR -runvx make test -k +runvx make test -k -j$MAXPAR #runvx make mklibdir base matrix -j$MAXPAR #runvx make matrix/test From e271d98c25972d73aca3dd236e4cacef2813f818 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Tue, 20 Dec 2016 23:19:07 -0800 Subject: [PATCH 277/530] Reorganize platform specific makefiles. --- src/makefiles/common.mk | 30 -------------------------- src/makefiles/cuda_32bit.mk | 1 - src/makefiles/cuda_64bit.mk | 1 - src/makefiles/cygwin.mk | 31 ++++++++++++++++++++++++--- src/makefiles/darwin.mk | 32 +++++++++++++++++++++++----- src/makefiles/linux_atlas.mk | 30 +++++++++++++++++++++----- src/makefiles/linux_atlas_arm.mk | 30 +++++++++++++++++++++----- src/makefiles/linux_clapack.mk | 31 ++++++++++++++++++++++----- src/makefiles/linux_clapack_arm.mk | 31 ++++++++++++++++++++++----- src/makefiles/linux_openblas.mk | 33 +++++++++++++++++++++++------ src/makefiles/linux_openblas_arm.mk | 33 +++++++++++++++++++++++------ src/makefiles/linux_x86_64_mkl.mk | 29 +++++++++++++++++++++---- 12 files changed, 234 insertions(+), 78 deletions(-) delete mode 100644 src/makefiles/common.mk diff --git a/src/makefiles/common.mk b/src/makefiles/common.mk deleted file mode 100644 index 93f6d98c471..00000000000 --- a/src/makefiles/common.mk +++ /dev/null @@ -1,30 +0,0 @@ -# Platform independent settings - -ifndef FSTROOT -$(error FSTROOT not defined.) -endif - -ifndef DOUBLE_PRECISION -$(error DOUBLE_PRECISION not defined.) -endif - -ifndef OPENFSTLIBS -$(error OPENFSTLIBS not defined.) -endif - -CXXFLAGS = -std=c++11 -I.. -I$(FSTROOT)/include \ - -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID - -ifeq ($(KALDI_FLAVOR), dynamic) -CXXFLAGS += -fPIC -endif - -LDFLAGS = $(OPENFSTLDFLAGS) $(EXTRA_LDFLAGS) -LDLIBS = $(OPENFSTLIBS) -lm -lpthread -ldl $(EXTRA_LDLIBS) - -RANLIB = ranlib -AR = ar -AS = as diff --git a/src/makefiles/cuda_32bit.mk b/src/makefiles/cuda_32bit.mk index 4c72451fed8..4019d5027b1 100644 --- a/src/makefiles/cuda_32bit.mk +++ b/src/makefiles/cuda_32bit.mk @@ -1,7 +1,6 @@ ifndef CUDATKDIR $(error CUDATKDIR not defined.) endif - ifndef DOUBLE_PRECISION $(error DOUBLE_PRECISION not defined.) endif diff --git a/src/makefiles/cuda_64bit.mk b/src/makefiles/cuda_64bit.mk index 691fda6135b..0ce7bacdd00 100644 --- a/src/makefiles/cuda_64bit.mk +++ b/src/makefiles/cuda_64bit.mk @@ -1,7 +1,6 @@ ifndef CUDATKDIR $(error CUDATKDIR not defined.) endif - ifndef DOUBLE_PRECISION $(error DOUBLE_PRECISION not defined.) endif diff --git a/src/makefiles/cygwin.mk b/src/makefiles/cygwin.mk index 48f07e901cf..beaea294638 100644 --- a/src/makefiles/cygwin.mk +++ b/src/makefiles/cygwin.mk @@ -1,6 +1,31 @@ # Cygwin settings -CXXFLAGS += -msse -msse2 -DHAVE_CLAPACK -I ../../tools/CLAPACK/ +ifndef DOUBLE_PRECISION +$(error DOUBLE_PRECISION not defined.) +endif +ifndef OPENFSTINC +$(error OPENFSTINC not defined.) +endif +ifndef OPENFSTLIBS +$(error OPENFSTLIBS not defined.) 
+endif -LDFLAGS += -g --enable-auto-import -L/usr/lib/lapack -LDLIBS += -lcyglapack-0 -lcygblas-0 +CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ + -DHAVE_CLAPACK -I ../../tools/CLAPACK/ \ + -msse -msse2 \ + -g # -O0 -DKALDI_PARANOID + +ifeq ($(KALDI_FLAVOR), dynamic) +CXXFLAGS += -fPIC +endif + +LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -g \ + --enable-auto-import -L/usr/lib/lapack +LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) -lcyglapack-0 -lcygblas-0 \ + -lm -lpthread -ldl + +RANLIB = ranlib +AR = ar +AS = as diff --git a/src/makefiles/darwin.mk b/src/makefiles/darwin.mk index 62bc30c6136..2f1692018ac 100644 --- a/src/makefiles/darwin.mk +++ b/src/makefiles/darwin.mk @@ -1,7 +1,32 @@ # Darwin (macOS) settings -CXXFLAGS += -msse -msse2 -pthread \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_CLAPACK +ifndef DOUBLE_PRECISION +$(error DOUBLE_PRECISION not defined.) +endif +ifndef OPENFSTINC +$(error OPENFSTINC not defined.) +endif +ifndef OPENFSTLIBS +$(error OPENFSTLIBS not defined.) +endif + +CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_CLAPACK \ + -msse -msse2 -pthread \ + -g # -O0 -DKALDI_PARANOID + +ifeq ($(KALDI_FLAVOR), dynamic) +CXXFLAGS += -fPIC +endif + +LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -g +LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) -framework Accelerate -lm -lpthread -ldl + +RANLIB = ranlib +AR = ar +AS = as # Compiler specific flags COMPILER = $(shell $(CXX) -v 2>&1) @@ -12,6 +37,3 @@ else ifeq ($(findstring GCC,$(COMPILER)),GCC) # Allow implicit conversions between vectors. CXXFLAGS += -flax-vector-conversions endif - -LDFLAGS += -g -LDLIBS += -framework Accelerate diff --git a/src/makefiles/linux_atlas.mk b/src/makefiles/linux_atlas.mk index 1f366727821..3fbeab9bed3 100644 --- a/src/makefiles/linux_atlas.mk +++ b/src/makefiles/linux_atlas.mk @@ -1,15 +1,35 @@ # ATLAS specific Linux settings +ifndef DOUBLE_PRECISION +$(error DOUBLE_PRECISION not defined.) +endif +ifndef OPENFSTINC +$(error OPENFSTINC not defined.) +endif +ifndef OPENFSTLIBS +$(error OPENFSTLIBS not defined.) +endif ifndef ATLASINC $(error ATLASINC not defined.) endif - ifndef ATLASLIBS $(error ATLASLIBS not defined.) endif -CXXFLAGS += -msse -msse2 -pthread -rdynamic \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) +CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) \ + -msse -msse2 -pthread -rdynamic \ + -g # -O0 -DKALDI_PARANOID + +ifeq ($(KALDI_FLAVOR), dynamic) +CXXFLAGS += -fPIC +endif + +LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic +LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl -LDFLAGS += -rdynamic -LDLIBS += $(ATLASLIBS) +RANLIB = ranlib +AR = ar +AS = as diff --git a/src/makefiles/linux_atlas_arm.mk b/src/makefiles/linux_atlas_arm.mk index 5f62f82d297..0bbcbdd2acd 100644 --- a/src/makefiles/linux_atlas_arm.mk +++ b/src/makefiles/linux_atlas_arm.mk @@ -1,15 +1,35 @@ # ATLAS specific Linux ARM settings +ifndef DOUBLE_PRECISION +$(error DOUBLE_PRECISION not defined.) +endif +ifndef OPENFSTINC +$(error OPENFSTINC not defined.) 
+endif +ifndef OPENFSTLIBS +$(error OPENFSTLIBS not defined.) +endif ifndef ATLASINC $(error ATLASINC not defined.) endif - ifndef ATLASLIBS $(error ATLASLIBS not defined.) endif -CXXFLAGS += -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) +CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) \ + -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ + -g # -O0 -DKALDI_PARANOID + +ifeq ($(KALDI_FLAVOR), dynamic) +CXXFLAGS += -fPIC +endif + +LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic +LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl -LDFLAGS += -rdynamic -LDLIBS += $(ATLASLIBS) +RANLIB = ranlib +AR = ar +AS = as diff --git a/src/makefiles/linux_clapack.mk b/src/makefiles/linux_clapack.mk index 4d733bb207c..60fbf4918e3 100644 --- a/src/makefiles/linux_clapack.mk +++ b/src/makefiles/linux_clapack.mk @@ -1,8 +1,29 @@ # CLAPACK specific Linux settings -CXXFLAGS += -msse -msse2 -pthread -rdynamic \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ - -DHAVE_CLAPACK -I ../../tools/CLAPACK +ifndef DOUBLE_PRECISION +$(error DOUBLE_PRECISION not defined.) +endif +ifndef OPENFSTINC +$(error OPENFSTINC not defined.) +endif +ifndef OPENFSTLIBS +$(error OPENFSTLIBS not defined.) +endif -LDFLAGS += -rdynamic -LDLIBS += $(ATLASLIBS) +CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_CLAPACK -I../../tools/CLAPACK \ + -msse -msse2 -pthread -rdynamic \ + -g # -O0 -DKALDI_PARANOID + +ifeq ($(KALDI_FLAVOR), dynamic) +CXXFLAGS += -fPIC +endif + +LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic +LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl + +RANLIB = ranlib +AR = ar +AS = as diff --git a/src/makefiles/linux_clapack_arm.mk b/src/makefiles/linux_clapack_arm.mk index 7d3119a08c9..ab49a3e6c13 100644 --- a/src/makefiles/linux_clapack_arm.mk +++ b/src/makefiles/linux_clapack_arm.mk @@ -1,8 +1,29 @@ # CLAPACK specific Linux ARM settings -CXXFLAGS += -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ - -DHAVE_CLAPACK -I ../../tools/CLAPACK +ifndef DOUBLE_PRECISION +$(error DOUBLE_PRECISION not defined.) +endif +ifndef OPENFSTINC +$(error OPENFSTINC not defined.) +endif +ifndef OPENFSTLIBS +$(error OPENFSTLIBS not defined.) +endif -LDFLAGS += -rdynamic -LDLIBS += $(ATLASLIBS) +CXXFLAGS = -std=c++11 -I.. 
-I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \
+           -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \
+           -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \
+           -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_CLAPACK -I../../tools/CLAPACK \
+           -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \
+           -g # -O0 -DKALDI_PARANOID
+
+ifeq ($(KALDI_FLAVOR), dynamic)
+CXXFLAGS += -fPIC
+endif
+
+LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic
+LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl
+
+RANLIB = ranlib
+AR = ar
+AS = as
diff --git a/src/makefiles/linux_openblas.mk b/src/makefiles/linux_openblas.mk
index 8636b43e38e..0227c300041 100644
--- a/src/makefiles/linux_openblas.mk
+++ b/src/makefiles/linux_openblas.mk
@@ -1,16 +1,35 @@
 # OpenBLAS specific Linux settings
 
+ifndef DOUBLE_PRECISION
+$(error DOUBLE_PRECISION not defined.)
+endif
+ifndef OPENFSTINC
+$(error OPENFSTINC not defined.)
+endif
+ifndef OPENFSTLIBS
+$(error OPENFSTLIBS not defined.)
+endif
+ifndef OPENBLASINC
+$(error OPENBLASINC not defined.)
+endif
 ifndef OPENBLASLIBS
 $(error OPENBLASLIBS not defined.)
 endif
 
-ifndef OPENBLASROOT
-$(error OPENBLASROOT not defined.)
+CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \
+           -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \
+           -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \
+           -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_OPENBLAS -I$(OPENBLASINC) \
+           -msse -msse2 -pthread -rdynamic \
+           -g # -O0 -DKALDI_PARANOID
+
+ifeq ($(KALDI_FLAVOR), dynamic)
+CXXFLAGS += -fPIC
 endif
 
-CXXFLAGS += -msse -msse2 -pthread -rdynamic \
-      -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \
-      -DHAVE_OPENBLAS -I $(OPENBLASROOT)/include
+LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic
+LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl
 
-LDFLAGS += -rdynamic
-LDLIBS += $(OPENBLASLIBS)
+RANLIB = ranlib
+AR = ar
+AS = as
diff --git a/src/makefiles/linux_openblas_arm.mk b/src/makefiles/linux_openblas_arm.mk
index 682d62b5154..f1cdac8090d 100644
--- a/src/makefiles/linux_openblas_arm.mk
+++ b/src/makefiles/linux_openblas_arm.mk
@@ -1,16 +1,35 @@
 # OpenBLAS specific Linux ARM settings
 
+ifndef DOUBLE_PRECISION
+$(error DOUBLE_PRECISION not defined.)
+endif
+ifndef OPENFSTINC
+$(error OPENFSTINC not defined.)
+endif
+ifndef OPENFSTLIBS
+$(error OPENFSTLIBS not defined.)
+endif
+ifndef OPENBLASINC
+$(error OPENBLASINC not defined.)
+endif
 ifndef OPENBLASLIBS
 $(error OPENBLASLIBS not defined.)
 endif
 
-ifndef OPENBLASROOT
-$(error OPENBLASROOT not defined.)
+CXXFLAGS = -std=c++11 -I.. 
-I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_OPENBLAS -I$(OPENBLASINC) \ + -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ + -g # -O0 -DKALDI_PARANOID + +ifeq ($(KALDI_FLAVOR), dynamic) +CXXFLAGS += -fPIC endif -CXXFLAGS += -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H \ - -DHAVE_OPENBLAS -I $(OPENBLASROOT)/include +LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic +LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl -LDFLAGS += -rdynamic -LDLIBS += $(OPENBLASLIBS) +RANLIB = ranlib +AR = ar +AS = as diff --git a/src/makefiles/linux_x86_64_mkl.mk b/src/makefiles/linux_x86_64_mkl.mk index 5e93d393b3e..83b799356b9 100644 --- a/src/makefiles/linux_x86_64_mkl.mk +++ b/src/makefiles/linux_x86_64_mkl.mk @@ -9,14 +9,31 @@ # Use the options obtained from this website to manually configure for other # platforms using MKL. +ifndef DOUBLE_PRECISION +$(error DOUBLE_PRECISION not defined.) +endif +ifndef OPENFSTINC +$(error OPENFSTINC not defined.) +endif +ifndef OPENFSTLIBS +$(error OPENFSTLIBS not defined.) +endif ifndef MKLROOT $(error MKLROOT not defined.) endif MKLLIB ?= $(MKLROOT)/lib/em64t -CXXFLAGS += -m64 -msse -msse2 -pthread -rdynamic \ - -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_MKL -I$(MKLROOT)/include +CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_MKL -I$(MKLROOT)/include \ + -m64 -msse -msse2 -pthread -rdynamic \ + -g # -O0 -DKALDI_PARANOID + +ifeq ($(KALDI_FLAVOR), dynamic) +CXXFLAGS += -fPIC +endif ## Use the following for STATIC LINKING of the SEQUENTIAL version of MKL MKL_STA_SEQ = $(MKLLIB)/libmkl_solver_lp64_sequential.a -Wl,--start-group \ @@ -38,5 +55,9 @@ MKL_DYN_MUL = -L$(MKLLIB) -lmkl_solver_lp64 -Wl,--start-group -lmkl_intel_lp64 \ # MKLFLAGS = $(MKL_DYN_MUL) -LDFLAGS += -rdynamic -LDLIBS += $(MKLFLAGS) +LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic +LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(MKLFLAGS) -lm -lpthread -ldl + +RANLIB = ranlib +AR = ar +AS = as From d8fd0d9cc9283cc5e4654749a62bd8bf2145446c Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Tue, 20 Dec 2016 23:19:33 -0800 Subject: [PATCH 278/530] Further changes to configure. --- src/Makefile | 15 ++------------- src/configure | 15 +++++++-------- 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/src/Makefile b/src/Makefile index cecc8ca5170..fded748fbe5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -31,7 +31,7 @@ include kaldi.mk # Reset the default goal, so that the all target will become default .DEFAULT_GOAL := -all: checkversion test_dependencies kaldi.mk mklibdir $(SUBDIRS) +all: checkversion kaldi.mk mklibdir $(SUBDIRS) -echo Done mklibdir: @@ -88,23 +88,12 @@ kaldi.mk: @[ -f kaldi.mk ] || { echo "kaldi.mk does not exist; you have to run ./configure"; exit 1; } # Compile optional stuff -ext: test_dependencies ext_depend $(SUBDIRS) $(EXT_SUBDIRS) +ext: ext_depend $(SUBDIRS) $(EXT_SUBDIRS) -echo Done -ifndef OPENFST_VER -$(error Please rerun configure: OPENFST_VER is not defined, likely kaldi.mk was produced by older configure script.) 
-endif -# Note: OPENFST_VER is determined by configure and added to kaldi.mk -OPENFST_VER_NUM := $(shell echo $(OPENFST_VER) | sed 's/\./ /g' | xargs printf "%d%02d%02d") -test_dependencies: -ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10503)","1") - $(error OpenFst $(OPENFST_VER) is not supported. You now need OpenFst >= 1.5.3.) -endif - check_portaudio: @[ -d ../tools/portaudio ] || ( cd ../tools; ./install_portaudio.sh ) - clean: rmlibdir -for x in $(SUBDIRS) $(EXT_SUBDIRS); do $(MAKE) -C $$x clean; done diff --git a/src/configure b/src/configure index b92729319a7..3e4db9a4712 100755 --- a/src/configure +++ b/src/configure @@ -908,9 +908,12 @@ if [ ! -f $FSTROOT/include/fst/fst.h ]; then failure "Could not find file $FSTROOT/include/fst/fst.h: you may not have installed OpenFst. See ../tools/INSTALL" fi -echo "FSTROOT = $FSTROOT" >> kaldi.mk -OPENFST_VER="${OPENFST_VER:-`grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::'`}" -echo "OPENFST_VER = $OPENFST_VER" >> kaldi.mk +OPENFST_VER=$(grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::') +OPENFST_VER_NUM=$(echo $OPENFST_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d") +if [ $OPENFST_VER_NUM -lt 10503 ]; then + failure "OpenFst-$OPENFST_VER is not supported. You need OpenFst >= 1.5.3.)" +fi +echo "OPENFSTINC = $FSTROOT/include" >> kaldi.mk if $static_fst ; then OPENFSTLIBS="$FSTROOT/lib/libfst.a" else @@ -933,10 +936,6 @@ echo "OPENFSTLDFLAGS = $OPENFSTLDFLAGS" >> kaldi.mk echo "CXX = $CXX" >> kaldi.mk echo >> kaldi.mk -# Add platform independent settings -cat makefiles/common.mk >> kaldi.mk -echo >> kaldi.mk - # Most of the OS-specific steps below will append to kaldi.mk echo "Doing OS specific configurations ..." @@ -1118,8 +1117,8 @@ elif [ "`uname`" == "Linux" ]; then echo "Configuring dynamically loaded OpenBlas since --static-math=no (the default)" OPENBLASLIBS="-L$OPENBLASROOT/lib -lopenblas -lgfortran -Wl,-rpath=$OPENBLASROOT/lib" fi + echo "OPENBLASINC = $OPENBLASROOT/include" >> kaldi.mk echo "OPENBLASLIBS = $OPENBLASLIBS" >> kaldi.mk - echo "OPENBLASROOT = $OPENBLASROOT" >> kaldi.mk if [[ "`uname -m`" == arm* ]]; then cat makefiles/linux_openblas_arm.mk >> kaldi.mk elif [[ "`uname -m`" == ppc64le ]]; then From d454b868901ec59d5d5a0eb8f494856a10753f27 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Tue, 20 Dec 2016 23:27:36 -0800 Subject: [PATCH 279/530] Configure script now accepts binary flags without the yes/no qualifiers as yes. 
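
Each binary option now also matches a bare form without the "=yes"
suffix and maps it to the same assignment as the explicit form, so the
following two invocations become equivalent (illustrative command
lines; every flag touched in this diff behaves the same way):

    ./configure --use-cuda
    ./configure --use-cuda=yes
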
--- src/configure | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/configure b/src/configure index 3e4db9a4712..1131b5f069e 100755 --- a/src/configure +++ b/src/configure @@ -219,6 +219,9 @@ do static_math=false; static_fst=false; shift ;; + --double-precision) + double_precision=true; + shift ;; --double-precision=yes) double_precision=true; shift ;; @@ -228,12 +231,19 @@ do --atlas-root=*) ATLASROOT=`read_dirname $1`; shift ;; + --threaded-atlas) + threaded_atlas=true; + shift ;; --threaded-atlas=yes) threaded_atlas=true; shift ;; --threaded-atlas=no) threaded_atlas=false; shift ;; + --threaded-math) + threaded_atlas=true; + mkl_threading=iomp + shift ;; --threaded-math=yes) threaded_atlas=true; mkl_threading=iomp @@ -242,18 +252,27 @@ do threaded_atlas=false; mkl_threading=sequential shift ;; + --use-cuda) + use_cuda=true; + shift ;; --use-cuda=yes) use_cuda=true; shift ;; --use-cuda=no) use_cuda=false; shift ;; + --static-math) + static_math=true; + shift ;; --static-math=yes) static_math=true; shift ;; --static-math=no) static_math=false; shift ;; + --static-fst) + static_fst=true; + shift ;; --static-fst=yes) static_fst=true; shift ;; From 2c33bd228177046f914f3bbd06fea0799c2c7553 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Tue, 20 Dec 2016 23:45:48 -0800 Subject: [PATCH 280/530] Small cosmetic changes to platform specific makefiles. --- src/fstext/context-fst-inl.h | 11 ----------- src/makefiles/cuda_32bit.mk | 6 +++--- src/makefiles/cuda_64bit.mk | 6 +++--- src/makefiles/cygwin.mk | 4 ++-- src/makefiles/darwin.mk | 2 +- src/makefiles/linux_atlas.mk | 2 +- src/makefiles/linux_atlas_arm.mk | 2 +- src/makefiles/linux_clapack.mk | 2 +- src/makefiles/linux_clapack_arm.mk | 2 +- src/makefiles/linux_openblas.mk | 4 ++-- src/makefiles/linux_openblas_arm.mk | 2 +- src/makefiles/linux_x86_64_mkl.mk | 2 +- 12 files changed, 17 insertions(+), 28 deletions(-) diff --git a/src/fstext/context-fst-inl.h b/src/fstext/context-fst-inl.h index 4427863d887..6fa8632cf67 100644 --- a/src/fstext/context-fst-inl.h +++ b/src/fstext/context-fst-inl.h @@ -338,17 +338,6 @@ void ContextFstImpl::Expand(StateId s) { // expands arcs only [not this->SetArcs(s); // mark the arcs as "done". [so HasArcs returns true]. } - -// template -// ContextFst::ContextFst(const ContextFst &fst, bool reset) { -// if (reset) { -// impl_ = std::make_shared >(*(fst.impl_)); -// } else { -// impl_ = fst.impl_; -// } -// } - - template bool ContextMatcher::Find(typename Arc::Label match_label) { assert(s_ != kNoStateId); diff --git a/src/makefiles/cuda_32bit.mk b/src/makefiles/cuda_32bit.mk index 4019d5027b1..c6bba9669ea 100644 --- a/src/makefiles/cuda_32bit.mk +++ b/src/makefiles/cuda_32bit.mk @@ -1,9 +1,9 @@ -ifndef CUDATKDIR -$(error CUDATKDIR not defined.) -endif ifndef DOUBLE_PRECISION $(error DOUBLE_PRECISION not defined.) endif +ifndef CUDATKDIR +$(error CUDATKDIR not defined.) +endif CUDA_INCLUDE= -I$(CUDATKDIR)/include CUDA_FLAGS = -g -Xcompiler -fPIC --verbose --machine 32 -DHAVE_CUDA \ diff --git a/src/makefiles/cuda_64bit.mk b/src/makefiles/cuda_64bit.mk index 0ce7bacdd00..89696253c84 100644 --- a/src/makefiles/cuda_64bit.mk +++ b/src/makefiles/cuda_64bit.mk @@ -1,9 +1,9 @@ -ifndef CUDATKDIR -$(error CUDATKDIR not defined.) -endif ifndef DOUBLE_PRECISION $(error DOUBLE_PRECISION not defined.) endif +ifndef CUDATKDIR +$(error CUDATKDIR not defined.) 
+endif CUDA_INCLUDE= -I$(CUDATKDIR)/include CUDA_FLAGS = -g -Xcompiler -fPIC --verbose --machine 64 -DHAVE_CUDA \ diff --git a/src/makefiles/cygwin.mk b/src/makefiles/cygwin.mk index beaea294638..6cae548e3b2 100644 --- a/src/makefiles/cygwin.mk +++ b/src/makefiles/cygwin.mk @@ -13,7 +13,7 @@ endif CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -DHAVE_CLAPACK -I ../../tools/CLAPACK/ \ + -DHAVE_CLAPACK -I../../tools/CLAPACK/ \ -msse -msse2 \ -g # -O0 -DKALDI_PARANOID @@ -26,6 +26,6 @@ LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -g \ LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) -lcyglapack-0 -lcygblas-0 \ -lm -lpthread -ldl -RANLIB = ranlib AR = ar AS = as +RANLIB = ranlib diff --git a/src/makefiles/darwin.mk b/src/makefiles/darwin.mk index 2f1692018ac..e0570e43d55 100644 --- a/src/makefiles/darwin.mk +++ b/src/makefiles/darwin.mk @@ -24,9 +24,9 @@ endif LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -g LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) -framework Accelerate -lm -lpthread -ldl -RANLIB = ranlib AR = ar AS = as +RANLIB = ranlib # Compiler specific flags COMPILER = $(shell $(CXX) -v 2>&1) diff --git a/src/makefiles/linux_atlas.mk b/src/makefiles/linux_atlas.mk index 3fbeab9bed3..8ae3b46c92e 100644 --- a/src/makefiles/linux_atlas.mk +++ b/src/makefiles/linux_atlas.mk @@ -30,6 +30,6 @@ endif LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl -RANLIB = ranlib AR = ar AS = as +RANLIB = ranlib diff --git a/src/makefiles/linux_atlas_arm.mk b/src/makefiles/linux_atlas_arm.mk index 0bbcbdd2acd..c20ebd2373c 100644 --- a/src/makefiles/linux_atlas_arm.mk +++ b/src/makefiles/linux_atlas_arm.mk @@ -30,6 +30,6 @@ endif LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl -RANLIB = ranlib AR = ar AS = as +RANLIB = ranlib diff --git a/src/makefiles/linux_clapack.mk b/src/makefiles/linux_clapack.mk index 60fbf4918e3..a597bd14935 100644 --- a/src/makefiles/linux_clapack.mk +++ b/src/makefiles/linux_clapack.mk @@ -24,6 +24,6 @@ endif LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl -RANLIB = ranlib AR = ar AS = as +RANLIB = ranlib diff --git a/src/makefiles/linux_clapack_arm.mk b/src/makefiles/linux_clapack_arm.mk index ab49a3e6c13..5b60dc11e1a 100644 --- a/src/makefiles/linux_clapack_arm.mk +++ b/src/makefiles/linux_clapack_arm.mk @@ -24,6 +24,6 @@ endif LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl -RANLIB = ranlib AR = ar AS = as +RANLIB = ranlib diff --git a/src/makefiles/linux_openblas.mk b/src/makefiles/linux_openblas.mk index 0227c300041..eaccd5d8646 100644 --- a/src/makefiles/linux_openblas.mk +++ b/src/makefiles/linux_openblas.mk @@ -10,7 +10,7 @@ ifndef OPENFSTLIBS $(error OPENFSTLIBS not defined.) endif ifndef OPENBLASINC -$(error OPENBLASROOT not defined.) +$(error OPENBLASINC not defined.) endif ifndef OPENBLASLIBS $(error OPENBLASLIBS not defined.) 
@@ -30,6 +30,6 @@ endif LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl -RANLIB = ranlib AR = ar AS = as +RANLIB = ranlib diff --git a/src/makefiles/linux_openblas_arm.mk b/src/makefiles/linux_openblas_arm.mk index f1cdac8090d..4e6e31aa715 100644 --- a/src/makefiles/linux_openblas_arm.mk +++ b/src/makefiles/linux_openblas_arm.mk @@ -30,6 +30,6 @@ endif LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl -RANLIB = ranlib AR = ar AS = as +RANLIB = ranlib diff --git a/src/makefiles/linux_x86_64_mkl.mk b/src/makefiles/linux_x86_64_mkl.mk index 83b799356b9..dbd7d72a523 100644 --- a/src/makefiles/linux_x86_64_mkl.mk +++ b/src/makefiles/linux_x86_64_mkl.mk @@ -58,6 +58,6 @@ MKL_DYN_MUL = -L$(MKLLIB) -lmkl_solver_lp64 -Wl,--start-group -lmkl_intel_lp64 \ LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(MKLFLAGS) -lm -lpthread -ldl -RANLIB = ranlib AR = ar AS = as +RANLIB = ranlib From f0f74aa93dc1b6eebfc09942e21038e05927a99c Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Tue, 20 Dec 2016 23:55:02 -0800 Subject: [PATCH 281/530] Update installation instructions. --- src/INSTALL | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/INSTALL b/src/INSTALL index 8decefe71c2..f40a514c4b6 100644 --- a/src/INSTALL +++ b/src/INSTALL @@ -23,8 +23,7 @@ e.g. g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3. If your system default compiler does not support C++11, you can specify a C++11 compliant compiler by setting the CXX environment variable, e.g. - make depend CXX=g++-4.8 - make CXX=g++-4.8 + CXX=g++-4.8 ./configure --shared For more information, see documentation at http://kaldi-asr.org/doc/ and click on "The build process (how Kaldi is compiled)". From ed847d5adcbf47ed243498f392d780fa46dbc819 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Wed, 21 Dec 2016 12:22:39 -0800 Subject: [PATCH 282/530] Cosmetic fixes. --- src/configure | 6 ++++-- src/makefiles/cuda_32bit.mk | 2 ++ src/makefiles/cuda_64bit.mk | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/configure b/src/configure index 1131b5f069e..f9ad1cd8c07 100755 --- a/src/configure +++ b/src/configure @@ -510,7 +510,6 @@ function configure_cuda { fi echo "Using CUDA toolkit $CUDATKDIR (nvcc compiler and runtime libraries)" echo >> kaldi.mk - echo "#Next section enables CUDA for compilation" >> kaldi.mk echo CUDA = true >> kaldi.mk echo CUDATKDIR = $CUDATKDIR >> kaldi.mk @@ -966,7 +965,7 @@ if [ "`uname`" == "Darwin" ]; then if [ ! -e /System/Library/Frameworks/Accelerate.framework ]; then failure "Need the Accelerate framework to compile on Darwin." fi - OSX_VER=`sw_vers | grep ProductVersion | awk '{print $2}' | awk '{split($0,a,"."); print a[1] "." a[2]; }'` + OSX_VER=$(sw_vers | grep ProductVersion | awk '{print $2}' | awk '{split($0,a,"."); print a[1] "." a[2]; }') OSX_VER_NUM=$(echo $OSX_VER | sed 's/\./ /g' | xargs printf "%d%02d") echo "Configuring for OS X version $OSX_VER ..." if [ $OSX_VER_NUM -ge 1005 ]; then @@ -1018,6 +1017,7 @@ elif [ "`uname`" == "Linux" ]; then # containing {liblapack.a,libblas.a}, and linking against just these two # libraries worked. + echo >> kaldi.mk if $static_math; then # Prefer static to dynamic math. linux_configure_static || \ @@ -1088,6 +1088,7 @@ elif [ "`uname`" == "Linux" ]; then if [ ! 
-z $MKLLIBDIR ]; then echo MKLLIB = $MKLLIBDIR >> kaldi.mk fi + echo >> kaldi.mk check_exists makefiles/linux_x86_64_mkl.mk cat makefiles/linux_x86_64_mkl.mk >> kaldi.mk echo "MKLFLAGS = ${MKL_LINK_LINE} ${THREADING_LINE} $EXTRA_LIBS " >> kaldi.mk @@ -1138,6 +1139,7 @@ elif [ "`uname`" == "Linux" ]; then fi echo "OPENBLASINC = $OPENBLASROOT/include" >> kaldi.mk echo "OPENBLASLIBS = $OPENBLASLIBS" >> kaldi.mk + echo >> kaldi.mk if [[ "`uname -m`" == arm* ]]; then cat makefiles/linux_openblas_arm.mk >> kaldi.mk elif [[ "`uname -m`" == ppc64le ]]; then diff --git a/src/makefiles/cuda_32bit.mk b/src/makefiles/cuda_32bit.mk index c6bba9669ea..84b8686e374 100644 --- a/src/makefiles/cuda_32bit.mk +++ b/src/makefiles/cuda_32bit.mk @@ -1,3 +1,5 @@ +# 32bit CUDA settings + ifndef DOUBLE_PRECISION $(error DOUBLE_PRECISION not defined.) endif diff --git a/src/makefiles/cuda_64bit.mk b/src/makefiles/cuda_64bit.mk index 89696253c84..c47908e7323 100644 --- a/src/makefiles/cuda_64bit.mk +++ b/src/makefiles/cuda_64bit.mk @@ -1,3 +1,5 @@ +# 64bit CUDA settings + ifndef DOUBLE_PRECISION $(error DOUBLE_PRECISION not defined.) endif From 262d993673ef4ec2c677c0bab80a87c6602c375e Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Wed, 21 Dec 2016 12:30:47 -0800 Subject: [PATCH 283/530] More cosmetic fixes. --- src/configure | 60 +++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/src/configure b/src/configure index f9ad1cd8c07..a9c11980812 100755 --- a/src/configure +++ b/src/configure @@ -588,30 +588,31 @@ function linux_configure_speex { } function linux_atlas_failure { - echo ATLASINC = $ATLASROOT/include >> kaldi.mk - echo ATLASLIBS = [somewhere]/liblapack.a [somewhere]/libcblas.a [somewhere]/libatlas.a [somewhere]/libf77blas.a $ATLASLIBDIR >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then - cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then - cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk - else - cat makefiles/linux_atlas.mk >> kaldi.mk - fi - echo "** $* ***" - echo "** ERROR **" - echo "** Configure cannot proceed automatically." - echo "** If you know that you have ATLAS installed somewhere on your machine, you" - echo "** may be able to proceed by replacing [somewhere] in kaldi.mk with a directory." - echo "** If you have sudo (root) access you could install the ATLAS package on your" - echo "** machine, e.g. 'sudo apt-get install libatlas-dev libatlas-base-dev' or" - echo "** 'sudo yum install atlas.x86_64' or 'sudo zypper install libatlas3-devel'," - echo "** or on cygwin, install atlas from the installer GUI; and then run ./configure" - echo "** again." - echo "**" - echo "** Otherwise (or if you prefer OpenBLAS for speed), you could go the OpenBLAS" - echo "** route: cd to ../tools, type 'extras/install_openblas.sh', cd back to here," - echo "** and type './configure --openblas-root=../tools/OpenBLAS/install'" - exit 1; + echo ATLASINC = $ATLASROOT/include >> kaldi.mk + echo ATLASLIBS = [somewhere]/liblapack.a [somewhere]/libcblas.a [somewhere]/libatlas.a [somewhere]/libf77blas.a $ATLASLIBDIR >> kaldi.mk + echo >> kaldi.mk + if [[ "`uname -m`" == arm* ]]; then + cat makefiles/linux_atlas_arm.mk >> kaldi.mk + elif [[ "`uname -m`" == ppc64le ]]; then + cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk + else + cat makefiles/linux_atlas.mk >> kaldi.mk + fi + echo "** $* ***" + echo "** ERROR **" + echo "** Configure cannot proceed automatically." 
+ echo "** If you know that you have ATLAS installed somewhere on your machine, you" + echo "** may be able to proceed by replacing [somewhere] in kaldi.mk with a directory." + echo "** If you have sudo (root) access you could install the ATLAS package on your" + echo "** machine, e.g. 'sudo apt-get install libatlas-dev libatlas-base-dev' or" + echo "** 'sudo yum install atlas.x86_64' or 'sudo zypper install libatlas3-devel'," + echo "** or on cygwin, install atlas from the installer GUI; and then run ./configure" + echo "** again." + echo "**" + echo "** Otherwise (or if you prefer OpenBLAS for speed), you could go the OpenBLAS" + echo "** route: cd to ../tools, type 'extras/install_openblas.sh', cd back to here," + echo "** and type './configure --openblas-root=../tools/OpenBLAS/install'" + exit 1; } function linux_check_static { @@ -646,6 +647,7 @@ function linux_configure_debian_ubuntu { fi echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk + echo >> kaldi.mk if [[ "`uname -m`" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk elif [[ "`uname -m`" == ppc64le ]]; then @@ -669,6 +671,7 @@ function linux_configure_debian_ubuntu3 { fi echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk + echo >> kaldi.mk if [[ "`uname -m`" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk elif [[ "`uname -m`" == ppc64le ]]; then @@ -694,7 +697,7 @@ function linux_configure_debian7 { [ -z "$libdir" ] && echo "Error getting libdir in linux_configure_debian7" && exit 1; echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk - echo + echo >> kaldi.mk if [[ "`uname -m`" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk elif [[ "`uname -m`" == ppc64le ]]; then @@ -717,7 +720,7 @@ function linux_configure_redhat { [ -z "$libdir" ] && echo "Error getting libdir in linux_configure_redhat" && exit 1; echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk - echo + echo >> kaldi.mk if [[ "`uname -m`" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk elif [[ "`uname -m`" == ppc64le ]]; then @@ -742,7 +745,7 @@ function linux_configure_redhat_fat { [ -z "$libdir" ] && echo "Error getting libdir in linux_configure_redhat_fat" && exit 1; echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk - echo + echo >> kaldi.mk if [[ "`uname -m`" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk elif [[ "`uname -m`" == ppc64le ]]; then @@ -799,6 +802,7 @@ function linux_configure_static { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk + echo >> kaldi.mk if [[ "`uname -m`" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk elif [[ "`uname -m`" == ppc64le ]]; then @@ -882,6 +886,7 @@ function linux_configure_dynamic { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk + echo >> kaldi.mk if [[ "`uname -m`" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk elif [[ "`uname -m`" == ppc64le ]]; then @@ -1017,7 +1022,6 @@ elif [ "`uname`" == "Linux" ]; then # containing {liblapack.a,libblas.a}, and linking against just these two # libraries worked. - echo >> kaldi.mk if $static_math; then # Prefer static to dynamic math. 
linux_configure_static || \ From b863e2eae132951cf3c6b0082aabdb51aa23aefc Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Wed, 21 Dec 2016 12:49:47 -0800 Subject: [PATCH 284/530] Further cosmetic fixes. --- src/configure | 9 ++++++--- src/makefiles/cuda_32bit.mk | 2 -- src/makefiles/cuda_64bit.mk | 2 -- src/makefiles/cygwin.mk | 2 +- src/makefiles/darwin.mk | 2 +- src/makefiles/linux_atlas.mk | 2 +- src/makefiles/linux_atlas_arm.mk | 2 +- src/makefiles/linux_clapack.mk | 2 +- src/makefiles/linux_clapack_arm.mk | 2 +- src/makefiles/linux_openblas.mk | 2 +- src/makefiles/linux_openblas_arm.mk | 2 +- src/makefiles/linux_x86_64_mkl.mk | 2 +- 12 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/configure b/src/configure index a9c11980812..07d9bb0a319 100755 --- a/src/configure +++ b/src/configure @@ -510,6 +510,8 @@ function configure_cuda { fi echo "Using CUDA toolkit $CUDATKDIR (nvcc compiler and runtime libraries)" echo >> kaldi.mk + echo "# CUDA configuration" >> kaldi.mk + echo >> kaldi.mk echo CUDA = true >> kaldi.mk echo CUDATKDIR = $CUDATKDIR >> kaldi.mk @@ -528,6 +530,7 @@ function configure_cuda { *) echo "Unsupported CUDA_VERSION (CUDA_VERSION=$CUDA_VERSION), please report it to Kaldi mailing list, together with 'nvcc -h' or 'ptxas -h' which lists allowed -gencode values..."; exit 1 ;; esac echo "CUDA_ARCH = $CUDA_ARCH" >> kaldi.mk + echo >> kaldi.mk # 64bit/32bit? if [ "`uname -m`" == "x86_64" ]; then @@ -903,7 +906,7 @@ echo "Configuring ..." # Back up the old kaldi.mk in case we modified it if [ -f kaldi.mk ]; then - echo "Backing up kaldi.mk to kaldi.mk.bak" + echo "Backing up kaldi.mk to kaldi.mk.bak ..." cp kaldi.mk kaldi.mk.bak fi @@ -914,7 +917,7 @@ printf "# This file was generated using the following command:\n# $cmd_line\n\n" echo "CONFIGURE_VERSION := $CONFIGURE_VERSION" >> kaldi.mk echo >> kaldi.mk -echo "# Configuration" >> kaldi.mk +echo "# Base configuration" >> kaldi.mk echo >> kaldi.mk if $dynamic_kaldi ; then KALDILIBDIR=`pwd`/lib @@ -1167,7 +1170,7 @@ fi # Append the flags set by environment variables last so they can be used # to override the automatically generated configuration. echo >> kaldi.mk -echo "# Environment settings" >> kaldi.mk +echo "# Environment configuration" >> kaldi.mk echo >> kaldi.mk if [ -n "$ENV_CXXFLAGS" ]; then echo "CXXFLAGS += $ENV_CXXFLAGS" >> kaldi.mk; fi if [ -n "$ENV_LDFLAGS" ]; then echo "LDFLAGS += $ENV_LDFLAGS" >> kaldi.mk; fi diff --git a/src/makefiles/cuda_32bit.mk b/src/makefiles/cuda_32bit.mk index 84b8686e374..c6bba9669ea 100644 --- a/src/makefiles/cuda_32bit.mk +++ b/src/makefiles/cuda_32bit.mk @@ -1,5 +1,3 @@ -# 32bit CUDA settings - ifndef DOUBLE_PRECISION $(error DOUBLE_PRECISION not defined.) endif diff --git a/src/makefiles/cuda_64bit.mk b/src/makefiles/cuda_64bit.mk index c47908e7323..89696253c84 100644 --- a/src/makefiles/cuda_64bit.mk +++ b/src/makefiles/cuda_64bit.mk @@ -1,5 +1,3 @@ -# 64bit CUDA settings - ifndef DOUBLE_PRECISION $(error DOUBLE_PRECISION not defined.) endif diff --git a/src/makefiles/cygwin.mk b/src/makefiles/cygwin.mk index 6cae548e3b2..14ece9d4ee7 100644 --- a/src/makefiles/cygwin.mk +++ b/src/makefiles/cygwin.mk @@ -1,4 +1,4 @@ -# Cygwin settings +# Cygwin configuration ifndef DOUBLE_PRECISION $(error DOUBLE_PRECISION not defined.) 
diff --git a/src/makefiles/darwin.mk b/src/makefiles/darwin.mk
index e0570e43d55..5dbcd6f768b 100644
--- a/src/makefiles/darwin.mk
+++ b/src/makefiles/darwin.mk
@@ -1,4 +1,4 @@
-# Darwin (macOS) settings
+# Darwin (macOS) configuration
 
 ifndef DOUBLE_PRECISION
 $(error DOUBLE_PRECISION not defined.)
diff --git a/src/makefiles/linux_atlas.mk b/src/makefiles/linux_atlas.mk
index 8ae3b46c92e..9ab038295b6 100644
--- a/src/makefiles/linux_atlas.mk
+++ b/src/makefiles/linux_atlas.mk
@@ -1,4 +1,4 @@
-# ATLAS specific Linux settings
+# ATLAS specific Linux configuration
 
 ifndef DOUBLE_PRECISION
 $(error DOUBLE_PRECISION not defined.)
diff --git a/src/makefiles/linux_atlas_arm.mk b/src/makefiles/linux_atlas_arm.mk
index c20ebd2373c..0dfc32863b4 100644
--- a/src/makefiles/linux_atlas_arm.mk
+++ b/src/makefiles/linux_atlas_arm.mk
@@ -1,4 +1,4 @@
-# ATLAS specific Linux ARM settings
+# ATLAS specific Linux ARM configuration
 
 ifndef DOUBLE_PRECISION
 $(error DOUBLE_PRECISION not defined.)
diff --git a/src/makefiles/linux_clapack.mk b/src/makefiles/linux_clapack.mk
index a597bd14935..d8f8cf5668f 100644
--- a/src/makefiles/linux_clapack.mk
+++ b/src/makefiles/linux_clapack.mk
@@ -1,4 +1,4 @@
-# CLAPACK specific Linux settings
+# CLAPACK specific Linux configuration
 
 ifndef DOUBLE_PRECISION
 $(error DOUBLE_PRECISION not defined.)
diff --git a/src/makefiles/linux_clapack_arm.mk b/src/makefiles/linux_clapack_arm.mk
index 5b60dc11e1a..432bd689f55 100644
--- a/src/makefiles/linux_clapack_arm.mk
+++ b/src/makefiles/linux_clapack_arm.mk
@@ -1,4 +1,4 @@
-# CLAPACK specific Linux ARM settings
+# CLAPACK specific Linux ARM configuration
 
 ifndef DOUBLE_PRECISION
 $(error DOUBLE_PRECISION not defined.)
diff --git a/src/makefiles/linux_openblas.mk b/src/makefiles/linux_openblas.mk
index eaccd5d8646..a859fc7e272 100644
--- a/src/makefiles/linux_openblas.mk
+++ b/src/makefiles/linux_openblas.mk
@@ -1,4 +1,4 @@
-# OpenBLAS specific Linux settings
+# OpenBLAS specific Linux configuration
 
 ifndef DOUBLE_PRECISION
 $(error DOUBLE_PRECISION not defined.)
diff --git a/src/makefiles/linux_openblas_arm.mk b/src/makefiles/linux_openblas_arm.mk
index 4e6e31aa715..00c4ae2bbdd 100644
--- a/src/makefiles/linux_openblas_arm.mk
+++ b/src/makefiles/linux_openblas_arm.mk
@@ -1,4 +1,4 @@
-# OpenBLAS specific Linux ARM settings
+# OpenBLAS specific Linux ARM configuration
 
 ifndef DOUBLE_PRECISION
 $(error DOUBLE_PRECISION not defined.)
diff --git a/src/makefiles/linux_x86_64_mkl.mk b/src/makefiles/linux_x86_64_mkl.mk
index dbd7d72a523..d2aee4a036f 100644
--- a/src/makefiles/linux_x86_64_mkl.mk
+++ b/src/makefiles/linux_x86_64_mkl.mk
@@ -1,4 +1,4 @@
-# MKL specific Linux settings
+# MKL specific Linux configuration
 
 # We have tested Kaldi with MKL version 10.2 on Linux/GCC and Intel(R) 64
 # architecture (also referred to as x86_64) with LP64 interface layer.
From b863e2eae132951cf3c6b0082aabdb51aa23aefc Mon Sep 17 00:00:00 2001
From: Dogan Can
Date: Wed, 21 Dec 2016 14:16:55 -0800
Subject: [PATCH 285/530] Synchronize ppc64le configuration.
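
Besides aligning the layout with the other platform makefiles, this
drops the separate cuda_ppc64le.mk in favor of the common
cuda_64bit.mk and removes settings the old ppc64le files hardcoded
(DOUBLE_PRECISION = 0 and CC/CXX = g++). Those values now come from
the kaldi.mk written by configure, guarded by the same idiom used
elsewhere:

    ifndef DOUBLE_PRECISION
    $(error DOUBLE_PRECISION not defined.)
    endif
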
--- src/configure | 2 +- src/makefiles/cuda_ppc64le.mk | 12 ------- src/makefiles/linux_atlas_ppc64le.mk | 37 +++++++++++---------- src/makefiles/linux_openblas_ppc64le.mk | 43 ++++++++++++------------- 4 files changed, 40 insertions(+), 54 deletions(-) delete mode 100644 src/makefiles/cuda_ppc64le.mk diff --git a/src/configure b/src/configure index 07d9bb0a319..8b68c97fd67 100755 --- a/src/configure +++ b/src/configure @@ -540,7 +540,7 @@ function configure_cuda { cat makefiles/cuda_64bit.mk >> kaldi.mk fi elif [ "`uname -m`" == "ppc64le" ]; then - cat makefiles/cuda_ppc64le.mk >> kaldi.mk + cat makefiles/cuda_64bit.mk >> kaldi.mk else cat makefiles/cuda_32bit.mk >> kaldi.mk fi diff --git a/src/makefiles/cuda_ppc64le.mk b/src/makefiles/cuda_ppc64le.mk deleted file mode 100644 index 3941de6a230..00000000000 --- a/src/makefiles/cuda_ppc64le.mk +++ /dev/null @@ -1,12 +0,0 @@ - -ifndef DOUBLE_PRECISION -$(error DOUBLE_PRECISION not defined.) -endif - - -CUDA_INCLUDE= -I$(CUDATKDIR)/include -CUDA_FLAGS = -g -Xcompiler -fPIC --verbose --machine 64 -DHAVE_CUDA \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) -CXXFLAGS += -DHAVE_CUDA -I$(CUDATKDIR)/include -CUDA_LDFLAGS += -L$(CUDATKDIR)/lib64 -Wl,-rpath,$(CUDATKDIR)/lib64 -CUDA_LDLIBS += -lcublas -lcudart -lcurand #LDLIBS : The libs are loaded later than static libs in implicit rule diff --git a/src/makefiles/linux_atlas_ppc64le.mk b/src/makefiles/linux_atlas_ppc64le.mk index 234a3794721..aa121fc5cdc 100644 --- a/src/makefiles/linux_atlas_ppc64le.mk +++ b/src/makefiles/linux_atlas_ppc64le.mk @@ -1,37 +1,36 @@ -# You have to make sure ATLASLIBS is set... +# ATLAS specific Linux ppc64le configuration -ifndef FSTROOT -$(error FSTROOT not defined.) +ifndef DOUBLE_PRECISION +$(error DOUBLE_PRECISION not defined.) +endif +ifndef OPENFSTINC +$(error OPENFSTINC not defined.) +endif +ifndef OPENFSTLIBS +$(error OPENFSTLIBS not defined.) endif - ifndef ATLASINC $(error ATLASINC not defined.) endif - ifndef ATLASLIBS $(error ATLASLIBS not defined.) endif - -DOUBLE_PRECISION = 0 -CXXFLAGS = -m64 -maltivec -mcpu=power8 -Wall -I.. \ - -mtune=power8 -mpower8-vector -mvsx -pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ - -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ - -DHAVE_ATLAS -I$(ATLASINC) \ - -I$(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID +CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) \ + -m64 -maltivec -mcpu=power8 -mtune=power8 -mpower8-vector -mvsx \ + -pthread -rdynamic \ + -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif -LDFLAGS = -rdynamic $(OPENFSTLDFLAGS) +LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl -CC = g++ -CXX = g++ + AR = ar AS = as RANLIB = ranlib diff --git a/src/makefiles/linux_openblas_ppc64le.mk b/src/makefiles/linux_openblas_ppc64le.mk index 222551f3bab..1e7a391dc79 100644 --- a/src/makefiles/linux_openblas_ppc64le.mk +++ b/src/makefiles/linux_openblas_ppc64le.mk @@ -1,37 +1,36 @@ -# You have to make sure FSTROOT,OPENBLASROOT,OPENBLASLIBS are set... +# OpenBLAS specific Linux configuration -ifndef FSTROOT -$(error FSTROOT not defined.) +ifndef DOUBLE_PRECISION +$(error DOUBLE_PRECISION not defined.) 
+endif +ifndef OPENFSTINC +$(error OPENFSTINC not defined.) +endif +ifndef OPENFSTLIBS +$(error OPENFSTLIBS not defined.) +endif +ifndef OPENBLASINC +$(error OPENBLASINC not defined.) endif - ifndef OPENBLASLIBS $(error OPENBLASLIBS not defined.) endif -ifndef OPENBLASROOT -$(error OPENBLASROOT not defined.) -endif - - -DOUBLE_PRECISION = 0 -CXXFLAGS = -m64 -maltivec -mcpu=power8 -Wall -I.. \ - -mtune=power8 -mpower8-vector -mvsx -pthread \ - -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ - -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ - -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \ - -DHAVE_OPENBLAS -I $(OPENBLASROOT)/include \ - -I $(FSTROOT)/include \ - $(EXTRA_CXXFLAGS) \ - -g # -O0 -DKALDI_PARANOID +CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_OPENBLAS -I$(OPENBLASINC) \ + -m64 -maltivec -mcpu=power8 -mtune=power8 -mpower8-vector -mvsx \ + -pthread -rdynamic \ + -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif -LDFLAGS = -rdynamic $(OPENFSTLDFLAGS) +LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl -CC = g++ -CXX = g++ + AR = ar AS = as RANLIB = ranlib From a0accb24c2ba0c822ad934782bb5b2d71be619f2 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Wed, 21 Dec 2016 14:17:18 -0800 Subject: [PATCH 286/530] Update .gitignore. --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index b0784cc2c0c..fc4f047b342 100644 --- a/.gitignore +++ b/.gitignore @@ -84,6 +84,8 @@ GSYMS /tools/openfst-1.3.4/ /tools/openfst-1.4.1.tar.gz /tools/openfst-1.4.1/ +/tools/openfst-1.5.4.tar.gz +/tools/openfst-1.5.4/ /tools/pa_stable_v19_20111121.tgz /tools/portaudio/ /tools/sctk-2.4.0-20091110-0958.tar.bz2 From b3d0e152271968a9abee175171199ec81e93965c Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 22 Dec 2016 21:24:55 -0800 Subject: [PATCH 287/530] Remove a few include guards that are no longer needed. 
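
The build now passes -std=c++11 unconditionally (see the platform
makefiles), so std::unordered_map and std::unordered_set are always
available and the _MSC_VER / __GXX_EXPERIMENTAL_CXX0X__ / tr1 branches
are dead code. What remains is just the plain C++11 form, e.g. in
stl-utils.h:

    #include <unordered_map>
    #include <unordered_set>
    using std::unordered_map;
    using std::unordered_set;
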
--- src/fstext/determinize-star-inl.h | 8 -------- src/util/stl-utils.h | 13 ------------- tools/Makefile | 6 +----- 3 files changed, 1 insertion(+), 26 deletions(-) diff --git a/src/fstext/determinize-star-inl.h b/src/fstext/determinize-star-inl.h index ea599008e56..b9eaa485350 100644 --- a/src/fstext/determinize-star-inl.h +++ b/src/fstext/determinize-star-inl.h @@ -24,16 +24,8 @@ #include "base/kaldi-error.h" -#ifdef _MSC_VER #include using std::unordered_map; -#elif __cplusplus > 199711L || defined(__GXX_EXPERIMENTAL_CXX0X__) -#include -using std::unordered_map; -#else -#include -using std::tr1::unordered_map; -#endif #include #include diff --git a/src/util/stl-utils.h b/src/util/stl-utils.h index b5f8f246d95..95ca0b03c5a 100644 --- a/src/util/stl-utils.h +++ b/src/util/stl-utils.h @@ -20,22 +20,10 @@ #ifndef KALDI_UTIL_STL_UTILS_H_ #define KALDI_UTIL_STL_UTILS_H_ -#ifdef _MSC_VER #include #include using std::unordered_map; using std::unordered_set; -#elif __cplusplus > 199711L || defined(__GXX_EXPERIMENTAL_CXX0X__) -#include -#include -using std::unordered_map; -using std::unordered_set; -#else -#include -#include -using std::tr1::unordered_map; -using std::tr1::unordered_set; -#endif #include #include @@ -329,4 +317,3 @@ inline void MergePairVectorSumming(std::vector > *vec) { } // namespace kaldi #endif // KALDI_UTIL_STL_UTILS_H_ - diff --git a/tools/Makefile b/tools/Makefile index 772f8c18398..8ca95ac95ff 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -72,10 +72,6 @@ ifeq ($(OSTYPE),cygwin) else ifeq ($(OS),Windows_NT) cd openfst-$(OPENFST_VERSION)/; ./configure --prefix=`pwd` --enable-static --enable-shared --enable-far --enable-ngram-fsts CXX=$(CXX) CXXFLAGS="$(CXXFLAGS) -O -Wa,-mbig-obj" LDFLAGS="$(LDFLAGS)" LIBS="-ldl" else - # ppc64le needs the newsted config.guess to be correctly indentified - [ "$(shell uname -p)" = "ppc64le" ] && wget -O openfst-$(OPENFST_VERSION)/config.guess \ - "http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD" || \ - echo "config.guess unchanged" cd openfst-$(OPENFST_VERSION)/; ./configure --prefix=`pwd` --enable-static --enable-shared --enable-far --enable-ngram-fsts CXX=$(CXX) CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" LIBS="-ldl" endif @@ -83,7 +79,7 @@ openfst-$(OPENFST_VERSION): openfst-$(OPENFST_VERSION).tar.gz tar xozf openfst-$(OPENFST_VERSION).tar.gz openfst-$(OPENFST_VERSION).tar.gz: - wget --tries=1 -T 5 http://openfst.cs.nyu.edu/twiki/pub/FST/FstDownload/openfst-$(OPENFST_VERSION).tar.gz || \ + wget -T 10 -t 1 http://openfst.cs.nyu.edu/twiki/pub/FST/FstDownload/openfst-$(OPENFST_VERSION).tar.gz || \ wget -T 10 -t 3 http://www.openslr.org/resources/2/openfst-$(OPENFST_VERSION).tar.gz sclite: sclite_compiled From e2ecec49da2b224bbb8f7852642ba8650c4cb714 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Sun, 8 Jan 2017 20:07:11 -0800 Subject: [PATCH 288/530] Upgrade codebase to support OpenFst-1.6.0. 
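
OpenFst 1.6 moved implementation-level classes and functions into the
fst::internal namespace, which accounts for most of the mechanical
changes below: impl classes such as ContextFstImpl are wrapped in
namespace internal, and direct calls gain an internal:: qualifier. A
minimal sketch of the call-site change, mirroring the
chain-den-graph.cc hunk below (MinimizeAcceptorSketch is a
hypothetical illustrative function, not part of this patch):

    void MinimizeAcceptorSketch(fst::StdVectorFst *fst) {
      // OpenFst <= 1.5.x:  fst::AcceptorMinimize(fst);
      fst::internal::AcceptorMinimize(fst);  // OpenFst >= 1.6.0
    }
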
--- .gitignore | 2 + src/base/kaldi-utils.h | 4 +- src/chain/chain-den-graph.cc | 2 +- src/configure | 4 +- src/fstext/context-fst-inl.h | 3 + src/fstext/context-fst.h | 8 +- src/fstext/determinize-lattice-inl.h | 4 +- src/fstext/determinize-star-inl.h | 4 +- src/fstext/fstext-utils.h | 2 +- src/fstext/lattice-weight.h | 6 ++ src/fstext/trivial-factor-weight.h | 7 +- src/gmmbin/gmm-adapt-map.cc | 32 +++---- src/hmm/transition-model.h | 2 +- src/lat/determinize-lattice-pruned.cc | 116 +++++++++++++------------- src/nnet3bin/nnet3-average.cc | 15 ++-- tools/Makefile | 2 +- 16 files changed, 113 insertions(+), 100 deletions(-) diff --git a/.gitignore b/.gitignore index fc4f047b342..16acb47e181 100644 --- a/.gitignore +++ b/.gitignore @@ -86,6 +86,8 @@ GSYMS /tools/openfst-1.4.1/ /tools/openfst-1.5.4.tar.gz /tools/openfst-1.5.4/ +/tools/openfst-1.6.0.tar.gz +/tools/openfst-1.6.0/ /tools/pa_stable_v19_20111121.tgz /tools/portaudio/ /tools/sctk-2.4.0-20091110-0958.tar.bz2 diff --git a/src/base/kaldi-utils.h b/src/base/kaldi-utils.h index 47c60b4b01d..2cfecdcc7db 100644 --- a/src/base/kaldi-utils.h +++ b/src/base/kaldi-utils.h @@ -113,8 +113,7 @@ void Sleep(float seconds); (reinterpret_cast(&a))[1]=t;} -// Makes copy constructor and operator= private. Same as in compat.h of OpenFst -// toolkit. +// Makes copy constructor and operator= private. #define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \ type(const type&); \ void operator = (const type&) @@ -156,4 +155,3 @@ template<> class KaldiCompileTimeAssert { #endif #endif // KALDI_BASE_KALDI_UTILS_H_ - diff --git a/src/chain/chain-den-graph.cc b/src/chain/chain-den-graph.cc index 6f494a0c562..5386f959b1f 100644 --- a/src/chain/chain-den-graph.cc +++ b/src/chain/chain-den-graph.cc @@ -186,7 +186,7 @@ void MinimizeAcceptorNoPush(fst::StdVectorFst *fst) { fst::EncodeMapper encoder(fst::kEncodeLabels | fst::kEncodeWeights, fst::ENCODE); fst::Encode(fst, &encoder); - fst::AcceptorMinimize(fst); + fst::internal::AcceptorMinimize(fst); fst::Decode(fst, encoder); } diff --git a/src/configure b/src/configure index 8b68c97fd67..3388d8ebd50 100755 --- a/src/configure +++ b/src/configure @@ -936,8 +936,8 @@ if [ ! -f $FSTROOT/include/fst/fst.h ]; then fi OPENFST_VER=$(grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::') OPENFST_VER_NUM=$(echo $OPENFST_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d") -if [ $OPENFST_VER_NUM -lt 10503 ]; then - failure "OpenFst-$OPENFST_VER is not supported. You need OpenFst >= 1.5.3.)" +if [ $OPENFST_VER_NUM -lt 10600 ]; then + failure "OpenFst-$OPENFST_VER is not supported. You need OpenFst >= 1.6.0.)" fi echo "OPENFSTINC = $FSTROOT/include" >> kaldi.mk if $static_fst ; then diff --git a/src/fstext/context-fst-inl.h b/src/fstext/context-fst-inl.h index 6fa8632cf67..dc8a4a8370b 100644 --- a/src/fstext/context-fst-inl.h +++ b/src/fstext/context-fst-inl.h @@ -31,6 +31,7 @@ namespace fst { /// \addtogroup context_fst_group /// @{ +namespace internal { template typename ContextFstImpl::StateId @@ -338,6 +339,8 @@ void ContextFstImpl::Expand(StateId s) { // expands arcs only [not this->SetArcs(s); // mark the arcs as "done". [so HasArcs returns true]. 
} +} // namespace internal + template bool ContextMatcher::Find(typename Arc::Label match_label) { assert(s_ != kNoStateId); diff --git a/src/fstext/context-fst.h b/src/fstext/context-fst.h index 7a00b7ed2f1..246dce924b2 100644 --- a/src/fstext/context-fst.h +++ b/src/fstext/context-fst.h @@ -64,11 +64,12 @@ namespace fst { /// \addtogroup context_fst_group "Classes and functions related to context expansion" /// @{ +namespace internal { + /* ContextFstImpl inherits from CacheImpl, which handles caching of states. */ - template // make the vector &fst) : ImplToFst(std::make_shared(fst, TrivialFactorWeightOptions())) {} diff --git a/src/gmmbin/gmm-adapt-map.cc b/src/gmmbin/gmm-adapt-map.cc index bc0bac9f6cc..ec3eb8cea9b 100644 --- a/src/gmmbin/gmm-adapt-map.cc +++ b/src/gmmbin/gmm-adapt-map.cc @@ -40,20 +40,20 @@ int main(int argc, char *argv[]) { "\n" "Usage: gmm-adapt-map [options] " " \n"; - + ParseOptions po(usage); - string spk2utt_rspecifier; + std::string spk2utt_rspecifier; bool binary = true; MapDiagGmmOptions map_config; std::string update_flags_str = "mw"; - + po.Register("spk2utt", &spk2utt_rspecifier, "rspecifier for speaker to " "utterance-list map"); po.Register("binary", &binary, "Write output in binary mode"); po.Register("update-flags", &update_flags_str, "Which GMM parameters will be " "updated: subset of mvw."); map_config.Register(&po); - + po.Read(argc, argv); if (po.NumArgs() != 4) { @@ -67,7 +67,7 @@ int main(int argc, char *argv[]) { map_am_wspecifier = po.GetArg(4); GmmFlagsType update_flags = StringToGmmFlags(update_flags_str); - + RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier); MapAmDiagGmmWriter map_am_writer(map_am_wspecifier); @@ -83,7 +83,7 @@ int main(int argc, char *argv[]) { double tot_like = 0.0, tot_like_change = 0.0, tot_t = 0.0, tot_t_check = 0.0; int32 num_done = 0, num_err = 0; - + if (spk2utt_rspecifier != "") { // per-speaker adaptation SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); @@ -93,9 +93,9 @@ int main(int argc, char *argv[]) { copy_am_gmm.CopyFromAmDiagGmm(am_gmm); AccumAmDiagGmm map_accs; map_accs.Init(am_gmm, update_flags); - + const std::vector &uttlist = spk2utt_reader.Value(); - + // for each speaker, estimate MAP means std::vector::const_iterator iter = uttlist.begin(), end = uttlist.end(); @@ -124,8 +124,8 @@ int main(int argc, char *argv[]) { ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior); for ( size_t i = 0; i < posterior.size(); i++ ) { for ( size_t j = 0; j < pdf_posterior[i].size(); j++ ) { - int32 pdf_id = pdf_posterior[i][j].first; - BaseFloat weight = pdf_posterior[i][j].second; + int32 pdf_id = pdf_posterior[i][j].first; + BaseFloat weight = pdf_posterior[i][j].second; file_like += map_accs.AccumulateForGmm(copy_am_gmm, feats.Row(i), pdf_id, weight); @@ -135,7 +135,7 @@ int main(int argc, char *argv[]) { KALDI_VLOG(2) << "Average like for utterance " << utt << " is " << (file_like/file_t) << " over " << file_t << " frames."; - + tot_like += file_like; tot_t += file_t; num_done++; @@ -144,7 +144,7 @@ int main(int argc, char *argv[]) { KALDI_VLOG(1) << "Avg like per frame so far is " << (tot_like / tot_t); } // end looping over all utterances of the current speaker - + // MAP estimation. 
BaseFloat spk_objf_change = 0.0, spk_frames = 0.0; MapAmDiagGmmUpdate(map_config, map_accs, update_flags, ©_am_gmm, @@ -154,7 +154,7 @@ int main(int argc, char *argv[]) { << " over " << spk_frames << " frames."; tot_like_change += spk_objf_change; tot_t_check += spk_frames; - + // Writing AM for each speaker in a table map_am_writer.Write(spk,copy_am_gmm); } // end looping over speakers @@ -201,9 +201,9 @@ int main(int argc, char *argv[]) { tot_like += file_like; tot_t += file_t; if ( num_done % 10 == 0 ) - KALDI_VLOG(1) << "Avg like per frame so far is " + KALDI_VLOG(1) << "Avg like per frame so far is " << (tot_like / tot_t); - + // MAP BaseFloat utt_objf_change = 0.0, utt_frames = 0.0; MapAmDiagGmmUpdate(map_config, map_accs, update_flags, ©_am_gmm, @@ -213,7 +213,7 @@ int main(int argc, char *argv[]) { << " over " << utt_frames << " frames."; tot_like_change += utt_objf_change; tot_t_check += utt_frames; - + // Writing AM for each utterance in a table map_am_writer.Write(feature_reader.Key(), copy_am_gmm); } diff --git a/src/hmm/transition-model.h b/src/hmm/transition-model.h index 33a0d55443e..442de8fd2e0 100644 --- a/src/hmm/transition-model.h +++ b/src/hmm/transition-model.h @@ -317,7 +317,7 @@ class TransitionModel { int32 num_pdfs_; - DISALLOW_COPY_AND_ASSIGN(TransitionModel); + KALDI_DISALLOW_COPY_AND_ASSIGN(TransitionModel); }; diff --git a/src/lat/determinize-lattice-pruned.cc b/src/lat/determinize-lattice-pruned.cc index e38c62b3bfa..8c790e749a3 100644 --- a/src/lat/determinize-lattice-pruned.cc +++ b/src/lat/determinize-lattice-pruned.cc @@ -48,8 +48,8 @@ template class LatticeDeterminizerPruned { typedef CompactLatticeWeightTpl CompactWeight; typedef ArcTpl CompactArc; // arc in compact, acceptor form of lattice - typedef ArcTpl Arc; // arc in non-compact version of lattice - + typedef ArcTpl Arc; // arc in non-compact version of lattice + // Output to standard FST with CompactWeightTpl as its weight type (the // weight stores the original output-symbol strings). If destroy == true, // release memory as we go (but we cannot output again). @@ -123,7 +123,7 @@ template class LatticeDeterminizerPruned { for (OutputStateId this_state_id = 0; this_state_id < nStates; this_state_id++) { OutputState &this_state = *(output_states_[this_state_id]); vector &this_vec(this_state.arcs); - + typename vector::const_iterator iter = this_vec.begin(), end = this_vec.end(); for (; iter != end; ++iter) { const TempArc &temp_arc(*iter); @@ -209,12 +209,12 @@ template class LatticeDeterminizerPruned { ifst_ = NULL; } { MinimalSubsetHash tmp; tmp.swap(minimal_hash_); } - + for (size_t i = 0; i < output_states_.size(); i++) { vector empty_subset; empty_subset.swap(output_states_[i]->minimal_subset); } - + for (typename InitialSubsetHash::iterator iter = initial_hash_.begin(); iter != initial_hash_.end(); ++iter) delete iter->first; @@ -235,14 +235,14 @@ template class LatticeDeterminizerPruned { } { vector > tmp; tmp.swap(all_elems_tmp_); } } - + ~LatticeDeterminizerPruned() { FreeMostMemory(); FreeOutputStates(); // rest is deleted by destructors. } - - void RebuildRepository() { // rebuild the string repository, + + void RebuildRepository() { // rebuild the string repository, // freeing stuff we don't need.. we call this when memory usage // passes a supplied threshold. We need to accumulate all the // strings we need the repository to "remember", then tell it @@ -281,10 +281,10 @@ template class LatticeDeterminizerPruned { needed_strings.end()), needed_strings.end()); // uniq the strings. 
KALDI_LOG << "Rebuilding repository."; - + repository_.Rebuild(needed_strings); } - + bool CheckMemoryUsage() { int32 repo_size = repository_.MemSize(), arcs_size = num_arcs_ * sizeof(TempArc), @@ -299,7 +299,7 @@ template class LatticeDeterminizerPruned { KALDI_VLOG(2) << "Rebuilt repository in determinize-lattice: repository shrank from " << repo_size << " to " << new_repo_size << " bytes (approximately)"; - + if (new_total_size > static_cast(opts_.max_mem * 0.8)) { // Rebuilding didn't help enough-- we need a margin to stop // having to rebuild too often. We'll just return to the user at @@ -325,7 +325,7 @@ template class LatticeDeterminizerPruned { } return true; } - + bool Determinize(double *effective_beam) { KALDI_ASSERT(!determinized_); // This determinizes the input fst but leaves it in the "special format" @@ -344,13 +344,13 @@ template class LatticeDeterminizerPruned { // memory passed a user-specified threshold and cleanup failed // to get it below that threshold. size_t num_states = output_states_.size(); - if ((opts_.max_states > 0 && num_states > opts_.max_states) || - (opts_.max_arcs > 0 && num_arcs_ > opts_.max_arcs) || + if ((opts_.max_states > 0 && num_states > opts_.max_states) || + (opts_.max_arcs > 0 && num_arcs_ > opts_.max_arcs) || (num_states % 10 == 0 && !CheckMemoryUsage())) { // note: at some point // it was num_states % 100, not num_states % 10, but I encountered an example // where memory was exhausted before we reached state #100. KALDI_VLOG(1) << "Lattice determinization terminated but not " - << " because of lattice-beam. (#states, #arcs) is ( " + << " because of lattice-beam. (#states, #arcs) is ( " << output_states_.size() << ", " << num_arcs_ << " ), versus limits ( " << opts_.max_states << ", " << opts_.max_arcs << " ) (else, may be memory limit)."; @@ -376,7 +376,7 @@ template class LatticeDeterminizerPruned { // arc or state limit. } private: - + typedef typename Arc::Label Label; typedef typename Arc::StateId StateId; // use this when we don't know if it's input or output. typedef typename Arc::StateId InputStateId; // state in the input FST. @@ -493,7 +493,7 @@ template class LatticeDeterminizerPruned { // these types are the same anyway]. typedef unordered_map*, Element, SubsetKey, SubsetEqual> InitialSubsetHash; - + // converts the representation of the subset from canonical (all states) to // minimal (only states with output symbols on arcs leaving them, and final @@ -511,7 +511,7 @@ template class LatticeDeterminizerPruned { } subset->resize(cur_out - subset->begin()); } - + // Takes a minimal, normalized subset, and converts it to an OutputStateId. // Involves a hash lookup, and possibly adding a new OutputStateId. // If it creates a new OutputStateId, it creates a new record for it, works @@ -546,7 +546,7 @@ template class LatticeDeterminizerPruned { return state_id; } - + // Given a normalized initial subset of elements (i.e. before epsilon closure), // compute the corresponding output-state. OutputStateId InitialToStateId(const vector &subset_in, @@ -573,7 +573,7 @@ template class LatticeDeterminizerPruned { ConvertToMinimal(&subset); // remove all but emitting and final states. Element elem; // will be used to store remaining weight and string, and - // OutputStateId, in initial_hash_; + // OutputStateId, in initial_hash_; NormalizeSubset(&subset, &elem.weight, &elem.string); // normalize subset; put // common string and weight in "elem". The subset is now a minimal, // normalized subset. 
@@ -584,7 +584,7 @@ template class LatticeDeterminizerPruned { *common_prefix = elem.string; if (elem.weight == Weight::Zero()) KALDI_WARN << "Zero weight!"; - + // Before returning "ans", add the initial subset to the hash, // so that we can bypass the epsilon-closure etc., next time // we process the same initial subset. @@ -634,7 +634,7 @@ template class LatticeDeterminizerPruned { // at input, subset must have only one example of each StateId. [will still // be so at output]. This function follows input-epsilons, and augments the // subset accordingly. - + std::priority_queue, greater > queue; unordered_map cur_subset; typedef typename unordered_map::iterator MapIter; @@ -653,7 +653,7 @@ template class LatticeDeterminizerPruned { while (queue.size() != 0) { Element elem = queue.top(); queue.pop(); - + // The next if-statement is a kind of optimization. It's to prevent us // unnecessarily repeating the processing of a state. "cur_subset" always // contains only one Element with a particular state. The issue is that @@ -678,8 +678,8 @@ template class LatticeDeterminizerPruned { next_elem.state = arc.nextstate; next_elem.weight = Times(elem.weight, arc.weight); // next_elem.string is not set up yet... create it only - // when we know we need it (this is an optimization) - + // when we know we need it (this is an optimization) + MapIter iter = cur_subset.find(next_elem.state); if (iter == cur_subset.end()) { // was no such StateId: insert and add to queue. @@ -695,10 +695,10 @@ template class LatticeDeterminizerPruned { if (comp == 0) { // A tie on weights. This should be a rare case; // we don't optimize for it. next_elem.string = (arc.olabel == 0 ? elem.string : - repository_.Successor(elem.string, + repository_.Successor(elem.string, arc.olabel)); comp = Compare(next_elem.weight, next_elem.string, - iter->second.weight, iter->second.string); + iter->second.weight, iter->second.string); } if(comp == 1) { // next_elem is better, so use its (weight, string) next_elem.string = (arc.olabel == 0 ? elem.string : @@ -766,7 +766,7 @@ template class LatticeDeterminizerPruned { temp_arc.string = final_string; temp_arc.weight = final_weight; state.arcs.push_back(temp_arc); - num_arcs_++; + num_arcs_++; } } @@ -808,11 +808,11 @@ template class LatticeDeterminizerPruned { // (weight, string) pair in the semiring). void MakeSubsetUnique(vector *subset) { typedef typename vector::iterator IterType; - + // This KALDI_ASSERT is designed to fail (usually) if the subset is not sorted on // state. KALDI_ASSERT(subset->size() < 2 || (*subset)[0].state <= (*subset)[1].state); - + IterType cur_in = subset->begin(), cur_out = cur_in, end = subset->end(); size_t num_out = 0; // Merge elements with same state-id @@ -835,7 +835,7 @@ template class LatticeDeterminizerPruned { } subset->resize(num_out); } - + // ProcessTransition was called from "ProcessTransitions" in the non-pruned // code, but now we in effect put the calls to ProcessTransition on a priority // queue, and it now gets called directly from Determinize(). This function @@ -850,7 +850,7 @@ template class LatticeDeterminizerPruned { Weight tot_weight; NormalizeSubset(subset, &tot_weight, &common_str); forward_cost += ConvertToCost(tot_weight); - + OutputStateId nextstate; { Weight next_tot_weight; @@ -876,7 +876,7 @@ template class LatticeDeterminizerPruned { // "less than" operator for pair. Used in ProcessTransitions. 
- // Lexicographical order, which only compares the state when ordering the + // Lexicographical order, which only compares the state when ordering the // "Element" member of the pair. class PairComparator { @@ -898,7 +898,7 @@ template class LatticeDeterminizerPruned { // states. Partitions the emitting transitions up by ilabel (by sorting on // ilabel), and for each unique ilabel, it creates a Task record that contains // the information we need to process the transition. - + void ProcessTransitions(OutputStateId output_state_id) { const vector &minimal_subset = output_states_[output_state_id]->minimal_subset; // it's possible that minimal_subset could be empty if there are @@ -922,7 +922,7 @@ template class LatticeDeterminizerPruned { next_elem.weight = Times(elem.weight, arc.weight); if (arc.olabel == 0) // output epsilon next_elem.string = elem.string; - else + else next_elem.string = repository_.Successor(elem.string, arc.olabel); all_elems.push_back(this_pr); } @@ -953,7 +953,7 @@ template class LatticeDeterminizerPruned { backward_costs_[element.state]); cur++; } - + // After the command below, the "priority_cost" is a value comparable to // the total-weight of the input FST, like a total-path weight... of // course, it will typically be less (in the semiring) than that. @@ -965,7 +965,7 @@ template class LatticeDeterminizerPruned { delete task; } else { MakeSubsetUnique(&(task->subset)); // remove duplicate Elements with the same state. - queue_.push(task); // Push the task onto the queue. The queue keeps it + queue_.push(task); // Push the task onto the queue. The queue keeps it // in prioritized order, so we always process the one with the "best" // weight (highest in the semiring). @@ -983,7 +983,7 @@ template class LatticeDeterminizerPruned { // empty. } - + bool IsIsymbolOrFinal(InputStateId state) { // returns true if this state // of the input FST either is final or has an osymbol on an arc out of it. // Uses the vector isymbol_or_final_ as a cache for this info. @@ -1029,13 +1029,13 @@ template class LatticeDeterminizerPruned { if (ifst_->Start() == kNoStateId) return; // we'll be returning // an empty FST. - + double best_cost = backward_costs_[ifst_->Start()]; if (best_cost == numeric_limits::infinity()) KALDI_WARN << "Total weight of input lattice is zero."; cutoff_ = best_cost + beam_; } - + void InitializeDeterminization() { // We insist that the input lattice be topologically sorted. This is not a // fundamental limitation of the algorithm (which in principle should be @@ -1088,8 +1088,8 @@ template class LatticeDeterminizerPruned { // the queue, which we'll start processing in Determinize(). } } - - DISALLOW_COPY_AND_ASSIGN(LatticeDeterminizerPruned); + + KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeDeterminizerPruned); struct OutputState { vector minimal_subset; @@ -1106,23 +1106,23 @@ template class LatticeDeterminizerPruned { double forward_cost): minimal_subset(minimal_subset), forward_cost(forward_cost) { } }; - + vector output_states_; // All the info about the output states. - + int num_arcs_; // keep track of memory usage: number of arcs in output_states_[ ]->arcs int num_elems_; // keep track of memory usage: number of elems in output_states_ and // the keys of initial_hash_ - + const ExpandedFst *ifst_; std::vector backward_costs_; // This vector stores, for every state in ifst_, // the minimal cost to the end-state (i.e. the sum of weights; they are guaranteed to // have "take-the-minimum" semantics). 
We get the double from the ConvertToCost() // function on the lattice weights. - + double beam_; double cutoff_; // beam plus total-weight of input (and note, the weight is // guaranteed to be "tropical-like" so the sum does represent a min-cost. - + DeterminizeLatticePrunedOptions opts_; SubsetKey hasher_; // object that computes keys-- has no data members. SubsetEqual equal_; // object that compares subsets-- only data member is delta_. @@ -1141,7 +1141,7 @@ template class LatticeDeterminizerPruned { // normalize, there may be an extra weight // and string. Owns the pointers // in its keys. - + struct Task { OutputStateId state; // State from which we're processing the transition. Label label; // Label on the transition we're processing out of this state. @@ -1164,15 +1164,15 @@ template class LatticeDeterminizerPruned { // order according to the best weight of any path passing through these // determinized states... it's possible to work this out. std::priority_queue, TaskCompare> queue_; - + vector > all_elems_tmp_; // temporary vector used in ProcessTransitions. - + enum IsymbolOrFinal { OSF_UNKNOWN = 0, OSF_NO = 1, OSF_YES = 2 }; - + vector isymbol_or_final_; // A kind of cache; it says whether // each state is (emitting or final) where emitting means it has at least one // non-epsilon output arc. Only accessed by IsIsymbolOrFinal() - + LatticeStringRepository repository_; // defines a compact and fast way of // storing sequences of labels. @@ -1300,7 +1300,7 @@ typename ArcTpl::Label DeterminizeLatticeInsertPhones( // Work out the first phone symbol. This is more related to the phone // insertion function, so we put it here and make it the returning value of - // DeterminizeLatticeInsertPhones(). + // DeterminizeLatticeInsertPhones(). Label first_phone_label = HighestNumberedInputSymbol(*fst) + 1; // Insert phones here. @@ -1373,7 +1373,7 @@ void DeterminizeLatticeDeletePhones( template void DeterminizeLatticeDeletePhones( ArcTpl::Label first_phone_label, - MutableFst > *fst); + MutableFst > *fst); /** This function does a first pass determinization with phone symbols inserted at phone boundary. It uses a transition model to work out the transition-id @@ -1396,7 +1396,7 @@ bool DeterminizeLatticePhonePrunedFirstPass( typename ArcTpl::Label first_phone_label = DeterminizeLatticeInsertPhones(trans_model, fst); TopSort(fst); - + // Second, do determinization with phone inserted. bool ans = DeterminizeLatticePruned(*fst, beam, fst, opts); @@ -1438,7 +1438,7 @@ bool DeterminizeLatticePhonePruned( // lattices. 
if (opts.phone_determinize) { KALDI_VLOG(1) << "Doing first pass of determinization on phone + word " - << "lattices."; + << "lattices."; ans = DeterminizeLatticePhonePrunedFirstPass( trans_model, beam, ifst, det_opts) && ans; @@ -1513,14 +1513,14 @@ template bool DeterminizeLatticePruned( const ExpandedFst &ifst, double prune, - MutableFst *ofst, + MutableFst *ofst, DeterminizeLatticePrunedOptions opts); template bool DeterminizeLatticePruned( const ExpandedFst &ifst, double prune, - MutableFst *ofst, + MutableFst *ofst, DeterminizeLatticePrunedOptions opts); template diff --git a/src/nnet3bin/nnet3-average.cc b/src/nnet3bin/nnet3-average.cc index c82e3b93323..9d4513775d6 100644 --- a/src/nnet3bin/nnet3-average.cc +++ b/src/nnet3bin/nnet3-average.cc @@ -73,10 +73,10 @@ int main(int argc, char *argv[]) { ParseOptions po(usage); po.Register("binary", &binary_write, "Write output in binary mode"); - string weights_str; + std::string weights_str; po.Register("weights", &weights_str, "Colon-separated list of weights, one " "for each input model. These will be normalized to sum to one."); - + po.Read(argc, argv); if (po.NumArgs() < 2) { @@ -90,23 +90,23 @@ int main(int argc, char *argv[]) { Nnet nnet; ReadKaldiObject(first_nnet_rxfilename, &nnet); - + int32 num_inputs = po.NumArgs() - 1; std::vector model_weights; GetWeights(weights_str, num_inputs, &model_weights); - + ScaleNnet(model_weights[0], &nnet); - + for (int32 i = 2; i <= num_inputs; i++) { Nnet src_nnet; ReadKaldiObject(po.GetArg(i), &src_nnet); AddNnet(src_nnet, model_weights[i - 1], &nnet); } - + WriteKaldiObject(nnet, nnet_wxfilename, binary_write); - + KALDI_LOG << "Averaged parameters of " << num_inputs << " neural nets, and wrote to " << nnet_wxfilename; return 0; // it will throw an exception if there are any problems. @@ -115,4 +115,3 @@ int main(int argc, char *argv[]) { return -1; } } - diff --git a/tools/Makefile b/tools/Makefile index 8ca95ac95ff..4a8e08823a0 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -7,7 +7,7 @@ CC = gcc # used for sph2pipe # Note: OpenFst requires a relatively recent C++ compiler with C++11 support, # e.g. g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3. -OPENFST_VERSION = 1.5.4 +OPENFST_VERSION = 1.6.0 OPENFST_VER_NUM := $(shell echo $(OPENFST_VERSION) | sed 's/\./ /g' | xargs printf "%d%02d%02d") ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10503)","1") From 96eec2b013b0e3a7afb250fd6b4dccb08b7e0a32 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Sun, 8 Jan 2017 21:53:44 -0800 Subject: [PATCH 289/530] Stop relinking dynamic libraries whenever they are updated. 
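Some background on the mechanics: when the shared libraries are listed in XDEPENDS and XDEPENDS is made a prerequisite of $(BINFILES), every library that gets rebuilt forces all the binaries to relink; keeping the list in EXTRA_LDLIBS puts it on the link line without creating a make dependency. A minimal sketch of the difference, with purely hypothetical file names (none of this is Kaldi code), using GNU make's question mode so no compiler is needed:

    mkdir -p /tmp/relink-demo && cd /tmp/relink-demo
    # prog-as-prereq depends on the library; prog-as-ldlib does not.
    printf 'prog-as-prereq: main.o libfoo.so\n\t@true\nprog-as-ldlib: main.o\n\t@true\n' > Makefile
    touch main.o libfoo.so prog-as-prereq prog-as-ldlib
    sleep 1 && touch libfoo.so    # simulate the library being rebuilt
    make -q prog-as-prereq || echo "prog-as-prereq would be relinked"
    make -q prog-as-ldlib  && echo "prog-as-ldlib is left alone"

Here 'make -q' only checks timestamps (exit status 0 means up to date), which is exactly the check that was causing the spurious relinks.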
--- src/makefiles/default_rules.mk | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/makefiles/default_rules.mk b/src/makefiles/default_rules.mk index fda52521186..34abd905924 100644 --- a/src/makefiles/default_rules.mk +++ b/src/makefiles/default_rules.mk @@ -7,13 +7,13 @@ ifeq ($(KALDI_FLAVOR), dynamic) LIBFILE = lib$(LIBNAME).dylib endif LDFLAGS += -Wl,-rpath -Wl,$(KALDILIBDIR) - XDEPENDS = $(foreach dep,$(ADDLIBS), $(dir $(dep))lib$(notdir $(basename $(dep))).dylib) + EXTRA_LDLIBS += $(foreach dep,$(ADDLIBS), $(dir $(dep))lib$(notdir $(basename $(dep))).dylib) else ifeq ($(shell uname), Linux) ifdef LIBNAME LIBFILE = lib$(LIBNAME).so endif LDFLAGS += -Wl,-rpath=$(shell readlink -f $(KALDILIBDIR)) - XDEPENDS = $(foreach dep,$(ADDLIBS), $(dir $(dep))lib$(notdir $(basename $(dep))).so) + EXTRA_LDLIBS += $(foreach dep,$(ADDLIBS), $(dir $(dep))lib$(notdir $(basename $(dep))).so) else # Platform not supported $(error Dynamic libraries not supported on this platform. Run configure with --static flag.) endif @@ -31,11 +31,11 @@ $(LIBFILE): $(OBJFILES) $(RANLIB) $(LIBNAME).a ifeq ($(KALDI_FLAVOR), dynamic) ifeq ($(shell uname), Darwin) - $(CXX) -dynamiclib -o $@ -install_name @rpath/$@ $(LDFLAGS) $(OBJFILES) $(XDEPENDS) $(LDLIBS) + $(CXX) -dynamiclib -o $@ -install_name @rpath/$@ $(LDFLAGS) $(OBJFILES) $(LDLIBS) rm -f $(KALDILIBDIR)/$@; ln -s $(shell pwd)/$@ $(KALDILIBDIR)/$@ else ifeq ($(shell uname), Linux) # Building shared library from static (static was compiled with -fPIC) - $(CXX) -shared -o $@ -Wl,--no-undefined -Wl,--as-needed -Wl,-soname=$@,--whole-archive $(LIBNAME).a -Wl,--no-whole-archive $(LDFLAGS) $(XDEPENDS) $(LDLIBS) + $(CXX) -shared -o $@ -Wl,--no-undefined -Wl,--as-needed -Wl,-soname=$@,--whole-archive $(LIBNAME).a -Wl,--no-whole-archive $(LDFLAGS) $(LDLIBS) rm -f $(KALDILIBDIR)/$@; ln -s $(shell pwd)/$@ $(KALDILIBDIR)/$@ else # Platform not supported $(error Dynamic libraries not supported on this platform. Run configure with --static flag.) @@ -47,7 +47,11 @@ endif # use the C++ compiler $(CXX) instead. LINK.o = $(CXX) $(LDFLAGS) $(TARGET_ARCH) +ifeq ($(KALDI_FLAVOR), dynamic) +$(BINFILES): $(LIBFILE) +else $(BINFILES): $(LIBFILE) $(XDEPENDS) +endif # Rule below would expand to, e.g.: # ../base/kaldi-base.a: @@ -65,7 +69,11 @@ clean: distclean: clean -rm -f .depend.mk +ifeq ($(KALDI_FLAVOR), dynamic) +$(TESTFILES): $(LIBFILE) +else $(TESTFILES): $(LIBFILE) $(XDEPENDS) +endif test_compile: $(TESTFILES) From c17ee6815fd5980b11135672013884776307bc21 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Sun, 8 Jan 2017 22:29:46 -0800 Subject: [PATCH 290/530] Add -Wno-deprecated-declarations compiler flag to stop warnings about fst::TokenType --- src/makefiles/cygwin.mk | 3 ++- src/makefiles/darwin.mk | 3 ++- src/makefiles/linux_atlas.mk | 3 ++- src/makefiles/linux_atlas_arm.mk | 3 ++- src/makefiles/linux_atlas_ppc64le.mk | 3 ++- src/makefiles/linux_clapack.mk | 3 ++- src/makefiles/linux_clapack_arm.mk | 3 ++- src/makefiles/linux_openblas.mk | 3 ++- src/makefiles/linux_openblas_arm.mk | 3 ++- src/makefiles/linux_openblas_ppc64le.mk | 3 ++- src/makefiles/linux_x86_64_mkl.mk | 3 ++- 11 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/makefiles/cygwin.mk b/src/makefiles/cygwin.mk index 14ece9d4ee7..e5657818ce5 100644 --- a/src/makefiles/cygwin.mk +++ b/src/makefiles/cygwin.mk @@ -11,7 +11,8 @@ $(error OPENFSTLIBS not defined.) endif CXXFLAGS = -std=c++11 -I.. 
-I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ - -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ + -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_CLAPACK -I../../tools/CLAPACK/ \ -msse -msse2 \ diff --git a/src/makefiles/darwin.mk b/src/makefiles/darwin.mk index 5dbcd6f768b..24fbdca890f 100644 --- a/src/makefiles/darwin.mk +++ b/src/makefiles/darwin.mk @@ -11,7 +11,8 @@ $(error OPENFSTLIBS not defined.) endif CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ - -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ + -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_CLAPACK \ -msse -msse2 -pthread \ diff --git a/src/makefiles/linux_atlas.mk b/src/makefiles/linux_atlas.mk index 9ab038295b6..929461831df 100644 --- a/src/makefiles/linux_atlas.mk +++ b/src/makefiles/linux_atlas.mk @@ -17,7 +17,8 @@ $(error ATLASLIBS not defined.) endif CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ - -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ + -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) \ -msse -msse2 -pthread -rdynamic \ diff --git a/src/makefiles/linux_atlas_arm.mk b/src/makefiles/linux_atlas_arm.mk index 0dfc32863b4..9b9c42257fb 100644 --- a/src/makefiles/linux_atlas_arm.mk +++ b/src/makefiles/linux_atlas_arm.mk @@ -17,7 +17,8 @@ $(error ATLASLIBS not defined.) endif CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ - -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ + -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) \ -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ diff --git a/src/makefiles/linux_atlas_ppc64le.mk b/src/makefiles/linux_atlas_ppc64le.mk index aa121fc5cdc..a0c22927f2e 100644 --- a/src/makefiles/linux_atlas_ppc64le.mk +++ b/src/makefiles/linux_atlas_ppc64le.mk @@ -17,7 +17,8 @@ $(error ATLASLIBS not defined.) endif CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ - -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ + -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) \ -m64 -maltivec -mcpu=power8 -mtune=power8 -mpower8-vector -mvsx \ diff --git a/src/makefiles/linux_clapack.mk b/src/makefiles/linux_clapack.mk index d8f8cf5668f..95c58d0ec22 100644 --- a/src/makefiles/linux_clapack.mk +++ b/src/makefiles/linux_clapack.mk @@ -11,7 +11,8 @@ $(error OPENFSTLIBS not defined.) endif CXXFLAGS = -std=c++11 -I.. 
-I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ - -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ + -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_CLAPACK -I../../tools/CLAPACK \ -msse -msse2 -pthread -rdynamic \ diff --git a/src/makefiles/linux_clapack_arm.mk b/src/makefiles/linux_clapack_arm.mk index 432bd689f55..2b15193046b 100644 --- a/src/makefiles/linux_clapack_arm.mk +++ b/src/makefiles/linux_clapack_arm.mk @@ -11,7 +11,8 @@ $(error OPENFSTLIBS not defined.) endif CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ - -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ + -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_CLAPACK -I../../tools/CLAPACK \ -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ diff --git a/src/makefiles/linux_openblas.mk b/src/makefiles/linux_openblas.mk index a859fc7e272..b7b74bff89a 100644 --- a/src/makefiles/linux_openblas.mk +++ b/src/makefiles/linux_openblas.mk @@ -17,7 +17,8 @@ $(error OPENBLASLIBS not defined.) endif CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ - -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ + -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_OPENBLAS -I$(OPENBLASINC) \ -msse -msse2 -pthread -rdynamic \ diff --git a/src/makefiles/linux_openblas_arm.mk b/src/makefiles/linux_openblas_arm.mk index 00c4ae2bbdd..344879580aa 100644 --- a/src/makefiles/linux_openblas_arm.mk +++ b/src/makefiles/linux_openblas_arm.mk @@ -17,7 +17,8 @@ $(error OPENBLASLIBS not defined.) endif CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ - -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ + -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_OPENBLAS -I$(OPENBLASINC) \ -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ diff --git a/src/makefiles/linux_openblas_ppc64le.mk b/src/makefiles/linux_openblas_ppc64le.mk index 1e7a391dc79..9225f4922f0 100644 --- a/src/makefiles/linux_openblas_ppc64le.mk +++ b/src/makefiles/linux_openblas_ppc64le.mk @@ -17,7 +17,8 @@ $(error OPENBLASLIBS not defined.) endif CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ - -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ + -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_OPENBLAS -I$(OPENBLASINC) \ -m64 -maltivec -mcpu=power8 -mtune=power8 -mpower8-vector -mvsx \ diff --git a/src/makefiles/linux_x86_64_mkl.mk b/src/makefiles/linux_x86_64_mkl.mk index d2aee4a036f..595557a5ef4 100644 --- a/src/makefiles/linux_x86_64_mkl.mk +++ b/src/makefiles/linux_x86_64_mkl.mk @@ -25,7 +25,8 @@ endif MKLLIB ?= $(MKLROOT)/lib/em64t CXXFLAGS = -std=c++11 -I.. 
-I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ - -Wall -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ + -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_MKL -I$(MKLROOT)/include \ -m64 -msse -msse2 -pthread -rdynamic \ From a49c20ebbecfe1513e879ed82299817346be7321 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Sun, 8 Jan 2017 22:43:33 -0800 Subject: [PATCH 291/530] Fix test code to conform with OpenFst-1.6 API. --- src/fstext/fstext-utils-test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fstext/fstext-utils-test.cc b/src/fstext/fstext-utils-test.cc index 2802a84cca6..b016b53691f 100644 --- a/src/fstext/fstext-utils-test.cc +++ b/src/fstext/fstext-utils-test.cc @@ -213,7 +213,7 @@ template void TestAcceptorMinimize() { RemoveWeights(fst); VectorFst fst2(*fst); - AcceptorMinimize(&fst2); + internal::AcceptorMinimize(&fst2); assert(RandEquivalent(*fst, fst2, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length-- max?*/)); From f9d5e4f128ce171b28383a7602ab5b0e60c69aa5 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Mon, 9 Jan 2017 15:31:13 -0800 Subject: [PATCH 292/530] Add date/time info to travis script. --- tools/extras/travis_script.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index 5aefdd3e543..2067476b553 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -50,13 +50,20 @@ CF="\"$CFLAGS -g $(addsw -I $INCDIRS)\"" LDF="\"$LDFLAGS $(addsw -L $LIBDIRS)\"" CCC="$(mtoken CXX "$CXX")" +echo "Building tools..." [Time: $(date)] runvx cd tools runvx make openfst "$CCC" CXXFLAGS="$CF" -j$MAXPAR cd .. + +echo "Building src..." [Time: $(date)] runvx cd src runvx "$CCC" CXXFLAGS="$CF" LDFLAGS="$LDF" ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root="$XROOT/usr" runvx make all -j$MAXPAR + +echo "Running tests..." [Time: $(date)] runvx make test -k -j$MAXPAR +echo "Done." [Time: $(date)] + #runvx make mklibdir base matrix -j$MAXPAR #runvx make matrix/test From f4f7f0994115143a39927a400875eb182bca3ea7 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Mon, 9 Jan 2017 22:23:29 -0800 Subject: [PATCH 293/530] Testing Travis CI with different build settings. --- tools/extras/travis_script.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index 2067476b553..3ff284cbe8b 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -9,7 +9,7 @@ # LDFLAGS="-llapack" # Maximum make parallelism. Simply -j runs out of memory on Travis VM. -MAXPAR=3 +MAXPAR=4 # Directories with code that can be tested with Travis (space-separated) TESTABLE_DIRS="src/" @@ -46,6 +46,7 @@ then fi # Prepare environment variables +CXX=clang++-3.4 CF="\"$CFLAGS -g $(addsw -I $INCDIRS)\"" LDF="\"$LDFLAGS $(addsw -L $LIBDIRS)\"" CCC="$(mtoken CXX "$CXX")" From 7b03f2c3291785b00428f2fba71ab723a776bdec Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Mon, 9 Jan 2017 22:39:09 -0800 Subject: [PATCH 294/530] Testing Travis CI with different build settings 2. 
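The previous commit pinned CXX=clang++-3.4 inside the script; this one falls back to plain clang++. Since the script already forwards the compiler through the CCC/CF/LDF variables prepared above, the same experiment could also be driven from the environment instead of editing the file; a rough sketch (the compiler names are assumptions about what the CI image provides):

    # try each candidate compiler without touching the script
    for cxx in clang++ clang++-3.4; do
      CXX=$cxx tools/extras/travis_script.sh || echo "build failed with CXX=$cxx"
    done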
--- tools/extras/travis_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index 3ff284cbe8b..5bdb91515f8 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -46,7 +46,7 @@ then fi # Prepare environment variables -CXX=clang++-3.4 +CXX=clang++ CF="\"$CFLAGS -g $(addsw -I $INCDIRS)\"" LDF="\"$LDFLAGS $(addsw -L $LIBDIRS)\"" CCC="$(mtoken CXX "$CXX")" From adacc9de0ca7302e4c6b550248e053314867e635 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Mon, 9 Jan 2017 22:50:40 -0800 Subject: [PATCH 295/530] Testing Travis CI with different build settings 3. --- tools/extras/travis_script.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index 5bdb91515f8..c8c6c2d7905 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -46,7 +46,6 @@ then fi # Prepare environment variables -CXX=clang++ CF="\"$CFLAGS -g $(addsw -I $INCDIRS)\"" LDF="\"$LDFLAGS $(addsw -L $LIBDIRS)\"" CCC="$(mtoken CXX "$CXX")" From 845f5a039eef76a290211af78ec3fca531e49d59 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Tue, 10 Jan 2017 10:52:36 -0800 Subject: [PATCH 296/530] Testing Travis CI with different build settings 4. --- tools/extras/travis_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index c8c6c2d7905..4386ca2e030 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -46,7 +46,7 @@ then fi # Prepare environment variables -CF="\"$CFLAGS -g $(addsw -I $INCDIRS)\"" +CF="\"$CFLAGS -O0 -g $(addsw -I $INCDIRS)\"" LDF="\"$LDFLAGS $(addsw -L $LIBDIRS)\"" CCC="$(mtoken CXX "$CXX")" From f6379ce28ec487a6e2b287894799f3d16dd98cf2 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Tue, 10 Jan 2017 11:27:23 -0800 Subject: [PATCH 297/530] Testing Travis CI with different build settings 5. --- tools/extras/travis_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index 4386ca2e030..cbd427bb9bd 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -46,7 +46,7 @@ then fi # Prepare environment variables -CF="\"$CFLAGS -O0 -g $(addsw -I $INCDIRS)\"" +CF="\"$CFLAGS -pipe -g $(addsw -I $INCDIRS)\"" LDF="\"$LDFLAGS $(addsw -L $LIBDIRS)\"" CCC="$(mtoken CXX "$CXX")" From 136120ad834a5bcc794a13689f8dee024f3c1ba9 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Tue, 10 Jan 2017 11:53:10 -0800 Subject: [PATCH 298/530] Testing Travis CI with different build settings 6. 
--- tools/extras/travis_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh
index cbd427bb9bd..c8c6c2d7905 100755
--- a/tools/extras/travis_script.sh
+++ b/tools/extras/travis_script.sh
@@ -46,7 +46,7 @@ then
 fi

 # Prepare environment variables
-CF="\"$CFLAGS -pipe -g $(addsw -I $INCDIRS)\""
+CF="\"$CFLAGS -g $(addsw -I $INCDIRS)\""
 LDF="\"$LDFLAGS $(addsw -L $LIBDIRS)\""
 CCC="$(mtoken CXX "$CXX")"

From 339c7379c89af879fb7461cfbb7a43ad1401f850 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Wed, 18 Jan 2017 19:37:20 -0500
Subject: [PATCH 299/530] Various minor fixes and script updates

---
 egs/swbd/s5c/local/nnet3/run_ivector_common.sh  | 15 ++-------------
 .../s5c/local/nnet3/tuning/run_tdnn_d_disc.sh   |  2 +-
 .../local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh | 15 +++++++++------
 egs/wsj/s5/steps/nnet3/train_discriminative.sh  |  6 +-----
 src/nnet3/nnet-chain-training.cc                |  1 +
 5 files changed, 14 insertions(+), 25 deletions(-)
 mode change 100644 => 100755 egs/swbd/s5c/local/nnet3/run_ivector_common.sh

diff --git a/egs/swbd/s5c/local/nnet3/run_ivector_common.sh b/egs/swbd/s5c/local/nnet3/run_ivector_common.sh
old mode 100644
new mode 100755
index 894de5e58f9..9768d82c806
--- a/egs/swbd/s5c/local/nnet3/run_ivector_common.sh
+++ b/egs/swbd/s5c/local/nnet3/run_ivector_common.sh
@@ -62,18 +62,7 @@ if [ $stage -le 3 ]; then
   for dataset in $train_set train_100k_nodup; do
     utils/copy_data_dir.sh data/$dataset data/${dataset}_hires

-    # scale the waveforms, this is useful as we don't use CMVN
-    data_dir=data/${dataset}_hires
-    cat $data_dir/wav.scp | python -c "
-import sys, os, subprocess, re, random
-scale_low = 1.0/8
-scale_high = 2.0
-for line in sys.stdin.readlines():
-  if len(line.strip()) == 0:
-    continue
-  print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high))
-"| sort -k1,1 -u > $data_dir/wav.scp_scaled || exit 1;
-    mv $data_dir/wav.scp_scaled $data_dir/wav.scp
+    utils/data/perturb_data_dir_volume.sh data/${dataset}_hires

     steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
         --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
@@ -131,7 +120,7 @@ if [ $stage -le 8 ]; then
   # having a larger number of speakers is helpful for generalization, and to
   # handle per-utterance decoding well (iVector starts at zero).
- steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires + utils/data/modify_speaker_info.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ data/${train_set}_max2_hires exp/nnet3/extractor exp/nnet3/ivectors_$train_set || exit 1; diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh index 715a93ea49d..da7cae954f8 100755 --- a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh @@ -147,7 +147,7 @@ if [ $stage -le 4 ]; then for iter in epoch$x epoch${x}_adj; do steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ - --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set}_hires \ $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_sw1_tg_${iter} || exit 1; steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh index 11bb733333d..9a77a6af6c7 100755 --- a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh @@ -37,7 +37,7 @@ criterion=smbr one_silence_class=true # you can set --disc-affix if you run different configurations, e.g. --disc-affix "_b" -disc_affix= +disc_affix=slow dir=${srcdir}_${criterion}${disc_affix} @@ -56,10 +56,10 @@ extra_right_context=0 ## Nnet training options -effective_learning_rate=0.0000125 +effective_learning_rate=0.000005 max_param_change=1 num_jobs_nnet=4 -num_epochs=4 +num_epochs=2 regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options, # in chain models. 
minibatch_size="300=32,16/150=64,32" # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up); @@ -146,11 +146,14 @@ if [ $stage -le 4 ]; then for iter in epoch$x epoch${x}_adj; do ( steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ - --online-ivector-dir exp/nnet3_cleaned/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_${iter} || exit 1; + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 --extra-right-context-final 0 \ + --online-ivector-dir exp/nnet3_cleaned/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_${iter} || exit 1; steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ - data/lang_test data/lang_rescore data/${decode_set}_hires \ + data/lang data/lang_rescore data/${decode_set}_hires \ $dir/decode_${decode_set}_${iter} \ $dir/decode_${decode_set}_${iter}_rescore || exit 1; ) & diff --git a/egs/wsj/s5/steps/nnet3/train_discriminative.sh b/egs/wsj/s5/steps/nnet3/train_discriminative.sh index eb1a616e9de..77198a00576 100755 --- a/egs/wsj/s5/steps/nnet3/train_discriminative.sh +++ b/egs/wsj/s5/steps/nnet3/train_discriminative.sh @@ -258,11 +258,7 @@ while [ $x -lt $num_iters ]; do fi if $use_frame_shift; then - if [ $[num_archives % frame_subsampling_factor] -ne 0 ]; then - frame_shift=$[k % frame_subsampling_factor] - else - frame_shift=$[(k + k/num_archives) % frame_subsampling_factor] - fi + frame_shift=$[(k%num_archives + k/num_archives) % frame_subsampling_factor] else frame_shift=0 fi diff --git a/src/nnet3/nnet-chain-training.cc b/src/nnet3/nnet-chain-training.cc index 4f63ba8304c..c3ae3ae0336 100644 --- a/src/nnet3/nnet-chain-training.cc +++ b/src/nnet3/nnet-chain-training.cc @@ -240,6 +240,7 @@ bool NnetChainTrainer::PrintTotalStats() const { const ObjectiveFunctionInfo &info = iter->second; ans = info.PrintTotalStats(name) || ans; } + PrintMaxChangeStats(); return ans; } From 012ca31754970d17b969d67fef0782cd02dac601 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 18 Jan 2017 20:20:25 -0500 Subject: [PATCH 300/530] Add more specific compilation instructions in configure script --- src/configure | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/configure b/src/configure index 3388d8ebd50..bf478b5b73f 100755 --- a/src/configure +++ b/src/configure @@ -1180,4 +1180,6 @@ if [ -n "$ENV_LDLIBS" ]; then echo "LDLIBS += $ENV_LDLIBS" >> kaldi.mk; fi # and possibly modifies the kaldi.mk file that we just generated. check_for_slow_expf; echo "SUCCESS" +echo "To compile: make clean -j; make depend -j; make -j" +echo " ... or e.g. 
-j 10, instead of -j, to use a specified number of CPUs" exit 0; From e8e5928a6a6fea38c9c87163b6afaf7a444e4ba0 Mon Sep 17 00:00:00 2001 From: Karel Vesely Date: Thu, 19 Jan 2017 21:51:51 +0100 Subject: [PATCH 301/530] [src] cudamatrix: added {Cu,}VectorBase::SetRandUniform(), + optimize/fix cu-rand code (#1352) --- src/cudamatrix/cu-rand-speed-test.cc | 151 ++++++++++++++++++++++----- src/cudamatrix/cu-rand.cc | 129 +++++++++++++++-------- src/cudamatrix/cu-rand.h | 3 + src/cudamatrix/cu-vector.cc | 8 ++ src/cudamatrix/cu-vector.h | 2 + src/matrix/kaldi-vector.cc | 8 ++ src/matrix/kaldi-vector.h | 3 + 7 files changed, 235 insertions(+), 69 deletions(-) diff --git a/src/cudamatrix/cu-rand-speed-test.cc b/src/cudamatrix/cu-rand-speed-test.cc index cf07301cb63..23f82eab977 100644 --- a/src/cudamatrix/cu-rand-speed-test.cc +++ b/src/cudamatrix/cu-rand-speed-test.cc @@ -56,63 +56,166 @@ std::string MeanVariance(const CuMatrixBase& m) { return std::string("mean ") + ToString(mean) + ", std-dev " + ToString(std::sqrt(var)); } +template +std::string MeanVariance(const CuVectorBase& v) { + std::ostringstream os; + Real mean = v.Sum() / v.Dim(); + CuVector tmp(v); + tmp.Add(-mean); + tmp.ApplyPow(2.0); + Real var = tmp.Sum() / tmp.Dim(); + return std::string("mean ") + ToString(mean) + ", std-dev " + ToString(std::sqrt(var)); +} + + template -void CuRandUniformMatrixSpeedTest() { +void CuRandUniformMatrixSpeedTest(const int32 iter) { Timer t; CuRand rand; - CuMatrix m(249,2011); - for (int32 i = 0; i < 200; i++) { + CuMatrix m(249,1001, kUndefined); + for (int32 i = 0; i < iter; i++) { rand.RandUniform(&m); } - KALDI_LOG << __func__ << NameOf() << " t = " << t.Elapsed() << "s, " << MeanVariance(m); + CuMatrix m2(256,1024, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandUniform(&m2); + } + // flops = number of generated random numbers per second, + Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed(); + KALDI_LOG << __func__ << NameOf() + << " Speed was " << flops << " rand_elems/s. " + << "(debug " << MeanVariance(m) << ")"; } template -void CuRandGaussianMatrixSpeedTest() { +void CuRandUniformMatrixBaseSpeedTest(const int32 iter) { Timer t; CuRand rand; - CuMatrix m(249,2011); - for (int32 i = 0; i < 200; i++) { + CuMatrix m(249,1001, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandUniform(dynamic_cast*>(&m)); + } + CuMatrix m2(256,1024, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandUniform(dynamic_cast*>(&m2)); + } + // flops = number of generated random numbers per second, + Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed(); + KALDI_LOG << __func__ << NameOf() + << " Speed was " << flops << " rand_elems/s. " + << "(debug " << MeanVariance(m) << ")"; +} + +template +void CuRandGaussianMatrixSpeedTest(const int32 iter) { + Timer t; + CuRand rand; + CuMatrix m(249,1001, kUndefined); + for (int32 i = 0; i < iter; i++) { rand.RandGaussian(&m); } - KALDI_LOG << __func__ << NameOf() << " t = " << t.Elapsed() << "s, " << MeanVariance(m); + CuMatrix m2(256,1024, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandGaussian(&m2); + } + // flops = number of generated random numbers per second, + Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed(); + KALDI_LOG << __func__ << NameOf() + << " Speed was " << flops << " rand_elems/s. 
" + << "(debug " << MeanVariance(m) << ")"; } template -void CuRandGaussianVectorSpeedTest() { +void CuRandGaussianMatrixBaseSpeedTest(const int32 iter) { Timer t; CuRand rand; - CuVector v(2011); - for (int32 i = 0; i < 200; i++) { + CuMatrix m(249,1001, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandGaussian(dynamic_cast*>(&m)); + } + CuMatrix m2(256,1024, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandGaussian(dynamic_cast*>(&m2)); + } + // flops = number of generated random numbers per second, + Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed(); + KALDI_LOG << __func__ << NameOf() + << " Speed was " << flops << " rand_elems/s. " + << "(debug " << MeanVariance(m) << ")"; +} + +template +void CuRandUniformVectorSpeedTest(const int32 iter) { + Timer t; + CuRand rand; + CuVector v(2011, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandUniform(&v); + } + CuVector v2(2048, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandUniform(&v2); + } + // flops = number of generated random numbers per second, + Real flops = iter * (v.Dim() + v2.Dim()) / t.Elapsed(); + KALDI_LOG << __func__ << NameOf() + << " Speed was " << flops << " rand_elems/s. " + << "(debug " << MeanVariance(v) << ")"; +} + +template +void CuRandGaussianVectorSpeedTest(const int32 iter) { + Timer t; + CuRand rand; + CuVector v(2011, kUndefined); + for (int32 i = 0; i < iter; i++) { rand.RandGaussian(&v); } - KALDI_LOG << __func__ << NameOf() << " t = " << t.Elapsed() << "s"; + CuVector v2(2048, kUndefined); + for (int32 i = 0; i < iter; i++) { + rand.RandGaussian(&v2); + } + // flops = number of generated random numbers per second, + Real flops = iter * (v.Dim() + v2.Dim()) / t.Elapsed(); + KALDI_LOG << __func__ << NameOf() + << " Speed was " << flops << " rand_elems/s. 
" + << "(debug " << MeanVariance(v) << ")"; } } // namespace kaldi int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 iter = 10; // Be quick on CPU, #if HAVE_CUDA == 1 + for (int32 loop = 0; loop < 2; loop++) { // NO for loop if 'HAVE_CUDA != 1', CuDevice::Instantiate().SetDebugStrideMode(true); - if (loop == 0) + if ( loop == 0) CuDevice::Instantiate().SelectGpuId("no"); - else + else { CuDevice::Instantiate().SelectGpuId("yes"); + iter = 400; // GPUs are faster, + } #endif - kaldi::CuRandUniformMatrixSpeedTest(); - kaldi::CuRandGaussianMatrixSpeedTest(); - kaldi::CuRandGaussianVectorSpeedTest(); + Timer t; + kaldi::CuRandUniformMatrixSpeedTest(iter); + kaldi::CuRandUniformMatrixBaseSpeedTest(iter); + kaldi::CuRandUniformVectorSpeedTest(iter); + kaldi::CuRandGaussianMatrixSpeedTest(iter); + kaldi::CuRandGaussianMatrixBaseSpeedTest(iter); + kaldi::CuRandGaussianVectorSpeedTest(iter); fprintf(stderr, "---\n"); - kaldi::CuRandUniformMatrixSpeedTest(); - kaldi::CuRandGaussianMatrixSpeedTest(); - kaldi::CuRandGaussianVectorSpeedTest(); - fprintf(stderr, "\n"); - } - + kaldi::CuRandUniformMatrixSpeedTest(iter); + kaldi::CuRandUniformMatrixBaseSpeedTest(iter); + kaldi::CuRandUniformVectorSpeedTest(iter); + kaldi::CuRandGaussianMatrixSpeedTest(iter); + kaldi::CuRandGaussianMatrixBaseSpeedTest(iter); + kaldi::CuRandGaussianVectorSpeedTest(iter); + fprintf(stderr, "--- ELAPSED %fs.\n\n", t.Elapsed()); #if HAVE_CUDA == 1 + } // NO for loop if 'HAVE_CUDA != 1', + CuDevice::Instantiate().PrintProfile(); #endif std::cout << "Tests succeeded.\n"; diff --git a/src/cudamatrix/cu-rand.cc b/src/cudamatrix/cu-rand.cc index 6506896b10b..82a9e5b7057 100644 --- a/src/cudamatrix/cu-rand.cc +++ b/src/cudamatrix/cu-rand.cc @@ -1,6 +1,6 @@ // cudamatrix/cu-rand.cc -// Copyright 2016 Brno University of Technology (author Karel Vesely) +// Copyright 2016-2017 Brno University of Technology (author Karel Vesely) // See ../../COPYING for clarification regarding multiple authors // @@ -21,18 +21,50 @@ namespace kaldi { +#if HAVE_CUDA == 1 +/// Wrappers of curand functions to interface both float and double as 1 function, + +/// Wrapper of curandGenerateUniform(), curandGenerateUniformDouble(), +template +curandStatus_t curandGenerateUniformWrap(curandGenerator_t gen, Real *ptr, size_t num); +// +template<> +curandStatus_t curandGenerateUniformWrap(curandGenerator_t gen, float *ptr, size_t num) { + return curandGenerateUniform(gen, ptr, num); +} template<> -void CuRand::RandUniform(CuMatrixBase *tgt) { +curandStatus_t curandGenerateUniformWrap(curandGenerator_t gen, double *ptr, size_t num) { + return curandGenerateUniformDouble(gen, ptr, num); +} + +/// Wrapper of curandGenerateNormal(), curandGenerateNormalDouble(), +template +curandStatus_t curandGenerateNormalWrap( + curandGenerator_t gen, Real *ptr, size_t num); +// +template<> +curandStatus_t curandGenerateNormalWrap( + curandGenerator_t gen, float *ptr, size_t num) { + return curandGenerateNormal(gen, ptr, num, 0.0 /*mean*/, 1.0 /*stddev*/); +} +template<> +curandStatus_t curandGenerateNormalWrap( + curandGenerator_t gen, double *ptr, size_t num) { + return curandGenerateNormalDouble(gen, ptr, num, 0.0 /*mean*/, 1.0 /*stddev*/); +} +/// End of wrappers. +#endif + + +template +void CuRand::RandUniform(CuMatrixBase *tgt) { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { Timer tim; // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix, // so we should not use it to generate random numbers over whole stride. 
-    CuMatrix tmp(tgt->NumRows(), tgt->NumCols(), kUndefined);
-    // We need even number of `elements', or it crahes!
-    // (possibly touching 1 element after array, into the padding of memory alignment),
-    size_t tmp_elems_even = (1 + (tmp.NumRows()*tmp.Stride() - 1) / 2) * 2;
-    CU_SAFE_CALL(curandGenerateUniform(gen_, tmp.Data(), tmp_elems_even));
+    CuMatrix tmp(tgt->NumRows(), tgt->NumCols(), kUndefined);
+    CU_SAFE_CALL(curandGenerateUniformWrap(gen_, tmp.Data(), tmp.NumRows() * tmp.Stride()));
     tgt->CopyFromMat(tmp);
     CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
   } else
@@ -42,19 +74,13 @@ void CuRand::RandUniform(CuMatrixBase *tgt) {
   }
 }

-template<>
-void CuRand::RandUniform(CuMatrixBase *tgt) {
+template
+void CuRand::RandUniform(CuMatrix *tgt) {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     Timer tim;
-    // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix,
-    // so we should not use it to generate random numbers over whole stride.
-    CuMatrix tmp(tgt->NumRows(), tgt->NumCols(), kUndefined);
-    // We need even number of `elements', or it crahes!
-    // (possibly touching 1 element after array, into the padding of memory alignment),
-    size_t tmp_elems_even = (1 + (tmp.NumRows()*tmp.Stride() - 1) / 2) * 2;
-    CU_SAFE_CALL(curandGenerateUniformDouble(gen_, tmp.Data(), tmp_elems_even));
-    tgt->CopyFromMat(tmp);
+    // Here we don't need to use a 'tmp' matrix.
+    CU_SAFE_CALL(curandGenerateUniformWrap(gen_, tgt->Data(), tgt->NumRows() * tgt->Stride()));
     CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
   } else
 #endif
@@ -63,40 +89,34 @@ void CuRand::RandUniform(CuMatrixBase *tgt) {
   }
 }

-template<>
-void CuRand::RandGaussian(CuMatrixBase *tgt) {
+template
+void CuRand::RandUniform(CuVectorBase *tgt) {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     Timer tim;
-    // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix,
-    // so we should not use it to generate random numbers over whole stride.
-    CuMatrix tmp(tgt->NumRows(), tgt->NumCols(), kUndefined);
-    // We need even number of `elements', or it crahes!
-    // (possibly touching 1 element after array, into the padding of memory alignment),
-    size_t tmp_elems_even = (1 + (tmp.NumRows()*tmp.Stride() - 1) / 2) * 2;
-    CU_SAFE_CALL(curandGenerateNormal(gen_, tmp.Data(), tmp_elems_even, 0.0, 1.0));
-    tgt->CopyFromMat(tmp);
+    CU_SAFE_CALL(curandGenerateUniformWrap(gen_, tgt->Data(), tgt->Dim()));
     CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
   } else
 #endif
   {
-    tgt->Mat().SetRandn();
+    tgt->Vec().SetRandUniform();
   }
 }

-template<>
-void CuRand::RandGaussian(CuMatrixBase *tgt) {
+template
+void CuRand::RandGaussian(CuMatrixBase *tgt) {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     Timer tim;
     // Better use 'tmp' matrix, 'tgt' can be a window into a larger matrix,
     // so we should not use it to generate random numbers over whole stride.
-    CuMatrix tmp(tgt->NumRows(), tgt->NumCols(), kUndefined);
-    // We need even number of `elements', or it crahes!
-    // (possibly touching 1 element after array, into the padding of memory alignment),
-    size_t tmp_elems_even = (1 + (tmp.NumRows()*tmp.Stride() - 1) / 2) * 2;
-    CU_SAFE_CALL(curandGenerateNormalDouble(gen_, tmp.Data(), tmp_elems_even, 0.0, 1.0));
-    tgt->CopyFromMat(tmp);
+    // Also, we ensure an even number of elements when calling 'curand'
+    // by possibly adding one column. An even number of elements is required
+    // by curandGenerateNormal(), curandGenerateNormalDouble().
+    MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2);  // + 0 or 1,
+    CuMatrix tmp(tgt->NumRows(), num_cols_even, kUndefined);
+    CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tmp.Data(), tmp.NumRows()*tmp.Stride()));
+    tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols()));
     CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
   } else
 #endif
@@ -105,28 +125,47 @@ void CuRand::RandGaussian(CuMatrixBase *tgt) {
   }
 }

-template<>
-void CuRand::RandGaussian(CuVectorBase *tgt) {
+template
+void CuRand::RandGaussian(CuMatrix *tgt) {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     Timer tim;
-    MatrixIndexT dim_even = (1 + (tgt->Dim() - 1) / 2) * 2;
-    CU_SAFE_CALL(curandGenerateNormal(gen_, tgt->Data(), dim_even, 0.0, 1.0));
+    // Here we don't need to use a 'tmp' matrix if the number of elements is even.
+    MatrixIndexT num_elements = tgt->NumRows() * tgt->Stride();
+    if (0 == (num_elements % 2)) {
+      CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tgt->Data(), num_elements));
+    } else {
+      // We use a 'tmp' matrix with one column added; this guarantees an even
+      // number of elements.
+      MatrixIndexT num_cols_even = tgt->NumCols() + (tgt->NumCols() % 2);  // + 0 or 1,
+      CuMatrix tmp(tgt->NumRows(), num_cols_even, kUndefined);
+      CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tmp.Data(), tmp.NumRows()*tmp.Stride()));
+      tgt->CopyFromMat(tmp.ColRange(0,tgt->NumCols()));
+    }
     CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
   } else
 #endif
   {
-    tgt->Vec().SetRandn();
+    tgt->Mat().SetRandn();
   }
 }

-template<>
-void CuRand::RandGaussian(CuVectorBase *tgt) {
+template
+void CuRand::RandGaussian(CuVectorBase *tgt) {
 #if HAVE_CUDA == 1
   if (CuDevice::Instantiate().Enabled()) {
     Timer tim;
-    MatrixIndexT dim_even = (1 + (tgt->Dim() - 1) / 2) * 2;
-    CU_SAFE_CALL(curandGenerateNormalDouble(gen_, tgt->Data(), dim_even, 0.0, 1.0));
+    // To ensure an even number of elements, we use a 'tmp' vector of even length.
+    // An even number of elements is required by the 'curand' functions
+    // curandGenerateNormal(), curandGenerateNormalDouble().
+ MatrixIndexT num_elements = tgt->Dim(); + if (0 == (num_elements % 2)) { + CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tgt->Data(), tgt->Dim())); + } else { + MatrixIndexT dim_even = tgt->Dim() + (tgt->Dim() % 2); // + 0 or 1, + CuVector tmp(dim_even, kUndefined); + CU_SAFE_CALL(curandGenerateNormalWrap(gen_, tmp.Data(), tmp.Dim())); + tgt->CopyFromVec(tmp.Range(0,tgt->Dim())); + } CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); } else #endif diff --git a/src/cudamatrix/cu-rand.h b/src/cudamatrix/cu-rand.h index 60587391edd..2c8204b6b5f 100644 --- a/src/cudamatrix/cu-rand.h +++ b/src/cudamatrix/cu-rand.h @@ -68,8 +68,11 @@ class CuRand { /// Fill with uniform [0..1] floats, void RandUniform(CuMatrixBase *tgt); + void RandUniform(CuMatrix *tgt); + void RandUniform(CuVectorBase *tgt); /// Fill with Normal random numbers, void RandGaussian(CuMatrixBase *tgt); + void RandGaussian(CuMatrix *tgt); void RandGaussian(CuVectorBase *tgt); /// align probabilities to discrete 0/1 states (use uniform sampling), diff --git a/src/cudamatrix/cu-vector.cc b/src/cudamatrix/cu-vector.cc index 1a0eefa7019..e6aa72249f7 100644 --- a/src/cudamatrix/cu-vector.cc +++ b/src/cudamatrix/cu-vector.cc @@ -255,6 +255,14 @@ void CuVectorBase::SetRandn() { tmp.RandGaussian(this); } +template +void CuVectorBase::SetRandUniform() { + if (dim_ == 0) return; + CuRand tmp; + tmp.RandUniform(this); +} + + template Real CuVectorBase::Sum() const { diff --git a/src/cudamatrix/cu-vector.h b/src/cudamatrix/cu-vector.h index cff5270e6cf..53641556669 100644 --- a/src/cudamatrix/cu-vector.h +++ b/src/cudamatrix/cu-vector.h @@ -125,7 +125,9 @@ class CuVectorBase { MatrixIndexT ApplyCeiling(Real ceiling_val); void ApplyPow(Real power); Real Sum() const; + void SetRandn(); + void SetRandUniform(); CuSubVector Range(const MatrixIndexT o, const MatrixIndexT l) { return CuSubVector(*this, o, l); diff --git a/src/matrix/kaldi-vector.cc b/src/matrix/kaldi-vector.cc index 851db1a1d2f..87237369680 100644 --- a/src/matrix/kaldi-vector.cc +++ b/src/matrix/kaldi-vector.cc @@ -306,6 +306,14 @@ void VectorBase::SetRandn() { if (Dim() != last) data_[last] = static_cast(kaldi::RandGauss(&rstate)); } +template +void VectorBase::SetRandUniform() { + kaldi::RandomState rstate; + for (MatrixIndexT i = 0; i < Dim(); i++) { + *(data_+i) = RandUniform(&rstate); + } +} + template MatrixIndexT VectorBase::RandCategorical() const { kaldi::RandomState rstate; diff --git a/src/matrix/kaldi-vector.h b/src/matrix/kaldi-vector.h index 498ddda302d..dcfdd47b09c 100644 --- a/src/matrix/kaldi-vector.h +++ b/src/matrix/kaldi-vector.h @@ -50,6 +50,9 @@ class VectorBase { /// Set vector to random normally-distributed noise. void SetRandn(); + /// Sets to numbers uniformly distributed on (0,1) + void SetRandUniform(); + /// This function returns a random index into this vector, /// chosen with probability proportional to the corresponding /// element. Requires that this->Min() >= 0 and this->Sum() > 0. 
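As a quick end-to-end check of the reworked generators, the benchmark above can be built and run on its own; a sketch, assuming the build has already been configured (the per-test make target follows the usual default_rules.mk conventions):

    cd src/cudamatrix
    make cu-rand-speed-test && ./cu-rand-speed-test
    # expected output: one "Speed was ... rand_elems/s." line per generator
    # and precision, plus the mean/std-dev debug summary used to eyeball
    # the distributions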
From 39c20a73573b9c5084ab2a2d2102a4a5aa37642e Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Thu, 19 Jan 2017 16:23:28 -0500 Subject: [PATCH 302/530] [src] Fix compilation issues on mac --- src/matrix/kaldi-vector.cc | 8 ++++---- src/nnet3/nnet-example-utils.cc | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/matrix/kaldi-vector.cc b/src/matrix/kaldi-vector.cc index 87237369680..057569d1182 100644 --- a/src/matrix/kaldi-vector.cc +++ b/src/matrix/kaldi-vector.cc @@ -1029,8 +1029,8 @@ template void VectorBase::AddVec(const Real alpha, const VectorBase &v) { KALDI_ASSERT(dim_ == v.dim_); // remove __restrict__ if it causes compilation problems. - register Real *__restrict__ data = data_; - register OtherReal *__restrict__ other_data = v.data_; + Real *__restrict__ data = data_; + OtherReal *__restrict__ other_data = v.data_; MatrixIndexT dim = dim_; if (alpha != 1.0) for (MatrixIndexT i = 0; i < dim; i++) @@ -1050,8 +1050,8 @@ template void VectorBase::AddVec2(const Real alpha, const VectorBase &v) { KALDI_ASSERT(dim_ == v.dim_); // remove __restrict__ if it causes compilation problems. - register Real *__restrict__ data = data_; - register OtherReal *__restrict__ other_data = v.data_; + Real *__restrict__ data = data_; + OtherReal *__restrict__ other_data = v.data_; MatrixIndexT dim = dim_; if (alpha != 1.0) for (MatrixIndexT i = 0; i < dim; i++) diff --git a/src/nnet3/nnet-example-utils.cc b/src/nnet3/nnet-example-utils.cc index 7c3743c3a7f..088772bcba7 100644 --- a/src/nnet3/nnet-example-utils.cc +++ b/src/nnet3/nnet-example-utils.cc @@ -319,7 +319,7 @@ void ExampleGenerationConfig::ComputeDerived() { } KALDI_LOG << "Rounding up --num-frames=" << num_frames_str << " to multiples of --frame-subsampling-factor=" << m - << ", to: " << rounded_num_frames_str; + << ", to: " << rounded_num_frames_str.str(); } } From dafec02a4c04f3ac6c1e23d0d7ce2b9cc53cadc6 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 19 Jan 2017 18:07:12 -0500 Subject: [PATCH 303/530] asr_diarization: Fix some bugs in segmenter code and make it simpler --- src/segmenter/Makefile | 8 +-- src/segmenter/segment.h | 19 +++++++ src/segmenter/segmentation-post-processor.cc | 3 +- src/segmenter/segmentation-post-processor.h | 6 ++- src/segmenter/segmentation-utils.cc | 21 ++++++-- src/segmenter/segmentation-utils.h | 11 +++- src/segmenterbin/Makefile | 4 +- src/segmenterbin/segmentation-copy.cc | 3 +- ...ntation-init-from-additive-signals-info.cc | 53 +++++++++---------- .../segmentation-init-from-segments.cc | 43 +++++++-------- .../segmentation-remove-segments.cc | 8 ++- 11 files changed, 115 insertions(+), 64 deletions(-) diff --git a/src/segmenter/Makefile b/src/segmenter/Makefile index 03df6132050..8a9b37cad75 100644 --- a/src/segmenter/Makefile +++ b/src/segmenter/Makefile @@ -2,14 +2,16 @@ all: include ../kaldi.mk -TESTFILES = segmentation-io-test +TESTFILES = segmentation-io-test information-bottleneck-clusterable-test OBJFILES = segment.o segmentation.o segmentation-utils.o \ - segmentation-post-processor.o + segmentation-post-processor.o \ + information-bottleneck-clusterable.o \ + information-bottleneck-cluster-utils.o LIBNAME = kaldi-segmenter -ADDLIBS = ../gmm/kaldi-gmm.a \ +ADDLIBS = ../tree/kaldi-tree.a ../gmm/kaldi-gmm.a \ ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../base/kaldi-base.a ../thread/kaldi-thread.a include ../makefiles/default_rules.mk diff --git a/src/segmenter/segment.h b/src/segmenter/segment.h index b54b5367c73..f7ada5b92ee 100644 --- a/src/segmenter/segment.h +++ 
b/src/segmenter/segment.h
@@ -1,3 +1,22 @@
+// segmenter/segment.h
+
+// Copyright 2016   Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
 #ifndef KALDI_SEGMENTER_SEGMENT_H_
 #define KALDI_SEGMENTER_SEGMENT_H_
diff --git a/src/segmenter/segmentation-post-processor.cc b/src/segmenter/segmentation-post-processor.cc
index 2c97e31db56..e8c7747c8c4 100644
--- a/src/segmenter/segmentation-post-processor.cc
+++ b/src/segmenter/segmentation-post-processor.cc
@@ -177,7 +177,8 @@ void SegmentationPostProcessor::DoBlendingShortSegments(

 void SegmentationPostProcessor::DoRemovingSegments(Segmentation *seg) const {
   if (!IsRemovingSegmentsToBeDone(opts_)) return;
-  RemoveSegments(remove_labels_, seg);
+  RemoveSegments(remove_labels_, opts_.max_remove_length,
+                 seg);
 }

 void SegmentationPostProcessor::DoMergingAdjacentSegments(
diff --git a/src/segmenter/segmentation-post-processor.h b/src/segmenter/segmentation-post-processor.h
index 01a23b93b1b..0de54d026e1 100644
--- a/src/segmenter/segmentation-post-processor.h
+++ b/src/segmenter/segmentation-post-processor.h
@@ -47,6 +47,7 @@ struct SegmentationPostProcessingOptions {
   int32 max_blend_length;

   std::string remove_labels_csl;
+  int32 max_remove_length;

   bool merge_adjacent_segments;
   int32 max_intersegment_length;
@@ -63,7 +64,7 @@ struct SegmentationPostProcessingOptions {
       blend_short_segments_class(-1), max_blend_length(-1),
       merge_adjacent_segments(false), max_intersegment_length(0),
       max_segment_length(-1), overlap_length(0),
-      post_process_label(-1) { }
+      max_remove_length(-1), post_process_label(-1) { }

   void Register(OptionsItf *opts) {
     opts->Register("merge-labels", &merge_labels_csl, "Merge labels into a "
@@ -109,6 +110,9 @@ struct SegmentationPostProcessingOptions {
                    "Remove any segment whose label is contained in "
                    "remove_labels_csl. "
                    "Refer to the RemoveLabels() code for details.");
+    opts->Register("max-remove-length", &max_remove_length,
+                   "If provided, specifies the maximum length of segments "
+                   "that will be removed by the --remove-labels option");
     opts->Register("merge-adjacent-segments", &merge_adjacent_segments,
                    "Merge adjacent segments of the same label if they are "
                    "within max-intersegment-length distance. "
" diff --git a/src/segmenter/segmentation-utils.cc b/src/segmenter/segmentation-utils.cc index c69d7ff3397..3cece810d45 100644 --- a/src/segmenter/segmentation-utils.cc +++ b/src/segmenter/segmentation-utils.cc @@ -54,18 +54,27 @@ void RelabelSegmentsUsingMap(const unordered_map &label_map, } for (SegmentList::iterator it = segmentation->Begin(); - it != segmentation->End(); ++it) { + it != segmentation->End(); ) { unordered_map::const_iterator map_it = label_map.find( it->Label()); + int32 dest_label = -100; if (map_it == label_map.end()) { if (default_label == -1) KALDI_ERR << "Could not find label " << it->Label() << " in label map."; else - it->SetLabel(default_label); + dest_label = default_label; } else { - it->SetLabel(map_it->second); + dest_label = map_it->second; } + + if (dest_label == -1) { + // Remove segments that will be mapped to label -1. + it = segmentation->Erase(it); + continue; + } + it->SetLabel(dest_label); + ++it; } } @@ -98,6 +107,7 @@ void RemoveSegments(int32 label, Segmentation *segmentation) { } void RemoveSegments(const std::vector &labels, + int32 max_remove_length, Segmentation *segmentation) { // Check if sorted and unique KALDI_ASSERT(std::adjacent_find(labels.begin(), @@ -105,7 +115,10 @@ void RemoveSegments(const std::vector &labels, for (SegmentList::iterator it = segmentation->Begin(); it != segmentation->End(); ) { - if (std::binary_search(labels.begin(), labels.end(), it->Label())) { + if ((max_remove_length == -1 || + it->Length() < max_remove_length) && + std::binary_search(labels.begin(), labels.end(), + it->Label())) { it = segmentation->Erase(it); } else { ++it; diff --git a/src/segmenter/segmentation-utils.h b/src/segmenter/segmentation-utils.h index 30136ab0a5a..4fa3271e874 100644 --- a/src/segmenter/segmentation-utils.h +++ b/src/segmenter/segmentation-utils.h @@ -56,12 +56,19 @@ void ScaleFrameShift(BaseFloat factor, Segmentation *segmentation); void RemoveSegments(int32 label, Segmentation *segmentation); /** - * This is very straight forward. It removes any segment whose label is - * contained in the vector "labels" + * This removes any segment whose label is + * contained in the vector "labels" and has a length smaller than + * max_remove_length. max_remove_length can be provided -1 to + * specify a value of +infinity i.e. to remove segments + * based on only the labels and irrespective of their lengths. 
**/ void RemoveSegments(const std::vector &labels, + int32 max_remove_length, Segmentation *segmentation); +void RemoveShortSegments(int32 label, int32 min_length, + Segmentation *segmentation); + // Keep only segments of label "label" void KeepSegments(int32 label, Segmentation *segmentation); diff --git a/src/segmenterbin/Makefile b/src/segmenterbin/Makefile index 22a74e70551..6e2fd226019 100644 --- a/src/segmenterbin/Makefile +++ b/src/segmenterbin/Makefile @@ -17,7 +17,9 @@ BINFILES = segmentation-copy segmentation-get-stats \ segmentation-create-overlapped-subsegments \ segmentation-intersect-segments \ segmentation-init-from-additive-signals-info \ - class-counts-per-frame-to-labels#\ + class-counts-per-frame-to-labels \ + agglomerative-cluster-ib \ + intersect-int-vectors #\ gmm-acc-pdf-stats-segmentation \ gmm-est-segmentation gmm-update-segmentation \ segmentation-init-from-diarization \ diff --git a/src/segmenterbin/segmentation-copy.cc b/src/segmenterbin/segmentation-copy.cc index e3384170805..b7e215b55f8 100644 --- a/src/segmenterbin/segmentation-copy.cc +++ b/src/segmenterbin/segmentation-copy.cc @@ -54,7 +54,8 @@ int main(int argc, char *argv[]) { "Write in binary mode " "(only relevant if output is a wxfilename)"); po.Register("label-map", &label_map_rxfilename, - "File with mapping from old to new labels"); + "File with mapping from old to new labels. " + "If new label is -1, then that segment is removed."); po.Register("frame-subsampling-factor", &frame_subsampling_factor, "Change frame rate by this factor"); po.Register("utt2label-map-rspecifier", &utt2label_map_rspecifier, diff --git a/src/segmenterbin/segmentation-init-from-additive-signals-info.cc b/src/segmenterbin/segmentation-init-from-additive-signals-info.cc index ccddb4c2b60..abf5aed219b 100644 --- a/src/segmenterbin/segmentation-init-from-additive-signals-info.cc +++ b/src/segmenterbin/segmentation-init-from-additive-signals-info.cc @@ -29,13 +29,13 @@ int main(int argc, char *argv[]) { const char *usage = "Convert overlapping segments information into segmentation\n" "\n" - "Usage: segmentation-init-from-additive-signals-info [options] " + "Usage: segmentation-init-from-additive-signals-info [options] " " \n" " e.g.: segmentation-init-from-additive-signals-info --additive-signals-segmentation-rspecifier=ark:utt_segmentation.ark " - "ark:reco_segmentation.ark ark,t:overlapped_segments_info.txt ark:-\n"; + "ark,t:overlapped_segments_info.txt ark:-\n"; BaseFloat frame_shift = 0.01; - int32 junk_label = -1; + int32 junk_label = -2; std::string lengths_rspecifier; std::string additive_signals_segmentation_rspecifier; @@ -50,42 +50,35 @@ int main(int argc, char *argv[]) { "Archive of segmentation of the additive signal which will used " "instead of an all 1 segmentation"); po.Register("junk-label", &junk_label, - "If specified, then unreliable regions are labeled with this " - "label"); + "The unreliable regions are labeled with this label"); po.Read(argc, argv); - if (po.NumArgs() != 3) { + if (po.NumArgs() != 2) { po.PrintUsage(); exit(1); } - std::string reco_segmentation_rspecifier = po.GetArg(1), - additive_signals_info_rspecifier = po.GetArg(2), - segmentation_wspecifier = po.GetArg(3); + std::string additive_signals_info_rspecifier = po.GetArg(1), + segmentation_wspecifier = po.GetArg(2); - SequentialSegmentationReader reco_segmentation_reader(reco_segmentation_rspecifier); - RandomAccessTokenVectorReader additive_signals_info_reader(additive_signals_info_rspecifier); + SequentialTokenVectorReader 
additive_signals_info_reader(
        additive_signals_info_rspecifier);
   SegmentationWriter writer(segmentation_wspecifier);

-  RandomAccessSegmentationReader additive_signals_segmentation_reader(additive_signals_segmentation_rspecifier);
-
+  RandomAccessSegmentationReader additive_signals_segmentation_reader(
+      additive_signals_segmentation_rspecifier);
   RandomAccessInt32Reader lengths_reader(lengths_rspecifier);

-  int32 num_done = 0, num_err = 0, num_missing = 0;
+  int32 num_done = 0, num_err = 0;

-  for (; !reco_segmentation_reader.Done(); reco_segmentation_reader.Next()) {
-    const std::string &key = reco_segmentation_reader.Key();
-
-    if (!additive_signals_info_reader.HasKey(key)) {
-      KALDI_WARN << "Could not find additive_signals_info for key " << key;
-      num_missing++;
-      continue;
-    }
+  for (; !additive_signals_info_reader.Done();
+         additive_signals_info_reader.Next()) {
+    const std::string &key = additive_signals_info_reader.Key();

     const std::vector &additive_signals_info =
-      additive_signals_info_reader.Value(key);
+      additive_signals_info_reader.Value();

-    Segmentation segmentation(reco_segmentation_reader.Value());
+    Segmentation segmentation;

     for (size_t i = 0; i < additive_signals_info.size(); i++) {
       std::vector parts;
@@ -107,7 +100,9 @@ int main(int argc, char *argv[]) {

       if (!additive_signals_segmentation_reader.HasKey(utt_id)) {
         KALDI_WARN << "Could not find utterance " << utt_id << " in "
-                   << "segmentation " << additive_signals_segmentation_rspecifier;
+                   << "segmentation "
+                   << additive_signals_segmentation_rspecifier
+                   << ". Assigning the segment --junk-label.";
         if (duration < 0) {
           KALDI_ERR << "duration < 0 for utt_id " << utt_id << " in "
                     << "additive_signals_info "
@@ -143,14 +138,14 @@ int main(int argc, char *argv[]) {
   }

   KALDI_LOG << "Successfully processed " << num_done << " recordings "
-            << " in additive signals info; failed for " << num_missing
-            << "; could not get segmentation for " << num_err;
+            << "in additive signals info"
+            << "; could not get segmentation for " << num_err
+            << " additive signals.";

-  return (num_done > (num_missing/ 2) ? 0 : 1);
+  return (num_done > num_err / 2 ?
0 : 1);
  } catch(const std::exception &e) {
    std::cerr << e.what();
    return -1;
  }
}
-
diff --git a/src/segmenterbin/segmentation-init-from-segments.cc b/src/segmenterbin/segmentation-init-from-segments.cc
index c39996b5ef4..469b4ef2965 100644
--- a/src/segmenterbin/segmentation-init-from-segments.cc
+++ b/src/segmenterbin/segmentation-init-from-segments.cc
@@ -27,15 +27,15 @@
 //  Beta-001 Beta 0.50 2.66
 //  Beta-002 Beta 3.50 5.20
 // the output segmentation will contain
-// Alpha-001 [ 0 16 1 ]
-// Alpha-002 [ 0 360 1 ]
-// Beta-001 [ 0 216 1 ]
-// Beta-002 [ 0 170 1 ]
+// Alpha-001 [ 0 15 1 ]
+// Alpha-002 [ 0 359 1 ]
+// Beta-001 [ 0 215 1 ]
+// Beta-002 [ 0 169 1 ]
 // If --shift-to-zero=false is provided, then the output will contain
-// Alpha-001 [ 0 16 1 ]
-// Alpha-002 [ 150 410 1 ]
-// Beta-001 [ 50 266 1 ]
-// Beta-002 [ 350 520 1 ]
+// Alpha-001 [ 0 15 1 ]
+// Alpha-002 [ 150 409 1 ]
+// Beta-001 [ 50 265 1 ]
+// Beta-002 [ 350 519 1 ]
 //
 // If the following utt2label-rspecifier was provided:
 //  Alpha-001 2
 //  Alpha-002 2
 //  Beta-001 4
 //  Beta-002 4
 // then the output segmentation will contain
-// Alpha-001 [ 0 16 2 ]
-// Alpha-002 [ 0 360 2 ]
-// Beta-001 [ 0 216 4 ]
-// Beta-002 [ 0 170 4 ]
+// Alpha-001 [ 0 15 2 ]
+// Alpha-002 [ 0 359 2 ]
+// Beta-001 [ 0 215 4 ]
+// Beta-002 [ 0 169 4 ]
 
 int main(int argc, char *argv[]) {
   try {
@@ -153,15 +153,16 @@ int main(int argc, char *argv[]) {
         segment_label = utt2label_reader.Value(utt);
       }
 
-      int32 length = round((end - frame_overlap)/ frame_shift)
-                      - round(start / frame_shift);
-
-      if (shift_to_zero)
-        segmentation.EmplaceBack(0, length, segment_label);
-      else
-        segmentation.EmplaceBack(round(start / frame_shift),
-                                 round((end-frame_overlap) / frame_shift) - 1,
-                                 segment_label);
+      if (shift_to_zero) {
+        int32 last_frame = (end-frame_overlap) / frame_shift
+                            - start / frame_shift - 1;
+        segmentation.EmplaceBack(0, last_frame, segment_label);
+      } else {
+        segmentation.EmplaceBack(
+            static_cast<int32>(start / frame_shift + 0.5),
+            static_cast<int32>((end-frame_overlap) / frame_shift - 0.5),
+            segment_label);
+      }
 
       writer.Write(utt, segmentation);
       num_done++;
diff --git a/src/segmenterbin/segmentation-remove-segments.cc b/src/segmenterbin/segmentation-remove-segments.cc
index ce3ef2de6fd..27af1420e54 100644
--- a/src/segmenterbin/segmentation-remove-segments.cc
+++ b/src/segmenterbin/segmentation-remove-segments.cc
@@ -45,6 +45,7 @@ int main(int argc, char *argv[]) {
     bool binary = true;
     int32 remove_label = -1;
+    int32 max_remove_length = -1;
 
     std::string remove_labels_rspecifier = "";
 
     ParseOptions po(usage);
@@ -55,6 +56,11 @@ int main(int argc, char *argv[]) {
     po.Register("remove-label", &remove_label, "Remove segments of this label");
     po.Register("remove-labels-rspecifier", &remove_labels_rspecifier,
                 "Specify colon separated list of labels for each key");
+    po.Register("max-remove-length", &max_remove_length,
+                "If supplied, this specifies the maximum length of segments "
+                "that will be removed. A value of -1 specifies a length of "
+                "+infinity i.e.
segments will be removed based "
+                "only on their labels, irrespective of their lengths.");
 
     po.Read(argc, argv);
 
@@ -135,7 +141,7 @@ int main(int argc, char *argv[]) {
 
         remove_label = remove_labels[0];
 
-        RemoveSegments(remove_labels, &segmentation);
+        RemoveSegments(remove_labels, max_remove_length, &segmentation);
       } else {
         RemoveSegments(remove_label, &segmentation);
       }

From 31a3e79e7c539aafdfd862d906ae76d861966294 Mon Sep 17 00:00:00 2001
From: Vimal Manohar 
Date: Thu, 19 Jan 2017 18:07:58 -0500
Subject: [PATCH 304/530] asr_diarization: Rename get_subsegmented_feats.sh

---
 egs/wsj/s5/utils/data/get_subsegment_feats.sh | 47 +------------------
 .../s5/utils/data/get_subsegmented_feats.sh   | 46 ++++++++++++++++++
 2 files changed, 47 insertions(+), 46 deletions(-)
 mode change 100755 => 120000 egs/wsj/s5/utils/data/get_subsegment_feats.sh
 create mode 100755 egs/wsj/s5/utils/data/get_subsegmented_feats.sh

diff --git a/egs/wsj/s5/utils/data/get_subsegment_feats.sh b/egs/wsj/s5/utils/data/get_subsegment_feats.sh
deleted file mode 100755
index 6baba68eedd..00000000000
--- a/egs/wsj/s5/utils/data/get_subsegment_feats.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#! /bin/bash
-
-# Copyright 2016  Johns Hopkins University (Author: Dan Povey)
-#                 2016  Vimal Manohar
-# Apache 2.0.
-
-if [ $# -ne 4 ]; then
-  echo "This script gets subsegmented_feats (by adding ranges to data/feats.scp) "
-  echo "for the subsegments file. This does one part of the "
-  echo "functionality in subsegment_data_dir.sh, which additionally "
-  echo "creates a new subsegmented data directory."
-  echo "Usage: $0 <feats-scp> <frame-shift> <frame-overlap> <subsegments-file>"
-  echo " e.g.: $0 data/train/feats.scp 0.01 0.015 subsegments"
-  exit 1
-fi
-
-feats=$1
-frame_shift=$2
-frame_overlap=$3
-subsegments=$4
-
-# The subsegments format is <new-utt> <old-utt> <start-time> <end-time>.
-# e.g. 'utt_foo-1 utt_foo 7.21 8.93'
-# The first awk command replaces this with the format:
-# <new-utt> <old-utt> <start-frame> <end-frame>
-# e.g. 'utt_foo-1 utt_foo 721 893'
-# and the apply_map.pl command replaces 'utt_foo' (the 2nd field) with its corresponding entry
-# from the original wav.scp, so we get a line like:
-# e.g. 'utt_foo-1 foo-bar.ark:514231 721 892'
-# Note: the reason we subtract one from the last time is that it's going to
-# represent the 'last' frame, not the 'end' frame [i.e. not one past the last],
-# in the matlab-like, but zero-indexed [first:last] notion. For instance, a segment with 1 frame
-# would have start-time 0.00 and end-time 0.01, which would become the frame range
-# [0:0]
-# The second awk command turns this into something like
-# utt_foo-1 foo-bar.ark:514231[721:892]
-# It has to be a bit careful because the format actually allows for more general things
-# like pipes that might contain spaces, so it has to be able to produce output like the
-# following:
-# utt_foo-1 some command|[721:892]
-# Lastly, utils/data/normalize_data_range.pl will only do something nontrivial if
-# the original data-dir already had data-ranges in square brackets.
-awk -v s=$frame_shift -v fovlp=$frame_overlap '{print $1, $2, int(($3/s)+0.5), int(($4-fovlp)/s+0.5);}' <$subsegments| \
-  utils/apply_map.pl -f 2 $feats | \
-  awk '{p=NF-1; for (n=1;n "
+  echo " e.g.: $0 data/train/feats.scp 0.01 0.015 subsegments"
+  exit 1
+fi
+
+feats=$1
+frame_shift=$2
+frame_overlap=$3
+subsegments=$4
+
+# The subsegments format is <new-utt> <old-utt> <start-time> <end-time>.
+# e.g. 'utt_foo-1 utt_foo 7.21 8.93'
+# The first awk command replaces this with the format:
+# <new-utt> <old-utt> <start-frame> <end-frame>
+# e.g.
'utt_foo-1 utt_foo 721 893' +# and the apply_map.pl command replaces 'utt_foo' (the 2nd field) with its corresponding entry +# from the original wav.scp, so we get a line like: +# e.g. 'utt_foo-1 foo-bar.ark:514231 721 892' +# Note: the reason we subtract one from the last time is that it's going to +# represent the 'last' frame, not the 'end' frame [i.e. not one past the last], +# in the matlab-like, but zero-indexed [first:last] notion. For instance, a segment with 1 frame +# would have start-time 0.00 and end-time 0.01, which would become the frame range +# [0:0] +# The second awk command turns this into something like +# utt_foo-1 foo-bar.ark:514231[721:892] +# It has to be a bit careful because the format actually allows for more general things +# like pipes that might contain spaces, so it has to be able to produce output like the +# following: +# utt_foo-1 some command|[721:892] +# Lastly, utils/data/normalize_data_range.pl will only do something nontrivial if +# the original data-dir already had data-ranges in square brackets. +awk -v s=$frame_shift -v fovlp=$frame_overlap '{print $1, $2, int(($3/s)+0.5), int(($4-fovlp)/s+0.5);}' <$subsegments| \ + utils/apply_map.pl -f 2 $feats | \ + awk '{p=NF-1; for (n=1;n Date: Thu, 19 Jan 2017 18:09:10 -0500 Subject: [PATCH 305/530] asr_diarization: Modify utt2num_frames etc. --- egs/wsj/s5/utils/data/get_reco2num_frames.sh | 2 +- egs/wsj/s5/utils/data/get_utt2num_frames.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/egs/wsj/s5/utils/data/get_reco2num_frames.sh b/egs/wsj/s5/utils/data/get_reco2num_frames.sh index 8df5afdb156..edb16609703 100755 --- a/egs/wsj/s5/utils/data/get_reco2num_frames.sh +++ b/egs/wsj/s5/utils/data/get_reco2num_frames.sh @@ -15,7 +15,7 @@ fi data=$1 -if [ -f $data/reco2num_frames ]; then +if [ -s $data/reco2num_frames ]; then echo "$0: $data/reco2num_frames already present!" exit 0; fi diff --git a/egs/wsj/s5/utils/data/get_utt2num_frames.sh b/egs/wsj/s5/utils/data/get_utt2num_frames.sh index e2921601ec9..3f6d15c45a5 100755 --- a/egs/wsj/s5/utils/data/get_utt2num_frames.sh +++ b/egs/wsj/s5/utils/data/get_utt2num_frames.sh @@ -31,12 +31,12 @@ if [ ! -f $data/feats.scp ]; then exit 0 fi -utils/split_data.sh $data $nj || exit 1 +utils/split_data.sh --per-utt $data $nj || exit 1 $cmd JOB=1:$nj $data/log/get_utt2num_frames.JOB.log \ - feat-to-len scp:$data/split${nj}/JOB/feats.scp ark,t:$data/split$nj/JOB/utt2num_frames || exit 1 + feat-to-len scp:$data/split${nj}utt/JOB/feats.scp ark,t:$data/split${nj}utt/JOB/utt2num_frames || exit 1 for n in `seq $nj`; do - cat $data/split$nj/$n/utt2num_frames + cat $data/split${nj}utt/$n/utt2num_frames done > $data/utt2num_frames echo "$0: Computed and wrote $data/utt2num_frames" From 665642edb27c92fa6be27073d609d76385d6780c Mon Sep 17 00:00:00 2001 From: Yiming Wang Date: Thu, 19 Jan 2017 23:25:05 -0500 Subject: [PATCH 306/530] [src] nnet3: removed the declaration of SetZero() in nnet-utils.h (#1358) --- src/nnet3/nnet-utils.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h index 2bda0c623b6..f98782a6a22 100644 --- a/src/nnet3/nnet-utils.h +++ b/src/nnet3/nnet-utils.h @@ -53,17 +53,6 @@ int32 NumOutputNodes(const Nnet &nnet); /// returns the number of input nodes of this nnet. 
int32 NumInputNodes(const Nnet &nnet); -/// Calls SetZero (with the given is_gradient parameter) on all updatable -/// components of the nnet; calls ZeroComponentStats on all other components -/// that inherit from NonlinearComponent; and (just in case) calls Scale(0.0) on -/// all other components. -/// It's the same as ScaleNnet(0.0, nnet) except that if is_gradient is true it -/// can set the is_gradient_ flag on updatable components [to force simple -/// update]; and unlike ScaleNnet(0.0, nnet) it will get rid of NaNs that have -/// crept into the parameters or stats. -void SetZero(bool is_gradient, - Nnet *nnet); - /// Calls PerturbParams (with the given stddev) on all updatable components of /// the nnet. void PerturbParams(BaseFloat stddev, From 522acd834c71d12591ff58fffb2ddf535e66304a Mon Sep 17 00:00:00 2001 From: Kirill Katsnelson Date: Fri, 20 Jan 2017 00:03:08 -0800 Subject: [PATCH 307/530] [build] Enable Travis CI on the 'shortcut' branch (#1359) --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 85bbc7a52e4..d3ad85363ce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,6 +20,7 @@ addons: branches: only: - master + - shortcut before_install: - cat /proc/sys/kernel/core_pattern From d117c955d1a5b930e5a2507310cb3962feb4bbb6 Mon Sep 17 00:00:00 2001 From: Ke Li Date: Fri, 20 Jan 2017 16:18:38 -0500 Subject: [PATCH 308/530] [src] Some style-related fixes (and fix compiler warnings) in src/lm/ --- src/lm/arpa-file-parser-test.cc | 32 ++++++++++++++++---------------- src/lm/mikolov-rnnlm-lib.cc | 24 ++++++++++++------------ src/lm/mikolov-rnnlm-lib.h | 2 +- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/lm/arpa-file-parser-test.cc b/src/lm/arpa-file-parser-test.cc index 51af0bea2bf..8c21512677d 100644 --- a/src/lm/arpa-file-parser-test.cc +++ b/src/lm/arpa-file-parser-test.cc @@ -165,18 +165,18 @@ ngram 2=2\n\ ngram 3=2\n\ \n\ \\1-grams:\n\ --5.2 4 -3.3\n\ --3.4 5\n\ -0 1 -2.5\n\ --4.3 2\n\ +-5.2\t4\t-3.3\n\ +-3.4\t5\n\ +0\t1\t-2.5\n\ +-4.3\t2\n\ \n\ \\2-grams:\n\ --1.4 4 5 -3.2\n\ --1.3 1 4 -4.2\n\ +-1.4\t4 5\t-3.2\n\ +-1.3\t1 4\t-4.2\n\ \n\ \\3-grams:\n\ --0.3 1 4 5\n\ --0.2 4 5 2\n\ +-0.3\t1 4 5\n\ +-0.2\t4 5 2\n\ \n\ \\end\\"; @@ -220,18 +220,18 @@ ngram 2=2\n\ ngram 3=2\n\ \n\ \\1-grams: \n\ --5.2 a -3.3\n\ --3.4 \xCE\xB2\n\ -0.0 -2.5\n\ --4.3 \n\ +-5.2\ta\t-3.3\n\ +-3.4\t\xCE\xB2\n\ +0.0\t\t-2.5\n\ +-4.3\t\n\ \n\ \\2-grams:\t\n\ --1.5 a \xCE\xB2 -3.2\n\ --1.3 a -4.2\n\ +-1.5\ta \xCE\xB2\t-3.2\n\ +-1.3\t a\t-4.2\n\ \n\ \\3-grams:\n\ --0.3 a \xCE\xB2\n\ --0.2 a \n\ +-0.3\t a \xCE\xB2\n\ +-0.2\t a \n\ \\end\\"; // Symbol table that is created with predefined test symbols, "a" but no "b". 
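
The fixture rewrite above replaces ambiguous runs of spaces with explicit tabs, which matches the canonical ARPA layout: each n-gram entry is a log10 probability, a tab, the space-separated words, and optionally another tab plus a back-off weight. As a rough sketch of that field structure only (this is not the actual ArpaFileParser logic, and ParseArpaLine is a made-up helper):

    #include <sstream>
    #include <string>
    #include <vector>

    struct ArpaLine {
      float logprob;                    // log10 probability of the n-gram
      std::vector<std::string> words;   // the n-gram itself
      float backoff;                    // log10 back-off weight, 0.0 if absent
    };

    ArpaLine ParseArpaLine(const std::string &line) {
      ArpaLine out;
      out.backoff = 0.0f;  // a missing back-off field conventionally means 0.0
      std::istringstream is(line);
      std::string logprob_field, ngram_field, backoff_field;
      std::getline(is, logprob_field, '\t');  // field 1: log-probability
      std::getline(is, ngram_field, '\t');    // field 2: the words
      if (std::getline(is, backoff_field))    // field 3 (optional): back-off
        out.backoff = std::stof(backoff_field);
      out.logprob = std::stof(logprob_field);
      std::istringstream word_stream(ngram_field);
      for (std::string w; word_stream >> w; )
        out.words.push_back(w);
      return out;
    }
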
diff --git a/src/lm/mikolov-rnnlm-lib.cc b/src/lm/mikolov-rnnlm-lib.cc index b1abb29dee7..645f76c22d1 100644 --- a/src/lm/mikolov-rnnlm-lib.cc +++ b/src/lm/mikolov-rnnlm-lib.cc @@ -376,13 +376,13 @@ void CRnnLM::initNet() { } syn_d = - reinterpret_cast(calloc(static_cast(direct_size), + reinterpret_cast(calloc(static_cast(direct_size), sizeof(direct_t))); if (syn_d == NULL) { printf("Memory allocation for direct" - " connections failed (requested %ld bytes)\n", - static_cast(direct_size) * static_cast(sizeof(direct_t))); + " connections failed (requested %lld bytes)\n", + static_cast(direct_size) * static_cast(sizeof(direct_t))); exit(1); } @@ -461,7 +461,7 @@ void CRnnLM::initNet() { } } - int64 aa; + long long aa; for (aa = 0; aa < direct_size; aa++) { syn_d[aa] = 0; } @@ -621,7 +621,7 @@ void CRnnLM::restoreNet() { // will read whole network structure fscanf(fi, "%d", &layer2_size); if (ver > 5) { goToDelimiter(':', fi); - fscanf(fi, "%ld", &direct_size); + fscanf(fi, "%lld", &direct_size); } if (ver > 6) { goToDelimiter(':', fi); @@ -760,14 +760,14 @@ void CRnnLM::restoreNet() { // will read whole network structure } if (filetype == TEXT) { goToDelimiter(':', fi); // direct conenctions - int64 aa; + long long aa; for (aa = 0; aa < direct_size; aa++) { fscanf(fi, "%lf", &d); syn_d[aa] = d; } } if (filetype == BINARY) { - int64 aa; + long long aa; for (aa = 0; aa < direct_size; aa++) { fread(&fl, 4, 1, fi); syn_d[aa] = fl; @@ -982,7 +982,7 @@ void CRnnLM::computeNet(int last_word, int word) { // apply direct connections to classes if (direct_size > 0) { - uint64 hash[MAX_NGRAM_ORDER]; + unsigned long long hash[MAX_NGRAM_ORDER]; // this will hold pointers to syn_d that contains hash parameters for (a = 0; a < direct_order; a++) { @@ -997,7 +997,7 @@ void CRnnLM::computeNet(int last_word, int word) { for (b = 1; b <= a; b++) { hash[a] += PRIMES[(a * PRIMES[b] + b) % PRIMES_SIZE] - * static_cast(history[b - 1] + 1); + * static_cast(history[b - 1] + 1); } // update hash value based on words from the history @@ -1061,7 +1061,7 @@ void CRnnLM::computeNet(int last_word, int word) { // apply direct connections to words if (word != -1) if (direct_size > 0) { - uint64 hash[MAX_NGRAM_ORDER]; + unsigned long long hash[MAX_NGRAM_ORDER]; for (a = 0; a < direct_order; a++) { hash[a] = 0; @@ -1072,11 +1072,11 @@ void CRnnLM::computeNet(int last_word, int word) { if (a > 0) if (history[a - 1] == -1) break; hash[a] = PRIMES[0] * PRIMES[1] * - static_cast(vocab[word].class_index + 1); + static_cast(vocab[word].class_index + 1); for (b = 1; b <= a; b++) { hash[a] += PRIMES[(a * PRIMES[b] + b) % PRIMES_SIZE] - * static_cast(history[b - 1] + 1); + * static_cast(history[b - 1] + 1); } hash[a] = (hash[a] % (direct_size / 2)) + (direct_size) / 2; } diff --git a/src/lm/mikolov-rnnlm-lib.h b/src/lm/mikolov-rnnlm-lib.h index fb9c340416b..36d88a0a5d0 100644 --- a/src/lm/mikolov-rnnlm-lib.h +++ b/src/lm/mikolov-rnnlm-lib.h @@ -143,7 +143,7 @@ class CRnnLM { int layerc_size; int layer2_size; - int64 direct_size; + long long direct_size; int direct_order; int history[MAX_NGRAM_ORDER]; From b9c1e8bcee6c4deb7081e5e041c1dc1a13a7d3ad Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 20 Jan 2017 19:37:39 -0500 Subject: [PATCH 309/530] [src] Modify some tests that were slow on travis, to make them faster. 
(#1347) --- src/chain/chain-supervision-test.cc | 2 +- src/cudamatrix/cu-math-test.cc | 8 ++++- src/cudamatrix/cu-matrix-speed-test.cc | 4 +-- src/feat/pitch-functions-test.cc | 20 ++++++------ src/feat/signal-test.cc | 8 ++--- src/gmm/ebw-diag-gmm-test.cc | 37 +++++++++++------------ src/ivector/plda-test.cc | 16 +++++----- src/nnet3/natural-gradient-online-test.cc | 2 +- src/transform/fmllr-raw-test.cc | 14 ++++----- 9 files changed, 58 insertions(+), 53 deletions(-) diff --git a/src/chain/chain-supervision-test.cc b/src/chain/chain-supervision-test.cc index 0f0a3009ccd..e38fbca745f 100644 --- a/src/chain/chain-supervision-test.cc +++ b/src/chain/chain-supervision-test.cc @@ -615,7 +615,7 @@ int main() { else CuDevice::Instantiate().SelectGpuId("yes"); #endif - for (int32 i = 0; i < 5; i++) { + for (int32 i = 0; i < 3; i++) { kaldi::chain::ChainSupervisionTest(); kaldi::chain::BreadthFirstTest(); } diff --git a/src/cudamatrix/cu-math-test.cc b/src/cudamatrix/cu-math-test.cc index c7a01cf9aa6..9a78c652745 100644 --- a/src/cudamatrix/cu-math-test.cc +++ b/src/cudamatrix/cu-math-test.cc @@ -161,7 +161,7 @@ static void UnitTestCuMathComputeLstmNonlinearity() { AssertEqual(Houtput, HDoutput); } - for (int i = 16; i <= 2048; i *= 2) { + for (int i = 16; i <= 1024; i *= 2) { BaseFloat time_in_secs = 0.025; int32 num_rows = i; int32 cell_dim = i; @@ -180,6 +180,8 @@ static void UnitTestCuMathComputeLstmNonlinearity() { KALDI_LOG << "For ComputeLstmNonlinearity" << (sizeof(Real)==8 ? "" : "") << ", for dim = " << i << ", speed was " << gflops << " gigaflops"; + if (tim.Elapsed() > 0.05) + break; } } @@ -441,6 +443,8 @@ static void UnitTestBackpropLstmNonlinearity() { KALDI_LOG << "For BackpropLstmNonlinearity" << (sizeof(Real) == 8 ? "" : "") << ", for dim = " << i << ", speed was " << gflops << " gigaflops"; + if (tim.Elapsed() > 0.05) + break; } } @@ -509,6 +513,8 @@ static void UnitTestCuMathNormalizePerRow() { KALDI_LOG << "For CuMatrix::NormalizePerRow" << (sizeof(Real)==8?"":"") << ", for dim = " << dim << ", speed was " << gflops << " gigaflops."; + if (tim.Elapsed() > 0.05) + break; } } diff --git a/src/cudamatrix/cu-matrix-speed-test.cc b/src/cudamatrix/cu-matrix-speed-test.cc index 37257522fa8..032351564c0 100644 --- a/src/cudamatrix/cu-matrix-speed-test.cc +++ b/src/cudamatrix/cu-matrix-speed-test.cc @@ -998,7 +998,7 @@ template void CudaMatrixSpeedTest() { TestCuMatrixAddMatBlocks(sizes[s], 3, 3); for (int32 s = 0; s < ns; s++) TestCuMatrixMatMat(sizes[s]); - for (int32 s = 0; s < ns; s++) + for (int32 s = 0; s + 1 < ns; s++) TestCuMatrixMatMatBatched(sizes[s], 10); for (int32 s = 0; s < ns; s++) { TestCuMatrixAddDiagVecMat(sizes[s], kNoTrans); @@ -1050,7 +1050,7 @@ template void CudaMatrixSpeedTest() { TestCuMatrixCopyUpperToLower(sizes[s]); for (int32 s = 0; s < ns; s++) TestCuMatrixSetZeroAboveDiag(sizes[s]); - for (int32 s = 0; s < ns; s++) + for (int32 s = 0; s + 2 < ns; s++) TestCuMatrixLookup(sizes[s]); for (int32 s = 0; s < ns; s++) TestCuMatrixCopyRows1(sizes[s]); diff --git a/src/feat/pitch-functions-test.cc b/src/feat/pitch-functions-test.cc index 616dbc68d54..098e590a8e9 100644 --- a/src/feat/pitch-functions-test.cc +++ b/src/feat/pitch-functions-test.cc @@ -84,10 +84,10 @@ static void UnitTestSnipEdges() { KALDI_ASSERT(wave.Data().NumRows() == 1); SubVector waveform(wave.Data(), 0); - // Process files with snip edge enabled or disabled, on various + // Process files with snip edge enabled or disabled, on various // frame shifts and frame lengths - for (int fs = 1; fs <= 
10; fs++) { - for (int wl = 20; wl <= 100; wl += 10) { + for (int fs = 4; fs <= 10; fs += 2) { + for (int wl = 20; wl <= 100; wl += 20) { // Rather dirty way to round, but works fine int32 ms_fs = (int32)(wave.SampFreq() * 0.001 * fs + 0.5); int32 ms_wl = (int32)(wave.SampFreq() * 0.001 * wl + 0.5); @@ -99,11 +99,11 @@ static void UnitTestSnipEdges() { op_NoSnipEdges.frame_length_ms = wl; ComputeAndProcessKaldiPitch(op_SnipEdges, opp, waveform, &m1); ComputeAndProcessKaldiPitch(op_NoSnipEdges, opp, waveform, &m2); - + // Check the output differ in a predictable manner: // 1. The length of the output should only depend on the window size & window shift KALDI_LOG << "Output: " << m1.NumRows() << " ; " << m2.NumRows(); - // - with snip edges disabled, depends on file size and frame shift only */ + // - with snip edges disabled, depends on file size and frame shift only */ AssertEqual(m2.NumRows(), ((int)(wave.Data().NumCols() + ms_fs / 2)) / ms_fs); // - with snip edges disabled, depend on file size, frame shift, frame length */ AssertEqual(m1.NumRows(), ((int)(wave.Data().NumCols() - ms_wl + ms_fs)) / ms_fs); @@ -117,7 +117,7 @@ static void UnitTestSnipEdges() { int32 blag = -1; int32 max_lag = wl / fs * 2; int num_frames_f0 = m1.NumRows() - max_lag; - + /* Looks for the best correlation between the output signals, identify the lag, compares it with theoretical value */ SubVector sub_vec1(f0_1, 0, num_frames_f0); @@ -129,9 +129,9 @@ static void UnitTestSnipEdges() { blag = lag; } } - KALDI_LOG << "Best lag: " << blag * fs << "ms with value: " << bcorr << + KALDI_LOG << "Best lag: " << blag * fs << "ms with value: " << bcorr << "; expected lag: " << wl / 2 + 10 - fs / 2 << " ± " << fs; - // BP: the lag should in theory be equal to wl / 2 - fs / 2, but it seems + // BP: the lag should in theory be equal to wl / 2 - fs / 2, but it seems // to be: wl / 2 + 10 - fs / 2! It appears the 10 ms comes from the nccf_lag which // is 82 samples with the default settings => nccf_lag / resample_freq / 2 => 10.25ms // We should really be using the full_frame_length of the algorithm for accurate results, @@ -230,7 +230,7 @@ static void UnitTestDelay() { ext_opt.nccf_ballast_online = true; // this is necessary for the computation // to be identical regardless how many pieces we break the signal into. - int32 size = 10000 + rand() % 50000; + int32 size = 1000 + rand() % 5000; Vector v(size); // init with noise plus a sine-wave whose frequency is changing randomly. @@ -294,7 +294,7 @@ static void UnitTestSearch() { op.nccf_ballast_online = true; // this is necessary for the computation // to be identical regardless how many pieces we break the signal into. - int32 size = 10000 + rand() % 10000; + int32 size = 1000 + rand() % 1000; Vector v(size); // init with noise plus a sine-wave whose frequency is changing randomly. 
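
The lag check in UnitTestSnipEdges above is just an argmax over inner products of time-shifted pitch tracks. Pulled out of the test and simplified to plain std::vector (BestLag is a hypothetical helper, not part of the test code), the search looks like this:

    #include <vector>

    // Returns the shift of 'b' (in frames) that best aligns it with 'a',
    // scored by the inner product over 'num_frames' frames; the caller must
    // ensure that b.size() >= num_frames + max_lag.
    int BestLag(const std::vector<float> &a, const std::vector<float> &b,
                int max_lag, int num_frames) {
      int best_lag = 0;
      float best_corr = -1.0e30f;
      for (int lag = 0; lag <= max_lag; lag++) {
        float corr = 0.0f;
        for (int n = 0; n < num_frames; n++)
          corr += a[n] * b[n + lag];
        if (corr > best_corr) {
          best_corr = corr;
          best_lag = lag;
        }
      }
      return best_lag;
    }

With the snip-edges and no-snip-edges outputs as 'a' and 'b', the returned lag is then compared against the predicted offset of roughly wl / 2 + 10 - fs / 2 milliseconds, as in the test above.
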
diff --git a/src/feat/signal-test.cc b/src/feat/signal-test.cc index 39a379040b0..4fcd2aaf2c6 100644 --- a/src/feat/signal-test.cc +++ b/src/feat/signal-test.cc @@ -25,8 +25,8 @@ namespace kaldi { void UnitTestBlockConvolution() { for (int32 i = 0; i < 5; i++) { - int32 signal_length = 4000000 + Rand() % 400000; - int32 filter_length = 10000 + Rand() % 1000; + int32 signal_length = 400000 + Rand() % 40000; + int32 filter_length = 1000 + Rand() % 100; Vector signal(signal_length); Vector filter(filter_length); signal.SetRandn(); @@ -40,8 +40,8 @@ void UnitTestBlockConvolution() { void UnitTestConvolution() { for (int32 i = 0; i < 5; i++) { - int32 signal_length = 40000 + Rand() % 4000; - int32 filter_length = 100 + Rand() % 100; + int32 signal_length = 4000 + Rand() % 400; + int32 filter_length = 10 + Rand() % 10; Vector signal(signal_length); Vector filter(filter_length); signal.SetRandn(); diff --git a/src/gmm/ebw-diag-gmm-test.cc b/src/gmm/ebw-diag-gmm-test.cc index bd334921c78..dfcec0e0fd3 100644 --- a/src/gmm/ebw-diag-gmm-test.cc +++ b/src/gmm/ebw-diag-gmm-test.cc @@ -20,7 +20,7 @@ #include "gmm/diag-gmm.h" -#include "gmm/ebw-diag-gmm.h" +#include "gmm/ebw-diag-gmm.h" #include "util/kaldi-io.h" @@ -28,9 +28,9 @@ namespace kaldi { void UnitTestEstimateMmieDiagGmm() { - size_t dim = 15; // dimension of the gmm + size_t dim = RandInt(5, 10); // dimension of the gmm size_t nMix = 2; // number of mixtures in the data - size_t maxiterations = 20; // number of iterations for estimation + size_t maxiterations = RandInt(2, 5); // number of iterations for estimation // maximum number of densities in the GMM // larger than the number of mixtures in the data @@ -48,7 +48,7 @@ void UnitTestEstimateMmieDiagGmm() { // std::cout << "Gauss " << m << ": Mean = " << means_f.Row(m) << '\n' // << "Vars = " << vars_f.Row(m) << '\n'; } - + // Numerator stats // second, generate 1000 feature vectors for each of the mixture components size_t counter_num = 0, multiple = 200; @@ -96,7 +96,7 @@ void UnitTestEstimateMmieDiagGmm() { // write the feature vectors to a file std::ofstream of("tmpfeats"); of.precision(10); - of << feats_num; + of << feats_num; of.close(); // now generate randomly initial values for the GMM @@ -126,12 +126,12 @@ void UnitTestEstimateMmieDiagGmm() { + ((r/2)%2 == 0 ? kGmmVariances : 0) + ((r/4)%2 == 0 ? kGmmWeights : 0); double tau = (r/8)%2 == 0 ? 100 : 0.0; - + if ((flags & kGmmVariances) && !(flags & kGmmMeans)) { delete gmm; return; // Don't do this case: not supported in the update equations. } - + AccumDiagGmm num; AccumDiagGmm den; @@ -139,7 +139,7 @@ void UnitTestEstimateMmieDiagGmm() { num.SetZero(flags); den.Resize(gmm->NumGauss(), gmm->Dim(), flags); den.SetZero(flags); - + size_t iteration = 0; double last_log_like_diff; while (iteration < maxiterations) { @@ -149,7 +149,7 @@ void UnitTestEstimateMmieDiagGmm() { num.SetZero(flags); den.Resize(gmm->NumGauss(), gmm->Dim(), flags); den.SetZero(flags); - + double loglike_num = 0.0; double loglike_den = 0.0; for (size_t i = 0; i < counter_num; i++) { @@ -182,12 +182,12 @@ void UnitTestEstimateMmieDiagGmm() { << GmmFlagsToString(flags) << ", tau = " << tau << " )"; } last_log_like_diff = loglike_diff; - + AccumDiagGmm num_smoothed(num); IsmoothStatsDiagGmm(num, tau, &num_smoothed); // Apply I-smoothing. 
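      // (Roughly speaking: I-smoothing gives each Gaussian tau extra 'fake'
      // counts of its own numerator statistics, acting like a prior that keeps
      // low-occupancy Gaussians from taking overly large EBW steps; the
      // (r/8)%2 branch above exercises both tau = 100 and tau = 0.)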
- + BaseFloat auxf_gauss, auxf_weight, count; - std::cout << "MEANX: " << gmm->weights() << '\n'; + std::cout << "MEANX: " << gmm->weights() << '\n'; int32 num_floored; UpdateEbwDiagGmm(num_smoothed, den, flags, ebw_opts, @@ -197,21 +197,21 @@ void UnitTestEstimateMmieDiagGmm() { UpdateEbwWeightsDiagGmm(num, den, ebw_weight_opts, gmm, &auxf_weight, &count); } - + // mean_hlp.CopyFromVec(gmm->means_invvars().Row(0)); - // std::cout << "MEANY: " << mean_hlp << '\n'; + // std::cout << "MEANY: " << mean_hlp << '\n'; std::cout << "MEANY: " << gmm->weights() << '\n'; if ((iteration % 3 == 1) && (gmm->NumGauss() * 2 <= maxcomponents)) { gmm->Split(gmm->NumGauss() * 2, 0.001); - std::cout << "Ngauss, Ndim: " << gmm->NumGauss() << " " << gmm->Dim() << '\n'; + std::cout << "Ngauss, Ndim: " << gmm->NumGauss() << " " << gmm->Dim() << '\n'; } - + iteration++; } delete gmm; - + unlink("tmpfeats"); } @@ -219,8 +219,7 @@ void UnitTestEstimateMmieDiagGmm() { int main() { - // repeat the test 20 times - for (int i = 0; i < 20; i++) { + for (int i = 0; i < 5; i++) { kaldi::UnitTestEstimateMmieDiagGmm(); } std::cout << "Test OK.\n"; diff --git a/src/ivector/plda-test.cc b/src/ivector/plda-test.cc index 87560b2a1d2..e2ccb571ef5 100644 --- a/src/ivector/plda-test.cc +++ b/src/ivector/plda-test.cc @@ -23,7 +23,7 @@ namespace kaldi { void UnitTestPldaEstimation(int32 dim) { - int32 num_classes = 4000 + Rand() % 10; + int32 num_classes = 1000 + Rand() % 10; Matrix between_proj(dim, dim); while (between_proj.Cond() > 100) between_proj.SetRandn(); @@ -31,13 +31,13 @@ void UnitTestPldaEstimation(int32 dim) { while (within_proj.Cond() > 100) within_proj.SetRandn(); - + Vector global_mean(dim); global_mean.SetRandn(); global_mean.Scale(10.0); - + PldaStats stats; - + for (int32 n = 0; n < num_classes; n++) { int32 num_egs = 1 + Rand() % 30; Vector rand_vec(dim); @@ -57,8 +57,8 @@ void UnitTestPldaEstimation(int32 dim) { offset_mat); } - - + + SpMatrix between_var(dim), within_var(dim); between_var.AddMat2(1.0, between_proj, kNoTrans, 0.0); within_var.AddMat2(1.0, within_proj, kNoTrans, 0.0); @@ -86,7 +86,7 @@ void UnitTestPldaEstimation(int32 dim) { KALDI_LOG << "Diagonal of between-class variance in normalized space " << "should be: " << s; } - + } } @@ -109,7 +109,7 @@ int main() { UnitTestPldaEstimation(i + 1); // UnitTestPldaEstimation(400); - UnitTestPldaEstimation(80); + UnitTestPldaEstimation(40); std::cout << "Test OK.\n"; return 0; } diff --git a/src/nnet3/natural-gradient-online-test.cc b/src/nnet3/natural-gradient-online-test.cc index 2723ba4742f..7c46dfb3596 100644 --- a/src/nnet3/natural-gradient-online-test.cc +++ b/src/nnet3/natural-gradient-online-test.cc @@ -334,7 +334,7 @@ int main() { else CuDevice::Instantiate().SelectGpuId("optional"); // -2 .. 
automatic selection #endif - for (int32 i = 0; i < 10; i++) { + for (int32 i = 0; i < 5; i++) { UnitTestPreconditionDirectionsOnline(); } } diff --git a/src/transform/fmllr-raw-test.cc b/src/transform/fmllr-raw-test.cc index dc9d9beeb99..10fa3bae188 100644 --- a/src/transform/fmllr-raw-test.cc +++ b/src/transform/fmllr-raw-test.cc @@ -53,7 +53,7 @@ void UnitTestFmllrRaw(bool use_offset) { InitRandomGmm(&gmm); int32 model_dim = gmm.Dim(); - int32 raw_dim = 10 + Rand() % 5; + int32 raw_dim = 5 + Rand() % 3; int32 num_splice = 1 + Rand() % 5; while (num_splice * raw_dim < model_dim) { num_splice++; @@ -64,7 +64,7 @@ void UnitTestFmllrRaw(bool use_offset) { Matrix rand_points(npoints, full_dim); rand_points.SetRandn(); - + Matrix lda_mllt(full_dim, full_dim + (use_offset ? 1 : 0)); // This is the full LDA+MLLT // matrix. TODO: test with offset. lda_mllt.SetRandn(); @@ -73,7 +73,7 @@ void UnitTestFmllrRaw(bool use_offset) { BaseFloat prev_objf_impr; for (int32 iter = 0; iter < 4; iter++) { - + for (int32 i = 0; i < npoints; i++) { SubVector sample(rand_points, i); accs.AccumulateForGmm(gmm, sample, 1.0); @@ -81,7 +81,7 @@ void UnitTestFmllrRaw(bool use_offset) { Matrix fmllr_mat(raw_dim, raw_dim + 1); fmllr_mat.SetUnit(); // sets diagonal elements to one. - + FmllrRawOptions opts; BaseFloat objf_impr, count; accs.Update(opts, &fmllr_mat, &objf_impr, &count); @@ -94,8 +94,8 @@ void UnitTestFmllrRaw(bool use_offset) { KALDI_ASSERT(objf_impr < prev_objf_impr); } prev_objf_impr = objf_impr; - - + + // Now transform the raw features. for (int32 splice = 0; splice < num_splice; splice++) { SubMatrix raw_feats(rand_points, @@ -115,7 +115,7 @@ void UnitTestFmllrRaw(bool use_offset) { int main() { kaldi::g_kaldi_verbose_level = 5; - + for (int i = 0; i < 2; i++) { // did more iterations when first testing... kaldi::UnitTestFmllrRaw(i % 2 == 0); } From ef755720fbfc32a27190b431f93a85ae953e83eb Mon Sep 17 00:00:00 2001 From: Shiyin Kang Date: Sun, 22 Jan 2017 02:23:49 +0800 Subject: [PATCH 310/530] [src] Implement CuMatrixBase::CopyColsFromVec() (#1361) --- src/cudamatrix/cu-kernels-ansi.h | 4 ++++ src/cudamatrix/cu-kernels.cu | 22 ++++++++++++++++++ src/cudamatrix/cu-kernels.h | 9 ++++++++ src/cudamatrix/cu-matrix-test.cc | 35 +++++++++++++++++++++++++--- src/cudamatrix/cu-matrix.cc | 39 +++++++++++++++++++++++++++++++- src/cudamatrix/cu-matrix.h | 5 ++++ 6 files changed, 110 insertions(+), 4 deletions(-) diff --git a/src/cudamatrix/cu-kernels-ansi.h b/src/cudamatrix/cu-kernels-ansi.h index 878ba216407..116428ea82c 100644 --- a/src/cudamatrix/cu-kernels-ansi.h +++ b/src/cudamatrix/cu-kernels-ansi.h @@ -697,6 +697,10 @@ void cudaF_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, const int self_repair_sum_out_stride); +void cudaD_copy_cols_from_vec(dim3 Gr, dim3 Bl, double *mat_out, + MatrixDim d_out, const double *v_in); +void cudaF_copy_cols_from_vec(dim3 Gr, dim3 Bl, float *mat_out, MatrixDim d_out, + const float *v_in); } // extern "C" diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu index 505c6f7f67f..abb4efd47ef 100644 --- a/src/cudamatrix/cu-kernels.cu +++ b/src/cudamatrix/cu-kernels.cu @@ -745,6 +745,18 @@ static void _copy_rows_from_vec(Real* m_out, MatrixDim d, const Real* v_in) { } } +// This kernel writes a copy of the vector "v_in" to each col of the matrix +// "m_out". the dimension of v_in should be equal to the #row of m_out. 
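+// For example (illustration only): with d = {rows: 2, cols: 3} and
+// v_in = [a, b], the kernel fills m_out as
+//   [ a a a ]
+//   [ b b b ]
+// i.e. every column of m_out becomes a copy of v_in.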
+template +__global__ +static void _copy_cols_from_vec(Real* m_out, MatrixDim d, const Real* v_in) { + int i = blockIdx.y * blockDim.y + threadIdx.y; // row id + int j = blockIdx.x * blockDim.x + threadIdx.x; // col id + if (i < d.rows && j < d.cols) { + m_out[i * d.stride + j] = v_in[i]; + } +} + // _trace_mat_mat reduce the partial sum to // value[blockIdx.y * gridDim.x + blockIdx.x] // It use shared mem to transpose matrix B to ensure coalesced memory access @@ -4644,3 +4656,13 @@ void cudaF_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, value_sum_out_stride, deriv_sum_out, deriv_sum_out_stride, self_repair_sum_out, self_repair_sum_out_stride); } + + +void cudaD_copy_cols_from_vec(dim3 Gr, dim3 Bl, double *mat_out, + MatrixDim d_out, const double *v_in) { + _copy_cols_from_vec<<>>(mat_out, d_out, v_in); +} +void cudaF_copy_cols_from_vec(dim3 Gr, dim3 Bl, float *mat_out, MatrixDim d_out, + const float *v_in) { + _copy_cols_from_vec<<>>(mat_out, d_out, v_in); +} diff --git a/src/cudamatrix/cu-kernels.h b/src/cudamatrix/cu-kernels.h index 9e9910d6f56..649a25ab67e 100644 --- a/src/cudamatrix/cu-kernels.h +++ b/src/cudamatrix/cu-kernels.h @@ -1331,6 +1331,15 @@ inline void cuda_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, self_repair_sum_out_stride); } +inline void cuda_copy_cols_from_vec(dim3 Gr, dim3 Bl, double *mat_out, + MatrixDim d_out, const double *v_in) { + cudaD_copy_cols_from_vec(Gr, Bl, mat_out, d_out, v_in); +} +inline void cuda_copy_cols_from_vec(dim3 Gr, dim3 Bl, float *mat_out, + MatrixDim d_out, const float *v_in) { + cudaF_copy_cols_from_vec(Gr, Bl, mat_out, d_out, v_in); +} + } // namespace kaldi #endif // HAVE_CUDA diff --git a/src/cudamatrix/cu-matrix-test.cc b/src/cudamatrix/cu-matrix-test.cc index 72abace138d..a6f84f3f6aa 100644 --- a/src/cudamatrix/cu-matrix-test.cc +++ b/src/cudamatrix/cu-matrix-test.cc @@ -376,6 +376,31 @@ static void UnitTestCuMatrixCopyRowsFromVec() { } +template +static void UnitTestCuMatrixCopyColsFromVec() { + for (int32 p = 0; p < 2; p++) { + int32 num_rows = 100 + Rand() % 255; + int32 num_cols = 100 + Rand() % 200; + + int32 vec_dim; + if (p % 2 == 0) vec_dim = num_rows; + else vec_dim = num_cols * num_rows; + + CuVector cu_vec(vec_dim); + cu_vec.SetRandn(); + Vector vec(cu_vec); + + CuMatrix cu_mat(num_rows, num_cols); + cu_mat.CopyColsFromVec(cu_vec); + Matrix mat(num_rows, num_cols); + mat.CopyColsFromVec(vec); + + Matrix mat2(cu_mat); + AssertEqual(mat, mat2); + } +} + + template static void UnitTestCuMatrixCopyRows() { for (int32 p = 0; p < 2; p++) { @@ -1574,7 +1599,7 @@ static void UnitTestCuMatrixAddMatTp() { template static void UnitTestCuMatrixTranspose() { - for (int32 i = 1; i < 10; i++) { + for (int32 i = 1; i < 2; i++) { MatrixIndexT dimM = 5 * i + Rand() % 10, dimN = dimM; if (i % 2 == 0) dimN += 5; @@ -1582,8 +1607,11 @@ static void UnitTestCuMatrixTranspose() { CuMatrix A(dimM, dimN); A.SetRandn(); CuMatrix B(A, kTrans); - A.Transpose(); - AssertEqual(A, B); + + Matrix hA(A); + Matrix hB(B); + hB.Transpose(); + AssertEqual(hA, hB); } } @@ -2615,6 +2643,7 @@ template void CudaMatrixUnitTest() { UnitTestCuMatrixSumColumnRanges(); UnitTestCuMatrixCopyRows(); UnitTestCuMatrixCopyRowsFromVec(); + UnitTestCuMatrixCopyColsFromVec(); UnitTestCuMatrixCopyToRows(); UnitTestCuMatrixAddRows(); UnitTestCuMatrixAddToRows(); diff --git a/src/cudamatrix/cu-matrix.cc b/src/cudamatrix/cu-matrix.cc index c5f41d5a944..652364f3dc8 100644 --- a/src/cudamatrix/cu-matrix.cc +++ b/src/cudamatrix/cu-matrix.cc @@ -2135,6 
+2135,43 @@ void CuMatrixBase::CopyRowsFromVec(const VectorBase &v) { } } +template +void CuMatrixBase::CopyColsFromVec(const CuVectorBase &rv) { +#if HAVE_CUDA == 1 + if (CuDevice::Instantiate().Enabled()) { + Timer tim; + if (rv.Dim() == num_rows_ * num_cols_) { + // treat rv as a matrix of the size (num_cols x num_rows_) + // and use transposed copy to fill *this + // see CuMatrixBase::CopyFromMat() for more detail of the impl + MatrixDim rv_dim = { num_cols_, num_rows_, num_rows_ }; + const int32 warpSize = 32; + dim3 dimBlock(warpSize, CU1DBLOCK / warpSize); + dim3 dimGrid(n_blocks(rv_dim.cols, warpSize), + n_blocks(rv_dim.rows, warpSize)); + cuda_copy_from_mat_trans(dimGrid, dimBlock, data_, rv.Data(), Dim(), + rv_dim); + CU_SAFE_CALL(cudaGetLastError()); + } else if (rv.Dim() == num_rows_) { + // use 2D block (8x32) and large enough grid to cover matrix *this + // dimBlock.x need to be at least warpSize for coalesced memory access. + const int32 warpSize = 32; + dim3 dimBlock(warpSize, CU1DBLOCK / warpSize); + dim3 dimGrid(n_blocks(num_cols_, dimBlock.x), + n_blocks(num_rows_, dimBlock.y)); + cuda_copy_cols_from_vec(dimGrid, dimBlock, Data(), Dim(), rv.Data()); + CU_SAFE_CALL(cudaGetLastError()); + } else { + KALDI_ERR<< "Wrong sized arguments"; + } + CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); + } else +#endif + { + Mat().CopyColsFromVec(rv.Vec()); + } +} + template void CuMatrixBase::CopyColFromVec(const CuVectorBase &v, @@ -2801,7 +2838,7 @@ void CuMatrix::Transpose() { return; // Copy and swap for all cases. // No need for a separate kernel of squared matrix in-place transpose. - // It has the same posible peak performance as copy transpose, + // It has the same possible peak performance as copy_transpose, // if allocate/deallocate overhead can be ignored. CuMatrix tmp(*this, kTrans); this->Swap(&tmp); diff --git a/src/cudamatrix/cu-matrix.h b/src/cudamatrix/cu-matrix.h index 4601080ad37..fb26fbf1013 100644 --- a/src/cudamatrix/cu-matrix.h +++ b/src/cudamatrix/cu-matrix.h @@ -252,6 +252,11 @@ class CuMatrixBase { /// Version of CopyRowsFromVec() that takes a CPU-based vector. void CopyRowsFromVec(const VectorBase &v); + /// Copies vector into matrix, column-by-column. + /// Note that rv.Dim() must either equal NumRows()*NumCols() or NumRows(); + /// this has two modes of operation. + void CopyColsFromVec(const CuVectorBase &v); + /// Copy vector into specific column of matrix. 
      void CopyColFromVec(const CuVectorBase &v,
                          const MatrixIndexT col);

From 9b22433f60fef247ffc01a8440d457d093713920 Mon Sep 17 00:00:00 2001
From: Daniel Povey 
Date: Sun, 22 Jan 2017 20:33:40 -0500
Subject: [PATCH 312/530] [src] nnet3: modifying nnet-combine.{h,cc} to support
 soft enforcement of sum-to-one constraint.

---
 src/nnet3/nnet-combine.cc | 80 ++++++++++++++++++++++++++++++++++++---
 src/nnet3/nnet-combine.h  | 29 ++++++++++++--
 2 files changed, 101 insertions(+), 8 deletions(-)

diff --git a/src/nnet3/nnet-combine.cc b/src/nnet3/nnet-combine.cc
index 45c1f74477b..9a220fd5ade 100644
--- a/src/nnet3/nnet-combine.cc
+++ b/src/nnet3/nnet-combine.cc
@@ -34,6 +34,13 @@ NnetCombiner::NnetCombiner(const NnetCombineConfig &config,
     nnet_params_(std::min(num_nnets, config_.max_effective_inputs),
                  NumParameters(first_nnet)),
     tot_input_weighting_(nnet_params_.NumRows()) {
+
+  if (config_.sum_to_one_penalty != 0.0 &&
+      config_.enforce_sum_to_one) {
+    KALDI_WARN << "--sum-to-one-penalty=" << config_.sum_to_one_penalty
+               << " is nonzero, so setting --enforce-sum-to-one=false.";
+    config_.enforce_sum_to_one = false;
+  }
   SubVector first_params(nnet_params_, 0);
   VectorizeNnet(nnet_, &first_params);
   tot_input_weighting_(0) += 1.0;
@@ -42,7 +49,6 @@ NnetCombiner::NnetCombiner(const NnetCombineConfig &config,
   NnetComputeProbOptions compute_prob_opts;
   compute_prob_opts.compute_deriv = true;
   prob_computer_ = new NnetComputeProb(compute_prob_opts, nnet_);
-
 }
 
 void NnetCombiner::ComputeUpdatableComponentDims(){
@@ -145,8 +151,21 @@ void NnetCombiner::Combine() {
     lbfgs.DoStep(objf, deriv);
   }
 
-  KALDI_LOG << "Combining nnets, objective function changed from "
-            << initial_objf << " to " << objf;
+  if (!config_.sum_to_one_penalty) {
+    KALDI_LOG << "Combining nnets, objective function changed from "
+              << initial_objf << " to " << objf;
+  } else {
+    Vector weights(WeightDim());
+    GetWeights(params, &weights);
+    bool print_weights = true;
+    BaseFloat penalty = GetSumToOnePenalty(weights, NULL, print_weights);
+    // note: initial_objf has no penalty term because it summed exactly
+    // to one.
+ KALDI_LOG << "Combining nnets, objective function changed from " + << initial_objf << " to " << objf << " = " + << (objf - penalty) << " + " << penalty; + } + // must recompute nnet_ if "params" is not exactly equal to the // final params that LB @@ -360,6 +379,53 @@ void NnetCombiner::GetParamsDeriv(const VectorBase &weights, } +double NnetCombiner::GetSumToOnePenalty( + const VectorBase &weights, + VectorBase *weights_penalty_deriv, + bool print_weights) const { + + KALDI_ASSERT(config_.sum_to_one_penalty >= 0.0); + BaseFloat penalty = config_.sum_to_one_penalty; + if (penalty == 0.0) { + weights_penalty_deriv->SetZero(); + return 0.0; + } + double ans = 0.0; + int32 num_uc = NumUpdatableComponents(), + num_models = nnet_params_.NumRows(); + Vector tot_weights(num_uc); + std::ostringstream tot_weight_info; + for (int32 c = 0; c < num_uc; c++) { + double this_total_weight = 0.0; + for (int32 m = 0; m < num_models; m++) { + int32 index = m * num_uc + c; + BaseFloat this_weight = weights(index); + this_total_weight += this_weight; + } + tot_weights(c) = this_total_weight; + ans += -0.5 * penalty * + (this_total_weight - 1.0) * (this_total_weight - 1.0); + if (weights_penalty_deriv != NULL) { + KALDI_ASSERT(weights.Dim() == weights_penalty_deriv->Dim()); + // this_total_weight_deriv is the derivative of the penalty + // term w.r.t. this component's total weight. + BaseFloat this_total_weight_deriv = + penalty * (1.0 - this_total_weight); + for (int32 m = 0; m < num_models; m++) { + int32 index = m * num_uc + c; + (*weights_penalty_deriv)(index) = this_total_weight_deriv; + } + } + } + if (print_weights) { + KALDI_LOG << "Total weights per component: " + << PrintVectorPerUpdatableComponent(nnet_, + tot_weights); + } + return ans; +} + + void NnetCombiner::GetNnetParameters(const Vector &weights, VectorBase *nnet_params) const { KALDI_ASSERT(nnet_params->Dim() == nnet_params_.NumCols()); @@ -442,22 +508,26 @@ double NnetCombiner::ComputeObjfAndDerivFromParameters( Vector weights(WeightDim()), normalized_weights(WeightDim()), nnet_params(NnetParameterDim(), kUndefined), nnet_params_deriv(NnetParameterDim(), kUndefined), + weights_sum_to_one_penalty_deriv(WeightDim()), normalized_weights_deriv(WeightDim()), weights_deriv(WeightDim()); GetWeights(params, &weights); + double ans = GetSumToOnePenalty(weights, &weights_sum_to_one_penalty_deriv); GetNormalizedWeights(weights, &normalized_weights); GetNnetParameters(normalized_weights, &nnet_params); - double ans = ComputeObjfAndDerivFromNnet(nnet_params, &nnet_params_deriv); + ans += ComputeObjfAndDerivFromNnet(nnet_params, &nnet_params_deriv); if (ans != ans || ans - ans != 0) // NaN or inf return ans; // No point computing derivative GetWeightsDeriv(nnet_params_deriv, &normalized_weights_deriv); GetUnnormalizedWeightsDeriv(weights, normalized_weights_deriv, &weights_deriv); + weights_deriv.AddVec(1.0, weights_sum_to_one_penalty_deriv); GetParamsDeriv(weights, weights_deriv, params_deriv); return ans; } -// enforces the constraint that the weights for each component must sum to one. +// enforces the constraint that the weights for each component must sum to one, +// if necessary. 
void NnetCombiner::GetNormalizedWeights( const VectorBase &unnorm_weights, VectorBase *norm_weights) const { diff --git a/src/nnet3/nnet-combine.h b/src/nnet3/nnet-combine.h index a2883dab5b2..1b20c2fbb70 100644 --- a/src/nnet3/nnet-combine.h +++ b/src/nnet3/nnet-combine.h @@ -48,6 +48,7 @@ struct NnetCombineConfig { bool test_gradient; bool enforce_positive_weights; bool enforce_sum_to_one; + BaseFloat sum_to_one_penalty; bool separate_weights_per_component; NnetCombineConfig(): num_iters(60), initial_impr(0.01), @@ -55,6 +56,7 @@ struct NnetCombineConfig { test_gradient(false), enforce_positive_weights(false), enforce_sum_to_one(false), + sum_to_one_penalty(0.0), separate_weights_per_component(true) { } void Register(OptionsItf *po) { @@ -73,6 +75,11 @@ struct NnetCombineConfig { "If true, enforce that all weights are positive."); po->Register("enforce-sum-to-one", &enforce_sum_to_one, "If true, enforce that " "the model weights for each component should sum to one."); + po->Register("sum-to-one-penalty", &sum_to_one_penalty, "If >0, a penalty term " + "on the squared difference between sum(weights) for one component," + " and 1.0. This is like --enforce-sum-to-one, but done in a 'soft' " + "way (e.g. maybe useful with dropout). We suggest small values " + "like 10e-2 (for regular nnets) or 1.0e-03 (for chain models)."); po->Register("separate-weights-per-component", &separate_weights_per_component, "If true, have a separate weight for each updatable component in " "the nnet."); @@ -104,7 +111,7 @@ class NnetCombiner { ~NnetCombiner() { delete prob_computer_; } private: - const NnetCombineConfig &config_; + NnetCombineConfig config_; const std::vector &egs_; @@ -126,8 +133,9 @@ class NnetCombiner { Matrix nnet_params_; // This vector has the same dimension as nnet_params_.NumRows(), - // and helps us normalize so each row of nnet_params correspondss to - // a weighted average of its inputs. + // and helps us normalize so each row of nnet_params corresponds to + // a weighted average of its inputs (will be all ones if + // config_.max_effective_inputs >= the number of nnets provided). Vector tot_input_weighting_; // returns the parameter dimension, i.e. the dimension of the parameters that @@ -182,6 +190,21 @@ class NnetCombiner { void GetNormalizedWeights(const VectorBase &unnorm_weights, VectorBase *norm_weights) const; + // if config_.sum_to_one_penalty is 0.0, returns 0.0 and sets + // weights_penalty_deriv to 0.0; else it computes, for each + // updatable component u the total weight w_u, returns the value + // -0.5 * config_.sum_to_one_penalty * sum_u (w_u - 1.0)^2; + // and sets 'weights_penalty_deriv' to the derivative w.r.t. + // the result. + // Note: config_.sum_to_one_penalty is exclusive with + // config_.enforce_sum_to_one, so there is really no distinction between + // normalized and unnormalized weights here (since normalization would be a + // no-op). + double GetSumToOnePenalty(const VectorBase &weights, + VectorBase *weights_penalty_deriv, + bool print_weights = false) const; + + // Computes the nnet-parameter vector from the normalized weights and // nnet_params_, as a vector. (See the functions Vectorize() and // UnVectorize() for how they relate to the nnet's components' parameters). From 98d8a5b29dedb0edb262ca7bb3785d54db8a83c3 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 22 Jan 2017 21:07:14 -0500 Subject: [PATCH 313/530] [src] nnet3: Changing from floating-point to double precision in nnet-combine code. 
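
A note on the likely motivation (an inference, plus a standalone illustration; neither is taken from the patch itself): L-BFGS accumulates many small inner products and curvature terms, and the weight space being optimized here is tiny (num-models * num-updatable-components entries), so promoting the weight vectors, their derivatives, and the OptimizeLbfgs instantiation to double costs essentially nothing while avoiding single-precision round-off in those accumulations. The round-off effect being guarded against is the usual one:

    #include <cstdio>

    int main() {
      float f = 0.0f;
      double d = 0.0;
      for (int i = 0; i < 100000000; i++) {  // sum 1e-8, 1e8 times
        f += 1e-8f;
        d += 1e-8;
      }
      // The float sum stalls near 0.25 once 1e-8 falls below half an ulp of
      // the running total; the double sum stays close to the exact answer 1.0.
      std::printf("float sum = %g, double sum = %g\n", f, d);
      return 0;
    }
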
--- src/nnet3/nnet-combine.cc | 114 +++++++++++++++++++------------------- src/nnet3/nnet-combine.h | 38 ++++++------- 2 files changed, 77 insertions(+), 75 deletions(-) diff --git a/src/nnet3/nnet-combine.cc b/src/nnet3/nnet-combine.cc index 9a220fd5ade..d50b5adc072 100644 --- a/src/nnet3/nnet-combine.cc +++ b/src/nnet3/nnet-combine.cc @@ -135,12 +135,12 @@ void NnetCombiner::Combine() { // itself, so this is BFGS. lbfgs_options.first_step_impr = config_.initial_impr; - Vector params(dim), deriv(dim); - BaseFloat objf, initial_objf; + Vector params(dim), deriv(dim); + double objf, initial_objf; GetInitialParameters(¶ms); - OptimizeLbfgs lbfgs(params, lbfgs_options); + OptimizeLbfgs lbfgs(params, lbfgs_options); for (int32 i = 0; i < config_.num_iters; i++) { params.CopyFromVec(lbfgs.GetProposedValue()); @@ -155,10 +155,10 @@ void NnetCombiner::Combine() { KALDI_LOG << "Combining nnets, objective function changed from " << initial_objf << " to " << objf; } else { - Vector weights(WeightDim()); + Vector weights(WeightDim()); GetWeights(params, &weights); bool print_weights = true; - BaseFloat penalty = GetSumToOnePenalty(weights, NULL, print_weights); + double penalty = GetSumToOnePenalty(weights, NULL, print_weights); // note: initial_objf has no penalty term because it summed exactly // to one. KALDI_LOG << "Combining nnets, objective function changed from " @@ -169,7 +169,7 @@ void NnetCombiner::Combine() { // must recompute nnet_ if "params" is not exactly equal to the // final params that LB - Vector final_params(dim); + Vector final_params(dim); final_params.CopyFromVec(lbfgs.GetValue(&objf)); if (!params.ApproxEqual(final_params, 0.0)) { // the following call makes sure that nnet_ corresponds to the parameters @@ -180,9 +180,9 @@ void NnetCombiner::Combine() { } -void NnetCombiner::PrintParams(const VectorBase ¶ms) const { +void NnetCombiner::PrintParams(const VectorBase ¶ms) const { - Vector weights(params.Dim()), normalized_weights(params.Dim()); + Vector weights(params.Dim()), normalized_weights(params.Dim()); GetWeights(params, &weights); GetNormalizedWeights(weights, &normalized_weights); int32 num_models = nnet_params_.NumRows(), @@ -232,21 +232,21 @@ void NnetCombiner::PrintParams(const VectorBase ¶ms) const { bool NnetCombiner::SelfTestDerivatives() { int32 num_tests = 2; // more properly, this is the number of dimensions in a // single test. - BaseFloat delta = 0.001; + double delta = 0.001; int32 dim = ParameterDim(); - Vector params(dim), deriv(dim); - Vector predicted_changes(num_tests), + Vector params(dim), deriv(dim); + Vector predicted_changes(num_tests), observed_changes(num_tests); GetInitialParameters(¶ms); - BaseFloat initial_objf = ComputeObjfAndDerivFromParameters(params, + double initial_objf = ComputeObjfAndDerivFromParameters(params, &deriv); for (int32 i = 0; i < num_tests; i++) { - Vector new_deriv(dim), offset(dim), new_params(params); + Vector new_deriv(dim), offset(dim), new_params(params); offset.SetRandn(); new_params.AddVec(delta, offset); - BaseFloat new_objf = ComputeObjfAndDerivFromParameters(new_params, + double new_objf = ComputeObjfAndDerivFromParameters(new_params, &new_deriv); // for predicted changes, interpolate old and new derivs. 
    predicted_changes(i) =
@@ -254,7 +254,7 @@ bool NnetCombiner::SelfTestDerivatives() {
         0.5 * VecVec(new_params, new_deriv) - 0.5 * VecVec(params, new_deriv);
     observed_changes(i) = new_objf - initial_objf;
   }
-  BaseFloat threshold = 0.1;
+  double threshold = 0.1;
   KALDI_LOG << "predicted_changes = " << predicted_changes;
   KALDI_LOG << "observed_changes = " << observed_changes;
   if (!ApproxEqual(predicted_changes, observed_changes, threshold)) {
@@ -271,23 +271,23 @@ void NnetCombiner::SelfTestModelDerivatives() {
                         // single test.
   int32 dim = ParameterDim();

-  Vector<BaseFloat> params(dim), deriv(dim);
-  Vector<BaseFloat> predicted_changes(num_tests),
+  Vector<double> params(dim), deriv(dim);
+  Vector<double> predicted_changes(num_tests),
       observed_changes(num_tests);
   GetInitialParameters(&params);

-  Vector<BaseFloat> weights(WeightDim()), normalized_weights(WeightDim()),
-      nnet_params(NnetParameterDim(), kUndefined),
+  Vector<double> weights(WeightDim()), normalized_weights(WeightDim());
+  Vector<BaseFloat> nnet_params(NnetParameterDim(), kUndefined),
       nnet_deriv(NnetParameterDim(), kUndefined);
   GetWeights(params, &weights);
   GetNormalizedWeights(weights, &normalized_weights);
   GetNnetParameters(normalized_weights, &nnet_params);
-  BaseFloat initial_objf = ComputeObjfAndDerivFromNnet(nnet_params,
+  double initial_objf = ComputeObjfAndDerivFromNnet(nnet_params,
                                                        &nnet_deriv);

-  BaseFloat delta = 0.002 * std::sqrt(VecVec(nnet_params, nnet_params) /
-                                      NnetParameterDim());
+  double delta = 0.002 * std::sqrt(VecVec(nnet_params, nnet_params) /
+                                   NnetParameterDim());

   for (int32 i = 0; i < num_tests; i++) {
@@ -295,7 +295,7 @@ void NnetCombiner::SelfTestModelDerivatives() {
         offset(NnetParameterDim()), new_nnet_params(nnet_params);
     offset.SetRandn();
     new_nnet_params.AddVec(delta, offset);
-    BaseFloat new_objf = ComputeObjfAndDerivFromNnet(new_nnet_params,
+    double new_objf = ComputeObjfAndDerivFromNnet(new_nnet_params,
                                                      &new_nnet_deriv);
     // for predicted changes, interpolate old and new derivs.
    predicted_changes(i) =
@@ -305,7 +305,7 @@ void NnetCombiner::SelfTestModelDerivatives() {
         0.5 * VecVec(nnet_params, new_nnet_deriv);
     observed_changes(i) = new_objf - initial_objf;
   }
-  BaseFloat threshold = 0.1;
+  double threshold = 0.1;
   KALDI_LOG << "model-derivatives: predicted_changes = " << predicted_changes;
   KALDI_LOG << "model-derivatives: observed_changes = " << observed_changes;
   if (!ApproxEqual(predicted_changes, observed_changes, threshold))
@@ -323,7 +323,7 @@ int32 NnetCombiner::ParameterDim() const {
 }

-void NnetCombiner::GetInitialParameters(VectorBase<BaseFloat> *params) const {
+void NnetCombiner::GetInitialParameters(VectorBase<double> *params) const {
   KALDI_ASSERT(params->Dim() == ParameterDim());
   params->Set(1.0 / nnet_params_.NumRows());
   if (config_.enforce_positive_weights) {
@@ -333,8 +333,8 @@
   }
 }

-void NnetCombiner::GetWeights(const VectorBase<BaseFloat> &params,
-                              VectorBase<BaseFloat> *weights) const {
+void NnetCombiner::GetWeights(const VectorBase<double> &params,
+                              VectorBase<double> *weights) const {
   KALDI_ASSERT(weights->Dim() == WeightDim());
   if (config_.separate_weights_per_component) {
     weights->CopyFromVec(params);
@@ -354,12 +354,12 @@ void NnetCombiner::GetWeights(const VectorBase<BaseFloat> &params,
 }

-void NnetCombiner::GetParamsDeriv(const VectorBase<BaseFloat> &weights,
-                                  const VectorBase<BaseFloat> &weights_deriv,
-                                  VectorBase<BaseFloat> *param_deriv) {
+void NnetCombiner::GetParamsDeriv(const VectorBase<double> &weights,
+                                  const VectorBase<double> &weights_deriv,
+                                  VectorBase<double> *param_deriv) {
   KALDI_ASSERT(weights.Dim() == WeightDim() &&
                param_deriv->Dim() == ParameterDim());
-  Vector<BaseFloat> preexp_weights_deriv(weights_deriv);
+  Vector<double> preexp_weights_deriv(weights_deriv);
   if (config_.enforce_positive_weights) {
     // to enforce positive weights we first compute weights (call these
     // preexp_weights) and then take exponential.  Note, d/dx exp(x) = exp(x).
@@ -380,12 +380,12 @@ void NnetCombiner::GetParamsDeriv(const VectorBase<BaseFloat> &weights,

 double NnetCombiner::GetSumToOnePenalty(
-    const VectorBase<BaseFloat> &weights,
-    VectorBase<BaseFloat> *weights_penalty_deriv,
+    const VectorBase<double> &weights,
+    VectorBase<double> *weights_penalty_deriv,
     bool print_weights) const {
   KALDI_ASSERT(config_.sum_to_one_penalty >= 0.0);
-  BaseFloat penalty = config_.sum_to_one_penalty;
+  double penalty = config_.sum_to_one_penalty;
   if (penalty == 0.0) {
     weights_penalty_deriv->SetZero();
     return 0.0;
@@ -393,13 +393,13 @@ double NnetCombiner::GetSumToOnePenalty(
   double ans = 0.0;
   int32 num_uc = NumUpdatableComponents(),
       num_models = nnet_params_.NumRows();
-  Vector<BaseFloat> tot_weights(num_uc);
+  Vector<double> tot_weights(num_uc);
   std::ostringstream tot_weight_info;
   for (int32 c = 0; c < num_uc; c++) {
     double this_total_weight = 0.0;
     for (int32 m = 0; m < num_models; m++) {
       int32 index = m * num_uc + c;
-      BaseFloat this_weight = weights(index);
+      double this_weight = weights(index);
       this_total_weight += this_weight;
     }
     tot_weights(c) = this_total_weight;
@@ -409,7 +409,7 @@ double NnetCombiner::GetSumToOnePenalty(
     KALDI_ASSERT(weights.Dim() == weights_penalty_deriv->Dim());
     // this_total_weight_deriv is the derivative of the penalty
     // term w.r.t. this component's total weight.
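The expression assigned on the next line is just the derivative of the penalty defined in the header: with P(W) = -0.5 * penalty * (W - 1)^2 for a component's total weight W, dP/dW = penalty * (1 - W). A standalone numeric check of that step:

    # central finite difference vs. the analytic derivative penalty * (1 - W)
    p, W, eps = 0.5, 1.3, 1e-6
    penalty_fn = lambda w: -0.5 * p * (w - 1.0) ** 2
    finite_diff = (penalty_fn(W + eps) - penalty_fn(W - eps)) / (2 * eps)
    assert abs(finite_diff - p * (1.0 - W)) < 1e-8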
-    BaseFloat this_total_weight_deriv =
+    double this_total_weight_deriv =
         penalty * (1.0 - this_total_weight);
     for (int32 m = 0; m < num_models; m++) {
       int32 index = m * num_uc + c;
@@ -418,15 +418,16 @@ double NnetCombiner::GetSumToOnePenalty(
     }
   }
   if (print_weights) {
+    Vector<BaseFloat> tot_weights_float(tot_weights);
     KALDI_LOG << "Total weights per component: "
               << PrintVectorPerUpdatableComponent(nnet_,
-                                                  tot_weights);
+                                                  tot_weights_float);
   }
   return ans;
 }

-void NnetCombiner::GetNnetParameters(const Vector<BaseFloat> &weights,
+void NnetCombiner::GetNnetParameters(const Vector<double> &weights,
                                      VectorBase<BaseFloat> *nnet_params) const {
   KALDI_ASSERT(nnet_params->Dim() == nnet_params_.NumCols());
   nnet_params->SetZero();
@@ -452,7 +453,7 @@ void NnetCombiner::GetNnetParameters(const Vector<BaseFloat> &weights,
 // compare GetNnetParameters.
 void NnetCombiner::GetWeightsDeriv(
     const VectorBase<BaseFloat> &nnet_params_deriv,
-    VectorBase<BaseFloat> *weights_deriv) {
+    VectorBase<double> *weights_deriv) {
   KALDI_ASSERT(nnet_params_deriv.Dim() == nnet_params_.NumCols() &&
                weights_deriv->Dim() == WeightDim());
   int32 num_uc = NumUpdatableComponents(),
@@ -503,13 +504,14 @@ double NnetCombiner::ComputeObjfAndDerivFromNnet(

 double NnetCombiner::ComputeObjfAndDerivFromParameters(
-    VectorBase<BaseFloat> &params,
-    VectorBase<BaseFloat> *params_deriv) {
-  Vector<BaseFloat> weights(WeightDim()), normalized_weights(WeightDim()),
-      nnet_params(NnetParameterDim(), kUndefined),
-      nnet_params_deriv(NnetParameterDim(), kUndefined),
+    VectorBase<double> &params,
+    VectorBase<double> *params_deriv) {
+  Vector<double> weights(WeightDim()), normalized_weights(WeightDim()),
       weights_sum_to_one_penalty_deriv(WeightDim()),
       normalized_weights_deriv(WeightDim()), weights_deriv(WeightDim());
+  Vector<BaseFloat>
+      nnet_params(NnetParameterDim(), kUndefined),
+      nnet_params_deriv(NnetParameterDim(), kUndefined);
   GetWeights(params, &weights);
   double ans = GetSumToOnePenalty(weights, &weights_sum_to_one_penalty_deriv);
   GetNormalizedWeights(weights, &normalized_weights);
@@ -529,8 +531,8 @@ double NnetCombiner::ComputeObjfAndDerivFromParameters(
 // enforces the constraint that the weights for each component must sum to one,
 // if necessary.
 void NnetCombiner::GetNormalizedWeights(
-    const VectorBase<BaseFloat> &unnorm_weights,
-    VectorBase<BaseFloat> *norm_weights) const {
+    const VectorBase<double> &unnorm_weights,
+    VectorBase<double> *norm_weights) const {
   if (!config_.enforce_sum_to_one) {
     norm_weights->CopyFromVec(unnorm_weights);
     return;
@@ -538,12 +540,12 @@ void NnetCombiner::GetNormalizedWeights(
   int32 num_uc = NumUpdatableComponents(),
       num_models = nnet_params_.NumRows();
   for (int32 c = 0; c < num_uc; c++) {
-    BaseFloat sum = 0.0;
+    double sum = 0.0;
     for (int32 m = 0; m < num_models; m++) {
       int32 index = m * num_uc + c;
       sum += unnorm_weights(index);
     }
-    BaseFloat inv_sum = 1.0 / sum;  // if it's NaN then it's OK, we'll get NaN
+    double inv_sum = 1.0 / sum;  // if it's NaN then it's OK, we'll get NaN
                                  // weights and eventually -inf objective.
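To make the flat indexing in the loop below concrete: weights are laid out as index = m * num_uc + c, and each component's weights across models are divided by their sum. A standalone sketch with two models and two updatable components:

    # per-component sum-to-one normalization over the flat weight layout
    def normalize(weights, num_uc, num_models):
        out = list(weights)
        for c in range(num_uc):
            s = sum(weights[m * num_uc + c] for m in range(num_models))
            for m in range(num_models):
                # s == 0.0 yields inf/nan, which the comment above accepts
                out[m * num_uc + c] = weights[m * num_uc + c] / s
        return out

    print(normalize([0.2, 0.6, 0.2, 0.6], num_uc=2, num_models=2))
    # -> [0.5, 0.5, 0.5, 0.5]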
    for (int32 m = 0; m < num_models; m++) {
       int32 index = m * num_uc + c;
@@ -553,9 +555,9 @@
 }

 void NnetCombiner::GetUnnormalizedWeightsDeriv(
-    const VectorBase<BaseFloat> &unnorm_weights,
-    const VectorBase<BaseFloat> &norm_weights_deriv,
-    VectorBase<BaseFloat> *unnorm_weights_deriv) {
+    const VectorBase<double> &unnorm_weights,
+    const VectorBase<double> &norm_weights_deriv,
+    VectorBase<double> *unnorm_weights_deriv) {
   if (!config_.enforce_sum_to_one) {
     unnorm_weights_deriv->CopyFromVec(norm_weights_deriv);
     return;
@@ -563,13 +565,13 @@ void NnetCombiner::GetUnnormalizedWeightsDeriv(
   int32 num_uc = NumUpdatableComponents(),
       num_models = nnet_params_.NumRows();
   for (int32 c = 0; c < num_uc; c++) {
-    BaseFloat sum = 0.0;
+    double sum = 0.0;
     for (int32 m = 0; m < num_models; m++) {
       int32 index = m * num_uc + c;
       sum += unnorm_weights(index);
     }
-    BaseFloat inv_sum = 1.0 / sum;
-    BaseFloat inv_sum_deriv = 0.0;
+    double inv_sum = 1.0 / sum;
+    double inv_sum_deriv = 0.0;
     for (int32 m = 0; m < num_models; m++) {
       int32 index = m * num_uc + c;
       // in the forward direction, we'd do:
@@ -578,7 +580,7 @@ void NnetCombiner::GetUnnormalizedWeightsDeriv(
       inv_sum_deriv += norm_weights_deriv(index) * unnorm_weights(index);
     }
     // note: d/dx (1/x) = -1/x^2
-    BaseFloat sum_deriv = -1.0 * inv_sum_deriv * inv_sum * inv_sum;
+    double sum_deriv = -1.0 * inv_sum_deriv * inv_sum * inv_sum;
     for (int32 m = 0; m < num_models; m++) {
       int32 index = m * num_uc + c;
       (*unnorm_weights_deriv)(index) += sum_deriv;
diff --git a/src/nnet3/nnet-combine.h b/src/nnet3/nnet-combine.h
index 1b20c2fbb70..5b60d30b8ed 100644
--- a/src/nnet3/nnet-combine.h
+++ b/src/nnet3/nnet-combine.h
@@ -79,7 +79,7 @@ struct NnetCombineConfig {
                  "on the squared difference between sum(weights) for one component,"
                  " and 1.0. This is like --enforce-sum-to-one, but done in a 'soft' "
                  "way (e.g. maybe useful with dropout). We suggest small values "
-                 "like 10e-2 (for regular nnets) or 1.0e-03 (for chain models).");
+                 "like 10e-3 (for regular nnets) or 1.0e-04 (for chain models).");
     po->Register("separate-weights-per-component", &separate_weights_per_component,
                  "If true, have a separate weight for each updatable component in "
                  "the nnet.");
@@ -157,7 +157,7 @@ class NnetCombiner {
   // Computes the initial parameters.  The parameters are the underlying thing
   // that we optimize; their dimension equals ParameterDim().  They are not the same
   // thing as the nnet parameters.
-  void GetInitialParameters(VectorBase<BaseFloat> *params) const;
+  void GetInitialParameters(VectorBase<double> *params) const;

   // Tests that derivatives are accurate.  Prints warning and returns false if not.
   bool SelfTestDerivatives();
@@ -167,28 +167,28 @@ class NnetCombiner {

   // prints the parameters via logging statements.
-  void PrintParams(const VectorBase<BaseFloat> &params) const;
+  void PrintParams(const VectorBase<double> &params) const;

   // This function computes the objective function (and its derivative, if the objective
   // function is finite) at the given value of the parameters (the parameters we're optimizing,
   // i.e. the combination weights; not the nnet parameters).  This function calls most of the
   // functions below.
   double ComputeObjfAndDerivFromParameters(
-      VectorBase<BaseFloat> &params,
-      VectorBase<BaseFloat> *params_deriv);
+      VectorBase<double> &params,
+      VectorBase<double> *params_deriv);

   // Computes the weights from the parameters in a config-dependent way.  The
   // weight dimension is always (the number of updatable components times
   // nnet_params_.NumRows()).
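A hedged sketch of what the two configurations of GetWeights() amount to. The tied branch (one weight per model, repeated across that model's updatable components) is inferred from the --separate-weights-per-component help text rather than shown in these hunks:

    def get_weights(params, num_uc, num_models, separate_per_component):
        if separate_per_component:
            return list(params)  # one weight per (model, component) pair
        # assumed behavior: one weight per model, broadcast over its components
        return [params[m] for m in range(num_models) for _ in range(num_uc)]

    print(get_weights([0.25, 0.75], num_uc=3, num_models=2,
                      separate_per_component=False))
    # -> [0.25, 0.25, 0.25, 0.75, 0.75, 0.75]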
-  void GetWeights(const VectorBase<BaseFloat> &params,
-                  VectorBase<BaseFloat> *weights) const;
+  void GetWeights(const VectorBase<double> &params,
+                  VectorBase<double> *weights) const;

   // Given the raw weights: if config_.enforce_sum_to_one, then compute weights
   // with sum-to-one constraint per component included; else just copy input to
   // output.
-  void GetNormalizedWeights(const VectorBase<BaseFloat> &unnorm_weights,
-                            VectorBase<BaseFloat> *norm_weights) const;
+  void GetNormalizedWeights(const VectorBase<double> &unnorm_weights,
+                            VectorBase<double> *norm_weights) const;

   // if config_.sum_to_one_penalty is 0.0, returns 0.0 and sets
   // weights_penalty_deriv to 0.0; else it computes, for each
@@ -200,15 +200,15 @@ class NnetCombiner {
   // config_.enforce_sum_to_one, so there is really no distinction between
   // normalized and unnormalized weights here (since normalization would be a
   // no-op).
-  double GetSumToOnePenalty(const VectorBase<BaseFloat> &weights,
-                            VectorBase<BaseFloat> *weights_penalty_deriv,
+  double GetSumToOnePenalty(const VectorBase<double> &weights,
+                            VectorBase<double> *weights_penalty_deriv,
                             bool print_weights = false) const;

   // Computes the nnet-parameter vector from the normalized weights and
   // nnet_params_, as a vector.  (See the functions Vectorize() and
   // UnVectorize() for how they relate to the nnet's components' parameters).
-  void GetNnetParameters(const Vector<BaseFloat> &normalized_weights,
+  void GetNnetParameters(const Vector<double> &normalized_weights,
                          VectorBase<BaseFloat> *nnet_params) const;

   // This function computes the objective function (and its derivative, if the objective
@@ -220,23 +220,23 @@ class NnetCombiner {
   // Given an objective-function derivative with respect to the nnet parameters,
   // computes the derivative with respect to the (normalized) weights.
   void GetWeightsDeriv(const VectorBase<BaseFloat> &nnet_params_deriv,
-                       VectorBase<BaseFloat> *normalized_weights_deriv);
+                       VectorBase<double> *normalized_weights_deriv);

   // Computes the derivative w.r.t. the unnormalized weights, by propagating
   // through the normalization operation.
   // If config_.enforce_sum_to_one == false, just copies norm_weights_deriv to
   // unnorm_weights_deriv.
-  void GetUnnormalizedWeightsDeriv(const VectorBase<BaseFloat> &unnorm_weights,
-                                   const VectorBase<BaseFloat> &norm_weights_deriv,
-                                   VectorBase<BaseFloat> *unnorm_weights_deriv);
+  void GetUnnormalizedWeightsDeriv(const VectorBase<double> &unnorm_weights,
+                                   const VectorBase<double> &norm_weights_deriv,
+                                   VectorBase<double> *unnorm_weights_deriv);

   // Given a derivative w.r.t. the weights, outputs a derivative w.r.t.
// the params - void GetParamsDeriv(const VectorBase &weights, - const VectorBase &weight_deriv, - VectorBase *param_deriv); + void GetParamsDeriv(const VectorBase &weights, + const VectorBase &weight_deriv, + VectorBase *param_deriv); void ComputeUpdatableComponentDims(); void FinishPreprocessingInput(); From 13c62a9b168d8bedddb6d7b5286cb694591a7219 Mon Sep 17 00:00:00 2001 From: Ke Li Date: Mon, 23 Jan 2017 01:09:11 -0500 Subject: [PATCH 314/530] [src] cudamatrix: modify test code to guarantee loop only run once if no GPU (#1366) --- src/cudamatrix/cu-array-test.cc | 5 +++-- src/cudamatrix/cu-block-matrix-test.cc | 6 ++++-- src/cudamatrix/cu-device-test.cc | 7 ++++--- src/cudamatrix/cu-math-test.cc | 5 +++-- src/cudamatrix/cu-matrix-speed-test.cc | 6 +++--- src/cudamatrix/cu-matrix-test.cc | 9 +++++---- src/cudamatrix/cu-rand-speed-test.cc | 2 +- src/cudamatrix/cu-sp-matrix-speed-test.cc | 2 +- src/cudamatrix/cu-sp-matrix-test.cc | 6 +++--- src/cudamatrix/cu-sparse-matrix-test.cc | 19 +++++++++++-------- src/cudamatrix/cu-test.cc | 6 ++---- src/cudamatrix/cu-tp-matrix-test.cc | 6 +++--- src/cudamatrix/cu-vector-speed-test.cc | 2 +- src/cudamatrix/cu-vector-test.cc | 8 ++++---- 14 files changed, 48 insertions(+), 41 deletions(-) diff --git a/src/cudamatrix/cu-array-test.cc b/src/cudamatrix/cu-array-test.cc index f3ebcb72ee0..863ca5dde18 100644 --- a/src/cudamatrix/cu-array-test.cc +++ b/src/cudamatrix/cu-array-test.cc @@ -116,8 +116,9 @@ static void UnitTestCuArray() { int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); @@ -134,8 +135,8 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-block-matrix-test.cc b/src/cudamatrix/cu-block-matrix-test.cc index 4193e61c609..387749904b1 100644 --- a/src/cudamatrix/cu-block-matrix-test.cc +++ b/src/cudamatrix/cu-block-matrix-test.cc @@ -181,8 +181,9 @@ template void CuBlockMatrixUnitTest() { int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU @@ -200,12 +201,13 @@ int main() { #else kaldi::CuBlockMatrixUnitTest(); #endif + if (loop == 0) KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-device-test.cc b/src/cudamatrix/cu-device-test.cc index ec0fa7b1f9f..8f44985ede0 100644 --- a/src/cudamatrix/cu-device-test.cc +++ b/src/cudamatrix/cu-device-test.cc @@ -99,8 +99,8 @@ void CudaMatrixResizeTest() { int main() { - for (int32 loop = 0; loop < 2; loop++) { #if HAVE_CUDA == 1 + for (int32 loop = 0; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); @@ -118,9 +118,10 @@ int main() { #else kaldi::CudaMatrixResizeTest(); #endif - } + #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif - std::cout << "Tests succeeded.\n"; + KALDI_LOG << "Tests succeeded."; } diff --git a/src/cudamatrix/cu-math-test.cc 
b/src/cudamatrix/cu-math-test.cc index c7a01cf9aa6..f8962d5e7a6 100644 --- a/src/cudamatrix/cu-math-test.cc +++ b/src/cudamatrix/cu-math-test.cc @@ -531,8 +531,9 @@ template void CudaMathUnitTest() { int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU @@ -556,8 +557,8 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-matrix-speed-test.cc b/src/cudamatrix/cu-matrix-speed-test.cc index 37257522fa8..c4a96252b84 100644 --- a/src/cudamatrix/cu-matrix-speed-test.cc +++ b/src/cudamatrix/cu-matrix-speed-test.cc @@ -1085,8 +1085,8 @@ template void CudaMatrixSpeedTest() { int main() { - for (int32 loop = 0; loop < 2; loop++) { #if HAVE_CUDA == 1 + for (int32 loop = 0; loop < 2; loop++) { if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); else @@ -1103,9 +1103,9 @@ int main() { #else kaldi::CudaMatrixSpeedTest(); #endif - } #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif - std::cout << "Tests succeeded.\n"; + KALDI_LOG << "Tests succeeded."; } diff --git a/src/cudamatrix/cu-matrix-test.cc b/src/cudamatrix/cu-matrix-test.cc index a6f84f3f6aa..38c800d8e58 100644 --- a/src/cudamatrix/cu-matrix-test.cc +++ b/src/cudamatrix/cu-matrix-test.cc @@ -2707,8 +2707,9 @@ template void CudaMatrixUnitTest() { int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); @@ -2718,7 +2719,6 @@ int main() { kaldi::CudaMatrixUnitTest(); - #if HAVE_CUDA == 1 if (CuDevice::Instantiate().DoublePrecisionSupported()) { kaldi::CudaMatrixUnitTest(); @@ -2733,9 +2733,10 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } - SetVerboseLevel(4); + + SetVerboseLevel(4); #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-rand-speed-test.cc b/src/cudamatrix/cu-rand-speed-test.cc index 23f82eab977..abcae76c598 100644 --- a/src/cudamatrix/cu-rand-speed-test.cc +++ b/src/cudamatrix/cu-rand-speed-test.cc @@ -218,5 +218,5 @@ int main() { CuDevice::Instantiate().PrintProfile(); #endif - std::cout << "Tests succeeded.\n"; + KALDI_LOG << "Tests succeeded."; } diff --git a/src/cudamatrix/cu-sp-matrix-speed-test.cc b/src/cudamatrix/cu-sp-matrix-speed-test.cc index 455bf58608f..ded4baed49b 100644 --- a/src/cudamatrix/cu-sp-matrix-speed-test.cc +++ b/src/cudamatrix/cu-sp-matrix-speed-test.cc @@ -146,5 +146,5 @@ int main() { #if HAVE_CUDA == 1 CuDevice::Instantiate().PrintProfile(); #endif - std::cout << "Tests succeeded.\n"; + KALDI_LOG << "Tests succeeded."; } diff --git a/src/cudamatrix/cu-sp-matrix-test.cc b/src/cudamatrix/cu-sp-matrix-test.cc index 3e3991afc81..c0f1119acea 100644 --- a/src/cudamatrix/cu-sp-matrix-test.cc +++ b/src/cudamatrix/cu-sp-matrix-test.cc @@ -363,9 +363,9 @@ template void CudaSpMatrixUnitTest() { int main() { using namespace kaldi; - - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop 
== 0) CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU @@ -394,8 +394,8 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-sparse-matrix-test.cc b/src/cudamatrix/cu-sparse-matrix-test.cc index 8f885815c72..6514ddbfa87 100644 --- a/src/cudamatrix/cu-sparse-matrix-test.cc +++ b/src/cudamatrix/cu-sparse-matrix-test.cc @@ -25,6 +25,8 @@ #include "util/common-utils.h" #include "cudamatrix/cu-matrix-lib.h" +using namespace kaldi; + namespace kaldi { template @@ -185,19 +187,20 @@ void CudaSparseMatrixUnitTest() { int main() { - for (kaldi::int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 - kaldi::CuDevice::Instantiate().SetDebugStrideMode(true); + for (; loop < 2; loop++) { + CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) - kaldi::CuDevice::Instantiate().SelectGpuId("no"); + CuDevice::Instantiate().SelectGpuId("no"); else - kaldi::CuDevice::Instantiate().SelectGpuId("yes"); + CuDevice::Instantiate().SelectGpuId("yes"); #endif kaldi::CudaSparseMatrixUnitTest(); #if HAVE_CUDA == 1 - if (kaldi::CuDevice::Instantiate().DoublePrecisionSupported()) { + if (CuDevice::Instantiate().DoublePrecisionSupported()) { kaldi::CudaSparseMatrixUnitTest(); } else { KALDI_WARN << "Double precision not supported"; @@ -210,10 +213,10 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } - kaldi::SetVerboseLevel(4); + SetVerboseLevel(4); #if HAVE_CUDA == 1 - kaldi::CuDevice::Instantiate().PrintProfile(); + } + CuDevice::Instantiate().PrintProfile(); #endif return 0; } diff --git a/src/cudamatrix/cu-test.cc b/src/cudamatrix/cu-test.cc index c27e2b64691..66b62f097c9 100644 --- a/src/cudamatrix/cu-test.cc +++ b/src/cudamatrix/cu-test.cc @@ -575,9 +575,8 @@ static void CuMatrixUnitTest() { int main() { using namespace kaldi; - - for (int32 loop = 0; loop < 2; loop++) { #if HAVE_CUDA == 1 + for (int32 loop = 0; loop < 2; loop++) { if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); else @@ -593,9 +592,8 @@ int main() { { kaldi::CuMatrixUnitTest(); } - } - #if HAVE_CUDA == 1 + } kaldi::CuDevice::Instantiate().PrintProfile(); #endif diff --git a/src/cudamatrix/cu-tp-matrix-test.cc b/src/cudamatrix/cu-tp-matrix-test.cc index 675cd19a56c..f5018aef6b7 100644 --- a/src/cudamatrix/cu-tp-matrix-test.cc +++ b/src/cudamatrix/cu-tp-matrix-test.cc @@ -187,9 +187,9 @@ template void CudaTpMatrixUnitTest() { int main() { using namespace kaldi; - - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU @@ -211,8 +211,8 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-vector-speed-test.cc b/src/cudamatrix/cu-vector-speed-test.cc index 81f6f2bf14d..cf3f126937f 100644 --- a/src/cudamatrix/cu-vector-speed-test.cc +++ b/src/cudamatrix/cu-vector-speed-test.cc @@ -319,6 +319,6 @@ int main() { #else kaldi::CudaVectorSpeedTest(); #endif - std::cout << "Tests succeeded.\n"; + KALDI_LOG << "Tests succeeded."; } diff --git 
a/src/cudamatrix/cu-vector-test.cc b/src/cudamatrix/cu-vector-test.cc index a17a7baa930..6537bab70c6 100644 --- a/src/cudamatrix/cu-vector-test.cc +++ b/src/cudamatrix/cu-vector-test.cc @@ -755,9 +755,10 @@ int main(int argc, char *argv[]) { po.PrintUsage(); exit(1); } - - for (int32 loop = 0; loop < 2; loop++) { + + int32 loop = 0; #if HAVE_CUDA == 1 + for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU @@ -765,7 +766,6 @@ int main(int argc, char *argv[]) { CuDevice::Instantiate().SelectGpuId(use_gpu); #endif - kaldi::CuVectorUnitTest(); #if HAVE_CUDA == 1 if (CuDevice::Instantiate().DoublePrecisionSupported()) { @@ -781,8 +781,8 @@ int main(int argc, char *argv[]) { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } #if HAVE_CUDA == 1 + } CuDevice::Instantiate().PrintProfile(); #endif return 0; From 04404176cdeac2911807e13155148414f7c3ed4c Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 23 Jan 2017 01:23:44 -0500 Subject: [PATCH 315/530] [src,scripts]: Adding dropout schedule option to nnet3 (#1248) --- .../nnet3/train/chain_objf/acoustic_model.py | 24 +- egs/wsj/s5/steps/libs/nnet3/train/common.py | 29 +- .../libs/nnet3/train/dropout_schedule.py | 309 ++++++++++++++++++ .../nnet3/train/frame_level_objf/common.py | 15 +- egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py | 26 +- egs/wsj/s5/steps/nnet3/chain/train.py | 27 +- egs/wsj/s5/steps/nnet3/train_dnn.py | 10 +- egs/wsj/s5/steps/nnet3/train_raw_dnn.py | 10 +- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 25 +- egs/wsj/s5/steps/nnet3/train_rnn.py | 25 +- src/nnet3/nnet-chain-combine.cc | 1 + src/nnet3/nnet-combine.cc | 1 + src/nnet3/nnet-utils.cc | 23 ++ src/nnet3/nnet-utils.h | 3 + src/nnet3bin/nnet3-combine.cc | 1 + src/nnet3bin/nnet3-copy.cc | 8 +- 16 files changed, 471 insertions(+), 66 deletions(-) create mode 100644 egs/wsj/s5/steps/libs/nnet3/train/dropout_schedule.py diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py index 0c871f07c2e..d3f0d01897e 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py @@ -223,7 +223,9 @@ def train_one_iteration(dir, iter, srand, egs_dir, leaky_hmm_coefficient, momentum, max_param_change, shuffle_buffer_size, frame_subsampling_factor, truncate_deriv_weights, - run_opts, background_process_handler=None): + run_opts, + dropout_edit_string="", + background_process_handler=None): """ Called from steps/nnet3/chain/train.py for one iteration for neural network training with LF-MMI objective @@ -237,9 +239,10 @@ def train_one_iteration(dir, iter, srand, egs_dir, if os.path.exists('{0}/srand'.format(dir)): try: saved_srand = int(open('{0}/srand'.format(dir)).readline().strip()) - except (IOError, ValueError) as e: - raise Exception("Exception while reading the random seed " - "for training: {0}".format(e.str())) + except (IOError, ValueError): + logger.error("Exception while reading the random seed " + "for training") + raise if srand != saved_srand: logger.warning("The random seed provided to this iteration " "(srand={0}) is different from the one saved last " @@ -302,6 +305,17 @@ def train_one_iteration(dir, iter, srand, egs_dir, cur_num_chunk_per_minibatch = num_chunk_per_minibatch / 2 cur_max_param_change = float(max_param_change) / math.sqrt(2) + 
raw_model_string = '{0} {1}'.format(raw_model_string, dropout_edit_string) + + shrink_info_str = '' + if shrinkage_value != 1.0: + shrink_info_str = ' and shrink value is {0}'.format(shrinkage_value) + + logger.info("On iteration {0}, learning rate is {1}" + "{shrink_info}.".format( + iter, learning_rate, + shrink_info=shrink_info_str)) + train_new_models(dir=dir, iter=iter, srand=srand, num_jobs=num_jobs, num_archives_processed=num_archives_processed, num_archives=num_archives, @@ -521,7 +535,7 @@ def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch, models_to_combine.add(num_iters) - for iter in models_to_combine: + for iter in sorted(models_to_combine): model_file = '{0}/{1}.mdl'.format(dir, iter) if os.path.exists(model_file): raw_model_strings.append( diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py index b4b7c56b8d9..c6ced36f127 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -17,12 +17,14 @@ import shutil import libs.common as common_lib +import libs.nnet3.train.dropout_schedule as dropout_schedule +from dropout_schedule import * logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) -class RunOpts: +class RunOpts(object): """A structure to store run options. Run options like queue.pl and run.pl, along with their memory @@ -532,6 +534,31 @@ def __init__(self): Note: we implemented it in such a way that it doesn't increase the effective learning rate.""") + self.parser.add_argument("--trainer.dropout-schedule", type=str, + action=common_lib.NullstrToNoneAction, + dest='dropout_schedule', default=None, + help="""Use this to specify the dropout + schedule. You specify a piecewise linear + function on the domain [0,1], where 0 is the + start and 1 is the end of training; the + function-argument (x) rises linearly with the + amount of data you have seen, not iteration + number (this improves invariance to + num-jobs-{initial-final}). E.g. '0,0.2,0' + means 0 at the start; 0.2 after seeing half + the data; and 0 at the end. You may specify + the x-value of selected points, e.g. + '0,0.2@0.25,0' means that the 0.2 + dropout-proportion is reached a quarter of the + way through the data. The start/end x-values + are at x=0/x=1, and other unspecified x-values + are interpolated between known x-values. You + may specify different rules for different + component-name patterns using 'pattern1=func1 + pattern2=func2', e.g. 'relu*=0,0.1,0 + lstm*=0,0.2,0'. More general should precede + less general patterns, as they are applied + sequentially.""") # General options self.parser.add_argument("--stage", type=int, default=-4, diff --git a/egs/wsj/s5/steps/libs/nnet3/train/dropout_schedule.py b/egs/wsj/s5/steps/libs/nnet3/train/dropout_schedule.py new file mode 100644 index 00000000000..d9cf3112e4a --- /dev/null +++ b/egs/wsj/s5/steps/libs/nnet3/train/dropout_schedule.py @@ -0,0 +1,309 @@ +#! /usr/bin/env python + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +"""This module contains methods related to scheduling dropout. +See _self_test() for examples of how the functions work. +""" + +import logging + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + + +def _parse_dropout_option(dropout_option): + """Parses the string option to --trainer.dropout-schedule and + returns a list of dropout schedules for different component name patterns. 
+    Calls _parse_dropout_string() function for each component name pattern
+    in the option.
+
+    Arguments:
+        dropout_option: The string option passed to --trainer.dropout-schedule.
+            See its help for details.
+            See _self_test() for examples.
+
+    Returns a list of (component_name, dropout_schedule) tuples,
+    where dropout_schedule is itself a list of
+    (data_fraction, dropout_proportion) tuples sorted in reverse order of
+    data_fraction.
+    A data fraction of 0 corresponds to the beginning of training
+    and 1 corresponds to all data.
+    """
+    components = dropout_option.strip().split(' ')
+    dropout_schedule = []
+    for component in components:
+        parts = component.split('=')
+
+        if len(parts) == 2:
+            component_name = parts[0]
+            this_dropout_str = parts[1]
+        elif len(parts) == 1:
+            component_name = '*'
+            this_dropout_str = parts[0]
+        else:
+            raise Exception("The dropout schedule must be specified in the "
+                            "format 'pattern1=func1 pattern2=func2' where "
+                            "the pattern can be omitted for a global function "
+                            "for all components.\n"
+                            "Got {0} in {1}".format(component, dropout_option))
+
+        this_dropout_values = _parse_dropout_string(this_dropout_str)
+        dropout_schedule.append((component_name, this_dropout_values))
+
+    logger.info("Dropout schedules for component names are as follows:")
+    logger.info("<component-name>: [(data_fraction, "
+                "dropout_proportion), ...]")
+    for name, schedule in dropout_schedule:
+        logger.info("{0}: {1}".format(name, schedule))
+
+    return dropout_schedule
+
+
+def _parse_dropout_string(dropout_str):
+    """Parses the dropout schedule from the string corresponding to a
+    single component in --trainer.dropout-schedule.
+    This is a module-internal function called by _parse_dropout_option().
+
+    Arguments:
+        dropout_str: Specifies dropout schedule for a particular component
+            name pattern.
+            See help for the option --trainer.dropout-schedule.
+
+    Returns a list of (data_fraction_processed, dropout_proportion) tuples
+    sorted in descending order of data_fraction.
+    A data fraction of 1 corresponds to all data.
+    """
+    dropout_values = []
+    parts = dropout_str.strip().split(',')
+
+    try:
+        if len(parts) < 2:
+            raise Exception("dropout proportion string must specify "
+                            "at least the start and end dropouts")
+
+        # Starting dropout proportion
+        dropout_values.append((0, float(parts[0])))
+        for i in range(1, len(parts) - 1):
+            value_x_pair = parts[i].split('@')
+            if len(value_x_pair) == 1:
+                # Dropout proportion at half of training
+                dropout_proportion = float(value_x_pair[0])
+                data_fraction = 0.5
+            else:
+                assert len(value_x_pair) == 2
+
+                dropout_proportion = float(value_x_pair[0])
+                data_fraction = float(value_x_pair[1])
+
+            if (data_fraction < dropout_values[-1][0]
+                    or data_fraction > 1.0):
+                logger.error(
+                    "Failed while parsing value %s in dropout-schedule. "
+                    "dropout-schedule must be in increasing "
+                    "order of data fractions.", value_x_pair)
+                raise ValueError
+
+            dropout_values.append((data_fraction, float(dropout_proportion)))
+
+        dropout_values.append((1.0, float(parts[-1])))
+    except Exception:
+        logger.error("Unable to parse dropout proportion string %s. 
" + "See help for option " + "--trainer.dropout-schedule.", dropout_str) + raise + + # reverse sort so that its easy to retrieve the dropout proportion + # for a particular data fraction + dropout_values.reverse() + for data_fraction, proportion in dropout_values: + assert data_fraction <= 1.0 and data_fraction >= 0.0 + assert proportion <= 1.0 and proportion >= 0.0 + + return dropout_values + + +def _get_component_dropout(dropout_schedule, data_fraction): + """Retrieve dropout proportion from schedule when data_fraction + proportion of data is seen. This value is obtained by using a + piecewise linear function on the dropout schedule. + This is a module-internal function called by _get_dropout_proportions(). + + See help for --trainer.dropout-schedule for how the dropout value + is obtained from the options. + + Arguments: + dropout_schedule: A list of (data_fraction, dropout_proportion) values + sorted in descending order of data_fraction. + data_fraction: The fraction of data seen until this stage of + training. + """ + if data_fraction == 0: + # Dropout at start of the iteration is in the last index of + # dropout_schedule + assert dropout_schedule[-1][0] == 0 + return dropout_schedule[-1][1] + try: + # Find lower bound of the data_fraction. This is the + # lower end of the piecewise linear function. + (dropout_schedule_index, initial_data_fraction, + initial_dropout) = next((i, tup[0], tup[1]) + for i, tup in enumerate(dropout_schedule) + if tup[0] <= data_fraction) + except StopIteration: + raise RuntimeError( + "Could not find data_fraction in dropout schedule " + "corresponding to data_fraction {0}.\n" + "Maybe something wrong with the parsed " + "dropout schedule {1}.".format(data_fraction, dropout_schedule)) + + if dropout_schedule_index == 0: + assert dropout_schedule[0][0] == 1 and data_fraction == 1 + return dropout_schedule[0][1] + + # The upper bound of data_fraction is at the index before the + # lower bound. + final_data_fraction, final_dropout = dropout_schedule[ + dropout_schedule_index - 1] + + if final_data_fraction == initial_data_fraction: + assert data_fraction == initial_data_fraction + return initial_dropout + + assert (data_fraction >= initial_data_fraction + and data_fraction < final_data_fraction) + + return ((data_fraction - initial_data_fraction) + * (final_dropout - initial_dropout) + / (final_data_fraction - initial_data_fraction) + + initial_dropout) + + +def _get_dropout_proportions(dropout_schedule, data_fraction): + """Returns dropout proportions based on the dropout_schedule for the + fraction of data seen at this stage of training. + Returns None if dropout_schedule is None. + + Calls _get_component_dropout() for the different component name patterns + in dropout_schedule. + + Arguments: + dropout_schedule: Value for the --trainer.dropout-schedule option. + See help for --trainer.dropout-schedule. + See _self_test() for examples. + data_fraction: The fraction of data seen until this stage of + training. 
+ """ + if dropout_schedule is None: + return None + dropout_schedule = _parse_dropout_option(dropout_schedule) + dropout_proportions = [] + for component_name, component_dropout_schedule in dropout_schedule: + dropout_proportions.append( + (component_name, _get_component_dropout( + component_dropout_schedule, data_fraction))) + return dropout_proportions + + +def get_dropout_edit_string(dropout_schedule, data_fraction, iter_): + """Return an nnet3-copy --edits line to modify raw_model_string to + set dropout proportions according to dropout_proportions. + + Arguments: + dropout_schedule: Value for the --trainer.dropout-schedule option. + See help for --trainer.dropout-schedule. + See _self_test() for examples. + + See ReadEditConfig() in nnet3/nnet-utils.h to see how + set-dropout-proportion directive works. + """ + + if dropout_schedule is None: + return "" + + dropout_proportions = _get_dropout_proportions( + dropout_schedule, data_fraction) + + edit_config_lines = [] + dropout_info = [] + + for component_name, dropout_proportion in dropout_proportions: + edit_config_lines.append( + "set-dropout-proportion name={0} proportion={1}".format( + component_name, dropout_proportion)) + dropout_info.append("pattern/dropout-proportion={0}/{1}".format( + component_name, dropout_proportion)) + + logger.info("On iteration %d, %s", iter_, ', '.join(dropout_info)) + return ("""nnet3-copy --edits='{edits}' - - |""".format( + edits=";".join(edit_config_lines))) + + +def _self_test(): + """Run self-test. + This method is called if the module is run as a standalone script. + """ + + def assert_approx_equal(list1, list2): + """Checks that the two dropout proportions lists are equal.""" + assert len(list1) == len(list2) + for i in range(0, len(list1)): + assert len(list1[i]) == 2 + assert len(list2[i]) == 2 + assert list1[i][0] == list2[i][0] + assert abs(list1[i][1] - list2[i][1]) < 1e-8 + + assert (_parse_dropout_option('*=0.0,0.5,0.0 lstm.*=0.0,0.3@0.75,0.0') + == [ ('*', [ (1.0, 0.0), (0.5, 0.5), (0.0, 0.0) ]), + ('lstm.*', [ (1.0, 0.0), (0.75, 0.3), (0.0, 0.0) ]) ]) + assert_approx_equal(_get_dropout_proportions( + '*=0.0,0.5,0.0 lstm.*=0.0,0.3@0.75,0.0', 0.75), + [ ('*', 0.25), ('lstm.*', 0.3) ]) + assert_approx_equal(_get_dropout_proportions( + '*=0.0,0.5,0.0 lstm.*=0.0,0.3@0.75,0.0', 0.5), + [ ('*', 0.5), ('lstm.*', 0.2) ]) + assert_approx_equal(_get_dropout_proportions( + '*=0.0,0.5,0.0 lstm.*=0.0,0.3@0.75,0.0', 0.25), + [ ('*', 0.25), ('lstm.*', 0.1) ]) + + assert (_parse_dropout_option('0.0,0.3,0.0') + == [ ('*', [ (1.0, 0.0), (0.5, 0.3), (0.0, 0.0) ]) ]) + assert_approx_equal(_get_dropout_proportions('0.0,0.3,0.0', 0.5), + [ ('*', 0.3) ]) + assert_approx_equal(_get_dropout_proportions('0.0,0.3,0.0', 0.0), + [ ('*', 0.0) ]) + assert_approx_equal(_get_dropout_proportions('0.0,0.3,0.0', 1.0), + [ ('*', 0.0) ]) + assert_approx_equal(_get_dropout_proportions('0.0,0.3,0.0', 0.25), + [ ('*', 0.15) ]) + + assert (_parse_dropout_option('0.0,0.5@0.25,0.0,0.6@0.75,0.0') + == [ ('*', [ (1.0, 0.0), (0.75, 0.6), (0.5, 0.0), (0.25, 0.5), (0.0, 0.0) ]) ]) + assert_approx_equal(_get_dropout_proportions( + '0.0,0.5@0.25,0.0,0.6@0.75,0.0', 0.25), + [ ('*', 0.5) ]) + assert_approx_equal(_get_dropout_proportions( + '0.0,0.5@0.25,0.0,0.6@0.75,0.0', 0.1), + [ ('*', 0.2) ]) + + assert (_parse_dropout_option('lstm.*=0.0,0.3,0.0@0.75,1.0') + == [ ('lstm.*', [ (1.0, 1.0), (0.75, 0.0), (0.5, 0.3), (0.0, 0.0) ]) ]) + assert_approx_equal(_get_dropout_proportions( + 'lstm.*=0.0,0.3,0.0@0.75,1.0', 0.25), + [ ('lstm.*', 0.15) 
]) + assert_approx_equal(_get_dropout_proportions( + 'lstm.*=0.0,0.3,0.0@0.75,1.0', 0.5), + [ ('lstm.*', 0.3) ]) + assert_approx_equal(_get_dropout_proportions( + 'lstm.*=0.0,0.3,0.0@0.75,1.0', 0.9), + [ ('lstm.*', 0.6) ]) + + +if __name__ == '__main__': + try: + _self_test() + except Exception: + logger.error("Failed self test") + raise diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index a12f8fb3944..a888a6d7613 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -141,7 +141,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, run_opts, cv_minibatch_size=256, frames_per_eg=-1, min_deriv_time=None, max_deriv_time=None, - shrinkage_value=1.0, + shrinkage_value=1.0, dropout_edit_string="", get_raw_nnet_from_am=True, background_process_handler=None): """ Called from steps/nnet3/train_*.py scripts for one iteration of neural @@ -249,6 +249,8 @@ def train_one_iteration(dir, iter, srand, egs_dir, "{dir}/{iter}.raw - |".format( lr=learning_rate, dir=dir, iter=iter)) + raw_model_string = '{0} {1}'.format(raw_model_string, dropout_edit_string) + if do_average: cur_minibatch_size = minibatch_size cur_max_param_change = max_param_change @@ -266,6 +268,15 @@ def train_one_iteration(dir, iter, srand, egs_dir, except OSError: pass + shrink_info_str = '' + if shrinkage_value != 1.0: + shrink_info_str = ' and shrink value is {0}'.format(shrinkage_value) + + logger.info("On iteration {0}, learning rate is {1}" + "{shrink_info}.".format( + iter, learning_rate, + shrink_info=shrink_info_str)) + train_new_models(dir=dir, iter=iter, srand=srand, num_jobs=num_jobs, num_archives_processed=num_archives_processed, num_archives=num_archives, @@ -468,7 +479,7 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir, models_to_combine.add(num_iters) - for iter in models_to_combine: + for iter in sorted(models_to_combine): if get_raw_nnet_from_am: model_file = '{0}/{1}.mdl'.format(dir, iter) if not os.path.exists(model_file): diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py index cbd31ccea64..7c5f262a7f5 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py @@ -247,7 +247,8 @@ def set_default_configs(self): 'ng-affine-options' : ' max-change=0.75 ', 'self-repair-scale-nonlinearity' : 0.00001, 'zeroing-interval' : 20, - 'zeroing-threshold' : 15.0 + 'zeroing-threshold' : 15.0, + 'dropout-proportion' : -1.0 # -1.0 stands for no dropout will be added } def set_derived_configs(self): @@ -279,6 +280,12 @@ def check_configs(self): .format(self.layer_type, key, self.config[key])) + if ((self.config['dropout-proportion'] > 1.0 or + self.config['dropout-proportion'] < 0.0) and + self.config['dropout-proportion'] != -1.0 ): + raise RuntimeError("dropout-proportion has invalid value {0}." 
+ "".format(self.config['dropout-proportion'])) + def auxiliary_outputs(self): return ['c_t'] @@ -338,6 +345,8 @@ def generate_lstm_config(self): abs(delay))) affine_str = self.config['ng-affine-options'] pes_str = self.config['ng-per-element-scale-options'] + lstm_dropout_value = self.config['dropout-proportion'] + lstm_dropout_str = 'dropout-proportion='+str(self.config['dropout-proportion']) # Natural gradient per element scale parameters # TODO: decide if we want to keep exposing these options @@ -417,13 +426,21 @@ def generate_lstm_config(self): # add the recurrent connections configs.append("# projection matrices : Wrm and Wpm") + if lstm_dropout_value != -1.0: + configs.append("component name={0}.W_rp.m.dropout type=DropoutComponent dim={1} {2}".format(name, cell_dim, lstm_dropout_str)) configs.append("component name={0}.W_rp.m type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, cell_dim, rec_proj_dim + nonrec_proj_dim, affine_str)) configs.append("component name={0}.r type=BackpropTruncationComponent dim={1} {2}".format(name, rec_proj_dim, bptrunc_str)) configs.append("# r_t and p_t : rp_t will be the output") - configs.append("component-node name={0}.rp_t component={0}.W_rp.m input={0}.m_t".format(name)) - configs.append("dim-range-node name={0}.r_t_preclip input-node={0}.rp_t dim-offset=0 dim={1}".format(name, rec_proj_dim)) - configs.append("component-node name={0}.r_t component={0}.r input={0}.r_t_preclip".format(name)) + if lstm_dropout_value != -1.0: + configs.append("component-node name={0}.rp_t.dropout component={0}.W_rp.m.dropout input={0}.m_t".format(name)) + configs.append("component-node name={0}.rp_t component={0}.W_rp.m input={0}.rp_t.dropout".format(name)) + configs.append("dim-range-node name={0}.r_t_preclip input-node={0}.rp_t dim-offset=0 dim={1}".format(name, rec_proj_dim)) + configs.append("component-node name={0}.r_t component={0}.r input={0}.r_t_preclip".format(name)) + else: + configs.append("component-node name={0}.rp_t component={0}.W_rp.m input={0}.m_t".format(name)) + configs.append("dim-range-node name={0}.r_t_preclip input-node={0}.rp_t dim-offset=0 dim={1}".format(name, rec_proj_dim)) + configs.append("component-node name={0}.r_t component={0}.r input={0}.r_t_preclip".format(name)) return configs @@ -744,6 +761,7 @@ def set_default_configs(self): 'ng-affine-options' : ' max-change=1.5', 'zeroing-interval' : 20, 'zeroing-threshold' : 15.0 + } def set_derived_configs(self): diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index 0254589be85..743a854e160 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -22,11 +22,11 @@ import libs.nnet3.report.log_parse as nnet3_log_parse -logger = logging.getLogger(__name__) +logger = logging.getLogger('libs') logger.setLevel(logging.INFO) handler = logging.StreamHandler() handler.setLevel(logging.INFO) -formatter = logging.Formatter("%(asctime)s [%(filename)s:%(lineno)s - " +formatter = logging.Formatter("%(asctime)s [%(pathname)s:%(lineno)s - " "%(funcName)s - %(levelname)s ] %(message)s") handler.setFormatter(formatter) logger.addHandler(handler) @@ -134,13 +134,15 @@ def get_args(): shrink-threshold at the non-linearities. E.g. 0.99. 
Only applicable when the neural net contains sigmoid or tanh units.""") - parser.add_argument("--trainer.optimization.shrink-saturation-threshold", type=float, + parser.add_argument("--trainer.optimization.shrink-saturation-threshold", + type=float, dest='shrink_saturation_threshold', default=0.40, - help="""Threshold that controls when we apply the 'shrinkage' - (i.e. scaling by shrink-value). If the saturation of the - sigmoid and tanh nonlinearities in the neural net (as - measured by steps/nnet3/get_saturation.pl) exceeds this - threshold we scale the parameter matrices with the + help="""Threshold that controls when we apply the + 'shrinkage' (i.e. scaling by shrink-value). If the + saturation of the sigmoid and tanh nonlinearities in + the neural net (as measured by + steps/nnet3/get_saturation.pl) exceeds this threshold + we scale the parameter matrices with the shrink-value.""") # RNN-specific training options parser.add_argument("--trainer.deriv-truncate-margin", type=int, @@ -420,11 +422,6 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): args.shrink_saturation_threshold) else 1 ) - logger.info("On iteration {0}, learning rate is {1} and " - "shrink value is {2}.".format( - iter, learning_rate(iter, current_num_jobs, - num_archives_processed), - shrinkage_value)) chain_lib.train_one_iteration( dir=args.dir, @@ -436,6 +433,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), + dropout_edit_string=common_lib.get_dropout_edit_string( + args.dropout_schedule, + float(num_archives_processed) / num_archives_to_process, + iter), shrinkage_value=shrinkage_value, num_chunk_per_minibatch=args.num_chunk_per_minibatch, num_hidden_layers=num_hidden_layers, diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index 83170ea1e8e..8ab3959885a 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -26,7 +26,7 @@ logger.setLevel(logging.INFO) handler = logging.StreamHandler() handler.setLevel(logging.INFO) -formatter = logging.Formatter("%(asctime)s [%(filename)s:%(lineno)s - " +formatter = logging.Formatter("%(asctime)s [%(pathname)s:%(lineno)s - " "%(funcName)s - %(levelname)s ] %(message)s") handler.setFormatter(formatter) logger.addHandler(handler) @@ -298,10 +298,6 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): * float(iter) / num_iters) if args.stage <= iter: - logger.info("On iteration {0}, learning rate is {1}.".format( - iter, learning_rate(iter, current_num_jobs, - num_archives_processed))) - train_lib.common.train_one_iteration( dir=args.dir, iter=iter, @@ -312,6 +308,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), + dropout_edit_string=common_lib.get_dropout_edit_string( + args.dropout_schedule, + float(num_archives_processed) / num_archives_to_process, + iter), minibatch_size=args.minibatch_size, frames_per_eg=args.frames_per_eg, num_hidden_layers=num_hidden_layers, diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index b8fe4a25384..3f946d13de8 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -26,7 +26,7 @@ logger.setLevel(logging.INFO) handler = logging.StreamHandler() handler.setLevel(logging.INFO) -formatter = 
logging.Formatter("%(asctime)s [%(filename)s:%(lineno)s - " +formatter = logging.Formatter("%(asctime)s [%(pathname)s:%(lineno)s - " "%(funcName)s - %(levelname)s ] %(message)s") handler.setFormatter(formatter) logger.addHandler(handler) @@ -301,10 +301,6 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): * float(iter) / num_iters) if args.stage <= iter: - logger.info("On iteration {0}, learning rate is {1}.".format( - iter, learning_rate(iter, current_num_jobs, - num_archives_processed))) - train_lib.common.train_one_iteration( dir=args.dir, iter=iter, @@ -315,6 +311,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), + dropout_edit_string=common_lib.get_dropout_edit_string( + args.dropout_schedule, + float(num_archives_processed) / num_archives_to_process, + iter), minibatch_size=args.minibatch_size, frames_per_eg=args.frames_per_eg, num_hidden_layers=num_hidden_layers, diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index 8366eccc993..cf7ae3f2b45 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -26,7 +26,7 @@ logger.setLevel(logging.INFO) handler = logging.StreamHandler() handler.setLevel(logging.INFO) -formatter = logging.Formatter("%(asctime)s [%(filename)s:%(lineno)s - " +formatter = logging.Formatter("%(asctime)s [%(pathname)s:%(lineno)s - " "%(funcName)s - %(levelname)s ] %(message)s") handler.setFormatter(formatter) logger.addHandler(handler) @@ -101,13 +101,15 @@ def get_args(): shrink-threshold at the non-linearities. E.g. 0.99. Only applicable when the neural net contains sigmoid or tanh units.""") - parser.add_argument("--trainer.optimization.shrink-saturation-threshold", type=float, + parser.add_argument("--trainer.optimization.shrink-saturation-threshold", + type=float, dest='shrink_saturation_threshold', default=0.40, - help="""Threshold that controls when we apply the 'shrinkage' - (i.e. scaling by shrink-value). If the saturation of the - sigmoid and tanh nonlinearities in the neural net (as - measured by steps/nnet3/get_saturation.pl) exceeds this - threshold we scale the parameter matrices with the + help="""Threshold that controls when we apply the + 'shrinkage' (i.e. scaling by shrink-value). 
If the + saturation of the sigmoid and tanh nonlinearities in + the neural net (as measured by + steps/nnet3/get_saturation.pl) exceeds this threshold + we scale the parameter matrices with the shrink-value.""") parser.add_argument("--trainer.optimization.cv-minibatch-size", type=int, dest='cv_minibatch_size', default=256, @@ -398,11 +400,6 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): get_raw_nnet_from_am=False) else 1 ) - logger.info("On iteration {0}, learning rate is {1} and " - "shrink value is {2}.".format( - iter, learning_rate(iter, current_num_jobs, - num_archives_processed), - shrinkage_value)) train_lib.common.train_one_iteration( dir=args.dir, @@ -414,6 +411,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), + dropout_edit_string=common_lib.get_dropout_edit_string( + args.dropout_schedule, + float(num_archives_processed) / num_archives_to_process, + iter), shrinkage_value=shrinkage_value, minibatch_size=args.num_chunk_per_minibatch, num_hidden_layers=num_hidden_layers, diff --git a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index 482c9a8ee03..35745bce7b2 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -26,7 +26,7 @@ logger.setLevel(logging.INFO) handler = logging.StreamHandler() handler.setLevel(logging.INFO) -formatter = logging.Formatter("%(asctime)s [%(filename)s:%(lineno)s - " +formatter = logging.Formatter("%(asctime)s [%(pathname)s:%(lineno)s - " "%(funcName)s - %(levelname)s ] %(message)s") handler.setFormatter(formatter) logger.addHandler(handler) @@ -100,13 +100,15 @@ def get_args(): shrink-threshold at the non-linearities. E.g. 0.99. Only applicable when the neural net contains sigmoid or tanh units.""") - parser.add_argument("--trainer.optimization.shrink-saturation-threshold", type=float, + parser.add_argument("--trainer.optimization.shrink-saturation-threshold", + type=float, dest='shrink_saturation_threshold', default=0.40, - help="""Threshold that controls when we apply the 'shrinkage' - (i.e. scaling by shrink-value). If the saturation of the - sigmoid and tanh nonlinearities in the neural net (as - measured by steps/nnet3/get_saturation.pl) exceeds this - threshold we scale the parameter matrices with the + help="""Threshold that controls when we apply the + 'shrinkage' (i.e. scaling by shrink-value). 
If the + saturation of the sigmoid and tanh nonlinearities in + the neural net (as measured by + steps/nnet3/get_saturation.pl) exceeds this threshold + we scale the parameter matrices with the shrink-value.""") parser.add_argument("--trainer.optimization.cv-minibatch-size", type=int, dest='cv_minibatch_size', default=256, @@ -392,11 +394,6 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): args.shrink_saturation_threshold) else 1 ) - logger.info("On iteration {0}, learning rate is {1} and " - "shrink value is {2}.".format( - iter, learning_rate(iter, current_num_jobs, - num_archives_processed), - shrinkage_value)) train_lib.common.train_one_iteration( dir=args.dir, @@ -408,6 +405,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), + dropout_edit_string=common_lib.get_dropout_edit_string( + args.dropout_schedule, + float(num_archives_processed) / num_archives_to_process, + iter), shrinkage_value=shrinkage_value, minibatch_size=args.num_chunk_per_minibatch, num_hidden_layers=num_hidden_layers, diff --git a/src/nnet3/nnet-chain-combine.cc b/src/nnet3/nnet-chain-combine.cc index 810ee2b471a..dd9b99fe26d 100644 --- a/src/nnet3/nnet-chain-combine.cc +++ b/src/nnet3/nnet-chain-combine.cc @@ -38,6 +38,7 @@ NnetChainCombiner::NnetChainCombiner(const NnetCombineConfig &combine_config, nnet_params_(std::min(num_nnets, combine_config_.max_effective_inputs), NumParameters(first_nnet)), tot_input_weighting_(nnet_params_.NumRows()) { + SetDropoutProportion(0, &nnet_); SubVector first_params(nnet_params_, 0); VectorizeNnet(nnet_, &first_params); tot_input_weighting_(0) += 1.0; diff --git a/src/nnet3/nnet-combine.cc b/src/nnet3/nnet-combine.cc index 45c1f74477b..07a96d143c2 100644 --- a/src/nnet3/nnet-combine.cc +++ b/src/nnet3/nnet-combine.cc @@ -34,6 +34,7 @@ NnetCombiner::NnetCombiner(const NnetCombineConfig &config, nnet_params_(std::min(num_nnets, config_.max_effective_inputs), NumParameters(first_nnet)), tot_input_weighting_(nnet_params_.NumRows()) { + SetDropoutProportion(0, &nnet_); SubVector first_params(nnet_params_, 0); VectorizeNnet(nnet_, &first_params); tot_input_weighting_(0) += 1.0; diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc index 07d12684967..1e22575cee3 100644 --- a/src/nnet3/nnet-utils.cc +++ b/src/nnet3/nnet-utils.cc @@ -615,6 +615,29 @@ void ReadEditConfig(std::istream &edit_config_is, Nnet *nnet) { if (outputs_remaining == 0) KALDI_ERR << "All outputs were removed."; nnet->RemoveSomeNodes(nodes_to_remove); + } else if (directive == "set-dropout-proportion") { + std::string name_pattern = "*"; + // name_pattern defaults to '*' if none is given. This pattern + // matches names of components, not nodes. 
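For reference, a directive handled by the branch below looks like the following, matching the documentation this patch adds to nnet-utils.h further down; per the nnet3-copy changes it can also be passed inline via --edits, with semicolons standing in for newlines (the model filenames here are placeholders):

    set-dropout-proportion name=lstm* proportion=0.3

    nnet3-copy --edits='set-dropout-proportion name=lstm* proportion=0.3' in.raw out.raw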
+ config_line.GetValue("name", &name_pattern); + BaseFloat proportion = -1; + if (!config_line.GetValue("proportion", &proportion)) { + KALDI_ERR << "In edits-config, expected proportion to be set in line: " + << config_line.WholeLine(); + } + DropoutComponent *component = NULL; + int32 num_dropout_proportions_set = 0; + for (int32 c = 0; c < nnet->NumComponents(); c++) { + if (NameMatchesPattern(nnet->GetComponentName(c).c_str(), + name_pattern.c_str()) && + (component = + dynamic_cast(nnet->GetComponent(c)))) { + component->SetDropoutProportion(proportion); + num_dropout_proportions_set++; + } + } + KALDI_LOG << "Set dropout proportions for " + << num_dropout_proportions_set << " nodes."; } else { KALDI_ERR << "Directive '" << directive << "' is not currently " "supported (reading edit-config)."; diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h index 9cbfa87a800..0b5ab3c1fd4 100644 --- a/src/nnet3/nnet-utils.h +++ b/src/nnet3/nnet-utils.h @@ -211,6 +211,9 @@ void FindOrphanNodes(const Nnet &nnet, std::vector *nodes); remove internal nodes directly; instead you should use the command 'remove-orphans'. + set-dropout-proportion [name=] proportion= + Sets the dropout rates for any components of type DropoutComponent whose + names match the given (e.g. lstm*). defaults to "*". \endverbatim */ void ReadEditConfig(std::istream &config_file, Nnet *nnet); diff --git a/src/nnet3bin/nnet3-combine.cc b/src/nnet3bin/nnet3-combine.cc index 525fe664353..5abc317f054 100644 --- a/src/nnet3bin/nnet3-combine.cc +++ b/src/nnet3bin/nnet3-combine.cc @@ -104,6 +104,7 @@ int main(int argc, char *argv[]) { } else { KALDI_LOG << "Copying the single input model directly to the output, " << "without any combination."; + SetDropoutProportion(0, &nnet); WriteKaldiObject(nnet, nnet_wxfilename, binary_write); } KALDI_LOG << "Finished combining neural nets, wrote model to " diff --git a/src/nnet3bin/nnet3-copy.cc b/src/nnet3bin/nnet3-copy.cc index e4a41933fff..1f75527d69c 100644 --- a/src/nnet3bin/nnet3-copy.cc +++ b/src/nnet3bin/nnet3-copy.cc @@ -41,8 +41,7 @@ int main(int argc, char *argv[]) { " nnet3-copy --binary=false 0.raw text.raw\n"; bool binary_write = true; - BaseFloat learning_rate = -1, - dropout = 0.0; + BaseFloat learning_rate = -1; std::string nnet_config, edits_config, edits_str; BaseFloat scale = 1.0; @@ -63,8 +62,6 @@ int main(int argc, char *argv[]) { "Can be used as an inline alternative to edits-config; semicolons " "will be converted to newlines before parsing. E.g. 
" "'--edits=remove-orphans'."); - po.Register("set-dropout-proportion", &dropout, "Set dropout proportion " - "in all DropoutComponent to this value."); po.Register("scale", &scale, "The parameter matrices are scaled" " by the specified value."); po.Read(argc, argv); @@ -91,9 +88,6 @@ int main(int argc, char *argv[]) { if (scale != 1.0) ScaleNnet(scale, &nnet); - if (dropout > 0) - SetDropoutProportion(dropout, &nnet); - if (!edits_config.empty()) { Input ki(edits_config); ReadEditConfig(ki.Stream(), &nnet); From 61510ca0d8d9b38096701227ee064a166816fcbe Mon Sep 17 00:00:00 2001 From: Ke Li Date: Mon, 23 Jan 2017 02:07:24 -0500 Subject: [PATCH 316/530] nnet3/report : Modified directory specification options in generate_plots.py (#1368) --- .../s5/steps/nnet3/report/generate_plots.py | 37 +++++++++++++------ 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/report/generate_plots.py b/egs/wsj/s5/steps/nnet3/report/generate_plots.py index 0a558b91ae2..6db262aed60 100755 --- a/egs/wsj/s5/steps/nnet3/report/generate_plots.py +++ b/egs/wsj/s5/steps/nnet3/report/generate_plots.py @@ -48,13 +48,16 @@ def get_args(): parser = argparse.ArgumentParser( description="""Parses the training logs and generates a variety of plots. - e.g.: steps/nnet3/report/generate_plots.py \\ + e.g. (deprecated): steps/nnet3/report/generate_plots.py \\ --comparison-dir exp/nnet3/tdnn1 --comparison-dir exp/nnet3/tdnn2 \\ - exp/nnet3/tdnn exp/nnet3/tdnn/report""") + exp/nnet3/tdnn exp/nnet3/tdnn/report + e.g. (current): steps/nnet3/report/generate_plots.py \\ + exp/nnet3/tdnn exp/nnet3/tdnn1 exp/nnet3/tdnn2 exp/nnet3/tdnn/report""") parser.add_argument("--comparison-dir", type=str, action='append', help="other experiment directories for comparison. " - "These will only be used for plots, not tables") + "These will only be used for plots, not tables" + "Note: this option is deprecated.") parser.add_argument("--start-iter", type=int, help="Iteration from which plotting will start", default=1) @@ -66,16 +69,19 @@ def get_args(): help="""List of space separated : entities, one for each output node""") - parser.add_argument("exp_dir", - help="experiment directory, e.g. exp/nnet3/tdnn") + parser.add_argument("exp_dir", nargs='+', + help="the first dir is the experiment directory, " + "e.g. exp/nnet3/tdnn, the rest dirs (if exist) " + "are other experiment directories for comparison.") parser.add_argument("output_dir", help="experiment directory, " "e.g. exp/nnet3/tdnn/report") args = parser.parse_args() - if args.comparison_dir is not None and len(args.comparison_dir) > 6: + if (args.comparison_dir is not None and len(args.comparison_dir) > 6) or \ + (args.exp_dir is not None and len(args.exp_dir) > 7): raise Exception( - """max 6 --comparison-dir options can be specified. + """max 6 comparison directories can be specified. 
If you want to compare with more comparison_dir, you would have to carefully tune the plot_colors variable which specified colors used for plotting.""") @@ -653,10 +659,19 @@ def main(): output_nodes.append(('output', 'chain')) else: output_nodes.append(('output', 'linear')) - - generate_plots(args.exp_dir, args.output_dir, output_nodes, - comparison_dir=args.comparison_dir, - start_iter=args.start_iter) + + if args.comparison_dir is not None: + generate_plots(args.exp_dir[0], args.output_dir, output_nodes, + comparison_dir=args.comparison_dir, + start_iter=args.start_iter) + else: + if len(args.exp_dir) == 1: + generate_plots(args.exp_dir[0], args.output_dir, output_nodes, + start_iter=args.start_iter) + if len(args.exp_dir) > 1: + generate_plots(args.exp_dir[0], args.output_dir, output_nodes, + comparison_dir=args.exp_dir[1:], + start_iter=args.start_iter) if __name__ == "__main__": From 1582c77340588a53ee534b27973c4eb1c780d03d Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 22 Jan 2017 23:30:09 -0500 Subject: [PATCH 317/530] [src] nnet3: Extending nnet3-combine to support soft enforcement of sum-to-one. --- src/nnet3/nnet-combine.cc | 176 +++++++++++++++++++++++++++----------- src/nnet3/nnet-combine.h | 61 +++++++++---- 2 files changed, 166 insertions(+), 71 deletions(-) diff --git a/src/nnet3/nnet-combine.cc b/src/nnet3/nnet-combine.cc index 45c1f74477b..d50b5adc072 100644 --- a/src/nnet3/nnet-combine.cc +++ b/src/nnet3/nnet-combine.cc @@ -34,6 +34,13 @@ NnetCombiner::NnetCombiner(const NnetCombineConfig &config, nnet_params_(std::min(num_nnets, config_.max_effective_inputs), NumParameters(first_nnet)), tot_input_weighting_(nnet_params_.NumRows()) { + + if (config_.sum_to_one_penalty != 0.0 && + config_.enforce_sum_to_one) { + KALDI_WARN << "--sum-to-one-penalty=" << config_.sum_to_one_penalty + << " is nonzero, so setting --enforce-sum-to-one=false."; + config_.enforce_sum_to_one = false; + } SubVector first_params(nnet_params_, 0); VectorizeNnet(nnet_, &first_params); tot_input_weighting_(0) += 1.0; @@ -42,7 +49,6 @@ NnetCombiner::NnetCombiner(const NnetCombineConfig &config, NnetComputeProbOptions compute_prob_opts; compute_prob_opts.compute_deriv = true; prob_computer_ = new NnetComputeProb(compute_prob_opts, nnet_); - } void NnetCombiner::ComputeUpdatableComponentDims(){ @@ -129,12 +135,12 @@ void NnetCombiner::Combine() { // itself, so this is BFGS. lbfgs_options.first_step_impr = config_.initial_impr; - Vector params(dim), deriv(dim); - BaseFloat objf, initial_objf; + Vector params(dim), deriv(dim); + double objf, initial_objf; GetInitialParameters(¶ms); - OptimizeLbfgs lbfgs(params, lbfgs_options); + OptimizeLbfgs lbfgs(params, lbfgs_options); for (int32 i = 0; i < config_.num_iters; i++) { params.CopyFromVec(lbfgs.GetProposedValue()); @@ -145,12 +151,25 @@ void NnetCombiner::Combine() { lbfgs.DoStep(objf, deriv); } - KALDI_LOG << "Combining nnets, objective function changed from " - << initial_objf << " to " << objf; + if (!config_.sum_to_one_penalty) { + KALDI_LOG << "Combining nnets, objective function changed from " + << initial_objf << " to " << objf; + } else { + Vector weights(WeightDim()); + GetWeights(params, &weights); + bool print_weights = true; + double penalty = GetSumToOnePenalty(weights, NULL, print_weights); + // note: initial_objf has no penalty term because it summed exactly + // to one. 
+ KALDI_LOG << "Combining nnets, objective function changed from " + << initial_objf << " to " << objf << " = " + << (objf - penalty) << " + " << penalty; + } + // must recompute nnet_ if "params" is not exactly equal to the // final params that LB - Vector final_params(dim); + Vector final_params(dim); final_params.CopyFromVec(lbfgs.GetValue(&objf)); if (!params.ApproxEqual(final_params, 0.0)) { // the following call makes sure that nnet_ corresponds to the parameters @@ -161,9 +180,9 @@ void NnetCombiner::Combine() { } -void NnetCombiner::PrintParams(const VectorBase ¶ms) const { +void NnetCombiner::PrintParams(const VectorBase ¶ms) const { - Vector weights(params.Dim()), normalized_weights(params.Dim()); + Vector weights(params.Dim()), normalized_weights(params.Dim()); GetWeights(params, &weights); GetNormalizedWeights(weights, &normalized_weights); int32 num_models = nnet_params_.NumRows(), @@ -213,21 +232,21 @@ void NnetCombiner::PrintParams(const VectorBase ¶ms) const { bool NnetCombiner::SelfTestDerivatives() { int32 num_tests = 2; // more properly, this is the number of dimensions in a // single test. - BaseFloat delta = 0.001; + double delta = 0.001; int32 dim = ParameterDim(); - Vector params(dim), deriv(dim); - Vector predicted_changes(num_tests), + Vector params(dim), deriv(dim); + Vector predicted_changes(num_tests), observed_changes(num_tests); GetInitialParameters(¶ms); - BaseFloat initial_objf = ComputeObjfAndDerivFromParameters(params, + double initial_objf = ComputeObjfAndDerivFromParameters(params, &deriv); for (int32 i = 0; i < num_tests; i++) { - Vector new_deriv(dim), offset(dim), new_params(params); + Vector new_deriv(dim), offset(dim), new_params(params); offset.SetRandn(); new_params.AddVec(delta, offset); - BaseFloat new_objf = ComputeObjfAndDerivFromParameters(new_params, + double new_objf = ComputeObjfAndDerivFromParameters(new_params, &new_deriv); // for predicted changes, interpolate old and new derivs. predicted_changes(i) = @@ -235,7 +254,7 @@ bool NnetCombiner::SelfTestDerivatives() { 0.5 * VecVec(new_params, new_deriv) - 0.5 * VecVec(params, new_deriv); observed_changes(i) = new_objf - initial_objf; } - BaseFloat threshold = 0.1; + double threshold = 0.1; KALDI_LOG << "predicted_changes = " << predicted_changes; KALDI_LOG << "observed_changes = " << observed_changes; if (!ApproxEqual(predicted_changes, observed_changes, threshold)) { @@ -252,23 +271,23 @@ void NnetCombiner::SelfTestModelDerivatives() { // single test. 
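An aside on what these self-tests check: the "predicted change" computed below interpolates the derivatives at the old and new points, i.e. it is the trapezoid-rule estimate of the change in the objective. Restating the code in equations (a paraphrase, not part of the patch):

    \Delta F_{\mathrm{pred}} = \tfrac{1}{2}\bigl(\nabla F(x) + \nabla F(x+\delta d)\bigr)\cdot \delta d,
    \qquad
    \Delta F_{\mathrm{obs}} = F(x+\delta d) - F(x),

and the test requires the two to agree to within a relative threshold of 0.1.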
int32 dim = ParameterDim(); - Vector params(dim), deriv(dim); - Vector predicted_changes(num_tests), + Vector params(dim), deriv(dim); + Vector predicted_changes(num_tests), observed_changes(num_tests); GetInitialParameters(¶ms); - Vector weights(WeightDim()), normalized_weights(WeightDim()), - nnet_params(NnetParameterDim(), kUndefined), + Vector weights(WeightDim()), normalized_weights(WeightDim()); + Vector nnet_params(NnetParameterDim(), kUndefined), nnet_deriv(NnetParameterDim(), kUndefined); GetWeights(params, &weights); GetNormalizedWeights(weights, &normalized_weights); GetNnetParameters(normalized_weights, &nnet_params); - BaseFloat initial_objf = ComputeObjfAndDerivFromNnet(nnet_params, + double initial_objf = ComputeObjfAndDerivFromNnet(nnet_params, &nnet_deriv); - BaseFloat delta = 0.002 * std::sqrt(VecVec(nnet_params, nnet_params) / - NnetParameterDim()); + double delta = 0.002 * std::sqrt(VecVec(nnet_params, nnet_params) / + NnetParameterDim()); for (int32 i = 0; i < num_tests; i++) { @@ -276,7 +295,7 @@ void NnetCombiner::SelfTestModelDerivatives() { offset(NnetParameterDim()), new_nnet_params(nnet_params); offset.SetRandn(); new_nnet_params.AddVec(delta, offset); - BaseFloat new_objf = ComputeObjfAndDerivFromNnet(new_nnet_params, + double new_objf = ComputeObjfAndDerivFromNnet(new_nnet_params, &new_nnet_deriv); // for predicted changes, interpolate old and new derivs. predicted_changes(i) = @@ -286,7 +305,7 @@ void NnetCombiner::SelfTestModelDerivatives() { 0.5 * VecVec(nnet_params, new_nnet_deriv); observed_changes(i) = new_objf - initial_objf; } - BaseFloat threshold = 0.1; + double threshold = 0.1; KALDI_LOG << "model-derivatives: predicted_changes = " << predicted_changes; KALDI_LOG << "model-derivatives: observed_changes = " << observed_changes; if (!ApproxEqual(predicted_changes, observed_changes, threshold)) @@ -304,7 +323,7 @@ int32 NnetCombiner::ParameterDim() const { } -void NnetCombiner::GetInitialParameters(VectorBase *params) const { +void NnetCombiner::GetInitialParameters(VectorBase *params) const { KALDI_ASSERT(params->Dim() == ParameterDim()); params->Set(1.0 / nnet_params_.NumRows()); if (config_.enforce_positive_weights) { @@ -314,8 +333,8 @@ void NnetCombiner::GetInitialParameters(VectorBase *params) const { } } -void NnetCombiner::GetWeights(const VectorBase ¶ms, - VectorBase *weights) const { +void NnetCombiner::GetWeights(const VectorBase ¶ms, + VectorBase *weights) const { KALDI_ASSERT(weights->Dim() == WeightDim()); if (config_.separate_weights_per_component) { weights->CopyFromVec(params); @@ -335,12 +354,12 @@ void NnetCombiner::GetWeights(const VectorBase ¶ms, } -void NnetCombiner::GetParamsDeriv(const VectorBase &weights, - const VectorBase &weights_deriv, - VectorBase *param_deriv) { +void NnetCombiner::GetParamsDeriv(const VectorBase &weights, + const VectorBase &weights_deriv, + VectorBase *param_deriv) { KALDI_ASSERT(weights.Dim() == WeightDim() && param_deriv->Dim() == ParameterDim()); - Vector preexp_weights_deriv(weights_deriv); + Vector preexp_weights_deriv(weights_deriv); if (config_.enforce_positive_weights) { // to enforce positive weights we first compute weights (call these // preexp_weights) and then take exponential. Note, d/dx exp(x) = exp(x). 
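Spelling out the chain rule that the comment above alludes to: with --enforce-positive-weights the weights are parameterized as w_i = exp(p_i), so a derivative with respect to a weight pulls back to the underlying parameter as

    \frac{\partial F}{\partial p_i}
      = \frac{\partial F}{\partial w_i}\,\frac{d w_i}{d p_i}
      = \frac{\partial F}{\partial w_i}\, e^{p_i}
      = \frac{\partial F}{\partial w_i}\, w_i,

which is why the code only needs to scale the weight-derivative by the exponentiated parameter.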
@@ -360,7 +379,55 @@ void NnetCombiner::GetParamsDeriv(const VectorBase &weights, } -void NnetCombiner::GetNnetParameters(const Vector &weights, +double NnetCombiner::GetSumToOnePenalty( + const VectorBase &weights, + VectorBase *weights_penalty_deriv, + bool print_weights) const { + + KALDI_ASSERT(config_.sum_to_one_penalty >= 0.0); + double penalty = config_.sum_to_one_penalty; + if (penalty == 0.0) { + weights_penalty_deriv->SetZero(); + return 0.0; + } + double ans = 0.0; + int32 num_uc = NumUpdatableComponents(), + num_models = nnet_params_.NumRows(); + Vector tot_weights(num_uc); + std::ostringstream tot_weight_info; + for (int32 c = 0; c < num_uc; c++) { + double this_total_weight = 0.0; + for (int32 m = 0; m < num_models; m++) { + int32 index = m * num_uc + c; + double this_weight = weights(index); + this_total_weight += this_weight; + } + tot_weights(c) = this_total_weight; + ans += -0.5 * penalty * + (this_total_weight - 1.0) * (this_total_weight - 1.0); + if (weights_penalty_deriv != NULL) { + KALDI_ASSERT(weights.Dim() == weights_penalty_deriv->Dim()); + // this_total_weight_deriv is the derivative of the penalty + // term w.r.t. this component's total weight. + double this_total_weight_deriv = + penalty * (1.0 - this_total_weight); + for (int32 m = 0; m < num_models; m++) { + int32 index = m * num_uc + c; + (*weights_penalty_deriv)(index) = this_total_weight_deriv; + } + } + } + if (print_weights) { + Vector tot_weights_float(tot_weights); + KALDI_LOG << "Total weights per component: " + << PrintVectorPerUpdatableComponent(nnet_, + tot_weights_float); + } + return ans; +} + + +void NnetCombiner::GetNnetParameters(const Vector &weights, VectorBase *nnet_params) const { KALDI_ASSERT(nnet_params->Dim() == nnet_params_.NumCols()); nnet_params->SetZero(); @@ -386,7 +453,7 @@ void NnetCombiner::GetNnetParameters(const Vector &weights, // compare GetNnetParameters. 
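In equation form, writing \lambda for --sum-to-one-penalty and w_u for the total weight of updatable component u (summed over models m), GetSumToOnePenalty above returns

    P(w) = -\tfrac{\lambda}{2}\sum_u (w_u - 1)^2,
    \qquad
    \frac{\partial P}{\partial w_{m,u}} = \lambda\,(1 - w_u),

so the penalty is zero exactly when each component's weights sum to one; this matches the code's -0.5 * penalty * (this_total_weight - 1.0)^2 accumulation and its penalty * (1.0 - this_total_weight) derivative.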
void NnetCombiner::GetWeightsDeriv( const VectorBase &nnet_params_deriv, - VectorBase *weights_deriv) { + VectorBase *weights_deriv) { KALDI_ASSERT(nnet_params_deriv.Dim() == nnet_params_.NumCols() && weights_deriv->Dim() == WeightDim()); int32 num_uc = NumUpdatableComponents(), @@ -437,30 +504,35 @@ double NnetCombiner::ComputeObjfAndDerivFromNnet( double NnetCombiner::ComputeObjfAndDerivFromParameters( - VectorBase ¶ms, - VectorBase *params_deriv) { - Vector weights(WeightDim()), normalized_weights(WeightDim()), - nnet_params(NnetParameterDim(), kUndefined), - nnet_params_deriv(NnetParameterDim(), kUndefined), + VectorBase ¶ms, + VectorBase *params_deriv) { + Vector weights(WeightDim()), normalized_weights(WeightDim()), + weights_sum_to_one_penalty_deriv(WeightDim()), normalized_weights_deriv(WeightDim()), weights_deriv(WeightDim()); + Vector + nnet_params(NnetParameterDim(), kUndefined), + nnet_params_deriv(NnetParameterDim(), kUndefined); GetWeights(params, &weights); + double ans = GetSumToOnePenalty(weights, &weights_sum_to_one_penalty_deriv); GetNormalizedWeights(weights, &normalized_weights); GetNnetParameters(normalized_weights, &nnet_params); - double ans = ComputeObjfAndDerivFromNnet(nnet_params, &nnet_params_deriv); + ans += ComputeObjfAndDerivFromNnet(nnet_params, &nnet_params_deriv); if (ans != ans || ans - ans != 0) // NaN or inf return ans; // No point computing derivative GetWeightsDeriv(nnet_params_deriv, &normalized_weights_deriv); GetUnnormalizedWeightsDeriv(weights, normalized_weights_deriv, &weights_deriv); + weights_deriv.AddVec(1.0, weights_sum_to_one_penalty_deriv); GetParamsDeriv(weights, weights_deriv, params_deriv); return ans; } -// enforces the constraint that the weights for each component must sum to one. +// enforces the constraint that the weights for each component must sum to one, +// if necessary. void NnetCombiner::GetNormalizedWeights( - const VectorBase &unnorm_weights, - VectorBase *norm_weights) const { + const VectorBase &unnorm_weights, + VectorBase *norm_weights) const { if (!config_.enforce_sum_to_one) { norm_weights->CopyFromVec(unnorm_weights); return; @@ -468,12 +540,12 @@ void NnetCombiner::GetNormalizedWeights( int32 num_uc = NumUpdatableComponents(), num_models = nnet_params_.NumRows(); for (int32 c = 0; c < num_uc; c++) { - BaseFloat sum = 0.0; + double sum = 0.0; for (int32 m = 0; m < num_models; m++) { int32 index = m * num_uc + c; sum += unnorm_weights(index); } - BaseFloat inv_sum = 1.0 / sum; // if it's NaN then it's OK, we'll get NaN + double inv_sum = 1.0 / sum; // if it's NaN then it's OK, we'll get NaN // weights and eventually -inf objective. 
for (int32 m = 0; m < num_models; m++) { int32 index = m * num_uc + c; @@ -483,9 +555,9 @@ void NnetCombiner::GetNormalizedWeights( } void NnetCombiner::GetUnnormalizedWeightsDeriv( - const VectorBase &unnorm_weights, - const VectorBase &norm_weights_deriv, - VectorBase *unnorm_weights_deriv) { + const VectorBase &unnorm_weights, + const VectorBase &norm_weights_deriv, + VectorBase *unnorm_weights_deriv) { if (!config_.enforce_sum_to_one) { unnorm_weights_deriv->CopyFromVec(norm_weights_deriv); return; @@ -493,13 +565,13 @@ void NnetCombiner::GetUnnormalizedWeightsDeriv( int32 num_uc = NumUpdatableComponents(), num_models = nnet_params_.NumRows(); for (int32 c = 0; c < num_uc; c++) { - BaseFloat sum = 0.0; + double sum = 0.0; for (int32 m = 0; m < num_models; m++) { int32 index = m * num_uc + c; sum += unnorm_weights(index); } - BaseFloat inv_sum = 1.0 / sum; - BaseFloat inv_sum_deriv = 0.0; + double inv_sum = 1.0 / sum; + double inv_sum_deriv = 0.0; for (int32 m = 0; m < num_models; m++) { int32 index = m * num_uc + c; // in the forward direction, we'd do: @@ -508,7 +580,7 @@ void NnetCombiner::GetUnnormalizedWeightsDeriv( inv_sum_deriv += norm_weights_deriv(index) * unnorm_weights(index); } // note: d/dx (1/x) = -1/x^2 - BaseFloat sum_deriv = -1.0 * inv_sum_deriv * inv_sum * inv_sum; + double sum_deriv = -1.0 * inv_sum_deriv * inv_sum * inv_sum; for (int32 m = 0; m < num_models; m++) { int32 index = m * num_uc + c; (*unnorm_weights_deriv)(index) += sum_deriv; diff --git a/src/nnet3/nnet-combine.h b/src/nnet3/nnet-combine.h index a2883dab5b2..5b60d30b8ed 100644 --- a/src/nnet3/nnet-combine.h +++ b/src/nnet3/nnet-combine.h @@ -48,6 +48,7 @@ struct NnetCombineConfig { bool test_gradient; bool enforce_positive_weights; bool enforce_sum_to_one; + BaseFloat sum_to_one_penalty; bool separate_weights_per_component; NnetCombineConfig(): num_iters(60), initial_impr(0.01), @@ -55,6 +56,7 @@ struct NnetCombineConfig { test_gradient(false), enforce_positive_weights(false), enforce_sum_to_one(false), + sum_to_one_penalty(0.0), separate_weights_per_component(true) { } void Register(OptionsItf *po) { @@ -73,6 +75,11 @@ struct NnetCombineConfig { "If true, enforce that all weights are positive."); po->Register("enforce-sum-to-one", &enforce_sum_to_one, "If true, enforce that " "the model weights for each component should sum to one."); + po->Register("sum-to-one-penalty", &sum_to_one_penalty, "If >0, a penalty term " + "on the squared difference between sum(weights) for one component," + " and 1.0. This is like --enforce-sum-to-one, but done in a 'soft' " + "way (e.g. maybe useful with dropout). We suggest small values " + "like 10e-3 (for regular nnets) or 1.0e-04 (for chain models)."); po->Register("separate-weights-per-component", &separate_weights_per_component, "If true, have a separate weight for each updatable component in " "the nnet."); @@ -104,7 +111,7 @@ class NnetCombiner { ~NnetCombiner() { delete prob_computer_; } private: - const NnetCombineConfig &config_; + NnetCombineConfig config_; const std::vector &egs_; @@ -126,8 +133,9 @@ class NnetCombiner { Matrix nnet_params_; // This vector has the same dimension as nnet_params_.NumRows(), - // and helps us normalize so each row of nnet_params correspondss to - // a weighted average of its inputs. + // and helps us normalize so each row of nnet_params corresponds to + // a weighted average of its inputs (will be all ones if + // config_.max_effective_inputs >= the number of nnets provided). 
  Vector<BaseFloat> tot_input_weighting_;

   // returns the parameter dimension, i.e. the dimension of the parameters that
@@ -149,7 +157,7 @@ class NnetCombiner {
   // Computes the initial parameters.  The parameters are the underlying thing
   // that we optimize; their dimension equals ParameterDim().  They are not the same
   // thing as the nnet parameters.
-  void GetInitialParameters(VectorBase<BaseFloat> *params) const;
+  void GetInitialParameters(VectorBase<double> *params) const;
 
   // Tests that derivatives are accurate.  Prints warning and returns false if not.
   bool SelfTestDerivatives();
@@ -159,33 +167,48 @@
 
   // prints the parameters via logging statements.
-  void PrintParams(const VectorBase<BaseFloat> &params) const;
+  void PrintParams(const VectorBase<double> &params) const;
 
   // This function computes the objective function (and its derivative, if the objective
   // function is finite) at the given value of the parameters (the parameters we're optimizing,
   // i.e. the combination weights; not the nnet parameters.  This function calls most of the
   // functions below.
   double ComputeObjfAndDerivFromParameters(
-      VectorBase<BaseFloat> &params,
-      VectorBase<BaseFloat> *params_deriv);
+      VectorBase<double> &params,
+      VectorBase<double> *params_deriv);
 
   // Computes the weights from the parameters in a config-dependent way.  The
   // weight dimension is always (the number of updatable components times
   // nnet_params_.NumRows()).
-  void GetWeights(const VectorBase<BaseFloat> &params,
-                  VectorBase<BaseFloat> *weights) const;
+  void GetWeights(const VectorBase<double> &params,
+                  VectorBase<double> *weights) const;
 
   // Given the raw weights: if config_.enforce_sum_to_one, then compute weights
   // with sum-to-one constraint per component included; else just copy input to
   // output.
-  void GetNormalizedWeights(const VectorBase<BaseFloat> &unnorm_weights,
-                            VectorBase<BaseFloat> *norm_weights) const;
+  void GetNormalizedWeights(const VectorBase<double> &unnorm_weights,
+                            VectorBase<double> *norm_weights) const;
+
+  // if config_.sum_to_one_penalty is 0.0, returns 0.0 and sets
+  // weights_penalty_deriv to 0.0; else it computes, for each
+  // updatable component u the total weight w_u, returns the value
+  // -0.5 * config_.sum_to_one_penalty * sum_u (w_u - 1.0)^2;
+  // and sets 'weights_penalty_deriv' to the derivative w.r.t.
+  // the result.
+  // Note: config_.sum_to_one_penalty is exclusive with
+  // config_.enforce_sum_to_one, so there is really no distinction between
+  // normalized and unnormalized weights here (since normalization would be a
+  // no-op).
+  double GetSumToOnePenalty(const VectorBase<double> &weights,
+                            VectorBase<double> *weights_penalty_deriv,
+                            bool print_weights = false) const;
+
   // Computes the nnet-parameter vector from the normalized weights and
   // nnet_params_, as a vector.  (See the functions Vectorize() and
   // UnVectorize() for how they relate to the nnet's components' parameters).
-  void GetNnetParameters(const Vector<BaseFloat> &normalized_weights,
+  void GetNnetParameters(const Vector<double> &normalized_weights,
                          VectorBase<BaseFloat> *nnet_params) const;
 
   // This function computes the objective function (and its derivative, if the objective
@@ -197,23 +220,23 @@
   // Given an objective-function derivative with respect to the nnet parameters,
   // computes the derivative with respect to the (normalized) weights.
   void GetWeightsDeriv(const VectorBase<BaseFloat> &nnet_params_deriv,
-                       VectorBase<BaseFloat> *normalized_weights_deriv);
+                       VectorBase<double> *normalized_weights_deriv);
 
   // Computes the derivative w.r.t. the unnormalized weights, by propagating
   // through the normalization operation.
   // If config_.enforce_sum_to_one == false, just copies norm_weights_deriv to
   // unnorm_weights_deriv.
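For reference, the backward pass through this normalization (implemented in GetUnnormalizedWeightsDeriv in the .cc diff above) is just the quotient rule; per component, with S the sum of that component's unnormalized weights,

    \tilde w_i = \frac{w_i}{S},
    \qquad
    \frac{\partial F}{\partial w_i}
      = \frac{1}{S}\,\frac{\partial F}{\partial \tilde w_i}
        \;-\; \frac{1}{S^2}\sum_j w_j\,\frac{\partial F}{\partial \tilde w_j},

where the second term is what the code accumulates via inv_sum_deriv and sum_deriv.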
- void GetUnnormalizedWeightsDeriv(const VectorBase &unnorm_weights, - const VectorBase &norm_weights_deriv, - VectorBase *unnorm_weights_deriv); + void GetUnnormalizedWeightsDeriv(const VectorBase &unnorm_weights, + const VectorBase &norm_weights_deriv, + VectorBase *unnorm_weights_deriv); // Given a derivative w.r.t. the weights, outputs a derivative w.r.t. // the params - void GetParamsDeriv(const VectorBase &weights, - const VectorBase &weight_deriv, - VectorBase *param_deriv); + void GetParamsDeriv(const VectorBase &weights, + const VectorBase &weight_deriv, + VectorBase *param_deriv); void ComputeUpdatableComponentDims(); void FinishPreprocessingInput(); From c4bb86691710c1ba4e9e71d3691300404b243941 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 22 Jan 2017 23:33:12 -0500 Subject: [PATCH 318/530] [src] Add more diagnostic output to lattice determinization programs --- src/lat/minimize-lattice.h | 1 + src/latbin/lattice-depth.cc | 6 +- src/latbin/lattice-determinize-non-compact.cc | 80 +++++++++++-------- .../lattice-determinize-phone-pruned.cc | 27 +++++-- src/latbin/lattice-determinize-pruned.cc | 18 +++++ src/latbin/lattice-determinize.cc | 36 ++++++--- 6 files changed, 119 insertions(+), 49 deletions(-) diff --git a/src/lat/minimize-lattice.h b/src/lat/minimize-lattice.h index 60acfb27353..fcf6c0f36df 100644 --- a/src/lat/minimize-lattice.h +++ b/src/lat/minimize-lattice.h @@ -40,6 +40,7 @@ namespace fst { /// function will not combine as many states as it could, but it won't crash. /// Returns true on success, and false if it failed due to topological sorting /// failing. +/// The output will be topologically sorted. template bool MinimizeCompactLattice( MutableFst > > *clat, diff --git a/src/latbin/lattice-depth.cc b/src/latbin/lattice-depth.cc index 93dfd5c966b..9a785c4b6a6 100644 --- a/src/latbin/lattice-depth.cc +++ b/src/latbin/lattice-depth.cc @@ -34,7 +34,7 @@ int main(int argc, char *argv[]) { using fst::VectorFst; using fst::StdArc; typedef StdArc::StateId StateId; - + const char *usage = "Compute the lattice depths in terms of the average number of arcs that\n" "cross a frame. See also lattice-depth-per-frame\n" @@ -42,7 +42,7 @@ int main(int argc, char *argv[]) { "E.g.: lattice-depth ark:- ark,t:-\n"; ParseOptions po(usage); - + po.Read(argc, argv); if (po.NumArgs() < 1 || po.NumArgs() > 2) { @@ -63,7 +63,7 @@ int main(int argc, char *argv[]) { std::string key = clat_reader.Key(); TopSortCompactLatticeIfNeeded(&clat); - + int32 t; BaseFloat depth = CompactLatticeDepth(clat, &t); diff --git a/src/latbin/lattice-determinize-non-compact.cc b/src/latbin/lattice-determinize-non-compact.cc index f79262e0832..44ae8566f86 100644 --- a/src/latbin/lattice-determinize-non-compact.cc +++ b/src/latbin/lattice-determinize-non-compact.cc @@ -56,9 +56,9 @@ bool DeterminizeLatticeWrapper(const Lattice &lat, KALDI_WARN << "Detected empty lattice, skipping " << key; return false; } - - // The work gets done in the next line. - if (DeterminizeLattice(lat, clat, lat_opts, NULL)) { + + // The work gets done in the next line. + if (DeterminizeLattice(lat, clat, lat_opts, NULL)) { if (prune) PruneLattice(cur_beam, clat); return true; } else { // failed to determinize.. 
@@ -91,14 +91,14 @@ bool DeterminizeLatticeWrapper(const Lattice &lat, } void ComputeAcousticScoresMap( - const Lattice &lat, - unordered_map, std::pair, + const Lattice &lat, + unordered_map, std::pair, PairHasher > *acoustic_scores) { acoustic_scores->clear(); std::vector state_times; LatticeStateTimes(lat, &state_times); - + KALDI_ASSERT(lat.Start() == 0); for (StateId s = 0; s < lat.NumStates(); s++) { @@ -111,17 +111,17 @@ void ComputeAcousticScoresMap( int32 tid = arc.ilabel; if (tid != 0) { - unordered_map, std::pair, + unordered_map, std::pair, PairHasher >::iterator it = acoustic_scores->find(std::make_pair(t, tid)); if (it == acoustic_scores->end()) { - acoustic_scores->insert(std::make_pair(std::make_pair(t, tid), + acoustic_scores->insert(std::make_pair(std::make_pair(t, tid), std::make_pair(weight.Value2(), 1))); } else { - if (it->second.second == 2 + if (it->second.second == 2 && it->second.first / it->second.second != weight.Value2()) { KALDI_VLOG(2) << "Transitions on the same frame have different " - << "acoustic costs for tid " << tid << "; " - << it->second.first / it->second.second + << "acoustic costs for tid " << tid << "; " + << it->second.first / it->second.second << " vs " << weight.Value2(); } it->second.first += weight.Value2(); @@ -135,7 +135,7 @@ void ComputeAcousticScoresMap( LatticeWeight f = lat.Final(s); if (f != LatticeWeight::Zero()) { - // Final acoustic cost must be 0 as we are reading from + // Final acoustic cost must be 0 as we are reading from // non-determinized, non-compact lattice KALDI_ASSERT(f.Value2() == 0.0); } @@ -143,25 +143,25 @@ void ComputeAcousticScoresMap( } void ReplaceAcousticScoresFromMap( - const unordered_map, std::pair, + const unordered_map, std::pair, PairHasher > &acoustic_scores, Lattice *lat) { fst::TopSort(lat); - + std::vector state_times; LatticeStateTimes(*lat, &state_times); - + KALDI_ASSERT(lat->Start() == 0); for (StateId s = 0; s < lat->NumStates(); s++) { int32 t = state_times[s]; - for (fst::MutableArcIterator aiter(lat, s); + for (fst::MutableArcIterator aiter(lat, s); !aiter.Done(); aiter.Next()) { Arc arc(aiter.Value()); - + int32 tid = arc.ilabel; if (tid != 0) { - unordered_map, std::pair, + unordered_map, std::pair, PairHasher >::const_iterator it = acoustic_scores.find(std::make_pair(t, tid)); if (it == acoustic_scores.end()) { KALDI_ERR << "Could not find tid " << tid << " at time " << t @@ -207,7 +207,7 @@ int main(int argc, char *argv[]) { "\n" "Usage: lattice-determinize-non-compact [options] lattice-rspecifier lattice-wspecifier\n" " e.g.: lattice-determinize-non-compact --acoustic-scale=0.1 --beam=15.0 ark:1.lats ark:det.lats\n"; - + ParseOptions po(usage); BaseFloat acoustic_scale = 1.0; BaseFloat beam = 10.0; @@ -218,7 +218,7 @@ int main(int argc, char *argv[]) { BaseFloat delta = fst::kDelta; bool prune = false; bool minimize = false; - + po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods"); po.Register("beam", &beam, @@ -238,7 +238,7 @@ int main(int argc, char *argv[]) { "decrease beam by beam-ratio if determinization fails."); po.Register("minimize", &minimize, "If true, push and minimize after determinization"); - + po.Read(argc, argv); if (po.NumArgs() != 2) { @@ -252,12 +252,16 @@ int main(int argc, char *argv[]) { // Read as regular lattice-- this is the form we need it in for efficient // pruning. SequentialLatticeReader lattice_reader(lats_rspecifier); - + // Write as regular lattice. 
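Stated compactly, the map built above associates each (frame t, transition-id tid) pair with a running (sum, count) over all arcs carrying that transition-id on that frame, and ReplaceAcousticScoresFromMap writes back the mean,

    \mathrm{score}(t,\mathrm{tid})
      = \frac{1}{N_{t,\mathrm{tid}}}\sum_{a\,\in\,\mathrm{arcs}(t,\mathrm{tid})} \mathrm{ac}(a),

so per-arc acoustic scores survive the round trip through determinization (which merges arcs) in averaged form.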
-  LatticeWriter lattice_writer(lats_wspecifier); 
+  LatticeWriter lattice_writer(lats_wspecifier);
 
     int32 n_done = 0, n_error = 0;
 
+    // depth stats (for diagnostics).
+    double sum_depth_in = 0.0,
+        sum_depth_out = 0.0, sum_t = 0.0;
+
     if (acoustic_scale == 0.0)
       KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)";
     LatticeWeight beam_weight(beam, static_cast<BaseFloat>(0.0));
 
     for (; !lattice_reader.Done(); lattice_reader.Next()) {
       std::string key = lattice_reader.Key();
       Lattice lat = lattice_reader.Value();
-      
+
       lattice_reader.FreeCurrent();
-      
+
       fst::TopSort(&lat);
-      
+
       fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale), &lat);
 
-      // Compute a map from each (t, tid) to (sum_of_acoustic_scores, count) 
-      unordered_map<std::pair<int32, int32>, std::pair<BaseFloat, int32>,
+      // Compute a map from each (t, tid) to (sum_of_acoustic_scores, count)
+      unordered_map<std::pair<int32, int32>, std::pair<BaseFloat, int32>,
                                         PairHasher<int32> > acoustic_scores;
       ComputeAcousticScoresMap(lat, &acoustic_scores);
-      
+
       Invert(&lat); // make it so word labels are on the input.
-      
+
       CompactLattice clat;
       if (DeterminizeLatticeWrapper(lat, key, prune,
                                     beam, beam_ratio, max_mem, max_loop,
@@ -290,6 +294,13 @@ int main(int argc, char *argv[]) {
           MinimizeCompactLattice(&clat);
         }
 
+        int32 t;
+        TopSortCompactLatticeIfNeeded(&clat);
+        double depth = CompactLatticeDepth(clat, &t);
+        sum_depth_in += lat.NumStates();
+        sum_depth_out += depth * t;
+        sum_t += t;
+
         Lattice out_lat;
         fst::ConvertLattice(clat, &out_lat);
         fst::TopSort(&out_lat);
@@ -298,7 +309,7 @@ int main(int argc, char *argv[]) {
         // the computed map
         ReplaceAcousticScoresFromMap(acoustic_scores, &out_lat);
 
-        fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale), 
+        fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale),
                           &out_lat);
         lattice_writer.Write(key, out_lat);
         n_done++;
@@ -307,6 +318,12 @@ int main(int argc, char *argv[]) {
       }
     }
 
+    if (sum_t != 0.0) {
+      KALDI_LOG << "Average input-lattice depth (measured at state level) is "
+                << (sum_depth_in / sum_t) << ", output depth is "
+                << (sum_depth_out / sum_t) << ", over " << sum_t << " frames "
+                << " (average num-frames = " << (sum_t / n_done) << ").";
+    }
     KALDI_LOG << "Done " << n_done << " lattices, errors on " << n_error;
     return (n_done != 0 ? 0 : 1);
   } catch(const std::exception &e) {
     std::cerr << e.what();
     return -1;
   }
 }
-
diff --git a/src/latbin/lattice-determinize-phone-pruned.cc b/src/latbin/lattice-determinize-phone-pruned.cc
index 8df4bda1e1a..0959bcbcd74 100644
--- a/src/latbin/lattice-determinize-phone-pruned.cc
+++ b/src/latbin/lattice-determinize-phone-pruned.cc
@@ -28,7 +28,7 @@ int main(int argc, char *argv[]) {
   try {
     using namespace kaldi;
     typedef kaldi::int32 int32;
-    
+
     const char *usage =
         "Determinize lattices, keeping only the best path (sequence of\n"
         "acoustic states) for each input-symbol sequence. This version does\n"
@@ -41,13 +41,13 @@ int main(int argc, char *argv[]) {
         " \n"
         " e.g.: lattice-determinize-phone-pruned --acoustic-scale=0.1 \\\n"
         "           final.mdl ark:in.lats ark:det.lats\n";
-    
+
     ParseOptions po(usage);
     BaseFloat acoustic_scale = 1.0;
     BaseFloat beam = 10.0;
     fst::DeterminizeLatticePhonePrunedOptions opts;
     opts.max_mem = 50000000;
-    
+
     po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic"
                 " likelihoods.");
     po.Register("beam", &beam, "Pruning beam [applied after acoustic scaling].");
@@ -69,12 +69,16 @@ int main(int argc, char *argv[]) {
     // Reads as regular lattice-- this is the form the determinization code
     // accepts.
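The diagnostic added above, and repeated in the other determinization binaries below, reports frame-weighted averages: with T_u the frame count of utterance u and depth_u the CompactLatticeDepth of its determinized lattice,

    \mathrm{depth_{in}} = \frac{\sum_u \mathrm{NumStates}(\mathrm{lat}_u)}{\sum_u T_u},
    \qquad
    \mathrm{depth_{out}} = \frac{\sum_u \mathrm{depth}_u\, T_u}{\sum_u T_u},

i.e. the input figure counts states per frame, while the output figure is the average number of arcs crossing a frame.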
    SequentialLatticeReader lat_reader(lats_rspecifier);
-    
+
     // Writes as compact lattice.
-    CompactLatticeWriter compact_lat_writer(lats_wspecifier); 
+    CompactLatticeWriter compact_lat_writer(lats_wspecifier);
 
     int32 n_done = 0, n_warn = 0;
 
+    // depth stats (for diagnostics).
+    double sum_depth_in = 0.0,
+        sum_depth_out = 0.0, sum_t = 0.0;
+
     if (acoustic_scale == 0.0)
       KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)";
 
@@ -95,11 +99,24 @@ int main(int argc, char *argv[]) {
         n_warn++;
       }
 
+      int32 t;
+      TopSortCompactLatticeIfNeeded(&det_clat);
+      double depth = CompactLatticeDepth(det_clat, &t);
+      sum_depth_in += lat.NumStates();
+      sum_depth_out += depth * t;
+      sum_t += t;
+
       fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale),
                         &det_clat);
       compact_lat_writer.Write(key, det_clat);
       n_done++;
     }
 
+    if (sum_t != 0.0) {
+      KALDI_LOG << "Average input-lattice depth (measured at state level) is "
+                << (sum_depth_in / sum_t) << ", output depth is "
+                << (sum_depth_out / sum_t) << ", over " << sum_t << " frames "
+                << " (average num-frames = " << (sum_t / n_done) << ").";
+    }
     KALDI_LOG << "Done " << n_done << " lattices, determinization finished "
               << "earlier than specified by the beam on " << n_warn << " of "
               << "these.";
diff --git a/src/latbin/lattice-determinize-pruned.cc b/src/latbin/lattice-determinize-pruned.cc
index 1e6fa2d6de2..3e8bca5a3ce 100644
--- a/src/latbin/lattice-determinize-pruned.cc
+++ b/src/latbin/lattice-determinize-pruned.cc
@@ -74,6 +74,10 @@ int main(int argc, char *argv[]) {
 
     int32 n_done = 0, n_warn = 0;
 
+    // depth stats (for diagnostics).
+    double sum_depth_in = 0.0,
+        sum_depth_out = 0.0, sum_t = 0.0;
+
     if (acoustic_scale == 0.0)
       KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)";
 
@@ -109,11 +113,25 @@ int main(int argc, char *argv[]) {
         PushCompactLatticeWeights(&det_clat);
         MinimizeCompactLattice(&det_clat);
       }
+
+      int32 t;
+      TopSortCompactLatticeIfNeeded(&det_clat);
+      double depth = CompactLatticeDepth(det_clat, &t);
+      sum_depth_in += lat.NumStates();
+      sum_depth_out += depth * t;
+      sum_t += t;
+
       fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale),
                         &det_clat);
       compact_lat_writer.Write(key, det_clat);
       n_done++;
     }
 
+    if (sum_t != 0.0) {
+      KALDI_LOG << "Average input-lattice depth (measured at state level) is "
+                << (sum_depth_in / sum_t) << ", output depth is "
+                << (sum_depth_out / sum_t) << ", over " << sum_t << " frames "
+                << " (average num-frames = " << (sum_t / n_done) << ").";
+    }
     KALDI_LOG << "Done " << n_done << " lattices, determinization finished "
               << "earlier than specified by the beam (or output was empty) on "
               << n_warn << " of these.";
diff --git a/src/latbin/lattice-determinize.cc b/src/latbin/lattice-determinize.cc
index 8a5bd93e503..d59fcda7022 100644
--- a/src/latbin/lattice-determinize.cc
+++ b/src/latbin/lattice-determinize.cc
@@ -50,9 +50,9 @@ bool DeterminizeLatticeWrapper(const Lattice &lat,
       KALDI_WARN << "Detected empty lattice, skipping " << key;
       return false;
     }
-    
-    // The work gets done in the next line.  
-    if (DeterminizeLattice(lat, clat, lat_opts, NULL)) {
+
+    // The work gets done in the next line.
+    if (DeterminizeLattice(lat, clat, lat_opts, NULL)) {
       if (prune) PruneLattice(cur_beam, clat);
       return true;
     } else { // failed to determinize..
@@ -104,7 +104,7 @@ int main(int argc, char *argv[]) {
         "\n"
         "Usage: lattice-determinize [options] lattice-rspecifier lattice-wspecifier\n"
         " e.g.: lattice-determinize --acoustic-scale=0.1 --beam=15.0 ark:1.lats ark:det.lats\n";
-    
+
     ParseOptions po(usage);
     BaseFloat acoustic_scale = 1.0;
     BaseFloat beam = 10.0;
@@ -115,7 +115,7 @@ int main(int argc, char *argv[]) {
     BaseFloat delta = fst::kDelta;
     bool prune = false;
     bool minimize = false;
-    
+
     po.Register("acoustic-scale", &acoustic_scale,
                 "Scaling factor for acoustic likelihoods");
     po.Register("beam", &beam,
@@ -135,7 +135,7 @@ int main(int argc, char *argv[]) {
                 "decrease beam by beam-ratio if determinization fails.");
     po.Register("minimize", &minimize,
                 "If true, push and minimize after determinization");
-    
+
     po.Read(argc, argv);
 
     if (po.NumArgs() != 2) {
@@ -150,12 +150,16 @@ int main(int argc, char *argv[]) {
     // Read as regular lattice-- this is the form we need it in for efficient
     // pruning.
     SequentialLatticeReader lattice_reader(lats_rspecifier);
-    
+
     // Write as compact lattice.
-    CompactLatticeWriter compact_lattice_writer(lats_wspecifier); 
+    CompactLatticeWriter compact_lattice_writer(lats_wspecifier);
 
     int32 n_done = 0, n_error = 0;
 
+    // depth stats (for diagnostics).
+    double sum_depth_in = 0.0,
+        sum_depth_out = 0.0, sum_t = 0.0;
+
     if (acoustic_scale == 0.0)
       KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)";
     LatticeWeight beam_weight(beam, static_cast<BaseFloat>(0.0));
@@ -164,7 +168,7 @@ int main(int argc, char *argv[]) {
       std::string key = lattice_reader.Key();
       Lattice lat = lattice_reader.Value();
       Invert(&lat); // make it so word labels are on the input.
-      
+
       lattice_reader.FreeCurrent();
       fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale), &lat);
 
@@ -177,6 +181,14 @@ int main(int argc, char *argv[]) {
         PushCompactLatticeWeights(&clat);
         MinimizeCompactLattice(&clat);
       }
+
+      int32 t;
+      TopSortCompactLatticeIfNeeded(&clat);
+      double depth = CompactLatticeDepth(clat, &t);
+      sum_depth_in += lat.NumStates();
+      sum_depth_out += depth * t;
+      sum_t += t;
+
       fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale), &clat);
       compact_lattice_writer.Write(key, clat);
       n_done++;
@@ -185,6 +197,12 @@ int main(int argc, char *argv[]) {
       }
     }
 
+    if (sum_t != 0.0) {
+      KALDI_LOG << "Average input-lattice depth (measured at state level) is "
+                << (sum_depth_in / sum_t) << ", output depth is "
+                << (sum_depth_out / sum_t) << ", over " << sum_t << " frames "
+                << " (average num-frames = " << (sum_t / n_done) << ").";
+    }
     KALDI_LOG << "Done " << n_done << " lattices, errors on " << n_error;
     return (n_done != 0 ?
0 : 1); } catch(const std::exception &e) { From 5aee23492a6b89fca4112eabd1cee2b1b66a2382 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 22 Jan 2017 23:37:31 -0500 Subject: [PATCH 319/530] [src] Extend nnet3 Nnet reading code to accept .mdl files --- src/nnet3/nnet-discriminative-training.cc | 13 +++++++++++++ src/nnet3/nnet-nnet.cc | 22 ++++++++++++++++++++++ src/nnet3/nnet-nnet.h | 2 ++ src/nnet3/nnet-utils.h | 4 +++- 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/nnet3/nnet-discriminative-training.cc b/src/nnet3/nnet-discriminative-training.cc index 5ef1675c5ca..fb4b7db8c3c 100644 --- a/src/nnet3/nnet-discriminative-training.cc +++ b/src/nnet3/nnet-discriminative-training.cc @@ -238,6 +238,19 @@ void DiscriminativeObjectiveFunctionInfo::PrintStatsForThisPhase( bool DiscriminativeObjectiveFunctionInfo::PrintTotalStats(const std::string &name, const std::string &criterion) const { BaseFloat objf = stats.TotalObjf(criterion) /stats.tot_t_weighted; + + double avg_gradients = (stats.tot_num_count + stats.tot_den_count) / + stats.tot_t_weighted; + KALDI_LOG << "Average num+den count of stats is " << avg_gradients + << " per frame, over " + << stats.tot_t_weighted << " frames."; + if (stats.tot_l2_term != 0.0) { + KALDI_LOG << "Average l2 norm of output per frame is " + << (stats.tot_l2_term / stats.tot_t_weighted) << " over " + << stats.tot_t_weighted << " frames."; + } + + KALDI_LOG << "Overall average objective function for '" << name << "' is " << objf << " over " << stats.tot_t_weighted << " frames."; KALDI_LOG << "[this line is to be parsed by a script:] " diff --git a/src/nnet3/nnet-nnet.cc b/src/nnet3/nnet-nnet.cc index ad5f715a294..dd90af739e7 100644 --- a/src/nnet3/nnet-nnet.cc +++ b/src/nnet3/nnet-nnet.cc @@ -23,6 +23,8 @@ #include "nnet3/nnet-parse.h" #include "nnet3/nnet-utils.h" #include "nnet3/nnet-simple-component.h" +#include "nnet3/am-nnet-simple.h" +#include "hmm/transition-model.h" namespace kaldi { namespace nnet3 { @@ -565,8 +567,28 @@ void Nnet::GetSomeNodeNames( } } +void Nnet::Swap(Nnet *other) { + component_names_.swap(other->component_names_); + components_.swap(other->components_); + node_names_.swap(other->node_names_); + nodes_.swap(other->nodes_); +} + void Nnet::Read(std::istream &is, bool binary) { Destroy(); + int first_char = PeekToken(is, binary); + if (first_char == 'T') { + // This branch is to allow '.mdl' files (containing a TransitionModel + // and then an AmNnetSimple) to be read where .raw files (containing + // just an Nnet) would be expected. This is often convenient. 
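A usage sketch of what this buys (an assumption about downstream tools, since only the reading code is shown here): anywhere a '.raw' nnet was previously required by a program that reads its input through Nnet::Read, an '.mdl' file should now also be accepted, the transition model being read into a temporary and discarded, e.g.

    # hypothetical: extract the raw nnet directly from an am-nnet model file
    nnet3-copy final.mdl final.raw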
+ TransitionModel temp_trans_model; + temp_trans_model.Read(is, binary); + AmNnetSimple temp_am_nnet; + temp_am_nnet.Read(is, binary); + temp_am_nnet.GetNnet().Swap(this); + return; + } + ExpectToken(is, binary, ""); std::ostringstream config_file_out; std::string cur_line; diff --git a/src/nnet3/nnet-nnet.h b/src/nnet3/nnet-nnet.h index 0e6918de18d..5eb87fd30f3 100644 --- a/src/nnet3/nnet-nnet.h +++ b/src/nnet3/nnet-nnet.h @@ -233,6 +233,8 @@ class Nnet { Nnet *Copy() const { return new Nnet(*this); } + void Swap(Nnet *other); + // Assignment operator Nnet& operator =(const Nnet &nnet); diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h index f98782a6a22..1ae907049c4 100644 --- a/src/nnet3/nnet-utils.h +++ b/src/nnet3/nnet-utils.h @@ -179,7 +179,9 @@ void FindOrphanNodes(const Nnet &nnet, std::vector *nodes); ReadEditConfig() reads a file with a similar-looking format to the config file read by Nnet::ReadConfig(), but this consists of a sequence of operations to perform on an existing network, mostly modifying components. It's one - "directive" (i.e. command) per line. + "directive" (i.e. command) per line, but if supplying the options via + the --edits option to programs like nnet3-am-copy, you can use a semicolon + in place of the newline to separate commands. The following describes the allowed commands. Note: all patterns are like UNIX globbing patterns where the only metacharacter is '*', representing zero From 44fa7eeabe8f7d459ff2a4d76ee27321faebaf6b Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 22 Jan 2017 23:38:03 -0500 Subject: [PATCH 320/530] [src] speed up test chain/chain-supervision-test.cc --- src/chain/chain-supervision-test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chain/chain-supervision-test.cc b/src/chain/chain-supervision-test.cc index 0f0a3009ccd..e38fbca745f 100644 --- a/src/chain/chain-supervision-test.cc +++ b/src/chain/chain-supervision-test.cc @@ -615,7 +615,7 @@ int main() { else CuDevice::Instantiate().SelectGpuId("yes"); #endif - for (int32 i = 0; i < 5; i++) { + for (int32 i = 0; i < 3; i++) { kaldi::chain::ChainSupervisionTest(); kaldi::chain::BreadthFirstTest(); } From 350a9f4b3548888a08dae1ddb8c41da69be52155 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 23 Jan 2017 14:09:52 -0500 Subject: [PATCH 321/530] [src][egs] Various script updates/clarifications, remove no-op options; remove now-removed options from some discriminative-training egs scripts; various bug fixes/tuning. 
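Tying the documentation change above to a concrete command line (the directives are the real ones documented here; the file names are illustrative):

    # hypothetical: two edit directives inline, separated by ';', which
    # ReadEditConfig sees as a newline after substitution
    nnet3-am-copy --edits='remove-orphans; set-dropout-proportion name=* proportion=0.0' \
      final.mdl final_nodropout.mdl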
--- .../s5/local/nnet3/run_tdnn_discriminative.sh | 7 --- .../s5/local/nnet3/run_tdnn_discriminative.sh | 6 -- .../s5c/local/nnet3/compare_wer_general.sh | 0 .../local/nnet3/run_blstm_discriminative.sh | 6 -- .../local/nnet3/run_tdnn_discriminative.sh | 6 -- egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh | 16 ++--- .../s5c/local/nnet3/tuning/run_tdnn_d_disc.sh | 33 +++++----- .../s5/local/nnet3/run_tdnn_discriminative.sh | 6 -- .../nnet3/tuning/run_tdnn_lstm_1b_disc.sh | 29 ++++++--- .../s5/local/nnet3/run_lstm_discriminative.sh | 5 -- .../s5/local/nnet3/run_tdnn_discriminative.sh | 5 -- egs/wsj/s5/steps/nnet2/train_tanh.sh | 24 +++---- egs/wsj/s5/steps/nnet3/align.sh | 11 +++- egs/wsj/s5/steps/nnet3/get_degs.sh | 63 ++++++++++++------- .../s5/steps/nnet3/report/generate_plots.py | 6 +- .../s5/steps/nnet3/train_discriminative.sh | 12 +++- egs/wsj/s5/steps/shift_feats.sh | 8 +-- egs/wsj/s5/utils/data/limit_feature_dim.sh | 2 +- 18 files changed, 127 insertions(+), 118 deletions(-) mode change 100644 => 100755 egs/swbd/s5c/local/nnet3/compare_wer_general.sh diff --git a/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh index 7dc82ad34d1..324061aa5ac 100644 --- a/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh @@ -46,10 +46,6 @@ num_jobs_nnet=4 num_epochs=2 regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true # May need to be set to false - # because it does not help in some setups -last_layer_factor=0.1 - ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. @@ -126,7 +122,6 @@ if [ -z "$degs_dir" ]; then steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ - --adjust-priors $adjust_priors \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ $frame_subsampling_opt \ @@ -143,8 +138,6 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ - --adjust-priors $adjust_priors \ - --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh index 365d01cc85d..b513e0908a5 100755 --- a/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/librispeech/s5/local/nnet3/run_tdnn_discriminative.sh @@ -53,9 +53,6 @@ num_jobs_nnet=4 num_epochs=4 regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true # May need to be set to false - # because it does not help in some setups -last_layer_factor=0.1 ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. 
@@ -133,7 +130,6 @@ if [ -z "$degs_dir" ]; then steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ - --adjust-priors $adjust_priors \ --online-ivector-dir $train_ivector_dir \ --left-context $left_context --right-context $right_context \ $frame_subsampling_opt \ @@ -150,8 +146,6 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ - --adjust-priors $adjust_priors \ - --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/swbd/s5c/local/nnet3/compare_wer_general.sh b/egs/swbd/s5c/local/nnet3/compare_wer_general.sh old mode 100644 new mode 100755 diff --git a/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh b/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh index 349fd246022..c6dfb0107cd 100755 --- a/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh +++ b/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh @@ -52,9 +52,6 @@ num_jobs_nnet=4 num_epochs=4 regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true # May need to be set to false - # because it does not help in some setups -last_layer_factor=0.1 ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. @@ -138,7 +135,6 @@ if [ -z "$degs_dir" ]; then steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ - --adjust-priors $adjust_priors \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ $frame_subsampling_opt \ @@ -155,8 +151,6 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ - --adjust-priors $adjust_priors \ - --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh b/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh index ceef60d0656..7af311e7ff4 100755 --- a/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh @@ -46,9 +46,6 @@ num_jobs_nnet=4 num_epochs=4 regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true # May need to be set to false - # because it does not help in some setups -last_layer_factor=0.1 ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. 
@@ -126,7 +123,6 @@ if [ -z "$degs_dir" ]; then steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ - --adjust-priors $adjust_priors \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ $frame_subsampling_opt \ @@ -143,8 +139,6 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ - --adjust-priors $adjust_priors \ - --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh index ec80972cf2d..b4b60688cdb 100755 --- a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d.sh @@ -6,13 +6,15 @@ # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, # --num-threads 16 and --minibatch-size 128. -# System tdnn_c tdnn_d -# WER on train_dev(tg) 17.37 16.72 -# WER on train_dev(fg) 15.94 15.31 -# WER on eval2000(tg) 20.0 19.2 -# WER on eval2000(fg) 18.2 17.8 -# Final train prob -1.43781 -1.22859 -# Final valid prob -1.56895 -1.354 +# note: the last column is a version of tdnn_d that was done after the +# changes for the 5.1 version of Kaldi (variable minibatch-sizes, etc.) +# System tdnn_c tdnn_d tdnn_d[repeat] +# WER on train_dev(tg) 17.37 16.72 16.51 +# WER on train_dev(fg) 15.94 15.31 15.34 +# WER on eval2000(tg) 20.0 19.2 19.2 +# WER on eval2000(fg) 18.2 17.8 17.7 +# Final train prob -1.43781 -1.22859 -1.22215 +# Final valid prob -1.56895 -1.354 -1.31647 stage=0 affix= diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh index da7cae954f8..22f4004c056 100755 --- a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_d_disc.sh @@ -29,7 +29,7 @@ nj=400 # have a high number of jobs because this could take a while, and we migh graph_dir=exp/tri4/graph_sw1_tg srcdir=exp/nnet3/tdnn_d_sp train_data_dir=data/train_nodup_sp_hires -online_ivector_dir=exp/nnet3/ivectors_train_nodup_sp_hires +online_ivector_dir=exp/nnet3/ivectors_train_nodup_sp ## Objective options @@ -37,7 +37,12 @@ criterion=smbr one_silence_class=true # you can set --disc-affix if you run different configurations, e.g. --disc-affix "_b" -disc_affix= +# originally ran with no affix, with effective_learning_rate=0.0000125; +# reran by mistake with no affix with effective_learning_rate=0.000005 [was a bit +# better, see NOTES, but still best after 1st epoch]. +# reran again with affix=slow and effective_learning_rate=0.0000025 + +disc_affix=slow dir=${srcdir}_${criterion}${disc_affix} @@ -57,7 +62,7 @@ extra_right_context=0 ## Nnet training options -effective_learning_rate=0.0000125 +effective_learning_rate=0.0000025 max_param_change=1 num_jobs_nnet=4 num_epochs=3 @@ -66,8 +71,6 @@ regularization_opts= # Applicable for providing --xent-regularize and - minibatch_size="300=32,16/150=64,32" # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up); # if chunk size is closer to 150, use mini atch size of 64 (or 32 for mop-up). -last_layer_factor=0.1 # prevent the final layer from learning too fast; - # this can be a problem. ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. 
if we decided to run more. @@ -136,7 +139,6 @@ if [ $stage -le 3 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size "$minibatch_size" \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ - --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi @@ -145,15 +147,16 @@ if [ $stage -le 4 ]; then for decode_set in train_dev eval2000; do num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` for iter in epoch$x epoch${x}_adj; do - - steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ - --online-ivector-dir exp/nnet3/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_sw1_tg_${iter} || exit 1; - - steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ - data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ - $dir/decode_${decode_set}_${iter}_sw1_{tg,fsh_fg} || exit 1; - ) & + ( + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_sw1_tg_${iter} || exit 1; + + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_${iter} || exit 1; + ) & + done done done fi diff --git a/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh index 805d38b4e88..8d7393af853 100755 --- a/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh @@ -60,9 +60,6 @@ num_jobs_nnet=4 num_epochs=4 regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true # May need to be set to false - # because it does not help in some setups -last_layer_factor=0.1 ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. @@ -137,7 +134,6 @@ if [ -z "$degs_dir" ]; then steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ - --adjust-priors $adjust_priors \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ $frame_subsampling_opt \ @@ -154,8 +150,6 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ - --adjust-priors $adjust_priors \ - --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh index 9a77a6af6c7..07c3d4af233 100755 --- a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh @@ -6,8 +6,8 @@ # to use the non-cleaned data. # # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, -# since the lattice generation runs in about real-time, so takes of the order of -# 1000 hours of CPU time. +# since the alignment and the lattice generation/egs-dumping takes quite a bit +# of CPU time. set -e @@ -37,7 +37,11 @@ criterion=smbr one_silence_class=true # you can set --disc-affix if you run different configurations, e.g. 
--disc-affix "_b" -disc_affix=slow +# note, I ran without affix with learning rate 0.0000125, with disc_affic=slow +# with learning rate 0.000005, and with disc_affix=slow2 with learning rate 0.0000025. +# disc_affix=slow3 is with effective_learning_rate=0.000005 and last_layer_factor=0.1 + +disc_affix=slow3 dir=${srcdir}_${criterion}${disc_affix} @@ -45,11 +49,17 @@ dir=${srcdir}_${criterion}${disc_affix} ## so it can split utterances without much gap or overlap. frames_per_eg=300,280,150,120,100 frames_overlap_per_eg=0 -frames_per_chunk_decoding=200 +frames_per_chunk_egs=200 # for alignments and denlat creation. +frames_per_chunk_decoding=50 # for decoding; should be the same as the value + # used in the script that trained the nnet. + # We didn't set the frames_per_chunk in + # run_tdnn_lstm_1b.sh, so it defaults to 50. ## these context options should match the training condition. (chunk_left_context, ## chunk_right_context) ## We set --extra-left-context-initial 0 and --extra-right-context-final 0 ## directly in the script below, but this should also match the training condition. +## note: --extra-left-context should be the same as the chunk_left_context (or in +## general, the argument of --egs.chunk-left-context) in the baseline script. extra_left_context=40 extra_right_context=0 @@ -57,6 +67,7 @@ extra_right_context=0 ## Nnet training options effective_learning_rate=0.000005 +last_layer_factor=0.1 max_param_change=1 num_jobs_nnet=4 num_epochs=2 @@ -65,8 +76,6 @@ regularization_opts= # Applicable for providing --xent-regularize and - minibatch_size="300=32,16/150=64,32" # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up); # if chunk size is closer to 150, use mini atch size of 64 (or 32 for mop-up). -last_layer_factor=0.1 # prevent the final layer from learning too fast; - # this can be a problem. ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. @@ -95,7 +104,7 @@ if [ $stage -le 1 ]; then # hardcode no-GPU for alignment, although you could use GPU [you wouldn't # get excellent GPU utilization though.] 
steps/nnet3/align.sh --cmd "$decode_cmd" --use-gpu false \ - --frames-per-chunk $frames_per_chunk_decoding \ + --frames-per-chunk $frames_per_chunk_egs \ --extra-left-context $extra_left_context --extra-right-context $extra_right_context \ --extra-left-context-initial 0 --extra-right-context-final 0 \ --online-ivector-dir $online_ivector_dir \ @@ -118,7 +127,7 @@ if [ -z "$degs_dir" ]; then --extra-left-context $extra_left_context \ --extra-right-context $extra_right_context \ --extra-left-context-initial 0 --extra-right-context-final 0 \ - --frames-per-chunk-decoding "$frames_per_chunk_decoding" \ + --frames-per-chunk-decoding "$frames_per_chunk_egs" \ --stage $get_egs_stage \ --online-ivector-dir $online_ivector_dir \ --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \ @@ -131,11 +140,11 @@ if [ $stage -le 3 ]; then steps/nnet3/train_discriminative.sh --cmd "$decode_cmd" \ --stage $train_stage \ --effective-lrate $effective_learning_rate --max-param-change $max_param_change \ + --last-layer-factor $last_layer_factor \ --criterion $criterion --drop-frames true \ --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size "$minibatch_size" \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ - --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi @@ -149,6 +158,7 @@ if [ $stage -le 4 ]; then --extra-left-context $extra_left_context \ --extra-right-context $extra_right_context \ --extra-left-context-initial 0 --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_decoding" \ --online-ivector-dir exp/nnet3_cleaned/ivectors_${decode_set}_hires \ $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_${iter} || exit 1; @@ -172,5 +182,4 @@ if [ $stage -le 5 ] && $cleanup; then steps/nnet2/remove_egs.sh ${srcdir}_degs || true fi - exit 0; diff --git a/egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh b/egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh index 3fffd59426c..124b04949a0 100755 --- a/egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh +++ b/egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh @@ -54,8 +54,6 @@ num_jobs_nnet=4 num_epochs=4 regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true -last_layer_factor=0.1 ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. 
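Aside: the variable minibatch-size strings used by these discriminative scripts, e.g. "300=32,16/150=64,32", encode the rule quoted in the comments above: use the sizes listed under whichever chunk length is closest. A minimal Python sketch of that rule, for illustration only; the function name is made up and this is not Kaldi's actual parser:

    # Sketch: choose minibatch sizes from a rule string like "300=32,16/150=64,32"
    # by picking the entry whose chunk length is closest to the actual chunk length.
    def pick_minibatch_sizes(rule, chunk_length):
        best = None
        for part in rule.split('/'):
            length_str, sizes_str = part.split('=')
            length = int(length_str)
            sizes = [int(s) for s in sizes_str.split(',')]
            if best is None or abs(length - chunk_length) < abs(best[0] - chunk_length):
                best = (length, sizes)
        return best[1]   # e.g. [32, 16]: main minibatch size, then the mop-up size

    print(pick_minibatch_sizes("300=32,16/150=64,32", 280))   # -> [32, 16]
    print(pick_minibatch_sizes("300=32,16/150=64,32", 120))   # -> [64, 32]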
@@ -141,7 +139,6 @@ if [ -z "$degs_dir" ]; then steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ - --adjust-priors $adjust_priors \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ $frame_subsampling_opt \ @@ -158,8 +155,6 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ - --adjust-priors $adjust_priors \ - --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/wsj/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/wsj/s5/local/nnet3/run_tdnn_discriminative.sh index b84688f574c..01e1476befb 100755 --- a/egs/wsj/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/wsj/s5/local/nnet3/run_tdnn_discriminative.sh @@ -46,8 +46,6 @@ num_jobs_nnet=4 num_epochs=4 regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true -last_layer_factor=0.1 ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. @@ -125,7 +123,6 @@ if [ -z "$degs_dir" ]; then steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ - --adjust-priors $adjust_priors \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ $frame_subsampling_opt \ @@ -142,8 +139,6 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ - --adjust-priors $adjust_priors \ - --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi diff --git a/egs/wsj/s5/steps/nnet2/train_tanh.sh b/egs/wsj/s5/steps/nnet2/train_tanh.sh index d4ec6412be9..7568da320ee 100755 --- a/egs/wsj/s5/steps/nnet2/train_tanh.sh +++ b/egs/wsj/s5/steps/nnet2/train_tanh.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. @@ -15,7 +15,7 @@ num_iters_final=20 # Maximum number of final iterations to give to the initial_learning_rate=0.04 final_learning_rate=0.004 bias_stddev=0.5 -shrink_interval=5 # shrink every $shrink_interval iters except while we are +shrink_interval=5 # shrink every $shrink_interval iters except while we are # still adding layers, when we do it every iter. shrink=true num_frames_shrink=2000 # note: must be <= --num-frames-diagnostic option to get_egs.sh, if @@ -66,7 +66,7 @@ egs_dir= lda_opts= egs_opts= transform_dir= -cmvn_opts= # will be passed to get_lda.sh and get_egs.sh, if supplied. +cmvn_opts= # will be passed to get_lda.sh and get_egs.sh, if supplied. # only relevant for "raw" features, not lda. feat_type= # can be used to force "raw" feature type. prior_subset_size=10000 # 10k samples per job, for computing priors. Should be @@ -122,7 +122,7 @@ if [ $# != 4 ]; then echo " # interpolate parameters (the weights are learned with a validation set)" echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." - + exit 1; fi @@ -139,7 +139,7 @@ done # Set some variables. 
num_leaves=`am-info $alidir/final.mdl 2>/dev/null | awk '/number of pdfs/{print $NF}'` || exit 1; - + nj=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir... # in this dir we'll have just one job. sdata=$data/split$nj @@ -210,7 +210,7 @@ SoftmaxComponent dim=$num_leaves EOF # to hidden.config it will write the part of the config corresponding to a - # single hidden layer; we need this to add new layers. + # single hidden layer; we need this to add new layers. cat >$dir/hidden.config < $dir/foo 2>/dev/null || exit 1 nu=`cat $dir/foo | grep num-updatable-components | awk '{print $2}'` - na=`cat $dir/foo | grep -v Fixed | grep AffineComponent | wc -l` + na=`cat $dir/foo | grep -v Fixed | grep AffineComponent | wc -l` # na is number of last updatable AffineComponent layer [one-based, counting only # updatable components.] # The last two layers will get this (usually lower) learning rate. lr_string="$learning_rate" - for n in `seq 2 $nu`; do + for n in `seq 2 $nu`; do if [ $n -eq $na ] || [ $n -eq $[$na-1] ]; then lr=$last_layer_learning_rate; else lr=$learning_rate; fi lr_string="$lr_string:$lr" done - + $cmd $dir/log/average.$x.log \ nnet-am-average $nnets_list - \| \ nnet-am-copy --learning-rates=$lr_string - $dir/$[$x+1].mdl || exit 1; @@ -327,7 +327,7 @@ while [ $x -lt $num_iters ]; do else # On other iters, do nnet-am-fix which is much faster and has roughly # the same effect. - nnet-am-fix $dir/$[$x+1].mdl $dir/$[$x+1].mdl 2>$dir/log/fix.$x.log + nnet-am-fix $dir/$[$x+1].mdl $dir/$[$x+1].mdl 2>$dir/log/fix.$x.log fi if [ "$mix_up" -gt 0 ] && [ $x -eq $mix_up_iter ]; then @@ -411,7 +411,7 @@ if $cleanup; then fi echo Removing most of the models for x in `seq 0 $num_iters`; do - if [ $[$x%100] -ne 0 ] && [ $x -lt $[$num_iters-$num_iters_final+1] ]; then + if [ $[$x%100] -ne 0 ] && [ $x -lt $[$num_iters-$num_iters_final+1] ]; then # delete all but every 10th model; don't delete the ones which combine to form the final model. rm $dir/$x.mdl fi diff --git a/egs/wsj/s5/steps/nnet3/align.sh b/egs/wsj/s5/steps/nnet3/align.sh index fdf8130ec62..4c3b0987562 100755 --- a/egs/wsj/s5/steps/nnet3/align.sh +++ b/egs/wsj/s5/steps/nnet3/align.sh @@ -135,10 +135,19 @@ tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|" frame_subsampling_opt= if [ -f $srcdir/frame_subsampling_factor ]; then # e.g. for 'chain' systems - frame_subsampling_opt="--frame-subsampling-factor=$(cat $srcdir/frame_subsampling_factor)" + frame_subsampling_factor=$(cat $srcdir/frame_subsampling_factor) + frame_subsampling_opt="--frame-subsampling-factor=$frame_subsampling_factor" cp $srcdir/frame_subsampling_factor $dir + if [ "$frame_subsampling_factor" -gt 1 ] && \ + [ "$scale_opts" == "--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" ]; then + echo "$0: frame-subsampling-factor is not 1 (so likely a chain system)," + echo "... but the scale opts are the defaults. 
You probably want" + echo "--scale-opts '--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0'" + sleep 1 + fi fi + $cmd $queue_opt JOB=1:$nj $dir/log/align.JOB.log \ compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $srcdir/${iter}.mdl $lang/L.fst "$tra" ark:- \| \ nnet3-align-compiled $scale_opts $ivector_opts $frame_subsampling_opt \ diff --git a/egs/wsj/s5/steps/nnet3/get_degs.sh b/egs/wsj/s5/steps/nnet3/get_degs.sh index 65704fe9894..f9737b4c8f4 100755 --- a/egs/wsj/s5/steps/nnet3/get_degs.sh +++ b/egs/wsj/s5/steps/nnet3/get_degs.sh @@ -49,9 +49,9 @@ num_utts_subset=80 # number of utterances in validation and training # subsets used for diagnostics. num_egs_subset=800 # number of egs (maximum) for the validation and training # subsets used for diagnostics. -frames_per_iter=400000 # each iteration of training, see this many frames - # per job. This is just a guideline; it will pick a number - # that divides the number of samples in the entire data. +frames_per_iter=1000000 # each iteration of training, see this many frames + # per job. This is just a guideline; it will pick a number + # that divides the number of samples in the entire data. cleanup=true stage=0 @@ -201,10 +201,20 @@ if [ -f $srcdir/frame_subsampling_factor ]; then # e.g. for 'chain' systems frame_subsampling_opt="--frame-subsampling-factor=$frame_subsampling_factor" cp $srcdir/frame_subsampling_factor $dir + if [ $frame_subsampling_factor -ne 1 ] && [ "$self_loop_scale" == "0.1" ]; then + echo "$0: warning: frame_subsampling_factor is not 1 (so likely a chain system)," + echo "... but self-loop-scale is 0.1. Make sure this is not a mistake." + sleep 1 + fi else frame_subsampling_factor=1 fi +if [ "$self_loop_scale" == "1.0" ] && [ "$acwt" == 0.1 ]; then + echo "$0: warning: you set --self-loop-scale=1.0 (so likely a chain system)", + echo " ... but the acwt is still 0.1 (you probably want --acwt 1.0)" + sleep 1 +fi ## Make the decoding graph. if [ $stage -le 0 ]; then @@ -270,6 +280,30 @@ cp $lang/phones/silence.csl $dir/info/ # of archives we assume that this will be the average number of frames per eg. frames_per_eg_principal=$(echo $frames_per_eg | cut -d, -f1) + +# read 'mof' as max_open_filehandles. +# When splitting up the scp files, we don't want to have to hold too many +# files open at once. If the number of archives we have to write exceeds +# 256 (or less if unlimit -n is smaller), we split in two stages. +mof=$(ulimit -n) || exit 1 +# the next step helps work around inconsistency between different machines on a +# cluster. It's unlikely that the allowed number of open filehandles would ever +# be less than 256. +if [ $mof -gt 256 ]; then mof=256; fi +# allocate mof minus 3 for the max allowed outputs, because of +# stdin,stderr,stdout. this will normally come to 253. We'll do a two-stage +# splitting if the needed number of scp files is larger than this. +num_groups=$[(num_archives+(mof-3)-1)/(mof-3)] +group_size=$[(num_archives+num_groups-1)/num_groups] +if [ $num_groups -gt 1 ]; then + new_num_archives=$[group_size*num_groups] + [ $new_num_archives -ne $num_archives ] && \ + echo "$0: rounding up num-archives from $num_archives to $new_num_archives for easier splitting" + num_archives=$new_num_archives + echo $new_num_archives >$dir/info/num_archives +fi + + if [ -e $dir/storage ]; then # Make soft links to storage directories, if distributing this way.. See # utils/create_split_dir.pl. 
@@ -336,7 +370,8 @@ if [ $stage -le 3 ]; then $dir/dengraph/HCLG.fst "$feats" ark:- \| \ $lattice_determinize_cmd \| \ nnet3-discriminative-get-egs --acoustic-scale=$acwt --compress=$compress \ - --num-frames=$frames_per_eg --num-frames-overlap=$frames_overlap_per_eg \ + $frame_subsampling_opt --num-frames=$frames_per_eg \ + --num-frames-overlap=$frames_overlap_per_eg \ $ivector_opts $context_opts \ $dir/final.mdl "$feats" "ark,s,cs:-" \ "scp:utils/filter_scp.pl $sdata/JOB/utt2spk $dir/ali.scp |" \ @@ -390,26 +425,6 @@ if [ $stage -le 4 ]; then fi -# read 'mof' as max_open_filehandles. -# When splitting up the scp files, we don't want to have to hold too many -# files open at once. -mof=$(ulimit -n) || exit 1 -# the next step helps work around inconsistency between different machines on a -# cluster. It's unlikely that the allowed number of open filehandles would ever -# be less than 256. -if [ $mof -gt 256 ]; then mof=256; fi -# allocate mof minus 3 for the max allowed outputs, because of -# stdin,stderr,stdout. this will normally come to 253. We'll do a two-stage -# splitting if the needed number of scp files is larger than this. -num_groups=$[(num_archives+(mof-3)-1)/(mof-3)] -group_size=$[(num_archives+num_groups-1)/num_groups] -if [ $num_groups -gt 1 ]; then - new_num_archives=$[group_size*num_groups] - [ $new_num_archives -ne $num_archives ] && \ - echo "$0: rounding up num-archives from $num_archives to $new_num_archives for easier splitting" - echo $new_num_archives >$dir/info/num_archives -fi - # function/pseudo-command to randomly shuffle input lines using a small buffer size function shuffle { diff --git a/egs/wsj/s5/steps/nnet3/report/generate_plots.py b/egs/wsj/s5/steps/nnet3/report/generate_plots.py index 0a558b91ae2..dddef38573e 100755 --- a/egs/wsj/s5/steps/nnet3/report/generate_plots.py +++ b/egs/wsj/s5/steps/nnet3/report/generate_plots.py @@ -60,7 +60,7 @@ def get_args(): default=1) parser.add_argument("--is-chain", type=str, default=False, action=common_lib.StrToBoolAction, - help="Iteration from which plotting will start") + help="True if directory contains chain models") parser.add_argument("--output-nodes", type=str, default=None, action=common_lib.NullstrToNoneAction, help="""List of space separated @@ -177,7 +177,9 @@ def generate_accuracy_plots(exp_dir, output_dir, plot, key='accuracy', color_val = g_plot_colors[index] data = np.array(accuracy_data) if data.shape[0] == 0: - raise Exception("Couldn't find any rows for the accuracy plot") + logger.warning("Couldn't find any rows for the accuracy plot, " + "not generating it."); + return data = data[data[:, 0] >= start_iter, :] plot_handle, = plt.plot(data[:, 0], data[:, 1], color=color_val, linestyle="--", diff --git a/egs/wsj/s5/steps/nnet3/train_discriminative.sh b/egs/wsj/s5/steps/nnet3/train_discriminative.sh index 77198a00576..bdee5a54e4d 100755 --- a/egs/wsj/s5/steps/nnet3/train_discriminative.sh +++ b/egs/wsj/s5/steps/nnet3/train_discriminative.sh @@ -192,8 +192,18 @@ if [ $stage -le -1 ]; then echo "$0: setting learning rate to $learning_rate = --num-jobs-nnet * --effective-lrate." fi + + # set the learning rate to $learning_rate, and + # set the output-layer's learning rate to + # $learning_rate times $last_layer_factor. 
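# (Worked example with hypothetical values, for illustration only: if
#  learning_rate=0.00002 and last_layer_factor=0.1, the lines below build
#  edits_str as
#    set-learning-rate learning-rate=0.00002; set-learning-rate name=output.affine learning-rate=0.000002
#  i.e. every updatable component gets the base rate, and output.affine gets a
#  rate ten times smaller.)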
+ edits_str="set-learning-rate learning-rate=$learning_rate" + if [ "$last_layer_factor" != "1.0" ]; then + last_layer_lrate=$(perl -e "print ($learning_rate*$last_layer_factor);") || exit 1 + edits_str="$edits_str; set-learning-rate name=output.affine learning-rate=$last_layer_lrate" + fi + $cmd $dir/log/convert.log \ - nnet3-am-copy --learning-rate=$learning_rate "$src_model" $dir/0.mdl || exit 1; + nnet3-am-copy --edits="$edits_str" "$src_model" $dir/0.mdl || exit 1; ln -sf 0.mdl $dir/epoch0.mdl fi diff --git a/egs/wsj/s5/steps/shift_feats.sh b/egs/wsj/s5/steps/shift_feats.sh index 9ad85368c3f..22b17f2cb09 100755 --- a/egs/wsj/s5/steps/shift_feats.sh +++ b/egs/wsj/s5/steps/shift_feats.sh @@ -5,9 +5,10 @@ # This script shifts the feats in the input data directory and creates a # new directory _fs with shifted feats. -# If the shift is negative, the initial frames get truncated. -# If the shift is positive, the first frame is repeated. -# Usually applicable for sequence training +# If the shift is negative, the initial frames get truncated and the +# last frame repeated; if positive, vice versa. +# Used to prepare data for sequence training of models with +# frame_subsampling_factor != 1 (e.g. chain models). # To be run from .. (one directory up from here) # see ../run.sh for example @@ -82,4 +83,3 @@ if [ $nf -ne $nu ]; then fi echo "Succeeded shifting features for $name into $data" - diff --git a/egs/wsj/s5/utils/data/limit_feature_dim.sh b/egs/wsj/s5/utils/data/limit_feature_dim.sh index 4e64e68d7c7..2d969ee569b 100755 --- a/egs/wsj/s5/utils/data/limit_feature_dim.sh +++ b/egs/wsj/s5/utils/data/limit_feature_dim.sh @@ -1,5 +1,5 @@ #!/bin/bash -77;20003;0c + # Copyright 2016 Alibaba Robotics Corp. (author: Xingyu Na) # Apache 2.0 From c7a7cd56c166e8a9b564601a0a569fb13f1715c9 Mon Sep 17 00:00:00 2001 From: Gaofeng Cheng <770579626@qq.com> Date: Tue, 24 Jan 2017 23:00:18 +0800 Subject: [PATCH 322/530] python level implementation --- egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py | 88 +++++++++++++++------ 1 file changed, 63 insertions(+), 25 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py index 7c5f262a7f5..6f0e1e0f1c6 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py @@ -248,7 +248,8 @@ def set_default_configs(self): 'self-repair-scale-nonlinearity' : 0.00001, 'zeroing-interval' : 20, 'zeroing-threshold' : 15.0, - 'dropout-proportion' : -1.0 # -1.0 stands for no dropout will be added + 'dropout-proportion' : -1.0, # -1.0 stands for no dropout will be added + 'dropout-per-frame' : 'false' # default normal dropout mode } def set_derived_configs(self): @@ -286,6 +287,10 @@ def check_configs(self): raise RuntimeError("dropout-proportion has invalid value {0}." 
"".format(self.config['dropout-proportion'])) + if (self.config['dropout-per-frame'] != 'false' and + self.config['dropout-per-frame'] != 'true'): + raise xparser_error("dropout-per-frame has invalid value {0}.".format(self.config['dropout-per-frame'])) + def auxiliary_outputs(self): return ['c_t'] @@ -347,6 +352,8 @@ def generate_lstm_config(self): pes_str = self.config['ng-per-element-scale-options'] lstm_dropout_value = self.config['dropout-proportion'] lstm_dropout_str = 'dropout-proportion='+str(self.config['dropout-proportion']) + lstm_dropout_per_frame_value = self.config['dropout-per-frame'] + lstm_dropout_per_frame_str = 'dropout-per-frame='+str(self.config['dropout-per-frame']) # Natural gradient per element scale parameters # TODO: decide if we want to keep exposing these options @@ -383,6 +390,8 @@ def generate_lstm_config(self): configs.append("component name={0}.o type=SigmoidComponent dim={1} {2}".format(name, cell_dim, repair_nonlin_str)) configs.append("component name={0}.g type=TanhComponent dim={1} {2}".format(name, cell_dim, repair_nonlin_str)) configs.append("component name={0}.h type=TanhComponent dim={1} {2}".format(name, cell_dim, repair_nonlin_str)) + if lstm_dropout_value != -1.0: + configs.append("component name={0}.dropout type=DropoutComponent dim={1} {2} {3}".format(name, cell_dim, lstm_dropout_str, lstm_dropout_per_frame_str)) configs.append("# Defining the components for other cell computations") configs.append("component name={0}.c1 type=ElementwiseProductComponent input-dim={1} output-dim={2}".format(name, 2 * cell_dim, cell_dim)) @@ -398,17 +407,29 @@ def generate_lstm_config(self): configs.append("# i_t") configs.append("component-node name={0}.i1_t component={0}.W_i.xr input=Append({1}, IfDefined(Offset({2}, {3})))".format(name, input_descriptor, recurrent_connection, delay)) configs.append("component-node name={0}.i2_t component={0}.w_i.c input={1}".format(name, delayed_c_t_descriptor)) - configs.append("component-node name={0}.i_t component={0}.i input=Sum({0}.i1_t, {0}.i2_t)".format(name)) + if lstm_dropout_value != -1.0: + configs.append("component-node name={0}.i_t_predrop component={0}.i input=Sum({0}.i1_t, {0}.i2_t)".format(name)) + configs.append("component-node name={0}.i_t component={0}.dropout input={0}.i_t_predrop".format(name)) + else: + configs.append("component-node name={0}.i_t component={0}.i input=Sum({0}.i1_t, {0}.i2_t)".format(name)) configs.append("# f_t") configs.append("component-node name={0}.f1_t component={0}.W_f.xr input=Append({1}, IfDefined(Offset({2}, {3})))".format(name, input_descriptor, recurrent_connection, delay)) configs.append("component-node name={0}.f2_t component={0}.w_f.c input={1}".format(name, delayed_c_t_descriptor)) - configs.append("component-node name={0}.f_t component={0}.f input=Sum({0}.f1_t, {0}.f2_t)".format(name)) + if lstm_dropout_value != -1.0: + configs.append("component-node name={0}.f_t_predrop component={0}.f input=Sum({0}.f1_t, {0}.f2_t)".format(name)) + configs.append("component-node name={0}.f_t component={0}.dropout input={0}.f_t_predrop".format(name)) + else: + configs.append("component-node name={0}.f_t component={0}.f input=Sum({0}.f1_t, {0}.f2_t)".format(name)) configs.append("# o_t") configs.append("component-node name={0}.o1_t component={0}.W_o.xr input=Append({1}, IfDefined(Offset({2}, {3})))".format(name, input_descriptor, recurrent_connection, delay)) configs.append("component-node name={0}.o2_t component={0}.w_o.c input={0}.c_t".format(name)) - configs.append("component-node 
name={0}.o_t component={0}.o input=Sum({0}.o1_t, {0}.o2_t)".format(name)) + if lstm_dropout_value != -1.0: + configs.append("component-node name={0}.o_t_predrop component={0}.o input=Sum({0}.o1_t, {0}.o2_t)".format(name)) + configs.append("component-node name={0}.o_t component={0}.dropout input={0}.o_t_predrop".format(name)) + else: + configs.append("component-node name={0}.o_t component={0}.o input=Sum({0}.o1_t, {0}.o2_t)".format(name)) configs.append("# h_t") configs.append("component-node name={0}.h_t component={0}.h input={0}.c_t".format(name)) @@ -426,21 +447,13 @@ def generate_lstm_config(self): # add the recurrent connections configs.append("# projection matrices : Wrm and Wpm") - if lstm_dropout_value != -1.0: - configs.append("component name={0}.W_rp.m.dropout type=DropoutComponent dim={1} {2}".format(name, cell_dim, lstm_dropout_str)) configs.append("component name={0}.W_rp.m type=NaturalGradientAffineComponent input-dim={1} output-dim={2} {3}".format(name, cell_dim, rec_proj_dim + nonrec_proj_dim, affine_str)) configs.append("component name={0}.r type=BackpropTruncationComponent dim={1} {2}".format(name, rec_proj_dim, bptrunc_str)) configs.append("# r_t and p_t : rp_t will be the output") - if lstm_dropout_value != -1.0: - configs.append("component-node name={0}.rp_t.dropout component={0}.W_rp.m.dropout input={0}.m_t".format(name)) - configs.append("component-node name={0}.rp_t component={0}.W_rp.m input={0}.rp_t.dropout".format(name)) - configs.append("dim-range-node name={0}.r_t_preclip input-node={0}.rp_t dim-offset=0 dim={1}".format(name, rec_proj_dim)) - configs.append("component-node name={0}.r_t component={0}.r input={0}.r_t_preclip".format(name)) - else: - configs.append("component-node name={0}.rp_t component={0}.W_rp.m input={0}.m_t".format(name)) - configs.append("dim-range-node name={0}.r_t_preclip input-node={0}.rp_t dim-offset=0 dim={1}".format(name, rec_proj_dim)) - configs.append("component-node name={0}.r_t component={0}.r input={0}.r_t_preclip".format(name)) + configs.append("component-node name={0}.rp_t component={0}.W_rp.m input={0}.m_t".format(name)) + configs.append("dim-range-node name={0}.r_t_preclip input-node={0}.rp_t dim-offset=0 dim={1}".format(name, rec_proj_dim)) + configs.append("component-node name={0}.r_t component={0}.r input={0}.r_t_preclip".format(name)) return configs @@ -760,8 +773,9 @@ def set_default_configs(self): # larger max-change than the normal value of 0.75. 
'ng-affine-options' : ' max-change=1.5', 'zeroing-interval' : 20, - 'zeroing-threshold' : 15.0 - + 'zeroing-threshold' : 15.0, + 'dropout-proportion' : -1.0 ,# -1.0 stands for no dropout will be added + 'dropout-per-frame' : 'false' # default normal dropout mode } def set_derived_configs(self): @@ -775,6 +789,15 @@ def set_derived_configs(self): self.config['non-recurrent-projection-dim'] = \ self.config['recurrent-projection-dim'] + if ((self.config['dropout-proportion'] > 1.0 or + self.config['dropout-proportion'] < 0.0) and + self.config['dropout-proportion'] != -1.0 ): + raise xparser_error("dropout-proportion has invalid value {0}.".format(self.config['dropout-proportion'])) + + if (self.config['dropout-per-frame'] != 'false' and + self.config['dropout-per-frame'] != 'true'): + raise xparser_error("dropout-per-frame has invalid value {0}.".format(self.config['dropout-per-frame'])) + def check_configs(self): for key in ['cell-dim', 'recurrent-projection-dim', 'non-recurrent-projection-dim']: @@ -846,7 +869,10 @@ def generate_lstm_config(self): abs(delay))) affine_str = self.config['ng-affine-options'] lstm_str = self.config['lstm-nonlinearity-options'] - + lstm_dropout_value = self.config['dropout-proportion'] + lstm_dropout_str = 'dropout-proportion='+str(self.config['dropout-proportion']) + lstm_dropout_per_frame_value = self.config['dropout-per-frame'] + lstm_dropout_per_frame_str = 'dropout-per-frame='+str(self.config['dropout-per-frame']) configs = [] @@ -865,6 +891,8 @@ def generate_lstm_config(self): configs.append("# Component for backprop truncation, to avoid gradient blowup in long training examples.") configs.append("component name={0}.cr_trunc type=BackpropTruncationComponent " "dim={1} {2}".format(name, cell_dim + rec_proj_dim, bptrunc_str)) + if lstm_dropout_value != -1.0: + configs.append("component name={0}.cr_trunc.dropout type=DropoutComponent dim={1} {2} {3}".format(name, cell_dim + rec_proj_dim, lstm_dropout_str, lstm_dropout_per_frame_str)) configs.append("# Component specific to 'projected' LSTM (LSTMP), contains both recurrent"); configs.append("# and non-recurrent projections") configs.append("component name={0}.W_rp type=NaturalGradientAffineComponent input-dim={1} " @@ -886,11 +914,21 @@ def generate_lstm_config(self): configs.append("# Note: it's not 100% efficient that we have to stitch the c") configs.append("# and r back together to truncate them but it probably"); configs.append("# makes the deriv truncation more accurate .") - configs.append("component-node name={0}.cr_trunc component={0}.cr_trunc " - "input=Append({0}.c, {0}.r)".format(name)) - configs.append("dim-range-node name={0}.c_trunc input-node={0}.cr_trunc " - "dim-offset=0 dim={1}".format(name, cell_dim)) - configs.append("dim-range-node name={0}.r_trunc input-node={0}.cr_trunc " - "dim-offset={1} dim={2}".format(name, cell_dim, rec_proj_dim)) - configs.append("### End LSTM Layer '{0}'".format(name)) + if lstm_dropout_value != -1.0: + configs.append("component-node name={0}.cr_trunc component={0}.cr_trunc " + "input=Append({0}.c, {0}.r)".format(name)) + configs.append("component-node name={0}.cr_trunc.dropout component={0}.cr_trunc.dropout input={0}.cr_trunc".format(name)) + configs.append("dim-range-node name={0}.c_trunc input-node={0}.cr_trunc.dropout " + "dim-offset=0 dim={1}".format(name, cell_dim)) + configs.append("dim-range-node name={0}.r_trunc input-node={0}.cr_trunc.dropout " + "dim-offset={1} dim={2}".format(name, cell_dim, rec_proj_dim)) + configs.append("### End LSTM Layer 
'{0}'".format(name)) + else: + configs.append("component-node name={0}.cr_trunc component={0}.cr_trunc " + "input=Append({0}.c, {0}.r)".format(name)) + configs.append("dim-range-node name={0}.c_trunc input-node={0}.cr_trunc " + "dim-offset=0 dim={1}".format(name, cell_dim)) + configs.append("dim-range-node name={0}.r_trunc input-node={0}.cr_trunc " + "dim-offset={1} dim={2}".format(name, cell_dim, rec_proj_dim)) + configs.append("### End LSTM Layer '{0}'".format(name)) return configs From 4507183f30e5e517ecbd577cd3b0e9d3e0c300cd Mon Sep 17 00:00:00 2001 From: Petr Stanislav Date: Tue, 24 Jan 2017 20:11:26 +0100 Subject: [PATCH 323/530] [build] fixing patch for OpenFst's compat.h for Windows build (#1373) --- tools/extras/openfstwin-1.3.4.patch | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tools/extras/openfstwin-1.3.4.patch b/tools/extras/openfstwin-1.3.4.patch index 9e624f4db32..2fbb1d1fc27 100644 --- a/tools/extras/openfstwin-1.3.4.patch +++ b/tools/extras/openfstwin-1.3.4.patch @@ -2,16 +2,6 @@ diff --git a/src/include/fst/compat.h b/src/include/fst/compat.h index 00e2dba..ff8bacc 100644 --- a/src/include/fst/compat.h +++ b/src/include/fst/compat.h -@@ -23,7 +23,9 @@ - #ifdef _MSC_VER //AddedPD - #include - typedef SSIZE_T ssize_t; -+#if _MSC_VER < 1900 //AddedYT -- Visual Studio 2016 already has snprintf - #define snprintf _snprintf -+#endif // _MSC_VER < 1900 - #define strtoll _strtoi64 - #ifndef OPENFSTEXPORT - #ifdef _DEBUG @@ -37,7 +39,7 @@ typedef SSIZE_T ssize_t; #pragma comment (lib, "openfst64.lib") #else From 99b7d964d67a344c4519fb093beba6b30449c6d7 Mon Sep 17 00:00:00 2001 From: Arseniy Gorin Date: Tue, 24 Jan 2017 22:40:24 +0300 Subject: [PATCH 324/530] [scripts] lexicon learning: update missing defaults and help message; other fixes (#1360) --- egs/wsj/s5/steps/dict/learn_lexicon.sh | 55 +++++++++++++++++--------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/egs/wsj/s5/steps/dict/learn_lexicon.sh b/egs/wsj/s5/steps/dict/learn_lexicon.sh index 7f32428c059..4b4c177d554 100755 --- a/egs/wsj/s5/steps/dict/learn_lexicon.sh +++ b/egs/wsj/s5/steps/dict/learn_lexicon.sh @@ -27,30 +27,36 @@ # learned lexicon. See the last stage in this script for details. -stage=0 # Begin configuration section. cmd=run.pl -nj= -stage=6 +nj=4 +stage=0 + oov_symbol= lexicon_g2p= + min_prob=0.3 variants_prob_mass=0.7 variants_prob_mass_ref=0.9 + prior_counts_tot=15 prior_mean="0.7,0.2,0.1" num_gauss= num_leaves= retrain_src_mdl=true + cleanup=true # End configuration section. . ./path.sh . utils/parse_options.sh -if [ $# -ne 7 ]; then - echo "Usage: $0 [options] \\" - echo " ." +if [ $# -lt 6 ] || [ $# -gt 7 ]; then + echo "Usage: $0 [options] \\" + echo " [ ]" + echo "e.g.: $0 --oov-symbol \"\" data/local/dict data/local/lm/librispeech-vocab.txt data/train \\" + echo " exp/tri3 data/lang data/local/dict_learned" + echo "" echo " This script does lexicon expansion using a combination of acoustic" echo " evidence and G2P to produce a lexicon that covers words of a target vocab:" echo "" @@ -68,20 +74,20 @@ if [ $# -ne 7 ]; then echo " the reference lang dir which we use to get non-scored-words" echo " like for building new dict dirs" echo " the dict dir where we put the final learned lexicon, whose vocab" - echo " matches ." + echo " matches " + echo " [ ] the temporary dir where most of the intermediate outputs are stored" + echo " (default: \${src-mdl-dir}_lex_learn_work)" echo "" echo "Note: and the vocab of don't have to match. 
For words" echo " who are in but not seen in , their pronunciations" echo " will be given by G2P at the end." echo "" - echo "e.g. $0 data/local/dict data/local/lm/librispeech-vocab.txt data/train \\" - echo " exp/tri3 data/lang data/local/dict_learned" echo "Options:" echo " --stage # stage to run from, to enable resuming from partially" echo " # completed run (default: 0)" echo " --cmd '$cmd' # command to submit jobs with (e.g. run.pl, queue.pl)" echo " --nj # number of parallel jobs" - echo " --oov-symbol '$oov_symbol' # oov symbol, like ." + echo " --oov-symbol # (required option) oov symbol, like ." echo " --g2p-pron-candidates # A lexicon file containing g2p generated pronunciations, for words in acoustic training " echo " # data / target vocabulary. It's optional." echo " --min-prob # The cut-off parameter used to select pronunciation candidates from phonetic" @@ -117,9 +123,20 @@ data=$3 src_mdl_dir=$4 ref_lang=$5 dest_dict=$6 -dir=$7 # Most intermediate outputs will be put here. + +if [ -z "$oov_symbol" ]; then + echo "$0: the --oov-symbol option is required." + exit 1 +fi + +if [ $# -gt 6 ]; then + dir=$7 +else + dir=${src_mdl_dir}_lex_learn_work +fi mkdir -p $dir + if [ $stage -le 0 ]; then echo "$0: Some preparatory work." # Get the word counts of training data. @@ -176,19 +193,21 @@ if [ $stage -le 1 ] && $retrain_src_mdl; then awk 'NR==FNR{a[$1] = 1; next} ($1 in a)' $dir/target_vocab.txt - | \ cat $dir/non_scored_entries - | sort | uniq > $dir/dict_expanded_target_vocab/lexicon.txt - + utils/prepare_lang.sh --phone-symbol-table $ref_lang/phones.txt $dir/dict_expanded_target_vocab \ - $oov_symbol $dir/lang_expanded_target_vocab_tmp $dir/lang_expanded_target_vocab || exit 1; + "$oov_symbol" $dir/lang_expanded_target_vocab_tmp $dir/lang_expanded_target_vocab || exit 1; # Align the acoustic training data using the given src_mdl_dir. alidir=${src_mdl_dir}_ali_$(basename $data) steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ $data $dir/lang_expanded_target_vocab $src_mdl_dir $alidir || exit 1; - + # Train another SAT system on the given data and put it in $dir/${src_mdl_dir}_retrained # this model will be used for phonetic decoding and lattice alignment later on. if [ -z $num_leaves ] || [ -z $num_gauss ] ; then - echo "num_leaves and num_gauss need to be specified." && exit 1; + # infer the model parameters using the inital GMM + num_leaves=`gmm-info ${src_mdl_dir}/final.mdl | grep 'pdfs' | awk '{print $NF-1}'` + num_gauss=`gmm-info ${src_mdl_dir}/final.mdl | grep 'gaussians' | awk '{print $NF-1}'` fi steps/train_sat.sh --cmd "$train_cmd" $num_leaves $num_gauss \ $data $dir/lang_expanded_target_vocab $alidir $dir/${src_mdl_dir}_retrained || exit 1; @@ -231,7 +250,7 @@ if [ $stage -le 2 ]; then cat - $dir/non_scored_entries | \ sort | uniq > $dir/dict_expanded_train/lexicon.txt || exit 1; - utils/prepare_lang.sh $dir/dict_expanded_train $oov_symbol \ + utils/prepare_lang.sh $dir/dict_expanded_train "$oov_symbol" \ $dir/lang_expanded_train_tmp $dir/lang_expanded_train || exit 1; fi @@ -270,7 +289,7 @@ if [ $stage -le 4 ]; then sort | uniq > $dir/dict_combined_iter1/lexicon.txt utils/prepare_lang.sh --phone-symbol-table $ref_lang/phones.txt \ - $dir/dict_combined_iter1 $oov_symbol \ + $dir/dict_combined_iter1 "$oov_symbol" \ $dir/lang_combined_iter1_tmp $dir/lang_combined_iter1 || exit 1; # Generate lattices for the acoustic training data with the combined lexicon. 
@@ -317,7 +336,7 @@ if [ $stage -le 5 ]; then sort | uniq > $dir/dict_combined_iter2/lexicon.txt utils/prepare_lang.sh --phone-symbol-table $ref_lang/phones.txt \ - $dir/dict_combined_iter2 $oov_symbol \ + $dir/dict_combined_iter2 "$oov_symbol" \ $dir/lang_combined_iter2_tmp $dir/lang_combined_iter2 || exit 1; if $retrain_src_mdl; then mdl_dir=$dir/${src_mdl_dir}_retrained; else mdl_dir=$src_mdl_dir; fi From 5c205eae3833fbb066e51ee5e399b03dd260c465 Mon Sep 17 00:00:00 2001 From: Vijayaditya Peddinti Date: Tue, 24 Jan 2017 12:07:19 -0800 Subject: [PATCH 325/530] [scripts] nnet3/report : Added support for parsing latest compute_{train,valid}*.log (#1371) --- .../s5/steps/libs/nnet3/report/log_parse.py | 35 +++++++++++++++++-- .../s5/steps/nnet3/report/generate_plots.py | 34 +++++++++--------- 2 files changed, 50 insertions(+), 19 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/report/log_parse.py b/egs/wsj/s5/steps/libs/nnet3/report/log_parse.py index 88a77d4d2d0..2d7f6f46cce 100755 --- a/egs/wsj/s5/steps/libs/nnet3/report/log_parse.py +++ b/egs/wsj/s5/steps/libs/nnet3/report/log_parse.py @@ -14,6 +14,18 @@ logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) +class KaldiLogParseException(Exception): + """ An Exception class that throws an error when there is an issue in + parsing the log files. Extend this class if more granularity is needed. + """ + def __init__(self, message = None): + if message is not None and message.strip() == "": + message = None + + Exception.__init__(self, + "There was an error while trying to parse the logs." + " Details : \n{0}\n".format(message)) + def parse_progress_logs_for_nonlinearity_stats(exp_dir): """ Parse progress logs for mean and std stats for non-linearities. @@ -279,7 +291,7 @@ def parse_prob_logs(exp_dir, key='accuracy', output="output"): parse_regex = re.compile( ".*compute_prob_.*\.([0-9]+).log:LOG " - ".nnet3.*compute-prob:PrintTotalStats..:" + ".nnet3.*compute-prob.*:PrintTotalStats..:" "nnet.*diagnostics.cc:[0-9]+. 
Overall ([a-zA-Z\-]+) for "
        "'{output}'.*is ([0-9.\-e]+) .*per frame".format(output=output))
@@ -292,19 +304,33 @@ def parse_prob_logs(exp_dir, key='accuracy', output="output"):
             groups = mat_obj.groups()
             if groups[1] == key:
                 train_loss[int(groups[0])] = groups[2]
+    if not train_loss:
+        raise KaldiLogParseException("Could not find any lines with {k} in "
+                                     " {l}".format(k=key, l=train_prob_files))
+
     for line in valid_prob_strings.split('\n'):
         mat_obj = parse_regex.search(line)
         if mat_obj is not None:
             groups = mat_obj.groups()
             if groups[1] == key:
                 valid_loss[int(groups[0])] = groups[2]
+
+    if not valid_loss:
+        raise KaldiLogParseException("Could not find any lines with {k} in "
+                                     " {l}".format(k=key, l=valid_prob_files))
+
     iters = list(set(valid_loss.keys()).intersection(train_loss.keys()))
+    if not iters:
+        raise KaldiLogParseException("Could not find any common iterations with"
+                                     " key {k} in both {tl} and {vl}".format(
+                                         k=key, tl=train_prob_files, vl=valid_prob_files))
     iters.sort()
     return map(lambda x: (int(x), float(train_loss[x]), float(valid_loss[x])),
                iters)
 
-def generate_accuracy_report(exp_dir, key="accuracy", output="output"):
+
+def generate_acc_logprob_report(exp_dir, key="accuracy", output="output"):
     times = parse_train_logs(exp_dir)
     data = parse_prob_logs(exp_dir, key, output)
     report = []
@@ -315,6 +341,11 @@ def generate_accuracy_report(exp_dir, key="accuracy", output="output"):
                 x[1], x[2], x[2]-x[1]))
         except KeyError:
             continue
+    if len(report) - 1 == 0:
+        raise KaldiLogParseException("Could not find any lines with {k} in "
+                                     " {e}/log/compute_prob_train.*.log or "
+                                     " {e}/log/compute_prob_valid.*.log or both".format(
+                                         k=key, e=exp_dir))
 
     total_time = 0
     for iter in times.keys():
diff --git a/egs/wsj/s5/steps/nnet3/report/generate_plots.py b/egs/wsj/s5/steps/nnet3/report/generate_plots.py
index 6db262aed60..bf9bcd1d45c 100755
--- a/egs/wsj/s5/steps/nnet3/report/generate_plots.py
+++ b/egs/wsj/s5/steps/nnet3/report/generate_plots.py
@@ -156,10 +156,10 @@ def latex_compliant_name(name_string):
     return node_name_string
 
 
-def generate_accuracy_plots(exp_dir, output_dir, plot, key='accuracy',
-                            file_basename='accuracy', comparison_dir=None,
-                            start_iter=1,
-                            latex_report=None, output_name='output'):
+def generate_acc_logprob_plots(exp_dir, output_dir, plot, key='accuracy',
+                               file_basename='accuracy', comparison_dir=None,
+                               start_iter=1, latex_report=None, output_name='output'):
+
     assert start_iter >= 1
 
     if plot:
@@ -170,20 +170,20 @@ def generate_acc_logprob_plots(exp_dir, output_dir, plot, key='accuracy',
         dirs = [exp_dir] + comparison_dir
     index = 0
     for dir in dirs:
-        [accuracy_report, accuracy_times,
-         accuracy_data] = log_parse.generate_accuracy_report(dir, key,
-                                                             output_name)
+        [report, times, data] = log_parse.generate_acc_logprob_report(dir, key,
+                                                                      output_name)
 
         if index == 0:
             # this is the main experiment directory
             with open("{0}/{1}.log".format(output_dir, file_basename),
                       "w") as f:
-                f.write(accuracy_report)
+                f.write(report)
 
         if plot:
             color_val = g_plot_colors[index]
-            data = np.array(accuracy_data)
+            data = np.array(data)
             if data.shape[0] == 0:
-                raise Exception("Couldn't find any rows for the accuracy plot")
+                raise Exception("Couldn't find any rows for the "
+                                "accuracy/log-probability plot")
             data = data[data[:, 0] >= start_iter, :]
             plot_handle, = plt.plot(data[:, 0], data[:, 1], color=color_val,
                                     linestyle="--",
@@ -594,28 +594,28 @@ def generate_plots(exp_dir, output_dir, output_names, comparison_dir=None,
     for (output_name, objective_type) in output_names:
         if objective_type == 
"linear": logger.info("Generating accuracy plots") - generate_accuracy_plots( + generate_acc_logprob_plots( exp_dir, output_dir, g_plot, key='accuracy', file_basename='accuracy', comparison_dir=comparison_dir, start_iter=start_iter, latex_report=latex_report, output_name=output_name) logger.info("Generating log-likelihood plots") - generate_accuracy_plots( + generate_acc_logprob_plots( exp_dir, output_dir, g_plot, key='log-likelihood', file_basename='loglikelihood', comparison_dir=comparison_dir, start_iter=start_iter, latex_report=latex_report, output_name=output_name) elif objective_type == "chain": logger.info("Generating log-probability plots") - generate_accuracy_plots( + generate_acc_logprob_plots( exp_dir, output_dir, g_plot, key='log-probability', file_basename='log_probability', comparison_dir=comparison_dir, start_iter=start_iter, latex_report=latex_report, output_name=output_name) else: logger.info("Generating " + objective_type + " objective plots") - generate_accuracy_plots( + generate_acc_logprob_plots( exp_dir, output_dir, g_plot, key='objective', file_basename='objective', comparison_dir=comparison_dir, start_iter=start_iter, @@ -659,13 +659,13 @@ def main(): output_nodes.append(('output', 'chain')) else: output_nodes.append(('output', 'linear')) - + if args.comparison_dir is not None: generate_plots(args.exp_dir[0], args.output_dir, output_nodes, comparison_dir=args.comparison_dir, start_iter=args.start_iter) - else: - if len(args.exp_dir) == 1: + else: + if len(args.exp_dir) == 1: generate_plots(args.exp_dir[0], args.output_dir, output_nodes, start_iter=args.start_iter) if len(args.exp_dir) > 1: From f42b3ced09eecacdc037628ed06b05f91ef96c36 Mon Sep 17 00:00:00 2001 From: Karel Vesely Date: Tue, 24 Jan 2017 22:42:58 +0100 Subject: [PATCH 326/530] [scripts] nnet1,make_denlats: prevent crash when utt-ids contain slashes (#1374) --- egs/wsj/s5/steps/nnet/make_denlats.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/egs/wsj/s5/steps/nnet/make_denlats.sh b/egs/wsj/s5/steps/nnet/make_denlats.sh index 607fc97ad72..bf1074b576e 100755 --- a/egs/wsj/s5/steps/nnet/make_denlats.sh +++ b/egs/wsj/s5/steps/nnet/make_denlats.sh @@ -154,7 +154,9 @@ if [ $sub_split -eq 1 ]; then # Prepare 'scp' for storing lattices separately and gzipped for n in `seq $nj`; do [ ! -d $dir/lat$n ] && mkdir $dir/lat$n; - cat $sdata/$n/feats.scp | awk '{ print $1" | gzip -c >'$dir'/lat'$n'/"$1".gz"; }' + cat $sdata/$n/feats.scp | \ + awk -v dir=$dir -v n=$n '{ utt=$1; utt_noslash=gensub("/","_","g",utt); + printf("%s | gzip -c >%s/lat%d/%s.gz\n", utt, dir, n, utt_noslash); }' done >$dir/lat.store_separately_as_gz.scp # Generate the lattices $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \ @@ -183,8 +185,10 @@ else # Prepare 'scp' for storing lattices separately and gzipped for k in `seq $sub_split`; do [ ! 
-d $dir/lat$n/$k ] && mkdir -p $dir/lat$n/$k; - cat $sdata2/$k/feats.scp | awk '{ print $1" | gzip -c >'$dir'/lat'$n'/'$k'/"$1".gz"; }' - done >$dir/lat.$n.store_separately_as_gz.scp + cat $sdata2/$k/feats.scp | \ + awk -v dir=$dir -v n=$n -v k=$k '{ utt=$1; utt_noslash=gensub("/","_","g",utt); + printf("%s | gzip -c >%s/lat%d/%d/%s.gz\n", utt, dir, n, k, utt_noslash); }' + done >$dir/lat.${n}.store_separately_as_gz.scp # Generate lattices $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \ latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ From 82167f9718937a39b612bfd2ea102f97b6c99af7 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Tue, 24 Jan 2017 18:29:37 -0500 Subject: [PATCH 327/530] [scripts] remove import statement that was creating problems in nnet3 scripts. --- egs/wsj/s5/steps/libs/nnet3/train/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/__init__.py b/egs/wsj/s5/steps/libs/nnet3/train/__init__.py index ada7230865b..0503c0135cd 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/__init__.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/__init__.py @@ -9,7 +9,3 @@ frame_level_objf -- For both recurrent and non-recurrent architectures chain_objf -- LF-MMI objective training """ - -import common - -__all__ = ["common"] From c003aa763530da4b95d5bc54ce8f48ea2e27473e Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 25 Jan 2017 01:20:05 -0500 Subject: [PATCH 328/530] [src] Adding chain version of the combination changes from the last 2 commits --- src/nnet3/nnet-chain-combine.cc | 174 ++++++++++++++++++++++---------- src/nnet3/nnet-chain-combine.h | 54 ++++++---- 2 files changed, 157 insertions(+), 71 deletions(-) diff --git a/src/nnet3/nnet-chain-combine.cc b/src/nnet3/nnet-chain-combine.cc index 810ee2b471a..b80c585e7fa 100644 --- a/src/nnet3/nnet-chain-combine.cc +++ b/src/nnet3/nnet-chain-combine.cc @@ -38,6 +38,13 @@ NnetChainCombiner::NnetChainCombiner(const NnetCombineConfig &combine_config, nnet_params_(std::min(num_nnets, combine_config_.max_effective_inputs), NumParameters(first_nnet)), tot_input_weighting_(nnet_params_.NumRows()) { + + if (combine_config_.sum_to_one_penalty != 0.0 && + combine_config_.enforce_sum_to_one) { + KALDI_WARN << "--sum-to-one-penalty=" << combine_config_.sum_to_one_penalty + << " is nonzero, so setting --enforce-sum-to-one=false."; + combine_config_.enforce_sum_to_one = false; + } SubVector first_params(nnet_params_, 0); VectorizeNnet(nnet_, &first_params); tot_input_weighting_(0) += 1.0; @@ -132,12 +139,12 @@ void NnetChainCombiner::Combine() { // itself, so this is BFGS. 
lbfgs_options.first_step_impr = combine_config_.initial_impr; - Vector params(dim), deriv(dim); - BaseFloat objf, initial_objf; + Vector params(dim), deriv(dim); + double objf, initial_objf; GetInitialParameters(¶ms); - OptimizeLbfgs lbfgs(params, lbfgs_options); + OptimizeLbfgs lbfgs(params, lbfgs_options); for (int32 i = 0; i < combine_config_.num_iters; i++) { params.CopyFromVec(lbfgs.GetProposedValue()); @@ -148,12 +155,25 @@ void NnetChainCombiner::Combine() { lbfgs.DoStep(objf, deriv); } - KALDI_LOG << "Combining nnets, objective function changed from " - << initial_objf << " to " << objf; + if (!combine_config_.sum_to_one_penalty) { + KALDI_LOG << "Combining nnets, objective function changed from " + << initial_objf << " to " << objf; + } else { + Vector weights(WeightDim()); + GetWeights(params, &weights); + bool print_weights = true; + double penalty = GetSumToOnePenalty(weights, NULL, print_weights); + // note: initial_objf has no penalty term because it summed exactly + // to one. + KALDI_LOG << "Combining nnets, objective function changed from " + << initial_objf << " to " << objf << " = " + << (objf - penalty) << " + " << penalty; + } + // must recompute nnet_ if "params" is not exactly equal to the // final params that LB - Vector final_params(dim); + Vector final_params(dim); final_params.CopyFromVec(lbfgs.GetValue(&objf)); if (!params.ApproxEqual(final_params, 0.0)) { // the following call makes sure that nnet_ corresponds to the parameters @@ -164,9 +184,8 @@ void NnetChainCombiner::Combine() { } -void NnetChainCombiner::PrintParams(const VectorBase ¶ms) const { - - Vector weights(params.Dim()), normalized_weights(params.Dim()); +void NnetChainCombiner::PrintParams(const VectorBase ¶ms) const { + Vector weights(params.Dim()), normalized_weights(params.Dim()); GetWeights(params, &weights); GetNormalizedWeights(weights, &normalized_weights); int32 num_models = nnet_params_.NumRows(), @@ -216,21 +235,21 @@ void NnetChainCombiner::PrintParams(const VectorBase ¶ms) const { bool NnetChainCombiner::SelfTestDerivatives() { int32 num_tests = 2; // more properly, this is the number of dimensions in a // single test. - BaseFloat delta = 0.001; + double delta = 0.001; int32 dim = ParameterDim(); - Vector params(dim), deriv(dim); - Vector predicted_changes(num_tests), + Vector params(dim), deriv(dim); + Vector predicted_changes(num_tests), observed_changes(num_tests); GetInitialParameters(¶ms); - BaseFloat initial_objf = ComputeObjfAndDerivFromParameters(params, + double initial_objf = ComputeObjfAndDerivFromParameters(params, &deriv); for (int32 i = 0; i < num_tests; i++) { - Vector new_deriv(dim), offset(dim), new_params(params); + Vector new_deriv(dim), offset(dim), new_params(params); offset.SetRandn(); new_params.AddVec(delta, offset); - BaseFloat new_objf = ComputeObjfAndDerivFromParameters(new_params, + double new_objf = ComputeObjfAndDerivFromParameters(new_params, &new_deriv); // for predicted changes, interpolate old and new derivs. predicted_changes(i) = @@ -238,7 +257,7 @@ bool NnetChainCombiner::SelfTestDerivatives() { 0.5 * VecVec(new_params, new_deriv) - 0.5 * VecVec(params, new_deriv); observed_changes(i) = new_objf - initial_objf; } - BaseFloat threshold = 0.1; + double threshold = 0.1; KALDI_LOG << "predicted_changes = " << predicted_changes; KALDI_LOG << "observed_changes = " << observed_changes; if (!ApproxEqual(predicted_changes, observed_changes, threshold)) { @@ -255,23 +274,23 @@ void NnetChainCombiner::SelfTestModelDerivatives() { // single test. 
int32 dim = ParameterDim(); - Vector params(dim), deriv(dim); - Vector predicted_changes(num_tests), + Vector params(dim), deriv(dim); + Vector predicted_changes(num_tests), observed_changes(num_tests); GetInitialParameters(¶ms); - Vector weights(WeightDim()), normalized_weights(WeightDim()), - nnet_params(NnetParameterDim(), kUndefined), + Vector weights(WeightDim()), normalized_weights(WeightDim()); + Vector nnet_params(NnetParameterDim(), kUndefined), nnet_deriv(NnetParameterDim(), kUndefined); GetWeights(params, &weights); GetNormalizedWeights(weights, &normalized_weights); GetNnetParameters(normalized_weights, &nnet_params); - BaseFloat initial_objf = ComputeObjfAndDerivFromNnet(nnet_params, + double initial_objf = ComputeObjfAndDerivFromNnet(nnet_params, &nnet_deriv); - BaseFloat delta = 0.002 * std::sqrt(VecVec(nnet_params, nnet_params) / - NnetParameterDim()); + double delta = 0.002 * std::sqrt(VecVec(nnet_params, nnet_params) / + NnetParameterDim()); for (int32 i = 0; i < num_tests; i++) { @@ -279,7 +298,7 @@ void NnetChainCombiner::SelfTestModelDerivatives() { offset(NnetParameterDim()), new_nnet_params(nnet_params); offset.SetRandn(); new_nnet_params.AddVec(delta, offset); - BaseFloat new_objf = ComputeObjfAndDerivFromNnet(new_nnet_params, + double new_objf = ComputeObjfAndDerivFromNnet(new_nnet_params, &new_nnet_deriv); // for predicted changes, interpolate old and new derivs. predicted_changes(i) = @@ -289,7 +308,7 @@ void NnetChainCombiner::SelfTestModelDerivatives() { 0.5 * VecVec(nnet_params, new_nnet_deriv); observed_changes(i) = new_objf - initial_objf; } - BaseFloat threshold = 0.1; + double threshold = 0.1; KALDI_LOG << "model-derivatives: predicted_changes = " << predicted_changes; KALDI_LOG << "model-derivatives: observed_changes = " << observed_changes; if (!ApproxEqual(predicted_changes, observed_changes, threshold)) @@ -307,7 +326,7 @@ int32 NnetChainCombiner::ParameterDim() const { } -void NnetChainCombiner::GetInitialParameters(VectorBase *params) const { +void NnetChainCombiner::GetInitialParameters(VectorBase *params) const { KALDI_ASSERT(params->Dim() == ParameterDim()); params->Set(1.0 / nnet_params_.NumRows()); if (combine_config_.enforce_positive_weights) { @@ -317,8 +336,8 @@ void NnetChainCombiner::GetInitialParameters(VectorBase *params) cons } } -void NnetChainCombiner::GetWeights(const VectorBase ¶ms, - VectorBase *weights) const { +void NnetChainCombiner::GetWeights(const VectorBase ¶ms, + VectorBase *weights) const { KALDI_ASSERT(weights->Dim() == WeightDim()); if (combine_config_.separate_weights_per_component) { weights->CopyFromVec(params); @@ -338,12 +357,12 @@ void NnetChainCombiner::GetWeights(const VectorBase ¶ms, } -void NnetChainCombiner::GetParamsDeriv(const VectorBase &weights, - const VectorBase &weights_deriv, - VectorBase *param_deriv) { +void NnetChainCombiner::GetParamsDeriv(const VectorBase &weights, + const VectorBase &weights_deriv, + VectorBase *param_deriv) { KALDI_ASSERT(weights.Dim() == WeightDim() && param_deriv->Dim() == ParameterDim()); - Vector preexp_weights_deriv(weights_deriv); + Vector preexp_weights_deriv(weights_deriv); if (combine_config_.enforce_positive_weights) { // to enforce positive weights we first compute weights (call these // preexp_weights) and then take exponential. Note, d/dx exp(x) = exp(x). 
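The exponential reparameterization mentioned in the comment above can be checked numerically; a minimal Python sketch, with hypothetical values:

    # With w = exp(p) every weight stays positive, and by the chain rule the
    # derivative w.r.t. p is the derivative w.r.t. w scaled by exp(p) = w.
    import math

    params = [-0.5, 0.0, 0.3]                 # unconstrained parameters p
    weights = [math.exp(p) for p in params]   # weights w, always positive
    weights_deriv = [0.2, -1.0, 0.5]          # hypothetical dF/dw
    params_deriv = [g * math.exp(p)           # dF/dp = dF/dw * exp(p)
                    for p, g in zip(params, weights_deriv)]
    print(weights)
    print(params_deriv)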
@@ -362,8 +381,54 @@ void NnetChainCombiner::GetParamsDeriv(const VectorBase<BaseFloat> &weights,
   }
 }
 
+double NnetChainCombiner::GetSumToOnePenalty(
+    const VectorBase<double> &weights,
+    VectorBase<double> *weights_penalty_deriv,
+    bool print_weights) const {
 
-void NnetChainCombiner::GetNnetParameters(const Vector<BaseFloat> &weights,
+  KALDI_ASSERT(combine_config_.sum_to_one_penalty >= 0.0);
+  double penalty = combine_config_.sum_to_one_penalty;
+  if (penalty == 0.0) {
+    weights_penalty_deriv->SetZero();
+    return 0.0;
+  }
+  double ans = 0.0;
+  int32 num_uc = NumUpdatableComponents(),
+      num_models = nnet_params_.NumRows();
+  Vector<double> tot_weights(num_uc);
+  std::ostringstream tot_weight_info;
+  for (int32 c = 0; c < num_uc; c++) {
+    double this_total_weight = 0.0;
+    for (int32 m = 0; m < num_models; m++) {
+      int32 index = m * num_uc + c;
+      double this_weight = weights(index);
+      this_total_weight += this_weight;
+    }
+    tot_weights(c) = this_total_weight;
+    ans += -0.5 * penalty *
+        (this_total_weight - 1.0) * (this_total_weight - 1.0);
+    if (weights_penalty_deriv != NULL) {
+      KALDI_ASSERT(weights.Dim() == weights_penalty_deriv->Dim());
+      // this_total_weight_deriv is the derivative of the penalty
+      // term w.r.t. this component's total weight.
+      double this_total_weight_deriv =
+          penalty * (1.0 - this_total_weight);
+      for (int32 m = 0; m < num_models; m++) {
+        int32 index = m * num_uc + c;
+        (*weights_penalty_deriv)(index) = this_total_weight_deriv;
+      }
+    }
+  }
+  if (print_weights) {
+    Vector<BaseFloat> tot_weights_float(tot_weights);
+    KALDI_LOG << "Total weights per component: "
+              << PrintVectorPerUpdatableComponent(nnet_,
+                                                  tot_weights_float);
+  }
+  return ans;
+}
+
+void NnetChainCombiner::GetNnetParameters(const Vector<double> &weights,
                                           VectorBase<BaseFloat> *nnet_params) const {
   KALDI_ASSERT(nnet_params->Dim() == nnet_params_.NumCols());
   nnet_params->SetZero();
@@ -389,7 +454,7 @@ void NnetChainCombiner::GetNnetParameters(const Vector<BaseFloat> &weights,
 // compare GetNnetParameters.
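// Worked numbers for the penalty above (a made-up example): with
// sum_to_one_penalty = 10.0 and one component whose weights total 1.2,
// the penalty contribution is -0.5 * 10.0 * (1.2 - 1.0)^2 = -0.2, and
// each of that component's weights receives derivative
// 10.0 * (1.0 - 1.2) = -2.0, pushing the total back toward 1.0.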
void NnetChainCombiner::GetWeightsDeriv(
     const VectorBase<BaseFloat> &nnet_params_deriv,
-    VectorBase<BaseFloat> *weights_deriv) {
+    VectorBase<double> *weights_deriv) {
   KALDI_ASSERT(nnet_params_deriv.Dim() == nnet_params_.NumCols() &&
                weights_deriv->Dim() == WeightDim());
   int32 num_uc = NumUpdatableComponents(),
@@ -441,30 +506,35 @@ double NnetChainCombiner::ComputeObjfAndDerivFromNnet(
 
 double NnetChainCombiner::ComputeObjfAndDerivFromParameters(
-    VectorBase<BaseFloat> &params,
-    VectorBase<BaseFloat> *params_deriv) {
-  Vector<BaseFloat> weights(WeightDim()), normalized_weights(WeightDim()),
-      nnet_params(NnetParameterDim(), kUndefined),
-      nnet_params_deriv(NnetParameterDim(), kUndefined),
+    VectorBase<double> &params,
+    VectorBase<double> *params_deriv) {
+  Vector<double> weights(WeightDim()), normalized_weights(WeightDim()),
+      weights_sum_to_one_penalty_deriv(WeightDim()),
       normalized_weights_deriv(WeightDim()), weights_deriv(WeightDim());
+  Vector<BaseFloat>
+      nnet_params(NnetParameterDim(), kUndefined),
+      nnet_params_deriv(NnetParameterDim(), kUndefined);
   GetWeights(params, &weights);
+  double ans = GetSumToOnePenalty(weights, &weights_sum_to_one_penalty_deriv);
   GetNormalizedWeights(weights, &normalized_weights);
   GetNnetParameters(normalized_weights, &nnet_params);
-  double ans = ComputeObjfAndDerivFromNnet(nnet_params, &nnet_params_deriv);
+  ans += ComputeObjfAndDerivFromNnet(nnet_params, &nnet_params_deriv);
   if (ans != ans || ans - ans != 0)  // NaN or inf
     return ans;  // No point computing derivative
   GetWeightsDeriv(nnet_params_deriv, &normalized_weights_deriv);
   GetUnnormalizedWeightsDeriv(weights, normalized_weights_deriv,
                               &weights_deriv);
+  weights_deriv.AddVec(1.0, weights_sum_to_one_penalty_deriv);
   GetParamsDeriv(weights, weights_deriv, params_deriv);
   return ans;
 }
 
-// enforces the constraint that the weights for each component must sum to one.
+// enforces the constraint that the weights for each component must sum to one,
+// if necessary.
 void NnetChainCombiner::GetNormalizedWeights(
-    const VectorBase<BaseFloat> &unnorm_weights,
-    VectorBase<BaseFloat> *norm_weights) const {
+    const VectorBase<double> &unnorm_weights,
+    VectorBase<double> *norm_weights) const {
   if (!combine_config_.enforce_sum_to_one) {
     norm_weights->CopyFromVec(unnorm_weights);
     return;
@@ -472,12 +542,12 @@ void NnetChainCombiner::GetNormalizedWeights(
   int32 num_uc = NumUpdatableComponents(),
       num_models = nnet_params_.NumRows();
   for (int32 c = 0; c < num_uc; c++) {
-    BaseFloat sum = 0.0;
+    double sum = 0.0;
     for (int32 m = 0; m < num_models; m++) {
       int32 index = m * num_uc + c;
       sum += unnorm_weights(index);
     }
-    BaseFloat inv_sum = 1.0 / sum;  // if it's NaN then it's OK, we'll get NaN
+    double inv_sum = 1.0 / sum;  // if it's NaN then it's OK, we'll get NaN
                                  // weights and eventually -inf objective.
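// Made-up numbers for the normalization just below: with one updatable
// component (num_uc == 1) and three models whose unnormalized weights are
// 0.5, 0.3 and 0.4, we get sum = 1.2 and inv_sum = 1/1.2, giving
// normalized weights of about 0.417, 0.250 and 0.333, which sum to one.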
    for (int32 m = 0; m < num_models; m++) {
       int32 index = m * num_uc + c;
@@ -487,9 +557,9 @@ void NnetChainCombiner::GetNormalizedWeights(
 }
 
 void NnetChainCombiner::GetUnnormalizedWeightsDeriv(
-    const VectorBase<BaseFloat> &unnorm_weights,
-    const VectorBase<BaseFloat> &norm_weights_deriv,
-    VectorBase<BaseFloat> *unnorm_weights_deriv) {
+    const VectorBase<double> &unnorm_weights,
+    const VectorBase<double> &norm_weights_deriv,
+    VectorBase<double> *unnorm_weights_deriv) {
   if (!combine_config_.enforce_sum_to_one) {
     unnorm_weights_deriv->CopyFromVec(norm_weights_deriv);
     return;
@@ -497,13 +567,13 @@ void NnetChainCombiner::GetUnnormalizedWeightsDeriv(
   int32 num_uc = NumUpdatableComponents(),
       num_models = nnet_params_.NumRows();
   for (int32 c = 0; c < num_uc; c++) {
-    BaseFloat sum = 0.0;
+    double sum = 0.0;
     for (int32 m = 0; m < num_models; m++) {
       int32 index = m * num_uc + c;
       sum += unnorm_weights(index);
     }
-    BaseFloat inv_sum = 1.0 / sum;
-    BaseFloat inv_sum_deriv = 0.0;
+    double inv_sum = 1.0 / sum;
+    double inv_sum_deriv = 0.0;
     for (int32 m = 0; m < num_models; m++) {
       int32 index = m * num_uc + c;
       // in the forward direction, we'd do:
@@ -512,7 +582,7 @@ void NnetChainCombiner::GetUnnormalizedWeightsDeriv(
       inv_sum_deriv += norm_weights_deriv(index) * unnorm_weights(index);
     }
     // note: d/dx (1/x) = -1/x^2
-    BaseFloat sum_deriv = -1.0 * inv_sum_deriv * inv_sum * inv_sum;
+    double sum_deriv = -1.0 * inv_sum_deriv * inv_sum * inv_sum;
     for (int32 m = 0; m < num_models; m++) {
       int32 index = m * num_uc + c;
       (*unnorm_weights_deriv)(index) += sum_deriv;
diff --git a/src/nnet3/nnet-chain-combine.h b/src/nnet3/nnet-chain-combine.h
index 6ef882ecc38..3aeb3882650 100644
--- a/src/nnet3/nnet-chain-combine.h
+++ b/src/nnet3/nnet-chain-combine.h
@@ -62,7 +62,7 @@ class NnetChainCombiner {
   ~NnetChainCombiner() { delete prob_computer_; }
  private:
-  const NnetCombineConfig &combine_config_;
+  NnetCombineConfig combine_config_;
   const chain::ChainTrainingOptions &chain_config_;
   const std::vector<NnetChainExample> &egs_;
@@ -87,8 +87,9 @@ class NnetChainCombiner {
   Matrix<BaseFloat> nnet_params_;
 
   // This vector has the same dimension as nnet_params_.NumRows(),
-  // and helps us normalize so each row of nnet_params correspondss to
-  // a weighted average of its inputs.
+  // and helps us normalize so each row of nnet_params corresponds to
+  // a weighted average of its inputs (will be all ones if
+  // config_.max_effective_inputs >= the number of nnets provided).
   Vector<BaseFloat> tot_input_weighting_;
 
   // returns the parameter dimension, i.e. the dimension of the parameters that
@@ -110,7 +111,7 @@ class NnetChainCombiner {
   // Computes the initial parameters.  The parameters are the underlying thing
   // that we optimize; their dimension equals ParameterDim().  They are not the same
   // thing as the nnet parameters.
-  void GetInitialParameters(VectorBase<BaseFloat> *params) const;
+  void GetInitialParameters(VectorBase<double> *params) const;
 
   // Tests that derivatives are accurate.  Prints warning and returns false if not.
   bool SelfTestDerivatives();
@@ -120,33 +121,48 @@ class NnetChainCombiner {
 
   // prints the parameters via logging statements.
-  void PrintParams(const VectorBase<BaseFloat> &params) const;
+  void PrintParams(const VectorBase<double> &params) const;
 
   // This function computes the objective function (and its derivative, if the objective
   // function is finite) at the given value of the parameters (the parameters we're optimizing,
   // i.e. the combination weights; not the nnet parameters.  This function calls most of the
   // functions below.
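// Our summary of the data flow implied by the declarations above and by
// ComputeObjfAndDerivFromParameters() (a reading aid, not code from the patch):
//   params --GetWeights--> weights --GetNormalizedWeights-->
//   normalized_weights --GetNnetParameters--> nnet_params
//   --ComputeObjfAndDerivFromNnet--> objf,
// with derivatives propagated back through GetWeightsDeriv(),
// GetUnnormalizedWeightsDeriv() and GetParamsDeriv() in reverse order.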
double ComputeObjfAndDerivFromParameters(
-      VectorBase<BaseFloat> &params,
-      VectorBase<BaseFloat> *params_deriv);
+      VectorBase<double> &params,
+      VectorBase<double> *params_deriv);
 
   // Computes the weights from the parameters in a config-dependent way.  The
   // weight dimension is always (the number of updatable components times
   // nnet_params_.NumRows()).
-  void GetWeights(const VectorBase<BaseFloat> &params,
-                  VectorBase<BaseFloat> *weights) const;
+  void GetWeights(const VectorBase<double> &params,
+                  VectorBase<double> *weights) const;
 
   // Given the raw weights: if config_.enforce_sum_to_one, then compute weights
   // with sum-to-one constraint per component included; else just copy input to
   // output.
-  void GetNormalizedWeights(const VectorBase<BaseFloat> &unnorm_weights,
-                            VectorBase<BaseFloat> *norm_weights) const;
+  void GetNormalizedWeights(const VectorBase<double> &unnorm_weights,
+                            VectorBase<double> *norm_weights) const;
+
+  // if config_.sum_to_one_penalty is 0.0, returns 0.0 and sets
+  // weights_penalty_deriv to 0.0; else it computes, for each
+  // updatable component u the total weight w_u, returns the value
+  // -0.5 * config_.sum_to_one_penalty * sum_u (w_u - 1.0)^2;
+  // and sets 'weights_penalty_deriv' to the derivative of this value
+  // w.r.t. the weights.
+  // Note: config_.sum_to_one_penalty is exclusive with
+  // config_.enforce_sum_to_one, so there is really no distinction between
+  // normalized and unnormalized weights here (since normalization would be a
+  // no-op).
+  double GetSumToOnePenalty(const VectorBase<double> &weights,
+                            VectorBase<double> *weights_penalty_deriv,
+                            bool print_weights = false) const;
+
   // Computes the nnet-parameter vector from the normalized weights and
   // nnet_params_, as a vector.  (See the functions Vectorize() and
   // UnVectorize() for how they relate to the nnet's components' parameters).
-  void GetNnetParameters(const Vector<BaseFloat> &normalized_weights,
+  void GetNnetParameters(const Vector<double> &normalized_weights,
                          VectorBase<BaseFloat> *nnet_params) const;
 
   // This function computes the objective function (and its derivative, if the objective
@@ -158,23 +174,23 @@ class NnetChainCombiner {
   // Given an objective-function derivative with respect to the nnet parameters,
   // computes the derivative with respect to the (normalized) weights.
   void GetWeightsDeriv(const VectorBase<BaseFloat> &nnet_params_deriv,
-                       VectorBase<BaseFloat> *normalized_weights_deriv);
+                       VectorBase<double> *normalized_weights_deriv);
 
   // Computes the derivative w.r.t. the unnormalized weights, by propagating
   // through the normalization operation.
   // If config_.enforce_sum_to_one == false, just copies norm_weights_deriv to
   // unnorm_weights_deriv.
-  void GetUnnormalizedWeightsDeriv(const VectorBase<BaseFloat> &unnorm_weights,
-                                   const VectorBase<BaseFloat> &norm_weights_deriv,
-                                   VectorBase<BaseFloat> *unnorm_weights_deriv);
+  void GetUnnormalizedWeightsDeriv(const VectorBase<double> &unnorm_weights,
+                                   const VectorBase<double> &norm_weights_deriv,
+                                   VectorBase<double> *unnorm_weights_deriv);
 
   // Given a derivative w.r.t. the weights, outputs a derivative w.r.t.
  // the params
-  void GetParamsDeriv(const VectorBase<BaseFloat> &weights,
-                      const VectorBase<BaseFloat> &weight_deriv,
-                      VectorBase<BaseFloat> *param_deriv);
+  void GetParamsDeriv(const VectorBase<double> &weights,
+                      const VectorBase<double> &weight_deriv,
+                      VectorBase<double> *param_deriv);
 
   void ComputeUpdatableComponentDims();
   void FinishPreprocessingInput();
 
From 9208165a58178c757a5afdbdcd79809c1aa91146 Mon Sep 17 00:00:00 2001
From: Gaofeng Cheng <770579626@qq.com>
Date: Wed, 25 Jan 2017 14:29:02 +0800
Subject: [PATCH 329/530] [src] nnet3: Add the "per-frame" option to
 DropoutComponent (#1324)

---
 src/nnet3/nnet-simple-component.cc | 72 ++++++++++++++++++++++--------
 src/nnet3/nnet-simple-component.h  | 20 ++++++---
 src/nnet3/nnet-utils.cc            | 12 ++---
 src/nnet3bin/nnet3-copy.cc         |  4 +-
 4 files changed, 77 insertions(+), 31 deletions(-)

diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc
index 741ead92c6a..5935b4dacad 100644
--- a/src/nnet3/nnet-simple-component.cc
+++ b/src/nnet3/nnet-simple-component.cc
@@ -87,27 +87,34 @@ void PnormComponent::Write(std::ostream &os, bool binary) const {
 }
 
-void DropoutComponent::Init(int32 dim, BaseFloat dropout_proportion) {
+void DropoutComponent::Init(int32 dim, BaseFloat dropout_proportion,
+                            bool dropout_per_frame) {
   dropout_proportion_ = dropout_proportion;
+  dropout_per_frame_ = dropout_per_frame;
   dim_ = dim;
 }
 
 void DropoutComponent::InitFromConfig(ConfigLine *cfl) {
   int32 dim = 0;
   BaseFloat dropout_proportion = 0.0;
+  bool dropout_per_frame = false;
   bool ok = cfl->GetValue("dim", &dim) &&
       cfl->GetValue("dropout-proportion", &dropout_proportion);
+  cfl->GetValue("dropout-per-frame", &dropout_per_frame);
+  // dropout-per-frame defaults to false (i.e. ordinary per-element
+  // dropout) if not declared in the config.
   if (!ok || cfl->HasUnusedValues() || dim <= 0 ||
       dropout_proportion < 0.0 || dropout_proportion > 1.0)
-    KALDI_ERR << "Invalid initializer for layer of type "
-              << Type() << ": \"" << cfl->WholeLine() << "\"";
-  Init(dim, dropout_proportion);
+    KALDI_ERR << "Invalid initializer for layer of type "
+              << Type() << ": \"" << cfl->WholeLine() << "\"";
+  Init(dim, dropout_proportion, dropout_per_frame);
 }
 
 std::string DropoutComponent::Info() const {
   std::ostringstream stream;
   stream << Type() << ", dim=" << dim_
-         << ", dropout-proportion=" << dropout_proportion_;
+         << ", dropout-proportion=" << dropout_proportion_
+         << ", dropout-per-frame=" << (dropout_per_frame_ ? "true" : "false");
   return stream.str();
 }
@@ -119,16 +126,29 @@ void DropoutComponent::Propagate(const ComponentPrecomputedIndexes *indexes,
   BaseFloat dropout = dropout_proportion_;
   KALDI_ASSERT(dropout >= 0.0 && dropout <= 1.0);
 
+  if (!dropout_per_frame_) {
+    // This const_cast is only safe assuming you don't attempt
+    // to use multi-threaded code with the GPU.
+    const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
 
-  // This const_cast is only safe assuming you don't attempt
-  // to use multi-threaded code with the GPU.
-  const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(out);
+    out->Add(-dropout);  // now, a proportion "dropout" will be <0.0
+    // apply the function (x>0?1:0).  Now, a proportion
+    // "dropout" will be zero and (1 - dropout) will be 1.0.
+    out->ApplyHeaviside();
 
-  out->Add(-dropout); // now, a proportion "dropout" will be <0.0
-  out->ApplyHeaviside(); // apply the function (x>0?1:0).  Now, a proportion "dropout" will
-                         // be zero and (1 - dropout) will be 1.0.
-
-  out->MulElements(in);
+    out->MulElements(in);
+  } else {
+    // randomize the dropout matrix by row,
+    // i.e.
[[1,1,1,1],[0,0,0,0],[0,0,0,0],[1,1,1,1],[0,0,0,0]]
+    CuMatrix<BaseFloat> tmp(1, out->NumRows(), kUndefined);
+    // This const_cast is only safe assuming you don't attempt
+    // to use multi-threaded code with the GPU.
+    const_cast<CuRand<BaseFloat>&>(random_generator_).RandUniform(&tmp);
+    tmp.Add(-dropout);
+    tmp.ApplyHeaviside();
+    out->CopyColsFromVec(tmp.Row(0));
+    out->MulElements(in);
+  }
 }
 
@@ -150,11 +170,25 @@ void DropoutComponent::Backprop(const std::string &debug_info,
 
 
 void DropoutComponent::Read(std::istream &is, bool binary) {
-  ExpectOneOrTwoTokens(is, binary, "<DropoutComponent>", "<Dim>");
-  ReadBasicType(is, binary, &dim_);
-  ExpectToken(is, binary, "<DropoutProportion>");
-  ReadBasicType(is, binary, &dropout_proportion_);
-  ExpectToken(is, binary, "</DropoutComponent>");
+  std::string token;
+  ReadToken(is, binary, &token);
+  if (token == "<DropoutComponent>") {
+    ReadToken(is, binary, &token);
+  }
+  KALDI_ASSERT(token == "<Dim>");
+  ReadBasicType(is, binary, &dim_);  // read dimension.
+  ReadToken(is, binary, &token);
+  KALDI_ASSERT(token == "<DropoutProportion>");
+  ReadBasicType(is, binary, &dropout_proportion_);  // read dropout rate
+  ReadToken(is, binary, &token);
+  if (token == "<DropoutPerFrame>") {
+    ReadBasicType(is, binary, &dropout_per_frame_);  // read dropout mode
+    ReadToken(is, binary, &token);
+    KALDI_ASSERT(token == "</DropoutComponent>");
+  } else {
+    dropout_per_frame_ = false;
+    KALDI_ASSERT(token == "</DropoutComponent>");
+  }
 }
 
 void DropoutComponent::Write(std::ostream &os, bool binary) const {
@@ -163,6 +197,8 @@ void DropoutComponent::Write(std::ostream &os, bool binary) const {
   WriteBasicType(os, binary, dim_);
   WriteToken(os, binary, "<DropoutProportion>");
   WriteBasicType(os, binary, dropout_proportion_);
+  WriteToken(os, binary, "<DropoutPerFrame>");
+  WriteBasicType(os, binary, dropout_per_frame_);
   WriteToken(os, binary, "</DropoutComponent>");
 }
 
diff --git a/src/nnet3/nnet-simple-component.h b/src/nnet3/nnet-simple-component.h
index 44f487b49b9..ba7c679cb6c 100644
--- a/src/nnet3/nnet-simple-component.h
+++ b/src/nnet3/nnet-simple-component.h
@@ -87,11 +87,16 @@ class PnormComponent: public Component {
 // "Dropout: A Simple Way to Prevent Neural Networks from Overfitting".
 class DropoutComponent : public RandomComponent {
  public:
-  void Init(int32 dim, BaseFloat dropout_proportion = 0.0);
+  void Init(int32 dim, BaseFloat dropout_proportion = 0.0,
+            bool dropout_per_frame = false);
 
-  DropoutComponent(int32 dim, BaseFloat dropout = 0.0) { Init(dim, dropout); }
+  DropoutComponent(int32 dim, BaseFloat dropout = 0.0,
+                   bool dropout_per_frame = false) {
+    Init(dim, dropout, dropout_per_frame);
+  }
 
-  DropoutComponent(): dim_(0), dropout_proportion_(0.0) { }
+  DropoutComponent(): dim_(0), dropout_proportion_(0.0),
+                      dropout_per_frame_(false) { }
 
   virtual int32 Properties() const {
     return kLinearInInput|kBackpropInPlace|kSimpleComponent|kBackpropNeedsInput|kBackpropNeedsOutput;
@@ -120,17 +125,20 @@ class DropoutComponent : public RandomComponent {
                         Component *to_update,
                         CuMatrixBase<BaseFloat> *in_deriv) const;
   virtual Component* Copy() const { return new DropoutComponent(dim_,
-                                                                dropout_proportion_); }
+                                                                dropout_proportion_,
+                                                                dropout_per_frame_); }
   virtual std::string Info() const;
 
-  void SetDropoutProportion(BaseFloat dropout_proportion) { dropout_proportion_ = dropout_proportion; }
+  void SetDropoutProportion(BaseFloat dropout_proportion) {
+    dropout_proportion_ = dropout_proportion;
+  }
 
  private:
   int32 dim_;
   /// dropout-proportion is the proportion that is dropped out,
   /// e.g. if 0.1, we set 10% to zero value.
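  /// For illustration (a hypothetical config line; the key names match
  /// what InitFromConfig() reads above):
  ///   component name=dropout1 type=DropoutComponent dim=512 \
  ///     dropout-proportion=0.2 dropout-per-frame=true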
  BaseFloat dropout_proportion_;
-
+  bool dropout_per_frame_;
 };
 
 class ElementwiseProductComponent: public Component {
diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc
index 1e22575cee3..3d4330ac9f3 100644
--- a/src/nnet3/nnet-utils.cc
+++ b/src/nnet3/nnet-utils.cc
@@ -625,19 +625,21 @@ void ReadEditConfig(std::istream &edit_config_is, Nnet *nnet) {
         KALDI_ERR << "In edits-config, expected proportion to be set in line: "
                   << config_line.WholeLine();
       }
-      DropoutComponent *component = NULL;
+      DropoutComponent *dropout_component = NULL;
       int32 num_dropout_proportions_set = 0;
       for (int32 c = 0; c < nnet->NumComponents(); c++) {
         if (NameMatchesPattern(nnet->GetComponentName(c).c_str(),
                                name_pattern.c_str()) &&
-            (component =
+            (dropout_component =
              dynamic_cast<DropoutComponent*>(nnet->GetComponent(c)))) {
-          component->SetDropoutProportion(proportion);
-          num_dropout_proportions_set++;
+          if (dropout_component != NULL) {
+            dropout_component->SetDropoutProportion(proportion);
+            num_dropout_proportions_set++;
+          }
         }
       }
       KALDI_LOG << "Set dropout proportions for "
-                << num_dropout_proportions_set << " nodes.";
+                << num_dropout_proportions_set << " components.";
     } else {
       KALDI_ERR << "Directive '" << directive << "' is not currently "
           "supported (reading edit-config).";
diff --git a/src/nnet3bin/nnet3-copy.cc b/src/nnet3bin/nnet3-copy.cc
index 1f75527d69c..9d3b69dd986 100644
--- a/src/nnet3bin/nnet3-copy.cc
+++ b/src/nnet3bin/nnet3-copy.cc
@@ -84,10 +84,10 @@ int main(int argc, char *argv[]) {
 
     if (learning_rate >= 0)
       SetLearningRate(learning_rate, &nnet);
-
+
     if (scale != 1.0)
       ScaleNnet(scale, &nnet);
-
+
     if (!edits_config.empty()) {
       Input ki(edits_config);
       ReadEditConfig(ki.Stream(), &nnet);
From e227eda38339a2f7f0cbbc73b166823c13fdde25 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Wed, 25 Jan 2017 00:19:07 -0500
Subject: [PATCH 330/530] [doc] Documentation changes; add scripts to
 automatically add documentation of patch versions.

---
 .gitignore                                    |   1 +
 egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh |   3 +-
 src/Doxyfile                                  |   2 +-
 src/doc/README                                |  16 +-
 src/doc/dependencies.dox                      |  20 +-
 src/doc/dnn3.dox                              |   2 +-
 src/doc/dnn3_code_compilation.dox             |  15 ++
 src/doc/dnn3_code_optimization.dox            |   2 +-
 src/doc/dnn3_scripts_context.dox              | 246 ++++++++++++++++++
 src/doc/get_version_info.sh                   |  88 +++++++
 src/doc/mainpage.dox                          |  33 +--
 src/doc/versions.dox                          |  92 +++++++
 12 files changed, 491 insertions(+), 29 deletions(-)
 mode change 100644 => 100755 src/doc/README
 create mode 100644 src/doc/dnn3_scripts_context.dox
 create mode 100755 src/doc/get_version_info.sh
 create mode 100644 src/doc/versions.dox

diff --git a/.gitignore b/.gitignore
index b0784cc2c0c..cb5191ccccd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,6 +61,7 @@ GSYMS
 /src/base/version.h
 /src/doc/table/
 /src/doc/tools.dox
+/src/doc/*.html
 /src/htdocs/
 /src/html.tar.gz
 /src/kaldi.mk
diff --git a/egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh b/egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh
index 6fe772f7e0d..a04a0e894ac 100755
--- a/egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh
+++ b/egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
 # note, TDNN is the same as what we used to call multisplice.
+# THIS SCRIPT IS DEPRECATED, see ../train_raw_dnn.py
 
 # Copyright 2012-2015  Johns Hopkins University (Author: Daniel Povey).
 #           2013  Xiaohui Zhang
@@ -75,6 +76,7 @@ if [ -f path.sh ]; then . ./path.sh; fi
 .
parse_options.sh || exit 1; if [ $# != 3 ]; then + echo "$0: THIS SCRIPT IS DEPRECATED, see ../train_raw_dnn.py" echo "Usage: $0 [opts] " echo " e.g.: $0 data/train scp:snr_targets/targets.scp exp/nnet3_snr_predictor" echo "" @@ -544,4 +546,3 @@ if $cleanup; then fi done fi - diff --git a/src/Doxyfile b/src/Doxyfile index bf2dc5197e2..a6c0b434ff2 100644 --- a/src/Doxyfile +++ b/src/Doxyfile @@ -503,7 +503,7 @@ EXCLUDE_PATTERNS = # directories that contain example code fragments that are included (see # the \include command). -EXAMPLE_PATH = +EXAMPLE_PATH = doc # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp diff --git a/src/doc/README b/src/doc/README old mode 100644 new mode 100755 index 566f0d0bf64..27de5defc9f --- a/src/doc/README +++ b/src/doc/README @@ -1,3 +1,9 @@ +#!/bin/bash + +if [ $0 != "doc/README" ]; then + echo "$0: this should be run from one level up (in src/)." + exit 1 +fi #This directory contains some of the source for the Doxygen documentation (the #code itself, and its comments, is the rest of the source). Doxygen will create @@ -6,7 +12,7 @@ #not work, search for "Kaldi main page" online and you will hopefully get a #version of the documentation. -# Note: I generally run this file by typing ". doc/README" from src/, +# Note: I generally run this file by typing "doc/README" from src/, # but this relies on having dsa encryption set up with Sourceforge. # instructions (from Vassil Panayotov) on how to do this: # type @@ -20,6 +26,12 @@ # cd to src/ doc/make_tools.sh + +echo "$0: running doc/get_version_info.sh" +doc/get_version_info.sh +echo "$0: done" + + doxygen cp doc/*.pptx html/; # get the style sheet in the html/ directory. @@ -29,7 +41,6 @@ doxygen -w html header.html footer.html stylesheet.css rm header.html footer.html mv stylesheet.css html/ - if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then cp ../misc/logo/KaldiIco.png html/favicon.ico tar -czf html.tar.gz html @@ -61,4 +72,3 @@ fi # moved the header.html to doc/ and edited it to include the following snippet, # and added it to the repo. # - diff --git a/src/doc/dependencies.dox b/src/doc/dependencies.dox index bff6983e0d6..63d2658b726 100644 --- a/src/doc/dependencies.dox +++ b/src/doc/dependencies.dox @@ -34,9 +34,10 @@ and you can reserve these on the queue by adding some extra option to qsub. See \ref queue for more information. - We have started a separate project called Kluster that shows you - how to create such a cluster on Amazon's EC2; MIT's StarCluster is a larger and better-supported project that provides the same functionality. Most of the scripts should be suitable for a locally hosted cluster based on Debian or @@ -51,9 +52,8 @@ course it will be slower, and you may have to reduce the number of jobs used in some of the example scripts to avoid exhausting your machine's memory. - Kaldi is best tested on Debian and Red Hat Linux, but will run on any - Linux distribution, or on Cygwin or Mac OsX. We are working on FreeBSD - installation scripts. + Kaldi is best tested on Debian and Red Hat Linux, but will run on any + Linux distribution, or on Cygwin or Mac OsX. Kaldi's scripts have been written in such a way that if you replace SGE with a similar mechanism with different syntax (such as Tork), it should be @@ -68,7 +68,6 @@ \section dependencies_packages Software packages required - The following is a non-exhaustive list of some of the packages you need in order to install Kaldi. 
The full list is not important since the installation
  scripts will tell you what you are missing.
@@ -98,6 +97,11 @@
    (the corresponding packages are automake and libtool).
-  - Note: some of the example scripts now use SRILM; we make it easy to
-    install that, although you still have to register online to download it.
+  - SRILM: some of the example scripts use this.  It's generally a better
+    and more complete language modeling toolkit than IRSTLM; the only drawback
+    is the license, which is not free for commercial use.  You have to
+    enter your name on the download page to download it, so the installation
+    script requires some human interaction.
   - sph2pipe: this is for converting sph format files into other formats such
     as wav.  It's needed for the example scripts that use LDC data.
   - sclite: this is for scoring and is not necessary as we have our own, simple
@@ -109,5 +113,9 @@
   - CLAPACK, the linear algebra library (we download the headers).  This is
     useful only on systems where you don't have ATLAS and are instead compiling
     with CLAPACK.
+  - OpenBLAS: this is an alternative to ATLAS or CLAPACK.  The scripts don't
+    use it by default but we provide installation scripts so you can install
+    it if you want to compare it against ATLAS (it's more actively
+    maintained than ATLAS).
 
 */
diff --git a/src/doc/dnn3.dox b/src/doc/dnn3.dox
index b6dbea42fd6..547707d417b 100644
--- a/src/doc/dnn3.dox
+++ b/src/doc/dnn3.dox
@@ -40,7 +40,7 @@ namespace kaldi {
   - \subpage dnn3_code_data_types
   - \subpage dnn3_code_compilation
   - \subpage dnn3_code_optimization
-  - [documentation on scripts to come]
+  - \subpage dnn3_scripts_context
 
 */
diff --git a/src/doc/dnn3_code_compilation.dox b/src/doc/dnn3_code_compilation.dox
index 59844a5d488..f536bb07449 100644
--- a/src/doc/dnn3_code_compilation.dox
+++ b/src/doc/dnn3_code_compilation.dox
@@ -917,6 +917,21 @@ as an optimization.  In this case the associated \ref NnetComputation::debug_inf
 will correspond to the debug information of one of the matrices that we
 merged.
 
+\subsection dnn3_compile_compiler_shortcut Shortcut compilation
+
+A feature available from Kaldi version 5.1 is 'shortcut' compilation (enabled
+by default).  This is done only when the ComputationRequest has a suitably
+regular structure; this basically means that there are more than two different
+"n" indexes in the computation, they are numbered consecutively from zero,
+and for each "n" index, the requested set of "t" and "x" indexes is the same
+and in a regular order.  What the shortcut compilation does is reduce the
+computation request down to just two distinct "n" indexes (zero and one),
+compile the mini-request, and then expand the resulting compilation-- basically,
+it extrapolates the compiled computation to what it would have been if
+the entire original computation request had been supplied.  Shortcut
+compilation significantly cuts down compilation time.
+
+
 - Up: \ref dnn3
 - Previous: \ref dnn3_code_data_types
 - Next: \ref dnn3_code_optimization
diff --git a/src/doc/dnn3_code_optimization.dox b/src/doc/dnn3_code_optimization.dox
index 89a61a7fc32..accf2f50793 100644
--- a/src/doc/dnn3_code_optimization.dox
+++ b/src/doc/dnn3_code_optimization.dox
@@ -319,7 +319,7 @@ struct NnetOptimizeConfig {
 };
 \endverbatim
 The top-level call to the optimization code is just a function call.
-We show the code for this function below:
+We show some partial code for this function below:
 \verbatim
 void Optimize(const NnetOptimizeConfig &config,
               const Nnet &nnet,
diff --git a/src/doc/dnn3_scripts_context.dox b/src/doc/dnn3_scripts_context.dox
new file mode 100644
index 00000000000..43ee0d40260
--- /dev/null
+++ b/src/doc/dnn3_scripts_context.dox
@@ -0,0 +1,246 @@
+// doc/dnn3_scripts_context.dox
+
+
+// Copyright 2015   Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+namespace kaldi {
+namespace nnet3 {
+
+/**
+  \page dnn3_scripts_context Context and chunk-size in the "nnet3" setup
+
+  \section dnn3_scripts_context_intro Introduction
+
+  This page discusses certain issues of terminology in the nnet3 setup
+  about chunk sizes for decoding and training, and left and right context.
+  This will be helpful in understanding some of the scripts.  At the current
+  time we don't have any 'overview' documentation of nnet3 from a scripting perspective,
+  so this will have to stand as an isolated piece of documentation.
+
+  \section dnn3_scripts_context_basics The basics
+
+  If you have read the previous documentation available for \ref dnn3, you will
+  realize that the "nnet3" setup supports setups other than simple feedforward
+  DNNs.  It can be used for time delay neural networks (TDNNs) where temporal
+  splicing (frame splicing) is done at internal layers of the network; and also
+  for recurrent topologies (RNNs, LSTMs, BLSTMs, etc.).  So nnet3
+  "knows about" the time axis.  Below we establish some terminology.
+
+  \subsection dnn3_scripts_context_basics_context Left and right context
+
+  Suppose we want a network to compute an output for a specific time index;
+  to be concrete, say time t = 154.  If the network does frame splicing
+  internally (or anything else nontrivial with the 't' indexes), it may not be able to
+  compute this output without seeing a range of input frames.  For example,
+  it may be impossible to compute the output without seeing the range of
+  't' values from t = 150 through t = 157.  In this case (glossing over details),
+  we'd say that the network has a \b left-context of 4 and a \b right-context of 3.
+  The actual computation of the context is a bit more complex as it has to
+  take into account special cases where, say, the behavior for odd and
+  even 't' values is different (c.f. Round() descriptors in
+  \ref dnn3_dt_nnet_descriptor_config).
+
+  There are cases with recurrent topologies where, in addition to the
+  "required" left and right context, we want to give the training or the
+  decoding "extra" context.  For such topologies, the network can make use
+  of context beyond the required context.
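To make the splicing point concrete (an illustrative descriptor, not one
taken from the scripts): a layer whose input is

\verbatim
Append(Offset(input, -1), input, Offset(input, 1))
\endverbatim

needs one frame of context on each side, and a network that stacks three such
layers ends up with a left-context and right-context of 3 each, because the
contexts of successive splicing layers add up.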
+  In the scripts you'll generally see variables called
+  \b extra-left-context and \b extra-right-context, which mean
+  "the amount of context that we're going to provide in addition to what is required".
+
+  In some circumstances the names \b left-context and
+  \b right-context simply mean the total left and right context that we're
+  adding to the chunks, i.e. the sums of the model left/right context and the
+  extra left/right context.  So in some circumstances you may have to work out
+  from the context whether a variable refers to the model left/right context
+  or the left/right context of the chunks of data.
+
+  In Kaldi version 5.0 and earlier the left and right context in the chunks
+  of data is not affected by whether the chunks are at the
+  beginning or end of the utterance; at the ends we pad the input with copies of the
+  first or last frame.  This means that for recurrent topologies, we might end up
+  padding the start or end of the utterance with a lot of frames (up to 40 or so).
+  This is wasteful and rather strange.
+  In versions 5.1 and later, you can specify configuration values \b extra-left-context-initial and
+  \b extra-right-context-final that allow the start/end of the utterance to have a different
+  amount of context.  If you specify these values, you would normally specify them both to be 0
+  (i.e. no extra context).  However, for back compatibility with older setups, they
+  generally default to -1 (meaning, just copy the default extra-left-context and extra-right-context).
+
+
+  \subsection dnn3_scripts_context_basics_chunk Chunk size
+
+  The \b chunk-size is the number of (output) frames for each chunk of data
+  that we evaluate in training or decoding.  In the get_egs.sh script
+  and train_dnn.py it is also referred to as \b frames-per-eg (in some contexts,
+  this is not the same as the chunk size; see below).  In decoding we call this
+  the \b frames-per-chunk.
+
+  \subsubsection dnn3_scripts_context_basics_chunk_dnn Non-recurrent, non-chain case
+
+  For the very simplest types of networks, such as feedforward networks or TDNNs
+  trained with the cross-entropy objective function, we randomize the entire
+  dataset at the frame level and we just train on one frame at a time.  In order
+  for the training jobs to mostly do sequential I/O, we aim to pre-randomize the
+  data at the frame level.  However, when you consider that we might easily
+  require 10 frames each of left and right context, and we have to write this out,
+  we could easily be increasing the amount of data by a factor of 20 or so when we
+  generate the training examples.  To solve this problem we include labels for
+  a range of time values, controlled by \b frames-per-eg (normally 8), and include
+  enough left/right context that we can train on any of those 8 frames.  Then
+  when we train the model, any given training job will pick one of those 8 frames to
+  train on.
+
+  \subsubsection dnn3_scripts_context_basics_chunk_rnn Recurrent or chain case
+
+  In models that are RNNs or LSTMs or are \ref chain, we always train on fairly large
+  chunks (generally in the range 40 to 150 frames).  This is referred to as the
+  \b chunk-size.  When we decode, we also generally evaluate the neural net on fairly
+  large chunks of data (like, 30, 50 or 100 frames).  This is usually referred to
+  as the \b frames-per-chunk.
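For instance (illustrative values only; the option names are those used by
steps/nnet3/decode.sh), a decode matched to a training setup that used a chunk
width of 150 and extra left context of 40 might be invoked as

\verbatim
steps/nnet3/decode.sh --frames-per-chunk 150 \
  --extra-left-context 40 --extra-right-context 0 ...
\endverbatim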
For recurrent networks we tend to
+  make sure that the \b chunk-size/\b frames-per-chunk
+  and the \b extra-left-context and \b extra-right-context are about the same in
+  training and decoding, because this generally gives the best results (although
+  sometimes it's best to make the extra-context values slightly larger in decoding).
+  One might expect that at decoding time longer context would always be better,
+  but this does not always seem to be the case (however, see \ref dnn3_scripts_context_looped
+  below, where we mention a way around this).
+
+
+  \subsubsection dnn3_scripts_context_basics_chunk_subsampling Interaction of chunk size with frame-subsampling-factor
+
+  In cases where there is frame-subsampling at the output (like the chain model),
+  the chunk-size is still measured in multiples of 't', and we make sure (via
+  rounding up in the code) that it's a multiple of the frame-subsampling factor.
+  Bear in mind that if the \b chunk-size is 90 and the \b frame-subsampling-factor
+  is 3, then we're only evaluating 30 distinct output indexes for each chunk of
+  90 frames (e.g. t=0, t=3 ... t=87).
+
+  \subsection dnn3_scripts_context_basics_variable Variable chunk size
+
+  Variable chunk size is something used in training that is only available in Kaldi version
+  5.1 or later.  This is a mechanism to allow fairly large chunks while avoiding
+  the loss of data due to files that are not exact multiples of the chunk size.
+  Instead of specifying the chunk size as (say) 150, we might specify the chunk
+  size as a comma-separated list like 150,120,90,75, and the commands that generate the
+  training examples are allowed to create chunks of any of those sizes.  The
+  first chunk size specified is referred to as the primary chunk size, and is
+  "special" in that for any given utterance, we are allowed to pick at most two of the
+  non-primary chunk sizes; the remaining chunks must be of the primary chunk size.
+  This restriction makes it easier to work out the optimal split of a file of
+  a given length into chunks, and allows us to bias the chunk-generation to
+  chunks of a certain length.
+
+
+  \subsection dnn3_scripts_context_basics_minibatch Minibatch size
+
+  The program nnet3-merge-egs merges individual training examples into
+  minibatches containing many different examples (each original example
+  gets a different 'n' index).  The \b minibatch-size is the desired
+  size of minibatch, by which we mean the number of examples (frames or
+  sequences) that we combine into one (for example, minibatch-size=128).
+  When the chunk sizes
+  are variable (and taking into account that the context may be different
+  at the start/end of utterances if we set the \b extra-left-context-initial
+  and \b extra-right-context-final), it's important to ensure that only
+  ``similar'' examples are merged into minibatches; this prevents expensive
+  recompilation from happening on every single minibatch.
+
+  In Kaldi version
+  5.1 and later, nnet3-merge-egs only merges together chunks of the same
+  structure (i.e. the same chunk-size and left and right context).
+  It keeps reading chunks from the input until it finds that
+  for some structure of input, there are \b minibatch-size examples ready
+  to merge into one.  In Kaldi versions prior to 5.1 we generally discarded
+  the "odd-numbered" examples that couldn't be fit into a normal-sized
+  minibatch, but this becomes problematic now that there are many different
+  chunk-sizes (we'd discard too much data).
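As an illustration (with made-up stream contents): if examples arrive with two
distinct structures, say chunk-size 150 and chunk-size 75 with the same
context, then with --minibatch-size=128 the merging program buffers the two
kinds separately and emits a minibatch as soon as 128 examples of one kind are
ready.  A typical invocation is simply

\verbatim
nnet3-merge-egs --minibatch-size=128 ark:cegs.1.ark ark:- | ...
\endverbatim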
+ + \subsubsection dnn3_scripts_context_basics_minibatch_variable Variable minibatch size + + From Kaldi 5.1 and later, + the --minibatch-size is a more general string that allows the user more + control than just having a fixed minibatch size. For example, you can specify --minibatch-size=64,128 and + for each type of example it will try to accumulate batches of the + largest specified size (128) and output + them, until it reaches the end of the input; then it will output + a minibatch of size 64 if there are >= 64 egs left. Ranges are also + supported, e.g. --minibatch-size=1:64 means to output minibatches of size 64 + until the end of the input, then output all remaining examples as a single + minibatch. You may also specify different rules for examples of different + sizes (run nnet3-merge-egs without arguments for details of this); this can be useful + to stay within GPU memory limits. + + \section dnn3_scripts_context_looped Looped decoding + + Looped decoding in nnet3 is another feature that is new in Kaldi version 5.1. + It is applicable to forward-recurrent neural networks such as RNNs and LSTMs + (but not to BLSTMs). It allows us to re-use hidden-state activations from + previously-computed chunks. This allows us to have effectively unlimited left + context. The reason why it's called ``looped decoding'' relates to the way + it's implemented: we create a computation whose last statement is a 'goto' + that jumps to somewhere in the middle, so effectively it has a loop like + 'while(1)'. (Note: the computations have statements that request user input or + provide output, so the loop doesn't cause the computation to run indefinitely when called; + it will stop when an I/O operation is reached). Looped computation is intended to solve two problems: wasteful + computation, and latency. Suppose we trained our LSTMs with 40 frames of left + context and a chunk-size of 100. Without looped computation, we'd probably + want to decode with chunks of size about 100 and we'd left-pad the input with around 40 + frames. But this takes about 40\% extra computation; and the chunk size of 1 + second would be a problem for latency/responsiveness in a real-time + application. With looped computation, we can choose any chunk size that's + convenient, because the effective left context is infinite; and the chunk size + doesn't affect the computed output any more. + + However, there is a slight problem with what we sketched out above. In + practice, we've found for LSTMs that decoding works best with about the same + chunk sizes and context as we trained with. That is, adding more context than + we trained on is not helpful. Our theory about why this happens is that + as the context gets longer we reach parts of activation space that were unreachable + before. The maximum value of the cells \f$c_t\f$ in LSTMs rises linearly with + the number of frames we've seen. Following this theory, we made a modification + to LSTMs that seems to fix the problem. We scale the \f$c_t\f$ in the LSTM equations + by a value slightly less than one in the recurrence (for example, like 0.9). + This puts a bound on the maximum hidden activation activations and makes them + increase less dramatically with increasing recurrence time. It's specified + as a configuration value in the LSTM components in the "xconfig" configuration files + with the "decay-time" value, e.g. "decay-time=20". This doesn't seem to + degrade the Word Error Rates, and it removes the discrepancy between regular + and looped decoding (i.e. 
it makes the networks tolerant to longer context than
+  was seen in training).
+
+  The script steps/nnet3/decode_looped.sh (only available from Kaldi version 5.1)
+  takes only two chunk- or context-related configuration values:
+  \b frames-per-chunk (which only affects the speed/latency tradeoff and not
+  results), and \b extra-left-context-initial, which should be set to
+  match the training condition (generally this will be zero, in up-to-date
+  scripts).
+
+
+  At the time of writing, we have not yet created a program similar to
+  online2-wav-nnet3-latgen-faster that uses the looped decoder; that is
+  on our TODO list (it's not inherently difficult).
+
+
+  - Up: \ref dnn3
+  - Previous: \ref dnn3_code_optimization
+
+*/
+
+}
+}
diff --git a/src/doc/get_version_info.sh b/src/doc/get_version_info.sh
new file mode 100755
index 00000000000..5b6de79e04c
--- /dev/null
+++ b/src/doc/get_version_info.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+
+# Note: this script assumes that it's part of a git repository where
+# the official kaldi repo is a remote named 'upstream', as shown
+# here:
+# git remote -vv | grep upstream
+# upstream  git@github.com:kaldi-asr/kaldi.git (fetch)
+# upstream  git@github.com:kaldi-asr/kaldi.git (push)
+# Since Dan is going to be the one running this script and that's
+# how he does it, this should work fine.
+
+
+
+# the tuples are:
+
+if [ "$0" != "doc/get_version_info.sh" ] || [ $# -ne 0 ]; then
+  echo "$0: you should run this script without arguments, from the src/ directory."
+  echo "... It generates 5.0.html, 5.1.html, and so on."
+fi
+
+if ! git fetch upstream; then
+  echo "$0: command 'git fetch upstream' failed"
+  exit 1
+fi
+
+
+# echo "fooXXabcYYbar" | perl -ane ' if (m/XX(.+)YY/) { $a=$`;$x=$1;$y=$'\''; $x =~ s/a/b/g; print "${a}XX${x}YY${y}"; } else {print;}'
+
+# Note: when you add new tuples here you'll want to add new
+# \htmlinclude directives in versions.dox.
+for tuple in "5.0 master c160a9883"; do
+  major_minor_number=$(echo $tuple | awk '{print $1}')  # e.g. 5.0
+  branch=$(echo $tuple | awk '{print $2}')  # e.g. 'master', or '5.1' (it's a branch name)
+  first_commit=$(echo $tuple | awk '{print $3}')
+
+
+
+  tempfile=$(mktemp)
+  echo "$0: for version=$major_minor_number, writing git output to $tempfile"
+
+  patch_number=0
+  # git rev-list --reverse $first_commit..$branch lists the revisions from
+  # $first_commit to $branch... --boundary causes it to include $first_commit
+  # in the range, but with a dash (-) included for the first commit, so we
+  # use a sed command to get rid of that.
+  for rev in $(git rev-list --reverse $first_commit..$branch --boundary | sed s/-//); do
+    # %h is abbrev. commit hash, %H is long commit hash, %cd is the commit date,
+    # %s is the one-line log message; %x09 is tab.
+    # so we're printing "<patch-number> <short-hash> <long-hash> <date> <subject>"
+    # we'll later parse this and generate HTML.
+    pretty_str="${patch_number}%x09%h%x09%H%x09%cd%x09%s";
+    git log --date=short --pretty="$pretty_str" -1 $rev
+    patch_number=$[patch_number+1]
+  done > $tempfile
+
+  htmlfile=doc/$major_minor_number.html
+  echo "$0: for version=$major_minor_number, processing $tempfile to $htmlfile"
+
+  cat $tempfile | perl -e '
+   ($major_minor_number) = @ARGV;
+   while (<STDIN>) {
+     if (! m/^(\S+)\t(\S+)\t(\S+)\t(\S+)\t(.+)/) {
+       die "Could not parse line $_ in git output";
+     } else {
+       $patch_number = $1; $short_commit = $2; $long_commit = $3;
+       $commit_date = $4; $commit_subject = $5;
+       if ($commit_subject =~ m/\(#(\d+)\)\s*$/) {
+         $pull_request_number = $1;
+         $pre_match = $`;  # part before what was matched.
+         $pre_match =~ s/</&lt;/g;
+         $pre_match =~ s/>/&gt;/g;
+         # if commit subject line ends with e.g. (#1302), which will
+         # be a pull request; create a href to github for that.
+         $commit_subject = $pre_match .
+          "<a href=\"https://github.com/kaldi-asr/kaldi/pull/$pull_request_number\">(#$pull_request_number)</a>";
+       } else {
+         $commit_subject =~ s/</&lt;/g;
+         $commit_subject =~ s/>/&gt;/g;
+       }
+       $commit_href =
+        "<a href=\"https://github.com/kaldi-asr/kaldi/commit/$long_commit\">$short_commit</a>";
+       $line = "$major_minor_number.$patch_number $commit_href $commit_date $commit_subject <br>\n";
+       print $line;
+     }
+     print "<br> <br>\n";
+   } ' "$major_minor_number" >$htmlfile || exit 1
+  echo "$0: generated file $htmlfile with $(wc -l <$htmlfile) lines"
+done
diff --git a/src/doc/mainpage.dox b/src/doc/mainpage.dox
index 4cc684e85b8..3b21f6174b0 100644
--- a/src/doc/mainpage.dox
+++ b/src/doc/mainpage.dox
@@ -34,14 +34,15 @@
   location.  kaldi-asr.org/doc is the definitive location of this documentation.
 
   Kaldi's code repository is now located at http://github.com/kaldi-asr/kaldi
-
-  See also the top level of kaldi-asr.org, where
+
+  See also <a href="http://kaldi-asr.org/">the top level of kaldi-asr.org</a>, where
   you can download pre-built models. <BR> <BR>
-  - \subpage about
+  - \subpage about
   - \subpage other
-  - \subpage install
+  - \subpage install
+  - \subpage versions
   - \subpage dependencies
   - \subpage legal
   - \subpage tutorial
@@ -49,26 +50,26 @@
   - \subpage examples
   - \subpage glossary
   - \subpage data_prep
-  - \subpage build_setup
-  - \subpage style
-  - \subpage history
-  - \subpage matrix
-  - \subpage matrixwrap
+  - \subpage build_setup
+  - \subpage style
+  - \subpage history
+  - \subpage matrix
+  - \subpage matrixwrap
   - \subpage cudamatrix
-  - \subpage io
+  - \subpage io
   - \subpage io_tut
-  - \subpage error
-  - \subpage parse_options
+  - \subpage error
+  - \subpage parse_options
   - \subpage util
   - \subpage clustering
   - \subpage hmm
   - \subpage tree_internals
   - \subpage tree_externals
-  - \subpage graph
+  - \subpage graph
   - \subpage graph_recipe_test
   - \subpage graph_recipe_train
-  - \subpage fst_algo
-  - \subpage decoders
+  - \subpage fst_algo
+  - \subpage decoders
   - \subpage lattices
   - \subpage model
   - \subpage feat
@@ -76,7 +77,7 @@
   - \subpage dnn
     - \ref dnn1
     - \ref dnn2
-    - \ref dnn3
+    - \ref dnn3
     - \ref chain
   - \subpage online_decoding
   - \subpage kws
diff --git a/src/doc/versions.dox b/src/doc/versions.dox
new file mode 100644
index 00000000000..2c67b2de317
--- /dev/null
+++ b/src/doc/versions.dox
@@ -0,0 +1,92 @@
+// doc/versions.dox
+
+// Copyright 2017  Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+// note: you have to run the file get_version_info.sh in order
+// to generate the HTML files that we include via \htmlinclude.
+
+
+
+/**
+
+ \page versions Versions of Kaldi
+
+ \section versions_scheme Versioning scheme
+
+ During its lifetime, Kaldi has had three different versioning methods.
+ Originally Kaldi was a subversion (svn)-based project, and was hosted
+ on Sourceforge.  Then Kaldi was moved to github, and for some time the
+ only version-number available was the git hash of the commit.
+
+ In January 2017 we introduced a version number scheme.  The first version
+ of Kaldi was 5.0.0, in recognition of the fact that the project had
+ already existed for quite a long time.  The basic scheme is major/minor/patch,
+ but the "patch" version number may also encompass features (usually
+ back-compatible ones).  The "patch number" automatically increases whenever
+ a commit to Kaldi is merged on github.
+
+ We only intend to change the major or minor
+ version number when making relatively larger changes, or non-back-compatible
+ changes.  Version 5.1 of Kaldi is currently being prepared.  When that is
+ finished (probably in early February 2017), the latest version of 5.0.x will
+ be backed up to a branch named '5.0', and 'master' will point to version 5.1.0.
+ We may continue to update the 5.0 branch with fixes and the like, depending on
+ demand.
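As a worked example of the scheme (with made-up numbers): a version string of
5.0.32 means major version 5, minor version 0, and 32 patch-level commits
merged since version 5.0.0; the next commit merged to that branch
automatically becomes 5.0.33, with no action needed from the committer.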
+ + We always plan to recommend that Kaldi users check out the latest version of + 'master', since actively supporting multiple versions would increase our workload. + + \section versions_versions Versions (and changes) + + This section lists the version numbers of Kaldi with the commit messages + for each patch commit (by "patch commit" we mean a commit that does not + increase the major or minor version number). + Each time we add a new major/minor version number we will include a longer + section explaining the changes involved. + + \subsection versions_versions_50 Version 5.0 + + This is the first major/minor version number after introducing the versioning scheme. + It is currently available in the 'master' branch on github. + Specific patches: + + \htmlinclude 5.0.html + + + \subsection versions_versions_51 Version 5.1 + + Version 5.1 is in preparation and version 5.1.0 does not actually exist yet. + You can see the development in the 'shortcut' branch on github. + Some of the major changes introduced in version 5.1 are: + - Kaldi now requires C++11 to compile, and we support only the latest + version of OpenFst (1.6.0). (This simplifies Kaldi's code, and will later + enable the threading code to be + rewritten + to use C++11's better and more portable mechanisms). + - The way chunk size and feature context is handled in nnet3 is changed + to allow variable chunk size and shorter context at utterance boundaries. + See \ref dnn3_scripts_context for more information. + - A new decoding mechanism, \ref dnn3_scripts_context_looped, is introduced + in nnet3; this allows faster and more-easily-online decoding for + recurrent setups (but only unidirectionally-recurrent ones, like LSTMs + but not BLSTMs). + - The sequence-training scripts in nnet3 are refactored and are now simpler + and use less disk space. + + +*/ From 0c1517c37744535e89a3c48df43a8952b37d9498 Mon Sep 17 00:00:00 2001 From: Ewald Enzinger Date: Wed, 25 Jan 2017 10:29:35 -0800 Subject: [PATCH 331/530] [egs] swbd/s5c, chain recipes: fix hardcoded directory name (#1377) The scripts used a hard-coded path including '_sp' suffix irrespective of whether speed_perturb is set. 
--- egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh | 2 +- egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh index 1718b5a4f7e..ae7c97e7d08 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh @@ -119,7 +119,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh index e262430ab06..8d3fcae4297 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh @@ -120,7 +120,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh index 2a0019e59d7..fa6518a9ad9 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh @@ -116,7 +116,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh index 946ae796e2f..59bc2c64f70 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh @@ -109,7 +109,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh index c19ca88a843..c5b5633d94c 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh @@ -112,7 +112,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') 
+ num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh index b3bed2f2538..9aec95393d1 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh @@ -111,7 +111,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh index b346862049b..f7681a743e1 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh @@ -111,7 +111,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh index 47d4fcdd52c..89ed8ad1d72 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -118,7 +118,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh index 07e38cb29c5..f0c88368245 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -114,7 +114,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh index ea34aefe29f..b305c57b6ab 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -117,7 +117,7 @@ fi if [ $stage -le 12 ]; then echo "$0: creating neural net configs using the xconfig parser"; - num_targets=$(tree-info exp/chain/tri5_7d_tree_sp/tree |grep num-pdfs|awk '{print $2}') + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) mkdir -p $dir/configs From bf44dda710cbd6cfa2daba76224128ceded74cc6 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 14:43:38 -0500 Subject: [PATCH 332/530] asr_diarization: gmm-global-get-post to support archives of models --- 
src/gmmbin/gmm-global-get-post.cc | 28 ++- .../gmm-global-init-models-from-feats.cc | 229 ++++++++++++++++-- 2 files changed, 224 insertions(+), 33 deletions(-) rename src/{gmmbin => segmenterbin}/gmm-global-init-models-from-feats.cc (55%) diff --git a/src/gmmbin/gmm-global-get-post.cc b/src/gmmbin/gmm-global-get-post.cc index 2092d1348f0..35438a7e849 100644 --- a/src/gmmbin/gmm-global-get-post.cc +++ b/src/gmmbin/gmm-global-get-post.cc @@ -49,7 +49,8 @@ int main(int argc, char *argv[]) { po.Register("min-post", &min_post, "Minimum posterior we will output " "before pruning and renormalizing (e.g. 0.01)"); po.Register("utt2spk", &utt2spk_rspecifier, - "rspecifier for utterance to speaker map"); + "rspecifier for utterance to speaker map for reading " + "per-speaker GMM models"); po.Read(argc, argv); if (po.NumArgs() < 3 || po.NumArgs() > 4) { @@ -63,7 +64,7 @@ int main(int argc, char *argv[]) { frame_loglikes_wspecifier = po.GetOptArg(4); RandomAccessDiagGmmReaderMapped *gmm_reader = NULL; - DiagGmm *gmm = NULL; + DiagGmm diag_gmm; KALDI_ASSERT(num_post > 0); KALDI_ASSERT(min_post < 1.0); @@ -73,9 +74,8 @@ int main(int argc, char *argv[]) { gmm_reader = new RandomAccessDiagGmmReaderMapped(model_in_filename, utt2spk_rspecifier); } else { - gmm = new DiagGmm(); - ReadKaldiObject(model_in_filename, gmm); - int32 num_gauss = gmm->NumGauss(); + ReadKaldiObject(model_in_filename, &diag_gmm); + int32 num_gauss = diag_gmm.NumGauss(); if (num_post > num_gauss) { KALDI_WARN << "You asked for " << num_post << " Gaussians but GMM " << "only has " << num_gauss << ", returning this many. "; @@ -88,7 +88,7 @@ int main(int argc, char *argv[]) { SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); PosteriorWriter post_writer(post_wspecifier); - BaseFloatVectorWriter likes_writer(frame_loglikes_wspecifier); + BaseFloatVectorWriter frame_loglikes_writer(frame_loglikes_wspecifier); int32 num_done = 0, num_err = 0; for (; !feature_reader.Done(); feature_reader.Next()) { @@ -101,14 +101,19 @@ int main(int argc, char *argv[]) { continue; } + const DiagGmm *gmm; if (gmm_reader) { - if (!gmm_reader.HasKey(utt)) { + if (!gmm_reader->HasKey(utt)) { KALDI_WARN << "Could not find GMM for utterance " << utt; num_err++; continue; } - gmm = gmm_reader.Value(utt); + gmm = &(gmm_reader->Value(utt)); + } else { + gmm = &diag_gmm; } + int32 num_gauss_to_compute = + num_post > gmm->NumGauss() ? gmm->NumGauss() : num_post; if (feats.NumCols() != gmm->Dim()) { KALDI_WARN << "Dimension mismatch for utterance " << utt @@ -117,8 +122,6 @@ int main(int argc, char *argv[]) { num_err++; continue; } - vector > gselect(T); - Matrix loglikes; gmm->LogLikelihoods(feats, &loglikes); @@ -132,7 +135,7 @@ int main(int argc, char *argv[]) { for (int32 t = 0; t < T; t++) { double log_like_this_frame = VectorToPosteriorEntry(loglikes.Row(t), - num_post > num_gauss ? 
num_gauss : num_post, + num_gauss_to_compute, min_post, &(post[t])); if (!frame_loglikes_wspecifier.empty()) frame_loglikes(t) = log_like_this_frame; @@ -145,13 +148,12 @@ int main(int argc, char *argv[]) { post_writer.Write(utt, post); if (!frame_loglikes_wspecifier.empty()) - frame_loglikes.Write(utt, frame_loglikes); + frame_loglikes_writer.Write(utt, frame_loglikes); num_done++; } delete gmm_reader; - delete gmm; KALDI_LOG << "Done " << num_done << " files, " << num_err << " with errors, average UBM log-likelihood is " diff --git a/src/gmmbin/gmm-global-init-models-from-feats.cc b/src/segmenterbin/gmm-global-init-models-from-feats.cc similarity index 55% rename from src/gmmbin/gmm-global-init-models-from-feats.cc rename to src/segmenterbin/gmm-global-init-models-from-feats.cc index 486ba5af27b..a472b48624c 100644 --- a/src/gmmbin/gmm-global-init-models-from-feats.cc +++ b/src/segmenterbin/gmm-global-init-models-from-feats.cc @@ -1,6 +1,7 @@ // gmmbin/gmm-global-init-models-from-feats.cc // Copyright 2013 Johns Hopkins University (author: Daniel Povey) +// 2016 Vimal Manohar // See ../../COPYING for clarification regarding multiple authors // @@ -58,10 +59,145 @@ void InitGmmFromRandomFrames(const MatrixBase &feats, DiagGmm *gmm) { gmm->ComputeGconsts(); } +void MleDiagGmmSharedVarsUpdate(const MleDiagGmmOptions &config, + const AccumDiagGmm &diag_gmm_acc, + GmmFlagsType flags, + DiagGmm *gmm, + BaseFloat *obj_change_out, + BaseFloat *count_out, + int32 *floored_elements_out, + int32 *floored_gaussians_out, + int32 *removed_gaussians_out) { + KALDI_ASSERT(gmm != NULL); + + if (flags & ~diag_gmm_acc.Flags()) + KALDI_ERR << "Flags in argument do not match the active accumulators"; + + KALDI_ASSERT(diag_gmm_acc.NumGauss() == gmm->NumGauss() && + diag_gmm_acc.Dim() == gmm->Dim()); + + int32 num_gauss = gmm->NumGauss(); + double occ_sum = diag_gmm_acc.occupancy().Sum(); + + int32 elements_floored = 0, gauss_floored = 0; + + // remember old objective value + gmm->ComputeGconsts(); + BaseFloat obj_old = MlObjective(*gmm, diag_gmm_acc); + + // First get the gmm in "normal" representation (not the exponential-model + // form). + DiagGmmNormal ngmm(*gmm); + + Vector shared_var(gmm->Dim()); + + std::vector to_remove; + for (int32 i = 0; i < num_gauss; i++) { + double occ = diag_gmm_acc.occupancy()(i); + double prob; + if (occ_sum > 0.0) + prob = occ / occ_sum; + else + prob = 1.0 / num_gauss; + + if (occ > static_cast(config.min_gaussian_occupancy) + && prob > static_cast(config.min_gaussian_weight)) { + + ngmm.weights_(i) = prob; + + // copy old mean for later normalizations + Vector old_mean(ngmm.means_.Row(i)); + + // update mean, then variance, as far as there are accumulators + if (diag_gmm_acc.Flags() & (kGmmMeans|kGmmVariances)) { + Vector mean(diag_gmm_acc.mean_accumulator().Row(i)); + mean.Scale(1.0 / occ); + // transfer to estimate + ngmm.means_.CopyRowFromVec(mean, i); + } + + if (diag_gmm_acc.Flags() & kGmmVariances) { + KALDI_ASSERT(diag_gmm_acc.Flags() & kGmmMeans); + Vector var(diag_gmm_acc.variance_accumulator().Row(i)); + var.Scale(1.0 / occ); + var.AddVec2(-1.0, ngmm.means_.Row(i)); // subtract squared means. + + // if we intend to only update the variances, we need to compensate by + // adding the difference between the new and old mean + if (!(flags & kGmmMeans)) { + old_mean.AddVec(-1.0, ngmm.means_.Row(i)); + var.AddVec2(1.0, old_mean); + } + shared_var.AddVec(occ, var); + } + } else { // Insufficient occupancy. 
+ if (config.remove_low_count_gaussians && + static_cast(to_remove.size()) < num_gauss-1) { + // remove the component, unless it is the last one. + KALDI_WARN << "Too little data - removing Gaussian (weight " + << std::fixed << prob + << ", occupation count " << std::fixed << diag_gmm_acc.occupancy()(i) + << ", vector size " << gmm->Dim() << ")"; + to_remove.push_back(i); + } else { + KALDI_WARN << "Gaussian has too little data but not removing it because" + << (config.remove_low_count_gaussians ? + " it is the last Gaussian: i = " + : " remove-low-count-gaussians == false: g = ") << i + << ", occ = " << diag_gmm_acc.occupancy()(i) << ", weight = " << prob; + ngmm.weights_(i) = + std::max(prob, static_cast(config.min_gaussian_weight)); + } + } + } + + if (diag_gmm_acc.Flags() & kGmmVariances) { + int32 floored; + if (config.variance_floor_vector.Dim() != 0) { + floored = shared_var.ApplyFloor(config.variance_floor_vector); + } else { + floored = shared_var.ApplyFloor(config.min_variance); + } + if (floored != 0) { + elements_floored += floored; + gauss_floored++; + } + + shared_var.Scale(1.0 / occ_sum); + for (int32 i = 0; i < num_gauss; i++) { + ngmm.vars_.CopyRowFromVec(shared_var, i); + } + } + + // copy to natural representation according to flags + ngmm.CopyToDiagGmm(gmm, flags); + + gmm->ComputeGconsts(); // or MlObjective will fail. + BaseFloat obj_new = MlObjective(*gmm, diag_gmm_acc); + + if (obj_change_out) + *obj_change_out = (obj_new - obj_old); + if (count_out) *count_out = occ_sum; + if (floored_elements_out) *floored_elements_out = elements_floored; + if (floored_gaussians_out) *floored_gaussians_out = gauss_floored; + + if (to_remove.size() > 0) { + gmm->RemoveComponents(to_remove, true /*renormalize weights*/); + gmm->ComputeGconsts(); + } + if (removed_gaussians_out != NULL) *removed_gaussians_out = to_remove.size(); + + if (gauss_floored > 0) + KALDI_VLOG(2) << elements_floored << " variances floored in " << gauss_floored + << " Gaussians."; +} + + void TrainOneIter(const MatrixBase &feats, const MleDiagGmmOptions &gmm_opts, int32 iter, int32 num_threads, + bool share_covars, DiagGmm *gmm) { AccumDiagGmm gmm_acc(*gmm, kGmmAll); @@ -86,7 +222,7 @@ void TrainOneIter(const MatrixBase &feats, void TrainGmm(const MatrixBase &feats, const MleDiagGmmOptions &gmm_opts, int32 num_gauss, int32 num_gauss_init, int32 num_iters, - int32 num_threads, DiagGmm *gmm) { + int32 num_threads, bool share_covars, DiagGmm *gmm) { KALDI_LOG << "Initializing GMM means from random frames to " << num_gauss_init << " Gaussians."; InitGmmFromRandomFrames(feats, gmm); @@ -97,7 +233,7 @@ void TrainGmm(const MatrixBase &feats, gauss_inc = (num_gauss - num_gauss_init) / (num_iters / 2); for (int32 iter = 0; iter < num_iters; iter++) { - TrainOneIter(feats, gmm_opts, iter, num_threads, gmm); + TrainOneIter(feats, gmm_opts, iter, num_threads, share_covars, gmm); int32 next_num_gauss = std::min(num_gauss, cur_num_gauss + gauss_inc); if (next_num_gauss > gmm->NumGauss()) { @@ -126,10 +262,14 @@ int main(int argc, char *argv[]) { bool binary = true; int32 num_gauss = 100; int32 num_gauss_init = 0; + int32 max_gauss = 0; + int32 min_gauss = 0; int32 num_iters = 50; int32 num_frames = 200000; int32 srand_seed = 0; int32 num_threads = 4; + BaseFloat num_gauss_fraction = -1; + bool share_covars = false; std::string spk2utt_rspecifier; po.Register("binary", &binary, "Write output in binary mode"); @@ -145,6 +285,16 @@ int main(int argc, char *argv[]) { "statistics accumulation"); po.Register("spk2utt-rspecifier",
&spk2utt_rspecifier, "If specified, estimates models per-speaker"); + po.Register("num-gauss-fraction", &num_gauss_fraction, + "If specified, chooses the number of gaussians to be " + "num-gauss-fraction * min(num-frames-available, num-frames). " + "This number is expected to be in the range(0, 0.1)."); + po.Register("max-gauss", &max_gauss, "Maximum number of Gaussians allowed " + "in the model. Applicable when num_gauss_fraction is specified."); + po.Register("min-gauss", &min_gauss, "Minimum number of Gaussians allowed " + "in the model. Applicable when num_gauss_fraction is specified."); + po.Register("share-covars", &share_covars, "If true, then the variances " + "of the Gaussian components are tied."); gmm_opts.Register(&po); @@ -157,25 +307,33 @@ int main(int argc, char *argv[]) { exit(1); } - if (num_gauss_init <= 0 || num_gauss_init > num_gauss) - num_gauss_init = num_gauss; - + if (num_gauss_fraction != -1) { + KALDI_ASSERT(num_gauss_fraction > 0 && num_gauss_fraction < 0.1); + } + + KALDI_ASSERT(max_gauss >= 0 && min_gauss >= 0 && max_gauss >= min_gauss); + std::string feature_rspecifier = po.GetArg(1), model_wspecifier = po.GetArg(2); DiagGmmWriter gmm_writer(model_wspecifier); KALDI_ASSERT(num_frames > 0); - - KALDI_LOG << "Reading features (will keep " << num_frames << " frames " - << "per utterance.)"; + + if (spk2utt_rspecifier.empty()) { + KALDI_LOG << "Reading features (will keep " << num_frames << " frames " + << "per utterance.)"; + } else { + KALDI_LOG << "Reading features (will keep " << num_frames << " frames " + << "per speaker.)"; + } int32 dim = 0; - if (!spk2utt_rspecifier.empty()) { + if (spk2utt_rspecifier.empty()) { SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); for (; !feature_reader.Done(); feature_reader.Next()) { - const Matrix &this_feats = feature_reader.Value(); + const Matrix &this_feats = feature_reader.Value(); if (dim == 0) { dim = this_feats.NumCols(); } else if (this_feats.NumCols() != dim) { @@ -198,6 +356,8 @@ int main(int argc, char *argv[]) { } } } + + KALDI_ASSERT(num_read > 0); if (num_read < num_frames) { KALDI_WARN << "For utterance " << feature_reader.Key() << ", " @@ -211,9 +371,23 @@ int main(int argc, char *argv[]) { << " input frames = " << percent << "%."; } - DiagGmm gmm(num_gauss_init, dim); - TrainGmm(feats, gmm_opts, num_gauss, num_gauss_init, num_iters, - num_threads, &gmm); + int32 this_num_gauss_init = num_gauss_init; + int32 this_num_gauss = num_gauss; + + if (num_gauss_fraction != -1) { + this_num_gauss = feats.NumRows() * num_gauss_fraction; + if (this_num_gauss > max_gauss) + this_num_gauss = max_gauss; + if (this_num_gauss < min_gauss) + this_num_gauss = min_gauss; + } + + if (this_num_gauss_init <= 0 || this_num_gauss_init > this_num_gauss) + this_num_gauss_init = this_num_gauss; + + DiagGmm gmm(this_num_gauss_init, dim); + TrainGmm(feats, gmm_opts, this_num_gauss, this_num_gauss_init, + num_iters, num_threads, share_covars, &gmm); gmm_writer.Write(feature_reader.Key(), gmm); } @@ -224,11 +398,11 @@ int main(int argc, char *argv[]) { int32 num_err = 0; for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { + const std::vector &uttlist = spk2utt_reader.Value(); + Matrix feats; int64 num_read = 0; - const std::vector &uttlist = spk2utt_reader.Value(); - for (std::vector::const_iterator it = uttlist.begin(); it != uttlist.end(); ++it) { if (!feature_reader.HasKey(*it)) { @@ -237,7 +411,7 @@ int main(int argc, char *argv[]) { } const Matrix &this_feats = feature_reader.Value(*it); - if (dim == 0) { + if 
(feats.NumCols() == 0) { dim = this_feats.NumCols(); feats.Resize(num_frames, dim); } else if (this_feats.NumCols() != dim) { @@ -258,6 +432,8 @@ int main(int argc, char *argv[]) { } } } + + KALDI_ASSERT(num_read > 0); if (num_read < num_frames) { KALDI_WARN << "For speaker " << spk2utt_reader.Key() << ", " @@ -271,9 +447,23 @@ int main(int argc, char *argv[]) { << " input frames = " << percent << "%."; } - DiagGmm gmm(num_gauss_init, dim); - TrainGmm(feats, gmm_opts, num_gauss, num_gauss_init, num_iters, - num_threads, &gmm); + int32 this_num_gauss_init = num_gauss_init; + int32 this_num_gauss = num_gauss; + + if (num_gauss_fraction != -1) { + this_num_gauss = feats.NumRows() * num_gauss_fraction; + if (this_num_gauss > max_gauss) + this_num_gauss = max_gauss; + if (this_num_gauss < min_gauss) + this_num_gauss = min_gauss; + } + + if (this_num_gauss_init <= 0 || this_num_gauss_init > this_num_gauss) + this_num_gauss_init = this_num_gauss; + + DiagGmm gmm(this_num_gauss_init, dim); + TrainGmm(feats, gmm_opts, this_num_gauss, this_num_gauss_init, + num_iters, num_threads, share_covars, &gmm); gmm_writer.Write(spk2utt_reader.Key(), gmm); } @@ -288,4 +478,3 @@ int main(int argc, char *argv[]) { return -1; } } - From 9bd17f271009808f27ce6e0c463e2fb06e4ae7d5 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 14:44:55 -0500 Subject: [PATCH 333/530] asr_diarization: Add some debugging stuff to segmenter --- src/segmenter/segment.cc | 7 +++++++ src/segmenter/segment.h | 2 ++ src/segmenter/segmentation-post-processor.h | 6 ++++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/segmenter/segment.cc b/src/segmenter/segment.cc index b4f485c26bc..65a91a39264 100644 --- a/src/segmenter/segment.cc +++ b/src/segmenter/segment.cc @@ -31,5 +31,12 @@ void Segment::Read(std::istream &is, bool binary) { KALDI_ASSERT(end_frame >= start_frame && start_frame >= 0); } +std::ostream& operator<<(std::ostream& os, const Segment &seg) { + os << "[ "; + seg.Write(os, false); + os << "]"; + return os; +} + } // end namespace segmenter } // end namespace kaldi diff --git a/src/segmenter/segment.h b/src/segmenter/segment.h index f7ada5b92ee..b172fa854a8 100644 --- a/src/segmenter/segment.h +++ b/src/segmenter/segment.h @@ -96,6 +96,8 @@ class SegmentLengthComparator { return lhs.Length() < rhs.Length(); } }; + +std::ostream& operator<<(std::ostream& os, const Segment &seg); } // end namespace segmenter } // end namespace kaldi diff --git a/src/segmenter/segmentation-post-processor.h b/src/segmenter/segmentation-post-processor.h index 0de54d026e1..040d6c44383 100644 --- a/src/segmenter/segmentation-post-processor.h +++ b/src/segmenter/segmentation-post-processor.h @@ -62,9 +62,11 @@ struct SegmentationPostProcessingOptions { pad_label(-1), pad_length(-1), shrink_label(-1), shrink_length(-1), blend_short_segments_class(-1), max_blend_length(-1), - merge_adjacent_segments(false), max_intersegment_length(0), + max_remove_length(-1), + merge_adjacent_segments(false), + max_intersegment_length(0), max_segment_length(-1), overlap_length(0), - max_remove_length(-1), post_process_label(-1) { } + post_process_label(-1) { } void Register(OptionsItf *opts) { opts->Register("merge-labels", &merge_labels_csl, "Merge labels into a " From 1e6b3c9b79316dcd1e087e847c4d1aff6c204e45 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 14:49:48 -0500 Subject: [PATCH 334/530] asr_diarization: Prepare for SimpleHmm --- src/hmm/hmm-utils.cc | 2 -- src/hmm/transition-model.cc | 10 ++++++++++
src/hmm/transition-model.h | 5 ++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/hmm/hmm-utils.cc b/src/hmm/hmm-utils.cc index ab0b133f708..f9e1533daac 100644 --- a/src/hmm/hmm-utils.cc +++ b/src/hmm/hmm-utils.cc @@ -231,8 +231,6 @@ GetHmmAsFstSimple(std::vector phone_window, - - // The H transducer has a separate outgoing arc for each of the symbols in ilabel_info. fst::VectorFst *GetHTransducer (const std::vector > &ilabel_info, diff --git a/src/hmm/transition-model.cc b/src/hmm/transition-model.cc index 83edbaf5805..7973be69dcd 100644 --- a/src/hmm/transition-model.cc +++ b/src/hmm/transition-model.cc @@ -240,6 +240,16 @@ TransitionModel::TransitionModel(const ContextDependencyInterface &ctx_dep, Check(); } +void TransitionModel::Init(const ContextDependencyInterface &ctx_dep, + const HmmTopology &hmm_topo) { + topo_ = hmm_topo; + // First thing is to get all possible tuples. + ComputeTuples(ctx_dep); + ComputeDerived(); + InitializeProbs(); + Check(); +} + int32 TransitionModel::TupleToTransitionState(int32 phone, int32 hmm_state, int32 pdf, int32 self_loop_pdf) const { Tuple tuple(phone, hmm_state, pdf, self_loop_pdf); // Note: if this ever gets too expensive, which is unlikely, we can refactor diff --git a/src/hmm/transition-model.h b/src/hmm/transition-model.h index 33a0d55443e..c059e319dd5 100644 --- a/src/hmm/transition-model.h +++ b/src/hmm/transition-model.h @@ -128,10 +128,13 @@ class TransitionModel { TransitionModel(const ContextDependencyInterface &ctx_dep, const HmmTopology &hmm_topo); - /// Constructor that takes no arguments: typically used prior to calling Read. TransitionModel() { } + /// Does the same things as the constructor. + void Init(const ContextDependencyInterface &ctx_dep, + const HmmTopology &hmm_topo); + void Read(std::istream &is, bool binary); // note, no symbol table: topo object always read/written w/o symbols. 
void Write(std::ostream &os, bool binary) const; From eaa56b44bf7fecc8242ee671dedb5d0147461141 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 14:50:16 -0500 Subject: [PATCH 335/530] asr_diarization: Old version of SimpleHmm --- src/simplehmm/Makefile | 16 + src/simplehmm/decodable-simple-hmm.h | 88 ++++ src/simplehmm/simple-hmm-acc-stats-fsts.cc | 173 ++++++++ src/simplehmm/simple-hmm-computation.cc | 5 + src/simplehmm/simple-hmm-test.cc | 76 ++++ src/simplehmm/simple-hmm.cc | 456 +++++++++++++++++++++ src/simplehmm/simple-hmm.h | 274 +++++++++++++ 7 files changed, 1088 insertions(+) create mode 100644 src/simplehmm/Makefile create mode 100644 src/simplehmm/decodable-simple-hmm.h create mode 100644 src/simplehmm/simple-hmm-acc-stats-fsts.cc create mode 100644 src/simplehmm/simple-hmm-computation.cc create mode 100644 src/simplehmm/simple-hmm-test.cc create mode 100644 src/simplehmm/simple-hmm.cc create mode 100644 src/simplehmm/simple-hmm.h diff --git a/src/simplehmm/Makefile b/src/simplehmm/Makefile new file mode 100644 index 00000000000..89c9f70a8c3 --- /dev/null +++ b/src/simplehmm/Makefile @@ -0,0 +1,16 @@ +all: + + +include ../kaldi.mk + +TESTFILES = simple-hmm-test + +OBJFILES = simple-hmm.o simple-hmm-utils.o simple-hmm-graph-compiler.o + +LIBNAME = kaldi-simplehmm +ADDLIBS = ../hmm/kaldi-hmm.a ../decoder/kaldi-decoder.a \ + ../util/kaldi-util.a ../thread/kaldi-thread.a \ + ../matrix/kaldi-matrix.a ../base/kaldi-base.a + +include ../makefiles/default_rules.mk + diff --git a/src/simplehmm/decodable-simple-hmm.h b/src/simplehmm/decodable-simple-hmm.h new file mode 100644 index 00000000000..6f224ee6176 --- /dev/null +++ b/src/simplehmm/decodable-simple-hmm.h @@ -0,0 +1,88 @@ +// simplehmm/decodable-simple-hmm.h + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_SIMPLEHMM_DECODABLE_SIMPLE_HMM_H_ +#define KALDI_SIMPLEHMM_DECODABLE_SIMPLE_HMM_H_ + +#include + +#include "base/kaldi-common.h" +#include "simplehmm/simple-hmm.h" +#include "itf/decodable-itf.h" + +namespace kaldi { +namespace simple_hmm { + +class DecodableMatrixSimpleHmm: public DecodableInterface { + public: + // This constructor creates an object that will not delete "likes" + // when done. + DecodableMatrixSimpleHmm(const SimpleHmm &model, + const Matrix &likes, + BaseFloat scale): + model_(model), likes_(&likes), scale_(scale), delete_likes_(false) + { + if (likes.NumCols() != model.NumPdfs()) + KALDI_ERR << "DecodableMatrixScaledMapped: mismatch, matrix has " + << likes.NumCols() << " rows but transition-model has " + << model.NumPdfs() << " pdf-ids."; + } + + // This constructor creates an object that will delete "likes" + // when done. 
+ DecodableMatrixSimpleHmm(const SimpleHmm &model, + BaseFloat scale, + const Matrix *likes): + model_(model), likes_(likes), scale_(scale), delete_likes_(true) { + if (likes->NumCols() != model.NumPdfs()) + KALDI_ERR << "DecodableMatrixScaledMapped: mismatch, matrix has " + << likes->NumCols() << " rows but transition-model has " + << model.NumPdfs() << " pdf-ids."; + } + + virtual int32 NumFramesReady() const { return likes_->NumRows(); } + + virtual bool IsLastFrame(int32 frame) const { + KALDI_ASSERT(frame < NumFramesReady()); + return (frame == NumFramesReady() - 1); + } + + // Note, frames are numbered from zero. + virtual BaseFloat LogLikelihood(int32 frame, int32 tid) { + return scale_ * (*likes_)(frame, model_.TransitionIdToPdfClass(tid)); + } + + // Indices are one-based! This is for compatibility with OpenFst. + virtual int32 NumIndices() const { return model_.NumTransitionIds(); } + + virtual ~DecodableMatrixSimpleHmm() { + if (delete_likes_) delete likes_; + } + private: + const SimpleHmm &model_; // for tid to pdf mapping + const Matrix *likes_; + BaseFloat scale_; + bool delete_likes_; + KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableMatrixSimpleHmm); +}; + +} // namespace simple_hmm +} // namespace kaldi + +#endif // KALDI_SIMPLEHMM_DECODABLE_SIMPLE_HMM_H_ diff --git a/src/simplehmm/simple-hmm-acc-stats-fsts.cc b/src/simplehmm/simple-hmm-acc-stats-fsts.cc new file mode 100644 index 00000000000..de4a7528836 --- /dev/null +++ b/src/simplehmm/simple-hmm-acc-stats-fsts.cc @@ -0,0 +1,173 @@ +// simplehmmbin/simple-hmm-acc-stats-fsts.cc + +// Copyright 2016 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
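+
+// The pdf2class map (fourth positional argument, read below) is a plain-text
+// file with one "pdf-id class-id" pair per line; pdf-ids must start at 0 and
+// be consecutive, which the parsing loop enforces. An illustrative map for
+// three pdfs grouped into two classes (example values only):
+//   0 0
+//   1 0
+//   2 1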
+ +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "simplehmm/simple-hmm.h" +#include "hmm/hmm-utils.h" +#include "fstext/fstext-lib.h" +#include "decoder/decoder-wrappers.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + typedef kaldi::int32 int32; + using fst::SymbolTable; + using fst::VectorFst; + using fst::StdArc; + + const char *usage = + "Accumulate stats for simple HMM models from FSTs directly.\n" + "Usage: simple-hmm-acc-stats-fsts [options] " + " \n" + "e.g.: \n" + " simple-hmm-acc-stats-fsts 1.mdl ark:graphs.fsts scp:likes.scp pdf2class_map 1.stats\n"; + + ParseOptions po(usage); + + BaseFloat acoustic_scale = 1.0; + BaseFloat transition_scale = 1.0; + BaseFloat self_loop_scale = 1.0; + + po.Register("transition-scale", &transition_scale, + "Transition-probability scale [relative to acoustics]"); + po.Register("acoustic-scale", &acoustic_scale, + "Scaling factor for acoustic likelihoods"); + po.Register("self-loop-scale", &self_loop_scale, + "Scale of self-loop versus non-self-loop log probs [relative to acoustics]"); + po.Read(argc, argv); + + + if (po.NumArgs() != 5) { + po.PrintUsage(); + exit(1); + } + + std::string model_in_filename = po.GetArg(1), + fst_rspecifier = po.GetArg(2), + likes_rspecifier = po.GetArg(3), + pdf2class_map_rxfilename = po.GetArg(4), + accs_wxfilename = po.GetArg(5); + + simple_hmm::SimpleHmm model; + ReadKaldiObject(model_in_filename, &model); + + SequentialTableReader fst_reader(fst_rspecifier); + RandomAccessBaseFloatMatrixReader likes_reader(likes_rspecifier); + + std::vector pdf2class; + { + Input ki(pdf2class_map_rxfilename); + std::string line; + while (std::getline(ki.Stream(), line)) { + std::vector parts; + SplitStringToVector(line, " ", true, &parts); + if (parts.size() != 2) { + KALDI_ERR << "Invalid line " << line + << " in pdf2class-map " << pdf2class_map_rxfilename; + } + int32 pdf_id = std::atoi(parts[0].c_str()), + class_id = std::atoi(parts[1].c_str()); + + if (pdf_id != pdf2class.size()) + KALDI_ERR << "pdf2class-map is not sorted or does not contain " + << "pdf " << pdf_id - 1 << " in " + << pdf2class_map_rxfilename; + + if (pdf_id < pdf2class.size()) + KALDI_ERR << "Duplicate pdf " << pdf_id + << " in pdf2class-map " << pdf2class_map_rxfilename; + + pdf2class.push_back(class_id); + } + } + + int32 num_done = 0, num_err = 0; + double tot_like = 0.0, tot_t = 0.0; + int64 frame_count = 0; + + Vector transition_accs; + model.InitStats(&transition_accs); + + SimpleHmmComputation computation(model, pdf2class_map); + + for (; !fst_reader.Done(); fst_reader.Next()) { + const std::string &utt = fst_reader.Key(); + + if (!likes_reader.HasKey(utt)) { + num_err++; + KALDI_WARN << "No likes for utterance " << utt; + continue; + } + + const Matrix &likes = likes_reader.Value(utt); + VectorFst decode_fst(fst_reader.Value()); + fst_reader.FreeCurrent(); // this stops copy-on-write of the fst + // by deleting the fst inside the reader, since we're about to mutate + // the fst by adding transition probs. + + if (likes.NumRows() == 0) { + KALDI_WARN << "Zero-length utterance: " << utt; + num_err++; + continue; + } + + if (likes.NumCols() != pdf2class.size()) { + KALDI_ERR << "Mismatch in pdf dimension in log-likelihood matrix " + << "and pdf2class map; " << likes.NumCols() << " vs " + << pdf2class.size(); + } + + // Add transition-probs to the FST. 
+ AddTransitionProbs(model, transition_scale, self_loop_scale, + &decode_fst); + + BaseFloat tot_like_this_utt = 0.0, tot_weight = 0.0; + if (!computation.Compute(decode_fst, likes, acoustic_scale, + &transition_accs, + &tot_like_this_utt, &tot_weight)) { + KALDI_WARN << "Failed to do computation for utterance " << utt; + num_err++; + } + tot_like += tot_like_this_utt; + tot_t += tot_weight; + frame_count += likes.NumRows(); + + num_done++; + } + + KALDI_LOG << "Done " << num_done << " files, " << num_err + << " with errors."; + + KALDI_LOG << "Overall avg like per frame = " + << (tot_like/tot_t) << " over " << tot_t << " frames."; + + { + Output ko(accs_wxfilename, binary); + transition_accs.Write(ko.Stream(), binary); + } + KALDI_LOG << "Written accs."; + return (num_done != 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + + diff --git a/src/simplehmm/simple-hmm-computation.cc b/src/simplehmm/simple-hmm-computation.cc new file mode 100644 index 00000000000..e20f84169a1 --- /dev/null +++ b/src/simplehmm/simple-hmm-computation.cc @@ -0,0 +1,5 @@ +SimpleHmmComputation::SimpleHmmComputation( + const SimpleHmm &model, + const std::vector &num_pdfs, + VectorFst *decode_fst, + const Matrix &log_likes) diff --git a/src/simplehmm/simple-hmm-test.cc b/src/simplehmm/simple-hmm-test.cc new file mode 100644 index 00000000000..b2de0e05a08 --- /dev/null +++ b/src/simplehmm/simple-hmm-test.cc @@ -0,0 +1,76 @@ +// hmm/simple-hmm-test.cc + +// Copyright 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+ +#include "simplehmm/simple-hmm.h" +#include "hmm/hmm-test-utils.h" + +namespace kaldi { +namespace simple_hmm { + + +SimpleHmm *GenRandSimpleHmm() { + std::vector phones; + phones.push_back(1); + + std::vector num_pdf_classes; + num_pdf_classes.push_back(rand() + 1); + + HmmTopology topo = GenRandTopology(phones, num_pdf_classes); + + SimpleHmm *model = new SimpleHmm(topo); + + return model; +} + + +void TestSimpleHmm() { + + SimpleHmm *model = GenRandSimpleHmm(); + + bool binary = (rand() % 2 == 0); + + std::ostringstream os; + model->Write(os, binary); + + SimpleHmm model2; + std::istringstream is2(os.str()); + model2.Read(is2, binary); + + { + std::ostringstream os1, os2; + model->Write(os1, false); + model2.Write(os2, false); + KALDI_ASSERT(os1.str() == os2.str()); + KALDI_ASSERT(model->Compatible(model2)); + } + delete model; +} + + +} // end namespace simple_hmm +} // end namespace kaldi + + +int main() { + for (int i = 0; i < 2; i++) + kaldi::TestSimpleHmm(); + KALDI_LOG << "Test OK.\n"; +} + + diff --git a/src/simplehmm/simple-hmm.cc b/src/simplehmm/simple-hmm.cc new file mode 100644 index 00000000000..9af077cedc6 --- /dev/null +++ b/src/simplehmm/simple-hmm.cc @@ -0,0 +1,456 @@ +// hmm/simple-hmm.cc + +// Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) +// Johns Hopkins University (author: Guoguo Chen) +// 2016 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include +#include "simplehmm/simple-hmm.h" + +namespace kaldi { +namespace simple_hmm { + +void SimpleHmm::Initialize() { + KALDI_ASSERT(topo_.GetPhones().size() == 1); + + const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); + for (int32 j = 0; j < static_cast(entry.size()); j++) { // for each state... + int32 pdf_class = entry[j].forward_pdf_class; + if (pdf_class != kNoPdf) { + states_.push_back(j); + } + } + + // now states_ is populated with all possible pairs + // (hmm_state, pdf_class). + // sort to enable reverse lookup. + std::sort(states_.begin(), states_.end()); + // this sorting defines the transition-ids. +} + +void SimpleHmm::ComputeDerived() { + state2id_.resize(states_.size()+2); // indexed by transition-state, which + // is one based, but also an entry for one past end of list. + + int32 cur_transition_id = 1; + num_pdfs_ = 0; + for (int32 tstate = 1; + tstate <= static_cast(states_.size()+1); // not a typo. + tstate++) { + state2id_[tstate] = cur_transition_id; + if (static_cast(tstate) <= states_.size()) { + int32 hmm_state = states_[tstate-1]; + const HmmTopology::HmmState &state = topo_.TopologyForPhone(1)[hmm_state]; + int32 pdf_class = state.forward_pdf_class; + num_pdfs_ = std::max(num_pdfs_, pdf_class + 1); + int32 my_num_ids = static_cast(state.transitions.size()); + cur_transition_id += my_num_ids; // # trans out of this state. 
+ } + } + + id2state_.resize(cur_transition_id); // cur_transition_id is #transition-ids+1. + for (int32 tstate = 1; + tstate <= static_cast(states_.size()); tstate++) { + for (int32 tid = state2id_[tstate]; tid < state2id_[tstate+1]; tid++) { + id2state_[tid] = tstate; + } + } +} + +void SimpleHmm::InitializeProbs() { + log_probs_.Resize(NumTransitionIds()+1); // one-based array, zeroth element empty. + for (int32 trans_id = 1; trans_id <= NumTransitionIds(); trans_id++) { + int32 trans_state = id2state_[trans_id]; + int32 trans_index = trans_id - state2id_[trans_state]; + int32 hmm_state = states_[trans_state-1]; + const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); + KALDI_ASSERT(static_cast(hmm_state) < entry.size()); + BaseFloat prob = entry[hmm_state].transitions[trans_index].second; + if (prob <= 0.0) + KALDI_ERR << "SimpleHmm::InitializeProbs, zero " + "probability [should remove that entry in the topology]"; + if (prob > 1.0) + KALDI_WARN << "SimpleHmm::InitializeProbs, prob greater than one."; + log_probs_(trans_id) = Log(prob); + } + ComputeDerivedOfProbs(); +} + +void SimpleHmm::Check() const { + KALDI_ASSERT(topo_.GetPhones().size() == 1); + + KALDI_ASSERT(NumTransitionIds() != 0 && NumTransitionStates() != 0); + { + int32 sum = 0; + for (int32 ts = 1; ts <= NumTransitionStates(); ts++) sum += NumTransitionIndices(ts); + KALDI_ASSERT(sum == NumTransitionIds()); + } + for (int32 tid = 1; tid <= NumTransitionIds(); tid++) { + int32 tstate = TransitionIdToTransitionState(tid), + index = TransitionIdToTransitionIndex(tid); + KALDI_ASSERT(tstate > 0 && tstate <= NumTransitionStates() && index >= 0); + KALDI_ASSERT(tid == PairToTransitionId(tstate, index)); + int32 hmm_state = TransitionStateToHmmState(tstate); + KALDI_ASSERT(tstate == HmmStateToTransitionState(hmm_state)); + KALDI_ASSERT(log_probs_(tid) <= 0.0 && + log_probs_(tid) - log_probs_(tid) == 0.0); + // checking finite and non-positive (and not out-of-bounds). + } + + KALDI_ASSERT(num_pdfs_ == topo_.NumPdfClasses(1)); +} + +SimpleHmm::SimpleHmm( + const HmmTopology &hmm_topo): topo_(hmm_topo) { + Initialize(); + ComputeDerived(); + InitializeProbs(); + Check(); +} + +int32 SimpleHmm::HmmStateToTransitionState(int32 hmm_state) const { + // Note: if this ever gets too expensive, which is unlikely, we can refactor + // this code to sort first on pdf_class, and then index on pdf_class, so those + // that have the same pdf_class are in a contiguous range. + std::vector::const_iterator iter = + std::lower_bound(states_.begin(), states_.end(), hmm_state); + if (iter == states_.end() || !(*iter == hmm_state)) { + KALDI_ERR << "SimpleHmm::HmmStateToTransitionState; " + << "HmmState " << hmm_state << " not found." + << " (incompatible model?)"; + } + // states_ is indexed by transition_state-1, so add one.
+ return static_cast((iter - states_.begin())) + 1; +} + + +int32 SimpleHmm::NumTransitionIndices(int32 trans_state) const { + KALDI_ASSERT(static_cast(trans_state) <= states_.size()); + return static_cast(state2id_[trans_state+1]-state2id_[trans_state]); +} + +int32 SimpleHmm::TransitionIdToTransitionState(int32 trans_id) const { + KALDI_ASSERT(trans_id != 0 && + static_cast(trans_id) < id2state_.size()); + return id2state_[trans_id]; +} + +int32 SimpleHmm::TransitionIdToTransitionIndex(int32 trans_id) const { + KALDI_ASSERT(trans_id != 0 && + static_cast(trans_id) < id2state_.size()); + return trans_id - state2id_[id2state_[trans_id]]; +} + +int32 SimpleHmm::TransitionStateToPdfClass(int32 trans_state) const { + KALDI_ASSERT(static_cast(trans_state) <= states_.size()); + int32 hmm_state = states_[trans_state-1]; + const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); + KALDI_ASSERT(static_cast(hmm_state) < entry.size()); + return entry[hmm_state].forward_pdf_class; +} + +int32 SimpleHmm::TransitionStateToHmmState(int32 trans_state) const { + KALDI_ASSERT(static_cast(trans_state) <= states_.size()); + return states_[trans_state-1]; +} + +int32 SimpleHmm::PairToTransitionId(int32 trans_state, + int32 trans_index) const { + KALDI_ASSERT(static_cast(trans_state) <= states_.size()); + KALDI_ASSERT(trans_index < state2id_[trans_state+1] - state2id_[trans_state]); + return state2id_[trans_state] + trans_index; +} + +bool SimpleHmm::IsFinal(int32 trans_id) const { + KALDI_ASSERT(static_cast(trans_id) < id2state_.size()); + int32 trans_state = id2state_[trans_id]; + int32 trans_index = trans_id - state2id_[trans_state]; + int32 hmm_state = states_[trans_state-1]; + const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); + KALDI_ASSERT(static_cast(hmm_state) < entry.size()); + KALDI_ASSERT(static_cast(trans_index) < + entry[hmm_state].transitions.size()); + // return true if the transition goes to the final state of the + // topology entry. + return (entry[hmm_state].transitions[trans_index].first + 1 == + static_cast(entry.size())); +} + +// returns the self-loop transition-id, +// or zero if does not exist. +int32 SimpleHmm::SelfLoopOf(int32 trans_state) const { + KALDI_ASSERT(static_cast(trans_state-1) < states_.size()); + int32 hmm_state = states_[trans_state-1]; + const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); + KALDI_ASSERT(static_cast(hmm_state) < entry.size()); + for (int32 trans_index = 0; + trans_index < static_cast(entry[hmm_state].transitions.size()); + trans_index++) + if (entry[hmm_state].transitions[trans_index].first == hmm_state) + return PairToTransitionId(trans_state, trans_index); + return 0; // invalid transition id. +} + +void SimpleHmm::ComputeDerivedOfProbs() { + // this array indexed by transition-state with nothing in zeroth element. + non_self_loop_log_probs_.Resize(NumTransitionStates()+1); + for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { + int32 tid = SelfLoopOf(tstate); + if (tid == 0) { // no self-loop + non_self_loop_log_probs_(tstate) = 0.0; // log(1.0) + } else { + BaseFloat self_loop_prob = Exp(GetTransitionLogProb(tid)), + non_self_loop_prob = 1.0 - self_loop_prob; + if (non_self_loop_prob <= 0.0) { + KALDI_WARN << "ComputeDerivedOfProbs(): non-self-loop prob is " << non_self_loop_prob; + non_self_loop_prob = 1.0e-10; // just so we can continue... + } + non_self_loop_log_probs_(tstate) = Log(non_self_loop_prob); // will be negative. 
+ } + } +} + +void SimpleHmm::Read(std::istream &is, bool binary) { + ExpectToken(is, binary, "<SimpleHmm>"); + topo_.Read(is, binary); + Initialize(); + ComputeDerived(); + ExpectToken(is, binary, "<LogProbs>"); + log_probs_.Read(is, binary); + ExpectToken(is, binary, "</LogProbs>"); + ExpectToken(is, binary, "</SimpleHmm>"); + ComputeDerivedOfProbs(); + Check(); +} + +void SimpleHmm::Write(std::ostream &os, bool binary) const { + WriteToken(os, binary, "<SimpleHmm>"); + if (!binary) os << "\n"; + topo_.Write(os, binary); + if (!binary) os << "\n"; + WriteToken(os, binary, "<LogProbs>"); + if (!binary) os << "\n"; + log_probs_.Write(os, binary); + WriteToken(os, binary, "</LogProbs>"); + if (!binary) os << "\n"; + WriteToken(os, binary, "</SimpleHmm>"); + if (!binary) os << "\n"; +} + +BaseFloat SimpleHmm::GetTransitionProb(int32 trans_id) const { + return Exp(log_probs_(trans_id)); +} + +BaseFloat SimpleHmm::GetTransitionLogProb(int32 trans_id) const { + return log_probs_(trans_id); +} + +BaseFloat SimpleHmm::GetNonSelfLoopLogProb(int32 trans_state) const { + KALDI_ASSERT(trans_state != 0); + return non_self_loop_log_probs_(trans_state); +} + +BaseFloat SimpleHmm::GetTransitionLogProbIgnoringSelfLoops( + int32 trans_id) const { + KALDI_ASSERT(trans_id != 0); + KALDI_PARANOID_ASSERT(!IsSelfLoop(trans_id)); + return log_probs_(trans_id) - GetNonSelfLoopLogProb(TransitionIdToTransitionState(trans_id)); +} + +// stats are counts/weights, indexed by transition-id. +void SimpleHmm::MleUpdate(const Vector &stats, + const MleSimpleHmmUpdateConfig &cfg, + BaseFloat *objf_impr_out, + BaseFloat *count_out) { + BaseFloat count_sum = 0.0, objf_impr_sum = 0.0; + int32 num_skipped = 0, num_floored = 0; + KALDI_ASSERT(stats.Dim() == NumTransitionIds()+1); + for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { + int32 n = NumTransitionIndices(tstate); + KALDI_ASSERT(n>=1); + if (n > 1) { // no point updating if only one transition... + Vector counts(n); + for (int32 tidx = 0; tidx < n; tidx++) { + int32 tid = PairToTransitionId(tstate, tidx); + counts(tidx) = stats(tid); + } + double tstate_tot = counts.Sum(); + count_sum += tstate_tot; + if (tstate_tot < cfg.mincount) { num_skipped++; } + else { + Vector old_probs(n), new_probs(n); + for (int32 tidx = 0; tidx < n; tidx++) { + int32 tid = PairToTransitionId(tstate, tidx); + old_probs(tidx) = new_probs(tidx) = GetTransitionProb(tid); + } + for (int32 tidx = 0; tidx < n; tidx++) + new_probs(tidx) = counts(tidx) / tstate_tot; + for (int32 i = 0; i < 3; i++) { // keep flooring+renormalizing for 3 times.. + new_probs.Scale(1.0 / new_probs.Sum()); + for (int32 tidx = 0; tidx < n; tidx++) + new_probs(tidx) = std::max(new_probs(tidx), cfg.floor); + } + // Compute objf change + for (int32 tidx = 0; tidx < n; tidx++) { + if (new_probs(tidx) == cfg.floor) num_floored++; + double objf_change = counts(tidx) * (Log(new_probs(tidx)) + - Log(old_probs(tidx))); + objf_impr_sum += objf_change; + } + // Commit updated values. + for (int32 tidx = 0; tidx < n; tidx++) { + int32 tid = PairToTransitionId(tstate, tidx); + log_probs_(tid) = Log(new_probs(tidx)); + if (log_probs_(tid) - log_probs_(tid) != 0.0) + KALDI_ERR << "Log probs is inf or NaN: error in update or bad stats?"; + } + } + } + } + KALDI_LOG << "SimpleHmm::Update, objf change is " + << (objf_impr_sum / count_sum) << " per frame over " << count_sum + << " frames.
"; + KALDI_LOG << num_floored << " probabilities floored, " << num_skipped + << " out of " << NumTransitionStates() << " transition-states " + "skipped due to insuffient data (it is normal to have some skipped.)"; + if (objf_impr_out) *objf_impr_out = objf_impr_sum; + if (count_out) *count_out = count_sum; + ComputeDerivedOfProbs(); +} + + +// stats are counts/weights, indexed by transition-id. +void SimpleHmm::MapUpdate(const Vector &stats, + const MapSimpleHmmUpdateConfig &cfg, + BaseFloat *objf_impr_out, + BaseFloat *count_out) { + KALDI_ASSERT(cfg.tau > 0.0); + BaseFloat count_sum = 0.0, objf_impr_sum = 0.0; + KALDI_ASSERT(stats.Dim() == NumTransitionIds()+1); + for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { + int32 n = NumTransitionIndices(tstate); + KALDI_ASSERT(n>=1); + if (n > 1) { // no point updating if only one transition... + Vector counts(n); + for (int32 tidx = 0; tidx < n; tidx++) { + int32 tid = PairToTransitionId(tstate, tidx); + counts(tidx) = stats(tid); + } + double tstate_tot = counts.Sum(); + count_sum += tstate_tot; + Vector old_probs(n), new_probs(n); + for (int32 tidx = 0; tidx < n; tidx++) { + int32 tid = PairToTransitionId(tstate, tidx); + old_probs(tidx) = new_probs(tidx) = GetTransitionProb(tid); + } + for (int32 tidx = 0; tidx < n; tidx++) + new_probs(tidx) = (counts(tidx) + cfg.tau * old_probs(tidx)) / + (cfg.tau + tstate_tot); + // Compute objf change + for (int32 tidx = 0; tidx < n; tidx++) { + double objf_change = counts(tidx) * (Log(new_probs(tidx)) + - Log(old_probs(tidx))); + objf_impr_sum += objf_change; + } + // Commit updated values. + for (int32 tidx = 0; tidx < n; tidx++) { + int32 tid = PairToTransitionId(tstate, tidx); + log_probs_(tid) = Log(new_probs(tidx)); + if (log_probs_(tid) - log_probs_(tid) != 0.0) + KALDI_ERR << "Log probs is inf or NaN: error in update or bad stats?"; + } + } + } + KALDI_LOG << "Objf change is " << (objf_impr_sum / count_sum) + << " per frame over " << count_sum + << " frames."; + if (objf_impr_out) *objf_impr_out = objf_impr_sum; + if (count_out) *count_out = count_sum; + ComputeDerivedOfProbs(); +} + + +int32 SimpleHmm::TransitionIdToPdfClass(int32 trans_id) const { + KALDI_ASSERT(trans_id != 0 && + static_cast(trans_id) < id2state_.size()); + int32 trans_state = id2state_[trans_id]; + + int32 hmm_state = states_[trans_state-1]; + const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); + KALDI_ASSERT(static_cast(hmm_state) < entry.size()); + return entry[hmm_state].forward_pdf_class; +} + +int32 SimpleHmm::TransitionIdToHmmState(int32 trans_id) const { + KALDI_ASSERT(trans_id != 0 && + static_cast(trans_id) < id2state_.size()); + int32 trans_state = id2state_[trans_id]; + return states_[trans_state-1]; +} + +void SimpleHmm::Print(std::ostream &os, + const Vector *occs) { + if (occs != NULL) + KALDI_ASSERT(occs->Dim() == NumPdfs()); + for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { + int32 hmm_state = TransitionStateToHmmState(tstate); + int32 pdf_class = TransitionStateToPdfClass(tstate); + + os << " hmm-state = " << hmm_state; + os << " pdf-class = " << pdf_class << '\n'; + for (int32 tidx = 0; tidx < NumTransitionIndices(tstate); tidx++) { + int32 tid = PairToTransitionId(tstate, tidx); + BaseFloat p = GetTransitionProb(tid); + os << " Transition-id = " << tid << " p = " << p; + if (occs) { + os << " count of pdf-class = " << (*occs)(pdf_class); + } + // now describe what it's a transition to. 
+ if (IsSelfLoop(tid)) { + os << " [self-loop]\n"; + } else { + const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); + KALDI_ASSERT(static_cast(hmm_state) < entry.size()); + int32 next_hmm_state = entry[hmm_state].transitions[tidx].first; + KALDI_ASSERT(next_hmm_state != hmm_state); + os << " [" << hmm_state << " -> " << next_hmm_state << "]\n"; + } + } + } +} + +bool SimpleHmm::Compatible(const SimpleHmm &other) const { + return (topo_ == other.topo_ && states_ == other.states_ && + state2id_ == other.state2id_ && id2state_ == other.id2state_ + && NumPdfs() == other.NumPdfs()); +} + +bool SimpleHmm::IsSelfLoop(int32 trans_id) const { + KALDI_ASSERT(static_cast(trans_id) < id2state_.size()); + int32 trans_state = id2state_[trans_id]; + int32 trans_index = trans_id - state2id_[trans_state]; + int32 hmm_state = states_[trans_state-1]; + const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); + KALDI_ASSERT(static_cast(hmm_state) < entry.size()); + return (static_cast(trans_index) < entry[hmm_state].transitions.size() + && entry[hmm_state].transitions[trans_index].first == hmm_state); +} + +} // end namespace simple_hmm +} // end namespace kaldi + diff --git a/src/simplehmm/simple-hmm.h b/src/simplehmm/simple-hmm.h new file mode 100644 index 00000000000..ef3a5b9abde --- /dev/null +++ b/src/simplehmm/simple-hmm.h @@ -0,0 +1,274 @@ +// hmm/simple-hmm.h + +// Copyright 2009-2012 Microsoft Corporation +// Johns Hopkins University (author: Guoguo Chen) +// 2016 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_HMM_SIMPLE_HMM_H +#define KALDI_HMM_SIMPLE_HMM_H + +#include "base/kaldi-common.h" +#include "util/const-integer-set.h" +#include "fst/fst-decl.h" // forward declarations. +#include "hmm/hmm-topology.h" +#include "itf/options-itf.h" + +namespace kaldi { +namespace simple_hmm { + +/// \addtogroup hmm_group +/// @{ + +// The class SimpleHmm is a repository for the transition probabilities. +// The model is exactly like a single phone. It has a HMM topology defined in +// hmm-topology.h. Each HMM-state has a number of +// transitions (and final-probs) out of it. Each emitting HMM-state defined in +// the HmmTopology class has an associated class-id. +// The transition model associates the +// transition probs with the (HMM-state, class-id). We associate with +// each such pair a transition-state. Each +// transition-state has a number of associated probabilities to estimate; +// this depends on the number of transitions/final-probs in the topology for +// that HMM-state. Each probability has an associated transition-index. +// We associate with each (transition-state, transition-index) a unique transition-id. +// Each individual probability estimated by the transition-model is associated with a +// transition-id.
+// +// List of the various types of quantity referred to here and what they mean: +// HMM-state: a number (0, 1, 2...) that indexes TopologyEntry (see hmm-topology.h) +// transition-state: the states for which we estimate transition probabilities for transitions +// out of them. In some topologies, will map one-to-one with pdf-ids. +// One-based, since it appears on FSTs. +// transition-index: identifier of a transition (or final-prob) in the HMM. Indexes the +// "transitions" vector in HmmTopology::HmmState. [if it is out of range, +// equal to transitions.size(), it refers to the final-prob.] +// Zero-based. +// transition-id: identifier of a unique parameter of the +// SimpleHmm. +// Associated with a (transition-state, transition-index) pair. +// One-based, since it appears on FSTs. +// +// List of the possible mappings SimpleHmm can do: +// (HMM-state, class-id) -> transition-state +// (transition-state, transition-index) -> transition-id +// Reverse mappings: +// transition-id -> transition-state +// transition-id -> transition-index +// transition-state -> HMM-state +// transition-state -> class-id +// +// The main things the SimpleHmm object can do are: +// Get initialized (need HmmTopology objects). +// Read/write. +// Update [given a vector of counts indexed by transition-id]. +// Do the various integer mappings mentioned above. +// Get the probability (or log-probability) associated with a particular transition-id. + + +struct MleSimpleHmmUpdateConfig { + BaseFloat floor; + BaseFloat mincount; + MleSimpleHmmUpdateConfig(BaseFloat floor = 0.01, + BaseFloat mincount = 5.0): + floor(floor), mincount(mincount) { } + + void Register (OptionsItf *opts) { + opts->Register("transition-floor", &floor, + "Floor for transition probabilities"); + opts->Register("transition-min-count", &mincount, + "Minimum count required to update transitions from a state"); + } +}; + +struct MapSimpleHmmUpdateConfig { + BaseFloat tau; + MapSimpleHmmUpdateConfig(): tau(5.0) { } + + void Register (OptionsItf *opts) { + opts->Register("transition-tau", &tau, "Tau value for MAP estimation of transition " + "probabilities."); + } +}; + +class SimpleHmm { + + public: + /// Initialize the object [e.g. at the start of training]. + /// The class keeps a copy of the HmmTopology object. + SimpleHmm(const HmmTopology &hmm_topo); + + /// Constructor that takes no arguments: typically used prior to calling Read. + SimpleHmm() { } + + void Read(std::istream &is, bool binary); // note, no symbol table: topo object always read/written w/o symbols. + void Write(std::ostream &os, bool binary) const; + + + /// return reference to HMM-topology object. + const HmmTopology &GetTopo() const { return topo_; } + + /// \name Integer mapping functions + /// @{ + + int32 HmmStateToTransitionState(int32 hmm_state) const; + int32 PairToTransitionId(int32 trans_state, int32 trans_index) const; + int32 TransitionIdToTransitionState(int32 trans_id) const; + int32 TransitionIdToTransitionIndex(int32 trans_id) const; + int32 TransitionStateToHmmState(int32 trans_state) const; + int32 TransitionStateToPdfClass(int32 trans_state) const; + // returns the self-loop transition-id, or zero if + // this state doesn't have a self-loop. + int32 SelfLoopOf(int32 trans_state) const; + + int32 TransitionIdToPdfClass(int32 trans_id) const; + int32 TransitionIdToHmmState(int32 trans_id) const; + + /// @} + + bool IsFinal(int32 trans_id) const; // returns true if this trans_id goes to the final state + // (which is bound to be nonemitting). 
+ bool IsSelfLoop(int32 trans_id) const; // return true if this trans_id corresponds to a self-loop. + + /// Returns the total number of transition-ids (note, these are one-based). + inline int32 NumTransitionIds() const { return id2state_.size()-1; } + + /// Returns the number of transition-indices for a particular transition-state. + /// Note: "Indices" is the plural of "index". Index is not the same as "id", + /// here. A transition-index is a zero-based offset into the transitions + /// out of a particular transition state. + int32 NumTransitionIndices(int32 trans_state) const; + + /// Returns the total number of transition-states (note, these are one-based). + int32 NumTransitionStates() const { return states_.size(); } + + // NumPdfs() in the model. + int32 NumPdfs() const { return num_pdfs_; } + + // Transition-parameter-getting functions: + BaseFloat GetTransitionProb(int32 trans_id) const; + BaseFloat GetTransitionLogProb(int32 trans_id) const; + + // The following functions are more specialized functions for getting + // transition probabilities, that are provided for convenience. + + /// Returns the log-probability of a particular non-self-loop transition + /// after subtracting the probability mass of the self-loop and renormalizing; + /// will crash if called on a self-loop. Specifically: + /// for non-self-loops it returns the log of (that prob divided by (1 minus + /// self-loop-prob-for-that-state)). + BaseFloat GetTransitionLogProbIgnoringSelfLoops(int32 trans_id) const; + + /// Returns the log-prob of the non-self-loop probability + /// mass for this transition state. (you can get the self-loop prob, if a self-loop + /// exists, by calling GetTransitionLogProb(SelfLoopOf(trans_state)). + BaseFloat GetNonSelfLoopLogProb(int32 trans_state) const; + + /// Does Maximum Likelihood estimation. The stats are counts/weights, indexed + /// by transition-id. This was previously called Update(). + void MleUpdate(const Vector &stats, + const MleSimpleHmmUpdateConfig &cfg, + BaseFloat *objf_impr_out, + BaseFloat *count_out); + + /// Does Maximum A Posteriori (MAP) estimation. The stats are counts/weights, + /// indexed by transition-id. + void MapUpdate(const Vector &stats, + const MapSimpleHmmUpdateConfig &cfg, + BaseFloat *objf_impr_out, + BaseFloat *count_out); + + /// Print will print the simple HMM in a human-readable way, + /// for purposes of human + /// inspection. + /// The "occs" are optional (they are indexed by pdf-classes). + void Print(std::ostream &os, + const Vector *occs = NULL); + + + void InitStats(Vector *stats) const { stats->Resize(NumTransitionIds()+1); } + + void Accumulate(BaseFloat prob, int32 trans_id, Vector *stats) const { + KALDI_ASSERT(trans_id <= NumTransitionIds()); + (*stats)(trans_id) += prob; + // This is trivial and doesn't require class members, but leaves us more open + // to design changes than doing it manually. + } + + /// returns true if all the integer class members are identical (but does not + /// compare the transition probabilities. 
+  bool Compatible(const SimpleHmm &other) const;
+
+ private:
+  void MleUpdateShared(const Vector<double> &stats,
+                       const MleSimpleHmmUpdateConfig &cfg,
+                       BaseFloat *objf_impr_out, BaseFloat *count_out);
+  void MapUpdateShared(const Vector<double> &stats,
+                       const MapSimpleHmmUpdateConfig &cfg,
+                       BaseFloat *objf_impr_out, BaseFloat *count_out);
+
+  // called from constructor and Read(): initializes states_
+  void Initialize();
+  // called from constructor and Read(): computes state2id_ and id2state_
+  void ComputeDerived();
+  // computes quantities derived from log-probs (currently just
+  // non_self_loop_log_probs_); called whenever log-probs change.
+  void ComputeDerivedOfProbs();
+  void InitializeProbs();  // called from constructor.
+  void Check() const;
+
+  HmmTopology topo_;
+
+  /// States indexed by transition state minus one;
+  /// the states are in sorted order which allows us to do the reverse mapping
+  /// from state to transition state
+  std::vector<int32> states_;
+
+  /// Gives the first transition_id of each transition-state; indexed by
+  /// the transition-state.  Array indexed 1..num-transition-states+1
+  /// (the last one is needed so we can know the num-transitions of the last
+  /// transition-state.)
+  std::vector<int32> state2id_;
+
+  /// For each transition-id, the corresponding transition
+  /// state (indexed by transition-id).
+  std::vector<int32> id2state_;
+
+  /// For each transition-id, the corresponding log-prob.
+  /// Indexed by transition-id.
+  Vector<BaseFloat> log_probs_;
+
+  /// For each transition-state, the log of (1 - self-loop-prob).  Indexed by
+  /// transition-state.
+  Vector<BaseFloat> non_self_loop_log_probs_;
+
+  /// This is equal to one plus the highest-numbered pdf class.
+  int32 num_pdfs_;
+
+
+  DISALLOW_COPY_AND_ASSIGN(SimpleHmm);
+
+};
+
+/// @}
+
+
+}  // end namespace simple_hmm
+}  // end namespace kaldi
+
+
+#endif
From b05406d80a142a929ad9ca2c7683c74d8784c39d Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 14:51:38 -0500
Subject: [PATCH 336/530] asr_diarization: Add SimpleHmm

---
 src/hmm/simple-hmm-utils.cc | 146 ++++++++++++++++++++++++++++++++++++
 src/hmm/simple-hmm-utils.h  |  51 +++++++++++++
 src/hmm/simple-hmm.cc       |  79 +++++++++++++++++++
 src/hmm/simple-hmm.h        |  95 +++++++++++++++++++++++
 4 files changed, 371 insertions(+)
 create mode 100644 src/hmm/simple-hmm-utils.cc
 create mode 100644 src/hmm/simple-hmm-utils.h
 create mode 100644 src/hmm/simple-hmm.cc
 create mode 100644 src/hmm/simple-hmm.h

diff --git a/src/hmm/simple-hmm-utils.cc b/src/hmm/simple-hmm-utils.cc
new file mode 100644
index 00000000000..3406b7b56f8
--- /dev/null
+++ b/src/hmm/simple-hmm-utils.cc
@@ -0,0 +1,146 @@
+// hmm/simple-hmm-utils.cc
+
+// Copyright 2009-2011 Microsoft Corporation
+//                2016 Vimal Manohar
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
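+
+// Illustrative usage of the functions defined in this file (a sketch;
+// assumes "model" is an initialized SimpleHmm; the returned FST is owned
+// by the caller, see simple-hmm-utils.h):
+//   fst::VectorFst<fst::StdArc> *h_fst = GetHTransducer(model, 1.0, 1.0);
+//   // ... compose with a class-level acceptor, decode, etc. ...
+//   delete h_fst;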
+
+#include <vector>
+
+#include "hmm/simple-hmm-utils.h"
+#include "fst/fstlib.h"
+#include "fstext/fstext-lib.h"
+
+namespace kaldi {
+
+fst::VectorFst<fst::StdArc>* GetHTransducer(
+    const SimpleHmm &model,
+    BaseFloat transition_scale, BaseFloat self_loop_scale) {
+  using namespace fst;
+  typedef StdArc Arc;
+  typedef Arc::Weight Weight;
+  typedef Arc::StateId StateId;
+  typedef Arc::Label Label;
+
+  VectorFst<StdArc> *fst = GetSimpleHmmAsFst(model, transition_scale,
+                                             self_loop_scale);
+
+  for (StateIterator<VectorFst<Arc> > siter(*fst);
+       !siter.Done(); siter.Next()) {
+    Arc::StateId s = siter.Value();
+    for (MutableArcIterator<VectorFst<Arc> > aiter(fst, s);
+         !aiter.Done(); aiter.Next()) {
+      Arc arc = aiter.Value();
+      if (arc.ilabel == 0) {
+        KALDI_ASSERT(arc.olabel == 0);
+        continue;
+      }
+
+      KALDI_ASSERT(arc.ilabel == arc.olabel &&
+                   arc.ilabel <= model.NumTransitionIds());
+
+      arc.olabel = model.TransitionIdToPdf(arc.ilabel) + 1;
+      aiter.SetValue(arc);
+    }
+  }
+
+  return fst;
+}
+
+fst::VectorFst<fst::StdArc> *GetSimpleHmmAsFst(
+    const SimpleHmm &model,
+    BaseFloat transition_scale, BaseFloat self_loop_scale) {
+  using namespace fst;
+  typedef StdArc Arc;
+  typedef Arc::Weight Weight;
+  typedef Arc::StateId StateId;
+  typedef Arc::Label Label;
+
+  KALDI_ASSERT(model.NumPdfs() > 0);
+  const HmmTopology &topo = model.GetTopo();
+  // This special Hmm has only one phone
+  const HmmTopology::TopologyEntry &entry = topo.TopologyForPhone(1);
+
+  VectorFst<Arc> *ans = new VectorFst<Arc>;
+
+  // Create a mini-FST with a superfinal state [in case we have emitting
+  // final-states, which we usually will.]
+
+  std::vector<StateId> state_ids;
+  for (size_t i = 0; i < entry.size(); i++)
+    state_ids.push_back(ans->AddState());
+  KALDI_ASSERT(state_ids.size() > 1);  // Or invalid topology entry.
+  ans->SetStart(state_ids[0]);
+  StateId final_state = state_ids.back();
+  ans->SetFinal(final_state, Weight::One());
+
+  for (int32 hmm_state = 0;
+       hmm_state < static_cast<int32>(entry.size());
+       hmm_state++) {
+    int32 pdf_class = entry[hmm_state].forward_pdf_class;
+    int32 self_loop_pdf_class = entry[hmm_state].self_loop_pdf_class;
+    KALDI_ASSERT(self_loop_pdf_class == pdf_class);
+
+    if (pdf_class != kNoPdf) {
+      KALDI_ASSERT(pdf_class < model.NumPdfs());
+    }
+
+    int32 trans_idx;
+    for (trans_idx = 0;
+         trans_idx < static_cast<int32>(entry[hmm_state].transitions.size());
+         trans_idx++) {
+      BaseFloat log_prob;
+      Label label;
+      int32 dest_state = entry[hmm_state].transitions[trans_idx].first;
+
+      if (pdf_class == kNoPdf) {
+        // no pdf, hence non-estimated probability.  very unusual case.  [would
+        // not happen with normal topology].  There is no transition-state
+        // involved in this case.
+        KALDI_ASSERT(hmm_state != dest_state);
+        log_prob = transition_scale
+            * Log(entry[hmm_state].transitions[trans_idx].second);
+        label = 0;
+      } else {  // normal probability.
+        int32 trans_state =
+            model.TupleToTransitionState(1, hmm_state, pdf_class, pdf_class);
+        int32 trans_id =
+            model.PairToTransitionId(trans_state, trans_idx);
+
+        log_prob = model.GetTransitionLogProb(trans_id);
+
+        if (hmm_state == dest_state)
+          log_prob *= self_loop_scale;
+        else
+          log_prob *= transition_scale;
+        // log_prob is a negative number (or zero)...
+        label = trans_id;
+      }
+      ans->AddArc(state_ids[hmm_state],
+                  Arc(label, label, Weight(-log_prob),
+                      state_ids[dest_state]));
+    }
+  }
+
+  fst::RemoveEpsLocal(ans);  // this is safe and will not blow up.
+  // Now apply probability scale.
+  // We waited till after the possible weight-pushing steps,
+  // because weight-pushing needs "real" weights in order to work.
+  // ApplyProbabilityScale(config.transition_scale, ans);
+  return ans;
+}
+
+}  // end namespace kaldi
diff --git a/src/hmm/simple-hmm-utils.h b/src/hmm/simple-hmm-utils.h
new file mode 100644
index 00000000000..bd0a3a15702
--- /dev/null
+++ b/src/hmm/simple-hmm-utils.h
@@ -0,0 +1,51 @@
+// hmm/simple-hmm-utils.h
+
+// Copyright 2009-2011 Microsoft Corporation
+//                2016 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_HMM_SIMPLE_HMM_UTILS_H_
+#define KALDI_HMM_SIMPLE_HMM_UTILS_H_
+
+#include "hmm/hmm-utils.h"
+#include "hmm/simple-hmm.h"
+#include "fst/fstlib.h"
+
+namespace kaldi {
+
+fst::VectorFst<fst::StdArc>* GetHTransducer(
+    const SimpleHmm &model,
+    BaseFloat transition_scale = 1.0, BaseFloat self_loop_scale = 1.0);
+
+/**
+  * Converts the SimpleHmm into an H transducer; the result is owned by the
+  * caller.  Caution: our version of
+  * the H transducer does not include self-loops; you have to add those later.
+  * See \ref hmm_graph_get_h_transducer.  The H transducer has
+  * transition-ids on its input side.
+  * The output side contains the one-indexed mappings of pdf_ids, typically
+  * just pdf_id + 1.
+  */
+fst::VectorFst<fst::StdArc>*
+GetSimpleHmmAsFst (const SimpleHmm &model,
+                   BaseFloat transition_scale = 1.0,
+                   BaseFloat self_loop_scale = 1.0);
+
+
+}  // end namespace kaldi
+
+#endif
diff --git a/src/hmm/simple-hmm.cc b/src/hmm/simple-hmm.cc
new file mode 100644
index 00000000000..2db6bfbf297
--- /dev/null
+++ b/src/hmm/simple-hmm.cc
@@ -0,0 +1,79 @@
+// hmm/simple-hmm.cc
+
+// Copyright 2016 Vimal Manohar
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
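+
+// Worked example for the first GetPdfInfo() overload below (illustrative):
+// with NumPdfs() == 3, *pdf_info is filled with
+//   { {(1,0)}, {(1,1)}, {(1,2)} },
+// i.e. each pdf-id maps to the single (phone, pdf-class) pair (1, pdf-id),
+// since this fake context-dependency has one phone and an identity mapping
+// from pdf-class to pdf-id.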
+
+#include "hmm/simple-hmm.h"
+
+namespace kaldi {
+
+void SimpleHmm::FakeContextDependency::GetPdfInfo(
+    const std::vector<int32> &phones,  // list of phones
+    const std::vector<int32> &num_pdf_classes,  // indexed by phone,
+    std::vector<std::vector<std::pair<int32, int32> > > *pdf_info) const {
+  KALDI_ASSERT(phones.size() == 1 && phones[0] == 1);
+  KALDI_ASSERT(num_pdf_classes.size() == 2 &&
+               num_pdf_classes[1] == NumPdfs());
+  KALDI_ASSERT(pdf_info);
+  pdf_info->resize(NumPdfs(),
+                   std::vector<std::pair<int32, int32> >());
+
+  for (int32 pdf = 0; pdf < NumPdfs(); pdf++) {
+    (*pdf_info)[pdf].push_back(std::make_pair(1, pdf));
+  }
+}
+
+void SimpleHmm::FakeContextDependency::GetPdfInfo(
+    const std::vector<int32> &phones,
+    const std::vector<std::vector<std::pair<int32, int32> > > &pdf_class_pairs,
+    std::vector<std::vector<std::vector<std::pair<int32, int32> > > > *pdf_info) const {
+  KALDI_ASSERT(pdf_info);
+  KALDI_ASSERT(phones.size() == 1 && phones[0] == 1);
+  KALDI_ASSERT(pdf_class_pairs.size() == 2);
+
+  pdf_info->resize(2);
+  (*pdf_info)[1].resize(pdf_class_pairs[1].size());
+
+  for (size_t j = 0; j < pdf_class_pairs[1].size(); j++) {
+    int32 pdf_class = pdf_class_pairs[1][j].first,
+        self_loop_pdf_class = pdf_class_pairs[1][j].second;
+    KALDI_ASSERT(pdf_class == self_loop_pdf_class &&
+                 pdf_class < NumPdfs());
+
+    (*pdf_info)[1][j].push_back(std::make_pair(pdf_class, pdf_class));
+  }
+}
+
+void SimpleHmm::Read(std::istream &is, bool binary) {
+  TransitionModel::Read(is, binary);
+  ctx_dep_.Init(NumPdfs());
+  CheckSimpleHmm();
+}
+
+void SimpleHmm::CheckSimpleHmm() const {
+  KALDI_ASSERT(NumPhones() == 1);
+  KALDI_ASSERT(GetPhones()[0] == 1);
+  const HmmTopology::TopologyEntry &entry = GetTopo().TopologyForPhone(1);
+  for (int32 j = 0; j < static_cast<int32>(entry.size()); j++) {  // for each state...
+    int32 forward_pdf_class = entry[j].forward_pdf_class,
+        self_loop_pdf_class = entry[j].self_loop_pdf_class;
+    KALDI_ASSERT(forward_pdf_class == self_loop_pdf_class &&
+                 forward_pdf_class < NumPdfs());
+  }
+}
+
+}  // end namespace kaldi
diff --git a/src/hmm/simple-hmm.h b/src/hmm/simple-hmm.h
new file mode 100644
index 00000000000..4b40f212401
--- /dev/null
+++ b/src/hmm/simple-hmm.h
@@ -0,0 +1,95 @@
+// hmm/simple-hmm.h
+
+// Copyright 2016 Vimal Manohar
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_HMM_SIMPLE_HMM_H_
+#define KALDI_HMM_SIMPLE_HMM_H_
+
+#include "base/kaldi-common.h"
+#include "hmm/transition-model.h"
+#include "itf/context-dep-itf.h"
+
+namespace kaldi {
+
+class SimpleHmm: public TransitionModel {
+ public:
+  SimpleHmm(const HmmTopology &hmm_topo):
+      ctx_dep_(hmm_topo) {
+    Init(ctx_dep_, hmm_topo);
+    CheckSimpleHmm();
+  }
+
+  SimpleHmm(): TransitionModel() { }
+
+  void Read(std::istream &is, bool binary);  // note, no symbol table: topo object always read/written w/o symbols.
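+
+  // Typical construction (a sketch; "topo" is assumed to be an HmmTopology
+  // with the single phone 1, obtained elsewhere):
+  //   SimpleHmm hmm(topo);  // checks the one-phone invariants.
+  // or, reading a previously written model from disk:
+  //   SimpleHmm hmm;
+  //   ReadKaldiObject(model_rxfilename, &hmm);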
+ + private: + void CheckSimpleHmm() const; + + class FakeContextDependency: public ContextDependencyInterface { + public: + int ContextWidth() const { return 1; } + int CentralPosition() const { return 0; } + + bool Compute(const std::vector &phoneseq, int32 pdf_class, + int32 *pdf_id) const { + if (phoneseq.size() == 1 && phoneseq[0] == 1) { + *pdf_id = pdf_class; + return true; + } + return false; + } + + void GetPdfInfo( + const std::vector &phones, // list of phones + const std::vector &num_pdf_classes, // indexed by phone, + std::vector > > *pdf_info) const; + + void GetPdfInfo( + const std::vector &phones, + const std::vector > > &pdf_class_pairs, + std::vector > > > *pdf_info) + const; + + void Init(int32 num_pdfs) { num_pdfs_ = num_pdfs; } + + int32 NumPdfs() const { return num_pdfs_; } + + FakeContextDependency(const HmmTopology &topo) { + KALDI_ASSERT(topo.GetPhones().size() == 1); + num_pdfs_ = topo.NumPdfClasses(1); + } + + FakeContextDependency(): num_pdfs_(0) { } + + ContextDependencyInterface* Copy() const { + FakeContextDependency *copy = new FakeContextDependency(); + copy->Init(num_pdfs_); + return copy; + } + + private: + int32 num_pdfs_; + } ctx_dep_; + + DISALLOW_COPY_AND_ASSIGN(SimpleHmm); +}; + +} // end namespace kaldi + +#endif // KALDI_HMM_SIMPLE_HMM_H_ From 3d4cba868d2b707217e571f5bfc99efb2402d064 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 14:53:11 -0500 Subject: [PATCH 337/530] asr_diarization: Moving SimpleHmm --- src/Makefile | 17 +- src/decoder/Makefile | 3 +- src/decoder/simple-hmm-graph-compiler.cc | 128 ++++++ src/decoder/simple-hmm-graph-compiler.h | 100 +++++ src/hmm/simple-hmm.cc | 79 ---- src/hmm/simple-hmm.h | 95 ----- src/{hmm => simplehmm}/simple-hmm-utils.cc | 0 src/{hmm => simplehmm}/simple-hmm-utils.h | 0 src/simplehmm/simple-hmm.cc | 467 ++------------------- src/simplehmm/simple-hmm.h | 291 +++---------- 10 files changed, 340 insertions(+), 840 deletions(-) create mode 100644 src/decoder/simple-hmm-graph-compiler.cc create mode 100644 src/decoder/simple-hmm-graph-compiler.h delete mode 100644 src/hmm/simple-hmm.cc delete mode 100644 src/hmm/simple-hmm.h rename src/{hmm => simplehmm}/simple-hmm-utils.cc (100%) rename src/{hmm => simplehmm}/simple-hmm-utils.h (100%) diff --git a/src/Makefile b/src/Makefile index a42f78f4742..7a7b672e607 100644 --- a/src/Makefile +++ b/src/Makefile @@ -6,16 +6,16 @@ SHELL := /bin/bash SUBDIRS = base matrix util feat tree thread gmm transform sgmm \ - fstext hmm lm decoder lat kws cudamatrix nnet segmenter \ + fstext hmm lm decoder lat kws cudamatrix nnet segmenter simplehmm \ bin fstbin gmmbin fgmmbin sgmmbin featbin \ nnetbin latbin sgmm2 sgmm2bin nnet2 nnet3 chain nnet3bin nnet2bin kwsbin \ - ivector ivectorbin online2 online2bin lmbin chainbin segmenterbin + ivector ivectorbin online2 online2bin lmbin chainbin segmenterbin simplehmmbin MEMTESTDIRS = base matrix util feat tree thread gmm transform sgmm \ - fstext hmm lm decoder lat nnet kws chain segmenter \ + fstext hmm lm decoder lat nnet kws chain segmenter simplehmm \ bin fstbin gmmbin fgmmbin sgmmbin featbin \ nnetbin latbin sgmm2 nnet2 nnet3 nnet2bin nnet3bin sgmm2bin kwsbin \ - ivector ivectorbin online2 online2bin lmbin segmenterbin + ivector ivectorbin online2 online2bin lmbin segmenterbin simplehmmbin CUDAMEMTESTDIR = cudamatrix @@ -153,9 +153,9 @@ $(EXT_SUBDIRS) : mklibdir # this is necessary for correct parallel compilation #1)The tools depend on all the libraries -bin fstbin gmmbin fgmmbin sgmmbin sgmm2bin featbin nnetbin 
nnet2bin nnet3bin chainbin latbin ivectorbin lmbin kwsbin online2bin: \ +bin fstbin gmmbin fgmmbin sgmmbin sgmm2bin featbin nnetbin nnet2bin nnet3bin chainbin latbin ivectorbin lmbin kwsbin online2bin segmenterbin simplehmmbin: \ base matrix util feat tree thread gmm transform sgmm sgmm2 fstext hmm \ - lm decoder lat cudamatrix nnet nnet2 nnet3 ivector chain kws online2 segmenter + lm decoder lat cudamatrix nnet nnet2 nnet3 ivector chain kws online2 segmenter simplehmm #2)The libraries have inter-dependencies base: base/.depend.mk @@ -171,7 +171,7 @@ sgmm2: base util matrix gmm tree transform thread hmm fstext: base util thread matrix tree hmm: base tree matrix util thread lm: base util thread matrix fstext -decoder: base util thread matrix gmm sgmm hmm tree transform lat +decoder: base util thread matrix gmm sgmm hmm simplehmm tree transform lat lat: base util thread hmm tree matrix cudamatrix: base util thread matrix nnet: base util hmm tree thread matrix cudamatrix @@ -179,7 +179,8 @@ nnet2: base util matrix thread lat gmm hmm tree transform cudamatrix nnet3: base util matrix thread lat gmm hmm tree transform cudamatrix chain fstext chain: lat hmm tree fstext matrix cudamatrix util thread base ivector: base util matrix thread transform tree gmm -segmenter: base matrix util gmm thread +segmenter: base matrix util gmm thread tree +simplehmm: base tree matrix util thread hmm #3)Dependencies for optional parts of Kaldi onlinebin: base matrix util feat tree gmm transform sgmm sgmm2 fstext hmm lm decoder lat cudamatrix nnet nnet2 online thread # python-kaldi-decoding: base matrix util feat tree thread gmm transform sgmm sgmm2 fstext hmm decoder lat online diff --git a/src/decoder/Makefile b/src/decoder/Makefile index fe489d1cb3f..3d2112629a2 100644 --- a/src/decoder/Makefile +++ b/src/decoder/Makefile @@ -7,11 +7,12 @@ TESTFILES = OBJFILES = training-graph-compiler.o lattice-simple-decoder.o lattice-faster-decoder.o \ lattice-faster-online-decoder.o simple-decoder.o faster-decoder.o \ - decoder-wrappers.o + decoder-wrappers.o simple-hmm-graph-compiler.o LIBNAME = kaldi-decoder ADDLIBS = ../lat/kaldi-lat.a ../sgmm/kaldi-sgmm.a ../hmm/kaldi-hmm.a \ + ../simplehmm/kaldi-simplehmm.a \ ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \ ../tree/kaldi-tree.a ../util/kaldi-util.a ../thread/kaldi-thread.a \ ../matrix/kaldi-matrix.a ../base/kaldi-base.a diff --git a/src/decoder/simple-hmm-graph-compiler.cc b/src/decoder/simple-hmm-graph-compiler.cc new file mode 100644 index 00000000000..5f91380ca06 --- /dev/null +++ b/src/decoder/simple-hmm-graph-compiler.cc @@ -0,0 +1,128 @@ +// decoder/simple-hmm-graph-compiler.cc + +// Copyright 2009-2011 Microsoft Corporation +// 2016 Vimal Manohar + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
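+
+// The compilation pipeline implemented below is, roughly:
+//   per-frame alignment -> linear class-level acceptor (MakeLinearAcceptor)
+//   -> compose with the H transducer (GetHTransducer) -> determinize
+//   -> optionally remove epsilons -> minimize.
+// See CompileGraphFromAlignment() and CompileGraph() below for details.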
+
+#include "decoder/simple-hmm-graph-compiler.h"
+#include "simplehmm/simple-hmm-utils.h"  // for GetHTransducer
+
+namespace kaldi {
+
+bool SimpleHmmGraphCompiler::CompileGraphFromAlignment(
+    const std::vector<int32> &alignment,
+    fst::VectorFst<fst::StdArc> *out_fst) {
+  using namespace fst;
+  VectorFst<StdArc> class_fst;
+  MakeLinearAcceptor(alignment, &class_fst);
+  return CompileGraph(class_fst, out_fst);
+}
+
+bool SimpleHmmGraphCompiler::CompileGraph(
+    const fst::VectorFst<fst::StdArc> &class_fst,
+    fst::VectorFst<fst::StdArc> *out_fst) {
+  using namespace fst;
+  KALDI_ASSERT(out_fst);
+  KALDI_ASSERT(class_fst.Start() != kNoStateId);
+
+  if (GetVerboseLevel() >= 4) {
+    KALDI_VLOG(4) << "Classes FST: ";
+    WriteFstKaldi(KALDI_LOG, false, class_fst);
+  }
+
+  VectorFst<StdArc> *H = GetHTransducer(model_, opts_.transition_scale,
+                                        opts_.self_loop_scale);
+
+  if (GetVerboseLevel() >= 4) {
+    KALDI_VLOG(4) << "HTransducer:";
+    WriteFstKaldi(KALDI_LOG, false, *H);
+  }
+
+  // Epsilon-removal and determinization combined.
+  // This will fail if not determinizable.
+  DeterminizeStarInLog(H);
+
+  if (GetVerboseLevel() >= 4) {
+    KALDI_VLOG(4) << "HTransducer determinized:";
+    WriteFstKaldi(KALDI_LOG, false, *H);
+  }
+
+  VectorFst<StdArc> &trans2class_fst = *out_fst;  // transition-id to class.
+  TableCompose(*H, class_fst, &trans2class_fst);
+
+  KALDI_ASSERT(trans2class_fst.Start() != kNoStateId);
+
+  if (GetVerboseLevel() >= 4) {
+    KALDI_VLOG(4) << "trans2class_fst:";
+    WriteFstKaldi(KALDI_LOG, false, trans2class_fst);
+  }
+
+  // Epsilon-removal and determinization combined.
+  // This will fail if not determinizable.
+  DeterminizeStarInLog(&trans2class_fst);
+
+  // we elect not to remove epsilons after this phase, as it is
+  // a little slow.
+  if (opts_.rm_eps)
+    RemoveEpsLocal(&trans2class_fst);
+
+  // Encoded minimization.
+  MinimizeEncoded(&trans2class_fst);
+
+  delete H;
+  return true;
+}
+
+bool SimpleHmmGraphCompiler::CompileGraphsFromAlignments(
+    const std::vector<std::vector<int32> > &alignments,
+    std::vector<fst::VectorFst<fst::StdArc>*> *out_fsts) {
+  using namespace fst;
+  std::vector<const VectorFst<StdArc>* > class_fsts(alignments.size());
+  for (size_t i = 0; i < alignments.size(); i++) {
+    VectorFst<StdArc> *class_fst = new VectorFst<StdArc>();
+    MakeLinearAcceptor(alignments[i], class_fst);
+    class_fsts[i] = class_fst;
+  }
+  bool ans = CompileGraphs(class_fsts, out_fsts);
+  for (size_t i = 0; i < alignments.size(); i++)
+    delete class_fsts[i];
+  return ans;
+}
+
+bool SimpleHmmGraphCompiler::CompileGraphs(
+    const std::vector<const fst::VectorFst<fst::StdArc>* > &class_fsts,
+    std::vector<fst::VectorFst<fst::StdArc>* > *out_fsts) {
+
+  using namespace fst;
+  KALDI_ASSERT(out_fsts && out_fsts->empty());
+  out_fsts->resize(class_fsts.size(), NULL);
+  if (class_fsts.empty()) return true;
+
+  for (size_t i = 0; i < class_fsts.size(); i++) {
+    const VectorFst<StdArc> *class_fst = class_fsts[i];
+    VectorFst<StdArc> out_fst;
+
+    CompileGraph(*class_fst, &out_fst);
+
+    (*out_fsts)[i] = out_fst.Copy();
+  }
+
+  return true;
+}
+
+
+}  // end namespace kaldi
diff --git a/src/decoder/simple-hmm-graph-compiler.h b/src/decoder/simple-hmm-graph-compiler.h
new file mode 100644
index 00000000000..dcc8f8fd2ba
--- /dev/null
+++ b/src/decoder/simple-hmm-graph-compiler.h
@@ -0,0 +1,100 @@
+// decoder/simple-hmm-graph-compiler.h
+
+// Copyright 2009-2011 Microsoft Corporation
+//                2016 Vimal Manohar
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_DECODER_SIMPLE_HMM_GRAPH_COMPILER_H_
+#define KALDI_DECODER_SIMPLE_HMM_GRAPH_COMPILER_H_
+
+#include "base/kaldi-common.h"
+#include "simplehmm/simple-hmm.h"
+#include "fst/fstlib.h"
+#include "fstext/fstext-lib.h"
+
+
+// This header provides functionality to compile a graph directly from an
+// alignment, where the alignment is of classes that are simple mappings
+// of 'pdf-ids' (the same as pdf-classes for SimpleHmm).
+
+namespace kaldi {
+
+struct SimpleHmmGraphCompilerOptions {
+  BaseFloat transition_scale;
+  BaseFloat self_loop_scale;
+  bool rm_eps;
+
+  explicit SimpleHmmGraphCompilerOptions(BaseFloat transition_scale = 1.0,
+                                         BaseFloat self_loop_scale = 1.0):
+      transition_scale(transition_scale),
+      self_loop_scale(self_loop_scale),
+      rm_eps(true) { }
+
+  void Register(OptionsItf *opts) {
+    opts->Register("transition-scale", &transition_scale, "Scale of transition "
+                   "probabilities (excluding self-loops)");
+    opts->Register("self-loop-scale", &self_loop_scale, "Scale of self-loop vs. "
+                   "non-self-loop probability mass ");
+    opts->Register("rm-eps", &rm_eps, "Remove [most] epsilons before minimization (only applicable "
+                   "if disambig symbols present)");
+  }
+};
+
+
+class SimpleHmmGraphCompiler {
+ public:
+  SimpleHmmGraphCompiler(const SimpleHmm &model,  // Maintains reference to this object.
+                         const SimpleHmmGraphCompilerOptions &opts):
+      model_(model), opts_(opts) { }
+
+
+  /// CompileGraph compiles a single training graph: its input is a
+  /// weighted acceptor (G) at the class level; its output is an HCLG-type
+  /// graph.  Note: G could actually be a transducer; it would also work.
+  /// This function is not const for technical reasons involving the cache.
+  /// If not for "table_compose" we could make it const.
+  bool CompileGraph(const fst::VectorFst<fst::StdArc> &class_fst,
+                    fst::VectorFst<fst::StdArc> *out_fst);
+
+  // CompileGraphs allows you to compile a number of graphs at the same
+  // time.  This consumes more memory but is faster.
+  bool CompileGraphs(
+      const std::vector<const fst::VectorFst<fst::StdArc> *> &class_fsts,
+      std::vector<fst::VectorFst<fst::StdArc> *> *out_fsts);
+
+  // This version creates an FST from the per-frame alignment and calls
+  // CompileGraph.
+  bool CompileGraphFromAlignment(const std::vector<int32> &alignment,
+                                 fst::VectorFst<fst::StdArc> *out_fst);
+
+  // This function creates FSTs from the per-frame alignments and calls
+  // CompileGraphs.
+  bool CompileGraphsFromAlignments(
+      const std::vector<std::vector<int32> > &alignments,
+      std::vector<fst::VectorFst<fst::StdArc> *> *out_fsts);
+
+  ~SimpleHmmGraphCompiler() { }
+ private:
+  const SimpleHmm &model_;
+
+  SimpleHmmGraphCompilerOptions opts_;
+};
+
+
+}  // end namespace kaldi.
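+
+// Usage sketch (illustrative; assumes "model" is an initialized SimpleHmm
+// and "alignment" holds one class label per frame):
+//   SimpleHmmGraphCompilerOptions opts;  // transition/self-loop scales.
+//   SimpleHmmGraphCompiler compiler(model, opts);
+//   fst::VectorFst<fst::StdArc> graph;
+//   compiler.CompileGraphFromAlignment(alignment, &graph);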
+ +#endif // KALDI_DECODER_SIMPLE_HMM_GRAPH_COMPILER_H_ diff --git a/src/hmm/simple-hmm.cc b/src/hmm/simple-hmm.cc deleted file mode 100644 index 2db6bfbf297..00000000000 --- a/src/hmm/simple-hmm.cc +++ /dev/null @@ -1,79 +0,0 @@ -// hmm/simple-hmm.cc - -// Copyright 2016 Vimal Manohar - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "hmm/simple-hmm.h" - -namespace kaldi { - -void SimpleHmm::FakeContextDependency::GetPdfInfo( - const std::vector &phones, // list of phones - const std::vector &num_pdf_classes, // indexed by phone, - std::vector > > *pdf_info) const { - KALDI_ASSERT(phones.size() == 1 && phones[0] == 1); - KALDI_ASSERT(num_pdf_classes.size() == 2 && - num_pdf_classes[1] == NumPdfs()); - KALDI_ASSERT(pdf_info); - pdf_info->resize(NumPdfs(), - std::vector >()); - - for (int32 pdf = 0; pdf < NumPdfs(); pdf++) { - (*pdf_info)[pdf].push_back(std::make_pair(1, pdf)); - } -} - -void SimpleHmm::FakeContextDependency::GetPdfInfo( - const std::vector &phones, - const std::vector > > &pdf_class_pairs, - std::vector > > > *pdf_info) const { - KALDI_ASSERT(pdf_info); - KALDI_ASSERT(phones.size() == 1 && phones[0] == 1); - KALDI_ASSERT(pdf_class_pairs.size() == 2); - - pdf_info->resize(2); - (*pdf_info)[1].resize(pdf_class_pairs[1].size()); - - for (size_t j = 0; j < pdf_class_pairs[1].size(); j++) { - int32 pdf_class = pdf_class_pairs[1][j].first, - self_loop_pdf_class = pdf_class_pairs[1][j].second; - KALDI_ASSERT(pdf_class == self_loop_pdf_class && - pdf_class < NumPdfs()); - - (*pdf_info)[1][j].push_back(std::make_pair(pdf_class, pdf_class)); - } -} - -void SimpleHmm::Read(std::istream &is, bool binary) { - TransitionModel::Read(is, binary); - ctx_dep_.Init(NumPdfs()); - CheckSimpleHmm(); -} - -void SimpleHmm::CheckSimpleHmm() const { - KALDI_ASSERT(NumPhones() == 1); - KALDI_ASSERT(GetPhones()[0] == 1); - const HmmTopology::TopologyEntry &entry = GetTopo().TopologyForPhone(1); - for (int32 j = 0; j < static_cast(entry.size()); j++) { // for each state... - int32 forward_pdf_class = entry[j].forward_pdf_class, - self_loop_pdf_class = entry[j].self_loop_pdf_class; - KALDI_ASSERT(forward_pdf_class == self_loop_pdf_class && - forward_pdf_class < NumPdfs()); - } -} - -} // end namespace kaldi diff --git a/src/hmm/simple-hmm.h b/src/hmm/simple-hmm.h deleted file mode 100644 index 4b40f212401..00000000000 --- a/src/hmm/simple-hmm.h +++ /dev/null @@ -1,95 +0,0 @@ -// hmm/simple-hmm.h - -// Copyright 2016 Vimal Manohar - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_HMM_SIMPLE_HMM_H_ -#define KALDI_HMM_SIMPLE_HMM_H_ - -#include "base/kaldi-common.h" -#include "hmm/transition-model.h" -#include "itf/context-dep-itf.h" - -namespace kaldi { - -class SimpleHmm: public TransitionModel { - public: - SimpleHmm(const HmmTopology &hmm_topo): - ctx_dep_(hmm_topo) { - Init(ctx_dep_, hmm_topo); - CheckSimpleHmm(); - } - - SimpleHmm(): TransitionModel() { } - - void Read(std::istream &is, bool binary); // note, no symbol table: topo object always read/written w/o symbols. - - private: - void CheckSimpleHmm() const; - - class FakeContextDependency: public ContextDependencyInterface { - public: - int ContextWidth() const { return 1; } - int CentralPosition() const { return 0; } - - bool Compute(const std::vector &phoneseq, int32 pdf_class, - int32 *pdf_id) const { - if (phoneseq.size() == 1 && phoneseq[0] == 1) { - *pdf_id = pdf_class; - return true; - } - return false; - } - - void GetPdfInfo( - const std::vector &phones, // list of phones - const std::vector &num_pdf_classes, // indexed by phone, - std::vector > > *pdf_info) const; - - void GetPdfInfo( - const std::vector &phones, - const std::vector > > &pdf_class_pairs, - std::vector > > > *pdf_info) - const; - - void Init(int32 num_pdfs) { num_pdfs_ = num_pdfs; } - - int32 NumPdfs() const { return num_pdfs_; } - - FakeContextDependency(const HmmTopology &topo) { - KALDI_ASSERT(topo.GetPhones().size() == 1); - num_pdfs_ = topo.NumPdfClasses(1); - } - - FakeContextDependency(): num_pdfs_(0) { } - - ContextDependencyInterface* Copy() const { - FakeContextDependency *copy = new FakeContextDependency(); - copy->Init(num_pdfs_); - return copy; - } - - private: - int32 num_pdfs_; - } ctx_dep_; - - DISALLOW_COPY_AND_ASSIGN(SimpleHmm); -}; - -} // end namespace kaldi - -#endif // KALDI_HMM_SIMPLE_HMM_H_ diff --git a/src/hmm/simple-hmm-utils.cc b/src/simplehmm/simple-hmm-utils.cc similarity index 100% rename from src/hmm/simple-hmm-utils.cc rename to src/simplehmm/simple-hmm-utils.cc diff --git a/src/hmm/simple-hmm-utils.h b/src/simplehmm/simple-hmm-utils.h similarity index 100% rename from src/hmm/simple-hmm-utils.h rename to src/simplehmm/simple-hmm-utils.h diff --git a/src/simplehmm/simple-hmm.cc b/src/simplehmm/simple-hmm.cc index 9af077cedc6..2db6bfbf297 100644 --- a/src/simplehmm/simple-hmm.cc +++ b/src/simplehmm/simple-hmm.cc @@ -1,8 +1,6 @@ // hmm/simple-hmm.cc -// Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -// Johns Hopkins University (author: Guoguo Chen) -// 2016 Vimal Manohar (Johns Hopkins University) +// Copyright 2016 Vimal Manohar // See ../../COPYING for clarification regarding multiple authors // @@ -19,438 +17,63 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. 
-#include -#include "simplehmm/simple-hmm.h" +#include "hmm/simple-hmm.h" namespace kaldi { -namespace simple_hmm { -void SimpleHmm::Initialize() { - KALDI_ASSERT(topo_.GetPhones().size() == 1); - - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); - for (int32 j = 0; j < static_cast(entry.size()); j++) { // for each state... - int32 pdf_class = entry[j].forward_pdf_class; - if (pdf_class != kNoPdf) { - states_.push_back(j); - } - } - - // now states_ is populated with all possible pairs - // (hmm_state, pdf_class). - // sort to enable reverse lookup. - std::sort(states_.begin(), states_.end()); - // this sorting defines the transition-ids. -} - -void SimpleHmm::ComputeDerived() { - state2id_.resize(states_.size()+2); // indexed by transition-state, which - // is one based, but also an entry for one past end of list. - - int32 cur_transition_id = 1; - num_pdfs_ = 0; - for (int32 tstate = 1; - tstate <= static_cast(states_.size()+1); // not a typo. - tstate++) { - state2id_[tstate] = cur_transition_id; - if (static_cast(tstate) <= states_.size()) { - int32 hmm_state = states_[tstate-1]; - const HmmTopology::HmmState &state = topo_.TopologyForPhone(1)[hmm_state]; - int32 pdf_class = state.forward_pdf_class; - num_pdfs_ = std::max(num_pdfs_, pdf_class + 1); - int32 my_num_ids = static_cast(state.transitions.size()); - cur_transition_id += my_num_ids; // # trans out of this state. - } - } - - id2state_.resize(cur_transition_id); // cur_transition_id is #transition-ids+1. - for (int32 tstate = 1; - tstate <= static_cast(states_.size()); tstate++) { - for (int32 tid = state2id_[tstate]; tid < state2id_[tstate+1]; tid++) { - id2state_[tid] = tstate; - } +void SimpleHmm::FakeContextDependency::GetPdfInfo( + const std::vector &phones, // list of phones + const std::vector &num_pdf_classes, // indexed by phone, + std::vector > > *pdf_info) const { + KALDI_ASSERT(phones.size() == 1 && phones[0] == 1); + KALDI_ASSERT(num_pdf_classes.size() == 2 && + num_pdf_classes[1] == NumPdfs()); + KALDI_ASSERT(pdf_info); + pdf_info->resize(NumPdfs(), + std::vector >()); + + for (int32 pdf = 0; pdf < NumPdfs(); pdf++) { + (*pdf_info)[pdf].push_back(std::make_pair(1, pdf)); } } -void SimpleHmm::InitializeProbs() { - log_probs_.Resize(NumTransitionIds()+1); // one-based array, zeroth element empty. 
- for (int32 trans_id = 1; trans_id <= NumTransitionIds(); trans_id++) { - int32 trans_state = id2state_[trans_id]; - int32 trans_index = trans_id - state2id_[trans_state]; - int32 hmm_state = states_[trans_state-1]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); - KALDI_ASSERT(static_cast(hmm_state) < entry.size()); - BaseFloat prob = entry[hmm_state].transitions[trans_index].second; - if (prob <= 0.0) - KALDI_ERR << "SimpleHmm::InitializeProbs, zero " - "probability [should remove that entry in the topology]"; - if (prob > 1.0) - KALDI_WARN << "SimpleHmm::InitializeProbs, prob greater than one."; - log_probs_(trans_id) = Log(prob); - } - ComputeDerivedOfProbs(); -} - -void SimpleHmm::Check() const { - KALDI_ASSERT(topo_.GetPhones().size() == 1); - - KALDI_ASSERT(NumTransitionIds() != 0 && NumTransitionStates() != 0); - { - int32 sum = 0; - for (int32 ts = 1; ts <= NumTransitionStates(); ts++) sum += NumTransitionIndices(ts); - KALDI_ASSERT(sum == NumTransitionIds()); - } - for (int32 tid = 1; tid <= NumTransitionIds(); tid++) { - int32 tstate = TransitionIdToTransitionState(tid), - index = TransitionIdToTransitionIndex(tid); - KALDI_ASSERT(tstate > 0 && tstate <=NumTransitionStates() && index >= 0); - KALDI_ASSERT(tid == PairToTransitionId(tstate, index)); - int32 hmm_state = TransitionStateToHmmState(tstate); - KALDI_ASSERT(tstate == HmmStateToTransitionState(hmm_state)); - KALDI_ASSERT(log_probs_(tid) <= 0.0 && - log_probs_(tid) - log_probs_(tid) == 0.0); - // checking finite and non-positive (and not out-of-bounds). - } - - KALDI_ASSERT(num_pdfs_ == topo_.NumPdfClasses(1)); -} - -SimpleHmm::SimpleHmm( - const HmmTopology &hmm_topo): topo_(hmm_topo) { - Initialize(); - ComputeDerived(); - InitializeProbs(); - Check(); -} - -int32 SimpleHmm::HmmStateToTransitionState(int32 hmm_state) const { - // Note: if this ever gets too expensive, which is unlikely, we can refactor - // this code to sort first on pdf_class, and then index on pdf_class, so those - // that have the same pdf_class are in a contiguous range. - std::vector::const_iterator iter = - std::lower_bound(states_.begin(), states_.end(), hmm_state); - if (iter == states_.end() || !(*iter == hmm_state)) { - KALDI_ERR << "SimpleHmm::HmmStateToTransitionState; " - << "HmmState " << hmm_state << " not found." - << " (incompatible model?)"; - } - // states_is indexed by transition_state-1, so add one. 
- return static_cast((iter - states_.begin())) + 1; -} - - -int32 SimpleHmm::NumTransitionIndices(int32 trans_state) const { - KALDI_ASSERT(static_cast(trans_state) <= states_.size()); - return static_cast(state2id_[trans_state+1]-state2id_[trans_state]); -} - -int32 SimpleHmm::TransitionIdToTransitionState(int32 trans_id) const { - KALDI_ASSERT(trans_id != 0 && - static_cast(trans_id) < id2state_.size()); - return id2state_[trans_id]; -} - -int32 SimpleHmm::TransitionIdToTransitionIndex(int32 trans_id) const { - KALDI_ASSERT(trans_id != 0 && - static_cast(trans_id) < id2state_.size()); - return trans_id - state2id_[id2state_[trans_id]]; -} - -int32 SimpleHmm::TransitionStateToPdfClass(int32 trans_state) const { - KALDI_ASSERT(static_cast(trans_state) <= states_.size()); - int32 hmm_state = states_[trans_state-1]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); - KALDI_ASSERT(static_cast(hmm_state) < entry.size()); - return entry[hmm_state].forward_pdf_class; -} - -int32 SimpleHmm::TransitionStateToHmmState(int32 trans_state) const { - KALDI_ASSERT(static_cast(trans_state) <= states_.size()); - return states_[trans_state-1]; -} - -int32 SimpleHmm::PairToTransitionId(int32 trans_state, - int32 trans_index) const { - KALDI_ASSERT(static_cast(trans_state) <= states_.size()); - KALDI_ASSERT(trans_index < state2id_[trans_state+1] - state2id_[trans_state]); - return state2id_[trans_state] + trans_index; -} - -bool SimpleHmm::IsFinal(int32 trans_id) const { - KALDI_ASSERT(static_cast(trans_id) < id2state_.size()); - int32 trans_state = id2state_[trans_id]; - int32 trans_index = trans_id - state2id_[trans_state]; - int32 hmm_state = states_[trans_state-1]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); - KALDI_ASSERT(static_cast(hmm_state) < entry.size()); - KALDI_ASSERT(static_cast(trans_index) < - entry[hmm_state].transitions.size()); - // return true if the transition goes to the final state of the - // topology entry. - return (entry[hmm_state].transitions[trans_index].first + 1 == - static_cast(entry.size())); -} - -// returns the self-loop transition-id, -// or zero if does not exist. -int32 SimpleHmm::SelfLoopOf(int32 trans_state) const { - KALDI_ASSERT(static_cast(trans_state-1) < states_.size()); - int32 hmm_state = states_[trans_state-1]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); - KALDI_ASSERT(static_cast(hmm_state) < entry.size()); - for (int32 trans_index = 0; - trans_index < static_cast(entry[hmm_state].transitions.size()); - trans_index++) - if (entry[hmm_state].transitions[trans_index].first == hmm_state) - return PairToTransitionId(trans_state, trans_index); - return 0; // invalid transition id. -} - -void SimpleHmm::ComputeDerivedOfProbs() { - // this array indexed by transition-state with nothing in zeroth element. - non_self_loop_log_probs_.Resize(NumTransitionStates()+1); - for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { - int32 tid = SelfLoopOf(tstate); - if (tid == 0) { // no self-loop - non_self_loop_log_probs_(tstate) = 0.0; // log(1.0) - } else { - BaseFloat self_loop_prob = Exp(GetTransitionLogProb(tid)), - non_self_loop_prob = 1.0 - self_loop_prob; - if (non_self_loop_prob <= 0.0) { - KALDI_WARN << "ComputeDerivedOfProbs(): non-self-loop prob is " << non_self_loop_prob; - non_self_loop_prob = 1.0e-10; // just so we can continue... - } - non_self_loop_log_probs_(tstate) = Log(non_self_loop_prob); // will be negative. 
- } +void SimpleHmm::FakeContextDependency::GetPdfInfo( + const std::vector &phones, + const std::vector > > &pdf_class_pairs, + std::vector > > > *pdf_info) const { + KALDI_ASSERT(pdf_info); + KALDI_ASSERT(phones.size() == 1 && phones[0] == 1); + KALDI_ASSERT(pdf_class_pairs.size() == 2); + + pdf_info->resize(2); + (*pdf_info)[1].resize(pdf_class_pairs[1].size()); + + for (size_t j = 0; j < pdf_class_pairs[1].size(); j++) { + int32 pdf_class = pdf_class_pairs[1][j].first, + self_loop_pdf_class = pdf_class_pairs[1][j].second; + KALDI_ASSERT(pdf_class == self_loop_pdf_class && + pdf_class < NumPdfs()); + + (*pdf_info)[1][j].push_back(std::make_pair(pdf_class, pdf_class)); } } void SimpleHmm::Read(std::istream &is, bool binary) { - ExpectToken(is, binary, ""); - topo_.Read(is, binary); - Initialize(); - ComputeDerived(); - ExpectToken(is, binary, ""); - log_probs_.Read(is, binary); - ExpectToken(is, binary, ""); - ExpectToken(is, binary, ""); - ComputeDerivedOfProbs(); - Check(); -} - -void SimpleHmm::Write(std::ostream &os, bool binary) const { - WriteToken(os, binary, ""); - if (!binary) os << "\n"; - topo_.Write(os, binary); - if (!binary) os << "\n"; - WriteToken(os, binary, ""); - if (!binary) os << "\n"; - log_probs_.Write(os, binary); - WriteToken(os, binary, ""); - if (!binary) os << "\n"; - WriteToken(os, binary, ""); - if (!binary) os << "\n"; -} - -BaseFloat SimpleHmm::GetTransitionProb(int32 trans_id) const { - return Exp(log_probs_(trans_id)); -} - -BaseFloat SimpleHmm::GetTransitionLogProb(int32 trans_id) const { - return log_probs_(trans_id); -} - -BaseFloat SimpleHmm::GetNonSelfLoopLogProb(int32 trans_state) const { - KALDI_ASSERT(trans_state != 0); - return non_self_loop_log_probs_(trans_state); -} - -BaseFloat SimpleHmm::GetTransitionLogProbIgnoringSelfLoops( - int32 trans_id) const { - KALDI_ASSERT(trans_id != 0); - KALDI_PARANOID_ASSERT(!IsSelfLoop(trans_id)); - return log_probs_(trans_id) - GetNonSelfLoopLogProb(TransitionIdToTransitionState(trans_id)); -} - -// stats are counts/weights, indexed by transition-id. -void SimpleHmm::MleUpdate(const Vector &stats, - const MleSimpleHmmUpdateConfig &cfg, - BaseFloat *objf_impr_out, - BaseFloat *count_out) { - BaseFloat count_sum = 0.0, objf_impr_sum = 0.0; - int32 num_skipped = 0, num_floored = 0; - KALDI_ASSERT(stats.Dim() == NumTransitionIds()+1); - for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { - int32 n = NumTransitionIndices(tstate); - KALDI_ASSERT(n>=1); - if (n > 1) { // no point updating if only one transition... - Vector counts(n); - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - counts(tidx) = stats(tid); - } - double tstate_tot = counts.Sum(); - count_sum += tstate_tot; - if (tstate_tot < cfg.mincount) { num_skipped++; } - else { - Vector old_probs(n), new_probs(n); - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - old_probs(tidx) = new_probs(tidx) = GetTransitionProb(tid); - } - for (int32 tidx = 0; tidx < n; tidx++) - new_probs(tidx) = counts(tidx) / tstate_tot; - for (int32 i = 0; i < 3; i++) { // keep flooring+renormalizing for 3 times.. 
- new_probs.Scale(1.0 / new_probs.Sum()); - for (int32 tidx = 0; tidx < n; tidx++) - new_probs(tidx) = std::max(new_probs(tidx), cfg.floor); - } - // Compute objf change - for (int32 tidx = 0; tidx < n; tidx++) { - if (new_probs(tidx) == cfg.floor) num_floored++; - double objf_change = counts(tidx) * (Log(new_probs(tidx)) - - Log(old_probs(tidx))); - objf_impr_sum += objf_change; - } - // Commit updated values. - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - log_probs_(tid) = Log(new_probs(tidx)); - if (log_probs_(tid) - log_probs_(tid) != 0.0) - KALDI_ERR << "Log probs is inf or NaN: error in update or bad stats?"; - } - } - } - } - KALDI_LOG << "SimpleHmm::Update, objf change is " - << (objf_impr_sum / count_sum) << " per frame over " << count_sum - << " frames. "; - KALDI_LOG << num_floored << " probabilities floored, " << num_skipped - << " out of " << NumTransitionStates() << " transition-states " - "skipped due to insuffient data (it is normal to have some skipped.)"; - if (objf_impr_out) *objf_impr_out = objf_impr_sum; - if (count_out) *count_out = count_sum; - ComputeDerivedOfProbs(); + TransitionModel::Read(is, binary); + ctx_dep_.Init(NumPdfs()); + CheckSimpleHmm(); } - -// stats are counts/weights, indexed by transition-id. -void SimpleHmm::MapUpdate(const Vector &stats, - const MapSimpleHmmUpdateConfig &cfg, - BaseFloat *objf_impr_out, - BaseFloat *count_out) { - KALDI_ASSERT(cfg.tau > 0.0); - BaseFloat count_sum = 0.0, objf_impr_sum = 0.0; - KALDI_ASSERT(stats.Dim() == NumTransitionIds()+1); - for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { - int32 n = NumTransitionIndices(tstate); - KALDI_ASSERT(n>=1); - if (n > 1) { // no point updating if only one transition... - Vector counts(n); - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - counts(tidx) = stats(tid); - } - double tstate_tot = counts.Sum(); - count_sum += tstate_tot; - Vector old_probs(n), new_probs(n); - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - old_probs(tidx) = new_probs(tidx) = GetTransitionProb(tid); - } - for (int32 tidx = 0; tidx < n; tidx++) - new_probs(tidx) = (counts(tidx) + cfg.tau * old_probs(tidx)) / - (cfg.tau + tstate_tot); - // Compute objf change - for (int32 tidx = 0; tidx < n; tidx++) { - double objf_change = counts(tidx) * (Log(new_probs(tidx)) - - Log(old_probs(tidx))); - objf_impr_sum += objf_change; - } - // Commit updated values. 
- for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - log_probs_(tid) = Log(new_probs(tidx)); - if (log_probs_(tid) - log_probs_(tid) != 0.0) - KALDI_ERR << "Log probs is inf or NaN: error in update or bad stats?"; - } - } - } - KALDI_LOG << "Objf change is " << (objf_impr_sum / count_sum) - << " per frame over " << count_sum - << " frames."; - if (objf_impr_out) *objf_impr_out = objf_impr_sum; - if (count_out) *count_out = count_sum; - ComputeDerivedOfProbs(); -} - - -int32 SimpleHmm::TransitionIdToPdfClass(int32 trans_id) const { - KALDI_ASSERT(trans_id != 0 && - static_cast(trans_id) < id2state_.size()); - int32 trans_state = id2state_[trans_id]; - - int32 hmm_state = states_[trans_state-1]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); - KALDI_ASSERT(static_cast(hmm_state) < entry.size()); - return entry[hmm_state].forward_pdf_class; -} - -int32 SimpleHmm::TransitionIdToHmmState(int32 trans_id) const { - KALDI_ASSERT(trans_id != 0 && - static_cast(trans_id) < id2state_.size()); - int32 trans_state = id2state_[trans_id]; - return states_[trans_state-1]; -} - -void SimpleHmm::Print(std::ostream &os, - const Vector *occs) { - if (occs != NULL) - KALDI_ASSERT(occs->Dim() == NumPdfs()); - for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { - int32 hmm_state = TransitionStateToHmmState(tstate); - int32 pdf_class = TransitionStateToPdfClass(tstate); - - os << " hmm-state = " << hmm_state; - os << " pdf-class = " << pdf_class << '\n'; - for (int32 tidx = 0; tidx < NumTransitionIndices(tstate); tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - BaseFloat p = GetTransitionProb(tid); - os << " Transition-id = " << tid << " p = " << p; - if (occs) { - os << " count of pdf-class = " << (*occs)(pdf_class); - } - // now describe what it's a transition to. - if (IsSelfLoop(tid)) { - os << " [self-loop]\n"; - } else { - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); - KALDI_ASSERT(static_cast(hmm_state) < entry.size()); - int32 next_hmm_state = entry[hmm_state].transitions[tidx].first; - KALDI_ASSERT(next_hmm_state != hmm_state); - os << " [" << hmm_state << " -> " << next_hmm_state << "]\n"; - } - } +void SimpleHmm::CheckSimpleHmm() const { + KALDI_ASSERT(NumPhones() == 1); + KALDI_ASSERT(GetPhones()[0] == 1); + const HmmTopology::TopologyEntry &entry = GetTopo().TopologyForPhone(1); + for (int32 j = 0; j < static_cast(entry.size()); j++) { // for each state... 
+ int32 forward_pdf_class = entry[j].forward_pdf_class, + self_loop_pdf_class = entry[j].self_loop_pdf_class; + KALDI_ASSERT(forward_pdf_class == self_loop_pdf_class && + forward_pdf_class < NumPdfs()); } } -bool SimpleHmm::Compatible(const SimpleHmm &other) const { - return (topo_ == other.topo_ && states_ == other.states_ && - state2id_ == other.state2id_ && id2state_ == other.id2state_ - && NumPdfs() == other.NumPdfs()); -} - -bool SimpleHmm::IsSelfLoop(int32 trans_id) const { - KALDI_ASSERT(static_cast(trans_id) < id2state_.size()); - int32 trans_state = id2state_[trans_id]; - int32 trans_index = trans_id - state2id_[trans_state]; - int32 hmm_state = states_[trans_state-1]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(1); - KALDI_ASSERT(static_cast(hmm_state) < entry.size()); - return (static_cast(trans_index) < entry[hmm_state].transitions.size() - && entry[hmm_state].transitions[trans_index].first == hmm_state); -} - -} // end namespace simple_hmm } // end namespace kaldi - diff --git a/src/simplehmm/simple-hmm.h b/src/simplehmm/simple-hmm.h index ef3a5b9abde..4b40f212401 100644 --- a/src/simplehmm/simple-hmm.h +++ b/src/simplehmm/simple-hmm.h @@ -1,8 +1,6 @@ // hmm/simple-hmm.h -// Copyright 2009-2012 Microsoft Corporation -// Johns Hopkins University (author: Guoguo Chen) -// 2016 Vimal Manohar (Johns Hopkins University) +// Copyright 2016 Vimal Manohar // See ../../COPYING for clarification regarding multiple authors // @@ -19,256 +17,79 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. -#ifndef KALDI_HMM_SIMPLE_HMM_H -#define KALDI_HMM_SIMPLE_HMM_H +#ifndef KALDI_HMM_SIMPLE_HMM_H_ +#define KALDI_HMM_SIMPLE_HMM_H_ #include "base/kaldi-common.h" -#include "util/const-integer-set.h" -#include "fst/fst-decl.h" // forward declarations. -#include "hmm/hmm-topology.h" -#include "itf/options-itf.h" +#include "hmm/transition-model.h" +#include "itf/context-dep-itf.h" namespace kaldi { -namespace simple_hmm { - -/// \addtogroup hmm_group -/// @{ - -// The class SimpleHmm is a repository for the transition probabilities. -// The model is exactly like a single phone. It has a HMM topology defined in -// hmm-topology.h. Each HMM-state has a number of -// transitions (and final-probs) out of it. Each emitting HMM-state defined in -// the HmmTopology class has an associated class-id. -// The transition model associates the -// transition probs with the (HMM-state, class-id). We associate with -// each such pair a transition-state. Each -// transition-state has a number of associated probabilities to estimate; -// this depends on the number of transitions/final-probs in the topology for -// that HMM-state. Each probability has an associated transition-index. -// We associate with each (transition-state, transition-index) a unique transition-id. -// Each individual probability estimated by the transition-model is asociated with a -// transition-id. -// -// List of the various types of quantity referred to here and what they mean: -// HMM-state: a number (0, 1, 2...) that indexes TopologyEntry (see hmm-topology.h) -// transition-state: the states for which we estimate transition probabilities for transitions -// out of them. In some topologies, will map one-to-one with pdf-ids. -// One-based, since it appears on FSTs. -// transition-index: identifier of a transition (or final-prob) in the HMM. Indexes the -// "transitions" vector in HmmTopology::HmmState. 
[if it is out of range, -// equal to transitions.size(), it refers to the final-prob.] -// Zero-based. -// transition-id: identifier of a unique parameter of the -// SimpleHmm. -// Associated with a (transition-state, transition-index) pair. -// One-based, since it appears on FSTs. -// -// List of the possible mappings SimpleHmm can do: -// (HMM-state, class-id) -> transition-state -// (transition-state, transition-index) -> transition-id -// Reverse mappings: -// transition-id -> transition-state -// transition-id -> transition-index -// transition-state -> HMM-state -// transition-state -> class-id -// -// The main things the SimpleHmm object can do are: -// Get initialized (need HmmTopology objects). -// Read/write. -// Update [given a vector of counts indexed by transition-id]. -// Do the various integer mappings mentioned above. -// Get the probability (or log-probability) associated with a particular transition-id. - - -struct MleSimpleHmmUpdateConfig { - BaseFloat floor; - BaseFloat mincount; - MleSimpleHmmUpdateConfig(BaseFloat floor = 0.01, - BaseFloat mincount = 5.0): - floor(floor), mincount(mincount) { } - - void Register (OptionsItf *opts) { - opts->Register("transition-floor", &floor, - "Floor for transition probabilities"); - opts->Register("transition-min-count", &mincount, - "Minimum count required to update transitions from a state"); - } -}; - -struct MapSimpleHmmUpdateConfig { - BaseFloat tau; - MapSimpleHmmUpdateConfig(): tau(5.0) { } - - void Register (OptionsItf *opts) { - opts->Register("transition-tau", &tau, "Tau value for MAP estimation of transition " - "probabilities."); - } -}; - -class SimpleHmm { +class SimpleHmm: public TransitionModel { public: - /// Initialize the object [e.g. at the start of training]. - /// The class keeps a copy of the HmmTopology object. - SimpleHmm(const HmmTopology &hmm_topo); - - /// Constructor that takes no arguments: typically used prior to calling Read. - SimpleHmm() { } + SimpleHmm(const HmmTopology &hmm_topo): + ctx_dep_(hmm_topo) { + Init(ctx_dep_, hmm_topo); + CheckSimpleHmm(); + } + SimpleHmm(): TransitionModel() { } + void Read(std::istream &is, bool binary); // note, no symbol table: topo object always read/written w/o symbols. - void Write(std::ostream &os, bool binary) const; - - - /// return reference to HMM-topology object. - const HmmTopology &GetTopo() const { return topo_; } - - /// \name Integer mapping functions - /// @{ - - int32 HmmStateToTransitionState(int32 hmm_state) const; - int32 PairToTransitionId(int32 trans_state, int32 trans_index) const; - int32 TransitionIdToTransitionState(int32 trans_id) const; - int32 TransitionIdToTransitionIndex(int32 trans_id) const; - int32 TransitionStateToHmmState(int32 trans_state) const; - int32 TransitionStateToPdfClass(int32 trans_state) const; - // returns the self-loop transition-id, or zero if - // this state doesn't have a self-loop. - int32 SelfLoopOf(int32 trans_state) const; - - int32 TransitionIdToPdfClass(int32 trans_id) const; - int32 TransitionIdToHmmState(int32 trans_id) const; - - /// @} - - bool IsFinal(int32 trans_id) const; // returns true if this trans_id goes to the final state - // (which is bound to be nonemitting). - bool IsSelfLoop(int32 trans_id) const; // return true if this trans_id corresponds to a self-loop. - - /// Returns the total number of transition-ids (note, these are one-based). - inline int32 NumTransitionIds() const { return id2state_.size()-1; } - - /// Returns the number of transition-indices for a particular transition-state. 
- /// Note: "Indices" is the plural of "index". Index is not the same as "id", - /// here. A transition-index is a zero-based offset into the transitions - /// out of a particular transition state. - int32 NumTransitionIndices(int32 trans_state) const; - - /// Returns the total number of transition-states (note, these are one-based). - int32 NumTransitionStates() const { return states_.size(); } - - // NumPdfs() in the model. - int32 NumPdfs() const { return num_pdfs_; } - - // Transition-parameter-getting functions: - BaseFloat GetTransitionProb(int32 trans_id) const; - BaseFloat GetTransitionLogProb(int32 trans_id) const; - - // The following functions are more specialized functions for getting - // transition probabilities, that are provided for convenience. - - /// Returns the log-probability of a particular non-self-loop transition - /// after subtracting the probability mass of the self-loop and renormalizing; - /// will crash if called on a self-loop. Specifically: - /// for non-self-loops it returns the log of (that prob divided by (1 minus - /// self-loop-prob-for-that-state)). - BaseFloat GetTransitionLogProbIgnoringSelfLoops(int32 trans_id) const; - - /// Returns the log-prob of the non-self-loop probability - /// mass for this transition state. (you can get the self-loop prob, if a self-loop - /// exists, by calling GetTransitionLogProb(SelfLoopOf(trans_state)). - BaseFloat GetNonSelfLoopLogProb(int32 trans_state) const; - - /// Does Maximum Likelihood estimation. The stats are counts/weights, indexed - /// by transition-id. This was previously called Update(). - void MleUpdate(const Vector &stats, - const MleSimpleHmmUpdateConfig &cfg, - BaseFloat *objf_impr_out, - BaseFloat *count_out); - - /// Does Maximum A Posteriori (MAP) estimation. The stats are counts/weights, - /// indexed by transition-id. - void MapUpdate(const Vector &stats, - const MapSimpleHmmUpdateConfig &cfg, - BaseFloat *objf_impr_out, - BaseFloat *count_out); - - /// Print will print the simple HMM in a human-readable way, - /// for purposes of human - /// inspection. - /// The "occs" are optional (they are indexed by pdf-classes). - void Print(std::ostream &os, - const Vector *occs = NULL); - - - void InitStats(Vector *stats) const { stats->Resize(NumTransitionIds()+1); } - - void Accumulate(BaseFloat prob, int32 trans_id, Vector *stats) const { - KALDI_ASSERT(trans_id <= NumTransitionIds()); - (*stats)(trans_id) += prob; - // This is trivial and doesn't require class members, but leaves us more open - // to design changes than doing it manually. - } - - /// returns true if all the integer class members are identical (but does not - /// compare the transition probabilities. - bool Compatible(const SimpleHmm &other) const; private: - void MleUpdateShared(const Vector &stats, - const MleSimpleHmmUpdateConfig &cfg, - BaseFloat *objf_impr_out, BaseFloat *count_out); - void MapUpdateShared(const Vector &stats, - const MapSimpleHmmUpdateConfig &cfg, - BaseFloat *objf_impr_out, BaseFloat *count_out); - - // called from constructor and Read(): initializes states_ - void Initialize(); - // called from constructor and Read(): computes state2id_ and id2state_ - void ComputeDerived(); - // computes quantities derived from log-probs (currently just - // non_self_loop_log_probs_; called whenever log-probs change. - void ComputeDerivedOfProbs(); - void InitializeProbs(); // called from constructor. 
- void Check() const; + void CheckSimpleHmm() const; - HmmTopology topo_; - - /// States indexed by transition state minus one; - /// the states are in sorted order which allows us to do the reverse mapping - /// from state to transition state - std::vector states_; - - /// Gives the first transition_id of each transition-state; indexed by - /// the transition-state. Array indexed 1..num-transition-states+1 - /// (the last one is needed so we can know the num-transitions of the last - /// transition-state. - std::vector state2id_; + class FakeContextDependency: public ContextDependencyInterface { + public: + int ContextWidth() const { return 1; } + int CentralPosition() const { return 0; } + + bool Compute(const std::vector &phoneseq, int32 pdf_class, + int32 *pdf_id) const { + if (phoneseq.size() == 1 && phoneseq[0] == 1) { + *pdf_id = pdf_class; + return true; + } + return false; + } + + void GetPdfInfo( + const std::vector &phones, // list of phones + const std::vector &num_pdf_classes, // indexed by phone, + std::vector > > *pdf_info) const; + + void GetPdfInfo( + const std::vector &phones, + const std::vector > > &pdf_class_pairs, + std::vector > > > *pdf_info) + const; + + void Init(int32 num_pdfs) { num_pdfs_ = num_pdfs; } - /// For each transition-id, the corresponding transition - /// state (indexed by transition-id). - std::vector id2state_; + int32 NumPdfs() const { return num_pdfs_; } - /// For each transition-id, the corresponding log-prob. - /// Indexed by transition-id. - Vector log_probs_; + FakeContextDependency(const HmmTopology &topo) { + KALDI_ASSERT(topo.GetPhones().size() == 1); + num_pdfs_ = topo.NumPdfClasses(1); + } - /// For each transition-state, the log of (1 - self-loop-prob). Indexed by - /// transition-state. - Vector non_self_loop_log_probs_; + FakeContextDependency(): num_pdfs_(0) { } - /// This is equal to the one + highest-numbered pdf class. 
-  int32 num_pdfs_;
+  ContextDependencyInterface* Copy() const {
+    FakeContextDependency *copy = new FakeContextDependency();
+    copy->Init(num_pdfs_);
+    return copy;
+  }
+   private:
+    int32 num_pdfs_;
+  } ctx_dep_;
 
   DISALLOW_COPY_AND_ASSIGN(SimpleHmm);
-
 };
 
-/// @}
-
-
-} // end namespace simple_hmm
-} // end namespace kaldi
-
+} // end namespace kaldi
 
-#endif
+#endif  // KALDI_HMM_SIMPLE_HMM_H_

From be892299646d2989561dc928dfbc8d4289c90d95 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 14:59:04 -0500
Subject: [PATCH 338/530] asr_diarization: Convert GMM posteriors to feats

---
 src/gmm/diag-gmm.h                     |  10 +++
 src/gmmbin/Makefile                    |   3 +-
 src/gmmbin/gmm-global-post-to-feats.cc | 103 +++++++++++++++++++++++++
 3 files changed, 115 insertions(+), 1 deletion(-)
 create mode 100644 src/gmmbin/gmm-global-post-to-feats.cc

diff --git a/src/gmm/diag-gmm.h b/src/gmm/diag-gmm.h
index 1243d7a6bfd..32ef4f146d7 100644
--- a/src/gmm/diag-gmm.h
+++ b/src/gmm/diag-gmm.h
@@ -32,6 +32,8 @@
 #include "matrix/matrix-lib.h"
 #include "tree/cluster-utils.h"
 #include "tree/clusterable-classes.h"
+#include "util/kaldi-table.h"
+#include "util/kaldi-holder.h"
 
 namespace kaldi {
 
@@ -255,6 +257,14 @@ operator << (std::ostream &os, const kaldi::DiagGmm &gmm);
 std::istream &
 operator >> (std::istream &is, kaldi::DiagGmm &gmm);
 
+typedef KaldiObjectHolder<DiagGmm> DiagGmmHolder;
+
+typedef TableWriter<DiagGmmHolder> DiagGmmWriter;
+typedef SequentialTableReader<DiagGmmHolder> SequentialDiagGmmReader;
+typedef RandomAccessTableReader<DiagGmmHolder> RandomAccessDiagGmmReader;
+typedef RandomAccessTableReaderMapped<DiagGmmHolder>
+RandomAccessDiagGmmReaderMapped;
+
 }  // End namespace kaldi
 
 #include "gmm/diag-gmm-inl.h"  // templated functions.

diff --git a/src/gmmbin/Makefile b/src/gmmbin/Makefile
index 7adb8bdc41e..caf4b1f8118 100644
--- a/src/gmmbin/Makefile
+++ b/src/gmmbin/Makefile
@@ -28,7 +28,8 @@ BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \
            gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats \
            gmm-global-info gmm-latgen-faster-regtree-fmllr gmm-est-fmllr-global \
            gmm-acc-mllt-global gmm-transform-means-global gmm-global-get-post \
-           gmm-global-gselect-to-post gmm-global-est-lvtln-trans
+           gmm-global-gselect-to-post gmm-global-est-lvtln-trans \
+           gmm-global-post-to-feats
 
 OBJFILES =
 
diff --git a/src/gmmbin/gmm-global-post-to-feats.cc b/src/gmmbin/gmm-global-post-to-feats.cc
new file mode 100644
index 00000000000..fa903b66014
--- /dev/null
+++ b/src/gmmbin/gmm-global-post-to-feats.cc
@@ -0,0 +1,103 @@
+// gmmbin/gmm-global-post-to-feats.cc
+
+// Copyright 2016  Brno University of Technology (Author: Karel Vesely)
+//            2016  Vimal Manohar
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
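
The body of the new binary follows. Its core step is PosteriorToMatrix, which scatters each frame's (gaussian-index, weight) pairs into a dense num-frames x num-gauss matrix. A minimal plain-Python sketch of that conversion (illustrative only; the real code uses Kaldi's Posterior utilities):

    # Sketch of posterior-to-matrix: 'post' holds, per frame, a list of
    # (index, weight) pairs; the result is a dense frames x post_dim matrix.
    def posterior_to_matrix(post, post_dim):
        mat = [[0.0] * post_dim for _ in post]
        for t, frame in enumerate(post):
            for idx, weight in frame:
                assert 0 <= idx < post_dim
                mat[t][idx] += weight
        return mat

    # Two frames of posteriors over a 4-component GMM:
    post = [[(0, 0.7), (2, 0.3)], [(1, 1.0)]]
    print(posterior_to_matrix(post, post_dim=4))
    # [[0.7, 0.0, 0.3, 0.0], [0.0, 1.0, 0.0, 0.0]]

Here post_dim is the number of Gaussians of whichever GMM applies to the utterance, which is why the binary looks the model up per utterance (or per speaker, via --utt2spk) before converting.
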
+
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "matrix/kaldi-matrix.h"
+#include "hmm/posterior.h"
+#include "gmm/diag-gmm.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+
+    const char *usage =
+        "Convert GMM global posteriors to features\n"
+        "\n"
+        "Usage: gmm-global-post-to-feats [options] <model-in|model-rspecifier> <post-rspecifier> <feats-wspecifier>\n"
+        "e.g.: gmm-global-post-to-feats ark:1.gmm ark:post.ark ark:feat.ark\n"
+        "See also: post-to-feats --post-dim, post-to-weights, feat-to-post, append-vector-to-feats, append-post-to-feats\n";
+
+    ParseOptions po(usage);
+    std::string utt2spk_rspecifier;
+
+    po.Register("utt2spk", &utt2spk_rspecifier,
+                "rspecifier for utterance to speaker map for reading "
+                "per-speaker GMM models");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string model_in_filename = po.GetArg(1),
+        post_rspecifier = po.GetArg(2),
+        feat_wspecifier = po.GetArg(3);
+
+    DiagGmm diag_gmm;
+    RandomAccessDiagGmmReaderMapped *gmm_reader = NULL;
+    SequentialPosteriorReader post_reader(post_rspecifier);
+    BaseFloatMatrixWriter feat_writer(feat_wspecifier);
+
+    if (ClassifyRspecifier(po.GetArg(1), NULL, NULL)
+        != kNoRspecifier) {  // We're operating on tables, e.g. archives.
+      gmm_reader = new RandomAccessDiagGmmReaderMapped(model_in_filename,
+                                                       utt2spk_rspecifier);
+    } else {
+      ReadKaldiObject(model_in_filename, &diag_gmm);
+    }
+
+    int32 num_done = 0, num_err = 0;
+
+    for (; !post_reader.Done(); post_reader.Next()) {
+      const std::string &utt = post_reader.Key();
+
+      const DiagGmm *gmm = &diag_gmm;
+      if (gmm_reader) {
+        if (!gmm_reader->HasKey(utt)) {
+          KALDI_WARN << "Could not find GMM model for utterance " << utt;
+          num_err++;
+          continue;
+        }
+        gmm = &(gmm_reader->Value(utt));
+      }
+
+      int32 post_dim = gmm->NumGauss();
+
+      const Posterior &post = post_reader.Value();
+
+      Matrix<BaseFloat> output;
+      PosteriorToMatrix(post, post_dim, &output);
+
+      feat_writer.Write(utt, output);
+      num_done++;
+    }
+    KALDI_LOG << "Done " << num_done << " utts, errors on "
+              << num_err;
+
+    return (num_done == 0 ? -1 : 0);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}

From c23060eb527957ab6c9e6a6064e9de3c28e0b657 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 14:59:34 -0500
Subject: [PATCH 339/530] asr_diarization: Remove some accidentally added files

---
 src/simplehmm/simple-hmm-acc-stats-fsts.cc | 173 ---------------------
 src/simplehmm/simple-hmm-computation.cc    |   5 -
 2 files changed, 178 deletions(-)
 delete mode 100644 src/simplehmm/simple-hmm-acc-stats-fsts.cc
 delete mode 100644 src/simplehmm/simple-hmm-computation.cc

diff --git a/src/simplehmm/simple-hmm-acc-stats-fsts.cc b/src/simplehmm/simple-hmm-acc-stats-fsts.cc
deleted file mode 100644
index de4a7528836..00000000000
--- a/src/simplehmm/simple-hmm-acc-stats-fsts.cc
+++ /dev/null
@@ -1,173 +0,0 @@
-// simplehmmbin/simple-hmm-acc-stats-fsts.cc
-
-// Copyright 2016  Vimal Manohar (Johns Hopkins University)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "simplehmm/simple-hmm.h" -#include "hmm/hmm-utils.h" -#include "fstext/fstext-lib.h" -#include "decoder/decoder-wrappers.h" - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - using fst::SymbolTable; - using fst::VectorFst; - using fst::StdArc; - - const char *usage = - "Accumulate stats for simple HMM models from FSTs directly.\n" - "Usage: simple-hmm-acc-stats-fsts [options] " - " \n" - "e.g.: \n" - " simple-hmm-acc-stats-fsts 1.mdl ark:graphs.fsts scp:likes.scp pdf2class_map 1.stats\n"; - - ParseOptions po(usage); - - BaseFloat acoustic_scale = 1.0; - BaseFloat transition_scale = 1.0; - BaseFloat self_loop_scale = 1.0; - - po.Register("transition-scale", &transition_scale, - "Transition-probability scale [relative to acoustics]"); - po.Register("acoustic-scale", &acoustic_scale, - "Scaling factor for acoustic likelihoods"); - po.Register("self-loop-scale", &self_loop_scale, - "Scale of self-loop versus non-self-loop log probs [relative to acoustics]"); - po.Read(argc, argv); - - - if (po.NumArgs() != 5) { - po.PrintUsage(); - exit(1); - } - - std::string model_in_filename = po.GetArg(1), - fst_rspecifier = po.GetArg(2), - likes_rspecifier = po.GetArg(3), - pdf2class_map_rxfilename = po.GetArg(4), - accs_wxfilename = po.GetArg(5); - - simple_hmm::SimpleHmm model; - ReadKaldiObject(model_in_filename, &model); - - SequentialTableReader fst_reader(fst_rspecifier); - RandomAccessBaseFloatMatrixReader likes_reader(likes_rspecifier); - - std::vector pdf2class; - { - Input ki(pdf2class_map_rxfilename); - std::string line; - while (std::getline(ki.Stream(), line)) { - std::vector parts; - SplitStringToVector(line, " ", true, &parts); - if (parts.size() != 2) { - KALDI_ERR << "Invalid line " << line - << " in pdf2class-map " << pdf2class_map_rxfilename; - } - int32 pdf_id = std::atoi(parts[0].c_str()), - class_id = std::atoi(parts[1].c_str()); - - if (pdf_id != pdf2class.size()) - KALDI_ERR << "pdf2class-map is not sorted or does not contain " - << "pdf " << pdf_id - 1 << " in " - << pdf2class_map_rxfilename; - - if (pdf_id < pdf2class.size()) - KALDI_ERR << "Duplicate pdf " << pdf_id - << " in pdf2class-map " << pdf2class_map_rxfilename; - - pdf2class.push_back(class_id); - } - } - - int32 num_done = 0, num_err = 0; - double tot_like = 0.0, tot_t = 0.0; - int64 frame_count = 0; - - Vector transition_accs; - model.InitStats(&transition_accs); - - SimpleHmmComputation computation(model, pdf2class_map); - - for (; !fst_reader.Done(); fst_reader.Next()) { - const std::string &utt = fst_reader.Key(); - - if (!likes_reader.HasKey(utt)) { - num_err++; - KALDI_WARN << "No likes for utterance " << utt; - continue; - } - - const Matrix &likes = likes_reader.Value(utt); - VectorFst decode_fst(fst_reader.Value()); - fst_reader.FreeCurrent(); // this stops copy-on-write of the fst - // by deleting the fst inside the reader, since we're about to mutate - // the fst by adding transition probs. 
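
The pdf2class-map reader above insists that line i declares pdf-id i, which rules out unsorted, missing and duplicate entries in a single pass. The same invariant, sketched in plain Python with the two error branches folded into one check (illustrative only, not part of the patch):

    # Sketch of the pdf2class-map check: the pdf-id on each line must equal
    # the number of entries read so far.
    def read_pdf2class(lines):
        pdf2class = []
        for line in lines:
            pdf_id, class_id = map(int, line.split())
            if pdf_id != len(pdf2class):
                raise ValueError(
                    "pdf2class-map not sorted/dense at pdf %d" % pdf_id)
            pdf2class.append(class_id)
        return pdf2class

    print(read_pdf2class(["0 0", "1 0", "2 1"]))   # [0, 0, 1]
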
- - if (likes.NumRows() == 0) { - KALDI_WARN << "Zero-length utterance: " << utt; - num_err++; - continue; - } - - if (likes.NumCols() != pdf2class.size()) { - KALDI_ERR << "Mismatch in pdf dimension in log-likelihood matrix " - << "and pdf2class map; " << likes.NumCols() << " vs " - << pdf2class.size(); - } - - // Add transition-probs to the FST. - AddTransitionProbs(model, transition_scale, self_loop_scale, - &decode_fst); - - BaseFloat tot_like_this_utt = 0.0, tot_weight = 0.0; - if (!computation.Compute(decode_fst, likes, acoustic_scale, - &transition_accs, - &tot_like_this_utt, &tot_weight)) { - KALDI_WARN << "Failed to do computation for utterance " << utt; - num_err++; - } - tot_like += tot_like_this_utt; - tot_t += tot_weight; - frame_count += likes.NumRows(); - - num_done++; - } - - KALDI_LOG << "Done " << num_done << " files, " << num_err - << " with errors."; - - KALDI_LOG << "Overall avg like per frame = " - << (tot_like/tot_t) << " over " << tot_t << " frames."; - - { - Output ko(accs_wxfilename, binary); - transition_accs.Write(ko.Stream(), binary); - } - KALDI_LOG << "Written accs."; - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/simplehmm/simple-hmm-computation.cc b/src/simplehmm/simple-hmm-computation.cc deleted file mode 100644 index e20f84169a1..00000000000 --- a/src/simplehmm/simple-hmm-computation.cc +++ /dev/null @@ -1,5 +0,0 @@ -SimpleHmmComputation::SimpleHmmComputation( - const SimpleHmm &model, - const std::vector &num_pdfs, - VectorFst *decode_fst, - const Matrix &log_likes) From 8786deab25721910fb31f0ffcd744fa62e563c12 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:00:23 -0500 Subject: [PATCH 340/530] asr_diarzation: Update do_corruption_data_dir{,_music} --- .../segmentation/do_corruption_data_dir.sh | 16 ++-- .../do_corruption_data_dir_music.sh | 80 +++++++++++++------ 2 files changed, 64 insertions(+), 32 deletions(-) diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh index 1bfa08370e7..5d38be87d70 100755 --- a/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh @@ -76,14 +76,14 @@ corrupted_data_dir=data/${corrupted_data_id} if $speed_perturb; then if [ $stage -le 2 ]; then ## Assuming whole data directories - for x in $clean_data_dir $corrupted_data_dir $noise_data_dir; do + for x in $corrupted_data_dir; do cp $x/reco2dur $x/utt2dur - utils/data/perturb_data_dir_speed_3way.sh $x ${x}_sp + utils/data/perturb_data_dir_speed_random.sh $x ${x}_spr done fi - corrupted_data_dir=${corrupted_data_dir}_sp - corrupted_data_id=${corrupted_data_id}_sp + corrupted_data_dir=${corrupted_data_dir}_spr + corrupted_data_id=${corrupted_data_id}_spr if [ $stage -le 3 ]; then utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 \ @@ -118,14 +118,14 @@ fi if [ $stage -le 8 ]; then if [ ! -z "$reco_vad_dir" ]; then - if [ ! -f $reco_vad_dir/speech_feat.scp ]; then - echo "$0: Could not find file $reco_vad_dir/speech_feat.scp" + if [ ! 
-f $reco_vad_dir/speech_labels.scp ]; then + echo "$0: Could not find file $reco_vad_dir/speech_labels.scp" exit 1 fi - cat $reco_vad_dir/speech_feat.scp | \ + cat $reco_vad_dir/speech_labels.scp | \ steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ - sort -k1,1 > ${corrupted_data_dir}/speech_feat.scp + sort -k1,1 > ${corrupted_data_dir}/speech_labels.scp cat $reco_vad_dir/deriv_weights.scp | \ steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh index 214cba347da..4fc369234ea 100755 --- a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh @@ -71,19 +71,20 @@ if $dry_run; then fi corrupted_data_dir=data/${corrupted_data_id} -orig_corrupted_data_dir=$corrupted_data_dir +# Data dir without speed perturbation +orig_corrupted_data_dir=$corrupted_data_dir if $speed_perturb; then if [ $stage -le 2 ]; then ## Assuming whole data directories for x in $corrupted_data_dir; do cp $x/reco2dur $x/utt2dur - utils/data/perturb_data_dir_speed_3way.sh $x ${x}_sp + utils/data/perturb_data_dir_speed_random.sh $x ${x}_spr done fi - corrupted_data_dir=${corrupted_data_dir}_sp - corrupted_data_id=${corrupted_data_id}_sp + corrupted_data_dir=${corrupted_data_dir}_spr + corrupted_data_id=${corrupted_data_id}_spr if [ $stage -le 3 ]; then utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 \ @@ -122,14 +123,14 @@ fi if [ $stage -le 8 ]; then if [ ! -z "$reco_vad_dir" ]; then - if [ ! -f $reco_vad_dir/speech_feat.scp ]; then - echo "$0: Could not find file $reco_vad_dir/speech_feat.scp" + if [ ! -f $reco_vad_dir/speech_labels.scp ]; then + echo "$0: Could not find file $reco_vad_dir/speech_labels.scp" exit 1 fi - cat $reco_vad_dir/speech_feat.scp | \ + cat $reco_vad_dir/speech_labels.scp | \ steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps "music" | \ - sort -k1,1 > ${corrupted_data_dir}/speech_feat.scp + sort -k1,1 > ${corrupted_data_dir}/speech_labels.scp cat $reco_vad_dir/deriv_weights.scp | \ steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps "music" | \ @@ -144,38 +145,41 @@ music_data_dir=$music_dir/music_data mkdir -p $music_data_dir if [ $stage -le 10 ]; then - utils/data/get_utt2num_frames.sh $corrupted_data_dir + utils/data/get_reco2num_frames.sh --nj $reco_nj $orig_corrupted_data_dir utils/split_data.sh --per-reco ${orig_corrupted_data_dir} $reco_nj cp $orig_corrupted_data_dir/wav.scp $music_data_dir - - # Combine the VAD from the base recording and the VAD from the overlapping segments - # to create per-frame labels of the number of overlapping speech segments - # Unreliable segments are regions where no VAD labels were available for the - # overlapping segments. These can be later removed by setting deriv weights to 0. + + # The first rspecifier is a dummy required to get the recording-id as key. + # It has no segments in it as they are all removed by --remove-labels. 
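
In miniature, the command below paints each additive-music placement (start frame, length) onto the recording's frame timeline and then merges adjacent runs into segments, which is what segmentation-init-from-additive-signals-info piped through segmentation-post-process --merge-adjacent-segments produces. A plain-Python sketch with made-up placements (illustrative only):

    # Sketch: overlay (start, length) placements on a num_frames timeline and
    # return the merged runs as half-open frame ranges.
    def placements_to_segments(placements, num_frames):
        mask = [0] * num_frames
        for start, length in placements:
            for t in range(max(0, start), min(num_frames, start + length)):
                mask[t] = 1
        segs, t = [], 0
        while t < num_frames:
            if mask[t]:
                start = t
                while t < num_frames and mask[t]:
                    t += 1
                segs.append((start, t))
            else:
                t += 1
        return segs

    print(placements_to_segments([(10, 20), (25, 10), (50, 5)], 100))
    # [(10, 35), (50, 55)]
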
$train_cmd JOB=1:$reco_nj $music_dir/log/get_music_seg.JOB.log \ - segmentation-init-from-additive-signals-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ + segmentation-init-from-additive-signals-info --lengths-rspecifier=ark,t:${orig_corrupted_data_dir}/reco2num_frames \ --additive-signals-segmentation-rspecifier="ark:segmentation-init-from-lengths ark:$music_utt2num_frames ark:- |" \ - "ark:utils/filter_scp.pl ${orig_corrupted_data_dir}/split${reco_nj}reco/JOB/utt2spk $corrupted_data_dir/utt2num_frames | segmentation-init-from-lengths --label=1 ark:- ark:- | segmentation-post-process --remove-labels=1 ark:- ark:- |" \ - ark,t:$orig_corrupted_data_dir/additive_signals_info.txt \ + "ark,t:utils/filter_scp.pl ${orig_corrupted_data_dir}/split${reco_nj}reco/JOB/reco2utt $orig_corrupted_data_dir/additive_signals_info.txt |" \ ark:- \| \ segmentation-post-process --merge-adjacent-segments ark:- \ ark:- \| \ segmentation-to-segments ark:- ark:$music_data_dir/utt2spk.JOB \ $music_data_dir/segments.JOB + utils/data/get_reco2utt.sh $corrupted_data_dir for n in `seq $reco_nj`; do cat $music_data_dir/utt2spk.$n; done > $music_data_dir/utt2spk for n in `seq $reco_nj`; do cat $music_data_dir/segments.$n; done > $music_data_dir/segments utils/fix_data_dir.sh $music_data_dir if $speed_perturb; then - utils/data/perturb_data_dir_speed_3way.sh $music_data_dir ${music_data_dir}_sp + utils/data/perturb_data_dir_speed_3way.sh $music_data_dir ${music_data_dir}_spr + mv ${music_data_dir}_spr/segments{,.temp} + cat ${music_data_dir}_spr/segments.temp | \ + utils/filter_scp.pl -f 2 ${corrupted_data_dir}/reco2utt > ${music_data_dir}_spr/segments + utils/fix_data_dir.sh ${music_data_dir}_spr + rm ${music_data_dir}_spr/segments.temp fi fi if $speed_perturb; then - music_data_dir=${music_data_dir}_sp + music_data_dir=${music_data_dir}_spr fi label_dir=music_labels @@ -184,13 +188,20 @@ mkdir -p $label_dir label_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $label_dir ${PWD}` if [ $stage -le 11 ]; then - utils/split_data.sh --per-reco ${music_data_dir} $reco_nj + utils/split_data.sh --per-reco ${corrupted_data_dir} $reco_nj + # TODO: Don't assume that its whole data directory. 
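
Stage 12 further below intersects the per-frame speech and music label streams with intersect-int-vectors, using the 4-way speech_music_map written there. The mapping itself is tiny; a plain-Python sketch (illustrative only):

    # Sketch of intersect-int-vectors with the speech_music_map from stage 12:
    # each (speech, music) pair of 0/1 labels maps to one 4-way class.
    speech_music_map = {(0, 0): 0, (0, 1): 3, (1, 0): 1, (1, 1): 2}

    def intersect_labels(speech, music):
        assert len(speech) == len(music)
        return [speech_music_map[(s, m)] for s, m in zip(speech, music)]

    print(intersect_labels([0, 1, 1, 0], [0, 0, 1, 1]))  # [0, 1, 2, 3]
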
+ nj=$reco_nj + if [ $nj -gt 4 ]; then + nj=4 + fi + utils/data/get_utt2num_frames.sh --cmd "$train_cmd" --nj $nj ${corrupted_data_dir} + utils/data/get_reco2utt.sh $music_data_dir/ $train_cmd JOB=1:$reco_nj $music_dir/log/get_music_labels.JOB.log \ - utils/data/get_reco2utt.sh ${music_data_dir}/split${reco_nj}reco/JOB '&&' \ segmentation-init-from-segments --shift-to-zero=false \ - ${music_data_dir}/split${reco_nj}reco/JOB/segments ark:- \| \ - segmentation-combine-segments-to-recordings ark:- ark,t:${music_data_dir}/split${reco_nj}reco/JOB/reco2utt \ + "utils/filter_scp.pl -f 2 ${corrupted_data_dir}/split${reco_nj}reco/JOB/reco2utt ${music_data_dir}/segments |" ark:- \| \ + segmentation-combine-segments-to-recordings ark:- \ + "ark,t:utils/filter_scp.pl ${corrupted_data_dir}/split${reco_nj}reco/JOB/reco2utt ${music_data_dir}/reco2utt |" \ ark:- \| \ segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \ ark,scp:$label_dir/music_labels_${corrupted_data_id}.JOB.ark,$label_dir/music_labels_${corrupted_data_id}.JOB.scp @@ -198,6 +209,27 @@ fi for n in `seq $reco_nj`; do cat $label_dir/music_labels_${corrupted_data_id}.$n.scp -done > ${corrupted_data_dir}/music_labels.scp +done | utils/filter_scp.pl ${corrupted_data_dir}/utt2spk > ${corrupted_data_dir}/music_labels.scp + +if [ $stage -le 12 ]; then + utils/split_data.sh --per-reco ${corrupted_data_dir} $reco_nj + + cat < $music_dir/speech_music_map +0 0 0 +0 1 3 +1 0 1 +1 1 2 +EOF + + $train_cmd JOB=1:$reco_nj $music_dir/log/get_speech_music_labels.JOB.log \ + intersect-int-vectors --mapping-in=$music_dir/speech_music_map \ + "scp:utils/filter_scp.pl ${corrupted_data_dir}/split${reco_nj}reco/JOB/reco2utt ${corrupted_data_dir}/speech_labels.scp |" \ + "scp:utils/filter_scp.pl ${corrupted_data_dir}/split${reco_nj}reco/JOB/reco2utt ${corrupted_data_dir}/music_labels.scp |" \ + ark,scp:$label_dir/speech_music_labels_${corrupted_data_id}.JOB.ark,$label_dir/speech_music_labels_${corrupted_data_id}.JOB.scp + + for n in `seq $reco_nj`; do + cat $label_dir/speech_music_labels_${corrupted_data_id}.$n.scp + done > $corrupted_data_dir/speech_music_labels.scp +fi exit 0 From eb5432282b54e897d14ea65e7fc8ce8cac1c3420 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:00:58 -0500 Subject: [PATCH 341/530] asr_diarization: Prepare unsad data fisher and babel --- .../local/segmentation/prepare_babel_data.sh | 105 ++++++++++++++++++ .../local/segmentation/prepare_fisher_data.sh | 18 +-- .../local/segmentation/prepare_unsad_data.sh | 28 ++--- 3 files changed, 128 insertions(+), 23 deletions(-) create mode 100644 egs/aspire/s5/local/segmentation/prepare_babel_data.sh diff --git a/egs/aspire/s5/local/segmentation/prepare_babel_data.sh b/egs/aspire/s5/local/segmentation/prepare_babel_data.sh new file mode 100644 index 00000000000..24a61eca772 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/prepare_babel_data.sh @@ -0,0 +1,105 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0. + +# This script prepares Babel data for training speech activity detection, +# music detection. + +. path.sh +. cmd.sh + +set -e +set -o pipefail +set -u + +lang_id=assamese +subset= # Number of recordings to keep before speed perturbation and corruption. + # In limitedLP, this is about 120. So subset, if specified, must be lower that that. + +# All the paths below can be modified to any absolute path. +ROOT_DIR=/home/vimal/workspace_waveform/egs/babel/s5c_assamese/ + +stage=-1 + +. 
utils/parse_options.sh + +if [ $# -ne 0 ]; then + echo "Usage: $0" + echo "This script is to serve as an example recipe." + echo "Edit the script to change variables if needed." + exit 1 +fi + +dir=exp/unsad/make_unsad_babel_${lang_id}_train # Work dir + +model_dir=$ROOT_DIR/exp/tri4 # Model directory used for decoding +sat_model_dir=$ROOT_DIR/exp/tri5 # Model directory used for getting alignments +lang=$ROOT_DIR/data/lang # Language directory +lang_test=$ROOT_DIR/data/lang # Language directory used to build graph + +mkdir -p $dir + +# Hard code the mapping from phones to SAD labels +# 0 for silence, 1 for speech, 2 for noise, 3 for unk +cat < $dir/babel_sad.map + 3 +_B 3 +_E 3 +_I 3 +_S 3 + 2 +_B 2 +_E 2 +_I 2 +_S 2 + 2 +_B 2 +_E 2 +_I 2 +_S 2 +SIL 0 +SIL_B 0 +SIL_E 0 +SIL_I 0 +SIL_S 0 +EOF + +# The original data directory which will be converted to a whole (recording-level) directory. +utils/copy_data_dir.sh $ROOT_DIR/data/train data/babel_${lang_id}_train +train_data_dir=data/babel_${lang_id}_train + +# Expecting the user to have done run.sh to have $model_dir, +# $sat_model_dir, $lang, $lang_test, $train_data_dir +local/segmentation/prepare_unsad_data.sh --stage 14 \ + --sad-map $dir/babel_sad.map \ + --config-dir $ROOT_DIR/conf --feat-type plp --add-pitch true \ + --reco-nj 40 --nj 100 --cmd "$train_cmd" \ + --sat-model-dir $sat_model_dir \ + --lang-test $lang_test \ + $train_data_dir $lang $model_dir $dir + +orig_data_dir=${train_data_dir}_sp + +data_dir=${train_data_dir}_whole + +if [ ! -z $subset ]; then + # Work on a subset + utils/subset_data_dir.sh ${data_dir} $subset \ + ${data_dir}_$subset + data_dir=${data_dir}_$subset +fi + +reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp + +# Add noise from MUSAN corpus to data directory and create a new data directory +local/segmentation/do_corruption_data_dir.sh \ + --data-dir $data_dir \ + --reco-vad-dir $reco_vad_dir \ + --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf + +# Add music from MUSAN corpus to data directory and create a new data directory +local/segmentation/do_corruption_data_dir_music.sh \ + --data-dir $data_dir \ + --reco-vad-dir $reco_vad_dir \ + --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf diff --git a/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh b/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh index 1344e185a02..4749ff7da8a 100644 --- a/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh +++ b/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh @@ -9,6 +9,8 @@ . path.sh . cmd.sh +set -e -o pipefail + if [ $# -ne 0 ]; then echo "Usage: $0" echo "This script is to serve as an example recipe." @@ -17,7 +19,7 @@ if [ $# -ne 0 ]; then fi dir=exp/unsad/make_unsad_fisher_train_100k # Work dir -subset=150 +subset=900 # All the paths below can be modified to any absolute path. @@ -54,21 +56,23 @@ oov_I 3 oov_S 3 EOF +false && { # Expecting the user to have done run.sh to have $model_dir, # $sat_model_dir, $lang, $lang_test, $train_data_dir local/segmentation/prepare_unsad_data.sh \ --sad-map $dir/fisher_sad.map \ --config-dir conf \ --reco-nj 40 --nj 100 --cmd "$train_cmd" \ - --sat-model $sat_model_dir \ + --sat-model-dir $sat_model_dir \ --lang-test $lang_test \ $train_data_dir $lang $model_dir $dir +} data_dir=${train_data_dir}_whole if [ ! 
-z $subset ]; then # Work on a subset - utils/subset_data_dir.sh ${data_dir} $subset \ + false && utils/subset_data_dir.sh ${data_dir} $subset \ ${data_dir}_$subset data_dir=${data_dir}_$subset fi @@ -76,13 +80,13 @@ fi reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp # Add noise from MUSAN corpus to data directory and create a new data directory -local/segmentation/do_corruption_data_dir.sh +false && local/segmentation/do_corruption_data_dir.sh \ --data-dir $data_dir \ - --reco-vad-dir $reco_vad_dir + --reco-vad-dir $reco_vad_dir \ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf # Add music from MUSAN corpus to data directory and create a new data directory -local/segmentation/do_corruption_data_dir_music.sh +local/segmentation/do_corruption_data_dir_music.sh --stage 10 \ --data-dir $data_dir \ - --reco-vad-dir $reco_vad_dir + --reco-vad-dir $reco_vad_dir \ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf diff --git a/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh b/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh index 12097811ec9..7385e309f5f 100755 --- a/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh +++ b/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh @@ -40,7 +40,7 @@ lang_test= # Language directory used to build graph. . utils/parse_options.sh -if [ $# -ne 5 ]; then +if [ $# -ne 4 ]; then echo "This script takes a data directory and creates a new data directory " echo "and speech activity labels" echo "for the purpose of training a Universal Speech Activity Detector." @@ -241,12 +241,12 @@ fi utils/data/get_reco2utt.sh $data_dir if [ $stage -le 0 ]; then - steps/segmentation/get_utt2num_frames.sh \ + utils/data/get_utt2num_frames.sh \ --frame-shift $frame_shift --frame-overlap $frame_overlap \ --cmd "$cmd" --nj $reco_nj $whole_data_dir awk '{print $1" "$2}' ${data_dir}/segments | utils/apply_map.pl -f 2 ${whole_data_dir}/utt2num_frames > $data_dir/utt2max_frames - utils/data/subsegment_feats.sh ${whole_data_dir}/feats.scp \ + utils/data/get_subsegmented_feats.sh ${whole_data_dir}/feats.scp \ $frame_shift $frame_overlap ${data_dir}/segments | \ utils/data/fix_subsegmented_feats.pl $data_dir/utt2max_frames \ > ${data_dir}/feats.scp @@ -289,8 +289,7 @@ utils/split_data.sh $data_dir $nj vad_dir=$dir/`basename ${ali_dir}`_vad_${data_id} if [ $stage -le 3 ]; then steps/segmentation/internal/convert_ali_to_vad.sh --cmd "$cmd" \ - $data_dir $ali_dir \ - $dir/sad_map $vad_dir + $ali_dir $dir/sad_map $vad_dir fi [ ! 
-s $vad_dir/sad_seg.scp ] && echo "$0: $vad_dir/vad.scp is empty" && exit 1 @@ -381,9 +380,9 @@ if [ $stage -le 6 ]; then utils/data/get_reco2utt.sh $outside_data_dir awk '{print $1" "$2}' $outside_data_dir/segments | utils/apply_map.pl -f 2 $whole_data_dir/utt2num_frames > $outside_data_dir/utt2max_frames - utils/data/subsegment_feats.sh ${whole_data_dir}/feats.scp \ + utils/data/get_subsegmented_feats.sh ${whole_data_dir}/feats.scp \ $frame_shift $frame_overlap ${outside_data_dir}/segments | \ - utils/data/fix_subsegmented_feats.pl $outside_data_dir/utt2max_framres \ + utils/data/fix_subsegmented_feats.pl $outside_data_dir/utt2max_frames \ > ${outside_data_dir}/feats.scp fi @@ -432,8 +431,7 @@ model_id=`basename $model_dir` decode_vad_dir=$dir/${model_id}_decode_vad_${data_id} if [ $stage -le 9 ]; then steps/segmentation/internal/convert_ali_to_vad.sh --cmd "$cmd" \ - $extended_data_dir ${model_dir}/decode_${data_id}_extended \ - $dir/sad_map $decode_vad_dir + ${model_dir}/decode_${data_id}_extended $dir/sad_map $decode_vad_dir fi [ ! -s $decode_vad_dir/sad_seg.scp ] && echo "$0: $decode_vad_dir/vad.scp is empty" && exit 1 @@ -477,7 +475,7 @@ set +e for n in `seq $reco_nj`; do utils/create_data_link.pl $reco_vad_dir/deriv_weights.$n.ark utils/create_data_link.pl $reco_vad_dir/deriv_weights_for_uncorrupted.$n.ark - utils/create_data_link.pl $reco_vad_dir/speech_feat.$n.ark + utils/create_data_link.pl $reco_vad_dir/speech_labels.$n.ark done set -e @@ -508,14 +506,12 @@ fi if [ $stage -le 14 ]; then $cmd JOB=1:$reco_nj $reco_vad_dir/log/get_speech_labels.JOB.log \ - segmentation-post-process --keep-label=1 scp:$reco_vad_dir/sad_seg.JOB.scp ark:- \| \ + segmentation-copy --keep-label=1 scp:$reco_vad_dir/sad_seg.JOB.scp ark:- \| \ segmentation-to-ali --lengths-rspecifier=ark,t:${whole_data_dir}/utt2num_frames \ - ark:- ark,t:- \| \ - steps/segmentation/convert_ali_to_vec.pl \| vector-to-feat ark:- ark:- \| copy-feats --compress \ - ark:- ark,scp:$reco_vad_dir/speech_feat.JOB.ark,$reco_vad_dir/speech_feat.JOB.scp + ark:- ark,scp:$reco_vad_dir/speech_labels.JOB.ark,$reco_vad_dir/speech_labels.JOB.scp for n in `seq $reco_nj`; do - cat $reco_vad_dir/speech_feat.$n.scp - done > $reco_vad_dir/speech_feat.scp + cat $reco_vad_dir/speech_labels.$n.scp + done > $reco_vad_dir/speech_labels.scp fi if [ $stage -le 15 ]; then From e52f0324d7e91311e5228bac3a30c41ac26797fc Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:01:28 -0500 Subject: [PATCH 342/530] asr_diarization: Bug fix in reverberate_data_dir.py --- egs/wsj/s5/steps/data/reverberate_data_dir.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/egs/wsj/s5/steps/data/reverberate_data_dir.py b/egs/wsj/s5/steps/data/reverberate_data_dir.py index 9a71126dde3..c9a4d918c91 100755 --- a/egs/wsj/s5/steps/data/reverberate_data_dir.py +++ b/egs/wsj/s5/steps/data/reverberate_data_dir.py @@ -8,6 +8,8 @@ import argparse, glob, math, os, random, sys, warnings, copy, imp, ast import data_dir_manipulation_lib as data_lib +sys.path.insert(0, 'steps') +import libs.common as common_lib def GetArgs(): # we add required arguments as named arguments for readability From b7fba13cb42f6d5f2957021977069dfb756b06c1 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:02:01 -0500 Subject: [PATCH 343/530] asr_diarization: Updated compute_output.sh to compute from Am --- egs/wsj/s5/steps/nnet3/compute_output.sh | 36 ++++++++++++++---------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git 
a/egs/wsj/s5/steps/nnet3/compute_output.sh b/egs/wsj/s5/steps/nnet3/compute_output.sh index f49790bc578..4c32b5cb0ea 100755 --- a/egs/wsj/s5/steps/nnet3/compute_output.sh +++ b/egs/wsj/s5/steps/nnet3/compute_output.sh @@ -27,7 +27,7 @@ compress=false online_ivector_dir= post_vec= output_name= -get_raw_nnet_from_am=true +use_raw_nnet=true # End configuration section. echo "$0 $@" # Print the command line for logging @@ -54,11 +54,13 @@ data=$1 srcdir=$2 dir=$3 -if $get_raw_nnet_from_am; then +if ! $use_raw_nnet; then [ ! -f $srcdir/$iter.mdl ] && echo "$0: no such file $srcdir/$iter.mdl" && exit 1 - model="nnet3-am-copy --raw=true $srcdir/$iter.mdl - |" + prog=nnet3-am-compute + model="$srcdir/$iter.mdl" else [ ! -f $srcdir/$iter.raw ] && echo "$0: no such file $srcdir/$iter.raw" && exit 1 + prog=nnet3-compute model="nnet3-copy $srcdir/$iter.raw - |" fi @@ -142,18 +144,22 @@ if [ $frame_subsampling_factor -ne 1 ]; then frame_subsampling_opt="--frame-subsampling-factor=$frame_subsampling_factor" fi -output_wspecifier="ark:| copy-feats --compress=$compress ark:- ark:- | gzip -c > $dir/nnet_output.JOB.gz" - -if [ ! -z $post_vec ]; then - if [ $stage -le 1 ]; then - copy-vector --binary=false $post_vec - | \ - awk '{for (i = 2; i < NF; i++) { sum += i; }; - printf ("["); - for (i = 2; i < NF; i++) { printf " "log(i/sum); }; - print (" ]");}' > $dir/log_priors.vec +if ! $use_raw_nnet; then + output_wspecifier="ark:| copy-feats --compress=$compress ark:- ark:- | gzip -c > $dir/log_likes.JOB.gz" +else + output_wspecifier="ark:| copy-feats --compress=$compress ark:- ark:- | gzip -c > $dir/nnet_output.JOB.gz" + + if [ ! -z $post_vec ]; then + if [ $stage -le 1 ]; then + copy-vector --binary=false $post_vec - | \ + awk '{for (i = 2; i < NF; i++) { sum += i; }; + printf ("["); + for (i = 2; i < NF; i++) { printf " "log(i/sum); }; + print (" ]");}' > $dir/log_priors.vec + fi + + output_wspecifier="ark:| matrix-add-offset ark:- 'vector-scale --scale=-1.0 $dir/log_priors.vec - |' ark:- | copy-feats --compress=$compress ark:- ark:- | gzip -c > $dir/log_likes.JOB.gz" fi - - output_wspecifier="ark:| matrix-add-offset ark:- 'vector-scale --scale=-1.0 $dir/log_priors.vec - |' ark:- | copy-feats --compress=$compress ark:- ark:- | gzip -c > $dir/log_likes.JOB.gz" fi gpu_opt="--use-gpu=no" @@ -166,7 +172,7 @@ fi if [ $stage -le 2 ]; then $cmd $gpu_queue_opt JOB=1:$nj $dir/log/compute_output.JOB.log \ - nnet3-compute $gpu_opt $ivector_opts $frame_subsampling_opt \ + $prog $gpu_opt $ivector_opts $frame_subsampling_opt \ --frames-per-chunk=$frames_per_chunk \ --extra-left-context=$extra_left_context \ --extra-right-context=$extra_right_context \ From 84889b61d1fc74bf578886c823d30d8f346acd2c Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:02:25 -0500 Subject: [PATCH 344/530] asr_diarization: Bug fix in get_egs_multiple_targets --- .../s5/steps/nnet3/get_egs_multiple_targets.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py b/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py index 8e6f1442c7a..30449c81e81 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py +++ b/egs/wsj/s5/steps/nnet3/get_egs_multiple_targets.py @@ -675,7 +675,14 @@ def generate_training_examples_internal(dir, targets_parameters, feat_dir, fpi=samples_per_iter)) if dry_run: - cleanup(dir, archives_multiple) + if generate_egs_scp: + for i in range(1, num_archives_intermediate + 1): + for j in range(1, archives_multiple + 1): + 
archive_index = (i-1) * archives_multiple + j + common_lib.force_symlink( + "egs.{0}.ark".format(archive_index), + "{dir}/egs.{i}.{j}.ark".format(dir=dir, i=i, j=j)) + cleanup(dir, archives_multiple, generate_egs_scp) return {'num_frames': num_frames, 'num_archives': num_archives, 'egs_per_archive': egs_per_archive} @@ -763,7 +770,7 @@ def generate_training_examples_internal(dir, targets_parameters, feat_dir, for i in range(1, num_archives_intermediate + 1): for j in range(1, archives_multiple + 1): archive_index = (i-1) * archives_multiple + j - common_lib.force_sym_link( + common_lib.force_symlink( "egs.{0}.ark".format(archive_index), "{dir}/egs.{i}.{j}.ark".format(dir=dir, i=i, j=j)) @@ -785,20 +792,20 @@ def generate_training_examples_internal(dir, targets_parameters, feat_dir, print (line.strip(), file=out_egs_handle) out_egs_handle.close() - cleanup(dir, archives_multiple) + cleanup(dir, archives_multiple, generate_egs_scp) return {'num_frames': num_frames, 'num_archives': num_archives, 'egs_per_archive': egs_per_archive} -def cleanup(dir, archives_multiple): +def cleanup(dir, archives_multiple, generate_egs_scp=False): logger.info("Removing temporary archives in {0}.".format(dir)) for file_name in glob.glob("{0}/egs_orig*".format(dir)): real_path = os.path.realpath(file_name) data_lib.try_to_delete(real_path) data_lib.try_to_delete(file_name) - if archives_multiple > 1: + if archives_multiple > 1 and not generate_egs_scp: # there will be some extra soft links we want to delete for file_name in glob.glob('{0}/egs.*.*.ark'.format(dir)): os.remove(file_name) From 911d1d06b7aa230ffeb5f936502c37e4eaeb68e6 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:02:50 -0500 Subject: [PATCH 345/530] asr_diariztion: Add compute-per-dim-accuracy --- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index 2bea66dbcbf..d43406e7f3e 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -468,7 +468,8 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): background_process_handler=background_process_handler, extra_egs_copy_cmd=args.extra_egs_copy_cmd, use_multitask_egs=args.use_multitask_egs, - rename_multitask_outputs=args.rename_multitask_outputs) + rename_multitask_outputs=args.rename_multitask_outputs, + compute_per_dim_accuracy=args.compute_per_dim_accuracy) if args.cleanup: # do a clean up everythin but the last 2 models, under certain @@ -493,6 +494,9 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): if args.stage <= num_iters: logger.info("Doing final combination to produce final.raw") + common_lib.run_kaldi_command( + "cp {dir}/{num_iters}.raw {dir}/pre_combine.raw" + "".format(dir=args.dir, num_iters=num_iters)) train_lib.common.combine_models( dir=args.dir, num_iters=num_iters, models_to_combine=models_to_combine, egs_dir=egs_dir, @@ -500,7 +504,8 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): run_opts=run_opts, chunk_width=args.chunk_width, background_process_handler=background_process_handler, get_raw_nnet_from_am=False, - extra_egs_copy_cmd=args.extra_egs_copy_cmd) + extra_egs_copy_cmd=args.extra_egs_copy_cmd, + compute_per_dim_accuracy=args.compute_per_dim_accuracy) if include_log_softmax and args.stage <= num_iters + 1: logger.info("Getting average posterior for purposes of " From 
abd45fe1e28f59745cfb64ddff61c593e3f32e08 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:04:30 -0500 Subject: [PATCH 346/530] asr_diarization: Update some segmentation scripts --- egs/wsj/s5/steps/segmentation/decode_sad.sh | 18 +++-- .../segmentation/decode_sad_to_segments.sh | 25 ++++-- .../internal/post_process_segments.sh | 8 +- .../segmentation/internal/prepare_sad_lang.py | 79 +++++++++++++------ 4 files changed, 91 insertions(+), 39 deletions(-) diff --git a/egs/wsj/s5/steps/segmentation/decode_sad.sh b/egs/wsj/s5/steps/segmentation/decode_sad.sh index 9758d36e24e..2f2e5ae2586 100755 --- a/egs/wsj/s5/steps/segmentation/decode_sad.sh +++ b/egs/wsj/s5/steps/segmentation/decode_sad.sh @@ -7,6 +7,8 @@ cmd=run.pl acwt=0.1 beam=8 max_active=1000 +get_pdfs=false +iter=final . path.sh @@ -22,21 +24,27 @@ graph_dir=$1 log_likes_dir=$2 dir=$3 +mkdir -p $dir nj=`cat $log_likes_dir/num_jobs` echo $nj > $dir/num_jobs -for f in $dir/trans.mdl $log_likes_dir/log_likes.1.gz $graph_dir/HCLG.fst; do +for f in $graph_dir/$iter.mdl $log_likes_dir/log_likes.1.gz $graph_dir/HCLG.fst; do if [ ! -f $f ]; then echo "$0: Could not find file $f" + exit 1 fi done decoder_opts+=(--acoustic-scale=$acwt --beam=$beam --max-active=$max_active) +ali="ark:| ali-to-phones --per-frame $graph_dir/$iter.mdl ark:- ark:- | gzip -c > $dir/ali.JOB.gz" + +if $get_pdfs; then + ali="ark:| ali-to-pdf $graph_dir/$iter.mdl ark:- ark:- | gzip -c > $dir/ali.JOB.gz" +fi + $cmd JOB=1:$nj $dir/log/decode.JOB.log \ decode-faster-mapped ${decoder_opts[@]} \ - $dir/trans.mdl \ + $graph_dir/$iter.mdl \ $graph_dir/HCLG.fst "ark:gunzip -c $log_likes_dir/log_likes.JOB.gz |" \ - ark:/dev/null ark:- \| \ - ali-to-phones --per-frame $dir/trans.mdl ark:- \ - "ark:|gzip -c > $dir/ali.JOB.gz" + ark:/dev/null "$ali" diff --git a/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh b/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh index de8ab0d90e8..84287230fba 100755 --- a/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh +++ b/egs/wsj/s5/steps/segmentation/decode_sad_to_segments.sh @@ -16,6 +16,7 @@ nonsil_transition_probability=0.1 sil_transition_probability=0.1 sil_prior=0.5 speech_prior=0.5 +use_unigram_lm=true # Decoding options acwt=1 @@ -59,14 +60,25 @@ if [ $stage -le 2 ]; then fi if [ $stage -le 3 ]; then - cat > $lang/word2prior < $lang/word2prior < $lang/G.fst + steps/segmentation/internal/make_G_fst.py --word2prior-map $lang/word2prior | \ + fstcompile --isymbols=$lang/words.txt --osymbols=$lang/words.txt \ + --keep_isymbols=false --keep_osymbols=false \ + > $lang/G.fst + else + { + echo "1 0.99 1:0.6 2:0.39"; + echo "2 0.01 1:0.5 2:0.49"; + } | \ + steps/segmentation/internal/make_bigram_G_fst.py - - | \ + fstcompile --isymbols=$lang/words.txt --osymbols=$lang/words.txt \ + --keep_isymbols=false --keep_osymbols=false \ + > $lang/G.fst + fi fi graph_dir=$dir/graph_test_${t} @@ -75,11 +87,12 @@ if [ $stage -le 4 ]; then $cmd $dir/log/make_vad_graph.log \ steps/segmentation/internal/make_sad_graph.sh --iter trans \ $lang $dir $dir/graph_test_${t} || exit 1 + cp $dir/trans.mdl $graph_dir fi if [ $stage -le 5 ]; then steps/segmentation/decode_sad.sh \ - --acwt $acwt --beam $beam --max-active $max_active \ + --acwt $acwt --beam $beam --max-active $max_active --iter trans \ $graph_dir $sad_likes_dir $dir fi diff --git a/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh b/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh index e37d5dc2f62..31f0d09f351 100755 --- 
a/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh +++ b/egs/wsj/s5/steps/segmentation/internal/post_process_segments.sh @@ -26,8 +26,10 @@ max_segment_length=1000 # Segments that are longer than this are split into overlap_length=100 # Overlapping frames when segments are split. # See the above option. min_silence_length=30 # Min silence length at which to split very long segments +min_segment_length=20 frame_shift=0.01 +frame_overlap=0.016 . utils/parse_options.sh @@ -44,7 +46,7 @@ data_dir=$1 dir=$2 segmented_data_dir=$3 -for f in $dir/orig_segmentation.1.gz $data_dir/segments; do +for f in $dir/orig_segmentation.1.gz; do if [ ! -f $f ]; then echo "$0: Could not find $f" exit 1 @@ -80,9 +82,11 @@ if [ $stage -le 2 ]; then segmentation-post-process ${post_pad_length:+--pad-label=1 --pad-length=$post_pad_length} ark:- ark:- \| \ segmentation-split-segments --alignments="ark,s,cs:gunzip -c $dir/orig_segmentation.JOB.gz | segmentation-to-ali ark:- ark:- |" \ --max-segment-length=$max_segment_length --min-alignment-chunk-length=$min_silence_length --ali-label=0 ark:- ark:- \| \ + segmentation-post-process --remove-labels=1 --max-remove-length=$min_segment_length ark:- ark:- \| \ segmentation-split-segments \ --max-segment-length=$max_segment_length --overlap-length=$overlap_length ark:- ark:- \| \ - segmentation-to-segments --frame-shift=$frame_shift ark:- \ + segmentation-to-segments --frame-shift=$frame_shift \ + --frame-overlap=$frame_overlap ark:- \ ark,t:$dir/utt2spk.JOB $dir/segments.JOB || exit 1 fi diff --git a/egs/wsj/s5/steps/segmentation/internal/prepare_sad_lang.py b/egs/wsj/s5/steps/segmentation/internal/prepare_sad_lang.py index 17b039015d2..b539286a85b 100755 --- a/egs/wsj/s5/steps/segmentation/internal/prepare_sad_lang.py +++ b/egs/wsj/s5/steps/segmentation/internal/prepare_sad_lang.py @@ -1,7 +1,12 @@ #! /usr/bin/env python from __future__ import print_function -import argparse, shlex +import argparse +import sys +import shlex + +sys.path.insert(0, 'steps') +import libs.common as common_lib def GetArgs(): parser = argparse.ArgumentParser(description="""This script generates a lang @@ -9,13 +14,13 @@ def GetArgs(): the corresponding min durations and end transition probability.""") parser.add_argument("--phone-transition-parameters", dest='phone_transition_para_array', - type=str, action='append', required = True, - help = "Options to build topology. \n" + type=str, action='append', required=True, + help="Options to build topology. 
\n" "--phone-list= # Colon-separated list of phones\n" "--min-duration= # Min duration for the phones\n" "--end-transition-probability= # Probability of the end transition after the minimum duration\n") parser.add_argument("dir", type=str, - help = "Output lang directory") + help="Output lang directory") args = parser.parse_args() return args @@ -47,7 +52,8 @@ def ParsePhoneTransitionParameters(para_array): return phone_transition_parameters -def GetPhoneMap(phone_transition_parameters): + +def get_phone_map(phone_transition_parameters): phone2int = {} n = 1 for t in phone_transition_parameters: @@ -59,36 +65,57 @@ def GetPhoneMap(phone_transition_parameters): return phone2int -def Main(): + +def print_duration_constraint_states(min_duration, topo): + for state in range(0, min_duration - 1): + print(" {state} 0" + " {dest_state} 1.0 ".format( + state=state, dest_state=state + 1), + file=topo) + + +def print_topology(phone_transition_parameters, phone2int, args, topo): + for t in phone_transition_parameters: + print ("", file=topo) + print ("", file=topo) + print ("{0}".format(" ".join([str(phone2int[p]) + for p in t.phone_list])), file=topo) + print ("", file=topo) + + print_duration_constraint_states(t.min_duration, topo) + + print(" {state} 0 " + " {state} {self_prob} " + " {next_state} {next_prob} ".format( + state=t.min_duration - 1, next_state=t.min_duration, + self_prob=1 - t.end_transition_probability, + next_prob=t.end_transition_probability), file=topo) + + print(" {state} ".format(state=t.min_duration), + file=topo) # Final state + print ("", file=topo) + + +def main(): args = GetArgs() phone_transition_parameters = ParsePhoneTransitionParameters(args.phone_transition_para_array) - phone2int = GetPhoneMap(phone_transition_parameters) + phone2int = get_phone_map(phone_transition_parameters) topo = open("{0}/topo".format(args.dir), 'w') - print ("", file = topo) + print ("", file=topo) - for t in phone_transition_parameters: - print ("", file = topo) - print ("", file = topo) - print ("{0}".format(" ".join([str(phone2int[p]) for p in t.phone_list])), file = topo) - print ("", file = topo) - - for state in range(0, t.min_duration-1): - print(" {0} 0 {1} 1.0 ".format(state, state + 1), file = topo) - print(" {state} 0 {state} {self_prob} {next_state} {next_prob} ".format( - state = t.min_duration - 1, next_state = t.min_duration, - self_prob = 1 - t.end_transition_probability, - next_prob = t.end_transition_probability), file = topo) - print(" {state} ".format(state = t.min_duration), file = topo) # Final state - print ("", file = topo) - print ("", file = topo) + print_topology(phone_transition_parameters, phone2int, args, topo) + + print ("", file=topo) phones_file = open("{0}/phones.txt".format(args.dir), 'w') - for p,n in sorted(list(phone2int.items()), key = lambda x:x[1]): - print ("{0} {1}".format(p, n), file = phones_file) + print (" 0", file=phones_file) + + for p,n in sorted(list(phone2int.items()), key=lambda x:x[1]): + print ("{0} {1}".format(p, n), file=phones_file) if __name__ == '__main__': - Main() + main() From 0cd44c87162eaaf5632801ce01f16085e442f80e Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:05:00 -0500 Subject: [PATCH 347/530] asr_diarization: SimpleHmm version of segmentation --- .../do_segmentation_data_dir_simple.sh | 239 ++++++++++++++++++ .../internal/prepare_simple_hmm_lang.py | 202 +++++++++++++++ 2 files changed, 441 insertions(+) create mode 100755 egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh create mode 
100755 egs/wsj/s5/steps/segmentation/internal/prepare_simple_hmm_lang.py diff --git a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh new file mode 100755 index 00000000000..0da130ee3ab --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh @@ -0,0 +1,239 @@ +#!/bin/bash + +set -e +set -o pipefail +set -u + +. path.sh +. cmd.sh + +affix= # Affix for the segmentation +nj=32 # works on recordings as against on speakers + +# Feature options (Must match training) +mfcc_config=conf/mfcc_hires_bp.conf +feat_affix=bp # Affix for the type of feature used + +skip_output_computation=false + +stage=-1 +sad_stage=-1 +output_name=output-speech # The output node in the network +sad_name=sad # Base name for the directory storing the computed loglikes +segmentation_name=segmentation # Base name for the directory doing segmentation + +# SAD network config +iter=final # Model iteration to use + +# Contexts must ideally match training for LSTM models, but +# may not necessarily for stats components +extra_left_context=0 # Set to some large value, typically 40 for LSTM (must match training) +extra_right_context=0 + +frame_subsampling_factor=1 # Subsampling at the output + +transition_scale=10.0 +loopscale=1.0 + +# Set to true if the test data has > 8kHz sampling frequency. +do_downsampling=false + +# Segmentation configs +segmentation_config=conf/segmentation_speech.conf +convert_data_dir_to_whole=true + +echo $* + +. utils/parse_options.sh + +if [ $# -ne 5 ]; then + echo "Usage: $0 " + echo " e.g.: $0 ~/workspace/egs/ami/s5b/data/sdm1/dev exp/nnet3_sad_snr/nnet_tdnn_j_n4 mfcc_hires_bp data/ami_sdm1_dev" + exit 1 +fi + +src_data_dir=$1 # The input data directory that needs to be segmented. + # Any segments in that will be ignored. +sad_nnet_dir=$2 # The SAD neural network +lang=$3 +mfcc_dir=$4 # The directory to store the features +data_dir=$5 # The output data directory will be ${data_dir}_seg + +affix=${affix:+_$affix} +feat_affix=${feat_affix:+_$feat_affix} + +data_id=`basename $data_dir` +sad_dir=${sad_nnet_dir}/${sad_name}${affix}_${data_id}_whole${feat_affix} +seg_dir=${sad_nnet_dir}/${segmentation_name}${affix}_${data_id}_whole${feat_affix} + +export PATH="$KALDI_ROOT/tools/sph2pipe_v2.5/:$PATH" +[ ! 
-z `which sph2pipe` ] + +test_data_dir=data/${data_id}${feat_affix}_hires + +if $convert_data_dir_to_whole; then + if [ $stage -le 0 ]; then + whole_data_dir=${sad_dir}/${data_id}_whole + utils/data/convert_data_dir_to_whole.sh $src_data_dir ${whole_data_dir} + + if $do_downsampling; then + freq=`cat $mfcc_config | perl -pe 's/\s*#.*//g' | grep "sample-frequency=" | awk -F'=' '{if (NF == 0) print 16000; else print $2}'` + utils/data/downsample_data_dir.sh $freq $whole_data_dir + fi + + utils/copy_data_dir.sh ${whole_data_dir} $test_data_dir + fi +else + if [ $stage -le 0 ]; then + utils/copy_data_dir.sh $src_data_dir $test_data_dir + + if $do_downsampling; then + freq=`cat $mfcc_config | perl -pe 's/\s*#.*//g' | grep "sample-frequency=" | awk -F'=' '{if (NF == 0) print 16000; else print $2}'` + utils/data/downsample_data_dir.sh $freq $test_data_dir + fi + fi +fi + +if [ $stage -le 1 ]; then + steps/make_mfcc.sh --mfcc-config $mfcc_config --nj $nj --cmd "$train_cmd" \ + ${test_data_dir} exp/make_hires/${data_id}${feat_affix} $mfcc_dir + steps/compute_cmvn_stats.sh ${test_data_dir} exp/make_hires/${data_id}${feat_affix} $mfcc_dir + utils/fix_data_dir.sh ${test_data_dir} +fi + +post_vec=$sad_nnet_dir/post_${output_name}.vec +if [ ! -f $sad_nnet_dir/post_${output_name}.vec ]; then + echo "$0: Could not find $sad_nnet_dir/post_${output_name}.vec. See the last stage of local/segmentation/run_train_sad.sh" + exit 1 +fi + +create_topo=true +if $create_topo; then + if [ ! -f $lang/classes_info.txt ]; then + echo "$0: Could not find $lang/topo or $lang/classes_info.txt" + exit 1 + else + steps/segmentation/internal/prepare_simple_hmm_lang.py \ + $lang/classes_info.txt $lang + fi +fi + +if [ $stage -le 3 ]; then + simple-hmm-init $lang/topo $lang/init.mdl + + $train_cmd $sad_nnet_dir/log/get_final_${output_name}_model.log \ + nnet3-am-init $lang/init.mdl \ + "nnet3-copy --edits='rename-node old-name=$output_name new-name=output' $sad_nnet_dir/$iter.raw - |" - \| \ + nnet3-am-adjust-priors - $sad_nnet_dir/post_${output_name}.vec \ + $sad_nnet_dir/${iter}_${output_name}.mdl +fi +iter=${iter}_${output_name} + +if [ $stage -le 4 ]; then + steps/nnet3/compute_output.sh --nj $nj --cmd "$train_cmd" \ + --iter $iter --use-raw-nnet false \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk 150 \ + --stage $sad_stage \ + --frame-subsampling-factor $frame_subsampling_factor \ + ${test_data_dir} $sad_nnet_dir $sad_dir +fi + +graph_dir=${sad_nnet_dir}/graph_${output_name} + +if [ $stage -le 5 ]; then + cp -r $lang $graph_dir + + if [ ! -f $lang/final.mdl ]; then + echo "$0: Could not find $lang/final.mdl!" 
+ echo "$0: Using $lang/init.mdl instead" + cp $lang/init.mdl $graph_dir/final.mdl + else + cp $lang/final.mdl $graph_dir + fi + + $train_cmd $lang/log/make_graph.log \ + make-simple-hmm-graph --transition-scale=$transition_scale \ + --self-loop-scale=$loopscale \ + $graph_dir/final.mdl \| \ + fstdeterminizestar --use-log=true \| \ + fstrmepslocal \| \ + fstminimizeencoded '>' $graph_dir/HCLG.fst +fi + +if [ $stage -le 6 ]; then + # 'final' here refers to $lang/final.mdl + steps/segmentation/decode_sad.sh --acwt 1.0 --cmd "$decode_cmd" \ + --iter final --get-pdfs true $graph_dir $sad_dir $seg_dir +fi + +if [ $stage -le 7 ]; then + steps/segmentation/post_process_sad_to_subsegments.sh \ + --cmd "$train_cmd" --segmentation-config $segmentation_config \ + --frame-subsampling-factor $frame_subsampling_factor \ + ${test_data_dir} $lang/phone2sad_map ${seg_dir} \ + ${seg_dir} ${data_dir}_seg + + cp $src_data_dir/wav.scp ${data_dir}_seg +fi + +exit 0 + +segments_opts="--single-speaker" + +if false; then + mkdir -p ${seg_dir}/post_process_${data_id} + echo $nj > ${seg_dir}/post_process_${data_id}/num_jobs + + $train_cmd JOB=1:$nj $seg_dir/log/convert_to_segments.JOB.log \ + segmentation-init-from-ali "ark:gunzip -c $seg_dir/ali.JOB.gz |" ark:- \| \ + segmentation-copy --label-map=$lang/phone2sad_map --frame-subsampling-factor=$frame_subsampling_factor ark:- ark:- \| \ + segmentation-to-segments --frame-overlap=0.02 $segments_opts ark:- \ + ark,t:${seg_dir}/post_process_${data_id}/utt2spk.JOB \ + ${seg_dir}/post_process_${data_id}/segments.JOB + + for n in `seq $nj`; do + cat ${seg_dir}/post_process_${data_id}/segments.$n + done > ${seg_dir}/post_process_${data_id}/segments + + for n in `seq $nj`; do + cat ${seg_dir}/post_process_${data_id}/utt2spk.$n + done > ${seg_dir}/post_process_${data_id}/utt2spk + + rm -r ${data_dir}_seg || true + mkdir -p ${data_dir}_seg + + utils/data/subsegment_data_dir.sh ${test_data_dir} \ + ${seg_dir}/post_process_${data_id}/segments ${data_dir}_seg + + cp ${src_data_dir}/wav.scp ${data_dir}_seg + cp ${seg_dir}/post_process_${data_id}/utt2spk ${data_dir}_seg + for f in stm glm reco2file_and_channel; do + [ -f $src_data_dir/$f ] && cp ${src_data_dir}/$f ${data_dir}_seg + done + + rm ${data_dir}/{cmvn.scp,spk2utt} || true + utils/fix_data_dir.sh ${data_dir}_seg +fi + +exit 0 + +# Subsegment data directory +if [ $stage -le 8 ]; then + utils/data/get_reco2num_frames.sh ${test_data_dir} + awk '{print $1" "$2}' ${data_dir}_seg/segments | \ + utils/apply_map.pl -f 2 ${test_data_dir}/reco2num_frames > \ + ${data_dir}_seg/utt2max_frames + + frame_shift_info=`cat $mfcc_config | steps/segmentation/get_frame_shift_info_from_config.pl` + utils/data/get_subsegment_feats.sh ${test_data_dir}/feats.scp \ + $frame_shift_info ${data_dir}_seg/segments | \ + utils/data/fix_subsegmented_feats.pl ${data_dir}_seg/utt2max_frames > \ + ${data_dir}_seg/feats.scp + steps/compute_cmvn_stats.sh --fake ${data_dir}_seg + + utils/fix_data_dir.sh ${data_dir}_seg +fi + + diff --git a/egs/wsj/s5/steps/segmentation/internal/prepare_simple_hmm_lang.py b/egs/wsj/s5/steps/segmentation/internal/prepare_simple_hmm_lang.py new file mode 100755 index 00000000000..eae0f142668 --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/internal/prepare_simple_hmm_lang.py @@ -0,0 +1,202 @@ +#! 
/usr/bin/env python
+
+from __future__ import print_function
+import argparse
+import logging
+import os
+import sys
+
+sys.path.insert(0, 'steps')
+import libs.common as common_lib
+
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+handler = logging.StreamHandler()
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter("%(asctime)s [%(filename)s:%(lineno)s - "
+                              "%(funcName)s - %(levelname)s ] %(message)s")
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+
+
+def get_args():
+    parser = argparse.ArgumentParser(
+        description="""This script generates a lang directory for decoding with
+        a simple HMM model.
+        It needs as input a classes_info file with the format:
+        <class-id> <initial-prob> <self-loop-prob> <num-states> <list-of-pairs>,
+        where each pair is <destination-class>:<transition-prob>.
+        The destination-class -1 is used to represent the final
+        probability.""")
+
+    parser.add_argument("classes_info", type=argparse.FileType('r'),
+                        help="File with classes_info")
+    parser.add_argument("dir", type=str,
+                        help="Output lang directory")
+    args = parser.parse_args()
+    return args
+
+
+class ClassInfo(object):
+    def __init__(self, class_id):
+        self.class_id = class_id
+        self.start_state = -1
+        self.num_states = 0
+        self.initial_prob = 0
+        self.self_loop_prob = 0
+        self.transitions = {}
+
+    def __str__(self):
+        return ("class-id={0},start-state={1},num-states={2},"
+                "initial-prob={3:.2f},transitions={4}".format(
+                    self.class_id, self.start_state, self.num_states,
+                    self.initial_prob, ' '.join(
+                        ['{0}:{1}'.format(x, y)
+                         for x, y in self.transitions.iteritems()])))
+
+
+def read_classes_info(file_handle):
+    classes_info = {}
+
+    num_states = 1
+    num_classes = 0
+
+    for line in file_handle.readlines():
+        try:
+            parts = line.split()
+            class_id = int(parts[0])
+            assert class_id > 0, class_id
+            if class_id in classes_info:
+                raise RuntimeError(
+                    "Duplicate class-id {0} in file {1}".format(
+                        class_id, file_handle.name))
+            classes_info[class_id] = ClassInfo(class_id)
+            class_info = classes_info[class_id]
+            class_info.initial_prob = float(parts[1])
+            class_info.self_loop_prob = float(parts[2])
+            class_info.num_states = int(parts[3])
+            class_info.start_state = num_states
+            num_states += class_info.num_states
+            num_classes += 1
+
+            if len(parts) > 4:
+                for part in parts[4:]:
+                    dest_class, transition_prob = part.split(':')
+                    dest_class = int(dest_class)
+                    if dest_class in class_info.transitions:
+                        logger.error(
+                            "Duplicate transition to class-id {0} "
+                            "in transitions".format(dest_class))
+                        raise RuntimeError
+                    class_info.transitions[dest_class] = float(transition_prob)
+            else:
+                raise RuntimeError(
+                    "No transitions out of class {0}".format(class_id))
+        except Exception:
+            logger.error("Error processing line %s in file %s",
+                         line, file_handle.name)
+            raise
+
+    # Final state
+    classes_info[-1] = ClassInfo(-1)
+    class_info = classes_info[-1]
+    class_info.num_states = 1
+    class_info.start_state = num_states
+
+    for class_id, class_info in classes_info.iteritems():
+        logger.info("For class %d, got class-info %s", class_id, class_info)
+
+    return classes_info, num_classes
+
+
+def print_states_for_class(class_id, classes_info, topo):
+    class_info = classes_info[class_id]
+
+    assert class_info.num_states > 1, class_info
+
+    for state in range(class_info.start_state,
+                       class_info.start_state + class_info.num_states - 1):
+        print("<State> {state} <PdfClass> {pdf}"
+              " <Transition> {dest_state} 1.0 </State>".format(
+                  state=state, dest_state=state + 1,
+                  pdf=class_info.class_id - 1),
+              file=topo)
+
+    state = class_info.start_state + class_info.num_states - 1
+
+    transitions = []
+
+    transitions.append("<Transition> {next_state} {next_prob}".format(
+        next_state=state, next_prob=class_info.self_loop_prob))
+
+    for dest_class, prob in class_info.transitions.iteritems():
+        try:
+            next_state = classes_info[dest_class].start_state
+
+            transitions.append("<Transition> {next_state} {next_prob}".format(
+                next_state=next_state, next_prob=prob))
+        except Exception:
+            logger.error("Failed to add transition (%d->%d).\n"
+                         "classes_info = %s", class_id, dest_class,
+                         class_info)
+            raise
+
+    print("<State> {state} <PdfClass> {pdf} "
+          "{transitions} </State>".format(
+              state=state, pdf=class_id - 1,
+              transitions=' '.join(transitions)), file=topo)
+
+
+def main():
+    try:
+        args = get_args()
+        run(args)
+    except Exception:
+        logger.error("Failed preparing lang directory")
+        raise
+
+
+def run(args):
+    if not os.path.exists(args.dir):
+        os.makedirs(args.dir)
+
+    classes_info, num_classes = read_classes_info(args.classes_info)
+
+    topo = open("{0}/topo".format(args.dir), 'w')
+
+    print("<Topology>", file=topo)
+    print("<TopologyEntry>", file=topo)
+    print("<ForPhones>", file=topo)
+    print("1", file=topo)
+    print("</ForPhones>", file=topo)
+
+    # Print transitions from initial state (initial probs)
+    transitions = []
+    for class_id in range(1, num_classes + 1):
+        class_info = classes_info[class_id]
+        transitions.append("<Transition> {next_state} {next_prob}".format(
+            next_state=class_info.start_state,
+            next_prob=class_info.initial_prob))
+    print("<State> 0 {transitions} </State>".format(
+        transitions=' '.join(transitions)), file=topo)
+
+    for class_id in range(1, num_classes + 1):
+        print_states_for_class(class_id, classes_info, topo)
+
+    print("<State> {state} </State>".format(
+        state=classes_info[-1].start_state), file=topo)
+
+    print("</TopologyEntry>", file=topo)
+    print("</Topology>", file=topo)
+    topo.close()
+
+    with open('{0}/phones.txt'.format(args.dir), 'w') as phones_f:
+        for class_id in range(1, num_classes + 1):
+            print("{0} {1}".format(class_id - 1, class_id), file=phones_f)
+
+    common_lib.force_symlink('{0}/phones.txt'.format(args.dir),
+                             '{0}/words.txt'.format(args.dir))
+
+
+if __name__ == '__main__':
+    main()

From a4b823c7c145ceae70a026ebf3a226277397a087 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:05:30 -0500
Subject: [PATCH 348/530] More segmentation script updated

---
 .../post_process_sad_to_subsegments.sh        | 24 ++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh b/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh
index 0ca6b3dd126..d5ad48a492f 100755
--- a/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh
+++ b/egs/wsj/s5/steps/segmentation/post_process_sad_to_subsegments.sh
@@ -14,6 +14,7 @@ nj=18
 frame_subsampling_factor=1
 
 frame_shift=0.01
+frame_overlap=0.015
 
 . utils/parse_options.sh
 
@@ -56,21 +57,32 @@ if [ $stage -le 1 ]; then
 fi
 
 if [ $stage -le 2 ]; then
+  # --frame-overlap is set to 0 to not do any additional padding when writing
+  # segments. This padding will be done later by the option
+  # --segment-end-padding to utils/data/subsegment_data_dir.sh.
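+  # For example (hypothetical numbers): with the default frame_overlap=0.015,
+  # a subsegment written here as [1.00, 2.00] ends up as [1.00, 2.015] after
+  # the utils/data/subsegment_data_dir.sh call below applies the padding.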
steps/segmentation/internal/post_process_segments.sh \ --stage $stage --cmd "$cmd" \ --config $segmentation_config --frame-shift $frame_shift \ + --frame-overlap 0 \ $data_dir $dir $segmented_data_dir fi mv $segmented_data_dir/segments $segmented_data_dir/sub_segments -utils/data/subsegment_data_dir.sh $data_dir $segmented_data_dir/sub_segments $segmented_data_dir +utils/data/subsegment_data_dir.sh --segment-end-padding `perl -e "print $frame_overlap"` \ + $data_dir $segmented_data_dir/sub_segments $segmented_data_dir +utils/fix_data_dir.sh $segmented_data_dir -utils/data/get_reco2num_frames.sh ${data_dir} +utils/data/get_reco2num_frames.sh --nj $nj --cmd "$cmd" ${data_dir} mv $segmented_data_dir/feats.scp $segmented_data_dir/feats.scp.tmp -cat $segmented_data_dir/segments | utils/apply_map.pl -f 2 $data_dir/reco2num_frames > $segmetned_data_dir/utt2max_frames -cat $segmented_data_dir/feats.scp.tmp | utils/data/fix_subsegmented_feats.pl $dsegmented_data_dir/utt2max_frames > $segmented_data_dir/feats.scp - -utils/utt2spk_to_spk2utt.pl $segmented_data_dir/utt2spk > $segmented_data_dir/spk2utt || exit 1 +cat $segmented_data_dir/segments | awk '{print $1" "$2}' | \ + utils/apply_map.pl -f 2 $data_dir/reco2num_frames > \ + $segmented_data_dir/utt2max_frames +cat $segmented_data_dir/feats.scp.tmp | \ + utils/data/fix_subsegmented_feats.pl $segmented_data_dir/utt2max_frames > \ + $segmented_data_dir/feats.scp + +utils/utt2spk_to_spk2utt.pl $segmented_data_dir/utt2spk > \ + $segmented_data_dir/spk2utt || exit 1 utils/fix_data_dir.sh $segmented_data_dir if [ ! -s $segmented_data_dir/utt2spk ] || [ ! -s $segmented_data_dir/segments ]; then From dd51f1ce9ee2c6f5f0467a14e38b6320940fd1b5 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:06:45 -0500 Subject: [PATCH 349/530] subsegment_data_dir fix --- egs/wsj/s5/utils/data/fix_subsegmented_feats.pl | 2 +- egs/wsj/s5/utils/data/get_subsegment_feats.sh | 1 - egs/wsj/s5/utils/data/subsegment_data_dir.sh | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) delete mode 120000 egs/wsj/s5/utils/data/get_subsegment_feats.sh diff --git a/egs/wsj/s5/utils/data/fix_subsegmented_feats.pl b/egs/wsj/s5/utils/data/fix_subsegmented_feats.pl index bd8aeb8e409..b0cece46ca8 100755 --- a/egs/wsj/s5/utils/data/fix_subsegmented_feats.pl +++ b/egs/wsj/s5/utils/data/fix_subsegmented_feats.pl @@ -49,7 +49,7 @@ my @F = split(/ /, $before_range); my $utt = shift @F; - defined $utt2max_frames{$utt} or die "fix_subsegmented_feats.pl: Could not find key $utt in $utt2num_frames_file.\nError with line $line"; + defined $utt2max_frames{$utt} or die "fix_subsegmented_feats.pl: Could not find key $utt in $utt2max_frames_file.\nError with line $line"; if ($range !~ m/^(\d*):(\d*)([,]?.*)$/) { print STDERR "fix_subsegmented_feats.pl: could not make sense of input line $_"; diff --git a/egs/wsj/s5/utils/data/get_subsegment_feats.sh b/egs/wsj/s5/utils/data/get_subsegment_feats.sh deleted file mode 120000 index c1495ea63ff..00000000000 --- a/egs/wsj/s5/utils/data/get_subsegment_feats.sh +++ /dev/null @@ -1 +0,0 @@ -get_subsegmented_feats.sh \ No newline at end of file diff --git a/egs/wsj/s5/utils/data/subsegment_data_dir.sh b/egs/wsj/s5/utils/data/subsegment_data_dir.sh index b018d5ec94a..10a8a9cb264 100755 --- a/egs/wsj/s5/utils/data/subsegment_data_dir.sh +++ b/egs/wsj/s5/utils/data/subsegment_data_dir.sh @@ -202,6 +202,7 @@ utils/data/fix_data_dir.sh $dir validate_opts= [ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" [ ! 
-f $srcdir/wav.scp ] && validate_opts="$validate_opts --no-wav" +$no_text && validate_opts="$validate_opts --no-text" utils/data/validate_data_dir.sh $validate_opts $dir From 310f42e71973e1624f848dd7584e540ac5c33097 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:07:15 -0500 Subject: [PATCH 350/530] asr_diarization: Update get_sad_map --- egs/wsj/s5/steps/segmentation/get_sad_map.py | 42 +++++--------------- 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/egs/wsj/s5/steps/segmentation/get_sad_map.py b/egs/wsj/s5/steps/segmentation/get_sad_map.py index 9160503c7ad..222e6c1a512 100755 --- a/egs/wsj/s5/steps/segmentation/get_sad_map.py +++ b/egs/wsj/s5/steps/segmentation/get_sad_map.py @@ -20,34 +20,10 @@ """ import argparse +import sys - -class StrToBoolAction(argparse.Action): - """ A custom action to convert bools from shell format i.e., true/false - to python format i.e., True/False """ - def __call__(self, parser, namespace, values, option_string=None): - try: - if values == "true": - setattr(namespace, self.dest, True) - elif values == "true": - setattr(namespace, self.dest, False) - else: - raise ValueError - except ValueError: - raise Exception("Unknown value {0} for --{1}".format(values, - self.dest)) - - -class NullstrToNoneAction(argparse.Action): - """ A custom action to convert empty strings passed by shell - to None in python. This is necessary as shell scripts print null - strings when a variable is not specified. We could use the more apt - None in python. """ - def __call__(self, parser, namespace, values, option_string=None): - if values.strip() == "": - setattr(namespace, self.dest, None) - else: - setattr(namespace, self.dest, values) +sys.path.insert(0, 'steps') +import libs.common as common_lib def get_args(): @@ -71,7 +47,7 @@ def get_args(): or noise phones to separate SAD labels. """) - parser.add_argument("--init-sad-map", type=str, action=NullstrToNoneAction, + parser.add_argument("--init-sad-map", type=str, action=common_lib.NullstrToNoneAction, help="""Initial SAD map that will be used to override the default mapping using phones/silence.txt and phones/nonsilence.txt. Does not need to specify labels @@ -82,24 +58,24 @@ def get_args(): noise_group = parser.add_mutually_exclusive_group() noise_group.add_argument("--noise-phones-file", type=str, - action=NullstrToNoneAction, + action=common_lib.NullstrToNoneAction, help="Map noise phones from file to label 2") noise_group.add_argument("--noise-phones-list", type=str, - action=NullstrToNoneAction, + action=common_lib.NullstrToNoneAction, help="A colon-separated list of noise phones to " "map to label 2") - parser.add_argument("--unk", type=str, action=NullstrToNoneAction, + parser.add_argument("--unk", type=str, action=common_lib.NullstrToNoneAction, help="""UNK phone, if provided will be mapped to label 3""") parser.add_argument("--map-noise-to-sil", type=str, - action=StrToBoolAction, + action=common_lib.StrToBoolAction, choices=["true", "false"], default=False, help="""Map noise phones to silence before writing the map. i.e. anything with label 2 is mapped to label 0.""") parser.add_argument("--map-unk-to-speech", type=str, - action=StrToBoolAction, + action=common_lib.StrToBoolAction, choices=["true", "false"], default=False, help="""Map UNK phone to speech before writing the map i.e. 
anything with label 3 is mapped to label 1.""")

From c9a44e0332089365f4660dec77735b49f0c2a62f Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:07:35 -0500
Subject: [PATCH 351/530] asr_diarization: downsample_data_dir.sh
 perturb_data_dir_speed_random.sh

---
 egs/wsj/s5/utils/data/downsample_data_dir.sh  | 34 +++++++++++++
 .../data/perturb_data_dir_speed_random.sh     | 51 +++++++++++++++++++
 2 files changed, 85 insertions(+)
 create mode 100755 egs/wsj/s5/utils/data/downsample_data_dir.sh
 create mode 100755 egs/wsj/s5/utils/data/perturb_data_dir_speed_random.sh

diff --git a/egs/wsj/s5/utils/data/downsample_data_dir.sh b/egs/wsj/s5/utils/data/downsample_data_dir.sh
new file mode 100755
index 00000000000..022af67d265
--- /dev/null
+++ b/egs/wsj/s5/utils/data/downsample_data_dir.sh
@@ -0,0 +1,34 @@
+#! /bin/bash

+# Copyright 2016  Vimal Manohar
+# Apache 2.0.
+
+if [ $# -ne 2 ]; then
+  echo "Usage: $0 <sampling-frequency> <data-dir>"
+  exit 1
+fi
+
+freq=$1
+dir=$2
+
+sox=`which sox` || { echo "Could not find sox in PATH"; exit 1; }
+
+if [ -f $dir/feats.scp ]; then
+  mkdir -p $dir/.backup
+  mv $dir/feats.scp $dir/.backup/
+  if [ -f $dir/cmvn.scp ]; then
+    mv $dir/cmvn.scp $dir/.backup/
+  fi
+  echo "$0: feats.scp already exists. Moving it to $dir/.backup"
+fi
+
+mv $dir/wav.scp $dir/wav.scp.tmp
+cat $dir/wav.scp.tmp | python -c "import sys
+for line in sys.stdin.readlines():
+  splits = line.strip().split()
+  if splits[-1] == '|':
+    out_line = line.strip() + ' $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |'
+  else:
+    out_line = '{0} cat {1} | $sox -t wav - -r $freq -c 1 -b 16 -t wav - downsample |'.format(splits[0], ' '.join(splits[1:]))
+  print (out_line)" > ${dir}/wav.scp
+rm $dir/wav.scp.tmp

diff --git a/egs/wsj/s5/utils/data/perturb_data_dir_speed_random.sh b/egs/wsj/s5/utils/data/perturb_data_dir_speed_random.sh
new file mode 100755
index 00000000000..d9d027b77a3
--- /dev/null
+++ b/egs/wsj/s5/utils/data/perturb_data_dir_speed_random.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright 2017  Vimal Manohar
+
+# Apache 2.0
+
+. utils/parse_options.sh
+
+if [ $# != 2 ]; then
+  echo "Usage: perturb_data_dir_speed_random.sh <srcdir> <destdir>"
+  echo "Applies 3-way speed perturbation using factors of 0.9, 1.0 and 1.1 on random subsets."
+  echo "e.g.:"
+  echo " $0 data/train data/train_spr"
+  echo "Note: if <destdir>/feats.scp already exists, this will refuse to run."
+  exit 1
+fi
+
+srcdir=$1
+destdir=$2
+
+if [ ! -f $srcdir/wav.scp ]; then
+  echo "$0: expected $srcdir/wav.scp to exist"
+  exit 1
+fi
+
+if [ -f $destdir/feats.scp ]; then
+  echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)"
+  exit 1
+fi
+
+echo "$0: making sure the utt2dur file is present in ${srcdir}, because "
+echo "... obtaining it after speed-perturbing would be very slow, and"
+echo "... you might need it."
+utils/data/get_utt2dur.sh ${srcdir}
+
+utils/split_data.sh --per-reco $srcdir 3
+
+utils/data/perturb_data_dir_speed.sh 0.9 ${srcdir}/split3reco/1 ${destdir}_speed0.9 || exit 1
+utils/data/perturb_data_dir_speed.sh 1.1 ${srcdir}/split3reco/3 ${destdir}_speed1.1 || exit 1
+utils/data/combine_data.sh $destdir ${srcdir}/split3reco/2 ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1
+
+rm -r ${destdir}_speed0.9 ${destdir}_speed1.1
+
+echo "$0: generated 3-way speed-perturbed version of random subsets of data in $srcdir, in $destdir"
+if [ -f $srcdir/text ]; then
+  utils/validate_data_dir.sh --no-feats $destdir
+else
+  utils/validate_data_dir.sh --no-feats --no-text $destdir
+fi
+
+

From 0e276b309dde3efb1c9a2657948ba4dde294e77d Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:08:02 -0500
Subject: [PATCH 352/530] asr_diarization: normalize_data_range.pl

---
 egs/wsj/s5/utils/data/normalize_data_range.pl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/egs/wsj/s5/utils/data/normalize_data_range.pl b/egs/wsj/s5/utils/data/normalize_data_range.pl
index a7a144fd82e..61ccfd593f7 100755
--- a/egs/wsj/s5/utils/data/normalize_data_range.pl
+++ b/egs/wsj/s5/utils/data/normalize_data_range.pl
@@ -51,7 +51,7 @@ sub combine_ranges {
   if ($start1 + $end2 > $end1) {
     chop $line;
     print STDERR ("normalize_data_range.pl: could not make sense of line $line " .
-                  "[second $row_or_column range too large vs first range, $start1 + $end2 > $end1]\n");
+                  "[second $row_or_column range too large vs first range, $start1 + $end2 > $end1]; adjusting end.\n");
   }
   return ($start2+$start1, $end2+$start1);
 }
@@ -75,7 +75,7 @@ sub combine_ranges {
     "if concat-feats was in the input data\n";
     exit(1);
   }
-  print STDERR "matched: $before_range $first_range $second_range\n";
+  # print STDERR "matched: $before_range $first_range $second_range\n";
   if ($first_range !~ m/^((\d*):(\d*)|)(,(\d*):(\d*)|)$/) {
     print STDERR "normalize_data_range.pl: could not make sense of input line $_";
     exit(1);

From 4637f02ad5733f572879cb12c252e4b166dd276c Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:08:23 -0500
Subject: [PATCH 353/530] asr_diarization: Add reco2utt to split_data.sh

---
 egs/wsj/s5/utils/split_data.sh | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/egs/wsj/s5/utils/split_data.sh b/egs/wsj/s5/utils/split_data.sh
index 646830481db..f90ce9e6759 100755
--- a/egs/wsj/s5/utils/split_data.sh
+++ b/egs/wsj/s5/utils/split_data.sh
@@ -27,14 +27,17 @@ elif [ "$1" == "--per-reco" ]; then
 fi
 
 if [ $# != 2 ]; then
-  echo "Usage: $0 [--per-utt] <data-dir> <num-to-split>"
+  echo "Usage: $0 [--per-utt|--per-reco] <data-dir> <num-to-split>"
   echo "E.g.: $0 data/train 50"
   echo "It creates its output in e.g. data/train/split50/{1,2,3,...50}, or if the "
   echo "--per-utt option was given, in e.g. data/train/split50utt/{1,2,3,...50}."
+  echo "If the --per-reco option was given, in e.g. data/train/split50reco/{1,2,3,...50}."
   echo ""
   echo "This script will not split the data-dir if it detects that the output is newer than the input."
   echo "By default it splits per speaker (so each speaker is in only one split dir),"
   echo "but with the --per-utt option it will ignore the speaker information while splitting."
+  echo "But if --per-reco option is given, it splits per recording "
+  echo "(so each recording is in only one split dir)"
   exit 1
 fi
 
@@ -133,7 +136,7 @@ if [ ! -f $data/segments ]; then
 fi
 
 # split some things that are indexed by utterance.
-for f in feats.scp text vad.scp utt2lang $maybe_wav_scp; do +for f in feats.scp text vad.scp utt2lang $maybe_wav_scp utt2dur utt2num_frames; do if [ -f $data/$f ]; then utils/filter_scps.pl JOB=1:$numsplit \ $data/split${numsplit}${utt}/JOB/utt2spk $data/$f $data/split${numsplit}${utt}/JOB/$f || exit 1; @@ -168,6 +171,12 @@ if [ -f $data/segments ]; then $data/split${numsplit}${utt}/JOB/tmp.reco $data/wav.scp \ $data/split${numsplit}${utt}/JOB/wav.scp || exit 1 fi + if [ -f $data/reco2utt ]; then + utils/filter_scps.pl JOB=1:$numsplit \ + $data/split${numsplit}${utt}/JOB/tmp.reco $data/reco2utt \ + $data/split${numsplit}${utt}/JOB/reco2utt || exit 1 + fi + for f in $data/split${numsplit}${utt}/*/tmp.reco; do rm $f; done fi From bf1647b3bb0194b3a770074621b1fc14d0898bb2 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:08:55 -0500 Subject: [PATCH 354/530] asr_diarization: Possibly deprecated update to do_segmentation_data_dir.sh --- egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh index c1e690af366..9e95cca9cc0 100755 --- a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh +++ b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh @@ -107,11 +107,12 @@ if [ $stage -le 2 ]; then --frames-per-chunk 150 \ --stage $sad_stage --output-name $output_name \ --frame-subsampling-factor $frame_subsampling_factor \ - --get-raw-nnet-from-am false ${test_data_dir} $sad_nnet_dir $sad_dir + --use-raw-nnet true ${test_data_dir} $sad_nnet_dir $sad_dir fi if [ $stage -le 3 ]; then steps/segmentation/decode_sad_to_segments.sh \ + --use-unigram-lm false \ --frame-subsampling-factor $frame_subsampling_factor \ --min-silence-duration $min_silence_duration \ --min-speech-duration $min_speech_duration \ From b63787afef3b8b4c4fbd5621164e5d54df7a8ae9 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:09:34 -0500 Subject: [PATCH 355/530] asr_diarization: Minor logging to nnet3-copy-egs --- src/nnet3bin/nnet3-copy-egs.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nnet3bin/nnet3-copy-egs.cc b/src/nnet3bin/nnet3-copy-egs.cc index 5189ee4046f..13d9a0d6a15 100644 --- a/src/nnet3bin/nnet3-copy-egs.cc +++ b/src/nnet3bin/nnet3-copy-egs.cc @@ -429,6 +429,7 @@ int main(int argc, char *argv[]) { // count is normally 1; could be 0, or possibly >1. 
int32 count = GetCount(keep_proportion); std::string key = example_reader.Key(); + KALDI_VLOG(2) << "Copying eg " << key; NnetExample eg(example_reader.Value()); if (!keep_outputs_str.empty()) { From ea5004233def84a1f0a6175fc9aea5e25b6dbb9a Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:10:36 -0500 Subject: [PATCH 356/530] asr_diarization: Partial update to aspire segmentation --- .../nnet3/prep_test_aspire_segmentation.sh | 78 +------------------ 1 file changed, 3 insertions(+), 75 deletions(-) diff --git a/egs/aspire/s5/local/nnet3/prep_test_aspire_segmentation.sh b/egs/aspire/s5/local/nnet3/prep_test_aspire_segmentation.sh index e7f70c0c07f..266781fc84d 100755 --- a/egs/aspire/s5/local/nnet3/prep_test_aspire_segmentation.sh +++ b/egs/aspire/s5/local/nnet3/prep_test_aspire_segmentation.sh @@ -78,13 +78,13 @@ if [ $stage -le 1 ]; then --mfcc-config conf/mfcc_hires_bp.conf --feat-affix bp --iter $sad_iter \ --do-downsampling false --extra-left-context 100 --extra-right-context 20 \ --output-name output-speech --frame-subsampling-factor 6 \ - data/${data_set} $sad_nnet_dir mfcc_hires_bp data/${data_set} + data/${data_set} $sad_nnet_dir mfcc_hires_bp data/${data_set}${affix} # Output will be in data/${data_set}_seg fi # uniform segmentation script would have created this dataset # so update that script if you plan to change this variable -segmented_data_set=${data_set}_seg +segmented_data_set=${data_set}${affix}_seg if [ $stage -le 2 ]; then mfccdir=mfcc_reverb @@ -103,79 +103,7 @@ if [ $stage -le 2 ]; then utils/validate_data_dir.sh --no-text data/${segmented_data_set}_hires fi -decode_dir=$dir/decode_${segmented_data_set}${affix}_pp -false && { -if [ $stage -le 2 ]; then - echo "Extracting i-vectors, stage 1" - steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 20 \ - --max-count $max_count \ - data/${segmented_data_set}_hires $ivector_dir/extractor \ - $ivector_dir/ivectors_${segmented_data_set}${ivector_affix}_stage1; - # float comparisons are hard in bash - if [ `bc <<< "$ivector_scale != 1"` -eq 1 ]; then - ivector_scale_affix=_scale$ivector_scale - else - ivector_scale_affix= - fi - - if [ ! -z "$ivector_scale_affix" ]; then - echo "$0: Scaling iVectors, stage 1" - srcdir=$ivector_dir/ivectors_${segmented_data_set}${ivector_affix}_stage1 - outdir=$ivector_dir/ivectors_${segmented_data_set}${ivector_affix}${ivector_scale_affix}_stage1 - mkdir -p $outdir - copy-matrix --scale=$ivector_scale scp:$srcdir/ivector_online.scp ark:- | \ - copy-feats --compress=true ark:- ark,scp:$outdir/ivector_online.ark,$outdir/ivector_online.scp; - cp $srcdir/ivector_period $outdir/ivector_period - fi -fi - -# generate the lattices -if [ $stage -le 3 ]; then - echo "Generating lattices, stage 1" - steps/nnet3/decode.sh --nj $decode_num_jobs --cmd "$decode_cmd" --config conf/decode.config \ - --acwt $acwt --post-decode-acwt $post_decode_acwt \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --frames-per-chunk "$frames_per_chunk" \ - --online-ivector-dir $ivector_dir/ivectors_${segmented_data_set}${ivector_affix}${ivector_scale_affix}_stage1 \ - --skip-scoring true --iter $iter \ - $graph data/${segmented_data_set}_hires ${decode_dir}_stage1; -fi - -if [ $stage -le 4 ]; then - if $filter_ctm; then - if [ ! 
-z $weights_file ]; then - echo "$0: Using provided vad weights file $weights_file" - ivector_extractor_input=$weights_file - else - echo "$0 : Generating vad weights file" - ivector_extractor_input=${decode_dir}_stage1/weights${affix}.gz - local/extract_vad_weights.sh --cmd "$decode_cmd" --iter $iter \ - data/${segmented_data_set}_hires $lang \ - ${decode_dir}_stage1 $ivector_extractor_input - fi - else - # just use all the frames - ivector_extractor_input=${decode_dir}_stage1 - fi -fi - -if [ $stage -le 5 ]; then - echo "Extracting i-vectors, stage 2 with input $ivector_extractor_input" - # this does offline decoding, except we estimate the iVectors per - # speaker, excluding silence (based on alignments from a DNN decoding), with a - # different script. This is just to demonstrate that script. - # the --sub-speaker-frames is optional; if provided, it will divide each speaker - # up into "sub-speakers" of at least that many frames... can be useful if - # acoustic conditions drift over time within the speaker's data. - steps/online/nnet2/extract_ivectors.sh --cmd "$train_cmd" --nj 20 \ - --silence-weight $silence_weight \ - --sub-speaker-frames $sub_speaker_frames --max-count $max_count \ - data/${segmented_data_set}_hires $lang $ivector_dir/extractor \ - $ivector_extractor_input $ivector_dir/ivectors_${segmented_data_set}${ivector_affix}; -fi -} - +decode_dir=$dir/decode_${segmented_data_set}_pp if [ $stage -le 5 ]; then echo "Extracting i-vectors, stage 2" # this does offline decoding, except we estimate the iVectors per From 6a0fca91e183c5ae0b1365a3374cd5376ff64430 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:11:09 -0500 Subject: [PATCH 357/530] asr_diarization: Update overlapping speech detection in ami --- .../s5/local/segmentation/prepare_ami.sh | 136 ++++++++++-------- .../prepare_babel_data_overlapped_speech.sh | 10 +- ...are_unsad_overlapped_speech_data_simple.sh | 5 +- 3 files changed, 81 insertions(+), 70 deletions(-) diff --git a/egs/aspire/s5/local/segmentation/prepare_ami.sh b/egs/aspire/s5/local/segmentation/prepare_ami.sh index 38ed9559c89..7147a3004cb 100755 --- a/egs/aspire/s5/local/segmentation/prepare_ami.sh +++ b/egs/aspire/s5/local/segmentation/prepare_ami.sh @@ -112,87 +112,97 @@ if [ $stage -le 6 ]; then --cmd queue.pl --nj $nj \ $src_dir/data/sdm1/${dataset} - # Get a filter that selects only regions within the manual segments. - $train_cmd $dir/log/get_manual_segments_regions.log \ - segmentation-init-from-segments --shift-to-zero=false $src_dir/data/sdm1/${dataset}/segments ark:- \| \ - segmentation-combine-segments-to-recordings ark:- ark,t:$src_dir/data/sdm1/${dataset}/reco2utt ark:- \| \ - segmentation-create-subsegments --filter-label=1 --subsegment-label=1 \ - "ark:segmentation-init-from-lengths --label=0 ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames ark:- |" ark:- ark,t:- \| \ - perl -ane '$F[3] = 10000; $F[$#F-1] = 10000; print join(" ", @F) . "\n";' \| \ - segmentation-create-subsegments --filter-label=10000 --subsegment-label=10000 \ - ark,t:- "ark:gunzip -c $dir/ref_spk_seg.gz |" ark:- \| \ - segmentation-post-process --merge-labels=0:1 --merge-dst-label=1 ark:- ark:- \| \ - segmentation-post-process --merge-labels=10000 --merge-dst-label=0 --merge-adjacent-segments \ - --max-intersegment-length=10000 ark,t:- \ - "ark:| gzip -c > $dir/manual_segments_regions.seg.gz" + ## Get a filter that selects only regions within the manual segments. 
+ #$train_cmd $dir/log/get_manual_segments_regions.log \ + # segmentation-init-from-segments --shift-to-zero=false $src_dir/data/sdm1/${dataset}/segments ark:- \| \ + # segmentation-combine-segments-to-recordings ark:- ark,t:$src_dir/data/sdm1/${dataset}/reco2utt ark:- \| \ + # segmentation-create-subsegments --filter-label=1 --subsegment-label=1 \ + # "ark:segmentation-init-from-lengths --label=0 ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames ark:- |" ark:- ark,t:- \| \ + # perl -ane '$F[3] = 10000; $F[$#F-1] = 10000; print join(" ", @F) . "\n";' \| \ + # segmentation-create-subsegments --filter-label=10000 --subsegment-label=10000 \ + # ark,t:- "ark:gunzip -c $dir/ref_spk_seg.gz |" ark:- \| \ + # segmentation-post-process --merge-labels=0:1 --merge-dst-label=1 ark:- ark:- \| \ + # segmentation-post-process --merge-labels=10000 --merge-dst-label=0 --merge-adjacent-segments \ + # --max-intersegment-length=10000 ark,t:- \ + # "ark:| gzip -c > $dir/manual_segments_regions.seg.gz" fi if [ $stage -le 7 ]; then - # To get the actual RTTM, we need to add no-score - $train_cmd $dir/log/get_ref_rttm.log \ + $train_cmd $dir/log/get_overlap_sad_seg.log \ segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \ - "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0:10000 ark:- ark:- |" \ - ark:/dev/null ark:- \| \ - segmentation-init-from-ali ark:- ark:- \| \ - segmentation-post-process --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 \ - --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ - segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ - ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ - segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ - segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ - --no-score-label=10000 ark:- $dir/ref.rttm + "ark:gunzip -c $dir/ref_spk_seg.gz |" \ + ark:/dev/null ark:/dev/null ark:- \| \ + classes-per-frame-to-labels --junk-label=10000 ark:- ark:- \| \ + segmentation-init-from-ali ark:- \ + "ark:| gzip -c > $dir/overlap_sad_seg.gz" fi - if [ $stage -le 8 ]; then + # To get the actual RTTM, we need to add no-score + $train_cmd $dir/log/get_ref_rttm.log \ + gunzip -c $dir/overlap_sad_seg.gz \| \ + segmentation-post-process --merge-labels=1:2 --merge-dst-label=1 \ + ark:- ark:- \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=10000 ark:- $dir/ref.rttm + # Get RTTM for overlapped speech detection with 3 classes # 0 -> SILENCE, 1 -> SINGLE_SPEAKER, 2 -> OVERLAP - $train_cmd $dir/log/get_overlapping_rttm.log \ - segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \ - "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0:10000 ark:- ark:- |" \ - ark:/dev/null ark:- \| \ - segmentation-init-from-ali ark:- ark:- \| \ - segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 \ - --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ - segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ - ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ - segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ - segmentation-to-rttm --map-to-speech-and-sil=false --reco2file-and-channel=$dir/reco2file_and_channel \ - 
--no-score-label=10000 ark:- $dir/overlapping_speech_ref.rttm + $train_cmd $dir/log/get_ref_rttm.log \ + gunzip -c $dir/overlap_sad_seg.gz \| \ + segmentation-to-rttm --reco2file-and-channel=$dir/reco2file_and_channel \ + --no-score-label=10000 --map-to-speech-and-sil=false ark:- $dir/overlapping_speech_ref.rttm fi + +#if [ $stage -le 8 ]; then +# # Get RTTM for overlapped speech detection with 3 classes +# # 0 -> SILENCE, 1 -> SINGLE_SPEAKER, 2 -> OVERLAP +# $train_cmd $dir/log/get_overlapping_rttm.log \ +# segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \ +# "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0:10000 ark:- ark:- |" \ +# ark:/dev/null ark:- \| \ +# segmentation-init-from-ali ark:- ark:- \| \ +# segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 \ +# --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ +# segmentation-create-subsegments --filter-label=0 --subsegment-label=10000 \ +# ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ +# segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ +# segmentation-to-rttm --map-to-speech-and-sil=false --reco2file-and-channel=$dir/reco2file_and_channel \ +# --no-score-label=10000 ark:- $dir/overlapping_speech_ref.rttm +#fi + +# make $dir an absolute pathname. +dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $dir ${PWD}` + if [ $stage -le 9 ]; then # Get a filter that selects only regions of speech $train_cmd $dir/log/get_speech_filter.log \ - segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \ - "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0:10000 ark:- ark:- |" \ - ark:/dev/null ark:- \| \ - segmentation-init-from-ali ark:- ark:- \| \ - segmentation-post-process --merge-labels=1:2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \ - segmentation-create-subsegments --filter-label=0 --subsegment-label=0 \ - ark:- "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark:- \| \ - segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 \ - ark:- "ark:| gzip -c > $dir/manual_segments_speech_regions.seg.gz" + gunzip -c $dir/overlap_sad_seg.gz \| \ + segmentation-post-process --merge-labels=1:2 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-post-process --remove-labels=10000 ark:- ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \ + ark:- ark,t:- \| \ + steps/segmentation/convert_ali_to_vec.pl \| \ + copy-vector ark,t: ark,scp:$dir/deriv_weights_for_overlapping_sad.ark,$dir/deriv_weights_for_overlapping_sad.scp + + # Get deriv weights + $train_cmd $dir/log/get_speech_filter.log \ + gunzip -c $dir/overlap_sad_seg.gz \| \ + segmentation-post-process --merge-labels=0:1:2 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-post-process --remove-labels=10000 ark:- ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \ + ark:- ark,t:- \| \ + steps/segmentation/convert_ali_to_vec.pl \| \ + copy-vector ark,t: ark,scp:$dir/deriv_weights.ark,$dir/deriv_weights.scp fi -# make $dir an absolute pathname. 
-dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $dir ${PWD}` - if [ $stage -le 10 ]; then $train_cmd $dir/log/get_overlapping_sad.log \ - segmentation-get-stats --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \ - "ark:gunzip -c $dir/ref_spk_seg.gz | segmentation-post-process --remove-labels=0:10000 ark:- ark:- |" \ - ark:/dev/null ark:- \| \ - segmentation-init-from-ali ark:- ark:- \| \ - segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=2 \ - --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ - segmentation-post-process --merge-adjacent-segments --max-intersegment-length=10000 ark:- ark:- \| \ - segmentation-to-ali ark:- ark,scp:$dir/overlapping_sad_labels.ark,$dir/overlapping_sad_labels.scp - - $train_cmd $dir/log/get_deriv_weights_for_overlapping_sad.log \ - segmentation-to-ali "ark:gunzip -c $dir/manual_segments_regions.seg.gz |" ark,t:- \| \ - steps/segmentation/convert_ali_to_vec.pl \| \ - copy-vector ark,t: ark,scp:$dir/deriv_weights_for_overlapping_sad.ark,$dir/deriv_weights_for_overlapping_sad.scp + gunzip -c $dir/overlap_sad_seg.gz \| \ + segmentation-post-process --remove-labels=10000 ark:- ark:- \| \ + segmentation-to-ali --lengths-rspecifier=ark,t:$src_dir/data/sdm1/${dataset}/reco2num_frames \ + ark:- ark,scp:$dir/overlapping_sad_labels.ark,$dir/overlapping_sad_labels.scp fi if false && [ $stage -le 11 ]; then diff --git a/egs/aspire/s5/local/segmentation/prepare_babel_data_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/prepare_babel_data_overlapped_speech.sh index 2136f42f322..a3e087d95ec 100644 --- a/egs/aspire/s5/local/segmentation/prepare_babel_data_overlapped_speech.sh +++ b/egs/aspire/s5/local/segmentation/prepare_babel_data_overlapped_speech.sh @@ -14,7 +14,7 @@ set -o pipefail set -u lang_id=assamese -subset=25 # Number of recordings to keep before speed perturbation and corruption +subset=150 # Number of recordings to keep before speed perturbation and corruption utt_subset=30000 # Number of utterances to keep after speed perturbation for adding overlapped-speech # All the paths below can be modified to any absolute path. @@ -88,15 +88,15 @@ fi reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp # Add noise from MUSAN corpus to data directory and create a new data directory -local/segmentation/do_corruption_data_dir.sh +local/segmentation/do_corruption_data_dir.sh \ --data-dir $data_dir \ - --reco-vad-dir $reco_vad_dir + --reco-vad-dir $reco_vad_dir \ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf # Add music from MUSAN corpus to data directory and create a new data directory -local/segmentation/do_corruption_data_dir_music.sh +local/segmentation/do_corruption_data_dir_music.sh \ --data-dir $data_dir \ - --reco-vad-dir $reco_vad_dir + --reco-vad-dir $reco_vad_dir \ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf if [ ! 
-z $utt_subset ]; then diff --git a/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data_simple.sh b/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data_simple.sh index 73f2abca566..80810afd619 100755 --- a/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data_simple.sh +++ b/egs/aspire/s5/local/segmentation/prepare_unsad_overlapped_speech_data_simple.sh @@ -68,8 +68,9 @@ if [ $stage -le 1 ]; then segmentation-init-from-additive-signals-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ --junk-label=10000 \ --additive-signals-segmentation-rspecifier=scp:$utt_vad_dir/sad_seg.scp \ - "scp:utils/filter_scp.pl ${corrupted_data_dir}/split${nj}/JOB/utt2spk $corrupted_data_dir/sad_seg.scp |" \ - ark,t:$orig_corrupted_data_dir/overlapped_segments_info.txt ark:- \| \ + "ark,t:utils/filter_scp.pl ${orig_corrupted_data_dir}/split${reco_nj}reco/JOB/reco2utt $orig_corrupted_data_dir/overlapped_segments_info.txt |" \ + ark:- \| \ + segmentation-merge "scp:utils/filter_scp.pl ${corrupted_data_dir}/split${nj}/JOB/utt2spk $corrupted_data_dir/sad_seg.scp |" ark:- ark:- \| \ segmentation-get-stats --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ ark:- ark:/dev/null ark:/dev/null ark:- \| \ classes-per-frame-to-labels --junk-label=10000 ark:- ark:- \| \ From fd96de7a8867dc455579cdc62ad5f48076c9fdc4 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:11:34 -0500 Subject: [PATCH 358/530] asr_diarization: Add simplehmmbin to common_path --- src/simplehmmbin/Makefile | 23 +++ .../compile-train-simple-hmm-graphs.cc | 151 ++++++++++++++++++ src/simplehmmbin/make-simple-hmm-graph.cc | 87 ++++++++++ src/simplehmmbin/simple-hmm-acc-stats-ali.cc | 88 ++++++++++ src/simplehmmbin/simple-hmm-align-compiled.cc | 131 +++++++++++++++ src/simplehmmbin/simple-hmm-est.cc | 86 ++++++++++ src/simplehmmbin/simple-hmm-init.cc | 70 ++++++++ tools/config/common_path.sh | 3 +- 8 files changed, 638 insertions(+), 1 deletion(-) create mode 100644 src/simplehmmbin/Makefile create mode 100644 src/simplehmmbin/compile-train-simple-hmm-graphs.cc create mode 100644 src/simplehmmbin/make-simple-hmm-graph.cc create mode 100644 src/simplehmmbin/simple-hmm-acc-stats-ali.cc create mode 100644 src/simplehmmbin/simple-hmm-align-compiled.cc create mode 100644 src/simplehmmbin/simple-hmm-est.cc create mode 100644 src/simplehmmbin/simple-hmm-init.cc diff --git a/src/simplehmmbin/Makefile b/src/simplehmmbin/Makefile new file mode 100644 index 00000000000..f382b30277c --- /dev/null +++ b/src/simplehmmbin/Makefile @@ -0,0 +1,23 @@ + +all: +EXTRA_CXXFLAGS = -Wno-sign-compare +include ../kaldi.mk + +BINFILES = simple-hmm-init \ + compile-train-simple-hmm-graphs simple-hmm-align-compiled \ + simple-hmm-acc-stats-ali simple-hmm-est make-simple-hmm-graph + + +OBJFILES = + +ADDLIBS = ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a \ + ../fstext/kaldi-fstext.a ../hmm/kaldi-hmm.a \ + ../simplehmm/kaldi-simplehmm.a\ + ../util/kaldi-util.a ../thread/kaldi-thread.a \ + ../matrix/kaldi-matrix.a ../base/kaldi-base.a + + +TESTFILES = + +include ../makefiles/default_rules.mk + diff --git a/src/simplehmmbin/compile-train-simple-hmm-graphs.cc b/src/simplehmmbin/compile-train-simple-hmm-graphs.cc new file mode 100644 index 00000000000..a1914ed0763 --- /dev/null +++ b/src/simplehmmbin/compile-train-simple-hmm-graphs.cc @@ -0,0 +1,151 @@ +// bin/compile-train-simple-hmm-graphs.cc + +// Copyright 2009-2012 Microsoft Corporation +// 2012-2015 Johns Hopkins University (Author: 
Daniel Povey)
+//                2016  Vimal Manohar

+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "tree/context-dep.h"
+#include "simplehmm/simple-hmm.h"
+#include "fstext/fstext-lib.h"
+#include "decoder/simple-hmm-graph-compiler.h"
+
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    typedef kaldi::int32 int32;
+    using fst::SymbolTable;
+    using fst::VectorFst;
+    using fst::StdArc;
+
+    const char *usage =
+        "Creates training graphs (without transition-probabilities, by default)\n"
+        "for training SimpleHmm models using alignments of pdf-ids.\n"
+        "Usage: compile-train-simple-hmm-graphs [options] "
+        "<model-in> <alignments-rspecifier> <graphs-wspecifier>\n"
+        "e.g.: \n"
+        " compile-train-simple-hmm-graphs 1.mdl ark:train.tra ark:graphs.fsts\n";
+    ParseOptions po(usage);
+
+    SimpleHmmGraphCompilerOptions gopts;
+    int32 batch_size = 250;
+    gopts.transition_scale = 0.0;  // Change the default to 0.0 since we will
+                                   // generally add the transition probs in the
+                                   // alignment phase (since they change each time).
+    gopts.self_loop_scale = 0.0;  // Ditto for self-loop probs.
+    std::string disambig_rxfilename;
+    gopts.Register(&po);
+
+    po.Register("batch-size", &batch_size,
+                "Number of FSTs to compile at a time (more -> faster but uses "
+                "more memory.  E.g. 500");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string model_rxfilename = po.GetArg(1);
+    std::string alignment_rspecifier = po.GetArg(2);
+    std::string fsts_wspecifier = po.GetArg(3);
+
+    SimpleHmm model;
+    ReadKaldiObject(model_rxfilename, &model);
+
+    SimpleHmmGraphCompiler gc(model, gopts);
+
+    SequentialInt32VectorReader alignment_reader(alignment_rspecifier);
+    TableWriter<fst::VectorFstHolder> fst_writer(fsts_wspecifier);
+
+    int32 num_succeed = 0, num_fail = 0;
+
+    if (batch_size == 1) {  // We treat batch_size of 1 as a special case in
+                            // order to test more parts of the code.
+      for (; !alignment_reader.Done(); alignment_reader.Next()) {
+        const std::string &key = alignment_reader.Key();
+        std::vector<int32> alignment = alignment_reader.Value();
+
+        for (std::vector<int32>::iterator it = alignment.begin();
+             it != alignment.end(); ++it) {
+          KALDI_ASSERT(*it < model.NumPdfs());
+          ++(*it);
+        }
+
+        VectorFst<StdArc> decode_fst;
+
+        if (!gc.CompileGraphFromAlignment(alignment, &decode_fst)) {
+          decode_fst.DeleteStates();  // Just make it empty.
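+          // (an FST with no start state marks this utterance as failed;
+          // it is counted and skipped below)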
+        }
+        if (decode_fst.Start() != fst::kNoStateId) {
+          num_succeed++;
+          fst_writer.Write(key, decode_fst);
+        } else {
+          KALDI_WARN << "Empty decoding graph for utterance "
+                     << key;
+          num_fail++;
+        }
+      }
+    } else {
+      std::vector<std::string> keys;
+      std::vector<std::vector<int32> > alignments;
+      while (!alignment_reader.Done()) {
+        keys.clear();
+        alignments.clear();
+        for (; !alignment_reader.Done() &&
+               static_cast<int32>(alignments.size()) < batch_size;
+             alignment_reader.Next()) {
+          keys.push_back(alignment_reader.Key());
+          alignments.push_back(alignment_reader.Value());
+
+          for (std::vector<int32>::iterator it = alignments.back().begin();
+               it != alignments.back().end(); ++it) {
+            KALDI_ASSERT(*it < model.NumPdfs());
+            ++(*it);
+          }
+        }
+        std::vector<fst::VectorFst<fst::StdArc>* > fsts;
+        if (!gc.CompileGraphsFromAlignments(alignments, &fsts)) {
+          KALDI_ERR << "Not expecting CompileGraphs to fail.";
+        }
+        KALDI_ASSERT(fsts.size() == keys.size());
+        for (size_t i = 0; i < fsts.size(); i++) {
+          if (fsts[i]->Start() != fst::kNoStateId) {
+            num_succeed++;
+            fst_writer.Write(keys[i], *(fsts[i]));
+          } else {
+            KALDI_WARN << "Empty decoding graph for utterance "
+                       << keys[i];
+            num_fail++;
+          }
+        }
+        DeletePointers(&fsts);
+      }
+    }
+    KALDI_LOG << "compile-train-simple-hmm-graphs: succeeded for "
+              << num_succeed << " graphs, failed for " << num_fail;
+    return (num_succeed != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
+
diff --git a/src/simplehmmbin/make-simple-hmm-graph.cc b/src/simplehmmbin/make-simple-hmm-graph.cc
new file mode 100644
index 00000000000..088a73e7c50
--- /dev/null
+++ b/src/simplehmmbin/make-simple-hmm-graph.cc
@@ -0,0 +1,87 @@
+// simplehmmbin/make-simple-hmm-graph.cc

+// Copyright 2016  Vimal Manohar

+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "simplehmm/simple-hmm.h"
+#include "simplehmm/simple-hmm-utils.h"
+#include "util/common-utils.h"
+#include "fst/fstlib.h"
+#include "fstext/table-matcher.h"
+#include "fstext/fstext-utils.h"
+#include "fstext/context-fst.h"
+#include "decoder/simple-hmm-graph-compiler.h"
+
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    typedef kaldi::int32 int32;
+    using fst::SymbolTable;
+    using fst::VectorFst;
+    using fst::StdArc;
+
+    const char *usage =
+        "Make graph to decode with simple HMM.  It is an FST from "
+        "transition-ids to pdf-ids + 1.\n"
+        "Usage: make-simple-hmm-graph <model-in> [<graph-out>]\n"
+        "e.g.: \n"
+        " make-simple-hmm-graph 1.mdl > HCLG.fst\n";
+    ParseOptions po(usage);
+
+    SimpleHmmGraphCompilerOptions gopts;
+    gopts.Register(&po);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() < 1 || po.NumArgs() > 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string model_filename = po.GetArg(1);
+    std::string fst_out_filename;
+    if (po.NumArgs() >= 2) fst_out_filename = po.GetArg(2);
+    if (fst_out_filename == "-") fst_out_filename = "";
+
+    SimpleHmm trans_model;
+    ReadKaldiObject(model_filename, &trans_model);
+
+    // The work gets done here.
+    fst::VectorFst<fst::StdArc> *H = GetHTransducer(trans_model,
+                                                    gopts.transition_scale,
+                                                    gopts.self_loop_scale);
+
+#if _MSC_VER
+    if (fst_out_filename == "")
+      _setmode(_fileno(stdout), _O_BINARY);
+#endif
+
+    if (! H->Write(fst_out_filename) )
+      KALDI_ERR << "make-simple-hmm-graph: error writing FST to "
+                << (fst_out_filename == "" ?
+                    "standard output" : fst_out_filename);
+
+    delete H;
+    return 0;
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
+
diff --git a/src/simplehmmbin/simple-hmm-acc-stats-ali.cc b/src/simplehmmbin/simple-hmm-acc-stats-ali.cc
new file mode 100644
index 00000000000..5bcf8239311
--- /dev/null
+++ b/src/simplehmmbin/simple-hmm-acc-stats-ali.cc
@@ -0,0 +1,88 @@
+// simplehmmbin/simple-hmm-acc-stats-ali.cc

+// Copyright 2009-2012  Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
+//                2016  Vimal Manohar

+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "simplehmm/simple-hmm.h"
+
+int main(int argc, char *argv[]) {
+  using namespace kaldi;
+  typedef kaldi::int32 int32;
+  try {
+    const char *usage =
+        "Accumulate stats for simple HMM training.\n"
+        "Usage: simple-hmm-acc-stats-ali [options] "
+        "<model-in> <alignments-rspecifier> <stats-out>\n"
+        "e.g.:\n simple-hmm-acc-stats-ali 1.mdl ark:1.ali 1.acc\n";
+
+    ParseOptions po(usage);
+    bool binary = true;
+    po.Register("binary", &binary, "Write output in binary mode");
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string model_filename = po.GetArg(1),
+        alignments_rspecifier = po.GetArg(2),
+        accs_wxfilename = po.GetArg(3);
+
+    SimpleHmm model;
+    ReadKaldiObject(model_filename, &model);
+
+    Vector<double> transition_accs;
+    model.InitStats(&transition_accs);
+
+    SequentialInt32VectorReader alignments_reader(alignments_rspecifier);
+
+    int32 num_done = 0, num_err = 0;
+    for (; !alignments_reader.Done(); alignments_reader.Next()) {
+      const std::string &key = alignments_reader.Key();
+      const std::vector<int32> &alignment = alignments_reader.Value();
+
+      for (size_t i = 0; i < alignment.size(); i++) {
+        int32 tid = alignment[i];  // transition identifier.
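+        // accumulate a soft count of 1.0 for this transition-id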
+        model.Accumulate(1.0, tid, &transition_accs);
+      }
+
+      num_done++;
+    }
+    KALDI_LOG << "Done " << num_done << " files, " << num_err
+              << " with errors.";
+
+    {
+      Output ko(accs_wxfilename, binary);
+      transition_accs.Write(ko.Stream(), binary);
+    }
+    KALDI_LOG << "Written accs.";
+    if (num_done != 0)
+      return 0;
+    else
+      return 1;
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
+
diff --git a/src/simplehmmbin/simple-hmm-align-compiled.cc b/src/simplehmmbin/simple-hmm-align-compiled.cc
new file mode 100644
index 00000000000..4a2bc286b24
--- /dev/null
+++ b/src/simplehmmbin/simple-hmm-align-compiled.cc
@@ -0,0 +1,131 @@
+// simplehmmbin/simple-hmm-align-compiled.cc

+// Copyright 2009-2013  Microsoft Corporation
+//                      Johns Hopkins University (author: Daniel Povey)
+//                2016  Vimal Manohar


+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "simplehmm/simple-hmm.h"
+#include "simplehmm/simple-hmm-utils.h"
+#include "fstext/fstext-lib.h"
+#include "decoder/decoder-wrappers.h"
+#include "decoder/decodable-matrix.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    typedef kaldi::int32 int32;
+    using fst::SymbolTable;
+    using fst::VectorFst;
+    using fst::StdArc;
+
+    const char *usage =
+        "Align matrix of log-likelihoods given simple HMM model.\n"
+        "Usage: simple-hmm-align-compiled [options] <model-in> "
+        "<graphs-rspecifier> <loglikes-rspecifier> <alignments-wspecifier> "
+        "[<scores-wspecifier>]\n"
+        "e.g.: \n"
+        " simple-hmm-align-compiled 1.mdl ark:graphs.fsts ark:log_likes.1.ark ark:1.ali\n";
+
+    ParseOptions po(usage);
+    AlignConfig align_config;
+    BaseFloat acoustic_scale = 1.0;
+    BaseFloat transition_scale = 1.0;
+    BaseFloat self_loop_scale = 1.0;
+
+    align_config.Register(&po);
+    po.Register("transition-scale", &transition_scale,
+                "Transition-probability scale [relative to acoustics]");
+    po.Register("acoustic-scale", &acoustic_scale,
+                "Scaling factor for acoustic likelihoods");
+    po.Register("self-loop-scale", &self_loop_scale,
+                "Scale of self-loop versus non-self-loop log probs [relative to acoustics]");
+    po.Read(argc, argv);
+
+    if (po.NumArgs() < 4 || po.NumArgs() > 5) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string model_in_filename = po.GetArg(1),
+        fst_rspecifier = po.GetArg(2),
+        loglikes_rspecifier = po.GetArg(3),
+        alignment_wspecifier = po.GetArg(4),
+        scores_wspecifier = po.GetOptArg(5);
+
+    SimpleHmm model;
+    ReadKaldiObject(model_in_filename, &model);
+
+    SequentialTableReader<fst::VectorFstHolder> fst_reader(fst_rspecifier);
+    RandomAccessBaseFloatMatrixReader loglikes_reader(loglikes_rspecifier);
+    Int32VectorWriter alignment_writer(alignment_wspecifier);
+    BaseFloatWriter scores_writer(scores_wspecifier);
+
+    int32 num_done = 0, num_err = 0, num_retry = 0;
+    double tot_like = 0.0;
+    kaldi::int64 frame_count = 0;
+
+    for (; !fst_reader.Done(); fst_reader.Next()) {
+      const std::string &utt = fst_reader.Key();
+      if (!loglikes_reader.HasKey(utt)) {
+        num_err++;
+        KALDI_WARN << "No loglikes for utterance " << utt;
+      } else {
+        const Matrix<BaseFloat> &loglikes = loglikes_reader.Value(utt);
+        VectorFst<StdArc> decode_fst(fst_reader.Value());
+        fst_reader.FreeCurrent();  // this stops copy-on-write of the fst
+        // by deleting the fst inside the reader, since we're about to mutate
+        // the fst by adding transition probs.
+
+        if (loglikes.NumRows() == 0) {
+          KALDI_WARN << "Zero-length utterance: " << utt;
+          num_err++;
+          continue;
+        }
+
+        {  // Add transition-probs to the FST.
+          std::vector<int32> disambig_syms;  // empty
+          AddTransitionProbs(model, disambig_syms, transition_scale,
+                             self_loop_scale, &decode_fst);
+        }
+
+        DecodableMatrixScaledMapped decodable(model, loglikes, acoustic_scale);
+
+        AlignUtteranceWrapper(align_config, utt,
+                              acoustic_scale, &decode_fst,
+                              &decodable,
+                              &alignment_writer, &scores_writer,
+                              &num_done, &num_err, &num_retry,
+                              &tot_like, &frame_count);
+      }
+    }
+    KALDI_LOG << "Overall log-likelihood per frame is "
+              << (tot_like/frame_count)
+              << " over " << frame_count << " frames.";
+    KALDI_LOG << "Retried " << num_retry << " out of "
+              << (num_done + num_err) << " utterances.";
+    KALDI_LOG << "Done " << num_done << ", errors on " << num_err;
+    return (num_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
+
diff --git a/src/simplehmmbin/simple-hmm-est.cc b/src/simplehmmbin/simple-hmm-est.cc
new file mode 100644
index 00000000000..b121bad44b0
--- /dev/null
+++ b/src/simplehmmbin/simple-hmm-est.cc
@@ -0,0 +1,86 @@
+// simplehmmbin/simple-hmm-est.cc

+// Copyright 2009-2011  Microsoft Corporation
+//                2016  Vimal Manohar

+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
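+
+// A typical re-estimation step with these tools might look like the
+// following (a sketch; the model and accumulator file names are
+// hypothetical):
+//   simple-hmm-acc-stats-ali 0.mdl ark:ali.1.ark 0.1.acc
+//   simple-hmm-est --binary=true 0.mdl 0.1.acc 1.mdl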
+ +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "simplehmm/simple-hmm.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + typedef kaldi::int32 int32; + + const char *usage = + "Do Maximum Likelihood re-estimation of simple HMM " + "transition parameters\n" + "Usage: simple-hmm-est [options] \n" + "e.g.: simple-hmm-est 1.mdl 1.acc 2.mdl\n"; + + bool binary_write = true; + MleTransitionUpdateConfig tcfg; + std::string occs_out_filename; + + ParseOptions po(usage); + po.Register("binary", &binary_write, "Write output in binary mode"); + tcfg.Register(&po); + + po.Read(argc, argv); + + if (po.NumArgs() != 3) { + po.PrintUsage(); + exit(1); + } + + std::string model_in_filename = po.GetArg(1), + stats_filename = po.GetArg(2), + model_out_filename = po.GetArg(3); + + SimpleHmm model; + ReadKaldiObject(model_in_filename, &model); + + Vector transition_accs; + ReadKaldiObject(stats_filename, &transition_accs); + + { + BaseFloat objf_impr, count; + model.MleUpdate(transition_accs, tcfg, &objf_impr, &count); + KALDI_LOG << "Transition model update: Overall " << (objf_impr/count) + << " log-like improvement per frame over " << (count) + << " frames."; + } + + WriteKaldiObject(model, model_out_filename, binary_write); + + if (GetVerboseLevel() >= 2) { + std::vector phone_names; + phone_names.push_back("0"); + phone_names.push_back("1"); + model.Print(KALDI_LOG, phone_names); + } + + KALDI_LOG << "Written model to " << model_out_filename; + return 0; + } catch(const std::exception &e) { + std::cerr << e.what() << '\n'; + return -1; + } +} + + diff --git a/src/simplehmmbin/simple-hmm-init.cc b/src/simplehmmbin/simple-hmm-init.cc new file mode 100644 index 00000000000..ddee0893b7c --- /dev/null +++ b/src/simplehmmbin/simple-hmm-init.cc @@ -0,0 +1,70 @@ +// bin/simple-hmm-init.cc + +// Copyright 2016 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+ +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "hmm/hmm-topology.h" +#include "simplehmm/simple-hmm.h" + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using kaldi::int32; + + const char *usage = + "Initialize simple HMM from topology.\n" + "Usage: simple-hmm-init \n" + "e.g.: \n" + " simple-hmm-init topo init.mdl\n"; + + bool binary = true; + + ParseOptions po(usage); + po.Register("binary", &binary, "Write output in binary mode"); + + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string topo_filename = po.GetArg(1); + std::string model_filename = po.GetArg(2); + + HmmTopology topo; + { + bool binary_in; + Input ki(topo_filename, &binary_in); + topo.Read(ki.Stream(), binary_in); + } + + SimpleHmm model(topo); + { + Output ko(model_filename, binary); + model.Write(ko.Stream(), binary); + } + return 0; + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + + diff --git a/tools/config/common_path.sh b/tools/config/common_path.sh index 36b5350dd8e..9c2e32d0cb1 100644 --- a/tools/config/common_path.sh +++ b/tools/config/common_path.sh @@ -1,4 +1,4 @@ -# we assume KALDI_ROOT is already defined +# we assume KALDI_ROOT is already defined [ -z "$KALDI_ROOT" ] && echo "The variable KALDI_ROOT must be already defined" && exit 1 # The formatting of the path export command is intentionally weird, because # this allows for easy diff'ing @@ -21,4 +21,5 @@ ${KALDI_ROOT}/src/onlinebin:\ ${KALDI_ROOT}/src/sgmm2bin:\ ${KALDI_ROOT}/src/sgmmbin:\ ${KALDI_ROOT}/src/segmenterbin:\ +${KALDI_ROOT}/src/simplehmmbin:\ $PATH From 7a678fdb323f67b8fc0f0622cff15156111c7043 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:14:28 -0500 Subject: [PATCH 359/530] asr_diarization: Add IB clustering --- .../information-bottleneck-cluster-utils.cc | 192 +++++++++++++++ .../information-bottleneck-cluster-utils.h | 65 +++++ ...information-bottleneck-clusterable-test.cc | 94 ++++++++ .../information-bottleneck-clusterable.cc | 226 ++++++++++++++++++ .../information-bottleneck-clusterable.h | 163 +++++++++++++ src/tree/cluster-utils.cc | 73 +----- src/tree/cluster-utils.h | 89 ++++++- 7 files changed, 838 insertions(+), 64 deletions(-) create mode 100644 src/segmenter/information-bottleneck-cluster-utils.cc create mode 100644 src/segmenter/information-bottleneck-cluster-utils.h create mode 100644 src/segmenter/information-bottleneck-clusterable-test.cc create mode 100644 src/segmenter/information-bottleneck-clusterable.cc create mode 100644 src/segmenter/information-bottleneck-clusterable.h diff --git a/src/segmenter/information-bottleneck-cluster-utils.cc b/src/segmenter/information-bottleneck-cluster-utils.cc new file mode 100644 index 00000000000..75fda8c59fe --- /dev/null +++ b/src/segmenter/information-bottleneck-cluster-utils.cc @@ -0,0 +1,192 @@ +// segmenter/information-bottleneck-cluster-utils.cc + +// Copyright 2017 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include "base/kaldi-common.h" +#include "tree/cluster-utils.h" +#include "segmenter/information-bottleneck-cluster-utils.h" + +namespace kaldi { + +typedef uint16 uint_smaller; +typedef int16 int_smaller; + +class InformationBottleneckBottomUpClusterer : public BottomUpClusterer { + public: + InformationBottleneckBottomUpClusterer( + const std::vector &points, + const InformationBottleneckClustererOptions &opts, + BaseFloat max_merge_thresh, + int32 min_clusters, + std::vector *clusters_out, + std::vector *assignments_out); + + private: + virtual BaseFloat ComputeDistance(int32 i, int32 j); + virtual bool StoppingCriterion() const; + virtual void UpdateClustererStats(int32 i, int32 j); + + BaseFloat NormalizedMutualInformation() const { + return ((merged_entropy_ - current_entropy_) + / (merged_entropy_ - initial_entropy_)); + } + + /// Stop merging when the stopping criterion, e.g. NMI, reaches this + /// threshold. + BaseFloat stopping_threshold_; + + /// Weight of the relevant variables entropy towards the objective. + BaseFloat relevance_factor_; + + /// Weight of the input variables entropy towards the objective. + BaseFloat input_factor_; + + /// Running entropy of the clusters. + BaseFloat current_entropy_; + + /// Some stats computed by the constructor that will be useful for + /// adding stopping criterion. + BaseFloat initial_entropy_; + BaseFloat merged_entropy_; +}; + + +InformationBottleneckBottomUpClusterer::InformationBottleneckBottomUpClusterer( + const std::vector &points, + const InformationBottleneckClustererOptions &opts, + BaseFloat max_merge_thresh, + int32 min_clusters, + std::vector *clusters_out, + std::vector *assignments_out) : + BottomUpClusterer(points, max_merge_thresh, min_clusters, + clusters_out, assignments_out), + stopping_threshold_(opts.stopping_threshold), + relevance_factor_(opts.relevance_factor), + input_factor_(opts.input_factor), + current_entropy_(0.0), initial_entropy_(0.0), merged_entropy_(0.0) { + if (points.size() == 0) return; + + InformationBottleneckClusterable* ibc = + static_cast(points[0]->Copy()); + initial_entropy_ -= ibc->Objf(1.0, 0.0); + + for (size_t i = 1; i < points.size(); i++) { + InformationBottleneckClusterable *c = + static_cast(points[i]); + ibc->Add(*points[i]); + initial_entropy_ -= c->Objf(1.0, 0.0); + } + + merged_entropy_ = -ibc->Objf(1.0, 0.0); + current_entropy_ = initial_entropy_; +} + +BaseFloat InformationBottleneckBottomUpClusterer::ComputeDistance( + int32 i, int32 j) { + const InformationBottleneckClusterable* cluster_i + = static_cast(GetCluster(i)); + const InformationBottleneckClusterable* cluster_j + = static_cast(GetCluster(j)); + + BaseFloat dist = (cluster_i->Distance(*cluster_j, relevance_factor_, + input_factor_)); + // / (cluster_i->Normalizer() + cluster_j->Normalizer())); + Distance(i, j) = dist; // set the distance in the array. 
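+  // For reference, 'dist' is the greedy agglomerative-IB merge cost
+  //   d(i, j) = (N_i + N_j) * (r * JS(p(Y|c_i), p(Y|c_j))
+  //                            - b * JS(p(X|c_i), p(X|c_j))),
+  // i.e. the loss in relevance information minus the weighted gain in
+  // compression from merging clusters i and j, where JS is the pi-weighted
+  // Jensen-Shannon divergence; see
+  // InformationBottleneckClusterable::Distance() for the exact expression.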
+ return dist; +} + +bool InformationBottleneckBottomUpClusterer::StoppingCriterion() const { + bool flag = (NumClusters() <= MinClusters() || IsQueueEmpty() || + NormalizedMutualInformation() < stopping_threshold_); + if (GetVerboseLevel() < 2 || !flag) return flag; + + if (NormalizedMutualInformation() < stopping_threshold_) { + KALDI_VLOG(2) << "Stopping at " << NumClusters() << " clusters " + << "because NMI = " << NormalizedMutualInformation() + << " < stopping_threshold (" << stopping_threshold_ << ")"; + } else if (NumClusters() < MinClusters()) { + KALDI_VLOG(2) << "Stopping at " << NumClusters() << " clusters " + << "<= min-clusters (" << MinClusters() << ")"; + } else if (IsQueueEmpty()) { + KALDI_VLOG(2) << "Stopping at " << NumClusters() << " clusters " + << "because queue is empty."; + } + + return flag; +} + +void InformationBottleneckBottomUpClusterer::UpdateClustererStats( + int32 i, int32 j) { + const InformationBottleneckClusterable* cluster_i + = static_cast(GetCluster(i)); + current_entropy_ += cluster_i->Distance(*GetCluster(j), 1.0, 0.0); + + if (GetVerboseLevel() > 2) { + const InformationBottleneckClusterable* cluster_j + = static_cast(GetCluster(j)); + std::vector cluster_i_points; + { + std::map::const_iterator it + = cluster_i->Counts().begin(); + for (; it != cluster_i->Counts().end(); ++it) + cluster_i_points.push_back(it->first); + } + + std::vector cluster_j_points; + { + std::map::const_iterator it + = cluster_j->Counts().begin(); + for (; it != cluster_j->Counts().end(); ++it) + cluster_j_points.push_back(it->first); + } + KALDI_VLOG(3) << "Merging clusters " + << "(" << cluster_i_points + << ", " << cluster_j_points + << ").. distance=" << Distance(i, j) + << ", num-clusters-after-merge= " << NumClusters() - 1 + << ", NMI= " << NormalizedMutualInformation(); + } +} + +BaseFloat IBClusterBottomUp( + const std::vector &points, + const InformationBottleneckClustererOptions &opts, + BaseFloat max_merge_thresh, + int32 min_clust, + std::vector *clusters_out, + std::vector *assignments_out) { + KALDI_ASSERT(max_merge_thresh >= 0.0 && min_clust >= 0); + KALDI_ASSERT(opts.stopping_threshold >= 0.0); + KALDI_ASSERT(opts.relevance_factor >= 0.0 && opts.input_factor >= 0.0); + + KALDI_ASSERT(!ContainsNullPointers(points)); + int32 npoints = points.size(); + // make sure fits in uint_smaller and does not hit the -1 which is reserved. + KALDI_ASSERT(sizeof(uint_smaller)==sizeof(uint32) || + npoints < static_cast(static_cast(-1))); + + KALDI_VLOG(2) << "Initializing clustering object."; + InformationBottleneckBottomUpClusterer bc( + points, opts, max_merge_thresh, min_clust, + clusters_out, assignments_out); + BaseFloat ans = bc.Cluster(); + if (clusters_out) KALDI_ASSERT(!ContainsNullPointers(*clusters_out)); + return ans; +} + +} // end namespace kaldi diff --git a/src/segmenter/information-bottleneck-cluster-utils.h b/src/segmenter/information-bottleneck-cluster-utils.h new file mode 100644 index 00000000000..58f1e4f380a --- /dev/null +++ b/src/segmenter/information-bottleneck-cluster-utils.h @@ -0,0 +1,65 @@ +// segmenter/information-bottleneck-cluster-utils.h + +// Copyright 2017 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_SEGMENTER_INFORMATION_BOTTLENECK_CLUSTER_UTILS_H_ +#define KALDI_SEGMENTER_INFORMATION_BOTTLENECK_CLUSTER_UTILS_H_ + +#include "base/kaldi-common.h" +#include "tree/cluster-utils.h" +#include "segmenter/information-bottleneck-clusterable.h" +#include "util/common-utils.h" + +namespace kaldi { + +struct InformationBottleneckClustererOptions { + BaseFloat distance_threshold; + int32 num_clusters; + BaseFloat stopping_threshold; + BaseFloat relevance_factor; + BaseFloat input_factor; + + InformationBottleneckClustererOptions() : + distance_threshold(std::numeric_limits::max()), num_clusters(1), + stopping_threshold(0.3), relevance_factor(1.0), input_factor(0.1) { } + + + void Register(OptionsItf *opts) { + opts->Register("stopping-threshold", &stopping_threshold, + "Stopping merging/splitting when an objective such as " + "NMI reaches this value."); + opts->Register("relevance-factor", &relevance_factor, + "Weight factor of the entropy of relevant variables " + "in the objective function"); + opts->Register("input-factor", &input_factor, + "Weight factor of the entropy of input variables " + "in the objective function"); + } +}; + +BaseFloat IBClusterBottomUp( + const std::vector &points, + const InformationBottleneckClustererOptions &opts, + BaseFloat max_merge_thresh, + int32 min_clusters, + std::vector *clusters_out, + std::vector *assignments_out); + +} // end namespace kaldi + +#endif // KALDI_SEGMENTER_INFORMATION_BOTTLENECK_CLUSTER_UTILS_H_ diff --git a/src/segmenter/information-bottleneck-clusterable-test.cc b/src/segmenter/information-bottleneck-clusterable-test.cc new file mode 100644 index 00000000000..ee0358c8f05 --- /dev/null +++ b/src/segmenter/information-bottleneck-clusterable-test.cc @@ -0,0 +1,94 @@ + +#include "base/kaldi-common.h" +#include "segmenter/information-bottleneck-clusterable.h" + +namespace kaldi { + +static void TestClusterable() { + { + Vector a_vec(3); + a_vec(0) = 0.5; + a_vec(1) = 0.5; + int32 a_count = 100; + KALDI_ASSERT(ApproxEqual(a_vec.Sum(), 1.0)); + + Vector b_vec(3); + b_vec(1) = 0.333; + b_vec(2) = 0.667; + int32 b_count = 100; + KALDI_ASSERT(ApproxEqual(b_vec.Sum(), 1.0)); + + InformationBottleneckClusterable a(1, a_count, a_vec); + InformationBottleneckClusterable b(2, b_count, b_vec); + + Vector sum_vec(a_vec.Dim()); + sum_vec.AddVec(a_count, a_vec); + sum_vec.AddVec(b_count, b_vec); + sum_vec.Scale(1.0 / (a_count + b_count)); + KALDI_ASSERT(ApproxEqual(sum_vec.Sum(), 1.0)); + + InformationBottleneckClusterable sum(3); + InformationBottleneckClusterable c(3); + + sum.Add(a); + sum.Add(b); + + c.AddStats(1, a_count, a_vec); + c.AddStats(2, b_count, b_vec); + + KALDI_ASSERT(c.Counts() == sum.Counts()); + KALDI_ASSERT(ApproxEqual(c.Objf(), sum.Objf())); + KALDI_ASSERT(ApproxEqual(-c.Objf() + a.Objf() + b.Objf(), a.Distance(b))); + KALDI_ASSERT(sum_vec.ApproxEqual(c.RelevanceDist())); + KALDI_ASSERT(sum_vec.ApproxEqual(sum.RelevanceDist())); + } + + for (int32 i = 0; i < 100; i++) { + int32 dim = RandInt(2, 10); + + Vector a_vec(dim); + a_vec.SetRandn(); 
+ a_vec.ApplyPowAbs(1.0); + a_vec.Scale(1 / a_vec.Sum()); + KALDI_ASSERT(ApproxEqual(a_vec.Sum(), 1.0)); + int32 a_count = RandInt(1, 100); + InformationBottleneckClusterable a(1, a_count, a_vec); + + Vector b_vec(dim); + b_vec.SetRandn(); + b_vec.ApplyPowAbs(1.0); + b_vec.Scale(1 / b_vec.Sum()); + KALDI_ASSERT(ApproxEqual(b_vec.Sum(), 1.0)); + int32 b_count = RandInt(1, 100); + InformationBottleneckClusterable b(2, b_count, b_vec); + + Vector sum_vec(a_vec.Dim()); + sum_vec.AddVec(a_count, a_vec); + sum_vec.AddVec(b_count, b_vec); + sum_vec.Scale(1.0 / (a_count + b_count)); + KALDI_ASSERT(ApproxEqual(sum_vec.Sum(), 1.0)); + + InformationBottleneckClusterable sum(dim); + InformationBottleneckClusterable c(dim); + + sum.Add(a); + sum.Add(b); + + c.AddStats(1, a_count, a_vec); + c.AddStats(2, b_count, b_vec); + + KALDI_ASSERT(c.Counts() == sum.Counts()); + KALDI_ASSERT(ApproxEqual(c.Objf(), sum.Objf())); + KALDI_ASSERT(ApproxEqual(-c.Objf() + a.Objf() + b.Objf(), a.Distance(b))); + KALDI_ASSERT(sum_vec.ApproxEqual(c.RelevanceDist())); + KALDI_ASSERT(sum_vec.ApproxEqual(sum.RelevanceDist())); + } +} + +} // end namespace kaldi + +int main() { + using namespace kaldi; + + TestClusterable(); +} diff --git a/src/segmenter/information-bottleneck-clusterable.cc b/src/segmenter/information-bottleneck-clusterable.cc new file mode 100644 index 00000000000..7817f7cfdc6 --- /dev/null +++ b/src/segmenter/information-bottleneck-clusterable.cc @@ -0,0 +1,226 @@ +// segmenter/information-bottleneck-clusterable.cc + +// Copyright 2017 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+ +#include "segmenter/information-bottleneck-clusterable.h" + +namespace kaldi { + +void InformationBottleneckClusterable::AddStats( + int32 id, BaseFloat count, + const VectorBase &relevance_dist) { + std::map::iterator it = counts_.find(id); + KALDI_ASSERT(it == counts_.end() || it->first != id); + counts_.insert(it, std::make_pair(id, count)); + + double sum = relevance_dist.Sum(); + KALDI_ASSERT (sum != 0.0); + + p_yp_c_.Scale(total_count_); + p_yp_c_.AddVec(count / sum, relevance_dist); + total_count_ += count; + p_yp_c_.Scale(1.0 / total_count_); +} + +BaseFloat InformationBottleneckClusterable::Objf( + BaseFloat relevance_factor, BaseFloat input_factor) const { + double relevance_entropy = 0.0, count = 0.0; + for (int32 i = 0; i < p_yp_c_.Dim(); i++) { + if (p_yp_c_(i) > 1e-20) { + relevance_entropy -= p_yp_c_(i) * Log(p_yp_c_(i)); + count += p_yp_c_(i); + } + } + relevance_entropy = total_count_ * (relevance_entropy / count - Log(count)); + + double input_entropy = total_count_ * Log(total_count_); + for (std::map::const_iterator it = counts_.begin(); + it != counts_.end(); ++it) { + input_entropy -= it->second * Log(it->second); + } + + BaseFloat objf = -relevance_factor * relevance_entropy + + input_factor * input_entropy; + return objf; +} + +void InformationBottleneckClusterable::Add(const Clusterable &other_in) { + KALDI_ASSERT(other_in.Type() == "information-bottleneck"); + const InformationBottleneckClusterable *other = + static_cast (&other_in); + + for (std::map::const_iterator it = other->counts_.begin(); + it != other->counts_.end(); ++it) { + std::map::iterator hint_it = counts_.lower_bound( + it->first); + KALDI_ASSERT (hint_it == counts_.end() || hint_it->first != it->first); + counts_.insert(hint_it, *it); + } + + p_yp_c_.Scale(total_count_); + p_yp_c_.AddVec(other->total_count_, other->p_yp_c_); + total_count_ += other->total_count_; + p_yp_c_.Scale(1.0 / total_count_); +} + +void InformationBottleneckClusterable::Sub(const Clusterable &other_in) { + KALDI_ASSERT(other_in.Type() == "information-bottleneck"); + const InformationBottleneckClusterable *other = + static_cast (&other_in); + + for (std::map::const_iterator it = other->counts_.begin(); + it != other->counts_.end(); ++it) { + std::map::iterator hint_it = counts_.lower_bound( + it->first); + KALDI_ASSERT (hint_it->first == it->first); + counts_.erase(hint_it); + } + + p_yp_c_.Scale(total_count_); + p_yp_c_.AddVec(-other->total_count_, other->p_yp_c_); + total_count_ -= other->total_count_; + p_yp_c_.Scale(1.0 / total_count_); +} + +Clusterable* InformationBottleneckClusterable::Copy() const { + InformationBottleneckClusterable *ans = + new InformationBottleneckClusterable(RelevanceDim()); + ans->Add(*this); + return ans; +} + +void InformationBottleneckClusterable::Scale(BaseFloat f) { + KALDI_ASSERT(f >= 0.0); + for (std::map::iterator it = counts_.begin(); + it != counts_.end(); ++it) { + it->second *= f; + } + total_count_ *= f; +} + +void InformationBottleneckClusterable::Write( + std::ostream &os, bool binary) const { + WriteToken(os, binary, "IBCL"); // magic string. 
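+  // Serialized layout (consumed by Read() below): the magic string, the
+  // number of (segment-id, count) pairs, the pairs themselves, a separator
+  // token, and finally the relevance distribution p_yp_c_.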
+  // Cast to int32 so that Write() and Read() agree on the size's type.
+  WriteBasicType(os, binary, static_cast<int32>(counts_.size()));
+  BaseFloat total_count = 0.0;
+  for (std::map<int32, BaseFloat>::const_iterator it = counts_.begin();
+       it != counts_.end(); ++it) {
+    WriteBasicType(os, binary, it->first);
+    WriteBasicType(os, binary, it->second);
+    total_count += it->second;
+  }
+  KALDI_ASSERT(ApproxEqual(total_count_, total_count));
+  WriteToken(os, binary, "");
+  p_yp_c_.Write(os, binary);
+}
+
+Clusterable* InformationBottleneckClusterable::ReadNew(
+    std::istream &is, bool binary) const {
+  InformationBottleneckClusterable *ibc =
+      new InformationBottleneckClusterable();
+  ibc->Read(is, binary);
+  return ibc;
+}
+
+void InformationBottleneckClusterable::Read(std::istream &is, bool binary) {
+  ExpectToken(is, binary, "IBCL");  // magic string.
+  int32 size;
+  ReadBasicType(is, binary, &size);
+
+  // Each iteration reads one (id, count) pair, so loop 'size' times to
+  // match what Write() put out.
+  for (int32 i = 0; i < size; i++) {
+    int32 id;
+    BaseFloat count;
+    ReadBasicType(is, binary, &id);
+    ReadBasicType(is, binary, &count);
+    std::pair<std::map<int32, BaseFloat>::iterator, bool> ret;
+    ret = counts_.insert(std::make_pair(id, count));
+    if (!ret.second) {
+      KALDI_ERR << "Duplicate element " << id << " when reading counts";
+    }
+    total_count_ += count;
+  }
+
+  ExpectToken(is, binary, "");
+  p_yp_c_.Read(is, binary);
+}
+
+BaseFloat InformationBottleneckClusterable::ObjfPlus(
+    const Clusterable &other, BaseFloat relevance_factor,
+    BaseFloat input_factor) const {
+  InformationBottleneckClusterable *copy =
+      static_cast<InformationBottleneckClusterable*>(Copy());
+  copy->Add(other);
+  BaseFloat ans = copy->Objf(relevance_factor, input_factor);
+  delete copy;
+  return ans;
+}
+
+BaseFloat InformationBottleneckClusterable::ObjfMinus(
+    const Clusterable &other, BaseFloat relevance_factor,
+    BaseFloat input_factor) const {
+  InformationBottleneckClusterable *copy =
+      static_cast<InformationBottleneckClusterable*>(Copy());
+  copy->Sub(other);  // subtract, not add: this is the objf after removal.
+  BaseFloat ans = copy->Objf(relevance_factor, input_factor);
+  delete copy;
+  return ans;
+}
+
+BaseFloat InformationBottleneckClusterable::Distance(
+    const Clusterable &other_in, BaseFloat relevance_factor,
+    BaseFloat input_factor) const {
+  KALDI_ASSERT(other_in.Type() == "information-bottleneck");
+  const InformationBottleneckClusterable *other =
+      static_cast<const InformationBottleneckClusterable*>(&other_in);
+
+  BaseFloat normalizer = this->Normalizer() + other->Normalizer();
+  BaseFloat pi_i = this->Normalizer() / normalizer;
+  BaseFloat pi_j = other->Normalizer() / normalizer;
+
+  // Compute the distribution q_Y(y) = p(y | {c_i} + {c_j})
+  Vector<BaseFloat> relevance_dist(this->RelevanceDim());
+  relevance_dist.AddVec(pi_i, this->RelevanceDist());
+  relevance_dist.AddVec(pi_j, other->RelevanceDist());
+
+  BaseFloat relevance_divergence
+      = pi_i * KLDivergence(this->RelevanceDist(), relevance_dist)
+      + pi_j * KLDivergence(other->RelevanceDist(), relevance_dist);
+
+  BaseFloat input_divergence
+      = Log(normalizer) - pi_i * Log(this->Normalizer())
+      - pi_j * Log(other->Normalizer());
+
+  KALDI_ASSERT(relevance_divergence > -1e-4);
+  KALDI_ASSERT(input_divergence > -1e-4);
+  return (normalizer * (relevance_factor * relevance_divergence
+                        - input_factor * input_divergence));
+}
+
+BaseFloat KLDivergence(const VectorBase<BaseFloat> &p1,
+                       const VectorBase<BaseFloat> &p2) {
+  KALDI_ASSERT(p1.Dim() == p2.Dim());
+
+  double ans = 0.0, sum = 0.0;
+  for (int32 i = 0; i < p1.Dim(); i++) {
+    if (p1(i) > 1e-20) {
+      ans += p1(i) * Log(p1(i) / p2(i));
+      sum += p1(i);
+    }
+  }
+  return ans / sum - Log(sum);
+}
+
+}  // end namespace kaldi
diff --git a/src/segmenter/information-bottleneck-clusterable.h b/src/segmenter/information-bottleneck-clusterable.h
new file mode 100644
index 00000000000..cb88d1221f7
--- /dev/null
+++
b/src/segmenter/information-bottleneck-clusterable.h @@ -0,0 +1,163 @@ +// segmenter/information-bottleneck-clusterable.h + +// Copyright 2017 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_SEGMENTER_INFORMATION_BOTTLENECK_CLUSTERABLE_H_ +#define KALDI_SEGMENTER_INFORMATION_BOTTLENECK_CLUSTERABLE_H_ + +#include "base/kaldi-common.h" +#include "matrix/kaldi-matrix.h" +#include "itf/clusterable-itf.h" + +namespace kaldi { + +class InformationBottleneckClusterable: public Clusterable { + public: + /// Constructor used for creating empty object e.g. when reading from file. + InformationBottleneckClusterable(): total_count_(0.0) { } + + /// Constructor initializing the relevant variable dimension. + /// Used for making Copy() of object. + InformationBottleneckClusterable(int32 relevance_dim) : + total_count_(0.0), p_yp_c_(relevance_dim) { } + + /// Constructor initializing from input stats corresponding to a + /// segment. + InformationBottleneckClusterable(int32 id, BaseFloat count, + const VectorBase &relevance_dist): + total_count_(0.0), p_yp_c_(relevance_dist.Dim()) { + AddStats(id, count, relevance_dist); + } + + /// Return a copy of this object. + virtual Clusterable* Copy() const; + + /// Return the objective function, which is + /// N(c) * (-r * H(Y|c) + ibeta * H(X|c)) + /// where N(c) is the total count in the cluster + /// H(Y|c) is the conditional entropy of the relevance + /// variable distribution + /// H(X|c) is the conditional entropy of the input variable + /// distribution + /// r is the weight on the relevant variables + /// ibeta is the weight on the input variables + virtual BaseFloat Objf(BaseFloat relevance_factor, + BaseFloat input_factor) const; + + /// Return the objective function with the default values + /// for relevant_factor (1.0) and input_factor (0.1) + virtual BaseFloat Objf() const { return Objf(1.0, 0.1); } + + /// Return the count in this cluster. + virtual BaseFloat Normalizer() const { return total_count_; } + + /// Set stats to empty. + virtual void SetZero() { + counts_.clear(); + p_yp_c_.Resize(0); + total_count_ = 0.0; + } + + /// Add stats to this object + virtual void AddStats(int32 id, BaseFloat count, + const VectorBase &relevance_dist); + + /// Add other stats. + virtual void Add(const Clusterable &other); + /// Subtract other stats. + virtual void Sub(const Clusterable &other); + /// Scale the stats by a positive number f. + virtual void Scale(BaseFloat f); + + /// Return a string that describes the clusterable type. + virtual std::string Type() const { return "information-bottleneck"; } + + /// Write data to stream. 
+ virtual void Write(std::ostream &os, bool binary) const; + + /// Read data from a stream and return the corresponding object (const + /// function; it's a class member because we need access to the vtable + /// so generic code can read derived types). + virtual Clusterable* ReadNew(std::istream &is, bool binary) const; + + /// Read data from stream + virtual void Read(std::istream &is, bool binary); + + /// Return the objective function of the combined object this + other. + virtual BaseFloat ObjfPlus(const Clusterable &other, + BaseFloat relevance_factor, + BaseFloat input_factor) const; + + /// Same as the above function, but using default values for + /// relevance_factor (1.0) and input_factor (0.1) + virtual BaseFloat ObjfPlus(const Clusterable &other) const { + return ObjfPlus(other, 1.0, 0.1); + } + + /// Return the objective function of the combined object this + other. + virtual BaseFloat ObjfMinus(const Clusterable &other, + BaseFloat relevance_factor, + BaseFloat input_factor) const; + + /// Same as the above function, but using default values for + /// relevance_factor (1.0) and input_factor (0.1) + virtual BaseFloat ObjfMinus(const Clusterable &other) const { + return ObjfMinus(other, 1.0, 0.1); + } + + /// Return the objective function decrease from merging the two + /// clusters. + /// Always a non-negative number. + virtual BaseFloat Distance(const Clusterable &other, + BaseFloat relevance_factor, + BaseFloat input_factor) const; + + /// Same as the above function, but using default values for + /// relevance_factor (1.0) and input_factor (0.1) + virtual BaseFloat Distance(const Clusterable &other) const { + return Distance(other, 1.0, 0.1); + } + + virtual ~InformationBottleneckClusterable() {} + + /// Public accessors + virtual const Vector& RelevanceDist() const { return p_yp_c_; } + virtual int32 RelevanceDim() const { return p_yp_c_.Dim(); } + + virtual const std::map& Counts() const { return counts_; } + + private: + /// A list of the original segments this cluster contains along with + /// their corresponding counts. + std::map counts_; + + /// Total count in this cluster. + BaseFloat total_count_; + + /// Relevant variable distribution. + /// TODO: Make sure that this is a valid probability distribution. + Vector p_yp_c_; +}; + +/// Returns the KL Divergence between two probability distributions. +BaseFloat KLDivergence(const VectorBase &p1, + const VectorBase &p2); + +} // end namespace kaldi + +#endif // KALDI_SEGMENTER_INFORMATION_BOTTLENECK_CLUSTERABLE_H_ diff --git a/src/tree/cluster-utils.cc b/src/tree/cluster-utils.cc index 53de0825e08..965eb104d9e 100644 --- a/src/tree/cluster-utils.cc +++ b/src/tree/cluster-utils.cc @@ -190,62 +190,6 @@ void AddToClustersOptimized(const std::vector &stats, // Bottom-up clustering routines // ============================================================================ -class BottomUpClusterer { - public: - BottomUpClusterer(const std::vector &points, - BaseFloat max_merge_thresh, - int32 min_clust, - std::vector *clusters_out, - std::vector *assignments_out) - : ans_(0.0), points_(points), max_merge_thresh_(max_merge_thresh), - min_clust_(min_clust), clusters_(clusters_out != NULL? clusters_out - : &tmp_clusters_), assignments_(assignments_out != NULL ? 
- assignments_out : &tmp_assignments_) { - nclusters_ = npoints_ = points.size(); - dist_vec_.resize((npoints_ * (npoints_ - 1)) / 2); - } - - BaseFloat Cluster(); - ~BottomUpClusterer() { DeletePointers(&tmp_clusters_); } - - private: - void Renumber(); - void InitializeAssignments(); - void SetInitialDistances(); ///< Sets up distances and queue. - /// CanMerge returns true if i and j are existing clusters, and the distance - /// (negated objf-change) "dist" is accurate (i.e. not outdated). - bool CanMerge(int32 i, int32 j, BaseFloat dist); - /// Merge j into i and delete j. - void MergeClusters(int32 i, int32 j); - /// Reconstructs the priority queue from the distances. - void ReconstructQueue(); - - void SetDistance(int32 i, int32 j); - BaseFloat& Distance(int32 i, int32 j) { - KALDI_ASSERT(i < npoints_ && j < i); - return dist_vec_[(i * (i - 1)) / 2 + j]; - } - - BaseFloat ans_; - const std::vector &points_; - BaseFloat max_merge_thresh_; - int32 min_clust_; - std::vector *clusters_; - std::vector *assignments_; - - std::vector tmp_clusters_; - std::vector tmp_assignments_; - - std::vector dist_vec_; - int32 nclusters_; - int32 npoints_; - typedef std::pair > QueueElement; - // Priority queue using greater (lowest distances are highest priority). - typedef std::priority_queue, - std::greater > QueueType; - QueueType queue_; -}; - BaseFloat BottomUpClusterer::Cluster() { KALDI_VLOG(2) << "Initializing cluster assignments."; InitializeAssignments(); @@ -253,12 +197,15 @@ BaseFloat BottomUpClusterer::Cluster() { SetInitialDistances(); KALDI_VLOG(2) << "Clustering..."; - while (nclusters_ > min_clust_ && !queue_.empty()) { + while (!StoppingCriterion()) { std::pair > pr = queue_.top(); BaseFloat dist = pr.first; int32 i = (int32) pr.second.first, j = (int32) pr.second.second; queue_.pop(); - if (CanMerge(i, j, dist)) MergeClusters(i, j); + if (CanMerge(i, j, dist)) { + UpdateClustererStats(i, j); + MergeClusters(i, j); + } } KALDI_VLOG(2) << "Renumbering clusters to contiguous numbers."; Renumber(); @@ -325,11 +272,12 @@ void BottomUpClusterer::InitializeAssignments() { void BottomUpClusterer::SetInitialDistances() { for (int32 i = 0; i < npoints_; i++) { for (int32 j = 0; j < i; j++) { - BaseFloat dist = (*clusters_)[i]->Distance(*((*clusters_)[j])); - dist_vec_[(i * (i - 1)) / 2 + j] = dist; + BaseFloat dist = ComputeDistance(i, j); if (dist <= max_merge_thresh_) queue_.push(std::make_pair(dist, std::make_pair(static_cast(i), static_cast(j)))); + if (j == i - 1) + KALDI_VLOG(2) << "Distance(" << i << ", " << j << ") = " << dist; } } } @@ -344,6 +292,7 @@ bool BottomUpClusterer::CanMerge(int32 i, int32 j, BaseFloat dist) { void BottomUpClusterer::MergeClusters(int32 i, int32 j) { KALDI_ASSERT(i != j && i < npoints_ && j < npoints_); + (*clusters_)[i]->Add(*((*clusters_)[j])); delete (*clusters_)[j]; (*clusters_)[j] = NULL; @@ -389,8 +338,7 @@ void BottomUpClusterer::ReconstructQueue() { void BottomUpClusterer::SetDistance(int32 i, int32 j) { KALDI_ASSERT(i < npoints_ && j < i && (*clusters_)[i] != NULL && (*clusters_)[j] != NULL); - BaseFloat dist = (*clusters_)[i]->Distance(*((*clusters_)[j])); - dist_vec_[(i * (i - 1)) / 2 + j] = dist; // set the distance in the array. 
+  BaseFloat dist = ComputeDistance(i, j);
   if (dist < max_merge_thresh_) {
     queue_.push(std::make_pair(dist, std::make_pair(static_cast<uint_smaller>(i),
                                                     static_cast<uint_smaller>(j))));
@@ -403,7 +351,6 @@ void BottomUpClusterer::SetDistance(int32 i, int32 j) {
 }
 
-
 BaseFloat ClusterBottomUp(const std::vector<Clusterable*> &points,
                           BaseFloat max_merge_thresh,
                           int32 min_clust,
diff --git a/src/tree/cluster-utils.h b/src/tree/cluster-utils.h
index 55583a237bf..b11dfe1c031 100644
--- a/src/tree/cluster-utils.h
+++ b/src/tree/cluster-utils.h
@@ -21,10 +21,14 @@
 #ifndef KALDI_TREE_CLUSTER_UTILS_H_
 #define KALDI_TREE_CLUSTER_UTILS_H_
 
+#include <queue>
 #include <vector>
+using std::vector;
 #include "matrix/matrix-lib.h"
+#include "util/stl-utils.h"
 #include "itf/clusterable-itf.h"
+
 namespace kaldi {
 
 /// \addtogroup clustering_group_simple
@@ -103,9 +107,92 @@ void AddToClustersOptimized(const std::vector<Clusterable*> &stats,
  *  @param assignments_out [out] If non-NULL, will be resized to the number of
  *         points, and each element is the index of the cluster that point
  *         was assigned to.
+ */
+
+class BottomUpClusterer {
+ public:
+  typedef uint16 uint_smaller;
+  typedef int16 int_smaller;
+
+  BottomUpClusterer(const std::vector<Clusterable*> &points,
+                    BaseFloat max_merge_thresh,
+                    int32 min_clust,
+                    std::vector<Clusterable*> *clusters_out,
+                    std::vector<int32> *assignments_out)
+      : ans_(0.0), points_(points), max_merge_thresh_(max_merge_thresh),
+        min_clust_(min_clust), clusters_(clusters_out != NULL ? clusters_out
+            : &tmp_clusters_), assignments_(assignments_out != NULL ?
+            assignments_out : &tmp_assignments_) {
+    nclusters_ = npoints_ = points.size();
+    dist_vec_.resize((npoints_ * (npoints_ - 1)) / 2);
+  }
+
+  BaseFloat Cluster();
+  ~BottomUpClusterer() { DeletePointers(&tmp_clusters_); }
+
+  /// Public accessors
+  const Clusterable* GetCluster(int32 i) const { return (*clusters_)[i]; }
+  BaseFloat& Distance(int32 i, int32 j) {
+    KALDI_ASSERT(i < npoints_ && j < i);
+    return dist_vec_[(i * (i - 1)) / 2 + j];
+  }
+
+  /// CanMerge returns true if i and j are existing clusters, and the distance
+  /// (negated objf-change) "dist" is accurate (i.e. not outdated).
+  virtual bool CanMerge(int32 i, int32 j, BaseFloat dist);
+
+  /// Merge j into i and delete j.
+  virtual void MergeClusters(int32 i, int32 j);
+
+  int32 NumClusters() const { return nclusters_; }
+  int32 NumPoints() const { return npoints_; }
+  int32 MinClusters() const { return min_clust_; }
+  bool IsQueueEmpty() const { return queue_.empty(); }
+
+ private:
+  void Renumber();
+  void InitializeAssignments();
+  void SetInitialDistances();  ///< Sets up distances and queue.
+  /// Reconstructs the priority queue from the distances.
+  void ReconstructQueue();
+
+  /// Update some stats to reflect merging clusters i and j.
+  virtual void UpdateClustererStats(int32 i, int32 j) { }
+
+  virtual bool StoppingCriterion() const {
+    return nclusters_ <= min_clust_ || queue_.empty();
+  }
+
+  void SetDistance(int32 i, int32 j);
+  virtual BaseFloat ComputeDistance(int32 i, int32 j) {
+    BaseFloat dist = (*clusters_)[i]->Distance(*((*clusters_)[j]));
+    dist_vec_[(i * (i - 1)) / 2 + j] = dist;  // set the distance in the array.
+    return dist;
+  }
+
+  BaseFloat ans_;
+  const std::vector<Clusterable*> &points_;
+  BaseFloat max_merge_thresh_;
+  int32 min_clust_;
+  std::vector<Clusterable*> *clusters_;
+  std::vector<int32> *assignments_;
+
+  std::vector<Clusterable*> tmp_clusters_;
+  std::vector<int32> tmp_assignments_;
+
+  std::vector<BaseFloat> dist_vec_;
+  int32 nclusters_;
+  int32 npoints_;
+  typedef std::pair<BaseFloat, std::pair<uint_smaller, uint_smaller> > QueueElement;
+  // Priority queue using greater (lowest distances are highest priority).
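+  // The queue may hold stale entries: distances are never removed when a
+  // cluster changes, so a popped candidate is re-validated by CanMerge()
+  // against the authoritative value in dist_vec_, and ReconstructQueue()
+  // rebuilds the heap from dist_vec_ when orphaned entries accumulate.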
+  typedef std::priority_queue<QueueElement, std::vector<QueueElement>,
+                              std::greater<QueueElement> > QueueType;
+  QueueType queue_;
+};
+
+/** This is a wrapper function to the BottomUpClusterer class.
  *  @return Returns the total objf change relative to all clusters being separate, which is
  *     a negative.  Note that this is not the same as what the other clustering algorithms return.
- */
+ **/
 BaseFloat ClusterBottomUp(const std::vector<Clusterable*> &points,
                           BaseFloat thresh,
                           int32 min_clust,

From 403bde7b3470c34967b6a86e24d2c2cd4a98623e Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:14:53 -0500
Subject: [PATCH 360/530] asr_diarization: Add intersect int vectors

---
 src/segmenterbin/intersect-int-vectors.cc | 158 ++++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 src/segmenterbin/intersect-int-vectors.cc

diff --git a/src/segmenterbin/intersect-int-vectors.cc b/src/segmenterbin/intersect-int-vectors.cc
new file mode 100644
index 00000000000..53731bf9046
--- /dev/null
+++ b/src/segmenterbin/intersect-int-vectors.cc
@@ -0,0 +1,158 @@
+// segmenterbin/intersect-int-vectors.cc
+
+// Copyright 2017 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+
+    const char *usage =
+        "Intersect two integer vectors and create a new integer vector "
+        "whose ids are defined as the cross-products of the integer "
+        "ids from the two vectors.\n"
+        "\n"
+        "Usage: intersect-int-vectors [options] <ali-rspecifier1> "
+        "<ali-rspecifier2> <ali-wspecifier>\n"
+        " e.g.: intersect-int-vectors ark:1.ali ark:2.ali ark:-\n"
+        "See also: segmentation-init-from-segments, "
+        "segmentation-combine-segments\n";
+
+    ParseOptions po(usage);
+
+    std::string mapping_rxfilename, mapping_wxfilename;
+    int32 length_tolerance = 0;
+
+    po.Register("mapping-in", &mapping_rxfilename,
+                "A file with three columns that define the mapping from "
+                "a pair of integers to a third one.");
+    po.Register("mapping-out", &mapping_wxfilename,
+                "Write a mapping in the same format as --mapping-in, "
+                "but let the program decide the mapping to unique integer "
+                "ids.");
+    po.Register("length-tolerance", &length_tolerance,
+                "Tolerate this many frames of length mismatch between the "
+                "two integer vector pairs.");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string ali_rspecifier1 = po.GetArg(1),
+        ali_rspecifier2 = po.GetArg(2),
+        ali_wspecifier = po.GetArg(3);
+
+    std::map<std::pair<int32, int32>, int32> mapping;
+    if (!mapping_rxfilename.empty()) {
+      Input ki(mapping_rxfilename);
+      std::string line;
+      while (std::getline(ki.Stream(), line)) {
+        std::vector<std::string> parts;
+        SplitStringToVector(line, " ", true, &parts);
+        KALDI_ASSERT(parts.size() == 3);
+
+        std::pair<int32, int32> id_pair = std::make_pair(
+            std::atoi(parts[0].c_str()), std::atoi(parts[1].c_str()));
+        int32 id_new = std::atoi(parts[2].c_str());
+        KALDI_ASSERT(id_new >= 0);
+
+        std::map<std::pair<int32, int32>, int32>::iterator it =
+            mapping.lower_bound(id_pair);
+        KALDI_ASSERT(it == mapping.end() || it->first != id_pair);
+
+        mapping.insert(it, std::make_pair(id_pair, id_new));
+      }
+    }
+
+    SequentialInt32VectorReader ali_reader1(ali_rspecifier1);
+    RandomAccessInt32VectorReader ali_reader2(ali_rspecifier2);
+
+    Int32VectorWriter ali_writer(ali_wspecifier);
+
+    int32 num_ids = 0, num_err = 0, num_done = 0;
+
+    for (; !ali_reader1.Done(); ali_reader1.Next()) {
+      const std::string &key = ali_reader1.Key();
+
+      if (!ali_reader2.HasKey(key)) {
+        KALDI_WARN << "Could not find second alignment for key " << key
+                   << " in " << ali_rspecifier2;
+        num_err++;
+        continue;
+      }
+
+      const std::vector<int32> &alignment1 = ali_reader1.Value();
+      const std::vector<int32> &alignment2 = ali_reader2.Value(key);
+
+      int32 length_diff = static_cast<int32>(alignment1.size())
+          - static_cast<int32>(alignment2.size());
+      if (length_diff > length_tolerance || -length_diff > length_tolerance) {
+        KALDI_WARN << "Mismatch in length of alignments in "
+                   << ali_rspecifier1 << " and " << ali_rspecifier2
+                   << "; " << alignment1.size() << " vs "
+                   << alignment2.size();
+        num_err++;
+        continue;  // skip this utterance; the pairing below assumes the
+                   // lengths agree up to the tolerance.
+      }
+
+      // Only the frames present in both inputs can be paired.
+      size_t num_frames = std::min(alignment1.size(), alignment2.size());
+      std::vector<int32> alignment_out(num_frames);
+
+      for (size_t i = 0; i < num_frames; i++) {
+        std::pair<int32, int32> id_pair = std::make_pair(
+            alignment1[i], alignment2[i]);
+
+        std::map<std::pair<int32, int32>, int32>::iterator it =
+            mapping.lower_bound(id_pair);
+
+        int32 id_new = -1;
+        if (!mapping_rxfilename.empty()) {
+          if (it == mapping.end() || it->first != id_pair) {
+            KALDI_ERR << "Could not find id-pair (" << id_pair.first
+                      << ", " << id_pair.second
+                      << ") in mapping " << mapping_rxfilename;
+          }
+          id_new = it->second;
+        } else {
+          if (it == mapping.end() || it->first != id_pair) {
+            id_new = ++num_ids;
+            mapping.insert(it, std::make_pair(id_pair, id_new));
+          } else {
+            id_new = it->second;
+          }
+        }
+
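+        // Worked example (hypothetical values): alignment1 = {3, 3, 5} and
+        // alignment2 = {0, 1, 1} yield the pairs (3,0), (3,1), (5,1); with
+        // no --mapping-in supplied, these get fresh ids 1, 2 and 3 in order
+        // of first appearance, so alignment_out = {1, 2, 3}.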
+        alignment_out[i] = id_new;
+      }
+
+      ali_writer.Write(key, alignment_out);
+      num_done++;
+    }
+
+    KALDI_LOG << "Intersected " << num_done << " int vector pairs; "
+              << "failed with " << num_err;
+
+    return ((num_done > 0 && num_err < num_done) ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+

From 71f0de65b482c419d18707ca3b5bc6ebdd7978e2 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:15:09 -0500
Subject: [PATCH 361/530] asr_diarization: Clustering using IB

---
 .../segmentation-cluster-adjacent-segments.cc | 290 ++++++++++++++++++
 1 file changed, 290 insertions(+)
 create mode 100644 src/segmenterbin/segmentation-cluster-adjacent-segments.cc

diff --git a/src/segmenterbin/segmentation-cluster-adjacent-segments.cc b/src/segmenterbin/segmentation-cluster-adjacent-segments.cc
new file mode 100644
index 00000000000..812785ac5e6
--- /dev/null
+++ b/src/segmenterbin/segmentation-cluster-adjacent-segments.cc
@@ -0,0 +1,290 @@
+// segmenterbin/segmentation-cluster-adjacent-segments.cc
+
+// Copyright 2017 Vimal Manohar (Johns Hopkins University)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
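+
+// The per-pair distance below is, summed over feature dimensions, a
+// symmetrised Gaussian divergence:
+//   sum_d [ v1/v2 + v2/v1 + (m2 - m1)^2 * (1/v1 + 1/v2) ],
+// where (m, v) are the diagonal-Gaussian mean and variance of each
+// segment's frames; larger values mean adjacent segments look less alike.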
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "segmenter/segmentation-utils.h"
+#include "tree/clusterable-classes.h"
+
+namespace kaldi {
+namespace segmenter {
+
+BaseFloat Distance(const Segment &seg1, const Segment &seg2,
+                   const MatrixBase<BaseFloat> &feats,
+                   BaseFloat var_floor,
+                   int32 length_tolerance = 2) {
+  int32 start1 = seg1.start_frame;
+  int32 end1 = seg1.end_frame;
+
+  int32 start2 = seg2.start_frame;
+  int32 end2 = seg2.end_frame;
+
+  if (end1 > feats.NumRows() + length_tolerance) {
+    KALDI_ERR << "Segment end > feature length; " << end1
+              << " vs " << feats.NumRows();
+  }
+
+  GaussClusterable stats1(feats.NumCols(), var_floor);
+  for (int32 i = start1; i < std::min(end1, feats.NumRows()); i++) {
+    stats1.AddStats(feats.Row(i));
+  }
+  Vector<double> means1(stats1.x_stats());
+  means1.Scale(1.0 / stats1.count());
+  Vector<double> vars1(stats1.x2_stats());
+  vars1.Scale(1.0 / stats1.count());
+  vars1.AddVec2(-1.0, means1);
+  vars1.ApplyFloor(var_floor);
+
+  GaussClusterable stats2(feats.NumCols(), var_floor);
+  for (int32 i = start2; i < std::min(end2, feats.NumRows()); i++) {
+    stats2.AddStats(feats.Row(i));
+  }
+  Vector<double> means2(stats2.x_stats());
+  means2.Scale(1.0 / stats2.count());
+  Vector<double> vars2(stats2.x2_stats());
+  vars2.Scale(1.0 / stats2.count());
+  vars2.AddVec2(-1.0, means2);
+  vars2.ApplyFloor(var_floor);
+
+  double ans = 0.0;
+  for (int32 i = 0; i < feats.NumCols(); i++) {
+    ans += (vars1(i) / vars2(i) + vars2(i) / vars1(i)
+            + (means2(i) - means1(i)) * (means2(i) - means1(i))
+              * (1.0 / vars1(i) + 1.0 / vars2(i)));
+  }
+
+  return ans;
+}
+
+int32 ClusterAdjacentSegments(const MatrixBase<BaseFloat> &feats,
+                              BaseFloat absolute_distance_threshold,
+                              BaseFloat delta_distance_threshold,
+                              BaseFloat var_floor,
+                              int32 length_tolerance,
+                              Segmentation *segmentation) {
+  if (segmentation->Dim() == 1) {
+    segmentation->Begin()->SetLabel(1);
+    return 1;
+  }
+
+  SegmentList::iterator it = segmentation->Begin(),
+      next_it = segmentation->Begin();
+  ++next_it;
+
+  BaseFloat prev_dist = Distance(*it, *next_it, feats,
+                                 var_floor, length_tolerance);
+
+  if (segmentation->Dim() == 2) {
+    it->SetLabel(1);
+    if (prev_dist < absolute_distance_threshold * feats.NumCols()) {
+      // Similar segments merged.
+      next_it->SetLabel(it->Label());
+    } else {
+      // Segments not merged.
+      next_it->SetLabel(it->Label() + 1);
+    }
+
+    return next_it->Label();
+  }
+
+  ++it;
+  ++next_it;
+  bool next_segment_is_new_cluster = false;
+
+  for (; next_it != segmentation->End(); ++it, ++next_it) {
+    SegmentList::iterator prev_it(it);
+    --prev_it;
+
+    // Compute distance between this and the next segment.
+    BaseFloat dist = Distance(*it, *next_it, feats, var_floor,
+                              length_tolerance);
+
+    // Possibly merge the current segment with the previous one.
+    if (next_segment_is_new_cluster ||
+        (prev_it->end_frame + 1 >= it->start_frame &&
+         prev_dist < absolute_distance_threshold * feats.NumCols())) {
+      // Previous and current segment are next to each other.
+      // Merge the current segment with the previous one.
+      it->SetLabel(prev_it->Label());
+
+      KALDI_VLOG(3) << "Merging clusters " << *prev_it << " and " << *it
+                    << " ; dist = " << prev_dist;
+    } else {
+      it->SetLabel(prev_it->Label() + 1);
+      KALDI_VLOG(3) << "Not merging clusters " << *prev_it
+                    << " and " << *it << " ; dist = " << prev_dist;
+    }
+
+    // Decide if the current segment must be merged with the next one.
+    if (prev_it->end_frame + 1 >= it->start_frame &&
+        it->end_frame + 1 >= next_it->start_frame) {
+      // All 3 segments are adjacent.
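+      // This is a change-point heuristic: a jump in distance of more than
+      // delta-distance-threshold (scaled by the feature dimension) between
+      // consecutive pairs suggests the next segment starts a new cluster.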
+ if (dist - prev_dist > delta_distance_threshold * feats.NumCols()) { + // Next segment is very different from the current and previous segment. + // So create a new cluster for the next segment. + next_segment_is_new_cluster = true; + } else { + next_segment_is_new_cluster = false; + } + } + + prev_dist = dist; + } + + SegmentList::iterator prev_it(it); + --prev_it; + if (next_segment_is_new_cluster || + (prev_it->end_frame + 1 >= it->start_frame && + prev_dist < absolute_distance_threshold * feats.NumCols())) { + // Merge current segment with previous. + it->SetLabel(prev_it->Label()); + + KALDI_VLOG(3) << "Merging clusters " << *prev_it << " and " << *it + << " ; dist = " << prev_dist; + } else { + it->SetLabel(prev_it->Label() + 1); + } + + return it->Label(); +} + +} // end segmenter +} // end kaldi + + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace segmenter; + + const char *usage = + "Merge adjacent segments that are similar to each other.\n" + "\n" + "Usage: segmentation-cluster-adjacent-segments [options] " + " \n" + " e.g.: segmentation-cluster-adjacent-segments ark:foo.seg ark:feats.ark ark,t:-\n" + "See also: segmentation-merge, segmentation-merge-recordings, " + "segmentation-post-process --merge-labels\n"; + + bool binary = true; + int32 length_tolerance = 2; + BaseFloat var_floor = 0.01; + BaseFloat absolute_distance_threshold = 3.0; + BaseFloat delta_distance_threshold = 0.2; + + ParseOptions po(usage); + + po.Register("binary", &binary, + "Write in binary mode " + "(only relevant if output is a wxfilename)"); + po.Register("length-tolerance", &length_tolerance, + "Tolerate length difference between segmentation and " + "features if its less than this many frames."); + po.Register("variance-floor", &var_floor, + "Variance floor of Gaussians used in computing distances " + "for clustering."); + po.Register("absolute-distance-threshold", &absolute_distance_threshold, + "Maximum per-dim distance below which segments will not be " + "be merged."); + po.Register("delta-distance-threshold", &delta_distance_threshold, + "If the delta-distance is below this value, then the " + "adjacent segments will not be merged."); + + po.Read(argc, argv); + + if (po.NumArgs() != 3) { + po.PrintUsage(); + exit(1); + } + + std::string segmentation_in_fn = po.GetArg(1), + feats_in_fn = po.GetArg(2), + segmentation_out_fn = po.GetArg(3); + + // all these "fn"'s are either rspecifiers or filenames. 
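+    // E.g. (hypothetical values): "ark:foo.seg" classifies as an archive
+    // rspecifier, so the table-based branch below is taken, while a plain
+    // filename like "foo.seg" gives kNoRspecifier and the single-object
+    // branch is used instead.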
+ bool in_is_rspecifier = + (ClassifyRspecifier(segmentation_in_fn, NULL, NULL) + != kNoRspecifier), + out_is_wspecifier = + (ClassifyWspecifier(segmentation_out_fn, NULL, NULL, NULL) + != kNoWspecifier); + + if (in_is_rspecifier != out_is_wspecifier) + KALDI_ERR << "Cannot mix regular files and archives"; + + if (!in_is_rspecifier) { + Segmentation segmentation; + ReadKaldiObject(segmentation_in_fn, &segmentation); + + Matrix feats; + ReadKaldiObject(feats_in_fn, &feats); + + Sort(&segmentation); + int32 num_clusters = ClusterAdjacentSegments( + feats, absolute_distance_threshold, delta_distance_threshold, + var_floor, length_tolerance, + &segmentation); + + KALDI_LOG << "Clustered segments; got " << num_clusters << " clusters."; + WriteKaldiObject(segmentation, segmentation_out_fn, binary); + + return 0; + } else { + int32 num_done = 0, num_err = 0; + + SequentialSegmentationReader segmentation_reader(segmentation_in_fn); + RandomAccessBaseFloatMatrixReader feats_reader(feats_in_fn); + SegmentationWriter segmentation_writer(segmentation_out_fn); + + for (; !segmentation_reader.Done(); segmentation_reader.Next()) { + Segmentation segmentation(segmentation_reader.Value()); + const std::string &key = segmentation_reader.Key(); + + if (!feats_reader.HasKey(key)) { + KALDI_WARN << "Could not find key " << key << " in " + << "feats-rspecifier " << feats_in_fn; + num_err++; + continue; + } + + const MatrixBase &feats = feats_reader.Value(key); + + Sort(&segmentation); + int32 num_clusters = ClusterAdjacentSegments( + feats, absolute_distance_threshold, delta_distance_threshold, + var_floor, length_tolerance, + &segmentation); + KALDI_VLOG(2) << "For key " << key << ", got " << num_clusters + << " clusters."; + + segmentation_writer.Write(key, segmentation); + num_done++; + } + + KALDI_LOG << "Clustered segments from " << num_done << " recordings " + << "failed with " << num_err; + return (num_done != 0 ? 0 : 1); + } + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + + From 120ac02034b2e748f050279309bbd40601a1d348 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:15:22 -0500 Subject: [PATCH 362/530] asr_diarization: aib cluster --- src/segmenterbin/agglomerative-cluster-ib.cc | 160 +++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 src/segmenterbin/agglomerative-cluster-ib.cc diff --git a/src/segmenterbin/agglomerative-cluster-ib.cc b/src/segmenterbin/agglomerative-cluster-ib.cc new file mode 100644 index 00000000000..489b24c24bc --- /dev/null +++ b/src/segmenterbin/agglomerative-cluster-ib.cc @@ -0,0 +1,160 @@ +// segmenterbin/agglomerative-cluster-ib.cc + +// Copyright 2017 Vimal Manohar (Johns Hopkins University) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
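+
+// Sketch of the intended use (inferred from the usage string and options
+// below; archive names are illustrative): each recording's segments come
+// from reco2utt, each segment contributes a relevance distribution (e.g.
+// averaged posteriors), and the output assigns one cluster label per
+// segment:
+//   agglomerative-cluster-ib ark:avg_post.1.ark ark,t:reco2utt ark,t:labels.txt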
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "tree/cluster-utils.h"
+#include "segmenter/information-bottleneck-cluster-utils.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+
+    const char *usage =
+        "Cluster per-utterance probability distributions of "
+        "relevance variables using the Information Bottleneck principle.\n"
+        "Usage: agglomerative-cluster-ib [options] "
+        "<relevance-prob-rspecifier> <reco2utt-rspecifier> "
+        "<labels-wspecifier>\n"
+        " e.g.: agglomerative-cluster-ib ark:avg_post.1.ark "
+        "ark,t:data/dev/reco2utt ark,t:labels.txt";
+
+    ParseOptions po(usage);
+
+    InformationBottleneckClustererOptions opts;
+
+    std::string reco2num_clusters_rspecifier;
+    std::string counts_rspecifier;
+    int32 junk_label = -2;
+    BaseFloat max_merge_thresh = std::numeric_limits<BaseFloat>::max();
+    int32 min_clusters = 1;
+
+    po.Register("reco2num-clusters-rspecifier", &reco2num_clusters_rspecifier,
+                "If supplied, clustering creates exactly this many clusters "
+                "for the corresponding recording.");
+    po.Register("counts-rspecifier", &counts_rspecifier,
+                "The counts for each of the initial segments. If not "
+                "specified, the count is taken to be 1 for each segment.");
+    po.Register("junk-label", &junk_label,
+                "Assign this label to utterances that could not be clustered");
+    po.Register("max-merge-thresh", &max_merge_thresh,
+                "Threshold on cost change from merging clusters; clusters "
+                "won't be merged if the cost is more than this.");
+    po.Register("min-clusters", &min_clusters,
+                "Minimum number of clusters desired; we'll stop merging "
+                "after reaching this number.");
+
+    opts.Register(&po);
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string relevance_prob_rspecifier = po.GetArg(1),
+        reco2utt_rspecifier = po.GetArg(2),
+        label_wspecifier = po.GetArg(3);
+
+    RandomAccessBaseFloatVectorReader relevance_prob_reader(
+        relevance_prob_rspecifier);
+    SequentialTokenVectorReader reco2utt_reader(reco2utt_rspecifier);
+    RandomAccessInt32Reader reco2num_clusters_reader(
+        reco2num_clusters_rspecifier);
+    Int32Writer label_writer(label_wspecifier);
+    RandomAccessBaseFloatReader counts_reader(counts_rspecifier);
+
+    // The count is a weight and may be fractional (counts_reader returns
+    // BaseFloat values), so keep it BaseFloat rather than int32.
+    BaseFloat count = 1.0;
+    int32 num_utt_err = 0, num_reco_err = 0, num_done = 0,
+        num_reco = 0;
+
+    for (; !reco2utt_reader.Done(); reco2utt_reader.Next()) {
+      const std::vector<std::string> &uttlist = reco2utt_reader.Value();
+      const std::string &reco = reco2utt_reader.Key();
+
+      std::vector<Clusterable*> points;
+      points.reserve(uttlist.size());
+
+      int32 id = 0;
+      for (std::vector<std::string>::const_iterator it = uttlist.begin();
+           it != uttlist.end(); ++it, id++) {
+        if (!relevance_prob_reader.HasKey(*it)) {
+          KALDI_WARN << "Could not find relevance probability distribution "
+                     << "for utterance " << *it << " in archive "
+                     << relevance_prob_rspecifier;
+          num_utt_err++;
+          continue;
+        }
+
+        if (!counts_rspecifier.empty()) {
+          if (!counts_reader.HasKey(*it)) {
+            KALDI_WARN << "Could not find counts for utterance " << *it;
+            num_utt_err++;
+            continue;
+          }
+          count = counts_reader.Value(*it);
+        }
+
+        const Vector<BaseFloat> &relevance_prob =
+            relevance_prob_reader.Value(*it);
+
+        points.push_back(
+            new InformationBottleneckClusterable(id, count, relevance_prob));
+        num_done++;
+      }
+
+      std::vector<Clusterable*> clusters_out;
+      std::vector<int32> assignments_out;
+
+      int32 this_num_clusters = min_clusters;
+
+      if (!reco2num_clusters_rspecifier.empty()) {
+        if (!reco2num_clusters_reader.HasKey(reco)) {
+          KALDI_WARN << "Could not find num-clusters for recording "
+                     << reco;
+          num_reco_err++;
+        } else {
+          this_num_clusters =
reco2num_clusters_reader.Value(reco);
+        }
+      }
+
+      IBClusterBottomUp(points, opts, max_merge_thresh, this_num_clusters,
+                        NULL, &assignments_out);
+
+      for (int32 i = 0; i < points.size(); i++) {
+        InformationBottleneckClusterable* point
+          = static_cast<InformationBottleneckClusterable*>(points[i]);
+        int32 id = point->Counts().begin()->first;
+        const std::string &utt = uttlist[id];
+        label_writer.Write(utt, assignments_out[i] + 1);
+      }
+
+      DeletePointers(&points);
+      num_reco++;
+    }
+
+    KALDI_LOG << "Clustered " << num_done << " segments from "
+              << num_reco << " recordings; failed with "
+              << num_utt_err << " segments and "
+              << num_reco_err << " recordings.";
+
+    return (num_done > 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}

From 932073b2a537e5e79d330b6d52b3bc3af93a0b7e Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:16:02 -0500
Subject: [PATCH 363/530] asr_diarization: LSTM SAD music

---
 .../tuning/train_lstm_sad_music_1a.sh         | 267 ++++++++++++++++
 .../tuning/train_lstm_sad_music_1b.sh         | 265 ++++++++++++++++
 .../tuning/train_lstm_sad_music_1c.sh         | 265 ++++++++++++++++
 .../tuning/train_lstm_sad_music_1e.sh         | 269 ++++++++++++++++
 .../tuning/train_lstm_sad_music_1f.sh         | 291 ++++++++++++++++++
 5 files changed, 1357 insertions(+)
 create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1a.sh
 create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1b.sh
 create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1c.sh
 create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1e.sh
 create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1f.sh

diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1a.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1a.sh
new file mode 100644
index 00000000000..4f0754d8355
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1a.sh
@@ -0,0 +1,267 @@
+#!/bin/bash

+# This is a script to train a TDNN-LSTM neural network for speech activity
+# detection (SAD) and music-id, using a statistics pooling component for
+# long-context information.
+# This script is the same as 1e, but removes the stats component in the 3rd layer.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts=   # Directly passed to get_egs_multiple_targets.py
+
+chunk_width=40
+num_chunk_per_minibatch=64
+
+extra_left_context=80
+extra_right_context=0
+
+relu_dim=256
+cell_dim=256
+projection_dim=64
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2  # Small max-param change for small network
+extra_egs_copy_cmd=   # Used if you want to do some weird stuff to egs
+                      # such as removing one of the targets
+
+sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400
+music_data_dir=data/train_aztec_unsad_whole_music_corrupted_sp_hires_bp
+
+extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |"
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=1a
+
+. cmd.sh
+. ./path.sh
+. 
./utils/parse_options.sh
+
+num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l`
+num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
+
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm
+fi
+
+dir=$dir${affix:+_$affix}
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
+EOF
+fi
+
+if [ $stage -le 1 ]; then
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input
+
+  relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true
+  relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true
+  relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true
+  fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3
+  relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=$relu_dim
+  fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6
+  relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim
+
+  output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_music / $num_frames_sad) ** 0.25) / $num_snr_bins"` input=tdnn5
+  output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e "print (($num_frames_music / $num_frames_sad) ** 0.25)"` input=tdnn5
+  output-layer name=output-music include-log-softmax=true dim=2 input=tdnn5
+
+  output name=output-temp input=Append(input@-2,input@-1,input,input@1,input@2)
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs/ \
+    --nnet-edits="rename-node old-name=output-speech new-name=output"
+
+  cat <<EOF >> $dir/configs/vars
+add_lda=false
+EOF
+fi
+
+samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`
+
+if [ -z "$egs_dir" ]; then
+  egs_dir=$dir/egs_multi
+  if [ $stage -le 2 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage
+    fi
+
+    . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + 
--trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1b.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1b.sh new file mode 100644 index 00000000000..cbbb016607a --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1b.sh @@ -0,0 +1,265 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1e, but removes the stats component in the 3rd layer. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=80 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +music_data_dir=data/train_aztec_unsad_whole_music_corrupted_sp_hires_bp + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1b + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic objective-scale=`perl -e "print (($num_frames_music / $num_frames_sad) ** 0.25) / $num_snr_bins"` input=tdnn3 + output-layer name=output-speech include-log-softmax=true dim=2 objective-scale=`perl -e "print (($num_frames_music / $num_frames_sad) ** 0.25)"` input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 input=tdnn3 + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1c.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1c.sh new file mode 100644 index 00000000000..53c2a7a47ac --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1c.sh @@ -0,0 +1,265 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1e, but removes the stats component in the 3rd layer. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. 
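+
+# (Illustrative note.)  In the 1a/1b variants above, the per-task
+# objective-scale is the fourth root of the corpora's frame-count ratio,
+#   objective-scale=`perl -e "print (($num_frames_music / $num_frames_sad) ** 0.25)"`
+# so, e.g., a 16:1 music-to-speech frame ratio scales the speech objective by
+# 2.0.  This variant instead uses precomputed $speech_scale / $music_scale
+# values together with presoftmax prior scales (see the output-layer lines
+# below).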
+ +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=80 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +sad_data_dir=data/train_aztec_unsad_whole_corrupted_sp_hires_bp_2400 +music_data_dir=data/train_aztec_unsad_whole_music_corrupted_sp_hires_bp + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1b + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim + + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=`perl -e "print $speech_scale / $num_snr_bins"` input=tdnn3 + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn3 + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_feat.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"extract-column --column-index=0 scp:- ark,t:- | steps/segmentation/quantize_vector.pl | ali-to-post ark,t:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + 
--trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_feat.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1e.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1e.sh new file mode 100644 index 00000000000..dfb1297c895 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1e.sh @@ -0,0 +1,269 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1c, but uses larger amount of data. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1b + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + +utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn3 + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + #--targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1f.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1f.sh new file mode 100644 index 00000000000..782a31132c6 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1f.sh @@ -0,0 +1,291 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1c, but uses larger amount of data. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. 
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1b + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + +cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + +utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn3 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn3 + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + #--targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + From 53b7649c38555d17ccee7fada57f91f9e1fac94a Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:17:13 -0500 Subject: [PATCH 364/530] asr_diarization: segmentation configs --- egs/ami/s5b/conf/segmentation_speech.conf | 14 ++++++++++++++ egs/aspire/s5/conf/segmentation_speech_simple.conf | 14 ++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 egs/ami/s5b/conf/segmentation_speech.conf create mode 100644 egs/aspire/s5/conf/segmentation_speech_simple.conf diff --git a/egs/ami/s5b/conf/segmentation_speech.conf 
b/egs/ami/s5b/conf/segmentation_speech.conf new file mode 100644 index 00000000000..c4c75b212fc --- /dev/null +++ b/egs/ami/s5b/conf/segmentation_speech.conf @@ -0,0 +1,14 @@ +# General segmentation options +pad_length=20 # Pad speech segments by this many frames on either side +max_relabel_length=10 # Maximum duration of speech that will be removed as part + # of smoothing process. This is only if there are no other + # speech segments nearby. +max_intersegment_length=30 # Merge nearby speech segments if the silence + # between them is less than this many frames. +post_pad_length=10 # Pad speech segments by this many frames on either side + # after the merging process using max_intersegment_length +max_segment_length=1000 # Segments that are longer than this are split into + # overlapping frames. +overlap_length=250 # Overlapping frames when segments are split. + # See the above option. +min_silence_length=20 # Min silence length at which to split very long segments diff --git a/egs/aspire/s5/conf/segmentation_speech_simple.conf b/egs/aspire/s5/conf/segmentation_speech_simple.conf new file mode 100644 index 00000000000..56c178c8115 --- /dev/null +++ b/egs/aspire/s5/conf/segmentation_speech_simple.conf @@ -0,0 +1,14 @@ +# General segmentation options +pad_length=20 # Pad speech segments by this many frames on either side +max_relabel_length=-1 # Maximum duration of speech that will be removed as part + # of smoothing process. This is only if there are no other + # speech segments nearby. +max_intersegment_length=30 # Merge nearby speech segments if the silence + # between them is less than this many frames. +post_pad_length=-1 # Pad speech segments by this many frames on either side + # after the merging process using max_intersegment_length +max_segment_length=1000 # Segments that are longer than this are split into + # overlapping frames. +overlap_length=250 # Overlapping frames when segments are split. + # See the above option. +min_silence_length=20 # Min silence length at which to split very long segments From cb8c7187f62506079a09f1eb5e744fcf21f715c3 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:17:59 -0500 Subject: [PATCH 365/530] An old version of resolve_ctm_overlaps --- egs/wsj/s5/steps/resolve_ctm_overlaps.py.old | 149 +++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100755 egs/wsj/s5/steps/resolve_ctm_overlaps.py.old diff --git a/egs/wsj/s5/steps/resolve_ctm_overlaps.py.old b/egs/wsj/s5/steps/resolve_ctm_overlaps.py.old new file mode 100755 index 00000000000..aaee767e7e4 --- /dev/null +++ b/egs/wsj/s5/steps/resolve_ctm_overlaps.py.old @@ -0,0 +1,149 @@ +#!/usr/bin/env python +# Copyright 2014 Johns Hopkins University (Authors: Daniel Povey, Vijayaditya Peddinti). +# 2016 Vimal Manohar +# Apache 2.0. 
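+
+# (Illustrative note on the overlap resolution below.)  Overlaps are resolved
+# with a midpoint rule: within the region where segment A overlaps the next
+# segment B, a hypothesized word stays in A iff its midpoint falls in the
+# first half of the overlap, and is taken from B's hypothesis otherwise.
+# E.g. with A = [0.0, 10.0] and B = [8.0, 18.0] (overlap 2.0), a word of A at
+# 8.8-9.4 (times relative to A's start) has midpoint 9.1 > 10.0 - 2.0/2 = 9.0,
+# so it is dropped from A and B's hypothesis is used there instead.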
+ +# Script to combine ctms with overlapping segments + +import sys, math, numpy as np, argparse +break_threshold = 0.01 + +def ReadSegments(segments_file): + segments = {} + for line in open(segments_file).readlines(): + parts = line.strip().split() + segments[parts[0]] = (parts[1], float(parts[2]), float(parts[3])) + return segments + +#def get_breaks(ctm, prev_end): +# breaks = [] +# for i in xrange(0, len(ctm)): +# if ctm[i][2] - prev_end > break_threshold: +# breaks.append([i, ctm[i][2]]) +# prev_end = ctm[i][2] + ctm[i][3] +# return np.array(breaks) + +# Resolve overlaps within segments of the same recording +def ResolveOverlaps(ctms, segments): + total_ctm = [] + if len(ctms) == 0: + raise Exception('Something wrong with the input ctms') + + next_utt = ctms[0][0][0] + for ctm_index in range(len(ctms) - 1): + # Assumption here is that the segments are written in consecutive order? + cur_ctm = ctms[ctm_index] + next_ctm = ctms[ctm_index + 1] + + cur_utt = next_utt + next_utt = next_ctm[0][0] + if (next_utt not in segments): + raise Exception('Could not find utterance %s in segments' % next_utt) + + if len(cur_ctm) > 0: + assert(cur_utt == cur_ctm[0][0]) + + assert(next_utt > cur_utt) + if (cur_utt not in segments): + raise Exception('Could not find utterance %s in segments' % cur_utt) + + # length of this segment + window_length = segments[cur_utt][2] - segments[cur_utt][1] + + # overlap of this segment with the next segment + # Note: It is possible for this to be negative when there is actually + # no overlap between consecutive segments. + overlap = segments[cur_utt][2] - segments[next_utt][1] + + # find the breaks after overlap starts + index = len(cur_ctm) + + for i in xrange(len(cur_ctm)): + if (cur_ctm[i][2] + cur_ctm[i][3]/2.0 > (window_length - overlap/2.0)): + # if midpoint of a hypothesis word is beyond the midpoint of the + # overlap region + index = i + break + + # Ignore the hypotheses beyond this midpoint. They will be considered as + # part of the next segment. + total_ctm += cur_ctm[:index] + + # Ignore the hypotheses of the next utterance that overlaps with the + # current utterance + index = -1 + for i in xrange(len(next_ctm)): + if (next_ctm[i][2] + next_ctm[i][3]/2.0 > (overlap/2.0)): + index = i + break + + if index >= 0: + ctms[ctm_index + 1] = next_ctm[index:] + else: + ctms[ctm_index + 1] = [] + + # merge the last ctm entirely + total_ctm += ctms[-1] + + return total_ctm + +def ReadCtm(ctm_file_lines, segments): + ctms = {} + for key in [ x[0] for x in segments.values() ]: + ctms[key] = [] + + ctm = [] + prev_utt = ctm_file_lines[0].split()[0] + for line in ctm_file_lines: + parts = line.split() + if (prev_utt == parts[0]): + ctm.append([parts[0], parts[1], float(parts[2]), + float(parts[3])] + parts[4:]) + else: + # New utterance. 
Append the previous utterance's CTM + # into the list for the utterance's recording + ctms[segments[ctm[0][0]][0]].append(ctm) + + assert(parts[0] > prev_utt) + + prev_utt = parts[0] + ctm = [] + ctm.append([parts[0], parts[1], float(parts[2]), + float(parts[3])] + parts[4:]) + + # append the last ctm + ctms[segments[ctm[0][0]][0]].append(ctm) + return ctms + +def WriteCtm(ctm_lines, out_file): + for line in ctm_lines: + out_file.write("{0} {1} {2} {3} {4}\n".format(line[0], line[1], line[2], line[3], " ".join(line[4:]))) + +if __name__ == "__main__": + usage = """ Python script to resolve overlaps in ctms """ + parser = argparse.ArgumentParser(usage) + parser.add_argument('segments', type=str, help = 'use segments to resolve overlaps') + parser.add_argument('ctm_in', type=str, help='input_ctm_file') + parser.add_argument('ctm_out', type=str, help='output_ctm_file') + params = parser.parse_args() + + if params.ctm_in == "-": + params.ctm_in = sys.stdin + else: + params.ctm_in = open(params.ctm_in) + if params.ctm_out == "-": + params.ctm_out = sys.stdout + else: + params.ctm_out = open(params.ctm_out, 'w') + + segments = ReadSegments(params.segments) + + # Read CTMs into a dictionary indexed by the recording + ctms = ReadCtm(params.ctm_in.readlines(), segments) + + for key in sorted(ctms.keys()): + # Process CTMs in the sorted order of recordings + ctm_reco = ctms[key] + ctm_reco = ResolveOverlaps(ctm_reco, segments) + WriteCtm(ctm_reco, params.ctm_out) + params.ctm_out.close() From 58dc6a6955241c5a1841ab52fc5335c578224e44 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:18:43 -0500 Subject: [PATCH 366/530] asr_diarization: Add steps/data/make_corrupted_data_dir.py --- .../s5/steps/data/make_corrupted_data_dir.py | 613 ++++++++++++++++++ 1 file changed, 613 insertions(+) create mode 100644 egs/wsj/s5/steps/data/make_corrupted_data_dir.py diff --git a/egs/wsj/s5/steps/data/make_corrupted_data_dir.py b/egs/wsj/s5/steps/data/make_corrupted_data_dir.py new file mode 100644 index 00000000000..c0fa94c2a42 --- /dev/null +++ b/egs/wsj/s5/steps/data/make_corrupted_data_dir.py @@ -0,0 +1,613 @@ +#!/usr/bin/env python +# Copyright 2016 Tom Ko +# Apache 2.0 +# script to generate reverberated data + +# we're using python 3.x style print but want it to work in python 2.x, +from __future__ import print_function +import argparse, shlex, glob, math, os, random, sys, warnings, copy, imp, ast + +import data_dir_manipulation_lib as data_lib + +sys.path.insert(0, 'steps') +import libs.common as common_lib + +def GetArgs(): + # we add required arguments as named arguments for readability + parser = argparse.ArgumentParser(description="Reverberate the data directory with an option " + "to add isotropic and point source noises. " + "Usage: reverberate_data_dir.py [options...] " + "E.g. reverberate_data_dir.py --rir-set-parameters rir_list " + "--foreground-snrs 20:10:15:5:0 --background-snrs 20:10:15:5:0 " + "--noise-list-file noise_list --speech-rvb-probability 1 --num-replications 2 " + "--random-seed 1 data/train data/train_rvb", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument("--rir-set-parameters", type=str, action='append', required = True, dest = "rir_set_para_array", + help="Specifies the parameters of an RIR set. " + "Supports the specification of mixture_weight and rir_list_file_name. The mixture weight is optional. 
" + "The default mixture weight is the probability mass remaining after adding the mixture weights " + "of all the RIR lists, uniformly divided among the RIR lists without mixture weights. " + "E.g. --rir-set-parameters '0.3, rir_list' or 'rir_list' " + "the format of the RIR list file is " + "--rir-id --room-id " + "--receiver-position-id --source-position-id " + "--rt-60 --drr location " + "E.g. --rir-id 00001 --room-id 001 --receiver-position-id 001 --source-position-id 00001 " + "--rt60 0.58 --drr -4.885 data/impulses/Room001-00001.wav") + parser.add_argument("--noise-set-parameters", type=str, action='append', + default = None, dest = "noise_set_para_array", + help="Specifies the parameters of an noise set. " + "Supports the specification of mixture_weight and noise_list_file_name. The mixture weight is optional. " + "The default mixture weight is the probability mass remaining after adding the mixture weights " + "of all the noise lists, uniformly divided among the noise lists without mixture weights. " + "E.g. --noise-set-parameters '0.3, noise_list' or 'noise_list' " + "the format of the noise list file is " + "--noise-id --noise-type " + "--bg-fg-type " + "--room-linkage " + "location " + "E.g. --noise-id 001 --noise-type isotropic --rir-id 00019 iso_noise.wav") + parser.add_argument("--speech-segments-set-parameters", type=str, action='append', + default = None, dest = "speech_segments_set_para_array", + help="Specifies the speech segments for overlapped speech generation.\n" + "Format: [], wav_scp, segments_list\n"); + parser.add_argument("--num-replications", type=int, dest = "num_replicas", default = 1, + help="Number of replicate to generated for the data") + parser.add_argument('--foreground-snrs', type=str, dest = "foreground_snr_string", + default = '20:10:0', + help='When foreground noises are being added the script will iterate through these SNRs.') + parser.add_argument('--background-snrs', type=str, dest = "background_snr_string", + default = '20:10:0', + help='When background noises are being added the script will iterate through these SNRs.') + parser.add_argument('--overlap-snrs', type=str, dest = "overlap_snr_string", + default = "20:10:0", + help='When overlapping speech segments are being added the script will iterate through these SNRs.') + parser.add_argument('--prefix', type=str, default = None, + help='This prefix will modified for each reverberated copy, by adding additional affixes.') + parser.add_argument("--speech-rvb-probability", type=float, default = 1.0, + help="Probability of reverberating a speech signal, e.g. 0 <= p <= 1") + parser.add_argument("--pointsource-noise-addition-probability", type=float, default = 1.0, + help="Probability of adding point-source noises, e.g. 0 <= p <= 1") + parser.add_argument("--isotropic-noise-addition-probability", type=float, default = 1.0, + help="Probability of adding isotropic noises, e.g. 0 <= p <= 1") + parser.add_argument("--overlapping-speech-addition-probability", type=float, default = 1.0, + help="Probability of adding overlapping speech, e.g. 0 <= p <= 1") + parser.add_argument("--rir-smoothing-weight", type=float, default = 0.3, + help="Smoothing weight for the RIR probabilties, e.g. 0 <= p <= 1. If p = 0, no smoothing will be done. " + "The RIR distribution will be mixed with a uniform distribution according to the smoothing weight") + parser.add_argument("--noise-smoothing-weight", type=float, default = 0.3, + help="Smoothing weight for the noise probabilties, e.g. 0 <= p <= 1. 
If p = 0, no smoothing will be done. " + "The noise distribution will be mixed with a uniform distribution according to the smoothing weight") + parser.add_argument("--overlapping-speech-smoothing-weight", type=float, default = 0.3, + help="The overlapping speech distribution will be mixed with a uniform distribution according to the smoothing weight") + parser.add_argument("--max-noises-per-minute", type=int, default = 2, + help="This controls the maximum number of point-source noises that could be added to a recording according to its duration") + parser.add_argument("--min-overlapping-segments-per-minute", type=int, default = 1, + help="This controls the minimum number of overlapping segments of speech that could be added to a recording per minute") + parser.add_argument("--max-overlapping-segments-per-minute", type=int, default = 5, + help="This controls the maximum number of overlapping segments of speech that could be added to a recording per minute") + parser.add_argument('--random-seed', type=int, default=0, + help='seed to be used in the randomization of impulses and noises') + parser.add_argument("--shift-output", type=str, + help="If true, the reverberated waveform will be shifted by the amount of the peak position of the RIR", + choices=['true', 'false'], default = "true") + parser.add_argument('--source-sampling-rate', type=int, default=None, + help="Sampling rate of the source data. If a positive integer is specified with this option, " + "the RIRs/noises will be resampled to the rate of the source data.") + parser.add_argument("--include-original-data", type=str, help="If true, the output data includes one copy of the original data", + choices=['true', 'false'], default = "false") + parser.add_argument("--output-additive-noise-dir", type=str, + action = common_lib.NullstrToNoneAction, default = None, + help="Output directory corresponding to the additive noise part of the data corruption") + parser.add_argument("--output-reverb-dir", type=str, + action = common_lib.NullstrToNoneAction, default = None, + help="Output directory corresponding to the reverberated signal part of the data corruption") + + parser.add_argument("input_dir", + help="Input data directory") + parser.add_argument("output_dir", + help="Output data directory") + + print(' '.join(sys.argv)) + + args = parser.parse_args() + args = CheckArgs(args) + + return args + +def CheckArgs(args): + if not os.path.exists(args.output_dir): + os.makedirs(args.output_dir) + + ## Check arguments. + + if args.prefix is None: + if args.num_replicas > 1 or args.include_original_data == "true": + args.prefix = "rvb" + warnings.warn("--prefix is set to 'rvb' as more than one copy of data is generated") + + if args.output_reverb_dir is not None: + if not os.path.exists(args.output_reverb_dir): + os.makedirs(args.output_reverb_dir) + + if args.output_additive_noise_dir is not None: + if not os.path.exists(args.output_additive_noise_dir): + os.makedirs(args.output_additive_noise_dir) + + ## Check arguments. 
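+
+    # (Illustrative note.)  The *-smoothing-weight options checked below mix
+    # each empirical sampling distribution with a uniform one:
+    #     p_smoothed[i] = (1 - w) * p[i] + w / len(p)
+    # e.g. w = 0.3 applied to p = [0.9, 0.1] gives [0.78, 0.22], and w = 0
+    # leaves the distribution unchanged (see
+    # data_lib.SmoothProbabilityDistribution).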
+
+    if not args.num_replicas > 0:
+        raise Exception("--num-replications must be positive")
+
+    if args.speech_rvb_probability < 0 or args.speech_rvb_probability > 1:
+        raise Exception("--speech-rvb-probability must be between 0 and 1")
+
+    if args.pointsource_noise_addition_probability < 0 or args.pointsource_noise_addition_probability > 1:
+        raise Exception("--pointsource-noise-addition-probability must be between 0 and 1")
+
+    if args.isotropic_noise_addition_probability < 0 or args.isotropic_noise_addition_probability > 1:
+        raise Exception("--isotropic-noise-addition-probability must be between 0 and 1")
+
+    if args.overlapping_speech_addition_probability < 0 or args.overlapping_speech_addition_probability > 1:
+        raise Exception("--overlapping-speech-addition-probability must be between 0 and 1")
+
+    if args.rir_smoothing_weight < 0 or args.rir_smoothing_weight > 1:
+        raise Exception("--rir-smoothing-weight must be between 0 and 1")
+
+    if args.noise_smoothing_weight < 0 or args.noise_smoothing_weight > 1:
+        raise Exception("--noise-smoothing-weight must be between 0 and 1")
+
+    if args.overlapping_speech_smoothing_weight < 0 or args.overlapping_speech_smoothing_weight > 1:
+        raise Exception("--overlapping-speech-smoothing-weight must be between 0 and 1")
+
+    if args.max_noises_per_minute < 0:
+        raise Exception("--max-noises-per-minute cannot be negative")
+
+    if args.min_overlapping_segments_per_minute < 0:
+        raise Exception("--min-overlapping-segments-per-minute cannot be negative")
+
+    if args.max_overlapping_segments_per_minute < 0:
+        raise Exception("--max-overlapping-segments-per-minute cannot be negative")
+
+    return args
+
+def ParseSpeechSegmentsList(speech_segments_set_para_array, smoothing_weight):
+    set_list = []
+    for set_para in speech_segments_set_para_array:
+        seg_set = argparse.Namespace()
+        setattr(seg_set, "wav_scp", None)
+        setattr(seg_set, "segments", None)
+        setattr(seg_set, "probability", None)
+        parts = set_para.split(',')
+        if len(parts) == 3:
+            seg_set.probability = float(parts[0])
+            seg_set.wav_scp = parts[1].strip()
+            seg_set.segments = parts[2].strip()
+        else:
+            seg_set.wav_scp = parts[0].strip()
+            seg_set.segments = parts[1].strip()
+        if not os.path.isfile(seg_set.wav_scp):
+            raise Exception(seg_set.wav_scp + " not found")
+        if not os.path.isfile(seg_set.segments):
+            raise Exception(seg_set.segments + " not found")
+        set_list.append(seg_set)
+
+    set_list = data_lib.SmoothProbabilityDistribution(set_list)
+
+    segments_list = []
+    for segments_set in set_list:
+        current_segments_list = []
+
+        wav_dict = {}
+        for s in open(segments_set.wav_scp):
+            parts = s.strip().split()
+            wav_dict[parts[0]] = ' '.join(parts[1:])
+
+        for s in open(segments_set.segments):
+            parts = s.strip().split()
+            current_segment = argparse.Namespace()
+            current_segment.utt_id = parts[0]
+            current_segment.probability = None
+
+            start_time = float(parts[2])
+            end_time = float(parts[3])
+
+            current_segment.duration = (end_time - start_time)
+
+            wav_rxfilename = wav_dict[parts[1]]
+            if wav_rxfilename.split()[-1] == '|':
+                current_segment.wav_rxfilename = "{0} sox -t wav - -t wav - trim {1} {2} |".format(wav_rxfilename, start_time, end_time - start_time)
+            else:
+                current_segment.wav_rxfilename = "sox {0} -t wav - trim {1} {2} |".format(wav_rxfilename, start_time, end_time - start_time)
+
+            current_segments_list.append(current_segment)
+
+        segments_list += 
data_lib.SmoothProbabilityDistribution(current_segments_list, smoothing_weight, segments_set.probability)
+
+    return segments_list
+
+def AddOverlappingSpeech(room, # the room selected
+                         speech_segments_list, # the speech list
+                         overlapping_speech_addition_probability, # Probability of adding another speech waveform
+                         snrs, # the SNRs for adding the overlapping speech
+                         speech_dur, # duration of the recording
+                         min_overlapping_speech_segments, # Minimum number of speech signals that can be added
+                         max_overlapping_speech_segments, # Maximum number of speech signals that can be added
+                         overlapping_speech_descriptor # descriptor to store the information of the overlapping speech
+                         ):
+    if (len(speech_segments_list) > 0 and random.random() < overlapping_speech_addition_probability
+            and max_overlapping_speech_segments >= 1):
+        for k in range(1, random.randint(min_overlapping_speech_segments, max_overlapping_speech_segments) + 1):
+            # pick the overlapping speech signal and the RIR to
+            # reverberate the overlapping speech signal
+            speech_segment = data_lib.PickItemWithProbability(speech_segments_list)
+            rir = data_lib.PickItemWithProbability(room.rir_list)
+
+            speech_rvb_command = """wav-reverberate --impulse-response="{0}" --shift-output=true """.format(rir.rir_rspecifier)
+            overlapping_speech_descriptor['start_times'].append(
+                round(random.random()
+                      * max(speech_dur - speech_segment.duration, 0), 2))
+            overlapping_speech_descriptor['snrs'].append(snrs.next())
+            overlapping_speech_descriptor['utt_ids'].append(speech_segment.utt_id)
+            overlapping_speech_descriptor['durations'].append(speech_segment.duration)
+
+            if len(speech_segment.wav_rxfilename.split()) == 1:
+                overlapping_speech_descriptor['speech_segments'].append("{1} {0} - |".format(speech_segment.wav_rxfilename, speech_rvb_command))
+            else:
+                overlapping_speech_descriptor['speech_segments'].append("{0} {1} - - |".format(speech_segment.wav_rxfilename, speech_rvb_command))
+
+# This function randomly decides whether to reverberate, and samples a RIR if it does
+# It also decides whether to add the appropriate noises
+# This function returns the string of options to the binary wav-reverberate
+def GenerateReverberationAndOverlappedSpeechOpts(
+        room_dict, # the room dictionary, please refer to MakeRoomDict() for the format
+        pointsource_noise_list, # the point source noise list
+        iso_noise_dict, # the isotropic noise dictionary
+        foreground_snrs, # the SNR for adding the foreground noises
+        background_snrs, # the SNR for adding the background noises
+        speech_segments_list,
+        overlap_snrs,
+        speech_rvb_probability, # Probability of reverberating a speech signal
+        isotropic_noise_addition_probability, # Probability of adding isotropic noises
+        pointsource_noise_addition_probability, # Probability of adding point-source noises
+        overlapping_speech_addition_probability, # Probability of adding overlapping speech segments
+        speech_dur, # duration of the recording
+        max_noises_recording, # Maximum number of point-source noises that can be added
+        min_overlapping_segments_recording, # Minimum number of overlapping segments that can be added
+        max_overlapping_segments_recording # Maximum number of overlapping segments that can be added
+        ):
+    impulse_response_opts = ""
+
+    noise_addition_descriptor = {'noise_io': [],
+                                 'start_times': [],
+                                 'snrs': [],
+                                 'noise_ids': [],
+                                 'durations': []
+                                 }
+
+    # Randomly select the room
+    # Here the room probability is a sum of the probabilities of the RIRs recorded in the room.
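(A note on the selection logic used here and in the mixture-weight help text above: data_lib.PickItemWithProbability is assumed to draw an item in proportion to its .probability attribute, after any unset mixture weights have been filled in from the leftover probability mass. A minimal sketch of that assumed behavior, with illustrative names only, not the actual data_lib API:

    import random

    def smooth_probability_distribution(items):
        # The mass left over after summing the explicit weights is divided
        # uniformly among the items that did not specify a weight.
        given = sum(i.probability for i in items if i.probability is not None)
        unset = [i for i in items if i.probability is None]
        for i in unset:
            i.probability = max(0.0, 1.0 - given) / len(unset)
        return items

    def pick_item_with_probability(items):
        # Draw an item with chance proportional to item.probability.
        x = random.random() * sum(i.probability for i in items)
        for i in items:
            x -= i.probability
            if x <= 0:
                return i
        return items[-1]
)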
+ room = data_lib.PickItemWithProbability(room_dict) + # Randomly select the RIR in the room + speech_rir = data_lib.PickItemWithProbability(room.rir_list) + if random.random() < speech_rvb_probability: + # pick the RIR to reverberate the speech + impulse_response_opts = """--impulse-response="{0}" """.format(speech_rir.rir_rspecifier) + + rir_iso_noise_list = [] + if speech_rir.room_id in iso_noise_dict: + rir_iso_noise_list = iso_noise_dict[speech_rir.room_id] + # Add the corresponding isotropic noise associated with the selected RIR + if len(rir_iso_noise_list) > 0 and random.random() < isotropic_noise_addition_probability: + isotropic_noise = data_lib.PickItemWithProbability(rir_iso_noise_list) + # extend the isotropic noise to the length of the speech waveform + # check if it is really a pipe + if len(isotropic_noise.noise_rspecifier.split()) == 1: + noise_addition_descriptor['noise_io'].append("wav-reverberate --duration={1} {0} - |".format(isotropic_noise.noise_rspecifier, speech_dur)) + else: + noise_addition_descriptor['noise_io'].append("{0} wav-reverberate --duration={1} - - |".format(isotropic_noise.noise_rspecifier, speech_dur)) + noise_addition_descriptor['start_times'].append(0) + noise_addition_descriptor['snrs'].append(background_snrs.next()) + noise_addition_descriptor['noise_ids'].append(isotropic_noise.noise_id) + noise_addition_descriptor['durations'].append(speech_dur) + + data_lib.AddPointSourceNoise(room, # the room selected + pointsource_noise_list, # the point source noise list + pointsource_noise_addition_probability, # Probability of adding point-source noises + foreground_snrs, # the SNR for adding the foreground noises + background_snrs, # the SNR for adding the background noises + speech_dur, # duration of the recording + max_noises_recording, # Maximum number of point-source noises that can be added + noise_addition_descriptor # descriptor to store the information of the noise added + ) + + assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['start_times']) + assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['snrs']) + assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['noise_ids']) + assert len(noise_addition_descriptor['noise_io']) == len(noise_addition_descriptor['durations']) + + overlapping_speech_descriptor = {'speech_segments': [], + 'start_times': [], + 'snrs': [], + 'utt_ids': [], + 'durations': [] + } + + AddOverlappingSpeech(room, + speech_segments_list, # speech segments list + overlapping_speech_addition_probability, + overlap_snrs, + speech_dur, + min_overlapping_segments_recording, + max_overlapping_segments_recording, + overlapping_speech_descriptor + ) + + return [impulse_response_opts, noise_addition_descriptor, + overlapping_speech_descriptor] + +# This is the main function to generate pipeline command for the corruption +# The generic command of wav-reverberate will be like: +# wav-reverberate --duration=t --impulse-response=rir.wav +# --additive-signals='noise1.wav,noise2.wav' --snrs='snr1,snr2' --start-times='s1,s2' input.wav output.wav +def GenerateReverberatedWavScpWithOverlappedSpeech( + wav_scp, # a dictionary whose values are the Kaldi-IO strings of the speech recordings + durations, # a dictionary whose values are the duration (in sec) of the speech recordings + output_dir, # output directory to write the corrupted wav.scp + room_dict, # the room dictionary, please refer to MakeRoomDict() for the format + pointsource_noise_list, # the 
point source noise list
+        iso_noise_dict, # the isotropic noise dictionary
+        foreground_snr_array, # the SNR for adding the foreground noises
+        background_snr_array, # the SNR for adding the background noises
+        speech_segments_list, # list of speech segments to create overlapped speech
+        overlap_snr_array, # the SNR for adding overlapping speech
+        num_replicas, # Number of replicas to generate for the data
+        include_original, # include a copy of the original data
+        prefix, # prefix for the id of the corrupted utterances
+        speech_rvb_probability, # Probability of reverberating a speech signal
+        shift_output, # option whether to shift the output waveform
+        isotropic_noise_addition_probability, # Probability of adding isotropic noises
+        pointsource_noise_addition_probability, # Probability of adding point-source noises
+        max_noises_per_minute, # maximum number of point-source noises that can be added to a recording according to its duration
+        overlapping_speech_addition_probability,
+        min_overlapping_segments_per_minute,
+        max_overlapping_segments_per_minute,
+        output_reverb_dir = None,
+        output_additive_noise_dir = None
+        ):
+    foreground_snrs = data_lib.list_cyclic_iterator(foreground_snr_array)
+    background_snrs = data_lib.list_cyclic_iterator(background_snr_array)
+    overlap_snrs = data_lib.list_cyclic_iterator(overlap_snr_array)
+
+    corrupted_wav_scp = {}
+    reverb_wav_scp = {}
+    additive_noise_wav_scp = {}
+    overlapping_segments_info = {}
+
+    keys = wav_scp.keys()
+    keys.sort()
+
+    if include_original:
+        start_index = 0
+    else:
+        start_index = 1
+
+    for i in range(start_index, num_replicas+1):
+        for recording_id in keys:
+            wav_original_pipe = wav_scp[recording_id]
+            # check if it is really a pipe
+            if len(wav_original_pipe.split()) == 1:
+                wav_original_pipe = "cat {0} |".format(wav_original_pipe)
+            speech_dur = durations[recording_id]
+            max_noises_recording = math.floor(max_noises_per_minute * speech_dur / 60)
+            min_overlapping_segments_recording = max(math.floor(min_overlapping_segments_per_minute * speech_dur / 60), 1)
+            max_overlapping_segments_recording = math.ceil(max_overlapping_segments_per_minute * speech_dur / 60)
+
+            [impulse_response_opts, noise_addition_descriptor,
+             overlapping_speech_descriptor] = GenerateReverberationAndOverlappedSpeechOpts(
+                room_dict = room_dict, # the room dictionary, please refer to MakeRoomDict() for the format
+                pointsource_noise_list = pointsource_noise_list, # the point source noise list
+                iso_noise_dict = iso_noise_dict, # the isotropic noise dictionary
+                foreground_snrs = foreground_snrs, # the SNR for adding the foreground noises
+                background_snrs = background_snrs, # the SNR for adding the background noises
+                speech_segments_list = speech_segments_list, # Speech segments for creating overlapped speech
+                overlap_snrs = overlap_snrs, # the SNR for adding overlapping speech
+                speech_rvb_probability = speech_rvb_probability, # Probability of reverberating a speech signal
+                isotropic_noise_addition_probability = isotropic_noise_addition_probability, # Probability of adding isotropic noises
+                pointsource_noise_addition_probability = pointsource_noise_addition_probability, # Probability of adding point-source noises
+                overlapping_speech_addition_probability = overlapping_speech_addition_probability,
+                speech_dur = speech_dur, # duration of the recording
+                max_noises_recording = max_noises_recording, # Maximum number of point-source noises that can be added
+                min_overlapping_segments_recording = min_overlapping_segments_recording,
+                
max_overlapping_segments_recording = max_overlapping_segments_recording
+                )
+
+            additive_noise_opts = ""
+
+            if (len(noise_addition_descriptor['noise_io']) > 0 or
+                    len(overlapping_speech_descriptor['speech_segments']) > 0):
+                additive_noise_opts += ("--additive-signals='{0}' "
+                                        .format(','
+                                                .join(noise_addition_descriptor['noise_io']
+                                                      + overlapping_speech_descriptor['speech_segments']))
+                                        )
+                additive_noise_opts += ("--start-times='{0}' "
+                                        .format(','
+                                                .join(map(lambda x:str(x), noise_addition_descriptor['start_times']
+                                                          + overlapping_speech_descriptor['start_times'])))
+                                        )
+                additive_noise_opts += ("--snrs='{0}' "
+                                        .format(','
+                                                .join(map(lambda x:str(x), noise_addition_descriptor['snrs']
+                                                          + overlapping_speech_descriptor['snrs'])))
+                                        )
+
+            reverberate_opts = impulse_response_opts + additive_noise_opts
+
+            new_recording_id = data_lib.GetNewId(recording_id, prefix, i)
+
+            # prefix using index 0 is reserved for original data e.g. rvb0_swb0035 corresponds to the swb0035 recording in the original data
+            if reverberate_opts == "" or i == 0:
+                wav_corrupted_pipe = "{0}".format(wav_original_pipe)
+            else:
+                wav_corrupted_pipe = "{0} wav-reverberate --shift-output={1} {2} - - |".format(wav_original_pipe, shift_output, reverberate_opts)
+
+            corrupted_wav_scp[new_recording_id] = wav_corrupted_pipe
+
+            if output_reverb_dir is not None:
+                if impulse_response_opts == "":
+                    wav_reverb_pipe = "{0}".format(wav_original_pipe)
+                else:
+                    wav_reverb_pipe = "{0} wav-reverberate --shift-output={1} --reverb-out-wxfilename=- {2} - /dev/null |".format(wav_original_pipe, shift_output, reverberate_opts)
+                reverb_wav_scp[new_recording_id] = wav_reverb_pipe
+
+            if output_additive_noise_dir is not None:
+                if additive_noise_opts != "":
+                    wav_additive_noise_pipe = "{0} wav-reverberate --shift-output={1} --additive-noise-out-wxfilename=- {2} - /dev/null |".format(wav_original_pipe, shift_output, reverberate_opts)
+                    additive_noise_wav_scp[new_recording_id] = wav_additive_noise_pipe
+                else:
+                    assert False
+
+            if len(overlapping_speech_descriptor['speech_segments']) > 0:
+                overlapping_segments_info[new_recording_id] = [
+                    ':'.join(x)
+                    for x in zip(overlapping_speech_descriptor['utt_ids'],
+                                 [ str(x) for x in overlapping_speech_descriptor['start_times'] ],
+                                 [ str(x) for x in overlapping_speech_descriptor['durations'] ])
+                    ]
+
+    data_lib.WriteDictToFile(corrupted_wav_scp, output_dir + "/wav.scp")
+
+    # Write, for each new recording, the utterance ids, start times and durations
+    # of the overlapping segments
+    data_lib.WriteDictToFile(overlapping_segments_info, output_dir + "/overlapped_segments_info.txt")
+
+    if output_reverb_dir is not None:
+        data_lib.WriteDictToFile(reverb_wav_scp, output_reverb_dir + "/wav.scp")
+
+    if output_additive_noise_dir is not None:
+        data_lib.WriteDictToFile(additive_noise_wav_scp, output_additive_noise_dir + "/wav.scp")
+
+
+# This function creates multiple copies of the necessary files, e.g. utt2spk, wav.scp ...
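(Aside on the id scheme used by data_lib.GetNewId above: judging from the comments in this function, copy i of a recording with prefix "rvb" becomes "rvb<i>_<recording-id>", with index 0 reserved for the unmodified original. A rough sketch of that assumed behavior, not the actual data_lib implementation:

    def get_new_id(recording_id, prefix=None, copy=0):
        # e.g. get_new_id("swb0035", "rvb", 1) -> "rvb1_swb0035";
        # copy 0 ("rvb0_swb0035") is reserved for the original data.
        if prefix is None:
            return recording_id
        return "{0}{1}_{2}".format(prefix, copy, recording_id)
)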
+def CreateReverberatedCopy(input_dir,
+                           output_dir,
+                           room_dict, # the room dictionary, please refer to MakeRoomDict() for the format
+                           pointsource_noise_list, # the point source noise list
+                           iso_noise_dict, # the isotropic noise dictionary
+                           speech_segments_list,
+                           foreground_snr_string, # the SNR for adding the foreground noises
+                           background_snr_string, # the SNR for adding the background noises
+                           overlap_snr_string, # the SNR for overlapping speech
+                           num_replicas, # Number of replicas to generate for the data
+                           include_original, # include a copy of the original data
+                           prefix, # prefix for the id of the corrupted utterances
+                           speech_rvb_probability, # Probability of reverberating a speech signal
+                           shift_output, # option whether to shift the output waveform
+                           isotropic_noise_addition_probability, # Probability of adding isotropic noises
+                           pointsource_noise_addition_probability, # Probability of adding point-source noises
+                           max_noises_per_minute, # maximum number of point-source noises that can be added to a recording according to its duration
+                           overlapping_speech_addition_probability,
+                           min_overlapping_segments_per_minute,
+                           max_overlapping_segments_per_minute,
+                           output_reverb_dir = None,
+                           output_additive_noise_dir = None
+                           ):
+
+    wav_scp = data_lib.ParseFileToDict(input_dir + "/wav.scp", value_processor = lambda x: " ".join(x))
+    if not os.path.isfile(input_dir + "/reco2dur"):
+        print("Getting the duration of the recordings...")
+        read_entire_file="false"
+        for value in wav_scp.values():
+            # we will add more checks for sox commands which modify the header as we come across these cases in our data
+            if "sox" in value and "speed" in value:
+                read_entire_file="true"
+                break
+        data_lib.RunKaldiCommand("wav-to-duration --read-entire-file={1} scp:{0}/wav.scp ark,t:{0}/reco2dur".format(input_dir, read_entire_file))
+    durations = data_lib.ParseFileToDict(input_dir + "/reco2dur", value_processor = lambda x: float(x[0]))
+    foreground_snr_array = map(lambda x: float(x), foreground_snr_string.split(':'))
+    background_snr_array = map(lambda x: float(x), background_snr_string.split(':'))
+    overlap_snr_array = map(lambda x: float(x), overlap_snr_string.split(':'))
+
+    GenerateReverberatedWavScpWithOverlappedSpeech(
+        wav_scp = wav_scp,
+        durations = durations,
+        output_dir = output_dir,
+        room_dict = room_dict,
+        pointsource_noise_list = pointsource_noise_list,
+        iso_noise_dict = iso_noise_dict,
+        foreground_snr_array = foreground_snr_array,
+        background_snr_array = background_snr_array,
+        speech_segments_list = speech_segments_list,
+        overlap_snr_array = overlap_snr_array,
+        num_replicas = num_replicas, include_original=include_original, prefix = prefix,
+        speech_rvb_probability = speech_rvb_probability,
+        shift_output = shift_output,
+        isotropic_noise_addition_probability = isotropic_noise_addition_probability,
+        pointsource_noise_addition_probability = pointsource_noise_addition_probability,
+        max_noises_per_minute = max_noises_per_minute,
+        overlapping_speech_addition_probability = overlapping_speech_addition_probability,
+        min_overlapping_segments_per_minute = min_overlapping_segments_per_minute,
+        max_overlapping_segments_per_minute = max_overlapping_segments_per_minute,
+        output_reverb_dir = output_reverb_dir,
+        output_additive_noise_dir = output_additive_noise_dir)
+
+    data_lib.CopyDataDirFiles(input_dir, output_dir, num_replicas, include_original=include_original, prefix=prefix)
+
+    if output_reverb_dir is not None:
+        data_lib.CopyDataDirFiles(input_dir, output_reverb_dir, num_replicas, 
include_original=include_original, prefix=prefix)
+
+    if output_additive_noise_dir is not None:
+        data_lib.CopyDataDirFiles(input_dir, output_additive_noise_dir, num_replicas, include_original=include_original, prefix=prefix)
+
+
+def Main():
+    args = GetArgs()
+    random.seed(args.random_seed)
+    rir_list = data_lib.ParseRirList(args.rir_set_para_array, args.rir_smoothing_weight, args.source_sampling_rate)
+    print("Number of RIRs is {0}".format(len(rir_list)))
+    pointsource_noise_list = []
+    iso_noise_dict = {}
+    if args.noise_set_para_array is not None:
+        pointsource_noise_list, iso_noise_dict = data_lib.ParseNoiseList(args.noise_set_para_array, args.noise_smoothing_weight, args.source_sampling_rate)
+        print("Number of point-source noises is {0}".format(len(pointsource_noise_list)))
+        print("Number of isotropic noises is {0}".format(sum(len(iso_noise_dict[key]) for key in iso_noise_dict.keys())))
+    room_dict = data_lib.MakeRoomDict(rir_list)
+
+    if args.include_original_data == "true":
+        include_original = True
+    else:
+        include_original = False
+
+    speech_segments_list = []
+    if args.speech_segments_set_para_array is not None:
+        speech_segments_list = ParseSpeechSegmentsList(args.speech_segments_set_para_array, args.overlapping_speech_smoothing_weight)
+
+    CreateReverberatedCopy(input_dir = args.input_dir,
+                           output_dir = args.output_dir,
+                           room_dict = room_dict,
+                           pointsource_noise_list = pointsource_noise_list,
+                           iso_noise_dict = iso_noise_dict,
+                           speech_segments_list = speech_segments_list,
+                           foreground_snr_string = args.foreground_snr_string,
+                           background_snr_string = args.background_snr_string,
+                           overlap_snr_string = args.overlap_snr_string,
+                           num_replicas = args.num_replicas,
+                           include_original = include_original,
+                           prefix = args.prefix,
+                           speech_rvb_probability = args.speech_rvb_probability,
+                           shift_output = args.shift_output,
+                           isotropic_noise_addition_probability = args.isotropic_noise_addition_probability,
+                           pointsource_noise_addition_probability = args.pointsource_noise_addition_probability,
+                           max_noises_per_minute = args.max_noises_per_minute,
+                           overlapping_speech_addition_probability = args.overlapping_speech_addition_probability,
+                           min_overlapping_segments_per_minute = args.min_overlapping_segments_per_minute,
+                           max_overlapping_segments_per_minute = args.max_overlapping_segments_per_minute,
+                           output_reverb_dir = args.output_reverb_dir,
+                           output_additive_noise_dir = args.output_additive_noise_dir)
+
+if __name__ == "__main__":
+    Main()

From 613f0aa8921baf05c8315f829f40e7a96b28b88e Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:19:37 -0500
Subject: [PATCH 367/530] asr_diarization: Add deprecated sad run scripts

---
 .../local/segmentation/run_train_sad_music.sh | 161 ++++++++++++++++
 .../run_train_sad_ovlp_logprob.sh | 148 +++++++++++++++
 .../segmentation/run_train_stats_sad_music.sh | 172 ++++++++++++++++++
 .../v1/local/run_dnn_music_id.sh | 130 +++++++++++++
 4 files changed, 611 insertions(+)
 create mode 100644 egs/aspire/s5/local/segmentation/run_train_sad_music.sh
 create mode 100644 egs/aspire/s5/local/segmentation/run_train_sad_ovlp_logprob.sh
 create mode 100644 egs/aspire/s5/local/segmentation/run_train_stats_sad_music.sh
 create mode 100755 egs/bn_music_speech/v1/local/run_dnn_music_id.sh

diff --git a/egs/aspire/s5/local/segmentation/run_train_sad_music.sh b/egs/aspire/s5/local/segmentation/run_train_sad_music.sh
new file mode 100644
index 00000000000..5acb4bf4306
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/run_train_sad_music.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+
+# this is the standard "tdnn" system, built in nnet3; it's 
what we used to
+# call multi-splice.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts=
+
+splice_indexes="-3,-2,-1,0,1,2,3 -6,0 -9,0,3 0"
+relu_dim=256
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=1
+extra_egs_copy_cmd=
+
+num_utts_subset_valid=40
+num_utts_subset_train=40
+add_idct=true
+
+# target options
+train_data_dir=data/train_azteec_whole_sp_corrupted_hires
+
+snr_scp=
+speech_feat_scp=
+music_labels_scp=
+
+deriv_weights_scp=
+deriv_weights_for_irm_scp=
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=a
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+num_hidden_layers=`echo $splice_indexes | perl -ane 'print scalar @F'` || exit 1
+if [ -z "$dir" ]; then
+  dir=exp/nnet3_sad_snr/nnet_tdnn
+fi
+
+dir=$dir${affix:+_$affix}_n${num_hidden_layers}
+
+if ! cuda-compiled; then
+  cat <

" + echo " e.g.: $0 data/bn exp/nnet3_sad_snr/tdnn_b_n4/sad_bn_whole exp/nnet3_sad_snr/tdnn_b_n4/music_bn_whole exp/nnet3_sad_snr/tdnn_b_n4/segmentation_bn_whole exp/nnet3_sad_snr/tdnn_b_n4/segmentation_music_bn_whole exp/dnn_music_id" + exit 1 +fi + +data=$1 +sad_likes_dir=$2 +music_likes_dir=$3 +dir=$4 + +min_silence_duration=`perl -e "print (int($min_silence_duration / $frame_subsampling_factor))"` +min_speech_duration=`perl -e "print (int($min_speech_duration / $frame_subsampling_factor))"` +min_music_duration=`perl -e "print (int($min_music_duration / $frame_subsampling_factor))"` + +lang=$dir/lang + +if [ $stage -le 1 ]; then + mkdir -p $lang + + # Create a lang directory with phones.txt and topo with + # silence, music and speech phones. + steps/segmentation/internal/prepare_sad_lang.py \ + --phone-transition-parameters="--phone-list=1 --min-duration=$min_silence_duration --end-transition-probability=$sil_transition_probability" \ + --phone-transition-parameters="--phone-list=2 --min-duration=$min_speech_duration --end-transition-probability=$speech_transition_probability" \ + --phone-transition-parameters="--phone-list=3 --min-duration=$min_music_duration --end-transition-probability=$music_transition_probability" \ + $lang + + cp $lang/phones.txt $lang/words.txt +fi + +feat_dim=2 # dummy. We don't need this. +if [ $stage -le 2 ]; then + $cmd $dir/log/create_transition_model.log gmm-init-mono \ + $lang/topo $feat_dim - $dir/tree \| \ + copy-transition-model --binary=false - $dir/trans.mdl || exit 1 +fi + +# Make unigram G.fst +if [ $stage -le 3 ]; then + cat > $lang/word2prior < $lang/G.fst +fi + +graph_dir=$dir/graph_test + +if [ $stage -le 4 ]; then + $cmd $dir/log/make_vad_graph.log \ + steps/segmentation/internal/make_sad_graph.sh --iter trans \ + $lang $dir $dir/graph_test || exit 1 +fi + +if [ $stage -le 5 ]; then + utils/split_data.sh $data $nj + sdata=$data/split$nj + + nj_sad=`cat $sad_likes_dir/num_jobs` + sad_likes= + for n in `seq $nj_sad`; do + sad_likes="$sad_likes $sad_likes_dir/log_likes.$n.gz" + done + + nj_music=`cat $music_likes_dir/num_jobs` + music_likes= + for n in `seq $nj_music`; do + music_likes="$music_likes $music_likes_dir/log_likes.$n.gz" + done + + decoder_opts+=(--acoustic-scale=$acwt --beam=$beam --max-active=$max_active) + $cmd JOB=1:$nj $dir/log/decode.JOB.log \ + paste-feats "ark:gunzip -c $sad_likes | extract-feature-segments ark,s,cs:- $sdata/JOB/segments ark:- |" \ + "ark,s,cs:gunzip -c $music_likes | extract-feature-segments ark,s,cs:- $sdata/JOB/segments ark:- | select-feats 1 ark:- ark:- |" \ + ark:- \| decode-faster-mapped ${decoder_opts[@]} \ + $dir/trans.mdl $graph_dir/HCLG.fst ark:- \ + ark:/dev/null ark:- \| \ + ali-to-phones --per-frame $dir/trans.mdl ark:- \ + "ark:|gzip -c > $dir/ali.JOB.gz" +fi + +include_silence=true +if [ $stage -le 6 ]; then + $cmd JOB=1:$nj $dir/log/get_class_id.JOB.log \ + ali-to-post "ark:gunzip -c $dir/ali.JOB.gz |" ark:- \| \ + post-to-feats --post-dim=4 ark:- ark:- \| \ + matrix-sum-rows --do-average ark:- ark,t:- \| \ + sid/vector_to_music_labels.pl ${include_silence:+--include-silence-in-music} '>' $dir/ratio.JOB +fi + +for n in `seq $nj`; do + cat $dir/ratio.$n +done > $dir/ratio + +cat $dir/ratio | local/print_scores.py /dev/stdin | compute-eer - From 840bee25d288d61a3364318daa34ac9ec9e9e816 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:21:45 -0500 Subject: [PATCH 368/530] asr_diarization: Add deprecated do_corruption_whole_data_dir_overlapped_speech.sh --- 
...uption_whole_data_dir_overlapped_speech.sh | 284 ++++++++++++++++++
 1 file changed, 284 insertions(+)
 create mode 100755 egs/aspire/s5/local/segmentation/do_corruption_whole_data_dir_overlapped_speech.sh

diff --git a/egs/aspire/s5/local/segmentation/do_corruption_whole_data_dir_overlapped_speech.sh b/egs/aspire/s5/local/segmentation/do_corruption_whole_data_dir_overlapped_speech.sh
new file mode 100755
index 00000000000..75dbce578b2
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/do_corruption_whole_data_dir_overlapped_speech.sh
@@ -0,0 +1,284 @@
+#! /bin/bash

+# Copyright 2016 Vimal Manohar
+# Apache 2.0

+set -e
+set -u
+set -o pipefail

+. path.sh

+stage=0
+corruption_stage=-10
+corrupt_only=false
+dry_run=false  # if true, exit right after creating the corrupted data directory

+# Data options
+data_dir=data/train_si284   # Expecting a non-whole data directory
+speed_perturb=true
+num_data_reps=5  # Number of corrupted versions
+snrs="20:10:15:5:0:-5"
+foreground_snrs="20:10:15:5:0:-5"
+background_snrs="20:10:15:5:0:-5"
+overlap_snrs="5:2:1:0:-1:-2"
+# Whole-data directory corresponding to data_dir
+whole_data_dir=data/train_si284_whole
+overlap_labels_dir=overlap_labels

+# Parallel options
+reco_nj=40
+nj=40
+cmd=queue.pl
+max_jobs_run=10  # maximum number of jobs to run in parallel

+# Options for feature extraction
+mfcc_config=conf/mfcc_hires_bp.conf
+feat_suffix=hires_bp
+energy_config=conf/log_energy.conf

+reco_vad_dir=   # Output of prepare_unsad_data.sh.
+                # If provided, the speech labels and deriv weights will be
+                # copied into the output data directory.
+utt_vad_dir=

+. utils/parse_options.sh

+if [ $# -ne 0 ]; then
+  echo "Usage: $0"
+  exit 1
+fi

+rvb_opts=()
+# This is the config for the system using simulated RIRs and point-source noises
+rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
+rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
+rvb_opts+=(--speech-segments-set-parameters="$data_dir/wav.scp,$data_dir/segments")

+whole_data_id=`basename ${whole_data_dir}`

+corrupted_data_id=${whole_data_id}_ovlp_corrupted
+clean_data_id=${whole_data_id}_ovlp_clean
+noise_data_id=${whole_data_id}_ovlp_noise

+if [ $stage -le 1 ]; then
+  python steps/data/make_corrupted_data_dir.py \
+    "${rvb_opts[@]}" \
+    --prefix="ovlp" \
+    --overlap-snrs=$overlap_snrs \
+    --speech-rvb-probability=1 \
+    --overlapping-speech-addition-probability=1 \
+    --num-replications=$num_data_reps \
+    --min-overlapping-segments-per-minute=5 \
+    --max-overlapping-segments-per-minute=20 \
+    --output-additive-noise-dir=data/${noise_data_id} \
+    --output-reverb-dir=data/${clean_data_id} \
+    data/${whole_data_id} data/${corrupted_data_id}
+fi

+if $dry_run; then
+  exit 0
+fi

+clean_data_dir=data/${clean_data_id}
+corrupted_data_dir=data/${corrupted_data_id}
+noise_data_dir=data/${noise_data_id}
+orig_corrupted_data_dir=$corrupted_data_dir

+if $speed_perturb; then
+  if [ $stage -le 2 ]; then
+    ## Assuming whole data directories
+    for x in $clean_data_dir $corrupted_data_dir $noise_data_dir; do
+      cp $x/reco2dur $x/utt2dur
+      utils/data/perturb_data_dir_speed_3way.sh $x ${x}_sp
+    done
+  fi

+  corrupted_data_dir=${corrupted_data_dir}_sp
+  clean_data_dir=${clean_data_dir}_sp
+  noise_data_dir=${noise_data_dir}_sp

+  corrupted_data_id=${corrupted_data_id}_sp
+  clean_data_id=${clean_data_id}_sp
+  noise_data_id=${noise_data_id}_sp

+  if [ $stage -le 3 ]; then
+    utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 --force true ${corrupted_data_dir}
+    utils/data/perturb_data_dir_volume.sh --force true --reco2vol ${corrupted_data_dir}/reco2vol 
${clean_data_dir}
+    utils/data/perturb_data_dir_volume.sh --force true --reco2vol ${corrupted_data_dir}/reco2vol ${noise_data_dir}
+  fi
+fi

+if $corrupt_only; then
+  echo "$0: Got corrupted data directory in ${corrupted_data_dir}"
+  exit 0
+fi

+mfccdir=`basename $mfcc_config`
+mfccdir=${mfccdir%%.conf}

+if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+  utils/create_split_dir.pl \
+    /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
+fi

+if [ $stage -le 4 ]; then
+  utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix
+  corrupted_data_dir=${corrupted_data_dir}_$feat_suffix
+  steps/make_mfcc.sh --mfcc-config $mfcc_config \
+    --cmd "$train_cmd" --nj $reco_nj \
+    $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir
+fi

+if [ $stage -le 5 ]; then
+  steps/make_mfcc.sh --mfcc-config $energy_config \
+    --cmd "$train_cmd" --nj $reco_nj \
+    $clean_data_dir exp/make_log_energy/${clean_data_id} log_energy_feats
+fi

+if [ $stage -le 6 ]; then
+  steps/make_mfcc.sh --mfcc-config $energy_config \
+    --cmd "$train_cmd" --nj $reco_nj \
+    $noise_data_dir exp/make_log_energy/${noise_data_id} log_energy_feats
+fi

+if [ -z "$reco_vad_dir" ]; then
+  echo "$0: --reco-vad-dir must be provided"
+  exit 1
+fi

+targets_dir=irm_targets
+if [ $stage -le 7 ]; then
+  mkdir -p exp/make_irm_targets/${corrupted_data_id}

+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $targets_dir/storage ]; then
+    utils/create_split_dir.pl \
+      /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$targets_dir/storage $targets_dir/storage
+  fi

+  steps/segmentation/make_snr_targets.sh \
+    --nj $nj --cmd "$train_cmd --max-jobs-run $max_jobs_run" \
+    --target-type Irm --compress true --apply-exp false \
+    ${clean_data_dir} ${noise_data_dir} ${corrupted_data_dir} \
+    exp/make_irm_targets/${corrupted_data_id} $targets_dir
+fi

+# Combine the VAD from the base recording and the VAD from the overlapping segments
+# to create per-frame labels of the number of overlapping speech segments
+# Unreliable segments are regions where no VAD labels were available for the
+# overlapping segments. These can be later removed by setting deriv weights to 0. 
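(The pipeline below does this bookkeeping with Kaldi segmentation binaries; conceptually, the per-frame overlap counts and deriv weights amount to something like the following toy sketch, using plain frame-indexed intervals rather than the actual segmentation-* formats:

    def overlap_counts(num_frames, speech_intervals):
        # speech_intervals: (start_frame, end_frame) pairs from the base
        # recording's VAD plus each added overlapping segment.
        counts = [0] * num_frames
        for start, end in speech_intervals:
            for t in range(start, min(end, num_frames)):
                counts[t] += 1
        return counts  # counts[t] >= 2 marks overlapped speech at frame t

    def deriv_weights(num_frames, unreliable_intervals):
        # Frames where the overlapping segments had no VAD labels get
        # weight 0 so that they do not contribute to training.
        weights = [1.0] * num_frames
        for start, end in unreliable_intervals:
            for t in range(start, min(end, num_frames)):
                weights[t] = 0.0
        return weights
)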
+ +# Data dirs without speed perturbation +overlap_dir=exp/make_overlap_labels/${corrupted_data_id} +unreliable_dir=exp/make_overlap_labels/unreliable_${corrupted_data_id} +overlap_data_dir=$overlap_dir/overlap_data +unreliable_data_dir=$overlap_dir/unreliable_data + +mkdir -p $unreliable_dir + +if [ $stage -le 8 ]; then + cat $reco_vad_dir/sad_seg.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps "ovlp" \ + | sort -k1,1 > ${corrupted_data_dir}/sad_seg.scp + utils/data/get_utt2num_frames.sh $corrupted_data_dir + utils/split_data.sh --per-reco ${orig_corrupted_data_dir} $reco_nj + + $train_cmd JOB=1:$reco_nj $overlap_dir/log/get_overlap_seg.JOB.log \ + segmentation-init-from-overlap-info --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ + "scp:utils/filter_scp.pl ${orig_corrupted_data_dir}/split${reco_nj}reco/JOB/utt2spk $corrupted_data_dir/sad_seg.scp |" \ + ark,t:$orig_corrupted_data_dir/overlapped_segments_info.txt \ + scp:$utt_vad_dir/sad_seg.scp ark:- ark:$unreliable_dir/unreliable_seg_speed_unperturbed.JOB.ark \| \ + segmentation-copy --keep-label=1 ark:- ark:- \| \ + segmentation-get-stats --lengths-rspecifier=ark,t:$corrupted_data_dir/utt2num_frames \ + ark:- ark:- ark:/dev/null \| \ + segmentation-init-from-ali ark:- ark:$overlap_dir/overlap_seg_speed_unperturbed.JOB.ark +fi + +if [ $stage -le 9 ]; then + mkdir -p $overlap_data_dir $unreliable_data_dir + cp $orig_corrupted_data_dir/wav.scp $overlap_data_dir + cp $orig_corrupted_data_dir/wav.scp $unreliable_data_dir + + # Create segments where there is definitely an overlap. + # Assume no more than 10 speakers overlap. + $train_cmd JOB=1:$reco_nj $overlap_dir/log/process_to_segments.JOB.log \ + segmentation-post-process --remove-labels=0:1 \ + ark:$overlap_dir/overlap_seg_speed_unperturbed.JOB.ark ark:- \| \ + segmentation-post-process --merge-labels=2:3:4:5:6:7:8:9:10 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-to-segments ark:- ark:$overlap_data_dir/utt2spk.JOB $overlap_data_dir/segments.JOB + + $train_cmd JOB=1:$reco_nj $overlap_dir/log/get_unreliable_segments.JOB.log \ + segmentation-to-segments --single-speaker \ + ark:$unreliable_dir/unreliable_seg_speed_unperturbed.JOB.ark \ + ark:$unreliable_data_dir/utt2spk.JOB $unreliable_data_dir/segments.JOB + + for n in `seq $reco_nj`; do cat $overlap_data_dir/utt2spk.$n; done > $overlap_data_dir/utt2spk + for n in `seq $reco_nj`; do cat $overlap_data_dir/segments.$n; done > $overlap_data_dir/segments + for n in `seq $reco_nj`; do cat $unreliable_data_dir/utt2spk.$n; done > $unreliable_data_dir/utt2spk + for n in `seq $reco_nj`; do cat $unreliable_data_dir/segments.$n; done > $unreliable_data_dir/segments + + utils/fix_data_dir.sh $overlap_data_dir + utils/fix_data_dir.sh $unreliable_data_dir + + if $speed_perturb; then + utils/data/perturb_data_dir_speed_3way.sh $overlap_data_dir ${overlap_data_dir}_sp + utils/data/perturb_data_dir_speed_3way.sh $unreliable_data_dir ${unreliable_data_dir}_sp + fi +fi + +if $speed_perturb; then + overlap_data_dir=${overlap_data_dir}_sp + unreliable_data_dir=${unreliable_data_dir}_sp +fi + +# make $overlap_labels_dir an absolute pathname. 
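(The inline perl that follows simply anchors a relative directory at the current working directory; an equivalent in Python, for reference, would be:

    import os

    def make_absolute(path):
        # Leave absolute paths alone; prepend $PWD to relative ones.
        return path if path.startswith('/') else os.path.join(os.getcwd(), path)
)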
+overlap_labels_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $overlap_labels_dir ${PWD}`

+if [ $stage -le 10 ]; then
+  utils/split_data.sh --per-reco ${overlap_data_dir} $reco_nj

+  $train_cmd JOB=1:$reco_nj $overlap_dir/log/get_overlap_speech_labels.JOB.log \
+    utils/data/get_reco2utt.sh ${overlap_data_dir}/split${reco_nj}reco/JOB '&&' \
+    segmentation-init-from-segments --shift-to-zero=false \
+    ${overlap_data_dir}/split${reco_nj}reco/JOB/segments ark:- \| \
+    segmentation-combine-segments-to-recordings ark:- ark,t:${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt \
+    ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- \
+    ark,scp:$overlap_labels_dir/overlapped_speech_${corrupted_data_id}.JOB.ark,$overlap_labels_dir/overlapped_speech_${corrupted_data_id}.JOB.scp
+fi

+for n in `seq $reco_nj`; do
+  cat $overlap_labels_dir/overlapped_speech_${corrupted_data_id}.$n.scp
+done > ${corrupted_data_dir}/overlapped_speech_labels.scp

+if [ $stage -le 11 ]; then
+  utils/data/get_reco2utt.sh ${unreliable_data_dir}

+  # First convert the unreliable segments into a recording-level segmentation.
+  # Initialize a segmentation from utt2num_frames and set to 0 the regions
+  # of unreliable segments. At this stage the deriv weights are 1 for all but the
+  # unreliable segment regions.
+  # Initialize a segmentation from the VAD labels and retain only the speech segments.
+  # Intersect this with the deriv weights segmentation from above. At this stage
+  # the deriv weights are 1 only in the regions where the base VAD label is 1 and
+  # the overlapping segment is not unreliable. Convert this to deriv weights.
+  $train_cmd JOB=1:$reco_nj $unreliable_dir/log/get_deriv_weights.JOB.log\
+    segmentation-init-from-segments --shift-to-zero=false \
+    "utils/filter_scp.pl -f 2 ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/segments |" ark:- \| \
+    segmentation-combine-segments-to-recordings ark:- "ark,t:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt ${unreliable_data_dir}/reco2utt |" \
+    ark:- \| \
+    segmentation-create-subsegments --filter-label=1 --subsegment-label=0 --ignore-missing \
+    "ark:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt $corrupted_data_dir/utt2num_frames | segmentation-init-from-lengths ark,t:- ark:- |" \
+    ark:- ark:- \| \
+    segmentation-intersect-segments --mismatch-label=0 \
+    "ark:utils/filter_scp.pl ${overlap_data_dir}/split${reco_nj}reco/JOB/reco2utt $corrupted_data_dir/sad_seg.scp | segmentation-post-process --remove-labels=0:2:3 scp:- ark:- |" \
+    ark:- ark:- \| \
+    segmentation-post-process --remove-labels=0 ark:- ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:${corrupted_data_dir}/utt2num_frames ark:- ark,t:- \| \
+    steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \
+    ark,scp:$overlap_labels_dir/deriv_weights_for_overlapped_speech.JOB.ark,$overlap_labels_dir/deriv_weights_for_overlapped_speech.JOB.scp

+  for n in `seq $reco_nj`; do
+    cat $overlap_labels_dir/deriv_weights_for_overlapped_speech.${n}.scp
+  done > $corrupted_data_dir/deriv_weights_for_overlapped_speech.scp
+fi

+exit 0

From 2725cd1e75b743513e8ba93dbde1ee8750dc0d5c Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:23:53 -0500
Subject: [PATCH 369/530] asr_diarization: steps/data/wav_scp2noise_list.py

---
 egs/wsj/s5/steps/data/wav_scp2noise_list.py | 39 +++++++++++++++++++++
 1 file changed, 39 insertions(+)
 
create mode 100755 egs/wsj/s5/steps/data/wav_scp2noise_list.py

diff --git a/egs/wsj/s5/steps/data/wav_scp2noise_list.py b/egs/wsj/s5/steps/data/wav_scp2noise_list.py
new file mode 100755
index 00000000000..960bce33c7d
--- /dev/null
+++ b/egs/wsj/s5/steps/data/wav_scp2noise_list.py
@@ -0,0 +1,39 @@
+#! /usr/bin/env python

+# Copyright 2016 Vimal Manohar
+# Apache 2.0

+from __future__ import print_function
+import argparse

+def GetArgs():
+    parser = argparse.ArgumentParser(description="""This script converts a wav.scp
+into noise-set-parameters that can be passed to steps/data/reverberate_data_dir.py.""")

+    parser.add_argument("wav_scp", type=str,
+                        help = "The input wav.scp")
+    parser.add_argument("noise_list", type=str,
+                        help = "File to write the output noise-set-parameters")

+    args = parser.parse_args()

+    return args

+def Main():
+    args = GetArgs()

+    noise_list = open(args.noise_list, 'w')

+    for line in open(args.wav_scp):
+        parts = line.strip().split()

+        print ('''--noise-id {reco} --noise-type point-source \
+--bg-fg-type foreground "{wav}"'''.format(
+            reco = parts[0],
+            wav = " ".join(parts[1:])), file = noise_list)

+    noise_list.close()

+if __name__ == '__main__':
+    Main()

From 4a35cec70673db92e5d5e559fe237ee0e050abbc Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:24:16 -0500
Subject: [PATCH 370/530] asr_diarization: Cluster segments AIB

---
 .../segmentation/cluster_segments_aIB.sh | 138 ++++++++++++++
 .../cluster_segments_aIB_change_point.sh | 138 ++++++++++++++
 2 files changed, 276 insertions(+)
 create mode 100755 egs/wsj/s5/steps/segmentation/cluster_segments_aIB.sh
 create mode 100755 egs/wsj/s5/steps/segmentation/cluster_segments_aIB_change_point.sh

diff --git a/egs/wsj/s5/steps/segmentation/cluster_segments_aIB.sh b/egs/wsj/s5/steps/segmentation/cluster_segments_aIB.sh
new file mode 100755
index 00000000000..a1f187fab31
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/cluster_segments_aIB.sh
@@ -0,0 +1,138 @@
+#! /bin/bash

+window=2.5
+overlap=0.0
+stage=-1
+cmd=queue.pl
+reco_nj=4
+frame_shift=0.01
+utt_nj=18
+min_clusters=10
+stopping_threshold=0.5

+. path.sh
+. 
utils/parse_options.sh

+set -o pipefail
+set -e
+set -u

+if [ $# -ne 3 ]; then
+  echo "Usage: $0 <data> <dir> <out-data>"
+  exit 1
+fi

+data=$1
+dir=$2
+out_data=$3

+num_frames=`perl -e "print int($window / $frame_shift + 0.5)"`
+num_frames_overlap=`perl -e "print int($overlap/ $frame_shift + 0.5)"`

+data_uniform_seg=${data}_uniform_seg_window${window}_ovlp${overlap}

+mkdir -p ${data_uniform_seg}

+mkdir -p $dir

+#segmentation-cluster-adjacent-segments --verbose=0 'ark:segmentation-copy --keep-label=1 "ark:gunzip -c exp/nnet3_lstm_sad_music/nnet_lstm_1e//segmentation_bn_eval97_whole_bp/orig_segmentation.1.gz |" ark:- | segmentation-split-segments --max-segment-length=250 --overlap-length=0 ark:- ark:- |' scp:data/bn_eval97_bp_hires/feats.scp "ark:| segmentation-post-process --merge-adjacent-segments ark:- ark:- | segmentation-to-segments ark:- ark,t:- /dev/null" 2>&1 | less

+if [ $stage -le 0 ]; then
+  $cmd $dir/log/get_subsegments.log \
+    segmentation-init-from-segments --frame-overlap=0.015 $data/segments ark:- \| \
+    segmentation-split-segments --max-segment-length=$num_frames --overlap-length=$num_frames_overlap ark:- ark:- \| \
+    segmentation-cluster-adjacent-segments --verbose=3 ark:- "scp:$data/feats.scp" ark:- \| \
+    segmentation-post-process --merge-adjacent-segments ark:- ark:- \| \
+    segmentation-to-segments --frame-overlap=0.0 ark:- ark:/dev/null \
+    ${data_uniform_seg}/sub_segments

+  utils/data/subsegment_data_dir.sh ${data} ${data_uniform_seg}{/sub_segments,}
+fi

+gmm_dir=$dir/gmms
+mkdir -p $gmm_dir

+utils/split_data.sh --per-reco ${data_uniform_seg} $reco_nj

+if [ $stage -le 1 ]; then
+  echo $reco_nj > $gmm_dir/num_jobs
+  $cmd JOB=1:$reco_nj $gmm_dir/log/train_gmm.JOB.log \
+    gmm-global-init-models-from-feats --share-covars=true \
+    --spk2utt-rspecifier=ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt \
+    --num-gauss-init=64 --num-gauss=64 --num-gauss-fraction=0.001 --max-gauss=512 --min-gauss=64 \
+    --num-iters=20 --num-frames=500000 \
+    scp:${data_uniform_seg}/split${reco_nj}reco/JOB/feats.scp \
+    ark,scp:$gmm_dir/gmm.JOB.ark,$gmm_dir/gmm.JOB.scp

+  for n in `seq $reco_nj`; do
+    cat $gmm_dir/gmm.$n.scp
+  done > $gmm_dir/gmm.scp

+fi

+post_dir=$gmm_dir/post_`basename $data_uniform_seg`
+mkdir -p $post_dir

+if [ $stage -le 2 ]; then
+  echo $reco_nj > $post_dir/num_jobs

+  $cmd JOB=1:$reco_nj $gmm_dir/log/compute_post.JOB.log \
+    gmm-global-get-post \
+    --utt2spk="ark,t:cut -d ' ' -f 1,2 ${data_uniform_seg}/split${reco_nj}reco/JOB/segments |" \
+    scp:$gmm_dir/gmm.scp \
+    scp:${data_uniform_seg}/split${reco_nj}reco/JOB/feats.scp \
+    "ark:| gzip -c > $post_dir/post.JOB.gz" \
+    "ark:| gzip -c > $post_dir/frame_loglikes.JOB.gz"
+fi

+if [ $stage -le 3 ]; then
+  utils/data/get_utt2num_frames.sh --nj $utt_nj --cmd "$cmd" ${data_uniform_seg}

+  $cmd JOB=1:$reco_nj $post_dir/log/compute_average_post.JOB.log \
+    gmm-global-post-to-feats \
+    --utt2spk="ark,t:cut -d ' ' -f 1,2 ${data_uniform_seg}/split${reco_nj}reco/JOB/segments |" \
+    scp:$gmm_dir/gmm.scp "ark:gunzip -c $post_dir/post.JOB.gz |" ark:- \| \
+    matrix-sum-rows --do-average ark:- "ark:| gzip -c > $post_dir/avg_post.JOB.gz"
+fi

+seg_dir=$dir/segmentation_`basename $data_uniform_seg`

+if [ $stage -le 4 ]; then
+  $cmd JOB=1:$reco_nj $seg_dir/log/cluster_segments.JOB.log \
+    agglomerative-cluster-ib --min-clusters=$min_clusters \
+    --verbose=3 --stopping-threshold=$stopping_threshold --input-factor=0 \
+    --counts-rspecifier="ark,t:utils/filter_scp.pl $data_uniform_seg/split${reco_nj}reco/JOB/utt2spk 
$data_uniform_seg/utt2num_frames |" \ + "ark:gunzip -c $post_dir/avg_post.JOB.gz |" \ + "ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt" \ + ark,t:$seg_dir/utt2cluster_id.JOB +fi + +if [ $stage -le 5 ]; then + $cmd JOB=1:$reco_nj $seg_dir/log/init_segmentation.JOB.log \ + segmentation-init-from-segments --frame-overlap=0.0 --shift-to-zero=false \ + --utt2label-rspecifier=ark,t:${seg_dir}/utt2cluster_id.JOB \ + ${data_uniform_seg}/split${reco_nj}reco/JOB/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- \ + ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt \ + ark:- \| \ + segmentation-post-process --merge-adjacent-segments ark:- ark:- \| \ + segmentation-post-process --max-segment-length=1000 --overlap-length=250 ark:- ark:- \| \ + segmentation-to-segments ark:- ark,t:$seg_dir/utt2spk.JOB $seg_dir/segments.JOB +fi + +if [ $stage -le 6 ]; then + rm -r $out_data || true + utils/data/convert_data_dir_to_whole.sh $data $out_data + rm $out_data/{text,cmvn.scp} || true + + for n in `seq $reco_nj`; do + cat $seg_dir/utt2spk.$n + done > $out_data/utt2spk + + for n in `seq $reco_nj`; do + cat $seg_dir/segments.$n + done > $out_data/segments + + utils/utt2spk_to_spk2utt.pl $out_data/utt2spk > $out_data/spk2utt + utils/fix_data_dir.sh $out_data +fi diff --git a/egs/wsj/s5/steps/segmentation/cluster_segments_aIB_change_point.sh b/egs/wsj/s5/steps/segmentation/cluster_segments_aIB_change_point.sh new file mode 100755 index 00000000000..a1f187fab31 --- /dev/null +++ b/egs/wsj/s5/steps/segmentation/cluster_segments_aIB_change_point.sh @@ -0,0 +1,138 @@ +#! /bin/bash + +window=2.5 +overlap=0.0 +stage=-1 +cmd=queue.pl +reco_nj=4 +frame_shift=0.01 +utt_nj=18 +min_clusters=10 +stopping_threshold=0.5 + +. path.sh +. utils/parse_options.sh + +set -o pipefail +set -e +set -u + +if [ $# -ne 3 ]; then + echo "Usage: $0 " + exit 1 +fi + +data=$1 +dir=$2 +out_data=$3 + +num_frames=`perl -e "print int($window / $frame_shift + 0.5)"` +num_frames_overlap=`perl -e "print int($overlap/ $frame_shift + 0.5)"` + +data_uniform_seg=${data}_uniform_seg_window${window}_ovlp${overlap} + +mkdir -p ${data_uniform_seg} + +mkdir -p $dir + +#segmentation-cluster-adjacent-segments --verbose=0 'ark:segmentation-copy --keep-label=1 "ark:gunzip -c exp/nnet3_lstm_sad_music/nnet_lstm_1e//segmentation_bn_eval97_whole_bp/orig_segmentation.1.gz |" ark:- | segmentation-split-segments --max-segment-length=250 --overlap-length=0 ark:- ark:- |' scp:data/bn_eval97_bp_hires/feats.scp "ark:| segmentation-post-process --merge-adjacent-segments ark:- ark:- | segmentation-to-segments ark:- ark,t:- /dev/null" 2>&1 | less + +if [ $stage -le 0 ]; then + $cmd $dir/log/get_subsegments.log \ + segmentation-init-from-segments --frame-overlap=0.015 $data/segments ark:- \| \ + segmentation-split-segments --max-segment-length=$num_frames --overlap-length=$num_frames_overlap ark:- ark:- \| \ + segmentation-cluster-adjacent-segments --verbose=3 ark:- "scp:$data/feats.scp" ark:- \| \ + segmentation-post-process --merge-adjacent-segments ark:- ark:- \| \ + segmentation-to-segments --frame-overlap=0.0 ark:- ark:/dev/null \ + ${data_uniform_seg}/sub_segments + + utils/data/subsegment_data_dir.sh ${data} ${data_uniform_seg}{/sub_segments,} +fi + +gmm_dir=$dir/gmms +mkdir -p $gmm_dir + +utils/split_data.sh --per-reco ${data_uniform_seg} $reco_nj + +if [ $stage -le 1 ]; then + echo $reco_nj > $gmm_dir/num_jobs + $cmd JOB=1:$reco_nj $gmm_dir/log/train_gmm.JOB.log \ + gmm-global-init-models-from-feats --share-covars=true \ + 
--spk2utt-rspecifier=ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt \ + --num-gauss-init=64 --num-gauss=64 --num-gauss-fraction=0.001 --max-gauss=512 --min-gauss=64 \ + --num-iters=20 --num-frames=500000 \ + scp:${data_uniform_seg}/split${reco_nj}reco/JOB/feats.scp \ + ark,scp:$gmm_dir/gmm.JOB.ark,$gmm_dir/gmm.JOB.scp + + for n in `seq $reco_nj`; do + cat $gmm_dir/gmm.$n.scp + done > $gmm_dir/gmm.scp + +fi + +post_dir=$gmm_dir/post_`basename $data_uniform_seg` +mkdir -p $post_dir + +if [ $stage -le 2 ]; then + echo $reco_nj > $post_dir/num_jobs + + $cmd JOB=1:$reco_nj $gmm_dir/log/compute_post.JOB.log \ + gmm-global-get-post \ + --utt2spk="ark,t:cut -d ' ' -f 1,2 ${data_uniform_seg}/split${reco_nj}reco/JOB/segments |" \ + scp:$gmm_dir/gmm.scp \ + scp:${data_uniform_seg}/split${reco_nj}reco/JOB/feats.scp \ + "ark:| gzip -c > $post_dir/post.JOB.gz" \ + "ark:| gzip -c > $post_dir/frame_loglikes.JOB.gz" +fi + +if [ $stage -le 3 ]; then + utils/data/get_utt2num_frames.sh --nj $utt_nj --cmd "$cmd" ${data_uniform_seg} + + $cmd JOB=1:$reco_nj $post_dir/log/compute_average_post.JOB.log \ + gmm-global-post-to-feats \ + --utt2spk="ark,t:cut -d ' ' -f 1,2 ${data_uniform_seg}/split${reco_nj}reco/JOB/segments |" \ + scp:$gmm_dir/gmm.scp "ark:gunzip -c $post_dir/post.JOB.gz |" ark:- \| \ + matrix-sum-rows --do-average ark:- "ark:| gzip -c > $post_dir/avg_post.JOB.gz" +fi + +seg_dir=$dir/segmentation_`basename $data_uniform_seg` + +if [ $stage -le 4 ]; then + $cmd JOB=1:$reco_nj $seg_dir/log/cluster_segments.JOB.log \ + agglomerative-cluster-ib --min-clusters=$min_clusters \ + --verbose=3 --stopping-threshold=$stopping_threshold --input-factor=0 \ + --counts-rspecifier="ark,t:utils/filter_scp.pl $data_uniform_seg/split${reco_nj}reco/JOB/utt2spk $data_uniform_seg/utt2num_frames |" \ + "ark:gunzip -c $post_dir/avg_post.JOB.gz |" \ + "ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt" \ + ark,t:$seg_dir/utt2cluster_id.JOB +fi + +if [ $stage -le 5 ]; then + $cmd JOB=1:$reco_nj $seg_dir/log/init_segmentation.JOB.log \ + segmentation-init-from-segments --frame-overlap=0.0 --shift-to-zero=false \ + --utt2label-rspecifier=ark,t:${seg_dir}/utt2cluster_id.JOB \ + ${data_uniform_seg}/split${reco_nj}reco/JOB/segments ark:- \| \ + segmentation-combine-segments-to-recordings ark:- \ + ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt \ + ark:- \| \ + segmentation-post-process --merge-adjacent-segments ark:- ark:- \| \ + segmentation-post-process --max-segment-length=1000 --overlap-length=250 ark:- ark:- \| \ + segmentation-to-segments ark:- ark,t:$seg_dir/utt2spk.JOB $seg_dir/segments.JOB +fi + +if [ $stage -le 6 ]; then + rm -r $out_data || true + utils/data/convert_data_dir_to_whole.sh $data $out_data + rm $out_data/{text,cmvn.scp} || true + + for n in `seq $reco_nj`; do + cat $seg_dir/utt2spk.$n + done > $out_data/utt2spk + + for n in `seq $reco_nj`; do + cat $seg_dir/segments.$n + done > $out_data/segments + + utils/utt2spk_to_spk2utt.pl $out_data/utt2spk > $out_data/spk2utt + utils/fix_data_dir.sh $out_data +fi From 268e0175fd376ec9a90b7193d1d33873c7b7d478 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:24:43 -0500 Subject: [PATCH 371/530] asr_diarization: Train simple HMM --- .../s5/steps/segmentation/train_simple_hmm.py | 194 ++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100755 egs/wsj/s5/steps/segmentation/train_simple_hmm.py diff --git a/egs/wsj/s5/steps/segmentation/train_simple_hmm.py b/egs/wsj/s5/steps/segmentation/train_simple_hmm.py 
new file mode 100755
index 00000000000..9f581b0a520
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/train_simple_hmm.py
@@ -0,0 +1,194 @@
+#! /usr/bin/env python

+# Copyright 2016 Vimal Manohar
+# Apache 2.0.

+import argparse
+import logging
+import os
+import sys

+sys.path.insert(0, 'steps')
+import libs.common as common_lib


+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+handler = logging.StreamHandler()
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter("%(asctime)s [%(pathname)s:%(lineno)s - "
+                              "%(funcName)s - %(levelname)s ] %(message)s")
+handler.setFormatter(formatter)
+logger.addHandler(handler)


+def get_args():
+    """Parse command-line arguments"""

+    parser = argparse.ArgumentParser(
+        description="""Train a simple HMM model starting from HMM topology.""")

+    # Alignment options
+    parser.add_argument("--align.transition-scale", dest='transition_scale',
+                        type=float, default=10.0,
+                        help="""Transition-probability scale [relative to
+                        acoustics]""")
+    parser.add_argument("--align.self-loop-scale", dest='self_loop_scale',
+                        type=float, default=1.0,
+                        help="""Scale on self-loop versus non-self-loop log
+                        probs [relative to acoustics]""")
+    parser.add_argument("--align.beam", dest='beam',
+                        type=float, default=6,
+                        help="""Decoding beam used in alignment""")

+    # Training options
+    parser.add_argument("--training.num-iters", dest='num_iters',
+                        type=int, default=30,
+                        help="""Number of iterations of training""")
+    parser.add_argument("--training.use-soft-counts", dest='use_soft_counts',
+                        type=str, action=common_lib.StrToBoolAction,
+                        choices=["true", "false"], default=False,
+                        help="""Use soft counts (posteriors) instead of
+                        alignments""")

+    # General options
+    parser.add_argument("--scp2ark-cmd", type=str,
+                        default="copy-int-vector scp:- ark:- |",
+                        help="The command used to convert scp from stdin to "
+                        "write archive to stdout")
+    parser.add_argument("--cmd", dest='command', type=str,
+                        default="run.pl",
+                        help="Command used to run jobs")
+    parser.add_argument("--stage", type=int, default=-10,
+                        help="""Stage to run training from""")

+    parser.add_argument("--data", type=str, required=True,
+                        help="Data directory; primarily used for splitting")

+    labels_group = parser.add_mutually_exclusive_group(required=True)
+    labels_group.add_argument("--labels-scp", type=str,
+                              help="Input labels that must be converted to an alignment "
+                              "of class-ids using --scp2ark-cmd")
+    labels_group.add_argument("--labels-rspecifier", type=str,
+                              help="Input labels rspecifier")

+    parser.add_argument("--lang", type=str, required=True,
+                        help="The language directory containing the "
+                        "HMM Topology file topo")
+    parser.add_argument("--loglikes-dir", type=str, required=True,
+                        help="Directory containing the log-likelihoods")
+    parser.add_argument("--dir", type=str, required=True,
+                        help="Directory where the intermediate and final "
+                        "models will be written")

+    args = parser.parse_args()

+    if args.use_soft_counts:
+        raise NotImplementedError("--use-soft-counts not supported yet!")

+    return args


+def check_files(args):
+    """Check files required for this script"""

+    files = ("{lang}/topo {data}/utt2spk "
+             "{loglikes_dir}/log_likes.1.gz {loglikes_dir}/num_jobs "
+             "".format(lang=args.lang, data=args.data,
+                       loglikes_dir=args.loglikes_dir).split())

+    if args.labels_scp is not None:
+        files.append(args.labels_scp)

+    for f in files:
+        if not os.path.exists(f):
+            logger.error("Could not find file %s", f)
+            raise RuntimeError


+def run(args):
+    
"""The function that does it all""" + + check_files(args) + + if args.stage <= -2: + logger.info("Initializing simple HMM model") + common_lib.run_kaldi_command( + """{cmd} {dir}/log/init.log simple-hmm-init {lang}/topo """ + """ {dir}/0.mdl""".format(cmd=args.command, dir=args.dir, + lang=args.lang)) + + num_jobs = common_lib.get_number_of_jobs(args.loglikes_dir) + split_data = common_lib.split_data(args.data, num_jobs) + + if args.labels_rspecifier is not None: + labels_rspecifier = args.labels_rspecifier + else: + labels_rspecifier = ("ark:utils/filter_scp.pl {sdata}/JOB/utt2spk " + "{labels_scp} | {scp2ark_cmd}".format( + sdata=split_data, labels_scp=args.labels_scp, + scp2ark_cmd=args.scp2ark_cmd)) + + if args.stage <= -1: + logger.info("Compiling training graphs") + common_lib.run_kaldi_command( + """{cmd} JOB=1:{nj} {dir}/log/compile_graphs.JOB.log """ + """ compile-train-simple-hmm-graphs {dir}/0.mdl """ + """ "{labels_rspecifier}" """ + """ "ark:| gzip -c > {dir}/fsts.JOB.gz" """.format( + cmd=args.command, nj=num_jobs, + dir=args.dir, lang=args.lang, + labels_rspecifier=labels_rspecifier)) + + scale_opts = ("--transition-scale={tscale} --self-loop-scale={loop_scale}" + "".format(tscale=args.transition_scale, + loop_scale=args.self_loop_scale)) + + for iter_ in range(0, args.num_iters): + if args.stage > iter_: + continue + + logger.info("Training iteration %d", iter_) + + common_lib.run_kaldi_command( + """{cmd} JOB=1:{nj} {dir}/log/align.{iter}.JOB.log """ + """ simple-hmm-align-compiled {scale_opts} """ + """ --beam={beam} --retry-beam={retry_beam} {dir}/{iter}.mdl """ + """ "ark:gunzip -c {dir}/fsts.JOB.gz |" """ + """ "ark:gunzip -c {loglikes_dir}/log_likes.JOB.gz |" """ + """ ark:- \| """ + """ simple-hmm-acc-stats-ali {dir}/{iter}.mdl ark:- """ + """ {dir}/{iter}.JOB.acc""".format( + cmd=args.command, nj=num_jobs, dir=args.dir, iter=iter_, + scale_opts=scale_opts, beam=args.beam, + retry_beam=args.beam * 4, loglikes_dir=args.loglikes_dir)) + + common_lib.run_kaldi_command( + """{cmd} {dir}/log/update.{iter}.log """ + """ simple-hmm-est {dir}/{iter}.mdl """ + """ "vector-sum {dir}/{iter}.*.acc - |" """ + """ {dir}/{new_iter}.mdl""".format( + cmd=args.command, dir=args.dir, iter=iter_, + new_iter=iter_ + 1)) + + common_lib.run_kaldi_command( + "rm {dir}/{iter}.*.acc".format(dir=args.dir, iter=iter_)) + # end train loop + + common_lib.force_symlink("{0}.mdl".format(args.num_iters), + "{0}/final.mdl".format(args.dir)) + + logger.info("Done training simple HMM in %s/final.mdl", args.dir) + + +def main(): + try: + args = get_args() + run(args) + except Exception: + logger.error("Failed training models") + raise + + +if __name__ == '__main__': + main() From 7f10cd555746dc3034f990a92a6d96c5178f6cce Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 25 Jan 2017 15:25:39 -0500 Subject: [PATCH 372/530] asr_diarization: Add deprecated data_lib.py --- egs/wsj/s5/utils/data/data_lib.py | 57 +++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 egs/wsj/s5/utils/data/data_lib.py diff --git a/egs/wsj/s5/utils/data/data_lib.py b/egs/wsj/s5/utils/data/data_lib.py new file mode 100644 index 00000000000..5e58fcac3d5 --- /dev/null +++ b/egs/wsj/s5/utils/data/data_lib.py @@ -0,0 +1,57 @@ +import os + +import libs.common as common_lib + +def get_frame_shift(data_dir): + frame_shift = common_lib.run_kaldi_command("utils/data/get_frame_shift.sh {0}".format(data_dir))[0] + return float(frame_shift.strip()) + +def generate_utt2dur(data_dir): + 
common_lib.run_kaldi_command("utils/data/get_utt2dur.sh {0}".format(data_dir))
+
+def get_utt2dur(data_dir):
+    generate_utt2dur(data_dir)
+    utt2dur = {}
+    for line in open('{0}/utt2dur'.format(data_dir), 'r').readlines():
+        parts = line.split()
+        utt2dur[parts[0]] = float(parts[1])
+    return utt2dur
+
+def get_utt2uniq(data_dir):
+    utt2uniq_file = '{0}/utt2uniq'.format(data_dir)
+    if not os.path.exists(utt2uniq_file):
+        return None, None
+    utt2uniq = {}
+    uniq2utt = {}
+    for line in open(utt2uniq_file, 'r').readlines():
+        parts = line.split()
+        utt2uniq[parts[0]] = parts[1]
+        if parts[1] in uniq2utt:
+            uniq2utt[parts[1]].append(parts[0])
+        else:
+            uniq2utt[parts[1]] = [parts[0]]
+    return utt2uniq, uniq2utt
+
+def get_num_frames(data_dir, utts=None):
+    generate_utt2dur(data_dir)
+    frame_shift = get_frame_shift(data_dir)
+    total_duration = 0
+    utt2dur = get_utt2dur(data_dir)
+    if utts is None:
+        utts = utt2dur.keys()
+    for utt in utts:
+        total_duration = total_duration + utt2dur[utt]
+    return int(float(total_duration) / frame_shift)
+
+def create_data_links(file_names):
+    # if file_names already exist, create_data_link.pl returns with code 1,
+    # so we just delete them before calling create_data_link.pl
+    for file_name in file_names:
+        try_to_delete(file_name)
+    common_lib.run_kaldi_command("utils/create_data_link.pl {0}".format(" ".join(file_names)))
+
+def try_to_delete(file_name):
+    try:
+        os.remove(file_name)
+    except OSError:
+        pass

From 311d31f75768ecb339502203bc777ac4b2fad9ab Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:26:12 -0500
Subject: [PATCH 373/530] asr_diarization: Add nnet3-copy-egs-overlapped

---
 src/nnet3bin/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nnet3bin/Makefile b/src/nnet3bin/Makefile
index 2a660da232c..39aa6d477a2 100644
--- a/src/nnet3bin/Makefile
+++ b/src/nnet3bin/Makefile
@@ -18,7 +18,7 @@ BINFILES = nnet3-init nnet3-info nnet3-get-egs nnet3-copy-egs nnet3-subset-egs \
            nnet3-discriminative-compute-objf nnet3-discriminative-train \
            discriminative-get-supervision nnet3-discriminative-subset-egs \
            nnet3-discriminative-compute-from-egs nnet3-get-egs-multiple-targets \
-           nnet3-copy-egs-overlap-detection
+           nnet3-am-compute nnet3-copy-egs-overlap-detection

 OBJFILES =

From d54b41220f9ad991445ba002c2b7539015570100 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:26:36 -0500
Subject: [PATCH 374/530] segmenterbin/Makefile

---
 src/segmenterbin/Makefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/segmenterbin/Makefile b/src/segmenterbin/Makefile
index 6e2fd226019..a424f192d3e 100644
--- a/src/segmenterbin/Makefile
+++ b/src/segmenterbin/Makefile
@@ -19,7 +19,9 @@ BINFILES = segmentation-copy segmentation-get-stats \
            segmentation-init-from-additive-signals-info \
            class-counts-per-frame-to-labels \
            agglomerative-cluster-ib \
-           intersect-int-vectors #\
+           intersect-int-vectors \
+           gmm-global-init-models-from-feats \
+           segmentation-cluster-adjacent-segments #\
            gmm-acc-pdf-stats-segmentation \
            gmm-est-segmentation gmm-update-segmentation \
            segmentation-init-from-diarization \

From e27267f8d7ae5cf3302e63b9b84110af06fb2fb2 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:27:57 -0500
Subject: [PATCH 375/530] asr_diarization: Overlapping speech detection tuning scripts

---
 .../local/segmentation/tuning/train_lstm_overlapping_sad_1b.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_overlapping_sad_1b.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_overlapping_sad_1b.sh
index a634060b317..361f7c27bc0 100755
--- a/egs/aspire/s5/local/segmentation/tuning/train_lstm_overlapping_sad_1b.sh
+++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_overlapping_sad_1b.sh
@@ -56,7 +56,7 @@ num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 400
 num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`

 if [ -z "$dir" ]; then
-  dir=exp/nnet3_stats_sad_ovlp_snr/nnet_lstm
+  dir=exp/nnet3_lstm_sad_ovlp_snr/nnet_lstm
 fi

 dir=$dir${affix:+_$affix}

From 27ab5b2752d22a735add9ada1ea265433028d69c Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 25 Jan 2017 15:28:50 -0500
Subject: [PATCH 376/530] asr_diarization: add nnet3-am-compute

---
 src/nnet3bin/nnet3-am-compute.cc | 186 +++++++++++++++++++++++++++++++
 1 file changed, 186 insertions(+)
 create mode 100644 src/nnet3bin/nnet3-am-compute.cc

diff --git a/src/nnet3bin/nnet3-am-compute.cc b/src/nnet3bin/nnet3-am-compute.cc
new file mode 100644
index 00000000000..c91417c0aee
--- /dev/null
+++ b/src/nnet3bin/nnet3-am-compute.cc
@@ -0,0 +1,186 @@
+// nnet3bin/nnet3-am-compute.cc

+// Copyright 2012-2015  Johns Hopkins University (author: Daniel Povey)
+//                2015  Vimal Manohar

+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.


+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "nnet3/nnet-am-decodable-simple.h"
+#include "base/timer.h"
+#include "nnet3/nnet-utils.h"


+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace kaldi::nnet3;
+    typedef kaldi::int32 int32;
+    typedef kaldi::int64 int64;
+
+    const char *usage =
+      "Propagate the features through the neural network model "
+      "and write the pseudo log-likelihoods (after dividing by priors).\n"
+      "If --apply-exp=true, apply the Exp() function to the output "
+      "before writing it out.\n"
+      "\n"
+      "Usage: nnet3-am-compute [options] <model-in> <feature-rspecifier> <matrix-wspecifier>\n"
+      " e.g.: nnet3-am-compute final.mdl scp:feats.scp ark:log_likes.ark\n"
+      "See also: nnet3-compute-from-egs, nnet3-compute\n";
+
+    ParseOptions po(usage);
+    Timer timer;
+
+    NnetSimpleComputationOptions opts;
+    opts.acoustic_scale = 1.0;  // by default do no scaling in this recipe.
+
+    bool apply_exp = false;
+    std::string use_gpu = "yes";
+
+    std::string word_syms_filename;
+    std::string ivector_rspecifier,
+        online_ivector_rspecifier,
+        utt2spk_rspecifier;
+    int32 online_ivector_period = 0;
+
+    opts.Register(&po);
+
+    po.Register("ivectors", &ivector_rspecifier, "Rspecifier for "
+                "iVectors as vectors (i.e. not estimated online); per utterance "
+                "by default, or per speaker if you provide the --utt2spk option.");
+    po.Register("utt2spk", &utt2spk_rspecifier, "Rspecifier for "
+                "utt2spk option used to get ivectors per speaker");
+    po.Register("online-ivectors", &online_ivector_rspecifier, "Rspecifier for "
+                "iVectors estimated online, as matrices. If you supply this,"
+                " you must set the --online-ivector-period option.");
+    po.Register("online-ivector-period", &online_ivector_period, "Number of frames "
+                "between iVectors in matrices supplied to the --online-ivectors "
+                "option");
+    po.Register("apply-exp", &apply_exp, "If true, apply exp function to "
+                "output");
+    po.Register("use-gpu", &use_gpu,
+                "yes|no|optional|wait, only has effect if compiled with CUDA");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+#if HAVE_CUDA==1
+    CuDevice::Instantiate().SelectGpuId(use_gpu);
+#endif
+
+    std::string nnet_rxfilename = po.GetArg(1),
+        feature_rspecifier = po.GetArg(2),
+        matrix_wspecifier = po.GetArg(3);
+
+    TransitionModel trans_model;
+    AmNnetSimple am_nnet;
+    {
+      bool binary_read;
+      Input ki(nnet_rxfilename, &binary_read);
+      trans_model.Read(ki.Stream(), binary_read);
+      am_nnet.Read(ki.Stream(), binary_read);
+    }
+
+    RandomAccessBaseFloatMatrixReader online_ivector_reader(
+        online_ivector_rspecifier);
+    RandomAccessBaseFloatVectorReaderMapped ivector_reader(
+        ivector_rspecifier, utt2spk_rspecifier);
+
+    CachingOptimizingCompiler compiler(am_nnet.GetNnet(), opts.optimize_config);
+
+    BaseFloatMatrixWriter matrix_writer(matrix_wspecifier);
+
+    int32 num_success = 0, num_fail = 0;
+    int64 frame_count = 0;
+
+    SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
+
+    for (; !feature_reader.Done(); feature_reader.Next()) {
+      std::string utt = feature_reader.Key();
+      const Matrix<BaseFloat> &features(feature_reader.Value());
+      if (features.NumRows() == 0) {
+        KALDI_WARN << "Zero-length utterance: " << utt;
+        num_fail++;
+        continue;
+      }
+      const Matrix<BaseFloat> *online_ivectors = NULL;
+      const Vector<BaseFloat> *ivector = NULL;
+      if (!ivector_rspecifier.empty()) {
+        if (!ivector_reader.HasKey(utt)) {
+          KALDI_WARN << "No iVector available for utterance " << utt;
+          num_fail++;
+          continue;
+        } else {
+          ivector = &ivector_reader.Value(utt);
+        }
+      }
+      if (!online_ivector_rspecifier.empty()) {
+        if (!online_ivector_reader.HasKey(utt)) {
+          KALDI_WARN << "No online iVector available for utterance " << utt;
+          num_fail++;
+          continue;
+        } else {
+          online_ivectors = &online_ivector_reader.Value(utt);
+        }
+      }
+
+      DecodableNnetSimple nnet_computer(
+          opts, am_nnet.GetNnet(), am_nnet.Priors(),
+          features, &compiler,
+          ivector, online_ivectors,
+          online_ivector_period);
+
+      Matrix<BaseFloat> matrix(nnet_computer.NumFrames(),
+                               nnet_computer.OutputDim());
+      for (int32 t = 0; t < nnet_computer.NumFrames(); t++) {
+        SubVector<BaseFloat> row(matrix, t);
+        nnet_computer.GetOutputForFrame(t, &row);
+      }
+
+      if (apply_exp)
+        matrix.ApplyExp();
+
+      matrix_writer.Write(utt, matrix);
+
+      frame_count += features.NumRows();
+      num_success++;
+    }
+
+#if HAVE_CUDA==1
+    CuDevice::Instantiate().PrintProfile();
+#endif
+    double elapsed = timer.Elapsed();
+    KALDI_LOG << "Time taken " << elapsed
+              << "s: real-time factor assuming 100 frames/sec is "
+              << (elapsed*100.0/frame_count);
+    KALDI_LOG << "Done " << num_success << " utterances, failed for "
+              << num_fail;
+
+    if (num_success != 0) return 0;
+    else return 1;
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
From
c262026e3edbbeb6de4e0758909dcbc0dec8ba0c Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 26 Jan 2017 00:18:52 -0500 Subject: [PATCH 377/530] [egs][scripts] Adding more example scripts for Tedlium and Swbd; add nnet3_disc_dir_info.pl. --- .../s5c/local/chain/compare_wer_general.sh | 5 + .../s5c/local/nnet3/compare_wer_general.sh | 67 +++- .../local/nnet3/run_blstm_discriminative.sh | 4 +- egs/swbd/s5c/local/nnet3/run_tdnn_disc.sh | 1 + .../local/nnet3/run_tdnn_discriminative.sh | 175 --------- .../s5c/local/nnet3/tuning/run_tdnn_d_disc.sh | 55 ++- .../s5_r2/local/chain/compare_wer_general.sh | 57 ++- .../s5_r2/local/chain/tuning/run_tdnn_1d.sh | 256 +++++++++++++ .../chain/tuning/run_tdnn_lstm_1e_disc.sh | 264 ++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1i.sh | 337 ++++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1j.sh | 334 +++++++++++++++++ egs/tedlium/s5_r2/local/nnet3/compare_wer.sh | 75 +++- .../s5_r2/local/nnet3/run_tdnn_lstm.sh | 1 + .../s5_r2/local/nnet3/run_tdnn_lstm_disc.sh | 1 + .../s5_r2/local/nnet3/tuning/run_tdnn_1a.sh | 120 +++++++ .../s5_r2/local/nnet3/tuning/run_tdnn_1b.sh | 169 +++++++++ .../local/nnet3/tuning/run_tdnn_lstm_1a.sh | 228 ++++++++++++ .../local/nnet3/tuning/run_tdnn_lstm_1b.sh | 240 +++++++++++++ .../local/nnet3/tuning/run_tdnn_lstm_1c.sh | 234 ++++++++++++ egs/wsj/s5/steps/info/chain_dir_info.pl | 4 +- egs/wsj/s5/steps/info/nnet2_dir_info.pl | 4 +- egs/wsj/s5/steps/info/nnet3_dir_info.pl | 4 +- egs/wsj/s5/steps/info/nnet3_disc_dir_info.pl | 172 +++++++++ 23 files changed, 2592 insertions(+), 215 deletions(-) create mode 120000 egs/swbd/s5c/local/nnet3/run_tdnn_disc.sh delete mode 100755 egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh create mode 120000 egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm.sh create mode 120000 egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_disc.sh create mode 100755 egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1a.sh create mode 100755 egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh create mode 100755 egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh create mode 100755 egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b.sh create mode 100755 egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1c.sh create mode 100755 egs/wsj/s5/steps/info/nnet3_disc_dir_info.pl diff --git a/egs/swbd/s5c/local/chain/compare_wer_general.sh b/egs/swbd/s5c/local/chain/compare_wer_general.sh index c8aae0b3b94..1b1f0d16047 100755 --- a/egs/swbd/s5c/local/chain/compare_wer_general.sh +++ b/egs/swbd/s5c/local/chain/compare_wer_general.sh @@ -1,5 +1,10 @@ #!/bin/bash +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer_general.sh tdnn_7h_sp tdnn_7i_sp + +echo "$0 $*"; # print command line. + echo -n "System " for x in $*; do printf "% 10s" $x; done echo diff --git a/egs/swbd/s5c/local/nnet3/compare_wer_general.sh b/egs/swbd/s5c/local/nnet3/compare_wer_general.sh index 11742173120..37eaeeac85b 100755 --- a/egs/swbd/s5c/local/nnet3/compare_wer_general.sh +++ b/egs/swbd/s5c/local/nnet3/compare_wer_general.sh @@ -1,48 +1,99 @@ #!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. 
local/nnet3/compare_wer_general.sh tdnn_c_sp tdnn_d_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/nnet3/compare_wer_general.sh tdnn_d_sp tdnn_d_sp_smbr:1 tdnn_d_sp_smbr:2 ... + +echo "# $0 $*"; # print command line. + + echo -n "# System " -for x in $*; do printf "% 10s" $x; done +for x in $*; do printf " % 9s" $x; done echo + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. +# If called with a colon-free name, like: +# set_names tdnn_a_sp +# it will set dir=exp/nnet3/tdnn_a_sp and epoch_suffix="" +# If called with something like: +# set_names tdnn_d_sp_smbr:3 +# it will set dir=exp/nnet3/tdnn_d_sp_smbr and epoch_suffix="epoch3" +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + name=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + dirname=exp/nnet3/$name + if [ -z $epoch ]; then + epoch_suffix="" + else + used_epochs=true + epoch_suffix=_epoch${epoch} + fi +} + + echo -n "# WER on train_dev(tg) " for x in $*; do - wer=$(grep WER exp/nnet3/${x}_sp/decode_train_dev_hires_sw1_tg/wer_* | utils/best_wer.sh | awk '{print $2}') + set_names $x + # note: the '*' in the directory name is because there + # is _hires_ in there for the cross-entropy systems, and + # nothing for the sequence trained systems. + wer=$(grep WER $dirname/decode_train_dev*sw1_tg$epoch_suffix/wer_* | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo echo -n "# WER on train_dev(fg) " for x in $*; do - wer=$(grep WER exp/nnet3/${x}_sp/decode_train_dev_hires_sw1_fsh_fg/wer_* | utils/best_wer.sh | awk '{print $2}') + set_names $x + wer=$(grep WER $dirname/decode_train_dev*sw1_fsh_fg$epoch_suffix/wer_* | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo echo -n "# WER on eval2000(tg) " for x in $*; do - wer=$(grep Sum exp/nnet3/${x}_sp/decode_eval2000_hires_sw1_tg/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') + set_names $x + wer=$(grep Sum $dirname/decode_eval2000*sw1_tg$epoch_suffix/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo echo -n "# WER on eval2000(fg) " for x in $*; do - wer=$(grep Sum exp/nnet3/${x}_sp/decode_eval2000_hires_sw1_fsh_fg/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') + set_names $x + wer=$(grep Sum $dirname/decode_eval2000*sw1_fsh_fg$epoch_suffix/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo +if $used_epochs; then + # we don't print the probs in this case. 
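+  # (e.g. "local/nnet3/compare_wer_general.sh tdnn_d_sp tdnn_d_sp_smbr:1 tdnn_d_sp_smbr:2",
+  # the colon-suffixed usage shown at the top of this script, prints only the
+  # WER sections above and then stops here.)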
+ exit 0 +fi + echo -n "# Final train prob " for x in $*; do - prob=$(grep log-likelihood exp/nnet3/${x}_sp/log/compute_prob_train.combined.log | awk '{print $8}') + set_names $x + prob=$(grep log-likelihood $dirname/log/compute_prob_train.combined.log | awk '{print $8}') printf "% 10s" $prob done echo echo -n "# Final valid prob " for x in $*; do - prob=$(grep log-likelihood exp/nnet3/${x}_sp/log/compute_prob_valid.combined.log | awk '{print $8}') + set_names $x + prob=$(grep log-likelihood $dirname/log/compute_prob_valid.combined.log | awk '{print $8}') printf "% 10s" $prob done echo - diff --git a/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh b/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh index c6dfb0107cd..ba751ad8732 100755 --- a/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh +++ b/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh @@ -2,7 +2,9 @@ set -o pipefail set -e -# this is run_discriminative.sh + +# Caution: this script is out of date, it does not use the +# refactored discriminative training script with get_degs.sh. # This script does discriminative training on top of CE BLSTM system. # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, diff --git a/egs/swbd/s5c/local/nnet3/run_tdnn_disc.sh b/egs/swbd/s5c/local/nnet3/run_tdnn_disc.sh new file mode 120000 index 00000000000..e4d47deb7a4 --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/run_tdnn_disc.sh @@ -0,0 +1 @@ +tuning/run_tdnn_d_disc.sh \ No newline at end of file diff --git a/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh b/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh deleted file mode 100755 index 7af311e7ff4..00000000000 --- a/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh +++ /dev/null @@ -1,175 +0,0 @@ -#!/bin/bash - -set -o pipefail -set -e -# this is run_discriminative.sh - -# This script does discriminative training on top of CE nnet3 system. -# note: this relies on having a cluster that has plenty of CPUs as well as GPUs, -# since the lattice generation runs in about real-time, so takes of the order of -# 1000 hours of CPU time. -# -. cmd.sh - - -stage=0 -train_stage=-10 # can be used to start training in the middle. -get_egs_stage=-10 -use_gpu=true # for training -cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like denlats, - # alignments and degs). - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -srcdir=exp/nnet3/nnet_ms_a -train_data_dir=data/train_nodup_sp_hires -online_ivector_dir=exp/nnet3/ivectors_train_nodup_sp -degs_dir= # If provided, will skip the degs directory creation -lats_dir= # If provided, will skip denlats creation - -## Objective options -criterion=smbr -one_silence_class=true - -dir=${srcdir}_${criterion} - -## Egs options -frames_per_eg=150 -frames_overlap_per_eg=30 - -## Nnet training options -effective_learning_rate=0.0000125 -max_param_change=1 -num_jobs_nnet=4 -num_epochs=4 -regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options -minibatch_size=64 - -## Decode options -decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. - -if $use_gpu; then - if ! 
cuda-compiled; then - cat <3606 combine=-0.10->-0.10 xent:train/valid[167,252,final]=(-1.47,-1.40,-1.40/-1.61,-1.57,-1.56) logprob:train/valid[167,252,final]=(-0.096,-0.087,-0.087/-0.119,-0.115,-0.115) +# exp/chain_cleaned/tdnn1d_sp_bi: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3606 combine=-0.10->-0.10 xent:train/valid[167,252,final]=(-1.46,-1.39,-1.39/-1.61,-1.56,-1.55) logprob:train/valid[167,252,final]=(-0.096,-0.088,-0.088/-0.120,-0.115,-0.115) + +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn1b_sp_bi exp/chain_cleaned/tdnn1d_sp_bi +# System tdnn1b_sp_bi tdnn1d_sp_bi +# WER on dev(orig) 9.4 9.5 +# WER on dev(rescored) 8.8 8.6 +# WER on test(orig) 9.6 9.4 +# WER on test(rescored) 9.0 8.9 +# Final train prob -0.0870 -0.0878 +# Final valid prob -0.1147 -0.1152 +# Final train prob (xent) -1.4014 -1.3921 +# Final valid prob (xent) -1.5634 -1.5543 + +# run_tdnn_1b.sh is like run_tdnn_1a.sh but upgrading to xconfig-based +# config generation. + + +## how you run this (note: this assumes that the run_tdnn.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn.sh + +# without cleanup: +# local/chain/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run the corresponding non-chain nnet3 system +# (local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# This script is like run_tdnn_1a.sh except it uses an xconfig-based mechanism +# to get the configuration. + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1d #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 self-repair-scale=1.0e-04 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1,2) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-6,-3,0) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh new file mode 100755 index 00000000000..0d64c75aea8 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh @@ -0,0 +1,264 @@ +#!/bin/bash + +# This script does discriminative training on top of the 1e chain system. To +# simplify things, this assumes you are using the "cleaned" data (since this is +# generally better), i.e. it won't work if you used options to run_tdnn_lstm_1e.sh +# to use the non-cleaned data. +# +# note: this relies on having a cluster that has plenty of CPUs as well as GPUs, +# since the alignment and the lattice generation/egs-dumping takes quite a bit +# of CPU time. + + +# Below is with 0.00002 and last_layer_factor=0.5 +# this is the setting we're leaving in the script, but the discriminative training +# is not really helping. Maybe we should try the frame-shifted version. 
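+#
+# ("frame-shifted" here would mean dumping degs at each frame offset, e.g.
+# offsets 0,1,2 given the usual frame-subsampling-factor of 3, analogous to
+# the frame-shifting applied to the regular chain egs during training; this
+# is just a sketch of the idea, nothing in this script implements it.)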
+# steps/info/nnet3_disc_dir_info.pl exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbroutslow2 +# exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbroutslow2:num-jobs=4;effective-lrate=2e-05;last-layer-factor=0.50;iters-per-epoch=138;epoch[0,1,2]:train-objf=[0.94,0.96,0.97],valid-objf=[0.95,0.96,0.96],train-counts=[0.24,0.12,0.10],valid-counts=[0.28,0.20,0.17] +# b01:s5_r2: steps/info/nnet3_disc_dir_info.pl exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbroutslow + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbroutslow2:{1,2} +# System tdnn_lstm1e_sp_bi tdnn_lstm1e_sp_bi_smbroutslow2:1 tdnn_lstm1e_sp_bi_smbroutslow2:2 +# WER on dev(orig) 9.0 8.9 8.9 +# [looped:] 9.0 8.9 8.9 +# WER on dev(rescored) 8.4 8.3 8.4 +# [looped:] 8.4 8.3 8.4 +# WER on test(orig) 8.8 8.7 8.8 +# [looped:] 8.8 8.8 8.8 +# WER on test(rescored) 8.4 8.3 8.4 +# [looped:] 8.3 8.4 8.5 + + + +# Below is with 0.00002 and last_layer_factor=1.0. +# b01:s5_r2: steps/info/nnet3_disc_dir_info.pl exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr +# exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr:num-jobs=4;lrate=2e-05;iters-per-epoch=138;epoch[0,1,2]:train-objf=[0.94,0.96,0.97],valid-objf=[0.95,0.96,0.96],train-counts=[0.24,0.12,0.09],valid-counts=[0.28,0.19,0.16] +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr:{1,2} +# System tdnn_lstm1e_sp_bi tdnn_lstm1e_sp_bi_smbr:1 tdnn_lstm1e_sp_bi_smbr:2 +# WER on dev(orig) 9.0 8.8 8.9 +# [looped:] 9.0 8.9 8.9 +# WER on dev(rescored) 8.4 8.3 8.4 +# [looped:] 8.4 8.3 8.4 +# WER on test(orig) 8.8 8.8 8.9 +# [looped:] 8.8 8.8 8.9 +# WER on test(rescored) 8.4 8.4 8.5 +# [looped:] 8.3 8.4 8.5 + + +set -e +set -uo pipefail + +stage=1 +train_stage=-10 # can be used to start training in the middle. +get_egs_stage=0 +use_gpu=true # for training +cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like + # alignments and degs). +degs_dir= # set this to use preexisting degs. +nj=400 # have a high number of jobs because this could take a while, and we might + # have some stragglers. +# you can set disc_affix if you run different configurations, e.g. --disc-affix "_b" +disc_affix= + + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +srcdir=exp/chain_cleaned/tdnn_lstm1e_sp_bi +graph_dir=$srcdir/graph +train_data_dir=data/train_cleaned_sp_hires_comb +online_ivector_dir=exp/nnet3_cleaned/ivectors_train_cleaned_sp_hires_comb + +## Objective options +criterion=smbr +one_silence_class=true + +dir=${srcdir}_${criterion}${disc_affix} + +## Egs options. Give quite a few choices of chunk length, +## so it can split utterances without much gap or overlap. +frames_per_eg=300,280,150,120,100 +frames_overlap_per_eg=0 +frames_per_chunk_egs=200 # frames-per-chunk for decoding in alignment and + # denlat decoding. +frames_per_chunk_decoding=140 # frames-per-chunk for decoding when we test + # the models. +## these context options should match the training condition. (chunk_left_context, +## chunk_right_context) +## We set --extra-left-context-initial 0 and --extra-right-context-final 0 +## directly in the script below, but this should also match the training condition. +extra_left_context=40 +extra_right_context=0 + + + +## Nnet training options +effective_learning_rate=0.00002 +max_param_change=1 +num_jobs_nnet=4 +num_epochs=2 +regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options, + # in chain models. 
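+# e.g. (illustrative values only; these happen to match what the chain
+# training runs in this setup pass to train.py):
+# regularization_opts="--xent-regularize=0.1 --l2-regularize=0.00005"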
+last_layer_factor=0.5  # have the output layer train slower than the others; this can
+                       # be helpful.
+minibatch_size="300=32,16/150=64,32"  # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up);
+                                      # if chunk size is closer to 150, use minibatch size of 64 (or 32 for mop-up).
+
+
+## Decode options
+decode_start_epoch=1  # can be used to avoid decoding all epochs, e.g. if we decided to run more.
+
+if $use_gpu; then
+  if ! cuda-compiled; then
+    cat </dev/null || true
+
+  for x in `seq $decode_start_epoch $num_epochs`; do
+    for decode_set in dev test; do
+      num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
+      iter=epoch$x
+      # We don't test the iter "epoch${x}_adj", although it's computed,
+      # because prior-adjustment doesn't make sense for chain models
+      # and it degrades the results.
+      (
+      steps/nnet3/decode_looped.sh \
+          --nj $num_jobs --cmd "$decode_cmd" --iter $iter \
+          --acwt 1.0 --post-decode-acwt 10.0 \
+          --frames-per-chunk 30 \
+          --online-ivector-dir exp/nnet3_cleaned/ivectors_${decode_set}_hires \
+          --scoring-opts "--min-lmwt 5 " \
+          $graph_dir data/${decode_set}_hires $dir/decode_looped_${decode_set}_${iter} || exit 1;
+      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+          data/${decode_set}_hires \
+          ${dir}/decode_looped_${decode_set}_${iter} ${dir}/decode_looped_${decode_set}_${iter}_rescore || exit 1
+      ) || touch $dir/.error &
+    done
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+
+
+
+wait;
+
+if [ $stage -le 6 ] && $cleanup; then
+  # if you run with "--cleanup true --stage 6" you can clean up.
+  # actually, keep the alignments in case we need them later; they're slow to
+  # create, and quite big.
+  # rm ${srcdir}_ali/ali.*.gz || true
+
+  steps/nnet2/remove_egs.sh ${srcdir}_degs || true
+fi
+
+
+exit 0;
diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh
new file mode 100755
index 00000000000..62497ca59ff
--- /dev/null
+++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh
@@ -0,0 +1,337 @@
+#!/bin/bash
+
+# 1i is as 1e, but adding boundary-offset. No clear effect.
+#
+# the 3 columns below are: baseline; boundary-offset with that component
+# learning with 10x the normal learning rate; boundary-offset with
+# regular learning rate. There seems no clear benefit from this
+# idea. Reverting the code changes that supported it;
+# see ~dpovey/patches/lstm_boundary.patch
+
+
+# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1i_sp_bi exp/chain_cleaned/tdnn_lstm1i_sp_bi.orig_learning_rate
+# System                   tdnn_lstm1e_sp_bi  tdnn_lstm1i_sp_bi  tdnn_lstm1i_sp_bi.orig_learning_rate
+# WER on dev(orig)                  9.0                9.1                8.9
+# [looped:]                         9.0                9.0                9.0
+# WER on dev(rescored)              8.4                8.3                8.3
+# [looped:]                         8.4                8.2                8.2
+# WER on test(orig)                 8.8                8.9                8.9
+# [looped:]                         8.8                8.9                8.9
+# WER on test(rescored)             8.4                8.4                8.4
+# [looped:]                         8.3                8.4                8.4
+# Final train prob              -0.0648            -0.0625            -0.0644
+# Final valid prob              -0.0827            -0.0833            -0.0855
+# Final train prob (xent)       -0.8372            -0.8129            -0.8286
+# Final valid prob (xent)       -0.9497            -0.9558            -0.9641
+
+
+# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it
+# uses egs from 1b, remember to remove that before I commit.
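+#
+# (For reference, "adding decay-time" just means a fast-lstmp-layer xconfig
+# line of the following form; the value 40 is the 1d setting described above,
+# whereas this script uses decay-time=20 below:
+#  fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=40 delay=-3 )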
+ +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1i #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). 
+ # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 boundary-offset=true + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 boundary-offset=true + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 boundary-offset=true + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
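+  # (in other words the value 30 below is not tuned: any similar chunk size
+  # should give identical results, affecting only latency and per-chunk
+  # compute overhead.)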
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh new file mode 100755 index 00000000000..c9a57f0ab4d --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh @@ -0,0 +1,334 @@ +#!/bin/bash + +# 1j is as 1e, but adding self-repair-scale=1.0e-04 on 1st tdnn layer [default is 1e-5]. +# It's definitely more effective in preventing under or over-saturated ReLUs, but +# it's not clear that there is any other benefit. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,j}_sp_bi +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1j_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1j_sp_bi +# WER on dev(orig) 9.0 9.1 +# [looped:] 9.0 9.1 +# WER on dev(rescored) 8.4 8.5 +# [looped:] 8.4 8.5 +# WER on test(orig) 8.8 9.0 +# [looped:] 8.8 9.1 +# WER on test(rescored) 8.4 8.6 +# [looped:] 8.3 8.5 +# Final train prob -0.0648 -0.0646 +# Final valid prob -0.0827 -0.0835 +# Final train prob (xent) -0.8372 -0.8296 +# Final valid prob (xent) -0.9497 -0.9597 + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). 
+# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1j #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 self-repair-scale=1.0e-04 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
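+  # (illustrative, not part of the original recipe: once this stage and the + # regular decoding stage have both run, the looped and regular results can + # be compared with something like + # local/chain/compare_wer_general.sh --looped exp/chain${nnet3_affix}/tdnn_lstm${tdnn_lstm_affix}_sp_bi + # -- the directory name is an assumption based on the affixes defined above.)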
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh b/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh index cff39def83b..3e14a4efc55 100755 --- a/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh +++ b/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh @@ -1,7 +1,13 @@ #!/bin/bash +# this script is used for comparing decoding results between systems. +# e.g. local/nnet3/compare_wer_general.sh exp/nnet3_cleaned/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_c_sp exp/nnet3_cleaned/tdnn_c_sp_smbr:{1,2,3} -echo $0 $* + +echo "# $0 $*" include_looped=false if [ "$1" == "--looped" ]; then @@ -9,24 +15,58 @@ if [ "$1" == "--looped" ]; then shift fi -echo -n "System " + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. +# If called with a colon-free directory name, like: +# set_names exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain_cleaned/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain_cleaned/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " for x in $*; do printf "% 10s" " $(basename $x)"; done echo -dirnames=(dev dev_rescore test test_rescore) -strings=("WER on dev(orig) " "WER on dev(rescored) " "WER on test(orig) " "WER on test(rescored)") +strings=("# WER on dev(orig) " "# WER on dev(rescored) " "# WER on test(orig) " "# WER on test(rescored)") for n in 0 1 2 3; do echo -n "${strings[$n]}" for x in $*; do - wer=$(grep Sum $x/decode_${dirnames[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + set_names $x # sets $dirname and $epoch_infix + decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) + wer=$(grep Sum $dirname/decode_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo if $include_looped; then - echo -n " [looped:] " + echo -n "# [looped:] " for x in $*; do - wer=$(grep Sum $x/decode_looped_${dirnames[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + set_names $x # sets $dirname and $epoch_infix + decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) + wer=$(grep Sum 
$dirname/decode_looped_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo @@ -34,30 +74,35 @@ for n in 0 1 2 3; do done -echo -n "Final train prob " +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + + +echo -n "# Final train prob " for x in $*; do - prob=$(grep Overall $x/log/compute_prob_train.combined.log | grep log-like | awk '{printf("%.4f", $8)}') + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo -echo -n "Final valid prob " +echo -n "# Final valid prob " for x in $*; do - prob=$(grep Overall $x/log/compute_prob_valid.combined.log | grep log-like | awk '{printf("%.4f", $8)}') + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo -echo -n "Final train acc " +echo -n "# Final train acc " for x in $*; do - prob=$(grep Overall $x/log/compute_prob_train.combined.log | grep accuracy | awk '{printf("%.4f", $8)}') + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo -echo -n "Final valid acc " +echo -n "# Final valid acc " for x in $*; do - prob=$(grep Overall $x/log/compute_prob_valid.combined.log | grep accuracy | awk '{printf("%.4f", $8)}') + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm.sh new file mode 120000 index 00000000000..8e647598556 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_disc.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_disc.sh new file mode 120000 index 00000000000..50d28fb91f3 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_disc.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a_disc.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..80ff91b8606 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1a.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +# This is the standard "tdnn" system, built in nnet3; this script +# is the version that's meant to run with data-cleanup, that doesn't +# support parallel alignments. + + +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn1a_sp exp/nnet3_cleaned/tdnn1b_sp +# System tdnn1a_sp tdnn1b_sp +# WER on dev(orig) 11.9 11.7 +# WER on dev(rescored) 11.2 10.9 +# WER on test(orig) 11.6 11.7 +# WER on test(rescored) 11.0 11.0 +# Final train prob -0.9255 -0.9416 +# Final valid prob -1.1842 -1.1496 +# Final train acc 0.7245 0.7241 +# Final valid acc 0.6771 0.6788 + + +# by default, with cleanup: +# local/nnet3/run_tdnn.sh + +# without cleanup: +# local/nnet3/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
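+# (any of the variables below can be overridden on the command line via +# utils/parse_options.sh; for example, to rerun just the decoding you could +# pass something like --stage 14 --decode-nj 20 -- the exact stage number to +# use is whatever guards the decoding block further down this script.)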
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix=1a #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. + +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0" +remove_egs=true +relu_dim=850 +num_epochs=3 + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat </dev/null + for dset in dev test; do + ( + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh new file mode 100755 index 00000000000..379c8040a27 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh @@ -0,0 +1,169 @@ +#!/bin/bash + +# This is the standard "tdnn" system, built in nnet3; this script +# is the version that's meant to run with data-cleanup, that doesn't +# support parallel alignments. + + +# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn1b_sp +# exp/nnet3_cleaned/tdnn1b_sp: num-iters=240 nj=2..12 num-params=10.3M dim=40+100->4187 combine=-0.95->-0.95 loglike:train/valid[159,239,combined]=(-1.01,-0.95,-0.94/-1.18,-1.16,-1.15) accuracy:train/valid[159,239,combined]=(0.71,0.72,0.72/0.67,0.68,0.68) + +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn1a_sp exp/nnet3_cleaned/tdnn1b_sp +# System tdnn1a_sp tdnn1b_sp +# WER on dev(orig) 11.9 11.7 +# WER on dev(rescored) 11.2 10.9 +# WER on test(orig) 11.6 11.7 +# WER on test(rescored) 11.0 11.0 +# Final train prob -0.9255 -0.9416 +# Final valid prob -1.1842 -1.1496 +# Final train acc 0.7245 0.7241 +# Final valid acc 0.6771 0.6788 + + +# by default, with cleanup: +# local/nnet3/run_tdnn.sh + +# without cleanup: +# local/nnet3/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix=1b #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. + +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +remove_egs=true +relu_dim=850 +srand=0 +reporting_email=dpovey@gmail.com +# set common_egs_dir to use previously dumped egs. +common_egs_dir= + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=850 + relu-renorm-layer name=tdnn2 dim=850 input=Append(-1,2) + relu-renorm-layer name=tdnn3 dim=850 input=Append(-3,3) + relu-renorm-layer name=tdnn4 dim=850 input=Append(-7,2) + relu-renorm-layer name=tdnn5 dim=850 input=Append(-3,3) + relu-renorm-layer name=tdnn6 dim=850 + output-layer name=output dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=3 \ + --trainer.samples-per-iter=400000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=12 \ + --trainer.optimization.initial-effective-lrate=0.0015 \ + --trainer.optimization.final-effective-lrate=0.00015 \ + --trainer.optimization.minibatch-size=256,128 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # note: for TDNNs, looped decoding gives exactly the same results + # as regular decoding, so there is no point in testing it separately. + # We use regular decoding because it supports multi-threaded (we just + # didn't create the binary for that, for looped decoding, so far). + rm $dir/.error || true 2>/dev/null + for dset in dev test; do + ( + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh new file mode 100755 index 00000000000..f1502dd2761 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh @@ -0,0 +1,228 @@ +#!/bin/bash + +# this is a TDNN+LSTM system; the configuration is similar to +# local/chain/tuning/run_tdnn_lstm_1e.sh, but a non-chain nnet3 system, and +# with 1.5 times larger hidden dimensions. 
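+# (concretely, "1.5 times larger" means 768-dimensional TDNN layers and LSTM +# cell-dims with 192-dimensional projections here, versus 512 and 128 in the +# chain system; see the xconfig section below.)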
+ + +# local/nnet3/compare_wer.sh --looped exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1b_sp +# System tdnn_lstm1a_sp tdnn_lstm1b_sp +# WER on dev(orig) 11.0 11.0 +# [looped:] 11.0 11.1 +# WER on dev(rescored) 10.3 10.3 +# [looped:] 10.3 10.5 +# WER on test(orig) 10.8 10.6 +# [looped:] 10.7 10.7 +# WER on test(rescored) 10.1 9.9 +# [looped:] 10.0 10.0 +# Final train prob -0.6881 -0.6897 +# Final valid prob -0.7796 -0.7989 +# Final train acc 0.7954 0.7946 +# Final valid acc 0.7611 0.7582 + + +# by default, with cleanup: +# local/nnet3/run_tdnn_lstm.sh + +# without cleanup: +# local/nnet3/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned + +# Options which are not passed through to run_ivector_common.sh +affix=1a +common_egs_dir= +reporting_email= + +# LSTM options +train_stage=-10 +label_delay=5 + +# training chunk-options +chunk_width=40,30,20 +chunk_left_context=40 +chunk_right_context=0 +# decode chunk-size options (for non-looped decoding) +extra_left_context=50 +extra_right_context=0 + +# training options +srand=0 +remove_egs=true + +#decode options +extra_left_context= +extra_right_context= +frames_per_chunk= + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=768 + relu-renorm-layer name=tdnn2 dim=768 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ !
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=10000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=15 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 --extra-right-context-final 0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +if [ $stage -le 15 ]; then + # 'looped' decoding. + # note: you should NOT do this decoding step for setups that have bidirectional + # recurrence, like BLSTMs-- it doesn't make sense and will give bad results. + # we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding).
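+  # (for the interaction of the decay-time option with looped decoding, + # compare this script with run_tdnn_lstm_1b.sh, which removes decay-time=20 + # from the fast-lstmp layers as a baseline.)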
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + $graph_dir data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b.sh new file mode 100755 index 00000000000..8b8af6eff78 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b.sh @@ -0,0 +1,240 @@ +#!/bin/bash + +# 1b is as 1a, but removing the decay-time option as a baseline. + +# the decay-time option does seem to be having the expected interaction with +# 'looped' decoding, i.e. with the decay-time option we don't get a degradation +# from looped decoding (if anything, with decay time, looped decoding is a +# little better than baseline decoding). + +# local/nnet3/compare_wer.sh --looped exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1b_sp +# System tdnn_lstm1a_sp tdnn_lstm1b_sp +# WER on dev(orig) 11.0 11.0 +# [looped:] 11.0 11.1 +# WER on dev(rescored) 10.3 10.3 +# [looped:] 10.3 10.5 +# WER on test(orig) 10.8 10.6 +# [looped:] 10.7 10.7 +# WER on test(rescored) 10.1 9.9 +# [looped:] 10.0 10.0 +# Final train prob -0.6881 -0.6897 +# Final valid prob -0.7796 -0.7989 +# Final train acc 0.7954 0.7946 +# Final valid acc 0.7611 0.7582 + + + +# this is a TDNN+LSTM system; the configuration is similar to +# local/chain/tuning/run_tdnn_lstm_1e.sh, but a non-chain nnet3 system, and +# with 1.5 times larger hidden dimensions. + +# by default, with cleanup: +# local/nnet3/run_tdnn_lstm.sh + +# without cleanup: +# local/nnet3/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned + +# Options which are not passed through to run_ivector_common.sh +affix=1b +common_egs_dir= +reporting_email= + +# LSTM options +train_stage=-10 +label_delay=5 + +# training chunk-options +chunk_width=40,30,20 +chunk_left_context=40 +chunk_right_context=0 +# decode chunk-size options (for non-looped decoding) +extra_left_context=50 +extra_right_context=0 + +# training options +srand=0 +remove_egs=true + +#decode options +extra_left_context= +extra_right_context= +frames_per_chunk= + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=768 + relu-renorm-layer name=tdnn2 dim=768 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 delay=-3 + relu-renorm-layer name=tdnn3 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 delay=-3 + relu-renorm-layer name=tdnn5 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=10000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=15 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + # caution: we don't set the --frames-per-chunk here, we just use the + # default value of 50, which happens to be suitable because it's + # close to the primary chunk_width of 40. 
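+      # (if you changed chunk_width to something far from 50, you would +      # probably want to pass the chunk size explicitly here, e.g. +      # --frames-per-chunk 40 -- steps/nnet3/decode.sh accepts this option, +      # and the value shown is only an illustration.)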
+ steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 --extra-right-context-final 0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +if [ $stage -le 15 ]; then + # 'looped' decoding. + # note: you should NOT do this decoding step for setups that have bidirectional + # recurrence, like BLSTMs-- it doesn't make sense and will give bad results. + # we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + $graph_dir data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1c.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1c.sh new file mode 100755 index 00000000000..1d3b12f2697 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1c.sh @@ -0,0 +1,234 @@ +#!/bin/bash + +# run_tdnn_lstm_1c.sh is as run_tdnn_lstm_1a.sh, but about 1.5 times larger +# chunk lengths than 1a. +# There doesn't seem to be any advantage in the longer chunk lengths. + +# this is a TDNN+LSTM system; the configuration is similar to +# local/chain/tuning/run_tdnn_lstm_1e.sh, but a non-chain nnet3 system, and +# with 1.5 times larger hidden dimensions. + +# local/nnet3/compare_wer.sh --looped exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1b_sp exp/nnet3_cleaned/tdnn_lstm1c_sp +# System tdnn_lstm1a_sp tdnn_lstm1b_sp tdnn_lstm1c_sp +# WER on dev(orig) 11.0 11.0 11.0 +# [looped:] 11.0 11.1 10.9 +# WER on dev(rescored) 10.3 10.3 10.4 +# [looped:] 10.3 10.5 10.3 +# WER on test(orig) 10.8 10.6 10.8 +# [looped:] 10.7 10.7 10.7 +# WER on test(rescored) 10.1 9.9 10.1 +# [looped:] 10.0 10.0 10.1 +# Final train prob -0.6881 -0.6897 -0.5998 +# Final valid prob -0.7796 -0.7989 -0.8542 +# Final train acc 0.7954 0.7946 0.7988 +# Final valid acc 0.7611 0.7582 0.7521 + + + +# by default, with cleanup: +# local/nnet3/run_tdnn_lstm.sh + +# without cleanup: +# local/nnet3/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly).
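+# (relative to 1a, the only chunk-length change is chunk_width: 60,50,40,30 +# below, versus 40,30,20 in run_tdnn_lstm_1a.sh.)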
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned + +# Options which are not passed through to run_ivector_common.sh +affix=1c +common_egs_dir= +reporting_email= + +# LSTM options +train_stage=-10 +label_delay=5 + +# training chunk-options +chunk_width=60,50,40,30 +chunk_left_context=40 +chunk_right_context=0 +# decode chunk-size options (for non-looped decoding) +extra_left_context=50 +extra_right_context=0 + +# training options +srand=0 +remove_egs=true + +#decode options +extra_left_context= +extra_right_context= +frames_per_chunk= + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=768 + relu-renorm-layer name=tdnn2 dim=768 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=10000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=15 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 --extra-right-context-final 0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +if [ $stage -le 15 ]; then + # 'looped' decoding. + # note: you should NOT do this decoding step for setups that have bidirectional + # recurrence, like BLSTMs-- it doesn't make sense and will give bad results. + # we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
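+  # (the value 30 below is essentially arbitrary: in looped decoding the + # network state is carried over across chunk boundaries, so the chunk size + # mainly affects speed, not accuracy.)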
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + $graph_dir data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +exit 0; diff --git a/egs/wsj/s5/steps/info/chain_dir_info.pl b/egs/wsj/s5/steps/info/chain_dir_info.pl index b43e1752ee8..1d659b89c89 100755 --- a/egs/wsj/s5/steps/info/chain_dir_info.pl +++ b/egs/wsj/s5/steps/info/chain_dir_info.pl @@ -17,8 +17,8 @@ "This script extracts some important information from the logs\n" . "and displays it on a single (rather long) line.\n" . "The --debug option is just to debug the script itself.\n" . - "This program exits with status 0 if it seems like the argument\n" . - "really was a GMM dir, and 1 otherwise.\n"; + "This program exits with status 0 if it seems like the arguments\n" . + "really were of the expected directory type, and 1 otherwise.\n"; exit(1); } diff --git a/egs/wsj/s5/steps/info/nnet2_dir_info.pl b/egs/wsj/s5/steps/info/nnet2_dir_info.pl index 6ef10a2e03d..e572245e0ca 100755 --- a/egs/wsj/s5/steps/info/nnet2_dir_info.pl +++ b/egs/wsj/s5/steps/info/nnet2_dir_info.pl @@ -17,8 +17,8 @@ "This script extracts some important information from the logs\n" . "and displays it on a single (rather long) line.\n" . "The --debug option is just to debug the script itself.\n" . - "This program exits with status 0 if it seems like the argument\n" . - "really was a GMM dir, and 1 otherwise.\n"; + "This program exits with status 0 if it seems like the arguments\n" . + "really were of the expected directory type, and 1 otherwise.\n"; exit(1); } diff --git a/egs/wsj/s5/steps/info/nnet3_dir_info.pl b/egs/wsj/s5/steps/info/nnet3_dir_info.pl index 89b4c398d46..46ddd9f822c 100755 --- a/egs/wsj/s5/steps/info/nnet3_dir_info.pl +++ b/egs/wsj/s5/steps/info/nnet3_dir_info.pl @@ -17,8 +17,8 @@ "This script extracts some important information from the logs\n" . "and displays it on a single (rather long) line.\n" . "The --debug option is just to debug the script itself.\n" . - "This program exits with status 0 if it seems like the argument\n" . - "really was a GMM dir, and 1 otherwise.\n"; + "This program exits with status 0 if it seems like the arguments\n" . + "really were of the expected directory type, and 1 otherwise.\n"; exit(1); } diff --git a/egs/wsj/s5/steps/info/nnet3_disc_dir_info.pl b/egs/wsj/s5/steps/info/nnet3_disc_dir_info.pl new file mode 100755 index 00000000000..10bdb70fc9f --- /dev/null +++ b/egs/wsj/s5/steps/info/nnet3_disc_dir_info.pl @@ -0,0 +1,172 @@ +#!/usr/bin/perl -w + +use Fcntl; + +# we may at some point support options. + +$debug = 0; # we set it to 1 for debugging the script itself. + +if ($ARGV[0] eq "--debug") { + $debug = 1; + shift @ARGV; +} + +if (@ARGV == 0) { + print STDERR "Usage: steps/info/nnet3_disc_dir_info.pl [--debug] [ ... ]\n" . + "e.g: steps/info/nnet3_dir_info.pl exp/nnet3/tdnn_sp_smbr\n" . + "This script extracts some important information from the logs\n" . + "and displays it on a few lines.\n" . + "The --debug option is just to debug the script itself.\n" . 
+ "This program exits with status 0 if it seems like the argument\n" . + "really was a discriminative-training dir, and 1 otherwise.\n"; + exit(1); +} + +if (@ARGV > 1) { + # repeatedly invoke this program with each of the remaining args. + $exit_status = 0; + if ($debug) { $debug_opt = "--debug " } else { $debug_opt = ""; } + foreach $dir (@ARGV) { + if (system("$0 $debug_opt$dir") != 0) { + $exit_status = 1; + } + } + exit($exit_status); +} + +# from this point we can assume we're invoked with one argument. +$nnet_dir = shift @ARGV; + +# This function returns an array of iteration numbers, one +# for each epoch that has already completed (but including +# epoch zero)... e.g. +# it might return (0, 194, 388, 582). +# This is done by reading the soft links, e.g. epoch1.mdl -> 194.mdl +sub get_iters_for_epochs { + my @ans = (); + for (my $n = 0; 1; $n++) { + if (-l "$nnet_dir/epoch$n.mdl") { + my $link_name = readlink("$nnet_dir/epoch$n.mdl"); + if ($link_name =~ m/^(\d+).mdl/) { + my $iter = $1; + push @ans, $iter; + } else { + die "unexpected link name $nnet_dir/epoch$n.mdl -> $link_name"; + } + } else { + if (@ans == 0) { + die "$nnet_dir does not seem to be a discriminative-training dir " . + "(expected a link $nnet_dir/epoch0.mdl)"; + } + return @ans; + } + } +} + + +sub get_num_jobs { + my $j = 1; + for (my $j = 1; 1; $j++) { + if (! -f "$nnet_dir/log/train.0.$j.log") { + if ($j == 1) { + die "$nnet_dir does not seem to be a discriminative-training dir " . + "(expected $nnet_dir/log/train.0.1.log to exist)"; + } else { + return $j - 1; + } + } + } +} + +# returns a string describing the effective learning rate and possibly +# any final-layer-factor. +sub get_effective_learning_rate_str { + # effective learning rate is the actual learning rate divided by the + # number of jobs. + my $convert_log = "$nnet_dir/log/convert.log"; + if (-f $convert_log) { + open(F, "<$convert_log"); + while (<F>) { + if (m/--edits/) { + if (m/set-learning-rate learning-rate=(\S+); set-learning-rate name=output.affine learning-rate=([^"']+)["']/) { + my $learning_rate = $1; + my $last_layer_factor = sprintf("%.2f", $2 / $1); + my $num_jobs = get_num_jobs(); + my $effective_learning_rate = sprintf("%.3g", $learning_rate / $num_jobs); + close(F); + return "effective-lrate=$effective_learning_rate;last-layer-factor=$last_layer_factor"; + } elsif (m/set-learning-rate learning-rate=([^"']+)["']/) { + my $learning_rate = $1; + my $num_jobs = get_num_jobs(); + my $effective_learning_rate = sprintf("%.3g", $learning_rate / $num_jobs); + close(F); + return "effective-lrate=$effective_learning_rate"; + } + } + } + } else { + die("Expected file $convert_log to exist"); + } + close(F); + return "lrate=??"; # could not parse it from the log. +} + + +# prints some info about the objective function... +sub get_objf_str { + my @iters_for_epochs = get_iters_for_epochs(); + if (@iters_for_epochs == 1) { + die("No epochs have finished in directory $nnet_dir") + } + # will produce output like: + # iters-per-epoch=123;epoch[0,1,2,3,4]:train-objf=[0.89,0.92,0.93,0.94],valid-objf=[...],train-counts=[...],valid-counts=[...]" + # the "counts" are the average num+den occupation counts in the lattices; it's a measure of how much confusability + # there still is in the lattices. + my $iters_per_epoch = $iters_for_epochs[1] - $iters_for_epochs[0]; + my $ans = "iters-per-epoch=$iters_per_epoch"; + $ans .= ";epoch[" . join(",", 0..$#iters_for_epochs) . "]:"; + my @train_objfs = (); + my @train_counts = (); + my @valid_objfs = (); + my @valid_counts = (); + foreach $iter (@iters_for_epochs) { + if ($iter > 0) { $iter -= 1; } # last iter will not exist. + my $train_log = "$nnet_dir/log/compute_objf_train.$iter.log"; + my $valid_log = "$nnet_dir/log/compute_objf_valid.$iter.log"; + if (!open (T, "<$train_log")){ print STDERR "$0: warning: Expected file $train_log to exist\n"; } + if (!open (V, "<$valid_log")){ print STDERR "$0: warning: Expected file $valid_log to exist\n"; } + my $train_count = "??"; + my $valid_count = "??"; + my $train_objf = "??"; + my $valid_objf = "??"; + while (<T>) { + if (m/num\+den count.+is (\S+) per frame/) { $train_count = sprintf("%.2f", $1); } + if (m/Overall.+ is (\S+) per frame/) { $train_objf = sprintf("%.2f", $1); } + } + close(T); + while (<V>) { + if (m/num\+den count.+is (\S+) per frame/) { $valid_count = sprintf("%.2f", $1); } + if (m/Overall.+ is (\S+) per frame/) { $valid_objf = sprintf("%.2f", $1); } + } + push @train_objfs, $train_objf; + push @train_counts, $train_count; + push @valid_objfs, $valid_objf; + push @valid_counts, $valid_count; + close(V); + } + $ans .= "train-objf=[" . join(",", @train_objfs) . + "],valid-objf=[" . join(",", @valid_objfs) . + "],train-counts=[" . join(",", @train_counts) . + "],valid-counts=[" . join(",", @valid_counts) . "]"; + return $ans; +} + + + + +$output_string = "$nnet_dir:num-jobs=".get_num_jobs().";" . + get_effective_learning_rate_str() . ";" . get_objf_str(); + +print "$output_string\n"; + +exit(0); From c96a0aa0f239001723c595d47033d413a70714e1 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 26 Jan 2017 14:14:48 -0500 Subject: [PATCH 378/530] [build]: use BSD-compatible mktemp in get_version*.sh; thanks; @andrely. --- src/base/get_version.sh | 2 +- src/doc/get_version_info.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/base/get_version.sh b/src/base/get_version.sh index 94efd41b631..3395ffaa6ff 100755 --- a/src/base/get_version.sh +++ b/src/base/get_version.sh @@ -77,7 +77,7 @@ if [ -z "$version" ]; then fi # Write version info to a temporary file. -temp=$(mktemp) +temp=$(mktemp /tmp/tmp.XXXXX) trap 'rm -f "$temp"' EXIT echo "// This file was automatically created by ./get_version.sh." > $temp echo "// It is only included by ./kaldi-error.cc." >> $temp diff --git a/src/doc/get_version_info.sh b/src/doc/get_version_info.sh index 5b6de79e04c..23b22c2bf4f 100755 --- a/src/doc/get_version_info.sh +++ b/src/doc/get_version_info.sh @@ -35,7 +35,7 @@ for tuple in "5.0 master c160a9883"; do - tempfile=$(mktemp) + tempfile=$(mktemp /tmp.XXXX) echo "$0: for version=$major_minor_number, writing git output to $tempfile" patch_number=0 From 6ec2eb0031534e26450ed64e50d44d01508f4229 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 26 Jan 2017 14:17:25 -0500 Subject: [PATCH 379/530] [build]: fix to previous commit regarding usage of mktemp. --- src/base/get_version.sh | 2 +- src/doc/get_version_info.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/base/get_version.sh b/src/base/get_version.sh index 3395ffaa6ff..bf5efa8c14a 100755 --- a/src/base/get_version.sh +++ b/src/base/get_version.sh @@ -77,7 +77,7 @@ if [ -z "$version" ]; then fi # Write version info to a temporary file. -temp=$(mktemp /tmp/tmp.XXXXX) +temp=$(mktemp /tmp/temp.XXXXXX) trap 'rm -f "$temp"' EXIT echo "// This file was automatically created by ./get_version.sh." > $temp echo "// It is only included by ./kaldi-error.cc."
>> $temp diff --git a/src/doc/get_version_info.sh b/src/doc/get_version_info.sh index 23b22c2bf4f..bad5bdacc7b 100755 --- a/src/doc/get_version_info.sh +++ b/src/doc/get_version_info.sh @@ -35,7 +35,7 @@ for tuple in "5.0 master c160a9883"; do - tempfile=$(mktemp /tmp.XXXX) + tempfile=$(mktemp /tmp/temp.XXXXXX) echo "$0: for version=$major_minor_number, writing git output to $tempfile" patch_number=0 From 3b7fd1f928b2498aad5b060996c62a6165afb2a1 Mon Sep 17 00:00:00 2001 From: "Nickolay V. Shmyrev" Date: Thu, 26 Jan 2017 23:16:30 +0300 Subject: [PATCH 380/530] [scripts] Set path in utils/ssh.pl just like in slurm.pl or queue.pl to avoid path problems (#1379) --- egs/wsj/s5/utils/ssh.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/egs/wsj/s5/utils/ssh.pl b/egs/wsj/s5/utils/ssh.pl index 8f2755a5ccb..5d3e3e44d71 100755 --- a/egs/wsj/s5/utils/ssh.pl +++ b/egs/wsj/s5/utils/ssh.pl @@ -161,6 +161,7 @@ # bash commands. print S "set -e\n"; # if any of the later commands fails, we want it to exit. print S "cd $cwd\n"; + print S ". ./path.sh\n"; print S "mkdir -p $logdir\n"; print S "time1=\`date +\"%s\"\`\n"; print S "( echo '#' Running on \`hostname\`\n"; From 2b667a35443db2e4abb9700c934a6ed183f44085 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 26 Jan 2017 21:36:03 -0500 Subject: [PATCH 381/530] [egs]: fix some soft links --- egs/tedlium/s5_r2/local/chain/run_tdnn_lstm.sh | 2 +- egs/tedlium/s5_r2/local/chain/run_tdnn_lstm_disc.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 120000 egs/tedlium/s5_r2/local/chain/run_tdnn_lstm_disc.sh diff --git a/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm.sh b/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm.sh index 8e647598556..fbc28248491 120000 --- a/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm.sh +++ b/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm.sh @@ -1 +1 @@ -tuning/run_tdnn_lstm_1a.sh \ No newline at end of file +tuning/run_tdnn_lstm_1e.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm_disc.sh b/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm_disc.sh new file mode 120000 index 00000000000..d4268b4185a --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm_disc.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1e_disc.sh \ No newline at end of file From 193bb9214aeb8887db16370d742be0249264517c Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 26 Jan 2017 21:45:23 -0500 Subject: [PATCH 382/530] [build]: minor bug fix in maintenance/documentation script. --- src/doc/get_version_info.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/doc/get_version_info.sh b/src/doc/get_version_info.sh index bad5bdacc7b..b37ac5f400f 100755 --- a/src/doc/get_version_info.sh +++ b/src/doc/get_version_info.sh @@ -68,14 +68,14 @@ for tuple in "5.0 master c160a9883"; do $pull_request_number = $1; $pre_match = $`; # part before what was matched. $pre_match =~ s//&rt;/g; + $pre_match =~ s/>/>/g; # if commit subject line ends with e.g. (#1302), which will # be a pull request; create a href to github for that. $commit_subject = $pre_match . "(#$pull_request_number)"; } else { $commit_subject =~ s//&rt;/g; + $commit_subject =~ s/>/>/g; } $commit_href = "$short_commit"; @@ -85,4 +85,6 @@ for tuple in "5.0 master c160a9883"; do print "

\n"; } ' "$major_minor_number" >$htmlfile || exit 1 echo "$0: generated file $htmlfile with $(wc -l <$htmlfile) lines" + # you might want to comment the command below if you are debugging the script. + rm $tempfile done From 5af1983f1bf58d7cf2a0f6783bc1fbb1954f77f9 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 27 Jan 2017 14:32:36 -0500 Subject: [PATCH 383/530] [scripts] Fix nnet3 training scripts for bug introduced in #1371 --- egs/wsj/s5/steps/nnet3/chain/train.py | 4 ++-- egs/wsj/s5/steps/nnet3/train_dnn.py | 4 ++-- egs/wsj/s5/steps/nnet3/train_raw_dnn.py | 4 ++-- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 4 ++-- egs/wsj/s5/steps/nnet3/train_rnn.py | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index 743a854e160..b8d7a55671d 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -469,7 +469,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): if iter % reporting_iter_interval == 0: # lets do some reporting [report, times, data] = ( - nnet3_log_parse.generate_accuracy_report( + nnet3_log_parse.generate_acc_logprob_report( args.dir, "log-probability")) message = report subject = ("Update : Expt {dir} : " @@ -507,7 +507,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): remove_egs=remove_egs) # do some reporting - [report, times, data] = nnet3_log_parse.generate_accuracy_report( + [report, times, data] = nnet3_log_parse.generate_acc_logprob_report( args.dir, "log-probability") if args.email is not None: common_lib.send_mail(report, "Update : Expt {0} : " diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index 8ab3959885a..342ac1f09b4 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -336,7 +336,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): if iter % reporting_iter_interval == 0: # lets do some reporting [report, times, data] = ( - nnet3_log_parse.generate_accuracy_report(args.dir)) + nnet3_log_parse.generate_acc_logprob_report(args.dir)) message = report subject = ("Update : Expt {dir} : " "Iter {iter}".format(dir=args.dir, iter=iter)) @@ -385,7 +385,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): remove_egs=remove_egs) # do some reporting - [report, times, data] = nnet3_log_parse.generate_accuracy_report(args.dir) + [report, times, data] = nnet3_log_parse.generate_acc_logprob_report(args.dir) if args.email is not None: common_lib.send_mail(report, "Update : Expt {0} : " "complete".format(args.dir), args.email) diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index 3f946d13de8..cf71e9dd846 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -341,7 +341,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): if iter % reporting_iter_interval == 0: # lets do some reporting [report, times, data] = ( - nnet3_log_parse.generate_accuracy_report(args.dir)) + nnet3_log_parse.generate_acc_logprob_report(args.dir)) message = report subject = ("Update : Expt {dir} : " "Iter {iter}".format(dir=args.dir, iter=iter)) @@ -385,7 +385,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): get_raw_nnet_from_am=False) # do some reporting - [report, times, data] = nnet3_log_parse.generate_accuracy_report(args.dir) + [report, times, data] = 
nnet3_log_parse.generate_acc_logprob_report(args.dir) if args.email is not None: common_lib.send_mail(report, "Update : Expt {0} : " "complete".format(args.dir), args.email) diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index cf7ae3f2b45..cfdae51cfa8 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -444,7 +444,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): if iter % reporting_iter_interval == 0: # lets do some reporting [report, times, data] = ( - nnet3_log_parse.generate_accuracy_report(args.dir)) + nnet3_log_parse.generate_acc_logprob_report(args.dir)) message = report subject = ("Update : Expt {dir} : " "Iter {iter}".format(dir=args.dir, iter=iter)) @@ -488,7 +488,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): get_raw_nnet_from_am=False) # do some reporting - [report, times, data] = nnet3_log_parse.generate_accuracy_report(args.dir) + [report, times, data] = nnet3_log_parse.generate_acc_logprob_report(args.dir) if args.email is not None: common_lib.send_mail(report, "Update : Expt {0} : " "complete".format(args.dir), args.email) diff --git a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index 35745bce7b2..97e037f99fe 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -436,7 +436,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): if iter % reporting_iter_interval == 0: # lets do some reporting [report, times, data] = ( - nnet3_log_parse.generate_accuracy_report(args.dir)) + nnet3_log_parse.generate_acc_logprob_report(args.dir)) message = report subject = ("Update : Expt {dir} : " "Iter {iter}".format(dir=args.dir, iter=iter)) @@ -485,7 +485,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): remove_egs=remove_egs) # do some reporting - [report, times, data] = nnet3_log_parse.generate_accuracy_report(args.dir) + [report, times, data] = nnet3_log_parse.generate_acc_logprob_report(args.dir) if args.email is not None: common_lib.send_mail(report, "Update : Expt {0} : " "complete".format(args.dir), args.email) From 2f8c33d139f96468c0b76c75beab7f4ad264bbf2 Mon Sep 17 00:00:00 2001 From: Xiaohui Zhang Date: Fri, 27 Jan 2017 16:39:59 -0500 Subject: [PATCH 384/530] [scripts,egs] make steps/dict/learn_lexicon.sh more robust RE empty g2p prons (#1378) --- egs/tedlium/s5_r2/local/run_learn_lex.sh | 2 +- egs/wsj/s5/steps/dict/learn_lexicon.sh | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/egs/tedlium/s5_r2/local/run_learn_lex.sh b/egs/tedlium/s5_r2/local/run_learn_lex.sh index 4960fbd848e..a2a6f2e46b8 100755 --- a/egs/tedlium/s5_r2/local/run_learn_lex.sh +++ b/egs/tedlium/s5_r2/local/run_learn_lex.sh @@ -72,7 +72,7 @@ if [ $stage -le 0 ]; then $data/train_vocab.txt | sort > $data/oov_train.txt || exit 1; steps/dict/apply_g2p.sh --var-counts 4 $data/oov_train.txt \ $g2p_mdl_dir exp/g2p/oov_lex_train || exit 1; - cat exp/g2p/oov_lex_train/lexicon.lex | cut -f1,3 | \ + cat exp/g2p/oov_lex_train/lexicon.lex | awk '{if (NF>=3) print $0}' | cut -f1,3 | \ tr -s '\t' ' ' | sort | uniq > $data/lexicon_oov_g2p.txt || exit 1; fi diff --git a/egs/wsj/s5/steps/dict/learn_lexicon.sh b/egs/wsj/s5/steps/dict/learn_lexicon.sh index 4b4c177d554..0ea580528ee 100755 --- a/egs/wsj/s5/steps/dict/learn_lexicon.sh +++ b/egs/wsj/s5/steps/dict/learn_lexicon.sh @@ -88,7 +88,7 @@ if [ $# -lt 6 ] || [ $# -gt 7 ]; then echo " --cmd '$cmd' # 
command to submit jobs with (e.g. run.pl, queue.pl)" echo " --nj # number of parallel jobs" echo " --oov-symbol # (required option) oov symbol, like ." - echo " --g2p-pron-candidates # A lexicon file containing g2p generated pronunciations, for words in acoustic training " + echo " --lexicon-g2p # A lexicon file containing g2p generated pronunciations, for words in acoustic training " echo " # data / target vocabulary. It's optional." echo " --min-prob # The cut-off parameter used to select pronunciation candidates from phonetic" echo " # decoding. We remove pronunciations with probabilities less than this value" @@ -168,7 +168,8 @@ if [ $stage -le 0 ]; then # create an empty list of g2p generated prons, if it's not given. touch $dir/lexicon_g2p.txt else - cp $lexicon_g2p $dir/lexicon_g2p.txt 2>/dev/null + cat $lexicon_g2p | awk '{if (NF<2) {print "There is an empty pronunciation in lexicon_g2p.txt. Exit." \ + > "/dev/stderr"; exit 1} print $0}' - > $dir/lexicon_g2p.txt || exit 1; fi fi From b0e550eeaec99092695ac92e8713ca299cd81c18 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 27 Jan 2017 21:09:47 -0500 Subject: [PATCH 385/530] [src,egs,scripts]: improve use of sum-to-one penalty in combination, provide script support; examples of use of dropout in TDNN+LSTMs; change minibatch-size in combination phase. --- .../s5_r2/local/chain/compare_wer_general.sh | 8 +- .../local/chain/tuning/run_tdnn_lstm_1e.sh | 2 +- .../local/chain/tuning/run_tdnn_lstm_1k.sh | 339 ++++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1l.sh | 330 +++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1m.sh | 330 +++++++++++++++++ .../nnet3/train/chain_objf/acoustic_model.py | 12 +- egs/wsj/s5/steps/libs/nnet3/train/common.py | 5 + .../libs/nnet3/train/dropout_schedule.py | 16 +- .../nnet3/train/frame_level_objf/common.py | 20 +- egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py | 2 +- egs/wsj/s5/steps/nnet3/chain/train.py | 6 +- egs/wsj/s5/steps/nnet3/train_dnn.py | 5 +- egs/wsj/s5/steps/nnet3/train_raw_dnn.py | 5 +- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 4 +- egs/wsj/s5/steps/nnet3/train_rnn.py | 4 +- src/nnet3/nnet-chain-combine.cc | 25 +- src/nnet3/nnet-combine.cc | 25 +- 17 files changed, 1093 insertions(+), 45 deletions(-) create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh diff --git a/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh b/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh index 21ba1720e3a..d3acae200b8 100755 --- a/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh +++ b/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh @@ -77,28 +77,28 @@ if $used_epochs; then exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
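 # (note on the change below: prefixing each output line with "#" presumably
 # lets the printed comparison table be pasted straight into the tuning
 # scripts as comment blocks, like the results tables in run_tdnn_lstm_1k.sh.)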
fi -echo -n "Final train prob " +echo -n "# Final train prob " for x in $*; do prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo -echo -n "Final valid prob " +echo -n "# Final valid prob " for x in $*; do prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo -echo -n "Final train prob (xent)" +echo -n "# Final train prob (xent)" for x in $*; do prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo -echo -n "Final valid prob (xent)" +echo -n "# Final valid prob (xent)" for x in $*; do prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') printf "% 10s" $prob diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh index 5bfdc68fa3f..7f0b9588b66 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -73,7 +73,7 @@ frames_per_chunk_primary=140 train_stage=-10 tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. tdnn_lstm_affix=1e #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. -common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. +common_egs_dir= # you can set this to use previously dumped egs. # End configuration section. echo "$0 $@" # Print the command line for logging diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh new file mode 100755 index 00000000000..ab9d6ce6342 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh @@ -0,0 +1,339 @@ +#!/bin/bash + +# 1k is as 1e, but introducing a dropout schedule. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,k,l,m}_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1k_sp_bi tdnn_lstm1l_sp_bi tdnn_lstm1m_sp_bi +# WER on dev(orig) 9.0 8.7 8.9 9.0 +# [looped:] 9.0 8.6 8.9 8.9 +# WER on dev(rescored) 8.4 7.9 8.2 8.2 +# [looped:] 8.4 7.8 8.2 8.3 +# WER on test(orig) 8.8 8.8 8.9 8.9 +# [looped:] 8.8 8.7 8.8 8.8 +# WER on test(rescored) 8.4 8.3 8.2 8.5 +# [looped:] 8.3 8.3 8.3 8.4 +# Final train prob -0.0648 -0.0693 -0.0768 -0.0807 +# Final valid prob -0.0827 -0.0854 -0.0943 -0.0931 +# Final train prob (xent) -0.8372 -0.8848 -0.9371 -0.9807 +# Final valid prob (xent) -0.9497 -0.9895 -1.0546 -1.0629 + + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). 
+# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1k #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + # note: the value of the dropout-proportion is not important, as it's + # controlled by the dropout schedule; what's important is that we set it. + lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-per-frame=true" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.7@0.5,0@0.75,0' \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh new file mode 100755 index 00000000000..e09df86558a --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh @@ -0,0 +1,330 @@ +#!/bin/bash + + +# 1l is as 1k, but having the dropout end at the end of training, not @0.75. + +# see run_tdnn_lstm_1k.sh for results. + + +# 1k is as 1e, but introducing a dropout schedule. + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1l #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + # note: the value of the dropout-proportion is not important, as it's + # controlled by the dropout schedule; what's important is that we set it. 
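+  # (concretely: the --trainer.dropout-schedule option passed to train.py in
+  # stage 18 is turned into per-iteration "nnet3-copy --edits='...' - - |"
+  # commands that rewrite dropout-proportion before each training job; see
+  # steps/libs/nnet3/train/dropout_schedule.py later in this patch.)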
+ lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-per-frame=true" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.7@0.5,0' \ + --trainer.optimization.combine-sum-to-one-penalty=0.001 \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh new file mode 100755 index 00000000000..3e75c9fe3e0 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh @@ -0,0 +1,330 @@ +#!/bin/bash + + +# 1m is as 1l, but having the dropout end at 0.1 +# see run_tdnn_lstm_1k.sh for results. + +# 1l is as 1k, but having the dropout end at the end of training. + +# 1k is as 1e, but introducing a dropout schedule. + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1m #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + # note: the value of the dropout-proportion is not important, as it's + # controlled by the dropout schedule; what's important is that we set it. 
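+  # (the schedule string set below via --trainer.dropout-schedule, here ending
+  # at 0.1, is interpolated linearly in the fraction of data processed and is
+  # applied by editing the model with nnet3-copy before each iteration.)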
+ lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-per-frame=true" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.7@0.5,0.1' \ + --trainer.optimization.combine-sum-to-one-penalty=0.001 \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py index feb065de411..e3ae02b4b09 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py @@ -528,7 +528,9 @@ def compute_progress(dir, iter, run_opts, wait=False, def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_str, egs_dir, left_context, right_context, leaky_hmm_coefficient, l2_regularize, - xent_regularize, run_opts, background_process_handler=None): + xent_regularize, run_opts, + background_process_handler=None, + sum_to_one_penalty=0.0): """ Function to do model combination In the nnet3 setup, the logic @@ -552,9 +554,11 @@ def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_st common_lib.run_job( """{command} {combine_queue_opt} {dir}/log/combine.log \ - nnet3-chain-combine --num-iters=40 \ + nnet3-chain-combine --num-iters=80 \ --l2-regularize={l2} --leaky-hmm-coefficient={leaky} \ - --enforce-sum-to-one=true --enforce-positive-weights=true \ + --enforce-sum-to-one={hard_enforce} \ + --sum-to-one-penalty={penalty} \ + --enforce-positive-weights=true \ --verbose=3 {dir}/den.fst {raw_models} \ "ark,bg:nnet3-chain-copy-egs --left-context={lc} \ --right-context={rc} ark:{egs_dir}/combine.cegs ark:- | \ @@ -567,6 +571,8 @@ def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_st lc=left_context, rc=right_context, l2=l2_regularize, leaky=leaky_hmm_coefficient, dir=dir, raw_models=" ".join(raw_model_strings), + hard_enforce=(sum_to_one_penalty <= 0), + penalty=sum_to_one_penalty, num_chunk_per_mb=num_chunk_per_minibatch_str, num_iters=num_iters, egs_dir=egs_dir)) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py index 3c20eb1831d..25852ab2806 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -721,6 +721,11 @@ def __init__(self, the final model combination stage. These models will themselves be averages of iteration-number ranges""") + self.parser.add_argument("--trainer.optimization.combine-sum-to-one-penalty", + type=float, dest='combine_sum_to_one_penalty', default=0.0, + help="""If > 0, activates 'soft' enforcement of the + sum-to-one penalty in combination (may be helpful + if using dropout). E.g. 1.0e-03.""") self.parser.add_argument("--trainer.optimization.momentum", type=float, dest='momentum', default=0.0, help="""Momentum used in update computation. 
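A note on the soft sum-to-one penalty this option controls: in the
nnet-combine.cc / nnet-chain-combine.cc changes later in this patch, a positive
--sum-to-one-penalty=p adds a term of -0.5 * p * (s - 1)^2 per component's
total weight s, or -0.5 * p * log(s)^2 when positive weights are enforced. The
following standalone Python sketch (illustration only, not Kaldi code) checks
the derivatives used in the patch by finite differences, and shows why the log
form was chosen: with exp()-parameterized weights, the quadratic form's
gradient w.r.t. the underlying parameters vanishes as s -> 0.

    import math

    def penalty_quadratic(s, p):   # used when weights may be negative
        return -0.5 * p * (s - 1.0) ** 2       # deriv w.r.t. s: p * (1.0 - s)

    def penalty_log(s, p):         # used with enforce-positive-weights
        return -0.5 * p * math.log(s) ** 2     # deriv w.r.t. s: -p * log(s) / s

    p, s, eps = 1.0e-03, 0.25, 1.0e-06
    for f, deriv in [(penalty_quadratic, p * (1.0 - s)),
                     (penalty_log, -p * math.log(s) / s)]:
        numeric = (f(s + eps, p) - f(s - eps, p)) / (2.0 * eps)
        assert abs(numeric - deriv) < 1.0e-08
    # gradient w.r.t. the exp()-parameterization (multiply by ds/dparam = s):
    quad_param_grad = p * (1.0 - s) * s          # -> 0 as s -> 0
    log_param_grad = -p * math.log(s)            # grows as s -> 0
    assert abs(quad_param_grad) < abs(log_param_grad)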
diff --git a/egs/wsj/s5/steps/libs/nnet3/train/dropout_schedule.py b/egs/wsj/s5/steps/libs/nnet3/train/dropout_schedule.py index d9cf3112e4a..0ad93e5977d 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/dropout_schedule.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/dropout_schedule.py @@ -13,6 +13,8 @@ logger.addHandler(logging.NullHandler()) +_debug_dropout = False + def _parse_dropout_option(dropout_option): """Parses the string option to --trainer.dropout-schedule and returns a list of dropout schedules for different component name patterns. @@ -53,11 +55,12 @@ def _parse_dropout_option(dropout_option): this_dropout_values = _parse_dropout_string(this_dropout_str) dropout_schedule.append((component_name, this_dropout_values)) - logger.info("Dropout schedules for component names is as follows:") - logger.info(": [(num_archives_processed), " - "(dropout_proportion) ...]") - for name, schedule in dropout_schedule: - logger.info("{0}: {1}".format(name, schedule)) + if _debug_dropout: + logger.info("Dropout schedules for component names is as follows:") + logger.info(": [(num_archives_processed), " + "(dropout_proportion) ...]") + for name, schedule in dropout_schedule: + logger.info("{0}: {1}".format(name, schedule)) return dropout_schedule @@ -236,7 +239,8 @@ def get_dropout_edit_string(dropout_schedule, data_fraction, iter_): dropout_info.append("pattern/dropout-proportion={0}/{1}".format( component_name, dropout_proportion)) - logger.info("On iteration %d, %s", iter_, ', '.join(dropout_info)) + if _debug_dropout: + logger.info("On iteration %d, %s", iter_, ', '.join(dropout_info)) return ("""nnet3-copy --edits='{edits}' - - |""".format( edits=";".join(edit_config_lines))) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index e697004aa99..94478b263f3 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -451,8 +451,10 @@ def compute_progress(dir, iter, egs_dir, left_context, right_context, def combine_models(dir, num_iters, models_to_combine, egs_dir, left_context, right_context, + minibatch_size_str, run_opts, background_process_handler=None, - chunk_width=None, get_raw_nnet_from_am=True): + chunk_width=None, get_raw_nnet_from_am=True, + sum_to_one_penalty=0.0): """ Function to do model combination In the nnet3 setup, the logic @@ -478,12 +480,6 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir, raise Exception('Model file {0} missing'.format(model_file)) raw_model_strings.append(model_file) - if chunk_width is not None: - # this is an RNN model - mbsize = int(1024.0/(common_train_lib.principal_chunk_width(chunk_width))) - else: - mbsize = 1024 - if get_raw_nnet_from_am: out_model = ("| nnet3-am-copy --set-raw-nnet=- {dir}/{num_iters}.mdl " "{dir}/combined.mdl".format(dir=dir, num_iters=num_iters)) @@ -495,8 +491,10 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir, common_lib.run_job( """{command} {combine_queue_opt} {dir}/log/combine.log \ - nnet3-combine --num-iters=40 \ - --enforce-sum-to-one=true --enforce-positive-weights=true \ + nnet3-combine --num-iters=80 \ + --enforce-sum-to-one={hard_enforce} \ + --sum-to-one-penalty={penalty} \ + --enforce-positive-weights=true \ --verbose=3 {raw_models} \ "ark,bg:nnet3-copy-egs {context_opts} \ ark:{egs_dir}/combine.egs ark:- | \ @@ -506,8 +504,10 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir, 
""".format(command=run_opts.command, combine_queue_opt=run_opts.combine_queue_opt, dir=dir, raw_models=" ".join(raw_model_strings), + hard_enforce=(sum_to_one_penalty <= 0), + penalty=sum_to_one_penalty, context_opts=context_opts, - mbsize=mbsize, + mbsize=minibatch_size_str, out_model=out_model, egs_dir=egs_dir)) diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py index 4b0908563fd..9d7f649c4b4 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py @@ -842,6 +842,6 @@ def generate_lstm_config(self): "dim-offset=0 dim={1}".format(name, cell_dim)) configs.append("dim-range-node name={0}.r_trunc input-node={0}.cr_trunc " "dim-offset={1} dim={2}".format(name, cell_dim, rec_proj_dim)) - configs.append("### End LSTM Layer '{0}'".format(name)) + configs.append("### End LSTM Layer '{0}'".format(name)) return configs diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index c7431696d88..1791aee665b 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -449,7 +449,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), - dropout_edit_string=common_lib.get_dropout_edit_string( + dropout_edit_string=common_train_lib.get_dropout_edit_string( args.dropout_schedule, float(num_archives_processed) / num_archives_to_process, iter), @@ -505,7 +505,9 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): l2_regularize=args.l2_regularize, xent_regularize=args.xent_regularize, run_opts=run_opts, - background_process_handler=background_process_handler) + background_process_handler=background_process_handler, + sum_to_one_penalty=args.combine_sum_to_one_penalty) + if args.cleanup: logger.info("Cleaning up the experiment directory " diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index dc3a030c48d..e56c09653f5 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -355,8 +355,9 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): models_to_combine=models_to_combine, egs_dir=egs_dir, left_context=left_context, right_context=right_context, - run_opts=run_opts, - background_process_handler=background_process_handler) + minibatch_size_str=args.minibatch_size, run_opts=run_opts, + background_process_handler=background_process_handler, + sum_to_one_penalty=args.combine_sum_to_one_penalty) if args.stage <= num_iters + 1: logger.info("Getting average posterior for purposes of " diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index 4e6eb29e2f9..f29782b0705 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -360,9 +360,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): dir=args.dir, num_iters=num_iters, models_to_combine=models_to_combine, egs_dir=egs_dir, left_context=left_context, right_context=right_context, - run_opts=run_opts, + minibatch_size_str=args.minibatch_size, run_opts=run_opts, background_process_handler=background_process_handler, - get_raw_nnet_from_am=False) + get_raw_nnet_from_am=False, + sum_to_one_penalty=args.combine_sum_to_one_penalty) if include_log_softmax and args.stage <= num_iters + 1: logger.info("Getting average posterior for purposes of " diff --git 
a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index eef05fa6892..e137a1a2e04 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -447,9 +447,11 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): dir=args.dir, num_iters=num_iters, models_to_combine=models_to_combine, egs_dir=egs_dir, left_context=left_context, right_context=right_context, + minibatch_size_str=args.num_chunk_per_minibatch, run_opts=run_opts, chunk_width=args.chunk_width, background_process_handler=background_process_handler, - get_raw_nnet_from_am=False) + get_raw_nnet_from_am=False, + sum_to_one_penalty=args.combine_sum_to_one_penalty) if include_log_softmax and args.stage <= num_iters + 1: logger.info("Getting average posterior for purposes of " diff --git a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index 4fa7300bfcb..cd88ac411b4 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -442,8 +442,10 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): models_to_combine=models_to_combine, egs_dir=egs_dir, run_opts=run_opts, left_context=left_context, right_context=right_context, + minibatch_size_str=args.num_chunk_per_minibatch, background_process_handler=background_process_handler, - chunk_width=args.chunk_width) + chunk_width=args.chunk_width, + sum_to_one_penalty=args.combine_sum_to_one_penalty) if args.stage <= num_iters + 1: logger.info("Getting average posterior for purposes of " diff --git a/src/nnet3/nnet-chain-combine.cc b/src/nnet3/nnet-chain-combine.cc index c93e9ac0950..d130490aba6 100644 --- a/src/nnet3/nnet-chain-combine.cc +++ b/src/nnet3/nnet-chain-combine.cc @@ -406,14 +406,27 @@ double NnetChainCombiner::GetSumToOnePenalty( this_total_weight += this_weight; } tot_weights(c) = this_total_weight; - ans += -0.5 * penalty * - (this_total_weight - 1.0) * (this_total_weight - 1.0); + // this_total_weight_deriv is the derivative of the penalty + // term w.r.t. this component's total weight. + double this_total_weight_deriv; + if (combine_config_.enforce_positive_weights) { + // if combine_config_.enforce_positive_weights is true, then we choose to + // formulate the penalty in a slightly different way.. this solves the + // problem that with the formulation in the 'else' below, if for some + // reason the total weight is << 1.0, the deriv w.r.t. the actual + // parameters gets tiny [because weight = exp(params)]. + double log_total = log(this_total_weight); + ans += -0.5 * penalty * log_total * log_total; + double log_total_deriv = -1.0 * penalty * log_total; + this_total_weight_deriv = log_total_deriv / this_total_weight; + } else { + ans += -0.5 * penalty * + (this_total_weight - 1.0) * (this_total_weight - 1.0); + this_total_weight_deriv = penalty * (1.0 - this_total_weight); + + } if (weights_penalty_deriv != NULL) { KALDI_ASSERT(weights.Dim() == weights_penalty_deriv->Dim()); - // this_total_weight_deriv is the derivative of the penalty - // term w.r.t. this component's total weight. 
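The bug: building the training command as '{0} {1}'.format(raw_model_string,
dropout_edit_string) always inserts a space between the two pieces, so when the
dropout edit string is empty the rxfilename ends in "| " rather than "|", which
presumably stops it being recognized as a pipe. A standalone Python
illustration (not Kaldi code; the model command and the empty-string return
value are assumptions for the example):

    raw_model_string = "nnet3-copy --learning-rate=0.001 exp/foo/10.raw - |"
    dropout_edit_string = ""  # assumed value when no dropout schedule is given

    buggy = '{0} {1}'.format(raw_model_string, dropout_edit_string)
    fixed = raw_model_string + dropout_edit_string

    assert buggy.endswith("| ")  # trailing space after the pipe
    assert fixed.endswith("|")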
- double this_total_weight_deriv = - penalty * (1.0 - this_total_weight); for (int32 m = 0; m < num_models; m++) { int32 index = m * num_uc + c; (*weights_penalty_deriv)(index) = this_total_weight_deriv; diff --git a/src/nnet3/nnet-combine.cc b/src/nnet3/nnet-combine.cc index 19f4925bd9b..b7fbd4fa6d9 100644 --- a/src/nnet3/nnet-combine.cc +++ b/src/nnet3/nnet-combine.cc @@ -404,14 +404,27 @@ double NnetCombiner::GetSumToOnePenalty( this_total_weight += this_weight; } tot_weights(c) = this_total_weight; - ans += -0.5 * penalty * - (this_total_weight - 1.0) * (this_total_weight - 1.0); + // this_total_weight_deriv is the derivative of the penalty + // term w.r.t. this component's total weight. + double this_total_weight_deriv; + if (config_.enforce_positive_weights) { + // if config_.enforce_positive_weights is true, then we choose to + // formulate the penalty in a slightly different way.. this solves the + // problem that with the formulation in the 'else' below, if for some + // reason the total weight is << 1.0, the deriv w.r.t. the actual + // parameters gets tiny [because weight = exp(params)]. + double log_total = log(this_total_weight); + ans += -0.5 * penalty * log_total * log_total; + double log_total_deriv = -1.0 * penalty * log_total; + this_total_weight_deriv = log_total_deriv / this_total_weight; + } else { + ans += -0.5 * penalty * + (this_total_weight - 1.0) * (this_total_weight - 1.0); + this_total_weight_deriv = penalty * (1.0 - this_total_weight); + + } if (weights_penalty_deriv != NULL) { KALDI_ASSERT(weights.Dim() == weights_penalty_deriv->Dim()); - // this_total_weight_deriv is the derivative of the penalty - // term w.r.t. this component's total weight. - double this_total_weight_deriv = - penalty * (1.0 - this_total_weight); for (int32 m = 0; m < num_models; m++) { int32 index = m * num_uc + c; (*weights_penalty_deriv)(index) = this_total_weight_deriv; From 9d53e051b274166bc49394fbee7beb53f2a67882 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 29 Jan 2017 00:29:20 -0500 Subject: [PATCH 386/530] [scripts] nnet3 training: fix for bug introduced when dropout scripting support was merged. 
--- egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py | 2 +- egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py index d3f0d01897e..0d20b7c3287 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py @@ -305,7 +305,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, cur_num_chunk_per_minibatch = num_chunk_per_minibatch / 2 cur_max_param_change = float(max_param_change) / math.sqrt(2) - raw_model_string = '{0} {1}'.format(raw_model_string, dropout_edit_string) + raw_model_string = raw_model_string + dropout_edit_string shrink_info_str = '' if shrinkage_value != 1.0: diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index a888a6d7613..65a9c105e45 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -249,7 +249,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, "{dir}/{iter}.raw - |".format( lr=learning_rate, dir=dir, iter=iter)) - raw_model_string = '{0} {1}'.format(raw_model_string, dropout_edit_string) + raw_model_string = raw_model_string + dropout_edit_string if do_average: cur_minibatch_size = minibatch_size From 2965ca2bfd2d4acac6b4653bfb465f9986ca8b23 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 29 Jan 2017 00:32:22 -0500 Subject: [PATCH 387/530] Merging changes from master into upstream/shortcut --- egs/tedlium/s5_r2/local/run_learn_lex.sh | 2 +- egs/wsj/s5/steps/dict/learn_lexicon.sh | 5 +++-- .../s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py | 2 +- egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/egs/tedlium/s5_r2/local/run_learn_lex.sh b/egs/tedlium/s5_r2/local/run_learn_lex.sh index 4960fbd848e..a2a6f2e46b8 100755 --- a/egs/tedlium/s5_r2/local/run_learn_lex.sh +++ b/egs/tedlium/s5_r2/local/run_learn_lex.sh @@ -72,7 +72,7 @@ if [ $stage -le 0 ]; then $data/train_vocab.txt | sort > $data/oov_train.txt || exit 1; steps/dict/apply_g2p.sh --var-counts 4 $data/oov_train.txt \ $g2p_mdl_dir exp/g2p/oov_lex_train || exit 1; - cat exp/g2p/oov_lex_train/lexicon.lex | cut -f1,3 | \ + cat exp/g2p/oov_lex_train/lexicon.lex | awk '{if (NF>=3) print $0}' | cut -f1,3 | \ tr -s '\t' ' ' | sort | uniq > $data/lexicon_oov_g2p.txt || exit 1; fi diff --git a/egs/wsj/s5/steps/dict/learn_lexicon.sh b/egs/wsj/s5/steps/dict/learn_lexicon.sh index 4b4c177d554..0ea580528ee 100755 --- a/egs/wsj/s5/steps/dict/learn_lexicon.sh +++ b/egs/wsj/s5/steps/dict/learn_lexicon.sh @@ -88,7 +88,7 @@ if [ $# -lt 6 ] || [ $# -gt 7 ]; then echo " --cmd '$cmd' # command to submit jobs with (e.g. run.pl, queue.pl)" echo " --nj # number of parallel jobs" echo " --oov-symbol # (required option) oov symbol, like ." - echo " --g2p-pron-candidates # A lexicon file containing g2p generated pronunciations, for words in acoustic training " + echo " --lexicon-g2p # A lexicon file containing g2p generated pronunciations, for words in acoustic training " echo " # data / target vocabulary. It's optional." echo " --min-prob # The cut-off parameter used to select pronunciation candidates from phonetic" echo " # decoding. 
We remove pronunciations with probabilities less than this value" @@ -168,7 +168,8 @@ if [ $stage -le 0 ]; then # create an empty list of g2p generated prons, if it's not given. touch $dir/lexicon_g2p.txt else - cp $lexicon_g2p $dir/lexicon_g2p.txt 2>/dev/null + cat $lexicon_g2p | awk '{if (NF<2) {print "There is an empty pronunciation in lexicon_g2p.txt. Exit." \ + > "/dev/stderr"; exit 1} print $0}' - > $dir/lexicon_g2p.txt || exit 1; fi fi diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py index e3ae02b4b09..fde8ae65461 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py @@ -312,7 +312,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, num_chunk_per_minibatch_str) cur_max_param_change = float(max_param_change) / math.sqrt(2) - raw_model_string = '{0} {1}'.format(raw_model_string, dropout_edit_string) + raw_model_string = raw_model_string + dropout_edit_string shrink_info_str = '' if shrinkage_value != 1.0: diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index 94478b263f3..25fd94d98ff 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -239,7 +239,7 @@ def train_one_iteration(dir, iter, srand, egs_dir, "{dir}/{iter}.raw - |".format( lr=learning_rate, dir=dir, iter=iter)) - raw_model_string = '{0} {1}'.format(raw_model_string, dropout_edit_string) + raw_model_string = raw_model_string + dropout_edit_string if do_average: cur_minibatch_size_str = minibatch_size_str From da591795e07aa19b715d68d910a1a12a46c0c11f Mon Sep 17 00:00:00 2001 From: Arseniy Gorin Date: Mon, 30 Jan 2017 23:10:03 +0300 Subject: [PATCH 388/530] [scripts] steps/dict/learn_lexicon.sh: minor fixes (#1385) --- egs/wsj/s5/steps/dict/learn_lexicon.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/wsj/s5/steps/dict/learn_lexicon.sh b/egs/wsj/s5/steps/dict/learn_lexicon.sh index 0ea580528ee..a719422b593 100755 --- a/egs/wsj/s5/steps/dict/learn_lexicon.sh +++ b/egs/wsj/s5/steps/dict/learn_lexicon.sh @@ -43,7 +43,7 @@ prior_counts_tot=15 prior_mean="0.7,0.2,0.1" num_gauss= num_leaves= -retrain_src_mdl=true +retrain_src_mdl=false cleanup=true # End configuration section. @@ -251,7 +251,7 @@ if [ $stage -le 2 ]; then cat - $dir/non_scored_entries | \ sort | uniq > $dir/dict_expanded_train/lexicon.txt || exit 1; - utils/prepare_lang.sh $dir/dict_expanded_train "$oov_symbol" \ + utils/prepare_lang.sh --phone-symbol-table $ref_lang/phones.txt $dir/dict_expanded_train "$oov_symbol" \ $dir/lang_expanded_train_tmp $dir/lang_expanded_train || exit 1; fi From 6549622ee667182d6b9d51f868d31127e4bb6e28 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 30 Jan 2017 17:04:31 -0500 Subject: [PATCH 389/530] [scripts] change default in nnet3/chain/build_tree.sh --- egs/wsj/s5/steps/nnet3/chain/build_tree.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/chain/build_tree.sh b/egs/wsj/s5/steps/nnet3/chain/build_tree.sh index bbff6263fe4..72bc91c6014 100755 --- a/egs/wsj/s5/steps/nnet3/chain/build_tree.sh +++ b/egs/wsj/s5/steps/nnet3/chain/build_tree.sh @@ -22,7 +22,9 @@ cmd=run.pl context_opts= # e.g. set this to "--context-width 5 --central-position 2" for quinphone. 
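
The awk guard added to learn_lexicon.sh above rejects a lexicon_g2p file containing any entry with fewer than two fields, i.e. a word with an empty pronunciation, instead of silently copying it through. A rough Python equivalent of that check, for clarity (illustrative only; the function name is made up):

    import sys

    def check_lexicon(path):
        # a lexicon entry must have a word plus at least one phone
        with open(path) as f:
            for lineno, line in enumerate(f, 1):
                if len(line.split()) < 2:
                    sys.exit("empty pronunciation at line %d of %s"
                             % (lineno, path))
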
cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves frame_subsampling_factor=1 -leftmost_questions_truncate=10 +leftmost_questions_truncate=-1 # note: this used to default to 10, but we never + # use this option now with value != -1, and + # we're changing the default tree_stats_opts= cluster_phones_opts= # End configuration section. @@ -179,4 +181,3 @@ fi cp $dir/1.mdl $dir/final.mdl echo $0: Done building tree - From 93d88656cea2d29c4db9aee10befe8cda158189b Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 30 Jan 2017 17:06:13 -0500 Subject: [PATCH 390/530] [scripts] various minor script fixes or extensions --- .../s5_r2/local/chain/compare_wer_general.sh | 1 - .../steps/libs/nnet3/xconfig/basic_layers.py | 18 +++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh b/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh index d3acae200b8..00b2d29cc88 100755 --- a/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh +++ b/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh @@ -15,7 +15,6 @@ if [ "$1" == "--looped" ]; then shift fi - used_epochs=false # this function set_names is used to separate the epoch-related parts of the name diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py index 3726eebeb6e..1a42c86ad81 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py @@ -441,6 +441,12 @@ class XconfigOutputLayer(XconfigLayerBase): -0.25 is referred to as presoftmax_prior_scale_power in scripts. In the scripts this would normally be set to config_dir/presoftmax_prior_scale.vec + output-delay=0 : Can be used to shift the frames on the output, equivalent + to delaying labels by this many frames (positive value increases latency + in online decoding but may help if you're using unidirectional LSTMs. + ng-affine-options='' : Can be used supply non-default options to the affine + layer (intended for the natural gradient but can be an arbitrary string + to be added to the config line. e.g. 'update-period=2'.). 
""" def __init__(self, first_token, key_to_value, prev_names = None): @@ -466,7 +472,8 @@ def set_default_configs(self): 'max-change' : 1.5, 'param-stddev' : 0.0, 'bias-stddev' : 0.0, - 'output-delay' : 0 + 'output-delay' : 0, + 'ng-affine-options' : '' } def check_configs(self): @@ -529,6 +536,7 @@ def get_full_config(self): bias_stddev = self.config['bias-stddev'] output_delay = self.config['output-delay'] max_change = self.config['max-change'] + ng_affine_options = self.config['ng-affine-options'] # note: ref.config is used only for getting the left-context and # right-context of the network; @@ -541,9 +549,9 @@ def get_full_config(self): ' output-dim={2}' ' param-stddev={3}' ' bias-stddev={4}' - ' max-change={5} ' + ' max-change={5} {6} ' ''.format(self.name, input_dim, output_dim, - param_stddev, bias_stddev, max_change) + + param_stddev, bias_stddev, max_change, ng_affine_options) + ('learning-rate-factor={0} '.format(learning_rate_factor) if learning_rate_factor != 1.0 else '')) ans.append((config_name, line)) @@ -690,7 +698,7 @@ def _add_components(self, input_desc, input_dim, nonlinearities): self_repair_scale = self.config['self-repair-scale'] target_rms = self.config['target-rms'] max_change = self.config['max-change'] - ng_opt_str = self.config['ng-affine-options'] + ng_affine_options = self.config['ng-affine-options'] configs = [] # First the affine node. @@ -701,7 +709,7 @@ def _add_components(self, input_desc, input_dim, nonlinearities): ' max-change={3}' ' {4}' ''.format(self.name, input_dim, output_dim, - max_change, ng_opt_str)) + max_change, ng_affine_options)) configs.append(line) line = ('component-node name={0}.affine' From b22fe7a88140640a321f9a48279869053254cf9d Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 30 Jan 2017 20:14:44 -0500 Subject: [PATCH 391/530] [egs] Adding various tuning scripts on tedlium. --- .../local/chain/tuning/run_tdnn_lstm_1e.sh | 8 +- .../local/chain/tuning/run_tdnn_lstm_1n.sh | 340 +++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1o.sh | 344 ++++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1r.sh | 339 +++++++++++++++++ .../nnet3/tuning/run_tdnn_lstm_1a_disc.sh | 246 +++++++++++++ src/cudamatrix/cu-sparse-matrix.h | 4 - 6 files changed, 1274 insertions(+), 7 deletions(-) create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh create mode 100755 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh create mode 100755 egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a_disc.sh diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh index 7f0b9588b66..32950e7df6a 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -60,13 +60,14 @@ chunk_left_context=40 chunk_right_context=0 chunk_left_context_initial=0 chunk_right_context_final=0 +frames_per_chunk=140,100,160 # decode options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) extra_left_context=50 extra_right_context=0 extra_left_context_initial=0 extra_right_context_final=0 -frames_per_chunk=140,100,160 -frames_per_chunk_primary=140 + # The rest are configs specific to this script. Most of the parameters # are just hardcoded at this level, in the commands below. @@ -74,6 +75,7 @@ train_stage=-10 tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. 
tdnn_lstm_affix=1e #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. common_egs_dir= # you can set this to use previously dumped egs. +remove_egs=true # End configuration section. echo "$0 $@" # Print the command line for logging @@ -241,7 +243,7 @@ if [ $stage -le 18 ]; then --trainer.optimization.initial-effective-lrate 0.001 \ --trainer.optimization.final-effective-lrate 0.0001 \ --trainer.optimization.momentum 0.0 \ - --cleanup.remove-egs true \ + --cleanup.remove-egs "$remove_egs" \ --feat-dir $train_data_dir \ --tree-dir $tree_dir \ --lat-dir $lat_dir \ diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh new file mode 100755 index 00000000000..ed79404f815 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh @@ -0,0 +1,340 @@ +#!/bin/bash + +# 1n is as 1k, but maxing out at 0.5, not 0.7. +# 1k is as 1e, but introducing a dropout schedule. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,k,l,m}_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1k_sp_bi tdnn_lstm1l_sp_bi tdnn_lstm1m_sp_bi +# WER on dev(orig) 9.0 8.7 8.9 9.0 +# [looped:] 9.0 8.6 8.9 8.9 +# WER on dev(rescored) 8.4 7.9 8.2 8.2 +# [looped:] 8.4 7.8 8.2 8.3 +# WER on test(orig) 8.8 8.8 8.9 8.9 +# [looped:] 8.8 8.7 8.8 8.8 +# WER on test(rescored) 8.4 8.3 8.2 8.5 +# [looped:] 8.3 8.3 8.3 8.4 +# Final train prob -0.0648 -0.0693 -0.0768 -0.0807 +# Final valid prob -0.0827 -0.0854 -0.0943 -0.0931 +# Final train prob (xent) -0.8372 -0.8848 -0.9371 -0.9807 +# Final valid prob (xent) -0.9497 -0.9895 -1.0546 -1.0629 + + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. 
+ +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1n #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + # note: the value of the dropout-proportion is not important, as it's + # controlled by the dropout schedule; what's important is that we set it. 
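
The comment above notes that the dropout-proportion written into the config is just a placeholder, overridden by the --trainer.dropout-schedule option (set to '0,0@0.20,0.5@0.5,0@0.75,0' in the training command further down). As I understand that option, the schedule is piecewise linear in the fraction of training completed; the following is a sketch of that interpretation in plain Python, an assumption for illustration rather than the actual train.py code:

    def dropout_at(schedule, frac):
        # frac = fraction of training completed, in [0, 1]
        points = []
        pieces = schedule.split(',')
        for i, piece in enumerate(pieces):
            if '@' in piece:
                value, at = piece.split('@')
                points.append((float(at), float(value)))
            else:
                # assume a bare value is pinned to the start or end
                points.append((0.0 if i == 0 else 1.0, float(piece)))
        points.sort()
        for (x0, y0), (x1, y1) in zip(points, points[1:]):
            if x0 <= frac <= x1:
                return y0 + (y1 - y0) * (frac - x0) / (x1 - x0)
        return points[-1][1]

    # ramps 0 -> 0.5 between 20% and 50% of training, back to 0 by 75%:
    assert abs(dropout_at('0,0@0.20,0.5@0.5,0@0.75,0', 0.35) - 0.25) < 1e-12
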
+ lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-per-frame=true" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.5@0.5,0@0.75,0' \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
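
Both the regular decode above and the looped decode below pass --acwt 1.0 --post-decode-acwt 10.0. The usual reading of this pair for 'chain' systems: decode with acoustic scale 1.0, then multiply the acoustic scores by 10 in the lattice, so that the scoring scripts' integer language-model weights (hence --scoring-opts "--min-lmwt 5") search a sensible range; lmwt=10 then corresponds to an effective acoustic scale of 1.0. A toy illustration of the arithmetic (assumed semantics, not decode.sh internals):

    def combined_score(am_score, lm_score, lmwt, post_decode_acwt=10.0):
        # after post-decode scaling, searching integer lmwt is the same
        # as searching a fractional acoustic scale of post_decode_acwt/lmwt
        return am_score * post_decode_acwt + lm_score * lmwt

    # lmwt=10 reproduces an effective acoustic scale of 10/10 = 1.0:
    print(combined_score(am_score=-120.0, lm_score=-35.0, lmwt=10))
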
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh new file mode 100755 index 00000000000..ec97bce3a8b --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh @@ -0,0 +1,344 @@ +#!/bin/bash + +# 1o is as 1k, but putting the dropout on (c,m), i.e. the output +# of the LstmNonlinearityComponent, which I believe is the same as +# putting it on (i,f) which Gaofeng found worked well in the non-fast Lstm +# component; and using schedule which maxes out at 0.3, not 0.7. +# [note: this was a little worse. turns out it was not the same as +# what gaofeng did because he had separate masks on (i,f). +# note: I've since removed the script-level support for this. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,k,l,m,n,o}_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1k_sp_bi tdnn_lstm1l_sp_bi tdnn_lstm1m_sp_bi tdnn_lstm1n_sp_bi tdnn_lstm1o_sp_bi +# WER on dev(orig) 9.0 8.7 8.9 9.0 8.8 8.8 +# [looped:] 9.0 8.6 8.9 8.9 8.8 8.9 +# WER on dev(rescored) 8.4 7.9 8.2 8.2 8.1 8.1 +# [looped:] 8.4 7.8 8.2 8.3 8.1 8.2 +# WER on test(orig) 8.8 8.8 8.9 8.9 8.7 8.7 +# [looped:] 8.8 8.7 8.8 8.8 8.7 8.7 +# WER on test(rescored) 8.4 8.3 8.2 8.5 8.3 8.2 +# [looped:] 8.3 8.3 8.3 8.5 8.3 8.2 +# Final train prob -0.0648 -0.0693 -0.0768 -0.0807 -0.0702 -0.0698 +# Final valid prob -0.0827 -0.0854 -0.0943 -0.0931 -0.0836 -0.0858 +# Final train prob (xent) -0.8372 -0.8848 -0.9371 -0.9807 -0.8719 -0.8998 +# Final valid prob (xent) -0.9497 -0.9895 -1.0546 -1.0629 -0.9732 -1.0084 + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). 
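
On the 1o note above about why a single dropout mask on (c,m) differs from Gaofeng's separate masks on (i,f): with one per-frame mask the two signals are always dropped on the same frames, while independent masks decorrelate them, which is a different distribution over the gates. A small numpy illustration of the distinction (not Kaldi code; a sketch only):

    import numpy as np

    rng = np.random.RandomState(0)
    T, D, keep = 4, 5, 0.5                  # frames, gate dim, keep-prob
    i_gate, f_gate = rng.rand(T, D), rng.rand(T, D)

    # one per-frame mask shared by both gates (dropout-per-frame=true):
    # a dropped frame zeroes both signals together.
    shared = (rng.rand(T, 1) < keep).astype(float)
    i_shared, f_shared = i_gate * shared, f_gate * shared

    # independent per-frame masks: the gates can be dropped on
    # different frames, which is not the same thing.
    m_i = (rng.rand(T, 1) < keep).astype(float)
    m_f = (rng.rand(T, 1) < keep).astype(float)
    i_indep, f_indep = i_gate * m_i, f_gate * m_f
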
+# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1o #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + # note: the value of the dropout-proportion is not important, as it's + # controlled by the dropout schedule; what's important is that we set it. + lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-place=2 dropout-per-frame=true" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.3@0.5,0@0.75,0' \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
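
On the --trainer.optimization options in the train.py call above: as these are usually described, the learning rate actually applied on a given iteration is the "effective" rate times the number of parallel jobs in use at that point, with the effective rate decaying exponentially from initial to final while the job count ramps linearly from 2 to 12. A sketch of that relationship (treat the details as assumptions, not train.py code):

    import math

    def iteration_lrate(i, num_iters, initial_eff=0.001, final_eff=0.0001,
                        jobs_initial=2, jobs_final=12):
        frac = i / float(max(num_iters - 1, 1))
        eff = initial_eff * math.exp(frac * math.log(final_eff / initial_eff))
        jobs = int(round(jobs_initial + frac * (jobs_final - jobs_initial)))
        return eff * jobs

    print(iteration_lrate(0, 100))   # 0.001  * 2  = 0.002
    print(iteration_lrate(99, 100))  # 0.0001 * 12 = 0.0012
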
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh new file mode 100755 index 00000000000..b3da38e412a --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh @@ -0,0 +1,339 @@ +#!/bin/bash + +# 1r is as 1e, but changing update-period of natural gradient from 4 to 1, +# Not helpful. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,r}_sp_bi +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1r_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1r_sp_bi +# WER on dev(orig) 9.0 9.0 +# [looped:] 9.0 9.1 +# WER on dev(rescored) 8.4 8.5 +# [looped:] 8.4 8.6 +# WER on test(orig) 8.8 9.1 +# [looped:] 8.8 9.0 +# WER on test(rescored) 8.4 8.4 +# [looped:] 8.3 8.5 +# Final train prob -0.0648 -0.0642 +# Final valid prob -0.0827 -0.0838 +# Final train prob (xent) -0.8372 -0.8319 +# Final valid prob (xent) -0.9497 -0.9635 + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. 
local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +frames_per_chunk=140,100,160 +# decode options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 + + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1r #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. +remove_egs=true + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + tdnn_opts='ng-affine-options="update-period=1"' + lstmp_opts='ng-affine-options="update-period=1" decay-time=20' + output_opts='max-change=1.5 ng-affine-options="update-period=1"' + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 $tdnn_opts + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) $tdnn_opts + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) $tdnn_opts + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) $tdnn_opts + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) $tdnn_opts + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) $tdnn_opts + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets $output_opts + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs "$remove_egs" \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
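
A small aside on the learning_rate_factor computed in the config stage above with $(echo "print 0.5/$xent_regularize" | python): the xent branch's gradients are scaled by xent_regularize, so giving its output layer a learning-rate factor of 0.5/xent_regularize keeps that layer's effective step size constant at 0.5 whatever the regularization constant is, which is exactly what the heredoc comment says. In numbers:

    xent_regularize = 0.1
    learning_rate_factor = 0.5 / xent_regularize   # -> 5.0
    grad_scale = xent_regularize                   # weight of xent objf term
    assert abs(grad_scale * learning_rate_factor - 0.5) < 1e-12
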
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a_disc.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a_disc.sh new file mode 100755 index 00000000000..1826caf3d05 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a_disc.sh @@ -0,0 +1,246 @@ +#!/bin/bash + +# This script does discriminative training on top of CE nnet3 system. To +# simplify things, this assumes you are using the "cleaned" data (since this is +# generally better), i.e. it won't work if you used options to run_tdnn_lstm_1a.sh +# to use the non-cleaned data. +# +# note: this relies on having a cluster that has plenty of CPUs as well as GPUs, +# since the alignment and the lattice generation/egs-dumping takes quite a bit +# of CPU time. + +# below is with the current settings (effective_learning_rate=0.0000025, last_layer_factor=0.5): +# steps/info/nnet3_disc_dir_info.pl exp/nnet3_cleaned/tdnn_lstm1a_sp_smbrslow +# exp/nnet3_cleaned/tdnn_lstm1a_sp_smbrslow:num-jobs=4;effective-lrate=2.5e-06;last-layer-factor=0.50;iters-per-epoch=55;epoch[0,1,2,3]:train-objf=[0.94,0.96,0.97,0.97],valid-objf=[0.91,0.93,0.93,0.93],train-counts=[0.40,0.25,0.17,0.12],valid-counts=[0.57,0.31,0.34,0.35] + +# local/nnet3/compare_wer.sh --looped exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1a_sp_smbrslow:{1,2,3} +# System tdnn_lstm1a_sp tdnn_lstm1a_sp_smbrslow:1 tdnn_lstm1a_sp_smbrslow:2 tdnn_lstm1a_sp_smbrslow:3 +# WER on dev(orig) 11.0 9.4 9.4 9.4 +# [looped:] 11.0 9.4 9.5 9.4 +# WER on dev(rescored) 10.3 8.8 8.7 8.7 +# [looped:] 10.3 8.8 8.9 8.9 +# WER on test(orig) 10.8 9.6 9.7 9.6 +# [looped:] 10.7 9.6 9.6 9.7 +# WER on test(rescored) 10.1 9.1 9.2 9.1 +# [looped:] 10.0 9.1 9.2 9.1 + +# Below is with twice the lrate (5e-06) and the same last-layer-factor (0.5). Trained too fast. +# exp/nnet3_cleaned/tdnn_lstm1a_sp_smbr:num-jobs=4;effective-lrate=5e-06;last-layer-factor=0.50;iters-per-epoch=55;epoch[0,1,2,3]:train-objf=[0.94,0.97,0.97,0.98],valid-objf=[0.91,0.93,0.93,0.93],train-counts=[0.40,0.22,0.12,0.09],valid-counts=[0.57,0.31,0.27,0.32] +# I'm not showing the looped decoding results with this older step; +# there was a script bug (now fixed) and I don't want to rerun them. +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1a_sp_smbr:{1,2,3} +# System tdnn_lstm1a_sp tdnn_lstm1a_sp_smbr:1 tdnn_lstm1a_sp_smbr:2 tdnn_lstm1a_sp_smbr:3 +# WER on dev(orig) 11.0 9.4 9.4 9.5 +# WER on dev(rescored) 10.3 8.8 8.8 8.9 +# WER on test(orig) 10.8 9.6 9.8 9.8 +# WER on test(rescored) 10.1 9.1 9.3 9.4 + +set -e +set -uo pipefail + +stage=1 +train_stage=-10 # can be used to start training in the middle. 
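
The last-layer-factor=0.50 in the logged info above (and the last_layer_factor variable set further down) scales how fast the final layer is updated during the discriminative pass relative to the rest of the network, which helps keep sMBR training from overfitting the output layer. A toy rendering of the idea (the names are illustrative, not the actual nnet3 training code):

    def per_layer_lrates(layer_names, effective_lrate, last_layer_factor):
        rates = dict((name, effective_lrate) for name in layer_names)
        rates[layer_names[-1]] *= last_layer_factor
        return rates

    print(per_layer_lrates(['tdnn1', 'fastlstm3', 'output'],
                           effective_lrate=0.0000025,
                           last_layer_factor=0.5))
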
+get_egs_stage=0 +use_gpu=true # for training +cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like + # alignments and degs). +degs_dir= # set this to use preexisting degs. +nj=400 # have a high number of jobs because this could take a while, and we might + # have some stragglers. + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +graph_dir=exp/tri3_cleaned/graph +srcdir=exp/nnet3_cleaned/tdnn_lstm1a_sp +train_data_dir=data/train_cleaned_sp_hires_comb +online_ivector_dir=exp/nnet3_cleaned/ivectors_train_cleaned_sp_hires_comb + +## Objective options +criterion=smbr +one_silence_class=true + +# originally ran with effective_learning_rate=0.000005, +# changing to effective_learning_rate=0.0000025 and using affix=slow + +# you can set --disc-affix if you run different configurations. +disc_affix= + +dir=${srcdir}_${criterion}${disc_affix} + +## Egs options. Give quite a few choices of chunk length, +## so it can split utterances without much gap or overlap. +frames_per_eg=300,280,150,120,100 +frames_overlap_per_eg=0 +frames_per_chunk_egs=200 # for alignments and denlat creation. +frames_per_chunk_decoding=50 # for decoding; should be the same as the value + # used in the script that trained the nnet. + # We didn't set the frames_per_chunk in + # run_tdnn_lstm_1a.sh, so it defaults to 50. +## these context options should match the training condition. (chunk_left_context, +## chunk_right_context) +## We set --extra-left-context-initial 0 and --extra-right-context-final 0 +## directly in the script below, but this should also match the training condition. +## note: --extra-left-context should be the same as the chunk_left_context (or in +## general, the argument of --egs.chunk-left-context) in the baseline script. +extra_left_context=40 +extra_right_context=0 + + + +## Nnet training options +effective_learning_rate=0.0000025 +last_layer_factor=0.5 +max_param_change=1 +num_jobs_nnet=4 +num_epochs=3 +regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options, + # in chain models. +minibatch_size="300=32,16/150=64,32" # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up); + # if chunk size is closer to 150, use mini atch size of 64 (or 32 for mop-up). + + +## Decode options +decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. + +if $use_gpu; then + if ! cuda-compiled; then + cat </dev/null || true + + for x in `seq $decode_start_epoch $num_epochs`; do + for decode_set in dev test; do + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + iter=epoch$x + # We don't test the iter "epoch${x}_adj", although it's computed, + # because prior-adjustment doesn't make sense for chain models + # and it degrades the results. 
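
The minibatch_size rule set above ("300=32,16/150=64,32") works the way its own comment says: each '/'-separated piece maps a nominal chunk length to minibatch sizes (the second size being for the mop-up minibatch), and a given chunk length uses the piece whose key it is closest to. A sketch of that reading in plain Python (illustrative parsing only; the real option accepts more syntax than this):

    def minibatch_sizes(rule, chunk_len):
        table = {}
        for piece in rule.split('/'):
            key, sizes = piece.split('=')
            table[int(key)] = [int(s) for s in sizes.split(',')]
        nearest = min(table, key=lambda k: abs(k - chunk_len))
        return table[nearest]

    assert minibatch_sizes('300=32,16/150=64,32', 280) == [32, 16]
    assert minibatch_sizes('300=32,16/150=64,32', 120) == [64, 32]
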
+ ( + steps/nnet3/decode_looped.sh \ + --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3_cleaned/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/${decode_set}_hires $dir/decode_looped_${decode_set}_${iter} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${decode_set}_hires \ + ${dir}/decode_looped_${decode_set}_${iter} ${dir}/decode_looped_${decode_set}_${iter}_rescore || exit 1 + ) || touch $dir/.error & + done + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +if [ $stage -le 6 ] && $cleanup; then + # if you run with "--cleanup true --stage 6" you can clean up. + # actually, keep the alignments in case we need them later.. they're slow to + # create, and quite big. + # rm ${srcdir}_ali/ali.*.gz || true + + steps/nnet2/remove_egs.sh ${srcdir}_degs || true +fi + +exit 0; diff --git a/src/cudamatrix/cu-sparse-matrix.h b/src/cudamatrix/cu-sparse-matrix.h index 1298ee5ea5f..4da74871bac 100644 --- a/src/cudamatrix/cu-sparse-matrix.h +++ b/src/cudamatrix/cu-sparse-matrix.h @@ -121,10 +121,6 @@ class CuSparseMatrix { ~CuSparseMatrix() { } - // Use the CuMatrix::CopyFromSmat() function to copy from this to - // CuMatrix. - // Also see CuMatrix::AddSmat(). - protected: // The following two functions should only be called if we did not compile // with CUDA or could not get a CUDA card; in that case the contents are From d51a9b38024893221e6f9b31eec6e5243e29bdb5 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 30 Jan 2017 20:38:05 -0500 Subject: [PATCH 392/530] [egs] egs/swbd/s5c, minor script updates and new tuning scripts. --- .../s5c/local/chain/compare_wer_general.sh | 131 +++++++-- .../local/chain/tuning/run_tdnn_lstm_1c.sh | 0 .../local/chain/tuning/run_tdnn_lstm_1d.sh | 266 ++++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1e.sh | 258 +++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1f.sh | 262 +++++++++++++++++ .../s5c/local/nnet3/compare_wer_general.sh | 4 +- 6 files changed, 898 insertions(+), 23 deletions(-) mode change 100644 => 100755 egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh create mode 100755 egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh create mode 100755 egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh create mode 100755 egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh diff --git a/egs/swbd/s5c/local/chain/compare_wer_general.sh b/egs/swbd/s5c/local/chain/compare_wer_general.sh index 1b1f0d16047..29a5dc83063 100755 --- a/egs/swbd/s5c/local/chain/compare_wer_general.sh +++ b/egs/swbd/s5c/local/chain/compare_wer_general.sh @@ -1,66 +1,155 @@ -#!/bin/bash # this script is used for comparing decoding results between systems. -# e.g. local/chain/compare_wer_general.sh tdnn_7h_sp tdnn_7i_sp +# e.g. local/chain/compare_wer_general.sh tdnn_c_sp tdnn_d_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer_general.sh tdnn_d_sp tdnn_d_sp_smbr:1 tdnn_d_sp_smbr:2 ... -echo "$0 $*"; # print command line. +echo "# $0 $*"; # print command line. 
-echo -n "System " -for x in $*; do printf "% 10s" $x; done +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi + +echo -n "# System " +for x in $*; do printf " % 9s" $x; done echo -echo -n "WER on train_dev(tg) " + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. +# If called with a colon-free name, like: +# set_names tdnn_a +# it will set dir=exp/chain/tdnn_a and epoch_suffix="" +# If called with something like: +# set_names tdnn_d_smbr:3 +# it will set dir=exp/chain/tdnn_d_smbr and epoch_suffix="epoch3" +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + name=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + dirname=exp/chain/$name + if [ -z $epoch ]; then + epoch_suffix="" + else + used_epochs=true + epoch_suffix=_epoch${epoch} + fi +} + + +echo -n "# WER on train_dev(tg) " for x in $*; do - wer=$(grep WER exp/chain/${x}_sp/decode_train_dev_sw1_tg/wer_* | utils/best_wer.sh | awk '{print $2}') + set_names $x + # note: the '*' in the directory name is because there + # is _hires_ in there for the cross-entropy systems, and + # nothing for the sequence trained systems. + wer=$(grep WER $dirname/decode_train_dev*sw1_tg$epoch_suffix/wer_* | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo +if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x + wer=$(grep WER $dirname/decode_train_dev*sw1_tg${epoch_suffix}_looped/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo +fi -echo -n "WER on train_dev(fg) " +echo -n "# WER on train_dev(fg) " for x in $*; do - wer=$(grep WER exp/chain/${x}_sp/decode_train_dev_sw1_fsh_fg/wer_* | utils/best_wer.sh | awk '{print $2}') + set_names $x + wer=$(grep WER $dirname/decode_train_dev*sw1_fsh_fg$epoch_suffix/wer_* | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo -echo -n "WER on eval2000(tg) " +if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x + wer=$(grep WER $dirname/decode_train_dev*sw1_fsh_fg${epoch_suffix}_looped/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo +fi + +echo -n "# WER on eval2000(tg) " for x in $*; do - wer=$(grep Sum exp/chain/${x}_sp/decode_eval2000_sw1_tg/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') + set_names $x + wer=$(grep Sum $dirname/decode_eval2000*sw1_tg$epoch_suffix/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo -echo -n "WER on eval2000(fg) " +if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x + wer=$(grep Sum $dirname/decode_eval2000*sw1_tg${epoch_suffix}_looped/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo +fi + +echo -n "# WER on eval2000(fg) " for x in $*; do - wer=$(grep Sum exp/chain/${x}_sp/decode_eval2000_sw1_fsh_fg/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') + set_names $x + wer=$(grep Sum $dirname/decode_eval2000*sw1_fsh_fg$epoch_suffix/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo +if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x + wer=$(grep Sum $dirname/decode_eval2000*sw1_fsh_fg${epoch_suffix}_looped/score*/*ys | grep -v swbd | utils/best_wer.sh | awk 
'{print $2}') + printf "% 10s" $wer + done + echo +fi + + +if $used_epochs; then + # we don't print the probs in this case. + exit 0 +fi + + echo -n "Final train prob " for x in $*; do - prob=$(grep Overall exp/chain/${x}_sp/log/compute_prob_train.final.log | grep -v xent | awk '{print $8}') - printf "% 10s" $prob + prob=$(grep Overall exp/chain/${x}/log/compute_prob_train.final.log | grep -v xent | awk '{print $8}') + printf "% 10.3f" $prob done echo echo -n "Final valid prob " for x in $*; do - prob=$(grep Overall exp/chain/${x}_sp/log/compute_prob_valid.final.log | grep -v xent | awk '{print $8}') - printf "% 10s" $prob + prob=$(grep Overall exp/chain/${x}/log/compute_prob_valid.final.log | grep -v xent | awk '{print $8}') + printf "% 10.3f" $prob done echo echo -n "Final train prob (xent) " for x in $*; do - prob=$(grep Overall exp/chain/${x}_sp/log/compute_prob_train.final.log | grep -w xent | awk '{print $8}') - printf "% 10s" $prob + prob=$(grep Overall exp/chain/${x}/log/compute_prob_train.final.log | grep -w xent | awk '{print $8}') + printf "% 10.3f" $prob done echo echo -n "Final valid prob (xent) " for x in $*; do - prob=$(grep Overall exp/chain/${x}_sp/log/compute_prob_valid.final.log | grep -w xent | awk '{print $8}') - printf "% 10s" $prob + prob=$(grep Overall exp/chain/${x}/log/compute_prob_valid.final.log | grep -w xent | awk '{print $8}') + printf "% 10.4f" $prob done echo diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh old mode 100644 new mode 100755 diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh new file mode 100755 index 00000000000..837eb944875 --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -0,0 +1,266 @@ +#!/bin/bash + + +# run_tdnn_lstm_1d.sh is like run_tdnn_lstm_1c.sh but making +# various kaldi-5.1-related upgrades to the script: +# change chunk-width to be variable, add extra_left_context_initial=0 +# and extra_right_context_final=0; add looped decoding. +# Also changed frames-per-iter from 1.2 million to 1.5 million... this +# might have been a mistake, trying 1 million in 1f to see if this matters. + +# run_tdnn_lstm_1c.sh is like run_tdnn_lstm_1b.sh but using the +# new 'fast-lstm' layer. Results are slightly improved, plus +# it's faster. See PR #1243 on github, and issue #1237. +# This used to be called run_tdnn_fastlstm_1b.sh. + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1d # Note: _sp will get added to this if $speed_perturb == true. +decode_iter=final + +# training options +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
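Each test set below is decoded in a backgrounded subshell, and the script waits for the whole batch before moving on; a minimal sketch of the pattern, with decode_one_set standing in for the actual decode and rescore commands:

    for decode_set in train_dev eval2000; do
      ( decode_one_set $decode_set || exit 1 ) &   # exit only aborts this subshell
    done
    wait    # block until every backgrounded decode has finished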
+ for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh new file mode 100755 index 00000000000..bf93b156974 --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -0,0 +1,258 @@ +#!/bin/bash + +# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but +# trying the change of xent_regularize from 0.025 (which was an +# unusual value) to the more usual 0.01. + + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1e # Note: _sp will get added to this if $speed_perturb == true. +decode_iter=final + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir=exp/chain/tdnn_lstm_1d_sp/egs + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
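The rescoring calls in the loop below use bash brace expansion to name both LM variants in one token; a minimal illustration:

    echo data/lang_sw1_{tg,fsh_fg}
    # -> data/lang_sw1_tg data/lang_sw1_fsh_fg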
+ for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh new file mode 100755 index 00000000000..3d9e1e4a63b --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -0,0 +1,262 @@ +#!/bin/bash + +# run_tdnn_lstm_1f.sh is like run_tdnn_lstm_1e.sh but +# reducing the frames-per-iter from 1.5 million to 1 million, +# since the time per iter was too much (about 5 minutes). + +# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but +# trying the change of xent_regularize from 0.025 (which was an +# unusual value) to the more usual 0.01. + + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1f # Note: _sp will get added to this if $speed_perturb == true. +decode_iter=final + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b1{1,2,3,4}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1000000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
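A note on the scaling convention in the decode calls below, stated here as general background on 'chain' decoding rather than anything specific to this patch: the decoder runs at acoustic scale 1.0, and --post-decode-acwt 10.0 multiplies the acoustic scores in the written lattice by 10, so the usual integer LM-weight sweep applies at scoring time; scoring at lmwt=10 then corresponds to an effective acoustic scale of 10/10 = 1.0.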
+  for decode_set in train_dev eval2000; do
+      (
+      steps/nnet3/decode_looped.sh \
+          --acwt 1.0 --post-decode-acwt 10.0 \
+          --nj 50 --cmd "$decode_cmd" --iter $decode_iter \
+          --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
+         $graph_dir data/${decode_set}_hires \
+         $dir/decode_${decode_set}_sw1_tg_looped || exit 1;
+      if $has_fisher; then
+          steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
+            data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \
+            $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1;
+      fi
+      ) &
+  done
+fi
+wait;
+
+
+
+exit 0;
diff --git a/egs/swbd/s5c/local/nnet3/compare_wer_general.sh b/egs/swbd/s5c/local/nnet3/compare_wer_general.sh
index 37eaeeac85b..7cf42c9ae04 100755
--- a/egs/swbd/s5c/local/nnet3/compare_wer_general.sh
+++ b/egs/swbd/s5c/local/nnet3/compare_wer_general.sh
@@ -86,7 +86,7 @@ echo -n "# Final train prob     "
 for x in $*; do
   set_names $x
   prob=$(grep log-likelihood $dirname/log/compute_prob_train.combined.log | awk '{print $8}')
-  printf "% 10s" $prob
+  printf "% 10.3f" $prob
 done
 echo

@@ -94,6 +94,6 @@ echo -n "# Final valid prob     "
 for x in $*; do
   set_names $x
   prob=$(grep log-likelihood $dirname/log/compute_prob_valid.combined.log | awk '{print $8}')
-  printf "% 10s" $prob
+  printf "% 10.3f" $prob
 done
 echo

From 9b17a97cd39c5323c9c4435d843c9b876491a832 Mon Sep 17 00:00:00 2001
From: Hossein Hadian
Date: Tue, 31 Jan 2017 00:33:22 -0500
Subject: [PATCH 393/530] [scripts] add utils/data/shift_feats.sh, deprecates steps/shift_feats.sh (#1386)

---
 egs/wsj/s5/steps/shift_feats.sh               |  5 ++
 .../s5/utils/data/shift_and_combine_feats.sh  | 55 ++++++++++++
 egs/wsj/s5/utils/data/shift_feats.sh          | 55 ++++++++++++
 src/featbin/shift-feats.cc                    | 89 +++++++++++++------
 4 files changed, 176 insertions(+), 28 deletions(-)
 create mode 100755 egs/wsj/s5/utils/data/shift_and_combine_feats.sh
 create mode 100755 egs/wsj/s5/utils/data/shift_feats.sh

diff --git a/egs/wsj/s5/steps/shift_feats.sh b/egs/wsj/s5/steps/shift_feats.sh
index 22b17f2cb09..ada5716f187 100755
--- a/egs/wsj/s5/steps/shift_feats.sh
+++ b/egs/wsj/s5/steps/shift_feats.sh
@@ -3,6 +3,9 @@
 # Copyright 2016  Vimal Manohar
 # Apache 2.0

+# This script is deprecated. The newer script utils/data/shift_feats.sh
+# should be used instead.
+
 # This script shifts the feats in the input data directory and creates a
 # new directory <data-dir>_fs<frame-shift> with shifted feats.
 # If the shift is negative, the initial frames get truncated and the
@@ -25,6 +28,8 @@ if [ -f path.sh ]; then . ./path.sh; fi
 . parse_options.sh || exit 1;

 if [ $# -ne 4 ]; then
+  echo "This script is deprecated. The newer script utils/data/shift_feats.sh"
+  echo "should be used instead."
   echo "usage: $0 [options] <frame-shift> <src-data-dir> <exp-dir> <feat-dir>";
   echo "e.g.: $0 -1 data/train exp/shift-1_train mfcc"
   echo "options: "
diff --git a/egs/wsj/s5/utils/data/shift_and_combine_feats.sh b/egs/wsj/s5/utils/data/shift_and_combine_feats.sh
new file mode 100755
index 00000000000..1a15b324ee8
--- /dev/null
+++ b/egs/wsj/s5/utils/data/shift_and_combine_feats.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright 2017 Hossein Hadian
+
+# Apache 2.0
+
+echo "$0 $@"  # Print the command line for logging
+if [ -f path.sh ]; then . ./path.sh; fi
+. utils/parse_options.sh
+
+if [ $# != 3 ]; then
+  echo "Usage: $0 <frame-subsampling-factor> <srcdir> <destdir>"
+  echo "e.g.: $0 3 data/train data/train_fs3"
+  echo "For use in perturbing data for discriminative training and alignment of"
+  echo "frame-subsampled systems, this script uses utils/data/shift_feats.sh"
+  echo "and utils/data/combine_data.sh to shift the features"
+  echo "<frame-subsampling-factor> different ways and combine them."
+ echo "E.g. if is 3, this script will combine" + echo "the data frame-shifted by -1, 0 and 1 (c.f. shift-feats)." + exit 1 +fi + +frame_subsampling_factor=$1 +srcdir=$2 +destdir=$3 + +if [ ! -f $srcdir/feats.scp ]; then + echo "$0: expected $srcdir/feats.scp to exist" + exit 1 +fi + +if [ -f $destdir/feats.scp ]; then + echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)" + exit 1 +fi + +tmp_shift_destdirs=() +for frame_shift in `seq $[-(frame_subsampling_factor/2)] $[-(frame_subsampling_factor/2) + frame_subsampling_factor - 1]`; do + if [ "$frame_shift" == 0 ]; then continue; fi + utils/data/shift_feats.sh $frame_shift $srcdir ${destdir}_fs$frame_shift || exit 1 + tmp_shift_destdirs+=("${destdir}_fs$frame_shift") +done +utils/data/combine_data.sh $destdir $srcdir ${tmp_shift_destdirs[@]} || exit 1 +rm -r ${tmp_shift_destdirs[@]} + +utils/validate_data_dir.sh $destdir + +src_nf=`cat $srcdir/feats.scp | wc -l` +dest_nf=`cat $destdir/feats.scp | wc -l` +if [ $[src_nf*frame_subsampling_factor] -ne $dest_nf ]; then + echo "There was a problem. Expected number of feature lines in destination dir to be $[src_nf*frame_subsampling_factor];" + exit 1; +fi + +echo "$0: Successfully generated $frame_subsampling_factor-way shifted version of data in $srcdir, in $destdir" diff --git a/egs/wsj/s5/utils/data/shift_feats.sh b/egs/wsj/s5/utils/data/shift_feats.sh new file mode 100755 index 00000000000..2ae7b2435d3 --- /dev/null +++ b/egs/wsj/s5/utils/data/shift_feats.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# Copyright 2016 Vimal Manohar +# 2017 Hossein Hadian +# Apache 2.0 + +echo "$0 $@" # Print the command line for logging +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo " Usage: $0 " + echo "e.g.: $0 -1 data/train data/train_fs-1" + echo "The script creates a new data directory with the features modified" + echo "using the program shift-feats with the specified frame-shift." + echo "This program automatically adds the prefix 'fs-' to the" + echo "utterance and speaker names. See also utils/data/shift_and_combine_feats.sh" + exit 1 +fi + +frame_shift=$1 +srcdir=$2 +destdir=$3 + + +if [ "$destdir" == "$srcdir" ]; then + echo "$0: this script requires and to be different." + exit 1 +fi + +if [ ! -f $srcdir/feats.scp ]; then + echo "$0: no such file $srcdir/feats.scp" + exit 1; +fi + +utt_prefix="fs$frame_shift-" +spk_prefix="fs$frame_shift-" + +mkdir -p $destdir +utils/copy_data_dir.sh --utt-prefix $utt_prefix --spk-prefix $spk_prefix \ + $srcdir $destdir + +if grep --quiet "'" $srcdir/feats.scp; then + echo "$0: the input features already use single quotes. Can't proceed." 
+  exit 1;
+fi
+
+awk -v shift=$frame_shift 'NF == 2 {uttid=$1; feat=$2; qt="";} \
+NF > 2 {idx=index($0, " "); uttid=$1; feat=substr($0, idx + 1); qt="\x27";} \
+NF {print uttid " shift-feats --print-args=false --shift=" shift, qt feat qt " - |";}' \
+  $destdir/feats.scp >$destdir/feats_shifted.scp
+mv -f $destdir/feats_shifted.scp $destdir/feats.scp
+
+echo "$0: Done"
+
diff --git a/src/featbin/shift-feats.cc b/src/featbin/shift-feats.cc
index 7b970e92248..5d392c9d15a 100644
--- a/src/featbin/shift-feats.cc
+++ b/src/featbin/shift-feats.cc
@@ -22,20 +22,41 @@
 #include "util/common-utils.h"
 #include "matrix/kaldi-matrix.h"

+namespace kaldi {
+  void ShiftFeatureMatrix(const Matrix<BaseFloat> &src, int32 shift,
+                          Matrix<BaseFloat> *rearranged) {
+    for (int32 r = 0; r < src.NumRows(); r++) {
+      int32 src_r = r - shift;
+      if (src_r < 0) src_r = 0;
+      if (src_r >= src.NumRows()) src_r = src.NumRows() - 1;
+      rearranged->Row(r).CopyFromVec(src.Row(src_r));
+    }
+  }
+}

 int main(int argc, char *argv[]) {
   try {
     using namespace kaldi;
     const char *usage =
-        "Copy features and possibly shift them in time while maintaining the length, e.g.\n"
-        "shift-feats --shift=1 will shift all frames to the\n"
-        "right by one (the first frame would be duplicated).\n"
-        "See also: copy-feats, copy-matrix\n";
+        "Copy features, and possibly shift them while maintaining the "
+        "num-frames.\n"
+        "Usage: shift-feats [options] <feature-rspecifier> "
+        "<feature-wspecifier>\n"
+        "or: shift-feats [options] <feats-rxfilename> <feats-wxfilename>\n"
+        "e.g.: shift-feats --shift=-1 foo.scp bar.ark\n"
+        "or: shift-feats --shift=1 foo.mat bar.mat\n"
+        "See also: copy-feats, copy-matrix, select-feats, extract-rows,\n"
+        "subset-feats, subsample-feats, splice-feats, paste-feats, "
+        "concat-feats\n";

     ParseOptions po(usage);
+    bool binary = true;
     int32 shift = 0;
-    po.Register("shift", &shift, "Number of frames by which to shift the features.");
+    po.Register("shift", &shift, "Number of frames by which to shift the "
+                "features.");
+    po.Register("binary", &binary, "Binary-mode output (not relevant if "
+                "writing to archive)");

     po.Read(argc, argv);

@@ -46,32 +67,40 @@ int main(int argc, char *argv[]) {
     int32 num_done = 0, num_err = 0;

-    SequentialBaseFloatMatrixReader feat_reader(po.GetArg(1));
-    BaseFloatMatrixWriter feat_writer(po.GetArg(2));
-
-
-    for (; !feat_reader.Done(); feat_reader.Next()) {
-      const std::string &key = feat_reader.Key();
-      const Matrix<BaseFloat> &src = feat_reader.Value();
-      if (src.NumRows() == 0) {
-        KALDI_WARN << "Empty matrix for key " << key;
-        num_err++;
-        continue;
+    if (ClassifyRspecifier(po.GetArg(1), NULL, NULL) != kNoRspecifier) {
+      SequentialBaseFloatMatrixReader feat_reader(po.GetArg(1));
+      BaseFloatMatrixWriter feat_writer(po.GetArg(2));
+
+
+      for (; !feat_reader.Done(); feat_reader.Next()) {
+        const std::string &key = feat_reader.Key();
+        const Matrix<BaseFloat> &src = feat_reader.Value();
+        if (src.NumRows() == 0) {
+          KALDI_WARN << "Empty matrix for key " << key;
+          num_err++;
+          continue;
+        }
+        Matrix<BaseFloat> rearranged(src.NumRows(), src.NumCols());
+        ShiftFeatureMatrix(src, shift, &rearranged);
+        feat_writer.Write(key, rearranged);
+        num_done++;
       }
+
+      KALDI_LOG << "Shifted " << num_done << " features by "
+                << shift << " frames; " << num_err << " with errors.";
+      return (num_done > 0 ?
0 : 1); + } else { + std::string feat_rxfilename = po.GetArg(1), + feat_wxfilename = po.GetArg(2); + Matrix src; + ReadKaldiObject(feat_rxfilename, &src); + if (src.NumRows() == 0) + KALDI_ERR << "Empty input matrix"; Matrix rearranged(src.NumRows(), src.NumCols()); - for (int32 r = 0; r < src.NumRows(); r++) { - int32 src_r = r - shift; - if (src_r < 0) src_r = 0; - if (src_r >= src.NumRows()) src_r = src.NumRows() - 1; - rearranged.Row(r).CopyFromVec(src.Row(src_r)); - } - feat_writer.Write(key, rearranged); - num_done++; + ShiftFeatureMatrix(src, shift, &rearranged); + WriteKaldiObject(rearranged, feat_wxfilename, binary); + // we do not print any log messages here } - - KALDI_LOG << "Shifted " << num_done << " features by " - << shift << " frames; " << num_err << " with errors."; - return (num_done > 0 ? 0 : 1); } catch(const std::exception &e) { std::cerr << e.what(); return -1; @@ -87,4 +116,8 @@ int main(int argc, char *argv[]) { 1 1 1 1 2 2 ] + + + echo "[ 1 1; 2 2; 3 3 ]" | ./shift-feats --print-args=false --binary=false \ + --shift=1 - - */ From 6b85ed767424f99872e00c5479fff494d8b4e1b5 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 30 Jan 2017 23:11:16 -0500 Subject: [PATCH 394/530] [egs] Small fixes/additions in Swbd/s5c chain scripts --- .../local/chain/tuning/run_tdnn_lstm_1e.sh | 4 +- .../local/chain/tuning/run_tdnn_lstm_1g.sh | 261 ++++++++++++++++++ 2 files changed, 263 insertions(+), 2 deletions(-) create mode 100755 egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh index bf93b156974..14dbb1cdd2e 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -242,11 +242,11 @@ if [ $stage -le 16 ]; then --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ $graph_dir data/${decode_set}_hires \ - $dir/decode_${decode_set}_sw1_tg || exit 1; + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; if $has_fisher; then steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ - $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; fi ) & done diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh new file mode 100755 index 00000000000..6cacdf2dadb --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh @@ -0,0 +1,261 @@ +#!/bin/bash + +# 1g is like 1e, but reducing decay-time from 20 to 15, to see if +# it reduces the difference between regular and looped decoding. +# +# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but +# trying the change of xent_regularize from 0.025 (which was an +# unusual value) to the more usual 0.01. + + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1g # Note: _sp will get added to this if $speed_perturb == true. +decode_iter=final + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. 
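The next line gives a comma-separated list of chunk widths; training draws egs with all of these widths, while (non-looped) decoding needs a single width and takes the first entry, as the frames_per_chunk_primary line further down does. A minimal sketch of that extraction:

    frames_per_chunk=140,100,160
    echo $frames_per_chunk | cut -d, -f1    # -> 140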
+frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir=exp/chain/tdnn_lstm_1d_sp/egs + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=15" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
+ for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; From 01d96bf1051a040c3fa34414a88212d2930ffd65 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 1 Feb 2017 12:52:11 -0500 Subject: [PATCH 395/530] [scripts] Fix bug in dropout code found by Tanel Alumae --- egs/wsj/s5/steps/nnet3/chain/train.py | 2 +- egs/wsj/s5/steps/nnet3/train_dnn.py | 2 +- egs/wsj/s5/steps/nnet3/train_raw_dnn.py | 2 +- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 2 +- egs/wsj/s5/steps/nnet3/train_rnn.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index b8d7a55671d..f658d2a770f 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -433,7 +433,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), - dropout_edit_string=common_lib.get_dropout_edit_string( + dropout_edit_string=common_train_lib.get_dropout_edit_string( args.dropout_schedule, float(num_archives_processed) / num_archives_to_process, iter), diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index 342ac1f09b4..ca495654819 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -308,7 +308,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), - dropout_edit_string=common_lib.get_dropout_edit_string( + dropout_edit_string=common_train_lib.get_dropout_edit_string( args.dropout_schedule, float(num_archives_processed) / num_archives_to_process, iter), diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index cf71e9dd846..21cbca64e7a 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -311,7 +311,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), - dropout_edit_string=common_lib.get_dropout_edit_string( + dropout_edit_string=common_train_lib.get_dropout_edit_string( args.dropout_schedule, float(num_archives_processed) / num_archives_to_process, iter), diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index cfdae51cfa8..e8a48653a5a 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -411,7 +411,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), - dropout_edit_string=common_lib.get_dropout_edit_string( + dropout_edit_string=common_train_lib.get_dropout_edit_string( args.dropout_schedule, float(num_archives_processed) / num_archives_to_process, iter), diff --git 
a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index 97e037f99fe..d08585fa537 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -405,7 +405,7 @@ def learning_rate(iter, current_num_jobs, num_archives_processed): num_archives=num_archives, learning_rate=learning_rate(iter, current_num_jobs, num_archives_processed), - dropout_edit_string=common_lib.get_dropout_edit_string( + dropout_edit_string=common_train_lib.get_dropout_edit_string( args.dropout_schedule, float(num_archives_processed) / num_archives_to_process, iter), From 67cabd02622fd7f72b896bfe5705f55c790555bc Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Wed, 1 Feb 2017 13:22:07 -0500 Subject: [PATCH 396/530] [build]: resolving OpenFst compilation issue with gcc-6.x (#1392) --- tools/extras/openfst-1.3.4.patch | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/extras/openfst-1.3.4.patch b/tools/extras/openfst-1.3.4.patch index bae0d6d7114..41ce6d59221 100644 --- a/tools/extras/openfst-1.3.4.patch +++ b/tools/extras/openfst-1.3.4.patch @@ -381,3 +381,15 @@ if (strm != &cout) delete strm; return ret; + +--- a/src/include/fst/extensions/ngram/ngram-fst.h ++++ b/src/include/fst/extensions/ngram/ngram-fst.h +@@ -130,7 +130,7 @@ + hdr.SetNumStates(num_states_); + WriteHeader(strm, opts, kFileVersion, &hdr); + strm.write(data_, Storage(num_states_, num_futures_, num_final_)); ++ return !strm.fail(); +- return strm; + } + + StateId Start() const { From f1d7891c5ea55884baceb4645754aff74fc3e0d3 Mon Sep 17 00:00:00 2001 From: Ahmed Date: Wed, 1 Feb 2017 23:39:22 +0300 Subject: [PATCH 397/530] [egs] Add new graphemic system for Gale Arabic, with newer nnet scripts (#1298) --- egs/gale_arabic/README.txt | 9 +- .../s5/local/gale_data_prep_txt.sh | 2 + egs/gale_arabic/s5/local/gale_format_data.sh | 2 + egs/gale_arabic/s5/local/gale_prep_dict.sh | 1 + egs/gale_arabic/s5/local/gale_train_lms.sh | 2 + egs/gale_arabic/s5/run.sh | 24 +- egs/gale_arabic/s5b/RESULTS | 72 ++++++ egs/gale_arabic/s5b/cmd.sh | 15 ++ egs/gale_arabic/s5b/conf/decode.config | 1 + egs/gale_arabic/s5b/conf/mfcc.conf | 1 + egs/gale_arabic/s5b/conf/mfcc_hires.conf | 10 + egs/gale_arabic/s5b/conf/online_cmvn.conf | 1 + egs/gale_arabic/s5b/local/bad_segments | 10 + egs/gale_arabic/s5b/local/chain/run_tdnn.sh | 1 + .../s5b/local/chain/run_tdnn_lstm.sh | 1 + .../s5b/local/chain/tuning/run_tdnn_1a.sh | 210 ++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1a.sh | 223 ++++++++++++++++ .../s5b/local/gale_data_prep_audio.sh | 32 +++ .../s5b/local/gale_data_prep_split.sh | 39 +++ .../s5b/local/gale_data_prep_txt.sh | 60 +++++ egs/gale_arabic/s5b/local/gale_format_data.sh | 60 +++++ .../s5b/local/gale_prep_grapheme_dict.sh | 41 +++ egs/gale_arabic/s5b/local/gale_train_lms.sh | 81 ++++++ .../s5b/local/nnet3/run_ivector_common.sh | 237 ++++++++++++++++++ egs/gale_arabic/s5b/local/nnet3/run_lstm.sh | 1 + egs/gale_arabic/s5b/local/nnet3/run_tdnn.sh | 1 + .../s5b/local/nnet3/tuning/run_lstm_1a.sh | 161 ++++++++++++ .../s5b/local/nnet3/tuning/run_tdnn_1a.sh | 88 +++++++ .../s5b/local/normalize_transcript_BW.pl | 111 ++++++++ egs/gale_arabic/s5b/local/score.sh | 57 +++++ egs/gale_arabic/s5b/local/split_wer.sh | 72 ++++++ egs/gale_arabic/s5b/local/test_list | 11 + egs/gale_arabic/s5b/path.sh | 5 + egs/gale_arabic/s5b/run.sh | 167 ++++++++++++ egs/gale_arabic/s5b/steps | 1 + egs/gale_arabic/s5b/utils | 1 + egs/wsj/s5/utils/build_const_arpa_lm.sh | 2 +- 37 files changed, 1800 insertions(+), 13 
deletions(-)
 create mode 100644 egs/gale_arabic/s5b/RESULTS
 create mode 100755 egs/gale_arabic/s5b/cmd.sh
 create mode 100644 egs/gale_arabic/s5b/conf/decode.config
 create mode 100644 egs/gale_arabic/s5b/conf/mfcc.conf
 create mode 100644 egs/gale_arabic/s5b/conf/mfcc_hires.conf
 create mode 100644 egs/gale_arabic/s5b/conf/online_cmvn.conf
 create mode 100644 egs/gale_arabic/s5b/local/bad_segments
 create mode 120000 egs/gale_arabic/s5b/local/chain/run_tdnn.sh
 create mode 120000 egs/gale_arabic/s5b/local/chain/run_tdnn_lstm.sh
 create mode 100755 egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh
 create mode 100755 egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
 create mode 100755 egs/gale_arabic/s5b/local/gale_data_prep_audio.sh
 create mode 100755 egs/gale_arabic/s5b/local/gale_data_prep_split.sh
 create mode 100755 egs/gale_arabic/s5b/local/gale_data_prep_txt.sh
 create mode 100755 egs/gale_arabic/s5b/local/gale_format_data.sh
 create mode 100755 egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh
 create mode 100755 egs/gale_arabic/s5b/local/gale_train_lms.sh
 create mode 100755 egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh
 create mode 120000 egs/gale_arabic/s5b/local/nnet3/run_lstm.sh
 create mode 120000 egs/gale_arabic/s5b/local/nnet3/run_tdnn.sh
 create mode 100755 egs/gale_arabic/s5b/local/nnet3/tuning/run_lstm_1a.sh
 create mode 100755 egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh
 create mode 100755 egs/gale_arabic/s5b/local/normalize_transcript_BW.pl
 create mode 100755 egs/gale_arabic/s5b/local/score.sh
 create mode 100755 egs/gale_arabic/s5b/local/split_wer.sh
 create mode 100644 egs/gale_arabic/s5b/local/test_list
 create mode 100755 egs/gale_arabic/s5b/path.sh
 create mode 100755 egs/gale_arabic/s5b/run.sh
 create mode 120000 egs/gale_arabic/s5b/steps
 create mode 120000 egs/gale_arabic/s5b/utils

diff --git a/egs/gale_arabic/README.txt b/egs/gale_arabic/README.txt
index db436f11e8c..928fca8fdf3 100644
--- a/egs/gale_arabic/README.txt
+++ b/egs/gale_arabic/README.txt
@@ -10,8 +10,13 @@ GALE Phase 2 Arabic Broadcast Conversation Speech was developed by the Linguisti

 The data has two types of speech: conversational and report. This script trains and tests on all of them, and results are reported for each; the training data is 320 hours, with 9.3 hours for testing

-The dictionary, and scripts can be obtained from QCRI portal: http://alt.qcri.org/
+The dictionaries and scripts can be obtained from the QCRI portal: http://alt.qcri.org/
+The experiments here are based on the above corpus

-s5: The experiments here are based on the above corpus
+s5: Phoneme based
+s5b: Grapheme based: this is the recommended setup, including nnet3 and chain modeling
+
+[1] "A Complete Kaldi Recipe For Building Arabic Speech Recognition Systems", A. Ali, Y. Zhang, P. Cardinal, N. Dahak, S. Vogel, J. Glass. SLT 2014
+[2] "QCRI Advanced Transcription Systems (QATS) for the Arabic Multi-Dialect Broadcast Media Recognition: MGB-2 Challenge", S. Khurana, A. Ali.
SLT 2016
\ No newline at end of file
diff --git a/egs/gale_arabic/s5/local/gale_data_prep_txt.sh b/egs/gale_arabic/s5/local/gale_data_prep_txt.sh
index 8e42128726f..8b93a234eec 100755
--- a/egs/gale_arabic/s5/local/gale_data_prep_txt.sh
+++ b/egs/gale_arabic/s5/local/gale_data_prep_txt.sh
@@ -59,3 +59,5 @@ awk '{if ($1 == "conversational") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::'
 #rm -fr $txtdir
 cd $top_pwd
 echo data prep text succeeded
+
+exit 0
diff --git a/egs/gale_arabic/s5/local/gale_format_data.sh b/egs/gale_arabic/s5/local/gale_format_data.sh
index 6675dd20f71..9f03b9224cf 100755
--- a/egs/gale_arabic/s5/local/gale_format_data.sh
+++ b/egs/gale_arabic/s5/local/gale_format_data.sh
@@ -56,3 +56,5 @@ fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \

 echo gale_format_data succeeded.
+
+exit 0
\ No newline at end of file
diff --git a/egs/gale_arabic/s5/local/gale_prep_dict.sh b/egs/gale_arabic/s5/local/gale_prep_dict.sh
index b46d5d5fa29..74ef789eda7 100755
--- a/egs/gale_arabic/s5/local/gale_prep_dict.sh
+++ b/egs/gale_arabic/s5/local/gale_prep_dict.sh
@@ -30,3 +30,4 @@ sort -u > $dir/nonsilence_phones.txt || exit 1;

 echo Dictionary preparation succeeded

+exit 0
diff --git a/egs/gale_arabic/s5/local/gale_train_lms.sh b/egs/gale_arabic/s5/local/gale_train_lms.sh
index 838e7a26136..1b5d4665a19 100755
--- a/egs/gale_arabic/s5/local/gale_train_lms.sh
+++ b/egs/gale_arabic/s5/local/gale_train_lms.sh
@@ -112,3 +112,5 @@

 echo train lm succeeded
+
+exit 0
\ No newline at end of file
diff --git a/egs/gale_arabic/s5/run.sh b/egs/gale_arabic/s5/run.sh
index f04e2cd8716..7c1da835ef0 100755
--- a/egs/gale_arabic/s5/run.sh
+++ b/egs/gale_arabic/s5/run.sh
@@ -1,10 +1,12 @@
-#!/bin/bash
+#!/bin/bash
+
+set -e

 # Copyright 2014 QCRI (author: Ahmed Ali)
 # Apache 2.0

-. ./path.sh
-. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
+. path.sh
+. cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
 ## This relates to the queue.

 nJobs=120
 nDecodeJobs=40
@@ -21,6 +23,8 @@ nDecodeJobs=40
 #This is CLSP configuration. We add the 2014 GALE data. We got around 2 %
 #improvement just by including it. The gain might be large if someone would tweak
 # the number of leaves and states and so on.
+
+#Make sure you edit this section to reflect where you keep the LDC data on your cluster
 audio=(
   /data/sls/scratch/amali/data/GALE/LDC2013S02
   /data/sls/scratch/amali/data/GALE/LDC2013S07
 text=(
 galeData=GALE
 # By copying and pasting into your shell.
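A brief note on the "${audio[@]}" idiom used by the data-prep calls below: quoting the [@] expansion passes each array element as its own argument, so every LDC directory arrives as a separate parameter. A minimal illustration with hypothetical paths:

    audio=( /path/LDC2013S02 /path/LDC2013S07 )
    printf '%s\n' "${audio[@]}"    # prints one directory per line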
#copy the audio files to local folder wav and convet flac files to wav -local/gale_data_prep_audio.sh "${audio[@]}" $galeData +local/gale_data_prep_audio.sh "${audio[@]}" $galeData || exit 1; #get the transcription and remove empty prompts and all noise markers -local/gale_data_prep_txt.sh "${text[@]}" $galeData +local/gale_data_prep_txt.sh "${text[@]}" $galeData || exit 1; # split the data to reports and conversational and for each class will have rain/dev and test -local/gale_data_prep_split.sh $galeData +local/gale_data_prep_split.sh $galeData || exit 1; # get QCRI dictionary and add silence and UN -local/gale_prep_dict.sh +local/gale_prep_dict.sh || exit 1; #prepare the langauge resources -utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang +utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang || exit 1; # LM training -local/gale_train_lms.sh +local/gale_train_lms.sh || exit 1; -local/gale_format_data.sh +local/gale_format_data.sh || exit 1; # G compilation, check LG composition # Now make MFCC features. diff --git a/egs/gale_arabic/s5b/RESULTS b/egs/gale_arabic/s5b/RESULTS new file mode 100644 index 00000000000..2260a106654 --- /dev/null +++ b/egs/gale_arabic/s5b/RESULTS @@ -0,0 +1,72 @@ +## +# This file is generated using local/split_wer.sh $galeData //galeData is a local folder to keep intermediate gale data +# look at the end of run.sh in the same folder +## +##### RESULTS generated by amali at 2017-01-01-08-05-59 + +Report Results WER: +%WER 9.50 [ 2124 / 22363, 160 ins, 275 del, 1689 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_report_9 +%WER 10.72 [ 2398 / 22363, 163 ins, 313 del, 1922 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_report_9 +%WER 12.04 [ 2693 / 22363, 226 ins, 271 del, 2196 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_report_9 +%WER 12.29 [ 2749 / 22363, 273 ins, 266 del, 2210 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_report_10 +%WER 17.82 [ 3986 / 22363, 315 ins, 618 del, 3053 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_report_12 +%WER 18.15 [ 4059 / 22363, 335 ins, 589 del, 3135 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_report_11 +%WER 18.42 [ 4119 / 22363, 346 ins, 590 del, 3183 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_report_11 +%WER 18.69 [ 4179 / 22363, 304 ins, 640 del, 3235 sub ] exp/sgmm_5a_mmi_b0.1/decode2/wer_report_13 +%WER 19.06 [ 4263 / 22363, 348 ins, 611 del, 3304 sub ] exp/sgmm_5a_mmi_b0.1/decode1/wer_report_12 +%WER 19.24 [ 4302 / 22363, 315 ins, 580 del, 3407 sub ] exp/tri2b_mmi_b0.05/decode_it4/wer_report_12 +%WER 19.37 [ 4331 / 22363, 319 ins, 553 del, 3459 sub ] exp/tri2b_mmi/decode_it4/wer_report_12 +%WER 19.61 [ 4386 / 22363, 348 ins, 563 del, 3475 sub ] exp/tri2b_mmi_b0.05/decode_it3/wer_report_12 +%WER 19.71 [ 4408 / 22363, 301 ins, 607 del, 3500 sub ] exp/tri2b_mmi/decode_it3/wer_report_13 +%WER 19.81 [ 4429 / 22363, 349 ins, 667 del, 3413 sub ] exp/sgmm_5a/decode/wer_report_14 +%WER 20.14 [ 4503 / 22363, 399 ins, 647 del, 3457 sub ] exp/tri2b_mpe/decode_it4/wer_report_14 +%WER 20.58 [ 4603 / 22363, 408 ins, 658 del, 3537 sub ] exp/tri2b_mpe/decode_it3/wer_report_14 +%WER 21.64 [ 4839 / 22363, 498 ins, 614 del, 3727 sub ] exp/tri3b/decode/wer_report_17 +%WER 23.32 [ 5214 / 22363, 470 ins, 727 del, 4017 sub ] exp/tri2b/decode/wer_report_16 +%WER 23.54 [ 5265 / 22363, 444 ins, 794 del, 4027 sub ] exp/tri3b/decode.si/wer_report_17 +%WER 25.66 [ 5738 / 22363, 478 ins, 838 del, 4422 sub ] exp/tri2a/decode/wer_report_14 +%WER 26.38 [ 5900 / 22363, 435 ins, 929 del, 4536 sub ] exp/tri1/decode/wer_report_15 
+Conversational Results WER: +%WER 21.59 [ 10213 / 47305, 944 ins, 3092 del, 6177 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_conversational_9 +%WER 24.77 [ 11716 / 47305, 1098 ins, 3579 del, 7039 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_conversational_9 +%WER 26.78 [ 12670 / 47305, 1741 ins, 2434 del, 8495 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_conversational_9 +%WER 27.55 [ 13032 / 47305, 1800 ins, 2666 del, 8566 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_conversational_11 +%WER 34.10 [ 16133 / 47305, 1903 ins, 3245 del, 10985 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_conversational_11 +%WER 34.81 [ 16466 / 47305, 2077 ins, 3037 del, 11352 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_conversational_10 +%WER 35.19 [ 16648 / 47305, 1933 ins, 3264 del, 11451 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_conversational_11 +%WER 35.63 [ 16857 / 47305, 1988 ins, 3247 del, 11622 sub ] exp/sgmm_5a_mmi_b0.1/decode2/wer_conversational_11 +%WER 36.23 [ 17137 / 47305, 2091 ins, 3256 del, 11790 sub ] exp/sgmm_5a_mmi_b0.1/decode1/wer_conversational_11 +%WER 37.40 [ 17691 / 47305, 2150 ins, 3362 del, 12179 sub ] exp/sgmm_5a/decode/wer_conversational_12 +%WER 37.95 [ 17951 / 47305, 1738 ins, 3892 del, 12321 sub ] exp/tri2b_mmi_b0.05/decode_it4/wer_conversational_11 +%WER 37.97 [ 17960 / 47305, 1890 ins, 4212 del, 11858 sub ] exp/tri2b_mpe/decode_it4/wer_conversational_13 +%WER 38.16 [ 18050 / 47305, 1678 ins, 4083 del, 12289 sub ] exp/tri2b_mmi_b0.05/decode_it3/wer_conversational_12 +%WER 38.47 [ 18200 / 47305, 1804 ins, 3698 del, 12698 sub ] exp/tri2b_mmi/decode_it4/wer_conversational_11 +%WER 38.50 [ 18213 / 47305, 1958 ins, 4156 del, 12099 sub ] exp/tri2b_mpe/decode_it3/wer_conversational_13 +%WER 38.51 [ 18215 / 47305, 1993 ins, 3476 del, 12746 sub ] exp/tri2b_mmi/decode_it3/wer_conversational_11 +%WER 39.26 [ 18574 / 47305, 2319 ins, 3963 del, 12292 sub ] exp/tri3b/decode/wer_conversational_17 +%WER 41.40 [ 19586 / 47305, 2140 ins, 4216 del, 13230 sub ] exp/tri3b/decode.si/wer_conversational_15 +%WER 42.23 [ 19979 / 47305, 2153 ins, 4354 del, 13472 sub ] exp/tri2b/decode/wer_conversational_15 +%WER 45.92 [ 21724 / 47305, 1995 ins, 5213 del, 14516 sub ] exp/tri2a/decode/wer_conversational_14 +%WER 46.86 [ 22166 / 47305, 2212 ins, 4819 del, 15135 sub ] exp/tri1/decode/wer_conversational_13 +Combined Results for Reports and Conversational WER: +%WER 17.64 [ 12286 / 69668, 1310 ins, 2807 del, 8169 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_8 +%WER 20.26 [ 14114 / 69668, 1261 ins, 3892 del, 8961 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_9 +%WER 22.05 [ 15363 / 69668, 1967 ins, 2705 del, 10691 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_9 +%WER 22.66 [ 15786 / 69668, 2047 ins, 2955 del, 10784 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_11 +%WER 28.89 [ 20127 / 69668, 2244 ins, 3829 del, 14054 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_11 +%WER 29.48 [ 20541 / 69668, 2243 ins, 3860 del, 14438 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_11 +%WER 29.81 [ 20767 / 69668, 2279 ins, 3854 del, 14634 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_11 +%WER 30.22 [ 21056 / 69668, 2165 ins, 4095 del, 14796 sub ] exp/sgmm_5a_mmi_b0.1/decode2/wer_12 +%WER 30.74 [ 21417 / 69668, 2273 ins, 4099 del, 15045 sub ] exp/sgmm_5a_mmi_b0.1/decode1/wer_12 +%WER 31.78 [ 22142 / 69668, 2547 ins, 3990 del, 15605 sub ] exp/sgmm_5a/decode/wer_12 +%WER 31.95 [ 22259 / 69668, 2092 ins, 4413 del, 15754 sub ] exp/tri2b_mmi_b0.05/decode_it4/wer_11 +%WER 32.20 [ 22436 / 69668, 2026 ins, 4646 del, 15764 sub ] 
exp/tri2b_mmi_b0.05/decode_it3/wer_12 +%WER 32.25 [ 22471 / 69668, 2315 ins, 4797 del, 15359 sub ] exp/tri2b_mpe/decode_it4/wer_13 +%WER 32.36 [ 22542 / 69668, 2156 ins, 4184 del, 16202 sub ] exp/tri2b_mmi/decode_it4/wer_11 +%WER 32.50 [ 22640 / 69668, 2393 ins, 3956 del, 16291 sub ] exp/tri2b_mmi/decode_it3/wer_11 +%WER 32.79 [ 22847 / 69668, 2407 ins, 4760 del, 15680 sub ] exp/tri2b_mpe/decode_it3/wer_13 +%WER 33.61 [ 23413 / 69668, 2817 ins, 4577 del, 16019 sub ] exp/tri3b/decode/wer_17 +%WER 35.73 [ 24894 / 69668, 2630 ins, 4944 del, 17320 sub ] exp/tri3b/decode.si/wer_15 +%WER 36.17 [ 25196 / 69668, 2429 ins, 5393 del, 17374 sub ] exp/tri2b/decode/wer_16 +%WER 39.42 [ 27462 / 69668, 2473 ins, 6051 del, 18938 sub ] exp/tri2a/decode/wer_14 +%WER 40.35 [ 28113 / 69668, 2713 ins, 5635 del, 19765 sub ] exp/tri1/decode/wer_13 diff --git a/egs/gale_arabic/s5b/cmd.sh b/egs/gale_arabic/s5b/cmd.sh new file mode 100755 index 00000000000..71dd849a93b --- /dev/null +++ b/egs/gale_arabic/s5b/cmd.sh @@ -0,0 +1,15 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd="queue.pl --mem 2G" +export decode_cmd="queue.pl --mem 4G" +export mkgraph_cmd="queue.pl --mem 8G" diff --git a/egs/gale_arabic/s5b/conf/decode.config b/egs/gale_arabic/s5b/conf/decode.config new file mode 100644 index 00000000000..6f503eab35e --- /dev/null +++ b/egs/gale_arabic/s5b/conf/decode.config @@ -0,0 +1 @@ +link decode_dnn.config \ No newline at end of file diff --git a/egs/gale_arabic/s5b/conf/mfcc.conf b/egs/gale_arabic/s5b/conf/mfcc.conf new file mode 100644 index 00000000000..7361509099f --- /dev/null +++ b/egs/gale_arabic/s5b/conf/mfcc.conf @@ -0,0 +1 @@ +--use-energy=false # only non-default option. diff --git a/egs/gale_arabic/s5b/conf/mfcc_hires.conf b/egs/gale_arabic/s5b/conf/mfcc_hires.conf new file mode 100644 index 00000000000..c45f2b691a9 --- /dev/null +++ b/egs/gale_arabic/s5b/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. 
+--sample-frequency=16000 +--num-mel-bins=40 +--num-ceps=40 +--low-freq=40 # low cutoff frequency for mel bins +--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600) diff --git a/egs/gale_arabic/s5b/conf/online_cmvn.conf b/egs/gale_arabic/s5b/conf/online_cmvn.conf new file mode 100644 index 00000000000..cbdaf5f281c --- /dev/null +++ b/egs/gale_arabic/s5b/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/online/run_online_decoding_nnet2.sh diff --git a/egs/gale_arabic/s5b/local/bad_segments b/egs/gale_arabic/s5b/local/bad_segments new file mode 100644 index 00000000000..c3413f0714c --- /dev/null +++ b/egs/gale_arabic/s5b/local/bad_segments @@ -0,0 +1,10 @@ +ARABIYA_FROMIRAQ_ARB_20070302_175801_2326286_2327450 +ARABIYA_BILARABI_ARB_20061005_201400_221375_223694 +LBC_NAHAR_ARB_20060911_142800_3683267_3685290 +LBC_NAHAR_ARB_20070303_145800_3249800_3251128 +LBC_NAHAR_ARB_20070303_145800_3623646_3624152 +LBC_NAHAR_ARB_20070305_035800_481003_484069 +ALAM_WITHEVENT_ARB_20070227_205800_3141876_3144152 +ALAM_NEWSRPT_ARB_20070130_015801_2875054_2876396 +ALJZ_TODHARV_ARB_20060914_155800_2947717_2949041 +ALJZ_TODHARV_ARB_20070107_145800_2417848_2419238 diff --git a/egs/gale_arabic/s5b/local/chain/run_tdnn.sh b/egs/gale_arabic/s5b/local/chain/run_tdnn.sh new file mode 120000 index 00000000000..34499362831 --- /dev/null +++ b/egs/gale_arabic/s5b/local/chain/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1a.sh \ No newline at end of file diff --git a/egs/gale_arabic/s5b/local/chain/run_tdnn_lstm.sh b/egs/gale_arabic/s5b/local/chain/run_tdnn_lstm.sh new file mode 120000 index 00000000000..8e647598556 --- /dev/null +++ b/egs/gale_arabic/s5b/local/chain/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a.sh \ No newline at end of file diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..f897827461c --- /dev/null +++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +#started from tedlium recipe with a few edits + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +xent_regularize=0.1 +train_set=train +gmm=tri2b # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=0 #default -10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1b #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. +stage=18 +train_stage=3 +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology.
We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1,2) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-6,-3,0) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 2 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_test $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/test_hires $dir/decode || exit 1; +fi +exit 0 diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh new file mode 100755 index 00000000000..e604dc7e714 --- /dev/null +++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -0,0 +1,223 @@ +#!/bin/bash + +#started from tedlium recipe with a few edits + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +decode_nj=30 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train +gmm=tri2b # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1a #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom).
+ # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 3 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_test $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/test_hires $dir/decode || exit 1; +fi +exit 0 diff --git a/egs/gale_arabic/s5b/local/gale_data_prep_audio.sh b/egs/gale_arabic/s5b/local/gale_data_prep_audio.sh new file mode 100755 index 00000000000..0fc667ac53a --- /dev/null +++ b/egs/gale_arabic/s5b/local/gale_data_prep_audio.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + + +galeData=$(readlink -f "${@: -1}" ); # last argumnet; the local folder +audio_dvds=${@:1:${#}-1} # all the audio dvds for GALE corpus; ; check audio=( in ../run.sh + +mkdir -p $galeData + +# check that sox is installed +which sox &>/dev/null +if [[ $? != 0 ]]; then + echo "sox is not installed"; exit 1 +fi + +for dvd in $audio_dvds; do + dvd_full_path=$(readlink -f $dvd) + if [[ ! 
-e $dvd_full_path ]]; then + echo missing $dvd_full_path; exit 1; + fi + find $dvd_full_path \( -name "*.wav" -o -name "*.flac" \) | while read file; do + id=$(basename $file | awk '{gsub(".wav","");gsub(".flac","");print}') + echo "$id sox $file -r 16000 -t wav - |" + done +done | sort -u > $galeData/wav.scp + +echo data prep audio succeeded + +exit 0 + diff --git a/egs/gale_arabic/s5b/local/gale_data_prep_split.sh b/egs/gale_arabic/s5b/local/gale_data_prep_split.sh new file mode 100755 index 00000000000..a62904a3b57 --- /dev/null +++ b/egs/gale_arabic/s5b/local/gale_data_prep_split.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + +if [ $# -ne 1 ]; then + echo "Arguments should be the <gale folder>"; exit 1 +fi + + +#data will be in data/local + +galeData=$(readlink -f $1) +mkdir -p data/local +dir=$(readlink -f data/local) + + +grep -f local/test_list $galeData/all | grep -v -f local/bad_segments > $galeData/all.test +grep -v -f local/test_list $galeData/all | grep -v -f local/bad_segments > $galeData/all.train + +for x in test train; do + outdir=$dir/$x + file=$galeData/all.$x + mkdir -p $outdir + awk '{print $2 " " $2}' $file | sort -u > $outdir/utt2spk + cp -pr $outdir/utt2spk $outdir/spk2utt + awk '{print $2 " " $1 " " $3 " " $4}' $file | sort -u > $outdir/segments + awk '{printf $2 " "; for (i=5; i<=NF; i++) {printf $i " "} printf "\n"}' $file | sort -u > $outdir/text +done + + +grep -f local/test_list $galeData/wav.scp > $dir/test/wav.scp + +cat $galeData/wav.scp | awk -v seg=$dir/train/segments 'BEGIN{while((getline<seg)>0) {seen[$2]=1;}} + {if (seen[$1]) { print $0}}' > $dir/train/wav.scp + +echo data prep split succeeded + +exit 0 \ No newline at end of file diff --git a/egs/gale_arabic/s5b/local/gale_data_prep_txt.sh b/egs/gale_arabic/s5b/local/gale_data_prep_txt.sh new file mode 100755 index 00000000000..14d7241d4c1 --- /dev/null +++ b/egs/gale_arabic/s5b/local/gale_data_prep_txt.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + +galeData=$(readlink -f "${@: -1}" ); # last argument; the local folder +txt_dvds=${@:1:${#}-1} # all the txt cds corresponding to the audio corpus; check text=( in ../run.sh + + +top_pwd=`pwd` +txtdir=$galeData/txt +mkdir -p $txtdir; cd $txtdir + +for cdx in $txt_dvds; do + echo "Preparing $cdx" + if [[ $cdx == *.tgz ]] ; then + tar -xvf $cdx + elif [ -d "$cdx" ]; then + ln -s $cdx `basename $cdx` + else + echo "I don't really know what I shall do with $cdx " >&2 + fi +done + +find -L .
-type f -name "*.tdf" | while read file; do +sed '1,3d' $file # delete the first 3 lines +done > all.tmp$$ + +perl -e ' + ($inFile,$idFile,$txtFile)= split /\s+/, $ARGV[0]; + open(IN, "$inFile"); + open(ID, ">$idFile"); + open(TXT, ">$txtFile"); + while (<IN>) { + @arr= split /\t/,$_; + $start=sprintf ("%0.3f",$arr[2]);$rStart=$start;$start=~s/\.//; $start=~s/^0+$/0/; $start=~s/^0+([^0])/$1/; # remove zeros at the beginning + $end=sprintf ("%0.3f",$arr[3]);$rEnd=$end;$end=~s/^0+([^0])/$1/;$end=~s/\.//; + if ( ($arr[11] !~ m/report/) && ($arr[11] !~ m/conversational/) ){$arr[11]="UNK";} + $id="$arr[11] $arr[0] $arr[0]_${start}_${end} $rStart $rEnd\n"; + next if ($rStart == $rEnd); + $id =~ s/.sph//g; + print ID $id; + print TXT "$arr[7]\n"; + }' "all.tmp$$ allid.tmp$$ contentall.tmp$$" + + +perl ${top_pwd}/local/normalize_transcript_BW.pl contentall.tmp$$ contentall.buck.tmp$$ + +paste allid.tmp$$ contentall.buck.tmp$$ | sed 's: $::' | awk '{if (NF>5) {print $0}}' > all_1.tmp$$ + +awk '{$1="";print $0}' all_1.tmp$$ | sed 's:^ ::' > $galeData/all +awk '{if ($1 == "report") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $galeData/report +awk '{if ($1 == "conversational") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $galeData/conversational + +cd ..; +rm -fr $txtdir +cd $top_pwd +echo data prep text succeeded + +exit 0 diff --git a/egs/gale_arabic/s5b/local/gale_format_data.sh b/egs/gale_arabic/s5b/local/gale_format_data.sh new file mode 100755 index 00000000000..a572b8194a3 --- /dev/null +++ b/egs/gale_arabic/s5b/local/gale_format_data.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + +if [ -f path.sh ]; then + . path.sh; else + echo "$0: missing path.sh"; exit 1; +fi + +for dir in test train; do + cp -pr data/local/$dir data/$dir +done + + +mkdir -p data/lang_test + +arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz +[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1; + +rm -r data/lang_test +cp -r data/lang data/lang_test + +gunzip -c "$arpa_lm" | \ + arpa2fst --disambig-symbol=#0 \ + --read-symbol-table=data/lang_test/words.txt - data/lang_test/G.fst + + +echo "$0: Checking how stochastic G is (the first of these numbers should be small):" +fstisstochastic data/lang_test/G.fst + +## Check lexicon. +## just have a look and make sure it seems sane. +echo "$0: First few lines of lexicon FST:" +fstprint --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt data/lang/L.fst | head + +echo "$0: Performing further checks" + +# Checking that G.fst is determinizable. +fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G. + +# Checking that L_disambig.fst is determinizable. +fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L. + +# Checking that disambiguated lexicon times G is determinizable +# Note: we do this with fstdeterminizestar not fstdeterminize, as +# fstdeterminize was taking forever (presumably relates to a bug +# in this version of OpenFst that makes determinization slow in +# some cases). +fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \ + fstdeterminizestar >/dev/null || echo Error + +# Checking that LG is stochastic: +fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \ + fstisstochastic || echo LG is not stochastic + + +echo gale_format_data succeeded.
+ +exit 0 diff --git a/egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh b/egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh new file mode 100755 index 00000000000..0162eb49330 --- /dev/null +++ b/egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright 2017 QCRI (author: Ahmed Ali) +# Apache 2.0 + + +# run this from ../ +dir=$(readlink -f data/local/dict) +mkdir -p $dir + + +# (1) Get all available dictionaries; since this is a grapheme model, we mainly need the most frequent word lists +wget http://alt.qcri.org//resources/speech/dictionary/ar-ar_grapheme_lexicon_2016-02-09.bz2 || exit 1; +wget http://alt.qcri.org//resources/speech/dictionary/ar-ar_lexicon_2014-03-17.txt.bz2 || exit 1; +bzcat ar-ar_grapheme_lexicon_2016-02-09.bz2 | sed '1,3d' | awk '{print $1}' > tmp$$ +bzcat ar-ar_lexicon_2014-03-17.txt.bz2 | sed '1,3d' | awk '{print $1}' >> tmp$$ +# (2) Now we add all the words that appear in the training data +cat data/local/train/text | cut -d ' ' -f 2- | tr -s " " "\n" | sort -u >> tmp$$ +grep -v [0-9] tmp$$ | sed -e 's:[FNKaui\~o\`]::g' -e 's:{:}:g' | sort -u > tmp1.$$ # remove vowels and rare alef wasla +cat tmp1.$$ | sed 's:\(.\):\1 :g' | sed -e 's: : :g' -e 's: : :g' -e 's:\s*: :g' -e 's:\*:V:g' > tmp2.$$ +paste -d ' ' tmp1.$$ tmp2.$$ > $dir/lexicon.txt + +#(3) Dictionary preparation: + +# silence phones, one per line. +echo SIL > $dir/silence_phones.txt +echo SIL > $dir/optional_silence.txt + +# nonsilence phones; on each line is a list of phones that correspond +# really to the same base phone. +cat tmp2.$$ | tr -s ' ' '\n' | grep -v ^$ | sort -u > $dir/nonsilence_phones.txt || exit 1; + +sed -i '1i<UNK> SIL' $dir/lexicon.txt # insert <UNK> word with phone SIL at the beginning of the dictionary + +rm -fr ar-ar_lexicon_2014-03-17.txt.bz2 ar-ar_grapheme_lexicon_2016-02-09.bz2 tmp$$ tmp1.$$ tmp2.$$ +echo Dictionary preparation succeeded + +# The script is still missing dates and numbers + +exit 0 + diff --git a/egs/gale_arabic/s5b/local/gale_train_lms.sh b/egs/gale_arabic/s5b/local/gale_train_lms.sh new file mode 100755 index 00000000000..3988ec3818f --- /dev/null +++ b/egs/gale_arabic/s5b/local/gale_train_lms.sh @@ -0,0 +1,81 @@ +#!/bin/bash + + +# To be run from one directory above this script. + + +lexicon=data/local/dict/lexicon.txt +[ ! -f $lexicon ] && echo "$0: No such file $lexicon" && exit 1; + + +# This script takes no arguments. It assumes you have already run +# previous steps successfully +# It takes as input the files +#data/local/train.*/text +#data/local/dict/lexicon.txt + + +export LC_ALL=C # You'll get errors about things being not sorted, if you +# have a different locale. +export PATH=$PATH:./../../../tools/kaldi_lm +( # First make sure the kaldi_lm toolkit is installed. + cd $KALDI_ROOT/tools || exit 1; + if [ -d kaldi_lm ]; then + echo Not installing the kaldi_lm toolkit since it is already there. + else + echo Downloading and installing the kaldi_lm tools + if [ ! -f kaldi_lm.tar.gz ]; then + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; + fi + tar -xvzf kaldi_lm.tar.gz || exit 1; + cd kaldi_lm + make || exit 1; + echo Done making the kaldi_lm tools + fi +) || exit 1; + + +dir=data/local/lm + mkdir -p $dir + text=data/local/train/text + [ !
-f $text ] && echo "$0: No such file $text" && exit 1; + + cleantext=$dir/text.no_oov + + cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex)>0){ seen[$1]=1; } } + {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf("<UNK> ",$n);} } printf("\n");}' \ + > $cleantext || exit 1; + + + cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \ + sort -nr > $dir/word.counts || exit 1; + + +# Get counts from acoustic training transcripts, and add one-count +# for each word in the lexicon (but not silence, we don't want it +# in the LM-- we'll add it optionally later). + cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \ + cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \ + sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1; + +# note: we probably won't really make use of <UNK> as there aren't any OOVs + cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "<s>" "</s>" "<UNK>" > $dir/word_map \ + || exit 1; + +# note: ignore 1st field of train.txt, it's the utterance-id. + cat $cleantext | awk -v wmap=$dir/word_map 'BEGIN{while((getline<wmap)>0)map[$1]=$2;} + { for(n=2;n<=NF;n++) { printf map[$n]; if(n<NF){ printf " "; } else { print ""; }}}' | gzip -c >$dir/train.gz \ + || exit 1; + + train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1; + +# LM is small enough that we don't need to prune it (only about 0.7M N-grams). +# Perplexity over 128254.000000 words is 90.446690 + +# note: output is +# data/local/lm/3gram-mincount/lm_unpruned.gz + + +echo train lm succeeded + +exit 0 diff --git a/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..d9fc3385a42 --- /dev/null +++ b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh @@ -0,0 +1,237 @@ +#!/bin/bash + +set -e -o pipefail + +# This script is called from local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh (and may eventually +# be called by more scripts). It contains the common feature preparation and iVector-related parts +# of the script. See those scripts for examples of usage. + + +stage=0 +nj=100 +min_seg_len=1.55 # min length in seconds... we do this because chain training + # will discard segments shorter than 1.5 seconds. Must remain in sync + # with the same option given to prepare_lores_feats_and_alignments.sh +train_set=train # you might set this to e.g. train. +gmm=tri2b # This specifies a GMM-dir from the features of the type you're training the system on; + # it should contain alignments for 'train_set'. + +num_threads_ubm=32 +nnet3_affix=_cleaned # affix for exp/nnet3 directory to put iVector stuff in, so it + # becomes exp/nnet3_cleaned or whatever. + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp_comb + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + + + +if [ $stage -le 2 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then + echo "$0: data/${train_set}_sp_hires/feats.scp already exists." + echo " ... Please either remove it, or rerun this script with stage > 2." + exit 1 +fi + + +if [ $stage -le 1 ]; then + echo "$0: preparing directory for speed-perturbed data" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: creating high-resolution MFCC features" + + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations.
You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp test; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires + + for datadir in ${train_set}_sp test; do + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires + steps/compute_cmvn_stats.sh data/${datadir}_hires + utils/fix_data_dir.sh data/${datadir}_hires + done +fi + +if [ $stage -le 3 ]; then + echo "$0: combining short segments of speed-perturbed high-resolution MFCC training data" + # we have to combine short segments or we won't be able to train chain models + # on those segments. + utils/data/combine_short_segments.sh \ + data/${train_set}_sp_hires $min_seg_len data/${train_set}_sp_hires_comb + + # just copy over the CMVN to avoid having to recompute it. + cp data/${train_set}_sp_hires/cmvn.scp data/${train_set}_sp_hires_comb/ + utils/fix_data_dir.sh data/${train_set}_sp_hires_comb/ +fi + +if [ $stage -le 4 ]; then + echo "$0: selecting segments of hires training data that were also present in the" + echo " ... original training data." + + # note, these data-dirs are temporary; we put them in a sub-directory + # of the place where we'll make the alignments. + temp_data_root=exp/nnet3${nnet3_affix}/tri5 + mkdir -p $temp_data_root + + utils/data/subset_data_dir.sh --utt-list data/${train_set}/feats.scp \ + data/${train_set}_sp_hires $temp_data_root/${train_set}_hires + + # note: essentially all the original segments should be in the hires data. + n1=$(wc -l /dev/null || true + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ + ${graph_dir} data/test_hires ${dir}/decode || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/test_hires ${dir}/decode_test ${dir}/decode_test_rescore || exit 1 +fi + +exit 0; diff --git a/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..a6cc6e2dec8 --- /dev/null +++ b/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh @@ -0,0 +1,88 @@ +#!/bin/bash + +# started from tedlium recipe with few edits + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train +gmm=tri2b # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix= #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. 
+ +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0" +remove_egs=true +relu_dim=850 +num_epochs=3 + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < \n"; + exit (1); + } + +# <\check usage> +my $inFile = shift (@ARGV); +my $ouFile = shift(@ARGV); + + +open INFILE, "<$inFile" || die "unable to open the input file $inFile\n"; +binmode INFILE, ":encoding(utf8)"; + + +open OUTPUTFILE, ">$ouFile" or die "unable to open the output mlf file $ouFile\n"; +binmode OUTPUTFILE, ":encoding(utf8)"; + + +while () { + s/[^اأإآبتثجحخدذرزسشصضطظعغفقكلمنهويىئءؤة0-9]+/ /g; ## Removes non Arabic or numbers + my $BW = convertUTF8ToBuckwalter ($_); + print OUTPUTFILE "$BW"."\n"; +} +close INFILE; +close OUTPUTFILE; + + + +# this function is copied from MADATools.pm: MADA Tools + sub convertUTF8ToBuckwalter { + + my ($line)= (@_); + #$line = $UTF8_ENCODING_OBJ->decode($line); ## Same as Encode::decode("utf8",$line), but faster since object already created + $line =~ s/\x{0621}/\'/g; ## HAMZA + $line =~ s/\x{0622}/\|/g; ## ALEF WITH MADDA ABOVE + $line =~ s/\x{0623}/\>/g; ## ALEF WITH HAMZA ABOVE + $line =~ s/\x{0624}/\&/g; ## WAW WITH HAMZA ABOVE + $line =~ s/\x{0625}/\ " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." + echo " --min_lmwt # minumum LM-weight for lattice rescoring " + echo " --max_lmwt # maximum LM-weight for lattice rescoring " + exit 1; +fi + +data=$1 +lang_or_graph=$2 +dir=$3 + +symtab=$lang_or_graph/words.txt + +for f in $symtab $dir/lat.1.gz $data/text; do + [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; +done + +mkdir -p $dir/scoring/log + +cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt + +$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab \ + ark:- ark,t:$dir/scoring/LMWT.tra || exit 1; + +# Note: the double level of quoting for the sed command +$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ + cat $dir/scoring/LMWT.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; + +exit 0; diff --git a/egs/gale_arabic/s5b/local/split_wer.sh b/egs/gale_arabic/s5b/local/split_wer.sh new file mode 100755 index 00000000000..70c97ae5d19 --- /dev/null +++ b/egs/gale_arabic/s5b/local/split_wer.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# Report WER for reports and conversational +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + +if [ $# -ne 1 ]; then + echo "Arguments should be the gale folder, see ../run.sh for example." + exit 1; +fi + +[ -f ./path.sh ] && . 
./path.sh + + +galeFolder=$(readlink -f $1) +symtab=./data/lang/words.txt +find exp/ -maxdepth 3 -type d -name decode\* > list_decode$$ + +#split the test set per type: +awk '{print $2}' $galeFolder/all.test | sort -u > $galeFolder/test_id$$ + +# generate the report test set +awk '{print $2}' $galeFolder/report | sort -u > $galeFolder/report_id$$ +comm -1 -2 $galeFolder/test_id$$ $galeFolder/report_id$$ > $galeFolder/report.test + +# generate the conversational test set +awk '{print $2}' $galeFolder/conversational | sort -u > $galeFolder/conversational_id$$ + +comm -1 -2 $galeFolder/test_id$$ $galeFolder/conversational_id$$ > $galeFolder/conversational.test + +rm -fr $galeFolder/test_id$$ $galeFolder/report_id$$ $galeFolder/conversational_id$$ + +min_lmwt=7 +max_lmwt=20 +cat list_decode$$ | while read dir; do + for type in report conversational; do + #echo "Processing: $dir $type" + rm -fr $dir/scoring_$type + cp -pr $dir/scoring $dir/scoring_$type + ( cd $dir/scoring_$type; + for x in *.tra test_filt.txt; do + sort -u $x > tmp$$ + join tmp$$ $galeFolder/${type}.test > $x + rm -fr tmp$$ + done + ) + +utils/run.pl LMWT=$min_lmwt:$max_lmwt $dir/scoring_$type/log/score.LMWT.log \ + cat $dir/scoring_${type}/LMWT.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring_${type}/test_filt.txt ark,p:- ">&" $dir/wer_${type}_LMWT +done +done + + +time=$(date +"%Y-%m-%d-%H-%M-%S") +echo "RESULTS generated by $USER at $time" + +echo "Report Results WER:" +cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_report_* | utils/best_wer.sh; done | sort -n -k2 + +echo "Conversational Results WER:" +cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_conversational_* | utils/best_wer.sh; done | sort -n -k2 + +echo "Combined Results for Reports and Conversational WER:" +cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_?? $x/wer_?| utils/best_wer.sh; done | sort -n -k2 + +rm list_decode$$ + + + diff --git a/egs/gale_arabic/s5b/local/test_list b/egs/gale_arabic/s5b/local/test_list new file mode 100644 index 00000000000..d82cf498804 --- /dev/null +++ b/egs/gale_arabic/s5b/local/test_list @@ -0,0 +1,11 @@ +ALAM_WITHEVENT_ARB_20070116_205800 +ALAM_WITHEVENT_ARB_20070130_205800 +ALAM_WITHEVENT_ARB_20070206_205801 +ALAM_WITHEVENT_ARB_20070213_205800 +ALAM_WITHEVENT_ARB_20070227_205800 +ALAM_WITHEVENT_ARB_20070306_205800 +ALAM_WITHEVENT_ARB_20070313_205800 +ARABIYA_FROMIRAQ_ARB_20070216_175800 +ARABIYA_FROMIRAQ_ARB_20070223_175801 +ARABIYA_FROMIRAQ_ARB_20070302_175801 +ARABIYA_FROMIRAQ_ARB_20070309_175800 diff --git a/egs/gale_arabic/s5b/path.sh b/egs/gale_arabic/s5b/path.sh new file mode 100755 index 00000000000..be11b34cbc6 --- /dev/null +++ b/egs/gale_arabic/s5b/path.sh @@ -0,0 +1,5 @@ +export KALDI_ROOT=$(pwd)/../../.. +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C diff --git a/egs/gale_arabic/s5b/run.sh b/egs/gale_arabic/s5b/run.sh new file mode 100755 index 00000000000..9cc72d31a95 --- /dev/null +++ b/egs/gale_arabic/s5b/run.sh @@ -0,0 +1,167 @@ +#!/bin/bash -e + +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + +. path.sh +. cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. 
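+# For example (an illustrative local setup, not part of this recipe): if you
+# have no queueing system, cmd.sh can point everything at run.pl instead,
+#   export train_cmd=run.pl
+#   export decode_cmd=run.pl
+#   export mkgraph_cmd=run.pl
+# in which case run the stages one at a time, as the comments in cmd.sh warn,
+# to avoid exhausting memory.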
+num_jobs=120 +num_decode_jobs=40 + +#NB: You can add whatever number of corpora you like. The supported extensions +#NB: (formats) are wav and flac. Flac will be converted using sox and in contrast +#NB: with the old approach, the conversion will be on-the-fly and one-time-only +#NB: during the parametrization. + +#NB: Text corpora specification. We support either tgz files, which are unpacked, +#NB: or just plain (already unpacked) directories. The list of transcripts is then +#NB: obtained using the find command + +#This is CLSP configuration. We add the 2014 GALE data. We got around 2% +#improvement just by including it. The gain might be larger if someone tweaked +# the number of leaves and states and so on. + +#Make sure you edit this section to reflect where you keep the LDC data on your cluster +audio=( + /data/sls/scratch/amali/data/GALE/LDC2013S02 + /data/sls/scratch/amali/data/GALE/LDC2013S07 + /data/sls/scratch/amali/data/GALE/LDC2014S07 +) +text=( + /data/sls/scratch/amali/data/GALE/LDC2013T17.tgz + /data/sls/scratch/amali/data/GALE/LDC2013T04.tgz + /data/sls/scratch/amali/data/GALE/LDC2014T17.tgz +) + +galeData=GALE +#prepare the data +#split train dev test +#prepare lexicon and LM + +# You can run the script from here automatically, but it is recommended to run the data preparation +# and feature extraction manually, and only once, +# by copying and pasting into your shell. + +#copy the audio files to local folder wav and convert flac files to wav +local/gale_data_prep_audio.sh "${audio[@]}" $galeData || exit 1; + +#get the transcription and remove empty prompts and all noise markers +local/gale_data_prep_txt.sh "${text[@]}" $galeData || exit 1; + +# split the data to reports and conversational; for each class we will have train/dev and test +local/gale_data_prep_split.sh $galeData || exit 1; + +# get all Arabic grapheme dictionaries and add silence and UNK +local/gale_prep_grapheme_dict.sh || exit 1; + + +#prepare the language resources +utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang data/lang || exit 1; + +# LM training +local/gale_train_lms.sh || exit 1; + +local/gale_format_data.sh || exit 1; +# G compilation, check LG composition + +# Now make MFCC features. +# mfccdir should be some place with a largish disk where you +# want to store MFCC features. +mfccdir=mfcc + +for x in train test ; do + steps/make_mfcc.sh --cmd "$train_cmd" --nj $num_jobs \ + data/$x exp/make_mfcc/$x $mfccdir + utils/fix_data_dir.sh data/$x # some files fail to get mfcc for many reasons + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir +done + + +# Here we start the AM + +# Let's create a subset with 10k segments to make quick flat-start training: +utils/subset_data_dir.sh data/train 10000 data/train.10K || exit 1; + +# Train monophone models on a subset of the data, 10K segments +# Note: the --boost-silence option should probably be omitted by default +steps/train_mono.sh --nj 40 --cmd "$train_cmd" \ + data/train.10K data/lang exp/mono || exit 1; + + +# Get alignments from monophone system.
+steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ + data/train data/lang exp/mono exp/mono_ali || exit 1; + +# train tri1 [first triphone pass] +steps/train_deltas.sh --cmd "$train_cmd" \ + 2500 30000 data/train data/lang exp/mono_ali exp/tri1 || exit 1; + +# First triphone decoding +utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph +steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ + exp/tri1/graph data/test exp/tri1/decode + +steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ + data/train data/lang exp/tri1 exp/tri1_ali || exit 1; + +# Train tri2a, which is deltas + delta-deltas +steps/train_deltas.sh --cmd "$train_cmd" \ + 3000 40000 data/train data/lang exp/tri1_ali exp/tri2a || exit 1; + +# tri2a decoding +utils/mkgraph.sh data/lang_test exp/tri2a exp/tri2a/graph +steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ + exp/tri2a/graph data/test exp/tri2a/decode + +# train and decode tri2b [LDA+MLLT] +steps/train_lda_mllt.sh --cmd "$train_cmd" 4000 50000 \ + data/train data/lang exp/tri1_ali exp/tri2b || exit 1; + +utils/mkgraph.sh data/lang_test exp/tri2b exp/tri2b/graph +steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ + exp/tri2b/graph data/test exp/tri2b/decode + +# Align all data with LDA+MLLT system (tri2b) +steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ + --use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali || exit 1; + + +# From 2b system, train 3b which is LDA + MLLT + SAT. +steps/train_sat.sh --cmd "$train_cmd" \ + 5000 100000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1; + +utils/mkgraph.sh data/lang_test exp/tri3b exp/tri3b/graph +steps/decode_fmllr.sh --nj $num_decode_jobs --cmd \ + "$decode_cmd" exp/tri3b/graph data/test exp/tri3b/decode + +# From 3b system, align all data.
+steps/align_fmllr.sh --nj $num_jobs --cmd "$train_cmd" \ + data/train data/lang exp/tri3b exp/tri3b_ali || exit 1; + + +# nnet3 cross-entropy +local/nnet3/run_tdnn.sh #tdnn recipe: +local/nnet3/run_lstm.sh #lstm recipe: + +# chain lattice-free +local/chain/run_tdnn.sh #tdnn recipe: +local/chain/run_tdnn_lstm.sh #tdnn-lstm recipe: + +time=$(date +"%Y-%m-%d-%H-%M-%S") + +#get detailed WER; reports, conversational and combined +local/split_wer.sh $galeData > RESULTS.details.$USER.$time # to make sure you keep the results timed and owned + +echo training succeeded +exit 0 + +#TODO: +#LM (4-gram and RNN) rescoring +#combine lattices +#dialect detection + + + + + diff --git a/egs/gale_arabic/s5b/steps b/egs/gale_arabic/s5b/steps new file mode 120000 index 00000000000..1b186770dd1 --- /dev/null +++ b/egs/gale_arabic/s5b/steps @@ -0,0 +1 @@ +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/gale_arabic/s5b/utils b/egs/gale_arabic/s5b/utils new file mode 120000 index 00000000000..a3279dc8679 --- /dev/null +++ b/egs/gale_arabic/s5b/utils @@ -0,0 +1 @@ +../../wsj/s5/utils/ \ No newline at end of file diff --git a/egs/wsj/s5/utils/build_const_arpa_lm.sh b/egs/wsj/s5/utils/build_const_arpa_lm.sh index 375ffd79eb4..ec067df0d39 100755 --- a/egs/wsj/s5/utils/build_const_arpa_lm.sh +++ b/egs/wsj/s5/utils/build_const_arpa_lm.sh @@ -34,7 +34,7 @@ mkdir -p $new_lang cp -r $old_lang/* $new_lang unk=`cat $new_lang/oov.int` -bos=`grep "<s>" $new_lang/words.txt | awk '{print $2}'` +bos=`grep -w "<s>" $new_lang/words.txt | awk '{print $2}'` eos=`grep "</s>" $new_lang/words.txt | awk '{print $2}'` if [[ -z $bos || -z $eos ]]; then echo "$0: <s> and </s> symbols are not in $new_lang/words.txt" From 80284fe34125585426b535b0c98fbac8702e2a45 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 1 Feb 2017 18:59:08 -0500 Subject: [PATCH 398/530] [src,egs,scripts]: various minor fixes: make num-epochs continuous; add decay-time to other LSTM types; bug-fix in nnet3 combination code; swbd/s5c results added.
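A note on the "make num-epochs continuous" part: the trainer only uses num-epochs to derive integer archive and iteration counts, so a fractional value such as 3.5 is meaningful and simply truncates at that point. A rough sketch of the arithmetic in Python; the variable names approximate those in steps/nnet3/chain/train.py and the numbers are made up for illustration, so this is not the exact code:

    num_epochs = 3.5                      # --trainer.num-epochs may now be fractional
    num_archives = 120                    # number of egs archives on disk
    num_jobs_initial, num_jobs_final = 2, 12

    # total work is measured in archives processed, always an integer:
    num_archives_to_process = int(num_epochs * num_archives)              # 420
    # the iteration count assumes the job count ramps linearly from
    # num_jobs_initial to num_jobs_final, hence the average in the divisor:
    num_iters = num_archives_to_process * 2 // (num_jobs_initial + num_jobs_final)
    print(num_iters)                      # -> 60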
--- .../s5c/local/chain/compare_wer_general.sh | 8 +-- .../local/chain/tuning/run_tdnn_lstm_1f.sh | 17 +++++++ egs/wsj/s5/steps/libs/nnet3/train/common.py | 16 ++++-- egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py | 51 +++++++++++++++---- egs/wsj/s5/steps/nnet3/chain/train.py | 6 +-- egs/wsj/s5/steps/nnet3/train_dnn.py | 2 +- egs/wsj/s5/steps/nnet3/train_raw_dnn.py | 2 +- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 2 +- egs/wsj/s5/steps/nnet3/train_rnn.py | 2 +- src/nnet3/nnet-chain-combine.cc | 2 +- src/nnet3/nnet-combine.cc | 3 +- 11 files changed, 84 insertions(+), 27 deletions(-) diff --git a/egs/swbd/s5c/local/chain/compare_wer_general.sh b/egs/swbd/s5c/local/chain/compare_wer_general.sh index 29a5dc83063..f56cbfb8675 100755 --- a/egs/swbd/s5c/local/chain/compare_wer_general.sh +++ b/egs/swbd/s5c/local/chain/compare_wer_general.sh @@ -126,28 +126,28 @@ if $used_epochs; then fi -echo -n "Final train prob " +echo -n "# Final train prob " for x in $*; do prob=$(grep Overall exp/chain/${x}/log/compute_prob_train.final.log | grep -v xent | awk '{print $8}') printf "% 10.3f" $prob done echo -echo -n "Final valid prob " +echo -n "# Final valid prob " for x in $*; do prob=$(grep Overall exp/chain/${x}/log/compute_prob_valid.final.log | grep -v xent | awk '{print $8}') printf "% 10.3f" $prob done echo -echo -n "Final train prob (xent) " +echo -n "# Final train prob (xent) " for x in $*; do prob=$(grep Overall exp/chain/${x}/log/compute_prob_train.final.log | grep -w xent | awk '{print $8}') printf "% 10.3f" $prob done echo -echo -n "Final valid prob (xent) " +echo -n "# Final valid prob (xent) " for x in $*; do prob=$(grep Overall exp/chain/${x}/log/compute_prob_valid.final.log | grep -w xent | awk '{print $8}') printf "% 10.4f" $prob diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh index 3d9e1e4a63b..b8f1fdd92f6 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -8,6 +8,23 @@ # trying the change of xent_regularize from 0.025 (which was an # unusual value) to the more usual 0.01. +# WER is worse but this seems to be due to more complete optimization +# (train better, valid worse). Looks like we may be overtraining. 
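+# (As a reminder of the arithmetic behind that option: the xent branch's
+# learning-rate factor is 0.5/xent_regularize, so moving xent_regularize from
+# 0.025 to 0.01 raises that factor from 0.5/0.025 = 20 to 0.5/0.01 = 50.)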
+# +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# System tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# WER on train_dev(tg) 12.74 13.23 +# [looped:] 12.93 13.27 +# WER on train_dev(fg) 11.70 12.17 +# [looped:] 12.09 12.42 +# WER on eval2000(tg) 15.7 16.1 +# [looped:] 15.9 16.2 +# WER on eval2000(fg) 14.3 14.6 +# [looped:] 14.6 14.7 +# Final train prob -0.066 -0.065 +# Final valid prob -0.087 -0.090 +# Final train prob (xent) -0.931 -0.916 +# Final valid prob (xent) -1.0279 -1.0359 set -e diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py index 25852ab2806..977393c44b0 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -442,16 +442,24 @@ def verify_iterations(num_iters, num_epochs, num_hidden_layers, "layer-wise discriminatory training.") approx_iters_per_epoch_final = num_archives/num_jobs_final + # Note: it used to be that we would combine over an entire epoch, + # but in practice we very rarely would use any weights from towards + # the end of that range, so we are changing it to use not + # approx_iters_per_epoch_final, but instead: + # approx_iters_per_epoch_final/2 + 1, + # dividing by 2 to use half an epoch, and adding 1 just to make sure + # it's not zero. + # First work out how many iterations we want to combine over in the final # nnet3-combine-fast invocation. # The number we use is: - # min(max(max_models_combine, approx_iters_per_epoch_final), + # min(max(max_models_combine, approx_iters_per_epoch_final/2+1), # 1/2 * iters_after_last_layer_added) # But if this value is > max_models_combine, then the models # are subsampled to get these many models to combine. half_iters_after_add_layers = (num_iters - finish_add_layers_iter)/2 - num_iters_combine_initial = min(approx_iters_per_epoch_final, + num_iters_combine_initial = min(approx_iters_per_epoch_final/2 + 1, half_iters_after_add_layers) if num_iters_combine_initial > max_models_combine: @@ -651,8 +659,8 @@ def __init__(self, other random seeds used in other stages of the experiment like data preparation (e.g. volume perturbation).""") - self.parser.add_argument("--trainer.num-epochs", type=int, - dest='num_epochs', default=8, + self.parser.add_argument("--trainer.num-epochs", type=float, + dest='num_epochs', default=8.0, help="Number of epochs to train the model") self.parser.add_argument("--trainer.shuffle-buffer-size", type=int, dest='shuffle_buffer_size', default=5000, diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py index 9d7f649c4b4..4ffebcd9436 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py @@ -30,6 +30,15 @@ # i.e., SigmoidComponent, TanhComponent and RectifiedLinearComponent ] # ng-per-element-scale-options='' [Additional options used for the diagonal matrices in the LSTM ] # ng-affine-options='' [Additional options used for the full matrices in the LSTM, can be used to do things like set biases to initialize to 1] +# decay-time=-1 [If >0, an approximate maximum on how many frames +# can be remembered via summation into the cell +# contents c_t; enforced by putting a scaling factor +# of recurrence_scale = 1 - abs(delay)/decay_time on +# the recurrence, i.e. the term c_{t-1} in the LSTM +# equations. E.g. setting this to 20 means no more +# than about 20 frames' worth of history, +# i.e. history since about t = t-20, can be +# accumulated in c_t.] 
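The decay-time option documented above comes down to a single scale on the c_{t-1} term of the recurrence. A minimal Python sketch of the computation the layer code below performs (the delay and decay-time values are only illustrative):

    # scale applied to c_{t-1}; decay_time < 0 means "no limit on history"
    def recurrence_scale(delay, decay_time):
        if decay_time < 0:
            return 1.0
        scale = 1.0 - abs(delay) / decay_time
        # decay_time must be well above abs(delay), or the scale reaches zero
        assert scale > 0
        return scale

    print(recurrence_scale(-3, 20.0))   # 0.85: roughly 20 frames of history kept
    print(recurrence_scale(-3, -1.0))   # 1.0: unscaled recurrence
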
class XconfigLstmLayer(XconfigLayerBase): def __init__(self, first_token, key_to_value, prev_names = None): assert first_token == "lstm-layer" @@ -44,7 +53,8 @@ def set_default_configs(self): 'ng-affine-options' : ' max-change=0.75 ', 'self-repair-scale-nonlinearity' : 0.00001, 'zeroing-interval' : 20, - 'zeroing-threshold' : 15.0 + 'zeroing-threshold' : 15.0, + 'decay-time': -1.0 } def set_derived_configs(self): @@ -108,17 +118,23 @@ def generate_lstm_config(self): input_descriptor = self.descriptors['input']['final-string'] cell_dim = self.config['cell-dim'] delay = self.config['delay'] - - repair_nonlin = self.config['self-repair-scale-nonlinearity'] - repair_nonlin_str = "self-repair-scale={0:.10f}".format(repair_nonlin) if repair_nonlin is not None else '' + decay_time = self.config['decay-time'] + # we expect decay_time to be either -1, or large, like 10 or 50. + recurrence_scale = (1.0 if decay_time < 0 else + 1.0 - (abs(delay) / decay_time)) + assert recurrence_scale > 0 # or user may have set decay-time much + # too small. bptrunc_str = ("clipping-threshold={0}" " zeroing-threshold={1}" " zeroing-interval={2}" " recurrence-interval={3}" + " scale={4}" "".format(self.config['clipping-threshold'], self.config['zeroing-threshold'], self.config['zeroing-interval'], - abs(delay))) + abs(delay), recurrence_scale)) + repair_nonlin = self.config['self-repair-scale-nonlinearity'] + repair_nonlin_str = "self-repair-scale={0:.10f}".format(repair_nonlin) if repair_nonlin is not None else '' affine_str = self.config['ng-affine-options'] # Natural gradient per element scale parameters # TODO: decide if we want to keep exposing these options @@ -233,6 +249,15 @@ def generate_lstm_config(self): # i.e., SigmoidComponent, TanhComponent and RectifiedLinearComponent ] # ng-per-element-scale-options='' [Additional options used for the diagonal matrices in the LSTM ] # ng-affine-options='' [Additional options used for the full matrices in the LSTM, can be used to do things like set biases to initialize to 1] +# decay-time=-1 [If >0, an approximate maximum on how many frames +# can be remembered via summation into the cell +# contents c_t; enforced by putting a scaling factor +# of recurrence_scale = 1 - abs(delay)/decay_time on +# the recurrence, i.e. the term c_{t-1} in the LSTM +# equations. E.g. setting this to 20 means no more +# than about 20 frames' worth of history, +# i.e. history since about t = t-20, can be +# accumulated in c_t.] class XconfigLstmpLayer(XconfigLayerBase): def __init__(self, first_token, key_to_value, prev_names = None): assert first_token == "lstmp-layer" @@ -252,7 +277,8 @@ def set_default_configs(self): 'zeroing-interval' : 20, 'zeroing-threshold' : 15.0, 'dropout-proportion' : -1.0, # If -1.0, no dropout components will be added - 'dropout-per-frame' : False # If false, regular dropout, not per frame. + 'dropout-per-frame' : False, # If false, regular dropout, not per frame. + 'decay-time': -1.0 } def set_derived_configs(self): @@ -342,14 +368,21 @@ def generate_lstm_config(self): delay = self.config['delay'] repair_nonlin = self.config['self-repair-scale-nonlinearity'] repair_nonlin_str = "self-repair-scale={0:.10f}".format(repair_nonlin) if repair_nonlin is not None else '' + decay_time = self.config['decay-time'] + # we expect decay_time to be either -1, or large, like 10 or 50. + recurrence_scale = (1.0 if decay_time < 0 else + 1.0 - (abs(delay) / decay_time)) + assert recurrence_scale > 0 # or user may have set decay-time much + # too small. 
bptrunc_str = ("clipping-threshold={0}" " zeroing-threshold={1}" " zeroing-interval={2}" " recurrence-interval={3}" + " scale={4}" "".format(self.config['clipping-threshold'], self.config['zeroing-threshold'], self.config['zeroing-interval'], - abs(delay))) + abs(delay), recurrence_scale)) affine_str = self.config['ng-affine-options'] pes_str = self.config['ng-per-element-scale-options'] dropout_proportion = self.config['dropout-proportion'] @@ -578,7 +611,6 @@ def generate_lstm_config(self): 1.0 - (abs(delay) / decay_time)) assert recurrence_scale > 0 # or user may have set decay-time much # too small. - lstm_str = self.config['lstm-nonlinearity-options'] bptrunc_str = ("clipping-threshold={0}" " zeroing-threshold={1}" " zeroing-interval={2}" @@ -588,6 +620,8 @@ def generate_lstm_config(self): self.config['zeroing-threshold'], self.config['zeroing-interval'], abs(delay), recurrence_scale)) + lstm_str = self.config['lstm-nonlinearity-options'] + configs = [] @@ -772,7 +806,6 @@ def generate_lstm_config(self): 1.0 - (abs(delay) / decay_time)) assert recurrence_scale > 0 # or user may have set decay-time much # too small. - bptrunc_str = ("clipping-threshold={0}" " zeroing-threshold={1}" " zeroing-interval={2}" diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index 1791aee665b..8624dc947b9 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -101,8 +101,8 @@ def get_args(): help="Deprecated. Kept for back compatibility") # trainer options - parser.add_argument("--trainer.num-epochs", type=int, dest='num_epochs', - default=10, + parser.add_argument("--trainer.num-epochs", type=float, dest='num_epochs', + default=10.0, help="Number of epochs to train the model") parser.add_argument("--trainer.frames-per-iter", type=int, dest='frames_per_iter', default=800000, @@ -391,7 +391,7 @@ def train(args, run_opts, background_process_handler): # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. - num_archives_to_process = args.num_epochs * num_archives_expanded + num_archives_to_process = int(args.num_epochs * num_archives_expanded) num_archives_processed = 0 num_iters = ((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index b1a20ccc7b9..689450a80f0 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -271,7 +271,7 @@ def train(args, run_opts, background_process_handler): # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. num_archives_expanded = num_archives * args.frames_per_eg - num_archives_to_process = args.num_epochs * num_archives_expanded + num_archives_to_process = int(args.num_epochs * num_archives_expanded) num_archives_processed = 0 num_iters = ((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index 5eb2739be71..21247e8c7c3 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -275,7 +275,7 @@ def train(args, run_opts, background_process_handler): # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. 
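Taken together, these changes let --trainer.num-epochs be fractional while keeping the iteration count an integer. A standalone sketch of the arithmetic described in the comment above (the names mirror the training scripts; the numbers are invented):

    def num_train_iters(num_epochs, num_archives_expanded,
                        num_jobs_initial, num_jobs_final):
        # each archive must be processed num_epochs times; int() truncates
        # now that num_epochs may be fractional
        num_archives_to_process = int(num_epochs * num_archives_expanded)
        # on average (num_jobs_initial + num_jobs_final) / 2 jobs run per
        # iteration, and each job consumes one archive
        return (num_archives_to_process * 2) // (num_jobs_initial + num_jobs_final)

    print(num_train_iters(4.5, 120, 3, 16))   # 56 iterations
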
num_archives_expanded = num_archives * args.frames_per_eg - num_archives_to_process = args.num_epochs * num_archives_expanded + num_archives_to_process = int(args.num_epochs * num_archives_expanded) num_archives_processed = 0 num_iters = ((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index 97ab378f5fd..5a96d6020fa 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -341,7 +341,7 @@ def train(args, run_opts, background_process_handler): # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. - num_archives_to_process = args.num_epochs * num_archives + num_archives_to_process = int(args.num_epochs * num_archives) num_archives_processed = 0 num_iters = ((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) diff --git a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index 19da38db958..5824a77dbfe 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -338,7 +338,7 @@ def train(args, run_opts, background_process_handler): # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == # $num_epochs*$num_archives, where # avg_num_jobs=(num_jobs_initial+num_jobs_final)/2. - num_archives_to_process = args.num_epochs * num_archives + num_archives_to_process = int(args.num_epochs * num_archives) num_archives_processed = 0 num_iters = ((num_archives_to_process * 2) / (args.num_jobs_initial + args.num_jobs_final)) diff --git a/src/nnet3/nnet-chain-combine.cc b/src/nnet3/nnet-chain-combine.cc index d130490aba6..d6c376ab45e 100644 --- a/src/nnet3/nnet-chain-combine.cc +++ b/src/nnet3/nnet-chain-combine.cc @@ -186,7 +186,7 @@ void NnetChainCombiner::Combine() { void NnetChainCombiner::PrintParams(const VectorBase ¶ms) const { - Vector weights(params.Dim()), normalized_weights(params.Dim()); + Vector weights(WeightDim()), normalized_weights(WeightDim()); GetWeights(params, &weights); GetNormalizedWeights(weights, &normalized_weights); int32 num_models = nnet_params_.NumRows(), diff --git a/src/nnet3/nnet-combine.cc b/src/nnet3/nnet-combine.cc index b7fbd4fa6d9..ba904b1c93a 100644 --- a/src/nnet3/nnet-combine.cc +++ b/src/nnet3/nnet-combine.cc @@ -182,8 +182,7 @@ void NnetCombiner::Combine() { void NnetCombiner::PrintParams(const VectorBase ¶ms) const { - - Vector weights(params.Dim()), normalized_weights(params.Dim()); + Vector weights(WeightDim()), normalized_weights(WeightDim()); GetWeights(params, &weights); GetNormalizedWeights(weights, &normalized_weights); int32 num_models = nnet_params_.NumRows(), From 21cfe99c5e08a35eb410ce3cc28d150fd4cb7505 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Thu, 2 Feb 2017 16:58:10 -0500 Subject: [PATCH 399/530] [build] Windows build: generate missing base/version.h; cosmetic changes (#1397) --- windows/INSTALL.md | 10 ++++++--- windows/get_version.pl | 44 +++++++++++++++++++++++++++++++++++++ windows/variables.props.dev | 3 +++ 3 files changed, 54 insertions(+), 3 deletions(-) create mode 100755 windows/get_version.pl diff --git a/windows/INSTALL.md b/windows/INSTALL.md index 770844520d2..9edcee65144 100644 --- a/windows/INSTALL.md +++ b/windows/INSTALL.md @@ -157,13 +157,17 @@ for their processors. 
 It isn't free, but you can get [Community Licensing for Intel Performance Libraries].

 For example, for a build using OpenBLAS and VS 2015 you would run:

-    (kaldi)/tools$ generate_solution.pl --vsver vs2015 --enable-openblas
+    (kaldi)/windows$ generate_solution.pl --vsver vs2015 --enable-openblas

 Another example, for OpenBLAS, VS 2013 and CUDA support:

-    (kaldi)/tools$ generate_solution.pl --vsver vs2013 --enable-cuda --enable-openblas
+    (kaldi)/windows$ generate_solution.pl --vsver vs2013 --enable-cuda --enable-openblas

-16. Open the generated solution in the visual studio and switch to **Debug|x64** (or **Release|x64**) and build.
+13. Run the script (kaldi)/windows/get_version.pl:
+
+    (kaldi)/windows$ get_version.pl
+
+17. Open the generated solution in the visual studio and switch to **Debug|x64** (or **Release|x64**) and build.
 Expect 10 projects to fail; the majority of them will fail because of missing include `portaudio.h`

------
diff --git a/windows/get_version.pl b/windows/get_version.pl
new file mode 100755
index 00000000000..2a54891516a
--- /dev/null
+++ b/windows/get_version.pl
@@ -0,0 +1,44 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2017  (Author: Yenda Trmal )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+
+open(my $F, "<", "../src/.version") or do {
+  print "$!\n";
+  print "The file ../src/.version does not exist\n";
+  print "Either you are not running this script from within\n";
+  print "the windows/ directory or you have accidentally\n";
+  print "deleted the file\n";
+  exit 1;
+};
+
+open(my $H, ">", "../src/base/version.h") or do {
+  print "$!\n";
+  print "Could not write to ../src/base/version.h\n";
+  print "Either you are not running this script from within\n";
+  print "the windows/ directory or there were some other\n";
+  print "issues\n";
+  exit 1;
+};
+
+my $kaldi_ver=<$F>; chomp $kaldi_ver;
+print $H "KALDI_VERSION=${kaldi_ver}-win\n";
+close($F);
+close($H);
diff --git a/windows/variables.props.dev b/windows/variables.props.dev
index c8063dc1841..837b3f999a9 100644
--- a/windows/variables.props.dev
+++ b/windows/variables.props.dev
@@ -2,11 +2,14 @@
+
+
 C:\Program Files (x86)\Intel\Composer XE\mkl
 C:\Users\Yenda\Downloads\kaldi-svn\tools\OpenBLAS-v0.2.14-Win64-int32
 C:\Users\Yenda\Downloads\kaldi-svn\tools\pthread-win32\Pre-built.2
 C:\Users\Yenda\Downloads\kaldi-svn\tools\openfstwin-1.3.4\
 C:\Users\Yenda\Downloads\kaldi-svn\tools\openfstwin-1.3.4\MSVC12\x64\
+
From cd97bd22a915598af30ac9be12eb67b72fc3c4c1 Mon Sep 17 00:00:00 2001
From: Eric Munson
Date: Thu, 2 Feb 2017 18:01:13 -0500
Subject: [PATCH 400/530] [build]: Enable cross compilation, including to
 android. (#726)

If a user has a number of tool chains installed and they do not want to
use the default, they must currently edit the kaldi.mk file after running
configure to change the CC, CXX, AR, AS, and RANLIB variables. This is
something that should be exposed via the configure script. This patch
exposes an option to set the host triple for the desired tool chain in
the configure script.

Building Kaldi on my Raspberry Pi boards is not particularly fast. I have
been using the following patch to build kaldi executables for use on the
Pi boards for the better part of a year. A typical invocation for me is
something like:

$ ./configure --static --atlas-root=/opt/cross/armv8hf \
      --fst-root=/opt/cross/armv8hf --host=armv8-rpi3-linux-gnueabihf \
      --fst-version=1.4.1

This way I can build on my much faster x86 desktop, but still run
experiments on ARM. I have included support for cross compiling for
ppc64le and it works for me (at least it produces binaries for ppc64le;
I don't have a ppc64 machine to test it).

Signed-off-by: Eric B Munson

* Add mk file and configure options for building for Android

Building for Android requires a toolchain that can be built using the
Android NDK. It works similarly to the linux build except that it only
uses clang, only supports the openBLAS math library, and requires an
additional include directory for the system C++ headers. A typical
configure invocation looks like:

./configure --static --openblas-root=/opt/cross/arm-linux-androideabi \
    --fst-root=/opt/cross/arm-linux-androideabi \
    --host=arm-linux-androideabi --fst-version=1.4.1 \
    --android-includes=/opt/cross/arm-linux-androideabi/sysroot/usr/include

Signed-off-by: Eric B Munson

* Make pthread cancel symbols noops for Android

The Android C library does not support cancelling pthreads so the symbols
PTHREAD_CANCEL_STATE and pthread_setcancelstate are undefined. Because a
pthread cannot be cancelled in Android, it is reasonable to make the
pthread_setcancelstate() call a noop.

Signed-off-by: Eric B Munson
---
 src/configure                     | 222 +++++++++++++++++++++++++-----
 src/makefiles/android_openblas.mk |  64 +++++++++
 src/thread/kaldi-barrier.h        |   9 ++
 3 files changed, 257 insertions(+), 38 deletions(-)
 create mode 100644 src/makefiles/android_openblas.mk

diff --git a/src/configure b/src/configure
index d4122f1808e..bb8f5d4cb4d 100755
--- a/src/configure
+++ b/src/configure
@@ -22,10 +22,24 @@
 #  ./configure --atlas-root=../tools/ATLAS/build
 #  ./configure --use-cuda=no   # disable CUDA detection (will build cpu-only
 #                              # version of kaldi even on CUDA-enabled machine
+#  # Cross compile for armv8hf; this assumes that you have openfst built
+#  # with the armv8-rpi3-linux-gnueabihf toolchain and installed to
+#  # /opt/cross/armv8hf. It also assumes that you have an ATLAS library
+#  # built for the target and installed to /opt/cross/armv8hf, and that the
+#  # armv8-rpi3-linux-gnueabihf toolchain is available in your path
+#  ./configure --static --fst-root=/opt/cross/armv8hf --atlas-root=/opt/cross/armv8hf --host=armv8-rpi3-linux-gnueabihf
+#  # Cross compile for Android on arm
+#  # The only difference here is the addition of the --android-includes
+#  # flag because the toolchains produced by the Android NDK don't always
+#  # include the C++ stdlib headers in the normal cross compile include
+#  # path
+#  ./configure --static --openblas-root=/opt/cross/arm-linux-androideabi \
+#      --fst-root=/opt/cross/arm-linux-androideabi --host=arm-linux-androideabi \
+#      --fst-version=1.4.1 --android-includes=/opt/cross/arm-linux-androideabi/sysroot/usr/include

 #This should be incremented after every significant change of the configure script
 #I.e. after each change that affects the kaldi.mk or the build system as whole
-CONFIGURE_VERSION=4
+CONFIGURE_VERSION=5

 function rel2abs {
   if [ ! -z "$1" ]; then
@@ -69,12 +83,14 @@ unset MKLROOT
 unset CLAPACKROOT
 unset OPENBLASROOT
 unset MKLLIBDIR
+unset HOST

 function usage {
   echo 'Usage: ./configure [--static|--shared] [--threaded-atlas={yes|no}] [--atlas-root=ATLASROOT]
   [--fst-root=FSTROOT] [--openblas-root=OPENBLASROOT] [--clapack-root=CLAPACKROOT]
   [--mkl-root=MKLROOT] [--mkl-libdir=MKLLIBDIR] [--omp-libdir=OMPDIR] [--static-fst={yes|no}]
   [--static-math={yes|no}] [--threaded-math={yes|no}] [--mathlib=ATLAS|MKL|CLAPACK|OPENBLAS]
-  [--use-cuda={yes|no}] [--cudatk-dir=CUDATKDIR][--mkl-threading=sequential|iomp|tbb|gomp]';
+  [--use-cuda={yes|no}] [--cudatk-dir=CUDATKDIR][--mkl-threading=sequential|iomp|tbb|gomp] [--fst-version=VERSION]
+  [--host=HOST] [--android-includes=ANDROID_INC_DIR]';
 }

 threaded_atlas=false # By default, use the un-threaded version of ATLAS.
@@ -84,6 +100,11 @@ static_fst=false
 use_cuda=true
 dynamic_kaldi=false
 mkl_threading=sequential
+# HOST and TARGET_ARCH are used when cross compiling; the user will specify HOST via the --host
+# switch. TARGET_ARCH will be the first value in HOST if set, and `uname -m` otherwise
+HOST=""
+TARGET_ARCH=""
+android=false

 cmd_line="$0 $@"  # Save the command line to include in kaldi.mk

@@ -184,15 +205,47 @@ do
     --cudatk-dir=*)
     CUDATKDIR=`read_dirname $1`;
     shift ;; #CUDA is used in src/cudamatrix and src/nnet{,bin} only
+    --fst-version=*)
+    OPENFST_VER=`expr "X$1" : '[^=]*=\(.*\)'`;
+    shift;;
+    --host=*)
+    # This expects the same format of host "triple" as autotools-based projects;
+    # this script will infer the target architecture from the specified triple.
+    HOST=`expr "X$1" : '[^=]*=\(.*\)'`;
+    shift ;;
+    --android-includes=*)
+    threaded_math=false;
+    static_math=true;
+    static_fst=true;
+    dynamic_kaldi=false;
+    MATHLIB='OPENBLAS';
+    android=true;
+    ANDROIDINC=`read_dirname $1`;
+    shift;;
     *)  echo "Unknown argument: $1, exiting"; usage; exit 1 ;;
   esac
 done

+
 # the idea here is that if you change the configuration options from using
 # CUDA to not using it, or vice versa, we want to recompile all parts of the
 # code that may use a GPU. Touching this file is a way to force this.
 touch cudamatrix/cu-common.h 2>/dev/null
+
+function add_cross_tools {
+  # If the $HOST variable is set, we need to tell make to use the specified tools
+  if [ !
-z "$HOST" ]; then + echo '# A host triple was specified, we need to prepend all the tools with it' >> kaldi.mk + echo "HOST = $HOST" >> kaldi.mk + echo 'CC := $(HOST)-$(CC)' >> kaldi.mk + echo 'CXX := $(HOST)-$(CXX)' >> kaldi.mk + echo 'AR := $(HOST)-$(AR)' >> kaldi.mk + echo 'AS := $(HOST)-$(AS)' >> kaldi.mk + echo 'RANLIB := $(HOST)-$(RANLIB)' >> kaldi.mk + fi +} + function failure { echo "***configure failed: $* ***" >&2 if [ -f kaldi.mk ]; then rm kaldi.mk; fi @@ -204,8 +257,13 @@ function check_exists { } function check_for_bad_gcc { - if which gcc >&/dev/null; then # gcc is on the path - gcc_version=$(gcc -dumpspecs 2>&1 | grep -A1 -F '*version:' | grep -v version) + if [ -z "$HOST" ] ; then + compiler="gcc" + else + compiler="$HOST-gcc" + fi + if which $compiler >&/dev/null; then # gcc is on the path + gcc_version=$($compiler -dumpspecs 2>&1 | grep -A1 -F '*version:' | grep -v version) if [ "$gcc_version" == "4.8.2" ] || [ "$gcc_version" == "4.8.1" ]; then echo "*** WARNING: your version of gcc seems to be 4.8.1 or 4.8.2. ***" echo "*** These versions of gcc has a bug in nth_element ***" @@ -218,16 +276,19 @@ function check_for_bad_gcc { } function check_for_slow_expf { - cd probe - rm -f exp-test - make -f Makefile.slow_expf 1>/dev/null - ./exp-test - if [ $? -eq 1 ]; then - echo "*** WARNING: expf() seems to be slower than exp() on your machine. This is a known bug in old versions of glibc. Please consider updating glibc. ***" - echo "*** Kaldi will be configured to use exp() instead of expf() in base/kaldi-math.h Exp() routine for single-precision floats. ***" - echo "CXXFLAGS += -DKALDI_NO_EXPF" >> ../kaldi.mk + # We cannot run this test if we are cross compiling. + if [[ "$TARGET_ARCH" == "`uname -m`" ]] ; then + cd probe + rm -f exp-test + make -f Makefile.slow_expf 1>/dev/null + ./exp-test + if [ $? -eq 1 ]; then + echo "*** WARNING: expf() seems to be slower than exp() on your machine. This is a known bug in old versions of glibc. Please consider updating glibc. ***" + echo "*** Kaldi will be configured to use exp() instead of expf() in base/kaldi-math.h Exp() routine for single-precision floats. ***" + echo "CXXFLAGS += -DKALDI_NO_EXPF" >> ../kaldi.mk + fi + cd .. fi - cd .. } @@ -251,7 +312,6 @@ function check_library { } - #Check if at least one of these variables is set #If yes, we want to switch to using the MKL is_set $MKLLIBDIR && echo "Force-configuring KALDI to use MKL" && export MATHLIB="MKL" @@ -259,6 +319,32 @@ is_set $MKLROOT && echo "Force-configuring KALDI to use MKL"&& export MATHLIB="M is_set $CLAPACKROOT && echo "Force-configuring KALDI to use CLAPACK"&& export MATHLIB="CLAPACK" is_set $OPENBLASROOT && echo "Force-configuring KALDI to use OPENBLAS"&& export MATHLIB="OPENBLAS" + +# If HOST is specified, parse the TARGET_ARCH, otherwise use uname -m +if [[ "$HOST" == "" ]] ; then + TARGET_ARCH="`uname -m`" +else + # The HOST value will be something like "armv8-rpi3-linux-gnueabihf" and we need the first value + # as delimited by '-' to be used as the TARGET_ARCH for this build. The read command is the + # bash equivalent of split() found in other scripting languages. read uses the value in + # environment variable IFS as the field delimiter. 
The following command will take the
+  # host string "armv8-rpi3-linux-gnueabihf" as streamed in from the HOST variable
+  # and return ["armv8", "rpi3", "linux", "gnueabihf"] in PARTS
+  #
+  # Note that by changing the value of IFS (which is an environment variable) on the same
+  # line as the read invocation, it is only changed for that invocation and not for the shell
+  # executing this script. So we do not need to cache and reset the value.
+  IFS='-' read -ra PARTS <<< "$HOST"
+  # We only want the first entry from the list as the architecture
+  TARGET_ARCH="$PARTS"
+  if [[ "$TARGET_ARCH" != arm* && "$TARGET_ARCH" != ppc64le && "$TARGET_ARCH" != x86* ]] ; then
+    # We currently only support building for x86[_64], arm*, and ppc64le; if the
+    # TARGET_ARCH was read from the HOST variable, it must be one of these
+    failure "$TARGET_ARCH is an unsupported architecture, kaldi currently supports x86[_64], arm*, and ppc64le"
+  fi
+fi
+
+
 #MKL functions
 function linux_configure_mkllibdir {
   local mklroot=$1
@@ -433,6 +519,11 @@ function configure_cuda {
   if [ ! -f $CUDATKDIR/bin/nvcc ]; then
     failure "Cannot find nvcc in CUDATKDIR=$CUDATKDIR"
   fi
+
+  if [[ "$TARGET_ARCH" != "`uname -m`" ]] ; then
+    failure "Cannot cross compile with CUDA support"
+  fi
+
   echo "Using CUDA toolkit $CUDATKDIR (nvcc compiler and runtime libraries)"
   echo >> kaldi.mk
   echo "#Next section enables CUDA for compilation" >> kaldi.mk
@@ -455,7 +546,7 @@ function configure_cuda {
   esac
   echo "CUDA_ARCH = $CUDA_ARCH" >> kaldi.mk

-  # 64bit/32bit?
+  # 64bit/32bit? We do not support cross compilation with CUDA, so use direct calls to uname -m here
   if [ "`uname -m`" == "x86_64" ]; then
     if [ "`uname`" == "Darwin" ]; then
       sed 's/lib64/lib/g' < makefiles/cuda_64bit.mk >> kaldi.mk
@@ -514,8 +605,13 @@ function linux_configure_speex {
 }

 function fix_cxx_flag {
-  CXXCOMPILER=`grep "CXX = " kaldi.mk | awk '{print $3}'`
-  if [ $CXXCOMPILER=="g++" ]; then
+  USINGGXX=`grep -c "CXX = g++" kaldi.mk`
+  if [ $USINGGXX -ge 1 ]; then
+    if [ -z "$HOST" ] ; then
+      CXXCOMPILER="g++"
+    else
+      CXXCOMPILER="$HOST-g++"
+    fi
     $CXXCOMPILER -dumpversion | \
     awk '{if(NR==1 && $1<"4.4") print "sed \"s/-Wno-unused-local-typedefs//g\" \
     kaldi.mk > tmpf; mv tmpf kaldi.mk; "}' | sh -
@@ -526,9 +622,9 @@ function linux_atlas_failure { # function we use when we couldn't find
                                # ATLAS libs.
   echo ATLASINC = $ATLASROOT/include >> kaldi.mk
   echo ATLASLIBS = [somewhere]/liblapack.a [somewhere]/libcblas.a [somewhere]/libatlas.a [somewhere]/libf77blas.a $ATLASLIBDIR >> kaldi.mk
-  if [[ "`uname -m`" == arm* ]]; then
+  if [[ "$TARGET_ARCH" == arm* ]]; then
     cat makefiles/linux_atlas_arm.mk >> kaldi.mk
-  elif [[ "`uname -m`" == ppc64le ]]; then
+  elif [[ "$TARGET_ARCH" == ppc64le ]]; then
     cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk
   else
     cat makefiles/linux_atlas.mk >> kaldi.mk
@@ -558,7 +654,12 @@ function linux_check_static {
   if [ -f $dir/libatlas.a ]; then # candidate...
     # Note: on the next line, the variable assignment
     # LANG=en_US should apply just to the program called on that line.
- if LANG=en_US gcc -o test_linking test_linking.cc -u ATL_flushcache $dir/libatlas.a 2>&1 | grep -i "incompatible" >/dev/null; then + if [ -z "$HOST" ] ; then + compiler="gcc" + else + compiler="$HOST-gcc" + fi + if LANG=en_US $compiler -o test_linking test_linking.cc -u ATL_flushcache $dir/libatlas.a 2>&1 | grep -i "incompatible" >/dev/null; then echo "Directory $dir may contain ATLAS libraries but seems to be wrong architecture"; rm test_linking test_linking.cc 2>/dev/null return 1; @@ -583,14 +684,15 @@ function linux_configure_debian_ubuntu { fi echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi fix_cxx_flag + add_cross_tools echo "Successfully configured for Debian/Ubuntu Linux [dynamic libraries] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda linux_configure_speex @@ -608,14 +710,15 @@ function linux_configure_debian_ubuntu3 { fi echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi fix_cxx_flag + add_cross_tools echo "Successfully configured for Debian/Ubuntu Linux [dynamic libraries] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda linux_configure_speex @@ -636,14 +739,15 @@ function linux_configure_debian7 { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk echo - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi fix_cxx_flag + add_cross_tools echo "Successfully configured for Debian 7 [dynamic libraries] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda linux_configure_speex @@ -661,14 +765,15 @@ function linux_configure_redhat { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk echo - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi fix_cxx_flag + add_cross_tools echo "Successfully configured for red hat [dynamic libraries] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda exit_success; @@ -688,14 +793,15 @@ function linux_configure_redhat_fat { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk echo - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi 
fix_cxx_flag + add_cross_tools echo "Successfully configured for red hat [dynamic libraries, fat] with ATLASLIBS =$ATLASLIBS" $use_cuda && configure_cuda exit_success; @@ -747,14 +853,15 @@ function linux_configure_static { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi fix_cxx_flag + add_cross_tools $use_cuda && configure_cuda linux_configure_speex echo "Successfully configured for Linux [static libraries] with ATLASLIBS =$ATLASLIBS" @@ -832,14 +939,15 @@ function linux_configure_dynamic { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk fi fix_cxx_flag + add_cross_tools $use_cuda && configure_cuda linux_configure_speex echo "Successfully configured for Linux [dynamic libraries] with ATLASLIBS =$ATLASLIBS" @@ -896,6 +1004,42 @@ fi # Most of the OS-specific steps below will append to kaldi.mk echo "Doing OS specific configurations ..." +if $android ; then + OPENFSTLIBS="$FSTROOT/lib/libfst.a" + echo "OPENFSTLIBS = $OPENFSTLIBS" >> kaldi.mk + + if [ -z $ANDROIDINC ] ; then + failure "--android-includes must be specified for android builds" + fi + + if [ -z $HOST ] ; then + failure "HOST must be specified for android builds" + fi + + OPENBLASROOT=`rel2abs "$OPENBLASROOT"` + if [ -z "$OPENBLASROOT" ]; then + failure "Must specify the location of OPENBLAS with --openblas-root option (and it must exist)" + fi + if [ ! -f $OPENBLASROOT/lib/libopenblas.a ]; then + failure "Expected to find the file $OPENBLASROOT/lib/libopenblas.a" + fi + echo "Your math library seems to be OpenBLAS. Configuring appropriately." + + OPENBLASLIBS="$OPENBLASROOT/lib/libopenblas.a $OPENBLASROOT/lib/libclapack.a $OPENBLASROOT/lib/liblapack.a $OPENBLASROOT/lib/libblas.a $OPENBLASROOT/lib/libf2c.a" + echo "OPENBLASROOT = $OPENBLASROOT" >> kaldi.mk + echo "OPENBLASLIBS = $OPENBLASLIBS" >> kaldi.mk + echo "ANDROIDINC = $ANDROIDINC" >> kaldi.mk + + cat makefiles/android_openblas.mk >> kaldi.mk + + add_cross_tools + + echo "Successfully configured OpenBLAS from $OPENBLASROOT." + echo "Configuration succeeded for platform Android" + exit_success +fi + + # Check for Darwin at first, because we later call uname -o (for Cygwin) # which crashes on Darwin. Also the linear algebra libraries on Macs are # used differently (through the Accelerate framework) than on Linux. @@ -1025,7 +1169,7 @@ if [ "`uname`" == "Linux" ]; then fi elif [ "$MATHLIB" == "MKL" ]; then - if [ "`uname -m`" != "x86_64" ]; then + if [ "$TARGET_ARCH" != "x86_64" ]; then failure "MKL on Linux only supported for Intel(R) 64 architecture (x86_64). See makefiles/linux_64_mkl.mk to manually configure for other platforms." fi @@ -1091,12 +1235,13 @@ if [ "`uname`" == "Linux" ]; then if [ ! -f makefiles/linux_clapack.mk ]; then failure "makefiles/linux_clapack.mk not found." 
fi - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_clapack_arm.mk >> kaldi.mk else cat makefiles/linux_clapack.mk >> kaldi.mk fi fix_cxx_flag + add_cross_tools echo "Warning (CLAPACK): this part of the configure process is not properly tested and will not work." $use_cuda && configure_cuda linux_configure_speex @@ -1120,14 +1265,15 @@ if [ "`uname`" == "Linux" ]; then fi echo "OPENBLASLIBS = $OPENBLASLIBS" >> kaldi.mk echo "OPENBLASROOT = $OPENBLASROOT" >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_openblas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_openblas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_openblas.mk >> kaldi.mk fi fix_cxx_flag + add_cross_tools $use_cuda && configure_cuda linux_configure_speex echo "Successfully configured OpenBLAS from $OPENBLASROOT." diff --git a/src/makefiles/android_openblas.mk b/src/makefiles/android_openblas.mk new file mode 100644 index 00000000000..f628c0400a1 --- /dev/null +++ b/src/makefiles/android_openblas.mk @@ -0,0 +1,64 @@ +ifndef FSTROOT +$(error FSTROOT not defined.) +endif + +ifndef OPENFSTLIBS +$(error OPENFSTLIBS not defined.) +endif + +ifndef OPENBLASLIBS +$(error OPENBLASLIBS not defined.) +endif + +ifndef OPENBLASROOT +$(error OPENBLASROOT not defined.) +endif + +ifndef ANDROIDINC +$(error ANDROIDINC not defined.) +endif + + CXXFLAGS += -mhard-float -D_NDK_MATH_NO_SOFTFP=1 -Wall -I.. \ + -pthread -mfpu=neon -ftree-vectorize -mfloat-abi=hard \ + -DHAVE_OPENBLAS -DANDROID_BUILD -I $(OPENBLASROOT)/include \ + -I$(ANDROIDINC) \ + -DKALDI_DOUBLEPRECISION=0 -DHAVE_POSIX_MEMALIGN \ + -Wno-sign-compare -Winit-self \ + -DHAVE_CXXABI_H \ + -DHAVE_CLAPACK \ + -I$(FSTROOT)/include \ + $(EXTRA_CXXFLAGS) \ + # -O0 -DKALDI_PARANOID + +ifeq ($(KALDI_FLAVOR), dynamic) +CXXFLAGS += -fPIC +endif + +LDFLAGS = -Wl,--no-warn-mismatch -pie +LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -ldl -lm_hard + +CC = clang++ +CXX = clang++ +AR = ar +AS = as +RANLIB = ranlib + +# Add no-mismatched-tags flag to suppress the annoying clang warnings +# that are perfectly valid per spec. +COMPILER = $(shell $(CXX) -v 2>&1 ) +ifeq ($(findstring clang,$(COMPILER)),clang) + CXXFLAGS += -Wno-mismatched-tags + # Link with libstdc++ if we are building against OpenFst < 1.4 + ifneq ("$(OPENFST_GE_10400)","1") + CXXFLAGS += -stdlib=libstdc++ + LDFLAGS += -stdlib=libstdc++ + endif +endif + +# We need to tell recent versions of g++ to allow vector conversions without +# an explicit cast provided the vectors are of the same size. +ifeq ($(findstring GCC,$(COMPILER)),GCC) + CXXFLAGS += -flax-vector-conversions -Wno-unused-local-typedefs +endif + + diff --git a/src/thread/kaldi-barrier.h b/src/thread/kaldi-barrier.h index 4c64726dc7a..b4b5658c629 100644 --- a/src/thread/kaldi-barrier.h +++ b/src/thread/kaldi-barrier.h @@ -55,3 +55,12 @@ class Barrier { #endif // KALDI_THREAD_KALDI_BARRIER_H_ +/* + * Android does not support cancelling pthreads so the following symbols are not defined. + * Define them here, they can be a noop because we cannot cancel a pthread on Android. 
+ */ +#ifdef ANDROID_BUILD +#define PTHREAD_CANCEL_STATE 0 +#define pthread_setcancelstate(a, b) do { } while(0) +#endif + From 60d1c7884061d6a506fb49c4a3f2eb2a3220c2ed Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 2 Feb 2017 20:21:02 -0500 Subject: [PATCH 401/530] [egs,scripts]: add Swbd/s5c tuning scripts; simplify nnet3+chain 'combination' stage (doesn't affect results; faster); minor info-script fix. --- egs/swbd/s5c/local/chain/run_tdnn_lstm.sh | 2 +- .../local/chain/tuning/run_tdnn_lstm_1c.sh | 22 +- .../local/chain/tuning/run_tdnn_lstm_1d.sh | 22 ++ .../local/chain/tuning/run_tdnn_lstm_1e.sh | 17 + .../local/chain/tuning/run_tdnn_lstm_1f.sh | 23 +- .../local/chain/tuning/run_tdnn_lstm_1g.sh | 21 ++ .../local/chain/tuning/run_tdnn_lstm_1h.sh | 279 ++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1i.sh | 300 ++++++++++++++++++ egs/wsj/s5/steps/info/chain_dir_info.pl | 2 +- .../nnet3/train/chain_objf/acoustic_model.py | 13 +- 10 files changed, 686 insertions(+), 15 deletions(-) create mode 100755 egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh create mode 100755 egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh diff --git a/egs/swbd/s5c/local/chain/run_tdnn_lstm.sh b/egs/swbd/s5c/local/chain/run_tdnn_lstm.sh index 9669251c14a..fbc28248491 120000 --- a/egs/swbd/s5c/local/chain/run_tdnn_lstm.sh +++ b/egs/swbd/s5c/local/chain/run_tdnn_lstm.sh @@ -1 +1 @@ -tuning/run_tdnn_lstm_1c.sh \ No newline at end of file +tuning/run_tdnn_lstm_1e.sh \ No newline at end of file diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh index b305c57b6ab..d71301eb102 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -5,15 +5,19 @@ # it's faster. See PR #1243 on github, and issue #1237. # This used to be called run_tdnn_fastlstm_1b.sh. -#System tdnn_lstm_1a_ld5 tdnn_lstm_1b_ld5 tdnn_lstm_1c_ld5 -#WER on train_dev(tg) 13.42 13.00 12.91 -#WER on train_dev(fg) 12.42 12.03 11.98 -#WER on eval2000(tg) 15.7 15.3 15.2 -#WER on eval2000(fg) 14.2 13.9 13.8 -#Final train prob -0.0538088 -0.056294 -0.050 -#Final valid prob -0.0800484 -0.0813322 -0.092 -#Final train prob (xent) -0.7603 -0.777787 -0.756 -#Final valid prob (xent) -0.949909 -0.939146 -0.983 +## note: the last column below was this run on Feb 1 2017, in the +## shortcut branch. Results are a bit worse, but I believe this is just +## random noise or a little bit of mean-regression. + +#System tdnn_lstm_1a_ld5_sp tdnn_lstm_1b_ld5_sp tdnn_lstm_1c_ld5_sp tdnn_lstm_1c_ld5_sp +#WER on train_dev(tg) 13.42 13.00 12.91 13.17 +#WER on train_dev(fg) 12.42 12.03 11.98 12.25 +#WER on eval2000(tg) 15.7 15.3 15.2 15.4 +#WER on eval2000(fg) 14.2 13.9 13.8 14.1 +#Final train prob -0.0538088 -0.056294 -0.050 -0.046 +#Final valid prob -0.0800484 -0.0813322 -0.092 -0.073 +#Final train prob (xent) -0.7603 -0.777787 -0.756 -0.749 +#Final valid prob (xent) -0.949909 -0.939146 -0.983 -0.980 set -e diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh index 837eb944875..22c7d2e582d 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -8,6 +8,28 @@ # Also changed frames-per-iter from 1.2 million to 1.5 million... this # might have been a mistake, trying 1 million in 1f to see if this matters. +# The comparison below is with a version of the 1c system that was run at about +# the same time. 
The degradation in log-likelihood and xent prob is likely because +# now on average the chunk-size is slightly smaller than before (150 -> 136); +# possibly the change in extra-(left,right) context has a similar effect +# (or maybe it's just because the validation and train-subset examples have changed). + + +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1c_ld5_sp tdnn_lstm_1d_sp +# System tdnn_lstm_1c_ld5_sp tdnn_lstm_1d_sp +# WER on train_dev(tg) 13.17 12.90 +# [looped:] 13.01 +# WER on train_dev(fg) 12.25 11.90 +# [looped:] 12.13 +# WER on eval2000(tg) 15.4 15.7 +# [looped:] 15.7 +# WER on eval2000(fg) 14.1 14.2 +# [looped:] 14.4 +# Final train prob -0.046 -0.064 +# Final valid prob -0.073 -0.088 +# Final train prob (xent) -0.749 -0.836 +# Final valid prob (xent) -0.9084 -0.9631 + # run_tdnn_lstm_1c.sh is like run_tdnn_lstm_1b.sh but using the # new 'fast-lstm' layer. Results are slightly improved, plus # it's faster. See PR #1243 on github, and issue #1237. diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh index 14dbb1cdd2e..f8b3d70aa2b 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -4,6 +4,23 @@ # trying the change of xent_regularize from 0.025 (which was an # unusual value) to the more usual 0.01. +# There seems to be no consistent difference in WER. Inconclusive. +# However I may keep 0.01 just for consistency with other setups. +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1d_sp tdnn_lstm_1e_sp +# System tdnn_lstm_1d_sp tdnn_lstm_1e_sp +# WER on train_dev(tg) 12.90 12.74 +# [looped:] 13.01 12.93 +# WER on train_dev(fg) 11.90 11.70 +# [looped:] 12.13 12.09 +# WER on eval2000(tg) 15.7 15.7 +# [looped:] 15.7 15.9 +# WER on eval2000(fg) 14.2 14.3 +# [looped:] 14.4 14.6 +# Final train prob -0.064 -0.066 +# Final valid prob -0.088 -0.087 +# Final train prob (xent) -0.836 -0.931 +# Final valid prob (xent) -0.9631 -1.0279 + set -e diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh index b8f1fdd92f6..90e179379e4 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -2,7 +2,28 @@ # run_tdnn_lstm_1f.sh is like run_tdnn_lstm_1e.sh but # reducing the frames-per-iter from 1.5 million to 1 million, -# since the time per iter was too much (about 5 minutes). +# since the time per iter was more than usual (about 5 minutes). + +# Below, the WER seems to get a little worse, although the optimization +# is improved slightly. There seems to be more train/valid difference. +# see also 1i. 
+
+# exp/chain/tdnn_lstm_1f_sp: num-iters=392 nj=3..16 num-params=39.6M dim=40+100->6042 combine=-0.080->-0.073 xent:train/valid[260,391,final]=(-1.06,-0.903,-0.916/-1.13,-1.03,-1.04) logprob:train/valid[260,391,final]=(-0.084,-0.064,-0.065/-0.100,-0.091,-0.090)
+
+# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1f_sp
+# System                  tdnn_lstm_1e_sp tdnn_lstm_1f_sp
+# WER on train_dev(tg)          12.74     13.23
+#           [looped:]           12.93     13.27
+# WER on train_dev(fg)          11.70     12.17
+#           [looped:]           12.09     12.42
+# WER on eval2000(tg)           15.7      16.1
+#           [looped:]           15.9      16.2
+# WER on eval2000(fg)           14.3      14.6
+#           [looped:]           14.6      14.7
+# Final train prob             -0.066    -0.065
+# Final valid prob             -0.087    -0.090
+# Final train prob (xent)      -0.931    -0.916
+# Final valid prob (xent)     -1.0279   -1.0359

 # run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but
 # trying the change of xent_regularize from 0.025 (which was an
 # unusual value) to the more usual 0.01.
diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh
index 6cacdf2dadb..cb73f020e3e 100755
--- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh
+++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh
@@ -3,6 +3,27 @@
 # 1g is like 1e, but reducing decay-time from 20 to 15, to see if
 # it reduces the difference between regular and looped decoding.
 #
+# There doesn't seem to be a very consistent difference between 1e and 1g.
+
+
+# exp/chain/tdnn_lstm_1g_sp: num-iters=262 nj=3..16 num-params=39.6M dim=40+100->6042 combine=-0.083->-0.076 xent:train/valid[173,261,final]=(-1.09,-0.929,-0.938/-1.15,-1.04,-1.05) logprob:train/valid[173,261,final]=(-0.089,-0.066,-0.067/-0.102,-0.089,-0.090)
+
+# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1g_sp
+# System                  tdnn_lstm_1e_sp tdnn_lstm_1g_sp
+# WER on train_dev(tg)          12.74     13.03
+#           [looped:]           12.93     12.98
+# WER on train_dev(fg)          11.70     12.02
+#           [looped:]           12.09     12.13
+# WER on eval2000(tg)           15.7      15.6
+#           [looped:]           15.9      15.9
+# WER on eval2000(fg)           14.3      14.1
+#           [looped:]           14.6      14.4
+# Final train prob             -0.066    -0.067
+# Final valid prob             -0.087    -0.090
+# Final train prob (xent)      -0.931    -0.938
+# Final valid prob (xent)     -1.0279   -1.0473
+

 # run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but
 # trying the change of xent_regularize from 0.025 (which was an
 # unusual value) to the more usual 0.01.
diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh
new file mode 100755
index 00000000000..b12be22ce3d
--- /dev/null
+++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh
@@ -0,0 +1,279 @@
+#!/bin/bash
+
+# 1h is like 1e, but reducing the hidden-dims from 1024 to 880.
+
+# Does not seem to help; both train and valid probs get worse by about
+# the same amount, and WER is overall just slightly worse. Maybe 1024
+# was approximately optimal.
+
+# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1h_sp
+# System                  tdnn_lstm_1e_sp tdnn_lstm_1h_sp
+# WER on train_dev(tg)          12.74     13.06
+#           [looped:]           12.93     13.17
+# WER on train_dev(fg)          11.70     12.13
+#           [looped:]           12.09     12.27
+# WER on eval2000(tg)           15.7      15.7
+#           [looped:]           15.9      15.9
+# WER on eval2000(fg)           14.3      14.4
+#           [looped:]           14.6      14.5
+# Final train prob             -0.066    -0.069
+# Final valid prob             -0.087    -0.091
+# Final train prob (xent)      -0.931    -0.967
+# Final valid prob (xent)     -1.0279   -1.0631
+
+# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but
+# trying the change of xent_regularize from 0.025 (which was an
+# unusual value) to the more usual 0.01.
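One knock-on effect of the xent_regularize change mentioned above: the scripts derive the cross-entropy branch's learning-rate factor as 0.5 / xent_regularize (see the learning_rate_factor line further down), so moving from 0.025 to 0.01 also raises that factor:

    # the factor keeps the xent output layer learning at a rate independent
    # of the small weight the xent objective carries
    for xent_regularize in (0.025, 0.01):
        print(0.5 / xent_regularize)   # 20.0, then 50.0
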
+ + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1h # Note: _sp will get added to this if $speed_perturb == true. +decode_iter=final + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir=exp/chain/tdnn_lstm_1d_sp/egs + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=880 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=880 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=880 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=880 recurrent-projection-dim=220 non-recurrent-projection-dim=220 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=880 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=880 + fast-lstmp-layer name=fastlstm2 cell-dim=880 recurrent-projection-dim=220 non-recurrent-projection-dim=220 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=880 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=880 + fast-lstmp-layer name=fastlstm3 cell-dim=880 recurrent-projection-dim=220 non-recurrent-projection-dim=220 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. 
we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. 
Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh new file mode 100755 index 00000000000..7e05834c1fb --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh @@ -0,0 +1,300 @@ +#!/bin/bash + +# run_tdnn_lstm_1i.sh is like run_tdnn_lstm_1{e,f}.sh but +# with a different frames-per-iter: 2 million, vs. 1.5 million +# (1e) and 1 million (1f) + +# Results are inconclusive regarding comparison with 1e: it's [0.3 worse, 0.1 +# better, 0.2 worse, same, 0.2 better, 0.2 better, 0.3 better, 0.3 better] on +# the different conditions. There is less train/valid difference and worse +# train prob [the trends of valid and train probs are consistent as we change +# the frames-per-iter]. + +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1{e,f,i}_sp 2>/dev/null +# System tdnn_lstm_1e_sp tdnn_lstm_1f_sp tdnn_lstm_1i_sp +# WER on train_dev(tg) 12.74 13.23 13.08 +# [looped:] 12.93 13.27 13.00 +# WER on train_dev(fg) 11.70 12.17 11.97 +# [looped:] 12.09 12.42 12.08 +# WER on eval2000(tg) 15.7 16.1 15.5 +# [looped:] 15.9 16.2 15.7 +# WER on eval2000(fg) 14.3 14.6 14.0 +# [looped:] 14.6 14.7 14.3 +# Final train prob -0.066 -0.065 -0.069 +# Final valid prob -0.087 -0.090 -0.088 +# Final train prob (xent) -0.931 -0.916 -0.947 +# Final valid prob (xent) -1.0279 -1.0359 -1.0419 + +# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but +# trying the change of xent_regularize from 0.025 (which was an +# unusual value) to the more usual 0.01. + +# WER is worse but this seems to be due to more complete optimization +# (train better, valid worse). Looks like we may be overtraining. +# +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# System tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# WER on train_dev(tg) 12.74 13.23 +# [looped:] 12.93 13.27 +# WER on train_dev(fg) 11.70 12.17 +# [looped:] 12.09 12.42 +# WER on eval2000(tg) 15.7 16.1 +# [looped:] 15.9 16.2 +# WER on eval2000(fg) 14.3 14.6 +# [looped:] 14.6 14.7 +# Final train prob -0.066 -0.065 +# Final valid prob -0.087 -0.090 +# Final train prob (xent) -0.931 -0.916 +# Final valid prob (xent) -1.0279 -1.0359 + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1i # Note: _sp will get added to this if $speed_perturb == true. +decode_iter=final + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. 
+frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
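+  # (Worked example: this script sets xent_regularize=0.01, so
+  # learning_rate_factor = 0.5 / 0.01 = 50. The xent objective scales the
+  # gradients into this layer by 0.01, and the factor of 50 restores an
+  # effective learning rate of 0.5 times that of the regular output layer.)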
+ output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b1{1,2,3,4}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 2000000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
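+  # (Looped decoding carries the recurrent state over from the end of one
+  # chunk to the start of the next instead of recomputing the left context,
+  # which is why it only changes results for forward-recurrent models.)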
+ for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; diff --git a/egs/wsj/s5/steps/info/chain_dir_info.pl b/egs/wsj/s5/steps/info/chain_dir_info.pl index 1d659b89c89..b0adb7e498c 100755 --- a/egs/wsj/s5/steps/info/chain_dir_info.pl +++ b/egs/wsj/s5/steps/info/chain_dir_info.pl @@ -136,7 +136,7 @@ sub get_combine_info { while () { if (m/Combining nnets, objective function changed from (\S+) to (\S+)/) { close(F); - return sprintf(" combine=%.2f->%.2f", $1, $2); + return sprintf(" combine=%.3f->%.3f", $1, $2); } } } diff --git a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py index fde8ae65461..f28aa89774e 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py @@ -543,19 +543,24 @@ def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_st models_to_combine.add(num_iters) + # TODO: if it turns out the sum-to-one-penalty code is not useful, + # remove support for it. + for iter in sorted(models_to_combine): model_file = '{0}/{1}.mdl'.format(dir, iter) if os.path.exists(model_file): - raw_model_strings.append( - '"nnet3-am-copy --raw=true {0} -|"'.format(model_file)) + # we used to copy them with nnet3-am-copy --raw=true, but now + # the raw-model-reading code discards the other stuff itself. 
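+            # (i.e. the plain path {dir}/{iter}.mdl is passed directly to
+            # nnet3-chain-combine below, with no nnet3-am-copy pipe.)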
+ raw_model_strings.append(model_file) else: print("{0}: warning: model file {1} does not exist " "(final combination)".format(sys.argv[0], model_file)) common_lib.run_job( """{command} {combine_queue_opt} {dir}/log/combine.log \ - nnet3-chain-combine --num-iters=80 \ + nnet3-chain-combine --num-iters={opt_iters} \ --l2-regularize={l2} --leaky-hmm-coefficient={leaky} \ + --separate-weights-per-component={separate_weights} \ --enforce-sum-to-one={hard_enforce} \ --sum-to-one-penalty={penalty} \ --enforce-positive-weights=true \ @@ -568,6 +573,8 @@ def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_st {dir}/final.mdl""".format( command=run_opts.command, combine_queue_opt=run_opts.combine_queue_opt, + opt_iters=(20 if sum_to_one_penalty <= 0 else 80), + separate_weights=(sum_to_one_penalty > 0), lc=left_context, rc=right_context, l2=l2_regularize, leaky=leaky_hmm_coefficient, dir=dir, raw_models=" ".join(raw_model_strings), From c747ed5d51687003f995f859b449cb64dc0fc0c7 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Mon, 6 Feb 2017 14:54:28 -0500 Subject: [PATCH 402/530] [build] fixing issue introduced in the previous win commit (#1399) --- windows/get_version.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/windows/get_version.pl b/windows/get_version.pl index 2a54891516a..f66a3a23c25 100755 --- a/windows/get_version.pl +++ b/windows/get_version.pl @@ -39,6 +39,6 @@ }; my $kaldi_ver=<$F>; chomp $kaldi_ver; -print $H "KALDI_VERSION=${kaldi_ver}-win\n"; +print $H "#define KALDI_VERSION \"${kaldi_ver}-win\"\n"; close($F); close($H); From 9156d29387c370384ef0b4f92e78be90942b5bfb Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 6 Feb 2017 16:02:35 -0500 Subject: [PATCH 403/530] asr_diarization: Update simple hmm --- src/simplehmm/Makefile | 2 +- src/simplehmm/simple-hmm-utils.cc | 2 +- src/simplehmm/simple-hmm-utils.h | 2 +- src/simplehmm/simple-hmm.cc | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/simplehmm/Makefile b/src/simplehmm/Makefile index 89c9f70a8c3..d83fba05900 100644 --- a/src/simplehmm/Makefile +++ b/src/simplehmm/Makefile @@ -5,7 +5,7 @@ include ../kaldi.mk TESTFILES = simple-hmm-test -OBJFILES = simple-hmm.o simple-hmm-utils.o simple-hmm-graph-compiler.o +OBJFILES = simple-hmm.o simple-hmm-utils.o LIBNAME = kaldi-simplehmm ADDLIBS = ../hmm/kaldi-hmm.a ../decoder/kaldi-decoder.a \ diff --git a/src/simplehmm/simple-hmm-utils.cc b/src/simplehmm/simple-hmm-utils.cc index 3406b7b56f8..fc0c7e4ca3c 100644 --- a/src/simplehmm/simple-hmm-utils.cc +++ b/src/simplehmm/simple-hmm-utils.cc @@ -20,7 +20,7 @@ #include -#include "hmm/simple-hmm-utils.h" +#include "simplehmm/simple-hmm-utils.h" #include "fst/fstlib.h" #include "fstext/fstext-lib.h" diff --git a/src/simplehmm/simple-hmm-utils.h b/src/simplehmm/simple-hmm-utils.h index bd0a3a15702..5bdf185214a 100644 --- a/src/simplehmm/simple-hmm-utils.h +++ b/src/simplehmm/simple-hmm-utils.h @@ -22,7 +22,7 @@ #define KALDI_HMM_SIMPLE_HMM_UTILS_H_ #include "hmm/hmm-utils.h" -#include "hmm/simple-hmm.h" +#include "simplehmm/simple-hmm.h" #include "fst/fstlib.h" namespace kaldi { diff --git a/src/simplehmm/simple-hmm.cc b/src/simplehmm/simple-hmm.cc index 2db6bfbf297..e0e7442ead3 100644 --- a/src/simplehmm/simple-hmm.cc +++ b/src/simplehmm/simple-hmm.cc @@ -17,7 +17,7 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. 
-#include "hmm/simple-hmm.h" +#include "simplehmm/simple-hmm.h" namespace kaldi { From 2d13d907f35412dc0f2844fdad74a45bcbbd37e0 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 6 Feb 2017 16:02:55 -0500 Subject: [PATCH 404/530] asr_diarization: Update cluster-utils --- src/tree/cluster-utils.cc | 6 +++--- src/tree/cluster-utils.h | 40 +++++++++++++++++++++++---------------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/tree/cluster-utils.cc b/src/tree/cluster-utils.cc index 965eb104d9e..aa9ae46bc01 100644 --- a/src/tree/cluster-utils.cc +++ b/src/tree/cluster-utils.cc @@ -273,7 +273,7 @@ void BottomUpClusterer::SetInitialDistances() { for (int32 i = 0; i < npoints_; i++) { for (int32 j = 0; j < i; j++) { BaseFloat dist = ComputeDistance(i, j); - if (dist <= max_merge_thresh_) + if (dist <= MergeThreshold(i, j)) queue_.push(std::make_pair(dist, std::make_pair(static_cast(i), static_cast(j)))); if (j == i - 1) @@ -325,7 +325,7 @@ void BottomUpClusterer::ReconstructQueue() { for (int32 j = 0; j < i; j++) { if ((*clusters_)[j] != NULL) { BaseFloat dist = dist_vec_[(i * (i - 1)) / 2 + j]; - if (dist <= max_merge_thresh_) { + if (dist <= MergeThreshold(i, j)) { queue_.push(std::make_pair(dist, std::make_pair( static_cast(i), static_cast(j)))); } @@ -339,7 +339,7 @@ void BottomUpClusterer::SetDistance(int32 i, int32 j) { KALDI_ASSERT(i < npoints_ && j < i && (*clusters_)[i] != NULL && (*clusters_)[j] != NULL); BaseFloat dist = ComputeDistance(i, j); - if (dist < max_merge_thresh_) { + if (dist < MergeThreshold(i, j)) { queue_.push(std::make_pair(dist, std::make_pair(static_cast(i), static_cast(j)))); } diff --git a/src/tree/cluster-utils.h b/src/tree/cluster-utils.h index b11dfe1c031..2658cb8dfd0 100644 --- a/src/tree/cluster-utils.h +++ b/src/tree/cluster-utils.h @@ -119,9 +119,10 @@ class BottomUpClusterer { int32 min_clust, std::vector *clusters_out, std::vector *assignments_out) - : ans_(0.0), points_(points), max_merge_thresh_(max_merge_thresh), + : points_(points), max_merge_thresh_(max_merge_thresh), min_clust_(min_clust), clusters_(clusters_out != NULL? clusters_out - : &tmp_clusters_), assignments_(assignments_out != NULL ? + : &tmp_clusters_), ans_(0.0), + assignments_(assignments_out != NULL ? assignments_out : &tmp_assignments_) { nclusters_ = npoints_ = points.size(); dist_vec_.resize((npoints_ * (npoints_ - 1)) / 2); @@ -131,7 +132,6 @@ class BottomUpClusterer { ~BottomUpClusterer() { DeletePointers(&tmp_clusters_); } /// Public accessors - const Clusterable* GetCluster(int32 i) const { return (*clusters_)[i]; } BaseFloat& Distance(int32 i, int32 j) { KALDI_ASSERT(i < npoints_ && j < i); return dist_vec_[(i * (i - 1)) / 2 + j]; @@ -143,10 +143,27 @@ class BottomUpClusterer { /// Merge j into i and delete j. virtual void MergeClusters(int32 i, int32 j); + typedef std::pair > + QueueElement; + // Priority queue using greater (lowest distances are highest priority). 
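+  // (std::priority_queue is a max-heap by default; std::greater turns it
+  // into a min-heap, and because QueueElement is a pair it compares the
+  // BaseFloat distance first, so top() always gives the closest pair.)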
+ typedef std::priority_queue, + std::greater > QueueType; + int32 NumClusters() const { return nclusters_; } int32 NumPoints() const { return npoints_; } int32 MinClusters() const { return min_clust_; } bool IsQueueEmpty() const { return queue_.empty(); } + + protected: + const std::vector &points_; + BaseFloat max_merge_thresh_; + int32 min_clust_; + std::vector *clusters_; + + std::vector dist_vec_; + int32 nclusters_; + int32 npoints_; + QueueType queue_; private: void Renumber(); @@ -162,6 +179,10 @@ class BottomUpClusterer { return nclusters_ <= min_clust_ || queue_.empty(); } + virtual BaseFloat MergeThreshold(int32 i, int32 j) { + return max_merge_thresh_; + } + void SetDistance(int32 i, int32 j); virtual BaseFloat ComputeDistance(int32 i, int32 j) { BaseFloat dist = (*clusters_)[i]->Distance(*((*clusters_)[j])); @@ -170,23 +191,10 @@ class BottomUpClusterer { } BaseFloat ans_; - const std::vector &points_; - BaseFloat max_merge_thresh_; - int32 min_clust_; - std::vector *clusters_; std::vector *assignments_; std::vector tmp_clusters_; std::vector tmp_assignments_; - - std::vector dist_vec_; - int32 nclusters_; - int32 npoints_; - typedef std::pair > QueueElement; - // Priority queue using greater (lowest distances are highest priority). - typedef std::priority_queue, - std::greater > QueueType; - QueueType queue_; }; /** This is a wrapper function to the BottomUpClusterer class. From e5988b726d2a840079d3399ba161f68e53a08128 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 6 Feb 2017 16:03:16 -0500 Subject: [PATCH 405/530] asr_diarization: ib clusterable --- .../information-bottleneck-cluster-utils.cc | 77 +++++++++++-------- .../information-bottleneck-cluster-utils.h | 11 ++- .../information-bottleneck-clusterable.cc | 11 ++- .../segmentation-cluster-adjacent-segments.cc | 55 +++++++++++-- 4 files changed, 114 insertions(+), 40 deletions(-) diff --git a/src/segmenter/information-bottleneck-cluster-utils.cc b/src/segmenter/information-bottleneck-cluster-utils.cc index 75fda8c59fe..5ed283da564 100644 --- a/src/segmenter/information-bottleneck-cluster-utils.cc +++ b/src/segmenter/information-bottleneck-cluster-utils.cc @@ -37,24 +37,27 @@ class InformationBottleneckBottomUpClusterer : public BottomUpClusterer { std::vector *assignments_out); private: + virtual void SetInitialDistances(); virtual BaseFloat ComputeDistance(int32 i, int32 j); virtual bool StoppingCriterion() const; virtual void UpdateClustererStats(int32 i, int32 j); + virtual BaseFloat MergeThreshold(int32 i, int32 j) { + if (opts_.normalize_by_count) + return max_merge_thresh_ + * ((*clusters_)[i]->Normalizer() + (*clusters_)[j]->Normalizer()); + else if (opts_.normalize_by_entropy) + return -max_merge_thresh_ * (*clusters_)[i]->ObjfPlus(*(*clusters_)[j]); + else + return max_merge_thresh_; + } + BaseFloat NormalizedMutualInformation() const { return ((merged_entropy_ - current_entropy_) / (merged_entropy_ - initial_entropy_)); } - /// Stop merging when the stopping criterion, e.g. NMI, reaches this - /// threshold. - BaseFloat stopping_threshold_; - - /// Weight of the relevant variables entropy towards the objective. - BaseFloat relevance_factor_; - - /// Weight of the input variables entropy towards the objective. - BaseFloat input_factor_; + const InformationBottleneckClustererOptions &opts_; /// Running entropy of the clusters. 
BaseFloat current_entropy_; @@ -75,9 +78,7 @@ InformationBottleneckBottomUpClusterer::InformationBottleneckBottomUpClusterer( std::vector *assignments_out) : BottomUpClusterer(points, max_merge_thresh, min_clusters, clusters_out, assignments_out), - stopping_threshold_(opts.stopping_threshold), - relevance_factor_(opts.relevance_factor), - input_factor_(opts.input_factor), + opts_(opts), current_entropy_(0.0), initial_entropy_(0.0), merged_entropy_(0.0) { if (points.size() == 0) return; @@ -96,34 +97,50 @@ InformationBottleneckBottomUpClusterer::InformationBottleneckBottomUpClusterer( current_entropy_ = initial_entropy_; } +void InformationBottleneckBottomUpClusterer::SetInitialDistances() { + for (int32 i = 0; i < npoints_; i++) { + for (int32 j = 0; j < i; j++) { + BaseFloat dist = ComputeDistance(i, j); + if (dist <= MergeThreshold(i, j)) { + queue_.push(std::make_pair( + dist, std::make_pair(static_cast(i), + static_cast(j)))); + } + if (j == i - 1) + KALDI_VLOG(2) << "Distance(" << i << ", " << j << ") = " << dist; + } + } +} + BaseFloat InformationBottleneckBottomUpClusterer::ComputeDistance( int32 i, int32 j) { const InformationBottleneckClusterable* cluster_i - = static_cast(GetCluster(i)); + = static_cast((*clusters_)[i]); const InformationBottleneckClusterable* cluster_j - = static_cast(GetCluster(j)); + = static_cast((*clusters_)[j]); - BaseFloat dist = (cluster_i->Distance(*cluster_j, relevance_factor_, - input_factor_)); + BaseFloat dist = (cluster_i->Distance(*cluster_j, opts_.relevance_factor, + opts_.input_factor)); // / (cluster_i->Normalizer() + cluster_j->Normalizer())); Distance(i, j) = dist; // set the distance in the array. return dist; } bool InformationBottleneckBottomUpClusterer::StoppingCriterion() const { - bool flag = (NumClusters() <= MinClusters() || IsQueueEmpty() || - NormalizedMutualInformation() < stopping_threshold_); + bool flag = (nclusters_ <= min_clust_ || queue_.empty() || + NormalizedMutualInformation() < opts_.stopping_threshold); if (GetVerboseLevel() < 2 || !flag) return flag; - if (NormalizedMutualInformation() < stopping_threshold_) { - KALDI_VLOG(2) << "Stopping at " << NumClusters() << " clusters " + if (NormalizedMutualInformation() < opts_.stopping_threshold) { + KALDI_VLOG(2) << "Stopping at " << nclusters_ << " clusters " << "because NMI = " << NormalizedMutualInformation() - << " < stopping_threshold (" << stopping_threshold_ << ")"; - } else if (NumClusters() < MinClusters()) { - KALDI_VLOG(2) << "Stopping at " << NumClusters() << " clusters " - << "<= min-clusters (" << MinClusters() << ")"; - } else if (IsQueueEmpty()) { - KALDI_VLOG(2) << "Stopping at " << NumClusters() << " clusters " + << " < stopping_threshold (" + << opts_.stopping_threshold << ")"; + } else if (nclusters_ < min_clust_) { + KALDI_VLOG(2) << "Stopping at " << nclusters_ << " clusters " + << "<= min-clusters (" << min_clust_ << ")"; + } else if (queue_.empty()) { + KALDI_VLOG(2) << "Stopping at " << nclusters_ << " clusters " << "because queue is empty."; } @@ -133,12 +150,12 @@ bool InformationBottleneckBottomUpClusterer::StoppingCriterion() const { void InformationBottleneckBottomUpClusterer::UpdateClustererStats( int32 i, int32 j) { const InformationBottleneckClusterable* cluster_i - = static_cast(GetCluster(i)); - current_entropy_ += cluster_i->Distance(*GetCluster(j), 1.0, 0.0); + = static_cast((*clusters_)[i]); + current_entropy_ += cluster_i->Distance(*(*clusters_)[j], 1.0, 0.0); if (GetVerboseLevel() > 2) { const InformationBottleneckClusterable* cluster_j 
- = static_cast(GetCluster(j)); + = static_cast((*clusters_)[j]); std::vector cluster_i_points; { std::map::const_iterator it @@ -158,7 +175,7 @@ void InformationBottleneckBottomUpClusterer::UpdateClustererStats( << "(" << cluster_i_points << ", " << cluster_j_points << ").. distance=" << Distance(i, j) - << ", num-clusters-after-merge= " << NumClusters() - 1 + << ", num-clusters-after-merge= " << nclusters_ - 1 << ", NMI= " << NormalizedMutualInformation(); } } diff --git a/src/segmenter/information-bottleneck-cluster-utils.h b/src/segmenter/information-bottleneck-cluster-utils.h index 58f1e4f380a..82b5c285c65 100644 --- a/src/segmenter/information-bottleneck-cluster-utils.h +++ b/src/segmenter/information-bottleneck-cluster-utils.h @@ -33,10 +33,13 @@ struct InformationBottleneckClustererOptions { BaseFloat stopping_threshold; BaseFloat relevance_factor; BaseFloat input_factor; + bool normalize_by_count; + bool normalize_by_entropy; InformationBottleneckClustererOptions() : distance_threshold(std::numeric_limits::max()), num_clusters(1), - stopping_threshold(0.3), relevance_factor(1.0), input_factor(0.1) { } + stopping_threshold(0.3), relevance_factor(1.0), input_factor(0.1), + normalize_by_count(false), normalize_by_entropy(false) { } void Register(OptionsItf *opts) { @@ -49,6 +52,12 @@ struct InformationBottleneckClustererOptions { opts->Register("input-factor", &input_factor, "Weight factor of the entropy of input variables " "in the objective function"); + opts->Register("normalize-by-count", &normalize_by_count, + "If provided, normalizes the score (distance) by " + "the count post-merge."); + opts->Register("normalize-by-entropy", &normalize_by_entropy, + "If provided, normalizes the score (distance) by " + "the entropy post-merge."); } }; diff --git a/src/segmenter/information-bottleneck-clusterable.cc b/src/segmenter/information-bottleneck-clusterable.cc index 7817f7cfdc6..05850c1eebc 100644 --- a/src/segmenter/information-bottleneck-clusterable.cc +++ b/src/segmenter/information-bottleneck-clusterable.cc @@ -68,7 +68,9 @@ void InformationBottleneckClusterable::Add(const Clusterable &other_in) { it != other->counts_.end(); ++it) { std::map::iterator hint_it = counts_.lower_bound( it->first); - KALDI_ASSERT (hint_it == counts_.end() || hint_it->first != it->first); + if (hint_it != counts_.end() && hint_it->first == it->first) { + KALDI_ERR << "Duplicate segment id " << it->first; + } counts_.insert(hint_it, *it); } @@ -205,8 +207,11 @@ BaseFloat InformationBottleneckClusterable::Distance( KALDI_ASSERT(relevance_divergence > -1e-4); KALDI_ASSERT(input_divergence > -1e-4); - return (normalizer * (relevance_factor * relevance_divergence - - input_factor * input_divergence)); + + double ans = (normalizer * (relevance_factor * relevance_divergence + - input_factor * input_divergence)); + KALDI_ASSERT(input_factor != 0.0 || ans > -1e-4); + return ans; } BaseFloat KLDivergence(const VectorBase &p1, diff --git a/src/segmenterbin/segmentation-cluster-adjacent-segments.cc b/src/segmenterbin/segmentation-cluster-adjacent-segments.cc index 812785ac5e6..fde13cd7ead 100644 --- a/src/segmenterbin/segmentation-cluster-adjacent-segments.cc +++ b/src/segmenterbin/segmentation-cluster-adjacent-segments.cc @@ -78,21 +78,59 @@ int32 ClusterAdjacentSegments(const MatrixBase &feats, BaseFloat var_floor, int32 length_tolerance, Segmentation *segmentation) { - if (segmentation->Dim() == 1) { - segmentation->Begin()->SetLabel(1); + if (segmentation->Dim() <= 3) { + // Very unusual case. 
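+    // (With three or fewer segments there are at most two adjacent-pair
+    // distances, which is too few for the peak detection below, so the
+    // whole recording is kept as a single cluster.)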
+ // TODO: Do something more reasonable. return 1; } + SegmentList::iterator it = segmentation->Begin(), next_it = segmentation->Begin(); ++next_it; + + // Vector storing for each segment, whether there is a change point at the + // beginning of the segment. + std::vector is_change_point(segmentation->Dim(), false); + is_change_point[0] = true; + + Vector distances(segmentation->Dim() - 1); + int32 i = 0; + + for (; next_it != segmentation->End(); ++it, ++next_it, i++) { + // Distance between segment i and i + 1 + distances(i) = Distance(*it, *next_it, feats, + var_floor, length_tolerance); + + if (i > 2) { + if (distances(i-1) - distances(i-2) > delta_distance_threshold && + distances(i) - distances(i-1) < -delta_distance_threshold) { + is_change_point[i-1] = true; + } + } else { + if (distances(i) - distances(i-1) > absolute_distance_threshold) + is_change_point[i] = true; + } + } + + int32 num_classes = 0; + for (i = 0, it = segmentation->Begin(); + it != segmentation->End(); ++it, i++) { + if (is_change_point[i]) { + num_classes++; + } + it->SetLabel(num_classes); + } + return num_classes; + /* BaseFloat prev_dist = Distance(*it, *next_it, feats, var_floor, length_tolerance); if (segmentation->Dim() == 2) { it->SetLabel(1); - if (prev_dist < absolute_distance_threshold * feats.NumCols()) { + if (prev_dist < absolute_distance_threshold * feats.NumCols() + && next_it->start_frame <= it->end_frame) { // Similar segments merged. next_it->SetLabel(it->Label()); } else { @@ -103,6 +141,10 @@ int32 ClusterAdjacentSegments(const MatrixBase &feats, return next_it->Label();; } + // The algorithm is a simple peak detection. + // Consider three segments that are pointed by the iterators + // prev_it, it, next_it. + // If Distance(prev_it, it) > Consider ++it; ++next_it; bool next_segment_is_new_cluster = false; @@ -162,6 +204,7 @@ int32 ClusterAdjacentSegments(const MatrixBase &feats, } return it->Label(); + */ } } // end segmenter @@ -186,7 +229,7 @@ int main(int argc, char *argv[]) { int32 length_tolerance = 2; BaseFloat var_floor = 0.01; BaseFloat absolute_distance_threshold = 3.0; - BaseFloat delta_distance_threshold = 0.2; + BaseFloat delta_distance_threshold = 0.0002; ParseOptions po(usage); @@ -203,8 +246,8 @@ int main(int argc, char *argv[]) { "Maximum per-dim distance below which segments will not be " "be merged."); po.Register("delta-distance-threshold", &delta_distance_threshold, - "If the delta-distance is below this value, then the " - "adjacent segments will not be merged."); + "If the delta-distance is below this value, then it will " + "be treated as 0."); po.Read(argc, argv); From 9a86fc0c5ad9a947df6929ce5af6792587d4cdc2 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 6 Feb 2017 16:03:42 -0500 Subject: [PATCH 406/530] asr_diarization: init-models-from-feats --- src/segmenterbin/Makefile | 3 ++- .../gmm-global-init-models-from-feats.cc | 13 +++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/segmenterbin/Makefile b/src/segmenterbin/Makefile index a424f192d3e..6e0036c6fb7 100644 --- a/src/segmenterbin/Makefile +++ b/src/segmenterbin/Makefile @@ -21,7 +21,8 @@ BINFILES = segmentation-copy segmentation-get-stats \ agglomerative-cluster-ib \ intersect-int-vectors \ gmm-global-init-models-from-feats \ - segmentation-cluster-adjacent-segments #\ + segmentation-cluster-adjacent-segments \ + ib-scoring-dense #\ gmm-acc-pdf-stats-segmentation \ gmm-est-segmentation gmm-update-segmentation \ segmentation-init-from-diarization \ diff --git 
a/src/segmenterbin/gmm-global-init-models-from-feats.cc b/src/segmenterbin/gmm-global-init-models-from-feats.cc index a472b48624c..c323306df83 100644 --- a/src/segmenterbin/gmm-global-init-models-from-feats.cc +++ b/src/segmenterbin/gmm-global-init-models-from-feats.cc @@ -65,9 +65,9 @@ void MleDiagGmmSharedVarsUpdate(const MleDiagGmmOptions &config, DiagGmm *gmm, BaseFloat *obj_change_out, BaseFloat *count_out, - int32 *floored_elements_out, - int32 *floored_gaussians_out, - int32 *removed_gaussians_out) { + int32 *floored_elements_out = NULL, + int32 *floored_gaussians_out = NULL, + int32 *removed_gaussians_out = NULL) { KALDI_ASSERT(gmm != NULL); if (flags & ~diag_gmm_acc.Flags()) @@ -213,7 +213,12 @@ void TrainOneIter(const MatrixBase &feats, << feats.NumRows() << " frames."; BaseFloat objf_change, count; - MleDiagGmmUpdate(gmm_opts, gmm_acc, kGmmAll, gmm, &objf_change, &count); + if (share_covars) { + MleDiagGmmSharedVarsUpdate(gmm_opts, gmm_acc, kGmmAll, gmm, + &objf_change, &count); + } else { + MleDiagGmmUpdate(gmm_opts, gmm_acc, kGmmAll, gmm, &objf_change, &count); + } KALDI_LOG << "Objective-function change on iteration " << iter << " was " << (objf_change / count) << " over " << count << " frames."; From 4646f14d59a0685526ee4897a806f6c75cc29177 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 6 Feb 2017 16:04:06 -0500 Subject: [PATCH 407/530] asr_diarization: Clustering script --- .../segmentation/cluster_segments_aIB.sh | 32 +++++++++---- .../cluster_segments_aIB_change_point.sh | 45 ++++++++++++++----- 2 files changed, 58 insertions(+), 19 deletions(-) diff --git a/egs/wsj/s5/steps/segmentation/cluster_segments_aIB.sh b/egs/wsj/s5/steps/segmentation/cluster_segments_aIB.sh index a1f187fab31..7cf151f1ad0 100755 --- a/egs/wsj/s5/steps/segmentation/cluster_segments_aIB.sh +++ b/egs/wsj/s5/steps/segmentation/cluster_segments_aIB.sh @@ -8,7 +8,7 @@ reco_nj=4 frame_shift=0.01 utt_nj=18 min_clusters=10 -stopping_threshold=0.5 +clustering_opts="--stopping-threshold=0.5 --max-merge-thresh=0.25 --normalize-by-entropy" . path.sh . 
utils/parse_options.sh @@ -29,7 +29,7 @@ out_data=$3 num_frames=`perl -e "print int($window / $frame_shift + 0.5)"` num_frames_overlap=`perl -e "print int($overlap/ $frame_shift + 0.5)"` -data_uniform_seg=${data}_uniform_seg_window${window}_ovlp${overlap} +data_uniform_seg=$dir/`basename ${data}`_uniform_seg_window${window}_ovlp${overlap} mkdir -p ${data_uniform_seg} @@ -41,8 +41,6 @@ if [ $stage -le 0 ]; then $cmd $dir/log/get_subsegments.log \ segmentation-init-from-segments --frame-overlap=0.015 $data/segments ark:- \| \ segmentation-split-segments --max-segment-length=$num_frames --overlap-length=$num_frames_overlap ark:- ark:- \| \ - segmentation-cluster-adjacent-segments --verbose=3 ark:- "scp:$data/feats.scp" ark:- \| \ - segmentation-post-process --merge-adjacent-segments ark:- ark:- \| \ segmentation-to-segments --frame-overlap=0.0 ark:- ark:/dev/null \ ${data_uniform_seg}/sub_segments @@ -98,16 +96,34 @@ fi seg_dir=$dir/segmentation_`basename $data_uniform_seg` if [ $stage -le 4 ]; then + $cmd JOB=1:$reco_nj $seg_dir/log/compute_scores.JOB.log \ + ib-scoring-dense --input-factor=0.0 $clustering_opts \ + --counts-rspecifier="ark,t:utils/filter_scp.pl $data_uniform_seg/split${reco_nj}reco/JOB/utt2spk $data_uniform_seg/utt2num_frames |" \ + "ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt" \ + "ark:gunzip -c $post_dir/avg_post.JOB.gz |" \ + ark,t:$seg_dir/scores.JOB.txt ark:/dev/null +fi + +if [ $stage -le 5 ]; then + threshold=$(for n in `seq $reco_nj`; do + /export/a12/vmanoha1/kaldi-diarization-v2/src/ivectorbin/compute-calibration \ + ark,t:$seg_dir/scores.$n.txt -; done | \ + awk '{i += $1; j++;} END{print i / j}') + echo $threshold > $seg_dir/threshold +fi + +threshold=$(cat $seg_dir/threshold) +if [ $stage -le 6 ]; then $cmd JOB=1:$reco_nj $seg_dir/log/cluster_segments.JOB.log \ - agglomerative-cluster-ib --min-clusters=$min_clusters \ - --verbose=3 --stopping-threshold=$stopping_threshold --input-factor=0 \ + agglomerative-cluster-ib --input-factor=0.0 --min-clusters=$min_clusters $clustering_opts \ + --max-merge-thresh=$threshold --verbose=3 \ --counts-rspecifier="ark,t:utils/filter_scp.pl $data_uniform_seg/split${reco_nj}reco/JOB/utt2spk $data_uniform_seg/utt2num_frames |" \ "ark:gunzip -c $post_dir/avg_post.JOB.gz |" \ "ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt" \ ark,t:$seg_dir/utt2cluster_id.JOB fi -if [ $stage -le 5 ]; then +if [ $stage -le 7 ]; then $cmd JOB=1:$reco_nj $seg_dir/log/init_segmentation.JOB.log \ segmentation-init-from-segments --frame-overlap=0.0 --shift-to-zero=false \ --utt2label-rspecifier=ark,t:${seg_dir}/utt2cluster_id.JOB \ @@ -120,7 +136,7 @@ if [ $stage -le 5 ]; then segmentation-to-segments ark:- ark,t:$seg_dir/utt2spk.JOB $seg_dir/segments.JOB fi -if [ $stage -le 6 ]; then +if [ $stage -le 8 ]; then rm -r $out_data || true utils/data/convert_data_dir_to_whole.sh $data $out_data rm $out_data/{text,cmvn.scp} || true diff --git a/egs/wsj/s5/steps/segmentation/cluster_segments_aIB_change_point.sh b/egs/wsj/s5/steps/segmentation/cluster_segments_aIB_change_point.sh index a1f187fab31..9ca3efb7b9a 100755 --- a/egs/wsj/s5/steps/segmentation/cluster_segments_aIB_change_point.sh +++ b/egs/wsj/s5/steps/segmentation/cluster_segments_aIB_change_point.sh @@ -6,9 +6,10 @@ stage=-1 cmd=queue.pl reco_nj=4 frame_shift=0.01 +frame_overlap=0.0 utt_nj=18 min_clusters=10 -stopping_threshold=0.5 +clustering_opts="--stopping-threshold=0.5 --max-merge-thresh=0.25 --normalize-by-entropy" . path.sh . 
utils/parse_options.sh @@ -29,17 +30,19 @@ out_data=$3 num_frames=`perl -e "print int($window / $frame_shift + 0.5)"` num_frames_overlap=`perl -e "print int($overlap/ $frame_shift + 0.5)"` -data_uniform_seg=${data}_uniform_seg_window${window}_ovlp${overlap} - -mkdir -p ${data_uniform_seg} +data_id=`basename $data` +data_uniform_seg=$dir/${data_id}_uniform_seg_window${window}_ovlp${overlap} mkdir -p $dir #segmentation-cluster-adjacent-segments --verbose=0 'ark:segmentation-copy --keep-label=1 "ark:gunzip -c exp/nnet3_lstm_sad_music/nnet_lstm_1e//segmentation_bn_eval97_whole_bp/orig_segmentation.1.gz |" ark:- | segmentation-split-segments --max-segment-length=250 --overlap-length=0 ark:- ark:- |' scp:data/bn_eval97_bp_hires/feats.scp "ark:| segmentation-post-process --merge-adjacent-segments ark:- ark:- | segmentation-to-segments ark:- ark,t:- /dev/null" 2>&1 | less if [ $stage -le 0 ]; then + rm -r ${data_uniform_seg} || true + mkdir -p ${data_uniform_seg} + $cmd $dir/log/get_subsegments.log \ - segmentation-init-from-segments --frame-overlap=0.015 $data/segments ark:- \| \ + segmentation-init-from-segments --frame-overlap=$frame_overlap $data/segments ark:- \| \ segmentation-split-segments --max-segment-length=$num_frames --overlap-length=$num_frames_overlap ark:- ark:- \| \ segmentation-cluster-adjacent-segments --verbose=3 ark:- "scp:$data/feats.scp" ark:- \| \ segmentation-post-process --merge-adjacent-segments ark:- ark:- \| \ @@ -86,8 +89,6 @@ if [ $stage -le 2 ]; then fi if [ $stage -le 3 ]; then - utils/data/get_utt2num_frames.sh --nj $utt_nj --cmd "$cmd" ${data_uniform_seg} - $cmd JOB=1:$reco_nj $post_dir/log/compute_average_post.JOB.log \ gmm-global-post-to-feats \ --utt2spk="ark,t:cut -d ' ' -f 1,2 ${data_uniform_seg}/split${reco_nj}reco/JOB/segments |" \ @@ -98,16 +99,38 @@ fi seg_dir=$dir/segmentation_`basename $data_uniform_seg` if [ $stage -le 4 ]; then + utils/data/get_utt2num_frames.sh --nj $utt_nj --cmd "$cmd" ${data_uniform_seg} + + $cmd JOB=1:$reco_nj $seg_dir/log/compute_scores.JOB.log \ + ib-scoring-dense --input-factor=0 $clustering_opts \ + --counts-rspecifier="ark,t:utils/filter_scp.pl $data_uniform_seg/split${reco_nj}reco/JOB/utt2spk $data_uniform_seg/utt2num_frames |" \ + "ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt" \ + "ark:gunzip -c $post_dir/avg_post.JOB.gz |" \ + ark,t:$seg_dir/scores.JOB.txt ark:/dev/null +fi + +if [ $stage -le 5 ]; then + $cmd JOB=1:$reco_nj $seg_dir/log/calibrate.JOB.log \ + /export/a12/vmanoha1/kaldi-diarization-v2/src/ivectorbin/compute-calibration \ + ark,t:$seg_dir/scores.JOB.txt $seg_dir/threshold.JOB.txt + + threshold=$(for n in `seq $reco_nj`; do cat $seg_dir/threshold.$n.txt; done | \ + awk '{i += $1; j++;} END{print i / j}') + echo $threshold > $seg_dir/threshold +fi + +threshold=$(cat $seg_dir/threshold) +if [ $stage -le 6 ]; then $cmd JOB=1:$reco_nj $seg_dir/log/cluster_segments.JOB.log \ - agglomerative-cluster-ib --min-clusters=$min_clusters \ - --verbose=3 --stopping-threshold=$stopping_threshold --input-factor=0 \ + agglomerative-cluster-ib --input-factor=0.0 $clustering_opts \ + --max-merge-thresh=$threshold --verbose=3 \ --counts-rspecifier="ark,t:utils/filter_scp.pl $data_uniform_seg/split${reco_nj}reco/JOB/utt2spk $data_uniform_seg/utt2num_frames |" \ "ark:gunzip -c $post_dir/avg_post.JOB.gz |" \ "ark,t:${data_uniform_seg}/split${reco_nj}reco/JOB/reco2utt" \ ark,t:$seg_dir/utt2cluster_id.JOB fi -if [ $stage -le 5 ]; then +if [ $stage -le 7 ]; then $cmd JOB=1:$reco_nj $seg_dir/log/init_segmentation.JOB.log 
\   segmentation-init-from-segments --frame-overlap=0.0 --shift-to-zero=false \
   --utt2label-rspecifier=ark,t:${seg_dir}/utt2cluster_id.JOB \
@@ -120,7 +143,7 @@ if [ $stage -le 5 ]; then
   segmentation-to-segments ark:- ark,t:$seg_dir/utt2spk.JOB $seg_dir/segments.JOB
 fi

-if [ $stage -le 6 ]; then
+if [ $stage -le 8 ]; then
   rm -r $out_data || true
   utils/data/convert_data_dir_to_whole.sh $data $out_data
   rm $out_data/{text,cmvn.scp} || true

From 53e167d4e3500f9b01518209ca269598683d81ed Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Mon, 6 Feb 2017 16:48:58 -0500
Subject: [PATCH 408/530] asr_diarization: Added virtual destructor

---
 src/hmm/transition-model.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/hmm/transition-model.h b/src/hmm/transition-model.h
index c059e319dd5..51802b37f41 100644
--- a/src/hmm/transition-model.h
+++ b/src/hmm/transition-model.h
@@ -130,12 +130,15 @@ class TransitionModel {

   /// Constructor that takes no arguments: typically used prior to calling Read.
   TransitionModel() { }
+
+  virtual ~TransitionModel() { }

   /// Does the same things as the constructor.
   void Init(const ContextDependencyInterface &ctx_dep,
             const HmmTopology &hmm_topo);

-  void Read(std::istream &is, bool binary);  // note, no symbol table: topo object always read/written w/o symbols.
+  // note, no symbol table: topo object always read/written w/o symbols.
+  virtual void Read(std::istream &is, bool binary);

   void Write(std::ostream &os, bool binary) const;

@@ -319,7 +322,6 @@ class TransitionModel {
   /// of pdfs).
   int32 num_pdfs_;

-  DISALLOW_COPY_AND_ASSIGN(TransitionModel);
 };

From 21c8031456de9858d469b6f52f86e075c76dc7b1 Mon Sep 17 00:00:00 2001
From: caizexin <313284213@qq.com>
Date: Wed, 8 Feb 2017 02:54:23 +0800
Subject: [PATCH 409/530] [egs] Fix to HKUST nnet2/3 scripts. (#1401)

When training the UBM we should use just the 40-dimension MFCC features,
so change the training data directory to avoid a dimension mismatch.
With this change the script also runs without error after the nnet2
scripts.
---
 egs/hkust/s5/local/nnet3/run_ivector_common.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/egs/hkust/s5/local/nnet3/run_ivector_common.sh b/egs/hkust/s5/local/nnet3/run_ivector_common.sh
index bbdb5796c22..2ef33e43081 100755
--- a/egs/hkust/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/hkust/s5/local/nnet3/run_ivector_common.sh
@@ -71,14 +71,14 @@ fi
 if [ $stage -le 3 ] && [ -z $ivector_extractor ]; then
   steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 \
     --num-frames 700000 \
-    data/train_hires 512 exp/nnet3/tri5 exp/nnet3/diag_ubm
+    data/train_hires_nopitch 512 exp/nnet3/tri5 exp/nnet3/diag_ubm
 fi

 if [ $stage -le 4 ] && [ -z $ivector_extractor ]; then
   # iVector extractors can in general be sensitive to the amount of data, but
   # this one has a fairly small dim (defaults to 100)
   steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
-    data/train_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
+    data/train_hires_nopitch exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
   ivector_extractor=exp/nnet3/extractor
 fi

From fc333ed7866970bc1f48d99f263ea5c0cfd775bd Mon Sep 17 00:00:00 2001
From: Dogan Can
Date: Wed, 8 Feb 2017 10:48:06 -0800
Subject: [PATCH 410/530] Resolve conflicts due to cross compilation changes in
 master (#1400)

* [build]: resolving OpenFst compilation issue with gcc-6.x (#1392)

* [egs] Add new graphemic system for Gale Arabic, with newer nnet scripts (#1298)

* [build] Windows build: generate missing base/version.h; cosmetic changes (#1397)

* [build]: Enable cross compilation, including to android. (#726)

If a user has a number of tool chains installed and they do not want to
use the default, they must currently edit the kaldi.mk file after running
configure to change the CC, CXX, AR, AS, and RANLIB variables. This is
something that should be exposed via the configure script. This patch
exposes an option to set the host triple for the desired tool chain in
the configure script.

Building Kaldi on my Raspberry Pi boards is not particularly fast. I have
been using the following patch to build kaldi executables for use on the
Pi boards for the better part of a year. A typical invocation for me is
something like:

$ ./configure --static --atlas-root=/opt/cross/armv8hf \
      --fst-root=/opt/cross/armv8hf --host=armv8-rpi3-linux-gnueabihf \
      --fst-version=1.4.1

This way I can build on my much faster x86 desktop, but still run
experiments on ARM. I have included support for cross compiling for
ppc64le and it works for me (at least it produces binaries for ppc64le;
I don't have a ppc64 machine to test them).

Signed-off-by: Eric B Munson

* Add mk file and configure options for building for Android

Building for Android requires a toolchain that can be built using the
Android NDK. It works similarly to the linux build except that it only
uses clang, only supports the openBLAS math library, and requires an
additional include directory for the system C++ headers. A typical
configure invocation looks like:

./configure --static --openblas-root=/opt/cross/arm-linux-androideabi \
      --fst-root=/opt/cross/arm-linux-androideabi \
      --host=arm-linux-androideabi --fst-version=1.4.1 \
      --android-includes=/opt/cross/arm-linux-androideabi/sysroot/usr/include

Signed-off-by: Eric B Munson

* Make pthread cancel symbols noops for Android

The Android C library does not support cancelling pthreads, so the symbols
PTHREAD_CANCEL_STATE and pthread_setcancelstate are undefined.
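A minimal sketch of the kind of shim this implies is below (the names and
placement here are illustrative only; the real change is the small addition
to src/thread/kaldi-barrier.h shown in the diffstat that follows):

    #ifdef __ANDROID__
    // Bionic provides no pthread cancellation, so define harmless
    // stand-ins; code that saves/restores the cancel state still compiles.
    #define PTHREAD_CANCEL_ENABLE 0
    #define PTHREAD_CANCEL_DISABLE 1
    static inline int pthread_setcancelstate(int state, int *oldstate) {
      if (oldstate != NULL) *oldstate = PTHREAD_CANCEL_ENABLE;
      return 0;  // report success; the thread simply cannot be cancelled
    }
    #endif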
Because a pthread cannot be cancelled in Android, it is reasonable to make the pthread_setcancelstate() call a noop. Signed-off-by: Eric B Munson * A few small fixes for configure * Reword the clang++ requirement for android builds. --- egs/gale_arabic/README.txt | 9 +- .../s5/local/gale_data_prep_txt.sh | 2 + egs/gale_arabic/s5/local/gale_format_data.sh | 2 + egs/gale_arabic/s5/local/gale_prep_dict.sh | 1 + egs/gale_arabic/s5/local/gale_train_lms.sh | 2 + egs/gale_arabic/s5/run.sh | 24 +- egs/gale_arabic/s5b/RESULTS | 72 ++ egs/gale_arabic/s5b/cmd.sh | 15 + egs/gale_arabic/s5b/conf/decode.config | 1 + egs/gale_arabic/s5b/conf/mfcc.conf | 1 + egs/gale_arabic/s5b/conf/mfcc_hires.conf | 10 + egs/gale_arabic/s5b/conf/online_cmvn.conf | 1 + egs/gale_arabic/s5b/local/bad_segments | 10 + egs/gale_arabic/s5b/local/chain/run_tdnn.sh | 1 + .../s5b/local/chain/run_tdnn_lstm.sh | 1 + .../s5b/local/chain/tuning/run_tdnn_1a.sh | 210 ++++++ .../local/chain/tuning/run_tdnn_lstm_1a.sh | 223 ++++++ .../s5b/local/gale_data_prep_audio.sh | 32 + .../s5b/local/gale_data_prep_split.sh | 39 ++ .../s5b/local/gale_data_prep_txt.sh | 60 ++ egs/gale_arabic/s5b/local/gale_format_data.sh | 60 ++ .../s5b/local/gale_prep_grapheme_dict.sh | 41 ++ egs/gale_arabic/s5b/local/gale_train_lms.sh | 81 +++ .../s5b/local/nnet3/run_ivector_common.sh | 237 +++++++ egs/gale_arabic/s5b/local/nnet3/run_lstm.sh | 1 + egs/gale_arabic/s5b/local/nnet3/run_tdnn.sh | 1 + .../s5b/local/nnet3/tuning/run_lstm_1a.sh | 161 +++++ .../s5b/local/nnet3/tuning/run_tdnn_1a.sh | 88 +++ .../s5b/local/normalize_transcript_BW.pl | 111 +++ egs/gale_arabic/s5b/local/score.sh | 57 ++ egs/gale_arabic/s5b/local/split_wer.sh | 72 ++ egs/gale_arabic/s5b/local/test_list | 11 + egs/gale_arabic/s5b/path.sh | 5 + egs/gale_arabic/s5b/run.sh | 167 +++++ egs/gale_arabic/s5b/steps | 1 + egs/gale_arabic/s5b/utils | 1 + egs/wsj/s5/utils/build_const_arpa_lm.sh | 2 +- src/configure | 651 +++++++++++------- src/makefiles/android_openblas.mk | 42 ++ src/makefiles/cygwin.mk | 4 - src/makefiles/darwin.mk | 4 - src/makefiles/linux_atlas.mk | 4 - src/makefiles/linux_atlas_arm.mk | 4 - src/makefiles/linux_atlas_ppc64le.mk | 4 - src/makefiles/linux_clapack.mk | 4 - src/makefiles/linux_clapack_arm.mk | 4 - src/makefiles/linux_openblas.mk | 4 - src/makefiles/linux_openblas_arm.mk | 4 - src/makefiles/linux_openblas_ppc64le.mk | 4 - src/makefiles/linux_x86_64_mkl.mk | 4 - src/thread/kaldi-barrier.h | 9 + windows/INSTALL.md | 10 +- windows/get_version.pl | 44 ++ windows/variables.props.dev | 3 + 54 files changed, 2299 insertions(+), 317 deletions(-) create mode 100644 egs/gale_arabic/s5b/RESULTS create mode 100755 egs/gale_arabic/s5b/cmd.sh create mode 100644 egs/gale_arabic/s5b/conf/decode.config create mode 100644 egs/gale_arabic/s5b/conf/mfcc.conf create mode 100644 egs/gale_arabic/s5b/conf/mfcc_hires.conf create mode 100644 egs/gale_arabic/s5b/conf/online_cmvn.conf create mode 100644 egs/gale_arabic/s5b/local/bad_segments create mode 120000 egs/gale_arabic/s5b/local/chain/run_tdnn.sh create mode 120000 egs/gale_arabic/s5b/local/chain/run_tdnn_lstm.sh create mode 100755 egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh create mode 100755 egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh create mode 100755 egs/gale_arabic/s5b/local/gale_data_prep_audio.sh create mode 100755 egs/gale_arabic/s5b/local/gale_data_prep_split.sh create mode 100755 egs/gale_arabic/s5b/local/gale_data_prep_txt.sh create mode 100755 egs/gale_arabic/s5b/local/gale_format_data.sh create mode 100755 
egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh
 create mode 100755 egs/gale_arabic/s5b/local/gale_train_lms.sh
 create mode 100755 egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh
 create mode 120000 egs/gale_arabic/s5b/local/nnet3/run_lstm.sh
 create mode 120000 egs/gale_arabic/s5b/local/nnet3/run_tdnn.sh
 create mode 100755 egs/gale_arabic/s5b/local/nnet3/tuning/run_lstm_1a.sh
 create mode 100755 egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh
 create mode 100755 egs/gale_arabic/s5b/local/normalize_transcript_BW.pl
 create mode 100755 egs/gale_arabic/s5b/local/score.sh
 create mode 100755 egs/gale_arabic/s5b/local/split_wer.sh
 create mode 100644 egs/gale_arabic/s5b/local/test_list
 create mode 100755 egs/gale_arabic/s5b/path.sh
 create mode 100755 egs/gale_arabic/s5b/run.sh
 create mode 120000 egs/gale_arabic/s5b/steps
 create mode 120000 egs/gale_arabic/s5b/utils
 create mode 100644 src/makefiles/android_openblas.mk
 create mode 100755 windows/get_version.pl

diff --git a/egs/gale_arabic/README.txt b/egs/gale_arabic/README.txt
index db436f11e8c..928fca8fdf3 100644
--- a/egs/gale_arabic/README.txt
+++ b/egs/gale_arabic/README.txt
@@ -10,8 +10,13 @@ GALE Phase 2 Arabic Broadcast Conversation Speech was developed by the Linguisti

 The data has two types of speech: conversational and report. This script trains and test on all of them and results are reported for each of them, train data is 320 hours, 9.3 hours testing

-The dictionary, and scripts can be obtained from QCRI portal: http://alt.qcri.org/
+The dictionaries and scripts can be obtained from the QCRI portal: http://alt.qcri.org/
+The experiments here are based on the above corpus

-s5: The experiments here are based on the above corpus
+s5: Phoneme based
+s5b: Grapheme based: This is the recommended setup, including nnet3 and chain modeling
+
+[1] "A Complete Kaldi Recipe For Building Arabic Speech Recognition Systems", A. Ali, Y. Zhang, P. Cardinal, N. Dahak, S. Vogel, J. Glass. SLT 2014
+[2] "QCRI Advanced Transcription Systems (QATS) for the Arabic Multi-Dialect Broadcast Media Recognition: MGB-2 Challenge", S. Khurana, A. Ali. SLT 2016
\ No newline at end of file
diff --git a/egs/gale_arabic/s5/local/gale_data_prep_txt.sh b/egs/gale_arabic/s5/local/gale_data_prep_txt.sh
index 8e42128726f..8b93a234eec 100755
--- a/egs/gale_arabic/s5/local/gale_data_prep_txt.sh
+++ b/egs/gale_arabic/s5/local/gale_data_prep_txt.sh
@@ -59,3 +59,5 @@ awk '{if ($1 == "conversational") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::'
 #rm -fr $txtdir
 cd $top_pwd
 echo data prep text succeeded
+
+exit 0
diff --git a/egs/gale_arabic/s5/local/gale_format_data.sh b/egs/gale_arabic/s5/local/gale_format_data.sh
index 6675dd20f71..9f03b9224cf 100755
--- a/egs/gale_arabic/s5/local/gale_format_data.sh
+++ b/egs/gale_arabic/s5/local/gale_format_data.sh
@@ -56,3 +56,5 @@ fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \

 echo gale_format_data succeeded.
+
+exit 0
\ No newline at end of file
diff --git a/egs/gale_arabic/s5/local/gale_prep_dict.sh b/egs/gale_arabic/s5/local/gale_prep_dict.sh
index b46d5d5fa29..74ef789eda7 100755
--- a/egs/gale_arabic/s5/local/gale_prep_dict.sh
+++ b/egs/gale_arabic/s5/local/gale_prep_dict.sh
@@ -30,3 +30,4 @@ sort -u > $dir/nonsilence_phones.txt || exit 1;

 echo Dictionary preparation succeeded

+exit 0
diff --git a/egs/gale_arabic/s5/local/gale_train_lms.sh b/egs/gale_arabic/s5/local/gale_train_lms.sh
index 838e7a26136..1b5d4665a19 100755
--- a/egs/gale_arabic/s5/local/gale_train_lms.sh
+++ b/egs/gale_arabic/s5/local/gale_train_lms.sh
@@ -112,3 +112,5 @@ fi

 echo train lm succeeded
+
+exit 0
\ No newline at end of file
diff --git a/egs/gale_arabic/s5/run.sh b/egs/gale_arabic/s5/run.sh
index f04e2cd8716..7c1da835ef0 100755
--- a/egs/gale_arabic/s5/run.sh
+++ b/egs/gale_arabic/s5/run.sh
@@ -1,10 +1,12 @@
-#!/bin/bash
+#!/bin/bash
+
+set -e

 # Copyright 2014 QCRI (author: Ahmed Ali)
 # Apache 2.0

-. ./path.sh
-. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
+. path.sh
+. cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
 ## This relates to the queue.

 nJobs=120
 nDecodeJobs=40
@@ -21,6 +23,8 @@ nDecodeJobs=40
 #This is CLSP configuration. We add the 2014 GALE data. We got around 2 %
 #improvement just by including it. The gain might be large if someone would tweak
 # the number of leaves and states and so on.
+
+#Make sure you edit this section to reflect where you keep the LDC data on your cluster
 audio=(
 /data/sls/scratch/amali/data/GALE/LDC2013S02
 /data/sls/scratch/amali/data/GALE/LDC2013S07
@@ -42,25 +46,25 @@ galeData=GALE
 # By copying and pasting into your shell.

 #copy the audio files to local folder wav and convert flac files to wav
-local/gale_data_prep_audio.sh "${audio[@]}" $galeData
+local/gale_data_prep_audio.sh "${audio[@]}" $galeData || exit 1;

 #get the transcription and remove empty prompts and all noise markers
-local/gale_data_prep_txt.sh "${text[@]}" $galeData
+local/gale_data_prep_txt.sh "${text[@]}" $galeData || exit 1;

 # split the data into reports and conversational, and for each class we will have train/dev and test
-local/gale_data_prep_split.sh $galeData
+local/gale_data_prep_split.sh $galeData || exit 1;

 # get QCRI dictionary and add silence and UN
-local/gale_prep_dict.sh
+local/gale_prep_dict.sh || exit 1;

 #prepare the language resources
-utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang
+utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang || exit 1;

 # LM training
-local/gale_train_lms.sh
+local/gale_train_lms.sh || exit 1;

-local/gale_format_data.sh
+local/gale_format_data.sh || exit 1;
 # G compilation, check LG composition

 # Now make MFCC features.
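The RESULTS file added below uses the standard Kaldi scoring format: each line
reports %WER = 100 * (insertions + deletions + substitutions) / reference
words. For example, the first tdnn_lstm line's 9.50 comes from
(160 + 275 + 1689) / 22363 = 2124 / 22363.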
diff --git a/egs/gale_arabic/s5b/RESULTS b/egs/gale_arabic/s5b/RESULTS new file mode 100644 index 00000000000..2260a106654 --- /dev/null +++ b/egs/gale_arabic/s5b/RESULTS @@ -0,0 +1,72 @@ +## +# This file is generated using local/split_wer.sh $galeData //galeData is a local folder to keep intermediate gale data +# look at the end of run.sh in the same folder +## +##### RESULTS generated by amali at 2017-01-01-08-05-59 + +Report Results WER: +%WER 9.50 [ 2124 / 22363, 160 ins, 275 del, 1689 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_report_9 +%WER 10.72 [ 2398 / 22363, 163 ins, 313 del, 1922 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_report_9 +%WER 12.04 [ 2693 / 22363, 226 ins, 271 del, 2196 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_report_9 +%WER 12.29 [ 2749 / 22363, 273 ins, 266 del, 2210 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_report_10 +%WER 17.82 [ 3986 / 22363, 315 ins, 618 del, 3053 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_report_12 +%WER 18.15 [ 4059 / 22363, 335 ins, 589 del, 3135 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_report_11 +%WER 18.42 [ 4119 / 22363, 346 ins, 590 del, 3183 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_report_11 +%WER 18.69 [ 4179 / 22363, 304 ins, 640 del, 3235 sub ] exp/sgmm_5a_mmi_b0.1/decode2/wer_report_13 +%WER 19.06 [ 4263 / 22363, 348 ins, 611 del, 3304 sub ] exp/sgmm_5a_mmi_b0.1/decode1/wer_report_12 +%WER 19.24 [ 4302 / 22363, 315 ins, 580 del, 3407 sub ] exp/tri2b_mmi_b0.05/decode_it4/wer_report_12 +%WER 19.37 [ 4331 / 22363, 319 ins, 553 del, 3459 sub ] exp/tri2b_mmi/decode_it4/wer_report_12 +%WER 19.61 [ 4386 / 22363, 348 ins, 563 del, 3475 sub ] exp/tri2b_mmi_b0.05/decode_it3/wer_report_12 +%WER 19.71 [ 4408 / 22363, 301 ins, 607 del, 3500 sub ] exp/tri2b_mmi/decode_it3/wer_report_13 +%WER 19.81 [ 4429 / 22363, 349 ins, 667 del, 3413 sub ] exp/sgmm_5a/decode/wer_report_14 +%WER 20.14 [ 4503 / 22363, 399 ins, 647 del, 3457 sub ] exp/tri2b_mpe/decode_it4/wer_report_14 +%WER 20.58 [ 4603 / 22363, 408 ins, 658 del, 3537 sub ] exp/tri2b_mpe/decode_it3/wer_report_14 +%WER 21.64 [ 4839 / 22363, 498 ins, 614 del, 3727 sub ] exp/tri3b/decode/wer_report_17 +%WER 23.32 [ 5214 / 22363, 470 ins, 727 del, 4017 sub ] exp/tri2b/decode/wer_report_16 +%WER 23.54 [ 5265 / 22363, 444 ins, 794 del, 4027 sub ] exp/tri3b/decode.si/wer_report_17 +%WER 25.66 [ 5738 / 22363, 478 ins, 838 del, 4422 sub ] exp/tri2a/decode/wer_report_14 +%WER 26.38 [ 5900 / 22363, 435 ins, 929 del, 4536 sub ] exp/tri1/decode/wer_report_15 +Conversational Results WER: +%WER 21.59 [ 10213 / 47305, 944 ins, 3092 del, 6177 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_conversational_9 +%WER 24.77 [ 11716 / 47305, 1098 ins, 3579 del, 7039 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_conversational_9 +%WER 26.78 [ 12670 / 47305, 1741 ins, 2434 del, 8495 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_conversational_9 +%WER 27.55 [ 13032 / 47305, 1800 ins, 2666 del, 8566 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_conversational_11 +%WER 34.10 [ 16133 / 47305, 1903 ins, 3245 del, 10985 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_conversational_11 +%WER 34.81 [ 16466 / 47305, 2077 ins, 3037 del, 11352 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_conversational_10 +%WER 35.19 [ 16648 / 47305, 1933 ins, 3264 del, 11451 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_conversational_11 +%WER 35.63 [ 16857 / 47305, 1988 ins, 3247 del, 11622 sub ] exp/sgmm_5a_mmi_b0.1/decode2/wer_conversational_11 +%WER 36.23 [ 17137 / 47305, 2091 ins, 3256 del, 11790 sub ] 
exp/sgmm_5a_mmi_b0.1/decode1/wer_conversational_11 +%WER 37.40 [ 17691 / 47305, 2150 ins, 3362 del, 12179 sub ] exp/sgmm_5a/decode/wer_conversational_12 +%WER 37.95 [ 17951 / 47305, 1738 ins, 3892 del, 12321 sub ] exp/tri2b_mmi_b0.05/decode_it4/wer_conversational_11 +%WER 37.97 [ 17960 / 47305, 1890 ins, 4212 del, 11858 sub ] exp/tri2b_mpe/decode_it4/wer_conversational_13 +%WER 38.16 [ 18050 / 47305, 1678 ins, 4083 del, 12289 sub ] exp/tri2b_mmi_b0.05/decode_it3/wer_conversational_12 +%WER 38.47 [ 18200 / 47305, 1804 ins, 3698 del, 12698 sub ] exp/tri2b_mmi/decode_it4/wer_conversational_11 +%WER 38.50 [ 18213 / 47305, 1958 ins, 4156 del, 12099 sub ] exp/tri2b_mpe/decode_it3/wer_conversational_13 +%WER 38.51 [ 18215 / 47305, 1993 ins, 3476 del, 12746 sub ] exp/tri2b_mmi/decode_it3/wer_conversational_11 +%WER 39.26 [ 18574 / 47305, 2319 ins, 3963 del, 12292 sub ] exp/tri3b/decode/wer_conversational_17 +%WER 41.40 [ 19586 / 47305, 2140 ins, 4216 del, 13230 sub ] exp/tri3b/decode.si/wer_conversational_15 +%WER 42.23 [ 19979 / 47305, 2153 ins, 4354 del, 13472 sub ] exp/tri2b/decode/wer_conversational_15 +%WER 45.92 [ 21724 / 47305, 1995 ins, 5213 del, 14516 sub ] exp/tri2a/decode/wer_conversational_14 +%WER 46.86 [ 22166 / 47305, 2212 ins, 4819 del, 15135 sub ] exp/tri1/decode/wer_conversational_13 +Combined Results for Reports and Conversational WER: +%WER 17.64 [ 12286 / 69668, 1310 ins, 2807 del, 8169 sub ] exp/chain_cleaned/tdnn_lstm1a_sp_bi/decode/wer_8 +%WER 20.26 [ 14114 / 69668, 1261 ins, 3892 del, 8961 sub ] exp/chain_cleaned/tdnn1b_sp_bi/decode/wer_9 +%WER 22.05 [ 15363 / 69668, 1967 ins, 2705 del, 10691 sub ] exp/nnet3_cleaned/lstm_ld5_sp/decode/wer_9 +%WER 22.66 [ 15786 / 69668, 2047 ins, 2955 del, 10784 sub ] exp/nnet3_cleaned/tdnn_sp/decode/wer_11 +%WER 28.89 [ 20127 / 69668, 2244 ins, 3829 del, 14054 sub ] exp/sgmm_5a_mmi_b0.1/decode/wer_11 +%WER 29.48 [ 20541 / 69668, 2243 ins, 3860 del, 14438 sub ] exp/sgmm_5a_mmi_b0.1/decode4/wer_11 +%WER 29.81 [ 20767 / 69668, 2279 ins, 3854 del, 14634 sub ] exp/sgmm_5a_mmi_b0.1/decode3/wer_11 +%WER 30.22 [ 21056 / 69668, 2165 ins, 4095 del, 14796 sub ] exp/sgmm_5a_mmi_b0.1/decode2/wer_12 +%WER 30.74 [ 21417 / 69668, 2273 ins, 4099 del, 15045 sub ] exp/sgmm_5a_mmi_b0.1/decode1/wer_12 +%WER 31.78 [ 22142 / 69668, 2547 ins, 3990 del, 15605 sub ] exp/sgmm_5a/decode/wer_12 +%WER 31.95 [ 22259 / 69668, 2092 ins, 4413 del, 15754 sub ] exp/tri2b_mmi_b0.05/decode_it4/wer_11 +%WER 32.20 [ 22436 / 69668, 2026 ins, 4646 del, 15764 sub ] exp/tri2b_mmi_b0.05/decode_it3/wer_12 +%WER 32.25 [ 22471 / 69668, 2315 ins, 4797 del, 15359 sub ] exp/tri2b_mpe/decode_it4/wer_13 +%WER 32.36 [ 22542 / 69668, 2156 ins, 4184 del, 16202 sub ] exp/tri2b_mmi/decode_it4/wer_11 +%WER 32.50 [ 22640 / 69668, 2393 ins, 3956 del, 16291 sub ] exp/tri2b_mmi/decode_it3/wer_11 +%WER 32.79 [ 22847 / 69668, 2407 ins, 4760 del, 15680 sub ] exp/tri2b_mpe/decode_it3/wer_13 +%WER 33.61 [ 23413 / 69668, 2817 ins, 4577 del, 16019 sub ] exp/tri3b/decode/wer_17 +%WER 35.73 [ 24894 / 69668, 2630 ins, 4944 del, 17320 sub ] exp/tri3b/decode.si/wer_15 +%WER 36.17 [ 25196 / 69668, 2429 ins, 5393 del, 17374 sub ] exp/tri2b/decode/wer_16 +%WER 39.42 [ 27462 / 69668, 2473 ins, 6051 del, 18938 sub ] exp/tri2a/decode/wer_14 +%WER 40.35 [ 28113 / 69668, 2713 ins, 5635 del, 19765 sub ] exp/tri1/decode/wer_13 diff --git a/egs/gale_arabic/s5b/cmd.sh b/egs/gale_arabic/s5b/cmd.sh new file mode 100755 index 00000000000..71dd849a93b --- /dev/null +++ b/egs/gale_arabic/s5b/cmd.sh @@ -0,0 +1,15 @@ +# you can 
change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd="queue.pl --mem 2G" +export decode_cmd="queue.pl --mem 4G" +export mkgraph_cmd="queue.pl --mem 8G" diff --git a/egs/gale_arabic/s5b/conf/decode.config b/egs/gale_arabic/s5b/conf/decode.config new file mode 100644 index 00000000000..6f503eab35e --- /dev/null +++ b/egs/gale_arabic/s5b/conf/decode.config @@ -0,0 +1 @@ +link decode_dnn.config \ No newline at end of file diff --git a/egs/gale_arabic/s5b/conf/mfcc.conf b/egs/gale_arabic/s5b/conf/mfcc.conf new file mode 100644 index 00000000000..7361509099f --- /dev/null +++ b/egs/gale_arabic/s5b/conf/mfcc.conf @@ -0,0 +1 @@ +--use-energy=false # only non-default option. diff --git a/egs/gale_arabic/s5b/conf/mfcc_hires.conf b/egs/gale_arabic/s5b/conf/mfcc_hires.conf new file mode 100644 index 00000000000..c45f2b691a9 --- /dev/null +++ b/egs/gale_arabic/s5b/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. 
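+# (Note, added for clarity and not in the original file: with
+# --sample-frequency=16000 below, the Nyquist frequency is 16000/2 = 8000 Hz;
+# a negative --high-freq value is interpreted as an offset below the Nyquist,
+# so -400 gives an effective cutoff of 8000 - 400 = 7600 Hz.)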
+--sample-frequency=16000
+--num-mel-bins=40
+--num-ceps=40
+--low-freq=40 # low cutoff frequency for mel bins
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/gale_arabic/s5b/conf/online_cmvn.conf b/egs/gale_arabic/s5b/conf/online_cmvn.conf
new file mode 100644
index 00000000000..cbdaf5f281c
--- /dev/null
+++ b/egs/gale_arabic/s5b/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/online/run_online_decoding_nnet2.sh
diff --git a/egs/gale_arabic/s5b/local/bad_segments b/egs/gale_arabic/s5b/local/bad_segments
new file mode 100644
index 00000000000..c3413f0714c
--- /dev/null
+++ b/egs/gale_arabic/s5b/local/bad_segments
@@ -0,0 +1,10 @@
+ARABIYA_FROMIRAQ_ARB_20070302_175801_2326286_2327450
+ARABIYA_BILARABI_ARB_20061005_201400_221375_223694
+LBC_NAHAR_ARB_20060911_142800_3683267_3685290
+LBC_NAHAR_ARB_20070303_145800_3249800_3251128
+LBC_NAHAR_ARB_20070303_145800_3623646_3624152
+LBC_NAHAR_ARB_20070305_035800_481003_484069
+ALAM_WITHEVENT_ARB_20070227_205800_3141876_3144152
+ALAM_NEWSRPT_ARB_20070130_015801_2875054_2876396
+ALJZ_TODHARV_ARB_20060914_155800_2947717_2949041
+ALJZ_TODHARV_ARB_20070107_145800_2417848_2419238
diff --git a/egs/gale_arabic/s5b/local/chain/run_tdnn.sh b/egs/gale_arabic/s5b/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..34499362831
--- /dev/null
+++ b/egs/gale_arabic/s5b/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1a.sh
\ No newline at end of file
diff --git a/egs/gale_arabic/s5b/local/chain/run_tdnn_lstm.sh b/egs/gale_arabic/s5b/local/chain/run_tdnn_lstm.sh
new file mode 120000
index 00000000000..8e647598556
--- /dev/null
+++ b/egs/gale_arabic/s5b/local/chain/run_tdnn_lstm.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_lstm_1a.sh
\ No newline at end of file
diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh
new file mode 100755
index 00000000000..f897827461c
--- /dev/null
+++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh
@@ -0,0 +1,210 @@
+#!/bin/bash
+
+# started from the tedlium recipe with a few edits
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+nj=30
+decode_nj=30
+min_seg_len=1.55
+xent_regularize=0.1
+train_set=train
+gmm=tri2b # the gmm for the target data
+num_threads_ubm=32
+nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
+
+# The rest are configs specific to this script. Most of the parameters
+# are just hardcoded at this level, in the commands below.
+train_stage=0 #default -10
+tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
+tdnn_affix=1b #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
+common_egs_dir= # you can set this to use previously dumped egs.
+stage=18
+train_stage=3
+# End configuration section.
+echo "$0 $@" # Print the command line for logging
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+ cat <data/lang_chain/topo
+ fi
+fi
+
+if [ $stage -le 15 ]; then
+  # Get the alignments as lattices (gives the chain training more freedom).
+  # use the same num-jobs as the alignments
+  steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
+    data/lang $gmm_dir $lat_dir
+  rm $lat_dir/fsts.*.gz # save space
+fi
+
+if [ $stage -le 16 ]; then
+  # Build a tree using our new topology.
We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1,2) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-6,-3,0) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 2 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_test $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/test_hires $dir/decode || exit 1; +fi +exit 0 diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh new file mode 100755 index 00000000000..e604dc7e714 --- /dev/null +++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -0,0 +1,223 @@ +#!/bin/bash + +#started from tedlium recipe with few edits + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +decode_nj=30 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train +gmm=tri2b # the gmm for the target data gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1a #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). 
+ # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 3 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_test $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/test_hires $dir/decode || exit 1; +fi +exit 0 diff --git a/egs/gale_arabic/s5b/local/gale_data_prep_audio.sh b/egs/gale_arabic/s5b/local/gale_data_prep_audio.sh new file mode 100755 index 00000000000..0fc667ac53a --- /dev/null +++ b/egs/gale_arabic/s5b/local/gale_data_prep_audio.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + + +galeData=$(readlink -f "${@: -1}" ); # last argumnet; the local folder +audio_dvds=${@:1:${#}-1} # all the audio dvds for GALE corpus; ; check audio=( in ../run.sh + +mkdir -p $galeData + +# check that sox is installed +which sox &>/dev/null +if [[ $? != 0 ]]; then + echo "sox is not installed"; exit 1 +fi + +for dvd in $audio_dvds; do + dvd_full_path=$(readlink -f $dvd) + if [[ ! 
-e $dvd_full_path ]]; then
+    echo missing $dvd_full_path; exit 1;
+  fi
+  find $dvd_full_path \( -name "*.wav" -o -name "*.flac" \) | while read file; do
+    id=$(basename $file | awk '{gsub(".wav","");gsub(".flac","");print}')
+    echo "$id sox $file -r 16000 -t wav - |"
+  done
+done | sort -u > $galeData/wav.scp
+
+echo data prep audio succeeded
+
+exit 0
+
diff --git a/egs/gale_arabic/s5b/local/gale_data_prep_split.sh b/egs/gale_arabic/s5b/local/gale_data_prep_split.sh
new file mode 100755
index 00000000000..a62904a3b57
--- /dev/null
+++ b/egs/gale_arabic/s5b/local/gale_data_prep_split.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# Apache 2.0
+
+if [ $# -ne 1 ]; then
+  echo "Arguments should be the <gale folder>"; exit 1
+fi
+
+
+#data will be in data/local
+
+galeData=$(readlink -f $1)
+mkdir -p data/local
+dir=$(readlink -f data/local)
+
+
+grep -f local/test_list $galeData/all | grep -v -f local/bad_segments > $galeData/all.test
+grep -v -f local/test_list $galeData/all | grep -v -f local/bad_segments > $galeData/all.train
+
+for x in test train; do
+  outdir=$dir/$x
+  file=$galeData/all.$x
+  mkdir -p $outdir
+  awk '{print $2 " " $2}' $file | sort -u > $outdir/utt2spk
+  cp -pr $outdir/utt2spk $outdir/spk2utt
+  awk '{print $2 " " $1 " " $3 " " $4}' $file | sort -u > $outdir/segments
+  awk '{printf $2 " "; for (i=5; i<=NF; i++) {printf $i " "} printf "\n"}' $file | sort -u > $outdir/text
+done
+
+
+grep -f local/test_list $galeData/wav.scp > $dir/test/wav.scp
+
+cat $galeData/wav.scp | awk -v seg=$dir/train/segments 'BEGIN{while((getline < seg) > 0) {seen[$2]=1;}}
+  {if (seen[$1]) { print $0}}' > $dir/train/wav.scp
+
+echo data prep split succeeded
+
+exit 0
\ No newline at end of file
diff --git a/egs/gale_arabic/s5b/local/gale_data_prep_txt.sh b/egs/gale_arabic/s5b/local/gale_data_prep_txt.sh
new file mode 100755
index 00000000000..14d7241d4c1
--- /dev/null
+++ b/egs/gale_arabic/s5b/local/gale_data_prep_txt.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# Apache 2.0
+
+galeData=$(readlink -f "${@: -1}" ); # last argument; the local folder
+txt_dvds=${@:1:${#}-1} # all the txt cds corresponding to the audio corpus; check text=( in ../run.sh
+
+
+top_pwd=`pwd`
+txtdir=$galeData/txt
+mkdir -p $txtdir; cd $txtdir
+
+for cdx in $txt_dvds; do
+  echo "Preparing $cdx"
+  if [[ $cdx == *.tgz ]] ; then
+    tar -xvf $cdx
+  elif [ -d "$cdx" ]; then
+    ln -s $cdx `basename $cdx`
+  else
+    echo "I don't really know what I shall do with $cdx " >&2
+  fi
+done
+
+find -L . -type f -name "*.tdf" | while read file; do
+sed '1,3d' $file # delete the first 3 lines
+done > all.tmp$$
+
+perl -e '
+  ($inFile,$idFile,$txtFile)= split /\s+/, $ARGV[0];
+  open(IN, "$inFile");
+  open(ID, ">$idFile");
+  open(TXT, ">$txtFile");
+  while (<IN>) {
+    @arr= split /\t/,$_;
+    $start=sprintf ("%0.3f",$arr[2]);$rStart=$start;$start=~s/\.//; $start=~s/^0+$/0/; $start=~s/^0+([^0])/$1/; # remove zeros at the beginning
+    $end=sprintf ("%0.3f",$arr[3]);$rEnd=$end;$end=~s/^0+([^0])/$1/;$end=~s/\.//;
+    if ( ($arr[11] !~ m/report/) && ($arr[11] !~ m/conversational/) ){$arr[11]="UNK";}
+    $id="$arr[11] $arr[0] $arr[0]_${start}_${end} $rStart $rEnd\n";
+    next if ($rStart == $rEnd);
+    $id =~ s/.sph//g;
+    print ID $id;
+    print TXT "$arr[7]\n";
+  }' "all.tmp$$ allid.tmp$$ contentall.tmp$$"
+
+
+perl ${top_pwd}/local/normalize_transcript_BW.pl contentall.tmp$$ contentall.buck.tmp$$
+
+paste allid.tmp$$ contentall.buck.tmp$$ | sed 's: $::' | awk '{if (NF>5) {print $0}}' > all_1.tmp$$
+
+awk '{$1="";print $0}' all_1.tmp$$ | sed 's:^ ::' > $galeData/all
+awk '{if ($1 == "report") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $galeData/report
+awk '{if ($1 == "conversational") {$1="";print $0}}' all_1.tmp$$ | sed 's:^ ::' > $galeData/conversational
+
+cd ..;
+rm -fr $txtdir
+cd $top_pwd
+echo data prep text succeeded
+
+exit 0
diff --git a/egs/gale_arabic/s5b/local/gale_format_data.sh b/egs/gale_arabic/s5b/local/gale_format_data.sh
new file mode 100755
index 00000000000..a572b8194a3
--- /dev/null
+++ b/egs/gale_arabic/s5b/local/gale_format_data.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+# Copyright 2014 QCRI (author: Ahmed Ali)
+# Apache 2.0
+
+if [ -f path.sh ]; then
+  . path.sh; else
+  echo "$0: missing path.sh"; exit 1;
+fi
+
+for dir in test train; do
+  cp -pr data/local/$dir data/$dir
+done
+
+
+mkdir -p data/lang_test
+
+arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
+[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
+
+rm -r data/lang_test
+cp -r data/lang data/lang_test
+
+gunzip -c "$arpa_lm" | \
+  arpa2fst --disambig-symbol=#0 \
+    --read-symbol-table=data/lang_test/words.txt - data/lang_test/G.fst
+
+
+echo "$0: Checking how stochastic G is (the first of these numbers should be small):"
+fstisstochastic data/lang_test/G.fst
+
+## Check lexicon.
+## just have a look and make sure it seems sane.
+echo "$0: First few lines of lexicon FST:"
+fstprint --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt data/lang/L.fst | head
+
+echo "$0: Performing further checks"
+
+# Checking that G.fst is determinizable.
+fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G.
+
+# Checking that L_disambig.fst is determinizable.
+fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L.
+
+# Checking that disambiguated lexicon times G is determinizable
+# Note: we do this with fstdeterminizestar not fstdeterminize, as
+# fstdeterminize was taking forever (presumably relates to a bug
+# in this version of OpenFst that makes determinization slow for
+# some cases).
+fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
+  fstdeterminizestar >/dev/null || echo Error
+
+# Checking that LG is stochastic:
+fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \
+  fstisstochastic || echo LG is not stochastic
+
+
+echo gale_format_data succeeded.
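+# (Note, added for clarity: fstisstochastic, used in the checks above, prints
+# two numbers, the minimum and maximum deviation from stochasticity; values
+# near zero mean the arc probabilities leaving each state sum to roughly one.)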
+
+exit 0
diff --git a/egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh b/egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh
new file mode 100755
index 00000000000..0162eb49330
--- /dev/null
+++ b/egs/gale_arabic/s5b/local/gale_prep_grapheme_dict.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright 2017 QCRI (author: Ahmed Ali)
+# Apache 2.0
+
+
+# run this from ../
+dir=$(readlink -f data/local/dict)
+mkdir -p $dir
+
+
+# (1) Get all available dictionaries; since this is a grapheme model, we mainly need the most frequent word lists
+wget http://alt.qcri.org//resources/speech/dictionary/ar-ar_grapheme_lexicon_2016-02-09.bz2 || exit 1;
+wget http://alt.qcri.org//resources/speech/dictionary/ar-ar_lexicon_2014-03-17.txt.bz2 || exit 1;
+bzcat ar-ar_grapheme_lexicon_2016-02-09.bz2 | sed '1,3d' | awk '{print $1}' > tmp$$
+bzcat ar-ar_lexicon_2014-03-17.txt.bz2 | sed '1,3d' | awk '{print $1}' >> tmp$$
+# (2) Now we add all the words that appear in the training data
+cat data/local/train/text | cut -d ' ' -f 2- | tr -s " " "\n" | sort -u >> tmp$$
+grep -v [0-9] tmp$$ | sed -e 's:[FNKaui\~o\`]::g' -e 's:{:}:g' | sort -u > tmp1.$$ # remove vowels and rare alef wasla
+cat tmp1.$$ | sed 's:\(\):\1 :g' | sed -e 's: : :g' -e 's: : :g' -e 's:\s*: :g' -e 's:\*:V:g' > tmp2.$$
+paste -d ' ' tmp1.$$ tmp2.$$ > $dir/lexicon.txt
+
+# (3) Dictionary preparation:
+
+# silence phones, one per line.
+echo SIL > $dir/silence_phones.txt
+echo SIL > $dir/optional_silence.txt
+
+# nonsilence phones; on each line is a list of phones that correspond
+# really to the same base phone.
+cat tmp2.$$ | tr -s ' ' '\n' | grep -v ^$ | sort -u > $dir/nonsilence_phones.txt || exit 1;
+
+sed -i '1i <UNK> SIL' $dir/lexicon.txt # insert the <UNK> word with phone SIL at the beginning of the dictionary
+
+rm -fr ar-ar_lexicon_2014-03-17.txt.bz2 ar-ar_grapheme_lexicon_2016-02-09.bz2 tmp$$ tmp1.$$ tmp2.$$
+echo Dictionary preparation succeeded
+
+# The script is still missing dates and numbers
+
+exit 0
+
diff --git a/egs/gale_arabic/s5b/local/gale_train_lms.sh b/egs/gale_arabic/s5b/local/gale_train_lms.sh
new file mode 100755
index 00000000000..3988ec3818f
--- /dev/null
+++ b/egs/gale_arabic/s5b/local/gale_train_lms.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+
+# To be run from one directory above this script.
+
+
+lexicon=data/local/dict/lexicon.txt
+[ ! -f $lexicon ] && echo "$0: No such file $lexicon" && exit 1;
+
+
+# This script takes no arguments. It assumes you have already run
+# the previous steps successfully.
+# It takes as input the files
+#data/local/train.*/text
+#data/local/dict/lexicon.txt
+
+
+export LC_ALL=C # You'll get errors about things being not sorted, if you
+# have a different locale.
+export PATH=$PATH:./../../../tools/kaldi_lm
+( # First make sure the kaldi_lm toolkit is installed.
+ cd $KALDI_ROOT/tools || exit 1;
+ if [ -d kaldi_lm ]; then
+   echo Not installing the kaldi_lm toolkit since it is already there.
+ else
+   echo Downloading and installing the kaldi_lm tools
+   if [ ! -f kaldi_lm.tar.gz ]; then
+     wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
+   fi
+   tar -xvzf kaldi_lm.tar.gz || exit 1;
+   cd kaldi_lm
+   make || exit 1;
+   echo Done making the kaldi_lm tools
+ fi
+) || exit 1;
+
+
+dir=data/local/lm
+ mkdir -p $dir
+ text=data/local/train/text
+ [ ! -f $text ] && echo "$0: No such file $text" && exit 1;
+
+ cleantext=$dir/text.no_oov
+
+ cat $text | awk -v lex=$lexicon 'BEGIN{while((getline < lex) > 0){ seen[$1]=1; } }
+  {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf("<unk> ",$n);} } printf("\n");}' \
+  > $cleantext || exit 1;
+
+
+ cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
+   sort -nr > $dir/word.counts || exit 1;
+
+
+# Get counts from acoustic training transcripts, and add one-count
+# for each word in the lexicon (but not silence, we don't want it
+# in the LM-- we'll add it optionally later).
+ cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
+   cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
+   sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
+
+# note: we probably won't really make use of <unk> as there aren't any OOVs
+ cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "<s>" "</s>" "<unk>" > $dir/word_map \
+   || exit 1;
+
+# note: ignore 1st field of train.txt, it's the utterance-id.
+ cat $cleantext | awk -v wmap=$dir/word_map 'BEGIN{while((getline < wmap) > 0)map[$1]=$2;}
+  { for(n=2;n<=NF;n++) { printf map[$n]; if(n<NF){ printf(" "); } else { print ""; }}}' | gzip -c >$dir/train.gz \
+   || exit 1;
+
+ train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1;
+
+# LM is small enough that we don't need to prune it (only about 0.7M N-grams).
+# Perplexity over 128254.000000 words is 90.446690
+
+# note: output is
+# data/local/lm/3gram-mincount/lm_unpruned.gz
+
+
+echo train lm succeeded
+
+exit 0
diff --git a/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh
new file mode 100755
index 00000000000..d9fc3385a42
--- /dev/null
+++ b/egs/gale_arabic/s5b/local/nnet3/run_ivector_common.sh
@@ -0,0 +1,237 @@
+#!/bin/bash
+
+set -e -o pipefail
+
+# This script is called from local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh (and may eventually
+# be called by more scripts). It contains the common feature preparation and iVector-related parts
+# of the script. See those scripts for examples of usage.
+
+
+stage=0
+nj=100
+min_seg_len=1.55 # min length in seconds... we do this because chain training
+                 # will discard segments shorter than 1.5 seconds. Must remain in sync
+                 # with the same option given to prepare_lores_feats_and_alignments.sh
+train_set=train # you might set this to e.g. train.
+gmm=tri2b # This specifies a GMM-dir from the features of the type you're training the system on;
+          # it should contain alignments for 'train_set'.
+
+num_threads_ubm=32
+nnet3_affix=_cleaned # affix for exp/nnet3 directory to put iVector stuff in, so it
+                     # becomes exp/nnet3_cleaned or whatever.
+
+. cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+gmm_dir=exp/${gmm}
+ali_dir=exp/${gmm}_ali_${train_set}_sp_comb
+
+for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do
+  if [ ! -f $f ]; then
+    echo "$0: expected file $f to exist"
+    exit 1
+  fi
+done
+
+
+
+if [ $stage -le 2 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then
+  echo "$0: data/${train_set}_sp_hires/feats.scp already exists."
+  echo " ... Please either remove it, or rerun this script with stage > 2."
+  exit 1
+fi
+
+
+if [ $stage -le 1 ]; then
+  echo "$0: preparing directory for speed-perturbed data"
+  utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
+fi
+
+if [ $stage -le 2 ]; then
+  echo "$0: creating high-resolution MFCC features"
+
+  # this shows how you can split across multiple file-systems. we'll split the
+  # MFCC dir across multiple locations.
You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp test; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires + + for datadir in ${train_set}_sp test; do + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires + steps/compute_cmvn_stats.sh data/${datadir}_hires + utils/fix_data_dir.sh data/${datadir}_hires + done +fi + +if [ $stage -le 3 ]; then + echo "$0: combining short segments of speed-perturbed high-resolution MFCC training data" + # we have to combine short segments or we won't be able to train chain models + # on those segments. + utils/data/combine_short_segments.sh \ + data/${train_set}_sp_hires $min_seg_len data/${train_set}_sp_hires_comb + + # just copy over the CMVN to avoid having to recompute it. + cp data/${train_set}_sp_hires/cmvn.scp data/${train_set}_sp_hires_comb/ + utils/fix_data_dir.sh data/${train_set}_sp_hires_comb/ +fi + +if [ $stage -le 4 ]; then + echo "$0: selecting segments of hires training data that were also present in the" + echo " ... original training data." + + # note, these data-dirs are temporary; we put them in a sub-directory + # of the place where we'll make the alignments. + temp_data_root=exp/nnet3${nnet3_affix}/tri5 + mkdir -p $temp_data_root + + utils/data/subset_data_dir.sh --utt-list data/${train_set}/feats.scp \ + data/${train_set}_sp_hires $temp_data_root/${train_set}_hires + + # note: essentially all the original segments should be in the hires data. + n1=$(wc -l /dev/null || true + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ + ${graph_dir} data/test_hires ${dir}/decode || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/test_hires ${dir}/decode_test ${dir}/decode_test_rescore || exit 1 +fi + +exit 0; diff --git a/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..a6cc6e2dec8 --- /dev/null +++ b/egs/gale_arabic/s5b/local/nnet3/tuning/run_tdnn_1a.sh @@ -0,0 +1,88 @@ +#!/bin/bash + +# started from tedlium recipe with few edits + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train +gmm=tri2b # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix= #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. 
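+
+# A hedged usage sketch (hypothetical values, not from the original recipe):
+# since utils/parse_options.sh is sourced below, any of the variables above can
+# be overridden from the command line, writing underscores as dashes, e.g.:
+#   local/nnet3/run_tdnn.sh --stage 8 --decode-nj 20 --tdnn-affix 1b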
+ +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0" +remove_egs=true +relu_dim=850 +num_epochs=3 + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < \n"; + exit (1); + } + +# <\check usage> +my $inFile = shift (@ARGV); +my $ouFile = shift(@ARGV); + + +open INFILE, "<$inFile" || die "unable to open the input file $inFile\n"; +binmode INFILE, ":encoding(utf8)"; + + +open OUTPUTFILE, ">$ouFile" or die "unable to open the output mlf file $ouFile\n"; +binmode OUTPUTFILE, ":encoding(utf8)"; + + +while () { + s/[^اأإآبتثجحخدذرزسشصضطظعغفقكلمنهويىئءؤة0-9]+/ /g; ## Removes non Arabic or numbers + my $BW = convertUTF8ToBuckwalter ($_); + print OUTPUTFILE "$BW"."\n"; +} +close INFILE; +close OUTPUTFILE; + + + +# this function is copied from MADATools.pm: MADA Tools + sub convertUTF8ToBuckwalter { + + my ($line)= (@_); + #$line = $UTF8_ENCODING_OBJ->decode($line); ## Same as Encode::decode("utf8",$line), but faster since object already created + $line =~ s/\x{0621}/\'/g; ## HAMZA + $line =~ s/\x{0622}/\|/g; ## ALEF WITH MADDA ABOVE + $line =~ s/\x{0623}/\>/g; ## ALEF WITH HAMZA ABOVE + $line =~ s/\x{0624}/\&/g; ## WAW WITH HAMZA ABOVE + $line =~ s/\x{0625}/\ " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." + echo " --min_lmwt # minumum LM-weight for lattice rescoring " + echo " --max_lmwt # maximum LM-weight for lattice rescoring " + exit 1; +fi + +data=$1 +lang_or_graph=$2 +dir=$3 + +symtab=$lang_or_graph/words.txt + +for f in $symtab $dir/lat.1.gz $data/text; do + [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; +done + +mkdir -p $dir/scoring/log + +cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt + +$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab \ + ark:- ark,t:$dir/scoring/LMWT.tra || exit 1; + +# Note: the double level of quoting for the sed command +$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ + cat $dir/scoring/LMWT.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; + +exit 0; diff --git a/egs/gale_arabic/s5b/local/split_wer.sh b/egs/gale_arabic/s5b/local/split_wer.sh new file mode 100755 index 00000000000..70c97ae5d19 --- /dev/null +++ b/egs/gale_arabic/s5b/local/split_wer.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# Report WER for reports and conversational +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + +if [ $# -ne 1 ]; then + echo "Arguments should be the gale folder, see ../run.sh for example." + exit 1; +fi + +[ -f ./path.sh ] && . 
./path.sh + + +galeFolder=$(readlink -f $1) +symtab=./data/lang/words.txt +find exp/ -maxdepth 3 -type d -name decode\* > list_decode$$ + +#split the test set per type: +awk '{print $2}' $galeFolder/all.test | sort -u > $galeFolder/test_id$$ + +# generate the report test set +awk '{print $2}' $galeFolder/report | sort -u > $galeFolder/report_id$$ +comm -1 -2 $galeFolder/test_id$$ $galeFolder/report_id$$ > $galeFolder/report.test + +# generate the conversational test set +awk '{print $2}' $galeFolder/conversational | sort -u > $galeFolder/conversational_id$$ + +comm -1 -2 $galeFolder/test_id$$ $galeFolder/conversational_id$$ > $galeFolder/conversational.test + +rm -fr $galeFolder/test_id$$ $galeFolder/report_id$$ $galeFolder/conversational_id$$ + +min_lmwt=7 +max_lmwt=20 +cat list_decode$$ | while read dir; do + for type in report conversational; do + #echo "Processing: $dir $type" + rm -fr $dir/scoring_$type + cp -pr $dir/scoring $dir/scoring_$type + ( cd $dir/scoring_$type; + for x in *.tra test_filt.txt; do + sort -u $x > tmp$$ + join tmp$$ $galeFolder/${type}.test > $x + rm -fr tmp$$ + done + ) + +utils/run.pl LMWT=$min_lmwt:$max_lmwt $dir/scoring_$type/log/score.LMWT.log \ + cat $dir/scoring_${type}/LMWT.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring_${type}/test_filt.txt ark,p:- ">&" $dir/wer_${type}_LMWT +done +done + + +time=$(date +"%Y-%m-%d-%H-%M-%S") +echo "RESULTS generated by $USER at $time" + +echo "Report Results WER:" +cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_report_* | utils/best_wer.sh; done | sort -n -k2 + +echo "Conversational Results WER:" +cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_conversational_* | utils/best_wer.sh; done | sort -n -k2 + +echo "Combined Results for Reports and Conversational WER:" +cat list_decode$$ | while read x; do [ -d $x ] && grep WER $x/wer_?? $x/wer_?| utils/best_wer.sh; done | sort -n -k2 + +rm list_decode$$ + + + diff --git a/egs/gale_arabic/s5b/local/test_list b/egs/gale_arabic/s5b/local/test_list new file mode 100644 index 00000000000..d82cf498804 --- /dev/null +++ b/egs/gale_arabic/s5b/local/test_list @@ -0,0 +1,11 @@ +ALAM_WITHEVENT_ARB_20070116_205800 +ALAM_WITHEVENT_ARB_20070130_205800 +ALAM_WITHEVENT_ARB_20070206_205801 +ALAM_WITHEVENT_ARB_20070213_205800 +ALAM_WITHEVENT_ARB_20070227_205800 +ALAM_WITHEVENT_ARB_20070306_205800 +ALAM_WITHEVENT_ARB_20070313_205800 +ARABIYA_FROMIRAQ_ARB_20070216_175800 +ARABIYA_FROMIRAQ_ARB_20070223_175801 +ARABIYA_FROMIRAQ_ARB_20070302_175801 +ARABIYA_FROMIRAQ_ARB_20070309_175800 diff --git a/egs/gale_arabic/s5b/path.sh b/egs/gale_arabic/s5b/path.sh new file mode 100755 index 00000000000..be11b34cbc6 --- /dev/null +++ b/egs/gale_arabic/s5b/path.sh @@ -0,0 +1,5 @@ +export KALDI_ROOT=$(pwd)/../../.. +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C diff --git a/egs/gale_arabic/s5b/run.sh b/egs/gale_arabic/s5b/run.sh new file mode 100755 index 00000000000..9cc72d31a95 --- /dev/null +++ b/egs/gale_arabic/s5b/run.sh @@ -0,0 +1,167 @@ +#!/bin/bash -e + +# Copyright 2014 QCRI (author: Ahmed Ali) +# Apache 2.0 + +. path.sh +. cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. 
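+
+# A hedged example (not part of the original recipe): to run everything on a
+# single machine with no queueing system, cmd.sh could be edited to use run.pl
+# instead of queue.pl, e.g.:
+#   export train_cmd="run.pl"
+#   export decode_cmd="run.pl"
+#   export mkgraph_cmd="run.pl"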
+num_jobs=120
+num_decode_jobs=40
+
+#NB: You can add whatever number of corpora you like. The supported extensions
+#NB: (formats) are wav and flac. Flac will be converted using sox and, in contrast
+#NB: with the old approach, the conversion will be on-the-fly and one-time-only
+#NB: during the parametrization.
+
+#NB: Text corpora specification. We support either tgz files, which are unpacked,
+#NB: or just plain (already unpacked) directories. The list of transcripts is then
+#NB: obtained using the find command.
+
+#This is the CLSP configuration. We add the 2014 GALE data. We got around 2%
+#improvement just by including it. The gain might be larger if someone tweaked
+# the number of leaves and states and so on.
+
+#Make sure you edit this section to reflect where you keep the LDC data on your cluster
+audio=(
+  /data/sls/scratch/amali/data/GALE/LDC2013S02
+  /data/sls/scratch/amali/data/GALE/LDC2013S07
+  /data/sls/scratch/amali/data/GALE/LDC2014S07
+)
+text=(
+  /data/sls/scratch/amali/data/GALE/LDC2013T17.tgz
+  /data/sls/scratch/amali/data/GALE/LDC2013T04.tgz
+  /data/sls/scratch/amali/data/GALE/LDC2014T17.tgz
+)
+
+galeData=GALE
+#prepare the data
+#split train dev test
+#prepare lexicon and LM
+
+# You can run the script from here automatically, but it is recommended to run
+# the data preparation and feature extraction manually and only once,
+# by copying and pasting into your shell.
+
+#copy the audio files to a local folder wav and convert flac files to wav
+local/gale_data_prep_audio.sh "${audio[@]}" $galeData || exit 1;
+
+#get the transcription and remove empty prompts and all noise markers
+local/gale_data_prep_txt.sh "${text[@]}" $galeData || exit 1;
+
+# split the data into reports and conversational; each class will have train/dev and test
+local/gale_data_prep_split.sh $galeData || exit 1;
+
+# get all Arabic grapheme dictionaries and add silence and UNK
+local/gale_prep_grapheme_dict.sh || exit 1;
+
+
+#prepare the language resources
+utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang data/lang || exit 1;
+
+# LM training
+local/gale_train_lms.sh || exit 1;
+
+local/gale_format_data.sh || exit 1;
+# G compilation, check LG composition
+
+# Now make MFCC features.
+# mfccdir should be some place with a largish disk where you
+# want to store MFCC features.
+mfccdir=mfcc
+
+for x in train test ; do
+  steps/make_mfcc.sh --cmd "$train_cmd" --nj $num_jobs \
+    data/$x exp/make_mfcc/$x $mfccdir
+  utils/fix_data_dir.sh data/$x # some files fail to get mfcc for many reasons
+  steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir
+done
+
+
+# Here we start the AM
+
+# Let's create a subset with 10k segments to make quick flat-start training:
+utils/subset_data_dir.sh data/train 10000 data/train.10K || exit 1;
+
+# Train monophone models on a subset of the data, 10K segments
+# Note: the --boost-silence option should probably be omitted by default
+steps/train_mono.sh --nj 40 --cmd "$train_cmd" \
+  data/train.10K data/lang exp/mono || exit 1;
+
+
+# Get alignments from monophone system.
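+# (Note, added for clarity: each align step in this pipeline produces the
+# alignments that seed the next training pass, e.g. exp/mono_ali below feeds
+# train_deltas for tri1.)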
+steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ + data/train data/lang exp/mono exp/mono_ali || exit 1; + +# train tri1 [first triphone pass] +steps/train_deltas.sh --cmd "$train_cmd" \ + 2500 30000 data/train data/lang exp/mono_ali exp/tri1 || exit 1; + +# First triphone decoding +utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph +steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ + exp/tri1/graph data/test exp/tri1/decode + +steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ + data/train data/lang exp/tri1 exp/tri1_ali || exit 1; + +# Train tri2a, which is deltas+delta+deltas +steps/train_deltas.sh --cmd "$train_cmd" \ + 3000 40000 data/train data/lang exp/tri1_ali exp/tri2a || exit 1; + +# tri2a decoding +utils/mkgraph.sh data/lang_test exp/tri2a exp/tri2a/graph +steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ + exp/tri2a/graph data/test exp/tri2a/decode + +# train and decode tri2b [LDA+MLLT] +steps/train_lda_mllt.sh --cmd "$train_cmd" 4000 50000 \ + data/train data/lang exp/tri1_ali exp/tri2b || exit 1; + +utils/mkgraph.sh data/lang_test exp/tri2b exp/tri2b/graph +steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ + exp/tri2b/graph data/test exp/tri2b/decode + +# Align all data with LDA+MLLT system (tri2b) +steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ + --use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali || exit 1; + + +# From 2b system, train 3b which is LDA + MLLT + SAT. +steps/train_sat.sh --cmd "$train_cmd" \ + 5000 100000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1; + +utils/mkgraph.sh data/lang_test exp/tri3b exp/tri3b/graph +steps/decode_fmllr.sh --nj $num_decode_jobs --cmd \ + "$decode_cmd" exp/tri3b/graph data/test exp/tri3b/decode + +# From 3b system, align all data. 
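+# (Note, added for clarity: align_fmllr.sh first estimates per-speaker fMLLR
+# transforms with the SAT model and then aligns the speaker-adapted features.)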
+steps/align_fmllr.sh --nj $num_jobs --cmd "$train_cmd" \
+  data/train data/lang exp/tri3b exp/tri3b_ali || exit 1;
+
+
+# nnet3 cross-entropy
+local/nnet3/run_tdnn.sh #tdnn recipe:
+local/nnet3/run_lstm.sh #lstm recipe:
+
+# chain lattice-free
+local/chain/run_tdnn.sh #tdnn recipe:
+local/chain/run_tdnn_lstm.sh #tdnn-lstm recipe:
+
+time=$(date +"%Y-%m-%d-%H-%M-%S")
+
+#get detailed WER; reports, conversational and combined
+local/split_wer.sh $galeData > RESULTS.details.$USER.$time # to make sure you keep the results timed and owned
+
+echo training succeeded
+exit 0
+
+#TODO:
+#LM (4-gram and RNN) rescoring
+#combine lattices
+#dialect detection
+
+
+
+
diff --git a/egs/gale_arabic/s5b/steps b/egs/gale_arabic/s5b/steps
new file mode 120000
index 00000000000..1b186770dd1
--- /dev/null
+++ b/egs/gale_arabic/s5b/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps/
\ No newline at end of file
diff --git a/egs/gale_arabic/s5b/utils b/egs/gale_arabic/s5b/utils
new file mode 120000
index 00000000000..a3279dc8679
--- /dev/null
+++ b/egs/gale_arabic/s5b/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils/
\ No newline at end of file
diff --git a/egs/wsj/s5/utils/build_const_arpa_lm.sh b/egs/wsj/s5/utils/build_const_arpa_lm.sh
index 375ffd79eb4..ec067df0d39 100755
--- a/egs/wsj/s5/utils/build_const_arpa_lm.sh
+++ b/egs/wsj/s5/utils/build_const_arpa_lm.sh
@@ -34,7 +34,7 @@ mkdir -p $new_lang
 cp -r $old_lang/* $new_lang
 
 unk=`cat $new_lang/oov.int`
-bos=`grep "<s>" $new_lang/words.txt | awk '{print $2}'`
+bos=`grep -w "<s>" $new_lang/words.txt | awk '{print $2}'`
 eos=`grep "</s>" $new_lang/words.txt | awk '{print $2}'`
 if [[ -z $bos || -z $eos ]]; then
   echo "$0: <s> and </s> symbols are not in $new_lang/words.txt"
diff --git a/src/configure b/src/configure
index bf478b5b73f..a4f3ce1c8b3 100755
--- a/src/configure
+++ b/src/configure
@@ -15,17 +15,32 @@
 # ./configure --mkl-root=/opt/intel/mkl --threaded-math=yes
 # ./configure --mkl-root=/opt/intel/mkl --threaded-math=yes --mkl-threading=tbb
 #             # This is for MKL 11.3, which does not seem to provide Intel OMP libs
-# ./configure --openblas-root=../tools/OpenBLAS/install # before doing
-#             # this, cd to ../tools and type "make openblas". Note:
-#             # this is not working correctly on all platforms, do "make test"
+# ./configure --openblas-root=../tools/OpenBLAS/install
+#             # Before doing this, cd to ../tools and type "make openblas".
+#             # Note: this is not working correctly on all platforms, do "make test"
 #             # and look out for segmentation faults.
 # ./configure --atlas-root=../tools/ATLAS/build
 # ./configure --use-cuda=no # disable CUDA detection (will build cpu-only
 #             # version of kaldi even on CUDA-enabled machine
+# ./configure --static --fst-root=/opt/cross/armv8hf \
+#             --atlas-root=/opt/cross/armv8hf --host=armv8-rpi3-linux-gnueabihf
+#             # Cross compile for armv8hf; this assumes that you have openfst built
+#             # with the armv8-rpi3-linux-gnueabihf toolchain and installed to
+#             # /opt/cross/armv8hf. It also assumes that you have an ATLAS library
+#             # built for the target and installed to /opt/cross/armv8hf, and that the
+#             # armv8-rpi3-linux-gnueabihf toolchain is available in your path
+# ./configure --static --openblas-root=/opt/cross/arm-linux-androideabi \
+#             --fst-root=/opt/cross/arm-linux-androideabi --fst-version=1.4.1 \
+#             --android-incdir=/opt/cross/arm-linux-androideabi/sysroot/usr/include \
+#             --host=arm-linux-androideabi
+#             # Cross compile for Android on arm.
The only difference here is the +# # addition of the the --android-includes flag because the toolchains +# # produced by the Android NDK don't always include the C++ stdlib +# # headers in the normal cross compile include path. # This should be incremented after any significant change to the configure # script, i.e. any change affecting kaldi.mk or the build system as a whole. -CONFIGURE_VERSION=5 +CONFIGURE_VERSION=6 if ! [ -x "$PWD/configure" ]; then echo 'You must run "configure" from the src/ directory.' @@ -42,35 +57,46 @@ The default configuration is to build and link against static Kaldi libraries. OpenFst and Math libraries are linked dynamically. Configuration options: - --help Display this help message and exit - --version Display the version of 'configure' and exit - --static Build and link against static libraries [default=no] - --shared Build and link against shared libraries [default=no] - --use-cuda Build with CUDA [default=yes] - --cudatk-dir=DIR CUDA toolkit directory - --double-precision Build with double precision numbers [default=no] - --static-fst Build with static OpenFst libraries [default=no] - --fst-root=DIR OpenFst root directory [default=../tools/openfst/] - --mathlib=LIB Math library [default=ATLAS] - Supported libraries: ATLAS, MKL, CLAPACK, OPENBLAS. - --static-math Build with static math libraries [default=no] - --threaded-math Build with multi-threaded math libraries [default=no] - --threaded-atlas Build with multi-threaded ATLAS libraries [default=no] - --atlas-root=DIR ATLAS root directory [default=../tools/ATLAS/] - --openblas-root=DIR OpenBLAS root directory - --clapack-root=DIR CLAPACK root directory - --mkl-root=DIR MKL root directory - --mkl-libdir=DIR MKL library directory - --mkl-threading=LIB MKL threading layer [default=sequential] - Supported layers: sequential, iomp, tbb, gomp. - --omp-libdir=DIR OpenMP directory - --speex-root=DIR SPEEX root directory - --speex-libdir=DIR SPEEX library directory - --speex-incdir=DIR SPEEX include directory - -Following environment variables can be used to override the compiler -or to provide additional flags to the compiler/linker. - CXX C++ compiler command + --help Display this help message and exit + --version Display the version of 'configure' and exit + --static Build and link against static libraries [default=no] + --shared Build and link against shared libraries [default=no] + --use-cuda Build with CUDA [default=yes] + --cudatk-dir=DIR CUDA toolkit directory + --double-precision Build with double precision floats [default=no] + --static-fst Build with static OpenFst libraries [default=no] + --fst-root=DIR OpenFst root directory [default=../tools/openfst/] + --fst-version=STR OpenFst version string + --mathlib=LIB Math library [default=ATLAS] + Supported libraries: ATLAS, MKL, CLAPACK, OPENBLAS. + --static-math Build with static math libraries [default=no] + --threaded-math Build with multi-threaded math libraries [default=no] + --threaded-atlas Build with multi-threaded ATLAS libraries [default=no] + --atlas-root=DIR ATLAS root directory [default=../tools/ATLAS/] + --openblas-root=DIR OpenBLAS root directory + --clapack-root=DIR CLAPACK root directory + --mkl-root=DIR MKL root directory + --mkl-libdir=DIR MKL library directory + --mkl-threading=LIB MKL threading layer [default=sequential] + Supported layers: sequential, iomp, tbb, gomp. 
+ --omp-libdir=DIR OpenMP directory + --speex-root=DIR SPEEX root directory + --speex-libdir=DIR SPEEX library directory + --speex-incdir=DIR SPEEX include directory + --host=HOST Host triple in the format 'cpu-vendor-os' + If provided, it is prepended to all toolchain programs. + --android-incdir=DIR Andraid include directory + +Following environment variables can be used to override the default toolchain. + CXX C++ compiler [default=g++] + AR Archive maintenance utility [default=ar] + AS Assembler [default=as] + RANLIB Archive indexing utility [default=ranlib] + +If a host triple is provided, it is prepended to CXX, AR, AS and RANLIB. + +Following environment variables can be used to provide additional flags to the +compiler/linker. CXXFLAGS Additional C++ compiler flags, e.g. -I LDFLAGS Additional linker flags, e.g. -L LDLIBS Additional libraries to pass to the linker, e.g. -l @@ -111,6 +137,16 @@ function check_exists { if [ ! -f $1 ]; then failure "$1 not found."; fi } +function check_library { + local libpath=$1 + local libname=$2 + local libext=$3 + local full_libname="$libpath/$libname.$libext" + ##echo "Testing $full_libname" >&2 + test -f "$full_libname" && return ; + return 1 +} + function check_compiler { COMPILER=$1 if ! which $COMPILER >&/dev/null; then @@ -151,190 +187,21 @@ function check_compiler { } function check_for_slow_expf { - cd probe - rm -f exp-test - make -f Makefile.slow_expf 1>/dev/null - ./exp-test - if [ $? -eq 1 ]; then - echo "*** WARNING: expf() seems to be slower than exp() on your machine. This is a known bug in old versions of glibc. Please consider updating glibc. ***" - echo "*** Kaldi will be configured to use exp() instead of expf() in base/kaldi-math.h Exp() routine for single-precision floats. ***" - echo "CXXFLAGS += -DKALDI_NO_EXPF" >> ../kaldi.mk + # We cannot run this test if we are cross compiling. + if [[ "$TARGET_ARCH" == "`uname -m`" ]] ; then + cd probe + rm -f exp-test + make -f Makefile.slow_expf 1>/dev/null + ./exp-test + if [ $? -eq 1 ]; then + echo "*** WARNING: expf() seems to be slower than exp() on your machine. This is a known bug in old versions of glibc. Please consider updating glibc. ***" + echo "*** Kaldi will be configured to use exp() instead of expf() in base/kaldi-math.h Exp() routine for single-precision floats. ***" + echo "CXXFLAGS += -DKALDI_NO_EXPF" >> ../kaldi.mk + fi + cd .. fi - cd .. -} - -function check_library { - local libpath=$1 - local libname=$2 - local libext=$3 - local full_libname="$libpath/$libname.$libext" - ##echo "Testing $full_libname" >&2 - test -f "$full_libname" && return ; - return 1 } -# If configuration sets any of these variables, we will switch the external -# math library. Here we unset them so that we can check later. -unset MKLROOT -unset CLAPACKROOT -unset OPENBLASROOT -unset MKLLIBDIR - -# These environment variables are OK. 
-CXX=${CXX:-g++} -ENV_CXXFLAGS=$CXXFLAGS -ENV_LDFLAGS=$LDFLAGS -ENV_LDLIBS=$LDLIBS - -# Default configuration -dynamic_kaldi=false -use_cuda=true -static_fst=false -static_math=false -threaded_atlas=false -mkl_threading=sequential -double_precision=false - -MATHLIB='ATLAS' -ATLASROOT=`rel2abs ../tools/ATLAS/` -FSTROOT=`rel2abs ../tools/openfst` - -# Save the command line to include in kaldi.mk -cmd_line="$0 $@" - -while [ $# -gt 0 ]; -do - case "$1" in - --help) - usage; exit 0 ;; - --version) - echo $CONFIGURE_VERSION; exit 0 ;; - --static) - dynamic_kaldi=false; - static_math=true; - static_fst=true; - shift ;; - --shared) - dynamic_kaldi=true; - static_math=false; - static_fst=false; - shift ;; - --double-precision) - double_precision=true; - shift ;; - --double-precision=yes) - double_precision=true; - shift ;; - --double-precision=no) - double_precision=false; - shift ;; - --atlas-root=*) - ATLASROOT=`read_dirname $1`; - shift ;; - --threaded-atlas) - threaded_atlas=true; - shift ;; - --threaded-atlas=yes) - threaded_atlas=true; - shift ;; - --threaded-atlas=no) - threaded_atlas=false; - shift ;; - --threaded-math) - threaded_atlas=true; - mkl_threading=iomp - shift ;; - --threaded-math=yes) - threaded_atlas=true; - mkl_threading=iomp - shift ;; - --threaded-math=no) - threaded_atlas=false; - mkl_threading=sequential - shift ;; - --use-cuda) - use_cuda=true; - shift ;; - --use-cuda=yes) - use_cuda=true; - shift ;; - --use-cuda=no) - use_cuda=false; - shift ;; - --static-math) - static_math=true; - shift ;; - --static-math=yes) - static_math=true; - shift ;; - --static-math=no) - static_math=false; - shift ;; - --static-fst) - static_fst=true; - shift ;; - --static-fst=yes) - static_fst=true; - shift ;; - --static-fst=no) - static_fst=false; - shift ;; - --mkl-threading=sequential) - threaded_atlas=false; - mkl_threading=sequential; - shift ;; - --mkl-threading=*) - mkl_threading=`expr "X$1" : '[^=]*=\(.*\)'`; - threaded_atlas=true; - shift ;; - --fst-root=*) - FSTROOT=`read_dirname $1`; - shift ;; - --clapack-root=*) - CLAPACKROOT=`read_dirname $1`; - shift ;; - --openblas-root=*) - OPENBLASROOT=`read_dirname $1`; - shift ;; - --mkl-root=*) - MKLROOT=`read_dirname $1`; - shift ;; - --mkl-libdir=*) - MKLLIBDIR=`read_dirname $1`; - shift ;; - --speex-root=*) - SPEEXROOT=`read_dirname $1`; - shift ;; - --speex-libdir=*) - SPEEXLIBDIR=`read_dirname $1`; - shift ;; - --speex-incdir=*) - SPEEXINCLUDEDIR=`read_dirname $1`; - shift ;; - --omp-libdir=*) - OMPLIBDIR=`read_dirname $1`; - shift ;; - --mathlib=*) - MATHLIB=`expr "X$1" : '[^=]*=\(.*\)'`; - shift ;; - --cudatk-dir=*) - CUDATKDIR=`read_dirname $1`; - shift ;; #CUDA is used in src/cudamatrix and src/nnet{,bin} only - *) echo "Unknown argument: $1, exiting"; usage; exit 1 ;; - esac -done - -# The idea here is that if you change the configuration options from using -# CUDA to not using it, or vice versa, we want to recompile all parts of the -# code that may use a GPU. Touching this file is a way to force this. -touch cudamatrix/cu-common.h 2>/dev/null - -# If one of these variables is set, switch the external math library. 
-is_set $MKLLIBDIR && echo "Configuring KALDI to use MKL" && export MATHLIB="MKL"
-is_set $MKLROOT && echo "Configuring KALDI to use MKL"&& export MATHLIB="MKL"
-is_set $CLAPACKROOT && echo "Configuring KALDI to use CLAPACK"&& export MATHLIB="CLAPACK"
-is_set $OPENBLASROOT && echo "Configuring KALDI to use OPENBLAS"&& export MATHLIB="OPENBLAS"
-
 # MKL functions
 function linux_configure_mkllibdir {
   local mklroot=$1
@@ -508,6 +375,11 @@ function configure_cuda {
   if [ ! -f $CUDATKDIR/bin/nvcc ]; then
     failure "Cannot find nvcc in CUDATKDIR=$CUDATKDIR"
   fi
+
+  if [[ "$TARGET_ARCH" != "`uname -m`" ]] ; then
+    failure "Cannot cross compile with CUDA support"
+  fi
+
   echo "Using CUDA toolkit $CUDATKDIR (nvcc compiler and runtime libraries)"
   echo >> kaldi.mk
   echo "# CUDA configuration" >> kaldi.mk
@@ -532,7 +404,7 @@ function configure_cuda {
   echo "CUDA_ARCH = $CUDA_ARCH" >> kaldi.mk
   echo >> kaldi.mk
 
-  # 64bit/32bit?
+  # 64bit/32bit? We do not support cross compilation with CUDA, so we use direct calls to uname -m here.
   if [ "`uname -m`" == "x86_64" ]; then
     if [ "`uname`" == "Darwin" ]; then
       sed 's/lib64/lib/g' < makefiles/cuda_64bit.mk >> kaldi.mk
@@ -556,7 +428,7 @@ function linux_configure_speex {
   # Check whether the user has called tools/extras/install_speex.sh or not
   [ ! -z "$SPEEXROOT" ] || SPEEXROOT=`pwd`/../tools/speex
   [ ! -z "$SPEEXLIBDIR" ] || SPEEXLIBDIR="$SPEEXROOT"/lib
-  [ ! -z "$SPEEXINCLUDEDIR" ] || SPEEXINCLUDEDIR="$SPEEXROOT"/include
+  [ ! -z "$SPEEXINCDIR" ] || SPEEXINCDIR="$SPEEXROOT"/include
   static_speex=$1
   if [ "foo"$static_speex == "foo" ]; then
     static_speex=false
@@ -573,9 +445,9 @@ function linux_configure_speex {
     return
   fi
 
-  if [ -f $SPEEXINCLUDEDIR/speex/speex.h ]; then
+  if [ -f $SPEEXINCDIR/speex/speex.h ]; then
     echo >> kaldi.mk
-    echo CXXFLAGS += -DHAVE_SPEEX -I${SPEEXINCLUDEDIR} >> kaldi.mk
+    echo CXXFLAGS += -DHAVE_SPEEX -I${SPEEXINCDIR} >> kaldi.mk
 
     if $static_speex; then
       echo LDLIBS += $SPEEXLIBDIR/libspeex.a
@@ -594,12 +466,12 @@ function linux_atlas_failure {
   echo ATLASINC = $ATLASROOT/include >> kaldi.mk
   echo ATLASLIBS = [somewhere]/liblapack.a [somewhere]/libcblas.a [somewhere]/libatlas.a [somewhere]/libf77blas.a $ATLASLIBDIR >> kaldi.mk
   echo >> kaldi.mk
-  if [[ "`uname -m`" == arm* ]]; then
-    cat makefiles/linux_atlas_arm.mk >> kaldi.mk
-  elif [[ "`uname -m`" == ppc64le ]]; then
-    cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk
+  if [[ "$TARGET_ARCH" == arm* ]]; then
+    cat makefiles/linux_atlas_arm.mk >> kaldi.mk
+  elif [[ "$TARGET_ARCH" == ppc64le ]]; then
+    cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk
   else
-    cat makefiles/linux_atlas.mk >> kaldi.mk
+    cat makefiles/linux_atlas.mk >> kaldi.mk
   fi
   echo "** $* ***"
   echo "** ERROR **"
@@ -625,7 +497,7 @@ function linux_check_static {
   if [ -f $dir/libatlas.a ]; then # candidate...
     # Note: on the next line, the variable assignment
     # LANG=en_US should apply just to the program called on that line.
- if LANG=en_US gcc -o test_linking test_linking.cc -u ATL_flushcache $dir/libatlas.a 2>&1 | grep -i "incompatible" >/dev/null; then + if LANG=en_US $CXX -o test_linking test_linking.cc -u ATL_flushcache $dir/libatlas.a 2>&1 | grep -i "incompatible" >/dev/null; then echo "Directory $dir may contain ATLAS libraries but seems to be wrong architecture"; rm test_linking test_linking.cc 2>/dev/null return 1; @@ -651,9 +523,9 @@ function linux_configure_debian_ubuntu { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk echo >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk @@ -675,9 +547,9 @@ function linux_configure_debian_ubuntu3 { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk echo >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk @@ -701,9 +573,9 @@ function linux_configure_debian7 { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk echo >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk @@ -724,9 +596,9 @@ function linux_configure_redhat { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk echo >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk @@ -749,9 +621,9 @@ function linux_configure_redhat_fat { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS -Wl,-rpath=$libdir >> kaldi.mk echo >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk @@ -806,9 +678,9 @@ function linux_configure_static { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk echo >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk @@ -890,9 +762,9 @@ function linux_configure_dynamic { echo ATLASINC = $ATLASROOT/include >> kaldi.mk echo ATLASLIBS = $ATLASLIBS >> kaldi.mk echo >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_atlas_arm.mk >> kaldi.mk - 
elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_atlas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_atlas.mk >> kaldi.mk @@ -902,6 +774,234 @@ function linux_configure_dynamic { echo "Successfully configured for Linux [dynamic libraries] with ATLASLIBS =$ATLASLIBS" } +############################# CONFIGURATION ############################# + +# If configuration sets any of these variables, we will switch the external +# math library. Here we unset them so that we can check later. +unset MKLROOT +unset CLAPACKROOT +unset OPENBLASROOT +unset MKLLIBDIR + +# This variable identifies the type of system where built programs and +# libraries will run. It is set by the configure script when cross compiling. +unset HOST + +# These environment variables can be used to override the default toolchain. +CXX=${CXX:-g++} +AR=${AR:-ar} +AS=${AS:-as} +RANLIB=${RANLIB:-ranlib} + +# These environment variables can be used to provide additional flags to the +# compiler/linker. We want these flags to override the flags determined by the +# configure script, so we append them to the appropriate variables (CXXFLAGS, +# LDFLAGS and LDLIBS) after those variables are set by the configure script. +ENV_CXXFLAGS=$CXXFLAGS +ENV_LDFLAGS=$LDFLAGS +ENV_LDLIBS=$LDLIBS + +# Default configuration +double_precision=false +dynamic_kaldi=false +use_cuda=true +static_fst=false +static_math=false +threaded_atlas=false +mkl_threading=sequential +android=false + +MATHLIB='ATLAS' +ATLASROOT=`rel2abs ../tools/ATLAS/` +FSTROOT=`rel2abs ../tools/openfst` + +# Save the command line to include in kaldi.mk +cmd_line="$0 $@" + +while [ $# -gt 0 ]; +do + case "$1" in + --help) + usage; exit 0 ;; + --version) + echo $CONFIGURE_VERSION; exit 0 ;; + --static) + dynamic_kaldi=false; + static_math=true; + static_fst=true; + shift ;; + --shared) + dynamic_kaldi=true; + static_math=false; + static_fst=false; + shift ;; + --double-precision) + double_precision=true; + shift ;; + --double-precision=yes) + double_precision=true; + shift ;; + --double-precision=no) + double_precision=false; + shift ;; + --atlas-root=*) + ATLASROOT=`read_dirname $1`; + shift ;; + --threaded-atlas) + threaded_atlas=true; + shift ;; + --threaded-atlas=yes) + threaded_atlas=true; + shift ;; + --threaded-atlas=no) + threaded_atlas=false; + shift ;; + --threaded-math) + threaded_atlas=true; + mkl_threading=iomp + shift ;; + --threaded-math=yes) + threaded_atlas=true; + mkl_threading=iomp + shift ;; + --threaded-math=no) + threaded_atlas=false; + mkl_threading=sequential + shift ;; + --use-cuda) + use_cuda=true; + shift ;; + --use-cuda=yes) + use_cuda=true; + shift ;; + --use-cuda=no) + use_cuda=false; + shift ;; + --static-math) + static_math=true; + shift ;; + --static-math=yes) + static_math=true; + shift ;; + --static-math=no) + static_math=false; + shift ;; + --static-fst) + static_fst=true; + shift ;; + --static-fst=yes) + static_fst=true; + shift ;; + --static-fst=no) + static_fst=false; + shift ;; + --mkl-threading=sequential) + threaded_atlas=false; + mkl_threading=sequential; + shift ;; + --mkl-threading=*) + mkl_threading=`expr "X$1" : '[^=]*=\(.*\)'`; + threaded_atlas=true; + shift ;; + --fst-root=*) + FSTROOT=`read_dirname $1`; + shift ;; + --clapack-root=*) + CLAPACKROOT=`read_dirname $1`; + shift ;; + --openblas-root=*) + OPENBLASROOT=`read_dirname $1`; + shift ;; + --mkl-root=*) + MKLROOT=`read_dirname $1`; + shift ;; + --mkl-libdir=*) + MKLLIBDIR=`read_dirname $1`; + shift ;; + --speex-root=*) + 
SPEEXROOT=`read_dirname $1`; + shift ;; + --speex-libdir=*) + SPEEXLIBDIR=`read_dirname $1`; + shift ;; + --speex-incdir=*) + SPEEXINCDIR=`read_dirname $1`; + shift ;; + --omp-libdir=*) + OMPLIBDIR=`read_dirname $1`; + shift ;; + --mathlib=*) + MATHLIB=`expr "X$1" : '[^=]*=\(.*\)'`; + shift ;; + --cudatk-dir=*) + CUDATKDIR=`read_dirname $1`; + shift ;; #CUDA is used in src/cudamatrix and src/nnet{,bin} only + --fst-version=*) + OPENFST_VER=`expr "X$1" : '[^=]*=\(.*\)'`; + shift;; + --host=*) + # The type of system where built programs and libraries will run. + # It should be in the format cpu-vendor-os. If specified, this script + # will infer the target architecture from the specified host triple. + HOST=`expr "X$1" : '[^=]*=\(.*\)'`; + shift ;; + --android-incdir=*) + android=true; + threaded_math=false; + static_math=true; + static_fst=true; + dynamic_kaldi=false; + MATHLIB='OPENBLAS'; + ANDROIDINCDIR=`read_dirname $1`; + shift;; + *) echo "Unknown argument: $1, exiting"; usage; exit 1 ;; + esac +done + +# The idea here is that if you change the configuration options from using +# CUDA to not using it, or vice versa, we want to recompile all parts of the +# code that may use a GPU. Touching this file is a way to force this. +touch cudamatrix/cu-common.h 2>/dev/null + +if $android && [[ "$CXX" != *clang++* ]] ; then + failure "Android build requires clang++. Make sure you have clang++ installed + on your system and then override the default compiler by setting CXX, e.g. + CXX=clang++ ./configure" +fi + +# If HOST is set +# 1. We prepend it to CXX, AR, AS and RANLIB. +# 2. We parse the target architecture from the HOST triple. +# Otherwise we set the target architecture to the output of `uname -m`. +if is_set $HOST; then + CXX="$HOST-$CXX" + AR="$HOST-$AR" + AS="$HOST-$AS" + RANLIB="$HOST-$RANLIB" + + # The host triple will be something like "armv8-rpi3-linux-gnueabihf". We + # need the first field which is the target architecture for this build. The + # following command will take the host triple "armv8-rpi3-linux-gnueabihf" + # and return ["armv8", "rpi3", "linux", "gnueabihf"] in PARTS. + IFS='-' read -ra PARTS <<< "$HOST" + # The first field in the PARTS list is the target architecture. + TARGET_ARCH="$PARTS" + if [[ "$TARGET_ARCH" != arm* && "$TARGET_ARCH" != ppc64le && "$TARGET_ARCH" != x86* ]] ; then + # We currently only support building for x86[_64], arm*, and ppc64le. + # If TARGET_ARCH was read from the HOST variable, it must be one of these. + failure "$TARGET_ARCH is not a supported architecture. + Supported architectures: x86[_64], arm*, ppc64le." + fi +else + TARGET_ARCH="`uname -m`" +fi + +# If one of these variables is set, we switch the external math library. +is_set $MKLLIBDIR && echo "Configuring KALDI to use MKL" && export MATHLIB="MKL" +is_set $MKLROOT && echo "Configuring KALDI to use MKL"&& export MATHLIB="MKL" +is_set $CLAPACKROOT && echo "Configuring KALDI to use CLAPACK"&& export MATHLIB="CLAPACK" +is_set $OPENBLASROOT && echo "Configuring KALDI to use OPENBLAS"&& export MATHLIB="OPENBLAS" + echo "Configuring ..." # Back up the old kaldi.mk in case we modified it @@ -910,13 +1010,24 @@ if [ -f kaldi.mk ]; then cp kaldi.mk kaldi.mk.bak fi -echo "Checking compiler $CXX ..." 
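The host-triple parsing added above can be sanity-checked in isolation; a
minimal sketch (the triple is an example, not a requirement):

    HOST=armv8-rpi3-linux-gnueabihf
    IFS='-' read -ra PARTS <<< "$HOST"
    echo "$PARTS"   # prints "armv8"; unsubscripted $PARTS expands to ${PARTS[0]}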
-check_compiler $CXX
-
-printf "# This file was generated using the following command:\n# $cmd_line\n\n" > kaldi.mk
+# Generate the new kaldi.mk file
+echo "# This file was generated using the following command:" > kaldi.mk
+echo "# $cmd_line" >> kaldi.mk
+echo >> kaldi.mk
 echo "CONFIGURE_VERSION := $CONFIGURE_VERSION" >> kaldi.mk
 echo >> kaldi.mk
+echo "# Toolchain configuration" >> kaldi.mk
+echo >> kaldi.mk
+echo "CXX = $CXX" >> kaldi.mk
+echo "AR = $AR" >> kaldi.mk
+echo "AS = $AS" >> kaldi.mk
+echo "RANLIB = $RANLIB" >> kaldi.mk
+echo >> kaldi.mk
+
+echo "Checking compiler $CXX ..."
+check_compiler $CXX
+
 echo "# Base configuration" >> kaldi.mk
 echo >> kaldi.mk
 if $dynamic_kaldi ; then
@@ -934,13 +1045,13 @@ if [ ! -f $FSTROOT/include/fst/fst.h ]; then
   failure "Could not find file $FSTROOT/include/fst/fst.h: you may not have installed OpenFst. See ../tools/INSTALL"
 fi
-OPENFST_VER=$(grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::')
+OPENFST_VER=${OPENFST_VER:-$(grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::')}
 OPENFST_VER_NUM=$(echo $OPENFST_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d")
 if [ $OPENFST_VER_NUM -lt 10600 ]; then
   failure "OpenFst-$OPENFST_VER is not supported. You need OpenFst >= 1.6.0."
 fi
 echo "OPENFSTINC = $FSTROOT/include" >> kaldi.mk
-if $static_fst ; then
+if $static_fst ; then
   OPENFSTLIBS="$FSTROOT/lib/libfst.a"
 else
   if [ "`uname`" == "Darwin" ]; then
@@ -950,7 +1061,7 @@ else
     OPENFSTLIBS="$FSTROOT/lib/libfst.so"
     OPENFSTLDFLAGS="-Wl,-rpath=${FSTROOT}/lib"
   else
-    failure "Dynamic libraries not supported on this platform.
+    failure "Dynamic libraries are not supported on this platform.
     Run configure with --static --static-fst=no flag."
   fi
 fi
@@ -959,16 +1070,43 @@ if [ ! -f "$OPENFSTLIBS" ]; then
 fi
 echo "OPENFSTLIBS = $OPENFSTLIBS" >> kaldi.mk
 echo "OPENFSTLDFLAGS = $OPENFSTLDFLAGS" >> kaldi.mk
-echo "CXX = $CXX" >> kaldi.mk
 echo >> kaldi.mk
 
-# Most of the OS-specific steps below will append to kaldi.mk
+# OS-specific steps given below append to kaldi.mk
 echo "Doing OS specific configurations ..."
 
-# Check for Darwin at first, because we later call uname -o (for Cygwin)
-# which crashes on Darwin. Also the linear algebra libraries on Macs are
-# used differently (through the Accelerate framework) than on Linux.
-if [ "`uname`" == "Darwin" ]; then
+if $android ; then
+  if [ -z $ANDROIDINCDIR ] ; then
+    failure "--android-incdir must be specified for Android builds."
+  fi
+
+  if ! is_set $HOST; then
+    failure "--host must be specified for Android builds."
+  fi
+
+  OPENBLASROOT=`rel2abs "$OPENBLASROOT"`
+  if [ -z "$OPENBLASROOT" ]; then
+    failure "The location of OPENBLAS must be specified for Android builds
+    using --openblas-root (and it must exist)"
+  fi
+  if [ ! -f $OPENBLASROOT/lib/libopenblas.a ]; then
+    failure "Expected to find the file $OPENBLASROOT/lib/libopenblas.a"
+  fi
+  echo "Using OpenBLAS as the linear algebra library."
+
+  OPENBLASLIBS="$OPENBLASROOT/lib/libopenblas.a $OPENBLASROOT/lib/libclapack.a $OPENBLASROOT/lib/liblapack.a $OPENBLASROOT/lib/libblas.a $OPENBLASROOT/lib/libf2c.a"
+  echo "OPENBLASINC = $OPENBLASROOT/include" >> kaldi.mk
+  echo "OPENBLASLIBS = $OPENBLASLIBS" >> kaldi.mk
+  echo "ANDROIDINC = $ANDROIDINCDIR" >> kaldi.mk
+
+  cat makefiles/android_openblas.mk >> kaldi.mk
+
+  echo "Successfully configured for Android with OpenBLAS from $OPENBLASROOT."
+
+elif [ "`uname`" == "Darwin" ]; then
+  # Check for Darwin first, because we later call uname -o (for Cygwin)
+  # which crashes on Darwin.
+ echo "On Darwin: Checking for Accelerate framework ..." if [ ! -e /System/Library/Frameworks/Accelerate.framework ]; then failure "Need the Accelerate framework to compile on Darwin." @@ -1054,7 +1192,7 @@ elif [ "`uname`" == "Linux" ]; then fi elif [ "$MATHLIB" == "MKL" ]; then - if [ "`uname -m`" != "x86_64" ]; then + if [ "$TARGET_ARCH" != "x86_64" ]; then failure "MKL on Linux only supported for Intel(R) 64 architecture (x86_64). See makefiles/linux_64_mkl.mk to manually configure for other platforms." fi @@ -1118,7 +1256,7 @@ elif [ "`uname`" == "Linux" ]; then if [ ! -f makefiles/linux_clapack.mk ]; then failure "makefiles/linux_clapack.mk not found." fi - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_clapack_arm.mk >> kaldi.mk else cat makefiles/linux_clapack.mk >> kaldi.mk @@ -1147,9 +1285,9 @@ elif [ "`uname`" == "Linux" ]; then echo "OPENBLASINC = $OPENBLASROOT/include" >> kaldi.mk echo "OPENBLASLIBS = $OPENBLASLIBS" >> kaldi.mk echo >> kaldi.mk - if [[ "`uname -m`" == arm* ]]; then + if [[ "$TARGET_ARCH" == arm* ]]; then cat makefiles/linux_openblas_arm.mk >> kaldi.mk - elif [[ "`uname -m`" == ppc64le ]]; then + elif [[ "$TARGET_ARCH" == ppc64le ]]; then cat makefiles/linux_openblas_ppc64le.mk >> kaldi.mk else cat makefiles/linux_openblas.mk >> kaldi.mk @@ -1163,8 +1301,7 @@ elif [ "`uname`" == "Linux" ]; then fi else failure "Could not detect the platform or we have not yet worked out the - appropriate configuration for this platform. - Please contact the developers." + appropriate configuration for this platform. Please contact the developers." fi # Append the flags set by environment variables last so they can be used diff --git a/src/makefiles/android_openblas.mk b/src/makefiles/android_openblas.mk new file mode 100644 index 00000000000..c8f60f4fa4f --- /dev/null +++ b/src/makefiles/android_openblas.mk @@ -0,0 +1,42 @@ +# OpenBLAS specific Android configuration + +ifndef DOUBLE_PRECISION +$(error DOUBLE_PRECISION not defined.) +endif +ifndef OPENFSTINC +$(error OPENFSTINC not defined.) +endif +ifndef OPENFSTLIBS +$(error OPENFSTLIBS not defined.) +endif +ifndef OPENBLASINC +$(error OPENBLASINC not defined.) +endif +ifndef OPENBLASLIBS +$(error OPENBLASLIBS not defined.) +endif +ifndef ANDROIDINC +$(error ANDROIDINC not defined.) +endif + +COMPILER = $(shell $(CXX) -v 2>&1) +ifneq ($(findstring clang,$(COMPILER)),clang) +$(error Android build does not support compiling with $(CXX). + Supported compilers: clang++) +endif + +CXXFLAGS = -std=c++11 -I.. 
-I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ + -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ + -Wno-deprecated-declarations -Winit-self -Wno-mismatched-tags \ + -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ + -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_OPENBLAS -DANDROID_BUILD \ + -I$(OPENBLASINC) -I$(ANDROIDINC) -ftree-vectorize -mfloat-abi=hard \ + -mfpu=neon -mhard-float -D_NDK_MATH_NO_SOFTFP=1 -pthread \ + -g # -O0 -DKALDI_PARANOID + +ifeq ($(KALDI_FLAVOR), dynamic) +CXXFLAGS += -fPIC +endif + +LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -Wl,--no-warn-mismatch -pie +LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm_hard -ldl diff --git a/src/makefiles/cygwin.mk b/src/makefiles/cygwin.mk index e5657818ce5..c58cd3a42da 100644 --- a/src/makefiles/cygwin.mk +++ b/src/makefiles/cygwin.mk @@ -26,7 +26,3 @@ LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -g \ --enable-auto-import -L/usr/lib/lapack LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) -lcyglapack-0 -lcygblas-0 \ -lm -lpthread -ldl - -AR = ar -AS = as -RANLIB = ranlib diff --git a/src/makefiles/darwin.mk b/src/makefiles/darwin.mk index 24fbdca890f..dffcc878083 100644 --- a/src/makefiles/darwin.mk +++ b/src/makefiles/darwin.mk @@ -25,10 +25,6 @@ endif LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -g LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) -framework Accelerate -lm -lpthread -ldl -AR = ar -AS = as -RANLIB = ranlib - # Compiler specific flags COMPILER = $(shell $(CXX) -v 2>&1) ifeq ($(findstring clang,$(COMPILER)),clang) diff --git a/src/makefiles/linux_atlas.mk b/src/makefiles/linux_atlas.mk index 929461831df..b30c7ad5474 100644 --- a/src/makefiles/linux_atlas.mk +++ b/src/makefiles/linux_atlas.mk @@ -30,7 +30,3 @@ endif LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl - -AR = ar -AS = as -RANLIB = ranlib diff --git a/src/makefiles/linux_atlas_arm.mk b/src/makefiles/linux_atlas_arm.mk index 9b9c42257fb..35e98da51d7 100644 --- a/src/makefiles/linux_atlas_arm.mk +++ b/src/makefiles/linux_atlas_arm.mk @@ -30,7 +30,3 @@ endif LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl - -AR = ar -AS = as -RANLIB = ranlib diff --git a/src/makefiles/linux_atlas_ppc64le.mk b/src/makefiles/linux_atlas_ppc64le.mk index a0c22927f2e..a5962f7964b 100644 --- a/src/makefiles/linux_atlas_ppc64le.mk +++ b/src/makefiles/linux_atlas_ppc64le.mk @@ -31,7 +31,3 @@ endif LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl - -AR = ar -AS = as -RANLIB = ranlib diff --git a/src/makefiles/linux_clapack.mk b/src/makefiles/linux_clapack.mk index 95c58d0ec22..87e016aae5b 100644 --- a/src/makefiles/linux_clapack.mk +++ b/src/makefiles/linux_clapack.mk @@ -24,7 +24,3 @@ endif LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl - -AR = ar -AS = as -RANLIB = ranlib diff --git a/src/makefiles/linux_clapack_arm.mk b/src/makefiles/linux_clapack_arm.mk index 2b15193046b..d21e640d3c1 100644 --- a/src/makefiles/linux_clapack_arm.mk +++ b/src/makefiles/linux_clapack_arm.mk @@ -24,7 +24,3 @@ endif LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl - -AR = ar -AS = as -RANLIB = ranlib diff --git a/src/makefiles/linux_openblas.mk b/src/makefiles/linux_openblas.mk index b7b74bff89a..d145c687438 100644 --- 
a/src/makefiles/linux_openblas.mk
+++ b/src/makefiles/linux_openblas.mk
@@ -30,7 +30,3 @@ endif
 
 LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic
 LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl
-
-AR = ar
-AS = as
-RANLIB = ranlib
diff --git a/src/makefiles/linux_openblas_arm.mk b/src/makefiles/linux_openblas_arm.mk
index 344879580aa..29a91752509 100644
--- a/src/makefiles/linux_openblas_arm.mk
+++ b/src/makefiles/linux_openblas_arm.mk
@@ -30,7 +30,3 @@ endif
 
 LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic
 LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl
-
-AR = ar
-AS = as
-RANLIB = ranlib
diff --git a/src/makefiles/linux_openblas_ppc64le.mk b/src/makefiles/linux_openblas_ppc64le.mk
index 9225f4922f0..6550d915c6c 100644
--- a/src/makefiles/linux_openblas_ppc64le.mk
+++ b/src/makefiles/linux_openblas_ppc64le.mk
@@ -31,7 +31,3 @@ endif
 
 LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic
 LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl
-
-AR = ar
-AS = as
-RANLIB = ranlib
diff --git a/src/makefiles/linux_x86_64_mkl.mk b/src/makefiles/linux_x86_64_mkl.mk
index 595557a5ef4..50b4047def7 100644
--- a/src/makefiles/linux_x86_64_mkl.mk
+++ b/src/makefiles/linux_x86_64_mkl.mk
@@ -58,7 +58,3 @@ MKL_DYN_MUL = -L$(MKLLIB) -lmkl_solver_lp64 -Wl,--start-group -lmkl_intel_lp64 \
 
 LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic
 LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(MKLFLAGS) -lm -lpthread -ldl
-
-AR = ar
-AS = as
-RANLIB = ranlib
diff --git a/src/thread/kaldi-barrier.h b/src/thread/kaldi-barrier.h
index 4c64726dc7a..b4b5658c629 100644
--- a/src/thread/kaldi-barrier.h
+++ b/src/thread/kaldi-barrier.h
@@ -55,3 +55,12 @@ class Barrier {
 
 #endif // KALDI_THREAD_KALDI_BARRIER_H_
+/*
+ * Android does not support cancelling pthreads, so the following symbols are
+ * not defined. Define them here; they can be no-ops because we cannot cancel
+ * a pthread on Android.
+ */
+#ifdef ANDROID_BUILD
+#define PTHREAD_CANCEL_STATE 0
+#define pthread_setcancelstate(a, b) do { } while(0)
+#endif
+
diff --git a/windows/INSTALL.md b/windows/INSTALL.md
index 770844520d2..9edcee65144 100644
--- a/windows/INSTALL.md
+++ b/windows/INSTALL.md
@@ -157,13 +157,17 @@ for their processors. It isn't free, but you can get [Community Licensing for In
 For example, for a build using OpenBLAS and VS 2015 you would run:
 
-    (kaldi)/tools$ generate_solution.pl --vsver vs2015 --enable-openblas
+    (kaldi)/windows$ generate_solution.pl --vsver vs2015 --enable-openblas
 
 Another example, for OpenBLAS, VS 2013 and CUDA support:
 
-    (kaldi)/tools$ generate_solution.pl --vsver vs2013 --enable-cuda --enable-openblas
+    (kaldi)/windows$ generate_solution.pl --vsver vs2013 --enable-cuda --enable-openblas
 
-16. Open the generated solution in the visual studio and switch to **Debug|x64** (or **Release|x64**) and build.
+13. Run the script (kaldi)/windows/get_version.pl:
+
+    (kaldi)/windows$ get_version.pl
+
+17. Open the generated solution in Visual Studio, switch to **Debug|x64** (or **Release|x64**), and build.
 Expect 10 projects to fail; the majority of them will fail because of the missing include `portaudio.h`
 
 ------
diff --git a/windows/get_version.pl b/windows/get_version.pl
new file mode 100755
index 00000000000..2a54891516a
--- /dev/null
+++ b/windows/get_version.pl
@@ -0,0 +1,44 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2017  (Author: Yenda Trmal)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+
+open(my $F, "<", "../src/.version") or do {
+  print "$!\n";
+  print "The file ../src/.version does not exist\n";
+  print "Either you are not running this script from within\n";
+  print "the windows/ directory or you have accidentally\n";
+  print "deleted the file\n";
+  exit 1;
+};
+
+open(my $H, ">", "../src/base/version.h") or do {
+  print "$!\n";
+  print "Could not write to ../src/base/version.h\n";
+  print "Either you are not running this script from within\n";
+  print "the windows/ directory or there were some other\n";
+  print "issues\n";
+  exit 1;
+};
+
+my $kaldi_ver=<$F>; chomp $kaldi_ver;
+print $H "KALDI_VERSION=${kaldi_ver}-win\n";
+close($F);
+close($H);
diff --git a/windows/variables.props.dev b/windows/variables.props.dev
index c8063dc1841..837b3f999a9 100644
--- a/windows/variables.props.dev
+++ b/windows/variables.props.dev
@@ -2,11 +2,14 @@
 
+
+
   C:\Program Files (x86)\Intel\Composer XE\mkl
   C:\Users\Yenda\Downloads\kaldi-svn\tools\OpenBLAS-v0.2.14-Win64-int32
   C:\Users\Yenda\Downloads\kaldi-svn\tools\pthread-win32\Pre-built.2
   C:\Users\Yenda\Downloads\kaldi-svn\tools\openfstwin-1.3.4\
   C:\Users\Yenda\Downloads\kaldi-svn\tools\openfstwin-1.3.4\MSVC12\x64\
+

From dc454cc0ad936e403d7cb1a9648dc5661683c120 Mon Sep 17 00:00:00 2001
From: "Jan \"yenda\" Trmal"
Date: Wed, 8 Feb 2017 13:49:36 -0500
Subject: [PATCH 411/530] [egs,scripts,src] Add BABEL s5d recipe; various
 associated fixes (#1356)

* Creating a new recipe directory
* adding lists
* Improvements in the pipeline, fixes, syllab search
* Transplanting the diff to s5d
* added TDNN, LSTM and BLSTM scripts. added Telugu conf files.
* added blstm script and top level commands
* improved keyword search, new lang configs
* removing not needed scripts
* added blstm results
* some keyword-search optimization binaries
* removing some extra files + kwsearch pipeline improvement
* adding configs for the OP3 langs
* configs for the rest of the OP3 langs
* Added updated configs for IndusDB.20151208.Babel.tar.bz2
* fixes of the pipeline, added langp (re)estimation
* adding the kaldi-native search pipeline and a bunch of changes related to this
* removing extra files
* A couple of fixes
* KWS improvements and fixes
* Fixes of a couple of issues reported by Fred Richardson
* A separate script for lexicon expansion
* A couple of fixes and tweaks. Added checks for tools, especially sox.
* adding a couple of changes -- new style options and results for BP langs
* adding new results (still will need to be updated)
* added langp and some details tweaked
* updated STT results, new KWS results and a couple of small fixes all around
* adding file lists for dev languages
* miniature fixes and cleanups
* one more batch of small fixes -- mostly whitespace cleanup
* small fixes -- location of files and removal of trailing slash in the pathname
* enabling stage-2 KWS pipeline
* adding some directories to .gitignore
* some quick fixes
* latest fixes
* making the script split_compound_set conform to the naming
* some last minute fixes for the combination scoring
* do not attempt to score when the scoring data is not available
* bug fixes and --ntrue-from option
* another batch of fixes
* adding +x permission to split_compound_set.sh
* fixing whitespaces
* fixing whitespaces
* a couple of fixes
* adding the cleanup script and chain models training
* adding the graphemic/unicode lexicon feature
* adding the graphemic/unicode lexicon feature
* fixing the cc files headers, adding c info
* use the user-provided kwset id, not the filename
* use _cleaned affix
* fixes w.r.t. getting chain models independent of other systems
* small fixes as reported by Fred Richardson and Yenda
* another issue reported by Fred Richardson
* fixing KWS for the chain systems
* fixes in the KWS hitlist combination
* adding 40hrs Pashto config and fixes for the unicode system
* fixing some bugs as reported by Ni Chongjia (I2R)
* fixing some bugs as reported by Fred Richardson
* adding 40hrs Pashto OP3 setup
* addressing Dan's comments, some further cleanup
* improving the make_index script
* remove fsts-scale
* adding 'see also' to some of the fst tools
* adding back accidentally removed svn check
---
 .gitignore | 11 +
 egs/babel/s5/local/make_pitch.sh | 18 +-
 egs/babel/s5c/local/CHECKPOINT.sh | 18 +-
 egs/babel/s5c/local/ali_to_rttm.sh | 4 +-
 .../s5c/local/annotated_kwlist_to_KWs.pl | 4 +-
 egs/babel/s5c/local/apply_g2p.sh | 12 +-
 .../s5c/local/apply_map_tab_preserving.pl | 12 +-
 egs/babel/s5c/local/augment_original_stm.pl | 2 +-
 egs/babel/s5c/local/best_path_weights.sh | 18 +-
 egs/babel/s5c/local/check_models.sh | 2 +-
 egs/babel/s5c/local/check_wers.sh | 4 +-
 egs/babel/s5c/local/cmu_uem2kaldi_dir.sh | 28 +-
 egs/babel/s5c/local/create_shadow_dataset.sh | 16 +-
 egs/babel/s5c/local/cstr_ndx2flist.pl | 4 +-
 egs/babel/s5c/local/ctm2segments.pl | 20 +-
 egs/babel/s5c/local/datasets/basic_kws.sh | 10 +-
 egs/babel/s5c/local/datasets/extra_kws.sh | 37 +-
 .../s5c/local/datasets/supervised_pem.sh | 2 +-
 .../s5c/local/datasets/supervised_seg.sh | 4 +-
 .../s5c/local/datasets/supervised_uem.sh | 4 +-
 egs/babel/s5c/local/datasets/vocab_kws.sh | 22 +-
 egs/babel/s5c/local/extend_lexicon.sh | 42 +-
 egs/babel/s5c/local/extract_oov_words.pl | 14 +-
 egs/babel/s5c/local/filter_kwslist.pl | 8 +-
 egs/babel/s5c/local/find_transcripts.pl | 4 +-
 egs/babel/s5c/local/fix_kwslist.pl | 2 +-
 .../s5c/local/generate_confusion_matrix.sh | 4 +-
 egs/babel/s5c/local/generate_example_kws.sh | 4 +-
 .../s5c/local/generate_proxy_keywords.sh | 42 +-
 egs/babel/s5c/local/get_syllable_text.sh | 77 -
 egs/babel/s5c/local/gridsearch.pl | 12 +-
 egs/babel/s5c/local/gridsearch2.pl | 12 +-
 egs/babel/s5c/local/kwords2indices.pl | 18 +-
 egs/babel/s5c/local/kws_combine.sh | 6 +-
 egs/babel/s5c/local/kws_data_prep.sh | 42 +-
 egs/babel/s5c/local/kws_data_prep_proxy.sh | 26 +-
 .../s5c/local/kws_data_prep_syllables.sh | 144 --
 .../s5c/local/kws_gen_oracle_lattices.sh |
8 +- egs/babel/s5c/local/kws_oracle.sh | 20 +- egs/babel/s5c/local/kws_score_f4de.sh | 9 +- egs/babel/s5c/local/kws_search.sh | 8 +- egs/babel/s5c/local/kws_setup.sh | 14 +- egs/babel/s5c/local/lattice_to_ctm.sh | 47 +- .../s5c/local/lattice_to_ctm_syllable.sh | 115 -- egs/babel/s5c/local/make_L_align.sh | 4 +- egs/babel/s5c/local/make_ecf_subset.sh | 4 +- .../s5c/local/make_lexicon_fst_special.pl | 2 +- egs/babel/s5c/local/make_lexicon_subset.sh | 6 +- egs/babel/s5c/local/make_syllable_lexicon.sh | 72 - egs/babel/s5c/local/naive_comb.pl | 4 +- egs/babel/s5c/local/ndx2flist.pl | 4 +- .../local/nist_eval/create_compound_set.sh | 2 +- .../s5c/local/nist_eval/export_systems.sh | 8 +- egs/babel/s5c/local/nist_eval/filter_data.sh | 2 +- .../s5c/local/nist_eval/get_training_times.sh | 12 +- egs/babel/s5c/local/nist_eval/make_release.sh | 18 +- .../local/nnet2/get_egs_semi_supervised.sh | 28 +- egs/babel/s5c/local/normalize_transcript.pl | 6 +- .../local/prepare_acoustic_training_data.pl | 16 +- egs/babel/s5c/local/prepare_lexicon.pl | 22 +- egs/babel/s5c/local/prepare_stm.pl | 6 +- .../local/resegment/evaluate_segmentation.pl | 2 +- .../s5c/local/resegment/generate_segments.sh | 24 +- egs/babel/s5c/local/rttm_to_text.pl | 8 +- egs/babel/s5c/local/run_kws_stt_task.sh | 30 +- egs/babel/s5c/local/score_combine.sh | 22 +- egs/babel/s5c/local/score_mbr.sh | 2 +- egs/babel/s5c/local/score_sctk_prune.sh | 14 +- egs/babel/s5c/local/score_stm.sh | 7 +- egs/babel/s5c/local/shadow_set_kws_search.sh | 20 +- egs/babel/s5c/local/split_ctms.sh | 6 +- egs/babel/s5c/local/stm2text.pl | 6 +- egs/babel/s5c/local/subset_atwv.pl | 6 +- egs/babel/s5c/local/subset_kwslist.pl | 2 +- egs/babel/s5c/local/summarize_logs.pl | 4 +- egs/babel/s5c/local/syllab/ali_to_syllabs.sh | 71 + .../s5c/local/syllab/create_syllables.pl | 154 ++ .../local/syllab/generate_syllable_lang.sh | 125 ++ .../local/syllab/map_prons_to_syllables.pl | 61 + egs/babel/s5c/local/train_g2p.sh | 4 +- egs/babel/s5c/local/train_lms_srilm.sh | 143 +- egs/babel/s5c/local/train_mmi_sgmm2.sh | 6 +- egs/babel/s5c/local/txt_to_rttm.pl | 4 +- egs/babel/s5c/local/uem_ctm2segments.pl | 8 +- egs/babel/s5c/results/RESULTS.105-turkish.flp | 29 + egs/babel/s5c/results/RESULTS.106-tagalog.flp | 34 + .../s5c/results/RESULTS.107-vietnamese.flp | 50 + egs/babel/s5c/run-1-main.sh | 2 +- egs/babel/s5c/run-4-anydecode.sh | 28 +- egs/babel/s5c/run-4b-anydecode-bnf.sh | 14 +- egs/babel/s5d/EXAMPLE.vietnamese | 116 ++ egs/babel/s5d/README.txt | 82 + egs/babel/s5d/RESULTS | 0 egs/babel/s5d/RESULTS.txt | 8 + egs/babel/s5d/RUN_UNICODE_SYSTEM | 9 + egs/babel/s5d/UNICODE_README | 119 ++ egs/babel/s5d/babel.html | 788 +++++++++ egs/babel/s5d/cmd.sh | 29 + egs/babel/s5d/conf/bnf/config_full.py | 61 + egs/babel/s5d/conf/bnf/config_limited.py | 62 + egs/babel/s5d/conf/common.fullLP | 124 ++ egs/babel/s5d/conf/common.limitedLP | 128 ++ .../s5d/conf/common.semisupervised.limitedLP | 27 + egs/babel/s5d/conf/common_vars.sh | 28 + egs/babel/s5d/conf/glm | 13 + .../lang/101-cantonese-fullLP.official.conf | 104 ++ .../101-cantonese-limitedLP.official.conf | 112 ++ .../lang/102-assamese-fullLP.official.conf | 105 ++ .../lang/102-assamese-limitedLP.official.conf | 114 ++ .../lang/103-bengali-fullLP.official.conf | 105 ++ .../lang/103-bengali-limitedLP.official.conf | 114 ++ .../104-pashto-fullLP-40hrs.official.conf | 114 ++ .../conf/lang/104-pashto-fullLP.official.conf | 114 ++ .../lang/104-pashto-limitedLP.official.conf | 110 ++ .../lang/105-turkish-fullLP.official.conf | 111 ++ 
.../lang/105-turkish-limitedLP.official.conf | 111 ++ .../lang/106-tagalog-fullLP.official.conf | 108 ++ .../lang/106-tagalog-limitedLP.official.conf | 108 ++ .../lang/107-vietnamese-fullLP.official.conf | 107 ++ .../107-vietnamese-limitedLP.official.conf | 115 ++ .../lang/201-haitian-fullLP.official.conf | 80 + .../lang/201-haitian-limitedLP.official.conf | 89 + .../conf/lang/202-swahili.FLP.official.conf | 93 + .../conf/lang/202-swahili.LLP.official.conf | 99 ++ .../conf/lang/203-lao-fullLP.official.conf | 101 ++ .../conf/lang/203-lao-limitedLP.official.conf | 110 ++ .../conf/lang/204-tamil-fullLP.official.conf | 112 ++ .../lang/204-tamil-limitedLP.official.conf | 122 ++ .../conf/lang/205-kurmanji.FLP.official.conf | 94 + .../conf/lang/205-kurmanji.LLP.official.conf | 100 ++ .../conf/lang/206-zulu-fullLP.official.conf | 129 ++ .../lang/206-zulu-limitedLP.official.conf | 126 ++ .../conf/lang/207-tokpisin.FLP.official.conf | 93 + .../conf/lang/207-tokpisin.LLP.official.conf | 99 ++ .../conf/lang/301-cebuano.FLP.official.conf | 100 ++ .../conf/lang/301-cebuano.LLP.official.conf | 106 ++ .../conf/lang/302-kazakh.FLP.official.conf | 101 ++ .../conf/lang/302-kazakh.LLP.official.conf | 107 ++ .../conf/lang/303-telugu.FLP.official.conf | 100 ++ .../conf/lang/303-telugu.LLP.official.conf | 107 ++ .../lang/304-lithuanian.FLP.official.conf | 100 ++ .../lang/304-lithuanian.LLP.official.conf | 106 ++ .../conf/lang/305-guarani.FLP.official.conf | 45 + .../conf/lang/305-guarani.LLP.official.conf | 51 + .../s5d/conf/lang/306-igbo.FLP.official.conf | 45 + .../s5d/conf/lang/306-igbo.LLP.official.conf | 51 + .../conf/lang/307-amharic.FLP.official.conf | 46 + .../conf/lang/307-amharic.LLP.official.conf | 52 + .../conf/lang/401-mongolian.FLP.official.conf | 46 + .../conf/lang/401-mongolian.LLP.official.conf | 52 + .../conf/lang/402-javanese.FLP.official.conf | 47 + .../conf/lang/402-javanese.LLP.official.conf | 51 + .../conf/lang/403-dholuo.FLP.official.conf | 45 + .../conf/lang/403-dholuo.LLP.official.conf | 51 + .../s5d/conf/lists/101-cantonese/dev.list | 120 ++ .../s5d/conf/lists/101-cantonese/eval.list | 220 +++ .../conf/lists/101-cantonese/evalpart1.list | 63 + .../lists/101-cantonese/train.FullLP.list | 965 +++++++++++ .../lists/101-cantonese/train.LimitedLP.list | 120 ++ .../s5d/conf/lists/102-assamese/dev.list | 126 ++ .../s5d/conf/lists/102-assamese/eval.list | 189 +++ .../conf/lists/102-assamese/evalpart1.list | 65 + .../conf/lists/102-assamese/train.FullLP.list | 790 +++++++++ .../lists/102-assamese/train.LimitedLP.list | 138 ++ .../train.LimitedLP.untranscribed.list | 652 +++++++ .../102-assamese/train.untranscribed.list | 259 +++ egs/babel/s5d/conf/lists/103-bengali/dev.list | 125 ++ .../s5d/conf/lists/103-bengali/eval.list | 193 +++ .../s5d/conf/lists/103-bengali/evalpart1.list | 66 + .../conf/lists/103-bengali/train.FullLP.list | 751 ++++++++ .../lists/103-bengali/train.LimitedLP.list | 124 ++ .../train.LimitedLP.untranscribed.list | 627 +++++++ .../103-bengali/train.untranscribed.list | 255 +++ egs/babel/s5d/conf/lists/104-pashto/dev.list | 143 ++ egs/babel/s5d/conf/lists/104-pashto/eval.list | 198 +++ .../s5d/conf/lists/104-pashto/evalpart1.list | 70 + .../conf/lists/104-pashto/train.40HrFLP.list | 512 ++++++ .../lists/104-pashto/train.LimitedLP.list | 131 ++ .../s5d/conf/lists/104-pashto/training.list | 1026 +++++++++++ egs/babel/s5d/conf/lists/105-turkish/dev.list | 127 ++ .../s5d/conf/lists/105-turkish/eval.list | 194 +++ .../s5d/conf/lists/105-turkish/evalpart1.list | 65 + 
.../conf/lists/105-turkish/train.FullLP.list | 993 +++++++++++ .../lists/105-turkish/train.LimitedLP.list | 128 ++ egs/babel/s5d/conf/lists/106-tagalog/dev.list | 146 ++ .../s5d/conf/lists/106-tagalog/eval.list | 241 +++ .../s5d/conf/lists/106-tagalog/evalpart1.list | 69 + .../conf/lists/106-tagalog/train.FullLP.list | 1138 +++++++++++++ .../lists/106-tagalog/train.LimitedLP.list | 134 ++ .../s5d/conf/lists/107-vietnamese/dev.list | 132 ++ .../s5d/conf/lists/107-vietnamese/eval.list | 981 +++++++++++ .../conf/lists/107-vietnamese/evalpart1.list | 194 +++ .../lists/107-vietnamese/train.FullLP.list | 1042 ++++++++++++ .../lists/107-vietnamese/train.LimitedLP.list | 126 ++ .../train.LimitedLP.untranscribed.list | 916 ++++++++++ egs/babel/s5d/conf/lists/201-haitian/dev.list | 126 ++ .../s5d/conf/lists/201-haitian/eval.list | 194 +++ .../s5d/conf/lists/201-haitian/evalpart1.list | 64 + .../conf/lists/201-haitian/train.FullLP.list | 760 +++++++++ .../lists/201-haitian/train.LimitedLP.list | 126 ++ .../train.LimitedLP.untranscribed.list | 634 +++++++ .../201-haitian/train.untranscribed.list | 270 +++ egs/babel/s5d/conf/lists/202-swahili/dev.list | 142 ++ .../s5d/conf/lists/202-swahili/eval.list | 963 +++++++++++ .../s5d/conf/lists/202-swahili/evalpart1.list | 196 +++ .../s5d/conf/lists/202-swahili/sub-train.list | 128 ++ .../202-swahili/sub-train.untranscribed.list | 397 +++++ .../s5d/conf/lists/202-swahili/training.list | 525 ++++++ .../202-swahili/untranscribed-training.list | 555 ++++++ egs/babel/s5d/conf/lists/203-lao/dev.list | 131 ++ egs/babel/s5d/conf/lists/203-lao/eval.list | 192 +++ .../s5d/conf/lists/203-lao/evalpart1.list | 70 + .../s5d/conf/lists/203-lao/train.FullLP.list | 781 +++++++++ .../conf/lists/203-lao/train.LimitedLP.list | 127 ++ .../train.LimitedLP.untranscribed.list | 654 +++++++ .../lists/203-lao/train.untranscribed.list | 257 +++ egs/babel/s5d/conf/lists/204-tamil/dev.list | 125 ++ egs/babel/s5d/conf/lists/204-tamil/eval.list | 947 +++++++++++ .../s5d/conf/lists/204-tamil/evalpart1.list | 186 ++ .../conf/lists/204-tamil/train.FullLP.list | 778 +++++++++ .../conf/lists/204-tamil/train.LimitedLP.list | 125 ++ .../train.LimitedLP.untranscribed.list | 653 +++++++ .../lists/204-tamil/train.untranscribed.list | 269 +++ .../s5d/conf/lists/205-kurmanji/dev.list | 132 ++ .../s5d/conf/lists/205-kurmanji/eval.list | 193 +++ .../conf/lists/205-kurmanji/evalpart1.list | 63 + .../conf/lists/205-kurmanji/sub-train.list | 133 ++ .../205-kurmanji/sub-train.untranscribed.list | 399 +++++ .../s5d/conf/lists/205-kurmanji/training.list | 532 ++++++ .../205-kurmanji/untranscribed-training.list | 521 ++++++ egs/babel/s5d/conf/lists/206-zulu/dev.list | 141 ++ egs/babel/s5d/conf/lists/206-zulu/eval.list | 202 +++ .../s5d/conf/lists/206-zulu/evalpart1.list | 72 + .../s5d/conf/lists/206-zulu/train.FullLP.list | 829 +++++++++ .../conf/lists/206-zulu/train.LimitedLP.list | 124 ++ .../train.LimitedLP.untranscribed.list | 705 ++++++++ .../lists/206-zulu/train.untranscribed.list | 285 ++++ .../s5d/conf/lists/207-tokpisin/dev.list | 132 ++ .../s5d/conf/lists/207-tokpisin/eval.list | 192 +++ .../conf/lists/207-tokpisin/evalpart1.list | 64 + .../conf/lists/207-tokpisin/sub-train.list | 126 ++ .../207-tokpisin/sub-train.untranscribed.list | 380 +++++ .../s5d/conf/lists/207-tokpisin/training.list | 506 ++++++ .../207-tokpisin/untranscribed-training.list | 539 ++++++ egs/babel/s5d/conf/lists/301-cebuano/dev.list | 134 ++ .../s5d/conf/lists/301-cebuano/eval.list | 190 +++ 
.../s5d/conf/lists/301-cebuano/evalpart1.list | 62 + .../s5d/conf/lists/301-cebuano/sub-train.list | 126 ++ .../301-cebuano/sub-train.untranscribed.list | 376 ++++ .../s5d/conf/lists/301-cebuano/training.list | 502 ++++++ .../301-cebuano/untranscribed-training.list | 548 ++++++ egs/babel/s5d/conf/lists/302-kazakh/dev.list | 140 ++ egs/babel/s5d/conf/lists/302-kazakh/eval.list | 191 +++ .../s5d/conf/lists/302-kazakh/evalpart1.list | 61 + .../s5d/conf/lists/302-kazakh/sub-train.list | 130 ++ .../302-kazakh/sub-train.untranscribed.list | 398 +++++ .../s5d/conf/lists/302-kazakh/training.list | 528 ++++++ .../302-kazakh/untranscribed-training.list | 569 +++++++ .../s5d/conf/lists/303-telugu/dev.2h.list | 126 ++ egs/babel/s5d/conf/lists/303-telugu/dev.list | 126 ++ egs/babel/s5d/conf/lists/303-telugu/eval.list | 192 +++ .../s5d/conf/lists/303-telugu/evalpart1.list | 62 + .../s5d/conf/lists/303-telugu/sub-train.list | 134 ++ .../303-telugu/sub-train.untranscribed.list | 380 +++++ .../s5d/conf/lists/303-telugu/training.list | 514 ++++++ .../303-telugu/untranscribed-training.list | 501 ++++++ .../s5d/conf/lists/304-lithuanian/dev.2h.list | 122 ++ .../s5d/conf/lists/304-lithuanian/dev.list | 122 ++ .../s5d/conf/lists/304-lithuanian/eval.list | 192 +++ .../conf/lists/304-lithuanian/evalpart1.list | 60 + .../conf/lists/304-lithuanian/sub-train.list | 120 ++ .../sub-train.untranscribed.list | 364 ++++ .../conf/lists/304-lithuanian/training.list | 484 ++++++ .../untranscribed-training.list | 524 ++++++ .../s5d/conf/lists/305-guarani/dev.2h.list | 124 ++ egs/babel/s5d/conf/lists/305-guarani/dev.list | 124 ++ .../s5d/conf/lists/305-guarani/eval.list | 186 ++ .../s5d/conf/lists/305-guarani/sub-train.list | 134 ++ .../305-guarani/sub-train.untranscribed.list | 392 +++++ .../s5d/conf/lists/305-guarani/training.list | 526 ++++++ .../305-guarani/untranscribed-training.list | 525 ++++++ egs/babel/s5d/conf/lists/306-igbo/dev.2h.list | 136 ++ egs/babel/s5d/conf/lists/306-igbo/dev.list | 136 ++ egs/babel/s5d/conf/lists/306-igbo/eval.list | 194 +++ .../s5d/conf/lists/306-igbo/sub-train.list | 132 ++ .../306-igbo/sub-train.untranscribed.list | 380 +++++ .../s5d/conf/lists/306-igbo/training.list | 512 ++++++ .../306-igbo/untranscribed-training.list | 537 ++++++ .../s5d/conf/lists/307-amharic/dev.2h.list | 123 ++ egs/babel/s5d/conf/lists/307-amharic/dev.list | 123 ++ .../s5d/conf/lists/307-amharic/eval.list | 186 ++ .../s5d/conf/lists/307-amharic/sub-train.list | 122 ++ .../307-amharic/sub-train.untranscribed.list | 364 ++++ .../s5d/conf/lists/307-amharic/training.list | 486 ++++++ .../307-amharic/untranscribed-training.list | 568 +++++++ .../s5d/conf/lists/401-mongolian/dev.2h.list | 124 ++ .../s5d/conf/lists/401-mongolian/dev.list | 124 ++ .../s5d/conf/lists/401-mongolian/eval.list | 186 ++ .../conf/lists/401-mongolian/sub-train.list | 126 ++ .../sub-train.untranscribed.list | 392 +++++ .../conf/lists/401-mongolian/training.list | 518 ++++++ .../401-mongolian/untranscribed-training.list | 530 ++++++ .../s5d/conf/lists/402-javanese/dev.2h.list | 122 ++ .../s5d/conf/lists/402-javanese/dev.list | 122 ++ .../s5d/conf/lists/402-javanese/eval.list | 188 ++ .../conf/lists/402-javanese/sub-train.list | 122 ++ .../402-javanese/sub-train.untranscribed.list | 370 ++++ .../s5d/conf/lists/402-javanese/training.list | 492 ++++++ .../402-javanese/untranscribed-training.list | 519 ++++++ .../s5d/conf/lists/403-dholuo/dev.2h.list | 122 ++ egs/babel/s5d/conf/lists/403-dholuo/dev.list | 122 ++ 
egs/babel/s5d/conf/lists/403-dholuo/eval.list | 182 ++ .../s5d/conf/lists/403-dholuo/sub-train.list | 122 ++ .../403-dholuo/sub-train.untranscribed.list | 380 +++++ .../s5d/conf/lists/403-dholuo/training.list | 502 ++++++ .../403-dholuo/untranscribed-training.list | 533 ++++++ egs/babel/s5d/conf/mfcc.conf | 2 + egs/babel/s5d/conf/mfcc_hires.conf | 10 + egs/babel/s5d/conf/online_cmvn.conf | 1 + egs/babel/s5d/conf/pitch.conf | 1 + egs/babel/s5d/conf/plp.conf | 1 + egs/babel/s5d/conf/slurm.bluecrab.conf | 11 + egs/babel/s5d/local/ali_to_rttm.sh | 80 + .../s5d/local/annotated_kwlist_to_KWs.pl | 124 ++ egs/babel/s5d/local/apply_g2p.sh | 127 ++ .../s5d/local/apply_map_tab_preserving.pl | 94 + egs/babel/s5d/local/arpa2G.sh | 115 ++ egs/babel/s5d/local/augment_original_stm.pl | 110 ++ egs/babel/s5d/local/best_path_weights.sh | 142 ++ egs/babel/s5d/local/best_scores.sh | 43 + egs/babel/s5d/local/best_scores_kws.sh | 179 ++ .../s5d/local/build_edit_distance_fst.pl | 127 ++ egs/babel/s5d/local/chain/run_blstm.sh | 180 ++ egs/babel/s5d/local/chain/run_blstm_bab1.sh | 180 ++ egs/babel/s5d/local/chain/run_blstm_bab2.sh | 180 ++ egs/babel/s5d/local/chain/run_blstm_bab3.sh | 180 ++ egs/babel/s5d/local/chain/run_blstm_bab4.sh | 179 ++ egs/babel/s5d/local/chain/run_blstm_bab5.sh | 179 ++ .../s5d/local/chain/run_ivector_common.sh | 240 +++ egs/babel/s5d/local/chain/run_tdnn.sh | 177 ++ egs/babel/s5d/local/chain/run_tdnn_bab1.sh | 177 ++ egs/babel/s5d/local/chain/run_tdnn_bab2.sh | 177 ++ egs/babel/s5d/local/chain/run_tdnn_bab3.sh | 178 ++ egs/babel/s5d/local/chain/run_tdnn_bab4.sh | 177 ++ egs/babel/s5d/local/check_models.sh | 34 + egs/babel/s5d/local/check_tools.sh | 40 + egs/babel/s5d/local/check_wers.sh | 50 + egs/babel/s5d/local/cmu_uem2kaldi_dir.sh | 124 ++ egs/babel/s5d/local/count_to_logprob.pl | 94 + egs/babel/s5d/local/create_shadow_dataset.sh | 176 ++ egs/babel/s5d/local/cstr_ndx2flist.pl | 54 + egs/babel/s5d/local/ctm2segments.pl | 159 ++ egs/babel/s5d/local/datasets/basic_kws.sh | 28 + egs/babel/s5d/local/datasets/extra_kws.sh | 137 ++ .../s5d/local/datasets/supervised_pem.sh | 35 + .../s5d/local/datasets/supervised_seg.sh | 90 + .../s5d/local/datasets/supervised_uem.sh | 36 + .../s5d/local/datasets/unsupervised_seg.sh | 1 + .../s5d/local/datasets/unsupervised_uem.sh | 1 + egs/babel/s5d/local/datasets/vocab_kws.sh | 51 + egs/babel/s5d/local/decode_helper.sh | 32 + egs/babel/s5d/local/eval_kw_subsets.sh | 4 + egs/babel/s5d/local/extend_lexicon.sh | 572 +++++++ egs/babel/s5d/local/extract_oov_words.pl | 70 + egs/babel/s5d/local/filter_keywords.pl | 68 + egs/babel/s5d/local/filter_kwslist.pl | 55 + egs/babel/s5d/local/fix_kwslist.pl | 89 + .../s5d/local/generate_confusion_matrix.sh | 102 ++ egs/babel/s5d/local/generate_example_kws.sh | 110 ++ .../local/generate_phoneme_transcription.sh | 86 + .../s5d/local/generate_proxy_keywords.sh | 176 ++ egs/babel/s5d/local/kaldi_dir2uem.py | 101 ++ egs/babel/s5d/local/kwords2indices.pl | 123 ++ egs/babel/s5d/local/kws_combine.sh | 119 ++ egs/babel/s5d/local/kws_data_prep.sh | 142 ++ egs/babel/s5d/local/kws_data_prep_proxy.sh | 270 +++ .../s5d/local/kws_gen_oracle_lattices.sh | 56 + egs/babel/s5d/local/kws_oracle.sh | 136 ++ egs/babel/s5d/local/kws_oracle_threshold.pl | 200 +++ egs/babel/s5d/local/kws_score.sh | 1 + egs/babel/s5d/local/kws_score_f4de.sh | 96 ++ egs/babel/s5d/local/kws_search.sh | 230 +++ egs/babel/s5d/local/kws_setup.sh | 158 ++ egs/babel/s5d/local/lattice_to_ctm.sh | 109 ++ .../s5d/local/lexicon/make_unicode_lexicon.py | 469 +++++ 
egs/babel/s5d/local/lexicon/make_word_list.py | 93 + egs/babel/s5d/local/lonestar.py | 333 ++++ egs/babel/s5d/local/make_L_align.sh | 54 + egs/babel/s5d/local/make_corpus_subset.sh | 96 ++ egs/babel/s5d/local/make_ecf_subset.sh | 52 + .../s5d/local/make_lexicon_fst_special.pl | 53 + egs/babel/s5d/local/make_lexicon_subset.sh | 30 + egs/babel/s5d/local/make_wordlist.sh | 14 + egs/babel/s5d/local/map_lang.sh | 81 + egs/babel/s5d/local/naive_comb.pl | 234 +++ .../local/nist_eval/create_compound_set.sh | 164 ++ .../create_new_language_configs.FLP.sh | 236 +++ .../create_new_language_configs.LLP.sh | 204 +++ .../s5d/local/nist_eval/export_systems.sh | 33 + egs/babel/s5d/local/nist_eval/filter_data.sh | 152 ++ .../s5d/local/nist_eval/get_training_times.sh | 229 +++ egs/babel/s5d/local/nist_eval/make_release.sh | 356 ++++ .../s5d/local/nist_eval/split_compound_set.sh | 53 + .../local/nnet2/get_egs_semi_supervised.sh | 374 ++++ egs/babel/s5d/local/nnet3/run_blstm.sh | 29 + .../s5d/local/nnet3/run_blstm_realigned.sh | 32 + .../s5d/local/nnet3/run_ivector_common.sh | 137 ++ .../run_ivector_multicondition_common.sh | 208 +++ egs/babel/s5d/local/nnet3/run_lstm.sh | 156 ++ .../s5d/local/nnet3/run_lstm_realigned.sh | 149 ++ egs/babel/s5d/local/nnet3/run_tdnn.sh | 83 + egs/babel/s5d/local/normalize_transcript.pl | 59 + .../s5d/local/optimize/OptimizeParams.pm | 631 +++++++ egs/babel/s5d/local/optimize2.pl | 152 ++ .../local/prepare_acoustic_training_data.pl | 484 ++++++ .../s5d/local/prepare_extended_lexicon.sh | 30 + egs/babel/s5d/local/prepare_lexicon.pl | 404 +++++ egs/babel/s5d/local/prepare_stm.pl | 345 ++++ .../s5d/local/prepare_unicode_lexicon.py | 198 +++ egs/babel/s5d/local/reestimate_langp.sh | 33 + .../local/resegment/evaluate_segmentation.pl | 198 +++ .../s5d/local/resegment/generate_segments.sh | 156 ++ egs/babel/s5d/local/resegment/segmentation.py | 1508 +++++++++++++++++ .../s5d/local/resegment/train_segmentation.sh | 63 + egs/babel/s5d/local/rttm_to_text.pl | 151 ++ .../s5d/local/run_cleanup_segmentation.sh | 56 + egs/babel/s5d/local/run_kws_stt_task.sh | 99 ++ egs/babel/s5d/local/run_kws_stt_task2.sh | 124 ++ egs/babel/s5d/local/score.sh | 1 + egs/babel/s5d/local/score_combine.sh | 181 ++ egs/babel/s5d/local/score_map.sh | 64 + egs/babel/s5d/local/score_mbr.sh | 58 + egs/babel/s5d/local/score_sctk_prune.sh | 138 ++ egs/babel/s5d/local/score_stm.sh | 103 ++ egs/babel/s5d/local/search/analyze_stats.pl | 219 +++ egs/babel/s5d/local/search/annotate_kwlist.pl | 166 ++ egs/babel/s5d/local/search/combine.sh | 258 +++ egs/babel/s5d/local/search/combine_results.pl | 422 +++++ egs/babel/s5d/local/search/combine_special.sh | 200 +++ .../s5d/local/search/compile_keywords.sh | 54 + .../local/search/compile_proxy_keywords.sh | 271 +++ .../s5d/local/search/create_categories.pl | 112 ++ .../s5d/local/search/filter_by_category.pl | 360 ++++ .../s5d/local/search/filter_kws_results.pl | 189 +++ egs/babel/s5d/local/search/normalize.sh | 89 + .../s5d/local/search/normalize_categories.pl | 89 + .../s5d/local/search/normalize_results_kst.pl | 203 +++ .../s5d/local/search/per_category_stats.pl | 326 ++++ .../s5d/local/search/rttm_to_hitlists.sh | 107 ++ egs/babel/s5d/local/search/run_phn_search.sh | 135 ++ egs/babel/s5d/local/search/run_search.sh | 136 ++ egs/babel/s5d/local/search/run_syll_search.sh | 135 ++ egs/babel/s5d/local/search/score.sh | 143 ++ egs/babel/s5d/local/search/search.sh | 206 +++ egs/babel/s5d/local/search/setup.sh | 118 ++ egs/babel/s5d/local/search/utt_to_files.pl | 62 + 
egs/babel/s5d/local/search/write_kwslist.pl | 134 ++ egs/babel/s5d/local/search_index.sh | 51 + egs/babel/s5d/local/setup_categories.sh | 36 + egs/babel/s5d/local/shadow_set_kws_search.sh | 265 +++ egs/babel/s5d/local/show_lattice.sh | 34 + egs/babel/s5d/local/split_ctms.sh | 65 + egs/babel/s5d/local/stm2text.pl | 43 + egs/babel/s5d/local/subset_atwv.pl | 120 ++ egs/babel/s5d/local/subset_kwslist.pl | 33 + egs/babel/s5d/local/summarize_logs.pl | 121 ++ egs/babel/s5d/local/syllab/ali_to_syllabs.sh | 71 + .../s5d/local/syllab/create_syll_datadir.sh | 55 + .../s5d/local/syllab/create_syllables.pl | 154 ++ .../s5d/local/syllab/generate_phone_lang.sh | 129 ++ .../local/syllab/generate_syllable_lang.sh | 129 ++ .../s5d/local/syllab/lattice_word2syll.sh | 57 + .../local/syllab/map_prons_to_syllables.pl | 61 + egs/babel/s5d/local/syllab/run_phones.sh | 67 + egs/babel/s5d/local/syllab/run_syllabs.sh | 67 + egs/babel/s5d/local/train_g2p.sh | 94 + egs/babel/s5d/local/train_lms_srilm.sh | 229 +++ egs/babel/s5d/local/txt_to_rttm.pl | 108 ++ egs/babel/s5d/local/uem_ctm2segments.pl | 232 +++ egs/babel/s5d/nnet3_examples.sh | 32 + egs/babel/s5d/path.sh | 7 + egs/babel/s5d/results/RESULTS.105-turkish.flp | 29 + egs/babel/s5d/results/RESULTS.106-tagalog.flp | 34 + .../s5d/results/RESULTS.107-vietnamese.flp | 50 + ....jtrmal1@jhu.edu.2016-03-31T11:34:24-04:00 | 211 +++ ....jtrmal1@jhu.edu.2016-03-31T12:04:03-04:00 | 100 ++ ....jtrmal1@jhu.edu.2016-03-31T12:12:45-04:00 | 100 ++ ....jtrmal1@jhu.edu.2016-03-31T12:21:34-04:00 | 100 ++ ....jtrmal1@jhu.edu.2016-03-31T12:25:02-04:00 | 100 ++ ....jtrmal1@jhu.edu.2016-03-31T12:27:39-04:00 | 100 ++ ....jtrmal1@jhu.edu.2016-03-31T12:29:55-04:00 | 100 ++ ...f.jtrmal1@jhu.edu.2016-02-18T12:15:22-0500 | 28 + ...f.jtrmal1@jhu.edu.2015-11-27T17:53:08-0500 | 27 + ...f.jtrmal1@jhu.edu.2015-12-01T16:49:23-0500 | 22 + ...f.jtrmal1@jhu.edu.2015-11-28T14:48:47-0500 | 22 + ....jtrmal1@jhu.edu.2016-02-25T15:45:46-05:00 | 242 +++ ...f.jtrmal1@jhu.edu.2015-11-28T14:43:17-0500 | 22 + ...f.jtrmal1@jhu.edu.2015-11-27T16:50:17-0500 | 22 + ...f.jtrmal1@jhu.edu.2015-11-27T16:51:53-0500 | 21 + ...f.jtrmal1@jhu.edu.2016-02-18T11:46:09-0500 | 21 + ...f.jtrmal1@jhu.edu.2016-02-21T10:25:47-0500 | 27 + ...f.jtrmal1@jhu.edu.2015-12-01T16:50:41-0500 | 14 + ...f.jtrmal1@jhu.edu.2015-12-01T19:55:42-0500 | 8 + ...f.jtrmal1@jhu.edu.2016-02-21T10:24:13-0500 | 96 ++ ...f.jtrmal1@jhu.edu.2015-11-27T17:54:01-0500 | 14 + ...f.jtrmal1@jhu.edu.2016-02-21T10:25:25-0500 | 34 + ....jtrmal1@jhu.edu.2016-02-25T10:45:54-05:00 | 43 + ....jtrmal1@jhu.edu.2016-03-05T10:56:45-05:00 | 52 + ....jtrmal1@jhu.edu.2016-02-25T09:46:16-05:00 | 48 + ....jtrmal1@jhu.edu.2016-02-26T16:17:55-05:00 | 34 + ....jtrmal1@jhu.edu.2016-02-26T06:40:39-05:00 | 41 + ....jtrmal1@jhu.edu.2016-02-25T23:27:09-05:00 | 54 + egs/babel/s5d/run-1-main-extend-lex.sh | 190 +++ egs/babel/s5d/run-1-main-unicode.sh | 385 +++++ egs/babel/s5d/run-1-main.sh | 363 ++++ egs/babel/s5d/run-2-segmentation.sh | 107 ++ egs/babel/s5d/run-2a-nnet-cpu.sh | 34 + egs/babel/s5d/run-2a-nnet-ensemble-gpu.sh | 46 + egs/babel/s5d/run-2a-nnet-gpu.sh | 36 + egs/babel/s5d/run-2a-nnet-mpe.sh | 50 + egs/babel/s5d/run-2b-bnf.sh | 150 ++ egs/babel/s5d/run-3a-nnet-mpe.sh | 54 + egs/babel/s5d/run-3b-bnf-nnet.sh | 86 + egs/babel/s5d/run-3b-bnf-sgmm.sh | 97 ++ egs/babel/s5d/run-4-anydecode.sh | 724 ++++++++ egs/babel/s5d/run-4-phn-anydecode.sh | 613 +++++++ egs/babel/s5d/run-4-syll-anydecode.sh | 613 +++++++ egs/babel/s5d/run-4b-anydecode-bnf.sh | 259 +++ 
egs/babel/s5d/run-6-combine.sh | 73 + egs/babel/s5d/steps | 1 + egs/babel/s5d/utils | 1 + egs/wsj/s5/steps/make_index.sh | 27 +- egs/wsj/s5/steps/make_plp_pitch.sh | 2 +- egs/wsj/s5/steps/nnet2/get_egs.sh | 14 +- .../s5/steps/nnet2/train_discriminative.sh | 4 +- .../s5/steps/nnet2/train_pnorm_ensemble.sh | 2 +- egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh | 2 +- .../s5/steps/nnet2/train_tanh_bottleneck.sh | 5 +- egs/wsj/s5/steps/nnet3/make_tdnn_configs.py | 2 +- egs/wsj/s5/steps/search_index.sh | 15 +- egs/wsj/s5/utils/dict_dir_add_pronprobs.sh | 4 +- egs/wsj/s5/utils/make_lexicon_fst.pl | 23 +- egs/wsj/s5/utils/slurm.pl | 5 +- egs/wsj/s5/utils/write_kwslist.pl | 15 +- src/fstbin/Makefile | 3 +- src/fstbin/fsts-project.cc | 82 + src/fstbin/fsts-to-transcripts.cc | 29 +- src/fstbin/fsts-union.cc | 98 ++ src/kws/kws-functions.cc | 58 +- src/kws/kws-functions.h | 8 +- src/kwsbin/Makefile | 3 +- src/kwsbin/compute-atwv.cc | 36 +- src/kwsbin/generate-proxy-keywords.cc | 6 +- src/kwsbin/kws-index-union.cc | 21 +- src/kwsbin/kws-search.cc | 194 ++- src/kwsbin/lattice-to-kws-index.cc | 66 +- src/kwsbin/print-proxy-keywords.cc | 134 ++ src/kwsbin/transcripts-to-fsts.cc | 70 +- 558 files changed, 88097 insertions(+), 1068 deletions(-) delete mode 100755 egs/babel/s5c/local/get_syllable_text.sh delete mode 100755 egs/babel/s5c/local/kws_data_prep_syllables.sh delete mode 100755 egs/babel/s5c/local/lattice_to_ctm_syllable.sh delete mode 100755 egs/babel/s5c/local/make_syllable_lexicon.sh create mode 100755 egs/babel/s5c/local/syllab/ali_to_syllabs.sh create mode 100755 egs/babel/s5c/local/syllab/create_syllables.pl create mode 100755 egs/babel/s5c/local/syllab/generate_syllable_lang.sh create mode 100755 egs/babel/s5c/local/syllab/map_prons_to_syllables.pl create mode 100644 egs/babel/s5c/results/RESULTS.105-turkish.flp create mode 100644 egs/babel/s5c/results/RESULTS.106-tagalog.flp create mode 100644 egs/babel/s5c/results/RESULTS.107-vietnamese.flp create mode 100644 egs/babel/s5d/EXAMPLE.vietnamese create mode 100644 egs/babel/s5d/README.txt create mode 100644 egs/babel/s5d/RESULTS create mode 100644 egs/babel/s5d/RESULTS.txt create mode 100644 egs/babel/s5d/RUN_UNICODE_SYSTEM create mode 100644 egs/babel/s5d/UNICODE_README create mode 100644 egs/babel/s5d/babel.html create mode 100644 egs/babel/s5d/cmd.sh create mode 100755 egs/babel/s5d/conf/bnf/config_full.py create mode 100755 egs/babel/s5d/conf/bnf/config_limited.py create mode 100644 egs/babel/s5d/conf/common.fullLP create mode 100644 egs/babel/s5d/conf/common.limitedLP create mode 100644 egs/babel/s5d/conf/common.semisupervised.limitedLP create mode 100644 egs/babel/s5d/conf/common_vars.sh create mode 100644 egs/babel/s5d/conf/glm create mode 100644 egs/babel/s5d/conf/lang/101-cantonese-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/102-assamese-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/102-assamese-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/103-bengali-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/103-bengali-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/104-pashto-fullLP-40hrs.official.conf create mode 100644 egs/babel/s5d/conf/lang/104-pashto-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/104-pashto-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/105-turkish-fullLP.official.conf create mode 100644 
egs/babel/s5d/conf/lang/105-turkish-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/106-tagalog-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/106-tagalog-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/107-vietnamese-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/107-vietnamese-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/201-haitian-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/201-haitian-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/202-swahili.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/202-swahili.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/203-lao-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/203-lao-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/204-tamil-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/204-tamil-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/205-kurmanji.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/205-kurmanji.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/206-zulu-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/206-zulu-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/207-tokpisin.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/207-tokpisin.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/301-cebuano.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/301-cebuano.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/302-kazakh.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/302-kazakh.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/303-telugu.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/303-telugu.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/304-lithuanian.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/304-lithuanian.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/305-guarani.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/305-guarani.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/306-igbo.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/306-igbo.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/307-amharic.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/307-amharic.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/401-mongolian.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/401-mongolian.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/402-javanese.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/402-javanese.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/403-dholuo.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/403-dholuo.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lists/101-cantonese/dev.list create mode 100644 egs/babel/s5d/conf/lists/101-cantonese/eval.list create mode 100644 egs/babel/s5d/conf/lists/101-cantonese/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/101-cantonese/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/101-cantonese/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/102-assamese/dev.list create mode 100644 egs/babel/s5d/conf/lists/102-assamese/eval.list create mode 100644 egs/babel/s5d/conf/lists/102-assamese/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/102-assamese/train.FullLP.list create mode 100644 
egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/102-assamese/train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/dev.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/eval.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/104-pashto/dev.list create mode 100644 egs/babel/s5d/conf/lists/104-pashto/eval.list create mode 100644 egs/babel/s5d/conf/lists/104-pashto/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/104-pashto/train.40HrFLP.list create mode 100644 egs/babel/s5d/conf/lists/104-pashto/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/104-pashto/training.list create mode 100644 egs/babel/s5d/conf/lists/105-turkish/dev.list create mode 100644 egs/babel/s5d/conf/lists/105-turkish/eval.list create mode 100644 egs/babel/s5d/conf/lists/105-turkish/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/105-turkish/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/105-turkish/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/106-tagalog/dev.list create mode 100644 egs/babel/s5d/conf/lists/106-tagalog/eval.list create mode 100644 egs/babel/s5d/conf/lists/106-tagalog/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/106-tagalog/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/106-tagalog/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/107-vietnamese/dev.list create mode 100644 egs/babel/s5d/conf/lists/107-vietnamese/eval.list create mode 100644 egs/babel/s5d/conf/lists/107-vietnamese/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/107-vietnamese/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/dev.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/eval.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/dev.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/eval.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/training.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/203-lao/dev.list create mode 100644 egs/babel/s5d/conf/lists/203-lao/eval.list create mode 100644 
egs/babel/s5d/conf/lists/203-lao/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/203-lao/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/203-lao/train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/dev.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/eval.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/dev.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/eval.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/training.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/206-zulu/dev.list create mode 100644 egs/babel/s5d/conf/lists/206-zulu/eval.list create mode 100644 egs/babel/s5d/conf/lists/206-zulu/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/206-zulu/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/206-zulu/train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/dev.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/eval.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/training.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/dev.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/eval.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/training.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/dev.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/eval.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/training.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/dev.2h.list create mode 100644 
egs/babel/s5d/conf/lists/303-telugu/dev.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/eval.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/training.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/dev.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/eval.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/training.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/dev.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/eval.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/training.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/306-igbo/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/306-igbo/dev.list create mode 100644 egs/babel/s5d/conf/lists/306-igbo/eval.list create mode 100644 egs/babel/s5d/conf/lists/306-igbo/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/306-igbo/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/306-igbo/training.list create mode 100644 egs/babel/s5d/conf/lists/306-igbo/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/dev.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/eval.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/training.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/dev.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/eval.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/training.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/402-javanese/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/402-javanese/dev.list create mode 100644 egs/babel/s5d/conf/lists/402-javanese/eval.list create mode 100644 egs/babel/s5d/conf/lists/402-javanese/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/402-javanese/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/402-javanese/training.list 
create mode 100644 egs/babel/s5d/conf/lists/402-javanese/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/dev.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/eval.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/training.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/mfcc.conf create mode 100644 egs/babel/s5d/conf/mfcc_hires.conf create mode 100644 egs/babel/s5d/conf/online_cmvn.conf create mode 100644 egs/babel/s5d/conf/pitch.conf create mode 100644 egs/babel/s5d/conf/plp.conf create mode 100644 egs/babel/s5d/conf/slurm.bluecrab.conf create mode 100755 egs/babel/s5d/local/ali_to_rttm.sh create mode 100755 egs/babel/s5d/local/annotated_kwlist_to_KWs.pl create mode 100755 egs/babel/s5d/local/apply_g2p.sh create mode 100755 egs/babel/s5d/local/apply_map_tab_preserving.pl create mode 100755 egs/babel/s5d/local/arpa2G.sh create mode 100755 egs/babel/s5d/local/augment_original_stm.pl create mode 100755 egs/babel/s5d/local/best_path_weights.sh create mode 100755 egs/babel/s5d/local/best_scores.sh create mode 100755 egs/babel/s5d/local/best_scores_kws.sh create mode 100755 egs/babel/s5d/local/build_edit_distance_fst.pl create mode 100755 egs/babel/s5d/local/chain/run_blstm.sh create mode 100755 egs/babel/s5d/local/chain/run_blstm_bab1.sh create mode 100755 egs/babel/s5d/local/chain/run_blstm_bab2.sh create mode 100755 egs/babel/s5d/local/chain/run_blstm_bab3.sh create mode 100755 egs/babel/s5d/local/chain/run_blstm_bab4.sh create mode 100755 egs/babel/s5d/local/chain/run_blstm_bab5.sh create mode 100755 egs/babel/s5d/local/chain/run_ivector_common.sh create mode 100755 egs/babel/s5d/local/chain/run_tdnn.sh create mode 100755 egs/babel/s5d/local/chain/run_tdnn_bab1.sh create mode 100755 egs/babel/s5d/local/chain/run_tdnn_bab2.sh create mode 100755 egs/babel/s5d/local/chain/run_tdnn_bab3.sh create mode 100755 egs/babel/s5d/local/chain/run_tdnn_bab4.sh create mode 100755 egs/babel/s5d/local/check_models.sh create mode 100755 egs/babel/s5d/local/check_tools.sh create mode 100755 egs/babel/s5d/local/check_wers.sh create mode 100755 egs/babel/s5d/local/cmu_uem2kaldi_dir.sh create mode 100755 egs/babel/s5d/local/count_to_logprob.pl create mode 100755 egs/babel/s5d/local/create_shadow_dataset.sh create mode 100755 egs/babel/s5d/local/cstr_ndx2flist.pl create mode 100755 egs/babel/s5d/local/ctm2segments.pl create mode 100644 egs/babel/s5d/local/datasets/basic_kws.sh create mode 100644 egs/babel/s5d/local/datasets/extra_kws.sh create mode 100644 egs/babel/s5d/local/datasets/supervised_pem.sh create mode 100644 egs/babel/s5d/local/datasets/supervised_seg.sh create mode 100644 egs/babel/s5d/local/datasets/supervised_uem.sh create mode 120000 egs/babel/s5d/local/datasets/unsupervised_seg.sh create mode 120000 egs/babel/s5d/local/datasets/unsupervised_uem.sh create mode 100644 egs/babel/s5d/local/datasets/vocab_kws.sh create mode 100755 egs/babel/s5d/local/decode_helper.sh create mode 100755 egs/babel/s5d/local/eval_kw_subsets.sh create mode 100755 egs/babel/s5d/local/extend_lexicon.sh create mode 100755 egs/babel/s5d/local/extract_oov_words.pl create mode 100755 egs/babel/s5d/local/filter_keywords.pl create mode 100755 egs/babel/s5d/local/filter_kwslist.pl 
create mode 100755 egs/babel/s5d/local/fix_kwslist.pl create mode 100755 egs/babel/s5d/local/generate_confusion_matrix.sh create mode 100755 egs/babel/s5d/local/generate_example_kws.sh create mode 100755 egs/babel/s5d/local/generate_phoneme_transcription.sh create mode 100755 egs/babel/s5d/local/generate_proxy_keywords.sh create mode 100755 egs/babel/s5d/local/kaldi_dir2uem.py create mode 100755 egs/babel/s5d/local/kwords2indices.pl create mode 100755 egs/babel/s5d/local/kws_combine.sh create mode 100755 egs/babel/s5d/local/kws_data_prep.sh create mode 100755 egs/babel/s5d/local/kws_data_prep_proxy.sh create mode 100755 egs/babel/s5d/local/kws_gen_oracle_lattices.sh create mode 100755 egs/babel/s5d/local/kws_oracle.sh create mode 100755 egs/babel/s5d/local/kws_oracle_threshold.pl create mode 120000 egs/babel/s5d/local/kws_score.sh create mode 100755 egs/babel/s5d/local/kws_score_f4de.sh create mode 100755 egs/babel/s5d/local/kws_search.sh create mode 100755 egs/babel/s5d/local/kws_setup.sh create mode 100755 egs/babel/s5d/local/lattice_to_ctm.sh create mode 100755 egs/babel/s5d/local/lexicon/make_unicode_lexicon.py create mode 100755 egs/babel/s5d/local/lexicon/make_word_list.py create mode 100755 egs/babel/s5d/local/lonestar.py create mode 100755 egs/babel/s5d/local/make_L_align.sh create mode 100755 egs/babel/s5d/local/make_corpus_subset.sh create mode 100755 egs/babel/s5d/local/make_ecf_subset.sh create mode 100755 egs/babel/s5d/local/make_lexicon_fst_special.pl create mode 100755 egs/babel/s5d/local/make_lexicon_subset.sh create mode 100644 egs/babel/s5d/local/make_wordlist.sh create mode 100755 egs/babel/s5d/local/map_lang.sh create mode 100755 egs/babel/s5d/local/naive_comb.pl create mode 100755 egs/babel/s5d/local/nist_eval/create_compound_set.sh create mode 100755 egs/babel/s5d/local/nist_eval/create_new_language_configs.FLP.sh create mode 100755 egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh create mode 100755 egs/babel/s5d/local/nist_eval/export_systems.sh create mode 100755 egs/babel/s5d/local/nist_eval/filter_data.sh create mode 100755 egs/babel/s5d/local/nist_eval/get_training_times.sh create mode 100755 egs/babel/s5d/local/nist_eval/make_release.sh create mode 100755 egs/babel/s5d/local/nist_eval/split_compound_set.sh create mode 100755 egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh create mode 100755 egs/babel/s5d/local/nnet3/run_blstm.sh create mode 100755 egs/babel/s5d/local/nnet3/run_blstm_realigned.sh create mode 100755 egs/babel/s5d/local/nnet3/run_ivector_common.sh create mode 100755 egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh create mode 100755 egs/babel/s5d/local/nnet3/run_lstm.sh create mode 100755 egs/babel/s5d/local/nnet3/run_lstm_realigned.sh create mode 100755 egs/babel/s5d/local/nnet3/run_tdnn.sh create mode 100755 egs/babel/s5d/local/normalize_transcript.pl create mode 100644 egs/babel/s5d/local/optimize/OptimizeParams.pm create mode 100755 egs/babel/s5d/local/optimize2.pl create mode 100755 egs/babel/s5d/local/prepare_acoustic_training_data.pl create mode 100644 egs/babel/s5d/local/prepare_extended_lexicon.sh create mode 100755 egs/babel/s5d/local/prepare_lexicon.pl create mode 100755 egs/babel/s5d/local/prepare_stm.pl create mode 100755 egs/babel/s5d/local/prepare_unicode_lexicon.py create mode 100755 egs/babel/s5d/local/reestimate_langp.sh create mode 100755 egs/babel/s5d/local/resegment/evaluate_segmentation.pl create mode 100755 egs/babel/s5d/local/resegment/generate_segments.sh create mode 100755 
egs/babel/s5d/local/resegment/segmentation.py create mode 100755 egs/babel/s5d/local/resegment/train_segmentation.sh create mode 100755 egs/babel/s5d/local/rttm_to_text.pl create mode 100755 egs/babel/s5d/local/run_cleanup_segmentation.sh create mode 100755 egs/babel/s5d/local/run_kws_stt_task.sh create mode 100755 egs/babel/s5d/local/run_kws_stt_task2.sh create mode 120000 egs/babel/s5d/local/score.sh create mode 100755 egs/babel/s5d/local/score_combine.sh create mode 100755 egs/babel/s5d/local/score_map.sh create mode 100755 egs/babel/s5d/local/score_mbr.sh create mode 100755 egs/babel/s5d/local/score_sctk_prune.sh create mode 100755 egs/babel/s5d/local/score_stm.sh create mode 100755 egs/babel/s5d/local/search/analyze_stats.pl create mode 100755 egs/babel/s5d/local/search/annotate_kwlist.pl create mode 100755 egs/babel/s5d/local/search/combine.sh create mode 100755 egs/babel/s5d/local/search/combine_results.pl create mode 100755 egs/babel/s5d/local/search/combine_special.sh create mode 100755 egs/babel/s5d/local/search/compile_keywords.sh create mode 100755 egs/babel/s5d/local/search/compile_proxy_keywords.sh create mode 100755 egs/babel/s5d/local/search/create_categories.pl create mode 100755 egs/babel/s5d/local/search/filter_by_category.pl create mode 100755 egs/babel/s5d/local/search/filter_kws_results.pl create mode 100755 egs/babel/s5d/local/search/normalize.sh create mode 100755 egs/babel/s5d/local/search/normalize_categories.pl create mode 100755 egs/babel/s5d/local/search/normalize_results_kst.pl create mode 100755 egs/babel/s5d/local/search/per_category_stats.pl create mode 100755 egs/babel/s5d/local/search/rttm_to_hitlists.sh create mode 100755 egs/babel/s5d/local/search/run_phn_search.sh create mode 100755 egs/babel/s5d/local/search/run_search.sh create mode 100755 egs/babel/s5d/local/search/run_syll_search.sh create mode 100755 egs/babel/s5d/local/search/score.sh create mode 100755 egs/babel/s5d/local/search/search.sh create mode 100755 egs/babel/s5d/local/search/setup.sh create mode 100755 egs/babel/s5d/local/search/utt_to_files.pl create mode 100755 egs/babel/s5d/local/search/write_kwslist.pl create mode 100755 egs/babel/s5d/local/search_index.sh create mode 100644 egs/babel/s5d/local/setup_categories.sh create mode 100755 egs/babel/s5d/local/shadow_set_kws_search.sh create mode 100755 egs/babel/s5d/local/show_lattice.sh create mode 100755 egs/babel/s5d/local/split_ctms.sh create mode 100755 egs/babel/s5d/local/stm2text.pl create mode 100755 egs/babel/s5d/local/subset_atwv.pl create mode 100755 egs/babel/s5d/local/subset_kwslist.pl create mode 100755 egs/babel/s5d/local/summarize_logs.pl create mode 100755 egs/babel/s5d/local/syllab/ali_to_syllabs.sh create mode 100755 egs/babel/s5d/local/syllab/create_syll_datadir.sh create mode 100755 egs/babel/s5d/local/syllab/create_syllables.pl create mode 100755 egs/babel/s5d/local/syllab/generate_phone_lang.sh create mode 100755 egs/babel/s5d/local/syllab/generate_syllable_lang.sh create mode 100755 egs/babel/s5d/local/syllab/lattice_word2syll.sh create mode 100755 egs/babel/s5d/local/syllab/map_prons_to_syllables.pl create mode 100755 egs/babel/s5d/local/syllab/run_phones.sh create mode 100755 egs/babel/s5d/local/syllab/run_syllabs.sh create mode 100755 egs/babel/s5d/local/train_g2p.sh create mode 100755 egs/babel/s5d/local/train_lms_srilm.sh create mode 100755 egs/babel/s5d/local/txt_to_rttm.pl create mode 100755 egs/babel/s5d/local/uem_ctm2segments.pl create mode 100644 egs/babel/s5d/nnet3_examples.sh create mode 100755 
egs/babel/s5d/path.sh create mode 100644 egs/babel/s5d/results/RESULTS.105-turkish.flp create mode 100644 egs/babel/s5d/results/RESULTS.106-tagalog.flp create mode 100644 egs/babel/s5d/results/RESULTS.107-vietnamese.flp create mode 100644 egs/babel/s5d/results/kws_results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-03-31T11:34:24-04:00 create mode 100644 egs/babel/s5d/results/kws_results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:04:03-04:00 create mode 100644 egs/babel/s5d/results/kws_results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:12:45-04:00 create mode 100644 egs/babel/s5d/results/kws_results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:21:34-04:00 create mode 100644 egs/babel/s5d/results/kws_results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:25:02-04:00 create mode 100644 egs/babel/s5d/results/kws_results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:27:39-04:00 create mode 100644 egs/babel/s5d/results/kws_results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:29:55-04:00 create mode 100644 egs/babel/s5d/results/results.101-cantonese-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T12:15:22-0500 create mode 100644 egs/babel/s5d/results/results.102-assamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:53:08-0500 create mode 100644 egs/babel/s5d/results/results.103-bengali-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:49:23-0500 create mode 100644 egs/babel/s5d/results/results.104-pashto-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:48:47-0500 create mode 100644 egs/babel/s5d/results/results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-25T15:45:46-05:00 create mode 100644 egs/babel/s5d/results/results.105-turkish-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:43:17-0500 create mode 100644 egs/babel/s5d/results/results.106-tagalog-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:50:17-0500 create mode 100644 egs/babel/s5d/results/results.107-vietnamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:51:53-0500 create mode 100644 egs/babel/s5d/results/results.201-haitian-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T11:46:09-0500 create mode 100644 egs/babel/s5d/results/results.202-swahili.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-21T10:25:47-0500 create mode 100644 egs/babel/s5d/results/results.203-lao-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:50:41-0500 create mode 100644 egs/babel/s5d/results/results.204-tamil-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T19:55:42-0500 create mode 100644 egs/babel/s5d/results/results.205-kurmanji.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:24:13-0500 create mode 100644 egs/babel/s5d/results/results.206-zulu-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:54:01-0500 create mode 100644 egs/babel/s5d/results/results.207-tokpisin.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:25:25-0500 create mode 100644 egs/babel/s5d/results/results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T10:45:54-05:00 create mode 100644 egs/babel/s5d/results/results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-05T10:56:45-05:00 create mode 100644 egs/babel/s5d/results/results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T09:46:16-05:00 create mode 100644 egs/babel/s5d/results/results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T16:17:55-05:00 create mode 100644 egs/babel/s5d/results/results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T06:40:39-05:00 create mode 100644 
egs/babel/s5d/results/results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T23:27:09-05:00 create mode 100755 egs/babel/s5d/run-1-main-extend-lex.sh create mode 100755 egs/babel/s5d/run-1-main-unicode.sh create mode 100755 egs/babel/s5d/run-1-main.sh create mode 100755 egs/babel/s5d/run-2-segmentation.sh create mode 100755 egs/babel/s5d/run-2a-nnet-cpu.sh create mode 100755 egs/babel/s5d/run-2a-nnet-ensemble-gpu.sh create mode 100755 egs/babel/s5d/run-2a-nnet-gpu.sh create mode 100755 egs/babel/s5d/run-2a-nnet-mpe.sh create mode 100755 egs/babel/s5d/run-2b-bnf.sh create mode 100755 egs/babel/s5d/run-3a-nnet-mpe.sh create mode 100755 egs/babel/s5d/run-3b-bnf-nnet.sh create mode 100755 egs/babel/s5d/run-3b-bnf-sgmm.sh create mode 100755 egs/babel/s5d/run-4-anydecode.sh create mode 100755 egs/babel/s5d/run-4-phn-anydecode.sh create mode 100755 egs/babel/s5d/run-4-syll-anydecode.sh create mode 100755 egs/babel/s5d/run-4b-anydecode-bnf.sh create mode 100755 egs/babel/s5d/run-6-combine.sh create mode 120000 egs/babel/s5d/steps create mode 120000 egs/babel/s5d/utils create mode 100644 src/fstbin/fsts-project.cc create mode 100644 src/fstbin/fsts-union.cc create mode 100644 src/kwsbin/print-proxy-keywords.cc diff --git a/.gitignore b/.gitignore index cb5191ccccd..16d03d4a193 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,9 @@ GRTAGS GPATH GSYMS +# python compiled sources +*.pyc + # Make dependencies .depend.mk @@ -112,5 +115,13 @@ GSYMS /tools/pthreads*.zip /tools/sequitur /tools/srilm.tgz +/tools/liblbfgs-1.10.tar.gz +/tools/liblbfgs-1.10/ +/tools/openfst-1.5.0.tar.gz +/tools/openfst-1.5.0/ +/tools/srilm-1.7.2-beta.tar.gz +/tools/liblbfgs/ +/tools/sequitur-g2p/ /kaldiwin_vs* + diff --git a/egs/babel/s5/local/make_pitch.sh b/egs/babel/s5/local/make_pitch.sh index 107016d78a9..f3597f504dd 100755 --- a/egs/babel/s5/local/make_pitch.sh +++ b/egs/babel/s5/local/make_pitch.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash # Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey) # Bagher BabaAli @@ -50,7 +50,7 @@ mkdir -p $expdir/log || exit 1; scp=$data/wav.scp -[ ! -s $KALDI_ROOT ] && KALDI_ROOT=../../.. +[ ! -s $KALDI_ROOT ] && KALDI_ROOT=../../.. ( # this is for back compatiblity: cd $KALDI_ROOT/tools @@ -92,7 +92,7 @@ done basename=`basename $data` wavdir=$pitchdir/temp_wav_$basename mkdir -p $wavdir - + if [ -f $data/segments ] || grep '|' $data/wav.scp >/dev/null; then wav_scp=$expdir/wav.scp cat $data/segments | awk -v dir=$wavdir '{key=$1; printf("%s %s/%s.wav\n", key, dir, key);}' \ @@ -104,7 +104,7 @@ if [ -f $data/segments ] || grep '|' $data/wav.scp >/dev/null; then else # create a fake segments file that takes the whole file; this is an easy way # to copy to static wav files. Note: probably this has not been tested. - cat $data/wav.scp | awk '{print $1, $1, 0.0, -1.0}' > $expdir/fake_segments + cat $data/wav.scp | awk '{print $1, $1, 0.0, -1.0}' > $expdir/fake_segments segments=$expdir/fake_segments fi if [ $stage -le 0 ]; then @@ -155,11 +155,11 @@ if [ $stage -le 1 ]; then fi # I don't want to put a separate script in svn just for this, so creating a temporary -# script file in the experimental directory. Quotes around 'EOF' disable any +# script file in the experimental directory. Quotes around 'EOF' disable any # interpretation in the here-doc. 
cat <<'EOF' > $expdir/convert.sh #!/bin/bash -sacc_flist=$1 +sacc_flist=$1 scpfile=$2 [ $# -ne 2 ] && echo "Usage: convert.sh " && exit 1; @@ -247,7 +247,7 @@ exit 0; # rm $expdir/.error 2>/dev/null # # for ((n=1; n<=nj; n++)); do -# # mkdir -p "$expdir/$n" +# # mkdir -p "$expdir/$n" # # done # # $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ @@ -297,8 +297,8 @@ exit 0; # rm $expdir/wav.*.scp $expdir/segments.* 2>/dev/null -# nf=`cat $data/pitchs.scp | wc -l` -# nu=`cat $data/utt2spk | wc -l` +# nf=`cat $data/pitchs.scp | wc -l` +# nu=`cat $data/utt2spk | wc -l` # if [ $nf -ne $nu ]; then # echo "It seems not all of the feature files were successfully ($nf != $nu);" # echo "consider using utils/fix_data_dir.sh $data" diff --git a/egs/babel/s5c/local/CHECKPOINT.sh b/egs/babel/s5c/local/CHECKPOINT.sh index 91b64d7fe1a..ed0ddd18399 100755 --- a/egs/babel/s5c/local/CHECKPOINT.sh +++ b/egs/babel/s5c/local/CHECKPOINT.sh @@ -1,11 +1,11 @@ #!/bin/bash function GETAPPROVAL { - until false ; do + until false ; do echo "Do you want to run the command (y/n)?" read -n 1 WISH - - if [ "$WISH" == "y" ]; then + + if [ "$WISH" == "y" ]; then return true; elif [ "$WISH" == "n" ]; then return false; @@ -21,11 +21,11 @@ function ESCAPE_PARAMS { if [[ "$v" == *"<"* ]]; then out="$out \"$v\"" - elif [[ "$v" == *">"* ]] ; then + elif [[ "$v" == *">"* ]] ; then out="$out \"$v\"" - elif [[ "$v" == *"|"* ]] ; then + elif [[ "$v" == *"|"* ]] ; then out="$out \'$v\'" - elif [[ "$v" == *" "* ]] ; then + elif [[ "$v" == *" "* ]] ; then out="$out \"$v\"" else out="$out $v" @@ -76,7 +76,7 @@ function CHECKPOINT { if [ !$INTERACTIVE_CHECKPOINT ] ; then eval `ESCAPE_PARAMS "$@"` - else + else APPROVAL=GETAPPROVAL if $APPROVAL ; then eval `ESCAPE_PARAMS $@` @@ -87,7 +87,7 @@ function CHECKPOINT { echo -e ${COLOR_RED}"CHECKPOINT FAILURE: The command returned non-zero status" >&2 echo -e " rerun the script with the parameter -c $LAST_GOOD_NAME=$COUNTER" >&2 echo -e "COMMAND">&2 - echo -e " " "$@" ${COLOR_RED} >&2 + echo -e " " "$@" ${COLOR_RED} >&2 exit 1 fi @@ -97,7 +97,7 @@ function CHECKPOINT { echo -e "$@"${COLOR_DEFAULT} >&2 fi - COUNTER=$(( $COUNTER + 1 )) + COUNTER=$(( $COUNTER + 1 )) eval export $COUNTER_NAME=$COUNTER } diff --git a/egs/babel/s5c/local/ali_to_rttm.sh b/egs/babel/s5c/local/ali_to_rttm.sh index 63cf8f44dc4..09df9a15805 100755 --- a/egs/babel/s5c/local/ali_to_rttm.sh +++ b/egs/babel/s5c/local/ali_to_rttm.sh @@ -42,7 +42,7 @@ if [ $# != 3 ]; then exit 1; fi -set -e +set -e set -o pipefail set -u @@ -65,7 +65,7 @@ fi $cmd $dir/log/align_to_words.log \ ali-to-phones $dir/final.mdl "ark:gunzip -c $dir/ali.*.gz|" ark,t:- \| \ phones-to-prons $lang/L_align.fst $wbegin $wend ark:- "ark,s:utils/sym2int.pl -f 2- --map-oov '$oov' $lang/words.txt <$data/text|" ark,t:- \| \ - prons-to-wordali ark:- "ark:ali-to-phones --write-lengths=true $dir/final.mdl 'ark:gunzip -c $dir/ali.*.gz|' ark,t:- |" ark,t:$dir/align.txt + prons-to-wordali ark:- "ark:ali-to-phones --write-lengths=true $dir/final.mdl 'ark:gunzip -c $dir/ali.*.gz|' ark,t:- |" ark,t:$dir/align.txt echo "$0: done writing alignments." 
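The three-binary pipeline in ali_to_rttm.sh above is fairly dense, so here is the same command unpacked as a plain pipeline with one comment per stage. This is an illustrative sketch only (the $cmd wrapper and the escaped \| from the script are dropped); it assumes the same $dir, $lang, $data, $oov, $wbegin and $wend variables that the script itself sets up.

  # Stage 1: expand transition-id alignments into phone sequences.
  ali-to-phones $dir/final.mdl "ark:gunzip -c $dir/ali.*.gz|" ark,t:- |
    # Stage 2: segment each phone sequence into word pronunciations, using the
    # word-boundary symbols ($wbegin, $wend) in L_align.fst and the reference
    # transcripts mapped to integers (OOVs replaced by '$oov').
    phones-to-prons $lang/L_align.fst $wbegin $wend ark:- \
      "ark,s:utils/sym2int.pl -f 2- --map-oov '$oov' $lang/words.txt <$data/text|" ark,t:- |
    # Stage 3: attach per-phone lengths to each word, giving time-aligned words.
    prons-to-wordali ark:- \
      "ark:ali-to-phones --write-lengths=true $dir/final.mdl 'ark:gunzip -c $dir/ali.*.gz|' ark,t:- |" \
      ark,t:$dir/align.txt

The resulting align.txt carries the word-level timing that the rest of ali_to_rttm.sh converts into the RTTM file.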
diff --git a/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl b/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl index 198da36da5a..a4c80cef345 100755 --- a/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl +++ b/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl @@ -26,7 +26,7 @@ Allowed options: EOU -GetOptions(); +GetOptions(); @ARGV >= 2 || die $Usage; @@ -77,7 +77,7 @@ if ($count == 0) { $output .= "$value"; $count ++; next; - } + } if ($count == 6) { $output .= ", ..."; last; diff --git a/egs/babel/s5c/local/apply_g2p.sh b/egs/babel/s5c/local/apply_g2p.sh index f47274cb21c..385b1f3536e 100755 --- a/egs/babel/s5c/local/apply_g2p.sh +++ b/egs/babel/s5c/local/apply_g2p.sh @@ -2,7 +2,7 @@ # Copyright 2014 Johns Hopkins University (Author: Yenda Trmal) # Apache 2.0 -# Begin configuration section. +# Begin configuration section. iters=5 stage=0 encoding='utf-8' @@ -82,15 +82,15 @@ cat $output/output.* > $output/output #Remap the words from output file back to the original casing #Conversion of some of thems might have failed, so we have to be careful #and use the transform_map file we generated beforehand -#Also, because the sequitur output is not readily usable as lexicon (it adds +#Also, because the sequitur output is not readily usable as lexicon (it adds #one more column with ordering of the pron. variants) convert it into the proper lexicon form output_lex=$output/lexicon.lex if [ ! -z $icu_transform ] ; then #also, the transform is generally N -> 1, i.e. we have to take #extra care of words that might have been mapped into the same one - perl -e 'open(WORDS, $ARGV[0]) or die "Could not open file $ARGV[0]"; - while() { chomp; @F=split; - if ($MAP{$F[0]} ) { push @{$MAP{$F[0]}}, $F[1]; } + perl -e 'open(WORDS, $ARGV[0]) or die "Could not open file $ARGV[0]"; + while() { chomp; @F=split; + if ($MAP{$F[0]} ) { push @{$MAP{$F[0]}}, $F[1]; } else { $MAP{$F[0]} = [$F[1]]; } } close(WORDS); @@ -101,7 +101,7 @@ if [ ! -z $icu_transform ] ; then next; } foreach $word (@{$MAP{$F[0]}} ) { - print "$word\t$F[2]\t$F[3]\n"; + print "$word\t$F[2]\t$F[3]\n"; } } close(LEX); diff --git a/egs/babel/s5c/local/apply_map_tab_preserving.pl b/egs/babel/s5c/local/apply_map_tab_preserving.pl index 2a3238c04a3..b57262f1930 100755 --- a/egs/babel/s5c/local/apply_map_tab_preserving.pl +++ b/egs/babel/s5c/local/apply_map_tab_preserving.pl @@ -12,8 +12,8 @@ # this version preserves tabs. if (@ARGV > 0 && $ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; + shift @ARGV; + $field_spec = shift @ARGV; if ($field_spec =~ m/^\d+$/) { $field_begin = $field_spec - 1; $field_end = $field_spec - 1; } @@ -26,7 +26,7 @@ } } if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; + die "Bad argument to -f option: $field_spec"; } } @@ -70,7 +70,7 @@ $field_offset = 0; for ($n = 0; $n < @A; $n++) { @B = split(" ", $A[$n]); - + for ($x = 0; $x < @B; $x++) { $y = $x + $field_offset; if ( (!defined $field_begin || $y >= $field_begin) @@ -78,12 +78,12 @@ $b = $B[$x]; if (!defined $map{$b}) { if (!$permissive) { - die "apply_map.pl: undefined key $a\n"; + die "apply_map.pl: undefined key $a\n"; } else { print STDERR "apply_map.pl: warning! 
missing key $a\n"; } } else { - $B[$x] = $map{$b}; + $B[$x] = $map{$b}; } } } diff --git a/egs/babel/s5c/local/augment_original_stm.pl b/egs/babel/s5c/local/augment_original_stm.pl index 4c58ccc6271..c5ad87fd286 100755 --- a/egs/babel/s5c/local/augment_original_stm.pl +++ b/egs/babel/s5c/local/augment_original_stm.pl @@ -8,7 +8,7 @@ #As a result, the scoring will be done on per-speaker basis as well #As the segment from segment mapping generally do not correspond to #the segmentation of the original STM file, it combines the files -#segments and utt2spk to work out the correct speaker ID for +#segments and utt2spk to work out the correct speaker ID for #the reference segment #In case of overlay, it will either use the previous speaker or #prints out an error message diff --git a/egs/babel/s5c/local/best_path_weights.sh b/egs/babel/s5c/local/best_path_weights.sh index 8e88a3610a4..52782ee3655 100755 --- a/egs/babel/s5c/local/best_path_weights.sh +++ b/egs/babel/s5c/local/best_path_weights.sh @@ -16,19 +16,19 @@ # limitations under the License. -# This script combines frame-level posteriors from different decode -# directories. The first decode directory is assumed to be the primary +# This script combines frame-level posteriors from different decode +# directories. The first decode directory is assumed to be the primary # and is used to get the best path. The posteriors from other decode -# directories are interpolated with the posteriors of the best path. -# The output is a new directory with final.mdl, tree from the primary -# decode-dir and the best path alignments and weights in a decode-directory +# directories are interpolated with the posteriors of the best path. +# The output is a new directory with final.mdl, tree from the primary +# decode-dir and the best path alignments and weights in a decode-directory # with the same basename as the primary directory. # This is typically used to get better posteriors for semisupervised training # of DNN -# e.g. local/combine_posteriors.sh exp/tri6_nnet/decode_train_unt.seg +# e.g. local/combine_posteriors.sh exp/tri6_nnet/decode_train_unt.seg # exp/sgmm_mmi_b0.1/decode_fmllr_train_unt.seg_it4 exp/combine_dnn_sgmm -# Here the final.mdl and tree are copied from exp/tri6_nnet to -# exp/combine_dnn_sgmm. best_path_ali.*.gz obtained from the primary dir and +# Here the final.mdl and tree are copied from exp/tri6_nnet to +# exp/combine_dnn_sgmm. 
best_path_ali.*.gz obtained from the primary dir and # the interpolated posteriors in weights.*.gz are placed in # exp/combine_dnn_sgmm/decode_train_unt.seg @@ -115,7 +115,7 @@ for i in `seq 0 $[num_sys-1]`; do echo $nj > $out_decode/num_jobs else if [ $nj != `cat $decode_dir/num_jobs` ]; then - echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" + echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" exit 1; fi fi diff --git a/egs/babel/s5c/local/check_models.sh b/egs/babel/s5c/local/check_models.sh index d02fc4e561a..88b3dacc94b 100755 --- a/egs/babel/s5c/local/check_models.sh +++ b/egs/babel/s5c/local/check_models.sh @@ -4,7 +4,7 @@ check_model () { model=$1 if [ -s $model ]; then echo $model - else + else dir=`dirname $model` latest_model=`ls -lt $dir/{?,??}.mdl 2>/dev/null | head -1 | awk '{print $9}'` echo "*$model is not there, latest is: $latest_model" diff --git a/egs/babel/s5c/local/check_wers.sh b/egs/babel/s5c/local/check_wers.sh index ebd6bb28790..10e1a89ee3a 100755 --- a/egs/babel/s5c/local/check_wers.sh +++ b/egs/babel/s5c/local/check_wers.sh @@ -4,7 +4,7 @@ check_wer () { dir=$1 - if [ -d $dir ]; then + if [ -d $dir ]; then seen_dir=false for ddir in $dir/decode*; do if [ -d $ddir ]; then @@ -34,7 +34,7 @@ for n in `seq 10`; do fi done -if [ $# != 0 ]; then +if [ $# != 0 ]; then echo "Usage: local/check_wers.sh [--final] [--char]" exit 1; fi diff --git a/egs/babel/s5c/local/cmu_uem2kaldi_dir.sh b/egs/babel/s5c/local/cmu_uem2kaldi_dir.sh index a8fcc39eba5..f320cfa19cd 100755 --- a/egs/babel/s5c/local/cmu_uem2kaldi_dir.sh +++ b/egs/babel/s5c/local/cmu_uem2kaldi_dir.sh @@ -30,12 +30,12 @@ mkdir -p $datadir echo "Converting `basename $database` to kaldi directory $datadir " cat $database | perl -pe 's:.+(BABEL):BABEL:; s:\}\s+\{FROM\s+: :; s:\}\s+\{TO\s+: :; s:\}.+::;' | \ - perl -ne '@K = split; - $utteranceID = @K[0]; - $utteranceID =~ s:[^_]+_[^_]+_[^_]+_::; - $utteranceID =~ s:([^_]+)_(.+)_(inLine|scripted):${1}_A_${2}:; - $utteranceID =~ s:([^_]+)_(.+)_outLine:${1}_B_${2}:; - $utteranceID .= sprintf ("_%06i", (100*@K[2])); + perl -ne '@K = split; + $utteranceID = @K[0]; + $utteranceID =~ s:[^_]+_[^_]+_[^_]+_::; + $utteranceID =~ s:([^_]+)_(.+)_(inLine|scripted):${1}_A_${2}:; + $utteranceID =~ s:([^_]+)_(.+)_outLine:${1}_B_${2}:; + $utteranceID .= sprintf ("_%06i", (100*@K[2])); printf("%s %s %.2f %.2f\n", $utteranceID, @K[0], @K[1], @K[2]);' | sort > $datadir/segments if [ ! -z $filelist ] ; then @@ -66,12 +66,12 @@ perl -ne '{chomp; @K=split; $utt{@K[1]}.=" @K[0]";} # 4. Create the wav.scp file: sph2pipe=`which sph2pipe || which $KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe` if [ $? -ne 0 ] ; then - echo "Could not find sph2pipe binary. Add it to PATH" + echo "Could not find sph2pipe binary. Add it to PATH" exit 1; fi sox=`which sox` if [ $? -ne 0 ] ; then - echo "Could not find sox binary. Add it to PATH" + echo "Could not find sox binary. Add it to PATH" exit 1; fi @@ -84,19 +84,19 @@ echo "Creating the $datadir/wav.scp file" elif [ -f $audiopath/audio/$file.wav ] ; then echo "$file $sox $audiopath/audio/$file.wav -r 8000 -c 1 -b 16 -t wav - downsample |" else - echo "Audio file $audiopath/audio/$file.sph does not exist!" >&2 + echo "Audio file $audiopath/audio/$file.sph does not exist!" >&2 exit 1 fi - done | sort -u > $datadir/wav.scp - if [ $? -ne 0 ] ; then - echo "Error producing the wav.scp file" + done | sort -u > $datadir/wav.scp + if [ $? 
-ne 0 ] ; then + echo "Error producing the wav.scp file" exit 1 fi -) || exit 1 +) || exit 1 l1=`wc -l $datadir/wav.scp | cut -f 1 -d ' ' ` echo "wav.scp contains $l1 files" -if [ ! -z $filelist ] ; then +if [ ! -z $filelist ] ; then l2=`wc -l $filelist | cut -f 1 -d ' '` echo "filelist `basename $filelist` contains $l2 files" diff --git a/egs/babel/s5c/local/create_shadow_dataset.sh b/egs/babel/s5c/local/create_shadow_dataset.sh index 6783ee49770..49467ed28c1 100755 --- a/egs/babel/s5c/local/create_shadow_dataset.sh +++ b/egs/babel/s5c/local/create_shadow_dataset.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2012 Johns Hopkins University +# Copyright 2012 Johns Hopkins University # Apache 2.0. stage=0 @@ -29,8 +29,8 @@ if [ $stage -le 1 ] ; then #zkombinovat ecf echo "Combining ECF files..." perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; @@ -87,8 +87,8 @@ if [ $stage -le 2 ] ; then #zkombinovat kwlist echo "Combining the KWLIST files" perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; @@ -107,7 +107,7 @@ if [ $stage -le 2 ] ; then if ( $src1->{language} ne $src2->{language} ) { die "KWLIST languages differ in the source kwlist.xml files"; } - + $tgt->{ecf_filename} = ""; $tgt->{language}=$src1->{language}; $tgt->{compareNormalize}=$src1->{compareNormalize}; @@ -143,8 +143,8 @@ fi if [ $stage -le 3 ] ; then echo "Making KWLIST maps" perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; diff --git a/egs/babel/s5c/local/cstr_ndx2flist.pl b/egs/babel/s5c/local/cstr_ndx2flist.pl index d19db421a9f..79daa1a99db 100755 --- a/egs/babel/s5c/local/cstr_ndx2flist.pl +++ b/egs/babel/s5c/local/cstr_ndx2flist.pl @@ -16,7 +16,7 @@ # limitations under the License. # This is modified from the script in standard Kaldi recipe to account -# for the way the WSJ data is structured on the Edinburgh systems. +# for the way the WSJ data is structured on the Edinburgh systems. # - Arnab Ghoshal, 12/1/12 # This program takes as its standard input an .ndx file from the WSJ corpus that looks @@ -25,7 +25,7 @@ #;; #;; Index for WSJ0 SI-short Sennheiser training data #;; Data is read WSJ sentences, Sennheiser mic. 
-#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts +#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts #;; per speaker TI) = 7236 utts #;; #11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 diff --git a/egs/babel/s5c/local/ctm2segments.pl b/egs/babel/s5c/local/ctm2segments.pl index 26a786c88b9..55a8bd84fc8 100755 --- a/egs/babel/s5c/local/ctm2segments.pl +++ b/egs/babel/s5c/local/ctm2segments.pl @@ -45,21 +45,21 @@ chop $line; my @entries = split(/ /, $line); die "Cannot parse line \"$line\"" if scalar @entries != 6; - + ($filename, my $chann_id, my $beg, my $end, my $word, my $conf) = @entries; - - $total_seconds += $end * 1.0; - + + $total_seconds += $end * 1.0; + if ($conf >= $cf_needed ) { if ( $words ne "" ) { #print "Extend segment\n"; $words .= " $word"; - $seg_end = $beg * 1.0 + $end*1.0; + $seg_end = $beg * 1.0 + $end*1.0; } else { #start a new segment #print "Start segment\n"; $seg_start = $beg; - $seg_end = $beg * 1.0 + $end*1.0; + $seg_end = $beg * 1.0 + $end*1.0; $words = $word; } } else { @@ -75,14 +75,14 @@ $extracted_seconds+= ($seg_end - $seg_start); $seg_start -= $extend_segments; - $seg_end += $extend_segments; + $seg_end += $extend_segments; my $spk_id=$filename_parts[3] . "_" . $channel; my $utt_id = $spk_id . "_" . join("_", @filename_parts[4..5]); my $last_part = sprintf("%06d", $seg_start * 100); $utt_id .= "_" . $last_part; #print $utt_id . " $beg \n"; - + #14350_A_20121123_042710_001337 #10901_A_20121128_230024_000227 BABEL_OP1_206_10901_20121128_230024_inLine 2.275 3.265 @@ -111,14 +111,14 @@ $extracted_seconds+= ($seg_end - $seg_start); $seg_start -= $extend_segments; - $seg_end += $extend_segments; + $seg_end += $extend_segments; my $spk_id=$filename_parts[3] . "_" . $channel; my $utt_id = $spk_id . "_" . join("_", @filename_parts[4..5]); my $last_part = sprintf("%06d", $seg_start * 100); $utt_id .= "_" . $last_part; #print $utt_id . " $beg \n"; - + #14350_A_20121123_042710_001337 #10901_A_20121128_230024_000227 BABEL_OP1_206_10901_20121128_230024_inLine 2.275 3.265 diff --git a/egs/babel/s5c/local/datasets/basic_kws.sh b/egs/babel/s5c/local/datasets/basic_kws.sh index 35d6e379658..ed6995b3080 100644 --- a/egs/babel/s5c/local/datasets/basic_kws.sh +++ b/egs/babel/s5c/local/datasets/basic_kws.sh @@ -1,13 +1,13 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. if [ "${dataset_kind}" == "supervised" ] ; then - mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" + mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" optional_variables="my_subset_ecf" else - mandatory_variables="my_ecf_file my_kwlist_file" + mandatory_variables="my_ecf_file my_kwlist_file" optional_variables="my_subset_ecf" fi @@ -23,6 +23,6 @@ if [ ! 
-f ${dataset_dir}/kws/.done ] ; then fi local/kws_setup.sh --case_insensitive $case_insensitive \ "${kws_flags[@]}" "${icu_opt[@]}" \ - $my_ecf_file $my_kwlist_file data/lang ${dataset_dir} || exit 1 - touch ${dataset_dir}/kws/.done + $my_ecf_file $my_kwlist_file $lang ${dataset_dir} || exit 1 + touch ${dataset_dir}/kws/.done fi diff --git a/egs/babel/s5c/local/datasets/extra_kws.sh b/egs/babel/s5c/local/datasets/extra_kws.sh index cb90968a1dc..32031270b36 100644 --- a/egs/babel/s5c/local/datasets/extra_kws.sh +++ b/egs/babel/s5c/local/datasets/extra_kws.sh @@ -1,13 +1,13 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. if [ "${dataset_kind}" == "supervised" ] ; then - mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" + mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" optional_variables="my_subset_ecf" else - mandatory_variables="my_ecf_file my_kwlist_file" + mandatory_variables="my_ecf_file my_kwlist_file" optional_variables="my_subset_ecf" fi @@ -17,7 +17,7 @@ function register_extraid { local dataset_dir=$1 local extraid=$2 echo "Registering $extraid" - echo $extraid >> $dataset_dir/extra_kws_tasks; + echo $extraid >> $dataset_dir/extra_kws_tasks; sort -u $dataset_dir/extra_kws_tasks -o $dataset_dir/extra_kws_tasks } @@ -31,7 +31,7 @@ function setup_oov_search { local data_dir=$1 local source_dir=$2 local extraid=$3 - + local kwsdatadir=$data_dir/${extraid}_kws mkdir -p $kwsdatadir @@ -50,7 +50,7 @@ function setup_oov_search { paste \ <(cat $kwlist | grep -o -P "(?<=kwid=\").*(?=\")") \ <(cat $kwlist | grep -o -P "(?<=).*(?=)" | uconv -f utf-8 -t utf-8 -x Any-Lower) \ - >$kwsdatadir/keywords.txt + >$kwsdatadir/keywords.txt cut -f 2 $kwsdatadir/keywords.txt | \ sed 's/\s\s*/\n/g' | sort -u > $kwsdatadir/oov.txt @@ -61,7 +61,7 @@ function setup_oov_search { if [ ! -f exp/conf_matrix/.done ] ; then local/generate_confusion_matrix.sh --cmd "$decode_cmd" --nj $my_nj \ exp/sgmm5_denlats/dengraph exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix || return 1 - touch exp/conf_matrix/.done + touch exp/conf_matrix/.done fi confusion=exp/conf_matrix/confusions.txt @@ -75,10 +75,13 @@ function setup_oov_search { fi local/apply_g2p.sh --nj $my_nj --cmd "$decode_cmd" \ --var-counts $g2p_nbest --var-mass $g2p_mass \ - $kwsdatadir/oov.txt exp/g2p $kwsdatadir/g2p + $kwsdatadir/oov.txt exp/g2p $kwsdatadir/g2p || return 1 L2_lex=$kwsdatadir/g2p/lexicon.lex - L1_lex=data/local/lexiconp.txt + if [ -z "$L1_lex" ] ; then + L1_lex=data/local/lexiconp.txt + fi + local/kws_data_prep_proxy.sh \ --cmd "$decode_cmd" --nj $my_nj \ --case-insensitive true \ @@ -86,14 +89,14 @@ function setup_oov_search { --phone-cutoff $phone_cutoff \ --pron-probs true --beam $proxy_beam --nbest $proxy_nbest \ --phone-beam $proxy_phone_beam --phone-nbest $proxy_phone_nbest \ - data/lang $data_dir $L1_lex $L2_lex $kwsdatadir + $lang $data_dir $L1_lex $L2_lex $kwsdatadir } kws_flags=( --use-icu true ) if [ "${dataset_kind}" == "supervised" ] ; then - #The presence of the file had been already verified, so just + #The presence of the file had been already verified, so just #add the correct switches kws_flags+=(--rttm-file $my_rttm_file ) fi @@ -107,20 +110,20 @@ if [ ! 
-f $dataset_dir/.done.kws.oov ] ; then touch $dataset_dir/.done.kws.oov fi if [ ${#my_more_kwlists[@]} -ne 0 ] ; then - + touch $dataset_dir/extra_kws_tasks - + for extraid in "${!my_more_kwlists[@]}" ; do #The next line will help us in running only one. We don't really - #know in which directory the KWS setup will reside in, so we will + #know in which directory the KWS setup will reside in, so we will #place the .done file directly into the data directory [ -f $dataset_dir/.done.kws.$extraid ] && continue; kwlist=${my_more_kwlists[$extraid]} local/kws_setup.sh --extraid $extraid --case_insensitive $case_insensitive \ "${kws_flags[@]}" "${icu_opt[@]}" \ - $my_ecf_file $kwlist data/lang ${dataset_dir} || exit 1 - + $my_ecf_file $kwlist $lang ${dataset_dir} || exit 1 + #Register the dataset for default running... #We can do it without any problem here -- the kws_stt_tasks will not #run it, unless called with --run-extra-tasks true switch @@ -129,7 +132,7 @@ if [ ${#my_more_kwlists[@]} -ne 0 ] ; then done for extraid in "${!my_more_kwlists[@]}" ; do #The next line will help us in running only one. We don't really - #know in which directory the KWS setup will reside in, so we will + #know in which directory the KWS setup will reside in, so we will #place the .done file directly into the data directory [ -f $dataset_dir/.done.kws.${extraid}_oov ] && continue; setup_oov_search $dataset_dir $dataset_dir/${extraid}_kws ${extraid}_oov diff --git a/egs/babel/s5c/local/datasets/supervised_pem.sh b/egs/babel/s5c/local/datasets/supervised_pem.sh index c32d73e0718..e131fae40fa 100644 --- a/egs/babel/s5c/local/datasets/supervised_pem.sh +++ b/egs/babel/s5c/local/datasets/supervised_pem.sh @@ -1,4 +1,4 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. diff --git a/egs/babel/s5c/local/datasets/supervised_seg.sh b/egs/babel/s5c/local/datasets/supervised_seg.sh index a681688f480..a5ccd36211b 100644 --- a/egs/babel/s5c/local/datasets/supervised_seg.sh +++ b/egs/babel/s5c/local/datasets/supervised_seg.sh @@ -1,4 +1,4 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. @@ -57,7 +57,7 @@ echo "Creating the $unseg_dir/reco2file_and_channel file" cat $unseg_dir/wav.scp | awk '{print $1, $1, "A";}' > $unseg_dir/reco2file_and_channel cat $unseg_dir/wav.scp | awk '{print $1, $1;}' > $unseg_dir/utt2spk utils/utt2spk_to_spk2utt.pl $unseg_dir/utt2spk > $unseg_dir/spk2utt - + make_plp $unseg_dir $workdir/make_plp $workdir/plp || exit 1 local/resegment/generate_segments.sh --nj $my_nj --cmd "$decode_cmd" \ diff --git a/egs/babel/s5c/local/datasets/supervised_uem.sh b/egs/babel/s5c/local/datasets/supervised_uem.sh index 318518ad86e..5ac1e003d5d 100644 --- a/egs/babel/s5c/local/datasets/supervised_uem.sh +++ b/egs/babel/s5c/local/datasets/supervised_uem.sh @@ -1,4 +1,4 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. 
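Each of the datasets/*.sh fragments patched above is sourced from a decode script rather than executed, and only asserts that the variables named in $mandatory_variables are already defined. As a rough sketch of the contract they rely on (the recipe's actual check_variables_are_set helper may be implemented differently), the check amounts to:

  # Hypothetical sketch of the variable check these sourced fragments assume;
  # the real check_variables_are_set helper may differ.
  function check_variables_are_set {
    local var
    for var in $mandatory_variables ; do
      if [ -z "${!var:-}" ] ; then
        echo "Mandatory variable \$$var is not set!" >&2
        exit 1
      fi
    done
  }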
@@ -6,7 +6,7 @@ eval my_data_cmudb=\$${dataset_type}_data_cmudb if [ "${dataset_kind}" != "supervised" ] ; then - mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" + mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" optional_variables="" else mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" diff --git a/egs/babel/s5c/local/datasets/vocab_kws.sh b/egs/babel/s5c/local/datasets/vocab_kws.sh index 812122bd024..40c1d8e841d 100644 --- a/egs/babel/s5c/local/datasets/vocab_kws.sh +++ b/egs/babel/s5c/local/datasets/vocab_kws.sh @@ -1,13 +1,13 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. if [ "${dataset_kind}" == "supervised" ] ; then - mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" + mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" optional_variables="my_subset_ecf" else - mandatory_variables="my_ecf_file my_kwlist_file" + mandatory_variables="my_ecf_file my_kwlist_file" optional_variables="my_subset_ecf" fi @@ -15,7 +15,7 @@ check_variables_are_set if [ "$dataset_kind" == "shadow" ]; then true #we do not support multiple kw lists for shadow set system - + elif [ ! -f $dataset_dir/.done.kws.fullvocab ] ; then #a This will work for both supervised and unsupervised dataset kinds kws_flags=() @@ -25,25 +25,25 @@ elif [ ! -f $dataset_dir/.done.kws.fullvocab ] ; then if $my_subset_ecf ; then kws_flags+=(--subset-ecf $my_data_list) fi - + #We just could come with some bogus naming scheme, #but as long as the audio files can tell the iarpa lang id, we will use that langid=`ls -1 $my_data_dir/audio/ | head -n 1| cut -d '_' -f 3` - #NB: we assume the default KWS search is already done and will "borrow" + #NB: we assume the default KWS search is already done and will "borrow" #the rttm and ecf files. #We could easily generate the ecf file, but the RTTM assumes the decoding - #had been already done. That could be done + #had been already done. That could be done #Ideally, these files should be generated here! local/kws_setup.sh --kwlist-wordlist true "${kws_flags[@]}" \ --extraid fullvocab $my_ecf_file \ - <(cat data/lang/words.txt | \ - grep -v -F "<" | grep -v -F "#" | \ + <(cat $lang/words.txt | \ + grep -v "^<" | grep -v "^#" | \ awk "{printf \"KWID$langid-FULLVOCAB-%05d %s\\n\", \$2, \$1 }" ) \ - data/lang ${dataset_dir} || exit 1 + $lang ${dataset_dir} || exit 1 - echo fullvocab >> $dataset_dir/extra_kws_tasks; + echo fullvocab >> $dataset_dir/extra_kws_tasks; sort -u $dataset_dir/extra_kws_tasks -o $dataset_dir/extra_kws_tasks touch $dataset_dir/.done.kws.fullvocab fi diff --git a/egs/babel/s5c/local/extend_lexicon.sh b/egs/babel/s5c/local/extend_lexicon.sh index fd0b27a4172..48553dd6279 100755 --- a/egs/babel/s5c/local/extend_lexicon.sh +++ b/egs/babel/s5c/local/extend_lexicon.sh @@ -10,7 +10,7 @@ # two files: lexiconp.txt (this is the lexicon format that has pronunciation # probabilities; the words in the original lexicon have probability one), and # oov2prob, which says how the OOV mass is distributed among the new OOV words -# in the lexicon. +# in the lexicon. 
# It assumes that the syllables in pronunciations in the input lexicon.txt are # separated by tabs, as is normal for the BABEL setup; the syllable boundaries @@ -39,7 +39,7 @@ # because we felt that this would make the mapping harder for g2p to learn. # Instead we mapped the phones to unique letters; this is what the "phone_map" # file is about. Furthermore, in BABEL we have the concept of tags on the -# phones, e.g. in a tonal language, ay_3 might be the phone "ay" with tone 3. +# phones, e.g. in a tonal language, ay_3 might be the phone "ay" with tone 3. # As far as Kaldi is concerned, ay_3 is a single phone. To avoid the number of # letters blowing up too much, we make these tags separate letters when generating # phone_map, so ay_3 might be mapped to kX with ay mapping to k and 3 mapping to @@ -79,7 +79,7 @@ # equal to 0.33 times the probability listed in oov2prob. However, that script # will not allow the unigram probability of any OOV word to be more probable than # the least probable word which was originally in the ARPA file (not counting <s>, -# which generally has probability -99); this is applied as a ceiling on the +# which generally has probability -99); this is applied as a ceiling on the # unknown-word probabilities. Note: the --unk-fraction should probably be # similar to the OOV rate in that language. Calculating the OOV rate on some # dev data is one reasonable way to set this; see the commands at the very @@ -149,7 +149,7 @@ cp $input_lexicon $toplevel_dir/input_lexicon.txt # just to have a record of wh loc=`which ngram-count`; if [ -z $loc ]; then if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... - sdir=`pwd`/../../../tools/srilm/bin/i686-m64 + sdir=`pwd`/../../../tools/srilm/bin/i686-m64 else sdir=`pwd`/../../../tools/srilm/bin/i686 fi @@ -256,21 +256,21 @@ if [ $stage -le -1 ]; then rm $dir/probs.* 2>/dev/null echo '#!/usr/bin/perl -while(1) { +while(1) { $sent = <>; $line=<>; if ($line !~ m/sentences/) { $sent =~ m/^file/ || die "Bad sent $sent"; exit(0); } - $line = <>; if ($line !~ m/logprob= (\S+)/) { die "Bad line $line"; } print "$1 $sent"; + $line = <>; if ($line !~ m/logprob= (\S+)/) { die "Bad line $line"; } print "$1 $sent"; $line = <>; $line eq "\n" || die "expected blank line"; }' >$dir/temp.pl chmod +x $dir/temp.pl $cmd JOB=1:$nj $dir/log/compute_prob.JOB.log \ $ngram -debug 1 -lm $dir/lm.gz -ppl $dir/sents.JOB \| $dir/temp.pl \| sort -gr \> $dir/probs.JOB || exit 1; - if $cleanup; then - rm $dir/sents.*; + if $cleanup; then + rm $dir/sents.*; fi sort -m -gr $dir/probs.* | uniq | head -n $num_prons > $dir/probs - if $cleanup; then - rm $dir/probs.*; + if $cleanup; then + rm $dir/probs.*; fi mass=$(cat $dir/probs | awk '{x += exp($1 * log(10));} END{print x}') @@ -296,7 +296,7 @@ fi # We may lose a little information by doing this, though, because the segmentation # into phonemes may be ambiguous. So we create a mapping from the original phonemes # and tags to letters of the alphabet. Note: tags are things like s_3 for a phone: here -# s is the phone and _3 is the tag. +# s is the phone and _3 is the tag. if [ $stage -le 0 ]; then @@ -375,10 +375,10 @@ if [ $stage -le $[$g2p_iters+1] ]; then awk '{if (NF >= 4) {printf("%s %s ", $1, $3); for (n=4;n<=NF;n++) {printf("%s", $n);} printf("\n"); }}' | \ sort | uniq > $dir/pron2spelling - # Now remove from pron2spelling, any words that appear in $dir/lexiconp_in.txt + # Now remove from pron2spelling, any words that appear in $dir/lexiconp_in.txt # (this also contains the excluded words like ).
cat $dir/pron2spelling | \ - perl -e 'open(F, $ARGV[0]) || die "opening $ARGV[0]"; while(<F>) { @A=split; $seen_word{$A[0]}=1; } + perl -e 'open(F, $ARGV[0]) || die "opening $ARGV[0]"; while(<F>) { @A=split; $seen_word{$A[0]}=1; } while(<STDIN>) { @A=split; if (! $seen_word{$A[2]}) { print; }} ' $dir/lexiconp_in.txt > $dir/pron2spelling.excluded # $dir/pron2spelling.excluded contains lines like #ab syllable1 syllable2 ... # e.g. # Kuku 0.000002642 k>&u k>&u - + cat $dir/probs | \ perl -e ' while(<>){ @A = split; $prob = shift @A; $pron=join("", @A); $pron =~ tr/,//d; print "$pron $_"; } '> $dir/probs.with_pron @@ -402,7 +402,7 @@ if [ $stage -le $[$g2p_iters+1] ]; then # This is so we can get the pronunciation in the same form that we put it in, for # the p2g training, for easier comparison with the lines in $dir/pron2spelling.excluded - perl -e ' ($p2s, $probs_with_pron) = @ARGV; + perl -e ' ($p2s, $probs_with_pron) = @ARGV; open(P2S, "<$p2s" || die); open(PROBS, "<$probs_with_pron")||die; while (<P2S>) { @A = split; @@ -487,7 +487,7 @@ if [ $stage -le $[$g2p_iters+1] ]; then print L "$word\t$pronprob\t$pron"; } close(L); close(W); # wait for sort to finish. ' \ $dir/lexiconp_oov.txt $dir/oov2prob - + # lexiconp_oov.txt contains lines like: #leyanga 0.96471840417664 l 3 j_" a_" N a #leyanga 1 l 3 j_" a_" N g a @@ -497,7 +497,7 @@ if [ $stage -le $[$g2p_iters+1] ]; then #Adlule 9.62418179264897e-08 #Afuna 2.23048402109824e-06 fi - + if [ $stage -le $[$g2p_iters+2] ]; then # put it to the output directory $localdir e.g. data/local/ cat $dir/lexiconp_in.txt $dir/lexiconp_oov.txt | \ @@ -526,7 +526,7 @@ if [ ! -z $dev_text ]; then $oov_rate = 100.0 * (1.0 - ($invoc / $tot)); printf("Seen $invoc out of $tot tokens; token OOV rate is %.2f\n", $oov_rate);' \ $toplevel_dir/lexiconp.txt > $toplevel_dir/new_oov_rates - + # Original type OOV rate cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | sort -u |\ perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;} while(<>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }} @@ -549,7 +549,7 @@ exit 0; ###BELOW HERE IS JUST COMMENTS ########### #cat /export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt | \ -for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do +for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do cat /export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt | \ perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;} while(<>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }} @@ -559,7 +559,7 @@ done #Seen 13675 out of 60613 tokens; OOV rate is 77.44 #Seen 26936 out of 60613 tokens; OOV rate is 55.56 -for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do +for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do cat data/dev10h/text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | \ perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;} while(<>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }} diff --git a/egs/babel/s5c/local/extract_oov_words.pl b/egs/babel/s5c/local/extract_oov_words.pl index fbb6e95286d..08f8f5d1436 100755 --- a/egs/babel/s5c/local/extract_oov_words.pl +++ b/egs/babel/s5c/local/extract_oov_words.pl @@ -5,15 +5,15 @@ use Data::Dumper; $Data::Dumper::Indent = 1; -binmode STDOUT, ":utf8"; -binmode STDIN, ":utf8"; +binmode STDOUT, ":utf8"; +binmode STDIN, ":utf8"; $ignore_oov = 0; $ignore_first_field = 0; for($x = 0; $x <
2; $x++) { if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; + shift @ARGV; + $field_spec = shift @ARGV; if ($field_spec =~ m/^\d+$/) { $field_begin = $field_spec - 1; $field_end = $field_spec - 1; } @@ -26,7 +26,7 @@ } } if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; + die "Bad argument to -f option: $field_spec"; } } } @@ -43,7 +43,7 @@ while() { @A = split(" ", $_); @A == 2 || die "bad line in symbol table file: $_"; - + if ( not defined( $sym2int{$A[0]} ) ) { $sym2int{$A[0]} = []; } @@ -62,7 +62,7 @@ $i = $sym2int{$a}; if (!defined ($i)) { print $a . "\n"; - } + } } } } diff --git a/egs/babel/s5c/local/filter_kwslist.pl b/egs/babel/s5c/local/filter_kwslist.pl index c84a5f6d3c9..7c57b62517a 100755 --- a/egs/babel/s5c/local/filter_kwslist.pl +++ b/egs/babel/s5c/local/filter_kwslist.pl @@ -24,19 +24,19 @@ if(ref($kwentry->{kw}) eq 'ARRAY'){ my @arr = @{$kwentry->{kw}}; my @newarray = (); - + push @newarray, $arr[0]; #$arr[0]->{tbeg} . "\n"; for (my $i = 1; $i < scalar(@arr); $i +=1) { - + my $found = 0; foreach my $kw (@newarray) { - if (( abs($arr[$i]->{tbeg} - $kw->{tbeg}) < $duptime ) && + if (( abs($arr[$i]->{tbeg} - $kw->{tbeg}) < $duptime ) && ( $arr[$i]->{channel} == $kw->{channel}) && ( $arr[$i]->{file} eq $kw->{file}) ) { $found = 1; - + #print $arr[$i]->{tbeg} . "\n"; } } diff --git a/egs/babel/s5c/local/find_transcripts.pl b/egs/babel/s5c/local/find_transcripts.pl index 6429411b864..d34b075e7ea 100755 --- a/egs/babel/s5c/local/find_transcripts.pl +++ b/egs/babel/s5c/local/find_transcripts.pl @@ -21,7 +21,7 @@ # It takes as # Extracts from the dot files the transcripts for a given # dataset (represented by a file list). -# +# @ARGV == 1 || die "find_transcripts.pl dot_files_flist < utterance_ids > transcripts"; $dot_flist = shift @ARGV; @@ -36,7 +36,7 @@ -while(){ +while(){ chop; $uttid = $_; $uttid =~ m:(\w{6})\w\w: || die "Bad utterance id $_"; diff --git a/egs/babel/s5c/local/fix_kwslist.pl b/egs/babel/s5c/local/fix_kwslist.pl index 29afc73e473..33c6dc30e82 100755 --- a/egs/babel/s5c/local/fix_kwslist.pl +++ b/egs/babel/s5c/local/fix_kwslist.pl @@ -81,7 +81,7 @@ sub mysort { print $xml; } else { if (!open(O, ">$fixed_kwslist_out")) { - print "Fail to open output file: $fixed_kwslist_out\n"; + print "Fail to open output file: $fixed_kwslist_out\n"; exit 1; } print O $xml; diff --git a/egs/babel/s5c/local/generate_confusion_matrix.sh b/egs/babel/s5c/local/generate_confusion_matrix.sh index 4bcbacb5ae9..e6b221f7cc0 100755 --- a/egs/babel/s5c/local/generate_confusion_matrix.sh +++ b/egs/babel/s5c/local/generate_confusion_matrix.sh @@ -2,7 +2,7 @@ # Copyright 2014 Johns Hopkins University (Author: Yenda Trmal) # Apache 2.0 -# Begin configuration section. +# Begin configuration section. nj=4 cmd=run.pl acwt=0.1 @@ -86,7 +86,7 @@ cat $confusion_files | cut -f 2- -d ' ' | sed 's/ *; */\n/g'| sort | uniq -c | \ perl -ane ' die unless scalar @F == 3; print "$F[1] $F[2] $F[0]\n"; - ' > $wdir/confusions.txt + ' > $wdir/confusions.txt exit 0 #-echo "Converting alignments to phone sequences..." 
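For reference, the pipeline above reorders uniq -c output of the form "<count> <phone1> <phone2>" into confusions.txt lines of the form "<phone1> <phone2> <count>". A minimal sanity check of that reordering, using hypothetical phone names (a sketch, not taken from the recipe):
printf 'a b\na b\ns z\n' | sort | uniq -c | \
  perl -ane 'die unless scalar @F == 3; print "$F[1] $F[2] $F[0]\n";'
# prints "a b 2" and "s z 1" -- one confusion pair per line, which is the
# format the proxy-keyword scripts consume through their --confusion-matrix option.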
diff --git a/egs/babel/s5c/local/generate_example_kws.sh b/egs/babel/s5c/local/generate_example_kws.sh index 2c849438192..e90752926b3 100755 --- a/egs/babel/s5c/local/generate_example_kws.sh +++ b/egs/babel/s5c/local/generate_example_kws.sh @@ -71,7 +71,7 @@ cat $text | perl -e ' } $min_count++; } - + $total = 20; $current = 0; $min_count = 4; @@ -88,7 +88,7 @@ cat $text | perl -e ' } $min_count++; } - + $total = 10; $current = 0; $min_count = 3; diff --git a/egs/babel/s5c/local/generate_proxy_keywords.sh b/egs/babel/s5c/local/generate_proxy_keywords.sh index 8562953efa4..584f7d7902e 100755 --- a/egs/babel/s5c/local/generate_proxy_keywords.sh +++ b/egs/babel/s5c/local/generate_proxy_keywords.sh @@ -3,7 +3,7 @@ # Copyright 2012-2014 Guoguo Chen # Apache 2.0. -# Begin configuration section. +# Begin configuration section. nj=8 cmd=run.pl beam=-1 # Beam for proxy FST, -1 means no prune @@ -46,7 +46,7 @@ if [ $# -ne 1 ]; then exit 1; fi -set -e +set -e set -o pipefail kwsdatadir=$1 @@ -68,8 +68,34 @@ if $pron_probs; then pron_probs_param="--pron-probs"; fi +cat $kwsdatadir/L1.lex | \ + perl -e ' + while ( $line = <STDIN> ) { + chomp $line; + ($word, $pron) = split " ", $line, 2; + $pron = join(" ", split(" ", $pron)); + push @{$LEX{$pron}}, $word; + } + + open(L1, "| sort -u > $ARGV[0]") or die "Cannot open $ARGV[0]\n"; + open(MAP, "| sort -u > $ARGV[1]") or die "Cannot open $ARGV[1]\n"; + foreach $pron (keys %LEX) { + $head = $LEX{$pron}->[0]; + print L1 "$head $pron\n"; + foreach $alt (@{$LEX{$pron}}) { + print MAP "0 0 $alt $head\n"; + } + } + print MAP "0\n"; + close(L1); + close(MAP); +' $kwsdatadir/L1_dedup.lex $kwsdatadir/L1.revdup.fst.txt + +fstcompile --isymbols=$kwsdatadir/words.txt --osymbols=$kwsdatadir/words.txt $kwsdatadir/L1.revdup.fst.txt | \ + fstarcsort --sort_type=olabel - $kwsdatadir/L1.revdup.fst + ndisambig=`utils/add_lex_disambig.pl \ - $pron_probs_param $kwsdatadir/L1.lex $kwsdatadir/L1_disambig.lex` + $pron_probs_param $kwsdatadir/L1_dedup.lex $kwsdatadir/L1_disambig.lex` ndisambig=$[$ndisambig+1]; # add one disambig symbol for silence in lexicon FST.
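For intuition, a sketch of what the deduplication above does, on hypothetical homophone entries: if L1.lex contains "CAR k a r" and "KAR k a r", then L1_dedup.lex keeps only the head entry "CAR k a r", while L1.revdup.fst.txt holds the one-state map "0 0 CAR CAR", "0 0 KAR CAR" plus the final-state line "0". Composing L1.revdup.fst with the proxy FSTs at the end of this script then restores the alternative spellings that were collapsed into the head word.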
( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $kwsdatadir/disambig.txt @@ -86,11 +112,12 @@ cat $kwsdatadir/L2.lex |\ --osymbols=$kwsdatadir/words.txt - |\ fstinvert | fstarcsort --sort_type=olabel > $kwsdatadir/L2.fst +echo $kwsdatadir/phones.txt phone_disambig_symbol=`grep \#0 $kwsdatadir/phones.txt | awk '{print $2}'` word_disambig_symbol=`grep \#0 $kwsdatadir/words.txt | awk '{print $2}'` -phone_disambig_symbols=`grep \# $kwsdatadir/phones.txt |\ +phone_disambig_symbols=`grep "^#" $kwsdatadir/phones.txt |\ awk '{print $2}' | tr "\n" " "` -word_disambig_symbols=`grep \# $kwsdatadir/words.txt |\ +word_disambig_symbols=`grep "^#" $kwsdatadir/words.txt |\ awk '{print $2}' | tr "\n" " "` cat $kwsdatadir/L1_disambig.lex |\ utils/make_lexicon_fst.pl $pron_probs_param - |\ @@ -139,10 +166,11 @@ $cmd JOB=1:$nj $kwsdatadir/split/log/proxy.JOB.log \ generate-proxy-keywords --verbose=1 \ --proxy-beam=$beam --proxy-nbest=$nbest \ --phone-beam=$phone_beam --phone-nbest=$phone_nbest \ - $kwsdatadir/L2xE.fst $kwsdatadir/L1.fst ark:- ark:$kwsdatadir/split/proxy.JOB.fsts + $kwsdatadir/L2xE.fst $kwsdatadir/L1.fst ark:- ark,t:$kwsdatadir/split/proxy.JOB.fsts proxy_fsts="" for j in `seq 1 $nj`; do proxy_fsts="$proxy_fsts $kwsdatadir/split/proxy.$j.fsts" done -cat $proxy_fsts > $kwsdatadir/keywords.fsts +cat $proxy_fsts | fsttablecompose $kwsdatadir/L1.revdup.fst ark:- ark:- | \ + fsts-project ark:- ark:$kwsdatadir/keywords.fsts diff --git a/egs/babel/s5c/local/get_syllable_text.sh b/egs/babel/s5c/local/get_syllable_text.sh deleted file mode 100755 index 97d2af7ed65..00000000000 --- a/egs/babel/s5c/local/get_syllable_text.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash - -# Copyright Johns Hopkins University 2013 (author: Daniel Povey) -# Apache 2.0. - -if [ $# -ne 7 ]; then - echo "Usage: get_syllable_text.sh " - echo "e.g.: get_syllable_text.sh data/train data/lang ../s5-vietnamese-limited-syllables/data/lang_nopos \\" - echo " ../s5-vietnamese-limited-syllables/data/local/syllables/word2syllable_lexicon_unweighted.fst" - echo " exp/tri5h_ali exp/tri5_align_syllables ../s5-vietnamese-limited-syllables/data/train" - echo "This script copies the data-directory to but converts the text into syllable-level text." - echo "The inputs are as follows (those that are not self-explanatory):" - echo " is the syllable-level lang/ directory that has been built without" - echo " word-position dependency (we'll strip the suffixes from phones and expect them to be compatible with this)" - echo " is a kind of lexicon FST that describes words as syllable sequences." - echo " contains a word-level alignment of the data in " - echo " will be used to put temporary files and logs (make it somewhere in exp/)" - echo " is a data directory to put the syllable-level data; transcripts go to /text" - exit 1; -fi - -[ -f path.sh ] && . ./path.sh - -data=$1 -lang=$2 -lang_nopos=$3 -word2syllable_fst=$4 -alidir=$5 -dir=$6 -tgtdata=$7 - -for f in $data/text $lang/L.fst $lang_nopos/L.fst $word2syllable_fst $alidir/ali.1.gz \ - $alidir/final.mdl $alidir/num_jobs; do - if [ ! -f $f ]; then - echo "Expected file $f to exist" - exit 1; - fi -done - -mkdir -p $dir/log -nj=`cat $alidir/num_jobs` || exit 1; -sil=`cat data/lang/phones/optional_silence.txt` || exit 1 - -! 
( ( for n in `seq $nj`; do gunzip -c $alidir/ali.$n.gz; done ) | \ - ali-to-phones $alidir/final.mdl ark:- ark,t:- | \ - utils/int2sym.pl -f 2- $lang/phones.txt - | \ - sed -E 's/_I( |$)/ /g' | sed -E 's/_E( |$)/ /g' | sed -E 's/_B( |$)/ /g' | sed -E 's/_S( |$)/ /g' | \ - utils/sym2int.pl -f 2- $lang_nopos/phones.txt | \ - gzip -c > $dir/phones.ark.gz ) 2>&1 | tee $dir/log/align.log \ - && echo "Error getting phone-level (non-word-position-dependent) alignments" && exit 1; - -# Get an archive of syllable-level acceptors corresponding to the training data. -# transcripts. We don't have an fstproject program for archives so we use a line of awk. - -! ( cat $data/text | utils/sym2int.pl --map-oov `cat $lang/oov.int` -f 2- $lang/words.txt | \ - transcripts-to-fsts ark:- ark:- | \ - fsttablecompose $word2syllable_fst ark:- ark,t:- | \ - awk '{if (NF < 4) { print; } else { print $1, $2, $3, $3, $5; }}' | \ - gzip -c > $dir/syllables.ark.gz ) 2>&1 | tee $dir/log/get_syllable_fsts.log && \ - echo "Error getting syllable FSTs" && exit 1; - -cp -rT $data $tgtdata || exit 1; -rm -rf $tgtdata/split* - -# From the phone-level transcripts and the syllable-level acceptors, work out -# the syllable sequence for each . Remove consecutive silences. -! ( fsttablecompose $lang_nopos/L.fst "ark:gunzip -c $dir/syllables.ark.gz|" ark:- | \ - fsttablecompose "ark:gunzip -c $dir/phones.ark.gz | transcripts-to-fsts ark:- ark:- |" \ - ark,s,cs:- ark,t:- | fsts-to-transcripts ark:- ark,t:- | int2sym.pl -f 2- $lang_nopos/words.txt | \ - sed "s/$sil $sil/$sil/g" > $tgtdata/text ) && echo "Error getting text data" && exit 1; - -! utils/fix_data_dir.sh $tgtdata/ && echo "Error fixing data dir" && exit 1; - -exit 0; - - - diff --git a/egs/babel/s5c/local/gridsearch.pl b/egs/babel/s5c/local/gridsearch.pl index 7b2ad530fa4..937273286fe 100755 --- a/egs/babel/s5c/local/gridsearch.pl +++ b/egs/babel/s5c/local/gridsearch.pl @@ -78,7 +78,7 @@ sub substitute { sub escape { my @cmd_in = @{$_[0]}; my @cmd = (); - foreach my $x (@cmd_in) { + foreach my $x (@cmd_in) { if ($x =~ m/^\S+$/) { push @cmd, $x } # If string contains no spaces, take # as-is. @@ -100,11 +100,11 @@ sub escape { for (my $i=0; $i < scalar(@ARGV); $i++) { if ($ARGV[$i] eq "-var") { - + $i++; (my $name, my @range) = gen_sequence(split('=', $ARGV[$i])); $VARIABLES{$name}=\@range - + } elsif ($ARGV[$i] eq "-train") { if ( $cmdid ) { if ( $cmdid eq "-eval" ) { @@ -113,7 +113,7 @@ sub escape { @traincmd = @cmd; } } - + $cmdid = $ARGV[$i]; @cmd = (); @@ -167,12 +167,12 @@ sub escape { @out = substitute(\@traincmd, \%params); print "Running train:\n" . join(" ", @out) . "\n"; system(@out) == 0 or die "system @out failed: exit code $?"; - + @out = substitute(\@evalcmd, \%params); print "Running eval:\n" . join(" ", @out) . 
"\n"; system(@out) == 0 or die "system @out failed: exit code $?"; - + } diff --git a/egs/babel/s5c/local/gridsearch2.pl b/egs/babel/s5c/local/gridsearch2.pl index 6645743c114..d09d8b28f0a 100755 --- a/egs/babel/s5c/local/gridsearch2.pl +++ b/egs/babel/s5c/local/gridsearch2.pl @@ -91,17 +91,17 @@ sub substitute { for (my $i=0; $i < scalar(@ARGV); $i++) { if ($ARGV[$i] eq "-var") { - + $i++; (my $name, my @range) = gen_sequence(split('=', $ARGV[$i])); $VARIABLES{$name}=\@range - + } elsif (grep {$_ eq $ARGV[$i]} @known_switches) { if ($cmdid) { print "CMD: $cmdid\n"; my @tmp = @cmd; - $found_switches{$cmdid} = \@tmp; + $found_switches{$cmdid} = \@tmp; pp(%found_switches); } @@ -120,7 +120,7 @@ sub substitute { if ($cmdid) { print "CMD: $cmdid\n"; my @tmp = @cmd; - $found_switches{$cmdid} = \@tmp; + $found_switches{$cmdid} = \@tmp; } pp(%VARIABLES); @@ -136,11 +136,11 @@ sub substitute { my @out; @out = substitute(\@traincmd, \%params); system(@out) == 0 or die "system @out failed: exit code $?"; - + @out = substitute(\@evalcmd, \%params); system(@out) == 0 or die "system @out failed: exit code $?"; - + } diff --git a/egs/babel/s5c/local/kwords2indices.pl b/egs/babel/s5c/local/kwords2indices.pl index 47cc3dc2741..776f66c5951 100755 --- a/egs/babel/s5c/local/kwords2indices.pl +++ b/egs/babel/s5c/local/kwords2indices.pl @@ -5,8 +5,8 @@ use Data::Dumper; $Data::Dumper::Indent = 1; -binmode STDOUT, ":utf8"; -binmode STDIN, ":utf8"; +binmode STDOUT, ":utf8"; +binmode STDIN, ":utf8"; sub permute { @@ -16,10 +16,10 @@ sub permute { return map([$_], @$last); } - return map { - my $left = $_; + return map { + my $left = $_; map([@$left, $_], @$last) - } + } permute(@_); } @@ -32,8 +32,8 @@ sub permute { shift @ARGV; $map_oov = shift @ARGV; } if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; + shift @ARGV; + $field_spec = shift @ARGV; if ($field_spec =~ m/^\d+$/) { $field_begin = $field_spec - 1; $field_end = $field_spec - 1; } @@ -46,7 +46,7 @@ sub permute { } } if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; + die "Bad argument to -f option: $field_spec"; } } } @@ -61,7 +61,7 @@ sub permute { while() { @A = split(" ", $_); @A == 2 || die "bad line in symbol table file: $_"; - + if ( not defined( $sym2int{$A[0]} ) ) { $sym2int{$A[0]} = []; } diff --git a/egs/babel/s5c/local/kws_combine.sh b/egs/babel/s5c/local/kws_combine.sh index 33446915eac..f795c63aad9 100755 --- a/egs/babel/s5c/local/kws_combine.sh +++ b/egs/babel/s5c/local/kws_combine.sh @@ -17,9 +17,9 @@ # Script for system combination using minimum Bayes risk decoding. -# This calls lattice-combine to create a union of lattices that have been +# This calls lattice-combine to create a union of lattices that have been # normalized by removing the total forward cost from them. The resulting lattice -# is used as input to lattice-mbr-decode. This should not be put in steps/ or +# is used as input to lattice-mbr-decode. This should not be put in steps/ or # utils/ since the scores on the combined lattice must not be scaled. # begin configuration section. @@ -71,7 +71,7 @@ for i in `seq 0 $[num_sys-1]`; do offset=`echo $decode_dir | cut -d: -s -f2` # add this to the lm-weight. 
decode_dir=`echo $decode_dir | cut -d: -f1` [ -z "$offset" ] && offset=1 - + weight=$(perl -e "print ($offset/$total_sum);") if [ -f $decode_dir ] ; then systems+="$weight $decode_dir " diff --git a/egs/babel/s5c/local/kws_data_prep.sh b/egs/babel/s5c/local/kws_data_prep.sh index 909e9b2596c..3882c99ce6d 100755 --- a/egs/babel/s5c/local/kws_data_prep.sh +++ b/egs/babel/s5c/local/kws_data_prep.sh @@ -3,7 +3,7 @@ # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) # Apache 2.0. -# Begin configuration section. +# Begin configuration section. case_insensitive=true use_icu=true icu_transform="Any-Lower" @@ -21,11 +21,11 @@ help_message=" Note: most important output is keywords.fsts allowed switches: --case-sensitive # Shall we be case-sensitive or not? - # Please not the case-sensitivness depends + # Please not the case-sensitivness depends # on the shell locale! --use-uconv # Use the ICU uconv binary to normalize casing --icu-transform # When using ICU, use this transliteration - + " [ -f ./path.sh ] && . ./path.sh; # source the path. @@ -39,7 +39,7 @@ if [ $# -ne 3 ]; then fi set -u -set -e +set -e set -o pipefail langdir=$1; @@ -51,8 +51,8 @@ keywords=$kwsdatadir/kwlist.xml mkdir -p $kwsdatadir; cat $keywords | perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; @@ -75,8 +75,8 @@ if $case_insensitive && ! $use_icu ; then echo "$0: Running case insensitive processing" cat $langdir/words.txt | tr '[:lower:]' '[:upper:]' > $kwsdatadir/words.txt [ `cut -f 1 -d ' ' $kwsdatadir/words.txt | sort -u | wc -l` -ne `cat $kwsdatadir/words.txt | wc -l` ] && \ - echo "$0: Warning, multiple words in dictionary differ only in case: " - + echo "$0: Warning, multiple words in dictionary differ only in case: " + cat $kwsdatadir/keywords.txt | tr '[:lower:]' '[:upper:]' | \ sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt > $kwsdatadir/keywords_all.int @@ -84,7 +84,7 @@ elif $case_insensitive && $use_icu ; then echo "$0: Running case insensitive processing (using ICU with transform \"$icu_transform\")" cat $langdir/words.txt | uconv -f utf8 -t utf8 -x "${icu_transform}" > $kwsdatadir/words.txt [ `cut -f 1 -d ' ' $kwsdatadir/words.txt | sort -u | wc -l` -ne `cat $kwsdatadir/words.txt | wc -l` ] && \ - echo "$0: Warning, multiple words in dictionary differ only in case: " + echo "$0: Warning, multiple words in dictionary differ only in case: " paste <(cut -f 1 $kwsdatadir/keywords.txt ) \ <(cut -f 2 $kwsdatadir/keywords.txt | uconv -f utf8 -t utf8 -x "${icu_transform}" ) |\ @@ -107,15 +107,21 @@ fi # Compile keywords into FSTs -if [ -z $silence_word ]; then - transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:$kwsdatadir/keywords.fsts +if [ -s $kwsdatadir/keywords.int ]; then + if [ -z $silence_word ]; then + transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:$kwsdatadir/keywords.fsts + else + silence_int=`grep -w $silence_word $langdir/words.txt | awk '{print $2}'` + [ -z $silence_int ] && \ + echo "$0: Error: could not find integer representation of silence word $silence_word" && exit 1; + transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:- | \ + awk -v 'OFS=\t' -v silint=$silence_int '{if (NF == 4 && $1 != 0) { print $1, $1, silint, silint; } print; }' \ + > $kwsdatadir/keywords.fsts + fi else - silence_int=`grep -w $silence_word $langdir/words.txt | awk '{print $2}'` - [ -z $silence_int ] && \ - echo "$0: Error: could not find integer representation of silence word $silence_word" && 
exit 1; - transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:- | \ - awk -v 'OFS=\t' -v silint=$silence_int '{if (NF == 4 && $1 != 0) { print $1, $1, silint, silint; } print; }' \ - > $kwsdatadir/keywords.fsts + echo "WARNING: $kwsdatadir/keywords.int is zero-size. That means no keyword" + echo "WARNING: was found in the dictionary. That might be OK -- or not." + touch $kwsdatadir/keywords.fsts fi # Create utterance id for each utterance @@ -129,7 +135,7 @@ cat $datadir/segments | \ $idx++; }' > $kwsdatadir/utter_id -# Map utterance to the names that will appear in the rttm file. You have +# Map utterance to the names that will appear in the rttm file. You have # to modify the commands below accoring to your rttm file cat $datadir/segments | awk '{print $1" "$2}' | sort | uniq > $kwsdatadir/utter_map; diff --git a/egs/babel/s5c/local/kws_data_prep_proxy.sh b/egs/babel/s5c/local/kws_data_prep_proxy.sh index 787cb009960..04cc59b6499 100755 --- a/egs/babel/s5c/local/kws_data_prep_proxy.sh +++ b/egs/babel/s5c/local/kws_data_prep_proxy.sh @@ -3,7 +3,7 @@ # Copyright 2014 Guoguo Chen # Apache 2.0. -# Begin configuration section. +# Begin configuration section. nj=8 cmd=run.pl beam=-1 # Beam for proxy FST, -1 means no prune @@ -15,6 +15,10 @@ phone_nbest=50 # Use top n best phone sequences in KxL2xE, -1 means all phone_cutoff=5 # We don't generate proxy keywords for OOV keywords that # have less phones than the specified cutoff as they may # introduce a lot false alarms +max_phone_cutoff=9990 # We don't generate proxy keywords for OOV keywords that + # have more phones than this. This can be used when + # we need to use different parameters for keywords of + # different lengths. confusion_matrix= # If supplied, using corresponding E transducer count_cutoff=1 # Minimal count to be considered in the confusion matrix; # will ignore phone pairs that have count less than this. @@ -38,13 +42,13 @@ if [ $# -ne 5 ]; then echo " data/local/tmp.lang/lexiconp.txt oov_lexicon.txt data/dev10h/kws/" echo "allowed options:" echo " --case-sensitive # Being case-sensitive or not" - echo " --icu-transform # Transliteration for upper/lower case" + echo " --icu-transform # Transliteration for upper/lower case" echo " # mapping" echo " --proxy-set # Keyword set for generating proxies" exit 1 fi -set -e +set -e set -o pipefail langdir=$1 datadir=$2 l1_lexicon=$3 l2_lexicon=$4 kwsdatadir=$5 @@ -62,8 +66,8 @@ keywords=$kwsdatadir/kwlist.xml mkdir -p $kwsdatadir/tmp/ cat $keywords | perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; @@ -103,7 +107,7 @@ if $case_insensitive; then else cat $l2_lexicon | sed 's/\s/ /g' > $kwsdatadir/tmp/L2.tmp.lex cp $kwsdatadir/raw_keywords_all.txt $kwsdatadir/keywords_all.txt - + cat $kwsdatadir/keywords_all.txt | \ sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt \ > $kwsdatadir/keywords_all.int @@ -139,11 +143,11 @@ cat $kwsdatadir/keywords_proxy.txt |\ # L1 since it is the lexicon used for the LVCSR training.
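A note on the sed expressions just below: inside a bracket expression "|" is an ordinary character, so the older pattern s/_[B|E|I|S]//g would also have deleted a literal "_|"; s/_[BEIS]//g strips exactly the word-position tags. A one-line check on a hypothetical pronunciation string:
# echo 'k_B a_I t_E' | sed 's/_[BEIS]//g'   prints "k a t"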
cat $kwsdatadir/tmp/L1.tmp.lex | cut -d ' ' -f 1 |\ paste -d ' ' - <(cat $kwsdatadir/tmp/L1.tmp.lex | cut -d ' ' -f 2-|\ - sed 's/_[B|E|I|S]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\ + sed 's/_[BEIS]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\ awk '{if(NF>=2) {print $0}}' > $kwsdatadir/tmp/L1.lex cat $kwsdatadir/tmp/L2.tmp.lex | cut -d ' ' -f 1 |\ paste -d ' ' - <(cat $kwsdatadir/tmp/L2.tmp.lex | cut -d ' ' -f 2-|\ - sed 's/_[B|E|I|S]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\ + sed 's/_[BEIS]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\ awk '{if(NF>=2) {print $0}}' | perl -e ' ($lex1, $words) = @ARGV; open(L, "<$lex1") || die "Fail to open $lex1.\n"; @@ -230,8 +234,10 @@ cat $kwsdatadir/keywords_proxy.txt | perl -e ' print STEDRR "'$0': No pronunciation found for word: $col[$i]\n"; } } - if ($len >= '$phone_cutoff') { + if (($len >= '$phone_cutoff') && ($len <= '$max_phone_cutoff')){ print "$line\n"; + } elsif ($len > '$max_phone_cutoff'){ + print STDERR "'$0': Keyword $col[0] is too long, not generating proxy\n"; } else { print STDERR "'$0': Keyword $col[0] is too short, not generating proxy\n"; } @@ -256,7 +262,7 @@ cat $datadir/segments | \ $idx++; }' > $kwsdatadir/utter_id -# Map utterance to the names that will appear in the rttm file. You have +# Map utterance to the names that will appear in the rttm file. You have # to modify the commands below accoring to your rttm file cat $datadir/segments | awk '{print $1" "$2}' |\ sort | uniq > $kwsdatadir/utter_map; diff --git a/egs/babel/s5c/local/kws_data_prep_syllables.sh b/egs/babel/s5c/local/kws_data_prep_syllables.sh deleted file mode 100755 index c6245e52c9e..00000000000 --- a/egs/babel/s5c/local/kws_data_prep_syllables.sh +++ /dev/null @@ -1,144 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) -# Apache 2.0. - -# Begin configuration section. -silence_word= # Optional silence word to insert (once) between words of the transcript. -# End configuration section. - -echo $0 "$@" - -[ -f ./path.sh ] && . ./path.sh; # source the path. -. parse_options.sh || exit 1; - - -if [ $# -ne 4 ]; then - echo "Usage: local/kws_data_prep_syllables.sh [options] " - echo " e.g.: local/kws_data_prep_syllables.sh data/lang/ data/dev10h/ SIL data/kws/" - echo "Input is in : kwlist.xml, ecf.xml (rttm file not needed)." - echo "The lang directory is expected to be syllable-level. The syllable-lexicon " - echo "is a text file with lines of the form:" - echo "word syllable1 syllable2" - echo "This script is as kws_data_prep.sh, except that the output keywords.fsts" - echo "contains the various alternative syllable-level pronunciations of the input" - echo "words." - echo "Output is in : keywords.txt, kwlist_invocab.xml," - echo " kwlist_outvocab.xml, keywords.fsts; note that the only syllable-level" - echo " output (and the only one that really matters) is keywords.fsts" - echo "Note: most important output is keywords.fsts" - echo " Options:" - echo " --silence-word # Note, this is required. It is a word, e.g. SIL," - echo " # in the syllable lexicon, that's optional." 
- exit 1; -fi - -langdir=$1; -datadir=$2; -syllable_lexicon=$3 -kwsdatadir=$4 -keywords=$kwsdatadir/kwlist.xml - -[ -z $silence_word ] && echo "--silence-word option is required" && exit 1; - -mkdir -p $kwsdatadir; - -cat $keywords | perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; - - use XML::Simple; - use Data::Dumper; - - my $data = XMLin(\*STDIN); - - #print Dumper($data->{kw}); - foreach $kwentry (@{$data->{kw}}) { - #print Dumper($kwentry); - print "$kwentry->{kwid}\t$kwentry->{kwtext}\n"; - } -' > $kwsdatadir/keywords.txt - -[ ! -s "$syllable_lexicon" ] && echo "No such file '$syllable_lexicon' (syllable lexicon), or empty file." && exit 1; - -# The word symbols on the first entry of $syllable_lexicon will be given a symbol-table -# file. We just use this symbol table in this script; the values will never appear -# elsewhere. - -mkdir -p $kwsdatadir/temp - -# Remove any lines with symbols we don't have in our symbol vocabulary. -temp_syllable_lexicon=$kwsdatadir/temp/syllable_lexicon.in -cat $syllable_lexicon | sym2int.pl --map-oov 123456789 -f 2- $langdir/words.txt | grep -v -w 123456789 | \ - int2sym.pl -f 2- $langdir/words.txt > $temp_syllable_lexicon - -n1=`cat $syllable_lexicon | wc -l` -n2=`cat $temp_syllable_lexicon | wc -l` -echo "After removing OOV symbols from word-to-syllable lexicon, #lines changed from $n1 to $n2" - - -if $case_insensitive; then - echo "Running case insensitive processing" - # we turn the first element of each line of $temp_syllable_lexicon into upper case. - tr '[:lower:]' '[:upper:]' < $temp_syllable_lexicon | awk '{print $1}' | \ - paste - <(awk '{for(n=2;n<=NF;n++) { printf("%s ", $n); } print ""; }' <$temp_syllable_lexicon) \ - > $kwsdatadir/temp/syllable_lexicon.txt || exit 1; - - # We turn all but the first element of each line in $kwsdatadir/keywords.txt - # into upper case. 
- tr '[:lower:]' '[:upper:]' < $kwsdatadir/keywords.txt | \ - awk '{for(n=2;n<=NF;n++) { printf("%s ", $n); } print ""; }' | \ - paste <(awk '{print $1}' <$kwsdatadir/keywords.txt) - \ - > $kwsdatadir/temp/keywords.txt || exit 1; -else - cp $temp_syllable_lexicon $kwsdatadir/temp/syllable_lexicon.txt || exit 1; - cp $kwsdatadir/keywords.txt $kwsdatadir/temp/ || exit 1; -fi - -cat $kwsdatadir/temp/syllable_lexicon.txt | awk '{print $1}' | sort | uniq | \ - awk 'BEGIN{print "<eps> 0";} {print $1, NR;}' > $kwsdatadir/temp/words.txt - -sym2int.pl --map-oov 0 -f 2- $kwsdatadir/temp/words.txt < $kwsdatadir/temp/keywords.txt \ - > $kwsdatadir/temp/keywords_all.int - -cat $kwsdatadir/temp/keywords_all.int | \ - grep -v " 0 " | grep -v " 0$" > $kwsdatadir/keywords.int - -cut -f 1 -d ' ' $kwsdatadir/keywords.int | \ - local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_invocab.xml - -cat $kwsdatadir/temp/keywords_all.int | \ - egrep " 0 | 0$" | cut -f 1 -d ' ' | \ - local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_outvocab.xml - -local/make_lexicon_fst_special.pl $kwsdatadir/temp/syllable_lexicon.txt $silence_word | \ - sym2int.pl -f 4 $kwsdatadir/temp/words.txt | \ - sym2int.pl -f 3 $langdir/words.txt | \ - fstcompile | \ - fstarcsort --sort_type=olabel > $kwsdatadir/temp/L.fst || exit 1; - -# Compile keywords into FSTs, compose with lexicon to get syllables -# and project on the input (keeping only syllable labels), -# before writing to keywords.fsts - -transcripts-to-fsts ark:$kwsdatadir/keywords.int ark:- | \ - fsttablecompose $kwsdatadir/temp/L.fst ark:- ark,t:- | \ - awk '{if (NF < 4) { print; } else { print $1, $2, $3, $3, $5; }}' > \ - $kwsdatadir/keywords.fsts - -# Create utterance id for each utterance -cat $datadir/segments | \ - awk '{print $1}' | \ - sort | uniq | perl -e ' - $idx=1; - while(<>) { - chomp; - print "$_ $idx\n"; - $idx++; - }' > $kwsdatadir/utter_id - -# Map utterance to the names that will appear in the rttm file. You have -# to modify the commands below accoring to your rttm file -cat $datadir/segments | awk '{print $1" "$2}' | sort | uniq > $kwsdatadir/utter_map; - -echo "Kws data preparation succeeded" diff --git a/egs/babel/s5c/local/kws_gen_oracle_lattices.sh b/egs/babel/s5c/local/kws_gen_oracle_lattices.sh index aa9e22cca96..b73112b191d 100755 --- a/egs/babel/s5c/local/kws_gen_oracle_lattices.sh +++ b/egs/babel/s5c/local/kws_gen_oracle_lattices.sh @@ -3,7 +3,7 @@ # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) # Apache 2.0. -# Begin configuration section. +# Begin configuration section. cmd=run.pl duptime=0.5 model=final.mdl @@ -35,8 +35,8 @@ mkdir -p $oracledir/log for filename in $lang/words.txt $decodedir/num_jobs \ $data/text $decodedir/lat.1.gz \ $decodedir/../$model ; do - if [[ ! -f $filename ]] ; then - echo "FATAL: File $filename does not exist!" + if [[ ! -f $filename ]] ; then + echo "FATAL: File $filename does not exist!"
exit 1; fi done @@ -44,7 +44,7 @@ done nj=`cat $decodedir/num_jobs` (cd $decodedir; ln -s ../$model final.mdl ) -(cd $oracledir; echo "$nj" > num_jobs ) +(cd $oracledir; echo "$nj" > num_jobs ) $cmd LAT=1:$nj $oracledir/log/lat.LAT.log \ cat $data/text \| \ diff --git a/egs/babel/s5c/local/kws_oracle.sh b/egs/babel/s5c/local/kws_oracle.sh index 44334ba1413..c7aa661664f 100755 --- a/egs/babel/s5c/local/kws_oracle.sh +++ b/egs/babel/s5c/local/kws_oracle.sh @@ -1,23 +1,23 @@ #!/bin/bash # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Jan Trmal) -# 2013 Johns Hopkins University +# 2013 Johns Hopkins University # Apache 2.0. . ./path.sh . ./cmd.sh -# Begin configuration section. +# Begin configuration section. cmd=run.pl -acwt=0.09091 #Acoustic weight -- should not be necessary for oracle lattices +acwt=0.09091 #Acoustic weight -- should not be necessary for oracle lattices duptime=0.6 #Max time difference in which the occurences of the same KW will be seen as duplicates text= # an alternative reference text to use. when not specified, the /text will be used -model= # acoustic model to use +model= # acoustic model to use extraid= # kws setup extra ID (kws task was setup using kws_setup.sh --extraid stage=0 # to resume the computation from different stage # End configuration section. -set -e +set -e set -o pipefail echo "$0 $@" # Print the command line for logging @@ -47,7 +47,7 @@ fi if [ -z "$model" ]; then # if --model was not specified on the command line... srcdir=`dirname $decodedir`; # The model directory is one level up from decoding directory. - model=$srcdir/final.mdl; + model=$srcdir/final.mdl; fi if [ -z $extraid ] ; then # the same logic as with kws_setup.sh @@ -59,7 +59,7 @@ fi nj=`cat $decodedir/num_jobs`; oracledir=$decodedir/kws_oracle -mkdir -p $oracledir +mkdir -p $oracledir mkdir -p $oracledir/log if [ $stage -le 0 ] ; then @@ -119,17 +119,17 @@ if [ $stage -le 4 ]; then echo "=======================================================" ( echo -n "ATWV-full " - grep Occurrence $oracledir/sum.txt | cut -d '|' -f 13 + grep Occurrence $oracledir/sum.txt | cut -d '|' -f 13 ) #-( #-echo -n "ATWV-invocab " - #-grep Occurrence $oracledir/invocab.sum.txt | cut -d '|' -f 13 + #-grep Occurrence $oracledir/invocab.sum.txt | cut -d '|' -f 13 #-) || echo "Error occured getting the invocab results" #-( #-echo -n "ATWV-outvocab " - #-grep Occurrence $oracledir/outvocab.sum.txt | cut -d '|' -f 13 + #-grep Occurrence $oracledir/outvocab.sum.txt | cut -d '|' -f 13 #-) || echo "Error occured getting the outvocab results" echo "=======================================================" diff --git a/egs/babel/s5c/local/kws_score_f4de.sh b/egs/babel/s5c/local/kws_score_f4de.sh index d761e080c1c..cd6948a8a08 100755 --- a/egs/babel/s5c/local/kws_score_f4de.sh +++ b/egs/babel/s5c/local/kws_score_f4de.sh @@ -16,11 +16,11 @@ help_message="$0: score the kwslist using the F4DE scorer from NIST Example: $0 [additional-parameters] where the most important additional parameters can be: - --extraid #for using, when a non-default kws tasks are setup + --extraid #for using, when a non-default kws tasks are setup (using the kws_setup.sh --extraid) for a kaldi-single data-dir --kwlist #allows for an alternative kwlist -- if not set, the default kwlist is taken from - --f4de-prefix #allows for scoring the same results using + --f4de-prefix #allows for scoring the same results using different kwlists and storing them in the same dir " echo $0 $@ @@ -72,8 +72,9 @@ done echo KWSEval -e $ecf -r $rttm -t 
$kwlist \ -s $kwsoutputdir/kwslist.xml -c -o -b -d -f $kwsoutputdir -KWSEval -e $ecf -r $rttm -t $kwlist \ - -s $kwsoutputdir/kwslist.xml -c -o -b -d -f ${kwsoutputdir}${f4de_prefix} || exit 1; +KWSEval -e $ecf -r $rttm -t $kwlist -a --zGlobalMeasures MAP \ + --zGlobalMeasures MAPpct --zGlobalMeasures Optimum --zGlobalMeasures Supremum \ + -s $kwsoutputdir/kwslist.xml -c -o -b -d -f ${kwsoutputdir}${f4de_prefix} || exit 1; duration=`cat ${kwsoutputdir}${f4de_prefix}/sum.txt | grep TotDur | cut -f 3 -d '|' | sed "s/\s*//g"` diff --git a/egs/babel/s5c/local/kws_search.sh b/egs/babel/s5c/local/kws_search.sh index 4b275048e0e..9e998d6c3f9 100755 --- a/egs/babel/s5c/local/kws_search.sh +++ b/egs/babel/s5c/local/kws_search.sh @@ -10,7 +10,7 @@ help_message="$(basename $0): do keyword indexing and search. data-dir is assum Usage: $(basename $0) <lang-dir> <data-dir> <decode-dir>" -# Begin configuration section. +# Begin configuration section. #acwt=0.0909091 min_lmwt=7 max_lmwt=17 @@ -101,7 +101,7 @@ if [ ! -z "$model" ]; then else model_flags= fi - + if [ $stage -le 0 ] ; then if [ ! -f $indices_dir/.done.index ] ; then for lmwt in `seq $min_lmwt $max_lmwt` ; do indices=${indices_dir}_$lmwt mkdir -p $indices - - acwt=`perl -e "print (1.0/$lmwt);"` + + acwt=`perl -e "print (1.0/$lmwt);"` [ ! -z $silence_word ] && silence_opt="--silence-word $silence_word" steps/make_index.sh $silence_opt --cmd "$cmd" --acwt $acwt $model_flags\ --skip-optimization $skip_optimization --max-states $max_states \ diff --git a/egs/babel/s5c/local/kws_setup.sh b/egs/babel/s5c/local/kws_setup.sh index f1036f100de..a6b87ef004f 100755 --- a/egs/babel/s5c/local/kws_setup.sh +++ b/egs/babel/s5c/local/kws_setup.sh @@ -3,7 +3,7 @@ # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal) # Apache 2.0. -# Begin configuration section. +# Begin configuration section. cmd=run.pl case_insensitive=true subset_ecf= @@ -18,7 +18,7 @@ silence_word= # Optional silence word to insert (once) between words of the tra echo "$0 $@" # Print the command line for logging -set -e +set -e set -u set -o pipefail @@ -26,13 +26,13 @@ help_message="$0: Initialize and setup the KWS task directory Usage: $0 <ecf-file> <kwlist-file> <lang-dir> <data-dir> [rttm-file] allowed switches: - --subset-ecf /path/to/filelist # The script will subset the ecf file + --subset-ecf /path/to/filelist # The script will subset the ecf file # to contain only the files from the filelist --rttm-file /path/to/rttm # the preferred way how to specify the rttm - # the older way (as an in-line parameter is + # the older way (as an in-line parameter) is # obsolete and will be removed in near future --case-insensitive # Shall we be case-sensitive or not? - # Please not the case-sensitivness depends + # Please note the case-sensitivity depends # on the shell locale!
--use-icu # Use the ICU uconv binary to normalize casing --icu-transform # When using ICU, use this transliteration @@ -85,13 +85,13 @@ fi mkdir -p $kwsdatadir if [ -z $subset_ecf ] ; then - test -f $kwsdatadir/ecf.xml && rm -f $kwsdatadir/ecf.xml + test -f $kwsdatadir/ecf.xml && rm -f $kwsdatadir/ecf.xml cp "$ecf_file" $kwsdatadir/ecf.xml || exit 1 else local/make_ecf_subset.sh $subset_ecf $ecf_file > $kwsdatadir/ecf.xml fi -if $kwlist_wordlist ; then +if $kwlist_wordlist ; then ( echo '' awk '{ printf(" \n", $1); diff --git a/egs/babel/s5c/local/lattice_to_ctm.sh b/egs/babel/s5c/local/lattice_to_ctm.sh index 08a1b5889a7..5fbde42d237 100755 --- a/egs/babel/s5c/local/lattice_to_ctm.sh +++ b/egs/babel/s5c/local/lattice_to_ctm.sh @@ -39,8 +39,7 @@ if [ -z "$model" ] ; then fi -for f in $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do +for f in $lang/words.txt $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; done @@ -49,17 +48,31 @@ name=`basename $data`; # e.g. eval2000 mkdir -p $dir/scoring/log if [ $stage -le 0 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - set -e -o pipefail \; \ - mkdir -p $dir/score_LMWT/ '&&' \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ - lattice-prune --beam=$beam ark:- ark:- \| \ - lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ - lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ - utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; + if [ ! -f $lang/phones/word_boundary.int ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ + set -e -o pipefail \; \ + mkdir -p $dir/score_LMWT/ '&&' \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ + lattice-prune --beam=$beam ark:- ark:- \| \ + lattice-align-words-lexicon $lang/phones/align_lexicon.int $model ark:- ark:- \| \ + lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \ + utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ + '>' $dir/score_LMWT/$name.ctm || exit 1; + else + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ + set -e -o pipefail \; \ + mkdir -p $dir/score_LMWT/ '&&' \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ + lattice-prune --beam=$beam ark:- ark:- \| \ + lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ + lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \ + utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ + '>' $dir/score_LMWT/$name.ctm || exit 1; + fi fi if [ $stage -le 1 ]; then @@ -76,12 +89,12 @@ if [ $stage -le 1 ]; then grep -v -E '' | \ perl -e '@list = (); %list = (); while(<>) { - chomp; - @col = split(" ", $_); + chomp; + @col = split(" ", $_); push(@list, $_); - $key = "$col[0]" . " $col[1]"; + $key = "$col[0]" . 
" $col[1]"; $list{$key} = 1; - } + } foreach(sort keys %list) { $key = $_; foreach(grep(/$key/, @list)) { diff --git a/egs/babel/s5c/local/lattice_to_ctm_syllable.sh b/egs/babel/s5c/local/lattice_to_ctm_syllable.sh deleted file mode 100755 index 7165a7a04e5..00000000000 --- a/egs/babel/s5c/local/lattice_to_ctm_syllable.sh +++ /dev/null @@ -1,115 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -# begin configuration section. -cmd=run.pl -stage=0 -decode_mbr=true -beam=4 # Use a fairly narrow beam because lattice-align-words is slow-ish. -word_ins_penalty=0.5 -min_lmwt=7 -max_lmwt=17 -cleanup=true -model= - -#end configuration section. - -#debugging stuff -echo $0 $@ - -[ -f ./path.sh ] && . ./path.sh -[ -f ./cmd.sh ] && . ./cmd.sh -. parse_options.sh || exit 1; - -if [ $# -ne 4 ]; then - echo "Usage: $0 [options] " && exit; - echo "This is as lattice_to_ctm.sh, but for syllable-based systems where we want to" - echo "obtain word-level ctms. Here, is a directory like data/local/w2s," - echo "as created by run-6-syllables.sh. It contains:" - echo " G.fst, Ldet.fst, words.txt, word_align_lexicon.int" - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1) # (createCTM | filterCTM )." - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -w2sdir=$3 -dir=$4 - -if [ -z "$model" ] ; then - model=`dirname $dir`/final.mdl # Relative path does not work in some cases - #model=$dir/../final.mdl # assume model one level up from decoding dir. - #[ ! -f $model ] && model=`(set +P; cd $dir/../; pwd)`/final.mdl -fi - -for f in $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz \ - $w2sdir/{G.fst,Ldet.fst,words.txt,word_align_lexicon.int}; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. eval2000 - -mkdir -p $dir/scoring/log - -# we are counting the LM twice since we have both the original, syllable-level LM -# and the new, word-level one, so we scale by 0.5 to get a reasonably scaled -# LM cost. - -if [ $stage -le 0 ]; then - nj=`cat $dir/num_jobs` || exit 1; - $cmd JOB=1:$nj $dir/scoring/log/get_word_lats.JOB.log \ - lattice-compose "ark:gunzip -c $dir/lat.JOB.gz|" $w2sdir/Ldet.fst ark:- \| \ - lattice-determinize ark:- ark:- \| \ - lattice-compose ark:- $w2sdir/G.fst ark:- \| \ - lattice-scale --lm-scale=0.5 ark:- "ark:|gzip -c >$dir/wlat.JOB.gz" || exit 1; -fi - -if [ $stage -le 1 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - mkdir -p $dir/score_LMWT/ '&&' \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/wlat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ - lattice-prune --beam=$beam ark:- ark:- \| \ - lattice-push ark:- ark:- \| \ - lattice-align-words-lexicon --max-expand=10 --output-if-empty=true $w2sdir/word_align_lexicon.int $model ark:- ark:- \| \ - lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ - utils/int2sym.pl -f 5 $w2sdir/words.txt \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; -fi - -if [ $stage -le 2 ]; then - # Remove some stuff we don't want to score, from the ctm. 
- for x in $dir/score_*/$name.ctm; do - cp $x $x.bkup1; - cat $x.bkup1 | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ - grep -v -E '<UNK>|%HESITATION|\(\(\)\)' | \ - grep -v -E '<eps>' | \ - grep -v -E '<noise>' | \ - grep -v -E '<silence>' | \ - grep -v -E '<unk>' | \ - grep -v -E '<v-noise>' | \ - perl -e '@list = (); %list = (); - while(<>) { - chomp; - @col = split(" ", $_); - push(@list, $_); - $key = "$col[0]" . " $col[1]"; - $list{$key} = 1; - } - foreach(sort keys %list) { - $key = $_; - foreach(grep(/$key/, @list)) { - print "$_\n"; - } - }' > $x; - done -fi - -$cleanup && rm $dir/wlat.*.gz - -echo "Lattice2CTM finished on " `date` -exit 0 diff --git a/egs/babel/s5c/local/make_L_align.sh b/egs/babel/s5c/local/make_L_align.sh index 03d1ad517fe..50e46a00493 100755 --- a/egs/babel/s5c/local/make_L_align.sh +++ b/egs/babel/s5c/local/make_L_align.sh @@ -20,7 +20,7 @@ set -e if [ $# -ne 3 ]; then echo "This is a simple script that will generate the L_align.fst" - echo "The FST L_align.fst is used for getting the force-aligned " + echo "The FST L_align.fst is used for getting the force-aligned " echo "utterances" echo "The script automaticky recognizes the probabilistic lexicon" echo "is used and will use the correct file" @@ -39,7 +39,7 @@ silphone=`cat $dir/phones/optional_silence.txt` || exit 1; # Create lexicon with alignment info if [ -f $tmpdir/lexicon.txt ] ; then cat $tmpdir/lexicon.txt | \ - awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' + awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' elif [ -f $tmpdir/lexiconp.txt ] ; then cat $tmpdir/lexiconp.txt | \ awk '{printf("%s #1 ", $1); for (n=3; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' diff --git a/egs/babel/s5c/local/make_ecf_subset.sh b/egs/babel/s5c/local/make_ecf_subset.sh index 53bddcbc839..9bdd95c3e27 100755 --- a/egs/babel/s5c/local/make_ecf_subset.sh +++ b/egs/babel/s5c/local/make_ecf_subset.sh @@ -8,7 +8,7 @@ echo "$0 $@" 1>&2 # Print the command line for logging [ -f ./path.sh ] && . ./path.sh; # source the path. . parse_options.sh || exit 1; -help_message="$0: generates an subset ecf file for spoken term detection evaluation. +help_message="$0: generates an subset ecf file for spoken term detection evaluation. The first parameter specifies the descriptor of the subset, the second parameter specifies the original ecf file. The file will be generated in the kws subdirectory of the directory @@ -47,6 +47,6 @@ duration=`grep -F -f $list_file $src_ecf_file | sed "s/.*dur=\"\([0-9.][0-9.]*\) # Output is produced here: ( grep " diff --git a/egs/babel/s5c/local/make_lexicon_fst_special.pl b/egs/babel/s5c/local/make_lexicon_fst_special.pl index 976c28c029c..3df6e7a9527 100755 --- a/egs/babel/s5c/local/make_lexicon_fst_special.pl +++ b/egs/babel/s5c/local/make_lexicon_fst_special.pl @@ -3,7 +3,7 @@ # Copyright 2012 Johns Hopkins University (author: Daniel Povey) # makes lexicon FST -- special version only for use in keyword search -# for allowing optional silences between words. This version has +# for allowing optional silences between words. 
This version has # no pron-probs involved, and # does support an optional silence, but this silence is only allowed # between words (where it may occur an arbitrary number of times), diff --git a/egs/babel/s5c/local/make_lexicon_subset.sh b/egs/babel/s5c/local/make_lexicon_subset.sh index c2bf0e21623..1e77fcaa2b9 100755 --- a/egs/babel/s5c/local/make_lexicon_subset.sh +++ b/egs/babel/s5c/local/make_lexicon_subset.sh @@ -10,9 +10,9 @@ input_lexicon_file=$2 output_lexicon_file=$3 ( - #find $dev_data_dir/transcription/ -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g' + #find $dev_data_dir/transcription/ -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g' find $transcriptions -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g' -) | sort -u | awk ' +) | sort -u | awk ' BEGIN { while(( getline line< ARGV[2] ) > 0 ) { split(line, e, "\t") @@ -20,7 +20,7 @@ output_lexicon_file=$3 } FILENAME="-" i=0 - + while(( getline word< ARGV[1] ) > 0 ) { if (word in LEXICON) print LEXICON[word] diff --git a/egs/babel/s5c/local/make_syllable_lexicon.sh b/egs/babel/s5c/local/make_syllable_lexicon.sh deleted file mode 100755 index 118845982b9..00000000000 --- a/egs/babel/s5c/local/make_syllable_lexicon.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash - - -help="Usage: $(basename $0) - E.g. $(basename $0) data/local/lexicon.txt word2syllable_lexicon.txt data/local/syllables/lexicon.txt - Here, is the text-form lexicon but with tabs separating the syllables, e.g. - WORD w o rr d - has entries of the form - WORD w/o rr/d - has entries of the form - w/o w o" - -# config vars: -pron_probs=false # If you set --pron-probs true, will expect pron-prob on input lexicon and produce - # pron-probs on word2syllable lexicon. -# end configs. -. utils/parse_options.sh - -if [ $# != 3 ]; then - echo $help 2>&1; - exit 1; -fi - -lex_in=$1 -w2s_lex_out=$2 -s2p_lex_out=$3 - -[ ! -f $lex_in ] && echo "No such file $lex_in" && exit 1; -mkdir -p `dirname $w2s_lex_out` -mkdir -p `dirname $s2p_lex_out` - -cat $lex_in | perl -e ' - ($w2s, $pron_probs) = @ARGV; - open(W2S, ">$w2s") || die "opening word to syllable lexicon"; - $saw_tabs = 0; - while() { - chop; - if ($pron_probs eq "true") { - m:(\S+)\s+(\S+)\s+(.+): || die "Bad line $_ (note: have pron probs)."; - $word = $1; - $prob = $2; - $pron = $3; - ($prob > 0.0 && $prob <= 1.0) || die "Bad pron-prob $prob in line $_"; - print W2S "$word $prob"; - } else { - m:(\S+)\s+(.+): || die "Bad line $_ (note: do not have pron probs)."; - $word = $1; - $pron = $2; - print W2S "$word"; - } - @A = split("\t", $pron); - @A >= 1 || die "Bad lexicon line $_\n"; - if (@A > 1) { $saw_tabs = 1; } - foreach $s (@A) { - $s =~ s/^\s+//; # Remove leading space. - $s =~ s/\s+$//; # Remove trailing space. - if ($s ne "") { - $s =~ m:/: && die "slash (/) present in syllable $s (not allowed)\n"; - $t = join("/", split(" ", $s)); # replace spaces with / - print W2S " $t"; - print "$t $s\n"; - } - } - print W2S "\n"; - } - if (! $saw_tabs) { - die "You seem to be using as input to this script, a lexicon that does not have " . 
- "syllables separated by tabs."; - } - ' $w2s_lex_out $pron_probs | sort | uniq > $s2p_lex_out || exit 1; - -exit 0; diff --git a/egs/babel/s5c/local/naive_comb.pl b/egs/babel/s5c/local/naive_comb.pl index e49ac972169..74ad20d84e3 100755 --- a/egs/babel/s5c/local/naive_comb.pl +++ b/egs/babel/s5c/local/naive_comb.pl @@ -102,7 +102,7 @@ sub KwslistTimeCompare { } } else { $a->[0] cmp $b->[0]; - } + } } sub KwslistTimeSort { @@ -124,7 +124,7 @@ sub KwslistTimeSort { my $method = 1; my $power = 0.5; -GetOptions('tolerance=f' => \$tolerance, +GetOptions('tolerance=f' => \$tolerance, 'method=i' => \$method, 'power=f' => \$power, 'inv-power=f' => sub { (my $opt, my $val) = @_; $power = 1.0/$val;}); diff --git a/egs/babel/s5c/local/ndx2flist.pl b/egs/babel/s5c/local/ndx2flist.pl index 48fc3dec101..c5f676affcd 100755 --- a/egs/babel/s5c/local/ndx2flist.pl +++ b/egs/babel/s5c/local/ndx2flist.pl @@ -21,7 +21,7 @@ #;; #;; Index for WSJ0 SI-short Sennheiser training data #;; Data is read WSJ sentences, Sennheiser mic. -#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts +#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts #;; per speaker TI) = 7236 utts #;; #11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 @@ -37,7 +37,7 @@ foreach $fn (@ARGV) { $fn =~ m:.+/([0-9\.\-]+)/?$: || die "Bad command-line argument $fn\n"; - $disk_id=$1; + $disk_id=$1; $disk_id =~ tr/-\./__/; # replace - and . with - so 11-10.1 becomes 11_10_1 $fn =~ s:/$::; # Remove final slash, just in case it is present. $disk2fn{$disk_id} = $fn; diff --git a/egs/babel/s5c/local/nist_eval/create_compound_set.sh b/egs/babel/s5c/local/nist_eval/create_compound_set.sh index 63de46f6106..1e745d1ebba 100755 --- a/egs/babel/s5c/local/nist_eval/create_compound_set.sh +++ b/egs/babel/s5c/local/nist_eval/create_compound_set.sh @@ -3,7 +3,7 @@ #Simple script to create compound set info that will allow for more automatized #work with the shadow set. # -#The notion of shadow data set came from the need to be able to verify +#The notion of shadow data set came from the need to be able to verify #the output of the recognizer during decoding the evaluation data. #The idea is simple -- instead of decoding just the eval data, decode both #eval data plus the dev data (or at least some portion of it) interleved diff --git a/egs/babel/s5c/local/nist_eval/export_systems.sh b/egs/babel/s5c/local/nist_eval/export_systems.sh index 7e514bcc077..d0af608416c 100755 --- a/egs/babel/s5c/local/nist_eval/export_systems.sh +++ b/egs/babel/s5c/local/nist_eval/export_systems.sh @@ -2,11 +2,11 @@ set -e set -o pipefail -. ./cmd.sh; . ./path.sh; +. ./cmd.sh; . 
./path.sh; #( -#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* +#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* #bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it* #) & #bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/tri6*_nnet*/decode_shadow.uem* @@ -14,9 +14,9 @@ set -o pipefail ( bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it* -#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* +#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* ) & -bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/tri6*_nnet*/decode_shadow.uem +bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/tri6*_nnet*/decode_shadow.uem wait wait diff --git a/egs/babel/s5c/local/nist_eval/filter_data.sh b/egs/babel/s5c/local/nist_eval/filter_data.sh index f36903035b6..8576b93fef8 100755 --- a/egs/babel/s5c/local/nist_eval/filter_data.sh +++ b/egs/babel/s5c/local/nist_eval/filter_data.sh @@ -38,7 +38,7 @@ outputname=$name while (( "$#" )); do resultdir=$1;shift - echo "Processing data directory $resultdir" + echo "Processing data directory $resultdir" [ ! -d $resultdir ] && echo "Decode dir $resultdir does not exist!" && exit 1; diff --git a/egs/babel/s5c/local/nist_eval/get_training_times.sh b/egs/babel/s5c/local/nist_eval/get_training_times.sh index 2b92dcefcdc..f5b0012c2f2 100755 --- a/egs/babel/s5c/local/nist_eval/get_training_times.sh +++ b/egs/babel/s5c/local/nist_eval/get_training_times.sh @@ -24,8 +24,8 @@ function process { replace+="\t" done - ( - eval `grep "group=all"` + ( + eval `grep "group=all"` echo -n "threads=$total_threads" echo -n " cpu_time=$total_cpu_time wall_time=$clock_time" echo -n " human_cpu_time="`convertsecs $total_cpu_time` @@ -43,17 +43,17 @@ local/summarize_logs.pl $dir/exp/make_*/*train*/ | process if [ -d $dir/data/local/extend ] ; then legend "Extending the lexicon" - local/summarize_logs.pl $dir/data/local/extend/tmp/log | process + local/summarize_logs.pl $dir/data/local/extend/tmp/log | process fi legend "Training upto stage tri5" -local/summarize_logs.pl $dir/exp/mono*/log $dir/exp/tri{1..5}/log $dir/exp/tri{1..4}_ali*/log | process +local/summarize_logs.pl $dir/exp/mono*/log $dir/exp/tri{1..5}/log $dir/exp/tri{1..4}_ali*/log | process legend "SGMM2 stage training" -local/summarize_logs.pl $dir/exp/ubm5/log $dir/exp/sgmm5/log $dir/exp/tri5_ali/log | process +local/summarize_logs.pl $dir/exp/ubm5/log $dir/exp/sgmm5/log $dir/exp/tri5_ali/log | process legend "SGMM2+bMMI stage training" -local/summarize_logs.pl $dir/exp/sgmm5_*/log $dir/exp/ubm5/log $dir/exp/sgmm5_denlats/log/* | process +local/summarize_logs.pl $dir/exp/sgmm5_*/log $dir/exp/ubm5/log $dir/exp/sgmm5_denlats/log/* | process nnet=tri6_nnet [ ! -d $dir/exp/$nnet ] && nnet=tri6b_nnet diff --git a/egs/babel/s5c/local/nist_eval/make_release.sh b/egs/babel/s5c/local/nist_eval/make_release.sh index ce784431a5c..aff89f92846 100755 --- a/egs/babel/s5c/local/nist_eval/make_release.sh +++ b/egs/babel/s5c/local/nist_eval/make_release.sh @@ -57,7 +57,7 @@ function export_file { else echo "$source_file -> $target_file" fi - + else echo "The file is already there, not doing anything. 
Either change the version (using --version), or delete that file manually)" exit 1 @@ -72,7 +72,7 @@ function export_kws_file { fixed_xml=$2 kwlist=$3 export_xml=$4 - + echo "Exporting KWS $source_xml as `basename $export_xml`" if [ -f $source_xml ] ; then cp $source_xml $fixed_xml.bak @@ -110,7 +110,7 @@ function find_best_stt_result { local dir=$1 local mask=$2 local record=`(find $dir -name "*.ctm.sys" -path "$mask" -not -ipath "*rescore*" | xargs grep Avg) | sed 's/|//g' | column -t | sort -n -k 9 | head -n 1` - + echo $record >&2 local file=`echo $record | awk -F ":" '{print $1}'` #echo $file >&2 @@ -200,7 +200,7 @@ function figure_out_scase { if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then local basnam=${ecf%%.ecf.xml} local scase=`echo $basnam | awk -F _ '{print $2}'` - + if [[ $scase =~ conv-dev(\..*)? ]]; then echo "BaDev" elif [[ $scase =~ conv-eval(\..*)? ]]; then @@ -211,7 +211,7 @@ function figure_out_scase { echo "BaDev" return 1 fi - else + else echo "WARNING: The ECF file $ecf is probably not an official file" >&2 echo "WARNING: Does not match the mask IARPA-babel.*.ecf.xml" >&2 echo "BaDev" @@ -225,7 +225,7 @@ function figure_out_partition { if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then local basnam=${ecf%%.ecf.xml} local scase=`echo $basnam | awk -F _ '{print $2}'` - + if [[ $scase =~ conv-dev(\..*)? ]]; then echo "conv-dev" elif [[ $scase =~ conv-eval(\..*)? ]]; then @@ -235,7 +235,7 @@ function figure_out_partition { echo "conv-dev" return 1 fi - else + else echo "WARNING: The ECF file $ecf is probably not an official file" >&2 echo "conv-dev" return 1 @@ -264,7 +264,7 @@ fi #data=data/shadow.uem dirid=`basename $data` kws_tasks="kws " -[ -f $data/extra_kws_tasks ] && kws_tasks+=`cat $data/extra_kws_tasks | awk '{print $1"_kws"}'` +[ -f $data/extra_kws_tasks ] && kws_tasks+=`cat $data/extra_kws_tasks | awk '{print $1"_kws"}'` [ -d $data/compounds ] && compounds=`ls $data/compounds` if [ -z "$compounds" ] ; then @@ -295,7 +295,7 @@ else submit_to_google $best_one $ATWV $MTWV ) || echo "Submission failed!" - + for compound in $compounds ; do compound_best_one=`echo $best_one | sed "s:$master/${kws}_:$compound/${kws}_:g"` echo "From ($kws) $best_one going to $compound_best_one" diff --git a/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh b/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh index 760d7ee80d5..3b12222e13a 100755 --- a/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh +++ b/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh @@ -7,7 +7,7 @@ # This script, which will generally be called from other neural-net training # scripts, extracts the training examples used to train the neural net (and also # the validation examples used for diagnostics), and puts them in separate archives. -# This is similar to the script steps/nnet2/get_egs.sh, but this also extracts +# This is similar to the script steps/nnet2/get_egs.sh, but this also extracts # frames from unsupervsied data. Decode directory for unsupervised data which # has the best path done along with posteriors (can be done using local/combine_posteriors.sh) @@ -25,15 +25,15 @@ samples_per_iter=400000 # each iteration of training, see this many samples # per job. This is just a guideline; it will pick a number # that divides the number of samples in the entire data. 
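# (Aside, for illustration only -- not part of the original script: "divides"
# above means the chunk size gets rounded so iterations come out even.  For
# example, with 1,200,000 usable samples and samples_per_iter=400000 the
# script would use exactly 3 chunks of 400,000 each, while with 1,000,000
# samples it would settle on a nearby divisor such as 500,000 (two
# iterations) or 250,000 (four) rather than exactly 400,000.)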
transform_dir_sup= # If supplied, overrides alidir -transform_dir_unsup= +transform_dir_unsup= num_jobs_nnet=16 # Number of neural net jobs to run in parallel stage=-10 -io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. splice_width=4 # meaning +- 4 frames on each side for second LDA spk_vecs_dir_sup= spk_vecs_dir_unsup= random_copy=false -weight_threshold=0.7 # Threshold on confidence factor of an unsupervised data +weight_threshold=0.7 # Threshold on confidence factor of an unsupervised data # frame for it to not be ignored supervised_copies=3 # Make x copies of supervised data. use_frame_selection=true @@ -70,7 +70,7 @@ if [ $# != 6 ]; then echo " --supervised-copies <#copies|3> # Make copies of supervised data" echo " --transform-dir-sup # Directory with transforms for supervised training data" echo " --transform-dir-unsup # Directory with transforms for unsupervised training data" - + exit 1; fi @@ -109,7 +109,7 @@ cp $alidir/tree $dir awk '{print $1}' $data_sup/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset > $dir/valid_uttlist -# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately +# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately if [ -f $data_sup/utt2uniq ]; then echo "File $data_sup/utt2uniq exists, so augmenting valid_uttlist to" echo "include all perturbed versions of the same 'real' utterances." @@ -121,7 +121,7 @@ if [ -f $data_sup/utt2uniq ]; then rm $dir/uniq2utt $dir/valid_uttlist.tmp fi -# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately +# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately awk '{print $1}' $data_sup/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \ head -$num_utts_subset > $dir/train_subset_uttlist @@ -137,7 +137,7 @@ if [ "$norm_vars" != "$norm_vars_unsup" ]; then fi cp $alidir/norm_vars $dir 2>/dev/null -## Set up features. +## Set up features. if [ -z $feat_type ]; then if [ -f $alidir/final.mat ] && [ ! -f $transform_dir_sup/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi fi @@ -150,7 +150,7 @@ case $feat_type in valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- |" train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- |" ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` #splice_opts_unsup=`cat $latdir/../splice_opts 2>/dev/null` #if [ "$splice_opts" -ne "$splice_opts_unsup" ]; then @@ -159,14 +159,14 @@ case $feat_type in # exit 1 #fi cp $alidir/splice_opts $dir/splice_opts 2>/dev/null - + #if [ "`diff $alidir/final.mat $latdir/../final.mat &> /dev/null; echo $?`" -ne "0" ]; then # echo "ERROR: Features mismatch for supervised and unsupervised data!" 
# echo "LDA matrices $alidir/final.mat for supervised data and $latdir/../final.mat for unsupervised data don't match" # exit 1 #fi - cp $alidir/final.mat $dir + cp $alidir/final.mat $dir feats_sup="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata_sup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_sup/JOB/utt2spk scp:$sdata_sup/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" feats_unsup="ark,s,cs:cat $sdata_unsup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_unsup/JOB/utt2spk scp:$sdata_unsup/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" @@ -309,18 +309,18 @@ if [ $stage -le 3 ]; then for (( i=0; i \$fragMarkers, - "oov=s" => \$OOV_symbol, + "oov=s" => \$OOV_symbol, "vocab=s" => \$vocabFile, "icu-transform=s" => \$icu_transform, "get-whole-transcripts=s" => \$get_whole_transcripts @@ -112,7 +112,7 @@ print STDERR ("\tLimiting transcriptions to words in $vocabFile\n"); print STDERR ("\tMapping OOV tokens to \"$OOV_symbol\"\n"); print STDERR ("\tif they remain OOV even after removing [$fragMarkers] from either end\n") if ($fragMarkers); - } + } print STDERR ("$0 ADVICE: Use full path for the Input Directory\n") unless ($inDir=~m:^/:); } else { print STDERR ("Usage: $0 [--options] InputDir OutputDir\n"); @@ -295,7 +295,7 @@ } else { print STDERR ("$0 ERROR: No .txt files found $TranscriptionDir\n"); exit(1); - } + } } else { print STDERR ("$0 ERROR: No directory named $TranscriptionDir\n"); exit(1); @@ -322,8 +322,8 @@ $SampleRate = 8000; #default while ($#Info>=0) { $line = shift @Info; - $SampleCount = $1 if ($line =~ m:sample_count -i (\d+):); - $SampleRate = $1 if ($line =~ m:sample_rate -i (\d+):); + $SampleCount = $1 if ($line =~ m:sample_count -i (\d+):); + $SampleRate = $1 if ($line =~ m:sample_rate -i (\d+):); } if ($SampleCount<0) { # Unable to extract a valid duration from the sphere header @@ -342,7 +342,7 @@ print STDERR ("$0: Recorded durations from headers of $numFiles .sph files\n"); } else { print STDERR ("$0 NOTICE: No .sph files in $AudioDir\n"); - } + } @AudioFiles = `ls ${AudioDir}/*.wav`; if ($#AudioFiles >= 0) { @@ -378,8 +378,8 @@ print STDERR ("$0: Recorded durations from headers of $numFiles .sph files\n"); } else { print STDERR ("$0 NOTICE: No .wav files in $AudioDir\n"); - } - + } + if ( $#waveformName == 0 ) { print STDERR ("$0 ERROR: No audio files found!"); } diff --git a/egs/babel/s5c/local/prepare_lexicon.pl b/egs/babel/s5c/local/prepare_lexicon.pl index 721e56a0dcf..ff128f07637 100755 --- a/egs/babel/s5c/local/prepare_lexicon.pl +++ b/egs/babel/s5c/local/prepare_lexicon.pl @@ -27,10 +27,10 @@ # 㓤 k_1 i:_1 t_1 # 兄妹 h_1 i:_1 N_1 m_2 u:j_2 # 兄妹 h_1 i:_1 N_1 m_6 u:j_6 -# +# # # Write only one pronunciation per line -# Transfer any tags, prefixed by underscores, to phones in the syllable +# Transfer any tags, prefixed by underscores, to phones in the syllable # Remove the syllable boundary markers, given by periods or pound signs # # NOTE: The Romainzation is present only for some languages. See -r option. 
@@ -46,7 +46,7 @@ $icu_transform = ""; $phonemap=""; # -# - nonsilence_phones.txt: tagged phones from the new lexicon +# - nonsilence_phones.txt: tagged phones from the new lexicon # # - optional_silence.txt: phones used to model silence in acoustic training # @@ -61,12 +61,12 @@ # ############################################################################### -GetOptions("add=s" => \$nsWordsFile, - "oov=s" => \$OOV_symbol, - "romanized!" => \$romanized, - "sil=s" => \$sil, +GetOptions("add=s" => \$nsWordsFile, + "oov=s" => \$OOV_symbol, + "romanized!" => \$romanized, + "sil=s" => \$sil, "icu-transform=s" => \$icu_transform, - "phonemap=s" => \$phonemap + "phonemap=s" => \$phonemap ); if ($#ARGV == 1) { @@ -165,7 +165,7 @@ $syllable =~ s:\s+: :g; @original_phones = split(" ", $syllable); @substituted_original_phones=(); - + foreach $phone (@original_phones) { if (defined $phonemap_hash{$phone} ) { #print "Sub: $phone => " . join (' ', @{$phonemap_hash{$phone}}) . "\n"; @@ -205,7 +205,7 @@ # It is a phone if ( $substituted_phones{phone} ) { die "ERROR, the $new_phone and $phone are both existing phones, so we cannot do automatic map!"; - } + } $is_original_phone{$phone} = "$phone"; $new_phones .= " $phone"; } @@ -277,7 +277,7 @@ && print STDERR ("$0: Wrote $numProns pronunciations to $outLex\n"); ############################################################################### -# - nonsilence_phones.txt: tagged phones from the new lexicon, 1 phone/line +# - nonsilence_phones.txt: tagged phones from the new lexicon, 1 phone/line ############################################################################### foreach $phone (sort keys %is_new_phone) { diff --git a/egs/babel/s5c/local/prepare_stm.pl b/egs/babel/s5c/local/prepare_stm.pl index edf1b43676d..b4daec585e3 100755 --- a/egs/babel/s5c/local/prepare_stm.pl +++ b/egs/babel/s5c/local/prepare_stm.pl @@ -92,7 +92,7 @@ @tokens = split(/\s+/, $line); unless ($#tokens == 3) { $num_failed_parses+=1; - print STDERR "$0: Couldn't parse line $. in $segmentsFile\n" + print STDERR "$0: Couldn't parse line $. 
in $segmentsFile\n" if ($num_failed_parses == 1); print STDERR ("\tLine: $line") if ($num_failed_parses le $num_failed_parses_max); @@ -174,7 +174,7 @@ $waveform{$recordingID} =~ s:.+/::; # remove path prefix $waveform{$recordingID} =~ s:\.(sph|wav)\s*$::; # remove file extension $channel{$recordingID} = 1 # Default - unless (exists $channel{$recordingID}); + unless (exists $channel{$recordingID}); ++$numRecordings; } close(SCP); @@ -321,7 +321,7 @@ $w =~ s:([^\x00-\x7F])(?=[^\x00-\x7F]):$1 :g; # split adjacent non-ASCII chars print CHARSTM ("$w\n"); } -close(CHARSTM); +close(CHARSTM); close(STM); print STDERR ("$0: Wrote char.stm file $charStmFile\n"); diff --git a/egs/babel/s5c/local/resegment/evaluate_segmentation.pl b/egs/babel/s5c/local/resegment/evaluate_segmentation.pl index 06a762d7762..9d865cca8c9 100755 --- a/egs/babel/s5c/local/resegment/evaluate_segmentation.pl +++ b/egs/babel/s5c/local/resegment/evaluate_segmentation.pl @@ -1,6 +1,6 @@ #!/usr/bin/env perl -# Copyright 2014 Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar +# Copyright 2014 Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar # Apache 2.0 ################################################################################ diff --git a/egs/babel/s5c/local/resegment/generate_segments.sh b/egs/babel/s5c/local/resegment/generate_segments.sh index 01917c3d4e9..95e88deb87d 100755 --- a/egs/babel/s5c/local/resegment/generate_segments.sh +++ b/egs/babel/s5c/local/resegment/generate_segments.sh @@ -37,14 +37,14 @@ if [ $# -ne 5 ]; then echo " --segmentation-opts '--opt1 opt1val --opt2 opt2val' # options for segmentation.py" echo " --reference-rttm # Reference RTTM file that will be used for analysis of the segmentation" echo " --get-text (true|false) # Convert text from base data directory to correspond to the new segments" - echo + echo echo "e.g.:" echo "$0 data/dev10h data/lang exp/tri4b_seg exp/tri4b_resegment_dev10h" exit 1 fi datadir=$1 # The base data directory that contains at least the files wav.scp and reco2file_and_channel -lang=$2 +lang=$2 model_dir=$3 # Segmentation model directory created using local/resegment/run_segmentation_train.sh temp_dir=$4 # Temporary directory to store some intermediate files during segmentation output_dir=$5 # The target directory @@ -73,18 +73,18 @@ if [ $stage -le 1 ]; then ali-to-phones --per-frame=true $model_dir/final.mdl ark:- ark,t:- \| \ utils/int2sym.pl -f 2- $lang/phones.txt \| \ gzip -c '>' $temp_dir/pred.JOB.gz || exit 1 - + mkdir -p $temp_dir/pred gunzip -c $temp_dir/pred.*.gz | \ - perl -ne '($file, $phones)=split / /, $_, 2; - open($fh, ">'$temp_dir/pred/'$file.pred" ) or die $!; - print {$fh} "$file $phones"; + perl -ne '($file, $phones)=split / /, $_, 2; + open($fh, ">'$temp_dir/pred/'$file.pred" ) or die $!; + print {$fh} "$file $phones"; close($fh);' || exit 1 fi t2=$(date +%s) total_time=$((total_time + t2 - t1)) -echo "SI decoding done in $((t2-t1)) seconds" +echo "SI decoding done in $((t2-t1)) seconds" ############################################################################### @@ -99,8 +99,8 @@ if ! [ `cat $lang/phones/optional_silence.txt | wc -w` -eq 1 ]; then exit 1; fi -silphone=`cat $lang/phones/optional_silence.txt` -# silphone will typically be "sil" or "SIL". +silphone=`cat $lang/phones/optional_silence.txt` +# silphone will typically be "sil" or "SIL". # 3 sets of phones: 0 is silence, 1 is noise, 2 is speech., ( @@ -127,15 +127,15 @@ local/resegment/segmentation.py --verbose 2 $segmentation_opts \ if [ ! 
-s $output_dir/segments ] ; then echo "Zero segments created during segmentation process." - echo "That means something failed. Try the cause and re-run!" + echo "That means something failed. Try the cause and re-run!" exit 1 fi t2=$(date +%s) total_time=$((total_time + t2 - t1)) -echo "Resegment data done in $((t2-t1)) seconds" +echo "Resegment data done in $((t2-t1)) seconds" -for file in reco2file_and_channel wav.scp ; do +for file in reco2file_and_channel wav.scp ; do [ ! -f $datadir/$file ] && echo "Expected file $datadir/$file to exist" && exit 1 cp $datadir/$file $output_dir/$file done diff --git a/egs/babel/s5c/local/rttm_to_text.pl b/egs/babel/s5c/local/rttm_to_text.pl index 7312acdb886..d33c71e2f17 100755 --- a/egs/babel/s5c/local/rttm_to_text.pl +++ b/egs/babel/s5c/local/rttm_to_text.pl @@ -64,7 +64,7 @@ sub float_gt { @times = (); $filename = $_filename; } - + #I don't really know what is the distinction between all #of these. Let's throw away the SPEAKER, as it does not #really contain information that is to be found in the transcript @@ -91,12 +91,12 @@ sub float_gt { my $B = $times[-1][0]; my $Aend = $times[-2][1]; my $Bend = $times[-1][1]; - + #print "WARNING: Elements in the RTTM file are not sorted for FILENAME $filename!\n"; #print $times[-2][0] . " " . $times[-2][1] - $times[-2][0]. " " . $times[-2][2] . "\n"; #print $times[-1][0] . " " . $times[-1][1] - $times[-1][0]. " " . $times[-1][2] . "\n"; #print "\n"; - + my @sorted = sort {$a <=> $b} ($A, $B, $Aend, $Bend); #print Dumper(\@sorted); $times[-1][0] = $sorted[0]; @@ -129,7 +129,7 @@ sub float_gt { #if ($segmentname ne "10470_A_20111118_172644_000000" ) { # next; #} - + #print $filename . "\n"; #print Dumper(\@times); diff --git a/egs/babel/s5c/local/run_kws_stt_task.sh b/egs/babel/s5c/local/run_kws_stt_task.sh index 50c96e41035..d622aac9442 100755 --- a/egs/babel/s5c/local/run_kws_stt_task.sh +++ b/egs/babel/s5c/local/run_kws_stt_task.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash # Copyright 2013 Johns Hopkins University (authors: Yenda Trmal) # Licensed under the Apache License, Version 2.0 (the "License"); @@ -39,7 +39,7 @@ if [ $(basename $0) == score.sh ]; then fi echo $0 "$@" -. utils/parse_options.sh +. utils/parse_options.sh if [ $# -ne 3 ]; then echo "Usage: $0 [options] " @@ -47,27 +47,29 @@ if [ $# -ne 3 ]; then exit 1; fi -data_dir=$1; +data_dir=$1; lang_dir=$2; -decode_dir=$3; +decode_dir=$3; ##NB: The first ".done" files are used for backward compatibility only ##NB: should be removed in a near future... -if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then - local/lattice_to_ctm.sh --cmd "$cmd" --word-ins-penalty $wip \ - --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \ - $data_dir $lang_dir $decode_dir - - if ! $skip_scoring ; then - local/score_stm.sh --cmd "$cmd" --cer $cer \ - --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt}\ +if ! $skip_stt ; then + if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then + local/lattice_to_ctm.sh --cmd "$cmd" --word-ins-penalty $wip \ + --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \ $data_dir $lang_dir $decode_dir + + if ! $skip_scoring ; then + local/score_stm.sh --cmd "$cmd" --cer $cer \ + --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt}\ + $data_dir $lang_dir $decode_dir + fi + touch $decode_dir/.done.score fi - touch $decode_dir/.done.score fi if ! $skip_kws ; then - if [ ! -f $decode_dir/.kws.done ] && [ ! -f $decode_dir/.done.kws ]; then + if [ ! -f $decode_dir/.kws.done ] && [ ! 
-f $decode_dir/.done.kws ]; then local/kws_search.sh --cmd "$cmd" --max-states ${max_states} \ --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} --skip-scoring $skip_scoring\ --indices-dir $decode_dir/kws_indices $lang_dir $data_dir $decode_dir diff --git a/egs/babel/s5c/local/score_combine.sh b/egs/babel/s5c/local/score_combine.sh index f425b5afc68..7e8af85b2d8 100755 --- a/egs/babel/s5c/local/score_combine.sh +++ b/egs/babel/s5c/local/score_combine.sh @@ -18,9 +18,9 @@ # Script for system combination using minimum Bayes risk decoding. -# This calls lattice-combine to create a union of lattices that have been +# This calls lattice-combine to create a union of lattices that have been # normalized by removing the total forward cost from them. The resulting lattice -# is used as input to lattice-mbr-decode. This should not be put in steps/ or +# is used as input to lattice-mbr-decode. This should not be put in steps/ or # utils/ since the scores on the combined lattice must not be scaled. # begin configuration section. @@ -43,7 +43,7 @@ help_message="Usage: "$(basename $0)" [options] or: "$(basename $0)" data/test data/lang exp/tri1/decode exp/tri2/decode:18 exp/tri3/decode:13 exp/combine Options: --cmd (run.pl|queue.pl...) # specify how to run the sub-processes. - --min-lmwt INT # minumum LM-weight for lattice rescoring + --min-lmwt INT # minumum LM-weight for lattice rescoring --max-lmwt INT # maximum LM-weight for lattice rescoring --lat-weights STR # colon-separated string of lattice weights --cmd (run.pl|queue.pl...) # specify how to run the sub-processes. @@ -70,7 +70,7 @@ decode_dirs=( $@ ) # read the remaining arguments into an array unset decode_dirs[${#decode_dirs[@]}-1] # 'pop' the last argument which is odir num_sys=${#decode_dirs[@]} # number of systems to combine -#Let the user to set the CTM file name +#Let the user to set the CTM file name #use the data-dir name in case the user doesn't care if [ -z ${ctm_name} ] ; then ctm_name=`basename $data` @@ -94,7 +94,7 @@ for i in `seq 0 $[num_sys-1]`; do offset=`echo $decode_dir | cut -d: -s -f2` # add this to the lm-weight. decode_dir=`echo $decode_dir | cut -d: -f1` [ -z "$offset" ] && offset=0 - + model=`dirname $decode_dir`/final.mdl # model one level up from decode dir for f in $model $decode_dir/lat.1.gz ; do [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; @@ -103,7 +103,7 @@ for i in `seq 0 $[num_sys-1]`; do nj=`cat $decode_dir/num_jobs` || exit 1; else if [ $nj != `cat $decode_dir/num_jobs` ]; then - echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" + echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" exit 1; fi fi @@ -128,7 +128,7 @@ if [ -z "$lat_weights" ]; then for i in `seq $[$num_sys-1]`; do lat_weights="$lat_weights:1.0"; done fi -if [ $stage -le 0 ]; then +if [ $stage -le 0 ]; then $cmd $parallel_opts LMWT=$min_lmwt:$max_lmwt $dir/log/combine_lats.LMWT.log \ mkdir -p $dir/score_LMWT/ '&&' \ lattice-combine --lat-weights=$lat_weights "${lats[@]}" ark:- \| \ @@ -155,12 +155,12 @@ if [ $stage -le 1 ]; then grep -v -E '' | \ perl -e '@list = (); %list = (); while(<>) { - chomp; - @col = split(" ", $_); + chomp; + @col = split(" ", $_); push(@list, $_); - $key = "$col[0]" . " $col[1]"; + $key = "$col[0]" . 
" $col[1]"; $list{$key} = 1; - } + } foreach(sort keys %list) { $key = $_; foreach(grep(/$key/, @list)) { diff --git a/egs/babel/s5c/local/score_mbr.sh b/egs/babel/s5c/local/score_mbr.sh index 1c39830b4c7..a86dd5c3f71 100755 --- a/egs/babel/s5c/local/score_mbr.sh +++ b/egs/babel/s5c/local/score_mbr.sh @@ -48,7 +48,7 @@ for inv_acwt in `seq $min_lmwt $max_lmwt`; do done wait; [ -f $dir/.error ] && echo "score_mbr.sh: errror getting MBR outout."; - + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ cat $dir/scoring/LMWT.tra \| \ diff --git a/egs/babel/s5c/local/score_sctk_prune.sh b/egs/babel/s5c/local/score_sctk_prune.sh index a6eca9fd071..09662af57c8 100755 --- a/egs/babel/s5c/local/score_sctk_prune.sh +++ b/egs/babel/s5c/local/score_sctk_prune.sh @@ -73,12 +73,12 @@ if [ $stage -le 1 ]; then grep -v -E '' | \ perl -e '@list = (); %list = (); while(<>) { - chomp; - @col = split(" ", $_); + chomp; + @col = split(" ", $_); push(@list, $_); - $key = "$col[0]" . " $col[1]"; + $key = "$col[0]" . " $col[1]"; $list{$key} = 1; - } + } foreach(sort keys %list) { $key = $_; foreach(grep(/$key/, @list)) { @@ -103,8 +103,8 @@ if [ $stage -le 1 ]; then foreach (@char) { $char = encode("UTF8", $_); $start += $dur; - # printf "$col[0] $col[1] $start $dur $char\n"; - printf "%s %s %.2f %.2f %s %s\n", $col[0], $col[1], $start, $dur, $char, $col[5]; + # printf "$col[0] $col[1] $start $dur $char\n"; + printf "%s %s %.2f %.2f %s %s\n", $col[0], $col[1], $start, $dur, $char, $col[5]; } } }' > $y.char.ctm @@ -122,7 +122,7 @@ if [ $stage -le 2 ]; then cp $data/char.stm $dir/score_LMWT/'&&'\ $ScoringProgram -s -r $dir/score_LMWT/char.stm stm -h $dir/score_LMWT/${name}.char.ctm ctm -o all -o dtl; fi - + # for x in $dir/score_*/*.ctm; do # mv $x.filt $x; # rm -f $x.filt*; diff --git a/egs/babel/s5c/local/score_stm.sh b/egs/babel/s5c/local/score_stm.sh index 2406af4e726..56835109722 100755 --- a/egs/babel/s5c/local/score_stm.sh +++ b/egs/babel/s5c/local/score_stm.sh @@ -48,7 +48,7 @@ data=$1 lang=$2 # This parameter is not used -- kept only for backwards compatibility dir=$3 -set -e +set -e set -o pipefail set -u @@ -82,8 +82,9 @@ if [ $stage -le 0 ] ; then \> $dir/score_LMWT/stm '&&' \ paste -d ' ' \<\(cut -f 1-4 -d ' ' $dir/score_LMWT/${name}.ctm.sorted \) \ \<\(cut -f 5- -d ' ' $dir/score_LMWT/${name}.ctm.sorted \| uconv -f utf8 -t utf8 -x "$icu_transform" \) \ - \> $dir/score_LMWT/${name}.ctm '&&' \ - utils/fix_ctm.sh $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm '&&' \ + \> $dir/score_LMWT/${name}.ctm.sorted2 '&&' \ + utils/fix_ctm.sh $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm.sorted2 '&&' \ + $SortingProgram sortCTM \<$dir/score_LMWT/${name}.ctm.sorted2 \>$dir/score_LMWT/${name}.ctm '&&' \ $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm \ -n "$name.ctm" -f 0 -D -F -o sum rsum prf dtl sgml -e utf-8 || exit 1 fi diff --git a/egs/babel/s5c/local/shadow_set_kws_search.sh b/egs/babel/s5c/local/shadow_set_kws_search.sh index 76521fda9b6..a67a3a57f6a 100755 --- a/egs/babel/s5c/local/shadow_set_kws_search.sh +++ b/egs/babel/s5c/local/shadow_set_kws_search.sh @@ -13,7 +13,7 @@ help_message="$0: create subset of the input directory (specified as the first d Example: $0 [data-dir2 [data-dir3 [ ...] ]" -# Begin configuration section. +# Begin configuration section. 
#acwt=0.0909091 min_lmwt=7 max_lmwt=17 @@ -101,8 +101,8 @@ if [ $stage -le 0 ] ; then for lmwt in `seq $min_lmwt $max_lmwt` ; do kwsoutdir=$decodedir/kws_$lmwt mkdir -p $kwsoutdir - - acwt=`perl -e "print (1.0/$lmwt);"` + + acwt=`perl -e "print (1.0/$lmwt);"` steps/make_index.sh --strict $strict --cmd "$cmd" --max-states $max_states\ --acwt $acwt $model_flags --skip-optimization $skip_optimization \ --word_ins_penalty $word_ins_penalty \ @@ -128,14 +128,14 @@ if [ $stage -le 1 ] ; then dirB=$decodedir/`basename $datasetB`/kws_$lmwt mkdir -p $dirA mkdir -p $dirB - + steps/search_index.sh --cmd "$cmd" $kwsdatadir $kwsoutdir || exit 1 [ ! -f $datasetA/kws/utter_id ] && echo "File $datasetA/kws/utter_id must exist!" && exit 1; cat $kwsoutdir/result.* | \ grep -F -f <(cut -f 1 -d ' ' $datasetA/kws/utter_id ) |\ grep "^KW[-a-zA-Z0-9]*-A " | \ - sed 's/^\(KW.*\)-A /\1 /g' > $dirA/results + sed 's/^\(KW.*\)-A /\1 /g' > $dirA/results [ ! -f $datasetB/kws/utter_id ] && echo "File $datasetB/kws/utter_id must exist!" && exit 1; cat $kwsoutdir/result.* | \ @@ -152,7 +152,7 @@ if [ $stage -le 1 ] ; then cat $kwsoutdir/result.* | \ grep -F -f <(cut -f 1 -d ' ' $datasetA/kws/utter_id ) |\ grep "^KW[-a-zA-Z0-9]*-B " | \ - sed 's/^\(KW.*\)-B /\1 /g' > $dirA/results + sed 's/^\(KW.*\)-B /\1 /g' > $dirA/results [ ! -f $datasetB/kws/utter_id ] && echo "File $datasetB/kws/utter_id must exist!" && exit 1; cat $kwsoutdir/result.* | \ @@ -192,7 +192,7 @@ if [ $stage -le 3 ] ; then utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationA \ --segments=$datadir/segments --normalize=false --remove-dup=true\ --map-utter=$kwsdatadir/utter_map - $rootdirA/kws_LMWT/kwslist.unnormalized.xml || exit 1 - + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_write_unnormalized.LMWT.log \ set -e';' set -o pipefail';' \ cat $rootdirAB/kws_LMWT/results \| \ @@ -204,15 +204,15 @@ fi echo "Scoring $datasetA" if [ $stage -le 4 ] ; then if [[ (! -x local/kws_score.sh ) || ($skip_scoring == true) ]] ; then - echo "Not scoring, because the file local/kws_score.sh is not present" + echo "Not scoring, because the file local/kws_score.sh is not present" exit 1 elif [ ! -f $datasetA/kws/rttm ] ; then echo "Not scoring, because the file $datasetA/kws/rttm is not present" else $cmd LMWT=$min_lmwt:$max_lmwt $rootdirA/kws/kws_scoring.LMWT.log \ - local/kws_score.sh $datasetA $rootdirA/kws_LMWT + local/kws_score.sh $datasetA $rootdirA/kws_LMWT $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_scoring.LMWT.log \ - local/kws_score.sh --kwlist $datasetB/kws/kwlist.xml $datasetA $rootdirAB/kws_LMWT + local/kws_score.sh --kwlist $datasetB/kws/kwlist.xml $datasetA $rootdirAB/kws_LMWT fi fi diff --git a/egs/babel/s5c/local/split_ctms.sh b/egs/babel/s5c/local/split_ctms.sh index efba126a5dd..b24a1380111 100755 --- a/egs/babel/s5c/local/split_ctms.sh +++ b/egs/babel/s5c/local/split_ctms.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash # Copyright 2013 Johns Hopkins University (authors: Yenda Trmal) # Licensed under the Apache License, Version 2.0 (the "License"); @@ -32,8 +32,8 @@ echo "$0 $@" set -e set -o pipefail -data=$1; -q=$2; +data=$1; +q=$2; shift; shift; if [ -z $ctm_name ] ; then diff --git a/egs/babel/s5c/local/stm2text.pl b/egs/babel/s5c/local/stm2text.pl index 3ec3806238a..3b069c63554 100755 --- a/egs/babel/s5c/local/stm2text.pl +++ b/egs/babel/s5c/local/stm2text.pl @@ -3,7 +3,7 @@ # Copyright 2012 Johns Hopkins University (Author: Yenda Trmal) # Apache 2.0. 
-#This script takes the source STM file and generates the *.txt files which +#This script takes the source STM file and generates the *.txt files which #are usually part of the BABEL delivery #The *.txt files are not the part of the delivery for the evalpart1 subset #The program works as a filter and the only parameter it expects is @@ -12,7 +12,7 @@ #example of usage: # cat data/evalpart1/stm local/stm2text.pl data/raw_evalpart1_data/transcriptions -use strict; +use strict; use warnings; use utf8; @@ -30,7 +30,7 @@ next if ( $filename =~ /;;.*/ ); #$filename =~ s/;;(.*)/$1/ if ( $filename =~ /;;.*/ ); $text = "" if not $text; - + if ( $prev_filename ne $filename ) { #close($OUTPUT) if ( tell(FH) != -1 ); print "$output_dir/$filename.txt\n"; diff --git a/egs/babel/s5c/local/subset_atwv.pl b/egs/babel/s5c/local/subset_atwv.pl index 910703db996..ce6b7043116 100755 --- a/egs/babel/s5c/local/subset_atwv.pl +++ b/egs/babel/s5c/local/subset_atwv.pl @@ -13,7 +13,7 @@ e.g.: subset_atwv.pl keywords.list bsum.txt This script will compute the ATWV for a subset of the original keywords in bsum.txt. -Note that bsum.txt is a file generated by the NIST scoring tool F4DE. keywords.list +Note that bsum.txt is a file generated by the NIST scoring tool F4DE. keywords.list is a list of the keywords that you want to compute the ATWV for. For example: KW101-0001 KW101-0002 @@ -27,7 +27,7 @@ my $subset_name = ""; my $width = 5; GetOptions('subset-name=s' => \$subset_name, - 'width=i' => \$width); + 'width=i' => \$width); @ARGV == 2 || die $Usage; @@ -72,7 +72,7 @@ if (/^Keyword/) {$flag = 1;} my @col; if ($flag == 1) { - # Figure out keywords that don't have occurrences in the search collection + # Figure out keywords that don't have occurrences in the search collection @col = split(/\|/, $_); $col[2] =~ s/^\s+//; $col[2] =~ s/\s+$//; diff --git a/egs/babel/s5c/local/subset_kwslist.pl b/egs/babel/s5c/local/subset_kwslist.pl index 96c2c7a7fdd..361291179ef 100755 --- a/egs/babel/s5c/local/subset_kwslist.pl +++ b/egs/babel/s5c/local/subset_kwslist.pl @@ -29,5 +29,5 @@ } $data->{kw} = \@filtered_kws; my $xml = XMLout($data, RootName=> "kwlist", KeyAttr=>''); -print $xml; +print $xml; exit 0 diff --git a/egs/babel/s5c/local/summarize_logs.pl b/egs/babel/s5c/local/summarize_logs.pl index 4f7fc058f96..e816d57d68f 100755 --- a/egs/babel/s5c/local/summarize_logs.pl +++ b/egs/babel/s5c/local/summarize_logs.pl @@ -23,7 +23,7 @@ sub parse_accounting_entry { $entry= shift @_; @elems = split " ", $entry; - + $time=undef; $threads=undef; foreach $elem (@elems) { @@ -96,7 +96,7 @@ sub parse_accounting_entry { $total_threads=0.0; foreach $fgroup (split_hundreds($fmap{$c})) { $lines=`grep -P "# Accounting:? " $fgroup |sed 's/.* Accounting:* *//g'`; - + #print $lines ."\n"; @entries = split "\n", $lines; diff --git a/egs/babel/s5c/local/syllab/ali_to_syllabs.sh b/egs/babel/s5c/local/syllab/ali_to_syllabs.sh new file mode 100755 index 00000000000..8f0cb88771a --- /dev/null +++ b/egs/babel/s5c/local/syllab/ali_to_syllabs.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +cmd=run.pl +# End configuration section +. ./utils/parse_options.sh + +if [ -f ./path.sh ]; then . 
./path.sh; fi

+if [ $# != 4 ]; then
+  echo "This script takes an ali directory and a syllab lang dir and generates"
+  echo "a syllabic transcription of the alignment"
+  echo ""
+  echo "Usage: $0 "
+  echo " e.g.: $0 data/train data/lang_syll exp/tri5_ali exp/tri5_ali_syll"
+  echo "main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl ) "
+
+  exit 1;
+fi
+
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+
+data=$1
+lang=$2
+ali=$3
+out=$4
+
+
+for f in real_words.txt lex.words2syllabs.fst ; do
+  [ ! -f $lang/$f ] && \
+    echo "The given lang directory is probably not a syllable lang dir" && \
+    echo "The file $lang/$f is missing" && \
+    exit 1
+done
+
+for f in words.txt L.fst ; do
+  [ ! -f $lang/$f ] && \
+    echo "The given lang directory does not contain the $f file" && \
+    exit 1
+done
+
+for f in $ali/num_jobs $ali/final.mdl $ali/ali.1.gz ; do
+  [ ! -f $f ] && \
+    echo "The given alignment directory does not contain the $f file" && \
+    exit 1
+done
+
+nj=$(cat $ali/num_jobs)
+echo "Extracting phoneme sequences"
+$cmd JOB=1:$nj $out/log/ali-to-phones.JOB.log \
+  ali-to-phones $ali/final.mdl ark:"gunzip -c $ali/ali.JOB.gz|" ark:- \| \
+  transcripts-to-fsts ark:- ark:$out/phones.JOB.fst || exit 1
+
+echo "Composing with files in $lang to get syllable sequences"
+$cmd JOB=1:$nj $out/log/get-syll-text.JOB.log \
+  cat $data/split$nj/JOB/text \| sym2int.pl -f 2- --map-oov '\' $lang/real_words.txt \| \
+  transcripts-to-fsts ark,t:- ark:- \|\
+  fsttablecompose $lang/lex.words2syllabs.fst ark:- ark:-\| \
+  fsts-project ark:- ark:-\| \
+  fsttablecompose $lang/L.fst ark:- ark:- \|\
+  fsttablecompose ark:$out/phones.JOB.fst ark:- ark:- \| \
+  fsts-to-transcripts ark:- ark,t:"|int2sym.pl -f 2- $lang/words.txt > $out/text.JOB"
+cat $out/text.* | sort > $out/text
+
+echo "Done"
+
diff --git a/egs/babel/s5c/local/syllab/create_syllables.pl b/egs/babel/s5c/local/syllab/create_syllables.pl
new file mode 100755
index 00000000000..29a0a67dc8d
--- /dev/null
+++ b/egs/babel/s5c/local/syllab/create_syllables.pl
@@ -0,0 +1,154 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  Johns Hopkins University (Author: Yenda Trmal)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License. 
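+#
+# (Aside, a sketch of the position-dependent phone marking implemented below,
+#  using a hypothetical two-syllable pronunciation "p a . t o": the first
+#  syllable becomes "p_B a_I" and the last "t_I o_E"; a single-phone word is
+#  marked "_S"; with --position-independent-phones the phones are left
+#  unmarked.)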
+#=============================================================================== + +use strict; +use warnings; +use utf8; +use Getopt::Long; +use Data::Dumper; + +my $with_probs; +my $position_independent_phones; + +GetOptions("with-probs" => \$with_probs, + "position-independent-phones" => \$position_independent_phones +); + +my %SYLLS; +my %LEXICON; + +while (my $line = ) { + chomp $line; + my $word; my $prob; my $pron; + if ($with_probs) { + ($word, $prob, $pron) = split(" ", $line, 3); + } else { + ($word, $pron) = split(" ", $line, 2); + } + my @syllabs = split(/\s*\t\s*/, $pron); + + my $pronlen= scalar @syllabs; + my @extended_syllabs; + if (( $syllabs[0] =~ /x\<.*\>/) || ($word eq "SIL")) { + $SYLLS{$pron} +=1; + push @extended_syllabs, $pron; + } elsif ($pronlen == 1) { + my $syl; + my @phones=split " ", $syllabs[0]; + + if ($position_independent_phones) { + $syl = join(" ", @phones); + } else { + my @phones2 = map { $_ . "_I" } @phones; + + if (scalar(@phones) == 1 ) { + $syl = "$phones[0]_S"; + } else { + $phones2[0] = $phones[0] . "_B" unless $position_independent_phones; + $phones2[-1] = $phones[-1] ."_E" unless $position_independent_phones; + $syl = join(" ", @phones2); + } + } + $SYLLS{$syl} += 1; + push @extended_syllabs, $syl; + } else { + for (my $i = 0; $i lt $pronlen; $i+=1) { + my $syl; + my @phones=split " ", $syllabs[$i]; + my $first_index = 0; + my $last_index = scalar(@phones)-1; + + if ($position_independent_phones) { + $syl = join(" ", @phones); + } else { + my @phones2 = map { $_ . "_I" } @phones; + + if ($i == 0) { + $phones2[$first_index] = $phones[$first_index] . "_B"; + } elsif ( $i == ($pronlen - 1)) { + $phones2[$last_index] = $phones[$last_index] . "_E"; + } + $syl = join(" ", @phones2); + } + + push @extended_syllabs, $syl; + $SYLLS{$syl} += 1; + } + } + push @{$LEXICON{$word}}, \@extended_syllabs; +} + + +my %VOCAB; +my %COUNTS; +my %REV_VOCAB; +foreach my $syl (keys %SYLLS) { + my $seq=1; + my $word=$syl; + $word =~ s/_[^\s]*//g; + $word =~ s/ //g; + $word =~ s/[^a-zA-Z0-9<>-|\/]//g; + + my $wordx=$word; + $wordx .= "#$seq"; + while (exists $COUNTS{$wordx}) { + $seq += 1; + $wordx = "$word#$seq"; + } + + $COUNTS{$wordx} += $SYLLS{$syl}; + push @{$VOCAB{$wordx}}, $syl; + $REV_VOCAB{$syl} = $wordx; +} + +open(my $lex_f, "|sort -u > $ARGV[0]") or +die "Cannot open the file\"$ARGV[0]\" for writing"; + +foreach my $word (keys %VOCAB) { + print $lex_f "$word\t" . join("\t", @{$VOCAB{$word}}) . "\n"; +} + +close($lex_f); + +open(my $word2syll_f, "|sort -u > $ARGV[1]") or +die "Cannot open the file\"$ARGV[1]\" for writing"; + +foreach my $word (keys %LEXICON) { + foreach my $pron (@{$LEXICON{$word}}) { + my @pron_in_syllabs; + foreach my $syl (@{$pron}) { + die "In word $word, pronunciation $pron: syllable $syl not in the lexicon!" unless exists $REV_VOCAB{$syl}; + push @pron_in_syllabs, $REV_VOCAB{$syl}; + } + print $word2syll_f "$word\t" . join(" ", @pron_in_syllabs) . "\n"; + } +} + +close($word2syll_f); + +open(my $word2ali_f, "|sort -u > $ARGV[2]") or +die "Cannot open the file\"$ARGV[2]\" for writing"; + +foreach my $word (keys %LEXICON) { + foreach my $pron (@{$LEXICON{$word}}) { + print $word2ali_f "$word\t$word\t" . join(" ", @{$pron}) . 
"\n"; + } +} + +close($word2ali_f); + diff --git a/egs/babel/s5c/local/syllab/generate_syllable_lang.sh b/egs/babel/s5c/local/syllab/generate_syllable_lang.sh new file mode 100755 index 00000000000..2d1fcb2259e --- /dev/null +++ b/egs/babel/s5c/local/syllab/generate_syllable_lang.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +cmd=run.pl +# End configuration section +. ./utils/parse_options.sh +. ./path.sh + + + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +data=$1 +llang=$2 +lang=$3 +out=$4 +lout=$5 + +test -d $lout && rm -rf $lout +mkdir -p $lout +test -d $out && rm -rf $out +cp -R $lang $out +rm -rf $out/tmp $out/L.fst $out/L_disambig.fst $out/G.fst $out/words.txt +rm -rf $out/phones/word_boundary.{int,txt} + +echo "Generating lexicons.." +if [ -f $lang/phones/word_boundary.int ] ; then + echo "Position dependent phones system..." + if [ -f $llang/lexiconp.txt ] ; then + echo "Using probabilistic lexicon..." + cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs\ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + else + echo "Using plain lexicon..." + cat $llang/lexicon.txt | local/syllab/create_syllables.pl \ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + fi +else + echo "Position independent phones system..." + if [ -f $llang/lexiconp.txt ] ; then + echo "Using probabilistic lexicon..." + cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs --position-independent-phones\ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + else + echo "Using plain lexicon..." + cat $llang/lexicon.txt | local/syllab/create_syllables.pl --position_independent_phones\ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + fi +fi +cp $lout/lex.{syllabs2phones,words2syllabs,words2phones}.txt $out + +#We will fake the words.txt file +( + echo ""; + cut -f 1 $out/lex.syllabs2phones.txt; + echo -e "#0\n\n"; +) | nl -v 0 | awk '{print $2, $1}' > $out/syllabs.txt +ln -s syllabs.txt $out/words.txt +cp $lang/words.txt $out/real_words.txt + + +#Figure out the "OOV" token +oovword=$(cat $lang/oov.txt) +oovsyl=$(grep -w -F "$oovword" $out/lex.words2syllabs.txt | \ + awk '{if (NF == 2) { print $2;} + else {print "Error, oov word has more than one syllable "; exit 1;}}') + +echo $oovsyl > $out/oov.txt +grep -w -F "$oovsyl" $out/words.txt | awk '{print $2}' > $out/oov.int + +phone_disambig_symbol=$(grep '#0' $out/phones.txt | awk '{print $2}') +word_disambig_symbol=$(grep '#0' $out/words.txt | awk '{print $2}') + +optional_sil=$(cat $out/phones/optional_silence.txt) +utils/add_lex_disambig.pl $out/lex.syllabs2phones.txt $out/lex.syllabs2phones.disambig.txt > /dev/null +cat $out/lex.syllabs2phones.disambig.txt | sort -u > $lout/lexicon.txt + +echo " SIL" | cat - $lout/lexicon.txt | perl -ane 'print $F[0], " ", join(" ", @F), "\n";' | \ + sed 's/ #[0-9]$//g' > $out/phones/align_lexicon.txt +cat $lout/lexicon.txt | perl -ane 'print $F[0], "\t1.0\t", join(" ", @F[1..$#F]), "\n";' \ + > $lout/lexiconp.txt + +cat $out/phones/align_lexicon.txt |\ + sym2int.pl -f 3- $out/phones.txt |\ + sym2int.pl -f 1-2 $out/words.txt \ + > $out/phones/align_lexicon.int + +ndisambig=$(cat $out/phones/disambig.int | wc -l) +ndisambig=$[$ndisambig-1] + + +#Compile the lexicons +echo "Compiling words2syllables FST" 
+utils/make_lexicon_fst.pl $out/lex.words2syllabs.txt | \
+  fstcompile --isymbols=$out/syllabs.txt --osymbols=$lang/words.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.words2syllabs.fst
+
+echo "Compiling L.fst and L_disambig.fst"
+sil=$(cat $lang/phones/optional_silence.txt)
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.txt 0.5 $sil | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.fst
+ln -s lex.syllabs2phones.fst $out/L.fst
+
+
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.disambig.txt 0.5 $sil '#'$ndisambig | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstaddselfloops "echo $phone_disambig_symbol |" "echo $word_disambig_symbol |"|\
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.disambig.fst
+ln -s lex.syllabs2phones.disambig.fst $out/L_disambig.fst
+
+echo "Validating the output lang dir"
+utils/validate_lang.pl $out || exit 1
+
+sed -i'' 's/#1$//g' $lout/lexicon.txt
+sed -i'' 's/#1$//g' $lout/lexiconp.txt
+
+echo "Done OK."
+exit 0
diff --git a/egs/babel/s5c/local/syllab/map_prons_to_syllables.pl b/egs/babel/s5c/local/syllab/map_prons_to_syllables.pl
new file mode 100755
index 00000000000..df3ce93ce4e
--- /dev/null
+++ b/egs/babel/s5c/local/syllab/map_prons_to_syllables.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+use Getopt::Long;
+
+my $probs;
+
+GetOptions("with-probs" => \$probs);
+
+my $syllab_lexicon=$ARGV[0];
+
+my %PRON2SYL;
+
+
+open(my $f, $syllab_lexicon) or die "Cannot open file $syllab_lexicon\n";
+while (my $line = <$f>) {
+  chomp $line;
+
+  my $syll;
+  my $pron;
+  my $prob;
+
+  if ($probs) {
+    ($syll, $prob, $pron) = split(" ", $line, 3);
+  } else {
+    ($syll, $pron) = split(" ", $line, 2);
+  }
+  $PRON2SYL{$pron} = $syll;
+}
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my ($word, $pron) = split(/\s*\t\s*/, $line, 2);
+  my @syllabs = split(/\s*\t\s*/, $pron);
+
+  my @syl_pron;
+  foreach my $syl (@syllabs) {
+    die "in $line unknown syllable $syl" unless exists $PRON2SYL{$syl};
+    push @syl_pron, $PRON2SYL{$syl};
+  }
+  print "$word\t" . join(" ", @syl_pron) . "\n";
+
+}
diff --git a/egs/babel/s5c/local/train_g2p.sh b/egs/babel/s5c/local/train_g2p.sh
index d608d084ac2..08be0014656 100755
--- a/egs/babel/s5c/local/train_g2p.sh
+++ b/egs/babel/s5c/local/train_g2p.sh
@@ -2,7 +2,7 @@
 # Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
 # Apache 2.0

-# Begin configuration section. 
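# (Aside on local/syllab/generate_syllable_lang.sh above, a sketch of the
#  standard Kaldi recipe it follows: add_lex_disambig.pl appends auxiliary
#  symbols #1, #2, ... to pronunciations that are repeated or are prefixes of
#  other pronunciations -- e.g. two syllable entries sharing the phone
#  sequence "p a" come out as "p a #1" and "p a #2" -- and fstaddselfloops
#  then adds #0 self-loops so the backoff symbol from G.fst can pass through
#  L_disambig.fst during composition.)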
iters=5 stage=0 encoding='utf-8' @@ -74,7 +74,7 @@ if [ $stage -le 0 ]; then fi for i in `seq 0 $(($iters-2))`; do - + echo "Training the G2P model (iter $[$i + 1] )" if [ $stage -le $i ]; then diff --git a/egs/babel/s5c/local/train_lms_srilm.sh b/egs/babel/s5c/local/train_lms_srilm.sh index 5bb1bfaa760..be2b0247aeb 100755 --- a/egs/babel/s5c/local/train_lms_srilm.sh +++ b/egs/babel/s5c/local/train_lms_srilm.sh @@ -4,22 +4,41 @@ export LC_ALL=C words_file= train_text= dev_text= +oov_symbol="" -. ./utils/parse_options.sh +echo "$0 $@" + +[ -f path.sh ] && . ./path.sh +. ./utils/parse_options.sh || exit 1 echo "-------------------------------------" echo "Building an SRILM language model " echo "-------------------------------------" +if [ $# -ne 2 ] ; then + echo "Incorrect number of parameters. " + echo "Script has to be called like this:" + echo " $0 [switches] " + echo "For example: " + echo " $0 data data/srilm" + echo "The allowed switches are: " + echo " words_file= word list file -- data/lang/words.txt by default" + echo " train_text= data/train/text is used in case when not specified" + echo " dev_text= last 10 % of the train text is used by default" + echo " oov_symbol=> symbol to use for oov modeling -- by default" + exit 1 +fi + datadir=$1 tgtdir=$2 outlm=lm.gz + ##End of configuration loc=`which ngram-count`; if [ -z $loc ]; then if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... - sdir=`pwd`/../../../tools/srilm/bin/i686-m64 + sdir=`pwd`/../../../tools/srilm/bin/i686-m64 else sdir=`pwd`/../../../tools/srilm/bin/i686 fi @@ -34,23 +53,39 @@ if [ -z $loc ]; then fi fi -[ -z $words_file ] && words_file=$datadir/lang/words.txt -[ -z $train_text ] && train_text=$datadir/train/text -[ -z $dev_text ] && dev_text=$datadir/dev2h/text - -echo "Using words file: $words_file" -echo "Using train text: $train_text" -echo "Using dev text : $dev_text" +# Prepare the destination directory +mkdir -p $tgtdir for f in $words_file $train_text $dev_text; do [ ! -s $f ] && echo "No such file $f" && exit 1; done -# Prepare the destination directory -mkdir -p $tgtdir +[ -z $words_file ] && words_file=$datadir/lang/words.txt +if [ ! 
-z "$train_text" ] && [ -z "$dev_text" ] ; then + nr=`cat $train_text | wc -l` + nr_dev=$(($nr / 10 )) + nr_train=$(( $nr - $nr_dev )) + orig_train_text=$train_text + head -n $nr_train $train_text > $tgtdir/train_text + tail -n $nr_dev $train_text > $tgtdir/dev_text + + train_text=$tgtdir/train_text + dev_text=$tgtdir/dev_text + echo "Using words file: $words_file" + echo "Using train text: 9/10 of $orig_train_text" + echo "Using dev text : 1/10 of $orig_train_text" +else + echo "Using words file: $words_file" + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" + train_text=$datadir/train/text + dev_text=$datadir/dev2h/text +fi + + # Extract the word list from the training dictionary; exclude special symbols -sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '' > $tgtdir/vocab +sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '' | grep -v -F "$oov_symbol" > $tgtdir/vocab if (($?)); then echo "Failed to create vocab from $words_file" exit 1 @@ -67,8 +102,8 @@ if (($?)); then else echo "Removed first word (uid) from every line of $train_text" # wc text.train train.txt # doesn't work due to some encoding issues - echo $train_text contains `cat $train_text | perl -ne 'BEGIN{$w=$s=0;}{split; $w+=$#_; $w++; $s++;}END{print "$w words, $s sentences\n";}'` - echo train.txt contains `cat $tgtdir/train.txt | perl -ne 'BEGIN{$w=$s=0;}{split; $w+=$#_; $w++; $s++;}END{print "$w words, $s sentences\n";}'` + echo $train_text contains `cat $train_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo train.txt contains `cat $tgtdir/train.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` fi # Kaldi transcript files contain Utterance_ID as the first word; remove it @@ -79,56 +114,76 @@ if (($?)); then else echo "Removed first word (uid) from every line of $dev_text" # wc text.train train.txt # doesn't work due to some encoding issues - echo $train_text contains `cat $dev_text | perl -ne 'BEGIN{$w=$s=0;}{split; $w+=$#_; $w++; $s++;}END{print "$w words, $s sentences\n";}'` - echo $tgtdir/dev.txt contains `cat $tgtdir/dev.txt | perl -ne 'BEGIN{$w=$s=0;}{split; $w+=$#_; $w++; $s++;}END{print "$w words, $s sentences\n";}'` + echo $dev_text contains `cat $dev_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo $tgtdir/dev.txt contains `cat $tgtdir/dev.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` fi - echo "-------------------" echo "Good-Turing 3grams" echo "-------------------" -ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort +ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2 
-order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" echo "-------------------" echo "Kneser-Ney 3grams" echo "-------------------" -ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort +ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" echo "-------------------" echo "Good-Turing 4grams" echo "-------------------" -ngram-count -lm $tgtdir/4gram.gt0111.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0112.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0122.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0123.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0113.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0222.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0223.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort +ngram-count -lm $tgtdir/4gram.gt0111.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0112.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0122.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0123.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab 
-unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0113.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0222.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0223.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" echo "-------------------" echo "Kneser-Ney 4grams" echo "-------------------" -ngram-count -lm $tgtdir/4gram.kn0111.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0112.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0113.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0122.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0123.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0222.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0223.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort +ngram-count -lm $tgtdir/4gram.kn0111.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0112.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0113.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0122.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0123.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0222.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0223.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + 
+if [ ! -z ${LIBLBFGS} ]; then
+ set -x
+ #please note that if the switch -map-unk "$oov_symbol" is used with -maxent-convert-to-arpa, ngram-count will segfault
+ #instead of that, we simply output the model in the maxent format and convert it using the "ngram"
+ echo "-------------------"
+ echo "Maxent 3grams"
+ echo "-------------------"
+ sed 's/'${oov_symbol}'//g' $tgtdir/train.txt | \
+ ngram-count -lm - -order 3 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+ sed 's//'${oov_symbol}'/g' | gzip -c > $tgtdir/3gram.me.gz || exit 1
+
+ echo "-------------------"
+ echo "Maxent 4grams"
+ echo "-------------------"
+ sed 's/'${oov_symbol}'//g' $tgtdir/train.txt | \
+ ngram-count -lm - -order 4 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+ sed 's//'${oov_symbol}'/g' | gzip -c > $tgtdir/4gram.me.gz || exit 1
+
+fi
+
 echo "--------------------"
 echo "Computing perplexity"
 echo "--------------------"
 (
- for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
- for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
-) | sort -r -n -k 13 | column -t | tee $tgtdir/perplexities.txt
+ for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
+ for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
+) | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt
 echo "The perplexity scores report is stored in $tgtdir/perplexities.txt "
 @@ -141,9 +196,9 @@ nof_trigram_lm=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | wc -l`
 if [[ $nof_trigram_lm -eq 0 ]] ; then
 lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
 elif [[ $nof_trigram_lm -eq 2 ]] ; then
- lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
+ lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
 else #exactly one 3gram LM
- lmfilename=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | cut -f 1 -d ' '`
+ lmfilename=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | cut -f 1 -d ' '`
 fi
 (cd $tgtdir; ln -sf `basename $lmfilename` $outlm )
diff --git a/egs/babel/s5c/local/train_mmi_sgmm2.sh b/egs/babel/s5c/local/train_mmi_sgmm2.sh
index 2d3d0b5bf49..cdf9e28b1bf 100755
--- a/egs/babel/s5c/local/train_mmi_sgmm2.sh
+++ b/egs/babel/s5c/local/train_mmi_sgmm2.sh
@@ -30,7 +30,7 @@ if [ $# -ne 5 ]; then
 echo " --cancel (true|false) # cancel stats (true by default)"
 echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs."
 echo " --config # config containing options"
- echo " --stage # stage to do partial re-run from."
+ echo " --stage # stage to do partial re-run from."
 echo " --transform-dir # directory to find fMLLR transforms."
exit 1; fi @@ -68,7 +68,7 @@ echo "$0: feature type is $feat_type" case $feat_type in delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |" - cp $alidir/final.mat $dir + cp $alidir/final.mat $dir ;; *) echo "Invalid feature type $feat_type" && exit 1; esac @@ -152,7 +152,7 @@ while [ $x -lt $num_iters ]; do $cmd $dir/log/num_acc_sum.$x.log \ sgmm2-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1; rm $dir/num_acc.$x.*.acc - + $cmd $dir/log/update.$x.log \ sgmm2-est-ebw $update_opts $cur_mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1; fi diff --git a/egs/babel/s5c/local/txt_to_rttm.pl b/egs/babel/s5c/local/txt_to_rttm.pl index 659d3c593d7..0e128520880 100755 --- a/egs/babel/s5c/local/txt_to_rttm.pl +++ b/egs/babel/s5c/local/txt_to_rttm.pl @@ -18,7 +18,7 @@ my $flen = 0.01; GetOptions('symtab=s' => \$symtab, 'segment=s' => \$segment, - 'flen=f' => \$flen); + 'flen=f' => \$flen); if ($symtab) { if (!open(S, "<$symtab")) {print "Fail to open symbol table: $symtab\n"; exit 1;} @@ -82,7 +82,7 @@ my $uid = shift @col; my $words = join(" ", @col); @col = split(/;/, $words); - + my $utt = $uid; my $sta = 0; if ($segment) { diff --git a/egs/babel/s5c/local/uem_ctm2segments.pl b/egs/babel/s5c/local/uem_ctm2segments.pl index ab560639c06..658690172c8 100755 --- a/egs/babel/s5c/local/uem_ctm2segments.pl +++ b/egs/babel/s5c/local/uem_ctm2segments.pl @@ -40,10 +40,10 @@ $defaultSegLen = 10; # seconds ################################################################################ -GetOptions("ctmTimeStep=f" => \$ctmTimeStep, - "minSilence=f" => \$minSilence, - "silence=s" => \$silence, - "maxSegLen=f" => \$maxSegLen, +GetOptions("ctmTimeStep=f" => \$ctmTimeStep, + "minSilence=f" => \$minSilence, + "silence=s" => \$silence, + "maxSegLen=f" => \$maxSegLen, "defaultSegLen=f" => \$defaultSegLen); if ($#ARGV == 1) { diff --git a/egs/babel/s5c/results/RESULTS.105-turkish.flp b/egs/babel/s5c/results/RESULTS.105-turkish.flp new file mode 100644 index 00000000000..737d0893abe --- /dev/null +++ b/egs/babel/s5c/results/RESULTS.105-turkish.flp @@ -0,0 +1,29 @@ +%WER 57.5 | 22070 54382 | 49.0 41.7 9.2 6.5 57.5 30.8 | -1.255 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.8 | 22070 54382 | 57.3 34.1 8.6 5.1 47.8 29.0 | -0.605 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 22070 54382 | 59.0 32.7 8.3 4.8 45.8 28.7 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 22070 54382 | 59.0 32.4 8.5 4.8 45.8 28.4 | -0.630 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_17/dev10h.pem.ctm.sys +%WER 47.1 | 22070 54382 | 56.5 32.7 10.8 3.6 47.1 28.7 | -0.430 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_8/metrics.txt:MTWV = 0.5930, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/kws_12/metrics.txt:MTWV = 0.6426, THRESHOLD = 0.384 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_16/metrics.txt:MTWV = 0.6214, THRESHOLD = 0.447 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.6270, THRESHOLD = 0.595 
+exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_kws_8/metrics.txt:MTWV = 0.5930, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_12/metrics.txt:MTWV = 0.6426, THRESHOLD = 0.384 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_kws_16/metrics.txt:MTWV = 0.6214, THRESHOLD = 0.447 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.6270, THRESHOLD = 0.595 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_11/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.807000000000001 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_oov_kws_21/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.547 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_18/metrics.txt:MTWV = 0.0071, THRESHOLD = 0.666 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_9/metrics.txt:MTWV = 0.5003, THRESHOLD = 0.555 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_13/metrics.txt:MTWV = 0.5339, THRESHOLD = 0.581 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_19/metrics.txt:MTWV = 0.5203, THRESHOLD = 0.553 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.5078, THRESHOLD = 0.553 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_12/metrics.txt:MTWV = 0.0045, THRESHOLD = 0.891000000000001 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_11/metrics.txt:MTWV = 0.0066, THRESHOLD = 0.720000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_18/metrics.txt:MTWV = 0.0058, THRESHOLD = 0.867000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_20/metrics.txt:MTWV = 0.0072, THRESHOLD = 0.785000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_11/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.807000000000001 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/oov_kws_21/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.547 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_18/metrics.txt:MTWV = 0.0071, THRESHOLD = 0.666 diff --git a/egs/babel/s5c/results/RESULTS.106-tagalog.flp b/egs/babel/s5c/results/RESULTS.106-tagalog.flp new file mode 100644 index 00000000000..72568cebf81 --- /dev/null +++ b/egs/babel/s5c/results/RESULTS.106-tagalog.flp @@ -0,0 +1,34 @@ +%WER 56.7 | 25332 63009 | 50.6 38.5 10.9 7.3 56.7 32.1 | -1.361 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.4 | 25332 63009 | 57.4 32.7 9.9 5.8 48.4 30.3 | -0.891 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 46.9 | 25332 63009 | 57.4 30.5 12.1 4.3 46.9 30.3 | -0.517 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.7 | 25332 63009 | 58.2 31.1 10.7 4.9 46.7 29.9 | -0.737 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_18/dev10h.pem.ctm.sys +%WER 47.7 | 25332 63009 | 56.1 30.5 13.4 3.9 47.7 30.2 | -0.548 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 56.7 | 25332 63009 | 50.6 38.5 10.9 7.3 56.7 32.1 | -1.361 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.4 | 25332 63009 | 57.4 32.7 9.9 5.8 48.4 30.3 | -0.891 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 46.9 | 25332 63009 | 57.4 30.5 12.1 4.3 46.9 30.3 | -0.517 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.7 | 25332 63009 | 58.2 31.1 10.7 4.9 46.7 29.9 | -0.737 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_18/dev10h.pem.ctm.sys +%WER 47.7 | 25332 63009 | 
56.1 30.5 13.4 3.9 47.7 30.2 | -0.548 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kws_12/metrics.txt:MTWV = 0.4452, THRESHOLD = 0.577 +exp/tri6_nnet/decode_dev10h.pem/kws_11/metrics.txt:MTWV = 0.4778, THRESHOLD = 0.696000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kws_15/metrics.txt:MTWV = 0.4448, THRESHOLD = 0.770000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.4450, THRESHOLD = 0.730000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_12/metrics.txt:MTWV = 0.4452, THRESHOLD = 0.577 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_11/metrics.txt:MTWV = 0.4778, THRESHOLD = 0.696000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_15/metrics.txt:MTWV = 0.4448, THRESHOLD = 0.770000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.4450, THRESHOLD = 0.730000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_8/metrics.txt:MTWV = 0.0173, THRESHOLD = 0.809000000000001 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0310, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/dev_oov_kws_21/metrics.txt:MTWV = 0.0164, THRESHOLD = 0.309 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_20/metrics.txt:MTWV = 0.0183, THRESHOLD = 0.851000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_9/metrics.txt:MTWV = 0.5117, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_10/metrics.txt:MTWV = 0.5408, THRESHOLD = 0.504 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_17/metrics.txt:MTWV = 0.5221, THRESHOLD = 0.556 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.5077, THRESHOLD = 0.648 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/eval_oov_kws_10/metrics.txt:MTWV = 0.0038, THRESHOLD = 0.900000000000001 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_10/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.659 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_17/metrics.txt:MTWV = 0.0047, THRESHOLD = 0.889000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_15/metrics.txt:MTWV = 0.0052, THRESHOLD = 0.522 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_8/metrics.txt:MTWV = 0.0173, THRESHOLD = 0.809000000000001 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/metrics.txt:MTWV = 0.0310, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/oov_kws_21/metrics.txt:MTWV = 0.0164, THRESHOLD = 0.309 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_20/metrics.txt:MTWV = 0.0183, THRESHOLD = 0.851000000000001 diff --git a/egs/babel/s5c/results/RESULTS.107-vietnamese.flp b/egs/babel/s5c/results/RESULTS.107-vietnamese.flp new file mode 100644 index 00000000000..e64bca74572 --- /dev/null +++ b/egs/babel/s5c/results/RESULTS.107-vietnamese.flp @@ -0,0 +1,50 @@ +%WER 57.9 | 21875 111957 | 45.4 42.3 12.3 3.2 57.9 36.7 | -1.203 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.3 | 21875 111957 | 53.2 37.3 9.5 3.5 50.3 35.8 | -0.917 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_9/dev10h.pem.ctm.sys +%WER 47.4 | 21875 111957 | 55.1 32.8 12.1 2.6 47.4 35.7 | -0.642 | exp/tri6_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 48.6 | 21875 111957 | 54.3 35.9 9.8 2.9 48.6 35.4 | -0.769 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +%WER 50.4 | 21875 111957 | 51.3 32.4 16.2 1.8 50.4 35.7 | -0.487 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys 
+ +############################################################################################################################# + +#KWS on the dev kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_9/metrics.txt:MTWV = 0.4488, THRESHOLD = 0.601 +exp/tri6_nnet/decode_dev10h.pem/kws_10/metrics.txt:MTWV = 0.4926, THRESHOLD = 0.576 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_15/metrics.txt:MTWV = 0.4589, THRESHOLD = 0.635 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.4477, THRESHOLD = 0.591 + +#KWS on the dev kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/oov_kws_8/metrics.txt:MTWV = 0.0001, THRESHOLD = 0.778 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_11/metrics.txt:MTWV = 0.0024, THRESHOLD = 0.581 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_16/metrics.txt:MTWV = 0.0012, THRESHOLD = 0.596 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_15/metrics.txt:MTWV = 0.0017, THRESHOLD = 0.817 + +############################################################################################################################ + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist2.xml kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_8/metrics.txt:MTWV = 0.2886, THRESHOLD = 0.513 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_11/metrics.txt:MTWV = 0.3672, THRESHOLD = 0.693 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_15/metrics.txt:MTWV = 0.2999, THRESHOLD = 0.792 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.3041, THRESHOLD = 0.693 + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist2.xml kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_10/metrics.txt:MTWV = 0.0000, THRESHOLD = 0 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.873 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_15/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.214 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_15/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.831 + +############################################################################################################################ + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist3.xml kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/eval_kws_9/metrics.txt:MTWV = 0.3791, THRESHOLD = 0.564 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_12/metrics.txt:MTWV = 0.4444, THRESHOLD = 0.406 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_15/metrics.txt:MTWV = 0.3780, THRESHOLD = 0.609 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.3904, THRESHOLD = 0.51 + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist3.xml kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_10/metrics.txt:MTWV = 0.0021, THRESHOLD = 0.724 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_10/metrics.txt:MTWV = 0.0040, THRESHOLD = 0.491 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_15/metrics.txt:MTWV = 0.0032, THRESHOLD = 0.867 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_15/metrics.txt:MTWV = 0.0039, THRESHOLD = 0.105 + +############################################################################################################################ + diff --git a/egs/babel/s5c/run-1-main.sh b/egs/babel/s5c/run-1-main.sh index e01910ffac0..99d74069087 100755 --- a/egs/babel/s5c/run-1-main.sh +++ b/egs/babel/s5c/run-1-main.sh @@ -119,7 +119,7 @@ if [[ ! 
-f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then echo --------------------------------------------------------------------- echo "Training SRILM language models on" `date` echo --------------------------------------------------------------------- - local/train_lms_srilm.sh --dev-text data/dev2h/text \ + local/train_lms_srilm.sh --oov-symbol $oovSymbol --dev-text data/dev2h/text \ --train-text data/train/text data data/srilm fi diff --git a/egs/babel/s5c/run-4-anydecode.sh b/egs/babel/s5c/run-4-anydecode.sh index 68b87ea1e27..312d26911df 100755 --- a/egs/babel/s5c/run-4-anydecode.sh +++ b/egs/babel/s5c/run-4-anydecode.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash set -e set -o pipefail @@ -13,7 +13,6 @@ fast_path=true skip_kws=false skip_stt=false skip_scoring=false -max_states=150000 extra_kws=true vocab_kws=false tri5_only=false @@ -32,7 +31,7 @@ fi #set of scripts will exit when sourcing several of them together #Otherwise, the CTRL-C just terminates the deepest sourced script ? # Let shell functions inherit ERR trap. Same as `set -E'. -set -o errtrace +set -o errtrace trap "echo Exited!; exit;" SIGINT SIGTERM # Set proxy search parameters for the extended lexicon case. @@ -82,8 +81,8 @@ if [ -z $my_data_dir ] || [ -z $my_data_list ] ; then fi eval my_stm_file=\$${dataset_type}_stm_file -eval my_ecf_file=\$${dataset_type}_ecf_file -eval my_kwlist_file=\$${dataset_type}_kwlist_file +eval my_ecf_file=\$${dataset_type}_ecf_file +eval my_kwlist_file=\$${dataset_type}_kwlist_file eval my_rttm_file=\$${dataset_type}_rttm_file eval my_nj=\$${dataset_type}_nj #for shadow, this will be re-set when appropriate @@ -200,12 +199,12 @@ if [ ! -f $dataset_dir/.done ] ; then fi elif [ "$dataset_kind" == "unsupervised" ] ; then if [ "$dataset_segments" == "seg" ] ; then - . ./local/datasets/unsupervised_seg.sh + . ./local/datasets/unsupervised_seg.sh elif [ "$dataset_segments" == "uem" ] ; then . ./local/datasets/unsupervised_uem.sh elif [ "$dataset_segments" == "pem" ] ; then ##This combination does not really makes sense, - ##Because the PEM is that we get the segmentation + ##Because the PEM is that we get the segmentation ##and because of the format of the segment files ##the transcript as well echo "ERROR: $dataset_segments combined with $dataset_type" @@ -230,7 +229,7 @@ if [ ! -f $dataset_dir/.done ] ; then make_plp ${dataset_dir} exp/make_plp/${dataset_id} plp touch ${dataset_dir}/.plp.done fi - touch $dataset_dir/.done + touch $dataset_dir/.done fi ##################################################################### # @@ -240,12 +239,15 @@ fi echo --------------------------------------------------------------------- echo "Preparing kws data files in ${dataset_dir} on" `date` echo --------------------------------------------------------------------- +lang=data/lang +set -x if ! $skip_kws ; then . ./local/datasets/basic_kws.sh || exit 1 - if $extra_kws ; then + if $extra_kws ; then + L1_lex=data/local/lexiconp.txt . ./local/datasets/extra_kws.sh || exit 1 fi - if $vocab_kws ; then + if $vocab_kws ; then . 
./local/datasets/vocab_kws.sh || exit 1 fi fi @@ -257,7 +259,7 @@ fi #################################################################### ## -## FMLLR decoding +## FMLLR decoding ## #################################################################### decode=exp/tri5/decode_${dataset_id} @@ -297,7 +299,7 @@ if $tri5_only; then fi #################################################################### -## SGMM2 decoding +## SGMM2 decoding ## We Include the SGMM_MMI inside this, as we might only have the DNN systems ## trained and not PLP system. The DNN systems build only on the top of tri5 stage #################################################################### @@ -493,5 +495,5 @@ for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \ ${dataset_dir} data/lang $decode fi done -echo "Everything looking good...." +echo "Everything looking good...." exit 0 diff --git a/egs/babel/s5c/run-4b-anydecode-bnf.sh b/egs/babel/s5c/run-4b-anydecode-bnf.sh index 27c68bacfd8..205f37b46d9 100755 --- a/egs/babel/s5c/run-4b-anydecode-bnf.sh +++ b/egs/babel/s5c/run-4b-anydecode-bnf.sh @@ -45,7 +45,7 @@ if [ -z "$unsup_string" ] ; then fi fi -if ! echo {dev10h,dev2h,eval,unsup,shadow}{,.uem,.seg} | grep -w "$type" >/dev/null; then +if ! echo {dev10h,dev2h,eval,unsup,shadow}{,.pem,.uem,.seg} | grep -w "$type" >/dev/null; then # note: echo dev10.uem | grep -w dev10h will produce a match, but this # doesn't matter because dev10h is also a valid value. echo "Invalid variable type=${type}, valid values are " {dev10h,dev2h,eval,unsup}{,.uem,.seg} @@ -247,11 +247,13 @@ if [ -f $exp_dir/tri7_nnet/.done ] && touch $decode/.done fi - local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\ - --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \ - "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \ - ${datadir} data/lang $decode fi -echo "$0: Everything looking good...." +decode=$exp_dir/tri7_nnet/decode_${dirid} +local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \ + "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \ + ${datadir} data/lang $decode + +echo "$0: Everything looking good...." exit 0 diff --git a/egs/babel/s5d/EXAMPLE.vietnamese b/egs/babel/s5d/EXAMPLE.vietnamese new file mode 100644 index 00000000000..f5dde82c364 --- /dev/null +++ b/egs/babel/s5d/EXAMPLE.vietnamese @@ -0,0 +1,116 @@ +#!/bin/bash + +#This is an example sequence of commands for running the default Kaldi Babel OP1 system +#It is not assumed that you will run it as a script, even though you can try :) + +./run-1-main.sh +./run-2a-nnet-ensemble-gpu.sh +./run-2b-bnf.sh --semisupervised false --ali-dir exp/tri5_ali/ +./run-3b-bnf-sgmm.sh --semisupervised false +./run-3b-bnf-nnet.sh --semisupervised false + +##Training of the automatic segmenter +./run-2-segmentation.sh + +##Decoding the automatic segmentation of dev2h subset. dev2h.pem would mean decoding +##the dev2h subset using the officialy provided segmentation. +##Also possible to run dev10h.pem, dev10h.uem, dev10h.seg and so on... 
+./run-4-anydecode.sh --dir dev2h.seg
+./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised false --extra-kws true
+
+##Decoding of the unsupervised data
+./run-4-anydecode.sh --dir unsup.seg --skip-kws true --skip-stt true
+./run-4b-anydecode-bnf.sh --dir unsup.seg --skip-kws true --skip-stt true --semisupervised false
+
+##Get the one-best path and the weights for frame-weighting of posteriors
+./local/best_path_weights.sh --cmd "$train_cmd" data/unsup.seg/ data/lang \
+ exp/tri6b_nnet/decode_unsup.seg/ \
+ exp/sgmm5_mmi_b0.1/decode_fmllr_unsup.seg_it1/ \
+ exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_unsup.seg_it1 \
+ exp_bnf/tri7_nnet/decode_unsup.seg \
+ exp_bnf_semisup/best_path_weights/unsup.seg
+
+##Semisupervised bottleneck system training (initial setup)
+./run-2b-bnf.sh --semisupervised true --ali-model exp/tri6b_nnet/ \
+ --weights-dir exp/best_path_weights/unsup.seg/decode_unsup.seg/
+
+##Semisup training, SGMM+bMMI on top of the BN features
+./run-3b-bnf-sgmm.sh --semisupervised true
+##Semisup training, pNorm DNN on top of the BN features
+./run-3b-bnf-nnet.sh --semisupervised true
+
+##And decoding again. We decode the unsup.seg again to do the second run of the
+##semisupervised training
+./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised true --extra-kws true
+./run-4b-anydecode-bnf.sh --dir unsup.seg --skip-kws true --skip-stt true --semisupervised true
+
+##One-best output and frame weights for the second run of the semisup training
+./local/best_path_weights.sh --cmd "$train_cmd" data/unsup.seg/ data/lang \
+ exp_bnf_semisup/sgmm7_mmi_b0.1/decode_fmllr_unsup.seg_it1 \
+ exp_bnf_semisup/tri7_nnet/decode_unsup.seg \
+ exp/tri6b_nnet/decode_unsup.seg/ \
+ exp/sgmm5_mmi_b0.1/decode_fmllr_unsup.seg_it1/ \
+ exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_unsup.seg_it1 \
+ exp_bnf/tri7_nnet/decode_unsup.seg \
+ exp_bnf_semisup2/best_path_weights/unsup.seg
+
+##Second run of the semisup training
+./run-2b-bnf.sh --unsup-string "_semisup2" --semisupervised true --ali-model exp/tri6b_nnet/ \
+ --weights-dir exp_bnf_semisup2/best_path_weights/unsup.seg/decode_fmllr_unsup.seg_it1/
+
+./run-3b-bnf-sgmm.sh --semisupervised true --unsup_string "_semisup2"
+./run-3b-bnf-nnet.sh --semisupervised true --unsup_string "_semisup2"
+
+##Decode again to see if we got an improvement
+./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised true --unsup_string "_semisup2" --extra-kws true
+
+
+##Decoding of the dev10h (all systems, all stages)
+./run-4-anydecode.sh --dir dev10h.seg --extra-kws true
+./run-4b-anydecode-bnf.sh --dir dev10h.seg --semisupervised false --extra-kws true
+./run-4b-anydecode-bnf.sh --dir dev10h.seg --semisupervised true --extra-kws true
+./run-4b-anydecode-bnf.sh --dir dev10h.seg --semisupervised true --extra-kws true --unsup_string "_semisup2"
+
+##Decoding of the shadow.seg (combination of dev10h.seg and eval.seg)
+##We did this for the eval run as a kind of "sanity check" -- we check the shadow.seg/dev10h.seg subset
+##performance vs the standalone dev10h.seg performance to catch (hopefully) possible problems
+./run-4-anydecode.sh --dir shadow.seg --extra-kws true
+./run-4b-anydecode-bnf.sh --dir shadow.seg --semisupervised false --extra-kws true
+./run-4b-anydecode-bnf.sh --dir shadow.seg --semisupervised true --extra-kws true
+./run-4b-anydecode-bnf.sh --dir shadow.seg --semisupervised true --extra-kws true --unsup_string "_semisup2"
+
+
+
+#This prepares for separation/split of the shadow dataset into the devset, which we can evaluate
+#and the eval set, which
we will submit
+#Note: we do this only once, for ./data, as we do not really need anything else
+#just the file lists...
+#NB: there was an oversight in one of the scripts that was causing the ctm files to contain
+#NB: incorrect channel info (A instead of 1)
+#NB: To fix that, you can run something like this:
+#NB: find exp/ -name "shadow.seg.ctm" | xargs -t -n 1 sed -i'.bakx' 's/ A / 1 /g'
+./local/nist_eval/create_compound_set.sh --evlset eval.seg --devset dev10h.seg --tgtdir data/shadow.seg
+
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp/tri6b_nnet/decode_shadow.seg
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp/tri6b_nnet/decode_shadow.seg
+
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp/sgmm5_mmi_b0.1/decode_*shadow.seg*
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp/sgmm5_mmi_b0.1/decode_*shadow.seg*
+
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.seg*
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.seg*
+
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp_bnf_semisup/sgmm7_mmi_b0.1/decode_*shadow.seg*
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp_bnf_semisup/sgmm7_mmi_b0.1/decode_*shadow.seg*
+
+#The following commands will actually do two things
+#a) looking at the performance of the dataset --master they will figure out the correct LMW
+#b) symlink the appropriate evaluation result file under the correct EXPID into the ./release directory
+#Warning: it's a lot of files so it's easy to get confused!
+./local/nist_eval/make_release.sh --dryrun false --dir exp/sgmm5_mmi_b0.1 --data data/shadow.seg --master dev10h.seg lang.conf ./release
+./local/nist_eval/make_release.sh --dryrun false --dir exp/tri6b_nnet --data data/shadow.seg --master dev10h.seg lang.conf ./release
+./local/nist_eval/make_release.sh --dryrun false --dir exp_bnf/sgmm7_mmi_b0.1 --data data/shadow.seg --master dev10h.seg lang.conf ./release
+./local/nist_eval/make_release.sh --dryrun false --dir exp_bnf_semisup/sgmm7_mmi_b0.1 --extrasys SEMISUPX --data data/shadow.seg --master dev10h.seg lang.conf ./release
+
+#Combine results (what we call 4way-combo)
+
diff --git a/egs/babel/s5d/README.txt b/egs/babel/s5d/README.txt
new file mode 100644
index 00000000000..6bc3ddacba7
--- /dev/null
+++ b/egs/babel/s5d/README.txt
@@ -0,0 +1,82 @@
+How to set up the BABEL database training environment
+====================================================
+a) Preparation: you need to make sure the BABEL data and the F4DE scoring software
+ is set up as it is in JHU, or change this setup accordingly. This will probably
+ be hard and will involve some trial and error. Some relevant pathnames can be
+ found in conf/lang/* and ./path.sh
+
+ Link one of the config files in conf/languages to ./lang.conf. E.g.:
+ ln -s conf/languages/105-turkish-limitedLP.official.conf lang.conf
+
+
+b) If you plan to work on one or more languages, the following approach is advised.
+ aa) create empty directory somewhere according to your choice
+ (
+ mkdir 206-zulu-llp; cd 206-zulu-llp
+ )
+
+ ab) copy cmd.sh and path.sh (you will probably need to make some changes in these)
+ especially pay attention to KALDI_ROOT in path.sh and possibly switch to using
+ run.pl in cmd.sh
+ (
+ cp /path/to/kaldi/egs/babel/s5b/{cmd.sh,path.sh} .
+ )
+
+ ac) symlink all the directories here to that directory
+ (
+ ln -s /path/to/kaldi/egs/babel/s5b/{conf,steps,utils,local} .
+ )
+ ad) link the necessary scripts ( see below )
+ {
+ ln -s /path/to/kaldi/egs/babel/s5b/run-1-main.sh .
+ }
+ ae) link the appropriate language-specific config file to lang.conf in
+ each directory.
+ (
+ 206-zulu-llp$ ln -s conf/lang/206-zulu-limitedLP.official.conf lang.conf
+ )
+
+
+Running the training scripts
+===================================================
+
+You run the scripts in order, i.e.:
+ run-1-main.sh
+ run-2a-nnet.sh and run-2-bnf.sh may be run in parallel, but run-2-bnf.sh should be
+ run on a machine that has a GPU.
+ run-3-bnf-system.sh trains an SGMM system on top of bottleneck features from run-2-bnf.sh
+ run-4-test.sh is decoding with provided segmentation (we get this from CMU)
+ run-5-anydecode.sh seems to be decoding with the segmentation provided
+
+
+
+Official NIST submission preparation
+==================================================
+The make_release.sh script might come in handy.
+The script evaluates the performance of the sgmm2_mmi_b.0.1 system on
+the eval.uem dataset and chooses the same set of parameters to
+determine the path inside the test.uem dataset.
+
+./make_release.sh --relname defaultJHU --lp FullLP --lr BaseLR --ar NTAR \
+ conf/languages/106-tagalog-fullLP.official.conf /export/babel/data/releases
+
+
+
+
+
+./run-1-main.sh
+./run-2a-nnet-ensemble-gpu.sh
+./run-2b-bnf.sh --semisupervised false --ali-dir exp/tri5_ali/
+./run-3b-bnf-sgmm.sh --semisupervised false
+./run-3b-bnf-nnet.sh --semisupervised false
+
+./run-2-segmentation.sh
+
+./run-4-anydecode.sh --dir dev2h.seg
+./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised false --extra-kws true
+
+
+
+./run-4-anydecode.sh --dir unsup.seg --skip-kws true --skip-stt true
+./run-4b-anydecode-bnf.sh --dir unsup.seg --skip-kws true --skip-stt true --semisupervised false
+
diff --git a/egs/babel/s5d/RESULTS b/egs/babel/s5d/RESULTS
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/egs/babel/s5d/RESULTS.txt b/egs/babel/s5d/RESULTS.txt
new file mode 100644
index 00000000000..c87bf7f2b8b
--- /dev/null
+++ b/egs/babel/s5d/RESULTS.txt
@@ -0,0 +1,8 @@
+The results are by default to be found in /decode_* where the individual /decode_* directories correspond to the language model weight.
+
+An aesthetically pleasing table with the results can be obtained for example like this (YMMV, as well as your aesthetic feeling):
+find exp/sgmm5_mmi_b0.1 -name "*.ctm.sys" -not -name "*char.ctm.sys" -ipath "*fmllr_eval.pem*" | xargs grep 'Sum/Avg' | sed 's/:* *| */ /g' | sed 's/ */ /g' | sort -n -k 9 | column -t
+
+similarly, for the kws outputs, the same table can be obtained as
+find exp/sgmm5_mmi_b0.1 -name "sum.txt" -ipath "*fmllr_eval.pem*" | xargs grep "| Occurrence" | cut -f 1,13 -d '|'| sed 's/:|//g' | column -t | sort -k 2 -n -r
+
diff --git a/egs/babel/s5d/RUN_UNICODE_SYSTEM b/egs/babel/s5d/RUN_UNICODE_SYSTEM
new file mode 100644
index 00000000000..79168d4c3bc
--- /dev/null
+++ b/egs/babel/s5d/RUN_UNICODE_SYSTEM
@@ -0,0 +1,9 @@
+./run-1-main-unicode.sh --unicode-lexicon true --morfessor true --tri5-only true
+
+# For tri5
+./run-4-anydecode.sh --fast-path false --tri5-only true --skip-kws true (for tri5 only)
+
+# For lstm
+./run-4-anydecode.sh --fast-path false --tri5-only true --skip-kws true --data-only true
+./local/nnet3/run_lstm.sh
+./run-4-anydecode.sh --nnet3-model nnet3/lstm_sp --is-rnn true --dir dev10h.pem --skip-kws true
diff --git a/egs/babel/s5d/UNICODE_README b/egs/babel/s5d/UNICODE_README
new file mode 100644
index 00000000000..b8b2358436f
--- /dev/null
+++ b/egs/babel/s5d/UNICODE_README
@@ -0,0 +1,119 @@
+Graphemic Lexicon from Unicode
+================================================================================
+
+General Description
+----------------------------------------
+Given some form of word list in an unknown language, we must find pronunciations
+for each word. When the language is written alphabetically, the letters
+themselves can be used as word pronunciations. In English for instance there
+would be 26 phones, and possibly a few extra for the rarely occurring letters,
+
+ "ö","é","è","â", ...
+
+which occur primarily in foreign loan words.
+
+Some languages use syllabic systems or partially alphabetic scripts, for which
+nothing close to a 1-1 mapping from graphemes to phonemes exists. Examples of
+such are Abugidas and Abjads.
+
+The premise of this system is that for most languages, there exists a unicode
+description of the graphemes from which the phonetics may be recovered.
+
+While non-alphabetic scripts present an obvious challenge, we find that even
+for languages such as English and French, the issue of whether or not to treat
+each accented character as a separate phone presents a problem. After all,
+pâté, pâte, and pate are all English words with different pronunciations.
+Resume, and résumé, are also examples. And this is for a language that is generally
+considered unaccented. In French, which is known to have many diacritics
+affecting pronunciation, we nonetheless find words such as forêt, and bosquet,
+with essentially the same meaning whose "e" sounds have very much the same
+pronunciation. In some scripts, such diacritics are vowel markers, indicators
+of tone, or stress, and probably many other linguistic phenomena we have not
+yet encountered.
+
+Fortunately, the unicode representation of such graphemes has an alternate
+normalization, "NFD", which decomposes a grapheme into its constituent parts.
+In this implementation we treat such marks as modifying the preceding grapheme.
+When the grapheme occurs frequently enough, the accented grapheme is
+automatically considered a separate phoneme. For infrequent accented graphemes
+we treat the accent as a tag and use the tag as an extra question in the tree
+building step.
+
+The issue of syllable boundaries in words is mostly important for keyword-search.
+Syllables can be created by training a morphological analyser on the
+conversational transcripts, and then segmenting each word into its learned
+morphemes.
+
+Usage
+----------------------------------------
+All the scripts for creating the graphemic lexicon are located in local/lexicon,
+except for prepare_unicode_lexicon.py. Run ...
+
+./run-1-main-unicode.sh --unicode-lexicon true --morfessor true
+
+for a full system run using a unicode-lexicon and morfessor.
+
+The general structure is:
+
+1. Generate a list of unique words in the training data. Just use the word
+ entries of the filtered_lexicon if available. Do not include words present
+ in conversation transcriptions such as , etc.
+
+local/lexicon/phone2morph_lexicon.py
+
+2. Use morfessor to create somewhat logical syllabic units. Train the system
+ on the conversational transcriptions for instance, though any body of text
+ in the language should do. The conversational transcriptions were used in
+ this script however.
+
+3. Segment each word in the word list into its morphemes. Represent this as
+ a lexicon of sorts.
+
+local/lexicon/make_unicode_lexicon.py
+
+4. Use the morphemic lexicon created in step 3 as input.
+
+5. Get the unicode representation for each grapheme in each word.
+
+local/lexicon/methods/blind_tags_counts.py
+
+6. Convert the unicode representation of each word into actual units with
+ which we derive an entry in the lexicon. This function is actually imported
+ into make_unicode_lexicon.py. It's written this way to allow for more
+ flexibility in processing the unicode descriptions of graphemes.
+
+local/prepare_unicode_lexicon.py
+7. This creates the rest of the data/local directory. It also adds the extra
+ questions derived from the unicode-derived tags to extra_questions.txt.
+
+
+Script Descriptions
+------------------------------------------------------------------------------
+In local/lexicon,
+make_unicode_lexicon.py :
+
+ This script takes as arguments: a lexicon, word-list, or file with distinct
+ space separated words; a path to an output lexicon that will be generated; a
+ directory containing all possible methods of processing the unicode
+ character descriptions; and the name of the method in the directory to use.
+ Options exist for specifying the type of input file, whether to treat the
+ input lexicon entries as morphemes, etc.
+
+In local/lexicon/methods
+blind_tags_counts.py
+
+ Each method in the methods directory is supposed to follow a strict format:
+ 1. Must have a fmt global specifying the output lexicon format
+ (normally kaldi).
+ 2. Must have an encode function which maps a certain structure in which
+ unicode character descriptions were stored to lexicon entries in the
+ new lexicon we are creating.
+ 3. Certain input arguments, especially a table argument for the table
+ containing the mapping between unicode graphemes, and lexical entries.
+
+
+In local/lexicon/methods
+phone2morph_lexicon.py
+
+ This script takes an input word list, and outputs a morphemic dictionary.
+
diff --git a/egs/babel/s5d/babel.html b/egs/babel/s5d/babel.html
new file mode 100644
index 00000000000..9848e6566f8
--- /dev/null
+++ b/egs/babel/s5d/babel.html
@@ -0,0 +1,788 @@
+ + + + +

Description of Kaldi subsystems

+ + This is a description of the complete Kaldi sub-system, covering all of its + components. It will be referred to from the system descriptions of the various + Kaldi sub-systems, and from the top-level system description of the RADICAL team. + +

1. Abstract

+ +

+ The Kaldi keyword search system is based mostly on a conventional LVCSR pipeline. + We have three main sub-systems, which separately decode the data; + we then use conventional system combination techniques. The three systems are: +

    +
  • SGMM+BMMI. This is a Subspace Gaussian Mixture Model (SGMM) of the type described in [2], + discriminatively trained with Boosted MMI [3]. +
  • DNN. This is a Deep Neural Network with p-norm activations as described in [8]. + For LimitedLP systems we improve performance with an ensemble method which we will + describe below. +
  • Bottleneck SGMM+BMMI system. In this system we train a DNN with a bottleneck layer + of dimension 42, and use it to extract features which we train an SGMM+BMMI system on. +
+ For LimitedLP we add a fourth system, which is a version of the bottleneck system where + the DNN used to extract the bottleneck features is trained on automatically transcribed data as + well as the LimitedLP data. For FullLP we add a different fourth system, which is + a "sequence-trained" version of the DNN, trained with the State-level Minimum Bayes + Risk criterion (a variant of MPE). + + We also include a fifth, less conventional sub-system, based on the "Point Process Model" (PPM), + which uses phone-level posteriors from a DNN trained for one of the systems above. + This will be described in Section 4.16. Its outputs are combined with our systems above + for keyword spotting but not for transcription. +

+ Our keyword search pipeline is based on lattice-indexing as described in [5]; the lattices + are generated using the "exact" lattice generation method described in [6]. + To handle out-of-vocabulary (OOV) keywords, we use the method of [4], which constructs, for + an OOV keyword, proxy keyword sequences: in-vocabulary word sequences that are phonetically + similar. This year we added a "lexicon expansion" method, in which we generate plausible + new words using a syllable-level language model and add them to the lexicon and language model + when decoding (see Section 4.4). (This even slightly improves the WER.) We actually add + the original and expanded-lexicon versions of each system to the final system combination, + but including non-expanded decodings in the system combination is not really necessary. +

+ The code and scripts used for the main Kaldi system are available as part of Kaldi; + see svn://svn.code.sf.net/p/kaldi/code/trunk/. The scripts we used this year are + located in the directory egs/babel/s5b. + + +

2. Notable features

+ + New in our system, and shared by all the sub-systems, are our + pitch features. We describe these in more detail in [7]. This is a + pitch extraction algorithm based on the old "getf0" method, but which naturally + ensures continuity of the pitch contours even in unvoiced regions. We also + derive a continuous-valued voicing feature from the algorithm. Finally we get + a three-dimensional feature consisting of pitch, delta-pitch, and a feature + derived from probability of voicing (POV). These are appended to the PLP + features, giving us consistent gains across languages compared with our + previous pitch features (other teams have also reported gains using our + features). +

+ Something else that is new is the p-norm neural networks [8]. This + is a new nonlinearity type that is related to maxout (in that it is a + dimension-reducing nonlinearity). This gave us around 1% absolute improvement + compared with our old, tanh-based networks. On top of this, for LimitedLP + we introduce an ensemble training method. Imagine training four + networks from different random seeds. We can average the scores from all + of them to get an improvement (around 2% absolute). But we don't like to have + to use multiple networks at test time. Our ensemble method introduces a term in + the objective function that trains the networks' outputs towards each other, to make + them more similar, so that at test time we can pick just one of the networks to test with. + This gives us three quarters of the improvement from the simple method of averaging the scores, + but does not slow us down at test time. We only do this for LimitedLP because it + slows down training too much to be practical for FullLP. +
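 One way to picture this (a sketch in our own notation, not necessarily the exact recipe): if the four networks produce posteriors p_i(s|x) and t is the usual one-hot training target, each network can be trained toward the interpolated target t' = (1 - alpha) t + alpha p_avg(s|x), where p_avg is the average of the four networks' posteriors and alpha controls how strongly the outputs are pulled toward each other.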

+ Our bottleneck feature system has been heavily modified since last year, and + has improved. + Firstly, we implemented it all in Kaldi, as opposed to last year's system which was a + hybrid between Kaldi and Theano. This makes the training faster, since Kaldi + supports parallelized neural network training, using multiple GPUs. The + recipe is basically the same as last year's-- a DNN with a 42-dimensional bottleneck, appending + these features to the baseline fMLLR features, splicing across 3 frames and doing + LDA dimension reduction to 60 dimensions, then training an SGMM system on these features. + However, results seemed a little better with the Kaldi implementation, perhaps 0.5% + absolute. It's hard to say why, as there are too many differences. The thing that is + new is that we implemented semi-supervised training in the LimitedLP case. We + use the 1-best output from decoding as supervision for the untranscribed data, but only + train on a frame if the state-level posterior is above a threshold (we use a low threshold + of 0.35 for this case). +
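 To make the feature pipeline concrete, here is a minimal sketch in terms of standard Kaldi feature binaries; the archive names and the LDA matrix file are placeholders, we read "splicing across 3 frames" as one frame of context on each side, and this is not the recipe's actual code:

   # append 42-dim bottleneck features to the baseline fMLLR features,
   # splice with +/-1 frame of context, then reduce to 60 dims with LDA:
   paste-feats scp:bnf.scp scp:fmllr.scp ark:- | \
     splice-feats --left-context=1 --right-context=1 ark:- ark:- | \
     transform-feats lda.mat ark:- ark:feats_for_sgmm.ark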

+ Our point process model system (Section 4.16), while it gets only around half + the ATWV of our conventional system by itself, is giving us large improvements in + combination with our conventional system, of around 3 to 4% ATWV. This is an + unconventional "exemplar-based" approach. +

+ Our expanded lexicon (Section 4.4) is also new. This method takes + as input the provided lexicon, and uses it to hypothesize likely new words + and their pronunciations, along with their probabilities. We generate 2 million + extra words, with associated probabilities, and we allocate the "unknown-word" + probability mass of our language model to these words. Our method is + "backwards", in that we first generate the phonetic sequences, and then + work out the spellings. The improvement this gives is extremely variable. + For Bengali and Assamese, it makes essentially no difference. But for Zulu + LimitedLP using the development keywords on the development data, it improved + the Kaldi-only ATWV from 0.20 to 0.28. +

3. Extra resources

+ + For the submitted Kaldi systems we did not use any linguistic or other + resources outside of the language development pack. For our LimitedLP + submissions, we did use the FullLP and "untranscribed" data for unsupervised + training, without using the transcriptions. (This is allowed even in the + BaseLR condition). + +

4. System description

+ +

4.1 Low level features

+ + Our basic features are standard 13-dimensional PLP features. To these we + append 3-dimensional features derived from our "Kaldi" pitch tracker, giving a + 16-dimensional "base feature". Our pitch tracker and the configuration we used + are described in [7]. These features were extremely helpful on tonal languages: + on Cantonese and Vietnamese last year, our tests showed as much as 6% absolute + WER improvement compared with no pitch features. In general our new "Kaldi" + pitch features give us about twice as much improvement as our old features from + last year that were based on SAcC. +
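 As an illustration, the 16-dimensional base feature can be produced with standard Kaldi tools roughly as follows (a sketch with placeholder config and archive names, not the recipe's actual feature-extraction script):

   # 13-dim PLP plus 3-dim pitch/POV features, pasted frame by frame:
   compute-plp-feats --config=conf/plp.conf scp:wav.scp ark:plp.ark
   compute-kaldi-pitch-feats --config=conf/pitch.conf scp:wav.scp ark:- | \
     process-kaldi-pitch-feats ark:- ark:pitch.ark
   paste-feats ark:plp.ark ark:pitch.ark ark:plp_pitch.ark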

4.2 Segmentation

+ + Our segmentation is performed via decoding the whole-conversation data using a + GMM-based model. The model is trained in the normal way for an LVCSR system, + but the decoding graph is derived from a phone bigram language model (unsmoothed, + to avoid blowup due to context dependency). We do a single pass of decoding, + without adaptation; the features are processed as spliced-frames+LDA+STC. The + model used for segmentation is trained on transcripts that included certain + data we would normally exclude: segments containing only non-speech events such + as noise are included in the transcripts. +

+ The output of the decoding above is used as the input to the following algorithm. + First we map the frames of the decoder best path to one of three classes: speech, + noise or silence. The segmentation algorithm is as follows: + +

    +
  • Get initial segments: Contiguous regions consisting of speech and/or noise are marked as the initial segments.
  • Pad the initial segments: Non-speech frames on either side of the initial segments are included in the segments one at a time until there +are no more non-speech frames adjacent to any segments (unlikely) or until the non-speech frames make up about 5% of the total frames in the conversation.
  • Merge segments: Two segments are merged if the length of non-speech frames between two segments is less than about 1 second and the merged segments are not longer than 10 seconds.
  • Split long segments: Initial segments that are longer than 10s are split into equal pieces, each shorter than 10s (see the sketch after this list).
  • Remove segments with only non-speech frames, i.e. containing only silence and noise.
+ + +
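 To make the "split long segments" step concrete, here is a minimal sketch operating on a Kaldi-style segments file (utt-id recording-id start end); the file names are placeholders, and the real implementation lives in the segmentation scripts:

   # split any segment longer than 10 seconds into equal pieces, each < 10s:
   awk '{ dur = $4 - $3;
          n = int(dur / 10) + 1;    # smallest piece count giving pieces < 10s
          len = dur / n;
          for (i = 0; i < n; i++)
            printf("%s-%02d %s %.2f %.2f\n", $1, i, $2, $3 + i*len, $3 + (i+1)*len);
        }' segments > segments.split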

4.3 Lexicon (non-expanded)

+ + Here we describe our basic lexicon, before expansion. The BABEL lexicon + comes with syllable boundaries marked using tabs, and syllable-level tags + marking tone. We attach the tone tags to the phones, so that a syllable k a t _1 would become the phone sequence k_1 a_1 t_1. + Formally, each tone version of a phone is a separate phone, but see + our explanation of context dependency below. + We noticed that in some languages, the original lexicon seemed to have been expanded + with some kind of script where some original phone was mapped to two alternative + phones. That was the case for Vietnamese last year and Zulu this year, and it + was helpful to reverse this mapping. Our mapping for Zulu is as follows: +
  k_>  →  g_<
  3    →  e
  R    →  l
  o    →  O
  b_<  →  b
  t_>  →  th
+ After generating a lexicon as described above, we perform the standard procedure
+ in Kaldi training scripts to add word-position dependency. Each phone is mapped
+ to five versions of itself depending on whether it's at the beginning, middle
+ or end of a word, or is a singleton phone, or is a nonword phone (e.g. optional
+ silence in the lexicon). By this point the phone set is quite large, but again,
+ see our explanation of context dependency below.

+ We have four phones in our inventory apart from those that appear in words; + they are all modeled in a context independent way and using a different topology + (5 states, where the middle 3 states all have transitions to each other). These are + for silence, noise, vocalized-noise and unknown-words. The difference between + vocalized noise and unknown-words is that vocalized noise models things like coughs + and laughs, whereas the unknown-word phone models words whose pronunciation is not + known (mainly so we can align them during training). + +

4.4 Lexicon (expanded)

+ + As mentioned above, we perform lexicon expansion to improve our ability to decode + OOV words. The lexicon expansion procedure produces pronunciations and probabilities + for the generated words, so that we know how to allocate the "unknown-word" probability + mass in the language model. The unknown words are introduced as unigrams into our + ARPA language model, with probabilities equal to the probabilities we estimated, + times the unknown-word fraction (equal to the token OOV rate). +

+ The lexicon expansion procedure works as follows (but note that lexicon expansion is
+ not the only thing we do to handle OOVs; see also Section 4.15). We first take all the entries
+ in our original lexicon and view them as sentences, where each syllable corresponds to
+ one symbol (we ignore the spelling). We train an ARPA language model on this with
+ SRILM; a 3-gram "modified Kneser-Ney with interpolation" seemed to work the best.
+ We then generate a large number of "sentences" from this language model: 20 million or so.
+ For each unique sentence in the generated sentences, we compute its language model
+ probability; we then exclude the sentences that correspond to words in the original
+ lexicon, take the 2 million best ones, and these become the pronunciations of
+ our new lexicon entries.
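+ As a rough illustration, the selection step might look like the sketch below; the
+ names here (lm_logprob and friends) are placeholders, not real Kaldi or SRILM APIs:
+
+def select_new_prons(generated, lexicon_prons, lm_logprob, n_keep=2000000):
+    known = set(lexicon_prons)      # pronunciations already in the lexicon
+    scored = {}
+    for sent in generated:          # sent: a tuple of syllable symbols
+        if sent not in known and sent not in scored:
+            scored[sent] = lm_logprob(sent)
+    # keep the 2 million most probable novel pronunciations
+    best = sorted(scored, key=scored.get, reverse=True)[:n_keep]
+    return {pron: scored[pron] for pron in best}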

+ A lexicon entry needs a spelling as well as a pronunciation, and to produce the
+ spellings we use the g2p tool from Sequitur in reverse, generating the most likely
+ spellings for each pronunciation. We reverse it by taking each lexicon entry, e.g.
+
+     hi    h iy
+
+ and reversing it to produce something like
+
+     hiy   h i
+ Actually we don't do it exactly this way because we want iy to appear as a single + symbol on the left, rather than as a sequence of two symbols. So we map the phones + to ASCII symbols first. When doing so we treat tags (e.g. tones) separately, so each tag + has its own ASCII symbol, and a phone with a tag would be rendered as two ASCII symbols. +
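+ A minimal sketch of this encoding (the function names are ours and purely
+ illustrative, and we assume the inventory of base phones plus tags fits in
+ [A-Za-z0-9]; otherwise a larger symbol set is needed):
+
+import string
+
+def make_codebook(base_phones, tags):
+    # one ASCII character per base phone and per tag
+    symbols = iter(string.ascii_letters + string.digits)
+    return {unit: next(symbols) for unit in list(base_phones) + list(tags)}
+
+def encode_pron(pron, code):
+    # "k_1 a_1 t_1" -> one symbol per base phone, one per tag
+    out = []
+    for phone in pron.split():
+        base, _, tag = phone.partition("_")
+        out.append(code[base] + (code[tag] if tag else ""))
+    return "".join(out)
+
+# A lexicon entry "hi  h iy" then yields the reversed g2p training pair
+# (encode_pron("h iy", code), "hi"): the encoded pronunciation plays the role
+# of the spelling, and the spelling plays the role of the phone sequence.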

+ We use g2p to generate a list of the top few likely spellings for each of the generated
+ pronunciations. We take the pronunciations we generated and the probabilities of their spellings,
+ and convert them into a list of words with probabilities on the words, and a list of
+ pronunciations for each word with associated pronunciation probabilities. This is the output
+ of the lexicon expansion, and it is used to create the lexicon and language model that we
+ decode with.

+ We ran two versions of each system, one with and one without the lexicon + expansion, because we wanted to see how much effect it was having. Because we + had them both available, we decided to combine both versions for the final + system combination, but this combination made very little difference to the + results and we could equally well have submitted just the expanded-lexicon + systems. + + +

4.5 Phonetic context dependency

+ + Our phonetic context dependency is a fairly standard setup based on triphone context + and a phonetic decision tree with questions about the phonetic context. However, + we should mention how we handle tone and word-position-dependent phones. The number + of actual phone symbols is quite large; it consists of the number of "base phones" + times five (from word-position dependency), times the number of tones. Firstly, + the decision-tree roots are not separate for each phone symbol, but we have one per + "base phone", with all states sharing a root. The questions can be about the state + of the HMM, or about the left phone, the central phone, or the right phone. + Each question is simply a set of phone symbols. However, in constructing the questions + we make use of the structure of the phone symbols. Each question is either about + the tone (or some other tag), about the word-position, or about the "base-phone", + and the questions about the base phone consist of sets of base-phones that are derived + from a binary tree clustering of the acoustic statistics from the central HMM-states + of all the phones. + +
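+ As an illustrative sketch only (we assume, hypothetically, phone symbols like
+ "k_1_B" meaning base phone "k", tone "1", word-position "B", and we take the
+ clustered base-phone sets as given):
+
+def make_questions(phones, base_phone_sets):
+    questions = []
+    parts = {p: p.split("_") for p in phones}   # [base, tone, position]
+    # questions about the tone (or other tag)
+    for tone in {parts[p][1] for p in phones}:
+        questions.append({p for p in phones if parts[p][1] == tone})
+    # questions about the word position
+    for pos in {parts[p][2] for p in phones}:
+        questions.append({p for p in phones if parts[p][2] == pos})
+    # questions about the base phone: expand each clustered set of base
+    # phones to all of its tone/position-marked versions
+    for base_set in base_phone_sets:
+        questions.append({p for p in phones if parts[p][0] in base_set})
+    return questions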

4.6 Language models

+
+ Our language models are created with SRILM from the training transcripts.
+ We automatically select the best one from among a range of smoothing methods and
+ count cutoffs, using perplexity on held-out data as the criterion; a typical
+ chosen language model is a Good-Turing smoothed 3-gram.
+
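+ The selection loop might look like the following hedged sketch, shelling out to
+ SRILM (the flag spellings are the standard ngram-count/ngram options, but check
+ your installed version; file names are placeholders):
+
+import itertools, re, subprocess
+
+def perplexity(lm, heldout, order=3):
+    out = subprocess.check_output(
+        ["ngram", "-order", str(order), "-lm", lm, "-ppl", heldout], text=True)
+    return float(re.search(r"ppl= *([\d.]+)", out).group(1))
+
+best = None
+smoothings = [[], ["-kndiscount", "-interpolate"], ["-wbdiscount"]]  # [] = Good-Turing
+for smoothing, cutoff in itertools.product(smoothings, ["1", "2"]):
+    subprocess.check_call(
+        ["ngram-count", "-order", "3", "-text", "train.txt",
+         "-gt2min", cutoff, "-gt3min", cutoff, "-lm", "lm.arpa"] + smoothing)
+    ppl = perplexity("lm.arpa", "heldout.txt")
+    if best is None or ppl < best[0]:
+        best = (ppl, smoothing, cutoff)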

4.7 Feature processing and adaptation

+
+ Our base features, as described above, are 16-dimensional (PLP + pitch) features.
+ We process these by splicing with 3 frames of left and right context, doing
+ LDA (with the context-dependent states as the classes), and then estimating
+ an STC/MLLT transform [13] along with our models. We then use speaker adaptation
+ based on fMLLR, which is also done during training (i.e. our models are speaker adaptive).
+ In test time the transforms are obtained by decoding with a GMM-based model.
+ Our SGMM models use speaker vectors as an additional form of adaptation on top of
+ this.
+

4.8 Subspace Gaussian Mixture Models (SGMMs)

+ + Two of the branches of our systems are based on SGMMs [14], as mentioned in the + introduction. Our SGMMs are the "SGMM2" recipe of Kaldi; this uses + the "symmetric" extension of SGMMs as described in [2], and also a substate-tying + scheme that uses a two-level version of the phonetic decision tree, and is similar + in spirit to the Gaussian tying used in BBN's Byblos system. +

+ The main tunable parameters of the SGMM training are given below: + + + + +
              Num-gauss-UBM   Num-leaves   Num-substates
  LimitedLP   750             5000         18000
  FullLP      800             10000        80000
+ The number of "leaves per group" in the substate-tying scheme is set at its normal value, which + is 5. + + +

4.9 Deep Neural Networks

+
+ The deep neural network training setup we use in Kaldi is one of the two parallel setups
+ that we maintain ("Karel's setup" and "Dan's setup"); this system uses "Dan's setup". The
+ training procedure differs in a number of ways from previously published methods, and
+ for reasons of time and space we can't document it fully here; see the Kaldi
+ documentation for more information.
+ The most salient point is that the setup allows us to train a neural network in parallel
+ on multiple GPUs, which substantially decreases the training time. For example, for Zulu, the
+ FullLP system took 11 hours to train for 25 epochs on 8 GPUs.
+ The LimitedLP system took 7 hours to train for 25 epochs on 4 GPUs, but note that we
+ were training 4 networks at the same time (for the ensemble method described below),
+ which slowed the training down by roughly a factor of 4.
+
4.9.1 p-norm nonlinearities
+
+ Our major improvement to our DNN system was the introduction of "p-norm" nonlinearities,
+ described in [8]. The inputs to our DNNs are 40-dimensional fMLLR features, obtained
+ via first-pass decoding with our GMM system. These are spliced across a 9-frame context window
+ (4 frames on each side), and processed with an LDA-like transform to decorrelate them.
+ The FullLP system has four hidden layers with 4000 as the input dimension to the nonlinearity
+ and 400 as the output dimension (so the group size is 10). There are 12000 output neurons
+ in the softmax layer; this is more than the number of context-dependent states (which is
+ about 5000), because of the "mixing-up" procedure described in the Kaldi documentation.
+ For the LimitedLP system the input/output dimensions are 3000/300 and the softmax layer dimension
+ is 5000 (versus about 2000 context-dependent states).
+
4.9.2 Ensemble training
+ + For the LimitedLP system we improve our system via a novel "ensemble training" method. + This involves training four versions of the neural network in parallel. We initialize + four networks using four different random seeds. During training, we train them + towards each other by adding a term in the objective function which penalizes the + K-L divergence between their outputs. Practically speaking, this means interpolating + the "hard label" for each frame with a "soft label" derived from interpolating the + posteriors derived from the averaged output of all four neural nets. The amount of + the "soft label" we add to the "hard" label is determined by a constant that we vary + from about 3 to 5 during training, so the extent of "training towards each other" gets + stronger as we train. +
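+ As a sketch, the per-frame target might be computed as follows; this is one
+ plausible reading of the weighting (the exact recipe may differ), with c the
+ constant that grows from about 3 to 5:
+
+import numpy as np
+
+def ensemble_target(posteriors, label, c):
+    # posteriors: (num_nets, num_states) outputs of the four nets for one frame
+    soft = posteriors.mean(axis=0)     # averaged "soft" label
+    hard = np.zeros_like(soft)
+    hard[label] = 1.0                  # supervised "hard" label
+    target = hard + c * soft           # mix hard and soft labels
+    return target / target.sum()       # renormalize to a distribution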

+ During decoding, we pick just one of the systems arbitrarily. Since it has been + trained towards the other networks, it acts a little bit like an ensemble of + networks, even though it is just one network. This gives us about 1.5% WER + improvement. + +

4.9.3 Sequence training
+
+ For the FullLP system only, we do discriminative training ("sequence training")
+ on our DNN. Our discriminative training is based on a state-level variant of
+ the Minimum Phone Error (MPE) criterion, called sMBR [15]. We are mostly following
+ the recipe described in [16], although modified for our parallel-training method.
+ The training is based on Stochastic Gradient Descent (SGD), modified by our
+ "preconditioning method", which will eventually be documented properly (till then,
+ see the code).
+ We use a learning rate of 9E-5, but one tenth that value for the output layer.
+ Training is for four epochs.
+ Instead of frame-level randomization we use segment-level randomization, where a
+ segment is the smallest piece we could chop our lattices into while still being
+ able to accurately evaluate the objective function. The training is in parallel
+ using 4 GPUs, periodically averaging the parameters, just as for our basic training.
+ (Note that the "effective learning rate" is as a result four times lower than what
+ we mentioned above.)
+

4.10 Bottleneck features

+ + Our bottleneck system is based on the same code and methods as our DNN system, + except that we use tanh rather than p-norm nonlinearities, and the DNN has a bottleneck + layer. For the LimitedLP system we use four hidden layers with 1024 neurons, then + a bottleneck layer with 42 neurons, then one hidden layer with 1024 neurons, then the + output layer. For the FullLP system, replace (4, 1024) with (5, 2048). As before, + the input to the network is 40-dimensional LDA+STC+fMLLR features, spliced across 9 frames. +

+ For feature extraction we remove the part of the network after the 42-dimensional
+ bottleneck, including the tanh nonlinearity, and append the baseline 40-dimensional
+ features to its output, giving an 82-dimensional feature vector. This is spliced
+ across ±1 frame and the dimension is reduced with LDA to 60 dimensions. (Note: we
+ don't commence training on these features from scratch but start with alignments
+ from our SAT-trained GMM-based system.)
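+ A sketch of this feature assembly, where bnf(t), base(t) and lda_mat are
+ placeholders for the network's linear bottleneck output, the baseline features,
+ and the LDA matrix estimated beforehand:
+
+import numpy as np
+
+def bnf_features(bnf, base, lda_mat, num_frames):
+    def frame(t):                      # 42 + 40 = 82 dims per frame
+        return np.concatenate([bnf(t), base(t)])
+    feats = []
+    for t in range(num_frames):
+        left, right = max(t - 1, 0), min(t + 1, num_frames - 1)
+        spliced = np.concatenate([frame(left), frame(t), frame(right)])
+        feats.append(spliced @ lda_mat)   # reduce 3 x 82 = 246 dims to 60
+    return np.stack(feats)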

+ From this point we train an SGMM+BMMI system. Because the feature dimension is higher,
+ the number of parameters would increase if we left the rest of the system configuration
+ the same, so we use the following reduced configuration values:
              Num-gauss-UBM   Num-leaves   Num-substates
  LimitedLP   500             5000         10000
  FullLP      550             10000        50000
+ Because the features are much "stronger" than normal features (i.e. more informative about the + class), and more correlated, we need to decode with a different acoustic scale than normal. + We normally decode SGMM systems with an acoustic scale of 0.1. For this system we decode with + an acoustic scale of 1/15 = 0.06666. Note: the more finely tuned acoustic scale is determined + by best WER or ATWV on the development data, after rescoring the lattices with different weights; + this value is just to get us in the right ballpark during lattice generation. + + +

4.11 Build order

+ + In order to clarify the relationship between the various systems, we document here the + order of system building. The initial stages, when the dependency graph is just a linear + sequence, are as follows: + + + + + + + + + +
  Stage   Num-leaves/gauss   Num-leaves/gauss   Feature type
          (LimitedLP)        (FullLP)
  mono    n/a                n/a                delta+delta-delta
  tri1    1000/10k           1000/10k           delta+delta-delta
  tri2    2500/36k           1000/20k           delta+delta-delta
  tri3    2500/36k           6000/75k           delta+delta-delta
  tri4    2500/36k           6000/75k           LDA+STC
  tri5    2500/36k           6000/75k           LDA+STC+fMLLR
+After the tri5 stage, the build graph "branches out", and the training of the SGMM system, the +DNN system and the DNN that includes the bottleneck features, all depend on the alignments and +transforms obtained from the tri5 system. We have documented the number of parameters of those +other systems separately. + +

4.12 Decoding order

+
+ After training the tri5 system, we obtain via single-pass retraining a version of the system that
+ is trained on speaker-independent features. This model is used in the first, speaker-independent pass
+ of recognition (segmentation aside, which we have documented separately). All decoding
+ passes are with WFST decoders that output lattices. Starting from a raw,
+ state-level lattice we use the determinization algorithm of [6] to produce
+ a word-level lattice, although this year we extended the determinization algorithm slightly to
+ enable the generation of deeper lattices, by first doing a phone-level determinization before
+ the word-level determinization. This keeps the determinization from "blowing up" when the
+ beam is too large.
+

+ The lattices from the speaker-independent decoding are used with the speaker-adapted "tri5" model to compute initial + fMLLR transforms, which are used with the speaker-adapted model to rescore the lattices to get + better posteriors and estimate the fMLLR transforms a second time. + Then another lattice generation pass is done with the speaker-adapted model and adapted features, + and the fMLLR transforms are estimated a third time and the lattices rescored with those features. +

+ Note: we don't include silence frames in the fMLLR computation. Since the + lattice generates soft counts, this is accomplished via per-frame weights, + not a hard cutoff. +

+ The decoding of the later models-- the SGMMs, DNNs and bottleneck feature based SGMMs-- + all depend on the "tri5" decoding because they use the fMLLR transforms generated there. +

+ Once we have these transforms, the DNN decoding is single-pass, but for the discriminatively + trained DNNs we first decode with the basic DNN and then rescore the lattices with + four different versions of the final DNN system, one for each epoch. This is so that we + can choose the best epoch to use. +

+ The SGMM decoding naturally has two passes: one using a speaker-independent version of + the SGMM system (speaker-independent because it doesn't have speaker vectors, although + we do have fMLLR features), and then another pass of decoding after estimating the + speaker vectors. However, we only generate the lattice once. In order to ensure + an accurate final lattice, we dump the state-level lattice from the first pass of + decoding and don't do the final lattice-determinization until after estimating the + speaker vectors. See [6] if the term "state-level lattice" is confusing. + +

4.13 Keyword index generation

+ + The keyword index generation uses Finite State Transducer concepts, and is based on [5]. + It relies on the fact that our lattices are determinized at the word level, which + is an essential part of our lattice generation procedure. This method constructs + an index such that for any given keyword sequence (of any length), one can do a simple + lookup in a finite state transducer and find a list of all the occurrences of that keyword + sequence in the set of lattices that were indexed. + The number of potential word sequences grows exponentially with the sequence + length, and the index does not blow up even though it allows us to look up arbitrarily long + sequences. This is accomplished through the magic of determinization, together with + some clever choices of semirings. +

+ We build a separate index for each language model scale in a predetermined range (e.g. 10, 12, 13, 14, 15), + so that we can separately run the keyword search pipeline for each scale, and pick the + scale with the best ATWV on the dev data. (Note: since there is only one dev set, all our + numbers reported on the dev set have these scales optimized on that set, and the same + applies for WER numbers). + +

4.14 Keyword search

+ + Once the index is built, keyword search is very simple and fast: we look up + the sequence in the index generated above, and it returns a list of the hit locations + (utterance-ids and start and end times) and the associated lattice posteriors. + In this document, we assume that by "keyword" we mean some given sequence of words, possibly + of length one. +

+ The most non-obvious aspect of this is the per-keyword normalization of the scores. + The Term Weighted Value (TWV) metric, after ignoring constant terms and doing + a few manipulations, may be expressed as follows: +

+ TWV = const + sum-over-keywords ( 1/K ( Ntrue-hit / Ntrue - beta/duration NFA ) ) +

+ Here, sum-over-keywords is taken over all keywords that were actually seen in + the test set being considered. The values in the equation may be defined as follows: + + + + + + + + +
  Name          Definition
  K             Number of keywords that appear in this test set.
  Ntrue-hit     Number of occurrences of this keyword that we correctly spotted.
  Ntrue         Number of times this keyword actually occurred in this test set.
  NFA           Number of incorrect hits of this keyword that we produced.
  beta          A constant equal to exactly 999.9 (don't ask).
  duration      The total number of seconds of audio in the test set: a constant we know exactly.
+
+ I believe the following analysis comes from [17]. In statistical systems, if we assume
+ model correctness we can generally trust marginals even of very noisy and unreliable things.
+ So for instance, even if our individual recognitions of a word are very inaccurate, the sum
+ of the posteriors may be reasonably accurate if the system was well trained. At least, we can hope so.
+ So if we take the sum of posteriors of the hits of a keyword over our entire test set, we can form
+ a reasonable estimate of Ntrue. In what follows, let Ntrue-estimate be simply
+ the sum of the lattice posteriors of this keyword, over our whole test set. We will use Ntrue-estimate
+ in place of Ntrue. So for some keyword, the TWV contribution from that keyword is:

+ TWV-contribution = 1/K ( Ntrue-hit / Ntrue-estimate - beta/duration NFA ) +

+ Here, Ntrue-estimate and beta/duration are both known quantities. Consider one putative hit, + i.e. one location in time where we have a nonzero posterior and we might want to produce a hit. Let + the posterior of the keyword in the lattice be p. Let's assume that p is a reasonable estimate of the + probability that the keyword actually exists there, which is reasonable assuming model correctness. + As an aside, note that we scale down the acoustics in our lattices while computing the posteriors, so the probabilities + are quite well calibrated; also, we have plotted the (posterior in our lattice) versus + (probability that the word was actually there) and it's within spitting distance of a straight line. + Anyway, back to the task at hand. We can write, for this putative hit, +

+ expected-TWV-contribution = 1/K ( p / Ntrue-estimate - beta/duration (1-p) ) . +

+ Here, all but one of the quantities in the equation are known. K is not known, because we don't know
+ how many keywords were actually seen in the test set, but because we only care about the sign of this quantity
+ we don't actually need to know K. For a putative hit, the equation above gives us all we need
+ in order to decide whether to say "yes" or "no": if it's positive, "yes", else "no". We want to
+ keep the hit if this quantity is positive, i.e. if:
+

+ p / Ntrue-estimate - beta/duration (1-p) > 0
+ p (1/Ntrue-estimate + beta/duration) - beta/duration > 0
+ p > (beta/duration) / (1/Ntrue-estimate + beta/duration)
+ p > Ntrue-estimate / (duration/beta + Ntrue-estimate) +

+ Let's call the value above the "threshold", i.e.
+threshold = Ntrue-estimate / (duration/beta + Ntrue-estimate)

+ (there is a different threshold for each keyword). In order to make it easier to choose
+ the cutoff point for when to stop producing hits, we would like to produce the output
+ as normalized scores that are all somehow comparable to each other; that way we can tune
+ a global threshold.
+ We would like to normalize our scores in such a way that they are still all between zero and one.
+ We do this by converting p to a log-ratio, i.e. q = log(p / (1-p)), computing a similar log-ratio for the
+ threshold, i.e. t = log(threshold / (1-threshold)), and then subtracting t from q,
+ i.e. q' = q - t, to produce a normalized log-ratio q' (so if q' > 0, then p > threshold).
+ Then we convert back from a log-ratio to an actual
+ probability, call this p'. When we work out the equations for this, it comes out to
+ p' = (1-threshold) * p / ((1-threshold)*p + (1-p)*threshold) + + +
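+ Putting the above together, the normalization is a small function:
+
+def normalize_score(p, ntrue_estimate, duration, beta=999.9):
+    threshold = ntrue_estimate / (duration / beta + ntrue_estimate)
+    # q' = log(p/(1-p)) - log(threshold/(1-threshold)); mapping q' back to a
+    # probability gives the closed form below, so p' > 0.5 iff p > threshold.
+    return ((1 - threshold) * p /
+            ((1 - threshold) * p + (1 - p) * threshold))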

4.15 Out of vocabulary (OOV) keyword search

+ + In this section we describe how we perform the keyword search when the keyword is + OOV-- i.e. when at least one of the words in the sequence is not in our lexicon. + Note that this is a separate thing from the lexicon expansion described above. + If we are using the lexicon-expanded decoding graph, then this procedure is only applied + if the keyword is OOV with respect to the expanded lexicon. +

+ We have described our basic proxy search procedure in [4] so we will not repeat + it at length here. The basic idea is to use a learned phone confusion matrix + to find a list of in-vocabulary word sequences that are phonetically close to + the sequence we want, with associated penalties for being too distant. As a + special case, we don't penalize the proxy sequences for having extra phones at + their beginning and end (so, for instance, if the pronunciation of a + searched-for word appeared as part of a longer word, we would allow that + without penalty). +

+ As background, our index lookup is actually done by FST composition, where one + of the things to be composed is the "query FST" (normally with a linear structure) + and one is the huge index. In our proxy search method, we represent the set of + proxy keywords, and their associated weights, as an FST, and to the keyword + search pipeline it looks no different from a linear sequence (since the input + is just an FST). +

+ There is something new about our proxy keyword search pipeline this + year. After implementing the "expanded lexicon", we noticed that the process + of generating proxy keywords was very slow. This procedure involves various + operations of composition and determinization, where the inputs are a linear + sequence consisting of the OOV keyword (as phones), a phone-edit-distance FST, + and a lexicon. When we made the lexicon much bigger, it became slow. In order + to make it fast again, we had to rearrange the order of composition and + determinization, and implement an "on-demand" FST pruning procedure for OpenFST + (as part of the Kaldi extensions to OpenFST). + + + +

4.16. Point Process Models for Keyword Search

+ +

The point process model (PPM) for keyword search [9] is a
whole-word, event-based acoustic modeling and phonetic search technique.
It operates on sparse phonetic event streams extracted from the speech
signal using a frame-level subword acoustic model. In our Babel system,
we use our Kaldi Deep Neural Network acoustic models described above to
generate posteriorgrams over context-dependent states. We subsequently
sum posterior dimensions sharing the same center phone to produce
monophone posteriorgrams for each utterance. After applying the matched
filter smoothing of [10], local maxima of each posterior trajectory
define phonetic event times. The set of phonetic events for the search
collection defines the index for subsequent keyword search; this
construction, which is performed entirely independently of the keyword
set, is our only use of the test audio.

+The next stage is point process model construction. For +in-vocabulary words, we perform MAP estimation of the Poisson rate +parameters for each word in the lexicon [11]. This takes advantage of +any exemplars present in the training data, but falls back on +dictionary-based model priors (the simple variant, see [11] for details) +if no exemplars are available. For OOV keywords, we use Sequitur G2P +pronunciations to construct the dictionary models. Multi-word keyword +models are constructed by concatenating MAP estimated unigram PPMs, with +the overall duration distributions derived using the Monte Carlo +techniques from [12]. Search for each keyword is performed using an +optimized detection function calculation scheme that is 500,000 times +faster than realtime. We consider the PPM system performance both in +isolation and in combination (at the kwslist level) with the Kaldi LVCSR +search engine outputs. + +
+

4.17. Class-based language model

+Due to the sparsity of the Tamil data, a combination of different smoothing
+techniques was used to train a trigram for LimitedLP and FullLP:
+
  1. a class-based language model, where the class is derived from the first three characters of the Tamil word
  2. a class-based LM using the first six characters
  3. one using the last three characters
  4. a skip bigram
  5. a word trigram where the absolute discounting parameter depends on the count level, via a rational function
  6. the original trigram (Kneser-Ney as implemented in SRILM)
+
+Models 1-5 were implemented in LSVLM. In order to map them to ARPA format, an
+artificial corpus of 30 million tokens was sampled using model 5. A trigram tree
+was constructed and the probabilities of models 1-5 were written to the leaves of
+that tree. In the end, models 1-6 were combined using linear interpolation; model 2
+made the largest contribution in all experiments.
+
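+The final interpolation is then simply, as a sketch (each model is assumed to be
+a callable returning P_i(word | history), with the weights being the interpolation
+lambdas, tuned on held-out data):
+
+def interpolated_prob(word, history, models, weights):
+    return sum(w * m(word, history) for w, m in zip(weights, models))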
+ +

4.18. Segment-level decoding

+
+ +

4.19 System combination methods

+ +
4.19.1 System combination for transcription
+ + Here we describe the system combination methods that are used in the "Kaldi-only" + submissions. For the overall RADICAL combination, which is based on ROVER, we + provide both the individual Kaldi sub-systems, and the overall combined system + which we combine as described in this section. +

+ Our systems are not cross-adapted, unless you count the fact that they all use + the fMLLR transforms from the shared "tri5" stage. For transcription purposes, + the only form of combination we use in the Kaldi sub-system is a combination + procedure based on Minimum Bayes Risk decoding, as described in [1]. We view + this as a more principled way to do confusion network combination (CNC) [18], + without the various heuristics that are used to produce confusion networks. + There is one aspect of this that we should explain, which relates to the + language-model weight. Normally when decoding, we do a linear sweep over the + language model weights over some range (e.g. 10, 11, 12, ... 18), and select + the best one. We do the same when combining systems, except that sometimes the + different systems will require substantially different language model weights + and there is no one weight that is good for all of them; it's not practical to + try all possible combinations of weights. When combining systems, we apply a + different offset to the language-model weights for each system. This offset is + determined by the beginning of the language-model-weight range that we sweep + for each system, which in turn was determined by us when setting up the + configuration files for our system. So for instance, if we start the regular + SGMM system at offset 10, and the bottleneck+SGMM system at 15, then there would + be an offset of 5 between the two systems when we do the combination. +

+ We don't bother applying weights to the different systems when combining, but + on occasion we do leave out some of the worse systems from the combination. + This is decided by a human operator, based on trying different combinations on + the dev set. The identities of the systems that were combined will be noted + in the individual submissions. + +

4.19.2 System combination for keyword search
+ + In this section we describe the Kaldi-internal method of system combination for + keyword search. For the overall RADICAL system combination, we provide the kwslists + for both the individual Kaldi subsystems, and their combination as described in this + section. +

+ The Kaldi-internal combination for keyword search is based on averaging across systems the
+ unnormalized putative hits (i.e. the lattice posteriors extracted from the index),
+ before normalizing the averaged posteriors using the normalization method described
+ in Section 4.14. Note that in order to do this averaging, we have
+ to have some notion of when multiple hits are "at the same time". This is pretty obvious
+ (hits are the same if they overlap in time), so we won't dwell on it further. If one
+ system did not have a hit at a particular time, this is treated as a posterior of
+ zero.
+

+ We do not do a conventional average (i.e. a mean).
+ We wanted to implement something that was in between a mean and a geometric mean. We
+ used the notion that a geometric mean is a mean of logs, and that a log is the limit of
+ a scaled power of x, (x^p - 1)/p, as p approaches zero. So if we take the mean of x^p
+ for some power p between zero and one, and take the result to the power 1/p,
+ this is somewhere between a mean and a geometric mean. So this is what we do.
+ Suppose we have three scores: a, b and c. We choose a power p (say, p=0.5, but it's tuned
+ per language). Then we let
+ average = (a^p + b^p + c^p)^(1/p) .
+Actually we extend this to a weighted average, i.e. +
+ average = (w_a a^p + w_b b^p + w_c c^p)^(1/p)
+where the weights sum to one. The weights are determined manually in small scale +experiments on one of the languages, as the result is not very sensitive to the +weights. We used weights that are fairly close to each other, but with better +systems having larger weights. +
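+A sketch of this weighted combination:
+
+def combine_scores(scores, weights, p=0.5):
+    # scores: per-system posteriors for one putative hit (0.0 where a system
+    # had no hit); weights sum to one; p in (0, 1] moves from a geometric-like
+    # mean (p -> 0) towards a plain weighted mean (p = 1).
+    total = sum(w * s ** p for w, s in zip(weights, scores))
+    return total ** (1.0 / p)
+
+# e.g. combine_scores([0.8, 0.6, 0.0], [0.4, 0.35, 0.25], p=0.5) ~= 0.40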

+We apply the normalization method of Section 4.14 after taking the
+weighted mean.
+
+

5. Hardware

+
+A variable number of 16-core (Intel(R) Xeon(R) CPU E5-2680) machines were used, with 2 GB
+of memory per core. The training of the LimitedLP system was done using 32 cores (2 nodes);
+the training of the FullLP system was done using 64 cores (4 nodes). Each of the nodes was
+equipped with one GPU card (Tesla K20m), but these cards weren't used for training, with
+the exception of the neural networks (the DNN and BNF systems). The detailed timing info is
+provided in the next section. The maximum total storage capacity used was approximately 5 TB.
+The typical size of a complete system (including lattices) is around 300 GB. The lattice
+generation of the shadow dataset (combined dev10h and eval) was done on 96 cores (6 nodes).
+Indexing and search were done on 64 CPUs (4 nodes).
+

6. Timing

+
+DATADEF:==BaseLR{204LimitedLP}:AM{204LimitedLP},LM{204LimitedLP},PRON{204LimitedLP},AR{None}
+
+
+Ingestion Elapsed Time (hh:mm:ss) - 151:29:03
+Ingestion Total CPU Time (hh:mm:ss) - 9546:33:38
+Ingestion Total GPU Time (hh:mm:ss) - 92:23:16
+
+Ingestion Maximum CPU Memory (gbytes) - 192
+Ingestion Maximum GPU Memory (gbytes) - 16
+
+Search Elapsed Time (hh:mm:ss) - 12:39:08
+Search Total CPU Time (hh:mm:ss) - 427:17:22
+Search Total GPU Time (hh:mm:ss) - 0:00:00
+
+Search Maximum CPU Memory (gbytes) - 32
+Search Maximum GPU Memory (gbytes) - 16
+
+ + +

7. References

+ + +
+
  • [1] "Minimum Bayes Risk decoding and system combination based on a recursion for edit distance",
    Haihua Xu, Daniel Povey, Lidia Mangu and Jie Zhu, Computer Speech and Language, 2011.
  • [2] "A Symmetrization of the Subspace Gaussian Mixture Model", Daniel Povey,
    Martin Karafiat, Arnab Ghoshal, Petr Schwarz, ICASSP 2011.
  • [3] "Boosted MMI for Model and Feature Space Discriminative Training", Daniel Povey,
    Dimitri Kanevsky, Brian Kingsbury, Bhuvana Ramabhadran, George Saon and Karthik Visweswariah, ICASSP 2008.
  • [4] "Using Proxies for OOV keywords in the Keyword Search Task", Guoguo Chen, Oguz Yilmaz,
    Jan Trmal, Daniel Povey and Sanjeev Khudanpur, ASRU 2013.
  • [5] "Lattice Indexing for Spoken Term Detection", Dogan Can and Murat Saraclar,
    IEEE Transactions on Audio, Speech and Language Processing.
  • [6] "Generating exact lattices in the WFST framework", D. Povey, M. Hannemann et al., ICASSP 2012.
  • [7] "A Pitch Extraction Algorithm Tuned for Automatic Speech Recognition", Pegah Ghahremani,
    Bagher BabaAli, Daniel Povey, Korbinian Riedhammer, Jan Trmal and Sanjeev Khudanpur, ICASSP 2014.
  • [8] "Improving Deep Neural Network Acoustic Models using Generalized Maxout Networks",
    Xiaohui Zhang, Jan Trmal, Daniel Povey and Sanjeev Khudanpur, ICASSP 2014.
  • [9] Jansen, A. and Niyogi, P., "Point process models for spotting keywords in continuous speech",
    IEEE Trans. Audio, Speech and Language Proc., 17(8), pp. 1457-1470, 2009.
  • [10] Kintzley, K., Jansen, A., and Hermansky, H., "Event Selection from Phone Posteriorgrams
    Using Matched Filters", in Proc. of INTERSPEECH, 2011.
  • [11] Kintzley, K., Jansen, A., and Hermansky, H., "MAP Estimation of Whole-Word Acoustic Models
    with Dictionary Priors", in Proc. of INTERSPEECH, 2012.
  • [12] Kintzley, K., Jansen, A., and Hermansky, H., "Featherweight Phonetic Keyword Search for
    Conversational Speech", in Proc. of ICASSP, 2014.
  • [13] Mark Gales, "Semi-Tied Covariance Matrices for Hidden Markov Models", IEEE Trans. SAP, 1999.
  • [14] Daniel Povey, Lukas Burget et al., "The Subspace Gaussian Mixture Model--a Structured
    Model for Speech Recognition", Computer Speech and Language, 2011.
  • [15] Gibson, Matthew, "Minimum Bayes risk acoustic model estimation and adaptation",
    Dissertation, University of Sheffield, 2008.
  • [16] K. Vesely, A. Ghoshal, L. Burget and D. Povey, "Sequence-discriminative training of
    deep neural networks", Proc. Interspeech 2013.
  • [17] Damianos Karakos et al., "Score normalization and system combination for improved
    keyword spotting", ASRU 2013.
  • [18] Evermann, Gunnar, and P. C. Woodland, "Posterior probability decoding, confidence
    estimation and system combination", Proc. Speech Transcription Workshop, Vol. 27, 2000.
+
+ + diff --git a/egs/babel/s5d/cmd.sh b/egs/babel/s5d/cmd.sh new file mode 100644 index 00000000000..a4a11bef039 --- /dev/null +++ b/egs/babel/s5d/cmd.sh @@ -0,0 +1,29 @@ +# "queue.pl" uses qsub. The options to it are +# options to qsub. If you have GridEngine installed, +# change this to a queue you have access to. +# Otherwise, use "run.pl", which will run jobs locally +# (make sure your --num-jobs options are no more than +# the number of cpus on your machine. + +#a) JHU cluster options +export train_cmd="queue.pl -l arch=*64" +export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G" +export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G" + +#export cuda_cmd="..." + + +#b) BUT cluster options +#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M" +#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M" +#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G" + +#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1" +#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu" +#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G" + +#c) run it locally... +#export train_cmd=run.pl +#export decode_cmd=run.pl +#export cuda_cmd=run.pl +#export mkgraph_cmd=run.pl diff --git a/egs/babel/s5d/conf/bnf/config_full.py b/egs/babel/s5d/conf/bnf/config_full.py new file mode 100755 index 00000000000..5ea3ddbb1d9 --- /dev/null +++ b/egs/babel/s5d/conf/bnf/config_full.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +################################################# +## PTDNN - Python Toolkit for Deep Neural Network +## Author: Yajie Miao +################################################# + +import os +import sys + +from utils.learn_rates import LearningRateExpDecay + + +class BnfExpConfig(object): + + def __init__(self): + + # working directory; by default, the pfiles should be here + self.wdir = "WORK/" + self.pretrain_data = self.wdir + 'train.pfile.gz' # pretraining data + self.pretrain_output = self.wdir + "rbm.ptr" # pretraining output + + # finetuning data + self.finetune_train_data = self.wdir + 'train.pfile.gz' # finetune training data + self.finetune_valid_data = self.wdir + 'valid.pfile.gz' # finetune validation data + self.finetune_output = self.wdir + "final.nnet.raw" # finetune output + self.nnet_kaldi_fmt = self.wdir + "final.nnet" + + # global config for nnet topo + self.n_ins=250 # size of input data + self.n_outs=N_OUTS # number of output targets.. we'll replace this with + # the correct number when we move this to the right place. 
+ self.hidden_layers_sizes=[1024, 1024, 1024, 1024, 1024, 42, 1024] # hidden layer sizes + self.bnf_layer_index = 6 # the index of the Bottleneck layer + self.pretrain_layer_num = 5 # number of hidden layers to be pretrained + + # global config for data + self.shuffle = True + self.chunk_size = '200m' + + # pretraining batch size + self.pretrain_batch_size = 128 # batch-size in pretraining + + # pretraining schedule + self.pretrain_gbrbm_lr = 0.005 # learning rate for Gaussian-Bernoulli RBM + self.pretrain_rbm_lr = 0.08 # learning rate for Bernoulli-Bernoulli RBM + self.initial_momentum = 0.5 # initial momentum + self.final_momentum = 0.9 # final momentum + self.initial_momentum_epoch = 2 # for how many epochs do we use initial_momentum + self.pretraining_epochs = 4 # total epochs + + # finetuning batch size + self.finetune_batch_size = 256 # batch-size for finetuning + + # finetuning schedule + self.finetune_momentum = 0.5 # momentum for finetuning + self.lrate = LearningRateExpDecay(start_rate=0.04, # starting learning rate + scale_by = 0.5, # decaying factor in ramping + max_epochs = 1000, # 'dump' epoch limit, never can be reached + min_derror_ramp_start = 0.01, # min validation error difference to trigger ramping + min_derror_stop = 0.01, # min validation error difference to stop finetuning, after ramping + init_error = 100) diff --git a/egs/babel/s5d/conf/bnf/config_limited.py b/egs/babel/s5d/conf/bnf/config_limited.py new file mode 100755 index 00000000000..f63c3640d68 --- /dev/null +++ b/egs/babel/s5d/conf/bnf/config_limited.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +################################################# +## PTDNN - Python Toolkit for Deep Neural Network +## Author: Yajie Miao +################################################# + +import os +import sys + +from utils.learn_rates import LearningRateExpDecay + + +class BnfExpConfig(object): + + def __init__(self): + + # working directory; by default, the pfiles should be here + self.wdir = "WORK/" # Note: we'll replace CWD with the current directory + # when we move this to the right place. + self.pretrain_data = self.wdir + 'train.pfile.gz' # pretraining data + self.pretrain_output = self.wdir + "rbm.ptr" # pretraining output + + # finetuning data + self.finetune_train_data = self.wdir + 'train.pfile.gz' # finetune training data + self.finetune_valid_data = self.wdir + 'valid.pfile.gz' # finetune validation data + self.finetune_output = self.wdir + "final.nnet.raw" # finetune output + self.nnet_kaldi_fmt = self.wdir + "final.nnet" + + # global config for nnet topo + self.n_ins=250 # size of input data + self.n_outs=N_OUTS # number of output targets.. we'll replace this with + # the correct number when we move this to the right place. 
+ self.hidden_layers_sizes=[1024, 1024, 1024, 1024, 42, 1024] # hidden layer sizes + self.bnf_layer_index = 5 # the index of the Bottleneck layer + self.pretrain_layer_num = 4 # number of hidden layers to be pretrained + + # global config for data + self.shuffle = True + self.chunk_size = '200m' + + # pretraining batch size + self.pretrain_batch_size = 128 # batch-size in pretraining + + # pretraining schedule + self.pretrain_gbrbm_lr = 0.005 # learning rate for Gaussian-Bernoulli RBM + self.pretrain_rbm_lr = 0.08 # learning rate for Bernoulli-Bernoulli RBM + self.initial_momentum = 0.5 # initial momentum + self.final_momentum = 0.9 # final momentum + self.initial_momentum_epoch = 5 # for how many epochs do we use initial_momentum + self.pretraining_epochs=10 # total epochs + + # finetuning batch size + self.finetune_batch_size = 256 # batch-size for finetuning + + # finetuning schedule + self.finetune_momentum = 0.5 # momentum for finetuning + self.lrate = LearningRateExpDecay(start_rate=0.08, # starting learning rate + scale_by = 0.5, # decaying factor in ramping + max_epochs = 1000, # 'dump' epoch limit, never can be reached + min_derror_ramp_start = 0.01, # min validation error difference to trigger ramping + min_derror_stop = 0.01, # min validation error difference to stop finetuning, after ramping + init_error = 100) diff --git a/egs/babel/s5d/conf/common.fullLP b/egs/babel/s5d/conf/common.fullLP new file mode 100644 index 00000000000..d203908d3e0 --- /dev/null +++ b/egs/babel/s5d/conf/common.fullLP @@ -0,0 +1,124 @@ +# BNF training parameters +bnf_num_hidden_layers=6 +bottleneck_dim=42 +bnf_hidden_layer_dim=2048 +bnf_minibatch_size=512 +bnf_init_learning_rate=0.008 +bnf_final_learning_rate=0.0008 +bnf_max_change=40 +bnf_num_jobs=4 +bnf_num_threads=1 +bnf_mixup=10000 +bnf_mpe_learning_rate=0.00009 +bnf_mpe_last_layer_factor=0.1 +bnf_num_gauss_ubm=550 # use fewer UBM Gaussians than the + # non-bottleneck system (which has 800) +bnf_num_gauss_sgmm=50000 # use fewer SGMM sub-states than the + # non-bottleneck system (which has 80000). +bnf_decode_acwt=0.066666 + + +# DNN hybrid system training parameters +dnn_num_hidden_layers=4 +dnn_input_dim=4000 +dnn_output_dim=400 +dnn_init_learning_rate=0.008 +dnn_final_learning_rate=0.0008 +dnn_mixup=12000 + +dnn_mpe_learning_rate=0.00008 +dnn_mpe_last_layer_factor=0.1 +dnn_mpe_retroactive=true + +bnf_every_nth_frame=2 # take every 2nd frame. 
+babel_type=full + +use_pitch=true + +lmwt_plp_extra_opts=( --min-lmwt 8 --max-lmwt 18 ) +lmwt_bnf_extra_opts=( --min-lmwt 15 --max-lmwt 22 ) +lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 15 ) +lmwt_chain_extra_opts=( --min-lmwt 4 --max-lmwt 22 ) + +dnn_beam=16.0 +dnn_lat_beam=8.5 + +icu_opt=(--use-icu true --icu-transform Any-Lower) + +if [[ `hostname` == *.tacc.utexas.edu ]] ; then + decode_extra_opts=( --num-threads 4 --parallel-opts "-pe smp 4" ) + sgmm_train_extra_opts=( ) + sgmm_group_extra_opts=( --num_iters 25 ) + sgmm_denlats_extra_opts=( --num-threads 2 ) + sgmm_mmi_extra_opts=(--cmd "local/lonestar.py -pe smp 2") + dnn_denlats_extra_opts=( --num-threads 2 ) + + dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "-pe smp 16" ) + dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1) + + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1) + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1) + dnn_parallel_opts="-l gpu=1" +else + decode_extra_opts=(--num-threads 6 --parallel-opts "--num-threads 6 --mem 4G") + sgmm_train_extra_opts=( --num-iters 25 ) + sgmm_group_extra_opts=(--group 3 --parallel-opts "--num-threads 7 --mem 6G") + sgmm_denlats_extra_opts=(--num-threads 4 --parallel-opts "--num-threads 4" ) + sgmm_mmi_extra_opts=() + dnn_denlats_extra_opts=(--num-threads 4 --parallel-opts "--num-threads 4") + + dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "--num-threads 16") + dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1 \ + --parallel-opts "--gpu 1" ) + dnn_parallel_opts="--gpu 1" + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1 \ + --parallel-opts "--gpu 1") +fi + +icu_transform="Any-Lower" +case_insensitive=true + + +max_states=150000 +wip=0.5 + + +phoneme_mapping= + +minimize=true + +proxy_phone_beam=-1 +proxy_phone_nbest=-1 +proxy_beam=5 +proxy_nbest=500 + +extlex_proxy_phone_beam=5 +extlex_proxy_phone_nbest=300 +extlex_proxy_beam=-1 +extlex_proxy_nbest=-1 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--oov " + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/common.limitedLP b/egs/babel/s5d/conf/common.limitedLP new file mode 100644 index 00000000000..a73080a5b65 --- /dev/null +++ b/egs/babel/s5d/conf/common.limitedLP @@ -0,0 +1,128 @@ +# BNF training parameters +bnf_num_hidden_layers=5 +bottleneck_dim=42 +bnf_hidden_layer_dim=1024 +bnf_minibatch_size=512 +bnf_init_learning_rate=0.008 +bnf_final_learning_rate=0.0008 +bnf_max_change=40 +bnf_num_jobs=4 +bnf_num_threads=1 +bnf_mixup=5000 +bnf_mpe_learning_rate=0.00009 +bnf_mpe_last_layer_factor=0.1 +bnf_num_gauss_ubm=500 # use fewer UBM Gaussians than the + # non-bottleneck system (which has 750) +bnf_num_gauss_sgmm=10000 # use fewer SGMM sub-states than the + # non-bottleneck system (which has 18000). 
+bnf_decode_acwt=0.066666 + + +## DNN hybrid system training parameters +dnn_num_hidden_layers=3 +dnn_input_dim=2000 +dnn_output_dim=200 +dnn_init_learning_rate=0.008 +dnn_final_learning_rate=0.0008 +dnn_mixup=5000 + +dnn_mpe_learning_rate=0.00009 +dnn_mpe_last_layer_factor=0.1 +dnn_mpe_retroactive=true + +bnf_every_nth_frame=1 # take all frames. +babel_type=limited + +use_pitch=true + +lmwt_plp_extra_opts=( --min-lmwt 8 --max-lmwt 12 ) +lmwt_bnf_extra_opts=( --min-lmwt 15 --max-lmwt 22 ) +lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 15 ) + +dnn_beam=16.0 +dnn_lat_beam=8.5 + +icu_opt=(--use-icu true --icu-transform Any-Lower) + +# Semi-supervised examples options +dnn_update_egs_opts=(--weight-threshold 0.7 --splice-width 4 --samples-per-iter 200000 --num-jobs-nnet 4 --io-opts "-tc 5" ) + +if [[ `hostname` == *.tacc.utexas.edu ]] ; then + decode_extra_opts=( --num-threads 4 --parallel-opts "-pe smp 4" ) + sgmm_train_extra_opts=( --num-iters 25 ) + sgmm_group_extra_opts=( ) + sgmm_denlats_extra_opts=( --num-threads 1 ) + dnn_denlats_extra_opts=( --num-threads 1 ) + + dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "-pe smp 16" ) + dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 + --parallel-opts "-pe smp 16" ) + + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 4 --num-threads 1) + + dnn_update_parallel_opts=( --num-epochs 15 --num-epochs-extra 5 --num-iters-final 20 ) +else + decode_extra_opts=(--num-threads 6 --parallel-opts "-pe smp 6 -l mem_free=4G,ram_free=4.0G") + sgmm_train_extra_opts=( --num-iters 25 ) + sgmm_group_extra_opts=(--group 3 --parallel-opts "-pe smp 3 -l mem_free=7G,ram_free=7.0G" --cmd "queue.pl -l arch=*64 -l mem_free=2.0G,ram_free=2.0G") + sgmm_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2.0G") + sgmm_mmi_extra_opts=(--cmd "queue.pl -l arch=*64 -l mem_free=1.5G,ram_free=1.5G") + dnn_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2.0G") + + dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") + dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 \ + --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") + dnn_parallel_opts="-l gpu=1" + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 4 --num-threads 1 \ + --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") + + dnn_update_parallel_opts=( --num-epochs 15 --num-epochs-extra 5 --num-iters-final 20 ) +fi + +icu_transform="Any-Lower" +case_insensitive=true + + +max_states=150000 +wip=0.5 + + +phoneme_mapping= + +minimize=true + +proxy_phone_beam=-1 +proxy_phone_nbest=-1 +proxy_beam=5 +proxy_nbest=500 + +extlex_proxy_phone_beam=5 +extlex_proxy_phone_nbest=300 +extlex_proxy_beam=-1 +extlex_proxy_nbest=-1 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--oov " + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/common.semisupervised.limitedLP 
b/egs/babel/s5d/conf/common.semisupervised.limitedLP new file mode 100644 index 00000000000..63118af268c --- /dev/null +++ b/egs/babel/s5d/conf/common.semisupervised.limitedLP @@ -0,0 +1,27 @@ +## DNN hybrid system training parameters +dnn_num_hidden_layers=3 +dnn_input_dim=2000 +dnn_output_dim=200 +dnn_init_learning_rate=0.008 +dnn_final_learning_rate=0.0008 +dnn_mixup=5000 +num_epochs=15 +num_epochs_extra=5 +num_iters_final=20 + +babel_type=limited + +# Supervised tuning options +# To update only the last layer using only the supervised data after +# semi-supervised training is done +do_supervised_tuning=true +dnn_update_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") +dnn_update_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 \ + --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") + +# Semi-supervised examples options +egs_gpu_opts=(--splice-width 4 --samples-per-iter 200000 --num-jobs-nnet 4 --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") +egs_cpu_opts=(--splice-width 4 --samples-per-iter 200000 --num-jobs-nnet 8 --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") +egs_io_opts="-tc 5" +weight_threshold=0.7 diff --git a/egs/babel/s5d/conf/common_vars.sh b/egs/babel/s5d/conf/common_vars.sh new file mode 100644 index 00000000000..4a48d2577a8 --- /dev/null +++ b/egs/babel/s5d/conf/common_vars.sh @@ -0,0 +1,28 @@ +#keyword search default +glmFile=conf/glm +duptime=0.5 +case_insensitive=false +use_pitch=true +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="-oov " +boost_sil=1.5 # note from Dan: I expect 1.0 might be better (equivalent to not + # having the option)... should test. +cer=0 + +#Declaring here to make the definition inside the language conf files more +# transparent and nice +declare -A dev10h_kwlists +declare -A dev2h_kwlists +declare -A evalpart1_kwlists +declare -A eval_kwlists +declare -A shadow_kwlists + +# just for back-compatibility +declare -A dev10h_more_kwlists +declare -A dev2h_more_kwlists +declare -A evalpart1_more_kwlists +declare -A eval_more_kwlists +declare -A shadow_more_kwlists +[ -f ./path.sh ] && . ./path.sh; # source the path. +[ -f ./cmd.sh ] && . ./cmd.sh; # source train and decode cmds. diff --git a/egs/babel/s5d/conf/glm b/egs/babel/s5d/conf/glm new file mode 100644 index 00000000000..cdf9c42feaa --- /dev/null +++ b/egs/babel/s5d/conf/glm @@ -0,0 +1,13 @@ +;; +;; File: ma970904.glm +;; Desc: This file contains the transcript filtering rules for the ARPA +;; Mandarin Hub5-NE Evaluation. +;; +;; Date: 970904 +;; - initial creation +;; +;; Hesitation mappings + => %HESITATION / [ ] __ [ ] + => %HESITATION / [ ] __ [ ] + => %HESITATION / [ ] __ [ ] + diff --git a/egs/babel/s5d/conf/lang/101-cantonese-fullLP.official.conf b/egs/babel/s5d/conf/lang/101-cantonese-fullLP.official.conf new file mode 100644 index 00000000000..7d2da3715fb --- /dev/null +++ b/egs/babel/s5d/conf/lang/101-cantonese-fullLP.official.conf @@ -0,0 +1,104 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/101-cantonese/release-current/conversational/training +train_data_list=/export/babel/data/splits/Cantonese_Babel101/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.3hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm +dev2h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=20 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm +dev10h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval +eval_data_list=/export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.kwlist.xml +eval_nj=64 + +evalpart1_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval +evalpart1_data_list=conf/lists/101-cantonese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +evalpart1_nj=64 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/101-cantonese/release-current/conversational/dev + /export/babel/data/101-cantonese/release-current/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Cantonese_Babel101/dev.list + 
/export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm +lexicon_file=/export/babel/data/101-cantonese/release-current/conversational/reference_materials/lexicon.txt +cer=1 + +max_index_states=150000 +word_ins_penalty=0.5 + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf b/egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf new file mode 100644 index 00000000000..66347522065 --- /dev/null +++ b/egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf @@ -0,0 +1,112 @@ +# include common settings for limitedLP systems. +. conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/101-cantonese/release-current/conversational/training +train_data_list=/export/babel/data/splits/Cantonese_Babel101/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.3hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm +dev2h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=20 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm +dev10h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period 
diff --git a/egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf b/egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf
new file mode 100644
index 00000000000..66347522065
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf
@@ -0,0 +1,112 @@
+# include common settings for limitedLP systems.
+. conf/common.limitedLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/101-cantonese/release-current/conversational/training
+train_data_list=/export/babel/data/splits/Cantonese_Babel101/train.LimitedLP.list
+train_nj=16
+
+#RADICAL DEV data files
+dev2h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev
+dev2h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.3hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm
+dev2h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml
+)
+dev2h_subset_ecf=true
+dev2h_nj=20
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm
+dev10h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml
+)
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval
+eval_data_list=/export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.kwlist.xml
+eval_nj=64
+
+evalpart1_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval
+evalpart1_data_list=conf/lists/101-cantonese/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
+evalpart1_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml
+)
+evalpart1_nj=64
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/101-cantonese/release-current/conversational/dev
+  /export/babel/data/101-cantonese/release-current/conversational/eval
+  )
+shadow_data_list=(
+  /export/babel/data/splits/Cantonese_Babel101/dev.list
+  /export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml
+shadow_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
+
+  )
+shadow_nj=64
+
+unsup_data_dir=(
+  /export/babel/data/104-pashto/release-current/conversational/training/
+  )
+unsup_data_list=(
+  /export/babel/data/splits/Pashto_Babel104/train.LimitedLP.untranscribed.list
+  )
+unsup_nj=64
+
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm
+lexicon_file=/export/babel/data/101-cantonese/release-babel101b-v0.4c_sub-train1/conversational/reference_materials/lexicon.sub-train1.txt
+cer=1
+
+max_index_states=150000
+word_ins_penalty=0.5
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/102-assamese-fullLP.official.conf b/egs/babel/s5d/conf/lang/102-assamese-fullLP.official.conf
new file mode 100644
index 00000000000..f00afb53454
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/102-assamese-fullLP.official.conf
@@ -0,0 +1,105 @@
+# include common settings for fullLP systems.
+. conf/common.fullLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/102-assamese/release-current/conversational/training
+train_data_list=/export/babel/data/splits/Assamese_Babel102/train.FullLP.list
+train_nj=32
+
+#RADICAL DEV data files
+dev2h_data_dir=/export/babel/data/102-assamese/release-current/conversational/dev
+dev2h_data_list=/export/babel/data/splits/Assamese_Babel102/dev.2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
+  )
+dev2h_subset_ecf=true
+dev2h_nj=24
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/102-assamese/release-current/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Assamese_Babel102//dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
+  )
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/
+eval_data_list=/export/babel/data/splits/Assamese_Babel102/eval.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
+eval_nj=32
+
+#Official EVAL period evaluation data files
+evalpart1_data_dir=/export/babel/data/102-assamese/release-current/conversational/eval/
+evalpart1_data_list=conf/lists/102-assamese/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist.xml
+evalpart1_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist2.xml
+  [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist3.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist4.xml
+  )
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/102-assamese/release-current/conversational/dev
+  /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/
+  )
+shadow_data_list=(
+  /export/babel/data/splits/Assamese_Babel102/dev.list
+  /export/babel/data/splits/Assamese_Babel102/eval.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
+
+  )
+shadow_nj=32
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+
+
+lexicon_file=/export/babel/data/102-assamese/release-current/conversational/reference_materials/lexicon.txt
+cer=0
+
+max_index_states=150000
+word_ins_penalty=0.5
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/102-assamese-limitedLP.official.conf b/egs/babel/s5d/conf/lang/102-assamese-limitedLP.official.conf
new file mode 100644
index 00000000000..937166caf7d
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/102-assamese-limitedLP.official.conf
@@ -0,0 +1,114 @@
+# include common settings for limitedLP systems.
+. conf/common.limitedLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/102-assamese/release-current/conversational/training
+train_data_list=/export/babel/data/splits/Assamese_Babel102/train.LimitedLP.list
+train_nj=16
+
+#RADICAL DEV data files
+dev2h_data_dir=/export/babel/data/102-assamese/release-current/conversational/dev
+dev2h_data_list=/export/babel/data/splits/Assamese_Babel102/dev.2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
+  )
+dev2h_subset_ecf=true
+dev2h_nj=24
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/102-assamese/release-current/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Assamese_Babel102//dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
+  )
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/
+eval_data_list=/export/babel/data/splits/Assamese_Babel102/eval.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
+eval_nj=32
+
+#Official EVAL period evaluation data files
+evalpart1_data_dir=/export/babel/data/102-assamese/release-current/conversational/eval/
+evalpart1_data_list=conf/lists/102-assamese/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist.xml
+evalpart1_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist2.xml
+  [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist3.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist4.xml
+  )
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/102-assamese/release-current/conversational/dev
+  /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/
+  )
+shadow_data_list=(
+  /export/babel/data/splits/Assamese_Babel102/dev.list
+  /export/babel/data/splits/Assamese_Babel102/eval.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml
+
+  )
+shadow_nj=32
+
+unsup_data_dir=(/export/babel/data/102-assamese//release-current/conversational/training/
+  /export/babel/data/102-assamese//release-current/conversational/untranscribed-training/
+  )
+unsup_data_list=(
+  /export/babel/data/splits/Assamese_Babel102/train.LimitedLP.untranscribed.list
+  /export/babel/data/splits/Assamese_Babel102/train.untranscribed.list
+  )
+unsup_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+
+
+lexicon_file=/export/babel/data/102-assamese/release-current/conversational/reference_materials/lexicon.sub-train.txt
+cer=0
+
+max_index_states=150000
+word_ins_penalty=0.5
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
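An aside on the array-valued settings (again illustrative, not part of the patch): the paired *_data_dir/*_data_list variables, such as the unsup_* pair above, are plain indexed arrays matched by position, so a script that stages several corpora walks them with a shared index:

#!/usr/bin/env bash
# Minimal sketch, assuming the conf file above is on disk.
. conf/lang/102-assamese-limitedLP.official.conf || exit 1
for i in "${!unsup_data_dir[@]}"; do
  echo "audio dir : ${unsup_data_dir[$i]}"
  echo "file list : ${unsup_data_list[$i]}"
done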
diff --git a/egs/babel/s5d/conf/lang/103-bengali-fullLP.official.conf b/egs/babel/s5d/conf/lang/103-bengali-fullLP.official.conf
new file mode 100644
index 00000000000..d283be30d16
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/103-bengali-fullLP.official.conf
@@ -0,0 +1,105 @@
+# include common settings for fullLP systems.
+. conf/common.fullLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/103-bengali/release-current/conversational/training
+train_data_list=/export/babel/data/splits/Bengali_Babel103/train.FullLP.list
+train_nj=32
+
+#RADICAL DEV data files
+dev2h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev
+dev2h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
+  )
+dev2h_subset_ecf=true
+dev2h_nj=12
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
+  )
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval
+eval_data_list=/export/babel/data/splits/Bengali_Babel103//eval.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
+eval_nj=32
+
+#Official EVAL period evaluation data files
+evalpart1_data_dir=/export/babel/data/103-bengali/release-current/conversational/eval/
+evalpart1_data_list=conf/lists/103-bengali/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist.xml
+evalpart1_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist2.xml
+  [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist3.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist4.xml
+  )
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/103-bengali/release-current/conversational/dev
+  /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval/
+  )
+shadow_data_list=(
+  /export/babel/data/splits/Bengali_Babel103/dev.list
+  /export/babel/data/splits/Bengali_Babel103/eval.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
+
+  )
+shadow_nj=32
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+
+
+lexicon_file=/export/babel/data/103-bengali/release-current/conversational/reference_materials/lexicon.txt
+cer=0
+
+max_index_states=150000
+word_ins_penalty=0.5
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/103-bengali-limitedLP.official.conf b/egs/babel/s5d/conf/lang/103-bengali-limitedLP.official.conf
new file mode 100644
index 00000000000..3799653db68
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/103-bengali-limitedLP.official.conf
@@ -0,0 +1,114 @@
+# include common settings for limitedLP systems.
+. conf/common.limitedLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/103-bengali//release-current/conversational/training
+train_data_list=/export/babel/data/splits/Bengali_Babel103/train.LimitedLP.list
+train_nj=16
+
+#RADICAL DEV data files
+dev2h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev
+dev2h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
+  )
+dev2h_subset_ecf=true
+dev2h_nj=12
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
+  )
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval
+eval_data_list=/export/babel/data/splits/Bengali_Babel103//eval.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
+eval_nj=32
+
+#Official EVAL period evaluation data files
+evalpart1_data_dir=/export/babel/data/103-bengali/release-current/conversational/eval/
+evalpart1_data_list=conf/lists/103-bengali/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist.xml
+evalpart1_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist2.xml
+  [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist3.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist4.xml
+  )
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/103-bengali/release-current/conversational/dev
+  /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval/
+  )
+shadow_data_list=(
+  /export/babel/data/splits/Bengali_Babel103/dev.list
+  /export/babel/data/splits/Bengali_Babel103/eval.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml
+
+  )
+shadow_nj=32
+
+unsup_data_dir=(/export/babel/data/103-bengali/release-current/conversational/training/
+  /export/babel/data/103-bengali/release-current/conversational/untranscribed-training/
+  )
+unsup_data_list=(
+  /export/babel/data/splits/Bengali_Babel103/train.LimitedLP.untranscribed.list
+  /export/babel/data/splits/Bengali_Babel103/train.untranscribed.list
+  )
+unsup_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+
+
+lexicon_file=/export/babel/data/103-bengali/release-current/conversational/reference_materials/lexicon.sub-train.txt
+cer=0
+
+max_index_states=150000
+word_ins_penalty=0.5
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/104-pashto-fullLP-40hrs.official.conf b/egs/babel/s5d/conf/lang/104-pashto-fullLP-40hrs.official.conf
new file mode 100644
index 00000000000..9fbaf629935
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/104-pashto-fullLP-40hrs.official.conf
@@ -0,0 +1,114 @@
+# include common settings for fullLP systems.
+. conf/common.fullLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/104-pashto/release-current/conversational/training
+train_data_list=./conf/lists/104-pashto/train.40HrFLP.list
+train_nj=32
+
+#RADICAL DEV2H data files
+dev2h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev
+dev2h_data_list=/export/babel/data/splits/Pashto_Babel104/dev2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
+)
+dev2h_subset_ecf=true
+dev2h_nj=18
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev
+dev10h_data_list=./conf/lists/104-pashto/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
+dev10h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
+  [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml
+  [eval16]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist4.xml
+)
+dev10h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
+  [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml
+  [eval16]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist4.xml
+)
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval/
+eval_data_list=/export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.kwlist2.xml
+eval_nj=64
+
+#Official (POST-)EVAL evaluation data portion
+evalpart1_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval
+evalpart1_data_list=conf/lists/104-pashto/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml
+evalpart1_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml
+)
+evalpart1_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml
+)
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/104-pashto/release-current/conversational/dev
+  /export/babel/data/104-pashto/release-current/conversational/eval
+  )
+shadow_data_list=(
+  /export/babel/data/splits/Pashto_Babel104/dev.list
+  /export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml
+shadow_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
+
+  )
+shadow_nj=64
+
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/104-pashto/release-current/conversational/reference_materials/lexicon.txt
+
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/104-pashto-fullLP.official.conf b/egs/babel/s5d/conf/lang/104-pashto-fullLP.official.conf
new file mode 100644
index 00000000000..08f849b7605
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/104-pashto-fullLP.official.conf
@@ -0,0 +1,114 @@
+# include common settings for fullLP systems.
+. conf/common.fullLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/104-pashto/release-current/conversational/training
+train_data_list=/export/babel/data/splits/Pashto_Babel104/train.FullLP.list
+train_nj=32
+
+#RADICAL DEV2H data files
+dev2h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev
+dev2h_data_list=/export/babel/data/splits/Pashto_Babel104/dev2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
+)
+dev2h_subset_ecf=true
+dev2h_nj=18
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Pashto_Babel104/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
+dev10h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
+  [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml
+  [eval16]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist4.xml
+)
+dev10h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
+  [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml
+  [eval16]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist4.xml
+)
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval/
+eval_data_list=/export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.kwlist2.xml
+eval_nj=64
+
+#Official (POST-)EVAL evaluation data portion
+evalpart1_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval
+evalpart1_data_list=conf/lists/104-pashto/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml
+evalpart1_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml
+)
+evalpart1_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml
+)
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/104-pashto/release-current/conversational/dev
+  /export/babel/data/104-pashto/release-current/conversational/eval
+  )
+shadow_data_list=(
+  /export/babel/data/splits/Pashto_Babel104/dev.list
+  /export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml
+shadow_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
+
+  )
+shadow_nj=64
+
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/104-pashto/release-current/conversational/reference_materials/lexicon.txt
+
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/104-pashto-limitedLP.official.conf b/egs/babel/s5d/conf/lang/104-pashto-limitedLP.official.conf
new file mode 100644
index 00000000000..41bc3ba85ef
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/104-pashto-limitedLP.official.conf
@@ -0,0 +1,110 @@
+# include common settings for limitedLP systems.
+. conf/common.limitedLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/104-pashto/release-current/conversational/training
+train_data_list=/export/babel/data/splits/Pashto_Babel104/train.LimitedLP.list
+train_nj=16
+
+#RADICAL DEV2H data files
+dev2h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev
+dev2h_data_list=/export/babel/data/splits/Pashto_Babel104/dev2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
+)
+dev2h_subset_ecf=true
+dev2h_nj=18
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Pashto_Babel104/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
+)
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval/
+eval_data_list=/export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.kwlist2.xml
+eval_nj=64
+
+#Official (POST-)EVAL evaluation data portion
+evalpart1_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval
+evalpart1_data_list=conf/lists/104-pashto/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml
+evalpart1_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml
+)
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/104-pashto/release-current/conversational/dev
+  /export/babel/data/104-pashto/release-current/conversational/eval
+  )
+shadow_data_list=(
+  /export/babel/data/splits/Pashto_Babel104/dev.list
+  /export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml
+shadow_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
+
+  )
+shadow_nj=64
+
+unsup_data_dir=(
+  /export/babel/data/104-pashto/release-current/conversational/training/
+  )
+unsup_data_list=(
+  /export/babel/data/splits/Pashto_Babel104/train.LimitedLP.untranscribed.list
+  )
+unsup_nj=64
+
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/104-pashto/release-current-subtrain/conversational/reference_materials/lexicon.sub-train.txt
+
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
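For orientation (a hedged sketch, not part of the patch; the data/lang/alignment paths and $train_cmd are assumptions from the usual s5-style layout): the numLeaves*/numGauss* pairs above feed the positional <num-leaves> <tot-gauss> arguments of the standard Kaldi training scripts, roughly as follows:

#!/usr/bin/env bash
. ./cmd.sh
. conf/lang/104-pashto-limitedLP.official.conf || exit 1
# LDA+MLLT system sized by the conf:
steps/train_lda_mllt.sh --cmd "$train_cmd" \
  $numLeavesMLLT $numGaussMLLT data/train data/lang exp/tri2_ali exp/tri3
# Speaker-adapted (SAT) system sized by the conf:
steps/train_sat.sh --cmd "$train_cmd" \
  $numLeavesSAT $numGaussSAT data/train data/lang exp/tri3_ali exp/tri4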
diff --git a/egs/babel/s5d/conf/lang/105-turkish-fullLP.official.conf b/egs/babel/s5d/conf/lang/105-turkish-fullLP.official.conf
new file mode 100644
index 00000000000..6889cb7eb37
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/105-turkish-fullLP.official.conf
@@ -0,0 +1,111 @@
+# include common settings for fullLP systems.
+. conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training
+train_data_list=/export/babel/data/splits/Turkish_Babel105/train.fullLP.list
+train_nj=32
+
+#RADICAL DEV data files
+dev2h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev
+dev2h_data_list=/export/babel/data/splits/Turkish_Babel105/dev2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
+)
+dev2h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
+)
+dev2h_subset_ecf=true
+dev2h_nj=18
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Turkish_Babel105/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
+)
+dev10h_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
+)
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval
+eval_data_list=/export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.kwlist2.xml
+eval_nj=64
+
+#Official (POST-)EVAL evaluation data portion
+evalpart1_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval
+evalpart1_data_list=conf/lists/105-turkish/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+evalpart1_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.annot.kwlist2.xml
+)
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/105-turkish/release-current-b/conversational/dev
+  /export/babel/data/105-turkish/release-current-b/conversational/eval
+  )
+shadow_data_list=(
+  /export/babel/data/splits/Turkish_Babel105/dev.list
+  /export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
+shadow_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
+
+  )
+shadow_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+glmFile=./conf/glm
+lexicon_file=/export/babel/data/105-turkish/release-current-b/conversational/reference_materials/lexicon.txt
+#http://demo.icu-project.org/icu-bin/translit
+icu_opt=(--use-icu true --icu-transform 'İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();' )
+#icu_opt=(--use-icu true --icu-transform "'\\\\\\\\İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();'" )
+#keyword search settings
+duptime=0.5
+case_insensitive=true
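A quick illustration of what the icu_opt transform above does (assuming ICU's uconv command-line tool is available; this snippet is not part of the patch): it lowercases Turkish text while mapping dotted İ to i and dotless I to ı, which a locale-unaware lowercasing gets wrong:

# Illustrative only -- requires the ICU 'uconv' utility.
echo 'DİYARBAKIR ISPARTA' | uconv -x 'İ > i; I > ı; ::Any-Lower();'
# expected output: diyarbakır ısparta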
diff --git a/egs/babel/s5d/conf/lang/105-turkish-limitedLP.official.conf b/egs/babel/s5d/conf/lang/105-turkish-limitedLP.official.conf
new file mode 100644
index 00000000000..f7ca60c6f25
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/105-turkish-limitedLP.official.conf
@@ -0,0 +1,111 @@
+# include common settings for limitedLP systems.
+. conf/common.limitedLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training
+train_data_list=/export/babel/data/splits/Turkish_Babel105/train.LimitedLP.official.list
+train_nj=16
+
+#RADICAL DEV data files
+dev2h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev
+dev2h_data_list=/export/babel/data/splits/Turkish_Babel105/dev2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
+)
+dev2h_subset_ecf=true
+dev2h_nj=18
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Turkish_Babel105/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
+)
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval
+eval_data_list=/export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.kwlist2.xml
+eval_nj=64
+
+#Official (POST-)EVAL evaluation data portion
+evalpart1_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval
+evalpart1_data_list=conf/lists/105-turkish/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+evalpart1_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.annot.kwlist2.xml
+)
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/105-turkish/release-current-b/conversational/dev
+  /export/babel/data/105-turkish/release-current-b/conversational/eval
+  )
+shadow_data_list=(
+  /export/babel/data/splits/Turkish_Babel105/dev.list
+  /export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
+shadow_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
+
+  )
+shadow_nj=64
+
+unsup_data_dir=(
+  /export/babel/data/105-turkish/release-current-b/conversational/training/
+  )
+unsup_data_list=(
+  /export/babel/data/splits/Turkish_Babel105/train.LimitedLP.untranscribed.list
+  )
+unsup_nj=64
+
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=600
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/105-turkish/release-babel105b-v0.4-rc1/conversational/reference_materials/lexicon.sub-train.txt
+#http://demo.icu-project.org/icu-bin/translit
+icu_opt=(--use-icu true --icu-transform 'İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();' )
+#icu_opt=(--use-icu true --icu-transform "'\\\\\\\\İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();'" )
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/106-tagalog-fullLP.official.conf b/egs/babel/s5d/conf/lang/106-tagalog-fullLP.official.conf
new file mode 100644
index 00000000000..fa1afe4717e
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/106-tagalog-fullLP.official.conf
@@ -0,0 +1,108 @@
+# include common settings for fullLP systems.
+. conf/common.fullLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/106-tagalog/release-current/conversational/training/
+train_data_list=/export/babel/data/splits/Tagalog_Babel106/train.FullLP.list
+train_nj=32
+
+#RADICAL DEV data files
+dev2h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
+dev2h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/splits/Tagalog_Babel106/babel106b-v0.2g_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml
+)
+dev2h_subset_ecf=true
+dev2h_nj=23
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml
+)
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval
+eval_data_list=/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.kwlist2.xml
+eval_nj=64
+
+#Official (POST-)EVAL evaluation data portion
+evalpart1_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval/
+evalpart1_data_list=conf/lists/106-tagalog/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+evalpart1_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1.kwlist2.xml
+)
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/splits/Tagalog_Babel106/dev.list
+  /export/babel/data/106-tagalog/release-current/conversational/eval
+  )
+shadow_data_list=(
+  /export/babel/data/splits/Tagalog_Babel106/dev.list
/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + +unsup_data_dir=( + /export/babel/data/106-tagalog/release-current/conversational/training/ + ) +unsup_data_list=( + /export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.untranscribed.list + ) +unsup_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/106-tagalog/release-current/conversational/reference_materials/lexicon.txt + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/106-tagalog-limitedLP.official.conf b/egs/babel/s5d/conf/lang/106-tagalog-limitedLP.official.conf new file mode 100644 index 00000000000..86148300e0c --- /dev/null +++ b/egs/babel/s5d/conf/lang/106-tagalog-limitedLP.official.conf @@ -0,0 +1,108 @@ +# include common settings for limitedLP systems. +. conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/106-tagalog/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.official.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Tagalog_Babel106/babel106b-v0.2g_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=23 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml + 
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval +eval_data_list=/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.kwlist2.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/106-tagalog/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1.kwlist2.xml +) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/splits/Tagalog_Babel106/dev.list + /export/babel/data/106-tagalog/release-current/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Tagalog_Babel106/dev.list + /export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + +unsup_data_dir=( + /export/babel/data/106-tagalog/release-current/conversational/training/ + ) +unsup_data_list=( + /export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.untranscribed.list + ) +unsup_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/106-tagalog/release-babel106b-v0.2g-sub-train/conversational/reference_materials/lexicon.sub-train.txt + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/107-vietnamese-fullLP.official.conf b/egs/babel/s5d/conf/lang/107-vietnamese-fullLP.official.conf new file mode 100644 index 00000000000..e09ef9df4fd --- /dev/null +++ b/egs/babel/s5d/conf/lang/107-vietnamese-fullLP.official.conf @@ -0,0 +1,107 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Vietnamese_Babel107/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Vietnamese_Babel107/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml +) +dev2h_subset_ecf=true +dev2h_nj=27 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/dev/ +dev10h_data_list=/export/babel/data/splits/Vietnamese_Babel107/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Vietnamese_Babel107/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-eval.kwlist3.xml +eval_nj=81 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/107-vietnamese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.annot.kwlist3.xml +) +evalpart1_nj=64 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/107-vietnamese/release-current/conversational/dev/ + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval + ) +shadow_data_list=( + 
/export/babel/data/splits/Vietnamese_Babel107/dev.list + /export/babel/data/splits/Vietnamese_Babel107/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml + + ) +shadow_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/107-vietnamese/release-current/conversational/reference_materials/lexicon.txt + +phoneme_mapping="i@U=i @ U;oaI=o a I;oaI:=o a I:;u@I=u @ I;uI@= u I @;1@I=1 @ I;1@U=1 @ U; + a:I=a: I; a:U=a: U; aU=a U; @U=@ U; aI=a I; @I=@ I; EU=E U; eU=e U; i@=i @; iU=i U; Oa:=O a: ; Oa=O a; + OE=O E; OI=O I; oI=o I; @:I=@: I; u@=u @; 1@=1 @; ue=u e; uI=u I; 1I=1 I; u@:=u @:; 1U=1 U; ui:=u i:" +# + + + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/107-vietnamese-limitedLP.official.conf b/egs/babel/s5d/conf/lang/107-vietnamese-limitedLP.official.conf new file mode 100644 index 00000000000..a659c44ecc4 --- /dev/null +++ b/egs/babel/s5d/conf/lang/107-vietnamese-limitedLP.official.conf @@ -0,0 +1,115 @@ +# include common settings for limitedLP systems. +. 
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Vietnamese_Babel107/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Vietnamese_Babel107/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml +) +dev2h_subset_ecf=true +dev2h_nj=27 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/dev/ +dev10h_data_list=/export/babel/data/splits/Vietnamese_Babel107/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Vietnamese_Babel107/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-eval.kwlist3.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/107-vietnamese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.annot.kwlist3.xml +) +evalpart1_nj=64 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/107-vietnamese/release-current/conversational/dev/ + /export/babel/data/107-vietnamese/release-current/conversational/eval/ + ) +shadow_data_list=( + 
/export/babel/data/splits/Vietnamese_Babel107/dev.list + /export/babel/data/splits/Vietnamese_Babel107/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml + + ) +shadow_nj=64 + +unsup_data_dir=( + /export/babel/data/107-vietnamese/release-current/conversational/training/ + ) +unsup_data_list=( + /export/babel/data/splits/Vietnamese_Babel107/train.LimitedLP.untranscribed.list + ) +unsup_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/107-vietnamese/release-current/conversational/reference_materials/lexicon.sub-train.txt + +phoneme_mapping="i@U=i @ U;oaI=o a I;oaI:=o a I:;u@I=u @ I;uI@= u I @;1@I=1 @ I;1@U=1 @ U; + a:I=a: I; a:U=a: U; aU=a U; @U=@ U; aI=a I; @I=@ I; EU=E U; eU=e U; i@=i @; iU=i U; Oa:=O a: ; Oa=O a; + OE=O E; OI=O I; oI=o I; @:I=@: I; u@=u @; 1@=1 @; ue=u e; uI=u I; 1I=1 I; u@:=u @:; 1U=1 U; ui:=u i:" +# + + + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/201-haitian-fullLP.official.conf b/egs/babel/s5d/conf/lang/201-haitian-fullLP.official.conf new file mode 100644 index 00000000000..b92a52b7bb6 --- /dev/null +++ b/egs/babel/s5d/conf/lang/201-haitian-fullLP.official.conf @@ -0,0 +1,80 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Haitian_Babel201/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=20 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval +eval_data_list=/export/babel/data/splits/Haitian_Babel201//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/201-haitian/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist2.xml + [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist4.xml + ) +evalpart1_nj=32 
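The *_more_kwlists=( [key]=path ... ) blocks in these conf files are bash associative arrays, so a script that sources one of them must declare the array associative first; otherwise bash reads [dev] and [eval] as arithmetic indexes that both evaluate to 0 and the entries silently overwrite each other. A minimal consumption sketch, assuming bash >= 4 and being run from egs/babel/s5d; the choice of the Haitian fullLP conf is purely illustrative and none of this is part of the patch itself:

#!/usr/bin/env bash
# Sketch only: source one of the lang confs and walk its extra-kwlist table.
# Run from egs/babel/s5d so the ". conf/common.fullLP" inside it resolves.
declare -A dev10h_more_kwlists   # must be associative *before* sourcing
. conf/lang/201-haitian-fullLP.official.conf || exit 1

echo "primary kwlist: $dev10h_kwlist_file"
for name in "${!dev10h_more_kwlists[@]}"; do
  echo "extra kwlist [$name] -> ${dev10h_more_kwlists[$name]}"
done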
+ +#Shadow data files +shadow_data_dir=( + /export/babel/data/201-haitian/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Haitian_Babel201/dev.list + /export/babel/data/splits/Haitian_Babel201/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.txt + diff --git a/egs/babel/s5d/conf/lang/201-haitian-limitedLP.official.conf b/egs/babel/s5d/conf/lang/201-haitian-limitedLP.official.conf new file mode 100644 index 00000000000..d1320fd0245 --- /dev/null +++ b/egs/babel/s5d/conf/lang/201-haitian-limitedLP.official.conf @@ -0,0 +1,89 @@ +# include common settings for limitedLP systems. +. conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Haitian_Babel201/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=20 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval +eval_data_list=/export/babel/data/splits/Haitian_Babel201//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml 
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/201-haitian/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist2.xml + [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist4.xml + ) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/201-haitian/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Haitian_Babel201/dev.list + /export/babel/data/splits/Haitian_Babel201/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + +unsup_data_dir=(/export/babel/data/201-haitian/release-current/conversational/training/ + /export/babel/data/201-haitian/release-current/conversational/untranscribed-training/ + ) +unsup_data_list=( + /export/babel/data/splits/Haitian_Babel201/train.LimitedLP.untranscribed.list + /export/babel/data/splits/Haitian_Babel201/train.untranscribed.list + ) +unsup_nj=64 + + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.sub-train.txt + diff --git a/egs/babel/s5d/conf/lang/202-swahili.FLP.official.conf b/egs/babel/s5d/conf/lang/202-swahili.FLP.official.conf new file mode 100644 index 00000000000..d24eb1b73a4 --- /dev/null +++ b/egs/babel/s5d/conf/lang/202-swahili.FLP.official.conf @@ -0,0 +1,93 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/training +train_data_list=./conf/lists/202-swahili//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev +dev2h_data_list=./conf/lists/202-swahili//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist3.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev +dev10h_data_list=./conf/lists/202-swahili//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist3.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +eval_data_list=./conf/lists/202-swahili//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.ecf.xml +eval_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist3.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +evalpart1_data_list=./conf/lists/202-swahili//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.stm +evalpart1_kwlists=( + 
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist3.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev + /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/202-swahili//dev.list + ./conf/lists/202-swahili//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +shadow_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist3.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for FullLP condition +unsup_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/untranscribed-training +unsup_data_list=./conf/lists/202-swahili//untranscribed-training.list +unsup_nj=32 + + +lexicon_file=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/reference_materials/lexicon.txt + + + diff --git a/egs/babel/s5d/conf/lang/202-swahili.LLP.official.conf b/egs/babel/s5d/conf/lang/202-swahili.LLP.official.conf new file mode 100644 index 00000000000..761e6c6e0ab --- /dev/null +++ b/egs/babel/s5d/conf/lang/202-swahili.LLP.official.conf @@ -0,0 +1,99 @@ +# include common settings for fullLP systems. +. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/training +train_data_list=./conf/lists/202-swahili//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev +dev2h_data_list=./conf/lists/202-swahili//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist3.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev +dev10h_data_list=./conf/lists/202-swahili//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist3.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +eval_data_list=./conf/lists/202-swahili//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.ecf.xml +eval_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist3.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +evalpart1_data_list=./conf/lists/202-swahili//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.stm +evalpart1_kwlists=( + 
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist3.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev + /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/202-swahili//dev.list + ./conf/lists/202-swahili//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +shadow_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist3.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/202-swahili//untranscribed-training.list + ./conf/lists/202-swahili//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/untranscribed-training + /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/203-lao-fullLP.official.conf b/egs/babel/s5d/conf/lang/203-lao-fullLP.official.conf new file mode 100644 index 00000000000..052aa6bbc50 --- /dev/null +++ b/egs/babel/s5d/conf/lang/203-lao-fullLP.official.conf @@ -0,0 +1,101 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/203-lao/release-current/conversational/training +train_data_list=/export/babel/data/splits/Lao_Babel203/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/203-lao/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Lao_Babel203/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/203-lao/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Lao_Babel203/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval +eval_data_list=/export/babel/data/splits/Lao_Babel203//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/203-lao/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/203-lao/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist2.xml + [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist4.xml + ) +evalpart1_nj=32 + +#Shadow data files 
+shadow_data_dir=( + /export/babel/data/203-lao/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval/ + ) +shadow_data_list=( + /export/babel/data/splits/Lao_Babel203/dev.list + /export/babel/data/splits/Lao_Babel203/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/203-lao/release-current/conversational/reference_materials/lexicon.txt + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/203-lao-limitedLP.official.conf b/egs/babel/s5d/conf/lang/203-lao-limitedLP.official.conf new file mode 100644 index 00000000000..1e12a529361 --- /dev/null +++ b/egs/babel/s5d/conf/lang/203-lao-limitedLP.official.conf @@ -0,0 +1,110 @@ +# include common settings for limitedLP systems. +. conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/203-lao/release-current/conversational/training +train_data_list=/export/babel/data/splits/Lao_Babel203/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/203-lao/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Lao_Babel203/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/203-lao/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Lao_Babel203/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml + ) 
+dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval +eval_data_list=/export/babel/data/splits/Lao_Babel203//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/203-lao/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/203-lao/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist2.xml + [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist4.xml + ) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/203-lao/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval/ + ) +shadow_data_list=( + /export/babel/data/splits/Lao_Babel203/dev.list + /export/babel/data/splits/Lao_Babel203/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + +unsup_data_dir=(/export/babel/data/203-lao/release-current/conversational/training/ + /export/babel/data/203-lao/release-current/conversational/untranscribed-training/ + ) +unsup_data_list=( + /export/babel/data/splits/Lao_Babel203/train.LimitedLP.untranscribed.list + /export/babel/data/splits/Lao_Babel203/train.untranscribed.list + ) +unsup_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/203-lao/release-current/conversational/reference_materials/lexicon.sub-train.txt + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/204-tamil-fullLP.official.conf b/egs/babel/s5d/conf/lang/204-tamil-fullLP.official.conf new file mode 100644 index 00000000000..700ae3d5dfb --- /dev/null +++ 
b/egs/babel/s5d/conf/lang/204-tamil-fullLP.official.conf @@ -0,0 +1,112 @@ +# include common settings for fullLP systems. +. conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/204-tamil/release-current/conversational/training +train_data_list=/export/babel/data/splits/Tamil_Babel204/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/204-tamil/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Tamil_Babel204/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +dev2h_more_kwlists=( + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/204-tamil/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Tamil_Babel204/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +dev10h_more_kwlists=( + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Tamil_Babel204/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +eval_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +eval_nj=64 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/204-tamil/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.scoring.ecf.xml 
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +evalpart1_more_kwlists=( + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist4.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist5.xml + ) +evalpart1_nj=64 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/204-tamil/release-current/conversational/dev/ + /export/babel/data/204-tamil/release-current/conversational/eval/ + ) +shadow_data_list=( + /export/babel/data/splits/Tamil_Babel204/dev.list + /export/babel/data/splits/Tamil_Babel204/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +shadow_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +shadow_more_kwlists=( + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml + ) +shadow_nj=64 + + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/204-tamil/release-current/conversational/reference_materials/lexicon.txt + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/204-tamil-limitedLP.official.conf b/egs/babel/s5d/conf/lang/204-tamil-limitedLP.official.conf new file mode 100644 index 00000000000..7e16fcd8be5 --- /dev/null +++ b/egs/babel/s5d/conf/lang/204-tamil-limitedLP.official.conf @@ -0,0 +1,122 @@ +# include common settings for limitedLP systems. +. 
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/204-tamil/release-current/conversational/training +train_data_list=/export/babel/data/splits/Tamil_Babel204/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/204-tamil/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Tamil_Babel204/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm +dev2h_kwlists=( + [dev]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/204-tamil/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Tamil_Babel204/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm +dev10h_kwlists=( + [dev]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Tamil_Babel204/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +eval_kwlists=( + [eval]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +) +eval_nj=64 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/204-tamil/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlists=( + 
[dev]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
+  [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist.xml
+  [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist2.xml
+  [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist3.xml
+  [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist4.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist5.xml
+  )
+evalpart1_nj=64
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/204-tamil/release-current/conversational/dev/
+    /export/babel/data/204-tamil/release-current/conversational/eval/
+  )
+shadow_data_list=(
+    /export/babel/data/splits/Tamil_Babel204/dev.list
+    /export/babel/data/splits/Tamil_Babel204/eval.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml
+shadow_kwlists=(
+  [dev]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
+  [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml
+  [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml
+  [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml
+  [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml
+  )
+shadow_nj=64
+
+
+unsup_data_dir=(/export/babel/data/204-tamil/release-current/conversational/training/
+    /export/babel/data/204-tamil/release-current/conversational/untranscribed-training/
+  )
+unsup_data_list=(
+    /export/babel/data/splits/Tamil_Babel204/train.LimitedLP.untranscribed.list
+    /export/babel/data/splits/Tamil_Babel204/train.untranscribed.list
+  )
+unsup_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/204-tamil/release-current/conversational/reference_materials/lexicon.sub-train.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/205-kurmanji.FLP.official.conf b/egs/babel/s5d/conf/lang/205-kurmanji.FLP.official.conf
new file mode 100644
index 00000000000..74e006e2692
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/205-kurmanji.FLP.official.conf
@@ -0,0 +1,94 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/training +train_data_list=./conf/lists/205-kurmanji//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev +dev2h_data_list=./conf/lists/205-kurmanji//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev +dev10h_data_list=./conf/lists/205-kurmanji//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval +eval_data_list=./conf/lists/205-kurmanji//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval +evalpart1_data_list=./conf/lists/205-kurmanji//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.scoring.ecf.xml 
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.stm
+evalpart1_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev
+  /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/205-kurmanji//dev.list
+  ./conf/lists/205-kurmanji//eval.lists
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm
+shadow_kwlists=(
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/untranscribed-training
+unsup_data_list=./conf/lists/205-kurmanji//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/205-kurmanji.LLP.official.conf b/egs/babel/s5d/conf/lang/205-kurmanji.LLP.official.conf
new file mode 100644
index 00000000000..fc5fdd4aa52
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/205-kurmanji.LLP.official.conf
@@ -0,0 +1,100 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/training +train_data_list=./conf/lists/205-kurmanji//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev +dev2h_data_list=./conf/lists/205-kurmanji//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev +dev10h_data_list=./conf/lists/205-kurmanji//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval +eval_data_list=./conf/lists/205-kurmanji//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval +evalpart1_data_list=./conf/lists/205-kurmanji//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.scoring.ecf.xml 
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.stm
+evalpart1_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev
+  /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/205-kurmanji//dev.list
+  ./conf/lists/205-kurmanji//eval.lists
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm
+shadow_kwlists=(
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+  ./conf/lists/205-kurmanji//untranscribed-training.list
+  ./conf/lists/205-kurmanji//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+  /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/untranscribed-training
+  /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/reference_materials/lexicon.sub-train.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/206-zulu-fullLP.official.conf b/egs/babel/s5d/conf/lang/206-zulu-fullLP.official.conf
new file mode 100644
index 00000000000..675dc83780d
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/206-zulu-fullLP.official.conf
@@ -0,0 +1,129 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/206-zulu/release-current/conversational/training +train_data_list=/export/babel/data/splits/Zulu_Babel206/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list +dev2h_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/db-dev-jhuseg-v7-utt.dat +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.scoring.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev2h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.scoring.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev10h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Zulu_Babel206//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/206-zulu/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.stm +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.mitllfa3.rttm 
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1.ecf.xml
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist.xml
+evalpart1_more_kwlists=(
+  [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist2.xml
+  [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist3.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist4.xml
+  )
+evalpart1_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist.xml
+  [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist2.xml
+  [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist3.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist4.xml
+  )
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/206-zulu/release-current/conversational/dev
+    /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_206/conversational/eval/
+  )
+shadow_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/206-shadow-v0-cleaned-utt.dat
+shadow_data_list=(
+    /export/babel/data/splits/Zulu_Babel206/dev.list
+    /export/babel/data/splits/Zulu_Babel206/eval.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
+
+  )
+shadow_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
+  )
+shadow_nj=32
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+#Zulu seems to need much larger LM Weights
+lmwt_plp_extra_opts=( --min-lmwt 10 --max-lmwt 17 )
+lmwt_bnf_extra_opts=( --min-lmwt 13 --max-lmwt 18 )
+lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 17 )
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+phoneme_mapping="k_>=g_<; 3=e; R=l; o=O; b_<=b; t_>=th;"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/206-zulu-limitedLP.official.conf b/egs/babel/s5d/conf/lang/206-zulu-limitedLP.official.conf
new file mode 100644
index 00000000000..caaf8cdc023
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/206-zulu-limitedLP.official.conf
@@ -0,0 +1,126 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Zulu_Babel206/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list +dev2h_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/db-dev-jhuseg-v7-utt.dat +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.scoring.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm +dev2h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml +) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.scoring.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm +dev10h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Zulu_Babel206//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/206-zulu/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.stm +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1.ecf.xml +evalpart1_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist.xml + [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist2.xml + [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist3.xml + 
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist4.xml
+)
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/206-zulu/release-current/conversational/dev
+    /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_206/conversational/eval/
+  )
+shadow_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/206-shadow-v0-cleaned-utt.dat
+shadow_data_list=(
+    /export/babel/data/splits/Zulu_Babel206/dev.list
+    /export/babel/data/splits/Zulu_Babel206/eval.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
+
+  )
+shadow_nj=32
+
+
+unsup_data_dir=(/export/babel/data/206-zulu/release-current/conversational/training/
+    /export/babel/data/206-zulu/release-current/conversational/untranscribed-training/
+  )
+unsup_data_list=(
+    /export/babel/data/splits/Zulu_Babel206/train.LimitedLP.untranscribed.list
+    /export/babel/data/splits/Zulu_Babel206/train.untranscribed.list
+  )
+unsup_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+#Zulu seems to need larger LM Weights
+lmwt_plp_extra_opts=( --min-lmwt 10 --max-lmwt 17 )
+lmwt_bnf_extra_opts=( --min-lmwt 17 --max-lmwt 24 )
+lmwt_dnn_extra_opts=( --min-lmwt 12 --max-lmwt 17 )
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+phoneme_mapping="k_>=g_<; 3=e; R=l; o=O; b_<=b; t_>=th;"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
+
+proxy_phone_beam=-1
+proxy_phone_nbest=-1
+proxy_beam=5
+proxy_nbest=500
+proxy_cutoff=0
+
diff --git a/egs/babel/s5d/conf/lang/207-tokpisin.FLP.official.conf b/egs/babel/s5d/conf/lang/207-tokpisin.FLP.official.conf
new file mode 100644
index 00000000000..0653c16fd8f
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/207-tokpisin.FLP.official.conf
@@ -0,0 +1,93 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/training +train_data_list=./conf/lists/207-tokpisin//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev +dev2h_data_list=./conf/lists/207-tokpisin//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +dev2h_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist3.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist5.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev +dev10h_data_list=./conf/lists/207-tokpisin//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +dev10h_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist3.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist5.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +eval_data_list=./conf/lists/207-tokpisin//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.ecf.xml +eval_kwlists=( + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.kwlist5.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +evalpart1_data_list=./conf/lists/207-tokpisin//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist.xml + 
[kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist4.xml
+  [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist5.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev
+  /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/207-tokpisin//dev.list
+  ./conf/lists/207-tokpisin//eval.lists
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm
+shadow_kwlists=(
+  [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.kwlist5.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/untranscribed-training
+unsup_data_list=./conf/lists/207-tokpisin//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/207-tokpisin.LLP.official.conf b/egs/babel/s5d/conf/lang/207-tokpisin.LLP.official.conf
new file mode 100644
index 00000000000..d48f3196686
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/207-tokpisin.LLP.official.conf
@@ -0,0 +1,99 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/training +train_data_list=./conf/lists/207-tokpisin//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev +dev2h_data_list=./conf/lists/207-tokpisin//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +dev2h_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist3.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist5.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev +dev10h_data_list=./conf/lists/207-tokpisin//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +dev10h_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist3.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist5.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +eval_data_list=./conf/lists/207-tokpisin//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.ecf.xml +eval_kwlists=( + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.kwlist5.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +evalpart1_data_list=./conf/lists/207-tokpisin//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist.xml + 
[kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist4.xml
+  [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist5.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev
+  /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/207-tokpisin//dev.list
+  ./conf/lists/207-tokpisin//eval.lists
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm
+shadow_kwlists=(
+  [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.kwlist5.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+  ./conf/lists/207-tokpisin//untranscribed-training.list
+  ./conf/lists/207-tokpisin//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+  /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/untranscribed-training
+  /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/reference_materials/lexicon.sub-train.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/301-cebuano.FLP.official.conf b/egs/babel/s5d/conf/lang/301-cebuano.FLP.official.conf
new file mode 100644
index 00000000000..4e552e919f8
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/301-cebuano.FLP.official.conf
@@ -0,0 +1,100 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/training +train_data_list=./conf/lists/301-cebuano//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev +dev2h_data_list=./conf/lists/301-cebuano//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev +dev10h_data_list=./conf/lists/301-cebuano//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +eval_data_list=./conf/lists/301-cebuano//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files 
+evalpart1_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval
+evalpart1_data_list=./conf/lists/301-cebuano//evalpart1.list
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.mitllfa3.rttm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.scoring.ecf.xml
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.stm
+evalpart1_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev
+  /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/301-cebuano//dev.list
+  ./conf/lists/301-cebuano//eval.lists
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm
+shadow_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/untranscribed-training
+unsup_data_list=./conf/lists/301-cebuano//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/301-cebuano.LLP.official.conf b/egs/babel/s5d/conf/lang/301-cebuano.LLP.official.conf
new file mode 100644
index 00000000000..6ae02781972
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/301-cebuano.LLP.official.conf
@@ -0,0 +1,106 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/training +train_data_list=./conf/lists/301-cebuano//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev +dev2h_data_list=./conf/lists/301-cebuano//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev +dev10h_data_list=./conf/lists/301-cebuano//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +eval_data_list=./conf/lists/301-cebuano//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files 
+evalpart1_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval
+evalpart1_data_list=./conf/lists/301-cebuano//evalpart1.list
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.mitllfa3.rttm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.scoring.ecf.xml
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.stm
+evalpart1_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev
+  /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/301-cebuano//dev.list
+  ./conf/lists/301-cebuano//eval.lists
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm
+shadow_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+  ./conf/lists/301-cebuano//untranscribed-training.list
+  ./conf/lists/301-cebuano//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+  /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/untranscribed-training
+  /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/reference_materials/lexicon.sub-train.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/302-kazakh.FLP.official.conf b/egs/babel/s5d/conf/lang/302-kazakh.FLP.official.conf
new file mode 100644
index 00000000000..d3a02dc1075
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/302-kazakh.FLP.official.conf
@@ -0,0 +1,101 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/training +train_data_list=./conf/lists/302-kazakh//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev +dev2h_data_list=./conf/lists/302-kazakh//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev +dev10h_data_list=./conf/lists/302-kazakh//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval +eval_data_list=./conf/lists/302-kazakh//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval 
+evalpart1_data_list=./conf/lists/302-kazakh//evalpart1.list
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.mitllfa3.rttm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.scoring.ecf.xml
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.stm
+evalpart1_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev
+  /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/302-kazakh//dev.list
+  ./conf/lists/302-kazakh//eval.lists
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm
+shadow_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/untranscribed-training
+unsup_data_list=./conf/lists/302-kazakh//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/reference_materials/lexicon.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/302-kazakh.LLP.official.conf b/egs/babel/s5d/conf/lang/302-kazakh.LLP.official.conf
new file mode 100644
index 00000000000..2049c820695
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/302-kazakh.LLP.official.conf
@@ -0,0 +1,107 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/training +train_data_list=./conf/lists/302-kazakh//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev +dev2h_data_list=./conf/lists/302-kazakh//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev +dev10h_data_list=./conf/lists/302-kazakh//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval +eval_data_list=./conf/lists/302-kazakh//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval 
+evalpart1_data_list=./conf/lists/302-kazakh//evalpart1.list
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.mitllfa3.rttm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.scoring.ecf.xml
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.stm
+evalpart1_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev
+    /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+    ./conf/lists/302-kazakh//dev.list
+    ./conf/lists/302-kazakh//eval.list
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm
+shadow_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+    ./conf/lists/302-kazakh//untranscribed-training.list
+    ./conf/lists/302-kazakh//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+    /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/untranscribed-training
+    /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/reference_materials/lexicon.sub-train.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/303-telugu.FLP.official.conf b/egs/babel/s5d/conf/lang/303-telugu.FLP.official.conf
new file mode 100644
index 00000000000..5ba3f8a1606
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/303-telugu.FLP.official.conf
@@ -0,0 +1,100 @@
+# include common settings for fullLP systems.
+.
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/training +train_data_list=./conf/lists/303-telugu//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev +dev2h_data_list=./conf/lists/303-telugu//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev +dev10h_data_list=./conf/lists/303-telugu//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval +eval_data_list=./conf/lists/303-telugu//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval 
+evalpart1_data_list=./conf/lists/303-telugu//evalpart1.list
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.mitllfa3.rttm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.scoring.ecf.xml
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.stm
+evalpart1_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev
+    /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+    ./conf/lists/303-telugu//dev.list
+    ./conf/lists/303-telugu//eval.list
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm
+shadow_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/untranscribed-training
+unsup_data_list=./conf/lists/303-telugu//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/reference_materials/lexicon.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/303-telugu.LLP.official.conf b/egs/babel/s5d/conf/lang/303-telugu.LLP.official.conf
new file mode 100644
index 00000000000..b916b5b27e6
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/303-telugu.LLP.official.conf
@@ -0,0 +1,107 @@
+# include common settings for limitedLP systems.
+.
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/training +train_data_list=./conf/lists/303-telugu//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev +dev2h_data_list=./conf/lists/303-telugu//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev +dev10h_data_list=./conf/lists/303-telugu//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval +eval_data_list=./conf/lists/303-telugu//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval 
+evalpart1_data_list=./conf/lists/303-telugu//evalpart1.list
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.mitllfa3.rttm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.scoring.ecf.xml
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.stm
+evalpart1_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev
+    /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+    ./conf/lists/303-telugu//dev.list
+    ./conf/lists/303-telugu//eval.list
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm
+shadow_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+    ./conf/lists/303-telugu//untranscribed-training.list
+    ./conf/lists/303-telugu//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+    /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/untranscribed-training
+    /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/reference_materials/lexicon.sub-train.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/304-lithuanian.FLP.official.conf b/egs/babel/s5d/conf/lang/304-lithuanian.FLP.official.conf
new file mode 100644
index 00000000000..8459ca096a0
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/304-lithuanian.FLP.official.conf
@@ -0,0 +1,100 @@
+# include common settings for fullLP systems.
+.
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/training +train_data_list=./conf/lists/304-lithuanian//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev +dev2h_data_list=./conf/lists/304-lithuanian//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev +dev10h_data_list=./conf/lists/304-lithuanian//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval +eval_data_list=./conf/lists/304-lithuanian//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files 
+evalpart1_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval
+evalpart1_data_list=./conf/lists/304-lithuanian//evalpart1.list
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.mitllfa3.rttm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.scoring.ecf.xml
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.stm
+evalpart1_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev
+    /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+    ./conf/lists/304-lithuanian//dev.list
+    ./conf/lists/304-lithuanian//eval.list
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm
+shadow_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/untranscribed-training
+unsup_data_list=./conf/lists/304-lithuanian//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/304-lithuanian.LLP.official.conf b/egs/babel/s5d/conf/lang/304-lithuanian.LLP.official.conf
new file mode 100644
index 00000000000..a571161390e
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/304-lithuanian.LLP.official.conf
@@ -0,0 +1,106 @@
+# include common settings for limitedLP systems.
+.
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/training +train_data_list=./conf/lists/304-lithuanian//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev +dev2h_data_list=./conf/lists/304-lithuanian//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev +dev10h_data_list=./conf/lists/304-lithuanian//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval +eval_data_list=./conf/lists/304-lithuanian//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files 
+evalpart1_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval
+evalpart1_data_list=./conf/lists/304-lithuanian//evalpart1.list
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.mitllfa3.rttm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.scoring.ecf.xml
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.stm
+evalpart1_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev
+    /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+    ./conf/lists/304-lithuanian//dev.list
+    ./conf/lists/304-lithuanian//eval.list
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm
+shadow_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+    ./conf/lists/304-lithuanian//untranscribed-training.list
+    ./conf/lists/304-lithuanian//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+    /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/untranscribed-training
+    /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/reference_materials/lexicon.sub-train.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/305-guarani.FLP.official.conf b/egs/babel/s5d/conf/lang/305-guarani.FLP.official.conf
new file mode 100644
index 00000000000..233cd81fffb
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/305-guarani.FLP.official.conf
@@ -0,0 +1,45 @@
+# include common settings for fullLP systems.
+.
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/training
+train_data_list=./conf/lists/305-guarani//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev
+dev2h_data_list=./conf/lists/305-guarani//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm
+dev2h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev
+dev10h_data_list=./conf/lists/305-guarani//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm
+dev10h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/untranscribed-training
+unsup_data_list=./conf/lists/305-guarani//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/305-guarani.LLP.official.conf b/egs/babel/s5d/conf/lang/305-guarani.LLP.official.conf
new file mode 100644
index 00000000000..c0d9cc97524
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/305-guarani.LLP.official.conf
@@ -0,0 +1,51 @@
+# include common settings for limitedLP systems.
+.
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/training +train_data_list=./conf/lists/305-guarani//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev +dev2h_data_list=./conf/lists/305-guarani//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev +dev10h_data_list=./conf/lists/305-guarani//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/305-guarani//untranscribed-training.list + ./conf/lists/305-guarani//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/untranscribed-training + /export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/306-igbo.FLP.official.conf b/egs/babel/s5d/conf/lang/306-igbo.FLP.official.conf new file mode 100644 index 00000000000..87f82da6b49 --- /dev/null +++ b/egs/babel/s5d/conf/lang/306-igbo.FLP.official.conf @@ -0,0 +1,45 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/training
+train_data_list=./conf/lists/306-igbo//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/dev
+dev2h_data_list=./conf/lists/306-igbo//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm
+dev2h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/dev
+dev10h_data_list=./conf/lists/306-igbo//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm
+dev10h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/untranscribed-training
+unsup_data_list=./conf/lists/306-igbo//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/306-igbo.LLP.official.conf b/egs/babel/s5d/conf/lang/306-igbo.LLP.official.conf
new file mode 100644
index 00000000000..70642537caf
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/306-igbo.LLP.official.conf
@@ -0,0 +1,51 @@
+# include common settings for limitedLP systems.
+.
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/training +train_data_list=./conf/lists/306-igbo//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/dev +dev2h_data_list=./conf/lists/306-igbo//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/dev +dev10h_data_list=./conf/lists/306-igbo//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/306-igbo//untranscribed-training.list + ./conf/lists/306-igbo//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/untranscribed-training + /export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/307-amharic.FLP.official.conf b/egs/babel/s5d/conf/lang/307-amharic.FLP.official.conf new file mode 100644 index 00000000000..9668bd14e6b --- /dev/null +++ b/egs/babel/s5d/conf/lang/307-amharic.FLP.official.conf @@ -0,0 +1,46 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/training
+train_data_list=./conf/lists/307-amharic//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/dev
+dev2h_data_list=./conf/lists/307-amharic//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/dev
+dev10h_data_list=./conf/lists/307-amharic//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/untranscribed-training
+unsup_data_list=./conf/lists/307-amharic//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/reference_materials/lexicon.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/307-amharic.LLP.official.conf b/egs/babel/s5d/conf/lang/307-amharic.LLP.official.conf
new file mode 100644
index 00000000000..3c49d4356ce
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/307-amharic.LLP.official.conf
@@ -0,0 +1,52 @@
+# include common settings for limitedLP systems.
+.
conf/common.limitedLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/training
+train_data_list=./conf/lists/307-amharic//sub-train.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/dev
+dev2h_data_list=./conf/lists/307-amharic//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/dev
+dev10h_data_list=./conf/lists/307-amharic//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+    ./conf/lists/307-amharic//untranscribed-training.list
+    ./conf/lists/307-amharic//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+    /export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/untranscribed-training
+    /export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/reference_materials/lexicon.sub-train.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/401-mongolian.FLP.official.conf b/egs/babel/s5d/conf/lang/401-mongolian.FLP.official.conf
new file mode 100644
index 00000000000..902ded164d2
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/401-mongolian.FLP.official.conf
@@ -0,0 +1,46 @@
+# include common settings for fullLP systems.
+.
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/training
+train_data_list=./conf/lists/401-mongolian//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/dev
+dev2h_data_list=./conf/lists/401-mongolian//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
+dev2h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/dev
+dev10h_data_list=./conf/lists/401-mongolian//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
+dev10h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/untranscribed-training
+unsup_data_list=./conf/lists/401-mongolian//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/reference_materials/lexicon.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/401-mongolian.LLP.official.conf b/egs/babel/s5d/conf/lang/401-mongolian.LLP.official.conf
new file mode 100644
index 00000000000..e3bd46c7e68
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/401-mongolian.LLP.official.conf
@@ -0,0 +1,52 @@
+# include common settings for limitedLP systems.
+.
conf/common.limitedLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/training
+train_data_list=./conf/lists/401-mongolian//sub-train.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/dev
+dev2h_data_list=./conf/lists/401-mongolian//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
+dev2h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/dev
+dev10h_data_list=./conf/lists/401-mongolian//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
+dev10h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+    ./conf/lists/401-mongolian//untranscribed-training.list
+    ./conf/lists/401-mongolian//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+    /export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/untranscribed-training
+    /export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/reference_materials/lexicon.sub-train.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/402-javanese.FLP.official.conf b/egs/babel/s5d/conf/lang/402-javanese.FLP.official.conf
new file mode 100644
index 00000000000..0f176dc9396
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/402-javanese.FLP.official.conf
@@ -0,0 +1,47 @@
+# include common settings for fullLP systems.
+.
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/training
+train_data_list=./conf/lists/402-javanese//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/dev
+dev2h_data_list=./conf/lists/402-javanese//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/dev
+dev10h_data_list=./conf/lists/402-javanese//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist3.xml
+
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/untranscribed-training
+unsup_data_list=./conf/lists/402-javanese//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/402-javanese.LLP.official.conf b/egs/babel/s5d/conf/lang/402-javanese.LLP.official.conf
new file mode 100644
index 00000000000..99438159ae6
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/402-javanese.LLP.official.conf
@@ -0,0 +1,51 @@
+# include common settings for limitedLP systems.
+.
+
+
+#Speech corpora files location
+train_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/training
+train_data_list=./conf/lists/402-javanese//sub-train.list
+train_nj=32
+
+
+#Radically reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/dev
+dev2h_data_list=./conf/lists/402-javanese//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/dev
+dev10h_data_list=./conf/lists/402-javanese//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+  ./conf/lists/402-javanese//untranscribed-training.list
+  ./conf/lists/402-javanese//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+  /export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/untranscribed-training
+  /export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/reference_materials/lexicon.sub-train.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/403-dholuo.FLP.official.conf b/egs/babel/s5d/conf/lang/403-dholuo.FLP.official.conf
new file mode 100644
index 00000000000..6dc95d74304
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/403-dholuo.FLP.official.conf
@@ -0,0 +1,45 @@
+# include common settings for fullLP systems.
+. conf/common.fullLP || exit 1;
+
+
+#Speech corpora files location
+train_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/training
+train_data_list=./conf/lists/403-dholuo//training.list
+train_nj=32
+
+
+#Radically reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/dev
+dev2h_data_list=./conf/lists/403-dholuo//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/dev
+dev10h_data_list=./conf/lists/403-dholuo//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/untranscribed-training
+unsup_data_list=./conf/lists/403-dholuo//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/403-dholuo.LLP.official.conf b/egs/babel/s5d/conf/lang/403-dholuo.LLP.official.conf
new file mode 100644
index 00000000000..827a1ca5ed0
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/403-dholuo.LLP.official.conf
@@ -0,0 +1,51 @@
+# include common settings for limitedLP systems.
+. conf/common.limitedLP || exit 1;
+
+
+#Speech corpora files location
+train_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/training
+train_data_list=./conf/lists/403-dholuo//sub-train.list
+train_nj=32
+
+
+#Radically reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/dev
+dev2h_data_list=./conf/lists/403-dholuo//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/dev
+dev10h_data_list=./conf/lists/403-dholuo//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+  ./conf/lists/403-dholuo//untranscribed-training.list
+  ./conf/lists/403-dholuo//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+  /export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/untranscribed-training
+  /export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/reference_materials/lexicon.sub-train.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lists/101-cantonese/dev.list b/egs/babel/s5d/conf/lists/101-cantonese/dev.list
new file mode 100644
index 00000000000..581862a9701
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/101-cantonese/dev.list
@@ -0,0 +1,120 @@
+BABEL_BP_101_10470_20111118_172644_inLine
+BABEL_BP_101_10470_20111118_172644_outLine
+BABEL_BP_101_10713_20111024_220917_inLine
+BABEL_BP_101_10713_20111024_220917_outLine
+BABEL_BP_101_10733_20111021_141006_inLine
+BABEL_BP_101_10733_20111021_141006_outLine
+BABEL_BP_101_11982_20111027_140138_inLine
+BABEL_BP_101_11982_20111027_140138_outLine
+BABEL_BP_101_15916_20111129_174019_inLine
+BABEL_BP_101_15916_20111129_174019_outLine
+BABEL_BP_101_16346_20111117_212011_inLine
+BABEL_BP_101_16346_20111117_212011_outLine
+BABEL_BP_101_17983_20111027_140721_inLine
+BABEL_BP_101_17983_20111027_140721_outLine
+BABEL_BP_101_19656_20111103_235107_inLine
+BABEL_BP_101_19656_20111103_235107_outLine
+BABEL_BP_101_20471_20111102_141335_inLine
+BABEL_BP_101_20471_20111102_141335_outLine
+BABEL_BP_101_20741_20111018_195422_inLine
+BABEL_BP_101_20741_20111018_195422_outLine
+BABEL_BP_101_24833_20111031_142944_inLine +BABEL_BP_101_24833_20111031_142944_outLine +BABEL_BP_101_29290_20111031_003657_inLine +BABEL_BP_101_29290_20111031_003657_outLine +BABEL_BP_101_29589_20111126_175320_inLine +BABEL_BP_101_29589_20111126_175320_outLine +BABEL_BP_101_36722_20111104_030316_inLine +BABEL_BP_101_36722_20111104_030316_outLine +BABEL_BP_101_37784_20111208_190128_inLine +BABEL_BP_101_37784_20111208_190128_outLine +BABEL_BP_101_39963_20111026_150832_inLine +BABEL_BP_101_39963_20111026_150832_outLine +BABEL_BP_101_41146_20111026_153646_inLine +BABEL_BP_101_41146_20111026_153646_outLine +BABEL_BP_101_41541_20111206_172913_inLine +BABEL_BP_101_41541_20111206_172913_outLine +BABEL_BP_101_43306_20111103_161140_inLine +BABEL_BP_101_43306_20111103_161140_outLine +BABEL_BP_101_49582_20111027_141449_inLine +BABEL_BP_101_49582_20111027_141449_outLine +BABEL_BP_101_50718_20111020_135643_inLine +BABEL_BP_101_50718_20111020_135643_outLine +BABEL_BP_101_50798_20111026_223324_inLine +BABEL_BP_101_50798_20111026_223324_outLine +BABEL_BP_101_50883_20111102_204642_inLine +BABEL_BP_101_50883_20111102_204642_outLine +BABEL_BP_101_52335_20111203_155425_inLine +BABEL_BP_101_52335_20111203_155425_outLine +BABEL_BP_101_53994_20111202_163841_inLine +BABEL_BP_101_53994_20111202_163841_outLine +BABEL_BP_101_54339_20111124_170134_inLine +BABEL_BP_101_54339_20111124_170134_outLine +BABEL_BP_101_54621_20111125_183536_inLine +BABEL_BP_101_54621_20111125_183536_outLine +BABEL_BP_101_57724_20111027_181810_inLine +BABEL_BP_101_57724_20111027_181810_outLine +BABEL_BP_101_59175_20111027_151958_inLine +BABEL_BP_101_59175_20111027_151958_outLine +BABEL_BP_101_60193_20111102_144921_inLine +BABEL_BP_101_60193_20111102_144921_outLine +BABEL_BP_101_63114_20111123_012206_inLine +BABEL_BP_101_63114_20111123_012206_outLine +BABEL_BP_101_64351_20111124_153905_inLine +BABEL_BP_101_64351_20111124_153905_outLine +BABEL_BP_101_67411_20111030_182522_inLine +BABEL_BP_101_67411_20111030_182522_outLine +BABEL_BP_101_67750_20111025_140818_inLine +BABEL_BP_101_67750_20111025_140818_outLine +BABEL_BP_101_70285_20111026_191056_inLine +BABEL_BP_101_70285_20111026_191056_outLine +BABEL_BP_101_70625_20111129_171555_inLine +BABEL_BP_101_70625_20111129_171555_outLine +BABEL_BP_101_76192_20111102_164411_inLine +BABEL_BP_101_76192_20111102_164411_outLine +BABEL_BP_101_77137_20111125_163632_inLine +BABEL_BP_101_77137_20111125_163632_outLine +BABEL_BP_101_77591_20111114_194820_inLine +BABEL_BP_101_77591_20111114_194820_outLine +BABEL_BP_101_80150_20111117_003728_inLine +BABEL_BP_101_80150_20111117_003728_outLine +BABEL_BP_101_81119_20111118_140013_inLine +BABEL_BP_101_81119_20111118_140013_outLine +BABEL_BP_101_81717_20111118_145402_inLine +BABEL_BP_101_81717_20111118_145402_outLine +BABEL_BP_101_83531_20111104_002551_inLine +BABEL_BP_101_83531_20111104_002551_outLine +BABEL_BP_101_85573_20111019_141646_inLine +BABEL_BP_101_85573_20111019_141646_outLine +BABEL_BP_101_87539_20111201_130219_inLine +BABEL_BP_101_87539_20111201_130219_outLine +BABEL_BP_101_87607_20111125_162304_inLine +BABEL_BP_101_87607_20111125_162304_outLine +BABEL_BP_101_90082_20111127_153333_inLine +BABEL_BP_101_90082_20111127_153333_outLine +BABEL_BP_101_90559_20111203_144741_inLine +BABEL_BP_101_90559_20111203_144741_outLine +BABEL_BP_101_91723_20111104_231255_inLine +BABEL_BP_101_91723_20111104_231255_outLine +BABEL_BP_101_92602_20111029_191642_inLine +BABEL_BP_101_92602_20111029_191642_outLine +BABEL_BP_101_94235_20111119_200950_inLine 
+BABEL_BP_101_94235_20111119_200950_outLine +BABEL_BP_101_95120_20111120_194049_inLine +BABEL_BP_101_95120_20111120_194049_outLine +BABEL_BP_101_95121_20111204_185315_inLine +BABEL_BP_101_95121_20111204_185315_outLine +BABEL_BP_101_95350_20111018_202556_inLine +BABEL_BP_101_95350_20111018_202556_outLine +BABEL_BP_101_95514_20111203_141811_inLine +BABEL_BP_101_95514_20111203_141811_outLine +BABEL_BP_101_95637_20111024_141608_inLine +BABEL_BP_101_95637_20111024_141608_outLine +BABEL_BP_101_95736_20111102_184136_inLine +BABEL_BP_101_95736_20111102_184136_outLine +BABEL_BP_101_97518_20111130_230103_inLine +BABEL_BP_101_97518_20111130_230103_outLine +BABEL_BP_101_98402_20111203_194645_inLine +BABEL_BP_101_98402_20111203_194645_outLine +BABEL_BP_101_98675_20111117_190458_inLine +BABEL_BP_101_98675_20111117_190458_outLine diff --git a/egs/babel/s5d/conf/lists/101-cantonese/eval.list b/egs/babel/s5d/conf/lists/101-cantonese/eval.list new file mode 100644 index 00000000000..d2301ae3d82 --- /dev/null +++ b/egs/babel/s5d/conf/lists/101-cantonese/eval.list @@ -0,0 +1,220 @@ +BABEL_BP_101_11267_20111202_163633_inLine +BABEL_BP_101_11267_20111202_163633_outLine +BABEL_BP_101_11311_20111017_201941_inLine +BABEL_BP_101_11311_20111017_201941_outLine +BABEL_BP_101_12535_20111203_130510_inLine +BABEL_BP_101_12535_20111203_130510_outLine +BABEL_BP_101_13065_20111118_192048_inLine +BABEL_BP_101_13065_20111118_192048_outLine +BABEL_BP_101_13476_20111121_181636_inLine +BABEL_BP_101_13476_20111121_181636_outLine +BABEL_BP_101_14707_20111122_145307_inLine +BABEL_BP_101_14836_20111124_161142_inLine +BABEL_BP_101_14836_20111124_161142_outLine +BABEL_BP_101_14836_20111124_162649_inLine +BABEL_BP_101_14836_20111124_162649_outLine +BABEL_BP_101_15146_20111017_171639_inLine +BABEL_BP_101_15146_20111017_171639_outLine +BABEL_BP_101_15859_20111129_022308_inLine +BABEL_BP_101_15859_20111129_022308_outLine +BABEL_BP_101_16299_20111029_221723_inLine +BABEL_BP_101_16299_20111029_221723_outLine +BABEL_BP_101_16646_20111116_212752_inLine +BABEL_BP_101_16646_20111116_212752_outLine +BABEL_BP_101_17900_20111025_234518_inLine +BABEL_BP_101_17900_20111025_234518_outLine +BABEL_BP_101_19063_20111117_154053_inLine +BABEL_BP_101_19619_20111027_130540_inLine +BABEL_BP_101_19619_20111027_130540_outLine +BABEL_BP_101_20347_20111115_190811_inLine +BABEL_BP_101_20347_20111115_190811_outLine +BABEL_BP_101_21050_20111127_140516_inLine +BABEL_BP_101_21052_20111117_134126_inLine +BABEL_BP_101_21052_20111117_134126_outLine +BABEL_BP_101_22351_20111117_141906_inLine +BABEL_BP_101_22351_20111117_141906_outLine +BABEL_BP_101_22351_20111117_142946_inLine +BABEL_BP_101_22351_20111117_142946_outLine +BABEL_BP_101_24589_20111122_200522_inLine +BABEL_BP_101_25106_20111103_002754_inLine +BABEL_BP_101_25106_20111103_002754_outLine +BABEL_BP_101_26598_20111117_165818_inLine +BABEL_BP_101_26598_20111117_165818_outLine +BABEL_BP_101_27724_20111128_203411_inLine +BABEL_BP_101_27724_20111128_203411_outLine +BABEL_BP_101_28990_20111120_210441_inLine +BABEL_BP_101_28990_20111120_210441_outLine +BABEL_BP_101_30642_20111116_150618_inLine +BABEL_BP_101_30642_20111116_150618_outLine +BABEL_BP_101_32011_20111201_004544_inLine +BABEL_BP_101_32011_20111201_004544_outLine +BABEL_BP_101_32045_20111104_024613_inLine +BABEL_BP_101_32045_20111104_024613_outLine +BABEL_BP_101_32132_20111119_185103_inLine +BABEL_BP_101_32132_20111119_185103_outLine +BABEL_BP_101_33540_20111027_144812_inLine +BABEL_BP_101_33540_20111027_144812_outLine 
+BABEL_BP_101_35074_20111203_144945_inLine +BABEL_BP_101_35074_20111203_144945_outLine +BABEL_BP_101_35612_20111110_210341_inLine +BABEL_BP_101_35612_20111110_210341_outLine +BABEL_BP_101_36143_20111029_193157_inLine +BABEL_BP_101_36143_20111029_193157_outLine +BABEL_BP_101_36155_20111120_144557_inLine +BABEL_BP_101_36155_20111120_144557_outLine +BABEL_BP_101_36155_20111120_150859_inLine +BABEL_BP_101_36155_20111120_150859_outLine +BABEL_BP_101_36868_20111117_210558_inLine +BABEL_BP_101_37348_20111130_223024_inLine +BABEL_BP_101_37348_20111130_223024_outLine +BABEL_BP_101_38635_20111120_180033_inLine +BABEL_BP_101_38635_20111120_180033_outLine +BABEL_BP_101_38640_20111028_200532_inLine +BABEL_BP_101_38640_20111028_200532_outLine +BABEL_BP_101_38640_20111028_202051_inLine +BABEL_BP_101_38640_20111028_202051_outLine +BABEL_BP_101_39114_20111128_134323_inLine +BABEL_BP_101_39114_20111128_134323_outLine +BABEL_BP_101_41797_20111117_181049_inLine +BABEL_BP_101_41797_20111117_181049_outLine +BABEL_BP_101_42768_20111115_173157_inLine +BABEL_BP_101_42768_20111115_173157_outLine +BABEL_BP_101_42853_20111014_121048_inLine +BABEL_BP_101_42853_20111014_121048_outLine +BABEL_BP_101_43317_20111115_183049_inLine +BABEL_BP_101_43317_20111115_183049_outLine +BABEL_BP_101_43991_20111121_191522_inLine +BABEL_BP_101_43991_20111121_191522_outLine +BABEL_BP_101_46409_20111103_190907_inLine +BABEL_BP_101_46409_20111103_190907_outLine +BABEL_BP_101_46464_20111119_154431_inLine +BABEL_BP_101_46464_20111119_154431_outLine +BABEL_BP_101_46521_20111027_144539_inLine +BABEL_BP_101_46521_20111027_144539_outLine +BABEL_BP_101_46950_20111102_231112_inLine +BABEL_BP_101_46950_20111102_231112_outLine +BABEL_BP_101_47185_20111116_191402_inLine +BABEL_BP_101_47185_20111116_191402_outLine +BABEL_BP_101_48536_20111028_200823_inLine +BABEL_BP_101_48536_20111028_200823_outLine +BABEL_BP_101_48645_20111121_182116_inLine +BABEL_BP_101_48645_20111121_182116_outLine +BABEL_BP_101_48645_20111121_183054_inLine +BABEL_BP_101_48645_20111121_183054_outLine +BABEL_BP_101_49042_20111030_233559_inLine +BABEL_BP_101_49042_20111030_233559_outLine +BABEL_BP_101_49173_20111128_203628_inLine +BABEL_BP_101_49173_20111128_203628_outLine +BABEL_BP_101_49173_20111128_204848_inLine +BABEL_BP_101_49173_20111128_204848_outLine +BABEL_BP_101_49239_20111122_153732_inLine +BABEL_BP_101_49239_20111122_153732_outLine +BABEL_BP_101_49552_20111114_230835_inLine +BABEL_BP_101_49552_20111114_230835_outLine +BABEL_BP_101_50555_20111120_155930_inLine +BABEL_BP_101_50555_20111120_155930_outLine +BABEL_BP_101_51042_20111204_200010_inLine +BABEL_BP_101_51042_20111204_200010_outLine +BABEL_BP_101_53278_20111122_170608_inLine +BABEL_BP_101_53463_20111120_193926_inLine +BABEL_BP_101_53463_20111120_193926_outLine +BABEL_BP_101_53982_20111122_191730_inLine +BABEL_BP_101_57422_20111122_180847_inLine +BABEL_BP_101_57551_20111019_214945_inLine +BABEL_BP_101_57551_20111019_214945_outLine +BABEL_BP_101_59169_20111122_141419_inLine +BABEL_BP_101_59671_20111027_145636_inLine +BABEL_BP_101_59671_20111027_145636_outLine +BABEL_BP_101_59891_20111124_143157_inLine +BABEL_BP_101_60064_20111203_191808_inLine +BABEL_BP_101_60064_20111203_191808_outLine +BABEL_BP_101_60277_20111126_194551_inLine +BABEL_BP_101_60277_20111126_194551_outLine +BABEL_BP_101_60277_20111126_200232_inLine +BABEL_BP_101_60277_20111126_200232_outLine +BABEL_BP_101_61203_20111030_130830_inLine +BABEL_BP_101_61203_20111030_130830_outLine +BABEL_BP_101_61906_20111117_202948_inLine 
+BABEL_BP_101_61988_20111028_001218_inLine +BABEL_BP_101_61988_20111028_001219_outLine +BABEL_BP_101_64946_20111201_195421_inLine +BABEL_BP_101_64946_20111201_195421_outLine +BABEL_BP_101_65601_20111103_222906_inLine +BABEL_BP_101_65601_20111103_222906_outLine +BABEL_BP_101_66709_20111119_145638_inLine +BABEL_BP_101_66709_20111119_145638_outLine +BABEL_BP_101_67304_20111129_183928_inLine +BABEL_BP_101_67304_20111129_183928_outLine +BABEL_BP_101_68861_20111030_183357_inLine +BABEL_BP_101_68861_20111030_183357_outLine +BABEL_BP_101_72647_20111128_150245_inLine +BABEL_BP_101_72746_20111121_191752_inLine +BABEL_BP_101_73782_20111126_201918_inLine +BABEL_BP_101_73782_20111126_201918_outLine +BABEL_BP_101_74295_20111121_152402_inLine +BABEL_BP_101_74295_20111121_152402_outLine +BABEL_BP_101_74607_20111022_195251_inLine +BABEL_BP_101_74607_20111022_195251_outLine +BABEL_BP_101_74986_20111116_153007_inLine +BABEL_BP_101_74986_20111116_153007_outLine +BABEL_BP_101_75151_20111203_163659_inLine +BABEL_BP_101_75151_20111203_163659_outLine +BABEL_BP_101_75799_20111122_163729_inLine +BABEL_BP_101_75932_20111111_151802_inLine +BABEL_BP_101_75932_20111111_151802_outLine +BABEL_BP_101_76451_20111026_184920_inLine +BABEL_BP_101_76451_20111026_184920_outLine +BABEL_BP_101_76451_20111026_190345_inLine +BABEL_BP_101_76451_20111026_190345_outLine +BABEL_BP_101_76763_20111017_191052_inLine +BABEL_BP_101_76763_20111017_191052_outLine +BABEL_BP_101_76925_20111103_205340_inLine +BABEL_BP_101_76925_20111103_205340_outLine +BABEL_BP_101_77465_20111120_175215_inLine +BABEL_BP_101_77465_20111120_175215_outLine +BABEL_BP_101_78046_20111125_134944_inLine +BABEL_BP_101_78046_20111125_134944_outLine +BABEL_BP_101_79619_20111119_194350_inLine +BABEL_BP_101_79619_20111119_194350_outLine +BABEL_BP_101_79860_20111102_155320_inLine +BABEL_BP_101_79860_20111102_155320_outLine +BABEL_BP_101_80874_20111125_172008_inLine +BABEL_BP_101_80874_20111125_172008_outLine +BABEL_BP_101_81053_20111114_221753_inLine +BABEL_BP_101_81053_20111114_221753_outLine +BABEL_BP_101_81261_20111104_210152_inLine +BABEL_BP_101_81261_20111104_210152_outLine +BABEL_BP_101_81261_20111104_211429_inLine +BABEL_BP_101_81261_20111104_211429_outLine +BABEL_BP_101_81583_20111022_221726_inLine +BABEL_BP_101_81583_20111022_221726_outLine +BABEL_BP_101_81642_20111124_172127_inLine +BABEL_BP_101_81642_20111124_172127_outLine +BABEL_BP_101_83053_20111118_151047_inLine +BABEL_BP_101_83053_20111118_151047_outLine +BABEL_BP_101_83700_20111121_152308_inLine +BABEL_BP_101_83700_20111121_152308_outLine +BABEL_BP_101_83713_20111104_193756_inLine +BABEL_BP_101_83713_20111104_193756_outLine +BABEL_BP_101_86014_20111120_171648_inLine +BABEL_BP_101_88982_20111126_152512_inLine +BABEL_BP_101_88982_20111126_152512_outLine +BABEL_BP_101_89301_20111128_210850_inLine +BABEL_BP_101_89301_20111128_210850_outLine +BABEL_BP_101_89993_20111125_174226_inLine +BABEL_BP_101_89993_20111125_174226_outLine +BABEL_BP_101_90817_20111118_004749_inLine +BABEL_BP_101_90817_20111118_004749_outLine +BABEL_BP_101_91677_20111122_233646_inLine +BABEL_BP_101_91677_20111122_233646_outLine +BABEL_BP_101_91703_20111116_145954_inLine +BABEL_BP_101_91703_20111116_145954_outLine +BABEL_BP_101_94162_20111118_160545_inLine +BABEL_BP_101_94162_20111118_160545_outLine +BABEL_BP_101_95861_20111028_214238_inLine +BABEL_BP_101_95861_20111028_214238_outLine +BABEL_BP_101_96108_20111122_132644_inLine +BABEL_BP_101_97254_20111117_145052_inLine +BABEL_BP_101_97254_20111117_145052_outLine 
+BABEL_BP_101_97486_20111104_200750_inLine +BABEL_BP_101_97486_20111104_200750_outLine diff --git a/egs/babel/s5d/conf/lists/101-cantonese/evalpart1.list b/egs/babel/s5d/conf/lists/101-cantonese/evalpart1.list new file mode 100644 index 00000000000..1980d99ef3e --- /dev/null +++ b/egs/babel/s5d/conf/lists/101-cantonese/evalpart1.list @@ -0,0 +1,63 @@ +BABEL_BP_101_15859_20111129_022308_inLine +BABEL_BP_101_15859_20111129_022308_outLine +BABEL_BP_101_17900_20111025_234518_inLine +BABEL_BP_101_17900_20111025_234518_outLine +BABEL_BP_101_20347_20111115_190811_inLine +BABEL_BP_101_20347_20111115_190811_outLine +BABEL_BP_101_33540_20111027_144812_inLine +BABEL_BP_101_33540_20111027_144812_outLine +BABEL_BP_101_36143_20111029_193157_inLine +BABEL_BP_101_36143_20111029_193157_outLine +BABEL_BP_101_38635_20111120_180033_inLine +BABEL_BP_101_38635_20111120_180033_outLine +BABEL_BP_101_39114_20111128_134323_outLine +BABEL_BP_101_42768_20111115_173157_inLine +BABEL_BP_101_42768_20111115_173157_outLine +BABEL_BP_101_42853_20111014_121048_inLine +BABEL_BP_101_42853_20111014_121048_outLine +BABEL_BP_101_43317_20111115_183049_inLine +BABEL_BP_101_43317_20111115_183049_outLine +BABEL_BP_101_43991_20111121_191522_inLine +BABEL_BP_101_43991_20111121_191522_outLine +BABEL_BP_101_46464_20111119_154431_inLine +BABEL_BP_101_46464_20111119_154431_outLine +BABEL_BP_101_47185_20111116_191402_inLine +BABEL_BP_101_47185_20111116_191402_outLine +BABEL_BP_101_48536_20111028_200823_inLine +BABEL_BP_101_48536_20111028_200823_outLine +BABEL_BP_101_49552_20111114_230835_inLine +BABEL_BP_101_49552_20111114_230835_outLine +BABEL_BP_101_51042_20111204_200010_inLine +BABEL_BP_101_51042_20111204_200010_outLine +BABEL_BP_101_57551_20111019_214945_inLine +BABEL_BP_101_57551_20111019_214945_outLine +BABEL_BP_101_60064_20111203_191808_inLine +BABEL_BP_101_60064_20111203_191808_outLine +BABEL_BP_101_66709_20111119_145638_inLine +BABEL_BP_101_66709_20111119_145638_outLine +BABEL_BP_101_67304_20111129_183928_inLine +BABEL_BP_101_67304_20111129_183928_outLine +BABEL_BP_101_68861_20111030_183357_inLine +BABEL_BP_101_68861_20111030_183357_outLine +BABEL_BP_101_74295_20111121_152402_inLine +BABEL_BP_101_74295_20111121_152402_outLine +BABEL_BP_101_74607_20111022_195251_inLine +BABEL_BP_101_74607_20111022_195251_outLine +BABEL_BP_101_75151_20111203_163659_inLine +BABEL_BP_101_75151_20111203_163659_outLine +BABEL_BP_101_75932_20111111_151802_inLine +BABEL_BP_101_75932_20111111_151802_outLine +BABEL_BP_101_76451_20111026_184920_inLine +BABEL_BP_101_76451_20111026_184920_outLine +BABEL_BP_101_76451_20111026_190345_inLine +BABEL_BP_101_76451_20111026_190345_outLine +BABEL_BP_101_76763_20111017_191052_inLine +BABEL_BP_101_76763_20111017_191052_outLine +BABEL_BP_101_81642_20111124_172127_inLine +BABEL_BP_101_81642_20111124_172127_outLine +BABEL_BP_101_83053_20111118_151047_inLine +BABEL_BP_101_83053_20111118_151047_outLine +BABEL_BP_101_90817_20111118_004749_inLine +BABEL_BP_101_90817_20111118_004749_outLine +BABEL_BP_101_97486_20111104_200750_inLine +BABEL_BP_101_97486_20111104_200750_outLine diff --git a/egs/babel/s5d/conf/lists/101-cantonese/train.FullLP.list b/egs/babel/s5d/conf/lists/101-cantonese/train.FullLP.list new file mode 100644 index 00000000000..a7db2aa2a23 --- /dev/null +++ b/egs/babel/s5d/conf/lists/101-cantonese/train.FullLP.list @@ -0,0 +1,965 @@ +BABEL_BP_101_10033_20111024_205740_inLine +BABEL_BP_101_10033_20111024_205740_outLine +BABEL_BP_101_10066_20111120_165933_inLine +BABEL_BP_101_10066_20111120_165933_outLine 
+BABEL_BP_101_10160_20111017_201159_inLine +BABEL_BP_101_10160_20111017_201159_outLine +BABEL_BP_101_10211_20111026_234151_inLine +BABEL_BP_101_10211_20111026_234151_outLine +BABEL_BP_101_10900_20111029_155829_inLine +BABEL_BP_101_10900_20111029_155829_outLine +BABEL_BP_101_10925_20111025_152502_inLine +BABEL_BP_101_10925_20111025_152502_outLine +BABEL_BP_101_10945_20111030_173950_inLine +BABEL_BP_101_10945_20111030_173950_outLine +BABEL_BP_101_10973_20111019_183249_inLine +BABEL_BP_101_10973_20111019_183249_outLine +BABEL_BP_101_11031_20111024_203919_inLine +BABEL_BP_101_11031_20111024_203920_outLine +BABEL_BP_101_11036_20111019_192958_inLine +BABEL_BP_101_11036_20111019_192958_outLine +BABEL_BP_101_11371_20111018_183136_inLine +BABEL_BP_101_11371_20111018_183136_outLine +BABEL_BP_101_11422_20111019_145654_inLine +BABEL_BP_101_11422_20111019_145654_outLine +BABEL_BP_101_11479_20111021_205756_inLine +BABEL_BP_101_11479_20111021_205756_outLine +BABEL_BP_101_11690_20111206_171715_inLine +BABEL_BP_101_11690_20111206_171715_outLine +BABEL_BP_101_11694_20111204_205320_inLine +BABEL_BP_101_11694_20111204_205320_outLine +BABEL_BP_101_11827_20111025_190953_inLine +BABEL_BP_101_11827_20111025_190954_outLine +BABEL_BP_101_11868_20111203_180801_inLine +BABEL_BP_101_11868_20111203_180801_outLine +BABEL_BP_101_12003_20111116_132035_inLine +BABEL_BP_101_12003_20111116_132035_outLine +BABEL_BP_101_12552_20111115_153047_inLine +BABEL_BP_101_12552_20111115_153047_outLine +BABEL_BP_101_12631_20111020_140550_inLine +BABEL_BP_101_12631_20111020_140550_outLine +BABEL_BP_101_12807_20111207_142617_inLine +BABEL_BP_101_12807_20111207_142617_outLine +BABEL_BP_101_12897_20111115_165516_inLine +BABEL_BP_101_12897_20111115_165516_outLine +BABEL_BP_101_13229_20111127_140526_inLine +BABEL_BP_101_13229_20111127_140526_outLine +BABEL_BP_101_13272_20111027_193738_inLine +BABEL_BP_101_13272_20111027_193738_outLine +BABEL_BP_101_13530_20111203_184256_inLine +BABEL_BP_101_13530_20111203_184256_outLine +BABEL_BP_101_13781_20111125_145211_inLine +BABEL_BP_101_13781_20111125_145211_outLine +BABEL_BP_101_14054_20111119_163712_inLine +BABEL_BP_101_14054_20111119_163712_outLine +BABEL_BP_101_14294_20111103_134040_inLine +BABEL_BP_101_14294_20111103_134040_outLine +BABEL_BP_101_14500_20111114_202424_inLine +BABEL_BP_101_14500_20111114_202424_outLine +BABEL_BP_101_14666_20111122_125103_inLine +BABEL_BP_101_14666_20111122_125103_outLine +BABEL_BP_101_14729_20111114_200940_inLine +BABEL_BP_101_14729_20111114_200940_outLine +BABEL_BP_101_14769_20111121_155034_inLine +BABEL_BP_101_14769_20111121_155034_outLine +BABEL_BP_101_14891_20111018_130049_inLine +BABEL_BP_101_14891_20111018_130049_outLine +BABEL_BP_101_14915_20111119_165151_inLine +BABEL_BP_101_14915_20111119_165151_outLine +BABEL_BP_101_14936_20111026_202920_inLine +BABEL_BP_101_14936_20111026_202920_outLine +BABEL_BP_101_14997_20111126_152707_inLine +BABEL_BP_101_14997_20111126_152707_outLine +BABEL_BP_101_15142_20111029_163819_inLine +BABEL_BP_101_15142_20111029_163819_outLine +BABEL_BP_101_15460_20111121_223019_inLine +BABEL_BP_101_15460_20111121_223019_outLine +BABEL_BP_101_15473_20111031_131455_inLine +BABEL_BP_101_15473_20111031_131455_outLine +BABEL_BP_101_15696_20111022_193230_inLine +BABEL_BP_101_15696_20111022_193230_outLine +BABEL_BP_101_15873_20111027_121806_inLine +BABEL_BP_101_15873_20111027_121806_outLine +BABEL_BP_101_15881_20111024_141728_inLine +BABEL_BP_101_15881_20111024_141729_outLine +BABEL_BP_101_16066_20111020_145228_inLine 
+BABEL_BP_101_16066_20111020_145228_outLine +BABEL_BP_101_16266_20111027_153525_inLine +BABEL_BP_101_16266_20111027_153525_outLine +BABEL_BP_101_16313_20111022_221750_inLine +BABEL_BP_101_16313_20111022_221750_outLine +BABEL_BP_101_16406_20111103_000453_inLine +BABEL_BP_101_16406_20111103_000453_outLine +BABEL_BP_101_16617_20111030_144124_inLine +BABEL_BP_101_16617_20111030_144124_outLine +BABEL_BP_101_16660_20111020_211620_inLine +BABEL_BP_101_16660_20111020_211620_outLine +BABEL_BP_101_16669_20111019_142510_inLine +BABEL_BP_101_16669_20111019_142510_outLine +BABEL_BP_101_16883_20111122_184255_inLine +BABEL_BP_101_16883_20111122_184255_outLine +BABEL_BP_101_17013_20111117_011741_inLine +BABEL_BP_101_17013_20111117_011741_outLine +BABEL_BP_101_17018_20111020_161922_inLine +BABEL_BP_101_17018_20111020_161922_outLine +BABEL_BP_101_17080_20111020_184025_inLine +BABEL_BP_101_17080_20111020_184025_outLine +BABEL_BP_101_17093_20111124_155145_inLine +BABEL_BP_101_17093_20111124_155145_outLine +BABEL_BP_101_17203_20111026_142831_inLine +BABEL_BP_101_17203_20111026_142831_outLine +BABEL_BP_101_17203_20111026_145429_inLine +BABEL_BP_101_17203_20111026_145429_outLine +BABEL_BP_101_17572_20111116_155402_inLine +BABEL_BP_101_17572_20111116_155402_outLine +BABEL_BP_101_17606_20111130_231145_inLine +BABEL_BP_101_17606_20111130_231145_outLine +BABEL_BP_101_17933_20111120_204846_inLine +BABEL_BP_101_17933_20111120_204846_outLine +BABEL_BP_101_18701_20111121_171853_inLine +BABEL_BP_101_18701_20111121_171853_outLine +BABEL_BP_101_18950_20111127_144125_inLine +BABEL_BP_101_18950_20111127_144125_outLine +BABEL_BP_101_19012_20111122_173413_inLine +BABEL_BP_101_19012_20111122_173413_outLine +BABEL_BP_101_19147_20111021_174406_inLine +BABEL_BP_101_19147_20111021_174406_outLine +BABEL_BP_101_20320_20111027_210504_inLine +BABEL_BP_101_20320_20111027_210504_outLine +BABEL_BP_101_20408_20111101_210200_inLine +BABEL_BP_101_20408_20111101_210200_outLine +BABEL_BP_101_20518_20111119_174458_inLine +BABEL_BP_101_20518_20111119_174458_outLine +BABEL_BP_101_20582_20111023_162723_inLine +BABEL_BP_101_20582_20111023_162723_outLine +BABEL_BP_101_20590_20111017_172008_inLine +BABEL_BP_101_20590_20111017_172008_outLine +BABEL_BP_101_20685_20111019_125028_inLine +BABEL_BP_101_20685_20111019_125028_outLine +BABEL_BP_101_20740_20111125_195727_inLine +BABEL_BP_101_20740_20111125_195727_outLine +BABEL_BP_101_21367_20111126_132150_inLine +BABEL_BP_101_21367_20111126_132150_outLine +BABEL_BP_101_21430_20111027_145918_inLine +BABEL_BP_101_21430_20111027_145918_outLine +BABEL_BP_101_21477_20111031_155928_inLine +BABEL_BP_101_21477_20111031_155928_outLine +BABEL_BP_101_21584_20111030_210806_inLine +BABEL_BP_101_21584_20111030_210807_outLine +BABEL_BP_101_21929_20111025_182511_inLine +BABEL_BP_101_21929_20111025_182511_outLine +BABEL_BP_101_21946_20111122_150655_inLine +BABEL_BP_101_21946_20111122_150655_outLine +BABEL_BP_101_22898_20111022_141857_inLine +BABEL_BP_101_22898_20111022_141857_outLine +BABEL_BP_101_22903_20111116_132430_inLine +BABEL_BP_101_22903_20111116_132430_outLine +BABEL_BP_101_22910_20111028_190802_inLine +BABEL_BP_101_22910_20111028_190802_outLine +BABEL_BP_101_22979_20111129_142742_inLine +BABEL_BP_101_22979_20111129_142742_outLine +BABEL_BP_101_23167_20111026_194856_inLine +BABEL_BP_101_23167_20111026_194856_outLine +BABEL_BP_101_23168_20111120_192134_inLine +BABEL_BP_101_23168_20111120_192134_outLine +BABEL_BP_101_23571_20111128_232031_inLine +BABEL_BP_101_23571_20111128_232031_outLine 
+BABEL_BP_101_23719_20111103_143124_inLine +BABEL_BP_101_23719_20111103_143124_outLine +BABEL_BP_101_23930_20111125_132944_inLine +BABEL_BP_101_23930_20111125_132944_outLine +BABEL_BP_101_24420_20111122_215626_inLine +BABEL_BP_101_24420_20111122_215626_outLine +BABEL_BP_101_24608_20111019_191348_inLine +BABEL_BP_101_24608_20111019_191348_outLine +BABEL_BP_101_24642_20111129_132050_inLine +BABEL_BP_101_24642_20111129_132050_outLine +BABEL_BP_101_24661_20111207_131837_inLine +BABEL_BP_101_24661_20111207_131837_outLine +BABEL_BP_101_25021_20111018_200603_inLine +BABEL_BP_101_25021_20111018_200603_outLine +BABEL_BP_101_25035_20111028_135038_inLine +BABEL_BP_101_25035_20111028_135038_outLine +BABEL_BP_101_25236_20111129_194650_inLine +BABEL_BP_101_25236_20111129_194650_outLine +BABEL_BP_101_25278_20111125_162450_inLine +BABEL_BP_101_25278_20111125_162450_outLine +BABEL_BP_101_25576_20111022_203923_inLine +BABEL_BP_101_25576_20111022_203923_outLine +BABEL_BP_101_25934_20111014_130931_inLine +BABEL_BP_101_25934_20111014_130931_outLine +BABEL_BP_101_26017_20111030_202851_inLine +BABEL_BP_101_26017_20111030_202851_outLine +BABEL_BP_101_26350_20111019_203820_inLine +BABEL_BP_101_26350_20111019_203820_outLine +BABEL_BP_101_26684_20111119_145219_inLine +BABEL_BP_101_26684_20111119_145219_outLine +BABEL_BP_101_27064_20111019_132106_inLine +BABEL_BP_101_27064_20111019_132106_outLine +BABEL_BP_101_27178_20111025_174857_inLine +BABEL_BP_101_27178_20111025_174857_outLine +BABEL_BP_101_27427_20111021_132850_inLine +BABEL_BP_101_27427_20111021_132850_outLine +BABEL_BP_101_27503_20111021_175113_inLine +BABEL_BP_101_27503_20111021_175113_outLine +BABEL_BP_101_27619_20111102_201443_inLine +BABEL_BP_101_27619_20111102_201443_outLine +BABEL_BP_101_28107_20111019_140723_inLine +BABEL_BP_101_28107_20111019_140723_outLine +BABEL_BP_101_28132_20111023_133733_inLine +BABEL_BP_101_28132_20111023_133733_outLine +BABEL_BP_101_28161_20111024_180609_inLine +BABEL_BP_101_28161_20111024_180609_outLine +BABEL_BP_101_28204_20111025_133714_inLine +BABEL_BP_101_28204_20111025_133714_outLine +BABEL_BP_101_28260_20111021_184044_inLine +BABEL_BP_101_28260_20111021_184044_outLine +BABEL_BP_101_28675_20111118_185525_inLine +BABEL_BP_101_28675_20111118_185525_outLine +BABEL_BP_101_28740_20111028_214620_inLine +BABEL_BP_101_28740_20111028_214620_outLine +BABEL_BP_101_29097_20111018_135944_inLine +BABEL_BP_101_29097_20111018_135944_outLine +BABEL_BP_101_29133_20111024_182947_inLine +BABEL_BP_101_29133_20111024_182947_outLine +BABEL_BP_101_29302_20111023_172339_inLine +BABEL_BP_101_29302_20111023_172339_outLine +BABEL_BP_101_29328_20111019_133534_inLine +BABEL_BP_101_29328_20111019_133534_outLine +BABEL_BP_101_29335_20111121_164238_inLine +BABEL_BP_101_29335_20111121_164238_outLine +BABEL_BP_101_29444_20111024_213300_inLine +BABEL_BP_101_29444_20111024_213300_outLine +BABEL_BP_101_29959_20111116_201012_inLine +BABEL_BP_101_29959_20111116_201012_outLine +BABEL_BP_101_30168_20111118_132348_inLine +BABEL_BP_101_30168_20111118_132348_outLine +BABEL_BP_101_30530_20111024_153842_inLine +BABEL_BP_101_30530_20111024_153842_outLine +BABEL_BP_101_30722_20111208_204304_inLine +BABEL_BP_101_30722_20111208_204304_outLine +BABEL_BP_101_31265_20111207_131905_inLine +BABEL_BP_101_31265_20111207_131905_outLine +BABEL_BP_101_31393_20111018_154135_inLine +BABEL_BP_101_31393_20111018_154135_outLine +BABEL_BP_101_31441_20111026_004058_inLine +BABEL_BP_101_31441_20111026_004058_outLine +BABEL_BP_101_31451_20111024_213113_inLine 
+BABEL_BP_101_31451_20111024_213113_outLine +BABEL_BP_101_31460_20111019_144918_inLine +BABEL_BP_101_31460_20111019_144918_outLine +BABEL_BP_101_31917_20111124_151225_inLine +BABEL_BP_101_31917_20111124_151225_outLine +BABEL_BP_101_31980_20111025_130427_inLine +BABEL_BP_101_31980_20111025_130427_outLine +BABEL_BP_101_32274_20111024_160835_inLine +BABEL_BP_101_32274_20111024_160835_outLine +BABEL_BP_101_32295_20111111_144923_inLine +BABEL_BP_101_32295_20111111_144923_outLine +BABEL_BP_101_32452_20111022_135256_inLine +BABEL_BP_101_32452_20111022_135256_outLine +BABEL_BP_101_32710_20111119_133220_inLine +BABEL_BP_101_32710_20111119_133220_outLine +BABEL_BP_101_32890_20111130_220957_inLine +BABEL_BP_101_32890_20111130_220957_outLine +BABEL_BP_101_33023_20111024_133813_inLine +BABEL_BP_101_33023_20111024_133813_outLine +BABEL_BP_101_33671_20111019_130712_inLine +BABEL_BP_101_33671_20111019_130712_outLine +BABEL_BP_101_33742_20111118_231555_inLine +BABEL_BP_101_33742_20111118_231555_outLine +BABEL_BP_101_34194_20111024_173622_inLine +BABEL_BP_101_34194_20111024_173622_outLine +BABEL_BP_101_34446_20111019_005315_inLine +BABEL_BP_101_34446_20111019_005315_outLine +BABEL_BP_101_34930_20111024_143654_inLine +BABEL_BP_101_34930_20111024_143654_outLine +BABEL_BP_101_34961_20111027_175107_inLine +BABEL_BP_101_34961_20111027_175107_outLine +BABEL_BP_101_35006_20111120_181354_inLine +BABEL_BP_101_35006_20111120_181354_outLine +BABEL_BP_101_35016_20111203_203519_inLine +BABEL_BP_101_35016_20111203_203519_outLine +BABEL_BP_101_35179_20111124_131132_inLine +BABEL_BP_101_35179_20111124_131132_outLine +BABEL_BP_101_35357_20111203_170817_inLine +BABEL_BP_101_35357_20111203_170817_outLine +BABEL_BP_101_35391_20111130_144901_inLine +BABEL_BP_101_35391_20111130_144901_outLine +BABEL_BP_101_35576_20111118_131203_inLine +BABEL_BP_101_35576_20111118_131203_outLine +BABEL_BP_101_35932_20111023_151638_inLine +BABEL_BP_101_35932_20111023_151638_outLine +BABEL_BP_101_36268_20111028_174826_inLine +BABEL_BP_101_36268_20111028_174826_outLine +BABEL_BP_101_36383_20111129_181746_inLine +BABEL_BP_101_36383_20111129_181746_outLine +BABEL_BP_101_36424_20111119_145307_inLine +BABEL_BP_101_36424_20111119_145307_outLine +BABEL_BP_101_36502_20111025_145704_inLine +BABEL_BP_101_36502_20111025_145704_outLine +BABEL_BP_101_36711_20111104_142236_inLine +BABEL_BP_101_36711_20111104_142236_outLine +BABEL_BP_101_37094_20111019_184657_inLine +BABEL_BP_101_37094_20111019_184657_outLine +BABEL_BP_101_37110_20111019_203150_inLine +BABEL_BP_101_37110_20111019_203150_outLine +BABEL_BP_101_37203_20111103_180606_inLine +BABEL_BP_101_37203_20111103_180606_outLine +BABEL_BP_101_37210_20111102_172955_inLine +BABEL_BP_101_37210_20111102_172955_outLine +BABEL_BP_101_37258_20111110_203745_inLine +BABEL_BP_101_37258_20111110_203745_outLine +BABEL_BP_101_37285_20111028_003951_inLine +BABEL_BP_101_37285_20111028_003951_outLine +BABEL_BP_101_37461_20111022_210313_inLine +BABEL_BP_101_37461_20111022_210313_outLine +BABEL_BP_101_37766_20111130_012017_inLine +BABEL_BP_101_37766_20111130_012017_outLine +BABEL_BP_101_38108_20111125_153427_inLine +BABEL_BP_101_38108_20111125_153427_outLine +BABEL_BP_101_38698_20111025_183746_inLine +BABEL_BP_101_38698_20111025_183746_outLine +BABEL_BP_101_38879_20111029_193700_inLine +BABEL_BP_101_38879_20111029_193701_outLine +BABEL_BP_101_38912_20111120_214951_inLine +BABEL_BP_101_38912_20111120_214951_outLine +BABEL_BP_101_38956_20111025_175018_inLine +BABEL_BP_101_38956_20111025_175018_outLine 
+BABEL_BP_101_39080_20111124_182207_inLine +BABEL_BP_101_39080_20111124_182207_outLine +BABEL_BP_101_39140_20111026_125824_inLine +BABEL_BP_101_39140_20111026_125824_outLine +BABEL_BP_101_39246_20111119_185410_inLine +BABEL_BP_101_39246_20111119_185410_outLine +BABEL_BP_101_39287_20111119_192815_inLine +BABEL_BP_101_39287_20111119_192815_outLine +BABEL_BP_101_39317_20111020_162113_inLine +BABEL_BP_101_39317_20111020_162113_outLine +BABEL_BP_101_39756_20111207_162851_inLine +BABEL_BP_101_39756_20111207_162851_outLine +BABEL_BP_101_39809_20111025_182053_inLine +BABEL_BP_101_39809_20111025_182053_outLine +BABEL_BP_101_39915_20111101_164819_inLine +BABEL_BP_101_39915_20111101_164819_outLine +BABEL_BP_101_39997_20111124_152508_inLine +BABEL_BP_101_39997_20111124_152508_outLine +BABEL_BP_101_40046_20111018_185918_inLine +BABEL_BP_101_40046_20111018_185918_outLine +BABEL_BP_101_40123_20111129_182232_inLine +BABEL_BP_101_40123_20111129_182232_outLine +BABEL_BP_101_40346_20111018_165337_inLine +BABEL_BP_101_40346_20111018_165337_outLine +BABEL_BP_101_40439_20111203_182814_inLine +BABEL_BP_101_40439_20111203_182814_outLine +BABEL_BP_101_40510_20111126_151543_inLine +BABEL_BP_101_40510_20111126_151543_outLine +BABEL_BP_101_40980_20111119_150324_inLine +BABEL_BP_101_40980_20111119_150324_outLine +BABEL_BP_101_41170_20111018_182942_inLine +BABEL_BP_101_41170_20111018_182942_outLine +BABEL_BP_101_41456_20111117_162327_inLine +BABEL_BP_101_41456_20111117_162327_outLine +BABEL_BP_101_41513_20111121_142105_inLine +BABEL_BP_101_41513_20111121_142105_outLine +BABEL_BP_101_41661_20111102_131955_inLine +BABEL_BP_101_41661_20111102_131955_outLine +BABEL_BP_101_42145_20111117_131023_inLine +BABEL_BP_101_42145_20111117_131023_outLine +BABEL_BP_101_42266_20111031_233515_inLine +BABEL_BP_101_42266_20111031_233515_outLine +BABEL_BP_101_42615_20111018_173023_inLine +BABEL_BP_101_42615_20111018_173023_outLine +BABEL_BP_101_42766_20111124_150047_inLine +BABEL_BP_101_42766_20111124_150047_outLine +BABEL_BP_101_42788_20111120_201122_inLine +BABEL_BP_101_42788_20111120_201122_outLine +BABEL_BP_101_43086_20111025_160708_inLine +BABEL_BP_101_43086_20111025_160708_outLine +BABEL_BP_101_43383_20111019_135432_inLine +BABEL_BP_101_43383_20111019_135432_outLine +BABEL_BP_101_44129_20111118_210653_inLine +BABEL_BP_101_44129_20111118_210653_outLine +BABEL_BP_101_44209_20111120_131002_inLine +BABEL_BP_101_44209_20111120_131002_outLine +BABEL_BP_101_44403_20111023_151830_inLine +BABEL_BP_101_44403_20111023_151830_outLine +BABEL_BP_101_44403_20111023_152732_inLine +BABEL_BP_101_44403_20111023_152732_outLine +BABEL_BP_101_44535_20111021_153223_inLine +BABEL_BP_101_44535_20111021_153223_outLine +BABEL_BP_101_44836_20111119_154154_inLine +BABEL_BP_101_44836_20111119_154154_outLine +BABEL_BP_101_45361_20111124_141850_inLine +BABEL_BP_101_45361_20111124_141850_outLine +BABEL_BP_101_45472_20111020_171106_inLine +BABEL_BP_101_45472_20111020_171106_outLine +BABEL_BP_101_45511_20111025_204720_inLine +BABEL_BP_101_45511_20111025_204720_outLine +BABEL_BP_101_45642_20111027_171601_inLine +BABEL_BP_101_45642_20111027_171601_outLine +BABEL_BP_101_45678_20111020_155310_inLine +BABEL_BP_101_45678_20111020_155310_outLine +BABEL_BP_101_45702_20111130_133011_inLine +BABEL_BP_101_45702_20111130_133011_outLine +BABEL_BP_101_45738_20111129_143901_inLine +BABEL_BP_101_45738_20111129_143901_outLine +BABEL_BP_101_45931_20111021_190814_inLine +BABEL_BP_101_45931_20111021_190815_outLine +BABEL_BP_101_46243_20111020_204505_inLine 
+BABEL_BP_101_46243_20111020_204505_outLine +BABEL_BP_101_46332_20111103_181337_inLine +BABEL_BP_101_46332_20111103_181337_outLine +BABEL_BP_101_46603_20111128_205449_inLine +BABEL_BP_101_46603_20111128_205449_outLine +BABEL_BP_101_47128_20111027_143246_inLine +BABEL_BP_101_47128_20111027_143246_outLine +BABEL_BP_101_47634_20111026_134005_inLine +BABEL_BP_101_47634_20111026_134005_outLine +BABEL_BP_101_47646_20111126_015509_inLine +BABEL_BP_101_47646_20111126_015509_outLine +BABEL_BP_101_47661_20111028_183156_inLine +BABEL_BP_101_47661_20111028_183156_outLine +BABEL_BP_101_47794_20111204_021008_inLine +BABEL_BP_101_47794_20111204_021008_outLine +BABEL_BP_101_47823_20111129_204026_inLine +BABEL_BP_101_47823_20111129_204026_outLine +BABEL_BP_101_47906_20111119_130308_inLine +BABEL_BP_101_47906_20111119_130308_outLine +BABEL_BP_101_48053_20111020_130943_inLine +BABEL_BP_101_48053_20111020_130943_outLine +BABEL_BP_101_48188_20111117_210754_inLine +BABEL_BP_101_48188_20111117_210754_outLine +BABEL_BP_101_48410_20111021_230709_inLine +BABEL_BP_101_48410_20111021_230709_outLine +BABEL_BP_101_48418_20111203_171145_inLine +BABEL_BP_101_48418_20111203_171145_outLine +BABEL_BP_101_48511_20111026_133629_inLine +BABEL_BP_101_48511_20111026_133629_outLine +BABEL_BP_101_48559_20111118_125850_inLine +BABEL_BP_101_48559_20111118_125850_outLine +BABEL_BP_101_48733_20111117_140942_inLine +BABEL_BP_101_48733_20111117_140942_outLine +BABEL_BP_101_49306_20111130_170120_inLine +BABEL_BP_101_49306_20111130_170120_outLine +BABEL_BP_101_49452_20111027_171653_inLine +BABEL_BP_101_49452_20111027_171653_outLine +BABEL_BP_101_49541_20111104_192333_inLine +BABEL_BP_101_49541_20111104_192333_outLine +BABEL_BP_101_49624_20111120_201437_inLine +BABEL_BP_101_49624_20111120_201437_outLine +BABEL_BP_101_49689_20111125_174904_inLine +BABEL_BP_101_49689_20111125_174904_outLine +BABEL_BP_101_49773_20111021_195218_inLine +BABEL_BP_101_49773_20111021_195218_outLine +BABEL_BP_101_49864_20111023_192125_inLine +BABEL_BP_101_49864_20111023_192125_outLine +BABEL_BP_101_50101_20111019_173327_inLine +BABEL_BP_101_50101_20111019_173327_outLine +BABEL_BP_101_50201_20111026_154228_inLine +BABEL_BP_101_50201_20111026_154228_outLine +BABEL_BP_101_50409_20111204_161529_inLine +BABEL_BP_101_50409_20111204_161529_outLine +BABEL_BP_101_50416_20111129_170514_inLine +BABEL_BP_101_50416_20111129_170514_outLine +BABEL_BP_101_50476_20111130_010429_inLine +BABEL_BP_101_50476_20111130_010429_outLine +BABEL_BP_101_50589_20111025_190441_inLine +BABEL_BP_101_50589_20111025_190441_outLine +BABEL_BP_101_50842_20111030_171650_inLine +BABEL_BP_101_50842_20111030_171650_outLine +BABEL_BP_101_51052_20111121_175102_inLine +BABEL_BP_101_51052_20111121_175102_outLine +BABEL_BP_101_51117_20111025_175138_inLine +BABEL_BP_101_51117_20111025_175138_outLine +BABEL_BP_101_51374_20111020_152431_inLine +BABEL_BP_101_51374_20111020_152431_outLine +BABEL_BP_101_51446_20111127_145511_inLine +BABEL_BP_101_51446_20111127_145511_outLine +BABEL_BP_101_51569_20111019_201532_inLine +BABEL_BP_101_51569_20111019_201532_outLine +BABEL_BP_101_51727_20111117_003536_inLine +BABEL_BP_101_51727_20111117_003536_outLine +BABEL_BP_101_52366_20111018_140013_inLine +BABEL_BP_101_52366_20111018_140013_outLine +BABEL_BP_101_52642_20111129_221057_inLine +BABEL_BP_101_52642_20111129_221057_outLine +BABEL_BP_101_53181_20111025_171245_inLine +BABEL_BP_101_53181_20111025_171246_outLine +BABEL_BP_101_53464_20111020_132212_inLine +BABEL_BP_101_53464_20111020_132212_outLine 
+BABEL_BP_101_53544_20111205_190859_inLine +BABEL_BP_101_53544_20111205_190859_outLine +BABEL_BP_101_53703_20111026_123307_inLine +BABEL_BP_101_53703_20111026_123307_outLine +BABEL_BP_101_53824_20111115_174804_inLine +BABEL_BP_101_53824_20111115_174804_outLine +BABEL_BP_101_53985_20111027_134232_inLine +BABEL_BP_101_53985_20111027_134232_outLine +BABEL_BP_101_54315_20111018_150809_inLine +BABEL_BP_101_54315_20111018_150809_outLine +BABEL_BP_101_54787_20111027_003335_inLine +BABEL_BP_101_54787_20111027_003335_outLine +BABEL_BP_101_55369_20111022_150532_inLine +BABEL_BP_101_55369_20111022_150532_outLine +BABEL_BP_101_55786_20111023_175604_inLine +BABEL_BP_101_55786_20111023_175604_outLine +BABEL_BP_101_55786_20111023_181021_inLine +BABEL_BP_101_55786_20111023_181022_outLine +BABEL_BP_101_55944_20111124_180312_inLine +BABEL_BP_101_55944_20111124_180312_outLine +BABEL_BP_101_56070_20111030_192056_inLine +BABEL_BP_101_56070_20111030_192056_outLine +BABEL_BP_101_56117_20111120_230517_inLine +BABEL_BP_101_56117_20111120_230517_outLine +BABEL_BP_101_56648_20111126_183128_inLine +BABEL_BP_101_56648_20111126_183128_outLine +BABEL_BP_101_57457_20111104_004433_inLine +BABEL_BP_101_57457_20111104_004433_outLine +BABEL_BP_101_57629_20111018_150159_inLine +BABEL_BP_101_57629_20111018_150200_outLine +BABEL_BP_101_58137_20111121_200320_inLine +BABEL_BP_101_58137_20111121_200320_outLine +BABEL_BP_101_58190_20111124_203150_inLine +BABEL_BP_101_58190_20111124_203150_outLine +BABEL_BP_101_58357_20111122_155154_inLine +BABEL_BP_101_58357_20111122_155154_outLine +BABEL_BP_101_58536_20111103_202702_inLine +BABEL_BP_101_58536_20111103_202702_outLine +BABEL_BP_101_58715_20111025_173420_inLine +BABEL_BP_101_58715_20111025_173420_outLine +BABEL_BP_101_58863_20111029_204335_inLine +BABEL_BP_101_58863_20111029_204335_outLine +BABEL_BP_101_58923_20111021_133326_inLine +BABEL_BP_101_58923_20111021_133326_outLine +BABEL_BP_101_59028_20111130_201120_inLine +BABEL_BP_101_59028_20111130_201120_outLine +BABEL_BP_101_59032_20111130_125508_inLine +BABEL_BP_101_59032_20111130_125508_outLine +BABEL_BP_101_59454_20111117_203722_inLine +BABEL_BP_101_59454_20111117_203722_outLine +BABEL_BP_101_59544_20111027_165941_inLine +BABEL_BP_101_59544_20111027_165941_outLine +BABEL_BP_101_59868_20111021_213412_inLine +BABEL_BP_101_59868_20111021_213412_outLine +BABEL_BP_101_59925_20111203_131501_inLine +BABEL_BP_101_59925_20111203_131501_outLine +BABEL_BP_101_59961_20111031_203903_inLine +BABEL_BP_101_59961_20111031_203903_outLine +BABEL_BP_101_60106_20111024_194048_inLine +BABEL_BP_101_60106_20111024_194048_outLine +BABEL_BP_101_60110_20111102_211956_inLine +BABEL_BP_101_60110_20111102_211956_outLine +BABEL_BP_101_60183_20111129_192036_inLine +BABEL_BP_101_60183_20111129_192036_outLine +BABEL_BP_101_60605_20111124_131048_inLine +BABEL_BP_101_60605_20111124_131048_outLine +BABEL_BP_101_60826_20111120_164851_inLine +BABEL_BP_101_60826_20111120_164851_outLine +BABEL_BP_101_61073_20111102_190426_inLine +BABEL_BP_101_61073_20111102_190426_outLine +BABEL_BP_101_61119_20111125_210556_inLine +BABEL_BP_101_61119_20111125_210556_outLine +BABEL_BP_101_61408_20111204_193348_inLine +BABEL_BP_101_61408_20111204_193348_outLine +BABEL_BP_101_61446_20111019_151903_inLine +BABEL_BP_101_61446_20111019_151903_outLine +BABEL_BP_101_61449_20111117_151606_inLine +BABEL_BP_101_61449_20111117_151606_outLine +BABEL_BP_101_61762_20111028_180944_inLine +BABEL_BP_101_61762_20111028_180944_outLine +BABEL_BP_101_61822_20111022_202742_inLine 
+BABEL_BP_101_61822_20111022_202742_outLine +BABEL_BP_101_62177_20111019_143057_inLine +BABEL_BP_101_62177_20111019_143057_outLine +BABEL_BP_101_63339_20111019_193743_inLine +BABEL_BP_101_63339_20111019_193743_outLine +BABEL_BP_101_63459_20111120_165000_inLine +BABEL_BP_101_63459_20111120_165000_outLine +BABEL_BP_101_63711_20111025_215436_inLine +BABEL_BP_101_63711_20111025_215436_outLine +BABEL_BP_101_64205_20111203_135507_inLine +BABEL_BP_101_64205_20111203_135507_outLine +BABEL_BP_101_64404_20111018_165302_inLine +BABEL_BP_101_64404_20111018_165302_outLine +BABEL_BP_101_64889_20111124_220757_inLine +BABEL_BP_101_64889_20111124_220757_outLine +BABEL_BP_101_65606_20111116_140731_inLine +BABEL_BP_101_65606_20111116_140731_outLine +BABEL_BP_101_65743_20111019_161830_inLine +BABEL_BP_101_65743_20111019_161830_outLine +BABEL_BP_101_66188_20111206_204246_inLine +BABEL_BP_101_66188_20111206_204246_outLine +BABEL_BP_101_66798_20111026_145101_inLine +BABEL_BP_101_66798_20111026_145101_outLine +BABEL_BP_101_66839_20111120_192904_inLine +BABEL_BP_101_66839_20111120_192904_outLine +BABEL_BP_101_66866_20111128_183933_inLine +BABEL_BP_101_66866_20111128_183933_outLine +BABEL_BP_101_66903_20111021_130004_inLine +BABEL_BP_101_66903_20111021_130004_outLine +BABEL_BP_101_66964_20111117_173710_inLine +BABEL_BP_101_66964_20111117_173710_outLine +BABEL_BP_101_67555_20111023_140926_inLine +BABEL_BP_101_67555_20111023_140926_outLine +BABEL_BP_101_67798_20111104_013951_inLine +BABEL_BP_101_67798_20111104_013951_outLine +BABEL_BP_101_68129_20111120_133854_inLine +BABEL_BP_101_68129_20111120_133854_outLine +BABEL_BP_101_68295_20111124_181015_inLine +BABEL_BP_101_68295_20111124_181015_outLine +BABEL_BP_101_68545_20111121_132438_inLine +BABEL_BP_101_68545_20111121_132438_outLine +BABEL_BP_101_68706_20111025_141920_inLine +BABEL_BP_101_68706_20111025_141920_outLine +BABEL_BP_101_68767_20111029_174711_inLine +BABEL_BP_101_68767_20111029_174711_outLine +BABEL_BP_101_69049_20111102_140355_inLine +BABEL_BP_101_69049_20111102_140355_outLine +BABEL_BP_101_69137_20111121_162510_inLine +BABEL_BP_101_69137_20111121_162510_outLine +BABEL_BP_101_69236_20111029_183129_inLine +BABEL_BP_101_69236_20111029_183130_outLine +BABEL_BP_101_69295_20111130_003858_inLine +BABEL_BP_101_69295_20111130_003858_outLine +BABEL_BP_101_69368_20111020_193935_inLine +BABEL_BP_101_69368_20111020_193935_outLine +BABEL_BP_101_69548_20111024_183457_inLine +BABEL_BP_101_69548_20111024_192648_inLine +BABEL_BP_101_69548_20111024_192648_outLine +BABEL_BP_101_69650_20111025_220513_inLine +BABEL_BP_101_69650_20111025_220513_outLine +BABEL_BP_101_69764_20111026_211954_inLine +BABEL_BP_101_69764_20111026_211954_outLine +BABEL_BP_101_70511_20111119_201802_inLine +BABEL_BP_101_70511_20111119_201802_outLine +BABEL_BP_101_70548_20111127_144545_inLine +BABEL_BP_101_70548_20111127_144545_outLine +BABEL_BP_101_70615_20111019_192646_inLine +BABEL_BP_101_70615_20111019_192646_outLine +BABEL_BP_101_70680_20111018_151854_inLine +BABEL_BP_101_70680_20111018_151854_outLine +BABEL_BP_101_71741_20111026_164112_inLine +BABEL_BP_101_71741_20111026_164112_outLine +BABEL_BP_101_71778_20111121_153418_inLine +BABEL_BP_101_71778_20111121_153418_outLine +BABEL_BP_101_72330_20111021_174758_inLine +BABEL_BP_101_72330_20111021_174758_outLine +BABEL_BP_101_72718_20111129_164931_inLine +BABEL_BP_101_72718_20111129_164931_outLine +BABEL_BP_101_72879_20111018_234432_inLine +BABEL_BP_101_72879_20111018_234432_outLine +BABEL_BP_101_73170_20111023_213358_inLine 
+BABEL_BP_101_73170_20111023_213358_outLine +BABEL_BP_101_73542_20111019_212519_inLine +BABEL_BP_101_73542_20111019_212520_outLine +BABEL_BP_101_73761_20111115_130043_inLine +BABEL_BP_101_73761_20111115_130043_outLine +BABEL_BP_101_73786_20111019_133350_inLine +BABEL_BP_101_73786_20111019_133350_outLine +BABEL_BP_101_73911_20111026_220612_inLine +BABEL_BP_101_73911_20111026_220612_outLine +BABEL_BP_101_73923_20111017_171925_inLine +BABEL_BP_101_73923_20111017_171925_outLine +BABEL_BP_101_74234_20111102_161626_inLine +BABEL_BP_101_74234_20111102_161626_outLine +BABEL_BP_101_74317_20111115_154736_inLine +BABEL_BP_101_74317_20111115_154736_outLine +BABEL_BP_101_74395_20111117_135831_inLine +BABEL_BP_101_74395_20111117_135831_outLine +BABEL_BP_101_74404_20111020_190145_inLine +BABEL_BP_101_74404_20111020_190145_outLine +BABEL_BP_101_74451_20111117_164153_inLine +BABEL_BP_101_74451_20111117_164153_outLine +BABEL_BP_101_74823_20111024_162421_inLine +BABEL_BP_101_74823_20111024_162421_outLine +BABEL_BP_101_74884_20111030_143437_inLine +BABEL_BP_101_74884_20111030_143437_outLine +BABEL_BP_101_75020_20111020_153842_inLine +BABEL_BP_101_75020_20111020_153842_outLine +BABEL_BP_101_75243_20111204_182510_inLine +BABEL_BP_101_75243_20111204_182510_outLine +BABEL_BP_101_75815_20111029_172800_inLine +BABEL_BP_101_75815_20111029_172800_outLine +BABEL_BP_101_76341_20111027_132615_inLine +BABEL_BP_101_76341_20111027_132615_outLine +BABEL_BP_101_76585_20111025_150729_inLine +BABEL_BP_101_76585_20111025_150729_outLine +BABEL_BP_101_76661_20111024_190704_inLine +BABEL_BP_101_76661_20111024_190704_outLine +BABEL_BP_101_76661_20111024_194723_inLine +BABEL_BP_101_76661_20111024_194723_outLine +BABEL_BP_101_76733_20111128_133322_inLine +BABEL_BP_101_76733_20111128_133322_outLine +BABEL_BP_101_76919_20111120_202312_inLine +BABEL_BP_101_76919_20111120_202312_outLine +BABEL_BP_101_76944_20111114_210715_inLine +BABEL_BP_101_76944_20111114_210715_outLine +BABEL_BP_101_77238_20111024_161359_inLine +BABEL_BP_101_77238_20111024_161359_outLine +BABEL_BP_101_77244_20111127_021035_inLine +BABEL_BP_101_77244_20111127_021035_outLine +BABEL_BP_101_77348_20111018_171727_inLine +BABEL_BP_101_77348_20111018_171727_outLine +BABEL_BP_101_77802_20111125_191137_inLine +BABEL_BP_101_77802_20111125_191137_outLine +BABEL_BP_101_77886_20111026_163310_inLine +BABEL_BP_101_77886_20111026_163311_outLine +BABEL_BP_101_77965_20111019_133612_inLine +BABEL_BP_101_77965_20111019_133612_outLine +BABEL_BP_101_77965_20111019_134901_inLine +BABEL_BP_101_77965_20111019_134901_outLine +BABEL_BP_101_78094_20111026_132018_inLine +BABEL_BP_101_78094_20111026_132018_outLine +BABEL_BP_101_78514_20111026_204851_inLine +BABEL_BP_101_78514_20111026_204851_outLine +BABEL_BP_101_78879_20111102_133430_inLine +BABEL_BP_101_78879_20111102_133430_outLine +BABEL_BP_101_79412_20111026_010314_inLine +BABEL_BP_101_79412_20111026_010314_outLine +BABEL_BP_101_79495_20111017_200437_inLine +BABEL_BP_101_79495_20111017_200438_outLine +BABEL_BP_101_80008_20111031_180815_inLine +BABEL_BP_101_80008_20111031_180815_outLine +BABEL_BP_101_80156_20111024_145349_inLine +BABEL_BP_101_80156_20111024_145349_outLine +BABEL_BP_101_80535_20111206_193024_inLine +BABEL_BP_101_80535_20111206_193024_outLine +BABEL_BP_101_80786_20111030_205240_inLine +BABEL_BP_101_80786_20111030_205240_outLine +BABEL_BP_101_80817_20111125_182621_inLine +BABEL_BP_101_80817_20111125_182621_outLine +BABEL_BP_101_80953_20111122_205857_inLine +BABEL_BP_101_80953_20111122_205857_outLine 
+BABEL_BP_101_81056_20111130_220634_inLine +BABEL_BP_101_81056_20111130_220634_outLine +BABEL_BP_101_81308_20111021_143922_inLine +BABEL_BP_101_81308_20111021_143922_outLine +BABEL_BP_101_81321_20111028_124244_inLine +BABEL_BP_101_81321_20111028_124244_outLine +BABEL_BP_101_81486_20111027_163851_inLine +BABEL_BP_101_81486_20111027_163851_outLine +BABEL_BP_101_82023_20111024_151938_inLine +BABEL_BP_101_82023_20111024_151938_outLine +BABEL_BP_101_82025_20111024_170514_inLine +BABEL_BP_101_82025_20111024_170514_outLine +BABEL_BP_101_82217_20111115_191956_inLine +BABEL_BP_101_82217_20111115_191956_outLine +BABEL_BP_101_82484_20111103_172542_inLine +BABEL_BP_101_82484_20111103_172542_outLine +BABEL_BP_101_82591_20111030_152731_inLine +BABEL_BP_101_82591_20111030_152731_outLine +BABEL_BP_101_82766_20111026_195127_inLine +BABEL_BP_101_82766_20111026_195127_outLine +BABEL_BP_101_82881_20111025_194316_inLine +BABEL_BP_101_82881_20111025_194316_outLine +BABEL_BP_101_83362_20111018_185746_inLine +BABEL_BP_101_83362_20111018_185746_outLine +BABEL_BP_101_83791_20111017_205314_inLine +BABEL_BP_101_83791_20111017_205314_outLine +BABEL_BP_101_84042_20111129_190132_inLine +BABEL_BP_101_84042_20111129_190132_outLine +BABEL_BP_101_84088_20111020_184621_inLine +BABEL_BP_101_84088_20111020_184621_outLine +BABEL_BP_101_84335_20111116_205244_inLine +BABEL_BP_101_84335_20111116_205244_outLine +BABEL_BP_101_84540_20111102_204218_inLine +BABEL_BP_101_84540_20111102_204218_outLine +BABEL_BP_101_84543_20111124_200551_inLine +BABEL_BP_101_84543_20111124_200551_outLine +BABEL_BP_101_84943_20111020_144955_inLine +BABEL_BP_101_84943_20111020_144955_outLine +BABEL_BP_101_85083_20111123_195138_inLine +BABEL_BP_101_85083_20111123_195138_outLine +BABEL_BP_101_85533_20111029_135232_inLine +BABEL_BP_101_85533_20111029_135232_outLine +BABEL_BP_101_85617_20111126_195610_inLine +BABEL_BP_101_85617_20111126_195610_outLine +BABEL_BP_101_85883_20111126_183750_inLine +BABEL_BP_101_85883_20111126_183750_outLine +BABEL_BP_101_85948_20111020_171625_inLine +BABEL_BP_101_85948_20111020_171626_outLine +BABEL_BP_101_86016_20111118_140325_inLine +BABEL_BP_101_86016_20111118_140326_outLine +BABEL_BP_101_86029_20111027_190831_inLine +BABEL_BP_101_86029_20111027_190831_outLine +BABEL_BP_101_86227_20111020_213628_inLine +BABEL_BP_101_86227_20111020_213628_outLine +BABEL_BP_101_86258_20111128_161415_inLine +BABEL_BP_101_86258_20111128_161415_outLine +BABEL_BP_101_86419_20111019_211829_inLine +BABEL_BP_101_86419_20111019_211829_outLine +BABEL_BP_101_86752_20111206_182753_inLine +BABEL_BP_101_86752_20111206_182753_outLine +BABEL_BP_101_86900_20111029_140540_inLine +BABEL_BP_101_86900_20111029_140540_outLine +BABEL_BP_101_87107_20111028_193807_inLine +BABEL_BP_101_87107_20111028_193807_outLine +BABEL_BP_101_87351_20111021_224242_inLine +BABEL_BP_101_87351_20111021_224242_outLine +BABEL_BP_101_87481_20111129_131455_inLine +BABEL_BP_101_87481_20111129_131455_outLine +BABEL_BP_101_87564_20111130_175930_inLine +BABEL_BP_101_87564_20111130_175930_outLine +BABEL_BP_101_87634_20111019_151449_inLine +BABEL_BP_101_87634_20111019_151449_outLine +BABEL_BP_101_87634_20111019_152457_inLine +BABEL_BP_101_87634_20111019_152457_outLine +BABEL_BP_101_88243_20111024_193201_inLine +BABEL_BP_101_88243_20111024_193201_outLine +BABEL_BP_101_88294_20111026_023525_inLine +BABEL_BP_101_88294_20111026_023525_outLine +BABEL_BP_101_88464_20111119_194433_inLine +BABEL_BP_101_88464_20111119_194433_outLine +BABEL_BP_101_88506_20111115_203514_inLine 
+BABEL_BP_101_88506_20111115_203514_outLine +BABEL_BP_101_88929_20111118_201818_inLine +BABEL_BP_101_88929_20111118_201818_outLine +BABEL_BP_101_89345_20111021_155741_inLine +BABEL_BP_101_89345_20111021_155741_outLine +BABEL_BP_101_89573_20111025_201747_inLine +BABEL_BP_101_89573_20111025_201747_outLine +BABEL_BP_101_89619_20111029_215743_inLine +BABEL_BP_101_89619_20111029_215743_outLine +BABEL_BP_101_89630_20111125_193140_inLine +BABEL_BP_101_89630_20111125_193140_outLine +BABEL_BP_101_89674_20111025_190234_inLine +BABEL_BP_101_89674_20111025_190234_outLine +BABEL_BP_101_89818_20111019_181821_inLine +BABEL_BP_101_89818_20111019_181821_outLine +BABEL_BP_101_89965_20111129_175314_inLine +BABEL_BP_101_89965_20111129_175314_outLine +BABEL_BP_101_90313_20111019_155232_inLine +BABEL_BP_101_90313_20111019_155232_outLine +BABEL_BP_101_90393_20111103_165919_inLine +BABEL_BP_101_90393_20111103_165919_outLine +BABEL_BP_101_90490_20111017_192604_inLine +BABEL_BP_101_90490_20111017_192604_outLine +BABEL_BP_101_90506_20111026_182007_inLine +BABEL_BP_101_90506_20111026_182007_outLine +BABEL_BP_101_90511_20111024_132449_inLine +BABEL_BP_101_90511_20111024_132449_outLine +BABEL_BP_101_90577_20111014_144604_inLine +BABEL_BP_101_90577_20111014_144605_outLine +BABEL_BP_101_90730_20111025_154632_inLine +BABEL_BP_101_90730_20111025_154632_outLine +BABEL_BP_101_90819_20111126_173557_inLine +BABEL_BP_101_90819_20111126_173557_outLine +BABEL_BP_101_90890_20111018_143525_inLine +BABEL_BP_101_90890_20111018_143526_outLine +BABEL_BP_101_90960_20111024_132656_outLine +BABEL_BP_101_91007_20111203_160119_inLine +BABEL_BP_101_91007_20111203_160119_outLine +BABEL_BP_101_91358_20111207_171552_inLine +BABEL_BP_101_91358_20111207_171552_outLine +BABEL_BP_101_91401_20111028_174554_inLine +BABEL_BP_101_91401_20111028_174554_outLine +BABEL_BP_101_91406_20111114_221433_inLine +BABEL_BP_101_91406_20111114_221433_outLine +BABEL_BP_101_91409_20111023_181828_inLine +BABEL_BP_101_91409_20111023_181828_outLine +BABEL_BP_101_91481_20111124_213929_inLine +BABEL_BP_101_91481_20111124_213929_outLine +BABEL_BP_101_91491_20111021_160657_outLine +BABEL_BP_101_91668_20111127_133044_inLine +BABEL_BP_101_91668_20111127_133044_outLine +BABEL_BP_101_91777_20111025_195108_outLine +BABEL_BP_101_91873_20111129_214832_inLine +BABEL_BP_101_91873_20111129_214832_outLine +BABEL_BP_101_91905_20111120_211325_inLine +BABEL_BP_101_91905_20111120_211325_outLine +BABEL_BP_101_91979_20111019_195336_inLine +BABEL_BP_101_91979_20111019_195336_outLine +BABEL_BP_101_92321_20111125_152246_inLine +BABEL_BP_101_92321_20111125_152246_outLine +BABEL_BP_101_92436_20111024_214516_inLine +BABEL_BP_101_92436_20111024_214516_outLine +BABEL_BP_101_92560_20111025_142040_outLine +BABEL_BP_101_92628_20111102_131604_inLine +BABEL_BP_101_92628_20111102_131604_outLine +BABEL_BP_101_92642_20111025_170509_inLine +BABEL_BP_101_92642_20111025_170509_outLine +BABEL_BP_101_92735_20111024_171657_inLine +BABEL_BP_101_92735_20111024_171658_outLine +BABEL_BP_101_92800_20111030_222032_inLine +BABEL_BP_101_92800_20111030_222032_outLine +BABEL_BP_101_93004_20111121_190213_inLine +BABEL_BP_101_93004_20111121_190213_outLine +BABEL_BP_101_93091_20111022_191333_inLine +BABEL_BP_101_93091_20111022_191333_outLine +BABEL_BP_101_93151_20111023_184643_inLine +BABEL_BP_101_93151_20111023_184644_outLine +BABEL_BP_101_93192_20111020_130226_inLine +BABEL_BP_101_93192_20111020_130226_outLine +BABEL_BP_101_93454_20111027_014223_inLine +BABEL_BP_101_93454_20111027_014223_outLine 
+BABEL_BP_101_93597_20111020_195543_outLine +BABEL_BP_101_93643_20111021_154435_inLine +BABEL_BP_101_93643_20111021_154435_outLine +BABEL_BP_101_94149_20111027_125107_inLine +BABEL_BP_101_94149_20111027_125107_outLine +BABEL_BP_101_94222_20111021_144043_outLine +BABEL_BP_101_94223_20111026_220859_inLine +BABEL_BP_101_94223_20111026_220859_outLine +BABEL_BP_101_94226_20111125_140433_inLine +BABEL_BP_101_94226_20111125_140433_outLine +BABEL_BP_101_94514_20111127_130706_inLine +BABEL_BP_101_94514_20111127_130706_outLine +BABEL_BP_101_94696_20111203_191827_inLine +BABEL_BP_101_94696_20111203_191827_outLine +BABEL_BP_101_94989_20111028_152522_inLine +BABEL_BP_101_94989_20111028_152522_outLine +BABEL_BP_101_95034_20111126_193931_inLine +BABEL_BP_101_95034_20111126_193931_outLine +BABEL_BP_101_95423_20111116_164510_inLine +BABEL_BP_101_95423_20111116_164510_outLine +BABEL_BP_101_95533_20111129_183735_inLine +BABEL_BP_101_95533_20111129_183735_outLine +BABEL_BP_101_95542_20111026_190957_inLine +BABEL_BP_101_95542_20111026_190957_outLine +BABEL_BP_101_95589_20111118_214545_inLine +BABEL_BP_101_95589_20111118_214545_outLine +BABEL_BP_101_95650_20111019_144529_inLine +BABEL_BP_101_95650_20111019_144529_outLine +BABEL_BP_101_95815_20111024_155626_inLine +BABEL_BP_101_95815_20111024_155626_outLine +BABEL_BP_101_96216_20111021_181529_inLine +BABEL_BP_101_96216_20111021_181529_outLine +BABEL_BP_101_96283_20111115_154603_inLine +BABEL_BP_101_96283_20111115_154603_outLine +BABEL_BP_101_96322_20111031_190734_inLine +BABEL_BP_101_96322_20111031_190734_outLine +BABEL_BP_101_96347_20111024_201758_inLine +BABEL_BP_101_96347_20111024_201758_outLine +BABEL_BP_101_96438_20111125_195114_inLine +BABEL_BP_101_96438_20111125_195114_outLine +BABEL_BP_101_96630_20111104_005203_inLine +BABEL_BP_101_96630_20111104_005203_outLine +BABEL_BP_101_97274_20111023_151720_inLine +BABEL_BP_101_97274_20111023_151720_outLine +BABEL_BP_101_97405_20111019_151334_inLine +BABEL_BP_101_97405_20111019_151334_outLine +BABEL_BP_101_97629_20111130_000852_inLine +BABEL_BP_101_97629_20111130_000852_outLine +BABEL_BP_101_97650_20111126_144341_inLine +BABEL_BP_101_97650_20111126_144341_outLine +BABEL_BP_101_98086_20111129_161539_inLine +BABEL_BP_101_98086_20111129_161539_outLine +BABEL_BP_101_98099_20111120_130108_inLine +BABEL_BP_101_98099_20111120_130108_outLine +BABEL_BP_101_98219_20111125_155849_inLine +BABEL_BP_101_98219_20111125_155849_outLine +BABEL_BP_101_98279_20111122_195453_inLine +BABEL_BP_101_98279_20111122_195453_outLine +BABEL_BP_101_98345_20111020_205712_outLine +BABEL_BP_101_98467_20111020_152253_inLine +BABEL_BP_101_98467_20111020_152253_outLine +BABEL_BP_101_98476_20111114_220758_inLine +BABEL_BP_101_98476_20111114_220758_outLine +BABEL_BP_101_99061_20111020_183348_outLine +BABEL_BP_101_99562_20111205_235804_inLine +BABEL_BP_101_99562_20111205_235804_outLine +BABEL_BP_101_99571_20111024_164204_inLine +BABEL_BP_101_99571_20111024_164204_outLine +BABEL_BP_101_99856_20111125_184505_inLine +BABEL_BP_101_99856_20111125_184505_outLine diff --git a/egs/babel/s5d/conf/lists/101-cantonese/train.LimitedLP.list b/egs/babel/s5d/conf/lists/101-cantonese/train.LimitedLP.list new file mode 100644 index 00000000000..84f6e984f4b --- /dev/null +++ b/egs/babel/s5d/conf/lists/101-cantonese/train.LimitedLP.list @@ -0,0 +1,120 @@ +BABEL_BP_101_11694_20111204_205320_inLine +BABEL_BP_101_11694_20111204_205320_outLine +BABEL_BP_101_14054_20111119_163712_inLine +BABEL_BP_101_14054_20111119_163712_outLine +BABEL_BP_101_14729_20111114_200940_inLine 
+BABEL_BP_101_14729_20111114_200940_outLine +BABEL_BP_101_15873_20111027_121806_inLine +BABEL_BP_101_15873_20111027_121806_outLine +BABEL_BP_101_16617_20111030_144124_inLine +BABEL_BP_101_16617_20111030_144124_outLine +BABEL_BP_101_16883_20111122_184255_inLine +BABEL_BP_101_16883_20111122_184255_outLine +BABEL_BP_101_17933_20111120_204846_inLine +BABEL_BP_101_17933_20111120_204846_outLine +BABEL_BP_101_19012_20111122_173413_inLine +BABEL_BP_101_19012_20111122_173413_outLine +BABEL_BP_101_20408_20111101_210200_inLine +BABEL_BP_101_20408_20111101_210200_outLine +BABEL_BP_101_21367_20111126_132150_inLine +BABEL_BP_101_21367_20111126_132150_outLine +BABEL_BP_101_21946_20111122_150655_inLine +BABEL_BP_101_21946_20111122_150655_outLine +BABEL_BP_101_22979_20111129_142742_inLine +BABEL_BP_101_22979_20111129_142742_outLine +BABEL_BP_101_23168_20111120_192134_inLine +BABEL_BP_101_23168_20111120_192134_outLine +BABEL_BP_101_23571_20111128_232031_inLine +BABEL_BP_101_23571_20111128_232031_outLine +BABEL_BP_101_28204_20111025_133714_inLine +BABEL_BP_101_28204_20111025_133714_outLine +BABEL_BP_101_36424_20111119_145307_inLine +BABEL_BP_101_36424_20111119_145307_outLine +BABEL_BP_101_37285_20111028_003951_inLine +BABEL_BP_101_37285_20111028_003951_outLine +BABEL_BP_101_38108_20111125_153427_inLine +BABEL_BP_101_38108_20111125_153427_outLine +BABEL_BP_101_38879_20111029_193700_inLine +BABEL_BP_101_38879_20111029_193701_outLine +BABEL_BP_101_40123_20111129_182232_inLine +BABEL_BP_101_40123_20111129_182232_outLine +BABEL_BP_101_40439_20111203_182814_inLine +BABEL_BP_101_40439_20111203_182814_outLine +BABEL_BP_101_42145_20111117_131023_inLine +BABEL_BP_101_42145_20111117_131023_outLine +BABEL_BP_101_44836_20111119_154154_inLine +BABEL_BP_101_44836_20111119_154154_outLine +BABEL_BP_101_46332_20111103_181337_inLine +BABEL_BP_101_46332_20111103_181337_outLine +BABEL_BP_101_50409_20111204_161529_inLine +BABEL_BP_101_50409_20111204_161529_outLine +BABEL_BP_101_50476_20111130_010429_inLine +BABEL_BP_101_50476_20111130_010429_outLine +BABEL_BP_101_53985_20111027_134232_inLine +BABEL_BP_101_53985_20111027_134232_outLine +BABEL_BP_101_54787_20111027_003335_inLine +BABEL_BP_101_54787_20111027_003335_outLine +BABEL_BP_101_56648_20111126_183128_inLine +BABEL_BP_101_56648_20111126_183128_outLine +BABEL_BP_101_58190_20111124_203150_inLine +BABEL_BP_101_58190_20111124_203150_outLine +BABEL_BP_101_58357_20111122_155154_inLine +BABEL_BP_101_58357_20111122_155154_outLine +BABEL_BP_101_59028_20111130_201120_inLine +BABEL_BP_101_59028_20111130_201120_outLine +BABEL_BP_101_59925_20111203_131501_inLine +BABEL_BP_101_59925_20111203_131501_outLine +BABEL_BP_101_63459_20111120_165000_inLine +BABEL_BP_101_63459_20111120_165000_outLine +BABEL_BP_101_66839_20111120_192904_inLine +BABEL_BP_101_66839_20111120_192904_outLine +BABEL_BP_101_66964_20111117_173710_inLine +BABEL_BP_101_66964_20111117_173710_outLine +BABEL_BP_101_67798_20111104_013951_inLine +BABEL_BP_101_67798_20111104_013951_outLine +BABEL_BP_101_68129_20111120_133854_inLine +BABEL_BP_101_68129_20111120_133854_outLine +BABEL_BP_101_69049_20111102_140355_inLine +BABEL_BP_101_69049_20111102_140355_outLine +BABEL_BP_101_74395_20111117_135831_inLine +BABEL_BP_101_74395_20111117_135831_outLine +BABEL_BP_101_76944_20111114_210715_inLine +BABEL_BP_101_76944_20111114_210715_outLine +BABEL_BP_101_77244_20111127_021035_inLine +BABEL_BP_101_77244_20111127_021035_outLine +BABEL_BP_101_78879_20111102_133430_inLine +BABEL_BP_101_78879_20111102_133430_outLine 
+BABEL_BP_101_80008_20111031_180815_inLine +BABEL_BP_101_80008_20111031_180815_outLine +BABEL_BP_101_80535_20111206_193024_inLine +BABEL_BP_101_80535_20111206_193024_outLine +BABEL_BP_101_81486_20111027_163851_inLine +BABEL_BP_101_81486_20111027_163851_outLine +BABEL_BP_101_82217_20111115_191956_inLine +BABEL_BP_101_82217_20111115_191956_outLine +BABEL_BP_101_86016_20111118_140325_inLine +BABEL_BP_101_86016_20111118_140326_outLine +BABEL_BP_101_88464_20111119_194433_inLine +BABEL_BP_101_88464_20111119_194433_outLine +BABEL_BP_101_91358_20111207_171552_inLine +BABEL_BP_101_91358_20111207_171552_outLine +BABEL_BP_101_91406_20111114_221433_inLine +BABEL_BP_101_91406_20111114_221433_outLine +BABEL_BP_101_92321_20111125_152246_inLine +BABEL_BP_101_92321_20111125_152246_outLine +BABEL_BP_101_92628_20111102_131604_inLine +BABEL_BP_101_92628_20111102_131604_outLine +BABEL_BP_101_94696_20111203_191827_inLine +BABEL_BP_101_94696_20111203_191827_outLine +BABEL_BP_101_94989_20111028_152522_inLine +BABEL_BP_101_94989_20111028_152522_outLine +BABEL_BP_101_95542_20111026_190957_inLine +BABEL_BP_101_95542_20111026_190957_outLine +BABEL_BP_101_96438_20111125_195114_inLine +BABEL_BP_101_96438_20111125_195114_outLine +BABEL_BP_101_96630_20111104_005203_inLine +BABEL_BP_101_96630_20111104_005203_outLine +BABEL_BP_101_98086_20111129_161539_inLine +BABEL_BP_101_98086_20111129_161539_outLine +BABEL_BP_101_98219_20111125_155849_inLine +BABEL_BP_101_98219_20111125_155849_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/dev.list b/egs/babel/s5d/conf/lists/102-assamese/dev.list new file mode 100644 index 00000000000..044d46cc85a --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/dev.list @@ -0,0 +1,126 @@ +BABEL_OP1_102_10408_20121105_223454_inLine +BABEL_OP1_102_10408_20121105_223454_outLine +BABEL_OP1_102_10925_20120329_192327_inLine +BABEL_OP1_102_10925_20120329_192327_outLine +BABEL_OP1_102_13450_20120421_200138_inLine +BABEL_OP1_102_13450_20120421_200138_outLine +BABEL_OP1_102_13879_20121112_220931_inLine +BABEL_OP1_102_13879_20121112_220931_outLine +BABEL_OP1_102_17900_20120331_195842_inLine +BABEL_OP1_102_17900_20120331_195842_outLine +BABEL_OP1_102_18672_20120614_212320_inLine +BABEL_OP1_102_18672_20120614_212320_outLine +BABEL_OP1_102_20518_20120618_155945_inLine +BABEL_OP1_102_20518_20120618_155945_outLine +BABEL_OP1_102_21370_20120410_231048_inLine +BABEL_OP1_102_21370_20120410_231048_outLine +BABEL_OP1_102_25502_20120404_190523_inLine +BABEL_OP1_102_25502_20120404_190523_outLine +BABEL_OP1_102_27178_20120409_211226_inLine +BABEL_OP1_102_27178_20120409_211226_outLine +BABEL_OP1_102_27698_20120328_165641_inLine +BABEL_OP1_102_27698_20120328_165641_outLine +BABEL_OP1_102_29988_20120805_160211_inLine +BABEL_OP1_102_29988_20120805_160211_outLine +BABEL_OP1_102_31345_20121010_194432_inLine +BABEL_OP1_102_31345_20121010_194432_outLine +BABEL_OP1_102_31345_20121010_195905_inLine +BABEL_OP1_102_31345_20121010_195905_outLine +BABEL_OP1_102_32962_20120427_215011_inLine +BABEL_OP1_102_32962_20120427_215011_outLine +BABEL_OP1_102_33704_20130204_172729_inLine +BABEL_OP1_102_33704_20130204_172729_outLine +BABEL_OP1_102_33969_20130123_165132_inLine +BABEL_OP1_102_33969_20130123_165132_outLine +BABEL_OP1_102_34446_20120426_194557_inLine +BABEL_OP1_102_34446_20120426_194557_outLine +BABEL_OP1_102_34446_20120426_195519_inLine +BABEL_OP1_102_34446_20120426_195519_outLine +BABEL_OP1_102_35470_20130122_212719_inLine +BABEL_OP1_102_35470_20130122_212719_outLine +BABEL_OP1_102_36391_20130127_213459_inLine 
+BABEL_OP1_102_36391_20130127_213459_outLine +BABEL_OP1_102_38879_20120410_224941_inLine +BABEL_OP1_102_38879_20120410_224941_outLine +BABEL_OP1_102_40385_20121224_164959_inLine +BABEL_OP1_102_40385_20121224_164959_outLine +BABEL_OP1_102_41989_20120410_220614_inLine +BABEL_OP1_102_41989_20120410_220614_outLine +BABEL_OP1_102_43587_20120607_204145_inLine +BABEL_OP1_102_43587_20120607_204145_outLine +BABEL_OP1_102_45106_20120318_191747_inLine +BABEL_OP1_102_45106_20120318_191747_outLine +BABEL_OP1_102_45678_20120328_224850_inLine +BABEL_OP1_102_45678_20120328_224850_outLine +BABEL_OP1_102_45786_20121016_025157_inLine +BABEL_OP1_102_45786_20121016_025157_outLine +BABEL_OP1_102_46593_20121010_023019_inLine +BABEL_OP1_102_46593_20121010_023019_outLine +BABEL_OP1_102_47429_20130121_172000_inLine +BABEL_OP1_102_47429_20130121_172000_outLine +BABEL_OP1_102_47469_20120411_181423_inLine +BABEL_OP1_102_47469_20120411_181423_outLine +BABEL_OP1_102_48812_20120420_004425_inLine +BABEL_OP1_102_48812_20120420_004425_outLine +BABEL_OP1_102_49351_20121214_224227_inLine +BABEL_OP1_102_49351_20121214_224227_outLine +BABEL_OP1_102_50589_20120401_163239_inLine +BABEL_OP1_102_50589_20120401_163239_outLine +BABEL_OP1_102_53179_20121009_225324_inLine +BABEL_OP1_102_53179_20121009_225324_outLine +BABEL_OP1_102_54358_20120908_182858_inLine +BABEL_OP1_102_54358_20120908_182858_outLine +BABEL_OP1_102_54785_20120928_184426_inLine +BABEL_OP1_102_54785_20120928_184426_outLine +BABEL_OP1_102_55355_20121007_010642_inLine +BABEL_OP1_102_55355_20121007_010642_outLine +BABEL_OP1_102_56868_20120410_224604_inLine +BABEL_OP1_102_56868_20120410_224604_outLine +BABEL_OP1_102_59544_20120401_222134_inLine +BABEL_OP1_102_59544_20120401_222134_outLine +BABEL_OP1_102_59746_20120820_014637_inLine +BABEL_OP1_102_59746_20120820_014637_outLine +BABEL_OP1_102_62160_20120422_220826_inLine +BABEL_OP1_102_62160_20120422_220826_outLine +BABEL_OP1_102_64661_20120422_194219_inLine +BABEL_OP1_102_64661_20120422_194219_outLine +BABEL_OP1_102_64880_20121107_190955_inLine +BABEL_OP1_102_64880_20121107_190955_outLine +BABEL_OP1_102_66103_20121006_184826_inLine +BABEL_OP1_102_66103_20121006_184826_outLine +BABEL_OP1_102_66668_20120409_185702_inLine +BABEL_OP1_102_66668_20120409_185702_outLine +BABEL_OP1_102_68706_20120412_221059_inLine +BABEL_OP1_102_68706_20120412_221100_outLine +BABEL_OP1_102_69052_20120506_162432_inLine +BABEL_OP1_102_69052_20120506_162432_outLine +BABEL_OP1_102_70643_20121108_030513_inLine +BABEL_OP1_102_70643_20121108_030513_outLine +BABEL_OP1_102_73122_20120427_225442_inLine +BABEL_OP1_102_73122_20120427_225442_outLine +BABEL_OP1_102_73122_20120427_230538_inLine +BABEL_OP1_102_73122_20120427_230538_outLine +BABEL_OP1_102_77886_20120407_215452_inLine +BABEL_OP1_102_77886_20120407_215452_outLine +BABEL_OP1_102_79519_20121008_214502_inLine +BABEL_OP1_102_79519_20121008_214502_outLine +BABEL_OP1_102_80856_20120423_184225_inLine +BABEL_OP1_102_80856_20120423_184225_outLine +BABEL_OP1_102_84042_20120806_194540_inLine +BABEL_OP1_102_84042_20120806_194540_outLine +BABEL_OP1_102_84532_20121222_152400_inLine +BABEL_OP1_102_84532_20121222_152400_outLine +BABEL_OP1_102_84700_20130104_162152_inLine +BABEL_OP1_102_84700_20130104_162152_outLine +BABEL_OP1_102_86305_20120408_170901_inLine +BABEL_OP1_102_86305_20120408_170901_outLine +BABEL_OP1_102_87671_20120401_172420_inLine +BABEL_OP1_102_87671_20120401_172420_outLine +BABEL_OP1_102_87885_20121113_193407_inLine +BABEL_OP1_102_87885_20121113_193407_outLine 
+BABEL_OP1_102_88245_20121010_173153_inLine +BABEL_OP1_102_88245_20121010_173153_outLine +BABEL_OP1_102_88464_20120612_191239_inLine +BABEL_OP1_102_88464_20120612_191239_outLine +BABEL_OP1_102_90313_20120407_173340_inLine +BABEL_OP1_102_90313_20120407_173340_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/eval.list b/egs/babel/s5d/conf/lists/102-assamese/eval.list new file mode 100644 index 00000000000..f9c825384ea --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/eval.list @@ -0,0 +1,189 @@ +BABEL_OP1_102_10033_20120330_194952_inLine +BABEL_OP1_102_10033_20120330_194952_outLine +BABEL_OP1_102_11824_20120425_195521_inLine +BABEL_OP1_102_11824_20120425_195521_outLine +BABEL_OP1_102_13635_20121106_201907_inLine +BABEL_OP1_102_13635_20121106_201907_outLine +BABEL_OP1_102_14075_20120729_184929_inLine +BABEL_OP1_102_14075_20120729_184929_outLine +BABEL_OP1_102_16875_20121224_191839_inLine +BABEL_OP1_102_16875_20121224_191839_outLine +BABEL_OP1_102_16984_20120817_222744_inLine +BABEL_OP1_102_16984_20120817_222744_outLine +BABEL_OP1_102_18648_20121220_162525_inLine +BABEL_OP1_102_18648_20121220_162525_outLine +BABEL_OP1_102_18858_20120328_182337_inLine +BABEL_OP1_102_18858_20120328_182337_outLine +BABEL_OP1_102_19479_20130126_224027_inLine +BABEL_OP1_102_19479_20130126_224027_outLine +BABEL_OP1_102_20483_20120427_223135_inLine +BABEL_OP1_102_20483_20120427_223135_outLine +BABEL_OP1_102_20685_20120327_193652_inLine +BABEL_OP1_102_20685_20120327_193652_outLine +BABEL_OP1_102_22566_20121106_194723_inLine +BABEL_OP1_102_22566_20121106_194723_outLine +BABEL_OP1_102_24379_20120928_162955_inLine +BABEL_OP1_102_24379_20120928_162955_outLine +BABEL_OP1_102_27363_20121106_193315_inLine +BABEL_OP1_102_27363_20121106_193315_outLine +BABEL_OP1_102_27645_20121001_010501_inLine +BABEL_OP1_102_27645_20121001_010501_outLine +BABEL_OP1_102_28754_20130128_193759_inLine +BABEL_OP1_102_28754_20130128_193759_outLine +BABEL_OP1_102_28754_20130128_194940_inLine +BABEL_OP1_102_28754_20130128_194940_outLine +BABEL_OP1_102_28768_20121219_231954_inLine +BABEL_OP1_102_28768_20121219_231954_outLine +BABEL_OP1_102_29268_20120410_182212_inLine +BABEL_OP1_102_29268_20120410_182212_outLine +BABEL_OP1_102_29290_20120408_172044_inLine +BABEL_OP1_102_29290_20120408_172044_outLine +BABEL_OP1_102_30210_20121104_182918_outLine +BABEL_OP1_102_32452_20120427_181559_inLine +BABEL_OP1_102_32452_20120427_181559_outLine +BABEL_OP1_102_32452_20120427_183038_inLine +BABEL_OP1_102_32452_20120427_183038_outLine +BABEL_OP1_102_32890_20121114_200236_inLine +BABEL_OP1_102_32890_20121114_200236_outLine +BABEL_OP1_102_34439_20121106_033220_inLine +BABEL_OP1_102_34439_20121106_033220_outLine +BABEL_OP1_102_39915_20130126_231519_inLine +BABEL_OP1_102_39915_20130126_231519_outLine +BABEL_OP1_102_41590_20121114_173839_inLine +BABEL_OP1_102_41590_20121114_173839_outLine +BABEL_OP1_102_42212_20121108_215733_inLine +BABEL_OP1_102_42212_20121108_215733_outLine +BABEL_OP1_102_42768_20120719_001335_inLine +BABEL_OP1_102_42768_20120719_001335_outLine +BABEL_OP1_102_44369_20121104_184516_inLine +BABEL_OP1_102_44369_20121104_184516_outLine +BABEL_OP1_102_44827_20130127_025842_inLine +BABEL_OP1_102_44827_20130127_025842_outLine +BABEL_OP1_102_45472_20120328_164753_inLine +BABEL_OP1_102_45472_20120328_164753_outLine +BABEL_OP1_102_45570_20120716_014312_inLine +BABEL_OP1_102_45570_20120716_014312_outLine +BABEL_OP1_102_46409_20130127_020220_inLine +BABEL_OP1_102_46409_20130127_020220_outLine +BABEL_OP1_102_46427_20120623_181054_inLine 
+BABEL_OP1_102_46427_20120623_181054_outLine +BABEL_OP1_102_46813_20120722_222747_inLine +BABEL_OP1_102_46813_20120722_222747_outLine +BABEL_OP1_102_46950_20130128_024910_inLine +BABEL_OP1_102_46950_20130128_024910_outLine +BABEL_OP1_102_47249_20121110_184344_inLine +BABEL_OP1_102_47249_20121110_184344_outLine +BABEL_OP1_102_48072_20120405_174716_inLine +BABEL_OP1_102_48072_20120405_174716_outLine +BABEL_OP1_102_48188_20121114_175337_inLine +BABEL_OP1_102_48188_20121114_175337_outLine +BABEL_OP1_102_48191_20121222_233713_inLine +BABEL_OP1_102_48191_20121222_233713_outLine +BABEL_OP1_102_48404_20121223_171643_inLine +BABEL_OP1_102_48404_20121223_171643_outLine +BABEL_OP1_102_49020_20121114_165007_inLine +BABEL_OP1_102_49020_20121114_165007_outLine +BABEL_OP1_102_49306_20120807_210522_inLine +BABEL_OP1_102_49306_20120807_210522_outLine +BABEL_OP1_102_49476_20120623_191532_inLine +BABEL_OP1_102_49476_20120623_191532_outLine +BABEL_OP1_102_50915_20130127_185334_inLine +BABEL_OP1_102_50915_20130127_185334_outLine +BABEL_OP1_102_51374_20120328_232452_inLine +BABEL_OP1_102_51374_20120328_232452_outLine +BABEL_OP1_102_51791_20120729_183323_inLine +BABEL_OP1_102_51791_20120729_183323_outLine +BABEL_OP1_102_53866_20120401_203758_inLine +BABEL_OP1_102_53866_20120401_203758_outLine +BABEL_OP1_102_54315_20120420_202214_inLine +BABEL_OP1_102_54315_20120420_202214_outLine +BABEL_OP1_102_55144_20120418_220307_inLine +BABEL_OP1_102_55144_20120418_220307_outLine +BABEL_OP1_102_55369_20120331_183350_inLine +BABEL_OP1_102_55369_20120331_183350_outLine +BABEL_OP1_102_55369_20120331_184706_inLine +BABEL_OP1_102_55369_20120331_184706_outLine +BABEL_OP1_102_55678_20120411_170804_inLine +BABEL_OP1_102_55678_20120411_170804_outLine +BABEL_OP1_102_57071_20120806_181947_inLine +BABEL_OP1_102_57071_20120806_181947_outLine +BABEL_OP1_102_57551_20120423_192651_inLine +BABEL_OP1_102_57551_20120423_192651_outLine +BABEL_OP1_102_57609_20121003_192352_inLine +BABEL_OP1_102_57609_20121003_192352_outLine +BABEL_OP1_102_57625_20121002_011432_inLine +BABEL_OP1_102_57625_20121002_011432_outLine +BABEL_OP1_102_57637_20130127_030012_inLine +BABEL_OP1_102_57637_20130127_030012_outLine +BABEL_OP1_102_59147_20120329_204323_inLine +BABEL_OP1_102_59147_20120329_204323_outLine +BABEL_OP1_102_65783_20130127_014613_inLine +BABEL_OP1_102_65783_20130127_014613_outLine +BABEL_OP1_102_66798_20120401_215538_inLine +BABEL_OP1_102_66798_20120401_215538_outLine +BABEL_OP1_102_67555_20120401_162516_inLine +BABEL_OP1_102_67555_20120401_162516_outLine +BABEL_OP1_102_68028_20121014_031021_inLine +BABEL_OP1_102_68028_20121014_031021_outLine +BABEL_OP1_102_68136_20130127_022217_inLine +BABEL_OP1_102_68136_20130127_022217_outLine +BABEL_OP1_102_69473_20121104_215944_inLine +BABEL_OP1_102_69473_20121104_215944_outLine +BABEL_OP1_102_70906_20121104_210914_inLine +BABEL_OP1_102_70906_20121104_210914_outLine +BABEL_OP1_102_70975_20130126_220855_inLine +BABEL_OP1_102_70975_20130126_220855_outLine +BABEL_OP1_102_73205_20120409_210950_inLine +BABEL_OP1_102_73205_20120409_210950_outLine +BABEL_OP1_102_74062_20121225_190622_inLine +BABEL_OP1_102_74062_20121225_190622_outLine +BABEL_OP1_102_74607_20120425_221930_inLine +BABEL_OP1_102_74607_20120425_221930_outLine +BABEL_OP1_102_75020_20120328_234502_inLine +BABEL_OP1_102_75020_20120328_234502_outLine +BABEL_OP1_102_76333_20130127_032712_inLine +BABEL_OP1_102_76333_20130127_032712_outLine +BABEL_OP1_102_76372_20121112_041800_inLine +BABEL_OP1_102_76372_20121112_041800_outLine 
+BABEL_OP1_102_76763_20120330_231328_inLine +BABEL_OP1_102_76763_20120330_231328_outLine +BABEL_OP1_102_76878_20121112_041639_inLine +BABEL_OP1_102_76878_20121112_041639_outLine +BABEL_OP1_102_76925_20130127_021046_inLine +BABEL_OP1_102_76925_20130127_021046_outLine +BABEL_OP1_102_77584_20121114_173809_inLine +BABEL_OP1_102_77584_20121114_173809_outLine +BABEL_OP1_102_77965_20120327_195119_inLine +BABEL_OP1_102_77965_20120327_195119_outLine +BABEL_OP1_102_78245_20120421_181224_inLine +BABEL_OP1_102_78245_20120421_181224_outLine +BABEL_OP1_102_78728_20120430_194848_inLine +BABEL_OP1_102_78728_20120430_194848_outLine +BABEL_OP1_102_81944_20121112_011411_inLine +BABEL_OP1_102_81944_20121112_011411_outLine +BABEL_OP1_102_83053_20130209_201738_inLine +BABEL_OP1_102_83053_20130209_201738_outLine +BABEL_OP1_102_83053_20130209_224536_inLine +BABEL_OP1_102_83053_20130209_224536_outLine +BABEL_OP1_102_83362_20120419_230220_inLine +BABEL_OP1_102_83362_20120419_230220_outLine +BABEL_OP1_102_83585_20120428_191954_inLine +BABEL_OP1_102_83585_20120428_191954_outLine +BABEL_OP1_102_86014_20120607_010221_inLine +BABEL_OP1_102_86014_20120607_010221_outLine +BABEL_OP1_102_88385_20121226_173154_inLine +BABEL_OP1_102_88385_20121226_173154_outLine +BABEL_OP1_102_88932_20120428_164025_inLine +BABEL_OP1_102_88932_20120428_164025_outLine +BABEL_OP1_102_89301_20120927_001102_inLine +BABEL_OP1_102_89301_20120927_001102_outLine +BABEL_OP1_102_91660_20130123_181342_inLine +BABEL_OP1_102_91660_20130123_181342_outLine +BABEL_OP1_102_93000_20120426_203959_inLine +BABEL_OP1_102_93000_20120426_203959_outLine +BABEL_OP1_102_93454_20120331_220854_inLine +BABEL_OP1_102_93454_20120331_220854_outLine +BABEL_OP1_102_95572_20130128_023142_inLine +BABEL_OP1_102_95572_20130128_023142_outLine +BABEL_OP1_102_95952_20121111_182203_inLine +BABEL_OP1_102_95952_20121111_182203_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/evalpart1.list b/egs/babel/s5d/conf/lists/102-assamese/evalpart1.list new file mode 100644 index 00000000000..b6a7ec78017 --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/evalpart1.list @@ -0,0 +1,65 @@ +BABEL_OP1_102_11824_20120425_195521_inLine +BABEL_OP1_102_11824_20120425_195521_outLine +BABEL_OP1_102_16984_20120817_222744_inLine +BABEL_OP1_102_16984_20120817_222744_outLine +BABEL_OP1_102_18858_20120328_182337_inLine +BABEL_OP1_102_18858_20120328_182337_outLine +BABEL_OP1_102_20685_20120327_193652_inLine +BABEL_OP1_102_20685_20120327_193652_outLine +BABEL_OP1_102_22566_20121106_194723_inLine +BABEL_OP1_102_22566_20121106_194723_outLine +BABEL_OP1_102_24379_20120928_162955_inLine +BABEL_OP1_102_24379_20120928_162955_outLine +BABEL_OP1_102_27645_20121001_010501_inLine +BABEL_OP1_102_27645_20121001_010501_outLine +BABEL_OP1_102_28754_20130128_193759_inLine +BABEL_OP1_102_28754_20130128_193759_outLine +BABEL_OP1_102_28754_20130128_194940_inLine +BABEL_OP1_102_28754_20130128_194940_outLine +BABEL_OP1_102_28768_20121219_231954_inLine +BABEL_OP1_102_28768_20121219_231954_outLine +BABEL_OP1_102_29268_20120410_182212_inLine +BABEL_OP1_102_29268_20120410_182212_outLine +BABEL_OP1_102_30210_20121104_182918_outLine +BABEL_OP1_102_42768_20120719_001335_inLine +BABEL_OP1_102_42768_20120719_001335_outLine +BABEL_OP1_102_45570_20120716_014312_inLine +BABEL_OP1_102_45570_20120716_014312_outLine +BABEL_OP1_102_46427_20120623_181054_inLine +BABEL_OP1_102_46427_20120623_181054_outLine +BABEL_OP1_102_46813_20120722_222747_inLine +BABEL_OP1_102_46813_20120722_222747_outLine 
+BABEL_OP1_102_47249_20121110_184344_inLine +BABEL_OP1_102_47249_20121110_184344_outLine +BABEL_OP1_102_49476_20120623_191532_inLine +BABEL_OP1_102_49476_20120623_191532_outLine +BABEL_OP1_102_51791_20120729_183323_inLine +BABEL_OP1_102_51791_20120729_183323_outLine +BABEL_OP1_102_57551_20120423_192651_inLine +BABEL_OP1_102_57551_20120423_192651_outLine +BABEL_OP1_102_57625_20121002_011432_inLine +BABEL_OP1_102_57625_20121002_011432_outLine +BABEL_OP1_102_66798_20120401_215538_inLine +BABEL_OP1_102_66798_20120401_215538_outLine +BABEL_OP1_102_70906_20121104_210914_inLine +BABEL_OP1_102_70906_20121104_210914_outLine +BABEL_OP1_102_73205_20120409_210950_inLine +BABEL_OP1_102_73205_20120409_210950_outLine +BABEL_OP1_102_74062_20121225_190622_inLine +BABEL_OP1_102_74062_20121225_190622_outLine +BABEL_OP1_102_78245_20120421_181224_inLine +BABEL_OP1_102_78245_20120421_181224_outLine +BABEL_OP1_102_81944_20121112_011411_inLine +BABEL_OP1_102_81944_20121112_011411_outLine +BABEL_OP1_102_83053_20130209_201738_inLine +BABEL_OP1_102_83053_20130209_201738_outLine +BABEL_OP1_102_83053_20130209_224536_inLine +BABEL_OP1_102_83053_20130209_224536_outLine +BABEL_OP1_102_83362_20120419_230220_inLine +BABEL_OP1_102_83362_20120419_230220_outLine +BABEL_OP1_102_83585_20120428_191954_inLine +BABEL_OP1_102_83585_20120428_191954_outLine +BABEL_OP1_102_93000_20120426_203959_inLine +BABEL_OP1_102_93000_20120426_203959_outLine +BABEL_OP1_102_93454_20120331_220854_inLine +BABEL_OP1_102_93454_20120331_220854_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/train.FullLP.list b/egs/babel/s5d/conf/lists/102-assamese/train.FullLP.list new file mode 100644 index 00000000000..4e388dab16c --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/train.FullLP.list @@ -0,0 +1,790 @@ +BABEL_OP1_102_10187_20120405_173448_inLine +BABEL_OP1_102_10187_20120405_173448_outLine +BABEL_OP1_102_10271_20120729_173749_inLine +BABEL_OP1_102_10271_20120729_173749_outLine +BABEL_OP1_102_10713_20120401_204236_inLine +BABEL_OP1_102_10713_20120401_204236_outLine +BABEL_OP1_102_11004_20120420_213442_inLine +BABEL_OP1_102_11004_20120420_213442_outLine +BABEL_OP1_102_11031_20120926_231829_inLine +BABEL_OP1_102_11031_20120926_231829_outLine +BABEL_OP1_102_11036_20120406_202335_inLine +BABEL_OP1_102_11036_20120406_202335_outLine +BABEL_OP1_102_11158_20121008_011850_inLine +BABEL_OP1_102_11158_20121008_011850_outLine +BABEL_OP1_102_11371_20120327_175933_inLine +BABEL_OP1_102_11371_20120327_175933_outLine +BABEL_OP1_102_11521_20121005_005530_inLine +BABEL_OP1_102_11521_20121005_005530_outLine +BABEL_OP1_102_11694_20121108_184639_inLine +BABEL_OP1_102_11694_20121108_184639_outLine +BABEL_OP1_102_12120_20121105_205527_inLine +BABEL_OP1_102_12120_20121105_205527_outLine +BABEL_OP1_102_12486_20121009_231421_inLine +BABEL_OP1_102_12486_20121009_231421_outLine +BABEL_OP1_102_12535_20121009_024245_inLine +BABEL_OP1_102_12535_20121009_024245_outLine +BABEL_OP1_102_12552_20120727_023454_inLine +BABEL_OP1_102_12552_20120727_023454_outLine +BABEL_OP1_102_12643_20121108_184648_inLine +BABEL_OP1_102_12643_20121108_184648_outLine +BABEL_OP1_102_12655_20120318_171708_inLine +BABEL_OP1_102_12655_20120318_171708_outLine +BABEL_OP1_102_12844_20120411_193813_inLine +BABEL_OP1_102_12844_20120411_193813_outLine +BABEL_OP1_102_13229_20130127_023814_inLine +BABEL_OP1_102_13229_20130127_023814_outLine +BABEL_OP1_102_13389_20120406_184440_inLine +BABEL_OP1_102_13389_20120406_184440_outLine +BABEL_OP1_102_13702_20130121_185149_inLine 
+BABEL_OP1_102_13702_20130121_185149_outLine +BABEL_OP1_102_13913_20120807_001423_inLine +BABEL_OP1_102_13913_20120807_001423_outLine +BABEL_OP1_102_14769_20120926_165746_inLine +BABEL_OP1_102_14769_20120926_165746_outLine +BABEL_OP1_102_14874_20120417_153112_inLine +BABEL_OP1_102_14874_20120417_153112_outLine +BABEL_OP1_102_14891_20121009_003232_inLine +BABEL_OP1_102_14891_20121009_003232_outLine +BABEL_OP1_102_15146_20120318_184752_inLine +BABEL_OP1_102_15146_20120318_184752_outLine +BABEL_OP1_102_15234_20121108_022333_inLine +BABEL_OP1_102_15234_20121108_022333_outLine +BABEL_OP1_102_15493_20130127_203044_inLine +BABEL_OP1_102_15493_20130127_203044_outLine +BABEL_OP1_102_15502_20120419_233859_inLine +BABEL_OP1_102_15502_20120419_233859_outLine +BABEL_OP1_102_15502_20120420_000213_inLine +BABEL_OP1_102_15502_20120420_000213_outLine +BABEL_OP1_102_15881_20120331_215830_inLine +BABEL_OP1_102_15881_20120331_215830_outLine +BABEL_OP1_102_15916_20120428_221806_inLine +BABEL_OP1_102_15916_20120428_221806_outLine +BABEL_OP1_102_16167_20130122_175936_inLine +BABEL_OP1_102_16167_20130122_175936_outLine +BABEL_OP1_102_16185_20121105_042129_inLine +BABEL_OP1_102_16185_20121105_042129_outLine +BABEL_OP1_102_16313_20120331_215132_inLine +BABEL_OP1_102_16313_20120331_215132_outLine +BABEL_OP1_102_16669_20120327_202211_inLine +BABEL_OP1_102_16669_20120327_202211_outLine +BABEL_OP1_102_17013_20121105_230820_inLine +BABEL_OP1_102_17013_20121105_230820_outLine +BABEL_OP1_102_17203_20121221_161532_inLine +BABEL_OP1_102_17203_20121221_161532_outLine +BABEL_OP1_102_17207_20120729_230128_inLine +BABEL_OP1_102_17207_20120729_230128_outLine +BABEL_OP1_102_17572_20120806_235812_inLine +BABEL_OP1_102_17572_20120806_235812_outLine +BABEL_OP1_102_17933_20120607_184111_inLine +BABEL_OP1_102_17933_20120607_184111_outLine +BABEL_OP1_102_18344_20121109_192858_inLine +BABEL_OP1_102_18344_20121109_192858_outLine +BABEL_OP1_102_18534_20121105_185859_inLine +BABEL_OP1_102_18534_20121105_185859_outLine +BABEL_OP1_102_18730_20130122_171244_inLine +BABEL_OP1_102_18730_20130122_171244_outLine +BABEL_OP1_102_18802_20121104_232940_inLine +BABEL_OP1_102_18802_20121104_232940_outLine +BABEL_OP1_102_19063_20130209_231415_inLine +BABEL_OP1_102_19063_20130209_231415_outLine +BABEL_OP1_102_19147_20120329_190609_inLine +BABEL_OP1_102_19147_20120329_190609_outLine +BABEL_OP1_102_19456_20121110_201037_inLine +BABEL_OP1_102_19456_20121110_201037_outLine +BABEL_OP1_102_19731_20130123_200845_inLine +BABEL_OP1_102_19731_20130123_200845_outLine +BABEL_OP1_102_19758_20120417_174950_inLine +BABEL_OP1_102_19758_20120417_174950_outLine +BABEL_OP1_102_19867_20130127_211111_inLine +BABEL_OP1_102_19867_20130127_211111_outLine +BABEL_OP1_102_20271_20120410_205746_inLine +BABEL_OP1_102_20271_20120410_205746_outLine +BABEL_OP1_102_20320_20120409_212129_inLine +BABEL_OP1_102_20320_20120409_212129_outLine +BABEL_OP1_102_20320_20120409_214042_inLine +BABEL_OP1_102_20320_20120409_214042_outLine +BABEL_OP1_102_20454_20121010_020017_inLine +BABEL_OP1_102_20454_20121010_020017_outLine +BABEL_OP1_102_20591_20120806_210212_inLine +BABEL_OP1_102_20591_20120806_210212_outLine +BABEL_OP1_102_21050_20120619_010126_inLine +BABEL_OP1_102_21050_20120619_010126_outLine +BABEL_OP1_102_21477_20120417_212152_inLine +BABEL_OP1_102_21477_20120417_212152_outLine +BABEL_OP1_102_21518_20120805_195607_inLine +BABEL_OP1_102_21518_20120805_195607_outLine +BABEL_OP1_102_21758_20120823_164553_inLine +BABEL_OP1_102_21758_20120823_164553_outLine 
+BABEL_OP1_102_21782_20120422_184156_inLine +BABEL_OP1_102_21782_20120422_184156_outLine +BABEL_OP1_102_22401_20121017_023338_inLine +BABEL_OP1_102_22401_20121017_023338_outLine +BABEL_OP1_102_22408_20120426_225012_inLine +BABEL_OP1_102_22408_20120426_225012_outLine +BABEL_OP1_102_23167_20120329_204718_inLine +BABEL_OP1_102_23167_20120329_204718_outLine +BABEL_OP1_102_24420_20120624_013709_inLine +BABEL_OP1_102_24420_20120624_013709_outLine +BABEL_OP1_102_24661_20121104_224032_inLine +BABEL_OP1_102_24661_20121104_224032_outLine +BABEL_OP1_102_24833_20120410_172706_inLine +BABEL_OP1_102_24833_20120410_172706_outLine +BABEL_OP1_102_25236_20120804_180700_inLine +BABEL_OP1_102_25236_20120804_180700_outLine +BABEL_OP1_102_25576_20120422_180912_inLine +BABEL_OP1_102_25576_20120422_180912_outLine +BABEL_OP1_102_25904_20120611_203203_inLine +BABEL_OP1_102_25904_20120611_203203_outLine +BABEL_OP1_102_25934_20120329_005438_inLine +BABEL_OP1_102_25934_20120329_005438_outLine +BABEL_OP1_102_26348_20121109_170513_inLine +BABEL_OP1_102_26348_20121109_170513_outLine +BABEL_OP1_102_27007_20120611_223823_inLine +BABEL_OP1_102_27007_20120611_223823_outLine +BABEL_OP1_102_27349_20120422_192337_inLine +BABEL_OP1_102_27349_20120422_192337_outLine +BABEL_OP1_102_27427_20120412_182452_inLine +BABEL_OP1_102_27427_20120412_182452_outLine +BABEL_OP1_102_27824_20120427_201104_inLine +BABEL_OP1_102_27824_20120427_201104_outLine +BABEL_OP1_102_27890_20121002_030324_inLine +BABEL_OP1_102_27890_20121002_030324_outLine +BABEL_OP1_102_28016_20120430_193141_inLine +BABEL_OP1_102_28016_20120430_193141_outLine +BABEL_OP1_102_28016_20120430_194530_inLine +BABEL_OP1_102_28016_20120430_194530_outLine +BABEL_OP1_102_28107_20120327_204144_inLine +BABEL_OP1_102_28107_20120327_204144_outLine +BABEL_OP1_102_28204_20120401_204624_inLine +BABEL_OP1_102_28204_20120401_204624_outLine +BABEL_OP1_102_28260_20120329_210829_inLine +BABEL_OP1_102_28260_20120329_210829_outLine +BABEL_OP1_102_28648_20120608_192702_inLine +BABEL_OP1_102_28648_20120608_192702_outLine +BABEL_OP1_102_29168_20120411_174248_inLine +BABEL_OP1_102_29168_20120411_174248_outLine +BABEL_OP1_102_29259_20120612_211621_inLine +BABEL_OP1_102_29259_20120612_211621_outLine +BABEL_OP1_102_29335_20120609_182335_inLine +BABEL_OP1_102_29335_20120609_182335_outLine +BABEL_OP1_102_29335_20120609_183151_inLine +BABEL_OP1_102_29335_20120609_183151_outLine +BABEL_OP1_102_29444_20120331_231513_inLine +BABEL_OP1_102_29444_20120331_231513_outLine +BABEL_OP1_102_29444_20120331_233317_inLine +BABEL_OP1_102_29444_20120331_233317_outLine +BABEL_OP1_102_29512_20120805_170123_inLine +BABEL_OP1_102_29512_20120805_170123_outLine +BABEL_OP1_102_29512_20120805_172610_inLine +BABEL_OP1_102_29512_20120805_172610_outLine +BABEL_OP1_102_29545_20121105_220136_inLine +BABEL_OP1_102_29545_20121105_220136_outLine +BABEL_OP1_102_29959_20130128_195931_inLine +BABEL_OP1_102_29959_20130128_195931_outLine +BABEL_OP1_102_29959_20130128_223813_inLine +BABEL_OP1_102_29959_20130128_223813_outLine +BABEL_OP1_102_30266_20120331_212330_inLine +BABEL_OP1_102_30266_20120331_212330_outLine +BABEL_OP1_102_30530_20120330_173152_inLine +BABEL_OP1_102_30530_20120330_173152_outLine +BABEL_OP1_102_30722_20121011_013755_inLine +BABEL_OP1_102_30722_20121011_013755_outLine +BABEL_OP1_102_31031_20120611_193208_inLine +BABEL_OP1_102_31031_20120611_193208_outLine +BABEL_OP1_102_31902_20120425_211816_inLine +BABEL_OP1_102_31902_20120425_211816_outLine +BABEL_OP1_102_31917_20120611_195339_inLine 
+BABEL_OP1_102_31917_20120611_195339_outLine +BABEL_OP1_102_32011_20121014_024351_inLine +BABEL_OP1_102_32011_20121014_024351_outLine +BABEL_OP1_102_32562_20121010_014014_inLine +BABEL_OP1_102_32562_20121010_014014_outLine +BABEL_OP1_102_32642_20121104_220528_inLine +BABEL_OP1_102_32642_20121104_220528_outLine +BABEL_OP1_102_33023_20120329_224858_inLine +BABEL_OP1_102_33023_20120329_224858_outLine +BABEL_OP1_102_33540_20120401_212225_inLine +BABEL_OP1_102_33540_20120401_212225_outLine +BABEL_OP1_102_33671_20120422_231219_inLine +BABEL_OP1_102_33671_20120422_231219_outLine +BABEL_OP1_102_34169_20120331_183840_inLine +BABEL_OP1_102_34169_20120331_183840_outLine +BABEL_OP1_102_34194_20120330_182542_inLine +BABEL_OP1_102_34194_20120330_182542_outLine +BABEL_OP1_102_34235_20120405_190745_inLine +BABEL_OP1_102_34235_20120405_190745_outLine +BABEL_OP1_102_34480_20121012_193452_inLine +BABEL_OP1_102_34480_20121012_193452_outLine +BABEL_OP1_102_34590_20120417_151435_inLine +BABEL_OP1_102_34590_20120417_151435_outLine +BABEL_OP1_102_34590_20120417_155556_inLine +BABEL_OP1_102_34590_20120417_155556_outLine +BABEL_OP1_102_34930_20120411_200043_inLine +BABEL_OP1_102_34930_20120411_200043_outLine +BABEL_OP1_102_35011_20120420_020024_inLine +BABEL_OP1_102_35011_20120420_020024_outLine +BABEL_OP1_102_35229_20121106_204019_inLine +BABEL_OP1_102_35229_20121106_204019_outLine +BABEL_OP1_102_35324_20120426_180016_inLine +BABEL_OP1_102_35324_20120426_180016_outLine +BABEL_OP1_102_35324_20120426_203214_inLine +BABEL_OP1_102_35324_20120426_203214_outLine +BABEL_OP1_102_35455_20121112_000231_inLine +BABEL_OP1_102_35455_20121112_000231_outLine +BABEL_OP1_102_36868_20130209_201544_inLine +BABEL_OP1_102_36868_20130209_201544_outLine +BABEL_OP1_102_37260_20120808_012733_inLine +BABEL_OP1_102_37260_20120808_012733_outLine +BABEL_OP1_102_37260_20120808_014150_inLine +BABEL_OP1_102_37260_20120808_014150_outLine +BABEL_OP1_102_37268_20121226_203217_inLine +BABEL_OP1_102_37268_20121226_203217_outLine +BABEL_OP1_102_37285_20120405_223443_inLine +BABEL_OP1_102_37285_20120405_223443_outLine +BABEL_OP1_102_37444_20130128_032426_inLine +BABEL_OP1_102_37444_20130128_032426_outLine +BABEL_OP1_102_37461_20120409_191629_inLine +BABEL_OP1_102_37461_20120409_191629_outLine +BABEL_OP1_102_37461_20120409_194138_inLine +BABEL_OP1_102_37461_20120409_194138_outLine +BABEL_OP1_102_37461_20120409_195519_inLine +BABEL_OP1_102_37461_20120409_195519_outLine +BABEL_OP1_102_37524_20120329_182549_inLine +BABEL_OP1_102_37524_20120329_182549_outLine +BABEL_OP1_102_38264_20121105_050622_inLine +BABEL_OP1_102_38264_20121105_050622_outLine +BABEL_OP1_102_38464_20121012_023702_inLine +BABEL_OP1_102_38464_20121012_023702_outLine +BABEL_OP1_102_38592_20121225_215825_inLine +BABEL_OP1_102_38592_20121225_215825_outLine +BABEL_OP1_102_38635_20120607_010931_inLine +BABEL_OP1_102_38635_20120607_010931_outLine +BABEL_OP1_102_38698_20120401_215032_inLine +BABEL_OP1_102_38698_20120401_215032_outLine +BABEL_OP1_102_38863_20121011_183009_inLine +BABEL_OP1_102_38863_20121011_183009_outLine +BABEL_OP1_102_38985_20120806_174824_inLine +BABEL_OP1_102_38985_20120806_174824_outLine +BABEL_OP1_102_38985_20120806_181000_inLine +BABEL_OP1_102_38985_20120806_181000_outLine +BABEL_OP1_102_39098_20120405_203729_inLine +BABEL_OP1_102_39098_20120405_203729_outLine +BABEL_OP1_102_39114_20120930_180045_inLine +BABEL_OP1_102_39114_20120930_180045_outLine +BABEL_OP1_102_39364_20121105_220855_inLine +BABEL_OP1_102_39364_20121105_220855_outLine 
+BABEL_OP1_102_39430_20120411_182026_inLine +BABEL_OP1_102_39430_20120411_182026_outLine +BABEL_OP1_102_39430_20120411_184729_inLine +BABEL_OP1_102_39430_20120411_184729_outLine +BABEL_OP1_102_40133_20121112_214034_inLine +BABEL_OP1_102_40133_20121112_214034_outLine +BABEL_OP1_102_40168_20120428_173400_inLine +BABEL_OP1_102_40168_20120428_173400_outLine +BABEL_OP1_102_40882_20130209_204142_inLine +BABEL_OP1_102_40882_20130209_204142_outLine +BABEL_OP1_102_41561_20121111_220752_inLine +BABEL_OP1_102_41561_20121111_220752_outLine +BABEL_OP1_102_41949_20120426_222144_inLine +BABEL_OP1_102_41949_20120426_222144_outLine +BABEL_OP1_102_42615_20120327_180819_inLine +BABEL_OP1_102_42615_20120327_180819_outLine +BABEL_OP1_102_42651_20120409_221530_inLine +BABEL_OP1_102_42651_20120409_221530_outLine +BABEL_OP1_102_42749_20121114_005458_inLine +BABEL_OP1_102_42749_20121114_005458_outLine +BABEL_OP1_102_42749_20121114_010754_inLine +BABEL_OP1_102_42749_20121114_010754_outLine +BABEL_OP1_102_43383_20120406_193121_inLine +BABEL_OP1_102_43383_20120406_193121_outLine +BABEL_OP1_102_43423_20120919_201131_inLine +BABEL_OP1_102_43423_20120919_201131_outLine +BABEL_OP1_102_43426_20120501_170331_inLine +BABEL_OP1_102_43426_20120501_170331_outLine +BABEL_OP1_102_43553_20120408_174809_inLine +BABEL_OP1_102_43553_20120408_174809_outLine +BABEL_OP1_102_43652_20120428_191659_inLine +BABEL_OP1_102_43652_20120428_191659_outLine +BABEL_OP1_102_44649_20120611_185930_inLine +BABEL_OP1_102_44649_20120611_185930_outLine +BABEL_OP1_102_44829_20120907_011054_inLine +BABEL_OP1_102_44829_20120907_011054_outLine +BABEL_OP1_102_44829_20120907_013730_inLine +BABEL_OP1_102_44829_20120907_013730_outLine +BABEL_OP1_102_45227_20120329_003400_inLine +BABEL_OP1_102_45227_20120329_003400_outLine +BABEL_OP1_102_45361_20120611_222502_inLine +BABEL_OP1_102_45361_20120611_222502_outLine +BABEL_OP1_102_45677_20130123_192645_inLine +BABEL_OP1_102_45677_20130123_192645_outLine +BABEL_OP1_102_45681_20120623_173741_inLine +BABEL_OP1_102_45681_20120623_173741_outLine +BABEL_OP1_102_45738_20120806_202458_inLine +BABEL_OP1_102_45738_20120806_202458_outLine +BABEL_OP1_102_45892_20120408_220557_inLine +BABEL_OP1_102_45892_20120408_220557_outLine +BABEL_OP1_102_45931_20120421_233726_inLine +BABEL_OP1_102_45931_20120421_233726_outLine +BABEL_OP1_102_46002_20121009_215715_inLine +BABEL_OP1_102_46002_20121009_215715_outLine +BABEL_OP1_102_46269_20121110_215228_inLine +BABEL_OP1_102_46269_20121110_215228_outLine +BABEL_OP1_102_46521_20120411_193429_inLine +BABEL_OP1_102_46521_20120411_193429_outLine +BABEL_OP1_102_47634_20120408_214325_inLine +BABEL_OP1_102_47634_20120408_214325_outLine +BABEL_OP1_102_47823_20120804_180038_inLine +BABEL_OP1_102_47823_20120804_180038_outLine +BABEL_OP1_102_48281_20120411_214725_inLine +BABEL_OP1_102_48281_20120411_214725_outLine +BABEL_OP1_102_48410_20120407_204734_inLine +BABEL_OP1_102_48410_20120407_204734_outLine +BABEL_OP1_102_48976_20120410_161651_inLine +BABEL_OP1_102_48976_20120410_161651_outLine +BABEL_OP1_102_49042_20120408_165038_inLine +BABEL_OP1_102_49042_20120408_165038_outLine +BABEL_OP1_102_49628_20120817_204731_inLine +BABEL_OP1_102_49628_20120817_204731_outLine +BABEL_OP1_102_49864_20120421_155657_inLine +BABEL_OP1_102_49864_20120421_155657_outLine +BABEL_OP1_102_50416_20120803_215223_inLine +BABEL_OP1_102_50416_20120803_215223_outLine +BABEL_OP1_102_50555_20120606_224819_inLine +BABEL_OP1_102_50555_20120606_224819_outLine +BABEL_OP1_102_50597_20120623_193352_inLine 
+BABEL_OP1_102_50597_20120623_193352_outLine +BABEL_OP1_102_50718_20120421_191449_inLine +BABEL_OP1_102_50718_20120421_191449_outLine +BABEL_OP1_102_50752_20121227_204235_inLine +BABEL_OP1_102_50752_20121227_204235_outLine +BABEL_OP1_102_50763_20120405_203621_inLine +BABEL_OP1_102_50763_20120405_203621_outLine +BABEL_OP1_102_50798_20120426_190454_inLine +BABEL_OP1_102_50798_20120426_190454_outLine +BABEL_OP1_102_51149_20121227_201136_inLine +BABEL_OP1_102_51149_20121227_201136_outLine +BABEL_OP1_102_52335_20130123_183229_inLine +BABEL_OP1_102_52335_20130123_183229_outLine +BABEL_OP1_102_52606_20121009_222016_inLine +BABEL_OP1_102_52606_20121009_222016_outLine +BABEL_OP1_102_52642_20120803_212045_inLine +BABEL_OP1_102_52642_20120803_212045_outLine +BABEL_OP1_102_52691_20120407_210408_inLine +BABEL_OP1_102_52691_20120407_210408_outLine +BABEL_OP1_102_52691_20120407_211728_inLine +BABEL_OP1_102_52691_20120407_211728_outLine +BABEL_OP1_102_52691_20120407_213757_inLine +BABEL_OP1_102_52691_20120407_213757_outLine +BABEL_OP1_102_52902_20120607_175045_inLine +BABEL_OP1_102_52902_20120607_175045_outLine +BABEL_OP1_102_52902_20120607_180239_inLine +BABEL_OP1_102_52902_20120607_180239_outLine +BABEL_OP1_102_53429_20121224_202431_inLine +BABEL_OP1_102_53429_20121224_202431_outLine +BABEL_OP1_102_53500_20120428_175953_inLine +BABEL_OP1_102_53500_20120428_175953_outLine +BABEL_OP1_102_53703_20120409_180047_inLine +BABEL_OP1_102_53703_20120409_180047_outLine +BABEL_OP1_102_53982_20120607_220642_inLine +BABEL_OP1_102_53982_20120607_220642_outLine +BABEL_OP1_102_54241_20120911_024357_inLine +BABEL_OP1_102_54241_20120911_024357_outLine +BABEL_OP1_102_54241_20120911_025705_inLine +BABEL_OP1_102_54241_20120911_025705_outLine +BABEL_OP1_102_55182_20120330_201037_inLine +BABEL_OP1_102_55182_20120330_201037_outLine +BABEL_OP1_102_55399_20120409_211258_inLine +BABEL_OP1_102_55399_20120409_211258_outLine +BABEL_OP1_102_55450_20121013_171507_inLine +BABEL_OP1_102_55450_20121013_171507_outLine +BABEL_OP1_102_55470_20120429_194956_inLine +BABEL_OP1_102_55470_20120429_194956_outLine +BABEL_OP1_102_55823_20121010_005200_inLine +BABEL_OP1_102_55823_20121010_005200_outLine +BABEL_OP1_102_55874_20121108_215431_inLine +BABEL_OP1_102_55874_20121108_215431_outLine +BABEL_OP1_102_56070_20120410_224512_inLine +BABEL_OP1_102_56070_20120410_224512_outLine +BABEL_OP1_102_56648_20120615_181652_inLine +BABEL_OP1_102_56648_20120615_181652_outLine +BABEL_OP1_102_56812_20121010_203710_inLine +BABEL_OP1_102_56812_20121010_203710_outLine +BABEL_OP1_102_56943_20121221_203039_inLine +BABEL_OP1_102_56943_20121221_203039_outLine +BABEL_OP1_102_57039_20121107_201157_inLine +BABEL_OP1_102_57039_20121107_201157_outLine +BABEL_OP1_102_57422_20120607_213941_inLine +BABEL_OP1_102_57422_20120607_213941_outLine +BABEL_OP1_102_57629_20121010_011015_inLine +BABEL_OP1_102_57629_20121010_011015_outLine +BABEL_OP1_102_57907_20121013_035627_inLine +BABEL_OP1_102_57907_20121013_035627_outLine +BABEL_OP1_102_58715_20120425_190758_inLine +BABEL_OP1_102_58715_20120425_190758_outLine +BABEL_OP1_102_58863_20120404_195038_inLine +BABEL_OP1_102_58863_20120404_195038_outLine +BABEL_OP1_102_58947_20121106_203812_inLine +BABEL_OP1_102_58947_20121106_203812_outLine +BABEL_OP1_102_58947_20121106_205338_inLine +BABEL_OP1_102_58947_20121106_205338_outLine +BABEL_OP1_102_59169_20120611_172953_inLine +BABEL_OP1_102_59169_20120611_172953_outLine +BABEL_OP1_102_59383_20121220_151350_inLine +BABEL_OP1_102_59383_20121220_151350_outLine 
+BABEL_OP1_102_59628_20121106_031543_inLine
+BABEL_OP1_102_59628_20121106_031543_outLine
+BABEL_OP1_102_59891_20120611_212238_inLine
+BABEL_OP1_102_59891_20120611_212238_outLine
+BABEL_OP1_102_59925_20121111_214225_inLine
+BABEL_OP1_102_59925_20121111_214225_outLine
+BABEL_OP1_102_60193_20120419_201756_inLine
+BABEL_OP1_102_60193_20120419_201756_outLine
+BABEL_OP1_102_60277_20120615_195600_inLine
+BABEL_OP1_102_60277_20120615_195600_outLine
+BABEL_OP1_102_60826_20120606_231535_inLine
+BABEL_OP1_102_60826_20120606_231535_outLine
+BABEL_OP1_102_60848_20121110_170724_inLine
+BABEL_OP1_102_60848_20121110_170724_outLine
+BABEL_OP1_102_60881_20120401_212818_inLine
+BABEL_OP1_102_60881_20120401_212818_outLine
+BABEL_OP1_102_60995_20121107_203546_inLine
+BABEL_OP1_102_60995_20121107_203546_outLine
+BABEL_OP1_102_61263_20121112_213923_inLine
+BABEL_OP1_102_61263_20121112_213923_outLine
+BABEL_OP1_102_61446_20120420_184155_inLine
+BABEL_OP1_102_61446_20120420_184155_outLine
+BABEL_OP1_102_61936_20121224_175007_inLine
+BABEL_OP1_102_61936_20121224_175007_outLine
+BABEL_OP1_102_62132_20120614_214158_inLine
+BABEL_OP1_102_62132_20120614_214158_outLine
+BABEL_OP1_102_62923_20130122_190544_inLine
+BABEL_OP1_102_62923_20130122_190544_outLine
+BABEL_OP1_102_63076_20121224_225415_inLine
+BABEL_OP1_102_63076_20121224_225415_outLine
+BABEL_OP1_102_64185_20120722_220159_inLine
+BABEL_OP1_102_64185_20120722_220159_outLine
+BABEL_OP1_102_64351_20120608_202610_inLine
+BABEL_OP1_102_64351_20120608_202610_outLine
+BABEL_OP1_102_65248_20120317_180718_inLine
+BABEL_OP1_102_65248_20120317_180718_outLine
+BABEL_OP1_102_65273_20121226_233200_inLine
+BABEL_OP1_102_65273_20121226_233200_outLine
+BABEL_OP1_102_65371_20121228_213615_inLine
+BABEL_OP1_102_65371_20121228_213615_outLine
+BABEL_OP1_102_65415_20120410_193034_inLine
+BABEL_OP1_102_65415_20120410_193034_outLine
+BABEL_OP1_102_65580_20120320_234602_inLine
+BABEL_OP1_102_65580_20120320_234602_outLine
+BABEL_OP1_102_65601_20120427_193019_inLine
+BABEL_OP1_102_65601_20120427_193019_outLine
+BABEL_OP1_102_65837_20121106_201713_inLine
+BABEL_OP1_102_65837_20121106_201713_outLine
+BABEL_OP1_102_66330_20120429_164154_inLine
+BABEL_OP1_102_66330_20120429_164154_outLine
+BABEL_OP1_102_66330_20120429_164900_inLine
+BABEL_OP1_102_66330_20120429_164900_outLine
+BABEL_OP1_102_66416_20120817_204557_inLine
+BABEL_OP1_102_66416_20120817_204557_outLine
+BABEL_OP1_102_66441_20120411_170112_inLine
+BABEL_OP1_102_66441_20120411_170112_outLine
+BABEL_OP1_102_66559_20121227_172234_inLine
+BABEL_OP1_102_66559_20121227_172234_outLine
+BABEL_OP1_102_67150_20121106_232551_inLine
+BABEL_OP1_102_67150_20121106_232551_outLine
+BABEL_OP1_102_67733_20120409_192100_inLine
+BABEL_OP1_102_67733_20120409_192100_outLine
+BABEL_OP1_102_67750_20120330_210301_inLine
+BABEL_OP1_102_67750_20120330_210301_outLine
+BABEL_OP1_102_67798_20120408_211247_inLine
+BABEL_OP1_102_67798_20120408_211247_outLine
+BABEL_OP1_102_67916_20121224_185018_inLine
+BABEL_OP1_102_67916_20121224_185018_outLine
+BABEL_OP1_102_69049_20120422_174706_inLine
+BABEL_OP1_102_69049_20120422_174706_outLine
+BABEL_OP1_102_69145_20121006_214000_inLine
+BABEL_OP1_102_69145_20121006_214000_outLine
+BABEL_OP1_102_69275_20121009_000322_inLine
+BABEL_OP1_102_69275_20121009_000322_outLine
+BABEL_OP1_102_69368_20120328_214605_inLine
+BABEL_OP1_102_69368_20120328_214605_outLine
+BABEL_OP1_102_69446_20130130_183941_inLine
+BABEL_OP1_102_69446_20130130_183941_outLine
+BABEL_OP1_102_70077_20121222_173141_inLine
+BABEL_OP1_102_70077_20121222_173141_outLine
+BABEL_OP1_102_70555_20120421_203231_inLine
+BABEL_OP1_102_70555_20120421_203231_outLine
+BABEL_OP1_102_71778_20120608_222028_inLine
+BABEL_OP1_102_71778_20120608_222028_outLine
+BABEL_OP1_102_71844_20120331_200325_inLine
+BABEL_OP1_102_71844_20120331_200325_outLine
+BABEL_OP1_102_72032_20120329_225115_inLine
+BABEL_OP1_102_72032_20120329_225115_outLine
+BABEL_OP1_102_72718_20121010_030640_inLine
+BABEL_OP1_102_72718_20121010_030640_outLine
+BABEL_OP1_102_72799_20120428_225215_inLine
+BABEL_OP1_102_72799_20120428_225215_outLine
+BABEL_OP1_102_73050_20120929_012255_inLine
+BABEL_OP1_102_73050_20120929_012255_outLine
+BABEL_OP1_102_73059_20121225_162645_inLine
+BABEL_OP1_102_73059_20121225_162645_outLine
+BABEL_OP1_102_73059_20121225_163932_inLine
+BABEL_OP1_102_73059_20121225_163932_outLine
+BABEL_OP1_102_73438_20121103_170431_inLine
+BABEL_OP1_102_73438_20121103_170431_outLine
+BABEL_OP1_102_73440_20120428_195653_inLine
+BABEL_OP1_102_73440_20120428_195653_outLine
+BABEL_OP1_102_73452_20121003_021245_inLine
+BABEL_OP1_102_73452_20121003_021245_outLine
+BABEL_OP1_102_73786_20120420_171039_inLine
+BABEL_OP1_102_73786_20120420_171039_outLine
+BABEL_OP1_102_74043_20120422_170724_inLine
+BABEL_OP1_102_74043_20120422_170724_outLine
+BABEL_OP1_102_74368_20121008_041653_inLine
+BABEL_OP1_102_74368_20121008_041653_outLine
+BABEL_OP1_102_74709_20120806_191528_inLine
+BABEL_OP1_102_74709_20120806_191528_outLine
+BABEL_OP1_102_74823_20120330_181459_inLine
+BABEL_OP1_102_74823_20120330_181459_outLine
+BABEL_OP1_102_75140_20120330_171509_inLine
+BABEL_OP1_102_75140_20120330_171509_outLine
+BABEL_OP1_102_75354_20121105_033257_inLine
+BABEL_OP1_102_75354_20121105_033257_outLine
+BABEL_OP1_102_75498_20120806_180214_inLine
+BABEL_OP1_102_75498_20120806_180214_outLine
+BABEL_OP1_102_75680_20121110_180407_inLine
+BABEL_OP1_102_75680_20121110_180407_outLine
+BABEL_OP1_102_75990_20120426_182351_inLine
+BABEL_OP1_102_75990_20120426_182351_outLine
+BABEL_OP1_102_76331_20120806_185250_inLine
+BABEL_OP1_102_76331_20120806_185250_outLine
+BABEL_OP1_102_76451_20120329_193459_inLine
+BABEL_OP1_102_76451_20120329_193459_outLine
+BABEL_OP1_102_77207_20120804_174005_inLine
+BABEL_OP1_102_77207_20120804_174005_outLine
+BABEL_OP1_102_77244_20121001_003159_inLine
+BABEL_OP1_102_77244_20121001_003159_outLine
+BABEL_OP1_102_77465_20120607_001521_inLine
+BABEL_OP1_102_77465_20120607_001521_outLine
+BABEL_OP1_102_77771_20121227_191404_inLine
+BABEL_OP1_102_77771_20121227_191404_outLine
+BABEL_OP1_102_77811_20130123_215211_inLine
+BABEL_OP1_102_77811_20130123_215211_outLine
+BABEL_OP1_102_78514_20120409_182010_inLine
+BABEL_OP1_102_78514_20120409_182010_outLine
+BABEL_OP1_102_79495_20120320_011136_inLine
+BABEL_OP1_102_79495_20120320_011136_outLine
+BABEL_OP1_102_79618_20120401_204258_inLine
+BABEL_OP1_102_79618_20120401_204258_outLine
+BABEL_OP1_102_79698_20121106_212429_inLine
+BABEL_OP1_102_79698_20121106_212429_outLine
+BABEL_OP1_102_80174_20130211_031725_inLine
+BABEL_OP1_102_80174_20130211_031725_outLine
+BABEL_OP1_102_80868_20121028_015553_inLine
+BABEL_OP1_102_80868_20121028_015553_outLine
+BABEL_OP1_102_81084_20120406_191910_inLine
+BABEL_OP1_102_81084_20120406_191910_outLine
+BABEL_OP1_102_81587_20121225_213038_inLine
+BABEL_OP1_102_81587_20121225_213038_outLine
+BABEL_OP1_102_81611_20121110_221005_inLine
+BABEL_OP1_102_81611_20121110_221005_outLine
+BABEL_OP1_102_81717_20130209_201202_inLine
+BABEL_OP1_102_81717_20130209_201202_outLine
+BABEL_OP1_102_81878_20120331_181439_inLine
+BABEL_OP1_102_81878_20120331_181439_outLine
+BABEL_OP1_102_81878_20120331_182958_inLine
+BABEL_OP1_102_81878_20120331_182958_outLine
+BABEL_OP1_102_82009_20121104_013002_inLine
+BABEL_OP1_102_82009_20121104_013002_outLine
+BABEL_OP1_102_82023_20120330_175253_inLine
+BABEL_OP1_102_82023_20120330_175253_outLine
+BABEL_OP1_102_82192_20120429_180649_inLine
+BABEL_OP1_102_82192_20120429_180649_outLine
+BABEL_OP1_102_82408_20120402_190241_inLine
+BABEL_OP1_102_82408_20120402_190241_outLine
+BABEL_OP1_102_82880_20121108_173528_inLine
+BABEL_OP1_102_82880_20121108_173528_outLine
+BABEL_OP1_102_83256_20120330_210950_inLine
+BABEL_OP1_102_83256_20120330_210950_outLine
+BABEL_OP1_102_83493_20120429_172305_inLine
+BABEL_OP1_102_83493_20120429_172305_outLine
+BABEL_OP1_102_83493_20120429_175508_inLine
+BABEL_OP1_102_83493_20120429_175508_outLine
+BABEL_OP1_102_83531_20120408_201200_inLine
+BABEL_OP1_102_83531_20120408_201200_outLine
+BABEL_OP1_102_83531_20120408_203827_inLine
+BABEL_OP1_102_83531_20120408_203827_outLine
+BABEL_OP1_102_83634_20130123_212154_inLine
+BABEL_OP1_102_83634_20130123_212154_outLine
+BABEL_OP1_102_83791_20120420_215616_inLine
+BABEL_OP1_102_83791_20120420_215616_outLine
+BABEL_OP1_102_84088_20120328_180739_inLine
+BABEL_OP1_102_84088_20120328_180739_outLine
+BABEL_OP1_102_84284_20121225_175332_inLine
+BABEL_OP1_102_84284_20121225_175332_outLine
+BABEL_OP1_102_84397_20121110_230552_inLine
+BABEL_OP1_102_84397_20121110_230552_outLine
+BABEL_OP1_102_84439_20120427_184114_inLine
+BABEL_OP1_102_84439_20120427_184114_outLine
+BABEL_OP1_102_84608_20120609_194053_inLine
+BABEL_OP1_102_84608_20120609_194053_outLine
+BABEL_OP1_102_84943_20120401_170153_inLine
+BABEL_OP1_102_84943_20120401_170153_outLine
+BABEL_OP1_102_85204_20120329_192035_inLine
+BABEL_OP1_102_85204_20120329_192035_outLine
+BABEL_OP1_102_85716_20120401_165708_inLine
+BABEL_OP1_102_85716_20120401_165708_outLine
+BABEL_OP1_102_86004_20120418_230109_inLine
+BABEL_OP1_102_86004_20120418_230109_outLine
+BABEL_OP1_102_86227_20120401_195417_inLine
+BABEL_OP1_102_86227_20120401_195417_outLine
+BABEL_OP1_102_86886_20121112_201306_inLine
+BABEL_OP1_102_86886_20121112_201306_outLine
+BABEL_OP1_102_86956_20120401_173752_inLine
+BABEL_OP1_102_86956_20120401_173752_outLine
+BABEL_OP1_102_87234_20121224_212540_inLine
+BABEL_OP1_102_87234_20121224_212540_outLine
+BABEL_OP1_102_87481_20121027_153449_inLine
+BABEL_OP1_102_87481_20121027_153449_outLine
+BABEL_OP1_102_87486_20120406_200642_inLine
+BABEL_OP1_102_87486_20120406_200642_outLine
+BABEL_OP1_102_87806_20120409_183938_inLine
+BABEL_OP1_102_87806_20120409_183938_outLine
+BABEL_OP1_102_87857_20120405_202526_inLine
+BABEL_OP1_102_87857_20120405_202526_outLine
+BABEL_OP1_102_87961_20120423_155726_inLine
+BABEL_OP1_102_87961_20120423_155726_outLine
+BABEL_OP1_102_88163_20121112_003006_inLine
+BABEL_OP1_102_88163_20121112_003006_outLine
+BABEL_OP1_102_89583_20121011_013631_inLine
+BABEL_OP1_102_89583_20121011_013631_outLine
+BABEL_OP1_102_89583_20121012_014745_inLine
+BABEL_OP1_102_89583_20121012_014745_outLine
+BABEL_OP1_102_89838_20120409_214411_inLine
+BABEL_OP1_102_89838_20120409_214411_outLine
+BABEL_OP1_102_89993_20120607_175900_inLine
+BABEL_OP1_102_89993_20120607_175900_outLine
+BABEL_OP1_102_90055_20120405_192435_inLine
+BABEL_OP1_102_90055_20120405_192435_outLine
+BABEL_OP1_102_90389_20121012_050118_inLine
+BABEL_OP1_102_90389_20121012_050118_outLine
+BABEL_OP1_102_90393_20120419_214927_inLine
+BABEL_OP1_102_90393_20120419_214927_outLine
+BABEL_OP1_102_90511_20120329_224306_inLine
+BABEL_OP1_102_90511_20120329_224306_outLine
+BABEL_OP1_102_90609_20120410_184424_inLine
+BABEL_OP1_102_90609_20120410_184424_outLine
+BABEL_OP1_102_90810_20120404_221650_inLine
+BABEL_OP1_102_90810_20120404_221650_outLine
+BABEL_OP1_102_90819_20120614_222542_inLine
+BABEL_OP1_102_90819_20120614_222542_outLine
+BABEL_OP1_102_90890_20120320_235811_inLine
+BABEL_OP1_102_90890_20120320_235811_outLine
+BABEL_OP1_102_90975_20130127_194034_inLine
+BABEL_OP1_102_90975_20130127_194034_outLine
+BABEL_OP1_102_90975_20130127_195301_inLine
+BABEL_OP1_102_90975_20130127_195301_outLine
+BABEL_OP1_102_91171_20121222_000026_inLine
+BABEL_OP1_102_91171_20121222_000026_outLine
+BABEL_OP1_102_91358_20121103_191541_inLine
+BABEL_OP1_102_91358_20121103_191541_outLine
+BABEL_OP1_102_91386_20121226_175240_inLine
+BABEL_OP1_102_91386_20121226_175240_outLine
+BABEL_OP1_102_91401_20120409_195325_inLine
+BABEL_OP1_102_91401_20120409_195325_outLine
+BABEL_OP1_102_91481_20120806_232222_inLine
+BABEL_OP1_102_91481_20120806_232222_outLine
+BABEL_OP1_102_91865_20130127_193426_inLine
+BABEL_OP1_102_91865_20130127_193426_outLine
+BABEL_OP1_102_92002_20120821_172434_inLine
+BABEL_OP1_102_92002_20120821_172434_outLine
+BABEL_OP1_102_92252_20120805_193105_inLine
+BABEL_OP1_102_92252_20120805_193105_outLine
+BABEL_OP1_102_92252_20120805_202508_inLine
+BABEL_OP1_102_92252_20120805_202508_outLine
+BABEL_OP1_102_92321_20120729_204129_inLine
+BABEL_OP1_102_92321_20120729_204129_outLine
+BABEL_OP1_102_92386_20120401_175909_inLine
+BABEL_OP1_102_92386_20120401_175909_outLine
+BABEL_OP1_102_92407_20120330_180101_inLine
+BABEL_OP1_102_92407_20120330_180101_outLine
+BABEL_OP1_102_92591_20120818_164613_inLine
+BABEL_OP1_102_92591_20120818_164613_outLine
+BABEL_OP1_102_92591_20120818_170346_inLine
+BABEL_OP1_102_92591_20120818_170346_outLine
+BABEL_OP1_102_92591_20120818_171559_inLine
+BABEL_OP1_102_92591_20120818_171559_outLine
+BABEL_OP1_102_92628_20120404_212106_inLine
+BABEL_OP1_102_92628_20120404_212106_outLine
+BABEL_OP1_102_92800_20120408_165253_inLine
+BABEL_OP1_102_92800_20120408_165253_outLine
+BABEL_OP1_102_93091_20120425_204602_inLine
+BABEL_OP1_102_93091_20120425_204602_outLine
+BABEL_OP1_102_93091_20120425_205745_inLine
+BABEL_OP1_102_93091_20120425_205745_outLine
+BABEL_OP1_102_93151_20120410_200907_inLine
+BABEL_OP1_102_93151_20120410_200907_outLine
+BABEL_OP1_102_93277_20121028_025007_inLine
+BABEL_OP1_102_93277_20121028_025007_outLine
+BABEL_OP1_102_93277_20121106_011048_inLine
+BABEL_OP1_102_93277_20121106_011048_outLine
+BABEL_OP1_102_93509_20120422_230046_inLine
+BABEL_OP1_102_93509_20120422_230046_outLine
+BABEL_OP1_102_93607_20120806_194627_inLine
+BABEL_OP1_102_93607_20120806_194627_outLine
+BABEL_OP1_102_94162_20130209_213329_inLine
+BABEL_OP1_102_94162_20130209_213329_outLine
+BABEL_OP1_102_94542_20130122_222709_inLine
+BABEL_OP1_102_94542_20130122_222709_outLine
+BABEL_OP1_102_94694_20120611_183126_inLine
+BABEL_OP1_102_94694_20120611_183126_outLine
+BABEL_OP1_102_94696_20130127_183814_inLine
+BABEL_OP1_102_94696_20130127_183814_outLine
+BABEL_OP1_102_95350_20120420_225049_inLine
+BABEL_OP1_102_95350_20120420_225049_outLine
+BABEL_OP1_102_95566_20121222_024129_inLine
+BABEL_OP1_102_95566_20121222_024129_outLine
+BABEL_OP1_102_95637_20120329_225942_inLine
+BABEL_OP1_102_95637_20120329_225942_outLine
+BABEL_OP1_102_95650_20120327_230850_inLine
+BABEL_OP1_102_95650_20120327_230850_outLine
+BABEL_OP1_102_95815_20120401_233401_inLine
+BABEL_OP1_102_95815_20120401_233401_outLine
+BABEL_OP1_102_95849_20121106_222829_inLine
+BABEL_OP1_102_95849_20121106_222829_outLine
+BABEL_OP1_102_95996_20120427_174020_inLine
+BABEL_OP1_102_95996_20120427_174020_outLine
+BABEL_OP1_102_96216_20120412_193323_inLine
+BABEL_OP1_102_96216_20120412_193323_outLine
+BABEL_OP1_102_96283_20120720_021526_inLine
+BABEL_OP1_102_96283_20120720_021526_outLine
+BABEL_OP1_102_96347_20120330_201932_inLine
+BABEL_OP1_102_96347_20120330_201932_outLine
+BABEL_OP1_102_96788_20120411_183347_inLine
+BABEL_OP1_102_96788_20120411_183347_outLine
+BABEL_OP1_102_97004_20121107_210600_inLine
+BABEL_OP1_102_97004_20121107_210600_outLine
+BABEL_OP1_102_97260_20120409_175649_inLine
+BABEL_OP1_102_97260_20120409_175649_outLine
+BABEL_OP1_102_97590_20121110_214746_inLine
+BABEL_OP1_102_97590_20121110_214746_outLine
+BABEL_OP1_102_97590_20121110_215543_inLine
+BABEL_OP1_102_97590_20121110_215543_outLine
+BABEL_OP1_102_97760_20121010_154720_inLine
+BABEL_OP1_102_97760_20121010_154720_outLine
+BABEL_OP1_102_98402_20121112_014920_inLine
+BABEL_OP1_102_98402_20121112_014920_outLine
+BABEL_OP1_102_98640_20120930_211930_inLine
+BABEL_OP1_102_98640_20120930_211930_outLine
+BABEL_OP1_102_98675_20130209_215547_inLine
+BABEL_OP1_102_98675_20130209_215547_outLine
+BABEL_OP1_102_99514_20120406_182505_inLine
+BABEL_OP1_102_99514_20120406_182505_outLine
+BABEL_OP1_102_99709_20120429_201437_inLine
+BABEL_OP1_102_99709_20120429_201437_outLine
+BABEL_OP1_102_99709_20120429_202748_inLine
+BABEL_OP1_102_99709_20120429_202748_outLine
+BABEL_OP1_102_99731_20121220_214209_inLine
+BABEL_OP1_102_99731_20121220_214209_outLine
+BABEL_OP1_102_99823_20120429_181728_inLine
+BABEL_OP1_102_99823_20120429_181728_outLine
diff --git a/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.list b/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.list
new file mode 100644
index 00000000000..138e2c7651f
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.list
@@ -0,0 +1,138 @@
+BABEL_OP1_102_10713_20120401_204236_inLine
+BABEL_OP1_102_10713_20120401_204236_outLine
+BABEL_OP1_102_11031_20120926_231829_inLine
+BABEL_OP1_102_11031_20120926_231829_outLine
+BABEL_OP1_102_12655_20120318_171708_inLine
+BABEL_OP1_102_12655_20120318_171708_outLine
+BABEL_OP1_102_14874_20120417_153112_inLine
+BABEL_OP1_102_14874_20120417_153112_outLine
+BABEL_OP1_102_15493_20130127_203044_inLine
+BABEL_OP1_102_15493_20130127_203044_outLine
+BABEL_OP1_102_16185_20121105_042129_inLine
+BABEL_OP1_102_16185_20121105_042129_outLine
+BABEL_OP1_102_17207_20120729_230128_inLine
+BABEL_OP1_102_17207_20120729_230128_outLine
+BABEL_OP1_102_18344_20121109_192858_inLine
+BABEL_OP1_102_18344_20121109_192858_outLine
+BABEL_OP1_102_19063_20130209_231415_inLine
+BABEL_OP1_102_19063_20130209_231415_outLine
+BABEL_OP1_102_19758_20120417_174950_inLine
+BABEL_OP1_102_19758_20120417_174950_outLine
+BABEL_OP1_102_29512_20120805_170123_inLine
+BABEL_OP1_102_29512_20120805_170123_outLine
+BABEL_OP1_102_29512_20120805_172610_inLine
+BABEL_OP1_102_29512_20120805_172610_outLine
+BABEL_OP1_102_30530_20120330_173152_inLine
+BABEL_OP1_102_30530_20120330_173152_outLine
+BABEL_OP1_102_32011_20121014_024351_inLine
+BABEL_OP1_102_32011_20121014_024351_outLine
+BABEL_OP1_102_34194_20120330_182542_inLine
+BABEL_OP1_102_34194_20120330_182542_outLine
+BABEL_OP1_102_37524_20120329_182549_inLine
+BABEL_OP1_102_37524_20120329_182549_outLine
+BABEL_OP1_102_38464_20121012_023702_inLine
+BABEL_OP1_102_38464_20121012_023702_outLine
+BABEL_OP1_102_38635_20120607_010931_inLine
+BABEL_OP1_102_38635_20120607_010931_outLine
+BABEL_OP1_102_38985_20120806_174824_inLine
+BABEL_OP1_102_38985_20120806_174824_outLine
+BABEL_OP1_102_38985_20120806_181000_inLine
+BABEL_OP1_102_38985_20120806_181000_outLine
+BABEL_OP1_102_39098_20120405_203729_inLine
+BABEL_OP1_102_39098_20120405_203729_outLine
+BABEL_OP1_102_45227_20120329_003400_inLine
+BABEL_OP1_102_45227_20120329_003400_outLine
+BABEL_OP1_102_46521_20120411_193429_inLine
+BABEL_OP1_102_46521_20120411_193429_outLine
+BABEL_OP1_102_48281_20120411_214725_inLine
+BABEL_OP1_102_48281_20120411_214725_outLine
+BABEL_OP1_102_50416_20120803_215223_inLine
+BABEL_OP1_102_50416_20120803_215223_outLine
+BABEL_OP1_102_51149_20121227_201136_inLine
+BABEL_OP1_102_51149_20121227_201136_outLine
+BABEL_OP1_102_53429_20121224_202431_inLine
+BABEL_OP1_102_53429_20121224_202431_outLine
+BABEL_OP1_102_55399_20120409_211258_inLine
+BABEL_OP1_102_55399_20120409_211258_outLine
+BABEL_OP1_102_59628_20121106_031543_inLine
+BABEL_OP1_102_59628_20121106_031543_outLine
+BABEL_OP1_102_61936_20121224_175007_inLine
+BABEL_OP1_102_61936_20121224_175007_outLine
+BABEL_OP1_102_65601_20120427_193019_inLine
+BABEL_OP1_102_65601_20120427_193019_outLine
+BABEL_OP1_102_66330_20120429_164154_inLine
+BABEL_OP1_102_66330_20120429_164154_outLine
+BABEL_OP1_102_66330_20120429_164900_inLine
+BABEL_OP1_102_66330_20120429_164900_outLine
+BABEL_OP1_102_69446_20130130_183941_inLine
+BABEL_OP1_102_69446_20130130_183941_outLine
+BABEL_OP1_102_70077_20121222_173141_inLine
+BABEL_OP1_102_70077_20121222_173141_outLine
+BABEL_OP1_102_71844_20120331_200325_inLine
+BABEL_OP1_102_71844_20120331_200325_outLine
+BABEL_OP1_102_73059_20121225_162645_inLine
+BABEL_OP1_102_73059_20121225_162645_outLine
+BABEL_OP1_102_73059_20121225_163932_inLine
+BABEL_OP1_102_73059_20121225_163932_outLine
+BABEL_OP1_102_77207_20120804_174005_inLine
+BABEL_OP1_102_77207_20120804_174005_outLine
+BABEL_OP1_102_79618_20120401_204258_inLine
+BABEL_OP1_102_79618_20120401_204258_outLine
+BABEL_OP1_102_79698_20121106_212429_inLine
+BABEL_OP1_102_79698_20121106_212429_outLine
+BABEL_OP1_102_80174_20130211_031725_inLine
+BABEL_OP1_102_80174_20130211_031725_outLine
+BABEL_OP1_102_81611_20121110_221005_inLine
+BABEL_OP1_102_81611_20121110_221005_outLine
+BABEL_OP1_102_82880_20121108_173528_inLine
+BABEL_OP1_102_82880_20121108_173528_outLine
+BABEL_OP1_102_85204_20120329_192035_inLine
+BABEL_OP1_102_85204_20120329_192035_outLine
+BABEL_OP1_102_86227_20120401_195417_inLine
+BABEL_OP1_102_86227_20120401_195417_outLine
+BABEL_OP1_102_86956_20120401_173752_inLine
+BABEL_OP1_102_86956_20120401_173752_outLine
+BABEL_OP1_102_87481_20121027_153449_inLine
+BABEL_OP1_102_87481_20121027_153449_outLine
+BABEL_OP1_102_87486_20120406_200642_inLine
+BABEL_OP1_102_87486_20120406_200642_outLine
+BABEL_OP1_102_87806_20120409_183938_inLine
+BABEL_OP1_102_87806_20120409_183938_outLine
+BABEL_OP1_102_89583_20121011_013631_inLine
+BABEL_OP1_102_89583_20121011_013631_outLine
+BABEL_OP1_102_89583_20121012_014745_inLine
+BABEL_OP1_102_89583_20121012_014745_outLine
+BABEL_OP1_102_89838_20120409_214411_inLine
+BABEL_OP1_102_89838_20120409_214411_outLine
+BABEL_OP1_102_90055_20120405_192435_inLine
+BABEL_OP1_102_90055_20120405_192435_outLine
+BABEL_OP1_102_90389_20121012_050118_inLine
+BABEL_OP1_102_90389_20121012_050118_outLine
+BABEL_OP1_102_90609_20120410_184424_inLine
+BABEL_OP1_102_90609_20120410_184424_outLine
+BABEL_OP1_102_92591_20120818_164613_inLine
+BABEL_OP1_102_92591_20120818_164613_outLine
+BABEL_OP1_102_92591_20120818_170346_inLine
+BABEL_OP1_102_92591_20120818_170346_outLine
+BABEL_OP1_102_92591_20120818_171559_inLine
+BABEL_OP1_102_92591_20120818_171559_outLine
+BABEL_OP1_102_93151_20120410_200907_inLine
+BABEL_OP1_102_93151_20120410_200907_outLine
+BABEL_OP1_102_93277_20121028_025007_inLine
+BABEL_OP1_102_93277_20121028_025007_outLine
+BABEL_OP1_102_93277_20121106_011048_inLine
+BABEL_OP1_102_93277_20121106_011048_outLine
+BABEL_OP1_102_95996_20120427_174020_inLine
+BABEL_OP1_102_95996_20120427_174020_outLine
+BABEL_OP1_102_96216_20120412_193323_inLine
+BABEL_OP1_102_96216_20120412_193323_outLine
+BABEL_OP1_102_97004_20121107_210600_inLine
+BABEL_OP1_102_97004_20121107_210600_outLine
+BABEL_OP1_102_97760_20121010_154720_inLine
+BABEL_OP1_102_97760_20121010_154720_outLine
+BABEL_OP1_102_98640_20120930_211930_inLine
+BABEL_OP1_102_98640_20120930_211930_outLine
+BABEL_OP1_102_99709_20120429_201437_inLine
+BABEL_OP1_102_99709_20120429_201437_outLine
+BABEL_OP1_102_99709_20120429_202748_inLine
+BABEL_OP1_102_99709_20120429_202748_outLine
diff --git a/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.untranscribed.list
new file mode 100644
index 00000000000..f363d1b4216
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.untranscribed.list
@@ -0,0 +1,652 @@
+BABEL_OP1_102_10187_20120405_173448_inLine
+BABEL_OP1_102_10187_20120405_173448_outLine
+BABEL_OP1_102_10271_20120729_173749_inLine
+BABEL_OP1_102_10271_20120729_173749_outLine
+BABEL_OP1_102_11004_20120420_213442_inLine
+BABEL_OP1_102_11004_20120420_213442_outLine
+BABEL_OP1_102_11036_20120406_202335_inLine
+BABEL_OP1_102_11036_20120406_202335_outLine
+BABEL_OP1_102_11158_20121008_011850_inLine
+BABEL_OP1_102_11158_20121008_011850_outLine
+BABEL_OP1_102_11371_20120327_175933_inLine
+BABEL_OP1_102_11371_20120327_175933_outLine
+BABEL_OP1_102_11521_20121005_005530_inLine
+BABEL_OP1_102_11521_20121005_005530_outLine
+BABEL_OP1_102_11694_20121108_184639_inLine
+BABEL_OP1_102_11694_20121108_184639_outLine
+BABEL_OP1_102_12120_20121105_205527_inLine
+BABEL_OP1_102_12120_20121105_205527_outLine
+BABEL_OP1_102_12486_20121009_231421_inLine
+BABEL_OP1_102_12486_20121009_231421_outLine
+BABEL_OP1_102_12535_20121009_024245_inLine
+BABEL_OP1_102_12535_20121009_024245_outLine
+BABEL_OP1_102_12552_20120727_023454_inLine
+BABEL_OP1_102_12552_20120727_023454_outLine
+BABEL_OP1_102_12643_20121108_184648_inLine
+BABEL_OP1_102_12643_20121108_184648_outLine
+BABEL_OP1_102_12844_20120411_193813_inLine
+BABEL_OP1_102_12844_20120411_193813_outLine
+BABEL_OP1_102_13229_20130127_023814_inLine
+BABEL_OP1_102_13229_20130127_023814_outLine
+BABEL_OP1_102_13389_20120406_184440_inLine
+BABEL_OP1_102_13389_20120406_184440_outLine
+BABEL_OP1_102_13702_20130121_185149_inLine
+BABEL_OP1_102_13702_20130121_185149_outLine
+BABEL_OP1_102_13913_20120807_001423_inLine
+BABEL_OP1_102_13913_20120807_001423_outLine
+BABEL_OP1_102_14769_20120926_165746_inLine
+BABEL_OP1_102_14769_20120926_165746_outLine
+BABEL_OP1_102_14891_20121009_003232_inLine
+BABEL_OP1_102_14891_20121009_003232_outLine
+BABEL_OP1_102_15146_20120318_184752_inLine
+BABEL_OP1_102_15146_20120318_184752_outLine
+BABEL_OP1_102_15234_20121108_022333_inLine
+BABEL_OP1_102_15234_20121108_022333_outLine
+BABEL_OP1_102_15502_20120419_233859_inLine
+BABEL_OP1_102_15502_20120419_233859_outLine
+BABEL_OP1_102_15502_20120420_000213_inLine
+BABEL_OP1_102_15502_20120420_000213_outLine
+BABEL_OP1_102_15881_20120331_215830_inLine
+BABEL_OP1_102_15881_20120331_215830_outLine
+BABEL_OP1_102_15916_20120428_221806_inLine
+BABEL_OP1_102_15916_20120428_221806_outLine
+BABEL_OP1_102_16167_20130122_175936_inLine
+BABEL_OP1_102_16167_20130122_175936_outLine
+BABEL_OP1_102_16313_20120331_215132_inLine
+BABEL_OP1_102_16313_20120331_215132_outLine
+BABEL_OP1_102_16669_20120327_202211_inLine
+BABEL_OP1_102_16669_20120327_202211_outLine
+BABEL_OP1_102_17013_20121105_230820_inLine
+BABEL_OP1_102_17013_20121105_230820_outLine
+BABEL_OP1_102_17203_20121221_161532_inLine
+BABEL_OP1_102_17203_20121221_161532_outLine
+BABEL_OP1_102_17572_20120806_235812_inLine
+BABEL_OP1_102_17572_20120806_235812_outLine
+BABEL_OP1_102_17933_20120607_184111_inLine
+BABEL_OP1_102_17933_20120607_184111_outLine
+BABEL_OP1_102_18534_20121105_185859_inLine
+BABEL_OP1_102_18534_20121105_185859_outLine
+BABEL_OP1_102_18730_20130122_171244_inLine
+BABEL_OP1_102_18730_20130122_171244_outLine
+BABEL_OP1_102_18802_20121104_232940_inLine
+BABEL_OP1_102_18802_20121104_232940_outLine
+BABEL_OP1_102_19147_20120329_190609_inLine
+BABEL_OP1_102_19147_20120329_190609_outLine
+BABEL_OP1_102_19456_20121110_201037_inLine
+BABEL_OP1_102_19456_20121110_201037_outLine
+BABEL_OP1_102_19731_20130123_200845_inLine
+BABEL_OP1_102_19731_20130123_200845_outLine
+BABEL_OP1_102_19867_20130127_211111_inLine
+BABEL_OP1_102_19867_20130127_211111_outLine
+BABEL_OP1_102_20271_20120410_205746_inLine
+BABEL_OP1_102_20271_20120410_205746_outLine
+BABEL_OP1_102_20320_20120409_212129_inLine
+BABEL_OP1_102_20320_20120409_212129_outLine
+BABEL_OP1_102_20320_20120409_214042_inLine
+BABEL_OP1_102_20320_20120409_214042_outLine
+BABEL_OP1_102_20454_20121010_020017_inLine
+BABEL_OP1_102_20454_20121010_020017_outLine
+BABEL_OP1_102_20591_20120806_210212_inLine
+BABEL_OP1_102_20591_20120806_210212_outLine
+BABEL_OP1_102_21050_20120619_010126_inLine
+BABEL_OP1_102_21050_20120619_010126_outLine
+BABEL_OP1_102_21477_20120417_212152_inLine
+BABEL_OP1_102_21477_20120417_212152_outLine
+BABEL_OP1_102_21518_20120805_195607_inLine
+BABEL_OP1_102_21518_20120805_195607_outLine
+BABEL_OP1_102_21758_20120823_164553_inLine
+BABEL_OP1_102_21758_20120823_164553_outLine
+BABEL_OP1_102_21782_20120422_184156_inLine
+BABEL_OP1_102_21782_20120422_184156_outLine
+BABEL_OP1_102_22401_20121017_023338_inLine
+BABEL_OP1_102_22401_20121017_023338_outLine
+BABEL_OP1_102_22408_20120426_225012_inLine
+BABEL_OP1_102_22408_20120426_225012_outLine
+BABEL_OP1_102_23167_20120329_204718_inLine
+BABEL_OP1_102_23167_20120329_204718_outLine
+BABEL_OP1_102_24420_20120624_013709_inLine
+BABEL_OP1_102_24420_20120624_013709_outLine
+BABEL_OP1_102_24661_20121104_224032_inLine
+BABEL_OP1_102_24661_20121104_224032_outLine
+BABEL_OP1_102_24833_20120410_172706_inLine
+BABEL_OP1_102_24833_20120410_172706_outLine
+BABEL_OP1_102_25236_20120804_180700_inLine
+BABEL_OP1_102_25236_20120804_180700_outLine
+BABEL_OP1_102_25576_20120422_180912_inLine
+BABEL_OP1_102_25576_20120422_180912_outLine
+BABEL_OP1_102_25904_20120611_203203_inLine
+BABEL_OP1_102_25904_20120611_203203_outLine
+BABEL_OP1_102_25934_20120329_005438_inLine
+BABEL_OP1_102_25934_20120329_005438_outLine
+BABEL_OP1_102_26348_20121109_170513_inLine
+BABEL_OP1_102_26348_20121109_170513_outLine
+BABEL_OP1_102_27007_20120611_223823_inLine
+BABEL_OP1_102_27007_20120611_223823_outLine
+BABEL_OP1_102_27349_20120422_192337_inLine
+BABEL_OP1_102_27349_20120422_192337_outLine
+BABEL_OP1_102_27427_20120412_182452_inLine
+BABEL_OP1_102_27427_20120412_182452_outLine
+BABEL_OP1_102_27824_20120427_201104_inLine
+BABEL_OP1_102_27824_20120427_201104_outLine
+BABEL_OP1_102_27890_20121002_030324_inLine
+BABEL_OP1_102_27890_20121002_030324_outLine
+BABEL_OP1_102_28016_20120430_193141_inLine
+BABEL_OP1_102_28016_20120430_193141_outLine
+BABEL_OP1_102_28016_20120430_194530_inLine
+BABEL_OP1_102_28016_20120430_194530_outLine
+BABEL_OP1_102_28107_20120327_204144_inLine
+BABEL_OP1_102_28107_20120327_204144_outLine
+BABEL_OP1_102_28204_20120401_204624_inLine
+BABEL_OP1_102_28204_20120401_204624_outLine
+BABEL_OP1_102_28260_20120329_210829_inLine
+BABEL_OP1_102_28260_20120329_210829_outLine
+BABEL_OP1_102_28648_20120608_192702_inLine
+BABEL_OP1_102_28648_20120608_192702_outLine
+BABEL_OP1_102_29168_20120411_174248_inLine
+BABEL_OP1_102_29168_20120411_174248_outLine
+BABEL_OP1_102_29259_20120612_211621_inLine
+BABEL_OP1_102_29259_20120612_211621_outLine
+BABEL_OP1_102_29335_20120609_182335_inLine
+BABEL_OP1_102_29335_20120609_182335_outLine
+BABEL_OP1_102_29335_20120609_183151_inLine
+BABEL_OP1_102_29335_20120609_183151_outLine
+BABEL_OP1_102_29444_20120331_231513_inLine
+BABEL_OP1_102_29444_20120331_231513_outLine
+BABEL_OP1_102_29444_20120331_233317_inLine
+BABEL_OP1_102_29444_20120331_233317_outLine
+BABEL_OP1_102_29545_20121105_220136_inLine
+BABEL_OP1_102_29545_20121105_220136_outLine
+BABEL_OP1_102_29959_20130128_195931_inLine
+BABEL_OP1_102_29959_20130128_195931_outLine
+BABEL_OP1_102_29959_20130128_223813_inLine
+BABEL_OP1_102_29959_20130128_223813_outLine
+BABEL_OP1_102_30266_20120331_212330_inLine
+BABEL_OP1_102_30266_20120331_212330_outLine
+BABEL_OP1_102_30722_20121011_013755_inLine
+BABEL_OP1_102_30722_20121011_013755_outLine
+BABEL_OP1_102_31031_20120611_193208_inLine
+BABEL_OP1_102_31031_20120611_193208_outLine
+BABEL_OP1_102_31902_20120425_211816_inLine
+BABEL_OP1_102_31902_20120425_211816_outLine
+BABEL_OP1_102_31917_20120611_195339_inLine
+BABEL_OP1_102_31917_20120611_195339_outLine
+BABEL_OP1_102_32562_20121010_014014_inLine
+BABEL_OP1_102_32562_20121010_014014_outLine
+BABEL_OP1_102_32642_20121104_220528_inLine
+BABEL_OP1_102_32642_20121104_220528_outLine
+BABEL_OP1_102_33023_20120329_224858_inLine
+BABEL_OP1_102_33023_20120329_224858_outLine
+BABEL_OP1_102_33540_20120401_212225_inLine
+BABEL_OP1_102_33540_20120401_212225_outLine
+BABEL_OP1_102_33671_20120422_231219_inLine
+BABEL_OP1_102_33671_20120422_231219_outLine
+BABEL_OP1_102_34169_20120331_183840_inLine
+BABEL_OP1_102_34169_20120331_183840_outLine
+BABEL_OP1_102_34235_20120405_190745_inLine
+BABEL_OP1_102_34235_20120405_190745_outLine
+BABEL_OP1_102_34480_20121012_193452_inLine
+BABEL_OP1_102_34480_20121012_193452_outLine
+BABEL_OP1_102_34590_20120417_151435_inLine
+BABEL_OP1_102_34590_20120417_151435_outLine
+BABEL_OP1_102_34590_20120417_155556_inLine
+BABEL_OP1_102_34590_20120417_155556_outLine
+BABEL_OP1_102_34930_20120411_200043_inLine
+BABEL_OP1_102_34930_20120411_200043_outLine
+BABEL_OP1_102_35011_20120420_020024_inLine
+BABEL_OP1_102_35011_20120420_020024_outLine
+BABEL_OP1_102_35229_20121106_204019_inLine
+BABEL_OP1_102_35229_20121106_204019_outLine
+BABEL_OP1_102_35324_20120426_180016_inLine
+BABEL_OP1_102_35324_20120426_180016_outLine
+BABEL_OP1_102_35324_20120426_203214_inLine
+BABEL_OP1_102_35324_20120426_203214_outLine
+BABEL_OP1_102_35455_20121112_000231_inLine
+BABEL_OP1_102_35455_20121112_000231_outLine
+BABEL_OP1_102_36868_20130209_201544_inLine
+BABEL_OP1_102_36868_20130209_201544_outLine
+BABEL_OP1_102_37260_20120808_012733_inLine
+BABEL_OP1_102_37260_20120808_012733_outLine
+BABEL_OP1_102_37260_20120808_014150_inLine
+BABEL_OP1_102_37260_20120808_014150_outLine
+BABEL_OP1_102_37268_20121226_203217_inLine
+BABEL_OP1_102_37268_20121226_203217_outLine
+BABEL_OP1_102_37285_20120405_223443_inLine
+BABEL_OP1_102_37285_20120405_223443_outLine
+BABEL_OP1_102_37444_20130128_032426_inLine
+BABEL_OP1_102_37444_20130128_032426_outLine
+BABEL_OP1_102_37461_20120409_191629_inLine
+BABEL_OP1_102_37461_20120409_191629_outLine
+BABEL_OP1_102_37461_20120409_194138_inLine
+BABEL_OP1_102_37461_20120409_194138_outLine
+BABEL_OP1_102_37461_20120409_195519_inLine
+BABEL_OP1_102_37461_20120409_195519_outLine
+BABEL_OP1_102_38264_20121105_050622_inLine
+BABEL_OP1_102_38264_20121105_050622_outLine
+BABEL_OP1_102_38592_20121225_215825_inLine
+BABEL_OP1_102_38592_20121225_215825_outLine
+BABEL_OP1_102_38698_20120401_215032_inLine
+BABEL_OP1_102_38698_20120401_215032_outLine
+BABEL_OP1_102_38863_20121011_183009_inLine
+BABEL_OP1_102_38863_20121011_183009_outLine
+BABEL_OP1_102_39114_20120930_180045_inLine
+BABEL_OP1_102_39114_20120930_180045_outLine
+BABEL_OP1_102_39364_20121105_220855_inLine
+BABEL_OP1_102_39364_20121105_220855_outLine
+BABEL_OP1_102_39430_20120411_182026_inLine
+BABEL_OP1_102_39430_20120411_182026_outLine
+BABEL_OP1_102_39430_20120411_184729_inLine
+BABEL_OP1_102_39430_20120411_184729_outLine
+BABEL_OP1_102_40133_20121112_214034_inLine
+BABEL_OP1_102_40133_20121112_214034_outLine
+BABEL_OP1_102_40168_20120428_173400_inLine
+BABEL_OP1_102_40168_20120428_173400_outLine
+BABEL_OP1_102_40882_20130209_204142_inLine
+BABEL_OP1_102_40882_20130209_204142_outLine
+BABEL_OP1_102_41561_20121111_220752_inLine
+BABEL_OP1_102_41561_20121111_220752_outLine
+BABEL_OP1_102_41949_20120426_222144_inLine
+BABEL_OP1_102_41949_20120426_222144_outLine
+BABEL_OP1_102_42615_20120327_180819_inLine
+BABEL_OP1_102_42615_20120327_180819_outLine
+BABEL_OP1_102_42651_20120409_221530_inLine
+BABEL_OP1_102_42651_20120409_221530_outLine
+BABEL_OP1_102_42749_20121114_005458_inLine
+BABEL_OP1_102_42749_20121114_005458_outLine
+BABEL_OP1_102_42749_20121114_010754_inLine
+BABEL_OP1_102_42749_20121114_010754_outLine
+BABEL_OP1_102_43383_20120406_193121_inLine
+BABEL_OP1_102_43383_20120406_193121_outLine
+BABEL_OP1_102_43423_20120919_201131_inLine
+BABEL_OP1_102_43423_20120919_201131_outLine
+BABEL_OP1_102_43426_20120501_170331_inLine
+BABEL_OP1_102_43426_20120501_170331_outLine
+BABEL_OP1_102_43553_20120408_174809_inLine
+BABEL_OP1_102_43553_20120408_174809_outLine
+BABEL_OP1_102_43652_20120428_191659_inLine
+BABEL_OP1_102_43652_20120428_191659_outLine
+BABEL_OP1_102_44649_20120611_185930_inLine
+BABEL_OP1_102_44649_20120611_185930_outLine
+BABEL_OP1_102_44829_20120907_011054_inLine
+BABEL_OP1_102_44829_20120907_011054_outLine
+BABEL_OP1_102_44829_20120907_013730_inLine
+BABEL_OP1_102_44829_20120907_013730_outLine
+BABEL_OP1_102_45361_20120611_222502_inLine
+BABEL_OP1_102_45361_20120611_222502_outLine
+BABEL_OP1_102_45677_20130123_192645_inLine
+BABEL_OP1_102_45677_20130123_192645_outLine
+BABEL_OP1_102_45681_20120623_173741_inLine
+BABEL_OP1_102_45681_20120623_173741_outLine
+BABEL_OP1_102_45738_20120806_202458_inLine
+BABEL_OP1_102_45738_20120806_202458_outLine
+BABEL_OP1_102_45892_20120408_220557_inLine
+BABEL_OP1_102_45892_20120408_220557_outLine
+BABEL_OP1_102_45931_20120421_233726_inLine
+BABEL_OP1_102_45931_20120421_233726_outLine
+BABEL_OP1_102_46002_20121009_215715_inLine
+BABEL_OP1_102_46002_20121009_215715_outLine
+BABEL_OP1_102_46269_20121110_215228_inLine
+BABEL_OP1_102_46269_20121110_215228_outLine
+BABEL_OP1_102_47634_20120408_214325_inLine
+BABEL_OP1_102_47634_20120408_214325_outLine
+BABEL_OP1_102_47823_20120804_180038_inLine
+BABEL_OP1_102_47823_20120804_180038_outLine
+BABEL_OP1_102_48410_20120407_204734_inLine
+BABEL_OP1_102_48410_20120407_204734_outLine
+BABEL_OP1_102_48976_20120410_161651_inLine
+BABEL_OP1_102_48976_20120410_161651_outLine
+BABEL_OP1_102_49042_20120408_165038_inLine
+BABEL_OP1_102_49042_20120408_165038_outLine
+BABEL_OP1_102_49628_20120817_204731_inLine
+BABEL_OP1_102_49628_20120817_204731_outLine
+BABEL_OP1_102_49864_20120421_155657_inLine
+BABEL_OP1_102_49864_20120421_155657_outLine
+BABEL_OP1_102_50555_20120606_224819_inLine
+BABEL_OP1_102_50555_20120606_224819_outLine
+BABEL_OP1_102_50597_20120623_193352_inLine
+BABEL_OP1_102_50597_20120623_193352_outLine
+BABEL_OP1_102_50718_20120421_191449_inLine
+BABEL_OP1_102_50718_20120421_191449_outLine
+BABEL_OP1_102_50752_20121227_204235_inLine
+BABEL_OP1_102_50752_20121227_204235_outLine
+BABEL_OP1_102_50763_20120405_203621_inLine
+BABEL_OP1_102_50763_20120405_203621_outLine
+BABEL_OP1_102_50798_20120426_190454_inLine
+BABEL_OP1_102_50798_20120426_190454_outLine
+BABEL_OP1_102_52335_20130123_183229_inLine
+BABEL_OP1_102_52335_20130123_183229_outLine
+BABEL_OP1_102_52606_20121009_222016_inLine
+BABEL_OP1_102_52606_20121009_222016_outLine
+BABEL_OP1_102_52642_20120803_212045_inLine
+BABEL_OP1_102_52642_20120803_212045_outLine
+BABEL_OP1_102_52691_20120407_210408_inLine
+BABEL_OP1_102_52691_20120407_210408_outLine
+BABEL_OP1_102_52691_20120407_211728_inLine
+BABEL_OP1_102_52691_20120407_211728_outLine
+BABEL_OP1_102_52691_20120407_213757_inLine
+BABEL_OP1_102_52691_20120407_213757_outLine
+BABEL_OP1_102_52902_20120607_175045_inLine
+BABEL_OP1_102_52902_20120607_175045_outLine
+BABEL_OP1_102_52902_20120607_180239_inLine
+BABEL_OP1_102_52902_20120607_180239_outLine
+BABEL_OP1_102_53500_20120428_175953_inLine
+BABEL_OP1_102_53500_20120428_175953_outLine
+BABEL_OP1_102_53703_20120409_180047_inLine
+BABEL_OP1_102_53703_20120409_180047_outLine
+BABEL_OP1_102_53982_20120607_220642_inLine
+BABEL_OP1_102_53982_20120607_220642_outLine
+BABEL_OP1_102_54241_20120911_024357_inLine
+BABEL_OP1_102_54241_20120911_024357_outLine
+BABEL_OP1_102_54241_20120911_025705_inLine
+BABEL_OP1_102_54241_20120911_025705_outLine
+BABEL_OP1_102_55182_20120330_201037_inLine
+BABEL_OP1_102_55182_20120330_201037_outLine
+BABEL_OP1_102_55450_20121013_171507_inLine
+BABEL_OP1_102_55450_20121013_171507_outLine
+BABEL_OP1_102_55470_20120429_194956_inLine
+BABEL_OP1_102_55470_20120429_194956_outLine
+BABEL_OP1_102_55823_20121010_005200_inLine
+BABEL_OP1_102_55823_20121010_005200_outLine
+BABEL_OP1_102_55874_20121108_215431_inLine
+BABEL_OP1_102_55874_20121108_215431_outLine
+BABEL_OP1_102_56070_20120410_224512_inLine
+BABEL_OP1_102_56070_20120410_224512_outLine
+BABEL_OP1_102_56648_20120615_181652_inLine
+BABEL_OP1_102_56648_20120615_181652_outLine
+BABEL_OP1_102_56812_20121010_203710_inLine
+BABEL_OP1_102_56812_20121010_203710_outLine
+BABEL_OP1_102_56943_20121221_203039_inLine
+BABEL_OP1_102_56943_20121221_203039_outLine
+BABEL_OP1_102_57039_20121107_201157_inLine
+BABEL_OP1_102_57039_20121107_201157_outLine
+BABEL_OP1_102_57422_20120607_213941_inLine
+BABEL_OP1_102_57422_20120607_213941_outLine
+BABEL_OP1_102_57629_20121010_011015_inLine
+BABEL_OP1_102_57629_20121010_011015_outLine
+BABEL_OP1_102_57907_20121013_035627_inLine
+BABEL_OP1_102_57907_20121013_035627_outLine
+BABEL_OP1_102_58715_20120425_190758_inLine
+BABEL_OP1_102_58715_20120425_190758_outLine
+BABEL_OP1_102_58863_20120404_195038_inLine
+BABEL_OP1_102_58863_20120404_195038_outLine
+BABEL_OP1_102_58947_20121106_203812_inLine
+BABEL_OP1_102_58947_20121106_203812_outLine
+BABEL_OP1_102_58947_20121106_205338_inLine
+BABEL_OP1_102_58947_20121106_205338_outLine
+BABEL_OP1_102_59169_20120611_172953_inLine
+BABEL_OP1_102_59169_20120611_172953_outLine
+BABEL_OP1_102_59383_20121220_151350_inLine
+BABEL_OP1_102_59383_20121220_151350_outLine
+BABEL_OP1_102_59891_20120611_212238_inLine
+BABEL_OP1_102_59891_20120611_212238_outLine
+BABEL_OP1_102_59925_20121111_214225_inLine
+BABEL_OP1_102_59925_20121111_214225_outLine
+BABEL_OP1_102_60193_20120419_201756_inLine
+BABEL_OP1_102_60193_20120419_201756_outLine
+BABEL_OP1_102_60277_20120615_195600_inLine
+BABEL_OP1_102_60277_20120615_195600_outLine
+BABEL_OP1_102_60826_20120606_231535_inLine
+BABEL_OP1_102_60826_20120606_231535_outLine
+BABEL_OP1_102_60848_20121110_170724_inLine
+BABEL_OP1_102_60848_20121110_170724_outLine
+BABEL_OP1_102_60881_20120401_212818_inLine
+BABEL_OP1_102_60881_20120401_212818_outLine
+BABEL_OP1_102_60995_20121107_203546_inLine
+BABEL_OP1_102_60995_20121107_203546_outLine
+BABEL_OP1_102_61263_20121112_213923_inLine
+BABEL_OP1_102_61263_20121112_213923_outLine
+BABEL_OP1_102_61446_20120420_184155_inLine
+BABEL_OP1_102_61446_20120420_184155_outLine
+BABEL_OP1_102_62132_20120614_214158_inLine
+BABEL_OP1_102_62132_20120614_214158_outLine
+BABEL_OP1_102_62923_20130122_190544_inLine
+BABEL_OP1_102_62923_20130122_190544_outLine
+BABEL_OP1_102_63076_20121224_225415_inLine
+BABEL_OP1_102_63076_20121224_225415_outLine
+BABEL_OP1_102_64185_20120722_220159_inLine
+BABEL_OP1_102_64185_20120722_220159_outLine
+BABEL_OP1_102_64351_20120608_202610_inLine
+BABEL_OP1_102_64351_20120608_202610_outLine
+BABEL_OP1_102_65248_20120317_180718_inLine
+BABEL_OP1_102_65248_20120317_180718_outLine
+BABEL_OP1_102_65273_20121226_233200_inLine
+BABEL_OP1_102_65273_20121226_233200_outLine
+BABEL_OP1_102_65371_20121228_213615_inLine
+BABEL_OP1_102_65371_20121228_213615_outLine
+BABEL_OP1_102_65415_20120410_193034_inLine
+BABEL_OP1_102_65415_20120410_193034_outLine
+BABEL_OP1_102_65580_20120320_234602_inLine
+BABEL_OP1_102_65580_20120320_234602_outLine
+BABEL_OP1_102_65837_20121106_201713_inLine
+BABEL_OP1_102_65837_20121106_201713_outLine
+BABEL_OP1_102_66416_20120817_204557_inLine
+BABEL_OP1_102_66416_20120817_204557_outLine
+BABEL_OP1_102_66441_20120411_170112_inLine
+BABEL_OP1_102_66441_20120411_170112_outLine
+BABEL_OP1_102_66559_20121227_172234_inLine
+BABEL_OP1_102_66559_20121227_172234_outLine
+BABEL_OP1_102_67150_20121106_232551_inLine
+BABEL_OP1_102_67150_20121106_232551_outLine
+BABEL_OP1_102_67733_20120409_192100_inLine
+BABEL_OP1_102_67733_20120409_192100_outLine
+BABEL_OP1_102_67750_20120330_210301_inLine
+BABEL_OP1_102_67750_20120330_210301_outLine
+BABEL_OP1_102_67798_20120408_211247_inLine
+BABEL_OP1_102_67798_20120408_211247_outLine
+BABEL_OP1_102_67916_20121224_185018_inLine
+BABEL_OP1_102_67916_20121224_185018_outLine
+BABEL_OP1_102_69049_20120422_174706_inLine
+BABEL_OP1_102_69049_20120422_174706_outLine
+BABEL_OP1_102_69145_20121006_214000_inLine
+BABEL_OP1_102_69145_20121006_214000_outLine
+BABEL_OP1_102_69275_20121009_000322_inLine
+BABEL_OP1_102_69275_20121009_000322_outLine
+BABEL_OP1_102_69368_20120328_214605_inLine
+BABEL_OP1_102_69368_20120328_214605_outLine
+BABEL_OP1_102_70555_20120421_203231_inLine
+BABEL_OP1_102_70555_20120421_203231_outLine
+BABEL_OP1_102_71778_20120608_222028_inLine
+BABEL_OP1_102_71778_20120608_222028_outLine
+BABEL_OP1_102_72032_20120329_225115_inLine
+BABEL_OP1_102_72032_20120329_225115_outLine
+BABEL_OP1_102_72718_20121010_030640_inLine
+BABEL_OP1_102_72718_20121010_030640_outLine
+BABEL_OP1_102_72799_20120428_225215_inLine
+BABEL_OP1_102_72799_20120428_225215_outLine
+BABEL_OP1_102_73050_20120929_012255_inLine
+BABEL_OP1_102_73050_20120929_012255_outLine
+BABEL_OP1_102_73438_20121103_170431_inLine
+BABEL_OP1_102_73438_20121103_170431_outLine
+BABEL_OP1_102_73440_20120428_195653_inLine
+BABEL_OP1_102_73440_20120428_195653_outLine
+BABEL_OP1_102_73452_20121003_021245_inLine
+BABEL_OP1_102_73452_20121003_021245_outLine
+BABEL_OP1_102_73786_20120420_171039_inLine
+BABEL_OP1_102_73786_20120420_171039_outLine
+BABEL_OP1_102_74043_20120422_170724_inLine
+BABEL_OP1_102_74043_20120422_170724_outLine
+BABEL_OP1_102_74368_20121008_041653_inLine
+BABEL_OP1_102_74368_20121008_041653_outLine
+BABEL_OP1_102_74709_20120806_191528_inLine
+BABEL_OP1_102_74709_20120806_191528_outLine
+BABEL_OP1_102_74823_20120330_181459_inLine
+BABEL_OP1_102_74823_20120330_181459_outLine
+BABEL_OP1_102_75140_20120330_171509_inLine
+BABEL_OP1_102_75140_20120330_171509_outLine
+BABEL_OP1_102_75354_20121105_033257_inLine
+BABEL_OP1_102_75354_20121105_033257_outLine
+BABEL_OP1_102_75498_20120806_180214_inLine
+BABEL_OP1_102_75498_20120806_180214_outLine
+BABEL_OP1_102_75680_20121110_180407_inLine
+BABEL_OP1_102_75680_20121110_180407_outLine
+BABEL_OP1_102_75990_20120426_182351_inLine
+BABEL_OP1_102_75990_20120426_182351_outLine
+BABEL_OP1_102_76331_20120806_185250_inLine
+BABEL_OP1_102_76331_20120806_185250_outLine
+BABEL_OP1_102_76451_20120329_193459_inLine
+BABEL_OP1_102_76451_20120329_193459_outLine
+BABEL_OP1_102_77244_20121001_003159_inLine
+BABEL_OP1_102_77244_20121001_003159_outLine
+BABEL_OP1_102_77465_20120607_001521_inLine
+BABEL_OP1_102_77465_20120607_001521_outLine
+BABEL_OP1_102_77771_20121227_191404_inLine
+BABEL_OP1_102_77771_20121227_191404_outLine
+BABEL_OP1_102_77811_20130123_215211_inLine
+BABEL_OP1_102_77811_20130123_215211_outLine
+BABEL_OP1_102_78514_20120409_182010_inLine
+BABEL_OP1_102_78514_20120409_182010_outLine
+BABEL_OP1_102_79495_20120320_011136_inLine
+BABEL_OP1_102_79495_20120320_011136_outLine
+BABEL_OP1_102_80868_20121028_015553_inLine
+BABEL_OP1_102_80868_20121028_015553_outLine
+BABEL_OP1_102_81084_20120406_191910_inLine
+BABEL_OP1_102_81084_20120406_191910_outLine
+BABEL_OP1_102_81587_20121225_213038_inLine
+BABEL_OP1_102_81587_20121225_213038_outLine
+BABEL_OP1_102_81717_20130209_201202_inLine
+BABEL_OP1_102_81717_20130209_201202_outLine
+BABEL_OP1_102_81878_20120331_181439_inLine
+BABEL_OP1_102_81878_20120331_181439_outLine
+BABEL_OP1_102_81878_20120331_182958_inLine
+BABEL_OP1_102_81878_20120331_182958_outLine
+BABEL_OP1_102_82009_20121104_013002_inLine
+BABEL_OP1_102_82009_20121104_013002_outLine
+BABEL_OP1_102_82023_20120330_175253_inLine
+BABEL_OP1_102_82023_20120330_175253_outLine
+BABEL_OP1_102_82192_20120429_180649_inLine
+BABEL_OP1_102_82192_20120429_180649_outLine
+BABEL_OP1_102_82408_20120402_190241_inLine
+BABEL_OP1_102_82408_20120402_190241_outLine
+BABEL_OP1_102_83256_20120330_210950_inLine
+BABEL_OP1_102_83256_20120330_210950_outLine
+BABEL_OP1_102_83493_20120429_172305_inLine
+BABEL_OP1_102_83493_20120429_172305_outLine
+BABEL_OP1_102_83493_20120429_175508_inLine
+BABEL_OP1_102_83493_20120429_175508_outLine
+BABEL_OP1_102_83531_20120408_201200_inLine
+BABEL_OP1_102_83531_20120408_201200_outLine
+BABEL_OP1_102_83531_20120408_203827_inLine
+BABEL_OP1_102_83531_20120408_203827_outLine
+BABEL_OP1_102_83634_20130123_212154_inLine
+BABEL_OP1_102_83634_20130123_212154_outLine
+BABEL_OP1_102_83791_20120420_215616_inLine
+BABEL_OP1_102_83791_20120420_215616_outLine
+BABEL_OP1_102_84088_20120328_180739_inLine
+BABEL_OP1_102_84088_20120328_180739_outLine
+BABEL_OP1_102_84284_20121225_175332_inLine
+BABEL_OP1_102_84284_20121225_175332_outLine
+BABEL_OP1_102_84397_20121110_230552_inLine
+BABEL_OP1_102_84397_20121110_230552_outLine
+BABEL_OP1_102_84439_20120427_184114_inLine
+BABEL_OP1_102_84439_20120427_184114_outLine
+BABEL_OP1_102_84608_20120609_194053_inLine
+BABEL_OP1_102_84608_20120609_194053_outLine
+BABEL_OP1_102_84943_20120401_170153_inLine
+BABEL_OP1_102_84943_20120401_170153_outLine
+BABEL_OP1_102_85716_20120401_165708_inLine
+BABEL_OP1_102_85716_20120401_165708_outLine
+BABEL_OP1_102_86004_20120418_230109_inLine
+BABEL_OP1_102_86004_20120418_230109_outLine
+BABEL_OP1_102_86886_20121112_201306_inLine
+BABEL_OP1_102_86886_20121112_201306_outLine
+BABEL_OP1_102_87234_20121224_212540_inLine
+BABEL_OP1_102_87234_20121224_212540_outLine
+BABEL_OP1_102_87857_20120405_202526_inLine
+BABEL_OP1_102_87857_20120405_202526_outLine
+BABEL_OP1_102_87961_20120423_155726_inLine
+BABEL_OP1_102_87961_20120423_155726_outLine
+BABEL_OP1_102_88163_20121112_003006_inLine
+BABEL_OP1_102_88163_20121112_003006_outLine
+BABEL_OP1_102_89993_20120607_175900_inLine
+BABEL_OP1_102_89993_20120607_175900_outLine
+BABEL_OP1_102_90393_20120419_214927_inLine
+BABEL_OP1_102_90393_20120419_214927_outLine
+BABEL_OP1_102_90511_20120329_224306_inLine
+BABEL_OP1_102_90511_20120329_224306_outLine
+BABEL_OP1_102_90810_20120404_221650_inLine
+BABEL_OP1_102_90810_20120404_221650_outLine
+BABEL_OP1_102_90819_20120614_222542_inLine
+BABEL_OP1_102_90819_20120614_222542_outLine
+BABEL_OP1_102_90890_20120320_235811_inLine
+BABEL_OP1_102_90890_20120320_235811_outLine
+BABEL_OP1_102_90975_20130127_194034_inLine
+BABEL_OP1_102_90975_20130127_194034_outLine
+BABEL_OP1_102_90975_20130127_195301_inLine
+BABEL_OP1_102_90975_20130127_195301_outLine
+BABEL_OP1_102_91171_20121222_000026_inLine
+BABEL_OP1_102_91171_20121222_000026_outLine
+BABEL_OP1_102_91358_20121103_191541_inLine
+BABEL_OP1_102_91358_20121103_191541_outLine
+BABEL_OP1_102_91386_20121226_175240_inLine
+BABEL_OP1_102_91386_20121226_175240_outLine
+BABEL_OP1_102_91401_20120409_195325_inLine
+BABEL_OP1_102_91401_20120409_195325_outLine
+BABEL_OP1_102_91481_20120806_232222_inLine
+BABEL_OP1_102_91481_20120806_232222_outLine
+BABEL_OP1_102_91865_20130127_193426_inLine
+BABEL_OP1_102_91865_20130127_193426_outLine
+BABEL_OP1_102_92002_20120821_172434_inLine
+BABEL_OP1_102_92002_20120821_172434_outLine
+BABEL_OP1_102_92252_20120805_193105_inLine
+BABEL_OP1_102_92252_20120805_193105_outLine
+BABEL_OP1_102_92252_20120805_202508_inLine
+BABEL_OP1_102_92252_20120805_202508_outLine
+BABEL_OP1_102_92321_20120729_204129_inLine
+BABEL_OP1_102_92321_20120729_204129_outLine
+BABEL_OP1_102_92386_20120401_175909_inLine
+BABEL_OP1_102_92386_20120401_175909_outLine
+BABEL_OP1_102_92407_20120330_180101_inLine
+BABEL_OP1_102_92407_20120330_180101_outLine
+BABEL_OP1_102_92628_20120404_212106_inLine
+BABEL_OP1_102_92628_20120404_212106_outLine
+BABEL_OP1_102_92800_20120408_165253_inLine
+BABEL_OP1_102_92800_20120408_165253_outLine
+BABEL_OP1_102_93091_20120425_204602_inLine
+BABEL_OP1_102_93091_20120425_204602_outLine
+BABEL_OP1_102_93091_20120425_205745_inLine
+BABEL_OP1_102_93091_20120425_205745_outLine
+BABEL_OP1_102_93509_20120422_230046_inLine
+BABEL_OP1_102_93509_20120422_230046_outLine
+BABEL_OP1_102_93607_20120806_194627_inLine
+BABEL_OP1_102_93607_20120806_194627_outLine
+BABEL_OP1_102_94162_20130209_213329_inLine
+BABEL_OP1_102_94162_20130209_213329_outLine
+BABEL_OP1_102_94542_20130122_222709_inLine
+BABEL_OP1_102_94542_20130122_222709_outLine
+BABEL_OP1_102_94694_20120611_183126_inLine
+BABEL_OP1_102_94694_20120611_183126_outLine
+BABEL_OP1_102_94696_20130127_183814_inLine
+BABEL_OP1_102_94696_20130127_183814_outLine
+BABEL_OP1_102_95350_20120420_225049_inLine
+BABEL_OP1_102_95350_20120420_225049_outLine
+BABEL_OP1_102_95566_20121222_024129_inLine
+BABEL_OP1_102_95566_20121222_024129_outLine
+BABEL_OP1_102_95637_20120329_225942_inLine
+BABEL_OP1_102_95637_20120329_225942_outLine
+BABEL_OP1_102_95650_20120327_230850_inLine
+BABEL_OP1_102_95650_20120327_230850_outLine
+BABEL_OP1_102_95815_20120401_233401_inLine
+BABEL_OP1_102_95815_20120401_233401_outLine
+BABEL_OP1_102_95849_20121106_222829_inLine
+BABEL_OP1_102_95849_20121106_222829_outLine
+BABEL_OP1_102_96283_20120720_021526_inLine
+BABEL_OP1_102_96283_20120720_021526_outLine
+BABEL_OP1_102_96347_20120330_201932_inLine
+BABEL_OP1_102_96347_20120330_201932_outLine
+BABEL_OP1_102_96788_20120411_183347_inLine
+BABEL_OP1_102_96788_20120411_183347_outLine
+BABEL_OP1_102_97260_20120409_175649_inLine
+BABEL_OP1_102_97260_20120409_175649_outLine
+BABEL_OP1_102_97590_20121110_214746_inLine
+BABEL_OP1_102_97590_20121110_214746_outLine
+BABEL_OP1_102_97590_20121110_215543_inLine
+BABEL_OP1_102_97590_20121110_215543_outLine
+BABEL_OP1_102_98402_20121112_014920_inLine
+BABEL_OP1_102_98402_20121112_014920_outLine
+BABEL_OP1_102_98675_20130209_215547_inLine
+BABEL_OP1_102_98675_20130209_215547_outLine
+BABEL_OP1_102_99514_20120406_182505_inLine
+BABEL_OP1_102_99514_20120406_182505_outLine
+BABEL_OP1_102_99731_20121220_214209_inLine
+BABEL_OP1_102_99731_20121220_214209_outLine
+BABEL_OP1_102_99823_20120429_181728_inLine
+BABEL_OP1_102_99823_20120429_181728_outLine
diff --git a/egs/babel/s5d/conf/lists/102-assamese/train.untranscribed.list b/egs/babel/s5d/conf/lists/102-assamese/train.untranscribed.list
new file mode 100644
index 00000000000..f93c4c32be7
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/102-assamese/train.untranscribed.list
@@ -0,0 +1,259 @@
+BABEL_OP1_102_11267_20120807_194639_inLine
+BABEL_OP1_102_11267_20120807_194639_outLine
+BABEL_OP1_102_11311_20120420_205813_inLine
+BABEL_OP1_102_11311_20120420_205813_outLine
+BABEL_OP1_102_14610_20120405_182316_inLine
+BABEL_OP1_102_14610_20120405_182316_outLine
+BABEL_OP1_102_14936_20120408_200722_inLine
+BABEL_OP1_102_14936_20120408_200722_outLine
+BABEL_OP1_102_16855_20121112_222619_inLine
+BABEL_OP1_102_16855_20121112_222619_outLine
+BABEL_OP1_102_17080_20120328_184723_inLine
+BABEL_OP1_102_17080_20120328_184723_outLine
+BABEL_OP1_102_19656_20120426_205905_inLine
+BABEL_OP1_102_19656_20120426_205905_outLine
+BABEL_OP1_102_22973_20121228_181929_inLine
+BABEL_OP1_102_22973_20121228_181929_outLine
+BABEL_OP1_102_24642_20121027_144752_inLine
+BABEL_OP1_102_24642_20121027_144752_outLine
+BABEL_OP1_102_24799_20120425_195004_inLine
+BABEL_OP1_102_24799_20120425_195004_outLine
+BABEL_OP1_102_25106_20120408_181647_inLine
+BABEL_OP1_102_25106_20120408_181647_outLine
+BABEL_OP1_102_25992_20120611_184443_inLine
+BABEL_OP1_102_25992_20120611_184443_outLine
+BABEL_OP1_102_26164_20121224_194642_inLine
+BABEL_OP1_102_26164_20121224_194642_outLine
+BABEL_OP1_102_27605_20120420_193239_inLine
+BABEL_OP1_102_27605_20120420_193239_outLine
+BABEL_OP1_102_27825_20120612_214044_inLine
+BABEL_OP1_102_27825_20120612_214044_outLine
+BABEL_OP1_102_27825_20120612_215834_inLine
+BABEL_OP1_102_27825_20120612_215834_outLine
+BABEL_OP1_102_27916_20121011_020742_inLine
+BABEL_OP1_102_27916_20121011_020742_outLine
+BABEL_OP1_102_29302_20120411_221747_inLine
+BABEL_OP1_102_29302_20120411_221747_outLine
+BABEL_OP1_102_29812_20120408_222336_inLine
+BABEL_OP1_102_29812_20120408_222336_outLine
+BABEL_OP1_102_30227_20121105_031526_inLine
+BABEL_OP1_102_30227_20121105_031526_outLine
+BABEL_OP1_102_31393_20120409_185950_inLine
+BABEL_OP1_102_31393_20120409_185950_outLine
+BABEL_OP1_102_31538_20120411_163952_inLine
+BABEL_OP1_102_31538_20120411_163952_outLine
+BABEL_OP1_102_31975_20120805_174531_inLine
+BABEL_OP1_102_31975_20120805_174531_outLine
+BABEL_OP1_102_32045_20120408_214902_inLine
+BABEL_OP1_102_32045_20120408_214902_outLine
+BABEL_OP1_102_32236_20130121_194429_inLine
+BABEL_OP1_102_32236_20130121_194429_outLine
+BABEL_OP1_102_32263_20120805_213534_inLine
+BABEL_OP1_102_32263_20120805_213534_outLine
+BABEL_OP1_102_32274_20120407_220211_inLine
+BABEL_OP1_102_32274_20120407_220211_outLine
+BABEL_OP1_102_34558_20120401_172719_inLine
+BABEL_OP1_102_34558_20120401_172719_outLine
+BABEL_OP1_102_35932_20120409_181050_inLine
+BABEL_OP1_102_35932_20120409_181050_outLine
+BABEL_OP1_102_35972_20120804_222857_inLine
+BABEL_OP1_102_35972_20120804_222857_outLine
+BABEL_OP1_102_36561_20120615_182603_inLine
+BABEL_OP1_102_36561_20120615_182603_outLine
+BABEL_OP1_102_37094_20120327_212647_inLine
+BABEL_OP1_102_37094_20120327_212647_outLine
+BABEL_OP1_102_37374_20120807_002505_inLine
+BABEL_OP1_102_37374_20120807_002505_outLine
+BABEL_OP1_102_37374_20120807_004102_inLine
+BABEL_OP1_102_37374_20120807_004102_outLine
+BABEL_OP1_102_39141_20121220_172812_inLine
+BABEL_OP1_102_39141_20121220_172812_outLine
+BABEL_OP1_102_39497_20120428_183546_inLine
+BABEL_OP1_102_39497_20120428_183546_outLine
+BABEL_OP1_102_39774_20121224_203424_inLine
+BABEL_OP1_102_39774_20121224_203424_outLine
+BABEL_OP1_102_40040_20120611_202254_inLine
+BABEL_OP1_102_40040_20120611_202254_outLine
+BABEL_OP1_102_41512_20121224_195155_inLine
+BABEL_OP1_102_41512_20121224_195155_outLine
+BABEL_OP1_102_41686_20120404_200841_inLine
+BABEL_OP1_102_41686_20120404_200841_outLine
+BABEL_OP1_102_42420_20121225_200910_inLine
+BABEL_OP1_102_42420_20121225_200910_outLine
+BABEL_OP1_102_43317_20120804_190955_inLine
+BABEL_OP1_102_43317_20120804_190955_outLine
+BABEL_OP1_102_44038_20121110_191648_inLine
+BABEL_OP1_102_44038_20121110_191648_outLine
+BABEL_OP1_102_44117_20121105_205012_inLine
+BABEL_OP1_102_44117_20121105_205012_outLine
+BABEL_OP1_102_44500_20120609_205327_inLine
+BABEL_OP1_102_44500_20120609_205327_outLine
+BABEL_OP1_102_44744_20120330_204705_inLine
+BABEL_OP1_102_44744_20120330_204705_outLine
+BABEL_OP1_102_45145_20120417_144517_inLine
+BABEL_OP1_102_45145_20120417_144517_outLine
+BABEL_OP1_102_45512_20120408_174807_inLine
+BABEL_OP1_102_45512_20120408_174807_outLine
+BABEL_OP1_102_45655_20120405_201151_inLine
+BABEL_OP1_102_45655_20120405_201151_outLine
+BABEL_OP1_102_45655_20120405_205759_inLine
+BABEL_OP1_102_45655_20120405_205759_outLine
+BABEL_OP1_102_47037_20120805_212557_inLine
+BABEL_OP1_102_47037_20120805_212557_outLine
+BABEL_OP1_102_47433_20120329_001114_inLine
+BABEL_OP1_102_47433_20120329_001114_outLine
+BABEL_OP1_102_47733_20120607_225347_inLine
+BABEL_OP1_102_47733_20120607_225347_outLine
+BABEL_OP1_102_49173_20121028_022705_inLine
+BABEL_OP1_102_49173_20121028_022705_outLine
+BABEL_OP1_102_51448_20121111_155248_inLine
+BABEL_OP1_102_51448_20121111_155248_outLine
+BABEL_OP1_102_52325_20120430_191407_inLine
+BABEL_OP1_102_52325_20120430_191407_outLine
+BABEL_OP1_102_52515_20120910_021046_inLine
+BABEL_OP1_102_52515_20120910_021046_outLine
+BABEL_OP1_102_52900_20120426_225238_inLine
+BABEL_OP1_102_52900_20120426_225238_outLine
+BABEL_OP1_102_52900_20120426_230606_inLine
+BABEL_OP1_102_52900_20120426_230606_outLine
+BABEL_OP1_102_52913_20121224_231026_inLine
+BABEL_OP1_102_52913_20121224_231026_outLine
+BABEL_OP1_102_53278_20120607_205252_inLine
+BABEL_OP1_102_53278_20120607_205252_outLine
+BABEL_OP1_102_53649_20121008_013457_inLine
+BABEL_OP1_102_53649_20121008_013457_outLine
+BABEL_OP1_102_54818_20120407_212156_inLine
+BABEL_OP1_102_54818_20120407_212156_outLine
+BABEL_OP1_102_55786_20120401_224618_inLine
+BABEL_OP1_102_55786_20120401_224618_outLine
+BABEL_OP1_102_57277_20121227_213448_inLine
+BABEL_OP1_102_57277_20121227_213448_outLine
+BABEL_OP1_102_57454_20120615_183718_inLine
+BABEL_OP1_102_57454_20120615_183718_outLine
+BABEL_OP1_102_58536_20120426_204822_inLine
+BABEL_OP1_102_60064_20121006_215918_inLine
+BABEL_OP1_102_60064_20121006_215918_outLine
+BABEL_OP1_102_61351_20121220_161410_inLine
+BABEL_OP1_102_61351_20121220_161410_outLine
+BABEL_OP1_102_62163_20121011_012642_inLine
+BABEL_OP1_102_62163_20121011_012642_outLine
+BABEL_OP1_102_62277_20120722_203834_inLine
+BABEL_OP1_102_62277_20120722_203834_outLine
+BABEL_OP1_102_63233_20120405_184701_inLine
+BABEL_OP1_102_63233_20120405_184701_outLine
+BABEL_OP1_102_63339_20120328_190947_inLine
+BABEL_OP1_102_63339_20120328_190947_outLine
+BABEL_OP1_102_63353_20120409_193206_inLine
+BABEL_OP1_102_63353_20120409_193206_outLine
+BABEL_OP1_102_63353_20120409_194011_inLine
+BABEL_OP1_102_63353_20120409_194011_outLine
+BABEL_OP1_102_64372_20120406_183945_inLine
+BABEL_OP1_102_64372_20120406_183945_outLine
+BABEL_OP1_102_65989_20120607_000921_inLine
+BABEL_OP1_102_65989_20120607_000921_outLine
+BABEL_OP1_102_66275_20120719_004257_inLine
+BABEL_OP1_102_66275_20120719_004257_outLine
+BABEL_OP1_102_66883_20120428_204106_inLine
+BABEL_OP1_102_66883_20120428_204106_outLine
+BABEL_OP1_102_67304_20120806_203538_inLine
+BABEL_OP1_102_67304_20120806_203538_outLine
+BABEL_OP1_102_68191_20120606_224106_inLine
+BABEL_OP1_102_68191_20120606_224106_outLine
+BABEL_OP1_102_68337_20120420_004336_inLine
+BABEL_OP1_102_68337_20120420_004336_outLine
+BABEL_OP1_102_68671_20121014_155929_inLine
+BABEL_OP1_102_68671_20121014_155929_outLine
+BABEL_OP1_102_69548_20120330_180855_inLine
+BABEL_OP1_102_69548_20120330_180855_outLine
+BABEL_OP1_102_72907_20121219_204634_inLine
+BABEL_OP1_102_72907_20121219_204634_outLine
+BABEL_OP1_102_74295_20120618_234350_inLine
+BABEL_OP1_102_74295_20120618_234350_outLine
+BABEL_OP1_102_74625_20121010_165038_inLine
+BABEL_OP1_102_74625_20121010_165038_outLine
+BABEL_OP1_102_75151_20121017_164432_inLine
+BABEL_OP1_102_75151_20121017_164432_outLine
+BABEL_OP1_102_75151_20121017_164918_inLine
+BABEL_OP1_102_75151_20121017_164918_outLine
+BABEL_OP1_102_75248_20121106_201226_inLine
+BABEL_OP1_102_75248_20121106_201226_outLine
+BABEL_OP1_102_75333_20130121_191749_inLine
+BABEL_OP1_102_75333_20130121_191749_outLine
+BABEL_OP1_102_75871_20120910_013715_inLine
+BABEL_OP1_102_75871_20120910_013715_outLine
+BABEL_OP1_102_77238_20120331_175602_inLine
+BABEL_OP1_102_77238_20120331_175602_outLine
+BABEL_OP1_102_77238_20120331_181840_inLine
+BABEL_OP1_102_77238_20120331_181840_outLine
+BABEL_OP1_102_77697_20130128_202557_inLine
+BABEL_OP1_102_77697_20130128_202557_outLine
+BABEL_OP1_102_77697_20130128_203734_inLine
+BABEL_OP1_102_77697_20130128_203734_outLine
+BABEL_OP1_102_78290_20121010_135127_inLine
+BABEL_OP1_102_78290_20121010_135127_outLine
+BABEL_OP1_102_78681_20121112_013035_inLine
+BABEL_OP1_102_78681_20121112_013035_outLine
+BABEL_OP1_102_79293_20120404_182947_inLine
+BABEL_OP1_102_79293_20120404_182947_outLine
+BABEL_OP1_102_80075_20120617_182928_inLine
+BABEL_OP1_102_80075_20120617_182928_outLine
+BABEL_OP1_102_80247_20130121_182518_inLine
+BABEL_OP1_102_80247_20130121_182518_outLine
+BABEL_OP1_102_81053_20130127_205227_inLine
+BABEL_OP1_102_81053_20130127_205227_outLine
+BABEL_OP1_102_81119_20130209_215021_inLine
+BABEL_OP1_102_81119_20130209_215021_outLine
+BABEL_OP1_102_81642_20120608_184707_inLine
+BABEL_OP1_102_81642_20120608_184707_outLine
+BABEL_OP1_102_81647_20121010_143838_inLine
+BABEL_OP1_102_81647_20121010_143838_outLine
+BABEL_OP1_102_81820_20130121_175432_inLine
+BABEL_OP1_102_81820_20130121_175432_outLine
+BABEL_OP1_102_82881_20120330_215822_inLine
+BABEL_OP1_102_82881_20120330_215822_outLine
+BABEL_OP1_102_83186_20120817_222832_inLine
+BABEL_OP1_102_83186_20120817_222832_outLine
+BABEL_OP1_102_83219_20121112_012249_inLine
+BABEL_OP1_102_83219_20121112_012249_outLine
+BABEL_OP1_102_83702_20120419_173053_inLine
+BABEL_OP1_102_83702_20120419_173053_outLine
+BABEL_OP1_102_84491_20121026_003510_inLine
+BABEL_OP1_102_84491_20121026_003510_outLine
+BABEL_OP1_102_86998_20121110_171744_inLine
+BABEL_OP1_102_86998_20121110_171744_outLine
+BABEL_OP1_102_87077_20120429_190133_inLine
+BABEL_OP1_102_87077_20120429_190133_outLine
+BABEL_OP1_102_87634_20120327_210105_inLine
+BABEL_OP1_102_87634_20120327_210105_outLine
+BABEL_OP1_102_88294_20120331_223132_inLine
+BABEL_OP1_102_88383_20120409_194253_inLine
+BABEL_OP1_102_88383_20120409_194253_outLine
+BABEL_OP1_102_88532_20120805_223539_inLine
+BABEL_OP1_102_88532_20120805_223539_outLine
+BABEL_OP1_102_88982_20120607_221313_inLine
+BABEL_OP1_102_88982_20120607_221313_outLine
+BABEL_OP1_102_89345_20120331_184511_inLine
+BABEL_OP1_102_89345_20120331_184511_outLine
+BABEL_OP1_102_89345_20120331_190311_inLine
+BABEL_OP1_102_89345_20120331_190311_outLine
+BABEL_OP1_102_90024_20121106_025738_inLine
+BABEL_OP1_102_90024_20121106_025738_outLine
+BABEL_OP1_102_90490_20120318_194705_inLine
+BABEL_OP1_102_90490_20120318_194705_outLine
+BABEL_OP1_102_90730_20120420_175543_inLine
+BABEL_OP1_102_90951_20120929_024352_inLine
+BABEL_OP1_102_90951_20120929_024352_outLine
+BABEL_OP1_102_91409_20120425_213805_inLine
+BABEL_OP1_102_91409_20120425_213805_outLine
+BABEL_OP1_102_92642_20120329_225854_inLine
+BABEL_OP1_102_92642_20120329_225854_outLine
+BABEL_OP1_102_92735_20120425_232435_inLine
+BABEL_OP1_102_92735_20120425_232435_outLine
+BABEL_OP1_102_92820_20121104_193517_inLine
+BABEL_OP1_102_92820_20121104_193517_outLine
+BABEL_OP1_102_94218_20121112_171836_inLine
+BABEL_OP1_102_94218_20121112_171836_outLine
+BABEL_OP1_102_97052_20121013_023448_inLine
+BABEL_OP1_102_97052_20121013_023448_outLine
+BABEL_OP1_102_99694_20120401_230049_inLine
+BABEL_OP1_102_99694_20120401_230049_outLine
diff --git a/egs/babel/s5d/conf/lists/103-bengali/dev.list b/egs/babel/s5d/conf/lists/103-bengali/dev.list
new file mode 100644
index 00000000000..4dd26d694d3
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/103-bengali/dev.list
@@ -0,0 +1,125 @@
+BABEL_OP1_103_10569_20111221_201913_inLine
+BABEL_OP1_103_10569_20111221_201913_outLine
+BABEL_OP1_103_10576_20111221_214850_inLine
+BABEL_OP1_103_10576_20111221_214850_outLine
+BABEL_OP1_103_11153_20120204_001459_inLine
+BABEL_OP1_103_11153_20120204_001459_outLine
+BABEL_OP1_103_12600_20120127_235915_inLine
+BABEL_OP1_103_12600_20120127_235915_outLine
+BABEL_OP1_103_13990_20120121_225453_inLine
+BABEL_OP1_103_13990_20120121_225453_outLine
+BABEL_OP1_103_14002_20120116_220151_inLine
+BABEL_OP1_103_14002_20120116_220151_outLine
+BABEL_OP1_103_14852_20120203_024637_inLine
+BABEL_OP1_103_14852_20120203_024637_outLine
+BABEL_OP1_103_17081_20120608_004038_inLine
+BABEL_OP1_103_17081_20120608_004038_outLine
+BABEL_OP1_103_21203_20120523_225358_inLine
+BABEL_OP1_103_21203_20120523_225358_outLine
+BABEL_OP1_103_22340_20120513_220417_inLine
+BABEL_OP1_103_22340_20120513_220417_outLine
+BABEL_OP1_103_24503_20120127_182430_inLine
+BABEL_OP1_103_24503_20120127_182430_outLine
+BABEL_OP1_103_24810_20120114_225518_inLine
+BABEL_OP1_103_24810_20120114_225518_outLine
+BABEL_OP1_103_25067_20120129_230104_inLine
+BABEL_OP1_103_25067_20120129_230104_outLine
+BABEL_OP1_103_27912_20120123_185402_inLine
+BABEL_OP1_103_27912_20120123_185402_outLine
+BABEL_OP1_103_31084_20120729_201226_inLine
+BABEL_OP1_103_31084_20120729_201226_outLine
+BABEL_OP1_103_37083_20120125_224559_inLine
+BABEL_OP1_103_38382_20120110_013824_inLine
+BABEL_OP1_103_38382_20120110_013824_outLine
+BABEL_OP1_103_40114_20120122_183602_inLine
+BABEL_OP1_103_40114_20120122_183602_outLine
+BABEL_OP1_103_41417_20120122_224848_inLine
+BABEL_OP1_103_41417_20120122_224848_outLine
+BABEL_OP1_103_42929_20120118_211148_inLine
+BABEL_OP1_103_42929_20120118_211148_outLine
+BABEL_OP1_103_42929_20120118_212321_inLine
+BABEL_OP1_103_42929_20120118_212321_outLine
+BABEL_OP1_103_43051_20120524_163506_inLine
+BABEL_OP1_103_43051_20120524_163506_outLine
+BABEL_OP1_103_44799_20120131_222925_inLine
+BABEL_OP1_103_44799_20120131_222925_outLine
+BABEL_OP1_103_48834_20111227_010514_inLine
+BABEL_OP1_103_48834_20111227_010514_outLine
+BABEL_OP1_103_49329_20120131_223617_inLine
+BABEL_OP1_103_49329_20120131_223617_outLine
+BABEL_OP1_103_50583_20120114_233345_inLine
+BABEL_OP1_103_50583_20120114_233345_outLine
+BABEL_OP1_103_50614_20120130_225030_inLine
+BABEL_OP1_103_50614_20120130_225030_outLine
+BABEL_OP1_103_50910_20120122_001708_inLine
+BABEL_OP1_103_50910_20120122_001708_outLine
+BABEL_OP1_103_52067_20120127_020600_inLine
+BABEL_OP1_103_52845_20120126_200807_inLine
+BABEL_OP1_103_52845_20120126_200807_outLine
+BABEL_OP1_103_53805_20120126_211949_inLine
+BABEL_OP1_103_53805_20120126_211949_outLine
+BABEL_OP1_103_57087_20120204_181410_inLine
+BABEL_OP1_103_57087_20120204_181410_outLine
+BABEL_OP1_103_57721_20120531_194610_inLine
+BABEL_OP1_103_57721_20120531_194610_outLine
+BABEL_OP1_103_57742_20120125_200619_inLine
+BABEL_OP1_103_57742_20120125_200619_outLine
+BABEL_OP1_103_60462_20120521_181224_inLine
+BABEL_OP1_103_60462_20120521_181224_outLine
+BABEL_OP1_103_62038_20111230_004215_inLine
+BABEL_OP1_103_62038_20111230_004215_outLine
+BABEL_OP1_103_62169_20120304_153842_inLine
+BABEL_OP1_103_62169_20120304_153842_outLine
+BABEL_OP1_103_62584_20120305_152943_inLine
+BABEL_OP1_103_62584_20120305_152943_outLine
+BABEL_OP1_103_62837_20120307_223844_inLine
+BABEL_OP1_103_62837_20120307_223844_outLine
+BABEL_OP1_103_62837_20120307_225550_inLine
+BABEL_OP1_103_62837_20120307_225550_outLine
+BABEL_OP1_103_63220_20120514_232049_inLine
+BABEL_OP1_103_63220_20120514_232049_outLine
+BABEL_OP1_103_63444_20120316_030633_inLine
+BABEL_OP1_103_64297_20120514_162741_inLine
+BABEL_OP1_103_64297_20120514_162741_outLine
+BABEL_OP1_103_64853_20120405_163727_inLine
+BABEL_OP1_103_64853_20120405_163727_outLine
+BABEL_OP1_103_65597_20120530_213140_inLine
+BABEL_OP1_103_65597_20120530_213140_outLine
+BABEL_OP1_103_65895_20120229_202918_inLine
+BABEL_OP1_103_65895_20120229_202918_outLine
+BABEL_OP1_103_66313_20120229_230907_inLine
+BABEL_OP1_103_66313_20120229_230907_outLine
+BABEL_OP1_103_66351_20120111_041605_inLine
+BABEL_OP1_103_66351_20120111_041605_outLine
+BABEL_OP1_103_66757_20120131_215301_inLine
+BABEL_OP1_103_66757_20120131_215301_outLine
+BABEL_OP1_103_67421_20120310_230757_inLine
+BABEL_OP1_103_67421_20120310_230757_outLine
+BABEL_OP1_103_69894_20120307_152955_inLine
+BABEL_OP1_103_69894_20120307_152955_outLine
+BABEL_OP1_103_76654_20120519_203100_inLine
+BABEL_OP1_103_76654_20120519_203100_outLine
+BABEL_OP1_103_77082_20120203_232638_inLine
+BABEL_OP1_103_77082_20120203_232638_outLine
+BABEL_OP1_103_80105_20120530_211541_inLine
+BABEL_OP1_103_80105_20120530_211541_outLine
+BABEL_OP1_103_80875_20120522_224314_inLine
+BABEL_OP1_103_80875_20120522_224314_outLine
+BABEL_OP1_103_81318_20120104_020938_inLine
+BABEL_OP1_103_81318_20120104_020938_outLine
+BABEL_OP1_103_81773_20120101_024120_inLine
+BABEL_OP1_103_81773_20120101_024120_outLine
+BABEL_OP1_103_82526_20120118_185334_inLine
+BABEL_OP1_103_82526_20120118_185334_outLine
+BABEL_OP1_103_86207_20120127_145936_inLine
+BABEL_OP1_103_86207_20120127_145936_outLine
+BABEL_OP1_103_88690_20120201_005057_inLine
+BABEL_OP1_103_88690_20120201_005057_outLine
+BABEL_OP1_103_91202_20111229_185342_inLine
+BABEL_OP1_103_91202_20111229_185342_outLine
+BABEL_OP1_103_91275_20120529_195749_inLine
+BABEL_OP1_103_91275_20120529_195749_outLine
+BABEL_OP1_103_93273_20120123_022109_inLine
+BABEL_OP1_103_93273_20120123_022109_outLine
+BABEL_OP1_103_95826_20120201_001701_inLine
+BABEL_OP1_103_95826_20120201_001701_outLine
diff --git a/egs/babel/s5d/conf/lists/103-bengali/eval.list b/egs/babel/s5d/conf/lists/103-bengali/eval.list
new file mode 100644
index 00000000000..03220030e17
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/103-bengali/eval.list
@@ -0,0 +1,193 @@
+BABEL_OP1_103_10490_20111220_235407_inLine
+BABEL_OP1_103_10490_20111220_235407_outLine
+BABEL_OP1_103_11146_20120528_182053_inLine
+BABEL_OP1_103_11146_20120528_182053_outLine
+BABEL_OP1_103_11168_20111228_213615_inLine
+BABEL_OP1_103_11168_20111228_213615_outLine
+BABEL_OP1_103_11388_20120520_161554_inLine
+BABEL_OP1_103_11511_20120526_232041_inLine
+BABEL_OP1_103_11511_20120526_232041_outLine
+BABEL_OP1_103_12959_20120127_201055_inLine
+BABEL_OP1_103_12959_20120127_201055_outLine
+BABEL_OP1_103_14503_20120117_213020_inLine
+BABEL_OP1_103_14503_20120117_213020_outLine
+BABEL_OP1_103_14713_20120123_195706_inLine
+BABEL_OP1_103_14713_20120123_195706_outLine
+BABEL_OP1_103_16352_20120201_160631_inLine
+BABEL_OP1_103_16352_20120201_160631_outLine
+BABEL_OP1_103_17749_20120115_221220_inLine
+BABEL_OP1_103_17749_20120115_221220_outLine
+BABEL_OP1_103_23322_20120519_165208_inLine
+BABEL_OP1_103_23322_20120519_165208_outLine
+BABEL_OP1_103_24427_20120513_210712_inLine
+BABEL_OP1_103_24427_20120513_210712_outLine
+BABEL_OP1_103_25147_20120201_164613_inLine
+BABEL_OP1_103_25147_20120201_164614_outLine
+BABEL_OP1_103_25525_20120114_010656_inLine
+BABEL_OP1_103_25525_20120114_010656_outLine
+BABEL_OP1_103_27267_20120101_213815_inLine
+BABEL_OP1_103_27267_20120101_213815_outLine
+BABEL_OP1_103_28046_20120407_154949_inLine
+BABEL_OP1_103_28046_20120407_154949_outLine
+BABEL_OP1_103_28516_20120421_231427_inLine
+BABEL_OP1_103_28516_20120421_231428_outLine
+BABEL_OP1_103_28533_20120421_224406_inLine
+BABEL_OP1_103_28533_20120421_224406_outLine
+BABEL_OP1_103_28534_20120421_222000_inLine
+BABEL_OP1_103_28561_20120421_215523_inLine
+BABEL_OP1_103_28561_20120421_215523_outLine
+BABEL_OP1_103_28607_20120421_213119_inLine
+BABEL_OP1_103_28607_20120421_213119_outLine
+BABEL_OP1_103_28834_20120421_205128_inLine
+BABEL_OP1_103_28834_20120421_205941_inLine
+BABEL_OP1_103_28922_20120421_202038_inLine
+BABEL_OP1_103_28922_20120421_202038_outLine
+BABEL_OP1_103_29061_20120421_195632_inLine
+BABEL_OP1_103_29061_20120421_195632_outLine
+BABEL_OP1_103_29397_20120421_192844_inLine
+BABEL_OP1_103_29397_20120421_192844_outLine
+BABEL_OP1_103_29411_20120421_190505_inLine
+BABEL_OP1_103_29411_20120421_190505_outLine
+BABEL_OP1_103_29471_20120421_183732_inLine
+BABEL_OP1_103_29471_20120421_183732_outLine
+BABEL_OP1_103_29489_20120421_181719_inLine
+BABEL_OP1_103_29489_20120421_181719_outLine
+BABEL_OP1_103_29513_20120421_170000_inLine
+BABEL_OP1_103_29513_20120421_170000_outLine
+BABEL_OP1_103_30747_20120111_231823_inLine
+BABEL_OP1_103_30747_20120111_231823_outLine
+BABEL_OP1_103_30848_20120102_001515_inLine
+BABEL_OP1_103_30848_20120102_001515_outLine
+BABEL_OP1_103_32703_20120128_203538_inLine
+BABEL_OP1_103_32703_20120128_203538_outLine
+BABEL_OP1_103_33590_20120122_165207_inLine
+BABEL_OP1_103_33590_20120122_165207_outLine
+BABEL_OP1_103_33590_20120122_170610_inLine
+BABEL_OP1_103_33590_20120122_170610_outLine
+BABEL_OP1_103_33809_20120122_184348_inLine
+BABEL_OP1_103_33809_20120122_184349_outLine
+BABEL_OP1_103_34102_20120528_233758_inLine
+BABEL_OP1_103_34102_20120528_233758_outLine
+BABEL_OP1_103_35052_20120118_164925_inLine
+BABEL_OP1_103_35052_20120118_164925_outLine
+BABEL_OP1_103_35052_20120118_171428_inLine
+BABEL_OP1_103_35052_20120118_171428_outLine
+BABEL_OP1_103_36842_20120120_013653_inLine
+BABEL_OP1_103_36842_20120120_013653_outLine
+BABEL_OP1_103_37798_20120121_014828_inLine
+BABEL_OP1_103_37798_20120121_014828_outLine
+BABEL_OP1_103_40701_20120523_230827_inLine
+BABEL_OP1_103_40701_20120523_230827_outLine
+BABEL_OP1_103_40701_20120523_232042_inLine
+BABEL_OP1_103_40701_20120523_232042_outLine
+BABEL_OP1_103_41871_20120127_015943_inLine
+BABEL_OP1_103_41871_20120127_015943_outLine
+BABEL_OP1_103_43725_20120518_195136_inLine
+BABEL_OP1_103_43725_20120518_195136_outLine
+BABEL_OP1_103_44141_20120520_005301_inLine
+BABEL_OP1_103_44141_20120520_005301_outLine
+BABEL_OP1_103_44515_20120104_001740_inLine
+BABEL_OP1_103_44515_20120104_001740_outLine
+BABEL_OP1_103_44515_20120104_002748_inLine
+BABEL_OP1_103_44515_20120104_002749_outLine
+BABEL_OP1_103_46776_20120520_000315_inLine
+BABEL_OP1_103_46776_20120520_000315_outLine
+BABEL_OP1_103_47313_20120110_161032_inLine
+BABEL_OP1_103_47313_20120110_161032_outLine
+BABEL_OP1_103_50697_20120523_192842_inLine
+BABEL_OP1_103_50697_20120523_192842_outLine
+BABEL_OP1_103_51047_20120129_041648_inLine
+BABEL_OP1_103_51047_20120129_041648_outLine
+BABEL_OP1_103_51079_20120125_205839_inLine +BABEL_OP1_103_51079_20120125_205839_outLine +BABEL_OP1_103_51791_20120207_192918_inLine +BABEL_OP1_103_51791_20120207_192918_outLine +BABEL_OP1_103_52306_20120204_205158_inLine +BABEL_OP1_103_52306_20120204_205158_outLine +BABEL_OP1_103_52570_20120202_202812_inLine +BABEL_OP1_103_52570_20120202_202812_outLine +BABEL_OP1_103_53334_20120115_004411_inLine +BABEL_OP1_103_53334_20120115_004411_outLine +BABEL_OP1_103_54178_20120205_163228_inLine +BABEL_OP1_103_54178_20120205_163228_outLine +BABEL_OP1_103_54673_20120203_032314_inLine +BABEL_OP1_103_54673_20120203_032314_outLine +BABEL_OP1_103_56452_20120131_183725_inLine +BABEL_OP1_103_56452_20120131_183725_outLine +BABEL_OP1_103_56452_20120131_185001_inLine +BABEL_OP1_103_56452_20120131_185001_outLine +BABEL_OP1_103_56945_20120125_234057_inLine +BABEL_OP1_103_56945_20120125_234057_outLine +BABEL_OP1_103_57320_20120519_014148_inLine +BABEL_OP1_103_57320_20120519_014148_outLine +BABEL_OP1_103_57618_20120206_004508_inLine +BABEL_OP1_103_57618_20120206_004508_outLine +BABEL_OP1_103_58807_20120106_230153_inLine +BABEL_OP1_103_58807_20120106_230153_outLine +BABEL_OP1_103_59399_20120123_013608_inLine +BABEL_OP1_103_59399_20120123_013608_outLine +BABEL_OP1_103_61606_20120524_001028_inLine +BABEL_OP1_103_61735_20120102_220532_inLine +BABEL_OP1_103_61735_20120102_220532_outLine +BABEL_OP1_103_62671_20120521_174222_inLine +BABEL_OP1_103_62671_20120521_174222_outLine +BABEL_OP1_103_62941_20120311_004945_inLine +BABEL_OP1_103_62941_20120311_004945_outLine +BABEL_OP1_103_63204_20120312_013958_inLine +BABEL_OP1_103_63204_20120312_013958_outLine +BABEL_OP1_103_63327_20120312_024230_inLine +BABEL_OP1_103_63327_20120312_024230_outLine +BABEL_OP1_103_63439_20120315_041347_inLine +BABEL_OP1_103_63439_20120315_041347_outLine +BABEL_OP1_103_63548_20120319_031651_inLine +BABEL_OP1_103_63548_20120319_031651_outLine +BABEL_OP1_103_63575_20120319_044400_inLine +BABEL_OP1_103_63575_20120319_044400_outLine +BABEL_OP1_103_65788_20120524_153801_inLine +BABEL_OP1_103_65788_20120524_153801_outLine +BABEL_OP1_103_66784_20120111_032559_inLine +BABEL_OP1_103_66784_20120111_032559_outLine +BABEL_OP1_103_66825_20120305_214401_inLine +BABEL_OP1_103_66825_20120305_214401_outLine +BABEL_OP1_103_67716_20120106_145810_inLine +BABEL_OP1_103_67716_20120106_145810_outLine +BABEL_OP1_103_67721_20111229_210017_inLine +BABEL_OP1_103_67721_20111229_210017_outLine +BABEL_OP1_103_68063_20120601_155054_inLine +BABEL_OP1_103_68063_20120601_155054_outLine +BABEL_OP1_103_70108_20120516_193813_inLine +BABEL_OP1_103_70108_20120516_193813_outLine +BABEL_OP1_103_70466_20120526_205046_inLine +BABEL_OP1_103_70466_20120526_205046_outLine +BABEL_OP1_103_72693_20120522_233148_inLine +BABEL_OP1_103_72693_20120522_233148_outLine +BABEL_OP1_103_73171_20120511_003731_inLine +BABEL_OP1_103_73171_20120511_003731_outLine +BABEL_OP1_103_78737_20120107_144050_inLine +BABEL_OP1_103_78737_20120107_144050_outLine +BABEL_OP1_103_80424_20120523_223457_inLine +BABEL_OP1_103_80424_20120523_223457_outLine +BABEL_OP1_103_83137_20120101_220939_inLine +BABEL_OP1_103_83137_20120101_220939_outLine +BABEL_OP1_103_83733_20120114_230510_inLine +BABEL_OP1_103_83733_20120114_230510_outLine +BABEL_OP1_103_88434_20120616_183901_inLine +BABEL_OP1_103_88434_20120616_183901_outLine +BABEL_OP1_103_90432_20111231_212535_inLine +BABEL_OP1_103_90432_20111231_212535_outLine +BABEL_OP1_103_91407_20120204_221709_inLine +BABEL_OP1_103_91407_20120204_221709_outLine 
+BABEL_OP1_103_92880_20120522_232802_inLine +BABEL_OP1_103_92880_20120522_232802_outLine +BABEL_OP1_103_93227_20120116_190634_inLine +BABEL_OP1_103_93227_20120116_190634_outLine +BABEL_OP1_103_93748_20120114_210648_inLine +BABEL_OP1_103_93748_20120114_210648_outLine +BABEL_OP1_103_96956_20120519_002918_inLine +BABEL_OP1_103_96956_20120519_002918_outLine +BABEL_OP1_103_97738_20120521_183220_inLine +BABEL_OP1_103_97738_20120521_183220_outLine +BABEL_OP1_103_99354_20120206_194646_inLine +BABEL_OP1_103_99354_20120206_194646_outLine +BABEL_OP1_103_99354_20120206_195707_inLine +BABEL_OP1_103_99354_20120206_195707_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/evalpart1.list b/egs/babel/s5d/conf/lists/103-bengali/evalpart1.list new file mode 100644 index 00000000000..1c606caf3b3 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/evalpart1.list @@ -0,0 +1,66 @@ +BABEL_OP1_103_11146_20120528_182053_inLine +BABEL_OP1_103_11146_20120528_182053_outLine +BABEL_OP1_103_11168_20111228_213615_inLine +BABEL_OP1_103_11168_20111228_213615_outLine +BABEL_OP1_103_16352_20120201_160631_inLine +BABEL_OP1_103_16352_20120201_160631_outLine +BABEL_OP1_103_17749_20120115_221220_inLine +BABEL_OP1_103_17749_20120115_221220_outLine +BABEL_OP1_103_24427_20120513_210712_inLine +BABEL_OP1_103_24427_20120513_210712_outLine +BABEL_OP1_103_25147_20120201_164613_inLine +BABEL_OP1_103_25147_20120201_164614_outLine +BABEL_OP1_103_28046_20120407_154949_inLine +BABEL_OP1_103_28046_20120407_154949_outLine +BABEL_OP1_103_30747_20120111_231823_inLine +BABEL_OP1_103_30747_20120111_231823_outLine +BABEL_OP1_103_33809_20120122_184348_inLine +BABEL_OP1_103_33809_20120122_184349_outLine +BABEL_OP1_103_35052_20120118_164925_inLine +BABEL_OP1_103_35052_20120118_164925_outLine +BABEL_OP1_103_35052_20120118_171428_inLine +BABEL_OP1_103_35052_20120118_171428_outLine +BABEL_OP1_103_37798_20120121_014828_inLine +BABEL_OP1_103_37798_20120121_014828_outLine +BABEL_OP1_103_41871_20120127_015943_inLine +BABEL_OP1_103_41871_20120127_015943_outLine +BABEL_OP1_103_51079_20120125_205839_inLine +BABEL_OP1_103_51079_20120125_205839_outLine +BABEL_OP1_103_51791_20120207_192918_inLine +BABEL_OP1_103_51791_20120207_192918_outLine +BABEL_OP1_103_52306_20120204_205158_inLine +BABEL_OP1_103_52306_20120204_205158_outLine +BABEL_OP1_103_56452_20120131_183725_inLine +BABEL_OP1_103_56452_20120131_183725_outLine +BABEL_OP1_103_56452_20120131_185001_inLine +BABEL_OP1_103_56452_20120131_185001_outLine +BABEL_OP1_103_58807_20120106_230153_inLine +BABEL_OP1_103_58807_20120106_230153_outLine +BABEL_OP1_103_63204_20120312_013958_inLine +BABEL_OP1_103_63204_20120312_013958_outLine +BABEL_OP1_103_63327_20120312_024230_inLine +BABEL_OP1_103_63327_20120312_024230_outLine +BABEL_OP1_103_63439_20120315_041347_inLine +BABEL_OP1_103_63439_20120315_041347_outLine +BABEL_OP1_103_63548_20120319_031651_inLine +BABEL_OP1_103_63548_20120319_031651_outLine +BABEL_OP1_103_66784_20120111_032559_inLine +BABEL_OP1_103_66784_20120111_032559_outLine +BABEL_OP1_103_68063_20120601_155054_inLine +BABEL_OP1_103_68063_20120601_155054_outLine +BABEL_OP1_103_70466_20120526_205046_inLine +BABEL_OP1_103_70466_20120526_205046_outLine +BABEL_OP1_103_73171_20120511_003731_inLine +BABEL_OP1_103_73171_20120511_003731_outLine +BABEL_OP1_103_83137_20120101_220939_inLine +BABEL_OP1_103_83137_20120101_220939_outLine +BABEL_OP1_103_83733_20120114_230510_inLine +BABEL_OP1_103_83733_20120114_230510_outLine +BABEL_OP1_103_90432_20111231_212535_inLine +BABEL_OP1_103_90432_20111231_212535_outLine 
+BABEL_OP1_103_92880_20120522_232802_inLine +BABEL_OP1_103_92880_20120522_232802_outLine +BABEL_OP1_103_93748_20120114_210648_inLine +BABEL_OP1_103_93748_20120114_210648_outLine +BABEL_OP1_103_97738_20120521_183220_inLine +BABEL_OP1_103_97738_20120521_183220_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/train.FullLP.list b/egs/babel/s5d/conf/lists/103-bengali/train.FullLP.list new file mode 100644 index 00000000000..203b313ade2 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/train.FullLP.list @@ -0,0 +1,751 @@ +BABEL_OP1_103_10193_20111229_035249_inLine +BABEL_OP1_103_10193_20111229_035249_outLine +BABEL_OP1_103_10301_20111220_225237_inLine +BABEL_OP1_103_10301_20111220_225237_outLine +BABEL_OP1_103_10305_20111220_231100_inLine +BABEL_OP1_103_10305_20111220_231100_outLine +BABEL_OP1_103_10348_20120113_213854_inLine +BABEL_OP1_103_10348_20120113_213854_outLine +BABEL_OP1_103_10531_20120118_042000_inLine +BABEL_OP1_103_10531_20120118_042000_outLine +BABEL_OP1_103_10556_20111221_000031_inLine +BABEL_OP1_103_10556_20111221_000031_outLine +BABEL_OP1_103_10612_20111222_210911_inLine +BABEL_OP1_103_10612_20111222_210911_outLine +BABEL_OP1_103_10806_20111226_181132_inLine +BABEL_OP1_103_10806_20111226_181132_outLine +BABEL_OP1_103_11128_20120124_200626_inLine +BABEL_OP1_103_11128_20120124_200626_outLine +BABEL_OP1_103_11155_20111230_211626_inLine +BABEL_OP1_103_11155_20111230_211626_outLine +BABEL_OP1_103_11442_20120125_025606_inLine +BABEL_OP1_103_11442_20120125_025606_outLine +BABEL_OP1_103_12518_20111227_181020_inLine +BABEL_OP1_103_12518_20111227_181021_outLine +BABEL_OP1_103_12639_20111229_015021_inLine +BABEL_OP1_103_12639_20111229_015021_outLine +BABEL_OP1_103_12682_20120125_201902_inLine +BABEL_OP1_103_12682_20120125_201902_outLine +BABEL_OP1_103_12682_20120125_210238_inLine +BABEL_OP1_103_12682_20120125_210238_outLine +BABEL_OP1_103_12719_20120203_035027_inLine +BABEL_OP1_103_12719_20120203_035027_outLine +BABEL_OP1_103_12786_20111230_012748_inLine +BABEL_OP1_103_12786_20111230_012749_outLine +BABEL_OP1_103_12809_20111229_175926_inLine +BABEL_OP1_103_12809_20111229_175926_outLine +BABEL_OP1_103_12843_20120117_224043_inLine +BABEL_OP1_103_12843_20120117_224043_outLine +BABEL_OP1_103_13024_20111229_010356_inLine +BABEL_OP1_103_13024_20111229_010357_outLine +BABEL_OP1_103_13295_20120522_232550_inLine +BABEL_OP1_103_13295_20120522_232550_outLine +BABEL_OP1_103_13615_20120113_174612_inLine +BABEL_OP1_103_13615_20120113_174612_outLine +BABEL_OP1_103_13708_20120102_032700_inLine +BABEL_OP1_103_13708_20120102_032700_outLine +BABEL_OP1_103_13752_20120530_221929_inLine +BABEL_OP1_103_13752_20120530_221929_outLine +BABEL_OP1_103_14086_20120113_200751_inLine +BABEL_OP1_103_14086_20120113_200751_outLine +BABEL_OP1_103_14147_20120531_160226_inLine +BABEL_OP1_103_14147_20120531_160226_outLine +BABEL_OP1_103_14147_20120531_170020_inLine +BABEL_OP1_103_14147_20120531_170020_outLine +BABEL_OP1_103_14422_20120514_181741_inLine +BABEL_OP1_103_14422_20120514_181741_outLine +BABEL_OP1_103_14554_20120120_230548_inLine +BABEL_OP1_103_14554_20120120_230548_outLine +BABEL_OP1_103_14583_20120515_192730_inLine +BABEL_OP1_103_14583_20120515_192730_outLine +BABEL_OP1_103_14942_20120101_203529_inLine +BABEL_OP1_103_14942_20120101_203529_outLine +BABEL_OP1_103_15304_20120106_035227_inLine +BABEL_OP1_103_15304_20120106_035227_outLine +BABEL_OP1_103_15600_20111231_181856_inLine +BABEL_OP1_103_15600_20111231_181856_outLine +BABEL_OP1_103_15665_20120517_162750_inLine 
+BABEL_OP1_103_15665_20120517_162750_outLine +BABEL_OP1_103_15749_20111230_015120_inLine +BABEL_OP1_103_15749_20111230_015120_outLine +BABEL_OP1_103_15803_20120528_164556_inLine +BABEL_OP1_103_15803_20120528_164556_outLine +BABEL_OP1_103_16210_20120118_201234_inLine +BABEL_OP1_103_16210_20120118_201234_outLine +BABEL_OP1_103_16393_20111230_012139_inLine +BABEL_OP1_103_16393_20111230_012139_outLine +BABEL_OP1_103_16416_20120205_011943_inLine +BABEL_OP1_103_16416_20120205_011943_outLine +BABEL_OP1_103_16633_20120105_164800_inLine +BABEL_OP1_103_16633_20120105_164800_outLine +BABEL_OP1_103_16754_20120101_015558_inLine +BABEL_OP1_103_16754_20120101_015558_outLine +BABEL_OP1_103_17063_20120202_201950_inLine +BABEL_OP1_103_17063_20120202_201950_outLine +BABEL_OP1_103_17063_20120202_204211_inLine +BABEL_OP1_103_17063_20120202_204211_outLine +BABEL_OP1_103_17139_20120110_182115_inLine +BABEL_OP1_103_17139_20120110_182115_outLine +BABEL_OP1_103_17180_20120126_233802_inLine +BABEL_OP1_103_17180_20120126_233802_outLine +BABEL_OP1_103_17612_20120531_232906_inLine +BABEL_OP1_103_17876_20120203_220933_inLine +BABEL_OP1_103_17876_20120203_220933_outLine +BABEL_OP1_103_18244_20120514_000930_inLine +BABEL_OP1_103_18244_20120514_000931_outLine +BABEL_OP1_103_18556_20111231_233139_inLine +BABEL_OP1_103_18556_20111231_233139_outLine +BABEL_OP1_103_18755_20120103_012800_inLine +BABEL_OP1_103_18755_20120103_012800_outLine +BABEL_OP1_103_18861_20120612_231154_inLine +BABEL_OP1_103_18861_20120612_231154_outLine +BABEL_OP1_103_18938_20120515_163044_inLine +BABEL_OP1_103_18938_20120515_163044_outLine +BABEL_OP1_103_19118_20120206_194310_inLine +BABEL_OP1_103_19118_20120206_194310_outLine +BABEL_OP1_103_19280_20120515_173629_inLine +BABEL_OP1_103_19280_20120515_173630_outLine +BABEL_OP1_103_19324_20120114_223457_inLine +BABEL_OP1_103_19324_20120114_223457_outLine +BABEL_OP1_103_19564_20120112_151539_inLine +BABEL_OP1_103_19564_20120112_151539_outLine +BABEL_OP1_103_19697_20120107_043218_inLine +BABEL_OP1_103_19697_20120107_043218_outLine +BABEL_OP1_103_19885_20120517_200533_inLine +BABEL_OP1_103_19885_20120517_200533_outLine +BABEL_OP1_103_20481_20120118_004556_inLine +BABEL_OP1_103_20481_20120118_004556_outLine +BABEL_OP1_103_21020_20120517_182615_inLine +BABEL_OP1_103_21020_20120517_182615_outLine +BABEL_OP1_103_21076_20111231_203216_inLine +BABEL_OP1_103_21076_20111231_203216_outLine +BABEL_OP1_103_21208_20120113_192303_inLine +BABEL_OP1_103_21208_20120113_192303_outLine +BABEL_OP1_103_21417_20120115_235720_inLine +BABEL_OP1_103_21417_20120115_235720_outLine +BABEL_OP1_103_21648_20111229_024025_inLine +BABEL_OP1_103_21648_20111229_024025_outLine +BABEL_OP1_103_21928_20120205_210433_inLine +BABEL_OP1_103_21928_20120205_221157_inLine +BABEL_OP1_103_22134_20120204_185956_inLine +BABEL_OP1_103_22134_20120204_185956_outLine +BABEL_OP1_103_22134_20120204_191024_inLine +BABEL_OP1_103_22134_20120204_191024_outLine +BABEL_OP1_103_22338_20120203_214144_inLine +BABEL_OP1_103_22338_20120203_214144_outLine +BABEL_OP1_103_22528_20120204_221751_inLine +BABEL_OP1_103_22528_20120204_221751_outLine +BABEL_OP1_103_22697_20120123_213617_inLine +BABEL_OP1_103_22697_20120123_213617_outLine +BABEL_OP1_103_23237_20120201_204534_inLine +BABEL_OP1_103_23237_20120201_204534_outLine +BABEL_OP1_103_24235_20120205_171351_inLine +BABEL_OP1_103_24235_20120205_171351_outLine +BABEL_OP1_103_24340_20120526_221640_inLine +BABEL_OP1_103_24340_20120526_221640_outLine +BABEL_OP1_103_25040_20120129_215646_inLine 
+BABEL_OP1_103_25040_20120129_215647_outLine +BABEL_OP1_103_25489_20120107_015122_inLine +BABEL_OP1_103_25489_20120107_015122_outLine +BABEL_OP1_103_26464_20120115_153724_inLine +BABEL_OP1_103_26464_20120115_153725_outLine +BABEL_OP1_103_26603_20120519_190743_inLine +BABEL_OP1_103_26603_20120519_190743_outLine +BABEL_OP1_103_26980_20120114_151400_inLine +BABEL_OP1_103_26980_20120114_151400_outLine +BABEL_OP1_103_27159_20120109_175434_inLine +BABEL_OP1_103_27159_20120109_175434_outLine +BABEL_OP1_103_27298_20120519_164745_inLine +BABEL_OP1_103_27298_20120519_164745_outLine +BABEL_OP1_103_27374_20120608_213343_inLine +BABEL_OP1_103_27374_20120608_213344_outLine +BABEL_OP1_103_27889_20120405_184406_inLine +BABEL_OP1_103_27889_20120405_184406_outLine +BABEL_OP1_103_27895_20120407_201822_inLine +BABEL_OP1_103_27895_20120407_201822_outLine +BABEL_OP1_103_27997_20120406_024629_inLine +BABEL_OP1_103_27997_20120406_024629_outLine +BABEL_OP1_103_28150_20120421_030716_inLine +BABEL_OP1_103_28150_20120421_030716_outLine +BABEL_OP1_103_28281_20120204_215552_inLine +BABEL_OP1_103_28281_20120204_215552_outLine +BABEL_OP1_103_28325_20120421_034840_inLine +BABEL_OP1_103_28325_20120421_034840_outLine +BABEL_OP1_103_28349_20120422_230936_inLine +BABEL_OP1_103_28349_20120422_230936_outLine +BABEL_OP1_103_28452_20120423_002721_inLine +BABEL_OP1_103_28452_20120423_002721_outLine +BABEL_OP1_103_28820_20111231_235604_inLine +BABEL_OP1_103_28820_20111231_235604_outLine +BABEL_OP1_103_29083_20120524_203900_inLine +BABEL_OP1_103_29083_20120524_203900_outLine +BABEL_OP1_103_29368_20120109_152242_inLine +BABEL_OP1_103_29368_20120109_152242_outLine +BABEL_OP1_103_29757_20120607_155549_inLine +BABEL_OP1_103_29757_20120607_155549_outLine +BABEL_OP1_103_30012_20120523_210111_inLine +BABEL_OP1_103_30012_20120523_210111_outLine +BABEL_OP1_103_30031_20111231_051935_inLine +BABEL_OP1_103_30031_20111231_051935_outLine +BABEL_OP1_103_30040_20120114_164613_inLine +BABEL_OP1_103_30040_20120114_164613_outLine +BABEL_OP1_103_30243_20120115_005252_inLine +BABEL_OP1_103_30243_20120115_005252_outLine +BABEL_OP1_103_30620_20111231_181228_inLine +BABEL_OP1_103_30620_20111231_181228_outLine +BABEL_OP1_103_30711_20120612_211646_inLine +BABEL_OP1_103_30711_20120612_211646_outLine +BABEL_OP1_103_30810_20111227_000227_inLine +BABEL_OP1_103_30810_20111227_000227_outLine +BABEL_OP1_103_30847_20120108_235955_inLine +BABEL_OP1_103_30847_20120108_235955_outLine +BABEL_OP1_103_30904_20120522_013413_inLine +BABEL_OP1_103_30904_20120522_013413_outLine +BABEL_OP1_103_31201_20120523_211540_inLine +BABEL_OP1_103_31201_20120523_211540_outLine +BABEL_OP1_103_31871_20120115_205857_inLine +BABEL_OP1_103_31871_20120115_205857_outLine +BABEL_OP1_103_32040_20120122_181109_inLine +BABEL_OP1_103_32040_20120122_181109_outLine +BABEL_OP1_103_32453_20120116_174338_inLine +BABEL_OP1_103_32453_20120116_174338_outLine +BABEL_OP1_103_32722_20120115_005258_inLine +BABEL_OP1_103_32722_20120115_005258_outLine +BABEL_OP1_103_33223_20120108_225050_inLine +BABEL_OP1_103_33223_20120108_225050_outLine +BABEL_OP1_103_33316_20120528_173250_inLine +BABEL_OP1_103_33316_20120528_173250_outLine +BABEL_OP1_103_33534_20120122_020502_inLine +BABEL_OP1_103_33534_20120122_020502_outLine +BABEL_OP1_103_33551_20120122_194434_inLine +BABEL_OP1_103_33551_20120122_194434_outLine +BABEL_OP1_103_33699_20120122_173500_inLine +BABEL_OP1_103_33699_20120122_173500_outLine +BABEL_OP1_103_33807_20120122_190057_inLine +BABEL_OP1_103_33807_20120122_190057_outLine 
+BABEL_OP1_103_33885_20120125_172938_inLine +BABEL_OP1_103_33885_20120125_172938_outLine +BABEL_OP1_103_33991_20120117_202117_inLine +BABEL_OP1_103_33991_20120117_202118_outLine +BABEL_OP1_103_34137_20120529_224220_inLine +BABEL_OP1_103_34137_20120529_224220_outLine +BABEL_OP1_103_34332_20120204_191733_inLine +BABEL_OP1_103_34332_20120204_191733_outLine +BABEL_OP1_103_34545_20120118_173942_inLine +BABEL_OP1_103_34545_20120118_173942_outLine +BABEL_OP1_103_34564_20120530_211027_inLine +BABEL_OP1_103_34564_20120530_211027_outLine +BABEL_OP1_103_34925_20120112_154829_inLine +BABEL_OP1_103_34925_20120112_154829_outLine +BABEL_OP1_103_34994_20120115_213251_inLine +BABEL_OP1_103_34994_20120115_213251_outLine +BABEL_OP1_103_35144_20120123_230913_inLine +BABEL_OP1_103_35144_20120123_230913_outLine +BABEL_OP1_103_35152_20111230_220705_inLine +BABEL_OP1_103_35152_20111230_220705_outLine +BABEL_OP1_103_35157_20120124_010640_inLine +BABEL_OP1_103_35157_20120124_010640_outLine +BABEL_OP1_103_35444_20120612_203930_inLine +BABEL_OP1_103_35444_20120612_203930_outLine +BABEL_OP1_103_35660_20120122_013401_inLine +BABEL_OP1_103_35660_20120122_013402_outLine +BABEL_OP1_103_35750_20111230_025221_inLine +BABEL_OP1_103_35750_20111230_025221_outLine +BABEL_OP1_103_35892_20120120_205811_inLine +BABEL_OP1_103_35892_20120120_205811_outLine +BABEL_OP1_103_36584_20120201_230611_inLine +BABEL_OP1_103_36584_20120201_230611_outLine +BABEL_OP1_103_36748_20120121_230812_inLine +BABEL_OP1_103_36748_20120121_230812_outLine +BABEL_OP1_103_36962_20120810_005828_inLine +BABEL_OP1_103_36962_20120810_005828_outLine +BABEL_OP1_103_37131_20120522_165130_inLine +BABEL_OP1_103_37131_20120522_165130_outLine +BABEL_OP1_103_37551_20111229_232422_inLine +BABEL_OP1_103_37551_20111229_232422_outLine +BABEL_OP1_103_37604_20120122_203335_inLine +BABEL_OP1_103_37604_20120122_203335_outLine +BABEL_OP1_103_37687_20120124_220825_inLine +BABEL_OP1_103_37687_20120124_220826_outLine +BABEL_OP1_103_38163_20120202_001843_inLine +BABEL_OP1_103_38163_20120202_001843_outLine +BABEL_OP1_103_38573_20120120_234500_inLine +BABEL_OP1_103_38573_20120120_234500_outLine +BABEL_OP1_103_38573_20120121_000745_inLine +BABEL_OP1_103_38573_20120121_000745_outLine +BABEL_OP1_103_38588_20120522_215415_inLine +BABEL_OP1_103_38588_20120522_215415_outLine +BABEL_OP1_103_39119_20120608_004832_inLine +BABEL_OP1_103_39119_20120608_004832_outLine +BABEL_OP1_103_39320_20120207_022344_inLine +BABEL_OP1_103_39320_20120207_022344_outLine +BABEL_OP1_103_39769_20120127_213455_inLine +BABEL_OP1_103_39769_20120127_213455_outLine +BABEL_OP1_103_40410_20120124_204758_inLine +BABEL_OP1_103_40410_20120124_204758_outLine +BABEL_OP1_103_40442_20120202_174431_inLine +BABEL_OP1_103_40442_20120202_174431_outLine +BABEL_OP1_103_40889_20120206_221100_inLine +BABEL_OP1_103_40889_20120206_221100_outLine +BABEL_OP1_103_41144_20120118_222314_inLine +BABEL_OP1_103_41144_20120118_222314_outLine +BABEL_OP1_103_41172_20120114_134829_inLine +BABEL_OP1_103_41172_20120114_134829_outLine +BABEL_OP1_103_41197_20120805_155112_inLine +BABEL_OP1_103_41197_20120805_155112_outLine +BABEL_OP1_103_41498_20120118_023411_inLine +BABEL_OP1_103_41498_20120118_023411_outLine +BABEL_OP1_103_42332_20120126_191134_inLine +BABEL_OP1_103_42332_20120126_191134_outLine +BABEL_OP1_103_42332_20120126_192035_inLine +BABEL_OP1_103_42332_20120126_192035_outLine +BABEL_OP1_103_42651_20120122_000902_inLine +BABEL_OP1_103_42651_20120122_000902_outLine +BABEL_OP1_103_42698_20120123_230900_inLine 
+BABEL_OP1_103_42698_20120123_230900_outLine +BABEL_OP1_103_42742_20120123_232130_inLine +BABEL_OP1_103_42742_20120123_232130_outLine +BABEL_OP1_103_42790_20120129_205024_inLine +BABEL_OP1_103_42790_20120129_205025_outLine +BABEL_OP1_103_42986_20120125_204035_inLine +BABEL_OP1_103_42986_20120125_204035_outLine +BABEL_OP1_103_43442_20120120_033602_inLine +BABEL_OP1_103_43442_20120120_033602_outLine +BABEL_OP1_103_43571_20111226_210759_inLine +BABEL_OP1_103_43571_20111226_210759_outLine +BABEL_OP1_103_43812_20120124_005515_inLine +BABEL_OP1_103_43812_20120124_005515_outLine +BABEL_OP1_103_43959_20120125_223215_inLine +BABEL_OP1_103_43959_20120125_223215_outLine +BABEL_OP1_103_43974_20120110_164058_inLine +BABEL_OP1_103_43974_20120110_164058_outLine +BABEL_OP1_103_44192_20120523_184414_inLine +BABEL_OP1_103_44192_20120523_184414_outLine +BABEL_OP1_103_44838_20111229_014707_inLine +BABEL_OP1_103_44838_20111229_014707_outLine +BABEL_OP1_103_44948_20120203_011011_inLine +BABEL_OP1_103_44948_20120203_011011_outLine +BABEL_OP1_103_44967_20120207_025756_inLine +BABEL_OP1_103_44967_20120207_025756_outLine +BABEL_OP1_103_45020_20120522_170055_inLine +BABEL_OP1_103_45020_20120522_170055_outLine +BABEL_OP1_103_45029_20120608_010540_inLine +BABEL_OP1_103_45029_20120608_010540_outLine +BABEL_OP1_103_45565_20120125_220956_inLine +BABEL_OP1_103_45565_20120125_220956_outLine +BABEL_OP1_103_45601_20120201_181124_inLine +BABEL_OP1_103_45601_20120201_181124_outLine +BABEL_OP1_103_45763_20120116_175349_inLine +BABEL_OP1_103_45763_20120116_175349_outLine +BABEL_OP1_103_46197_20120524_220246_inLine +BABEL_OP1_103_46197_20120524_220246_outLine +BABEL_OP1_103_46460_20120530_183725_inLine +BABEL_OP1_103_46460_20120530_183725_outLine +BABEL_OP1_103_46460_20120530_185105_inLine +BABEL_OP1_103_46460_20120530_185106_outLine +BABEL_OP1_103_46548_20120517_192114_inLine +BABEL_OP1_103_46548_20120517_192114_outLine +BABEL_OP1_103_46862_20120124_195804_inLine +BABEL_OP1_103_46862_20120124_195804_outLine +BABEL_OP1_103_46862_20120204_203651_inLine +BABEL_OP1_103_46862_20120204_203651_outLine +BABEL_OP1_103_46887_20120202_214319_inLine +BABEL_OP1_103_46887_20120202_214320_outLine +BABEL_OP1_103_46900_20120204_225820_inLine +BABEL_OP1_103_46900_20120204_225820_outLine +BABEL_OP1_103_47151_20111229_233253_inLine +BABEL_OP1_103_47151_20111229_233253_outLine +BABEL_OP1_103_47177_20120127_201638_inLine +BABEL_OP1_103_47177_20120127_201638_outLine +BABEL_OP1_103_47416_20120729_181025_inLine +BABEL_OP1_103_47416_20120729_181025_outLine +BABEL_OP1_103_47424_20111231_203241_inLine +BABEL_OP1_103_47424_20111231_203241_outLine +BABEL_OP1_103_47574_20120207_034724_inLine +BABEL_OP1_103_47574_20120207_034724_outLine +BABEL_OP1_103_48176_20120206_023101_inLine +BABEL_OP1_103_48176_20120206_023101_outLine +BABEL_OP1_103_48259_20120116_022438_inLine +BABEL_OP1_103_48259_20120116_022438_outLine +BABEL_OP1_103_48518_20120121_195050_inLine +BABEL_OP1_103_48518_20120121_195050_outLine +BABEL_OP1_103_49175_20120206_214803_inLine +BABEL_OP1_103_49175_20120206_214803_outLine +BABEL_OP1_103_49520_20120523_172707_inLine +BABEL_OP1_103_49520_20120523_172707_outLine +BABEL_OP1_103_49629_20120104_040004_inLine +BABEL_OP1_103_49629_20120104_040004_outLine +BABEL_OP1_103_49755_20120110_010410_inLine +BABEL_OP1_103_49755_20120110_010410_outLine +BABEL_OP1_103_49819_20120127_012212_inLine +BABEL_OP1_103_49819_20120127_012212_outLine +BABEL_OP1_103_50492_20120123_211938_inLine +BABEL_OP1_103_50492_20120123_211938_outLine 
+BABEL_OP1_103_50523_20120607_185125_inLine +BABEL_OP1_103_50523_20120607_185126_outLine +BABEL_OP1_103_50798_20120131_022954_inLine +BABEL_OP1_103_50798_20120131_022954_outLine +BABEL_OP1_103_51243_20120201_200604_inLine +BABEL_OP1_103_51243_20120201_200604_outLine +BABEL_OP1_103_52122_20120207_025756_inLine +BABEL_OP1_103_52122_20120207_025756_outLine +BABEL_OP1_103_52604_20120131_233302_inLine +BABEL_OP1_103_52604_20120131_233302_outLine +BABEL_OP1_103_52753_20120521_000301_inLine +BABEL_OP1_103_52753_20120521_001422_inLine +BABEL_OP1_103_53067_20120127_225851_inLine +BABEL_OP1_103_53067_20120127_225851_outLine +BABEL_OP1_103_53262_20120204_194912_inLine +BABEL_OP1_103_53262_20120204_194912_outLine +BABEL_OP1_103_53346_20120128_214441_inLine +BABEL_OP1_103_53346_20120128_214441_outLine +BABEL_OP1_103_53636_20120127_000358_inLine +BABEL_OP1_103_53636_20120127_000358_outLine +BABEL_OP1_103_54030_20111230_220440_inLine +BABEL_OP1_103_54030_20111230_220440_outLine +BABEL_OP1_103_54263_20120206_225348_inLine +BABEL_OP1_103_54263_20120206_225349_outLine +BABEL_OP1_103_54417_20120522_172155_inLine +BABEL_OP1_103_54417_20120522_172155_outLine +BABEL_OP1_103_54606_20120205_175853_inLine +BABEL_OP1_103_54606_20120205_175853_outLine +BABEL_OP1_103_54975_20120207_015749_inLine +BABEL_OP1_103_54975_20120207_015749_outLine +BABEL_OP1_103_54991_20120206_003607_inLine +BABEL_OP1_103_54991_20120206_003607_outLine +BABEL_OP1_103_55166_20120119_180058_inLine +BABEL_OP1_103_55166_20120119_180058_outLine +BABEL_OP1_103_55194_20120529_215243_inLine +BABEL_OP1_103_55194_20120529_215243_outLine +BABEL_OP1_103_55316_20111226_180557_inLine +BABEL_OP1_103_55316_20111226_180557_outLine +BABEL_OP1_103_56704_20120606_171759_inLine +BABEL_OP1_103_56704_20120606_171759_outLine +BABEL_OP1_103_57092_20111227_044400_inLine +BABEL_OP1_103_57092_20111227_044400_outLine +BABEL_OP1_103_57232_20120126_020104_inLine +BABEL_OP1_103_57232_20120126_020104_outLine +BABEL_OP1_103_57351_20120612_182248_inLine +BABEL_OP1_103_57351_20120612_182248_outLine +BABEL_OP1_103_58283_20111231_230840_inLine +BABEL_OP1_103_58283_20111231_230840_outLine +BABEL_OP1_103_58925_20120113_212456_inLine +BABEL_OP1_103_58925_20120113_212456_outLine +BABEL_OP1_103_58925_20120113_214350_inLine +BABEL_OP1_103_58925_20120113_214350_outLine +BABEL_OP1_103_59482_20120612_190437_inLine +BABEL_OP1_103_59482_20120612_190437_outLine +BABEL_OP1_103_59558_20120121_234224_inLine +BABEL_OP1_103_59558_20120121_234224_outLine +BABEL_OP1_103_60524_20120109_213755_inLine +BABEL_OP1_103_60524_20120109_213755_outLine +BABEL_OP1_103_60571_20111228_183342_inLine +BABEL_OP1_103_60571_20111228_183342_outLine +BABEL_OP1_103_60806_20120117_233630_inLine +BABEL_OP1_103_60806_20120117_233630_outLine +BABEL_OP1_103_61229_20120616_151341_inLine +BABEL_OP1_103_61229_20120616_151341_outLine +BABEL_OP1_103_61558_20120106_205412_inLine +BABEL_OP1_103_61558_20120106_205412_outLine +BABEL_OP1_103_61592_20120125_225752_inLine +BABEL_OP1_103_61592_20120125_225752_outLine +BABEL_OP1_103_61629_20120127_192849_inLine +BABEL_OP1_103_61629_20120127_192849_outLine +BABEL_OP1_103_61733_20120201_183457_inLine +BABEL_OP1_103_61733_20120201_183457_outLine +BABEL_OP1_103_62097_20120307_164325_inLine +BABEL_OP1_103_62097_20120307_164325_outLine +BABEL_OP1_103_62182_20111231_003944_inLine +BABEL_OP1_103_62182_20111231_003944_outLine +BABEL_OP1_103_62222_20120122_201756_inLine +BABEL_OP1_103_62222_20120122_201756_outLine +BABEL_OP1_103_62479_20120306_025702_inLine 
+BABEL_OP1_103_62479_20120306_025702_outLine +BABEL_OP1_103_62558_20120124_220850_inLine +BABEL_OP1_103_62558_20120124_220850_outLine +BABEL_OP1_103_62652_20120306_015948_inLine +BABEL_OP1_103_62652_20120306_015948_outLine +BABEL_OP1_103_62720_20120308_164432_inLine +BABEL_OP1_103_62720_20120308_164432_outLine +BABEL_OP1_103_62720_20120308_165706_inLine +BABEL_OP1_103_62720_20120308_165706_outLine +BABEL_OP1_103_62843_20120310_235523_inLine +BABEL_OP1_103_62843_20120310_235523_outLine +BABEL_OP1_103_63127_20120311_184714_inLine +BABEL_OP1_103_63127_20120311_184714_outLine +BABEL_OP1_103_63129_20120311_193438_inLine +BABEL_OP1_103_63129_20120311_193438_outLine +BABEL_OP1_103_63194_20120312_010359_inLine +BABEL_OP1_103_63194_20120312_010359_outLine +BABEL_OP1_103_63215_20120513_191621_inLine +BABEL_OP1_103_63215_20120513_191621_outLine +BABEL_OP1_103_63240_20120312_021342_inLine +BABEL_OP1_103_63240_20120312_021342_outLine +BABEL_OP1_103_63373_20120315_025205_inLine +BABEL_OP1_103_63373_20120315_025205_outLine +BABEL_OP1_103_63384_20120315_031012_inLine +BABEL_OP1_103_63384_20120315_031012_outLine +BABEL_OP1_103_63422_20120315_034640_inLine +BABEL_OP1_103_63422_20120315_034640_outLine +BABEL_OP1_103_63510_20120318_221426_inLine +BABEL_OP1_103_63510_20120318_221426_outLine +BABEL_OP1_103_63680_20120319_214759_inLine +BABEL_OP1_103_63680_20120319_214759_outLine +BABEL_OP1_103_63687_20120320_181655_inLine +BABEL_OP1_103_63687_20120320_181655_outLine +BABEL_OP1_103_63923_20120320_172933_inLine +BABEL_OP1_103_63923_20120320_172933_outLine +BABEL_OP1_103_63929_20120123_192325_inLine +BABEL_OP1_103_63929_20120123_192325_outLine +BABEL_OP1_103_63950_20120320_184409_inLine +BABEL_OP1_103_63950_20120320_184409_outLine +BABEL_OP1_103_64039_20120320_215418_inLine +BABEL_OP1_103_64039_20120320_215418_outLine +BABEL_OP1_103_64145_20120404_204905_inLine +BABEL_OP1_103_64145_20120404_204905_outLine +BABEL_OP1_103_64153_20120403_180645_inLine +BABEL_OP1_103_64153_20120403_180645_outLine +BABEL_OP1_103_64177_20120404_212051_inLine +BABEL_OP1_103_64177_20120404_212051_outLine +BABEL_OP1_103_64231_20120310_224637_inLine +BABEL_OP1_103_64231_20120310_224637_outLine +BABEL_OP1_103_64610_20120125_223001_inLine +BABEL_OP1_103_64610_20120125_223001_outLine +BABEL_OP1_103_65512_20111229_045507_inLine +BABEL_OP1_103_65512_20111229_045507_outLine +BABEL_OP1_103_65818_20120127_011907_inLine +BABEL_OP1_103_65818_20120127_011907_outLine +BABEL_OP1_103_65954_20120205_190321_inLine +BABEL_OP1_103_65954_20120205_190321_outLine +BABEL_OP1_103_65991_20120229_215906_inLine +BABEL_OP1_103_65991_20120229_215906_outLine +BABEL_OP1_103_66005_20120229_221845_inLine +BABEL_OP1_103_66005_20120229_221845_outLine +BABEL_OP1_103_66048_20120229_225251_inLine +BABEL_OP1_103_66048_20120229_225251_outLine +BABEL_OP1_103_66287_20120108_191621_inLine +BABEL_OP1_103_66287_20120108_191621_outLine +BABEL_OP1_103_66309_20120229_232503_inLine +BABEL_OP1_103_66309_20120229_232503_outLine +BABEL_OP1_103_66659_20120229_235042_inLine +BABEL_OP1_103_66659_20120229_235042_outLine +BABEL_OP1_103_66719_20120116_002436_inLine +BABEL_OP1_103_66719_20120116_002436_outLine +BABEL_OP1_103_66813_20120127_151237_inLine +BABEL_OP1_103_66813_20120127_151237_outLine +BABEL_OP1_103_67001_20120305_223711_inLine +BABEL_OP1_103_67001_20120305_223711_outLine +BABEL_OP1_103_67288_20120305_233501_inLine +BABEL_OP1_103_67288_20120305_233501_outLine +BABEL_OP1_103_67358_20120128_224934_inLine +BABEL_OP1_103_67358_20120128_224934_outLine 
+BABEL_OP1_103_67484_20120306_212801_inLine +BABEL_OP1_103_67484_20120306_212801_outLine +BABEL_OP1_103_67604_20120306_201231_inLine +BABEL_OP1_103_67604_20120306_201231_outLine +BABEL_OP1_103_67685_20120118_163939_inLine +BABEL_OP1_103_67685_20120118_163939_outLine +BABEL_OP1_103_67814_20120522_200114_inLine +BABEL_OP1_103_67814_20120522_200114_outLine +BABEL_OP1_103_67824_20120116_000148_inLine +BABEL_OP1_103_67824_20120116_000148_outLine +BABEL_OP1_103_68144_20120201_183136_inLine +BABEL_OP1_103_68144_20120201_183136_outLine +BABEL_OP1_103_68602_20120729_174819_inLine +BABEL_OP1_103_68602_20120729_174819_outLine +BABEL_OP1_103_68811_20120531_155031_inLine +BABEL_OP1_103_68811_20120531_155031_outLine +BABEL_OP1_103_69771_20120118_183315_inLine +BABEL_OP1_103_69771_20120118_183315_outLine +BABEL_OP1_103_69969_20120309_020612_inLine +BABEL_OP1_103_69969_20120309_020612_outLine +BABEL_OP1_103_69990_20120305_153850_inLine +BABEL_OP1_103_69990_20120305_153850_outLine +BABEL_OP1_103_70200_20120311_000406_inLine +BABEL_OP1_103_70200_20120311_000406_outLine +BABEL_OP1_103_70442_20111231_223721_inLine +BABEL_OP1_103_70442_20111231_223721_outLine +BABEL_OP1_103_70476_20120117_202957_inLine +BABEL_OP1_103_70476_20120117_202957_outLine +BABEL_OP1_103_70476_20120117_204242_inLine +BABEL_OP1_103_70476_20120117_204242_outLine +BABEL_OP1_103_70484_20120524_210819_inLine +BABEL_OP1_103_70484_20120524_210819_outLine +BABEL_OP1_103_70651_20120131_034337_inLine +BABEL_OP1_103_70651_20120131_034337_outLine +BABEL_OP1_103_70762_20111230_015835_inLine +BABEL_OP1_103_70762_20111230_015835_outLine +BABEL_OP1_103_70858_20120201_191031_inLine +BABEL_OP1_103_70858_20120201_191031_outLine +BABEL_OP1_103_70897_20120118_020506_inLine +BABEL_OP1_103_70897_20120118_020506_outLine +BABEL_OP1_103_70919_20120202_170934_inLine +BABEL_OP1_103_70919_20120202_170934_outLine +BABEL_OP1_103_71215_20120207_001204_inLine +BABEL_OP1_103_71215_20120207_001204_outLine +BABEL_OP1_103_71293_20120101_212224_inLine +BABEL_OP1_103_71293_20120101_212224_outLine +BABEL_OP1_103_71450_20120514_181620_inLine +BABEL_OP1_103_71450_20120514_181621_outLine +BABEL_OP1_103_71666_20120514_223534_inLine +BABEL_OP1_103_71666_20120514_223534_outLine +BABEL_OP1_103_71691_20120109_034006_inLine +BABEL_OP1_103_71691_20120109_034007_outLine +BABEL_OP1_103_72176_20111226_224243_inLine +BABEL_OP1_103_72176_20111226_224243_outLine +BABEL_OP1_103_72179_20120511_023300_inLine +BABEL_OP1_103_72179_20120511_023300_outLine +BABEL_OP1_103_72709_20120204_231928_inLine +BABEL_OP1_103_72709_20120204_231928_outLine +BABEL_OP1_103_72714_20120126_001354_inLine +BABEL_OP1_103_72714_20120126_001354_outLine +BABEL_OP1_103_73264_20111228_184038_inLine +BABEL_OP1_103_73264_20111228_184038_outLine +BABEL_OP1_103_73881_20120120_041629_inLine +BABEL_OP1_103_73881_20120120_041629_outLine +BABEL_OP1_103_74188_20120522_172823_inLine +BABEL_OP1_103_74188_20120522_172823_outLine +BABEL_OP1_103_74334_20120102_033902_inLine +BABEL_OP1_103_74334_20120102_033902_outLine +BABEL_OP1_103_75402_20120120_190246_inLine +BABEL_OP1_103_75402_20120120_190246_outLine +BABEL_OP1_103_75797_20120125_192735_inLine +BABEL_OP1_103_75797_20120125_192735_outLine +BABEL_OP1_103_76069_20120608_031447_inLine +BABEL_OP1_103_76069_20120608_031447_outLine +BABEL_OP1_103_76276_20120114_191208_inLine +BABEL_OP1_103_76276_20120114_191208_outLine +BABEL_OP1_103_76347_20120601_011206_inLine +BABEL_OP1_103_76347_20120601_011206_outLine +BABEL_OP1_103_77097_20120109_024625_inLine 
+BABEL_OP1_103_77097_20120109_024625_outLine +BABEL_OP1_103_77737_20111230_143637_inLine +BABEL_OP1_103_77737_20111230_143637_outLine +BABEL_OP1_103_78722_20120126_234318_inLine +BABEL_OP1_103_78722_20120126_234318_outLine +BABEL_OP1_103_79127_20120205_215208_inLine +BABEL_OP1_103_79127_20120205_215208_outLine +BABEL_OP1_103_79788_20120201_222512_inLine +BABEL_OP1_103_79788_20120201_222512_outLine +BABEL_OP1_103_79803_20120730_020433_inLine +BABEL_OP1_103_79803_20120730_020433_outLine +BABEL_OP1_103_79857_20120111_205043_inLine +BABEL_OP1_103_79857_20120111_205043_outLine +BABEL_OP1_103_79901_20120202_193650_inLine +BABEL_OP1_103_79901_20120202_194746_inLine +BABEL_OP1_103_80118_20120126_010553_inLine +BABEL_OP1_103_80118_20120126_010553_outLine +BABEL_OP1_103_80183_20120513_182754_inLine +BABEL_OP1_103_80183_20120513_182754_outLine +BABEL_OP1_103_80313_20120106_200706_inLine +BABEL_OP1_103_80313_20120106_200706_outLine +BABEL_OP1_103_80319_20120120_231835_inLine +BABEL_OP1_103_80319_20120120_231835_outLine +BABEL_OP1_103_80943_20120125_185437_inLine +BABEL_OP1_103_80943_20120125_185437_outLine +BABEL_OP1_103_81800_20120531_180959_inLine +BABEL_OP1_103_81800_20120531_180959_outLine +BABEL_OP1_103_81800_20120531_182855_inLine +BABEL_OP1_103_81800_20120531_182855_outLine +BABEL_OP1_103_82094_20120522_225233_inLine +BABEL_OP1_103_82094_20120522_225233_outLine +BABEL_OP1_103_82135_20120117_213149_inLine +BABEL_OP1_103_82135_20120117_213149_outLine +BABEL_OP1_103_83819_20120125_193543_inLine +BABEL_OP1_103_83819_20120125_193543_outLine +BABEL_OP1_103_83835_20111231_193822_inLine +BABEL_OP1_103_83835_20111231_193822_outLine +BABEL_OP1_103_84654_20120515_201204_inLine +BABEL_OP1_103_84654_20120515_201204_outLine +BABEL_OP1_103_84754_20120523_180347_inLine +BABEL_OP1_103_84754_20120523_180347_outLine +BABEL_OP1_103_84854_20120205_001920_inLine +BABEL_OP1_103_84854_20120205_001920_outLine +BABEL_OP1_103_84985_20120105_205509_inLine +BABEL_OP1_103_84985_20120105_205509_outLine +BABEL_OP1_103_85457_20120521_204532_inLine +BABEL_OP1_103_85457_20120521_204532_outLine +BABEL_OP1_103_85577_20120729_215558_inLine +BABEL_OP1_103_85577_20120729_215558_outLine +BABEL_OP1_103_85730_20120116_233350_inLine +BABEL_OP1_103_85730_20120116_233350_outLine +BABEL_OP1_103_85764_20120129_192217_inLine +BABEL_OP1_103_85764_20120129_192217_outLine +BABEL_OP1_103_85897_20120120_171153_inLine +BABEL_OP1_103_85897_20120120_171153_outLine +BABEL_OP1_103_86537_20120511_195620_inLine +BABEL_OP1_103_86537_20120511_195620_outLine +BABEL_OP1_103_86614_20120521_220136_inLine +BABEL_OP1_103_86614_20120521_220136_outLine +BABEL_OP1_103_86680_20120105_191615_inLine +BABEL_OP1_103_86680_20120105_191615_outLine +BABEL_OP1_103_87453_20120515_170718_inLine +BABEL_OP1_103_87453_20120515_170718_outLine +BABEL_OP1_103_87677_20120121_224149_inLine +BABEL_OP1_103_87677_20120121_224149_outLine +BABEL_OP1_103_87723_20120518_211143_inLine +BABEL_OP1_103_87723_20120518_211143_outLine +BABEL_OP1_103_88604_20120206_014323_inLine +BABEL_OP1_103_88604_20120206_014323_outLine +BABEL_OP1_103_88604_20120206_015628_inLine +BABEL_OP1_103_88604_20120206_015628_outLine +BABEL_OP1_103_88677_20120112_032502_inLine +BABEL_OP1_103_88677_20120112_032502_outLine +BABEL_OP1_103_89464_20120205_204528_inLine +BABEL_OP1_103_89464_20120205_204528_outLine +BABEL_OP1_103_89702_20120109_021228_inLine +BABEL_OP1_103_89702_20120109_021228_outLine +BABEL_OP1_103_90041_20120201_190104_inLine +BABEL_OP1_103_90041_20120201_190104_outLine 
+BABEL_OP1_103_90129_20120126_221744_inLine +BABEL_OP1_103_90129_20120126_221744_outLine +BABEL_OP1_103_90641_20120102_212610_inLine +BABEL_OP1_103_90641_20120102_212610_outLine +BABEL_OP1_103_90882_20120530_230837_inLine +BABEL_OP1_103_90882_20120530_230837_outLine +BABEL_OP1_103_91161_20111229_202627_inLine +BABEL_OP1_103_91161_20111229_202627_outLine +BABEL_OP1_103_91372_20120115_023342_inLine +BABEL_OP1_103_91372_20120115_023342_outLine +BABEL_OP1_103_92722_20120512_132612_inLine +BABEL_OP1_103_92722_20120512_132612_outLine +BABEL_OP1_103_92793_20111229_200332_inLine +BABEL_OP1_103_92793_20111229_200332_outLine +BABEL_OP1_103_92910_20120205_195736_inLine +BABEL_OP1_103_92910_20120205_195736_outLine +BABEL_OP1_103_93026_20111228_235326_inLine +BABEL_OP1_103_93026_20111228_235326_outLine +BABEL_OP1_103_93358_20120107_025421_inLine +BABEL_OP1_103_93358_20120107_025421_outLine +BABEL_OP1_103_93742_20120529_184600_inLine +BABEL_OP1_103_93742_20120529_184600_outLine +BABEL_OP1_103_93907_20111228_051458_inLine +BABEL_OP1_103_93907_20111228_051458_outLine +BABEL_OP1_103_94572_20120131_224123_inLine +BABEL_OP1_103_94572_20120131_224123_outLine +BABEL_OP1_103_94793_20120102_034406_inLine +BABEL_OP1_103_94793_20120102_034406_outLine +BABEL_OP1_103_95349_20111229_201011_inLine +BABEL_OP1_103_95349_20111229_201011_outLine +BABEL_OP1_103_95349_20111229_225436_inLine +BABEL_OP1_103_95349_20111229_225436_outLine +BABEL_OP1_103_95360_20120206_204731_inLine +BABEL_OP1_103_95360_20120206_204731_outLine +BABEL_OP1_103_96186_20120128_212837_inLine +BABEL_OP1_103_96186_20120128_212837_outLine +BABEL_OP1_103_96537_20120729_165831_inLine +BABEL_OP1_103_96537_20120729_165831_outLine +BABEL_OP1_103_96690_20120131_213344_inLine +BABEL_OP1_103_96690_20120131_213344_outLine +BABEL_OP1_103_97679_20111229_191138_inLine +BABEL_OP1_103_97679_20111229_191138_outLine +BABEL_OP1_103_97971_20120111_020458_inLine +BABEL_OP1_103_97971_20120111_020459_outLine +BABEL_OP1_103_98331_20120131_213958_inLine +BABEL_OP1_103_98331_20120131_213958_outLine +BABEL_OP1_103_98446_20120101_215857_inLine +BABEL_OP1_103_98446_20120101_215857_outLine +BABEL_OP1_103_99093_20120514_161939_inLine +BABEL_OP1_103_99093_20120514_161939_outLine +BABEL_OP1_103_99510_20120515_175659_inLine +BABEL_OP1_103_99510_20120515_175659_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.list b/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.list new file mode 100644 index 00000000000..4d5c081b1c2 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.list @@ -0,0 +1,124 @@ +BABEL_OP1_103_10193_20111229_035249_inLine +BABEL_OP1_103_10193_20111229_035249_outLine +BABEL_OP1_103_10612_20111222_210911_inLine +BABEL_OP1_103_10612_20111222_210911_outLine +BABEL_OP1_103_11128_20120124_200626_inLine +BABEL_OP1_103_11128_20120124_200626_outLine +BABEL_OP1_103_12639_20111229_015021_inLine +BABEL_OP1_103_12639_20111229_015021_outLine +BABEL_OP1_103_12786_20111230_012748_inLine +BABEL_OP1_103_12786_20111230_012749_outLine +BABEL_OP1_103_14554_20120120_230548_inLine +BABEL_OP1_103_14554_20120120_230548_outLine +BABEL_OP1_103_16416_20120205_011943_inLine +BABEL_OP1_103_16416_20120205_011943_outLine +BABEL_OP1_103_19280_20120515_173629_inLine +BABEL_OP1_103_19280_20120515_173630_outLine +BABEL_OP1_103_22134_20120204_185956_inLine +BABEL_OP1_103_22134_20120204_185956_outLine +BABEL_OP1_103_22134_20120204_191024_inLine +BABEL_OP1_103_22134_20120204_191024_outLine +BABEL_OP1_103_22697_20120123_213617_inLine 
+BABEL_OP1_103_22697_20120123_213617_outLine +BABEL_OP1_103_30620_20111231_181228_inLine +BABEL_OP1_103_30620_20111231_181228_outLine +BABEL_OP1_103_30810_20111227_000227_inLine +BABEL_OP1_103_30810_20111227_000227_outLine +BABEL_OP1_103_32040_20120122_181109_inLine +BABEL_OP1_103_32040_20120122_181109_outLine +BABEL_OP1_103_36584_20120201_230611_inLine +BABEL_OP1_103_36584_20120201_230611_outLine +BABEL_OP1_103_38163_20120202_001843_inLine +BABEL_OP1_103_38163_20120202_001843_outLine +BABEL_OP1_103_39119_20120608_004832_inLine +BABEL_OP1_103_39119_20120608_004832_outLine +BABEL_OP1_103_41144_20120118_222314_inLine +BABEL_OP1_103_41144_20120118_222314_outLine +BABEL_OP1_103_41197_20120805_155112_inLine +BABEL_OP1_103_41197_20120805_155112_outLine +BABEL_OP1_103_41498_20120118_023411_inLine +BABEL_OP1_103_41498_20120118_023411_outLine +BABEL_OP1_103_42742_20120123_232130_inLine +BABEL_OP1_103_42742_20120123_232130_outLine +BABEL_OP1_103_43974_20120110_164058_inLine +BABEL_OP1_103_43974_20120110_164058_outLine +BABEL_OP1_103_44192_20120523_184414_inLine +BABEL_OP1_103_44192_20120523_184414_outLine +BABEL_OP1_103_45601_20120201_181124_inLine +BABEL_OP1_103_45601_20120201_181124_outLine +BABEL_OP1_103_45763_20120116_175349_inLine +BABEL_OP1_103_45763_20120116_175349_outLine +BABEL_OP1_103_46548_20120517_192114_inLine +BABEL_OP1_103_46548_20120517_192114_outLine +BABEL_OP1_103_46887_20120202_214319_inLine +BABEL_OP1_103_46887_20120202_214320_outLine +BABEL_OP1_103_46900_20120204_225820_inLine +BABEL_OP1_103_46900_20120204_225820_outLine +BABEL_OP1_103_48518_20120121_195050_inLine +BABEL_OP1_103_48518_20120121_195050_outLine +BABEL_OP1_103_52604_20120131_233302_inLine +BABEL_OP1_103_52604_20120131_233302_outLine +BABEL_OP1_103_54606_20120205_175853_inLine +BABEL_OP1_103_54606_20120205_175853_outLine +BABEL_OP1_103_55316_20111226_180557_inLine +BABEL_OP1_103_55316_20111226_180557_outLine +BABEL_OP1_103_57232_20120126_020104_inLine +BABEL_OP1_103_57232_20120126_020104_outLine +BABEL_OP1_103_59558_20120121_234224_inLine +BABEL_OP1_103_59558_20120121_234224_outLine +BABEL_OP1_103_60571_20111228_183342_inLine +BABEL_OP1_103_60571_20111228_183342_outLine +BABEL_OP1_103_63422_20120315_034640_inLine +BABEL_OP1_103_63422_20120315_034640_outLine +BABEL_OP1_103_63950_20120320_184409_inLine +BABEL_OP1_103_63950_20120320_184409_outLine +BABEL_OP1_103_64153_20120403_180645_inLine +BABEL_OP1_103_64153_20120403_180645_outLine +BABEL_OP1_103_66659_20120229_235042_inLine +BABEL_OP1_103_66659_20120229_235042_outLine +BABEL_OP1_103_67604_20120306_201231_inLine +BABEL_OP1_103_67604_20120306_201231_outLine +BABEL_OP1_103_68144_20120201_183136_inLine +BABEL_OP1_103_68144_20120201_183136_outLine +BABEL_OP1_103_69771_20120118_183315_inLine +BABEL_OP1_103_69771_20120118_183315_outLine +BABEL_OP1_103_70442_20111231_223721_inLine +BABEL_OP1_103_70442_20111231_223721_outLine +BABEL_OP1_103_70484_20120524_210819_inLine +BABEL_OP1_103_70484_20120524_210819_outLine +BABEL_OP1_103_72176_20111226_224243_inLine +BABEL_OP1_103_72176_20111226_224243_outLine +BABEL_OP1_103_75402_20120120_190246_inLine +BABEL_OP1_103_75402_20120120_190246_outLine +BABEL_OP1_103_76069_20120608_031447_inLine +BABEL_OP1_103_76069_20120608_031447_outLine +BABEL_OP1_103_76347_20120601_011206_inLine +BABEL_OP1_103_76347_20120601_011206_outLine +BABEL_OP1_103_77737_20111230_143637_inLine +BABEL_OP1_103_77737_20111230_143637_outLine +BABEL_OP1_103_80319_20120120_231835_inLine +BABEL_OP1_103_80319_20120120_231835_outLine 
+BABEL_OP1_103_84754_20120523_180347_inLine +BABEL_OP1_103_84754_20120523_180347_outLine +BABEL_OP1_103_84985_20120105_205509_inLine +BABEL_OP1_103_84985_20120105_205509_outLine +BABEL_OP1_103_85897_20120120_171153_inLine +BABEL_OP1_103_85897_20120120_171153_outLine +BABEL_OP1_103_87723_20120518_211143_inLine +BABEL_OP1_103_87723_20120518_211143_outLine +BABEL_OP1_103_88604_20120206_014323_inLine +BABEL_OP1_103_88604_20120206_014323_outLine +BABEL_OP1_103_88604_20120206_015628_inLine +BABEL_OP1_103_88604_20120206_015628_outLine +BABEL_OP1_103_90041_20120201_190104_inLine +BABEL_OP1_103_90041_20120201_190104_outLine +BABEL_OP1_103_90129_20120126_221744_inLine +BABEL_OP1_103_90129_20120126_221744_outLine +BABEL_OP1_103_93742_20120529_184600_inLine +BABEL_OP1_103_93742_20120529_184600_outLine +BABEL_OP1_103_94572_20120131_224123_inLine +BABEL_OP1_103_94572_20120131_224123_outLine +BABEL_OP1_103_95360_20120206_204731_inLine +BABEL_OP1_103_95360_20120206_204731_outLine +BABEL_OP1_103_96186_20120128_212837_inLine +BABEL_OP1_103_96186_20120128_212837_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..0b2264097e0 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.untranscribed.list @@ -0,0 +1,627 @@ +BABEL_OP1_103_10301_20111220_225237_inLine +BABEL_OP1_103_10301_20111220_225237_outLine +BABEL_OP1_103_10305_20111220_231100_inLine +BABEL_OP1_103_10305_20111220_231100_outLine +BABEL_OP1_103_10348_20120113_213854_inLine +BABEL_OP1_103_10348_20120113_213854_outLine +BABEL_OP1_103_10531_20120118_042000_inLine +BABEL_OP1_103_10531_20120118_042000_outLine +BABEL_OP1_103_10556_20111221_000031_inLine +BABEL_OP1_103_10556_20111221_000031_outLine +BABEL_OP1_103_10806_20111226_181132_inLine +BABEL_OP1_103_10806_20111226_181132_outLine +BABEL_OP1_103_11155_20111230_211626_inLine +BABEL_OP1_103_11155_20111230_211626_outLine +BABEL_OP1_103_11442_20120125_025606_inLine +BABEL_OP1_103_11442_20120125_025606_outLine +BABEL_OP1_103_12518_20111227_181020_inLine +BABEL_OP1_103_12518_20111227_181021_outLine +BABEL_OP1_103_12682_20120125_201902_inLine +BABEL_OP1_103_12682_20120125_201902_outLine +BABEL_OP1_103_12682_20120125_210238_inLine +BABEL_OP1_103_12682_20120125_210238_outLine +BABEL_OP1_103_12719_20120203_035027_inLine +BABEL_OP1_103_12719_20120203_035027_outLine +BABEL_OP1_103_12809_20111229_175926_inLine +BABEL_OP1_103_12809_20111229_175926_outLine +BABEL_OP1_103_12843_20120117_224043_inLine +BABEL_OP1_103_12843_20120117_224043_outLine +BABEL_OP1_103_13024_20111229_010356_inLine +BABEL_OP1_103_13024_20111229_010357_outLine +BABEL_OP1_103_13295_20120522_232550_inLine +BABEL_OP1_103_13295_20120522_232550_outLine +BABEL_OP1_103_13615_20120113_174612_inLine +BABEL_OP1_103_13615_20120113_174612_outLine +BABEL_OP1_103_13708_20120102_032700_inLine +BABEL_OP1_103_13708_20120102_032700_outLine +BABEL_OP1_103_13752_20120530_221929_inLine +BABEL_OP1_103_13752_20120530_221929_outLine +BABEL_OP1_103_14086_20120113_200751_inLine +BABEL_OP1_103_14086_20120113_200751_outLine +BABEL_OP1_103_14147_20120531_160226_inLine +BABEL_OP1_103_14147_20120531_160226_outLine +BABEL_OP1_103_14147_20120531_170020_inLine +BABEL_OP1_103_14147_20120531_170020_outLine +BABEL_OP1_103_14422_20120514_181741_inLine +BABEL_OP1_103_14422_20120514_181741_outLine +BABEL_OP1_103_14583_20120515_192730_inLine +BABEL_OP1_103_14583_20120515_192730_outLine 
+BABEL_OP1_103_14942_20120101_203529_inLine +BABEL_OP1_103_14942_20120101_203529_outLine +BABEL_OP1_103_15304_20120106_035227_inLine +BABEL_OP1_103_15304_20120106_035227_outLine +BABEL_OP1_103_15600_20111231_181856_inLine +BABEL_OP1_103_15600_20111231_181856_outLine +BABEL_OP1_103_15665_20120517_162750_inLine +BABEL_OP1_103_15665_20120517_162750_outLine +BABEL_OP1_103_15749_20111230_015120_inLine +BABEL_OP1_103_15749_20111230_015120_outLine +BABEL_OP1_103_15803_20120528_164556_inLine +BABEL_OP1_103_15803_20120528_164556_outLine +BABEL_OP1_103_16210_20120118_201234_inLine +BABEL_OP1_103_16210_20120118_201234_outLine +BABEL_OP1_103_16393_20111230_012139_inLine +BABEL_OP1_103_16393_20111230_012139_outLine +BABEL_OP1_103_16633_20120105_164800_inLine +BABEL_OP1_103_16633_20120105_164800_outLine +BABEL_OP1_103_16754_20120101_015558_inLine +BABEL_OP1_103_16754_20120101_015558_outLine +BABEL_OP1_103_17063_20120202_201950_inLine +BABEL_OP1_103_17063_20120202_201950_outLine +BABEL_OP1_103_17063_20120202_204211_inLine +BABEL_OP1_103_17063_20120202_204211_outLine +BABEL_OP1_103_17139_20120110_182115_inLine +BABEL_OP1_103_17139_20120110_182115_outLine +BABEL_OP1_103_17180_20120126_233802_inLine +BABEL_OP1_103_17180_20120126_233802_outLine +BABEL_OP1_103_17612_20120531_232906_inLine +BABEL_OP1_103_17876_20120203_220933_inLine +BABEL_OP1_103_17876_20120203_220933_outLine +BABEL_OP1_103_18244_20120514_000930_inLine +BABEL_OP1_103_18244_20120514_000931_outLine +BABEL_OP1_103_18556_20111231_233139_inLine +BABEL_OP1_103_18556_20111231_233139_outLine +BABEL_OP1_103_18755_20120103_012800_inLine +BABEL_OP1_103_18755_20120103_012800_outLine +BABEL_OP1_103_18861_20120612_231154_inLine +BABEL_OP1_103_18861_20120612_231154_outLine +BABEL_OP1_103_18938_20120515_163044_inLine +BABEL_OP1_103_18938_20120515_163044_outLine +BABEL_OP1_103_19118_20120206_194310_inLine +BABEL_OP1_103_19118_20120206_194310_outLine +BABEL_OP1_103_19324_20120114_223457_inLine +BABEL_OP1_103_19324_20120114_223457_outLine +BABEL_OP1_103_19564_20120112_151539_inLine +BABEL_OP1_103_19564_20120112_151539_outLine +BABEL_OP1_103_19697_20120107_043218_inLine +BABEL_OP1_103_19697_20120107_043218_outLine +BABEL_OP1_103_19885_20120517_200533_inLine +BABEL_OP1_103_19885_20120517_200533_outLine +BABEL_OP1_103_20481_20120118_004556_inLine +BABEL_OP1_103_20481_20120118_004556_outLine +BABEL_OP1_103_21020_20120517_182615_inLine +BABEL_OP1_103_21020_20120517_182615_outLine +BABEL_OP1_103_21076_20111231_203216_inLine +BABEL_OP1_103_21076_20111231_203216_outLine +BABEL_OP1_103_21208_20120113_192303_inLine +BABEL_OP1_103_21208_20120113_192303_outLine +BABEL_OP1_103_21417_20120115_235720_inLine +BABEL_OP1_103_21417_20120115_235720_outLine +BABEL_OP1_103_21648_20111229_024025_inLine +BABEL_OP1_103_21648_20111229_024025_outLine +BABEL_OP1_103_21928_20120205_210433_inLine +BABEL_OP1_103_21928_20120205_221157_inLine +BABEL_OP1_103_22338_20120203_214144_inLine +BABEL_OP1_103_22338_20120203_214144_outLine +BABEL_OP1_103_22528_20120204_221751_inLine +BABEL_OP1_103_22528_20120204_221751_outLine +BABEL_OP1_103_23237_20120201_204534_inLine +BABEL_OP1_103_23237_20120201_204534_outLine +BABEL_OP1_103_24235_20120205_171351_inLine +BABEL_OP1_103_24235_20120205_171351_outLine +BABEL_OP1_103_24340_20120526_221640_inLine +BABEL_OP1_103_24340_20120526_221640_outLine +BABEL_OP1_103_25040_20120129_215646_inLine +BABEL_OP1_103_25040_20120129_215647_outLine +BABEL_OP1_103_25489_20120107_015122_inLine +BABEL_OP1_103_25489_20120107_015122_outLine 
+BABEL_OP1_103_26464_20120115_153724_inLine +BABEL_OP1_103_26464_20120115_153725_outLine +BABEL_OP1_103_26603_20120519_190743_inLine +BABEL_OP1_103_26603_20120519_190743_outLine +BABEL_OP1_103_26980_20120114_151400_inLine +BABEL_OP1_103_26980_20120114_151400_outLine +BABEL_OP1_103_27159_20120109_175434_inLine +BABEL_OP1_103_27159_20120109_175434_outLine +BABEL_OP1_103_27298_20120519_164745_inLine +BABEL_OP1_103_27298_20120519_164745_outLine +BABEL_OP1_103_27374_20120608_213343_inLine +BABEL_OP1_103_27374_20120608_213344_outLine +BABEL_OP1_103_27889_20120405_184406_inLine +BABEL_OP1_103_27889_20120405_184406_outLine +BABEL_OP1_103_27895_20120407_201822_inLine +BABEL_OP1_103_27895_20120407_201822_outLine +BABEL_OP1_103_27997_20120406_024629_inLine +BABEL_OP1_103_27997_20120406_024629_outLine +BABEL_OP1_103_28150_20120421_030716_inLine +BABEL_OP1_103_28150_20120421_030716_outLine +BABEL_OP1_103_28281_20120204_215552_inLine +BABEL_OP1_103_28281_20120204_215552_outLine +BABEL_OP1_103_28325_20120421_034840_inLine +BABEL_OP1_103_28325_20120421_034840_outLine +BABEL_OP1_103_28349_20120422_230936_inLine +BABEL_OP1_103_28349_20120422_230936_outLine +BABEL_OP1_103_28452_20120423_002721_inLine +BABEL_OP1_103_28452_20120423_002721_outLine +BABEL_OP1_103_28820_20111231_235604_inLine +BABEL_OP1_103_28820_20111231_235604_outLine +BABEL_OP1_103_29083_20120524_203900_inLine +BABEL_OP1_103_29083_20120524_203900_outLine +BABEL_OP1_103_29368_20120109_152242_inLine +BABEL_OP1_103_29368_20120109_152242_outLine +BABEL_OP1_103_29757_20120607_155549_inLine +BABEL_OP1_103_29757_20120607_155549_outLine +BABEL_OP1_103_30012_20120523_210111_inLine +BABEL_OP1_103_30012_20120523_210111_outLine +BABEL_OP1_103_30031_20111231_051935_inLine +BABEL_OP1_103_30031_20111231_051935_outLine +BABEL_OP1_103_30040_20120114_164613_inLine +BABEL_OP1_103_30040_20120114_164613_outLine +BABEL_OP1_103_30243_20120115_005252_inLine +BABEL_OP1_103_30243_20120115_005252_outLine +BABEL_OP1_103_30711_20120612_211646_inLine +BABEL_OP1_103_30711_20120612_211646_outLine +BABEL_OP1_103_30847_20120108_235955_inLine +BABEL_OP1_103_30847_20120108_235955_outLine +BABEL_OP1_103_30904_20120522_013413_inLine +BABEL_OP1_103_30904_20120522_013413_outLine +BABEL_OP1_103_31201_20120523_211540_inLine +BABEL_OP1_103_31201_20120523_211540_outLine +BABEL_OP1_103_31871_20120115_205857_inLine +BABEL_OP1_103_31871_20120115_205857_outLine +BABEL_OP1_103_32453_20120116_174338_inLine +BABEL_OP1_103_32453_20120116_174338_outLine +BABEL_OP1_103_32722_20120115_005258_inLine +BABEL_OP1_103_32722_20120115_005258_outLine +BABEL_OP1_103_33223_20120108_225050_inLine +BABEL_OP1_103_33223_20120108_225050_outLine +BABEL_OP1_103_33316_20120528_173250_inLine +BABEL_OP1_103_33316_20120528_173250_outLine +BABEL_OP1_103_33534_20120122_020502_inLine +BABEL_OP1_103_33534_20120122_020502_outLine +BABEL_OP1_103_33551_20120122_194434_inLine +BABEL_OP1_103_33551_20120122_194434_outLine +BABEL_OP1_103_33699_20120122_173500_inLine +BABEL_OP1_103_33699_20120122_173500_outLine +BABEL_OP1_103_33807_20120122_190057_inLine +BABEL_OP1_103_33807_20120122_190057_outLine +BABEL_OP1_103_33885_20120125_172938_inLine +BABEL_OP1_103_33885_20120125_172938_outLine +BABEL_OP1_103_33991_20120117_202117_inLine +BABEL_OP1_103_33991_20120117_202118_outLine +BABEL_OP1_103_34137_20120529_224220_inLine +BABEL_OP1_103_34137_20120529_224220_outLine +BABEL_OP1_103_34332_20120204_191733_inLine +BABEL_OP1_103_34332_20120204_191733_outLine +BABEL_OP1_103_34545_20120118_173942_inLine 
+BABEL_OP1_103_34545_20120118_173942_outLine +BABEL_OP1_103_34564_20120530_211027_inLine +BABEL_OP1_103_34564_20120530_211027_outLine +BABEL_OP1_103_34925_20120112_154829_inLine +BABEL_OP1_103_34925_20120112_154829_outLine +BABEL_OP1_103_34994_20120115_213251_inLine +BABEL_OP1_103_34994_20120115_213251_outLine +BABEL_OP1_103_35144_20120123_230913_inLine +BABEL_OP1_103_35144_20120123_230913_outLine +BABEL_OP1_103_35152_20111230_220705_inLine +BABEL_OP1_103_35152_20111230_220705_outLine +BABEL_OP1_103_35157_20120124_010640_inLine +BABEL_OP1_103_35157_20120124_010640_outLine +BABEL_OP1_103_35444_20120612_203930_inLine +BABEL_OP1_103_35444_20120612_203930_outLine +BABEL_OP1_103_35660_20120122_013401_inLine +BABEL_OP1_103_35660_20120122_013402_outLine +BABEL_OP1_103_35750_20111230_025221_inLine +BABEL_OP1_103_35750_20111230_025221_outLine +BABEL_OP1_103_35892_20120120_205811_inLine +BABEL_OP1_103_35892_20120120_205811_outLine +BABEL_OP1_103_36748_20120121_230812_inLine +BABEL_OP1_103_36748_20120121_230812_outLine +BABEL_OP1_103_36962_20120810_005828_inLine +BABEL_OP1_103_36962_20120810_005828_outLine +BABEL_OP1_103_37131_20120522_165130_inLine +BABEL_OP1_103_37131_20120522_165130_outLine +BABEL_OP1_103_37551_20111229_232422_inLine +BABEL_OP1_103_37551_20111229_232422_outLine +BABEL_OP1_103_37604_20120122_203335_inLine +BABEL_OP1_103_37604_20120122_203335_outLine +BABEL_OP1_103_37687_20120124_220825_inLine +BABEL_OP1_103_37687_20120124_220826_outLine +BABEL_OP1_103_38573_20120120_234500_inLine +BABEL_OP1_103_38573_20120120_234500_outLine +BABEL_OP1_103_38573_20120121_000745_inLine +BABEL_OP1_103_38573_20120121_000745_outLine +BABEL_OP1_103_38588_20120522_215415_inLine +BABEL_OP1_103_38588_20120522_215415_outLine +BABEL_OP1_103_39320_20120207_022344_inLine +BABEL_OP1_103_39320_20120207_022344_outLine +BABEL_OP1_103_39769_20120127_213455_inLine +BABEL_OP1_103_39769_20120127_213455_outLine +BABEL_OP1_103_40410_20120124_204758_inLine +BABEL_OP1_103_40410_20120124_204758_outLine +BABEL_OP1_103_40442_20120202_174431_inLine +BABEL_OP1_103_40442_20120202_174431_outLine +BABEL_OP1_103_40889_20120206_221100_inLine +BABEL_OP1_103_40889_20120206_221100_outLine +BABEL_OP1_103_41172_20120114_134829_inLine +BABEL_OP1_103_41172_20120114_134829_outLine +BABEL_OP1_103_42332_20120126_191134_inLine +BABEL_OP1_103_42332_20120126_191134_outLine +BABEL_OP1_103_42332_20120126_192035_inLine +BABEL_OP1_103_42332_20120126_192035_outLine +BABEL_OP1_103_42651_20120122_000902_inLine +BABEL_OP1_103_42651_20120122_000902_outLine +BABEL_OP1_103_42698_20120123_230900_inLine +BABEL_OP1_103_42698_20120123_230900_outLine +BABEL_OP1_103_42790_20120129_205024_inLine +BABEL_OP1_103_42790_20120129_205025_outLine +BABEL_OP1_103_42986_20120125_204035_inLine +BABEL_OP1_103_42986_20120125_204035_outLine +BABEL_OP1_103_43442_20120120_033602_inLine +BABEL_OP1_103_43442_20120120_033602_outLine +BABEL_OP1_103_43571_20111226_210759_inLine +BABEL_OP1_103_43571_20111226_210759_outLine +BABEL_OP1_103_43812_20120124_005515_inLine +BABEL_OP1_103_43812_20120124_005515_outLine +BABEL_OP1_103_43959_20120125_223215_inLine +BABEL_OP1_103_43959_20120125_223215_outLine +BABEL_OP1_103_44838_20111229_014707_inLine +BABEL_OP1_103_44838_20111229_014707_outLine +BABEL_OP1_103_44948_20120203_011011_inLine +BABEL_OP1_103_44948_20120203_011011_outLine +BABEL_OP1_103_44967_20120207_025756_inLine +BABEL_OP1_103_44967_20120207_025756_outLine +BABEL_OP1_103_45020_20120522_170055_inLine +BABEL_OP1_103_45020_20120522_170055_outLine 
+BABEL_OP1_103_45029_20120608_010540_inLine +BABEL_OP1_103_45029_20120608_010540_outLine +BABEL_OP1_103_45565_20120125_220956_inLine +BABEL_OP1_103_45565_20120125_220956_outLine +BABEL_OP1_103_46197_20120524_220246_inLine +BABEL_OP1_103_46197_20120524_220246_outLine +BABEL_OP1_103_46460_20120530_183725_inLine +BABEL_OP1_103_46460_20120530_183725_outLine +BABEL_OP1_103_46460_20120530_185105_inLine +BABEL_OP1_103_46460_20120530_185106_outLine +BABEL_OP1_103_46862_20120124_195804_inLine +BABEL_OP1_103_46862_20120124_195804_outLine +BABEL_OP1_103_46862_20120204_203651_inLine +BABEL_OP1_103_46862_20120204_203651_outLine +BABEL_OP1_103_47151_20111229_233253_inLine +BABEL_OP1_103_47151_20111229_233253_outLine +BABEL_OP1_103_47177_20120127_201638_inLine +BABEL_OP1_103_47177_20120127_201638_outLine +BABEL_OP1_103_47416_20120729_181025_inLine +BABEL_OP1_103_47416_20120729_181025_outLine +BABEL_OP1_103_47424_20111231_203241_inLine +BABEL_OP1_103_47424_20111231_203241_outLine +BABEL_OP1_103_47574_20120207_034724_inLine +BABEL_OP1_103_47574_20120207_034724_outLine +BABEL_OP1_103_48176_20120206_023101_inLine +BABEL_OP1_103_48176_20120206_023101_outLine +BABEL_OP1_103_48259_20120116_022438_inLine +BABEL_OP1_103_48259_20120116_022438_outLine +BABEL_OP1_103_49175_20120206_214803_inLine +BABEL_OP1_103_49175_20120206_214803_outLine +BABEL_OP1_103_49520_20120523_172707_inLine +BABEL_OP1_103_49520_20120523_172707_outLine +BABEL_OP1_103_49629_20120104_040004_inLine +BABEL_OP1_103_49629_20120104_040004_outLine +BABEL_OP1_103_49755_20120110_010410_inLine +BABEL_OP1_103_49755_20120110_010410_outLine +BABEL_OP1_103_49819_20120127_012212_inLine +BABEL_OP1_103_49819_20120127_012212_outLine +BABEL_OP1_103_50492_20120123_211938_inLine +BABEL_OP1_103_50492_20120123_211938_outLine +BABEL_OP1_103_50523_20120607_185125_inLine +BABEL_OP1_103_50523_20120607_185126_outLine +BABEL_OP1_103_50798_20120131_022954_inLine +BABEL_OP1_103_50798_20120131_022954_outLine +BABEL_OP1_103_51243_20120201_200604_inLine +BABEL_OP1_103_51243_20120201_200604_outLine +BABEL_OP1_103_52122_20120207_025756_inLine +BABEL_OP1_103_52122_20120207_025756_outLine +BABEL_OP1_103_52753_20120521_000301_inLine +BABEL_OP1_103_52753_20120521_001422_inLine +BABEL_OP1_103_53067_20120127_225851_inLine +BABEL_OP1_103_53067_20120127_225851_outLine +BABEL_OP1_103_53262_20120204_194912_inLine +BABEL_OP1_103_53262_20120204_194912_outLine +BABEL_OP1_103_53346_20120128_214441_inLine +BABEL_OP1_103_53346_20120128_214441_outLine +BABEL_OP1_103_53636_20120127_000358_inLine +BABEL_OP1_103_53636_20120127_000358_outLine +BABEL_OP1_103_54030_20111230_220440_inLine +BABEL_OP1_103_54030_20111230_220440_outLine +BABEL_OP1_103_54263_20120206_225348_inLine +BABEL_OP1_103_54263_20120206_225349_outLine +BABEL_OP1_103_54417_20120522_172155_inLine +BABEL_OP1_103_54417_20120522_172155_outLine +BABEL_OP1_103_54975_20120207_015749_inLine +BABEL_OP1_103_54975_20120207_015749_outLine +BABEL_OP1_103_54991_20120206_003607_inLine +BABEL_OP1_103_54991_20120206_003607_outLine +BABEL_OP1_103_55166_20120119_180058_inLine +BABEL_OP1_103_55166_20120119_180058_outLine +BABEL_OP1_103_55194_20120529_215243_inLine +BABEL_OP1_103_55194_20120529_215243_outLine +BABEL_OP1_103_56704_20120606_171759_inLine +BABEL_OP1_103_56704_20120606_171759_outLine +BABEL_OP1_103_57092_20111227_044400_inLine +BABEL_OP1_103_57092_20111227_044400_outLine +BABEL_OP1_103_57351_20120612_182248_inLine +BABEL_OP1_103_57351_20120612_182248_outLine +BABEL_OP1_103_58283_20111231_230840_inLine 
+BABEL_OP1_103_58283_20111231_230840_outLine +BABEL_OP1_103_58925_20120113_212456_inLine +BABEL_OP1_103_58925_20120113_212456_outLine +BABEL_OP1_103_58925_20120113_214350_inLine +BABEL_OP1_103_58925_20120113_214350_outLine +BABEL_OP1_103_59482_20120612_190437_inLine +BABEL_OP1_103_59482_20120612_190437_outLine +BABEL_OP1_103_60524_20120109_213755_inLine +BABEL_OP1_103_60524_20120109_213755_outLine +BABEL_OP1_103_60806_20120117_233630_inLine +BABEL_OP1_103_60806_20120117_233630_outLine +BABEL_OP1_103_61229_20120616_151341_inLine +BABEL_OP1_103_61229_20120616_151341_outLine +BABEL_OP1_103_61558_20120106_205412_inLine +BABEL_OP1_103_61558_20120106_205412_outLine +BABEL_OP1_103_61592_20120125_225752_inLine +BABEL_OP1_103_61592_20120125_225752_outLine +BABEL_OP1_103_61629_20120127_192849_inLine +BABEL_OP1_103_61629_20120127_192849_outLine +BABEL_OP1_103_61733_20120201_183457_inLine +BABEL_OP1_103_61733_20120201_183457_outLine +BABEL_OP1_103_62097_20120307_164325_inLine +BABEL_OP1_103_62097_20120307_164325_outLine +BABEL_OP1_103_62182_20111231_003944_inLine +BABEL_OP1_103_62182_20111231_003944_outLine +BABEL_OP1_103_62222_20120122_201756_inLine +BABEL_OP1_103_62222_20120122_201756_outLine +BABEL_OP1_103_62479_20120306_025702_inLine +BABEL_OP1_103_62479_20120306_025702_outLine +BABEL_OP1_103_62558_20120124_220850_inLine +BABEL_OP1_103_62558_20120124_220850_outLine +BABEL_OP1_103_62652_20120306_015948_inLine +BABEL_OP1_103_62652_20120306_015948_outLine +BABEL_OP1_103_62720_20120308_164432_inLine +BABEL_OP1_103_62720_20120308_164432_outLine +BABEL_OP1_103_62720_20120308_165706_inLine +BABEL_OP1_103_62720_20120308_165706_outLine +BABEL_OP1_103_62843_20120310_235523_inLine +BABEL_OP1_103_62843_20120310_235523_outLine +BABEL_OP1_103_63127_20120311_184714_inLine +BABEL_OP1_103_63127_20120311_184714_outLine +BABEL_OP1_103_63129_20120311_193438_inLine +BABEL_OP1_103_63129_20120311_193438_outLine +BABEL_OP1_103_63194_20120312_010359_inLine +BABEL_OP1_103_63194_20120312_010359_outLine +BABEL_OP1_103_63215_20120513_191621_inLine +BABEL_OP1_103_63215_20120513_191621_outLine +BABEL_OP1_103_63240_20120312_021342_inLine +BABEL_OP1_103_63240_20120312_021342_outLine +BABEL_OP1_103_63373_20120315_025205_inLine +BABEL_OP1_103_63373_20120315_025205_outLine +BABEL_OP1_103_63384_20120315_031012_inLine +BABEL_OP1_103_63384_20120315_031012_outLine +BABEL_OP1_103_63510_20120318_221426_inLine +BABEL_OP1_103_63510_20120318_221426_outLine +BABEL_OP1_103_63680_20120319_214759_inLine +BABEL_OP1_103_63680_20120319_214759_outLine +BABEL_OP1_103_63687_20120320_181655_inLine +BABEL_OP1_103_63687_20120320_181655_outLine +BABEL_OP1_103_63923_20120320_172933_inLine +BABEL_OP1_103_63923_20120320_172933_outLine +BABEL_OP1_103_63929_20120123_192325_inLine +BABEL_OP1_103_63929_20120123_192325_outLine +BABEL_OP1_103_64039_20120320_215418_inLine +BABEL_OP1_103_64039_20120320_215418_outLine +BABEL_OP1_103_64145_20120404_204905_inLine +BABEL_OP1_103_64145_20120404_204905_outLine +BABEL_OP1_103_64177_20120404_212051_inLine +BABEL_OP1_103_64177_20120404_212051_outLine +BABEL_OP1_103_64231_20120310_224637_inLine +BABEL_OP1_103_64231_20120310_224637_outLine +BABEL_OP1_103_64610_20120125_223001_inLine +BABEL_OP1_103_64610_20120125_223001_outLine +BABEL_OP1_103_65512_20111229_045507_inLine +BABEL_OP1_103_65512_20111229_045507_outLine +BABEL_OP1_103_65818_20120127_011907_inLine +BABEL_OP1_103_65818_20120127_011907_outLine +BABEL_OP1_103_65954_20120205_190321_inLine +BABEL_OP1_103_65954_20120205_190321_outLine 
+BABEL_OP1_103_65991_20120229_215906_inLine +BABEL_OP1_103_65991_20120229_215906_outLine +BABEL_OP1_103_66005_20120229_221845_inLine +BABEL_OP1_103_66005_20120229_221845_outLine +BABEL_OP1_103_66048_20120229_225251_inLine +BABEL_OP1_103_66048_20120229_225251_outLine +BABEL_OP1_103_66287_20120108_191621_inLine +BABEL_OP1_103_66287_20120108_191621_outLine +BABEL_OP1_103_66309_20120229_232503_inLine +BABEL_OP1_103_66309_20120229_232503_outLine +BABEL_OP1_103_66719_20120116_002436_inLine +BABEL_OP1_103_66719_20120116_002436_outLine +BABEL_OP1_103_66813_20120127_151237_inLine +BABEL_OP1_103_66813_20120127_151237_outLine +BABEL_OP1_103_67001_20120305_223711_inLine +BABEL_OP1_103_67001_20120305_223711_outLine +BABEL_OP1_103_67288_20120305_233501_inLine +BABEL_OP1_103_67288_20120305_233501_outLine +BABEL_OP1_103_67358_20120128_224934_inLine +BABEL_OP1_103_67358_20120128_224934_outLine +BABEL_OP1_103_67484_20120306_212801_inLine +BABEL_OP1_103_67484_20120306_212801_outLine +BABEL_OP1_103_67685_20120118_163939_inLine +BABEL_OP1_103_67685_20120118_163939_outLine +BABEL_OP1_103_67814_20120522_200114_inLine +BABEL_OP1_103_67814_20120522_200114_outLine +BABEL_OP1_103_67824_20120116_000148_inLine +BABEL_OP1_103_67824_20120116_000148_outLine +BABEL_OP1_103_68602_20120729_174819_inLine +BABEL_OP1_103_68602_20120729_174819_outLine +BABEL_OP1_103_68811_20120531_155031_inLine +BABEL_OP1_103_68811_20120531_155031_outLine +BABEL_OP1_103_69969_20120309_020612_inLine +BABEL_OP1_103_69969_20120309_020612_outLine +BABEL_OP1_103_69990_20120305_153850_inLine +BABEL_OP1_103_69990_20120305_153850_outLine +BABEL_OP1_103_70200_20120311_000406_inLine +BABEL_OP1_103_70200_20120311_000406_outLine +BABEL_OP1_103_70476_20120117_202957_inLine +BABEL_OP1_103_70476_20120117_202957_outLine +BABEL_OP1_103_70476_20120117_204242_inLine +BABEL_OP1_103_70476_20120117_204242_outLine +BABEL_OP1_103_70651_20120131_034337_inLine +BABEL_OP1_103_70651_20120131_034337_outLine +BABEL_OP1_103_70762_20111230_015835_inLine +BABEL_OP1_103_70762_20111230_015835_outLine +BABEL_OP1_103_70858_20120201_191031_inLine +BABEL_OP1_103_70858_20120201_191031_outLine +BABEL_OP1_103_70897_20120118_020506_inLine +BABEL_OP1_103_70897_20120118_020506_outLine +BABEL_OP1_103_70919_20120202_170934_inLine +BABEL_OP1_103_70919_20120202_170934_outLine +BABEL_OP1_103_71215_20120207_001204_inLine +BABEL_OP1_103_71215_20120207_001204_outLine +BABEL_OP1_103_71293_20120101_212224_inLine +BABEL_OP1_103_71293_20120101_212224_outLine +BABEL_OP1_103_71450_20120514_181620_inLine +BABEL_OP1_103_71450_20120514_181621_outLine +BABEL_OP1_103_71666_20120514_223534_inLine +BABEL_OP1_103_71666_20120514_223534_outLine +BABEL_OP1_103_71691_20120109_034006_inLine +BABEL_OP1_103_71691_20120109_034007_outLine +BABEL_OP1_103_72179_20120511_023300_inLine +BABEL_OP1_103_72179_20120511_023300_outLine +BABEL_OP1_103_72709_20120204_231928_inLine +BABEL_OP1_103_72709_20120204_231928_outLine +BABEL_OP1_103_72714_20120126_001354_inLine +BABEL_OP1_103_72714_20120126_001354_outLine +BABEL_OP1_103_73264_20111228_184038_inLine +BABEL_OP1_103_73264_20111228_184038_outLine +BABEL_OP1_103_73881_20120120_041629_inLine +BABEL_OP1_103_73881_20120120_041629_outLine +BABEL_OP1_103_74188_20120522_172823_inLine +BABEL_OP1_103_74188_20120522_172823_outLine +BABEL_OP1_103_74334_20120102_033902_inLine +BABEL_OP1_103_74334_20120102_033902_outLine +BABEL_OP1_103_75797_20120125_192735_inLine +BABEL_OP1_103_75797_20120125_192735_outLine +BABEL_OP1_103_76276_20120114_191208_inLine 
+BABEL_OP1_103_76276_20120114_191208_outLine +BABEL_OP1_103_77097_20120109_024625_inLine +BABEL_OP1_103_77097_20120109_024625_outLine +BABEL_OP1_103_78722_20120126_234318_inLine +BABEL_OP1_103_78722_20120126_234318_outLine +BABEL_OP1_103_79127_20120205_215208_inLine +BABEL_OP1_103_79127_20120205_215208_outLine +BABEL_OP1_103_79788_20120201_222512_inLine +BABEL_OP1_103_79788_20120201_222512_outLine +BABEL_OP1_103_79803_20120730_020433_inLine +BABEL_OP1_103_79803_20120730_020433_outLine +BABEL_OP1_103_79857_20120111_205043_inLine +BABEL_OP1_103_79857_20120111_205043_outLine +BABEL_OP1_103_79901_20120202_193650_inLine +BABEL_OP1_103_79901_20120202_194746_inLine +BABEL_OP1_103_80118_20120126_010553_inLine +BABEL_OP1_103_80118_20120126_010553_outLine +BABEL_OP1_103_80183_20120513_182754_inLine +BABEL_OP1_103_80183_20120513_182754_outLine +BABEL_OP1_103_80313_20120106_200706_inLine +BABEL_OP1_103_80313_20120106_200706_outLine +BABEL_OP1_103_80943_20120125_185437_inLine +BABEL_OP1_103_80943_20120125_185437_outLine +BABEL_OP1_103_81800_20120531_180959_inLine +BABEL_OP1_103_81800_20120531_180959_outLine +BABEL_OP1_103_81800_20120531_182855_inLine +BABEL_OP1_103_81800_20120531_182855_outLine +BABEL_OP1_103_82094_20120522_225233_inLine +BABEL_OP1_103_82094_20120522_225233_outLine +BABEL_OP1_103_82135_20120117_213149_inLine +BABEL_OP1_103_82135_20120117_213149_outLine +BABEL_OP1_103_83819_20120125_193543_inLine +BABEL_OP1_103_83819_20120125_193543_outLine +BABEL_OP1_103_83835_20111231_193822_inLine +BABEL_OP1_103_83835_20111231_193822_outLine +BABEL_OP1_103_84654_20120515_201204_inLine +BABEL_OP1_103_84654_20120515_201204_outLine +BABEL_OP1_103_84854_20120205_001920_inLine +BABEL_OP1_103_84854_20120205_001920_outLine +BABEL_OP1_103_85457_20120521_204532_inLine +BABEL_OP1_103_85457_20120521_204532_outLine +BABEL_OP1_103_85577_20120729_215558_inLine +BABEL_OP1_103_85577_20120729_215558_outLine +BABEL_OP1_103_85730_20120116_233350_inLine +BABEL_OP1_103_85730_20120116_233350_outLine +BABEL_OP1_103_85764_20120129_192217_inLine +BABEL_OP1_103_85764_20120129_192217_outLine +BABEL_OP1_103_86537_20120511_195620_inLine +BABEL_OP1_103_86537_20120511_195620_outLine +BABEL_OP1_103_86614_20120521_220136_inLine +BABEL_OP1_103_86614_20120521_220136_outLine +BABEL_OP1_103_86680_20120105_191615_inLine +BABEL_OP1_103_86680_20120105_191615_outLine +BABEL_OP1_103_87453_20120515_170718_inLine +BABEL_OP1_103_87453_20120515_170718_outLine +BABEL_OP1_103_87677_20120121_224149_inLine +BABEL_OP1_103_87677_20120121_224149_outLine +BABEL_OP1_103_88677_20120112_032502_inLine +BABEL_OP1_103_88677_20120112_032502_outLine +BABEL_OP1_103_89464_20120205_204528_inLine +BABEL_OP1_103_89464_20120205_204528_outLine +BABEL_OP1_103_89702_20120109_021228_inLine +BABEL_OP1_103_89702_20120109_021228_outLine +BABEL_OP1_103_90641_20120102_212610_inLine +BABEL_OP1_103_90641_20120102_212610_outLine +BABEL_OP1_103_90882_20120530_230837_inLine +BABEL_OP1_103_90882_20120530_230837_outLine +BABEL_OP1_103_91161_20111229_202627_inLine +BABEL_OP1_103_91161_20111229_202627_outLine +BABEL_OP1_103_91372_20120115_023342_inLine +BABEL_OP1_103_91372_20120115_023342_outLine +BABEL_OP1_103_92722_20120512_132612_inLine +BABEL_OP1_103_92722_20120512_132612_outLine +BABEL_OP1_103_92793_20111229_200332_inLine +BABEL_OP1_103_92793_20111229_200332_outLine +BABEL_OP1_103_92910_20120205_195736_inLine +BABEL_OP1_103_92910_20120205_195736_outLine +BABEL_OP1_103_93026_20111228_235326_inLine +BABEL_OP1_103_93026_20111228_235326_outLine 
+BABEL_OP1_103_93358_20120107_025421_inLine +BABEL_OP1_103_93358_20120107_025421_outLine +BABEL_OP1_103_93907_20111228_051458_inLine +BABEL_OP1_103_93907_20111228_051458_outLine +BABEL_OP1_103_94793_20120102_034406_inLine +BABEL_OP1_103_94793_20120102_034406_outLine +BABEL_OP1_103_95349_20111229_201011_inLine +BABEL_OP1_103_95349_20111229_201011_outLine +BABEL_OP1_103_95349_20111229_225436_inLine +BABEL_OP1_103_95349_20111229_225436_outLine +BABEL_OP1_103_96537_20120729_165831_inLine +BABEL_OP1_103_96537_20120729_165831_outLine +BABEL_OP1_103_96690_20120131_213344_inLine +BABEL_OP1_103_96690_20120131_213344_outLine +BABEL_OP1_103_97679_20111229_191138_inLine +BABEL_OP1_103_97679_20111229_191138_outLine +BABEL_OP1_103_97971_20120111_020458_inLine +BABEL_OP1_103_97971_20120111_020459_outLine +BABEL_OP1_103_98331_20120131_213958_inLine +BABEL_OP1_103_98331_20120131_213958_outLine +BABEL_OP1_103_98446_20120101_215857_inLine +BABEL_OP1_103_98446_20120101_215857_outLine +BABEL_OP1_103_99093_20120514_161939_inLine +BABEL_OP1_103_99093_20120514_161939_outLine +BABEL_OP1_103_99510_20120515_175659_inLine +BABEL_OP1_103_99510_20120515_175659_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/train.untranscribed.list b/egs/babel/s5d/conf/lists/103-bengali/train.untranscribed.list new file mode 100644 index 00000000000..5a1273fe091 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/train.untranscribed.list @@ -0,0 +1,255 @@ +BABEL_OP1_103_10911_20120521_172505_inLine +BABEL_OP1_103_10911_20120521_172505_outLine +BABEL_OP1_103_10974_20121003_140938_inLine +BABEL_OP1_103_10974_20121003_140938_outLine +BABEL_OP1_103_11386_20121003_121747_inLine +BABEL_OP1_103_11386_20121003_121747_outLine +BABEL_OP1_103_12092_20121214_014753_inLine +BABEL_OP1_103_12092_20121214_014753_outLine +BABEL_OP1_103_13064_20120812_170202_inLine +BABEL_OP1_103_13064_20120812_170202_outLine +BABEL_OP1_103_13834_20121003_194231_inLine +BABEL_OP1_103_13834_20121003_194231_outLine +BABEL_OP1_103_14631_20120927_192723_inLine +BABEL_OP1_103_14631_20120927_192723_outLine +BABEL_OP1_103_15440_20120516_145148_inLine +BABEL_OP1_103_15440_20120516_145148_outLine +BABEL_OP1_103_17813_20120120_005856_inLine +BABEL_OP1_103_17813_20120120_005856_outLine +BABEL_OP1_103_17813_20120124_014523_inLine +BABEL_OP1_103_17813_20120124_014523_outLine +BABEL_OP1_103_18084_20120530_155334_inLine +BABEL_OP1_103_18331_20121007_213032_inLine +BABEL_OP1_103_18331_20121007_213032_outLine +BABEL_OP1_103_21083_20120514_145620_inLine +BABEL_OP1_103_21083_20120514_145620_outLine +BABEL_OP1_103_21352_20120930_143535_inLine +BABEL_OP1_103_21352_20120930_143535_outLine +BABEL_OP1_103_23378_20120531_010537_inLine +BABEL_OP1_103_23378_20120531_010537_outLine +BABEL_OP1_103_24303_20120809_003638_inLine +BABEL_OP1_103_24303_20120809_003638_outLine +BABEL_OP1_103_26536_20120523_220246_inLine +BABEL_OP1_103_26536_20120523_220246_outLine +BABEL_OP1_103_27187_20120929_030115_inLine +BABEL_OP1_103_27187_20120929_030115_outLine +BABEL_OP1_103_27356_20120618_212359_inLine +BABEL_OP1_103_27356_20120618_212359_outLine +BABEL_OP1_103_27378_20121005_211922_inLine +BABEL_OP1_103_27378_20121005_211922_outLine +BABEL_OP1_103_27679_20120528_215730_inLine +BABEL_OP1_103_27679_20120528_215730_outLine +BABEL_OP1_103_27891_20121006_234744_inLine +BABEL_OP1_103_27891_20121006_234744_outLine +BABEL_OP1_103_29103_20120516_171814_inLine +BABEL_OP1_103_29103_20120516_171814_outLine +BABEL_OP1_103_29690_20120930_135813_inLine +BABEL_OP1_103_29690_20120930_135813_outLine 
+BABEL_OP1_103_29911_20120607_231532_inLine +BABEL_OP1_103_30638_20120928_141651_inLine +BABEL_OP1_103_30638_20120928_141651_outLine +BABEL_OP1_103_30817_20120806_163759_inLine +BABEL_OP1_103_30817_20120806_163759_outLine +BABEL_OP1_103_31485_20120609_184729_inLine +BABEL_OP1_103_31485_20120609_184729_outLine +BABEL_OP1_103_33279_20120928_011938_inLine +BABEL_OP1_103_33279_20120928_011939_outLine +BABEL_OP1_103_37731_20120526_213340_inLine +BABEL_OP1_103_37731_20120526_213340_outLine +BABEL_OP1_103_39215_20121002_013230_inLine +BABEL_OP1_103_39215_20121002_013230_outLine +BABEL_OP1_103_39783_20121002_221911_inLine +BABEL_OP1_103_39783_20121002_221911_outLine +BABEL_OP1_103_42098_20121007_204200_inLine +BABEL_OP1_103_42098_20121007_204200_outLine +BABEL_OP1_103_44267_20121005_232936_inLine +BABEL_OP1_103_44267_20121005_232936_outLine +BABEL_OP1_103_44419_20121225_001833_inLine +BABEL_OP1_103_44419_20121225_001833_outLine +BABEL_OP1_103_44747_20121002_164108_inLine +BABEL_OP1_103_44747_20121002_164108_outLine +BABEL_OP1_103_46947_20120522_173213_inLine +BABEL_OP1_103_46947_20120522_173213_outLine +BABEL_OP1_103_47049_20120522_182020_inLine +BABEL_OP1_103_47049_20120522_182020_outLine +BABEL_OP1_103_47251_20120522_194654_inLine +BABEL_OP1_103_47251_20120522_194654_outLine +BABEL_OP1_103_48313_20120522_202903_inLine +BABEL_OP1_103_48313_20120522_202903_outLine +BABEL_OP1_103_48416_20120606_165040_inLine +BABEL_OP1_103_48416_20120606_165040_outLine +BABEL_OP1_103_48795_20120612_193506_inLine +BABEL_OP1_103_48795_20120612_193506_outLine +BABEL_OP1_103_49201_20120930_161546_inLine +BABEL_OP1_103_49201_20120930_161546_outLine +BABEL_OP1_103_49208_20120522_233157_inLine +BABEL_OP1_103_49208_20120522_233157_outLine +BABEL_OP1_103_49443_20120928_004643_inLine +BABEL_OP1_103_49443_20120928_004643_outLine +BABEL_OP1_103_49545_20120530_163034_inLine +BABEL_OP1_103_49545_20120530_163034_outLine +BABEL_OP1_103_49548_20120523_162625_inLine +BABEL_OP1_103_49548_20120523_162625_outLine +BABEL_OP1_103_49885_20121002_000523_inLine +BABEL_OP1_103_49885_20121002_000523_outLine +BABEL_OP1_103_51973_20120102_032210_inLine +BABEL_OP1_103_51973_20120102_032210_outLine +BABEL_OP1_103_51973_20120102_033759_inLine +BABEL_OP1_103_51973_20120102_033759_outLine +BABEL_OP1_103_53659_20120802_001534_inLine +BABEL_OP1_103_53659_20120802_001534_outLine +BABEL_OP1_103_54393_20120928_235549_inLine +BABEL_OP1_103_54393_20120928_235549_outLine +BABEL_OP1_103_55382_20120629_230445_inLine +BABEL_OP1_103_55382_20120629_230445_outLine +BABEL_OP1_103_56283_20121007_180739_inLine +BABEL_OP1_103_56283_20121007_180739_outLine +BABEL_OP1_103_57584_20120725_224449_inLine +BABEL_OP1_103_57584_20120725_224449_outLine +BABEL_OP1_103_58298_20120528_222416_inLine +BABEL_OP1_103_58298_20120528_222416_outLine +BABEL_OP1_103_59488_20120929_145848_inLine +BABEL_OP1_103_59488_20120929_145848_outLine +BABEL_OP1_103_59799_20121229_171348_inLine +BABEL_OP1_103_59799_20121229_171348_outLine +BABEL_OP1_103_60055_20120819_171855_inLine +BABEL_OP1_103_60055_20120819_171855_outLine +BABEL_OP1_103_60572_20120530_175437_inLine +BABEL_OP1_103_60572_20120530_175437_outLine +BABEL_OP1_103_60730_20120514_223932_inLine +BABEL_OP1_103_60730_20120514_223932_outLine +BABEL_OP1_103_61635_20120928_234720_inLine +BABEL_OP1_103_61635_20120928_234720_outLine +BABEL_OP1_103_61655_20120809_233557_inLine +BABEL_OP1_103_61655_20120809_233557_outLine +BABEL_OP1_103_62109_20120512_223919_inLine +BABEL_OP1_103_62109_20120512_223919_outLine 
+BABEL_OP1_103_63043_20121007_231348_inLine +BABEL_OP1_103_63043_20121007_231348_outLine +BABEL_OP1_103_63043_20121007_232702_inLine +BABEL_OP1_103_63043_20121007_232702_outLine +BABEL_OP1_103_63390_20120513_174652_inLine +BABEL_OP1_103_63390_20120513_174652_outLine +BABEL_OP1_103_63603_20121011_004426_inLine +BABEL_OP1_103_63603_20121011_004426_outLine +BABEL_OP1_103_63842_20121005_162812_inLine +BABEL_OP1_103_63842_20121005_162812_outLine +BABEL_OP1_103_63996_20120516_162255_inLine +BABEL_OP1_103_63996_20120516_162255_outLine +BABEL_OP1_103_64695_20120731_171306_inLine +BABEL_OP1_103_64695_20120731_171306_outLine +BABEL_OP1_103_66842_20120516_153359_inLine +BABEL_OP1_103_66842_20120516_153400_outLine +BABEL_OP1_103_66879_20120524_201608_inLine +BABEL_OP1_103_66879_20120524_201608_outLine +BABEL_OP1_103_68102_20120601_163256_inLine +BABEL_OP1_103_68102_20120601_163256_outLine +BABEL_OP1_103_68189_20120524_212606_inLine +BABEL_OP1_103_68189_20120524_212606_outLine +BABEL_OP1_103_68538_20120608_172925_inLine +BABEL_OP1_103_68538_20120608_172925_outLine +BABEL_OP1_103_68538_20120608_174508_inLine +BABEL_OP1_103_68538_20120608_174508_outLine +BABEL_OP1_103_71224_20121005_221009_inLine +BABEL_OP1_103_71224_20121005_221009_outLine +BABEL_OP1_103_71996_20120522_225024_inLine +BABEL_OP1_103_71996_20120522_225024_outLine +BABEL_OP1_103_72088_20121003_002504_inLine +BABEL_OP1_103_72088_20121003_002504_outLine +BABEL_OP1_103_75345_20121001_203932_inLine +BABEL_OP1_103_75345_20121001_203932_outLine +BABEL_OP1_103_76149_20121004_032258_inLine +BABEL_OP1_103_76149_20121004_032258_outLine +BABEL_OP1_103_76372_20120514_235628_inLine +BABEL_OP1_103_76372_20120514_235628_outLine +BABEL_OP1_103_76832_20120528_201751_inLine +BABEL_OP1_103_76832_20120528_201751_outLine +BABEL_OP1_103_77294_20120616_144707_inLine +BABEL_OP1_103_77294_20120616_144707_outLine +BABEL_OP1_103_78792_20120522_191207_inLine +BABEL_OP1_103_78792_20120522_191207_outLine +BABEL_OP1_103_78938_20120512_201016_inLine +BABEL_OP1_103_78938_20120512_201016_outLine +BABEL_OP1_103_79006_20120521_012957_outLine +BABEL_OP1_103_79387_20120522_211025_inLine +BABEL_OP1_103_79387_20120522_211025_outLine +BABEL_OP1_103_79989_20120928_013138_inLine +BABEL_OP1_103_79989_20120928_013138_outLine +BABEL_OP1_103_80679_20120930_163521_inLine +BABEL_OP1_103_80679_20120930_163521_outLine +BABEL_OP1_103_81492_20120206_014433_inLine +BABEL_OP1_103_81492_20120206_014433_outLine +BABEL_OP1_103_81492_20120206_020249_inLine +BABEL_OP1_103_81492_20120206_020249_outLine +BABEL_OP1_103_82181_20120929_042042_inLine +BABEL_OP1_103_82181_20120929_042042_outLine +BABEL_OP1_103_84111_20120930_144529_inLine +BABEL_OP1_103_84111_20120930_144529_outLine +BABEL_OP1_103_84946_20120619_234231_inLine +BABEL_OP1_103_84946_20120619_234231_outLine +BABEL_OP1_103_85272_20120531_172145_inLine +BABEL_OP1_103_85272_20120531_172145_outLine +BABEL_OP1_103_85388_20120512_131608_inLine +BABEL_OP1_103_85388_20120512_131608_outLine +BABEL_OP1_103_85443_20120512_163256_inLine +BABEL_OP1_103_85443_20120512_163256_outLine +BABEL_OP1_103_85443_20120512_164633_inLine +BABEL_OP1_103_85443_20120512_164633_outLine +BABEL_OP1_103_86067_20121008_031300_inLine +BABEL_OP1_103_86067_20121008_031300_outLine +BABEL_OP1_103_86121_20121011_040043_inLine +BABEL_OP1_103_86121_20121011_040043_outLine +BABEL_OP1_103_87741_20121231_225715_inLine +BABEL_OP1_103_87741_20121231_225715_outLine +BABEL_OP1_103_89091_20130104_015514_inLine +BABEL_OP1_103_89091_20130104_015514_outLine 
+BABEL_OP1_103_89091_20130104_032531_inLine +BABEL_OP1_103_89091_20130104_032531_outLine +BABEL_OP1_103_89190_20130106_003028_inLine +BABEL_OP1_103_89190_20130106_003028_outLine +BABEL_OP1_103_90326_20120522_175819_inLine +BABEL_OP1_103_90326_20120522_175819_outLine +BABEL_OP1_103_90672_20121225_182001_inLine +BABEL_OP1_103_90672_20121225_182001_outLine +BABEL_OP1_103_91105_20120516_141445_inLine +BABEL_OP1_103_91105_20120516_141445_outLine +BABEL_OP1_103_91670_20121225_195825_inLine +BABEL_OP1_103_91670_20121225_195825_outLine +BABEL_OP1_103_91723_20121226_011745_inLine +BABEL_OP1_103_91723_20121226_011745_outLine +BABEL_OP1_103_91733_20121227_001726_inLine +BABEL_OP1_103_91733_20121227_001726_outLine +BABEL_OP1_103_91744_20121227_005513_inLine +BABEL_OP1_103_91744_20121227_005513_outLine +BABEL_OP1_103_91815_20121230_215316_inLine +BABEL_OP1_103_91815_20121230_215316_outLine +BABEL_OP1_103_91838_20121230_210441_inLine +BABEL_OP1_103_91838_20121230_210441_outLine +BABEL_OP1_103_91957_20130103_192518_inLine +BABEL_OP1_103_91957_20130103_192518_outLine +BABEL_OP1_103_92027_20130104_160934_inLine +BABEL_OP1_103_92027_20130104_160934_outLine +BABEL_OP1_103_92083_20130105_170057_inLine +BABEL_OP1_103_92083_20130105_170057_outLine +BABEL_OP1_103_92192_20130105_180415_inLine +BABEL_OP1_103_92192_20130105_180415_outLine +BABEL_OP1_103_92277_20130105_173147_inLine +BABEL_OP1_103_92277_20130105_173147_outLine +BABEL_OP1_103_93600_20120812_151245_inLine +BABEL_OP1_103_93600_20120812_151245_outLine +BABEL_OP1_103_94057_20130101_231512_inLine +BABEL_OP1_103_94057_20130101_231513_outLine +BABEL_OP1_103_94065_20130105_015217_inLine +BABEL_OP1_103_94065_20130105_015217_outLine +BABEL_OP1_103_94069_20130101_234436_inLine +BABEL_OP1_103_94069_20130101_234436_outLine +BABEL_OP1_103_96844_20121224_193654_inLine +BABEL_OP1_103_96844_20121224_193654_outLine +BABEL_OP1_103_96868_20120528_161710_inLine +BABEL_OP1_103_96868_20120528_161710_outLine +BABEL_OP1_103_97289_20120806_174807_inLine +BABEL_OP1_103_97289_20120806_174807_outLine +BABEL_OP1_103_98325_20120805_170336_inLine +BABEL_OP1_103_98325_20120805_170336_outLine +BABEL_OP1_103_99446_20120523_164823_inLine +BABEL_OP1_103_99446_20120523_164823_outLine diff --git a/egs/babel/s5d/conf/lists/104-pashto/dev.list b/egs/babel/s5d/conf/lists/104-pashto/dev.list new file mode 100644 index 00000000000..7624d5decb2 --- /dev/null +++ b/egs/babel/s5d/conf/lists/104-pashto/dev.list @@ -0,0 +1,143 @@ +BABEL_BP_104_04221_20120310_194031_inLine +BABEL_BP_104_04221_20120310_194031_outLine +BABEL_BP_104_08861_20120226_050237_inLine +BABEL_BP_104_08861_20120226_050237_outLine +BABEL_BP_104_10712_20120205_004135_inLine +BABEL_BP_104_10712_20120205_004135_outLine +BABEL_BP_104_10712_20120205_005332_inLine +BABEL_BP_104_10712_20120205_005332_outLine +BABEL_BP_104_13196_20120130_151929_inLine +BABEL_BP_104_13196_20120130_151929_outLine +BABEL_BP_104_14002_20120218_235147_inLine +BABEL_BP_104_14002_20120218_235147_outLine +BABEL_BP_104_15268_20120110_154803_inLine +BABEL_BP_104_15268_20120110_154803_outLine +BABEL_BP_104_15268_20120110_154803_outLine +BABEL_BP_104_16210_20111223_035614_inLine +BABEL_BP_104_16210_20111223_041103_inLine +BABEL_BP_104_17749_20120314_233247_inLine +BABEL_BP_104_17749_20120314_233247_outLine +BABEL_BP_104_21113_20120319_010218_inLine +BABEL_BP_104_22338_20120128_204829_inLine +BABEL_BP_104_22713_20120205_170953_inLine +BABEL_BP_104_23560_20120124_200340_inLine +BABEL_BP_104_28102_20120326_164501_inLine +BABEL_BP_104_28102_20120326_164501_outLine 
+BABEL_BP_104_28102_20120326_171523_inLine +BABEL_BP_104_28102_20120326_171523_outLine +BABEL_BP_104_29368_20120321_233801_inLine +BABEL_BP_104_29368_20120321_233802_outLine +BABEL_BP_104_29368_20120321_235133_inLine +BABEL_BP_104_29368_20120321_235133_outLine +BABEL_BP_104_33955_20120218_033644_inLine +BABEL_BP_104_33955_20120218_033644_outLine +BABEL_BP_104_34541_20120321_005610_inLine +BABEL_BP_104_34541_20120321_005610_outLine +BABEL_BP_104_35756_20120311_223543_inLine +BABEL_BP_104_35756_20120311_223543_outLine +BABEL_BP_104_36867_20120208_233318_inLine +BABEL_BP_104_36867_20120208_233318_outLine +BABEL_BP_104_37314_20120208_184924_inLine +BABEL_BP_104_37314_20120208_184924_outLine +BABEL_BP_104_39030_20120119_225755_outLine +BABEL_BP_104_39279_20120227_144602_inLine +BABEL_BP_104_39279_20120227_144602_outLine +BABEL_BP_104_40410_20120320_225202_inLine +BABEL_BP_104_40410_20120320_225202_inLine +BABEL_BP_104_40410_20120320_225202_outLine +BABEL_BP_104_40475_20120205_221544_inLine +BABEL_BP_104_40956_20120310_235812_inLine +BABEL_BP_104_40956_20120310_235812_outLine +BABEL_BP_104_43170_20120205_035143_inLine +BABEL_BP_104_44838_20120324_232540_inLine +BABEL_BP_104_44838_20120324_232540_outLine +BABEL_BP_104_53864_20120203_213736_outLine +BABEL_BP_104_54222_20120309_160035_inLine +BABEL_BP_104_54222_20120309_160035_outLine +BABEL_BP_104_56005_20120113_205235_outLine +BABEL_BP_104_56226_20120205_235429_outLine +BABEL_BP_104_60524_20120319_160420_inLine +BABEL_BP_104_60524_20120319_160420_outLine +BABEL_BP_104_60524_20120319_161719_inLine +BABEL_BP_104_60524_20120319_161719_outLine +BABEL_BP_104_61592_20120126_181735_inLine +BABEL_BP_104_61592_20120126_181735_outLine +BABEL_BP_104_61616_20120108_214701_inLine +BABEL_BP_104_61616_20120108_214701_outLine +BABEL_BP_104_62984_20120219_053758_inLine +BABEL_BP_104_62984_20120219_053758_outLine +BABEL_BP_104_64610_20120302_153346_inLine +BABEL_BP_104_64610_20120302_153346_outLine +BABEL_BP_104_66017_20120215_233406_inLine +BABEL_BP_104_66017_20120215_233406_outLine +BABEL_BP_104_70476_20120309_130456_inLine +BABEL_BP_104_70476_20120309_130456_outLine +BABEL_BP_104_72176_20120213_194841_inLine +BABEL_BP_104_72176_20120213_194841_outLine +BABEL_BP_104_73728_20111222_192324_inLine +BABEL_BP_104_73728_20111222_192324_outLine +BABEL_BP_104_74678_20120314_021415_inLine +BABEL_BP_104_74678_20120314_021415_outLine +BABEL_BP_104_74824_20120218_204154_inLine +BABEL_BP_104_74824_20120218_204154_outLine +BABEL_BP_104_75839_20120208_035003_inLine +BABEL_BP_104_75839_20120208_035003_outLine +BABEL_BP_104_76654_20111220_202441_inLine +BABEL_BP_104_76812_20120320_180439_inLine +BABEL_BP_104_76812_20120320_180439_outLine +BABEL_BP_104_76812_20120320_181229_inLine +BABEL_BP_104_76812_20120320_181229_outLine +BABEL_BP_104_78141_20120317_034317_inLine +BABEL_BP_104_78141_20120317_034317_outLine +BABEL_BP_104_81274_20120207_202722_inLine +BABEL_BP_104_81510_20120217_194417_inLine +BABEL_BP_104_81510_20120217_194417_inLine +BABEL_BP_104_82160_20120126_022907_inLine +BABEL_BP_104_82160_20120126_022907_inLine +BABEL_BP_104_82160_20120126_022907_outLine +BABEL_BP_104_83980_20120205_184505_inLine +BABEL_BP_104_83980_20120205_184505_inLine +BABEL_BP_104_83992_20120219_185819_inLine +BABEL_BP_104_83992_20120219_185819_outLine +BABEL_BP_104_84041_20111222_044010_inLine +BABEL_BP_104_84041_20111222_044010_outLine +BABEL_BP_104_84274_20120216_161121_inLine +BABEL_BP_104_84274_20120216_161121_outLine +BABEL_BP_104_85078_20120320_212106_inLine 
+BABEL_BP_104_85078_20120320_212106_outLine +BABEL_BP_104_85424_20120216_025024_inLine +BABEL_BP_104_85424_20120216_025024_outLine +BABEL_BP_104_85455_20120310_210107_inLine +BABEL_BP_104_85455_20120310_210107_outLine +BABEL_BP_104_85730_20120128_041419_inLine +BABEL_BP_104_85730_20120128_041419_outLine +BABEL_BP_104_85730_20120128_041419_outLine +BABEL_BP_104_86614_20111222_040726_inLine +BABEL_BP_104_86614_20111222_040726_outLine +BABEL_BP_104_86680_20120309_180429_inLine +BABEL_BP_104_86680_20120309_180429_outLine +BABEL_BP_104_86680_20120309_181746_inLine +BABEL_BP_104_86680_20120309_181746_outLine +BABEL_BP_104_86680_20120309_181746_outLine +BABEL_BP_104_87723_20120206_183706_inLine +BABEL_BP_104_87723_20120206_183706_outLine +BABEL_BP_104_88598_20120216_014512_inLine +BABEL_BP_104_88598_20120216_014512_outLine +BABEL_BP_104_88598_20120216_022402_inLine +BABEL_BP_104_88598_20120216_022402_outLine +BABEL_BP_104_89308_20120131_214111_inLine +BABEL_BP_104_89308_20120131_214111_outLine +BABEL_BP_104_89382_20120207_192751_inLine +BABEL_BP_104_89382_20120207_192751_outLine +BABEL_BP_104_90003_20120127_173210_inLine +BABEL_BP_104_91275_20120219_055247_outLine +BABEL_BP_104_91372_20120309_201355_inLine +BABEL_BP_104_91372_20120309_201355_outLine +BABEL_BP_104_93026_20120121_010508_inLine +BABEL_BP_104_93026_20120121_010508_outLine +BABEL_BP_104_94682_20120126_173632_outLine +BABEL_BP_104_96606_20120308_154908_inLine +BABEL_BP_104_96606_20120308_154908_outLine +BABEL_BP_104_97950_20120129_035347_inLine +BABEL_BP_104_99407_20120217_190330_inLine +BABEL_BP_104_99407_20120217_190330_outLine diff --git a/egs/babel/s5d/conf/lists/104-pashto/eval.list b/egs/babel/s5d/conf/lists/104-pashto/eval.list new file mode 100644 index 00000000000..f3b4a90b6e6 --- /dev/null +++ b/egs/babel/s5d/conf/lists/104-pashto/eval.list @@ -0,0 +1,198 @@ +BABEL_BP_104_01275_20120316_230646_inLine +BABEL_BP_104_01275_20120316_230646_outLine +BABEL_BP_104_01275_20120316_231711_inLine +BABEL_BP_104_01275_20120316_231711_outLine +BABEL_BP_104_03053_20120129_025619_inLine +BABEL_BP_104_03053_20120129_025619_outLine +BABEL_BP_104_03053_20120129_030931_inLine +BABEL_BP_104_03053_20120129_030931_outLine +BABEL_BP_104_10348_20120313_005811_inLine +BABEL_BP_104_10348_20120313_005811_outLine +BABEL_BP_104_10494_20120219_173118_inLine +BABEL_BP_104_10494_20120219_173118_outLine +BABEL_BP_104_11386_20120501_221559_inLine +BABEL_BP_104_11386_20120501_221559_outLine +BABEL_BP_104_11894_20120120_154648_inLine +BABEL_BP_104_11894_20120120_154648_outLine +BABEL_BP_104_12719_20120309_214313_inLine +BABEL_BP_104_12722_20120409_210032_inLine +BABEL_BP_104_12722_20120409_210033_outLine +BABEL_BP_104_15269_20120508_173455_inLine +BABEL_BP_104_15997_20120212_170900_inLine +BABEL_BP_104_15997_20120212_170900_outLine +BABEL_BP_104_16352_20120206_004350_inLine +BABEL_BP_104_16352_20120206_004350_outLine +BABEL_BP_104_16629_20120501_184857_inLine +BABEL_BP_104_17218_20120206_041300_inLine +BABEL_BP_104_17218_20120206_041301_outLine +BABEL_BP_104_18358_20120525_224141_inLine +BABEL_BP_104_19112_20120316_145312_inLine +BABEL_BP_104_19112_20120316_145312_outLine +BABEL_BP_104_19686_20120518_175511_inLine +BABEL_BP_104_19760_20120501_194354_inLine +BABEL_BP_104_20157_20120122_171556_inLine +BABEL_BP_104_20157_20120122_171556_outLine +BABEL_BP_104_21180_20120216_024537_inLine +BABEL_BP_104_21180_20120216_024537_outLine +BABEL_BP_104_22422_20120213_143323_inLine +BABEL_BP_104_22422_20120213_143323_outLine +BABEL_BP_104_24378_20120322_201121_inLine 
+BABEL_BP_104_24378_20120322_201121_outLine +BABEL_BP_104_25365_20120525_172149_inLine +BABEL_BP_104_27891_20120331_012612_inLine +BABEL_BP_104_27891_20120331_012612_outLine +BABEL_BP_104_28573_20120322_135901_inLine +BABEL_BP_104_28573_20120322_135901_outLine +BABEL_BP_104_29083_20120128_011719_inLine +BABEL_BP_104_29083_20120128_011719_outLine +BABEL_BP_104_30978_20120309_013805_inLine +BABEL_BP_104_30978_20120309_013805_outLine +BABEL_BP_104_32245_20120115_200120_outLine +BABEL_BP_104_32669_20120223_164026_inLine +BABEL_BP_104_32669_20120223_164026_outLine +BABEL_BP_104_32708_20120518_190441_inLine +BABEL_BP_104_33429_20120303_015431_inLine +BABEL_BP_104_33429_20120303_015431_outLine +BABEL_BP_104_34137_20120219_183642_inLine +BABEL_BP_104_34137_20120219_183642_outLine +BABEL_BP_104_35317_20120208_173659_inLine +BABEL_BP_104_35317_20120208_173659_outLine +BABEL_BP_104_35764_20120518_193509_inLine +BABEL_BP_104_36227_20120211_165128_inLine +BABEL_BP_104_36227_20120211_165128_outLine +BABEL_BP_104_36227_20120211_181406_inLine +BABEL_BP_104_36227_20120211_181406_outLine +BABEL_BP_104_38954_20120316_173708_inLine +BABEL_BP_104_38954_20120316_173708_outLine +BABEL_BP_104_39577_20120321_011346_inLine +BABEL_BP_104_39577_20120321_011346_outLine +BABEL_BP_104_39696_20120218_034224_inLine +BABEL_BP_104_39696_20120218_034224_outLine +BABEL_BP_104_40445_20120314_225446_inLine +BABEL_BP_104_40445_20120314_225446_outLine +BABEL_BP_104_41545_20120317_151247_inLine +BABEL_BP_104_41545_20120317_151247_outLine +BABEL_BP_104_42397_20120219_050708_inLine +BABEL_BP_104_42397_20120219_050708_outLine +BABEL_BP_104_42427_20120229_145052_inLine +BABEL_BP_104_42427_20120229_145052_outLine +BABEL_BP_104_42728_20120204_220817_inLine +BABEL_BP_104_42728_20120204_220817_outLine +BABEL_BP_104_42730_20120229_010941_inLine +BABEL_BP_104_42730_20120229_010941_outLine +BABEL_BP_104_44792_20120210_023955_inLine +BABEL_BP_104_46216_20120207_194728_inLine +BABEL_BP_104_46216_20120207_194728_outLine +BABEL_BP_104_46862_20120316_155735_inLine +BABEL_BP_104_46862_20120316_155735_outLine +BABEL_BP_104_47771_20120519_163449_inLine +BABEL_BP_104_48518_20120219_154144_inLine +BABEL_BP_104_48518_20120219_154144_outLine +BABEL_BP_104_49662_20120518_205502_inLine +BABEL_BP_104_52533_20120310_204257_inLine +BABEL_BP_104_52533_20120310_204257_outLine +BABEL_BP_104_54646_20120119_215025_inLine +BABEL_BP_104_54646_20120119_215025_outLine +BABEL_BP_104_55043_20120316_021531_inLine +BABEL_BP_104_55043_20120316_021531_outLine +BABEL_BP_104_56605_20120220_012855_inLine +BABEL_BP_104_56605_20120220_012855_outLine +BABEL_BP_104_58283_20111227_182227_inLine +BABEL_BP_104_58283_20111227_182227_outLine +BABEL_BP_104_59121_20120120_170101_inLine +BABEL_BP_104_59121_20120120_170101_outLine +BABEL_BP_104_60055_20120120_151813_inLine +BABEL_BP_104_60055_20120120_151813_outLine +BABEL_BP_104_60523_20120303_012610_inLine +BABEL_BP_104_60523_20120303_012610_outLine +BABEL_BP_104_61400_20120518_194526_inLine +BABEL_BP_104_61755_20120518_180255_inLine +BABEL_BP_104_61786_20120216_204511_inLine +BABEL_BP_104_61786_20120216_204511_outLine +BABEL_BP_104_64198_20120219_231453_inLine +BABEL_BP_104_64198_20120219_231453_outLine +BABEL_BP_104_65668_20120203_175644_inLine +BABEL_BP_104_65668_20120203_175644_outLine +BABEL_BP_104_66153_20120212_161723_inLine +BABEL_BP_104_66153_20120212_161724_outLine +BABEL_BP_104_66842_20120126_174251_inLine +BABEL_BP_104_66842_20120126_174251_outLine +BABEL_BP_104_66847_20120308_230422_inLine 
+BABEL_BP_104_66847_20120308_230422_outLine +BABEL_BP_104_68538_20120314_231228_inLine +BABEL_BP_104_68538_20120314_231228_outLine +BABEL_BP_104_69336_20120201_211015_inLine +BABEL_BP_104_69336_20120201_211015_outLine +BABEL_BP_104_69336_20120201_213613_inLine +BABEL_BP_104_69336_20120201_213613_outLine +BABEL_BP_104_69728_20120129_180746_inLine +BABEL_BP_104_69728_20120129_180746_outLine +BABEL_BP_104_71284_20111228_210355_inLine +BABEL_BP_104_71284_20111228_210355_outLine +BABEL_BP_104_71284_20111228_215349_inLine +BABEL_BP_104_71284_20111228_215349_outLine +BABEL_BP_104_71925_20120309_151315_inLine +BABEL_BP_104_71925_20120309_151315_outLine +BABEL_BP_104_75869_20111220_204852_inLine +BABEL_BP_104_75869_20111220_204852_outLine +BABEL_BP_104_77082_20120109_183551_inLine +BABEL_BP_104_77082_20120109_183551_outLine +BABEL_BP_104_77290_20120403_023516_inLine +BABEL_BP_104_77290_20120403_023516_outLine +BABEL_BP_104_77621_20120517_225556_inLine +BABEL_BP_104_77737_20120320_204452_inLine +BABEL_BP_104_77737_20120320_204452_outLine +BABEL_BP_104_78298_20120308_204105_inLine +BABEL_BP_104_78298_20120308_204105_outLine +BABEL_BP_104_80644_20120222_222458_inLine +BABEL_BP_104_80644_20120222_222458_outLine +BABEL_BP_104_83327_20120217_233846_inLine +BABEL_BP_104_83327_20120217_233846_outLine +BABEL_BP_104_83782_20120519_153147_inLine +BABEL_BP_104_84398_20120219_052212_inLine +BABEL_BP_104_84398_20120219_052212_outLine +BABEL_BP_104_85897_20120221_033320_inLine +BABEL_BP_104_85897_20120221_033320_outLine +BABEL_BP_104_86231_20120224_065736_inLine +BABEL_BP_104_86231_20120224_065736_outLine +BABEL_BP_104_86793_20120309_185403_inLine +BABEL_BP_104_86793_20120309_185403_outLine +BABEL_BP_104_86873_20120519_160538_inLine +BABEL_BP_104_87124_20120315_000929_inLine +BABEL_BP_104_87124_20120315_000929_outLine +BABEL_BP_104_87734_20120117_154033_inLine +BABEL_BP_104_87734_20120117_154033_outLine +BABEL_BP_104_89463_20111225_195251_inLine +BABEL_BP_104_89463_20111225_195251_outLine +BABEL_BP_104_89702_20120318_005220_inLine +BABEL_BP_104_89702_20120318_005220_outLine +BABEL_BP_104_89851_20120322_183302_inLine +BABEL_BP_104_89851_20120322_183302_outLine +BABEL_BP_104_89851_20120322_194407_inLine +BABEL_BP_104_89851_20120322_194407_outLine +BABEL_BP_104_90758_20120315_015433_inLine +BABEL_BP_104_90758_20120315_015433_outLine +BABEL_BP_104_91105_20120501_195037_inLine +BABEL_BP_104_92247_20120220_023207_inLine +BABEL_BP_104_92247_20120220_023207_outLine +BABEL_BP_104_92721_20120401_235515_inLine +BABEL_BP_104_92721_20120401_235515_outLine +BABEL_BP_104_92721_20120402_000651_inLine +BABEL_BP_104_92721_20120402_000651_outLine +BABEL_BP_104_93180_20111223_033642_inLine +BABEL_BP_104_93180_20111223_033642_outLine +BABEL_BP_104_93742_20120308_233140_inLine +BABEL_BP_104_93742_20120308_233140_outLine +BABEL_BP_104_93748_20120316_223342_inLine +BABEL_BP_104_93748_20120316_223342_outLine +BABEL_BP_104_94934_20120525_175309_inLine +BABEL_BP_104_96186_20120320_210010_inLine +BABEL_BP_104_96186_20120320_210010_outLine +BABEL_BP_104_96868_20120326_145653_inLine +BABEL_BP_104_96868_20120326_145653_outLine +BABEL_BP_104_97574_20120228_161829_inLine +BABEL_BP_104_98271_20120110_010959_inLine +BABEL_BP_104_98271_20120110_010959_outLine +BABEL_BP_104_98420_20120507_174842_inLine +BABEL_BP_104_99428_20120211_174655_inLine +BABEL_BP_104_99428_20120211_174655_outLine diff --git a/egs/babel/s5d/conf/lists/104-pashto/evalpart1.list b/egs/babel/s5d/conf/lists/104-pashto/evalpart1.list new file mode 100644 index 
00000000000..2cf59b81f00 --- /dev/null +++ b/egs/babel/s5d/conf/lists/104-pashto/evalpart1.list @@ -0,0 +1,70 @@ +BABEL_BP_104_11894_20120120_154648_inLine +BABEL_BP_104_11894_20120120_154648_outLine +BABEL_BP_104_12722_20120409_210032_inLine +BABEL_BP_104_12722_20120409_210033_outLine +BABEL_BP_104_16352_20120206_004350_inLine +BABEL_BP_104_16352_20120206_004350_outLine +BABEL_BP_104_20157_20120122_171556_inLine +BABEL_BP_104_20157_20120122_171556_outLine +BABEL_BP_104_21180_20120216_024537_inLine +BABEL_BP_104_21180_20120216_024537_outLine +BABEL_BP_104_24378_20120322_201121_inLine +BABEL_BP_104_24378_20120322_201121_outLine +BABEL_BP_104_27891_20120331_012612_inLine +BABEL_BP_104_27891_20120331_012612_outLine +BABEL_BP_104_28573_20120322_135901_inLine +BABEL_BP_104_28573_20120322_135901_outLine +BABEL_BP_104_32669_20120223_164026_inLine +BABEL_BP_104_32669_20120223_164026_outLine +BABEL_BP_104_34137_20120219_183642_inLine +BABEL_BP_104_34137_20120219_183642_outLine +BABEL_BP_104_35317_20120208_173659_inLine +BABEL_BP_104_35317_20120208_173659_outLine +BABEL_BP_104_36227_20120211_165128_inLine +BABEL_BP_104_36227_20120211_165128_outLine +BABEL_BP_104_36227_20120211_181406_inLine +BABEL_BP_104_36227_20120211_181406_outLine +BABEL_BP_104_39577_20120321_011346_inLine +BABEL_BP_104_39577_20120321_011346_outLine +BABEL_BP_104_39696_20120218_034224_inLine +BABEL_BP_104_39696_20120218_034224_outLine +BABEL_BP_104_42427_20120229_145052_inLine +BABEL_BP_104_42427_20120229_145052_outLine +BABEL_BP_104_48518_20120219_154144_inLine +BABEL_BP_104_48518_20120219_154144_outLine +BABEL_BP_104_52533_20120310_204257_inLine +BABEL_BP_104_52533_20120310_204257_outLine +BABEL_BP_104_54646_20120119_215025_inLine +BABEL_BP_104_54646_20120119_215025_outLine +BABEL_BP_104_66153_20120212_161723_inLine +BABEL_BP_104_66153_20120212_161724_outLine +BABEL_BP_104_69336_20120201_211015_inLine +BABEL_BP_104_69336_20120201_211015_outLine +BABEL_BP_104_69336_20120201_213613_inLine +BABEL_BP_104_69336_20120201_213613_outLine +BABEL_BP_104_75869_20111220_204852_inLine +BABEL_BP_104_75869_20111220_204852_outLine +BABEL_BP_104_77082_20120109_183551_inLine +BABEL_BP_104_77082_20120109_183551_outLine +BABEL_BP_104_78298_20120308_204105_inLine +BABEL_BP_104_78298_20120308_204105_outLine +BABEL_BP_104_85897_20120221_033320_inLine +BABEL_BP_104_85897_20120221_033320_outLine +BABEL_BP_104_86793_20120309_185403_inLine +BABEL_BP_104_86793_20120309_185403_outLine +BABEL_BP_104_87124_20120315_000929_inLine +BABEL_BP_104_87124_20120315_000929_outLine +BABEL_BP_104_89851_20120322_183302_inLine +BABEL_BP_104_89851_20120322_183302_outLine +BABEL_BP_104_89851_20120322_194407_inLine +BABEL_BP_104_89851_20120322_194407_outLine +BABEL_BP_104_92721_20120401_235515_inLine +BABEL_BP_104_92721_20120401_235515_outLine +BABEL_BP_104_92721_20120402_000651_inLine +BABEL_BP_104_92721_20120402_000651_outLine +BABEL_BP_104_93748_20120316_223342_inLine +BABEL_BP_104_93748_20120316_223342_outLine +BABEL_BP_104_96868_20120326_145653_inLine +BABEL_BP_104_96868_20120326_145653_outLine +BABEL_BP_104_99428_20120211_174655_inLine +BABEL_BP_104_99428_20120211_174655_outLine diff --git a/egs/babel/s5d/conf/lists/104-pashto/train.40HrFLP.list b/egs/babel/s5d/conf/lists/104-pashto/train.40HrFLP.list new file mode 100644 index 00000000000..9aefcaef2bb --- /dev/null +++ b/egs/babel/s5d/conf/lists/104-pashto/train.40HrFLP.list @@ -0,0 +1,512 @@ +BABEL_BP_104_03770_20120109_014606_inLine +BABEL_BP_104_03770_20120109_014606_outLine 
+BABEL_BP_104_08036_20111220_013826_inLine +BABEL_BP_104_08139_20120126_021604_inLine +BABEL_BP_104_10193_20120213_031930_inLine +BABEL_BP_104_10193_20120213_031930_outLine +BABEL_BP_104_10289_20120128_035330_inLine +BABEL_BP_104_10289_20120128_035330_outLine +BABEL_BP_104_10642_20120321_210945_outLine +BABEL_BP_104_10911_20111222_025120_inLine +BABEL_BP_104_10911_20111222_025120_outLine +BABEL_BP_104_11146_20120224_000248_inLine +BABEL_BP_104_11146_20120224_000248_outLine +BABEL_BP_104_11153_20120108_191820_inLine +BABEL_BP_104_11153_20120108_191820_outLine +BABEL_BP_104_11202_20120213_235334_inLine +BABEL_BP_104_11202_20120213_235334_outLine +BABEL_BP_104_11442_20120218_234445_inLine +BABEL_BP_104_11442_20120218_234445_outLine +BABEL_BP_104_11647_20120315_022645_inLine +BABEL_BP_104_11647_20120315_022645_outLine +BABEL_BP_104_12562_20120307_152654_inLine +BABEL_BP_104_12682_20120223_031401_inLine +BABEL_BP_104_13064_20120220_040256_inLine +BABEL_BP_104_13064_20120220_040256_outLine +BABEL_BP_104_13189_20120112_020041_inLine +BABEL_BP_104_13189_20120112_020041_outLine +BABEL_BP_104_13456_20120111_024843_outLine +BABEL_BP_104_13694_20120321_001123_outLine +BABEL_BP_104_13798_20120105_221125_inLine +BABEL_BP_104_13798_20120105_221125_outLine +BABEL_BP_104_13952_20120126_185217_inLine +BABEL_BP_104_13952_20120126_185217_outLine +BABEL_BP_104_14147_20120320_003436_inLine +BABEL_BP_104_14147_20120320_003436_outLine +BABEL_BP_104_14225_20120331_015956_inLine +BABEL_BP_104_14225_20120331_020908_inLine +BABEL_BP_104_14527_20120207_235446_inLine +BABEL_BP_104_14527_20120207_235446_outLine +BABEL_BP_104_14927_20111224_041309_inLine +BABEL_BP_104_14927_20111224_041309_outLine +BABEL_BP_104_15324_20120126_222036_inLine +BABEL_BP_104_15324_20120126_222036_outLine +BABEL_BP_104_15324_20120127_023323_inLine +BABEL_BP_104_15324_20120127_023323_outLine +BABEL_BP_104_15377_20120322_045329_inLine +BABEL_BP_104_15415_20120219_181352_inLine +BABEL_BP_104_15861_20120401_024411_inLine +BABEL_BP_104_15949_20120229_140434_inLine +BABEL_BP_104_15949_20120229_140434_outLine +BABEL_BP_104_16074_20120129_041107_inLine +BABEL_BP_104_16074_20120129_041107_outLine +BABEL_BP_104_16290_20120220_200234_inLine +BABEL_BP_104_16290_20120220_200234_outLine +BABEL_BP_104_16339_20120131_184255_inLine +BABEL_BP_104_16350_20120315_043233_outLine +BABEL_BP_104_16416_20120317_205531_inLine +BABEL_BP_104_16416_20120317_205531_outLine +BABEL_BP_104_16416_20120317_211129_inLine +BABEL_BP_104_16416_20120317_211129_outLine +BABEL_BP_104_16633_20120311_053635_inLine +BABEL_BP_104_16633_20120311_053635_outLine +BABEL_BP_104_17081_20120128_030343_inLine +BABEL_BP_104_17081_20120128_030343_outLine +BABEL_BP_104_17180_20120321_215255_inLine +BABEL_BP_104_17180_20120321_215255_outLine +BABEL_BP_104_17216_20120128_015245_inLine +BABEL_BP_104_17216_20120128_020324_inLine +BABEL_BP_104_17319_20111225_210159_inLine +BABEL_BP_104_17319_20111225_210159_outLine +BABEL_BP_104_17429_20120209_024521_inLine +BABEL_BP_104_17429_20120209_024521_outLine +BABEL_BP_104_17904_20120320_014817_inLine +BABEL_BP_104_17904_20120320_014817_outLine +BABEL_BP_104_18084_20111230_210850_outLine +BABEL_BP_104_18537_20120130_181101_inLine +BABEL_BP_104_18537_20120130_181101_outLine +BABEL_BP_104_18667_20120208_175014_inLine +BABEL_BP_104_18667_20120208_175014_outLine +BABEL_BP_104_19044_20120218_182247_outLine +BABEL_BP_104_19044_20120218_183017_outLine +BABEL_BP_104_19044_20120218_183849_outLine +BABEL_BP_104_19324_20120310_192849_inLine 
+BABEL_BP_104_19324_20120310_192849_outLine +BABEL_BP_104_19481_20120207_235626_inLine +BABEL_BP_104_19481_20120207_235626_outLine +BABEL_BP_104_20016_20120206_215156_inLine +BABEL_BP_104_20016_20120206_215156_outLine +BABEL_BP_104_21256_20120217_202248_inLine +BABEL_BP_104_21256_20120217_202248_outLine +BABEL_BP_104_21928_20120204_212612_inLine +BABEL_BP_104_21928_20120204_212612_outLine +BABEL_BP_104_21968_20120131_180237_inLine +BABEL_BP_104_21968_20120131_180237_outLine +BABEL_BP_104_22548_20120125_211519_inLine +BABEL_BP_104_22590_20120209_224232_inLine +BABEL_BP_104_22590_20120209_224232_outLine +BABEL_BP_104_23381_20120216_161115_outLine +BABEL_BP_104_24235_20120209_030431_outLine +BABEL_BP_104_24585_20120117_225722_inLine +BABEL_BP_104_24585_20120117_225722_outLine +BABEL_BP_104_24735_20120316_221529_inLine +BABEL_BP_104_24735_20120316_221529_outLine +BABEL_BP_104_24750_20120130_183131_inLine +BABEL_BP_104_24750_20120130_183131_outLine +BABEL_BP_104_24810_20120319_165838_outLine +BABEL_BP_104_25015_20120216_005135_inLine +BABEL_BP_104_25015_20120216_005135_outLine +BABEL_BP_104_25525_20120316_140847_outLine +BABEL_BP_104_25911_20111222_051549_inLine +BABEL_BP_104_25911_20111222_051549_outLine +BABEL_BP_104_26946_20120130_034221_outLine +BABEL_BP_104_27298_20111225_192028_inLine +BABEL_BP_104_27298_20111225_192028_outLine +BABEL_BP_104_28289_20120310_202856_inLine +BABEL_BP_104_28289_20120310_202856_outLine +BABEL_BP_104_28330_20120306_194033_inLine +BABEL_BP_104_28330_20120306_195756_inLine +BABEL_BP_104_28734_20120126_205422_inLine +BABEL_BP_104_28734_20120126_212950_inLine +BABEL_BP_104_29009_20120319_164025_outLine +BABEL_BP_104_29967_20120208_201355_inLine +BABEL_BP_104_29967_20120208_201355_outLine +BABEL_BP_104_30143_20111227_132440_inLine +BABEL_BP_104_30271_20120205_163755_inLine +BABEL_BP_104_30271_20120205_165111_inLine +BABEL_BP_104_30628_20120219_182744_inLine +BABEL_BP_104_30628_20120219_182744_outLine +BABEL_BP_104_30848_20120204_154057_inLine +BABEL_BP_104_30848_20120204_154058_outLine +BABEL_BP_104_31663_20120210_140419_inLine +BABEL_BP_104_31663_20120210_140419_outLine +BABEL_BP_104_31919_20120405_023221_inLine +BABEL_BP_104_31926_20120319_040036_outLine +BABEL_BP_104_32956_20120221_133851_inLine +BABEL_BP_104_32956_20120221_133851_outLine +BABEL_BP_104_33337_20120220_005047_inLine +BABEL_BP_104_33337_20120220_005047_outLine +BABEL_BP_104_33846_20120123_194027_inLine +BABEL_BP_104_34164_20120221_141502_inLine +BABEL_BP_104_34164_20120221_141502_outLine +BABEL_BP_104_34188_20120219_000455_inLine +BABEL_BP_104_34188_20120219_000455_outLine +BABEL_BP_104_34335_20111225_224055_outLine +BABEL_BP_104_34833_20120215_025837_inLine +BABEL_BP_104_34833_20120215_025837_outLine +BABEL_BP_104_34994_20120314_001810_outLine +BABEL_BP_104_34994_20120314_003701_outLine +BABEL_BP_104_35073_20120208_223917_outLine +BABEL_BP_104_35444_20120310_190608_inLine +BABEL_BP_104_35444_20120310_190608_outLine +BABEL_BP_104_35646_20120202_222418_inLine +BABEL_BP_104_35646_20120202_222418_outLine +BABEL_BP_104_35874_20120403_213324_inLine +BABEL_BP_104_35916_20120204_030147_inLine +BABEL_BP_104_35916_20120204_030147_outLine +BABEL_BP_104_35923_20120216_021137_inLine +BABEL_BP_104_35923_20120216_021137_outLine +BABEL_BP_104_36138_20120206_210519_inLine +BABEL_BP_104_36138_20120206_210519_outLine +BABEL_BP_104_36413_20120310_185758_inLine +BABEL_BP_104_36413_20120310_185758_outLine +BABEL_BP_104_36487_20120209_211827_inLine +BABEL_BP_104_36487_20120209_211827_outLine 
+BABEL_BP_104_37131_20120318_210220_inLine
+BABEL_BP_104_37131_20120318_210220_outLine
+BABEL_BP_104_37135_20120219_044437_inLine
+BABEL_BP_104_37135_20120219_044437_outLine
+BABEL_BP_104_37593_20120130_203434_inLine
+BABEL_BP_104_37593_20120130_203434_outLine
+BABEL_BP_104_38479_20120213_011154_inLine
+BABEL_BP_104_38479_20120213_011154_outLine
+BABEL_BP_104_38563_20120127_181357_outLine
+BABEL_BP_104_39178_20120109_195710_inLine
+BABEL_BP_104_39320_20120110_190913_inLine
+BABEL_BP_104_39320_20120110_190913_outLine
+BABEL_BP_104_39390_20120322_042714_outLine
+BABEL_BP_104_39525_20120217_200400_inLine
+BABEL_BP_104_39525_20120217_200400_outLine
+BABEL_BP_104_39999_20120326_194721_inLine
+BABEL_BP_104_39999_20120326_194721_outLine
+BABEL_BP_104_40136_20120222_030818_inLine
+BABEL_BP_104_40136_20120222_030823_outLine
+BABEL_BP_104_40607_20120324_163524_inLine
+BABEL_BP_104_40612_20120106_024347_inLine
+BABEL_BP_104_40612_20120106_024347_outLine
+BABEL_BP_104_40640_20120131_044455_outLine
+BABEL_BP_104_41306_20120223_191213_inLine
+BABEL_BP_104_41306_20120223_191213_outLine
+BABEL_BP_104_41531_20120331_010320_inLine
+BABEL_BP_104_41531_20120331_010320_outLine
+BABEL_BP_104_42145_20120127_042217_inLine
+BABEL_BP_104_42145_20120127_042217_outLine
+BABEL_BP_104_42571_20120229_014427_inLine
+BABEL_BP_104_42571_20120229_014427_outLine
+BABEL_BP_104_42571_20120229_020000_inLine
+BABEL_BP_104_42571_20120229_020000_outLine
+BABEL_BP_104_42929_20120307_150902_inLine
+BABEL_BP_104_42929_20120307_150902_outLine
+BABEL_BP_104_43322_20120126_040725_inLine
+BABEL_BP_104_43462_20120216_210005_inLine
+BABEL_BP_104_43462_20120216_210005_outLine
+BABEL_BP_104_43480_20120326_155717_inLine
+BABEL_BP_104_43501_20120331_220724_outLine
+BABEL_BP_104_43501_20120331_222326_outLine
+BABEL_BP_104_43684_20120128_182736_outLine
+BABEL_BP_104_43724_20120219_213737_inLine
+BABEL_BP_104_43724_20120219_213737_outLine
+BABEL_BP_104_43725_20120205_002936_inLine
+BABEL_BP_104_43725_20120205_002936_outLine
+BABEL_BP_104_43833_20120331_193735_outLine
+BABEL_BP_104_44468_20120222_125222_inLine
+BABEL_BP_104_44468_20120222_125222_outLine
+BABEL_BP_104_44515_20120326_144709_inLine
+BABEL_BP_104_44515_20120326_150551_inLine
+BABEL_BP_104_44799_20120119_040419_inLine
+BABEL_BP_104_44799_20120119_040419_outLine
+BABEL_BP_104_45356_20120324_234702_outLine
+BABEL_BP_104_45403_20111222_014909_outLine
+BABEL_BP_104_45562_20120131_200753_inLine
+BABEL_BP_104_45926_20120127_162212_inLine
+BABEL_BP_104_45926_20120127_162212_outLine
+BABEL_BP_104_45947_20120313_214251_inLine
+BABEL_BP_104_46168_20120217_200729_inLine
+BABEL_BP_104_46168_20120217_200729_outLine
+BABEL_BP_104_46734_20120219_025954_outLine
+BABEL_BP_104_46979_20120223_173811_inLine
+BABEL_BP_104_46979_20120223_173811_outLine
+BABEL_BP_104_47015_20120222_053105_inLine
+BABEL_BP_104_47015_20120222_053105_outLine
+BABEL_BP_104_47917_20120319_003035_inLine
+BABEL_BP_104_47917_20120319_003035_outLine
+BABEL_BP_104_48000_20120323_171146_inLine
+BABEL_BP_104_48000_20120323_171146_outLine
+BABEL_BP_104_48001_20120204_231603_inLine
+BABEL_BP_104_48001_20120204_231603_outLine
+BABEL_BP_104_48259_20120217_200412_inLine
+BABEL_BP_104_48259_20120217_200412_outLine
+BABEL_BP_104_48944_20120218_011825_inLine
+BABEL_BP_104_48944_20120218_011825_outLine
+BABEL_BP_104_48946_20120320_192250_inLine
+BABEL_BP_104_48946_20120320_192250_outLine
+BABEL_BP_104_49141_20120330_015342_inLine
+BABEL_BP_104_49629_20120312_155816_outLine
+BABEL_BP_104_50407_20120318_232348_inLine
+BABEL_BP_104_50407_20120318_232348_outLine
+BABEL_BP_104_50682_20120116_205741_inLine
+BABEL_BP_104_50682_20120116_205741_outLine
+BABEL_BP_104_50820_20120213_140300_inLine
+BABEL_BP_104_50820_20120213_140300_outLine
+BABEL_BP_104_51024_20120131_172745_inLine
+BABEL_BP_104_51047_20120319_042347_outLine
+BABEL_BP_104_51329_20120222_203129_inLine
+BABEL_BP_104_51329_20120222_203129_outLine
+BABEL_BP_104_51329_20120222_205332_inLine
+BABEL_BP_104_51329_20120222_205332_outLine
+BABEL_BP_104_51519_20120220_052247_inLine
+BABEL_BP_104_51519_20120220_052247_outLine
+BABEL_BP_104_51570_20120118_225333_inLine
+BABEL_BP_104_51570_20120118_225333_outLine
+BABEL_BP_104_51716_20120221_005215_inLine
+BABEL_BP_104_51716_20120221_005215_outLine
+BABEL_BP_104_52300_20120203_210256_inLine
+BABEL_BP_104_52300_20120203_210256_outLine
+BABEL_BP_104_52753_20120209_225916_inLine
+BABEL_BP_104_52753_20120209_225916_outLine
+BABEL_BP_104_52753_20120213_014050_inLine
+BABEL_BP_104_52753_20120213_014050_outLine
+BABEL_BP_104_52954_20120313_170902_inLine
+BABEL_BP_104_52954_20120313_170902_outLine
+BABEL_BP_104_53159_20120402_035901_inLine
+BABEL_BP_104_53159_20120402_035901_outLine
+BABEL_BP_104_53334_20120309_184805_inLine
+BABEL_BP_104_53334_20120309_184805_outLine
+BABEL_BP_104_53659_20120218_205643_inLine
+BABEL_BP_104_53659_20120218_205643_outLine
+BABEL_BP_104_53718_20120202_220720_outLine
+BABEL_BP_104_54909_20120130_194003_inLine
+BABEL_BP_104_54909_20120130_194003_outLine
+BABEL_BP_104_55213_20120331_185824_outLine
+BABEL_BP_104_55668_20120212_011829_inLine
+BABEL_BP_104_55668_20120212_011829_outLine
+BABEL_BP_104_56201_20120126_180227_outLine
+BABEL_BP_104_56308_20120402_024809_outLine
+BABEL_BP_104_56704_20120120_155806_inLine
+BABEL_BP_104_56704_20120120_155806_outLine
+BABEL_BP_104_56753_20120322_204356_outLine
+BABEL_BP_104_56805_20120320_045112_inLine
+BABEL_BP_104_56805_20120320_045112_outLine
+BABEL_BP_104_57005_20120321_034143_inLine
+BABEL_BP_104_57082_20120110_024829_inLine
+BABEL_BP_104_57116_20120110_180036_inLine
+BABEL_BP_104_57167_20111230_213737_outLine
+BABEL_BP_104_57210_20120321_020212_inLine
+BABEL_BP_104_57210_20120321_020212_outLine
+BABEL_BP_104_57263_20120302_211404_inLine
+BABEL_BP_104_57320_20120204_230109_inLine
+BABEL_BP_104_57320_20120204_230109_outLine
+BABEL_BP_104_57531_20120203_165801_inLine
+BABEL_BP_104_57531_20120203_165801_outLine
+BABEL_BP_104_57672_20120204_030206_outLine
+BABEL_BP_104_58149_20120218_161613_outLine
+BABEL_BP_104_58298_20120208_214852_inLine
+BABEL_BP_104_58298_20120208_214852_outLine
+BABEL_BP_104_58939_20120212_184855_inLine
+BABEL_BP_104_58939_20120212_184855_outLine
+BABEL_BP_104_58963_20120331_015840_inLine
+BABEL_BP_104_58963_20120331_015840_outLine
+BABEL_BP_104_59219_20120131_225115_outLine
+BABEL_BP_104_59399_20120318_144751_inLine
+BABEL_BP_104_59399_20120318_144752_outLine
+BABEL_BP_104_59482_20120309_190927_inLine
+BABEL_BP_104_59482_20120309_190927_outLine
+BABEL_BP_104_59681_20120123_213306_inLine
+BABEL_BP_104_59681_20120123_213306_outLine
+BABEL_BP_104_60462_20120201_181707_inLine
+BABEL_BP_104_60462_20120201_181707_outLine
+BABEL_BP_104_60806_20120213_161652_outLine
+BABEL_BP_104_61029_20120201_224200_outLine
+BABEL_BP_104_61523_20120212_035522_inLine
+BABEL_BP_104_61655_20120208_203143_inLine
+BABEL_BP_104_61655_20120208_203143_outLine
+BABEL_BP_104_61733_20120205_220251_outLine
+BABEL_BP_104_61735_20120314_012744_inLine
+BABEL_BP_104_61909_20120320_190739_inLine
+BABEL_BP_104_61909_20120320_190739_outLine
+BABEL_BP_104_62815_20120318_025812_outLine
+BABEL_BP_104_62816_20120312_153937_outLine
+BABEL_BP_104_63111_20120204_232445_outLine
+BABEL_BP_104_63215_20120213_040737_inLine
+BABEL_BP_104_63215_20120213_040737_outLine
+BABEL_BP_104_63220_20120131_155658_inLine
+BABEL_BP_104_63220_20120131_155658_outLine
+BABEL_BP_104_63390_20120123_212718_outLine
+BABEL_BP_104_63397_20120217_194928_inLine
+BABEL_BP_104_63397_20120217_194928_outLine
+BABEL_BP_104_63784_20120216_015608_inLine
+BABEL_BP_104_63784_20120216_015608_outLine
+BABEL_BP_104_63934_20120318_201706_inLine
+BABEL_BP_104_63934_20120318_201706_outLine
+BABEL_BP_104_64990_20120119_173958_inLine
+BABEL_BP_104_64990_20120119_173958_outLine
+BABEL_BP_104_65341_20120220_222356_inLine
+BABEL_BP_104_65341_20120220_222356_outLine
+BABEL_BP_104_65590_20120109_001414_inLine
+BABEL_BP_104_65590_20120109_001414_outLine
+BABEL_BP_104_65954_20120128_163139_inLine
+BABEL_BP_104_65954_20120128_163139_outLine
+BABEL_BP_104_65974_20120316_195524_inLine
+BABEL_BP_104_65974_20120316_195524_outLine
+BABEL_BP_104_66784_20111225_190506_outLine
+BABEL_BP_104_66879_20120213_004555_inLine
+BABEL_BP_104_66879_20120213_004555_outLine
+BABEL_BP_104_67106_20120208_201829_inLine
+BABEL_BP_104_67106_20120208_201829_outLine
+BABEL_BP_104_67423_20120205_220658_outLine
+BABEL_BP_104_67685_20120217_235729_inLine
+BABEL_BP_104_67685_20120217_235729_outLine
+BABEL_BP_104_67718_20120131_164436_inLine
+BABEL_BP_104_67718_20120131_164436_outLine
+BABEL_BP_104_68077_20120219_155535_outLine
+BABEL_BP_104_68111_20120321_185146_outLine
+BABEL_BP_104_68144_20120210_223106_outLine
+BABEL_BP_104_68189_20120128_005011_inLine
+BABEL_BP_104_68189_20120128_005011_outLine
+BABEL_BP_104_68209_20120219_045221_inLine
+BABEL_BP_104_68997_20120126_010839_inLine
+BABEL_BP_104_70333_20120210_033437_outLine
+BABEL_BP_104_70528_20120128_013553_inLine
+BABEL_BP_104_70528_20120128_013553_outLine
+BABEL_BP_104_70762_20120213_175054_outLine
+BABEL_BP_104_70897_20120315_000410_inLine
+BABEL_BP_104_70897_20120315_000410_outLine
+BABEL_BP_104_70897_20120315_013535_inLine
+BABEL_BP_104_70897_20120315_013535_outLine
+BABEL_BP_104_71948_20120210_012347_inLine
+BABEL_BP_104_71970_20120310_195048_inLine
+BABEL_BP_104_72874_20120213_191257_inLine
+BABEL_BP_104_72874_20120213_191257_outLine
+BABEL_BP_104_72910_20120310_185203_outLine
+BABEL_BP_104_73450_20120206_024342_inLine
+BABEL_BP_104_73450_20120206_024342_outLine
+BABEL_BP_104_73925_20120123_233630_inLine
+BABEL_BP_104_73925_20120123_233630_outLine
+BABEL_BP_104_74261_20120331_191708_outLine
+BABEL_BP_104_74334_20111230_035012_inLine
+BABEL_BP_104_74940_20120228_225523_inLine
+BABEL_BP_104_74940_20120228_225523_outLine
+BABEL_BP_104_75390_20120218_133736_inLine
+BABEL_BP_104_75390_20120218_133736_outLine
+BABEL_BP_104_75402_20120319_160944_inLine
+BABEL_BP_104_76714_20120313_220017_inLine
+BABEL_BP_104_76714_20120313_220017_outLine
+BABEL_BP_104_76738_20120210_010510_inLine
+BABEL_BP_104_77097_20120214_235954_inLine
+BABEL_BP_104_77097_20120214_235954_outLine
+BABEL_BP_104_77256_20120309_064948_inLine
+BABEL_BP_104_77537_20120206_034628_outLine
+BABEL_BP_104_77711_20120229_163050_inLine
+BABEL_BP_104_77711_20120229_163050_outLine
+BABEL_BP_104_77711_20120229_164115_inLine
+BABEL_BP_104_77711_20120229_164115_outLine
+BABEL_BP_104_78225_20120126_170942_outLine
+BABEL_BP_104_78443_20120128_211331_inLine
+BABEL_BP_104_78443_20120128_211331_outLine
+BABEL_BP_104_79120_20120127_021912_inLine
+BABEL_BP_104_79120_20120127_021912_outLine
+BABEL_BP_104_79120_20120127_030132_inLine
+BABEL_BP_104_79120_20120127_030132_outLine
+BABEL_BP_104_79156_20120126_191440_outLine
+BABEL_BP_104_79753_20120203_173233_inLine
+BABEL_BP_104_79753_20120203_173233_outLine
+BABEL_BP_104_80134_20120313_215613_inLine
+BABEL_BP_104_80134_20120313_215613_outLine
+BABEL_BP_104_80284_20120109_235306_inLine
+BABEL_BP_104_80284_20120109_235306_outLine
+BABEL_BP_104_80559_20120319_152020_outLine
+BABEL_BP_104_80616_20120223_193040_inLine
+BABEL_BP_104_80616_20120223_193040_outLine
+BABEL_BP_104_80867_20120309_034536_inLine
+BABEL_BP_104_80867_20120309_034536_outLine
+BABEL_BP_104_80929_20120310_194854_inLine
+BABEL_BP_104_80929_20120310_194854_outLine
+BABEL_BP_104_81726_20120229_154500_inLine
+BABEL_BP_104_81726_20120229_154500_outLine
+BABEL_BP_104_81996_20120128_185859_outLine
+BABEL_BP_104_82499_20120215_024134_inLine
+BABEL_BP_104_82499_20120215_024134_outLine
+BABEL_BP_104_82595_20120324_154901_outLine
+BABEL_BP_104_82964_20120218_181351_outLine
+BABEL_BP_104_83072_20120213_170201_inLine
+BABEL_BP_104_83072_20120213_170201_outLine
+BABEL_BP_104_83112_20120204_161112_inLine
+BABEL_BP_104_83112_20120204_161112_outLine
+BABEL_BP_104_83747_20120120_153904_outLine
+BABEL_BP_104_83866_20120206_040504_inLine
+BABEL_BP_104_83866_20120206_040505_outLine
+BABEL_BP_104_84854_20120129_233819_inLine
+BABEL_BP_104_84854_20120129_233819_outLine
+BABEL_BP_104_84885_20120217_215436_inLine
+BABEL_BP_104_84885_20120217_215436_outLine
+BABEL_BP_104_84950_20120130_131546_inLine
+BABEL_BP_104_84950_20120130_131546_outLine
+BABEL_BP_104_85558_20120413_044033_inLine
+BABEL_BP_104_86528_20120128_211228_inLine
+BABEL_BP_104_86537_20120128_022125_inLine
+BABEL_BP_104_86537_20120128_023523_inLine
+BABEL_BP_104_87067_20120324_182930_inLine
+BABEL_BP_104_87067_20120324_182930_outLine
+BABEL_BP_104_87517_20120207_200619_inLine
+BABEL_BP_104_87517_20120207_200619_outLine
+BABEL_BP_104_88070_20120318_164350_outLine
+BABEL_BP_104_88434_20120319_170128_inLine
+BABEL_BP_104_88434_20120319_170128_outLine
+BABEL_BP_104_88921_20120205_215225_inLine
+BABEL_BP_104_88921_20120205_215225_outLine
+BABEL_BP_104_89036_20120327_211455_inLine
+BABEL_BP_104_89925_20120202_000208_inLine
+BABEL_BP_104_89925_20120202_000208_outLine
+BABEL_BP_104_89952_20120131_212850_inLine
+BABEL_BP_104_89952_20120131_212850_outLine
+BABEL_BP_104_90263_20120205_044035_inLine
+BABEL_BP_104_90263_20120205_044035_outLine
+BABEL_BP_104_90310_20120129_024342_outLine
+BABEL_BP_104_91161_20120311_032449_inLine
+BABEL_BP_104_91161_20120311_032449_outLine
+BABEL_BP_104_92342_20120320_041334_inLine
+BABEL_BP_104_92342_20120320_041334_outLine
+BABEL_BP_104_92722_20120209_235113_outLine
+BABEL_BP_104_92793_20120118_235358_inLine
+BABEL_BP_104_93300_20120221_135558_inLine
+BABEL_BP_104_93300_20120221_135558_outLine
+BABEL_BP_104_93713_20120121_004435_inLine
+BABEL_BP_104_93730_20120220_052912_outLine
+BABEL_BP_104_93730_20120220_053327_outLine
+BABEL_BP_104_93730_20120220_054726_outLine
+BABEL_BP_104_93844_20120316_014157_inLine
+BABEL_BP_104_93844_20120327_194612_inLine
+BABEL_BP_104_94572_20120321_022026_inLine
+BABEL_BP_104_94683_20120126_024342_inLine
+BABEL_BP_104_94775_20120321_230436_inLine
+BABEL_BP_104_94775_20120321_230436_outLine
+BABEL_BP_104_94793_20120204_043218_inLine
+BABEL_BP_104_94793_20120204_043218_outLine
+BABEL_BP_104_95349_20111229_162101_inLine
+BABEL_BP_104_95360_20120205_133312_inLine
+BABEL_BP_104_95360_20120205_133312_outLine
+BABEL_BP_104_95465_20120223_040653_inLine
+BABEL_BP_104_95465_20120223_040653_outLine
+BABEL_BP_104_95904_20120218_183758_inLine
+BABEL_BP_104_95904_20120218_183758_outLine
+BABEL_BP_104_96343_20120130_143444_outLine
+BABEL_BP_104_96690_20120321_005155_inLine
+BABEL_BP_104_96811_20120217_021933_inLine
+BABEL_BP_104_96811_20120217_021933_outLine
+BABEL_BP_104_96956_20120209_025537_inLine
+BABEL_BP_104_96956_20120209_025537_outLine
+BABEL_BP_104_97050_20120314_144713_outLine
+BABEL_BP_104_97803_20120116_184019_inLine
+BABEL_BP_104_97803_20120116_184019_outLine
+BABEL_BP_104_97971_20120317_004835_inLine
+BABEL_BP_104_97971_20120317_004835_outLine
+BABEL_BP_104_98067_20120221_131601_inLine
+BABEL_BP_104_98067_20120221_131601_outLine
+BABEL_BP_104_98110_20120218_193615_outLine
+BABEL_BP_104_98503_20120402_230340_inLine
+BABEL_BP_104_98503_20120403_025554_inLine
+BABEL_BP_104_98588_20120119_011655_inLine
+BABEL_BP_104_98588_20120119_011655_outLine
+BABEL_BP_104_98942_20120205_224026_outLine
+BABEL_BP_104_99354_20120203_152733_inLine
+BABEL_BP_104_99354_20120203_152733_outLine
diff --git a/egs/babel/s5d/conf/lists/104-pashto/train.LimitedLP.list b/egs/babel/s5d/conf/lists/104-pashto/train.LimitedLP.list
new file mode 100644
index 00000000000..293419a111d
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/104-pashto/train.LimitedLP.list
@@ -0,0 +1,131 @@
+BABEL_BP_104_08036_20111220_013826_inLine
+BABEL_BP_104_08139_20120126_021604_inLine
+BABEL_BP_104_11647_20120315_022645_inLine
+BABEL_BP_104_11647_20120315_022645_outLine
+BABEL_BP_104_13952_20120126_185217_inLine
+BABEL_BP_104_13952_20120126_185217_outLine
+BABEL_BP_104_14147_20120320_003436_inLine
+BABEL_BP_104_14147_20120320_003436_outLine
+BABEL_BP_104_14527_20120207_235446_inLine
+BABEL_BP_104_14527_20120207_235446_outLine
+BABEL_BP_104_15324_20120126_222036_inLine
+BABEL_BP_104_15324_20120126_222036_outLine
+BABEL_BP_104_15324_20120127_023323_inLine
+BABEL_BP_104_15324_20120127_023323_outLine
+BABEL_BP_104_15415_20120219_181352_inLine
+BABEL_BP_104_15949_20120229_140434_inLine
+BABEL_BP_104_15949_20120229_140434_outLine
+BABEL_BP_104_16074_20120129_041107_inLine
+BABEL_BP_104_16074_20120129_041107_outLine
+BABEL_BP_104_16290_20120220_200234_inLine
+BABEL_BP_104_16290_20120220_200234_outLine
+BABEL_BP_104_16339_20120131_184255_inLine
+BABEL_BP_104_17904_20120320_014817_inLine
+BABEL_BP_104_17904_20120320_014817_outLine
+BABEL_BP_104_18084_20111230_210850_outLine
+BABEL_BP_104_19324_20120310_192849_inLine
+BABEL_BP_104_19324_20120310_192849_outLine
+BABEL_BP_104_21256_20120217_202248_inLine
+BABEL_BP_104_21256_20120217_202248_outLine
+BABEL_BP_104_23381_20120216_161115_outLine
+BABEL_BP_104_24235_20120209_030431_outLine
+BABEL_BP_104_24735_20120316_221529_inLine
+BABEL_BP_104_24735_20120316_221529_outLine
+BABEL_BP_104_26946_20120130_034221_outLine
+BABEL_BP_104_28289_20120310_202856_inLine
+BABEL_BP_104_28289_20120310_202856_outLine
+BABEL_BP_104_28734_20120126_205422_inLine
+BABEL_BP_104_28734_20120126_212950_inLine
+BABEL_BP_104_30271_20120205_163755_inLine
+BABEL_BP_104_30271_20120205_165111_inLine
+BABEL_BP_104_30628_20120219_182744_inLine
+BABEL_BP_104_30628_20120219_182744_outLine
+BABEL_BP_104_34188_20120219_000455_inLine
+BABEL_BP_104_34188_20120219_000455_outLine
+BABEL_BP_104_35073_20120208_223917_outLine
+BABEL_BP_104_35444_20120310_190608_inLine
+BABEL_BP_104_35444_20120310_190608_outLine
+BABEL_BP_104_36413_20120310_185758_inLine
+BABEL_BP_104_36413_20120310_185758_outLine
+BABEL_BP_104_38479_20120213_011154_inLine
+BABEL_BP_104_38479_20120213_011154_outLine
+BABEL_BP_104_39999_20120326_194721_inLine
+BABEL_BP_104_39999_20120326_194721_outLine
+BABEL_BP_104_41306_20120223_191213_inLine
+BABEL_BP_104_41306_20120223_191213_outLine
+BABEL_BP_104_42571_20120229_014427_inLine
+BABEL_BP_104_42571_20120229_014427_outLine
+BABEL_BP_104_42571_20120229_020000_inLine
+BABEL_BP_104_42571_20120229_020000_outLine
+BABEL_BP_104_43322_20120126_040725_inLine
+BABEL_BP_104_43724_20120219_213737_inLine
+BABEL_BP_104_43724_20120219_213737_outLine
+BABEL_BP_104_45926_20120127_162212_inLine
+BABEL_BP_104_45926_20120127_162212_outLine
+BABEL_BP_104_46734_20120219_025954_outLine
+BABEL_BP_104_48000_20120323_171146_inLine
+BABEL_BP_104_48000_20120323_171146_outLine
+BABEL_BP_104_48259_20120217_200412_inLine
+BABEL_BP_104_48259_20120217_200412_outLine
+BABEL_BP_104_48944_20120218_011825_inLine
+BABEL_BP_104_48944_20120218_011825_outLine
+BABEL_BP_104_48946_20120320_192250_inLine
+BABEL_BP_104_48946_20120320_192250_outLine
+BABEL_BP_104_50407_20120318_232348_inLine
+BABEL_BP_104_50407_20120318_232348_outLine
+BABEL_BP_104_51519_20120220_052247_inLine
+BABEL_BP_104_51519_20120220_052247_outLine
+BABEL_BP_104_51716_20120221_005215_inLine
+BABEL_BP_104_51716_20120221_005215_outLine
+BABEL_BP_104_52753_20120209_225916_inLine
+BABEL_BP_104_52753_20120209_225916_outLine
+BABEL_BP_104_52753_20120213_014050_inLine
+BABEL_BP_104_52753_20120213_014050_outLine
+BABEL_BP_104_56805_20120320_045112_inLine
+BABEL_BP_104_56805_20120320_045112_outLine
+BABEL_BP_104_57210_20120321_020212_inLine
+BABEL_BP_104_57210_20120321_020212_outLine
+BABEL_BP_104_57672_20120204_030206_outLine
+BABEL_BP_104_59219_20120131_225115_outLine
+BABEL_BP_104_60806_20120213_161652_outLine
+BABEL_BP_104_63397_20120217_194928_inLine
+BABEL_BP_104_63397_20120217_194928_outLine
+BABEL_BP_104_63934_20120318_201706_inLine
+BABEL_BP_104_63934_20120318_201706_outLine
+BABEL_BP_104_65590_20120109_001414_inLine
+BABEL_BP_104_65590_20120109_001414_outLine
+BABEL_BP_104_66784_20111225_190506_outLine
+BABEL_BP_104_67685_20120217_235729_inLine
+BABEL_BP_104_67685_20120217_235729_outLine
+BABEL_BP_104_68189_20120128_005011_inLine
+BABEL_BP_104_68189_20120128_005011_outLine
+BABEL_BP_104_68209_20120219_045221_inLine
+BABEL_BP_104_68997_20120126_010839_inLine
+BABEL_BP_104_70762_20120213_175054_outLine
+BABEL_BP_104_70897_20120315_000410_inLine
+BABEL_BP_104_70897_20120315_000410_outLine
+BABEL_BP_104_70897_20120315_013535_inLine
+BABEL_BP_104_70897_20120315_013535_outLine
+BABEL_BP_104_71948_20120210_012347_inLine
+BABEL_BP_104_73925_20120123_233630_inLine
+BABEL_BP_104_73925_20120123_233630_outLine
+BABEL_BP_104_76738_20120210_010510_inLine
+BABEL_BP_104_77097_20120214_235954_inLine
+BABEL_BP_104_77097_20120214_235954_outLine
+BABEL_BP_104_80929_20120310_194854_inLine
+BABEL_BP_104_80929_20120310_194854_outLine
+BABEL_BP_104_81996_20120128_185859_outLine
+BABEL_BP_104_87067_20120324_182930_inLine
+BABEL_BP_104_87067_20120324_182930_outLine
+BABEL_BP_104_92342_20120320_041334_inLine
+BABEL_BP_104_92342_20120320_041334_outLine
+BABEL_BP_104_92793_20120118_235358_inLine
+BABEL_BP_104_94683_20120126_024342_inLine
+BABEL_BP_104_94775_20120321_230436_inLine
+BABEL_BP_104_94775_20120321_230436_outLine
+BABEL_BP_104_95349_20111229_162101_inLine
+BABEL_BP_104_95360_20120205_133312_inLine
+BABEL_BP_104_95360_20120205_133312_outLine
+BABEL_BP_104_95904_20120218_183758_inLine
+BABEL_BP_104_95904_20120218_183758_outLine
+BABEL_BP_104_96343_20120130_143444_outLine
diff --git a/egs/babel/s5d/conf/lists/104-pashto/training.list b/egs/babel/s5d/conf/lists/104-pashto/training.list
new file mode 100644
index 00000000000..deb9bc55dfe
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/104-pashto/training.list
@@ -0,0 +1,1026 @@
+BABEL_BP_104_01820_20120313_212614_inLine
+BABEL_BP_104_01820_20120313_212614_outLine
+BABEL_BP_104_02362_20120229_213454_inLine
+BABEL_BP_104_03770_20120109_014606_inLine
+BABEL_BP_104_03770_20120109_014606_outLine
+BABEL_BP_104_04074_20120318_203458_outLine
+BABEL_BP_104_05545_20120126_034408_inLine
+BABEL_BP_104_05545_20120126_034408_outLine
+BABEL_BP_104_08036_20111220_013826_inLine
+BABEL_BP_104_08139_20120126_021604_inLine
+BABEL_BP_104_10193_20120213_031930_inLine
+BABEL_BP_104_10193_20120213_031930_outLine
+BABEL_BP_104_10289_20120128_035330_inLine
+BABEL_BP_104_10289_20120128_035330_outLine
+BABEL_BP_104_10642_20120321_210945_outLine
+BABEL_BP_104_10668_20120311_014815_inLine
+BABEL_BP_104_10668_20120311_014815_outLine
+BABEL_BP_104_10911_20111222_025120_inLine
+BABEL_BP_104_10911_20111222_025120_outLine
+BABEL_BP_104_11145_20120321_154029_inLine
+BABEL_BP_104_11145_20120321_154029_outLine
+BABEL_BP_104_11146_20120224_000248_inLine
+BABEL_BP_104_11146_20120224_000248_outLine
+BABEL_BP_104_11153_20120108_191820_inLine
+BABEL_BP_104_11153_20120108_191820_outLine
+BABEL_BP_104_11202_20120213_235334_inLine
+BABEL_BP_104_11202_20120213_235334_outLine
+BABEL_BP_104_11388_20120202_224148_inLine
+BABEL_BP_104_11388_20120202_224148_outLine
+BABEL_BP_104_11442_20120218_234445_inLine
+BABEL_BP_104_11442_20120218_234445_outLine
+BABEL_BP_104_11626_20120316_193802_inLine
+BABEL_BP_104_11626_20120316_193802_outLine
+BABEL_BP_104_11647_20120315_022645_inLine
+BABEL_BP_104_11647_20120315_022645_outLine
+BABEL_BP_104_12171_20120212_154823_inLine
+BABEL_BP_104_12171_20120212_154823_outLine
+BABEL_BP_104_12474_20120309_193318_inLine
+BABEL_BP_104_12474_20120309_193318_outLine
+BABEL_BP_104_12494_20120213_180757_inLine
+BABEL_BP_104_12494_20120213_180757_outLine
+BABEL_BP_104_12562_20120307_152654_inLine
+BABEL_BP_104_12682_20120223_031401_inLine
+BABEL_BP_104_12843_20120202_221656_inLine
+BABEL_BP_104_12843_20120202_221656_outLine
+BABEL_BP_104_12946_20120224_013645_inLine
+BABEL_BP_104_12946_20120224_013645_outLine
+BABEL_BP_104_13064_20120220_040256_inLine
+BABEL_BP_104_13064_20120220_040256_outLine
+BABEL_BP_104_13157_20120207_204725_inLine
+BABEL_BP_104_13157_20120207_204725_outLine
+BABEL_BP_104_13189_20120112_020041_inLine
+BABEL_BP_104_13189_20120112_020041_outLine
+BABEL_BP_104_13354_20120121_164912_inLine
+BABEL_BP_104_13354_20120121_164912_outLine
+BABEL_BP_104_13456_20120111_024843_outLine
+BABEL_BP_104_13546_20120327_004548_outLine
+BABEL_BP_104_13580_20120222_195120_inLine
+BABEL_BP_104_13580_20120222_195120_outLine
+BABEL_BP_104_13615_20120314_233732_inLine
+BABEL_BP_104_13615_20120314_233732_outLine
+BABEL_BP_104_13694_20120321_001123_outLine
+BABEL_BP_104_13771_20120316_004856_inLine
+BABEL_BP_104_13771_20120316_004856_outLine
+BABEL_BP_104_13798_20120105_221125_inLine
+BABEL_BP_104_13798_20120105_221125_outLine
+BABEL_BP_104_13952_20120126_185217_inLine
+BABEL_BP_104_13952_20120126_185217_outLine
+BABEL_BP_104_14147_20120320_003436_inLine
+BABEL_BP_104_14147_20120320_003436_outLine
+BABEL_BP_104_14225_20120331_015956_inLine
+BABEL_BP_104_14225_20120331_020908_inLine
+BABEL_BP_104_14527_20120207_235446_inLine
+BABEL_BP_104_14527_20120207_235446_outLine
+BABEL_BP_104_14927_20111224_041309_inLine
+BABEL_BP_104_14927_20111224_041309_outLine
+BABEL_BP_104_14984_20120205_195333_inLine
+BABEL_BP_104_14984_20120205_195333_outLine
+BABEL_BP_104_15176_20120316_181716_outLine
+BABEL_BP_104_15324_20120126_222036_inLine
+BABEL_BP_104_15324_20120126_222036_outLine
+BABEL_BP_104_15324_20120127_023323_inLine
+BABEL_BP_104_15324_20120127_023323_outLine
+BABEL_BP_104_15377_20120322_045329_inLine
+BABEL_BP_104_15415_20120219_181352_inLine
+BABEL_BP_104_15552_20120304_160459_inLine
+BABEL_BP_104_15600_20111230_233908_inLine
+BABEL_BP_104_15600_20111230_233908_outLine
+BABEL_BP_104_15600_20111230_234837_inLine
+BABEL_BP_104_15600_20111230_234837_outLine
+BABEL_BP_104_15630_20120402_033748_inLine
+BABEL_BP_104_15749_20120131_005221_inLine
+BABEL_BP_104_15749_20120131_005221_outLine
+BABEL_BP_104_15803_20120117_155821_inLine
+BABEL_BP_104_15803_20120117_155821_outLine
+BABEL_BP_104_15861_20120401_024411_inLine
+BABEL_BP_104_15949_20120229_140434_inLine
+BABEL_BP_104_15949_20120229_140434_outLine
+BABEL_BP_104_16074_20120129_041107_inLine
+BABEL_BP_104_16074_20120129_041107_outLine
+BABEL_BP_104_16290_20120220_200234_inLine
+BABEL_BP_104_16290_20120220_200234_outLine
+BABEL_BP_104_16339_20120131_184255_inLine
+BABEL_BP_104_16350_20120315_043233_outLine
+BABEL_BP_104_16416_20120317_205531_inLine
+BABEL_BP_104_16416_20120317_205531_outLine
+BABEL_BP_104_16416_20120317_211129_inLine
+BABEL_BP_104_16416_20120317_211129_outLine
+BABEL_BP_104_16633_20120311_053635_inLine
+BABEL_BP_104_16633_20120311_053635_outLine
+BABEL_BP_104_17081_20120128_030343_inLine
+BABEL_BP_104_17081_20120128_030343_outLine
+BABEL_BP_104_17180_20120321_215255_inLine
+BABEL_BP_104_17180_20120321_215255_outLine
+BABEL_BP_104_17216_20120128_015245_inLine
+BABEL_BP_104_17216_20120128_020324_inLine
+BABEL_BP_104_17319_20111225_210159_inLine
+BABEL_BP_104_17319_20111225_210159_outLine
+BABEL_BP_104_17410_20120129_211432_inLine
+BABEL_BP_104_17410_20120129_211432_outLine
+BABEL_BP_104_17429_20120209_024521_inLine
+BABEL_BP_104_17429_20120209_024521_outLine
+BABEL_BP_104_17450_20120331_021646_inLine
+BABEL_BP_104_17612_20120205_043931_outLine
+BABEL_BP_104_17783_20120205_045923_inLine
+BABEL_BP_104_17783_20120205_045923_outLine
+BABEL_BP_104_17904_20120320_014817_inLine
+BABEL_BP_104_17904_20120320_014817_outLine
+BABEL_BP_104_17930_20120321_161410_outLine
+BABEL_BP_104_18084_20111230_210850_outLine
+BABEL_BP_104_18537_20120130_181101_inLine
+BABEL_BP_104_18537_20120130_181101_outLine
+BABEL_BP_104_18616_20120126_040622_inLine
+BABEL_BP_104_18616_20120126_040622_outLine
+BABEL_BP_104_18667_20120208_175014_inLine
+BABEL_BP_104_18667_20120208_175014_outLine
+BABEL_BP_104_18861_20120218_221303_inLine
+BABEL_BP_104_19044_20120218_182247_outLine
+BABEL_BP_104_19044_20120218_183017_outLine
+BABEL_BP_104_19044_20120218_183849_outLine
+BABEL_BP_104_19137_20120119_001516_inLine
+BABEL_BP_104_19207_20111224_044525_inLine
+BABEL_BP_104_19207_20111224_044525_outLine
+BABEL_BP_104_19324_20120310_192849_inLine
+BABEL_BP_104_19324_20120310_192849_outLine
+BABEL_BP_104_19346_20120205_182121_outLine
+BABEL_BP_104_19481_20120207_235626_inLine
+BABEL_BP_104_19481_20120207_235626_outLine
+BABEL_BP_104_19494_20120219_214920_inLine
+BABEL_BP_104_19494_20120219_214920_outLine
+BABEL_BP_104_19759_20111223_184346_outLine
+BABEL_BP_104_20016_20120206_215156_inLine
+BABEL_BP_104_20016_20120206_215156_outLine
+BABEL_BP_104_20114_20120324_213414_inLine
+BABEL_BP_104_20171_20120318_012849_inLine
+BABEL_BP_104_20171_20120318_012849_outLine
+BABEL_BP_104_20171_20120318_014226_inLine
+BABEL_BP_104_20171_20120318_014226_outLine
+BABEL_BP_104_20219_20120221_223942_inLine
+BABEL_BP_104_20219_20120221_223942_outLine
+BABEL_BP_104_20386_20120226_160551_inLine
+BABEL_BP_104_20386_20120226_160551_outLine
+BABEL_BP_104_20557_20120402_215807_inLine
+BABEL_BP_104_20738_20120129_182528_inLine
+BABEL_BP_104_20738_20120129_182528_outLine
+BABEL_BP_104_21041_20120309_143920_inLine
+BABEL_BP_104_21041_20120309_143920_outLine
+BABEL_BP_104_21061_20120205_192140_inLine
+BABEL_BP_104_21061_20120205_192140_outLine
+BABEL_BP_104_21224_20120322_040006_inLine
+BABEL_BP_104_21224_20120322_040006_outLine
+BABEL_BP_104_21256_20120217_202248_inLine
+BABEL_BP_104_21256_20120217_202248_outLine
+BABEL_BP_104_21489_20120213_163025_inLine
+BABEL_BP_104_21489_20120213_163025_outLine
+BABEL_BP_104_21928_20120204_212612_inLine
+BABEL_BP_104_21928_20120204_212612_outLine
+BABEL_BP_104_21968_20120131_180237_inLine
+BABEL_BP_104_21968_20120131_180237_outLine
+BABEL_BP_104_22548_20120125_211519_inLine
+BABEL_BP_104_22590_20120209_224232_inLine
+BABEL_BP_104_22590_20120209_224232_outLine
+BABEL_BP_104_23322_20120204_173810_inLine
+BABEL_BP_104_23322_20120204_173810_outLine
+BABEL_BP_104_23370_20120216_013240_inLine
+BABEL_BP_104_23370_20120216_013240_outLine
+BABEL_BP_104_23381_20120216_161115_outLine
+BABEL_BP_104_23705_20120219_011051_inLine
+BABEL_BP_104_23705_20120219_011051_outLine
+BABEL_BP_104_24235_20120209_030431_outLine
+BABEL_BP_104_24585_20120117_225722_inLine
+BABEL_BP_104_24585_20120117_225722_outLine
+BABEL_BP_104_24735_20120316_221529_inLine
+BABEL_BP_104_24735_20120316_221529_outLine
+BABEL_BP_104_24750_20120130_183131_inLine
+BABEL_BP_104_24750_20120130_183131_outLine
+BABEL_BP_104_24810_20120319_165838_outLine
+BABEL_BP_104_25015_20120216_005135_inLine
+BABEL_BP_104_25015_20120216_005135_outLine
+BABEL_BP_104_25525_20120316_140847_outLine
+BABEL_BP_104_25911_20111222_051549_inLine
+BABEL_BP_104_25911_20111222_051549_outLine
+BABEL_BP_104_26475_20120309_222554_inLine
+BABEL_BP_104_26654_20120130_035807_inLine
+BABEL_BP_104_26654_20120130_035807_outLine
+BABEL_BP_104_26801_20120401_022159_inLine
+BABEL_BP_104_26946_20120130_034221_outLine
+BABEL_BP_104_27184_20120505_152626_outLine
+BABEL_BP_104_27267_20120325_002713_outLine
+BABEL_BP_104_27298_20111225_192028_inLine
+BABEL_BP_104_27298_20111225_192028_outLine
+BABEL_BP_104_27374_20120318_190552_outLine
+BABEL_BP_104_28066_20120318_173932_inLine
+BABEL_BP_104_28066_20120318_173932_outLine
+BABEL_BP_104_28281_20120111_045749_inLine
+BABEL_BP_104_28281_20120111_045749_outLine
+BABEL_BP_104_28289_20120310_202856_inLine
+BABEL_BP_104_28289_20120310_202856_outLine
+BABEL_BP_104_28330_20120306_194033_inLine
+BABEL_BP_104_28330_20120306_195756_inLine
+BABEL_BP_104_28734_20120126_205422_inLine
+BABEL_BP_104_28734_20120126_212950_inLine
+BABEL_BP_104_29009_20120319_164025_outLine
+BABEL_BP_104_29103_20120127_183035_inLine
+BABEL_BP_104_29103_20120127_183035_outLine
+BABEL_BP_104_29757_20120208_191006_inLine
+BABEL_BP_104_29757_20120208_191006_outLine
+BABEL_BP_104_29809_20120209_044252_inLine
+BABEL_BP_104_29809_20120209_044252_outLine
+BABEL_BP_104_29967_20120208_201355_inLine
+BABEL_BP_104_29967_20120208_201355_outLine
+BABEL_BP_104_30031_20120319_035012_inLine
+BABEL_BP_104_30031_20120319_035012_outLine
+BABEL_BP_104_30040_20120313_211534_inLine
+BABEL_BP_104_30040_20120313_211534_outLine
+BABEL_BP_104_30040_20120313_212609_inLine
+BABEL_BP_104_30040_20120313_212609_outLine
+BABEL_BP_104_30143_20111227_132440_inLine
+BABEL_BP_104_30197_20120213_160025_inLine
+BABEL_BP_104_30197_20120213_160025_outLine
+BABEL_BP_104_30271_20120205_163755_inLine
+BABEL_BP_104_30271_20120205_165111_inLine
+BABEL_BP_104_30620_20120321_151904_inLine
+BABEL_BP_104_30620_20120321_151904_outLine
+BABEL_BP_104_30628_20120219_182744_inLine
+BABEL_BP_104_30628_20120219_182744_outLine
+BABEL_BP_104_30711_20120219_175435_outLine
+BABEL_BP_104_30847_20120128_045058_inLine
+BABEL_BP_104_30847_20120128_050033_inLine
+BABEL_BP_104_30848_20120204_154057_inLine
+BABEL_BP_104_30848_20120204_154058_outLine
+BABEL_BP_104_31046_20120203_161436_inLine
+BABEL_BP_104_31095_20120210_132537_inLine
+BABEL_BP_104_31215_20120228_003446_inLine
+BABEL_BP_104_31215_20120228_003446_outLine
+BABEL_BP_104_31215_20120228_032743_inLine
+BABEL_BP_104_31215_20120228_032743_outLine
+BABEL_BP_104_31663_20120210_140419_inLine
+BABEL_BP_104_31663_20120210_140419_outLine
+BABEL_BP_104_31919_20120405_023221_inLine
+BABEL_BP_104_31926_20120319_040036_outLine
+BABEL_BP_104_32703_20120110_212645_outLine
+BABEL_BP_104_32703_20120110_212646_inLine
+BABEL_BP_104_32956_20120221_133851_inLine
+BABEL_BP_104_32956_20120221_133851_outLine
+BABEL_BP_104_33223_20120319_050332_inLine
+BABEL_BP_104_33272_20120318_233319_outLine
+BABEL_BP_104_33337_20120220_005047_inLine
+BABEL_BP_104_33337_20120220_005047_outLine
+BABEL_BP_104_33510_20120324_185136_outLine
+BABEL_BP_104_33518_20120218_204645_inLine
+BABEL_BP_104_33518_20120218_204645_outLine
+BABEL_BP_104_33846_20120123_194027_inLine
+BABEL_BP_104_34037_20120318_232512_inLine
+BABEL_BP_104_34037_20120318_235541_inLine
+BABEL_BP_104_34164_20120221_141502_inLine
+BABEL_BP_104_34164_20120221_141502_outLine
+BABEL_BP_104_34188_20120219_000455_inLine
+BABEL_BP_104_34188_20120219_000455_outLine
+BABEL_BP_104_34335_20111225_224055_outLine
+BABEL_BP_104_34540_20120314_153124_inLine
+BABEL_BP_104_34540_20120314_153124_outLine
+BABEL_BP_104_34708_20120125_173011_inLine
+BABEL_BP_104_34708_20120125_173011_outLine
+BABEL_BP_104_34714_20120313_142435_inLine
+BABEL_BP_104_34714_20120313_142435_outLine
+BABEL_BP_104_34833_20120215_025837_inLine
+BABEL_BP_104_34833_20120215_025837_outLine
+BABEL_BP_104_34994_20120314_001810_outLine
+BABEL_BP_104_34994_20120314_003701_outLine
+BABEL_BP_104_35073_20120208_223917_outLine
+BABEL_BP_104_35152_20111229_025446_inLine
+BABEL_BP_104_35152_20111229_025446_outLine
+BABEL_BP_104_35241_20120314_052346_inLine
+BABEL_BP_104_35241_20120314_052346_outLine
+BABEL_BP_104_35444_20120310_190608_inLine
+BABEL_BP_104_35444_20120310_190608_outLine
+BABEL_BP_104_35544_20120131_174538_inLine
+BABEL_BP_104_35544_20120131_174538_outLine
+BABEL_BP_104_35646_20120202_222418_inLine
+BABEL_BP_104_35646_20120202_222418_outLine
+BABEL_BP_104_35874_20120403_213324_inLine
+BABEL_BP_104_35916_20120204_030147_inLine
+BABEL_BP_104_35916_20120204_030147_outLine
+BABEL_BP_104_35923_20120216_021137_inLine
+BABEL_BP_104_35923_20120216_021137_outLine
+BABEL_BP_104_35983_20120324_152856_outLine
+BABEL_BP_104_36017_20120123_222703_outLine
+BABEL_BP_104_36138_20120206_210519_inLine
+BABEL_BP_104_36138_20120206_210519_outLine
+BABEL_BP_104_36413_20120310_185758_inLine
+BABEL_BP_104_36413_20120310_185758_outLine
+BABEL_BP_104_36487_20120209_211827_inLine
+BABEL_BP_104_36487_20120209_211827_outLine
+BABEL_BP_104_37131_20120318_210220_inLine
+BABEL_BP_104_37131_20120318_210220_outLine
+BABEL_BP_104_37135_20120219_044437_inLine
+BABEL_BP_104_37135_20120219_044437_outLine
+BABEL_BP_104_37357_20120321_212732_inLine
+BABEL_BP_104_37357_20120321_212732_outLine
+BABEL_BP_104_37551_20120201_032910_inLine
+BABEL_BP_104_37551_20120201_032910_outLine
+BABEL_BP_104_37593_20120130_203434_inLine
+BABEL_BP_104_37593_20120130_203434_outLine
+BABEL_BP_104_37687_20120316_175600_outLine
+BABEL_BP_104_37731_20120213_034923_inLine
+BABEL_BP_104_37731_20120213_034923_outLine
+BABEL_BP_104_37733_20120207_040916_inLine
+BABEL_BP_104_37733_20120207_040936_outLine
+BABEL_BP_104_38163_20120109_022356_inLine
+BABEL_BP_104_38163_20120109_022356_outLine
+BABEL_BP_104_38223_20120129_195918_inLine
+BABEL_BP_104_38223_20120129_195918_outLine
+BABEL_BP_104_38255_20120312_231219_inLine
+BABEL_BP_104_38255_20120312_231819_inLine
+BABEL_BP_104_38255_20120322_142237_inLine
+BABEL_BP_104_38479_20120213_011154_inLine
+BABEL_BP_104_38479_20120213_011154_outLine
+BABEL_BP_104_38563_20120127_181357_outLine
+BABEL_BP_104_38685_20120205_205815_inLine
+BABEL_BP_104_38685_20120205_205815_outLine
+BABEL_BP_104_38902_20120331_152704_inLine
+BABEL_BP_104_39119_20120203_183149_inLine
+BABEL_BP_104_39119_20120203_183149_outLine
+BABEL_BP_104_39178_20120109_195710_inLine
+BABEL_BP_104_39320_20120110_190913_inLine
+BABEL_BP_104_39320_20120110_190913_outLine
+BABEL_BP_104_39390_20120322_042714_outLine
+BABEL_BP_104_39525_20120217_200400_inLine
+BABEL_BP_104_39525_20120217_200400_outLine
+BABEL_BP_104_39999_20120326_194721_inLine
+BABEL_BP_104_39999_20120326_194721_outLine
+BABEL_BP_104_40136_20120222_030818_inLine
+BABEL_BP_104_40136_20120222_030823_outLine
+BABEL_BP_104_40585_20120309_200652_outLine
+BABEL_BP_104_40607_20120324_163524_inLine
+BABEL_BP_104_40612_20120106_024347_inLine
+BABEL_BP_104_40612_20120106_024347_outLine
+BABEL_BP_104_40640_20120131_044455_outLine
+BABEL_BP_104_40701_20120209_031300_inLine
+BABEL_BP_104_40701_20120209_031301_outLine
+BABEL_BP_104_40866_20120119_030533_inLine
+BABEL_BP_104_40866_20120119_030533_outLine
+BABEL_BP_104_40889_20120227_180714_inLine
+BABEL_BP_104_40889_20120227_180714_outLine
+BABEL_BP_104_41306_20120223_191213_inLine
+BABEL_BP_104_41306_20120223_191213_outLine
+BABEL_BP_104_41498_20120309_223111_outLine
+BABEL_BP_104_41531_20120331_010320_inLine
+BABEL_BP_104_41531_20120331_010320_outLine
+BABEL_BP_104_41610_20111225_214331_inLine
+BABEL_BP_104_41610_20111225_214331_outLine
+BABEL_BP_104_41871_20120310_200016_inLine
+BABEL_BP_104_42006_20120304_162643_inLine
+BABEL_BP_104_42006_20120304_162643_outLine
+BABEL_BP_104_42145_20120127_042217_inLine
+BABEL_BP_104_42145_20120127_042217_outLine
+BABEL_BP_104_42571_20120229_014427_inLine
+BABEL_BP_104_42571_20120229_014427_outLine
+BABEL_BP_104_42571_20120229_020000_inLine
+BABEL_BP_104_42571_20120229_020000_outLine
+BABEL_BP_104_42836_20120331_181552_outLine
+BABEL_BP_104_42929_20120307_150902_inLine
+BABEL_BP_104_42929_20120307_150902_outLine
+BABEL_BP_104_42952_20120318_233729_inLine
+BABEL_BP_104_43036_20120128_024308_inLine
+BABEL_BP_104_43036_20120128_025047_inLine
+BABEL_BP_104_43036_20120128_025047_outLine
+BABEL_BP_104_43036_20120128_030158_inLine
+BABEL_BP_104_43036_20120128_030158_outLine
+BABEL_BP_104_43051_20120207_002833_inLine
+BABEL_BP_104_43267_20120229_211432_inLine
+BABEL_BP_104_43267_20120229_211432_outLine
+BABEL_BP_104_43322_20120126_040725_inLine
+BABEL_BP_104_43462_20120216_210005_inLine
+BABEL_BP_104_43462_20120216_210005_outLine
+BABEL_BP_104_43480_20120326_155717_inLine
+BABEL_BP_104_43501_20120331_220724_outLine
+BABEL_BP_104_43501_20120331_222326_outLine
+BABEL_BP_104_43571_20120203_040537_inLine
+BABEL_BP_104_43571_20120203_040537_outLine
+BABEL_BP_104_43684_20120128_182736_outLine
+BABEL_BP_104_43714_20120219_132220_inLine
+BABEL_BP_104_43714_20120219_132220_outLine
+BABEL_BP_104_43724_20120219_213737_inLine
+BABEL_BP_104_43724_20120219_213737_outLine
+BABEL_BP_104_43725_20120205_002936_inLine
+BABEL_BP_104_43725_20120205_002936_outLine
+BABEL_BP_104_43833_20120331_193735_outLine
+BABEL_BP_104_43974_20120320_213041_inLine
+BABEL_BP_104_43974_20120320_215224_inLine
+BABEL_BP_104_44103_20120218_005711_inLine
+BABEL_BP_104_44103_20120218_005711_outLine
+BABEL_BP_104_44141_20120209_171547_inLine
+BABEL_BP_104_44141_20120209_171547_outLine
+BABEL_BP_104_44267_20120404_010500_inLine
+BABEL_BP_104_44278_20120318_152209_inLine
+BABEL_BP_104_44278_20120318_152209_outLine
+BABEL_BP_104_44468_20120222_125222_inLine
+BABEL_BP_104_44468_20120222_125222_outLine
+BABEL_BP_104_44515_20120326_144709_inLine
+BABEL_BP_104_44515_20120326_150551_inLine
+BABEL_BP_104_44799_20120119_040419_inLine
+BABEL_BP_104_44799_20120119_040419_outLine
+BABEL_BP_104_44976_20120128_211450_inLine
+BABEL_BP_104_44976_20120128_211450_outLine
+BABEL_BP_104_45064_20120218_205233_inLine
+BABEL_BP_104_45064_20120218_205233_outLine
+BABEL_BP_104_45188_20120121_023218_outLine
+BABEL_BP_104_45356_20120324_234702_outLine
+BABEL_BP_104_45403_20111222_014909_outLine
+BABEL_BP_104_45562_20120131_200753_inLine
+BABEL_BP_104_45823_20120103_201816_inLine
+BABEL_BP_104_45842_20120210_164857_inLine
+BABEL_BP_104_45842_20120210_164857_outLine
+BABEL_BP_104_45926_20120127_162212_inLine
+BABEL_BP_104_45926_20120127_162212_outLine
+BABEL_BP_104_45947_20120313_214251_inLine
+BABEL_BP_104_46004_20120223_160156_inLine
+BABEL_BP_104_46004_20120223_160156_outLine
+BABEL_BP_104_46168_20120217_200729_inLine
+BABEL_BP_104_46168_20120217_200729_outLine
+BABEL_BP_104_46319_20120210_225923_outLine
+BABEL_BP_104_46361_20120126_004615_outLine
+BABEL_BP_104_46455_20120218_222247_inLine
+BABEL_BP_104_46455_20120218_222247_outLine
+BABEL_BP_104_46734_20120219_025954_outLine
+BABEL_BP_104_46827_20120210_134310_inLine
+BABEL_BP_104_46827_20120210_134310_outLine
+BABEL_BP_104_46900_20120110_181315_inLine
+BABEL_BP_104_46900_20120110_181315_outLine
+BABEL_BP_104_46979_20120223_173811_inLine
+BABEL_BP_104_46979_20120223_173811_outLine
+BABEL_BP_104_47015_20120222_053105_inLine
+BABEL_BP_104_47015_20120222_053105_outLine
+BABEL_BP_104_47177_20120127_223720_outLine
+BABEL_BP_104_47424_20120120_233633_inLine
+BABEL_BP_104_47424_20120120_233633_outLine
+BABEL_BP_104_47836_20120331_183954_outLine
+BABEL_BP_104_47916_20120401_014343_inLine
+BABEL_BP_104_47917_20120319_003035_inLine
+BABEL_BP_104_47917_20120319_003035_outLine
+BABEL_BP_104_48000_20120323_171146_inLine
+BABEL_BP_104_48000_20120323_171146_outLine
+BABEL_BP_104_48001_20120204_231603_inLine
+BABEL_BP_104_48001_20120204_231603_outLine
+BABEL_BP_104_48259_20120217_200412_inLine
+BABEL_BP_104_48259_20120217_200412_outLine
+BABEL_BP_104_48365_20120212_043935_inLine
+BABEL_BP_104_48365_20120212_043935_outLine
+BABEL_BP_104_48416_20120218_203541_inLine
+BABEL_BP_104_48416_20120218_203542_outLine
+BABEL_BP_104_48834_20111221_032658_inLine
+BABEL_BP_104_48834_20111221_032658_outLine
+BABEL_BP_104_48944_20120218_011825_inLine
+BABEL_BP_104_48944_20120218_011825_outLine
+BABEL_BP_104_48946_20120320_192250_inLine
+BABEL_BP_104_48946_20120320_192250_outLine
+BABEL_BP_104_49141_20120330_015342_inLine
+BABEL_BP_104_49629_20120312_155816_outLine
+BABEL_BP_104_50030_20120404_005406_inLine
+BABEL_BP_104_50407_20120318_232348_inLine
+BABEL_BP_104_50407_20120318_232348_outLine
+BABEL_BP_104_50523_20120314_033747_inLine
+BABEL_BP_104_50523_20120314_033747_outLine
+BABEL_BP_104_50523_20120314_231004_inLine
+BABEL_BP_104_50523_20120314_231004_outLine
+BABEL_BP_104_50583_20120404_000547_inLine
+BABEL_BP_104_50682_20120116_205741_inLine
+BABEL_BP_104_50682_20120116_205741_outLine
+BABEL_BP_104_50820_20120213_140300_inLine
+BABEL_BP_104_50820_20120213_140300_outLine
+BABEL_BP_104_50940_20120309_160847_inLine
+BABEL_BP_104_50940_20120322_132036_inLine
+BABEL_BP_104_51024_20120131_172745_inLine
+BABEL_BP_104_51047_20120319_042347_outLine
+BABEL_BP_104_51079_20120316_150756_outLine
+BABEL_BP_104_51329_20120222_203129_inLine
+BABEL_BP_104_51329_20120222_203129_outLine
+BABEL_BP_104_51329_20120222_205332_inLine
+BABEL_BP_104_51329_20120222_205332_outLine
+BABEL_BP_104_51388_20120221_175113_inLine
+BABEL_BP_104_51519_20120220_052247_inLine
+BABEL_BP_104_51519_20120220_052247_outLine
+BABEL_BP_104_51570_20120118_225333_inLine
+BABEL_BP_104_51570_20120118_225333_outLine
+BABEL_BP_104_51716_20120221_005215_inLine
+BABEL_BP_104_51716_20120221_005215_outLine
+BABEL_BP_104_52067_20120313_210602_inLine
+BABEL_BP_104_52067_20120313_210602_outLine
+BABEL_BP_104_52116_20120316_225019_inLine
+BABEL_BP_104_52116_20120316_225020_outLine
+BABEL_BP_104_52300_20120203_210256_inLine
+BABEL_BP_104_52300_20120203_210256_outLine
+BABEL_BP_104_52359_20120328_212912_inLine
+BABEL_BP_104_52753_20120209_225916_inLine
+BABEL_BP_104_52753_20120209_225916_outLine
+BABEL_BP_104_52753_20120213_014050_inLine
+BABEL_BP_104_52753_20120213_014050_outLine
+BABEL_BP_104_52954_20120313_170902_inLine
+BABEL_BP_104_52954_20120313_170902_outLine
+BABEL_BP_104_53159_20120402_035901_inLine
+BABEL_BP_104_53159_20120402_035901_outLine
+BABEL_BP_104_53262_20120311_192937_inLine
+BABEL_BP_104_53334_20120309_184805_inLine
+BABEL_BP_104_53334_20120309_184805_outLine
+BABEL_BP_104_53346_20120205_222257_inLine
+BABEL_BP_104_53659_20120218_205643_inLine
+BABEL_BP_104_53659_20120218_205643_outLine
+BABEL_BP_104_53718_20120202_220720_outLine
+BABEL_BP_104_53820_20120327_182222_inLine
+BABEL_BP_104_53820_20120327_182222_outLine
+BABEL_BP_104_54263_20120114_032041_inLine
+BABEL_BP_104_54417_20120119_045736_inLine
+BABEL_BP_104_54417_20120119_045736_outLine
+BABEL_BP_104_54780_20120403_231516_inLine
+BABEL_BP_104_54780_20120403_232436_inLine
+BABEL_BP_104_54909_20120130_194003_inLine
+BABEL_BP_104_54909_20120130_194003_outLine
+BABEL_BP_104_54975_20120111_002324_inLine
+BABEL_BP_104_54975_20120111_002324_outLine
+BABEL_BP_104_55131_20111225_220753_outLine
+BABEL_BP_104_55213_20120331_185824_outLine
+BABEL_BP_104_55316_20111221_024834_inLine
+BABEL_BP_104_55382_20120318_154619_inLine
+BABEL_BP_104_55544_20120108_200418_inLine
+BABEL_BP_104_55544_20120108_200418_outLine
+BABEL_BP_104_55668_20120212_011829_inLine
+BABEL_BP_104_55668_20120212_011829_outLine
+BABEL_BP_104_55855_20111220_211829_outLine
+BABEL_BP_104_56119_20120216_183711_inLine
+BABEL_BP_104_56119_20120216_183711_outLine
+BABEL_BP_104_56201_20120126_180227_outLine
+BABEL_BP_104_56308_20120402_024809_outLine
+BABEL_BP_104_56704_20120120_155806_inLine
+BABEL_BP_104_56704_20120120_155806_outLine
+BABEL_BP_104_56753_20120322_204356_outLine
+BABEL_BP_104_56805_20120320_045112_inLine
+BABEL_BP_104_56805_20120320_045112_outLine
+BABEL_BP_104_57005_20120321_034143_inLine
+BABEL_BP_104_57082_20120110_024829_inLine
+BABEL_BP_104_57116_20120110_180036_inLine
+BABEL_BP_104_57167_20111230_213737_outLine
+BABEL_BP_104_57210_20120321_020212_inLine
+BABEL_BP_104_57210_20120321_020212_outLine
+BABEL_BP_104_57263_20120302_211404_inLine
+BABEL_BP_104_57320_20120204_230109_inLine
+BABEL_BP_104_57320_20120204_230109_outLine
+BABEL_BP_104_57492_20120316_185552_inLine
+BABEL_BP_104_57492_20120316_185552_outLine
+BABEL_BP_104_57531_20120203_165801_inLine
+BABEL_BP_104_57531_20120203_165801_outLine
+BABEL_BP_104_57618_20120203_144717_inLine
+BABEL_BP_104_57618_20120203_144717_outLine
+BABEL_BP_104_57672_20120204_030206_outLine
+BABEL_BP_104_58041_20120129_165617_inLine
+BABEL_BP_104_58041_20120129_165617_outLine
+BABEL_BP_104_58089_20120111_210636_inLine
+BABEL_BP_104_58089_20120111_210636_outLine
+BABEL_BP_104_58094_20120211_202938_outLine
+BABEL_BP_104_58149_20120218_161613_outLine
+BABEL_BP_104_58188_20120124_150608_inLine
+BABEL_BP_104_58188_20120124_150608_outLine
+BABEL_BP_104_58298_20120208_214852_inLine
+BABEL_BP_104_58298_20120208_214852_outLine
+BABEL_BP_104_58807_20120327_175726_outLine
+BABEL_BP_104_58939_20120212_184855_inLine
+BABEL_BP_104_58939_20120212_184855_outLine
+BABEL_BP_104_58963_20120331_015840_inLine
+BABEL_BP_104_58963_20120331_015840_outLine
+BABEL_BP_104_59158_20120212_005248_inLine
+BABEL_BP_104_59158_20120212_005248_outLine
+BABEL_BP_104_59183_20120312_190106_inLine
+BABEL_BP_104_59219_20120131_225115_outLine
+BABEL_BP_104_59399_20120318_144751_inLine
+BABEL_BP_104_59399_20120318_144752_outLine
+BABEL_BP_104_59482_20120309_190927_inLine
+BABEL_BP_104_59482_20120309_190927_outLine
+BABEL_BP_104_59681_20120123_213306_inLine
+BABEL_BP_104_59681_20120123_213306_outLine
+BABEL_BP_104_59835_20120212_162802_inLine
+BABEL_BP_104_59835_20120212_162802_outLine
+BABEL_BP_104_60462_20120201_181707_inLine
+BABEL_BP_104_60462_20120201_181707_outLine
+BABEL_BP_104_60737_20120208_204130_inLine
+BABEL_BP_104_60737_20120208_204130_outLine
+BABEL_BP_104_60806_20120213_161652_outLine
+BABEL_BP_104_60921_20120220_050615_inLine
+BABEL_BP_104_60921_20120220_050615_outLine
+BABEL_BP_104_61029_20120201_224200_outLine
+BABEL_BP_104_61166_20120220_033838_inLine
+BABEL_BP_104_61166_20120220_034717_inLine
+BABEL_BP_104_61327_20120326_140350_inLine
+BABEL_BP_104_61327_20120326_140350_outLine
+BABEL_BP_104_61523_20120212_035522_inLine
+BABEL_BP_104_61606_20120131_174533_inLine
+BABEL_BP_104_61655_20120208_203143_inLine
+BABEL_BP_104_61655_20120208_203143_outLine
+BABEL_BP_104_61733_20120205_220251_outLine
+BABEL_BP_104_61735_20120314_012744_inLine
+BABEL_BP_104_61909_20120320_190739_inLine
+BABEL_BP_104_61909_20120320_190739_outLine
+BABEL_BP_104_62182_20111226_205547_inLine
+BABEL_BP_104_62182_20111226_205547_outLine
+BABEL_BP_104_62388_20120204_031740_inLine
+BABEL_BP_104_62388_20120204_031740_outLine
+BABEL_BP_104_62815_20120318_025812_outLine
+BABEL_BP_104_62816_20120312_153937_outLine
+BABEL_BP_104_62978_20120318_211036_inLine
+BABEL_BP_104_62978_20120318_211036_outLine
+BABEL_BP_104_63111_20120204_232445_outLine
+BABEL_BP_104_63215_20120213_040737_inLine
+BABEL_BP_104_63215_20120213_040737_outLine
+BABEL_BP_104_63220_20120131_155658_inLine
+BABEL_BP_104_63220_20120131_155658_outLine
+BABEL_BP_104_63390_20120123_212718_outLine
+BABEL_BP_104_63397_20120217_194928_inLine
+BABEL_BP_104_63397_20120217_194928_outLine
+BABEL_BP_104_63603_20120128_213000_outLine
+BABEL_BP_104_63784_20120216_015608_inLine
+BABEL_BP_104_63784_20120216_015608_outLine
+BABEL_BP_104_63929_20120319_155419_inLine
+BABEL_BP_104_63929_20120319_155419_outLine
+BABEL_BP_104_63934_20120318_201706_inLine
+BABEL_BP_104_63934_20120318_201706_outLine
+BABEL_BP_104_64055_20120111_034236_outLine
+BABEL_BP_104_64297_20120205_031234_inLine
+BABEL_BP_104_64297_20120205_031234_outLine
+BABEL_BP_104_64646_20120319_163845_outLine
+BABEL_BP_104_64695_20120128_014256_inLine
+BABEL_BP_104_64695_20120128_014256_outLine
+BABEL_BP_104_64820_20120111_032311_inLine
+BABEL_BP_104_64820_20120111_032311_outLine
+BABEL_BP_104_64905_20120206_221140_inLine
+BABEL_BP_104_64905_20120206_221140_outLine
+BABEL_BP_104_64990_20120119_173958_inLine
+BABEL_BP_104_64990_20120119_173958_outLine
+BABEL_BP_104_65211_20120119_015405_inLine
+BABEL_BP_104_65211_20120119_015405_outLine
+BABEL_BP_104_65341_20120220_222356_inLine
+BABEL_BP_104_65341_20120220_222356_outLine
+BABEL_BP_104_65357_20120309_190849_inLine
+BABEL_BP_104_65357_20120309_190849_outLine
+BABEL_BP_104_65590_20120109_001414_inLine
+BABEL_BP_104_65590_20120109_001414_outLine
+BABEL_BP_104_65741_20120218_010022_inLine
+BABEL_BP_104_65788_20120131_172922_outLine
+BABEL_BP_104_65954_20120128_163139_inLine
+BABEL_BP_104_65954_20120128_163139_outLine
+BABEL_BP_104_65974_20120316_195524_inLine
+BABEL_BP_104_65974_20120316_195524_outLine
+BABEL_BP_104_66351_20120317_181035_inLine
+BABEL_BP_104_66351_20120317_181035_outLine
+BABEL_BP_104_66643_20120316_004947_inLine
+BABEL_BP_104_66643_20120316_004947_outLine
+BABEL_BP_104_66784_20111225_190506_outLine
+BABEL_BP_104_66879_20120213_004555_inLine
+BABEL_BP_104_66879_20120213_004555_outLine
+BABEL_BP_104_67106_20120208_201829_inLine
+BABEL_BP_104_67106_20120208_201829_outLine
+BABEL_BP_104_67374_20120210_034059_inLine
+BABEL_BP_104_67374_20120210_034100_outLine
+BABEL_BP_104_67423_20120205_220658_outLine
+BABEL_BP_104_67534_20120204_181436_inLine
+BABEL_BP_104_67534_20120204_181436_outLine
+BABEL_BP_104_67655_20120218_035728_outLine
+BABEL_BP_104_67684_20120316_135144_inLine
+BABEL_BP_104_67684_20120316_135144_outLine
+BABEL_BP_104_67685_20120217_235729_inLine
+BABEL_BP_104_67685_20120217_235729_outLine
+BABEL_BP_104_67718_20120131_164436_inLine
+BABEL_BP_104_67718_20120131_164436_outLine
+BABEL_BP_104_67928_20120109_174230_inLine
+BABEL_BP_104_67928_20120109_174230_outLine
+BABEL_BP_104_68077_20120219_155535_outLine
+BABEL_BP_104_68111_20120321_185146_outLine
+BABEL_BP_104_68144_20120210_223106_outLine
+BABEL_BP_104_68189_20120128_005011_inLine
+BABEL_BP_104_68189_20120128_005011_outLine
+BABEL_BP_104_68209_20120219_045221_inLine
+BABEL_BP_104_68926_20120229_145934_inLine
+BABEL_BP_104_68926_20120229_145934_outLine
+BABEL_BP_104_68997_20120126_010839_inLine
+BABEL_BP_104_69127_20120402_221743_outLine
+BABEL_BP_104_69398_20111219_215754_inLine
+BABEL_BP_104_69638_20120205_022624_inLine
+BABEL_BP_104_69638_20120205_022624_outLine
+BABEL_BP_104_69656_20120129_050158_inLine
+BABEL_BP_104_69656_20120129_050158_outLine
+BABEL_BP_104_69656_20120129_051238_inLine
+BABEL_BP_104_69656_20120129_051238_outLine
+BABEL_BP_104_69771_20120220_034015_inLine
+BABEL_BP_104_69771_20120220_034015_outLine
+BABEL_BP_104_70207_20120209_001133_inLine
+BABEL_BP_104_70207_20120209_001133_outLine
+BABEL_BP_104_70333_20120210_033437_outLine
+BABEL_BP_104_70528_20120128_013553_inLine
+BABEL_BP_104_70528_20120128_013553_outLine
+BABEL_BP_104_70762_20120213_175054_outLine
+BABEL_BP_104_70858_20120204_012205_inLine
+BABEL_BP_104_70897_20120315_000410_inLine
+BABEL_BP_104_70897_20120315_000410_outLine
+BABEL_BP_104_70897_20120315_013535_inLine
+BABEL_BP_104_70897_20120315_013535_outLine
+BABEL_BP_104_71204_20120315_040441_inLine
+BABEL_BP_104_71324_20111220_215105_outLine
+BABEL_BP_104_71786_20120219_212052_outLine
+BABEL_BP_104_71948_20120210_012347_inLine
+BABEL_BP_104_71970_20120310_195048_inLine
+BABEL_BP_104_72179_20120129_175206_inLine
+BABEL_BP_104_72179_20120129_175206_outLine
+BABEL_BP_104_72480_20120211_223904_inLine
+BABEL_BP_104_72480_20120211_224426_inLine
+BABEL_BP_104_72693_20120209_005646_inLine
+BABEL_BP_104_72693_20120209_005646_outLine
+BABEL_BP_104_72709_20120209_034548_inLine
+BABEL_BP_104_72709_20120209_034548_outLine
+BABEL_BP_104_72874_20120213_191257_inLine
+BABEL_BP_104_72874_20120213_191257_outLine
+BABEL_BP_104_72910_20120310_185203_outLine
+BABEL_BP_104_73188_20120128_003921_inLine
+BABEL_BP_104_73188_20120128_003921_outLine
+BABEL_BP_104_73199_20120119_195108_outLine
+BABEL_BP_104_73403_20120320_183508_outLine
+BABEL_BP_104_73403_20120320_184757_outLine
+BABEL_BP_104_73450_20120206_024342_inLine
+BABEL_BP_104_73450_20120206_024342_outLine
+BABEL_BP_104_73607_20120203_163328_inLine
+BABEL_BP_104_73607_20120203_163328_outLine
+BABEL_BP_104_73925_20120123_233630_inLine
+BABEL_BP_104_73925_20120123_233630_outLine
+BABEL_BP_104_74261_20120331_191708_outLine
+BABEL_BP_104_74334_20111230_035012_inLine
+BABEL_BP_104_74940_20120228_225523_inLine
+BABEL_BP_104_74940_20120228_225523_outLine
+BABEL_BP_104_75390_20120218_133736_inLine
+BABEL_BP_104_75390_20120218_133736_outLine
+BABEL_BP_104_75402_20120319_160944_inLine
+BABEL_BP_104_75724_20120207_172820_outLine
+BABEL_BP_104_75822_20120205_214035_inLine
+BABEL_BP_104_75895_20120206_024214_inLine
+BABEL_BP_104_75895_20120206_024214_outLine
+BABEL_BP_104_76375_20120226_014726_inLine
+BABEL_BP_104_76375_20120226_014726_outLine
+BABEL_BP_104_76573_20120213_150121_inLine
+BABEL_BP_104_76573_20120213_150121_outLine
+BABEL_BP_104_76714_20120313_220017_inLine
+BABEL_BP_104_76714_20120313_220017_outLine
+BABEL_BP_104_76738_20120210_010510_inLine
+BABEL_BP_104_76742_20111215_203118_outLine
+BABEL_BP_104_76832_20120210_030141_outLine
+BABEL_BP_104_77097_20120214_235954_inLine
+BABEL_BP_104_77097_20120214_235954_outLine
+BABEL_BP_104_77256_20120309_064948_inLine
+BABEL_BP_104_77294_20120318_224422_inLine
+BABEL_BP_104_77294_20120318_224422_outLine
+BABEL_BP_104_77537_20120206_034628_outLine
+BABEL_BP_104_77693_20111228_014255_outLine
+BABEL_BP_104_77711_20120229_163050_inLine
+BABEL_BP_104_77711_20120229_163050_outLine
+BABEL_BP_104_77711_20120229_164115_inLine
+BABEL_BP_104_77711_20120229_164115_outLine
+BABEL_BP_104_78225_20120126_170942_outLine
+BABEL_BP_104_78254_20120209_222912_inLine
+BABEL_BP_104_78254_20120209_222912_outLine
+BABEL_BP_104_78254_20120209_234516_inLine
+BABEL_BP_104_78254_20120209_234516_outLine
+BABEL_BP_104_78367_20120105_012610_inLine
+BABEL_BP_104_78367_20120105_012610_outLine
+BABEL_BP_104_78443_20120128_211331_inLine
+BABEL_BP_104_78443_20120128_211331_outLine
+BABEL_BP_104_78452_20120316_005121_inLine
+BABEL_BP_104_78452_20120316_005121_outLine
+BABEL_BP_104_78452_20120316_005946_inLine
+BABEL_BP_104_78452_20120316_005946_outLine
+BABEL_BP_104_78462_20120112_181459_inLine
+BABEL_BP_104_78737_20120316_173217_inLine
+BABEL_BP_104_78737_20120316_173217_outLine
+BABEL_BP_104_78978_20120322_041159_inLine
+BABEL_BP_104_78978_20120322_042345_inLine
+BABEL_BP_104_79030_20120222_170416_inLine
+BABEL_BP_104_79030_20120222_170416_outLine
+BABEL_BP_104_79030_20120222_211653_inLine
+BABEL_BP_104_79030_20120222_211653_outLine
+BABEL_BP_104_79120_20120127_021912_inLine
+BABEL_BP_104_79120_20120127_021912_outLine
+BABEL_BP_104_79120_20120127_030132_inLine
+BABEL_BP_104_79120_20120127_030132_outLine
+BABEL_BP_104_79127_20120127_171446_outLine
+BABEL_BP_104_79156_20120126_191440_outLine
+BABEL_BP_104_79185_20120126_025253_inLine
+BABEL_BP_104_79185_20120126_025253_outLine
+BABEL_BP_104_79191_20120125_210322_inLine
+BABEL_BP_104_79191_20120125_210322_outLine
+BABEL_BP_104_79244_20111230_180239_inLine
+BABEL_BP_104_79378_20120302_011529_outLine
+BABEL_BP_104_79387_20120104_201110_inLine
+BABEL_BP_104_79387_20120104_201110_outLine
+BABEL_BP_104_79679_20120215_053807_inLine
+BABEL_BP_104_79679_20120215_053807_outLine
+BABEL_BP_104_79753_20120203_173233_inLine
+BABEL_BP_104_79753_20120203_173233_outLine
+BABEL_BP_104_79888_20120318_024215_outLine
+BABEL_BP_104_80105_20120205_233041_inLine
+BABEL_BP_104_80105_20120205_233041_outLine
+BABEL_BP_104_80134_20120313_215613_inLine
+BABEL_BP_104_80134_20120313_215613_outLine
+BABEL_BP_104_80226_20120210_182546_inLine
+BABEL_BP_104_80226_20120210_182546_outLine
+BABEL_BP_104_80284_20120109_235306_inLine
+BABEL_BP_104_80284_20120109_235306_outLine
+BABEL_BP_104_80424_20120207_221904_inLine
+BABEL_BP_104_80424_20120207_221904_outLine
+BABEL_BP_104_80559_20120319_152020_outLine
+BABEL_BP_104_80616_20120223_193040_inLine
+BABEL_BP_104_80616_20120223_193040_outLine
+BABEL_BP_104_80679_20120331_033903_outLine
+BABEL_BP_104_80815_20120322_001246_outLine
+BABEL_BP_104_80867_20120309_034536_inLine
+BABEL_BP_104_80867_20120309_034536_outLine
+BABEL_BP_104_80929_20120310_194854_inLine
+BABEL_BP_104_80929_20120310_194854_outLine
+BABEL_BP_104_81726_20120229_154500_inLine
+BABEL_BP_104_81726_20120229_154500_outLine
+BABEL_BP_104_81773_20120404_000845_outLine
+BABEL_BP_104_81923_20120128_004752_inLine
+BABEL_BP_104_81923_20120128_004752_outLine
+BABEL_BP_104_81996_20120128_185859_outLine
+BABEL_BP_104_82068_20120320_233307_inLine
+BABEL_BP_104_82068_20120320_234626_inLine
+BABEL_BP_104_82149_20120112_163113_inLine
+BABEL_BP_104_82499_20120215_024134_inLine
+BABEL_BP_104_82499_20120215_024134_outLine
+BABEL_BP_104_82526_20120201_124800_inLine
+BABEL_BP_104_82526_20120201_124800_outLine
+BABEL_BP_104_82583_20120211_041829_outLine
+BABEL_BP_104_82595_20120324_154901_outLine
+BABEL_BP_104_82677_20120206_173830_outLine
+BABEL_BP_104_82838_20120313_152742_inLine
+BABEL_BP_104_82838_20120313_152742_outLine
+BABEL_BP_104_82838_20120313_154639_inLine
+BABEL_BP_104_82838_20120313_154639_outLine
+BABEL_BP_104_82849_20120212_185110_inLine
+BABEL_BP_104_82849_20120212_185110_outLine
+BABEL_BP_104_82964_20120218_181351_outLine
+BABEL_BP_104_83050_20120114_231129_inLine
+BABEL_BP_104_83050_20120114_231129_outLine
+BABEL_BP_104_83072_20120213_170201_inLine
+BABEL_BP_104_83072_20120213_170201_outLine
+BABEL_BP_104_83112_20120204_161112_inLine
+BABEL_BP_104_83112_20120204_161112_outLine
+BABEL_BP_104_83747_20120120_153904_outLine
+BABEL_BP_104_83835_20120321_145755_inLine
+BABEL_BP_104_83835_20120321_145755_outLine
+BABEL_BP_104_83866_20120206_040504_inLine
+BABEL_BP_104_83866_20120206_040505_outLine
+BABEL_BP_104_83941_20120119_030904_inLine
+BABEL_BP_104_83941_20120119_030904_outLine
+BABEL_BP_104_84132_20120312_054349_outLine
+BABEL_BP_104_84315_20120318_184410_outLine
+BABEL_BP_104_84360_20111228_033339_inLine
+BABEL_BP_104_84360_20111228_033339_outLine
+BABEL_BP_104_84854_20120129_233819_inLine
+BABEL_BP_104_84854_20120129_233819_outLine
+BABEL_BP_104_84885_20120217_215436_inLine
+BABEL_BP_104_84885_20120217_215436_outLine
+BABEL_BP_104_84950_20120130_131546_inLine
+BABEL_BP_104_84950_20120130_131546_outLine +BABEL_BP_104_84985_20120319_172452_outLine +BABEL_BP_104_84985_20120319_173047_outLine +BABEL_BP_104_85147_20120129_180533_inLine +BABEL_BP_104_85147_20120129_180533_outLine +BABEL_BP_104_85272_20120127_032845_inLine +BABEL_BP_104_85272_20120127_032845_outLine +BABEL_BP_104_85388_20120128_190259_inLine +BABEL_BP_104_85388_20120128_190259_outLine +BABEL_BP_104_85558_20120413_044033_inLine +BABEL_BP_104_85579_20120205_170917_inLine +BABEL_BP_104_85579_20120205_170917_outLine +BABEL_BP_104_85597_20120320_231227_inLine +BABEL_BP_104_86528_20120128_211228_inLine +BABEL_BP_104_86537_20120128_022125_inLine +BABEL_BP_104_86537_20120128_023523_inLine +BABEL_BP_104_87032_20120111_203623_inLine +BABEL_BP_104_87032_20120111_203623_outLine +BABEL_BP_104_87067_20120324_182930_inLine +BABEL_BP_104_87067_20120324_182930_outLine +BABEL_BP_104_87422_20120212_021635_outLine +BABEL_BP_104_87453_20120131_210831_inLine +BABEL_BP_104_87453_20120131_210831_outLine +BABEL_BP_104_87517_20120207_200619_inLine +BABEL_BP_104_87517_20120207_200619_outLine +BABEL_BP_104_87970_20120221_172638_inLine +BABEL_BP_104_87970_20120221_172638_outLine +BABEL_BP_104_88006_20120207_214550_inLine +BABEL_BP_104_88006_20120207_214550_outLine +BABEL_BP_104_88070_20120318_164350_outLine +BABEL_BP_104_88434_20120319_170128_inLine +BABEL_BP_104_88434_20120319_170128_outLine +BABEL_BP_104_88604_20120111_001257_inLine +BABEL_BP_104_88604_20120111_001257_outLine +BABEL_BP_104_88921_20120205_215225_inLine +BABEL_BP_104_88921_20120205_215225_outLine +BABEL_BP_104_89036_20120327_211455_inLine +BABEL_BP_104_89053_20120129_232038_inLine +BABEL_BP_104_89053_20120129_232038_outLine +BABEL_BP_104_89402_20120205_045136_outLine +BABEL_BP_104_89925_20120202_000208_inLine +BABEL_BP_104_89925_20120202_000208_outLine +BABEL_BP_104_89952_20120131_212850_inLine +BABEL_BP_104_89952_20120131_212850_outLine +BABEL_BP_104_90022_20120207_051223_inLine +BABEL_BP_104_90022_20120207_051223_outLine +BABEL_BP_104_90263_20120205_044035_inLine +BABEL_BP_104_90263_20120205_044035_outLine +BABEL_BP_104_90310_20120129_024342_outLine +BABEL_BP_104_91161_20120311_032449_inLine +BABEL_BP_104_91161_20120311_032449_outLine +BABEL_BP_104_91495_20120210_163107_inLine +BABEL_BP_104_91495_20120210_163107_outLine +BABEL_BP_104_91875_20120210_004013_inLine +BABEL_BP_104_91875_20120210_004013_outLine +BABEL_BP_104_91880_20120226_221957_inLine +BABEL_BP_104_91880_20120226_221957_outLine +BABEL_BP_104_92000_20120206_011350_inLine +BABEL_BP_104_92000_20120206_011350_outLine +BABEL_BP_104_92310_20120206_033517_inLine +BABEL_BP_104_92310_20120206_033517_outLine +BABEL_BP_104_92342_20120320_041334_inLine +BABEL_BP_104_92342_20120320_041334_outLine +BABEL_BP_104_92636_20120128_193247_inLine +BABEL_BP_104_92636_20120128_193247_outLine +BABEL_BP_104_92679_20111226_171331_outLine +BABEL_BP_104_92722_20120209_235113_outLine +BABEL_BP_104_92793_20120118_235358_inLine +BABEL_BP_104_93129_20120218_130813_inLine +BABEL_BP_104_93129_20120218_130813_outLine +BABEL_BP_104_93227_20120216_190245_inLine +BABEL_BP_104_93227_20120216_190245_outLine +BABEL_BP_104_93300_20120221_135558_inLine +BABEL_BP_104_93300_20120221_135558_outLine +BABEL_BP_104_93358_20120321_002737_inLine +BABEL_BP_104_93358_20120321_003427_inLine +BABEL_BP_104_93713_20120121_004435_inLine +BABEL_BP_104_93730_20120220_052912_outLine +BABEL_BP_104_93730_20120220_054726_outLine +BABEL_BP_104_93844_20120316_014157_inLine +BABEL_BP_104_93844_20120327_194612_inLine 
+BABEL_BP_104_93976_20120206_181449_outLine +BABEL_BP_104_94051_20120309_174814_outLine +BABEL_BP_104_94533_20120128_020431_inLine +BABEL_BP_104_94533_20120128_020431_outLine +BABEL_BP_104_94572_20120321_022026_inLine +BABEL_BP_104_94683_20120126_024342_inLine +BABEL_BP_104_94775_20120321_230436_inLine +BABEL_BP_104_94775_20120321_230436_outLine +BABEL_BP_104_94793_20120204_043218_inLine +BABEL_BP_104_94793_20120204_043218_outLine +BABEL_BP_104_94951_20120110_231948_inLine +BABEL_BP_104_94951_20120110_231948_outLine +BABEL_BP_104_95202_20120309_185925_inLine +BABEL_BP_104_95202_20120309_185925_outLine +BABEL_BP_104_95349_20111229_162101_inLine +BABEL_BP_104_95360_20120205_133312_inLine +BABEL_BP_104_95360_20120205_133312_outLine +BABEL_BP_104_95465_20120223_040653_inLine +BABEL_BP_104_95465_20120223_040653_outLine +BABEL_BP_104_95904_20120218_183758_inLine +BABEL_BP_104_95904_20120218_183758_outLine +BABEL_BP_104_96343_20120130_143444_outLine +BABEL_BP_104_96621_20120127_235745_inLine +BABEL_BP_104_96621_20120127_235745_outLine +BABEL_BP_104_96690_20120321_005155_inLine +BABEL_BP_104_96811_20120217_021933_inLine +BABEL_BP_104_96811_20120217_021933_outLine +BABEL_BP_104_96956_20120209_025537_inLine +BABEL_BP_104_96956_20120209_025537_outLine +BABEL_BP_104_97050_20120314_144713_outLine +BABEL_BP_104_97258_20120129_060817_inLine +BABEL_BP_104_97258_20120129_060817_outLine +BABEL_BP_104_97335_20120131_013929_inLine +BABEL_BP_104_97335_20120131_013929_outLine +BABEL_BP_104_97492_20120117_173450_inLine +BABEL_BP_104_97492_20120117_173450_outLine +BABEL_BP_104_97803_20120116_184019_inLine +BABEL_BP_104_97803_20120116_184019_outLine +BABEL_BP_104_97971_20120317_004835_inLine +BABEL_BP_104_97971_20120317_004835_outLine +BABEL_BP_104_98067_20120221_131601_inLine +BABEL_BP_104_98067_20120221_131601_outLine +BABEL_BP_104_98110_20120218_193615_outLine +BABEL_BP_104_98331_20120223_014233_inLine +BABEL_BP_104_98446_20120312_135630_inLine +BABEL_BP_104_98503_20120402_230340_inLine +BABEL_BP_104_98503_20120403_025554_inLine +BABEL_BP_104_98588_20120119_011655_inLine +BABEL_BP_104_98588_20120119_011655_outLine +BABEL_BP_104_98942_20120205_224026_outLine +BABEL_BP_104_98987_20120220_184452_inLine +BABEL_BP_104_98987_20120220_184452_outLine +BABEL_BP_104_98993_20120516_040504_inLine +BABEL_BP_104_98993_20120516_040504_outLine +BABEL_BP_104_99093_20120212_062850_inLine +BABEL_BP_104_99093_20120212_062850_outLine +BABEL_BP_104_99354_20120203_152733_inLine +BABEL_BP_104_99354_20120203_152733_outLine diff --git a/egs/babel/s5d/conf/lists/105-turkish/dev.list b/egs/babel/s5d/conf/lists/105-turkish/dev.list new file mode 100644 index 00000000000..405c3a7662b --- /dev/null +++ b/egs/babel/s5d/conf/lists/105-turkish/dev.list @@ -0,0 +1,127 @@ +BABEL_BP_105_11521_20120602_034839_inLine +BABEL_BP_105_11521_20120602_034839_outLine +BABEL_BP_105_12844_20120208_220114_inLine +BABEL_BP_105_12844_20120208_220114_outLine +BABEL_BP_105_12963_20120122_062911_inLine +BABEL_BP_105_12963_20120122_062911_outLine +BABEL_BP_105_13795_20120125_230526_inLine +BABEL_BP_105_13795_20120125_230526_outLine +BABEL_BP_105_13795_20120125_232747_inLine +BABEL_BP_105_13795_20120125_232747_outLine +BABEL_BP_105_15146_20120106_223718_inLine +BABEL_BP_105_15146_20120106_223719_outLine +BABEL_BP_105_15916_20120201_072825_inLine +BABEL_BP_105_15916_20120201_072825_outLine +BABEL_BP_105_16185_20120609_224507_inLine +BABEL_BP_105_16185_20120609_224507_outLine +BABEL_BP_105_19861_20120530_035456_inLine +BABEL_BP_105_19861_20120530_035456_outLine 
+BABEL_BP_105_20213_20120123_011920_inLine +BABEL_BP_105_20213_20120123_011920_outLine +BABEL_BP_105_21541_20120518_012528_inLine +BABEL_BP_105_22973_20120502_204152_inLine +BABEL_BP_105_22973_20120502_204152_outLine +BABEL_BP_105_26275_20120620_014345_inLine +BABEL_BP_105_26275_20120620_014345_outLine +BABEL_BP_105_29545_20120621_041202_inLine +BABEL_BP_105_29545_20120621_041203_outLine +BABEL_BP_105_31256_20120531_015506_inLine +BABEL_BP_105_31256_20120531_015506_outLine +BABEL_BP_105_31345_20120515_214849_inLine +BABEL_BP_105_31345_20120515_214849_outLine +BABEL_BP_105_32236_20120516_221311_inLine +BABEL_BP_105_32236_20120516_221311_outLine +BABEL_BP_105_35175_20120125_082450_inLine +BABEL_BP_105_35175_20120125_082450_outLine +BABEL_BP_105_39774_20120623_021020_inLine +BABEL_BP_105_39774_20120623_021020_outLine +BABEL_BP_105_39774_20120623_021946_inLine +BABEL_BP_105_39774_20120623_021946_outLine +BABEL_BP_105_39963_20120209_083935_inLine +BABEL_BP_105_39963_20120209_083935_outLine +BABEL_BP_105_40477_20120208_010255_inLine +BABEL_BP_105_40477_20120208_010256_outLine +BABEL_BP_105_40759_20120316_014011_inLine +BABEL_BP_105_40759_20120316_014011_outLine +BABEL_BP_105_42212_20120706_194059_inLine +BABEL_BP_105_42212_20120706_194059_outLine +BABEL_BP_105_42229_20120115_063922_inLine +BABEL_BP_105_42229_20120115_063922_outLine +BABEL_BP_105_44023_20120530_220359_inLine +BABEL_BP_105_44023_20120530_220359_outLine +BABEL_BP_105_44117_20120621_032955_inLine +BABEL_BP_105_44117_20120621_032956_outLine +BABEL_BP_105_48536_20120208_212737_inLine +BABEL_BP_105_48536_20120208_212737_outLine +BABEL_BP_105_49192_20120206_012605_inLine +BABEL_BP_105_49192_20120206_012605_outLine +BABEL_BP_105_54339_20120125_230415_inLine +BABEL_BP_105_54339_20120125_230415_outLine +BABEL_BP_105_55786_20120205_051854_inLine +BABEL_BP_105_55786_20120205_051854_outLine +BABEL_BP_105_55823_20120512_202135_inLine +BABEL_BP_105_55823_20120512_202135_outLine +BABEL_BP_105_56342_20120127_023015_inLine +BABEL_BP_105_56342_20120127_023015_outLine +BABEL_BP_105_60064_20120606_000812_inLine +BABEL_BP_105_60064_20120606_000812_outLine +BABEL_BP_105_60881_20120207_064233_inLine +BABEL_BP_105_60881_20120207_064233_outLine +BABEL_BP_105_66330_20120209_005003_inLine +BABEL_BP_105_66330_20120209_005003_outLine +BABEL_BP_105_66441_20120207_050412_inLine +BABEL_BP_105_66441_20120207_050412_outLine +BABEL_BP_105_66790_20120128_220452_inLine +BABEL_BP_105_66790_20120128_220452_outLine +BABEL_BP_105_66883_20120207_051718_inLine +BABEL_BP_105_66883_20120207_051718_outLine +BABEL_BP_105_67555_20120207_212802_inLine +BABEL_BP_105_67555_20120207_212802_outLine +BABEL_BP_105_67733_20120207_234950_inLine +BABEL_BP_105_67733_20120207_234950_outLine +BABEL_BP_105_69052_20120124_062415_inLine +BABEL_BP_105_69052_20120124_062415_outLine +BABEL_BP_105_75151_20120602_061054_inLine +BABEL_BP_105_75151_20120602_061054_outLine +BABEL_BP_105_76372_20120709_015738_inLine +BABEL_BP_105_76372_20120709_015738_outLine +BABEL_BP_105_76716_20120606_195423_inLine +BABEL_BP_105_76763_20120107_022524_inLine +BABEL_BP_105_76763_20120107_022524_outLine +BABEL_BP_105_78487_20120318_080534_inLine +BABEL_BP_105_80856_20120205_231607_inLine +BABEL_BP_105_80856_20120205_231607_outLine +BABEL_BP_105_84394_20120426_185010_inLine +BABEL_BP_105_84394_20120426_185010_outLine +BABEL_BP_105_84608_20120423_050353_inLine +BABEL_BP_105_84608_20120423_050353_outLine +BABEL_BP_105_87077_20120516_000252_inLine +BABEL_BP_105_87077_20120516_000252_outLine 
+BABEL_BP_105_87806_20120201_235442_inLine +BABEL_BP_105_87806_20120201_235442_outLine +BABEL_BP_105_88385_20120430_004520_inLine +BABEL_BP_105_88385_20120430_004520_outLine +BABEL_BP_105_90393_20120125_034434_inLine +BABEL_BP_105_90393_20120125_034434_outLine +BABEL_BP_105_91136_20120422_062317_inLine +BABEL_BP_105_91136_20120422_062317_outLine +BABEL_BP_105_91330_20120209_002721_inLine +BABEL_BP_105_91330_20120209_002721_outLine +BABEL_BP_105_91865_20120528_230057_inLine +BABEL_BP_105_91865_20120528_230057_outLine +BABEL_BP_105_92386_20120517_234302_inLine +BABEL_BP_105_92386_20120517_234302_outLine +BABEL_BP_105_92591_20120129_012358_inLine +BABEL_BP_105_92591_20120129_012358_outLine +BABEL_BP_105_93192_20120128_005138_inLine +BABEL_BP_105_93192_20120128_005138_outLine +BABEL_BP_105_93454_20120128_235224_inLine +BABEL_BP_105_93454_20120128_235224_outLine +BABEL_BP_105_93509_20120127_075513_inLine +BABEL_BP_105_93509_20120127_075513_outLine +BABEL_BP_105_95350_20120127_234045_inLine +BABEL_BP_105_95350_20120127_234045_outLine +BABEL_BP_105_95952_20120512_225006_inLine +BABEL_BP_105_95952_20120512_225006_outLine +BABEL_BP_105_95952_20120512_230254_inLine +BABEL_BP_105_95952_20120512_230254_outLine diff --git a/egs/babel/s5d/conf/lists/105-turkish/eval.list b/egs/babel/s5d/conf/lists/105-turkish/eval.list new file mode 100644 index 00000000000..47736cf7f28 --- /dev/null +++ b/egs/babel/s5d/conf/lists/105-turkish/eval.list @@ -0,0 +1,194 @@ +BABEL_BP_105_11158_20120609_061134_inLine +BABEL_BP_105_11158_20120609_061134_outLine +BABEL_BP_105_11478_20120128_081119_inLine +BABEL_BP_105_11478_20120128_081119_outLine +BABEL_BP_105_12535_20120528_235510_inLine +BABEL_BP_105_12535_20120528_235510_outLine +BABEL_BP_105_12667_20120502_025008_inLine +BABEL_BP_105_12667_20120502_025008_outLine +BABEL_BP_105_15859_20120313_033308_inLine +BABEL_BP_105_15859_20120313_033308_outLine +BABEL_BP_105_19153_20120125_060542_inLine +BABEL_BP_105_19153_20120125_060542_outLine +BABEL_BP_105_20332_20120615_235730_inLine +BABEL_BP_105_20332_20120615_235730_outLine +BABEL_BP_105_22229_20120106_234925_inLine +BABEL_BP_105_22229_20120106_234925_outLine +BABEL_BP_105_22229_20120107_000755_inLine +BABEL_BP_105_22229_20120107_000755_outLine +BABEL_BP_105_22566_20120621_011722_inLine +BABEL_BP_105_22566_20120621_011722_outLine +BABEL_BP_105_22696_20120529_224618_inLine +BABEL_BP_105_22696_20120529_224618_outLine +BABEL_BP_105_23714_20120531_230422_inLine +BABEL_BP_105_23714_20120531_230422_outLine +BABEL_BP_105_24642_20120525_033813_inLine +BABEL_BP_105_24642_20120525_033814_outLine +BABEL_BP_105_24661_20120615_203702_inLine +BABEL_BP_105_24661_20120615_203702_outLine +BABEL_BP_105_27178_20120816_063733_inLine +BABEL_BP_105_27178_20120816_063733_outLine +BABEL_BP_105_27645_20120501_182457_inLine +BABEL_BP_105_27645_20120501_182457_outLine +BABEL_BP_105_27825_20120205_013051_inLine +BABEL_BP_105_27825_20120205_013051_outLine +BABEL_BP_105_27916_20120530_234813_inLine +BABEL_BP_105_27916_20120530_234813_outLine +BABEL_BP_105_28768_20120531_033622_inLine +BABEL_BP_105_28768_20120531_033622_outLine +BABEL_BP_105_28768_20120531_035033_inLine +BABEL_BP_105_28768_20120531_035033_outLine +BABEL_BP_105_29512_20120129_020437_inLine +BABEL_BP_105_29512_20120129_020437_outLine +BABEL_BP_105_30227_20120519_234337_inLine +BABEL_BP_105_30227_20120519_234337_outLine +BABEL_BP_105_31393_20120814_054655_inLine +BABEL_BP_105_31393_20120814_054658_outLine +BABEL_BP_105_33969_20120430_013648_inLine +BABEL_BP_105_33969_20120430_013648_outLine 
+BABEL_BP_105_34370_20120209_233721_inLine +BABEL_BP_105_34370_20120209_233721_outLine +BABEL_BP_105_38464_20120531_202824_inLine +BABEL_BP_105_38464_20120531_202824_outLine +BABEL_BP_105_38985_20120123_064936_inLine +BABEL_BP_105_38985_20120123_064936_outLine +BABEL_BP_105_40385_20120626_182511_inLine +BABEL_BP_105_40385_20120626_182511_outLine +BABEL_BP_105_40439_20120603_221429_inLine +BABEL_BP_105_40439_20120603_221429_outLine +BABEL_BP_105_41513_20120127_091800_inLine +BABEL_BP_105_41513_20120127_091800_outLine +BABEL_BP_105_41541_20120610_220640_inLine +BABEL_BP_105_41989_20120828_232255_inLine +BABEL_BP_105_41989_20120828_232255_outLine +BABEL_BP_105_42749_20120504_192522_inLine +BABEL_BP_105_42749_20120504_192522_outLine +BABEL_BP_105_42768_20120517_203439_inLine +BABEL_BP_105_42768_20120517_203439_outLine +BABEL_BP_105_42768_20120517_204350_inLine +BABEL_BP_105_42768_20120517_204350_outLine +BABEL_BP_105_44038_20120628_032429_inLine +BABEL_BP_105_44038_20120628_032429_outLine +BABEL_BP_105_45106_20120106_231201_inLine +BABEL_BP_105_45106_20120106_231201_outLine +BABEL_BP_105_45145_20120207_231842_inLine +BABEL_BP_105_45145_20120207_231842_outLine +BABEL_BP_105_45677_20120527_022244_inLine +BABEL_BP_105_45677_20120527_022244_outLine +BABEL_BP_105_45786_20120518_034117_inLine +BABEL_BP_105_45786_20120518_034118_outLine +BABEL_BP_105_45893_20120131_060048_inLine +BABEL_BP_105_45893_20120131_060048_outLine +BABEL_BP_105_46427_20120208_230929_inLine +BABEL_BP_105_46427_20120208_230929_outLine +BABEL_BP_105_46813_20120521_040045_inLine +BABEL_BP_105_46813_20120521_040046_outLine +BABEL_BP_105_47263_20120603_001729_inLine +BABEL_BP_105_47263_20120603_001729_outLine +BABEL_BP_105_48191_20120616_010543_inLine +BABEL_BP_105_48191_20120616_010543_outLine +BABEL_BP_105_49714_20120529_004423_inLine +BABEL_BP_105_49714_20120529_004423_outLine +BABEL_BP_105_50915_20120606_030647_inLine +BABEL_BP_105_50915_20120606_030647_outLine +BABEL_BP_105_51042_20120609_053754_inLine +BABEL_BP_105_51042_20120609_053754_outLine +BABEL_BP_105_51374_20120808_021113_inLine +BABEL_BP_105_51374_20120808_021113_outLine +BABEL_BP_105_55450_20120201_022826_inLine +BABEL_BP_105_55450_20120201_022826_outLine +BABEL_BP_105_55777_20120529_060606_inLine +BABEL_BP_105_55777_20120529_060606_outLine +BABEL_BP_105_55777_20120529_065353_inLine +BABEL_BP_105_55777_20120529_065353_outLine +BABEL_BP_105_56812_20120601_070152_inLine +BABEL_BP_105_56812_20120601_070152_outLine +BABEL_BP_105_60848_20120627_050640_inLine +BABEL_BP_105_60848_20120627_050643_outLine +BABEL_BP_105_62160_20120815_073641_inLine +BABEL_BP_105_62160_20120815_073641_outLine +BABEL_BP_105_62177_20120206_010509_inLine +BABEL_BP_105_62177_20120206_010509_outLine +BABEL_BP_105_63459_20120316_010003_inLine +BABEL_BP_105_63491_20120131_020702_inLine +BABEL_BP_105_63491_20120131_020702_outLine +BABEL_BP_105_65601_20120130_233749_inLine +BABEL_BP_105_65601_20120130_233749_outLine +BABEL_BP_105_65732_20120210_054155_inLine +BABEL_BP_105_65732_20120210_054155_outLine +BABEL_BP_105_66188_20120611_222651_inLine +BABEL_BP_105_66188_20120611_222651_outLine +BABEL_BP_105_68671_20120607_065759_inLine +BABEL_BP_105_68671_20120607_065759_outLine +BABEL_BP_105_69145_20120607_070422_inLine +BABEL_BP_105_69145_20120607_070422_outLine +BABEL_BP_105_69275_20120607_085559_inLine +BABEL_BP_105_69275_20120607_085559_outLine +BABEL_BP_105_70077_20120615_070304_inLine +BABEL_BP_105_70077_20120615_070304_outLine +BABEL_BP_105_71654_20120129_031219_inLine 
+BABEL_BP_105_71654_20120129_031219_outLine +BABEL_BP_105_72011_20120708_195954_inLine +BABEL_BP_105_72011_20120708_195954_outLine +BABEL_BP_105_72011_20120708_201001_inLine +BABEL_BP_105_72011_20120708_201001_outLine +BABEL_BP_105_73562_20120206_084510_inLine +BABEL_BP_105_73562_20120206_084510_outLine +BABEL_BP_105_73757_20120206_093159_inLine +BABEL_BP_105_73757_20120206_093159_outLine +BABEL_BP_105_74295_20120122_020359_inLine +BABEL_BP_105_74295_20120122_020359_outLine +BABEL_BP_105_74607_20120208_041443_inLine +BABEL_BP_105_74607_20120208_041443_outLine +BABEL_BP_105_75020_20120808_014405_inLine +BABEL_BP_105_75020_20120808_014405_outLine +BABEL_BP_105_77771_20120529_022050_inLine +BABEL_BP_105_77771_20120529_022050_outLine +BABEL_BP_105_78245_20120815_044319_inLine +BABEL_BP_105_78245_20120815_044319_outLine +BABEL_BP_105_78728_20120210_014021_inLine +BABEL_BP_105_78728_20120210_014021_outLine +BABEL_BP_105_80174_20120606_185602_inLine +BABEL_BP_105_80174_20120606_185602_outLine +BABEL_BP_105_80535_20120611_065341_inLine +BABEL_BP_105_80535_20120611_065341_outLine +BABEL_BP_105_81944_20120531_010546_inLine +BABEL_BP_105_81944_20120531_010546_outLine +BABEL_BP_105_81996_20120208_060259_inLine +BABEL_BP_105_81996_20120208_060259_outLine +BABEL_BP_105_83012_20120529_010427_inLine +BABEL_BP_105_83012_20120529_010427_outLine +BABEL_BP_105_83053_20120121_030631_inLine +BABEL_BP_105_83053_20120121_030631_outLine +BABEL_BP_105_84700_20120530_041137_inLine +BABEL_BP_105_84700_20120530_041137_outLine +BABEL_BP_105_84865_20120619_034124_inLine +BABEL_BP_105_84865_20120619_034124_outLine +BABEL_BP_105_86305_20120201_230055_inLine +BABEL_BP_105_86305_20120201_230055_outLine +BABEL_BP_105_86998_20120613_030245_inLine +BABEL_BP_105_86998_20120613_030245_outLine +BABEL_BP_105_87885_20120709_012121_inLine +BABEL_BP_105_87885_20120709_012121_outLine +BABEL_BP_105_88245_20120430_200721_inLine +BABEL_BP_105_88245_20120430_200721_outLine +BABEL_BP_105_88982_20120128_051748_inLine +BABEL_BP_105_88982_20120128_051748_outLine +BABEL_BP_105_90180_20120611_232400_inLine +BABEL_BP_105_90180_20120611_232400_outLine +BABEL_BP_105_90313_20120128_001531_inLine +BABEL_BP_105_90313_20120128_001531_outLine +BABEL_BP_105_92308_20120616_231053_inLine +BABEL_BP_105_92308_20120616_231053_outLine +BABEL_BP_105_92328_20120611_062634_inLine +BABEL_BP_105_92328_20120611_062634_outLine +BABEL_BP_105_92820_20120521_005626_inLine +BABEL_BP_105_92820_20120521_005626_outLine +BABEL_BP_105_92852_20120221_033327_inLine +BABEL_BP_105_92852_20120221_033327_outLine +BABEL_BP_105_93151_20120208_021412_inLine +BABEL_BP_105_93151_20120208_021412_outLine +BABEL_BP_105_95861_20120202_000341_inLine +BABEL_BP_105_95861_20120202_000341_outLine +BABEL_BP_105_99929_20120603_000106_inLine +BABEL_BP_105_99929_20120603_000106_outLine diff --git a/egs/babel/s5d/conf/lists/105-turkish/evalpart1.list b/egs/babel/s5d/conf/lists/105-turkish/evalpart1.list new file mode 100644 index 00000000000..87d6e0f050b --- /dev/null +++ b/egs/babel/s5d/conf/lists/105-turkish/evalpart1.list @@ -0,0 +1,65 @@ +BABEL_BP_105_11478_20120128_081119_inLine +BABEL_BP_105_11478_20120128_081119_outLine +BABEL_BP_105_12667_20120502_025008_inLine +BABEL_BP_105_12667_20120502_025008_outLine +BABEL_BP_105_15859_20120313_033308_inLine +BABEL_BP_105_15859_20120313_033308_outLine +BABEL_BP_105_22566_20120621_011722_inLine +BABEL_BP_105_22566_20120621_011722_outLine +BABEL_BP_105_27645_20120501_182457_inLine +BABEL_BP_105_27645_20120501_182457_outLine 
+BABEL_BP_105_33969_20120430_013648_inLine +BABEL_BP_105_33969_20120430_013648_outLine +BABEL_BP_105_38985_20120123_064936_inLine +BABEL_BP_105_38985_20120123_064936_outLine +BABEL_BP_105_41989_20120828_232255_inLine +BABEL_BP_105_41989_20120828_232255_outLine +BABEL_BP_105_42749_20120504_192522_inLine +BABEL_BP_105_42749_20120504_192522_outLine +BABEL_BP_105_42768_20120517_203439_inLine +BABEL_BP_105_42768_20120517_203439_outLine +BABEL_BP_105_42768_20120517_204350_inLine +BABEL_BP_105_42768_20120517_204350_outLine +BABEL_BP_105_45106_20120106_231201_inLine +BABEL_BP_105_45106_20120106_231201_outLine +BABEL_BP_105_45677_20120527_022244_inLine +BABEL_BP_105_45677_20120527_022244_outLine +BABEL_BP_105_46427_20120208_230929_inLine +BABEL_BP_105_46427_20120208_230929_outLine +BABEL_BP_105_46813_20120521_040045_inLine +BABEL_BP_105_46813_20120521_040046_outLine +BABEL_BP_105_47263_20120603_001729_inLine +BABEL_BP_105_47263_20120603_001729_outLine +BABEL_BP_105_50915_20120606_030647_inLine +BABEL_BP_105_50915_20120606_030647_outLine +BABEL_BP_105_51374_20120808_021113_inLine +BABEL_BP_105_51374_20120808_021113_outLine +BABEL_BP_105_60848_20120627_050640_inLine +BABEL_BP_105_60848_20120627_050643_outLine +BABEL_BP_105_63459_20120316_010003_inLine +BABEL_BP_105_63491_20120131_020702_inLine +BABEL_BP_105_63491_20120131_020702_outLine +BABEL_BP_105_65601_20120130_233749_inLine +BABEL_BP_105_65601_20120130_233749_outLine +BABEL_BP_105_65732_20120210_054155_inLine +BABEL_BP_105_65732_20120210_054155_outLine +BABEL_BP_105_72011_20120708_195954_inLine +BABEL_BP_105_72011_20120708_195954_outLine +BABEL_BP_105_72011_20120708_201001_inLine +BABEL_BP_105_72011_20120708_201001_outLine +BABEL_BP_105_74295_20120122_020359_inLine +BABEL_BP_105_74295_20120122_020359_outLine +BABEL_BP_105_78245_20120815_044319_inLine +BABEL_BP_105_78245_20120815_044319_outLine +BABEL_BP_105_80174_20120606_185602_inLine +BABEL_BP_105_80174_20120606_185602_outLine +BABEL_BP_105_81944_20120531_010546_inLine +BABEL_BP_105_81944_20120531_010546_outLine +BABEL_BP_105_83053_20120121_030631_inLine +BABEL_BP_105_83053_20120121_030631_outLine +BABEL_BP_105_84700_20120530_041137_inLine +BABEL_BP_105_84700_20120530_041137_outLine +BABEL_BP_105_87885_20120709_012121_inLine +BABEL_BP_105_87885_20120709_012121_outLine +BABEL_BP_105_88982_20120128_051748_inLine +BABEL_BP_105_88982_20120128_051748_outLine diff --git a/egs/babel/s5d/conf/lists/105-turkish/train.FullLP.list b/egs/babel/s5d/conf/lists/105-turkish/train.FullLP.list new file mode 100644 index 00000000000..6d810bffecc --- /dev/null +++ b/egs/babel/s5d/conf/lists/105-turkish/train.FullLP.list @@ -0,0 +1,993 @@ +BABEL_BP_105_10160_20120107_220423_inLine +BABEL_BP_105_10160_20120107_220423_outLine +BABEL_BP_105_10211_20120602_185303_inLine +BABEL_BP_105_10211_20120602_185303_outLine +BABEL_BP_105_10467_20120520_004721_inLine +BABEL_BP_105_10467_20120520_004721_outLine +BABEL_BP_105_10973_20120604_181602_inLine +BABEL_BP_105_10973_20120604_181602_outLine +BABEL_BP_105_11022_20120126_221846_inLine +BABEL_BP_105_11022_20120126_221846_outLine +BABEL_BP_105_11152_20120608_002410_inLine +BABEL_BP_105_11152_20120608_002410_outLine +BABEL_BP_105_11371_20120110_001148_inLine +BABEL_BP_105_11371_20120110_001148_outLine +BABEL_BP_105_11422_20120110_233241_inLine +BABEL_BP_105_11422_20120110_233241_outLine +BABEL_BP_105_11627_20120209_232308_inLine +BABEL_BP_105_11868_20120518_025856_inLine +BABEL_BP_105_11868_20120518_025856_outLine +BABEL_BP_105_11982_20120520_192511_outLine 
+BABEL_BP_105_12003_20120220_085129_inLine +BABEL_BP_105_12003_20120220_085131_outLine +BABEL_BP_105_12120_20120621_024039_inLine +BABEL_BP_105_12120_20120621_024039_outLine +BABEL_BP_105_12439_20120520_215211_inLine +BABEL_BP_105_12439_20120520_215211_outLine +BABEL_BP_105_12643_20120628_010121_inLine +BABEL_BP_105_13065_20120208_032637_inLine +BABEL_BP_105_13065_20120208_032637_outLine +BABEL_BP_105_13118_20120130_042038_outLine +BABEL_BP_105_13389_20120530_002622_inLine +BABEL_BP_105_13389_20120530_002622_outLine +BABEL_BP_105_13530_20120604_015841_inLine +BABEL_BP_105_13530_20120604_015841_outLine +BABEL_BP_105_13660_20120314_062650_inLine +BABEL_BP_105_13660_20120314_062651_outLine +BABEL_BP_105_13702_20120512_204855_inLine +BABEL_BP_105_13702_20120512_204855_outLine +BABEL_BP_105_13913_20120121_005810_inLine +BABEL_BP_105_13913_20120121_005810_outLine +BABEL_BP_105_14054_20120205_012603_inLine +BABEL_BP_105_14054_20120205_012603_outLine +BABEL_BP_105_14707_20120121_003857_inLine +BABEL_BP_105_14707_20120121_003857_outLine +BABEL_BP_105_14891_20120107_224233_inLine +BABEL_BP_105_14891_20120107_224233_outLine +BABEL_BP_105_14936_20120528_215659_inLine +BABEL_BP_105_14936_20120528_215659_outLine +BABEL_BP_105_14997_20120314_212654_inLine +BABEL_BP_105_14997_20120314_212654_outLine +BABEL_BP_105_15022_20120204_043515_inLine +BABEL_BP_105_15022_20120204_043515_outLine +BABEL_BP_105_16066_20120205_105046_inLine +BABEL_BP_105_16066_20120205_105046_outLine +BABEL_BP_105_16257_20120709_025101_inLine +BABEL_BP_105_16257_20120709_025101_outLine +BABEL_BP_105_16346_20120122_031133_inLine +BABEL_BP_105_16346_20120122_031133_outLine +BABEL_BP_105_16617_20120315_024321_inLine +BABEL_BP_105_16617_20120315_024321_outLine +BABEL_BP_105_16646_20120209_075016_inLine +BABEL_BP_105_16646_20120209_075016_outLine +BABEL_BP_105_16855_20120210_062956_inLine +BABEL_BP_105_16855_20120210_062956_outLine +BABEL_BP_105_16875_20120626_033717_inLine +BABEL_BP_105_16875_20120626_033718_outLine +BABEL_BP_105_16883_20120121_060732_inLine +BABEL_BP_105_16883_20120121_060732_outLine +BABEL_BP_105_17013_20120314_031626_inLine +BABEL_BP_105_17013_20120314_031626_outLine +BABEL_BP_105_17018_20120421_182457_outLine +BABEL_BP_105_17511_20120128_212023_inLine +BABEL_BP_105_17511_20120128_212023_outLine +BABEL_BP_105_17606_20120530_230042_inLine +BABEL_BP_105_17606_20120530_230042_outLine +BABEL_BP_105_17933_20120130_062220_inLine +BABEL_BP_105_17948_20120120_073631_inLine +BABEL_BP_105_17948_20120120_073631_outLine +BABEL_BP_105_18209_20120129_215151_inLine +BABEL_BP_105_18209_20120129_215151_outLine +BABEL_BP_105_18234_20120220_051332_inLine +BABEL_BP_105_18672_20120131_015941_inLine +BABEL_BP_105_18672_20120131_015941_outLine +BABEL_BP_105_18701_20120127_035425_inLine +BABEL_BP_105_18701_20120127_035425_outLine +BABEL_BP_105_18716_20120218_070145_inLine +BABEL_BP_105_18802_20120620_222614_inLine +BABEL_BP_105_18802_20120620_222614_outLine +BABEL_BP_105_19248_20120504_193537_inLine +BABEL_BP_105_19248_20120504_193537_outLine +BABEL_BP_105_19404_20120829_192145_inLine +BABEL_BP_105_19404_20120829_192145_outLine +BABEL_BP_105_19479_20120527_195818_inLine +BABEL_BP_105_19479_20120527_195818_outLine +BABEL_BP_105_19479_20120527_200936_inLine +BABEL_BP_105_19479_20120527_200936_outLine +BABEL_BP_105_19731_20120519_190911_inLine +BABEL_BP_105_19731_20120519_190911_outLine +BABEL_BP_105_20320_20120207_211206_inLine +BABEL_BP_105_20320_20120207_211206_outLine +BABEL_BP_105_20347_20120504_231529_inLine 
+BABEL_BP_105_20347_20120504_232320_inLine +BABEL_BP_105_20462_20120605_192730_inLine +BABEL_BP_105_20462_20120605_192730_outLine +BABEL_BP_105_20471_20120125_013916_inLine +BABEL_BP_105_20471_20120125_013916_outLine +BABEL_BP_105_20471_20120125_015348_inLine +BABEL_BP_105_20471_20120125_015348_outLine +BABEL_BP_105_20483_20120202_013100_inLine +BABEL_BP_105_20483_20120202_013100_outLine +BABEL_BP_105_20518_20120202_070149_inLine +BABEL_BP_105_20518_20120202_070149_outLine +BABEL_BP_105_20590_20120106_021113_inLine +BABEL_BP_105_20590_20120106_021113_outLine +BABEL_BP_105_20591_20120126_045259_inLine +BABEL_BP_105_20591_20120126_045259_outLine +BABEL_BP_105_21258_20120528_002304_inLine +BABEL_BP_105_21258_20120528_002304_outLine +BABEL_BP_105_21367_20120120_050000_inLine +BABEL_BP_105_21367_20120120_050000_outLine +BABEL_BP_105_21370_20120605_185740_inLine +BABEL_BP_105_21370_20120605_185740_outLine +BABEL_BP_105_21430_20120129_024859_outLine +BABEL_BP_105_21518_20120118_195555_inLine +BABEL_BP_105_21518_20120118_195555_outLine +BABEL_BP_105_21714_20120518_223459_inLine +BABEL_BP_105_21714_20120518_223459_outLine +BABEL_BP_105_21782_20120130_003418_outLine +BABEL_BP_105_21946_20120504_035038_inLine +BABEL_BP_105_21946_20120504_035039_outLine +BABEL_BP_105_22179_20120206_023628_inLine +BABEL_BP_105_22179_20120206_023628_outLine +BABEL_BP_105_22272_20120430_191440_inLine +BABEL_BP_105_22272_20120430_191440_outLine +BABEL_BP_105_22408_20120131_202129_inLine +BABEL_BP_105_22408_20120131_202129_outLine +BABEL_BP_105_22408_20120131_210558_inLine +BABEL_BP_105_22408_20120131_210558_outLine +BABEL_BP_105_22509_20120429_020025_inLine +BABEL_BP_105_22509_20120429_020025_outLine +BABEL_BP_105_22898_20120129_040904_inLine +BABEL_BP_105_22898_20120129_040904_outLine +BABEL_BP_105_22903_20120204_205250_inLine +BABEL_BP_105_22903_20120204_205250_outLine +BABEL_BP_105_22910_20120208_013659_inLine +BABEL_BP_105_22910_20120208_013659_outLine +BABEL_BP_105_23167_20120520_193822_inLine +BABEL_BP_105_23167_20120520_193822_outLine +BABEL_BP_105_23502_20120129_223353_inLine +BABEL_BP_105_23502_20120129_223353_outLine +BABEL_BP_105_23571_20120131_040441_inLine +BABEL_BP_105_23571_20120131_040441_outLine +BABEL_BP_105_23629_20120503_212942_inLine +BABEL_BP_105_23629_20120503_212942_outLine +BABEL_BP_105_23930_20120127_051732_outLine +BABEL_BP_105_24094_20120203_230434_inLine +BABEL_BP_105_24094_20120203_230434_outLine +BABEL_BP_105_24420_20120122_053229_inLine +BABEL_BP_105_24420_20120122_053229_outLine +BABEL_BP_105_24589_20120530_180625_inLine +BABEL_BP_105_24589_20120530_180625_outLine +BABEL_BP_105_24608_20120111_023000_inLine +BABEL_BP_105_24608_20120111_023000_outLine +BABEL_BP_105_24638_20120120_040215_inLine +BABEL_BP_105_24638_20120120_040215_outLine +BABEL_BP_105_25035_20120221_014614_inLine +BABEL_BP_105_25035_20120221_014614_outLine +BABEL_BP_105_25106_20120129_003957_inLine +BABEL_BP_105_25106_20120129_003957_outLine +BABEL_BP_105_25236_20120209_002129_inLine +BABEL_BP_105_25236_20120209_002129_outLine +BABEL_BP_105_25278_20120208_203010_inLine +BABEL_BP_105_25278_20120208_203010_outLine +BABEL_BP_105_25315_20120516_232406_inLine +BABEL_BP_105_25315_20120516_232406_outLine +BABEL_BP_105_25735_20120520_030401_inLine +BABEL_BP_105_25735_20120520_030401_outLine +BABEL_BP_105_25934_20120105_020031_inLine +BABEL_BP_105_25992_20120120_012613_inLine +BABEL_BP_105_25992_20120120_012613_outLine +BABEL_BP_105_26164_20120627_210408_inLine +BABEL_BP_105_26164_20120627_210408_outLine 
+BABEL_BP_105_26350_20120113_221856_inLine +BABEL_BP_105_26350_20120113_221856_outLine +BABEL_BP_105_26598_20120124_055700_inLine +BABEL_BP_105_26598_20120124_055700_outLine +BABEL_BP_105_26644_20120517_212756_inLine +BABEL_BP_105_26644_20120517_212756_outLine +BABEL_BP_105_26684_20120125_030410_inLine +BABEL_BP_105_26684_20120125_030410_outLine +BABEL_BP_105_27349_20120129_233743_inLine +BABEL_BP_105_27349_20120129_233743_outLine +BABEL_BP_105_27605_20120129_073539_inLine +BABEL_BP_105_27605_20120129_073539_outLine +BABEL_BP_105_27724_20120130_023439_inLine +BABEL_BP_105_27724_20120130_023439_outLine +BABEL_BP_105_28107_20120221_061758_outLine +BABEL_BP_105_28204_20120130_031505_inLine +BABEL_BP_105_28204_20120130_031505_outLine +BABEL_BP_105_28889_20120204_200150_outLine +BABEL_BP_105_29133_20120220_042138_inLine +BABEL_BP_105_29168_20120131_214316_inLine +BABEL_BP_105_29168_20120131_214316_outLine +BABEL_BP_105_29259_20120607_190658_inLine +BABEL_BP_105_29259_20120607_190658_outLine +BABEL_BP_105_29276_20120209_054912_inLine +BABEL_BP_105_29276_20120209_054912_outLine +BABEL_BP_105_29290_20120130_044642_inLine +BABEL_BP_105_29302_20120128_044018_outLine +BABEL_BP_105_29335_20120125_090733_inLine +BABEL_BP_105_29335_20120125_090733_outLine +BABEL_BP_105_29407_20120531_013323_inLine +BABEL_BP_105_29407_20120531_013323_outLine +BABEL_BP_105_29421_20120127_235240_inLine +BABEL_BP_105_29421_20120127_235240_outLine +BABEL_BP_105_29444_20120204_050434_inLine +BABEL_BP_105_29444_20120204_050434_outLine +BABEL_BP_105_29771_20120430_234735_inLine +BABEL_BP_105_29771_20120430_234735_outLine +BABEL_BP_105_29988_20120120_075802_inLine +BABEL_BP_105_29988_20120120_075802_outLine +BABEL_BP_105_30168_20120209_192615_inLine +BABEL_BP_105_30168_20120209_192615_outLine +BABEL_BP_105_30554_20120126_022601_inLine +BABEL_BP_105_30554_20120126_022601_outLine +BABEL_BP_105_31281_20120130_004325_inLine +BABEL_BP_105_31281_20120130_004325_outLine +BABEL_BP_105_31460_20120603_224411_inLine +BABEL_BP_105_31460_20120603_224411_outLine +BABEL_BP_105_31917_20120202_083328_inLine +BABEL_BP_105_31917_20120202_083328_outLine +BABEL_BP_105_32120_20120627_232416_inLine +BABEL_BP_105_32120_20120627_232416_outLine +BABEL_BP_105_32263_20120125_003247_inLine +BABEL_BP_105_32263_20120125_003247_outLine +BABEL_BP_105_32295_20120201_060053_inLine +BABEL_BP_105_32334_20120126_064227_inLine +BABEL_BP_105_32334_20120126_064227_outLine +BABEL_BP_105_32642_20120518_185259_outLine +BABEL_BP_105_32663_20120709_040652_inLine +BABEL_BP_105_32663_20120709_040652_outLine +BABEL_BP_105_32710_20120320_040408_inLine +BABEL_BP_105_32710_20120320_040408_outLine +BABEL_BP_105_32818_20120530_032934_inLine +BABEL_BP_105_32818_20120530_032935_outLine +BABEL_BP_105_33671_20120314_060721_inLine +BABEL_BP_105_33671_20120314_060721_outLine +BABEL_BP_105_34169_20120209_195657_outLine +BABEL_BP_105_34194_20120206_104021_inLine +BABEL_BP_105_34194_20120206_104021_outLine +BABEL_BP_105_34235_20120206_051248_inLine +BABEL_BP_105_34248_20120628_013714_inLine +BABEL_BP_105_34248_20120628_013714_outLine +BABEL_BP_105_34480_20120605_033447_inLine +BABEL_BP_105_34480_20120605_033447_outLine +BABEL_BP_105_34498_20120127_071326_inLine +BABEL_BP_105_34498_20120127_071326_outLine +BABEL_BP_105_34590_20120829_000220_inLine +BABEL_BP_105_34590_20120829_000220_outLine +BABEL_BP_105_35006_20120118_204903_inLine +BABEL_BP_105_35006_20120118_204903_outLine +BABEL_BP_105_35011_20120314_000129_inLine +BABEL_BP_105_35229_20120621_203612_inLine 
+BABEL_BP_105_35229_20120621_203612_outLine +BABEL_BP_105_35324_20120117_204415_inLine +BABEL_BP_105_35324_20120117_204415_outLine +BABEL_BP_105_35329_20120203_051310_inLine +BABEL_BP_105_35329_20120203_051310_outLine +BABEL_BP_105_35357_20120530_040330_inLine +BABEL_BP_105_35357_20120530_040330_outLine +BABEL_BP_105_35576_20120530_184018_inLine +BABEL_BP_105_35576_20120530_184018_outLine +BABEL_BP_105_36276_20120519_000042_inLine +BABEL_BP_105_36276_20120519_000042_outLine +BABEL_BP_105_36360_20120121_024157_inLine +BABEL_BP_105_36360_20120121_024157_outLine +BABEL_BP_105_36383_20120126_014553_inLine +BABEL_BP_105_36561_20120125_091214_inLine +BABEL_BP_105_36561_20120125_091214_outLine +BABEL_BP_105_36711_20120817_211133_inLine +BABEL_BP_105_36711_20120817_211133_outLine +BABEL_BP_105_36722_20120420_012709_inLine +BABEL_BP_105_36722_20120420_012709_outLine +BABEL_BP_105_36975_20120119_201922_inLine +BABEL_BP_105_36975_20120119_201922_outLine +BABEL_BP_105_37094_20120111_013332_inLine +BABEL_BP_105_37094_20120111_013332_outLine +BABEL_BP_105_37110_20120113_201333_inLine +BABEL_BP_105_37110_20120113_201333_outLine +BABEL_BP_105_37111_20120504_215437_inLine +BABEL_BP_105_37111_20120504_215437_outLine +BABEL_BP_105_37260_20120314_015840_inLine +BABEL_BP_105_37260_20120314_015840_outLine +BABEL_BP_105_37444_20120518_221718_inLine +BABEL_BP_105_37444_20120518_221718_outLine +BABEL_BP_105_37461_20120530_010739_inLine +BABEL_BP_105_37461_20120530_010739_outLine +BABEL_BP_105_38108_20120129_001503_inLine +BABEL_BP_105_38108_20120129_001503_outLine +BABEL_BP_105_38640_20120208_010027_inLine +BABEL_BP_105_38640_20120208_010027_outLine +BABEL_BP_105_39066_20120206_073804_inLine +BABEL_BP_105_39066_20120206_073804_outLine +BABEL_BP_105_39114_20120516_035141_inLine +BABEL_BP_105_39114_20120516_035141_outLine +BABEL_BP_105_39384_20120525_200159_outLine +BABEL_BP_105_39384_20120525_200904_outLine +BABEL_BP_105_39915_20120527_221155_inLine +BABEL_BP_105_39915_20120527_221155_outLine +BABEL_BP_105_39997_20120202_204531_inLine +BABEL_BP_105_39997_20120202_204531_outLine +BABEL_BP_105_40002_20120202_061416_inLine +BABEL_BP_105_40002_20120202_061416_outLine +BABEL_BP_105_40040_20120125_211630_inLine +BABEL_BP_105_40040_20120125_211630_outLine +BABEL_BP_105_40046_20120110_013037_inLine +BABEL_BP_105_40046_20120110_013037_outLine +BABEL_BP_105_40084_20120127_075326_inLine +BABEL_BP_105_40084_20120127_075326_outLine +BABEL_BP_105_40123_20120527_021542_inLine +BABEL_BP_105_40123_20120527_021542_outLine +BABEL_BP_105_40346_20120109_223712_inLine +BABEL_BP_105_40346_20120109_223712_outLine +BABEL_BP_105_40510_20120128_063431_inLine +BABEL_BP_105_40510_20120128_063431_outLine +BABEL_BP_105_40980_20120208_102244_outLine +BABEL_BP_105_41170_20120110_004951_inLine +BABEL_BP_105_41170_20120110_004951_outLine +BABEL_BP_105_41456_20120316_021539_inLine +BABEL_BP_105_41456_20120316_021539_outLine +BABEL_BP_105_41540_20120121_064850_inLine +BABEL_BP_105_41540_20120121_064850_outLine +BABEL_BP_105_41561_20120708_205430_inLine +BABEL_BP_105_41561_20120708_205430_outLine +BABEL_BP_105_41661_20120206_073351_inLine +BABEL_BP_105_41661_20120206_073351_outLine +BABEL_BP_105_41797_20120208_054959_inLine +BABEL_BP_105_41797_20120208_054959_outLine +BABEL_BP_105_42145_20120210_004555_inLine +BABEL_BP_105_42145_20120210_004555_outLine +BABEL_BP_105_42309_20120530_225817_inLine +BABEL_BP_105_42309_20120530_225817_outLine +BABEL_BP_105_42471_20120210_064751_outLine +BABEL_BP_105_42651_20120208_003002_inLine 
+BABEL_BP_105_42651_20120208_003002_outLine +BABEL_BP_105_42788_20120520_202049_outLine +BABEL_BP_105_42853_20120105_232804_inLine +BABEL_BP_105_42853_20120105_232804_outLine +BABEL_BP_105_43017_20120814_005806_inLine +BABEL_BP_105_43017_20120814_005806_outLine +BABEL_BP_105_43277_20120122_050352_inLine +BABEL_BP_105_43277_20120122_050352_outLine +BABEL_BP_105_43317_20120516_181202_inLine +BABEL_BP_105_43317_20120516_181202_outLine +BABEL_BP_105_43383_20120814_060445_inLine +BABEL_BP_105_43383_20120814_060445_outLine +BABEL_BP_105_43425_20120520_223154_inLine +BABEL_BP_105_43425_20120520_223154_outLine +BABEL_BP_105_43425_20120520_224822_inLine +BABEL_BP_105_43425_20120520_224822_outLine +BABEL_BP_105_43426_20120127_054206_inLine +BABEL_BP_105_43426_20120127_054206_outLine +BABEL_BP_105_43991_20120201_043008_inLine +BABEL_BP_105_43991_20120201_043008_outLine +BABEL_BP_105_44129_20120203_031411_inLine +BABEL_BP_105_44209_20120130_072808_inLine +BABEL_BP_105_44209_20120130_072808_outLine +BABEL_BP_105_44500_20120531_224758_inLine +BABEL_BP_105_44500_20120531_224758_outLine +BABEL_BP_105_44568_20120315_215919_inLine +BABEL_BP_105_44568_20120315_215919_outLine +BABEL_BP_105_44756_20120125_222756_inLine +BABEL_BP_105_44829_20120816_071805_inLine +BABEL_BP_105_44829_20120816_071805_outLine +BABEL_BP_105_44836_20120208_085036_inLine +BABEL_BP_105_44836_20120208_085036_outLine +BABEL_BP_105_45214_20120209_223827_inLine +BABEL_BP_105_45214_20120209_223827_outLine +BABEL_BP_105_45227_20120208_205329_inLine +BABEL_BP_105_45227_20120208_205329_outLine +BABEL_BP_105_45511_20120601_001634_inLine +BABEL_BP_105_45511_20120601_001634_outLine +BABEL_BP_105_45512_20120208_063419_inLine +BABEL_BP_105_45512_20120208_063419_outLine +BABEL_BP_105_45655_20120206_065331_inLine +BABEL_BP_105_45655_20120206_065331_outLine +BABEL_BP_105_45681_20120209_002338_inLine +BABEL_BP_105_45681_20120209_002338_outLine +BABEL_BP_105_45929_20120208_051244_inLine +BABEL_BP_105_45929_20120208_051244_outLine +BABEL_BP_105_45931_20120816_011738_inLine +BABEL_BP_105_45931_20120816_011738_outLine +BABEL_BP_105_46603_20120430_193144_inLine +BABEL_BP_105_46603_20120430_193144_outLine +BABEL_BP_105_46977_20120210_043052_inLine +BABEL_BP_105_46977_20120210_043052_outLine +BABEL_BP_105_47037_20120118_025150_inLine +BABEL_BP_105_47037_20120118_025150_outLine +BABEL_BP_105_47128_20120206_014647_inLine +BABEL_BP_105_47128_20120206_014647_outLine +BABEL_BP_105_47429_20120512_193242_inLine +BABEL_BP_105_47429_20120512_193242_outLine +BABEL_BP_105_47433_20120124_032650_inLine +BABEL_BP_105_47433_20120124_032650_outLine +BABEL_BP_105_47566_20120210_004031_outLine +BABEL_BP_105_47625_20120210_031653_outLine +BABEL_BP_105_47646_20120130_220546_inLine +BABEL_BP_105_47646_20120130_220546_outLine +BABEL_BP_105_47733_20120124_050736_inLine +BABEL_BP_105_47733_20120124_050736_outLine +BABEL_BP_105_47794_20120517_013537_inLine +BABEL_BP_105_47794_20120517_013537_outLine +BABEL_BP_105_47794_20120517_014505_inLine +BABEL_BP_105_47794_20120517_014505_outLine +BABEL_BP_105_47821_20120430_182844_outLine +BABEL_BP_105_47823_20120209_005455_inLine +BABEL_BP_105_47823_20120209_005455_outLine +BABEL_BP_105_47845_20120604_014840_inLine +BABEL_BP_105_48061_20120201_084109_inLine +BABEL_BP_105_48061_20120201_084109_outLine +BABEL_BP_105_48247_20120814_194116_inLine +BABEL_BP_105_48247_20120814_194116_outLine +BABEL_BP_105_48281_20120527_205037_inLine +BABEL_BP_105_48281_20120527_205037_outLine +BABEL_BP_105_48281_20120527_210249_inLine 
+BABEL_BP_105_48281_20120527_210249_outLine +BABEL_BP_105_48317_20120201_220534_inLine +BABEL_BP_105_48410_20120816_072736_inLine +BABEL_BP_105_48410_20120816_072736_outLine +BABEL_BP_105_48418_20120517_235210_inLine +BABEL_BP_105_48418_20120517_235210_outLine +BABEL_BP_105_48491_20120814_025137_inLine +BABEL_BP_105_48491_20120814_025137_outLine +BABEL_BP_105_48559_20120120_085039_inLine +BABEL_BP_105_48559_20120120_085039_outLine +BABEL_BP_105_48976_20120209_021529_inLine +BABEL_BP_105_48976_20120209_021529_outLine +BABEL_BP_105_49186_20120627_224343_inLine +BABEL_BP_105_49186_20120627_224343_outLine +BABEL_BP_105_49239_20120121_234750_outLine +BABEL_BP_105_49541_20120205_233637_inLine +BABEL_BP_105_49541_20120205_233637_outLine +BABEL_BP_105_49624_20120129_090754_inLine +BABEL_BP_105_49624_20120129_090754_outLine +BABEL_BP_105_49689_20120429_224801_inLine +BABEL_BP_105_49689_20120429_224801_outLine +BABEL_BP_105_50028_20120628_020702_inLine +BABEL_BP_105_50028_20120628_020702_outLine +BABEL_BP_105_50141_20120516_230234_inLine +BABEL_BP_105_50141_20120516_230234_outLine +BABEL_BP_105_50201_20120314_220751_inLine +BABEL_BP_105_50201_20120314_220751_outLine +BABEL_BP_105_50416_20120120_030634_inLine +BABEL_BP_105_50416_20120120_030634_outLine +BABEL_BP_105_50416_20120120_032209_inLine +BABEL_BP_105_50416_20120120_032209_outLine +BABEL_BP_105_50641_20120519_213400_inLine +BABEL_BP_105_50641_20120519_213400_outLine +BABEL_BP_105_50752_20120530_202359_inLine +BABEL_BP_105_50752_20120530_202359_outLine +BABEL_BP_105_50798_20120814_222755_inLine +BABEL_BP_105_50798_20120814_222755_outLine +BABEL_BP_105_50932_20120131_024519_outLine +BABEL_BP_105_51052_20120125_203253_inLine +BABEL_BP_105_51052_20120125_203253_outLine +BABEL_BP_105_51149_20120517_022710_inLine +BABEL_BP_105_51149_20120517_022710_outLine +BABEL_BP_105_51448_20120512_221822_inLine +BABEL_BP_105_51448_20120512_221822_outLine +BABEL_BP_105_51521_20120528_232651_inLine +BABEL_BP_105_51521_20120528_232651_outLine +BABEL_BP_105_51569_20120113_191836_inLine +BABEL_BP_105_51569_20120113_191836_outLine +BABEL_BP_105_52219_20120122_061548_inLine +BABEL_BP_105_52219_20120122_061548_outLine +BABEL_BP_105_52335_20120602_042319_inLine +BABEL_BP_105_52335_20120602_042320_outLine +BABEL_BP_105_52602_20120130_010143_inLine +BABEL_BP_105_52602_20120130_010143_outLine +BABEL_BP_105_52642_20120120_062951_inLine +BABEL_BP_105_52642_20120120_062951_outLine +BABEL_BP_105_52900_20120207_074729_inLine +BABEL_BP_105_53179_20120126_014504_inLine +BABEL_BP_105_53179_20120126_014504_outLine +BABEL_BP_105_53181_20120209_221434_inLine +BABEL_BP_105_53181_20120209_221434_outLine +BABEL_BP_105_53352_20120313_025305_inLine +BABEL_BP_105_53352_20120313_025305_outLine +BABEL_BP_105_53653_20120601_203737_inLine +BABEL_BP_105_53653_20120601_203737_outLine +BABEL_BP_105_53653_20120601_205017_inLine +BABEL_BP_105_53653_20120601_205017_outLine +BABEL_BP_105_53994_20120501_063357_inLine +BABEL_BP_105_53994_20120501_063357_outLine +BABEL_BP_105_54285_20120501_170645_inLine +BABEL_BP_105_54285_20120501_170645_outLine +BABEL_BP_105_54621_20120604_220824_inLine +BABEL_BP_105_54621_20120604_220824_outLine +BABEL_BP_105_55355_20120602_030100_inLine +BABEL_BP_105_55355_20120602_030100_outLine +BABEL_BP_105_55399_20120207_220014_inLine +BABEL_BP_105_55399_20120207_220014_outLine +BABEL_BP_105_55470_20120515_231335_inLine +BABEL_BP_105_55470_20120515_231335_outLine +BABEL_BP_105_55820_20120120_200536_inLine +BABEL_BP_105_55820_20120120_200536_outLine 
+BABEL_BP_105_55838_20120519_183551_outLine +BABEL_BP_105_55838_20120519_184228_outLine +BABEL_BP_105_56039_20120207_012118_inLine +BABEL_BP_105_56039_20120207_012118_outLine +BABEL_BP_105_57148_20120208_234937_inLine +BABEL_BP_105_57148_20120208_234937_outLine +BABEL_BP_105_57454_20120123_082347_inLine +BABEL_BP_105_57454_20120123_082347_outLine +BABEL_BP_105_57457_20120203_040430_inLine +BABEL_BP_105_57457_20120203_040430_outLine +BABEL_BP_105_57619_20120530_212910_inLine +BABEL_BP_105_57619_20120530_212910_outLine +BABEL_BP_105_57629_20120109_193726_inLine +BABEL_BP_105_57629_20120109_193726_outLine +BABEL_BP_105_57637_20120207_092849_outLine +BABEL_BP_105_58108_20120516_200608_inLine +BABEL_BP_105_58108_20120516_200608_outLine +BABEL_BP_105_58108_20120516_215546_inLine +BABEL_BP_105_58108_20120516_215546_outLine +BABEL_BP_105_58192_20120530_040251_inLine +BABEL_BP_105_58192_20120530_040252_outLine +BABEL_BP_105_58232_20120221_044134_inLine +BABEL_BP_105_58232_20120221_044134_outLine +BABEL_BP_105_58357_20120602_203200_inLine +BABEL_BP_105_58357_20120602_203200_outLine +BABEL_BP_105_58413_20120220_081844_inLine +BABEL_BP_105_58413_20120220_081902_outLine +BABEL_BP_105_58536_20120207_101252_inLine +BABEL_BP_105_58536_20120207_101252_outLine +BABEL_BP_105_59169_20120126_071441_inLine +BABEL_BP_105_59169_20120126_071441_outLine +BABEL_BP_105_59175_20120814_014729_inLine +BABEL_BP_105_59175_20120814_014729_outLine +BABEL_BP_105_59454_20120210_050748_inLine +BABEL_BP_105_59454_20120210_050748_outLine +BABEL_BP_105_59924_20120520_193636_outLine +BABEL_BP_105_59925_20120531_025444_inLine +BABEL_BP_105_59925_20120531_025444_outLine +BABEL_BP_105_60106_20120206_012558_inLine +BABEL_BP_105_60106_20120206_012558_outLine +BABEL_BP_105_60193_20120208_022615_inLine +BABEL_BP_105_60193_20120208_022615_outLine +BABEL_BP_105_60605_20120121_024426_inLine +BABEL_BP_105_60605_20120121_024426_outLine +BABEL_BP_105_60826_20120127_052753_inLine +BABEL_BP_105_60826_20120127_052753_outLine +BABEL_BP_105_60842_20120207_082938_inLine +BABEL_BP_105_60842_20120207_082938_outLine +BABEL_BP_105_60995_20120708_212511_inLine +BABEL_BP_105_60995_20120708_212511_outLine +BABEL_BP_105_61119_20120120_011733_inLine +BABEL_BP_105_61119_20120120_011733_outLine +BABEL_BP_105_61750_20120430_182721_inLine +BABEL_BP_105_61750_20120430_182721_outLine +BABEL_BP_105_61762_20120208_192030_inLine +BABEL_BP_105_61762_20120208_192030_outLine +BABEL_BP_105_61906_20120125_055530_inLine +BABEL_BP_105_61906_20120125_055530_outLine +BABEL_BP_105_61936_20120626_050803_inLine +BABEL_BP_105_61936_20120626_050804_outLine +BABEL_BP_105_61988_20120207_042437_inLine +BABEL_BP_105_61988_20120207_042437_outLine +BABEL_BP_105_62277_20120504_191914_inLine +BABEL_BP_105_62277_20120504_191914_outLine +BABEL_BP_105_62286_20120206_001738_inLine +BABEL_BP_105_62286_20120206_001739_outLine +BABEL_BP_105_62589_20120208_070910_inLine +BABEL_BP_105_62589_20120208_070910_outLine +BABEL_BP_105_63116_20120210_011436_inLine +BABEL_BP_105_63116_20120210_011436_outLine +BABEL_BP_105_63233_20120209_025744_inLine +BABEL_BP_105_63339_20120113_014223_inLine +BABEL_BP_105_63339_20120113_014223_outLine +BABEL_BP_105_63352_20120529_062238_inLine +BABEL_BP_105_63352_20120529_062238_outLine +BABEL_BP_105_63369_20120504_000600_inLine +BABEL_BP_105_63369_20120504_000600_outLine +BABEL_BP_105_64404_20120109_210230_inLine +BABEL_BP_105_64404_20120109_210230_outLine +BABEL_BP_105_64661_20120206_064757_inLine +BABEL_BP_105_64724_20120502_185902_inLine 
+BABEL_BP_105_64724_20120502_185902_outLine +BABEL_BP_105_64889_20120430_041923_inLine +BABEL_BP_105_64889_20120430_041923_outLine +BABEL_BP_105_65069_20120205_053459_inLine +BABEL_BP_105_65069_20120205_053459_outLine +BABEL_BP_105_65248_20120106_003446_inLine +BABEL_BP_105_65248_20120106_003446_outLine +BABEL_BP_105_65580_20120107_231525_inLine +BABEL_BP_105_65580_20120107_231525_outLine +BABEL_BP_105_65629_20120206_013549_inLine +BABEL_BP_105_65629_20120206_013549_outLine +BABEL_BP_105_65783_20120206_225414_inLine +BABEL_BP_105_65783_20120206_225414_outLine +BABEL_BP_105_65923_20120207_201411_inLine +BABEL_BP_105_65923_20120207_201411_outLine +BABEL_BP_105_66419_20120521_015830_inLine +BABEL_BP_105_66419_20120521_015830_outLine +BABEL_BP_105_66451_20120208_202426_inLine +BABEL_BP_105_66451_20120208_202426_outLine +BABEL_BP_105_66798_20120208_003832_inLine +BABEL_BP_105_66798_20120208_003832_outLine +BABEL_BP_105_66839_20120520_203654_inLine +BABEL_BP_105_66839_20120520_203655_outLine +BABEL_BP_105_67150_20120619_230543_inLine +BABEL_BP_105_67150_20120619_230543_outLine +BABEL_BP_105_67227_20120518_213954_inLine +BABEL_BP_105_67227_20120518_213954_outLine +BABEL_BP_105_67304_20120206_233053_inLine +BABEL_BP_105_67304_20120206_233053_outLine +BABEL_BP_105_67628_20120122_014514_inLine +BABEL_BP_105_67628_20120122_014514_outLine +BABEL_BP_105_67798_20120207_222749_inLine +BABEL_BP_105_67798_20120207_222749_outLine +BABEL_BP_105_67916_20120624_041235_inLine +BABEL_BP_105_67916_20120624_041236_outLine +BABEL_BP_105_67916_20120624_042035_inLine +BABEL_BP_105_67916_20120624_042036_outLine +BABEL_BP_105_68129_20120129_010002_inLine +BABEL_BP_105_68276_20120530_043559_inLine +BABEL_BP_105_68276_20120530_043600_outLine +BABEL_BP_105_68295_20120125_213909_inLine +BABEL_BP_105_68295_20120125_213909_outLine +BABEL_BP_105_68362_20120516_233958_inLine +BABEL_BP_105_68362_20120516_233958_outLine +BABEL_BP_105_68545_20120130_195611_inLine +BABEL_BP_105_68545_20120130_195611_outLine +BABEL_BP_105_68767_20120208_195338_inLine +BABEL_BP_105_68767_20120208_195338_outLine +BABEL_BP_105_68861_20120206_042909_inLine +BABEL_BP_105_68861_20120206_042909_outLine +BABEL_BP_105_69137_20120205_012455_inLine +BABEL_BP_105_69137_20120205_012455_outLine +BABEL_BP_105_69368_20120209_025044_inLine +BABEL_BP_105_69368_20120209_025044_outLine +BABEL_BP_105_69548_20120206_002506_inLine +BABEL_BP_105_69548_20120206_002506_outLine +BABEL_BP_105_69621_20120208_050816_inLine +BABEL_BP_105_69621_20120208_050816_outLine +BABEL_BP_105_69764_20120209_041231_inLine +BABEL_BP_105_69764_20120209_041231_outLine +BABEL_BP_105_70243_20120130_002646_inLine +BABEL_BP_105_70243_20120130_002646_outLine +BABEL_BP_105_70285_20120520_195703_inLine +BABEL_BP_105_70285_20120520_195703_outLine +BABEL_BP_105_70511_20120129_071513_inLine +BABEL_BP_105_70511_20120129_071513_outLine +BABEL_BP_105_70548_20120209_030934_inLine +BABEL_BP_105_70548_20120209_030934_outLine +BABEL_BP_105_70615_20120112_204508_inLine +BABEL_BP_105_70615_20120112_204508_outLine +BABEL_BP_105_70680_20120109_201712_inLine +BABEL_BP_105_70906_20120521_022727_inLine +BABEL_BP_105_70906_20120521_022727_outLine +BABEL_BP_105_70975_20120527_224548_inLine +BABEL_BP_105_70975_20120527_224548_outLine +BABEL_BP_105_71178_20120124_044039_inLine +BABEL_BP_105_71178_20120124_044039_outLine +BABEL_BP_105_71739_20120422_024509_inLine +BABEL_BP_105_71739_20120422_024509_outLine +BABEL_BP_105_71741_20120314_230737_inLine +BABEL_BP_105_71741_20120314_230737_outLine 
+BABEL_BP_105_72119_20120202_041158_inLine +BABEL_BP_105_72119_20120202_041158_outLine +BABEL_BP_105_72141_20120125_085836_inLine +BABEL_BP_105_72141_20120125_085836_outLine +BABEL_BP_105_72297_20120602_030633_inLine +BABEL_BP_105_72297_20120602_030633_outLine +BABEL_BP_105_72330_20120520_201127_outLine +BABEL_BP_105_72330_20120520_201604_outLine +BABEL_BP_105_72718_20120525_180835_inLine +BABEL_BP_105_72718_20120525_180835_outLine +BABEL_BP_105_72746_20120205_020507_inLine +BABEL_BP_105_72746_20120205_020507_outLine +BABEL_BP_105_72879_20120125_032216_inLine +BABEL_BP_105_72879_20120125_032216_outLine +BABEL_BP_105_73051_20120817_204309_inLine +BABEL_BP_105_73051_20120817_204309_outLine +BABEL_BP_105_73059_20120520_222710_inLine +BABEL_BP_105_73059_20120520_222710_outLine +BABEL_BP_105_73072_20120607_013513_inLine +BABEL_BP_105_73072_20120607_013513_outLine +BABEL_BP_105_73452_20120527_020050_inLine +BABEL_BP_105_73452_20120527_020050_outLine +BABEL_BP_105_73542_20120118_000641_inLine +BABEL_BP_105_73542_20120118_000641_outLine +BABEL_BP_105_73752_20120520_212014_inLine +BABEL_BP_105_73752_20120520_212014_outLine +BABEL_BP_105_73761_20120119_040339_inLine +BABEL_BP_105_73761_20120119_040339_outLine +BABEL_BP_105_73780_20120430_230832_inLine +BABEL_BP_105_73780_20120430_230832_outLine +BABEL_BP_105_73944_20120207_022618_inLine +BABEL_BP_105_73944_20120207_022618_outLine +BABEL_BP_105_74012_20120207_031751_inLine +BABEL_BP_105_74012_20120207_031751_outLine +BABEL_BP_105_74571_20120709_032825_inLine +BABEL_BP_105_74588_20120208_231518_inLine +BABEL_BP_105_74588_20120208_231518_outLine +BABEL_BP_105_74709_20120123_195039_inLine +BABEL_BP_105_74709_20120123_195039_outLine +BABEL_BP_105_75248_20120621_004722_inLine +BABEL_BP_105_75248_20120621_004722_outLine +BABEL_BP_105_75333_20120517_033420_inLine +BABEL_BP_105_75333_20120517_033420_outLine +BABEL_BP_105_75354_20120520_012303_inLine +BABEL_BP_105_75354_20120520_012303_outLine +BABEL_BP_105_75498_20120123_090316_inLine +BABEL_BP_105_75498_20120123_090316_outLine +BABEL_BP_105_75680_20120627_220907_inLine +BABEL_BP_105_75680_20120627_220907_outLine +BABEL_BP_105_75799_20120121_081211_inLine +BABEL_BP_105_75799_20120121_081211_outLine +BABEL_BP_105_75845_20120126_093251_inLine +BABEL_BP_105_75845_20120126_093251_outLine +BABEL_BP_105_75990_20120210_003258_inLine +BABEL_BP_105_76252_20120708_232625_inLine +BABEL_BP_105_76252_20120708_232625_outLine +BABEL_BP_105_76320_20120520_214841_outLine +BABEL_BP_105_76451_20120520_012516_inLine +BABEL_BP_105_76451_20120520_012516_outLine +BABEL_BP_105_76691_20120501_060535_inLine +BABEL_BP_105_76691_20120501_060535_outLine +BABEL_BP_105_76925_20120207_051003_inLine +BABEL_BP_105_76925_20120207_051003_outLine +BABEL_BP_105_77137_20120120_003356_inLine +BABEL_BP_105_77137_20120120_003356_outLine +BABEL_BP_105_77244_20120530_230026_inLine +BABEL_BP_105_77244_20120530_230026_outLine +BABEL_BP_105_77342_20120126_053532_inLine +BABEL_BP_105_77348_20120109_231904_inLine +BABEL_BP_105_77348_20120109_231904_outLine +BABEL_BP_105_77483_20120126_061820_outLine +BABEL_BP_105_77487_20120131_083433_inLine +BABEL_BP_105_77487_20120131_083433_outLine +BABEL_BP_105_77584_20120119_043252_inLine +BABEL_BP_105_77584_20120119_043252_outLine +BABEL_BP_105_77802_20120120_034318_inLine +BABEL_BP_105_77802_20120120_034318_outLine +BABEL_BP_105_77811_20120619_203214_inLine +BABEL_BP_105_77811_20120619_203214_outLine +BABEL_BP_105_77965_20120110_195959_inLine +BABEL_BP_105_77965_20120110_195959_outLine 
+BABEL_BP_105_79284_20120520_000955_inLine +BABEL_BP_105_79284_20120520_000955_outLine +BABEL_BP_105_79293_20120313_050558_inLine +BABEL_BP_105_79412_20120814_020731_inLine +BABEL_BP_105_79412_20120814_020731_outLine +BABEL_BP_105_79456_20120315_065631_outLine +BABEL_BP_105_79495_20120107_211221_inLine +BABEL_BP_105_79495_20120107_211221_outLine +BABEL_BP_105_79619_20120204_034427_outLine +BABEL_BP_105_79899_20120519_004730_inLine +BABEL_BP_105_79899_20120519_004730_outLine +BABEL_BP_105_80008_20120206_073118_inLine +BABEL_BP_105_80008_20120206_073118_outLine +BABEL_BP_105_80028_20120620_230841_inLine +BABEL_BP_105_80028_20120620_230841_outLine +BABEL_BP_105_80247_20120501_021202_inLine +BABEL_BP_105_80247_20120501_021202_outLine +BABEL_BP_105_80290_20120502_211538_inLine +BABEL_BP_105_80290_20120502_211538_outLine +BABEL_BP_105_80838_20120130_015756_inLine +BABEL_BP_105_80838_20120130_015756_outLine +BABEL_BP_105_80874_20120209_070233_inLine +BABEL_BP_105_80874_20120209_070233_outLine +BABEL_BP_105_80953_20120126_025448_inLine +BABEL_BP_105_80953_20120126_025448_outLine +BABEL_BP_105_81015_20120122_211324_inLine +BABEL_BP_105_81015_20120122_211324_outLine +BABEL_BP_105_81056_20120220_045306_inLine +BABEL_BP_105_81056_20120220_045306_outLine +BABEL_BP_105_81084_20120125_044727_outLine +BABEL_BP_105_81096_20120205_000909_inLine +BABEL_BP_105_81096_20120205_000909_outLine +BABEL_BP_105_81244_20120120_234254_inLine +BABEL_BP_105_81244_20120120_234254_outLine +BABEL_BP_105_81261_20120206_044337_inLine +BABEL_BP_105_81261_20120206_044337_outLine +BABEL_BP_105_81321_20120127_073458_inLine +BABEL_BP_105_81321_20120127_073458_outLine +BABEL_BP_105_81583_20120206_035506_inLine +BABEL_BP_105_81583_20120206_035506_outLine +BABEL_BP_105_81587_20120530_022705_inLine +BABEL_BP_105_81587_20120530_022705_outLine +BABEL_BP_105_81717_20120121_075007_inLine +BABEL_BP_105_81717_20120121_075007_outLine +BABEL_BP_105_81799_20120122_044223_inLine +BABEL_BP_105_81799_20120122_044223_outLine +BABEL_BP_105_82006_20120119_235812_inLine +BABEL_BP_105_82006_20120119_235812_outLine +BABEL_BP_105_82103_20120207_074556_outLine +BABEL_BP_105_82443_20120623_012845_inLine +BABEL_BP_105_82443_20120623_012845_outLine +BABEL_BP_105_82591_20120201_222003_outLine +BABEL_BP_105_82766_20120130_011639_inLine +BABEL_BP_105_82766_20120130_011639_outLine +BABEL_BP_105_82880_20120708_225241_inLine +BABEL_BP_105_82880_20120708_225241_outLine +BABEL_BP_105_83256_20120424_212011_inLine +BABEL_BP_105_83256_20120424_212011_outLine +BABEL_BP_105_83529_20120520_020225_inLine +BABEL_BP_105_83529_20120520_020225_outLine +BABEL_BP_105_83531_20120202_033247_inLine +BABEL_BP_105_83531_20120202_033247_outLine +BABEL_BP_105_83700_20120205_032346_inLine +BABEL_BP_105_83700_20120205_032346_outLine +BABEL_BP_105_83702_20120122_070851_inLine +BABEL_BP_105_83702_20120122_070851_outLine +BABEL_BP_105_83713_20120123_051739_inLine +BABEL_BP_105_83713_20120123_051739_outLine +BABEL_BP_105_84171_20120520_204934_outLine +BABEL_BP_105_84916_20120209_213013_outLine +BABEL_BP_105_84943_20120208_061546_inLine +BABEL_BP_105_84943_20120208_061546_outLine +BABEL_BP_105_85031_20120205_234855_inLine +BABEL_BP_105_85031_20120205_234855_outLine +BABEL_BP_105_85083_20120502_172834_inLine +BABEL_BP_105_85083_20120502_172834_outLine +BABEL_BP_105_85222_20120623_191629_inLine +BABEL_BP_105_85222_20120623_191629_outLine +BABEL_BP_105_85883_20120130_035046_inLine +BABEL_BP_105_85883_20120130_035046_outLine +BABEL_BP_105_85941_20120122_072454_inLine 
+BABEL_BP_105_85941_20120122_072454_outLine +BABEL_BP_105_85948_20120429_220916_inLine +BABEL_BP_105_85948_20120429_220916_outLine +BABEL_BP_105_86004_20120121_235617_inLine +BABEL_BP_105_86004_20120121_235617_outLine +BABEL_BP_105_86014_20120130_071042_inLine +BABEL_BP_105_86014_20120130_071042_outLine +BABEL_BP_105_86259_20120130_021439_inLine +BABEL_BP_105_86259_20120130_021439_outLine +BABEL_BP_105_86801_20120531_045324_outLine +BABEL_BP_105_87107_20120606_210147_inLine +BABEL_BP_105_87107_20120606_210147_outLine +BABEL_BP_105_87850_20120122_034948_inLine +BABEL_BP_105_87850_20120122_034948_outLine +BABEL_BP_105_87857_20120602_232747_inLine +BABEL_BP_105_87857_20120602_232747_outLine +BABEL_BP_105_87862_20120119_190443_inLine +BABEL_BP_105_87862_20120119_190443_outLine +BABEL_BP_105_88243_20120126_081939_inLine +BABEL_BP_105_88243_20120126_081939_outLine +BABEL_BP_105_88253_20120521_025324_inLine +BABEL_BP_105_88253_20120521_025324_outLine +BABEL_BP_105_88294_20120123_071701_inLine +BABEL_BP_105_88294_20120123_071701_outLine +BABEL_BP_105_88383_20120205_064745_inLine +BABEL_BP_105_88383_20120205_064745_outLine +BABEL_BP_105_88506_20120315_203433_inLine +BABEL_BP_105_88506_20120315_203433_outLine +BABEL_BP_105_88932_20120209_024746_inLine +BABEL_BP_105_88932_20120209_024746_outLine +BABEL_BP_105_89345_20120123_012645_inLine +BABEL_BP_105_89345_20120123_012645_outLine +BABEL_BP_105_89565_20120208_075727_outLine +BABEL_BP_105_89583_20120208_041101_inLine +BABEL_BP_105_89583_20120208_041101_outLine +BABEL_BP_105_89674_20120207_210507_inLine +BABEL_BP_105_89674_20120207_210507_outLine +BABEL_BP_105_89818_20120111_002805_inLine +BABEL_BP_105_89818_20120111_002805_outLine +BABEL_BP_105_89838_20120219_225311_inLine +BABEL_BP_105_89838_20120219_225311_outLine +BABEL_BP_105_89867_20120124_044128_inLine +BABEL_BP_105_89867_20120124_044128_outLine +BABEL_BP_105_89867_20120124_050334_inLine +BABEL_BP_105_89867_20120124_050334_outLine +BABEL_BP_105_90046_20120605_010159_inLine +BABEL_BP_105_90046_20120605_010159_outLine +BABEL_BP_105_90055_20120205_015425_inLine +BABEL_BP_105_90055_20120205_015425_outLine +BABEL_BP_105_90490_20120107_011745_inLine +BABEL_BP_105_90490_20120107_011745_outLine +BABEL_BP_105_90559_20120601_213056_inLine +BABEL_BP_105_90559_20120601_213056_outLine +BABEL_BP_105_90577_20120106_010938_inLine +BABEL_BP_105_90577_20120106_010938_outLine +BABEL_BP_105_90730_20120127_001133_inLine +BABEL_BP_105_90730_20120127_001133_outLine +BABEL_BP_105_90819_20120130_023600_inLine +BABEL_BP_105_90819_20120130_023600_outLine +BABEL_BP_105_90951_20120127_014240_inLine +BABEL_BP_105_90951_20120127_014240_outLine +BABEL_BP_105_91002_20120517_195202_inLine +BABEL_BP_105_91002_20120517_195202_outLine +BABEL_BP_105_91358_20120614_031106_inLine +BABEL_BP_105_91358_20120614_031107_outLine +BABEL_BP_105_91386_20120625_201849_inLine +BABEL_BP_105_91386_20120625_201849_outLine +BABEL_BP_105_91703_20120126_003014_inLine +BABEL_BP_105_91703_20120126_003014_outLine +BABEL_BP_105_91975_20120622_002430_inLine +BABEL_BP_105_91975_20120622_002430_outLine +BABEL_BP_105_91975_20120622_004757_inLine +BABEL_BP_105_91975_20120622_004757_outLine +BABEL_BP_105_92252_20120119_001340_inLine +BABEL_BP_105_92252_20120119_001340_outLine +BABEL_BP_105_92407_20120206_090518_inLine +BABEL_BP_105_92407_20120206_090518_outLine +BABEL_BP_105_92628_20120202_065713_inLine +BABEL_BP_105_92628_20120202_065713_outLine +BABEL_BP_105_92752_20120131_065611_inLine +BABEL_BP_105_92752_20120131_065611_outLine 
+BABEL_BP_105_92789_20120208_092935_inLine +BABEL_BP_105_92789_20120208_092935_outLine +BABEL_BP_105_92800_20120204_062855_inLine +BABEL_BP_105_92800_20120204_062855_outLine +BABEL_BP_105_93004_20120203_214508_inLine +BABEL_BP_105_93004_20120203_214508_outLine +BABEL_BP_105_93044_20120530_205229_inLine +BABEL_BP_105_93044_20120530_205229_outLine +BABEL_BP_105_93044_20120530_210446_inLine +BABEL_BP_105_93044_20120530_210446_outLine +BABEL_BP_105_93314_20120204_045440_outLine +BABEL_BP_105_93436_20120605_021136_outLine +BABEL_BP_105_93541_20120207_220607_inLine +BABEL_BP_105_93541_20120207_220607_outLine +BABEL_BP_105_93637_20120208_014420_inLine +BABEL_BP_105_93637_20120208_014420_outLine +BABEL_BP_105_94149_20120528_213123_inLine +BABEL_BP_105_94149_20120528_213123_outLine +BABEL_BP_105_94162_20120121_020746_inLine +BABEL_BP_105_94162_20120121_020746_outLine +BABEL_BP_105_94168_20120127_071423_inLine +BABEL_BP_105_94168_20120127_071423_outLine +BABEL_BP_105_94223_20120813_060431_inLine +BABEL_BP_105_94223_20120813_060431_outLine +BABEL_BP_105_94226_20120126_200629_inLine +BABEL_BP_105_94226_20120126_200629_outLine +BABEL_BP_105_94235_20120131_090132_inLine +BABEL_BP_105_94235_20120131_090132_outLine +BABEL_BP_105_94542_20120503_002707_inLine +BABEL_BP_105_94542_20120503_002707_outLine +BABEL_BP_105_94694_20120127_060811_inLine +BABEL_BP_105_94694_20120127_060811_outLine +BABEL_BP_105_95034_20120130_072201_outLine +BABEL_BP_105_95533_20120527_024409_inLine +BABEL_BP_105_95533_20120527_024409_outLine +BABEL_BP_105_95650_20120110_225916_inLine +BABEL_BP_105_95650_20120110_225916_outLine +BABEL_BP_105_95736_20120128_235257_inLine +BABEL_BP_105_95736_20120128_235257_outLine +BABEL_BP_105_95815_20120201_065914_inLine +BABEL_BP_105_95815_20120201_065914_outLine +BABEL_BP_105_96108_20120201_013051_inLine +BABEL_BP_105_96108_20120201_013051_outLine +BABEL_BP_105_96302_20120518_220402_inLine +BABEL_BP_105_96302_20120518_220402_outLine +BABEL_BP_105_96438_20120208_042745_inLine +BABEL_BP_105_96438_20120208_042745_outLine +BABEL_BP_105_97004_20120628_024047_inLine +BABEL_BP_105_97260_20120128_060528_inLine +BABEL_BP_105_97260_20120128_060528_outLine +BABEL_BP_105_97274_20120202_091803_inLine +BABEL_BP_105_97274_20120202_091803_outLine +BABEL_BP_105_97298_20120706_190045_inLine +BABEL_BP_105_97298_20120706_190045_outLine +BABEL_BP_105_97318_20120606_000332_inLine +BABEL_BP_105_97318_20120606_000332_outLine +BABEL_BP_105_97405_20120128_051654_outLine +BABEL_BP_105_97629_20120606_230655_inLine +BABEL_BP_105_97629_20120606_230655_outLine +BABEL_BP_105_97635_20120519_194730_inLine +BABEL_BP_105_97635_20120519_194730_outLine +BABEL_BP_105_97650_20120124_023530_inLine +BABEL_BP_105_97650_20120124_023530_outLine +BABEL_BP_105_97699_20120619_014656_inLine +BABEL_BP_105_97699_20120619_014656_outLine +BABEL_BP_105_97760_20120503_205622_inLine +BABEL_BP_105_97760_20120503_205622_outLine +BABEL_BP_105_97797_20120130_025511_inLine +BABEL_BP_105_97797_20120130_025511_outLine +BABEL_BP_105_97941_20120123_224142_inLine +BABEL_BP_105_97941_20120123_224142_outLine +BABEL_BP_105_98279_20120121_021104_inLine +BABEL_BP_105_98279_20120121_021104_outLine +BABEL_BP_105_98402_20120518_004507_inLine +BABEL_BP_105_98402_20120518_004507_outLine +BABEL_BP_105_98476_20120314_082638_outLine +BABEL_BP_105_99414_20120618_212729_inLine +BABEL_BP_105_99414_20120618_212729_outLine +BABEL_BP_105_99514_20120126_232257_inLine +BABEL_BP_105_99514_20120126_232257_outLine +BABEL_BP_105_99694_20120202_034424_inLine 
+BABEL_BP_105_99694_20120202_034425_outLine diff --git a/egs/babel/s5d/conf/lists/105-turkish/train.LimitedLP.list b/egs/babel/s5d/conf/lists/105-turkish/train.LimitedLP.list new file mode 100644 index 00000000000..18efca5b37c --- /dev/null +++ b/egs/babel/s5d/conf/lists/105-turkish/train.LimitedLP.list @@ -0,0 +1,128 @@ +BABEL_BP_105_16257_20120709_025101_inLine +BABEL_BP_105_16257_20120709_025101_outLine +BABEL_BP_105_17013_20120314_031626_inLine +BABEL_BP_105_17013_20120314_031626_outLine +BABEL_BP_105_18672_20120131_015941_inLine +BABEL_BP_105_18672_20120131_015941_outLine +BABEL_BP_105_18716_20120218_070145_inLine +BABEL_BP_105_20347_20120504_231529_inLine +BABEL_BP_105_20347_20120504_232320_inLine +BABEL_BP_105_20471_20120125_013916_inLine +BABEL_BP_105_20471_20120125_013916_outLine +BABEL_BP_105_20471_20120125_015348_inLine +BABEL_BP_105_20471_20120125_015348_outLine +BABEL_BP_105_21370_20120605_185740_inLine +BABEL_BP_105_21370_20120605_185740_outLine +BABEL_BP_105_22272_20120430_191440_inLine +BABEL_BP_105_22272_20120430_191440_outLine +BABEL_BP_105_22408_20120131_202129_inLine +BABEL_BP_105_22408_20120131_202129_outLine +BABEL_BP_105_22408_20120131_210558_inLine +BABEL_BP_105_22408_20120131_210558_outLine +BABEL_BP_105_22898_20120129_040904_inLine +BABEL_BP_105_22898_20120129_040904_outLine +BABEL_BP_105_23629_20120503_212942_inLine +BABEL_BP_105_23629_20120503_212942_outLine +BABEL_BP_105_24608_20120111_023000_inLine +BABEL_BP_105_24608_20120111_023000_outLine +BABEL_BP_105_26164_20120627_210408_inLine +BABEL_BP_105_26164_20120627_210408_outLine +BABEL_BP_105_26644_20120517_212756_inLine +BABEL_BP_105_26644_20120517_212756_outLine +BABEL_BP_105_27724_20120130_023439_inLine +BABEL_BP_105_27724_20120130_023439_outLine +BABEL_BP_105_29421_20120127_235240_inLine +BABEL_BP_105_29421_20120127_235240_outLine +BABEL_BP_105_31460_20120603_224411_inLine +BABEL_BP_105_31460_20120603_224411_outLine +BABEL_BP_105_32663_20120709_040652_inLine +BABEL_BP_105_32663_20120709_040652_outLine +BABEL_BP_105_32818_20120530_032934_inLine +BABEL_BP_105_32818_20120530_032935_outLine +BABEL_BP_105_34590_20120829_000220_inLine +BABEL_BP_105_34590_20120829_000220_outLine +BABEL_BP_105_35329_20120203_051310_inLine +BABEL_BP_105_35329_20120203_051310_outLine +BABEL_BP_105_35576_20120530_184018_inLine +BABEL_BP_105_35576_20120530_184018_outLine +BABEL_BP_105_39066_20120206_073804_inLine +BABEL_BP_105_39066_20120206_073804_outLine +BABEL_BP_105_39114_20120516_035141_inLine +BABEL_BP_105_39114_20120516_035141_outLine +BABEL_BP_105_42145_20120210_004555_inLine +BABEL_BP_105_42145_20120210_004555_outLine +BABEL_BP_105_43317_20120516_181202_inLine +BABEL_BP_105_43317_20120516_181202_outLine +BABEL_BP_105_44209_20120130_072808_inLine +BABEL_BP_105_44209_20120130_072808_outLine +BABEL_BP_105_44500_20120531_224758_inLine +BABEL_BP_105_44500_20120531_224758_outLine +BABEL_BP_105_45511_20120601_001634_inLine +BABEL_BP_105_45511_20120601_001634_outLine +BABEL_BP_105_45512_20120208_063419_inLine +BABEL_BP_105_45512_20120208_063419_outLine +BABEL_BP_105_47429_20120512_193242_inLine +BABEL_BP_105_47429_20120512_193242_outLine +BABEL_BP_105_47823_20120209_005455_inLine +BABEL_BP_105_47823_20120209_005455_outLine +BABEL_BP_105_49186_20120627_224343_inLine +BABEL_BP_105_49186_20120627_224343_outLine +BABEL_BP_105_50416_20120120_030634_inLine +BABEL_BP_105_50416_20120120_030634_outLine +BABEL_BP_105_50416_20120120_032209_inLine +BABEL_BP_105_50416_20120120_032209_outLine +BABEL_BP_105_51149_20120517_022710_inLine 
+BABEL_BP_105_51149_20120517_022710_outLine +BABEL_BP_105_53352_20120313_025305_inLine +BABEL_BP_105_53352_20120313_025305_outLine +BABEL_BP_105_55355_20120602_030100_inLine +BABEL_BP_105_55355_20120602_030100_outLine +BABEL_BP_105_56039_20120207_012118_inLine +BABEL_BP_105_56039_20120207_012118_outLine +BABEL_BP_105_60995_20120708_212511_inLine +BABEL_BP_105_60995_20120708_212511_outLine +BABEL_BP_105_61750_20120430_182721_inLine +BABEL_BP_105_61750_20120430_182721_outLine +BABEL_BP_105_62286_20120206_001738_inLine +BABEL_BP_105_62286_20120206_001739_outLine +BABEL_BP_105_62589_20120208_070910_inLine +BABEL_BP_105_62589_20120208_070910_outLine +BABEL_BP_105_63116_20120210_011436_inLine +BABEL_BP_105_63116_20120210_011436_outLine +BABEL_BP_105_65069_20120205_053459_inLine +BABEL_BP_105_65069_20120205_053459_outLine +BABEL_BP_105_65783_20120206_225414_inLine +BABEL_BP_105_65783_20120206_225414_outLine +BABEL_BP_105_69764_20120209_041231_inLine +BABEL_BP_105_69764_20120209_041231_outLine +BABEL_BP_105_71739_20120422_024509_inLine +BABEL_BP_105_71739_20120422_024509_outLine +BABEL_BP_105_71741_20120314_230737_inLine +BABEL_BP_105_71741_20120314_230737_outLine +BABEL_BP_105_72718_20120525_180835_inLine +BABEL_BP_105_72718_20120525_180835_outLine +BABEL_BP_105_73059_20120520_222710_inLine +BABEL_BP_105_73059_20120520_222710_outLine +BABEL_BP_105_73452_20120527_020050_inLine +BABEL_BP_105_73452_20120527_020050_outLine +BABEL_BP_105_75354_20120520_012303_inLine +BABEL_BP_105_75354_20120520_012303_outLine +BABEL_BP_105_80247_20120501_021202_inLine +BABEL_BP_105_80247_20120501_021202_outLine +BABEL_BP_105_82591_20120201_222003_outLine +BABEL_BP_105_83256_20120424_212011_inLine +BABEL_BP_105_83256_20120424_212011_outLine +BABEL_BP_105_83702_20120122_070851_inLine +BABEL_BP_105_83702_20120122_070851_outLine +BABEL_BP_105_83713_20120123_051739_inLine +BABEL_BP_105_83713_20120123_051739_outLine +BABEL_BP_105_90046_20120605_010159_inLine +BABEL_BP_105_90046_20120605_010159_outLine +BABEL_BP_105_92800_20120204_062855_inLine +BABEL_BP_105_92800_20120204_062855_outLine +BABEL_BP_105_94542_20120503_002707_inLine +BABEL_BP_105_94542_20120503_002707_outLine +BABEL_BP_105_96438_20120208_042745_inLine +BABEL_BP_105_96438_20120208_042745_outLine +BABEL_BP_105_97760_20120503_205622_inLine +BABEL_BP_105_97760_20120503_205622_outLine diff --git a/egs/babel/s5d/conf/lists/106-tagalog/dev.list b/egs/babel/s5d/conf/lists/106-tagalog/dev.list new file mode 100644 index 00000000000..09f159f6574 --- /dev/null +++ b/egs/babel/s5d/conf/lists/106-tagalog/dev.list @@ -0,0 +1,146 @@ +BABEL_BP_106_05343_20120411_001147_inLine +BABEL_BP_106_05343_20120411_001147_outLine +BABEL_BP_106_11690_20120315_042036_inLine +BABEL_BP_106_11690_20120315_042036_outLine +BABEL_BP_106_11694_20120315_051701_inLine +BABEL_BP_106_11694_20120315_051701_outLine +BABEL_BP_106_11915_20120301_192127_outLine +BABEL_BP_106_11915_20120301_193624_outLine +BABEL_BP_106_14475_20120317_195829_inLine +BABEL_BP_106_14475_20120317_195829_outLine +BABEL_BP_106_16883_20120219_191154_inLine +BABEL_BP_106_16883_20120219_191154_outLine +BABEL_BP_106_16883_20120219_191914_inLine +BABEL_BP_106_16883_20120219_191914_outLine +BABEL_BP_106_17948_20120305_020044_inLine +BABEL_BP_106_17948_20120305_020044_outLine +BABEL_BP_106_19012_20120405_191535_inLine +BABEL_BP_106_19012_20120405_191535_outLine +BABEL_BP_106_24379_20120303_015051_inLine +BABEL_BP_106_24379_20120303_015051_outLine +BABEL_BP_106_25035_20120213_014750_inLine +BABEL_BP_106_25035_20120213_014750_outLine 
+BABEL_BP_106_28260_20120210_165445_inLine +BABEL_BP_106_28260_20120210_165445_outLine +BABEL_BP_106_28740_20120131_002533_inLine +BABEL_BP_106_28768_20120405_170206_inLine +BABEL_BP_106_28768_20120405_170206_outLine +BABEL_BP_106_28768_20120405_172419_inLine +BABEL_BP_106_28768_20120405_172419_outLine +BABEL_BP_106_29268_20120501_030651_inLine +BABEL_BP_106_29268_20120501_032051_inLine +BABEL_BP_106_29268_20120501_033313_inLine +BABEL_BP_106_30554_20120301_192050_inLine +BABEL_BP_106_30554_20120301_192050_outLine +BABEL_BP_106_30715_20120501_014624_inLine +BABEL_BP_106_31635_20120428_220813_inLine +BABEL_BP_106_32642_20120318_154011_inLine +BABEL_BP_106_32642_20120318_154011_outLine +BABEL_BP_106_35896_20120302_123550_inLine +BABEL_BP_106_36490_20120405_193235_inLine +BABEL_BP_106_36490_20120405_193235_outLine +BABEL_BP_106_40168_20120208_173832_outLine +BABEL_BP_106_40168_20120208_175258_outLine +BABEL_BP_106_42383_20120331_140217_inLine +BABEL_BP_106_42383_20120331_140217_outLine +BABEL_BP_106_42766_20120217_003639_inLine +BABEL_BP_106_42766_20120217_003639_outLine +BABEL_BP_106_47845_20120405_122139_inLine +BABEL_BP_106_47845_20120405_122139_outLine +BABEL_BP_106_47845_20120405_123415_inLine +BABEL_BP_106_47845_20120405_123415_outLine +BABEL_BP_106_48477_20120304_224818_inLine +BABEL_BP_106_48477_20120304_224818_outLine +BABEL_BP_106_53544_20120314_004506_inLine +BABEL_BP_106_53544_20120314_004506_outLine +BABEL_BP_106_53544_20120314_010454_inLine +BABEL_BP_106_53544_20120314_010454_outLine +BABEL_BP_106_53982_20120224_233136_inLine +BABEL_BP_106_53982_20120224_233136_outLine +BABEL_BP_106_57422_20120227_015422_inLine +BABEL_BP_106_57422_20120227_015422_outLine +BABEL_BP_106_58413_20120304_005849_inLine +BABEL_BP_106_58413_20120304_005849_outLine +BABEL_BP_106_58737_20120327_234027_inLine +BABEL_BP_106_58737_20120327_234027_outLine +BABEL_BP_106_59500_20120327_192807_inLine +BABEL_BP_106_59500_20120327_192807_outLine +BABEL_BP_106_61385_20120227_200049_inLine +BABEL_BP_106_61385_20120227_200049_outLine +BABEL_BP_106_65580_20120221_205300_inLine +BABEL_BP_106_65580_20120221_205300_outLine +BABEL_BP_106_65580_20120221_210222_inLine +BABEL_BP_106_65580_20120221_210222_outLine +BABEL_BP_106_66026_20120511_112437_inLine +BABEL_BP_106_66026_20120511_114127_inLine +BABEL_BP_106_66668_20120130_000343_inLine +BABEL_BP_106_66668_20120130_000343_outLine +BABEL_BP_106_66668_20120130_002819_inLine +BABEL_BP_106_66668_20120130_002819_outLine +BABEL_BP_106_68362_20120403_123939_inLine +BABEL_BP_106_68362_20120403_123939_outLine +BABEL_BP_106_69050_20120203_173053_inLine +BABEL_BP_106_69050_20120203_173053_outLine +BABEL_BP_106_72297_20120405_193507_inLine +BABEL_BP_106_72297_20120405_193507_outLine +BABEL_BP_106_72297_20120405_194943_inLine +BABEL_BP_106_72297_20120405_194943_outLine +BABEL_BP_106_73782_20120313_012825_inLine +BABEL_BP_106_73782_20120313_012825_outLine +BABEL_BP_106_75333_20120329_172440_inLine +BABEL_BP_106_75333_20120329_172440_outLine +BABEL_BP_106_75871_20120127_162002_inLine +BABEL_BP_106_75871_20120127_162002_outLine +BABEL_BP_106_76341_20120219_170650_inLine +BABEL_BP_106_76341_20120219_170650_outLine +BABEL_BP_106_76341_20120219_173824_inLine +BABEL_BP_106_76341_20120219_173824_outLine +BABEL_BP_106_78572_20120304_135853_inLine +BABEL_BP_106_79570_20120302_141553_outLine +BABEL_BP_106_79632_20120309_173547_inLine +BABEL_BP_106_79632_20120309_173547_outLine +BABEL_BP_106_79698_20120315_223952_inLine +BABEL_BP_106_79698_20120315_230838_inLine 
+BABEL_BP_106_79698_20120315_230838_outLine +BABEL_BP_106_81587_20120309_163209_inLine +BABEL_BP_106_81587_20120309_163209_outLine +BABEL_BP_106_83255_20120530_214353_inLine +BABEL_BP_106_83891_20120327_163405_inLine +BABEL_BP_106_83891_20120327_163405_outLine +BABEL_BP_106_85617_20120225_212818_inLine +BABEL_BP_106_85617_20120225_212818_outLine +BABEL_BP_106_90180_20120317_002331_inLine +BABEL_BP_106_90180_20120317_002331_outLine +BABEL_BP_106_90577_20120111_201742_inLine +BABEL_BP_106_90577_20120111_201742_outLine +BABEL_BP_106_90764_20120131_140951_inLine +BABEL_BP_106_90764_20120131_140951_outLine +BABEL_BP_106_90890_20120322_020338_inLine +BABEL_BP_106_90890_20120322_020338_outLine +BABEL_BP_106_92820_20120318_144230_inLine +BABEL_BP_106_92820_20120318_144230_outLine +BABEL_BP_106_93000_20120227_164805_inLine +BABEL_BP_106_93000_20120227_164805_outLine +BABEL_BP_106_94149_20120205_211427_inLine +BABEL_BP_106_94149_20120205_211427_outLine +BABEL_BP_106_94244_20120405_200522_inLine +BABEL_BP_106_94244_20120405_200522_outLine +BABEL_BP_106_94542_20120305_045905_inLine +BABEL_BP_106_94542_20120305_045905_outLine +BABEL_BP_106_95589_20120225_030746_inLine +BABEL_BP_106_95589_20120225_032340_inLine +BABEL_BP_106_95589_20120225_032340_outLine +BABEL_BP_106_96347_20120422_163204_inLine +BABEL_BP_106_96347_20120422_163808_inLine +BABEL_BP_106_97318_20120405_141943_inLine +BABEL_BP_106_97318_20120405_141943_outLine +BABEL_BP_106_97629_20120227_180122_inLine +BABEL_BP_106_97629_20120227_180122_outLine +BABEL_BP_106_97797_20120224_210655_inLine +BABEL_BP_106_97797_20120224_210655_outLine +BABEL_BP_106_97797_20120224_211935_inLine +BABEL_BP_106_97797_20120224_211935_outLine +BABEL_BP_106_98086_20120228_172810_inLine +BABEL_BP_106_98086_20120228_172810_outLine +BABEL_BP_106_98640_20120317_040411_inLine +BABEL_BP_106_98640_20120317_040412_outLine diff --git a/egs/babel/s5d/conf/lists/106-tagalog/eval.list b/egs/babel/s5d/conf/lists/106-tagalog/eval.list new file mode 100644 index 00000000000..b2c3042f61a --- /dev/null +++ b/egs/babel/s5d/conf/lists/106-tagalog/eval.list @@ -0,0 +1,241 @@ +BABEL_BP_106_00590_20120401_144745_inLine +BABEL_BP_106_00590_20120401_144745_outLine +BABEL_BP_106_05737_20120317_201434_inLine +BABEL_BP_106_05737_20120317_201434_outLine +BABEL_BP_106_08336_20120308_213905_inLine +BABEL_BP_106_08336_20120308_231058_inLine +BABEL_BP_106_08336_20120308_231812_inLine +BABEL_BP_106_08336_20120308_232516_inLine +BABEL_BP_106_08336_20120308_234130_inLine +BABEL_BP_106_09067_20120304_174532_inLine +BABEL_BP_106_09067_20120304_174532_outLine +BABEL_BP_106_10033_20120428_005441_inLine +BABEL_BP_106_10279_20120525_160616_inLine +BABEL_BP_106_11868_20120403_204010_inLine +BABEL_BP_106_11868_20120403_204010_outLine +BABEL_BP_106_12317_20120324_045054_inLine +BABEL_BP_106_12317_20120324_045054_outLine +BABEL_BP_106_12631_20120202_190009_inLine +BABEL_BP_106_12631_20120202_190009_outLine +BABEL_BP_106_13635_20120319_005136_inLine +BABEL_BP_106_13635_20120319_005136_outLine +BABEL_BP_106_13715_20120530_194000_inLine +BABEL_BP_106_13878_20120517_133306_inLine +BABEL_BP_106_14899_20120519_174015_inLine +BABEL_BP_106_14915_20120525_195519_inLine +BABEL_BP_106_14915_20120525_201940_inLine +BABEL_BP_106_14915_20120525_235128_inLine +BABEL_BP_106_18730_20120322_025159_inLine +BABEL_BP_106_18730_20120322_025159_outLine +BABEL_BP_106_18991_20120208_210053_inLine +BABEL_BP_106_18991_20120208_210053_outLine +BABEL_BP_106_20213_20120417_130013_inLine +BABEL_BP_106_20213_20120417_130013_outLine 
+BABEL_BP_106_20307_20120409_012136_inLine +BABEL_BP_106_20307_20120409_012136_outLine +BABEL_BP_106_20462_20120217_160808_inLine +BABEL_BP_106_20462_20120217_160808_outLine +BABEL_BP_106_20462_20120217_164536_inLine +BABEL_BP_106_20462_20120217_164536_outLine +BABEL_BP_106_20518_20120525_181959_inLine +BABEL_BP_106_20518_20120525_182614_inLine +BABEL_BP_106_20518_20120525_183956_inLine +BABEL_BP_106_20685_20120323_031815_inLine +BABEL_BP_106_20685_20120323_031815_outLine +BABEL_BP_106_21634_20120530_182237_inLine +BABEL_BP_106_22401_20120321_012046_inLine +BABEL_BP_106_22401_20120321_012046_outLine +BABEL_BP_106_22401_20120321_013515_inLine +BABEL_BP_106_22401_20120321_013515_outLine +BABEL_BP_106_22566_20120318_130741_inLine +BABEL_BP_106_22566_20120318_130741_outLine +BABEL_BP_106_25041_20120318_183127_inLine +BABEL_BP_106_25041_20120318_183127_outLine +BABEL_BP_106_25072_20120307_172016_inLine +BABEL_BP_106_25072_20120307_172016_outLine +BABEL_BP_106_25072_20120307_173008_inLine +BABEL_BP_106_25072_20120307_173008_outLine +BABEL_BP_106_27645_20120309_195238_inLine +BABEL_BP_106_27645_20120309_195238_outLine +BABEL_BP_106_27825_20120525_165434_inLine +BABEL_BP_106_29259_20120525_174551_inLine +BABEL_BP_106_30168_20120417_211215_inLine +BABEL_BP_106_30722_20120228_173748_inLine +BABEL_BP_106_30722_20120228_175207_inLine +BABEL_BP_106_30722_20120228_175207_outLine +BABEL_BP_106_30722_20120228_180341_inLine +BABEL_BP_106_30722_20120228_180341_outLine +BABEL_BP_106_31350_20120305_132208_inLine +BABEL_BP_106_31451_20120430_160735_inLine +BABEL_BP_106_31614_20120315_181514_inLine +BABEL_BP_106_31614_20120315_181514_outLine +BABEL_BP_106_32132_20120604_141124_inLine +BABEL_BP_106_34732_20120504_011240_inLine +BABEL_BP_106_34732_20120504_011240_outLine +BABEL_BP_106_36828_20120413_195545_inLine +BABEL_BP_106_36828_20120413_195545_outLine +BABEL_BP_106_37940_20120509_134420_inLine +BABEL_BP_106_38524_20120531_115250_inLine +BABEL_BP_106_40385_20120316_121848_inLine +BABEL_BP_106_40385_20120316_123312_inLine +BABEL_BP_106_41146_20120127_174843_outLine +BABEL_BP_106_41456_20120417_215741_inLine +BABEL_BP_106_41456_20120417_215741_outLine +BABEL_BP_106_41471_20120227_013419_inLine +BABEL_BP_106_41471_20120227_013419_outLine +BABEL_BP_106_41471_20120227_015846_inLine +BABEL_BP_106_41471_20120227_015846_outLine +BABEL_BP_106_41797_20120418_010121_inLine +BABEL_BP_106_41797_20120418_010121_outLine +BABEL_BP_106_44500_20120307_165936_inLine +BABEL_BP_106_44500_20120307_165936_outLine +BABEL_BP_106_45570_20120411_165807_inLine +BABEL_BP_106_45570_20120411_165807_outLine +BABEL_BP_106_45929_20120524_212453_inLine +BABEL_BP_106_45929_20120524_220624_inLine +BABEL_BP_106_46409_20120213_193348_outLine +BABEL_BP_106_48281_20120208_172243_inLine +BABEL_BP_106_48281_20120208_172243_outLine +BABEL_BP_106_48559_20120417_130856_inLine +BABEL_BP_106_48559_20120417_130856_outLine +BABEL_BP_106_48559_20120417_140813_inLine +BABEL_BP_106_48559_20120417_140813_outLine +BABEL_BP_106_48645_20120304_124310_inLine +BABEL_BP_106_48645_20120304_124310_outLine +BABEL_BP_106_48727_20120530_170050_inLine +BABEL_BP_106_49351_20120315_214910_inLine +BABEL_BP_106_49351_20120315_214910_outLine +BABEL_BP_106_50112_20120327_165821_inLine +BABEL_BP_106_50112_20120327_165821_outLine +BABEL_BP_106_50757_20120519_142209_inLine +BABEL_BP_106_53278_20120304_182746_inLine +BABEL_BP_106_53278_20120304_182746_outLine +BABEL_BP_106_54285_20120304_170422_inLine +BABEL_BP_106_54285_20120304_170422_outLine 
+BABEL_BP_106_54339_20120220_233532_inLine +BABEL_BP_106_54339_20120220_233532_outLine +BABEL_BP_106_54339_20120220_235208_inLine +BABEL_BP_106_54339_20120220_235208_outLine +BABEL_BP_106_56648_20120221_204115_inLine +BABEL_BP_106_56648_20120221_204115_outLine +BABEL_BP_106_59454_20120302_005653_inLine +BABEL_BP_106_59454_20120302_013702_inLine +BABEL_BP_106_59454_20120302_013702_outLine +BABEL_BP_106_59736_20120517_215232_inLine +BABEL_BP_106_60064_20120405_122048_inLine +BABEL_BP_106_60064_20120405_122049_outLine +BABEL_BP_106_60183_20120227_184542_outLine +BABEL_BP_106_60183_20120227_185937_outLine +BABEL_BP_106_61408_20120313_190656_inLine +BABEL_BP_106_61408_20120313_190656_outLine +BABEL_BP_106_61408_20120313_191850_inLine +BABEL_BP_106_61408_20120313_191850_outLine +BABEL_BP_106_61762_20120210_205954_inLine +BABEL_BP_106_61762_20120210_205954_outLine +BABEL_BP_106_62589_20120526_194818_inLine +BABEL_BP_106_62710_20120226_042014_inLine +BABEL_BP_106_62710_20120226_042014_outLine +BABEL_BP_106_62710_20120226_043927_inLine +BABEL_BP_106_62710_20120226_043927_outLine +BABEL_BP_106_63116_20120301_233405_inLine +BABEL_BP_106_63116_20120301_233405_outLine +BABEL_BP_106_64178_20120512_001535_inLine +BABEL_BP_106_64178_20120512_001535_outLine +BABEL_BP_106_64300_20120517_211937_inLine +BABEL_BP_106_64300_20120517_213314_inLine +BABEL_BP_106_65837_20120314_013343_inLine +BABEL_BP_106_65837_20120314_013343_outLine +BABEL_BP_106_69871_20120308_190521_inLine +BABEL_BP_106_69871_20120308_191814_inLine +BABEL_BP_106_69871_20120308_191814_outLine +BABEL_BP_106_70323_20120315_214239_inLine +BABEL_BP_106_70323_20120315_214239_outLine +BABEL_BP_106_70530_20120315_171715_inLine +BABEL_BP_106_70530_20120315_171715_outLine +BABEL_BP_106_70773_20120331_201706_inLine +BABEL_BP_106_70773_20120331_201706_outLine +BABEL_BP_106_72647_20120314_140705_inLine +BABEL_BP_106_72647_20120314_140705_outLine +BABEL_BP_106_72908_20120301_214516_inLine +BABEL_BP_106_72908_20120301_214516_outLine +BABEL_BP_106_73050_20120229_190728_inLine +BABEL_BP_106_73050_20120229_190728_outLine +BABEL_BP_106_73050_20120229_192106_inLine +BABEL_BP_106_73050_20120229_192106_outLine +BABEL_BP_106_73122_20120131_151743_inLine +BABEL_BP_106_73122_20120131_151743_outLine +BABEL_BP_106_73205_20120131_011807_inLine +BABEL_BP_106_73205_20120131_011807_outLine +BABEL_BP_106_74940_20120324_000134_inLine +BABEL_BP_106_74940_20120324_000134_outLine +BABEL_BP_106_78487_20120228_180247_inLine +BABEL_BP_106_78487_20120228_184448_inLine +BABEL_BP_106_78487_20120228_185132_inLine +BABEL_BP_106_78487_20120229_165653_inLine +BABEL_BP_106_78487_20120229_180156_inLine +BABEL_BP_106_82007_20120511_234807_inLine +BABEL_BP_106_82007_20120511_234807_outLine +BABEL_BP_106_83012_20120227_002142_inLine +BABEL_BP_106_83012_20120227_002142_outLine +BABEL_BP_106_83012_20120227_004851_inLine +BABEL_BP_106_83012_20120227_004851_outLine +BABEL_BP_106_83053_20120418_185830_inLine +BABEL_BP_106_85719_20120315_175358_inLine +BABEL_BP_106_85719_20120315_175358_outLine +BABEL_BP_106_85883_20120221_204813_inLine +BABEL_BP_106_85883_20120221_204813_outLine +BABEL_BP_106_85883_20120221_210017_inLine +BABEL_BP_106_85883_20120221_210017_outLine +BABEL_BP_106_86211_20120323_003846_inLine +BABEL_BP_106_86211_20120323_003846_outLine +BABEL_BP_106_86339_20120517_211109_inLine +BABEL_BP_106_86900_20120129_013513_inLine +BABEL_BP_106_86900_20120129_013513_outLine +BABEL_BP_106_86998_20120316_235214_inLine +BABEL_BP_106_86998_20120316_235214_outLine 
+BABEL_BP_106_88932_20120210_024536_inLine +BABEL_BP_106_88932_20120210_024536_outLine +BABEL_BP_106_88932_20120210_031316_inLine +BABEL_BP_106_88932_20120210_031316_outLine +BABEL_BP_106_89619_20120216_201137_inLine +BABEL_BP_106_89619_20120216_201137_outLine +BABEL_BP_106_89619_20120216_202208_inLine +BABEL_BP_106_89619_20120216_202208_outLine +BABEL_BP_106_89674_20120128_172359_inLine +BABEL_BP_106_89674_20120128_172359_outLine +BABEL_BP_106_89674_20120128_175646_inLine +BABEL_BP_106_89674_20120128_175646_outLine +BABEL_BP_106_89818_20120323_031837_inLine +BABEL_BP_106_89818_20120323_033337_inLine +BABEL_BP_106_89818_20120323_033337_outLine +BABEL_BP_106_90046_20120316_225047_inLine +BABEL_BP_106_90046_20120316_225047_outLine +BABEL_BP_106_90559_20120404_191014_inLine +BABEL_BP_106_90559_20120404_191014_outLine +BABEL_BP_106_91007_20120405_174537_inLine +BABEL_BP_106_91007_20120405_174537_outLine +BABEL_BP_106_92072_20120315_162353_inLine +BABEL_BP_106_92072_20120315_162353_outLine +BABEL_BP_106_92094_20120519_171316_inLine +BABEL_BP_106_92328_20120318_183827_inLine +BABEL_BP_106_92328_20120318_183827_outLine +BABEL_BP_106_93506_20120501_114215_inLine +BABEL_BP_106_94696_20120405_132036_inLine +BABEL_BP_106_94696_20120405_132036_outLine +BABEL_BP_106_94696_20120405_132924_inLine +BABEL_BP_106_94696_20120405_132924_outLine +BABEL_BP_106_95225_20120323_234548_inLine +BABEL_BP_106_95225_20120323_234548_outLine +BABEL_BP_106_95572_20120501_120940_inLine +BABEL_BP_106_95637_20120210_215628_inLine +BABEL_BP_106_95637_20120210_215628_outLine +BABEL_BP_106_97052_20120315_205207_inLine +BABEL_BP_106_97052_20120315_205207_outLine +BABEL_BP_106_97941_20120228_153714_inLine +BABEL_BP_106_97941_20120228_155826_inLine +BABEL_BP_106_98099_20120224_234716_inLine +BABEL_BP_106_98099_20120224_234716_outLine +BABEL_BP_106_99503_20120328_011545_inLine +BABEL_BP_106_99503_20120328_011545_outLine +BABEL_BP_106_99764_20120309_004852_inLine +BABEL_BP_106_99764_20120309_004852_outLine diff --git a/egs/babel/s5d/conf/lists/106-tagalog/evalpart1.list b/egs/babel/s5d/conf/lists/106-tagalog/evalpart1.list new file mode 100644 index 00000000000..690fec715fb --- /dev/null +++ b/egs/babel/s5d/conf/lists/106-tagalog/evalpart1.list @@ -0,0 +1,69 @@ +BABEL_BP_106_11868_20120403_204010_inLine +BABEL_BP_106_11868_20120403_204010_outLine +BABEL_BP_106_18730_20120322_025159_inLine +BABEL_BP_106_18730_20120322_025159_outLine +BABEL_BP_106_18991_20120208_210053_inLine +BABEL_BP_106_18991_20120208_210053_outLine +BABEL_BP_106_20213_20120417_130013_inLine +BABEL_BP_106_20213_20120417_130013_outLine +BABEL_BP_106_20307_20120409_012136_inLine +BABEL_BP_106_20307_20120409_012136_outLine +BABEL_BP_106_20685_20120323_031815_inLine +BABEL_BP_106_20685_20120323_031815_outLine +BABEL_BP_106_22401_20120321_012046_inLine +BABEL_BP_106_22401_20120321_012046_outLine +BABEL_BP_106_22401_20120321_013515_inLine +BABEL_BP_106_22401_20120321_013515_outLine +BABEL_BP_106_22566_20120318_130741_inLine +BABEL_BP_106_22566_20120318_130741_outLine +BABEL_BP_106_27645_20120309_195238_inLine +BABEL_BP_106_27645_20120309_195238_outLine +BABEL_BP_106_32132_20120604_141124_inLine +BABEL_BP_106_34732_20120504_011240_inLine +BABEL_BP_106_34732_20120504_011240_outLine +BABEL_BP_106_41471_20120227_013419_inLine +BABEL_BP_106_41471_20120227_013419_outLine +BABEL_BP_106_41471_20120227_015846_inLine +BABEL_BP_106_41471_20120227_015846_outLine +BABEL_BP_106_48281_20120208_172243_inLine +BABEL_BP_106_48281_20120208_172243_outLine 
+BABEL_BP_106_48645_20120304_124310_inLine +BABEL_BP_106_48645_20120304_124310_outLine +BABEL_BP_106_53278_20120304_182746_inLine +BABEL_BP_106_53278_20120304_182746_outLine +BABEL_BP_106_54285_20120304_170422_inLine +BABEL_BP_106_54285_20120304_170422_outLine +BABEL_BP_106_54339_20120220_233532_inLine +BABEL_BP_106_54339_20120220_233532_outLine +BABEL_BP_106_54339_20120220_235208_inLine +BABEL_BP_106_54339_20120220_235208_outLine +BABEL_BP_106_63116_20120301_233405_inLine +BABEL_BP_106_63116_20120301_233405_outLine +BABEL_BP_106_72647_20120314_140705_inLine +BABEL_BP_106_72647_20120314_140705_outLine +BABEL_BP_106_73050_20120229_190728_inLine +BABEL_BP_106_73050_20120229_190728_outLine +BABEL_BP_106_73050_20120229_192106_inLine +BABEL_BP_106_73050_20120229_192106_outLine +BABEL_BP_106_73122_20120131_151743_inLine +BABEL_BP_106_73122_20120131_151743_outLine +BABEL_BP_106_73205_20120131_011807_inLine +BABEL_BP_106_73205_20120131_011807_outLine +BABEL_BP_106_74940_20120324_000134_inLine +BABEL_BP_106_74940_20120324_000134_outLine +BABEL_BP_106_82007_20120511_234807_inLine +BABEL_BP_106_82007_20120511_234807_outLine +BABEL_BP_106_85719_20120315_175358_inLine +BABEL_BP_106_85719_20120315_175358_outLine +BABEL_BP_106_86998_20120316_235214_inLine +BABEL_BP_106_86998_20120316_235214_outLine +BABEL_BP_106_90046_20120316_225047_inLine +BABEL_BP_106_90046_20120316_225047_outLine +BABEL_BP_106_90559_20120404_191014_inLine +BABEL_BP_106_90559_20120404_191014_outLine +BABEL_BP_106_95637_20120210_215628_inLine +BABEL_BP_106_95637_20120210_215628_outLine +BABEL_BP_106_97052_20120315_205207_inLine +BABEL_BP_106_97052_20120315_205207_outLine +BABEL_BP_106_97941_20120228_153714_inLine +BABEL_BP_106_97941_20120228_155826_inLine diff --git a/egs/babel/s5d/conf/lists/106-tagalog/train.FullLP.list b/egs/babel/s5d/conf/lists/106-tagalog/train.FullLP.list new file mode 100644 index 00000000000..daa7243e0f2 --- /dev/null +++ b/egs/babel/s5d/conf/lists/106-tagalog/train.FullLP.list @@ -0,0 +1,1138 @@ +BABEL_BP_106_00300_20120415_005214_inLine +BABEL_BP_106_00315_20120419_231124_inLine +BABEL_BP_106_03420_20120409_204941_inLine +BABEL_BP_106_03420_20120409_204941_outLine +BABEL_BP_106_03420_20120409_211810_inLine +BABEL_BP_106_03420_20120409_211811_outLine +BABEL_BP_106_03695_20120401_185127_inLine +BABEL_BP_106_03695_20120401_190556_inLine +BABEL_BP_106_04577_20120409_220039_inLine +BABEL_BP_106_04577_20120409_220039_outLine +BABEL_BP_106_05510_20120505_014918_inLine +BABEL_BP_106_07199_20120407_224853_inLine +BABEL_BP_106_07199_20120407_224853_outLine +BABEL_BP_106_07924_20120414_191906_inLine +BABEL_BP_106_09087_20120304_155326_outLine +BABEL_BP_106_09087_20120304_161115_outLine +BABEL_BP_106_10160_20120322_024644_inLine +BABEL_BP_106_10160_20120322_024644_outLine +BABEL_BP_106_10271_20120307_153101_inLine +BABEL_BP_106_10271_20120307_153101_outLine +BABEL_BP_106_10470_20120229_011606_inLine +BABEL_BP_106_10470_20120229_011606_outLine +BABEL_BP_106_10545_20120315_185249_inLine +BABEL_BP_106_10545_20120315_185249_outLine +BABEL_BP_106_10643_20120407_222930_inLine +BABEL_BP_106_10643_20120407_222930_outLine +BABEL_BP_106_10732_20120604_111534_inLine +BABEL_BP_106_10732_20120604_113159_inLine +BABEL_BP_106_10985_20120313_013835_inLine +BABEL_BP_106_10985_20120313_013835_outLine +BABEL_BP_106_11004_20120603_171542_inLine +BABEL_BP_106_11152_20120421_140313_inLine +BABEL_BP_106_11158_20120314_183907_inLine +BABEL_BP_106_11158_20120314_183907_outLine +BABEL_BP_106_11158_20120314_193006_inLine 
+BABEL_BP_106_11158_20120314_193006_outLine +BABEL_BP_106_11197_20120327_225746_inLine +BABEL_BP_106_11197_20120327_225746_outLine +BABEL_BP_106_11197_20120327_231450_inLine +BABEL_BP_106_11197_20120327_231450_outLine +BABEL_BP_106_11208_20120409_211504_inLine +BABEL_BP_106_11233_20120407_231020_inLine +BABEL_BP_106_11233_20120407_231020_outLine +BABEL_BP_106_11366_20120323_024622_inLine +BABEL_BP_106_11366_20120323_024622_outLine +BABEL_BP_106_11366_20120323_025914_inLine +BABEL_BP_106_11366_20120323_025914_outLine +BABEL_BP_106_11479_20120202_183704_inLine +BABEL_BP_106_11603_20120331_150248_inLine +BABEL_BP_106_11603_20120331_150248_outLine +BABEL_BP_106_11603_20120331_151525_inLine +BABEL_BP_106_11603_20120331_151525_outLine +BABEL_BP_106_11627_20120210_040828_inLine +BABEL_BP_106_11650_20120315_191912_outLine +BABEL_BP_106_11650_20120315_215538_outLine +BABEL_BP_106_11982_20120219_202255_inLine +BABEL_BP_106_11982_20120219_202255_outLine +BABEL_BP_106_12003_20120205_192229_inLine +BABEL_BP_106_12003_20120205_192229_outLine +BABEL_BP_106_12120_20120318_023316_inLine +BABEL_BP_106_12120_20120318_024105_inLine +BABEL_BP_106_12120_20120318_024557_inLine +BABEL_BP_106_12120_20120318_025233_inLine +BABEL_BP_106_12248_20120304_225237_inLine +BABEL_BP_106_12486_20120302_130425_inLine +BABEL_BP_106_12486_20120302_130425_outLine +BABEL_BP_106_12535_20120228_130707_inLine +BABEL_BP_106_12535_20120228_131530_inLine +BABEL_BP_106_12535_20120228_135537_inLine +BABEL_BP_106_12643_20120315_235155_inLine +BABEL_BP_106_12643_20120315_235155_outLine +BABEL_BP_106_12667_20120308_204253_inLine +BABEL_BP_106_12667_20120308_204253_outLine +BABEL_BP_106_12807_20120312_175004_inLine +BABEL_BP_106_12807_20120312_175004_outLine +BABEL_BP_106_12963_20120309_184450_inLine +BABEL_BP_106_12963_20120309_184450_outLine +BABEL_BP_106_12979_20120308_200109_inLine +BABEL_BP_106_13065_20120422_032208_inLine +BABEL_BP_106_13065_20120422_035054_inLine +BABEL_BP_106_13071_20120315_000734_inLine +BABEL_BP_106_13071_20120315_000734_outLine +BABEL_BP_106_13071_20120315_001539_inLine +BABEL_BP_106_13071_20120315_001539_outLine +BABEL_BP_106_13341_20120601_211500_inLine +BABEL_BP_106_13441_20120226_235451_inLine +BABEL_BP_106_13441_20120226_235451_outLine +BABEL_BP_106_13476_20120307_215216_inLine +BABEL_BP_106_13476_20120307_215216_outLine +BABEL_BP_106_13530_20120404_122619_inLine +BABEL_BP_106_13530_20120404_123636_inLine +BABEL_BP_106_13709_20120501_184324_inLine +BABEL_BP_106_13795_20120213_233957_inLine +BABEL_BP_106_13795_20120213_233957_outLine +BABEL_BP_106_14059_20120323_040739_inLine +BABEL_BP_106_14059_20120323_040739_outLine +BABEL_BP_106_14524_20120416_134207_inLine +BABEL_BP_106_14524_20120419_235605_inLine +BABEL_BP_106_14591_20120511_002610_inLine +BABEL_BP_106_14770_20120323_025454_inLine +BABEL_BP_106_14770_20120323_025454_outLine +BABEL_BP_106_14836_20120221_185410_inLine +BABEL_BP_106_14836_20120221_185410_outLine +BABEL_BP_106_14840_20120419_212050_inLine +BABEL_BP_106_14936_20120201_174445_inLine +BABEL_BP_106_14936_20120201_174445_outLine +BABEL_BP_106_15234_20120229_012024_inLine +BABEL_BP_106_15234_20120229_012024_outLine +BABEL_BP_106_15353_20120229_125558_inLine +BABEL_BP_106_15353_20120229_125558_outLine +BABEL_BP_106_15859_20120229_175309_inLine +BABEL_BP_106_15859_20120229_175309_outLine +BABEL_BP_106_15940_20120229_001305_inLine +BABEL_BP_106_15940_20120229_001305_outLine +BABEL_BP_106_15966_20120414_160956_inLine +BABEL_BP_106_16117_20120315_004358_inLine 
+BABEL_BP_106_16117_20120315_004358_outLine +BABEL_BP_106_16185_20120314_174822_outLine +BABEL_BP_106_16307_20120408_002125_inLine +BABEL_BP_106_16307_20120408_002125_outLine +BABEL_BP_106_16385_20120212_202256_inLine +BABEL_BP_106_16385_20120212_202256_outLine +BABEL_BP_106_16406_20120309_161540_inLine +BABEL_BP_106_16783_20120601_214201_inLine +BABEL_BP_106_16984_20120226_022713_inLine +BABEL_BP_106_17013_20120227_184346_inLine +BABEL_BP_106_17013_20120227_184346_outLine +BABEL_BP_106_17093_20120217_180258_outLine +BABEL_BP_106_17203_20120129_171949_inLine +BABEL_BP_106_17353_20120314_160721_inLine +BABEL_BP_106_17353_20120314_163054_inLine +BABEL_BP_106_17452_20120408_203139_inLine +BABEL_BP_106_17452_20120408_204534_inLine +BABEL_BP_106_17452_20120408_205342_inLine +BABEL_BP_106_17511_20120301_194447_inLine +BABEL_BP_106_17511_20120301_194447_outLine +BABEL_BP_106_17606_20120225_235727_inLine +BABEL_BP_106_17606_20120225_235727_outLine +BABEL_BP_106_17850_20120224_223940_inLine +BABEL_BP_106_17850_20120224_223940_outLine +BABEL_BP_106_18209_20120304_004340_inLine +BABEL_BP_106_18209_20120304_004340_outLine +BABEL_BP_106_18701_20120302_161857_inLine +BABEL_BP_106_18701_20120302_161857_outLine +BABEL_BP_106_18802_20120318_014432_inLine +BABEL_BP_106_18802_20120318_014432_outLine +BABEL_BP_106_18903_20120317_121505_inLine +BABEL_BP_106_19063_20120415_183305_inLine +BABEL_BP_106_19248_20120307_192705_inLine +BABEL_BP_106_19248_20120307_192705_outLine +BABEL_BP_106_19290_20120605_180800_inLine +BABEL_BP_106_19479_20120501_154630_inLine +BABEL_BP_106_19479_20120501_155913_inLine +BABEL_BP_106_19479_20120501_165350_inLine +BABEL_BP_106_19619_20120219_023026_inLine +BABEL_BP_106_19619_20120219_023026_outLine +BABEL_BP_106_19656_20120227_201656_inLine +BABEL_BP_106_19656_20120227_201656_outLine +BABEL_BP_106_19861_20120308_181811_inLine +BABEL_BP_106_19861_20120308_181811_outLine +BABEL_BP_106_19867_20120428_022912_inLine +BABEL_BP_106_19915_20120129_043730_inLine +BABEL_BP_106_19915_20120129_043730_outLine +BABEL_BP_106_20320_20120206_212251_inLine +BABEL_BP_106_20320_20120206_212251_outLine +BABEL_BP_106_20591_20120225_172142_inLine +BABEL_BP_106_20680_20120314_195655_inLine +BABEL_BP_106_20680_20120314_233935_inLine +BABEL_BP_106_20680_20120314_233935_outLine +BABEL_BP_106_20740_20120229_234935_inLine +BABEL_BP_106_20741_20120604_131021_inLine +BABEL_BP_106_20775_20120309_184437_inLine +BABEL_BP_106_20775_20120309_184437_outLine +BABEL_BP_106_20985_20120314_184025_inLine +BABEL_BP_106_20985_20120314_184025_outLine +BABEL_BP_106_21050_20120317_181509_outLine +BABEL_BP_106_21258_20120205_012953_outLine +BABEL_BP_106_21259_20120331_174446_inLine +BABEL_BP_106_21259_20120331_174446_outLine +BABEL_BP_106_21259_20120331_184534_inLine +BABEL_BP_106_21259_20120331_184534_outLine +BABEL_BP_106_21259_20120331_225507_inLine +BABEL_BP_106_21259_20120331_225507_outLine +BABEL_BP_106_21306_20120417_233743_inLine +BABEL_BP_106_21367_20120317_185340_inLine +BABEL_BP_106_21367_20120317_185340_outLine +BABEL_BP_106_21430_20120207_184620_inLine +BABEL_BP_106_21430_20120207_184620_outLine +BABEL_BP_106_21518_20120225_224701_inLine +BABEL_BP_106_21518_20120225_224701_outLine +BABEL_BP_106_21556_20120313_021608_inLine +BABEL_BP_106_21556_20120313_021608_outLine +BABEL_BP_106_21714_20120318_174632_inLine +BABEL_BP_106_21714_20120318_174632_outLine +BABEL_BP_106_21845_20120310_002143_inLine +BABEL_BP_106_21845_20120310_002143_outLine +BABEL_BP_106_22034_20120317_021754_inLine 
+BABEL_BP_106_22034_20120317_021754_outLine +BABEL_BP_106_22272_20120318_201647_inLine +BABEL_BP_106_22272_20120318_201647_outLine +BABEL_BP_106_22408_20120213_221623_inLine +BABEL_BP_106_22408_20120213_221623_outLine +BABEL_BP_106_22696_20120308_195105_inLine +BABEL_BP_106_22696_20120308_195105_outLine +BABEL_BP_106_22903_20120224_164344_inLine +BABEL_BP_106_22903_20120224_164344_outLine +BABEL_BP_106_22910_20120129_213616_inLine +BABEL_BP_106_22910_20120129_213616_outLine +BABEL_BP_106_22973_20120311_224022_inLine +BABEL_BP_106_22973_20120311_224022_outLine +BABEL_BP_106_23167_20120128_183627_inLine +BABEL_BP_106_23167_20120128_183627_outLine +BABEL_BP_106_23571_20120229_180344_inLine +BABEL_BP_106_23571_20120229_180344_outLine +BABEL_BP_106_23629_20120304_212835_inLine +BABEL_BP_106_23629_20120304_212835_outLine +BABEL_BP_106_23878_20120209_170350_inLine +BABEL_BP_106_23878_20120209_170350_outLine +BABEL_BP_106_23995_20120225_011657_inLine +BABEL_BP_106_23995_20120225_011657_outLine +BABEL_BP_106_24084_20120318_015502_inLine +BABEL_BP_106_24124_20120415_182317_inLine +BABEL_BP_106_24335_20120408_005503_inLine +BABEL_BP_106_24335_20120408_012607_inLine +BABEL_BP_106_24335_20120408_012607_outLine +BABEL_BP_106_24441_20120417_211954_inLine +BABEL_BP_106_24569_20120307_232752_inLine +BABEL_BP_106_24569_20120307_232752_outLine +BABEL_BP_106_24580_20120604_165125_inLine +BABEL_BP_106_24638_20120419_013630_inLine +BABEL_BP_106_24661_20120322_221220_inLine +BABEL_BP_106_24661_20120322_221220_outLine +BABEL_BP_106_24817_20120301_031015_inLine +BABEL_BP_106_24817_20120301_031015_outLine +BABEL_BP_106_25279_20120401_195557_inLine +BABEL_BP_106_25479_20120315_154117_inLine +BABEL_BP_106_25479_20120315_154117_outLine +BABEL_BP_106_25479_20120315_160418_inLine +BABEL_BP_106_25479_20120315_160418_outLine +BABEL_BP_106_25502_20120129_015831_inLine +BABEL_BP_106_25502_20120129_015831_outLine +BABEL_BP_106_25735_20120314_233234_inLine +BABEL_BP_106_25735_20120314_233234_outLine +BABEL_BP_106_25751_20120227_221828_inLine +BABEL_BP_106_25751_20120227_221828_outLine +BABEL_BP_106_25866_20120304_181012_inLine +BABEL_BP_106_25866_20120304_181012_outLine +BABEL_BP_106_25871_20120228_005211_inLine +BABEL_BP_106_25871_20120228_005957_inLine +BABEL_BP_106_25871_20120228_012444_inLine +BABEL_BP_106_25904_20120213_182237_inLine +BABEL_BP_106_25904_20120213_182237_outLine +BABEL_BP_106_26164_20120401_201225_inLine +BABEL_BP_106_26164_20120401_201225_outLine +BABEL_BP_106_26164_20120401_202221_inLine +BABEL_BP_106_26164_20120401_202221_outLine +BABEL_BP_106_26348_20120314_173141_outLine +BABEL_BP_106_26598_20120415_181527_inLine +BABEL_BP_106_26644_20120411_154709_inLine +BABEL_BP_106_26684_20120211_170412_inLine +BABEL_BP_106_26684_20120211_170412_outLine +BABEL_BP_106_26786_20120306_151101_inLine +BABEL_BP_106_26786_20120306_151101_outLine +BABEL_BP_106_26901_20120212_192301_inLine +BABEL_BP_106_26901_20120212_192301_outLine +BABEL_BP_106_26901_20120212_193813_inLine +BABEL_BP_106_26901_20120212_193813_outLine +BABEL_BP_106_27363_20120315_165356_inLine +BABEL_BP_106_27890_20120302_171119_inLine +BABEL_BP_106_27890_20120302_171119_outLine +BABEL_BP_106_27916_20120403_232720_inLine +BABEL_BP_106_27916_20120403_232720_outLine +BABEL_BP_106_27916_20120403_233612_inLine +BABEL_BP_106_27916_20120403_233612_outLine +BABEL_BP_106_28683_20120331_165731_inLine +BABEL_BP_106_28754_20120205_171932_inLine +BABEL_BP_106_28754_20120205_171932_outLine +BABEL_BP_106_28754_20120205_174934_inLine 
+BABEL_BP_106_28754_20120205_174934_outLine +BABEL_BP_106_29087_20120315_125218_outLine +BABEL_BP_106_29087_20120315_130643_outLine +BABEL_BP_106_29097_20120127_001938_inLine +BABEL_BP_106_29097_20120127_001938_outLine +BABEL_BP_106_29133_20120129_171742_outLine +BABEL_BP_106_29290_20120212_151530_inLine +BABEL_BP_106_29290_20120212_151530_outLine +BABEL_BP_106_29328_20120212_210507_outLine +BABEL_BP_106_29407_20120403_225249_inLine +BABEL_BP_106_29421_20120213_182542_inLine +BABEL_BP_106_29421_20120213_182542_outLine +BABEL_BP_106_29512_20120226_190947_inLine +BABEL_BP_106_29512_20120226_190947_outLine +BABEL_BP_106_29545_20120331_153345_outLine +BABEL_BP_106_29589_20120225_144930_inLine +BABEL_BP_106_29589_20120225_144930_outLine +BABEL_BP_106_29988_20120301_225306_inLine +BABEL_BP_106_29988_20120301_234957_inLine +BABEL_BP_106_30418_20120401_162421_inLine +BABEL_BP_106_30418_20120401_162421_outLine +BABEL_BP_106_30583_20120129_163331_inLine +BABEL_BP_106_30583_20120129_163331_outLine +BABEL_BP_106_30642_20120302_150419_inLine +BABEL_BP_106_30642_20120302_150419_outLine +BABEL_BP_106_30818_20120503_004014_inLine +BABEL_BP_106_31031_20120215_010958_inLine +BABEL_BP_106_31031_20120215_010958_outLine +BABEL_BP_106_31256_20120317_140651_inLine +BABEL_BP_106_31256_20120317_140651_outLine +BABEL_BP_106_31265_20120311_235253_inLine +BABEL_BP_106_31265_20120311_235253_outLine +BABEL_BP_106_31328_20120212_180708_inLine +BABEL_BP_106_31328_20120212_180708_outLine +BABEL_BP_106_31606_20120403_225528_inLine +BABEL_BP_106_31783_20120331_154149_inLine +BABEL_BP_106_31783_20120331_163639_inLine +BABEL_BP_106_31975_20120309_181134_inLine +BABEL_BP_106_31975_20120309_181134_outLine +BABEL_BP_106_32263_20120225_201234_inLine +BABEL_BP_106_32263_20120225_203654_inLine +BABEL_BP_106_32334_20120304_193216_outLine +BABEL_BP_106_32400_20120307_235432_inLine +BABEL_BP_106_32400_20120307_235432_outLine +BABEL_BP_106_32562_20120307_193633_inLine +BABEL_BP_106_32562_20120307_193633_outLine +BABEL_BP_106_32710_20120418_235030_inLine +BABEL_BP_106_32887_20120327_221120_inLine +BABEL_BP_106_32887_20120327_222408_inLine +BABEL_BP_106_32890_20120221_193416_inLine +BABEL_BP_106_32890_20120221_193417_outLine +BABEL_BP_106_33023_20120203_000619_inLine +BABEL_BP_106_33023_20120203_000619_outLine +BABEL_BP_106_33192_20120516_170543_inLine +BABEL_BP_106_33192_20120516_172023_inLine +BABEL_BP_106_33540_20120221_204916_outLine +BABEL_BP_106_33540_20120221_210930_outLine +BABEL_BP_106_33671_20120206_215709_outLine +BABEL_BP_106_33707_20120403_172641_inLine +BABEL_BP_106_33742_20120229_020923_inLine +BABEL_BP_106_33742_20120229_020923_outLine +BABEL_BP_106_33817_20120301_165159_inLine +BABEL_BP_106_33817_20120301_165159_outLine +BABEL_BP_106_33969_20120310_001559_inLine +BABEL_BP_106_33969_20120310_001559_outLine +BABEL_BP_106_34328_20120225_012732_inLine +BABEL_BP_106_34328_20120225_012732_outLine +BABEL_BP_106_34439_20120301_190320_inLine +BABEL_BP_106_34439_20120301_190320_outLine +BABEL_BP_106_34480_20120405_141959_inLine +BABEL_BP_106_34498_20120314_171141_inLine +BABEL_BP_106_34498_20120314_171141_outLine +BABEL_BP_106_34498_20120314_172341_inLine +BABEL_BP_106_34498_20120314_172341_outLine +BABEL_BP_106_34857_20120301_183238_inLine +BABEL_BP_106_34857_20120301_183238_outLine +BABEL_BP_106_34859_20120328_231638_inLine +BABEL_BP_106_34859_20120328_231638_outLine +BABEL_BP_106_34859_20120328_233134_inLine +BABEL_BP_106_34859_20120328_233134_outLine +BABEL_BP_106_34894_20120328_014528_inLine 
+BABEL_BP_106_34894_20120328_014528_outLine +BABEL_BP_106_34961_20120130_011357_inLine +BABEL_BP_106_34961_20120130_011357_outLine +BABEL_BP_106_35016_20120405_195810_outLine +BABEL_BP_106_35153_20120502_162803_inLine +BABEL_BP_106_35153_20120502_170536_inLine +BABEL_BP_106_35179_20120225_063734_inLine +BABEL_BP_106_35179_20120225_063734_outLine +BABEL_BP_106_35188_20120315_154007_inLine +BABEL_BP_106_35188_20120315_154007_outLine +BABEL_BP_106_35305_20120308_195828_inLine +BABEL_BP_106_35305_20120308_195828_outLine +BABEL_BP_106_35318_20120130_203231_inLine +BABEL_BP_106_35318_20120130_203231_outLine +BABEL_BP_106_35329_20120302_140638_inLine +BABEL_BP_106_35329_20120302_140638_outLine +BABEL_BP_106_35441_20120414_194638_inLine +BABEL_BP_106_35470_20120307_190826_inLine +BABEL_BP_106_35470_20120307_190826_outLine +BABEL_BP_106_35576_20120224_211651_inLine +BABEL_BP_106_35576_20120224_211651_outLine +BABEL_BP_106_35612_20120303_000710_inLine +BABEL_BP_106_35612_20120303_000710_outLine +BABEL_BP_106_35706_20120501_011424_inLine +BABEL_BP_106_35951_20120419_001936_inLine +BABEL_BP_106_35972_20120411_154338_inLine +BABEL_BP_106_35972_20120411_155457_inLine +BABEL_BP_106_36143_20120128_230220_outLine +BABEL_BP_106_36268_20120209_180615_inLine +BABEL_BP_106_36268_20120209_180615_outLine +BABEL_BP_106_36276_20120317_130620_inLine +BABEL_BP_106_36276_20120317_134742_inLine +BABEL_BP_106_36276_20120317_134742_outLine +BABEL_BP_106_36383_20120225_021045_inLine +BABEL_BP_106_36383_20120225_021045_outLine +BABEL_BP_106_36391_20120205_201108_inLine +BABEL_BP_106_36391_20120205_201108_outLine +BABEL_BP_106_36868_20120417_204120_inLine +BABEL_BP_106_36868_20120417_210037_inLine +BABEL_BP_106_37064_20120324_130301_inLine +BABEL_BP_106_37064_20120324_130301_outLine +BABEL_BP_106_37258_20120304_002638_inLine +BABEL_BP_106_37258_20120304_002638_outLine +BABEL_BP_106_37260_20120130_191541_inLine +BABEL_BP_106_37260_20120130_191541_outLine +BABEL_BP_106_37766_20120229_163334_inLine +BABEL_BP_106_37766_20120229_163334_outLine +BABEL_BP_106_38175_20120209_214322_inLine +BABEL_BP_106_38175_20120209_214322_outLine +BABEL_BP_106_38248_20120404_214148_inLine +BABEL_BP_106_38248_20120404_222004_inLine +BABEL_BP_106_38248_20120404_222004_outLine +BABEL_BP_106_38248_20120404_223317_inLine +BABEL_BP_106_38248_20120404_223317_outLine +BABEL_BP_106_38396_20120323_023143_inLine +BABEL_BP_106_38396_20120323_023143_outLine +BABEL_BP_106_38464_20120318_215505_inLine +BABEL_BP_106_38464_20120318_215505_outLine +BABEL_BP_106_38464_20120318_220931_inLine +BABEL_BP_106_38464_20120318_220931_outLine +BABEL_BP_106_38635_20120605_171532_inLine +BABEL_BP_106_38640_20120130_174518_inLine +BABEL_BP_106_38640_20120130_174518_outLine +BABEL_BP_106_38656_20120321_230900_inLine +BABEL_BP_106_38656_20120321_232832_inLine +BABEL_BP_106_38879_20120203_203542_inLine +BABEL_BP_106_38912_20120307_023807_outLine +BABEL_BP_106_38956_20120127_010500_inLine +BABEL_BP_106_38956_20120127_010500_outLine +BABEL_BP_106_39080_20120225_180230_inLine +BABEL_BP_106_39080_20120225_180230_outLine +BABEL_BP_106_39114_20120315_131924_inLine +BABEL_BP_106_39114_20120315_135035_inLine +BABEL_BP_106_39114_20120315_142026_inLine +BABEL_BP_106_39179_20120331_134039_outLine +BABEL_BP_106_39179_20120331_134617_outLine +BABEL_BP_106_39264_20120228_015102_inLine +BABEL_BP_106_39264_20120228_015102_outLine +BABEL_BP_106_39264_20120228_022421_inLine +BABEL_BP_106_39264_20120228_022421_outLine +BABEL_BP_106_39563_20120414_162942_inLine 
+BABEL_BP_106_39756_20120312_165815_inLine +BABEL_BP_106_40002_20120301_225806_inLine +BABEL_BP_106_40197_20120308_211406_inLine +BABEL_BP_106_40211_20120329_005438_inLine +BABEL_BP_106_40211_20120329_005439_outLine +BABEL_BP_106_40288_20120516_161057_inLine +BABEL_BP_106_40439_20120405_122042_inLine +BABEL_BP_106_40510_20120221_155613_inLine +BABEL_BP_106_40510_20120221_155613_outLine +BABEL_BP_106_40680_20120511_153305_inLine +BABEL_BP_106_40882_20120418_205714_inLine +BABEL_BP_106_41327_20120128_163042_inLine +BABEL_BP_106_41327_20120128_163042_outLine +BABEL_BP_106_41541_20120315_003903_inLine +BABEL_BP_106_41541_20120315_003903_outLine +BABEL_BP_106_41557_20120324_040736_inLine +BABEL_BP_106_41557_20120324_040736_outLine +BABEL_BP_106_41557_20120324_043210_inLine +BABEL_BP_106_41557_20120324_043210_outLine +BABEL_BP_106_41710_20120410_205005_outLine +BABEL_BP_106_41733_20120307_171130_inLine +BABEL_BP_106_41816_20120415_184339_inLine +BABEL_BP_106_41949_20120213_174300_inLine +BABEL_BP_106_41949_20120213_174300_outLine +BABEL_BP_106_42183_20120323_223118_inLine +BABEL_BP_106_42183_20120327_190153_inLine +BABEL_BP_106_42651_20120131_020401_inLine +BABEL_BP_106_42651_20120131_020401_outLine +BABEL_BP_106_42768_20120411_173257_inLine +BABEL_BP_106_42820_20120415_180402_inLine +BABEL_BP_106_42910_20120128_213020_inLine +BABEL_BP_106_42910_20120128_213020_outLine +BABEL_BP_106_43069_20120409_204043_inLine +BABEL_BP_106_43069_20120409_204043_outLine +BABEL_BP_106_43306_20120210_032400_inLine +BABEL_BP_106_43306_20120210_032400_outLine +BABEL_BP_106_43425_20120317_174519_inLine +BABEL_BP_106_43425_20120317_175422_inLine +BABEL_BP_106_43425_20120317_183658_inLine +BABEL_BP_106_43652_20120208_010946_inLine +BABEL_BP_106_43652_20120208_010946_outLine +BABEL_BP_106_43939_20120317_194330_inLine +BABEL_BP_106_43939_20120317_194330_outLine +BABEL_BP_106_44038_20120317_204039_inLine +BABEL_BP_106_44038_20120317_205302_inLine +BABEL_BP_106_44052_20120327_234511_inLine +BABEL_BP_106_44052_20120327_234511_outLine +BABEL_BP_106_44052_20120330_222904_inLine +BABEL_BP_106_44052_20120330_222904_outLine +BABEL_BP_106_44369_20120318_231951_inLine +BABEL_BP_106_44369_20120319_020556_inLine +BABEL_BP_106_44756_20120301_235107_inLine +BABEL_BP_106_44756_20120301_235107_outLine +BABEL_BP_106_45106_20120118_001529_inLine +BABEL_BP_106_45106_20120118_001529_outLine +BABEL_BP_106_45145_20120219_143857_inLine +BABEL_BP_106_45361_20120228_002747_inLine +BABEL_BP_106_45361_20120228_002747_outLine +BABEL_BP_106_45453_20120404_225631_inLine +BABEL_BP_106_45453_20120404_225631_outLine +BABEL_BP_106_45511_20120129_010308_inLine +BABEL_BP_106_45511_20120129_010308_outLine +BABEL_BP_106_45642_20120203_042123_inLine +BABEL_BP_106_45642_20120203_042123_outLine +BABEL_BP_106_45677_20120315_012905_inLine +BABEL_BP_106_45677_20120315_013919_inLine +BABEL_BP_106_45681_20120306_210519_inLine +BABEL_BP_106_45702_20120226_175928_inLine +BABEL_BP_106_45702_20120226_175928_outLine +BABEL_BP_106_45793_20120127_170707_inLine +BABEL_BP_106_45793_20120127_170707_outLine +BABEL_BP_106_46427_20120303_200620_outLine +BABEL_BP_106_46435_20120317_184057_outLine +BABEL_BP_106_46603_20120227_192836_inLine +BABEL_BP_106_46603_20120227_192836_outLine +BABEL_BP_106_46744_20120324_002344_inLine +BABEL_BP_106_46744_20120324_002344_outLine +BABEL_BP_106_46813_20120416_015932_inLine +BABEL_BP_106_47263_20120305_023242_inLine +BABEL_BP_106_47429_20120329_195737_inLine +BABEL_BP_106_47429_20120329_195737_outLine +BABEL_BP_106_47469_20120210_221258_inLine 
+BABEL_BP_106_47469_20120210_221258_outLine +BABEL_BP_106_47661_20120131_002939_outLine +BABEL_BP_106_47794_20120403_181127_inLine +BABEL_BP_106_47794_20120403_182418_inLine +BABEL_BP_106_47821_20120228_011928_inLine +BABEL_BP_106_47823_20120302_214046_outLine +BABEL_BP_106_47906_20120418_223527_inLine +BABEL_BP_106_47906_20120418_225920_inLine +BABEL_BP_106_48059_20120317_161513_inLine +BABEL_BP_106_48059_20120317_161513_outLine +BABEL_BP_106_48061_20120303_234335_inLine +BABEL_BP_106_48181_20120211_011159_inLine +BABEL_BP_106_48181_20120211_011159_outLine +BABEL_BP_106_48188_20120307_034039_outLine +BABEL_BP_106_48317_20120301_002256_inLine +BABEL_BP_106_48317_20120301_002256_outLine +BABEL_BP_106_48418_20120407_165729_inLine +BABEL_BP_106_48536_20120129_053527_inLine +BABEL_BP_106_48683_20120505_022553_inLine +BABEL_BP_106_49239_20120317_123831_inLine +BABEL_BP_106_49309_20120330_230450_inLine +BABEL_BP_106_49346_20120405_185601_inLine +BABEL_BP_106_49381_20120414_193653_inLine +BABEL_BP_106_49582_20120213_230049_inLine +BABEL_BP_106_49582_20120213_230049_outLine +BABEL_BP_106_49624_20120224_194049_inLine +BABEL_BP_106_49624_20120224_194049_outLine +BABEL_BP_106_49689_20120225_153748_outLine +BABEL_BP_106_49714_20120227_191755_inLine +BABEL_BP_106_49714_20120227_191755_outLine +BABEL_BP_106_50141_20120309_225945_inLine +BABEL_BP_106_50141_20120309_225945_outLine +BABEL_BP_106_50298_20120227_005517_inLine +BABEL_BP_106_50298_20120227_005517_outLine +BABEL_BP_106_50387_20120229_175528_inLine +BABEL_BP_106_50387_20120229_175528_outLine +BABEL_BP_106_50409_20120319_185818_inLine +BABEL_BP_106_50409_20120319_185818_outLine +BABEL_BP_106_50410_20120229_183217_inLine +BABEL_BP_106_50410_20120229_183217_outLine +BABEL_BP_106_50468_20120417_231448_inLine +BABEL_BP_106_50476_20120304_171701_inLine +BABEL_BP_106_50476_20120304_171701_outLine +BABEL_BP_106_50555_20120605_134945_inLine +BABEL_BP_106_50589_20120128_192230_inLine +BABEL_BP_106_50589_20120128_192230_outLine +BABEL_BP_106_50641_20120317_180902_inLine +BABEL_BP_106_50641_20120317_180902_outLine +BABEL_BP_106_50752_20120310_001913_inLine +BABEL_BP_106_51042_20120313_230521_inLine +BABEL_BP_106_51073_20120128_200706_inLine +BABEL_BP_106_51073_20120128_200706_outLine +BABEL_BP_106_51149_20120329_174521_inLine +BABEL_BP_106_51149_20120329_174521_outLine +BABEL_BP_106_51448_20120413_214526_inLine +BABEL_BP_106_51448_20120413_220517_inLine +BABEL_BP_106_51727_20120229_000250_inLine +BABEL_BP_106_51727_20120229_000250_outLine +BABEL_BP_106_52033_20120228_001715_inLine +BABEL_BP_106_52033_20120228_001715_outLine +BABEL_BP_106_52154_20120312_004528_outLine +BABEL_BP_106_52325_20120211_220159_inLine +BABEL_BP_106_52366_20120124_164406_inLine +BABEL_BP_106_52366_20120124_164406_outLine +BABEL_BP_106_52642_20120222_175700_inLine +BABEL_BP_106_52642_20120222_175700_outLine +BABEL_BP_106_52902_20120605_184038_inLine +BABEL_BP_106_53179_20120301_181951_inLine +BABEL_BP_106_53179_20120301_181951_outLine +BABEL_BP_106_53315_20120329_182550_inLine +BABEL_BP_106_53315_20120329_182550_outLine +BABEL_BP_106_53376_20120323_000750_inLine +BABEL_BP_106_53376_20120323_000750_outLine +BABEL_BP_106_53463_20120605_180156_inLine +BABEL_BP_106_53653_20120405_182452_inLine +BABEL_BP_106_53653_20120405_183849_inLine +BABEL_BP_106_53824_20120227_025033_inLine +BABEL_BP_106_53824_20120227_025033_outLine +BABEL_BP_106_54358_20120229_223811_outLine +BABEL_BP_106_54621_20120227_235308_inLine +BABEL_BP_106_54621_20120227_235308_outLine 
+BABEL_BP_106_54785_20120303_011154_outLine +BABEL_BP_106_55182_20120422_185742_inLine +BABEL_BP_106_55204_20120330_230730_outLine +BABEL_BP_106_55288_20120503_010325_inLine +BABEL_BP_106_55355_20120405_180949_inLine +BABEL_BP_106_55450_20120302_125827_inLine +BABEL_BP_106_55450_20120302_125827_outLine +BABEL_BP_106_55823_20120329_210142_inLine +BABEL_BP_106_55823_20120329_210142_outLine +BABEL_BP_106_55838_20120318_160306_outLine +BABEL_BP_106_55922_20120322_021453_inLine +BABEL_BP_106_55922_20120322_021453_outLine +BABEL_BP_106_55922_20120322_022537_inLine +BABEL_BP_106_55922_20120322_022537_outLine +BABEL_BP_106_55944_20120306_172041_inLine +BABEL_BP_106_55944_20120306_172041_outLine +BABEL_BP_106_56117_20120313_001237_outLine +BABEL_BP_106_56342_20120605_162901_inLine +BABEL_BP_106_56634_20120328_235133_inLine +BABEL_BP_106_56634_20120328_235133_outLine +BABEL_BP_106_56868_20120203_012057_inLine +BABEL_BP_106_56868_20120203_012057_outLine +BABEL_BP_106_56943_20120126_224048_outLine +BABEL_BP_106_57020_20120305_121648_outLine +BABEL_BP_106_57039_20120314_003848_inLine +BABEL_BP_106_57039_20120314_005748_inLine +BABEL_BP_106_57609_20120304_174858_inLine +BABEL_BP_106_57609_20120304_174858_outLine +BABEL_BP_106_57609_20120304_180016_inLine +BABEL_BP_106_57609_20120304_180016_outLine +BABEL_BP_106_57638_20120414_164822_inLine +BABEL_BP_106_58108_20120411_180115_inLine +BABEL_BP_106_58192_20120308_182924_inLine +BABEL_BP_106_58192_20120308_182924_outLine +BABEL_BP_106_58232_20120226_031714_inLine +BABEL_BP_106_58232_20120226_031714_outLine +BABEL_BP_106_58447_20120329_013316_inLine +BABEL_BP_106_58447_20120329_013316_outLine +BABEL_BP_106_58536_20120210_221536_inLine +BABEL_BP_106_58536_20120210_221536_outLine +BABEL_BP_106_58572_20120401_203941_inLine +BABEL_BP_106_58572_20120401_203941_outLine +BABEL_BP_106_58587_20120411_003742_inLine +BABEL_BP_106_58587_20120411_003742_outLine +BABEL_BP_106_58746_20120308_211819_inLine +BABEL_BP_106_58746_20120308_211819_outLine +BABEL_BP_106_58956_20120602_130340_inLine +BABEL_BP_106_59071_20120228_033845_inLine +BABEL_BP_106_59071_20120228_033845_outLine +BABEL_BP_106_59175_20120221_181535_inLine +BABEL_BP_106_59175_20120221_181535_outLine +BABEL_BP_106_59383_20120317_170327_inLine +BABEL_BP_106_59383_20120317_175629_inLine +BABEL_BP_106_59544_20120209_182249_inLine +BABEL_BP_106_59544_20120209_182249_outLine +BABEL_BP_106_59565_20120321_231854_inLine +BABEL_BP_106_59565_20120321_233445_inLine +BABEL_BP_106_59565_20120321_234523_inLine +BABEL_BP_106_59628_20120309_181006_inLine +BABEL_BP_106_59628_20120309_181006_outLine +BABEL_BP_106_59746_20120225_061555_outLine +BABEL_BP_106_59764_20120222_204824_inLine +BABEL_BP_106_59764_20120222_204824_outLine +BABEL_BP_106_59846_20120318_164327_inLine +BABEL_BP_106_59878_20120505_021018_inLine +BABEL_BP_106_59925_20120403_160805_inLine +BABEL_BP_106_60106_20120422_155717_inLine +BABEL_BP_106_60238_20120307_161043_inLine +BABEL_BP_106_60238_20120307_165816_inLine +BABEL_BP_106_60238_20120307_165816_outLine +BABEL_BP_106_60250_20120203_013207_inLine +BABEL_BP_106_60250_20120203_013207_outLine +BABEL_BP_106_60598_20120324_022730_inLine +BABEL_BP_106_60598_20120324_022730_outLine +BABEL_BP_106_60677_20120418_162110_inLine +BABEL_BP_106_60677_20120418_162841_inLine +BABEL_BP_106_60693_20120331_185728_inLine +BABEL_BP_106_60693_20120414_201244_inLine +BABEL_BP_106_60753_20120120_175004_inLine +BABEL_BP_106_60753_20120120_175004_outLine +BABEL_BP_106_60826_20120605_170407_inLine 
+BABEL_BP_106_61073_20120428_141557_outLine +BABEL_BP_106_61446_20120603_125637_inLine +BABEL_BP_106_61489_20120417_203944_inLine +BABEL_BP_106_61566_20120323_224814_inLine +BABEL_BP_106_61566_20120324_111941_inLine +BABEL_BP_106_61772_20120502_164250_inLine +BABEL_BP_106_61906_20120418_175007_inLine +BABEL_BP_106_62163_20120313_231604_outLine +BABEL_BP_106_62163_20120313_232930_outLine +BABEL_BP_106_62255_20120314_190940_inLine +BABEL_BP_106_62255_20120314_190940_outLine +BABEL_BP_106_62452_20120420_001258_inLine +BABEL_BP_106_62452_20120420_002416_inLine +BABEL_BP_106_62610_20120511_023409_inLine +BABEL_BP_106_62610_20120511_024325_inLine +BABEL_BP_106_63114_20120226_074838_inLine +BABEL_BP_106_63114_20120226_074838_outLine +BABEL_BP_106_63305_20120324_030221_inLine +BABEL_BP_106_63305_20120324_030221_outLine +BABEL_BP_106_63368_20120604_114321_inLine +BABEL_BP_106_63392_20120405_141717_inLine +BABEL_BP_106_63468_20120409_200746_inLine +BABEL_BP_106_63468_20120409_200746_outLine +BABEL_BP_106_63711_20120129_023323_inLine +BABEL_BP_106_63711_20120129_023323_outLine +BABEL_BP_106_63741_20120328_001923_inLine +BABEL_BP_106_64172_20120331_141241_inLine +BABEL_BP_106_64172_20120331_141241_outLine +BABEL_BP_106_64172_20120331_152028_inLine +BABEL_BP_106_64172_20120331_152028_outLine +BABEL_BP_106_64226_20120404_231458_inLine +BABEL_BP_106_64334_20120528_174746_inLine +BABEL_BP_106_64351_20120217_181140_inLine +BABEL_BP_106_64351_20120217_181140_outLine +BABEL_BP_106_64889_20120307_175001_inLine +BABEL_BP_106_64889_20120307_175001_outLine +BABEL_BP_106_65248_20120321_230954_inLine +BABEL_BP_106_65248_20120321_230954_outLine +BABEL_BP_106_65371_20120308_201622_inLine +BABEL_BP_106_65371_20120308_201622_outLine +BABEL_BP_106_65579_20120317_123135_inLine +BABEL_BP_106_65601_20120211_212006_inLine +BABEL_BP_106_65631_20120216_021352_inLine +BABEL_BP_106_65631_20120216_021352_outLine +BABEL_BP_106_65656_20120309_195913_outLine +BABEL_BP_106_65989_20120605_163026_inLine +BABEL_BP_106_66101_20120227_174035_inLine +BABEL_BP_106_66101_20120227_174035_outLine +BABEL_BP_106_66188_20120316_230006_inLine +BABEL_BP_106_66188_20120316_230006_outLine +BABEL_BP_106_66247_20120331_214412_inLine +BABEL_BP_106_66247_20120331_214412_outLine +BABEL_BP_106_66416_20120225_122454_inLine +BABEL_BP_106_66416_20120225_122454_outLine +BABEL_BP_106_66559_20120505_033828_inLine +BABEL_BP_106_66709_20120302_222833_inLine +BABEL_BP_106_66872_20120302_010751_inLine +BABEL_BP_106_66872_20120302_012055_inLine +BABEL_BP_106_66964_20120416_132128_inLine +BABEL_BP_106_67304_20120222_212038_inLine +BABEL_BP_106_67304_20120222_212038_outLine +BABEL_BP_106_67411_20120210_155625_inLine +BABEL_BP_106_67411_20120210_155625_outLine +BABEL_BP_106_67630_20120324_031205_inLine +BABEL_BP_106_67630_20120324_031205_outLine +BABEL_BP_106_67630_20120324_033243_inLine +BABEL_BP_106_67630_20120324_033243_outLine +BABEL_BP_106_67733_20120219_180702_inLine +BABEL_BP_106_67772_20120130_225552_inLine +BABEL_BP_106_67772_20120130_225552_outLine +BABEL_BP_106_68111_20120419_232912_inLine +BABEL_BP_106_68276_20120308_201728_inLine +BABEL_BP_106_68276_20120308_203526_inLine +BABEL_BP_106_68287_20120408_172649_inLine +BABEL_BP_106_68392_20120331_224408_inLine +BABEL_BP_106_68392_20120331_224408_outLine +BABEL_BP_106_68490_20120227_152714_inLine +BABEL_BP_106_68610_20120505_011125_inLine +BABEL_BP_106_68665_20120409_202242_inLine +BABEL_BP_106_68671_20120407_164400_inLine +BABEL_BP_106_68803_20120121_171931_inLine +BABEL_BP_106_68803_20120121_171931_outLine 
+BABEL_BP_106_69145_20120319_175304_inLine +BABEL_BP_106_69145_20120319_175304_outLine +BABEL_BP_106_69236_20120216_195133_inLine +BABEL_BP_106_69275_20120318_200803_inLine +BABEL_BP_106_69275_20120318_204539_inLine +BABEL_BP_106_69275_20120318_204539_outLine +BABEL_BP_106_69446_20120416_010511_inLine +BABEL_BP_106_69621_20120130_005117_inLine +BABEL_BP_106_69621_20120130_005117_outLine +BABEL_BP_106_70077_20120315_191801_inLine +BABEL_BP_106_70285_20120128_211036_outLine +BABEL_BP_106_70511_20120224_173336_inLine +BABEL_BP_106_70511_20120224_173336_outLine +BABEL_BP_106_70983_20120516_163100_inLine +BABEL_BP_106_71160_20120224_221158_inLine +BABEL_BP_106_71176_20120225_190017_inLine +BABEL_BP_106_71176_20120225_190017_outLine +BABEL_BP_106_71310_20120221_005007_inLine +BABEL_BP_106_71310_20120221_005007_outLine +BABEL_BP_106_71313_20120419_205542_inLine +BABEL_BP_106_71741_20120127_162656_inLine +BABEL_BP_106_72119_20120315_153943_inLine +BABEL_BP_106_72142_20120327_171827_inLine +BABEL_BP_106_72142_20120327_173216_inLine +BABEL_BP_106_72142_20120327_173216_outLine +BABEL_BP_106_72801_20120325_202928_outLine +BABEL_BP_106_72801_20120325_222633_inLine +BABEL_BP_106_72801_20120325_222633_outLine +BABEL_BP_106_72858_20120605_123842_inLine +BABEL_BP_106_73059_20120309_164956_outLine +BABEL_BP_106_73250_20120410_001928_inLine +BABEL_BP_106_73250_20120410_003448_inLine +BABEL_BP_106_73438_20120314_002432_outLine +BABEL_BP_106_73440_20120131_160945_inLine +BABEL_BP_106_73440_20120131_160945_outLine +BABEL_BP_106_73752_20120228_142547_inLine +BABEL_BP_106_73752_20120228_142547_outLine +BABEL_BP_106_73780_20120304_170119_outLine +BABEL_BP_106_73786_20120202_230843_inLine +BABEL_BP_106_73786_20120202_230843_outLine +BABEL_BP_106_73911_20120219_194519_inLine +BABEL_BP_106_73911_20120219_194519_outLine +BABEL_BP_106_74214_20120503_012037_inLine +BABEL_BP_106_74295_20120317_213141_inLine +BABEL_BP_106_74295_20120317_214659_inLine +BABEL_BP_106_74368_20120317_141935_inLine +BABEL_BP_106_74395_20120414_202413_inLine +BABEL_BP_106_74508_20120209_223405_inLine +BABEL_BP_106_74508_20120209_223405_outLine +BABEL_BP_106_74533_20120502_222417_inLine +BABEL_BP_106_74986_20120418_222615_inLine +BABEL_BP_106_75036_20120224_163823_inLine +BABEL_BP_106_75036_20120224_163823_outLine +BABEL_BP_106_75125_20120325_173456_inLine +BABEL_BP_106_75151_20120405_172457_inLine +BABEL_BP_106_75243_20120314_181814_inLine +BABEL_BP_106_75243_20120314_193719_inLine +BABEL_BP_106_75243_20120314_194814_inLine +BABEL_BP_106_75740_20120128_205720_inLine +BABEL_BP_106_75740_20120128_205720_outLine +BABEL_BP_106_75919_20120419_222309_inLine +BABEL_BP_106_75932_20120301_185217_inLine +BABEL_BP_106_75932_20120301_185217_outLine +BABEL_BP_106_76252_20120318_131223_outLine +BABEL_BP_106_76320_20120317_171522_inLine +BABEL_BP_106_76451_20120128_174820_inLine +BABEL_BP_106_76451_20120128_174820_outLine +BABEL_BP_106_76733_20120317_205542_inLine +BABEL_BP_106_76733_20120317_205542_outLine +BABEL_BP_106_76748_20120313_033301_inLine +BABEL_BP_106_76748_20120313_033301_outLine +BABEL_BP_106_76919_20120301_013753_outLine +BABEL_BP_106_76989_20120212_225118_inLine +BABEL_BP_106_76993_20120227_180157_inLine +BABEL_BP_106_76993_20120227_180157_outLine +BABEL_BP_106_77104_20120320_000526_inLine +BABEL_BP_106_77244_20120317_154534_inLine +BABEL_BP_106_77244_20120317_160037_inLine +BABEL_BP_106_77315_20120227_153127_outLine +BABEL_BP_106_77342_20120224_193702_inLine +BABEL_BP_106_77342_20120224_193702_outLine +BABEL_BP_106_77342_20120224_201725_inLine 
+BABEL_BP_106_77342_20120224_201725_outLine +BABEL_BP_106_77487_20120310_023017_outLine +BABEL_BP_106_77584_20120228_185654_inLine +BABEL_BP_106_77584_20120228_185654_outLine +BABEL_BP_106_78094_20120127_144526_inLine +BABEL_BP_106_78406_20120331_145033_inLine +BABEL_BP_106_78406_20120331_145857_inLine +BABEL_BP_106_78516_20120324_012547_inLine +BABEL_BP_106_78516_20120324_012547_outLine +BABEL_BP_106_78617_20120325_220217_inLine +BABEL_BP_106_78617_20120325_220217_outLine +BABEL_BP_106_78617_20120327_225421_inLine +BABEL_BP_106_78617_20120327_225421_outLine +BABEL_BP_106_78753_20120511_020629_inLine +BABEL_BP_106_79284_20120317_154901_inLine +BABEL_BP_106_79284_20120317_190801_inLine +BABEL_BP_106_79519_20120407_175434_inLine +BABEL_BP_106_79526_20120418_000428_inLine +BABEL_BP_106_79593_20120528_180841_inLine +BABEL_BP_106_79650_20120410_220151_outLine +BABEL_BP_106_79650_20120410_221127_outLine +BABEL_BP_106_79970_20120420_164617_inLine +BABEL_BP_106_80068_20120414_213628_inLine +BABEL_BP_106_80075_20120605_113236_inLine +BABEL_BP_106_80150_20120229_200345_inLine +BABEL_BP_106_80150_20120229_200345_outLine +BABEL_BP_106_80174_20120422_023124_inLine +BABEL_BP_106_80290_20120311_231738_inLine +BABEL_BP_106_80290_20120311_231738_outLine +BABEL_BP_106_80290_20120311_234143_inLine +BABEL_BP_106_80290_20120311_234143_outLine +BABEL_BP_106_80535_20120319_003708_inLine +BABEL_BP_106_80598_20120416_002228_inLine +BABEL_BP_106_80638_20120411_224029_inLine +BABEL_BP_106_80701_20120315_153813_inLine +BABEL_BP_106_81065_20120603_120830_inLine +BABEL_BP_106_81096_20120604_122742_inLine +BABEL_BP_106_81119_20120417_201549_inLine +BABEL_BP_106_81601_20120205_223405_inLine +BABEL_BP_106_81601_20120205_223405_outLine +BABEL_BP_106_81642_20120218_232158_inLine +BABEL_BP_106_81642_20120218_232158_outLine +BABEL_BP_106_81647_20120304_131330_inLine +BABEL_BP_106_81647_20120304_131330_outLine +BABEL_BP_106_81769_20120330_213453_outLine +BABEL_BP_106_81799_20120220_212705_inLine +BABEL_BP_106_81799_20120220_212705_outLine +BABEL_BP_106_81820_20120329_190503_inLine +BABEL_BP_106_81844_20120226_002405_inLine +BABEL_BP_106_81844_20120226_002405_outLine +BABEL_BP_106_81944_20120404_120724_inLine +BABEL_BP_106_81944_20120404_120724_outLine +BABEL_BP_106_82409_20120227_002253_inLine +BABEL_BP_106_82409_20120227_002253_outLine +BABEL_BP_106_82443_20120315_182456_outLine +BABEL_BP_106_82484_20120210_215502_outLine +BABEL_BP_106_82766_20120211_165522_inLine +BABEL_BP_106_82766_20120211_165522_outLine +BABEL_BP_106_83186_20120227_203823_inLine +BABEL_BP_106_83186_20120227_203823_outLine +BABEL_BP_106_83531_20120210_162513_inLine +BABEL_BP_106_83531_20120210_162513_outLine +BABEL_BP_106_83634_20120317_172215_outLine +BABEL_BP_106_83702_20120212_030118_inLine +BABEL_BP_106_83702_20120212_030118_outLine +BABEL_BP_106_83921_20120227_141419_inLine +BABEL_BP_106_83921_20120227_141419_outLine +BABEL_BP_106_84025_20120307_171246_inLine +BABEL_BP_106_84025_20120307_171246_outLine +BABEL_BP_106_84171_20120317_135204_inLine +BABEL_BP_106_84171_20120317_135204_outLine +BABEL_BP_106_84284_20120304_161058_inLine +BABEL_BP_106_84284_20120304_161058_outLine +BABEL_BP_106_84394_20120226_172149_inLine +BABEL_BP_106_84394_20120226_172149_outLine +BABEL_BP_106_84488_20120329_015848_inLine +BABEL_BP_106_84488_20120329_015848_outLine +BABEL_BP_106_84608_20120304_145910_inLine +BABEL_BP_106_84608_20120304_145910_outLine +BABEL_BP_106_84700_20120308_185454_inLine +BABEL_BP_106_84700_20120308_185454_outLine 
+BABEL_BP_106_84756_20120324_004957_inLine +BABEL_BP_106_84756_20120324_004957_outLine +BABEL_BP_106_84779_20120313_035600_inLine +BABEL_BP_106_84779_20120313_041105_inLine +BABEL_BP_106_84779_20120313_041106_outLine +BABEL_BP_106_84980_20120227_014019_inLine +BABEL_BP_106_84980_20120227_014019_outLine +BABEL_BP_106_85101_20120401_193440_inLine +BABEL_BP_106_85101_20120401_193440_outLine +BABEL_BP_106_85533_20120130_235957_inLine +BABEL_BP_106_85533_20120130_235957_outLine +BABEL_BP_106_85752_20120301_023900_inLine +BABEL_BP_106_85752_20120301_023900_outLine +BABEL_BP_106_86014_20120605_153510_inLine +BABEL_BP_106_86016_20120211_173225_inLine +BABEL_BP_106_86016_20120211_173225_outLine +BABEL_BP_106_86029_20120130_001526_inLine +BABEL_BP_106_86029_20120130_001526_outLine +BABEL_BP_106_86337_20120411_130915_inLine +BABEL_BP_106_86344_20120323_230601_outLine +BABEL_BP_106_86344_20120323_231804_inLine +BABEL_BP_106_86344_20120323_231804_outLine +BABEL_BP_106_86344_20120323_232835_inLine +BABEL_BP_106_86344_20120323_232835_outLine +BABEL_BP_106_87124_20120411_050315_inLine +BABEL_BP_106_87124_20120411_050315_outLine +BABEL_BP_106_87139_20120227_175141_inLine +BABEL_BP_106_87139_20120227_175141_outLine +BABEL_BP_106_87210_20120212_183156_inLine +BABEL_BP_106_87210_20120212_183156_outLine +BABEL_BP_106_87218_20120501_004341_inLine +BABEL_BP_106_87281_20120318_175101_inLine +BABEL_BP_106_87281_20120318_175101_outLine +BABEL_BP_106_87520_20120428_013320_inLine +BABEL_BP_106_87520_20120428_014139_inLine +BABEL_BP_106_87539_20120228_005220_inLine +BABEL_BP_106_87539_20120228_005220_outLine +BABEL_BP_106_87564_20120225_141938_inLine +BABEL_BP_106_87564_20120225_141938_outLine +BABEL_BP_106_87607_20120221_144252_inLine +BABEL_BP_106_87607_20120221_150220_inLine +BABEL_BP_106_87607_20120221_153642_inLine +BABEL_BP_106_87634_20120203_031511_inLine +BABEL_BP_106_87634_20120203_031511_outLine +BABEL_BP_106_87850_20120212_182620_inLine +BABEL_BP_106_87850_20120212_184930_inLine +BABEL_BP_106_87850_20120212_190826_inLine +BABEL_BP_106_87862_20120224_185514_outLine +BABEL_BP_106_87985_20120328_214048_inLine +BABEL_BP_106_87985_20120328_214048_outLine +BABEL_BP_106_88245_20120309_175128_inLine +BABEL_BP_106_88385_20120307_191827_inLine +BABEL_BP_106_88385_20120307_191827_outLine +BABEL_BP_106_88506_20120411_195636_inLine +BABEL_BP_106_88929_20120421_132840_inLine +BABEL_BP_106_88929_20120421_134445_inLine +BABEL_BP_106_89301_20120229_011855_inLine +BABEL_BP_106_89301_20120229_011855_outLine +BABEL_BP_106_89301_20120229_012853_inLine +BABEL_BP_106_89301_20120229_012853_outLine +BABEL_BP_106_89417_20120131_014042_outLine +BABEL_BP_106_89583_20120304_211628_outLine +BABEL_BP_106_89583_20120304_214338_outLine +BABEL_BP_106_89727_20120404_165020_inLine +BABEL_BP_106_90127_20120228_034539_inLine +BABEL_BP_106_90202_20120311_205142_outLine +BABEL_BP_106_90389_20120318_150647_inLine +BABEL_BP_106_90389_20120318_151932_inLine +BABEL_BP_106_90393_20120211_230839_inLine +BABEL_BP_106_90393_20120211_230839_outLine +BABEL_BP_106_90436_20120228_014236_inLine +BABEL_BP_106_90436_20120228_014236_outLine +BABEL_BP_106_90490_20120322_030219_inLine +BABEL_BP_106_90490_20120322_033415_inLine +BABEL_BP_106_90506_20120128_002109_inLine +BABEL_BP_106_90506_20120128_002109_outLine +BABEL_BP_106_90511_20120210_164822_inLine +BABEL_BP_106_90511_20120210_164822_outLine +BABEL_BP_106_90742_20120501_022105_inLine +BABEL_BP_106_90742_20120501_022837_inLine +BABEL_BP_106_90951_20120302_230530_inLine 
+BABEL_BP_106_90951_20120302_232555_inLine +BABEL_BP_106_91000_20120311_230040_inLine +BABEL_BP_106_91000_20120311_230040_outLine +BABEL_BP_106_91000_20120311_231020_inLine +BABEL_BP_106_91000_20120311_231020_outLine +BABEL_BP_106_91002_20120411_201622_inLine +BABEL_BP_106_91143_20120413_234122_inLine +BABEL_BP_106_91358_20120312_180740_inLine +BABEL_BP_106_91401_20120131_014626_inLine +BABEL_BP_106_91401_20120131_014627_outLine +BABEL_BP_106_91481_20120303_192948_inLine +BABEL_BP_106_91481_20120303_202847_inLine +BABEL_BP_106_91583_20120415_170849_inLine +BABEL_BP_106_91583_20120415_172901_inLine +BABEL_BP_106_91660_20120307_172116_inLine +BABEL_BP_106_91660_20120307_172116_outLine +BABEL_BP_106_91668_20120312_231623_inLine +BABEL_BP_106_91668_20120312_234357_inLine +BABEL_BP_106_91687_20120530_211936_inLine +BABEL_BP_106_91703_20120301_180553_inLine +BABEL_BP_106_91703_20120301_180553_outLine +BABEL_BP_106_91865_20120227_132028_inLine +BABEL_BP_106_91865_20120227_132028_outLine +BABEL_BP_106_91905_20120225_044624_inLine +BABEL_BP_106_91905_20120225_044624_outLine +BABEL_BP_106_91975_20120318_202137_inLine +BABEL_BP_106_92002_20120301_010732_inLine +BABEL_BP_106_92346_20120410_232631_inLine +BABEL_BP_106_92346_20120410_234651_inLine +BABEL_BP_106_92591_20120301_232554_outLine +BABEL_BP_106_92642_20120214_041746_inLine +BABEL_BP_106_92642_20120214_041746_outLine +BABEL_BP_106_93044_20120405_184614_inLine +BABEL_BP_106_93169_20120126_190053_inLine +BABEL_BP_106_93237_20120412_202749_inLine +BABEL_BP_106_93268_20120316_173016_inLine +BABEL_BP_106_93268_20120316_185049_inLine +BABEL_BP_106_93277_20120314_162508_inLine +BABEL_BP_106_93277_20120314_162508_outLine +BABEL_BP_106_93302_20120530_221003_inLine +BABEL_BP_106_93326_20120329_003409_inLine +BABEL_BP_106_93326_20120329_003409_outLine +BABEL_BP_106_93436_20120314_172420_inLine +BABEL_BP_106_93506_20120501_114215_outLine +BABEL_BP_106_93607_20120304_213723_outLine +BABEL_BP_106_93811_20120419_004934_inLine +BABEL_BP_106_94126_20120331_143958_inLine +BABEL_BP_106_94126_20120331_143958_outLine +BABEL_BP_106_94162_20120418_180628_inLine +BABEL_BP_106_94223_20120219_180504_inLine +BABEL_BP_106_94223_20120219_185026_inLine +BABEL_BP_106_94223_20120219_191721_inLine +BABEL_BP_106_94223_20120219_194907_inLine +BABEL_BP_106_94514_20120225_190925_inLine +BABEL_BP_106_94514_20120225_190925_outLine +BABEL_BP_106_94514_20120225_192755_inLine +BABEL_BP_106_94514_20120225_192755_outLine +BABEL_BP_106_94694_20120315_140425_inLine +BABEL_BP_106_94694_20120315_140425_outLine +BABEL_BP_106_94814_20120211_015600_inLine +BABEL_BP_106_95034_20120222_020622_inLine +BABEL_BP_106_95034_20120222_020622_outLine +BABEL_BP_106_95120_20120226_174855_inLine +BABEL_BP_106_95120_20120226_174855_outLine +BABEL_BP_106_95121_20120314_000217_inLine +BABEL_BP_106_95121_20120314_000217_outLine +BABEL_BP_106_95325_20120225_072841_inLine +BABEL_BP_106_95325_20120225_072841_outLine +BABEL_BP_106_95514_20120404_160802_inLine +BABEL_BP_106_95533_20120227_180819_inLine +BABEL_BP_106_95572_20120501_120940_outLine +BABEL_BP_106_95628_20120331_180349_inLine +BABEL_BP_106_95628_20120331_180349_outLine +BABEL_BP_106_95650_20120203_023309_inLine +BABEL_BP_106_95650_20120203_023309_outLine +BABEL_BP_106_95791_20120323_213855_inLine +BABEL_BP_106_95849_20120317_111924_inLine +BABEL_BP_106_95849_20120317_112530_inLine +BABEL_BP_106_95849_20120317_114235_inLine +BABEL_BP_106_95893_20120304_185606_inLine +BABEL_BP_106_95952_20120413_203735_inLine +BABEL_BP_106_95952_20120413_203735_outLine 
+BABEL_BP_106_95952_20120413_204700_inLine +BABEL_BP_106_95952_20120413_204700_outLine +BABEL_BP_106_96108_20120306_154946_inLine +BABEL_BP_106_96108_20120306_154946_outLine +BABEL_BP_106_96302_20120317_192957_inLine +BABEL_BP_106_96302_20120317_193605_inLine +BABEL_BP_106_96302_20120317_195426_inLine +BABEL_BP_106_96302_20120317_195426_outLine +BABEL_BP_106_96425_20120314_181621_inLine +BABEL_BP_106_96425_20120314_183006_inLine +BABEL_BP_106_96463_20120304_141645_inLine +BABEL_BP_106_96630_20120204_003252_inLine +BABEL_BP_106_96630_20120204_003252_outLine +BABEL_BP_106_96636_20120605_122128_inLine +BABEL_BP_106_96717_20120331_232633_outLine +BABEL_BP_106_96922_20120331_201147_outLine +BABEL_BP_106_97486_20120214_032248_inLine +BABEL_BP_106_97486_20120214_032248_outLine +BABEL_BP_106_97486_20120214_035344_inLine +BABEL_BP_106_97486_20120214_035344_outLine +BABEL_BP_106_97635_20120330_174657_inLine +BABEL_BP_106_97649_20120312_212246_inLine +BABEL_BP_106_97649_20120312_213707_inLine +BABEL_BP_106_97699_20120317_154627_outLine +BABEL_BP_106_98279_20120228_221829_inLine +BABEL_BP_106_98279_20120228_222916_inLine +BABEL_BP_106_98279_20120228_224103_inLine +BABEL_BP_106_98402_20120311_211004_inLine +BABEL_BP_106_98402_20120311_211004_outLine +BABEL_BP_106_98465_20120408_005224_inLine +BABEL_BP_106_98465_20120408_005224_outLine +BABEL_BP_106_98476_20120301_235152_inLine +BABEL_BP_106_98476_20120301_235152_outLine +BABEL_BP_106_98675_20120418_012454_inLine +BABEL_BP_106_98807_20120331_181706_inLine +BABEL_BP_106_98807_20120331_181706_outLine +BABEL_BP_106_98807_20120331_182345_inLine +BABEL_BP_106_98807_20120331_182345_outLine +BABEL_BP_106_98807_20120331_183121_inLine +BABEL_BP_106_98807_20120331_183121_outLine +BABEL_BP_106_99012_20120419_224750_inLine +BABEL_BP_106_99697_20120229_185303_inLine +BABEL_BP_106_99856_20120226_184042_inLine +BABEL_BP_106_99856_20120226_184042_outLine +BABEL_BP_106_99856_20120226_191212_inLine +BABEL_BP_106_99856_20120226_191212_outLine diff --git a/egs/babel/s5d/conf/lists/106-tagalog/train.LimitedLP.list b/egs/babel/s5d/conf/lists/106-tagalog/train.LimitedLP.list new file mode 100644 index 00000000000..fee3e3adbaf --- /dev/null +++ b/egs/babel/s5d/conf/lists/106-tagalog/train.LimitedLP.list @@ -0,0 +1,134 @@ +BABEL_BP_106_03420_20120409_204941_inLine +BABEL_BP_106_03420_20120409_204941_outLine +BABEL_BP_106_03420_20120409_211810_inLine +BABEL_BP_106_03420_20120409_211811_outLine +BABEL_BP_106_10985_20120313_013835_inLine +BABEL_BP_106_10985_20120313_013835_outLine +BABEL_BP_106_11158_20120314_183907_inLine +BABEL_BP_106_11158_20120314_183907_outLine +BABEL_BP_106_11158_20120314_193006_inLine +BABEL_BP_106_11158_20120314_193006_outLine +BABEL_BP_106_12248_20120304_225237_inLine +BABEL_BP_106_13071_20120315_000734_inLine +BABEL_BP_106_13071_20120315_000734_outLine +BABEL_BP_106_13071_20120315_001539_inLine +BABEL_BP_106_13071_20120315_001539_outLine +BABEL_BP_106_16406_20120309_161540_inLine +BABEL_BP_106_19867_20120428_022912_inLine +BABEL_BP_106_20320_20120206_212251_inLine +BABEL_BP_106_20320_20120206_212251_outLine +BABEL_BP_106_20740_20120229_234935_inLine +BABEL_BP_106_22910_20120129_213616_inLine +BABEL_BP_106_22910_20120129_213616_outLine +BABEL_BP_106_23571_20120229_180344_inLine +BABEL_BP_106_23571_20120229_180344_outLine +BABEL_BP_106_23878_20120209_170350_inLine +BABEL_BP_106_23878_20120209_170350_outLine +BABEL_BP_106_25751_20120227_221828_inLine +BABEL_BP_106_25751_20120227_221828_outLine +BABEL_BP_106_25866_20120304_181012_inLine 
+BABEL_BP_106_25866_20120304_181012_outLine +BABEL_BP_106_27916_20120403_232720_inLine +BABEL_BP_106_27916_20120403_232720_outLine +BABEL_BP_106_27916_20120403_233612_inLine +BABEL_BP_106_27916_20120403_233612_outLine +BABEL_BP_106_30818_20120503_004014_inLine +BABEL_BP_106_31265_20120311_235253_inLine +BABEL_BP_106_31265_20120311_235253_outLine +BABEL_BP_106_32400_20120307_235432_inLine +BABEL_BP_106_32400_20120307_235432_outLine +BABEL_BP_106_32890_20120221_193416_inLine +BABEL_BP_106_32890_20120221_193417_outLine +BABEL_BP_106_33742_20120229_020923_inLine +BABEL_BP_106_33742_20120229_020923_outLine +BABEL_BP_106_34480_20120405_141959_inLine +BABEL_BP_106_34961_20120130_011357_inLine +BABEL_BP_106_34961_20120130_011357_outLine +BABEL_BP_106_35179_20120225_063734_inLine +BABEL_BP_106_35179_20120225_063734_outLine +BABEL_BP_106_35706_20120501_011424_inLine +BABEL_BP_106_36268_20120209_180615_inLine +BABEL_BP_106_36268_20120209_180615_outLine +BABEL_BP_106_38640_20120130_174518_inLine +BABEL_BP_106_38640_20120130_174518_outLine +BABEL_BP_106_38956_20120127_010500_inLine +BABEL_BP_106_38956_20120127_010500_outLine +BABEL_BP_106_40510_20120221_155613_inLine +BABEL_BP_106_40510_20120221_155613_outLine +BABEL_BP_106_40680_20120511_153305_inLine +BABEL_BP_106_45453_20120404_225631_inLine +BABEL_BP_106_45453_20120404_225631_outLine +BABEL_BP_106_46603_20120227_192836_inLine +BABEL_BP_106_46603_20120227_192836_outLine +BABEL_BP_106_47429_20120329_195737_inLine +BABEL_BP_106_47429_20120329_195737_outLine +BABEL_BP_106_48188_20120307_034039_outLine +BABEL_BP_106_49624_20120224_194049_inLine +BABEL_BP_106_49624_20120224_194049_outLine +BABEL_BP_106_49689_20120225_152557_outLine +BABEL_BP_106_49689_20120225_153748_outLine +BABEL_BP_106_49714_20120227_191755_inLine +BABEL_BP_106_49714_20120227_191755_outLine +BABEL_BP_106_50409_20120319_185818_inLine +BABEL_BP_106_50409_20120319_185818_outLine +BABEL_BP_106_51149_20120329_174521_inLine +BABEL_BP_106_51149_20120329_174521_outLine +BABEL_BP_106_52366_20120124_164406_inLine +BABEL_BP_106_52366_20120124_164406_outLine +BABEL_BP_106_53315_20120329_182550_inLine +BABEL_BP_106_53315_20120329_182550_outLine +BABEL_BP_106_53376_20120323_000750_inLine +BABEL_BP_106_53376_20120323_000750_outLine +BABEL_BP_106_55823_20120329_210142_inLine +BABEL_BP_106_55823_20120329_210142_outLine +BABEL_BP_106_55922_20120322_021453_inLine +BABEL_BP_106_55922_20120322_021453_outLine +BABEL_BP_106_55922_20120322_022537_inLine +BABEL_BP_106_55922_20120322_022537_outLine +BABEL_BP_106_58192_20120308_182924_inLine +BABEL_BP_106_58192_20120308_182924_outLine +BABEL_BP_106_60598_20120324_022730_inLine +BABEL_BP_106_60598_20120324_022730_outLine +BABEL_BP_106_63305_20120324_030221_inLine +BABEL_BP_106_63305_20120324_030221_outLine +BABEL_BP_106_65248_20120321_230954_inLine +BABEL_BP_106_65248_20120321_230954_outLine +BABEL_BP_106_68392_20120331_224408_inLine +BABEL_BP_106_68392_20120331_224408_outLine +BABEL_BP_106_68610_20120505_011125_inLine +BABEL_BP_106_70285_20120128_211036_outLine +BABEL_BP_106_71310_20120221_005007_inLine +BABEL_BP_106_71310_20120221_005007_outLine +BABEL_BP_106_75036_20120224_163823_inLine +BABEL_BP_106_75036_20120224_163823_outLine +BABEL_BP_106_75932_20120301_185217_inLine +BABEL_BP_106_75932_20120301_185217_outLine +BABEL_BP_106_76252_20120318_131223_outLine +BABEL_BP_106_79519_20120407_175434_inLine +BABEL_BP_106_80174_20120422_023124_inLine +BABEL_BP_106_81944_20120404_120724_inLine +BABEL_BP_106_81944_20120404_120724_outLine 
+BABEL_BP_106_82766_20120211_165522_inLine +BABEL_BP_106_82766_20120211_165522_outLine +BABEL_BP_106_86014_20120605_153510_inLine +BABEL_BP_106_87210_20120212_183156_inLine +BABEL_BP_106_87210_20120212_183156_outLine +BABEL_BP_106_89417_20120131_014042_inLine +BABEL_BP_106_89417_20120131_014042_outLine +BABEL_BP_106_89727_20120404_165020_inLine +BABEL_BP_106_91000_20120311_230040_inLine +BABEL_BP_106_91000_20120311_230040_outLine +BABEL_BP_106_91000_20120311_231020_inLine +BABEL_BP_106_91000_20120311_231020_outLine +BABEL_BP_106_91401_20120131_014626_inLine +BABEL_BP_106_91401_20120131_014627_outLine +BABEL_BP_106_91905_20120225_044624_inLine +BABEL_BP_106_91905_20120225_044624_outLine +BABEL_BP_106_93169_20120126_190053_inLine +BABEL_BP_106_94814_20120211_015600_inLine +BABEL_BP_106_95034_20120222_020622_inLine +BABEL_BP_106_95034_20120222_020622_outLine +BABEL_BP_106_96630_20120204_003252_inLine +BABEL_BP_106_96630_20120204_003252_outLine +BABEL_BP_106_98465_20120408_005224_inLine +BABEL_BP_106_98465_20120408_005224_outLine diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/dev.list b/egs/babel/s5d/conf/lists/107-vietnamese/dev.list new file mode 100644 index 00000000000..f44c76db308 --- /dev/null +++ b/egs/babel/s5d/conf/lists/107-vietnamese/dev.list @@ -0,0 +1,132 @@ +BABEL_BP_107_11031_20120617_182613_inLine +BABEL_BP_107_11031_20120617_182613_outLine +BABEL_BP_107_12120_20120704_024505_inLine +BABEL_BP_107_12120_20120704_024505_outLine +BABEL_BP_107_12248_20120614_183345_inLine +BABEL_BP_107_12248_20120614_183345_outLine +BABEL_BP_107_12963_20120509_002346_inLine +BABEL_BP_107_12963_20120509_002346_outLine +BABEL_BP_107_12963_20120509_003852_inLine +BABEL_BP_107_12963_20120509_003852_outLine +BABEL_BP_107_13476_20120428_003452_inLine +BABEL_BP_107_13476_20120428_003452_outLine +BABEL_BP_107_14610_20120218_201908_inLine +BABEL_BP_107_14610_20120218_201908_outLine +BABEL_BP_107_14769_20120420_013147_inLine +BABEL_BP_107_14769_20120420_013147_outLine +BABEL_BP_107_14997_20120406_190013_inLine +BABEL_BP_107_14997_20120406_190013_outLine +BABEL_BP_107_14997_20120406_191102_inLine +BABEL_BP_107_14997_20120406_191102_outLine +BABEL_BP_107_15493_20120617_120952_inLine +BABEL_BP_107_15493_20120617_120952_outLine +BABEL_BP_107_15502_20120627_124423_inLine +BABEL_BP_107_15502_20120627_124423_outLine +BABEL_BP_107_16167_20120215_213113_inLine +BABEL_BP_107_16167_20120215_213113_outLine +BABEL_BP_107_18730_20120222_145916_inLine +BABEL_BP_107_18730_20120222_145916_outLine +BABEL_BP_107_19619_20120215_221131_inLine +BABEL_BP_107_19619_20120215_221131_outLine +BABEL_BP_107_19619_20120215_223011_inLine +BABEL_BP_107_19619_20120215_223011_outLine +BABEL_BP_107_21489_20120608_123945_inLine +BABEL_BP_107_21489_20120608_123945_outLine +BABEL_BP_107_26644_20120509_013405_inLine +BABEL_BP_107_26644_20120509_013405_outLine +BABEL_BP_107_28161_20120322_171027_inLine +BABEL_BP_107_28161_20120322_171027_outLine +BABEL_BP_107_28648_20120506_223200_inLine +BABEL_BP_107_28648_20120506_223200_outLine +BABEL_BP_107_29168_20120321_215013_inLine +BABEL_BP_107_29168_20120321_215013_outLine +BABEL_BP_107_31538_20120320_202748_inLine +BABEL_BP_107_31538_20120320_202748_outLine +BABEL_BP_107_32120_20120704_182238_inLine +BABEL_BP_107_32120_20120704_182238_outLine +BABEL_BP_107_32236_20120505_195420_inLine +BABEL_BP_107_32236_20120505_195420_outLine +BABEL_BP_107_33704_20120416_005402_inLine +BABEL_BP_107_33704_20120416_005402_outLine +BABEL_BP_107_35391_20120416_192241_inLine 
+BABEL_BP_107_35391_20120416_192241_outLine +BABEL_BP_107_35441_20120421_221245_inLine +BABEL_BP_107_35441_20120421_221245_outLine +BABEL_BP_107_39080_20120415_141817_inLine +BABEL_BP_107_39080_20120415_141817_outLine +BABEL_BP_107_39140_20120409_163031_inLine +BABEL_BP_107_39140_20120409_163031_outLine +BABEL_BP_107_39997_20120516_214034_inLine +BABEL_BP_107_39997_20120516_214035_outLine +BABEL_BP_107_41456_20120421_133628_inLine +BABEL_BP_107_41456_20120421_133628_outLine +BABEL_BP_107_41661_20120329_022249_inLine +BABEL_BP_107_41661_20120329_022249_outLine +BABEL_BP_107_41661_20120329_023848_inLine +BABEL_BP_107_41661_20120329_023848_outLine +BABEL_BP_107_43086_20120210_015927_inLine +BABEL_BP_107_43086_20120210_015927_outLine +BABEL_BP_107_45512_20120505_135144_inLine +BABEL_BP_107_45512_20120505_135144_outLine +BABEL_BP_107_45677_20120428_184714_inLine +BABEL_BP_107_45677_20120428_184714_outLine +BABEL_BP_107_47037_20120415_210047_inLine +BABEL_BP_107_47037_20120415_210047_outLine +BABEL_BP_107_54285_20120430_233928_inLine +BABEL_BP_107_54285_20120430_233928_outLine +BABEL_BP_107_56812_20120502_123725_inLine +BABEL_BP_107_56812_20120502_123725_outLine +BABEL_BP_107_57020_20120427_011940_inLine +BABEL_BP_107_57020_20120427_011940_outLine +BABEL_BP_107_57976_20120704_183740_inLine +BABEL_BP_107_57976_20120704_183740_outLine +BABEL_BP_107_59868_20120324_013729_inLine +BABEL_BP_107_59868_20120324_013729_outLine +BABEL_BP_107_59868_20120324_015118_inLine +BABEL_BP_107_59868_20120324_015118_outLine +BABEL_BP_107_59891_20120504_013809_inLine +BABEL_BP_107_59891_20120504_013809_outLine +BABEL_BP_107_63459_20120415_003841_inLine +BABEL_BP_107_63459_20120415_003841_outLine +BABEL_BP_107_65606_20120416_004652_inLine +BABEL_BP_107_65606_20120416_004652_outLine +BABEL_BP_107_70625_20120426_235142_inLine +BABEL_BP_107_70625_20120426_235142_outLine +BABEL_BP_107_71178_20120617_184313_inLine +BABEL_BP_107_71178_20120617_184313_outLine +BABEL_BP_107_73542_20120209_010311_inLine +BABEL_BP_107_73542_20120209_010311_outLine +BABEL_BP_107_75990_20120408_211713_inLine +BABEL_BP_107_75990_20120408_211713_outLine +BABEL_BP_107_76320_20120504_123902_inLine +BABEL_BP_107_76320_20120504_123902_outLine +BABEL_BP_107_77697_20120416_235254_inLine +BABEL_BP_107_77697_20120416_235254_outLine +BABEL_BP_107_77771_20120421_231323_inLine +BABEL_BP_107_77771_20120421_231323_outLine +BABEL_BP_107_79412_20120322_174955_inLine +BABEL_BP_107_79412_20120322_174955_outLine +BABEL_BP_107_79526_20120420_150504_inLine +BABEL_BP_107_79526_20120420_150504_outLine +BABEL_BP_107_83219_20120421_172919_inLine +BABEL_BP_107_83219_20120421_172919_outLine +BABEL_BP_107_85204_20120212_190017_inLine +BABEL_BP_107_85204_20120212_190017_outLine +BABEL_BP_107_86259_20120507_015816_inLine +BABEL_BP_107_86259_20120507_015816_outLine +BABEL_BP_107_87850_20120406_210353_inLine +BABEL_BP_107_87850_20120406_210354_outLine +BABEL_BP_107_88383_20120627_125444_inLine +BABEL_BP_107_88383_20120627_125444_outLine +BABEL_BP_107_88383_20120627_130611_inLine +BABEL_BP_107_88383_20120627_130611_outLine +BABEL_BP_107_89838_20120212_205650_inLine +BABEL_BP_107_89838_20120212_205650_outLine +BABEL_BP_107_90202_20120502_194459_inLine +BABEL_BP_107_90202_20120502_194459_outLine +BABEL_BP_107_92386_20120322_195456_inLine +BABEL_BP_107_92386_20120322_195456_outLine +BABEL_BP_107_96283_20120503_162149_inLine +BABEL_BP_107_96283_20120503_162149_outLine +BABEL_BP_107_97760_20120614_184333_inLine +BABEL_BP_107_97760_20120614_184333_outLine diff --git 
a/egs/babel/s5d/conf/lists/107-vietnamese/eval.list b/egs/babel/s5d/conf/lists/107-vietnamese/eval.list new file mode 100644 index 00000000000..9cc6f7875ed --- /dev/null +++ b/egs/babel/s5d/conf/lists/107-vietnamese/eval.list @@ -0,0 +1,981 @@ +BABEL_BP_107_10170_20120703_230849_inLine +BABEL_BP_107_10170_20120703_230850_outLine +BABEL_BP_107_10170_20120703_231552_inLine +BABEL_BP_107_10170_20120703_231552_outLine +BABEL_BP_107_10187_20120218_202839_inLine +BABEL_BP_107_10187_20120218_202839_outLine +BABEL_BP_107_10408_20120704_021830_inLine +BABEL_BP_107_10408_20120704_021830_outLine +BABEL_BP_107_10470_20120608_135407_inLine +BABEL_BP_107_10470_20120608_135407_outLine +BABEL_BP_107_10925_20120209_233924_inLine +BABEL_BP_107_10925_20120209_233924_outLine +BABEL_BP_107_11004_20120329_040734_inLine +BABEL_BP_107_11004_20120329_040734_outLine +BABEL_BP_107_11152_20120418_221056_inLine +BABEL_BP_107_11152_20120418_221056_outLine +BABEL_BP_107_11203_20120415_212056_inLine +BABEL_BP_107_11203_20120415_212056_outLine +BABEL_BP_107_11824_20120413_213002_inLine +BABEL_BP_107_11824_20120413_213002_outLine +BABEL_BP_107_12535_20120614_190306_inLine +BABEL_BP_107_12535_20120614_190306_outLine +BABEL_BP_107_12667_20120514_195317_inLine +BABEL_BP_107_12667_20120514_195317_outLine +BABEL_BP_107_12700_20120608_010254_inLine +BABEL_BP_107_12700_20120608_010254_outLine +BABEL_BP_107_13118_20120419_193637_inLine +BABEL_BP_107_13118_20120419_193637_outLine +BABEL_BP_107_13441_20120412_212102_inLine +BABEL_BP_107_13441_20120412_212102_outLine +BABEL_BP_107_13709_20120406_164042_inLine +BABEL_BP_107_13709_20120406_164043_outLine +BABEL_BP_107_13913_20120415_144214_inLine +BABEL_BP_107_13913_20120415_144214_outLine +BABEL_BP_107_14389_20120617_164138_inLine +BABEL_BP_107_14389_20120617_164138_outLine +BABEL_BP_107_14874_20120320_190424_inLine +BABEL_BP_107_14874_20120320_190424_outLine +BABEL_BP_107_14874_20120320_192210_inLine +BABEL_BP_107_14874_20120320_192210_outLine +BABEL_BP_107_15022_20120418_133337_inLine +BABEL_BP_107_15022_20120418_133337_outLine +BABEL_BP_107_15234_20120509_180434_inLine +BABEL_BP_107_15234_20120509_180434_outLine +BABEL_BP_107_15859_20120419_133516_inLine +BABEL_BP_107_15859_20120419_133516_outLine +BABEL_BP_107_15916_20120426_132306_inLine +BABEL_BP_107_15916_20120426_132306_outLine +BABEL_BP_107_16266_20120211_215251_inLine +BABEL_BP_107_16266_20120211_215251_outLine +BABEL_BP_107_16299_20120220_135944_inLine +BABEL_BP_107_16299_20120220_135944_outLine +BABEL_BP_107_16346_20120423_192454_inLine +BABEL_BP_107_16346_20120423_192454_outLine +BABEL_BP_107_16984_20120414_193034_inLine +BABEL_BP_107_16984_20120414_193034_outLine +BABEL_BP_107_17080_20120212_150122_inLine +BABEL_BP_107_17080_20120212_150122_outLine +BABEL_BP_107_17207_20120507_023403_inLine +BABEL_BP_107_17207_20120507_023403_outLine +BABEL_BP_107_17511_20120419_232032_inLine +BABEL_BP_107_17511_20120419_232032_outLine +BABEL_BP_107_17572_20120613_040637_inLine +BABEL_BP_107_17572_20120613_040637_outLine +BABEL_BP_107_17850_20120615_234216_inLine +BABEL_BP_107_17850_20120615_234216_outLine +BABEL_BP_107_17900_20120323_015142_inLine +BABEL_BP_107_17900_20120323_015142_outLine +BABEL_BP_107_18672_20120426_150856_inLine +BABEL_BP_107_18672_20120426_150856_outLine +BABEL_BP_107_18980_20120608_125749_inLine +BABEL_BP_107_18980_20120608_125749_outLine +BABEL_BP_107_19147_20120212_161206_inLine +BABEL_BP_107_19147_20120212_161206_outLine +BABEL_BP_107_19456_20120704_165824_inLine +BABEL_BP_107_19456_20120704_165824_outLine 
+BABEL_BP_107_19656_20120325_230731_inLine +BABEL_BP_107_19656_20120325_230731_outLine +BABEL_BP_107_19861_20120511_013731_inLine +BABEL_BP_107_19861_20120511_013731_outLine +BABEL_BP_107_19861_20120511_014743_inLine +BABEL_BP_107_19861_20120511_014744_outLine +BABEL_BP_107_19915_20120218_150645_inLine +BABEL_BP_107_19915_20120218_150645_outLine +BABEL_BP_107_20408_20120323_142004_inLine +BABEL_BP_107_20408_20120323_142004_outLine +BABEL_BP_107_20408_20120323_143722_inLine +BABEL_BP_107_20408_20120323_143722_outLine +BABEL_BP_107_20471_20120328_020935_inLine +BABEL_BP_107_20471_20120328_020935_outLine +BABEL_BP_107_20546_20120323_215948_inLine +BABEL_BP_107_20546_20120323_215948_outLine +BABEL_BP_107_20685_20120222_210447_inLine +BABEL_BP_107_20685_20120222_210447_outLine +BABEL_BP_107_20775_20120502_214146_inLine +BABEL_BP_107_20775_20120502_214146_outLine +BABEL_BP_107_21714_20120608_140615_inLine +BABEL_BP_107_21714_20120608_140615_outLine +BABEL_BP_107_21782_20120321_191431_inLine +BABEL_BP_107_21782_20120321_191431_outLine +BABEL_BP_107_21845_20120613_195420_inLine +BABEL_BP_107_21845_20120613_195420_outLine +BABEL_BP_107_22179_20120220_172322_inLine +BABEL_BP_107_22179_20120220_172322_outLine +BABEL_BP_107_22351_20120413_231618_inLine +BABEL_BP_107_22351_20120413_231618_outLine +BABEL_BP_107_22408_20120416_180244_inLine +BABEL_BP_107_22408_20120416_180244_outLine +BABEL_BP_107_22537_20120322_214458_inLine +BABEL_BP_107_22537_20120322_214458_outLine +BABEL_BP_107_22566_20120704_023628_inLine +BABEL_BP_107_22566_20120704_023628_outLine +BABEL_BP_107_22973_20120503_231406_inLine +BABEL_BP_107_22973_20120503_231406_outLine +BABEL_BP_107_23168_20120618_113427_inLine +BABEL_BP_107_23168_20120618_113427_outLine +BABEL_BP_107_23336_20120429_192926_inLine +BABEL_BP_107_23336_20120429_192926_outLine +BABEL_BP_107_23352_20120425_211848_inLine +BABEL_BP_107_23352_20120425_211848_outLine +BABEL_BP_107_23995_20120418_194620_inLine +BABEL_BP_107_23995_20120418_194620_outLine +BABEL_BP_107_24379_20120422_173418_inLine +BABEL_BP_107_24379_20120422_173418_outLine +BABEL_BP_107_24431_20120215_202205_inLine +BABEL_BP_107_24431_20120215_202205_outLine +BABEL_BP_107_24580_20120420_011554_inLine +BABEL_BP_107_24580_20120420_011554_outLine +BABEL_BP_107_24589_20120508_183427_inLine +BABEL_BP_107_24589_20120508_183427_outLine +BABEL_BP_107_25021_20120131_214134_inLine +BABEL_BP_107_25021_20120131_214134_outLine +BABEL_BP_107_25502_20120217_005526_inLine +BABEL_BP_107_25502_20120217_005526_outLine +BABEL_BP_107_25735_20120608_134208_inLine +BABEL_BP_107_25735_20120608_134208_outLine +BABEL_BP_107_25871_20120422_181122_inLine +BABEL_BP_107_25871_20120422_181122_outLine +BABEL_BP_107_25904_20120509_000636_inLine +BABEL_BP_107_25904_20120509_000636_outLine +BABEL_BP_107_26164_20120705_014122_inLine +BABEL_BP_107_26164_20120705_014122_outLine +BABEL_BP_107_27178_20120324_021235_inLine +BABEL_BP_107_27178_20120324_021235_outLine +BABEL_BP_107_27349_20120321_195149_inLine +BABEL_BP_107_27349_20120321_195149_outLine +BABEL_BP_107_27605_20120329_015050_inLine +BABEL_BP_107_27605_20120329_015050_outLine +BABEL_BP_107_27645_20120501_005559_inLine +BABEL_BP_107_27645_20120501_005559_outLine +BABEL_BP_107_27824_20120418_211841_inLine +BABEL_BP_107_27824_20120418_211841_outLine +BABEL_BP_107_27825_20120418_230344_inLine +BABEL_BP_107_27825_20120418_230344_outLine +BABEL_BP_107_27825_20120418_231611_inLine +BABEL_BP_107_27825_20120418_231611_outLine +BABEL_BP_107_28754_20120417_233136_inLine 
+BABEL_BP_107_28754_20120417_233136_outLine +BABEL_BP_107_28768_20120607_134003_inLine +BABEL_BP_107_28768_20120607_134003_outLine +BABEL_BP_107_28768_20120607_135648_inLine +BABEL_BP_107_28768_20120607_135648_outLine +BABEL_BP_107_28990_20120421_150239_inLine +BABEL_BP_107_28990_20120421_150239_outLine +BABEL_BP_107_29087_20120511_023457_inLine +BABEL_BP_107_29087_20120511_023457_outLine +BABEL_BP_107_29097_20120120_174353_inLine +BABEL_BP_107_29097_20120120_174353_outLine +BABEL_BP_107_29133_20120212_223742_inLine +BABEL_BP_107_29133_20120212_223742_outLine +BABEL_BP_107_29259_20120418_213018_inLine +BABEL_BP_107_29259_20120418_213018_outLine +BABEL_BP_107_29328_20120208_021903_inLine +BABEL_BP_107_29328_20120208_021903_outLine +BABEL_BP_107_29421_20120501_121237_inLine +BABEL_BP_107_29421_20120501_121237_outLine +BABEL_BP_107_29512_20120426_133304_inLine +BABEL_BP_107_29512_20120426_133304_outLine +BABEL_BP_107_29545_20120704_025504_inLine +BABEL_BP_107_29545_20120704_025504_outLine +BABEL_BP_107_30530_20120210_191257_inLine +BABEL_BP_107_30530_20120210_191257_outLine +BABEL_BP_107_30642_20120424_124529_inLine +BABEL_BP_107_30642_20120424_124529_outLine +BABEL_BP_107_31256_20120424_173937_inLine +BABEL_BP_107_31256_20120424_173937_outLine +BABEL_BP_107_31345_20120501_200006_inLine +BABEL_BP_107_31345_20120501_200006_outLine +BABEL_BP_107_31441_20120322_221247_inLine +BABEL_BP_107_31441_20120322_221247_outLine +BABEL_BP_107_31678_20120323_003303_inLine +BABEL_BP_107_31678_20120323_003303_outLine +BABEL_BP_107_31841_20120420_173052_inLine +BABEL_BP_107_31841_20120420_173052_outLine +BABEL_BP_107_31841_20120420_175428_inLine +BABEL_BP_107_31841_20120420_175428_outLine +BABEL_BP_107_31975_20120418_213316_inLine +BABEL_BP_107_31975_20120418_213316_outLine +BABEL_BP_107_32045_20120627_135349_inLine +BABEL_BP_107_32045_20120627_135349_outLine +BABEL_BP_107_32263_20120415_125245_inLine +BABEL_BP_107_32263_20120415_125245_outLine +BABEL_BP_107_32452_20120417_025731_inLine +BABEL_BP_107_32452_20120417_025731_outLine +BABEL_BP_107_32562_20120502_183523_inLine +BABEL_BP_107_32562_20120502_183523_outLine +BABEL_BP_107_32642_20120507_162602_inLine +BABEL_BP_107_32642_20120507_162602_outLine +BABEL_BP_107_32818_20120505_124034_inLine +BABEL_BP_107_32818_20120505_124034_outLine +BABEL_BP_107_32830_20120217_010905_inLine +BABEL_BP_107_32830_20120217_010905_outLine +BABEL_BP_107_32962_20120417_002922_inLine +BABEL_BP_107_32962_20120417_002922_outLine +BABEL_BP_107_33243_20120417_000926_inLine +BABEL_BP_107_33243_20120417_000926_outLine +BABEL_BP_107_33527_20120415_192039_inLine +BABEL_BP_107_33527_20120415_192039_outLine +BABEL_BP_107_34169_20120328_012436_inLine +BABEL_BP_107_34169_20120328_012436_outLine +BABEL_BP_107_34194_20120218_004244_inLine +BABEL_BP_107_34194_20120218_004244_outLine +BABEL_BP_107_34248_20120704_190743_inLine +BABEL_BP_107_34248_20120704_190743_outLine +BABEL_BP_107_34357_20120608_192929_inLine +BABEL_BP_107_34357_20120608_192929_outLine +BABEL_BP_107_34439_20120514_155943_inLine +BABEL_BP_107_34439_20120514_155943_outLine +BABEL_BP_107_35064_20120609_183707_inLine +BABEL_BP_107_35064_20120609_183707_outLine +BABEL_BP_107_35576_20120618_004603_inLine +BABEL_BP_107_35576_20120618_004603_outLine +BABEL_BP_107_35612_20120424_221417_inLine +BABEL_BP_107_35612_20120424_221418_outLine +BABEL_BP_107_35896_20120426_160252_inLine +BABEL_BP_107_35896_20120426_160252_outLine +BABEL_BP_107_35932_20120321_221039_inLine +BABEL_BP_107_35932_20120321_221039_outLine 
+BABEL_BP_107_35951_20120415_161914_inLine +BABEL_BP_107_35951_20120415_161914_outLine +BABEL_BP_107_35972_20120510_232832_inLine +BABEL_BP_107_35972_20120510_232832_outLine +BABEL_BP_107_36143_20120217_012635_inLine +BABEL_BP_107_36143_20120217_012635_outLine +BABEL_BP_107_36143_20120217_175752_inLine +BABEL_BP_107_36143_20120217_175752_outLine +BABEL_BP_107_36155_20120421_014500_inLine +BABEL_BP_107_36155_20120421_014500_outLine +BABEL_BP_107_36868_20120426_234641_inLine +BABEL_BP_107_36868_20120426_234641_outLine +BABEL_BP_107_37094_20120208_155100_inLine +BABEL_BP_107_37094_20120208_155100_outLine +BABEL_BP_107_37185_20120608_122828_inLine +BABEL_BP_107_37185_20120608_122828_outLine +BABEL_BP_107_37203_20120409_183756_inLine +BABEL_BP_107_37203_20120409_183756_outLine +BABEL_BP_107_37260_20120509_024525_inLine +BABEL_BP_107_37260_20120509_024525_outLine +BABEL_BP_107_37348_20120506_234059_inLine +BABEL_BP_107_37348_20120506_234938_inLine +BABEL_BP_107_37348_20120507_000848_inLine +BABEL_BP_107_37348_20120507_000848_outLine +BABEL_BP_107_37766_20120608_155216_inLine +BABEL_BP_107_37766_20120608_155217_outLine +BABEL_BP_107_37784_20120509_195942_inLine +BABEL_BP_107_37784_20120509_195942_outLine +BABEL_BP_107_37842_20120513_023632_inLine +BABEL_BP_107_37842_20120513_023632_outLine +BABEL_BP_107_38635_20120424_231446_inLine +BABEL_BP_107_38635_20120424_231446_outLine +BABEL_BP_107_38863_20120614_173605_inLine +BABEL_BP_107_38863_20120614_173605_outLine +BABEL_BP_107_38863_20120614_174335_inLine +BABEL_BP_107_38863_20120614_174335_outLine +BABEL_BP_107_38863_20120614_175009_inLine +BABEL_BP_107_38863_20120614_175009_outLine +BABEL_BP_107_38985_20120506_223622_inLine +BABEL_BP_107_38985_20120506_223622_outLine +BABEL_BP_107_39098_20120324_231724_inLine +BABEL_BP_107_39098_20120324_231724_outLine +BABEL_BP_107_39098_20120324_232726_inLine +BABEL_BP_107_39098_20120324_232726_outLine +BABEL_BP_107_39114_20120614_184836_inLine +BABEL_BP_107_39114_20120614_184836_outLine +BABEL_BP_107_39287_20120611_013320_inLine +BABEL_BP_107_39287_20120611_013320_outLine +BABEL_BP_107_39809_20120216_013447_inLine +BABEL_BP_107_39809_20120216_013447_outLine +BABEL_BP_107_39889_20120325_135610_inLine +BABEL_BP_107_39889_20120325_135610_outLine +BABEL_BP_107_39963_20120323_223603_inLine +BABEL_BP_107_39963_20120323_223603_outLine +BABEL_BP_107_39968_20120609_221724_inLine +BABEL_BP_107_39968_20120609_221724_outLine +BABEL_BP_107_40040_20120506_220308_outLine +BABEL_BP_107_40168_20120420_180808_inLine +BABEL_BP_107_40168_20120420_180808_outLine +BABEL_BP_107_40197_20120504_174115_inLine +BABEL_BP_107_40197_20120504_174115_outLine +BABEL_BP_107_40809_20120627_194401_inLine +BABEL_BP_107_40809_20120627_194401_outLine +BABEL_BP_107_41075_20120416_005109_inLine +BABEL_BP_107_41075_20120416_005109_outLine +BABEL_BP_107_41512_20120704_113900_inLine +BABEL_BP_107_41512_20120704_113900_outLine +BABEL_BP_107_41561_20120704_233037_inLine +BABEL_BP_107_41561_20120704_233037_outLine +BABEL_BP_107_41686_20120217_004524_inLine +BABEL_BP_107_41686_20120217_004524_outLine +BABEL_BP_107_41733_20120429_210259_inLine +BABEL_BP_107_41733_20120429_210259_outLine +BABEL_BP_107_41949_20120430_155207_inLine +BABEL_BP_107_41949_20120430_155207_outLine +BABEL_BP_107_41989_20120321_185501_inLine +BABEL_BP_107_41989_20120321_185501_outLine +BABEL_BP_107_41989_20120321_190714_inLine +BABEL_BP_107_41989_20120321_190714_outLine +BABEL_BP_107_42212_20120704_203258_inLine +BABEL_BP_107_42212_20120704_203258_outLine 
+BABEL_BP_107_42229_20120216_204712_inLine +BABEL_BP_107_42229_20120216_204712_outLine +BABEL_BP_107_42420_20120705_031347_inLine +BABEL_BP_107_42420_20120705_031347_outLine +BABEL_BP_107_42768_20120503_180000_inLine +BABEL_BP_107_42768_20120503_180000_outLine +BABEL_BP_107_42788_20120421_142943_inLine +BABEL_BP_107_42788_20120421_142943_outLine +BABEL_BP_107_43317_20120510_000906_inLine +BABEL_BP_107_43317_20120510_000906_outLine +BABEL_BP_107_43383_20120404_222305_inLine +BABEL_BP_107_43383_20120404_222305_outLine +BABEL_BP_107_43991_20120429_013420_inLine +BABEL_BP_107_43991_20120429_013420_outLine +BABEL_BP_107_44023_20120430_233729_inLine +BABEL_BP_107_44023_20120430_233730_outLine +BABEL_BP_107_44038_20120704_200232_inLine +BABEL_BP_107_44038_20120704_200232_outLine +BABEL_BP_107_44117_20120704_023955_inLine +BABEL_BP_107_44117_20120704_023955_outLine +BABEL_BP_107_44209_20120418_205150_inLine +BABEL_BP_107_44209_20120418_205150_outLine +BABEL_BP_107_44500_20120421_220207_inLine +BABEL_BP_107_44500_20120421_220207_outLine +BABEL_BP_107_44649_20120429_012920_inLine +BABEL_BP_107_44649_20120429_012920_outLine +BABEL_BP_107_45106_20120118_183909_inLine +BABEL_BP_107_45106_20120118_183909_outLine +BABEL_BP_107_45145_20120215_141231_inLine +BABEL_BP_107_45145_20120215_141231_outLine +BABEL_BP_107_45214_20120418_132013_inLine +BABEL_BP_107_45214_20120418_132013_outLine +BABEL_BP_107_45472_20120210_160318_inLine +BABEL_BP_107_45472_20120210_160318_outLine +BABEL_BP_107_45642_20120211_232703_inLine +BABEL_BP_107_45642_20120211_232703_outLine +BABEL_BP_107_45655_20120218_191119_inLine +BABEL_BP_107_45655_20120218_191119_outLine +BABEL_BP_107_45678_20120210_172837_inLine +BABEL_BP_107_45678_20120210_172837_outLine +BABEL_BP_107_45681_20120517_003820_inLine +BABEL_BP_107_45681_20120517_003820_outLine +BABEL_BP_107_45786_20120502_200051_inLine +BABEL_BP_107_45786_20120502_200051_outLine +BABEL_BP_107_46269_20120616_171713_inLine +BABEL_BP_107_46269_20120616_171713_outLine +BABEL_BP_107_46409_20120429_201101_inLine +BABEL_BP_107_46409_20120429_201101_outLine +BABEL_BP_107_46427_20120516_213127_inLine +BABEL_BP_107_46427_20120516_213127_outLine +BABEL_BP_107_46593_20120429_172814_inLine +BABEL_BP_107_46593_20120429_172814_outLine +BABEL_BP_107_46813_20120503_214109_inLine +BABEL_BP_107_46813_20120503_214109_outLine +BABEL_BP_107_47185_20120417_000125_inLine +BABEL_BP_107_47185_20120417_000125_outLine +BABEL_BP_107_47249_20120704_173500_inLine +BABEL_BP_107_47249_20120704_173500_outLine +BABEL_BP_107_47429_20120614_125021_inLine +BABEL_BP_107_47429_20120614_125021_outLine +BABEL_BP_107_47469_20120409_195752_inLine +BABEL_BP_107_47469_20120409_195752_outLine +BABEL_BP_107_47634_20120405_165429_inLine +BABEL_BP_107_47634_20120405_165429_outLine +BABEL_BP_107_47733_20120508_112151_inLine +BABEL_BP_107_47733_20120508_112151_outLine +BABEL_BP_107_48061_20120420_003849_inLine +BABEL_BP_107_48061_20120420_003849_outLine +BABEL_BP_107_48061_20120420_005250_inLine +BABEL_BP_107_48061_20120420_005250_outLine +BABEL_BP_107_48072_20120218_181934_inLine +BABEL_BP_107_48072_20120218_181934_outLine +BABEL_BP_107_48072_20120218_183449_inLine +BABEL_BP_107_48072_20120218_183449_outLine +BABEL_BP_107_48317_20120423_021629_inLine +BABEL_BP_107_48317_20120423_021629_outLine +BABEL_BP_107_48404_20120704_162020_inLine +BABEL_BP_107_48404_20120704_162020_outLine +BABEL_BP_107_48410_20120329_220200_inLine +BABEL_BP_107_48410_20120329_220200_outLine +BABEL_BP_107_48536_20120214_212101_inLine 
+BABEL_BP_107_48536_20120214_212101_outLine +BABEL_BP_107_48645_20120421_221346_inLine +BABEL_BP_107_48645_20120421_221346_outLine +BABEL_BP_107_49042_20120408_181734_inLine +BABEL_BP_107_49042_20120408_181734_outLine +BABEL_BP_107_49173_20120505_204557_inLine +BABEL_BP_107_49173_20120505_204557_outLine +BABEL_BP_107_49306_20120524_204041_inLine +BABEL_BP_107_49306_20120524_204041_outLine +BABEL_BP_107_49624_20120618_024358_inLine +BABEL_BP_107_49624_20120618_024358_outLine +BABEL_BP_107_50101_20120208_164249_inLine +BABEL_BP_107_50101_20120208_164249_outLine +BABEL_BP_107_50101_20120208_170815_inLine +BABEL_BP_107_50101_20120208_170815_outLine +BABEL_BP_107_50416_20120517_120502_inLine +BABEL_BP_107_50416_20120517_120502_outLine +BABEL_BP_107_50555_20120428_205621_inLine +BABEL_BP_107_50555_20120428_205621_outLine +BABEL_BP_107_50597_20120516_212308_inLine +BABEL_BP_107_50597_20120516_212308_outLine +BABEL_BP_107_50763_20120220_151302_inLine +BABEL_BP_107_50763_20120220_151302_outLine +BABEL_BP_107_50915_20120608_150955_inLine +BABEL_BP_107_50915_20120608_150955_outLine +BABEL_BP_107_51149_20120514_203206_inLine +BABEL_BP_107_51149_20120514_203207_outLine +BABEL_BP_107_51791_20120517_004528_inLine +BABEL_BP_107_51791_20120517_004528_outLine +BABEL_BP_107_52024_20120414_193538_inLine +BABEL_BP_107_52024_20120414_193538_outLine +BABEL_BP_107_52325_20120418_011735_inLine +BABEL_BP_107_52325_20120418_011735_outLine +BABEL_BP_107_52446_20120212_002618_inLine +BABEL_BP_107_52446_20120212_002618_outLine +BABEL_BP_107_52515_20120324_020411_inLine +BABEL_BP_107_52515_20120324_020411_outLine +BABEL_BP_107_52606_20120617_195206_inLine +BABEL_BP_107_52606_20120617_195206_outLine +BABEL_BP_107_52642_20120517_000300_inLine +BABEL_BP_107_52642_20120517_000300_outLine +BABEL_BP_107_52691_20120617_160904_inLine +BABEL_BP_107_52691_20120617_160904_outLine +BABEL_BP_107_52900_20120320_150335_inLine +BABEL_BP_107_52900_20120320_150335_outLine +BABEL_BP_107_52913_20120704_121758_inLine +BABEL_BP_107_52913_20120704_121759_outLine +BABEL_BP_107_53179_20120618_003820_inLine +BABEL_BP_107_53179_20120618_003820_outLine +BABEL_BP_107_53278_20120508_192335_inLine +BABEL_BP_107_53278_20120508_192335_outLine +BABEL_BP_107_53352_20120504_210910_inLine +BABEL_BP_107_53352_20120504_210910_outLine +BABEL_BP_107_53429_20120704_123624_inLine +BABEL_BP_107_53429_20120704_123624_outLine +BABEL_BP_107_53500_20120416_172018_inLine +BABEL_BP_107_53500_20120416_172018_outLine +BABEL_BP_107_53989_20120703_234506_inLine +BABEL_BP_107_53989_20120703_234506_outLine +BABEL_BP_107_53989_20120703_235719_inLine +BABEL_BP_107_53989_20120703_235719_outLine +BABEL_BP_107_54339_20120506_215557_inLine +BABEL_BP_107_54339_20120506_215557_outLine +BABEL_BP_107_55100_20120417_210019_inLine +BABEL_BP_107_55100_20120417_210020_outLine +BABEL_BP_107_55121_20120504_003327_inLine +BABEL_BP_107_55121_20120504_003327_outLine +BABEL_BP_107_55144_20120321_012306_inLine +BABEL_BP_107_55144_20120321_012306_outLine +BABEL_BP_107_55399_20120215_193434_inLine +BABEL_BP_107_55399_20120215_193434_outLine +BABEL_BP_107_55450_20120424_185013_inLine +BABEL_BP_107_55450_20120424_185013_outLine +BABEL_BP_107_55678_20120323_211821_inLine +BABEL_BP_107_55678_20120323_211821_outLine +BABEL_BP_107_55786_20120322_173045_inLine +BABEL_BP_107_55786_20120322_173045_outLine +BABEL_BP_107_55820_20120411_162436_inLine +BABEL_BP_107_55820_20120411_162436_outLine +BABEL_BP_107_55823_20120608_172512_inLine +BABEL_BP_107_55823_20120608_172512_outLine 
+BABEL_BP_107_56342_20120419_132008_inLine +BABEL_BP_107_56342_20120419_132008_outLine +BABEL_BP_107_56591_20120418_002004_inLine +BABEL_BP_107_56591_20120418_002004_outLine +BABEL_BP_107_56868_20120406_013202_inLine +BABEL_BP_107_56868_20120406_013202_outLine +BABEL_BP_107_56943_20120222_201642_inLine +BABEL_BP_107_56943_20120222_201642_outLine +BABEL_BP_107_57071_20120527_184402_inLine +BABEL_BP_107_57071_20120527_184402_outLine +BABEL_BP_107_57277_20120503_200553_inLine +BABEL_BP_107_57277_20120503_200553_outLine +BABEL_BP_107_57551_20120325_225227_inLine +BABEL_BP_107_57551_20120325_225227_outLine +BABEL_BP_107_57609_20120430_223510_inLine +BABEL_BP_107_57609_20120430_223510_outLine +BABEL_BP_107_57625_20120506_021834_inLine +BABEL_BP_107_57625_20120506_021834_outLine +BABEL_BP_107_57724_20120212_213811_inLine +BABEL_BP_107_57724_20120212_213811_outLine +BABEL_BP_107_57907_20120608_160937_inLine +BABEL_BP_107_57907_20120608_160939_outLine +BABEL_BP_107_58157_20120608_181026_inLine +BABEL_BP_107_58157_20120608_181026_outLine +BABEL_BP_107_58413_20120418_134444_inLine +BABEL_BP_107_58413_20120418_134444_outLine +BABEL_BP_107_58923_20120210_190334_inLine +BABEL_BP_107_58923_20120210_190334_outLine +BABEL_BP_107_59028_20120523_205355_inLine +BABEL_BP_107_59028_20120523_205355_outLine +BABEL_BP_107_59147_20120215_152227_inLine +BABEL_BP_107_59147_20120215_152227_outLine +BABEL_BP_107_59544_20120406_170833_inLine +BABEL_BP_107_59544_20120406_170833_outLine +BABEL_BP_107_59671_20120322_225750_inLine +BABEL_BP_107_59671_20120322_225750_outLine +BABEL_BP_107_59746_20120414_161308_inLine +BABEL_BP_107_59746_20120414_161308_outLine +BABEL_BP_107_60250_20120218_193537_inLine +BABEL_BP_107_60250_20120218_193537_outLine +BABEL_BP_107_60848_20120704_171856_inLine +BABEL_BP_107_60848_20120704_171856_outLine +BABEL_BP_107_60995_20120704_234842_inLine +BABEL_BP_107_60995_20120704_234843_outLine +BABEL_BP_107_61203_20120217_182644_inLine +BABEL_BP_107_61203_20120217_182644_outLine +BABEL_BP_107_61762_20120217_131207_inLine +BABEL_BP_107_61762_20120217_131207_outLine +BABEL_BP_107_61822_20120405_153356_inLine +BABEL_BP_107_61822_20120405_153357_outLine +BABEL_BP_107_61936_20120704_141205_inLine +BABEL_BP_107_61936_20120704_141205_outLine +BABEL_BP_107_61988_20120406_134336_inLine +BABEL_BP_107_61988_20120406_134336_outLine +BABEL_BP_107_62286_20120429_193945_inLine +BABEL_BP_107_62286_20120429_193945_outLine +BABEL_BP_107_62589_20120423_001315_inLine +BABEL_BP_107_62589_20120423_001315_outLine +BABEL_BP_107_62589_20120423_002039_inLine +BABEL_BP_107_62589_20120423_002039_outLine +BABEL_BP_107_63320_20120608_012846_inLine +BABEL_BP_107_63320_20120608_012846_outLine +BABEL_BP_107_63491_20120502_145101_inLine +BABEL_BP_107_63491_20120502_145101_outLine +BABEL_BP_107_64185_20120504_161653_inLine +BABEL_BP_107_64185_20120504_161653_outLine +BABEL_BP_107_64404_20120206_185707_inLine +BABEL_BP_107_64404_20120206_185708_outLine +BABEL_BP_107_64661_20120325_212204_inLine +BABEL_BP_107_64661_20120325_212204_outLine +BABEL_BP_107_64946_20120517_001754_inLine +BABEL_BP_107_64946_20120517_001754_outLine +BABEL_BP_107_65069_20120421_135835_inLine +BABEL_BP_107_65069_20120421_135835_outLine +BABEL_BP_107_65371_20120507_195517_inLine +BABEL_BP_107_65371_20120507_195517_outLine +BABEL_BP_107_65415_20120220_153755_inLine +BABEL_BP_107_65415_20120220_153755_outLine +BABEL_BP_107_65443_20120220_152901_inLine +BABEL_BP_107_65443_20120220_152901_outLine +BABEL_BP_107_65601_20120417_001124_inLine 
+BABEL_BP_107_65601_20120417_001124_outLine +BABEL_BP_107_65629_20120322_191551_inLine +BABEL_BP_107_65629_20120322_191551_outLine +BABEL_BP_107_65656_20120503_233657_inLine +BABEL_BP_107_65656_20120503_233658_outLine +BABEL_BP_107_65717_20120414_151906_inLine +BABEL_BP_107_65717_20120414_151906_outLine +BABEL_BP_107_65783_20120429_153408_inLine +BABEL_BP_107_65783_20120429_153408_outLine +BABEL_BP_107_65923_20120420_093839_inLine +BABEL_BP_107_65923_20120420_093839_outLine +BABEL_BP_107_66082_20120608_111438_inLine +BABEL_BP_107_66082_20120608_111438_outLine +BABEL_BP_107_66101_20120426_213014_inLine +BABEL_BP_107_66101_20120426_213014_outLine +BABEL_BP_107_66103_20120505_220240_inLine +BABEL_BP_107_66103_20120505_220240_outLine +BABEL_BP_107_66275_20120503_171050_inLine +BABEL_BP_107_66275_20120503_171050_outLine +BABEL_BP_107_66330_20120502_001133_inLine +BABEL_BP_107_66330_20120502_001133_outLine +BABEL_BP_107_66441_20120324_190814_inLine +BABEL_BP_107_66441_20120324_190814_outLine +BABEL_BP_107_66668_20120212_211947_inLine +BABEL_BP_107_66668_20120212_211947_outLine +BABEL_BP_107_66784_20120616_151422_inLine +BABEL_BP_107_66784_20120616_151422_outLine +BABEL_BP_107_66798_20120404_220411_inLine +BABEL_BP_107_66798_20120404_220411_outLine +BABEL_BP_107_67150_20120618_004347_inLine +BABEL_BP_107_67150_20120618_004347_outLine +BABEL_BP_107_67411_20120322_141025_inLine +BABEL_BP_107_67411_20120322_141025_outLine +BABEL_BP_107_67733_20120215_120553_inLine +BABEL_BP_107_67733_20120215_120553_outLine +BABEL_BP_107_67775_20120502_193035_inLine +BABEL_BP_107_67775_20120502_193035_outLine +BABEL_BP_107_68028_20120502_121140_inLine +BABEL_BP_107_68028_20120502_121140_outLine +BABEL_BP_107_68136_20120416_173551_inLine +BABEL_BP_107_68136_20120416_173551_outLine +BABEL_BP_107_68239_20120608_131431_inLine +BABEL_BP_107_68239_20120608_131431_outLine +BABEL_BP_107_68337_20120404_230000_inLine +BABEL_BP_107_68337_20120404_230000_outLine +BABEL_BP_107_68861_20120323_171053_inLine +BABEL_BP_107_68861_20120323_171053_outLine +BABEL_BP_107_68861_20120323_180450_inLine +BABEL_BP_107_68861_20120323_180450_outLine +BABEL_BP_107_69052_20120417_002628_inLine +BABEL_BP_107_69052_20120417_002628_outLine +BABEL_BP_107_69230_20120703_133459_inLine +BABEL_BP_107_69230_20120703_133459_outLine +BABEL_BP_107_69236_20120214_230344_inLine +BABEL_BP_107_69236_20120214_230344_outLine +BABEL_BP_107_69368_20120211_170226_inLine +BABEL_BP_107_69368_20120211_170226_outLine +BABEL_BP_107_69446_20120416_020122_inLine +BABEL_BP_107_69446_20120416_020122_outLine +BABEL_BP_107_69473_20120605_230319_inLine +BABEL_BP_107_69473_20120605_230319_outLine +BABEL_BP_107_69548_20120213_023955_inLine +BABEL_BP_107_69548_20120213_023955_outLine +BABEL_BP_107_69621_20120213_130748_inLine +BABEL_BP_107_69621_20120213_130748_outLine +BABEL_BP_107_69650_20120323_023553_inLine +BABEL_BP_107_69650_20120323_023553_outLine +BABEL_BP_107_69764_20120324_234039_inLine +BABEL_BP_107_69764_20120324_234039_outLine +BABEL_BP_107_70643_20120427_194211_inLine +BABEL_BP_107_70643_20120427_194211_outLine +BABEL_BP_107_70680_20120201_144426_inLine +BABEL_BP_107_70680_20120201_144426_outLine +BABEL_BP_107_70965_20120506_175829_inLine +BABEL_BP_107_70965_20120506_175829_outLine +BABEL_BP_107_71160_20120616_001355_inLine +BABEL_BP_107_71160_20120616_001355_outLine +BABEL_BP_107_72011_20120704_231031_inLine +BABEL_BP_107_72011_20120704_231031_outLine +BABEL_BP_107_72141_20120322_223344_inLine +BABEL_BP_107_72141_20120322_223344_outLine 
+BABEL_BP_107_72234_20120511_134939_inLine +BABEL_BP_107_72234_20120511_134939_outLine +BABEL_BP_107_72234_20120511_140008_inLine +BABEL_BP_107_72234_20120511_140008_outLine +BABEL_BP_107_72746_20120429_003515_inLine +BABEL_BP_107_72746_20120429_003515_outLine +BABEL_BP_107_72799_20120425_133035_inLine +BABEL_BP_107_72799_20120425_133035_outLine +BABEL_BP_107_72907_20120505_105259_inLine +BABEL_BP_107_72907_20120505_105259_outLine +BABEL_BP_107_73050_20120426_114239_inLine +BABEL_BP_107_73050_20120426_114239_outLine +BABEL_BP_107_73059_20120425_012258_inLine +BABEL_BP_107_73059_20120425_012258_outLine +BABEL_BP_107_73072_20120322_141121_inLine +BABEL_BP_107_73072_20120322_141121_outLine +BABEL_BP_107_73122_20120501_124450_inLine +BABEL_BP_107_73122_20120501_124450_outLine +BABEL_BP_107_73170_20120322_151236_inLine +BABEL_BP_107_73170_20120322_151236_outLine +BABEL_BP_107_73780_20120613_200802_inLine +BABEL_BP_107_73780_20120613_200802_outLine +BABEL_BP_107_73786_20120323_222826_inLine +BABEL_BP_107_73786_20120323_222826_outLine +BABEL_BP_107_73923_20120118_183938_inLine +BABEL_BP_107_73923_20120118_183938_outLine +BABEL_BP_107_74368_20120424_185039_inLine +BABEL_BP_107_74368_20120424_185039_outLine +BABEL_BP_107_74508_20120418_002925_inLine +BABEL_BP_107_74508_20120418_002925_outLine +BABEL_BP_107_74607_20120426_001241_inLine +BABEL_BP_107_74607_20120426_001241_outLine +BABEL_BP_107_74884_20120323_135739_inLine +BABEL_BP_107_74884_20120323_135739_outLine +BABEL_BP_107_75151_20120611_195147_inLine +BABEL_BP_107_75151_20120611_195147_outLine +BABEL_BP_107_75354_20120506_150750_inLine +BABEL_BP_107_75354_20120506_150750_outLine +BABEL_BP_107_75740_20120216_215302_inLine +BABEL_BP_107_75740_20120216_215302_outLine +BABEL_BP_107_75871_20120214_025447_inLine +BABEL_BP_107_75871_20120214_025447_outLine +BABEL_BP_107_75932_20120419_222819_inLine +BABEL_BP_107_75932_20120419_222819_outLine +BABEL_BP_107_76002_20120608_001301_inLine +BABEL_BP_107_76002_20120608_001301_outLine +BABEL_BP_107_76331_20120417_020306_inLine +BABEL_BP_107_76331_20120417_020306_outLine +BABEL_BP_107_76333_20120418_131111_inLine +BABEL_BP_107_76333_20120418_131111_outLine +BABEL_BP_107_76745_20120608_120713_inLine +BABEL_BP_107_76745_20120608_120713_outLine +BABEL_BP_107_77137_20120424_021726_inLine +BABEL_BP_107_77137_20120424_021726_outLine +BABEL_BP_107_77342_20120613_025311_inLine +BABEL_BP_107_77342_20120613_025311_outLine +BABEL_BP_107_77465_20120422_011705_inLine +BABEL_BP_107_77465_20120422_011705_outLine +BABEL_BP_107_77483_20120412_193453_inLine +BABEL_BP_107_77483_20120412_193453_outLine +BABEL_BP_107_77485_20120612_135036_inLine +BABEL_BP_107_77485_20120612_135036_outLine +BABEL_BP_107_77584_20120411_172119_inLine +BABEL_BP_107_77584_20120411_172119_outLine +BABEL_BP_107_77811_20120616_161504_inLine +BABEL_BP_107_77811_20120616_161504_outLine +BABEL_BP_107_77965_20120215_010556_inLine +BABEL_BP_107_77965_20120215_010556_outLine +BABEL_BP_107_78046_20120508_124043_inLine +BABEL_BP_107_78046_20120508_124043_outLine +BABEL_BP_107_78114_20120418_223932_inLine +BABEL_BP_107_78114_20120418_223932_outLine +BABEL_BP_107_78114_20120418_225258_inLine +BABEL_BP_107_78114_20120418_225258_outLine +BABEL_BP_107_78245_20120321_225726_inLine +BABEL_BP_107_78245_20120321_225726_outLine +BABEL_BP_107_78290_20120425_225137_inLine +BABEL_BP_107_78290_20120425_225137_outLine +BABEL_BP_107_78583_20120505_001318_inLine +BABEL_BP_107_78583_20120505_001318_outLine +BABEL_BP_107_78728_20120320_163004_inLine 
+BABEL_BP_107_78728_20120320_163004_outLine +BABEL_BP_107_78879_20120322_210341_inLine +BABEL_BP_107_78879_20120322_210341_outLine +BABEL_BP_107_79618_20120322_195037_inLine +BABEL_BP_107_79618_20120322_195037_outLine +BABEL_BP_107_79698_20120614_142804_inLine +BABEL_BP_107_79698_20120614_142804_outLine +BABEL_BP_107_79899_20120507_153432_inLine +BABEL_BP_107_79899_20120507_153432_outLine +BABEL_BP_107_80068_20120419_172811_inLine +BABEL_BP_107_80068_20120419_172811_outLine +BABEL_BP_107_80075_20120418_223142_inLine +BABEL_BP_107_80075_20120418_223142_outLine +BABEL_BP_107_80156_20120325_205810_inLine +BABEL_BP_107_80156_20120325_205810_outLine +BABEL_BP_107_80195_20120328_024036_inLine +BABEL_BP_107_80195_20120328_024036_outLine +BABEL_BP_107_80247_20120429_181855_inLine +BABEL_BP_107_80247_20120429_181855_outLine +BABEL_BP_107_80856_20120325_214845_inLine +BABEL_BP_107_80856_20120325_214845_outLine +BABEL_BP_107_81015_20120418_212020_inLine +BABEL_BP_107_81015_20120418_212020_outLine +BABEL_BP_107_81070_20120612_140617_inLine +BABEL_BP_107_81070_20120612_140617_outLine +BABEL_BP_107_81084_20120328_220200_inLine +BABEL_BP_107_81084_20120328_220200_outLine +BABEL_BP_107_81119_20120418_221853_inLine +BABEL_BP_107_81119_20120418_221853_outLine +BABEL_BP_107_81261_20120324_015429_inLine +BABEL_BP_107_81261_20120324_015429_outLine +BABEL_BP_107_81587_20120429_185902_inLine +BABEL_BP_107_81587_20120429_185902_outLine +BABEL_BP_107_81642_20120504_013042_inLine +BABEL_BP_107_81642_20120504_013042_outLine +BABEL_BP_107_81647_20120425_231333_inLine +BABEL_BP_107_81647_20120425_231333_outLine +BABEL_BP_107_81799_20120506_220843_inLine +BABEL_BP_107_81799_20120506_220843_outLine +BABEL_BP_107_81820_20120506_004426_inLine +BABEL_BP_107_81820_20120506_004426_outLine +BABEL_BP_107_81944_20120607_131513_inLine +BABEL_BP_107_81944_20120607_131513_outLine +BABEL_BP_107_82009_20120503_174403_inLine +BABEL_BP_107_82009_20120503_174403_outLine +BABEL_BP_107_82023_20120217_190453_inLine +BABEL_BP_107_82023_20120217_190453_outLine +BABEL_BP_107_82408_20120216_020857_inLine +BABEL_BP_107_82408_20120216_020857_outLine +BABEL_BP_107_82409_20120507_104757_inLine +BABEL_BP_107_82409_20120507_104757_outLine +BABEL_BP_107_82443_20120705_035534_inLine +BABEL_BP_107_82443_20120705_035535_outLine +BABEL_BP_107_82484_20120409_191254_inLine +BABEL_BP_107_82484_20120409_191254_outLine +BABEL_BP_107_82881_20120212_142555_inLine +BABEL_BP_107_82881_20120212_142555_outLine +BABEL_BP_107_83186_20120414_181142_inLine +BABEL_BP_107_83186_20120414_181142_outLine +BABEL_BP_107_83493_20120509_144229_inLine +BABEL_BP_107_83493_20120509_144229_outLine +BABEL_BP_107_83585_20120429_194403_inLine +BABEL_BP_107_83585_20120429_194403_outLine +BABEL_BP_107_83791_20120329_034633_inLine +BABEL_BP_107_83791_20120329_034633_outLine +BABEL_BP_107_84394_20120426_000543_inLine +BABEL_BP_107_84394_20120426_000543_outLine +BABEL_BP_107_84394_20120426_001306_inLine +BABEL_BP_107_84394_20120426_001306_outLine +BABEL_BP_107_84439_20120418_011204_inLine +BABEL_BP_107_84439_20120418_011204_outLine +BABEL_BP_107_84491_20120430_203802_inLine +BABEL_BP_107_84491_20120430_203802_outLine +BABEL_BP_107_84608_20120421_181859_inLine +BABEL_BP_107_84608_20120421_181859_outLine +BABEL_BP_107_84700_20120501_125141_inLine +BABEL_BP_107_84700_20120501_125141_outLine +BABEL_BP_107_84865_20120618_002645_inLine +BABEL_BP_107_84865_20120618_002645_outLine +BABEL_BP_107_84916_20120427_012731_inLine +BABEL_BP_107_84916_20120427_012731_outLine 
+BABEL_BP_107_84980_20120419_172354_inLine +BABEL_BP_107_84980_20120419_172354_outLine +BABEL_BP_107_85719_20120423_181434_inLine +BABEL_BP_107_85719_20120423_181434_outLine +BABEL_BP_107_85752_20120607_210520_inLine +BABEL_BP_107_85752_20120607_210520_outLine +BABEL_BP_107_85948_20120212_131910_inLine +BABEL_BP_107_85948_20120212_131910_outLine +BABEL_BP_107_86004_20120324_175639_inLine +BABEL_BP_107_86004_20120324_175639_outLine +BABEL_BP_107_86900_20120216_203256_inLine +BABEL_BP_107_86900_20120216_203256_outLine +BABEL_BP_107_86956_20120322_203435_inLine +BABEL_BP_107_86956_20120322_203435_outLine +BABEL_BP_107_87059_20120704_001703_inLine +BABEL_BP_107_87059_20120704_001703_outLine +BABEL_BP_107_87077_20120421_193746_inLine +BABEL_BP_107_87077_20120421_193746_outLine +BABEL_BP_107_87107_20120321_205615_inLine +BABEL_BP_107_87107_20120321_205615_outLine +BABEL_BP_107_87107_20120321_234308_inLine +BABEL_BP_107_87107_20120321_234308_outLine +BABEL_BP_107_87234_20120704_120118_inLine +BABEL_BP_107_87234_20120704_120118_outLine +BABEL_BP_107_87351_20120330_014139_inLine +BABEL_BP_107_87351_20120330_014139_outLine +BABEL_BP_107_87520_20120414_023319_inLine +BABEL_BP_107_87520_20120414_023319_outLine +BABEL_BP_107_87607_20120516_233058_inLine +BABEL_BP_107_87607_20120516_233058_outLine +BABEL_BP_107_87634_20120208_165319_inLine +BABEL_BP_107_87634_20120208_165319_outLine +BABEL_BP_107_87961_20120324_022603_inLine +BABEL_BP_107_87961_20120324_022603_outLine +BABEL_BP_107_88245_20120511_235523_inLine +BABEL_BP_107_88245_20120511_235523_outLine +BABEL_BP_107_88385_20120502_200409_inLine +BABEL_BP_107_88385_20120502_200409_outLine +BABEL_BP_107_88385_20120502_201320_inLine +BABEL_BP_107_88385_20120502_201320_outLine +BABEL_BP_107_88464_20120503_003553_inLine +BABEL_BP_107_88464_20120503_003553_outLine +BABEL_BP_107_88932_20120417_195406_inLine +BABEL_BP_107_88932_20120417_195406_outLine +BABEL_BP_107_88982_20120506_154243_inLine +BABEL_BP_107_88982_20120506_154243_outLine +BABEL_BP_107_89301_20120429_183901_inLine +BABEL_BP_107_89301_20120429_183901_outLine +BABEL_BP_107_89345_20120322_214445_inLine +BABEL_BP_107_89345_20120322_214445_outLine +BABEL_BP_107_89345_20120322_220001_inLine +BABEL_BP_107_89345_20120322_220001_outLine +BABEL_BP_107_89573_20120422_162720_inLine +BABEL_BP_107_89573_20120422_162720_outLine +BABEL_BP_107_89583_20120425_142134_inLine +BABEL_BP_107_89583_20120425_142134_outLine +BABEL_BP_107_89867_20120324_204851_inLine +BABEL_BP_107_89867_20120324_204851_outLine +BABEL_BP_107_90046_20120613_193455_inLine +BABEL_BP_107_90046_20120613_193455_outLine +BABEL_BP_107_90055_20120220_173056_inLine +BABEL_BP_107_90055_20120220_173056_outLine +BABEL_BP_107_90127_20120429_190926_inLine +BABEL_BP_107_90127_20120429_190926_outLine +BABEL_BP_107_90389_20120510_233725_inLine +BABEL_BP_107_90389_20120510_233725_outLine +BABEL_BP_107_90436_20120507_172546_inLine +BABEL_BP_107_90436_20120507_172546_outLine +BABEL_BP_107_90511_20120212_010634_inLine +BABEL_BP_107_90511_20120212_010634_outLine +BABEL_BP_107_90730_20120627_132153_inLine +BABEL_BP_107_90730_20120627_132153_outLine +BABEL_BP_107_90730_20120627_133239_inLine +BABEL_BP_107_90730_20120627_133239_outLine +BABEL_BP_107_90810_20120217_200922_inLine +BABEL_BP_107_90810_20120217_200922_outLine +BABEL_BP_107_90834_20120212_143912_inLine +BABEL_BP_107_90834_20120212_143912_outLine +BABEL_BP_107_91143_20120422_002758_inLine +BABEL_BP_107_91143_20120422_002758_outLine +BABEL_BP_107_91171_20120414_012621_inLine 
+BABEL_BP_107_91171_20120414_012621_outLine +BABEL_BP_107_91386_20120703_235839_inLine +BABEL_BP_107_91386_20120703_235839_outLine +BABEL_BP_107_91677_20120422_141358_inLine +BABEL_BP_107_91677_20120422_141358_outLine +BABEL_BP_107_91703_20120617_235231_inLine +BABEL_BP_107_92308_20120430_133906_inLine +BABEL_BP_107_92308_20120430_133906_outLine +BABEL_BP_107_92642_20120211_005506_inLine +BABEL_BP_107_92642_20120211_005506_outLine +BABEL_BP_107_92752_20120421_184804_inLine +BABEL_BP_107_92752_20120421_184805_outLine +BABEL_BP_107_92820_20120617_124233_inLine +BABEL_BP_107_92820_20120617_124233_outLine +BABEL_BP_107_92852_20120418_234454_inLine +BABEL_BP_107_92852_20120418_234454_outLine +BABEL_BP_107_93000_20120325_233431_inLine +BABEL_BP_107_93000_20120325_233431_outLine +BABEL_BP_107_93151_20120501_140536_inLine +BABEL_BP_107_93151_20120501_140536_outLine +BABEL_BP_107_93192_20120322_180400_inLine +BABEL_BP_107_93192_20120322_180400_outLine +BABEL_BP_107_93277_20120510_183523_inLine +BABEL_BP_107_93277_20120510_183523_outLine +BABEL_BP_107_93314_20120501_134510_inLine +BABEL_BP_107_93314_20120501_134510_outLine +BABEL_BP_107_93436_20120611_021137_inLine +BABEL_BP_107_93607_20120418_014651_inLine +BABEL_BP_107_93607_20120418_014651_outLine +BABEL_BP_107_93643_20120212_175939_inLine +BABEL_BP_107_93643_20120212_175939_outLine +BABEL_BP_107_93811_20120418_213351_inLine +BABEL_BP_107_93811_20120418_213351_outLine +BABEL_BP_107_94168_20120326_171855_inLine +BABEL_BP_107_94168_20120326_171855_outLine +BABEL_BP_107_94235_20120428_004200_inLine +BABEL_BP_107_94235_20120428_004200_outLine +BABEL_BP_107_94752_20120218_144213_inLine +BABEL_BP_107_94752_20120218_144213_outLine +BABEL_BP_107_95350_20120325_000241_inLine +BABEL_BP_107_95350_20120325_000241_outLine +BABEL_BP_107_95534_20120608_005148_inLine +BABEL_BP_107_95534_20120608_005148_outLine +BABEL_BP_107_95650_20120208_163126_inLine +BABEL_BP_107_95650_20120208_163126_outLine +BABEL_BP_107_95736_20120323_154852_inLine +BABEL_BP_107_95736_20120323_154852_outLine +BABEL_BP_107_95849_20120704_011515_inLine +BABEL_BP_107_95849_20120704_011515_outLine +BABEL_BP_107_95893_20120501_114843_inLine +BABEL_BP_107_95893_20120501_114843_outLine +BABEL_BP_107_95952_20120607_145525_inLine +BABEL_BP_107_95952_20120607_145526_outLine +BABEL_BP_107_96108_20120421_194651_inLine +BABEL_BP_107_96108_20120421_194651_outLine +BABEL_BP_107_96347_20120212_202200_inLine +BABEL_BP_107_96347_20120212_202200_outLine +BABEL_BP_107_96463_20120507_233133_inLine +BABEL_BP_107_96463_20120507_233133_outLine +BABEL_BP_107_96636_20120421_193514_inLine +BABEL_BP_107_96636_20120421_193514_outLine +BABEL_BP_107_96636_20120421_195252_inLine +BABEL_BP_107_96636_20120421_195252_outLine +BABEL_BP_107_96788_20120409_195914_inLine +BABEL_BP_107_96788_20120409_195914_outLine +BABEL_BP_107_97004_20120704_194048_inLine +BABEL_BP_107_97004_20120704_194048_outLine +BABEL_BP_107_97230_20120612_142451_inLine +BABEL_BP_107_97230_20120612_142451_outLine +BABEL_BP_107_97254_20120422_153600_inLine +BABEL_BP_107_97254_20120422_153600_outLine +BABEL_BP_107_97298_20120704_201748_inLine +BABEL_BP_107_97298_20120704_201748_outLine +BABEL_BP_107_97590_20120616_165917_inLine +BABEL_BP_107_97590_20120616_165917_outLine +BABEL_BP_107_97635_20120617_233435_inLine +BABEL_BP_107_97635_20120617_233435_outLine +BABEL_BP_107_97699_20120618_005543_inLine +BABEL_BP_107_97699_20120618_005543_outLine +BABEL_BP_107_97797_20120617_234645_inLine +BABEL_BP_107_97797_20120617_234645_outLine 
+BABEL_BP_107_97941_20120423_201113_inLine +BABEL_BP_107_97941_20120423_201113_outLine +BABEL_BP_107_97941_20120423_201934_inLine +BABEL_BP_107_97941_20120423_201934_outLine +BABEL_BP_107_98279_20120509_172421_inLine +BABEL_BP_107_98279_20120509_172421_outLine +BABEL_BP_107_98762_20120612_160310_inLine +BABEL_BP_107_98762_20120612_160310_outLine +BABEL_BP_107_99514_20120505_142249_inLine +BABEL_BP_107_99514_20120505_142249_outLine +BABEL_BP_107_99697_20120424_211952_inLine +BABEL_BP_107_99697_20120424_211952_outLine +BABEL_BP_107_99709_20120510_011731_inLine +BABEL_BP_107_99709_20120510_011731_outLine
diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/evalpart1.list b/egs/babel/s5d/conf/lists/107-vietnamese/evalpart1.list
new file mode 100644
index 00000000000..81896827fbf
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/107-vietnamese/evalpart1.list
@@ -0,0 +1,194 @@
+BABEL_BP_107_11203_20120415_212056_inLine +BABEL_BP_107_11203_20120415_212056_outLine +BABEL_BP_107_11824_20120413_213002_inLine +BABEL_BP_107_11824_20120413_213002_outLine +BABEL_BP_107_14389_20120617_164138_inLine +BABEL_BP_107_14389_20120617_164138_outLine +BABEL_BP_107_14874_20120320_190424_inLine +BABEL_BP_107_14874_20120320_190424_outLine +BABEL_BP_107_14874_20120320_192210_inLine +BABEL_BP_107_14874_20120320_192210_outLine +BABEL_BP_107_15859_20120419_133516_inLine +BABEL_BP_107_15859_20120419_133516_outLine +BABEL_BP_107_17900_20120323_015142_inLine +BABEL_BP_107_17900_20120323_015142_outLine +BABEL_BP_107_20685_20120222_210447_inLine +BABEL_BP_107_20685_20120222_210447_outLine +BABEL_BP_107_20775_20120502_214146_inLine +BABEL_BP_107_20775_20120502_214146_outLine +BABEL_BP_107_22566_20120704_023628_inLine +BABEL_BP_107_22566_20120704_023628_outLine +BABEL_BP_107_24379_20120422_173418_inLine +BABEL_BP_107_24379_20120422_173418_outLine +BABEL_BP_107_24431_20120215_202205_inLine +BABEL_BP_107_24431_20120215_202205_outLine +BABEL_BP_107_25502_20120217_005526_inLine +BABEL_BP_107_25502_20120217_005526_outLine +BABEL_BP_107_25871_20120422_181122_inLine +BABEL_BP_107_25871_20120422_181122_outLine +BABEL_BP_107_27605_20120329_015050_inLine +BABEL_BP_107_27605_20120329_015050_outLine +BABEL_BP_107_27645_20120501_005559_inLine +BABEL_BP_107_27645_20120501_005559_outLine +BABEL_BP_107_28754_20120417_233136_inLine +BABEL_BP_107_28754_20120417_233136_outLine +BABEL_BP_107_29133_20120212_223742_inLine +BABEL_BP_107_29133_20120212_223742_outLine +BABEL_BP_107_29512_20120426_133304_inLine +BABEL_BP_107_29512_20120426_133304_outLine +BABEL_BP_107_31256_20120424_173937_inLine +BABEL_BP_107_31256_20120424_173937_outLine +BABEL_BP_107_32452_20120417_025731_inLine +BABEL_BP_107_32452_20120417_025731_outLine +BABEL_BP_107_32830_20120217_010905_inLine +BABEL_BP_107_32830_20120217_010905_outLine +BABEL_BP_107_32962_20120417_002922_inLine +BABEL_BP_107_32962_20120417_002922_outLine +BABEL_BP_107_34357_20120608_192929_inLine +BABEL_BP_107_34357_20120608_192929_outLine +BABEL_BP_107_34439_20120514_155943_inLine +BABEL_BP_107_34439_20120514_155943_outLine +BABEL_BP_107_35896_20120426_160252_inLine +BABEL_BP_107_35896_20120426_160252_outLine +BABEL_BP_107_36143_20120217_012635_inLine +BABEL_BP_107_36143_20120217_012635_outLine +BABEL_BP_107_36143_20120217_175752_inLine +BABEL_BP_107_36143_20120217_175752_outLine +BABEL_BP_107_37185_20120608_122828_inLine +BABEL_BP_107_37185_20120608_122828_outLine +BABEL_BP_107_37842_20120513_023632_inLine +BABEL_BP_107_37842_20120513_023632_outLine +BABEL_BP_107_38635_20120424_231446_inLine
+BABEL_BP_107_38635_20120424_231446_outLine +BABEL_BP_107_38863_20120614_173605_inLine +BABEL_BP_107_38863_20120614_173605_outLine +BABEL_BP_107_38863_20120614_174335_inLine +BABEL_BP_107_38863_20120614_174335_outLine +BABEL_BP_107_38863_20120614_175009_inLine +BABEL_BP_107_38863_20120614_175009_outLine +BABEL_BP_107_41989_20120321_185501_inLine +BABEL_BP_107_41989_20120321_185501_outLine +BABEL_BP_107_41989_20120321_190714_inLine +BABEL_BP_107_41989_20120321_190714_outLine +BABEL_BP_107_42212_20120704_203258_inLine +BABEL_BP_107_42212_20120704_203258_outLine +BABEL_BP_107_42768_20120503_180000_inLine +BABEL_BP_107_42768_20120503_180000_outLine +BABEL_BP_107_43991_20120429_013420_inLine +BABEL_BP_107_43991_20120429_013420_outLine +BABEL_BP_107_44117_20120704_023955_inLine +BABEL_BP_107_44117_20120704_023955_outLine +BABEL_BP_107_45106_20120118_183909_inLine +BABEL_BP_107_45106_20120118_183909_outLine +BABEL_BP_107_45786_20120502_200051_inLine +BABEL_BP_107_45786_20120502_200051_outLine +BABEL_BP_107_46427_20120516_213127_inLine +BABEL_BP_107_46427_20120516_213127_outLine +BABEL_BP_107_46813_20120503_214109_inLine +BABEL_BP_107_46813_20120503_214109_outLine +BABEL_BP_107_47185_20120417_000125_inLine +BABEL_BP_107_47185_20120417_000125_outLine +BABEL_BP_107_47249_20120704_173500_inLine +BABEL_BP_107_47249_20120704_173500_outLine +BABEL_BP_107_48404_20120704_162020_inLine +BABEL_BP_107_48404_20120704_162020_outLine +BABEL_BP_107_50915_20120608_150955_inLine +BABEL_BP_107_50915_20120608_150955_outLine +BABEL_BP_107_51791_20120517_004528_inLine +BABEL_BP_107_51791_20120517_004528_outLine +BABEL_BP_107_52024_20120414_193538_inLine +BABEL_BP_107_52024_20120414_193538_outLine +BABEL_BP_107_52691_20120617_160904_inLine +BABEL_BP_107_52691_20120617_160904_outLine +BABEL_BP_107_52900_20120320_150335_inLine +BABEL_BP_107_52900_20120320_150335_outLine +BABEL_BP_107_53278_20120508_192335_inLine +BABEL_BP_107_53278_20120508_192335_outLine +BABEL_BP_107_55121_20120504_003327_inLine +BABEL_BP_107_55121_20120504_003327_outLine +BABEL_BP_107_55678_20120323_211821_inLine +BABEL_BP_107_55678_20120323_211821_outLine +BABEL_BP_107_56342_20120419_132008_inLine +BABEL_BP_107_56342_20120419_132008_outLine +BABEL_BP_107_57551_20120325_225227_inLine +BABEL_BP_107_57551_20120325_225227_outLine +BABEL_BP_107_57625_20120506_021834_inLine +BABEL_BP_107_57625_20120506_021834_outLine +BABEL_BP_107_59671_20120322_225750_inLine +BABEL_BP_107_59671_20120322_225750_outLine +BABEL_BP_107_60250_20120218_193537_inLine +BABEL_BP_107_60250_20120218_193537_outLine +BABEL_BP_107_61988_20120406_134336_inLine +BABEL_BP_107_61988_20120406_134336_outLine +BABEL_BP_107_63491_20120502_145101_inLine +BABEL_BP_107_63491_20120502_145101_outLine +BABEL_BP_107_65415_20120220_153755_inLine +BABEL_BP_107_65415_20120220_153755_outLine +BABEL_BP_107_65783_20120429_153408_inLine +BABEL_BP_107_65783_20120429_153408_outLine +BABEL_BP_107_66784_20120616_151422_inLine +BABEL_BP_107_66784_20120616_151422_outLine +BABEL_BP_107_68337_20120404_230000_inLine +BABEL_BP_107_68337_20120404_230000_outLine +BABEL_BP_107_69236_20120214_230344_inLine +BABEL_BP_107_69236_20120214_230344_outLine +BABEL_BP_107_70643_20120427_194211_inLine +BABEL_BP_107_70643_20120427_194211_outLine +BABEL_BP_107_72011_20120704_231031_inLine +BABEL_BP_107_72011_20120704_231031_outLine +BABEL_BP_107_73122_20120501_124450_inLine +BABEL_BP_107_73122_20120501_124450_outLine +BABEL_BP_107_75932_20120419_222819_inLine +BABEL_BP_107_75932_20120419_222819_outLine 
+BABEL_BP_107_76002_20120608_001301_inLine +BABEL_BP_107_76002_20120608_001301_outLine +BABEL_BP_107_76745_20120608_120713_inLine +BABEL_BP_107_76745_20120608_120713_outLine +BABEL_BP_107_78245_20120321_225726_inLine +BABEL_BP_107_78245_20120321_225726_outLine +BABEL_BP_107_79618_20120322_195037_inLine +BABEL_BP_107_79618_20120322_195037_outLine +BABEL_BP_107_79698_20120614_142804_inLine +BABEL_BP_107_79698_20120614_142804_outLine +BABEL_BP_107_80247_20120429_181855_inLine +BABEL_BP_107_80247_20120429_181855_outLine +BABEL_BP_107_81261_20120324_015429_inLine +BABEL_BP_107_81261_20120324_015429_outLine +BABEL_BP_107_81642_20120504_013042_inLine +BABEL_BP_107_81642_20120504_013042_outLine +BABEL_BP_107_81647_20120425_231333_inLine +BABEL_BP_107_81647_20120425_231333_outLine +BABEL_BP_107_81944_20120607_131513_inLine +BABEL_BP_107_81944_20120607_131513_outLine +BABEL_BP_107_83186_20120414_181142_inLine +BABEL_BP_107_83186_20120414_181142_outLine +BABEL_BP_107_84700_20120501_125141_inLine +BABEL_BP_107_84700_20120501_125141_outLine +BABEL_BP_107_84916_20120427_012731_inLine +BABEL_BP_107_84916_20120427_012731_outLine +BABEL_BP_107_85719_20120423_181434_inLine +BABEL_BP_107_85719_20120423_181434_outLine +BABEL_BP_107_87634_20120208_165319_inLine +BABEL_BP_107_87634_20120208_165319_outLine +BABEL_BP_107_88385_20120502_200409_inLine +BABEL_BP_107_88385_20120502_200409_outLine +BABEL_BP_107_88385_20120502_201320_inLine +BABEL_BP_107_88385_20120502_201320_outLine +BABEL_BP_107_92642_20120211_005506_inLine +BABEL_BP_107_92642_20120211_005506_outLine +BABEL_BP_107_92852_20120418_234454_inLine +BABEL_BP_107_92852_20120418_234454_outLine +BABEL_BP_107_93277_20120510_183523_inLine +BABEL_BP_107_93277_20120510_183523_outLine +BABEL_BP_107_95952_20120607_145525_inLine +BABEL_BP_107_95952_20120607_145526_outLine +BABEL_BP_107_97941_20120423_201113_inLine +BABEL_BP_107_97941_20120423_201113_outLine +BABEL_BP_107_97941_20120423_201934_inLine +BABEL_BP_107_97941_20120423_201934_outLine +BABEL_BP_107_98279_20120509_172421_inLine +BABEL_BP_107_98279_20120509_172421_outLine +BABEL_BP_107_98762_20120612_160310_inLine +BABEL_BP_107_98762_20120612_160310_outLine +BABEL_BP_107_99697_20120424_211952_inLine +BABEL_BP_107_99697_20120424_211952_outLine
diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/train.FullLP.list b/egs/babel/s5d/conf/lists/107-vietnamese/train.FullLP.list
new file mode 100644
index 00000000000..522b95fc080
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/107-vietnamese/train.FullLP.list
@@ -0,0 +1,1042 @@
+BABEL_BP_107_10033_20120208_180820_outLine +BABEL_BP_107_10066_20120428_121544_inLine +BABEL_BP_107_10066_20120428_121544_outLine +BABEL_BP_107_10190_20120424_023348_inLine +BABEL_BP_107_10190_20120425_012249_inLine +BABEL_BP_107_10211_20120323_013915_inLine +BABEL_BP_107_10211_20120323_013915_outLine +BABEL_BP_107_10545_20120424_184701_inLine +BABEL_BP_107_10697_20120516_194235_inLine +BABEL_BP_107_10732_20120328_172421_inLine +BABEL_BP_107_10732_20120328_172422_outLine +BABEL_BP_107_10900_20120322_022523_inLine +BABEL_BP_107_10900_20120322_022524_outLine +BABEL_BP_107_10945_20120322_222039_inLine +BABEL_BP_107_10945_20120322_222039_outLine +BABEL_BP_107_10973_20120404_233129_inLine +BABEL_BP_107_10973_20120404_233129_outLine +BABEL_BP_107_10985_20120502_123725_inLine +BABEL_BP_107_10985_20120502_123725_outLine +BABEL_BP_107_11022_20120422_013455_inLine +BABEL_BP_107_11022_20120422_013455_outLine +BABEL_BP_107_11422_20120208_160559_inLine +BABEL_BP_107_11422_20120208_160559_outLine
+BABEL_BP_107_11479_20120212_011029_inLine +BABEL_BP_107_11479_20120212_011029_outLine +BABEL_BP_107_11827_20120322_205100_inLine +BABEL_BP_107_11827_20120322_205100_outLine +BABEL_BP_107_11949_20120704_001817_inLine +BABEL_BP_107_11949_20120704_001817_outLine +BABEL_BP_107_11982_20120217_004340_inLine +BABEL_BP_107_12486_20120424_174759_inLine +BABEL_BP_107_12552_20120503_152109_inLine +BABEL_BP_107_12569_20120609_190056_inLine +BABEL_BP_107_12569_20120609_190056_outLine +BABEL_BP_107_12587_20120322_230456_inLine +BABEL_BP_107_12587_20120322_230457_outLine +BABEL_BP_107_12643_20120704_185225_inLine +BABEL_BP_107_12643_20120704_185225_outLine +BABEL_BP_107_12897_20120413_195042_inLine +BABEL_BP_107_12897_20120413_195042_outLine +BABEL_BP_107_12897_20120413_200727_inLine +BABEL_BP_107_12897_20120413_200727_outLine +BABEL_BP_107_13065_20120425_034939_inLine +BABEL_BP_107_13065_20120425_034939_outLine +BABEL_BP_107_13229_20120417_201028_inLine +BABEL_BP_107_13229_20120417_201028_outLine +BABEL_BP_107_13272_20120320_141107_outLine +BABEL_BP_107_13272_20120320_142506_outLine +BABEL_BP_107_13389_20120406_141036_inLine +BABEL_BP_107_13389_20120406_141036_outLine +BABEL_BP_107_13419_20120218_213925_inLine +BABEL_BP_107_13419_20120218_214753_inLine +BABEL_BP_107_13781_20120516_204849_inLine +BABEL_BP_107_13781_20120516_204849_outLine +BABEL_BP_107_13795_20120418_190613_inLine +BABEL_BP_107_13795_20120418_190613_outLine +BABEL_BP_107_14075_20120507_004435_inLine +BABEL_BP_107_14294_20120328_010858_inLine +BABEL_BP_107_14294_20120328_010858_outLine +BABEL_BP_107_14468_20120321_003916_inLine +BABEL_BP_107_14468_20120321_003916_outLine +BABEL_BP_107_14475_20120704_204813_inLine +BABEL_BP_107_14475_20120704_204813_outLine +BABEL_BP_107_14500_20120429_194225_outLine +BABEL_BP_107_14707_20120429_004741_inLine +BABEL_BP_107_14707_20120429_004741_outLine +BABEL_BP_107_14707_20120429_005954_inLine +BABEL_BP_107_14891_20120118_195012_inLine +BABEL_BP_107_14707_20120429_005954_outLine +BABEL_BP_107_14729_20120429_200418_outLine +BABEL_BP_107_14836_20120507_235040_outLine +BABEL_BP_107_14891_20120118_195012_outLine +BABEL_BP_107_14936_20120405_224830_inLine +BABEL_BP_107_14936_20120405_224830_outLine +BABEL_BP_107_15073_20120417_011547_outLine +BABEL_BP_107_15142_20120322_132735_outLine +BABEL_BP_107_15353_20120504_193952_inLine +BABEL_BP_107_15353_20120504_193952_outLine +BABEL_BP_107_15460_20120426_224823_inLine +BABEL_BP_107_15460_20120426_224823_outLine +BABEL_BP_107_15473_20120217_231342_inLine +BABEL_BP_107_15696_20120328_010156_outLine +BABEL_BP_107_15719_20120612_122632_inLine +BABEL_BP_107_15719_20120612_122632_outLine +BABEL_BP_107_15744_20120608_123258_inLine +BABEL_BP_107_15873_20120405_224524_inLine +BABEL_BP_107_15873_20120405_224524_outLine +BABEL_BP_107_15881_20120322_233839_inLine +BABEL_BP_107_15940_20120424_221327_inLine +BABEL_BP_107_16406_20120324_011714_inLine +BABEL_BP_107_16406_20120324_011714_outLine +BABEL_BP_107_16617_20120228_014302_inLine +BABEL_BP_107_16646_20120418_130946_outLine +BABEL_BP_107_16660_20120210_231224_outLine +BABEL_BP_107_16669_20120208_140603_inLine +BABEL_BP_107_16801_20120418_121951_inLine +BABEL_BP_107_16801_20120418_203644_inLine +BABEL_BP_107_16875_20120704_133550_inLine +BABEL_BP_107_16875_20120704_133550_outLine +BABEL_BP_107_16883_20120501_194424_inLine +BABEL_BP_107_16883_20120501_194424_outLine +BABEL_BP_107_16950_20120704_155322_inLine +BABEL_BP_107_16950_20120704_155322_outLine +BABEL_BP_107_17013_20120501_002142_inLine 
+BABEL_BP_107_17013_20120501_002142_outLine +BABEL_BP_107_17018_20120322_220450_inLine +BABEL_BP_107_17018_20120322_220450_outLine +BABEL_BP_107_17093_20120501_202548_outLine +BABEL_BP_107_17203_20120212_220043_outLine +BABEL_BP_107_17353_20120617_133436_inLine +BABEL_BP_107_17353_20120617_133436_outLine +BABEL_BP_107_17933_20120421_134916_inLine +BABEL_BP_107_17933_20120421_134916_outLine +BABEL_BP_107_18187_20120608_125102_outLine +BABEL_BP_107_18209_20120420_004725_inLine +BABEL_BP_107_18234_20120210_230712_inLine +BABEL_BP_107_18495_20120618_003601_outLine +BABEL_BP_107_18534_20120504_132522_inLine +BABEL_BP_107_18534_20120504_132522_outLine +BABEL_BP_107_18858_20120209_004527_outLine +BABEL_BP_107_19012_20120503_215037_inLine +BABEL_BP_107_19012_20120503_215037_outLine +BABEL_BP_107_19248_20120508_210026_inLine +BABEL_BP_107_19248_20120508_210027_outLine +BABEL_BP_107_19290_20120421_141409_inLine +BABEL_BP_107_19290_20120421_141409_outLine +BABEL_BP_107_19404_20120321_171020_inLine +BABEL_BP_107_19404_20120321_171020_outLine +BABEL_BP_107_19479_20120407_014459_inLine +BABEL_BP_107_19479_20120407_014459_outLine +BABEL_BP_107_19731_20120506_011629_inLine +BABEL_BP_107_19731_20120515_001656_inLine +BABEL_BP_107_19869_20120608_012542_outLine +BABEL_BP_107_20320_20120212_214655_inLine +BABEL_BP_107_20332_20120426_010134_inLine +BABEL_BP_107_20332_20120426_010837_inLine +BABEL_BP_107_20332_20120426_010134_outLine +BABEL_BP_107_20332_20120426_010837_outLine +BABEL_BP_107_20483_20120416_171740_outLine +BABEL_BP_107_20518_20120418_211112_inLine +BABEL_BP_107_20582_20120322_220747_inLine +BABEL_BP_107_20582_20120322_220747_outLine +BABEL_BP_107_20740_20120427_193225_inLine +BABEL_BP_107_20740_20120427_193757_inLine +BABEL_BP_107_20741_20120325_181245_outLine +BABEL_BP_107_20799_20120515_010136_inLine +BABEL_BP_107_20799_20120515_010136_outLine +BABEL_BP_107_21052_20120415_204922_inLine +BABEL_BP_107_21139_20120425_192642_outLine +BABEL_BP_107_21258_20120418_145725_inLine +BABEL_BP_107_21367_20120629_140326_outLine +BABEL_BP_107_21430_20120608_003600_outLine +BABEL_BP_107_21477_20120323_185255_inLine +BABEL_BP_107_21477_20120323_185255_outLine +BABEL_BP_107_21518_20120501_152038_inLine +BABEL_BP_107_21518_20120501_152038_outLine +BABEL_BP_107_21584_20120217_004017_inLine +BABEL_BP_107_21584_20120217_004017_outLine +BABEL_BP_107_21758_20120407_010928_inLine +BABEL_BP_107_21758_20120407_010928_outLine +BABEL_BP_107_21758_20120407_011555_inLine +BABEL_BP_107_21758_20120407_011555_outLine +BABEL_BP_107_21929_20120323_015539_inLine +BABEL_BP_107_21929_20120323_022750_inLine +BABEL_BP_107_21946_20120507_015056_inLine +BABEL_BP_107_21946_20120507_015056_outLine +BABEL_BP_107_22010_20120608_182138_inLine +BABEL_BP_107_22010_20120608_182138_outLine +BABEL_BP_107_22272_20120511_232328_inLine +BABEL_BP_107_22272_20120511_232328_outLine +BABEL_BP_107_22494_20120613_122322_outLine +BABEL_BP_107_22898_20120322_144401_inLine +BABEL_BP_107_22898_20120322_144401_outLine +BABEL_BP_107_22910_20120214_213815_inLine +BABEL_BP_107_22910_20120214_213815_outLine +BABEL_BP_107_22979_20120505_000039_inLine +BABEL_BP_107_22979_20120505_000039_outLine +BABEL_BP_107_23167_20120217_212610_inLine +BABEL_BP_107_23167_20120217_212610_outLine +BABEL_BP_107_23629_20120501_173549_inLine +BABEL_BP_107_23629_20120501_173549_outLine +BABEL_BP_107_23930_20120506_214145_inLine +BABEL_BP_107_24014_20120618_010729_inLine +BABEL_BP_107_24014_20120618_010729_outLine +BABEL_BP_107_24094_20120421_134318_outLine 
+BABEL_BP_107_24569_20120507_123854_outLine +BABEL_BP_107_24608_20120208_170106_outLine +BABEL_BP_107_24638_20120504_004348_outLine +BABEL_BP_107_24642_20120505_201543_inLine +BABEL_BP_107_24642_20120505_201543_outLine +BABEL_BP_107_24799_20120508_232153_outLine +BABEL_BP_107_24817_20120422_203514_inLine +BABEL_BP_107_24833_20120218_171649_outLine +BABEL_BP_107_25035_20120214_230841_inLine +BABEL_BP_107_25072_20120429_144535_inLine +BABEL_BP_107_25479_20120506_161146_inLine +BABEL_BP_107_25479_20120506_161146_outLine +BABEL_BP_107_25576_20120321_222905_outLine +BABEL_BP_107_25866_20120426_193335_inLine +BABEL_BP_107_26348_20120508_100651_inLine +BABEL_BP_107_26348_20120508_102042_inLine +BABEL_BP_107_26350_20120209_004945_inLine +BABEL_BP_107_26350_20120209_004945_outLine +BABEL_BP_107_26350_20120209_012139_inLine +BABEL_BP_107_26350_20120209_012139_outLine +BABEL_BP_107_26598_20120425_143602_outLine +BABEL_BP_107_26684_20120530_155756_inLine +BABEL_BP_107_26786_20120423_191945_inLine +BABEL_BP_107_26786_20120423_191945_outLine +BABEL_BP_107_27064_20120222_210044_inLine +BABEL_BP_107_27064_20120222_210044_outLine +BABEL_BP_107_27503_20120212_221915_inLine +BABEL_BP_107_27619_20120328_023110_outLine +BABEL_BP_107_27698_20120212_005737_inLine +BABEL_BP_107_27698_20120212_005737_outLine +BABEL_BP_107_27724_20120407_130547_inLine +BABEL_BP_107_27724_20120407_130547_outLine +BABEL_BP_107_27890_20120428_235422_inLine +BABEL_BP_107_27890_20120428_235422_outLine +BABEL_BP_107_27916_20120607_114245_outLine +BABEL_BP_107_27916_20120607_115650_outLine +BABEL_BP_107_28016_20120405_222219_inLine +BABEL_BP_107_28016_20120405_222219_outLine +BABEL_BP_107_28107_20120208_142843_outLine +BABEL_BP_107_28107_20120208_144923_outLine +BABEL_BP_107_28132_20120405_152728_outLine +BABEL_BP_107_28260_20120212_153106_inLine +BABEL_BP_107_28557_20120507_001619_outLine +BABEL_BP_107_28675_20120607_231549_inLine +BABEL_BP_107_28675_20120607_231549_outLine +BABEL_BP_107_28675_20120607_233243_inLine +BABEL_BP_107_28675_20120607_233243_outLine +BABEL_BP_107_28740_20120212_150039_inLine +BABEL_BP_107_28740_20120212_150039_outLine +BABEL_BP_107_29280_20120607_184929_outLine +BABEL_BP_107_29280_20120607_190345_outLine +BABEL_BP_107_29290_20120415_102435_inLine +BABEL_BP_107_29335_20120424_013042_inLine +BABEL_BP_107_29335_20120424_013042_outLine +BABEL_BP_107_29407_20120607_132315_inLine +BABEL_BP_107_29407_20120607_135318_inLine +BABEL_BP_107_29444_20120322_191236_outLine +BABEL_BP_107_29771_20120504_010738_outLine +BABEL_BP_107_29959_20120418_001028_inLine +BABEL_BP_107_29959_20120418_001028_outLine +BABEL_BP_107_29988_20120516_233700_inLine +BABEL_BP_107_30210_20120427_140255_inLine +BABEL_BP_107_30210_20120502_202749_inLine +BABEL_BP_107_30554_20120617_231216_outLine +BABEL_BP_107_30583_20120212_210712_inLine +BABEL_BP_107_30722_20120505_103655_inLine +BABEL_BP_107_30722_20120505_103655_outLine +BABEL_BP_107_31031_20120501_205733_inLine +BABEL_BP_107_31031_20120501_210746_inLine +BABEL_BP_107_31298_20120322_125112_outLine +BABEL_BP_107_31393_20120325_171905_inLine +BABEL_BP_107_31460_20120325_193921_inLine +BABEL_BP_107_31606_20120607_131428_inLine +BABEL_BP_107_31738_20120704_101130_outLine +BABEL_BP_107_31902_20120417_015618_inLine +BABEL_BP_107_31902_20120417_015618_outLine +BABEL_BP_107_31917_20120501_202910_inLine +BABEL_BP_107_31917_20120501_202910_outLine +BABEL_BP_107_31980_20120212_174027_inLine +BABEL_BP_107_31980_20120212_174027_outLine +BABEL_BP_107_32132_20120418_211743_inLine 
+BABEL_BP_107_32274_20120324_011402_inLine +BABEL_BP_107_32295_20120617_141025_inLine +BABEL_BP_107_32295_20120617_141025_outLine +BABEL_BP_107_32334_20120429_005403_inLine +BABEL_BP_107_32334_20120429_005403_outLine +BABEL_BP_107_32400_20120426_000137_inLine +BABEL_BP_107_32400_20120426_000137_outLine +BABEL_BP_107_32710_20120418_215432_inLine +BABEL_BP_107_32710_20120418_215432_outLine +BABEL_BP_107_33012_20120611_155055_inLine +BABEL_BP_107_33364_20120617_011853_inLine +BABEL_BP_107_33364_20120617_011853_outLine +BABEL_BP_107_33577_20120704_152608_outLine +BABEL_BP_107_33671_20120330_001033_inLine +BABEL_BP_107_33671_20120330_001033_outLine +BABEL_BP_107_33742_20120608_143147_inLine +BABEL_BP_107_33742_20120608_143147_outLine +BABEL_BP_107_33817_20120423_130850_inLine +BABEL_BP_107_33817_20120423_130850_outLine +BABEL_BP_107_33969_20120429_214721_outLine +BABEL_BP_107_34235_20120218_205136_outLine +BABEL_BP_107_34480_20120608_151830_inLine +BABEL_BP_107_34498_20120429_140537_inLine +BABEL_BP_107_34498_20120429_140537_outLine +BABEL_BP_107_34590_20120323_134554_inLine +BABEL_BP_107_34590_20120323_134554_outLine +BABEL_BP_107_34857_20120419_235853_inLine +BABEL_BP_107_34961_20120212_223315_inLine +BABEL_BP_107_34961_20120212_223315_outLine +BABEL_BP_107_34961_20120212_224207_inLine +BABEL_BP_107_34961_20120212_224207_outLine +BABEL_BP_107_35011_20120321_223128_inLine +BABEL_BP_107_35011_20120321_223128_outLine +BABEL_BP_107_35016_20120611_185645_outLine +BABEL_BP_107_35074_20120608_164703_outLine +BABEL_BP_107_35179_20120414_153233_inLine +BABEL_BP_107_35179_20120414_153233_outLine +BABEL_BP_107_35188_20120614_131427_inLine +BABEL_BP_107_35305_20120422_120043_outLine +BABEL_BP_107_35357_20120614_212245_inLine +BABEL_BP_107_35357_20120614_212245_outLine +BABEL_BP_107_36037_20120616_153023_outLine +BABEL_BP_107_36196_20120608_110319_inLine +BABEL_BP_107_36196_20120608_111049_inLine +BABEL_BP_107_36268_20120406_211711_inLine +BABEL_BP_107_36268_20120406_211711_outLine +BABEL_BP_107_36356_20120211_173247_inLine +BABEL_BP_107_36356_20120211_173247_outLine +BABEL_BP_107_36383_20120416_225701_outLine +BABEL_BP_107_36391_20120505_171824_inLine +BABEL_BP_107_36424_20120421_130549_inLine +BABEL_BP_107_36424_20120421_130549_outLine +BABEL_BP_107_36424_20120421_133610_inLine +BABEL_BP_107_36424_20120421_133610_outLine +BABEL_BP_107_36502_20120617_145859_inLine +BABEL_BP_107_36502_20120617_145859_outLine +BABEL_BP_107_36711_20120325_230112_inLine +BABEL_BP_107_36711_20120325_230112_outLine +BABEL_BP_107_36722_20120627_122821_inLine +BABEL_BP_107_36722_20120627_122821_outLine +BABEL_BP_107_37110_20120209_002706_inLine +BABEL_BP_107_37110_20120209_002706_outLine +BABEL_BP_107_37210_20120322_205536_outLine +BABEL_BP_107_37285_20120325_000245_inLine +BABEL_BP_107_37285_20120325_000245_outLine +BABEL_BP_107_37335_20120616_150016_inLine +BABEL_BP_107_37335_20120616_150016_outLine +BABEL_BP_107_37374_20120418_185819_inLine +BABEL_BP_107_37940_20120424_004619_inLine +BABEL_BP_107_37940_20120424_004619_outLine +BABEL_BP_107_38464_20120422_105536_outLine +BABEL_BP_107_38592_20120704_150926_outLine +BABEL_BP_107_38640_20120215_030154_inLine +BABEL_BP_107_38640_20120215_030154_outLine +BABEL_BP_107_38698_20120322_213531_inLine +BABEL_BP_107_38698_20120322_213531_outLine +BABEL_BP_107_38879_20120406_150304_inLine +BABEL_BP_107_38879_20120406_150304_outLine +BABEL_BP_107_38912_20120414_160852_inLine +BABEL_BP_107_38912_20120414_160852_outLine +BABEL_BP_107_39246_20120613_202128_inLine 
+BABEL_BP_107_39246_20120613_202128_outLine +BABEL_BP_107_39264_20120417_191639_inLine +BABEL_BP_107_39264_20120417_191639_outLine +BABEL_BP_107_39296_20120705_025906_inLine +BABEL_BP_107_39384_20120324_010939_inLine +BABEL_BP_107_39384_20120324_010939_outLine +BABEL_BP_107_39384_20120324_011832_inLine +BABEL_BP_107_39384_20120324_011832_outLine +BABEL_BP_107_39430_20120325_015935_inLine +BABEL_BP_107_39430_20120325_015935_outLine +BABEL_BP_107_40002_20120502_174229_outLine +BABEL_BP_107_40123_20120505_191426_inLine +BABEL_BP_107_40123_20120505_191426_outLine +BABEL_BP_107_40385_20120704_143210_outLine +BABEL_BP_107_40477_20120323_194919_outLine +BABEL_BP_107_40510_20120426_153808_inLine +BABEL_BP_107_40510_20120426_153808_outLine +BABEL_BP_107_40980_20120416_233130_inLine +BABEL_BP_107_40980_20120416_233130_outLine +BABEL_BP_107_40980_20120417_001128_inLine +BABEL_BP_107_40980_20120417_001128_outLine +BABEL_BP_107_41146_20120211_162158_inLine +BABEL_BP_107_41170_20120201_205341_inLine +BABEL_BP_107_41170_20120201_205341_outLine +BABEL_BP_107_41590_20120610_162218_outLine +BABEL_BP_107_41797_20120420_003902_inLine +BABEL_BP_107_41797_20120420_003902_outLine +BABEL_BP_107_42145_20120418_131525_inLine +BABEL_BP_107_42266_20120407_182544_outLine +BABEL_BP_107_42309_20120608_215912_inLine +BABEL_BP_107_42309_20120608_215912_outLine +BABEL_BP_107_42651_20120211_192913_inLine +BABEL_BP_107_42651_20120211_192913_outLine +BABEL_BP_107_42910_20120212_154722_inLine +BABEL_BP_107_42910_20120212_154722_outLine +BABEL_BP_107_43017_20120322_170152_inLine +BABEL_BP_107_43017_20120322_170152_outLine +BABEL_BP_107_43306_20120409_184959_inLine +BABEL_BP_107_43306_20120409_184959_outLine +BABEL_BP_107_43423_20120504_001214_inLine +BABEL_BP_107_43423_20120504_010312_inLine +BABEL_BP_107_43426_20120426_183951_inLine +BABEL_BP_107_43426_20120426_183951_outLine +BABEL_BP_107_43587_20120506_182330_inLine +BABEL_BP_107_43652_20120416_175011_inLine +BABEL_BP_107_43652_20120418_093619_inLine +BABEL_BP_107_44129_20120512_023836_inLine +BABEL_BP_107_44129_20120512_023836_outLine +BABEL_BP_107_44369_20120504_024021_inLine +BABEL_BP_107_44369_20120504_024021_outLine +BABEL_BP_107_44403_20120322_214144_inLine +BABEL_BP_107_44403_20120322_214144_outLine +BABEL_BP_107_44756_20120426_155822_inLine +BABEL_BP_107_44756_20120426_155822_outLine +BABEL_BP_107_44829_20120404_224815_outLine +BABEL_BP_107_44836_20120417_003600_outLine +BABEL_BP_107_44943_20120506_191737_inLine +BABEL_BP_107_45227_20120210_223857_inLine +BABEL_BP_107_45511_20120212_170655_inLine +BABEL_BP_107_45511_20120212_170655_outLine +BABEL_BP_107_45570_20120509_151829_inLine +BABEL_BP_107_45570_20120509_151829_outLine +BABEL_BP_107_45793_20120211_040134_inLine +BABEL_BP_107_45793_20120211_040134_outLine +BABEL_BP_107_45929_20120418_215417_outLine +BABEL_BP_107_45931_20120322_143234_inLine +BABEL_BP_107_45931_20120322_143234_outLine +BABEL_BP_107_46243_20120210_233353_inLine +BABEL_BP_107_46243_20120210_233353_outLine +BABEL_BP_107_46332_20120418_002934_inLine +BABEL_BP_107_46332_20120418_002934_outLine +BABEL_BP_107_46603_20120421_113906_inLine +BABEL_BP_107_46756_20120429_195314_outLine +BABEL_BP_107_46977_20120426_015005_inLine +BABEL_BP_107_47263_20120422_150216_inLine +BABEL_BP_107_47433_20120210_185410_outLine +BABEL_BP_107_47618_20120502_004413_inLine +BABEL_BP_107_47618_20120502_004413_outLine +BABEL_BP_107_47661_20120216_224419_inLine +BABEL_BP_107_47661_20120216_224419_outLine +BABEL_BP_107_47794_20120514_175438_inLine 
+BABEL_BP_107_47794_20120514_175438_outLine +BABEL_BP_107_47823_20120516_204140_inLine +BABEL_BP_107_47845_20120613_004732_outLine +BABEL_BP_107_47906_20120415_224420_inLine +BABEL_BP_107_47906_20120415_224420_outLine +BABEL_BP_107_48188_20120422_150955_inLine +BABEL_BP_107_48188_20120422_150955_outLine +BABEL_BP_107_48418_20120421_163333_inLine +BABEL_BP_107_48511_20120322_145729_inLine +BABEL_BP_107_48511_20120322_145729_outLine +BABEL_BP_107_48559_20120502_201955_inLine +BABEL_BP_107_48559_20120502_201955_outLine +BABEL_BP_107_48607_20120607_215116_outLine +BABEL_BP_107_48733_20120418_142426_inLine +BABEL_BP_107_48733_20120418_142426_outLine +BABEL_BP_107_48753_20120426_134417_inLine +BABEL_BP_107_48753_20120426_134417_outLine +BABEL_BP_107_48812_20120323_162517_inLine +BABEL_BP_107_48812_20120324_182527_inLine +BABEL_BP_107_48976_20120220_152013_inLine +BABEL_BP_107_48976_20120220_152013_outLine +BABEL_BP_107_49186_20120704_180724_inLine +BABEL_BP_107_49186_20120704_180724_outLine +BABEL_BP_107_49192_20120421_190503_outLine +BABEL_BP_107_49239_20120429_144119_inLine +BABEL_BP_107_49346_20120611_192752_outLine +BABEL_BP_107_49351_20120614_132223_inLine +BABEL_BP_107_49351_20120614_132223_outLine +BABEL_BP_107_49371_20120608_002052_inLine +BABEL_BP_107_49541_20120325_223621_inLine +BABEL_BP_107_49541_20120325_223621_outLine +BABEL_BP_107_49552_20120614_140129_inLine +BABEL_BP_107_49689_20120415_163537_inLine +BABEL_BP_107_49689_20120415_163537_outLine +BABEL_BP_107_49714_20120509_113627_outLine +BABEL_BP_107_49773_20120211_151308_inLine +BABEL_BP_107_49773_20120211_151308_outLine +BABEL_BP_107_50028_20120704_192522_inLine +BABEL_BP_107_50028_20120704_192522_outLine +BABEL_BP_107_50141_20120505_233033_inLine +BABEL_BP_107_50141_20120505_233033_outLine +BABEL_BP_107_50201_20120216_001139_inLine +BABEL_BP_107_50201_20120216_001139_outLine +BABEL_BP_107_50267_20120421_135338_inLine +BABEL_BP_107_50267_20120421_135338_outLine +BABEL_BP_107_50298_20120507_152508_outLine +BABEL_BP_107_50409_20120608_205803_inLine +BABEL_BP_107_50468_20120420_114108_inLine +BABEL_BP_107_50468_20120420_114108_outLine +BABEL_BP_107_50468_20120420_115203_inLine +BABEL_BP_107_50468_20120420_115203_outLine +BABEL_BP_107_50476_20120430_225248_inLine +BABEL_BP_107_50476_20120430_225248_outLine +BABEL_BP_107_50718_20120321_125943_inLine +BABEL_BP_107_50752_20120421_202932_inLine +BABEL_BP_107_50752_20120421_202932_outLine +BABEL_BP_107_50883_20120328_013430_inLine +BABEL_BP_107_50883_20120328_013430_outLine +BABEL_BP_107_51052_20120424_004427_inLine +BABEL_BP_107_51052_20120424_004427_outLine +BABEL_BP_107_51073_20120216_010300_outLine +BABEL_BP_107_51117_20120211_034844_inLine +BABEL_BP_107_51117_20120211_034844_outLine +BABEL_BP_107_51136_20120405_142910_inLine +BABEL_BP_107_51136_20120405_142910_outLine +BABEL_BP_107_51446_20120417_221307_inLine +BABEL_BP_107_51446_20120417_221307_outLine +BABEL_BP_107_51448_20120608_170641_inLine +BABEL_BP_107_51448_20120608_171219_inLine +BABEL_BP_107_51663_20120506_160921_inLine +BABEL_BP_107_51727_20120424_225602_inLine +BABEL_BP_107_51727_20120424_225602_outLine +BABEL_BP_107_52154_20120503_203816_inLine +BABEL_BP_107_52219_20120417_113120_inLine +BABEL_BP_107_52219_20120417_113120_outLine +BABEL_BP_107_52807_20120608_171526_inLine +BABEL_BP_107_52807_20120608_171526_outLine +BABEL_BP_107_52902_20120421_150627_outLine +BABEL_BP_107_53181_20120211_163316_inLine +BABEL_BP_107_53181_20120211_163316_outLine +BABEL_BP_107_53463_20120421_150635_inLine 
+BABEL_BP_107_53463_20120421_150635_outLine +BABEL_BP_107_53463_20120421_152028_inLine +BABEL_BP_107_53463_20120421_152028_outLine +BABEL_BP_107_53649_20120611_193416_outLine +BABEL_BP_107_53653_20120607_150151_outLine +BABEL_BP_107_53703_20120502_153540_outLine +BABEL_BP_107_53824_20120503_223532_inLine +BABEL_BP_107_53824_20120503_223532_outLine +BABEL_BP_107_53824_20120503_225007_inLine +BABEL_BP_107_53824_20120503_225007_outLine +BABEL_BP_107_53982_20120509_013004_outLine +BABEL_BP_107_53994_20120501_161638_outLine +BABEL_BP_107_54199_20120607_200253_inLine +BABEL_BP_107_54199_20120607_202722_inLine +BABEL_BP_107_54199_20120607_202722_outLine +BABEL_BP_107_54241_20120324_013254_inLine +BABEL_BP_107_54241_20120324_013254_outLine +BABEL_BP_107_54332_20120608_182424_inLine +BABEL_BP_107_54332_20120608_183219_inLine +BABEL_BP_107_54518_20120608_120238_inLine +BABEL_BP_107_54621_20120421_132410_inLine +BABEL_BP_107_54621_20120421_132410_outLine +BABEL_BP_107_54785_20120602_195720_inLine +BABEL_BP_107_54787_20120405_202915_inLine +BABEL_BP_107_54787_20120405_202915_outLine +BABEL_BP_107_55182_20120209_015206_inLine +BABEL_BP_107_55355_20120608_155709_inLine +BABEL_BP_107_55355_20120612_142521_inLine +BABEL_BP_107_55396_20120321_141254_outLine +BABEL_BP_107_55470_20120421_134215_outLine +BABEL_BP_107_55777_20120421_234307_inLine +BABEL_BP_107_55777_20120421_234307_outLine +BABEL_BP_107_55874_20120504_184342_inLine +BABEL_BP_107_55874_20120504_184343_outLine +BABEL_BP_107_56039_20120516_215649_inLine +BABEL_BP_107_56039_20120516_215649_outLine +BABEL_BP_107_56070_20120220_174719_inLine +BABEL_BP_107_57148_20120217_014955_inLine +BABEL_BP_107_57148_20120217_014955_outLine +BABEL_BP_107_57148_20120217_024257_inLine +BABEL_BP_107_57148_20120217_024257_outLine +BABEL_BP_107_57422_20120508_014547_inLine +BABEL_BP_107_57422_20120508_014547_outLine +BABEL_BP_107_57457_20120617_193611_inLine +BABEL_BP_107_57457_20120617_193611_outLine +BABEL_BP_107_57619_20120505_151800_inLine +BABEL_BP_107_58108_20120509_141003_inLine +BABEL_BP_107_58108_20120509_141003_outLine +BABEL_BP_107_58137_20120421_185042_inLine +BABEL_BP_107_58137_20120421_185042_outLine +BABEL_BP_107_58190_20120506_195510_outLine +BABEL_BP_107_58232_20120501_122112_inLine +BABEL_BP_107_58232_20120501_122112_outLine +BABEL_BP_107_58357_20120507_125021_inLine +BABEL_BP_107_58357_20120507_125021_outLine +BABEL_BP_107_58536_20120501_013825_inLine +BABEL_BP_107_58536_20120501_013825_outLine +BABEL_BP_107_58746_20120614_181729_inLine +BABEL_BP_107_58746_20120614_181729_outLine +BABEL_BP_107_58863_20120218_011117_inLine +BABEL_BP_107_58863_20120218_011117_outLine +BABEL_BP_107_58863_20120218_012806_inLine +BABEL_BP_107_58863_20120218_012806_outLine +BABEL_BP_107_59071_20120423_184821_inLine +BABEL_BP_107_59175_20120212_225712_inLine +BABEL_BP_107_59175_20120212_225712_outLine +BABEL_BP_107_59383_20120502_205353_inLine +BABEL_BP_107_59383_20120502_205353_outLine +BABEL_BP_107_59628_20120428_215033_inLine +BABEL_BP_107_59764_20120524_205913_inLine +BABEL_BP_107_59924_20120417_194534_inLine +BABEL_BP_107_59924_20120417_194534_outLine +BABEL_BP_107_59961_20120218_211136_inLine +BABEL_BP_107_60106_20120211_003229_inLine +BABEL_BP_107_60106_20120211_003229_outLine +BABEL_BP_107_60183_20120428_164103_inLine +BABEL_BP_107_60183_20120428_164103_outLine +BABEL_BP_107_60193_20120328_014042_inLine +BABEL_BP_107_60238_20120506_132025_outLine +BABEL_BP_107_60338_20120505_131543_inLine +BABEL_BP_107_60338_20120505_131543_outLine 
+BABEL_BP_107_60605_20120506_215948_inLine +BABEL_BP_107_60677_20120415_145311_inLine +BABEL_BP_107_60677_20120415_145311_outLine +BABEL_BP_107_60677_20120415_150336_inLine +BABEL_BP_107_60677_20120415_150336_outLine +BABEL_BP_107_60826_20120424_235431_inLine +BABEL_BP_107_60826_20120424_235432_outLine +BABEL_BP_107_60842_20120617_190839_inLine +BABEL_BP_107_60842_20120617_190839_outLine +BABEL_BP_107_61073_20120322_193656_inLine +BABEL_BP_107_61073_20120322_193656_outLine +BABEL_BP_107_61408_20120628_141349_outLine +BABEL_BP_107_61449_20120421_232700_inLine +BABEL_BP_107_61449_20120421_232700_outLine +BABEL_BP_107_61906_20120414_201744_inLine +BABEL_BP_107_61906_20120414_201744_outLine +BABEL_BP_107_62132_20120506_160034_inLine +BABEL_BP_107_62160_20120323_180702_outLine +BABEL_BP_107_62163_20120628_180945_inLine +BABEL_BP_107_62163_20120628_182002_inLine +BABEL_BP_107_62177_20120323_001326_inLine +BABEL_BP_107_62255_20120506_204123_inLine +BABEL_BP_107_62255_20120506_204123_outLine +BABEL_BP_107_62277_20120504_173047_inLine +BABEL_BP_107_62696_20120508_135942_outLine +BABEL_BP_107_62696_20120509_100233_outLine +BABEL_BP_107_62923_20120322_163015_inLine +BABEL_BP_107_62923_20120322_163015_outLine +BABEL_BP_107_62993_20120608_130210_inLine +BABEL_BP_107_62993_20120608_130210_outLine +BABEL_BP_107_63076_20120704_011318_inLine +BABEL_BP_107_63116_20120419_163443_inLine +BABEL_BP_107_63233_20120323_003312_inLine +BABEL_BP_107_63352_20120421_222544_inLine +BABEL_BP_107_63368_20120418_215232_inLine +BABEL_BP_107_63368_20120418_215232_outLine +BABEL_BP_107_63368_20120418_220224_inLine +BABEL_BP_107_63368_20120418_220224_outLine +BABEL_BP_107_63368_20120418_222134_inLine +BABEL_BP_107_63368_20120418_222134_outLine +BABEL_BP_107_63369_20120614_191919_inLine +BABEL_BP_107_63711_20120212_183127_inLine +BABEL_BP_107_63711_20120212_183127_outLine +BABEL_BP_107_64205_20120428_020155_inLine +BABEL_BP_107_64351_20120513_193703_outLine +BABEL_BP_107_64724_20120503_155446_inLine +BABEL_BP_107_64724_20120503_155446_outLine +BABEL_BP_107_64889_20120503_174229_inLine +BABEL_BP_107_64889_20120503_174229_outLine +BABEL_BP_107_65414_20120608_131726_inLine +BABEL_BP_107_65743_20120404_191932_inLine +BABEL_BP_107_65743_20120404_191932_outLine +BABEL_BP_107_65989_20120419_141422_inLine +BABEL_BP_107_66346_20120703_161130_inLine +BABEL_BP_107_66346_20120703_161130_outLine +BABEL_BP_107_66419_20120505_205757_inLine +BABEL_BP_107_66419_20120505_205757_outLine +BABEL_BP_107_66451_20120214_215503_inLine +BABEL_BP_107_66451_20120214_215503_outLine +BABEL_BP_107_66499_20120610_220818_inLine +BABEL_BP_107_66559_20120421_185343_inLine +BABEL_BP_107_66709_20120617_152656_outLine +BABEL_BP_107_66709_20120617_153822_outLine +BABEL_BP_107_66790_20120421_182115_inLine +BABEL_BP_107_66839_20120613_192022_inLine +BABEL_BP_107_66839_20120613_192022_outLine +BABEL_BP_107_66866_20120418_001946_inLine +BABEL_BP_107_66866_20120418_001946_outLine +BABEL_BP_107_66903_20120210_183320_inLine +BABEL_BP_107_66903_20120210_183320_outLine +BABEL_BP_107_66964_20120419_205513_inLine +BABEL_BP_107_66964_20120419_205513_outLine +BABEL_BP_107_67304_20120523_201027_inLine +BABEL_BP_107_67304_20120523_201027_outLine +BABEL_BP_107_67555_20120323_130439_outLine +BABEL_BP_107_67628_20120418_215117_inLine +BABEL_BP_107_67798_20120627_141236_inLine +BABEL_BP_107_68009_20120608_112155_inLine +BABEL_BP_107_68129_20120611_013309_outLine +BABEL_BP_107_68191_20120428_114953_outLine +BABEL_BP_107_68295_20120506_210459_outLine 
+BABEL_BP_107_68362_20120503_194813_outLine +BABEL_BP_107_68545_20120421_220606_inLine +BABEL_BP_107_68545_20120421_220606_outLine +BABEL_BP_107_68671_20120608_205710_inLine +BABEL_BP_107_68671_20120608_205710_outLine +BABEL_BP_107_68767_20120214_214534_inLine +BABEL_BP_107_68767_20120214_214534_outLine +BABEL_BP_107_69028_20120430_132441_inLine +BABEL_BP_107_69049_20120322_215956_inLine +BABEL_BP_107_69137_20120424_183202_inLine +BABEL_BP_107_69137_20120424_183202_outLine +BABEL_BP_107_69275_20120608_210354_inLine +BABEL_BP_107_69295_20120501_154139_inLine +BABEL_BP_107_70000_20120618_004254_inLine +BABEL_BP_107_70000_20120618_004254_outLine +BABEL_BP_107_70077_20120428_170417_inLine +BABEL_BP_107_70120_20120418_213104_inLine +BABEL_BP_107_70120_20120418_213104_outLine +BABEL_BP_107_70285_20120212_214056_inLine +BABEL_BP_107_70323_20120617_122402_outLine +BABEL_BP_107_70441_20120704_163546_inLine +BABEL_BP_107_70441_20120704_163546_outLine +BABEL_BP_107_70511_20120618_124928_outLine +BABEL_BP_107_70615_20120208_233912_inLine +BABEL_BP_107_70615_20120208_233912_outLine +BABEL_BP_107_70975_20120407_011601_inLine +BABEL_BP_107_70975_20120407_011601_outLine +BABEL_BP_107_71176_20120418_195323_inLine +BABEL_BP_107_71176_20120418_195323_outLine +BABEL_BP_107_71739_20120430_125259_inLine +BABEL_BP_107_71741_20120211_231000_inLine +BABEL_BP_107_71741_20120211_231000_outLine +BABEL_BP_107_71778_20120427_132527_inLine +BABEL_BP_107_71778_20120427_132527_outLine +BABEL_BP_107_71844_20120212_180004_inLine +BABEL_BP_107_71927_20120516_204724_inLine +BABEL_BP_107_72269_20120416_010327_inLine +BABEL_BP_107_72269_20120416_010327_outLine +BABEL_BP_107_72297_20120608_185443_inLine +BABEL_BP_107_72297_20120608_185443_outLine +BABEL_BP_107_72297_20120608_190156_inLine +BABEL_BP_107_72297_20120608_190156_outLine +BABEL_BP_107_72647_20120614_125725_inLine +BABEL_BP_107_72718_20120505_025006_inLine +BABEL_BP_107_72718_20120505_025006_outLine +BABEL_BP_107_72879_20120403_141911_inLine +BABEL_BP_107_72879_20120403_141911_outLine +BABEL_BP_107_73205_20120211_191427_outLine +BABEL_BP_107_73438_20120502_201055_inLine +BABEL_BP_107_73438_20120502_201055_outLine +BABEL_BP_107_73440_20120416_172035_inLine +BABEL_BP_107_73452_20120504_170508_inLine +BABEL_BP_107_73452_20120504_170508_outLine +BABEL_BP_107_73752_20120610_174558_inLine +BABEL_BP_107_73761_20120424_154013_inLine +BABEL_BP_107_73761_20120424_154013_outLine +BABEL_BP_107_73911_20120215_175351_inLine +BABEL_BP_107_73911_20120215_175351_outLine +BABEL_BP_107_73921_20120501_000425_outLine +BABEL_BP_107_74043_20120323_014301_outLine +BABEL_BP_107_74234_20120328_020415_inLine +BABEL_BP_107_74234_20120328_020415_outLine +BABEL_BP_107_74317_20120502_225211_inLine +BABEL_BP_107_74317_20120502_225211_outLine +BABEL_BP_107_74395_20120418_140703_inLine +BABEL_BP_107_74395_20120418_140703_outLine +BABEL_BP_107_74404_20120212_134850_outLine +BABEL_BP_107_74625_20120425_234344_inLine +BABEL_BP_107_74700_20120610_233419_inLine +BABEL_BP_107_74823_20120217_022832_inLine +BABEL_BP_107_74823_20120217_022832_outLine +BABEL_BP_107_74935_20120616_144642_inLine +BABEL_BP_107_74974_20120617_143904_inLine +BABEL_BP_107_74974_20120617_143904_outLine +BABEL_BP_107_74986_20120416_011008_inLine +BABEL_BP_107_74986_20120416_011008_outLine +BABEL_BP_107_74986_20120416_011927_inLine +BABEL_BP_107_74986_20120416_011927_outLine +BABEL_BP_107_75036_20120325_233130_inLine +BABEL_BP_107_75036_20120325_233130_outLine +BABEL_BP_107_75333_20120505_200116_inLine 
+BABEL_BP_107_75333_20120505_200116_outLine +BABEL_BP_107_75498_20120506_171232_inLine +BABEL_BP_107_75498_20120506_171232_outLine +BABEL_BP_107_75680_20120704_175114_inLine +BABEL_BP_107_75680_20120704_175114_outLine +BABEL_BP_107_75799_20120429_140233_inLine +BABEL_BP_107_75799_20120429_140233_outLine +BABEL_BP_107_75815_20120217_141539_inLine +BABEL_BP_107_75815_20120217_141539_outLine +BABEL_BP_107_76252_20120705_003603_outLine +BABEL_BP_107_76341_20120215_201638_inLine +BABEL_BP_107_76341_20120215_201638_outLine +BABEL_BP_107_76661_20120405_132625_inLine +BABEL_BP_107_76691_20120501_002016_inLine +BABEL_BP_107_76716_20120418_215649_outLine +BABEL_BP_107_76733_20120424_181359_inLine +BABEL_BP_107_76733_20120424_181359_outLine +BABEL_BP_107_76733_20120424_183605_inLine +BABEL_BP_107_76733_20120424_183605_outLine +BABEL_BP_107_76748_20120504_181420_inLine +BABEL_BP_107_76919_20120507_010805_outLine +BABEL_BP_107_76925_20120407_015139_inLine +BABEL_BP_107_76944_20120505_000745_inLine +BABEL_BP_107_76944_20120505_000745_outLine +BABEL_BP_107_76993_20120501_125118_inLine +BABEL_BP_107_76993_20120501_125118_outLine +BABEL_BP_107_77238_20120322_211133_outLine +BABEL_BP_107_77244_20120429_164842_inLine +BABEL_BP_107_77244_20120429_164842_outLine +BABEL_BP_107_77315_20120527_222821_outLine +BABEL_BP_107_77338_20120617_171454_inLine +BABEL_BP_107_77338_20120617_171454_outLine +BABEL_BP_107_77473_20120610_000112_inLine +BABEL_BP_107_77886_20120326_191938_inLine +BABEL_BP_107_77886_20120326_191938_outLine +BABEL_BP_107_78094_20120212_205141_inLine +BABEL_BP_107_78094_20120212_205141_outLine +BABEL_BP_107_78487_20120430_133108_inLine +BABEL_BP_107_78487_20120430_133108_outLine +BABEL_BP_107_78514_20120617_131155_outLine +BABEL_BP_107_79284_20120511_180310_inLine +BABEL_BP_107_79284_20120511_180310_outLine +BABEL_BP_107_79495_20120222_195716_inLine +BABEL_BP_107_79619_20120420_115502_inLine +BABEL_BP_107_79619_20120420_115502_outLine +BABEL_BP_107_79632_20120428_182831_inLine +BABEL_BP_107_79632_20120428_182831_outLine +BABEL_BP_107_79860_20120328_023545_inLine +BABEL_BP_107_79944_20120424_213833_inLine +BABEL_BP_107_79970_20120418_214316_inLine +BABEL_BP_107_80008_20120218_225347_inLine +BABEL_BP_107_80008_20120218_225347_outLine +BABEL_BP_107_80282_20120627_190514_inLine +BABEL_BP_107_80282_20120627_190935_inLine +BABEL_BP_107_80290_20120501_134226_inLine +BABEL_BP_107_80290_20120501_134226_outLine +BABEL_BP_107_80337_20120608_000801_inLine +BABEL_BP_107_80337_20120608_000801_outLine +BABEL_BP_107_80638_20120501_223037_inLine +BABEL_BP_107_80638_20120501_223037_outLine +BABEL_BP_107_80786_20120212_204918_inLine +BABEL_BP_107_80786_20120212_204918_outLine +BABEL_BP_107_81056_20120502_155358_inLine +BABEL_BP_107_81056_20120502_155358_outLine +BABEL_BP_107_81096_20120418_221604_inLine +BABEL_BP_107_81096_20120418_221604_outLine +BABEL_BP_107_81321_20120329_030424_outLine +BABEL_BP_107_81486_20120213_035232_inLine +BABEL_BP_107_81486_20120213_040319_inLine +BABEL_BP_107_81535_20120421_151505_inLine +BABEL_BP_107_81535_20120421_151505_outLine +BABEL_BP_107_81611_20120616_154507_outLine +BABEL_BP_107_81717_20120426_185608_inLine +BABEL_BP_107_81771_20120615_224609_inLine +BABEL_BP_107_81771_20120615_224609_outLine +BABEL_BP_107_82006_20120417_133143_outLine +BABEL_BP_107_82025_20120325_012956_inLine +BABEL_BP_107_82103_20120326_172335_inLine +BABEL_BP_107_82103_20120326_172335_outLine +BABEL_BP_107_82131_20120704_135728_inLine +BABEL_BP_107_82131_20120704_211005_inLine 
+BABEL_BP_107_82591_20120407_185008_outLine +BABEL_BP_107_82599_20120608_140933_outLine +BABEL_BP_107_82766_20120627_112435_outLine +BABEL_BP_107_82880_20120705_001819_inLine +BABEL_BP_107_82880_20120705_001819_outLine +BABEL_BP_107_82947_20120426_103950_inLine +BABEL_BP_107_82947_20120426_103950_outLine +BABEL_BP_107_82947_20120509_202553_inLine +BABEL_BP_107_82947_20120509_202553_outLine +BABEL_BP_107_83017_20120608_125136_inLine +BABEL_BP_107_83053_20120426_184045_inLine +BABEL_BP_107_83256_20120212_162557_outLine +BABEL_BP_107_83360_20120418_000230_inLine +BABEL_BP_107_83360_20120418_000230_outLine +BABEL_BP_107_83529_20120608_152238_outLine +BABEL_BP_107_83700_20120427_121525_inLine +BABEL_BP_107_83700_20120427_121525_outLine +BABEL_BP_107_83702_20120418_010601_inLine +BABEL_BP_107_83702_20120418_010601_outLine +BABEL_BP_107_83982_20120704_125429_outLine +BABEL_BP_107_83982_20120704_125430_inLine +BABEL_BP_107_83982_20120704_131324_inLine +BABEL_BP_107_83982_20120704_131324_outLine +BABEL_BP_107_84171_20120504_185725_inLine +BABEL_BP_107_84335_20120418_002843_inLine +BABEL_BP_107_84397_20120608_080802_outLine +BABEL_BP_107_84532_20120703_171302_inLine +BABEL_BP_107_84540_20120328_205952_outLine +BABEL_BP_107_84543_20120503_005623_inLine +BABEL_BP_107_84543_20120503_005623_outLine +BABEL_BP_107_84943_20120405_134459_inLine +BABEL_BP_107_85083_20120425_024151_inLine +BABEL_BP_107_85354_20120704_145327_inLine +BABEL_BP_107_85354_20120704_145327_outLine +BABEL_BP_107_85573_20120208_152239_inLine +BABEL_BP_107_85617_20120415_171620_inLine +BABEL_BP_107_85617_20120415_171620_outLine +BABEL_BP_107_85686_20120627_180412_inLine +BABEL_BP_107_85686_20120627_180413_outLine +BABEL_BP_107_85716_20120330_201512_outLine +BABEL_BP_107_85716_20120330_202652_outLine +BABEL_BP_107_85819_20120705_030943_inLine +BABEL_BP_107_85819_20120705_030944_outLine +BABEL_BP_107_86016_20120417_225748_inLine +BABEL_BP_107_86029_20120212_235447_inLine +BABEL_BP_107_86419_20120209_010052_inLine +BABEL_BP_107_86419_20120209_010052_outLine +BABEL_BP_107_86801_20120429_211031_inLine +BABEL_BP_107_86801_20120429_211031_outLine +BABEL_BP_107_86890_20120322_202435_inLine +BABEL_BP_107_87167_20120211_230800_outLine +BABEL_BP_107_87481_20120513_191237_inLine +BABEL_BP_107_87481_20120513_191237_outLine +BABEL_BP_107_87539_20120418_225114_inLine +BABEL_BP_107_87539_20120418_225114_outLine +BABEL_BP_107_87671_20120218_011104_inLine +BABEL_BP_107_87857_20120325_000202_inLine +BABEL_BP_107_88243_20120322_210747_inLine +BABEL_BP_107_88243_20120322_210747_outLine +BABEL_BP_107_88253_20120511_165340_inLine +BABEL_BP_107_88253_20120511_165340_outLine +BABEL_BP_107_88294_20120322_163142_outLine +BABEL_BP_107_88506_20120503_191321_inLine +BABEL_BP_107_88506_20120503_191321_outLine +BABEL_BP_107_88532_20120416_012644_inLine +BABEL_BP_107_89619_20120217_174102_inLine +BABEL_BP_107_89619_20120217_174102_outLine +BABEL_BP_107_89644_20120501_170949_inLine +BABEL_BP_107_89644_20120501_170949_outLine +BABEL_BP_107_89657_20120610_213215_inLine +BABEL_BP_107_89657_20120610_213215_outLine +BABEL_BP_107_89674_20120212_162158_inLine +BABEL_BP_107_89674_20120212_162158_outLine +BABEL_BP_107_89965_20120505_003121_inLine +BABEL_BP_107_89965_20120505_003121_outLine +BABEL_BP_107_90313_20120325_200742_inLine +BABEL_BP_107_90393_20120417_220816_inLine +BABEL_BP_107_90393_20120417_220817_outLine +BABEL_BP_107_90559_20120608_184439_inLine +BABEL_BP_107_90559_20120608_184439_outLine +BABEL_BP_107_90577_20120118_141830_inLine 
+BABEL_BP_107_90577_20120118_141830_outLine +BABEL_BP_107_90609_20120216_194251_inLine +BABEL_BP_107_90764_20120418_004231_outLine +BABEL_BP_107_90975_20120428_231848_inLine +BABEL_BP_107_90975_20120428_231848_outLine +BABEL_BP_107_91000_20120529_151028_inLine +BABEL_BP_107_91002_20120429_192712_inLine +BABEL_BP_107_91002_20120429_192712_outLine +BABEL_BP_107_91007_20120612_144506_inLine +BABEL_BP_107_91040_20120618_152624_outLine +BABEL_BP_107_91136_20120427_122059_inLine +BABEL_BP_107_91401_20120213_010307_inLine +BABEL_BP_107_91401_20120213_010307_outLine +BABEL_BP_107_91406_20120429_193057_inLine +BABEL_BP_107_91406_20120429_193057_outLine +BABEL_BP_107_91409_20120520_225023_outLine +BABEL_BP_107_91409_20120520_231205_outLine +BABEL_BP_107_91660_20120510_181954_inLine +BABEL_BP_107_91660_20120510_181954_outLine +BABEL_BP_107_91660_20120510_182853_inLine +BABEL_BP_107_91660_20120510_182853_outLine +BABEL_BP_107_91660_20120510_184146_inLine +BABEL_BP_107_91660_20120510_184146_outLine +BABEL_BP_107_91723_20120323_144335_outLine +BABEL_BP_107_91865_20120429_214728_inLine +BABEL_BP_107_91865_20120429_214728_outLine +BABEL_BP_107_91905_20120504_210602_inLine +BABEL_BP_107_91905_20120504_210602_outLine +BABEL_BP_107_91975_20120703_173220_inLine +BABEL_BP_107_91975_20120703_173220_outLine +BABEL_BP_107_91979_20120209_000610_inLine +BABEL_BP_107_92002_20120418_214926_outLine +BABEL_BP_107_92407_20120210_183713_inLine +BABEL_BP_107_92407_20120210_183713_outLine +BABEL_BP_107_92436_20120213_013131_inLine +BABEL_BP_107_92436_20120213_013131_outLine +BABEL_BP_107_92591_20120505_140206_outLine +BABEL_BP_107_92602_20120216_214746_inLine +BABEL_BP_107_92602_20120216_215738_inLine +BABEL_BP_107_92603_20120416_011244_inLine +BABEL_BP_107_92603_20120416_011244_outLine +BABEL_BP_107_92628_20120323_014512_inLine +BABEL_BP_107_92628_20120323_014512_outLine +BABEL_BP_107_92643_20120608_122156_inLine +BABEL_BP_107_92643_20120608_123106_inLine +BABEL_BP_107_92735_20120413_181602_inLine +BABEL_BP_107_92789_20120416_165856_inLine +BABEL_BP_107_92800_20120412_013211_outLine +BABEL_BP_107_93044_20120607_140719_inLine +BABEL_BP_107_93044_20120607_140719_outLine +BABEL_BP_107_93509_20120321_230219_inLine +BABEL_BP_107_93509_20120321_230219_outLine +BABEL_BP_107_93804_20120703_232729_inLine +BABEL_BP_107_93804_20120703_233401_inLine +BABEL_BP_107_93974_20120627_184419_inLine +BABEL_BP_107_93974_20120627_184419_outLine +BABEL_BP_107_93979_20120422_134735_inLine +BABEL_BP_107_93979_20120422_134735_outLine +BABEL_BP_107_94149_20120405_220033_outLine +BABEL_BP_107_94162_20120425_235433_inLine +BABEL_BP_107_94223_20120215_204525_inLine +BABEL_BP_107_94514_20120417_001615_inLine +BABEL_BP_107_94514_20120417_001615_outLine +BABEL_BP_107_94514_20120417_003504_inLine +BABEL_BP_107_94514_20120417_003504_outLine +BABEL_BP_107_94541_20120705_024032_outLine +BABEL_BP_107_94542_20120512_223011_inLine +BABEL_BP_107_94542_20120512_223011_outLine +BABEL_BP_107_94694_20120508_120203_inLine +BABEL_BP_107_94694_20120508_120203_outLine +BABEL_BP_107_94696_20120608_185951_inLine +BABEL_BP_107_94696_20120608_185951_outLine +BABEL_BP_107_94814_20120501_130313_inLine +BABEL_BP_107_94814_20120501_130313_outLine +BABEL_BP_107_94989_20120627_120236_outLine +BABEL_BP_107_95121_20120628_123304_inLine +BABEL_BP_107_95423_20120415_201523_inLine +BABEL_BP_107_95423_20120415_201523_outLine +BABEL_BP_107_95533_20120505_005928_inLine +BABEL_BP_107_95533_20120505_005928_outLine +BABEL_BP_107_95542_20120502_223446_inLine 
+BABEL_BP_107_95542_20120502_223446_outLine
+BABEL_BP_107_95566_20120505_162738_inLine
+BABEL_BP_107_95572_20120406_151856_inLine
+BABEL_BP_107_95572_20120406_151856_outLine
+BABEL_BP_107_95589_20120419_162645_inLine
+BABEL_BP_107_95589_20120419_162645_outLine
+BABEL_BP_107_95815_20120322_160344_inLine
+BABEL_BP_107_95815_20120322_160344_outLine
+BABEL_BP_107_95996_20120324_230119_inLine
+BABEL_BP_107_96302_20120510_023815_inLine
+BABEL_BP_107_96302_20120510_023815_outLine
+BABEL_BP_107_96322_20120218_202407_inLine
+BABEL_BP_107_96322_20120218_202407_outLine
+BABEL_BP_107_96667_20120426_182837_inLine
+BABEL_BP_107_96667_20120426_182837_outLine
+BABEL_BP_107_96959_20120505_014233_inLine
+BABEL_BP_107_96959_20120505_014233_outLine
+BABEL_BP_107_97260_20120324_012659_outLine
+BABEL_BP_107_97318_20120608_183537_inLine
+BABEL_BP_107_97318_20120608_183537_outLine
+BABEL_BP_107_97629_20120420_202833_inLine
+BABEL_BP_107_97629_20120420_202833_outLine
+BABEL_BP_107_97946_20120411_213631_outLine
+BABEL_BP_107_98086_20120609_185014_inLine
+BABEL_BP_107_98086_20120609_185014_outLine
+BABEL_BP_107_98099_20120618_120506_outLine
+BABEL_BP_107_98219_20120512_202308_inLine
+BABEL_BP_107_98219_20120512_202308_outLine
+BABEL_BP_107_98219_20120512_203451_inLine
+BABEL_BP_107_98219_20120512_203451_outLine
+BABEL_BP_107_98402_20120421_162435_inLine
+BABEL_BP_107_98402_20120421_162435_outLine
+BABEL_BP_107_98640_20120425_213908_outLine
+BABEL_BP_107_98675_20120419_225133_inLine
+BABEL_BP_107_98675_20120419_225133_outLine
+BABEL_BP_107_99414_20120430_200633_inLine
+BABEL_BP_107_99414_20120430_200633_outLine
+BABEL_BP_107_99567_20120405_154443_outLine
+BABEL_BP_107_99571_20120322_165034_inLine
+BABEL_BP_107_99571_20120322_165034_outLine
+BABEL_BP_107_99694_20120322_165823_inLine
+BABEL_BP_107_99694_20120322_165823_outLine
+BABEL_BP_107_99731_20120618_005616_outLine
+BABEL_BP_107_99764_20120415_202745_inLine
+BABEL_BP_107_99823_20120511_002213_inLine
+BABEL_BP_107_99823_20120511_002213_outLine
+BABEL_BP_107_99929_20120612_143030_inLine
diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.list b/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.list
new file mode 100644
index 00000000000..a47debb4917
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.list
@@ -0,0 +1,126 @@
+BABEL_BP_107_12643_20120704_185225_inLine
+BABEL_BP_107_12643_20120704_185225_outLine
+BABEL_BP_107_13065_20120425_034939_inLine
+BABEL_BP_107_13065_20120425_034939_outLine
+BABEL_BP_107_13389_20120406_141036_inLine
+BABEL_BP_107_13389_20120406_141036_outLine
+BABEL_BP_107_14468_20120321_003916_inLine
+BABEL_BP_107_14468_20120321_003916_outLine
+BABEL_BP_107_14475_20120704_204813_inLine
+BABEL_BP_107_14475_20120704_204813_outLine
+BABEL_BP_107_14891_20120118_195012_inLine
+BABEL_BP_107_14891_20120118_195012_outLine
+BABEL_BP_107_17933_20120421_134916_inLine
+BABEL_BP_107_17933_20120421_134916_outLine
+BABEL_BP_107_19479_20120407_014459_inLine
+BABEL_BP_107_19479_20120407_014459_outLine
+BABEL_BP_107_21477_20120323_185255_inLine
+BABEL_BP_107_21477_20120323_185255_outLine
+BABEL_BP_107_21518_20120501_152038_inLine
+BABEL_BP_107_21518_20120501_152038_outLine
+BABEL_BP_107_22010_20120608_182138_inLine
+BABEL_BP_107_22010_20120608_182138_outLine
+BABEL_BP_107_22272_20120511_232328_inLine
+BABEL_BP_107_22272_20120511_232328_outLine
+BABEL_BP_107_22979_20120505_000039_inLine
+BABEL_BP_107_22979_20120505_000039_outLine
+BABEL_BP_107_23629_20120501_173549_inLine
+BABEL_BP_107_23629_20120501_173549_outLine
+BABEL_BP_107_27724_20120407_130547_inLine +BABEL_BP_107_27724_20120407_130547_outLine +BABEL_BP_107_31980_20120212_174027_inLine +BABEL_BP_107_31980_20120212_174027_outLine +BABEL_BP_107_34590_20120323_134554_inLine +BABEL_BP_107_34590_20120323_134554_outLine +BABEL_BP_107_36722_20120627_122821_inLine +BABEL_BP_107_36722_20120627_122821_outLine +BABEL_BP_107_38912_20120414_160852_inLine +BABEL_BP_107_38912_20120414_160852_outLine +BABEL_BP_107_41170_20120201_205341_inLine +BABEL_BP_107_41170_20120201_205341_outLine +BABEL_BP_107_42309_20120608_215912_inLine +BABEL_BP_107_42309_20120608_215912_outLine +BABEL_BP_107_42651_20120211_192913_inLine +BABEL_BP_107_42651_20120211_192913_outLine +BABEL_BP_107_42910_20120212_154722_inLine +BABEL_BP_107_42910_20120212_154722_outLine +BABEL_BP_107_43306_20120409_184959_inLine +BABEL_BP_107_43306_20120409_184959_outLine +BABEL_BP_107_44369_20120504_024021_inLine +BABEL_BP_107_44369_20120504_024021_outLine +BABEL_BP_107_44403_20120322_214144_inLine +BABEL_BP_107_44403_20120322_214144_outLine +BABEL_BP_107_44756_20120426_155822_inLine +BABEL_BP_107_44756_20120426_155822_outLine +BABEL_BP_107_49186_20120704_180724_inLine +BABEL_BP_107_49186_20120704_180724_outLine +BABEL_BP_107_49552_20120614_140129_inLine +BABEL_BP_107_50267_20120421_135338_inLine +BABEL_BP_107_50267_20120421_135338_outLine +BABEL_BP_107_50883_20120328_013430_inLine +BABEL_BP_107_50883_20120328_013430_outLine +BABEL_BP_107_52219_20120417_113120_inLine +BABEL_BP_107_52219_20120417_113120_outLine +BABEL_BP_107_53181_20120211_163316_inLine +BABEL_BP_107_53181_20120211_163316_outLine +BABEL_BP_107_54199_20120607_200253_inLine +BABEL_BP_107_54199_20120607_202722_inLine +BABEL_BP_107_54199_20120607_202722_outLine +BABEL_BP_107_54621_20120421_132410_inLine +BABEL_BP_107_54621_20120421_132410_outLine +BABEL_BP_107_55777_20120421_234307_inLine +BABEL_BP_107_55777_20120421_234307_outLine +BABEL_BP_107_58357_20120507_125021_inLine +BABEL_BP_107_58357_20120507_125021_outLine +BABEL_BP_107_59175_20120212_225712_inLine +BABEL_BP_107_59175_20120212_225712_outLine +BABEL_BP_107_60677_20120415_145311_inLine +BABEL_BP_107_60677_20120415_145311_outLine +BABEL_BP_107_60677_20120415_150336_inLine +BABEL_BP_107_60677_20120415_150336_outLine +BABEL_BP_107_61073_20120322_193656_inLine +BABEL_BP_107_61073_20120322_193656_outLine +BABEL_BP_107_62923_20120322_163015_inLine +BABEL_BP_107_62923_20120322_163015_outLine +BABEL_BP_107_63711_20120212_183127_inLine +BABEL_BP_107_63711_20120212_183127_outLine +BABEL_BP_107_66346_20120703_161130_inLine +BABEL_BP_107_66346_20120703_161130_outLine +BABEL_BP_107_66419_20120505_205757_inLine +BABEL_BP_107_66419_20120505_205757_outLine +BABEL_BP_107_66903_20120210_183320_inLine +BABEL_BP_107_66903_20120210_183320_outLine +BABEL_BP_107_67304_20120523_201027_inLine +BABEL_BP_107_67304_20120523_201027_outLine +BABEL_BP_107_71778_20120427_132527_inLine +BABEL_BP_107_71778_20120427_132527_outLine +BABEL_BP_107_73452_20120504_170508_inLine +BABEL_BP_107_73452_20120504_170508_outLine +BABEL_BP_107_73752_20120610_174558_inLine +BABEL_BP_107_73911_20120215_175351_inLine +BABEL_BP_107_73911_20120215_175351_outLine +BABEL_BP_107_74234_20120328_020415_inLine +BABEL_BP_107_74234_20120328_020415_outLine +BABEL_BP_107_75680_20120704_175114_inLine +BABEL_BP_107_75680_20120704_175114_outLine +BABEL_BP_107_80786_20120212_204918_inLine +BABEL_BP_107_80786_20120212_204918_outLine +BABEL_BP_107_81096_20120418_221604_inLine +BABEL_BP_107_81096_20120418_221604_outLine 
+BABEL_BP_107_81771_20120615_224609_inLine
+BABEL_BP_107_81771_20120615_224609_outLine
+BABEL_BP_107_82947_20120426_103950_inLine
+BABEL_BP_107_82947_20120426_103950_outLine
+BABEL_BP_107_82947_20120509_202553_inLine
+BABEL_BP_107_82947_20120509_202553_outLine
+BABEL_BP_107_84397_20120608_080802_outLine
+BABEL_BP_107_85617_20120415_171620_inLine
+BABEL_BP_107_85617_20120415_171620_outLine
+BABEL_BP_107_86801_20120429_211031_inLine
+BABEL_BP_107_86801_20120429_211031_outLine
+BABEL_BP_107_90559_20120608_184439_inLine
+BABEL_BP_107_90559_20120608_184439_outLine
+BABEL_BP_107_90975_20120428_231848_inLine
+BABEL_BP_107_90975_20120428_231848_outLine
+BABEL_BP_107_96322_20120218_202407_inLine
+BABEL_BP_107_96322_20120218_202407_outLine
diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.untranscribed.list
new file mode 100644
index 00000000000..4379937a74f
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.untranscribed.list
@@ -0,0 +1,916 @@
+BABEL_BP_107_10033_20120208_180820_outLine
+BABEL_BP_107_10066_20120428_121544_inLine
+BABEL_BP_107_10066_20120428_121544_outLine
+BABEL_BP_107_10190_20120424_023348_inLine
+BABEL_BP_107_10190_20120425_012249_inLine
+BABEL_BP_107_10211_20120323_013915_inLine
+BABEL_BP_107_10211_20120323_013915_outLine
+BABEL_BP_107_10545_20120424_184701_inLine
+BABEL_BP_107_10697_20120516_194235_inLine
+BABEL_BP_107_10732_20120328_172421_inLine
+BABEL_BP_107_10732_20120328_172422_outLine
+BABEL_BP_107_10900_20120322_022523_inLine
+BABEL_BP_107_10900_20120322_022524_outLine
+BABEL_BP_107_10945_20120322_222039_inLine
+BABEL_BP_107_10945_20120322_222039_outLine
+BABEL_BP_107_10973_20120404_233129_inLine
+BABEL_BP_107_10973_20120404_233129_outLine
+BABEL_BP_107_10985_20120502_123725_inLine
+BABEL_BP_107_10985_20120502_123725_outLine
+BABEL_BP_107_11022_20120422_013455_inLine
+BABEL_BP_107_11022_20120422_013455_outLine
+BABEL_BP_107_11422_20120208_160559_inLine
+BABEL_BP_107_11422_20120208_160559_outLine
+BABEL_BP_107_11479_20120212_011029_inLine
+BABEL_BP_107_11479_20120212_011029_outLine
+BABEL_BP_107_11827_20120322_205100_inLine
+BABEL_BP_107_11827_20120322_205100_outLine
+BABEL_BP_107_11949_20120704_001817_inLine
+BABEL_BP_107_11949_20120704_001817_outLine
+BABEL_BP_107_11982_20120217_004340_inLine
+BABEL_BP_107_12486_20120424_174759_inLine
+BABEL_BP_107_12552_20120503_152109_inLine
+BABEL_BP_107_12569_20120609_190056_inLine
+BABEL_BP_107_12569_20120609_190056_outLine
+BABEL_BP_107_12587_20120322_230456_inLine
+BABEL_BP_107_12587_20120322_230457_outLine
+BABEL_BP_107_12897_20120413_195042_inLine
+BABEL_BP_107_12897_20120413_195042_outLine
+BABEL_BP_107_12897_20120413_200727_inLine
+BABEL_BP_107_12897_20120413_200727_outLine
+BABEL_BP_107_13229_20120417_201028_inLine
+BABEL_BP_107_13229_20120417_201028_outLine
+BABEL_BP_107_13272_20120320_141107_outLine
+BABEL_BP_107_13272_20120320_142506_outLine
+BABEL_BP_107_13419_20120218_213925_inLine
+BABEL_BP_107_13419_20120218_214753_inLine
+BABEL_BP_107_13781_20120516_204849_inLine
+BABEL_BP_107_13781_20120516_204849_outLine
+BABEL_BP_107_13795_20120418_190613_inLine
+BABEL_BP_107_13795_20120418_190613_outLine
+BABEL_BP_107_14075_20120507_004435_inLine
+BABEL_BP_107_14294_20120328_010858_inLine
+BABEL_BP_107_14294_20120328_010858_outLine
+BABEL_BP_107_14500_20120429_194225_outLine
+BABEL_BP_107_14707_20120429_004741_inLine
+BABEL_BP_107_14707_20120429_004741_outLine
+BABEL_BP_107_14707_20120429_005954_inLine
+BABEL_BP_107_14707_20120429_005954_outLine +BABEL_BP_107_14729_20120429_200418_outLine +BABEL_BP_107_14836_20120507_235040_outLine +BABEL_BP_107_14936_20120405_224830_inLine +BABEL_BP_107_14936_20120405_224830_outLine +BABEL_BP_107_15073_20120417_011547_outLine +BABEL_BP_107_15142_20120322_132735_outLine +BABEL_BP_107_15353_20120504_193952_inLine +BABEL_BP_107_15353_20120504_193952_outLine +BABEL_BP_107_15460_20120426_224823_inLine +BABEL_BP_107_15460_20120426_224823_outLine +BABEL_BP_107_15473_20120217_231342_inLine +BABEL_BP_107_15696_20120328_010156_outLine +BABEL_BP_107_15719_20120612_122632_inLine +BABEL_BP_107_15719_20120612_122632_outLine +BABEL_BP_107_15744_20120608_123258_inLine +BABEL_BP_107_15873_20120405_224524_inLine +BABEL_BP_107_15873_20120405_224524_outLine +BABEL_BP_107_15881_20120322_233839_inLine +BABEL_BP_107_15940_20120424_221327_inLine +BABEL_BP_107_16406_20120324_011714_inLine +BABEL_BP_107_16406_20120324_011714_outLine +BABEL_BP_107_16617_20120228_014302_inLine +BABEL_BP_107_16646_20120418_130946_outLine +BABEL_BP_107_16660_20120210_231224_outLine +BABEL_BP_107_16669_20120208_140603_inLine +BABEL_BP_107_16801_20120418_121951_inLine +BABEL_BP_107_16801_20120418_203644_inLine +BABEL_BP_107_16875_20120704_133550_inLine +BABEL_BP_107_16875_20120704_133550_outLine +BABEL_BP_107_16883_20120501_194424_inLine +BABEL_BP_107_16883_20120501_194424_outLine +BABEL_BP_107_16950_20120704_155322_inLine +BABEL_BP_107_16950_20120704_155322_outLine +BABEL_BP_107_17013_20120501_002142_inLine +BABEL_BP_107_17013_20120501_002142_outLine +BABEL_BP_107_17018_20120322_220450_inLine +BABEL_BP_107_17018_20120322_220450_outLine +BABEL_BP_107_17093_20120501_202548_outLine +BABEL_BP_107_17203_20120212_220043_outLine +BABEL_BP_107_17353_20120617_133436_inLine +BABEL_BP_107_17353_20120617_133436_outLine +BABEL_BP_107_18187_20120608_125102_outLine +BABEL_BP_107_18209_20120420_004725_inLine +BABEL_BP_107_18234_20120210_230712_inLine +BABEL_BP_107_18495_20120618_003601_outLine +BABEL_BP_107_18534_20120504_132522_inLine +BABEL_BP_107_18534_20120504_132522_outLine +BABEL_BP_107_18858_20120209_004527_outLine +BABEL_BP_107_19012_20120503_215037_inLine +BABEL_BP_107_19012_20120503_215037_outLine +BABEL_BP_107_19248_20120508_210026_inLine +BABEL_BP_107_19248_20120508_210027_outLine +BABEL_BP_107_19290_20120421_141409_inLine +BABEL_BP_107_19290_20120421_141409_outLine +BABEL_BP_107_19404_20120321_171020_inLine +BABEL_BP_107_19404_20120321_171020_outLine +BABEL_BP_107_19731_20120506_011629_inLine +BABEL_BP_107_19731_20120515_001656_inLine +BABEL_BP_107_19869_20120608_012542_outLine +BABEL_BP_107_20320_20120212_214655_inLine +BABEL_BP_107_20332_20120426_010134_inLine +BABEL_BP_107_20332_20120426_010837_inLine +BABEL_BP_107_20332_20120426_010134_outLine +BABEL_BP_107_20332_20120426_010837_outLine +BABEL_BP_107_20483_20120416_171740_outLine +BABEL_BP_107_20518_20120418_211112_inLine +BABEL_BP_107_20582_20120322_220747_inLine +BABEL_BP_107_20582_20120322_220747_outLine +BABEL_BP_107_20740_20120427_193225_inLine +BABEL_BP_107_20740_20120427_193757_inLine +BABEL_BP_107_20741_20120325_181245_outLine +BABEL_BP_107_20799_20120515_010136_inLine +BABEL_BP_107_20799_20120515_010136_outLine +BABEL_BP_107_21052_20120415_204922_inLine +BABEL_BP_107_21139_20120425_192642_outLine +BABEL_BP_107_21258_20120418_145725_inLine +BABEL_BP_107_21367_20120629_140326_outLine +BABEL_BP_107_21430_20120608_003600_outLine +BABEL_BP_107_21584_20120217_004017_inLine +BABEL_BP_107_21584_20120217_004017_outLine 
+BABEL_BP_107_21758_20120407_010928_inLine +BABEL_BP_107_21758_20120407_010928_outLine +BABEL_BP_107_21758_20120407_011555_inLine +BABEL_BP_107_21758_20120407_011555_outLine +BABEL_BP_107_21929_20120323_015539_inLine +BABEL_BP_107_21929_20120323_022750_inLine +BABEL_BP_107_21946_20120507_015056_inLine +BABEL_BP_107_21946_20120507_015056_outLine +BABEL_BP_107_22494_20120613_122322_outLine +BABEL_BP_107_22898_20120322_144401_inLine +BABEL_BP_107_22898_20120322_144401_outLine +BABEL_BP_107_22910_20120214_213815_inLine +BABEL_BP_107_22910_20120214_213815_outLine +BABEL_BP_107_23167_20120217_212610_inLine +BABEL_BP_107_23167_20120217_212610_outLine +BABEL_BP_107_23930_20120506_214145_inLine +BABEL_BP_107_24014_20120618_010729_inLine +BABEL_BP_107_24014_20120618_010729_outLine +BABEL_BP_107_24094_20120421_134318_outLine +BABEL_BP_107_24569_20120507_123854_outLine +BABEL_BP_107_24608_20120208_170106_outLine +BABEL_BP_107_24638_20120504_004348_outLine +BABEL_BP_107_24642_20120505_201543_inLine +BABEL_BP_107_24642_20120505_201543_outLine +BABEL_BP_107_24799_20120508_232153_outLine +BABEL_BP_107_24817_20120422_203514_inLine +BABEL_BP_107_24833_20120218_171649_outLine +BABEL_BP_107_25035_20120214_230841_inLine +BABEL_BP_107_25072_20120429_144535_inLine +BABEL_BP_107_25479_20120506_161146_inLine +BABEL_BP_107_25479_20120506_161146_outLine +BABEL_BP_107_25576_20120321_222905_outLine +BABEL_BP_107_25866_20120426_193335_inLine +BABEL_BP_107_26348_20120508_100651_inLine +BABEL_BP_107_26348_20120508_102042_inLine +BABEL_BP_107_26350_20120209_004945_inLine +BABEL_BP_107_26350_20120209_004945_outLine +BABEL_BP_107_26350_20120209_012139_inLine +BABEL_BP_107_26350_20120209_012139_outLine +BABEL_BP_107_26598_20120425_143602_outLine +BABEL_BP_107_26684_20120530_155756_inLine +BABEL_BP_107_26786_20120423_191945_inLine +BABEL_BP_107_26786_20120423_191945_outLine +BABEL_BP_107_27064_20120222_210044_inLine +BABEL_BP_107_27064_20120222_210044_outLine +BABEL_BP_107_27503_20120212_221915_inLine +BABEL_BP_107_27619_20120328_023110_outLine +BABEL_BP_107_27698_20120212_005737_inLine +BABEL_BP_107_27698_20120212_005737_outLine +BABEL_BP_107_27890_20120428_235422_inLine +BABEL_BP_107_27890_20120428_235422_outLine +BABEL_BP_107_27916_20120607_114245_outLine +BABEL_BP_107_27916_20120607_115650_outLine +BABEL_BP_107_28016_20120405_222219_inLine +BABEL_BP_107_28016_20120405_222219_outLine +BABEL_BP_107_28107_20120208_142843_outLine +BABEL_BP_107_28107_20120208_144923_outLine +BABEL_BP_107_28132_20120405_152728_outLine +BABEL_BP_107_28260_20120212_153106_inLine +BABEL_BP_107_28557_20120507_001619_outLine +BABEL_BP_107_28675_20120607_231549_inLine +BABEL_BP_107_28675_20120607_231549_outLine +BABEL_BP_107_28675_20120607_233243_inLine +BABEL_BP_107_28675_20120607_233243_outLine +BABEL_BP_107_28740_20120212_150039_inLine +BABEL_BP_107_28740_20120212_150039_outLine +BABEL_BP_107_29280_20120607_184929_outLine +BABEL_BP_107_29280_20120607_190345_outLine +BABEL_BP_107_29290_20120415_102435_inLine +BABEL_BP_107_29335_20120424_013042_inLine +BABEL_BP_107_29335_20120424_013042_outLine +BABEL_BP_107_29407_20120607_132315_inLine +BABEL_BP_107_29407_20120607_135318_inLine +BABEL_BP_107_29444_20120322_191236_outLine +BABEL_BP_107_29771_20120504_010738_outLine +BABEL_BP_107_29959_20120418_001028_inLine +BABEL_BP_107_29959_20120418_001028_outLine +BABEL_BP_107_29988_20120516_233700_inLine +BABEL_BP_107_30210_20120427_140255_inLine +BABEL_BP_107_30210_20120502_202749_inLine +BABEL_BP_107_30554_20120617_231216_outLine 
+BABEL_BP_107_30583_20120212_210712_inLine +BABEL_BP_107_30722_20120505_103655_inLine +BABEL_BP_107_30722_20120505_103655_outLine +BABEL_BP_107_31031_20120501_205733_inLine +BABEL_BP_107_31031_20120501_210746_inLine +BABEL_BP_107_31298_20120322_125112_outLine +BABEL_BP_107_31393_20120325_171905_inLine +BABEL_BP_107_31460_20120325_193921_inLine +BABEL_BP_107_31606_20120607_131428_inLine +BABEL_BP_107_31738_20120704_101130_outLine +BABEL_BP_107_31902_20120417_015618_inLine +BABEL_BP_107_31902_20120417_015618_outLine +BABEL_BP_107_31917_20120501_202910_inLine +BABEL_BP_107_31917_20120501_202910_outLine +BABEL_BP_107_32132_20120418_211743_inLine +BABEL_BP_107_32274_20120324_011402_inLine +BABEL_BP_107_32295_20120617_141025_inLine +BABEL_BP_107_32295_20120617_141025_outLine +BABEL_BP_107_32334_20120429_005403_inLine +BABEL_BP_107_32334_20120429_005403_outLine +BABEL_BP_107_32400_20120426_000137_inLine +BABEL_BP_107_32400_20120426_000137_outLine +BABEL_BP_107_32710_20120418_215432_inLine +BABEL_BP_107_32710_20120418_215432_outLine +BABEL_BP_107_33012_20120611_155055_inLine +BABEL_BP_107_33364_20120617_011853_inLine +BABEL_BP_107_33364_20120617_011853_outLine +BABEL_BP_107_33577_20120704_152608_outLine +BABEL_BP_107_33671_20120330_001033_inLine +BABEL_BP_107_33671_20120330_001033_outLine +BABEL_BP_107_33742_20120608_143147_inLine +BABEL_BP_107_33742_20120608_143147_outLine +BABEL_BP_107_33817_20120423_130850_inLine +BABEL_BP_107_33817_20120423_130850_outLine +BABEL_BP_107_33969_20120429_214721_outLine +BABEL_BP_107_34235_20120218_205136_outLine +BABEL_BP_107_34480_20120608_151830_inLine +BABEL_BP_107_34498_20120429_140537_inLine +BABEL_BP_107_34498_20120429_140537_outLine +BABEL_BP_107_34857_20120419_235853_inLine +BABEL_BP_107_34961_20120212_223315_inLine +BABEL_BP_107_34961_20120212_223315_outLine +BABEL_BP_107_34961_20120212_224207_inLine +BABEL_BP_107_34961_20120212_224207_outLine +BABEL_BP_107_35011_20120321_223128_inLine +BABEL_BP_107_35011_20120321_223128_outLine +BABEL_BP_107_35016_20120611_185645_outLine +BABEL_BP_107_35074_20120608_164703_outLine +BABEL_BP_107_35179_20120414_153233_inLine +BABEL_BP_107_35179_20120414_153233_outLine +BABEL_BP_107_35188_20120614_131427_inLine +BABEL_BP_107_35305_20120422_120043_outLine +BABEL_BP_107_35357_20120614_212245_inLine +BABEL_BP_107_35357_20120614_212245_outLine +BABEL_BP_107_36037_20120616_153023_outLine +BABEL_BP_107_36196_20120608_110319_inLine +BABEL_BP_107_36196_20120608_111049_inLine +BABEL_BP_107_36268_20120406_211711_inLine +BABEL_BP_107_36268_20120406_211711_outLine +BABEL_BP_107_36356_20120211_173247_inLine +BABEL_BP_107_36356_20120211_173247_outLine +BABEL_BP_107_36383_20120416_225701_outLine +BABEL_BP_107_36391_20120505_171824_inLine +BABEL_BP_107_36424_20120421_130549_inLine +BABEL_BP_107_36424_20120421_130549_outLine +BABEL_BP_107_36424_20120421_133610_inLine +BABEL_BP_107_36424_20120421_133610_outLine +BABEL_BP_107_36502_20120617_145859_inLine +BABEL_BP_107_36502_20120617_145859_outLine +BABEL_BP_107_36711_20120325_230112_inLine +BABEL_BP_107_36711_20120325_230112_outLine +BABEL_BP_107_37110_20120209_002706_inLine +BABEL_BP_107_37110_20120209_002706_outLine +BABEL_BP_107_37210_20120322_205536_outLine +BABEL_BP_107_37285_20120325_000245_inLine +BABEL_BP_107_37285_20120325_000245_outLine +BABEL_BP_107_37335_20120616_150016_inLine +BABEL_BP_107_37335_20120616_150016_outLine +BABEL_BP_107_37374_20120418_185819_inLine +BABEL_BP_107_37940_20120424_004619_inLine +BABEL_BP_107_37940_20120424_004619_outLine 
+BABEL_BP_107_38464_20120422_105536_outLine +BABEL_BP_107_38592_20120704_150926_outLine +BABEL_BP_107_38640_20120215_030154_inLine +BABEL_BP_107_38640_20120215_030154_outLine +BABEL_BP_107_38698_20120322_213531_inLine +BABEL_BP_107_38698_20120322_213531_outLine +BABEL_BP_107_38879_20120406_150304_inLine +BABEL_BP_107_38879_20120406_150304_outLine +BABEL_BP_107_39246_20120613_202128_inLine +BABEL_BP_107_39246_20120613_202128_outLine +BABEL_BP_107_39264_20120417_191639_inLine +BABEL_BP_107_39264_20120417_191639_outLine +BABEL_BP_107_39296_20120705_025906_inLine +BABEL_BP_107_39384_20120324_010939_inLine +BABEL_BP_107_39384_20120324_010939_outLine +BABEL_BP_107_39384_20120324_011832_inLine +BABEL_BP_107_39384_20120324_011832_outLine +BABEL_BP_107_39430_20120325_015935_inLine +BABEL_BP_107_39430_20120325_015935_outLine +BABEL_BP_107_40002_20120502_174229_outLine +BABEL_BP_107_40123_20120505_191426_inLine +BABEL_BP_107_40123_20120505_191426_outLine +BABEL_BP_107_40385_20120704_143210_outLine +BABEL_BP_107_40477_20120323_194919_outLine +BABEL_BP_107_40510_20120426_153808_inLine +BABEL_BP_107_40510_20120426_153808_outLine +BABEL_BP_107_40980_20120416_233130_inLine +BABEL_BP_107_40980_20120416_233130_outLine +BABEL_BP_107_40980_20120417_001128_inLine +BABEL_BP_107_40980_20120417_001128_outLine +BABEL_BP_107_41146_20120211_162158_inLine +BABEL_BP_107_41590_20120610_162218_outLine +BABEL_BP_107_41797_20120420_003902_inLine +BABEL_BP_107_41797_20120420_003902_outLine +BABEL_BP_107_42145_20120418_131525_inLine +BABEL_BP_107_42266_20120407_182544_outLine +BABEL_BP_107_43017_20120322_170152_inLine +BABEL_BP_107_43017_20120322_170152_outLine +BABEL_BP_107_43423_20120504_001214_inLine +BABEL_BP_107_43423_20120504_010312_inLine +BABEL_BP_107_43426_20120426_183951_inLine +BABEL_BP_107_43426_20120426_183951_outLine +BABEL_BP_107_43587_20120506_182330_inLine +BABEL_BP_107_43652_20120416_175011_inLine +BABEL_BP_107_43652_20120418_093619_inLine +BABEL_BP_107_44129_20120512_023836_inLine +BABEL_BP_107_44129_20120512_023836_outLine +BABEL_BP_107_44829_20120404_224815_outLine +BABEL_BP_107_44836_20120417_003600_outLine +BABEL_BP_107_44943_20120506_191737_inLine +BABEL_BP_107_45227_20120210_223857_inLine +BABEL_BP_107_45511_20120212_170655_inLine +BABEL_BP_107_45511_20120212_170655_outLine +BABEL_BP_107_45570_20120509_151829_inLine +BABEL_BP_107_45570_20120509_151829_outLine +BABEL_BP_107_45793_20120211_040134_inLine +BABEL_BP_107_45793_20120211_040134_outLine +BABEL_BP_107_45929_20120418_215417_outLine +BABEL_BP_107_45931_20120322_143234_inLine +BABEL_BP_107_45931_20120322_143234_outLine +BABEL_BP_107_46243_20120210_233353_inLine +BABEL_BP_107_46243_20120210_233353_outLine +BABEL_BP_107_46332_20120418_002934_inLine +BABEL_BP_107_46332_20120418_002934_outLine +BABEL_BP_107_46603_20120421_113906_inLine +BABEL_BP_107_46756_20120429_195314_outLine +BABEL_BP_107_46977_20120426_015005_inLine +BABEL_BP_107_47263_20120422_150216_inLine +BABEL_BP_107_47433_20120210_185410_outLine +BABEL_BP_107_47618_20120502_004413_inLine +BABEL_BP_107_47618_20120502_004413_outLine +BABEL_BP_107_47661_20120216_224419_inLine +BABEL_BP_107_47661_20120216_224419_outLine +BABEL_BP_107_47794_20120514_175438_inLine +BABEL_BP_107_47794_20120514_175438_outLine +BABEL_BP_107_47823_20120516_204140_inLine +BABEL_BP_107_47845_20120613_004732_outLine +BABEL_BP_107_47906_20120415_224420_inLine +BABEL_BP_107_47906_20120415_224420_outLine +BABEL_BP_107_48188_20120422_150955_inLine +BABEL_BP_107_48188_20120422_150955_outLine 
+BABEL_BP_107_48418_20120421_163333_inLine +BABEL_BP_107_48511_20120322_145729_inLine +BABEL_BP_107_48511_20120322_145729_outLine +BABEL_BP_107_48559_20120502_201955_inLine +BABEL_BP_107_48559_20120502_201955_outLine +BABEL_BP_107_48607_20120607_215116_outLine +BABEL_BP_107_48733_20120418_142426_inLine +BABEL_BP_107_48733_20120418_142426_outLine +BABEL_BP_107_48753_20120426_134417_inLine +BABEL_BP_107_48753_20120426_134417_outLine +BABEL_BP_107_48812_20120323_162517_inLine +BABEL_BP_107_48812_20120324_182527_inLine +BABEL_BP_107_48976_20120220_152013_inLine +BABEL_BP_107_48976_20120220_152013_outLine +BABEL_BP_107_49192_20120421_190503_outLine +BABEL_BP_107_49239_20120429_144119_inLine +BABEL_BP_107_49346_20120611_192752_outLine +BABEL_BP_107_49351_20120614_132223_inLine +BABEL_BP_107_49351_20120614_132223_outLine +BABEL_BP_107_49371_20120608_002052_inLine +BABEL_BP_107_49541_20120325_223621_inLine +BABEL_BP_107_49541_20120325_223621_outLine +BABEL_BP_107_49689_20120415_163537_inLine +BABEL_BP_107_49689_20120415_163537_outLine +BABEL_BP_107_49714_20120509_113627_outLine +BABEL_BP_107_49773_20120211_151308_inLine +BABEL_BP_107_49773_20120211_151308_outLine +BABEL_BP_107_50028_20120704_192522_inLine +BABEL_BP_107_50028_20120704_192522_outLine +BABEL_BP_107_50141_20120505_233033_inLine +BABEL_BP_107_50141_20120505_233033_outLine +BABEL_BP_107_50201_20120216_001139_inLine +BABEL_BP_107_50201_20120216_001139_outLine +BABEL_BP_107_50298_20120507_152508_outLine +BABEL_BP_107_50409_20120608_205803_inLine +BABEL_BP_107_50468_20120420_114108_inLine +BABEL_BP_107_50468_20120420_114108_outLine +BABEL_BP_107_50468_20120420_115203_inLine +BABEL_BP_107_50468_20120420_115203_outLine +BABEL_BP_107_50476_20120430_225248_inLine +BABEL_BP_107_50476_20120430_225248_outLine +BABEL_BP_107_50718_20120321_125943_inLine +BABEL_BP_107_50752_20120421_202932_inLine +BABEL_BP_107_50752_20120421_202932_outLine +BABEL_BP_107_51052_20120424_004427_inLine +BABEL_BP_107_51052_20120424_004427_outLine +BABEL_BP_107_51073_20120216_010300_outLine +BABEL_BP_107_51117_20120211_034844_inLine +BABEL_BP_107_51117_20120211_034844_outLine +BABEL_BP_107_51136_20120405_142910_inLine +BABEL_BP_107_51136_20120405_142910_outLine +BABEL_BP_107_51446_20120417_221307_inLine +BABEL_BP_107_51446_20120417_221307_outLine +BABEL_BP_107_51448_20120608_170641_inLine +BABEL_BP_107_51448_20120608_171219_inLine +BABEL_BP_107_51663_20120506_160921_inLine +BABEL_BP_107_51727_20120424_225602_inLine +BABEL_BP_107_51727_20120424_225602_outLine +BABEL_BP_107_52154_20120503_203816_inLine +BABEL_BP_107_52807_20120608_171526_inLine +BABEL_BP_107_52807_20120608_171526_outLine +BABEL_BP_107_52902_20120421_150627_outLine +BABEL_BP_107_53463_20120421_150635_inLine +BABEL_BP_107_53463_20120421_150635_outLine +BABEL_BP_107_53463_20120421_152028_inLine +BABEL_BP_107_53463_20120421_152028_outLine +BABEL_BP_107_53649_20120611_193416_outLine +BABEL_BP_107_53653_20120607_150151_outLine +BABEL_BP_107_53703_20120502_153540_outLine +BABEL_BP_107_53824_20120503_223532_inLine +BABEL_BP_107_53824_20120503_223532_outLine +BABEL_BP_107_53824_20120503_225007_inLine +BABEL_BP_107_53824_20120503_225007_outLine +BABEL_BP_107_53982_20120509_013004_outLine +BABEL_BP_107_53994_20120501_161638_outLine +BABEL_BP_107_54241_20120324_013254_inLine +BABEL_BP_107_54241_20120324_013254_outLine +BABEL_BP_107_54332_20120608_182424_inLine +BABEL_BP_107_54332_20120608_183219_inLine +BABEL_BP_107_54518_20120608_120238_inLine +BABEL_BP_107_54785_20120602_195720_inLine 
+BABEL_BP_107_54787_20120405_202915_inLine +BABEL_BP_107_54787_20120405_202915_outLine +BABEL_BP_107_55182_20120209_015206_inLine +BABEL_BP_107_55355_20120608_155709_inLine +BABEL_BP_107_55355_20120612_142521_inLine +BABEL_BP_107_55396_20120321_141254_outLine +BABEL_BP_107_55470_20120421_134215_outLine +BABEL_BP_107_55874_20120504_184342_inLine +BABEL_BP_107_55874_20120504_184343_outLine +BABEL_BP_107_56039_20120516_215649_inLine +BABEL_BP_107_56039_20120516_215649_outLine +BABEL_BP_107_56070_20120220_174719_inLine +BABEL_BP_107_57148_20120217_014955_inLine +BABEL_BP_107_57148_20120217_014955_outLine +BABEL_BP_107_57148_20120217_024257_inLine +BABEL_BP_107_57148_20120217_024257_outLine +BABEL_BP_107_57422_20120508_014547_inLine +BABEL_BP_107_57422_20120508_014547_outLine +BABEL_BP_107_57457_20120617_193611_inLine +BABEL_BP_107_57457_20120617_193611_outLine +BABEL_BP_107_57619_20120505_151800_inLine +BABEL_BP_107_58108_20120509_141003_inLine +BABEL_BP_107_58108_20120509_141003_outLine +BABEL_BP_107_58137_20120421_185042_inLine +BABEL_BP_107_58137_20120421_185042_outLine +BABEL_BP_107_58190_20120506_195510_outLine +BABEL_BP_107_58232_20120501_122112_inLine +BABEL_BP_107_58232_20120501_122112_outLine +BABEL_BP_107_58536_20120501_013825_inLine +BABEL_BP_107_58536_20120501_013825_outLine +BABEL_BP_107_58746_20120614_181729_inLine +BABEL_BP_107_58746_20120614_181729_outLine +BABEL_BP_107_58863_20120218_011117_inLine +BABEL_BP_107_58863_20120218_011117_outLine +BABEL_BP_107_58863_20120218_012806_inLine +BABEL_BP_107_58863_20120218_012806_outLine +BABEL_BP_107_59071_20120423_184821_inLine +BABEL_BP_107_59383_20120502_205353_inLine +BABEL_BP_107_59383_20120502_205353_outLine +BABEL_BP_107_59628_20120428_215033_inLine +BABEL_BP_107_59764_20120524_205913_inLine +BABEL_BP_107_59924_20120417_194534_inLine +BABEL_BP_107_59924_20120417_194534_outLine +BABEL_BP_107_59961_20120218_211136_inLine +BABEL_BP_107_60106_20120211_003229_inLine +BABEL_BP_107_60106_20120211_003229_outLine +BABEL_BP_107_60183_20120428_164103_inLine +BABEL_BP_107_60183_20120428_164103_outLine +BABEL_BP_107_60193_20120328_014042_inLine +BABEL_BP_107_60238_20120506_132025_outLine +BABEL_BP_107_60338_20120505_131543_inLine +BABEL_BP_107_60338_20120505_131543_outLine +BABEL_BP_107_60605_20120506_215948_inLine +BABEL_BP_107_60826_20120424_235431_inLine +BABEL_BP_107_60826_20120424_235432_outLine +BABEL_BP_107_60842_20120617_190839_inLine +BABEL_BP_107_60842_20120617_190839_outLine +BABEL_BP_107_61408_20120628_141349_outLine +BABEL_BP_107_61449_20120421_232700_inLine +BABEL_BP_107_61449_20120421_232700_outLine +BABEL_BP_107_61906_20120414_201744_inLine +BABEL_BP_107_61906_20120414_201744_outLine +BABEL_BP_107_62132_20120506_160034_inLine +BABEL_BP_107_62160_20120323_180702_outLine +BABEL_BP_107_62163_20120628_180945_inLine +BABEL_BP_107_62163_20120628_182002_inLine +BABEL_BP_107_62177_20120323_001326_inLine +BABEL_BP_107_62255_20120506_204123_inLine +BABEL_BP_107_62255_20120506_204123_outLine +BABEL_BP_107_62277_20120504_173047_inLine +BABEL_BP_107_62696_20120508_135942_outLine +BABEL_BP_107_62696_20120509_100233_outLine +BABEL_BP_107_62993_20120608_130210_inLine +BABEL_BP_107_62993_20120608_130210_outLine +BABEL_BP_107_63076_20120704_011318_inLine +BABEL_BP_107_63116_20120419_163443_inLine +BABEL_BP_107_63233_20120323_003312_inLine +BABEL_BP_107_63352_20120421_222544_inLine +BABEL_BP_107_63368_20120418_215232_inLine +BABEL_BP_107_63368_20120418_215232_outLine +BABEL_BP_107_63368_20120418_220224_inLine 
+BABEL_BP_107_63368_20120418_220224_outLine +BABEL_BP_107_63368_20120418_222134_inLine +BABEL_BP_107_63368_20120418_222134_outLine +BABEL_BP_107_63369_20120614_191919_inLine +BABEL_BP_107_64205_20120428_020155_inLine +BABEL_BP_107_64351_20120513_193703_outLine +BABEL_BP_107_64724_20120503_155446_inLine +BABEL_BP_107_64724_20120503_155446_outLine +BABEL_BP_107_64889_20120503_174229_inLine +BABEL_BP_107_64889_20120503_174229_outLine +BABEL_BP_107_65414_20120608_131726_inLine +BABEL_BP_107_65743_20120404_191932_inLine +BABEL_BP_107_65743_20120404_191932_outLine +BABEL_BP_107_65989_20120419_141422_inLine +BABEL_BP_107_66451_20120214_215503_inLine +BABEL_BP_107_66451_20120214_215503_outLine +BABEL_BP_107_66499_20120610_220818_inLine +BABEL_BP_107_66559_20120421_185343_inLine +BABEL_BP_107_66709_20120617_152656_outLine +BABEL_BP_107_66709_20120617_153822_outLine +BABEL_BP_107_66790_20120421_182115_inLine +BABEL_BP_107_66839_20120613_192022_inLine +BABEL_BP_107_66839_20120613_192022_outLine +BABEL_BP_107_66866_20120418_001946_inLine +BABEL_BP_107_66866_20120418_001946_outLine +BABEL_BP_107_66964_20120419_205513_inLine +BABEL_BP_107_66964_20120419_205513_outLine +BABEL_BP_107_67555_20120323_130439_outLine +BABEL_BP_107_67628_20120418_215117_inLine +BABEL_BP_107_67798_20120627_141236_inLine +BABEL_BP_107_68009_20120608_112155_inLine +BABEL_BP_107_68129_20120611_013309_outLine +BABEL_BP_107_68191_20120428_114953_outLine +BABEL_BP_107_68295_20120506_210459_outLine +BABEL_BP_107_68362_20120503_194813_outLine +BABEL_BP_107_68545_20120421_220606_inLine +BABEL_BP_107_68545_20120421_220606_outLine +BABEL_BP_107_68671_20120608_205710_inLine +BABEL_BP_107_68671_20120608_205710_outLine +BABEL_BP_107_68767_20120214_214534_inLine +BABEL_BP_107_68767_20120214_214534_outLine +BABEL_BP_107_69028_20120430_132441_inLine +BABEL_BP_107_69049_20120322_215956_inLine +BABEL_BP_107_69137_20120424_183202_inLine +BABEL_BP_107_69137_20120424_183202_outLine +BABEL_BP_107_69275_20120608_210354_inLine +BABEL_BP_107_69295_20120501_154139_inLine +BABEL_BP_107_70000_20120618_004254_inLine +BABEL_BP_107_70000_20120618_004254_outLine +BABEL_BP_107_70077_20120428_170417_inLine +BABEL_BP_107_70120_20120418_213104_inLine +BABEL_BP_107_70120_20120418_213104_outLine +BABEL_BP_107_70285_20120212_214056_inLine +BABEL_BP_107_70323_20120617_122402_outLine +BABEL_BP_107_70441_20120704_163546_inLine +BABEL_BP_107_70441_20120704_163546_outLine +BABEL_BP_107_70511_20120618_124928_outLine +BABEL_BP_107_70615_20120208_233912_inLine +BABEL_BP_107_70615_20120208_233912_outLine +BABEL_BP_107_70975_20120407_011601_inLine +BABEL_BP_107_70975_20120407_011601_outLine +BABEL_BP_107_71176_20120418_195323_inLine +BABEL_BP_107_71176_20120418_195323_outLine +BABEL_BP_107_71739_20120430_125259_inLine +BABEL_BP_107_71741_20120211_231000_inLine +BABEL_BP_107_71741_20120211_231000_outLine +BABEL_BP_107_71844_20120212_180004_inLine +BABEL_BP_107_71927_20120516_204724_inLine +BABEL_BP_107_72269_20120416_010327_inLine +BABEL_BP_107_72269_20120416_010327_outLine +BABEL_BP_107_72297_20120608_185443_inLine +BABEL_BP_107_72297_20120608_185443_outLine +BABEL_BP_107_72297_20120608_190156_inLine +BABEL_BP_107_72297_20120608_190156_outLine +BABEL_BP_107_72647_20120614_125725_inLine +BABEL_BP_107_72718_20120505_025006_inLine +BABEL_BP_107_72718_20120505_025006_outLine +BABEL_BP_107_72879_20120403_141911_inLine +BABEL_BP_107_72879_20120403_141911_outLine +BABEL_BP_107_73205_20120211_191427_outLine +BABEL_BP_107_73438_20120502_201055_inLine 
+BABEL_BP_107_73438_20120502_201055_outLine +BABEL_BP_107_73440_20120416_172035_inLine +BABEL_BP_107_73761_20120424_154013_inLine +BABEL_BP_107_73761_20120424_154013_outLine +BABEL_BP_107_73921_20120501_000425_outLine +BABEL_BP_107_74043_20120323_014301_outLine +BABEL_BP_107_74317_20120502_225211_inLine +BABEL_BP_107_74317_20120502_225211_outLine +BABEL_BP_107_74395_20120418_140703_inLine +BABEL_BP_107_74395_20120418_140703_outLine +BABEL_BP_107_74404_20120212_134850_outLine +BABEL_BP_107_74625_20120425_234344_inLine +BABEL_BP_107_74700_20120610_233419_inLine +BABEL_BP_107_74823_20120217_022832_inLine +BABEL_BP_107_74823_20120217_022832_outLine +BABEL_BP_107_74935_20120616_144642_inLine +BABEL_BP_107_74974_20120617_143904_inLine +BABEL_BP_107_74974_20120617_143904_outLine +BABEL_BP_107_74986_20120416_011008_inLine +BABEL_BP_107_74986_20120416_011008_outLine +BABEL_BP_107_74986_20120416_011927_inLine +BABEL_BP_107_74986_20120416_011927_outLine +BABEL_BP_107_75036_20120325_233130_inLine +BABEL_BP_107_75036_20120325_233130_outLine +BABEL_BP_107_75333_20120505_200116_inLine +BABEL_BP_107_75333_20120505_200116_outLine +BABEL_BP_107_75498_20120506_171232_inLine +BABEL_BP_107_75498_20120506_171232_outLine +BABEL_BP_107_75799_20120429_140233_inLine +BABEL_BP_107_75799_20120429_140233_outLine +BABEL_BP_107_75815_20120217_141539_inLine +BABEL_BP_107_75815_20120217_141539_outLine +BABEL_BP_107_76252_20120705_003603_outLine +BABEL_BP_107_76341_20120215_201638_inLine +BABEL_BP_107_76341_20120215_201638_outLine +BABEL_BP_107_76661_20120405_132625_inLine +BABEL_BP_107_76691_20120501_002016_inLine +BABEL_BP_107_76716_20120418_215649_outLine +BABEL_BP_107_76733_20120424_181359_inLine +BABEL_BP_107_76733_20120424_181359_outLine +BABEL_BP_107_76733_20120424_183605_inLine +BABEL_BP_107_76733_20120424_183605_outLine +BABEL_BP_107_76748_20120504_181420_inLine +BABEL_BP_107_76919_20120507_010805_outLine +BABEL_BP_107_76925_20120407_015139_inLine +BABEL_BP_107_76944_20120505_000745_inLine +BABEL_BP_107_76944_20120505_000745_outLine +BABEL_BP_107_76993_20120501_125118_inLine +BABEL_BP_107_76993_20120501_125118_outLine +BABEL_BP_107_77238_20120322_211133_outLine +BABEL_BP_107_77244_20120429_164842_inLine +BABEL_BP_107_77244_20120429_164842_outLine +BABEL_BP_107_77315_20120527_222821_outLine +BABEL_BP_107_77338_20120617_171454_inLine +BABEL_BP_107_77338_20120617_171454_outLine +BABEL_BP_107_77473_20120610_000112_inLine +BABEL_BP_107_77886_20120326_191938_inLine +BABEL_BP_107_77886_20120326_191938_outLine +BABEL_BP_107_78094_20120212_205141_inLine +BABEL_BP_107_78094_20120212_205141_outLine +BABEL_BP_107_78487_20120430_133108_inLine +BABEL_BP_107_78487_20120430_133108_outLine +BABEL_BP_107_78514_20120617_131155_outLine +BABEL_BP_107_79284_20120511_180310_inLine +BABEL_BP_107_79284_20120511_180310_outLine +BABEL_BP_107_79495_20120222_195716_inLine +BABEL_BP_107_79619_20120420_115502_inLine +BABEL_BP_107_79619_20120420_115502_outLine +BABEL_BP_107_79632_20120428_182831_inLine +BABEL_BP_107_79632_20120428_182831_outLine +BABEL_BP_107_79860_20120328_023545_inLine +BABEL_BP_107_79944_20120424_213833_inLine +BABEL_BP_107_79970_20120418_214316_inLine +BABEL_BP_107_80008_20120218_225347_inLine +BABEL_BP_107_80008_20120218_225347_outLine +BABEL_BP_107_80282_20120627_190514_inLine +BABEL_BP_107_80282_20120627_190935_inLine +BABEL_BP_107_80290_20120501_134226_inLine +BABEL_BP_107_80290_20120501_134226_outLine +BABEL_BP_107_80337_20120608_000801_inLine +BABEL_BP_107_80337_20120608_000801_outLine 
+BABEL_BP_107_80638_20120501_223037_inLine +BABEL_BP_107_80638_20120501_223037_outLine +BABEL_BP_107_81056_20120502_155358_inLine +BABEL_BP_107_81056_20120502_155358_outLine +BABEL_BP_107_81321_20120329_030424_outLine +BABEL_BP_107_81486_20120213_035232_inLine +BABEL_BP_107_81486_20120213_040319_inLine +BABEL_BP_107_81535_20120421_151505_inLine +BABEL_BP_107_81535_20120421_151505_outLine +BABEL_BP_107_81611_20120616_154507_outLine +BABEL_BP_107_81717_20120426_185608_inLine +BABEL_BP_107_82006_20120417_133143_outLine +BABEL_BP_107_82025_20120325_012956_inLine +BABEL_BP_107_82103_20120326_172335_inLine +BABEL_BP_107_82103_20120326_172335_outLine +BABEL_BP_107_82131_20120704_135728_inLine +BABEL_BP_107_82131_20120704_211005_inLine +BABEL_BP_107_82591_20120407_185008_outLine +BABEL_BP_107_82599_20120608_140933_outLine +BABEL_BP_107_82766_20120627_112435_outLine +BABEL_BP_107_82880_20120705_001819_inLine +BABEL_BP_107_82880_20120705_001819_outLine +BABEL_BP_107_83017_20120608_125136_inLine +BABEL_BP_107_83053_20120426_184045_inLine +BABEL_BP_107_83256_20120212_162557_outLine +BABEL_BP_107_83360_20120418_000230_inLine +BABEL_BP_107_83360_20120418_000230_outLine +BABEL_BP_107_83529_20120608_152238_outLine +BABEL_BP_107_83700_20120427_121525_inLine +BABEL_BP_107_83700_20120427_121525_outLine +BABEL_BP_107_83702_20120418_010601_inLine +BABEL_BP_107_83702_20120418_010601_outLine +BABEL_BP_107_83982_20120704_125429_outLine +BABEL_BP_107_83982_20120704_125430_inLine +BABEL_BP_107_83982_20120704_131324_inLine +BABEL_BP_107_83982_20120704_131324_outLine +BABEL_BP_107_84171_20120504_185725_inLine +BABEL_BP_107_84335_20120418_002843_inLine +BABEL_BP_107_84532_20120703_171302_inLine +BABEL_BP_107_84540_20120328_205952_outLine +BABEL_BP_107_84543_20120503_005623_inLine +BABEL_BP_107_84543_20120503_005623_outLine +BABEL_BP_107_84943_20120405_134459_inLine +BABEL_BP_107_85083_20120425_024151_inLine +BABEL_BP_107_85354_20120704_145327_inLine +BABEL_BP_107_85354_20120704_145327_outLine +BABEL_BP_107_85573_20120208_152239_inLine +BABEL_BP_107_85686_20120627_180412_inLine +BABEL_BP_107_85686_20120627_180413_outLine +BABEL_BP_107_85716_20120330_201512_outLine +BABEL_BP_107_85716_20120330_202652_outLine +BABEL_BP_107_85819_20120705_030943_inLine +BABEL_BP_107_85819_20120705_030944_outLine +BABEL_BP_107_86016_20120417_225748_inLine +BABEL_BP_107_86029_20120212_235447_inLine +BABEL_BP_107_86419_20120209_010052_inLine +BABEL_BP_107_86419_20120209_010052_outLine +BABEL_BP_107_86890_20120322_202435_inLine +BABEL_BP_107_87167_20120211_230800_outLine +BABEL_BP_107_87481_20120513_191237_inLine +BABEL_BP_107_87481_20120513_191237_outLine +BABEL_BP_107_87539_20120418_225114_inLine +BABEL_BP_107_87539_20120418_225114_outLine +BABEL_BP_107_87671_20120218_011104_inLine +BABEL_BP_107_87857_20120325_000202_inLine +BABEL_BP_107_88243_20120322_210747_inLine +BABEL_BP_107_88243_20120322_210747_outLine +BABEL_BP_107_88253_20120511_165340_inLine +BABEL_BP_107_88253_20120511_165340_outLine +BABEL_BP_107_88294_20120322_163142_outLine +BABEL_BP_107_88506_20120503_191321_inLine +BABEL_BP_107_88506_20120503_191321_outLine +BABEL_BP_107_88532_20120416_012644_inLine +BABEL_BP_107_89619_20120217_174102_inLine +BABEL_BP_107_89619_20120217_174102_outLine +BABEL_BP_107_89644_20120501_170949_inLine +BABEL_BP_107_89644_20120501_170949_outLine +BABEL_BP_107_89657_20120610_213215_inLine +BABEL_BP_107_89657_20120610_213215_outLine +BABEL_BP_107_89674_20120212_162158_inLine +BABEL_BP_107_89674_20120212_162158_outLine 
+BABEL_BP_107_89965_20120505_003121_inLine +BABEL_BP_107_89965_20120505_003121_outLine +BABEL_BP_107_90313_20120325_200742_inLine +BABEL_BP_107_90393_20120417_220816_inLine +BABEL_BP_107_90393_20120417_220817_outLine +BABEL_BP_107_90577_20120118_141830_inLine +BABEL_BP_107_90577_20120118_141830_outLine +BABEL_BP_107_90609_20120216_194251_inLine +BABEL_BP_107_90764_20120418_004231_outLine +BABEL_BP_107_91000_20120529_151028_inLine +BABEL_BP_107_91002_20120429_192712_inLine +BABEL_BP_107_91002_20120429_192712_outLine +BABEL_BP_107_91007_20120612_144506_inLine +BABEL_BP_107_91040_20120618_152624_outLine +BABEL_BP_107_91136_20120427_122059_inLine +BABEL_BP_107_91401_20120213_010307_inLine +BABEL_BP_107_91401_20120213_010307_outLine +BABEL_BP_107_91406_20120429_193057_inLine +BABEL_BP_107_91406_20120429_193057_outLine +BABEL_BP_107_91409_20120520_225023_outLine +BABEL_BP_107_91409_20120520_231205_outLine +BABEL_BP_107_91660_20120510_181954_inLine +BABEL_BP_107_91660_20120510_181954_outLine +BABEL_BP_107_91660_20120510_182853_inLine +BABEL_BP_107_91660_20120510_182853_outLine +BABEL_BP_107_91660_20120510_184146_inLine +BABEL_BP_107_91660_20120510_184146_outLine +BABEL_BP_107_91723_20120323_144335_outLine +BABEL_BP_107_91865_20120429_214728_inLine +BABEL_BP_107_91865_20120429_214728_outLine +BABEL_BP_107_91905_20120504_210602_inLine +BABEL_BP_107_91905_20120504_210602_outLine +BABEL_BP_107_91975_20120703_173220_inLine +BABEL_BP_107_91975_20120703_173220_outLine +BABEL_BP_107_91979_20120209_000610_inLine +BABEL_BP_107_92002_20120418_214926_outLine +BABEL_BP_107_92407_20120210_183713_inLine +BABEL_BP_107_92407_20120210_183713_outLine +BABEL_BP_107_92436_20120213_013131_inLine +BABEL_BP_107_92436_20120213_013131_outLine +BABEL_BP_107_92591_20120505_140206_outLine +BABEL_BP_107_92602_20120216_214746_inLine +BABEL_BP_107_92602_20120216_215738_inLine +BABEL_BP_107_92603_20120416_011244_inLine +BABEL_BP_107_92603_20120416_011244_outLine +BABEL_BP_107_92628_20120323_014512_inLine +BABEL_BP_107_92628_20120323_014512_outLine +BABEL_BP_107_92643_20120608_122156_inLine +BABEL_BP_107_92643_20120608_123106_inLine +BABEL_BP_107_92735_20120413_181602_inLine +BABEL_BP_107_92789_20120416_165856_inLine +BABEL_BP_107_92800_20120412_013211_outLine +BABEL_BP_107_93044_20120607_140719_inLine +BABEL_BP_107_93044_20120607_140719_outLine +BABEL_BP_107_93509_20120321_230219_inLine +BABEL_BP_107_93509_20120321_230219_outLine +BABEL_BP_107_93804_20120703_232729_inLine +BABEL_BP_107_93804_20120703_233401_inLine +BABEL_BP_107_93974_20120627_184419_inLine +BABEL_BP_107_93974_20120627_184419_outLine +BABEL_BP_107_93979_20120422_134735_inLine +BABEL_BP_107_93979_20120422_134735_outLine +BABEL_BP_107_94149_20120405_220033_outLine +BABEL_BP_107_94162_20120425_235433_inLine +BABEL_BP_107_94223_20120215_204525_inLine +BABEL_BP_107_94514_20120417_001615_inLine +BABEL_BP_107_94514_20120417_001615_outLine +BABEL_BP_107_94514_20120417_003504_inLine +BABEL_BP_107_94514_20120417_003504_outLine +BABEL_BP_107_94541_20120705_024032_outLine +BABEL_BP_107_94542_20120512_223011_inLine +BABEL_BP_107_94542_20120512_223011_outLine +BABEL_BP_107_94694_20120508_120203_inLine +BABEL_BP_107_94694_20120508_120203_outLine +BABEL_BP_107_94696_20120608_185951_inLine +BABEL_BP_107_94696_20120608_185951_outLine +BABEL_BP_107_94814_20120501_130313_inLine +BABEL_BP_107_94814_20120501_130313_outLine +BABEL_BP_107_94989_20120627_120236_outLine +BABEL_BP_107_95121_20120628_123304_inLine +BABEL_BP_107_95423_20120415_201523_inLine 
+BABEL_BP_107_95423_20120415_201523_outLine
+BABEL_BP_107_95533_20120505_005928_inLine
+BABEL_BP_107_95533_20120505_005928_outLine
+BABEL_BP_107_95542_20120502_223446_inLine
+BABEL_BP_107_95542_20120502_223446_outLine
+BABEL_BP_107_95566_20120505_162738_inLine
+BABEL_BP_107_95572_20120406_151856_inLine
+BABEL_BP_107_95572_20120406_151856_outLine
+BABEL_BP_107_95589_20120419_162645_inLine
+BABEL_BP_107_95589_20120419_162645_outLine
+BABEL_BP_107_95815_20120322_160344_inLine
+BABEL_BP_107_95815_20120322_160344_outLine
+BABEL_BP_107_95996_20120324_230119_inLine
+BABEL_BP_107_96302_20120510_023815_inLine
+BABEL_BP_107_96302_20120510_023815_outLine
+BABEL_BP_107_96667_20120426_182837_inLine
+BABEL_BP_107_96667_20120426_182837_outLine
+BABEL_BP_107_96959_20120505_014233_inLine
+BABEL_BP_107_96959_20120505_014233_outLine
+BABEL_BP_107_97260_20120324_012659_outLine
+BABEL_BP_107_97318_20120608_183537_inLine
+BABEL_BP_107_97318_20120608_183537_outLine
+BABEL_BP_107_97629_20120420_202833_inLine
+BABEL_BP_107_97629_20120420_202833_outLine
+BABEL_BP_107_97946_20120411_213631_outLine
+BABEL_BP_107_98086_20120609_185014_inLine
+BABEL_BP_107_98086_20120609_185014_outLine
+BABEL_BP_107_98099_20120618_120506_outLine
+BABEL_BP_107_98219_20120512_202308_inLine
+BABEL_BP_107_98219_20120512_202308_outLine
+BABEL_BP_107_98219_20120512_203451_inLine
+BABEL_BP_107_98219_20120512_203451_outLine
+BABEL_BP_107_98402_20120421_162435_inLine
+BABEL_BP_107_98402_20120421_162435_outLine
+BABEL_BP_107_98640_20120425_213908_outLine
+BABEL_BP_107_98675_20120419_225133_inLine
+BABEL_BP_107_98675_20120419_225133_outLine
+BABEL_BP_107_99414_20120430_200633_inLine
+BABEL_BP_107_99414_20120430_200633_outLine
+BABEL_BP_107_99567_20120405_154443_outLine
+BABEL_BP_107_99571_20120322_165034_inLine
+BABEL_BP_107_99571_20120322_165034_outLine
+BABEL_BP_107_99694_20120322_165823_inLine
+BABEL_BP_107_99694_20120322_165823_outLine
+BABEL_BP_107_99731_20120618_005616_outLine
+BABEL_BP_107_99764_20120415_202745_inLine
+BABEL_BP_107_99823_20120511_002213_inLine
+BABEL_BP_107_99823_20120511_002213_outLine
+BABEL_BP_107_99929_20120612_143030_inLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/dev.list b/egs/babel/s5d/conf/lists/201-haitian/dev.list
new file mode 100644
index 00000000000..208f92ee9cb
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/dev.list
@@ -0,0 +1,126 @@
+BABEL_OP1_201_10019_20130527_022947_inLine
+BABEL_OP1_201_10019_20130527_022947_outLine
+BABEL_OP1_201_10319_20130306_021244_inLine
+BABEL_OP1_201_10319_20130306_021244_outLine
+BABEL_OP1_201_14440_20130302_012105_inLine
+BABEL_OP1_201_14440_20130302_012105_outLine
+BABEL_OP1_201_15324_20130228_031225_inLine
+BABEL_OP1_201_15324_20130228_031225_outLine
+BABEL_OP1_201_15535_20130305_062354_inLine
+BABEL_OP1_201_15535_20130305_062354_outLine
+BABEL_OP1_201_15638_20130305_060156_inLine
+BABEL_OP1_201_15638_20130305_060156_outLine
+BABEL_OP1_201_16184_20130305_081912_inLine
+BABEL_OP1_201_16184_20130305_081912_outLine
+BABEL_OP1_201_21029_20130529_114410_inLine
+BABEL_OP1_201_21029_20130529_114410_outLine
+BABEL_OP1_201_21029_20130529_115127_inLine
+BABEL_OP1_201_21029_20130529_115127_outLine
+BABEL_OP1_201_21109_20130414_085917_inLine
+BABEL_OP1_201_21109_20130414_085917_outLine
+BABEL_OP1_201_21393_20130501_071647_inLine
+BABEL_OP1_201_21393_20130501_071647_outLine
+BABEL_OP1_201_23151_20130428_054353_inLine
+BABEL_OP1_201_23151_20130428_054353_outLine
+BABEL_OP1_201_23983_20130503_023139_inLine
+BABEL_OP1_201_23983_20130503_023139_outLine
+BABEL_OP1_201_23983_20130503_023729_inLine
+BABEL_OP1_201_23983_20130503_023729_outLine
+BABEL_OP1_201_26074_20130522_003756_inLine
+BABEL_OP1_201_26074_20130522_003756_outLine
+BABEL_OP1_201_26206_20130302_073520_inLine
+BABEL_OP1_201_26206_20130302_073520_outLine
+BABEL_OP1_201_32832_20130430_060411_inLine
+BABEL_OP1_201_32832_20130430_060411_outLine
+BABEL_OP1_201_32861_20130429_111248_inLine
+BABEL_OP1_201_32861_20130429_111248_outLine
+BABEL_OP1_201_32998_20130531_000201_inLine
+BABEL_OP1_201_32998_20130531_000201_outLine
+BABEL_OP1_201_35583_20130429_033957_inLine
+BABEL_OP1_201_35583_20130429_033957_outLine
+BABEL_OP1_201_41400_20130430_094739_inLine
+BABEL_OP1_201_41400_20130430_094739_outLine
+BABEL_OP1_201_41609_20130404_034518_inLine
+BABEL_OP1_201_41609_20130404_034518_outLine
+BABEL_OP1_201_45843_20130227_092425_inLine
+BABEL_OP1_201_45843_20130227_092425_outLine
+BABEL_OP1_201_45843_20130227_095551_inLine
+BABEL_OP1_201_45843_20130227_095551_outLine
+BABEL_OP1_201_46315_20130302_045420_inLine
+BABEL_OP1_201_46315_20130302_045420_outLine
+BABEL_OP1_201_47877_20130429_092603_inLine
+BABEL_OP1_201_47877_20130429_092603_outLine
+BABEL_OP1_201_49197_20130529_061436_inLine
+BABEL_OP1_201_49197_20130529_061436_outLine
+BABEL_OP1_201_49287_20130227_083257_inLine
+BABEL_OP1_201_49287_20130227_083257_outLine
+BABEL_OP1_201_51858_20130224_055705_inLine
+BABEL_OP1_201_51858_20130224_055705_outLine
+BABEL_OP1_201_52025_20130226_082606_inLine
+BABEL_OP1_201_52025_20130226_082606_outLine
+BABEL_OP1_201_52694_20130518_050051_inLine
+BABEL_OP1_201_52694_20130518_050051_outLine
+BABEL_OP1_201_54162_20130508_044116_inLine
+BABEL_OP1_201_54162_20130508_044116_outLine
+BABEL_OP1_201_59898_20130223_041449_inLine
+BABEL_OP1_201_59898_20130223_041449_outLine
+BABEL_OP1_201_61011_20130228_062832_inLine
+BABEL_OP1_201_61011_20130228_062832_outLine
+BABEL_OP1_201_61357_20130602_030259_inLine
+BABEL_OP1_201_61357_20130602_030259_outLine
+BABEL_OP1_201_62456_20130521_040629_inLine
+BABEL_OP1_201_62456_20130521_040629_outLine
+BABEL_OP1_201_63757_20130531_014819_inLine
+BABEL_OP1_201_63757_20130531_014819_outLine
+BABEL_OP1_201_65252_20130503_025634_inLine
+BABEL_OP1_201_65252_20130503_025634_outLine
+BABEL_OP1_201_65640_20130429_103434_inLine
+BABEL_OP1_201_65640_20130429_103434_outLine
+BABEL_OP1_201_67085_20130503_043953_inLine
+BABEL_OP1_201_67085_20130503_043953_outLine
+BABEL_OP1_201_67842_20130528_081111_inLine
+BABEL_OP1_201_67842_20130528_081111_outLine
+BABEL_OP1_201_70110_20130224_022802_inLine
+BABEL_OP1_201_70110_20130224_022802_outLine
+BABEL_OP1_201_70716_20130503_015538_inLine
+BABEL_OP1_201_70716_20130503_015538_outLine
+BABEL_OP1_201_70986_20130307_075426_inLine
+BABEL_OP1_201_70986_20130307_075426_outLine
+BABEL_OP1_201_71263_20130602_021725_inLine
+BABEL_OP1_201_71263_20130602_021725_outLine
+BABEL_OP1_201_72654_20130510_063658_inLine
+BABEL_OP1_201_72654_20130510_063658_outLine
+BABEL_OP1_201_74226_20130303_125222_inLine
+BABEL_OP1_201_74226_20130303_125222_outLine
+BABEL_OP1_201_75223_20130221_024906_inLine
+BABEL_OP1_201_75223_20130221_024906_outLine
+BABEL_OP1_201_77112_20130528_050544_inLine
+BABEL_OP1_201_77112_20130528_050544_outLine
+BABEL_OP1_201_78194_20121206_064008_inLine
+BABEL_OP1_201_78194_20121206_064008_outLine
+BABEL_OP1_201_78360_20130430_101414_inLine
+BABEL_OP1_201_78360_20130430_101414_outLine
+BABEL_OP1_201_78454_20130531_032436_inLine
+BABEL_OP1_201_78454_20130531_032436_outLine
+BABEL_OP1_201_79571_20130302_074959_inLine
+BABEL_OP1_201_79571_20130302_074959_outLine
+BABEL_OP1_201_80881_20130220_022131_inLine
+BABEL_OP1_201_80881_20130220_022131_outLine
+BABEL_OP1_201_81553_20130430_095301_inLine
+BABEL_OP1_201_81553_20130430_095301_outLine
+BABEL_OP1_201_82035_20130601_052036_inLine
+BABEL_OP1_201_82035_20130601_052036_outLine
+BABEL_OP1_201_84125_20130227_022410_inLine
+BABEL_OP1_201_84125_20130227_022410_outLine
+BABEL_OP1_201_85439_20130503_071053_inLine
+BABEL_OP1_201_85439_20130503_071053_outLine
+BABEL_OP1_201_88982_20130512_060722_inLine
+BABEL_OP1_201_88982_20130512_060722_outLine
+BABEL_OP1_201_96584_20130427_001740_inLine
+BABEL_OP1_201_96584_20130427_001740_outLine
+BABEL_OP1_201_96842_20130503_081834_inLine
+BABEL_OP1_201_96842_20130503_081834_outLine
+BABEL_OP1_201_96985_20130313_031020_inLine
+BABEL_OP1_201_96985_20130313_031020_outLine
+BABEL_OP1_201_99813_20130514_080612_inLine
+BABEL_OP1_201_99813_20130514_080612_outLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/eval.list b/egs/babel/s5d/conf/lists/201-haitian/eval.list
new file mode 100644
index 00000000000..d9a4445b43d
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/eval.list
@@ -0,0 +1,194 @@
+BABEL_OP1_201_10188_20121207_034031_inLine
+BABEL_OP1_201_10188_20121207_034031_outLine
+BABEL_OP1_201_11581_20130524_035647_inLine
+BABEL_OP1_201_11581_20130524_035647_outLine
+BABEL_OP1_201_13427_20130517_044959_inLine
+BABEL_OP1_201_13427_20130517_044959_outLine
+BABEL_OP1_201_14228_20130312_063112_inLine
+BABEL_OP1_201_14228_20130312_063112_outLine
+BABEL_OP1_201_14537_20130604_084139_inLine
+BABEL_OP1_201_14537_20130604_084139_outLine
+BABEL_OP1_201_15926_20130302_065808_inLine
+BABEL_OP1_201_15926_20130302_065808_outLine
+BABEL_OP1_201_16056_20130328_050018_inLine
+BABEL_OP1_201_16056_20130328_050018_outLine
+BABEL_OP1_201_17165_20130509_020154_inLine
+BABEL_OP1_201_17165_20130509_020154_outLine
+BABEL_OP1_201_18242_20130603_023106_inLine
+BABEL_OP1_201_18242_20130603_023106_outLine
+BABEL_OP1_201_19101_20130521_032103_inLine
+BABEL_OP1_201_19101_20130521_032103_outLine
+BABEL_OP1_201_19545_20130517_060948_inLine
+BABEL_OP1_201_19545_20130517_060948_outLine
+BABEL_OP1_201_19621_20130517_031837_inLine
+BABEL_OP1_201_19621_20130517_031837_outLine
+BABEL_OP1_201_19672_20130301_110157_inLine
+BABEL_OP1_201_19672_20130301_110157_outLine
+BABEL_OP1_201_22641_20130222_024500_inLine
+BABEL_OP1_201_22641_20130222_024500_outLine
+BABEL_OP1_201_23260_20130502_085418_inLine
+BABEL_OP1_201_23260_20130502_085418_outLine
+BABEL_OP1_201_23395_20130521_052906_inLine
+BABEL_OP1_201_23395_20130521_052906_outLine
+BABEL_OP1_201_23628_20130528_052627_inLine
+BABEL_OP1_201_23628_20130528_052627_outLine
+BABEL_OP1_201_23731_20130517_014107_inLine
+BABEL_OP1_201_23731_20130517_014107_outLine
+BABEL_OP1_201_25412_20130531_050830_inLine
+BABEL_OP1_201_25412_20130531_050830_outLine
+BABEL_OP1_201_31484_20130304_060634_inLine
+BABEL_OP1_201_31484_20130304_060634_outLine
+BABEL_OP1_201_31583_20130630_090026_inLine
+BABEL_OP1_201_31583_20130630_090026_outLine
+BABEL_OP1_201_34019_20130224_123823_inLine
+BABEL_OP1_201_34019_20130224_123823_outLine
+BABEL_OP1_201_34688_20130226_033106_inLine
+BABEL_OP1_201_34688_20130226_033106_outLine
+BABEL_OP1_201_35202_20130228_143257_inLine
+BABEL_OP1_201_35202_20130228_143257_outLine
+BABEL_OP1_201_35202_20130228_144257_inLine
+BABEL_OP1_201_35202_20130228_144257_outLine
+BABEL_OP1_201_37064_20130528_095008_inLine
+BABEL_OP1_201_37064_20130528_095008_outLine
+BABEL_OP1_201_37290_20130602_070243_inLine
+BABEL_OP1_201_37290_20130602_070243_outLine
+BABEL_OP1_201_39159_20130226_043216_inLine
+BABEL_OP1_201_39159_20130226_043216_outLine
+BABEL_OP1_201_39744_20130226_025333_inLine
+BABEL_OP1_201_39744_20130226_025333_outLine
+BABEL_OP1_201_41038_20130301_095640_inLine
+BABEL_OP1_201_41038_20130301_095640_outLine
+BABEL_OP1_201_41745_20130530_021647_inLine
+BABEL_OP1_201_41745_20130530_021647_outLine
+BABEL_OP1_201_43285_20130303_112216_inLine
+BABEL_OP1_201_43285_20130303_112216_outLine
+BABEL_OP1_201_44255_20130427_232421_inLine
+BABEL_OP1_201_44255_20130427_232421_outLine
+BABEL_OP1_201_44255_20130427_233501_inLine
+BABEL_OP1_201_44255_20130427_233501_outLine
+BABEL_OP1_201_44847_20130228_021744_inLine
+BABEL_OP1_201_44847_20130228_021744_outLine
+BABEL_OP1_201_44847_20130301_014421_inLine
+BABEL_OP1_201_44847_20130301_014421_outLine
+BABEL_OP1_201_44868_20130301_094502_inLine
+BABEL_OP1_201_44868_20130301_094502_outLine
+BABEL_OP1_201_44868_20130301_095004_inLine
+BABEL_OP1_201_44868_20130301_095004_outLine
+BABEL_OP1_201_45106_20130228_110111_inLine
+BABEL_OP1_201_45106_20130228_110111_outLine
+BABEL_OP1_201_46202_20130301_041831_inLine
+BABEL_OP1_201_46202_20130301_041831_outLine
+BABEL_OP1_201_46712_20130527_095034_inLine
+BABEL_OP1_201_46712_20130527_095034_outLine
+BABEL_OP1_201_46974_20130305_032251_inLine
+BABEL_OP1_201_46974_20130305_032251_outLine
+BABEL_OP1_201_49775_20130312_061709_inLine
+BABEL_OP1_201_49775_20130312_061709_outLine
+BABEL_OP1_201_52222_20130221_115458_inLine
+BABEL_OP1_201_52222_20130221_115458_outLine
+BABEL_OP1_201_52442_20130511_033818_inLine
+BABEL_OP1_201_52442_20130511_033818_outLine
+BABEL_OP1_201_54405_20130512_043326_inLine
+BABEL_OP1_201_54405_20130512_043326_outLine
+BABEL_OP1_201_56523_20130530_035306_inLine
+BABEL_OP1_201_56523_20130530_035306_outLine
+BABEL_OP1_201_56720_20130305_084355_inLine
+BABEL_OP1_201_56720_20130305_084355_outLine
+BABEL_OP1_201_57609_20130519_003542_inLine
+BABEL_OP1_201_57609_20130519_003542_outLine
+BABEL_OP1_201_57922_20130601_024619_inLine
+BABEL_OP1_201_57922_20130601_024619_outLine
+BABEL_OP1_201_57935_20130522_034918_inLine
+BABEL_OP1_201_57935_20130522_034918_outLine
+BABEL_OP1_201_59645_20130510_022401_inLine
+BABEL_OP1_201_59645_20130510_022401_outLine
+BABEL_OP1_201_60352_20130301_071549_inLine
+BABEL_OP1_201_60352_20130301_071549_outLine
+BABEL_OP1_201_60352_20130301_072624_inLine
+BABEL_OP1_201_60352_20130301_072624_outLine
+BABEL_OP1_201_60508_20130221_023139_inLine
+BABEL_OP1_201_60508_20130221_023139_outLine
+BABEL_OP1_201_61440_20130602_061805_inLine
+BABEL_OP1_201_61440_20130602_061805_outLine
+BABEL_OP1_201_61963_20130430_084852_inLine
+BABEL_OP1_201_61963_20130430_084852_outLine
+BABEL_OP1_201_62155_20130507_055437_inLine
+BABEL_OP1_201_62155_20130507_055437_outLine
+BABEL_OP1_201_63309_20130214_111801_inLine
+BABEL_OP1_201_63309_20130214_111801_outLine
+BABEL_OP1_201_63481_20130306_031400_inLine
+BABEL_OP1_201_63481_20130306_031400_outLine
+BABEL_OP1_201_63511_20130704_101544_inLine
+BABEL_OP1_201_63511_20130704_101544_outLine
+BABEL_OP1_201_64638_20130228_015923_inLine
+BABEL_OP1_201_64638_20130228_015923_outLine
+BABEL_OP1_201_64870_20130521_011614_inLine
+BABEL_OP1_201_64870_20130521_011614_outLine
+BABEL_OP1_201_66967_20130223_042440_inLine
+BABEL_OP1_201_66967_20130223_042440_outLine
+BABEL_OP1_201_67552_20130302_031450_inLine
+BABEL_OP1_201_67552_20130302_031450_outLine
+BABEL_OP1_201_67592_20130413_085928_inLine
+BABEL_OP1_201_67592_20130413_085928_outLine
+BABEL_OP1_201_67794_20130528_054900_inLine
+BABEL_OP1_201_67794_20130528_054900_outLine
+BABEL_OP1_201_67794_20130528_060329_inLine
+BABEL_OP1_201_67794_20130528_060329_outLine
+BABEL_OP1_201_68059_20130514_015440_inLine
+BABEL_OP1_201_68059_20130514_015440_outLine
+BABEL_OP1_201_69633_20130302_015041_inLine
+BABEL_OP1_201_69633_20130302_015041_outLine
+BABEL_OP1_201_73757_20130510_051523_inLine
+BABEL_OP1_201_73757_20130510_051523_outLine
+BABEL_OP1_201_75359_20130502_024157_inLine
+BABEL_OP1_201_75359_20130502_024157_outLine
+BABEL_OP1_201_76773_20130529_015651_inLine
+BABEL_OP1_201_76773_20130529_015651_outLine
+BABEL_OP1_201_77139_20130221_080959_inLine
+BABEL_OP1_201_77139_20130221_080959_outLine
+BABEL_OP1_201_77391_20130529_083139_inLine
+BABEL_OP1_201_77391_20130529_083139_outLine
+BABEL_OP1_201_77567_20130305_071815_inLine
+BABEL_OP1_201_77567_20130305_071815_outLine
+BABEL_OP1_201_78630_20130604_103056_inLine
+BABEL_OP1_201_78630_20130604_103056_outLine
+BABEL_OP1_201_80897_20130602_013830_inLine
+BABEL_OP1_201_80897_20130602_013830_outLine
+BABEL_OP1_201_81229_20130529_053302_inLine
+BABEL_OP1_201_81229_20130529_053302_outLine
+BABEL_OP1_201_81404_20130528_042634_inLine
+BABEL_OP1_201_81404_20130528_042634_outLine
+BABEL_OP1_201_82030_20130704_095440_inLine
+BABEL_OP1_201_82030_20130704_095440_outLine
+BABEL_OP1_201_82030_20130704_100506_inLine
+BABEL_OP1_201_82030_20130704_100506_outLine
+BABEL_OP1_201_83366_20130228_065600_inLine
+BABEL_OP1_201_83366_20130228_065600_outLine
+BABEL_OP1_201_83783_20130524_015629_inLine
+BABEL_OP1_201_83783_20130524_015629_outLine
+BABEL_OP1_201_84327_20130305_092405_inLine
+BABEL_OP1_201_84327_20130305_092405_outLine
+BABEL_OP1_201_84583_20130518_020910_inLine
+BABEL_OP1_201_84583_20130518_020910_outLine
+BABEL_OP1_201_86748_20130428_024819_inLine
+BABEL_OP1_201_86748_20130428_024819_outLine
+BABEL_OP1_201_89045_20130307_055651_inLine
+BABEL_OP1_201_89045_20130307_055651_outLine
+BABEL_OP1_201_91930_20130429_004949_inLine
+BABEL_OP1_201_91930_20130429_004949_outLine
+BABEL_OP1_201_91930_20130429_005907_inLine
+BABEL_OP1_201_91930_20130429_005907_outLine
+BABEL_OP1_201_92060_20130502_110221_inLine
+BABEL_OP1_201_92060_20130502_110221_outLine
+BABEL_OP1_201_92698_20130510_005433_inLine
+BABEL_OP1_201_92698_20130510_005433_outLine
+BABEL_OP1_201_93861_20130512_005008_inLine
+BABEL_OP1_201_93861_20130512_005008_outLine
+BABEL_OP1_201_94141_20130430_122007_inLine
+BABEL_OP1_201_94141_20130430_122007_outLine
+BABEL_OP1_201_94166_20130429_044116_inLine
+BABEL_OP1_201_94166_20130429_044116_outLine
+BABEL_OP1_201_94587_20130305_100125_inLine
+BABEL_OP1_201_94587_20130305_100125_outLine
+BABEL_OP1_201_94745_20130301_131752_inLine
+BABEL_OP1_201_94745_20130301_131752_outLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/evalpart1.list b/egs/babel/s5d/conf/lists/201-haitian/evalpart1.list
new file mode 100644
index 00000000000..0b771a04457
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/evalpart1.list
@@ -0,0 +1,64 @@
+BABEL_OP1_201_15926_20130302_065808_inLine
+BABEL_OP1_201_15926_20130302_065808_outLine
+BABEL_OP1_201_19545_20130517_060948_inLine
+BABEL_OP1_201_19545_20130517_060948_outLine
+BABEL_OP1_201_23395_20130521_052906_inLine
+BABEL_OP1_201_23395_20130521_052906_outLine
+BABEL_OP1_201_23628_20130528_052627_inLine
+BABEL_OP1_201_23628_20130528_052627_outLine
+BABEL_OP1_201_23731_20130517_014107_inLine
+BABEL_OP1_201_23731_20130517_014107_outLine
+BABEL_OP1_201_31583_20130630_090026_inLine
+BABEL_OP1_201_31583_20130630_090026_outLine
+BABEL_OP1_201_39159_20130226_043216_inLine
+BABEL_OP1_201_39159_20130226_043216_outLine
+BABEL_OP1_201_41745_20130530_021647_inLine
+BABEL_OP1_201_41745_20130530_021647_outLine
+BABEL_OP1_201_44255_20130427_232421_inLine
+BABEL_OP1_201_44255_20130427_232421_outLine
+BABEL_OP1_201_44255_20130427_233501_inLine
+BABEL_OP1_201_44255_20130427_233501_outLine
+BABEL_OP1_201_44868_20130301_094502_inLine
+BABEL_OP1_201_44868_20130301_094502_outLine
+BABEL_OP1_201_44868_20130301_095004_inLine
+BABEL_OP1_201_44868_20130301_095004_outLine
+BABEL_OP1_201_45106_20130228_110111_inLine
+BABEL_OP1_201_45106_20130228_110111_outLine
+BABEL_OP1_201_46712_20130527_095034_inLine
+BABEL_OP1_201_46712_20130527_095034_outLine
+BABEL_OP1_201_49775_20130312_061709_inLine
+BABEL_OP1_201_49775_20130312_061709_outLine
+BABEL_OP1_201_57922_20130601_024619_inLine
+BABEL_OP1_201_57922_20130601_024619_outLine
+BABEL_OP1_201_60508_20130221_023139_inLine
+BABEL_OP1_201_60508_20130221_023139_outLine
+BABEL_OP1_201_62155_20130507_055437_inLine
+BABEL_OP1_201_62155_20130507_055437_outLine
+BABEL_OP1_201_63481_20130306_031400_inLine
+BABEL_OP1_201_63481_20130306_031400_outLine
+BABEL_OP1_201_63511_20130704_101544_inLine
+BABEL_OP1_201_63511_20130704_101544_outLine
+BABEL_OP1_201_64638_20130228_015923_inLine
+BABEL_OP1_201_64638_20130228_015923_outLine
+BABEL_OP1_201_75359_20130502_024157_inLine
+BABEL_OP1_201_75359_20130502_024157_outLine
+BABEL_OP1_201_76773_20130529_015651_inLine
+BABEL_OP1_201_76773_20130529_015651_outLine
+BABEL_OP1_201_77139_20130221_080959_inLine
+BABEL_OP1_201_77139_20130221_080959_outLine
+BABEL_OP1_201_77567_20130305_071815_inLine
+BABEL_OP1_201_77567_20130305_071815_outLine
+BABEL_OP1_201_78630_20130604_103056_inLine
+BABEL_OP1_201_78630_20130604_103056_outLine
+BABEL_OP1_201_80897_20130602_013830_inLine
+BABEL_OP1_201_80897_20130602_013830_outLine
+BABEL_OP1_201_81229_20130529_053302_inLine
+BABEL_OP1_201_81229_20130529_053302_outLine
+BABEL_OP1_201_83366_20130228_065600_inLine
+BABEL_OP1_201_83366_20130228_065600_outLine
+BABEL_OP1_201_83783_20130524_015629_inLine
+BABEL_OP1_201_83783_20130524_015629_outLine
+BABEL_OP1_201_86748_20130428_024819_inLine
+BABEL_OP1_201_86748_20130428_024819_outLine
+BABEL_OP1_201_94141_20130430_122007_inLine
+BABEL_OP1_201_94141_20130430_122007_outLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/train.FullLP.list b/egs/babel/s5d/conf/lists/201-haitian/train.FullLP.list
new file mode 100644
index 00000000000..751c6ca4652
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/train.FullLP.list
@@ -0,0 +1,760 @@
+BABEL_OP1_201_10002_20130212_152853_inLine
+BABEL_OP1_201_10002_20130212_152853_outLine
+BABEL_OP1_201_10036_20130528_005502_inLine
+BABEL_OP1_201_10036_20130528_005502_outLine
+BABEL_OP1_201_10482_20130305_105317_inLine
+BABEL_OP1_201_10482_20130305_105317_outLine
+BABEL_OP1_201_10647_20130428_045536_inLine
+BABEL_OP1_201_10647_20130428_045536_outLine
+BABEL_OP1_201_10901_20130529_031421_inLine
+BABEL_OP1_201_10901_20130529_031421_outLine
+BABEL_OP1_201_11096_20130603_043221_inLine
+BABEL_OP1_201_11096_20130603_043221_outLine
+BABEL_OP1_201_11663_20130601_002903_inLine
+BABEL_OP1_201_11663_20130601_002903_outLine
+BABEL_OP1_201_11673_20130226_015822_inLine
+BABEL_OP1_201_11673_20130226_015822_outLine
+BABEL_OP1_201_11797_20130328_033102_inLine
+BABEL_OP1_201_11797_20130328_033102_outLine
+BABEL_OP1_201_12220_20130528_051622_inLine
+BABEL_OP1_201_12220_20130528_051622_outLine
+BABEL_OP1_201_12242_20130603_033446_inLine
+BABEL_OP1_201_12242_20130603_033446_outLine
+BABEL_OP1_201_12606_20130429_120351_inLine
+BABEL_OP1_201_12606_20130429_120351_outLine
+BABEL_OP1_201_12606_20130429_121040_inLine
+BABEL_OP1_201_12606_20130429_121040_outLine
+BABEL_OP1_201_12635_20130429_040127_inLine
+BABEL_OP1_201_12635_20130429_040127_outLine
+BABEL_OP1_201_12767_20130509_005500_inLine
+BABEL_OP1_201_12767_20130509_005500_outLine
+BABEL_OP1_201_13178_20130301_043649_inLine
+BABEL_OP1_201_13178_20130301_043649_outLine
+BABEL_OP1_201_13324_20130529_035029_inLine
+BABEL_OP1_201_13324_20130529_035029_outLine
+BABEL_OP1_201_13483_20130306_062423_inLine
+BABEL_OP1_201_13483_20130306_062423_outLine
+BABEL_OP1_201_13490_20130508_033252_inLine
+BABEL_OP1_201_13490_20130508_033252_outLine
+BABEL_OP1_201_13664_20130117_073343_inLine
+BABEL_OP1_201_13664_20130117_073343_outLine
+BABEL_OP1_201_14179_20130303_111502_inLine
+BABEL_OP1_201_14179_20130303_111502_outLine
+BABEL_OP1_201_14229_20130528_023254_inLine
+BABEL_OP1_201_14229_20130528_023254_outLine
+BABEL_OP1_201_14539_20130501_223201_inLine
+BABEL_OP1_201_14539_20130501_223201_outLine
+BABEL_OP1_201_14560_20130301_065543_inLine
+BABEL_OP1_201_14560_20130301_065543_outLine
+BABEL_OP1_201_14807_20130522_012156_inLine
+BABEL_OP1_201_14807_20130522_012156_outLine
+BABEL_OP1_201_14899_20130301_035636_inLine
+BABEL_OP1_201_14899_20130301_035636_outLine
+BABEL_OP1_201_14972_20130518_025852_inLine
+BABEL_OP1_201_14972_20130518_025852_outLine
+BABEL_OP1_201_15216_20130503_005405_inLine
+BABEL_OP1_201_15216_20130503_005405_outLine
+BABEL_OP1_201_15322_20130701_030436_inLine
+BABEL_OP1_201_15322_20130701_030436_outLine
+BABEL_OP1_201_15382_20130228_050819_inLine
+BABEL_OP1_201_15382_20130228_050819_outLine
+BABEL_OP1_201_15702_20130301_041117_inLine
+BABEL_OP1_201_15702_20130301_041117_outLine
+BABEL_OP1_201_15730_20130305_034450_inLine
+BABEL_OP1_201_15730_20130305_034450_outLine
+BABEL_OP1_201_15848_20130130_070404_inLine
+BABEL_OP1_201_15848_20130130_070404_outLine
+BABEL_OP1_201_15902_20130323_005824_inLine
+BABEL_OP1_201_15902_20130323_005824_outLine
+BABEL_OP1_201_16149_20130322_021647_inLine
+BABEL_OP1_201_16149_20130322_021647_outLine
+BABEL_OP1_201_16467_20130704_025921_inLine
+BABEL_OP1_201_16467_20130704_025921_outLine
+BABEL_OP1_201_16800_20130702_085158_inLine
+BABEL_OP1_201_16800_20130702_085158_outLine
+BABEL_OP1_201_16924_20130301_032937_inLine
+BABEL_OP1_201_16924_20130301_032937_outLine
+BABEL_OP1_201_16938_20130514_072820_inLine
+BABEL_OP1_201_16938_20130514_072820_outLine
+BABEL_OP1_201_17032_20130306_103506_inLine
+BABEL_OP1_201_17032_20130306_103506_outLine
+BABEL_OP1_201_17113_20130519_093427_inLine
+BABEL_OP1_201_17113_20130519_093427_outLine
+BABEL_OP1_201_17472_20130311_075957_inLine
+BABEL_OP1_201_17472_20130311_075957_outLine
+BABEL_OP1_201_17496_20130301_030157_inLine
+BABEL_OP1_201_17496_20130301_030157_outLine
+BABEL_OP1_201_17520_20130518_012147_inLine
+BABEL_OP1_201_17520_20130518_012147_outLine
+BABEL_OP1_201_17567_20130512_065938_inLine
+BABEL_OP1_201_17567_20130512_065938_outLine
+BABEL_OP1_201_17881_20130429_230318_inLine
+BABEL_OP1_201_17881_20130429_230318_outLine
+BABEL_OP1_201_17923_20130529_021211_inLine
+BABEL_OP1_201_17923_20130529_021211_outLine
+BABEL_OP1_201_18118_20130501_084131_inLine
+BABEL_OP1_201_18118_20130501_084131_outLine
+BABEL_OP1_201_18766_20130502_102418_inLine
+BABEL_OP1_201_18766_20130502_102418_outLine
+BABEL_OP1_201_19134_20130601_040621_inLine
+BABEL_OP1_201_19134_20130601_040621_outLine
+BABEL_OP1_201_19589_20130502_093932_inLine
+BABEL_OP1_201_19589_20130502_093932_outLine
+BABEL_OP1_201_19722_20130425_005348_inLine
+BABEL_OP1_201_19722_20130425_005348_outLine
+BABEL_OP1_201_19749_20130429_090621_inLine
+BABEL_OP1_201_19749_20130429_090621_outLine
+BABEL_OP1_201_19767_20130502_130900_inLine
+BABEL_OP1_201_19767_20130502_130900_outLine
+BABEL_OP1_201_19877_20130502_085421_inLine
+BABEL_OP1_201_19877_20130502_085421_outLine
+BABEL_OP1_201_20330_20130429_035418_inLine
+BABEL_OP1_201_20330_20130429_035418_outLine
+BABEL_OP1_201_20437_20130216_094002_inLine
+BABEL_OP1_201_20437_20130216_094002_outLine
+BABEL_OP1_201_20768_20130701_035344_inLine
+BABEL_OP1_201_20768_20130701_035344_outLine
+BABEL_OP1_201_20800_20130529_035944_inLine
+BABEL_OP1_201_20800_20130529_035944_outLine
+BABEL_OP1_201_20972_20130603_035417_inLine
+BABEL_OP1_201_20972_20130603_035417_outLine
+BABEL_OP1_201_21244_20130602_073304_inLine
+BABEL_OP1_201_21244_20130602_073304_outLine
+BABEL_OP1_201_21807_20130522_042858_inLine
+BABEL_OP1_201_21807_20130522_042858_outLine
+BABEL_OP1_201_21892_20130430_033520_inLine
+BABEL_OP1_201_21892_20130430_033520_outLine
+BABEL_OP1_201_22466_20121206_070403_inLine
+BABEL_OP1_201_22466_20121206_070403_outLine
+BABEL_OP1_201_22494_20130305_052405_inLine
+BABEL_OP1_201_22494_20130305_052405_outLine
+BABEL_OP1_201_22624_20130305_121723_inLine
+BABEL_OP1_201_22624_20130305_121723_outLine
+BABEL_OP1_201_23046_20130527_110737_inLine
+BABEL_OP1_201_23046_20130527_110737_outLine
+BABEL_OP1_201_23119_20130321_054320_inLine
+BABEL_OP1_201_23119_20130321_054320_outLine
+BABEL_OP1_201_23190_20130603_224243_inLine
+BABEL_OP1_201_23190_20130603_224243_outLine
+BABEL_OP1_201_23195_20130227_050013_inLine
+BABEL_OP1_201_23195_20130227_050013_outLine
+BABEL_OP1_201_23239_20130305_093734_inLine
+BABEL_OP1_201_23239_20130305_093734_outLine
+BABEL_OP1_201_23893_20130430_080021_inLine
+BABEL_OP1_201_23893_20130430_080021_outLine
+BABEL_OP1_201_24231_20130502_123747_inLine
+BABEL_OP1_201_24231_20130502_123747_outLine
+BABEL_OP1_201_24239_20130703_230221_inLine
+BABEL_OP1_201_24239_20130703_230221_outLine
+BABEL_OP1_201_24270_20130530_020630_inLine
+BABEL_OP1_201_24270_20130530_020630_outLine
+BABEL_OP1_201_24290_20130703_074550_inLine
+BABEL_OP1_201_24290_20130703_074550_outLine
+BABEL_OP1_201_24470_20130531_024204_inLine
+BABEL_OP1_201_24470_20130531_024204_outLine
+BABEL_OP1_201_24501_20130429_102945_inLine
+BABEL_OP1_201_24501_20130429_102945_outLine
+BABEL_OP1_201_24532_20130307_060030_inLine
+BABEL_OP1_201_24532_20130307_060030_outLine
+BABEL_OP1_201_24586_20130430_025349_inLine
+BABEL_OP1_201_24586_20130430_032300_inLine
+BABEL_OP1_201_24586_20130430_032300_outLine
+BABEL_OP1_201_24586_20130430_033306_inLine
+BABEL_OP1_201_24586_20130430_033306_outLine
+BABEL_OP1_201_24589_20130529_111014_inLine
+BABEL_OP1_201_24589_20130529_111014_outLine
+BABEL_OP1_201_24679_20130222_072407_inLine
+BABEL_OP1_201_24679_20130222_072407_outLine
+BABEL_OP1_201_24982_20130529_044009_inLine
+BABEL_OP1_201_24982_20130529_044009_outLine
+BABEL_OP1_201_25015_20130501_223825_inLine
+BABEL_OP1_201_25015_20130501_223825_outLine
+BABEL_OP1_201_25961_20130223_033405_inLine
+BABEL_OP1_201_25961_20130223_033405_outLine
+BABEL_OP1_201_26072_20130429_011940_inLine
+BABEL_OP1_201_26072_20130429_011940_outLine
+BABEL_OP1_201_26388_20130528_030259_inLine
+BABEL_OP1_201_26388_20130528_030259_outLine
+BABEL_OP1_201_26836_20130528_100100_inLine
+BABEL_OP1_201_26836_20130528_100100_outLine
+BABEL_OP1_201_26836_20130528_101331_inLine
+BABEL_OP1_201_26836_20130528_101331_outLine
+BABEL_OP1_201_26999_20130228_090136_inLine
+BABEL_OP1_201_26999_20130228_090136_outLine
+BABEL_OP1_201_27042_20130701_075011_inLine
+BABEL_OP1_201_27042_20130701_075011_outLine
+BABEL_OP1_201_27203_20130602_005950_inLine
+BABEL_OP1_201_27203_20130602_005950_outLine
+BABEL_OP1_201_27590_20130304_072243_inLine
+BABEL_OP1_201_27590_20130304_072243_outLine
+BABEL_OP1_201_28419_20130528_035005_inLine
+BABEL_OP1_201_28419_20130528_035005_outLine
+BABEL_OP1_201_28522_20130303_104614_inLine
+BABEL_OP1_201_28522_20130303_104614_outLine
+BABEL_OP1_201_28600_20130701_051100_inLine
+BABEL_OP1_201_28600_20130701_051100_outLine
+BABEL_OP1_201_28606_20130305_101646_inLine
+BABEL_OP1_201_28606_20130305_101646_outLine
+BABEL_OP1_201_28775_20130529_005204_inLine
+BABEL_OP1_201_28775_20130529_005204_outLine
+BABEL_OP1_201_28814_20130704_000405_inLine
+BABEL_OP1_201_28814_20130704_000405_outLine
+BABEL_OP1_201_28871_20121207_015933_inLine
+BABEL_OP1_201_28871_20121207_015933_outLine
+BABEL_OP1_201_28945_20130528_094913_inLine
+BABEL_OP1_201_28945_20130528_094913_outLine
+BABEL_OP1_201_29023_20130530_024701_inLine
+BABEL_OP1_201_29023_20130530_024701_outLine
+BABEL_OP1_201_29072_20130304_052508_inLine
+BABEL_OP1_201_29072_20130304_052508_outLine
+BABEL_OP1_201_29168_20130222_015942_inLine
+BABEL_OP1_201_29168_20130222_015942_outLine
+BABEL_OP1_201_30180_20130528_033242_inLine
+BABEL_OP1_201_30180_20130528_033242_outLine
+BABEL_OP1_201_30395_20130529_034626_inLine
+BABEL_OP1_201_30395_20130529_034626_outLine
+BABEL_OP1_201_30432_20130227_084229_inLine
+BABEL_OP1_201_30432_20130227_084229_outLine
+BABEL_OP1_201_30576_20130527_002801_inLine
+BABEL_OP1_201_30576_20130527_002801_outLine
+BABEL_OP1_201_31109_20130510_030741_inLine
+BABEL_OP1_201_31109_20130510_030741_outLine
+BABEL_OP1_201_31628_20130301_081256_inLine
+BABEL_OP1_201_31628_20130301_081256_outLine
+BABEL_OP1_201_32097_20130130_021717_inLine
+BABEL_OP1_201_32097_20130130_021717_outLine
+BABEL_OP1_201_32122_20130529_070011_inLine
+BABEL_OP1_201_32122_20130529_070011_outLine
+BABEL_OP1_201_32171_20130220_084632_inLine
+BABEL_OP1_201_32171_20130220_084632_outLine
+BABEL_OP1_201_32708_20130528_093343_inLine
+BABEL_OP1_201_32708_20130528_093343_outLine
+BABEL_OP1_201_33229_20130429_025144_inLine
+BABEL_OP1_201_33229_20130429_025144_outLine
+BABEL_OP1_201_33659_20130214_000335_inLine
+BABEL_OP1_201_33659_20130214_000335_outLine
+BABEL_OP1_201_33806_20130630_224040_inLine
+BABEL_OP1_201_33806_20130630_224040_outLine
+BABEL_OP1_201_34106_20130305_032650_inLine
+BABEL_OP1_201_34106_20130305_032650_outLine
+BABEL_OP1_201_34145_20130301_033324_inLine
+BABEL_OP1_201_34145_20130301_033324_outLine
+BABEL_OP1_201_34197_20130227_065321_inLine
+BABEL_OP1_201_34197_20130227_065321_outLine
+BABEL_OP1_201_34336_20130527_071806_inLine
+BABEL_OP1_201_34336_20130527_071806_outLine
+BABEL_OP1_201_34679_20130529_040931_inLine
+BABEL_OP1_201_34679_20130529_040931_outLine
+BABEL_OP1_201_34826_20130430_025628_inLine
+BABEL_OP1_201_34826_20130430_025628_outLine
+BABEL_OP1_201_34903_20130302_052444_inLine
+BABEL_OP1_201_34903_20130302_052444_outLine
+BABEL_OP1_201_35000_20130702_092721_inLine
+BABEL_OP1_201_35000_20130702_092721_outLine
+BABEL_OP1_201_35008_20130305_114402_inLine
+BABEL_OP1_201_35008_20130305_114402_outLine
+BABEL_OP1_201_35467_20130321_032230_inLine
+BABEL_OP1_201_35467_20130321_032230_outLine
+BABEL_OP1_201_36219_20130528_021139_inLine
+BABEL_OP1_201_36219_20130528_021139_outLine
+BABEL_OP1_201_36341_20130226_074136_inLine
+BABEL_OP1_201_36341_20130226_074136_outLine
+BABEL_OP1_201_36894_20130221_070614_inLine
+BABEL_OP1_201_36894_20130221_070614_outLine
+BABEL_OP1_201_37271_20130430_025526_inLine
+BABEL_OP1_201_37271_20130430_025526_outLine
+BABEL_OP1_201_37598_20130601_032226_inLine
+BABEL_OP1_201_37598_20130601_032226_outLine
+BABEL_OP1_201_38076_20130302_132339_inLine
+BABEL_OP1_201_38076_20130302_132339_outLine
+BABEL_OP1_201_38878_20130228_041057_inLine
+BABEL_OP1_201_38878_20130228_041057_outLine
+BABEL_OP1_201_39426_20130429_085957_inLine
+BABEL_OP1_201_39426_20130429_085957_outLine
+BABEL_OP1_201_39638_20130126_082343_inLine
+BABEL_OP1_201_39638_20130126_082343_outLine
+BABEL_OP1_201_40713_20130530_005109_inLine
+BABEL_OP1_201_40713_20130530_005109_outLine
+BABEL_OP1_201_41097_20130228_063046_inLine
+BABEL_OP1_201_41097_20130228_063046_outLine
+BABEL_OP1_201_41334_20130630_085009_inLine
+BABEL_OP1_201_41334_20130630_085009_outLine
+BABEL_OP1_201_41469_20130303_034949_inLine
+BABEL_OP1_201_41469_20130303_034949_outLine
+BABEL_OP1_201_41542_20130429_084921_inLine
+BABEL_OP1_201_41542_20130429_084921_outLine
+BABEL_OP1_201_41618_20130518_035113_inLine
+BABEL_OP1_201_41618_20130518_035113_outLine
+BABEL_OP1_201_41685_20130214_090836_inLine
+BABEL_OP1_201_41685_20130214_090836_outLine
+BABEL_OP1_201_41720_20130203_053934_inLine
+BABEL_OP1_201_41720_20130203_053934_outLine
+BABEL_OP1_201_41890_20130430_020800_inLine
+BABEL_OP1_201_41890_20130430_020800_outLine
+BABEL_OP1_201_42155_20130521_023245_inLine
+BABEL_OP1_201_42155_20130521_023245_outLine
+BABEL_OP1_201_42243_20130303_022442_inLine
+BABEL_OP1_201_42243_20130303_022442_outLine
+BABEL_OP1_201_42497_20130529_040557_inLine
+BABEL_OP1_201_42497_20130529_040557_outLine
+BABEL_OP1_201_42619_20130228_081700_inLine
+BABEL_OP1_201_42619_20130228_081700_outLine
+BABEL_OP1_201_42771_20130516_235914_inLine
+BABEL_OP1_201_42771_20130516_235914_outLine
+BABEL_OP1_201_42834_20130227_094847_inLine
+BABEL_OP1_201_42834_20130227_094847_outLine
+BABEL_OP1_201_42991_20130301_104105_inLine
+BABEL_OP1_201_42991_20130301_104105_outLine
+BABEL_OP1_201_43286_20130301_085932_inLine
+BABEL_OP1_201_43286_20130301_085932_outLine
+BABEL_OP1_201_43323_20130211_115349_inLine
+BABEL_OP1_201_43323_20130211_120743_inLine
+BABEL_OP1_201_43323_20130211_120743_outLine
+BABEL_OP1_201_43588_20130430_054932_inLine
+BABEL_OP1_201_43588_20130430_054932_outLine
+BABEL_OP1_201_43646_20130130_080323_inLine
+BABEL_OP1_201_43646_20130130_080323_outLine
+BABEL_OP1_201_43784_20130529_104333_inLine
+BABEL_OP1_201_43784_20130529_104333_outLine
+BABEL_OP1_201_43794_20130603_014105_inLine
+BABEL_OP1_201_43794_20130603_014105_outLine
+BABEL_OP1_201_44477_20130302_072308_inLine
+BABEL_OP1_201_44477_20130302_072308_outLine
+BABEL_OP1_201_44477_20130302_073645_inLine
+BABEL_OP1_201_44477_20130302_073645_outLine
+BABEL_OP1_201_44478_20130502_075027_inLine
+BABEL_OP1_201_44478_20130502_075027_outLine
+BABEL_OP1_201_44709_20130303_114051_inLine
+BABEL_OP1_201_44709_20130303_114051_outLine
+BABEL_OP1_201_45559_20130503_033307_inLine
+BABEL_OP1_201_45559_20130503_033307_outLine
+BABEL_OP1_201_46066_20130429_123746_inLine
+BABEL_OP1_201_46066_20130429_123746_outLine
+BABEL_OP1_201_46169_20130702_011629_inLine
+BABEL_OP1_201_46169_20130702_011629_outLine
+BABEL_OP1_201_46310_20130328_024919_inLine
+BABEL_OP1_201_46310_20130328_024919_outLine
+BABEL_OP1_201_46550_20130528_065103_inLine
+BABEL_OP1_201_46550_20130528_065103_outLine
+BABEL_OP1_201_46558_20130220_030534_inLine
+BABEL_OP1_201_46558_20130220_030534_outLine
+BABEL_OP1_201_46589_20130302_082301_inLine
+BABEL_OP1_201_46589_20130302_082301_outLine
+BABEL_OP1_201_46625_20130308_141424_inLine
+BABEL_OP1_201_46625_20130308_141424_outLine
+BABEL_OP1_201_46681_20130530_033328_inLine
+BABEL_OP1_201_46681_20130530_033328_outLine
+BABEL_OP1_201_46770_20130429_011947_inLine
+BABEL_OP1_201_46770_20130429_011947_outLine
+BABEL_OP1_201_46976_20130517_023139_inLine
+BABEL_OP1_201_46976_20130517_023139_outLine
+BABEL_OP1_201_47270_20130427_010445_inLine
+BABEL_OP1_201_47270_20130427_010445_outLine
+BABEL_OP1_201_47270_20130427_011917_inLine
+BABEL_OP1_201_47270_20130427_011917_outLine
+BABEL_OP1_201_47270_20130427_013155_inLine
+BABEL_OP1_201_47270_20130427_013155_outLine
+BABEL_OP1_201_47802_20130524_044824_inLine
+BABEL_OP1_201_47802_20130524_044824_outLine
+BABEL_OP1_201_47878_20130522_021958_inLine
+BABEL_OP1_201_47878_20130522_021958_outLine
+BABEL_OP1_201_48243_20130602_122113_inLine
+BABEL_OP1_201_48243_20130602_122113_outLine
+BABEL_OP1_201_48299_20130226_120812_inLine
+BABEL_OP1_201_48299_20130226_120812_outLine
+BABEL_OP1_201_48299_20130226_122743_inLine
+BABEL_OP1_201_48299_20130226_122743_outLine
+BABEL_OP1_201_48907_20130429_093546_inLine
+BABEL_OP1_201_48907_20130429_093546_outLine
+BABEL_OP1_201_49027_20130529_101617_inLine
+BABEL_OP1_201_49027_20130529_101617_outLine
+BABEL_OP1_201_49118_20130429_023211_inLine
+BABEL_OP1_201_49118_20130429_023211_outLine
+BABEL_OP1_201_49216_20130314_070036_inLine
+BABEL_OP1_201_49216_20130314_070036_outLine
+BABEL_OP1_201_49502_20130302_064002_inLine
+BABEL_OP1_201_49502_20130302_064002_outLine
+BABEL_OP1_201_49630_20130306_105833_inLine
+BABEL_OP1_201_49630_20130306_105833_outLine
+BABEL_OP1_201_49637_20130426_020402_inLine
+BABEL_OP1_201_49637_20130426_020402_outLine
+BABEL_OP1_201_49768_20130529_082143_inLine
+BABEL_OP1_201_49768_20130529_082143_outLine
+BABEL_OP1_201_49902_20130527_063448_inLine
+BABEL_OP1_201_49902_20130527_063448_outLine
+BABEL_OP1_201_49907_20130529_101707_inLine
+BABEL_OP1_201_49907_20130529_101707_outLine
+BABEL_OP1_201_49945_20130501_080703_inLine
+BABEL_OP1_201_49945_20130501_080703_outLine
+BABEL_OP1_201_50549_20130428_053142_inLine
+BABEL_OP1_201_50549_20130428_053142_outLine
+BABEL_OP1_201_50549_20130428_055313_inLine
+BABEL_OP1_201_50549_20130428_055313_outLine
+BABEL_OP1_201_50601_20130521_045944_inLine
+BABEL_OP1_201_50601_20130521_045944_outLine
+BABEL_OP1_201_50681_20130228_015155_inLine
+BABEL_OP1_201_50681_20130228_015155_outLine
+BABEL_OP1_201_50681_20130228_020643_inLine
+BABEL_OP1_201_50681_20130228_020643_outLine
+BABEL_OP1_201_50726_20130228_033852_inLine
+BABEL_OP1_201_50726_20130228_033852_outLine
+BABEL_OP1_201_50779_20130522_051719_inLine
+BABEL_OP1_201_50779_20130522_051719_outLine
+BABEL_OP1_201_50810_20130312_055632_inLine
+BABEL_OP1_201_50810_20130312_055632_outLine
+BABEL_OP1_201_50940_20130309_041526_inLine
+BABEL_OP1_201_50940_20130309_041526_outLine
+BABEL_OP1_201_51611_20130530_094039_inLine
+BABEL_OP1_201_51611_20130530_094039_outLine
+BABEL_OP1_201_52301_20130223_024524_inLine
+BABEL_OP1_201_52301_20130223_024524_outLine
+BABEL_OP1_201_52404_20130301_233232_inLine
+BABEL_OP1_201_52404_20130301_233232_outLine
+BABEL_OP1_201_52422_20130428_023051_inLine
+BABEL_OP1_201_52422_20130428_023051_outLine
+BABEL_OP1_201_52490_20130220_051000_inLine
+BABEL_OP1_201_52490_20130220_051000_outLine
+BABEL_OP1_201_52804_20130529_032046_inLine
+BABEL_OP1_201_52804_20130529_032046_outLine
+BABEL_OP1_201_52818_20130301_121852_inLine
+BABEL_OP1_201_52818_20130301_121852_outLine
+BABEL_OP1_201_53917_20130429_091547_inLine
+BABEL_OP1_201_53917_20130429_091547_outLine
+BABEL_OP1_201_55259_20130526_073400_inLine
+BABEL_OP1_201_55259_20130526_073400_outLine
+BABEL_OP1_201_55267_20130228_064943_inLine
+BABEL_OP1_201_55267_20130228_064943_outLine
+BABEL_OP1_201_55968_20130314_043319_inLine
+BABEL_OP1_201_55968_20130314_043319_outLine
+BABEL_OP1_201_55968_20130314_044612_inLine
+BABEL_OP1_201_55968_20130314_044612_outLine
+BABEL_OP1_201_56023_20130501_081011_inLine
+BABEL_OP1_201_56023_20130501_081011_outLine
+BABEL_OP1_201_56307_20130301_024958_inLine
+BABEL_OP1_201_56307_20130301_024958_outLine
+BABEL_OP1_201_57065_20130302_033227_inLine
+BABEL_OP1_201_57065_20130302_033227_outLine
+BABEL_OP1_201_57093_20130510_071214_inLine
+BABEL_OP1_201_57093_20130510_071214_outLine
+BABEL_OP1_201_57233_20130206_090034_inLine
+BABEL_OP1_201_57233_20130206_090034_outLine
+BABEL_OP1_201_57464_20130428_051858_inLine
+BABEL_OP1_201_57464_20130428_051858_outLine
+BABEL_OP1_201_57548_20130518_042831_inLine
+BABEL_OP1_201_57548_20130518_042831_outLine
+BABEL_OP1_201_57678_20130528_022013_inLine
+BABEL_OP1_201_57678_20130528_022013_outLine
+BABEL_OP1_201_58107_20130518_004334_inLine
+BABEL_OP1_201_58107_20130518_004334_outLine
+BABEL_OP1_201_58145_20130602_044301_inLine
+BABEL_OP1_201_58145_20130602_044301_outLine
+BABEL_OP1_201_58313_20130522_055528_inLine
+BABEL_OP1_201_58313_20130522_055528_outLine
+BABEL_OP1_201_58585_20130429_003422_inLine
+BABEL_OP1_201_58585_20130429_003422_outLine
+BABEL_OP1_201_58821_20130306_091219_inLine
+BABEL_OP1_201_58821_20130306_091219_outLine
+BABEL_OP1_201_59039_20130220_090641_inLine
+BABEL_OP1_201_59039_20130220_090641_outLine
+BABEL_OP1_201_59509_20130227_090836_inLine
+BABEL_OP1_201_59509_20130227_090836_outLine
+BABEL_OP1_201_59509_20130227_092230_inLine
+BABEL_OP1_201_59509_20130227_092230_outLine
+BABEL_OP1_201_60115_20130301_114138_inLine
+BABEL_OP1_201_60115_20130301_114138_outLine
+BABEL_OP1_201_60418_20130301_073212_inLine
+BABEL_OP1_201_60418_20130301_073212_outLine
+BABEL_OP1_201_60436_20130503_044737_inLine
+BABEL_OP1_201_60436_20130503_044737_outLine
+BABEL_OP1_201_60474_20130527_081400_inLine
+BABEL_OP1_201_60474_20130527_081400_outLine
+BABEL_OP1_201_60661_20130529_023958_inLine
+BABEL_OP1_201_60661_20130529_023958_outLine
+BABEL_OP1_201_61435_20130430_031742_inLine
+BABEL_OP1_201_61435_20130430_031742_outLine
+BABEL_OP1_201_61873_20130519_030703_inLine
+BABEL_OP1_201_61873_20130519_030703_outLine
+BABEL_OP1_201_62014_20130228_083820_inLine
+BABEL_OP1_201_62014_20130228_083820_outLine
+BABEL_OP1_201_63081_20130226_035431_inLine
+BABEL_OP1_201_63081_20130226_035431_outLine
+BABEL_OP1_201_63084_20130301_114742_inLine
+BABEL_OP1_201_63084_20130301_114742_outLine
+BABEL_OP1_201_63307_20130521_235343_inLine
+BABEL_OP1_201_63307_20130521_235343_outLine
+BABEL_OP1_201_63425_20130301_080734_inLine
+BABEL_OP1_201_63425_20130301_080734_outLine
+BABEL_OP1_201_63604_20130412_021112_inLine
+BABEL_OP1_201_63604_20130412_021112_outLine
+BABEL_OP1_201_64259_20130202_090605_inLine
+BABEL_OP1_201_64259_20130202_090605_outLine
+BABEL_OP1_201_64398_20130301_084125_inLine
+BABEL_OP1_201_64398_20130301_084125_outLine
+BABEL_OP1_201_65064_20130521_061233_inLine
+BABEL_OP1_201_65064_20130521_061233_outLine
+BABEL_OP1_201_65561_20130305_120931_inLine
+BABEL_OP1_201_65561_20130305_120931_outLine
+BABEL_OP1_201_65723_20130529_004610_inLine
+BABEL_OP1_201_65723_20130529_004610_outLine
+BABEL_OP1_201_66045_20130509_044408_inLine
+BABEL_OP1_201_66045_20130509_044408_outLine
+BABEL_OP1_201_66472_20130517_041032_inLine
+BABEL_OP1_201_66472_20130517_041032_outLine
+BABEL_OP1_201_67213_20130224_044805_inLine
+BABEL_OP1_201_67213_20130224_044805_outLine
+BABEL_OP1_201_67283_20130223_012433_inLine
+BABEL_OP1_201_67283_20130223_012433_outLine
+BABEL_OP1_201_67401_20130522_063044_inLine
+BABEL_OP1_201_67401_20130522_063044_outLine
+BABEL_OP1_201_67622_20130306_012440_inLine
+BABEL_OP1_201_67622_20130306_012440_outLine
+BABEL_OP1_201_68040_20130517_004413_inLine
+BABEL_OP1_201_68040_20130517_004413_outLine
+BABEL_OP1_201_68068_20130302_042557_inLine
+BABEL_OP1_201_68068_20130302_042557_outLine
+BABEL_OP1_201_68244_20130228_052832_inLine
+BABEL_OP1_201_68244_20130228_052832_outLine
+BABEL_OP1_201_68306_20130301_132523_inLine
+BABEL_OP1_201_68306_20130301_132523_outLine
+BABEL_OP1_201_68748_20130301_051957_inLine
+BABEL_OP1_201_68748_20130301_051957_outLine
+BABEL_OP1_201_68924_20130228_031746_inLine
+BABEL_OP1_201_68924_20130228_031746_outLine
+BABEL_OP1_201_69107_20130518_053632_inLine
+BABEL_OP1_201_69107_20130518_053632_outLine
+BABEL_OP1_201_69574_20130313_015419_inLine
+BABEL_OP1_201_69574_20130313_015419_outLine
+BABEL_OP1_201_69578_20130509_033949_inLine
+BABEL_OP1_201_69578_20130509_033949_outLine
+BABEL_OP1_201_69636_20130302_024254_inLine
+BABEL_OP1_201_69636_20130302_024254_outLine
+BABEL_OP1_201_70343_20130302_035639_inLine
+BABEL_OP1_201_70343_20130302_035639_outLine
+BABEL_OP1_201_70343_20130302_040518_inLine
+BABEL_OP1_201_70343_20130302_040518_outLine
+BABEL_OP1_201_70386_20130528_033752_inLine
+BABEL_OP1_201_70386_20130528_033752_outLine
+BABEL_OP1_201_70601_20130528_025629_inLine
+BABEL_OP1_201_70601_20130528_025629_outLine
+BABEL_OP1_201_70794_20130314_065330_inLine
+BABEL_OP1_201_70794_20130314_065330_outLine
+BABEL_OP1_201_71121_20130215_075206_inLine
+BABEL_OP1_201_71121_20130215_075206_outLine
+BABEL_OP1_201_72324_20130227_080108_inLine
+BABEL_OP1_201_72324_20130227_080108_outLine
+BABEL_OP1_201_72349_20130527_005409_inLine
+BABEL_OP1_201_72349_20130527_005409_outLine
+BABEL_OP1_201_72587_20130227_092146_inLine
+BABEL_OP1_201_72587_20130227_092146_outLine
+BABEL_OP1_201_72844_20130320_030750_inLine
+BABEL_OP1_201_72844_20130320_030750_outLine
+BABEL_OP1_201_73430_20130306_070252_inLine
+BABEL_OP1_201_73430_20130306_070252_outLine
+BABEL_OP1_201_73485_20130704_012751_inLine
+BABEL_OP1_201_73485_20130704_012751_outLine
+BABEL_OP1_201_73511_20130305_064018_inLine
+BABEL_OP1_201_73511_20130305_064018_outLine
+BABEL_OP1_201_73518_20130427_020953_inLine
+BABEL_OP1_201_73518_20130427_020953_outLine
+BABEL_OP1_201_73591_20121205_085430_inLine
+BABEL_OP1_201_73591_20121205_085430_outLine
+BABEL_OP1_201_73591_20121205_091943_inLine
+BABEL_OP1_201_73591_20121205_091943_outLine
+BABEL_OP1_201_73964_20130502_060046_inLine
+BABEL_OP1_201_73964_20130502_060046_outLine
+BABEL_OP1_201_74280_20130307_060529_inLine
+BABEL_OP1_201_74280_20130307_060529_outLine
+BABEL_OP1_201_74728_20130502_015015_inLine
+BABEL_OP1_201_74728_20130502_015015_outLine
+BABEL_OP1_201_74799_20130530_004139_inLine
+BABEL_OP1_201_74799_20130530_004139_outLine
+BABEL_OP1_201_74921_20130302_015536_inLine
+BABEL_OP1_201_74921_20130302_015536_outLine
+BABEL_OP1_201_74921_20130302_020351_inLine
+BABEL_OP1_201_74921_20130302_020351_outLine
+BABEL_OP1_201_75064_20130528_032631_inLine
+BABEL_OP1_201_75064_20130528_032631_outLine
+BABEL_OP1_201_75342_20130305_071206_inLine
+BABEL_OP1_201_75342_20130305_071206_outLine
+BABEL_OP1_201_75764_20130428_041456_inLine
+BABEL_OP1_201_75764_20130428_041456_outLine
+BABEL_OP1_201_75993_20130529_053731_inLine
+BABEL_OP1_201_75993_20130529_053731_outLine
+BABEL_OP1_201_76683_20130524_053916_inLine
+BABEL_OP1_201_76683_20130524_053916_outLine
+BABEL_OP1_201_77126_20121205_072118_inLine
+BABEL_OP1_201_77126_20121205_072118_outLine
+BABEL_OP1_201_77427_20130528_003638_inLine
+BABEL_OP1_201_77427_20130528_003638_outLine
+BABEL_OP1_201_78116_20130304_074916_inLine
+BABEL_OP1_201_78116_20130304_074916_outLine
+BABEL_OP1_201_78398_20130529_023517_inLine
+BABEL_OP1_201_78398_20130529_023517_outLine
+BABEL_OP1_201_78943_20130528_034620_inLine
+BABEL_OP1_201_78943_20130528_034620_outLine
+BABEL_OP1_201_79129_20130524_031851_inLine
+BABEL_OP1_201_79129_20130524_031851_outLine
+BABEL_OP1_201_79167_20130303_071948_inLine
+BABEL_OP1_201_79167_20130303_071948_outLine
+BABEL_OP1_201_79167_20130303_093604_inLine
+BABEL_OP1_201_79167_20130303_093604_outLine
+BABEL_OP1_201_79429_20130216_152022_inLine
+BABEL_OP1_201_79429_20130216_152022_outLine
+BABEL_OP1_201_80306_20130509_071053_inLine
+BABEL_OP1_201_80306_20130509_071053_outLine
+BABEL_OP1_201_81287_20130305_141750_inLine
+BABEL_OP1_201_81287_20130305_141750_outLine
+BABEL_OP1_201_81392_20130304_082518_inLine
+BABEL_OP1_201_81392_20130304_082518_outLine
+BABEL_OP1_201_81424_20130304_080620_inLine
+BABEL_OP1_201_81424_20130304_080620_outLine
+BABEL_OP1_201_81433_20130514_063900_inLine
+BABEL_OP1_201_81433_20130514_063900_outLine
+BABEL_OP1_201_81674_20130224_134642_inLine
+BABEL_OP1_201_81674_20130224_134642_outLine
+BABEL_OP1_201_81810_20130302_043825_inLine
+BABEL_OP1_201_81810_20130302_043825_outLine
+BABEL_OP1_201_81971_20130227_030618_inLine
+BABEL_OP1_201_81971_20130227_030618_outLine
+BABEL_OP1_201_82123_20130505_053636_inLine
+BABEL_OP1_201_82123_20130505_053636_outLine
+BABEL_OP1_201_82138_20130509_063904_inLine
+BABEL_OP1_201_82138_20130509_063904_outLine
+BABEL_OP1_201_82140_20130510_013208_inLine
+BABEL_OP1_201_82140_20130510_013208_outLine
+BABEL_OP1_201_82637_20130227_044340_inLine
+BABEL_OP1_201_82637_20130227_044340_outLine
+BABEL_OP1_201_82904_20130427_005507_inLine
+BABEL_OP1_201_82904_20130427_005507_outLine
+BABEL_OP1_201_82979_20130529_063602_inLine
+BABEL_OP1_201_82979_20130529_063602_outLine
+BABEL_OP1_201_83238_20130514_054056_inLine
+BABEL_OP1_201_83238_20130514_054056_outLine
+BABEL_OP1_201_83430_20130210_094011_inLine
+BABEL_OP1_201_83430_20130210_094011_outLine
+BABEL_OP1_201_83455_20130511_053045_inLine
+BABEL_OP1_201_83455_20130511_053045_outLine
+BABEL_OP1_201_83625_20130128_091225_inLine
+BABEL_OP1_201_83625_20130128_091225_outLine
+BABEL_OP1_201_83651_20130604_075201_inLine
+BABEL_OP1_201_83651_20130604_075201_outLine
+BABEL_OP1_201_83929_20121205_055436_inLine
+BABEL_OP1_201_83929_20121205_055436_outLine
+BABEL_OP1_201_83929_20121206_061559_inLine
+BABEL_OP1_201_83929_20121206_061559_outLine
+BABEL_OP1_201_83935_20130305_104443_inLine
+BABEL_OP1_201_83935_20130305_104443_outLine
+BABEL_OP1_201_84547_20130227_041326_inLine
+BABEL_OP1_201_84547_20130227_041326_outLine
+BABEL_OP1_201_84715_20130429_094324_inLine
+BABEL_OP1_201_84715_20130429_094324_outLine
+BABEL_OP1_201_84936_20130301_073352_inLine
+BABEL_OP1_201_84936_20130301_073352_outLine
+BABEL_OP1_201_85010_20130206_122216_inLine
+BABEL_OP1_201_85010_20130206_122216_outLine
+BABEL_OP1_201_85047_20130510_055057_inLine
+BABEL_OP1_201_85047_20130510_055057_outLine
+BABEL_OP1_201_85048_20130522_072215_inLine
+BABEL_OP1_201_85048_20130522_072215_outLine
+BABEL_OP1_201_85647_20130511_015627_inLine
+BABEL_OP1_201_85647_20130511_015627_outLine
+BABEL_OP1_201_86191_20130528_045113_inLine
+BABEL_OP1_201_86191_20130528_045113_outLine
+BABEL_OP1_201_86191_20130528_051540_inLine
+BABEL_OP1_201_86191_20130528_051540_outLine
+BABEL_OP1_201_86433_20130303_035210_inLine
+BABEL_OP1_201_86433_20130303_035210_outLine
+BABEL_OP1_201_86467_20130221_031701_inLine
+BABEL_OP1_201_86467_20130221_031701_outLine
+BABEL_OP1_201_86557_20130306_054158_inLine
+BABEL_OP1_201_86557_20130306_054158_outLine
+BABEL_OP1_201_86635_20130227_080743_inLine
+BABEL_OP1_201_86635_20130227_080743_outLine
+BABEL_OP1_201_86676_20130302_034945_inLine
+BABEL_OP1_201_86676_20130302_034945_outLine
+BABEL_OP1_201_86888_20130301_011747_inLine
+BABEL_OP1_201_86888_20130301_011747_outLine
+BABEL_OP1_201_87074_20130529_072238_inLine
+BABEL_OP1_201_87074_20130529_072238_outLine
+BABEL_OP1_201_87179_20130414_223248_inLine
+BABEL_OP1_201_87179_20130414_223248_outLine
+BABEL_OP1_201_87298_20130530_035908_inLine
+BABEL_OP1_201_87298_20130530_035908_outLine
+BABEL_OP1_201_87313_20130228_054816_inLine
+BABEL_OP1_201_87313_20130228_054816_outLine
+BABEL_OP1_201_87545_20130501_052733_inLine
+BABEL_OP1_201_87545_20130501_052733_outLine
+BABEL_OP1_201_87731_20130216_084329_inLine
+BABEL_OP1_201_87731_20130216_084329_outLine
+BABEL_OP1_201_87796_20130531_043218_inLine
+BABEL_OP1_201_87796_20130531_043218_outLine
+BABEL_OP1_201_88445_20130228_100123_inLine
+BABEL_OP1_201_88445_20130228_100123_outLine
+BABEL_OP1_201_88661_20130305_103247_inLine
+BABEL_OP1_201_88661_20130305_103247_outLine
+BABEL_OP1_201_89059_20130429_001658_inLine
+BABEL_OP1_201_89059_20130429_001658_outLine
+BABEL_OP1_201_89877_20130602_052802_inLine
+BABEL_OP1_201_89877_20130602_052802_outLine
+BABEL_OP1_201_90347_20130601_020619_inLine
+BABEL_OP1_201_90347_20130601_020619_outLine
+BABEL_OP1_201_90777_20130530_043440_inLine
+BABEL_OP1_201_90777_20130530_043440_outLine
+BABEL_OP1_201_91125_20130301_044113_inLine
+BABEL_OP1_201_91125_20130301_044113_outLine
+BABEL_OP1_201_91336_20130511_010308_inLine
+BABEL_OP1_201_91336_20130511_010308_outLine
+BABEL_OP1_201_91891_20130306_084037_inLine
+BABEL_OP1_201_91891_20130306_084037_outLine
+BABEL_OP1_201_91944_20130529_030733_inLine
+BABEL_OP1_201_91944_20130529_030733_outLine
+BABEL_OP1_201_91977_20130228_225341_inLine
+BABEL_OP1_201_91977_20130228_225341_outLine
+BABEL_OP1_201_92509_20130222_064302_inLine
+BABEL_OP1_201_92509_20130222_064302_outLine
+BABEL_OP1_201_92557_20130428_115801_inLine
+BABEL_OP1_201_92557_20130428_115801_outLine
+BABEL_OP1_201_92740_20130301_044629_inLine
+BABEL_OP1_201_92740_20130301_044629_outLine
+BABEL_OP1_201_92792_20130630_124723_inLine
+BABEL_OP1_201_92792_20130630_124723_outLine
+BABEL_OP1_201_92942_20130601_011759_inLine
+BABEL_OP1_201_92942_20130601_011759_outLine
+BABEL_OP1_201_93222_20130127_012443_inLine
+BABEL_OP1_201_93222_20130127_012443_outLine
+BABEL_OP1_201_93224_20130227_095611_inLine
+BABEL_OP1_201_93224_20130227_095611_outLine
+BABEL_OP1_201_93604_20130502_071337_inLine
+BABEL_OP1_201_93604_20130502_071337_outLine
+BABEL_OP1_201_93964_20130511_000644_inLine
+BABEL_OP1_201_93964_20130511_000644_outLine
+BABEL_OP1_201_94025_20130303_091916_inLine
+BABEL_OP1_201_94025_20130303_091916_outLine
+BABEL_OP1_201_94316_20130503_072805_inLine
+BABEL_OP1_201_94316_20130503_072805_outLine
+BABEL_OP1_201_94449_20130704_033336_inLine
+BABEL_OP1_201_94449_20130704_033336_outLine
+BABEL_OP1_201_94487_20130502_053741_inLine
+BABEL_OP1_201_94487_20130502_053741_outLine
+BABEL_OP1_201_94666_20130512_052019_inLine
+BABEL_OP1_201_94666_20130512_052019_outLine
+BABEL_OP1_201_94869_20130313_052715_inLine
+BABEL_OP1_201_94869_20130313_052715_outLine
+BABEL_OP1_201_94923_20130531_054229_inLine
+BABEL_OP1_201_94923_20130531_054229_outLine
+BABEL_OP1_201_95446_20130430_051750_inLine
+BABEL_OP1_201_95446_20130430_051750_outLine
+BABEL_OP1_201_96059_20130430_034442_inLine
+BABEL_OP1_201_96059_20130430_034442_outLine
+BABEL_OP1_201_96376_20130704_011157_inLine
+BABEL_OP1_201_96376_20130704_011157_outLine
+BABEL_OP1_201_96820_20130514_032741_inLine
+BABEL_OP1_201_96820_20130514_032741_outLine
+BABEL_OP1_201_97363_20130528_063449_inLine
+BABEL_OP1_201_97363_20130528_063449_outLine
+BABEL_OP1_201_97557_20130228_004756_inLine
+BABEL_OP1_201_97557_20130228_004756_outLine
+BABEL_OP1_201_99202_20130521_003552_inLine
+BABEL_OP1_201_99202_20130521_003552_outLine
+BABEL_OP1_201_99955_20130429_001807_inLine
+BABEL_OP1_201_99955_20130429_001807_outLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.list b/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.list
new file mode 100644
index 00000000000..c6271d71566
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.list
@@ -0,0 +1,126 @@
+BABEL_OP1_201_13178_20130301_043649_inLine
+BABEL_OP1_201_13178_20130301_043649_outLine
+BABEL_OP1_201_14229_20130528_023254_inLine
+BABEL_OP1_201_14229_20130528_023254_outLine
+BABEL_OP1_201_15216_20130503_005405_inLine
+BABEL_OP1_201_15216_20130503_005405_outLine
+BABEL_OP1_201_15848_20130130_070404_inLine
+BABEL_OP1_201_15848_20130130_070404_outLine
+BABEL_OP1_201_16938_20130514_072820_inLine
+BABEL_OP1_201_16938_20130514_072820_outLine
+BABEL_OP1_201_17881_20130429_230318_inLine
+BABEL_OP1_201_17881_20130429_230318_outLine
+BABEL_OP1_201_17923_20130529_021211_inLine
+BABEL_OP1_201_17923_20130529_021211_outLine
+BABEL_OP1_201_18118_20130501_084131_inLine
+BABEL_OP1_201_18118_20130501_084131_outLine
+BABEL_OP1_201_19722_20130425_005348_inLine
+BABEL_OP1_201_19722_20130425_005348_outLine
+BABEL_OP1_201_19749_20130429_090621_inLine
+BABEL_OP1_201_19749_20130429_090621_outLine
+BABEL_OP1_201_20768_20130701_035344_inLine
+BABEL_OP1_201_20768_20130701_035344_outLine
+BABEL_OP1_201_20800_20130529_035944_inLine
+BABEL_OP1_201_20800_20130529_035944_outLine
+BABEL_OP1_201_21244_20130602_073304_inLine
+BABEL_OP1_201_21244_20130602_073304_outLine
+BABEL_OP1_201_24290_20130703_074550_inLine
+BABEL_OP1_201_24290_20130703_074550_outLine
+BABEL_OP1_201_24589_20130529_111014_inLine
+BABEL_OP1_201_24589_20130529_111014_outLine
+BABEL_OP1_201_26072_20130429_011940_inLine
+BABEL_OP1_201_26072_20130429_011940_outLine
+BABEL_OP1_201_28606_20130305_101646_inLine
+BABEL_OP1_201_28606_20130305_101646_outLine
+BABEL_OP1_201_28871_20121207_015933_inLine
+BABEL_OP1_201_28871_20121207_015933_outLine
+BABEL_OP1_201_31109_20130510_030741_inLine
+BABEL_OP1_201_31109_20130510_030741_outLine
+BABEL_OP1_201_36219_20130528_021139_inLine
+BABEL_OP1_201_36219_20130528_021139_outLine
+BABEL_OP1_201_36341_20130226_074136_inLine
+BABEL_OP1_201_36341_20130226_074136_outLine
+BABEL_OP1_201_37271_20130430_025526_inLine
+BABEL_OP1_201_37271_20130430_025526_outLine
+BABEL_OP1_201_40713_20130530_005109_inLine
+BABEL_OP1_201_40713_20130530_005109_outLine
+BABEL_OP1_201_41097_20130228_063046_inLine
+BABEL_OP1_201_41097_20130228_063046_outLine
+BABEL_OP1_201_41618_20130518_035113_inLine
+BABEL_OP1_201_41618_20130518_035113_outLine
+BABEL_OP1_201_42243_20130303_022442_inLine
+BABEL_OP1_201_42243_20130303_022442_outLine
+BABEL_OP1_201_42619_20130228_081700_inLine
+BABEL_OP1_201_42619_20130228_081700_outLine
+BABEL_OP1_201_43646_20130130_080323_inLine
+BABEL_OP1_201_43646_20130130_080323_outLine
+BABEL_OP1_201_45559_20130503_033307_inLine
+BABEL_OP1_201_45559_20130503_033307_outLine
+BABEL_OP1_201_46625_20130308_141424_inLine
+BABEL_OP1_201_46625_20130308_141424_outLine
+BABEL_OP1_201_47270_20130427_010445_inLine
+BABEL_OP1_201_47270_20130427_010445_outLine
+BABEL_OP1_201_47270_20130427_011917_inLine
+BABEL_OP1_201_47270_20130427_011917_outLine
+BABEL_OP1_201_47270_20130427_013155_inLine
+BABEL_OP1_201_47270_20130427_013155_outLine
+BABEL_OP1_201_48907_20130429_093546_inLine
+BABEL_OP1_201_48907_20130429_093546_outLine
+BABEL_OP1_201_49118_20130429_023211_inLine
+BABEL_OP1_201_49118_20130429_023211_outLine
+BABEL_OP1_201_49502_20130302_064002_inLine
+BABEL_OP1_201_49502_20130302_064002_outLine
+BABEL_OP1_201_49902_20130527_063448_inLine
+BABEL_OP1_201_49902_20130527_063448_outLine
+BABEL_OP1_201_50601_20130521_045944_inLine
+BABEL_OP1_201_50601_20130521_045944_outLine
+BABEL_OP1_201_50681_20130228_015155_inLine
+BABEL_OP1_201_50681_20130228_015155_outLine
+BABEL_OP1_201_50681_20130228_020643_inLine
+BABEL_OP1_201_50681_20130228_020643_outLine
+BABEL_OP1_201_50726_20130228_033852_inLine
+BABEL_OP1_201_50726_20130228_033852_outLine
+BABEL_OP1_201_52804_20130529_032046_inLine
+BABEL_OP1_201_52804_20130529_032046_outLine
+BABEL_OP1_201_53917_20130429_091547_inLine
+BABEL_OP1_201_53917_20130429_091547_outLine
+BABEL_OP1_201_57093_20130510_071214_inLine
+BABEL_OP1_201_57093_20130510_071214_outLine
+BABEL_OP1_201_60418_20130301_073212_inLine
+BABEL_OP1_201_60418_20130301_073212_outLine
+BABEL_OP1_201_63425_20130301_080734_inLine
+BABEL_OP1_201_63425_20130301_080734_outLine
+BABEL_OP1_201_65723_20130529_004610_inLine
+BABEL_OP1_201_65723_20130529_004610_outLine
+BABEL_OP1_201_68040_20130517_004413_inLine
+BABEL_OP1_201_68040_20130517_004413_outLine
+BABEL_OP1_201_70601_20130528_025629_inLine
+BABEL_OP1_201_70601_20130528_025629_outLine
+BABEL_OP1_201_71121_20130215_075206_inLine
+BABEL_OP1_201_71121_20130215_075206_outLine
+BABEL_OP1_201_72349_20130527_005409_inLine
+BABEL_OP1_201_72349_20130527_005409_outLine
+BABEL_OP1_201_74799_20130530_004139_inLine
+BABEL_OP1_201_74799_20130530_004139_outLine
+BABEL_OP1_201_77126_20121205_072118_inLine
+BABEL_OP1_201_77126_20121205_072118_outLine
+BABEL_OP1_201_81674_20130224_134642_inLine
+BABEL_OP1_201_81674_20130224_134642_outLine
+BABEL_OP1_201_83935_20130305_104443_inLine
+BABEL_OP1_201_83935_20130305_104443_outLine
+BABEL_OP1_201_85048_20130522_072215_inLine
+BABEL_OP1_201_85048_20130522_072215_outLine
+BABEL_OP1_201_87545_20130501_052733_inLine
+BABEL_OP1_201_87545_20130501_052733_outLine
+BABEL_OP1_201_91336_20130511_010308_inLine
+BABEL_OP1_201_91336_20130511_010308_outLine
+BABEL_OP1_201_92792_20130630_124723_inLine
+BABEL_OP1_201_92792_20130630_124723_outLine
+BABEL_OP1_201_92942_20130601_011759_inLine
+BABEL_OP1_201_92942_20130601_011759_outLine
+BABEL_OP1_201_93224_20130227_095611_inLine
+BABEL_OP1_201_93224_20130227_095611_outLine
+BABEL_OP1_201_94666_20130512_052019_inLine
+BABEL_OP1_201_94666_20130512_052019_outLine
+BABEL_OP1_201_94923_20130531_054229_inLine
+BABEL_OP1_201_94923_20130531_054229_outLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.untranscribed.list
new file mode 100644
index 00000000000..701e74d974b
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.untranscribed.list
@@ -0,0 +1,634 @@
+BABEL_OP1_201_10002_20130212_152853_inLine
+BABEL_OP1_201_10002_20130212_152853_outLine
+BABEL_OP1_201_10036_20130528_005502_inLine
+BABEL_OP1_201_10036_20130528_005502_outLine
+BABEL_OP1_201_10482_20130305_105317_inLine
+BABEL_OP1_201_10482_20130305_105317_outLine
+BABEL_OP1_201_10647_20130428_045536_inLine
+BABEL_OP1_201_10647_20130428_045536_outLine
+BABEL_OP1_201_10901_20130529_031421_inLine
+BABEL_OP1_201_10901_20130529_031421_outLine
+BABEL_OP1_201_11096_20130603_043221_inLine
+BABEL_OP1_201_11096_20130603_043221_outLine
+BABEL_OP1_201_11663_20130601_002903_inLine
+BABEL_OP1_201_11663_20130601_002903_outLine
+BABEL_OP1_201_11673_20130226_015822_inLine
+BABEL_OP1_201_11673_20130226_015822_outLine
+BABEL_OP1_201_11797_20130328_033102_inLine
+BABEL_OP1_201_11797_20130328_033102_outLine
+BABEL_OP1_201_12220_20130528_051622_inLine
+BABEL_OP1_201_12220_20130528_051622_outLine
+BABEL_OP1_201_12242_20130603_033446_inLine
+BABEL_OP1_201_12242_20130603_033446_outLine
+BABEL_OP1_201_12606_20130429_120351_inLine
+BABEL_OP1_201_12606_20130429_120351_outLine
+BABEL_OP1_201_12606_20130429_121040_inLine
+BABEL_OP1_201_12606_20130429_121040_outLine
+BABEL_OP1_201_12635_20130429_040127_inLine
+BABEL_OP1_201_12635_20130429_040127_outLine
+BABEL_OP1_201_12767_20130509_005500_inLine
+BABEL_OP1_201_12767_20130509_005500_outLine
+BABEL_OP1_201_13324_20130529_035029_inLine
+BABEL_OP1_201_13324_20130529_035029_outLine
+BABEL_OP1_201_13483_20130306_062423_inLine
+BABEL_OP1_201_13483_20130306_062423_outLine
+BABEL_OP1_201_13490_20130508_033252_inLine
+BABEL_OP1_201_13490_20130508_033252_outLine
+BABEL_OP1_201_13664_20130117_073343_inLine
+BABEL_OP1_201_13664_20130117_073343_outLine
+BABEL_OP1_201_14179_20130303_111502_inLine
+BABEL_OP1_201_14179_20130303_111502_outLine
+BABEL_OP1_201_14539_20130501_223201_inLine
+BABEL_OP1_201_14539_20130501_223201_outLine
+BABEL_OP1_201_14560_20130301_065543_inLine
+BABEL_OP1_201_14560_20130301_065543_outLine
+BABEL_OP1_201_14807_20130522_012156_inLine
+BABEL_OP1_201_14807_20130522_012156_outLine
+BABEL_OP1_201_14899_20130301_035636_inLine
+BABEL_OP1_201_14899_20130301_035636_outLine
+BABEL_OP1_201_14972_20130518_025852_inLine
+BABEL_OP1_201_14972_20130518_025852_outLine
+BABEL_OP1_201_15322_20130701_030436_inLine
+BABEL_OP1_201_15322_20130701_030436_outLine
+BABEL_OP1_201_15382_20130228_050819_inLine
+BABEL_OP1_201_15382_20130228_050819_outLine
+BABEL_OP1_201_15702_20130301_041117_inLine
+BABEL_OP1_201_15702_20130301_041117_outLine
+BABEL_OP1_201_15730_20130305_034450_inLine
+BABEL_OP1_201_15730_20130305_034450_outLine
+BABEL_OP1_201_15902_20130323_005824_inLine
+BABEL_OP1_201_15902_20130323_005824_outLine
+BABEL_OP1_201_16149_20130322_021647_inLine
+BABEL_OP1_201_16149_20130322_021647_outLine
+BABEL_OP1_201_16467_20130704_025921_inLine
+BABEL_OP1_201_16467_20130704_025921_outLine
+BABEL_OP1_201_16800_20130702_085158_inLine
+BABEL_OP1_201_16800_20130702_085158_outLine
+BABEL_OP1_201_16924_20130301_032937_inLine
+BABEL_OP1_201_16924_20130301_032937_outLine
+BABEL_OP1_201_17032_20130306_103506_inLine
+BABEL_OP1_201_17032_20130306_103506_outLine
+BABEL_OP1_201_17113_20130519_093427_inLine +BABEL_OP1_201_17113_20130519_093427_outLine +BABEL_OP1_201_17472_20130311_075957_inLine +BABEL_OP1_201_17472_20130311_075957_outLine +BABEL_OP1_201_17496_20130301_030157_inLine +BABEL_OP1_201_17496_20130301_030157_outLine +BABEL_OP1_201_17520_20130518_012147_inLine +BABEL_OP1_201_17520_20130518_012147_outLine +BABEL_OP1_201_17567_20130512_065938_inLine +BABEL_OP1_201_17567_20130512_065938_outLine +BABEL_OP1_201_18766_20130502_102418_inLine +BABEL_OP1_201_18766_20130502_102418_outLine +BABEL_OP1_201_19134_20130601_040621_inLine +BABEL_OP1_201_19134_20130601_040621_outLine +BABEL_OP1_201_19589_20130502_093932_inLine +BABEL_OP1_201_19589_20130502_093932_outLine +BABEL_OP1_201_19767_20130502_130900_inLine +BABEL_OP1_201_19767_20130502_130900_outLine +BABEL_OP1_201_19877_20130502_085421_inLine +BABEL_OP1_201_19877_20130502_085421_outLine +BABEL_OP1_201_20330_20130429_035418_inLine +BABEL_OP1_201_20330_20130429_035418_outLine +BABEL_OP1_201_20437_20130216_094002_inLine +BABEL_OP1_201_20437_20130216_094002_outLine +BABEL_OP1_201_20972_20130603_035417_inLine +BABEL_OP1_201_20972_20130603_035417_outLine +BABEL_OP1_201_21807_20130522_042858_inLine +BABEL_OP1_201_21807_20130522_042858_outLine +BABEL_OP1_201_21892_20130430_033520_inLine +BABEL_OP1_201_21892_20130430_033520_outLine +BABEL_OP1_201_22466_20121206_070403_inLine +BABEL_OP1_201_22466_20121206_070403_outLine +BABEL_OP1_201_22494_20130305_052405_inLine +BABEL_OP1_201_22494_20130305_052405_outLine +BABEL_OP1_201_22624_20130305_121723_inLine +BABEL_OP1_201_22624_20130305_121723_outLine +BABEL_OP1_201_23046_20130527_110737_inLine +BABEL_OP1_201_23046_20130527_110737_outLine +BABEL_OP1_201_23119_20130321_054320_inLine +BABEL_OP1_201_23119_20130321_054320_outLine +BABEL_OP1_201_23190_20130603_224243_inLine +BABEL_OP1_201_23190_20130603_224243_outLine +BABEL_OP1_201_23195_20130227_050013_inLine +BABEL_OP1_201_23195_20130227_050013_outLine +BABEL_OP1_201_23239_20130305_093734_inLine +BABEL_OP1_201_23239_20130305_093734_outLine +BABEL_OP1_201_23893_20130430_080021_inLine +BABEL_OP1_201_23893_20130430_080021_outLine +BABEL_OP1_201_24231_20130502_123747_inLine +BABEL_OP1_201_24231_20130502_123747_outLine +BABEL_OP1_201_24239_20130703_230221_inLine +BABEL_OP1_201_24239_20130703_230221_outLine +BABEL_OP1_201_24270_20130530_020630_inLine +BABEL_OP1_201_24270_20130530_020630_outLine +BABEL_OP1_201_24470_20130531_024204_inLine +BABEL_OP1_201_24470_20130531_024204_outLine +BABEL_OP1_201_24501_20130429_102945_inLine +BABEL_OP1_201_24501_20130429_102945_outLine +BABEL_OP1_201_24532_20130307_060030_inLine +BABEL_OP1_201_24532_20130307_060030_outLine +BABEL_OP1_201_24586_20130430_025349_inLine +BABEL_OP1_201_24586_20130430_032300_inLine +BABEL_OP1_201_24586_20130430_032300_outLine +BABEL_OP1_201_24586_20130430_033306_inLine +BABEL_OP1_201_24586_20130430_033306_outLine +BABEL_OP1_201_24679_20130222_072407_inLine +BABEL_OP1_201_24679_20130222_072407_outLine +BABEL_OP1_201_24982_20130529_044009_inLine +BABEL_OP1_201_24982_20130529_044009_outLine +BABEL_OP1_201_25015_20130501_223825_inLine +BABEL_OP1_201_25015_20130501_223825_outLine +BABEL_OP1_201_25961_20130223_033405_inLine +BABEL_OP1_201_25961_20130223_033405_outLine +BABEL_OP1_201_26388_20130528_030259_inLine +BABEL_OP1_201_26388_20130528_030259_outLine +BABEL_OP1_201_26836_20130528_100100_inLine +BABEL_OP1_201_26836_20130528_100100_outLine +BABEL_OP1_201_26836_20130528_101331_inLine +BABEL_OP1_201_26836_20130528_101331_outLine 
+BABEL_OP1_201_26999_20130228_090136_inLine +BABEL_OP1_201_26999_20130228_090136_outLine +BABEL_OP1_201_27042_20130701_075011_inLine +BABEL_OP1_201_27042_20130701_075011_outLine +BABEL_OP1_201_27203_20130602_005950_inLine +BABEL_OP1_201_27203_20130602_005950_outLine +BABEL_OP1_201_27590_20130304_072243_inLine +BABEL_OP1_201_27590_20130304_072243_outLine +BABEL_OP1_201_28419_20130528_035005_inLine +BABEL_OP1_201_28419_20130528_035005_outLine +BABEL_OP1_201_28522_20130303_104614_inLine +BABEL_OP1_201_28522_20130303_104614_outLine +BABEL_OP1_201_28600_20130701_051100_inLine +BABEL_OP1_201_28600_20130701_051100_outLine +BABEL_OP1_201_28775_20130529_005204_inLine +BABEL_OP1_201_28775_20130529_005204_outLine +BABEL_OP1_201_28814_20130704_000405_inLine +BABEL_OP1_201_28814_20130704_000405_outLine +BABEL_OP1_201_28945_20130528_094913_inLine +BABEL_OP1_201_28945_20130528_094913_outLine +BABEL_OP1_201_29023_20130530_024701_inLine +BABEL_OP1_201_29023_20130530_024701_outLine +BABEL_OP1_201_29072_20130304_052508_inLine +BABEL_OP1_201_29072_20130304_052508_outLine +BABEL_OP1_201_29168_20130222_015942_inLine +BABEL_OP1_201_29168_20130222_015942_outLine +BABEL_OP1_201_30180_20130528_033242_inLine +BABEL_OP1_201_30180_20130528_033242_outLine +BABEL_OP1_201_30395_20130529_034626_inLine +BABEL_OP1_201_30395_20130529_034626_outLine +BABEL_OP1_201_30432_20130227_084229_inLine +BABEL_OP1_201_30432_20130227_084229_outLine +BABEL_OP1_201_30576_20130527_002801_inLine +BABEL_OP1_201_30576_20130527_002801_outLine +BABEL_OP1_201_31628_20130301_081256_inLine +BABEL_OP1_201_31628_20130301_081256_outLine +BABEL_OP1_201_32097_20130130_021717_inLine +BABEL_OP1_201_32097_20130130_021717_outLine +BABEL_OP1_201_32122_20130529_070011_inLine +BABEL_OP1_201_32122_20130529_070011_outLine +BABEL_OP1_201_32171_20130220_084632_inLine +BABEL_OP1_201_32171_20130220_084632_outLine +BABEL_OP1_201_32708_20130528_093343_inLine +BABEL_OP1_201_32708_20130528_093343_outLine +BABEL_OP1_201_33229_20130429_025144_inLine +BABEL_OP1_201_33229_20130429_025144_outLine +BABEL_OP1_201_33659_20130214_000335_inLine +BABEL_OP1_201_33659_20130214_000335_outLine +BABEL_OP1_201_33806_20130630_224040_inLine +BABEL_OP1_201_33806_20130630_224040_outLine +BABEL_OP1_201_34106_20130305_032650_inLine +BABEL_OP1_201_34106_20130305_032650_outLine +BABEL_OP1_201_34145_20130301_033324_inLine +BABEL_OP1_201_34145_20130301_033324_outLine +BABEL_OP1_201_34197_20130227_065321_inLine +BABEL_OP1_201_34197_20130227_065321_outLine +BABEL_OP1_201_34336_20130527_071806_inLine +BABEL_OP1_201_34336_20130527_071806_outLine +BABEL_OP1_201_34679_20130529_040931_inLine +BABEL_OP1_201_34679_20130529_040931_outLine +BABEL_OP1_201_34826_20130430_025628_inLine +BABEL_OP1_201_34826_20130430_025628_outLine +BABEL_OP1_201_34903_20130302_052444_inLine +BABEL_OP1_201_34903_20130302_052444_outLine +BABEL_OP1_201_35000_20130702_092721_inLine +BABEL_OP1_201_35000_20130702_092721_outLine +BABEL_OP1_201_35008_20130305_114402_inLine +BABEL_OP1_201_35008_20130305_114402_outLine +BABEL_OP1_201_35467_20130321_032230_inLine +BABEL_OP1_201_35467_20130321_032230_outLine +BABEL_OP1_201_36894_20130221_070614_inLine +BABEL_OP1_201_36894_20130221_070614_outLine +BABEL_OP1_201_37598_20130601_032226_inLine +BABEL_OP1_201_37598_20130601_032226_outLine +BABEL_OP1_201_38076_20130302_132339_inLine +BABEL_OP1_201_38076_20130302_132339_outLine +BABEL_OP1_201_38878_20130228_041057_inLine +BABEL_OP1_201_38878_20130228_041057_outLine +BABEL_OP1_201_39426_20130429_085957_inLine 
+BABEL_OP1_201_39426_20130429_085957_outLine +BABEL_OP1_201_39638_20130126_082343_inLine +BABEL_OP1_201_39638_20130126_082343_outLine +BABEL_OP1_201_41334_20130630_085009_inLine +BABEL_OP1_201_41334_20130630_085009_outLine +BABEL_OP1_201_41469_20130303_034949_inLine +BABEL_OP1_201_41469_20130303_034949_outLine +BABEL_OP1_201_41542_20130429_084921_inLine +BABEL_OP1_201_41542_20130429_084921_outLine +BABEL_OP1_201_41685_20130214_090836_inLine +BABEL_OP1_201_41685_20130214_090836_outLine +BABEL_OP1_201_41720_20130203_053934_inLine +BABEL_OP1_201_41720_20130203_053934_outLine +BABEL_OP1_201_41890_20130430_020800_inLine +BABEL_OP1_201_41890_20130430_020800_outLine +BABEL_OP1_201_42155_20130521_023245_inLine +BABEL_OP1_201_42155_20130521_023245_outLine +BABEL_OP1_201_42497_20130529_040557_inLine +BABEL_OP1_201_42497_20130529_040557_outLine +BABEL_OP1_201_42771_20130516_235914_inLine +BABEL_OP1_201_42771_20130516_235914_outLine +BABEL_OP1_201_42834_20130227_094847_inLine +BABEL_OP1_201_42834_20130227_094847_outLine +BABEL_OP1_201_42991_20130301_104105_inLine +BABEL_OP1_201_42991_20130301_104105_outLine +BABEL_OP1_201_43286_20130301_085932_inLine +BABEL_OP1_201_43286_20130301_085932_outLine +BABEL_OP1_201_43323_20130211_115349_inLine +BABEL_OP1_201_43323_20130211_120743_inLine +BABEL_OP1_201_43323_20130211_120743_outLine +BABEL_OP1_201_43588_20130430_054932_inLine +BABEL_OP1_201_43588_20130430_054932_outLine +BABEL_OP1_201_43784_20130529_104333_inLine +BABEL_OP1_201_43784_20130529_104333_outLine +BABEL_OP1_201_43794_20130603_014105_inLine +BABEL_OP1_201_43794_20130603_014105_outLine +BABEL_OP1_201_44477_20130302_072308_inLine +BABEL_OP1_201_44477_20130302_072308_outLine +BABEL_OP1_201_44477_20130302_073645_inLine +BABEL_OP1_201_44477_20130302_073645_outLine +BABEL_OP1_201_44478_20130502_075027_inLine +BABEL_OP1_201_44478_20130502_075027_outLine +BABEL_OP1_201_44709_20130303_114051_inLine +BABEL_OP1_201_44709_20130303_114051_outLine +BABEL_OP1_201_46066_20130429_123746_inLine +BABEL_OP1_201_46066_20130429_123746_outLine +BABEL_OP1_201_46169_20130702_011629_inLine +BABEL_OP1_201_46169_20130702_011629_outLine +BABEL_OP1_201_46310_20130328_024919_inLine +BABEL_OP1_201_46310_20130328_024919_outLine +BABEL_OP1_201_46550_20130528_065103_inLine +BABEL_OP1_201_46550_20130528_065103_outLine +BABEL_OP1_201_46558_20130220_030534_inLine +BABEL_OP1_201_46558_20130220_030534_outLine +BABEL_OP1_201_46589_20130302_082301_inLine +BABEL_OP1_201_46589_20130302_082301_outLine +BABEL_OP1_201_46681_20130530_033328_inLine +BABEL_OP1_201_46681_20130530_033328_outLine +BABEL_OP1_201_46770_20130429_011947_inLine +BABEL_OP1_201_46770_20130429_011947_outLine +BABEL_OP1_201_46976_20130517_023139_inLine +BABEL_OP1_201_46976_20130517_023139_outLine +BABEL_OP1_201_47802_20130524_044824_inLine +BABEL_OP1_201_47802_20130524_044824_outLine +BABEL_OP1_201_47878_20130522_021958_inLine +BABEL_OP1_201_47878_20130522_021958_outLine +BABEL_OP1_201_48243_20130602_122113_inLine +BABEL_OP1_201_48243_20130602_122113_outLine +BABEL_OP1_201_48299_20130226_120812_inLine +BABEL_OP1_201_48299_20130226_120812_outLine +BABEL_OP1_201_48299_20130226_122743_inLine +BABEL_OP1_201_48299_20130226_122743_outLine +BABEL_OP1_201_49027_20130529_101617_inLine +BABEL_OP1_201_49027_20130529_101617_outLine +BABEL_OP1_201_49216_20130314_070036_inLine +BABEL_OP1_201_49216_20130314_070036_outLine +BABEL_OP1_201_49630_20130306_105833_inLine +BABEL_OP1_201_49630_20130306_105833_outLine +BABEL_OP1_201_49637_20130426_020402_inLine 
+BABEL_OP1_201_49637_20130426_020402_outLine +BABEL_OP1_201_49768_20130529_082143_inLine +BABEL_OP1_201_49768_20130529_082143_outLine +BABEL_OP1_201_49907_20130529_101707_inLine +BABEL_OP1_201_49907_20130529_101707_outLine +BABEL_OP1_201_49945_20130501_080703_inLine +BABEL_OP1_201_49945_20130501_080703_outLine +BABEL_OP1_201_50549_20130428_053142_inLine +BABEL_OP1_201_50549_20130428_053142_outLine +BABEL_OP1_201_50549_20130428_055313_inLine +BABEL_OP1_201_50549_20130428_055313_outLine +BABEL_OP1_201_50779_20130522_051719_inLine +BABEL_OP1_201_50779_20130522_051719_outLine +BABEL_OP1_201_50810_20130312_055632_inLine +BABEL_OP1_201_50810_20130312_055632_outLine +BABEL_OP1_201_50940_20130309_041526_inLine +BABEL_OP1_201_50940_20130309_041526_outLine +BABEL_OP1_201_51611_20130530_094039_inLine +BABEL_OP1_201_51611_20130530_094039_outLine +BABEL_OP1_201_52301_20130223_024524_inLine +BABEL_OP1_201_52301_20130223_024524_outLine +BABEL_OP1_201_52404_20130301_233232_inLine +BABEL_OP1_201_52404_20130301_233232_outLine +BABEL_OP1_201_52422_20130428_023051_inLine +BABEL_OP1_201_52422_20130428_023051_outLine +BABEL_OP1_201_52490_20130220_051000_inLine +BABEL_OP1_201_52490_20130220_051000_outLine +BABEL_OP1_201_52818_20130301_121852_inLine +BABEL_OP1_201_52818_20130301_121852_outLine +BABEL_OP1_201_55259_20130526_073400_inLine +BABEL_OP1_201_55259_20130526_073400_outLine +BABEL_OP1_201_55267_20130228_064943_inLine +BABEL_OP1_201_55267_20130228_064943_outLine +BABEL_OP1_201_55968_20130314_043319_inLine +BABEL_OP1_201_55968_20130314_043319_outLine +BABEL_OP1_201_55968_20130314_044612_inLine +BABEL_OP1_201_55968_20130314_044612_outLine +BABEL_OP1_201_56023_20130501_081011_inLine +BABEL_OP1_201_56023_20130501_081011_outLine +BABEL_OP1_201_56307_20130301_024958_inLine +BABEL_OP1_201_56307_20130301_024958_outLine +BABEL_OP1_201_57065_20130302_033227_inLine +BABEL_OP1_201_57065_20130302_033227_outLine +BABEL_OP1_201_57233_20130206_090034_inLine +BABEL_OP1_201_57233_20130206_090034_outLine +BABEL_OP1_201_57464_20130428_051858_inLine +BABEL_OP1_201_57464_20130428_051858_outLine +BABEL_OP1_201_57548_20130518_042831_inLine +BABEL_OP1_201_57548_20130518_042831_outLine +BABEL_OP1_201_57678_20130528_022013_inLine +BABEL_OP1_201_57678_20130528_022013_outLine +BABEL_OP1_201_58107_20130518_004334_inLine +BABEL_OP1_201_58107_20130518_004334_outLine +BABEL_OP1_201_58145_20130602_044301_inLine +BABEL_OP1_201_58145_20130602_044301_outLine +BABEL_OP1_201_58313_20130522_055528_inLine +BABEL_OP1_201_58313_20130522_055528_outLine +BABEL_OP1_201_58585_20130429_003422_inLine +BABEL_OP1_201_58585_20130429_003422_outLine +BABEL_OP1_201_58821_20130306_091219_inLine +BABEL_OP1_201_58821_20130306_091219_outLine +BABEL_OP1_201_59039_20130220_090641_inLine +BABEL_OP1_201_59039_20130220_090641_outLine +BABEL_OP1_201_59509_20130227_090836_inLine +BABEL_OP1_201_59509_20130227_090836_outLine +BABEL_OP1_201_59509_20130227_092230_inLine +BABEL_OP1_201_59509_20130227_092230_outLine +BABEL_OP1_201_60115_20130301_114138_inLine +BABEL_OP1_201_60115_20130301_114138_outLine +BABEL_OP1_201_60436_20130503_044737_inLine +BABEL_OP1_201_60436_20130503_044737_outLine +BABEL_OP1_201_60474_20130527_081400_inLine +BABEL_OP1_201_60474_20130527_081400_outLine +BABEL_OP1_201_60661_20130529_023958_inLine +BABEL_OP1_201_60661_20130529_023958_outLine +BABEL_OP1_201_61435_20130430_031742_inLine +BABEL_OP1_201_61435_20130430_031742_outLine +BABEL_OP1_201_61873_20130519_030703_inLine +BABEL_OP1_201_61873_20130519_030703_outLine 
+BABEL_OP1_201_62014_20130228_083820_inLine +BABEL_OP1_201_62014_20130228_083820_outLine +BABEL_OP1_201_63081_20130226_035431_inLine +BABEL_OP1_201_63081_20130226_035431_outLine +BABEL_OP1_201_63084_20130301_114742_inLine +BABEL_OP1_201_63084_20130301_114742_outLine +BABEL_OP1_201_63307_20130521_235343_inLine +BABEL_OP1_201_63307_20130521_235343_outLine +BABEL_OP1_201_63604_20130412_021112_inLine +BABEL_OP1_201_63604_20130412_021112_outLine +BABEL_OP1_201_64259_20130202_090605_inLine +BABEL_OP1_201_64259_20130202_090605_outLine +BABEL_OP1_201_64398_20130301_084125_inLine +BABEL_OP1_201_64398_20130301_084125_outLine +BABEL_OP1_201_65064_20130521_061233_inLine +BABEL_OP1_201_65064_20130521_061233_outLine +BABEL_OP1_201_65561_20130305_120931_inLine +BABEL_OP1_201_65561_20130305_120931_outLine +BABEL_OP1_201_66045_20130509_044408_inLine +BABEL_OP1_201_66045_20130509_044408_outLine +BABEL_OP1_201_66472_20130517_041032_inLine +BABEL_OP1_201_66472_20130517_041032_outLine +BABEL_OP1_201_67213_20130224_044805_inLine +BABEL_OP1_201_67213_20130224_044805_outLine +BABEL_OP1_201_67283_20130223_012433_inLine +BABEL_OP1_201_67283_20130223_012433_outLine +BABEL_OP1_201_67401_20130522_063044_inLine +BABEL_OP1_201_67401_20130522_063044_outLine +BABEL_OP1_201_67622_20130306_012440_inLine +BABEL_OP1_201_67622_20130306_012440_outLine +BABEL_OP1_201_68068_20130302_042557_inLine +BABEL_OP1_201_68068_20130302_042557_outLine +BABEL_OP1_201_68244_20130228_052832_inLine +BABEL_OP1_201_68244_20130228_052832_outLine +BABEL_OP1_201_68306_20130301_132523_inLine +BABEL_OP1_201_68306_20130301_132523_outLine +BABEL_OP1_201_68748_20130301_051957_inLine +BABEL_OP1_201_68748_20130301_051957_outLine +BABEL_OP1_201_68924_20130228_031746_inLine +BABEL_OP1_201_68924_20130228_031746_outLine +BABEL_OP1_201_69107_20130518_053632_inLine +BABEL_OP1_201_69107_20130518_053632_outLine +BABEL_OP1_201_69574_20130313_015419_inLine +BABEL_OP1_201_69574_20130313_015419_outLine +BABEL_OP1_201_69578_20130509_033949_inLine +BABEL_OP1_201_69578_20130509_033949_outLine +BABEL_OP1_201_69636_20130302_024254_inLine +BABEL_OP1_201_69636_20130302_024254_outLine +BABEL_OP1_201_70343_20130302_035639_inLine +BABEL_OP1_201_70343_20130302_035639_outLine +BABEL_OP1_201_70343_20130302_040518_inLine +BABEL_OP1_201_70343_20130302_040518_outLine +BABEL_OP1_201_70386_20130528_033752_inLine +BABEL_OP1_201_70386_20130528_033752_outLine +BABEL_OP1_201_70794_20130314_065330_inLine +BABEL_OP1_201_70794_20130314_065330_outLine +BABEL_OP1_201_72324_20130227_080108_inLine +BABEL_OP1_201_72324_20130227_080108_outLine +BABEL_OP1_201_72587_20130227_092146_inLine +BABEL_OP1_201_72587_20130227_092146_outLine +BABEL_OP1_201_72844_20130320_030750_inLine +BABEL_OP1_201_72844_20130320_030750_outLine +BABEL_OP1_201_73430_20130306_070252_inLine +BABEL_OP1_201_73430_20130306_070252_outLine +BABEL_OP1_201_73485_20130704_012751_inLine +BABEL_OP1_201_73485_20130704_012751_outLine +BABEL_OP1_201_73511_20130305_064018_inLine +BABEL_OP1_201_73511_20130305_064018_outLine +BABEL_OP1_201_73518_20130427_020953_inLine +BABEL_OP1_201_73518_20130427_020953_outLine +BABEL_OP1_201_73591_20121205_085430_inLine +BABEL_OP1_201_73591_20121205_085430_outLine +BABEL_OP1_201_73591_20121205_091943_inLine +BABEL_OP1_201_73591_20121205_091943_outLine +BABEL_OP1_201_73964_20130502_060046_inLine +BABEL_OP1_201_73964_20130502_060046_outLine +BABEL_OP1_201_74280_20130307_060529_inLine +BABEL_OP1_201_74280_20130307_060529_outLine +BABEL_OP1_201_74728_20130502_015015_inLine 
+BABEL_OP1_201_74728_20130502_015015_outLine +BABEL_OP1_201_74921_20130302_015536_inLine +BABEL_OP1_201_74921_20130302_015536_outLine +BABEL_OP1_201_74921_20130302_020351_inLine +BABEL_OP1_201_74921_20130302_020351_outLine +BABEL_OP1_201_75064_20130528_032631_inLine +BABEL_OP1_201_75064_20130528_032631_outLine +BABEL_OP1_201_75342_20130305_071206_inLine +BABEL_OP1_201_75342_20130305_071206_outLine +BABEL_OP1_201_75764_20130428_041456_inLine +BABEL_OP1_201_75764_20130428_041456_outLine +BABEL_OP1_201_75993_20130529_053731_inLine +BABEL_OP1_201_75993_20130529_053731_outLine +BABEL_OP1_201_76683_20130524_053916_inLine +BABEL_OP1_201_76683_20130524_053916_outLine +BABEL_OP1_201_77427_20130528_003638_inLine +BABEL_OP1_201_77427_20130528_003638_outLine +BABEL_OP1_201_78116_20130304_074916_inLine +BABEL_OP1_201_78116_20130304_074916_outLine +BABEL_OP1_201_78398_20130529_023517_inLine +BABEL_OP1_201_78398_20130529_023517_outLine +BABEL_OP1_201_78943_20130528_034620_inLine +BABEL_OP1_201_78943_20130528_034620_outLine +BABEL_OP1_201_79129_20130524_031851_inLine +BABEL_OP1_201_79129_20130524_031851_outLine +BABEL_OP1_201_79167_20130303_071948_inLine +BABEL_OP1_201_79167_20130303_071948_outLine +BABEL_OP1_201_79167_20130303_093604_inLine +BABEL_OP1_201_79167_20130303_093604_outLine +BABEL_OP1_201_79429_20130216_152022_inLine +BABEL_OP1_201_79429_20130216_152022_outLine +BABEL_OP1_201_80306_20130509_071053_inLine +BABEL_OP1_201_80306_20130509_071053_outLine +BABEL_OP1_201_81287_20130305_141750_inLine +BABEL_OP1_201_81287_20130305_141750_outLine +BABEL_OP1_201_81392_20130304_082518_inLine +BABEL_OP1_201_81392_20130304_082518_outLine +BABEL_OP1_201_81424_20130304_080620_inLine +BABEL_OP1_201_81424_20130304_080620_outLine +BABEL_OP1_201_81433_20130514_063900_inLine +BABEL_OP1_201_81433_20130514_063900_outLine +BABEL_OP1_201_81810_20130302_043825_inLine +BABEL_OP1_201_81810_20130302_043825_outLine +BABEL_OP1_201_81971_20130227_030618_inLine +BABEL_OP1_201_81971_20130227_030618_outLine +BABEL_OP1_201_82123_20130505_053636_inLine +BABEL_OP1_201_82123_20130505_053636_outLine +BABEL_OP1_201_82138_20130509_063904_inLine +BABEL_OP1_201_82138_20130509_063904_outLine +BABEL_OP1_201_82140_20130510_013208_inLine +BABEL_OP1_201_82140_20130510_013208_outLine +BABEL_OP1_201_82637_20130227_044340_inLine +BABEL_OP1_201_82637_20130227_044340_outLine +BABEL_OP1_201_82904_20130427_005507_inLine +BABEL_OP1_201_82904_20130427_005507_outLine +BABEL_OP1_201_82979_20130529_063602_inLine +BABEL_OP1_201_82979_20130529_063602_outLine +BABEL_OP1_201_83238_20130514_054056_inLine +BABEL_OP1_201_83238_20130514_054056_outLine +BABEL_OP1_201_83430_20130210_094011_inLine +BABEL_OP1_201_83430_20130210_094011_outLine +BABEL_OP1_201_83455_20130511_053045_inLine +BABEL_OP1_201_83455_20130511_053045_outLine +BABEL_OP1_201_83625_20130128_091225_inLine +BABEL_OP1_201_83625_20130128_091225_outLine +BABEL_OP1_201_83651_20130604_075201_inLine +BABEL_OP1_201_83651_20130604_075201_outLine +BABEL_OP1_201_83929_20121205_055436_inLine +BABEL_OP1_201_83929_20121205_055436_outLine +BABEL_OP1_201_83929_20121206_061559_inLine +BABEL_OP1_201_83929_20121206_061559_outLine +BABEL_OP1_201_84547_20130227_041326_inLine +BABEL_OP1_201_84547_20130227_041326_outLine +BABEL_OP1_201_84715_20130429_094324_inLine +BABEL_OP1_201_84715_20130429_094324_outLine +BABEL_OP1_201_84936_20130301_073352_inLine +BABEL_OP1_201_84936_20130301_073352_outLine +BABEL_OP1_201_85010_20130206_122216_inLine +BABEL_OP1_201_85010_20130206_122216_outLine 
+BABEL_OP1_201_85047_20130510_055057_inLine +BABEL_OP1_201_85047_20130510_055057_outLine +BABEL_OP1_201_85647_20130511_015627_inLine +BABEL_OP1_201_85647_20130511_015627_outLine +BABEL_OP1_201_86191_20130528_045113_inLine +BABEL_OP1_201_86191_20130528_045113_outLine +BABEL_OP1_201_86191_20130528_051540_inLine +BABEL_OP1_201_86191_20130528_051540_outLine +BABEL_OP1_201_86433_20130303_035210_inLine +BABEL_OP1_201_86433_20130303_035210_outLine +BABEL_OP1_201_86467_20130221_031701_inLine +BABEL_OP1_201_86467_20130221_031701_outLine +BABEL_OP1_201_86557_20130306_054158_inLine +BABEL_OP1_201_86557_20130306_054158_outLine +BABEL_OP1_201_86635_20130227_080743_inLine +BABEL_OP1_201_86635_20130227_080743_outLine +BABEL_OP1_201_86676_20130302_034945_inLine +BABEL_OP1_201_86676_20130302_034945_outLine +BABEL_OP1_201_86888_20130301_011747_inLine +BABEL_OP1_201_86888_20130301_011747_outLine +BABEL_OP1_201_87074_20130529_072238_inLine +BABEL_OP1_201_87074_20130529_072238_outLine +BABEL_OP1_201_87179_20130414_223248_inLine +BABEL_OP1_201_87179_20130414_223248_outLine +BABEL_OP1_201_87298_20130530_035908_inLine +BABEL_OP1_201_87298_20130530_035908_outLine +BABEL_OP1_201_87313_20130228_054816_inLine +BABEL_OP1_201_87313_20130228_054816_outLine +BABEL_OP1_201_87731_20130216_084329_inLine +BABEL_OP1_201_87731_20130216_084329_outLine +BABEL_OP1_201_87796_20130531_043218_inLine +BABEL_OP1_201_87796_20130531_043218_outLine +BABEL_OP1_201_88445_20130228_100123_inLine +BABEL_OP1_201_88445_20130228_100123_outLine +BABEL_OP1_201_88661_20130305_103247_inLine +BABEL_OP1_201_88661_20130305_103247_outLine +BABEL_OP1_201_89059_20130429_001658_inLine +BABEL_OP1_201_89059_20130429_001658_outLine +BABEL_OP1_201_89877_20130602_052802_inLine +BABEL_OP1_201_89877_20130602_052802_outLine +BABEL_OP1_201_90347_20130601_020619_inLine +BABEL_OP1_201_90347_20130601_020619_outLine +BABEL_OP1_201_90777_20130530_043440_inLine +BABEL_OP1_201_90777_20130530_043440_outLine +BABEL_OP1_201_91125_20130301_044113_inLine +BABEL_OP1_201_91125_20130301_044113_outLine +BABEL_OP1_201_91891_20130306_084037_inLine +BABEL_OP1_201_91891_20130306_084037_outLine +BABEL_OP1_201_91944_20130529_030733_inLine +BABEL_OP1_201_91944_20130529_030733_outLine +BABEL_OP1_201_91977_20130228_225341_inLine +BABEL_OP1_201_91977_20130228_225341_outLine +BABEL_OP1_201_92509_20130222_064302_inLine +BABEL_OP1_201_92509_20130222_064302_outLine +BABEL_OP1_201_92557_20130428_115801_inLine +BABEL_OP1_201_92557_20130428_115801_outLine +BABEL_OP1_201_92740_20130301_044629_inLine +BABEL_OP1_201_92740_20130301_044629_outLine +BABEL_OP1_201_93222_20130127_012443_inLine +BABEL_OP1_201_93222_20130127_012443_outLine +BABEL_OP1_201_93604_20130502_071337_inLine +BABEL_OP1_201_93604_20130502_071337_outLine +BABEL_OP1_201_93964_20130511_000644_inLine +BABEL_OP1_201_93964_20130511_000644_outLine +BABEL_OP1_201_94025_20130303_091916_inLine +BABEL_OP1_201_94025_20130303_091916_outLine +BABEL_OP1_201_94316_20130503_072805_inLine +BABEL_OP1_201_94316_20130503_072805_outLine +BABEL_OP1_201_94449_20130704_033336_inLine +BABEL_OP1_201_94449_20130704_033336_outLine +BABEL_OP1_201_94487_20130502_053741_inLine +BABEL_OP1_201_94487_20130502_053741_outLine +BABEL_OP1_201_94869_20130313_052715_inLine +BABEL_OP1_201_94869_20130313_052715_outLine +BABEL_OP1_201_95446_20130430_051750_inLine +BABEL_OP1_201_95446_20130430_051750_outLine +BABEL_OP1_201_96059_20130430_034442_inLine +BABEL_OP1_201_96059_20130430_034442_outLine +BABEL_OP1_201_96376_20130704_011157_inLine 
+BABEL_OP1_201_96376_20130704_011157_outLine +BABEL_OP1_201_96820_20130514_032741_inLine +BABEL_OP1_201_96820_20130514_032741_outLine +BABEL_OP1_201_97363_20130528_063449_inLine +BABEL_OP1_201_97363_20130528_063449_outLine +BABEL_OP1_201_97557_20130228_004756_inLine +BABEL_OP1_201_97557_20130228_004756_outLine +BABEL_OP1_201_99202_20130521_003552_inLine +BABEL_OP1_201_99202_20130521_003552_outLine +BABEL_OP1_201_99955_20130429_001807_inLine +BABEL_OP1_201_99955_20130429_001807_outLine diff --git a/egs/babel/s5d/conf/lists/201-haitian/train.untranscribed.list b/egs/babel/s5d/conf/lists/201-haitian/train.untranscribed.list new file mode 100644 index 00000000000..33da29dd0f7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/201-haitian/train.untranscribed.list @@ -0,0 +1,270 @@ +BABEL_OP1_201_10974_20130512_073026_inLine +BABEL_OP1_201_10974_20130512_073026_outLine +BABEL_OP1_201_11352_20130501_215210_inLine +BABEL_OP1_201_11352_20130501_215210_outLine +BABEL_OP1_201_13040_20130604_010848_inLine +BABEL_OP1_201_13040_20130604_010848_outLine +BABEL_OP1_201_14158_20130301_041642_inLine +BABEL_OP1_201_14158_20130301_041642_outLine +BABEL_OP1_201_15042_20130502_000845_inLine +BABEL_OP1_201_15042_20130502_000845_outLine +BABEL_OP1_201_17573_20130313_093021_inLine +BABEL_OP1_201_17573_20130313_093021_outLine +BABEL_OP1_201_18078_20130430_095821_inLine +BABEL_OP1_201_18078_20130430_095821_outLine +BABEL_OP1_201_19120_20130405_142951_inLine +BABEL_OP1_201_19120_20130405_142951_outLine +BABEL_OP1_201_21581_20130527_033524_inLine +BABEL_OP1_201_21581_20130527_033524_outLine +BABEL_OP1_201_21581_20130527_034908_inLine +BABEL_OP1_201_21581_20130527_034908_outLine +BABEL_OP1_201_22170_20130403_030729_inLine +BABEL_OP1_201_22170_20130403_030729_outLine +BABEL_OP1_201_27478_20130502_010501_inLine +BABEL_OP1_201_27478_20130502_010501_outLine +BABEL_OP1_201_28012_20130427_041255_inLine +BABEL_OP1_201_28012_20130427_041255_outLine +BABEL_OP1_201_28585_20130426_120901_inLine +BABEL_OP1_201_28585_20130426_120901_outLine +BABEL_OP1_201_29039_20130502_123143_inLine +BABEL_OP1_201_29039_20130502_123143_outLine +BABEL_OP1_201_29404_20130428_094208_inLine +BABEL_OP1_201_29404_20130428_094208_outLine +BABEL_OP1_201_29685_20130603_224641_inLine +BABEL_OP1_201_29685_20130603_224641_outLine +BABEL_OP1_201_29777_20130430_071717_inLine +BABEL_OP1_201_29777_20130430_071717_outLine +BABEL_OP1_201_30653_20130501_222756_inLine +BABEL_OP1_201_30653_20130501_222756_outLine +BABEL_OP1_201_31182_20130415_005506_inLine +BABEL_OP1_201_31182_20130415_005506_outLine +BABEL_OP1_201_32872_20130429_221658_inLine +BABEL_OP1_201_32872_20130429_221658_outLine +BABEL_OP1_201_32959_20130323_033657_inLine +BABEL_OP1_201_32959_20130323_033657_outLine +BABEL_OP1_201_35885_20130630_115617_inLine +BABEL_OP1_201_35885_20130630_115617_outLine +BABEL_OP1_201_36059_20130404_104841_inLine +BABEL_OP1_201_36059_20130404_104841_outLine +BABEL_OP1_201_40740_20130429_011150_inLine +BABEL_OP1_201_40740_20130429_011150_outLine +BABEL_OP1_201_41493_20130312_081558_inLine +BABEL_OP1_201_41493_20130312_081558_outLine +BABEL_OP1_201_41920_20130403_050458_inLine +BABEL_OP1_201_41920_20130403_050458_outLine +BABEL_OP1_201_42231_20130306_074634_inLine +BABEL_OP1_201_42231_20130306_074634_outLine +BABEL_OP1_201_42231_20130306_075939_inLine +BABEL_OP1_201_42231_20130306_075939_outLine +BABEL_OP1_201_42600_20130527_055528_inLine +BABEL_OP1_201_42600_20130527_055528_outLine +BABEL_OP1_201_42600_20130527_060503_inLine +BABEL_OP1_201_42600_20130527_060503_outLine 
+BABEL_OP1_201_42718_20130429_001514_inLine +BABEL_OP1_201_42718_20130429_001514_outLine +BABEL_OP1_201_44420_20130603_050431_inLine +BABEL_OP1_201_44420_20130603_050431_outLine +BABEL_OP1_201_45140_20130429_085359_inLine +BABEL_OP1_201_45140_20130429_085359_outLine +BABEL_OP1_201_45777_20130528_001753_inLine +BABEL_OP1_201_45777_20130528_001753_outLine +BABEL_OP1_201_45908_20130430_062256_inLine +BABEL_OP1_201_45908_20130430_062256_outLine +BABEL_OP1_201_46702_20130308_094852_inLine +BABEL_OP1_201_46702_20130308_094852_outLine +BABEL_OP1_201_48200_20130428_230807_inLine +BABEL_OP1_201_48200_20130428_230807_outLine +BABEL_OP1_201_48399_20130426_031102_inLine +BABEL_OP1_201_48399_20130426_031102_outLine +BABEL_OP1_201_48758_20130415_035720_inLine +BABEL_OP1_201_48758_20130415_035720_outLine +BABEL_OP1_201_49812_20130429_013208_inLine +BABEL_OP1_201_49812_20130429_013208_outLine +BABEL_OP1_201_50745_20130501_232950_inLine +BABEL_OP1_201_50745_20130501_232950_outLine +BABEL_OP1_201_50962_20130529_005739_inLine +BABEL_OP1_201_50962_20130529_005739_outLine +BABEL_OP1_201_50962_20130529_013505_inLine +BABEL_OP1_201_50962_20130529_013505_outLine +BABEL_OP1_201_51417_20130429_013022_inLine +BABEL_OP1_201_51417_20130429_013022_outLine +BABEL_OP1_201_51417_20130429_015210_inLine +BABEL_OP1_201_51417_20130429_015210_outLine +BABEL_OP1_201_52614_20130503_045833_inLine +BABEL_OP1_201_52614_20130503_045833_outLine +BABEL_OP1_201_52614_20130503_051217_inLine +BABEL_OP1_201_52614_20130503_051217_outLine +BABEL_OP1_201_53072_20130430_114228_inLine +BABEL_OP1_201_53072_20130430_114228_outLine +BABEL_OP1_201_53419_20130630_034136_inLine +BABEL_OP1_201_53419_20130630_034136_outLine +BABEL_OP1_201_54040_20130701_030051_inLine +BABEL_OP1_201_54040_20130701_030051_outLine +BABEL_OP1_201_54923_20130512_032825_inLine +BABEL_OP1_201_54923_20130512_032825_outLine +BABEL_OP1_201_55013_20130503_054608_inLine +BABEL_OP1_201_55013_20130503_054608_outLine +BABEL_OP1_201_56198_20130529_062601_inLine +BABEL_OP1_201_56198_20130529_062601_outLine +BABEL_OP1_201_56370_20130406_025411_inLine +BABEL_OP1_201_56370_20130406_025411_outLine +BABEL_OP1_201_56429_20130528_053349_inLine +BABEL_OP1_201_56429_20130528_053349_outLine +BABEL_OP1_201_56684_20130430_033812_inLine +BABEL_OP1_201_56684_20130430_033812_outLine +BABEL_OP1_201_57067_20130428_015420_inLine +BABEL_OP1_201_57067_20130428_015420_outLine +BABEL_OP1_201_57654_20130604_021427_inLine +BABEL_OP1_201_57654_20130604_021427_outLine +BABEL_OP1_201_58815_20130701_072119_inLine +BABEL_OP1_201_58815_20130701_072119_outLine +BABEL_OP1_201_58850_20130529_032635_inLine +BABEL_OP1_201_58850_20130529_032635_outLine +BABEL_OP1_201_59993_20130529_074044_inLine +BABEL_OP1_201_59993_20130529_074044_outLine +BABEL_OP1_201_60836_20130603_224729_inLine +BABEL_OP1_201_60836_20130603_224729_outLine +BABEL_OP1_201_62430_20130428_025620_inLine +BABEL_OP1_201_62430_20130428_025620_outLine +BABEL_OP1_201_62852_20130303_042827_inLine +BABEL_OP1_201_62852_20130303_042827_outLine +BABEL_OP1_201_63220_20130227_082602_inLine +BABEL_OP1_201_63220_20130227_082602_outLine +BABEL_OP1_201_63523_20130501_123402_inLine +BABEL_OP1_201_63523_20130501_123402_outLine +BABEL_OP1_201_64796_20130131_073304_inLine +BABEL_OP1_201_64796_20130131_073304_outLine +BABEL_OP1_201_65298_20130427_075419_inLine +BABEL_OP1_201_65298_20130427_075419_outLine +BABEL_OP1_201_66026_20130414_055206_inLine +BABEL_OP1_201_66026_20130414_055206_outLine +BABEL_OP1_201_66837_20130325_095909_inLine 
+BABEL_OP1_201_66837_20130325_095909_outLine +BABEL_OP1_201_66959_20130326_091943_inLine +BABEL_OP1_201_66959_20130326_091943_outLine +BABEL_OP1_201_67373_20130528_075634_inLine +BABEL_OP1_201_67373_20130528_075634_outLine +BABEL_OP1_201_71038_20130430_020855_inLine +BABEL_OP1_201_71038_20130430_020855_outLine +BABEL_OP1_201_71067_20130228_114156_inLine +BABEL_OP1_201_71067_20130228_114156_outLine +BABEL_OP1_201_71282_20130428_011003_inLine +BABEL_OP1_201_71282_20130428_011003_outLine +BABEL_OP1_201_71333_20130527_094400_inLine +BABEL_OP1_201_71333_20130527_094400_outLine +BABEL_OP1_201_71704_20130604_005411_inLine +BABEL_OP1_201_71704_20130604_005411_outLine +BABEL_OP1_201_71780_20130528_070831_inLine +BABEL_OP1_201_71780_20130528_070831_outLine +BABEL_OP1_201_73119_20130529_084814_inLine +BABEL_OP1_201_73119_20130529_084814_outLine +BABEL_OP1_201_74111_20130415_122650_inLine +BABEL_OP1_201_74111_20130415_122650_outLine +BABEL_OP1_201_74253_20130324_094324_inLine +BABEL_OP1_201_74253_20130324_094324_outLine +BABEL_OP1_201_74455_20130429_223748_inLine +BABEL_OP1_201_74455_20130429_223748_outLine +BABEL_OP1_201_75261_20130428_072427_inLine +BABEL_OP1_201_75261_20130428_072427_outLine +BABEL_OP1_201_76372_20130406_002653_inLine +BABEL_OP1_201_76372_20130406_002653_outLine +BABEL_OP1_201_79107_20130704_020050_inLine +BABEL_OP1_201_79107_20130704_020050_outLine +BABEL_OP1_201_80655_20130429_014151_inLine +BABEL_OP1_201_80655_20130429_014151_outLine +BABEL_OP1_201_80721_20130324_011204_inLine +BABEL_OP1_201_80721_20130324_011204_outLine +BABEL_OP1_201_81213_20130604_060123_inLine +BABEL_OP1_201_81213_20130604_060123_outLine +BABEL_OP1_201_82361_20130429_234744_inLine +BABEL_OP1_201_82361_20130429_234744_outLine +BABEL_OP1_201_82966_20130702_014841_inLine +BABEL_OP1_201_82966_20130702_014841_outLine +BABEL_OP1_201_83062_20130428_080508_inLine +BABEL_OP1_201_83062_20130428_080508_outLine +BABEL_OP1_201_83062_20130428_081244_inLine +BABEL_OP1_201_83062_20130428_081244_outLine +BABEL_OP1_201_83545_20130503_013151_inLine +BABEL_OP1_201_83545_20130503_013151_outLine +BABEL_OP1_201_84061_20130528_013733_inLine +BABEL_OP1_201_84061_20130528_013733_outLine +BABEL_OP1_201_85028_20130413_093438_inLine +BABEL_OP1_201_85028_20130413_093438_outLine +BABEL_OP1_201_85248_20130429_023317_inLine +BABEL_OP1_201_85248_20130429_023317_outLine +BABEL_OP1_201_87693_20130528_083347_inLine +BABEL_OP1_201_87693_20130528_083347_outLine +BABEL_OP1_201_88686_20130306_035740_inLine +BABEL_OP1_201_88686_20130306_035740_outLine +BABEL_OP1_201_88686_20130306_040902_inLine +BABEL_OP1_201_88686_20130306_040902_outLine +BABEL_OP1_201_89330_20130630_075430_inLine +BABEL_OP1_201_89330_20130630_075430_outLine +BABEL_OP1_201_89330_20130630_075936_inLine +BABEL_OP1_201_89330_20130630_075936_outLine +BABEL_OP1_201_89372_20130312_074918_inLine +BABEL_OP1_201_89372_20130312_074918_outLine +BABEL_OP1_201_89560_20130415_124517_inLine +BABEL_OP1_201_89560_20130415_124517_outLine +BABEL_OP1_201_89665_20130603_230819_inLine +BABEL_OP1_201_89665_20130603_230819_outLine +BABEL_OP1_201_89794_20130301_115507_inLine +BABEL_OP1_201_89794_20130301_115507_outLine +BABEL_OP1_201_89794_20130303_105823_inLine +BABEL_OP1_201_89794_20130303_105823_outLine +BABEL_OP1_201_90417_20130520_032334_inLine +BABEL_OP1_201_90417_20130520_032334_outLine +BABEL_OP1_201_90935_20130604_012414_inLine +BABEL_OP1_201_90935_20130604_012414_outLine +BABEL_OP1_201_91372_20130704_010321_inLine +BABEL_OP1_201_91372_20130704_010321_outLine 
+BABEL_OP1_201_91581_20130313_100349_inLine +BABEL_OP1_201_91581_20130313_100349_outLine +BABEL_OP1_201_91825_20130226_051913_inLine +BABEL_OP1_201_91825_20130226_051913_outLine +BABEL_OP1_201_92096_20130406_072054_inLine +BABEL_OP1_201_92096_20130406_072054_outLine +BABEL_OP1_201_92356_20130428_015350_inLine +BABEL_OP1_201_92356_20130428_015350_outLine +BABEL_OP1_201_92757_20130604_084623_inLine +BABEL_OP1_201_92757_20130604_084623_outLine +BABEL_OP1_201_92886_20130528_023229_inLine +BABEL_OP1_201_92886_20130528_023229_outLine +BABEL_OP1_201_92941_20130527_095346_inLine +BABEL_OP1_201_92941_20130527_095346_outLine +BABEL_OP1_201_93320_20130630_082741_inLine +BABEL_OP1_201_93320_20130630_082741_outLine +BABEL_OP1_201_93475_20130530_101306_inLine +BABEL_OP1_201_93475_20130530_101306_outLine +BABEL_OP1_201_93946_20130406_073121_inLine +BABEL_OP1_201_93946_20130406_073121_outLine +BABEL_OP1_201_94044_20130429_080249_inLine +BABEL_OP1_201_94044_20130429_080249_outLine +BABEL_OP1_201_95467_20130630_224512_inLine +BABEL_OP1_201_95467_20130630_224512_outLine +BABEL_OP1_201_96088_20130429_045832_inLine +BABEL_OP1_201_96088_20130429_045832_outLine +BABEL_OP1_201_96446_20130426_023651_inLine +BABEL_OP1_201_96446_20130426_023651_outLine +BABEL_OP1_201_97097_20130502_025744_inLine +BABEL_OP1_201_97097_20130502_025744_outLine +BABEL_OP1_201_97264_20130429_083940_inLine +BABEL_OP1_201_97264_20130429_083940_outLine +BABEL_OP1_201_97988_20130320_082635_inLine +BABEL_OP1_201_97988_20130320_082635_outLine +BABEL_OP1_201_98506_20130430_082503_inLine +BABEL_OP1_201_98506_20130430_082503_outLine +BABEL_OP1_201_98678_20130403_061826_inLine +BABEL_OP1_201_98678_20130403_061826_outLine +BABEL_OP1_201_98909_20130529_002845_inLine +BABEL_OP1_201_98909_20130529_002845_outLine +BABEL_OP1_201_98909_20130529_003625_inLine +BABEL_OP1_201_98909_20130529_003625_outLine +BABEL_OP1_201_98909_20130529_004310_inLine +BABEL_OP1_201_98909_20130529_004310_outLine +BABEL_OP1_201_98909_20130529_004845_inLine +BABEL_OP1_201_98909_20130529_004845_outLine +BABEL_OP1_201_99516_20130319_061728_inLine +BABEL_OP1_201_99516_20130319_061728_outLine +BABEL_OP1_201_99516_20130320_023645_inLine +BABEL_OP1_201_99516_20130320_023645_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/dev.list b/egs/babel/s5d/conf/lists/202-swahili/dev.list new file mode 100644 index 00000000000..21ae20c66d7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/dev.list @@ -0,0 +1,142 @@ +BABEL_OP2_202_10524_20131009_200043_inLine +BABEL_OP2_202_10524_20131009_200043_outLine +BABEL_OP2_202_12635_20131101_212012_inLine +BABEL_OP2_202_12635_20131101_212012_outLine +BABEL_OP2_202_12635_20131101_213218_inLine +BABEL_OP2_202_12635_20131101_213218_outLine +BABEL_OP2_202_14814_20140205_210842_inLine +BABEL_OP2_202_14814_20140205_210842_outLine +BABEL_OP2_202_15420_20140210_010333_inLine +BABEL_OP2_202_15420_20140210_010333_outLine +BABEL_OP2_202_16249_20131202_232723_inLine +BABEL_OP2_202_16249_20131202_232723_outLine +BABEL_OP2_202_17115_20140218_210921_inLine +BABEL_OP2_202_17115_20140218_210921_outLine +BABEL_OP2_202_18766_20140218_222017_inLine +BABEL_OP2_202_18766_20140218_222017_outLine +BABEL_OP2_202_24239_20140206_191516_inLine +BABEL_OP2_202_24239_20140206_191516_outLine +BABEL_OP2_202_24290_20140219_000423_inLine +BABEL_OP2_202_24290_20140219_000423_outLine +BABEL_OP2_202_25085_20140219_185114_inLine +BABEL_OP2_202_25085_20140219_185114_outLine +BABEL_OP2_202_25242_20131203_015232_inLine +BABEL_OP2_202_25242_20131203_015232_outLine 
+BABEL_OP2_202_27478_20140209_224101_inLine +BABEL_OP2_202_27478_20140209_224101_outLine +BABEL_OP2_202_29633_20131009_175514_inLine +BABEL_OP2_202_29633_20131009_175514_outLine +BABEL_OP2_202_29663_20131208_035816_inLine +BABEL_OP2_202_29663_20131208_035816_outLine +BABEL_OP2_202_32287_20131207_203757_inLine +BABEL_OP2_202_32287_20131207_203757_outLine +BABEL_OP2_202_33273_20130219_205419_inLine +BABEL_OP2_202_33273_20130219_205419_outLine +BABEL_OP2_202_33273_20130219_224915_inLine +BABEL_OP2_202_33273_20130219_224915_outLine +BABEL_OP2_202_34197_20121228_201800_inLine +BABEL_OP2_202_34197_20121228_201800_outLine +BABEL_OP2_202_38588_20130228_211322_inLine +BABEL_OP2_202_38588_20130228_211322_outLine +BABEL_OP2_202_39893_20140115_023429_inLine +BABEL_OP2_202_39893_20140115_023429_outLine +BABEL_OP2_202_44309_20140220_184116_inLine +BABEL_OP2_202_44309_20140220_184116_outLine +BABEL_OP2_202_44478_20131011_041636_inLine +BABEL_OP2_202_44478_20131011_041636_outLine +BABEL_OP2_202_45459_20131012_022245_inLine +BABEL_OP2_202_45459_20131012_022245_outLine +BABEL_OP2_202_46169_20131128_183232_inLine +BABEL_OP2_202_46169_20131128_183232_outLine +BABEL_OP2_202_46169_20131128_184600_inLine +BABEL_OP2_202_46169_20131128_184600_outLine +BABEL_OP2_202_46681_20130109_191412_inLine +BABEL_OP2_202_46681_20130109_191412_outLine +BABEL_OP2_202_47405_20131215_233528_inLine +BABEL_OP2_202_47405_20131215_233528_outLine +BABEL_OP2_202_48844_20130108_190416_inLine +BABEL_OP2_202_48844_20130108_190416_outLine +BABEL_OP2_202_52265_20140123_235252_inLine +BABEL_OP2_202_52265_20140123_235252_outLine +BABEL_OP2_202_53957_20131031_012125_inLine +BABEL_OP2_202_53957_20131031_012125_outLine +BABEL_OP2_202_54046_20140121_184347_inLine +BABEL_OP2_202_54046_20140121_184347_outLine +BABEL_OP2_202_55042_20131217_033729_inLine +BABEL_OP2_202_55042_20131217_033729_outLine +BABEL_OP2_202_55106_20131215_030617_inLine +BABEL_OP2_202_55106_20131215_030617_outLine +BABEL_OP2_202_55902_20140121_230205_inLine +BABEL_OP2_202_55902_20140121_230205_outLine +BABEL_OP2_202_59091_20140130_225624_inLine +BABEL_OP2_202_59091_20140130_225624_outLine +BABEL_OP2_202_59549_20131003_203701_inLine +BABEL_OP2_202_59549_20131003_203701_outLine +BABEL_OP2_202_59549_20131003_204655_inLine +BABEL_OP2_202_59549_20131003_204655_outLine +BABEL_OP2_202_60650_20131126_234235_inLine +BABEL_OP2_202_60650_20131126_234235_outLine +BABEL_OP2_202_61440_20140128_015556_inLine +BABEL_OP2_202_61440_20140128_015556_outLine +BABEL_OP2_202_63084_20130801_014407_inLine +BABEL_OP2_202_63084_20130801_014407_outLine +BABEL_OP2_202_63084_20130801_015957_inLine +BABEL_OP2_202_63084_20130801_015957_outLine +BABEL_OP2_202_63336_20140129_004138_inLine +BABEL_OP2_202_63336_20140129_004138_outLine +BABEL_OP2_202_63484_20140128_234153_inLine +BABEL_OP2_202_63484_20140128_234153_outLine +BABEL_OP2_202_63604_20121231_193706_inLine +BABEL_OP2_202_63604_20121231_193706_outLine +BABEL_OP2_202_63787_20130108_202518_inLine +BABEL_OP2_202_63787_20130108_202518_outLine +BABEL_OP2_202_63787_20130108_203416_inLine +BABEL_OP2_202_63787_20130108_203416_outLine +BABEL_OP2_202_66177_20140201_213827_inLine +BABEL_OP2_202_66177_20140201_213827_outLine +BABEL_OP2_202_66822_20130219_222318_inLine +BABEL_OP2_202_66822_20130219_222318_outLine +BABEL_OP2_202_66822_20130219_225918_inLine +BABEL_OP2_202_66822_20130219_225918_outLine +BABEL_OP2_202_68384_20131031_003533_inLine +BABEL_OP2_202_68384_20131031_003533_outLine +BABEL_OP2_202_68924_20130924_231821_inLine 
+BABEL_OP2_202_68924_20130924_231821_outLine +BABEL_OP2_202_69964_20131012_170534_inLine +BABEL_OP2_202_69964_20131012_170534_outLine +BABEL_OP2_202_72040_20131002_213605_inLine +BABEL_OP2_202_72040_20131002_213605_outLine +BABEL_OP2_202_73258_20130215_190454_inLine +BABEL_OP2_202_73258_20130215_190454_outLine +BABEL_OP2_202_73301_20140226_185528_inLine +BABEL_OP2_202_73301_20140226_185528_outLine +BABEL_OP2_202_73819_20130911_163458_inLine +BABEL_OP2_202_73819_20130911_163458_outLine +BABEL_OP2_202_73819_20130927_003321_inLine +BABEL_OP2_202_73819_20130927_003321_outLine +BABEL_OP2_202_75993_20140115_210258_inLine +BABEL_OP2_202_75993_20140115_210258_outLine +BABEL_OP2_202_76756_20130417_204823_inLine +BABEL_OP2_202_76756_20130417_204823_outLine +BABEL_OP2_202_76756_20130417_210400_inLine +BABEL_OP2_202_76756_20130417_210400_outLine +BABEL_OP2_202_77990_20131007_063102_inLine +BABEL_OP2_202_77990_20131007_063102_outLine +BABEL_OP2_202_82637_20121227_193227_inLine +BABEL_OP2_202_82637_20121227_193227_outLine +BABEL_OP2_202_82637_20121227_205425_inLine +BABEL_OP2_202_82637_20121227_205425_outLine +BABEL_OP2_202_84177_20131208_021104_inLine +BABEL_OP2_202_84177_20131208_021104_outLine +BABEL_OP2_202_88260_20130227_194941_inLine +BABEL_OP2_202_88260_20130227_194941_outLine +BABEL_OP2_202_88661_20130801_192922_inLine +BABEL_OP2_202_88661_20130801_192922_outLine +BABEL_OP2_202_90080_20140319_222809_inLine +BABEL_OP2_202_90080_20140319_222809_outLine +BABEL_OP2_202_92740_20130923_235638_inLine +BABEL_OP2_202_92740_20130923_235638_outLine +BABEL_OP2_202_98311_20130109_191639_inLine +BABEL_OP2_202_98311_20130109_191639_outLine +BABEL_OP2_202_98311_20130109_195922_inLine +BABEL_OP2_202_98311_20130109_195922_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/eval.list b/egs/babel/s5d/conf/lists/202-swahili/eval.list new file mode 100644 index 00000000000..8fb4fe490bf --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/eval.list @@ -0,0 +1,963 @@ +BABEL_OP2_202_10019_20130928_235503_inLine +BABEL_OP2_202_10019_20130928_235503_outLine +BABEL_OP2_202_10416_20130215_183832_inLine +BABEL_OP2_202_10416_20130215_183832_outLine +BABEL_OP2_202_11681_20131005_155822_inLine +BABEL_OP2_202_11681_20131005_155822_outLine +BABEL_OP2_202_11723_20131130_201430_inLine +BABEL_OP2_202_11723_20131130_201430_outLine +BABEL_OP2_202_11797_20130104_222532_inLine +BABEL_OP2_202_11797_20130104_222532_outLine +BABEL_OP2_202_12220_20130312_022037_inLine +BABEL_OP2_202_12220_20130312_022037_outLine +BABEL_OP2_202_12321_20140210_015215_inLine +BABEL_OP2_202_12321_20140210_015215_outLine +BABEL_OP2_202_12606_20131010_030508_inLine +BABEL_OP2_202_12606_20131010_030508_outLine +BABEL_OP2_202_13040_20131005_180024_inLine +BABEL_OP2_202_13040_20131005_180024_outLine +BABEL_OP2_202_13909_20140207_075853_inLine +BABEL_OP2_202_13909_20140207_075853_outLine +BABEL_OP2_202_13929_20140205_042603_inLine +BABEL_OP2_202_13929_20140205_042603_outLine +BABEL_OP2_202_14137_20131219_015746_inLine +BABEL_OP2_202_14137_20131219_015746_outLine +BABEL_OP2_202_14141_20131009_061849_inLine +BABEL_OP2_202_14141_20131009_061849_outLine +BABEL_OP2_202_14179_20130926_175610_inLine +BABEL_OP2_202_14179_20130926_175610_outLine +BABEL_OP2_202_14228_20131017_195830_inLine +BABEL_OP2_202_14228_20131017_195830_outLine +BABEL_OP2_202_14229_20140208_071149_inLine +BABEL_OP2_202_14229_20140208_071149_outLine +BABEL_OP2_202_14440_20130503_203601_inLine +BABEL_OP2_202_14440_20130503_203601_outLine +BABEL_OP2_202_14440_20130503_204507_inLine 
+BABEL_OP2_202_14440_20130503_204507_outLine +BABEL_OP2_202_14537_20131016_202630_inLine +BABEL_OP2_202_14537_20131016_202630_outLine +BABEL_OP2_202_14725_20130104_004026_inLine +BABEL_OP2_202_14725_20130104_004026_outLine +BABEL_OP2_202_14807_20140207_040450_inLine +BABEL_OP2_202_14807_20140207_040450_outLine +BABEL_OP2_202_15902_20130108_191503_inLine +BABEL_OP2_202_15902_20130108_191503_outLine +BABEL_OP2_202_16056_20130105_232626_inLine +BABEL_OP2_202_16056_20130105_232626_outLine +BABEL_OP2_202_16056_20130105_235157_inLine +BABEL_OP2_202_16056_20130105_235157_outLine +BABEL_OP2_202_16407_20131203_231519_inLine +BABEL_OP2_202_16407_20131203_231519_outLine +BABEL_OP2_202_16467_20131101_192502_inLine +BABEL_OP2_202_16467_20131101_192502_outLine +BABEL_OP2_202_16475_20130222_200416_inLine +BABEL_OP2_202_16475_20130222_200416_outLine +BABEL_OP2_202_16787_20130220_000429_inLine +BABEL_OP2_202_16787_20130220_000429_outLine +BABEL_OP2_202_17280_20130312_211445_inLine +BABEL_OP2_202_17280_20130312_211445_outLine +BABEL_OP2_202_17440_20131018_012538_inLine +BABEL_OP2_202_17440_20131018_012538_outLine +BABEL_OP2_202_17511_20140205_051449_inLine +BABEL_OP2_202_17511_20140205_051449_outLine +BABEL_OP2_202_17751_20140207_220944_inLine +BABEL_OP2_202_17751_20140207_220944_outLine +BABEL_OP2_202_17881_20131010_011054_inLine +BABEL_OP2_202_17881_20131010_011054_outLine +BABEL_OP2_202_17923_20131004_055753_inLine +BABEL_OP2_202_17923_20131004_055753_outLine +BABEL_OP2_202_18291_20140207_215404_inLine +BABEL_OP2_202_18291_20140207_215404_outLine +BABEL_OP2_202_18380_20130213_000457_inLine +BABEL_OP2_202_18380_20130213_000457_outLine +BABEL_OP2_202_18731_20131128_043434_inLine +BABEL_OP2_202_18731_20131128_043434_outLine +BABEL_OP2_202_19440_20131129_002711_inLine +BABEL_OP2_202_19440_20131129_002711_outLine +BABEL_OP2_202_19444_20131128_200206_inLine +BABEL_OP2_202_19444_20131128_200206_outLine +BABEL_OP2_202_19461_20131129_203023_inLine +BABEL_OP2_202_19461_20131129_203023_outLine +BABEL_OP2_202_19545_20130927_190707_inLine +BABEL_OP2_202_19545_20130927_190707_outLine +BABEL_OP2_202_19621_20130930_034444_inLine +BABEL_OP2_202_19621_20130930_034444_outLine +BABEL_OP2_202_19663_20130220_221050_inLine +BABEL_OP2_202_19663_20130220_221050_outLine +BABEL_OP2_202_19699_20131127_214845_inLine +BABEL_OP2_202_19699_20131127_214845_outLine +BABEL_OP2_202_20738_20131029_183614_inLine +BABEL_OP2_202_20738_20131029_183614_outLine +BABEL_OP2_202_20896_20131220_001523_inLine +BABEL_OP2_202_20896_20131220_001523_outLine +BABEL_OP2_202_20985_20130920_011520_inLine +BABEL_OP2_202_20985_20130920_011520_outLine +BABEL_OP2_202_21029_20131004_003216_inLine +BABEL_OP2_202_21029_20131004_003216_outLine +BABEL_OP2_202_21029_20131004_003949_inLine +BABEL_OP2_202_21029_20131004_003949_outLine +BABEL_OP2_202_21393_20140209_001300_inLine +BABEL_OP2_202_21393_20140209_001300_outLine +BABEL_OP2_202_21435_20131010_024821_inLine +BABEL_OP2_202_21435_20131010_024821_outLine +BABEL_OP2_202_21794_20130219_010105_inLine +BABEL_OP2_202_21794_20130219_010105_outLine +BABEL_OP2_202_22021_20131203_222315_inLine +BABEL_OP2_202_22021_20131203_222315_outLine +BABEL_OP2_202_22021_20131203_223002_inLine +BABEL_OP2_202_22021_20131203_223002_outLine +BABEL_OP2_202_22321_20130104_190713_inLine +BABEL_OP2_202_22321_20130104_190713_outLine +BABEL_OP2_202_22591_20131212_031002_inLine +BABEL_OP2_202_22591_20131212_031002_outLine +BABEL_OP2_202_22641_20131011_032157_inLine +BABEL_OP2_202_22641_20131011_032157_outLine 
+BABEL_OP2_202_23260_20131011_024723_inLine +BABEL_OP2_202_23260_20131011_024723_outLine +BABEL_OP2_202_23355_20131128_005023_inLine +BABEL_OP2_202_23355_20131128_005023_outLine +BABEL_OP2_202_23505_20130107_235621_inLine +BABEL_OP2_202_23505_20130107_235621_outLine +BABEL_OP2_202_23700_20131202_222611_inLine +BABEL_OP2_202_23700_20131202_222611_outLine +BABEL_OP2_202_23731_20130930_020336_inLine +BABEL_OP2_202_23731_20130930_020336_outLine +BABEL_OP2_202_23893_20140207_234800_inLine +BABEL_OP2_202_23893_20140207_234800_outLine +BABEL_OP2_202_23983_20131012_000311_inLine +BABEL_OP2_202_23983_20131012_000311_outLine +BABEL_OP2_202_23995_20131101_185025_inLine +BABEL_OP2_202_23995_20131101_185025_outLine +BABEL_OP2_202_24044_20140108_215521_inLine +BABEL_OP2_202_24044_20140108_215521_outLine +BABEL_OP2_202_24044_20140108_220416_inLine +BABEL_OP2_202_24044_20140108_220416_outLine +BABEL_OP2_202_24221_20131120_203727_inLine +BABEL_OP2_202_24221_20131120_203727_outLine +BABEL_OP2_202_24231_20131012_020132_inLine +BABEL_OP2_202_24231_20131012_020132_outLine +BABEL_OP2_202_24270_20130111_201422_inLine +BABEL_OP2_202_24270_20130111_201422_outLine +BABEL_OP2_202_24323_20130221_203951_inLine +BABEL_OP2_202_24323_20130221_203951_outLine +BABEL_OP2_202_24924_20140207_235730_inLine +BABEL_OP2_202_24924_20140207_235730_outLine +BABEL_OP2_202_25012_20131201_002441_inLine +BABEL_OP2_202_25012_20131201_002441_outLine +BABEL_OP2_202_25961_20130103_204145_inLine +BABEL_OP2_202_25961_20130103_204145_outLine +BABEL_OP2_202_26869_20131216_035718_inLine +BABEL_OP2_202_26869_20131216_035718_outLine +BABEL_OP2_202_28190_20131101_213802_inLine +BABEL_OP2_202_28190_20131101_213802_outLine +BABEL_OP2_202_28422_20130924_010422_inLine +BABEL_OP2_202_28422_20130924_010422_outLine +BABEL_OP2_202_28775_20131004_012212_inLine +BABEL_OP2_202_28775_20131004_012212_outLine +BABEL_OP2_202_28945_20131218_230914_inLine +BABEL_OP2_202_28945_20131218_230914_outLine +BABEL_OP2_202_28945_20131218_232558_inLine +BABEL_OP2_202_28945_20131218_232558_outLine +BABEL_OP2_202_29072_20130930_024744_inLine +BABEL_OP2_202_29072_20130930_024744_outLine +BABEL_OP2_202_29135_20121228_185350_inLine +BABEL_OP2_202_29135_20121228_185350_outLine +BABEL_OP2_202_29208_20130220_203235_inLine +BABEL_OP2_202_29208_20130220_203235_outLine +BABEL_OP2_202_29352_20140209_224923_inLine +BABEL_OP2_202_29352_20140209_224923_outLine +BABEL_OP2_202_29416_20131031_232830_inLine +BABEL_OP2_202_29416_20131031_232830_outLine +BABEL_OP2_202_29643_20131016_010339_inLine +BABEL_OP2_202_29643_20131016_010339_outLine +BABEL_OP2_202_30013_20130930_190028_inLine +BABEL_OP2_202_30013_20130930_190028_outLine +BABEL_OP2_202_30058_20131009_163633_inLine +BABEL_OP2_202_30058_20131009_163633_outLine +BABEL_OP2_202_30180_20130311_225750_inLine +BABEL_OP2_202_30180_20130311_225750_outLine +BABEL_OP2_202_30250_20121228_195004_inLine +BABEL_OP2_202_30250_20121228_195004_outLine +BABEL_OP2_202_30250_20121228_195937_inLine +BABEL_OP2_202_30250_20121228_195937_outLine +BABEL_OP2_202_30497_20131010_013817_inLine +BABEL_OP2_202_30497_20131010_013817_outLine +BABEL_OP2_202_30720_20140204_222643_inLine +BABEL_OP2_202_30720_20140204_222643_outLine +BABEL_OP2_202_31074_20131211_235228_inLine +BABEL_OP2_202_31074_20131211_235228_outLine +BABEL_OP2_202_31484_20130912_200823_inLine +BABEL_OP2_202_31484_20130912_200823_outLine +BABEL_OP2_202_31668_20131128_224714_inLine +BABEL_OP2_202_31668_20131128_224714_outLine +BABEL_OP2_202_31992_20130108_181649_inLine 
+BABEL_OP2_202_31992_20130108_181649_outLine +BABEL_OP2_202_32244_20131012_215830_inLine +BABEL_OP2_202_32244_20131012_215830_outLine +BABEL_OP2_202_32708_20131002_201520_inLine +BABEL_OP2_202_32708_20131002_201520_outLine +BABEL_OP2_202_32727_20131218_221722_inLine +BABEL_OP2_202_32727_20131218_221722_outLine +BABEL_OP2_202_32832_20140108_220009_inLine +BABEL_OP2_202_32832_20140108_220009_outLine +BABEL_OP2_202_32861_20140109_212532_inLine +BABEL_OP2_202_32861_20140109_212532_outLine +BABEL_OP2_202_33111_20131009_060839_inLine +BABEL_OP2_202_33111_20131009_060839_outLine +BABEL_OP2_202_33355_20130107_235044_inLine +BABEL_OP2_202_33355_20130107_235044_outLine +BABEL_OP2_202_33672_20131005_004220_inLine +BABEL_OP2_202_33672_20131005_004220_outLine +BABEL_OP2_202_33933_20131129_215148_inLine +BABEL_OP2_202_33933_20131129_215148_outLine +BABEL_OP2_202_34208_20131203_234204_inLine +BABEL_OP2_202_34208_20131203_234204_outLine +BABEL_OP2_202_34328_20130212_200451_inLine +BABEL_OP2_202_34328_20130212_200451_outLine +BABEL_OP2_202_34336_20131001_231731_inLine +BABEL_OP2_202_34336_20131001_231731_outLine +BABEL_OP2_202_34477_20131001_021709_inLine +BABEL_OP2_202_34477_20131001_021709_outLine +BABEL_OP2_202_34679_20131004_003331_inLine +BABEL_OP2_202_34679_20131004_003331_outLine +BABEL_OP2_202_34899_20140222_024004_inLine +BABEL_OP2_202_34899_20140222_024004_outLine +BABEL_OP2_202_35420_20131129_013142_inLine +BABEL_OP2_202_35420_20131129_013142_outLine +BABEL_OP2_202_35583_20131220_024235_inLine +BABEL_OP2_202_35583_20131220_024235_outLine +BABEL_OP2_202_36039_20131009_024333_inLine +BABEL_OP2_202_36039_20131009_024333_outLine +BABEL_OP2_202_36059_20131009_055135_inLine +BABEL_OP2_202_36059_20131009_055135_outLine +BABEL_OP2_202_36341_20121228_171758_inLine +BABEL_OP2_202_36341_20121228_171758_outLine +BABEL_OP2_202_36505_20131029_215901_inLine +BABEL_OP2_202_36505_20131029_215901_outLine +BABEL_OP2_202_36632_20131216_000901_inLine +BABEL_OP2_202_36632_20131216_000901_outLine +BABEL_OP2_202_36900_20140114_232052_inLine +BABEL_OP2_202_36900_20140114_232052_outLine +BABEL_OP2_202_37007_20140115_001317_inLine +BABEL_OP2_202_37007_20140115_001317_outLine +BABEL_OP2_202_37228_20140114_192926_inLine +BABEL_OP2_202_37228_20140114_192926_outLine +BABEL_OP2_202_37594_20131130_000553_inLine +BABEL_OP2_202_37594_20131130_000553_outLine +BABEL_OP2_202_38125_20131012_003034_inLine +BABEL_OP2_202_38125_20131012_003034_outLine +BABEL_OP2_202_38340_20131002_224306_inLine +BABEL_OP2_202_38340_20131002_224306_outLine +BABEL_OP2_202_38431_20140115_013439_inLine +BABEL_OP2_202_38431_20140115_013439_outLine +BABEL_OP2_202_38664_20130315_010258_inLine +BABEL_OP2_202_38664_20130315_010258_outLine +BABEL_OP2_202_38741_20131003_014930_inLine +BABEL_OP2_202_38741_20131003_014930_outLine +BABEL_OP2_202_38878_20130422_220726_inLine +BABEL_OP2_202_38878_20130422_220726_outLine +BABEL_OP2_202_38979_20131129_213001_inLine +BABEL_OP2_202_38979_20131129_213001_outLine +BABEL_OP2_202_39059_20140114_231114_inLine +BABEL_OP2_202_39059_20140114_231114_outLine +BABEL_OP2_202_39099_20131012_024904_inLine +BABEL_OP2_202_39099_20131012_024904_outLine +BABEL_OP2_202_39277_20131127_183847_inLine +BABEL_OP2_202_39277_20131127_183847_outLine +BABEL_OP2_202_39848_20130218_212103_inLine +BABEL_OP2_202_39848_20130218_212103_outLine +BABEL_OP2_202_39927_20131220_231538_inLine +BABEL_OP2_202_39927_20131220_231538_outLine +BABEL_OP2_202_40092_20131216_000009_inLine +BABEL_OP2_202_40092_20131216_000009_outLine 
+BABEL_OP2_202_40196_20140115_000031_inLine +BABEL_OP2_202_40196_20140115_000031_outLine +BABEL_OP2_202_40713_20131002_202108_inLine +BABEL_OP2_202_40713_20131002_202108_outLine +BABEL_OP2_202_41097_20130425_211559_inLine +BABEL_OP2_202_41097_20130425_211559_outLine +BABEL_OP2_202_41100_20130109_000954_inLine +BABEL_OP2_202_41100_20130109_000954_outLine +BABEL_OP2_202_41109_20140114_212001_inLine +BABEL_OP2_202_41109_20140114_212001_outLine +BABEL_OP2_202_41272_20131011_014240_inLine +BABEL_OP2_202_41272_20131011_014240_outLine +BABEL_OP2_202_41400_20140221_225008_inLine +BABEL_OP2_202_41400_20140221_225008_outLine +BABEL_OP2_202_41682_20131128_174746_inLine +BABEL_OP2_202_41682_20131128_174746_outLine +BABEL_OP2_202_41685_20131128_221337_inLine +BABEL_OP2_202_41685_20131128_221337_outLine +BABEL_OP2_202_41692_20131013_044515_inLine +BABEL_OP2_202_41692_20131013_044515_outLine +BABEL_OP2_202_41741_20130108_182526_inLine +BABEL_OP2_202_41741_20130108_182526_outLine +BABEL_OP2_202_41920_20130103_233550_inLine +BABEL_OP2_202_41920_20130103_233550_outLine +BABEL_OP2_202_42497_20131003_225108_inLine +BABEL_OP2_202_42497_20131003_225108_outLine +BABEL_OP2_202_42848_20131015_223830_inLine +BABEL_OP2_202_42848_20131015_223830_outLine +BABEL_OP2_202_42883_20131016_211736_inLine +BABEL_OP2_202_42883_20131016_211736_outLine +BABEL_OP2_202_43074_20140227_192107_inLine +BABEL_OP2_202_43074_20140227_192107_outLine +BABEL_OP2_202_43368_20130930_024429_inLine +BABEL_OP2_202_43368_20130930_024429_outLine +BABEL_OP2_202_43388_20130215_192049_inLine +BABEL_OP2_202_43388_20130215_192049_outLine +BABEL_OP2_202_43588_20131011_193321_inLine +BABEL_OP2_202_43588_20131011_193321_outLine +BABEL_OP2_202_43789_20130213_194416_inLine +BABEL_OP2_202_43789_20130213_194416_outLine +BABEL_OP2_202_44114_20140221_202130_inLine +BABEL_OP2_202_44114_20140221_202130_outLine +BABEL_OP2_202_44619_20131003_023727_inLine +BABEL_OP2_202_44619_20131003_023727_outLine +BABEL_OP2_202_44678_20131128_010554_inLine +BABEL_OP2_202_44678_20131128_010554_outLine +BABEL_OP2_202_44681_20131218_213752_inLine +BABEL_OP2_202_44681_20131218_213752_outLine +BABEL_OP2_202_44681_20131218_214913_inLine +BABEL_OP2_202_44681_20131218_214913_outLine +BABEL_OP2_202_45121_20131014_040623_inLine +BABEL_OP2_202_45121_20131014_040623_outLine +BABEL_OP2_202_45374_20131217_005647_inLine +BABEL_OP2_202_45374_20131217_005647_outLine +BABEL_OP2_202_45560_20130105_195053_inLine +BABEL_OP2_202_45560_20130105_195053_outLine +BABEL_OP2_202_45642_20130109_010614_inLine +BABEL_OP2_202_45642_20130109_010614_outLine +BABEL_OP2_202_45697_20131029_204657_inLine +BABEL_OP2_202_45697_20131029_204657_outLine +BABEL_OP2_202_45699_20131202_203725_inLine +BABEL_OP2_202_45699_20131202_203725_outLine +BABEL_OP2_202_45770_20130105_212856_inLine +BABEL_OP2_202_45770_20130105_212856_outLine +BABEL_OP2_202_45777_20130930_215344_inLine +BABEL_OP2_202_45777_20130930_215344_outLine +BABEL_OP2_202_45777_20130930_220539_inLine +BABEL_OP2_202_45777_20130930_220539_outLine +BABEL_OP2_202_45851_20131011_210832_inLine +BABEL_OP2_202_45851_20131011_210832_outLine +BABEL_OP2_202_46008_20131017_180431_inLine +BABEL_OP2_202_46008_20131017_180431_outLine +BABEL_OP2_202_46333_20140225_002629_inLine +BABEL_OP2_202_46333_20140225_002629_outLine +BABEL_OP2_202_46535_20131202_194843_inLine +BABEL_OP2_202_46535_20131202_194843_outLine +BABEL_OP2_202_46712_20140130_195615_inLine +BABEL_OP2_202_46712_20140130_195615_outLine +BABEL_OP2_202_46905_20131130_194813_inLine 
+BABEL_OP2_202_46905_20131130_194813_outLine
+BABEL_OP2_202_46974_20130729_181547_inLine
+BABEL_OP2_202_46974_20130729_181547_outLine
+BABEL_OP2_202_47215_20131005_012123_inLine
+BABEL_OP2_202_47215_20131005_012123_outLine
+BABEL_OP2_202_47283_20131003_211344_inLine
+BABEL_OP2_202_47283_20131003_211344_outLine
+BABEL_OP2_202_47959_20131004_035713_inLine
+BABEL_OP2_202_47959_20131004_035713_outLine
+BABEL_OP2_202_48016_20140220_204253_inLine
+BABEL_OP2_202_48016_20140220_204253_outLine
+BABEL_OP2_202_48024_20131202_232637_inLine
+BABEL_OP2_202_48024_20131202_232637_outLine
+BABEL_OP2_202_48610_20130107_203818_inLine
+BABEL_OP2_202_48610_20130107_203818_outLine
+BABEL_OP2_202_48663_20140222_012910_inLine
+BABEL_OP2_202_48663_20140222_012910_outLine
+BABEL_OP2_202_48758_20131009_051338_inLine
+BABEL_OP2_202_48758_20131009_051338_outLine
+BABEL_OP2_202_48789_20130212_210506_inLine
+BABEL_OP2_202_48789_20130212_210506_outLine
+BABEL_OP2_202_49197_20130222_010455_inLine
+BABEL_OP2_202_49197_20130222_010455_outLine
+BABEL_OP2_202_49637_20130103_203801_inLine
+BABEL_OP2_202_49637_20130103_203801_outLine
+BABEL_OP2_202_49767_20140221_192835_inLine
+BABEL_OP2_202_49767_20140221_192835_outLine
+BABEL_OP2_202_50090_20130425_175113_inLine
+BABEL_OP2_202_50090_20130425_175113_outLine
+BABEL_OP2_202_50175_20131010_020435_inLine
+BABEL_OP2_202_50175_20131010_020435_outLine
+BABEL_OP2_202_50565_20121228_195128_inLine
+BABEL_OP2_202_50565_20121228_195128_outLine
+BABEL_OP2_202_50601_20130911_001026_inLine
+BABEL_OP2_202_50601_20130911_001026_outLine
+BABEL_OP2_202_50630_20130926_021713_inLine
+BABEL_OP2_202_50630_20130926_021713_outLine
+BABEL_OP2_202_50745_20131010_001443_inLine
+BABEL_OP2_202_50745_20131010_001443_outLine
+BABEL_OP2_202_50779_20130911_004921_inLine
+BABEL_OP2_202_50779_20130911_004921_outLine
+BABEL_OP2_202_50958_20130219_215809_inLine
+BABEL_OP2_202_50958_20130219_215809_outLine
+BABEL_OP2_202_50962_20131002_203346_inLine
+BABEL_OP2_202_50962_20131002_203346_outLine
+BABEL_OP2_202_51414_20131012_225839_inLine
+BABEL_OP2_202_51414_20131012_225839_outLine
+BABEL_OP2_202_51530_20131012_011011_inLine
+BABEL_OP2_202_51530_20131012_011011_outLine
+BABEL_OP2_202_51701_20140123_205529_inLine
+BABEL_OP2_202_51701_20140123_205529_outLine
+BABEL_OP2_202_52058_20131128_233329_inLine
+BABEL_OP2_202_52058_20131128_233329_outLine
+BABEL_OP2_202_52070_20140124_231122_inLine
+BABEL_OP2_202_52070_20140124_231122_outLine
+BABEL_OP2_202_52222_20131126_183055_inLine
+BABEL_OP2_202_52222_20131126_183055_outLine
+BABEL_OP2_202_52301_20140122_233501_inLine
+BABEL_OP2_202_52301_20140122_233501_outLine
+BABEL_OP2_202_52447_20131014_001157_inLine
+BABEL_OP2_202_52447_20131014_001157_outLine
+BABEL_OP2_202_52483_20140123_011106_inLine
+BABEL_OP2_202_52483_20140123_011106_outLine
+BABEL_OP2_202_52614_20131011_162942_inLine
+BABEL_OP2_202_52614_20131011_162942_outLine
+BABEL_OP2_202_52717_20130107_190619_inLine
+BABEL_OP2_202_52717_20130107_190619_outLine
+BABEL_OP2_202_52725_20131009_155625_inLine
+BABEL_OP2_202_52725_20131009_155625_outLine
+BABEL_OP2_202_52804_20131006_224625_inLine
+BABEL_OP2_202_52804_20131006_224625_outLine
+BABEL_OP2_202_53068_20131130_195134_inLine
+BABEL_OP2_202_53068_20131130_195134_outLine
+BABEL_OP2_202_53206_20131129_004718_inLine
+BABEL_OP2_202_53206_20131129_004718_outLine
+BABEL_OP2_202_53419_20140123_230101_inLine
+BABEL_OP2_202_53419_20140123_230101_outLine
+BABEL_OP2_202_53441_20131207_225909_inLine
+BABEL_OP2_202_53441_20131207_225909_outLine
+BABEL_OP2_202_53492_20131010_185348_inLine
+BABEL_OP2_202_53492_20131010_185348_outLine
+BABEL_OP2_202_53665_20131010_234640_inLine
+BABEL_OP2_202_53665_20131010_234640_outLine
+BABEL_OP2_202_54160_20130102_205447_inLine
+BABEL_OP2_202_54160_20130102_205447_outLine
+BABEL_OP2_202_54162_20130318_213750_inLine
+BABEL_OP2_202_54162_20130318_213750_outLine
+BABEL_OP2_202_54594_20131129_200245_inLine
+BABEL_OP2_202_54594_20131129_200245_outLine
+BABEL_OP2_202_54735_20140125_031824_inLine
+BABEL_OP2_202_54735_20140125_031824_outLine
+BABEL_OP2_202_54923_20140125_010451_inLine
+BABEL_OP2_202_54923_20140125_010451_outLine
+BABEL_OP2_202_55013_20131011_155605_inLine
+BABEL_OP2_202_55013_20131011_155605_outLine
+BABEL_OP2_202_55742_20140115_203307_inLine
+BABEL_OP2_202_55742_20140115_203307_outLine
+BABEL_OP2_202_55818_20130108_192939_inLine
+BABEL_OP2_202_55818_20130108_192939_outLine
+BABEL_OP2_202_56198_20131003_013538_inLine
+BABEL_OP2_202_56198_20131003_013538_outLine
+BABEL_OP2_202_56213_20140122_225210_inLine
+BABEL_OP2_202_56213_20140122_225210_outLine
+BABEL_OP2_202_56370_20130104_200151_inLine
+BABEL_OP2_202_56370_20130104_200151_outLine
+BABEL_OP2_202_56523_20130222_213416_inLine
+BABEL_OP2_202_56523_20130222_213416_outLine
+BABEL_OP2_202_56677_20140121_210326_inLine
+BABEL_OP2_202_56677_20140121_210326_outLine
+BABEL_OP2_202_56743_20130225_194854_inLine
+BABEL_OP2_202_56743_20130225_194854_outLine
+BABEL_OP2_202_56826_20131126_175456_inLine
+BABEL_OP2_202_56826_20131126_175456_outLine
+BABEL_OP2_202_57067_20140125_030302_inLine
+BABEL_OP2_202_57067_20140125_030302_outLine
+BABEL_OP2_202_57093_20131001_005041_inLine
+BABEL_OP2_202_57093_20131001_005041_outLine
+BABEL_OP2_202_57548_20130928_000636_inLine
+BABEL_OP2_202_57548_20130928_000636_outLine
+BABEL_OP2_202_57609_20130110_214448_inLine
+BABEL_OP2_202_57609_20130110_214448_outLine
+BABEL_OP2_202_57650_20131031_222920_inLine
+BABEL_OP2_202_57650_20131031_222920_outLine
+BABEL_OP2_202_57650_20131031_224035_inLine
+BABEL_OP2_202_57650_20131031_224035_outLine
+BABEL_OP2_202_57678_20140130_211104_inLine
+BABEL_OP2_202_57678_20140130_211104_outLine
+BABEL_OP2_202_57919_20131204_003234_inLine
+BABEL_OP2_202_57919_20131204_003234_outLine
+BABEL_OP2_202_58061_20131128_202231_inLine
+BABEL_OP2_202_58061_20131128_202231_outLine
+BABEL_OP2_202_58815_20131029_230825_inLine
+BABEL_OP2_202_58815_20131029_230825_outLine
+BABEL_OP2_202_58850_20130222_005155_inLine
+BABEL_OP2_202_58850_20130222_005155_outLine
+BABEL_OP2_202_58926_20131005_000157_inLine
+BABEL_OP2_202_58926_20131005_000157_outLine
+BABEL_OP2_202_59039_20131130_232650_inLine
+BABEL_OP2_202_59039_20131130_232650_outLine
+BABEL_OP2_202_59307_20131009_070225_inLine
+BABEL_OP2_202_59307_20131009_070225_outLine
+BABEL_OP2_202_59898_20130103_222102_inLine
+BABEL_OP2_202_59898_20130103_222102_outLine
+BABEL_OP2_202_60026_20130105_231529_inLine
+BABEL_OP2_202_60026_20130105_231529_outLine
+BABEL_OP2_202_60026_20130105_232525_inLine
+BABEL_OP2_202_60026_20130105_232525_outLine
+BABEL_OP2_202_60115_20130924_002929_inLine
+BABEL_OP2_202_60115_20130924_002929_outLine
+BABEL_OP2_202_60310_20131030_231919_inLine
+BABEL_OP2_202_60310_20131030_231919_outLine
+BABEL_OP2_202_60498_20131012_205044_inLine
+BABEL_OP2_202_60498_20131012_205044_outLine
+BABEL_OP2_202_60538_20130107_185811_inLine
+BABEL_OP2_202_60538_20130107_185811_outLine
+BABEL_OP2_202_60626_20131003_025140_inLine
+BABEL_OP2_202_60626_20131003_025140_outLine
+BABEL_OP2_202_60661_20131004_193207_inLine
+BABEL_OP2_202_60661_20131004_193207_outLine
+BABEL_OP2_202_60836_20131006_231246_inLine
+BABEL_OP2_202_60836_20131006_231246_outLine
+BABEL_OP2_202_61348_20130423_213656_inLine
+BABEL_OP2_202_61348_20130423_213656_outLine
+BABEL_OP2_202_61831_20140129_223655_inLine
+BABEL_OP2_202_61831_20140129_223655_outLine
+BABEL_OP2_202_61963_20140130_004249_inLine
+BABEL_OP2_202_61963_20140130_004249_outLine
+BABEL_OP2_202_62014_20130422_215514_inLine
+BABEL_OP2_202_62014_20130422_215514_outLine
+BABEL_OP2_202_62155_20131010_030043_inLine
+BABEL_OP2_202_62155_20131010_030043_outLine
+BABEL_OP2_202_62177_20131101_224431_inLine
+BABEL_OP2_202_62177_20131101_224431_outLine
+BABEL_OP2_202_62200_20130221_201143_inLine
+BABEL_OP2_202_62200_20130221_201143_outLine
+BABEL_OP2_202_62289_20131012_021114_inLine
+BABEL_OP2_202_62289_20131012_021114_outLine
+BABEL_OP2_202_62434_20130104_004333_inLine
+BABEL_OP2_202_62434_20130104_004333_outLine
+BABEL_OP2_202_62434_20130104_005350_inLine
+BABEL_OP2_202_62434_20130104_005350_outLine
+BABEL_OP2_202_62545_20131127_195440_inLine
+BABEL_OP2_202_62545_20131127_195440_outLine
+BABEL_OP2_202_62734_20130930_165147_inLine
+BABEL_OP2_202_62734_20130930_165147_outLine
+BABEL_OP2_202_62835_20130212_190421_inLine
+BABEL_OP2_202_62835_20130212_190421_outLine
+BABEL_OP2_202_63481_20121229_212430_inLine
+BABEL_OP2_202_63481_20121229_212430_outLine
+BABEL_OP2_202_63511_20140202_013550_inLine
+BABEL_OP2_202_63511_20140202_013550_outLine
+BABEL_OP2_202_63730_20140128_213539_inLine
+BABEL_OP2_202_63730_20140128_213539_outLine
+BABEL_OP2_202_63906_20140127_191132_inLine
+BABEL_OP2_202_63906_20140127_191132_outLine
+BABEL_OP2_202_63938_20140129_203743_inLine
+BABEL_OP2_202_63938_20140129_203743_outLine
+BABEL_OP2_202_64350_20130109_214951_inLine
+BABEL_OP2_202_64350_20130109_214951_outLine
+BABEL_OP2_202_64350_20130109_234646_inLine
+BABEL_OP2_202_64350_20130109_234646_outLine
+BABEL_OP2_202_64350_20130110_000149_inLine
+BABEL_OP2_202_64350_20130110_000149_outLine
+BABEL_OP2_202_64638_20130923_221504_inLine
+BABEL_OP2_202_64638_20130923_221504_outLine
+BABEL_OP2_202_64768_20130930_231452_inLine
+BABEL_OP2_202_64768_20130930_231452_outLine
+BABEL_OP2_202_64902_20131010_043148_inLine
+BABEL_OP2_202_64902_20131010_043148_outLine
+BABEL_OP2_202_65298_20131031_213621_inLine
+BABEL_OP2_202_65298_20131031_213621_outLine
+BABEL_OP2_202_65477_20130219_211638_inLine
+BABEL_OP2_202_65477_20130219_211638_outLine
+BABEL_OP2_202_65639_20131128_205641_inLine
+BABEL_OP2_202_65639_20131128_205641_outLine
+BABEL_OP2_202_65640_20131010_034809_inLine
+BABEL_OP2_202_65640_20131010_034809_outLine
+BABEL_OP2_202_65882_20131004_204102_inLine
+BABEL_OP2_202_65882_20131004_204102_outLine
+BABEL_OP2_202_65882_20131004_205447_inLine
+BABEL_OP2_202_65882_20131004_205447_outLine
+BABEL_OP2_202_66026_20140226_195114_inLine
+BABEL_OP2_202_66026_20140226_195114_outLine
+BABEL_OP2_202_66472_20130214_204424_inLine
+BABEL_OP2_202_66472_20130214_204424_outLine
+BABEL_OP2_202_66519_20130930_020855_inLine
+BABEL_OP2_202_66519_20130930_020855_outLine
+BABEL_OP2_202_66837_20131030_220432_inLine
+BABEL_OP2_202_66837_20131030_220432_outLine
+BABEL_OP2_202_66959_20131018_194733_inLine
+BABEL_OP2_202_66959_20131018_194733_outLine
+BABEL_OP2_202_66967_20130103_220521_inLine
+BABEL_OP2_202_66967_20130103_220521_outLine
+BABEL_OP2_202_67085_20131016_193800_inLine
+BABEL_OP2_202_67085_20131016_193800_outLine
+BABEL_OP2_202_67152_20131129_224301_inLine
+BABEL_OP2_202_67152_20131129_224301_outLine
+BABEL_OP2_202_67373_20131004_205550_inLine
+BABEL_OP2_202_67373_20131004_205550_outLine
+BABEL_OP2_202_67389_20140131_211249_inLine
+BABEL_OP2_202_67389_20140131_211249_outLine
+BABEL_OP2_202_67794_20131003_192439_inLine
+BABEL_OP2_202_67794_20131003_192439_outLine
+BABEL_OP2_202_67842_20131003_222534_inLine
+BABEL_OP2_202_67842_20131003_222534_outLine
+BABEL_OP2_202_67999_20140201_200014_inLine
+BABEL_OP2_202_67999_20140201_200014_outLine
+BABEL_OP2_202_68059_20140125_192613_inLine
+BABEL_OP2_202_68059_20140125_192613_outLine
+BABEL_OP2_202_68182_20131031_193507_inLine
+BABEL_OP2_202_68182_20131031_193507_outLine
+BABEL_OP2_202_68306_20130729_224017_inLine
+BABEL_OP2_202_68306_20130729_224017_outLine
+BABEL_OP2_202_68627_20130219_230718_inLine
+BABEL_OP2_202_68627_20130219_230718_outLine
+BABEL_OP2_202_68908_20131130_010731_inLine
+BABEL_OP2_202_68908_20131130_010731_outLine
+BABEL_OP2_202_69090_20131127_230541_inLine
+BABEL_OP2_202_69090_20131127_230541_outLine
+BABEL_OP2_202_69107_20130927_174817_inLine
+BABEL_OP2_202_69107_20130927_174817_outLine
+BABEL_OP2_202_69574_20131005_172205_inLine
+BABEL_OP2_202_69574_20131005_172205_outLine
+BABEL_OP2_202_69633_20130801_191800_inLine
+BABEL_OP2_202_69633_20130801_191800_outLine
+BABEL_OP2_202_69885_20131011_031936_inLine
+BABEL_OP2_202_69885_20131011_031936_outLine
+BABEL_OP2_202_69972_20140129_230607_inLine
+BABEL_OP2_202_69972_20140129_230607_outLine
+BABEL_OP2_202_69992_20130108_193548_inLine
+BABEL_OP2_202_69992_20130108_193548_outLine
+BABEL_OP2_202_70257_20131130_202722_inLine
+BABEL_OP2_202_70257_20131130_202722_outLine
+BABEL_OP2_202_70526_20131012_045553_inLine
+BABEL_OP2_202_70526_20131012_045553_outLine
+BABEL_OP2_202_70716_20131012_032544_inLine
+BABEL_OP2_202_70716_20131012_032544_outLine
+BABEL_OP2_202_71038_20140306_165543_inLine
+BABEL_OP2_202_71038_20140306_165543_outLine
+BABEL_OP2_202_71047_20140303_233000_inLine
+BABEL_OP2_202_71047_20140303_233000_outLine
+BABEL_OP2_202_71189_20131010_061651_inLine
+BABEL_OP2_202_71189_20131010_061651_outLine
+BABEL_OP2_202_71282_20131030_163454_inLine
+BABEL_OP2_202_71282_20131030_163454_outLine
+BABEL_OP2_202_71419_20131130_200448_inLine
+BABEL_OP2_202_71419_20131130_200448_outLine
+BABEL_OP2_202_71460_20131218_192638_inLine
+BABEL_OP2_202_71460_20131218_192638_outLine
+BABEL_OP2_202_71559_20140311_230424_inLine
+BABEL_OP2_202_71559_20140311_230424_outLine
+BABEL_OP2_202_71704_20130109_185345_inLine
+BABEL_OP2_202_71704_20130109_185345_outLine
+BABEL_OP2_202_71780_20131003_034729_inLine
+BABEL_OP2_202_71780_20131003_034729_outLine
+BABEL_OP2_202_72654_20130929_175728_inLine
+BABEL_OP2_202_72654_20130929_175728_outLine
+BABEL_OP2_202_72733_20131018_230438_inLine
+BABEL_OP2_202_72733_20131018_230438_outLine
+BABEL_OP2_202_73042_20130109_205002_inLine
+BABEL_OP2_202_73042_20130109_205002_outLine
+BABEL_OP2_202_73072_20130105_235040_inLine
+BABEL_OP2_202_73072_20130105_235040_outLine
+BABEL_OP2_202_73485_20131011_183811_inLine
+BABEL_OP2_202_73485_20131011_183811_outLine
+BABEL_OP2_202_73485_20131011_184857_inLine
+BABEL_OP2_202_73485_20131011_184857_outLine
+BABEL_OP2_202_73757_20130319_022121_inLine
+BABEL_OP2_202_73757_20130319_022121_outLine
+BABEL_OP2_202_73964_20131011_010642_inLine
+BABEL_OP2_202_73964_20131011_010642_outLine
+BABEL_OP2_202_74111_20131018_223020_inLine
+BABEL_OP2_202_74111_20131018_223020_outLine
+BABEL_OP2_202_74455_20131201_010424_inLine
+BABEL_OP2_202_74455_20131201_010424_outLine
+BABEL_OP2_202_74641_20130927_171309_inLine
+BABEL_OP2_202_74641_20130927_171309_outLine
+BABEL_OP2_202_74728_20131011_175203_inLine
+BABEL_OP2_202_74728_20131011_175203_outLine
+BABEL_OP2_202_74886_20130104_222216_inLine
+BABEL_OP2_202_74886_20130104_222216_outLine
+BABEL_OP2_202_75365_20131017_020033_inLine
+BABEL_OP2_202_75365_20131017_020033_outLine
+BABEL_OP2_202_75465_20140227_020909_inLine
+BABEL_OP2_202_75465_20140227_020909_outLine
+BABEL_OP2_202_75869_20131010_054546_inLine
+BABEL_OP2_202_75869_20131010_054546_outLine
+BABEL_OP2_202_75981_20131017_182656_inLine
+BABEL_OP2_202_75981_20131017_182656_outLine
+BABEL_OP2_202_76155_20130214_225045_inLine
+BABEL_OP2_202_76155_20130214_225045_outLine
+BABEL_OP2_202_76155_20130214_231141_inLine
+BABEL_OP2_202_76155_20130214_231141_outLine
+BABEL_OP2_202_76155_20130214_233751_inLine
+BABEL_OP2_202_76155_20130214_233751_outLine
+BABEL_OP2_202_76218_20130215_211824_inLine
+BABEL_OP2_202_76218_20130215_211824_outLine
+BABEL_OP2_202_76372_20131010_032300_inLine
+BABEL_OP2_202_76372_20131010_032300_outLine
+BABEL_OP2_202_76773_20131004_211703_inLine
+BABEL_OP2_202_76773_20131004_211703_outLine
+BABEL_OP2_202_77139_20121228_190704_inLine
+BABEL_OP2_202_77139_20121228_190704_outLine
+BABEL_OP2_202_77730_20130108_005804_inLine
+BABEL_OP2_202_77730_20130108_005804_outLine
+BABEL_OP2_202_78116_20130730_032152_inLine
+BABEL_OP2_202_78116_20130730_032152_outLine
+BABEL_OP2_202_78161_20131128_013256_inLine
+BABEL_OP2_202_78161_20131128_013256_outLine
+BABEL_OP2_202_78254_20140315_200641_inLine
+BABEL_OP2_202_79131_20131011_031533_inLine
+BABEL_OP2_202_79131_20131011_031533_outLine
+BABEL_OP2_202_79167_20130801_173136_inLine
+BABEL_OP2_202_79167_20130801_173136_outLine
+BABEL_OP2_202_79505_20140304_011515_inLine
+BABEL_OP2_202_79505_20140304_011515_outLine
+BABEL_OP2_202_79590_20130214_233631_inLine
+BABEL_OP2_202_79590_20130214_233631_outLine
+BABEL_OP2_202_79820_20131002_224612_inLine
+BABEL_OP2_202_79820_20131002_224612_outLine
+BABEL_OP2_202_79858_20131007_202121_inLine
+BABEL_OP2_202_79858_20131007_202121_outLine
+BABEL_OP2_202_80241_20131208_061751_inLine
+BABEL_OP2_202_80241_20131208_061751_outLine
+BABEL_OP2_202_80577_20131101_002029_inLine
+BABEL_OP2_202_80577_20131101_002029_outLine
+BABEL_OP2_202_80721_20131018_215413_inLine
+BABEL_OP2_202_80721_20131018_215413_outLine
+BABEL_OP2_202_81424_20130731_174939_inLine
+BABEL_OP2_202_81424_20130731_174939_outLine
+BABEL_OP2_202_81427_20130930_033601_inLine
+BABEL_OP2_202_81427_20130930_033601_outLine
+BABEL_OP2_202_81427_20130930_034540_inLine
+BABEL_OP2_202_81427_20130930_034540_outLine
+BABEL_OP2_202_81581_20131130_234413_inLine
+BABEL_OP2_202_81581_20131130_234413_outLine
+BABEL_OP2_202_81674_20131129_201042_inLine
+BABEL_OP2_202_81674_20131129_201042_outLine
+BABEL_OP2_202_81810_20130731_202723_inLine
+BABEL_OP2_202_81810_20130731_202723_outLine
+BABEL_OP2_202_81854_20131016_235937_inLine
+BABEL_OP2_202_81854_20131016_235937_outLine
+BABEL_OP2_202_82089_20130213_201744_inLine
+BABEL_OP2_202_82089_20130213_201744_outLine
+BABEL_OP2_202_82140_20130411_203406_inLine
+BABEL_OP2_202_82140_20130411_203406_outLine
+BABEL_OP2_202_82145_20131009_152735_inLine
+BABEL_OP2_202_82145_20131009_152735_outLine
+BABEL_OP2_202_82145_20131010_055122_inLine
+BABEL_OP2_202_82145_20131010_055122_outLine
+BABEL_OP2_202_82863_20130213_003624_inLine
+BABEL_OP2_202_82863_20130213_003624_outLine
+BABEL_OP2_202_82979_20131002_205506_inLine
+BABEL_OP2_202_82979_20131002_205506_outLine
+BABEL_OP2_202_83062_20131129_191922_inLine
+BABEL_OP2_202_83062_20131129_191922_outLine
+BABEL_OP2_202_83935_20130801_192224_inLine
+BABEL_OP2_202_83935_20130801_192224_outLine
+BABEL_OP2_202_83935_20130801_194402_inLine
+BABEL_OP2_202_83935_20130801_194402_outLine
+BABEL_OP2_202_84061_20130929_235409_inLine
+BABEL_OP2_202_84061_20130929_235409_outLine
+BABEL_OP2_202_84079_20131208_050702_inLine
+BABEL_OP2_202_84079_20131208_050702_outLine
+BABEL_OP2_202_84125_20121222_184258_inLine
+BABEL_OP2_202_84125_20121222_184258_outLine
+BABEL_OP2_202_84125_20121222_185218_inLine
+BABEL_OP2_202_84125_20121222_185218_outLine
+BABEL_OP2_202_84327_20130730_193322_inLine
+BABEL_OP2_202_84327_20130730_193322_outLine
+BABEL_OP2_202_84605_20131003_053508_inLine
+BABEL_OP2_202_84605_20131003_053508_outLine
+BABEL_OP2_202_84737_20131031_211648_inLine
+BABEL_OP2_202_84737_20131031_211648_outLine
+BABEL_OP2_202_84815_20131018_211832_inLine
+BABEL_OP2_202_84815_20131018_211832_outLine
+BABEL_OP2_202_84823_20131031_020506_inLine
+BABEL_OP2_202_84823_20131031_020506_outLine
+BABEL_OP2_202_85048_20130911_014859_inLine
+BABEL_OP2_202_85048_20130911_014859_outLine
+BABEL_OP2_202_85179_20131101_192951_inLine
+BABEL_OP2_202_85179_20131101_192951_outLine
+BABEL_OP2_202_85248_20131030_022406_inLine
+BABEL_OP2_202_85248_20131030_022406_outLine
+BABEL_OP2_202_85322_20130108_190627_inLine
+BABEL_OP2_202_85322_20130108_190627_outLine
+BABEL_OP2_202_85322_20130108_191905_inLine
+BABEL_OP2_202_85322_20130108_191905_outLine
+BABEL_OP2_202_85325_20131011_181734_inLine
+BABEL_OP2_202_85325_20131011_181734_outLine
+BABEL_OP2_202_85439_20131012_024821_inLine
+BABEL_OP2_202_85439_20131012_024821_outLine
+BABEL_OP2_202_86467_20121231_205911_inLine
+BABEL_OP2_202_86467_20121231_205911_outLine
+BABEL_OP2_202_86472_20130803_213443_inLine
+BABEL_OP2_202_86472_20130803_213443_outLine
+BABEL_OP2_202_86826_20131015_204931_inLine
+BABEL_OP2_202_86826_20131015_204931_outLine
+BABEL_OP2_202_86830_20131031_221935_inLine
+BABEL_OP2_202_86830_20131031_221935_outLine
+BABEL_OP2_202_87074_20140114_001320_inLine
+BABEL_OP2_202_87074_20140114_001320_outLine
+BABEL_OP2_202_87470_20130225_202639_inLine
+BABEL_OP2_202_87470_20130225_202639_outLine
+BABEL_OP2_202_87545_20131012_025318_inLine
+BABEL_OP2_202_87545_20131012_025318_outLine
+BABEL_OP2_202_87866_20131215_203616_inLine
+BABEL_OP2_202_87866_20131215_203616_outLine
+BABEL_OP2_202_87871_20131031_222231_inLine
+BABEL_OP2_202_87871_20131031_222231_outLine
+BABEL_OP2_202_87921_20131017_204018_inLine
+BABEL_OP2_202_87921_20131017_204018_outLine
+BABEL_OP2_202_88372_20131012_023925_inLine
+BABEL_OP2_202_88372_20131012_023925_outLine
+BABEL_OP2_202_88550_20131017_004344_inLine
+BABEL_OP2_202_88550_20131017_004344_outLine
+BABEL_OP2_202_88550_20131017_005456_inLine
+BABEL_OP2_202_88550_20131017_005456_outLine
+BABEL_OP2_202_88601_20130212_205048_inLine
+BABEL_OP2_202_88601_20130212_205048_outLine
+BABEL_OP2_202_88873_20131004_003616_inLine
+BABEL_OP2_202_88873_20131004_003616_outLine
+BABEL_OP2_202_89226_20131203_030320_inLine
+BABEL_OP2_202_89226_20131203_030320_outLine
+BABEL_OP2_202_89560_20131018_222518_inLine
+BABEL_OP2_202_89560_20131018_222518_outLine
+BABEL_OP2_202_89650_20131202_204623_inLine
+BABEL_OP2_202_89650_20131202_204623_outLine
+BABEL_OP2_202_89718_20131203_002623_inLine
+BABEL_OP2_202_89718_20131203_002623_outLine
+BABEL_OP2_202_89888_20130109_184456_inLine
+BABEL_OP2_202_89888_20130109_184456_outLine
+BABEL_OP2_202_90935_20130226_232117_inLine
+BABEL_OP2_202_90935_20130226_232117_outLine
+BABEL_OP2_202_91189_20131017_013603_inLine
+BABEL_OP2_202_91189_20131017_013603_outLine
+BABEL_OP2_202_91336_20130318_212106_inLine
+BABEL_OP2_202_91336_20130318_212106_outLine
+BABEL_OP2_202_91411_20131130_013112_inLine
+BABEL_OP2_202_91411_20131130_013112_outLine
+BABEL_OP2_202_91581_20131018_012025_inLine
+BABEL_OP2_202_91581_20131018_012025_outLine
+BABEL_OP2_202_91808_20131204_000439_inLine
+BABEL_OP2_202_91808_20131204_000439_outLine
+BABEL_OP2_202_91930_20131009_204054_inLine
+BABEL_OP2_202_91930_20131009_204054_outLine
+BABEL_OP2_202_91971_20131203_013031_inLine
+BABEL_OP2_202_91971_20131203_013031_outLine
+BABEL_OP2_202_91977_20130803_020205_inLine
+BABEL_OP2_202_91977_20130803_020205_outLine
+BABEL_OP2_202_92096_20131010_010207_inLine
+BABEL_OP2_202_92096_20131010_010207_outLine
+BABEL_OP2_202_92356_20140319_233703_inLine
+BABEL_OP2_202_92356_20140319_233703_outLine
+BABEL_OP2_202_92459_20131001_210517_inLine
+BABEL_OP2_202_92459_20131001_210517_outLine
+BABEL_OP2_202_92509_20121228_220632_inLine
+BABEL_OP2_202_92509_20121228_220632_outLine
+BABEL_OP2_202_92698_20130930_170131_inLine
+BABEL_OP2_202_92698_20130930_170131_outLine
+BABEL_OP2_202_92698_20130930_171329_inLine
+BABEL_OP2_202_92698_20130930_171329_outLine
+BABEL_OP2_202_92757_20131012_012455_inLine
+BABEL_OP2_202_92757_20131012_012455_outLine
+BABEL_OP2_202_92809_20131010_013656_inLine
+BABEL_OP2_202_92809_20131010_013656_outLine
+BABEL_OP2_202_92941_20131001_030226_inLine
+BABEL_OP2_202_92941_20131001_030226_outLine
+BABEL_OP2_202_93411_20130411_203410_inLine
+BABEL_OP2_202_93411_20130411_203410_outLine
+BABEL_OP2_202_93475_20140115_204518_inLine
+BABEL_OP2_202_93475_20140115_204518_outLine
+BABEL_OP2_202_93515_20131012_015923_inLine
+BABEL_OP2_202_93515_20131012_015923_outLine
+BABEL_OP2_202_93861_20130417_181331_inLine
+BABEL_OP2_202_93861_20130417_181331_outLine
+BABEL_OP2_202_93861_20130417_184517_inLine
+BABEL_OP2_202_93861_20130417_184517_outLine
+BABEL_OP2_202_93946_20131018_213959_inLine
+BABEL_OP2_202_93946_20131018_213959_outLine
+BABEL_OP2_202_93964_20130411_173113_inLine
+BABEL_OP2_202_93964_20130411_173113_outLine
+BABEL_OP2_202_93964_20130411_174717_inLine
+BABEL_OP2_202_93964_20130411_174717_outLine
+BABEL_OP2_202_94044_20131127_234911_inLine
+BABEL_OP2_202_94044_20131127_234911_outLine
+BABEL_OP2_202_94166_20131101_013342_inLine
+BABEL_OP2_202_94166_20131101_013342_outLine
+BABEL_OP2_202_94212_20131129_213734_inLine
+BABEL_OP2_202_94212_20131129_213734_outLine
+BABEL_OP2_202_94442_20131014_165222_inLine
+BABEL_OP2_202_94442_20131014_165222_outLine
+BABEL_OP2_202_94465_20131018_014837_inLine
+BABEL_OP2_202_94465_20131018_014837_outLine
+BABEL_OP2_202_94487_20131011_165627_inLine
+BABEL_OP2_202_94487_20131011_165627_outLine
+BABEL_OP2_202_94713_20131130_020453_inLine
+BABEL_OP2_202_94713_20131130_020453_outLine
+BABEL_OP2_202_94745_20130807_024052_inLine
+BABEL_OP2_202_94745_20130807_024052_outLine
+BABEL_OP2_202_95269_20130228_201037_inLine
+BABEL_OP2_202_95269_20130228_201037_outLine
+BABEL_OP2_202_95583_20130104_184957_inLine
+BABEL_OP2_202_95583_20130104_184957_outLine
+BABEL_OP2_202_95598_20130207_212051_inLine
+BABEL_OP2_202_95598_20130207_212051_outLine
+BABEL_OP2_202_95677_20131216_002743_inLine
+BABEL_OP2_202_95677_20131216_002743_outLine
+BABEL_OP2_202_95942_20131009_231612_inLine
+BABEL_OP2_202_95942_20131009_231612_outLine
+BABEL_OP2_202_95966_20130216_005201_inLine
+BABEL_OP2_202_95966_20130216_005201_outLine
+BABEL_OP2_202_95966_20130216_010600_inLine
+BABEL_OP2_202_95966_20130216_010600_outLine
+BABEL_OP2_202_96041_20140317_233707_inLine
+BABEL_OP2_202_96041_20140317_233707_outLine
+BABEL_OP2_202_96059_20131012_001057_inLine
+BABEL_OP2_202_96059_20131012_001057_outLine
+BABEL_OP2_202_96077_20131215_014408_inLine
+BABEL_OP2_202_96077_20131215_014408_outLine
+BABEL_OP2_202_96158_20131127_202846_inLine
+BABEL_OP2_202_96158_20131127_202846_outLine
+BABEL_OP2_202_96190_20140114_004611_inLine
+BABEL_OP2_202_96190_20140114_004611_outLine
+BABEL_OP2_202_96205_20130213_183412_inLine
+BABEL_OP2_202_96205_20130213_183412_outLine
+BABEL_OP2_202_96405_20131002_203007_inLine
+BABEL_OP2_202_96405_20131002_203007_outLine
+BABEL_OP2_202_96446_20130103_231919_inLine
+BABEL_OP2_202_96446_20130103_231919_outLine
+BABEL_OP2_202_96446_20130103_232611_inLine
+BABEL_OP2_202_96446_20130103_232611_outLine
+BABEL_OP2_202_96934_20131001_205011_inLine
+BABEL_OP2_202_96934_20131001_205011_outLine
+BABEL_OP2_202_97097_20131010_022340_inLine
+BABEL_OP2_202_97097_20131010_022340_outLine
+BABEL_OP2_202_97448_20131202_225423_inLine
+BABEL_OP2_202_97448_20131202_225423_outLine
+BABEL_OP2_202_97896_20130222_200148_inLine
+BABEL_OP2_202_97896_20130222_200148_outLine
+BABEL_OP2_202_97896_20130222_201339_inLine
+BABEL_OP2_202_97896_20130222_201339_outLine
+BABEL_OP2_202_97988_20131017_202448_inLine
+BABEL_OP2_202_97988_20131017_202448_outLine
+BABEL_OP2_202_98165_20130928_235834_inLine
+BABEL_OP2_202_98165_20130928_235834_outLine
+BABEL_OP2_202_98165_20130929_001916_inLine
+BABEL_OP2_202_98165_20130929_001916_outLine
+BABEL_OP2_202_98255_20131130_002114_inLine
+BABEL_OP2_202_98255_20131130_002114_outLine
+BABEL_OP2_202_98365_20130912_012649_inLine
+BABEL_OP2_202_98365_20130912_012649_outLine
+BABEL_OP2_202_98365_20130912_013735_inLine
+BABEL_OP2_202_98365_20130912_013735_outLine
+BABEL_OP2_202_98489_20140113_195524_inLine
+BABEL_OP2_202_98489_20140113_195524_outLine
+BABEL_OP2_202_98506_20131009_055751_inLine
+BABEL_OP2_202_98506_20131009_055751_outLine
+BABEL_OP2_202_98565_20131204_010715_inLine
+BABEL_OP2_202_98565_20131204_010715_outLine
+BABEL_OP2_202_98888_20130214_225058_inLine
+BABEL_OP2_202_98888_20130214_225058_outLine
+BABEL_OP2_202_99202_20130111_190008_inLine
+BABEL_OP2_202_99202_20130111_190008_outLine
+BABEL_OP2_202_99487_20130109_013911_inLine
+BABEL_OP2_202_99487_20130109_013911_outLine
+BABEL_OP2_202_99920_20130109_211943_inLine
+BABEL_OP2_202_99920_20130109_211943_outLine
+BABEL_OP2_202_99952_20131016_024323_inLine
+BABEL_OP2_202_99952_20131016_024323_outLine
+BABEL_OP2_202_99975_20131127_204148_inLine
+BABEL_OP2_202_99975_20131127_204148_outLine
diff --git a/egs/babel/s5d/conf/lists/202-swahili/evalpart1.list b/egs/babel/s5d/conf/lists/202-swahili/evalpart1.list
new file mode 100644
index 00000000000..c01647b6d12
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/202-swahili/evalpart1.list
@@ -0,0 +1,196 @@
+BABEL_OP2_202_10019_20130928_235503_inLine
+BABEL_OP2_202_10019_20130928_235503_outLine
+BABEL_OP2_202_10416_20130215_183832_inLine
+BABEL_OP2_202_10416_20130215_183832_outLine
+BABEL_OP2_202_12321_20140210_015215_inLine
+BABEL_OP2_202_12321_20140210_015215_outLine
+BABEL_OP2_202_13040_20131005_180024_inLine
+BABEL_OP2_202_13040_20131005_180024_outLine
+BABEL_OP2_202_13929_20140205_042603_inLine
+BABEL_OP2_202_13929_20140205_042603_outLine
+BABEL_OP2_202_14537_20131016_202630_inLine
+BABEL_OP2_202_14537_20131016_202630_outLine
+BABEL_OP2_202_16407_20131203_231519_inLine
+BABEL_OP2_202_16407_20131203_231519_outLine
+BABEL_OP2_202_16787_20130220_000429_inLine
+BABEL_OP2_202_16787_20130220_000429_outLine
+BABEL_OP2_202_17511_20140205_051449_inLine
+BABEL_OP2_202_17511_20140205_051449_outLine
+BABEL_OP2_202_19545_20130927_190707_inLine
+BABEL_OP2_202_19545_20130927_190707_outLine
+BABEL_OP2_202_20738_20131029_183614_inLine
+BABEL_OP2_202_20738_20131029_183614_outLine
+BABEL_OP2_202_20896_20131220_001523_inLine
+BABEL_OP2_202_20896_20131220_001523_outLine
+BABEL_OP2_202_21794_20130219_010105_inLine
+BABEL_OP2_202_21794_20130219_010105_outLine
+BABEL_OP2_202_22641_20131011_032157_inLine
+BABEL_OP2_202_22641_20131011_032157_outLine
+BABEL_OP2_202_23355_20131128_005023_inLine
+BABEL_OP2_202_23355_20131128_005023_outLine
+BABEL_OP2_202_23731_20130930_020336_inLine
+BABEL_OP2_202_23731_20130930_020336_outLine
+BABEL_OP2_202_24924_20140207_235730_inLine
+BABEL_OP2_202_24924_20140207_235730_outLine
+BABEL_OP2_202_26869_20131216_035718_inLine
+BABEL_OP2_202_26869_20131216_035718_outLine
+BABEL_OP2_202_28422_20130924_010422_inLine
+BABEL_OP2_202_28422_20130924_010422_outLine
+BABEL_OP2_202_30250_20121228_195004_inLine
+BABEL_OP2_202_30250_20121228_195004_outLine
+BABEL_OP2_202_30250_20121228_195937_inLine
+BABEL_OP2_202_30250_20121228_195937_outLine
+BABEL_OP2_202_30497_20131010_013817_inLine
+BABEL_OP2_202_30497_20131010_013817_outLine
+BABEL_OP2_202_31484_20130912_200823_inLine
+BABEL_OP2_202_31484_20130912_200823_outLine
+BABEL_OP2_202_32832_20140108_220009_inLine
+BABEL_OP2_202_32832_20140108_220009_outLine
+BABEL_OP2_202_36505_20131029_215901_inLine
+BABEL_OP2_202_36505_20131029_215901_outLine
+BABEL_OP2_202_38664_20130315_010258_inLine
+BABEL_OP2_202_38664_20130315_010258_outLine
+BABEL_OP2_202_38741_20131003_014930_inLine
+BABEL_OP2_202_38741_20131003_014930_outLine
+BABEL_OP2_202_39277_20131127_183847_inLine
+BABEL_OP2_202_39277_20131127_183847_outLine
+BABEL_OP2_202_41109_20140114_212001_inLine
+BABEL_OP2_202_41109_20140114_212001_outLine
+BABEL_OP2_202_44678_20131128_010554_inLine
+BABEL_OP2_202_44678_20131128_010554_outLine
+BABEL_OP2_202_44681_20131218_213752_inLine
+BABEL_OP2_202_44681_20131218_213752_outLine
+BABEL_OP2_202_44681_20131218_214913_inLine
+BABEL_OP2_202_44681_20131218_214913_outLine
+BABEL_OP2_202_45777_20130930_215344_inLine
+BABEL_OP2_202_45777_20130930_215344_outLine
+BABEL_OP2_202_45777_20130930_220539_inLine
+BABEL_OP2_202_45777_20130930_220539_outLine
+BABEL_OP2_202_46333_20140225_002629_inLine
+BABEL_OP2_202_46333_20140225_002629_outLine
+BABEL_OP2_202_46974_20130729_181547_inLine
+BABEL_OP2_202_46974_20130729_181547_outLine
+BABEL_OP2_202_47959_20131004_035713_inLine
+BABEL_OP2_202_47959_20131004_035713_outLine
+BABEL_OP2_202_48016_20140220_204253_inLine
+BABEL_OP2_202_48016_20140220_204253_outLine
+BABEL_OP2_202_48758_20131009_051338_inLine
+BABEL_OP2_202_48758_20131009_051338_outLine
+BABEL_OP2_202_49637_20130103_203801_inLine
+BABEL_OP2_202_49637_20130103_203801_outLine
+BABEL_OP2_202_50630_20130926_021713_inLine
+BABEL_OP2_202_50630_20130926_021713_outLine
+BABEL_OP2_202_50958_20130219_215809_inLine
+BABEL_OP2_202_50958_20130219_215809_outLine
+BABEL_OP2_202_50962_20131002_203346_inLine
+BABEL_OP2_202_50962_20131002_203346_outLine
+BABEL_OP2_202_51414_20131012_225839_inLine
+BABEL_OP2_202_51414_20131012_225839_outLine
+BABEL_OP2_202_52070_20140124_231122_inLine
+BABEL_OP2_202_52070_20140124_231122_outLine
+BABEL_OP2_202_52222_20131126_183055_inLine
+BABEL_OP2_202_52222_20131126_183055_outLine
+BABEL_OP2_202_52447_20131014_001157_inLine
+BABEL_OP2_202_52447_20131014_001157_outLine
+BABEL_OP2_202_52614_20131011_162942_inLine
+BABEL_OP2_202_52614_20131011_162942_outLine
+BABEL_OP2_202_53206_20131129_004718_inLine
+BABEL_OP2_202_53206_20131129_004718_outLine
+BABEL_OP2_202_55742_20140115_203307_inLine
+BABEL_OP2_202_55742_20140115_203307_outLine
+BABEL_OP2_202_56523_20130222_213416_inLine
+BABEL_OP2_202_56523_20130222_213416_outLine
+BABEL_OP2_202_57650_20131031_222920_inLine
+BABEL_OP2_202_57650_20131031_222920_outLine
+BABEL_OP2_202_57650_20131031_224035_inLine
+BABEL_OP2_202_57650_20131031_224035_outLine
+BABEL_OP2_202_60626_20131003_025140_inLine
+BABEL_OP2_202_60626_20131003_025140_outLine
+BABEL_OP2_202_62155_20131010_030043_inLine
+BABEL_OP2_202_62155_20131010_030043_outLine
+BABEL_OP2_202_62434_20130104_004333_inLine
+BABEL_OP2_202_62434_20130104_004333_outLine
+BABEL_OP2_202_62434_20130104_005350_inLine
+BABEL_OP2_202_62434_20130104_005350_outLine
+BABEL_OP2_202_62835_20130212_190421_inLine
+BABEL_OP2_202_62835_20130212_190421_outLine
+BABEL_OP2_202_63481_20121229_212430_inLine
+BABEL_OP2_202_63481_20121229_212430_outLine
+BABEL_OP2_202_63511_20140202_013550_inLine
+BABEL_OP2_202_63511_20140202_013550_outLine
+BABEL_OP2_202_64638_20130923_221504_inLine
+BABEL_OP2_202_64638_20130923_221504_outLine
+BABEL_OP2_202_66959_20131018_194733_inLine
+BABEL_OP2_202_66959_20131018_194733_outLine
+BABEL_OP2_202_66967_20130103_220521_inLine
+BABEL_OP2_202_66967_20130103_220521_outLine
+BABEL_OP2_202_67373_20131004_205550_inLine
+BABEL_OP2_202_67373_20131004_205550_outLine
+BABEL_OP2_202_67794_20131003_192439_inLine
+BABEL_OP2_202_67794_20131003_192439_outLine
+BABEL_OP2_202_69090_20131127_230541_inLine
+BABEL_OP2_202_69090_20131127_230541_outLine
+BABEL_OP2_202_69972_20140129_230607_inLine
+BABEL_OP2_202_69972_20140129_230607_outLine
+BABEL_OP2_202_71282_20131030_163454_inLine
+BABEL_OP2_202_71282_20131030_163454_outLine
+BABEL_OP2_202_71704_20130109_185345_inLine
+BABEL_OP2_202_71704_20130109_185345_outLine
+BABEL_OP2_202_73072_20130105_235040_inLine
+BABEL_OP2_202_73072_20130105_235040_outLine
+BABEL_OP2_202_74111_20131018_223020_inLine
+BABEL_OP2_202_74111_20131018_223020_outLine
+BABEL_OP2_202_74641_20130927_171309_inLine
+BABEL_OP2_202_74641_20130927_171309_outLine
+BABEL_OP2_202_76773_20131004_211703_inLine
+BABEL_OP2_202_76773_20131004_211703_outLine
+BABEL_OP2_202_83062_20131129_191922_inLine
+BABEL_OP2_202_83062_20131129_191922_outLine
+BABEL_OP2_202_84327_20130730_193322_inLine
+BABEL_OP2_202_84327_20130730_193322_outLine
+BABEL_OP2_202_87545_20131012_025318_inLine
+BABEL_OP2_202_87545_20131012_025318_outLine
+BABEL_OP2_202_89718_20131203_002623_inLine
+BABEL_OP2_202_89718_20131203_002623_outLine
+BABEL_OP2_202_90935_20130226_232117_inLine
+BABEL_OP2_202_90935_20130226_232117_outLine
+BABEL_OP2_202_91930_20131009_204054_inLine
+BABEL_OP2_202_91930_20131009_204054_outLine
+BABEL_OP2_202_91971_20131203_013031_inLine
+BABEL_OP2_202_91971_20131203_013031_outLine
+BABEL_OP2_202_92698_20130930_170131_inLine
+BABEL_OP2_202_92698_20130930_170131_outLine
+BABEL_OP2_202_92698_20130930_171329_inLine
+BABEL_OP2_202_92698_20130930_171329_outLine
+BABEL_OP2_202_93861_20130417_181331_inLine
+BABEL_OP2_202_93861_20130417_181331_outLine
+BABEL_OP2_202_93861_20130417_184517_inLine
+BABEL_OP2_202_93861_20130417_184517_outLine
+BABEL_OP2_202_93946_20131018_213959_inLine
+BABEL_OP2_202_93946_20131018_213959_outLine
+BABEL_OP2_202_94166_20131101_013342_inLine
+BABEL_OP2_202_94166_20131101_013342_outLine
+BABEL_OP2_202_94212_20131129_213734_inLine
+BABEL_OP2_202_94212_20131129_213734_outLine
+BABEL_OP2_202_95966_20130216_005201_inLine
+BABEL_OP2_202_95966_20130216_005201_outLine
+BABEL_OP2_202_95966_20130216_010600_inLine
+BABEL_OP2_202_95966_20130216_010600_outLine
+BABEL_OP2_202_96041_20140317_233707_inLine
+BABEL_OP2_202_96041_20140317_233707_outLine
+BABEL_OP2_202_96059_20131012_001057_inLine
+BABEL_OP2_202_96059_20131012_001057_outLine
+BABEL_OP2_202_96205_20130213_183412_inLine
+BABEL_OP2_202_96205_20130213_183412_outLine
+BABEL_OP2_202_96934_20131001_205011_inLine
+BABEL_OP2_202_96934_20131001_205011_outLine
+BABEL_OP2_202_97097_20131010_022340_inLine
+BABEL_OP2_202_97097_20131010_022340_outLine
+BABEL_OP2_202_97448_20131202_225423_inLine
+BABEL_OP2_202_97448_20131202_225423_outLine
+BABEL_OP2_202_98255_20131130_002114_inLine
+BABEL_OP2_202_98255_20131130_002114_outLine
+BABEL_OP2_202_98888_20130214_225058_inLine
+BABEL_OP2_202_98888_20130214_225058_outLine
+BABEL_OP2_202_99487_20130109_013911_inLine
+BABEL_OP2_202_99487_20130109_013911_outLine
diff --git a/egs/babel/s5d/conf/lists/202-swahili/sub-train.list b/egs/babel/s5d/conf/lists/202-swahili/sub-train.list
new file mode 100644
index 00000000000..ec4d25cd88a
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/202-swahili/sub-train.list
@@ -0,0 +1,128 @@
+BABEL_OP2_202_11859_20140206_193130_inLine
+BABEL_OP2_202_11859_20140206_193130_outLine
+BABEL_OP2_202_14719_20131126_223914_inLine
+BABEL_OP2_202_14719_20131126_223914_outLine
+BABEL_OP2_202_16838_20140204_225359_inLine
+BABEL_OP2_202_16838_20140204_225359_outLine
+BABEL_OP2_202_21206_20140207_213800_inLine
+BABEL_OP2_202_21206_20140207_213800_outLine
+BABEL_OP2_202_24501_20140205_231355_inLine
+BABEL_OP2_202_24501_20140205_231355_outLine
+BABEL_OP2_202_27189_20131216_001758_inLine
+BABEL_OP2_202_27189_20131216_001758_outLine
+BABEL_OP2_202_28522_20130925_000938_inLine
+BABEL_OP2_202_28522_20130925_000938_outLine
+BABEL_OP2_202_28644_20140205_001525_inLine
+BABEL_OP2_202_28644_20140205_001525_outLine
+BABEL_OP2_202_30280_20140220_001618_inLine
+BABEL_OP2_202_30280_20140220_001618_outLine
+BABEL_OP2_202_30432_20130502_210534_inLine
+BABEL_OP2_202_30432_20130502_210534_outLine
+BABEL_OP2_202_30432_20130503_175016_inLine
+BABEL_OP2_202_30432_20130503_175016_outLine
+BABEL_OP2_202_30645_20130108_200114_inLine
+BABEL_OP2_202_30645_20130108_200114_outLine
+BABEL_OP2_202_32837_20131101_203319_inLine
+BABEL_OP2_202_32837_20131101_203319_outLine
+BABEL_OP2_202_35609_20140220_193923_inLine
+BABEL_OP2_202_35609_20140220_193923_outLine
+BABEL_OP2_202_38963_20131215_232437_inLine
+BABEL_OP2_202_38963_20131215_232437_outLine
+BABEL_OP2_202_43395_20140220_223151_inLine
+BABEL_OP2_202_43395_20140220_223151_outLine
+BABEL_OP2_202_46770_20140223_234733_inLine
+BABEL_OP2_202_46770_20140223_234733_outLine
+BABEL_OP2_202_48243_20131009_224543_inLine
+BABEL_OP2_202_48243_20131009_224543_outLine
+BABEL_OP2_202_48422_20140225_220708_inLine
+BABEL_OP2_202_48422_20140225_220708_outLine
+BABEL_OP2_202_51156_20131216_015429_inLine
+BABEL_OP2_202_51156_20131216_015429_outLine
+BABEL_OP2_202_51484_20140123_220444_inLine
+BABEL_OP2_202_51484_20140123_220444_outLine
+BABEL_OP2_202_51611_20130109_194912_inLine
+BABEL_OP2_202_51611_20130109_194912_outLine
+BABEL_OP2_202_53063_20140124_000041_inLine
+BABEL_OP2_202_53063_20140124_000041_outLine
+BABEL_OP2_202_54074_20140123_205035_inLine
+BABEL_OP2_202_54074_20140123_205035_outLine
+BABEL_OP2_202_54841_20140122_195114_inLine
+BABEL_OP2_202_54841_20140122_195114_outLine
+BABEL_OP2_202_54841_20140122_200157_inLine
+BABEL_OP2_202_54841_20140122_200157_outLine
+BABEL_OP2_202_55259_20130930_023554_inLine
+BABEL_OP2_202_55259_20130930_023554_outLine
+BABEL_OP2_202_55349_20131010_002325_inLine
+BABEL_OP2_202_55349_20131010_002325_outLine
+BABEL_OP2_202_56306_20140122_204419_inLine
+BABEL_OP2_202_56306_20140122_204419_outLine
+BABEL_OP2_202_56465_20140122_194039_inLine
+BABEL_OP2_202_56465_20140122_194039_outLine
+BABEL_OP2_202_57782_20140129_231340_inLine
+BABEL_OP2_202_57782_20140129_231340_outLine
+BABEL_OP2_202_59720_20130930_032445_inLine
+BABEL_OP2_202_59720_20130930_032445_outLine
+BABEL_OP2_202_60477_20140201_200420_inLine
+BABEL_OP2_202_60477_20140201_200420_outLine
+BABEL_OP2_202_60778_20131201_233949_inLine
+BABEL_OP2_202_60778_20131201_233949_outLine
+BABEL_OP2_202_61040_20140227_003457_inLine
+BABEL_OP2_202_61040_20140227_003457_outLine
+BABEL_OP2_202_63670_20140130_231139_inLine
+BABEL_OP2_202_63670_20140130_231139_outLine
+BABEL_OP2_202_65466_20131010_013521_inLine
+BABEL_OP2_202_65466_20131010_013521_outLine
+BABEL_OP2_202_66001_20130107_194345_inLine
+BABEL_OP2_202_66001_20130107_194345_outLine
+BABEL_OP2_202_66045_20130410_204151_inLine
+BABEL_OP2_202_66045_20130410_204151_outLine
+BABEL_OP2_202_66045_20130410_211501_inLine
+BABEL_OP2_202_66045_20130410_211501_outLine
+BABEL_OP2_202_67401_20130912_043928_inLine
+BABEL_OP2_202_67401_20130912_043928_outLine
+BABEL_OP2_202_67964_20140125_232737_inLine
+BABEL_OP2_202_67964_20140125_232737_outLine
+BABEL_OP2_202_68748_20130803_201133_inLine
+BABEL_OP2_202_68748_20130803_201133_outLine
+BABEL_OP2_202_71976_20131128_193641_inLine
+BABEL_OP2_202_71976_20131128_193641_outLine
+BABEL_OP2_202_74121_20130220_195721_inLine
+BABEL_OP2_202_74121_20130220_195721_outLine
+BABEL_OP2_202_74121_20130220_201735_inLine
+BABEL_OP2_202_74121_20130220_201735_outLine
+BABEL_OP2_202_75064_20140226_232411_inLine
+BABEL_OP2_202_75064_20140226_232411_outLine
+BABEL_OP2_202_75261_20140311_002541_inLine
+BABEL_OP2_202_75261_20140311_002541_outLine
+BABEL_OP2_202_75812_20131127_193133_inLine
+BABEL_OP2_202_75812_20131127_193133_outLine
+BABEL_OP2_202_76499_20130412_201900_inLine
+BABEL_OP2_202_76499_20130412_201900_outLine
+BABEL_OP2_202_77033_20140312_034901_inLine
+BABEL_OP2_202_77033_20140312_034901_outLine
+BABEL_OP2_202_79045_20140310_212332_inLine
+BABEL_OP2_202_79045_20140310_212332_outLine
+BABEL_OP2_202_80306_20130928_232209_inLine
+BABEL_OP2_202_80306_20130928_232209_outLine
+BABEL_OP2_202_80989_20131016_213255_inLine
+BABEL_OP2_202_80989_20131016_213255_outLine
+BABEL_OP2_202_81622_20130218_232606_inLine
+BABEL_OP2_202_81622_20130218_232606_outLine
+BABEL_OP2_202_83625_20131130_222251_inLine
+BABEL_OP2_202_83625_20131130_222251_outLine
+BABEL_OP2_202_84194_20131130_024921_inLine
+BABEL_OP2_202_84194_20131130_024921_outLine
+BABEL_OP2_202_84408_20130306_184336_inLine
+BABEL_OP2_202_84408_20130306_184336_outLine
+BABEL_OP2_202_84768_20130107_194303_inLine
+BABEL_OP2_202_84768_20130107_194303_outLine
+BABEL_OP2_202_87305_20131016_225546_inLine
+BABEL_OP2_202_87305_20131016_225546_outLine
+BABEL_OP2_202_89695_20130215_224831_inLine
+BABEL_OP2_202_89695_20130215_224831_outLine
+BABEL_OP2_202_90740_20131120_195825_inLine
+BABEL_OP2_202_90740_20131120_195825_outLine
+BABEL_OP2_202_91478_20131127_031740_inLine
+BABEL_OP2_202_91478_20131127_031740_outLine
+BABEL_OP2_202_95231_20131128_211454_inLine
+BABEL_OP2_202_95231_20131128_211454_outLine
diff --git a/egs/babel/s5d/conf/lists/202-swahili/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/202-swahili/sub-train.untranscribed.list
new file mode 100644
index 00000000000..6f18d1b31d9
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/202-swahili/sub-train.untranscribed.list
@@ -0,0 +1,397 @@
+BABEL_OP2_202_10002_20131130_011225_inLine
+BABEL_OP2_202_10002_20131130_011225_outLine
+BABEL_OP2_202_10184_20130214_193710_inLine
+BABEL_OP2_202_10184_20130214_193710_outLine
+BABEL_OP2_202_10464_20131203_215404_inLine
+BABEL_OP2_202_10464_20131203_215404_outLine
+BABEL_OP2_202_10647_20131009_183755_inLine
+BABEL_OP2_202_10647_20131009_183755_outLine
+BABEL_OP2_202_10966_20131219_004736_inLine
+BABEL_OP2_202_10966_20131219_004736_outLine
+BABEL_OP2_202_11310_20131220_011737_inLine
+BABEL_OP2_202_11310_20131220_011737_outLine
+BABEL_OP2_202_11352_20131120_175331_inLine
+BABEL_OP2_202_11352_20131120_175331_outLine
+BABEL_OP2_202_11528_20131126_194053_inLine
+BABEL_OP2_202_11528_20131126_194053_outLine
+BABEL_OP2_202_12846_20140207_070059_inLine
+BABEL_OP2_202_12846_20140207_070059_outLine
+BABEL_OP2_202_12846_20140207_072228_inLine
+BABEL_OP2_202_12846_20140207_072228_outLine
+BABEL_OP2_202_13126_20131010_154341_inLine
+BABEL_OP2_202_13126_20131010_154341_outLine
+BABEL_OP2_202_13189_20131218_191846_inLine
+BABEL_OP2_202_13189_20131218_191846_outLine
+BABEL_OP2_202_13490_20130410_232045_inLine
+BABEL_OP2_202_13490_20130410_232045_outLine
+BABEL_OP2_202_13561_20130927_174413_inLine
+BABEL_OP2_202_13561_20130927_174413_outLine
+BABEL_OP2_202_14929_20130215_230011_inLine
+BABEL_OP2_202_14929_20130215_230011_outLine
+BABEL_OP2_202_15024_20130211_211646_inLine
+BABEL_OP2_202_15024_20130211_211646_outLine
+BABEL_OP2_202_15281_20131017_173858_inLine
+BABEL_OP2_202_15281_20131017_173858_outLine
+BABEL_OP2_202_16149_20130108_192505_inLine
+BABEL_OP2_202_16149_20130108_192505_outLine
+BABEL_OP2_202_16839_20131218_202752_inLine
+BABEL_OP2_202_16839_20131218_202752_outLine
+BABEL_OP2_202_16886_20130219_213720_inLine
+BABEL_OP2_202_16886_20130219_213720_outLine
+BABEL_OP2_202_17472_20131128_215323_inLine
+BABEL_OP2_202_17472_20131128_215323_outLine
+BABEL_OP2_202_18242_20131203_010326_inLine
+BABEL_OP2_202_18242_20131203_010326_outLine
+BABEL_OP2_202_18490_20140109_200346_inLine
+BABEL_OP2_202_18490_20140109_200346_outLine
+BABEL_OP2_202_18566_20140209_233124_inLine
+BABEL_OP2_202_18566_20140209_233124_outLine
+BABEL_OP2_202_19589_20131016_205832_inLine
+BABEL_OP2_202_19589_20131016_205832_outLine
+BABEL_OP2_202_19877_20131011_005357_inLine
+BABEL_OP2_202_19877_20131011_005357_outLine
+BABEL_OP2_202_21624_20131009_200818_inLine
+BABEL_OP2_202_21624_20131009_200818_outLine
+BABEL_OP2_202_21807_20130926_194526_inLine
+BABEL_OP2_202_21807_20130926_194526_outLine
+BABEL_OP2_202_22643_20131126_221057_inLine
+BABEL_OP2_202_22643_20131126_221057_outLine
+BABEL_OP2_202_22918_20131031_201038_inLine
+BABEL_OP2_202_22918_20131031_201038_outLine
+BABEL_OP2_202_23092_20131018_200124_inLine
+BABEL_OP2_202_23092_20131018_200124_outLine
+BABEL_OP2_202_23153_20130220_213017_inLine
+BABEL_OP2_202_23153_20130220_213017_outLine
+BABEL_OP2_202_23190_20130308_215320_inLine
+BABEL_OP2_202_23190_20130308_215320_outLine
+BABEL_OP2_202_23195_20140205_001534_inLine
+BABEL_OP2_202_23195_20140205_001534_outLine
+BABEL_OP2_202_24010_20140204_221739_inLine
+BABEL_OP2_202_24010_20140204_221739_outLine
+BABEL_OP2_202_24241_20140218_231626_inLine
+BABEL_OP2_202_24241_20140218_231626_outLine
+BABEL_OP2_202_24779_20140205_002210_inLine
+BABEL_OP2_202_24779_20140205_002210_outLine
+BABEL_OP2_202_24982_20131219_225432_inLine
+BABEL_OP2_202_24982_20131219_225432_outLine
+BABEL_OP2_202_25698_20140208_030726_inLine
+BABEL_OP2_202_25698_20140208_030726_outLine
+BABEL_OP2_202_25719_20140217_232330_inLine
+BABEL_OP2_202_25719_20140217_232330_outLine
+BABEL_OP2_202_26507_20131030_200210_inLine
+BABEL_OP2_202_26507_20131030_200210_outLine
+BABEL_OP2_202_27042_20140209_012004_inLine
+BABEL_OP2_202_27042_20140209_012004_outLine
+BABEL_OP2_202_27367_20131127_225822_inLine
+BABEL_OP2_202_27367_20131127_225822_outLine
+BABEL_OP2_202_28303_20130930_225539_inLine
+BABEL_OP2_202_28303_20130930_225539_outLine
+BABEL_OP2_202_28595_20140219_174344_inLine
+BABEL_OP2_202_28595_20140219_174344_outLine
+BABEL_OP2_202_29439_20131009_210851_inLine
+BABEL_OP2_202_29439_20131009_210851_outLine
+BABEL_OP2_202_29482_20140204_232809_inLine
+BABEL_OP2_202_29482_20140204_232809_outLine
+BABEL_OP2_202_29482_20140204_234658_inLine
+BABEL_OP2_202_29482_20140204_234658_outLine
+BABEL_OP2_202_30098_20140210_002512_inLine
+BABEL_OP2_202_30098_20140210_002512_outLine
+BABEL_OP2_202_30461_20140219_222004_inLine
+BABEL_OP2_202_30461_20140219_222004_outLine
+BABEL_OP2_202_31184_20130213_182811_inLine
+BABEL_OP2_202_31184_20130213_182811_outLine
+BABEL_OP2_202_31184_20130213_183600_inLine
+BABEL_OP2_202_31184_20130213_183600_outLine
+BABEL_OP2_202_31919_20131010_181805_inLine
+BABEL_OP2_202_31919_20131010_181805_outLine
+BABEL_OP2_202_32998_20131221_004354_inLine
+BABEL_OP2_202_32998_20131221_004354_outLine
+BABEL_OP2_202_33424_20131012_231429_inLine
+BABEL_OP2_202_33424_20131012_231429_outLine
+BABEL_OP2_202_33497_20130429_202650_inLine
+BABEL_OP2_202_33497_20130429_202650_outLine
+BABEL_OP2_202_33497_20130429_204336_inLine
+BABEL_OP2_202_33497_20130429_204336_outLine
+BABEL_OP2_202_33913_20131218_215213_inLine
+BABEL_OP2_202_33913_20131218_215213_outLine
+BABEL_OP2_202_34064_20131220_013348_inLine
+BABEL_OP2_202_34064_20131220_013348_outLine
+BABEL_OP2_202_34410_20131119_191059_inLine
+BABEL_OP2_202_34410_20131119_191059_outLine
+BABEL_OP2_202_34486_20131016_193212_inLine
+BABEL_OP2_202_34486_20131016_193212_outLine
+BABEL_OP2_202_34586_20131219_235945_inLine
+BABEL_OP2_202_34586_20131219_235945_outLine
+BABEL_OP2_202_34826_20131220_013036_inLine
+BABEL_OP2_202_34826_20131220_013036_outLine
+BABEL_OP2_202_34860_20131202_205952_inLine
+BABEL_OP2_202_34860_20131202_205952_outLine
+BABEL_OP2_202_35139_20131003_221114_inLine
+BABEL_OP2_202_35139_20131003_221114_outLine
+BABEL_OP2_202_36642_20140114_203343_inLine
+BABEL_OP2_202_36642_20140114_203343_outLine
+BABEL_OP2_202_36894_20121228_180620_inLine
+BABEL_OP2_202_36894_20121228_180620_outLine
+BABEL_OP2_202_37285_20130730_214031_inLine
+BABEL_OP2_202_37285_20130730_214031_outLine
+BABEL_OP2_202_39006_20140115_012801_inLine
+BABEL_OP2_202_39006_20140115_012801_outLine
+BABEL_OP2_202_40557_20131018_015314_inLine
+BABEL_OP2_202_40557_20131018_015314_outLine
+BABEL_OP2_202_40565_20130725_183219_inLine
+BABEL_OP2_202_40565_20130725_183219_outLine
+BABEL_OP2_202_41542_20131029_200308_inLine
+BABEL_OP2_202_41542_20131029_200308_outLine
+BABEL_OP2_202_41598_20140225_031321_inLine
+BABEL_OP2_202_41598_20140225_031321_outLine
+BABEL_OP2_202_41720_20131129_192607_inLine
+BABEL_OP2_202_41720_20131129_192607_outLine
+BABEL_OP2_202_41720_20131129_194102_inLine
+BABEL_OP2_202_41720_20131129_194102_outLine
+BABEL_OP2_202_42309_20140221_210458_inLine
+BABEL_OP2_202_42309_20140221_210458_outLine
+BABEL_OP2_202_42434_20130930_235132_inLine
+BABEL_OP2_202_42434_20130930_235132_outLine
+BABEL_OP2_202_42434_20131001_001757_inLine
+BABEL_OP2_202_42434_20131001_001757_outLine
+BABEL_OP2_202_42991_20130801_010705_inLine
+BABEL_OP2_202_42991_20130801_010705_outLine
+BABEL_OP2_202_43794_20131015_230636_inLine
+BABEL_OP2_202_43794_20131015_230636_outLine
+BABEL_OP2_202_46041_20131018_224852_inLine
+BABEL_OP2_202_46041_20131018_224852_outLine
+BABEL_OP2_202_46261_20130213_203255_inLine
+BABEL_OP2_202_46261_20130213_203255_outLine
+BABEL_OP2_202_46550_20131003_205134_inLine
+BABEL_OP2_202_46550_20131003_205134_outLine
+BABEL_OP2_202_46688_20130108_003601_inLine
+BABEL_OP2_202_46688_20130108_003601_outLine
+BABEL_OP2_202_46757_20130726_172556_inLine
+BABEL_OP2_202_46757_20130726_172556_outLine
+BABEL_OP2_202_46976_20130214_203921_inLine
+BABEL_OP2_202_46976_20130214_203921_outLine
+BABEL_OP2_202_47186_20131101_211007_inLine
+BABEL_OP2_202_47186_20131101_211007_outLine
+BABEL_OP2_202_47823_20131017_214917_inLine
+BABEL_OP2_202_47823_20131017_214917_outLine
+BABEL_OP2_202_47866_20131010_061153_inLine
+BABEL_OP2_202_47866_20131010_061153_outLine
+BABEL_OP2_202_48299_20131130_233044_inLine
+BABEL_OP2_202_48399_20131005_030007_outLine
+BABEL_OP2_202_49437_20131031_193108_inLine
+BABEL_OP2_202_49437_20131031_193108_outLine
+BABEL_OP2_202_49630_20130731_234235_inLine
+BABEL_OP2_202_49630_20130731_234235_outLine
+BABEL_OP2_202_49739_20131127_171846_inLine
+BABEL_OP2_202_49739_20131127_171846_outLine
+BABEL_OP2_202_49768_20131001_222725_inLine
+BABEL_OP2_202_49768_20131001_222725_outLine
+BABEL_OP2_202_49907_20131003_213256_inLine
+BABEL_OP2_202_49907_20131003_213256_outLine
+BABEL_OP2_202_50186_20131216_004336_inLine
+BABEL_OP2_202_50186_20131216_004336_outLine
+BABEL_OP2_202_52246_20140125_011930_inLine
+BABEL_OP2_202_52246_20140125_011930_outLine
+BABEL_OP2_202_52272_20130103_193203_inLine
+BABEL_OP2_202_52272_20130103_193203_outLine
+BABEL_OP2_202_52422_20140123_223352_inLine
+BABEL_OP2_202_52422_20140123_223352_outLine
+BABEL_OP2_202_53758_20131203_003849_inLine
+BABEL_OP2_202_53758_20131203_003849_outLine
+BABEL_OP2_202_54066_20140121_223255_inLine
+BABEL_OP2_202_54066_20140121_223255_outLine
+BABEL_OP2_202_54530_20140125_000633_inLine
+BABEL_OP2_202_54530_20140125_000633_outLine
+BABEL_OP2_202_54634_20140121_201449_inLine
+BABEL_OP2_202_54634_20140121_201449_outLine
+BABEL_OP2_202_55381_20140123_030341_inLine
+BABEL_OP2_202_55381_20140123_030341_outLine
+BABEL_OP2_202_56023_20140124_213010_inLine
+BABEL_OP2_202_56023_20140124_213010_outLine
+BABEL_OP2_202_56331_20140124_212336_inLine
+BABEL_OP2_202_56331_20140124_212336_outLine
+BABEL_OP2_202_56606_20140123_202633_inLine
+BABEL_OP2_202_56606_20140123_202633_outLine
+BABEL_OP2_202_56951_20131130_192609_inLine
+BABEL_OP2_202_56951_20131130_192609_outLine
+BABEL_OP2_202_57233_20131120_235941_inLine
+BABEL_OP2_202_57233_20131120_235941_outLine
+BABEL_OP2_202_58103_20130930_045229_inLine
+BABEL_OP2_202_58103_20130930_045229_outLine
+BABEL_OP2_202_58107_20130927_165258_inLine
+BABEL_OP2_202_58107_20130927_165258_outLine
+BABEL_OP2_202_58489_20140131_214025_inLine
+BABEL_OP2_202_58489_20140131_214025_outLine
+BABEL_OP2_202_58821_20130730_183731_inLine
+BABEL_OP2_202_58821_20130730_183731_outLine
+BABEL_OP2_202_59028_20140131_212747_inLine
+BABEL_OP2_202_59028_20140131_212747_outLine
+BABEL_OP2_202_59402_20140201_222141_inLine
+BABEL_OP2_202_59402_20140201_222141_outLine
+BABEL_OP2_202_59402_20140201_222847_inLine
+BABEL_OP2_202_59402_20140201_222847_outLine
+BABEL_OP2_202_60474_20140120_222223_inLine
+BABEL_OP2_202_60474_20140120_222223_outLine
+BABEL_OP2_202_61438_20131129_231819_inLine
+BABEL_OP2_202_61438_20131129_231819_outLine
+BABEL_OP2_202_61438_20131129_233030_inLine
+BABEL_OP2_202_61438_20131129_233030_outLine
+BABEL_OP2_202_61873_20130111_181915_inLine
+BABEL_OP2_202_61873_20130111_181915_outLine
+BABEL_OP2_202_62047_20140129_020943_inLine
+BABEL_OP2_202_62047_20140129_020943_outLine
+BABEL_OP2_202_62360_20131014_211636_inLine
+BABEL_OP2_202_62360_20131014_211636_outLine
+BABEL_OP2_202_62714_20131101_225706_inLine
+BABEL_OP2_202_62714_20131101_225706_outLine
+BABEL_OP2_202_63490_20131203_234940_inLine
+BABEL_OP2_202_63490_20131203_234940_outLine
+BABEL_OP2_202_63920_20131215_021712_inLine
+BABEL_OP2_202_63920_20131215_021712_outLine
+BABEL_OP2_202_64688_20140126_004157_inLine
+BABEL_OP2_202_64688_20140126_004157_outLine
+BABEL_OP2_202_65048_20140128_174534_inLine
+BABEL_OP2_202_65048_20140128_174534_outLine
+BABEL_OP2_202_65336_20140131_001312_inLine
+BABEL_OP2_202_65336_20140131_001312_outLine
+BABEL_OP2_202_65913_20140127_181419_inLine
+BABEL_OP2_202_65913_20140127_181419_outLine
+BABEL_OP2_202_66305_20140126_020747_inLine
+BABEL_OP2_202_66305_20140126_020747_outLine
+BABEL_OP2_202_66641_20131127_183344_inLine
+BABEL_OP2_202_66641_20131127_183344_outLine
+BABEL_OP2_202_66916_20121229_203810_inLine
+BABEL_OP2_202_66916_20121229_203810_outLine
+BABEL_OP2_202_66916_20121229_211053_inLine
+BABEL_OP2_202_66916_20121229_211053_outLine
+BABEL_OP2_202_68289_20131128_012756_inLine
+BABEL_OP2_202_68289_20131128_012756_outLine
+BABEL_OP2_202_68854_20131012_000134_inLine
+BABEL_OP2_202_68854_20131012_000134_outLine
+BABEL_OP2_202_69937_20140131_034019_inLine
+BABEL_OP2_202_69937_20140131_034019_outLine
+BABEL_OP2_202_71566_20140311_213752_inLine
+BABEL_OP2_202_71566_20140311_213752_outLine
+BABEL_OP2_202_72324_20130423_161716_inLine
+BABEL_OP2_202_72324_20130423_161716_outLine
+BABEL_OP2_202_73005_20131012_011254_inLine
+BABEL_OP2_202_73005_20131012_011254_outLine
+BABEL_OP2_202_73022_20140226_210050_inLine
+BABEL_OP2_202_73022_20140226_210050_outLine
+BABEL_OP2_202_73518_20140304_001655_inLine
+BABEL_OP2_202_73518_20140304_001655_outLine
+BABEL_OP2_202_74667_20130227_180657_inLine
+BABEL_OP2_202_74667_20130227_180657_outLine
+BABEL_OP2_202_75930_20131202_213433_inLine
+BABEL_OP2_202_75930_20131202_213433_outLine
+BABEL_OP2_202_76126_20131031_183234_inLine
+BABEL_OP2_202_76126_20131031_183234_outLine
+BABEL_OP2_202_76444_20131018_000013_inLine
+BABEL_OP2_202_76444_20131018_000013_outLine
+BABEL_OP2_202_77146_20121229_203404_inLine
+BABEL_OP2_202_77146_20121229_203404_outLine
+BABEL_OP2_202_78482_20140311_014827_inLine
+BABEL_OP2_202_78482_20140311_014827_outLine
+BABEL_OP2_202_79367_20130107_224252_inLine
+BABEL_OP2_202_79367_20130107_224252_outLine
+BABEL_OP2_202_79973_20131130_184708_inLine
+BABEL_OP2_202_79973_20131130_184708_outLine
+BABEL_OP2_202_79995_20140227_030446_inLine
+BABEL_OP2_202_79995_20140227_030446_outLine
+BABEL_OP2_202_80134_20131202_174756_inLine
+BABEL_OP2_202_80134_20131202_174756_outLine
+BABEL_OP2_202_80383_20131207_013517_inLine
+BABEL_OP2_202_80383_20131207_013517_outLine
+BABEL_OP2_202_81149_20131010_010411_inLine
+BABEL_OP2_202_81149_20131010_010411_outLine
+BABEL_OP2_202_82123_20131130_004859_inLine
+BABEL_OP2_202_82123_20131130_004859_outLine
+BABEL_OP2_202_82138_20130415_225929_inLine
+BABEL_OP2_202_82138_20130415_225929_outLine
+BABEL_OP2_202_82425_20130108_181846_inLine
+BABEL_OP2_202_82425_20130108_181846_outLine
+BABEL_OP2_202_82473_20131004_202625_inLine
+BABEL_OP2_202_82473_20131004_202625_outLine
+BABEL_OP2_202_82496_20130105_232830_inLine
+BABEL_OP2_202_82496_20130105_232830_outLine
+BABEL_OP2_202_82622_20131007_171417_inLine
+BABEL_OP2_202_82622_20131007_171417_outLine
+BABEL_OP2_202_83609_20131128_022206_inLine
+BABEL_OP2_202_83609_20131128_022206_outLine
+BABEL_OP2_202_83651_20131003_212624_inLine
+BABEL_OP2_202_83651_20131003_212624_outLine
+BABEL_OP2_202_84077_20131130_195755_inLine
+BABEL_OP2_202_84077_20131130_195755_outLine
+BABEL_OP2_202_84466_20131010_040505_inLine
+BABEL_OP2_202_84466_20131010_040505_outLine
+BABEL_OP2_202_84469_20131018_212735_inLine
+BABEL_OP2_202_84469_20131018_212735_outLine
+BABEL_OP2_202_86156_20131030_001706_inLine
+BABEL_OP2_202_86156_20131030_001706_outLine
+BABEL_OP2_202_87179_20140320_165556_inLine
+BABEL_OP2_202_87179_20140320_165556_outLine
+BABEL_OP2_202_88776_20130107_192204_inLine
+BABEL_OP2_202_88776_20130107_192204_outLine
+BABEL_OP2_202_88776_20130107_195623_inLine
+BABEL_OP2_202_88776_20130107_195623_outLine
+BABEL_OP2_202_88783_20131018_191706_inLine
+BABEL_OP2_202_88783_20131018_191706_outLine
+BABEL_OP2_202_88865_20140319_212413_inLine
+BABEL_OP2_202_88865_20140319_212413_outLine
+BABEL_OP2_202_89665_20140320_004314_inLine
+BABEL_OP2_202_89665_20140320_004314_outLine
+BABEL_OP2_202_90347_20130912_005052_inLine
+BABEL_OP2_202_90347_20130912_005052_outLine
+BABEL_OP2_202_90572_20131009_190400_inLine
+BABEL_OP2_202_90572_20131009_190400_outLine
+BABEL_OP2_202_90737_20130213_201303_inLine
+BABEL_OP2_202_90737_20130213_201303_outLine
+BABEL_OP2_202_90739_20130222_223815_inLine
+BABEL_OP2_202_90739_20130222_223815_outLine
+BABEL_OP2_202_91080_20130429_213558_inLine
+BABEL_OP2_202_91080_20130429_213558_outLine
+BABEL_OP2_202_91891_20130803_000104_inLine
+BABEL_OP2_202_91891_20130803_000104_outLine
+BABEL_OP2_202_92065_20140109_204802_inLine
+BABEL_OP2_202_92065_20140109_204802_outLine
+BABEL_OP2_202_92440_20131203_195407_inLine
+BABEL_OP2_202_92440_20131203_195407_outLine
+BABEL_OP2_202_92440_20131203_200046_inLine
+BABEL_OP2_202_92440_20131203_200046_outLine
+BABEL_OP2_202_92527_20130225_184732_inLine
+BABEL_OP2_202_92527_20130225_184732_outLine
+BABEL_OP2_202_93153_20131003_212947_inLine
+BABEL_OP2_202_93153_20131003_212947_outLine
+BABEL_OP2_202_93153_20131003_213722_inLine
+BABEL_OP2_202_93153_20131003_213722_outLine
+BABEL_OP2_202_93222_20131126_211540_inLine
+BABEL_OP2_202_93222_20131126_211540_outLine
+BABEL_OP2_202_94333_20130105_202651_inLine
+BABEL_OP2_202_94333_20130105_202651_outLine
+BABEL_OP2_202_94449_20131011_205657_inLine
+BABEL_OP2_202_94449_20131011_205657_outLine
+BABEL_OP2_202_94869_20121219_204921_inLine
+BABEL_OP2_202_94869_20121219_204921_outLine
+BABEL_OP2_202_95077_20140320_014923_inLine
+BABEL_OP2_202_95077_20140320_014923_outLine
+BABEL_OP2_202_96376_20131011_024111_inLine
+BABEL_OP2_202_96376_20131011_024111_outLine
+BABEL_OP2_202_96680_20131130_202936_inLine
+BABEL_OP2_202_96680_20131130_202936_outLine
+BABEL_OP2_202_96690_20130220_210217_inLine
+BABEL_OP2_202_96690_20130220_210217_outLine
+BABEL_OP2_202_96808_20131012_212254_inLine
+BABEL_OP2_202_96808_20131012_212254_outLine
+BABEL_OP2_202_97220_20140319_193818_inLine
+BABEL_OP2_202_97220_20140319_193818_outLine
+BABEL_OP2_202_97363_20131002_203133_inLine
+BABEL_OP2_202_97363_20131002_203133_outLine
+BABEL_OP2_202_97363_20131003_002739_inLine
+BABEL_OP2_202_97363_20131003_002739_outLine
+BABEL_OP2_202_97373_20130730_151855_inLine +BABEL_OP2_202_99401_20130108_001107_inLine +BABEL_OP2_202_99401_20130108_001107_outLine +BABEL_OP2_202_99594_20130220_222308_inLine +BABEL_OP2_202_99594_20130220_222308_outLine +BABEL_OP2_202_99883_20131120_212150_inLine +BABEL_OP2_202_99883_20131120_212150_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/training.list b/egs/babel/s5d/conf/lists/202-swahili/training.list new file mode 100644 index 00000000000..1f0477cdd00 --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/training.list @@ -0,0 +1,525 @@ +BABEL_OP2_202_10002_20131130_011225_inLine +BABEL_OP2_202_10002_20131130_011225_outLine +BABEL_OP2_202_10184_20130214_193710_inLine +BABEL_OP2_202_10184_20130214_193710_outLine +BABEL_OP2_202_10464_20131203_215404_inLine +BABEL_OP2_202_10464_20131203_215404_outLine +BABEL_OP2_202_10647_20131009_183755_inLine +BABEL_OP2_202_10647_20131009_183755_outLine +BABEL_OP2_202_10966_20131219_004736_inLine +BABEL_OP2_202_10966_20131219_004736_outLine +BABEL_OP2_202_11310_20131220_011737_inLine +BABEL_OP2_202_11310_20131220_011737_outLine +BABEL_OP2_202_11352_20131120_175331_inLine +BABEL_OP2_202_11352_20131120_175331_outLine +BABEL_OP2_202_11528_20131126_194053_inLine +BABEL_OP2_202_11528_20131126_194053_outLine +BABEL_OP2_202_11859_20140206_193130_inLine +BABEL_OP2_202_11859_20140206_193130_outLine +BABEL_OP2_202_12846_20140207_070059_inLine +BABEL_OP2_202_12846_20140207_070059_outLine +BABEL_OP2_202_12846_20140207_072228_inLine +BABEL_OP2_202_12846_20140207_072228_outLine +BABEL_OP2_202_13126_20131010_154341_inLine +BABEL_OP2_202_13126_20131010_154341_outLine +BABEL_OP2_202_13189_20131218_191846_inLine +BABEL_OP2_202_13189_20131218_191846_outLine +BABEL_OP2_202_13490_20130410_232045_inLine +BABEL_OP2_202_13490_20130410_232045_outLine +BABEL_OP2_202_13561_20130927_174413_inLine +BABEL_OP2_202_13561_20130927_174413_outLine +BABEL_OP2_202_14719_20131126_223914_inLine +BABEL_OP2_202_14719_20131126_223914_outLine +BABEL_OP2_202_14929_20130215_230011_inLine +BABEL_OP2_202_14929_20130215_230011_outLine +BABEL_OP2_202_15024_20130211_211646_inLine +BABEL_OP2_202_15024_20130211_211646_outLine +BABEL_OP2_202_15281_20131017_173858_inLine +BABEL_OP2_202_15281_20131017_173858_outLine +BABEL_OP2_202_16149_20130108_192505_inLine +BABEL_OP2_202_16149_20130108_192505_outLine +BABEL_OP2_202_16838_20140204_225359_inLine +BABEL_OP2_202_16838_20140204_225359_outLine +BABEL_OP2_202_16839_20131218_202752_inLine +BABEL_OP2_202_16839_20131218_202752_outLine +BABEL_OP2_202_16886_20130219_213720_inLine +BABEL_OP2_202_16886_20130219_213720_outLine +BABEL_OP2_202_17472_20131128_215323_inLine +BABEL_OP2_202_17472_20131128_215323_outLine +BABEL_OP2_202_18242_20131203_010326_inLine +BABEL_OP2_202_18242_20131203_010326_outLine +BABEL_OP2_202_18490_20140109_200346_inLine +BABEL_OP2_202_18490_20140109_200346_outLine +BABEL_OP2_202_18566_20140209_233124_inLine +BABEL_OP2_202_18566_20140209_233124_outLine +BABEL_OP2_202_19589_20131016_205832_inLine +BABEL_OP2_202_19589_20131016_205832_outLine +BABEL_OP2_202_19877_20131011_005357_inLine +BABEL_OP2_202_19877_20131011_005357_outLine +BABEL_OP2_202_21206_20140207_213800_inLine +BABEL_OP2_202_21206_20140207_213800_outLine +BABEL_OP2_202_21624_20131009_200818_inLine +BABEL_OP2_202_21624_20131009_200818_outLine +BABEL_OP2_202_21807_20130926_194526_inLine +BABEL_OP2_202_21807_20130926_194526_outLine +BABEL_OP2_202_22643_20131126_221057_inLine +BABEL_OP2_202_22643_20131126_221057_outLine +BABEL_OP2_202_22918_20131031_201038_inLine 
+BABEL_OP2_202_22918_20131031_201038_outLine +BABEL_OP2_202_23092_20131018_200124_inLine +BABEL_OP2_202_23092_20131018_200124_outLine +BABEL_OP2_202_23153_20130220_213017_inLine +BABEL_OP2_202_23153_20130220_213017_outLine +BABEL_OP2_202_23190_20130308_215320_inLine +BABEL_OP2_202_23190_20130308_215320_outLine +BABEL_OP2_202_23195_20140205_001534_inLine +BABEL_OP2_202_23195_20140205_001534_outLine +BABEL_OP2_202_24010_20140204_221739_inLine +BABEL_OP2_202_24010_20140204_221739_outLine +BABEL_OP2_202_24241_20140218_231626_inLine +BABEL_OP2_202_24241_20140218_231626_outLine +BABEL_OP2_202_24501_20140205_231355_inLine +BABEL_OP2_202_24501_20140205_231355_outLine +BABEL_OP2_202_24779_20140205_002210_inLine +BABEL_OP2_202_24779_20140205_002210_outLine +BABEL_OP2_202_24982_20131219_225432_inLine +BABEL_OP2_202_24982_20131219_225432_outLine +BABEL_OP2_202_25698_20140208_030726_inLine +BABEL_OP2_202_25698_20140208_030726_outLine +BABEL_OP2_202_25719_20140217_232330_inLine +BABEL_OP2_202_25719_20140217_232330_outLine +BABEL_OP2_202_26507_20131030_200210_inLine +BABEL_OP2_202_26507_20131030_200210_outLine +BABEL_OP2_202_27042_20140209_012004_inLine +BABEL_OP2_202_27042_20140209_012004_outLine +BABEL_OP2_202_27189_20131216_001758_inLine +BABEL_OP2_202_27189_20131216_001758_outLine +BABEL_OP2_202_27367_20131127_225822_inLine +BABEL_OP2_202_27367_20131127_225822_outLine +BABEL_OP2_202_28303_20130930_225539_inLine +BABEL_OP2_202_28303_20130930_225539_outLine +BABEL_OP2_202_28522_20130925_000938_inLine +BABEL_OP2_202_28522_20130925_000938_outLine +BABEL_OP2_202_28595_20140219_174344_inLine +BABEL_OP2_202_28595_20140219_174344_outLine +BABEL_OP2_202_28644_20140205_001525_inLine +BABEL_OP2_202_28644_20140205_001525_outLine +BABEL_OP2_202_29439_20131009_210851_inLine +BABEL_OP2_202_29439_20131009_210851_outLine +BABEL_OP2_202_29482_20140204_232809_inLine +BABEL_OP2_202_29482_20140204_232809_outLine +BABEL_OP2_202_29482_20140204_234658_inLine +BABEL_OP2_202_29482_20140204_234658_outLine +BABEL_OP2_202_30098_20140210_002512_inLine +BABEL_OP2_202_30098_20140210_002512_outLine +BABEL_OP2_202_30280_20140220_001618_inLine +BABEL_OP2_202_30280_20140220_001618_outLine +BABEL_OP2_202_30432_20130502_210534_inLine +BABEL_OP2_202_30432_20130502_210534_outLine +BABEL_OP2_202_30432_20130503_175016_inLine +BABEL_OP2_202_30432_20130503_175016_outLine +BABEL_OP2_202_30461_20140219_222004_inLine +BABEL_OP2_202_30461_20140219_222004_outLine +BABEL_OP2_202_30645_20130108_200114_inLine +BABEL_OP2_202_30645_20130108_200114_outLine +BABEL_OP2_202_31184_20130213_182811_inLine +BABEL_OP2_202_31184_20130213_182811_outLine +BABEL_OP2_202_31184_20130213_183600_inLine +BABEL_OP2_202_31184_20130213_183600_outLine +BABEL_OP2_202_31919_20131010_181805_inLine +BABEL_OP2_202_31919_20131010_181805_outLine +BABEL_OP2_202_32837_20131101_203319_inLine +BABEL_OP2_202_32837_20131101_203319_outLine +BABEL_OP2_202_32998_20131221_004354_inLine +BABEL_OP2_202_32998_20131221_004354_outLine +BABEL_OP2_202_33424_20131012_231429_inLine +BABEL_OP2_202_33424_20131012_231429_outLine +BABEL_OP2_202_33497_20130429_202650_inLine +BABEL_OP2_202_33497_20130429_202650_outLine +BABEL_OP2_202_33497_20130429_204336_inLine +BABEL_OP2_202_33497_20130429_204336_outLine +BABEL_OP2_202_33913_20131218_215213_inLine +BABEL_OP2_202_33913_20131218_215213_outLine +BABEL_OP2_202_34064_20131220_013348_inLine +BABEL_OP2_202_34064_20131220_013348_outLine +BABEL_OP2_202_34410_20131119_191059_inLine +BABEL_OP2_202_34410_20131119_191059_outLine 
+BABEL_OP2_202_34486_20131016_193212_inLine +BABEL_OP2_202_34486_20131016_193212_outLine +BABEL_OP2_202_34586_20131219_235945_inLine +BABEL_OP2_202_34586_20131219_235945_outLine +BABEL_OP2_202_34826_20131220_013036_inLine +BABEL_OP2_202_34826_20131220_013036_outLine +BABEL_OP2_202_34860_20131202_205952_inLine +BABEL_OP2_202_34860_20131202_205952_outLine +BABEL_OP2_202_35139_20131003_221114_inLine +BABEL_OP2_202_35139_20131003_221114_outLine +BABEL_OP2_202_35609_20140220_193923_inLine +BABEL_OP2_202_35609_20140220_193923_outLine +BABEL_OP2_202_36642_20140114_203343_inLine +BABEL_OP2_202_36642_20140114_203343_outLine +BABEL_OP2_202_36894_20121228_180620_inLine +BABEL_OP2_202_36894_20121228_180620_outLine +BABEL_OP2_202_37285_20130730_214031_inLine +BABEL_OP2_202_37285_20130730_214031_outLine +BABEL_OP2_202_38963_20131215_232437_inLine +BABEL_OP2_202_38963_20131215_232437_outLine +BABEL_OP2_202_39006_20140115_012801_inLine +BABEL_OP2_202_39006_20140115_012801_outLine +BABEL_OP2_202_40557_20131018_015314_inLine +BABEL_OP2_202_40557_20131018_015314_outLine +BABEL_OP2_202_40565_20130725_183219_inLine +BABEL_OP2_202_40565_20130725_183219_outLine +BABEL_OP2_202_41542_20131029_200308_inLine +BABEL_OP2_202_41542_20131029_200308_outLine +BABEL_OP2_202_41598_20140225_031321_inLine +BABEL_OP2_202_41598_20140225_031321_outLine +BABEL_OP2_202_41720_20131129_192607_inLine +BABEL_OP2_202_41720_20131129_192607_outLine +BABEL_OP2_202_41720_20131129_194102_inLine +BABEL_OP2_202_41720_20131129_194102_outLine +BABEL_OP2_202_42309_20140221_210458_inLine +BABEL_OP2_202_42309_20140221_210458_outLine +BABEL_OP2_202_42434_20130930_235132_inLine +BABEL_OP2_202_42434_20130930_235132_outLine +BABEL_OP2_202_42434_20131001_001757_inLine +BABEL_OP2_202_42434_20131001_001757_outLine +BABEL_OP2_202_42991_20130801_010705_inLine +BABEL_OP2_202_42991_20130801_010705_outLine +BABEL_OP2_202_43395_20140220_223151_inLine +BABEL_OP2_202_43395_20140220_223151_outLine +BABEL_OP2_202_43794_20131015_230636_inLine +BABEL_OP2_202_43794_20131015_230636_outLine +BABEL_OP2_202_46041_20131018_224852_inLine +BABEL_OP2_202_46041_20131018_224852_outLine +BABEL_OP2_202_46261_20130213_203255_inLine +BABEL_OP2_202_46261_20130213_203255_outLine +BABEL_OP2_202_46550_20131003_205134_inLine +BABEL_OP2_202_46550_20131003_205134_outLine +BABEL_OP2_202_46688_20130108_003601_inLine +BABEL_OP2_202_46688_20130108_003601_outLine +BABEL_OP2_202_46757_20130726_172556_inLine +BABEL_OP2_202_46757_20130726_172556_outLine +BABEL_OP2_202_46770_20140223_234733_inLine +BABEL_OP2_202_46770_20140223_234733_outLine +BABEL_OP2_202_46976_20130214_203921_inLine +BABEL_OP2_202_46976_20130214_203921_outLine +BABEL_OP2_202_47186_20131101_211007_inLine +BABEL_OP2_202_47186_20131101_211007_outLine +BABEL_OP2_202_47823_20131017_214917_inLine +BABEL_OP2_202_47823_20131017_214917_outLine +BABEL_OP2_202_47866_20131010_061153_inLine +BABEL_OP2_202_47866_20131010_061153_outLine +BABEL_OP2_202_48243_20131009_224543_inLine +BABEL_OP2_202_48243_20131009_224543_outLine +BABEL_OP2_202_48299_20131130_233044_inLine +BABEL_OP2_202_48399_20131005_030007_outLine +BABEL_OP2_202_48422_20140225_220708_inLine +BABEL_OP2_202_48422_20140225_220708_outLine +BABEL_OP2_202_49437_20131031_193108_inLine +BABEL_OP2_202_49437_20131031_193108_outLine +BABEL_OP2_202_49630_20130731_234235_inLine +BABEL_OP2_202_49630_20130731_234235_outLine +BABEL_OP2_202_49739_20131127_171846_inLine +BABEL_OP2_202_49739_20131127_171846_outLine +BABEL_OP2_202_49768_20131001_222725_inLine 
+BABEL_OP2_202_49768_20131001_222725_outLine +BABEL_OP2_202_49907_20131003_213256_inLine +BABEL_OP2_202_49907_20131003_213256_outLine +BABEL_OP2_202_50186_20131216_004336_inLine +BABEL_OP2_202_50186_20131216_004336_outLine +BABEL_OP2_202_51156_20131216_015429_inLine +BABEL_OP2_202_51156_20131216_015429_outLine +BABEL_OP2_202_51484_20140123_220444_inLine +BABEL_OP2_202_51484_20140123_220444_outLine +BABEL_OP2_202_51611_20130109_194912_inLine +BABEL_OP2_202_51611_20130109_194912_outLine +BABEL_OP2_202_52246_20140125_011930_inLine +BABEL_OP2_202_52246_20140125_011930_outLine +BABEL_OP2_202_52272_20130103_193203_inLine +BABEL_OP2_202_52272_20130103_193203_outLine +BABEL_OP2_202_52422_20140123_223352_inLine +BABEL_OP2_202_52422_20140123_223352_outLine +BABEL_OP2_202_53063_20140124_000041_inLine +BABEL_OP2_202_53063_20140124_000041_outLine +BABEL_OP2_202_53758_20131203_003849_inLine +BABEL_OP2_202_53758_20131203_003849_outLine +BABEL_OP2_202_54066_20140121_223255_inLine +BABEL_OP2_202_54066_20140121_223255_outLine +BABEL_OP2_202_54074_20140123_205035_inLine +BABEL_OP2_202_54074_20140123_205035_outLine +BABEL_OP2_202_54530_20140125_000633_inLine +BABEL_OP2_202_54530_20140125_000633_outLine +BABEL_OP2_202_54634_20140121_201449_inLine +BABEL_OP2_202_54634_20140121_201449_outLine +BABEL_OP2_202_54841_20140122_195114_inLine +BABEL_OP2_202_54841_20140122_195114_outLine +BABEL_OP2_202_54841_20140122_200157_inLine +BABEL_OP2_202_54841_20140122_200157_outLine +BABEL_OP2_202_55259_20130930_023554_inLine +BABEL_OP2_202_55259_20130930_023554_outLine +BABEL_OP2_202_55349_20131010_002325_inLine +BABEL_OP2_202_55349_20131010_002325_outLine +BABEL_OP2_202_55381_20140123_030341_inLine +BABEL_OP2_202_55381_20140123_030341_outLine +BABEL_OP2_202_56023_20140124_213010_inLine +BABEL_OP2_202_56023_20140124_213010_outLine +BABEL_OP2_202_56306_20140122_204419_inLine +BABEL_OP2_202_56306_20140122_204419_outLine +BABEL_OP2_202_56331_20140124_212336_inLine +BABEL_OP2_202_56331_20140124_212336_outLine +BABEL_OP2_202_56465_20140122_194039_inLine +BABEL_OP2_202_56465_20140122_194039_outLine +BABEL_OP2_202_56606_20140123_202633_inLine +BABEL_OP2_202_56606_20140123_202633_outLine +BABEL_OP2_202_56951_20131130_192609_inLine +BABEL_OP2_202_56951_20131130_192609_outLine +BABEL_OP2_202_57233_20131120_235941_inLine +BABEL_OP2_202_57233_20131120_235941_outLine +BABEL_OP2_202_57782_20140129_231340_inLine +BABEL_OP2_202_57782_20140129_231340_outLine +BABEL_OP2_202_58103_20130930_045229_inLine +BABEL_OP2_202_58103_20130930_045229_outLine +BABEL_OP2_202_58107_20130927_165258_inLine +BABEL_OP2_202_58107_20130927_165258_outLine +BABEL_OP2_202_58489_20140131_214025_inLine +BABEL_OP2_202_58489_20140131_214025_outLine +BABEL_OP2_202_58821_20130730_183731_inLine +BABEL_OP2_202_58821_20130730_183731_outLine +BABEL_OP2_202_59028_20140131_212747_inLine +BABEL_OP2_202_59028_20140131_212747_outLine +BABEL_OP2_202_59402_20140201_222141_inLine +BABEL_OP2_202_59402_20140201_222141_outLine +BABEL_OP2_202_59402_20140201_222847_inLine +BABEL_OP2_202_59402_20140201_222847_outLine +BABEL_OP2_202_59720_20130930_032445_inLine +BABEL_OP2_202_59720_20130930_032445_outLine +BABEL_OP2_202_60474_20140120_222223_inLine +BABEL_OP2_202_60474_20140120_222223_outLine +BABEL_OP2_202_60477_20140201_200420_inLine +BABEL_OP2_202_60477_20140201_200420_outLine +BABEL_OP2_202_60778_20131201_233949_inLine +BABEL_OP2_202_60778_20131201_233949_outLine +BABEL_OP2_202_61040_20140227_003457_inLine +BABEL_OP2_202_61040_20140227_003457_outLine 
+BABEL_OP2_202_61438_20131129_231819_inLine +BABEL_OP2_202_61438_20131129_231819_outLine +BABEL_OP2_202_61438_20131129_233030_inLine +BABEL_OP2_202_61438_20131129_233030_outLine +BABEL_OP2_202_61873_20130111_181915_inLine +BABEL_OP2_202_61873_20130111_181915_outLine +BABEL_OP2_202_62047_20140129_020943_inLine +BABEL_OP2_202_62047_20140129_020943_outLine +BABEL_OP2_202_62360_20131014_211636_inLine +BABEL_OP2_202_62360_20131014_211636_outLine +BABEL_OP2_202_62714_20131101_225706_inLine +BABEL_OP2_202_62714_20131101_225706_outLine +BABEL_OP2_202_63490_20131203_234940_inLine +BABEL_OP2_202_63490_20131203_234940_outLine +BABEL_OP2_202_63670_20140130_231139_inLine +BABEL_OP2_202_63670_20140130_231139_outLine +BABEL_OP2_202_63920_20131215_021712_inLine +BABEL_OP2_202_63920_20131215_021712_outLine +BABEL_OP2_202_64688_20140126_004157_inLine +BABEL_OP2_202_64688_20140126_004157_outLine +BABEL_OP2_202_65048_20140128_174534_inLine +BABEL_OP2_202_65048_20140128_174534_outLine +BABEL_OP2_202_65336_20140131_001312_inLine +BABEL_OP2_202_65336_20140131_001312_outLine +BABEL_OP2_202_65466_20131010_013521_inLine +BABEL_OP2_202_65466_20131010_013521_outLine +BABEL_OP2_202_65913_20140127_181419_inLine +BABEL_OP2_202_65913_20140127_181419_outLine +BABEL_OP2_202_66001_20130107_194345_inLine +BABEL_OP2_202_66001_20130107_194345_outLine +BABEL_OP2_202_66045_20130410_204151_inLine +BABEL_OP2_202_66045_20130410_204151_outLine +BABEL_OP2_202_66045_20130410_211501_inLine +BABEL_OP2_202_66045_20130410_211501_outLine +BABEL_OP2_202_66305_20140126_020747_inLine +BABEL_OP2_202_66305_20140126_020747_outLine +BABEL_OP2_202_66641_20131127_183344_inLine +BABEL_OP2_202_66641_20131127_183344_outLine +BABEL_OP2_202_66916_20121229_203810_inLine +BABEL_OP2_202_66916_20121229_203810_outLine +BABEL_OP2_202_66916_20121229_211053_inLine +BABEL_OP2_202_66916_20121229_211053_outLine +BABEL_OP2_202_67401_20130912_043928_inLine +BABEL_OP2_202_67401_20130912_043928_outLine +BABEL_OP2_202_67964_20140125_232737_inLine +BABEL_OP2_202_67964_20140125_232737_outLine +BABEL_OP2_202_68289_20131128_012756_inLine +BABEL_OP2_202_68289_20131128_012756_outLine +BABEL_OP2_202_68748_20130803_201133_inLine +BABEL_OP2_202_68748_20130803_201133_outLine +BABEL_OP2_202_68854_20131012_000134_inLine +BABEL_OP2_202_68854_20131012_000134_outLine +BABEL_OP2_202_69937_20140131_034019_inLine +BABEL_OP2_202_69937_20140131_034019_outLine +BABEL_OP2_202_71566_20140311_213752_inLine +BABEL_OP2_202_71566_20140311_213752_outLine +BABEL_OP2_202_71976_20131128_193641_inLine +BABEL_OP2_202_71976_20131128_193641_outLine +BABEL_OP2_202_72324_20130423_161716_inLine +BABEL_OP2_202_72324_20130423_161716_outLine +BABEL_OP2_202_73005_20131012_011254_inLine +BABEL_OP2_202_73005_20131012_011254_outLine +BABEL_OP2_202_73022_20140226_210050_inLine +BABEL_OP2_202_73022_20140226_210050_outLine +BABEL_OP2_202_73518_20140304_001655_inLine +BABEL_OP2_202_73518_20140304_001655_outLine +BABEL_OP2_202_74121_20130220_195721_inLine +BABEL_OP2_202_74121_20130220_195721_outLine +BABEL_OP2_202_74121_20130220_201735_inLine +BABEL_OP2_202_74121_20130220_201735_outLine +BABEL_OP2_202_74667_20130227_180657_inLine +BABEL_OP2_202_74667_20130227_180657_outLine +BABEL_OP2_202_75064_20140226_232411_inLine +BABEL_OP2_202_75064_20140226_232411_outLine +BABEL_OP2_202_75261_20140311_002541_inLine +BABEL_OP2_202_75261_20140311_002541_outLine +BABEL_OP2_202_75812_20131127_193133_inLine +BABEL_OP2_202_75812_20131127_193133_outLine +BABEL_OP2_202_75930_20131202_213433_inLine 
+BABEL_OP2_202_75930_20131202_213433_outLine +BABEL_OP2_202_76126_20131031_183234_inLine +BABEL_OP2_202_76126_20131031_183234_outLine +BABEL_OP2_202_76444_20131018_000013_inLine +BABEL_OP2_202_76444_20131018_000013_outLine +BABEL_OP2_202_76499_20130412_201900_inLine +BABEL_OP2_202_76499_20130412_201900_outLine +BABEL_OP2_202_77033_20140312_034901_inLine +BABEL_OP2_202_77033_20140312_034901_outLine +BABEL_OP2_202_77146_20121229_203404_inLine +BABEL_OP2_202_77146_20121229_203404_outLine +BABEL_OP2_202_78482_20140311_014827_inLine +BABEL_OP2_202_78482_20140311_014827_outLine +BABEL_OP2_202_79045_20140310_212332_inLine +BABEL_OP2_202_79045_20140310_212332_outLine +BABEL_OP2_202_79367_20130107_224252_inLine +BABEL_OP2_202_79367_20130107_224252_outLine +BABEL_OP2_202_79973_20131130_184708_inLine +BABEL_OP2_202_79973_20131130_184708_outLine +BABEL_OP2_202_79995_20140227_030446_inLine +BABEL_OP2_202_79995_20140227_030446_outLine +BABEL_OP2_202_80134_20131202_174756_inLine +BABEL_OP2_202_80134_20131202_174756_outLine +BABEL_OP2_202_80306_20130928_232209_inLine +BABEL_OP2_202_80306_20130928_232209_outLine +BABEL_OP2_202_80383_20131207_013517_inLine +BABEL_OP2_202_80383_20131207_013517_outLine +BABEL_OP2_202_80989_20131016_213255_inLine +BABEL_OP2_202_80989_20131016_213255_outLine +BABEL_OP2_202_81149_20131010_010411_inLine +BABEL_OP2_202_81149_20131010_010411_outLine +BABEL_OP2_202_81622_20130218_232606_inLine +BABEL_OP2_202_81622_20130218_232606_outLine +BABEL_OP2_202_82123_20131130_004859_inLine +BABEL_OP2_202_82123_20131130_004859_outLine +BABEL_OP2_202_82138_20130415_225929_inLine +BABEL_OP2_202_82138_20130415_225929_outLine +BABEL_OP2_202_82425_20130108_181846_inLine +BABEL_OP2_202_82425_20130108_181846_outLine +BABEL_OP2_202_82473_20131004_202625_inLine +BABEL_OP2_202_82473_20131004_202625_outLine +BABEL_OP2_202_82496_20130105_232830_inLine +BABEL_OP2_202_82496_20130105_232830_outLine +BABEL_OP2_202_82622_20131007_171417_inLine +BABEL_OP2_202_82622_20131007_171417_outLine +BABEL_OP2_202_83609_20131128_022206_inLine +BABEL_OP2_202_83609_20131128_022206_outLine +BABEL_OP2_202_83625_20131130_222251_inLine +BABEL_OP2_202_83625_20131130_222251_outLine +BABEL_OP2_202_83651_20131003_212624_inLine +BABEL_OP2_202_83651_20131003_212624_outLine +BABEL_OP2_202_84077_20131130_195755_inLine +BABEL_OP2_202_84077_20131130_195755_outLine +BABEL_OP2_202_84194_20131130_024921_inLine +BABEL_OP2_202_84194_20131130_024921_outLine +BABEL_OP2_202_84408_20130306_184336_inLine +BABEL_OP2_202_84408_20130306_184336_outLine +BABEL_OP2_202_84466_20131010_040505_inLine +BABEL_OP2_202_84466_20131010_040505_outLine +BABEL_OP2_202_84469_20131018_212735_inLine +BABEL_OP2_202_84469_20131018_212735_outLine +BABEL_OP2_202_84768_20130107_194303_inLine +BABEL_OP2_202_84768_20130107_194303_outLine +BABEL_OP2_202_86156_20131030_001706_inLine +BABEL_OP2_202_86156_20131030_001706_outLine +BABEL_OP2_202_87179_20140320_165556_inLine +BABEL_OP2_202_87179_20140320_165556_outLine +BABEL_OP2_202_87305_20131016_225546_inLine +BABEL_OP2_202_87305_20131016_225546_outLine +BABEL_OP2_202_88776_20130107_192204_inLine +BABEL_OP2_202_88776_20130107_192204_outLine +BABEL_OP2_202_88776_20130107_195623_inLine +BABEL_OP2_202_88776_20130107_195623_outLine +BABEL_OP2_202_88783_20131018_191706_inLine +BABEL_OP2_202_88783_20131018_191706_outLine +BABEL_OP2_202_88865_20140319_212413_inLine +BABEL_OP2_202_88865_20140319_212413_outLine +BABEL_OP2_202_89665_20140320_004314_inLine +BABEL_OP2_202_89665_20140320_004314_outLine 
+BABEL_OP2_202_89695_20130215_224831_inLine +BABEL_OP2_202_89695_20130215_224831_outLine +BABEL_OP2_202_90347_20130912_005052_inLine +BABEL_OP2_202_90347_20130912_005052_outLine +BABEL_OP2_202_90572_20131009_190400_inLine +BABEL_OP2_202_90572_20131009_190400_outLine +BABEL_OP2_202_90737_20130213_201303_inLine +BABEL_OP2_202_90737_20130213_201303_outLine +BABEL_OP2_202_90739_20130222_223815_inLine +BABEL_OP2_202_90739_20130222_223815_outLine +BABEL_OP2_202_90740_20131120_195825_inLine +BABEL_OP2_202_90740_20131120_195825_outLine +BABEL_OP2_202_91080_20130429_213558_inLine +BABEL_OP2_202_91080_20130429_213558_outLine +BABEL_OP2_202_91478_20131127_031740_inLine +BABEL_OP2_202_91478_20131127_031740_outLine +BABEL_OP2_202_91891_20130803_000104_inLine +BABEL_OP2_202_91891_20130803_000104_outLine +BABEL_OP2_202_92065_20140109_204802_inLine +BABEL_OP2_202_92065_20140109_204802_outLine +BABEL_OP2_202_92440_20131203_195407_inLine +BABEL_OP2_202_92440_20131203_195407_outLine +BABEL_OP2_202_92440_20131203_200046_inLine +BABEL_OP2_202_92440_20131203_200046_outLine +BABEL_OP2_202_92527_20130225_184732_inLine +BABEL_OP2_202_92527_20130225_184732_outLine +BABEL_OP2_202_93153_20131003_212947_inLine +BABEL_OP2_202_93153_20131003_212947_outLine +BABEL_OP2_202_93153_20131003_213722_inLine +BABEL_OP2_202_93153_20131003_213722_outLine +BABEL_OP2_202_93222_20131126_211540_inLine +BABEL_OP2_202_93222_20131126_211540_outLine +BABEL_OP2_202_94333_20130105_202651_inLine +BABEL_OP2_202_94333_20130105_202651_outLine +BABEL_OP2_202_94449_20131011_205657_inLine +BABEL_OP2_202_94449_20131011_205657_outLine +BABEL_OP2_202_94869_20121219_204921_inLine +BABEL_OP2_202_94869_20121219_204921_outLine +BABEL_OP2_202_95077_20140320_014923_inLine +BABEL_OP2_202_95077_20140320_014923_outLine +BABEL_OP2_202_95231_20131128_211454_inLine +BABEL_OP2_202_95231_20131128_211454_outLine +BABEL_OP2_202_96376_20131011_024111_inLine +BABEL_OP2_202_96376_20131011_024111_outLine +BABEL_OP2_202_96680_20131130_202936_inLine +BABEL_OP2_202_96680_20131130_202936_outLine +BABEL_OP2_202_96690_20130220_210217_inLine +BABEL_OP2_202_96690_20130220_210217_outLine +BABEL_OP2_202_96808_20131012_212254_inLine +BABEL_OP2_202_96808_20131012_212254_outLine +BABEL_OP2_202_97220_20140319_193818_inLine +BABEL_OP2_202_97220_20140319_193818_outLine +BABEL_OP2_202_97363_20131002_203133_inLine +BABEL_OP2_202_97363_20131002_203133_outLine +BABEL_OP2_202_97363_20131003_002739_inLine +BABEL_OP2_202_97363_20131003_002739_outLine +BABEL_OP2_202_97373_20130730_151855_inLine +BABEL_OP2_202_99401_20130108_001107_inLine +BABEL_OP2_202_99401_20130108_001107_outLine +BABEL_OP2_202_99594_20130220_222308_inLine +BABEL_OP2_202_99594_20130220_222308_outLine +BABEL_OP2_202_99883_20131120_212150_inLine +BABEL_OP2_202_99883_20131120_212150_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/untranscribed-training.list b/egs/babel/s5d/conf/lists/202-swahili/untranscribed-training.list new file mode 100644 index 00000000000..72047620427 --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/untranscribed-training.list @@ -0,0 +1,555 @@ +BABEL_OP2_202_10036_20130313_171555_outLine +BABEL_OP2_202_10058_20131017_230021_outLine +BABEL_OP2_202_10313_20140205_002214_inLine +BABEL_OP2_202_10319_20121229_224454_outLine +BABEL_OP2_202_10411_20140209_011824_inLine +BABEL_OP2_202_10411_20140209_011824_outLine +BABEL_OP2_202_10469_20131130_014924_inLine +BABEL_OP2_202_10638_20140205_005404_inLine +BABEL_OP2_202_10638_20140205_005404_outLine +BABEL_OP2_202_10901_20130913_004135_outLine 
+BABEL_OP2_202_10938_20130930_020020_inLine +BABEL_OP2_202_11096_20131016_000245_outLine +BABEL_OP2_202_11486_20140206_204420_inLine +BABEL_OP2_202_11486_20140206_204420_outLine +BABEL_OP2_202_11486_20140206_205137_inLine +BABEL_OP2_202_11486_20140206_205137_outLine +BABEL_OP2_202_12242_20140214_231330_inLine +BABEL_OP2_202_12609_20140207_172212_inLine +BABEL_OP2_202_13030_20131218_195618_outLine +BABEL_OP2_202_13324_20131219_001852_inLine +BABEL_OP2_202_13324_20131219_001852_outLine +BABEL_OP2_202_13547_20131127_025355_outLine +BABEL_OP2_202_13776_20140206_201743_inLine +BABEL_OP2_202_14560_20140206_011812_inLine +BABEL_OP2_202_14723_20140205_060355_inLine +BABEL_OP2_202_14723_20140205_060355_outLine +BABEL_OP2_202_15042_20131130_221534_inLine +BABEL_OP2_202_15042_20131130_221534_outLine +BABEL_OP2_202_15216_20140208_175430_inLine +BABEL_OP2_202_15322_20140208_191251_inLine +BABEL_OP2_202_15322_20140208_191251_outLine +BABEL_OP2_202_15466_20131127_213156_inLine +BABEL_OP2_202_15466_20131127_214339_inLine +BABEL_OP2_202_15535_20131001_012120_inLine +BABEL_OP2_202_15749_20140206_024112_inLine +BABEL_OP2_202_15749_20140206_024112_outLine +BABEL_OP2_202_15926_20130925_034742_inLine +BABEL_OP2_202_15926_20130925_034742_outLine +BABEL_OP2_202_15926_20130925_035312_inLine +BABEL_OP2_202_15926_20130925_035312_outLine +BABEL_OP2_202_16800_20131219_012534_outLine +BABEL_OP2_202_17127_20140113_203603_inLine +BABEL_OP2_202_17127_20140113_203603_outLine +BABEL_OP2_202_17165_20130410_211020_outLine +BABEL_OP2_202_17320_20140207_162515_outLine +BABEL_OP2_202_17320_20140207_163148_outLine +BABEL_OP2_202_17420_20131029_235015_inLine +BABEL_OP2_202_17496_20130926_185827_inLine +BABEL_OP2_202_17582_20140208_011506_inLine +BABEL_OP2_202_17913_20131128_031821_outLine +BABEL_OP2_202_17914_20131031_221433_inLine +BABEL_OP2_202_17937_20131220_004727_outLine +BABEL_OP2_202_18033_20131218_020549_inLine +BABEL_OP2_202_18033_20131218_020549_outLine +BABEL_OP2_202_18037_20140205_003923_inLine +BABEL_OP2_202_18037_20140205_003923_outLine +BABEL_OP2_202_18280_20140205_025345_inLine +BABEL_OP2_202_18280_20140205_025345_outLine +BABEL_OP2_202_18297_20131012_004111_inLine +BABEL_OP2_202_18297_20131012_004111_outLine +BABEL_OP2_202_18370_20140205_033926_inLine +BABEL_OP2_202_18370_20140205_033926_outLine +BABEL_OP2_202_18863_20131130_020252_inLine +BABEL_OP2_202_18863_20131130_032443_inLine +BABEL_OP2_202_19703_20131218_015339_inLine +BABEL_OP2_202_19773_20131220_220513_inLine +BABEL_OP2_202_19773_20131220_220513_outLine +BABEL_OP2_202_19782_20131102_001852_inLine +BABEL_OP2_202_20330_20140109_172943_outLine +BABEL_OP2_202_20330_20140109_174004_outLine +BABEL_OP2_202_20367_20140207_065137_outLine +BABEL_OP2_202_20454_20131217_005702_outLine +BABEL_OP2_202_20724_20131218_014801_outLine +BABEL_OP2_202_20800_20130109_234836_outLine +BABEL_OP2_202_20922_20140108_231607_outLine +BABEL_OP2_202_21004_20131017_221746_inLine +BABEL_OP2_202_21159_20140205_213005_inLine +BABEL_OP2_202_21244_20131015_194634_outLine +BABEL_OP2_202_21315_20140206_175302_inLine +BABEL_OP2_202_21327_20140206_001641_inLine +BABEL_OP2_202_21426_20140204_235517_inLine +BABEL_OP2_202_21426_20140204_235517_outLine +BABEL_OP2_202_22034_20140130_232345_inLine +BABEL_OP2_202_22034_20140130_234608_inLine +BABEL_OP2_202_22170_20131009_031606_inLine +BABEL_OP2_202_22288_20131212_003625_inLine +BABEL_OP2_202_22612_20131101_182509_inLine +BABEL_OP2_202_22965_20140214_212302_inLine +BABEL_OP2_202_22965_20140214_212302_outLine +BABEL_OP2_202_23046_20140207_214018_inLine 
+BABEL_OP2_202_23046_20140207_214018_outLine +BABEL_OP2_202_23196_20131130_210710_inLine +BABEL_OP2_202_23196_20131130_210710_outLine +BABEL_OP2_202_23239_20130923_232142_inLine +BABEL_OP2_202_23239_20130923_232142_outLine +BABEL_OP2_202_23681_20131016_231325_outLine +BABEL_OP2_202_23752_20140204_225435_inLine +BABEL_OP2_202_23752_20140204_225435_outLine +BABEL_OP2_202_24017_20131220_000437_outLine +BABEL_OP2_202_24209_20131203_184510_outLine +BABEL_OP2_202_24253_20131010_013952_outLine +BABEL_OP2_202_24587_20131127_230044_inLine +BABEL_OP2_202_24587_20131127_230044_outLine +BABEL_OP2_202_24648_20131128_030622_inLine +BABEL_OP2_202_25015_20140207_234017_inLine +BABEL_OP2_202_25198_20140219_203259_inLine +BABEL_OP2_202_25895_20131203_201422_inLine +BABEL_OP2_202_25895_20131203_202401_inLine +BABEL_OP2_202_26072_20140114_031432_inLine +BABEL_OP2_202_26072_20140114_031432_outLine +BABEL_OP2_202_26388_20131217_235617_outLine +BABEL_OP2_202_26478_20140207_223256_outLine +BABEL_OP2_202_26574_20140205_203902_inLine +BABEL_OP2_202_26574_20140205_203902_outLine +BABEL_OP2_202_26574_20140205_205040_inLine +BABEL_OP2_202_26574_20140205_205040_outLine +BABEL_OP2_202_27014_20140109_225600_outLine +BABEL_OP2_202_27014_20140109_231225_outLine +BABEL_OP2_202_27218_20131003_212404_inLine +BABEL_OP2_202_27841_20140206_002202_inLine +BABEL_OP2_202_27841_20140206_002202_outLine +BABEL_OP2_202_28280_20140205_194444_outLine +BABEL_OP2_202_28419_20140205_193403_inLine +BABEL_OP2_202_29023_20131219_010409_outLine +BABEL_OP2_202_29230_20140207_212300_inLine +BABEL_OP2_202_29323_20131031_234945_inLine +BABEL_OP2_202_29323_20131101_000454_inLine +BABEL_OP2_202_29563_20131212_060621_inLine +BABEL_OP2_202_29563_20131212_060621_outLine +BABEL_OP2_202_29746_20131217_001441_inLine +BABEL_OP2_202_29765_20140209_221538_inLine +BABEL_OP2_202_29765_20140209_221538_outLine +BABEL_OP2_202_29911_20131127_184715_outLine +BABEL_OP2_202_30345_20131220_003550_outLine +BABEL_OP2_202_30576_20131017_012418_inLine +BABEL_OP2_202_30653_20131015_175341_inLine +BABEL_OP2_202_30653_20131015_175341_outLine +BABEL_OP2_202_30974_20140219_013521_inLine +BABEL_OP2_202_31267_20140227_054848_inLine +BABEL_OP2_202_31267_20140227_054848_outLine +BABEL_OP2_202_31346_20131019_000000_outLine +BABEL_OP2_202_31346_20131101_003311_outLine +BABEL_OP2_202_31490_20130109_003835_inLine +BABEL_OP2_202_31490_20130109_005722_inLine +BABEL_OP2_202_31500_20131217_015833_inLine +BABEL_OP2_202_31500_20131217_015833_outLine +BABEL_OP2_202_31583_20131219_213900_outLine +BABEL_OP2_202_31628_20130921_212411_inLine +BABEL_OP2_202_31728_20131129_044747_inLine +BABEL_OP2_202_31728_20131129_044747_outLine +BABEL_OP2_202_32048_20131219_013244_inLine +BABEL_OP2_202_32328_20131030_210553_inLine +BABEL_OP2_202_32380_20131130_215631_outLine +BABEL_OP2_202_32872_20131016_190028_inLine +BABEL_OP2_202_32872_20131016_190028_outLine +BABEL_OP2_202_33149_20140225_001335_inLine +BABEL_OP2_202_33149_20140225_001335_outLine +BABEL_OP2_202_33229_20131218_213456_outLine +BABEL_OP2_202_33659_20140225_233435_outLine +BABEL_OP2_202_33800_20131126_211758_inLine +BABEL_OP2_202_33800_20131126_211758_outLine +BABEL_OP2_202_33806_20140224_202859_inLine +BABEL_OP2_202_33806_20140224_202859_outLine +BABEL_OP2_202_33840_20131101_001620_outLine +BABEL_OP2_202_33951_20130426_182755_outLine +BABEL_OP2_202_33951_20130426_184040_outLine +BABEL_OP2_202_33992_20131015_184831_inLine +BABEL_OP2_202_33992_20131015_184831_outLine +BABEL_OP2_202_34019_20140224_193636_outLine 
+BABEL_OP2_202_34106_20121227_222718_inLine +BABEL_OP2_202_34629_20131130_211927_outLine +BABEL_OP2_202_34713_20140225_014158_inLine +BABEL_OP2_202_34713_20140225_014158_outLine +BABEL_OP2_202_35008_20130722_185829_inLine +BABEL_OP2_202_35008_20130722_191623_inLine +BABEL_OP2_202_35143_20131018_192106_inLine +BABEL_OP2_202_35202_20130801_172530_inLine +BABEL_OP2_202_35202_20130801_195816_inLine +BABEL_OP2_202_35786_20131015_181857_inLine +BABEL_OP2_202_35786_20131015_181857_outLine +BABEL_OP2_202_36364_20131130_021940_inLine +BABEL_OP2_202_36669_20130213_192457_inLine +BABEL_OP2_202_37064_20131002_185856_inLine +BABEL_OP2_202_37064_20131002_185856_outLine +BABEL_OP2_202_37229_20140114_233648_outLine +BABEL_OP2_202_37271_20140114_192528_inLine +BABEL_OP2_202_37271_20140114_192528_outLine +BABEL_OP2_202_37499_20131016_183113_inLine +BABEL_OP2_202_37499_20131016_183113_outLine +BABEL_OP2_202_37684_20131203_022005_inLine +BABEL_OP2_202_37684_20131203_024603_inLine +BABEL_OP2_202_37776_20140115_213234_outLine +BABEL_OP2_202_37853_20131101_233956_inLine +BABEL_OP2_202_37853_20131101_233956_outLine +BABEL_OP2_202_37853_20131101_235036_inLine +BABEL_OP2_202_37853_20131101_235036_outLine +BABEL_OP2_202_38323_20140114_202816_inLine +BABEL_OP2_202_38750_20131018_005908_outLine +BABEL_OP2_202_39555_20140114_200302_outLine +BABEL_OP2_202_39579_20140115_214035_outLine +BABEL_OP2_202_39638_20131130_231218_inLine +BABEL_OP2_202_39680_20140115_212202_inLine +BABEL_OP2_202_39680_20140115_212202_outLine +BABEL_OP2_202_39920_20131208_045704_inLine +BABEL_OP2_202_39920_20131208_051609_inLine +BABEL_OP2_202_40648_20131215_203941_outLine +BABEL_OP2_202_40648_20131215_205022_outLine +BABEL_OP2_202_40686_20140114_232425_outLine +BABEL_OP2_202_40686_20140114_233413_outLine +BABEL_OP2_202_40740_20140114_212913_outLine +BABEL_OP2_202_40740_20140114_221533_outLine +BABEL_OP2_202_40939_20140115_005331_inLine +BABEL_OP2_202_40939_20140115_005331_outLine +BABEL_OP2_202_41073_20140114_201346_inLine +BABEL_OP2_202_41073_20140114_201346_outLine +BABEL_OP2_202_41174_20130222_214400_outLine +BABEL_OP2_202_41233_20131127_035936_inLine +BABEL_OP2_202_41233_20131127_035936_outLine +BABEL_OP2_202_41592_20130927_203118_inLine +BABEL_OP2_202_41745_20130222_224352_outLine +BABEL_OP2_202_41745_20130222_225523_outLine +BABEL_OP2_202_41745_20130226_220300_outLine +BABEL_OP2_202_41890_20131011_232931_inLine +BABEL_OP2_202_41890_20131011_232931_outLine +BABEL_OP2_202_41890_20131011_235301_inLine +BABEL_OP2_202_41890_20131011_235301_outLine +BABEL_OP2_202_41958_20131001_185053_inLine +BABEL_OP2_202_42146_20131011_232931_inLine +BABEL_OP2_202_42146_20131011_232931_outLine +BABEL_OP2_202_42146_20131011_233957_inLine +BABEL_OP2_202_42146_20131011_233957_outLine +BABEL_OP2_202_42231_20140224_221548_inLine +BABEL_OP2_202_42299_20140220_233422_outLine +BABEL_OP2_202_42526_20140228_035815_outLine +BABEL_OP2_202_42600_20131001_200025_outLine +BABEL_OP2_202_43115_20131012_005141_inLine +BABEL_OP2_202_43115_20131012_005141_outLine +BABEL_OP2_202_43323_20131129_040506_outLine +BABEL_OP2_202_43784_20131003_005323_inLine +BABEL_OP2_202_44029_20140224_224653_outLine +BABEL_OP2_202_44290_20131013_001608_inLine +BABEL_OP2_202_44290_20131013_001608_outLine +BABEL_OP2_202_44446_20131130_014441_inLine +BABEL_OP2_202_44868_20130806_210559_inLine +BABEL_OP2_202_45486_20140224_210341_outLine +BABEL_OP2_202_45559_20131016_215852_inLine +BABEL_OP2_202_45559_20131016_215852_outLine +BABEL_OP2_202_46268_20130107_230757_inLine 
+BABEL_OP2_202_46389_20131216_020541_inLine +BABEL_OP2_202_46389_20131216_020541_outLine +BABEL_OP2_202_46763_20131009_191902_inLine +BABEL_OP2_202_46763_20131009_191902_outLine +BABEL_OP2_202_46808_20140224_220014_inLine +BABEL_OP2_202_46808_20140224_220014_outLine +BABEL_OP2_202_47270_20140225_015137_inLine +BABEL_OP2_202_47487_20140224_232210_outLine +BABEL_OP2_202_47637_20140224_183628_inLine +BABEL_OP2_202_47637_20140224_183628_outLine +BABEL_OP2_202_47882_20131128_015709_outLine +BABEL_OP2_202_47923_20131129_211629_inLine +BABEL_OP2_202_47923_20131129_211629_outLine +BABEL_OP2_202_48200_20131128_211840_inLine +BABEL_OP2_202_49001_20131003_151102_inLine +BABEL_OP2_202_49001_20131003_151102_outLine +BABEL_OP2_202_49027_20131012_171107_outLine +BABEL_OP2_202_49118_20140223_214255_inLine +BABEL_OP2_202_49118_20140223_214255_outLine +BABEL_OP2_202_49216_20121227_221242_inLine +BABEL_OP2_202_49216_20121227_233227_inLine +BABEL_OP2_202_49330_20131130_015311_inLine +BABEL_OP2_202_49330_20131130_015311_outLine +BABEL_OP2_202_49502_20121227_234825_outLine +BABEL_OP2_202_49812_20140224_225827_inLine +BABEL_OP2_202_49812_20140224_225827_outLine +BABEL_OP2_202_49870_20140224_214828_inLine +BABEL_OP2_202_49870_20140224_214828_outLine +BABEL_OP2_202_49912_20140224_190150_inLine +BABEL_OP2_202_49912_20140224_190150_outLine +BABEL_OP2_202_50549_20140122_212548_inLine +BABEL_OP2_202_50549_20140122_212548_outLine +BABEL_OP2_202_50940_20131128_044329_outLine +BABEL_OP2_202_51015_20130919_230711_outLine +BABEL_OP2_202_51693_20140122_221655_outLine +BABEL_OP2_202_51955_20131004_201017_inLine +BABEL_OP2_202_51955_20131004_201017_outLine +BABEL_OP2_202_52322_20140122_020749_outLine +BABEL_OP2_202_52322_20140122_022032_outLine +BABEL_OP2_202_52438_20131002_193009_inLine +BABEL_OP2_202_52499_20131216_043452_inLine +BABEL_OP2_202_52694_20140123_012847_inLine +BABEL_OP2_202_52694_20140123_012847_outLine +BABEL_OP2_202_52803_20131129_235234_inLine +BABEL_OP2_202_52803_20131129_235234_outLine +BABEL_OP2_202_53010_20131129_193814_inLine +BABEL_OP2_202_53010_20131129_193814_outLine +BABEL_OP2_202_53144_20140123_025818_inLine +BABEL_OP2_202_53144_20140123_025818_outLine +BABEL_OP2_202_53415_20131101_205155_inLine +BABEL_OP2_202_54040_20140124_235842_inLine +BABEL_OP2_202_54040_20140124_235842_outLine +BABEL_OP2_202_54040_20140125_000629_inLine +BABEL_OP2_202_54040_20140125_000629_outLine +BABEL_OP2_202_54104_20130108_184048_inLine +BABEL_OP2_202_54390_20140123_021824_inLine +BABEL_OP2_202_54477_20140121_195829_inLine +BABEL_OP2_202_54477_20140121_195829_outLine +BABEL_OP2_202_54567_20130215_201456_outLine +BABEL_OP2_202_54697_20140124_011928_inLine +BABEL_OP2_202_54827_20131012_020910_inLine +BABEL_OP2_202_55136_20131120_230735_inLine +BABEL_OP2_202_55136_20131120_230735_outLine +BABEL_OP2_202_55136_20131120_231613_inLine +BABEL_OP2_202_55136_20131120_231613_outLine +BABEL_OP2_202_55267_20130429_211135_inLine +BABEL_OP2_202_55968_20121221_210945_outLine +BABEL_OP2_202_55968_20121222_190905_outLine +BABEL_OP2_202_56019_20140122_235300_inLine +BABEL_OP2_202_56019_20140122_235300_outLine +BABEL_OP2_202_56057_20131217_015911_outLine +BABEL_OP2_202_56076_20131012_005019_inLine +BABEL_OP2_202_56076_20131012_005019_outLine +BABEL_OP2_202_56307_20130925_024659_outLine +BABEL_OP2_202_56326_20131129_235243_inLine +BABEL_OP2_202_56427_20140115_215916_inLine +BABEL_OP2_202_56427_20140115_215916_outLine +BABEL_OP2_202_56468_20140125_021443_inLine +BABEL_OP2_202_56468_20140125_021443_outLine 
+BABEL_OP2_202_56684_20140122_005322_inLine +BABEL_OP2_202_56684_20140122_005322_outLine +BABEL_OP2_202_56925_20131215_232111_outLine +BABEL_OP2_202_57035_20131218_025223_inLine +BABEL_OP2_202_57035_20131218_025223_outLine +BABEL_OP2_202_57116_20121220_184622_outLine +BABEL_OP2_202_57219_20140128_183934_inLine +BABEL_OP2_202_57219_20140128_183934_outLine +BABEL_OP2_202_57464_20131012_002232_inLine +BABEL_OP2_202_57566_20140130_012514_inLine +BABEL_OP2_202_57566_20140130_012514_outLine +BABEL_OP2_202_57654_20140115_204117_inLine +BABEL_OP2_202_57654_20140115_204117_outLine +BABEL_OP2_202_57982_20131010_025934_outLine +BABEL_OP2_202_58026_20140129_211720_inLine +BABEL_OP2_202_58026_20140129_211720_outLine +BABEL_OP2_202_58585_20140201_230856_inLine +BABEL_OP2_202_58915_20140129_222116_outLine +BABEL_OP2_202_59078_20130911_214801_outLine +BABEL_OP2_202_59163_20131212_044108_outLine +BABEL_OP2_202_59291_20140201_004317_inLine +BABEL_OP2_202_59301_20140125_202811_inLine +BABEL_OP2_202_59301_20140125_202811_outLine +BABEL_OP2_202_59864_20140131_204919_inLine +BABEL_OP2_202_59993_20140114_021749_inLine +BABEL_OP2_202_59993_20140114_021749_outLine +BABEL_OP2_202_60299_20140131_075856_inLine +BABEL_OP2_202_60299_20140131_075856_outLine +BABEL_OP2_202_60307_20140131_184522_inLine +BABEL_OP2_202_60307_20140131_184522_outLine +BABEL_OP2_202_60397_20131202_205856_inLine +BABEL_OP2_202_60397_20131202_205856_outLine +BABEL_OP2_202_60458_20140130_223733_inLine +BABEL_OP2_202_60458_20140130_223733_outLine +BABEL_OP2_202_60706_20121228_005527_outLine +BABEL_OP2_202_61190_20131002_225904_outLine +BABEL_OP2_202_61219_20140120_234802_inLine +BABEL_OP2_202_61219_20140120_234802_outLine +BABEL_OP2_202_61684_20131130_211629_inLine +BABEL_OP2_202_61731_20131003_043735_outLine +BABEL_OP2_202_61971_20131010_034223_inLine +BABEL_OP2_202_62286_20130221_203131_outLine +BABEL_OP2_202_62362_20140201_220857_inLine +BABEL_OP2_202_62362_20140201_220857_outLine +BABEL_OP2_202_62471_20131203_193149_inLine +BABEL_OP2_202_62471_20131203_193149_outLine +BABEL_OP2_202_62491_20140131_223205_inLine +BABEL_OP2_202_62491_20140131_223205_outLine +BABEL_OP2_202_62656_20131203_001914_inLine +BABEL_OP2_202_62656_20131203_001914_outLine +BABEL_OP2_202_62724_20131017_221403_inLine +BABEL_OP2_202_63081_20121228_003935_inLine +BABEL_OP2_202_63094_20131016_235150_inLine +BABEL_OP2_202_63094_20131016_235150_outLine +BABEL_OP2_202_63265_20131216_224818_outLine +BABEL_OP2_202_63265_20131216_232337_outLine +BABEL_OP2_202_63334_20131128_190201_outLine +BABEL_OP2_202_63445_20121229_001208_outLine +BABEL_OP2_202_63671_20131213_034007_inLine +BABEL_OP2_202_63671_20131213_034007_outLine +BABEL_OP2_202_63766_20131217_022038_outLine +BABEL_OP2_202_64014_20140130_213507_inLine +BABEL_OP2_202_64014_20140130_213507_outLine +BABEL_OP2_202_64469_20131216_032049_outLine +BABEL_OP2_202_64635_20131129_013800_inLine +BABEL_OP2_202_64635_20131129_013800_outLine +BABEL_OP2_202_65252_20131016_183445_inLine +BABEL_OP2_202_65252_20131016_183445_outLine +BABEL_OP2_202_65268_20131220_020108_inLine +BABEL_OP2_202_65268_20131220_020108_outLine +BABEL_OP2_202_65268_20131220_021438_inLine +BABEL_OP2_202_65268_20131220_021438_outLine +BABEL_OP2_202_65367_20140129_234024_inLine +BABEL_OP2_202_65367_20140129_234024_outLine +BABEL_OP2_202_65370_20140201_195241_inLine +BABEL_OP2_202_65370_20140201_195241_outLine +BABEL_OP2_202_67066_20140130_212058_inLine +BABEL_OP2_202_67066_20140130_212058_outLine +BABEL_OP2_202_67213_20140130_185616_outLine 
+BABEL_OP2_202_67304_20140201_230632_inLine +BABEL_OP2_202_67304_20140201_230632_outLine +BABEL_OP2_202_67592_20140129_234213_inLine +BABEL_OP2_202_67659_20140115_234146_outLine +BABEL_OP2_202_67773_20140129_215114_inLine +BABEL_OP2_202_67773_20140129_215114_outLine +BABEL_OP2_202_67894_20140130_232658_inLine +BABEL_OP2_202_67894_20140130_232658_outLine +BABEL_OP2_202_68068_20130802_203147_outLine +BABEL_OP2_202_68385_20130208_214719_outLine +BABEL_OP2_202_68668_20140131_221117_inLine +BABEL_OP2_202_68668_20140131_221117_outLine +BABEL_OP2_202_68823_20131215_001456_outLine +BABEL_OP2_202_68910_20140127_211718_inLine +BABEL_OP2_202_68910_20140127_211718_outLine +BABEL_OP2_202_69096_20140312_022044_inLine +BABEL_OP2_202_69096_20140312_022044_outLine +BABEL_OP2_202_69153_20131128_194250_outLine +BABEL_OP2_202_69474_20130731_011215_inLine +BABEL_OP2_202_69474_20130731_012232_inLine +BABEL_OP2_202_69746_20140125_215609_inLine +BABEL_OP2_202_69746_20140125_215609_outLine +BABEL_OP2_202_69982_20140131_212729_inLine +BABEL_OP2_202_69982_20140131_212729_outLine +BABEL_OP2_202_69982_20140131_213451_inLine +BABEL_OP2_202_69982_20140131_213451_outLine +BABEL_OP2_202_70121_20130308_180634_inLine +BABEL_OP2_202_70221_20130429_160925_outLine +BABEL_OP2_202_70293_20131218_043924_inLine +BABEL_OP2_202_70293_20131218_043924_outLine +BABEL_OP2_202_70343_20130730_000937_outLine +BABEL_OP2_202_70452_20140115_230438_inLine +BABEL_OP2_202_70460_20131101_001026_inLine +BABEL_OP2_202_70460_20131101_001026_outLine +BABEL_OP2_202_70726_20131216_011153_inLine +BABEL_OP2_202_70726_20131216_011153_outLine +BABEL_OP2_202_71121_20131208_073117_outLine +BABEL_OP2_202_71404_20131004_002732_inLine +BABEL_OP2_202_72073_20131213_042304_inLine +BABEL_OP2_202_72349_20131128_183232_inLine +BABEL_OP2_202_72349_20131128_185336_inLine +BABEL_OP2_202_72844_20121222_003955_outLine +BABEL_OP2_202_73119_20140116_003550_outLine +BABEL_OP2_202_73119_20140120_205305_outLine +BABEL_OP2_202_73299_20131017_003841_outLine +BABEL_OP2_202_73909_20140303_224953_outLine +BABEL_OP2_202_74799_20130911_223303_outLine +BABEL_OP2_202_75342_20130731_021621_outLine +BABEL_OP2_202_75505_20121222_213031_inLine +BABEL_OP2_202_76437_20121219_005936_outLine +BABEL_OP2_202_76730_20131217_052627_outLine +BABEL_OP2_202_77112_20131003_031801_inLine +BABEL_OP2_202_77391_20140121_013824_inLine +BABEL_OP2_202_77391_20140121_013824_outLine +BABEL_OP2_202_77567_20121228_181102_inLine +BABEL_OP2_202_77803_20121222_202737_inLine +BABEL_OP2_202_77803_20121222_215157_inLine +BABEL_OP2_202_77904_20131221_020558_inLine +BABEL_OP2_202_77904_20131221_020558_outLine +BABEL_OP2_202_77909_20140126_064115_inLine +BABEL_OP2_202_77909_20140126_064115_outLine +BABEL_OP2_202_77921_20131127_232806_inLine +BABEL_OP2_202_77921_20131127_232806_outLine +BABEL_OP2_202_77921_20131127_234200_inLine +BABEL_OP2_202_77921_20131127_234200_outLine +BABEL_OP2_202_78016_20140226_215807_outLine +BABEL_OP2_202_78398_20131004_061913_outLine +BABEL_OP2_202_78511_20140226_231944_inLine +BABEL_OP2_202_78630_20140113_211239_inLine +BABEL_OP2_202_78630_20140113_211239_outLine +BABEL_OP2_202_78630_20140113_212040_inLine +BABEL_OP2_202_78630_20140113_212040_outLine +BABEL_OP2_202_78829_20131126_221958_outLine +BABEL_OP2_202_78833_20131119_205910_inLine +BABEL_OP2_202_78943_20140121_025623_inLine +BABEL_OP2_202_78943_20140121_025623_outLine +BABEL_OP2_202_78958_20131207_004716_inLine +BABEL_OP2_202_78976_20140115_235057_inLine +BABEL_OP2_202_78976_20140115_235057_outLine 
+BABEL_OP2_202_79139_20130928_173217_inLine +BABEL_OP2_202_79190_20130927_184727_outLine +BABEL_OP2_202_79451_20131004_055308_outLine +BABEL_OP2_202_80439_20131001_190050_outLine +BABEL_OP2_202_80559_20131007_063834_outLine +BABEL_OP2_202_80655_20131016_212951_inLine +BABEL_OP2_202_81213_20131004_205633_inLine +BABEL_OP2_202_81213_20131004_210252_inLine +BABEL_OP2_202_81287_20130731_193240_outLine +BABEL_OP2_202_81287_20130731_195716_outLine +BABEL_OP2_202_81392_20130728_234236_inLine +BABEL_OP2_202_81392_20130729_021638_inLine +BABEL_OP2_202_81404_20130314_220702_inLine +BABEL_OP2_202_81769_20131127_214614_outLine +BABEL_OP2_202_82742_20131029_223343_inLine +BABEL_OP2_202_83545_20131017_183706_inLine +BABEL_OP2_202_83783_20130911_225559_outLine +BABEL_OP2_202_84055_20131208_040856_outLine +BABEL_OP2_202_84430_20131217_023038_inLine +BABEL_OP2_202_84430_20131217_024752_inLine +BABEL_OP2_202_84467_20131126_224903_inLine +BABEL_OP2_202_84467_20131126_224903_outLine +BABEL_OP2_202_85010_20131201_004538_outLine +BABEL_OP2_202_85340_20131002_202217_outLine +BABEL_OP2_202_85647_20130416_223722_inLine +BABEL_OP2_202_86191_20140121_192414_inLine +BABEL_OP2_202_86191_20140121_192414_outLine +BABEL_OP2_202_86321_20131018_003746_inLine +BABEL_OP2_202_86676_20130802_223159_outLine +BABEL_OP2_202_86676_20130802_225309_outLine +BABEL_OP2_202_86722_20131001_193946_inLine +BABEL_OP2_202_86845_20131126_204553_outLine +BABEL_OP2_202_86845_20131126_210711_outLine +BABEL_OP2_202_86878_20131220_215841_inLine +BABEL_OP2_202_87693_20131004_231549_outLine +BABEL_OP2_202_87884_20131017_214906_inLine +BABEL_OP2_202_88982_20130930_042104_outLine +BABEL_OP2_202_89516_20131208_025053_inLine +BABEL_OP2_202_89516_20131208_025053_outLine +BABEL_OP2_202_89943_20131003_153927_outLine +BABEL_OP2_202_90318_20131215_222302_outLine +BABEL_OP2_202_91266_20131127_021953_inLine +BABEL_OP2_202_91266_20131127_021953_outLine +BABEL_OP2_202_92060_20131011_235309_outLine +BABEL_OP2_202_92176_20130319_022508_inLine +BABEL_OP2_202_92886_20131004_210342_inLine +BABEL_OP2_202_93443_20131129_212311_inLine +BABEL_OP2_202_93443_20131129_212311_outLine +BABEL_OP2_202_94025_20130801_210343_outLine +BABEL_OP2_202_94253_20131004_010116_outLine +BABEL_OP2_202_94316_20131017_194727_inLine +BABEL_OP2_202_94891_20131126_173659_inLine +BABEL_OP2_202_94891_20131126_173659_outLine +BABEL_OP2_202_95399_20130211_220740_inLine +BABEL_OP2_202_95399_20130211_230605_inLine +BABEL_OP2_202_95399_20130211_232555_inLine +BABEL_OP2_202_95937_20131217_005609_inLine +BABEL_OP2_202_96504_20140125_035346_inLine +BABEL_OP2_202_96504_20140125_035346_outLine +BABEL_OP2_202_96525_20131018_225425_inLine +BABEL_OP2_202_96525_20131018_225425_outLine +BABEL_OP2_202_96525_20131018_230802_inLine +BABEL_OP2_202_96525_20131018_230802_outLine +BABEL_OP2_202_97063_20131128_231626_outLine +BABEL_OP2_202_97461_20130928_010334_outLine +BABEL_OP2_202_97836_20131009_221934_outLine +BABEL_OP2_202_97925_20131203_210706_outLine +BABEL_OP2_202_98678_20131010_023001_outLine +BABEL_OP2_202_99732_20131126_215915_inLine +BABEL_OP2_202_99732_20131126_215915_outLine diff --git a/egs/babel/s5d/conf/lists/203-lao/dev.list b/egs/babel/s5d/conf/lists/203-lao/dev.list new file mode 100644 index 00000000000..3a31f075909 --- /dev/null +++ b/egs/babel/s5d/conf/lists/203-lao/dev.list @@ -0,0 +1,131 @@ +BABEL_OP1_203_10188_20130220_225432_inLine +BABEL_OP1_203_10188_20130220_225432_outLine +BABEL_OP1_203_10188_20130220_230849_inLine +BABEL_OP1_203_10188_20130220_230849_outLine 
+BABEL_OP1_203_10319_20130314_213741_inLine +BABEL_OP1_203_10319_20130314_213741_outLine +BABEL_OP1_203_10319_20130314_214749_inLine +BABEL_OP1_203_10319_20130314_214749_outLine +BABEL_OP1_203_14158_20130409_181505_inLine +BABEL_OP1_203_14158_20130409_181505_outLine +BABEL_OP1_203_14158_20130409_182411_inLine +BABEL_OP1_203_14158_20130409_182411_outLine +BABEL_OP1_203_14158_20130409_183108_inLine +BABEL_OP1_203_14158_20130409_183108_outLine +BABEL_OP1_203_14228_20130405_154037_inLine +BABEL_OP1_203_14228_20130405_154037_outLine +BABEL_OP1_203_14228_20130405_163836_inLine +BABEL_OP1_203_14228_20130405_163836_outLine +BABEL_OP1_203_14440_20130509_205709_inLine +BABEL_OP1_203_14440_20130509_205709_outLine +BABEL_OP1_203_15042_20130727_173946_inLine +BABEL_OP1_203_15042_20130727_173946_outLine +BABEL_OP1_203_16800_20130421_140442_inLine +BABEL_OP1_203_16800_20130421_140442_outLine +BABEL_OP1_203_17127_20130421_131732_inLine +BABEL_OP1_203_17127_20130421_131732_outLine +BABEL_OP1_203_17127_20130421_132248_inLine +BABEL_OP1_203_17127_20130421_132248_outLine +BABEL_OP1_203_17573_20130331_192906_inLine +BABEL_OP1_203_17573_20130331_192906_outLine +BABEL_OP1_203_17890_20130329_160302_inLine +BABEL_OP1_203_17890_20130329_160302_outLine +BABEL_OP1_203_19621_20130330_192114_inLine +BABEL_OP1_203_19621_20130330_192114_outLine +BABEL_OP1_203_19663_20130322_163118_inLine +BABEL_OP1_203_19663_20130322_163118_outLine +BABEL_OP1_203_19672_20130401_204303_inLine +BABEL_OP1_203_19672_20130401_204303_outLine +BABEL_OP1_203_21581_20130327_180143_inLine +BABEL_OP1_203_21581_20130327_180143_outLine +BABEL_OP1_203_22170_20130424_213413_inLine +BABEL_OP1_203_22170_20130424_213413_outLine +BABEL_OP1_203_22216_20130307_190055_inLine +BABEL_OP1_203_22216_20130307_190055_outLine +BABEL_OP1_203_22466_20130218_191925_inLine +BABEL_OP1_203_22466_20130218_191925_outLine +BABEL_OP1_203_23151_20130408_192838_inLine +BABEL_OP1_203_23151_20130408_192838_outLine +BABEL_OP1_203_23260_20130726_170748_inLine +BABEL_OP1_203_23260_20130726_170748_outLine +BABEL_OP1_203_23681_20130730_162132_inLine +BABEL_OP1_203_23681_20130730_162132_outLine +BABEL_OP1_203_23995_20130731_195202_inLine +BABEL_OP1_203_23995_20130731_195202_outLine +BABEL_OP1_203_25012_20130814_141020_inLine +BABEL_OP1_203_25012_20130814_141020_outLine +BABEL_OP1_203_26206_20130328_193450_inLine +BABEL_OP1_203_26206_20130328_193450_outLine +BABEL_OP1_203_29208_20130320_141202_inLine +BABEL_OP1_203_29208_20130320_141202_outLine +BABEL_OP1_203_29765_20130426_185032_inLine +BABEL_OP1_203_29765_20130426_185032_outLine +BABEL_OP1_203_31484_20130404_184608_inLine +BABEL_OP1_203_31484_20130404_184608_outLine +BABEL_OP1_203_32861_20130424_133938_inLine +BABEL_OP1_203_32861_20130424_133938_outLine +BABEL_OP1_203_32959_20130406_145730_inLine +BABEL_OP1_203_32959_20130406_145730_outLine +BABEL_OP1_203_37499_20130512_203148_inLine +BABEL_OP1_203_37499_20130512_203148_outLine +BABEL_OP1_203_39744_20130307_140614_inLine +BABEL_OP1_203_39744_20130307_140614_outLine +BABEL_OP1_203_41400_20130728_194416_inLine +BABEL_OP1_203_41400_20130728_194416_outLine +BABEL_OP1_203_41920_20130310_185621_inLine +BABEL_OP1_203_41920_20130310_185621_outLine +BABEL_OP1_203_48789_20130324_180810_inLine +BABEL_OP1_203_48789_20130324_180810_outLine +BABEL_OP1_203_50565_20130307_164552_inLine +BABEL_OP1_203_50565_20130307_164552_outLine +BABEL_OP1_203_52025_20130306_143713_inLine +BABEL_OP1_203_52025_20130306_143713_outLine +BABEL_OP1_203_52725_20130410_214000_inLine 
+BABEL_OP1_203_52725_20130410_214000_outLine +BABEL_OP1_203_52932_20130314_203215_inLine +BABEL_OP1_203_52932_20130314_203215_outLine +BABEL_OP1_203_56090_20130304_141755_inLine +BABEL_OP1_203_56090_20130304_141755_outLine +BABEL_OP1_203_56429_20130313_200952_inLine +BABEL_OP1_203_56429_20130313_200952_outLine +BABEL_OP1_203_56743_20130319_152822_inLine +BABEL_OP1_203_56743_20130319_152822_outLine +BABEL_OP1_203_57609_20130330_155903_inLine +BABEL_OP1_203_57609_20130330_155903_outLine +BABEL_OP1_203_58717_20130505_152817_inLine +BABEL_OP1_203_58717_20130505_152817_outLine +BABEL_OP1_203_58734_20130309_204100_inLine +BABEL_OP1_203_60538_20130311_163456_inLine +BABEL_OP1_203_60538_20130311_163456_outLine +BABEL_OP1_203_60836_20130314_211014_inLine +BABEL_OP1_203_60836_20130314_211014_outLine +BABEL_OP1_203_61963_20130718_155107_inLine +BABEL_OP1_203_61963_20130718_155107_outLine +BABEL_OP1_203_62155_20130426_173905_inLine +BABEL_OP1_203_62155_20130426_173905_outLine +BABEL_OP1_203_65252_20130731_170815_inLine +BABEL_OP1_203_65252_20130731_170815_outLine +BABEL_OP1_203_66026_20130331_154806_inLine +BABEL_OP1_203_66026_20130331_154806_outLine +BABEL_OP1_203_67842_20130313_142229_inLine +BABEL_OP1_203_67842_20130313_142229_outLine +BABEL_OP1_203_72654_20130323_163248_inLine +BABEL_OP1_203_72654_20130323_163248_outLine +BABEL_OP1_203_72733_20130731_235502_inLine +BABEL_OP1_203_72733_20130731_235502_outLine +BABEL_OP1_203_79190_20130714_135011_inLine +BABEL_OP1_203_79190_20130714_135011_outLine +BABEL_OP1_203_84370_20130506_190748_inLine +BABEL_OP1_203_84370_20130506_190748_outLine +BABEL_OP1_203_88601_20130323_155050_inLine +BABEL_OP1_203_88601_20130323_155050_outLine +BABEL_OP1_203_90417_20130507_172057_inLine +BABEL_OP1_203_90417_20130507_172057_outLine +BABEL_OP1_203_93475_20130312_144135_inLine +BABEL_OP1_203_93475_20130312_144135_outLine +BABEL_OP1_203_95467_20130506_155929_inLine +BABEL_OP1_203_95467_20130506_155929_outLine +BABEL_OP1_203_96504_20130319_161923_inLine +BABEL_OP1_203_96504_20130319_161923_outLine +BABEL_OP1_203_99732_20130406_175258_inLine +BABEL_OP1_203_99732_20130406_175258_outLine diff --git a/egs/babel/s5d/conf/lists/203-lao/eval.list b/egs/babel/s5d/conf/lists/203-lao/eval.list new file mode 100644 index 00000000000..f231ad9d910 --- /dev/null +++ b/egs/babel/s5d/conf/lists/203-lao/eval.list @@ -0,0 +1,192 @@ +BABEL_OP1_203_12321_20130406_165656_inLine +BABEL_OP1_203_12321_20130406_165656_outLine +BABEL_OP1_203_12916_20130309_200304_inLine +BABEL_OP1_203_12916_20130309_200304_outLine +BABEL_OP1_203_13040_20130312_181212_inLine +BABEL_OP1_203_13040_20130312_181212_outLine +BABEL_OP1_203_13427_20130428_153335_inLine +BABEL_OP1_203_13427_20130428_153335_outLine +BABEL_OP1_203_14537_20130726_183519_inLine +BABEL_OP1_203_14537_20130726_183519_outLine +BABEL_OP1_203_15262_20130311_163214_inLine +BABEL_OP1_203_15262_20130311_163214_outLine +BABEL_OP1_203_15848_20130304_193558_inLine +BABEL_OP1_203_15848_20130304_193558_outLine +BABEL_OP1_203_16056_20130309_212127_inLine +BABEL_OP1_203_16056_20130309_212127_outLine +BABEL_OP1_203_17165_20130323_193349_inLine +BABEL_OP1_203_17165_20130323_193349_outLine +BABEL_OP1_203_17420_20130410_223425_inLine +BABEL_OP1_203_17420_20130410_223425_outLine +BABEL_OP1_203_18863_20130423_201154_inLine +BABEL_OP1_203_18863_20130423_201154_outLine +BABEL_OP1_203_19545_20130328_181847_inLine +BABEL_OP1_203_19545_20130328_181847_outLine +BABEL_OP1_203_19767_20130729_162359_inLine +BABEL_OP1_203_20721_20130805_184106_inLine 
+BABEL_OP1_203_20721_20130805_184106_outLine +BABEL_OP1_203_20738_20130501_144021_inLine +BABEL_OP1_203_20738_20130501_144021_outLine +BABEL_OP1_203_20800_20130312_182739_inLine +BABEL_OP1_203_20800_20130312_182739_outLine +BABEL_OP1_203_20800_20130312_190729_inLine +BABEL_OP1_203_20800_20130312_190729_outLine +BABEL_OP1_203_21159_20130428_145928_inLine +BABEL_OP1_203_21159_20130428_145928_outLine +BABEL_OP1_203_21393_20130802_160502_inLine +BABEL_OP1_203_21393_20130802_160502_outLine +BABEL_OP1_203_21794_20130323_191728_inLine +BABEL_OP1_203_21794_20130323_191728_outLine +BABEL_OP1_203_22641_20130310_194352_inLine +BABEL_OP1_203_22641_20130310_194352_outLine +BABEL_OP1_203_23395_20130423_140708_inLine +BABEL_OP1_203_23395_20130423_140708_outLine +BABEL_OP1_203_23731_20130331_144735_inLine +BABEL_OP1_203_23731_20130331_144735_outLine +BABEL_OP1_203_24924_20130509_190210_inLine +BABEL_OP1_203_24924_20130509_190210_outLine +BABEL_OP1_203_27189_20130812_203016_inLine +BABEL_OP1_203_27189_20130812_203016_outLine +BABEL_OP1_203_28422_20130401_201546_inLine +BABEL_OP1_203_28422_20130401_201546_outLine +BABEL_OP1_203_28538_20130323_211503_inLine +BABEL_OP1_203_28538_20130323_211503_outLine +BABEL_OP1_203_28538_20130323_212946_inLine +BABEL_OP1_203_28538_20130323_212946_outLine +BABEL_OP1_203_29685_20130319_225955_inLine +BABEL_OP1_203_29685_20130319_225955_outLine +BABEL_OP1_203_30250_20130307_153941_inLine +BABEL_OP1_203_30250_20130307_153941_outLine +BABEL_OP1_203_32832_20130410_151037_inLine +BABEL_OP1_203_32832_20130410_151037_outLine +BABEL_OP1_203_32872_20130715_135603_inLine +BABEL_OP1_203_32872_20130715_135603_outLine +BABEL_OP1_203_33216_20130427_175935_inLine +BABEL_OP1_203_33216_20130427_175935_outLine +BABEL_OP1_203_33216_20130427_182630_inLine +BABEL_OP1_203_33216_20130427_182630_outLine +BABEL_OP1_203_33424_20130728_164533_inLine +BABEL_OP1_203_33424_20130728_164533_outLine +BABEL_OP1_203_40624_20130812_181331_inLine +BABEL_OP1_203_40624_20130812_181331_outLine +BABEL_OP1_203_41038_20130629_153757_inLine +BABEL_OP1_203_41038_20130629_153757_outLine +BABEL_OP1_203_41109_20130410_205358_inLine +BABEL_OP1_203_41109_20130410_205358_outLine +BABEL_OP1_203_41109_20130410_210805_inLine +BABEL_OP1_203_41109_20130410_210805_outLine +BABEL_OP1_203_41233_20130801_201451_inLine +BABEL_OP1_203_41233_20130801_201451_outLine +BABEL_OP1_203_41890_20130731_203018_inLine +BABEL_OP1_203_41890_20130731_203018_outLine +BABEL_OP1_203_42231_20130330_183550_inLine +BABEL_OP1_203_42231_20130330_183550_outLine +BABEL_OP1_203_43789_20130324_223656_inLine +BABEL_OP1_203_43789_20130324_223656_outLine +BABEL_OP1_203_44255_20130410_165447_inLine +BABEL_OP1_203_44255_20130410_165447_outLine +BABEL_OP1_203_44420_20130320_170344_inLine +BABEL_OP1_203_44420_20130320_170344_outLine +BABEL_OP1_203_45140_20130725_155519_inLine +BABEL_OP1_203_45140_20130725_155519_outLine +BABEL_OP1_203_45770_20130309_142629_inLine +BABEL_OP1_203_45770_20130309_142629_outLine +BABEL_OP1_203_45777_20130324_154017_inLine +BABEL_OP1_203_45777_20130324_154017_outLine +BABEL_OP1_203_45908_20130728_202553_inLine +BABEL_OP1_203_45908_20130728_202553_outLine +BABEL_OP1_203_46333_20130309_224915_inLine +BABEL_OP1_203_46333_20130309_224915_outLine +BABEL_OP1_203_46905_20130812_144116_inLine +BABEL_OP1_203_46905_20130812_144116_outLine +BABEL_OP1_203_47959_20130323_214413_inLine +BABEL_OP1_203_47959_20130323_214413_outLine +BABEL_OP1_203_48399_20130309_162921_inLine +BABEL_OP1_203_48399_20130309_162921_outLine 
+BABEL_OP1_203_48399_20130309_164247_inLine +BABEL_OP1_203_48399_20130309_164247_outLine +BABEL_OP1_203_49870_20130813_180458_inLine +BABEL_OP1_203_49870_20130813_180458_outLine +BABEL_OP1_203_50962_20130326_161422_inLine +BABEL_OP1_203_50962_20130326_161422_outLine +BABEL_OP1_203_53072_20130714_171830_inLine +BABEL_OP1_203_53072_20130714_171830_outLine +BABEL_OP1_203_56019_20130512_160906_inLine +BABEL_OP1_203_56019_20130512_160906_outLine +BABEL_OP1_203_56523_20130319_184906_inLine +BABEL_OP1_203_56523_20130319_184906_outLine +BABEL_OP1_203_57650_20130411_204456_inLine +BABEL_OP1_203_57650_20130411_204456_outLine +BABEL_OP1_203_57922_20130329_164830_inLine +BABEL_OP1_203_57922_20130329_164830_outLine +BABEL_OP1_203_59898_20130309_161351_inLine +BABEL_OP1_203_59898_20130309_161351_outLine +BABEL_OP1_203_62434_20130309_161135_inLine +BABEL_OP1_203_62434_20130309_161135_outLine +BABEL_OP1_203_65339_20130813_152743_inLine +BABEL_OP1_203_65339_20130813_152743_outLine +BABEL_OP1_203_67085_20130803_171200_inLine +BABEL_OP1_203_67085_20130803_171200_outLine +BABEL_OP1_203_67373_20130314_214840_inLine +BABEL_OP1_203_67373_20130314_214840_outLine +BABEL_OP1_203_70726_20130812_194620_inLine +BABEL_OP1_203_70726_20130812_194620_outLine +BABEL_OP1_203_71282_20130425_151939_inLine +BABEL_OP1_203_71282_20130425_151939_outLine +BABEL_OP1_203_71333_20130314_164236_inLine +BABEL_OP1_203_71333_20130314_164236_outLine +BABEL_OP1_203_72073_20130813_163908_inLine +BABEL_OP1_203_72073_20130813_163908_outLine +BABEL_OP1_203_73119_20130318_205141_inLine +BABEL_OP1_203_73119_20130318_205141_outLine +BABEL_OP1_203_73119_20130318_210234_inLine +BABEL_OP1_203_73119_20130318_210234_outLine +BABEL_OP1_203_73757_20130327_154312_inLine +BABEL_OP1_203_73757_20130327_154312_outLine +BABEL_OP1_203_73837_20130320_223755_inLine +BABEL_OP1_203_73837_20130320_223755_outLine +BABEL_OP1_203_74111_20130720_165204_inLine +BABEL_OP1_203_74111_20130720_165204_outLine +BABEL_OP1_203_74641_20130329_192047_inLine +BABEL_OP1_203_74641_20130329_192047_outLine +BABEL_OP1_203_75359_20130719_144824_inLine +BABEL_OP1_203_75359_20130719_144824_outLine +BABEL_OP1_203_77225_20130813_222437_inLine +BABEL_OP1_203_77225_20130813_222437_outLine +BABEL_OP1_203_82904_20130726_192222_inLine +BABEL_OP1_203_82904_20130726_192222_outLine +BABEL_OP1_203_83771_20130729_194808_inLine +BABEL_OP1_203_88394_20130813_004013_inLine +BABEL_OP1_203_88394_20130813_004013_outLine +BABEL_OP1_203_88550_20130714_194639_inLine +BABEL_OP1_203_88550_20130714_194639_outLine +BABEL_OP1_203_88686_20130307_221522_inLine +BABEL_OP1_203_88686_20130307_221522_outLine +BABEL_OP1_203_89372_20130306_162204_inLine +BABEL_OP1_203_89372_20130306_162204_outLine +BABEL_OP1_203_89794_20130714_144126_inLine +BABEL_OP1_203_89794_20130714_144126_outLine +BABEL_OP1_203_91930_20130424_162834_inLine +BABEL_OP1_203_91930_20130424_162834_outLine +BABEL_OP1_203_93861_20130327_171912_inLine +BABEL_OP1_203_93861_20130327_171912_outLine +BABEL_OP1_203_94002_20130324_154206_inLine +BABEL_OP1_203_94002_20130324_154206_outLine +BABEL_OP1_203_94237_20130801_180053_inLine +BABEL_OP1_203_94237_20130801_180053_outLine +BABEL_OP1_203_96088_20130714_191026_inLine +BABEL_OP1_203_96088_20130714_191026_outLine +BABEL_OP1_203_96525_20130713_172412_inLine +BABEL_OP1_203_96525_20130713_172412_outLine +BABEL_OP1_203_97097_20130721_180647_inLine +BABEL_OP1_203_97097_20130721_180647_outLine +BABEL_OP1_203_97570_20130501_151019_inLine +BABEL_OP1_203_97570_20130501_151019_outLine 
+BABEL_OP1_203_97911_20130427_144233_inLine +BABEL_OP1_203_97911_20130427_144233_outLine +BABEL_OP1_203_98489_20130314_215814_inLine +BABEL_OP1_203_98489_20130314_215814_outLine +BABEL_OP1_203_98580_20130324_195754_inLine +BABEL_OP1_203_98580_20130324_195754_outLine +BABEL_OP1_203_99264_20130726_161527_inLine +BABEL_OP1_203_99264_20130726_161527_outLine diff --git a/egs/babel/s5d/conf/lists/203-lao/evalpart1.list b/egs/babel/s5d/conf/lists/203-lao/evalpart1.list new file mode 100644 index 00000000000..a4ebcdd2d76 --- /dev/null +++ b/egs/babel/s5d/conf/lists/203-lao/evalpart1.list @@ -0,0 +1,70 @@ +BABEL_OP1_203_18863_20130423_201154_inLine +BABEL_OP1_203_18863_20130423_201154_outLine +BABEL_OP1_203_19545_20130328_181847_inLine +BABEL_OP1_203_19545_20130328_181847_outLine +BABEL_OP1_203_20738_20130501_144021_inLine +BABEL_OP1_203_20738_20130501_144021_outLine +BABEL_OP1_203_21794_20130323_191728_inLine +BABEL_OP1_203_21794_20130323_191728_outLine +BABEL_OP1_203_23395_20130423_140708_inLine +BABEL_OP1_203_23395_20130423_140708_outLine +BABEL_OP1_203_28538_20130323_211503_inLine +BABEL_OP1_203_28538_20130323_211503_outLine +BABEL_OP1_203_28538_20130323_212946_inLine +BABEL_OP1_203_28538_20130323_212946_outLine +BABEL_OP1_203_30250_20130307_153941_inLine +BABEL_OP1_203_30250_20130307_153941_outLine +BABEL_OP1_203_32872_20130715_135603_inLine +BABEL_OP1_203_32872_20130715_135603_outLine +BABEL_OP1_203_41109_20130410_205358_inLine +BABEL_OP1_203_41109_20130410_205358_outLine +BABEL_OP1_203_41109_20130410_210805_inLine +BABEL_OP1_203_41109_20130410_210805_outLine +BABEL_OP1_203_44255_20130410_165447_inLine +BABEL_OP1_203_44255_20130410_165447_outLine +BABEL_OP1_203_45140_20130725_155519_inLine +BABEL_OP1_203_45140_20130725_155519_outLine +BABEL_OP1_203_45777_20130324_154017_inLine +BABEL_OP1_203_45777_20130324_154017_outLine +BABEL_OP1_203_47959_20130323_214413_inLine +BABEL_OP1_203_47959_20130323_214413_outLine +BABEL_OP1_203_48399_20130309_162921_inLine +BABEL_OP1_203_48399_20130309_162921_outLine +BABEL_OP1_203_48399_20130309_164247_inLine +BABEL_OP1_203_48399_20130309_164247_outLine +BABEL_OP1_203_56019_20130512_160906_inLine +BABEL_OP1_203_56019_20130512_160906_outLine +BABEL_OP1_203_56523_20130319_184906_inLine +BABEL_OP1_203_56523_20130319_184906_outLine +BABEL_OP1_203_57650_20130411_204456_inLine +BABEL_OP1_203_57650_20130411_204456_outLine +BABEL_OP1_203_57922_20130329_164830_inLine +BABEL_OP1_203_57922_20130329_164830_outLine +BABEL_OP1_203_59898_20130309_161351_inLine +BABEL_OP1_203_59898_20130309_161351_outLine +BABEL_OP1_203_67085_20130803_171200_inLine +BABEL_OP1_203_67085_20130803_171200_outLine +BABEL_OP1_203_71282_20130425_151939_inLine +BABEL_OP1_203_71282_20130425_151939_outLine +BABEL_OP1_203_73119_20130318_205141_inLine +BABEL_OP1_203_73119_20130318_205141_outLine +BABEL_OP1_203_73119_20130318_210234_inLine +BABEL_OP1_203_73119_20130318_210234_outLine +BABEL_OP1_203_73837_20130320_223755_inLine +BABEL_OP1_203_73837_20130320_223755_outLine +BABEL_OP1_203_74111_20130720_165204_inLine +BABEL_OP1_203_74111_20130720_165204_outLine +BABEL_OP1_203_75359_20130719_144824_inLine +BABEL_OP1_203_75359_20130719_144824_outLine +BABEL_OP1_203_89372_20130306_162204_inLine +BABEL_OP1_203_89372_20130306_162204_outLine +BABEL_OP1_203_93861_20130327_171912_inLine +BABEL_OP1_203_93861_20130327_171912_outLine +BABEL_OP1_203_94002_20130324_154206_inLine +BABEL_OP1_203_94002_20130324_154206_outLine +BABEL_OP1_203_97097_20130721_180647_inLine +BABEL_OP1_203_97097_20130721_180647_outLine 
+BABEL_OP1_203_97570_20130501_151019_inLine +BABEL_OP1_203_97570_20130501_151019_outLine +BABEL_OP1_203_98580_20130324_195754_inLine +BABEL_OP1_203_98580_20130324_195754_outLine diff --git a/egs/babel/s5d/conf/lists/203-lao/train.FullLP.list b/egs/babel/s5d/conf/lists/203-lao/train.FullLP.list new file mode 100644 index 00000000000..b7fb97d771f --- /dev/null +++ b/egs/babel/s5d/conf/lists/203-lao/train.FullLP.list @@ -0,0 +1,781 @@ +BABEL_OP1_203_10036_20130318_191401_inLine +BABEL_OP1_203_10036_20130318_191401_outLine +BABEL_OP1_203_10411_20130511_174439_inLine +BABEL_OP1_203_10411_20130511_174439_outLine +BABEL_OP1_203_10482_20130403_160013_inLine +BABEL_OP1_203_10482_20130403_160013_outLine +BABEL_OP1_203_10524_20130425_183925_inLine +BABEL_OP1_203_10524_20130425_183925_outLine +BABEL_OP1_203_10524_20130425_185048_inLine +BABEL_OP1_203_10524_20130425_185048_outLine +BABEL_OP1_203_10901_20130321_180232_inLine +BABEL_OP1_203_10901_20130321_180232_outLine +BABEL_OP1_203_10938_20130319_190809_inLine +BABEL_OP1_203_10938_20130319_190809_outLine +BABEL_OP1_203_10966_20130319_135742_inLine +BABEL_OP1_203_10966_20130319_135742_outLine +BABEL_OP1_203_10974_20130425_162609_inLine +BABEL_OP1_203_10974_20130425_162609_outLine +BABEL_OP1_203_11352_20130426_170450_inLine +BABEL_OP1_203_11352_20130426_170450_outLine +BABEL_OP1_203_11486_20130428_131348_inLine +BABEL_OP1_203_11486_20130428_131348_outLine +BABEL_OP1_203_11663_20130402_202025_inLine +BABEL_OP1_203_11663_20130402_202025_outLine +BABEL_OP1_203_11673_20130306_201125_inLine +BABEL_OP1_203_11673_20130306_201125_outLine +BABEL_OP1_203_11797_20130309_195420_inLine +BABEL_OP1_203_11797_20130309_195420_outLine +BABEL_OP1_203_11859_20130511_201411_inLine +BABEL_OP1_203_11859_20130511_201411_outLine +BABEL_OP1_203_12036_20130312_182225_inLine +BABEL_OP1_203_12036_20130312_182225_outLine +BABEL_OP1_203_12220_20130321_160841_inLine +BABEL_OP1_203_12220_20130321_160841_outLine +BABEL_OP1_203_12606_20130726_174724_inLine +BABEL_OP1_203_12606_20130726_174724_outLine +BABEL_OP1_203_12609_20130727_133133_outLine +BABEL_OP1_203_12767_20130313_214914_inLine +BABEL_OP1_203_12767_20130313_214914_outLine +BABEL_OP1_203_12851_20130304_181335_inLine +BABEL_OP1_203_12851_20130304_181335_outLine +BABEL_OP1_203_12851_20130304_182835_inLine +BABEL_OP1_203_12851_20130304_182835_outLine +BABEL_OP1_203_12851_20130304_185138_inLine +BABEL_OP1_203_12851_20130304_185138_outLine +BABEL_OP1_203_13126_20130421_175306_inLine +BABEL_OP1_203_13126_20130421_175306_outLine +BABEL_OP1_203_13126_20130421_180154_inLine +BABEL_OP1_203_13126_20130421_180154_outLine +BABEL_OP1_203_13324_20130313_185155_inLine +BABEL_OP1_203_13324_20130313_185155_outLine +BABEL_OP1_203_13483_20130409_231107_inLine +BABEL_OP1_203_13483_20130409_231107_outLine +BABEL_OP1_203_13490_20130322_143131_inLine +BABEL_OP1_203_13490_20130322_143131_outLine +BABEL_OP1_203_13664_20130304_155051_inLine +BABEL_OP1_203_13664_20130304_155051_outLine +BABEL_OP1_203_13709_20130410_222037_inLine +BABEL_OP1_203_13709_20130410_222037_outLine +BABEL_OP1_203_13744_20130307_215445_inLine +BABEL_OP1_203_13744_20130307_215445_outLine +BABEL_OP1_203_13792_20130310_142445_inLine +BABEL_OP1_203_13792_20130310_142445_outLine +BABEL_OP1_203_14137_20130314_181335_inLine +BABEL_OP1_203_14137_20130314_181335_outLine +BABEL_OP1_203_14141_20130410_212719_inLine +BABEL_OP1_203_14141_20130410_212719_outLine +BABEL_OP1_203_14179_20130402_211621_inLine +BABEL_OP1_203_14179_20130402_211621_outLine +BABEL_OP1_203_14229_20130324_162827_inLine 
+BABEL_OP1_203_14229_20130324_162827_outLine +BABEL_OP1_203_14237_20130313_222650_inLine +BABEL_OP1_203_14237_20130313_222650_outLine +BABEL_OP1_203_14560_20130425_140155_inLine +BABEL_OP1_203_14560_20130425_140155_outLine +BABEL_OP1_203_14719_20130406_191558_inLine +BABEL_OP1_203_14719_20130406_191558_outLine +BABEL_OP1_203_14725_20130309_185639_inLine +BABEL_OP1_203_14725_20130309_185639_outLine +BABEL_OP1_203_14729_20130411_214726_inLine +BABEL_OP1_203_14729_20130411_214726_outLine +BABEL_OP1_203_14814_20130314_133131_inLine +BABEL_OP1_203_14814_20130314_133131_outLine +BABEL_OP1_203_14899_20130311_184638_inLine +BABEL_OP1_203_14899_20130311_184638_outLine +BABEL_OP1_203_14929_20130324_184056_inLine +BABEL_OP1_203_14929_20130324_184056_outLine +BABEL_OP1_203_15024_20130322_152846_inLine +BABEL_OP1_203_15024_20130322_152846_outLine +BABEL_OP1_203_15163_20130319_154026_inLine +BABEL_OP1_203_15163_20130319_154026_outLine +BABEL_OP1_203_15227_20130513_222256_inLine +BABEL_OP1_203_15227_20130513_222256_outLine +BABEL_OP1_203_15322_20130511_152438_inLine +BABEL_OP1_203_15322_20130511_152438_outLine +BABEL_OP1_203_15324_20130512_224242_inLine +BABEL_OP1_203_15324_20130512_224242_outLine +BABEL_OP1_203_15324_20130512_225202_inLine +BABEL_OP1_203_15324_20130512_225202_outLine +BABEL_OP1_203_15535_20130329_143236_inLine +BABEL_OP1_203_15535_20130329_143236_outLine +BABEL_OP1_203_15638_20130409_150143_inLine +BABEL_OP1_203_15638_20130409_150143_outLine +BABEL_OP1_203_15730_20130307_201711_inLine +BABEL_OP1_203_15730_20130307_201711_outLine +BABEL_OP1_203_15749_20130407_175145_inLine +BABEL_OP1_203_15749_20130407_175145_outLine +BABEL_OP1_203_15902_20130309_193940_inLine +BABEL_OP1_203_15902_20130309_193940_outLine +BABEL_OP1_203_16149_20130309_171014_inLine +BABEL_OP1_203_16149_20130309_171014_outLine +BABEL_OP1_203_16924_20130720_175321_inLine +BABEL_OP1_203_16924_20130720_175321_outLine +BABEL_OP1_203_17032_20130402_175428_inLine +BABEL_OP1_203_17032_20130402_175428_outLine +BABEL_OP1_203_17097_20130430_173440_inLine +BABEL_OP1_203_17097_20130430_173440_outLine +BABEL_OP1_203_17115_20130425_173844_inLine +BABEL_OP1_203_17115_20130425_173844_outLine +BABEL_OP1_203_17115_20130425_175816_inLine +BABEL_OP1_203_17115_20130425_175816_outLine +BABEL_OP1_203_17472_20130408_215034_inLine +BABEL_OP1_203_17472_20130408_215034_outLine +BABEL_OP1_203_17567_20130425_145936_inLine +BABEL_OP1_203_17567_20130425_145936_outLine +BABEL_OP1_203_17751_20130512_155328_inLine +BABEL_OP1_203_17751_20130512_155328_outLine +BABEL_OP1_203_17914_20130503_215602_inLine +BABEL_OP1_203_17914_20130503_215602_outLine +BABEL_OP1_203_17923_20130314_203130_inLine +BABEL_OP1_203_17923_20130314_203130_outLine +BABEL_OP1_203_18118_20130730_191442_inLine +BABEL_OP1_203_18118_20130730_191442_outLine +BABEL_OP1_203_18380_20130327_214619_inLine +BABEL_OP1_203_18380_20130327_214619_outLine +BABEL_OP1_203_18566_20130503_153904_inLine +BABEL_OP1_203_18566_20130503_153904_outLine +BABEL_OP1_203_18939_20130311_144740_inLine +BABEL_OP1_203_18939_20130311_144740_outLine +BABEL_OP1_203_19101_20130423_142324_inLine +BABEL_OP1_203_19101_20130423_142324_outLine +BABEL_OP1_203_19134_20130328_220635_inLine +BABEL_OP1_203_19134_20130328_220635_outLine +BABEL_OP1_203_19589_20130727_143145_inLine +BABEL_OP1_203_19589_20130727_143145_outLine +BABEL_OP1_203_19703_20130318_160958_inLine +BABEL_OP1_203_19703_20130318_160958_outLine +BABEL_OP1_203_19703_20130318_162314_inLine +BABEL_OP1_203_19703_20130318_162314_outLine 
+BABEL_OP1_203_19773_20130407_183531_inLine +BABEL_OP1_203_19773_20130407_183531_outLine +BABEL_OP1_203_19782_20130404_170141_inLine +BABEL_OP1_203_19782_20130404_170141_outLine +BABEL_OP1_203_20133_20130304_160351_inLine +BABEL_OP1_203_20133_20130304_160351_outLine +BABEL_OP1_203_20330_20130410_161539_inLine +BABEL_OP1_203_20330_20130410_161539_outLine +BABEL_OP1_203_20682_20130406_194906_inLine +BABEL_OP1_203_20682_20130406_194906_outLine +BABEL_OP1_203_20768_20130407_190152_inLine +BABEL_OP1_203_20768_20130407_190152_outLine +BABEL_OP1_203_20985_20130330_210730_inLine +BABEL_OP1_203_20985_20130330_210730_outLine +BABEL_OP1_203_21004_20130410_181101_inLine +BABEL_OP1_203_21004_20130410_181101_outLine +BABEL_OP1_203_21004_20130410_182740_inLine +BABEL_OP1_203_21004_20130410_182740_outLine +BABEL_OP1_203_21109_20130406_161601_inLine +BABEL_OP1_203_21109_20130406_161601_outLine +BABEL_OP1_203_21206_20130312_164516_inLine +BABEL_OP1_203_21206_20130312_164516_outLine +BABEL_OP1_203_21315_20130501_151005_inLine +BABEL_OP1_203_21315_20130501_151005_outLine +BABEL_OP1_203_21327_20130405_203336_inLine +BABEL_OP1_203_21327_20130405_203336_outLine +BABEL_OP1_203_21435_20130423_181043_inLine +BABEL_OP1_203_21435_20130423_181043_outLine +BABEL_OP1_203_22280_20130329_161951_inLine +BABEL_OP1_203_22280_20130329_161951_outLine +BABEL_OP1_203_22321_20130309_191222_inLine +BABEL_OP1_203_22321_20130309_191222_outLine +BABEL_OP1_203_22446_20130309_134600_inLine +BABEL_OP1_203_22446_20130309_134600_outLine +BABEL_OP1_203_22494_20130402_171234_inLine +BABEL_OP1_203_22494_20130402_171234_outLine +BABEL_OP1_203_22612_20130406_220338_inLine +BABEL_OP1_203_22612_20130406_220338_outLine +BABEL_OP1_203_22624_20130403_190935_inLine +BABEL_OP1_203_22624_20130403_190935_outLine +BABEL_OP1_203_22918_20130410_190723_inLine +BABEL_OP1_203_22918_20130410_190723_outLine +BABEL_OP1_203_23006_20130319_211412_inLine +BABEL_OP1_203_23006_20130319_211412_outLine +BABEL_OP1_203_23046_20130322_165811_inLine +BABEL_OP1_203_23046_20130322_165811_outLine +BABEL_OP1_203_23092_20130406_014425_inLine +BABEL_OP1_203_23092_20130406_014425_outLine +BABEL_OP1_203_23092_20130406_015338_inLine +BABEL_OP1_203_23092_20130406_015338_outLine +BABEL_OP1_203_23153_20130320_194433_inLine +BABEL_OP1_203_23153_20130320_194433_outLine +BABEL_OP1_203_23239_20130331_171214_inLine +BABEL_OP1_203_23239_20130331_171214_outLine +BABEL_OP1_203_23505_20130309_204825_inLine +BABEL_OP1_203_23505_20130309_204825_outLine +BABEL_OP1_203_23980_20130321_193946_inLine +BABEL_OP1_203_23980_20130321_193946_outLine +BABEL_OP1_203_24017_20130424_174037_inLine +BABEL_OP1_203_24017_20130424_174037_outLine +BABEL_OP1_203_24253_20130423_175626_inLine +BABEL_OP1_203_24253_20130423_175626_outLine +BABEL_OP1_203_24270_20130329_153331_inLine +BABEL_OP1_203_24270_20130329_153331_outLine +BABEL_OP1_203_24290_20130423_133315_inLine +BABEL_OP1_203_24290_20130423_133315_outLine +BABEL_OP1_203_24323_20130320_160949_inLine +BABEL_OP1_203_24323_20130320_160949_outLine +BABEL_OP1_203_24470_20130329_205656_inLine +BABEL_OP1_203_24470_20130329_205656_outLine +BABEL_OP1_203_24501_20130421_141711_inLine +BABEL_OP1_203_24501_20130421_141711_outLine +BABEL_OP1_203_24569_20130405_200644_inLine +BABEL_OP1_203_24569_20130405_200644_outLine +BABEL_OP1_203_24586_20130506_203931_inLine +BABEL_OP1_203_24586_20130506_203931_outLine +BABEL_OP1_203_24589_20130323_190409_inLine +BABEL_OP1_203_24589_20130323_190409_outLine +BABEL_OP1_203_24589_20130323_192722_inLine 
+BABEL_OP1_203_24589_20130323_192722_outLine +BABEL_OP1_203_24590_20130321_221146_inLine +BABEL_OP1_203_24590_20130321_221146_outLine +BABEL_OP1_203_24679_20130307_145644_inLine +BABEL_OP1_203_24679_20130307_145644_outLine +BABEL_OP1_203_24779_20130426_183526_inLine +BABEL_OP1_203_24779_20130426_183526_outLine +BABEL_OP1_203_24982_20130327_153429_inLine +BABEL_OP1_203_24982_20130327_153429_outLine +BABEL_OP1_203_25015_20130728_150746_inLine +BABEL_OP1_203_25015_20130728_150746_outLine +BABEL_OP1_203_25085_20130508_145922_inLine +BABEL_OP1_203_25085_20130508_145922_outLine +BABEL_OP1_203_25220_20130502_183943_inLine +BABEL_OP1_203_25220_20130502_183943_outLine +BABEL_OP1_203_25412_20130329_201051_inLine +BABEL_OP1_203_25412_20130329_201051_outLine +BABEL_OP1_203_25698_20130509_182226_inLine +BABEL_OP1_203_25698_20130509_182226_outLine +BABEL_OP1_203_25719_20130426_202355_inLine +BABEL_OP1_203_25719_20130426_202355_outLine +BABEL_OP1_203_25767_20130311_183243_inLine +BABEL_OP1_203_25767_20130311_183243_outLine +BABEL_OP1_203_25961_20130311_171235_inLine +BABEL_OP1_203_25961_20130311_171235_outLine +BABEL_OP1_203_26388_20130318_200305_inLine +BABEL_OP1_203_26388_20130318_200305_outLine +BABEL_OP1_203_26507_20130430_234212_inLine +BABEL_OP1_203_26507_20130430_234212_outLine +BABEL_OP1_203_26574_20130411_160556_inLine +BABEL_OP1_203_26574_20130411_160556_outLine +BABEL_OP1_203_26602_20130801_171131_inLine +BABEL_OP1_203_26602_20130801_171131_outLine +BABEL_OP1_203_26836_20130315_160512_inLine +BABEL_OP1_203_26836_20130315_160512_outLine +BABEL_OP1_203_27125_20130308_003724_inLine +BABEL_OP1_203_27125_20130308_003724_outLine +BABEL_OP1_203_27218_20130312_194932_inLine +BABEL_OP1_203_27218_20130312_194932_outLine +BABEL_OP1_203_27478_20130501_195141_inLine +BABEL_OP1_203_27478_20130501_195141_outLine +BABEL_OP1_203_27478_20130501_200641_inLine +BABEL_OP1_203_27478_20130501_200641_outLine +BABEL_OP1_203_27590_20130405_200930_inLine +BABEL_OP1_203_27590_20130405_200930_outLine +BABEL_OP1_203_27841_20130403_211143_inLine +BABEL_OP1_203_27841_20130403_211143_outLine +BABEL_OP1_203_28190_20130730_195836_inLine +BABEL_OP1_203_28190_20130730_195836_outLine +BABEL_OP1_203_28280_20130501_220643_inLine +BABEL_OP1_203_28280_20130501_220643_outLine +BABEL_OP1_203_28419_20130319_165427_inLine +BABEL_OP1_203_28419_20130319_165427_outLine +BABEL_OP1_203_28522_20130328_170837_inLine +BABEL_OP1_203_28522_20130328_170837_outLine +BABEL_OP1_203_28775_20130313_213707_inLine +BABEL_OP1_203_28775_20130313_213707_outLine +BABEL_OP1_203_28775_20130313_215352_inLine +BABEL_OP1_203_28775_20130313_215352_outLine +BABEL_OP1_203_28945_20130315_171902_inLine +BABEL_OP1_203_28945_20130315_171902_outLine +BABEL_OP1_203_29023_20130313_194148_inLine +BABEL_OP1_203_29023_20130313_194148_outLine +BABEL_OP1_203_29023_20130313_195106_inLine +BABEL_OP1_203_29023_20130313_195106_outLine +BABEL_OP1_203_29039_20130402_153541_inLine +BABEL_OP1_203_29039_20130402_153541_outLine +BABEL_OP1_203_29168_20130306_213504_inLine +BABEL_OP1_203_29168_20130306_213504_outLine +BABEL_OP1_203_29323_20130403_215525_inLine +BABEL_OP1_203_29323_20130403_215525_outLine +BABEL_OP1_203_29416_20130421_133101_inLine +BABEL_OP1_203_29416_20130421_133101_outLine +BABEL_OP1_203_29439_20130422_150608_inLine +BABEL_OP1_203_29439_20130422_150608_outLine +BABEL_OP1_203_30013_20130331_170538_inLine +BABEL_OP1_203_30013_20130331_170538_outLine +BABEL_OP1_203_30395_20130318_180120_inLine +BABEL_OP1_203_30395_20130318_180120_outLine 
+BABEL_OP1_203_30645_20130309_151850_inLine +BABEL_OP1_203_30645_20130309_151850_outLine +BABEL_OP1_203_31184_20130322_141512_inLine +BABEL_OP1_203_31184_20130322_141512_outLine +BABEL_OP1_203_31184_20130322_142743_inLine +BABEL_OP1_203_31184_20130322_142743_outLine +BABEL_OP1_203_31490_20130321_210518_inLine +BABEL_OP1_203_31490_20130321_210518_outLine +BABEL_OP1_203_31992_20130313_143826_inLine +BABEL_OP1_203_31992_20130313_143826_outLine +BABEL_OP1_203_32097_20130304_195431_inLine +BABEL_OP1_203_32097_20130304_195431_outLine +BABEL_OP1_203_32122_20130320_174321_inLine +BABEL_OP1_203_32122_20130320_174321_outLine +BABEL_OP1_203_32122_20130320_175419_inLine +BABEL_OP1_203_32122_20130320_175419_outLine +BABEL_OP1_203_32244_20130728_182847_inLine +BABEL_OP1_203_32244_20130728_182847_outLine +BABEL_OP1_203_32914_20130411_174738_inLine +BABEL_OP1_203_32914_20130411_174738_outLine +BABEL_OP1_203_32998_20130329_155417_inLine +BABEL_OP1_203_32998_20130329_155417_outLine +BABEL_OP1_203_33175_20130307_204134_inLine +BABEL_OP1_203_33175_20130307_204134_outLine +BABEL_OP1_203_33476_20130320_140412_inLine +BABEL_OP1_203_33476_20130320_140412_outLine +BABEL_OP1_203_33672_20130312_165130_inLine +BABEL_OP1_203_33672_20130312_165130_outLine +BABEL_OP1_203_33704_20130405_220001_inLine +BABEL_OP1_203_33704_20130405_220001_outLine +BABEL_OP1_203_33840_20130803_192343_inLine +BABEL_OP1_203_33840_20130803_192343_outLine +BABEL_OP1_203_34145_20130331_145240_inLine +BABEL_OP1_203_34145_20130331_145240_outLine +BABEL_OP1_203_35139_20130313_143646_inLine +BABEL_OP1_203_35139_20130313_143646_outLine +BABEL_OP1_203_36505_20130731_191406_inLine +BABEL_OP1_203_36505_20130731_191406_outLine +BABEL_OP1_203_36594_20130421_182303_inLine +BABEL_OP1_203_36594_20130421_182303_outLine +BABEL_OP1_203_37598_20130330_000102_inLine +BABEL_OP1_203_37598_20130330_000102_outLine +BABEL_OP1_203_38979_20130409_173446_inLine +BABEL_OP1_203_38979_20130409_173446_outLine +BABEL_OP1_203_38979_20130409_174405_inLine +BABEL_OP1_203_38979_20130409_174405_outLine +BABEL_OP1_203_39006_20130506_192659_inLine +BABEL_OP1_203_39006_20130506_192659_outLine +BABEL_OP1_203_39555_20130720_183746_inLine +BABEL_OP1_203_39555_20130720_183746_outLine +BABEL_OP1_203_39848_20130320_133756_inLine +BABEL_OP1_203_39848_20130320_133756_outLine +BABEL_OP1_203_40557_20130404_005522_inLine +BABEL_OP1_203_40557_20130404_005522_outLine +BABEL_OP1_203_40565_20130331_171210_inLine +BABEL_OP1_203_40565_20130331_171210_outLine +BABEL_OP1_203_40713_20130321_155930_inLine +BABEL_OP1_203_40713_20130321_155930_outLine +BABEL_OP1_203_41073_20130721_172038_inLine +BABEL_OP1_203_41073_20130721_172038_outLine +BABEL_OP1_203_41097_20130427_224950_inLine +BABEL_OP1_203_41097_20130427_224950_outLine +BABEL_OP1_203_41100_20130313_161755_inLine +BABEL_OP1_203_41100_20130313_161755_outLine +BABEL_OP1_203_41174_20130318_203041_inLine +BABEL_OP1_203_41174_20130318_203041_outLine +BABEL_OP1_203_41334_20130501_232034_inLine +BABEL_OP1_203_41334_20130501_232034_outLine +BABEL_OP1_203_41442_20130404_174409_inLine +BABEL_OP1_203_41442_20130404_174409_outLine +BABEL_OP1_203_41469_20130313_185923_inLine +BABEL_OP1_203_41469_20130313_185923_outLine +BABEL_OP1_203_41609_20130309_175203_inLine +BABEL_OP1_203_41609_20130309_175203_outLine +BABEL_OP1_203_41680_20130304_134640_inLine +BABEL_OP1_203_41680_20130304_134640_outLine +BABEL_OP1_203_42029_20130403_184623_inLine +BABEL_OP1_203_42029_20130403_184623_outLine +BABEL_OP1_203_42126_20130805_213859_inLine 
+BABEL_OP1_203_42126_20130805_213859_outLine +BABEL_OP1_203_42243_20130313_170336_inLine +BABEL_OP1_203_42243_20130313_170336_outLine +BABEL_OP1_203_42299_20130508_203220_inLine +BABEL_OP1_203_42299_20130508_203220_outLine +BABEL_OP1_203_42299_20130508_204824_inLine +BABEL_OP1_203_42299_20130508_204824_outLine +BABEL_OP1_203_42309_20130428_191239_inLine +BABEL_OP1_203_42309_20130428_191239_outLine +BABEL_OP1_203_42434_20130323_160637_inLine +BABEL_OP1_203_42434_20130323_160637_outLine +BABEL_OP1_203_42834_20130404_194840_inLine +BABEL_OP1_203_42834_20130404_194840_outLine +BABEL_OP1_203_42848_20130513_201112_inLine +BABEL_OP1_203_42848_20130513_201112_outLine +BABEL_OP1_203_42883_20130729_171646_inLine +BABEL_OP1_203_42883_20130729_171646_outLine +BABEL_OP1_203_43368_20130327_215424_inLine +BABEL_OP1_203_43368_20130327_215424_outLine +BABEL_OP1_203_43388_20130327_192024_inLine +BABEL_OP1_203_43388_20130327_192024_outLine +BABEL_OP1_203_43588_20130714_163553_inLine +BABEL_OP1_203_43588_20130714_163553_outLine +BABEL_OP1_203_43784_20130314_171933_inLine +BABEL_OP1_203_43784_20130314_171933_outLine +BABEL_OP1_203_43788_20130504_173234_inLine +BABEL_OP1_203_43788_20130504_173234_outLine +BABEL_OP1_203_43920_20130405_194800_inLine +BABEL_OP1_203_43920_20130405_194800_outLine +BABEL_OP1_203_44477_20130331_190402_inLine +BABEL_OP1_203_44477_20130331_190402_outLine +BABEL_OP1_203_44478_20130730_170938_inLine +BABEL_OP1_203_44478_20130730_170938_outLine +BABEL_OP1_203_44619_20130313_175437_inLine +BABEL_OP1_203_44619_20130313_175437_outLine +BABEL_OP1_203_44709_20130331_183159_inLine +BABEL_OP1_203_44709_20130331_183159_outLine +BABEL_OP1_203_44961_20130311_173427_inLine +BABEL_OP1_203_44961_20130311_173427_outLine +BABEL_OP1_203_45560_20130309_173444_inLine +BABEL_OP1_203_45560_20130309_173444_outLine +BABEL_OP1_203_45642_20130313_202110_inLine +BABEL_OP1_203_45642_20130313_202110_outLine +BABEL_OP1_203_45851_20130801_014413_inLine +BABEL_OP1_203_45851_20130801_014413_outLine +BABEL_OP1_203_46310_20130309_211431_inLine +BABEL_OP1_203_46310_20130309_211431_outLine +BABEL_OP1_203_46550_20130313_153012_inLine +BABEL_OP1_203_46550_20130313_153012_outLine +BABEL_OP1_203_46625_20130304_201959_inLine +BABEL_OP1_203_46625_20130304_201959_outLine +BABEL_OP1_203_46681_20130313_203139_inLine +BABEL_OP1_203_46681_20130313_203139_outLine +BABEL_OP1_203_46688_20130314_212550_inLine +BABEL_OP1_203_46688_20130314_212550_outLine +BABEL_OP1_203_46763_20130426_160841_inLine +BABEL_OP1_203_46763_20130426_160841_outLine +BABEL_OP1_203_47186_20130405_120609_inLine +BABEL_OP1_203_47186_20130405_120609_outLine +BABEL_OP1_203_47270_20130410_160110_inLine +BABEL_OP1_203_47270_20130410_160110_outLine +BABEL_OP1_203_47487_20130321_145055_inLine +BABEL_OP1_203_47487_20130321_145055_outLine +BABEL_OP1_203_47823_20130406_151016_inLine +BABEL_OP1_203_47823_20130406_151016_outLine +BABEL_OP1_203_47866_20130723_152640_inLine +BABEL_OP1_203_47866_20130723_152640_outLine +BABEL_OP1_203_48422_20130425_175947_inLine +BABEL_OP1_203_48422_20130425_175947_outLine +BABEL_OP1_203_48610_20130309_222037_inLine +BABEL_OP1_203_48610_20130309_222037_outLine +BABEL_OP1_203_49001_20130315_160533_inLine +BABEL_OP1_203_49001_20130315_160533_outLine +BABEL_OP1_203_49216_20130307_211955_inLine +BABEL_OP1_203_49216_20130307_211955_outLine +BABEL_OP1_203_49287_20130331_155341_inLine +BABEL_OP1_203_49287_20130331_155341_outLine +BABEL_OP1_203_49437_20130405_194333_inLine +BABEL_OP1_203_49437_20130405_194333_outLine 
+BABEL_OP1_203_49437_20130405_195645_inLine +BABEL_OP1_203_49437_20130405_195645_outLine +BABEL_OP1_203_49630_20130408_182919_inLine +BABEL_OP1_203_49630_20130408_182919_outLine +BABEL_OP1_203_49637_20130313_134853_inLine +BABEL_OP1_203_49637_20130313_134853_outLine +BABEL_OP1_203_49768_20130320_164815_inLine +BABEL_OP1_203_49768_20130320_164815_outLine +BABEL_OP1_203_49902_20130323_175920_inLine +BABEL_OP1_203_49902_20130323_175920_outLine +BABEL_OP1_203_50090_20130726_145642_inLine +BABEL_OP1_203_50090_20130726_145642_outLine +BABEL_OP1_203_50175_20130311_181803_inLine +BABEL_OP1_203_50175_20130311_181803_outLine +BABEL_OP1_203_50726_20130307_135236_inLine +BABEL_OP1_203_50726_20130307_135236_outLine +BABEL_OP1_203_51414_20130729_152916_inLine +BABEL_OP1_203_51414_20130729_152916_outLine +BABEL_OP1_203_51530_20130803_174620_inLine +BABEL_OP1_203_51530_20130803_174620_outLine +BABEL_OP1_203_51611_20130312_195333_inLine +BABEL_OP1_203_51611_20130312_195333_outLine +BABEL_OP1_203_51701_20130508_232537_inLine +BABEL_OP1_203_51701_20130508_232537_outLine +BABEL_OP1_203_51819_20130328_150620_inLine +BABEL_OP1_203_51819_20130328_150620_outLine +BABEL_OP1_203_51955_20130314_175859_inLine +BABEL_OP1_203_51955_20130314_175859_outLine +BABEL_OP1_203_51955_20130314_180731_inLine +BABEL_OP1_203_51955_20130314_180731_outLine +BABEL_OP1_203_52246_20130319_221049_inLine +BABEL_OP1_203_52246_20130319_221049_outLine +BABEL_OP1_203_52272_20130313_140038_inLine +BABEL_OP1_203_52272_20130313_140038_outLine +BABEL_OP1_203_52404_20130409_005414_inLine +BABEL_OP1_203_52404_20130409_005414_outLine +BABEL_OP1_203_52422_20130427_140502_inLine +BABEL_OP1_203_52422_20130427_140502_outLine +BABEL_OP1_203_52447_20130513_224209_inLine +BABEL_OP1_203_52447_20130513_224209_outLine +BABEL_OP1_203_52490_20130309_141915_inLine +BABEL_OP1_203_52490_20130309_141915_outLine +BABEL_OP1_203_52717_20130311_173849_inLine +BABEL_OP1_203_52717_20130311_173849_outLine +BABEL_OP1_203_52854_20130221_192229_inLine +BABEL_OP1_203_52854_20130221_192229_outLine +BABEL_OP1_203_53063_20130407_210935_inLine +BABEL_OP1_203_53063_20130407_210935_outLine +BABEL_OP1_203_53665_20130727_150857_inLine +BABEL_OP1_203_53665_20130727_150857_outLine +BABEL_OP1_203_53842_20130322_165451_inLine +BABEL_OP1_203_53842_20130322_165451_outLine +BABEL_OP1_203_54046_20130804_193101_inLine +BABEL_OP1_203_54046_20130804_193101_outLine +BABEL_OP1_203_54074_20130319_150208_inLine +BABEL_OP1_203_54074_20130319_150208_outLine +BABEL_OP1_203_54104_20130309_204103_inLine +BABEL_OP1_203_54104_20130309_204103_outLine +BABEL_OP1_203_54390_20130313_161947_inLine +BABEL_OP1_203_54390_20130313_161947_outLine +BABEL_OP1_203_54477_20130408_133628_inLine +BABEL_OP1_203_54477_20130408_133628_outLine +BABEL_OP1_203_54530_20130424_194302_inLine +BABEL_OP1_203_54530_20130424_194302_outLine +BABEL_OP1_203_54697_20130405_153323_inLine +BABEL_OP1_203_54697_20130405_153323_outLine +BABEL_OP1_203_54744_20130311_153522_inLine +BABEL_OP1_203_54744_20130311_153522_outLine +BABEL_OP1_203_54827_20130803_201026_inLine +BABEL_OP1_203_54827_20130803_201026_outLine +BABEL_OP1_203_54953_20130319_135125_inLine +BABEL_OP1_203_54953_20130319_135125_outLine +BABEL_OP1_203_55259_20130323_181918_inLine +BABEL_OP1_203_55259_20130323_181918_outLine +BABEL_OP1_203_55818_20130309_163433_inLine +BABEL_OP1_203_55818_20130309_163433_outLine +BABEL_OP1_203_55950_20130728_141857_inLine +BABEL_OP1_203_55950_20130728_141857_outLine +BABEL_OP1_203_56076_20130728_212423_inLine 
+BABEL_OP1_203_56076_20130728_212423_outLine +BABEL_OP1_203_56198_20130314_163346_inLine +BABEL_OP1_203_56198_20130314_163346_outLine +BABEL_OP1_203_56198_20130314_164412_inLine +BABEL_OP1_203_56198_20130314_164412_outLine +BABEL_OP1_203_56213_20130407_184955_inLine +BABEL_OP1_203_56213_20130407_184955_outLine +BABEL_OP1_203_56306_20130408_202539_inLine +BABEL_OP1_203_56306_20130408_202539_outLine +BABEL_OP1_203_56307_20130401_212823_inLine +BABEL_OP1_203_56307_20130401_212823_outLine +BABEL_OP1_203_56465_20130503_211423_inLine +BABEL_OP1_203_56465_20130503_211423_outLine +BABEL_OP1_203_56677_20130407_020513_inLine +BABEL_OP1_203_56677_20130407_020513_outLine +BABEL_OP1_203_56826_20130403_155349_inLine +BABEL_OP1_203_56826_20130403_155349_outLine +BABEL_OP1_203_57093_20130323_155842_inLine +BABEL_OP1_203_57093_20130323_155842_outLine +BABEL_OP1_203_57116_20130306_200913_inLine +BABEL_OP1_203_57116_20130306_200913_outLine +BABEL_OP1_203_57529_20130404_225031_inLine +BABEL_OP1_203_57529_20130404_225031_outLine +BABEL_OP1_203_57678_20130319_173142_inLine +BABEL_OP1_203_57678_20130319_173142_outLine +BABEL_OP1_203_58107_20130331_163124_inLine +BABEL_OP1_203_58107_20130331_163124_outLine +BABEL_OP1_203_58107_20130331_164049_inLine +BABEL_OP1_203_58107_20130331_164049_outLine +BABEL_OP1_203_58145_20130404_174142_inLine +BABEL_OP1_203_58145_20130404_174142_outLine +BABEL_OP1_203_58489_20130406_171644_inLine +BABEL_OP1_203_58489_20130406_171644_outLine +BABEL_OP1_203_58821_20130330_171943_inLine +BABEL_OP1_203_58821_20130330_171943_outLine +BABEL_OP1_203_58850_20130320_210438_outLine +BABEL_OP1_203_58853_20130804_133710_inLine +BABEL_OP1_203_58853_20130804_133710_outLine +BABEL_OP1_203_58915_20130508_170813_inLine +BABEL_OP1_203_58915_20130508_170813_outLine +BABEL_OP1_203_58926_20130314_221922_inLine +BABEL_OP1_203_58926_20130314_221922_outLine +BABEL_OP1_203_59078_20130328_222520_inLine +BABEL_OP1_203_59078_20130328_222520_outLine +BABEL_OP1_203_59307_20130503_211805_inLine +BABEL_OP1_203_59307_20130503_211805_outLine +BABEL_OP1_203_59720_20130323_160840_inLine +BABEL_OP1_203_59720_20130323_160840_outLine +BABEL_OP1_203_59747_20130307_185538_inLine +BABEL_OP1_203_59747_20130307_185538_outLine +BABEL_OP1_203_59864_20130719_183902_inLine +BABEL_OP1_203_59864_20130719_183902_outLine +BABEL_OP1_203_59928_20130314_205249_inLine +BABEL_OP1_203_59928_20130314_205249_outLine +BABEL_OP1_203_60026_20130311_192442_inLine +BABEL_OP1_203_60026_20130311_192442_outLine +BABEL_OP1_203_60352_20130724_151721_inLine +BABEL_OP1_203_60352_20130724_151721_outLine +BABEL_OP1_203_60397_20130814_170113_inLine +BABEL_OP1_203_60397_20130814_170113_outLine +BABEL_OP1_203_60436_20130726_213808_inLine +BABEL_OP1_203_60436_20130726_213808_outLine +BABEL_OP1_203_60830_20130323_152836_inLine +BABEL_OP1_203_60830_20130323_152836_outLine +BABEL_OP1_203_61011_20130307_163948_inLine +BABEL_OP1_203_61011_20130307_163948_outLine +BABEL_OP1_203_61225_20130310_001509_inLine +BABEL_OP1_203_61225_20130310_001509_outLine +BABEL_OP1_203_61225_20130310_002607_inLine +BABEL_OP1_203_61225_20130310_002607_outLine +BABEL_OP1_203_61435_20130421_175121_inLine +BABEL_OP1_203_61435_20130421_175121_outLine +BABEL_OP1_203_61440_20130513_143551_inLine +BABEL_OP1_203_61440_20130513_143551_outLine +BABEL_OP1_203_61888_20130410_154115_inLine +BABEL_OP1_203_61888_20130410_154115_outLine +BABEL_OP1_203_62014_20130503_150317_inLine +BABEL_OP1_203_62014_20130503_150317_outLine +BABEL_OP1_203_62200_20130320_155842_inLine 
+BABEL_OP1_203_62200_20130320_155842_outLine +BABEL_OP1_203_62360_20130729_185133_inLine +BABEL_OP1_203_62360_20130729_185133_outLine +BABEL_OP1_203_62362_20130513_145108_inLine +BABEL_OP1_203_62362_20130513_145108_outLine +BABEL_OP1_203_62714_20130430_183624_inLine +BABEL_OP1_203_62714_20130430_183624_outLine +BABEL_OP1_203_62800_20130307_204137_inLine +BABEL_OP1_203_62800_20130307_204137_outLine +BABEL_OP1_203_62976_20130512_201748_inLine +BABEL_OP1_203_62976_20130512_201748_outLine +BABEL_OP1_203_63094_20130512_165833_inLine +BABEL_OP1_203_63094_20130512_165833_outLine +BABEL_OP1_203_63730_20130507_163540_inLine +BABEL_OP1_203_63730_20130507_163540_outLine +BABEL_OP1_203_64014_20130411_192910_inLine +BABEL_OP1_203_64014_20130411_192910_outLine +BABEL_OP1_203_64065_20130326_201717_inLine +BABEL_OP1_203_64065_20130326_201717_outLine +BABEL_OP1_203_64065_20130326_202638_inLine +BABEL_OP1_203_64065_20130326_202638_outLine +BABEL_OP1_203_65723_20130313_205922_inLine +BABEL_OP1_203_65723_20130313_205922_outLine +BABEL_OP1_203_65913_20130726_205358_inLine +BABEL_OP1_203_65913_20130726_205358_outLine +BABEL_OP1_203_66001_20130309_233448_inLine +BABEL_OP1_203_66001_20130309_233448_outLine +BABEL_OP1_203_66045_20130323_203735_inLine +BABEL_OP1_203_66045_20130323_203735_outLine +BABEL_OP1_203_66822_20130324_142935_inLine +BABEL_OP1_203_66822_20130324_142935_outLine +BABEL_OP1_203_66916_20130308_142310_inLine +BABEL_OP1_203_66916_20130308_142310_outLine +BABEL_OP1_203_66971_20130725_151439_inLine +BABEL_OP1_203_66971_20130725_151439_outLine +BABEL_OP1_203_67066_20130509_215551_inLine +BABEL_OP1_203_67066_20130509_215551_outLine +BABEL_OP1_203_68289_20130409_222355_inLine +BABEL_OP1_203_68289_20130409_222355_outLine +BABEL_OP1_203_68385_20130221_213027_inLine +BABEL_OP1_203_68385_20130221_213027_outLine +BABEL_OP1_203_69096_20130714_153203_inLine +BABEL_OP1_203_69096_20130714_153203_outLine +BABEL_OP1_203_69474_20130409_153705_inLine +BABEL_OP1_203_69474_20130409_153705_outLine +BABEL_OP1_203_69885_20130729_175242_inLine +BABEL_OP1_203_69885_20130729_175242_outLine +BABEL_OP1_203_69964_20130801_183705_inLine +BABEL_OP1_203_69964_20130801_183705_outLine +BABEL_OP1_203_70221_20130502_153055_inLine +BABEL_OP1_203_70221_20130502_153055_outLine +BABEL_OP1_203_70386_20130315_162835_inLine +BABEL_OP1_203_70386_20130315_162835_outLine +BABEL_OP1_203_70639_20130805_192027_inLine +BABEL_OP1_203_70639_20130805_192027_outLine +BABEL_OP1_203_70716_20130731_182939_inLine +BABEL_OP1_203_70716_20130731_182939_outLine +BABEL_OP1_203_71067_20130503_201919_inLine +BABEL_OP1_203_71067_20130503_201919_outLine +BABEL_OP1_203_71566_20130406_212124_inLine +BABEL_OP1_203_71566_20130406_212124_outLine +BABEL_OP1_203_72324_20130721_195442_inLine +BABEL_OP1_203_72324_20130721_195442_outLine +BABEL_OP1_203_72587_20130331_220349_inLine +BABEL_OP1_203_72587_20130331_220349_outLine +BABEL_OP1_203_73042_20130314_184552_inLine +BABEL_OP1_203_73042_20130314_184552_outLine +BABEL_OP1_203_73301_20130321_151848_inLine +BABEL_OP1_203_73301_20130321_151848_outLine +BABEL_OP1_203_73591_20130222_132516_inLine +BABEL_OP1_203_73591_20130222_132516_outLine +BABEL_OP1_203_74667_20130322_155857_inLine +BABEL_OP1_203_74667_20130322_155857_outLine +BABEL_OP1_203_74886_20130309_200304_inLine +BABEL_OP1_203_74886_20130309_200304_outLine +BABEL_OP1_203_75064_20130322_142556_inLine +BABEL_OP1_203_75064_20130322_142556_outLine +BABEL_OP1_203_75342_20130404_193602_inLine +BABEL_OP1_203_75342_20130404_193602_outLine 
+BABEL_OP1_203_75869_20130721_161850_inLine +BABEL_OP1_203_75869_20130721_161850_outLine +BABEL_OP1_203_76444_20130406_153810_inLine +BABEL_OP1_203_76444_20130406_153810_outLine +BABEL_OP1_203_76482_20130508_220808_inLine +BABEL_OP1_203_76482_20130508_220808_outLine +BABEL_OP1_203_77242_20130508_191854_inLine +BABEL_OP1_203_77242_20130508_191854_outLine +BABEL_OP1_203_78749_20130426_182140_inLine +BABEL_OP1_203_78749_20130426_182140_outLine +BABEL_OP1_203_79131_20130727_202021_inLine +BABEL_OP1_203_79131_20130727_202021_outLine +BABEL_OP1_203_79660_20130512_173422_inLine +BABEL_OP1_203_79660_20130512_173422_outLine +BABEL_OP1_203_80134_20130814_145021_inLine +BABEL_OP1_203_80134_20130814_145021_outLine +BABEL_OP1_203_81287_20130403_225530_inLine +BABEL_OP1_203_81287_20130403_225530_outLine +BABEL_OP1_203_82224_20130718_134750_inLine +BABEL_OP1_203_82224_20130718_134750_outLine +BABEL_OP1_203_83813_20130812_133548_inLine +BABEL_OP1_203_83813_20130812_133548_outLine +BABEL_OP1_203_84339_20130802_181641_inLine +BABEL_OP1_203_84339_20130802_181641_outLine +BABEL_OP1_203_84469_20130421_132749_inLine +BABEL_OP1_203_84469_20130421_132749_outLine +BABEL_OP1_203_84611_20130312_152852_inLine +BABEL_OP1_203_84611_20130312_152852_outLine +BABEL_OP1_203_85325_20130802_212902_inLine +BABEL_OP1_203_85325_20130802_212902_outLine +BABEL_OP1_203_86597_20130508_182316_inLine +BABEL_OP1_203_86597_20130508_182316_outLine +BABEL_OP1_203_86628_20130512_215243_inLine +BABEL_OP1_203_86628_20130512_215243_outLine +BABEL_OP1_203_86830_20130423_194221_inLine +BABEL_OP1_203_86830_20130423_194221_outLine +BABEL_OP1_203_86878_20130804_174949_inLine +BABEL_OP1_203_86878_20130804_174949_outLine +BABEL_OP1_203_86891_20130427_122020_inLine +BABEL_OP1_203_86891_20130427_122020_outLine +BABEL_OP1_203_87305_20130512_150816_inLine +BABEL_OP1_203_87305_20130512_150816_outLine +BABEL_OP1_203_89358_20130327_183946_inLine +BABEL_OP1_203_89358_20130327_183946_outLine +BABEL_OP1_203_89943_20130319_151705_inLine +BABEL_OP1_203_89943_20130319_151705_outLine +BABEL_OP1_203_90709_20130311_171156_inLine +BABEL_OP1_203_90709_20130311_171156_outLine +BABEL_OP1_203_91760_20130728_190550_inLine +BABEL_OP1_203_91760_20130728_190550_outLine +BABEL_OP1_203_92077_20130725_140650_inLine +BABEL_OP1_203_92077_20130725_140650_outLine +BABEL_OP1_203_93411_20130324_150550_inLine +BABEL_OP1_203_93411_20130324_150550_outLine +BABEL_OP1_203_93490_20130804_201521_inLine +BABEL_OP1_203_93490_20130804_201521_outLine +BABEL_OP1_203_93964_20130327_171307_inLine +BABEL_OP1_203_93964_20130327_171307_outLine +BABEL_OP1_203_94442_20130727_182743_inLine +BABEL_OP1_203_94442_20130727_182743_outLine +BABEL_OP1_203_94449_20130801_010717_inLine +BABEL_OP1_203_94449_20130801_010717_outLine +BABEL_OP1_203_95338_20130727_211019_inLine +BABEL_OP1_203_95338_20130727_211019_outLine +BABEL_OP1_203_96059_20130731_211048_inLine +BABEL_OP1_203_96059_20130731_211048_outLine +BABEL_OP1_203_96376_20130731_143340_outLine +BABEL_OP1_203_96690_20130320_183730_inLine +BABEL_OP1_203_96690_20130320_183730_outLine +BABEL_OP1_203_96690_20130320_185039_inLine +BABEL_OP1_203_96690_20130320_185039_outLine +BABEL_OP1_203_96842_20130726_140248_inLine +BABEL_OP1_203_96842_20130726_140248_outLine +BABEL_OP1_203_97220_20130508_165310_inLine +BABEL_OP1_203_97220_20130508_165310_outLine +BABEL_OP1_203_97836_20130430_195102_inLine +BABEL_OP1_203_97836_20130430_195102_outLine +BABEL_OP1_203_98192_20130511_210223_inLine +BABEL_OP1_203_98192_20130511_210223_outLine diff --git 
a/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.list b/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.list new file mode 100644 index 00000000000..bc4c7166c32 --- /dev/null +++ b/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.list @@ -0,0 +1,127 @@ +BABEL_OP1_203_10974_20130425_162609_inLine +BABEL_OP1_203_10974_20130425_162609_outLine +BABEL_OP1_203_14141_20130410_212719_inLine +BABEL_OP1_203_14141_20130410_212719_outLine +BABEL_OP1_203_14237_20130313_222650_inLine +BABEL_OP1_203_14237_20130313_222650_outLine +BABEL_OP1_203_15163_20130319_154026_inLine +BABEL_OP1_203_15163_20130319_154026_outLine +BABEL_OP1_203_15324_20130512_224242_inLine +BABEL_OP1_203_15324_20130512_224242_outLine +BABEL_OP1_203_15324_20130512_225202_inLine +BABEL_OP1_203_15324_20130512_225202_outLine +BABEL_OP1_203_15638_20130409_150143_inLine +BABEL_OP1_203_15638_20130409_150143_outLine +BABEL_OP1_203_17115_20130425_173844_inLine +BABEL_OP1_203_17115_20130425_173844_outLine +BABEL_OP1_203_17115_20130425_175816_inLine +BABEL_OP1_203_17115_20130425_175816_outLine +BABEL_OP1_203_17751_20130512_155328_inLine +BABEL_OP1_203_17751_20130512_155328_outLine +BABEL_OP1_203_17914_20130503_215602_inLine +BABEL_OP1_203_17914_20130503_215602_outLine +BABEL_OP1_203_17923_20130314_203130_inLine +BABEL_OP1_203_17923_20130314_203130_outLine +BABEL_OP1_203_20682_20130406_194906_inLine +BABEL_OP1_203_20682_20130406_194906_outLine +BABEL_OP1_203_22624_20130403_190935_inLine +BABEL_OP1_203_22624_20130403_190935_outLine +BABEL_OP1_203_24270_20130329_153331_inLine +BABEL_OP1_203_24270_20130329_153331_outLine +BABEL_OP1_203_24589_20130323_190409_inLine +BABEL_OP1_203_24589_20130323_190409_outLine +BABEL_OP1_203_24589_20130323_192722_inLine +BABEL_OP1_203_24589_20130323_192722_outLine +BABEL_OP1_203_25220_20130502_183943_inLine +BABEL_OP1_203_25220_20130502_183943_outLine +BABEL_OP1_203_27478_20130501_195141_inLine +BABEL_OP1_203_27478_20130501_195141_outLine +BABEL_OP1_203_27478_20130501_200641_inLine +BABEL_OP1_203_27478_20130501_200641_outLine +BABEL_OP1_203_28190_20130730_195836_inLine +BABEL_OP1_203_28190_20130730_195836_outLine +BABEL_OP1_203_28945_20130315_171902_inLine +BABEL_OP1_203_28945_20130315_171902_outLine +BABEL_OP1_203_32914_20130411_174738_inLine +BABEL_OP1_203_32914_20130411_174738_outLine +BABEL_OP1_203_33175_20130307_204134_inLine +BABEL_OP1_203_33175_20130307_204134_outLine +BABEL_OP1_203_40713_20130321_155930_inLine +BABEL_OP1_203_40713_20130321_155930_outLine +BABEL_OP1_203_41097_20130427_224950_inLine +BABEL_OP1_203_41097_20130427_224950_outLine +BABEL_OP1_203_41100_20130313_161755_inLine +BABEL_OP1_203_41100_20130313_161755_outLine +BABEL_OP1_203_41680_20130304_134640_inLine +BABEL_OP1_203_41680_20130304_134640_outLine +BABEL_OP1_203_42126_20130805_213859_inLine +BABEL_OP1_203_42126_20130805_213859_outLine +BABEL_OP1_203_42243_20130313_170336_inLine +BABEL_OP1_203_42243_20130313_170336_outLine +BABEL_OP1_203_42834_20130404_194840_inLine +BABEL_OP1_203_42834_20130404_194840_outLine +BABEL_OP1_203_42883_20130729_171646_inLine +BABEL_OP1_203_42883_20130729_171646_outLine +BABEL_OP1_203_44477_20130331_190402_inLine +BABEL_OP1_203_44477_20130331_190402_outLine +BABEL_OP1_203_45642_20130313_202110_inLine +BABEL_OP1_203_45642_20130313_202110_outLine +BABEL_OP1_203_46625_20130304_201959_inLine +BABEL_OP1_203_46625_20130304_201959_outLine +BABEL_OP1_203_46763_20130426_160841_inLine +BABEL_OP1_203_46763_20130426_160841_outLine +BABEL_OP1_203_47270_20130410_160110_inLine +BABEL_OP1_203_47270_20130410_160110_outLine 
+BABEL_OP1_203_49637_20130313_134853_inLine +BABEL_OP1_203_49637_20130313_134853_outLine +BABEL_OP1_203_49902_20130323_175920_inLine +BABEL_OP1_203_49902_20130323_175920_outLine +BABEL_OP1_203_50726_20130307_135236_inLine +BABEL_OP1_203_50726_20130307_135236_outLine +BABEL_OP1_203_51414_20130729_152916_inLine +BABEL_OP1_203_51414_20130729_152916_outLine +BABEL_OP1_203_52447_20130513_224209_inLine +BABEL_OP1_203_52447_20130513_224209_outLine +BABEL_OP1_203_52854_20130221_192229_inLine +BABEL_OP1_203_52854_20130221_192229_outLine +BABEL_OP1_203_54046_20130804_193101_inLine +BABEL_OP1_203_54046_20130804_193101_outLine +BABEL_OP1_203_54744_20130311_153522_inLine +BABEL_OP1_203_54744_20130311_153522_outLine +BABEL_OP1_203_55818_20130309_163433_inLine +BABEL_OP1_203_55818_20130309_163433_outLine +BABEL_OP1_203_56213_20130407_184955_inLine +BABEL_OP1_203_56213_20130407_184955_outLine +BABEL_OP1_203_56465_20130503_211423_inLine +BABEL_OP1_203_56465_20130503_211423_outLine +BABEL_OP1_203_56677_20130407_020513_inLine +BABEL_OP1_203_56677_20130407_020513_outLine +BABEL_OP1_203_58850_20130320_210438_outLine +BABEL_OP1_203_58853_20130804_133710_inLine +BABEL_OP1_203_58853_20130804_133710_outLine +BABEL_OP1_203_61011_20130307_163948_inLine +BABEL_OP1_203_61011_20130307_163948_outLine +BABEL_OP1_203_62362_20130513_145108_inLine +BABEL_OP1_203_62362_20130513_145108_outLine +BABEL_OP1_203_63094_20130512_165833_inLine +BABEL_OP1_203_63094_20130512_165833_outLine +BABEL_OP1_203_64014_20130411_192910_inLine +BABEL_OP1_203_64014_20130411_192910_outLine +BABEL_OP1_203_65723_20130313_205922_inLine +BABEL_OP1_203_65723_20130313_205922_outLine +BABEL_OP1_203_69885_20130729_175242_inLine +BABEL_OP1_203_69885_20130729_175242_outLine +BABEL_OP1_203_70639_20130805_192027_inLine +BABEL_OP1_203_70639_20130805_192027_outLine +BABEL_OP1_203_73042_20130314_184552_inLine +BABEL_OP1_203_73042_20130314_184552_outLine +BABEL_OP1_203_73301_20130321_151848_inLine +BABEL_OP1_203_73301_20130321_151848_outLine +BABEL_OP1_203_78749_20130426_182140_inLine +BABEL_OP1_203_78749_20130426_182140_outLine +BABEL_OP1_203_83813_20130812_133548_inLine +BABEL_OP1_203_83813_20130812_133548_outLine +BABEL_OP1_203_86830_20130423_194221_inLine +BABEL_OP1_203_86830_20130423_194221_outLine +BABEL_OP1_203_96842_20130726_140248_inLine +BABEL_OP1_203_96842_20130726_140248_outLine diff --git a/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..500c68fda58 --- /dev/null +++ b/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.untranscribed.list @@ -0,0 +1,654 @@ +BABEL_OP1_203_10036_20130318_191401_inLine +BABEL_OP1_203_10036_20130318_191401_outLine +BABEL_OP1_203_10411_20130511_174439_inLine +BABEL_OP1_203_10411_20130511_174439_outLine +BABEL_OP1_203_10482_20130403_160013_inLine +BABEL_OP1_203_10482_20130403_160013_outLine +BABEL_OP1_203_10524_20130425_183925_inLine +BABEL_OP1_203_10524_20130425_183925_outLine +BABEL_OP1_203_10524_20130425_185048_inLine +BABEL_OP1_203_10524_20130425_185048_outLine +BABEL_OP1_203_10901_20130321_180232_inLine +BABEL_OP1_203_10901_20130321_180232_outLine +BABEL_OP1_203_10938_20130319_190809_inLine +BABEL_OP1_203_10938_20130319_190809_outLine +BABEL_OP1_203_10966_20130319_135742_inLine +BABEL_OP1_203_10966_20130319_135742_outLine +BABEL_OP1_203_11352_20130426_170450_inLine +BABEL_OP1_203_11352_20130426_170450_outLine +BABEL_OP1_203_11486_20130428_131348_inLine 
+BABEL_OP1_203_11486_20130428_131348_outLine +BABEL_OP1_203_11663_20130402_202025_inLine +BABEL_OP1_203_11663_20130402_202025_outLine +BABEL_OP1_203_11673_20130306_201125_inLine +BABEL_OP1_203_11673_20130306_201125_outLine +BABEL_OP1_203_11797_20130309_195420_inLine +BABEL_OP1_203_11797_20130309_195420_outLine +BABEL_OP1_203_11859_20130511_201411_inLine +BABEL_OP1_203_11859_20130511_201411_outLine +BABEL_OP1_203_12036_20130312_182225_inLine +BABEL_OP1_203_12036_20130312_182225_outLine +BABEL_OP1_203_12220_20130321_160841_inLine +BABEL_OP1_203_12220_20130321_160841_outLine +BABEL_OP1_203_12606_20130726_174724_inLine +BABEL_OP1_203_12606_20130726_174724_outLine +BABEL_OP1_203_12609_20130727_133133_outLine +BABEL_OP1_203_12767_20130313_214914_inLine +BABEL_OP1_203_12767_20130313_214914_outLine +BABEL_OP1_203_12851_20130304_181335_inLine +BABEL_OP1_203_12851_20130304_181335_outLine +BABEL_OP1_203_12851_20130304_182835_inLine +BABEL_OP1_203_12851_20130304_182835_outLine +BABEL_OP1_203_12851_20130304_185138_inLine +BABEL_OP1_203_12851_20130304_185138_outLine +BABEL_OP1_203_13126_20130421_175306_inLine +BABEL_OP1_203_13126_20130421_175306_outLine +BABEL_OP1_203_13126_20130421_180154_inLine +BABEL_OP1_203_13126_20130421_180154_outLine +BABEL_OP1_203_13324_20130313_185155_inLine +BABEL_OP1_203_13324_20130313_185155_outLine +BABEL_OP1_203_13483_20130409_231107_inLine +BABEL_OP1_203_13483_20130409_231107_outLine +BABEL_OP1_203_13490_20130322_143131_inLine +BABEL_OP1_203_13490_20130322_143131_outLine +BABEL_OP1_203_13664_20130304_155051_inLine +BABEL_OP1_203_13664_20130304_155051_outLine +BABEL_OP1_203_13709_20130410_222037_inLine +BABEL_OP1_203_13709_20130410_222037_outLine +BABEL_OP1_203_13744_20130307_215445_inLine +BABEL_OP1_203_13744_20130307_215445_outLine +BABEL_OP1_203_13792_20130310_142445_inLine +BABEL_OP1_203_13792_20130310_142445_outLine +BABEL_OP1_203_14137_20130314_181335_inLine +BABEL_OP1_203_14137_20130314_181335_outLine +BABEL_OP1_203_14179_20130402_211621_inLine +BABEL_OP1_203_14179_20130402_211621_outLine +BABEL_OP1_203_14229_20130324_162827_inLine +BABEL_OP1_203_14229_20130324_162827_outLine +BABEL_OP1_203_14560_20130425_140155_inLine +BABEL_OP1_203_14560_20130425_140155_outLine +BABEL_OP1_203_14719_20130406_191558_inLine +BABEL_OP1_203_14719_20130406_191558_outLine +BABEL_OP1_203_14725_20130309_185639_inLine +BABEL_OP1_203_14725_20130309_185639_outLine +BABEL_OP1_203_14729_20130411_214726_inLine +BABEL_OP1_203_14729_20130411_214726_outLine +BABEL_OP1_203_14814_20130314_133131_inLine +BABEL_OP1_203_14814_20130314_133131_outLine +BABEL_OP1_203_14899_20130311_184638_inLine +BABEL_OP1_203_14899_20130311_184638_outLine +BABEL_OP1_203_14929_20130324_184056_inLine +BABEL_OP1_203_14929_20130324_184056_outLine +BABEL_OP1_203_15024_20130322_152846_inLine +BABEL_OP1_203_15024_20130322_152846_outLine +BABEL_OP1_203_15227_20130513_222256_inLine +BABEL_OP1_203_15227_20130513_222256_outLine +BABEL_OP1_203_15322_20130511_152438_inLine +BABEL_OP1_203_15322_20130511_152438_outLine +BABEL_OP1_203_15535_20130329_143236_inLine +BABEL_OP1_203_15535_20130329_143236_outLine +BABEL_OP1_203_15730_20130307_201711_inLine +BABEL_OP1_203_15730_20130307_201711_outLine +BABEL_OP1_203_15749_20130407_175145_inLine +BABEL_OP1_203_15749_20130407_175145_outLine +BABEL_OP1_203_15902_20130309_193940_inLine +BABEL_OP1_203_15902_20130309_193940_outLine +BABEL_OP1_203_16149_20130309_171014_inLine +BABEL_OP1_203_16149_20130309_171014_outLine +BABEL_OP1_203_16924_20130720_175321_inLine 
+BABEL_OP1_203_16924_20130720_175321_outLine +BABEL_OP1_203_17032_20130402_175428_inLine +BABEL_OP1_203_17032_20130402_175428_outLine +BABEL_OP1_203_17097_20130430_173440_inLine +BABEL_OP1_203_17097_20130430_173440_outLine +BABEL_OP1_203_17472_20130408_215034_inLine +BABEL_OP1_203_17472_20130408_215034_outLine +BABEL_OP1_203_17567_20130425_145936_inLine +BABEL_OP1_203_17567_20130425_145936_outLine +BABEL_OP1_203_18118_20130730_191442_inLine +BABEL_OP1_203_18118_20130730_191442_outLine +BABEL_OP1_203_18380_20130327_214619_inLine +BABEL_OP1_203_18380_20130327_214619_outLine +BABEL_OP1_203_18566_20130503_153904_inLine +BABEL_OP1_203_18566_20130503_153904_outLine +BABEL_OP1_203_18939_20130311_144740_inLine +BABEL_OP1_203_18939_20130311_144740_outLine +BABEL_OP1_203_19101_20130423_142324_inLine +BABEL_OP1_203_19101_20130423_142324_outLine +BABEL_OP1_203_19134_20130328_220635_inLine +BABEL_OP1_203_19134_20130328_220635_outLine +BABEL_OP1_203_19589_20130727_143145_inLine +BABEL_OP1_203_19589_20130727_143145_outLine +BABEL_OP1_203_19703_20130318_160958_inLine +BABEL_OP1_203_19703_20130318_160958_outLine +BABEL_OP1_203_19703_20130318_162314_inLine +BABEL_OP1_203_19703_20130318_162314_outLine +BABEL_OP1_203_19773_20130407_183531_inLine +BABEL_OP1_203_19773_20130407_183531_outLine +BABEL_OP1_203_19782_20130404_170141_inLine +BABEL_OP1_203_19782_20130404_170141_outLine +BABEL_OP1_203_20133_20130304_160351_inLine +BABEL_OP1_203_20133_20130304_160351_outLine +BABEL_OP1_203_20330_20130410_161539_inLine +BABEL_OP1_203_20330_20130410_161539_outLine +BABEL_OP1_203_20768_20130407_190152_inLine +BABEL_OP1_203_20768_20130407_190152_outLine +BABEL_OP1_203_20985_20130330_210730_inLine +BABEL_OP1_203_20985_20130330_210730_outLine +BABEL_OP1_203_21004_20130410_181101_inLine +BABEL_OP1_203_21004_20130410_181101_outLine +BABEL_OP1_203_21004_20130410_182740_inLine +BABEL_OP1_203_21004_20130410_182740_outLine +BABEL_OP1_203_21109_20130406_161601_inLine +BABEL_OP1_203_21109_20130406_161601_outLine +BABEL_OP1_203_21206_20130312_164516_inLine +BABEL_OP1_203_21206_20130312_164516_outLine +BABEL_OP1_203_21315_20130501_151005_inLine +BABEL_OP1_203_21315_20130501_151005_outLine +BABEL_OP1_203_21327_20130405_203336_inLine +BABEL_OP1_203_21327_20130405_203336_outLine +BABEL_OP1_203_21435_20130423_181043_inLine +BABEL_OP1_203_21435_20130423_181043_outLine +BABEL_OP1_203_22280_20130329_161951_inLine +BABEL_OP1_203_22280_20130329_161951_outLine +BABEL_OP1_203_22321_20130309_191222_inLine +BABEL_OP1_203_22321_20130309_191222_outLine +BABEL_OP1_203_22446_20130309_134600_inLine +BABEL_OP1_203_22446_20130309_134600_outLine +BABEL_OP1_203_22494_20130402_171234_inLine +BABEL_OP1_203_22494_20130402_171234_outLine +BABEL_OP1_203_22612_20130406_220338_inLine +BABEL_OP1_203_22612_20130406_220338_outLine +BABEL_OP1_203_22918_20130410_190723_inLine +BABEL_OP1_203_22918_20130410_190723_outLine +BABEL_OP1_203_23006_20130319_211412_inLine +BABEL_OP1_203_23006_20130319_211412_outLine +BABEL_OP1_203_23046_20130322_165811_inLine +BABEL_OP1_203_23046_20130322_165811_outLine +BABEL_OP1_203_23092_20130406_014425_inLine +BABEL_OP1_203_23092_20130406_014425_outLine +BABEL_OP1_203_23092_20130406_015338_inLine +BABEL_OP1_203_23092_20130406_015338_outLine +BABEL_OP1_203_23153_20130320_194433_inLine +BABEL_OP1_203_23153_20130320_194433_outLine +BABEL_OP1_203_23239_20130331_171214_inLine +BABEL_OP1_203_23239_20130331_171214_outLine +BABEL_OP1_203_23505_20130309_204825_inLine +BABEL_OP1_203_23505_20130309_204825_outLine 
+BABEL_OP1_203_23980_20130321_193946_inLine +BABEL_OP1_203_23980_20130321_193946_outLine +BABEL_OP1_203_24017_20130424_174037_inLine +BABEL_OP1_203_24017_20130424_174037_outLine +BABEL_OP1_203_24253_20130423_175626_inLine +BABEL_OP1_203_24253_20130423_175626_outLine +BABEL_OP1_203_24290_20130423_133315_inLine +BABEL_OP1_203_24290_20130423_133315_outLine +BABEL_OP1_203_24323_20130320_160949_inLine +BABEL_OP1_203_24323_20130320_160949_outLine +BABEL_OP1_203_24470_20130329_205656_inLine +BABEL_OP1_203_24470_20130329_205656_outLine +BABEL_OP1_203_24501_20130421_141711_inLine +BABEL_OP1_203_24501_20130421_141711_outLine +BABEL_OP1_203_24569_20130405_200644_inLine +BABEL_OP1_203_24569_20130405_200644_outLine +BABEL_OP1_203_24586_20130506_203931_inLine +BABEL_OP1_203_24586_20130506_203931_outLine +BABEL_OP1_203_24590_20130321_221146_inLine +BABEL_OP1_203_24590_20130321_221146_outLine +BABEL_OP1_203_24679_20130307_145644_inLine +BABEL_OP1_203_24679_20130307_145644_outLine +BABEL_OP1_203_24779_20130426_183526_inLine +BABEL_OP1_203_24779_20130426_183526_outLine +BABEL_OP1_203_24982_20130327_153429_inLine +BABEL_OP1_203_24982_20130327_153429_outLine +BABEL_OP1_203_25015_20130728_150746_inLine +BABEL_OP1_203_25015_20130728_150746_outLine +BABEL_OP1_203_25085_20130508_145922_inLine +BABEL_OP1_203_25085_20130508_145922_outLine +BABEL_OP1_203_25412_20130329_201051_inLine +BABEL_OP1_203_25412_20130329_201051_outLine +BABEL_OP1_203_25698_20130509_182226_inLine +BABEL_OP1_203_25698_20130509_182226_outLine +BABEL_OP1_203_25719_20130426_202355_inLine +BABEL_OP1_203_25719_20130426_202355_outLine +BABEL_OP1_203_25767_20130311_183243_inLine +BABEL_OP1_203_25767_20130311_183243_outLine +BABEL_OP1_203_25961_20130311_171235_inLine +BABEL_OP1_203_25961_20130311_171235_outLine +BABEL_OP1_203_26388_20130318_200305_inLine +BABEL_OP1_203_26388_20130318_200305_outLine +BABEL_OP1_203_26507_20130430_234212_inLine +BABEL_OP1_203_26507_20130430_234212_outLine +BABEL_OP1_203_26574_20130411_160556_inLine +BABEL_OP1_203_26574_20130411_160556_outLine +BABEL_OP1_203_26602_20130801_171131_inLine +BABEL_OP1_203_26602_20130801_171131_outLine +BABEL_OP1_203_26836_20130315_160512_inLine +BABEL_OP1_203_26836_20130315_160512_outLine +BABEL_OP1_203_27125_20130308_003724_inLine +BABEL_OP1_203_27125_20130308_003724_outLine +BABEL_OP1_203_27218_20130312_194932_inLine +BABEL_OP1_203_27218_20130312_194932_outLine +BABEL_OP1_203_27590_20130405_200930_inLine +BABEL_OP1_203_27590_20130405_200930_outLine +BABEL_OP1_203_27841_20130403_211143_inLine +BABEL_OP1_203_27841_20130403_211143_outLine +BABEL_OP1_203_28280_20130501_220643_inLine +BABEL_OP1_203_28280_20130501_220643_outLine +BABEL_OP1_203_28419_20130319_165427_inLine +BABEL_OP1_203_28419_20130319_165427_outLine +BABEL_OP1_203_28522_20130328_170837_inLine +BABEL_OP1_203_28522_20130328_170837_outLine +BABEL_OP1_203_28775_20130313_213707_inLine +BABEL_OP1_203_28775_20130313_213707_outLine +BABEL_OP1_203_28775_20130313_215352_inLine +BABEL_OP1_203_28775_20130313_215352_outLine +BABEL_OP1_203_29023_20130313_194148_inLine +BABEL_OP1_203_29023_20130313_194148_outLine +BABEL_OP1_203_29023_20130313_195106_inLine +BABEL_OP1_203_29023_20130313_195106_outLine +BABEL_OP1_203_29039_20130402_153541_inLine +BABEL_OP1_203_29039_20130402_153541_outLine +BABEL_OP1_203_29168_20130306_213504_inLine +BABEL_OP1_203_29168_20130306_213504_outLine +BABEL_OP1_203_29323_20130403_215525_inLine +BABEL_OP1_203_29323_20130403_215525_outLine +BABEL_OP1_203_29416_20130421_133101_inLine 
+BABEL_OP1_203_29416_20130421_133101_outLine +BABEL_OP1_203_29439_20130422_150608_inLine +BABEL_OP1_203_29439_20130422_150608_outLine +BABEL_OP1_203_30013_20130331_170538_inLine +BABEL_OP1_203_30013_20130331_170538_outLine +BABEL_OP1_203_30395_20130318_180120_inLine +BABEL_OP1_203_30395_20130318_180120_outLine +BABEL_OP1_203_30645_20130309_151850_inLine +BABEL_OP1_203_30645_20130309_151850_outLine +BABEL_OP1_203_31184_20130322_141512_inLine +BABEL_OP1_203_31184_20130322_141512_outLine +BABEL_OP1_203_31184_20130322_142743_inLine +BABEL_OP1_203_31184_20130322_142743_outLine +BABEL_OP1_203_31490_20130321_210518_inLine +BABEL_OP1_203_31490_20130321_210518_outLine +BABEL_OP1_203_31992_20130313_143826_inLine +BABEL_OP1_203_31992_20130313_143826_outLine +BABEL_OP1_203_32097_20130304_195431_inLine +BABEL_OP1_203_32097_20130304_195431_outLine +BABEL_OP1_203_32122_20130320_174321_inLine +BABEL_OP1_203_32122_20130320_174321_outLine +BABEL_OP1_203_32122_20130320_175419_inLine +BABEL_OP1_203_32122_20130320_175419_outLine +BABEL_OP1_203_32244_20130728_182847_inLine +BABEL_OP1_203_32244_20130728_182847_outLine +BABEL_OP1_203_32998_20130329_155417_inLine +BABEL_OP1_203_32998_20130329_155417_outLine +BABEL_OP1_203_33476_20130320_140412_inLine +BABEL_OP1_203_33476_20130320_140412_outLine +BABEL_OP1_203_33672_20130312_165130_inLine +BABEL_OP1_203_33672_20130312_165130_outLine +BABEL_OP1_203_33704_20130405_220001_inLine +BABEL_OP1_203_33704_20130405_220001_outLine +BABEL_OP1_203_33840_20130803_192343_inLine +BABEL_OP1_203_33840_20130803_192343_outLine +BABEL_OP1_203_34145_20130331_145240_inLine +BABEL_OP1_203_34145_20130331_145240_outLine +BABEL_OP1_203_35139_20130313_143646_inLine +BABEL_OP1_203_35139_20130313_143646_outLine +BABEL_OP1_203_36505_20130731_191406_inLine +BABEL_OP1_203_36505_20130731_191406_outLine +BABEL_OP1_203_36594_20130421_182303_inLine +BABEL_OP1_203_36594_20130421_182303_outLine +BABEL_OP1_203_37598_20130330_000102_inLine +BABEL_OP1_203_37598_20130330_000102_outLine +BABEL_OP1_203_38979_20130409_173446_inLine +BABEL_OP1_203_38979_20130409_173446_outLine +BABEL_OP1_203_38979_20130409_174405_inLine +BABEL_OP1_203_38979_20130409_174405_outLine +BABEL_OP1_203_39006_20130506_192659_inLine +BABEL_OP1_203_39006_20130506_192659_outLine +BABEL_OP1_203_39555_20130720_183746_inLine +BABEL_OP1_203_39555_20130720_183746_outLine +BABEL_OP1_203_39848_20130320_133756_inLine +BABEL_OP1_203_39848_20130320_133756_outLine +BABEL_OP1_203_40557_20130404_005522_inLine +BABEL_OP1_203_40557_20130404_005522_outLine +BABEL_OP1_203_40565_20130331_171210_inLine +BABEL_OP1_203_40565_20130331_171210_outLine +BABEL_OP1_203_41073_20130721_172038_inLine +BABEL_OP1_203_41073_20130721_172038_outLine +BABEL_OP1_203_41174_20130318_203041_inLine +BABEL_OP1_203_41174_20130318_203041_outLine +BABEL_OP1_203_41334_20130501_232034_inLine +BABEL_OP1_203_41334_20130501_232034_outLine +BABEL_OP1_203_41442_20130404_174409_inLine +BABEL_OP1_203_41442_20130404_174409_outLine +BABEL_OP1_203_41469_20130313_185923_inLine +BABEL_OP1_203_41469_20130313_185923_outLine +BABEL_OP1_203_41609_20130309_175203_inLine +BABEL_OP1_203_41609_20130309_175203_outLine +BABEL_OP1_203_42029_20130403_184623_inLine +BABEL_OP1_203_42029_20130403_184623_outLine +BABEL_OP1_203_42299_20130508_203220_inLine +BABEL_OP1_203_42299_20130508_203220_outLine +BABEL_OP1_203_42299_20130508_204824_inLine +BABEL_OP1_203_42299_20130508_204824_outLine +BABEL_OP1_203_42309_20130428_191239_inLine +BABEL_OP1_203_42309_20130428_191239_outLine 
+BABEL_OP1_203_42434_20130323_160637_inLine +BABEL_OP1_203_42434_20130323_160637_outLine +BABEL_OP1_203_42848_20130513_201112_inLine +BABEL_OP1_203_42848_20130513_201112_outLine +BABEL_OP1_203_43368_20130327_215424_inLine +BABEL_OP1_203_43368_20130327_215424_outLine +BABEL_OP1_203_43388_20130327_192024_inLine +BABEL_OP1_203_43388_20130327_192024_outLine +BABEL_OP1_203_43588_20130714_163553_inLine +BABEL_OP1_203_43588_20130714_163553_outLine +BABEL_OP1_203_43784_20130314_171933_inLine +BABEL_OP1_203_43784_20130314_171933_outLine +BABEL_OP1_203_43788_20130504_173234_inLine +BABEL_OP1_203_43788_20130504_173234_outLine +BABEL_OP1_203_43920_20130405_194800_inLine +BABEL_OP1_203_43920_20130405_194800_outLine +BABEL_OP1_203_44478_20130730_170938_inLine +BABEL_OP1_203_44478_20130730_170938_outLine +BABEL_OP1_203_44619_20130313_175437_inLine +BABEL_OP1_203_44619_20130313_175437_outLine +BABEL_OP1_203_44709_20130331_183159_inLine +BABEL_OP1_203_44709_20130331_183159_outLine +BABEL_OP1_203_44961_20130311_173427_inLine +BABEL_OP1_203_44961_20130311_173427_outLine +BABEL_OP1_203_45560_20130309_173444_inLine +BABEL_OP1_203_45560_20130309_173444_outLine +BABEL_OP1_203_45851_20130801_014413_inLine +BABEL_OP1_203_45851_20130801_014413_outLine +BABEL_OP1_203_46310_20130309_211431_inLine +BABEL_OP1_203_46310_20130309_211431_outLine +BABEL_OP1_203_46550_20130313_153012_inLine +BABEL_OP1_203_46550_20130313_153012_outLine +BABEL_OP1_203_46681_20130313_203139_inLine +BABEL_OP1_203_46681_20130313_203139_outLine +BABEL_OP1_203_46688_20130314_212550_inLine +BABEL_OP1_203_46688_20130314_212550_outLine +BABEL_OP1_203_47186_20130405_120609_inLine +BABEL_OP1_203_47186_20130405_120609_outLine +BABEL_OP1_203_47487_20130321_145055_inLine +BABEL_OP1_203_47487_20130321_145055_outLine +BABEL_OP1_203_47823_20130406_151016_inLine +BABEL_OP1_203_47823_20130406_151016_outLine +BABEL_OP1_203_47866_20130723_152640_inLine +BABEL_OP1_203_47866_20130723_152640_outLine +BABEL_OP1_203_48422_20130425_175947_inLine +BABEL_OP1_203_48422_20130425_175947_outLine +BABEL_OP1_203_48610_20130309_222037_inLine +BABEL_OP1_203_48610_20130309_222037_outLine +BABEL_OP1_203_49001_20130315_160533_inLine +BABEL_OP1_203_49001_20130315_160533_outLine +BABEL_OP1_203_49216_20130307_211955_inLine +BABEL_OP1_203_49216_20130307_211955_outLine +BABEL_OP1_203_49287_20130331_155341_inLine +BABEL_OP1_203_49287_20130331_155341_outLine +BABEL_OP1_203_49437_20130405_194333_inLine +BABEL_OP1_203_49437_20130405_194333_outLine +BABEL_OP1_203_49437_20130405_195645_inLine +BABEL_OP1_203_49437_20130405_195645_outLine +BABEL_OP1_203_49630_20130408_182919_inLine +BABEL_OP1_203_49630_20130408_182919_outLine +BABEL_OP1_203_49768_20130320_164815_inLine +BABEL_OP1_203_49768_20130320_164815_outLine +BABEL_OP1_203_50090_20130726_145642_inLine +BABEL_OP1_203_50090_20130726_145642_outLine +BABEL_OP1_203_50175_20130311_181803_inLine +BABEL_OP1_203_50175_20130311_181803_outLine +BABEL_OP1_203_51530_20130803_174620_inLine +BABEL_OP1_203_51530_20130803_174620_outLine +BABEL_OP1_203_51611_20130312_195333_inLine +BABEL_OP1_203_51611_20130312_195333_outLine +BABEL_OP1_203_51701_20130508_232537_inLine +BABEL_OP1_203_51701_20130508_232537_outLine +BABEL_OP1_203_51819_20130328_150620_inLine +BABEL_OP1_203_51819_20130328_150620_outLine +BABEL_OP1_203_51955_20130314_175859_inLine +BABEL_OP1_203_51955_20130314_175859_outLine +BABEL_OP1_203_51955_20130314_180731_inLine +BABEL_OP1_203_51955_20130314_180731_outLine +BABEL_OP1_203_52246_20130319_221049_inLine 
+BABEL_OP1_203_52246_20130319_221049_outLine +BABEL_OP1_203_52272_20130313_140038_inLine +BABEL_OP1_203_52272_20130313_140038_outLine +BABEL_OP1_203_52404_20130409_005414_inLine +BABEL_OP1_203_52404_20130409_005414_outLine +BABEL_OP1_203_52422_20130427_140502_inLine +BABEL_OP1_203_52422_20130427_140502_outLine +BABEL_OP1_203_52490_20130309_141915_inLine +BABEL_OP1_203_52490_20130309_141915_outLine +BABEL_OP1_203_52717_20130311_173849_inLine +BABEL_OP1_203_52717_20130311_173849_outLine +BABEL_OP1_203_53063_20130407_210935_inLine +BABEL_OP1_203_53063_20130407_210935_outLine +BABEL_OP1_203_53665_20130727_150857_inLine +BABEL_OP1_203_53665_20130727_150857_outLine +BABEL_OP1_203_53842_20130322_165451_inLine +BABEL_OP1_203_53842_20130322_165451_outLine +BABEL_OP1_203_54074_20130319_150208_inLine +BABEL_OP1_203_54074_20130319_150208_outLine +BABEL_OP1_203_54104_20130309_204103_inLine +BABEL_OP1_203_54104_20130309_204103_outLine +BABEL_OP1_203_54390_20130313_161947_inLine +BABEL_OP1_203_54390_20130313_161947_outLine +BABEL_OP1_203_54477_20130408_133628_inLine +BABEL_OP1_203_54477_20130408_133628_outLine +BABEL_OP1_203_54530_20130424_194302_inLine +BABEL_OP1_203_54530_20130424_194302_outLine +BABEL_OP1_203_54697_20130405_153323_inLine +BABEL_OP1_203_54697_20130405_153323_outLine +BABEL_OP1_203_54827_20130803_201026_inLine +BABEL_OP1_203_54827_20130803_201026_outLine +BABEL_OP1_203_54953_20130319_135125_inLine +BABEL_OP1_203_54953_20130319_135125_outLine +BABEL_OP1_203_55259_20130323_181918_inLine +BABEL_OP1_203_55259_20130323_181918_outLine +BABEL_OP1_203_55950_20130728_141857_inLine +BABEL_OP1_203_55950_20130728_141857_outLine +BABEL_OP1_203_56076_20130728_212423_inLine +BABEL_OP1_203_56076_20130728_212423_outLine +BABEL_OP1_203_56198_20130314_163346_inLine +BABEL_OP1_203_56198_20130314_163346_outLine +BABEL_OP1_203_56198_20130314_164412_inLine +BABEL_OP1_203_56198_20130314_164412_outLine +BABEL_OP1_203_56306_20130408_202539_inLine +BABEL_OP1_203_56306_20130408_202539_outLine +BABEL_OP1_203_56307_20130401_212823_inLine +BABEL_OP1_203_56307_20130401_212823_outLine +BABEL_OP1_203_56826_20130403_155349_inLine +BABEL_OP1_203_56826_20130403_155349_outLine +BABEL_OP1_203_57093_20130323_155842_inLine +BABEL_OP1_203_57093_20130323_155842_outLine +BABEL_OP1_203_57116_20130306_200913_inLine +BABEL_OP1_203_57116_20130306_200913_outLine +BABEL_OP1_203_57529_20130404_225031_inLine +BABEL_OP1_203_57529_20130404_225031_outLine +BABEL_OP1_203_57678_20130319_173142_inLine +BABEL_OP1_203_57678_20130319_173142_outLine +BABEL_OP1_203_58107_20130331_163124_inLine +BABEL_OP1_203_58107_20130331_163124_outLine +BABEL_OP1_203_58107_20130331_164049_inLine +BABEL_OP1_203_58107_20130331_164049_outLine +BABEL_OP1_203_58145_20130404_174142_inLine +BABEL_OP1_203_58145_20130404_174142_outLine +BABEL_OP1_203_58489_20130406_171644_inLine +BABEL_OP1_203_58489_20130406_171644_outLine +BABEL_OP1_203_58821_20130330_171943_inLine +BABEL_OP1_203_58821_20130330_171943_outLine +BABEL_OP1_203_58915_20130508_170813_inLine +BABEL_OP1_203_58915_20130508_170813_outLine +BABEL_OP1_203_58926_20130314_221922_inLine +BABEL_OP1_203_58926_20130314_221922_outLine +BABEL_OP1_203_59078_20130328_222520_inLine +BABEL_OP1_203_59078_20130328_222520_outLine +BABEL_OP1_203_59307_20130503_211805_inLine +BABEL_OP1_203_59307_20130503_211805_outLine +BABEL_OP1_203_59720_20130323_160840_inLine +BABEL_OP1_203_59720_20130323_160840_outLine +BABEL_OP1_203_59747_20130307_185538_inLine +BABEL_OP1_203_59747_20130307_185538_outLine 
+BABEL_OP1_203_59864_20130719_183902_inLine +BABEL_OP1_203_59864_20130719_183902_outLine +BABEL_OP1_203_59928_20130314_205249_inLine +BABEL_OP1_203_59928_20130314_205249_outLine +BABEL_OP1_203_60026_20130311_192442_inLine +BABEL_OP1_203_60026_20130311_192442_outLine +BABEL_OP1_203_60352_20130724_151721_inLine +BABEL_OP1_203_60352_20130724_151721_outLine +BABEL_OP1_203_60397_20130814_170113_inLine +BABEL_OP1_203_60397_20130814_170113_outLine +BABEL_OP1_203_60436_20130726_213808_inLine +BABEL_OP1_203_60436_20130726_213808_outLine +BABEL_OP1_203_60830_20130323_152836_inLine +BABEL_OP1_203_60830_20130323_152836_outLine +BABEL_OP1_203_61225_20130310_001509_inLine +BABEL_OP1_203_61225_20130310_001509_outLine +BABEL_OP1_203_61225_20130310_002607_inLine +BABEL_OP1_203_61225_20130310_002607_outLine +BABEL_OP1_203_61435_20130421_175121_inLine +BABEL_OP1_203_61435_20130421_175121_outLine +BABEL_OP1_203_61440_20130513_143551_inLine +BABEL_OP1_203_61440_20130513_143551_outLine +BABEL_OP1_203_61888_20130410_154115_inLine +BABEL_OP1_203_61888_20130410_154115_outLine +BABEL_OP1_203_62014_20130503_150317_inLine +BABEL_OP1_203_62014_20130503_150317_outLine +BABEL_OP1_203_62200_20130320_155842_inLine +BABEL_OP1_203_62200_20130320_155842_outLine +BABEL_OP1_203_62360_20130729_185133_inLine +BABEL_OP1_203_62360_20130729_185133_outLine +BABEL_OP1_203_62714_20130430_183624_inLine +BABEL_OP1_203_62714_20130430_183624_outLine +BABEL_OP1_203_62800_20130307_204137_inLine +BABEL_OP1_203_62800_20130307_204137_outLine +BABEL_OP1_203_62976_20130512_201748_inLine +BABEL_OP1_203_62976_20130512_201748_outLine +BABEL_OP1_203_63730_20130507_163540_inLine +BABEL_OP1_203_63730_20130507_163540_outLine +BABEL_OP1_203_64065_20130326_201717_inLine +BABEL_OP1_203_64065_20130326_201717_outLine +BABEL_OP1_203_64065_20130326_202638_inLine +BABEL_OP1_203_64065_20130326_202638_outLine +BABEL_OP1_203_65913_20130726_205358_inLine +BABEL_OP1_203_65913_20130726_205358_outLine +BABEL_OP1_203_66001_20130309_233448_inLine +BABEL_OP1_203_66001_20130309_233448_outLine +BABEL_OP1_203_66045_20130323_203735_inLine +BABEL_OP1_203_66045_20130323_203735_outLine +BABEL_OP1_203_66822_20130324_142935_inLine +BABEL_OP1_203_66822_20130324_142935_outLine +BABEL_OP1_203_66916_20130308_142310_inLine +BABEL_OP1_203_66916_20130308_142310_outLine +BABEL_OP1_203_66971_20130725_151439_inLine +BABEL_OP1_203_66971_20130725_151439_outLine +BABEL_OP1_203_67066_20130509_215551_inLine +BABEL_OP1_203_67066_20130509_215551_outLine +BABEL_OP1_203_68289_20130409_222355_inLine +BABEL_OP1_203_68289_20130409_222355_outLine +BABEL_OP1_203_68385_20130221_213027_inLine +BABEL_OP1_203_68385_20130221_213027_outLine +BABEL_OP1_203_69096_20130714_153203_inLine +BABEL_OP1_203_69096_20130714_153203_outLine +BABEL_OP1_203_69474_20130409_153705_inLine +BABEL_OP1_203_69474_20130409_153705_outLine +BABEL_OP1_203_69964_20130801_183705_inLine +BABEL_OP1_203_69964_20130801_183705_outLine +BABEL_OP1_203_70221_20130502_153055_inLine +BABEL_OP1_203_70221_20130502_153055_outLine +BABEL_OP1_203_70386_20130315_162835_inLine +BABEL_OP1_203_70386_20130315_162835_outLine +BABEL_OP1_203_70716_20130731_182939_inLine +BABEL_OP1_203_70716_20130731_182939_outLine +BABEL_OP1_203_71067_20130503_201919_inLine +BABEL_OP1_203_71067_20130503_201919_outLine +BABEL_OP1_203_71566_20130406_212124_inLine +BABEL_OP1_203_71566_20130406_212124_outLine +BABEL_OP1_203_72324_20130721_195442_inLine +BABEL_OP1_203_72324_20130721_195442_outLine +BABEL_OP1_203_72587_20130331_220349_inLine 
+BABEL_OP1_203_72587_20130331_220349_outLine +BABEL_OP1_203_73591_20130222_132516_inLine +BABEL_OP1_203_73591_20130222_132516_outLine +BABEL_OP1_203_74667_20130322_155857_inLine +BABEL_OP1_203_74667_20130322_155857_outLine +BABEL_OP1_203_74886_20130309_200304_inLine +BABEL_OP1_203_74886_20130309_200304_outLine +BABEL_OP1_203_75064_20130322_142556_inLine +BABEL_OP1_203_75064_20130322_142556_outLine +BABEL_OP1_203_75342_20130404_193602_inLine +BABEL_OP1_203_75342_20130404_193602_outLine +BABEL_OP1_203_75869_20130721_161850_inLine +BABEL_OP1_203_75869_20130721_161850_outLine +BABEL_OP1_203_76444_20130406_153810_inLine +BABEL_OP1_203_76444_20130406_153810_outLine +BABEL_OP1_203_76482_20130508_220808_inLine +BABEL_OP1_203_76482_20130508_220808_outLine +BABEL_OP1_203_77242_20130508_191854_inLine +BABEL_OP1_203_77242_20130508_191854_outLine +BABEL_OP1_203_79131_20130727_202021_inLine +BABEL_OP1_203_79131_20130727_202021_outLine +BABEL_OP1_203_79660_20130512_173422_inLine +BABEL_OP1_203_79660_20130512_173422_outLine +BABEL_OP1_203_80134_20130814_145021_inLine +BABEL_OP1_203_80134_20130814_145021_outLine +BABEL_OP1_203_81287_20130403_225530_inLine +BABEL_OP1_203_81287_20130403_225530_outLine +BABEL_OP1_203_82224_20130718_134750_inLine +BABEL_OP1_203_82224_20130718_134750_outLine +BABEL_OP1_203_84339_20130802_181641_inLine +BABEL_OP1_203_84339_20130802_181641_outLine +BABEL_OP1_203_84469_20130421_132749_inLine +BABEL_OP1_203_84469_20130421_132749_outLine +BABEL_OP1_203_84611_20130312_152852_inLine +BABEL_OP1_203_84611_20130312_152852_outLine +BABEL_OP1_203_85325_20130802_212902_inLine +BABEL_OP1_203_85325_20130802_212902_outLine +BABEL_OP1_203_86597_20130508_182316_inLine +BABEL_OP1_203_86597_20130508_182316_outLine +BABEL_OP1_203_86628_20130512_215243_inLine +BABEL_OP1_203_86628_20130512_215243_outLine +BABEL_OP1_203_86878_20130804_174949_inLine +BABEL_OP1_203_86878_20130804_174949_outLine +BABEL_OP1_203_86891_20130427_122020_inLine +BABEL_OP1_203_86891_20130427_122020_outLine +BABEL_OP1_203_87305_20130512_150816_inLine +BABEL_OP1_203_87305_20130512_150816_outLine +BABEL_OP1_203_89358_20130327_183946_inLine +BABEL_OP1_203_89358_20130327_183946_outLine +BABEL_OP1_203_89943_20130319_151705_inLine +BABEL_OP1_203_89943_20130319_151705_outLine +BABEL_OP1_203_90709_20130311_171156_inLine +BABEL_OP1_203_90709_20130311_171156_outLine +BABEL_OP1_203_91760_20130728_190550_inLine +BABEL_OP1_203_91760_20130728_190550_outLine +BABEL_OP1_203_92077_20130725_140650_inLine +BABEL_OP1_203_92077_20130725_140650_outLine +BABEL_OP1_203_93411_20130324_150550_inLine +BABEL_OP1_203_93411_20130324_150550_outLine +BABEL_OP1_203_93490_20130804_201521_inLine +BABEL_OP1_203_93490_20130804_201521_outLine +BABEL_OP1_203_93964_20130327_171307_inLine +BABEL_OP1_203_93964_20130327_171307_outLine +BABEL_OP1_203_94442_20130727_182743_inLine +BABEL_OP1_203_94442_20130727_182743_outLine +BABEL_OP1_203_94449_20130801_010717_inLine +BABEL_OP1_203_94449_20130801_010717_outLine +BABEL_OP1_203_95338_20130727_211019_inLine +BABEL_OP1_203_95338_20130727_211019_outLine +BABEL_OP1_203_96059_20130731_211048_inLine +BABEL_OP1_203_96059_20130731_211048_outLine +BABEL_OP1_203_96376_20130731_143340_outLine +BABEL_OP1_203_96690_20130320_183730_inLine +BABEL_OP1_203_96690_20130320_183730_outLine +BABEL_OP1_203_96690_20130320_185039_inLine +BABEL_OP1_203_96690_20130320_185039_outLine +BABEL_OP1_203_97220_20130508_165310_inLine +BABEL_OP1_203_97220_20130508_165310_outLine +BABEL_OP1_203_97836_20130430_195102_inLine 
+BABEL_OP1_203_97836_20130430_195102_outLine +BABEL_OP1_203_98192_20130511_210223_inLine +BABEL_OP1_203_98192_20130511_210223_outLine diff --git a/egs/babel/s5d/conf/lists/203-lao/train.untranscribed.list b/egs/babel/s5d/conf/lists/203-lao/train.untranscribed.list new file mode 100644 index 00000000000..38bcbffd9e6 --- /dev/null +++ b/egs/babel/s5d/conf/lists/203-lao/train.untranscribed.list @@ -0,0 +1,257 @@ +BABEL_OP1_203_16184_20130309_181723_inLine +BABEL_OP1_203_29777_20130424_230709_inLine +BABEL_OP1_203_29777_20130424_230709_outLine +BABEL_OP1_203_30253_20130406_221820_inLine +BABEL_OP1_203_30253_20130406_221820_outLine +BABEL_OP1_203_30497_20130724_152950_inLine +BABEL_OP1_203_30497_20130724_152950_outLine +BABEL_OP1_203_30497_20130724_154924_inLine +BABEL_OP1_203_30497_20130724_154924_outLine +BABEL_OP1_203_30653_20130422_190728_inLine +BABEL_OP1_203_30653_20130422_190728_outLine +BABEL_OP1_203_31182_20130407_165109_inLine +BABEL_OP1_203_31182_20130407_165109_outLine +BABEL_OP1_203_33229_20130716_174756_inLine +BABEL_OP1_203_33229_20130716_174756_outLine +BABEL_OP1_203_33273_20130321_185940_inLine +BABEL_OP1_203_33273_20130321_185940_outLine +BABEL_OP1_203_34688_20130309_150605_inLine +BABEL_OP1_203_34688_20130309_150605_outLine +BABEL_OP1_203_35202_20130403_172345_inLine +BABEL_OP1_203_35202_20130403_172345_outLine +BABEL_OP1_203_35885_20130423_124518_inLine +BABEL_OP1_203_35885_20130423_124518_outLine +BABEL_OP1_203_36017_20130727_194800_inLine +BABEL_OP1_203_36017_20130727_194800_outLine +BABEL_OP1_203_36059_20130420_141048_inLine +BABEL_OP1_203_36059_20130420_141048_outLine +BABEL_OP1_203_37064_20130315_163413_inLine +BABEL_OP1_203_37064_20130315_163413_outLine +BABEL_OP1_203_39159_20130307_161600_inLine +BABEL_OP1_203_39159_20130307_161600_outLine +BABEL_OP1_203_40740_20130425_194217_inLine +BABEL_OP1_203_40740_20130425_194217_outLine +BABEL_OP1_203_42718_20130719_184452_inLine +BABEL_OP1_203_42718_20130719_184452_outLine +BABEL_OP1_203_43285_20130403_143505_inLine +BABEL_OP1_203_43285_20130403_143505_outLine +BABEL_OP1_203_44309_20130724_151039_inLine +BABEL_OP1_203_44309_20130724_151039_outLine +BABEL_OP1_203_44681_20130808_235229_inLine +BABEL_OP1_203_44681_20130808_235229_outLine +BABEL_OP1_203_44847_20130713_184411_inLine +BABEL_OP1_203_44847_20130713_184411_outLine +BABEL_OP1_203_45201_20130802_170453_inLine +BABEL_OP1_203_45201_20130802_170453_outLine +BABEL_OP1_203_45697_20130410_000422_inLine +BABEL_OP1_203_45697_20130410_000422_outLine +BABEL_OP1_203_46702_20130306_164740_inLine +BABEL_OP1_203_46702_20130306_164740_outLine +BABEL_OP1_203_46712_20130323_203036_inLine +BABEL_OP1_203_46712_20130323_203036_outLine +BABEL_OP1_203_46881_20130307_203600_inLine +BABEL_OP1_203_46881_20130307_203600_outLine +BABEL_OP1_203_46974_20130404_232711_inLine +BABEL_OP1_203_46974_20130404_232711_outLine +BABEL_OP1_203_49197_20130318_181956_inLine +BABEL_OP1_203_49197_20130318_181956_outLine +BABEL_OP1_203_49767_20130430_202016_inLine +BABEL_OP1_203_49767_20130430_202016_outLine +BABEL_OP1_203_49812_20130808_171144_inLine +BABEL_OP1_203_49812_20130808_171144_outLine +BABEL_OP1_203_52070_20130808_163435_inLine +BABEL_OP1_203_52070_20130808_163435_outLine +BABEL_OP1_203_52442_20130506_145255_inLine +BABEL_OP1_203_52442_20130506_145255_outLine +BABEL_OP1_203_52614_20130727_194453_inLine +BABEL_OP1_203_52614_20130727_194453_outLine +BABEL_OP1_203_53419_20130406_175304_inLine +BABEL_OP1_203_53419_20130406_175304_outLine +BABEL_OP1_203_54040_20130406_184426_inLine 
+BABEL_OP1_203_54040_20130406_184426_outLine +BABEL_OP1_203_54405_20130729_003503_inLine +BABEL_OP1_203_54405_20130729_003503_outLine +BABEL_OP1_203_55267_20130505_191654_inLine +BABEL_OP1_203_55267_20130505_191654_outLine +BABEL_OP1_203_57219_20130502_194506_inLine +BABEL_OP1_203_57219_20130502_194506_outLine +BABEL_OP1_203_57464_20130725_171314_inLine +BABEL_OP1_203_57464_20130725_171314_outLine +BABEL_OP1_203_60626_20130322_152952_inLine +BABEL_OP1_203_60626_20130322_152952_outLine +BABEL_OP1_203_61971_20130725_164007_inLine +BABEL_OP1_203_61971_20130725_164007_outLine +BABEL_OP1_203_62047_20130407_151438_inLine +BABEL_OP1_203_62047_20130407_151438_outLine +BABEL_OP1_203_62286_20130320_214620_inLine +BABEL_OP1_203_62286_20130320_214620_outLine +BABEL_OP1_203_62456_20130328_142035_inLine +BABEL_OP1_203_62456_20130328_142035_outLine +BABEL_OP1_203_62835_20130323_203456_inLine +BABEL_OP1_203_62835_20130323_203456_outLine +BABEL_OP1_203_62852_20130306_200729_inLine +BABEL_OP1_203_62852_20130306_200729_outLine +BABEL_OP1_203_63220_20130331_212757_inLine +BABEL_OP1_203_63220_20130331_212757_outLine +BABEL_OP1_203_63445_20130307_151033_inLine +BABEL_OP1_203_63445_20130307_151033_outLine +BABEL_OP1_203_63757_20130328_223730_inLine +BABEL_OP1_203_63757_20130328_223730_outLine +BABEL_OP1_203_63938_20130410_173153_inLine +BABEL_OP1_203_63938_20130410_173153_outLine +BABEL_OP1_203_64494_20130313_131022_inLine +BABEL_OP1_203_64494_20130313_131022_outLine +BABEL_OP1_203_64638_20130410_142811_inLine +BABEL_OP1_203_64638_20130410_142811_outLine +BABEL_OP1_203_64759_20130309_200819_inLine +BABEL_OP1_203_64759_20130309_200819_outLine +BABEL_OP1_203_64796_20130307_184812_inLine +BABEL_OP1_203_64796_20130307_184812_outLine +BABEL_OP1_203_65466_20130725_163637_inLine +BABEL_OP1_203_65466_20130725_163637_outLine +BABEL_OP1_203_65477_20130320_173710_inLine +BABEL_OP1_203_65477_20130320_173710_outLine +BABEL_OP1_203_65477_20130320_180148_inLine +BABEL_OP1_203_65477_20130320_180148_outLine +BABEL_OP1_203_65477_20130320_201453_inLine +BABEL_OP1_203_65477_20130320_201453_outLine +BABEL_OP1_203_65639_20130806_171139_inLine +BABEL_OP1_203_65639_20130806_171139_outLine +BABEL_OP1_203_66837_20130405_182629_inLine +BABEL_OP1_203_66837_20130405_182629_outLine +BABEL_OP1_203_66959_20130401_000804_inLine +BABEL_OP1_203_66959_20130401_000804_outLine +BABEL_OP1_203_66967_20130309_193012_inLine +BABEL_OP1_203_66967_20130309_193012_outLine +BABEL_OP1_203_67726_20130815_142409_inLine +BABEL_OP1_203_67726_20130815_142409_outLine +BABEL_OP1_203_68910_20130819_161909_inLine +BABEL_OP1_203_68910_20130819_161909_outLine +BABEL_OP1_203_68910_20130819_163243_inLine +BABEL_OP1_203_68910_20130819_163243_outLine +BABEL_OP1_203_69633_20130425_200355_inLine +BABEL_OP1_203_69633_20130425_200355_outLine +BABEL_OP1_203_69982_20130506_163359_inLine +BABEL_OP1_203_69982_20130506_163359_outLine +BABEL_OP1_203_70282_20130329_152316_inLine +BABEL_OP1_203_70282_20130329_152316_outLine +BABEL_OP1_203_71704_20130312_213023_inLine +BABEL_OP1_203_71704_20130312_213023_outLine +BABEL_OP1_203_72349_20130726_200409_inLine +BABEL_OP1_203_72349_20130726_200409_outLine +BABEL_OP1_203_72844_20130307_143012_inLine +BABEL_OP1_203_72844_20130307_143012_outLine +BABEL_OP1_203_73622_20130311_175840_inLine +BABEL_OP1_203_73622_20130311_175840_outLine +BABEL_OP1_203_74253_20130403_190412_inLine +BABEL_OP1_203_74253_20130403_190412_outLine +BABEL_OP1_203_75366_20130430_153011_inLine +BABEL_OP1_203_75366_20130430_153011_outLine 
+BABEL_OP1_203_75465_20130408_174529_inLine +BABEL_OP1_203_75465_20130408_174529_outLine +BABEL_OP1_203_76218_20130320_160931_inLine +BABEL_OP1_203_76218_20130320_160931_outLine +BABEL_OP1_203_76218_20130320_162301_inLine +BABEL_OP1_203_76218_20130320_162301_outLine +BABEL_OP1_203_76773_20130313_174635_inLine +BABEL_OP1_203_76773_20130313_174635_outLine +BABEL_OP1_203_76970_20130502_140228_inLine +BABEL_OP1_203_76970_20130502_140228_outLine +BABEL_OP1_203_76970_20130502_141316_inLine +BABEL_OP1_203_76970_20130502_141316_outLine +BABEL_OP1_203_77391_20130321_134502_inLine +BABEL_OP1_203_77391_20130321_134502_outLine +BABEL_OP1_203_77567_20130307_183648_inLine +BABEL_OP1_203_77567_20130307_183648_outLine +BABEL_OP1_203_78609_20130411_135436_inLine +BABEL_OP1_203_78609_20130411_135436_outLine +BABEL_OP1_203_78958_20130815_152142_inLine +BABEL_OP1_203_78958_20130815_152142_outLine +BABEL_OP1_203_78976_20130320_143441_inLine +BABEL_OP1_203_78976_20130320_143441_outLine +BABEL_OP1_203_79107_20130501_145558_inLine +BABEL_OP1_203_79107_20130501_145558_outLine +BABEL_OP1_203_79571_20130401_193207_inLine +BABEL_OP1_203_79571_20130401_193207_outLine +BABEL_OP1_203_79858_20130309_212924_inLine +BABEL_OP1_203_79858_20130309_212924_outLine +BABEL_OP1_203_80721_20130402_142121_inLine +BABEL_OP1_203_80721_20130402_142121_outLine +BABEL_OP1_203_80897_20130328_174210_inLine +BABEL_OP1_203_80897_20130328_174210_outLine +BABEL_OP1_203_81229_20130321_133228_inLine +BABEL_OP1_203_81229_20130321_133228_outLine +BABEL_OP1_203_81854_20130730_230009_inLine +BABEL_OP1_203_81854_20130730_230009_outLine +BABEL_OP1_203_82966_20130405_153412_inLine +BABEL_OP1_203_82966_20130405_153412_outLine +BABEL_OP1_203_83366_20130428_224139_inLine +BABEL_OP1_203_83366_20130428_224139_outLine +BABEL_OP1_203_83775_20130319_135705_inLine +BABEL_OP1_203_83775_20130319_135705_outLine +BABEL_OP1_203_84029_20130812_185834_inLine +BABEL_OP1_203_84029_20130812_185834_outLine +BABEL_OP1_203_84125_20130306_192759_inLine +BABEL_OP1_203_84125_20130306_192759_outLine +BABEL_OP1_203_84583_20130409_145116_inLine +BABEL_OP1_203_84583_20130409_145116_outLine +BABEL_OP1_203_85248_20130403_172428_inLine +BABEL_OP1_203_85248_20130403_172428_outLine +BABEL_OP1_203_85248_20130403_173731_inLine +BABEL_OP1_203_85248_20130403_173731_outLine +BABEL_OP1_203_86748_20130424_181510_inLine +BABEL_OP1_203_86748_20130424_181510_outLine +BABEL_OP1_203_87871_20130403_233602_inLine +BABEL_OP1_203_87871_20130403_233602_outLine +BABEL_OP1_203_88812_20130724_142719_inLine +BABEL_OP1_203_88812_20130724_142719_outLine +BABEL_OP1_203_89045_20130306_200546_inLine +BABEL_OP1_203_89045_20130306_200546_outLine +BABEL_OP1_203_90935_20130319_215413_inLine +BABEL_OP1_203_90935_20130319_215413_outLine +BABEL_OP1_203_91581_20130406_211109_inLine +BABEL_OP1_203_91581_20130406_211109_outLine +BABEL_OP1_203_91593_20130511_222420_inLine +BABEL_OP1_203_91593_20130511_222420_outLine +BABEL_OP1_203_91825_20130310_211043_inLine +BABEL_OP1_203_91825_20130310_211043_outLine +BABEL_OP1_203_91884_20130422_190145_inLine +BABEL_OP1_203_91884_20130422_190145_outLine +BABEL_OP1_203_92176_20130322_143345_inLine +BABEL_OP1_203_92176_20130322_143345_outLine +BABEL_OP1_203_92356_20130715_210447_inLine +BABEL_OP1_203_92356_20130715_210447_outLine +BABEL_OP1_203_92698_20130327_174701_inLine +BABEL_OP1_203_92698_20130327_174701_outLine +BABEL_OP1_203_92698_20130327_175923_inLine +BABEL_OP1_203_92698_20130327_175923_outLine +BABEL_OP1_203_92886_20130314_211354_inLine 
+BABEL_OP1_203_92886_20130314_211354_outLine +BABEL_OP1_203_93224_20130503_144751_inLine +BABEL_OP1_203_93224_20130503_144751_outLine +BABEL_OP1_203_93320_20130502_175919_inLine +BABEL_OP1_203_93320_20130502_175919_outLine +BABEL_OP1_203_93946_20130406_004722_inLine +BABEL_OP1_203_93946_20130406_004722_outLine +BABEL_OP1_203_94212_20130806_184552_inLine +BABEL_OP1_203_94212_20130806_184552_outLine +BABEL_OP1_203_95966_20130320_201310_inLine +BABEL_OP1_203_95966_20130320_201310_outLine +BABEL_OP1_203_96205_20130324_175526_inLine +BABEL_OP1_203_96205_20130324_175526_outLine +BABEL_OP1_203_96584_20130410_144453_inLine +BABEL_OP1_203_96584_20130410_144453_outLine +BABEL_OP1_203_96934_20130319_142928_inLine +BABEL_OP1_203_96934_20130319_142928_outLine +BABEL_OP1_203_96985_20130313_141845_inLine +BABEL_OP1_203_96985_20130313_141845_outLine +BABEL_OP1_203_97136_20130410_190244_inLine +BABEL_OP1_203_97136_20130410_190244_outLine +BABEL_OP1_203_98506_20130423_152625_inLine +BABEL_OP1_203_98506_20130423_152625_outLine +BABEL_OP1_203_98678_20130721_152255_inLine +BABEL_OP1_203_98678_20130721_152255_outLine +BABEL_OP1_203_99487_20130311_174358_inLine +BABEL_OP1_203_99487_20130311_174358_outLine +BABEL_OP1_203_99516_20130309_164733_inLine +BABEL_OP1_203_99516_20130309_164733_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/dev.list b/egs/babel/s5d/conf/lists/204-tamil/dev.list new file mode 100644 index 00000000000..f793b6bf7fa --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/dev.list @@ -0,0 +1,125 @@ +BABEL_OP1_204_13189_20130613_161247_inLine +BABEL_OP1_204_13189_20130613_161247_outLine +BABEL_OP1_204_17881_20130219_205442_inLine +BABEL_OP1_204_17881_20130219_205442_outLine +BABEL_OP1_204_18924_20130224_150538_inLine +BABEL_OP1_204_18924_20130224_150538_outLine +BABEL_OP1_204_20682_20130209_174057_inLine +BABEL_OP1_204_20682_20130209_174057_outLine +BABEL_OP1_204_22021_20130818_153135_inLine +BABEL_OP1_204_22021_20130818_153135_outLine +BABEL_OP1_204_22288_20130820_021043_inLine +BABEL_OP1_204_22288_20130820_021043_outLine +BABEL_OP1_204_22288_20130820_022958_inLine +BABEL_OP1_204_22288_20130820_022958_outLine +BABEL_OP1_204_22466_20121213_214935_inLine +BABEL_OP1_204_22466_20121213_214935_outLine +BABEL_OP1_204_23700_20130825_003724_inLine +BABEL_OP1_204_23700_20130825_003724_outLine +BABEL_OP1_204_23700_20130825_004922_inLine +BABEL_OP1_204_23700_20130825_004922_outLine +BABEL_OP1_204_24239_20130227_004742_inLine +BABEL_OP1_204_24239_20130227_004742_outLine +BABEL_OP1_204_24290_20130228_200830_inLine +BABEL_OP1_204_24290_20130228_200830_outLine +BABEL_OP1_204_24679_20130112_222528_inLine +BABEL_OP1_204_24679_20130112_222528_outLine +BABEL_OP1_204_25895_20130830_022140_inLine +BABEL_OP1_204_25895_20130830_022140_outLine +BABEL_OP1_204_26602_20130215_003413_inLine +BABEL_OP1_204_26602_20130215_003413_outLine +BABEL_OP1_204_27218_20130102_192252_inLine +BABEL_OP1_204_27218_20130102_192252_outLine +BABEL_OP1_204_28606_20130126_221856_inLine +BABEL_OP1_204_28606_20130126_221856_outLine +BABEL_OP1_204_28945_20130102_221003_inLine +BABEL_OP1_204_28945_20130102_221003_outLine +BABEL_OP1_204_29076_20130222_205943_inLine +BABEL_OP1_204_29076_20130222_205943_outLine +BABEL_OP1_204_31624_20130107_221428_inLine +BABEL_OP1_204_31624_20130107_221428_outLine +BABEL_OP1_204_32287_20130902_231135_inLine +BABEL_OP1_204_32287_20130902_231135_outLine +BABEL_OP1_204_33672_20130115_033234_inLine +BABEL_OP1_204_33672_20130115_033234_outLine +BABEL_OP1_204_37290_20130707_161547_inLine 
+BABEL_OP1_204_37290_20130707_161547_outLine +BABEL_OP1_204_37594_20130805_155303_inLine +BABEL_OP1_204_37594_20130805_155303_outLine +BABEL_OP1_204_38979_20130516_003257_inLine +BABEL_OP1_204_38979_20130516_003257_outLine +BABEL_OP1_204_42155_20130122_030534_inLine +BABEL_OP1_204_42155_20130122_030534_outLine +BABEL_OP1_204_43239_20130216_055950_inLine +BABEL_OP1_204_43239_20130216_055950_outLine +BABEL_OP1_204_44029_20130824_003907_inLine +BABEL_OP1_204_44029_20130824_003907_outLine +BABEL_OP1_204_44619_20130104_192431_inLine +BABEL_OP1_204_44619_20130104_192431_outLine +BABEL_OP1_204_44961_20130106_015828_inLine +BABEL_OP1_204_44961_20130106_015828_outLine +BABEL_OP1_204_46535_20130818_001009_inLine +BABEL_OP1_204_46535_20130818_001009_outLine +BABEL_OP1_204_47451_20130210_010011_inLine +BABEL_OP1_204_47451_20130210_010011_outLine +BABEL_OP1_204_48024_20130829_223102_inLine +BABEL_OP1_204_48024_20130829_223102_outLine +BABEL_OP1_204_50565_20121224_203735_inLine +BABEL_OP1_204_50565_20121224_203735_outLine +BABEL_OP1_204_51701_20130312_022556_inLine +BABEL_OP1_204_51701_20130312_022556_outLine +BABEL_OP1_204_54160_20121231_225532_inLine +BABEL_OP1_204_54160_20121231_225532_outLine +BABEL_OP1_204_55136_20130705_164312_inLine +BABEL_OP1_204_55136_20130705_164312_outLine +BABEL_OP1_204_57935_20130126_234131_inLine +BABEL_OP1_204_57935_20130126_234131_outLine +BABEL_OP1_204_58047_20130222_222259_inLine +BABEL_OP1_204_58047_20130222_222259_outLine +BABEL_OP1_204_59747_20121222_160946_inLine +BABEL_OP1_204_59747_20121222_160946_outLine +BABEL_OP1_204_61440_20130627_182754_inLine +BABEL_OP1_204_61440_20130627_182754_outLine +BABEL_OP1_204_62545_20130703_202255_inLine +BABEL_OP1_204_62545_20130703_202255_outLine +BABEL_OP1_204_63484_20130821_005511_inLine +BABEL_OP1_204_63484_20130821_005511_outLine +BABEL_OP1_204_64350_20130102_195330_inLine +BABEL_OP1_204_64350_20130102_195330_outLine +BABEL_OP1_204_64902_20130215_191500_inLine +BABEL_OP1_204_64902_20130215_191500_outLine +BABEL_OP1_204_68244_20130129_184054_inLine +BABEL_OP1_204_70639_20130704_165905_inLine +BABEL_OP1_204_70639_20130704_165905_outLine +BABEL_OP1_204_71121_20130522_213640_inLine +BABEL_OP1_204_71121_20130522_213640_outLine +BABEL_OP1_204_73990_20130521_162632_inLine +BABEL_OP1_204_73990_20130521_162632_outLine +BABEL_OP1_204_78161_20130521_152635_inLine +BABEL_OP1_204_78161_20130521_152635_outLine +BABEL_OP1_204_83238_20130121_201216_inLine +BABEL_OP1_204_83238_20130121_201216_outLine +BABEL_OP1_204_84177_20130901_213641_inLine +BABEL_OP1_204_84177_20130901_213641_outLine +BABEL_OP1_204_84815_20130209_040750_inLine +BABEL_OP1_204_84815_20130209_040750_outLine +BABEL_OP1_204_86557_20130103_183044_inLine +BABEL_OP1_204_86557_20130103_183044_outLine +BABEL_OP1_204_87074_20130107_181209_inLine +BABEL_OP1_204_87074_20130107_181209_outLine +BABEL_OP1_204_87298_20130114_172850_inLine +BABEL_OP1_204_87298_20130114_172850_outLine +BABEL_OP1_204_90937_20130516_224543_inLine +BABEL_OP1_204_90937_20130516_224543_outLine +BABEL_OP1_204_91808_20130603_193623_inLine +BABEL_OP1_204_91808_20130603_193623_outLine +BABEL_OP1_204_92509_20130107_011707_inLine +BABEL_OP1_204_92509_20130107_011707_outLine +BABEL_OP1_204_94465_20130212_212918_inLine +BABEL_OP1_204_94465_20130212_212918_outLine +BABEL_OP1_204_94923_20130608_143347_inLine +BABEL_OP1_204_94923_20130608_143347_outLine +BABEL_OP1_204_96059_20130225_212517_inLine +BABEL_OP1_204_96059_20130225_212517_outLine +BABEL_OP1_204_97286_20130520_145640_inLine 
+BABEL_OP1_204_97286_20130520_145640_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/eval.list b/egs/babel/s5d/conf/lists/204-tamil/eval.list new file mode 100644 index 00000000000..1887ca15694 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/eval.list @@ -0,0 +1,947 @@ +BABEL_OP1_204_10058_20130305_040021_inLine +BABEL_OP1_204_10058_20130305_040021_outLine +BABEL_OP1_204_10313_20130705_155607_inLine +BABEL_OP1_204_10313_20130705_155607_outLine +BABEL_OP1_204_10524_20130219_145437_inLine +BABEL_OP1_204_10524_20130219_145437_outLine +BABEL_OP1_204_10524_20130219_235944_inLine +BABEL_OP1_204_10524_20130219_235944_outLine +BABEL_OP1_204_10524_20130220_000643_inLine +BABEL_OP1_204_10524_20130220_000643_outLine +BABEL_OP1_204_10638_20130510_124441_inLine +BABEL_OP1_204_10638_20130510_124441_outLine +BABEL_OP1_204_11768_20130825_151244_inLine +BABEL_OP1_204_11768_20130825_151244_outLine +BABEL_OP1_204_12321_20130220_211618_inLine +BABEL_OP1_204_12321_20130220_211618_outLine +BABEL_OP1_204_12635_20130601_152113_inLine +BABEL_OP1_204_12635_20130601_152113_outLine +BABEL_OP1_204_12916_20130107_224212_inLine +BABEL_OP1_204_12916_20130107_224212_outLine +BABEL_OP1_204_13561_20130120_185547_inLine +BABEL_OP1_204_13561_20130120_185547_outLine +BABEL_OP1_204_13664_20121218_221847_inLine +BABEL_OP1_204_13664_20121218_221847_outLine +BABEL_OP1_204_13909_20130313_210114_inLine +BABEL_OP1_204_13909_20130313_210114_outLine +BABEL_OP1_204_13929_20130716_200759_inLine +BABEL_OP1_204_13929_20130716_200759_outLine +BABEL_OP1_204_14028_20130820_214748_inLine +BABEL_OP1_204_14028_20130820_214748_outLine +BABEL_OP1_204_14229_20130112_024917_inLine +BABEL_OP1_204_14229_20130112_024917_outLine +BABEL_OP1_204_14350_20130113_023333_inLine +BABEL_OP1_204_14350_20130113_023333_outLine +BABEL_OP1_204_14537_20130303_005043_inLine +BABEL_OP1_204_14537_20130303_005043_outLine +BABEL_OP1_204_14723_20130710_180819_inLine +BABEL_OP1_204_14723_20130710_180819_outLine +BABEL_OP1_204_14875_20130111_192622_inLine +BABEL_OP1_204_14875_20130111_192622_outLine +BABEL_OP1_204_14929_20130131_002309_inLine +BABEL_OP1_204_14929_20130131_002309_outLine +BABEL_OP1_204_15163_20130130_004303_inLine +BABEL_OP1_204_15163_20130130_004303_outLine +BABEL_OP1_204_15227_20130624_180548_inLine +BABEL_OP1_204_15227_20130624_180548_outLine +BABEL_OP1_204_15382_20130126_201407_inLine +BABEL_OP1_204_15382_20130126_201407_outLine +BABEL_OP1_204_15420_20130901_223125_inLine +BABEL_OP1_204_15420_20130901_223125_outLine +BABEL_OP1_204_15466_20130521_205553_inLine +BABEL_OP1_204_15466_20130521_205553_outLine +BABEL_OP1_204_15848_20121218_180011_inLine +BABEL_OP1_204_15848_20121218_180011_outLine +BABEL_OP1_204_16056_20130102_234300_inLine +BABEL_OP1_204_16056_20130102_234300_outLine +BABEL_OP1_204_16184_20121220_210106_inLine +BABEL_OP1_204_16184_20121220_210106_outLine +BABEL_OP1_204_16351_20130705_205024_inLine +BABEL_OP1_204_16351_20130705_205024_outLine +BABEL_OP1_204_16749_20130224_215355_inLine +BABEL_OP1_204_16749_20130224_215355_outLine +BABEL_OP1_204_16787_20130121_045651_inLine +BABEL_OP1_204_16787_20130121_045651_outLine +BABEL_OP1_204_17165_20130130_191341_inLine +BABEL_OP1_204_17165_20130130_191341_outLine +BABEL_OP1_204_17511_20130716_180322_inLine +BABEL_OP1_204_17511_20130716_180322_outLine +BABEL_OP1_204_17520_20130122_040744_inLine +BABEL_OP1_204_17520_20130122_040744_outLine +BABEL_OP1_204_17914_20130311_035117_inLine +BABEL_OP1_204_17914_20130311_035117_outLine +BABEL_OP1_204_17937_20130803_170049_inLine 
+BABEL_OP1_204_17937_20130803_170049_outLine +BABEL_OP1_204_18033_20130906_011555_inLine +BABEL_OP1_204_18033_20130906_011555_outLine +BABEL_OP1_204_18863_20130210_164314_inLine +BABEL_OP1_204_18863_20130210_164314_outLine +BABEL_OP1_204_19545_20130122_164148_inLine +BABEL_OP1_204_19545_20130122_164148_outLine +BABEL_OP1_204_19663_20130126_195459_inLine +BABEL_OP1_204_19663_20130126_195459_outLine +BABEL_OP1_204_19749_20130707_153432_inLine +BABEL_OP1_204_19749_20130707_153432_outLine +BABEL_OP1_204_19773_20130217_220127_inLine +BABEL_OP1_204_19773_20130217_220127_outLine +BABEL_OP1_204_19773_20130217_234204_inLine +BABEL_OP1_204_19773_20130217_234204_outLine +BABEL_OP1_204_19782_20130209_175552_inLine +BABEL_OP1_204_19782_20130209_175552_outLine +BABEL_OP1_204_20133_20121218_172017_inLine +BABEL_OP1_204_20133_20121218_172017_outLine +BABEL_OP1_204_20800_20130102_180915_inLine +BABEL_OP1_204_20800_20130102_180915_outLine +BABEL_OP1_204_20896_20130822_163553_inLine +BABEL_OP1_204_20896_20130822_163553_outLine +BABEL_OP1_204_21004_20130209_230509_inLine +BABEL_OP1_204_21004_20130209_230509_outLine +BABEL_OP1_204_21029_20130107_212248_inLine +BABEL_OP1_204_21029_20130107_212248_outLine +BABEL_OP1_204_21109_20130301_151421_inLine +BABEL_OP1_204_21109_20130301_151421_outLine +BABEL_OP1_204_21159_20130607_143737_inLine +BABEL_OP1_204_21159_20130607_143737_outLine +BABEL_OP1_204_21244_20130627_172541_inLine +BABEL_OP1_204_21244_20130627_172541_outLine +BABEL_OP1_204_21581_20130118_192005_inLine +BABEL_OP1_204_21581_20130118_192005_outLine +BABEL_OP1_204_21794_20130129_233131_inLine +BABEL_OP1_204_21794_20130129_233131_outLine +BABEL_OP1_204_22280_20130222_191834_inLine +BABEL_OP1_204_22280_20130222_191834_outLine +BABEL_OP1_204_22641_20121224_195014_inLine +BABEL_OP1_204_22641_20121224_195014_outLine +BABEL_OP1_204_23196_20130605_144617_inLine +BABEL_OP1_204_23196_20130605_144617_outLine +BABEL_OP1_204_23355_20130812_203203_inLine +BABEL_OP1_204_23355_20130812_203203_outLine +BABEL_OP1_204_23355_20130812_204058_inLine +BABEL_OP1_204_23355_20130812_204058_outLine +BABEL_OP1_204_23395_20130126_013244_inLine +BABEL_OP1_204_23395_20130126_013244_outLine +BABEL_OP1_204_23505_20130108_173244_inLine +BABEL_OP1_204_23505_20130108_173244_outLine +BABEL_OP1_204_23681_20130625_175654_inLine +BABEL_OP1_204_23681_20130625_175654_outLine +BABEL_OP1_204_23983_20130707_144157_inLine +BABEL_OP1_204_23983_20130707_144157_outLine +BABEL_OP1_204_23983_20130707_145156_inLine +BABEL_OP1_204_23983_20130707_145156_outLine +BABEL_OP1_204_23995_20130209_202505_inLine +BABEL_OP1_204_23995_20130209_202505_outLine +BABEL_OP1_204_24017_20130209_202828_inLine +BABEL_OP1_204_24017_20130209_202828_outLine +BABEL_OP1_204_24037_20130708_184129_inLine +BABEL_OP1_204_24037_20130708_184129_outLine +BABEL_OP1_204_24323_20130121_041043_inLine +BABEL_OP1_204_24323_20130121_041043_outLine +BABEL_OP1_204_24589_20130111_223930_inLine +BABEL_OP1_204_24589_20130111_223930_outLine +BABEL_OP1_204_24779_20130607_183107_inLine +BABEL_OP1_204_24779_20130607_183107_outLine +BABEL_OP1_204_25012_20130705_184756_inLine +BABEL_OP1_204_25012_20130705_184756_outLine +BABEL_OP1_204_25015_20130329_012535_inLine +BABEL_OP1_204_25015_20130329_012535_outLine +BABEL_OP1_204_25068_20130901_230020_inLine +BABEL_OP1_204_25068_20130901_230020_outLine +BABEL_OP1_204_25068_20130901_235001_inLine +BABEL_OP1_204_25068_20130901_235001_outLine +BABEL_OP1_204_25085_20130612_170506_inLine +BABEL_OP1_204_25085_20130612_170506_outLine 
+BABEL_OP1_204_25198_20130625_185430_inLine +BABEL_OP1_204_25198_20130625_185430_outLine +BABEL_OP1_204_25220_20130715_192051_inLine +BABEL_OP1_204_25220_20130715_192051_outLine +BABEL_OP1_204_25767_20130107_180931_inLine +BABEL_OP1_204_25767_20130107_180931_outLine +BABEL_OP1_204_26206_20130129_191521_inLine +BABEL_OP1_204_26206_20130129_191521_outLine +BABEL_OP1_204_26574_20130216_002354_inLine +BABEL_OP1_204_26574_20130216_002354_outLine +BABEL_OP1_204_26574_20130218_013612_inLine +BABEL_OP1_204_26574_20130218_013612_outLine +BABEL_OP1_204_26869_20130815_190057_inLine +BABEL_OP1_204_26869_20130815_190057_outLine +BABEL_OP1_204_27014_20130708_191739_inLine +BABEL_OP1_204_27014_20130708_191739_outLine +BABEL_OP1_204_27367_20130708_153816_inLine +BABEL_OP1_204_27367_20130708_153816_outLine +BABEL_OP1_204_28012_20130211_213147_inLine +BABEL_OP1_204_28012_20130211_213147_outLine +BABEL_OP1_204_28303_20130112_003656_inLine +BABEL_OP1_204_28303_20130112_003656_outLine +BABEL_OP1_204_28585_20130208_014141_inLine +BABEL_OP1_204_28585_20130208_014141_outLine +BABEL_OP1_204_28775_20130119_000600_inLine +BABEL_OP1_204_28775_20130119_000600_outLine +BABEL_OP1_204_28814_20130224_202343_inLine +BABEL_OP1_204_28814_20130224_202343_outLine +BABEL_OP1_204_28871_20121219_184300_inLine +BABEL_OP1_204_28871_20121219_184300_outLine +BABEL_OP1_204_29021_20130227_043427_inLine +BABEL_OP1_204_29021_20130227_043427_outLine +BABEL_OP1_204_29072_20130127_023330_inLine +BABEL_OP1_204_29072_20130127_023330_outLine +BABEL_OP1_204_29168_20130112_230634_inLine +BABEL_OP1_204_29168_20130112_230634_outLine +BABEL_OP1_204_29208_20130127_011057_inLine +BABEL_OP1_204_29208_20130127_011057_outLine +BABEL_OP1_204_29352_20130628_145610_inLine +BABEL_OP1_204_29352_20130628_145610_outLine +BABEL_OP1_204_29663_20130829_225524_inLine +BABEL_OP1_204_29663_20130829_225524_outLine +BABEL_OP1_204_29765_20130607_162026_inLine +BABEL_OP1_204_29765_20130607_162026_outLine +BABEL_OP1_204_29777_20130211_193239_inLine +BABEL_OP1_204_29777_20130211_193239_outLine +BABEL_OP1_204_30461_20130628_160212_inLine +BABEL_OP1_204_30461_20130628_160212_outLine +BABEL_OP1_204_30653_20130216_171325_inLine +BABEL_OP1_204_30653_20130216_171325_outLine +BABEL_OP1_204_30720_20130524_153314_inLine +BABEL_OP1_204_30720_20130524_153314_outLine +BABEL_OP1_204_30869_20130211_004250_inLine +BABEL_OP1_204_30869_20130211_004250_outLine +BABEL_OP1_204_30974_20130508_113119_inLine +BABEL_OP1_204_30974_20130508_113119_outLine +BABEL_OP1_204_31109_20130121_195304_inLine +BABEL_OP1_204_31109_20130121_195304_outLine +BABEL_OP1_204_31267_20130311_024343_inLine +BABEL_OP1_204_31267_20130311_024343_outLine +BABEL_OP1_204_31668_20130603_155703_inLine +BABEL_OP1_204_31668_20130603_155703_outLine +BABEL_OP1_204_31919_20130322_030728_inLine +BABEL_OP1_204_31919_20130322_030728_outLine +BABEL_OP1_204_32380_20130812_163206_inLine +BABEL_OP1_204_32380_20130812_163206_outLine +BABEL_OP1_204_32630_20130618_150743_inLine +BABEL_OP1_204_32630_20130618_150743_outLine +BABEL_OP1_204_32708_20130107_000057_inLine +BABEL_OP1_204_32708_20130107_000057_outLine +BABEL_OP1_204_32832_20130208_200126_inLine +BABEL_OP1_204_32832_20130208_200126_outLine +BABEL_OP1_204_32837_20130211_011900_inLine +BABEL_OP1_204_32837_20130211_011900_outLine +BABEL_OP1_204_32914_20130218_021836_inLine +BABEL_OP1_204_32914_20130218_021836_outLine +BABEL_OP1_204_32914_20130218_023337_inLine +BABEL_OP1_204_32914_20130218_023337_outLine +BABEL_OP1_204_32961_20130518_164254_inLine 
+BABEL_OP1_204_32961_20130518_164254_outLine +BABEL_OP1_204_33149_20130901_211119_inLine +BABEL_OP1_204_33149_20130901_211119_outLine +BABEL_OP1_204_33333_20130818_163046_inLine +BABEL_OP1_204_33333_20130818_163046_outLine +BABEL_OP1_204_33635_20130127_024601_inLine +BABEL_OP1_204_33635_20130127_024601_outLine +BABEL_OP1_204_33992_20130625_183028_inLine +BABEL_OP1_204_33992_20130625_183028_outLine +BABEL_OP1_204_34208_20130815_173402_inLine +BABEL_OP1_204_34208_20130815_173402_outLine +BABEL_OP1_204_34336_20130111_190838_inLine +BABEL_OP1_204_34336_20130111_190838_outLine +BABEL_OP1_204_34564_20130217_024252_inLine +BABEL_OP1_204_34564_20130217_024252_outLine +BABEL_OP1_204_34629_20130524_214401_inLine +BABEL_OP1_204_34629_20130524_214401_outLine +BABEL_OP1_204_35069_20130211_183408_inLine +BABEL_OP1_204_35069_20130211_183408_outLine +BABEL_OP1_204_35139_20130114_222544_inLine +BABEL_OP1_204_35139_20130114_222544_outLine +BABEL_OP1_204_35467_20121221_225338_inLine +BABEL_OP1_204_35467_20121221_225338_outLine +BABEL_OP1_204_35583_20130224_214957_inLine +BABEL_OP1_204_35583_20130224_214957_outLine +BABEL_OP1_204_35786_20130625_191629_inLine +BABEL_OP1_204_35786_20130625_191629_outLine +BABEL_OP1_204_35885_20130225_225544_inLine +BABEL_OP1_204_35885_20130225_225544_outLine +BABEL_OP1_204_36147_20130902_003850_inLine +BABEL_OP1_204_36147_20130902_003850_outLine +BABEL_OP1_204_36219_20130116_023001_inLine +BABEL_OP1_204_36219_20130116_023001_outLine +BABEL_OP1_204_36300_20130802_173230_inLine +BABEL_OP1_204_36300_20130802_173230_outLine +BABEL_OP1_204_36364_20130802_160044_inLine +BABEL_OP1_204_36364_20130802_160044_outLine +BABEL_OP1_204_36505_20130209_151150_inLine +BABEL_OP1_204_36505_20130209_151150_outLine +BABEL_OP1_204_36505_20130212_211726_inLine +BABEL_OP1_204_36505_20130212_211726_outLine +BABEL_OP1_204_36632_20130725_160202_inLine +BABEL_OP1_204_36632_20130725_160202_outLine +BABEL_OP1_204_36900_20130210_013355_inLine +BABEL_OP1_204_36900_20130210_013355_outLine +BABEL_OP1_204_37007_20130708_211216_inLine +BABEL_OP1_204_37007_20130708_211216_outLine +BABEL_OP1_204_37068_20130815_173112_inLine +BABEL_OP1_204_37068_20130815_173112_outLine +BABEL_OP1_204_37281_20130131_020847_inLine +BABEL_OP1_204_37281_20130131_020847_outLine +BABEL_OP1_204_37499_20130627_150627_inLine +BABEL_OP1_204_37499_20130627_150627_outLine +BABEL_OP1_204_37598_20130607_165958_inLine +BABEL_OP1_204_37598_20130607_165958_outLine +BABEL_OP1_204_38323_20130311_030447_inLine +BABEL_OP1_204_38323_20130311_030447_outLine +BABEL_OP1_204_38554_20121221_210925_inLine +BABEL_OP1_204_38554_20121221_210925_outLine +BABEL_OP1_204_38741_20130103_233022_inLine +BABEL_OP1_204_38741_20130103_233022_outLine +BABEL_OP1_204_38963_20130830_013927_inLine +BABEL_OP1_204_38963_20130830_013927_outLine +BABEL_OP1_204_39006_20130310_042623_inLine +BABEL_OP1_204_39006_20130310_042623_outLine +BABEL_OP1_204_39277_20130710_203344_inLine +BABEL_OP1_204_39277_20130710_203344_outLine +BABEL_OP1_204_39426_20130218_002812_inLine +BABEL_OP1_204_39426_20130218_002812_outLine +BABEL_OP1_204_39579_20130724_163251_inLine +BABEL_OP1_204_39579_20130724_163251_outLine +BABEL_OP1_204_41073_20130211_210606_inLine +BABEL_OP1_204_41073_20130211_210606_outLine +BABEL_OP1_204_41100_20130108_172156_inLine +BABEL_OP1_204_41100_20130108_172156_outLine +BABEL_OP1_204_41109_20130211_003851_inLine +BABEL_OP1_204_41109_20130211_003851_outLine +BABEL_OP1_204_41174_20130117_215826_inLine +BABEL_OP1_204_41174_20130117_215826_outLine 
+BABEL_OP1_204_41400_20130702_161025_inLine +BABEL_OP1_204_41400_20130702_161025_outLine +BABEL_OP1_204_41493_20121218_185431_inLine +BABEL_OP1_204_41493_20121218_185431_outLine +BABEL_OP1_204_41609_20130102_232356_inLine +BABEL_OP1_204_41609_20130102_232356_outLine +BABEL_OP1_204_41680_20121219_175709_inLine +BABEL_OP1_204_41680_20121219_175709_outLine +BABEL_OP1_204_41692_20130624_195718_inLine +BABEL_OP1_204_41692_20130624_195718_outLine +BABEL_OP1_204_42243_20121222_194916_inLine +BABEL_OP1_204_42243_20121222_194916_outLine +BABEL_OP1_204_42309_20130521_001029_inLine +BABEL_OP1_204_42309_20130521_001029_outLine +BABEL_OP1_204_42434_20130116_230135_inLine +BABEL_OP1_204_42434_20130116_230135_outLine +BABEL_OP1_204_42600_20130111_202254_inLine +BABEL_OP1_204_42600_20130111_202254_outLine +BABEL_OP1_204_42771_20130228_025042_inLine +BABEL_OP1_204_42771_20130228_025042_outLine +BABEL_OP1_204_42848_20130627_222753_inLine +BABEL_OP1_204_42848_20130627_222753_outLine +BABEL_OP1_204_42877_20130815_164740_inLine +BABEL_OP1_204_42877_20130815_164740_outLine +BABEL_OP1_204_42883_20130624_202703_inLine +BABEL_OP1_204_42883_20130624_202703_outLine +BABEL_OP1_204_43074_20130509_115450_inLine +BABEL_OP1_204_43074_20130509_115450_outLine +BABEL_OP1_204_43285_20130130_012851_inLine +BABEL_OP1_204_43285_20130130_012851_outLine +BABEL_OP1_204_43388_20130129_230503_inLine +BABEL_OP1_204_43388_20130129_230503_outLine +BABEL_OP1_204_43395_20130313_164710_inLine +BABEL_OP1_204_43395_20130313_164710_outLine +BABEL_OP1_204_43646_20121218_215728_inLine +BABEL_OP1_204_43646_20121218_215728_outLine +BABEL_OP1_204_43990_20130521_142553_inLine +BABEL_OP1_204_44255_20130225_230219_inLine +BABEL_OP1_204_44255_20130225_230219_outLine +BABEL_OP1_204_44681_20130830_000000_inLine +BABEL_OP1_204_44681_20130830_000000_outLine +BABEL_OP1_204_44847_20130126_212511_inLine +BABEL_OP1_204_44847_20130126_212511_outLine +BABEL_OP1_204_45106_20130325_003034_inLine +BABEL_OP1_204_45106_20130325_003034_outLine +BABEL_OP1_204_45106_20130325_004324_inLine +BABEL_OP1_204_45106_20130325_004324_outLine +BABEL_OP1_204_45201_20130312_021424_inLine +BABEL_OP1_204_45201_20130312_021424_outLine +BABEL_OP1_204_45536_20130217_014642_inLine +BABEL_OP1_204_45536_20130217_014642_outLine +BABEL_OP1_204_45559_20130303_234142_inLine +BABEL_OP1_204_45559_20130303_234142_outLine +BABEL_OP1_204_45560_20130107_224441_inLine +BABEL_OP1_204_45560_20130107_224441_outLine +BABEL_OP1_204_45642_20130106_040244_inLine +BABEL_OP1_204_45642_20130106_040244_outLine +BABEL_OP1_204_45771_20130626_191013_inLine +BABEL_OP1_204_45771_20130626_191013_outLine +BABEL_OP1_204_45908_20130607_213719_inLine +BABEL_OP1_204_45908_20130607_213719_outLine +BABEL_OP1_204_46202_20130524_162004_inLine +BABEL_OP1_204_46202_20130524_162004_outLine +BABEL_OP1_204_46310_20130103_163932_inLine +BABEL_OP1_204_46310_20130103_163932_outLine +BABEL_OP1_204_46315_20130129_014152_inLine +BABEL_OP1_204_46315_20130129_014152_outLine +BABEL_OP1_204_46625_20121219_193926_inLine +BABEL_OP1_204_46625_20121219_193926_outLine +BABEL_OP1_204_46712_20130111_175849_inLine +BABEL_OP1_204_46712_20130111_175849_outLine +BABEL_OP1_204_46763_20130216_235210_inLine +BABEL_OP1_204_46763_20130216_235210_outLine +BABEL_OP1_204_46770_20130224_204253_inLine +BABEL_OP1_204_46770_20130224_204253_outLine +BABEL_OP1_204_46881_20121222_190526_inLine +BABEL_OP1_204_46881_20121222_190526_outLine +BABEL_OP1_204_47309_20130705_182329_inLine +BABEL_OP1_204_47309_20130705_182329_outLine 
+BABEL_OP1_204_47405_20130829_233945_inLine +BABEL_OP1_204_47405_20130829_233945_outLine +BABEL_OP1_204_47799_20130516_193711_inLine +BABEL_OP1_204_47799_20130516_193711_outLine +BABEL_OP1_204_47882_20130705_203354_inLine +BABEL_OP1_204_47882_20130705_203354_outLine +BABEL_OP1_204_48016_20130311_033904_inLine +BABEL_OP1_204_48016_20130311_033904_outLine +BABEL_OP1_204_48200_20130209_211626_inLine +BABEL_OP1_204_48200_20130209_211626_outLine +BABEL_OP1_204_48399_20130112_205650_inLine +BABEL_OP1_204_48399_20130112_205650_outLine +BABEL_OP1_204_48663_20130303_002413_inLine +BABEL_OP1_204_48663_20130303_002413_outLine +BABEL_OP1_204_48663_20130303_023530_inLine +BABEL_OP1_204_48663_20130303_023530_outLine +BABEL_OP1_204_49216_20130112_225803_inLine +BABEL_OP1_204_49216_20130112_225803_outLine +BABEL_OP1_204_49630_20130130_014200_inLine +BABEL_OP1_204_49630_20130130_014200_outLine +BABEL_OP1_204_49637_20130112_201836_inLine +BABEL_OP1_204_49637_20130112_201836_outLine +BABEL_OP1_204_49870_20130824_181019_inLine +BABEL_OP1_204_49870_20130824_181019_outLine +BABEL_OP1_204_49902_20130128_182704_inLine +BABEL_OP1_204_49902_20130128_182704_outLine +BABEL_OP1_204_50090_20130122_180653_inLine +BABEL_OP1_204_50090_20130122_180653_outLine +BABEL_OP1_204_50940_20130522_185117_inLine +BABEL_OP1_204_50940_20130522_185117_outLine +BABEL_OP1_204_50958_20130129_195029_inLine +BABEL_OP1_204_50958_20130129_195029_outLine +BABEL_OP1_204_51414_20130531_173250_inLine +BABEL_OP1_204_51414_20130531_173250_outLine +BABEL_OP1_204_51417_20130212_002429_inLine +BABEL_OP1_204_51417_20130212_002429_outLine +BABEL_OP1_204_51484_20130209_174419_inLine +BABEL_OP1_204_51484_20130209_174419_outLine +BABEL_OP1_204_51540_20130228_021352_inLine +BABEL_OP1_204_51540_20130228_021352_outLine +BABEL_OP1_204_51611_20130608_155952_inLine +BABEL_OP1_204_51611_20130608_155952_outLine +BABEL_OP1_204_52058_20130710_173207_inLine +BABEL_OP1_204_52058_20130710_173207_outLine +BABEL_OP1_204_52070_20130607_210255_inLine +BABEL_OP1_204_52070_20130607_210255_outLine +BABEL_OP1_204_52222_20130524_171039_inLine +BABEL_OP1_204_52222_20130524_171039_outLine +BABEL_OP1_204_52246_20130122_172528_inLine +BABEL_OP1_204_52246_20130122_172528_outLine +BABEL_OP1_204_52265_20130516_202551_inLine +BABEL_OP1_204_52265_20130516_202551_outLine +BABEL_OP1_204_52272_20130112_213528_inLine +BABEL_OP1_204_52272_20130112_213528_outLine +BABEL_OP1_204_52438_20130103_172024_inLine +BABEL_OP1_204_52438_20130103_172024_outLine +BABEL_OP1_204_52447_20130624_184145_inLine +BABEL_OP1_204_52447_20130624_184145_outLine +BABEL_OP1_204_53010_20130825_210105_inLine +BABEL_OP1_204_53010_20130825_210105_outLine +BABEL_OP1_204_53072_20130605_225249_inLine +BABEL_OP1_204_53072_20130605_225249_outLine +BABEL_OP1_204_53206_20130704_211512_inLine +BABEL_OP1_204_53206_20130704_211512_outLine +BABEL_OP1_204_53492_20130322_020510_inLine +BABEL_OP1_204_53492_20130322_020510_outLine +BABEL_OP1_204_53665_20130301_171513_inLine +BABEL_OP1_204_53665_20130301_171513_outLine +BABEL_OP1_204_53957_20130219_004311_inLine +BABEL_OP1_204_53957_20130219_004311_outLine +BABEL_OP1_204_53957_20130219_004930_inLine +BABEL_OP1_204_53957_20130219_004930_outLine +BABEL_OP1_204_54477_20130217_014421_inLine +BABEL_OP1_204_54477_20130217_014421_outLine +BABEL_OP1_204_54477_20130220_020436_inLine +BABEL_OP1_204_54477_20130220_020436_outLine +BABEL_OP1_204_54697_20130209_190625_inLine +BABEL_OP1_204_54697_20130209_190625_outLine +BABEL_OP1_204_54735_20130830_002922_inLine 
+BABEL_OP1_204_54735_20130830_002922_outLine +BABEL_OP1_204_54735_20130830_004018_inLine +BABEL_OP1_204_54735_20130830_004018_outLine +BABEL_OP1_204_55042_20130820_010539_inLine +BABEL_OP1_204_55042_20130820_010539_outLine +BABEL_OP1_204_55818_20130115_173558_inLine +BABEL_OP1_204_55818_20130115_173558_outLine +BABEL_OP1_204_55968_20121219_172146_inLine +BABEL_OP1_204_55968_20121219_172146_outLine +BABEL_OP1_204_56019_20130301_165116_inLine +BABEL_OP1_204_56019_20130301_165116_outLine +BABEL_OP1_204_56076_20130223_224429_inLine +BABEL_OP1_204_56076_20130223_224429_outLine +BABEL_OP1_204_56331_20130318_211453_inLine +BABEL_OP1_204_56331_20130318_211453_outLine +BABEL_OP1_204_56345_20130524_143829_inLine +BABEL_OP1_204_56345_20130524_143829_outLine +BABEL_OP1_204_56465_20130312_022322_inLine +BABEL_OP1_204_56465_20130312_022322_outLine +BABEL_OP1_204_56468_20130606_150512_inLine +BABEL_OP1_204_56468_20130606_150512_outLine +BABEL_OP1_204_56674_20130725_164519_inLine +BABEL_OP1_204_56674_20130725_164519_outLine +BABEL_OP1_204_56826_20130215_030029_inLine +BABEL_OP1_204_56826_20130215_030029_outLine +BABEL_OP1_204_57219_20130311_044204_inLine +BABEL_OP1_204_57219_20130311_044204_outLine +BABEL_OP1_204_57566_20130304_024842_inLine +BABEL_OP1_204_57566_20130304_024842_outLine +BABEL_OP1_204_57609_20130122_194937_inLine +BABEL_OP1_204_57609_20130122_194937_outLine +BABEL_OP1_204_57654_20130114_074621_inLine +BABEL_OP1_204_57654_20130114_074621_outLine +BABEL_OP1_204_57919_20130902_232635_inLine +BABEL_OP1_204_57919_20130902_232635_outLine +BABEL_OP1_204_58061_20130605_182326_inLine +BABEL_OP1_204_58061_20130605_182326_outLine +BABEL_OP1_204_58145_20130123_042048_inLine +BABEL_OP1_204_58145_20130123_042048_outLine +BABEL_OP1_204_58853_20130709_164717_inLine +BABEL_OP1_204_58853_20130709_164717_outLine +BABEL_OP1_204_58915_20130531_170755_inLine +BABEL_OP1_204_58915_20130531_170755_outLine +BABEL_OP1_204_59635_20130211_170439_inLine +BABEL_OP1_204_59635_20130211_170439_outLine +BABEL_OP1_204_59993_20130104_172518_inLine +BABEL_OP1_204_59993_20130104_172518_outLine +BABEL_OP1_204_60458_20130618_144323_inLine +BABEL_OP1_204_60458_20130618_144323_outLine +BABEL_OP1_204_60498_20130624_192541_inLine +BABEL_OP1_204_60498_20130624_192541_outLine +BABEL_OP1_204_60650_20130709_185316_inLine +BABEL_OP1_204_60650_20130709_185316_outLine +BABEL_OP1_204_60836_20130116_024921_inLine +BABEL_OP1_204_60836_20130116_024921_outLine +BABEL_OP1_204_61219_20130114_220900_inLine +BABEL_OP1_204_61219_20130114_220900_outLine +BABEL_OP1_204_61357_20130124_034332_inLine +BABEL_OP1_204_61357_20130124_034332_outLine +BABEL_OP1_204_61678_20121220_171940_inLine +BABEL_OP1_204_61678_20121220_171940_outLine +BABEL_OP1_204_61684_20130523_114244_inLine +BABEL_OP1_204_61684_20130523_114244_outLine +BABEL_OP1_204_62047_20130211_213702_inLine +BABEL_OP1_204_62047_20130211_213702_outLine +BABEL_OP1_204_62155_20130215_213833_inLine +BABEL_OP1_204_62155_20130215_213833_outLine +BABEL_OP1_204_62158_20130508_122027_inLine +BABEL_OP1_204_62158_20130508_122027_outLine +BABEL_OP1_204_62286_20130126_212818_inLine +BABEL_OP1_204_62286_20130126_212818_outLine +BABEL_OP1_204_62323_20130820_221917_inLine +BABEL_OP1_204_62323_20130820_221917_outLine +BABEL_OP1_204_62360_20130228_184057_inLine +BABEL_OP1_204_62360_20130228_184057_outLine +BABEL_OP1_204_62362_20130626_225421_inLine +BABEL_OP1_204_62362_20130626_225421_outLine +BABEL_OP1_204_62434_20130114_192752_inLine +BABEL_OP1_204_62434_20130114_192752_outLine 
+BABEL_OP1_204_62471_20130818_161031_inLine +BABEL_OP1_204_62471_20130818_161031_outLine +BABEL_OP1_204_62714_20130215_213205_inLine +BABEL_OP1_204_62714_20130215_213205_outLine +BABEL_OP1_204_63265_20130821_232031_inLine +BABEL_OP1_204_63265_20130821_232031_outLine +BABEL_OP1_204_63425_20130318_220104_inLine +BABEL_OP1_204_63425_20130318_220104_outLine +BABEL_OP1_204_63481_20121224_021602_inLine +BABEL_OP1_204_63481_20121224_021602_outLine +BABEL_OP1_204_63511_20130515_175657_inLine +BABEL_OP1_204_63511_20130515_175657_outLine +BABEL_OP1_204_63523_20130301_162515_inLine +BABEL_OP1_204_63523_20130301_162515_outLine +BABEL_OP1_204_63757_20130222_193431_inLine +BABEL_OP1_204_63757_20130222_193431_outLine +BABEL_OP1_204_63757_20130222_194438_inLine +BABEL_OP1_204_63757_20130222_194438_outLine +BABEL_OP1_204_63906_20130322_030332_inLine +BABEL_OP1_204_63906_20130322_030332_outLine +BABEL_OP1_204_63938_20130212_150410_inLine +BABEL_OP1_204_63938_20130212_150410_outLine +BABEL_OP1_204_64014_20130707_165607_inLine +BABEL_OP1_204_64014_20130707_165607_outLine +BABEL_OP1_204_64635_20130730_203724_inLine +BABEL_OP1_204_64635_20130730_203724_outLine +BABEL_OP1_204_64638_20130314_012822_inLine +BABEL_OP1_204_64638_20130314_012822_outLine +BABEL_OP1_204_64768_20130112_061048_inLine +BABEL_OP1_204_64768_20130112_061048_outLine +BABEL_OP1_204_65077_20121219_175859_inLine +BABEL_OP1_204_65077_20121219_175859_outLine +BABEL_OP1_204_65339_20130821_194428_inLine +BABEL_OP1_204_65339_20130821_194428_outLine +BABEL_OP1_204_65367_20130224_214222_inLine +BABEL_OP1_204_65367_20130224_214222_outLine +BABEL_OP1_204_65370_20130508_125401_inLine +BABEL_OP1_204_65370_20130508_125401_outLine +BABEL_OP1_204_65639_20130703_191008_inLine +BABEL_OP1_204_65639_20130703_191008_outLine +BABEL_OP1_204_65723_20130118_220849_inLine +BABEL_OP1_204_65723_20130118_220849_outLine +BABEL_OP1_204_66967_20130107_185021_inLine +BABEL_OP1_204_66967_20130107_185021_outLine +BABEL_OP1_204_67304_20130906_005328_inLine +BABEL_OP1_204_67304_20130906_005328_outLine +BABEL_OP1_204_67389_20130523_220733_inLine +BABEL_OP1_204_67389_20130523_220733_outLine +BABEL_OP1_204_67592_20130211_032508_inLine +BABEL_OP1_204_67592_20130211_032508_outLine +BABEL_OP1_204_68908_20130805_144908_inLine +BABEL_OP1_204_68908_20130805_144908_outLine +BABEL_OP1_204_69107_20130217_222355_inLine +BABEL_OP1_204_69107_20130217_222355_outLine +BABEL_OP1_204_69972_20130802_175346_inLine +BABEL_OP1_204_69972_20130802_175346_outLine +BABEL_OP1_204_69982_20130310_050949_inLine +BABEL_OP1_204_69982_20130310_050949_outLine +BABEL_OP1_204_70110_20121219_223303_inLine +BABEL_OP1_204_70110_20121219_223303_outLine +BABEL_OP1_204_70121_20130116_162838_inLine +BABEL_OP1_204_70121_20130116_162838_outLine +BABEL_OP1_204_70182_20130517_002241_inLine +BABEL_OP1_204_70182_20130517_002241_outLine +BABEL_OP1_204_70251_20130117_003349_inLine +BABEL_OP1_204_70251_20130117_003349_outLine +BABEL_OP1_204_70343_20130129_203836_inLine +BABEL_OP1_204_70343_20130129_203836_outLine +BABEL_OP1_204_70526_20130310_204157_inLine +BABEL_OP1_204_70526_20130310_204157_outLine +BABEL_OP1_204_70526_20130311_194113_inLine +BABEL_OP1_204_70526_20130311_194113_outLine +BABEL_OP1_204_70986_20130531_211537_inLine +BABEL_OP1_204_70986_20130531_211537_outLine +BABEL_OP1_204_71038_20130225_191007_inLine +BABEL_OP1_204_71038_20130225_191007_outLine +BABEL_OP1_204_71263_20130130_171712_inLine +BABEL_OP1_204_71263_20130130_171712_outLine +BABEL_OP1_204_71263_20130130_172902_inLine 
+BABEL_OP1_204_71263_20130130_172902_outLine +BABEL_OP1_204_71333_20130111_181914_inLine +BABEL_OP1_204_71333_20130111_181914_outLine +BABEL_OP1_204_71419_20130710_200227_inLine +BABEL_OP1_204_71419_20130710_200227_outLine +BABEL_OP1_204_71460_20130902_003219_inLine +BABEL_OP1_204_71460_20130902_003219_outLine +BABEL_OP1_204_71559_20130217_032759_inLine +BABEL_OP1_204_71559_20130217_032759_outLine +BABEL_OP1_204_71566_20130209_235200_inLine +BABEL_OP1_204_71566_20130209_235200_outLine +BABEL_OP1_204_71704_20130114_182140_inLine +BABEL_OP1_204_71704_20130114_182140_outLine +BABEL_OP1_204_71754_20130822_005036_inLine +BABEL_OP1_204_71754_20130822_005036_outLine +BABEL_OP1_204_71780_20130104_180509_inLine +BABEL_OP1_204_71780_20130104_180509_outLine +BABEL_OP1_204_72844_20121221_181459_inLine +BABEL_OP1_204_72844_20121221_181459_outLine +BABEL_OP1_204_73072_20130107_173326_inLine +BABEL_OP1_204_73072_20130107_173326_outLine +BABEL_OP1_204_73301_20130116_023950_inLine +BABEL_OP1_204_73301_20130116_023950_outLine +BABEL_OP1_204_73518_20130225_195225_inLine +BABEL_OP1_204_73518_20130225_195225_outLine +BABEL_OP1_204_73622_20130108_180939_inLine +BABEL_OP1_204_73622_20130108_180939_outLine +BABEL_OP1_204_73814_20130122_170515_inLine +BABEL_OP1_204_73814_20130122_170515_outLine +BABEL_OP1_204_73837_20130115_213251_inLine +BABEL_OP1_204_73837_20130115_213251_outLine +BABEL_OP1_204_73909_20130209_171219_inLine +BABEL_OP1_204_73909_20130209_171219_outLine +BABEL_OP1_204_74078_20130901_220513_inLine +BABEL_OP1_204_74078_20130901_220513_outLine +BABEL_OP1_204_75223_20130106_180539_inLine +BABEL_OP1_204_75223_20130106_180539_outLine +BABEL_OP1_204_75460_20130515_225130_inLine +BABEL_OP1_204_75460_20130515_225130_outLine +BABEL_OP1_204_75505_20121220_175919_inLine +BABEL_OP1_204_75505_20121220_175919_outLine +BABEL_OP1_204_75869_20130614_143452_inLine +BABEL_OP1_204_75869_20130614_143452_outLine +BABEL_OP1_204_75981_20130304_014705_inLine +BABEL_OP1_204_75981_20130304_014705_outLine +BABEL_OP1_204_76069_20130821_001213_inLine +BABEL_OP1_204_76069_20130821_001213_outLine +BABEL_OP1_204_76155_20130129_210554_inLine +BABEL_OP1_204_76155_20130129_210554_outLine +BABEL_OP1_204_76793_20130812_204256_inLine +BABEL_OP1_204_76793_20130812_204256_outLine +BABEL_OP1_204_76902_20130520_161816_inLine +BABEL_OP1_204_76902_20130520_161816_outLine +BABEL_OP1_204_77139_20130103_214953_inLine +BABEL_OP1_204_77139_20130103_214953_outLine +BABEL_OP1_204_77225_20130825_155026_inLine +BABEL_OP1_204_77225_20130825_155026_outLine +BABEL_OP1_204_77225_20130825_160328_inLine +BABEL_OP1_204_77225_20130825_160328_outLine +BABEL_OP1_204_77242_20130310_031438_inLine +BABEL_OP1_204_77242_20130310_031438_outLine +BABEL_OP1_204_77391_20130114_214011_inLine +BABEL_OP1_204_77391_20130114_214011_outLine +BABEL_OP1_204_77909_20130822_005415_inLine +BABEL_OP1_204_77909_20130822_005415_outLine +BABEL_OP1_204_77974_20130305_023753_inLine +BABEL_OP1_204_77974_20130305_023753_outLine +BABEL_OP1_204_78360_20130227_174048_inLine +BABEL_OP1_204_78360_20130227_174048_outLine +BABEL_OP1_204_78482_20130208_181819_inLine +BABEL_OP1_204_78482_20130208_181819_outLine +BABEL_OP1_204_78749_20130607_175636_inLine +BABEL_OP1_204_78749_20130607_175636_outLine +BABEL_OP1_204_79028_20130818_170543_inLine +BABEL_OP1_204_79028_20130818_170543_outLine +BABEL_OP1_204_79107_20130311_033735_inLine +BABEL_OP1_204_79107_20130311_033735_outLine +BABEL_OP1_204_79429_20130522_180804_inLine +BABEL_OP1_204_79429_20130522_180804_outLine 
+BABEL_OP1_204_79723_20130815_161014_inLine +BABEL_OP1_204_79723_20130815_161014_outLine +BABEL_OP1_204_79858_20130108_175702_inLine +BABEL_OP1_204_79858_20130108_175702_outLine +BABEL_OP1_204_79898_20130607_173143_inLine +BABEL_OP1_204_79898_20130607_173143_outLine +BABEL_OP1_204_80577_20130310_051912_inLine +BABEL_OP1_204_80577_20130310_051912_outLine +BABEL_OP1_204_80622_20130325_141431_inLine +BABEL_OP1_204_80622_20130325_141431_outLine +BABEL_OP1_204_80897_20130130_194208_inLine +BABEL_OP1_204_80897_20130130_194208_outLine +BABEL_OP1_204_81392_20130129_021012_inLine +BABEL_OP1_204_81392_20130129_021012_outLine +BABEL_OP1_204_81427_20130118_211419_inLine +BABEL_OP1_204_81427_20130118_211419_outLine +BABEL_OP1_204_81433_20130217_234814_inLine +BABEL_OP1_204_81433_20130217_234814_outLine +BABEL_OP1_204_81553_20130225_183924_inLine +BABEL_OP1_204_81553_20130225_183924_outLine +BABEL_OP1_204_81581_20130726_141606_inLine +BABEL_OP1_204_81581_20130726_141606_outLine +BABEL_OP1_204_81674_20130522_172505_inLine +BABEL_OP1_204_81674_20130522_172505_outLine +BABEL_OP1_204_81769_20130710_161840_inLine +BABEL_OP1_204_81769_20130710_161840_outLine +BABEL_OP1_204_82224_20130224_184149_inLine +BABEL_OP1_204_82224_20130224_184149_outLine +BABEL_OP1_204_83436_20130111_223716_inLine +BABEL_OP1_204_83436_20130111_223716_outLine +BABEL_OP1_204_83643_20130830_005334_inLine +BABEL_OP1_204_83643_20130830_005334_outLine +BABEL_OP1_204_83813_20130704_172117_inLine +BABEL_OP1_204_83813_20130704_172117_outLine +BABEL_OP1_204_83851_20130114_065704_inLine +BABEL_OP1_204_83851_20130114_065704_outLine +BABEL_OP1_204_83974_20130607_152537_inLine +BABEL_OP1_204_83974_20130607_152537_outLine +BABEL_OP1_204_84079_20130821_203040_inLine +BABEL_OP1_204_84079_20130821_203040_outLine +BABEL_OP1_204_84194_20130716_194041_inLine +BABEL_OP1_204_84194_20130716_194041_outLine +BABEL_OP1_204_84370_20130310_050228_inLine +BABEL_OP1_204_84370_20130310_050228_outLine +BABEL_OP1_204_84469_20130210_003435_inLine +BABEL_OP1_204_84469_20130210_003435_outLine +BABEL_OP1_204_84541_20130820_230752_inLine +BABEL_OP1_204_84541_20130820_230752_outLine +BABEL_OP1_204_84709_20130518_125528_inLine +BABEL_OP1_204_84709_20130518_125528_outLine +BABEL_OP1_204_84768_20130106_033700_inLine +BABEL_OP1_204_84768_20130106_033700_outLine +BABEL_OP1_204_84823_20130218_212443_inLine +BABEL_OP1_204_84823_20130218_212443_outLine +BABEL_OP1_204_85179_20130209_014947_inLine +BABEL_OP1_204_85179_20130209_014947_outLine +BABEL_OP1_204_85246_20130516_211538_inLine +BABEL_OP1_204_85246_20130516_211538_outLine +BABEL_OP1_204_85254_20130312_035109_inLine +BABEL_OP1_204_85254_20130312_035109_outLine +BABEL_OP1_204_85322_20130107_192937_inLine +BABEL_OP1_204_85322_20130107_192937_outLine +BABEL_OP1_204_85340_20130111_212907_inLine +BABEL_OP1_204_85340_20130111_212907_outLine +BABEL_OP1_204_85519_20130301_161437_inLine +BABEL_OP1_204_85519_20130301_161437_outLine +BABEL_OP1_204_85651_20130216_204250_inLine +BABEL_OP1_204_85651_20130216_204250_outLine +BABEL_OP1_204_86597_20130310_031951_inLine +BABEL_OP1_204_86597_20130310_031951_outLine +BABEL_OP1_204_86722_20130114_025704_inLine +BABEL_OP1_204_86722_20130114_025704_outLine +BABEL_OP1_204_86826_20130627_190707_inLine +BABEL_OP1_204_86826_20130627_190707_outLine +BABEL_OP1_204_86830_20130613_181407_inLine +BABEL_OP1_204_86830_20130613_181407_outLine +BABEL_OP1_204_86845_20130705_192829_inLine +BABEL_OP1_204_86845_20130705_192829_outLine +BABEL_OP1_204_86845_20130705_193447_inLine 
+BABEL_OP1_204_86845_20130705_193447_outLine +BABEL_OP1_204_86885_20130825_200228_inLine +BABEL_OP1_204_86885_20130825_200228_outLine +BABEL_OP1_204_86952_20121231_204819_inLine +BABEL_OP1_204_86952_20121231_204819_outLine +BABEL_OP1_204_87280_20130209_180508_inLine +BABEL_OP1_204_87280_20130209_180508_outLine +BABEL_OP1_204_87470_20130122_032958_inLine +BABEL_OP1_204_87470_20130122_032958_outLine +BABEL_OP1_204_87889_20130225_183607_inLine +BABEL_OP1_204_87889_20130225_183607_outLine +BABEL_OP1_204_88394_20130708_175704_inLine +BABEL_OP1_204_88394_20130708_175704_outLine +BABEL_OP1_204_88686_20121222_200228_inLine +BABEL_OP1_204_88686_20121222_200228_outLine +BABEL_OP1_204_88873_20130108_214456_inLine +BABEL_OP1_204_88873_20130108_214456_outLine +BABEL_OP1_204_88982_20130129_004023_inLine +BABEL_OP1_204_88982_20130129_004023_outLine +BABEL_OP1_204_89372_20121219_192043_inLine +BABEL_OP1_204_89372_20121219_192043_outLine +BABEL_OP1_204_89665_20130122_035608_inLine +BABEL_OP1_204_89665_20130122_035608_outLine +BABEL_OP1_204_89718_20130821_214732_inLine +BABEL_OP1_204_89718_20130821_214732_outLine +BABEL_OP1_204_89794_20130321_180037_inLine +BABEL_OP1_204_89794_20130321_180037_outLine +BABEL_OP1_204_89794_20130321_181250_inLine +BABEL_OP1_204_89794_20130321_181250_outLine +BABEL_OP1_204_89888_20130115_181504_inLine +BABEL_OP1_204_89888_20130115_181504_outLine +BABEL_OP1_204_90440_20130509_133501_inLine +BABEL_OP1_204_90440_20130509_133501_outLine +BABEL_OP1_204_90740_20130605_163314_inLine +BABEL_OP1_204_90740_20130605_163314_outLine +BABEL_OP1_204_90832_20130310_045516_inLine +BABEL_OP1_204_90832_20130310_045516_outLine +BABEL_OP1_204_90930_20130901_200839_inLine +BABEL_OP1_204_90930_20130901_200839_outLine +BABEL_OP1_204_90935_20130116_220822_inLine +BABEL_OP1_204_90935_20130116_220822_outLine +BABEL_OP1_204_91125_20130112_215414_inLine +BABEL_OP1_204_91125_20130112_215414_outLine +BABEL_OP1_204_91189_20130516_000538_inLine +BABEL_OP1_204_91189_20130516_000538_outLine +BABEL_OP1_204_91252_20130821_000400_inLine +BABEL_OP1_204_91252_20130821_000400_outLine +BABEL_OP1_204_91411_20130710_193521_inLine +BABEL_OP1_204_91411_20130710_193521_outLine +BABEL_OP1_204_91463_20130120_184700_inLine +BABEL_OP1_204_91463_20130120_184700_outLine +BABEL_OP1_204_91581_20130210_013423_inLine +BABEL_OP1_204_91581_20130210_013423_outLine +BABEL_OP1_204_91825_20121224_185428_inLine +BABEL_OP1_204_91825_20121224_185428_outLine +BABEL_OP1_204_91825_20121224_191424_inLine +BABEL_OP1_204_91825_20121224_191424_outLine +BABEL_OP1_204_91884_20130215_205051_inLine +BABEL_OP1_204_91884_20130215_205051_outLine +BABEL_OP1_204_91971_20130818_152604_inLine +BABEL_OP1_204_91971_20130818_152604_outLine +BABEL_OP1_204_92077_20130614_165026_inLine +BABEL_OP1_204_92077_20130614_165026_outLine +BABEL_OP1_204_92176_20130120_165309_inLine +BABEL_OP1_204_92176_20130120_165309_outLine +BABEL_OP1_204_92252_20130812_220232_inLine +BABEL_OP1_204_92252_20130812_220232_outLine +BABEL_OP1_204_92941_20130120_230410_inLine +BABEL_OP1_204_92941_20130120_230410_outLine +BABEL_OP1_204_93320_20130311_022333_inLine +BABEL_OP1_204_93320_20130311_022333_outLine +BABEL_OP1_204_93320_20130311_023402_inLine +BABEL_OP1_204_93320_20130311_023402_outLine +BABEL_OP1_204_93443_20130803_153015_inLine +BABEL_OP1_204_93443_20130803_153015_outLine +BABEL_OP1_204_93858_20130311_005700_inLine +BABEL_OP1_204_93858_20130311_005700_outLine +BABEL_OP1_204_93937_20130313_172438_inLine +BABEL_OP1_204_93937_20130313_172438_outLine 
+BABEL_OP1_204_93946_20130210_172621_inLine +BABEL_OP1_204_93946_20130210_172621_outLine +BABEL_OP1_204_93946_20130210_175020_inLine +BABEL_OP1_204_93946_20130210_175020_outLine +BABEL_OP1_204_94035_20130704_185858_inLine +BABEL_OP1_204_94035_20130704_185858_outLine +BABEL_OP1_204_94166_20130212_185608_inLine +BABEL_OP1_204_94166_20130212_185608_outLine +BABEL_OP1_204_94212_20130709_195201_inLine +BABEL_OP1_204_94212_20130709_195201_outLine +BABEL_OP1_204_94237_20130227_204940_inLine +BABEL_OP1_204_94237_20130227_204940_outLine +BABEL_OP1_204_94262_20130307_222214_inLine +BABEL_OP1_204_94262_20130307_222214_outLine +BABEL_OP1_204_94409_20130130_012526_inLine +BABEL_OP1_204_94409_20130130_012526_outLine +BABEL_OP1_204_94713_20130710_173705_inLine +BABEL_OP1_204_94713_20130710_173705_outLine +BABEL_OP1_204_94803_20130524_125715_inLine +BABEL_OP1_204_94803_20130524_125715_outLine +BABEL_OP1_204_94891_20130520_200303_inLine +BABEL_OP1_204_94891_20130520_200303_outLine +BABEL_OP1_204_94969_20130516_174057_inLine +BABEL_OP1_204_94969_20130516_174057_outLine +BABEL_OP1_204_95124_20130521_171211_inLine +BABEL_OP1_204_95124_20130521_171211_outLine +BABEL_OP1_204_95269_20130121_040957_inLine +BABEL_OP1_204_95269_20130121_040957_outLine +BABEL_OP1_204_95338_20130617_183230_inLine +BABEL_OP1_204_95338_20130617_183230_outLine +BABEL_OP1_204_95467_20130310_041013_inLine +BABEL_OP1_204_95467_20130310_041013_outLine +BABEL_OP1_204_95571_20130605_173956_inLine +BABEL_OP1_204_95571_20130605_173956_outLine +BABEL_OP1_204_95583_20130107_233706_inLine +BABEL_OP1_204_95583_20130107_233706_outLine +BABEL_OP1_204_95598_20121218_225349_inLine +BABEL_OP1_204_95598_20121218_225349_outLine +BABEL_OP1_204_96504_20130111_012757_inLine +BABEL_OP1_204_96504_20130111_012757_outLine +BABEL_OP1_204_96820_20130120_015641_inLine +BABEL_OP1_204_96820_20130120_015641_outLine +BABEL_OP1_204_96842_20130614_172939_inLine +BABEL_OP1_204_96842_20130614_172939_outLine +BABEL_OP1_204_96934_20130119_033411_inLine +BABEL_OP1_204_96934_20130119_033411_outLine +BABEL_OP1_204_96940_20130520_190004_inLine +BABEL_OP1_204_96940_20130520_190004_outLine +BABEL_OP1_204_97345_20130705_170655_inLine +BABEL_OP1_204_97345_20130705_170655_outLine +BABEL_OP1_204_97448_20130830_013253_inLine +BABEL_OP1_204_97448_20130830_013253_outLine +BABEL_OP1_204_97588_20130106_172133_inLine +BABEL_OP1_204_97588_20130106_172133_outLine +BABEL_OP1_204_97604_20130224_214511_inLine +BABEL_OP1_204_97604_20130224_214511_outLine +BABEL_OP1_204_97911_20130701_170644_inLine +BABEL_OP1_204_97911_20130701_170644_outLine +BABEL_OP1_204_98255_20130716_204027_inLine +BABEL_OP1_204_98255_20130716_204027_outLine +BABEL_OP1_204_98311_20130114_061903_inLine +BABEL_OP1_204_98311_20130114_061903_outLine +BABEL_OP1_204_98390_20130114_195309_inLine +BABEL_OP1_204_98390_20130114_195309_outLine +BABEL_OP1_204_98580_20130130_233406_inLine +BABEL_OP1_204_98580_20130130_233406_outLine +BABEL_OP1_204_98678_20130215_215447_inLine +BABEL_OP1_204_98678_20130215_215447_outLine +BABEL_OP1_204_98888_20130130_200414_inLine +BABEL_OP1_204_98888_20130130_200414_outLine +BABEL_OP1_204_98909_20130118_224024_inLine +BABEL_OP1_204_98909_20130118_224024_outLine +BABEL_OP1_204_99264_20130211_183956_inLine +BABEL_OP1_204_99264_20130211_183956_outLine +BABEL_OP1_204_99344_20130705_180532_inLine +BABEL_OP1_204_99344_20130705_180532_outLine +BABEL_OP1_204_99487_20130111_175232_inLine +BABEL_OP1_204_99487_20130111_175232_outLine +BABEL_OP1_204_99516_20130103_172113_inLine 
+BABEL_OP1_204_99516_20130103_172113_outLine
+BABEL_OP1_204_99718_20130114_221147_inLine
+BABEL_OP1_204_99718_20130114_221147_outLine
+BABEL_OP1_204_99975_20130812_220558_inLine
+BABEL_OP1_204_99975_20130812_220558_outLine
diff --git a/egs/babel/s5d/conf/lists/204-tamil/evalpart1.list b/egs/babel/s5d/conf/lists/204-tamil/evalpart1.list
new file mode 100644
index 00000000000..c5dbddb1867
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/204-tamil/evalpart1.list
@@ -0,0 +1,186 @@
+BABEL_OP1_204_10638_20130510_124441_inLine
+BABEL_OP1_204_10638_20130510_124441_outLine
+BABEL_OP1_204_12321_20130220_211618_inLine
+BABEL_OP1_204_12321_20130220_211618_outLine
+BABEL_OP1_204_12635_20130601_152113_inLine
+BABEL_OP1_204_12635_20130601_152113_outLine
+BABEL_OP1_204_14350_20130113_023333_inLine
+BABEL_OP1_204_14350_20130113_023333_outLine
+BABEL_OP1_204_14723_20130710_180819_inLine
+BABEL_OP1_204_14723_20130710_180819_outLine
+BABEL_OP1_204_14875_20130111_192622_inLine
+BABEL_OP1_204_14875_20130111_192622_outLine
+BABEL_OP1_204_15227_20130624_180548_inLine
+BABEL_OP1_204_15227_20130624_180548_outLine
+BABEL_OP1_204_15848_20121218_180011_inLine
+BABEL_OP1_204_15848_20121218_180011_outLine
+BABEL_OP1_204_16351_20130705_205024_inLine
+BABEL_OP1_204_16351_20130705_205024_outLine
+BABEL_OP1_204_17165_20130130_191341_inLine
+BABEL_OP1_204_17165_20130130_191341_outLine
+BABEL_OP1_204_18863_20130210_164314_inLine
+BABEL_OP1_204_18863_20130210_164314_outLine
+BABEL_OP1_204_19545_20130122_164148_inLine
+BABEL_OP1_204_19545_20130122_164148_outLine
+BABEL_OP1_204_21029_20130107_212248_inLine
+BABEL_OP1_204_21029_20130107_212248_outLine
+BABEL_OP1_204_21159_20130607_143737_inLine
+BABEL_OP1_204_21159_20130607_143737_outLine
+BABEL_OP1_204_21794_20130129_233131_inLine
+BABEL_OP1_204_21794_20130129_233131_outLine
+BABEL_OP1_204_22641_20121224_195014_inLine
+BABEL_OP1_204_22641_20121224_195014_outLine
+BABEL_OP1_204_23196_20130605_144617_inLine
+BABEL_OP1_204_23196_20130605_144617_outLine
+BABEL_OP1_204_23395_20130126_013244_inLine
+BABEL_OP1_204_23395_20130126_013244_outLine
+BABEL_OP1_204_25068_20130901_230020_inLine
+BABEL_OP1_204_25068_20130901_230020_outLine
+BABEL_OP1_204_25068_20130901_235001_inLine
+BABEL_OP1_204_25068_20130901_235001_outLine
+BABEL_OP1_204_28585_20130208_014141_inLine
+BABEL_OP1_204_28585_20130208_014141_outLine
+BABEL_OP1_204_28871_20121219_184300_inLine
+BABEL_OP1_204_28871_20121219_184300_outLine
+BABEL_OP1_204_29208_20130127_011057_inLine
+BABEL_OP1_204_29208_20130127_011057_outLine
+BABEL_OP1_204_29352_20130628_145610_inLine
+BABEL_OP1_204_29352_20130628_145610_outLine
+BABEL_OP1_204_29777_20130211_193239_inLine
+BABEL_OP1_204_29777_20130211_193239_outLine
+BABEL_OP1_204_32832_20130208_200126_inLine
+BABEL_OP1_204_32832_20130208_200126_outLine
+BABEL_OP1_204_32961_20130518_164254_inLine
+BABEL_OP1_204_32961_20130518_164254_outLine
+BABEL_OP1_204_33635_20130127_024601_inLine
+BABEL_OP1_204_33635_20130127_024601_outLine
+BABEL_OP1_204_37281_20130131_020847_inLine
+BABEL_OP1_204_37281_20130131_020847_outLine
+BABEL_OP1_204_39579_20130724_163251_inLine
+BABEL_OP1_204_39579_20130724_163251_outLine
+BABEL_OP1_204_41493_20121218_185431_inLine
+BABEL_OP1_204_41493_20121218_185431_outLine
+BABEL_OP1_204_44255_20130225_230219_inLine
+BABEL_OP1_204_44255_20130225_230219_outLine
+BABEL_OP1_204_44681_20130830_000000_inLine
+BABEL_OP1_204_44681_20130830_000000_outLine
+BABEL_OP1_204_45106_20130325_003034_inLine
+BABEL_OP1_204_45106_20130325_003034_outLine
+BABEL_OP1_204_45106_20130325_004324_inLine
+BABEL_OP1_204_45106_20130325_004324_outLine +BABEL_OP1_204_46202_20130524_162004_inLine +BABEL_OP1_204_46202_20130524_162004_outLine +BABEL_OP1_204_46625_20121219_193926_inLine +BABEL_OP1_204_46625_20121219_193926_outLine +BABEL_OP1_204_47882_20130705_203354_inLine +BABEL_OP1_204_47882_20130705_203354_outLine +BABEL_OP1_204_48016_20130311_033904_inLine +BABEL_OP1_204_48016_20130311_033904_outLine +BABEL_OP1_204_48399_20130112_205650_inLine +BABEL_OP1_204_48399_20130112_205650_outLine +BABEL_OP1_204_50958_20130129_195029_inLine +BABEL_OP1_204_50958_20130129_195029_outLine +BABEL_OP1_204_53206_20130704_211512_inLine +BABEL_OP1_204_53206_20130704_211512_outLine +BABEL_OP1_204_56019_20130301_165116_inLine +BABEL_OP1_204_56019_20130301_165116_outLine +BABEL_OP1_204_57219_20130311_044204_inLine +BABEL_OP1_204_57219_20130311_044204_outLine +BABEL_OP1_204_57609_20130122_194937_inLine +BABEL_OP1_204_57609_20130122_194937_outLine +BABEL_OP1_204_57654_20130114_074621_inLine +BABEL_OP1_204_57654_20130114_074621_outLine +BABEL_OP1_204_59993_20130104_172518_inLine +BABEL_OP1_204_59993_20130104_172518_outLine +BABEL_OP1_204_62155_20130215_213833_inLine +BABEL_OP1_204_62155_20130215_213833_outLine +BABEL_OP1_204_63481_20121224_021602_inLine +BABEL_OP1_204_63481_20121224_021602_outLine +BABEL_OP1_204_63523_20130301_162515_inLine +BABEL_OP1_204_63523_20130301_162515_outLine +BABEL_OP1_204_65339_20130821_194428_inLine +BABEL_OP1_204_65339_20130821_194428_outLine +BABEL_OP1_204_67592_20130211_032508_inLine +BABEL_OP1_204_67592_20130211_032508_outLine +BABEL_OP1_204_69972_20130802_175346_inLine +BABEL_OP1_204_69972_20130802_175346_outLine +BABEL_OP1_204_69982_20130310_050949_inLine +BABEL_OP1_204_69982_20130310_050949_outLine +BABEL_OP1_204_70110_20121219_223303_inLine +BABEL_OP1_204_70110_20121219_223303_outLine +BABEL_OP1_204_71038_20130225_191007_inLine +BABEL_OP1_204_71038_20130225_191007_outLine +BABEL_OP1_204_71333_20130111_181914_inLine +BABEL_OP1_204_71333_20130111_181914_outLine +BABEL_OP1_204_71704_20130114_182140_inLine +BABEL_OP1_204_71704_20130114_182140_outLine +BABEL_OP1_204_71754_20130822_005036_inLine +BABEL_OP1_204_71754_20130822_005036_outLine +BABEL_OP1_204_73622_20130108_180939_inLine +BABEL_OP1_204_73622_20130108_180939_outLine +BABEL_OP1_204_73837_20130115_213251_inLine +BABEL_OP1_204_73837_20130115_213251_outLine +BABEL_OP1_204_77909_20130822_005415_inLine +BABEL_OP1_204_77909_20130822_005415_outLine +BABEL_OP1_204_81427_20130118_211419_inLine +BABEL_OP1_204_81427_20130118_211419_outLine +BABEL_OP1_204_84370_20130310_050228_inLine +BABEL_OP1_204_84370_20130310_050228_outLine +BABEL_OP1_204_84709_20130518_125528_inLine +BABEL_OP1_204_84709_20130518_125528_outLine +BABEL_OP1_204_84823_20130218_212443_inLine +BABEL_OP1_204_84823_20130218_212443_outLine +BABEL_OP1_204_86830_20130613_181407_inLine +BABEL_OP1_204_86830_20130613_181407_outLine +BABEL_OP1_204_88394_20130708_175704_inLine +BABEL_OP1_204_88394_20130708_175704_outLine +BABEL_OP1_204_88686_20121222_200228_inLine +BABEL_OP1_204_88686_20121222_200228_outLine +BABEL_OP1_204_88873_20130108_214456_inLine +BABEL_OP1_204_88873_20130108_214456_outLine +BABEL_OP1_204_88982_20130129_004023_inLine +BABEL_OP1_204_88982_20130129_004023_outLine +BABEL_OP1_204_89372_20121219_192043_inLine +BABEL_OP1_204_89372_20121219_192043_outLine +BABEL_OP1_204_89718_20130821_214732_inLine +BABEL_OP1_204_89718_20130821_214732_outLine +BABEL_OP1_204_89794_20130321_180037_inLine +BABEL_OP1_204_89794_20130321_180037_outLine 
+BABEL_OP1_204_89794_20130321_181250_inLine
+BABEL_OP1_204_89794_20130321_181250_outLine
+BABEL_OP1_204_90930_20130901_200839_inLine
+BABEL_OP1_204_90930_20130901_200839_outLine
+BABEL_OP1_204_90935_20130116_220822_inLine
+BABEL_OP1_204_90935_20130116_220822_outLine
+BABEL_OP1_204_91252_20130821_000400_inLine
+BABEL_OP1_204_91252_20130821_000400_outLine
+BABEL_OP1_204_91884_20130215_205051_inLine
+BABEL_OP1_204_91884_20130215_205051_outLine
+BABEL_OP1_204_91971_20130818_152604_inLine
+BABEL_OP1_204_91971_20130818_152604_outLine
+BABEL_OP1_204_92176_20130120_165309_inLine
+BABEL_OP1_204_92176_20130120_165309_outLine
+BABEL_OP1_204_92941_20130120_230410_inLine
+BABEL_OP1_204_92941_20130120_230410_outLine
+BABEL_OP1_204_94166_20130212_185608_inLine
+BABEL_OP1_204_94166_20130212_185608_outLine
+BABEL_OP1_204_94212_20130709_195201_inLine
+BABEL_OP1_204_94212_20130709_195201_outLine
+BABEL_OP1_204_95598_20121218_225349_inLine
+BABEL_OP1_204_95598_20121218_225349_outLine
+BABEL_OP1_204_96934_20130119_033411_inLine
+BABEL_OP1_204_96934_20130119_033411_outLine
+BABEL_OP1_204_97345_20130705_170655_inLine
+BABEL_OP1_204_97345_20130705_170655_outLine
+BABEL_OP1_204_97448_20130830_013253_inLine
+BABEL_OP1_204_97448_20130830_013253_outLine
+BABEL_OP1_204_98580_20130130_233406_inLine
+BABEL_OP1_204_98580_20130130_233406_outLine
+BABEL_OP1_204_98888_20130130_200414_inLine
+BABEL_OP1_204_98888_20130130_200414_outLine
+BABEL_OP1_204_99264_20130211_183956_inLine
+BABEL_OP1_204_99264_20130211_183956_outLine
+BABEL_OP1_204_99344_20130705_180532_inLine
+BABEL_OP1_204_99344_20130705_180532_outLine
+BABEL_OP1_204_99516_20130103_172113_inLine
+BABEL_OP1_204_99516_20130103_172113_outLine
diff --git a/egs/babel/s5d/conf/lists/204-tamil/train.FullLP.list b/egs/babel/s5d/conf/lists/204-tamil/train.FullLP.list
new file mode 100644
index 00000000000..84a8b1815a2
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/204-tamil/train.FullLP.list
@@ -0,0 +1,778 @@
+BABEL_OP1_204_10002_20130523_142107_inLine
+BABEL_OP1_204_10002_20130523_142107_outLine
+BABEL_OP1_204_10036_20130116_163652_inLine
+BABEL_OP1_204_10036_20130116_163652_outLine
+BABEL_OP1_204_10184_20130217_232154_inLine
+BABEL_OP1_204_10184_20130217_232154_outLine
+BABEL_OP1_204_10411_20130313_042405_inLine
+BABEL_OP1_204_10411_20130313_042405_outLine
+BABEL_OP1_204_10469_20130708_201653_inLine
+BABEL_OP1_204_10469_20130708_201653_outLine
+BABEL_OP1_204_10647_20130225_175457_inLine
+BABEL_OP1_204_10647_20130225_175457_outLine
+BABEL_OP1_204_10647_20130225_184106_inLine
+BABEL_OP1_204_10647_20130225_184106_outLine
+BABEL_OP1_204_10901_20130120_220533_inLine
+BABEL_OP1_204_10901_20130120_220533_outLine
+BABEL_OP1_204_10938_20130118_213056_inLine
+BABEL_OP1_204_10938_20130118_213056_outLine
+BABEL_OP1_204_10966_20130114_210156_inLine
+BABEL_OP1_204_10966_20130114_210156_outLine
+BABEL_OP1_204_11310_20130705_180254_inLine
+BABEL_OP1_204_11310_20130705_180254_outLine
+BABEL_OP1_204_11352_20130220_023807_inLine
+BABEL_OP1_204_11352_20130220_023807_outLine
+BABEL_OP1_204_11486_20130607_155406_inLine
+BABEL_OP1_204_11486_20130607_155406_outLine
+BABEL_OP1_204_11581_20130222_215500_inLine
+BABEL_OP1_204_11581_20130222_215500_outLine
+BABEL_OP1_204_11581_20130222_220101_inLine
+BABEL_OP1_204_11581_20130222_220101_outLine
+BABEL_OP1_204_11663_20130319_201815_inLine
+BABEL_OP1_204_11663_20130319_201815_outLine
+BABEL_OP1_204_11673_20121220_214236_inLine
+BABEL_OP1_204_11673_20121220_214236_outLine
+BABEL_OP1_204_11723_20130803_144247_inLine
+BABEL_OP1_204_11723_20130803_144247_outLine +BABEL_OP1_204_11797_20130107_214732_inLine +BABEL_OP1_204_11797_20130107_214732_outLine +BABEL_OP1_204_12036_20130102_170500_inLine +BABEL_OP1_204_12036_20130102_170500_outLine +BABEL_OP1_204_12036_20130102_171149_inLine +BABEL_OP1_204_12036_20130102_171149_outLine +BABEL_OP1_204_12220_20130120_183204_inLine +BABEL_OP1_204_12220_20130120_183204_outLine +BABEL_OP1_204_12242_20130111_014802_inLine +BABEL_OP1_204_12242_20130111_014802_outLine +BABEL_OP1_204_12846_20130515_220132_inLine +BABEL_OP1_204_12846_20130515_220132_outLine +BABEL_OP1_204_12851_20121219_172018_inLine +BABEL_OP1_204_12851_20121219_172018_outLine +BABEL_OP1_204_13030_20130120_210514_inLine +BABEL_OP1_204_13184_20130228_032847_inLine +BABEL_OP1_204_13184_20130228_032847_outLine +BABEL_OP1_204_13324_20130103_211640_inLine +BABEL_OP1_204_13324_20130103_211640_outLine +BABEL_OP1_204_13490_20130314_031843_inLine +BABEL_OP1_204_13490_20130314_031843_outLine +BABEL_OP1_204_13744_20130106_232543_inLine +BABEL_OP1_204_13744_20130106_232543_outLine +BABEL_OP1_204_13776_20130626_215241_inLine +BABEL_OP1_204_13776_20130626_215241_outLine +BABEL_OP1_204_13792_20121231_015544_inLine +BABEL_OP1_204_13792_20121231_015544_outLine +BABEL_OP1_204_14719_20130219_231741_inLine +BABEL_OP1_204_14719_20130219_231741_outLine +BABEL_OP1_204_14719_20130219_232513_inLine +BABEL_OP1_204_14719_20130219_232513_outLine +BABEL_OP1_204_14725_20130111_204740_inLine +BABEL_OP1_204_14725_20130111_204740_outLine +BABEL_OP1_204_14807_20130222_213831_inLine +BABEL_OP1_204_14807_20130222_213831_outLine +BABEL_OP1_204_15730_20130103_154749_inLine +BABEL_OP1_204_15730_20130103_154749_outLine +BABEL_OP1_204_15985_20130627_154935_inLine +BABEL_OP1_204_15985_20130627_154935_outLine +BABEL_OP1_204_16249_20130906_003049_inLine +BABEL_OP1_204_16249_20130906_003049_outLine +BABEL_OP1_204_16726_20130815_164352_inLine +BABEL_OP1_204_16726_20130815_164352_outLine +BABEL_OP1_204_16800_20130307_025108_inLine +BABEL_OP1_204_16800_20130307_025108_outLine +BABEL_OP1_204_16802_20130821_234724_inLine +BABEL_OP1_204_16802_20130821_234724_outLine +BABEL_OP1_204_16838_20130703_183021_inLine +BABEL_OP1_204_16838_20130703_183021_outLine +BABEL_OP1_204_17032_20130129_012026_inLine +BABEL_OP1_204_17032_20130129_012026_outLine +BABEL_OP1_204_17420_20130426_172522_inLine +BABEL_OP1_204_17420_20130426_172522_outLine +BABEL_OP1_204_17420_20130426_174314_inLine +BABEL_OP1_204_17420_20130426_174314_outLine +BABEL_OP1_204_17496_20130325_015543_inLine +BABEL_OP1_204_17496_20130325_015543_outLine +BABEL_OP1_204_18037_20130825_200728_inLine +BABEL_OP1_204_18037_20130825_200728_outLine +BABEL_OP1_204_18280_20130818_172915_inLine +BABEL_OP1_204_18280_20130818_172915_outLine +BABEL_OP1_204_18939_20130110_214704_inLine +BABEL_OP1_204_18939_20130110_214704_outLine +BABEL_OP1_204_18992_20130830_001646_inLine +BABEL_OP1_204_18992_20130830_001646_outLine +BABEL_OP1_204_19134_20130120_191037_inLine +BABEL_OP1_204_19134_20130120_191037_outLine +BABEL_OP1_204_19461_20130704_154920_inLine +BABEL_OP1_204_19461_20130704_154920_outLine +BABEL_OP1_204_19589_20130304_020747_inLine +BABEL_OP1_204_19589_20130304_020747_outLine +BABEL_OP1_204_19688_20130708_194740_inLine +BABEL_OP1_204_19688_20130708_194740_outLine +BABEL_OP1_204_20330_20130217_225055_inLine +BABEL_OP1_204_20330_20130217_225055_outLine +BABEL_OP1_204_20367_20130312_024055_inLine +BABEL_OP1_204_20367_20130312_024055_outLine +BABEL_OP1_204_20437_20130523_235611_inLine 
+BABEL_OP1_204_20437_20130523_235611_outLine +BABEL_OP1_204_20721_20130704_183621_inLine +BABEL_OP1_204_20721_20130704_183621_outLine +BABEL_OP1_204_20916_20121218_174604_inLine +BABEL_OP1_204_20916_20121218_174604_outLine +BABEL_OP1_204_20985_20130129_225135_inLine +BABEL_OP1_204_20985_20130129_225135_outLine +BABEL_OP1_204_21426_20130515_212900_inLine +BABEL_OP1_204_21426_20130515_212900_outLine +BABEL_OP1_204_21435_20130215_200722_inLine +BABEL_OP1_204_21435_20130215_200722_outLine +BABEL_OP1_204_21543_20130901_203127_inLine +BABEL_OP1_204_21543_20130901_203127_outLine +BABEL_OP1_204_21807_20130127_033626_inLine +BABEL_OP1_204_21807_20130127_033626_outLine +BABEL_OP1_204_21807_20130127_041609_inLine +BABEL_OP1_204_21807_20130127_041609_outLine +BABEL_OP1_204_22321_20130107_231204_inLine +BABEL_OP1_204_22321_20130107_231204_outLine +BABEL_OP1_204_22643_20130709_192909_inLine +BABEL_OP1_204_22643_20130709_192909_outLine +BABEL_OP1_204_23006_20130115_200742_inLine +BABEL_OP1_204_23006_20130115_200742_outLine +BABEL_OP1_204_23046_20130114_165057_inLine +BABEL_OP1_204_23046_20130114_165057_outLine +BABEL_OP1_204_23153_20130128_223235_inLine +BABEL_OP1_204_23153_20130128_223235_outLine +BABEL_OP1_204_23190_20130116_191153_inLine +BABEL_OP1_204_23190_20130116_191153_outLine +BABEL_OP1_204_23752_20130517_181521_inLine +BABEL_OP1_204_23752_20130517_181521_outLine +BABEL_OP1_204_23980_20130127_031636_inLine +BABEL_OP1_204_23980_20130127_031636_outLine +BABEL_OP1_204_24010_20130510_160627_inLine +BABEL_OP1_204_24010_20130510_160627_outLine +BABEL_OP1_204_24221_20130803_162307_inLine +BABEL_OP1_204_24221_20130803_162307_outLine +BABEL_OP1_204_24231_20130702_165725_inLine +BABEL_OP1_204_24231_20130702_165725_outLine +BABEL_OP1_204_24253_20130216_173828_inLine +BABEL_OP1_204_24253_20130216_173828_outLine +BABEL_OP1_204_24587_20130812_201846_inLine +BABEL_OP1_204_24587_20130812_201846_outLine +BABEL_OP1_204_24605_20130111_185213_inLine +BABEL_OP1_204_24605_20130111_185213_outLine +BABEL_OP1_204_26381_20130906_003653_inLine +BABEL_OP1_204_26381_20130906_003653_outLine +BABEL_OP1_204_26388_20121222_180059_inLine +BABEL_OP1_204_26388_20121222_180059_outLine +BABEL_OP1_204_26478_20130628_163250_inLine +BABEL_OP1_204_26478_20130628_163250_outLine +BABEL_OP1_204_27042_20130215_015654_inLine +BABEL_OP1_204_27042_20130215_015654_outLine +BABEL_OP1_204_27203_20130123_034459_inLine +BABEL_OP1_204_27203_20130123_034459_outLine +BABEL_OP1_204_27478_20130219_233409_inLine +BABEL_OP1_204_27478_20130219_233409_outLine +BABEL_OP1_204_27841_20130225_192938_inLine +BABEL_OP1_204_27841_20130225_192938_outLine +BABEL_OP1_204_28522_20130130_021159_inLine +BABEL_OP1_204_28522_20130130_021159_outLine +BABEL_OP1_204_28595_20130515_165745_inLine +BABEL_OP1_204_28595_20130515_165745_outLine +BABEL_OP1_204_29135_20121226_012303_inLine +BABEL_OP1_204_29404_20130225_222910_inLine +BABEL_OP1_204_29404_20130225_222910_outLine +BABEL_OP1_204_29633_20130219_205935_inLine +BABEL_OP1_204_29633_20130219_205935_outLine +BABEL_OP1_204_29911_20130704_163449_inLine +BABEL_OP1_204_29911_20130704_163449_outLine +BABEL_OP1_204_30013_20130129_224621_inLine +BABEL_OP1_204_30013_20130129_224621_outLine +BABEL_OP1_204_30098_20130302_223148_inLine +BABEL_OP1_204_30098_20130302_223148_outLine +BABEL_OP1_204_30345_20130211_192641_inLine +BABEL_OP1_204_30345_20130211_192641_outLine +BABEL_OP1_204_30432_20130128_194847_inLine +BABEL_OP1_204_30432_20130128_194847_outLine +BABEL_OP1_204_31039_20130817_183417_inLine 
+BABEL_OP1_204_31039_20130817_183417_outLine +BABEL_OP1_204_31490_20130106_234029_inLine +BABEL_OP1_204_31490_20130106_234029_outLine +BABEL_OP1_204_31728_20130730_183500_inLine +BABEL_OP1_204_31728_20130730_183500_outLine +BABEL_OP1_204_32097_20121218_192753_inLine +BABEL_OP1_204_32097_20121218_192753_outLine +BABEL_OP1_204_32122_20130119_232805_inLine +BABEL_OP1_204_32122_20130119_232805_outLine +BABEL_OP1_204_32169_20130820_205304_inLine +BABEL_OP1_204_32169_20130820_205304_outLine +BABEL_OP1_204_32244_20130617_175424_inLine +BABEL_OP1_204_32244_20130617_175424_outLine +BABEL_OP1_204_32328_20130218_020809_inLine +BABEL_OP1_204_32328_20130218_020809_outLine +BABEL_OP1_204_32727_20130224_174507_inLine +BABEL_OP1_204_32727_20130224_174507_outLine +BABEL_OP1_204_33273_20130126_234135_inLine +BABEL_OP1_204_33273_20130126_234135_outLine +BABEL_OP1_204_33355_20130110_222048_inLine +BABEL_OP1_204_33355_20130110_222048_outLine +BABEL_OP1_204_33424_20130617_192727_inLine +BABEL_OP1_204_33424_20130617_192727_outLine +BABEL_OP1_204_33774_20130601_164240_inLine +BABEL_OP1_204_33774_20130601_164240_outLine +BABEL_OP1_204_33806_20130310_041206_inLine +BABEL_OP1_204_33806_20130310_041206_outLine +BABEL_OP1_204_33913_20130205_155246_inLine +BABEL_OP1_204_34197_20121229_204615_inLine +BABEL_OP1_204_34197_20121229_204615_outLine +BABEL_OP1_204_34486_20130626_205810_inLine +BABEL_OP1_204_34486_20130626_205810_outLine +BABEL_OP1_204_34688_20121231_163152_inLine +BABEL_OP1_204_34713_20130516_164824_inLine +BABEL_OP1_204_34713_20130516_164824_outLine +BABEL_OP1_204_34811_20130130_015529_inLine +BABEL_OP1_204_34811_20130130_015529_outLine +BABEL_OP1_204_34860_20130524_205736_inLine +BABEL_OP1_204_34860_20130524_205736_outLine +BABEL_OP1_204_35000_20130217_021526_inLine +BABEL_OP1_204_35000_20130217_021526_outLine +BABEL_OP1_204_36293_20130107_173251_inLine +BABEL_OP1_204_36293_20130107_173251_outLine +BABEL_OP1_204_37228_20130224_205648_inLine +BABEL_OP1_204_37228_20130224_205648_outLine +BABEL_OP1_204_38588_20130119_231312_inLine +BABEL_OP1_204_38588_20130119_231312_outLine +BABEL_OP1_204_38664_20130116_202337_inLine +BABEL_OP1_204_38664_20130116_202337_outLine +BABEL_OP1_204_38750_20130208_003349_inLine +BABEL_OP1_204_38750_20130208_003349_outLine +BABEL_OP1_204_39099_20130302_210320_inLine +BABEL_OP1_204_39099_20130302_210320_outLine +BABEL_OP1_204_39307_20130104_021512_inLine +BABEL_OP1_204_39307_20130104_021512_outLine +BABEL_OP1_204_39638_20130605_153521_inLine +BABEL_OP1_204_39638_20130605_153521_outLine +BABEL_OP1_204_39848_20130130_204605_inLine +BABEL_OP1_204_39848_20130130_204605_outLine +BABEL_OP1_204_39893_20130313_023055_inLine +BABEL_OP1_204_39893_20130313_023055_outLine +BABEL_OP1_204_40196_20130902_001447_inLine +BABEL_OP1_204_40196_20130902_001447_outLine +BABEL_OP1_204_40565_20130129_202204_inLine +BABEL_OP1_204_40565_20130129_202204_outLine +BABEL_OP1_204_40648_20130710_170435_inLine +BABEL_OP1_204_40648_20130710_170435_outLine +BABEL_OP1_204_40686_20130704_204726_inLine +BABEL_OP1_204_40686_20130704_204726_outLine +BABEL_OP1_204_41233_20130209_215355_inLine +BABEL_OP1_204_41233_20130209_215355_outLine +BABEL_OP1_204_41334_20130311_032651_inLine +BABEL_OP1_204_41334_20130311_032651_outLine +BABEL_OP1_204_41598_20130227_193020_inLine +BABEL_OP1_204_41598_20130227_193020_outLine +BABEL_OP1_204_41720_20130524_184216_inLine +BABEL_OP1_204_41720_20130524_184216_outLine +BABEL_OP1_204_41890_20130227_233410_inLine +BABEL_OP1_204_41890_20130227_233410_outLine 
+BABEL_OP1_204_41920_20130101_031856_inLine +BABEL_OP1_204_41958_20130120_013639_inLine +BABEL_OP1_204_41958_20130120_013639_outLine +BABEL_OP1_204_41958_20130120_014156_inLine +BABEL_OP1_204_41958_20130120_014156_outLine +BABEL_OP1_204_41958_20130120_015222_inLine +BABEL_OP1_204_41958_20130120_015222_outLine +BABEL_OP1_204_42299_20130613_164705_inLine +BABEL_OP1_204_42299_20130613_164705_outLine +BABEL_OP1_204_42526_20130225_185629_inLine +BABEL_OP1_204_42526_20130225_185629_outLine +BABEL_OP1_204_42942_20130127_014343_inLine +BABEL_OP1_204_42942_20130127_014343_outLine +BABEL_OP1_204_43157_20130514_222203_inLine +BABEL_OP1_204_43157_20130514_222203_outLine +BABEL_OP1_204_43286_20130104_031805_inLine +BABEL_OP1_204_43286_20130104_031805_outLine +BABEL_OP1_204_43323_20130523_152627_inLine +BABEL_OP1_204_43323_20130523_152627_outLine +BABEL_OP1_204_43368_20130118_201259_inLine +BABEL_OP1_204_43368_20130118_201259_outLine +BABEL_OP1_204_43794_20130627_212826_inLine +BABEL_OP1_204_43794_20130627_212826_outLine +BABEL_OP1_204_44347_20130220_035919_inLine +BABEL_OP1_204_44347_20130220_035919_outLine +BABEL_OP1_204_44898_20130705_195912_inLine +BABEL_OP1_204_44898_20130705_195912_outLine +BABEL_OP1_204_45121_20130618_153308_inLine +BABEL_OP1_204_45121_20130618_153308_outLine +BABEL_OP1_204_45374_20130906_011341_inLine +BABEL_OP1_204_45374_20130906_011341_outLine +BABEL_OP1_204_45459_20130302_031028_inLine +BABEL_OP1_204_45459_20130302_031028_outLine +BABEL_OP1_204_45699_20130815_000115_inLine +BABEL_OP1_204_45699_20130815_000115_outLine +BABEL_OP1_204_45770_20130116_214623_inLine +BABEL_OP1_204_45770_20130116_214623_outLine +BABEL_OP1_204_46066_20130226_201734_inLine +BABEL_OP1_204_46066_20130226_201734_outLine +BABEL_OP1_204_46169_20130218_214523_inLine +BABEL_OP1_204_46558_20130103_175101_inLine +BABEL_OP1_204_46558_20130103_175101_outLine +BABEL_OP1_204_46905_20130704_183507_inLine +BABEL_OP1_204_46905_20130704_183507_outLine +BABEL_OP1_204_47156_20130310_000732_inLine +BABEL_OP1_204_47156_20130310_000732_outLine +BABEL_OP1_204_47283_20130102_220157_inLine +BABEL_OP1_204_47283_20130102_220157_outLine +BABEL_OP1_204_47802_20130614_155949_inLine +BABEL_OP1_204_47802_20130614_155949_outLine +BABEL_OP1_204_47823_20130209_191710_inLine +BABEL_OP1_204_47823_20130209_191710_outLine +BABEL_OP1_204_47878_20130128_213649_inLine +BABEL_OP1_204_47878_20130128_213649_outLine +BABEL_OP1_204_47878_20130128_214921_inLine +BABEL_OP1_204_47878_20130128_214921_outLine +BABEL_OP1_204_47923_20130812_172435_inLine +BABEL_OP1_204_47923_20130812_172435_outLine +BABEL_OP1_204_48299_20130531_202054_inLine +BABEL_OP1_204_48299_20130531_202054_outLine +BABEL_OP1_204_48610_20130114_165811_inLine +BABEL_OP1_204_48610_20130114_165811_outLine +BABEL_OP1_204_49027_20130606_142005_inLine +BABEL_OP1_204_49027_20130606_142005_outLine +BABEL_OP1_204_49768_20130115_220927_inLine +BABEL_OP1_204_49768_20130115_220927_outLine +BABEL_OP1_204_49775_20121219_214712_inLine +BABEL_OP1_204_49912_20130313_040643_inLine +BABEL_OP1_204_49912_20130313_040643_outLine +BABEL_OP1_204_49945_20130624_173403_inLine +BABEL_OP1_204_49945_20130624_173403_outLine +BABEL_OP1_204_50745_20130216_163145_inLine +BABEL_OP1_204_50745_20130216_163145_outLine +BABEL_OP1_204_50810_20121218_184451_inLine +BABEL_OP1_204_50810_20121218_184451_outLine +BABEL_OP1_204_51156_20130821_223730_inLine +BABEL_OP1_204_51156_20130821_223730_outLine +BABEL_OP1_204_51185_20130517_170655_inLine +BABEL_OP1_204_51185_20130517_170655_outLine 
+BABEL_OP1_204_51407_20130127_042921_inLine +BABEL_OP1_204_51407_20130127_044800_inLine +BABEL_OP1_204_52301_20130113_034941_inLine +BABEL_OP1_204_52301_20130113_034941_outLine +BABEL_OP1_204_52322_20130524_175752_inLine +BABEL_OP1_204_52322_20130524_175752_outLine +BABEL_OP1_204_52717_20130107_043805_inLine +BABEL_OP1_204_52717_20130107_043805_outLine +BABEL_OP1_204_52803_20130802_163814_inLine +BABEL_OP1_204_52803_20130802_163814_outLine +BABEL_OP1_204_52804_20130103_212424_inLine +BABEL_OP1_204_52804_20130103_212424_outLine +BABEL_OP1_204_53068_20130830_003817_inLine +BABEL_OP1_204_53068_20130830_003817_outLine +BABEL_OP1_204_53144_20130217_224136_inLine +BABEL_OP1_204_53144_20130217_224136_outLine +BABEL_OP1_204_53144_20130217_225527_inLine +BABEL_OP1_204_53144_20130217_225527_outLine +BABEL_OP1_204_53441_20130825_001938_inLine +BABEL_OP1_204_53441_20130825_001938_outLine +BABEL_OP1_204_53917_20130217_215053_inLine +BABEL_OP1_204_53917_20130217_215053_outLine +BABEL_OP1_204_54066_20130514_211116_inLine +BABEL_OP1_204_54066_20130514_211116_outLine +BABEL_OP1_204_54074_20130131_005828_inLine +BABEL_OP1_204_54074_20130131_005828_outLine +BABEL_OP1_204_54104_20130107_180959_inLine +BABEL_OP1_204_54104_20130107_180959_outLine +BABEL_OP1_204_54162_20130130_185332_inLine +BABEL_OP1_204_54162_20130130_185332_outLine +BABEL_OP1_204_54390_20130104_174530_inLine +BABEL_OP1_204_54390_20130104_174530_outLine +BABEL_OP1_204_54567_20130222_184721_inLine +BABEL_OP1_204_54567_20130222_184721_outLine +BABEL_OP1_204_54594_20130704_191249_inLine +BABEL_OP1_204_54594_20130704_191249_outLine +BABEL_OP1_204_54634_20130626_181537_inLine +BABEL_OP1_204_54634_20130626_181537_outLine +BABEL_OP1_204_54923_20130313_190841_inLine +BABEL_OP1_204_54923_20130313_190841_outLine +BABEL_OP1_204_54923_20130313_192534_inLine +BABEL_OP1_204_54923_20130313_192534_outLine +BABEL_OP1_204_54923_20130313_194117_inLine +BABEL_OP1_204_54923_20130313_194117_outLine +BABEL_OP1_204_55259_20130119_230219_inLine +BABEL_OP1_204_55259_20130119_230219_outLine +BABEL_OP1_204_55815_20130821_003003_inLine +BABEL_OP1_204_55815_20130821_003003_outLine +BABEL_OP1_204_56023_20130216_222455_inLine +BABEL_OP1_204_56023_20130216_222455_outLine +BABEL_OP1_204_56117_20130815_152303_inLine +BABEL_OP1_204_56117_20130815_152303_outLine +BABEL_OP1_204_56326_20130704_194950_inLine +BABEL_OP1_204_56326_20130704_194950_outLine +BABEL_OP1_204_56606_20130730_211609_inLine +BABEL_OP1_204_56606_20130730_211609_outLine +BABEL_OP1_204_56925_20130901_220934_inLine +BABEL_OP1_204_56925_20130901_220934_outLine +BABEL_OP1_204_57067_20130227_191402_outLine +BABEL_OP1_204_57233_20130524_200041_inLine +BABEL_OP1_204_57233_20130524_200041_outLine +BABEL_OP1_204_57782_20130417_212234_inLine +BABEL_OP1_204_57782_20130417_212234_outLine +BABEL_OP1_204_57887_20130705_183438_inLine +BABEL_OP1_204_57887_20130705_183438_outLine +BABEL_OP1_204_58006_20130325_011740_inLine +BABEL_OP1_204_58006_20130325_011740_outLine +BABEL_OP1_204_58026_20130310_194418_inLine +BABEL_OP1_204_58026_20130310_194418_outLine +BABEL_OP1_204_58103_20130118_221354_inLine +BABEL_OP1_204_58103_20130118_221354_outLine +BABEL_OP1_204_58313_20130127_023416_inLine +BABEL_OP1_204_58313_20130127_023416_outLine +BABEL_OP1_204_58489_20130209_220922_inLine +BABEL_OP1_204_58489_20130209_220922_outLine +BABEL_OP1_204_58489_20130209_221554_inLine +BABEL_OP1_204_58489_20130209_221554_outLine +BABEL_OP1_204_58636_20130812_211303_inLine +BABEL_OP1_204_58636_20130812_211303_outLine 
+BABEL_OP1_204_58734_20130108_172420_inLine +BABEL_OP1_204_58734_20130108_172420_outLine +BABEL_OP1_204_59028_20130507_123451_inLine +BABEL_OP1_204_59028_20130507_123451_outLine +BABEL_OP1_204_59291_20130719_200731_inLine +BABEL_OP1_204_59291_20130719_200731_outLine +BABEL_OP1_204_59307_20130218_000435_inLine +BABEL_OP1_204_59307_20130218_000435_outLine +BABEL_OP1_204_59307_20130218_001152_inLine +BABEL_OP1_204_59307_20130218_001152_outLine +BABEL_OP1_204_59685_20130812_185114_inLine +BABEL_OP1_204_59685_20130812_185114_outLine +BABEL_OP1_204_59864_20130302_195039_inLine +BABEL_OP1_204_59864_20130302_195039_outLine +BABEL_OP1_204_59928_20130103_190414_inLine +BABEL_OP1_204_59928_20130103_190414_outLine +BABEL_OP1_204_60026_20130107_002905_inLine +BABEL_OP1_204_60026_20130107_002905_outLine +BABEL_OP1_204_60282_20130815_161243_inLine +BABEL_OP1_204_60282_20130815_161243_outLine +BABEL_OP1_204_60299_20130313_025357_inLine +BABEL_OP1_204_60299_20130313_025357_outLine +BABEL_OP1_204_60299_20130313_030001_inLine +BABEL_OP1_204_60299_20130313_030001_outLine +BABEL_OP1_204_60397_20130822_013145_inLine +BABEL_OP1_204_60397_20130822_013145_outLine +BABEL_OP1_204_60477_20130521_010650_inLine +BABEL_OP1_204_60477_20130521_010650_outLine +BABEL_OP1_204_61190_20130111_183015_inLine +BABEL_OP1_204_61190_20130111_183015_outLine +BABEL_OP1_204_61435_20130217_214434_inLine +BABEL_OP1_204_61435_20130217_214434_outLine +BABEL_OP1_204_61438_20130719_233853_inLine +BABEL_OP1_204_61438_20130719_233853_outLine +BABEL_OP1_204_61731_20130107_035739_inLine +BABEL_OP1_204_61731_20130107_035739_outLine +BABEL_OP1_204_62177_20130719_152209_inLine +BABEL_OP1_204_62177_20130719_152209_outLine +BABEL_OP1_204_62656_20130902_220800_inLine +BABEL_OP1_204_62656_20130902_220800_outLine +BABEL_OP1_204_62734_20130119_222114_inLine +BABEL_OP1_204_62810_20130106_161333_inLine +BABEL_OP1_204_62810_20130106_161333_outLine +BABEL_OP1_204_62976_20130129_174043_inLine +BABEL_OP1_204_62976_20130129_174043_outLine +BABEL_OP1_204_63334_20130729_183108_inLine +BABEL_OP1_204_63334_20130729_183108_outLine +BABEL_OP1_204_63671_20130817_171243_inLine +BABEL_OP1_204_63671_20130817_171243_outLine +BABEL_OP1_204_63730_20130310_032536_inLine +BABEL_OP1_204_63766_20130824_010950_inLine +BABEL_OP1_204_63766_20130824_010950_outLine +BABEL_OP1_204_63920_20130822_001336_inLine +BABEL_OP1_204_63920_20130822_001336_outLine +BABEL_OP1_204_64065_20130102_231436_inLine +BABEL_OP1_204_64065_20130102_231436_outLine +BABEL_OP1_204_64259_20130610_224356_inLine +BABEL_OP1_204_64259_20130610_224356_outLine +BABEL_OP1_204_64398_20130319_024434_inLine +BABEL_OP1_204_64398_20130319_024434_outLine +BABEL_OP1_204_64469_20130818_174134_inLine +BABEL_OP1_204_64469_20130818_174134_outLine +BABEL_OP1_204_64722_20130215_020559_inLine +BABEL_OP1_204_64722_20130215_020559_outLine +BABEL_OP1_204_65048_20130901_235622_inLine +BABEL_OP1_204_65048_20130901_235622_outLine +BABEL_OP1_204_65268_20130603_220955_inLine +BABEL_OP1_204_65268_20130603_220955_outLine +BABEL_OP1_204_66305_20130218_004015_inLine +BABEL_OP1_204_66305_20130218_004015_outLine +BABEL_OP1_204_66472_20130308_022324_inLine +BABEL_OP1_204_66822_20130121_042919_inLine +BABEL_OP1_204_66822_20130121_042919_outLine +BABEL_OP1_204_66837_20130209_003706_inLine +BABEL_OP1_204_66971_20130617_172242_inLine +BABEL_OP1_204_66971_20130617_172242_outLine +BABEL_OP1_204_67053_20130522_161823_inLine +BABEL_OP1_204_67053_20130522_161823_outLine +BABEL_OP1_204_67283_20130113_013031_inLine 
+BABEL_OP1_204_67283_20130113_013031_outLine +BABEL_OP1_204_67401_20130222_205647_inLine +BABEL_OP1_204_67401_20130222_205647_outLine +BABEL_OP1_204_67659_20130111_193800_inLine +BABEL_OP1_204_67659_20130111_193800_outLine +BABEL_OP1_204_68384_20130719_175720_inLine +BABEL_OP1_204_68384_20130719_175720_outLine +BABEL_OP1_204_68910_20130816_191414_inLine +BABEL_OP1_204_68910_20130816_191414_outLine +BABEL_OP1_204_68924_20130129_165613_inLine +BABEL_OP1_204_68924_20130129_165613_outLine +BABEL_OP1_204_69096_20130303_195234_inLine +BABEL_OP1_204_69096_20130303_195234_outLine +BABEL_OP1_204_69574_20121218_220812_inLine +BABEL_OP1_204_69574_20121218_220812_outLine +BABEL_OP1_204_69937_20130715_192435_inLine +BABEL_OP1_204_69937_20130715_192435_outLine +BABEL_OP1_204_69964_20130704_161248_inLine +BABEL_OP1_204_69964_20130704_161248_outLine +BABEL_OP1_204_69992_20130107_234311_inLine +BABEL_OP1_204_69992_20130107_234311_outLine +BABEL_OP1_204_70216_20130628_200952_inLine +BABEL_OP1_204_70216_20130628_200952_outLine +BABEL_OP1_204_70257_20130716_194637_inLine +BABEL_OP1_204_70257_20130716_194637_outLine +BABEL_OP1_204_70257_20130716_195558_inLine +BABEL_OP1_204_70257_20130716_195558_outLine +BABEL_OP1_204_70293_20130902_214220_inLine +BABEL_OP1_204_70293_20130902_214220_outLine +BABEL_OP1_204_70601_20130122_030105_inLine +BABEL_OP1_204_70601_20130122_030105_outLine +BABEL_OP1_204_70794_20121220_222614_inLine +BABEL_OP1_204_70794_20121220_222614_outLine +BABEL_OP1_204_71067_20130319_205826_inLine +BABEL_OP1_204_71067_20130319_205826_outLine +BABEL_OP1_204_71189_20130215_200359_inLine +BABEL_OP1_204_71189_20130215_200359_outLine +BABEL_OP1_204_71976_20130730_180338_inLine +BABEL_OP1_204_71976_20130730_180338_outLine +BABEL_OP1_204_72073_20130823_001235_inLine +BABEL_OP1_204_72073_20130823_001235_outLine +BABEL_OP1_204_72110_20130208_235019_inLine +BABEL_OP1_204_72110_20130208_235019_outLine +BABEL_OP1_204_73549_20130701_155700_inLine +BABEL_OP1_204_73549_20130701_155700_outLine +BABEL_OP1_204_73696_20130310_022514_inLine +BABEL_OP1_204_73696_20130310_022514_outLine +BABEL_OP1_204_73822_20130515_221842_inLine +BABEL_OP1_204_73822_20130515_221842_outLine +BABEL_OP1_204_74121_20130129_170655_inLine +BABEL_OP1_204_74121_20130129_170655_outLine +BABEL_OP1_204_74280_20121220_170635_inLine +BABEL_OP1_204_74280_20121220_170635_outLine +BABEL_OP1_204_74280_20121220_172100_inLine +BABEL_OP1_204_74280_20121220_172100_outLine +BABEL_OP1_204_74763_20130825_175903_inLine +BABEL_OP1_204_74763_20130825_175903_outLine +BABEL_OP1_204_75064_20130111_180636_inLine +BABEL_OP1_204_75064_20130111_180636_outLine +BABEL_OP1_204_75365_20130516_010147_inLine +BABEL_OP1_204_75365_20130516_010147_outLine +BABEL_OP1_204_75975_20130902_224807_inLine +BABEL_OP1_204_75975_20130902_224807_outLine +BABEL_OP1_204_76126_20130217_205227_outLine +BABEL_OP1_204_76238_20130205_022020_inLine +BABEL_OP1_204_76482_20130310_023337_inLine +BABEL_OP1_204_76482_20130310_023337_outLine +BABEL_OP1_204_76730_20130825_010524_inLine +BABEL_OP1_204_76730_20130825_010524_outLine +BABEL_OP1_204_77427_20130116_173650_inLine +BABEL_OP1_204_77427_20130116_173650_outLine +BABEL_OP1_204_77803_20121219_215121_inLine +BABEL_OP1_204_77803_20121219_215121_outLine +BABEL_OP1_204_78016_20130118_223813_inLine +BABEL_OP1_204_78016_20130118_223813_outLine +BABEL_OP1_204_78016_20130118_224939_inLine +BABEL_OP1_204_78016_20130118_224939_outLine +BABEL_OP1_204_78116_20130130_004511_inLine +BABEL_OP1_204_78116_20130130_004511_outLine 
+BABEL_OP1_204_78254_20130114_224850_inLine +BABEL_OP1_204_78254_20130114_224850_outLine +BABEL_OP1_204_78313_20130223_202010_inLine +BABEL_OP1_204_78313_20130223_202010_outLine +BABEL_OP1_204_78543_20130313_200956_inLine +BABEL_OP1_204_78743_20130210_214804_inLine +BABEL_OP1_204_78743_20130210_214804_outLine +BABEL_OP1_204_78829_20130724_210413_inLine +BABEL_OP1_204_78829_20130724_210413_outLine +BABEL_OP1_204_79045_20130213_233402_inLine +BABEL_OP1_204_79045_20130213_233402_outLine +BABEL_OP1_204_79080_20130224_194409_inLine +BABEL_OP1_204_79080_20130224_194409_outLine +BABEL_OP1_204_79129_20130222_200128_inLine +BABEL_OP1_204_79129_20130222_200128_outLine +BABEL_OP1_204_79367_20130110_223433_inLine +BABEL_OP1_204_79367_20130110_223433_outLine +BABEL_OP1_204_79505_20130223_203535_inLine +BABEL_OP1_204_79505_20130223_203535_outLine +BABEL_OP1_204_80069_20130310_201210_inLine +BABEL_OP1_204_80241_20130825_143825_inLine +BABEL_OP1_204_80241_20130825_143825_outLine +BABEL_OP1_204_80439_20130115_225051_inLine +BABEL_OP1_204_80439_20130115_225051_outLine +BABEL_OP1_204_81213_20130114_221437_inLine +BABEL_OP1_204_81213_20130114_221437_outLine +BABEL_OP1_204_81622_20130130_223905_inLine +BABEL_OP1_204_81622_20130130_223905_outLine +BABEL_OP1_204_81810_20130319_043547_inLine +BABEL_OP1_204_81810_20130319_043547_outLine +BABEL_OP1_204_81854_20130303_025438_inLine +BABEL_OP1_204_81854_20130303_025438_outLine +BABEL_OP1_204_82425_20130108_181556_inLine +BABEL_OP1_204_82425_20130108_181556_outLine +BABEL_OP1_204_82935_20130208_135243_inLine +BABEL_OP1_204_82935_20130208_135243_outLine +BABEL_OP1_204_82979_20130103_191447_inLine +BABEL_OP1_204_82979_20130103_191447_outLine +BABEL_OP1_204_83394_20130313_005013_inLine +BABEL_OP1_204_83394_20130313_005013_outLine +BABEL_OP1_204_83430_20130603_202255_inLine +BABEL_OP1_204_83430_20130603_202255_outLine +BABEL_OP1_204_83455_20130119_213254_inLine +BABEL_OP1_204_83455_20130119_213254_outLine +BABEL_OP1_204_83625_20130531_181104_inLine +BABEL_OP1_204_83625_20130531_181104_outLine +BABEL_OP1_204_83771_20130625_172000_inLine +BABEL_OP1_204_83771_20130625_172000_outLine +BABEL_OP1_204_84055_20130228_202242_inLine +BABEL_OP1_204_84055_20130228_202242_outLine +BABEL_OP1_204_84077_20130812_184211_inLine +BABEL_OP1_204_84077_20130812_184211_outLine +BABEL_OP1_204_84430_20130817_164608_inLine +BABEL_OP1_204_84430_20130817_164608_outLine +BABEL_OP1_204_84430_20130901_201534_inLine +BABEL_OP1_204_84430_20130901_201534_outLine +BABEL_OP1_204_84466_20130220_015953_inLine +BABEL_OP1_204_84466_20130220_015953_outLine +BABEL_OP1_204_84583_20130122_032028_outLine +BABEL_OP1_204_84715_20130225_194321_inLine +BABEL_OP1_204_84715_20130225_194321_outLine +BABEL_OP1_204_85010_20130531_160005_inLine +BABEL_OP1_204_85010_20130531_160005_outLine +BABEL_OP1_204_85028_20130301_204938_inLine +BABEL_OP1_204_85028_20130301_222343_inLine +BABEL_OP1_204_85048_20130423_000346_inLine +BABEL_OP1_204_85048_20130423_000346_outLine +BABEL_OP1_204_85331_20130310_030345_inLine +BABEL_OP1_204_85331_20130310_030345_outLine +BABEL_OP1_204_85331_20130310_033244_inLine +BABEL_OP1_204_85331_20130310_033244_outLine +BABEL_OP1_204_85647_20130120_023041_inLine +BABEL_OP1_204_85647_20130120_023041_outLine +BABEL_OP1_204_86433_20130126_230445_inLine +BABEL_OP1_204_86433_20130126_230445_outLine +BABEL_OP1_204_86715_20130313_002453_inLine +BABEL_OP1_204_86715_20130313_002453_outLine +BABEL_OP1_204_86715_20130313_003416_inLine +BABEL_OP1_204_86715_20130313_003416_outLine 
+BABEL_OP1_204_86891_20130605_215220_inLine +BABEL_OP1_204_86891_20130605_215220_outLine +BABEL_OP1_204_87073_20121220_221057_inLine +BABEL_OP1_204_87073_20121220_221057_outLine +BABEL_OP1_204_87073_20121220_221600_inLine +BABEL_OP1_204_87073_20121220_221600_outLine +BABEL_OP1_204_87073_20121220_222957_inLine +BABEL_OP1_204_87073_20121220_222957_outLine +BABEL_OP1_204_87305_20130515_233922_inLine +BABEL_OP1_204_87305_20130515_233922_outLine +BABEL_OP1_204_87731_20130523_205109_inLine +BABEL_OP1_204_87731_20130523_205109_outLine +BABEL_OP1_204_88445_20130129_191832_inLine +BABEL_OP1_204_88445_20130129_191832_outLine +BABEL_OP1_204_88673_20130705_173732_inLine +BABEL_OP1_204_88673_20130705_173732_outLine +BABEL_OP1_204_88865_20130707_151620_inLine +BABEL_OP1_204_88865_20130707_151620_outLine +BABEL_OP1_204_89516_20130729_214127_inLine +BABEL_OP1_204_89516_20130729_214127_outLine +BABEL_OP1_204_89695_20130130_001218_inLine +BABEL_OP1_204_89695_20130130_001218_outLine +BABEL_OP1_204_89877_20130129_192538_inLine +BABEL_OP1_204_89877_20130129_192538_outLine +BABEL_OP1_204_90347_20130124_030740_inLine +BABEL_OP1_204_90347_20130124_030740_outLine +BABEL_OP1_204_90709_20130107_232337_inLine +BABEL_OP1_204_90709_20130107_232337_outLine +BABEL_OP1_204_91319_20130225_184203_inLine +BABEL_OP1_204_91319_20130225_184203_outLine +BABEL_OP1_204_91383_20130702_173202_inLine +BABEL_OP1_204_91383_20130702_173202_outLine +BABEL_OP1_204_91475_20130701_163859_inLine +BABEL_OP1_204_91475_20130701_163859_outLine +BABEL_OP1_204_91606_20130312_032420_inLine +BABEL_OP1_204_91606_20130312_032420_outLine +BABEL_OP1_204_91760_20130618_160303_inLine +BABEL_OP1_204_91760_20130618_160303_outLine +BABEL_OP1_204_92605_20130518_145958_inLine +BABEL_OP1_204_92605_20130518_145958_outLine +BABEL_OP1_204_92809_20130116_171026_inLine +BABEL_OP1_204_92809_20130116_171026_outLine +BABEL_OP1_204_92942_20130127_233540_inLine +BABEL_OP1_204_92942_20130127_233540_outLine +BABEL_OP1_204_93222_20130604_000913_inLine +BABEL_OP1_204_93222_20130604_000913_outLine +BABEL_OP1_204_93411_20130128_182958_inLine +BABEL_OP1_204_93411_20130128_182958_outLine +BABEL_OP1_204_93469_20130302_033019_inLine +BABEL_OP1_204_93469_20130302_033019_outLine +BABEL_OP1_204_93490_20130209_033837_inLine +BABEL_OP1_204_93490_20130209_033837_outLine +BABEL_OP1_204_93490_20130209_140440_inLine +BABEL_OP1_204_93490_20130209_140440_outLine +BABEL_OP1_204_93681_20130901_204636_inLine +BABEL_OP1_204_93681_20130901_204636_outLine +BABEL_OP1_204_94442_20130617_164306_inLine +BABEL_OP1_204_94442_20130617_164306_outLine +BABEL_OP1_204_95028_20130518_173442_inLine +BABEL_OP1_204_95028_20130518_173442_outLine +BABEL_OP1_204_95399_20130125_184030_outLine +BABEL_OP1_204_95446_20130225_185013_inLine +BABEL_OP1_204_95446_20130225_185013_outLine +BABEL_OP1_204_95663_20121221_214944_inLine +BABEL_OP1_204_95663_20121221_214944_outLine +BABEL_OP1_204_95942_20130215_204023_inLine +BABEL_OP1_204_95942_20130215_204023_outLine +BABEL_OP1_204_96158_20130721_235954_inLine +BABEL_OP1_204_96158_20130721_235954_outLine +BABEL_OP1_204_96190_20130116_041341_inLine +BABEL_OP1_204_96190_20130116_041341_outLine +BABEL_OP1_204_96247_20130319_165606_inLine +BABEL_OP1_204_96247_20130319_165606_outLine +BABEL_OP1_204_96690_20130129_191200_inLine +BABEL_OP1_204_96690_20130129_191200_outLine +BABEL_OP1_204_96730_20130225_193316_inLine +BABEL_OP1_204_96730_20130225_193316_outLine +BABEL_OP1_204_96808_20130617_185713_inLine +BABEL_OP1_204_96808_20130617_185713_outLine 
+BABEL_OP1_204_96910_20130115_215424_inLine +BABEL_OP1_204_96910_20130115_215424_outLine +BABEL_OP1_204_97063_20130227_185803_inLine +BABEL_OP1_204_97063_20130227_185803_outLine +BABEL_OP1_204_97063_20130306_232138_inLine +BABEL_OP1_204_97063_20130306_232138_outLine +BABEL_OP1_204_97220_20130310_023745_inLine +BABEL_OP1_204_97220_20130310_023745_outLine +BABEL_OP1_204_97376_20130128_213930_inLine +BABEL_OP1_204_97376_20130128_213930_outLine +BABEL_OP1_204_97461_20130127_014703_inLine +BABEL_OP1_204_97461_20130127_014703_outLine +BABEL_OP1_204_97461_20130127_015849_inLine +BABEL_OP1_204_97461_20130127_015849_outLine +BABEL_OP1_204_97731_20130210_235215_inLine +BABEL_OP1_204_97731_20130210_235215_outLine +BABEL_OP1_204_97772_20121218_224525_inLine +BABEL_OP1_204_97772_20121218_224525_outLine +BABEL_OP1_204_97836_20130220_015139_inLine +BABEL_OP1_204_97836_20130220_015139_outLine +BABEL_OP1_204_98365_20130224_175209_inLine +BABEL_OP1_204_98365_20130224_175209_outLine +BABEL_OP1_204_98565_20130817_171905_inLine +BABEL_OP1_204_98565_20130817_171905_outLine +BABEL_OP1_204_99289_20130215_210617_inLine +BABEL_OP1_204_99289_20130215_210617_outLine +BABEL_OP1_204_99401_20130108_180622_inLine +BABEL_OP1_204_99401_20130108_180622_outLine +BABEL_OP1_204_99594_20130126_192710_inLine +BABEL_OP1_204_99594_20130126_192710_outLine +BABEL_OP1_204_99887_20130210_212207_inLine +BABEL_OP1_204_99887_20130210_212207_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.list b/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.list new file mode 100644 index 00000000000..4c5afd85381 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.list @@ -0,0 +1,125 @@ +BABEL_OP1_204_10184_20130217_232154_inLine +BABEL_OP1_204_10184_20130217_232154_outLine +BABEL_OP1_204_11723_20130803_144247_inLine +BABEL_OP1_204_11723_20130803_144247_outLine +BABEL_OP1_204_12220_20130120_183204_inLine +BABEL_OP1_204_12220_20130120_183204_outLine +BABEL_OP1_204_13324_20130103_211640_inLine +BABEL_OP1_204_13324_20130103_211640_outLine +BABEL_OP1_204_13490_20130314_031843_inLine +BABEL_OP1_204_13490_20130314_031843_outLine +BABEL_OP1_204_13792_20121231_015544_inLine +BABEL_OP1_204_13792_20121231_015544_outLine +BABEL_OP1_204_14807_20130222_213831_inLine +BABEL_OP1_204_14807_20130222_213831_outLine +BABEL_OP1_204_16249_20130906_003049_inLine +BABEL_OP1_204_16249_20130906_003049_outLine +BABEL_OP1_204_17032_20130129_012026_inLine +BABEL_OP1_204_17032_20130129_012026_outLine +BABEL_OP1_204_20330_20130217_225055_inLine +BABEL_OP1_204_20330_20130217_225055_outLine +BABEL_OP1_204_20367_20130312_024055_inLine +BABEL_OP1_204_20367_20130312_024055_outLine +BABEL_OP1_204_22321_20130107_231204_inLine +BABEL_OP1_204_22321_20130107_231204_outLine +BABEL_OP1_204_23980_20130127_031636_inLine +BABEL_OP1_204_23980_20130127_031636_outLine +BABEL_OP1_204_24605_20130111_185213_inLine +BABEL_OP1_204_24605_20130111_185213_outLine +BABEL_OP1_204_27042_20130215_015654_inLine +BABEL_OP1_204_27042_20130215_015654_outLine +BABEL_OP1_204_27478_20130219_233409_inLine +BABEL_OP1_204_27478_20130219_233409_outLine +BABEL_OP1_204_27841_20130225_192938_inLine +BABEL_OP1_204_27841_20130225_192938_outLine +BABEL_OP1_204_31728_20130730_183500_inLine +BABEL_OP1_204_31728_20130730_183500_outLine +BABEL_OP1_204_32727_20130224_174507_inLine +BABEL_OP1_204_32727_20130224_174507_outLine +BABEL_OP1_204_33355_20130110_222048_inLine +BABEL_OP1_204_33355_20130110_222048_outLine +BABEL_OP1_204_34713_20130516_164824_inLine 
+BABEL_OP1_204_34713_20130516_164824_outLine +BABEL_OP1_204_38750_20130208_003349_inLine +BABEL_OP1_204_38750_20130208_003349_outLine +BABEL_OP1_204_39099_20130302_210320_inLine +BABEL_OP1_204_39099_20130302_210320_outLine +BABEL_OP1_204_40196_20130902_001447_inLine +BABEL_OP1_204_40196_20130902_001447_outLine +BABEL_OP1_204_40686_20130704_204726_inLine +BABEL_OP1_204_40686_20130704_204726_outLine +BABEL_OP1_204_41233_20130209_215355_inLine +BABEL_OP1_204_41233_20130209_215355_outLine +BABEL_OP1_204_42942_20130127_014343_inLine +BABEL_OP1_204_42942_20130127_014343_outLine +BABEL_OP1_204_43157_20130514_222203_inLine +BABEL_OP1_204_43157_20130514_222203_outLine +BABEL_OP1_204_43368_20130118_201259_inLine +BABEL_OP1_204_43368_20130118_201259_outLine +BABEL_OP1_204_45121_20130618_153308_inLine +BABEL_OP1_204_45121_20130618_153308_outLine +BABEL_OP1_204_45374_20130906_011341_inLine +BABEL_OP1_204_45374_20130906_011341_outLine +BABEL_OP1_204_45770_20130116_214623_inLine +BABEL_OP1_204_45770_20130116_214623_outLine +BABEL_OP1_204_49027_20130606_142005_inLine +BABEL_OP1_204_49027_20130606_142005_outLine +BABEL_OP1_204_50745_20130216_163145_inLine +BABEL_OP1_204_50745_20130216_163145_outLine +BABEL_OP1_204_53917_20130217_215053_inLine +BABEL_OP1_204_53917_20130217_215053_outLine +BABEL_OP1_204_58026_20130310_194418_inLine +BABEL_OP1_204_58026_20130310_194418_outLine +BABEL_OP1_204_60282_20130815_161243_inLine +BABEL_OP1_204_60282_20130815_161243_outLine +BABEL_OP1_204_63766_20130824_010950_inLine +BABEL_OP1_204_63766_20130824_010950_outLine +BABEL_OP1_204_68924_20130129_165613_inLine +BABEL_OP1_204_68924_20130129_165613_outLine +BABEL_OP1_204_69574_20121218_220812_inLine +BABEL_OP1_204_69574_20121218_220812_outLine +BABEL_OP1_204_70257_20130716_194637_inLine +BABEL_OP1_204_70257_20130716_194637_outLine +BABEL_OP1_204_70257_20130716_195558_inLine +BABEL_OP1_204_70257_20130716_195558_outLine +BABEL_OP1_204_73822_20130515_221842_inLine +BABEL_OP1_204_73822_20130515_221842_outLine +BABEL_OP1_204_74280_20121220_170635_inLine +BABEL_OP1_204_74280_20121220_170635_outLine +BABEL_OP1_204_74280_20121220_172100_inLine +BABEL_OP1_204_74280_20121220_172100_outLine +BABEL_OP1_204_79045_20130213_233402_inLine +BABEL_OP1_204_79045_20130213_233402_outLine +BABEL_OP1_204_79129_20130222_200128_inLine +BABEL_OP1_204_79129_20130222_200128_outLine +BABEL_OP1_204_80241_20130825_143825_inLine +BABEL_OP1_204_80241_20130825_143825_outLine +BABEL_OP1_204_81854_20130303_025438_inLine +BABEL_OP1_204_81854_20130303_025438_outLine +BABEL_OP1_204_83625_20130531_181104_inLine +BABEL_OP1_204_83625_20130531_181104_outLine +BABEL_OP1_204_85048_20130423_000346_inLine +BABEL_OP1_204_85048_20130423_000346_outLine +BABEL_OP1_204_87731_20130523_205109_inLine +BABEL_OP1_204_87731_20130523_205109_outLine +BABEL_OP1_204_89516_20130729_214127_inLine +BABEL_OP1_204_89516_20130729_214127_outLine +BABEL_OP1_204_91319_20130225_184203_inLine +BABEL_OP1_204_91319_20130225_184203_outLine +BABEL_OP1_204_91383_20130702_173202_inLine +BABEL_OP1_204_91383_20130702_173202_outLine +BABEL_OP1_204_91475_20130701_163859_inLine +BABEL_OP1_204_91475_20130701_163859_outLine +BABEL_OP1_204_91606_20130312_032420_inLine +BABEL_OP1_204_91606_20130312_032420_outLine +BABEL_OP1_204_93411_20130128_182958_inLine +BABEL_OP1_204_93411_20130128_182958_outLine +BABEL_OP1_204_95399_20130125_184030_outLine +BABEL_OP1_204_96910_20130115_215424_inLine +BABEL_OP1_204_96910_20130115_215424_outLine +BABEL_OP1_204_97731_20130210_235215_inLine 
+BABEL_OP1_204_97731_20130210_235215_outLine +BABEL_OP1_204_97836_20130220_015139_inLine +BABEL_OP1_204_97836_20130220_015139_outLine +BABEL_OP1_204_98565_20130817_171905_inLine +BABEL_OP1_204_98565_20130817_171905_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..09510717b52 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.untranscribed.list @@ -0,0 +1,653 @@ +BABEL_OP1_204_10002_20130523_142107_inLine +BABEL_OP1_204_10002_20130523_142107_outLine +BABEL_OP1_204_10036_20130116_163652_inLine +BABEL_OP1_204_10036_20130116_163652_outLine +BABEL_OP1_204_10411_20130313_042405_inLine +BABEL_OP1_204_10411_20130313_042405_outLine +BABEL_OP1_204_10469_20130708_201653_inLine +BABEL_OP1_204_10469_20130708_201653_outLine +BABEL_OP1_204_10647_20130225_175457_inLine +BABEL_OP1_204_10647_20130225_175457_outLine +BABEL_OP1_204_10647_20130225_184106_inLine +BABEL_OP1_204_10647_20130225_184106_outLine +BABEL_OP1_204_10901_20130120_220533_inLine +BABEL_OP1_204_10901_20130120_220533_outLine +BABEL_OP1_204_10938_20130118_213056_inLine +BABEL_OP1_204_10938_20130118_213056_outLine +BABEL_OP1_204_10966_20130114_210156_inLine +BABEL_OP1_204_10966_20130114_210156_outLine +BABEL_OP1_204_11310_20130705_180254_inLine +BABEL_OP1_204_11310_20130705_180254_outLine +BABEL_OP1_204_11352_20130220_023807_inLine +BABEL_OP1_204_11352_20130220_023807_outLine +BABEL_OP1_204_11486_20130607_155406_inLine +BABEL_OP1_204_11486_20130607_155406_outLine +BABEL_OP1_204_11581_20130222_215500_inLine +BABEL_OP1_204_11581_20130222_215500_outLine +BABEL_OP1_204_11581_20130222_220101_inLine +BABEL_OP1_204_11581_20130222_220101_outLine +BABEL_OP1_204_11663_20130319_201815_inLine +BABEL_OP1_204_11663_20130319_201815_outLine +BABEL_OP1_204_11673_20121220_214236_inLine +BABEL_OP1_204_11673_20121220_214236_outLine +BABEL_OP1_204_11797_20130107_214732_inLine +BABEL_OP1_204_11797_20130107_214732_outLine +BABEL_OP1_204_12036_20130102_170500_inLine +BABEL_OP1_204_12036_20130102_170500_outLine +BABEL_OP1_204_12036_20130102_171149_inLine +BABEL_OP1_204_12036_20130102_171149_outLine +BABEL_OP1_204_12242_20130111_014802_inLine +BABEL_OP1_204_12242_20130111_014802_outLine +BABEL_OP1_204_12846_20130515_220132_inLine +BABEL_OP1_204_12846_20130515_220132_outLine +BABEL_OP1_204_12851_20121219_172018_inLine +BABEL_OP1_204_12851_20121219_172018_outLine +BABEL_OP1_204_13030_20130120_210514_inLine +BABEL_OP1_204_13184_20130228_032847_inLine +BABEL_OP1_204_13184_20130228_032847_outLine +BABEL_OP1_204_13744_20130106_232543_inLine +BABEL_OP1_204_13744_20130106_232543_outLine +BABEL_OP1_204_13776_20130626_215241_inLine +BABEL_OP1_204_13776_20130626_215241_outLine +BABEL_OP1_204_14719_20130219_231741_inLine +BABEL_OP1_204_14719_20130219_231741_outLine +BABEL_OP1_204_14719_20130219_232513_inLine +BABEL_OP1_204_14719_20130219_232513_outLine +BABEL_OP1_204_14725_20130111_204740_inLine +BABEL_OP1_204_14725_20130111_204740_outLine +BABEL_OP1_204_15730_20130103_154749_inLine +BABEL_OP1_204_15730_20130103_154749_outLine +BABEL_OP1_204_15985_20130627_154935_inLine +BABEL_OP1_204_15985_20130627_154935_outLine +BABEL_OP1_204_16726_20130815_164352_inLine +BABEL_OP1_204_16726_20130815_164352_outLine +BABEL_OP1_204_16800_20130307_025108_inLine +BABEL_OP1_204_16800_20130307_025108_outLine +BABEL_OP1_204_16802_20130821_234724_inLine +BABEL_OP1_204_16802_20130821_234724_outLine 
+BABEL_OP1_204_16838_20130703_183021_inLine +BABEL_OP1_204_16838_20130703_183021_outLine +BABEL_OP1_204_17420_20130426_172522_inLine +BABEL_OP1_204_17420_20130426_172522_outLine +BABEL_OP1_204_17420_20130426_174314_inLine +BABEL_OP1_204_17420_20130426_174314_outLine +BABEL_OP1_204_17496_20130325_015543_inLine +BABEL_OP1_204_17496_20130325_015543_outLine +BABEL_OP1_204_18037_20130825_200728_inLine +BABEL_OP1_204_18037_20130825_200728_outLine +BABEL_OP1_204_18280_20130818_172915_inLine +BABEL_OP1_204_18280_20130818_172915_outLine +BABEL_OP1_204_18939_20130110_214704_inLine +BABEL_OP1_204_18939_20130110_214704_outLine +BABEL_OP1_204_18992_20130830_001646_inLine +BABEL_OP1_204_18992_20130830_001646_outLine +BABEL_OP1_204_19134_20130120_191037_inLine +BABEL_OP1_204_19134_20130120_191037_outLine +BABEL_OP1_204_19461_20130704_154920_inLine +BABEL_OP1_204_19461_20130704_154920_outLine +BABEL_OP1_204_19589_20130304_020747_inLine +BABEL_OP1_204_19589_20130304_020747_outLine +BABEL_OP1_204_19688_20130708_194740_inLine +BABEL_OP1_204_19688_20130708_194740_outLine +BABEL_OP1_204_20437_20130523_235611_inLine +BABEL_OP1_204_20437_20130523_235611_outLine +BABEL_OP1_204_20721_20130704_183621_inLine +BABEL_OP1_204_20721_20130704_183621_outLine +BABEL_OP1_204_20916_20121218_174604_inLine +BABEL_OP1_204_20916_20121218_174604_outLine +BABEL_OP1_204_20985_20130129_225135_inLine +BABEL_OP1_204_20985_20130129_225135_outLine +BABEL_OP1_204_21426_20130515_212900_inLine +BABEL_OP1_204_21426_20130515_212900_outLine +BABEL_OP1_204_21435_20130215_200722_inLine +BABEL_OP1_204_21435_20130215_200722_outLine +BABEL_OP1_204_21543_20130901_203127_inLine +BABEL_OP1_204_21543_20130901_203127_outLine +BABEL_OP1_204_21807_20130127_033626_inLine +BABEL_OP1_204_21807_20130127_033626_outLine +BABEL_OP1_204_21807_20130127_041609_inLine +BABEL_OP1_204_21807_20130127_041609_outLine +BABEL_OP1_204_22643_20130709_192909_inLine +BABEL_OP1_204_22643_20130709_192909_outLine +BABEL_OP1_204_23006_20130115_200742_inLine +BABEL_OP1_204_23006_20130115_200742_outLine +BABEL_OP1_204_23046_20130114_165057_inLine +BABEL_OP1_204_23046_20130114_165057_outLine +BABEL_OP1_204_23153_20130128_223235_inLine +BABEL_OP1_204_23153_20130128_223235_outLine +BABEL_OP1_204_23190_20130116_191153_inLine +BABEL_OP1_204_23190_20130116_191153_outLine +BABEL_OP1_204_23752_20130517_181521_inLine +BABEL_OP1_204_23752_20130517_181521_outLine +BABEL_OP1_204_24010_20130510_160627_inLine +BABEL_OP1_204_24010_20130510_160627_outLine +BABEL_OP1_204_24221_20130803_162307_inLine +BABEL_OP1_204_24221_20130803_162307_outLine +BABEL_OP1_204_24231_20130702_165725_inLine +BABEL_OP1_204_24231_20130702_165725_outLine +BABEL_OP1_204_24253_20130216_173828_inLine +BABEL_OP1_204_24253_20130216_173828_outLine +BABEL_OP1_204_24587_20130812_201846_inLine +BABEL_OP1_204_24587_20130812_201846_outLine +BABEL_OP1_204_26381_20130906_003653_inLine +BABEL_OP1_204_26381_20130906_003653_outLine +BABEL_OP1_204_26388_20121222_180059_inLine +BABEL_OP1_204_26388_20121222_180059_outLine +BABEL_OP1_204_26478_20130628_163250_inLine +BABEL_OP1_204_26478_20130628_163250_outLine +BABEL_OP1_204_27203_20130123_034459_inLine +BABEL_OP1_204_27203_20130123_034459_outLine +BABEL_OP1_204_28522_20130130_021159_inLine +BABEL_OP1_204_28522_20130130_021159_outLine +BABEL_OP1_204_28595_20130515_165745_inLine +BABEL_OP1_204_28595_20130515_165745_outLine +BABEL_OP1_204_29135_20121226_012303_inLine +BABEL_OP1_204_29404_20130225_222910_inLine +BABEL_OP1_204_29404_20130225_222910_outLine 
+BABEL_OP1_204_29633_20130219_205935_inLine +BABEL_OP1_204_29633_20130219_205935_outLine +BABEL_OP1_204_29911_20130704_163449_inLine +BABEL_OP1_204_29911_20130704_163449_outLine +BABEL_OP1_204_30013_20130129_224621_inLine +BABEL_OP1_204_30013_20130129_224621_outLine +BABEL_OP1_204_30098_20130302_223148_inLine +BABEL_OP1_204_30098_20130302_223148_outLine +BABEL_OP1_204_30345_20130211_192641_inLine +BABEL_OP1_204_30345_20130211_192641_outLine +BABEL_OP1_204_30432_20130128_194847_inLine +BABEL_OP1_204_30432_20130128_194847_outLine +BABEL_OP1_204_31039_20130817_183417_inLine +BABEL_OP1_204_31039_20130817_183417_outLine +BABEL_OP1_204_31490_20130106_234029_inLine +BABEL_OP1_204_31490_20130106_234029_outLine +BABEL_OP1_204_32097_20121218_192753_inLine +BABEL_OP1_204_32097_20121218_192753_outLine +BABEL_OP1_204_32122_20130119_232805_inLine +BABEL_OP1_204_32122_20130119_232805_outLine +BABEL_OP1_204_32169_20130820_205304_inLine +BABEL_OP1_204_32169_20130820_205304_outLine +BABEL_OP1_204_32244_20130617_175424_inLine +BABEL_OP1_204_32244_20130617_175424_outLine +BABEL_OP1_204_32328_20130218_020809_inLine +BABEL_OP1_204_32328_20130218_020809_outLine +BABEL_OP1_204_33273_20130126_234135_inLine +BABEL_OP1_204_33273_20130126_234135_outLine +BABEL_OP1_204_33424_20130617_192727_inLine +BABEL_OP1_204_33424_20130617_192727_outLine +BABEL_OP1_204_33774_20130601_164240_inLine +BABEL_OP1_204_33774_20130601_164240_outLine +BABEL_OP1_204_33806_20130310_041206_inLine +BABEL_OP1_204_33806_20130310_041206_outLine +BABEL_OP1_204_33913_20130205_155246_inLine +BABEL_OP1_204_34197_20121229_204615_inLine +BABEL_OP1_204_34197_20121229_204615_outLine +BABEL_OP1_204_34486_20130626_205810_inLine +BABEL_OP1_204_34486_20130626_205810_outLine +BABEL_OP1_204_34688_20121231_163152_inLine +BABEL_OP1_204_34811_20130130_015529_inLine +BABEL_OP1_204_34811_20130130_015529_outLine +BABEL_OP1_204_34860_20130524_205736_inLine +BABEL_OP1_204_34860_20130524_205736_outLine +BABEL_OP1_204_35000_20130217_021526_inLine +BABEL_OP1_204_35000_20130217_021526_outLine +BABEL_OP1_204_36293_20130107_173251_inLine +BABEL_OP1_204_36293_20130107_173251_outLine +BABEL_OP1_204_37228_20130224_205648_inLine +BABEL_OP1_204_37228_20130224_205648_outLine +BABEL_OP1_204_38588_20130119_231312_inLine +BABEL_OP1_204_38588_20130119_231312_outLine +BABEL_OP1_204_38664_20130116_202337_inLine +BABEL_OP1_204_38664_20130116_202337_outLine +BABEL_OP1_204_39307_20130104_021512_inLine +BABEL_OP1_204_39307_20130104_021512_outLine +BABEL_OP1_204_39638_20130605_153521_inLine +BABEL_OP1_204_39638_20130605_153521_outLine +BABEL_OP1_204_39848_20130130_204605_inLine +BABEL_OP1_204_39848_20130130_204605_outLine +BABEL_OP1_204_39893_20130313_023055_inLine +BABEL_OP1_204_39893_20130313_023055_outLine +BABEL_OP1_204_40565_20130129_202204_inLine +BABEL_OP1_204_40565_20130129_202204_outLine +BABEL_OP1_204_40648_20130710_170435_inLine +BABEL_OP1_204_40648_20130710_170435_outLine +BABEL_OP1_204_41334_20130311_032651_inLine +BABEL_OP1_204_41334_20130311_032651_outLine +BABEL_OP1_204_41598_20130227_193020_inLine +BABEL_OP1_204_41598_20130227_193020_outLine +BABEL_OP1_204_41720_20130524_184216_inLine +BABEL_OP1_204_41720_20130524_184216_outLine +BABEL_OP1_204_41890_20130227_233410_inLine +BABEL_OP1_204_41890_20130227_233410_outLine +BABEL_OP1_204_41920_20130101_031856_inLine +BABEL_OP1_204_41958_20130120_013639_inLine +BABEL_OP1_204_41958_20130120_013639_outLine +BABEL_OP1_204_41958_20130120_014156_inLine +BABEL_OP1_204_41958_20130120_014156_outLine 
+BABEL_OP1_204_41958_20130120_015222_inLine +BABEL_OP1_204_41958_20130120_015222_outLine +BABEL_OP1_204_42299_20130613_164705_inLine +BABEL_OP1_204_42299_20130613_164705_outLine +BABEL_OP1_204_42526_20130225_185629_inLine +BABEL_OP1_204_42526_20130225_185629_outLine +BABEL_OP1_204_43286_20130104_031805_inLine +BABEL_OP1_204_43286_20130104_031805_outLine +BABEL_OP1_204_43323_20130523_152627_inLine +BABEL_OP1_204_43323_20130523_152627_outLine +BABEL_OP1_204_43794_20130627_212826_inLine +BABEL_OP1_204_43794_20130627_212826_outLine +BABEL_OP1_204_44347_20130220_035919_inLine +BABEL_OP1_204_44347_20130220_035919_outLine +BABEL_OP1_204_44898_20130705_195912_inLine +BABEL_OP1_204_44898_20130705_195912_outLine +BABEL_OP1_204_45459_20130302_031028_inLine +BABEL_OP1_204_45459_20130302_031028_outLine +BABEL_OP1_204_45699_20130815_000115_inLine +BABEL_OP1_204_45699_20130815_000115_outLine +BABEL_OP1_204_46066_20130226_201734_inLine +BABEL_OP1_204_46066_20130226_201734_outLine +BABEL_OP1_204_46169_20130218_214523_inLine +BABEL_OP1_204_46558_20130103_175101_inLine +BABEL_OP1_204_46558_20130103_175101_outLine +BABEL_OP1_204_46905_20130704_183507_inLine +BABEL_OP1_204_46905_20130704_183507_outLine +BABEL_OP1_204_47156_20130310_000732_inLine +BABEL_OP1_204_47156_20130310_000732_outLine +BABEL_OP1_204_47283_20130102_220157_inLine +BABEL_OP1_204_47283_20130102_220157_outLine +BABEL_OP1_204_47802_20130614_155949_inLine +BABEL_OP1_204_47802_20130614_155949_outLine +BABEL_OP1_204_47823_20130209_191710_inLine +BABEL_OP1_204_47823_20130209_191710_outLine +BABEL_OP1_204_47878_20130128_213649_inLine +BABEL_OP1_204_47878_20130128_213649_outLine +BABEL_OP1_204_47878_20130128_214921_inLine +BABEL_OP1_204_47878_20130128_214921_outLine +BABEL_OP1_204_47923_20130812_172435_inLine +BABEL_OP1_204_47923_20130812_172435_outLine +BABEL_OP1_204_48299_20130531_202054_inLine +BABEL_OP1_204_48299_20130531_202054_outLine +BABEL_OP1_204_48610_20130114_165811_inLine +BABEL_OP1_204_48610_20130114_165811_outLine +BABEL_OP1_204_49768_20130115_220927_inLine +BABEL_OP1_204_49768_20130115_220927_outLine +BABEL_OP1_204_49775_20121219_214712_inLine +BABEL_OP1_204_49912_20130313_040643_inLine +BABEL_OP1_204_49912_20130313_040643_outLine +BABEL_OP1_204_49945_20130624_173403_inLine +BABEL_OP1_204_49945_20130624_173403_outLine +BABEL_OP1_204_50810_20121218_184451_inLine +BABEL_OP1_204_50810_20121218_184451_outLine +BABEL_OP1_204_51156_20130821_223730_inLine +BABEL_OP1_204_51156_20130821_223730_outLine +BABEL_OP1_204_51185_20130517_170655_inLine +BABEL_OP1_204_51185_20130517_170655_outLine +BABEL_OP1_204_51407_20130127_042921_inLine +BABEL_OP1_204_51407_20130127_044800_inLine +BABEL_OP1_204_52301_20130113_034941_inLine +BABEL_OP1_204_52301_20130113_034941_outLine +BABEL_OP1_204_52322_20130524_175752_inLine +BABEL_OP1_204_52322_20130524_175752_outLine +BABEL_OP1_204_52717_20130107_043805_inLine +BABEL_OP1_204_52717_20130107_043805_outLine +BABEL_OP1_204_52803_20130802_163814_inLine +BABEL_OP1_204_52803_20130802_163814_outLine +BABEL_OP1_204_52804_20130103_212424_inLine +BABEL_OP1_204_52804_20130103_212424_outLine +BABEL_OP1_204_53068_20130830_003817_inLine +BABEL_OP1_204_53068_20130830_003817_outLine +BABEL_OP1_204_53144_20130217_224136_inLine +BABEL_OP1_204_53144_20130217_224136_outLine +BABEL_OP1_204_53144_20130217_225527_inLine +BABEL_OP1_204_53144_20130217_225527_outLine +BABEL_OP1_204_53441_20130825_001938_inLine +BABEL_OP1_204_53441_20130825_001938_outLine +BABEL_OP1_204_54066_20130514_211116_inLine 
+BABEL_OP1_204_54066_20130514_211116_outLine +BABEL_OP1_204_54074_20130131_005828_inLine +BABEL_OP1_204_54074_20130131_005828_outLine +BABEL_OP1_204_54104_20130107_180959_inLine +BABEL_OP1_204_54104_20130107_180959_outLine +BABEL_OP1_204_54162_20130130_185332_inLine +BABEL_OP1_204_54162_20130130_185332_outLine +BABEL_OP1_204_54390_20130104_174530_inLine +BABEL_OP1_204_54390_20130104_174530_outLine +BABEL_OP1_204_54567_20130222_184721_inLine +BABEL_OP1_204_54567_20130222_184721_outLine +BABEL_OP1_204_54594_20130704_191249_inLine +BABEL_OP1_204_54594_20130704_191249_outLine +BABEL_OP1_204_54634_20130626_181537_inLine +BABEL_OP1_204_54634_20130626_181537_outLine +BABEL_OP1_204_54923_20130313_190841_inLine +BABEL_OP1_204_54923_20130313_190841_outLine +BABEL_OP1_204_54923_20130313_192534_inLine +BABEL_OP1_204_54923_20130313_192534_outLine +BABEL_OP1_204_54923_20130313_194117_inLine +BABEL_OP1_204_54923_20130313_194117_outLine +BABEL_OP1_204_55259_20130119_230219_inLine +BABEL_OP1_204_55259_20130119_230219_outLine +BABEL_OP1_204_55815_20130821_003003_inLine +BABEL_OP1_204_55815_20130821_003003_outLine +BABEL_OP1_204_56023_20130216_222455_inLine +BABEL_OP1_204_56023_20130216_222455_outLine +BABEL_OP1_204_56117_20130815_152303_inLine +BABEL_OP1_204_56117_20130815_152303_outLine +BABEL_OP1_204_56326_20130704_194950_inLine +BABEL_OP1_204_56326_20130704_194950_outLine +BABEL_OP1_204_56606_20130730_211609_inLine +BABEL_OP1_204_56606_20130730_211609_outLine +BABEL_OP1_204_56925_20130901_220934_inLine +BABEL_OP1_204_56925_20130901_220934_outLine +BABEL_OP1_204_57067_20130227_191402_outLine +BABEL_OP1_204_57233_20130524_200041_inLine +BABEL_OP1_204_57233_20130524_200041_outLine +BABEL_OP1_204_57782_20130417_212234_inLine +BABEL_OP1_204_57782_20130417_212234_outLine +BABEL_OP1_204_57887_20130705_183438_inLine +BABEL_OP1_204_57887_20130705_183438_outLine +BABEL_OP1_204_58006_20130325_011740_inLine +BABEL_OP1_204_58006_20130325_011740_outLine +BABEL_OP1_204_58103_20130118_221354_inLine +BABEL_OP1_204_58103_20130118_221354_outLine +BABEL_OP1_204_58313_20130127_023416_inLine +BABEL_OP1_204_58313_20130127_023416_outLine +BABEL_OP1_204_58489_20130209_220922_inLine +BABEL_OP1_204_58489_20130209_220922_outLine +BABEL_OP1_204_58489_20130209_221554_inLine +BABEL_OP1_204_58489_20130209_221554_outLine +BABEL_OP1_204_58636_20130812_211303_inLine +BABEL_OP1_204_58636_20130812_211303_outLine +BABEL_OP1_204_58734_20130108_172420_inLine +BABEL_OP1_204_58734_20130108_172420_outLine +BABEL_OP1_204_59028_20130507_123451_inLine +BABEL_OP1_204_59028_20130507_123451_outLine +BABEL_OP1_204_59291_20130719_200731_inLine +BABEL_OP1_204_59291_20130719_200731_outLine +BABEL_OP1_204_59307_20130218_000435_inLine +BABEL_OP1_204_59307_20130218_000435_outLine +BABEL_OP1_204_59307_20130218_001152_inLine +BABEL_OP1_204_59307_20130218_001152_outLine +BABEL_OP1_204_59685_20130812_185114_inLine +BABEL_OP1_204_59685_20130812_185114_outLine +BABEL_OP1_204_59864_20130302_195039_inLine +BABEL_OP1_204_59864_20130302_195039_outLine +BABEL_OP1_204_59928_20130103_190414_inLine +BABEL_OP1_204_59928_20130103_190414_outLine +BABEL_OP1_204_60026_20130107_002905_inLine +BABEL_OP1_204_60026_20130107_002905_outLine +BABEL_OP1_204_60299_20130313_025357_inLine +BABEL_OP1_204_60299_20130313_025357_outLine +BABEL_OP1_204_60299_20130313_030001_inLine +BABEL_OP1_204_60299_20130313_030001_outLine +BABEL_OP1_204_60397_20130822_013145_inLine +BABEL_OP1_204_60397_20130822_013145_outLine +BABEL_OP1_204_60477_20130521_010650_inLine 
+BABEL_OP1_204_60477_20130521_010650_outLine +BABEL_OP1_204_61190_20130111_183015_inLine +BABEL_OP1_204_61190_20130111_183015_outLine +BABEL_OP1_204_61435_20130217_214434_inLine +BABEL_OP1_204_61435_20130217_214434_outLine +BABEL_OP1_204_61438_20130719_233853_inLine +BABEL_OP1_204_61438_20130719_233853_outLine +BABEL_OP1_204_61731_20130107_035739_inLine +BABEL_OP1_204_61731_20130107_035739_outLine +BABEL_OP1_204_62177_20130719_152209_inLine +BABEL_OP1_204_62177_20130719_152209_outLine +BABEL_OP1_204_62656_20130902_220800_inLine +BABEL_OP1_204_62656_20130902_220800_outLine +BABEL_OP1_204_62734_20130119_222114_inLine +BABEL_OP1_204_62810_20130106_161333_inLine +BABEL_OP1_204_62810_20130106_161333_outLine +BABEL_OP1_204_62976_20130129_174043_inLine +BABEL_OP1_204_62976_20130129_174043_outLine +BABEL_OP1_204_63334_20130729_183108_inLine +BABEL_OP1_204_63334_20130729_183108_outLine +BABEL_OP1_204_63671_20130817_171243_inLine +BABEL_OP1_204_63671_20130817_171243_outLine +BABEL_OP1_204_63730_20130310_032536_inLine +BABEL_OP1_204_63920_20130822_001336_inLine +BABEL_OP1_204_63920_20130822_001336_outLine +BABEL_OP1_204_64065_20130102_231436_inLine +BABEL_OP1_204_64065_20130102_231436_outLine +BABEL_OP1_204_64259_20130610_224356_inLine +BABEL_OP1_204_64259_20130610_224356_outLine +BABEL_OP1_204_64398_20130319_024434_inLine +BABEL_OP1_204_64398_20130319_024434_outLine +BABEL_OP1_204_64469_20130818_174134_inLine +BABEL_OP1_204_64469_20130818_174134_outLine +BABEL_OP1_204_64722_20130215_020559_inLine +BABEL_OP1_204_64722_20130215_020559_outLine +BABEL_OP1_204_65048_20130901_235622_inLine +BABEL_OP1_204_65048_20130901_235622_outLine +BABEL_OP1_204_65268_20130603_220955_inLine +BABEL_OP1_204_65268_20130603_220955_outLine +BABEL_OP1_204_66305_20130218_004015_inLine +BABEL_OP1_204_66305_20130218_004015_outLine +BABEL_OP1_204_66472_20130308_022324_inLine +BABEL_OP1_204_66822_20130121_042919_inLine +BABEL_OP1_204_66822_20130121_042919_outLine +BABEL_OP1_204_66837_20130209_003706_inLine +BABEL_OP1_204_66971_20130617_172242_inLine +BABEL_OP1_204_66971_20130617_172242_outLine +BABEL_OP1_204_67053_20130522_161823_inLine +BABEL_OP1_204_67053_20130522_161823_outLine +BABEL_OP1_204_67283_20130113_013031_inLine +BABEL_OP1_204_67283_20130113_013031_outLine +BABEL_OP1_204_67401_20130222_205647_inLine +BABEL_OP1_204_67401_20130222_205647_outLine +BABEL_OP1_204_67659_20130111_193800_inLine +BABEL_OP1_204_67659_20130111_193800_outLine +BABEL_OP1_204_68384_20130719_175720_inLine +BABEL_OP1_204_68384_20130719_175720_outLine +BABEL_OP1_204_68910_20130816_191414_inLine +BABEL_OP1_204_68910_20130816_191414_outLine +BABEL_OP1_204_69096_20130303_195234_inLine +BABEL_OP1_204_69096_20130303_195234_outLine +BABEL_OP1_204_69937_20130715_192435_inLine +BABEL_OP1_204_69937_20130715_192435_outLine +BABEL_OP1_204_69964_20130704_161248_inLine +BABEL_OP1_204_69964_20130704_161248_outLine +BABEL_OP1_204_69992_20130107_234311_inLine +BABEL_OP1_204_69992_20130107_234311_outLine +BABEL_OP1_204_70216_20130628_200952_inLine +BABEL_OP1_204_70216_20130628_200952_outLine +BABEL_OP1_204_70293_20130902_214220_inLine +BABEL_OP1_204_70293_20130902_214220_outLine +BABEL_OP1_204_70601_20130122_030105_inLine +BABEL_OP1_204_70601_20130122_030105_outLine +BABEL_OP1_204_70794_20121220_222614_inLine +BABEL_OP1_204_70794_20121220_222614_outLine +BABEL_OP1_204_71067_20130319_205826_inLine +BABEL_OP1_204_71067_20130319_205826_outLine +BABEL_OP1_204_71189_20130215_200359_inLine +BABEL_OP1_204_71189_20130215_200359_outLine 
+BABEL_OP1_204_71976_20130730_180338_inLine +BABEL_OP1_204_71976_20130730_180338_outLine +BABEL_OP1_204_72073_20130823_001235_inLine +BABEL_OP1_204_72073_20130823_001235_outLine +BABEL_OP1_204_72110_20130208_235019_inLine +BABEL_OP1_204_72110_20130208_235019_outLine +BABEL_OP1_204_73549_20130701_155700_inLine +BABEL_OP1_204_73549_20130701_155700_outLine +BABEL_OP1_204_73696_20130310_022514_inLine +BABEL_OP1_204_73696_20130310_022514_outLine +BABEL_OP1_204_74121_20130129_170655_inLine +BABEL_OP1_204_74121_20130129_170655_outLine +BABEL_OP1_204_74763_20130825_175903_inLine +BABEL_OP1_204_74763_20130825_175903_outLine +BABEL_OP1_204_75064_20130111_180636_inLine +BABEL_OP1_204_75064_20130111_180636_outLine +BABEL_OP1_204_75365_20130516_010147_inLine +BABEL_OP1_204_75365_20130516_010147_outLine +BABEL_OP1_204_75975_20130902_224807_inLine +BABEL_OP1_204_75975_20130902_224807_outLine +BABEL_OP1_204_76126_20130217_205227_outLine +BABEL_OP1_204_76238_20130205_022020_inLine +BABEL_OP1_204_76482_20130310_023337_inLine +BABEL_OP1_204_76482_20130310_023337_outLine +BABEL_OP1_204_76730_20130825_010524_inLine +BABEL_OP1_204_76730_20130825_010524_outLine +BABEL_OP1_204_77427_20130116_173650_inLine +BABEL_OP1_204_77427_20130116_173650_outLine +BABEL_OP1_204_77803_20121219_215121_inLine +BABEL_OP1_204_77803_20121219_215121_outLine +BABEL_OP1_204_78016_20130118_223813_inLine +BABEL_OP1_204_78016_20130118_223813_outLine +BABEL_OP1_204_78016_20130118_224939_inLine +BABEL_OP1_204_78016_20130118_224939_outLine +BABEL_OP1_204_78116_20130130_004511_inLine +BABEL_OP1_204_78116_20130130_004511_outLine +BABEL_OP1_204_78254_20130114_224850_inLine +BABEL_OP1_204_78254_20130114_224850_outLine +BABEL_OP1_204_78313_20130223_202010_inLine +BABEL_OP1_204_78313_20130223_202010_outLine +BABEL_OP1_204_78543_20130313_200956_inLine +BABEL_OP1_204_78743_20130210_214804_inLine +BABEL_OP1_204_78743_20130210_214804_outLine +BABEL_OP1_204_78829_20130724_210413_inLine +BABEL_OP1_204_78829_20130724_210413_outLine +BABEL_OP1_204_79080_20130224_194409_inLine +BABEL_OP1_204_79080_20130224_194409_outLine +BABEL_OP1_204_79367_20130110_223433_inLine +BABEL_OP1_204_79367_20130110_223433_outLine +BABEL_OP1_204_79505_20130223_203535_inLine +BABEL_OP1_204_79505_20130223_203535_outLine +BABEL_OP1_204_80069_20130310_201210_inLine +BABEL_OP1_204_80439_20130115_225051_inLine +BABEL_OP1_204_80439_20130115_225051_outLine +BABEL_OP1_204_81213_20130114_221437_inLine +BABEL_OP1_204_81213_20130114_221437_outLine +BABEL_OP1_204_81622_20130130_223905_inLine +BABEL_OP1_204_81622_20130130_223905_outLine +BABEL_OP1_204_81810_20130319_043547_inLine +BABEL_OP1_204_81810_20130319_043547_outLine +BABEL_OP1_204_82425_20130108_181556_inLine +BABEL_OP1_204_82425_20130108_181556_outLine +BABEL_OP1_204_82935_20130208_135243_inLine +BABEL_OP1_204_82935_20130208_135243_outLine +BABEL_OP1_204_82979_20130103_191447_inLine +BABEL_OP1_204_82979_20130103_191447_outLine +BABEL_OP1_204_83394_20130313_005013_inLine +BABEL_OP1_204_83394_20130313_005013_outLine +BABEL_OP1_204_83430_20130603_202255_inLine +BABEL_OP1_204_83430_20130603_202255_outLine +BABEL_OP1_204_83455_20130119_213254_inLine +BABEL_OP1_204_83455_20130119_213254_outLine +BABEL_OP1_204_83771_20130625_172000_inLine +BABEL_OP1_204_83771_20130625_172000_outLine +BABEL_OP1_204_84055_20130228_202242_inLine +BABEL_OP1_204_84055_20130228_202242_outLine +BABEL_OP1_204_84077_20130812_184211_inLine +BABEL_OP1_204_84077_20130812_184211_outLine +BABEL_OP1_204_84430_20130817_164608_inLine 
+BABEL_OP1_204_84430_20130817_164608_outLine +BABEL_OP1_204_84430_20130901_201534_inLine +BABEL_OP1_204_84430_20130901_201534_outLine +BABEL_OP1_204_84466_20130220_015953_inLine +BABEL_OP1_204_84466_20130220_015953_outLine +BABEL_OP1_204_84583_20130122_032028_outLine +BABEL_OP1_204_84715_20130225_194321_inLine +BABEL_OP1_204_84715_20130225_194321_outLine +BABEL_OP1_204_85010_20130531_160005_inLine +BABEL_OP1_204_85010_20130531_160005_outLine +BABEL_OP1_204_85028_20130301_204938_inLine +BABEL_OP1_204_85028_20130301_222343_inLine +BABEL_OP1_204_85331_20130310_030345_inLine +BABEL_OP1_204_85331_20130310_030345_outLine +BABEL_OP1_204_85331_20130310_033244_inLine +BABEL_OP1_204_85331_20130310_033244_outLine +BABEL_OP1_204_85647_20130120_023041_inLine +BABEL_OP1_204_85647_20130120_023041_outLine +BABEL_OP1_204_86433_20130126_230445_inLine +BABEL_OP1_204_86433_20130126_230445_outLine +BABEL_OP1_204_86715_20130313_002453_inLine +BABEL_OP1_204_86715_20130313_002453_outLine +BABEL_OP1_204_86715_20130313_003416_inLine +BABEL_OP1_204_86715_20130313_003416_outLine +BABEL_OP1_204_86891_20130605_215220_inLine +BABEL_OP1_204_86891_20130605_215220_outLine +BABEL_OP1_204_87073_20121220_221057_inLine +BABEL_OP1_204_87073_20121220_221057_outLine +BABEL_OP1_204_87073_20121220_221600_inLine +BABEL_OP1_204_87073_20121220_221600_outLine +BABEL_OP1_204_87073_20121220_222957_inLine +BABEL_OP1_204_87073_20121220_222957_outLine +BABEL_OP1_204_87305_20130515_233922_inLine +BABEL_OP1_204_87305_20130515_233922_outLine +BABEL_OP1_204_88445_20130129_191832_inLine +BABEL_OP1_204_88445_20130129_191832_outLine +BABEL_OP1_204_88673_20130705_173732_inLine +BABEL_OP1_204_88673_20130705_173732_outLine +BABEL_OP1_204_88865_20130707_151620_inLine +BABEL_OP1_204_88865_20130707_151620_outLine +BABEL_OP1_204_89695_20130130_001218_inLine +BABEL_OP1_204_89695_20130130_001218_outLine +BABEL_OP1_204_89877_20130129_192538_inLine +BABEL_OP1_204_89877_20130129_192538_outLine +BABEL_OP1_204_90347_20130124_030740_inLine +BABEL_OP1_204_90347_20130124_030740_outLine +BABEL_OP1_204_90709_20130107_232337_inLine +BABEL_OP1_204_90709_20130107_232337_outLine +BABEL_OP1_204_91760_20130618_160303_inLine +BABEL_OP1_204_91760_20130618_160303_outLine +BABEL_OP1_204_92605_20130518_145958_inLine +BABEL_OP1_204_92605_20130518_145958_outLine +BABEL_OP1_204_92809_20130116_171026_inLine +BABEL_OP1_204_92809_20130116_171026_outLine +BABEL_OP1_204_92942_20130127_233540_inLine +BABEL_OP1_204_92942_20130127_233540_outLine +BABEL_OP1_204_93222_20130604_000913_inLine +BABEL_OP1_204_93222_20130604_000913_outLine +BABEL_OP1_204_93469_20130302_033019_inLine +BABEL_OP1_204_93469_20130302_033019_outLine +BABEL_OP1_204_93490_20130209_033837_inLine +BABEL_OP1_204_93490_20130209_033837_outLine +BABEL_OP1_204_93490_20130209_140440_inLine +BABEL_OP1_204_93490_20130209_140440_outLine +BABEL_OP1_204_93681_20130901_204636_inLine +BABEL_OP1_204_93681_20130901_204636_outLine +BABEL_OP1_204_94442_20130617_164306_inLine +BABEL_OP1_204_94442_20130617_164306_outLine +BABEL_OP1_204_95028_20130518_173442_inLine +BABEL_OP1_204_95028_20130518_173442_outLine +BABEL_OP1_204_95446_20130225_185013_inLine +BABEL_OP1_204_95446_20130225_185013_outLine +BABEL_OP1_204_95663_20121221_214944_inLine +BABEL_OP1_204_95663_20121221_214944_outLine +BABEL_OP1_204_95942_20130215_204023_inLine +BABEL_OP1_204_95942_20130215_204023_outLine +BABEL_OP1_204_96158_20130721_235954_inLine +BABEL_OP1_204_96158_20130721_235954_outLine +BABEL_OP1_204_96190_20130116_041341_inLine 
+BABEL_OP1_204_96190_20130116_041341_outLine +BABEL_OP1_204_96247_20130319_165606_inLine +BABEL_OP1_204_96247_20130319_165606_outLine +BABEL_OP1_204_96690_20130129_191200_inLine +BABEL_OP1_204_96690_20130129_191200_outLine +BABEL_OP1_204_96730_20130225_193316_inLine +BABEL_OP1_204_96730_20130225_193316_outLine +BABEL_OP1_204_96808_20130617_185713_inLine +BABEL_OP1_204_96808_20130617_185713_outLine +BABEL_OP1_204_97063_20130227_185803_inLine +BABEL_OP1_204_97063_20130227_185803_outLine +BABEL_OP1_204_97063_20130306_232138_inLine +BABEL_OP1_204_97063_20130306_232138_outLine +BABEL_OP1_204_97220_20130310_023745_inLine +BABEL_OP1_204_97220_20130310_023745_outLine +BABEL_OP1_204_97376_20130128_213930_inLine +BABEL_OP1_204_97376_20130128_213930_outLine +BABEL_OP1_204_97461_20130127_014703_inLine +BABEL_OP1_204_97461_20130127_014703_outLine +BABEL_OP1_204_97461_20130127_015849_inLine +BABEL_OP1_204_97461_20130127_015849_outLine +BABEL_OP1_204_97772_20121218_224525_inLine +BABEL_OP1_204_97772_20121218_224525_outLine +BABEL_OP1_204_98365_20130224_175209_inLine +BABEL_OP1_204_98365_20130224_175209_outLine +BABEL_OP1_204_99289_20130215_210617_inLine +BABEL_OP1_204_99289_20130215_210617_outLine +BABEL_OP1_204_99401_20130108_180622_inLine +BABEL_OP1_204_99401_20130108_180622_outLine +BABEL_OP1_204_99594_20130126_192710_inLine +BABEL_OP1_204_99594_20130126_192710_outLine +BABEL_OP1_204_99887_20130210_212207_inLine +BABEL_OP1_204_99887_20130210_212207_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/train.untranscribed.list b/egs/babel/s5d/conf/lists/204-tamil/train.untranscribed.list new file mode 100644 index 00000000000..cacb28a9b83 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/train.untranscribed.list @@ -0,0 +1,269 @@ +BABEL_OP1_204_10416_20130129_214039_outLine +BABEL_OP1_204_10464_20130816_191819_inLine +BABEL_OP1_204_10464_20130816_191819_outLine +BABEL_OP1_204_11528_20130611_211620_inLine +BABEL_OP1_204_11528_20130611_211620_outLine +BABEL_OP1_204_11859_20130313_032533_outLine +BABEL_OP1_204_12767_20130116_025609_outLine +BABEL_OP1_204_13126_20130217_010703_inLine +BABEL_OP1_204_13126_20130217_010703_outLine +BABEL_OP1_204_13178_20130325_020355_outLine +BABEL_OP1_204_13547_20130726_181100_inLine +BABEL_OP1_204_14097_20130815_163903_inLine +BABEL_OP1_204_14097_20130815_163903_outLine +BABEL_OP1_204_14137_20130111_210406_inLine +BABEL_OP1_204_14137_20130111_210406_outLine +BABEL_OP1_204_14560_20130325_002021_outLine +BABEL_OP1_204_14814_20130109_222610_outLine +BABEL_OP1_204_15024_20130312_175432_outLine +BABEL_OP1_204_15024_20130312_180805_outLine +BABEL_OP1_204_15216_20130212_014230_outLine +BABEL_OP1_204_15322_20130301_005753_outLine +BABEL_OP1_204_15869_20130818_163437_inLine +BABEL_OP1_204_15869_20130818_163437_outLine +BABEL_OP1_204_16149_20130116_033842_outLine +BABEL_OP1_204_16475_20130119_031738_outLine +BABEL_OP1_204_16839_20130215_160016_outLine +BABEL_OP1_204_16886_20130121_034643_outLine +BABEL_OP1_204_16938_20130121_204111_outLine +BABEL_OP1_204_17573_20130210_015840_outLine +BABEL_OP1_204_17751_20130313_054734_inLine +BABEL_OP1_204_18297_20130302_224344_inLine +BABEL_OP1_204_18297_20130302_224344_outLine +BABEL_OP1_204_18490_20130729_180159_inLine +BABEL_OP1_204_19120_20130216_232255_inLine +BABEL_OP1_204_19444_20130726_202328_inLine +BABEL_OP1_204_19767_20130227_011601_outLine +BABEL_OP1_204_20922_20130207_205901_inLine +BABEL_OP1_204_20972_20130426_122452_outLine +BABEL_OP1_204_21315_20130310_055422_inLine +BABEL_OP1_204_21624_20130219_005819_outLine 
+BABEL_OP1_204_22591_20130817_190345_inLine +BABEL_OP1_204_22591_20130817_190345_outLine +BABEL_OP1_204_22612_20130209_232523_outLine +BABEL_OP1_204_22918_20130228_021314_outLine +BABEL_OP1_204_23893_20130223_170306_inLine +BABEL_OP1_204_24209_20130814_213938_inLine +BABEL_OP1_204_24209_20130814_213938_outLine +BABEL_OP1_204_24501_20130217_012457_inLine +BABEL_OP1_204_24532_20121227_175136_outLine +BABEL_OP1_204_24586_20130217_014206_inLine +BABEL_OP1_204_24924_20130311_043001_outLine +BABEL_OP1_204_24982_20121228_191618_outLine +BABEL_OP1_204_25719_20130209_012505_outLine +BABEL_OP1_204_25961_20130107_180739_outLine +BABEL_OP1_204_26072_20130227_193336_inLine +BABEL_OP1_204_26836_20121228_170007_outLine +BABEL_OP1_204_28190_20130209_194352_inLine +BABEL_OP1_204_28190_20130225_194934_inLine +BABEL_OP1_204_28600_20130209_182228_outLine +BABEL_OP1_204_28644_20130724_180414_inLine +BABEL_OP1_204_29230_20130311_030639_outLine +BABEL_OP1_204_29563_20130724_172019_inLine +BABEL_OP1_204_29563_20130724_172019_outLine +BABEL_OP1_204_30253_20130216_045613_outLine +BABEL_OP1_204_31184_20130124_204831_outLine +BABEL_OP1_204_31346_20130216_053626_outLine +BABEL_OP1_204_32148_20130217_164600_inLine +BABEL_OP1_204_32148_20130217_164600_outLine +BABEL_OP1_204_32301_20130129_184613_outLine +BABEL_OP1_204_32861_20130227_173658_outLine +BABEL_OP1_204_32959_20130209_030319_outLine +BABEL_OP1_204_33704_20130226_220031_outLine +BABEL_OP1_204_33933_20130829_222537_inLine +BABEL_OP1_204_33933_20130829_222537_outLine +BABEL_OP1_204_34410_20130611_194642_inLine +BABEL_OP1_204_34410_20130611_194642_outLine +BABEL_OP1_204_34477_20130120_004221_inLine +BABEL_OP1_204_34477_20130120_010034_inLine +BABEL_OP1_204_34826_20130226_192804_inLine +BABEL_OP1_204_34899_20130311_034756_outLine +BABEL_OP1_204_35838_20130725_195132_inLine +BABEL_OP1_204_36341_20130107_025830_outLine +BABEL_OP1_204_36642_20130617_150620_outLine +BABEL_OP1_204_37064_20121227_220816_outLine +BABEL_OP1_204_37285_20130129_031728_inLine +BABEL_OP1_204_37285_20130129_031728_outLine +BABEL_OP1_204_38431_20130205_201344_outLine +BABEL_OP1_204_38878_20130124_001536_inLine +BABEL_OP1_204_40092_20130813_200028_inLine +BABEL_OP1_204_40092_20130813_200028_outLine +BABEL_OP1_204_40740_20130216_225837_outLine +BABEL_OP1_204_41542_20130225_183730_inLine +BABEL_OP1_204_41745_20130116_005714_outLine +BABEL_OP1_204_43115_20130302_023125_inLine +BABEL_OP1_204_43784_20121227_190820_outLine +BABEL_OP1_204_44446_20130523_123230_inLine +BABEL_OP1_204_44446_20130523_123230_outLine +BABEL_OP1_204_44477_20130121_193451_outLine +BABEL_OP1_204_44709_20130319_032435_inLine +BABEL_OP1_204_44709_20130319_032435_outLine +BABEL_OP1_204_45851_20130227_013648_inLine +BABEL_OP1_204_46389_20130814_160916_inLine +BABEL_OP1_204_46389_20130814_160916_outLine +BABEL_OP1_204_46389_20130814_161827_inLine +BABEL_OP1_204_46389_20130814_161827_outLine +BABEL_OP1_204_46808_20130829_232458_inLine +BABEL_OP1_204_46808_20130829_232458_outLine +BABEL_OP1_204_46974_20130129_181636_outLine +BABEL_OP1_204_47110_20130815_155025_inLine +BABEL_OP1_204_47110_20130815_155025_outLine +BABEL_OP1_204_48907_20130228_232925_outLine +BABEL_OP1_204_49001_20121228_172935_outLine +BABEL_OP1_204_49330_20130805_162032_inLine +BABEL_OP1_204_49330_20130805_162032_outLine +BABEL_OP1_204_49739_20130726_173931_inLine +BABEL_OP1_204_50175_20121222_205817_inLine +BABEL_OP1_204_50175_20121222_205817_outLine +BABEL_OP1_204_51015_20130130_013728_outLine +BABEL_OP1_204_51858_20130521_175757_inLine 
+BABEL_OP1_204_52381_20130224_210437_inLine +BABEL_OP1_204_52381_20130224_210437_outLine +BABEL_OP1_204_52404_20130119_200928_outLine +BABEL_OP1_204_52442_20130120_233503_inLine +BABEL_OP1_204_52442_20130120_233503_outLine +BABEL_OP1_204_52499_20130825_162347_inLine +BABEL_OP1_204_52499_20130825_162347_outLine +BABEL_OP1_204_53842_20130122_230928_outLine +BABEL_OP1_204_54046_20130209_030752_inLine +BABEL_OP1_204_54530_20130217_020357_inLine +BABEL_OP1_204_54953_20130224_201532_inLine +BABEL_OP1_204_55013_20130301_025827_inLine +BABEL_OP1_204_55013_20130301_025827_outLine +BABEL_OP1_204_55902_20130520_170810_inLine +BABEL_OP1_204_56370_20130107_193415_outLine +BABEL_OP1_204_56523_20130120_185614_inLine +BABEL_OP1_204_56523_20130120_185614_outLine +BABEL_OP1_204_56523_20130120_190444_inLine +BABEL_OP1_204_56523_20130120_190444_outLine +BABEL_OP1_204_56523_20130126_235544_inLine +BABEL_OP1_204_56523_20130126_235544_outLine +BABEL_OP1_204_56684_20130208_181923_inLine +BABEL_OP1_204_57141_20130216_214557_inLine +BABEL_OP1_204_57650_20130518_200728_outLine +BABEL_OP1_204_58585_20130225_194900_inLine +BABEL_OP1_204_58585_20130225_194900_outLine +BABEL_OP1_204_59163_20130829_230137_inLine +BABEL_OP1_204_59163_20130829_230137_outLine +BABEL_OP1_204_59549_20130116_224253_outLine +BABEL_OP1_204_59645_20130122_004956_inLine +BABEL_OP1_204_60436_20130303_010341_inLine +BABEL_OP1_204_60436_20130303_010341_outLine +BABEL_OP1_204_60474_20130111_204951_inLine +BABEL_OP1_204_60538_20130107_001630_outLine +BABEL_OP1_204_60626_20130104_165746_outLine +BABEL_OP1_204_61040_20130226_221158_outLine +BABEL_OP1_204_61873_20130128_002947_outLine +BABEL_OP1_204_62200_20130121_031957_inLine +BABEL_OP1_204_62200_20130121_031957_outLine +BABEL_OP1_204_62430_20130221_003525_inLine +BABEL_OP1_204_63604_20121231_023648_inLine +BABEL_OP1_204_63604_20121231_024400_inLine +BABEL_OP1_204_63787_20130115_003402_inLine +BABEL_OP1_204_63787_20130115_003402_outLine +BABEL_OP1_204_64494_20130118_033516_outLine +BABEL_OP1_204_64796_20130101_031431_inLine +BABEL_OP1_204_65477_20130121_031004_inLine +BABEL_OP1_204_66001_20130110_231018_outLine +BABEL_OP1_204_66001_20130110_232622_outLine +BABEL_OP1_204_66959_20130224_165508_inLine +BABEL_OP1_204_67622_20121224_014023_inLine +BABEL_OP1_204_69153_20130304_135528_inLine +BABEL_OP1_204_70452_20130111_164540_inLine +BABEL_OP1_204_70726_20130825_192242_inLine +BABEL_OP1_204_70726_20130825_192242_outLine +BABEL_OP1_204_71047_20130226_192147_inLine +BABEL_OP1_204_71047_20130226_192147_outLine +BABEL_OP1_204_72007_20130130_020438_outLine +BABEL_OP1_204_73258_20130129_221752_inLine +BABEL_OP1_204_73485_20130226_020310_inLine +BABEL_OP1_204_73485_20130226_020310_outLine +BABEL_OP1_204_75764_20130227_174139_inLine +BABEL_OP1_204_75764_20130227_174139_outLine +BABEL_OP1_204_76218_20130131_023737_outLine +BABEL_OP1_204_76444_20130613_172917_inLine +BABEL_OP1_204_76499_20130401_153504_inLine +BABEL_OP1_204_76499_20130401_153504_outLine +BABEL_OP1_204_77112_20130103_183710_outLine +BABEL_OP1_204_77126_20130110_175103_outLine +BABEL_OP1_204_77146_20121224_223748_outLine +BABEL_OP1_204_77567_20130103_223440_outLine +BABEL_OP1_204_78398_20130103_204208_inLine +BABEL_OP1_204_78398_20130103_204208_outLine +BABEL_OP1_204_78544_20130119_181147_outLine +BABEL_OP1_204_78630_20130116_034919_inLine +BABEL_OP1_204_78833_20130726_170037_inLine +BABEL_OP1_204_78943_20130109_215659_outLine +BABEL_OP1_204_79139_20130130_193601_inLine +BABEL_OP1_204_79190_20130127_012553_outLine 
+BABEL_OP1_204_79451_20121227_183417_outLine +BABEL_OP1_204_79820_20130104_184214_inLine +BABEL_OP1_204_79820_20130104_184214_outLine +BABEL_OP1_204_79973_20130729_211226_inLine +BABEL_OP1_204_79973_20130729_211226_outLine +BABEL_OP1_204_80559_20130103_205745_outLine +BABEL_OP1_204_81671_20130217_195401_outLine +BABEL_OP1_204_81971_20121225_005045_outLine +BABEL_OP1_204_82035_20130120_205546_inLine +BABEL_OP1_204_82138_20130328_213639_inLine +BABEL_OP1_204_82140_20130328_220209_inLine +BABEL_OP1_204_82140_20130328_220209_outLine +BABEL_OP1_204_82622_20121230_013735_inLine +BABEL_OP1_204_82622_20121230_013735_outLine +BABEL_OP1_204_82966_20130217_024614_outLine +BABEL_OP1_204_83609_20130716_211644_inLine +BABEL_OP1_204_83609_20130716_211644_outLine +BABEL_OP1_204_84605_20130114_234516_inLine +BABEL_OP1_204_84609_20130726_193719_inLine +BABEL_OP1_204_84737_20130614_151624_inLine +BABEL_OP1_204_85047_20130328_012807_outLine +BABEL_OP1_204_85260_20130822_000133_inLine +BABEL_OP1_204_85260_20130822_000133_outLine +BABEL_OP1_204_86467_20121224_182636_inLine +BABEL_OP1_204_86467_20121224_182636_outLine +BABEL_OP1_204_86628_20130516_235050_inLine +BABEL_OP1_204_87629_20130122_042941_inLine +BABEL_OP1_204_87629_20130122_042941_outLine +BABEL_OP1_204_87629_20130124_021257_inLine +BABEL_OP1_204_87629_20130124_021257_outLine +BABEL_OP1_204_88674_20130729_204202_inLine +BABEL_OP1_204_89059_20130224_201925_inLine +BABEL_OP1_204_89226_20130825_175510_inLine +BABEL_OP1_204_89226_20130825_175510_outLine +BABEL_OP1_204_89560_20130222_171412_outLine +BABEL_OP1_204_89560_20130222_172629_outLine +BABEL_OP1_204_89575_20130227_020958_outLine +BABEL_OP1_204_90318_20130825_173403_inLine +BABEL_OP1_204_90318_20130825_173403_outLine +BABEL_OP1_204_90572_20130221_011543_inLine +BABEL_OP1_204_90572_20130221_011543_outLine +BABEL_OP1_204_91372_20130311_005543_inLine +BABEL_OP1_204_91478_20130531_193258_inLine +BABEL_OP1_204_91478_20130531_193258_outLine +BABEL_OP1_204_92527_20130119_222341_outLine +BABEL_OP1_204_92792_20130225_210332_inLine +BABEL_OP1_204_92792_20130225_210332_outLine +BABEL_OP1_204_93007_20130628_153139_inLine +BABEL_OP1_204_93007_20130628_153139_outLine +BABEL_OP1_204_93153_20130108_171639_outLine +BABEL_OP1_204_93861_20130120_204242_inLine +BABEL_OP1_204_93861_20130120_210020_inLine +BABEL_OP1_204_94253_20130116_032205_inLine +BABEL_OP1_204_94333_20130110_220709_outLine +BABEL_OP1_204_94449_20130226_025646_outLine +BABEL_OP1_204_94666_20130122_132253_inLine +BABEL_OP1_204_95490_20130112_211544_outLine +BABEL_OP1_204_95677_20130818_153821_inLine +BABEL_OP1_204_95677_20130818_153821_outLine +BABEL_OP1_204_95750_20130830_003827_inLine +BABEL_OP1_204_95750_20130830_003827_outLine +BABEL_OP1_204_95966_20130131_013244_outLine +BABEL_OP1_204_96376_20130311_011036_inLine +BABEL_OP1_204_96405_20130104_164913_inLine +BABEL_OP1_204_96405_20130104_164913_outLine +BABEL_OP1_204_96985_20121231_002917_inLine +BABEL_OP1_204_97097_20130322_004237_inLine +BABEL_OP1_204_97264_20130216_205659_outLine +BABEL_OP1_204_97460_20130126_211058_outLine +BABEL_OP1_204_97557_20130123_172926_inLine +BABEL_OP1_204_99920_20130102_191548_outLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/dev.list b/egs/babel/s5d/conf/lists/205-kurmanji/dev.list new file mode 100644 index 00000000000..168081362fa --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/dev.list @@ -0,0 +1,132 @@ +BABEL_OP2_205_10019_20130330_212743_inLine +BABEL_OP2_205_10019_20130330_212743_outLine +BABEL_OP2_205_10319_20130304_201724_inLine 
+BABEL_OP2_205_10319_20130304_201724_outLine +BABEL_OP2_205_11096_20130410_000324_inLine +BABEL_OP2_205_11096_20130410_000324_outLine +BABEL_OP2_205_12036_20130315_061649_inLine +BABEL_OP2_205_12036_20130315_061649_outLine +BABEL_OP2_205_13792_20130307_054343_inLine +BABEL_OP2_205_13792_20130307_054343_outLine +BABEL_OP2_205_14229_20130325_212616_inLine +BABEL_OP2_205_14229_20130325_212616_outLine +BABEL_OP2_205_14440_20130327_213643_inLine +BABEL_OP2_205_14440_20130327_213643_outLine +BABEL_OP2_205_15216_20130406_215019_inLine +BABEL_OP2_205_15216_20130406_215019_outLine +BABEL_OP2_205_15216_20130406_215856_inLine +BABEL_OP2_205_15216_20130406_215856_outLine +BABEL_OP2_205_15638_20130331_200208_inLine +BABEL_OP2_205_15638_20130331_200208_outLine +BABEL_OP2_205_15730_20130303_011735_inLine +BABEL_OP2_205_15730_20130303_011735_outLine +BABEL_OP2_205_15848_20130228_192452_inLine +BABEL_OP2_205_15848_20130228_192452_outLine +BABEL_OP2_205_16056_20130323_010902_inLine +BABEL_OP2_205_16056_20130323_010902_outLine +BABEL_OP2_205_16787_20130323_072114_inLine +BABEL_OP2_205_16787_20130323_072114_outLine +BABEL_OP2_205_17127_20130407_044210_inLine +BABEL_OP2_205_17127_20130407_044210_outLine +BABEL_OP2_205_19663_20130320_062434_inLine +BABEL_OP2_205_19663_20130320_062434_outLine +BABEL_OP2_205_20454_20140125_002855_inLine +BABEL_OP2_205_20454_20140125_002855_outLine +BABEL_OP2_205_21029_20130313_025506_inLine +BABEL_OP2_205_21029_20130313_025506_outLine +BABEL_OP2_205_22288_20131228_021559_inLine +BABEL_OP2_205_22965_20130318_011526_inLine +BABEL_OP2_205_22965_20130318_011526_outLine +BABEL_OP2_205_23151_20130415_001434_inLine +BABEL_OP2_205_23151_20130415_001434_outLine +BABEL_OP2_205_23151_20130415_002727_inLine +BABEL_OP2_205_23151_20130415_002727_outLine +BABEL_OP2_205_23260_20130412_034843_inLine +BABEL_OP2_205_23260_20130412_034843_outLine +BABEL_OP2_205_24589_20130327_211515_inLine +BABEL_OP2_205_24589_20130327_211515_outLine +BABEL_OP2_205_26206_20130507_004626_inLine +BABEL_OP2_205_26206_20130507_004626_outLine +BABEL_OP2_205_26999_20130414_220838_inLine +BABEL_OP2_205_26999_20130414_220838_outLine +BABEL_OP2_205_28190_20130409_034344_inLine +BABEL_OP2_205_28190_20130409_034344_outLine +BABEL_OP2_205_28775_20130314_052506_inLine +BABEL_OP2_205_28775_20130314_052506_outLine +BABEL_OP2_205_28871_20130226_041104_inLine +BABEL_OP2_205_28871_20130226_041104_outLine +BABEL_OP2_205_28945_20130315_053607_inLine +BABEL_OP2_205_28945_20130315_053607_outLine +BABEL_OP2_205_29039_20130401_012825_inLine +BABEL_OP2_205_29039_20130401_012825_outLine +BABEL_OP2_205_29135_20130303_025305_inLine +BABEL_OP2_205_29135_20130303_025305_outLine +BABEL_OP2_205_29633_20130413_192214_inLine +BABEL_OP2_205_29633_20130413_192214_outLine +BABEL_OP2_205_29643_20130408_040750_inLine +BABEL_OP2_205_29643_20130408_040750_outLine +BABEL_OP2_205_29777_20130409_004437_inLine +BABEL_OP2_205_29777_20130409_004437_outLine +BABEL_OP2_205_30653_20130505_220845_inLine +BABEL_OP2_205_30653_20130505_220845_outLine +BABEL_OP2_205_31919_20130413_172911_inLine +BABEL_OP2_205_31919_20130413_172911_outLine +BABEL_OP2_205_33251_20130331_025243_inLine +BABEL_OP2_205_33251_20130331_025243_outLine +BABEL_OP2_205_34336_20130325_005404_inLine +BABEL_OP2_205_34336_20130325_005404_outLine +BABEL_OP2_205_35069_20130407_022433_inLine +BABEL_OP2_205_35069_20130407_022433_outLine +BABEL_OP2_205_35069_20130407_023338_inLine +BABEL_OP2_205_35069_20130407_023338_outLine +BABEL_OP2_205_35583_20130408_183143_inLine 
+BABEL_OP2_205_35583_20130408_183143_outLine +BABEL_OP2_205_35788_20131231_021724_inLine +BABEL_OP2_205_35788_20131231_021724_outLine +BABEL_OP2_205_36219_20130324_013816_inLine +BABEL_OP2_205_36219_20130324_013816_outLine +BABEL_OP2_205_36219_20130324_015535_inLine +BABEL_OP2_205_36219_20130324_015535_outLine +BABEL_OP2_205_36293_20130302_213235_inLine +BABEL_OP2_205_36293_20130302_213235_outLine +BABEL_OP2_205_41097_20130406_012211_inLine +BABEL_OP2_205_41097_20130406_012211_outLine +BABEL_OP2_205_44868_20130330_223802_inLine +BABEL_OP2_205_44868_20130330_223802_outLine +BABEL_OP2_205_45699_20140126_003136_inLine +BABEL_OP2_205_45699_20140126_003136_outLine +BABEL_OP2_205_46535_20140108_201338_inLine +BABEL_OP2_205_46535_20140108_201338_outLine +BABEL_OP2_205_50565_20130304_002644_inLine +BABEL_OP2_205_50565_20130304_002644_outLine +BABEL_OP2_205_51540_20130407_040411_inLine +BABEL_OP2_205_51540_20130407_040411_outLine +BABEL_OP2_205_51540_20130407_042258_inLine +BABEL_OP2_205_51540_20130407_042258_outLine +BABEL_OP2_205_54046_20130409_011916_inLine +BABEL_OP2_205_54046_20130409_011916_outLine +BABEL_OP2_205_54735_20131228_012336_inLine +BABEL_OP2_205_54735_20131228_012336_outLine +BABEL_OP2_205_60830_20131223_005744_inLine +BABEL_OP2_205_72903_20131225_002056_inLine +BABEL_OP2_205_72903_20131225_002056_outLine +BABEL_OP2_205_77225_20140106_235541_inLine +BABEL_OP2_205_77225_20140106_235541_outLine +BABEL_OP2_205_78360_20140123_011434_inLine +BABEL_OP2_205_78360_20140123_011434_outLine +BABEL_OP2_205_79139_20130621_004019_inLine +BABEL_OP2_205_79139_20130621_004019_outLine +BABEL_OP2_205_86830_20130413_224330_inLine +BABEL_OP2_205_86830_20130413_224330_outLine +BABEL_OP2_205_86830_20130413_225657_inLine +BABEL_OP2_205_86830_20130413_225657_outLine +BABEL_OP2_205_92060_20130413_223434_inLine +BABEL_OP2_205_92060_20130413_223434_outLine +BABEL_OP2_205_92643_20130413_053627_inLine +BABEL_OP2_205_92643_20130413_053627_outLine +BABEL_OP2_205_95399_20131222_015121_inLine +BABEL_OP2_205_95399_20131222_015121_outLine +BABEL_OP2_205_96808_20130412_211621_inLine +BABEL_OP2_205_96808_20130412_211621_outLine +BABEL_OP2_205_97136_20130525_003505_inLine +BABEL_OP2_205_97136_20130525_003505_outLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/eval.list b/egs/babel/s5d/conf/lists/205-kurmanji/eval.list new file mode 100644 index 00000000000..e0ceeb8f70d --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/eval.list @@ -0,0 +1,193 @@ +BABEL_OP2_205_10188_20130301_060141_inLine +BABEL_OP2_205_10188_20130301_060141_outLine +BABEL_OP2_205_10416_20130623_000709_inLine +BABEL_OP2_205_10416_20130623_000709_outLine +BABEL_OP2_205_11419_20140124_203146_inLine +BABEL_OP2_205_11419_20140124_203146_outLine +BABEL_OP2_205_13040_20130312_094024_inLine +BABEL_OP2_205_13040_20130312_094024_outLine +BABEL_OP2_205_13427_20130315_071728_inLine +BABEL_OP2_205_13427_20130315_071728_outLine +BABEL_OP2_205_13427_20130315_075858_inLine +BABEL_OP2_205_13427_20130315_075858_outLine +BABEL_OP2_205_14179_20130401_220334_inLine +BABEL_OP2_205_14179_20130401_220334_outLine +BABEL_OP2_205_14537_20130413_045331_inLine +BABEL_OP2_205_14537_20130413_045331_outLine +BABEL_OP2_205_14560_20130408_183055_inLine +BABEL_OP2_205_14560_20130408_183055_outLine +BABEL_OP2_205_15702_20130331_230832_inLine +BABEL_OP2_205_15702_20130331_230832_outLine +BABEL_OP2_205_16184_20130227_050048_inLine +BABEL_OP2_205_16184_20130227_050048_outLine +BABEL_OP2_205_16249_20140124_210751_inLine +BABEL_OP2_205_16249_20140124_210751_outLine 
+BABEL_OP2_205_16407_20140124_214655_inLine +BABEL_OP2_205_16407_20140124_214655_outLine +BABEL_OP2_205_16601_20130415_195023_inLine +BABEL_OP2_205_16601_20130415_195023_outLine +BABEL_OP2_205_17165_20130620_234702_inLine +BABEL_OP2_205_17165_20130620_234702_outLine +BABEL_OP2_205_17573_20130408_175948_inLine +BABEL_OP2_205_17573_20130408_175948_outLine +BABEL_OP2_205_17890_20130507_001713_inLine +BABEL_OP2_205_17890_20130507_001713_outLine +BABEL_OP2_205_18033_20140124_221028_inLine +BABEL_OP2_205_18033_20140124_221028_outLine +BABEL_OP2_205_18370_20140124_223813_inLine +BABEL_OP2_205_18370_20140124_223813_outLine +BABEL_OP2_205_18863_20130412_202349_inLine +BABEL_OP2_205_18863_20130412_202349_outLine +BABEL_OP2_205_19120_20130506_071138_inLine +BABEL_OP2_205_19120_20130506_071138_outLine +BABEL_OP2_205_19832_20130621_222438_inLine +BABEL_OP2_205_19832_20130621_222438_outLine +BABEL_OP2_205_20330_20130413_042945_inLine +BABEL_OP2_205_20330_20130413_042945_outLine +BABEL_OP2_205_22170_20131101_103425_inLine +BABEL_OP2_205_22170_20131101_103425_outLine +BABEL_OP2_205_22466_20130225_225235_inLine +BABEL_OP2_205_22466_20130225_225235_outLine +BABEL_OP2_205_22466_20130225_225943_inLine +BABEL_OP2_205_22466_20130225_225943_outLine +BABEL_OP2_205_22641_20130304_041448_inLine +BABEL_OP2_205_22641_20130304_041448_outLine +BABEL_OP2_205_23395_20130324_223525_inLine +BABEL_OP2_205_23395_20130324_223525_outLine +BABEL_OP2_205_23628_20130326_051335_inLine +BABEL_OP2_205_23628_20130326_051335_outLine +BABEL_OP2_205_24033_20130406_195331_inLine +BABEL_OP2_205_24033_20130406_195331_outLine +BABEL_OP2_205_24209_20140125_012503_inLine +BABEL_OP2_205_24209_20140125_012503_outLine +BABEL_OP2_205_24924_20130612_193640_inLine +BABEL_OP2_205_24924_20130612_193640_outLine +BABEL_OP2_205_25767_20130316_235631_inLine +BABEL_OP2_205_25767_20130316_235631_outLine +BABEL_OP2_205_26869_20140107_231859_inLine +BABEL_OP2_205_26869_20140107_231859_outLine +BABEL_OP2_205_28585_20130406_222735_inLine +BABEL_OP2_205_28585_20130406_222735_outLine +BABEL_OP2_205_29076_20130318_205813_inLine +BABEL_OP2_205_29076_20130318_205813_outLine +BABEL_OP2_205_29482_20140123_203957_inLine +BABEL_OP2_205_29482_20140123_203957_outLine +BABEL_OP2_205_30250_20130303_023602_inLine +BABEL_OP2_205_30250_20130303_023602_outLine +BABEL_OP2_205_30497_20130412_045747_inLine +BABEL_OP2_205_30497_20130412_045747_outLine +BABEL_OP2_205_31484_20130331_231345_inLine +BABEL_OP2_205_31484_20130331_231345_outLine +BABEL_OP2_205_31979_20130319_081826_inLine +BABEL_OP2_205_31979_20130319_081826_outLine +BABEL_OP2_205_32727_20130413_214408_inLine +BABEL_OP2_205_32727_20130413_214408_outLine +BABEL_OP2_205_33800_20140125_192240_inLine +BABEL_OP2_205_33800_20140125_192240_outLine +BABEL_OP2_205_33992_20130519_062650_inLine +BABEL_OP2_205_33992_20130519_062650_outLine +BABEL_OP2_205_34486_20130518_044858_inLine +BABEL_OP2_205_34486_20130518_044858_outLine +BABEL_OP2_205_34899_20130619_000929_inLine +BABEL_OP2_205_34899_20130619_000929_outLine +BABEL_OP2_205_35786_20130522_020532_inLine +BABEL_OP2_205_35786_20130522_020532_outLine +BABEL_OP2_205_36147_20140125_211617_inLine +BABEL_OP2_205_36147_20140125_211617_outLine +BABEL_OP2_205_37064_20130318_004959_inLine +BABEL_OP2_205_37064_20130318_004959_outLine +BABEL_OP2_205_38139_20130622_053934_inLine +BABEL_OP2_205_38139_20130622_053934_outLine +BABEL_OP2_205_38139_20130622_055315_inLine +BABEL_OP2_205_38139_20130622_055315_outLine +BABEL_OP2_205_38750_20130413_172545_inLine 
+BABEL_OP2_205_38750_20130413_172545_outLine +BABEL_OP2_205_38750_20130413_173308_inLine +BABEL_OP2_205_38750_20130413_173308_outLine +BABEL_OP2_205_39744_20130301_230818_inLine +BABEL_OP2_205_39744_20130301_230818_outLine +BABEL_OP2_205_41400_20140122_205716_inLine +BABEL_OP2_205_41400_20140122_205716_outLine +BABEL_OP2_205_41682_20140125_234229_inLine +BABEL_OP2_205_41682_20140125_234229_outLine +BABEL_OP2_205_42231_20130415_192437_inLine +BABEL_OP2_205_42231_20130415_192437_outLine +BABEL_OP2_205_42600_20130324_234058_inLine +BABEL_OP2_205_42600_20130324_234058_outLine +BABEL_OP2_205_43074_20130622_063932_inLine +BABEL_OP2_205_43074_20130622_063932_outLine +BABEL_OP2_205_43646_20130227_205147_inLine +BABEL_OP2_205_43646_20130227_205147_outLine +BABEL_OP2_205_44420_20130328_013519_inLine +BABEL_OP2_205_44420_20130328_013519_outLine +BABEL_OP2_205_45106_20130330_013041_inLine +BABEL_OP2_205_45106_20130330_013041_outLine +BABEL_OP2_205_45771_20130518_054435_inLine +BABEL_OP2_205_45771_20130518_054435_outLine +BABEL_OP2_205_45777_20130325_205405_inLine +BABEL_OP2_205_45777_20130325_205405_outLine +BABEL_OP2_205_45843_20130330_060240_inLine +BABEL_OP2_205_45843_20130330_060240_outLine +BABEL_OP2_205_45843_20130330_061029_inLine +BABEL_OP2_205_45843_20130330_061029_outLine +BABEL_OP2_205_46712_20130326_222120_inLine +BABEL_OP2_205_46712_20130326_222120_outLine +BABEL_OP2_205_46974_20130506_235400_inLine +BABEL_OP2_205_46974_20130506_235400_outLine +BABEL_OP2_205_46974_20130507_000125_inLine +BABEL_OP2_205_46974_20130507_000125_outLine +BABEL_OP2_205_47959_20130322_204503_inLine +BABEL_OP2_205_47959_20130322_204503_outLine +BABEL_OP2_205_50958_20130318_044644_inLine +BABEL_OP2_205_50958_20130318_044644_outLine +BABEL_OP2_205_50962_20130321_021704_inLine +BABEL_OP2_205_50962_20130321_021704_outLine +BABEL_OP2_205_51417_20130407_001304_inLine +BABEL_OP2_205_51417_20130407_001304_outLine +BABEL_OP2_205_56213_20130508_055436_inLine +BABEL_OP2_205_56213_20130508_055436_outLine +BABEL_OP2_205_56213_20130508_060404_inLine +BABEL_OP2_205_56213_20130508_060404_outLine +BABEL_OP2_205_57067_20130407_183303_inLine +BABEL_OP2_205_57067_20130407_183303_outLine +BABEL_OP2_205_57922_20130331_195052_inLine +BABEL_OP2_205_57922_20130331_195052_outLine +BABEL_OP2_205_60115_20130330_212943_inLine +BABEL_OP2_205_60115_20130330_212943_outLine +BABEL_OP2_205_62362_20130517_212752_inLine +BABEL_OP2_205_62362_20130517_212752_outLine +BABEL_OP2_205_63265_20131226_003348_inLine +BABEL_OP2_205_63265_20131226_003348_outLine +BABEL_OP2_205_63511_20131224_213929_inLine +BABEL_OP2_205_63511_20131224_213929_outLine +BABEL_OP2_205_65252_20130413_190417_inLine +BABEL_OP2_205_65252_20130413_190417_outLine +BABEL_OP2_205_65339_20131225_232144_inLine +BABEL_OP2_205_65339_20131225_232144_outLine +BABEL_OP2_205_70726_20140112_003521_inLine +BABEL_OP2_205_70726_20140112_003521_outLine +BABEL_OP2_205_76902_20140123_211702_inLine +BABEL_OP2_205_76902_20140123_211702_outLine +BABEL_OP2_205_78161_20140124_012828_inLine +BABEL_OP2_205_78161_20140124_012828_outLine +BABEL_OP2_205_78958_20140105_000039_inLine +BABEL_OP2_205_78958_20140105_000039_outLine +BABEL_OP2_205_81229_20130316_035102_inLine +BABEL_OP2_205_81229_20130316_035102_outLine +BABEL_OP2_205_85439_20130413_172716_inLine +BABEL_OP2_205_85439_20130413_172716_outLine +BABEL_OP2_205_90440_20140123_225611_inLine +BABEL_OP2_205_91930_20130413_193923_inLine +BABEL_OP2_205_91930_20130413_193923_outLine +BABEL_OP2_205_92698_20130622_032618_inLine 
+BABEL_OP2_205_92698_20130622_032618_outLine +BABEL_OP2_205_94141_20140118_223253_inLine +BABEL_OP2_205_94141_20140118_223253_outLine +BABEL_OP2_205_95966_20131224_023420_inLine +BABEL_OP2_205_96584_20130408_014557_inLine +BABEL_OP2_205_96584_20130408_014557_outLine +BABEL_OP2_205_96940_20140123_220447_inLine +BABEL_OP2_205_96940_20140123_220447_outLine +BABEL_OP2_205_97988_20130414_061145_inLine +BABEL_OP2_205_97988_20130414_061145_outLine +BABEL_OP2_205_98580_20131223_014628_inLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/evalpart1.list b/egs/babel/s5d/conf/lists/205-kurmanji/evalpart1.list new file mode 100644 index 00000000000..ff7234650d1 --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/evalpart1.list @@ -0,0 +1,63 @@ +BABEL_OP2_205_13040_20130312_094024_inLine +BABEL_OP2_205_13040_20130312_094024_outLine +BABEL_OP2_205_13427_20130315_071728_inLine +BABEL_OP2_205_13427_20130315_071728_outLine +BABEL_OP2_205_13427_20130315_075858_inLine +BABEL_OP2_205_13427_20130315_075858_outLine +BABEL_OP2_205_16184_20130227_050048_inLine +BABEL_OP2_205_16184_20130227_050048_outLine +BABEL_OP2_205_17165_20130620_234702_inLine +BABEL_OP2_205_17165_20130620_234702_outLine +BABEL_OP2_205_17573_20130408_175948_inLine +BABEL_OP2_205_17573_20130408_175948_outLine +BABEL_OP2_205_18863_20130412_202349_inLine +BABEL_OP2_205_18863_20130412_202349_outLine +BABEL_OP2_205_19120_20130506_071138_inLine +BABEL_OP2_205_19120_20130506_071138_outLine +BABEL_OP2_205_23628_20130326_051335_inLine +BABEL_OP2_205_23628_20130326_051335_outLine +BABEL_OP2_205_24033_20130406_195331_inLine +BABEL_OP2_205_24033_20130406_195331_outLine +BABEL_OP2_205_24209_20140125_012503_inLine +BABEL_OP2_205_24209_20140125_012503_outLine +BABEL_OP2_205_24924_20130612_193640_inLine +BABEL_OP2_205_24924_20130612_193640_outLine +BABEL_OP2_205_28585_20130406_222735_inLine +BABEL_OP2_205_28585_20130406_222735_outLine +BABEL_OP2_205_30250_20130303_023602_inLine +BABEL_OP2_205_30250_20130303_023602_outLine +BABEL_OP2_205_34899_20130619_000929_inLine +BABEL_OP2_205_34899_20130619_000929_outLine +BABEL_OP2_205_37064_20130318_004959_inLine +BABEL_OP2_205_37064_20130318_004959_outLine +BABEL_OP2_205_38750_20130413_172545_inLine +BABEL_OP2_205_38750_20130413_172545_outLine +BABEL_OP2_205_38750_20130413_173308_inLine +BABEL_OP2_205_38750_20130413_173308_outLine +BABEL_OP2_205_45106_20130330_013041_inLine +BABEL_OP2_205_45106_20130330_013041_outLine +BABEL_OP2_205_45777_20130325_205405_inLine +BABEL_OP2_205_45777_20130325_205405_outLine +BABEL_OP2_205_47959_20130322_204503_inLine +BABEL_OP2_205_47959_20130322_204503_outLine +BABEL_OP2_205_50958_20130318_044644_inLine +BABEL_OP2_205_50958_20130318_044644_outLine +BABEL_OP2_205_57067_20130407_183303_inLine +BABEL_OP2_205_57067_20130407_183303_outLine +BABEL_OP2_205_57922_20130331_195052_inLine +BABEL_OP2_205_57922_20130331_195052_outLine +BABEL_OP2_205_63511_20131224_213929_inLine +BABEL_OP2_205_63511_20131224_213929_outLine +BABEL_OP2_205_65339_20131225_232144_inLine +BABEL_OP2_205_65339_20131225_232144_outLine +BABEL_OP2_205_81229_20130316_035102_inLine +BABEL_OP2_205_81229_20130316_035102_outLine +BABEL_OP2_205_85439_20130413_172716_inLine +BABEL_OP2_205_85439_20130413_172716_outLine +BABEL_OP2_205_91930_20130413_193923_inLine +BABEL_OP2_205_91930_20130413_193923_outLine +BABEL_OP2_205_92698_20130622_032618_inLine +BABEL_OP2_205_92698_20130622_032618_outLine +BABEL_OP2_205_94141_20140118_223253_inLine +BABEL_OP2_205_94141_20140118_223253_outLine +BABEL_OP2_205_98580_20131223_014628_inLine 
diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.list b/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.list new file mode 100644 index 00000000000..022ddf05869 --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.list @@ -0,0 +1,133 @@ +BABEL_OP2_205_10184_20130315_054426_inLine +BABEL_OP2_205_10184_20130315_054426_outLine +BABEL_OP2_205_10647_20130413_190550_inLine +BABEL_OP2_205_10647_20130413_190550_outLine +BABEL_OP2_205_12220_20130323_002310_inLine +BABEL_OP2_205_12220_20130323_002310_outLine +BABEL_OP2_205_14807_20130326_065101_inLine +BABEL_OP2_205_14807_20130326_065101_outLine +BABEL_OP2_205_14807_20130326_070339_inLine +BABEL_OP2_205_14807_20130326_070339_outLine +BABEL_OP2_205_14875_20130319_211742_inLine +BABEL_OP2_205_14875_20130319_211742_outLine +BABEL_OP2_205_14875_20130319_213338_inLine +BABEL_OP2_205_14875_20130319_213338_outLine +BABEL_OP2_205_14929_20131223_022753_inLine +BABEL_OP2_205_15535_20130506_195619_inLine +BABEL_OP2_205_15535_20130506_195619_outLine +BABEL_OP2_205_17881_20130413_190631_inLine +BABEL_OP2_205_17881_20130413_190631_outLine +BABEL_OP2_205_17881_20130413_191638_inLine +BABEL_OP2_205_17881_20130413_191638_outLine +BABEL_OP2_205_17914_20130407_235720_inLine +BABEL_OP2_205_17914_20130407_235720_outLine +BABEL_OP2_205_18766_20130413_033911_inLine +BABEL_OP2_205_18766_20130413_033911_outLine +BABEL_OP2_205_19134_20130331_195936_inLine +BABEL_OP2_205_19134_20130331_195936_outLine +BABEL_OP2_205_19749_20130406_231234_inLine +BABEL_OP2_205_19749_20130406_231234_outLine +BABEL_OP2_205_20800_20130408_015430_inLine +BABEL_OP2_205_20800_20130408_015430_outLine +BABEL_OP2_205_20916_20130228_200116_inLine +BABEL_OP2_205_20916_20130228_200116_outLine +BABEL_OP2_205_21206_20130312_205638_inLine +BABEL_OP2_205_21206_20130312_205638_outLine +BABEL_OP2_205_22321_20130308_042214_inLine +BABEL_OP2_205_22321_20130308_042214_outLine +BABEL_OP2_205_23092_20130413_181637_inLine +BABEL_OP2_205_23092_20130413_181637_outLine +BABEL_OP2_205_23893_20140123_003759_inLine +BABEL_OP2_205_23893_20140123_003759_outLine +BABEL_OP2_205_24239_20130415_171824_inLine +BABEL_OP2_205_24239_20130415_171824_outLine +BABEL_OP2_205_24290_20130414_221432_inLine +BABEL_OP2_205_24290_20130414_221432_outLine +BABEL_OP2_205_24323_20130326_051101_inLine +BABEL_OP2_205_24323_20130326_051101_outLine +BABEL_OP2_205_24605_20130311_012103_inLine +BABEL_OP2_205_24605_20130311_012103_outLine +BABEL_OP2_205_25085_20130612_023620_inLine +BABEL_OP2_205_25085_20130612_023620_outLine +BABEL_OP2_205_26574_20130509_203057_inLine +BABEL_OP2_205_26574_20130509_203057_outLine +BABEL_OP2_205_26602_20130412_235831_inLine +BABEL_OP2_205_26602_20130412_235831_outLine +BABEL_OP2_205_28477_20130412_234819_inLine +BABEL_OP2_205_28477_20130412_234819_outLine +BABEL_OP2_205_28522_20130401_211215_inLine +BABEL_OP2_205_28522_20130401_211215_outLine +BABEL_OP2_205_31039_20140125_023755_inLine +BABEL_OP2_205_31039_20140125_023755_outLine +BABEL_OP2_205_32630_20130412_054815_inLine +BABEL_OP2_205_32630_20130412_054815_outLine +BABEL_OP2_205_33355_20130311_214515_inLine +BABEL_OP2_205_33355_20130311_214515_outLine +BABEL_OP2_205_33840_20130507_012940_inLine +BABEL_OP2_205_33840_20130507_012940_outLine +BABEL_OP2_205_34106_20130301_221919_inLine +BABEL_OP2_205_34106_20130301_221919_outLine +BABEL_OP2_205_34197_20130302_231101_inLine +BABEL_OP2_205_34197_20130302_231101_outLine +BABEL_OP2_205_34647_20140125_205318_inLine +BABEL_OP2_205_34647_20140125_205318_outLine 
+BABEL_OP2_205_37853_20130413_005407_inLine +BABEL_OP2_205_37853_20130413_005407_outLine +BABEL_OP2_205_38554_20130301_085606_inLine +BABEL_OP2_205_38554_20130301_085606_outLine +BABEL_OP2_205_38664_20130325_030156_inLine +BABEL_OP2_205_38664_20130325_030156_outLine +BABEL_OP2_205_38963_20131227_202341_inLine +BABEL_OP2_205_38963_20131227_202341_outLine +BABEL_OP2_205_39059_20130414_033146_inLine +BABEL_OP2_205_39059_20130414_033146_outLine +BABEL_OP2_205_39059_20130414_034411_inLine +BABEL_OP2_205_39059_20130414_034411_outLine +BABEL_OP2_205_40196_20140125_222906_inLine +BABEL_OP2_205_40196_20140125_222906_outLine +BABEL_OP2_205_41618_20130312_214004_inLine +BABEL_OP2_205_41618_20130312_214004_outLine +BABEL_OP2_205_41741_20130326_004056_inLine +BABEL_OP2_205_41741_20130326_004056_outLine +BABEL_OP2_205_42619_20130325_002736_inLine +BABEL_OP2_205_42619_20130325_002736_outLine +BABEL_OP2_205_43368_20130329_211826_inLine +BABEL_OP2_205_43368_20130329_211826_outLine +BABEL_OP2_205_43368_20130329_212612_inLine +BABEL_OP2_205_43368_20130329_212612_outLine +BABEL_OP2_205_45121_20130412_035841_inLine +BABEL_OP2_205_45121_20130412_035841_outLine +BABEL_OP2_205_46315_20130506_231421_inLine +BABEL_OP2_205_46315_20130506_231421_outLine +BABEL_OP2_205_49118_20130412_210858_inLine +BABEL_OP2_205_49118_20130412_210858_outLine +BABEL_OP2_205_49118_20130412_211622_inLine +BABEL_OP2_205_49118_20130412_211622_outLine +BABEL_OP2_205_50745_20130505_195625_inLine +BABEL_OP2_205_50745_20130505_195625_outLine +BABEL_OP2_205_53415_20131216_223652_inLine +BABEL_OP2_205_53415_20131216_223652_outLine +BABEL_OP2_205_58026_20131219_010750_inLine +BABEL_OP2_205_58026_20131219_010750_outLine +BABEL_OP2_205_58821_20130415_190958_inLine +BABEL_OP2_205_58821_20130415_190958_outLine +BABEL_OP2_205_59645_20130619_190548_inLine +BABEL_OP2_205_59645_20130619_190548_outLine +BABEL_OP2_205_62200_20130405_021524_inLine +BABEL_OP2_205_62200_20130405_021524_outLine +BABEL_OP2_205_62289_20140122_214709_inLine +BABEL_OP2_205_62289_20140122_214709_outLine +BABEL_OP2_205_70716_20130413_193114_inLine +BABEL_OP2_205_70716_20130413_193114_outLine +BABEL_OP2_205_77242_20130616_015950_inLine +BABEL_OP2_205_77242_20130616_015950_outLine +BABEL_OP2_205_84605_20130319_203823_inLine +BABEL_OP2_205_84605_20130319_203823_outLine +BABEL_OP2_205_84737_20130407_054058_inLine +BABEL_OP2_205_84737_20130407_054058_outLine +BABEL_OP2_205_84936_20130405_063301_inLine +BABEL_OP2_205_84936_20130405_063301_outLine +BABEL_OP2_205_86826_20130411_224207_inLine +BABEL_OP2_205_86826_20130411_224207_outLine +BABEL_OP2_205_90760_20130612_022556_inLine +BABEL_OP2_205_90760_20130612_022556_outLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.untranscribed.list new file mode 100644 index 00000000000..89ee0b28779 --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.untranscribed.list @@ -0,0 +1,399 @@ +BABEL_OP2_205_10036_20130325_212656_inLine +BABEL_OP2_205_10036_20130325_212656_outLine +BABEL_OP2_205_10482_20130330_232812_inLine +BABEL_OP2_205_10482_20130330_232812_outLine +BABEL_OP2_205_10638_20140122_201207_inLine +BABEL_OP2_205_10638_20140122_201207_outLine +BABEL_OP2_205_10938_20130402_021742_inLine +BABEL_OP2_205_10938_20130402_021742_outLine +BABEL_OP2_205_10966_20130324_203837_inLine +BABEL_OP2_205_10966_20130324_203837_outLine +BABEL_OP2_205_11352_20130505_190427_inLine +BABEL_OP2_205_11352_20130505_190427_outLine +BABEL_OP2_205_11581_20130317_071927_inLine 
+BABEL_OP2_205_11581_20130317_071927_outLine +BABEL_OP2_205_11663_20130402_031747_inLine +BABEL_OP2_205_11663_20130402_031747_outLine +BABEL_OP2_205_11797_20130307_233702_inLine +BABEL_OP2_205_11797_20130307_233702_outLine +BABEL_OP2_205_11797_20130307_235053_inLine +BABEL_OP2_205_11797_20130307_235053_outLine +BABEL_OP2_205_12635_20130406_230527_inLine +BABEL_OP2_205_12635_20130406_230527_outLine +BABEL_OP2_205_13030_20130330_234019_inLine +BABEL_OP2_205_13030_20130330_234019_outLine +BABEL_OP2_205_13189_20130413_230649_inLine +BABEL_OP2_205_13189_20130413_230649_outLine +BABEL_OP2_205_13324_20130318_043359_inLine +BABEL_OP2_205_13324_20130318_043359_outLine +BABEL_OP2_205_13744_20130302_055938_inLine +BABEL_OP2_205_13744_20130302_055938_outLine +BABEL_OP2_205_14137_20130326_212737_inLine +BABEL_OP2_205_14137_20130326_212737_outLine +BABEL_OP2_205_14539_20130413_020822_inLine +BABEL_OP2_205_14539_20130413_020822_outLine +BABEL_OP2_205_14729_20130526_024319_inLine +BABEL_OP2_205_14729_20130526_024319_outLine +BABEL_OP2_205_14814_20130326_062123_inLine +BABEL_OP2_205_14814_20130326_062123_outLine +BABEL_OP2_205_14899_20130303_062436_inLine +BABEL_OP2_205_14899_20130303_062436_outLine +BABEL_OP2_205_14972_20130312_213702_inLine +BABEL_OP2_205_14972_20130312_213702_outLine +BABEL_OP2_205_15024_20131222_033424_inLine +BABEL_OP2_205_15024_20131222_033424_outLine +BABEL_OP2_205_15227_20130412_005202_inLine +BABEL_OP2_205_15227_20130412_005202_outLine +BABEL_OP2_205_15382_20130325_075405_inLine +BABEL_OP2_205_15382_20130325_075405_outLine +BABEL_OP2_205_16839_20130407_052530_inLine +BABEL_OP2_205_16839_20130407_052530_outLine +BABEL_OP2_205_16886_20130326_054927_inLine +BABEL_OP2_205_16886_20130326_054927_outLine +BABEL_OP2_205_16924_20130331_232254_inLine +BABEL_OP2_205_16924_20130331_232254_outLine +BABEL_OP2_205_17320_20130413_054847_inLine +BABEL_OP2_205_17320_20130413_054847_outLine +BABEL_OP2_205_17440_20130413_195207_inLine +BABEL_OP2_205_17440_20130413_195207_outLine +BABEL_OP2_205_17472_20130508_013928_inLine +BABEL_OP2_205_17472_20130508_013928_outLine +BABEL_OP2_205_17496_20130414_215325_inLine +BABEL_OP2_205_17496_20130414_215325_outLine +BABEL_OP2_205_17520_20130312_074120_inLine +BABEL_OP2_205_17520_20130312_074120_outLine +BABEL_OP2_205_17615_20130407_234405_inLine +BABEL_OP2_205_17615_20130407_234405_outLine +BABEL_OP2_205_18242_20130408_005657_inLine +BABEL_OP2_205_18242_20130408_005657_outLine +BABEL_OP2_205_18291_20130618_004811_inLine +BABEL_OP2_205_18291_20130618_004811_outLine +BABEL_OP2_205_18566_20130505_173829_inLine +BABEL_OP2_205_18566_20130505_173829_outLine +BABEL_OP2_205_19589_20130413_203154_inLine +BABEL_OP2_205_19589_20130413_203154_outLine +BABEL_OP2_205_19703_20130325_042858_inLine +BABEL_OP2_205_19703_20130325_042858_outLine +BABEL_OP2_205_19722_20130306_045231_inLine +BABEL_OP2_205_19722_20130306_045231_outLine +BABEL_OP2_205_20133_20130228_055409_inLine +BABEL_OP2_205_20133_20130228_055409_outLine +BABEL_OP2_205_20922_20130406_225439_inLine +BABEL_OP2_205_20922_20130406_225439_outLine +BABEL_OP2_205_20985_20130401_025757_inLine +BABEL_OP2_205_20985_20130401_025757_outLine +BABEL_OP2_205_21004_20130408_222653_inLine +BABEL_OP2_205_21004_20130408_222653_outLine +BABEL_OP2_205_21435_20130414_044944_inLine +BABEL_OP2_205_21435_20130414_044944_outLine +BABEL_OP2_205_21543_20140125_004741_inLine +BABEL_OP2_205_21543_20140125_004741_outLine +BABEL_OP2_205_21807_20130324_054526_inLine +BABEL_OP2_205_21892_20130507_023354_inLine 
+BABEL_OP2_205_21892_20130507_023354_outLine +BABEL_OP2_205_22446_20130309_073946_outLine +BABEL_OP2_205_22494_20130331_230611_inLine +BABEL_OP2_205_22494_20130331_230611_outLine +BABEL_OP2_205_22624_20130331_012106_inLine +BABEL_OP2_205_22624_20130331_012106_outLine +BABEL_OP2_205_22629_20131231_223232_inLine +BABEL_OP2_205_22629_20131231_223232_outLine +BABEL_OP2_205_22918_20130413_043023_inLine +BABEL_OP2_205_22918_20130413_043023_outLine +BABEL_OP2_205_22918_20130413_044543_inLine +BABEL_OP2_205_22918_20130413_044543_outLine +BABEL_OP2_205_23006_20130322_202429_inLine +BABEL_OP2_205_23006_20130322_202429_outLine +BABEL_OP2_205_23046_20130327_010653_inLine +BABEL_OP2_205_23046_20130327_010653_outLine +BABEL_OP2_205_23190_20130323_014750_inLine +BABEL_OP2_205_23190_20130323_014750_outLine +BABEL_OP2_205_23239_20130331_034518_inLine +BABEL_OP2_205_23239_20130331_034518_outLine +BABEL_OP2_205_23752_20140123_024924_inLine +BABEL_OP2_205_23752_20140123_024924_outLine +BABEL_OP2_205_24253_20130505_214600_inLine +BABEL_OP2_205_24253_20130505_214600_outLine +BABEL_OP2_205_24270_20130406_070358_inLine +BABEL_OP2_205_24270_20130406_070358_outLine +BABEL_OP2_205_24470_20130406_021646_inLine +BABEL_OP2_205_24470_20130406_021646_outLine +BABEL_OP2_205_24532_20130227_052040_inLine +BABEL_OP2_205_24532_20130227_052040_outLine +BABEL_OP2_205_24569_20130508_235213_inLine +BABEL_OP2_205_24569_20130508_235213_outLine +BABEL_OP2_205_24679_20130303_043753_inLine +BABEL_OP2_205_24679_20130303_043753_outLine +BABEL_OP2_205_25719_20130406_231631_inLine +BABEL_OP2_205_25719_20130406_231631_outLine +BABEL_OP2_205_25719_20130406_232555_inLine +BABEL_OP2_205_25719_20130406_232555_outLine +BABEL_OP2_205_25719_20130406_233313_inLine +BABEL_OP2_205_25719_20130406_233313_outLine +BABEL_OP2_205_25961_20130305_063202_inLine +BABEL_OP2_205_25961_20130305_063202_outLine +BABEL_OP2_205_26381_20140125_015707_inLine +BABEL_OP2_205_26381_20140125_015707_outLine +BABEL_OP2_205_26388_20130330_021001_inLine +BABEL_OP2_205_26388_20130330_021001_outLine +BABEL_OP2_205_26507_20131101_103425_inLine +BABEL_OP2_205_26507_20131101_103425_outLine +BABEL_OP2_205_27125_20130227_061700_inLine +BABEL_OP2_205_27125_20130227_061700_outLine +BABEL_OP2_205_27189_20140104_001032_inLine +BABEL_OP2_205_27189_20140104_001032_outLine +BABEL_OP2_205_27203_20130331_021946_inLine +BABEL_OP2_205_27203_20130331_021946_outLine +BABEL_OP2_205_27590_20130506_201921_inLine +BABEL_OP2_205_27590_20130506_201921_outLine +BABEL_OP2_205_27841_20130414_222155_inLine +BABEL_OP2_205_27841_20130414_222155_outLine +BABEL_OP2_205_28012_20130507_054019_inLine +BABEL_OP2_205_28012_20130507_054019_outLine +BABEL_OP2_205_28419_20130320_202136_inLine +BABEL_OP2_205_28419_20130320_202136_outLine +BABEL_OP2_205_29023_20130314_060343_inLine +BABEL_OP2_205_29023_20130314_060343_outLine +BABEL_OP2_205_29323_20130414_230355_inLine +BABEL_OP2_205_29323_20130414_230355_outLine +BABEL_OP2_205_29404_20130414_214714_inLine +BABEL_OP2_205_29404_20130414_214714_outLine +BABEL_OP2_205_29439_20130413_182356_inLine +BABEL_OP2_205_29439_20130413_182356_outLine +BABEL_OP2_205_30013_20130401_005939_inLine +BABEL_OP2_205_30013_20130401_005939_outLine +BABEL_OP2_205_30180_20130323_005331_inLine +BABEL_OP2_205_30180_20130323_005331_outLine +BABEL_OP2_205_30395_20130316_060814_inLine +BABEL_OP2_205_30395_20130316_060814_outLine +BABEL_OP2_205_30432_20130330_200303_inLine +BABEL_OP2_205_30432_20130330_200303_outLine +BABEL_OP2_205_30869_20130412_202311_inLine 
+BABEL_OP2_205_30869_20130412_202311_outLine +BABEL_OP2_205_31109_20130619_181905_inLine +BABEL_OP2_205_31109_20130619_181905_outLine +BABEL_OP2_205_31346_20130507_204621_inLine +BABEL_OP2_205_31346_20130507_204621_outLine +BABEL_OP2_205_32097_20130301_034527_inLine +BABEL_OP2_205_32097_20130301_034527_outLine +BABEL_OP2_205_32122_20130321_004623_inLine +BABEL_OP2_205_32122_20130321_004623_outLine +BABEL_OP2_205_32122_20130321_010341_inLine +BABEL_OP2_205_32122_20130321_010341_outLine +BABEL_OP2_205_32244_20130412_190534_inLine +BABEL_OP2_205_32244_20130412_190534_outLine +BABEL_OP2_205_32837_20130507_011223_inLine +BABEL_OP2_205_32837_20130507_011223_outLine +BABEL_OP2_205_33229_20130414_213157_inLine +BABEL_OP2_205_33229_20130414_213157_outLine +BABEL_OP2_205_33273_20130320_040141_inLine +BABEL_OP2_205_33273_20130320_040141_outLine +BABEL_OP2_205_33424_20130412_193538_inLine +BABEL_OP2_205_33424_20130412_193538_outLine +BABEL_OP2_205_33476_20130405_051711_inLine +BABEL_OP2_205_33476_20130405_051711_outLine +BABEL_OP2_205_33497_20130619_220728_inLine +BABEL_OP2_205_33497_20130619_220728_outLine +BABEL_OP2_205_33913_20130414_052534_inLine +BABEL_OP2_205_33913_20130414_052534_outLine +BABEL_OP2_205_33951_20130619_212409_inLine +BABEL_OP2_205_33951_20130619_212409_outLine +BABEL_OP2_205_34586_20140125_203417_inLine +BABEL_OP2_205_34586_20140125_203417_outLine +BABEL_OP2_205_34903_20130406_055051_inLine +BABEL_OP2_205_34903_20130406_055051_outLine +BABEL_OP2_205_35139_20130312_070415_inLine +BABEL_OP2_205_35139_20130312_070415_outLine +BABEL_OP2_205_35143_20130414_203900_inLine +BABEL_OP2_205_35143_20130414_203900_outLine +BABEL_OP2_205_35181_20130413_201739_inLine +BABEL_OP2_205_35181_20130413_201739_outLine +BABEL_OP2_205_36642_20130413_013238_inLine +BABEL_OP2_205_36642_20130413_013238_outLine +BABEL_OP2_205_37228_20130407_205807_inLine +BABEL_OP2_205_37228_20130407_205807_outLine +BABEL_OP2_205_37271_20130507_231712_inLine +BABEL_OP2_205_37271_20130507_231712_outLine +BABEL_OP2_205_37285_20130401_061737_inLine +BABEL_OP2_205_37285_20130401_061737_outLine +BABEL_OP2_205_37290_20130405_070403_inLine +BABEL_OP2_205_37290_20130405_070403_outLine +BABEL_OP2_205_37598_20130405_034853_inLine +BABEL_OP2_205_37598_20130405_034853_outLine +BABEL_OP2_205_37682_20130325_022952_inLine +BABEL_OP2_205_37682_20130325_022952_outLine +BABEL_OP2_205_37776_20140125_220835_inLine +BABEL_OP2_205_37776_20140125_220835_outLine +BABEL_OP2_205_38340_20130315_063442_inLine +BABEL_OP2_205_38340_20130315_063442_outLine +BABEL_OP2_205_38689_20130414_233704_inLine +BABEL_OP2_205_38689_20130414_233704_outLine +BABEL_OP2_205_38741_20130315_071146_inLine +BABEL_OP2_205_38741_20130315_071146_outLine +BABEL_OP2_205_38878_20130406_202135_inLine +BABEL_OP2_205_38878_20130406_202135_outLine +BABEL_OP2_205_39555_20130507_025010_inLine +BABEL_OP2_205_39555_20130507_025010_outLine +BABEL_OP2_205_40557_20130413_185709_inLine +BABEL_OP2_205_40557_20130413_185709_outLine +BABEL_OP2_205_40557_20130413_190849_inLine +BABEL_OP2_205_40557_20130413_190849_outLine +BABEL_OP2_205_40565_20130401_015506_inLine +BABEL_OP2_205_40565_20130401_015506_outLine +BABEL_OP2_205_40939_20140125_231452_inLine +BABEL_OP2_205_40939_20140125_231452_outLine +BABEL_OP2_205_41038_20130405_060002_inLine +BABEL_OP2_205_41038_20130405_060002_outLine +BABEL_OP2_205_41174_20130318_033313_inLine +BABEL_OP2_205_41174_20130318_033313_outLine +BABEL_OP2_205_42834_20130414_202256_inLine +BABEL_OP2_205_42834_20130414_202256_outLine 
+BABEL_OP2_205_42991_20130401_024013_inLine +BABEL_OP2_205_42991_20130401_024013_outLine +BABEL_OP2_205_42991_20130401_025044_inLine +BABEL_OP2_205_42991_20130401_025044_outLine +BABEL_OP2_205_43286_20130304_044510_inLine +BABEL_OP2_205_43286_20130304_044510_outLine +BABEL_OP2_205_43788_20130331_024429_inLine +BABEL_OP2_205_43788_20130331_024429_outLine +BABEL_OP2_205_43788_20130331_030508_inLine +BABEL_OP2_205_43788_20130331_030508_outLine +BABEL_OP2_205_44847_20130325_055635_inLine +BABEL_OP2_205_44847_20130325_055635_outLine +BABEL_OP2_205_45235_20130509_011826_inLine +BABEL_OP2_205_45235_20130509_011826_outLine +BABEL_OP2_205_45374_20140126_000904_inLine +BABEL_OP2_205_45374_20140126_000904_outLine +BABEL_OP2_205_46041_20130507_202255_inLine +BABEL_OP2_205_46041_20130507_202255_outLine +BABEL_OP2_205_46589_20130331_014535_inLine +BABEL_OP2_205_46589_20130331_014535_outLine +BABEL_OP2_205_46757_20130401_191649_inLine +BABEL_OP2_205_46757_20130401_191649_outLine +BABEL_OP2_205_46976_20131104_051409_inLine +BABEL_OP2_205_46976_20131104_051409_outLine +BABEL_OP2_205_47110_20140126_005953_inLine +BABEL_OP2_205_47110_20140126_005953_outLine +BABEL_OP2_205_47451_20130408_195325_inLine +BABEL_OP2_205_47451_20130408_195325_outLine +BABEL_OP2_205_47487_20130328_060026_inLine +BABEL_OP2_205_47487_20130328_060026_outLine +BABEL_OP2_205_47823_20130330_204952_inLine +BABEL_OP2_205_47823_20130330_204952_outLine +BABEL_OP2_205_47878_20130319_211057_inLine +BABEL_OP2_205_47878_20130319_211057_outLine +BABEL_OP2_205_48422_20130407_020759_inLine +BABEL_OP2_205_48422_20130407_020759_outLine +BABEL_OP2_205_49287_20130327_053930_inLine +BABEL_OP2_205_49287_20130327_053930_outLine +BABEL_OP2_205_49630_20130401_013908_inLine +BABEL_OP2_205_49630_20130401_013908_outLine +BABEL_OP2_205_49768_20130330_025558_inLine +BABEL_OP2_205_49768_20130330_025558_outLine +BABEL_OP2_205_50186_20140126_012415_inLine +BABEL_OP2_205_50186_20140126_012415_outLine +BABEL_OP2_205_50779_20130320_043549_inLine +BABEL_OP2_205_50779_20130320_043549_outLine +BABEL_OP2_205_50779_20130320_044244_inLine +BABEL_OP2_205_50779_20130320_044244_outLine +BABEL_OP2_205_51015_20130401_202255_inLine +BABEL_OP2_205_51015_20130401_202255_outLine +BABEL_OP2_205_52246_20130323_232916_inLine +BABEL_OP2_205_52246_20130323_232916_outLine +BABEL_OP2_205_52490_20130326_051608_inLine +BABEL_OP2_205_52490_20130326_051608_outLine +BABEL_OP2_205_53063_20130508_051415_inLine +BABEL_OP2_205_53063_20130508_051415_outLine +BABEL_OP2_205_53441_20140126_015538_inLine +BABEL_OP2_205_53441_20140126_015538_outLine +BABEL_OP2_205_53758_20131228_000238_inLine +BABEL_OP2_205_53758_20131228_000238_outLine +BABEL_OP2_205_54104_20130323_222459_inLine +BABEL_OP2_205_54104_20130323_222459_outLine +BABEL_OP2_205_54827_20130414_030516_inLine +BABEL_OP2_205_54827_20130414_030516_outLine +BABEL_OP2_205_54841_20130414_225855_inLine +BABEL_OP2_205_54841_20130414_225855_outLine +BABEL_OP2_205_54953_20130317_013652_inLine +BABEL_OP2_205_54953_20130317_013652_outLine +BABEL_OP2_205_56198_20130321_041358_inLine +BABEL_OP2_205_56198_20130321_041358_outLine +BABEL_OP2_205_56925_20140126_023234_inLine +BABEL_OP2_205_56925_20140126_023234_outLine +BABEL_OP2_205_57065_20130407_232501_inLine +BABEL_OP2_205_57065_20130407_232501_outLine +BABEL_OP2_205_57678_20130323_232415_inLine +BABEL_OP2_205_57678_20130323_232415_outLine +BABEL_OP2_205_57935_20130322_224501_inLine +BABEL_OP2_205_57935_20130322_224501_outLine +BABEL_OP2_205_59078_20130406_075721_inLine 
+BABEL_OP2_205_59078_20130406_075721_outLine +BABEL_OP2_205_59635_20130406_225014_inLine +BABEL_OP2_205_59635_20130406_225014_outLine +BABEL_OP2_205_60282_20140107_024858_inLine +BABEL_OP2_205_60282_20140107_024858_outLine +BABEL_OP2_205_60436_20130413_200129_inLine +BABEL_OP2_205_60436_20130413_200129_outLine +BABEL_OP2_205_61440_20130411_231312_inLine +BABEL_OP2_205_61440_20130411_231312_outLine +BABEL_OP2_205_61971_20130413_052620_inLine +BABEL_OP2_205_61971_20130413_052620_outLine +BABEL_OP2_205_62014_20130329_225214_inLine +BABEL_OP2_205_62014_20130329_225214_outLine +BABEL_OP2_205_62360_20140122_233956_inLine +BABEL_OP2_205_62810_20130304_075632_inLine +BABEL_OP2_205_62810_20130304_075632_outLine +BABEL_OP2_205_63084_20130405_025236_inLine +BABEL_OP2_205_63084_20130405_025236_outLine +BABEL_OP2_205_63787_20130310_001339_inLine +BABEL_OP2_205_63787_20130310_001339_outLine +BABEL_OP2_205_63920_20131226_014831_inLine +BABEL_OP2_205_64688_20131226_232545_inLine +BABEL_OP2_205_64688_20131226_232545_outLine +BABEL_OP2_205_66971_20130413_002731_inLine +BABEL_OP2_205_66971_20130413_002731_outLine +BABEL_OP2_205_67964_20140122_221653_inLine +BABEL_OP2_205_67964_20140122_221653_outLine +BABEL_OP2_205_68289_20130407_225726_inLine +BABEL_OP2_205_68289_20130407_225726_outLine +BABEL_OP2_205_68748_20130330_225712_inLine +BABEL_OP2_205_68748_20130330_225712_outLine +BABEL_OP2_205_70452_20130328_011715_inLine +BABEL_OP2_205_70452_20130328_011715_outLine +BABEL_OP2_205_70713_20131129_235040_inLine +BABEL_OP2_205_74799_20130407_030553_inLine +BABEL_OP2_205_74799_20130407_030553_outLine +BABEL_OP2_205_76683_20130331_201352_inLine +BABEL_OP2_205_76683_20130331_201352_outLine +BABEL_OP2_205_78254_20130323_051609_inLine +BABEL_OP2_205_78254_20130323_051609_outLine +BABEL_OP2_205_80559_20130323_224458_inLine +BABEL_OP2_205_80559_20130323_224458_outLine +BABEL_OP2_205_81149_20130412_061213_inLine +BABEL_OP2_205_81149_20130412_061213_outLine +BABEL_OP2_205_82138_20130622_210458_inLine +BABEL_OP2_205_82138_20130622_210458_outLine +BABEL_OP2_205_86191_20130323_060631_inLine +BABEL_OP2_205_86191_20130323_060631_outLine +BABEL_OP2_205_86433_20130325_084312_inLine +BABEL_OP2_205_86433_20130325_084312_outLine +BABEL_OP2_205_86676_20130331_014116_inLine +BABEL_OP2_205_86676_20130331_014116_outLine +BABEL_OP2_205_86715_20130618_002759_inLine +BABEL_OP2_205_86715_20130618_002759_outLine +BABEL_OP2_205_91336_20130622_230929_inLine +BABEL_OP2_205_91336_20130622_230929_outLine +BABEL_OP2_205_92605_20140123_032518_inLine +BABEL_OP2_205_92605_20140123_032518_outLine +BABEL_OP2_205_93964_20130623_014819_inLine +BABEL_OP2_205_93964_20130623_014819_outLine +BABEL_OP2_205_94891_20140123_222847_inLine +BABEL_OP2_205_94891_20140123_222847_outLine +BABEL_OP2_205_94978_20131126_045451_inLine +BABEL_OP2_205_94978_20131126_045451_outLine +BABEL_OP2_205_96376_20140120_211321_inLine +BABEL_OP2_205_96376_20140120_211321_outLine +BABEL_OP2_205_97772_20130301_071555_inLine +BABEL_OP2_205_97772_20130301_071555_outLine +BABEL_OP2_205_99594_20130320_070531_inLine +BABEL_OP2_205_99594_20130320_070531_outLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/training.list b/egs/babel/s5d/conf/lists/205-kurmanji/training.list new file mode 100644 index 00000000000..6f50b091eff --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/training.list @@ -0,0 +1,532 @@ +BABEL_OP2_205_10036_20130325_212656_inLine +BABEL_OP2_205_10036_20130325_212656_outLine +BABEL_OP2_205_10184_20130315_054426_inLine +BABEL_OP2_205_10184_20130315_054426_outLine 
+BABEL_OP2_205_10482_20130330_232812_inLine +BABEL_OP2_205_10482_20130330_232812_outLine +BABEL_OP2_205_10638_20140122_201207_inLine +BABEL_OP2_205_10638_20140122_201207_outLine +BABEL_OP2_205_10647_20130413_190550_inLine +BABEL_OP2_205_10647_20130413_190550_outLine +BABEL_OP2_205_10938_20130402_021742_inLine +BABEL_OP2_205_10938_20130402_021742_outLine +BABEL_OP2_205_10966_20130324_203837_inLine +BABEL_OP2_205_10966_20130324_203837_outLine +BABEL_OP2_205_11352_20130505_190427_inLine +BABEL_OP2_205_11352_20130505_190427_outLine +BABEL_OP2_205_11581_20130317_071927_inLine +BABEL_OP2_205_11581_20130317_071927_outLine +BABEL_OP2_205_11663_20130402_031747_inLine +BABEL_OP2_205_11663_20130402_031747_outLine +BABEL_OP2_205_11797_20130307_233702_inLine +BABEL_OP2_205_11797_20130307_233702_outLine +BABEL_OP2_205_11797_20130307_235053_inLine +BABEL_OP2_205_11797_20130307_235053_outLine +BABEL_OP2_205_12220_20130323_002310_inLine +BABEL_OP2_205_12220_20130323_002310_outLine +BABEL_OP2_205_12635_20130406_230527_inLine +BABEL_OP2_205_12635_20130406_230527_outLine +BABEL_OP2_205_13030_20130330_234019_inLine +BABEL_OP2_205_13030_20130330_234019_outLine +BABEL_OP2_205_13189_20130413_230649_inLine +BABEL_OP2_205_13189_20130413_230649_outLine +BABEL_OP2_205_13324_20130318_043359_inLine +BABEL_OP2_205_13324_20130318_043359_outLine +BABEL_OP2_205_13744_20130302_055938_inLine +BABEL_OP2_205_13744_20130302_055938_outLine +BABEL_OP2_205_14137_20130326_212737_inLine +BABEL_OP2_205_14137_20130326_212737_outLine +BABEL_OP2_205_14539_20130413_020822_inLine +BABEL_OP2_205_14539_20130413_020822_outLine +BABEL_OP2_205_14729_20130526_024319_inLine +BABEL_OP2_205_14729_20130526_024319_outLine +BABEL_OP2_205_14807_20130326_065101_inLine +BABEL_OP2_205_14807_20130326_065101_outLine +BABEL_OP2_205_14807_20130326_070339_inLine +BABEL_OP2_205_14807_20130326_070339_outLine +BABEL_OP2_205_14814_20130326_062123_inLine +BABEL_OP2_205_14814_20130326_062123_outLine +BABEL_OP2_205_14875_20130319_211742_inLine +BABEL_OP2_205_14875_20130319_211742_outLine +BABEL_OP2_205_14875_20130319_213338_inLine +BABEL_OP2_205_14875_20130319_213338_outLine +BABEL_OP2_205_14899_20130303_062436_inLine +BABEL_OP2_205_14899_20130303_062436_outLine +BABEL_OP2_205_14929_20131223_022753_inLine +BABEL_OP2_205_14972_20130312_213702_inLine +BABEL_OP2_205_14972_20130312_213702_outLine +BABEL_OP2_205_15024_20131222_033424_inLine +BABEL_OP2_205_15024_20131222_033424_outLine +BABEL_OP2_205_15227_20130412_005202_inLine +BABEL_OP2_205_15227_20130412_005202_outLine +BABEL_OP2_205_15382_20130325_075405_inLine +BABEL_OP2_205_15382_20130325_075405_outLine +BABEL_OP2_205_15535_20130506_195619_inLine +BABEL_OP2_205_15535_20130506_195619_outLine +BABEL_OP2_205_16839_20130407_052530_inLine +BABEL_OP2_205_16839_20130407_052530_outLine +BABEL_OP2_205_16886_20130326_054927_inLine +BABEL_OP2_205_16886_20130326_054927_outLine +BABEL_OP2_205_16924_20130331_232254_inLine +BABEL_OP2_205_16924_20130331_232254_outLine +BABEL_OP2_205_17320_20130413_054847_inLine +BABEL_OP2_205_17320_20130413_054847_outLine +BABEL_OP2_205_17440_20130413_195207_inLine +BABEL_OP2_205_17440_20130413_195207_outLine +BABEL_OP2_205_17472_20130508_013928_inLine +BABEL_OP2_205_17472_20130508_013928_outLine +BABEL_OP2_205_17496_20130414_215325_inLine +BABEL_OP2_205_17496_20130414_215325_outLine +BABEL_OP2_205_17520_20130312_074120_inLine +BABEL_OP2_205_17520_20130312_074120_outLine +BABEL_OP2_205_17615_20130407_234405_inLine +BABEL_OP2_205_17615_20130407_234405_outLine 
+BABEL_OP2_205_17881_20130413_190631_inLine +BABEL_OP2_205_17881_20130413_190631_outLine +BABEL_OP2_205_17881_20130413_191638_inLine +BABEL_OP2_205_17881_20130413_191638_outLine +BABEL_OP2_205_17914_20130407_235720_inLine +BABEL_OP2_205_17914_20130407_235720_outLine +BABEL_OP2_205_18242_20130408_005657_inLine +BABEL_OP2_205_18242_20130408_005657_outLine +BABEL_OP2_205_18291_20130618_004811_inLine +BABEL_OP2_205_18291_20130618_004811_outLine +BABEL_OP2_205_18566_20130505_173829_inLine +BABEL_OP2_205_18566_20130505_173829_outLine +BABEL_OP2_205_18766_20130413_033911_inLine +BABEL_OP2_205_18766_20130413_033911_outLine +BABEL_OP2_205_19134_20130331_195936_inLine +BABEL_OP2_205_19134_20130331_195936_outLine +BABEL_OP2_205_19589_20130413_203154_inLine +BABEL_OP2_205_19589_20130413_203154_outLine +BABEL_OP2_205_19703_20130325_042858_inLine +BABEL_OP2_205_19703_20130325_042858_outLine +BABEL_OP2_205_19722_20130306_045231_inLine +BABEL_OP2_205_19722_20130306_045231_outLine +BABEL_OP2_205_19749_20130406_231234_inLine +BABEL_OP2_205_19749_20130406_231234_outLine +BABEL_OP2_205_20133_20130228_055409_inLine +BABEL_OP2_205_20133_20130228_055409_outLine +BABEL_OP2_205_20800_20130408_015430_inLine +BABEL_OP2_205_20800_20130408_015430_outLine +BABEL_OP2_205_20916_20130228_200116_inLine +BABEL_OP2_205_20916_20130228_200116_outLine +BABEL_OP2_205_20922_20130406_225439_inLine +BABEL_OP2_205_20922_20130406_225439_outLine +BABEL_OP2_205_20985_20130401_025757_inLine +BABEL_OP2_205_20985_20130401_025757_outLine +BABEL_OP2_205_21004_20130408_222653_inLine +BABEL_OP2_205_21004_20130408_222653_outLine +BABEL_OP2_205_21206_20130312_205638_inLine +BABEL_OP2_205_21206_20130312_205638_outLine +BABEL_OP2_205_21435_20130414_044944_inLine +BABEL_OP2_205_21435_20130414_044944_outLine +BABEL_OP2_205_21543_20140125_004741_inLine +BABEL_OP2_205_21543_20140125_004741_outLine +BABEL_OP2_205_21807_20130324_054526_inLine +BABEL_OP2_205_21892_20130507_023354_inLine +BABEL_OP2_205_21892_20130507_023354_outLine +BABEL_OP2_205_22321_20130308_042214_inLine +BABEL_OP2_205_22321_20130308_042214_outLine +BABEL_OP2_205_22446_20130309_073946_outLine +BABEL_OP2_205_22494_20130331_230611_inLine +BABEL_OP2_205_22494_20130331_230611_outLine +BABEL_OP2_205_22624_20130331_012106_inLine +BABEL_OP2_205_22624_20130331_012106_outLine +BABEL_OP2_205_22629_20131231_223232_inLine +BABEL_OP2_205_22629_20131231_223232_outLine +BABEL_OP2_205_22918_20130413_043023_inLine +BABEL_OP2_205_22918_20130413_043023_outLine +BABEL_OP2_205_22918_20130413_044543_inLine +BABEL_OP2_205_22918_20130413_044543_outLine +BABEL_OP2_205_23006_20130322_202429_inLine +BABEL_OP2_205_23006_20130322_202429_outLine +BABEL_OP2_205_23046_20130327_010653_inLine +BABEL_OP2_205_23046_20130327_010653_outLine +BABEL_OP2_205_23092_20130413_181637_inLine +BABEL_OP2_205_23092_20130413_181637_outLine +BABEL_OP2_205_23190_20130323_014750_inLine +BABEL_OP2_205_23190_20130323_014750_outLine +BABEL_OP2_205_23239_20130331_034518_inLine +BABEL_OP2_205_23239_20130331_034518_outLine +BABEL_OP2_205_23752_20140123_024924_inLine +BABEL_OP2_205_23752_20140123_024924_outLine +BABEL_OP2_205_23893_20140123_003759_inLine +BABEL_OP2_205_23893_20140123_003759_outLine +BABEL_OP2_205_24239_20130415_171824_inLine +BABEL_OP2_205_24239_20130415_171824_outLine +BABEL_OP2_205_24253_20130505_214600_inLine +BABEL_OP2_205_24253_20130505_214600_outLine +BABEL_OP2_205_24270_20130406_070358_inLine +BABEL_OP2_205_24270_20130406_070358_outLine +BABEL_OP2_205_24290_20130414_221432_inLine 
+BABEL_OP2_205_24290_20130414_221432_outLine +BABEL_OP2_205_24323_20130326_051101_inLine +BABEL_OP2_205_24323_20130326_051101_outLine +BABEL_OP2_205_24470_20130406_021646_inLine +BABEL_OP2_205_24470_20130406_021646_outLine +BABEL_OP2_205_24532_20130227_052040_inLine +BABEL_OP2_205_24532_20130227_052040_outLine +BABEL_OP2_205_24569_20130508_235213_inLine +BABEL_OP2_205_24569_20130508_235213_outLine +BABEL_OP2_205_24605_20130311_012103_inLine +BABEL_OP2_205_24605_20130311_012103_outLine +BABEL_OP2_205_24679_20130303_043753_inLine +BABEL_OP2_205_24679_20130303_043753_outLine +BABEL_OP2_205_25085_20130612_023620_inLine +BABEL_OP2_205_25085_20130612_023620_outLine +BABEL_OP2_205_25719_20130406_231631_inLine +BABEL_OP2_205_25719_20130406_231631_outLine +BABEL_OP2_205_25719_20130406_232555_inLine +BABEL_OP2_205_25719_20130406_232555_outLine +BABEL_OP2_205_25719_20130406_233313_inLine +BABEL_OP2_205_25719_20130406_233313_outLine +BABEL_OP2_205_25961_20130305_063202_inLine +BABEL_OP2_205_25961_20130305_063202_outLine +BABEL_OP2_205_26381_20140125_015707_inLine +BABEL_OP2_205_26381_20140125_015707_outLine +BABEL_OP2_205_26388_20130330_021001_inLine +BABEL_OP2_205_26388_20130330_021001_outLine +BABEL_OP2_205_26507_20131101_103425_inLine +BABEL_OP2_205_26507_20131101_103425_outLine +BABEL_OP2_205_26574_20130509_203057_inLine +BABEL_OP2_205_26574_20130509_203057_outLine +BABEL_OP2_205_26602_20130412_235831_inLine +BABEL_OP2_205_26602_20130412_235831_outLine +BABEL_OP2_205_27125_20130227_061700_inLine +BABEL_OP2_205_27125_20130227_061700_outLine +BABEL_OP2_205_27189_20140104_001032_inLine +BABEL_OP2_205_27189_20140104_001032_outLine +BABEL_OP2_205_27203_20130331_021946_inLine +BABEL_OP2_205_27203_20130331_021946_outLine +BABEL_OP2_205_27590_20130506_201921_inLine +BABEL_OP2_205_27590_20130506_201921_outLine +BABEL_OP2_205_27841_20130414_222155_inLine +BABEL_OP2_205_27841_20130414_222155_outLine +BABEL_OP2_205_28012_20130507_054019_inLine +BABEL_OP2_205_28012_20130507_054019_outLine +BABEL_OP2_205_28419_20130320_202136_inLine +BABEL_OP2_205_28419_20130320_202136_outLine +BABEL_OP2_205_28477_20130412_234819_inLine +BABEL_OP2_205_28477_20130412_234819_outLine +BABEL_OP2_205_28522_20130401_211215_inLine +BABEL_OP2_205_28522_20130401_211215_outLine +BABEL_OP2_205_29023_20130314_060343_inLine +BABEL_OP2_205_29023_20130314_060343_outLine +BABEL_OP2_205_29323_20130414_230355_inLine +BABEL_OP2_205_29323_20130414_230355_outLine +BABEL_OP2_205_29404_20130414_214714_inLine +BABEL_OP2_205_29404_20130414_214714_outLine +BABEL_OP2_205_29439_20130413_182356_inLine +BABEL_OP2_205_29439_20130413_182356_outLine +BABEL_OP2_205_30013_20130401_005939_inLine +BABEL_OP2_205_30013_20130401_005939_outLine +BABEL_OP2_205_30180_20130323_005331_inLine +BABEL_OP2_205_30180_20130323_005331_outLine +BABEL_OP2_205_30395_20130316_060814_inLine +BABEL_OP2_205_30395_20130316_060814_outLine +BABEL_OP2_205_30432_20130330_200303_inLine +BABEL_OP2_205_30432_20130330_200303_outLine +BABEL_OP2_205_30869_20130412_202311_inLine +BABEL_OP2_205_30869_20130412_202311_outLine +BABEL_OP2_205_31039_20140125_023755_inLine +BABEL_OP2_205_31039_20140125_023755_outLine +BABEL_OP2_205_31109_20130619_181905_inLine +BABEL_OP2_205_31109_20130619_181905_outLine +BABEL_OP2_205_31346_20130507_204621_inLine +BABEL_OP2_205_31346_20130507_204621_outLine +BABEL_OP2_205_32097_20130301_034527_inLine +BABEL_OP2_205_32097_20130301_034527_outLine +BABEL_OP2_205_32122_20130321_004623_inLine +BABEL_OP2_205_32122_20130321_004623_outLine 
+BABEL_OP2_205_32122_20130321_010341_inLine +BABEL_OP2_205_32122_20130321_010341_outLine +BABEL_OP2_205_32244_20130412_190534_inLine +BABEL_OP2_205_32244_20130412_190534_outLine +BABEL_OP2_205_32630_20130412_054815_inLine +BABEL_OP2_205_32630_20130412_054815_outLine +BABEL_OP2_205_32837_20130507_011223_inLine +BABEL_OP2_205_32837_20130507_011223_outLine +BABEL_OP2_205_33229_20130414_213157_inLine +BABEL_OP2_205_33229_20130414_213157_outLine +BABEL_OP2_205_33273_20130320_040141_inLine +BABEL_OP2_205_33273_20130320_040141_outLine +BABEL_OP2_205_33355_20130311_214515_inLine +BABEL_OP2_205_33355_20130311_214515_outLine +BABEL_OP2_205_33424_20130412_193538_inLine +BABEL_OP2_205_33424_20130412_193538_outLine +BABEL_OP2_205_33476_20130405_051711_inLine +BABEL_OP2_205_33476_20130405_051711_outLine +BABEL_OP2_205_33497_20130619_220728_inLine +BABEL_OP2_205_33497_20130619_220728_outLine +BABEL_OP2_205_33840_20130507_012940_inLine +BABEL_OP2_205_33840_20130507_012940_outLine +BABEL_OP2_205_33913_20130414_052534_inLine +BABEL_OP2_205_33913_20130414_052534_outLine +BABEL_OP2_205_33951_20130619_212409_inLine +BABEL_OP2_205_33951_20130619_212409_outLine +BABEL_OP2_205_34106_20130301_221919_inLine +BABEL_OP2_205_34106_20130301_221919_outLine +BABEL_OP2_205_34197_20130302_231101_inLine +BABEL_OP2_205_34197_20130302_231101_outLine +BABEL_OP2_205_34586_20140125_203417_inLine +BABEL_OP2_205_34586_20140125_203417_outLine +BABEL_OP2_205_34647_20140125_205318_inLine +BABEL_OP2_205_34647_20140125_205318_outLine +BABEL_OP2_205_34903_20130406_055051_inLine +BABEL_OP2_205_34903_20130406_055051_outLine +BABEL_OP2_205_35139_20130312_070415_inLine +BABEL_OP2_205_35139_20130312_070415_outLine +BABEL_OP2_205_35143_20130414_203900_inLine +BABEL_OP2_205_35143_20130414_203900_outLine +BABEL_OP2_205_35181_20130413_201739_inLine +BABEL_OP2_205_35181_20130413_201739_outLine +BABEL_OP2_205_36642_20130413_013238_inLine +BABEL_OP2_205_36642_20130413_013238_outLine +BABEL_OP2_205_37228_20130407_205807_inLine +BABEL_OP2_205_37228_20130407_205807_outLine +BABEL_OP2_205_37271_20130507_231712_inLine +BABEL_OP2_205_37271_20130507_231712_outLine +BABEL_OP2_205_37285_20130401_061737_inLine +BABEL_OP2_205_37285_20130401_061737_outLine +BABEL_OP2_205_37290_20130405_070403_inLine +BABEL_OP2_205_37290_20130405_070403_outLine +BABEL_OP2_205_37598_20130405_034853_inLine +BABEL_OP2_205_37598_20130405_034853_outLine +BABEL_OP2_205_37682_20130325_022952_inLine +BABEL_OP2_205_37682_20130325_022952_outLine +BABEL_OP2_205_37776_20140125_220835_inLine +BABEL_OP2_205_37776_20140125_220835_outLine +BABEL_OP2_205_37853_20130413_005407_inLine +BABEL_OP2_205_37853_20130413_005407_outLine +BABEL_OP2_205_38340_20130315_063442_inLine +BABEL_OP2_205_38340_20130315_063442_outLine +BABEL_OP2_205_38554_20130301_085606_inLine +BABEL_OP2_205_38554_20130301_085606_outLine +BABEL_OP2_205_38664_20130325_030156_inLine +BABEL_OP2_205_38664_20130325_030156_outLine +BABEL_OP2_205_38689_20130414_233704_inLine +BABEL_OP2_205_38689_20130414_233704_outLine +BABEL_OP2_205_38741_20130315_071146_inLine +BABEL_OP2_205_38741_20130315_071146_outLine +BABEL_OP2_205_38878_20130406_202135_inLine +BABEL_OP2_205_38878_20130406_202135_outLine +BABEL_OP2_205_38963_20131227_202341_inLine +BABEL_OP2_205_38963_20131227_202341_outLine +BABEL_OP2_205_39059_20130414_033146_inLine +BABEL_OP2_205_39059_20130414_033146_outLine +BABEL_OP2_205_39059_20130414_034411_inLine +BABEL_OP2_205_39059_20130414_034411_outLine +BABEL_OP2_205_39555_20130507_025010_inLine 
+BABEL_OP2_205_39555_20130507_025010_outLine +BABEL_OP2_205_40196_20140125_222906_inLine +BABEL_OP2_205_40196_20140125_222906_outLine +BABEL_OP2_205_40557_20130413_185709_inLine +BABEL_OP2_205_40557_20130413_185709_outLine +BABEL_OP2_205_40557_20130413_190849_inLine +BABEL_OP2_205_40557_20130413_190849_outLine +BABEL_OP2_205_40565_20130401_015506_inLine +BABEL_OP2_205_40565_20130401_015506_outLine +BABEL_OP2_205_40939_20140125_231452_inLine +BABEL_OP2_205_40939_20140125_231452_outLine +BABEL_OP2_205_41038_20130405_060002_inLine +BABEL_OP2_205_41038_20130405_060002_outLine +BABEL_OP2_205_41174_20130318_033313_inLine +BABEL_OP2_205_41174_20130318_033313_outLine +BABEL_OP2_205_41618_20130312_214004_inLine +BABEL_OP2_205_41618_20130312_214004_outLine +BABEL_OP2_205_41741_20130326_004056_inLine +BABEL_OP2_205_41741_20130326_004056_outLine +BABEL_OP2_205_42619_20130325_002736_inLine +BABEL_OP2_205_42619_20130325_002736_outLine +BABEL_OP2_205_42834_20130414_202256_inLine +BABEL_OP2_205_42834_20130414_202256_outLine +BABEL_OP2_205_42991_20130401_024013_inLine +BABEL_OP2_205_42991_20130401_024013_outLine +BABEL_OP2_205_42991_20130401_025044_inLine +BABEL_OP2_205_42991_20130401_025044_outLine +BABEL_OP2_205_43286_20130304_044510_inLine +BABEL_OP2_205_43286_20130304_044510_outLine +BABEL_OP2_205_43368_20130329_211826_inLine +BABEL_OP2_205_43368_20130329_211826_outLine +BABEL_OP2_205_43368_20130329_212612_inLine +BABEL_OP2_205_43368_20130329_212612_outLine +BABEL_OP2_205_43788_20130331_024429_inLine +BABEL_OP2_205_43788_20130331_024429_outLine +BABEL_OP2_205_43788_20130331_030508_inLine +BABEL_OP2_205_43788_20130331_030508_outLine +BABEL_OP2_205_44847_20130325_055635_inLine +BABEL_OP2_205_44847_20130325_055635_outLine +BABEL_OP2_205_45121_20130412_035841_inLine +BABEL_OP2_205_45121_20130412_035841_outLine +BABEL_OP2_205_45235_20130509_011826_inLine +BABEL_OP2_205_45235_20130509_011826_outLine +BABEL_OP2_205_45374_20140126_000904_inLine +BABEL_OP2_205_45374_20140126_000904_outLine +BABEL_OP2_205_46041_20130507_202255_inLine +BABEL_OP2_205_46041_20130507_202255_outLine +BABEL_OP2_205_46315_20130506_231421_inLine +BABEL_OP2_205_46315_20130506_231421_outLine +BABEL_OP2_205_46589_20130331_014535_inLine +BABEL_OP2_205_46589_20130331_014535_outLine +BABEL_OP2_205_46757_20130401_191649_inLine +BABEL_OP2_205_46757_20130401_191649_outLine +BABEL_OP2_205_46976_20131104_051409_inLine +BABEL_OP2_205_46976_20131104_051409_outLine +BABEL_OP2_205_47110_20140126_005953_inLine +BABEL_OP2_205_47110_20140126_005953_outLine +BABEL_OP2_205_47451_20130408_195325_inLine +BABEL_OP2_205_47451_20130408_195325_outLine +BABEL_OP2_205_47487_20130328_060026_inLine +BABEL_OP2_205_47487_20130328_060026_outLine +BABEL_OP2_205_47823_20130330_204952_inLine +BABEL_OP2_205_47823_20130330_204952_outLine +BABEL_OP2_205_47878_20130319_211057_inLine +BABEL_OP2_205_47878_20130319_211057_outLine +BABEL_OP2_205_48422_20130407_020759_inLine +BABEL_OP2_205_48422_20130407_020759_outLine +BABEL_OP2_205_49118_20130412_210858_inLine +BABEL_OP2_205_49118_20130412_210858_outLine +BABEL_OP2_205_49118_20130412_211622_inLine +BABEL_OP2_205_49118_20130412_211622_outLine +BABEL_OP2_205_49287_20130327_053930_inLine +BABEL_OP2_205_49287_20130327_053930_outLine +BABEL_OP2_205_49630_20130401_013908_inLine +BABEL_OP2_205_49630_20130401_013908_outLine +BABEL_OP2_205_49768_20130330_025558_inLine +BABEL_OP2_205_49768_20130330_025558_outLine +BABEL_OP2_205_50186_20140126_012415_inLine +BABEL_OP2_205_50186_20140126_012415_outLine 
+BABEL_OP2_205_50745_20130505_195625_inLine +BABEL_OP2_205_50745_20130505_195625_outLine +BABEL_OP2_205_50779_20130320_043549_inLine +BABEL_OP2_205_50779_20130320_043549_outLine +BABEL_OP2_205_50779_20130320_044244_inLine +BABEL_OP2_205_50779_20130320_044244_outLine +BABEL_OP2_205_51015_20130401_202255_inLine +BABEL_OP2_205_51015_20130401_202255_outLine +BABEL_OP2_205_52246_20130323_232916_inLine +BABEL_OP2_205_52246_20130323_232916_outLine +BABEL_OP2_205_52490_20130326_051608_inLine +BABEL_OP2_205_52490_20130326_051608_outLine +BABEL_OP2_205_53063_20130508_051415_inLine +BABEL_OP2_205_53063_20130508_051415_outLine +BABEL_OP2_205_53415_20131216_223652_inLine +BABEL_OP2_205_53415_20131216_223652_outLine +BABEL_OP2_205_53441_20140126_015538_inLine +BABEL_OP2_205_53441_20140126_015538_outLine +BABEL_OP2_205_53758_20131228_000238_inLine +BABEL_OP2_205_53758_20131228_000238_outLine +BABEL_OP2_205_54104_20130323_222459_inLine +BABEL_OP2_205_54104_20130323_222459_outLine +BABEL_OP2_205_54827_20130414_030516_inLine +BABEL_OP2_205_54827_20130414_030516_outLine +BABEL_OP2_205_54841_20130414_225855_inLine +BABEL_OP2_205_54841_20130414_225855_outLine +BABEL_OP2_205_54953_20130317_013652_inLine +BABEL_OP2_205_54953_20130317_013652_outLine +BABEL_OP2_205_56198_20130321_041358_inLine +BABEL_OP2_205_56198_20130321_041358_outLine +BABEL_OP2_205_56925_20140126_023234_inLine +BABEL_OP2_205_56925_20140126_023234_outLine +BABEL_OP2_205_57065_20130407_232501_inLine +BABEL_OP2_205_57065_20130407_232501_outLine +BABEL_OP2_205_57678_20130323_232415_inLine +BABEL_OP2_205_57678_20130323_232415_outLine +BABEL_OP2_205_57935_20130322_224501_inLine +BABEL_OP2_205_57935_20130322_224501_outLine +BABEL_OP2_205_58026_20131219_010750_inLine +BABEL_OP2_205_58026_20131219_010750_outLine +BABEL_OP2_205_58821_20130415_190958_inLine +BABEL_OP2_205_58821_20130415_190958_outLine +BABEL_OP2_205_59078_20130406_075721_inLine +BABEL_OP2_205_59078_20130406_075721_outLine +BABEL_OP2_205_59635_20130406_225014_inLine +BABEL_OP2_205_59635_20130406_225014_outLine +BABEL_OP2_205_59645_20130619_190548_inLine +BABEL_OP2_205_59645_20130619_190548_outLine +BABEL_OP2_205_60282_20140107_024858_inLine +BABEL_OP2_205_60282_20140107_024858_outLine +BABEL_OP2_205_60436_20130413_200129_inLine +BABEL_OP2_205_60436_20130413_200129_outLine +BABEL_OP2_205_61440_20130411_231312_inLine +BABEL_OP2_205_61440_20130411_231312_outLine +BABEL_OP2_205_61971_20130413_052620_inLine +BABEL_OP2_205_61971_20130413_052620_outLine +BABEL_OP2_205_62014_20130329_225214_inLine +BABEL_OP2_205_62014_20130329_225214_outLine +BABEL_OP2_205_62200_20130405_021524_inLine +BABEL_OP2_205_62200_20130405_021524_outLine +BABEL_OP2_205_62289_20140122_214709_inLine +BABEL_OP2_205_62289_20140122_214709_outLine +BABEL_OP2_205_62360_20140122_233956_inLine +BABEL_OP2_205_62810_20130304_075632_inLine +BABEL_OP2_205_62810_20130304_075632_outLine +BABEL_OP2_205_63084_20130405_025236_inLine +BABEL_OP2_205_63084_20130405_025236_outLine +BABEL_OP2_205_63787_20130310_001339_inLine +BABEL_OP2_205_63787_20130310_001339_outLine +BABEL_OP2_205_63920_20131226_014831_inLine +BABEL_OP2_205_64688_20131226_232545_inLine +BABEL_OP2_205_64688_20131226_232545_outLine +BABEL_OP2_205_66971_20130413_002731_inLine +BABEL_OP2_205_66971_20130413_002731_outLine +BABEL_OP2_205_67964_20140122_221653_inLine +BABEL_OP2_205_67964_20140122_221653_outLine +BABEL_OP2_205_68289_20130407_225726_inLine +BABEL_OP2_205_68289_20130407_225726_outLine +BABEL_OP2_205_68748_20130330_225712_inLine 
+BABEL_OP2_205_68748_20130330_225712_outLine +BABEL_OP2_205_70452_20130328_011715_inLine +BABEL_OP2_205_70452_20130328_011715_outLine +BABEL_OP2_205_70713_20131129_235040_inLine +BABEL_OP2_205_70716_20130413_193114_inLine +BABEL_OP2_205_70716_20130413_193114_outLine +BABEL_OP2_205_74799_20130407_030553_inLine +BABEL_OP2_205_74799_20130407_030553_outLine +BABEL_OP2_205_76683_20130331_201352_inLine +BABEL_OP2_205_76683_20130331_201352_outLine +BABEL_OP2_205_77242_20130616_015950_inLine +BABEL_OP2_205_77242_20130616_015950_outLine +BABEL_OP2_205_78254_20130323_051609_inLine +BABEL_OP2_205_78254_20130323_051609_outLine +BABEL_OP2_205_80559_20130323_224458_inLine +BABEL_OP2_205_80559_20130323_224458_outLine +BABEL_OP2_205_81149_20130412_061213_inLine +BABEL_OP2_205_81149_20130412_061213_outLine +BABEL_OP2_205_82138_20130622_210458_inLine +BABEL_OP2_205_82138_20130622_210458_outLine +BABEL_OP2_205_84605_20130319_203823_inLine +BABEL_OP2_205_84605_20130319_203823_outLine +BABEL_OP2_205_84737_20130407_054058_inLine +BABEL_OP2_205_84737_20130407_054058_outLine +BABEL_OP2_205_84936_20130405_063301_inLine +BABEL_OP2_205_84936_20130405_063301_outLine +BABEL_OP2_205_86191_20130323_060631_inLine +BABEL_OP2_205_86191_20130323_060631_outLine +BABEL_OP2_205_86433_20130325_084312_inLine +BABEL_OP2_205_86433_20130325_084312_outLine +BABEL_OP2_205_86676_20130331_014116_inLine +BABEL_OP2_205_86676_20130331_014116_outLine +BABEL_OP2_205_86715_20130618_002759_inLine +BABEL_OP2_205_86715_20130618_002759_outLine +BABEL_OP2_205_86826_20130411_224207_inLine +BABEL_OP2_205_86826_20130411_224207_outLine +BABEL_OP2_205_90760_20130612_022556_inLine +BABEL_OP2_205_90760_20130612_022556_outLine +BABEL_OP2_205_91336_20130622_230929_inLine +BABEL_OP2_205_91336_20130622_230929_outLine +BABEL_OP2_205_92605_20140123_032518_inLine +BABEL_OP2_205_92605_20140123_032518_outLine +BABEL_OP2_205_93964_20130623_014819_inLine +BABEL_OP2_205_93964_20130623_014819_outLine +BABEL_OP2_205_94891_20140123_222847_inLine +BABEL_OP2_205_94891_20140123_222847_outLine +BABEL_OP2_205_94978_20131126_045451_inLine +BABEL_OP2_205_94978_20131126_045451_outLine +BABEL_OP2_205_96376_20140120_211321_inLine +BABEL_OP2_205_96376_20140120_211321_outLine +BABEL_OP2_205_97772_20130301_071555_inLine +BABEL_OP2_205_97772_20130301_071555_outLine +BABEL_OP2_205_99594_20130320_070531_inLine +BABEL_OP2_205_99594_20130320_070531_outLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/untranscribed-training.list b/egs/babel/s5d/conf/lists/205-kurmanji/untranscribed-training.list new file mode 100644 index 00000000000..0239610b1a7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/untranscribed-training.list @@ -0,0 +1,521 @@ +BABEL_OP2_205_12321_20131101_103424_inLine +BABEL_OP2_205_12321_20131101_103424_outLine +BABEL_OP2_205_14350_20130311_065704_inLine +BABEL_OP2_205_14350_20130311_065704_outLine +BABEL_OP2_205_15262_20130310_220350_inLine +BABEL_OP2_205_15262_20130310_220350_outLine +BABEL_OP2_205_15902_20130309_042954_inLine +BABEL_OP2_205_15902_20130309_042954_outLine +BABEL_OP2_205_16475_20130318_071049_inLine +BABEL_OP2_205_16475_20130318_071049_outLine +BABEL_OP2_205_17582_20130612_045820_inLine +BABEL_OP2_205_17582_20130612_045820_outLine +BABEL_OP2_205_17923_20130310_071855_inLine +BABEL_OP2_205_17923_20130310_071855_outLine +BABEL_OP2_205_18992_20131227_214303_inLine +BABEL_OP2_205_19545_20131103_054936_inLine +BABEL_OP2_205_19545_20131103_054936_outLine +BABEL_OP2_205_20724_20131226_220301_inLine +BABEL_OP2_205_20738_20131219_000457_inLine 
+BABEL_OP2_205_20738_20131219_000457_outLine +BABEL_OP2_205_20768_20131104_020043_inLine +BABEL_OP2_205_20768_20131104_020043_outLine +BABEL_OP2_205_21109_20131110_203639_inLine +BABEL_OP2_205_21109_20131110_203639_outLine +BABEL_OP2_205_21244_20130411_220542_inLine +BABEL_OP2_205_21244_20130411_220542_outLine +BABEL_OP2_205_23681_20130519_052009_outLine +BABEL_OP2_205_23731_20131104_055621_outLine +BABEL_OP2_205_26074_20130318_092803_inLine +BABEL_OP2_205_26074_20130318_092803_outLine +BABEL_OP2_205_26074_20130318_093434_inLine +BABEL_OP2_205_26074_20130318_093434_outLine +BABEL_OP2_205_26398_20130414_210257_outLine +BABEL_OP2_205_28538_20131222_010921_inLine +BABEL_OP2_205_29230_20130619_045232_inLine +BABEL_OP2_205_31182_20131104_010653_inLine +BABEL_OP2_205_31182_20131104_010653_outLine +BABEL_OP2_205_32301_20130401_015605_outLine +BABEL_OP2_205_36039_20130525_013805_inLine +BABEL_OP2_205_36039_20130525_013805_outLine +BABEL_OP2_205_36059_20131108_200854_inLine +BABEL_OP2_205_36059_20131108_200854_outLine +BABEL_OP2_205_36059_20131108_201758_inLine +BABEL_OP2_205_36059_20131108_201758_outLine +BABEL_OP2_205_36059_20131108_202426_inLine +BABEL_OP2_205_36059_20131108_202426_outLine +BABEL_OP2_205_37229_20130612_045130_inLine +BABEL_OP2_205_37229_20130612_045130_outLine +BABEL_OP2_205_37499_20130423_030008_inLine +BABEL_OP2_205_37499_20130423_030008_outLine +BABEL_OP2_205_38979_20131105_052419_inLine +BABEL_OP2_205_38979_20131105_052419_outLine +BABEL_OP2_205_39159_20130303_040403_inLine +BABEL_OP2_205_39159_20130303_040403_outLine +BABEL_OP2_205_40713_20130314_075828_inLine +BABEL_OP2_205_40713_20130314_075828_outLine +BABEL_OP2_205_40740_20130407_055052_inLine +BABEL_OP2_205_40740_20130407_055052_outLine +BABEL_OP2_205_41100_20130311_015856_inLine +BABEL_OP2_205_41100_20130311_015856_outLine +BABEL_OP2_205_41109_20130406_224530_inLine +BABEL_OP2_205_41109_20130406_224530_outLine +BABEL_OP2_205_41493_20130228_081724_inLine +BABEL_OP2_205_41493_20130228_081724_outLine +BABEL_OP2_205_41745_20130319_001127_inLine +BABEL_OP2_205_41745_20130319_001127_outLine +BABEL_OP2_205_41920_20130325_212001_inLine +BABEL_OP2_205_42155_20130312_064841_inLine +BABEL_OP2_205_42155_20130312_064841_outLine +BABEL_OP2_205_42243_20130305_063726_inLine +BABEL_OP2_205_42243_20130305_063726_outLine +BABEL_OP2_205_43239_20131110_055057_inLine +BABEL_OP2_205_43239_20131110_055057_outLine +BABEL_OP2_205_44255_20130406_205651_inLine +BABEL_OP2_205_44255_20130406_205651_outLine +BABEL_OP2_205_44531_20131108_194709_inLine +BABEL_OP2_205_44531_20131108_194709_outLine +BABEL_OP2_205_44619_20130314_074104_inLine +BABEL_OP2_205_44619_20130314_074104_outLine +BABEL_OP2_205_45642_20130311_052042_inLine +BABEL_OP2_205_45642_20130311_052042_outLine +BABEL_OP2_205_46558_20130304_053902_inLine +BABEL_OP2_205_46558_20130304_053902_outLine +BABEL_OP2_205_46702_20130301_025750_inLine +BABEL_OP2_205_46702_20130301_025750_outLine +BABEL_OP2_205_46763_20130505_222913_inLine +BABEL_OP2_205_46763_20130505_222913_outLine +BABEL_OP2_205_47215_20130408_021338_inLine +BABEL_OP2_205_47215_20130408_021338_outLine +BABEL_OP2_205_47270_20130406_211808_inLine +BABEL_OP2_205_47270_20130406_211808_outLine +BABEL_OP2_205_47405_20131231_032458_inLine +BABEL_OP2_205_47405_20131231_032458_outLine +BABEL_OP2_205_47877_20130407_205116_inLine +BABEL_OP2_205_47877_20130407_205116_outLine +BABEL_OP2_205_48399_20130304_002906_inLine +BABEL_OP2_205_48399_20130304_002906_outLine +BABEL_OP2_205_48758_20131107_075636_inLine 
+BABEL_OP2_205_48758_20131107_075636_outLine +BABEL_OP2_205_48758_20131107_080446_inLine +BABEL_OP2_205_48758_20131107_080446_outLine +BABEL_OP2_205_48789_20131103_043932_inLine +BABEL_OP2_205_48789_20131103_043932_outLine +BABEL_OP2_205_49775_20130227_060536_inLine +BABEL_OP2_205_49775_20130227_060536_outLine +BABEL_OP2_205_49812_20130407_020818_inLine +BABEL_OP2_205_49812_20130407_020818_outLine +BABEL_OP2_205_49945_20130412_222951_inLine +BABEL_OP2_205_49945_20130412_222951_outLine +BABEL_OP2_205_50090_20130329_203208_inLine +BABEL_OP2_205_50090_20130329_203208_outLine +BABEL_OP2_205_50681_20130330_050901_inLine +BABEL_OP2_205_50681_20130330_050901_outLine +BABEL_OP2_205_51530_20130414_210733_inLine +BABEL_OP2_205_51530_20130414_210733_outLine +BABEL_OP2_205_51611_20130311_072551_inLine +BABEL_OP2_205_51611_20130311_072551_outLine +BABEL_OP2_205_51819_20130401_010103_outLine +BABEL_OP2_205_51819_20130401_010745_outLine +BABEL_OP2_205_52447_20130412_001856_inLine +BABEL_OP2_205_52447_20130412_001856_outLine +BABEL_OP2_205_52483_20130621_205901_inLine +BABEL_OP2_205_52483_20130621_205901_outLine +BABEL_OP2_205_52804_20130311_002720_inLine +BABEL_OP2_205_52804_20130311_002720_outLine +BABEL_OP2_205_54040_20131104_013114_inLine +BABEL_OP2_205_54040_20131104_013114_outLine +BABEL_OP2_205_54160_20130306_033742_inLine +BABEL_OP2_205_54160_20130306_033742_outLine +BABEL_OP2_205_54405_20130618_202708_inLine +BABEL_OP2_205_54405_20130618_202708_outLine +BABEL_OP2_205_55818_20130309_080103_inLine +BABEL_OP2_205_55818_20130309_080103_outLine +BABEL_OP2_205_56090_20130227_204816_inLine +BABEL_OP2_205_56090_20130227_204816_outLine +BABEL_OP2_205_56306_20130407_012524_inLine +BABEL_OP2_205_56306_20130407_012524_outLine +BABEL_OP2_205_56306_20130407_013746_inLine +BABEL_OP2_205_56306_20130407_013746_outLine +BABEL_OP2_205_56331_20130413_043736_inLine +BABEL_OP2_205_56331_20130413_043736_outLine +BABEL_OP2_205_56370_20130306_061205_inLine +BABEL_OP2_205_56370_20130306_061205_outLine +BABEL_OP2_205_56429_20130311_053708_inLine +BABEL_OP2_205_56429_20130311_053708_outLine +BABEL_OP2_205_56523_20130317_224401_inLine +BABEL_OP2_205_56523_20130317_224401_outLine +BABEL_OP2_205_56720_20130506_182315_inLine +BABEL_OP2_205_56720_20130506_182315_outLine +BABEL_OP2_205_57566_20130407_031257_inLine +BABEL_OP2_205_57566_20130407_031257_outLine +BABEL_OP2_205_58915_20130611_221704_inLine +BABEL_OP2_205_58915_20130611_221704_outLine +BABEL_OP2_205_59928_20130321_012004_inLine +BABEL_OP2_205_59928_20130321_012004_outLine +BABEL_OP2_205_59993_20130321_045802_inLine +BABEL_OP2_205_59993_20130321_045802_outLine +BABEL_OP2_205_60474_20130324_213649_inLine +BABEL_OP2_205_60474_20130324_213649_outLine +BABEL_OP2_205_60508_20130304_205015_inLine +BABEL_OP2_205_60508_20130304_205015_outLine +BABEL_OP2_205_60538_20130310_004703_inLine +BABEL_OP2_205_60538_20130310_004703_outLine +BABEL_OP2_205_60626_20130315_071907_inLine +BABEL_OP2_205_60626_20130315_071907_outLine +BABEL_OP2_205_60706_20130307_053430_inLine +BABEL_OP2_205_60706_20130307_053430_outLine +BABEL_OP2_205_60836_20130330_072606_inLine +BABEL_OP2_205_60836_20130330_072606_outLine +BABEL_OP2_205_61167_20130326_222257_inLine +BABEL_OP2_205_61167_20130326_222257_outLine +BABEL_OP2_205_61190_20130325_004615_inLine +BABEL_OP2_205_61190_20130325_004615_outLine +BABEL_OP2_205_61219_20130325_212553_inLine +BABEL_OP2_205_61219_20130325_212553_outLine +BABEL_OP2_205_61357_20130330_232257_inLine +BABEL_OP2_205_61357_20130330_232257_outLine 
+BABEL_OP2_205_62434_20130305_215011_inLine +BABEL_OP2_205_62434_20130305_215011_outLine +BABEL_OP2_205_62434_20130305_220154_inLine +BABEL_OP2_205_62434_20130305_220154_outLine +BABEL_OP2_205_62734_20130328_050453_inLine +BABEL_OP2_205_62734_20130328_050453_outLine +BABEL_OP2_205_63081_20130309_012237_inLine +BABEL_OP2_205_63081_20130309_012237_outLine +BABEL_OP2_205_63094_20131113_030146_inLine +BABEL_OP2_205_63220_20130326_055356_inLine +BABEL_OP2_205_63220_20130326_055356_outLine +BABEL_OP2_205_63445_20130308_235018_inLine +BABEL_OP2_205_63445_20130308_235018_outLine +BABEL_OP2_205_63481_20130307_082632_inLine +BABEL_OP2_205_63481_20130307_082632_outLine +BABEL_OP2_205_63523_20140121_213251_inLine +BABEL_OP2_205_63757_20130406_055509_inLine +BABEL_OP2_205_63757_20130406_055509_outLine +BABEL_OP2_205_63938_20130413_044053_inLine +BABEL_OP2_205_63938_20130413_044053_outLine +BABEL_OP2_205_64014_20130413_004605_inLine +BABEL_OP2_205_64014_20130413_004605_outLine +BABEL_OP2_205_64065_20130315_044531_inLine +BABEL_OP2_205_64065_20130315_044531_outLine +BABEL_OP2_205_64494_20130313_043717_inLine +BABEL_OP2_205_64494_20130313_043717_outLine +BABEL_OP2_205_64638_20130408_004937_inLine +BABEL_OP2_205_64638_20130408_004937_outLine +BABEL_OP2_205_64722_20131107_210205_inLine +BABEL_OP2_205_64722_20131107_210205_outLine +BABEL_OP2_205_64759_20130307_214024_inLine +BABEL_OP2_205_64759_20130307_214024_outLine +BABEL_OP2_205_64759_20130307_215400_inLine +BABEL_OP2_205_64759_20130307_215400_outLine +BABEL_OP2_205_64768_20130327_213122_inLine +BABEL_OP2_205_64768_20130327_213122_outLine +BABEL_OP2_205_64796_20130307_042443_inLine +BABEL_OP2_205_64796_20130307_042443_outLine +BABEL_OP2_205_64902_20130414_052508_inLine +BABEL_OP2_205_64902_20130414_052508_outLine +BABEL_OP2_205_65882_20130311_013812_inLine +BABEL_OP2_205_65882_20130311_013812_outLine +BABEL_OP2_205_66026_20130508_223030_inLine +BABEL_OP2_205_66026_20130508_223030_outLine +BABEL_OP2_205_66959_20130414_224335_inLine +BABEL_OP2_205_66959_20130414_224335_outLine +BABEL_OP2_205_67085_20130414_180541_inLine +BABEL_OP2_205_67085_20130414_180541_outLine +BABEL_OP2_205_67389_20140111_225039_inLine +BABEL_OP2_205_67389_20140111_225039_outLine +BABEL_OP2_205_67552_20130331_072350_inLine +BABEL_OP2_205_67552_20130331_072350_outLine +BABEL_OP2_205_67552_20130331_073746_inLine +BABEL_OP2_205_67552_20130331_073746_outLine +BABEL_OP2_205_67592_20130509_213421_inLine +BABEL_OP2_205_67592_20130509_213421_outLine +BABEL_OP2_205_67794_20130315_081604_inLine +BABEL_OP2_205_67794_20130315_081604_outLine +BABEL_OP2_205_67999_20130407_223424_inLine +BABEL_OP2_205_67999_20130407_223424_outLine +BABEL_OP2_205_68059_20130619_053732_inLine +BABEL_OP2_205_68059_20130619_053732_outLine +BABEL_OP2_205_68182_20130415_041909_inLine +BABEL_OP2_205_68182_20130415_041909_outLine +BABEL_OP2_205_69633_20130331_021718_inLine +BABEL_OP2_205_69633_20130331_021718_outLine +BABEL_OP2_205_69633_20130331_023306_inLine +BABEL_OP2_205_69633_20130331_023306_outLine +BABEL_OP2_205_69885_20130415_051700_inLine +BABEL_OP2_205_69885_20130415_051700_outLine +BABEL_OP2_205_70110_20130302_074003_inLine +BABEL_OP2_205_70110_20130302_074003_outLine +BABEL_OP2_205_70343_20130401_203305_inLine +BABEL_OP2_205_70343_20130401_203305_outLine +BABEL_OP2_205_70526_20130416_033943_inLine +BABEL_OP2_205_70526_20130416_033943_outLine +BABEL_OP2_205_71047_20131109_013132_inLine +BABEL_OP2_205_71047_20131109_013132_outLine +BABEL_OP2_205_71333_20130326_225136_inLine 
+BABEL_OP2_205_71333_20130326_225136_outLine +BABEL_OP2_205_71614_20130506_175649_inLine +BABEL_OP2_205_71614_20130506_175649_outLine +BABEL_OP2_205_71704_20130317_002057_inLine +BABEL_OP2_205_71704_20130317_002057_outLine +BABEL_OP2_205_71754_20140115_014345_inLine +BABEL_OP2_205_71754_20140115_014345_outLine +BABEL_OP2_205_72040_20130321_022323_inLine +BABEL_OP2_205_72040_20130321_022323_outLine +BABEL_OP2_205_72733_20130415_183417_inLine +BABEL_OP2_205_72733_20130415_183417_outLine +BABEL_OP2_205_73042_20130317_000810_inLine +BABEL_OP2_205_73042_20130317_000810_outLine +BABEL_OP2_205_73072_20130311_213816_inLine +BABEL_OP2_205_73072_20130311_213816_outLine +BABEL_OP2_205_73301_20130330_062717_inLine +BABEL_OP2_205_73301_20130330_062717_outLine +BABEL_OP2_205_73301_20130330_064357_inLine +BABEL_OP2_205_73301_20130330_064357_outLine +BABEL_OP2_205_73408_20130622_062600_inLine +BABEL_OP2_205_73408_20130622_062600_outLine +BABEL_OP2_205_73837_20130330_054105_inLine +BABEL_OP2_205_73837_20130330_054105_outLine +BABEL_OP2_205_74111_20130507_182333_inLine +BABEL_OP2_205_74111_20130507_182333_outLine +BABEL_OP2_205_74280_20130301_022106_inLine +BABEL_OP2_205_74280_20130301_022106_outLine +BABEL_OP2_205_74455_20130414_041223_inLine +BABEL_OP2_205_74455_20130414_041223_outLine +BABEL_OP2_205_74641_20130314_060344_inLine +BABEL_OP2_205_74641_20130314_060344_outLine +BABEL_OP2_205_74921_20130331_061311_inLine +BABEL_OP2_205_74921_20130331_061311_outLine +BABEL_OP2_205_75223_20130306_045441_inLine +BABEL_OP2_205_75223_20130306_045441_outLine +BABEL_OP2_205_75261_20130408_234257_inLine +BABEL_OP2_205_75261_20130408_234257_outLine +BABEL_OP2_205_75342_20130415_192555_inLine +BABEL_OP2_205_75342_20130415_192555_outLine +BABEL_OP2_205_75981_20130413_042503_inLine +BABEL_OP2_205_75981_20130413_042503_outLine +BABEL_OP2_205_76773_20130312_051652_inLine +BABEL_OP2_205_76773_20130312_051652_outLine +BABEL_OP2_205_77139_20130305_045120_inLine +BABEL_OP2_205_77139_20130305_045120_outLine +BABEL_OP2_205_77744_20130328_012940_outLine +BABEL_OP2_205_78544_20130408_000050_inLine +BABEL_OP2_205_78544_20130408_000050_outLine +BABEL_OP2_205_78544_20130408_001043_inLine +BABEL_OP2_205_78544_20130408_001043_outLine +BABEL_OP2_205_78609_20130508_001720_inLine +BABEL_OP2_205_78609_20130508_001720_outLine +BABEL_OP2_205_78630_20130330_200921_inLine +BABEL_OP2_205_78630_20130330_200921_outLine +BABEL_OP2_205_78943_20130326_063742_inLine +BABEL_OP2_205_78943_20130326_063742_outLine +BABEL_OP2_205_79045_20130507_020315_inLine +BABEL_OP2_205_79045_20130507_020315_outLine +BABEL_OP2_205_79107_20130613_004324_inLine +BABEL_OP2_205_79107_20130613_004324_outLine +BABEL_OP2_205_79167_20130331_053551_inLine +BABEL_OP2_205_79167_20130331_053551_outLine +BABEL_OP2_205_79190_20130313_020401_inLine +BABEL_OP2_205_79190_20130313_020401_outLine +BABEL_OP2_205_79590_20131221_025241_inLine +BABEL_OP2_205_79590_20131221_025241_outLine +BABEL_OP2_205_79590_20131221_031508_inLine +BABEL_OP2_205_79590_20131221_031508_outLine +BABEL_OP2_205_79751_20130324_220236_inLine +BABEL_OP2_205_79751_20130324_220236_outLine +BABEL_OP2_205_79858_20130309_210841_inLine +BABEL_OP2_205_79858_20130309_210841_outLine +BABEL_OP2_205_80136_20130406_190838_inLine +BABEL_OP2_205_80136_20130406_190838_outLine +BABEL_OP2_205_80577_20131110_045204_inLine +BABEL_OP2_205_80577_20131110_045204_outLine +BABEL_OP2_205_80881_20130326_002818_inLine +BABEL_OP2_205_80881_20130326_002818_outLine +BABEL_OP2_205_80881_20130326_004157_inLine 
+BABEL_OP2_205_80881_20130326_004157_outLine +BABEL_OP2_205_80881_20130326_005241_inLine +BABEL_OP2_205_80881_20130326_005241_outLine +BABEL_OP2_205_81287_20130414_230143_inLine +BABEL_OP2_205_81287_20130414_230143_outLine +BABEL_OP2_205_81392_20130506_224137_inLine +BABEL_OP2_205_81392_20130506_224137_outLine +BABEL_OP2_205_81404_20130324_072708_inLine +BABEL_OP2_205_81404_20130324_072708_outLine +BABEL_OP2_205_81433_20131110_024152_inLine +BABEL_OP2_205_81433_20131110_024152_outLine +BABEL_OP2_205_81553_20130408_190946_inLine +BABEL_OP2_205_81671_20130407_193047_inLine +BABEL_OP2_205_81671_20130407_193047_outLine +BABEL_OP2_205_81854_20130413_035448_inLine +BABEL_OP2_205_81854_20130413_035448_outLine +BABEL_OP2_205_82030_20130416_024208_inLine +BABEL_OP2_205_82030_20130416_024208_outLine +BABEL_OP2_205_82145_20131101_103425_inLine +BABEL_OP2_205_82145_20131101_103425_outLine +BABEL_OP2_205_82863_20131110_214438_inLine +BABEL_OP2_205_82863_20131110_214438_outLine +BABEL_OP2_205_82863_20131110_220419_inLine +BABEL_OP2_205_82863_20131110_220419_outLine +BABEL_OP2_205_82979_20130321_013427_inLine +BABEL_OP2_205_83062_20130412_213219_inLine +BABEL_OP2_205_83062_20130412_213219_outLine +BABEL_OP2_205_83366_20130406_070242_outLine +BABEL_OP2_205_83436_20130306_064555_inLine +BABEL_OP2_205_83436_20130306_064555_outLine +BABEL_OP2_205_83545_20130413_183305_outLine +BABEL_OP2_205_83775_20130326_223716_inLine +BABEL_OP2_205_83775_20130326_223716_outLine +BABEL_OP2_205_83783_20130316_062751_inLine +BABEL_OP2_205_83783_20130316_062751_outLine +BABEL_OP2_205_84125_20130301_040550_inLine +BABEL_OP2_205_84125_20130301_040550_outLine +BABEL_OP2_205_84370_20130613_203134_inLine +BABEL_OP2_205_84370_20130613_213749_inLine +BABEL_OP2_205_84458_20130508_224724_inLine +BABEL_OP2_205_84458_20130508_224724_outLine +BABEL_OP2_205_84469_20130408_213237_inLine +BABEL_OP2_205_84469_20130408_213237_outLine +BABEL_OP2_205_84583_20130312_211219_inLine +BABEL_OP2_205_84583_20130312_211219_outLine +BABEL_OP2_205_84815_20130413_183704_inLine +BABEL_OP2_205_84815_20130413_183704_outLine +BABEL_OP2_205_84838_20130509_005525_inLine +BABEL_OP2_205_84838_20130509_005525_outLine +BABEL_OP2_205_85179_20130409_002521_inLine +BABEL_OP2_205_85179_20130409_002521_outLine +BABEL_OP2_205_85248_20130414_215500_inLine +BABEL_OP2_205_85248_20130414_215500_outLine +BABEL_OP2_205_85260_20140115_021714_inLine +BABEL_OP2_205_85260_20140115_021714_outLine +BABEL_OP2_205_85322_20130323_013257_inLine +BABEL_OP2_205_85325_20130414_061613_inLine +BABEL_OP2_205_85325_20130414_061613_outLine +BABEL_OP2_205_85331_20130616_014645_inLine +BABEL_OP2_205_85331_20130616_014645_outLine +BABEL_OP2_205_85340_20130321_040745_inLine +BABEL_OP2_205_85340_20130321_040745_outLine +BABEL_OP2_205_86321_20130413_173559_inLine +BABEL_OP2_205_86321_20130413_173559_outLine +BABEL_OP2_205_86467_20130305_074640_inLine +BABEL_OP2_205_86467_20130305_074640_outLine +BABEL_OP2_205_86472_20130408_003043_inLine +BABEL_OP2_205_86472_20130408_003043_outLine +BABEL_OP2_205_86557_20130304_044109_inLine +BABEL_OP2_205_86557_20130304_044109_outLine +BABEL_OP2_205_87298_20130322_201204_inLine +BABEL_OP2_205_87629_20130312_052701_inLine +BABEL_OP2_205_87629_20130312_052701_outLine +BABEL_OP2_205_87884_20130413_194754_inLine +BABEL_OP2_205_87884_20130413_194754_outLine +BABEL_OP2_205_87889_20130408_204610_inLine +BABEL_OP2_205_87889_20130408_204610_outLine +BABEL_OP2_205_88686_20130303_070128_inLine +BABEL_OP2_205_88686_20130303_070128_outLine 
+BABEL_OP2_205_88873_20130326_050532_inLine +BABEL_OP2_205_88873_20130326_050532_outLine +BABEL_OP2_205_89372_20130227_014653_inLine +BABEL_OP2_205_89372_20130227_014653_outLine +BABEL_OP2_205_89457_20131111_005030_inLine +BABEL_OP2_205_89457_20131111_005030_outLine +BABEL_OP2_205_89560_20130507_184514_inLine +BABEL_OP2_205_89560_20130507_184514_outLine +BABEL_OP2_205_89888_20130311_070650_inLine +BABEL_OP2_205_89888_20130311_070650_outLine +BABEL_OP2_205_90347_20130331_202436_outLine +BABEL_OP2_205_90935_20130324_042904_inLine +BABEL_OP2_205_90935_20130324_042904_outLine +BABEL_OP2_205_91189_20131108_222823_inLine +BABEL_OP2_205_91189_20131108_222823_outLine +BABEL_OP2_205_91319_20130408_214039_inLine +BABEL_OP2_205_91319_20130408_214039_outLine +BABEL_OP2_205_91463_20130331_044435_inLine +BABEL_OP2_205_91463_20130331_044435_outLine +BABEL_OP2_205_91581_20130408_184119_inLine +BABEL_OP2_205_91581_20130408_184119_outLine +BABEL_OP2_205_91884_20130413_195918_inLine +BABEL_OP2_205_91884_20130413_195918_outLine +BABEL_OP2_205_91891_20130330_214543_inLine +BABEL_OP2_205_91891_20130330_214543_outLine +BABEL_OP2_205_91971_20140108_225426_inLine +BABEL_OP2_205_91971_20140108_225426_outLine +BABEL_OP2_205_91977_20130331_024658_inLine +BABEL_OP2_205_91977_20130331_024658_outLine +BABEL_OP2_205_91977_20130331_030804_inLine +BABEL_OP2_205_91977_20130331_030804_outLine +BABEL_OP2_205_92077_20130413_012328_inLine +BABEL_OP2_205_92077_20130413_012328_outLine +BABEL_OP2_205_92459_20130330_001356_inLine +BABEL_OP2_205_92459_20130330_001356_outLine +BABEL_OP2_205_92509_20130303_235756_inLine +BABEL_OP2_205_92509_20130303_235756_outLine +BABEL_OP2_205_92740_20130330_215927_inLine +BABEL_OP2_205_92740_20130330_215927_outLine +BABEL_OP2_205_92886_20130318_002931_inLine +BABEL_OP2_205_92886_20130318_002931_outLine +BABEL_OP2_205_92941_20130329_233855_inLine +BABEL_OP2_205_92941_20130329_233855_outLine +BABEL_OP2_205_93224_20130329_231410_inLine +BABEL_OP2_205_93224_20130329_231410_outLine +BABEL_OP2_205_93224_20130329_233120_inLine +BABEL_OP2_205_93224_20130329_233120_outLine +BABEL_OP2_205_93475_20130318_075901_inLine +BABEL_OP2_205_93475_20130318_075901_outLine +BABEL_OP2_205_93861_20130623_023740_inLine +BABEL_OP2_205_93946_20130413_175030_inLine +BABEL_OP2_205_93946_20130413_175030_outLine +BABEL_OP2_205_93946_20130413_180241_inLine +BABEL_OP2_205_93946_20130413_180241_outLine +BABEL_OP2_205_94002_20131110_223144_inLine +BABEL_OP2_205_94002_20131110_223144_outLine +BABEL_OP2_205_94442_20130413_000848_inLine +BABEL_OP2_205_94442_20130413_000848_outLine +BABEL_OP2_205_94465_20130408_073150_inLine +BABEL_OP2_205_94465_20130408_073150_outLine +BABEL_OP2_205_94587_20130330_222137_inLine +BABEL_OP2_205_94587_20130330_222137_outLine +BABEL_OP2_205_95269_20130323_231507_inLine +BABEL_OP2_205_95269_20130323_231507_outLine +BABEL_OP2_205_95294_20130331_225911_inLine +BABEL_OP2_205_95294_20130331_225911_outLine +BABEL_OP2_205_95446_20131109_020511_inLine +BABEL_OP2_205_95446_20131109_020511_outLine +BABEL_OP2_205_95467_20130616_022551_inLine +BABEL_OP2_205_95490_20130303_001325_inLine +BABEL_OP2_205_95490_20130303_001325_outLine +BABEL_OP2_205_95583_20130305_224743_inLine +BABEL_OP2_205_95583_20130305_224743_outLine +BABEL_OP2_205_96324_20130307_062000_inLine +BABEL_OP2_205_96324_20130307_062000_outLine +BABEL_OP2_205_96842_20130413_004424_inLine +BABEL_OP2_205_96842_20130413_004424_outLine +BABEL_OP2_205_96934_20130330_010104_inLine +BABEL_OP2_205_96934_20130330_010104_outLine 
+BABEL_OP2_205_96985_20130305_215036_inLine +BABEL_OP2_205_96985_20130305_215036_outLine +BABEL_OP2_205_97264_20130407_190517_inLine +BABEL_OP2_205_97264_20130407_190517_outLine +BABEL_OP2_205_97363_20130321_030214_inLine +BABEL_OP2_205_97363_20130321_030214_outLine +BABEL_OP2_205_97570_20130406_072121_inLine +BABEL_OP2_205_97570_20130406_072121_outLine +BABEL_OP2_205_97604_20130408_175013_inLine +BABEL_OP2_205_97604_20130408_175013_outLine +BABEL_OP2_205_97731_20130413_011730_inLine +BABEL_OP2_205_97731_20130413_011730_outLine +BABEL_OP2_205_97731_20130413_013459_inLine +BABEL_OP2_205_97731_20130413_013459_outLine +BABEL_OP2_205_98311_20130311_063743_inLine +BABEL_OP2_205_98311_20130311_063743_outLine +BABEL_OP2_205_99264_20130412_221353_inLine +BABEL_OP2_205_99264_20130412_221353_outLine +BABEL_OP2_205_99487_20130310_062912_inLine +BABEL_OP2_205_99487_20130310_062912_outLine +BABEL_OP2_205_99516_20130304_070035_inLine +BABEL_OP2_205_99516_20130304_070035_outLine +BABEL_OP2_205_99718_20130311_081329_inLine +BABEL_OP2_205_99718_20130311_081329_outLine +BABEL_OP2_205_99813_20131110_022455_inLine +BABEL_OP2_205_99813_20131110_022455_outLine +BABEL_OP2_205_99920_20130408_013635_inLine +BABEL_OP2_205_99920_20130408_013635_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/dev.list b/egs/babel/s5d/conf/lists/206-zulu/dev.list new file mode 100644 index 00000000000..52d51a26c88 --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/dev.list @@ -0,0 +1,141 @@ +BABEL_OP1_206_14350_20121123_042710_inLine +BABEL_OP1_206_14350_20121123_042710_outLine +BABEL_OP1_206_15042_20130124_002208_inLine +BABEL_OP1_206_15042_20130124_002208_outLine +BABEL_OP1_206_15042_20130124_003815_inLine +BABEL_OP1_206_15042_20130124_003815_outLine +BABEL_OP1_206_15163_20121129_232215_inLine +BABEL_OP1_206_15163_20121129_232215_outLine +BABEL_OP1_206_19621_20121219_031810_inLine +BABEL_OP1_206_19621_20121219_031810_outLine +BABEL_OP1_206_19663_20121219_173010_inLine +BABEL_OP1_206_19663_20121219_173010_outLine +BABEL_OP1_206_22466_20121130_231814_inLine +BABEL_OP1_206_22466_20121130_231814_outLine +BABEL_OP1_206_23995_20121215_221537_inLine +BABEL_OP1_206_23995_20121215_221537_outLine +BABEL_OP1_206_26999_20121213_022027_inLine +BABEL_OP1_206_26999_20121213_022027_outLine +BABEL_OP1_206_28190_20121213_031401_inLine +BABEL_OP1_206_28190_20121213_031401_outLine +BABEL_OP1_206_28606_20121215_000631_inLine +BABEL_OP1_206_28606_20121215_000631_outLine +BABEL_OP1_206_31182_20121222_050854_inLine +BABEL_OP1_206_31182_20121222_050854_outLine +BABEL_OP1_206_32727_20130601_012544_inLine +BABEL_OP1_206_32727_20130601_012544_outLine +BABEL_OP1_206_34477_20121130_183409_inLine +BABEL_OP1_206_34477_20121130_183409_outLine +BABEL_OP1_206_34477_20121130_184826_inLine +BABEL_OP1_206_34477_20121130_184826_outLine +BABEL_OP1_206_34899_20130602_004027_inLine +BABEL_OP1_206_34899_20130602_004027_outLine +BABEL_OP1_206_35583_20130529_005600_inLine +BABEL_OP1_206_35583_20130529_005600_outLine +BABEL_OP1_206_36219_20121130_184946_inLine +BABEL_OP1_206_36219_20121130_184946_outLine +BABEL_OP1_206_36594_20130601_002535_inLine +BABEL_OP1_206_36594_20130601_002535_outLine +BABEL_OP1_206_36990_20121130_212128_inLine +BABEL_OP1_206_36990_20121130_212128_outLine +BABEL_OP1_206_36990_20121130_213230_inLine +BABEL_OP1_206_36990_20121130_213230_outLine +BABEL_OP1_206_36990_20121130_220005_inLine +BABEL_OP1_206_36990_20121130_220005_outLine +BABEL_OP1_206_40740_20121214_002216_inLine +BABEL_OP1_206_40740_20121214_002216_outLine 
+BABEL_OP1_206_41100_20121129_002525_inLine +BABEL_OP1_206_41100_20121129_002525_outLine +BABEL_OP1_206_41100_20121129_003855_inLine +BABEL_OP1_206_41100_20121129_003855_outLine +BABEL_OP1_206_41493_20121128_222116_inLine +BABEL_OP1_206_41493_20121128_222116_outLine +BABEL_OP1_206_41493_20121128_230231_inLine +BABEL_OP1_206_41493_20121128_230231_outLine +BABEL_OP1_206_41920_20121129_204231_inLine +BABEL_OP1_206_41920_20121129_204231_outLine +BABEL_OP1_206_42600_20121206_212006_inLine +BABEL_OP1_206_42600_20121206_212006_outLine +BABEL_OP1_206_43646_20121206_213819_inLine +BABEL_OP1_206_43646_20121206_213819_outLine +BABEL_OP1_206_47877_20121212_233516_inLine +BABEL_OP1_206_47877_20121212_233516_outLine +BABEL_OP1_206_47877_20121213_000206_inLine +BABEL_OP1_206_47877_20121213_000206_outLine +BABEL_OP1_206_47877_20121213_030248_inLine +BABEL_OP1_206_47877_20121213_030248_outLine +BABEL_OP1_206_49767_20130530_203947_inLine +BABEL_OP1_206_49767_20130530_203947_outLine +BABEL_OP1_206_49902_20121201_230757_inLine +BABEL_OP1_206_49902_20121201_230757_outLine +BABEL_OP1_206_49902_20121202_000107_inLine +BABEL_OP1_206_49902_20121202_000107_outLine +BABEL_OP1_206_54405_20130522_224053_inLine +BABEL_OP1_206_54405_20130522_224053_outLine +BABEL_OP1_206_56198_20121128_190457_inLine +BABEL_OP1_206_56198_20121128_190457_outLine +BABEL_OP1_206_56429_20121220_005243_inLine +BABEL_OP1_206_56429_20121220_005243_outLine +BABEL_OP1_206_56684_20121212_010900_inLine +BABEL_OP1_206_56684_20121212_010900_outLine +BABEL_OP1_206_58815_20121216_231254_inLine +BABEL_OP1_206_58815_20121216_231254_outLine +BABEL_OP1_206_60538_20121205_021137_inLine +BABEL_OP1_206_60538_20121205_021137_outLine +BABEL_OP1_206_60706_20121128_191751_inLine +BABEL_OP1_206_60706_20121128_191751_outLine +BABEL_OP1_206_61011_20121219_024939_inLine +BABEL_OP1_206_61011_20121219_024939_outLine +BABEL_OP1_206_61219_20121204_234808_inLine +BABEL_OP1_206_61219_20121204_234808_outLine +BABEL_OP1_206_62362_20130301_013214_inLine +BABEL_OP1_206_62362_20130301_013214_outLine +BABEL_OP1_206_63220_20130531_002428_inLine +BABEL_OP1_206_63220_20130531_002428_outLine +BABEL_OP1_206_65692_20121212_230954_inLine +BABEL_OP1_206_65692_20121212_230954_outLine +BABEL_OP1_206_66837_20130111_182531_inLine +BABEL_OP1_206_66837_20130111_182531_outLine +BABEL_OP1_206_66959_20121218_192949_inLine +BABEL_OP1_206_66959_20121218_192949_outLine +BABEL_OP1_206_67066_20130604_231822_inLine +BABEL_OP1_206_67066_20130604_231822_outLine +BABEL_OP1_206_71780_20121219_010817_inLine +BABEL_OP1_206_71780_20121219_010817_outLine +BABEL_OP1_206_77225_20130604_013253_inLine +BABEL_OP1_206_77225_20130604_013253_outLine +BABEL_OP1_206_79858_20121126_013705_inLine +BABEL_OP1_206_79858_20121126_013705_outLine +BABEL_OP1_206_81854_20130122_210400_inLine +BABEL_OP1_206_81854_20130122_210400_outLine +BABEL_OP1_206_82224_20130602_234038_inLine +BABEL_OP1_206_82224_20130602_234038_outLine +BABEL_OP1_206_82966_20121213_231116_inLine +BABEL_OP1_206_82966_20121213_231116_outLine +BABEL_OP1_206_84838_20121210_051040_inLine +BABEL_OP1_206_84838_20121210_051040_outLine +BABEL_OP1_206_85048_20121220_202904_inLine +BABEL_OP1_206_85048_20121220_202904_outLine +BABEL_OP1_206_85340_20121129_000834_inLine +BABEL_OP1_206_85340_20121129_000834_outLine +BABEL_OP1_206_85340_20121129_231533_inLine +BABEL_OP1_206_85340_20121129_231533_outLine +BABEL_OP1_206_92252_20130601_235344_inLine +BABEL_OP1_206_92252_20130601_235344_outLine +BABEL_OP1_206_92886_20121128_042622_inLine 
+BABEL_OP1_206_92886_20121128_042622_outLine +BABEL_OP1_206_92886_20121128_045107_inLine +BABEL_OP1_206_92886_20121128_045107_outLine +BABEL_OP1_206_93007_20130528_211314_inLine +BABEL_OP1_206_93007_20130528_211314_outLine +BABEL_OP1_206_95490_20130103_005535_inLine +BABEL_OP1_206_95490_20130103_005535_outLine +BABEL_OP1_206_96584_20130121_011505_inLine +BABEL_OP1_206_96584_20130121_011505_outLine +BABEL_OP1_206_97849_20130123_000229_inLine +BABEL_OP1_206_97849_20130123_000229_outLine +BABEL_OP1_206_97988_20121212_223804_inLine +BABEL_OP1_206_97988_20121212_223804_outLine +BABEL_OP1_206_99594_20121220_022404_outLine +BABEL_OP1_206_99718_20121128_213548_inLine +BABEL_OP1_206_99718_20121128_213548_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/eval.list b/egs/babel/s5d/conf/lists/206-zulu/eval.list new file mode 100644 index 00000000000..b75e559d38b --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/eval.list @@ -0,0 +1,202 @@ +BABEL_OP1_206_10019_20121129_221847_inLine +BABEL_OP1_206_10019_20121129_221847_outLine +BABEL_OP1_206_10184_20130530_225826_inLine +BABEL_OP1_206_10184_20130530_225826_outLine +BABEL_OP1_206_10319_20121201_000052_inLine +BABEL_OP1_206_10319_20121201_000052_outLine +BABEL_OP1_206_10319_20121201_002831_inLine +BABEL_OP1_206_10319_20121201_002831_outLine +BABEL_OP1_206_10416_20121229_182422_inLine +BABEL_OP1_206_10416_20121229_182422_outLine +BABEL_OP1_206_13040_20121206_215505_inLine +BABEL_OP1_206_13040_20121206_215505_outLine +BABEL_OP1_206_13040_20121206_221350_inLine +BABEL_OP1_206_13040_20121206_221350_outLine +BABEL_OP1_206_14229_20121220_002130_inLine +BABEL_OP1_206_14229_20121220_002130_outLine +BABEL_OP1_206_14237_20121130_193638_inLine +BABEL_OP1_206_14237_20121130_193638_outLine +BABEL_OP1_206_15926_20121211_205054_inLine +BABEL_OP1_206_15926_20121211_205054_outLine +BABEL_OP1_206_16787_20121220_025209_inLine +BABEL_OP1_206_16787_20121220_025209_outLine +BABEL_OP1_206_17165_20121128_185603_inLine +BABEL_OP1_206_17165_20121128_185603_outLine +BABEL_OP1_206_17573_20121214_234307_inLine +BABEL_OP1_206_17573_20121214_234307_outLine +BABEL_OP1_206_18863_20121214_201427_inLine +BABEL_OP1_206_18863_20121214_201427_outLine +BABEL_OP1_206_19672_20121218_230453_inLine +BABEL_OP1_206_19672_20121218_230453_outLine +BABEL_OP1_206_21794_20121130_183726_inLine +BABEL_OP1_206_21794_20121130_183726_outLine +BABEL_OP1_206_22641_20130605_195037_inLine +BABEL_OP1_206_22641_20130605_195037_outLine +BABEL_OP1_206_23395_20130110_222315_inLine +BABEL_OP1_206_23395_20130110_222315_outLine +BABEL_OP1_206_23628_20121128_215213_inLine +BABEL_OP1_206_23628_20121128_215213_outLine +BABEL_OP1_206_25220_20130528_232132_inLine +BABEL_OP1_206_25220_20130528_232132_outLine +BABEL_OP1_206_26074_20121221_172845_inLine +BABEL_OP1_206_26074_20121221_172845_outLine +BABEL_OP1_206_26478_20130523_003304_inLine +BABEL_OP1_206_26478_20130523_003304_outLine +BABEL_OP1_206_29208_20121220_212757_inLine +BABEL_OP1_206_29208_20121220_212757_outLine +BABEL_OP1_206_29777_20121220_010458_inLine +BABEL_OP1_206_29777_20121220_010458_outLine +BABEL_OP1_206_29777_20121220_012240_inLine +BABEL_OP1_206_29777_20121220_012240_outLine +BABEL_OP1_206_30250_20121129_205052_inLine +BABEL_OP1_206_30250_20121129_205052_outLine +BABEL_OP1_206_31484_20130530_181941_inLine +BABEL_OP1_206_31484_20130530_181941_outLine +BABEL_OP1_206_31979_20130120_174010_inLine +BABEL_OP1_206_31979_20130120_174010_outLine +BABEL_OP1_206_35000_20121220_022037_inLine +BABEL_OP1_206_35000_20121220_022037_outLine 
+BABEL_OP1_206_35202_20121218_153251_inLine +BABEL_OP1_206_35202_20121218_153251_outLine +BABEL_OP1_206_35706_20130603_175544_inLine +BABEL_OP1_206_35706_20130603_175544_outLine +BABEL_OP1_206_36669_20130528_012812_inLine +BABEL_OP1_206_36669_20130528_012812_outLine +BABEL_OP1_206_37064_20121128_061027_inLine +BABEL_OP1_206_37064_20121128_061027_outLine +BABEL_OP1_206_37064_20121128_224230_inLine +BABEL_OP1_206_37064_20121128_224230_outLine +BABEL_OP1_206_37064_20121128_233033_inLine +BABEL_OP1_206_37064_20121128_233033_outLine +BABEL_OP1_206_40092_20130604_005619_inLine +BABEL_OP1_206_40092_20130604_005619_outLine +BABEL_OP1_206_41741_20121123_161203_inLine +BABEL_OP1_206_41741_20121123_161203_outLine +BABEL_OP1_206_41745_20121206_052354_inLine +BABEL_OP1_206_41745_20121206_052354_outLine +BABEL_OP1_206_42231_20121213_215559_inLine +BABEL_OP1_206_42231_20121213_215559_outLine +BABEL_OP1_206_43920_20130527_173524_inLine +BABEL_OP1_206_43920_20130527_173524_outLine +BABEL_OP1_206_45106_20121207_233620_inLine +BABEL_OP1_206_45106_20121207_233620_outLine +BABEL_OP1_206_45140_20130602_193439_inLine +BABEL_OP1_206_45140_20130602_193439_outLine +BABEL_OP1_206_45777_20121220_211320_inLine +BABEL_OP1_206_45777_20121220_211320_outLine +BABEL_OP1_206_45843_20130103_065538_inLine +BABEL_OP1_206_45843_20130103_065538_outLine +BABEL_OP1_206_46625_20121206_223937_inLine +BABEL_OP1_206_46625_20121206_223937_outLine +BABEL_OP1_206_46712_20121129_221717_inLine +BABEL_OP1_206_46712_20121129_221717_outLine +BABEL_OP1_206_48200_20121218_202643_inLine +BABEL_OP1_206_48200_20121218_202643_outLine +BABEL_OP1_206_48758_20130601_165902_inLine +BABEL_OP1_206_48758_20130601_165902_outLine +BABEL_OP1_206_50962_20121205_031651_inLine +BABEL_OP1_206_50962_20121205_031651_outLine +BABEL_OP1_206_53842_20121203_222845_inLine +BABEL_OP1_206_53842_20121203_222845_outLine +BABEL_OP1_206_54040_20121216_233328_inLine +BABEL_OP1_206_54040_20121216_233328_outLine +BABEL_OP1_206_55742_20121129_210507_inLine +BABEL_OP1_206_55742_20121129_210507_outLine +BABEL_OP1_206_56090_20121130_064154_inLine +BABEL_OP1_206_56090_20121130_064154_outLine +BABEL_OP1_206_56743_20121205_030951_inLine +BABEL_OP1_206_56743_20121205_030951_outLine +BABEL_OP1_206_57650_20130605_164821_inLine +BABEL_OP1_206_57650_20130605_164821_outLine +BABEL_OP1_206_57654_20121201_024813_inLine +BABEL_OP1_206_57654_20121201_024813_outLine +BABEL_OP1_206_59993_20121218_222534_inLine +BABEL_OP1_206_59993_20121218_222534_outLine +BABEL_OP1_206_60282_20130604_201941_inLine +BABEL_OP1_206_60282_20130604_201941_outLine +BABEL_OP1_206_60836_20130523_194516_inLine +BABEL_OP1_206_60836_20130523_194516_outLine +BABEL_OP1_206_62155_20130301_010901_inLine +BABEL_OP1_206_62155_20130301_010901_outLine +BABEL_OP1_206_62835_20121201_223026_inLine +BABEL_OP1_206_62835_20121201_223026_outLine +BABEL_OP1_206_66967_20121128_215012_inLine +BABEL_OP1_206_66967_20121128_215012_outLine +BABEL_OP1_206_67842_20130523_231054_inLine +BABEL_OP1_206_67842_20130523_231054_outLine +BABEL_OP1_206_71282_20121219_154752_inLine +BABEL_OP1_206_71282_20121219_154752_outLine +BABEL_OP1_206_71333_20121219_195507_inLine +BABEL_OP1_206_71333_20121219_195507_outLine +BABEL_OP1_206_71333_20121219_202710_inLine +BABEL_OP1_206_71333_20121219_202710_outLine +BABEL_OP1_206_71333_20121220_020603_inLine +BABEL_OP1_206_71333_20121220_020603_outLine +BABEL_OP1_206_71704_20121203_210805_inLine +BABEL_OP1_206_71704_20121203_210805_outLine +BABEL_OP1_206_73042_20130528_223845_inLine 
+BABEL_OP1_206_73042_20130528_223845_outLine +BABEL_OP1_206_73622_20121203_233522_inLine +BABEL_OP1_206_73622_20121203_233522_outLine +BABEL_OP1_206_73837_20121202_232509_inLine +BABEL_OP1_206_73837_20121202_232509_outLine +BABEL_OP1_206_73837_20121202_234026_inLine +BABEL_OP1_206_73837_20121202_234026_outLine +BABEL_OP1_206_74111_20130527_210704_inLine +BABEL_OP1_206_74111_20130527_210704_outLine +BABEL_OP1_206_74641_20130601_192414_inLine +BABEL_OP1_206_74641_20130601_192414_outLine +BABEL_OP1_206_76773_20121219_022906_inLine +BABEL_OP1_206_76773_20121219_022906_outLine +BABEL_OP1_206_78630_20130420_211941_inLine +BABEL_OP1_206_78630_20130420_211941_outLine +BABEL_OP1_206_78976_20121206_005749_inLine +BABEL_OP1_206_78976_20121206_005749_outLine +BABEL_OP1_206_79820_20121127_235837_inLine +BABEL_OP1_206_79820_20121127_235837_outLine +BABEL_OP1_206_81392_20121219_022235_inLine +BABEL_OP1_206_81392_20121219_022235_outLine +BABEL_OP1_206_81404_20121215_230948_inLine +BABEL_OP1_206_81404_20121215_230948_outLine +BABEL_OP1_206_84125_20121201_213358_inLine +BABEL_OP1_206_84125_20121201_213358_outLine +BABEL_OP1_206_88873_20121129_222922_inLine +BABEL_OP1_206_88873_20121129_222922_outLine +BABEL_OP1_206_89045_20121201_221210_inLine +BABEL_OP1_206_89045_20121201_221210_outLine +BABEL_OP1_206_89045_20121201_222746_inLine +BABEL_OP1_206_89045_20121201_222746_outLine +BABEL_OP1_206_89372_20130103_022242_inLine +BABEL_OP1_206_89372_20130103_022242_outLine +BABEL_OP1_206_90935_20121207_230747_inLine +BABEL_OP1_206_90935_20121207_230747_outLine +BABEL_OP1_206_91593_20130602_212217_inLine +BABEL_OP1_206_91593_20130602_212217_outLine +BABEL_OP1_206_91884_20130531_175329_inLine +BABEL_OP1_206_91884_20130531_175329_outLine +BABEL_OP1_206_92698_20121128_234824_inLine +BABEL_OP1_206_92698_20121128_234824_outLine +BABEL_OP1_206_92698_20121129_000933_inLine +BABEL_OP1_206_92698_20121129_000933_outLine +BABEL_OP1_206_93153_20130524_203739_inLine +BABEL_OP1_206_93153_20130524_203739_outLine +BABEL_OP1_206_93946_20130531_215200_inLine +BABEL_OP1_206_93946_20130531_215200_outLine +BABEL_OP1_206_94002_20121208_002204_inLine +BABEL_OP1_206_94002_20121208_002204_outLine +BABEL_OP1_206_95399_20130528_171818_inLine +BABEL_OP1_206_95399_20130528_171818_outLine +BABEL_OP1_206_96205_20121217_165620_inLine +BABEL_OP1_206_96205_20121217_165620_outLine +BABEL_OP1_206_96205_20121217_171026_inLine +BABEL_OP1_206_96205_20121217_171026_outLine +BABEL_OP1_206_96504_20121207_214704_inLine +BABEL_OP1_206_96504_20121207_214704_outLine +BABEL_OP1_206_98580_20121201_203508_inLine +BABEL_OP1_206_98580_20121201_203508_outLine +BABEL_OP1_206_98888_20130603_202859_inLine +BABEL_OP1_206_98888_20130603_202859_outLine +BABEL_OP1_206_99401_20121123_043326_inLine +BABEL_OP1_206_99401_20121123_043326_outLine +BABEL_OP1_206_99732_20121220_033454_inLine +BABEL_OP1_206_99732_20121220_033454_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/evalpart1.list b/egs/babel/s5d/conf/lists/206-zulu/evalpart1.list new file mode 100644 index 00000000000..6b6bf451b3e --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/evalpart1.list @@ -0,0 +1,72 @@ +BABEL_OP1_206_13040_20121206_215505_inLine +BABEL_OP1_206_13040_20121206_215505_outLine +BABEL_OP1_206_13040_20121206_221350_inLine +BABEL_OP1_206_13040_20121206_221350_outLine +BABEL_OP1_206_18863_20121214_201427_inLine +BABEL_OP1_206_18863_20121214_201427_outLine +BABEL_OP1_206_19672_20121218_230453_inLine +BABEL_OP1_206_19672_20121218_230453_outLine +BABEL_OP1_206_21794_20121130_183726_inLine 
+BABEL_OP1_206_21794_20121130_183726_outLine +BABEL_OP1_206_23395_20130110_222315_inLine +BABEL_OP1_206_23395_20130110_222315_outLine +BABEL_OP1_206_23628_20121128_215213_inLine +BABEL_OP1_206_23628_20121128_215213_outLine +BABEL_OP1_206_30250_20121129_205052_inLine +BABEL_OP1_206_30250_20121129_205052_outLine +BABEL_OP1_206_31979_20130120_174010_inLine +BABEL_OP1_206_31979_20130120_174010_outLine +BABEL_OP1_206_35202_20121218_153251_inLine +BABEL_OP1_206_35202_20121218_153251_outLine +BABEL_OP1_206_37064_20121128_061027_inLine +BABEL_OP1_206_37064_20121128_061027_outLine +BABEL_OP1_206_37064_20121128_224230_inLine +BABEL_OP1_206_37064_20121128_224230_outLine +BABEL_OP1_206_37064_20121128_233033_inLine +BABEL_OP1_206_37064_20121128_233033_outLine +BABEL_OP1_206_41745_20121206_052354_inLine +BABEL_OP1_206_41745_20121206_052354_outLine +BABEL_OP1_206_45140_20130602_193439_inLine +BABEL_OP1_206_45140_20130602_193439_outLine +BABEL_OP1_206_45777_20121220_211320_inLine +BABEL_OP1_206_45777_20121220_211320_outLine +BABEL_OP1_206_48758_20130601_165902_inLine +BABEL_OP1_206_48758_20130601_165902_outLine +BABEL_OP1_206_55742_20121129_210507_inLine +BABEL_OP1_206_55742_20121129_210507_outLine +BABEL_OP1_206_57650_20130605_164821_inLine +BABEL_OP1_206_57650_20130605_164821_outLine +BABEL_OP1_206_57654_20121201_024813_inLine +BABEL_OP1_206_57654_20121201_024813_outLine +BABEL_OP1_206_62155_20130301_010901_inLine +BABEL_OP1_206_62155_20130301_010901_outLine +BABEL_OP1_206_62835_20121201_223026_inLine +BABEL_OP1_206_62835_20121201_223026_outLine +BABEL_OP1_206_71333_20121219_195507_inLine +BABEL_OP1_206_71333_20121219_195507_outLine +BABEL_OP1_206_71333_20121219_202710_inLine +BABEL_OP1_206_71333_20121219_202710_outLine +BABEL_OP1_206_71333_20121220_020603_inLine +BABEL_OP1_206_71333_20121220_020603_outLine +BABEL_OP1_206_71704_20121203_210805_inLine +BABEL_OP1_206_71704_20121203_210805_outLine +BABEL_OP1_206_73622_20121203_233522_inLine +BABEL_OP1_206_73622_20121203_233522_outLine +BABEL_OP1_206_73837_20121202_232509_inLine +BABEL_OP1_206_73837_20121202_232509_outLine +BABEL_OP1_206_73837_20121202_234026_inLine +BABEL_OP1_206_73837_20121202_234026_outLine +BABEL_OP1_206_78630_20130420_211941_inLine +BABEL_OP1_206_78630_20130420_211941_outLine +BABEL_OP1_206_78976_20121206_005749_inLine +BABEL_OP1_206_78976_20121206_005749_outLine +BABEL_OP1_206_81392_20121219_022235_inLine +BABEL_OP1_206_81392_20121219_022235_outLine +BABEL_OP1_206_88873_20121129_222922_inLine +BABEL_OP1_206_88873_20121129_222922_outLine +BABEL_OP1_206_90935_20121207_230747_inLine +BABEL_OP1_206_90935_20121207_230747_outLine +BABEL_OP1_206_98580_20121201_203508_inLine +BABEL_OP1_206_98580_20121201_203508_outLine +BABEL_OP1_206_98888_20130603_202859_inLine +BABEL_OP1_206_98888_20130603_202859_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/train.FullLP.list b/egs/babel/s5d/conf/lists/206-zulu/train.FullLP.list new file mode 100644 index 00000000000..f47e8d654e1 --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/train.FullLP.list @@ -0,0 +1,829 @@ +BABEL_OP1_206_10901_20121128_230024_inLine +BABEL_OP1_206_10901_20121128_230024_outLine +BABEL_OP1_206_10901_20121129_003238_inLine +BABEL_OP1_206_10901_20121129_003238_outLine +BABEL_OP1_206_10966_20121205_213021_inLine +BABEL_OP1_206_10966_20121205_213021_outLine +BABEL_OP1_206_10966_20121205_214750_inLine +BABEL_OP1_206_10966_20121205_214750_outLine +BABEL_OP1_206_11581_20121213_020058_inLine +BABEL_OP1_206_11581_20121213_020058_outLine +BABEL_OP1_206_11797_20121207_001426_inLine 
+BABEL_OP1_206_11797_20121207_001426_outLine +BABEL_OP1_206_11797_20121207_002917_inLine +BABEL_OP1_206_11797_20121207_002917_outLine +BABEL_OP1_206_11859_20130602_013210_inLine +BABEL_OP1_206_11859_20130602_013210_outLine +BABEL_OP1_206_12242_20121218_022109_inLine +BABEL_OP1_206_12242_20121218_022109_outLine +BABEL_OP1_206_12851_20121215_010712_inLine +BABEL_OP1_206_12851_20121215_010712_outLine +BABEL_OP1_206_13030_20121129_225418_inLine +BABEL_OP1_206_13030_20121129_225418_outLine +BABEL_OP1_206_13184_20121216_223430_inLine +BABEL_OP1_206_13184_20121216_223430_outLine +BABEL_OP1_206_13184_20121216_224722_inLine +BABEL_OP1_206_13184_20121216_224722_outLine +BABEL_OP1_206_13483_20121219_205820_inLine +BABEL_OP1_206_13483_20121219_205820_outLine +BABEL_OP1_206_13483_20121219_212915_inLine +BABEL_OP1_206_13483_20121219_212915_outLine +BABEL_OP1_206_13490_20121221_005743_inLine +BABEL_OP1_206_13490_20121221_005743_outLine +BABEL_OP1_206_13744_20121205_205818_inLine +BABEL_OP1_206_13744_20121205_205818_outLine +BABEL_OP1_206_14137_20130118_010712_inLine +BABEL_OP1_206_14137_20130118_010712_outLine +BABEL_OP1_206_14137_20130122_014528_inLine +BABEL_OP1_206_14137_20130122_014528_outLine +BABEL_OP1_206_14179_20121210_224630_inLine +BABEL_OP1_206_14179_20121210_224630_outLine +BABEL_OP1_206_14440_20121218_231347_inLine +BABEL_OP1_206_14440_20121218_231347_outLine +BABEL_OP1_206_14719_20121213_040757_inLine +BABEL_OP1_206_14719_20121213_040757_outLine +BABEL_OP1_206_14729_20130531_183022_inLine +BABEL_OP1_206_14729_20130531_183022_outLine +BABEL_OP1_206_14807_20121221_150943_inLine +BABEL_OP1_206_14807_20121221_150943_outLine +BABEL_OP1_206_14814_20121129_203954_inLine +BABEL_OP1_206_14814_20121129_203954_outLine +BABEL_OP1_206_14899_20121203_021835_inLine +BABEL_OP1_206_14899_20121203_021835_outLine +BABEL_OP1_206_14929_20121203_232411_inLine +BABEL_OP1_206_14929_20121203_232411_outLine +BABEL_OP1_206_15024_20130527_234410_inLine +BABEL_OP1_206_15024_20130527_234410_outLine +BABEL_OP1_206_15324_20121208_010033_inLine +BABEL_OP1_206_15324_20121208_010033_outLine +BABEL_OP1_206_15702_20121214_225618_inLine +BABEL_OP1_206_15702_20121214_225618_outLine +BABEL_OP1_206_15702_20121214_231152_inLine +BABEL_OP1_206_15702_20121214_231152_outLine +BABEL_OP1_206_15702_20121214_232449_inLine +BABEL_OP1_206_15702_20121214_232449_outLine +BABEL_OP1_206_16149_20121201_010342_inLine +BABEL_OP1_206_16149_20121201_010342_outLine +BABEL_OP1_206_16467_20130531_200137_inLine +BABEL_OP1_206_16467_20130531_200137_outLine +BABEL_OP1_206_16475_20130121_210828_inLine +BABEL_OP1_206_16475_20130121_210828_outLine +BABEL_OP1_206_16475_20130121_212136_inLine +BABEL_OP1_206_16475_20130121_212136_outLine +BABEL_OP1_206_16839_20121217_170534_inLine +BABEL_OP1_206_16839_20121217_170534_outLine +BABEL_OP1_206_16886_20130524_232154_inLine +BABEL_OP1_206_16886_20130524_232154_outLine +BABEL_OP1_206_17032_20121219_220514_inLine +BABEL_OP1_206_17032_20121219_220514_outLine +BABEL_OP1_206_17280_20130527_191437_inLine +BABEL_OP1_206_17280_20130527_191437_outLine +BABEL_OP1_206_17440_20121227_213432_inLine +BABEL_OP1_206_17440_20121227_213432_outLine +BABEL_OP1_206_17472_20121214_193824_inLine +BABEL_OP1_206_17472_20121214_193824_outLine +BABEL_OP1_206_17567_20121209_205317_inLine +BABEL_OP1_206_17567_20121209_205317_outLine +BABEL_OP1_206_17567_20121209_211139_inLine +BABEL_OP1_206_17567_20121209_211139_outLine +BABEL_OP1_206_17615_20121214_193534_inLine +BABEL_OP1_206_17615_20121214_193534_outLine 
+BABEL_OP1_206_17881_20130121_005313_inLine +BABEL_OP1_206_17881_20130121_005313_outLine +BABEL_OP1_206_17923_20121130_214207_inLine +BABEL_OP1_206_17923_20121130_214207_outLine +BABEL_OP1_206_18291_20130604_183732_inLine +BABEL_OP1_206_18291_20130604_183732_outLine +BABEL_OP1_206_19722_20121130_203924_inLine +BABEL_OP1_206_19722_20121130_203924_outLine +BABEL_OP1_206_19773_20130101_015259_inLine +BABEL_OP1_206_19773_20130101_015259_outLine +BABEL_OP1_206_19818_20130529_204811_inLine +BABEL_OP1_206_19818_20130529_204811_outLine +BABEL_OP1_206_19877_20130123_175339_inLine +BABEL_OP1_206_19877_20130123_175339_outLine +BABEL_OP1_206_19877_20130123_181047_inLine +BABEL_OP1_206_19877_20130123_181047_outLine +BABEL_OP1_206_20682_20121213_030430_inLine +BABEL_OP1_206_20682_20121213_030430_outLine +BABEL_OP1_206_20800_20130523_220352_inLine +BABEL_OP1_206_20800_20130523_220352_outLine +BABEL_OP1_206_20916_20121205_203848_inLine +BABEL_OP1_206_20916_20121205_203848_outLine +BABEL_OP1_206_20922_20121214_231110_inLine +BABEL_OP1_206_20922_20121214_231110_outLine +BABEL_OP1_206_21004_20121210_215455_inLine +BABEL_OP1_206_21004_20121210_215455_outLine +BABEL_OP1_206_21004_20121210_223449_inLine +BABEL_OP1_206_21004_20121210_223449_outLine +BABEL_OP1_206_21206_20121220_001511_inLine +BABEL_OP1_206_21206_20121220_001511_outLine +BABEL_OP1_206_21327_20130111_022748_inLine +BABEL_OP1_206_21327_20130111_022748_outLine +BABEL_OP1_206_21892_20121213_235725_inLine +BABEL_OP1_206_21892_20121213_235725_outLine +BABEL_OP1_206_22494_20130530_004456_inLine +BABEL_OP1_206_22494_20130530_004456_outLine +BABEL_OP1_206_22624_20121219_210041_inLine +BABEL_OP1_206_22624_20121219_210041_outLine +BABEL_OP1_206_22826_20130121_231859_inLine +BABEL_OP1_206_22826_20130121_231859_outLine +BABEL_OP1_206_22826_20130121_233139_inLine +BABEL_OP1_206_22826_20130121_233139_outLine +BABEL_OP1_206_22965_20121128_011001_inLine +BABEL_OP1_206_22965_20121128_011001_outLine +BABEL_OP1_206_22965_20121128_012241_inLine +BABEL_OP1_206_22965_20121128_012241_outLine +BABEL_OP1_206_23006_20121203_004250_inLine +BABEL_OP1_206_23006_20121203_004250_outLine +BABEL_OP1_206_23006_20121203_073608_inLine +BABEL_OP1_206_23006_20121203_073608_outLine +BABEL_OP1_206_23092_20121227_211821_inLine +BABEL_OP1_206_23092_20121227_211821_outLine +BABEL_OP1_206_23151_20121217_034512_inLine +BABEL_OP1_206_23151_20121217_034512_outLine +BABEL_OP1_206_23153_20130102_224836_inLine +BABEL_OP1_206_23153_20130102_224836_outLine +BABEL_OP1_206_23190_20121219_204325_inLine +BABEL_OP1_206_23190_20121219_204325_outLine +BABEL_OP1_206_23239_20130118_000831_inLine +BABEL_OP1_206_23239_20130118_000831_outLine +BABEL_OP1_206_23505_20121203_010039_inLine +BABEL_OP1_206_23505_20121203_010039_outLine +BABEL_OP1_206_24253_20130120_235750_inLine +BABEL_OP1_206_24253_20130120_235750_outLine +BABEL_OP1_206_24253_20130121_000835_inLine +BABEL_OP1_206_24253_20130121_000835_outLine +BABEL_OP1_206_24253_20130121_012503_inLine +BABEL_OP1_206_24253_20130121_012503_outLine +BABEL_OP1_206_24323_20121214_212407_inLine +BABEL_OP1_206_24323_20121214_212407_outLine +BABEL_OP1_206_24323_20121214_213448_inLine +BABEL_OP1_206_24323_20121214_213448_outLine +BABEL_OP1_206_24532_20121201_203102_inLine +BABEL_OP1_206_24532_20121201_203102_outLine +BABEL_OP1_206_24569_20121210_211659_inLine +BABEL_OP1_206_24569_20121210_211659_outLine +BABEL_OP1_206_24590_20121201_210938_inLine +BABEL_OP1_206_24590_20121201_210938_outLine +BABEL_OP1_206_24590_20121201_215618_inLine 
+BABEL_OP1_206_24590_20121201_215618_outLine +BABEL_OP1_206_24605_20121218_201807_inLine +BABEL_OP1_206_24605_20121218_201807_outLine +BABEL_OP1_206_24982_20130603_194918_inLine +BABEL_OP1_206_24982_20130603_194918_outLine +BABEL_OP1_206_25412_20121210_201120_inLine +BABEL_OP1_206_25412_20121210_201120_outLine +BABEL_OP1_206_25412_20121210_203544_inLine +BABEL_OP1_206_25412_20121210_203544_outLine +BABEL_OP1_206_25496_20130529_000539_inLine +BABEL_OP1_206_25496_20130529_000539_outLine +BABEL_OP1_206_25698_20130603_011444_inLine +BABEL_OP1_206_25698_20130603_011444_outLine +BABEL_OP1_206_25719_20121215_000803_inLine +BABEL_OP1_206_25719_20121215_000803_outLine +BABEL_OP1_206_25767_20121204_021252_inLine +BABEL_OP1_206_25767_20121204_021252_outLine +BABEL_OP1_206_25961_20121202_232650_inLine +BABEL_OP1_206_25961_20121202_232650_outLine +BABEL_OP1_206_25961_20121202_234202_inLine +BABEL_OP1_206_25961_20121202_234202_outLine +BABEL_OP1_206_26206_20130529_172847_inLine +BABEL_OP1_206_26206_20130529_172847_outLine +BABEL_OP1_206_26388_20121202_191806_inLine +BABEL_OP1_206_26388_20121202_191806_outLine +BABEL_OP1_206_26836_20121201_210310_inLine +BABEL_OP1_206_26836_20121201_210310_outLine +BABEL_OP1_206_27042_20121219_230502_inLine +BABEL_OP1_206_27042_20121219_230502_outLine +BABEL_OP1_206_27082_20121220_012037_inLine +BABEL_OP1_206_27082_20121220_012037_outLine +BABEL_OP1_206_27125_20121203_012043_inLine +BABEL_OP1_206_27125_20121203_012043_outLine +BABEL_OP1_206_27203_20121214_210018_inLine +BABEL_OP1_206_27203_20121214_210018_outLine +BABEL_OP1_206_27590_20121216_180900_inLine +BABEL_OP1_206_27590_20121216_180900_outLine +BABEL_OP1_206_27841_20121216_014031_inLine +BABEL_OP1_206_27841_20121216_014031_outLine +BABEL_OP1_206_28303_20121128_201831_inLine +BABEL_OP1_206_28303_20121128_201831_outLine +BABEL_OP1_206_28419_20121207_221153_inLine +BABEL_OP1_206_28419_20121207_221153_outLine +BABEL_OP1_206_28775_20121203_022428_inLine +BABEL_OP1_206_28775_20121203_022428_outLine +BABEL_OP1_206_28945_20130118_003100_inLine +BABEL_OP1_206_28945_20130118_003100_outLine +BABEL_OP1_206_29023_20121201_234219_inLine +BABEL_OP1_206_29023_20121201_234219_outLine +BABEL_OP1_206_29039_20121220_013046_inLine +BABEL_OP1_206_29039_20121220_013046_outLine +BABEL_OP1_206_29135_20121219_224133_inLine +BABEL_OP1_206_29135_20121219_224133_outLine +BABEL_OP1_206_29323_20121219_201726_inLine +BABEL_OP1_206_29323_20121219_201726_outLine +BABEL_OP1_206_29323_20121219_203137_inLine +BABEL_OP1_206_29323_20121219_203137_outLine +BABEL_OP1_206_30395_20121206_014115_inLine +BABEL_OP1_206_30395_20121206_014115_outLine +BABEL_OP1_206_30869_20121227_221910_inLine +BABEL_OP1_206_30869_20121227_221910_outLine +BABEL_OP1_206_31109_20121224_061142_inLine +BABEL_OP1_206_31109_20121224_061142_outLine +BABEL_OP1_206_31490_20121128_234650_inLine +BABEL_OP1_206_31490_20121128_234650_outLine +BABEL_OP1_206_31624_20121123_081518_inLine +BABEL_OP1_206_31624_20121123_081518_outLine +BABEL_OP1_206_31628_20130528_194548_inLine +BABEL_OP1_206_31628_20130528_194548_outLine +BABEL_OP1_206_32122_20121128_184757_inLine +BABEL_OP1_206_32122_20121128_184757_outLine +BABEL_OP1_206_32301_20130530_191142_inLine +BABEL_OP1_206_32301_20130530_191142_outLine +BABEL_OP1_206_32328_20121215_181911_inLine +BABEL_OP1_206_32328_20121215_181911_outLine +BABEL_OP1_206_32708_20121231_225706_inLine +BABEL_OP1_206_32708_20121231_225706_outLine +BABEL_OP1_206_32837_20121213_221825_inLine +BABEL_OP1_206_32837_20121213_221825_outLine 
+BABEL_OP1_206_32837_20121213_223037_inLine +BABEL_OP1_206_32837_20121213_223037_outLine +BABEL_OP1_206_33111_20130601_200233_inLine +BABEL_OP1_206_33111_20130601_200233_outLine +BABEL_OP1_206_33273_20121129_201318_inLine +BABEL_OP1_206_33273_20121129_201318_outLine +BABEL_OP1_206_33355_20121130_055943_inLine +BABEL_OP1_206_33355_20121130_055943_outLine +BABEL_OP1_206_33672_20130524_171145_inLine +BABEL_OP1_206_33672_20130524_171145_outLine +BABEL_OP1_206_33704_20121213_214430_inLine +BABEL_OP1_206_33704_20121213_214430_outLine +BABEL_OP1_206_33840_20121213_230741_inLine +BABEL_OP1_206_33840_20121213_230741_outLine +BABEL_OP1_206_34197_20121128_232538_inLine +BABEL_OP1_206_34197_20121128_232538_outLine +BABEL_OP1_206_34328_20121202_184915_inLine +BABEL_OP1_206_34328_20121202_184915_outLine +BABEL_OP1_206_34564_20121214_020257_inLine +BABEL_OP1_206_34564_20121214_020257_outLine +BABEL_OP1_206_34679_20121206_000152_inLine +BABEL_OP1_206_34679_20121206_000152_outLine +BABEL_OP1_206_34826_20121215_005505_inLine +BABEL_OP1_206_34826_20121215_005505_outLine +BABEL_OP1_206_35008_20121216_210449_inLine +BABEL_OP1_206_35008_20121216_210449_outLine +BABEL_OP1_206_36505_20121213_222927_inLine +BABEL_OP1_206_36505_20121213_222927_outLine +BABEL_OP1_206_36894_20121128_201825_inLine +BABEL_OP1_206_36894_20121128_201825_outLine +BABEL_OP1_206_37598_20130111_224005_inLine +BABEL_OP1_206_37598_20130111_224005_outLine +BABEL_OP1_206_38431_20121214_013939_inLine +BABEL_OP1_206_38431_20121214_013939_outLine +BABEL_OP1_206_38554_20121123_025415_inLine +BABEL_OP1_206_38554_20121123_025415_outLine +BABEL_OP1_206_38689_20121217_013737_inLine +BABEL_OP1_206_38689_20121217_013737_outLine +BABEL_OP1_206_38878_20130530_172309_inLine +BABEL_OP1_206_38878_20130530_172309_outLine +BABEL_OP1_206_39059_20121215_230057_inLine +BABEL_OP1_206_39059_20121215_230057_outLine +BABEL_OP1_206_39059_20121216_000252_inLine +BABEL_OP1_206_39059_20121216_000252_outLine +BABEL_OP1_206_39307_20121207_024156_inLine +BABEL_OP1_206_39307_20121207_024156_outLine +BABEL_OP1_206_39426_20130120_232407_inLine +BABEL_OP1_206_39426_20130120_232407_outLine +BABEL_OP1_206_39426_20130120_233651_inLine +BABEL_OP1_206_39426_20130120_233651_outLine +BABEL_OP1_206_40557_20121218_025254_inLine +BABEL_OP1_206_40557_20121218_025254_outLine +BABEL_OP1_206_40713_20121129_215041_inLine +BABEL_OP1_206_40713_20121129_215041_outLine +BABEL_OP1_206_41097_20121215_173120_inLine +BABEL_OP1_206_41097_20121215_173120_outLine +BABEL_OP1_206_41174_20130604_193434_inLine +BABEL_OP1_206_41174_20130604_193434_outLine +BABEL_OP1_206_41233_20121215_001846_inLine +BABEL_OP1_206_41233_20121215_001846_outLine +BABEL_OP1_206_41598_20130102_233834_inLine +BABEL_OP1_206_41598_20130102_233834_outLine +BABEL_OP1_206_42029_20121220_181050_inLine +BABEL_OP1_206_42029_20121220_181050_outLine +BABEL_OP1_206_42434_20121202_195754_inLine +BABEL_OP1_206_42434_20121202_195754_outLine +BABEL_OP1_206_42434_20121202_202540_inLine +BABEL_OP1_206_42434_20121202_202540_outLine +BABEL_OP1_206_42619_20121213_204854_inLine +BABEL_OP1_206_42619_20121213_204854_outLine +BABEL_OP1_206_42771_20130601_203101_inLine +BABEL_OP1_206_42771_20130601_203101_outLine +BABEL_OP1_206_42834_20121219_015826_inLine +BABEL_OP1_206_42834_20121219_015826_outLine +BABEL_OP1_206_43286_20121125_054930_inLine +BABEL_OP1_206_43286_20121125_054930_outLine +BABEL_OP1_206_43286_20121125_060858_inLine +BABEL_OP1_206_43286_20121125_060858_outLine +BABEL_OP1_206_43286_20121126_003810_inLine 
+BABEL_OP1_206_43286_20121126_003810_outLine +BABEL_OP1_206_43368_20121128_203447_inLine +BABEL_OP1_206_43368_20121128_203447_outLine +BABEL_OP1_206_43784_20121230_224515_inLine +BABEL_OP1_206_43784_20121230_224515_outLine +BABEL_OP1_206_43788_20121223_235436_inLine +BABEL_OP1_206_43788_20121223_235436_outLine +BABEL_OP1_206_44477_20121228_020003_inLine +BABEL_OP1_206_44477_20121228_020003_outLine +BABEL_OP1_206_44619_20121129_201028_inLine +BABEL_OP1_206_44619_20121129_201028_outLine +BABEL_OP1_206_44619_20121129_203209_inLine +BABEL_OP1_206_44619_20121129_203209_outLine +BABEL_OP1_206_45235_20121213_044536_inLine +BABEL_OP1_206_45235_20121213_044536_outLine +BABEL_OP1_206_45536_20121212_023751_inLine +BABEL_OP1_206_45536_20121212_023751_outLine +BABEL_OP1_206_45560_20121210_054617_inLine +BABEL_OP1_206_45560_20121210_054617_outLine +BABEL_OP1_206_45770_20121205_213203_inLine +BABEL_OP1_206_45770_20121205_213203_outLine +BABEL_OP1_206_45851_20130123_013016_inLine +BABEL_OP1_206_45851_20130123_013016_outLine +BABEL_OP1_206_46066_20121218_015244_outLine +BABEL_OP1_206_46066_20121218_020520_inLine +BABEL_OP1_206_46066_20121218_020520_outLine +BABEL_OP1_206_46261_20130524_180914_inLine +BABEL_OP1_206_46261_20130524_180914_outLine +BABEL_OP1_206_46330_20121220_171612_inLine +BABEL_OP1_206_46330_20121220_171612_outLine +BABEL_OP1_206_46558_20121125_000809_inLine +BABEL_OP1_206_46558_20121125_000809_outLine +BABEL_OP1_206_46688_20121130_222025_inLine +BABEL_OP1_206_46688_20121130_222025_outLine +BABEL_OP1_206_46770_20121213_030348_inLine +BABEL_OP1_206_46770_20121213_030348_outLine +BABEL_OP1_206_46976_20121222_002626_inLine +BABEL_OP1_206_46976_20121222_002626_outLine +BABEL_OP1_206_47186_20121214_212658_inLine +BABEL_OP1_206_47186_20121214_212658_outLine +BABEL_OP1_206_47215_20121129_232526_inLine +BABEL_OP1_206_47215_20121129_232526_outLine +BABEL_OP1_206_47487_20121127_232736_inLine +BABEL_OP1_206_47487_20121127_232736_outLine +BABEL_OP1_206_47802_20121213_220928_inLine +BABEL_OP1_206_47802_20121213_220928_outLine +BABEL_OP1_206_47878_20121221_153159_inLine +BABEL_OP1_206_47878_20121221_153159_outLine +BABEL_OP1_206_48789_20121202_173639_inLine +BABEL_OP1_206_48789_20121202_173639_outLine +BABEL_OP1_206_48844_20121123_030435_inLine +BABEL_OP1_206_48844_20121123_030435_outLine +BABEL_OP1_206_48844_20121204_030447_inLine +BABEL_OP1_206_48844_20121204_030447_outLine +BABEL_OP1_206_49001_20121128_201907_inLine +BABEL_OP1_206_49001_20121128_201907_outLine +BABEL_OP1_206_49287_20121219_204754_inLine +BABEL_OP1_206_49287_20121219_204754_outLine +BABEL_OP1_206_49870_20130605_000829_inLine +BABEL_OP1_206_49870_20130605_000829_outLine +BABEL_OP1_206_49907_20121128_055731_inLine +BABEL_OP1_206_49907_20121128_055731_outLine +BABEL_OP1_206_49912_20130603_002155_inLine +BABEL_OP1_206_49912_20130603_002155_outLine +BABEL_OP1_206_50090_20121210_232617_inLine +BABEL_OP1_206_50090_20121210_232617_outLine +BABEL_OP1_206_50090_20121210_234419_inLine +BABEL_OP1_206_50090_20121210_234419_outLine +BABEL_OP1_206_50175_20130604_165733_inLine +BABEL_OP1_206_50175_20130604_165733_outLine +BABEL_OP1_206_50565_20121206_213949_inLine +BABEL_OP1_206_50565_20121206_213949_outLine +BABEL_OP1_206_50565_20121206_215103_inLine +BABEL_OP1_206_50565_20121206_215103_outLine +BABEL_OP1_206_50565_20121206_221547_inLine +BABEL_OP1_206_50565_20121206_221547_outLine +BABEL_OP1_206_50601_20121219_030519_inLine +BABEL_OP1_206_50601_20121219_030519_outLine +BABEL_OP1_206_50681_20121222_003908_inLine 
+BABEL_OP1_206_50681_20121222_003908_outLine +BABEL_OP1_206_50726_20130103_015437_inLine +BABEL_OP1_206_50726_20130103_015437_outLine +BABEL_OP1_206_51015_20121216_025307_inLine +BABEL_OP1_206_51015_20121216_025307_outLine +BABEL_OP1_206_51484_20121213_023814_inLine +BABEL_OP1_206_51484_20121213_023814_outLine +BABEL_OP1_206_51540_20121212_225359_inLine +BABEL_OP1_206_51540_20121212_225359_outLine +BABEL_OP1_206_51955_20121219_004818_inLine +BABEL_OP1_206_51955_20121219_004818_outLine +BABEL_OP1_206_52422_20121220_034724_inLine +BABEL_OP1_206_52422_20121220_034724_outLine +BABEL_OP1_206_52694_20130523_175759_inLine +BABEL_OP1_206_52694_20130523_175759_outLine +BABEL_OP1_206_52804_20121201_184720_inLine +BABEL_OP1_206_52804_20121201_184720_outLine +BABEL_OP1_206_52818_20121228_012038_inLine +BABEL_OP1_206_52818_20121228_012038_outLine +BABEL_OP1_206_52854_20121128_034458_inLine +BABEL_OP1_206_52854_20121128_034458_outLine +BABEL_OP1_206_52854_20121206_214928_inLine +BABEL_OP1_206_52854_20121206_214928_outLine +BABEL_OP1_206_52854_20121206_224251_inLine +BABEL_OP1_206_52854_20121206_224251_outLine +BABEL_OP1_206_52932_20121128_045304_inLine +BABEL_OP1_206_52932_20121128_045304_outLine +BABEL_OP1_206_52932_20121128_233739_inLine +BABEL_OP1_206_52932_20121128_233739_outLine +BABEL_OP1_206_53957_20130522_194644_inLine +BABEL_OP1_206_53957_20130522_194644_outLine +BABEL_OP1_206_54104_20130102_215440_inLine +BABEL_OP1_206_54104_20130102_215440_outLine +BABEL_OP1_206_54162_20121220_230656_inLine +BABEL_OP1_206_54162_20121220_230656_outLine +BABEL_OP1_206_54390_20121130_203012_inLine +BABEL_OP1_206_54390_20121130_203012_outLine +BABEL_OP1_206_54477_20121212_013137_inLine +BABEL_OP1_206_54477_20121212_013137_outLine +BABEL_OP1_206_54530_20130531_233153_inLine +BABEL_OP1_206_54530_20130531_233153_outLine +BABEL_OP1_206_54697_20121228_003256_inLine +BABEL_OP1_206_54697_20121228_003256_outLine +BABEL_OP1_206_54744_20130103_035406_inLine +BABEL_OP1_206_54744_20130103_035406_outLine +BABEL_OP1_206_54953_20121205_023337_inLine +BABEL_OP1_206_54953_20121205_023337_outLine +BABEL_OP1_206_55259_20130118_022049_inLine +BABEL_OP1_206_55259_20130118_022049_outLine +BABEL_OP1_206_55259_20130118_023307_inLine +BABEL_OP1_206_55259_20130118_023307_outLine +BABEL_OP1_206_55818_20121130_051150_inLine +BABEL_OP1_206_55818_20121130_051150_outLine +BABEL_OP1_206_55818_20121130_054331_inLine +BABEL_OP1_206_55818_20121130_054331_outLine +BABEL_OP1_206_55968_20121204_204317_inLine +BABEL_OP1_206_55968_20121204_204317_outLine +BABEL_OP1_206_55968_20121204_211213_inLine +BABEL_OP1_206_55968_20121204_211213_outLine +BABEL_OP1_206_56023_20121227_235521_inLine +BABEL_OP1_206_56023_20121227_235521_outLine +BABEL_OP1_206_56677_20130111_174028_inLine +BABEL_OP1_206_56677_20130111_174028_outLine +BABEL_OP1_206_57093_20121205_002300_inLine +BABEL_OP1_206_57093_20121205_002300_outLine +BABEL_OP1_206_57093_20121205_044909_inLine +BABEL_OP1_206_57093_20121205_044909_outLine +BABEL_OP1_206_57141_20121212_211734_inLine +BABEL_OP1_206_57141_20121212_211734_outLine +BABEL_OP1_206_57529_20121211_232002_inLine +BABEL_OP1_206_57529_20121211_232002_outLine +BABEL_OP1_206_57678_20121201_231032_inLine +BABEL_OP1_206_57678_20121201_231032_outLine +BABEL_OP1_206_58047_20121212_222839_inLine +BABEL_OP1_206_58047_20121212_222839_outLine +BABEL_OP1_206_58313_20121220_211354_inLine +BABEL_OP1_206_58313_20121220_211354_outLine +BABEL_OP1_206_58489_20121221_225602_inLine +BABEL_OP1_206_58489_20121221_225602_outLine 
+BABEL_OP1_206_58734_20121130_203502_inLine +BABEL_OP1_206_58734_20121130_203502_outLine +BABEL_OP1_206_58821_20130531_205929_inLine +BABEL_OP1_206_58821_20130531_205929_outLine +BABEL_OP1_206_60026_20121205_044105_inLine +BABEL_OP1_206_60026_20121205_044105_outLine +BABEL_OP1_206_60299_20130602_222928_inLine +BABEL_OP1_206_60299_20130602_222928_outLine +BABEL_OP1_206_60310_20121220_003756_inLine +BABEL_OP1_206_60310_20121220_003756_outLine +BABEL_OP1_206_60418_20130530_195743_inLine +BABEL_OP1_206_60418_20130530_195743_outLine +BABEL_OP1_206_61167_20121202_012318_inLine +BABEL_OP1_206_61167_20121202_012318_outLine +BABEL_OP1_206_61167_20121203_083125_inLine +BABEL_OP1_206_61167_20121203_083125_outLine +BABEL_OP1_206_61225_20121128_222308_inLine +BABEL_OP1_206_61225_20121128_222308_outLine +BABEL_OP1_206_61348_20121218_225731_inLine +BABEL_OP1_206_61348_20121218_225731_outLine +BABEL_OP1_206_61357_20130120_183001_inLine +BABEL_OP1_206_61357_20130120_183001_outLine +BABEL_OP1_206_61435_20121217_000451_inLine +BABEL_OP1_206_61435_20121217_000451_outLine +BABEL_OP1_206_61678_20121123_013649_inLine +BABEL_OP1_206_61678_20121123_013649_outLine +BABEL_OP1_206_61731_20121128_024803_inLine +BABEL_OP1_206_61731_20121128_024803_outLine +BABEL_OP1_206_61888_20130605_172611_inLine +BABEL_OP1_206_61888_20130605_172611_outLine +BABEL_OP1_206_62200_20130522_212226_inLine +BABEL_OP1_206_62200_20130522_212226_outLine +BABEL_OP1_206_62724_20121218_202436_inLine +BABEL_OP1_206_62724_20121218_202436_outLine +BABEL_OP1_206_62800_20121201_010750_inLine +BABEL_OP1_206_62800_20121201_010750_outLine +BABEL_OP1_206_62800_20121201_015047_inLine +BABEL_OP1_206_62800_20121201_015047_outLine +BABEL_OP1_206_62800_20121201_021942_inLine +BABEL_OP1_206_62800_20121201_021942_outLine +BABEL_OP1_206_62810_20121122_202600_inLine +BABEL_OP1_206_62810_20121122_202600_outLine +BABEL_OP1_206_63081_20121219_012926_inLine +BABEL_OP1_206_63081_20121219_012926_outLine +BABEL_OP1_206_63081_20121219_174450_inLine +BABEL_OP1_206_63081_20121219_174450_outLine +BABEL_OP1_206_63084_20121210_013516_inLine +BABEL_OP1_206_63084_20121210_013516_outLine +BABEL_OP1_206_63425_20121214_182639_inLine +BABEL_OP1_206_63425_20121214_182639_outLine +BABEL_OP1_206_63445_20121207_014019_inLine +BABEL_OP1_206_63445_20121207_014019_outLine +BABEL_OP1_206_63604_20130527_215715_inLine +BABEL_OP1_206_63604_20130527_215715_outLine +BABEL_OP1_206_63670_20121212_212623_inLine +BABEL_OP1_206_63670_20121212_212623_outLine +BABEL_OP1_206_63757_20121222_235730_inLine +BABEL_OP1_206_63757_20121222_235730_outLine +BABEL_OP1_206_63787_20130530_221300_inLine +BABEL_OP1_206_63787_20130530_221300_outLine +BABEL_OP1_206_63906_20130131_014942_inLine +BABEL_OP1_206_63906_20130131_014942_outLine +BABEL_OP1_206_64014_20130122_011323_inLine +BABEL_OP1_206_64014_20130122_011323_outLine +BABEL_OP1_206_64768_20121207_223917_inLine +BABEL_OP1_206_64768_20121207_223917_outLine +BABEL_OP1_206_65064_20121221_000939_inLine +BABEL_OP1_206_65064_20121221_000939_outLine +BABEL_OP1_206_65723_20121129_222430_inLine +BABEL_OP1_206_65723_20121129_222430_outLine +BABEL_OP1_206_65882_20121201_174526_inLine +BABEL_OP1_206_65882_20121201_174526_outLine +BABEL_OP1_206_66001_20130103_012213_inLine +BABEL_OP1_206_66001_20130103_012213_outLine +BABEL_OP1_206_66045_20121129_223013_inLine +BABEL_OP1_206_66045_20121129_223013_outLine +BABEL_OP1_206_66519_20121202_220401_inLine +BABEL_OP1_206_66519_20121202_220401_outLine +BABEL_OP1_206_66916_20130118_005447_inLine 
+BABEL_OP1_206_66916_20130118_005447_outLine +BABEL_OP1_206_66916_20130118_010520_inLine +BABEL_OP1_206_66916_20130118_010520_outLine +BABEL_OP1_206_67622_20121206_210526_inLine +BABEL_OP1_206_67622_20121206_210526_outLine +BABEL_OP1_206_67659_20121219_201336_inLine +BABEL_OP1_206_67659_20121219_201336_outLine +BABEL_OP1_206_68306_20121213_205817_inLine +BABEL_OP1_206_68306_20121213_205817_outLine +BABEL_OP1_206_68385_20121123_231120_inLine +BABEL_OP1_206_68385_20121123_231120_outLine +BABEL_OP1_206_68627_20130122_023725_inLine +BABEL_OP1_206_68627_20130122_023725_outLine +BABEL_OP1_206_68748_20121212_025750_inLine +BABEL_OP1_206_68748_20121212_025750_outLine +BABEL_OP1_206_68924_20121228_001758_inLine +BABEL_OP1_206_68924_20121228_001758_outLine +BABEL_OP1_206_69578_20121214_002009_inLine +BABEL_OP1_206_69578_20121214_002009_outLine +BABEL_OP1_206_69992_20130529_181609_inLine +BABEL_OP1_206_69992_20130529_181609_outLine +BABEL_OP1_206_70121_20121219_215051_inLine +BABEL_OP1_206_70121_20121219_215051_outLine +BABEL_OP1_206_70121_20121219_220824_inLine +BABEL_OP1_206_70121_20121219_220824_outLine +BABEL_OP1_206_70251_20121219_044415_inLine +BABEL_OP1_206_70251_20121219_044415_outLine +BABEL_OP1_206_70343_20121221_023826_inLine +BABEL_OP1_206_70343_20121221_023826_outLine +BABEL_OP1_206_70386_20121207_232647_inLine +BABEL_OP1_206_70386_20121207_232647_outLine +BABEL_OP1_206_71067_20121209_210046_inLine +BABEL_OP1_206_71067_20121209_210046_outLine +BABEL_OP1_206_71067_20121209_214030_inLine +BABEL_OP1_206_71067_20121209_214030_outLine +BABEL_OP1_206_71566_20130604_214443_inLine +BABEL_OP1_206_71566_20130604_214443_outLine +BABEL_OP1_206_72110_20121221_232617_inLine +BABEL_OP1_206_72110_20121221_232617_outLine +BABEL_OP1_206_72319_20130123_022502_inLine +BABEL_OP1_206_72319_20130123_022502_outLine +BABEL_OP1_206_72324_20130602_184851_inLine +BABEL_OP1_206_72324_20130602_184851_outLine +BABEL_OP1_206_72844_20121130_193956_inLine +BABEL_OP1_206_72844_20121130_193956_outLine +BABEL_OP1_206_73005_20130122_021229_inLine +BABEL_OP1_206_73005_20130122_021229_outLine +BABEL_OP1_206_73072_20121205_231914_inLine +BABEL_OP1_206_73072_20121205_231914_outLine +BABEL_OP1_206_73258_20130120_170200_inLine +BABEL_OP1_206_73258_20130120_170200_outLine +BABEL_OP1_206_73301_20130529_214428_inLine +BABEL_OP1_206_73301_20130529_214428_outLine +BABEL_OP1_206_73485_20130122_235208_inLine +BABEL_OP1_206_73485_20130122_235208_outLine +BABEL_OP1_206_73591_20121117_212751_inLine +BABEL_OP1_206_73591_20121117_212751_outLine +BABEL_OP1_206_73964_20130317_202534_inLine +BABEL_OP1_206_73964_20130317_202534_outLine +BABEL_OP1_206_74886_20121128_205141_inLine +BABEL_OP1_206_74886_20121128_205141_outLine +BABEL_OP1_206_75064_20121129_233512_inLine +BABEL_OP1_206_75064_20121129_233512_outLine +BABEL_OP1_206_75505_20130522_234600_inLine +BABEL_OP1_206_75505_20130522_234600_outLine +BABEL_OP1_206_75993_20121128_223040_inLine +BABEL_OP1_206_75993_20121128_223040_outLine +BABEL_OP1_206_76126_20121219_020552_inLine +BABEL_OP1_206_76126_20121219_020552_outLine +BABEL_OP1_206_76238_20130111_190815_inLine +BABEL_OP1_206_76238_20130111_190815_outLine +BABEL_OP1_206_76372_20130603_190448_inLine +BABEL_OP1_206_76372_20130603_190448_outLine +BABEL_OP1_206_76437_20121117_202446_inLine +BABEL_OP1_206_76437_20121117_202446_outLine +BABEL_OP1_206_77730_20130107_234021_inLine +BABEL_OP1_206_77730_20130107_234021_outLine +BABEL_OP1_206_77803_20121130_005638_inLine +BABEL_OP1_206_77803_20121130_005638_outLine 
+BABEL_OP1_206_78398_20121206_003319_inLine +BABEL_OP1_206_78398_20121206_003319_outLine +BABEL_OP1_206_78544_20121220_000743_inLine +BABEL_OP1_206_78544_20121220_000743_outLine +BABEL_OP1_206_78943_20121129_231930_inLine +BABEL_OP1_206_78943_20121129_231930_outLine +BABEL_OP1_206_79080_20121212_205306_inLine +BABEL_OP1_206_79080_20121212_205306_outLine +BABEL_OP1_206_79131_20130123_003404_inLine +BABEL_OP1_206_79131_20130123_003404_outLine +BABEL_OP1_206_79167_20130602_202526_inLine +BABEL_OP1_206_79167_20130602_202526_outLine +BABEL_OP1_206_79367_20121204_001524_inLine +BABEL_OP1_206_79367_20121204_001524_outLine +BABEL_OP1_206_79367_20121204_004137_inLine +BABEL_OP1_206_79367_20121204_004137_outLine +BABEL_OP1_206_79898_20130524_002505_inLine +BABEL_OP1_206_79898_20130524_002505_outLine +BABEL_OP1_206_80241_20130604_001309_inLine +BABEL_OP1_206_80241_20130604_001309_outLine +BABEL_OP1_206_80439_20130527_182722_inLine +BABEL_OP1_206_80439_20130527_182722_outLine +BABEL_OP1_206_80559_20121206_232755_inLine +BABEL_OP1_206_80559_20121206_232755_outLine +BABEL_OP1_206_80781_20121219_233131_inLine +BABEL_OP1_206_80781_20121219_233131_outLine +BABEL_OP1_206_80881_20121204_030141_inLine +BABEL_OP1_206_80881_20121204_030141_outLine +BABEL_OP1_206_81435_20121220_204044_inLine +BABEL_OP1_206_81435_20121220_204044_outLine +BABEL_OP1_206_82035_20121220_195943_inLine +BABEL_OP1_206_82035_20121220_195943_outLine +BABEL_OP1_206_82138_20121129_223223_inLine +BABEL_OP1_206_82138_20121129_223223_outLine +BABEL_OP1_206_82303_20130531_191551_inLine +BABEL_OP1_206_82303_20130531_191551_outLine +BABEL_OP1_206_82391_20121221_015423_inLine +BABEL_OP1_206_82391_20121221_015423_outLine +BABEL_OP1_206_82425_20121129_212519_inLine +BABEL_OP1_206_82425_20121129_212519_outLine +BABEL_OP1_206_82473_20121206_004738_inLine +BABEL_OP1_206_82473_20121206_004738_outLine +BABEL_OP1_206_82622_20130604_222219_inLine +BABEL_OP1_206_82622_20130604_222219_outLine +BABEL_OP1_206_83455_20121205_024244_inLine +BABEL_OP1_206_83455_20121205_024244_outLine +BABEL_OP1_206_84547_20121206_225105_inLine +BABEL_OP1_206_84547_20121206_225105_outLine +BABEL_OP1_206_84605_20121129_212603_inLine +BABEL_OP1_206_84605_20121129_212603_outLine +BABEL_OP1_206_84805_20121214_221155_inLine +BABEL_OP1_206_84805_20121214_221155_outLine +BABEL_OP1_206_85028_20121212_014236_inLine +BABEL_OP1_206_85028_20121212_014236_outLine +BABEL_OP1_206_85248_20121217_174710_inLine +BABEL_OP1_206_85248_20121217_174710_outLine +BABEL_OP1_206_85322_20130530_233851_inLine +BABEL_OP1_206_85322_20130530_233851_outLine +BABEL_OP1_206_85647_20121206_022317_inLine +BABEL_OP1_206_85647_20121206_022317_outLine +BABEL_OP1_206_85647_20121206_024354_inLine +BABEL_OP1_206_85647_20121206_024354_outLine +BABEL_OP1_206_85651_20130420_232505_inLine +BABEL_OP1_206_85651_20130420_232505_outLine +BABEL_OP1_206_86191_20121205_001218_inLine +BABEL_OP1_206_86191_20121205_001218_outLine +BABEL_OP1_206_86321_20121212_025212_inLine +BABEL_OP1_206_86321_20121212_025212_outLine +BABEL_OP1_206_86433_20121220_215310_inLine +BABEL_OP1_206_86433_20121220_215310_outLine +BABEL_OP1_206_86433_20121220_225718_inLine +BABEL_OP1_206_86433_20121220_225718_outLine +BABEL_OP1_206_86472_20121221_010912_inLine +BABEL_OP1_206_86472_20121221_010912_outLine +BABEL_OP1_206_86635_20121218_223238_inLine +BABEL_OP1_206_86635_20121218_223238_outLine +BABEL_OP1_206_86635_20121218_230141_inLine +BABEL_OP1_206_86635_20121218_230141_outLine +BABEL_OP1_206_86715_20130602_174900_inLine 
+BABEL_OP1_206_86715_20130602_174900_outLine +BABEL_OP1_206_86722_20121204_231838_inLine +BABEL_OP1_206_86722_20121204_231838_outLine +BABEL_OP1_206_86860_20130122_004822_inLine +BABEL_OP1_206_86860_20130122_004822_outLine +BABEL_OP1_206_86952_20130601_175321_inLine +BABEL_OP1_206_86952_20130601_175321_outLine +BABEL_OP1_206_87073_20130102_212334_inLine +BABEL_OP1_206_87073_20130102_212334_outLine +BABEL_OP1_206_87074_20121128_194554_inLine +BABEL_OP1_206_87074_20121128_194554_outLine +BABEL_OP1_206_87280_20121207_231125_inLine +BABEL_OP1_206_87280_20121207_231125_outLine +BABEL_OP1_206_87298_20121129_212519_inLine +BABEL_OP1_206_87298_20121129_212519_outLine +BABEL_OP1_206_87298_20121129_213610_inLine +BABEL_OP1_206_87298_20121129_213610_outLine +BABEL_OP1_206_87470_20121203_052237_inLine +BABEL_OP1_206_87470_20121203_052237_outLine +BABEL_OP1_206_87871_20121220_222250_inLine +BABEL_OP1_206_87871_20121220_222250_outLine +BABEL_OP1_206_87921_20121221_003205_inLine +BABEL_OP1_206_87921_20121221_003205_outLine +BABEL_OP1_206_88260_20121208_204256_inLine +BABEL_OP1_206_88260_20121208_204256_outLine +BABEL_OP1_206_88372_20130120_230911_inLine +BABEL_OP1_206_88372_20130120_230911_outLine +BABEL_OP1_206_88925_20130603_230637_inLine +BABEL_OP1_206_88925_20130603_230637_outLine +BABEL_OP1_206_89575_20121220_211420_inLine +BABEL_OP1_206_89575_20121220_211420_outLine +BABEL_OP1_206_89665_20121208_212046_inLine +BABEL_OP1_206_89665_20121208_212046_outLine +BABEL_OP1_206_89943_20121127_034521_inLine +BABEL_OP1_206_89943_20121127_034521_outLine +BABEL_OP1_206_89943_20121128_015307_inLine +BABEL_OP1_206_89943_20121128_015307_outLine +BABEL_OP1_206_90417_20130605_185956_inLine +BABEL_OP1_206_90417_20130605_185956_outLine +BABEL_OP1_206_90572_20130618_045832_inLine +BABEL_OP1_206_90572_20130618_045832_outLine +BABEL_OP1_206_90739_20130604_174758_inLine +BABEL_OP1_206_90739_20130604_174758_outLine +BABEL_OP1_206_90760_20130525_001351_inLine +BABEL_OP1_206_90760_20130525_001351_outLine +BABEL_OP1_206_91080_20121220_024658_inLine +BABEL_OP1_206_91080_20121220_024658_outLine +BABEL_OP1_206_91125_20121123_063516_inLine +BABEL_OP1_206_91125_20121123_063516_outLine +BABEL_OP1_206_91336_20121205_221404_inLine +BABEL_OP1_206_91336_20121205_221404_outLine +BABEL_OP1_206_91581_20121209_193208_inLine +BABEL_OP1_206_91581_20121209_193208_outLine +BABEL_OP1_206_92096_20130123_010912_inLine +BABEL_OP1_206_92096_20130123_010912_outLine +BABEL_OP1_206_92459_20130529_223322_inLine +BABEL_OP1_206_92459_20130529_223322_outLine +BABEL_OP1_206_92527_20121128_232151_inLine +BABEL_OP1_206_92527_20121128_232151_outLine +BABEL_OP1_206_92527_20121128_234105_inLine +BABEL_OP1_206_92527_20121128_234105_outLine +BABEL_OP1_206_92557_20121213_005100_inLine +BABEL_OP1_206_92557_20121213_005100_outLine +BABEL_OP1_206_92740_20121211_184826_inLine +BABEL_OP1_206_92740_20121211_184826_outLine +BABEL_OP1_206_93224_20121211_003624_inLine +BABEL_OP1_206_93224_20121211_003624_outLine +BABEL_OP1_206_93411_20121220_002408_inLine +BABEL_OP1_206_93411_20121220_002408_outLine +BABEL_OP1_206_93632_20121212_021207_inLine +BABEL_OP1_206_93632_20121212_021207_outLine +BABEL_OP1_206_93858_20130605_005238_inLine +BABEL_OP1_206_93858_20130605_005238_outLine +BABEL_OP1_206_93964_20121205_235339_inLine +BABEL_OP1_206_93964_20121205_235339_outLine +BABEL_OP1_206_94025_20121213_025224_inLine +BABEL_OP1_206_94025_20121213_025224_outLine +BABEL_OP1_206_94745_20130531_014707_inLine +BABEL_OP1_206_94745_20130531_014707_outLine 
+BABEL_OP1_206_94869_20121205_203951_inLine +BABEL_OP1_206_94869_20121205_203951_outLine +BABEL_OP1_206_95028_20130601_222202_inLine +BABEL_OP1_206_95028_20130601_222202_outLine +BABEL_OP1_206_95231_20130601_230414_inLine +BABEL_OP1_206_95231_20130601_230414_outLine +BABEL_OP1_206_95446_20121220_221335_inLine +BABEL_OP1_206_95446_20121220_221335_outLine +BABEL_OP1_206_96730_20121220_213139_inLine +BABEL_OP1_206_96730_20121220_213139_outLine +BABEL_OP1_206_96910_20121202_211324_inLine +BABEL_OP1_206_96910_20121202_211324_outLine +BABEL_OP1_206_97376_20121220_234456_inLine +BABEL_OP1_206_97376_20121220_234456_outLine +BABEL_OP1_206_97772_20121123_064042_inLine +BABEL_OP1_206_97772_20121123_064042_outLine +BABEL_OP1_206_98311_20130528_182109_inLine +BABEL_OP1_206_98311_20130528_182109_outLine +BABEL_OP1_206_98390_20121123_064010_inLine +BABEL_OP1_206_98390_20121123_064010_outLine +BABEL_OP1_206_98489_20121201_220216_inLine +BABEL_OP1_206_98489_20121201_220216_outLine +BABEL_OP1_206_99289_20130123_161855_inLine +BABEL_OP1_206_99289_20130123_161855_outLine +BABEL_OP1_206_99289_20130123_163456_inLine +BABEL_OP1_206_99289_20130123_163456_outLine +BABEL_OP1_206_99955_20121219_002822_inLine +BABEL_OP1_206_99955_20121219_002822_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.list b/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.list new file mode 100644 index 00000000000..37be6f9253e --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.list @@ -0,0 +1,124 @@ +BABEL_OP1_206_13030_20121129_225418_inLine +BABEL_OP1_206_13030_20121129_225418_outLine +BABEL_OP1_206_14440_20121218_231347_inLine +BABEL_OP1_206_14440_20121218_231347_outLine +BABEL_OP1_206_15324_20121208_010033_inLine +BABEL_OP1_206_15324_20121208_010033_outLine +BABEL_OP1_206_17440_20121227_213432_inLine +BABEL_OP1_206_17440_20121227_213432_outLine +BABEL_OP1_206_17923_20121130_214207_inLine +BABEL_OP1_206_17923_20121130_214207_outLine +BABEL_OP1_206_18291_20130604_183732_inLine +BABEL_OP1_206_18291_20130604_183732_outLine +BABEL_OP1_206_20682_20121213_030430_inLine +BABEL_OP1_206_20682_20121213_030430_outLine +BABEL_OP1_206_20800_20130523_220352_inLine +BABEL_OP1_206_20800_20130523_220352_outLine +BABEL_OP1_206_23151_20121217_034512_inLine +BABEL_OP1_206_23151_20121217_034512_outLine +BABEL_OP1_206_24605_20121218_201807_inLine +BABEL_OP1_206_24605_20121218_201807_outLine +BABEL_OP1_206_26206_20130529_172847_inLine +BABEL_OP1_206_26206_20130529_172847_outLine +BABEL_OP1_206_27082_20121220_012037_inLine +BABEL_OP1_206_27082_20121220_012037_outLine +BABEL_OP1_206_28419_20121207_221153_inLine +BABEL_OP1_206_28419_20121207_221153_outLine +BABEL_OP1_206_28775_20121203_022428_inLine +BABEL_OP1_206_28775_20121203_022428_outLine +BABEL_OP1_206_31624_20121123_081518_inLine +BABEL_OP1_206_31624_20121123_081518_outLine +BABEL_OP1_206_32708_20121231_225706_inLine +BABEL_OP1_206_32708_20121231_225706_outLine +BABEL_OP1_206_34564_20121214_020257_inLine +BABEL_OP1_206_34564_20121214_020257_outLine +BABEL_OP1_206_36505_20121213_222927_inLine +BABEL_OP1_206_36505_20121213_222927_outLine +BABEL_OP1_206_38431_20121214_013939_inLine +BABEL_OP1_206_38431_20121214_013939_outLine +BABEL_OP1_206_45560_20121210_054617_inLine +BABEL_OP1_206_45560_20121210_054617_outLine +BABEL_OP1_206_45770_20121205_213203_inLine +BABEL_OP1_206_45770_20121205_213203_outLine +BABEL_OP1_206_47186_20121214_212658_inLine +BABEL_OP1_206_47186_20121214_212658_outLine +BABEL_OP1_206_47215_20121129_232526_inLine 
+BABEL_OP1_206_47215_20121129_232526_outLine +BABEL_OP1_206_48789_20121202_173639_inLine +BABEL_OP1_206_48789_20121202_173639_outLine +BABEL_OP1_206_50175_20130604_165733_inLine +BABEL_OP1_206_50175_20130604_165733_outLine +BABEL_OP1_206_50601_20121219_030519_inLine +BABEL_OP1_206_50601_20121219_030519_outLine +BABEL_OP1_206_50726_20130103_015437_inLine +BABEL_OP1_206_50726_20130103_015437_outLine +BABEL_OP1_206_51540_20121212_225359_inLine +BABEL_OP1_206_51540_20121212_225359_outLine +BABEL_OP1_206_52694_20130523_175759_inLine +BABEL_OP1_206_52694_20130523_175759_outLine +BABEL_OP1_206_53957_20130522_194644_inLine +BABEL_OP1_206_53957_20130522_194644_outLine +BABEL_OP1_206_54744_20130103_035406_inLine +BABEL_OP1_206_54744_20130103_035406_outLine +BABEL_OP1_206_55818_20121130_051150_inLine +BABEL_OP1_206_55818_20121130_051150_outLine +BABEL_OP1_206_55818_20121130_054331_inLine +BABEL_OP1_206_55818_20121130_054331_outLine +BABEL_OP1_206_57678_20121201_231032_inLine +BABEL_OP1_206_57678_20121201_231032_outLine +BABEL_OP1_206_60418_20130530_195743_inLine +BABEL_OP1_206_60418_20130530_195743_outLine +BABEL_OP1_206_61225_20121128_222308_inLine +BABEL_OP1_206_61225_20121128_222308_outLine +BABEL_OP1_206_63081_20121219_012926_inLine +BABEL_OP1_206_63081_20121219_012926_outLine +BABEL_OP1_206_63081_20121219_174450_inLine +BABEL_OP1_206_63081_20121219_174450_outLine +BABEL_OP1_206_63445_20121207_014019_inLine +BABEL_OP1_206_63445_20121207_014019_outLine +BABEL_OP1_206_63604_20130527_215715_inLine +BABEL_OP1_206_63604_20130527_215715_outLine +BABEL_OP1_206_65723_20121129_222430_inLine +BABEL_OP1_206_65723_20121129_222430_outLine +BABEL_OP1_206_65882_20121201_174526_inLine +BABEL_OP1_206_65882_20121201_174526_outLine +BABEL_OP1_206_66519_20121202_220401_inLine +BABEL_OP1_206_66519_20121202_220401_outLine +BABEL_OP1_206_67659_20121219_201336_inLine +BABEL_OP1_206_67659_20121219_201336_outLine +BABEL_OP1_206_73072_20121205_231914_inLine +BABEL_OP1_206_73072_20121205_231914_outLine +BABEL_OP1_206_73964_20130317_202534_inLine +BABEL_OP1_206_73964_20130317_202534_outLine +BABEL_OP1_206_76372_20130603_190448_inLine +BABEL_OP1_206_76372_20130603_190448_outLine +BABEL_OP1_206_77730_20130107_234021_inLine +BABEL_OP1_206_77730_20130107_234021_outLine +BABEL_OP1_206_79898_20130524_002505_inLine +BABEL_OP1_206_79898_20130524_002505_outLine +BABEL_OP1_206_80241_20130604_001309_inLine +BABEL_OP1_206_80241_20130604_001309_outLine +BABEL_OP1_206_80881_20121204_030141_inLine +BABEL_OP1_206_80881_20121204_030141_outLine +BABEL_OP1_206_85248_20121217_174710_inLine +BABEL_OP1_206_85248_20121217_174710_outLine +BABEL_OP1_206_86860_20130122_004822_inLine +BABEL_OP1_206_86860_20130122_004822_outLine +BABEL_OP1_206_86952_20130601_175321_inLine +BABEL_OP1_206_86952_20130601_175321_outLine +BABEL_OP1_206_87074_20121128_194554_inLine +BABEL_OP1_206_87074_20121128_194554_outLine +BABEL_OP1_206_87280_20121207_231125_inLine +BABEL_OP1_206_87280_20121207_231125_outLine +BABEL_OP1_206_90417_20130605_185956_inLine +BABEL_OP1_206_90417_20130605_185956_outLine +BABEL_OP1_206_91080_20121220_024658_inLine +BABEL_OP1_206_91080_20121220_024658_outLine +BABEL_OP1_206_91581_20121209_193208_inLine +BABEL_OP1_206_91581_20121209_193208_outLine +BABEL_OP1_206_92096_20130123_010912_inLine +BABEL_OP1_206_92096_20130123_010912_outLine +BABEL_OP1_206_93224_20121211_003624_inLine +BABEL_OP1_206_93224_20121211_003624_outLine +BABEL_OP1_206_98489_20121201_220216_inLine +BABEL_OP1_206_98489_20121201_220216_outLine diff --git 
a/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..dd4d5d3c445 --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.untranscribed.list @@ -0,0 +1,705 @@ +BABEL_OP1_206_10901_20121128_230024_inLine +BABEL_OP1_206_10901_20121128_230024_outLine +BABEL_OP1_206_10901_20121129_003238_inLine +BABEL_OP1_206_10901_20121129_003238_outLine +BABEL_OP1_206_10966_20121205_213021_inLine +BABEL_OP1_206_10966_20121205_213021_outLine +BABEL_OP1_206_10966_20121205_214750_inLine +BABEL_OP1_206_10966_20121205_214750_outLine +BABEL_OP1_206_11581_20121213_020058_inLine +BABEL_OP1_206_11581_20121213_020058_outLine +BABEL_OP1_206_11797_20121207_001426_inLine +BABEL_OP1_206_11797_20121207_001426_outLine +BABEL_OP1_206_11797_20121207_002917_inLine +BABEL_OP1_206_11797_20121207_002917_outLine +BABEL_OP1_206_11859_20130602_013210_inLine +BABEL_OP1_206_11859_20130602_013210_outLine +BABEL_OP1_206_12242_20121218_022109_inLine +BABEL_OP1_206_12242_20121218_022109_outLine +BABEL_OP1_206_12851_20121215_010712_inLine +BABEL_OP1_206_12851_20121215_010712_outLine +BABEL_OP1_206_13184_20121216_223430_inLine +BABEL_OP1_206_13184_20121216_223430_outLine +BABEL_OP1_206_13184_20121216_224722_inLine +BABEL_OP1_206_13184_20121216_224722_outLine +BABEL_OP1_206_13483_20121219_205820_inLine +BABEL_OP1_206_13483_20121219_205820_outLine +BABEL_OP1_206_13483_20121219_212915_inLine +BABEL_OP1_206_13483_20121219_212915_outLine +BABEL_OP1_206_13490_20121221_005743_inLine +BABEL_OP1_206_13490_20121221_005743_outLine +BABEL_OP1_206_13744_20121205_205818_inLine +BABEL_OP1_206_13744_20121205_205818_outLine +BABEL_OP1_206_14137_20130118_010712_inLine +BABEL_OP1_206_14137_20130118_010712_outLine +BABEL_OP1_206_14137_20130122_014528_inLine +BABEL_OP1_206_14137_20130122_014528_outLine +BABEL_OP1_206_14179_20121210_224630_inLine +BABEL_OP1_206_14179_20121210_224630_outLine +BABEL_OP1_206_14719_20121213_040757_inLine +BABEL_OP1_206_14719_20121213_040757_outLine +BABEL_OP1_206_14729_20130531_183022_inLine +BABEL_OP1_206_14729_20130531_183022_outLine +BABEL_OP1_206_14807_20121221_150943_inLine +BABEL_OP1_206_14807_20121221_150943_outLine +BABEL_OP1_206_14814_20121129_203954_inLine +BABEL_OP1_206_14814_20121129_203954_outLine +BABEL_OP1_206_14899_20121203_021835_inLine +BABEL_OP1_206_14899_20121203_021835_outLine +BABEL_OP1_206_14929_20121203_232411_inLine +BABEL_OP1_206_14929_20121203_232411_outLine +BABEL_OP1_206_15024_20130527_234410_inLine +BABEL_OP1_206_15024_20130527_234410_outLine +BABEL_OP1_206_15702_20121214_225618_inLine +BABEL_OP1_206_15702_20121214_225618_outLine +BABEL_OP1_206_15702_20121214_231152_inLine +BABEL_OP1_206_15702_20121214_231152_outLine +BABEL_OP1_206_15702_20121214_232449_inLine +BABEL_OP1_206_15702_20121214_232449_outLine +BABEL_OP1_206_16149_20121201_010342_inLine +BABEL_OP1_206_16149_20121201_010342_outLine +BABEL_OP1_206_16467_20130531_200137_inLine +BABEL_OP1_206_16467_20130531_200137_outLine +BABEL_OP1_206_16475_20130121_210828_inLine +BABEL_OP1_206_16475_20130121_210828_outLine +BABEL_OP1_206_16475_20130121_212136_inLine +BABEL_OP1_206_16475_20130121_212136_outLine +BABEL_OP1_206_16839_20121217_170534_inLine +BABEL_OP1_206_16839_20121217_170534_outLine +BABEL_OP1_206_16886_20130524_232154_inLine +BABEL_OP1_206_16886_20130524_232154_outLine +BABEL_OP1_206_17032_20121219_220514_inLine +BABEL_OP1_206_17032_20121219_220514_outLine +BABEL_OP1_206_17280_20130527_191437_inLine 
+BABEL_OP1_206_17280_20130527_191437_outLine +BABEL_OP1_206_17472_20121214_193824_inLine +BABEL_OP1_206_17472_20121214_193824_outLine +BABEL_OP1_206_17567_20121209_205317_inLine +BABEL_OP1_206_17567_20121209_205317_outLine +BABEL_OP1_206_17567_20121209_211139_inLine +BABEL_OP1_206_17567_20121209_211139_outLine +BABEL_OP1_206_17615_20121214_193534_inLine +BABEL_OP1_206_17615_20121214_193534_outLine +BABEL_OP1_206_17881_20130121_005313_inLine +BABEL_OP1_206_17881_20130121_005313_outLine +BABEL_OP1_206_19722_20121130_203924_inLine +BABEL_OP1_206_19722_20121130_203924_outLine +BABEL_OP1_206_19773_20130101_015259_inLine +BABEL_OP1_206_19773_20130101_015259_outLine +BABEL_OP1_206_19818_20130529_204811_inLine +BABEL_OP1_206_19818_20130529_204811_outLine +BABEL_OP1_206_19877_20130123_175339_inLine +BABEL_OP1_206_19877_20130123_175339_outLine +BABEL_OP1_206_19877_20130123_181047_inLine +BABEL_OP1_206_19877_20130123_181047_outLine +BABEL_OP1_206_20916_20121205_203848_inLine +BABEL_OP1_206_20916_20121205_203848_outLine +BABEL_OP1_206_20922_20121214_231110_inLine +BABEL_OP1_206_20922_20121214_231110_outLine +BABEL_OP1_206_21004_20121210_215455_inLine +BABEL_OP1_206_21004_20121210_215455_outLine +BABEL_OP1_206_21004_20121210_223449_inLine +BABEL_OP1_206_21004_20121210_223449_outLine +BABEL_OP1_206_21206_20121220_001511_inLine +BABEL_OP1_206_21206_20121220_001511_outLine +BABEL_OP1_206_21327_20130111_022748_inLine +BABEL_OP1_206_21327_20130111_022748_outLine +BABEL_OP1_206_21892_20121213_235725_inLine +BABEL_OP1_206_21892_20121213_235725_outLine +BABEL_OP1_206_22494_20130530_004456_inLine +BABEL_OP1_206_22494_20130530_004456_outLine +BABEL_OP1_206_22624_20121219_210041_inLine +BABEL_OP1_206_22624_20121219_210041_outLine +BABEL_OP1_206_22826_20130121_231859_inLine +BABEL_OP1_206_22826_20130121_231859_outLine +BABEL_OP1_206_22826_20130121_233139_inLine +BABEL_OP1_206_22826_20130121_233139_outLine +BABEL_OP1_206_22965_20121128_011001_inLine +BABEL_OP1_206_22965_20121128_011001_outLine +BABEL_OP1_206_22965_20121128_012241_inLine +BABEL_OP1_206_22965_20121128_012241_outLine +BABEL_OP1_206_23006_20121203_004250_inLine +BABEL_OP1_206_23006_20121203_004250_outLine +BABEL_OP1_206_23006_20121203_073608_inLine +BABEL_OP1_206_23006_20121203_073608_outLine +BABEL_OP1_206_23092_20121227_211821_inLine +BABEL_OP1_206_23092_20121227_211821_outLine +BABEL_OP1_206_23153_20130102_224836_inLine +BABEL_OP1_206_23153_20130102_224836_outLine +BABEL_OP1_206_23190_20121219_204325_inLine +BABEL_OP1_206_23190_20121219_204325_outLine +BABEL_OP1_206_23239_20130118_000831_inLine +BABEL_OP1_206_23239_20130118_000831_outLine +BABEL_OP1_206_23505_20121203_010039_inLine +BABEL_OP1_206_23505_20121203_010039_outLine +BABEL_OP1_206_24253_20130120_235750_inLine +BABEL_OP1_206_24253_20130120_235750_outLine +BABEL_OP1_206_24253_20130121_000835_inLine +BABEL_OP1_206_24253_20130121_000835_outLine +BABEL_OP1_206_24253_20130121_012503_inLine +BABEL_OP1_206_24253_20130121_012503_outLine +BABEL_OP1_206_24323_20121214_212407_inLine +BABEL_OP1_206_24323_20121214_212407_outLine +BABEL_OP1_206_24323_20121214_213448_inLine +BABEL_OP1_206_24323_20121214_213448_outLine +BABEL_OP1_206_24532_20121201_203102_inLine +BABEL_OP1_206_24532_20121201_203102_outLine +BABEL_OP1_206_24569_20121210_211659_inLine +BABEL_OP1_206_24569_20121210_211659_outLine +BABEL_OP1_206_24590_20121201_210938_inLine +BABEL_OP1_206_24590_20121201_210938_outLine +BABEL_OP1_206_24590_20121201_215618_inLine +BABEL_OP1_206_24590_20121201_215618_outLine 
+BABEL_OP1_206_24982_20130603_194918_inLine +BABEL_OP1_206_24982_20130603_194918_outLine +BABEL_OP1_206_25412_20121210_201120_inLine +BABEL_OP1_206_25412_20121210_201120_outLine +BABEL_OP1_206_25412_20121210_203544_inLine +BABEL_OP1_206_25412_20121210_203544_outLine +BABEL_OP1_206_25496_20130529_000539_inLine +BABEL_OP1_206_25496_20130529_000539_outLine +BABEL_OP1_206_25698_20130603_011444_inLine +BABEL_OP1_206_25698_20130603_011444_outLine +BABEL_OP1_206_25719_20121215_000803_inLine +BABEL_OP1_206_25719_20121215_000803_outLine +BABEL_OP1_206_25767_20121204_021252_inLine +BABEL_OP1_206_25767_20121204_021252_outLine +BABEL_OP1_206_25961_20121202_232650_inLine +BABEL_OP1_206_25961_20121202_232650_outLine +BABEL_OP1_206_25961_20121202_234202_inLine +BABEL_OP1_206_25961_20121202_234202_outLine +BABEL_OP1_206_26388_20121202_191806_inLine +BABEL_OP1_206_26388_20121202_191806_outLine +BABEL_OP1_206_26836_20121201_210310_inLine +BABEL_OP1_206_26836_20121201_210310_outLine +BABEL_OP1_206_27042_20121219_230502_inLine +BABEL_OP1_206_27042_20121219_230502_outLine +BABEL_OP1_206_27125_20121203_012043_inLine +BABEL_OP1_206_27125_20121203_012043_outLine +BABEL_OP1_206_27203_20121214_210018_inLine +BABEL_OP1_206_27203_20121214_210018_outLine +BABEL_OP1_206_27590_20121216_180900_inLine +BABEL_OP1_206_27590_20121216_180900_outLine +BABEL_OP1_206_27841_20121216_014031_inLine +BABEL_OP1_206_27841_20121216_014031_outLine +BABEL_OP1_206_28303_20121128_201831_inLine +BABEL_OP1_206_28303_20121128_201831_outLine +BABEL_OP1_206_28945_20130118_003100_inLine +BABEL_OP1_206_28945_20130118_003100_outLine +BABEL_OP1_206_29023_20121201_234219_inLine +BABEL_OP1_206_29023_20121201_234219_outLine +BABEL_OP1_206_29039_20121220_013046_inLine +BABEL_OP1_206_29039_20121220_013046_outLine +BABEL_OP1_206_29135_20121219_224133_inLine +BABEL_OP1_206_29135_20121219_224133_outLine +BABEL_OP1_206_29323_20121219_201726_inLine +BABEL_OP1_206_29323_20121219_201726_outLine +BABEL_OP1_206_29323_20121219_203137_inLine +BABEL_OP1_206_29323_20121219_203137_outLine +BABEL_OP1_206_30395_20121206_014115_inLine +BABEL_OP1_206_30395_20121206_014115_outLine +BABEL_OP1_206_30869_20121227_221910_inLine +BABEL_OP1_206_30869_20121227_221910_outLine +BABEL_OP1_206_31109_20121224_061142_inLine +BABEL_OP1_206_31109_20121224_061142_outLine +BABEL_OP1_206_31490_20121128_234650_inLine +BABEL_OP1_206_31490_20121128_234650_outLine +BABEL_OP1_206_31628_20130528_194548_inLine +BABEL_OP1_206_31628_20130528_194548_outLine +BABEL_OP1_206_32122_20121128_184757_inLine +BABEL_OP1_206_32122_20121128_184757_outLine +BABEL_OP1_206_32301_20130530_191142_inLine +BABEL_OP1_206_32301_20130530_191142_outLine +BABEL_OP1_206_32328_20121215_181911_inLine +BABEL_OP1_206_32328_20121215_181911_outLine +BABEL_OP1_206_32837_20121213_221825_inLine +BABEL_OP1_206_32837_20121213_221825_outLine +BABEL_OP1_206_32837_20121213_223037_inLine +BABEL_OP1_206_32837_20121213_223037_outLine +BABEL_OP1_206_33111_20130601_200233_inLine +BABEL_OP1_206_33111_20130601_200233_outLine +BABEL_OP1_206_33273_20121129_201318_inLine +BABEL_OP1_206_33273_20121129_201318_outLine +BABEL_OP1_206_33355_20121130_055943_inLine +BABEL_OP1_206_33355_20121130_055943_outLine +BABEL_OP1_206_33672_20130524_171145_inLine +BABEL_OP1_206_33672_20130524_171145_outLine +BABEL_OP1_206_33704_20121213_214430_inLine +BABEL_OP1_206_33704_20121213_214430_outLine +BABEL_OP1_206_33840_20121213_230741_inLine +BABEL_OP1_206_33840_20121213_230741_outLine +BABEL_OP1_206_34197_20121128_232538_inLine 
+BABEL_OP1_206_34197_20121128_232538_outLine +BABEL_OP1_206_34328_20121202_184915_inLine +BABEL_OP1_206_34328_20121202_184915_outLine +BABEL_OP1_206_34679_20121206_000152_inLine +BABEL_OP1_206_34679_20121206_000152_outLine +BABEL_OP1_206_34826_20121215_005505_inLine +BABEL_OP1_206_34826_20121215_005505_outLine +BABEL_OP1_206_35008_20121216_210449_inLine +BABEL_OP1_206_35008_20121216_210449_outLine +BABEL_OP1_206_36894_20121128_201825_inLine +BABEL_OP1_206_36894_20121128_201825_outLine +BABEL_OP1_206_37598_20130111_224005_inLine +BABEL_OP1_206_37598_20130111_224005_outLine +BABEL_OP1_206_38554_20121123_025415_inLine +BABEL_OP1_206_38554_20121123_025415_outLine +BABEL_OP1_206_38689_20121217_013737_inLine +BABEL_OP1_206_38689_20121217_013737_outLine +BABEL_OP1_206_38878_20130530_172309_inLine +BABEL_OP1_206_38878_20130530_172309_outLine +BABEL_OP1_206_39059_20121215_230057_inLine +BABEL_OP1_206_39059_20121215_230057_outLine +BABEL_OP1_206_39059_20121216_000252_inLine +BABEL_OP1_206_39059_20121216_000252_outLine +BABEL_OP1_206_39307_20121207_024156_inLine +BABEL_OP1_206_39307_20121207_024156_outLine +BABEL_OP1_206_39426_20130120_232407_inLine +BABEL_OP1_206_39426_20130120_232407_outLine +BABEL_OP1_206_39426_20130120_233651_inLine +BABEL_OP1_206_39426_20130120_233651_outLine +BABEL_OP1_206_40557_20121218_025254_inLine +BABEL_OP1_206_40557_20121218_025254_outLine +BABEL_OP1_206_40713_20121129_215041_inLine +BABEL_OP1_206_40713_20121129_215041_outLine +BABEL_OP1_206_41097_20121215_173120_inLine +BABEL_OP1_206_41097_20121215_173120_outLine +BABEL_OP1_206_41174_20130604_193434_inLine +BABEL_OP1_206_41174_20130604_193434_outLine +BABEL_OP1_206_41233_20121215_001846_inLine +BABEL_OP1_206_41233_20121215_001846_outLine +BABEL_OP1_206_41598_20130102_233834_inLine +BABEL_OP1_206_41598_20130102_233834_outLine +BABEL_OP1_206_42029_20121220_181050_inLine +BABEL_OP1_206_42029_20121220_181050_outLine +BABEL_OP1_206_42434_20121202_195754_inLine +BABEL_OP1_206_42434_20121202_195754_outLine +BABEL_OP1_206_42434_20121202_202540_inLine +BABEL_OP1_206_42434_20121202_202540_outLine +BABEL_OP1_206_42619_20121213_204854_inLine +BABEL_OP1_206_42619_20121213_204854_outLine +BABEL_OP1_206_42771_20130601_203101_inLine +BABEL_OP1_206_42771_20130601_203101_outLine +BABEL_OP1_206_42834_20121219_015826_inLine +BABEL_OP1_206_42834_20121219_015826_outLine +BABEL_OP1_206_43286_20121125_054930_inLine +BABEL_OP1_206_43286_20121125_054930_outLine +BABEL_OP1_206_43286_20121125_060858_inLine +BABEL_OP1_206_43286_20121125_060858_outLine +BABEL_OP1_206_43286_20121126_003810_inLine +BABEL_OP1_206_43286_20121126_003810_outLine +BABEL_OP1_206_43368_20121128_203447_inLine +BABEL_OP1_206_43368_20121128_203447_outLine +BABEL_OP1_206_43784_20121230_224515_inLine +BABEL_OP1_206_43784_20121230_224515_outLine +BABEL_OP1_206_43788_20121223_235436_inLine +BABEL_OP1_206_43788_20121223_235436_outLine +BABEL_OP1_206_44477_20121228_020003_inLine +BABEL_OP1_206_44477_20121228_020003_outLine +BABEL_OP1_206_44619_20121129_201028_inLine +BABEL_OP1_206_44619_20121129_201028_outLine +BABEL_OP1_206_44619_20121129_203209_inLine +BABEL_OP1_206_44619_20121129_203209_outLine +BABEL_OP1_206_45235_20121213_044536_inLine +BABEL_OP1_206_45235_20121213_044536_outLine +BABEL_OP1_206_45536_20121212_023751_inLine +BABEL_OP1_206_45536_20121212_023751_outLine +BABEL_OP1_206_45851_20130123_013016_inLine +BABEL_OP1_206_45851_20130123_013016_outLine +BABEL_OP1_206_46066_20121218_015244_outLine +BABEL_OP1_206_46066_20121218_020520_inLine 
+BABEL_OP1_206_46066_20121218_020520_outLine +BABEL_OP1_206_46261_20130524_180914_inLine +BABEL_OP1_206_46261_20130524_180914_outLine +BABEL_OP1_206_46330_20121220_171612_inLine +BABEL_OP1_206_46330_20121220_171612_outLine +BABEL_OP1_206_46558_20121125_000809_inLine +BABEL_OP1_206_46558_20121125_000809_outLine +BABEL_OP1_206_46688_20121130_222025_inLine +BABEL_OP1_206_46688_20121130_222025_outLine +BABEL_OP1_206_46770_20121213_030348_inLine +BABEL_OP1_206_46770_20121213_030348_outLine +BABEL_OP1_206_46976_20121222_002626_inLine +BABEL_OP1_206_46976_20121222_002626_outLine +BABEL_OP1_206_47487_20121127_232736_inLine +BABEL_OP1_206_47487_20121127_232736_outLine +BABEL_OP1_206_47802_20121213_220928_inLine +BABEL_OP1_206_47802_20121213_220928_outLine +BABEL_OP1_206_47878_20121221_153159_inLine +BABEL_OP1_206_47878_20121221_153159_outLine +BABEL_OP1_206_48844_20121123_030435_inLine +BABEL_OP1_206_48844_20121123_030435_outLine +BABEL_OP1_206_48844_20121204_030447_inLine +BABEL_OP1_206_48844_20121204_030447_outLine +BABEL_OP1_206_49001_20121128_201907_inLine +BABEL_OP1_206_49001_20121128_201907_outLine +BABEL_OP1_206_49287_20121219_204754_inLine +BABEL_OP1_206_49287_20121219_204754_outLine +BABEL_OP1_206_49870_20130605_000829_inLine +BABEL_OP1_206_49870_20130605_000829_outLine +BABEL_OP1_206_49907_20121128_055731_inLine +BABEL_OP1_206_49907_20121128_055731_outLine +BABEL_OP1_206_49912_20130603_002155_inLine +BABEL_OP1_206_49912_20130603_002155_outLine +BABEL_OP1_206_50090_20121210_232617_inLine +BABEL_OP1_206_50090_20121210_232617_outLine +BABEL_OP1_206_50090_20121210_234419_inLine +BABEL_OP1_206_50090_20121210_234419_outLine +BABEL_OP1_206_50565_20121206_213949_inLine +BABEL_OP1_206_50565_20121206_213949_outLine +BABEL_OP1_206_50565_20121206_215103_inLine +BABEL_OP1_206_50565_20121206_215103_outLine +BABEL_OP1_206_50565_20121206_221547_inLine +BABEL_OP1_206_50565_20121206_221547_outLine +BABEL_OP1_206_50681_20121222_003908_inLine +BABEL_OP1_206_50681_20121222_003908_outLine +BABEL_OP1_206_51015_20121216_025307_inLine +BABEL_OP1_206_51015_20121216_025307_outLine +BABEL_OP1_206_51484_20121213_023814_inLine +BABEL_OP1_206_51484_20121213_023814_outLine +BABEL_OP1_206_51955_20121219_004818_inLine +BABEL_OP1_206_51955_20121219_004818_outLine +BABEL_OP1_206_52422_20121220_034724_inLine +BABEL_OP1_206_52422_20121220_034724_outLine +BABEL_OP1_206_52804_20121201_184720_inLine +BABEL_OP1_206_52804_20121201_184720_outLine +BABEL_OP1_206_52818_20121228_012038_inLine +BABEL_OP1_206_52818_20121228_012038_outLine +BABEL_OP1_206_52854_20121128_034458_inLine +BABEL_OP1_206_52854_20121128_034458_outLine +BABEL_OP1_206_52854_20121206_214928_inLine +BABEL_OP1_206_52854_20121206_214928_outLine +BABEL_OP1_206_52854_20121206_224251_inLine +BABEL_OP1_206_52854_20121206_224251_outLine +BABEL_OP1_206_52932_20121128_045304_inLine +BABEL_OP1_206_52932_20121128_045304_outLine +BABEL_OP1_206_52932_20121128_233739_inLine +BABEL_OP1_206_52932_20121128_233739_outLine +BABEL_OP1_206_54104_20130102_215440_inLine +BABEL_OP1_206_54104_20130102_215440_outLine +BABEL_OP1_206_54162_20121220_230656_inLine +BABEL_OP1_206_54162_20121220_230656_outLine +BABEL_OP1_206_54390_20121130_203012_inLine +BABEL_OP1_206_54390_20121130_203012_outLine +BABEL_OP1_206_54477_20121212_013137_inLine +BABEL_OP1_206_54477_20121212_013137_outLine +BABEL_OP1_206_54530_20130531_233153_inLine +BABEL_OP1_206_54530_20130531_233153_outLine +BABEL_OP1_206_54697_20121228_003256_inLine +BABEL_OP1_206_54697_20121228_003256_outLine 
+BABEL_OP1_206_54953_20121205_023337_inLine +BABEL_OP1_206_54953_20121205_023337_outLine +BABEL_OP1_206_55259_20130118_022049_inLine +BABEL_OP1_206_55259_20130118_022049_outLine +BABEL_OP1_206_55259_20130118_023307_inLine +BABEL_OP1_206_55259_20130118_023307_outLine +BABEL_OP1_206_55968_20121204_204317_inLine +BABEL_OP1_206_55968_20121204_204317_outLine +BABEL_OP1_206_55968_20121204_211213_inLine +BABEL_OP1_206_55968_20121204_211213_outLine +BABEL_OP1_206_56023_20121227_235521_inLine +BABEL_OP1_206_56023_20121227_235521_outLine +BABEL_OP1_206_56677_20130111_174028_inLine +BABEL_OP1_206_56677_20130111_174028_outLine +BABEL_OP1_206_57093_20121205_002300_inLine +BABEL_OP1_206_57093_20121205_002300_outLine +BABEL_OP1_206_57093_20121205_044909_inLine +BABEL_OP1_206_57093_20121205_044909_outLine +BABEL_OP1_206_57141_20121212_211734_inLine +BABEL_OP1_206_57141_20121212_211734_outLine +BABEL_OP1_206_57529_20121211_232002_inLine +BABEL_OP1_206_57529_20121211_232002_outLine +BABEL_OP1_206_58047_20121212_222839_inLine +BABEL_OP1_206_58047_20121212_222839_outLine +BABEL_OP1_206_58313_20121220_211354_inLine +BABEL_OP1_206_58313_20121220_211354_outLine +BABEL_OP1_206_58489_20121221_225602_inLine +BABEL_OP1_206_58489_20121221_225602_outLine +BABEL_OP1_206_58734_20121130_203502_inLine +BABEL_OP1_206_58734_20121130_203502_outLine +BABEL_OP1_206_58821_20130531_205929_inLine +BABEL_OP1_206_58821_20130531_205929_outLine +BABEL_OP1_206_60026_20121205_044105_inLine +BABEL_OP1_206_60026_20121205_044105_outLine +BABEL_OP1_206_60299_20130602_222928_inLine +BABEL_OP1_206_60299_20130602_222928_outLine +BABEL_OP1_206_60310_20121220_003756_inLine +BABEL_OP1_206_60310_20121220_003756_outLine +BABEL_OP1_206_61167_20121202_012318_inLine +BABEL_OP1_206_61167_20121202_012318_outLine +BABEL_OP1_206_61167_20121203_083125_inLine +BABEL_OP1_206_61167_20121203_083125_outLine +BABEL_OP1_206_61348_20121218_225731_inLine +BABEL_OP1_206_61348_20121218_225731_outLine +BABEL_OP1_206_61357_20130120_183001_inLine +BABEL_OP1_206_61357_20130120_183001_outLine +BABEL_OP1_206_61435_20121217_000451_inLine +BABEL_OP1_206_61435_20121217_000451_outLine +BABEL_OP1_206_61678_20121123_013649_inLine +BABEL_OP1_206_61678_20121123_013649_outLine +BABEL_OP1_206_61731_20121128_024803_inLine +BABEL_OP1_206_61731_20121128_024803_outLine +BABEL_OP1_206_61888_20130605_172611_inLine +BABEL_OP1_206_61888_20130605_172611_outLine +BABEL_OP1_206_62200_20130522_212226_inLine +BABEL_OP1_206_62200_20130522_212226_outLine +BABEL_OP1_206_62724_20121218_202436_inLine +BABEL_OP1_206_62724_20121218_202436_outLine +BABEL_OP1_206_62800_20121201_010750_inLine +BABEL_OP1_206_62800_20121201_010750_outLine +BABEL_OP1_206_62800_20121201_015047_inLine +BABEL_OP1_206_62800_20121201_015047_outLine +BABEL_OP1_206_62800_20121201_021942_inLine +BABEL_OP1_206_62800_20121201_021942_outLine +BABEL_OP1_206_62810_20121122_202600_inLine +BABEL_OP1_206_62810_20121122_202600_outLine +BABEL_OP1_206_63084_20121210_013516_inLine +BABEL_OP1_206_63084_20121210_013516_outLine +BABEL_OP1_206_63425_20121214_182639_inLine +BABEL_OP1_206_63425_20121214_182639_outLine +BABEL_OP1_206_63670_20121212_212623_inLine +BABEL_OP1_206_63670_20121212_212623_outLine +BABEL_OP1_206_63757_20121222_235730_inLine +BABEL_OP1_206_63757_20121222_235730_outLine +BABEL_OP1_206_63787_20130530_221300_inLine +BABEL_OP1_206_63787_20130530_221300_outLine +BABEL_OP1_206_63906_20130131_014942_inLine +BABEL_OP1_206_63906_20130131_014942_outLine +BABEL_OP1_206_64014_20130122_011323_inLine 
+BABEL_OP1_206_64014_20130122_011323_outLine +BABEL_OP1_206_64768_20121207_223917_inLine +BABEL_OP1_206_64768_20121207_223917_outLine +BABEL_OP1_206_65064_20121221_000939_inLine +BABEL_OP1_206_65064_20121221_000939_outLine +BABEL_OP1_206_66001_20130103_012213_inLine +BABEL_OP1_206_66001_20130103_012213_outLine +BABEL_OP1_206_66045_20121129_223013_inLine +BABEL_OP1_206_66045_20121129_223013_outLine +BABEL_OP1_206_66916_20130118_005447_inLine +BABEL_OP1_206_66916_20130118_005447_outLine +BABEL_OP1_206_66916_20130118_010520_inLine +BABEL_OP1_206_66916_20130118_010520_outLine +BABEL_OP1_206_67622_20121206_210526_inLine +BABEL_OP1_206_67622_20121206_210526_outLine +BABEL_OP1_206_68306_20121213_205817_inLine +BABEL_OP1_206_68306_20121213_205817_outLine +BABEL_OP1_206_68385_20121123_231120_inLine +BABEL_OP1_206_68385_20121123_231120_outLine +BABEL_OP1_206_68627_20130122_023725_inLine +BABEL_OP1_206_68627_20130122_023725_outLine +BABEL_OP1_206_68748_20121212_025750_inLine +BABEL_OP1_206_68748_20121212_025750_outLine +BABEL_OP1_206_68924_20121228_001758_inLine +BABEL_OP1_206_68924_20121228_001758_outLine +BABEL_OP1_206_69578_20121214_002009_inLine +BABEL_OP1_206_69578_20121214_002009_outLine +BABEL_OP1_206_69992_20130529_181609_inLine +BABEL_OP1_206_69992_20130529_181609_outLine +BABEL_OP1_206_70121_20121219_215051_inLine +BABEL_OP1_206_70121_20121219_215051_outLine +BABEL_OP1_206_70121_20121219_220824_inLine +BABEL_OP1_206_70121_20121219_220824_outLine +BABEL_OP1_206_70251_20121219_044415_inLine +BABEL_OP1_206_70251_20121219_044415_outLine +BABEL_OP1_206_70343_20121221_023826_inLine +BABEL_OP1_206_70343_20121221_023826_outLine +BABEL_OP1_206_70386_20121207_232647_inLine +BABEL_OP1_206_70386_20121207_232647_outLine +BABEL_OP1_206_71067_20121209_210046_inLine +BABEL_OP1_206_71067_20121209_210046_outLine +BABEL_OP1_206_71067_20121209_214030_inLine +BABEL_OP1_206_71067_20121209_214030_outLine +BABEL_OP1_206_71566_20130604_214443_inLine +BABEL_OP1_206_71566_20130604_214443_outLine +BABEL_OP1_206_72110_20121221_232617_inLine +BABEL_OP1_206_72110_20121221_232617_outLine +BABEL_OP1_206_72319_20130123_022502_inLine +BABEL_OP1_206_72319_20130123_022502_outLine +BABEL_OP1_206_72324_20130602_184851_inLine +BABEL_OP1_206_72324_20130602_184851_outLine +BABEL_OP1_206_72844_20121130_193956_inLine +BABEL_OP1_206_72844_20121130_193956_outLine +BABEL_OP1_206_73005_20130122_021229_inLine +BABEL_OP1_206_73005_20130122_021229_outLine +BABEL_OP1_206_73258_20130120_170200_inLine +BABEL_OP1_206_73258_20130120_170200_outLine +BABEL_OP1_206_73301_20130529_214428_inLine +BABEL_OP1_206_73301_20130529_214428_outLine +BABEL_OP1_206_73485_20130122_235208_inLine +BABEL_OP1_206_73485_20130122_235208_outLine +BABEL_OP1_206_73591_20121117_212751_inLine +BABEL_OP1_206_73591_20121117_212751_outLine +BABEL_OP1_206_74886_20121128_205141_inLine +BABEL_OP1_206_74886_20121128_205141_outLine +BABEL_OP1_206_75064_20121129_233512_inLine +BABEL_OP1_206_75064_20121129_233512_outLine +BABEL_OP1_206_75505_20130522_234600_inLine +BABEL_OP1_206_75505_20130522_234600_outLine +BABEL_OP1_206_75993_20121128_223040_inLine +BABEL_OP1_206_75993_20121128_223040_outLine +BABEL_OP1_206_76126_20121219_020552_inLine +BABEL_OP1_206_76126_20121219_020552_outLine +BABEL_OP1_206_76238_20130111_190815_inLine +BABEL_OP1_206_76238_20130111_190815_outLine +BABEL_OP1_206_76437_20121117_202446_inLine +BABEL_OP1_206_76437_20121117_202446_outLine +BABEL_OP1_206_77803_20121130_005638_inLine +BABEL_OP1_206_77803_20121130_005638_outLine 
+BABEL_OP1_206_78398_20121206_003319_inLine +BABEL_OP1_206_78398_20121206_003319_outLine +BABEL_OP1_206_78544_20121220_000743_inLine +BABEL_OP1_206_78544_20121220_000743_outLine +BABEL_OP1_206_78943_20121129_231930_inLine +BABEL_OP1_206_78943_20121129_231930_outLine +BABEL_OP1_206_79080_20121212_205306_inLine +BABEL_OP1_206_79080_20121212_205306_outLine +BABEL_OP1_206_79131_20130123_003404_inLine +BABEL_OP1_206_79131_20130123_003404_outLine +BABEL_OP1_206_79167_20130602_202526_inLine +BABEL_OP1_206_79167_20130602_202526_outLine +BABEL_OP1_206_79367_20121204_001524_inLine +BABEL_OP1_206_79367_20121204_001524_outLine +BABEL_OP1_206_79367_20121204_004137_inLine +BABEL_OP1_206_79367_20121204_004137_outLine +BABEL_OP1_206_80439_20130527_182722_inLine +BABEL_OP1_206_80439_20130527_182722_outLine +BABEL_OP1_206_80559_20121206_232755_inLine +BABEL_OP1_206_80559_20121206_232755_outLine +BABEL_OP1_206_80781_20121219_233131_inLine +BABEL_OP1_206_80781_20121219_233131_outLine +BABEL_OP1_206_81435_20121220_204044_inLine +BABEL_OP1_206_81435_20121220_204044_outLine +BABEL_OP1_206_82035_20121220_195943_inLine +BABEL_OP1_206_82035_20121220_195943_outLine +BABEL_OP1_206_82138_20121129_223223_inLine +BABEL_OP1_206_82138_20121129_223223_outLine +BABEL_OP1_206_82303_20130531_191551_inLine +BABEL_OP1_206_82303_20130531_191551_outLine +BABEL_OP1_206_82391_20121221_015423_inLine +BABEL_OP1_206_82391_20121221_015423_outLine +BABEL_OP1_206_82425_20121129_212519_inLine +BABEL_OP1_206_82425_20121129_212519_outLine +BABEL_OP1_206_82473_20121206_004738_inLine +BABEL_OP1_206_82473_20121206_004738_outLine +BABEL_OP1_206_82622_20130604_222219_inLine +BABEL_OP1_206_82622_20130604_222219_outLine +BABEL_OP1_206_83455_20121205_024244_inLine +BABEL_OP1_206_83455_20121205_024244_outLine +BABEL_OP1_206_84547_20121206_225105_inLine +BABEL_OP1_206_84547_20121206_225105_outLine +BABEL_OP1_206_84605_20121129_212603_inLine +BABEL_OP1_206_84605_20121129_212603_outLine +BABEL_OP1_206_84805_20121214_221155_inLine +BABEL_OP1_206_84805_20121214_221155_outLine +BABEL_OP1_206_85028_20121212_014236_inLine +BABEL_OP1_206_85028_20121212_014236_outLine +BABEL_OP1_206_85322_20130530_233851_inLine +BABEL_OP1_206_85322_20130530_233851_outLine +BABEL_OP1_206_85647_20121206_022317_inLine +BABEL_OP1_206_85647_20121206_022317_outLine +BABEL_OP1_206_85647_20121206_024354_inLine +BABEL_OP1_206_85647_20121206_024354_outLine +BABEL_OP1_206_85651_20130420_232505_inLine +BABEL_OP1_206_85651_20130420_232505_outLine +BABEL_OP1_206_86191_20121205_001218_inLine +BABEL_OP1_206_86191_20121205_001218_outLine +BABEL_OP1_206_86321_20121212_025212_inLine +BABEL_OP1_206_86321_20121212_025212_outLine +BABEL_OP1_206_86433_20121220_215310_inLine +BABEL_OP1_206_86433_20121220_215310_outLine +BABEL_OP1_206_86433_20121220_225718_inLine +BABEL_OP1_206_86433_20121220_225718_outLine +BABEL_OP1_206_86472_20121221_010912_inLine +BABEL_OP1_206_86472_20121221_010912_outLine +BABEL_OP1_206_86635_20121218_223238_inLine +BABEL_OP1_206_86635_20121218_223238_outLine +BABEL_OP1_206_86635_20121218_230141_inLine +BABEL_OP1_206_86635_20121218_230141_outLine +BABEL_OP1_206_86715_20130602_174900_inLine +BABEL_OP1_206_86715_20130602_174900_outLine +BABEL_OP1_206_86722_20121204_231838_inLine +BABEL_OP1_206_86722_20121204_231838_outLine +BABEL_OP1_206_87073_20130102_212334_inLine +BABEL_OP1_206_87073_20130102_212334_outLine +BABEL_OP1_206_87298_20121129_212519_inLine +BABEL_OP1_206_87298_20121129_212519_outLine +BABEL_OP1_206_87298_20121129_213610_inLine 
+BABEL_OP1_206_87298_20121129_213610_outLine +BABEL_OP1_206_87470_20121203_052237_inLine +BABEL_OP1_206_87470_20121203_052237_outLine +BABEL_OP1_206_87871_20121220_222250_inLine +BABEL_OP1_206_87871_20121220_222250_outLine +BABEL_OP1_206_87921_20121221_003205_inLine +BABEL_OP1_206_87921_20121221_003205_outLine +BABEL_OP1_206_88260_20121208_204256_inLine +BABEL_OP1_206_88260_20121208_204256_outLine +BABEL_OP1_206_88372_20130120_230911_inLine +BABEL_OP1_206_88372_20130120_230911_outLine +BABEL_OP1_206_88925_20130603_230637_inLine +BABEL_OP1_206_88925_20130603_230637_outLine +BABEL_OP1_206_89575_20121220_211420_inLine +BABEL_OP1_206_89575_20121220_211420_outLine +BABEL_OP1_206_89665_20121208_212046_inLine +BABEL_OP1_206_89665_20121208_212046_outLine +BABEL_OP1_206_89943_20121127_034521_inLine +BABEL_OP1_206_89943_20121127_034521_outLine +BABEL_OP1_206_89943_20121128_015307_inLine +BABEL_OP1_206_89943_20121128_015307_outLine +BABEL_OP1_206_90572_20130618_045832_inLine +BABEL_OP1_206_90572_20130618_045832_outLine +BABEL_OP1_206_90739_20130604_174758_inLine +BABEL_OP1_206_90739_20130604_174758_outLine +BABEL_OP1_206_90760_20130525_001351_inLine +BABEL_OP1_206_90760_20130525_001351_outLine +BABEL_OP1_206_91125_20121123_063516_inLine +BABEL_OP1_206_91125_20121123_063516_outLine +BABEL_OP1_206_91336_20121205_221404_inLine +BABEL_OP1_206_91336_20121205_221404_outLine +BABEL_OP1_206_92459_20130529_223322_inLine +BABEL_OP1_206_92459_20130529_223322_outLine +BABEL_OP1_206_92527_20121128_232151_inLine +BABEL_OP1_206_92527_20121128_232151_outLine +BABEL_OP1_206_92527_20121128_234105_inLine +BABEL_OP1_206_92527_20121128_234105_outLine +BABEL_OP1_206_92557_20121213_005100_inLine +BABEL_OP1_206_92557_20121213_005100_outLine +BABEL_OP1_206_92740_20121211_184826_inLine +BABEL_OP1_206_92740_20121211_184826_outLine +BABEL_OP1_206_93411_20121220_002408_inLine +BABEL_OP1_206_93411_20121220_002408_outLine +BABEL_OP1_206_93632_20121212_021207_inLine +BABEL_OP1_206_93632_20121212_021207_outLine +BABEL_OP1_206_93858_20130605_005238_inLine +BABEL_OP1_206_93858_20130605_005238_outLine +BABEL_OP1_206_93964_20121205_235339_inLine +BABEL_OP1_206_93964_20121205_235339_outLine +BABEL_OP1_206_94025_20121213_025224_inLine +BABEL_OP1_206_94025_20121213_025224_outLine +BABEL_OP1_206_94745_20130531_014707_inLine +BABEL_OP1_206_94745_20130531_014707_outLine +BABEL_OP1_206_94869_20121205_203951_inLine +BABEL_OP1_206_94869_20121205_203951_outLine +BABEL_OP1_206_95028_20130601_222202_inLine +BABEL_OP1_206_95028_20130601_222202_outLine +BABEL_OP1_206_95231_20130601_230414_inLine +BABEL_OP1_206_95231_20130601_230414_outLine +BABEL_OP1_206_95446_20121220_221335_inLine +BABEL_OP1_206_95446_20121220_221335_outLine +BABEL_OP1_206_96730_20121220_213139_inLine +BABEL_OP1_206_96730_20121220_213139_outLine +BABEL_OP1_206_96910_20121202_211324_inLine +BABEL_OP1_206_96910_20121202_211324_outLine +BABEL_OP1_206_97376_20121220_234456_inLine +BABEL_OP1_206_97376_20121220_234456_outLine +BABEL_OP1_206_97772_20121123_064042_inLine +BABEL_OP1_206_97772_20121123_064042_outLine +BABEL_OP1_206_98311_20130528_182109_inLine +BABEL_OP1_206_98311_20130528_182109_outLine +BABEL_OP1_206_98390_20121123_064010_inLine +BABEL_OP1_206_98390_20121123_064010_outLine +BABEL_OP1_206_99289_20130123_161855_inLine +BABEL_OP1_206_99289_20130123_161855_outLine +BABEL_OP1_206_99289_20130123_163456_inLine +BABEL_OP1_206_99289_20130123_163456_outLine +BABEL_OP1_206_99955_20121219_002822_inLine +BABEL_OP1_206_99955_20121219_002822_outLine diff --git 
a/egs/babel/s5d/conf/lists/206-zulu/train.untranscribed.list b/egs/babel/s5d/conf/lists/206-zulu/train.untranscribed.list new file mode 100644 index 00000000000..b9d6a50aad4 --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/train.untranscribed.list @@ -0,0 +1,285 @@ +BABEL_OP1_206_10974_20121228_005413_inLine +BABEL_OP1_206_10974_20121228_005413_outLine +BABEL_OP1_206_10974_20121228_024429_inLine +BABEL_OP1_206_10974_20121228_024429_outLine +BABEL_OP1_206_14228_20130111_014154_inLine +BABEL_OP1_206_14228_20130111_014154_outLine +BABEL_OP1_206_15262_20121229_174321_inLine +BABEL_OP1_206_15262_20121229_174321_outLine +BABEL_OP1_206_15262_20121230_013109_inLine +BABEL_OP1_206_15262_20121230_013109_outLine +BABEL_OP1_206_15848_20121219_014456_inLine +BABEL_OP1_206_15848_20121219_014456_outLine +BABEL_OP1_206_15848_20121219_020128_inLine +BABEL_OP1_206_15848_20121219_020128_outLine +BABEL_OP1_206_16056_20130618_231336_inLine +BABEL_OP1_206_16056_20130618_231336_outLine +BABEL_OP1_206_16938_20130418_204901_inLine +BABEL_OP1_206_16938_20130418_204901_outLine +BABEL_OP1_206_17115_20130704_003152_inLine +BABEL_OP1_206_17115_20130704_003152_outLine +BABEL_OP1_206_17127_20130607_184256_inLine +BABEL_OP1_206_17127_20130607_184256_outLine +BABEL_OP1_206_17496_20121213_021057_inLine +BABEL_OP1_206_17496_20121213_021057_outLine +BABEL_OP1_206_17890_20121218_232607_inLine +BABEL_OP1_206_17890_20121218_232607_outLine +BABEL_OP1_206_17890_20121218_234135_inLine +BABEL_OP1_206_17890_20121218_234135_outLine +BABEL_OP1_206_19130_20130618_230729_inLine +BABEL_OP1_206_19130_20130618_230729_outLine +BABEL_OP1_206_19130_20130618_233209_inLine +BABEL_OP1_206_19130_20130618_233209_outLine +BABEL_OP1_206_19782_20121212_231659_inLine +BABEL_OP1_206_19782_20121212_231659_outLine +BABEL_OP1_206_19832_20130619_213422_inLine +BABEL_OP1_206_19832_20130619_213422_outLine +BABEL_OP1_206_19832_20130621_212156_inLine +BABEL_OP1_206_19832_20130621_212156_outLine +BABEL_OP1_206_21159_20130409_220748_inLine +BABEL_OP1_206_21159_20130409_220748_outLine +BABEL_OP1_206_22034_20130823_052902_inLine +BABEL_OP1_206_22034_20130823_052902_outLine +BABEL_OP1_206_22216_20121206_230217_inLine +BABEL_OP1_206_22216_20121206_230217_outLine +BABEL_OP1_206_22612_20130111_030229_inLine +BABEL_OP1_206_22612_20130111_030229_outLine +BABEL_OP1_206_23983_20130318_001202_inLine +BABEL_OP1_206_23983_20130318_001202_outLine +BABEL_OP1_206_24239_20130123_200948_inLine +BABEL_OP1_206_24239_20130123_200948_outLine +BABEL_OP1_206_28871_20130316_231654_inLine +BABEL_OP1_206_28871_20130316_231654_outLine +BABEL_OP1_206_29168_20121219_024841_inLine +BABEL_OP1_206_29168_20121219_024841_outLine +BABEL_OP1_206_29230_20130607_212302_inLine +BABEL_OP1_206_29230_20130607_212302_outLine +BABEL_OP1_206_29685_20121209_215754_inLine +BABEL_OP1_206_29685_20121209_215754_outLine +BABEL_OP1_206_29685_20121218_164410_inLine +BABEL_OP1_206_29685_20121218_164410_outLine +BABEL_OP1_206_30653_20130609_003734_inLine +BABEL_OP1_206_30653_20130609_003734_outLine +BABEL_OP1_206_30653_20130609_010837_inLine +BABEL_OP1_206_30653_20130609_010837_outLine +BABEL_OP1_206_30720_20130717_175529_inLine +BABEL_OP1_206_30720_20130717_175529_outLine +BABEL_OP1_206_32832_20121210_200734_inLine +BABEL_OP1_206_32832_20121210_200734_outLine +BABEL_OP1_206_32872_20130709_004706_inLine +BABEL_OP1_206_32872_20130709_004706_outLine +BABEL_OP1_206_32961_20130708_045618_inLine +BABEL_OP1_206_32961_20130708_045618_outLine +BABEL_OP1_206_34629_20130719_022535_inLine 
+BABEL_OP1_206_34629_20130719_022535_outLine +BABEL_OP1_206_36017_20130123_211455_inLine +BABEL_OP1_206_36017_20130123_211455_outLine +BABEL_OP1_206_38139_20130714_222440_inLine +BABEL_OP1_206_38139_20130714_222440_outLine +BABEL_OP1_206_39159_20121219_215221_inLine +BABEL_OP1_206_39159_20121219_215221_outLine +BABEL_OP1_206_41272_20130123_012754_inLine +BABEL_OP1_206_41272_20130123_012754_outLine +BABEL_OP1_206_43157_20130702_170155_inLine +BABEL_OP1_206_43157_20130702_170155_outLine +BABEL_OP1_206_43789_20130704_211632_inLine +BABEL_OP1_206_43789_20130704_211632_outLine +BABEL_OP1_206_43789_20130704_214224_inLine +BABEL_OP1_206_43789_20130704_214224_outLine +BABEL_OP1_206_43990_20130717_000515_inLine +BABEL_OP1_206_43990_20130717_000515_outLine +BABEL_OP1_206_44290_20130122_225754_inLine +BABEL_OP1_206_44290_20130122_225754_outLine +BABEL_OP1_206_44290_20130122_230740_inLine +BABEL_OP1_206_44290_20130122_230740_outLine +BABEL_OP1_206_44290_20130122_231733_inLine +BABEL_OP1_206_44290_20130122_231733_outLine +BABEL_OP1_206_44420_20121207_005913_inLine +BABEL_OP1_206_44420_20121207_005913_outLine +BABEL_OP1_206_44847_20121213_214340_inLine +BABEL_OP1_206_44847_20121213_214340_outLine +BABEL_OP1_206_45908_20130128_214430_inLine +BABEL_OP1_206_45908_20130128_214430_outLine +BABEL_OP1_206_46315_20121215_035427_inLine +BABEL_OP1_206_46315_20121215_035427_outLine +BABEL_OP1_206_46881_20121207_203628_inLine +BABEL_OP1_206_46881_20121207_203628_outLine +BABEL_OP1_206_46881_20121207_205322_inLine +BABEL_OP1_206_46881_20121207_205322_outLine +BABEL_OP1_206_46974_20121217_030549_inLine +BABEL_OP1_206_46974_20121217_030549_outLine +BABEL_OP1_206_46974_20121217_175603_inLine +BABEL_OP1_206_46974_20121217_175603_outLine +BABEL_OP1_206_47270_20130610_005427_inLine +BABEL_OP1_206_47270_20130610_005427_outLine +BABEL_OP1_206_48663_20130618_220742_inLine +BABEL_OP1_206_48663_20130618_220742_outLine +BABEL_OP1_206_49197_20130102_213736_inLine +BABEL_OP1_206_49197_20130102_213736_outLine +BABEL_OP1_206_49630_20121219_190512_inLine +BABEL_OP1_206_49630_20121219_190512_outLine +BABEL_OP1_206_52438_20121205_011303_inLine +BABEL_OP1_206_52438_20121205_011303_outLine +BABEL_OP1_206_52442_20130103_034355_inLine +BABEL_OP1_206_52442_20130103_034355_outLine +BABEL_OP1_206_52483_20130719_011409_inLine +BABEL_OP1_206_52483_20130719_011409_outLine +BABEL_OP1_206_53206_20130717_214929_inLine +BABEL_OP1_206_53206_20130717_214929_outLine +BABEL_OP1_206_56213_20121211_204232_inLine +BABEL_OP1_206_56213_20121211_204232_outLine +BABEL_OP1_206_56345_20130716_043400_inLine +BABEL_OP1_206_56345_20130716_043400_outLine +BABEL_OP1_206_56370_20121207_023036_inLine +BABEL_OP1_206_56370_20121207_023036_outLine +BABEL_OP1_206_56523_20121205_023208_inLine +BABEL_OP1_206_56523_20121205_023208_outLine +BABEL_OP1_206_57067_20130102_012254_inLine +BABEL_OP1_206_57067_20130102_012254_outLine +BABEL_OP1_206_60307_20130719_015514_inLine +BABEL_OP1_206_60307_20130719_015514_outLine +BABEL_OP1_206_60307_20130719_020926_inLine +BABEL_OP1_206_60307_20130719_020926_outLine +BABEL_OP1_206_60307_20130719_024339_inLine +BABEL_OP1_206_60307_20130719_024339_outLine +BABEL_OP1_206_60508_20121207_000229_inLine +BABEL_OP1_206_60508_20121207_000229_outLine +BABEL_OP1_206_60661_20121130_205836_inLine +BABEL_OP1_206_60661_20121130_205836_outLine +BABEL_OP1_206_62158_20130710_235209_inLine +BABEL_OP1_206_62158_20130710_235209_outLine +BABEL_OP1_206_62286_20121128_234346_inLine +BABEL_OP1_206_62286_20121128_234346_outLine 
+BABEL_OP1_206_62286_20121129_203539_inLine +BABEL_OP1_206_62286_20121129_203539_outLine +BABEL_OP1_206_62286_20121129_212959_inLine +BABEL_OP1_206_62286_20121129_212959_outLine +BABEL_OP1_206_62434_20121219_215717_inLine +BABEL_OP1_206_62434_20121219_215717_outLine +BABEL_OP1_206_62456_20121213_021820_inLine +BABEL_OP1_206_62456_20121213_021820_outLine +BABEL_OP1_206_64638_20121219_213206_inLine +BABEL_OP1_206_64638_20121219_213206_outLine +BABEL_OP1_206_66361_20130716_054608_inLine +BABEL_OP1_206_66361_20130716_054608_outLine +BABEL_OP1_206_67389_20130710_003945_inLine +BABEL_OP1_206_67389_20130710_003945_outLine +BABEL_OP1_206_67726_20130722_002158_inLine +BABEL_OP1_206_67726_20130722_002158_outLine +BABEL_OP1_206_67794_20121205_012401_inLine +BABEL_OP1_206_67794_20121205_012401_outLine +BABEL_OP1_206_68823_20130823_044634_inLine +BABEL_OP1_206_68823_20130823_044634_outLine +BABEL_OP1_206_69885_20130610_194001_inLine +BABEL_OP1_206_69885_20130610_194001_outLine +BABEL_OP1_206_69982_20130706_192449_inLine +BABEL_OP1_206_69982_20130706_192449_outLine +BABEL_OP1_206_70282_20121214_191323_inLine +BABEL_OP1_206_70282_20121214_191323_outLine +BABEL_OP1_206_70526_20130123_020108_inLine +BABEL_OP1_206_70526_20130123_020108_outLine +BABEL_OP1_206_70986_20130710_195224_inLine +BABEL_OP1_206_70986_20130710_195224_outLine +BABEL_OP1_206_70986_20130710_200021_inLine +BABEL_OP1_206_70986_20130710_200021_outLine +BABEL_OP1_206_71189_20130122_234213_inLine +BABEL_OP1_206_71189_20130122_234213_outLine +BABEL_OP1_206_72654_20130418_185023_inLine +BABEL_OP1_206_72654_20130418_185023_outLine +BABEL_OP1_206_73408_20130706_195257_inLine +BABEL_OP1_206_73408_20130706_195257_outLine +BABEL_OP1_206_74226_20130709_222957_inLine +BABEL_OP1_206_74226_20130709_222957_outLine +BABEL_OP1_206_75359_20130128_221313_inLine +BABEL_OP1_206_75359_20130128_221313_outLine +BABEL_OP1_206_75366_20130626_043947_inLine +BABEL_OP1_206_75366_20130626_043947_outLine +BABEL_OP1_206_77567_20121206_015015_inLine +BABEL_OP1_206_77567_20121206_015015_outLine +BABEL_OP1_206_78609_20121210_172907_inLine +BABEL_OP1_206_78609_20121210_172907_outLine +BABEL_OP1_206_79139_20121130_021538_inLine +BABEL_OP1_206_79139_20121130_021538_outLine +BABEL_OP1_206_79571_20121208_220739_inLine +BABEL_OP1_206_79571_20121208_220739_outLine +BABEL_OP1_206_79751_20130709_233219_inLine +BABEL_OP1_206_79751_20130709_233219_outLine +BABEL_OP1_206_80897_20121220_195655_inLine +BABEL_OP1_206_80897_20121220_195655_outLine +BABEL_OP1_206_81229_20121203_035326_inLine +BABEL_OP1_206_81229_20121203_035331_outLine +BABEL_OP1_206_81424_20130114_180200_inLine +BABEL_OP1_206_81424_20130114_180200_outLine +BABEL_OP1_206_81553_20130114_222622_inLine +BABEL_OP1_206_81553_20130114_222622_outLine +BABEL_OP1_206_82030_20130607_164514_inLine +BABEL_OP1_206_82030_20130607_164514_outLine +BABEL_OP1_206_82030_20130607_170355_inLine +BABEL_OP1_206_82030_20130607_170355_outLine +BABEL_OP1_206_83366_20130112_225127_inLine +BABEL_OP1_206_83366_20130112_225127_outLine +BABEL_OP1_206_83851_20121219_214118_inLine +BABEL_OP1_206_83851_20121219_214118_outLine +BABEL_OP1_206_84327_20130605_221231_inLine +BABEL_OP1_206_84327_20130605_221231_outLine +BABEL_OP1_206_84339_20130610_180645_inLine +BABEL_OP1_206_84339_20130610_180645_outLine +BABEL_OP1_206_84583_20121209_183927_inLine +BABEL_OP1_206_84583_20121209_183927_outLine +BABEL_OP1_206_84709_20130718_233020_inLine +BABEL_OP1_206_84709_20130718_233020_outLine +BABEL_OP1_206_85246_20130708_202906_inLine 
+BABEL_OP1_206_85246_20130708_202906_outLine +BABEL_OP1_206_85439_20130123_002202_inLine +BABEL_OP1_206_85439_20130123_002202_outLine +BABEL_OP1_206_87693_20121205_012117_inLine +BABEL_OP1_206_87693_20121205_012117_outLine +BABEL_OP1_206_89718_20130811_194933_inLine +BABEL_OP1_206_89718_20130811_194933_outLine +BABEL_OP1_206_89794_20121214_233120_inLine +BABEL_OP1_206_89794_20121214_233120_outLine +BABEL_OP1_206_90440_20130718_001037_inLine +BABEL_OP1_206_90440_20130718_001037_outLine +BABEL_OP1_206_90440_20130718_002114_inLine +BABEL_OP1_206_90440_20130718_002114_outLine +BABEL_OP1_206_91825_20121229_025012_inLine +BABEL_OP1_206_91825_20121229_025012_outLine +BABEL_OP1_206_91930_20130609_211010_inLine +BABEL_OP1_206_91930_20130609_211010_outLine +BABEL_OP1_206_92176_20121206_000728_inLine +BABEL_OP1_206_92176_20121206_000728_outLine +BABEL_OP1_206_92281_20130715_213202_inLine +BABEL_OP1_206_92281_20130715_213202_outLine +BABEL_OP1_206_92509_20121207_014928_inLine +BABEL_OP1_206_92509_20121207_014928_outLine +BABEL_OP1_206_92757_20121114_211008_inLine +BABEL_OP1_206_92757_20121114_211009_outLine +BABEL_OP1_206_93443_20130619_014744_inLine +BABEL_OP1_206_93443_20130619_014744_outLine +BABEL_OP1_206_94044_20130717_183259_inLine +BABEL_OP1_206_94044_20130717_183259_outLine +BABEL_OP1_206_94044_20130717_184742_inLine +BABEL_OP1_206_94044_20130717_184742_outLine +BABEL_OP1_206_94166_20130618_012452_inLine +BABEL_OP1_206_94166_20130618_012452_outLine +BABEL_OP1_206_94803_20130702_181918_inLine +BABEL_OP1_206_94803_20130702_181918_outLine +BABEL_OP1_206_94969_20130618_233618_inLine +BABEL_OP1_206_95598_20121218_232233_inLine +BABEL_OP1_206_95598_20121218_232233_outLine +BABEL_OP1_206_96088_20130123_015420_inLine +BABEL_OP1_206_96088_20130123_015420_outLine +BABEL_OP1_206_96446_20121219_003144_inLine +BABEL_OP1_206_96446_20121219_003144_outLine +BABEL_OP1_206_96940_20130723_004026_inLine +BABEL_OP1_206_96940_20130723_004026_outLine +BABEL_OP1_206_96985_20121212_205933_inLine +BABEL_OP1_206_96985_20121212_205933_outLine +BABEL_OP1_206_97570_20130111_233033_inLine +BABEL_OP1_206_97570_20130111_233033_outLine +BABEL_OP1_206_99516_20121123_082052_inLine +BABEL_OP1_206_99516_20121123_082052_outLine +BABEL_OP1_206_99920_20130524_010643_inLine +BABEL_OP1_206_99920_20130524_010643_outLine +BABEL_OP1_206_99920_20130524_012051_inLine +BABEL_OP1_206_99920_20130524_012051_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/dev.list b/egs/babel/s5d/conf/lists/207-tokpisin/dev.list new file mode 100644 index 00000000000..a8ed2a6bc2a --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/dev.list @@ -0,0 +1,132 @@ +BABEL_OP2_207_14141_20130927_123928_inLine +BABEL_OP2_207_14141_20130927_123928_outLine +BABEL_OP2_207_14229_20130801_102759_inLine +BABEL_OP2_207_14229_20130801_102759_outLine +BABEL_OP2_207_14440_20130824_152406_inLine +BABEL_OP2_207_14440_20130824_152406_outLine +BABEL_OP2_207_14440_20130824_153139_inLine +BABEL_OP2_207_14440_20130824_153139_outLine +BABEL_OP2_207_14440_20130824_153643_inLine +BABEL_OP2_207_14440_20130824_153643_outLine +BABEL_OP2_207_14875_20130731_170626_inLine +BABEL_OP2_207_14875_20130731_170626_outLine +BABEL_OP2_207_15848_20130623_210617_inLine +BABEL_OP2_207_15848_20130623_210617_outLine +BABEL_OP2_207_17127_20130925_073246_inLine +BABEL_OP2_207_17127_20130925_073246_outLine +BABEL_OP2_207_17923_20130629_151018_inLine +BABEL_OP2_207_17923_20130629_151018_outLine +BABEL_OP2_207_20916_20130623_184646_inLine +BABEL_OP2_207_20916_20130623_184646_outLine 
+BABEL_OP2_207_20916_20130623_190432_inLine +BABEL_OP2_207_20916_20130623_190432_outLine +BABEL_OP2_207_21244_20131010_122553_inLine +BABEL_OP2_207_21244_20131010_122553_outLine +BABEL_OP2_207_22216_20130801_104847_inLine +BABEL_OP2_207_22216_20130801_104847_outLine +BABEL_OP2_207_23505_20130626_153607_inLine +BABEL_OP2_207_23505_20130626_153607_outLine +BABEL_OP2_207_23893_20130909_152137_inLine +BABEL_OP2_207_23893_20130909_152137_outLine +BABEL_OP2_207_24589_20130722_131056_inLine +BABEL_OP2_207_24589_20130722_131056_outLine +BABEL_OP2_207_27218_20130701_174655_inLine +BABEL_OP2_207_27218_20130701_174655_outLine +BABEL_OP2_207_29911_20131212_174224_inLine +BABEL_OP2_207_29911_20131212_174224_outLine +BABEL_OP2_207_32708_20130730_130556_inLine +BABEL_OP2_207_32708_20130730_130556_outLine +BABEL_OP2_207_32832_20130922_122814_inLine +BABEL_OP2_207_32832_20130922_122814_outLine +BABEL_OP2_207_33111_20130930_120538_inLine +BABEL_OP2_207_33111_20130930_120538_outLine +BABEL_OP2_207_33175_20130621_162225_inLine +BABEL_OP2_207_33175_20130621_162225_outLine +BABEL_OP2_207_34477_20130722_140642_inLine +BABEL_OP2_207_34477_20130722_140642_outLine +BABEL_OP2_207_38431_20130915_163140_inLine +BABEL_OP2_207_38431_20130915_163140_outLine +BABEL_OP2_207_40713_20130711_151622_inLine +BABEL_OP2_207_40713_20130711_151622_outLine +BABEL_OP2_207_41100_20130712_160739_inLine +BABEL_OP2_207_41100_20130712_160739_outLine +BABEL_OP2_207_43646_20130624_165324_inLine +BABEL_OP2_207_43646_20130624_165324_outLine +BABEL_OP2_207_45697_20130925_144605_inLine +BABEL_OP2_207_45697_20130925_144605_outLine +BABEL_OP2_207_46535_20131219_223648_inLine +BABEL_OP2_207_46535_20131219_223648_outLine +BABEL_OP2_207_46625_20130627_133432_inLine +BABEL_OP2_207_46625_20130627_133432_outLine +BABEL_OP2_207_46881_20130626_133140_inLine +BABEL_OP2_207_46881_20130626_133140_outLine +BABEL_OP2_207_47270_20130926_142206_inLine +BABEL_OP2_207_47270_20130926_142206_outLine +BABEL_OP2_207_54744_20130627_200004_inLine +BABEL_OP2_207_54744_20130627_200004_outLine +BABEL_OP2_207_56468_20131102_114004_inLine +BABEL_OP2_207_56468_20131102_114004_outLine +BABEL_OP2_207_59898_20130625_211705_inLine +BABEL_OP2_207_59898_20130625_211705_outLine +BABEL_OP2_207_59898_20130625_212216_inLine +BABEL_OP2_207_59898_20130625_212216_outLine +BABEL_OP2_207_59898_20130625_212948_inLine +BABEL_OP2_207_59898_20130625_212948_outLine +BABEL_OP2_207_60706_20130623_230602_inLine +BABEL_OP2_207_60706_20130623_230602_outLine +BABEL_OP2_207_61011_20130624_164607_inLine +BABEL_OP2_207_61011_20130624_164607_outLine +BABEL_OP2_207_61357_20130822_150714_inLine +BABEL_OP2_207_61357_20130822_150714_outLine +BABEL_OP2_207_61963_20130830_141616_inLine +BABEL_OP2_207_61963_20130830_141616_outLine +BABEL_OP2_207_65252_20131008_183014_inLine +BABEL_OP2_207_65252_20131008_183014_outLine +BABEL_OP2_207_67213_20131218_185924_inLine +BABEL_OP2_207_67213_20131218_185924_outLine +BABEL_OP2_207_70110_20130621_125315_inLine +BABEL_OP2_207_70110_20130621_125315_outLine +BABEL_OP2_207_70726_20131222_161540_inLine +BABEL_OP2_207_70726_20131222_161540_outLine +BABEL_OP2_207_73072_20130730_140848_inLine +BABEL_OP2_207_73072_20130730_140848_outLine +BABEL_OP2_207_74226_20130828_115915_inLine +BABEL_OP2_207_74226_20130828_115915_outLine +BABEL_OP2_207_76218_20130809_145308_inLine +BABEL_OP2_207_76218_20130809_145308_outLine +BABEL_OP2_207_76837_20131207_184347_inLine +BABEL_OP2_207_76837_20131207_184347_outLine +BABEL_OP2_207_77730_20130628_215628_inLine 
+BABEL_OP2_207_77730_20130628_215628_outLine +BABEL_OP2_207_79131_20130915_155341_inLine +BABEL_OP2_207_79131_20130915_155341_outLine +BABEL_OP2_207_80577_20130930_204532_inLine +BABEL_OP2_207_80577_20130930_204532_outLine +BABEL_OP2_207_80881_20130621_220309_inLine +BABEL_OP2_207_80881_20130621_220309_outLine +BABEL_OP2_207_82742_20130915_204759_inLine +BABEL_OP2_207_82742_20130915_204759_outLine +BABEL_OP2_207_83851_20130731_154045_inLine +BABEL_OP2_207_83851_20130731_154045_outLine +BABEL_OP2_207_84815_20130911_144350_inLine +BABEL_OP2_207_84815_20130911_144350_outLine +BABEL_OP2_207_85179_20130920_130213_inLine +BABEL_OP2_207_85179_20130920_130213_outLine +BABEL_OP2_207_85439_20131009_141636_inLine +BABEL_OP2_207_85439_20131009_141636_outLine +BABEL_OP2_207_86557_20130621_160840_inLine +BABEL_OP2_207_86557_20130621_160840_outLine +BABEL_OP2_207_86557_20130621_161939_inLine +BABEL_OP2_207_86557_20130621_161939_outLine +BABEL_OP2_207_90777_20130725_111134_inLine +BABEL_OP2_207_90777_20130725_111134_outLine +BABEL_OP2_207_92886_20130711_144627_inLine +BABEL_OP2_207_92886_20130711_144627_outLine +BABEL_OP2_207_96324_20130625_154301_inLine +BABEL_OP2_207_96324_20130625_154301_outLine +BABEL_OP2_207_97136_20131003_120422_inLine +BABEL_OP2_207_97136_20131003_120422_outLine +BABEL_OP2_207_97849_20131003_125642_inLine +BABEL_OP2_207_97849_20131003_125642_outLine +BABEL_OP2_207_99975_20131027_145501_inLine +BABEL_OP2_207_99975_20131027_145501_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/eval.list b/egs/babel/s5d/conf/lists/207-tokpisin/eval.list new file mode 100644 index 00000000000..57c92f399f4 --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/eval.list @@ -0,0 +1,192 @@ +BABEL_OP2_207_10416_20130808_151430_inLine +BABEL_OP2_207_10416_20130808_151430_outLine +BABEL_OP2_207_10974_20130821_152545_inLine +BABEL_OP2_207_10974_20130821_152545_outLine +BABEL_OP2_207_13040_20130711_172945_inLine +BABEL_OP2_207_13040_20130711_172945_outLine +BABEL_OP2_207_13427_20130817_155156_inLine +BABEL_OP2_207_13427_20130817_155156_outLine +BABEL_OP2_207_15042_20130915_183113_inLine +BABEL_OP2_207_15042_20130915_183113_outLine +BABEL_OP2_207_15163_20130809_152912_inLine +BABEL_OP2_207_15163_20130809_152912_outLine +BABEL_OP2_207_15926_20130905_125437_inLine +BABEL_OP2_207_15926_20130905_125437_outLine +BABEL_OP2_207_16184_20130625_002017_inLine +BABEL_OP2_207_16184_20130625_002017_outLine +BABEL_OP2_207_16467_20130918_155738_inLine +BABEL_OP2_207_16467_20130918_155738_outLine +BABEL_OP2_207_16467_20130918_160609_inLine +BABEL_OP2_207_16467_20130918_160609_outLine +BABEL_OP2_207_16601_20130906_133242_inLine +BABEL_OP2_207_16601_20130906_133242_outLine +BABEL_OP2_207_19545_20130821_135751_inLine +BABEL_OP2_207_19545_20130821_135751_outLine +BABEL_OP2_207_19672_20130903_141816_inLine +BABEL_OP2_207_19672_20130903_141816_outLine +BABEL_OP2_207_20896_20131224_170209_inLine +BABEL_OP2_207_20896_20131224_170209_outLine +BABEL_OP2_207_21029_20130702_120434_inLine +BABEL_OP2_207_21029_20130702_120434_outLine +BABEL_OP2_207_21581_20130724_161007_inLine +BABEL_OP2_207_21581_20130724_161007_outLine +BABEL_OP2_207_22170_20130828_151813_inLine +BABEL_OP2_207_22170_20130828_151813_outLine +BABEL_OP2_207_24010_20131023_153049_inLine +BABEL_OP2_207_24010_20131023_153049_outLine +BABEL_OP2_207_24033_20130930_123827_inLine +BABEL_OP2_207_24033_20130930_123827_outLine +BABEL_OP2_207_24221_20131028_153502_inLine +BABEL_OP2_207_24221_20131028_153502_outLine +BABEL_OP2_207_27082_20130812_162844_inLine 
+BABEL_OP2_207_27082_20130812_162844_outLine +BABEL_OP2_207_28422_20130905_135311_inLine +BABEL_OP2_207_28422_20130905_135311_outLine +BABEL_OP2_207_28871_20130621_163843_inLine +BABEL_OP2_207_28871_20130621_163843_outLine +BABEL_OP2_207_29230_20131015_133532_inLine +BABEL_OP2_207_29230_20131015_133532_outLine +BABEL_OP2_207_30250_20130720_111643_inLine +BABEL_OP2_207_30250_20130720_111643_outLine +BABEL_OP2_207_31484_20130906_164627_inLine +BABEL_OP2_207_31484_20130906_164627_outLine +BABEL_OP2_207_34019_20131218_205039_inLine +BABEL_OP2_207_34019_20131218_205039_outLine +BABEL_OP2_207_36017_20131003_111732_inLine +BABEL_OP2_207_36017_20131003_111732_outLine +BABEL_OP2_207_37068_20131211_133052_inLine +BABEL_OP2_207_37068_20131211_133052_outLine +BABEL_OP2_207_37499_20131009_162024_inLine +BABEL_OP2_207_37499_20131009_162024_outLine +BABEL_OP2_207_41493_20130628_222817_inLine +BABEL_OP2_207_41493_20130628_222817_outLine +BABEL_OP2_207_41920_20130730_105920_inLine +BABEL_OP2_207_41920_20130730_105920_outLine +BABEL_OP2_207_42600_20130724_152811_inLine +BABEL_OP2_207_42600_20130724_152811_outLine +BABEL_OP2_207_42600_20130724_154332_inLine +BABEL_OP2_207_42600_20130724_154332_outLine +BABEL_OP2_207_44255_20130925_074247_inLine +BABEL_OP2_207_44255_20130925_074247_outLine +BABEL_OP2_207_44678_20131029_142212_inLine +BABEL_OP2_207_44678_20131029_142212_outLine +BABEL_OP2_207_45235_20130918_123528_inLine +BABEL_OP2_207_45235_20130918_123528_outLine +BABEL_OP2_207_45777_20130731_140413_inLine +BABEL_OP2_207_45777_20130731_140413_outLine +BABEL_OP2_207_46041_20130919_111546_inLine +BABEL_OP2_207_46041_20130919_111546_outLine +BABEL_OP2_207_46702_20130627_192620_inLine +BABEL_OP2_207_46702_20130627_192620_outLine +BABEL_OP2_207_48663_20130828_133856_inLine +BABEL_OP2_207_48663_20130828_133856_outLine +BABEL_OP2_207_49775_20130711_130307_inLine +BABEL_OP2_207_49775_20130711_130307_outLine +BABEL_OP2_207_50186_20131207_163954_inLine +BABEL_OP2_207_50186_20131207_163954_outLine +BABEL_OP2_207_50962_20130712_152844_inLine +BABEL_OP2_207_50962_20130712_152844_outLine +BABEL_OP2_207_52070_20131018_160716_inLine +BABEL_OP2_207_52070_20131018_160716_outLine +BABEL_OP2_207_52694_20130819_142518_inLine +BABEL_OP2_207_52694_20130819_142518_outLine +BABEL_OP2_207_52854_20130701_173625_inLine +BABEL_OP2_207_52854_20130701_173625_outLine +BABEL_OP2_207_53419_20130915_212209_inLine +BABEL_OP2_207_53419_20130915_212209_outLine +BABEL_OP2_207_55742_20130628_204255_inLine +BABEL_OP2_207_55742_20130628_204255_outLine +BABEL_OP2_207_56429_20130729_115308_inLine +BABEL_OP2_207_56429_20130729_115308_outLine +BABEL_OP2_207_56743_20130731_145617_inLine +BABEL_OP2_207_56743_20130731_145617_outLine +BABEL_OP2_207_57654_20130711_145355_inLine +BABEL_OP2_207_57654_20130711_145355_outLine +BABEL_OP2_207_57654_20130711_150856_inLine +BABEL_OP2_207_57654_20130711_150856_outLine +BABEL_OP2_207_58815_20130917_153637_inLine +BABEL_OP2_207_58815_20130917_153637_outLine +BABEL_OP2_207_59993_20130712_145207_inLine +BABEL_OP2_207_59993_20130712_145207_outLine +BABEL_OP2_207_60418_20130829_155821_inLine +BABEL_OP2_207_60418_20130829_155821_outLine +BABEL_OP2_207_60508_20130801_182520_inLine +BABEL_OP2_207_60508_20130801_182520_outLine +BABEL_OP2_207_62430_20130930_120306_inLine +BABEL_OP2_207_62430_20130930_120306_outLine +BABEL_OP2_207_63445_20130730_154254_inLine +BABEL_OP2_207_63445_20130730_154254_outLine +BABEL_OP2_207_64796_20130627_095719_inLine +BABEL_OP2_207_64796_20130627_095719_outLine 
+BABEL_OP2_207_64796_20130627_102602_inLine +BABEL_OP2_207_64796_20130627_102602_outLine +BABEL_OP2_207_66519_20130724_134257_inLine +BABEL_OP2_207_66519_20130724_134257_outLine +BABEL_OP2_207_66519_20130724_135210_inLine +BABEL_OP2_207_66519_20130724_135210_outLine +BABEL_OP2_207_67373_20130629_154522_inLine +BABEL_OP2_207_67373_20130629_154522_outLine +BABEL_OP2_207_67794_20130629_150014_inLine +BABEL_OP2_207_67794_20130629_150014_outLine +BABEL_OP2_207_67794_20130629_152744_inLine +BABEL_OP2_207_67794_20130629_152744_outLine +BABEL_OP2_207_67842_20130711_144619_inLine +BABEL_OP2_207_67842_20130711_144619_outLine +BABEL_OP2_207_71333_20130711_155031_inLine +BABEL_OP2_207_71333_20130711_155031_outLine +BABEL_OP2_207_71704_20130701_154358_inLine +BABEL_OP2_207_71704_20130701_154358_outLine +BABEL_OP2_207_74111_20130922_211430_inLine +BABEL_OP2_207_74111_20130922_211430_outLine +BABEL_OP2_207_75366_20131018_141443_inLine +BABEL_OP2_207_75366_20131018_141443_outLine +BABEL_OP2_207_75465_20130919_133102_inLine +BABEL_OP2_207_75465_20130919_133102_outLine +BABEL_OP2_207_76372_20130930_220003_inLine +BABEL_OP2_207_76372_20130930_220003_outLine +BABEL_OP2_207_77139_20130624_231111_inLine +BABEL_OP2_207_77139_20130624_231111_outLine +BABEL_OP2_207_78630_20130802_140131_inLine +BABEL_OP2_207_78630_20130802_140131_outLine +BABEL_OP2_207_78976_20130701_162332_inLine +BABEL_OP2_207_78976_20130701_162332_outLine +BABEL_OP2_207_79028_20131211_173303_inLine +BABEL_OP2_207_79028_20131211_173303_outLine +BABEL_OP2_207_79660_20131011_163724_inLine +BABEL_OP2_207_79660_20131011_163724_outLine +BABEL_OP2_207_80655_20131001_101140_inLine +BABEL_OP2_207_80655_20131001_101140_outLine +BABEL_OP2_207_80721_20130910_121013_inLine +BABEL_OP2_207_80721_20130910_121013_outLine +BABEL_OP2_207_81392_20130905_165515_inLine +BABEL_OP2_207_81392_20130905_165515_outLine +BABEL_OP2_207_83366_20130824_150458_inLine +BABEL_OP2_207_83366_20130824_150458_outLine +BABEL_OP2_207_83545_20131009_133016_inLine +BABEL_OP2_207_83545_20131009_133016_outLine +BABEL_OP2_207_89888_20130730_133532_inLine +BABEL_OP2_207_89888_20130730_133532_outLine +BABEL_OP2_207_90318_20131224_133452_inLine +BABEL_OP2_207_90318_20131224_133452_outLine +BABEL_OP2_207_90935_20130725_162432_inLine +BABEL_OP2_207_90935_20130725_162432_outLine +BABEL_OP2_207_92941_20130722_163301_inLine +BABEL_OP2_207_92941_20130722_163301_outLine +BABEL_OP2_207_95598_20130625_170733_inLine +BABEL_OP2_207_95598_20130625_170733_outLine +BABEL_OP2_207_95966_20130811_204100_inLine +BABEL_OP2_207_95966_20130811_204100_outLine +BABEL_OP2_207_96934_20130723_143258_inLine +BABEL_OP2_207_96934_20130723_143258_outLine +BABEL_OP2_207_96985_20130626_084229_inLine +BABEL_OP2_207_96985_20130626_084229_outLine +BABEL_OP2_207_97988_20130909_215057_inLine +BABEL_OP2_207_97988_20130909_215057_outLine +BABEL_OP2_207_98165_20130724_141743_inLine +BABEL_OP2_207_98165_20130724_141743_outLine +BABEL_OP2_207_98506_20130930_135511_inLine +BABEL_OP2_207_98506_20130930_135511_outLine +BABEL_OP2_207_98580_20130809_144219_inLine +BABEL_OP2_207_98580_20130809_144219_outLine +BABEL_OP2_207_98678_20131001_204525_inLine +BABEL_OP2_207_98678_20131001_204525_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/evalpart1.list b/egs/babel/s5d/conf/lists/207-tokpisin/evalpart1.list new file mode 100644 index 00000000000..042fde9446d --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/evalpart1.list @@ -0,0 +1,64 @@ +BABEL_OP2_207_10416_20130808_151430_inLine 
+BABEL_OP2_207_10416_20130808_151430_outLine +BABEL_OP2_207_15926_20130905_125437_inLine +BABEL_OP2_207_15926_20130905_125437_outLine +BABEL_OP2_207_19545_20130821_135751_inLine +BABEL_OP2_207_19545_20130821_135751_outLine +BABEL_OP2_207_24033_20130930_123827_inLine +BABEL_OP2_207_24033_20130930_123827_outLine +BABEL_OP2_207_28422_20130905_135311_inLine +BABEL_OP2_207_28422_20130905_135311_outLine +BABEL_OP2_207_30250_20130720_111643_inLine +BABEL_OP2_207_30250_20130720_111643_outLine +BABEL_OP2_207_31484_20130906_164627_inLine +BABEL_OP2_207_31484_20130906_164627_outLine +BABEL_OP2_207_34019_20131218_205039_inLine +BABEL_OP2_207_34019_20131218_205039_outLine +BABEL_OP2_207_42600_20130724_152811_inLine +BABEL_OP2_207_42600_20130724_152811_outLine +BABEL_OP2_207_42600_20130724_154332_inLine +BABEL_OP2_207_42600_20130724_154332_outLine +BABEL_OP2_207_44255_20130925_074247_inLine +BABEL_OP2_207_44255_20130925_074247_outLine +BABEL_OP2_207_44678_20131029_142212_inLine +BABEL_OP2_207_44678_20131029_142212_outLine +BABEL_OP2_207_48663_20130828_133856_inLine +BABEL_OP2_207_48663_20130828_133856_outLine +BABEL_OP2_207_49775_20130711_130307_inLine +BABEL_OP2_207_49775_20130711_130307_outLine +BABEL_OP2_207_50962_20130712_152844_inLine +BABEL_OP2_207_50962_20130712_152844_outLine +BABEL_OP2_207_52070_20131018_160716_inLine +BABEL_OP2_207_52070_20131018_160716_outLine +BABEL_OP2_207_55742_20130628_204255_inLine +BABEL_OP2_207_55742_20130628_204255_outLine +BABEL_OP2_207_57654_20130711_145355_inLine +BABEL_OP2_207_57654_20130711_145355_outLine +BABEL_OP2_207_57654_20130711_150856_inLine +BABEL_OP2_207_57654_20130711_150856_outLine +BABEL_OP2_207_58815_20130917_153637_inLine +BABEL_OP2_207_58815_20130917_153637_outLine +BABEL_OP2_207_59993_20130712_145207_inLine +BABEL_OP2_207_59993_20130712_145207_outLine +BABEL_OP2_207_60508_20130801_182520_inLine +BABEL_OP2_207_60508_20130801_182520_outLine +BABEL_OP2_207_67373_20130629_154522_inLine +BABEL_OP2_207_67373_20130629_154522_outLine +BABEL_OP2_207_71704_20130701_154358_inLine +BABEL_OP2_207_71704_20130701_154358_outLine +BABEL_OP2_207_74111_20130922_211430_inLine +BABEL_OP2_207_74111_20130922_211430_outLine +BABEL_OP2_207_78976_20130701_162332_inLine +BABEL_OP2_207_78976_20130701_162332_outLine +BABEL_OP2_207_80655_20131001_101140_inLine +BABEL_OP2_207_80655_20131001_101140_outLine +BABEL_OP2_207_90935_20130725_162432_inLine +BABEL_OP2_207_90935_20130725_162432_outLine +BABEL_OP2_207_92941_20130722_163301_inLine +BABEL_OP2_207_92941_20130722_163301_outLine +BABEL_OP2_207_95966_20130811_204100_inLine +BABEL_OP2_207_95966_20130811_204100_outLine +BABEL_OP2_207_98580_20130809_144219_inLine +BABEL_OP2_207_98580_20130809_144219_outLine +BABEL_OP2_207_98678_20131001_204525_inLine +BABEL_OP2_207_98678_20131001_204525_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.list b/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.list new file mode 100644 index 00000000000..0f3cabb11e7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.list @@ -0,0 +1,126 @@ +BABEL_OP2_207_10058_20131001_123723_inLine +BABEL_OP2_207_10058_20131001_123723_outLine +BABEL_OP2_207_11681_20130701_131708_inLine +BABEL_OP2_207_11681_20130701_131708_outLine +BABEL_OP2_207_11723_20131029_192512_inLine +BABEL_OP2_207_11723_20131029_192512_outLine +BABEL_OP2_207_13178_20130828_124504_inLine +BABEL_OP2_207_13178_20130828_124504_outLine +BABEL_OP2_207_13324_20130628_205651_inLine +BABEL_OP2_207_13324_20130628_205651_outLine 
+BABEL_OP2_207_13490_20130811_183642_inLine +BABEL_OP2_207_13490_20130811_183642_outLine +BABEL_OP2_207_13792_20130725_131748_inLine +BABEL_OP2_207_13792_20130725_131748_outLine +BABEL_OP2_207_14137_20130702_122633_inLine +BABEL_OP2_207_14137_20130702_122633_outLine +BABEL_OP2_207_16839_20130923_202105_inLine +BABEL_OP2_207_16839_20130923_202105_outLine +BABEL_OP2_207_17032_20130906_140931_inLine +BABEL_OP2_207_17032_20130906_140931_outLine +BABEL_OP2_207_17420_20130925_143517_inLine +BABEL_OP2_207_17420_20130925_143517_outLine +BABEL_OP2_207_17440_20130911_132642_inLine +BABEL_OP2_207_17440_20130911_132642_outLine +BABEL_OP2_207_22021_20131220_151707_inLine +BABEL_OP2_207_22021_20131220_151707_outLine +BABEL_OP2_207_26999_20130903_135935_inLine +BABEL_OP2_207_26999_20130903_135935_outLine +BABEL_OP2_207_28945_20130719_160541_inLine +BABEL_OP2_207_28945_20130719_160541_outLine +BABEL_OP2_207_29023_20130702_110704_inLine +BABEL_OP2_207_29023_20130702_110704_outLine +BABEL_OP2_207_29168_20130624_215131_inLine +BABEL_OP2_207_29168_20130624_215131_outLine +BABEL_OP2_207_30576_20131003_141444_inLine +BABEL_OP2_207_30576_20131003_141444_outLine +BABEL_OP2_207_31490_20130626_143343_inLine +BABEL_OP2_207_31490_20130626_143343_outLine +BABEL_OP2_207_31624_20130722_163153_inLine +BABEL_OP2_207_31624_20130722_163153_outLine +BABEL_OP2_207_32727_20130910_153130_inLine +BABEL_OP2_207_32727_20130910_153130_outLine +BABEL_OP2_207_33355_20130626_141603_inLine +BABEL_OP2_207_33355_20130626_141603_outLine +BABEL_OP2_207_34197_20130625_162431_inLine +BABEL_OP2_207_34197_20130625_162431_outLine +BABEL_OP2_207_42497_20130628_234333_inLine +BABEL_OP2_207_42497_20130628_234333_outLine +BABEL_OP2_207_42834_20130828_121531_inLine +BABEL_OP2_207_42834_20130828_121531_outLine +BABEL_OP2_207_44029_20131224_183902_inLine +BABEL_OP2_207_44029_20131224_183902_outLine +BABEL_OP2_207_44619_20130720_150103_inLine +BABEL_OP2_207_44619_20130720_150103_outLine +BABEL_OP2_207_48610_20130627_142410_inLine +BABEL_OP2_207_48610_20130627_142410_outLine +BABEL_OP2_207_50175_20130627_131732_inLine +BABEL_OP2_207_50175_20130627_131732_outLine +BABEL_OP2_207_50565_20130625_145121_inLine +BABEL_OP2_207_50565_20130625_145121_outLine +BABEL_OP2_207_52804_20130729_144756_inLine +BABEL_OP2_207_52804_20130729_144756_outLine +BABEL_OP2_207_53917_20130926_150707_inLine +BABEL_OP2_207_53917_20130926_150707_outLine +BABEL_OP2_207_54953_20130725_154539_inLine +BABEL_OP2_207_54953_20130725_154539_outLine +BABEL_OP2_207_56198_20130702_120906_inLine +BABEL_OP2_207_56198_20130702_120906_outLine +BABEL_OP2_207_60661_20130719_154858_inLine +BABEL_OP2_207_60661_20130719_154858_outLine +BABEL_OP2_207_60661_20130719_160027_inLine +BABEL_OP2_207_60661_20130719_160027_outLine +BABEL_OP2_207_62289_20130828_152328_inLine +BABEL_OP2_207_62289_20130828_152328_outLine +BABEL_OP2_207_62800_20130625_222225_inLine +BABEL_OP2_207_62800_20130625_222225_outLine +BABEL_OP2_207_64768_20130722_132745_inLine +BABEL_OP2_207_64768_20130722_132745_outLine +BABEL_OP2_207_69574_20130624_154052_inLine +BABEL_OP2_207_69574_20130624_154052_outLine +BABEL_OP2_207_69574_20130624_162442_inLine +BABEL_OP2_207_69574_20130624_162442_outLine +BABEL_OP2_207_70216_20131212_112351_inLine +BABEL_OP2_207_70216_20131212_112351_outLine +BABEL_OP2_207_70716_20131005_160013_inLine +BABEL_OP2_207_70716_20131005_160013_outLine +BABEL_OP2_207_71038_20130831_112716_inLine +BABEL_OP2_207_71038_20130831_112716_outLine +BABEL_OP2_207_71121_20131212_125525_inLine 
+BABEL_OP2_207_71121_20131212_125525_outLine +BABEL_OP2_207_74280_20130623_173429_inLine +BABEL_OP2_207_74280_20130623_173429_outLine +BABEL_OP2_207_77744_20130720_130633_inLine +BABEL_OP2_207_77744_20130720_130633_outLine +BABEL_OP2_207_78194_20130622_152343_inLine +BABEL_OP2_207_78194_20130622_152343_outLine +BABEL_OP2_207_78604_20130629_143534_inLine +BABEL_OP2_207_78604_20130629_143534_outLine +BABEL_OP2_207_78943_20130701_150832_inLine +BABEL_OP2_207_78943_20130701_150832_outLine +BABEL_OP2_207_86467_20130621_164129_inLine +BABEL_OP2_207_86467_20130621_164129_outLine +BABEL_OP2_207_86826_20131010_131452_inLine +BABEL_OP2_207_86826_20131010_131452_outLine +BABEL_OP2_207_87074_20130702_114658_inLine +BABEL_OP2_207_87074_20130702_114658_outLine +BABEL_OP2_207_87298_20130722_163007_inLine +BABEL_OP2_207_87298_20130722_163007_outLine +BABEL_OP2_207_87298_20130722_164947_inLine +BABEL_OP2_207_87298_20130722_164947_outLine +BABEL_OP2_207_89650_20131220_191027_inLine +BABEL_OP2_207_89650_20131220_191027_outLine +BABEL_OP2_207_95269_20130725_140512_inLine +BABEL_OP2_207_95269_20130725_140512_outLine +BABEL_OP2_207_97588_20130720_172415_inLine +BABEL_OP2_207_97588_20130720_172415_outLine +BABEL_OP2_207_97731_20130920_141703_inLine +BABEL_OP2_207_97731_20130920_141703_outLine +BABEL_OP2_207_97836_20130930_145119_inLine +BABEL_OP2_207_97836_20130930_145119_outLine +BABEL_OP2_207_97896_20130807_165056_inLine +BABEL_OP2_207_97896_20130807_165056_outLine +BABEL_OP2_207_97911_20131017_134323_inLine +BABEL_OP2_207_97911_20131017_134323_outLine +BABEL_OP2_207_98489_20130712_001025_inLine +BABEL_OP2_207_98489_20130712_001025_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.untranscribed.list new file mode 100644 index 00000000000..7fa52da3207 --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.untranscribed.list @@ -0,0 +1,380 @@ +BABEL_OP2_207_10036_20130724_130953_inLine +BABEL_OP2_207_10036_20130724_130953_outLine +BABEL_OP2_207_10638_20131023_161558_inLine +BABEL_OP2_207_10638_20131023_161558_outLine +BABEL_OP2_207_10647_20130930_130411_inLine +BABEL_OP2_207_10647_20130930_130411_outLine +BABEL_OP2_207_10938_20130723_154630_inLine +BABEL_OP2_207_10938_20130723_154630_outLine +BABEL_OP2_207_12036_20130628_172018_inLine +BABEL_OP2_207_12036_20130628_172018_outLine +BABEL_OP2_207_12242_20130720_122145_inLine +BABEL_OP2_207_12242_20130720_122145_outLine +BABEL_OP2_207_12851_20130624_231520_inLine +BABEL_OP2_207_12851_20130624_231520_outLine +BABEL_OP2_207_13483_20130914_124412_inLine +BABEL_OP2_207_13483_20130914_124412_outLine +BABEL_OP2_207_13664_20130624_131414_inLine +BABEL_OP2_207_13664_20130624_131414_outLine +BABEL_OP2_207_13709_20130925_114224_inLine +BABEL_OP2_207_13709_20130925_114224_outLine +BABEL_OP2_207_13776_20131010_175808_inLine +BABEL_OP2_207_13776_20131010_175808_outLine +BABEL_OP2_207_14179_20130905_113236_inLine +BABEL_OP2_207_14179_20130905_113236_outLine +BABEL_OP2_207_14972_20130821_111242_inLine +BABEL_OP2_207_14972_20130821_111242_outLine +BABEL_OP2_207_15024_20130820_131419_inLine +BABEL_OP2_207_15024_20130820_131419_outLine +BABEL_OP2_207_15382_20130827_130728_inLine +BABEL_OP2_207_15382_20130827_130728_outLine +BABEL_OP2_207_15730_20130627_154012_inLine +BABEL_OP2_207_15730_20130627_154012_outLine +BABEL_OP2_207_16149_20130720_115211_inLine +BABEL_OP2_207_16149_20130720_115211_outLine +BABEL_OP2_207_16749_20130830_154859_inLine +BABEL_OP2_207_16749_20130830_154859_outLine 
+BABEL_OP2_207_17472_20130910_165052_inLine +BABEL_OP2_207_17472_20130910_165052_outLine +BABEL_OP2_207_17496_20130827_154835_inLine +BABEL_OP2_207_17496_20130827_154835_outLine +BABEL_OP2_207_17520_20130820_160316_inLine +BABEL_OP2_207_17520_20130820_160316_outLine +BABEL_OP2_207_17615_20130903_123606_inLine +BABEL_OP2_207_17615_20130903_123606_outLine +BABEL_OP2_207_18078_20130920_135919_inLine +BABEL_OP2_207_18078_20130920_135919_outLine +BABEL_OP2_207_18297_20130828_161347_inLine +BABEL_OP2_207_18297_20130828_161347_outLine +BABEL_OP2_207_18370_20131205_182514_inLine +BABEL_OP2_207_18370_20131205_182514_outLine +BABEL_OP2_207_19134_20130822_145954_inLine +BABEL_OP2_207_19134_20130822_145954_outLine +BABEL_OP2_207_19703_20130720_154219_inLine +BABEL_OP2_207_19703_20130720_154219_outLine +BABEL_OP2_207_19818_20130826_134257_inLine +BABEL_OP2_207_19818_20130826_134257_outLine +BABEL_OP2_207_19877_20130912_151401_inLine +BABEL_OP2_207_19877_20130912_151401_outLine +BABEL_OP2_207_20437_20131030_165858_inLine +BABEL_OP2_207_20437_20131030_165858_outLine +BABEL_OP2_207_20985_20130905_145111_inLine +BABEL_OP2_207_20985_20130905_145111_outLine +BABEL_OP2_207_21004_20130909_140247_inLine +BABEL_OP2_207_21004_20130909_140247_outLine +BABEL_OP2_207_21004_20130909_141426_inLine +BABEL_OP2_207_21004_20130909_141426_outLine +BABEL_OP2_207_21206_20130630_201617_inLine +BABEL_OP2_207_21206_20130630_201617_outLine +BABEL_OP2_207_21327_20130912_132010_inLine +BABEL_OP2_207_21327_20130912_132010_outLine +BABEL_OP2_207_22446_20130725_155758_inLine +BABEL_OP2_207_22446_20130725_155758_outLine +BABEL_OP2_207_23006_20130722_133014_inLine +BABEL_OP2_207_23006_20130722_133014_outLine +BABEL_OP2_207_23046_20130729_122607_inLine +BABEL_OP2_207_23046_20130729_122607_outLine +BABEL_OP2_207_23092_20130911_151410_inLine +BABEL_OP2_207_23092_20130911_151410_outLine +BABEL_OP2_207_24532_20130626_162254_inLine +BABEL_OP2_207_24532_20130626_162254_outLine +BABEL_OP2_207_24586_20130930_115553_inLine +BABEL_OP2_207_24586_20130930_115553_outLine +BABEL_OP2_207_24590_20130807_162732_inLine +BABEL_OP2_207_24590_20130807_162732_outLine +BABEL_OP2_207_24679_20130625_144735_inLine +BABEL_OP2_207_24679_20130625_144735_outLine +BABEL_OP2_207_24982_20130729_152422_inLine +BABEL_OP2_207_24982_20130729_152422_outLine +BABEL_OP2_207_25767_20130628_220921_inLine +BABEL_OP2_207_25767_20130628_220921_outLine +BABEL_OP2_207_26388_20130722_152932_inLine +BABEL_OP2_207_26388_20130722_152932_outLine +BABEL_OP2_207_27590_20130912_155435_inLine +BABEL_OP2_207_27590_20130912_155435_outLine +BABEL_OP2_207_28012_20130920_162354_inLine +BABEL_OP2_207_28012_20130920_162354_outLine +BABEL_OP2_207_28303_20130731_132124_inLine +BABEL_OP2_207_28303_20130731_132124_outLine +BABEL_OP2_207_28522_20130906_172331_inLine +BABEL_OP2_207_28522_20130906_172331_outLine +BABEL_OP2_207_28595_20131022_154118_inLine +BABEL_OP2_207_28595_20131022_154118_outLine +BABEL_OP2_207_29404_20130930_154214_inLine +BABEL_OP2_207_29404_20130930_154214_outLine +BABEL_OP2_207_29633_20131001_114745_inLine +BABEL_OP2_207_29633_20131001_114745_outLine +BABEL_OP2_207_30058_20130927_094530_inLine +BABEL_OP2_207_30058_20130927_094530_outLine +BABEL_OP2_207_30180_20130725_150836_inLine +BABEL_OP2_207_30180_20130725_150836_outLine +BABEL_OP2_207_30180_20130725_152116_inLine +BABEL_OP2_207_30180_20130725_152116_outLine +BABEL_OP2_207_30395_20130701_130920_inLine +BABEL_OP2_207_30395_20130701_130920_outLine +BABEL_OP2_207_31039_20131219_232002_inLine 
+BABEL_OP2_207_31039_20131219_232002_outLine +BABEL_OP2_207_31074_20131206_183901_inLine +BABEL_OP2_207_31074_20131206_183901_outLine +BABEL_OP2_207_32122_20130725_140342_inLine +BABEL_OP2_207_32122_20130725_140342_outLine +BABEL_OP2_207_33951_20130812_152815_inLine +BABEL_OP2_207_33951_20130812_152815_outLine +BABEL_OP2_207_34486_20131009_154321_inLine +BABEL_OP2_207_34486_20131009_154321_outLine +BABEL_OP2_207_34679_20130722_131020_inLine +BABEL_OP2_207_34679_20130722_131020_outLine +BABEL_OP2_207_34860_20131031_170619_inLine +BABEL_OP2_207_34860_20131031_170619_outLine +BABEL_OP2_207_35008_20130909_114545_inLine +BABEL_OP2_207_35008_20130909_114545_outLine +BABEL_OP2_207_35139_20130701_113506_inLine +BABEL_OP2_207_35139_20130701_113506_outLine +BABEL_OP2_207_35467_20130627_092105_inLine +BABEL_OP2_207_35467_20130627_092105_outLine +BABEL_OP2_207_35467_20130627_093134_inLine +BABEL_OP2_207_35467_20130627_093134_outLine +BABEL_OP2_207_36293_20130722_173251_inLine +BABEL_OP2_207_36293_20130722_173251_outLine +BABEL_OP2_207_36642_20131007_171446_inLine +BABEL_OP2_207_36642_20131007_171446_outLine +BABEL_OP2_207_37285_20130906_152635_inLine +BABEL_OP2_207_37285_20130906_152635_outLine +BABEL_OP2_207_38741_20130702_112110_inLine +BABEL_OP2_207_38741_20130702_112110_outLine +BABEL_OP2_207_39307_20130625_162418_inLine +BABEL_OP2_207_39307_20130625_162418_outLine +BABEL_OP2_207_41542_20130925_125258_inLine +BABEL_OP2_207_41542_20130925_125258_outLine +BABEL_OP2_207_41680_20130621_172501_inLine +BABEL_OP2_207_41680_20130621_172501_outLine +BABEL_OP2_207_41720_20131031_110123_inLine +BABEL_OP2_207_41720_20131031_110123_outLine +BABEL_OP2_207_43794_20131010_152749_inLine +BABEL_OP2_207_43794_20131010_152749_outLine +BABEL_OP2_207_46268_20130626_132448_inLine +BABEL_OP2_207_46268_20130626_132448_outLine +BABEL_OP2_207_46550_20130720_181026_inLine +BABEL_OP2_207_46550_20130720_181026_outLine +BABEL_OP2_207_46558_20130622_140751_inLine +BABEL_OP2_207_46558_20130622_140751_outLine +BABEL_OP2_207_46589_20130904_135639_inLine +BABEL_OP2_207_46589_20130904_135639_outLine +BABEL_OP2_207_46681_20130702_082940_inLine +BABEL_OP2_207_46681_20130702_082940_outLine +BABEL_OP2_207_47283_20130719_175044_inLine +BABEL_OP2_207_47283_20130719_175044_outLine +BABEL_OP2_207_47451_20130909_142242_inLine +BABEL_OP2_207_47451_20130909_142242_outLine +BABEL_OP2_207_47637_20131212_210756_inLine +BABEL_OP2_207_47637_20131212_210756_outLine +BABEL_OP2_207_48844_20130712_140038_inLine +BABEL_OP2_207_48844_20130712_140038_outLine +BABEL_OP2_207_49768_20130722_145407_inLine +BABEL_OP2_207_49768_20130722_145407_outLine +BABEL_OP2_207_50427_20130820_120507_inLine +BABEL_OP2_207_50427_20130820_120507_outLine +BABEL_OP2_207_51185_20131025_171803_inLine +BABEL_OP2_207_51185_20131025_171803_outLine +BABEL_OP2_207_51955_20130702_113003_inLine +BABEL_OP2_207_51955_20130702_113003_outLine +BABEL_OP2_207_51955_20130702_113703_inLine +BABEL_OP2_207_51955_20130702_113703_outLine +BABEL_OP2_207_52272_20130729_145134_inLine +BABEL_OP2_207_52272_20130729_145134_outLine +BABEL_OP2_207_52322_20131022_130920_inLine +BABEL_OP2_207_52322_20131022_130920_outLine +BABEL_OP2_207_52404_20130903_132311_inLine +BABEL_OP2_207_52404_20130903_132311_outLine +BABEL_OP2_207_52490_20130731_141151_inLine +BABEL_OP2_207_52490_20130731_141151_outLine +BABEL_OP2_207_52499_20131224_143602_inLine +BABEL_OP2_207_52499_20131224_143602_outLine +BABEL_OP2_207_52932_20130712_142557_inLine +BABEL_OP2_207_52932_20130712_142557_outLine 
+BABEL_OP2_207_52932_20130712_143902_inLine +BABEL_OP2_207_52932_20130712_143902_outLine +BABEL_OP2_207_53063_20130915_191541_inLine +BABEL_OP2_207_53063_20130915_191541_outLine +BABEL_OP2_207_53957_20130914_133951_inLine +BABEL_OP2_207_53957_20130914_133951_outLine +BABEL_OP2_207_54390_20130720_163619_inLine +BABEL_OP2_207_54390_20130720_163619_outLine +BABEL_OP2_207_54530_20130914_111523_inLine +BABEL_OP2_207_54530_20130914_111523_outLine +BABEL_OP2_207_55902_20131026_192303_inLine +BABEL_OP2_207_55902_20131026_192303_outLine +BABEL_OP2_207_56326_20131105_180513_inLine +BABEL_OP2_207_56326_20131105_180513_outLine +BABEL_OP2_207_58006_20131001_163445_inLine +BABEL_OP2_207_58006_20131001_163445_outLine +BABEL_OP2_207_58926_20130720_155800_inLine +BABEL_OP2_207_58926_20130720_155800_outLine +BABEL_OP2_207_58926_20130720_162011_inLine +BABEL_OP2_207_58926_20130720_162011_outLine +BABEL_OP2_207_59720_20130723_144903_inLine +BABEL_OP2_207_59720_20130723_144903_outLine +BABEL_OP2_207_60115_20130905_120839_inLine +BABEL_OP2_207_60115_20130905_120839_outLine +BABEL_OP2_207_60474_20130724_150210_inLine +BABEL_OP2_207_60474_20130724_150210_outLine +BABEL_OP2_207_62734_20130724_141406_inLine +BABEL_OP2_207_62734_20130724_141406_outLine +BABEL_OP2_207_62810_20130628_195519_inLine +BABEL_OP2_207_62810_20130628_195519_outLine +BABEL_OP2_207_63787_20130628_150319_inLine +BABEL_OP2_207_63787_20130628_150319_outLine +BABEL_OP2_207_64065_20130711_144127_inLine +BABEL_OP2_207_64065_20130711_144127_outLine +BABEL_OP2_207_65723_20130628_225606_inLine +BABEL_OP2_207_65723_20130628_225606_outLine +BABEL_OP2_207_65882_20130711_131739_inLine +BABEL_OP2_207_65882_20130711_131739_outLine +BABEL_OP2_207_66001_20130627_130307_inLine +BABEL_OP2_207_66001_20130627_130307_outLine +BABEL_OP2_207_66916_20130625_141125_inLine +BABEL_OP2_207_66916_20130625_141125_outLine +BABEL_OP2_207_67283_20130626_165836_inLine +BABEL_OP2_207_67283_20130626_165836_outLine +BABEL_OP2_207_67659_20130730_103326_inLine +BABEL_OP2_207_67659_20130730_103326_outLine +BABEL_OP2_207_67659_20130730_104313_inLine +BABEL_OP2_207_67659_20130730_104313_outLine +BABEL_OP2_207_67726_20131212_115926_inLine +BABEL_OP2_207_67726_20131212_115926_outLine +BABEL_OP2_207_68924_20130824_111816_inLine +BABEL_OP2_207_68924_20130824_111816_outLine +BABEL_OP2_207_69636_20130903_113702_inLine +BABEL_OP2_207_69636_20130903_113702_outLine +BABEL_OP2_207_69992_20130628_145720_inLine +BABEL_OP2_207_69992_20130628_145720_outLine +BABEL_OP2_207_69992_20130628_151110_inLine +BABEL_OP2_207_69992_20130628_151110_outLine +BABEL_OP2_207_70452_20130719_143347_inLine +BABEL_OP2_207_70452_20130719_143347_outLine +BABEL_OP2_207_70794_20130622_150717_inLine +BABEL_OP2_207_70794_20130622_150717_outLine +BABEL_OP2_207_71404_20130712_141658_inLine +BABEL_OP2_207_71404_20130712_141658_outLine +BABEL_OP2_207_72587_20130826_152730_inLine +BABEL_OP2_207_72587_20130826_152730_outLine +BABEL_OP2_207_73022_20130924_132328_inLine +BABEL_OP2_207_73022_20130924_132328_outLine +BABEL_OP2_207_73591_20130625_194125_inLine +BABEL_OP2_207_73591_20130625_194125_outLine +BABEL_OP2_207_73814_20130822_124306_inLine +BABEL_OP2_207_73814_20130822_124306_outLine +BABEL_OP2_207_73990_20131029_162659_inLine +BABEL_OP2_207_73990_20131029_162659_outLine +BABEL_OP2_207_74667_20130808_161304_inLine +BABEL_OP2_207_74667_20130808_161304_outLine +BABEL_OP2_207_75064_20130720_134326_inLine +BABEL_OP2_207_75064_20130720_134326_outLine +BABEL_OP2_207_75505_20130627_155926_inLine 
+BABEL_OP2_207_75505_20130627_155926_outLine +BABEL_OP2_207_77146_20130625_205452_inLine +BABEL_OP2_207_77146_20130625_205452_outLine +BABEL_OP2_207_77803_20130626_144156_inLine +BABEL_OP2_207_77803_20130626_144156_outLine +BABEL_OP2_207_77990_20130701_144426_inLine +BABEL_OP2_207_77990_20130701_144426_outLine +BABEL_OP2_207_78482_20130919_144242_inLine +BABEL_OP2_207_78482_20130919_144242_outLine +BABEL_OP2_207_79080_20130922_214849_inLine +BABEL_OP2_207_79080_20130922_214849_outLine +BABEL_OP2_207_79367_20130626_150601_inLine +BABEL_OP2_207_79367_20130626_150601_outLine +BABEL_OP2_207_79451_20130712_135228_inLine +BABEL_OP2_207_79451_20130712_135228_outLine +BABEL_OP2_207_80439_20130722_161436_inLine +BABEL_OP2_207_80439_20130722_161436_outLine +BABEL_OP2_207_80559_20130712_144234_inLine +BABEL_OP2_207_80559_20130712_144234_outLine +BABEL_OP2_207_81971_20130623_113232_inLine +BABEL_OP2_207_81971_20130623_113232_outLine +BABEL_OP2_207_82425_20130626_153351_inLine +BABEL_OP2_207_82425_20130626_153351_outLine +BABEL_OP2_207_84547_20130626_230549_inLine +BABEL_OP2_207_84547_20130626_230549_outLine +BABEL_OP2_207_84611_20130630_210848_inLine +BABEL_OP2_207_84611_20130630_210848_outLine +BABEL_OP2_207_84768_20130627_204526_inLine +BABEL_OP2_207_84768_20130627_204526_outLine +BABEL_OP2_207_84805_20130922_111910_inLine +BABEL_OP2_207_84805_20130922_111910_outLine +BABEL_OP2_207_85010_20131031_114820_inLine +BABEL_OP2_207_85010_20131031_114820_outLine +BABEL_OP2_207_85340_20130731_141136_inLine +BABEL_OP2_207_85340_20130731_141136_outLine +BABEL_OP2_207_86191_20130720_132952_inLine +BABEL_OP2_207_86191_20130720_132952_outLine +BABEL_OP2_207_86628_20131011_145244_inLine +BABEL_OP2_207_86628_20131011_145244_outLine +BABEL_OP2_207_86713_20130924_095726_inLine +BABEL_OP2_207_86713_20130924_095726_outLine +BABEL_OP2_207_86722_20130723_173932_inLine +BABEL_OP2_207_86722_20130723_173932_outLine +BABEL_OP2_207_87489_20130925_122043_inLine +BABEL_OP2_207_87489_20130925_122043_outLine +BABEL_OP2_207_87777_20130827_113252_inLine +BABEL_OP2_207_87777_20130827_113252_outLine +BABEL_OP2_207_87884_20130911_154713_inLine +BABEL_OP2_207_87884_20130911_154713_outLine +BABEL_OP2_207_87921_20130909_222741_inLine +BABEL_OP2_207_87921_20130909_222741_outLine +BABEL_OP2_207_88776_20130628_223035_inLine +BABEL_OP2_207_88776_20130628_223035_outLine +BABEL_OP2_207_89059_20130830_150700_inLine +BABEL_OP2_207_89059_20130830_150700_outLine +BABEL_OP2_207_89877_20130822_133155_inLine +BABEL_OP2_207_89877_20130822_133155_outLine +BABEL_OP2_207_90572_20130927_112514_inLine +BABEL_OP2_207_90572_20130927_112514_outLine +BABEL_OP2_207_91125_20130622_154739_inLine +BABEL_OP2_207_91125_20130622_154739_outLine +BABEL_OP2_207_91383_20131017_164250_inLine +BABEL_OP2_207_91383_20131017_164250_outLine +BABEL_OP2_207_91760_20131008_175549_inLine +BABEL_OP2_207_91760_20131008_175549_outLine +BABEL_OP2_207_91888_20131002_140054_inLine +BABEL_OP2_207_91888_20131002_140054_outLine +BABEL_OP2_207_92736_20130913_142730_inLine +BABEL_OP2_207_92736_20130913_142730_outLine +BABEL_OP2_207_93475_20130712_141154_inLine +BABEL_OP2_207_93475_20130712_141154_outLine +BABEL_OP2_207_94262_20130912_223931_inLine +BABEL_OP2_207_94262_20130912_223931_outLine +BABEL_OP2_207_94869_20130627_162540_inLine +BABEL_OP2_207_94869_20130627_162540_outLine +BABEL_OP2_207_95077_20130910_113448_inLine +BABEL_OP2_207_95077_20130910_113448_outLine +BABEL_OP2_207_95231_20131029_145824_inLine +BABEL_OP2_207_95231_20131029_145824_outLine 
+BABEL_OP2_207_95663_20130626_085943_inLine +BABEL_OP2_207_95663_20130626_085943_outLine +BABEL_OP2_207_96190_20130730_105000_inLine +BABEL_OP2_207_96190_20130730_105000_outLine +BABEL_OP2_207_96525_20130919_151001_inLine +BABEL_OP2_207_96525_20130919_151001_outLine +BABEL_OP2_207_96690_20130808_133431_inLine +BABEL_OP2_207_96690_20130808_133431_outLine +BABEL_OP2_207_96808_20131007_222455_inLine +BABEL_OP2_207_96808_20131007_222455_outLine +BABEL_OP2_207_96820_20130815_171850_inLine +BABEL_OP2_207_96820_20130815_171850_outLine +BABEL_OP2_207_96820_20130815_172511_inLine +BABEL_OP2_207_96820_20130815_172511_outLine +BABEL_OP2_207_96910_20130723_132125_inLine +BABEL_OP2_207_96910_20130723_132125_outLine +BABEL_OP2_207_97220_20131015_210228_inLine +BABEL_OP2_207_97220_20131015_210228_outLine +BABEL_OP2_207_97557_20130824_125158_inLine +BABEL_OP2_207_97557_20130824_125158_outLine +BABEL_OP2_207_98390_20130630_121753_inLine +BABEL_OP2_207_98390_20130630_121753_outLine +BABEL_OP2_207_98565_20131220_143328_inLine +BABEL_OP2_207_98565_20131220_143328_outLine +BABEL_OP2_207_99289_20130930_212352_inLine +BABEL_OP2_207_99289_20130930_212352_outLine +BABEL_OP2_207_99998_20130730_104201_inLine +BABEL_OP2_207_99998_20130730_104201_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/training.list b/egs/babel/s5d/conf/lists/207-tokpisin/training.list new file mode 100644 index 00000000000..265ad40a321 --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/training.list @@ -0,0 +1,506 @@ +BABEL_OP2_207_10036_20130724_130953_inLine +BABEL_OP2_207_10036_20130724_130953_outLine +BABEL_OP2_207_10058_20131001_123723_inLine +BABEL_OP2_207_10058_20131001_123723_outLine +BABEL_OP2_207_10638_20131023_161558_inLine +BABEL_OP2_207_10638_20131023_161558_outLine +BABEL_OP2_207_10647_20130930_130411_inLine +BABEL_OP2_207_10647_20130930_130411_outLine +BABEL_OP2_207_10938_20130723_154630_inLine +BABEL_OP2_207_10938_20130723_154630_outLine +BABEL_OP2_207_11681_20130701_131708_inLine +BABEL_OP2_207_11681_20130701_131708_outLine +BABEL_OP2_207_11723_20131029_192512_inLine +BABEL_OP2_207_11723_20131029_192512_outLine +BABEL_OP2_207_12036_20130628_172018_inLine +BABEL_OP2_207_12036_20130628_172018_outLine +BABEL_OP2_207_12242_20130720_122145_inLine +BABEL_OP2_207_12242_20130720_122145_outLine +BABEL_OP2_207_12851_20130624_231520_inLine +BABEL_OP2_207_12851_20130624_231520_outLine +BABEL_OP2_207_13178_20130828_124504_inLine +BABEL_OP2_207_13178_20130828_124504_outLine +BABEL_OP2_207_13324_20130628_205651_inLine +BABEL_OP2_207_13324_20130628_205651_outLine +BABEL_OP2_207_13483_20130914_124412_inLine +BABEL_OP2_207_13483_20130914_124412_outLine +BABEL_OP2_207_13490_20130811_183642_inLine +BABEL_OP2_207_13490_20130811_183642_outLine +BABEL_OP2_207_13664_20130624_131414_inLine +BABEL_OP2_207_13664_20130624_131414_outLine +BABEL_OP2_207_13709_20130925_114224_inLine +BABEL_OP2_207_13709_20130925_114224_outLine +BABEL_OP2_207_13776_20131010_175808_inLine +BABEL_OP2_207_13776_20131010_175808_outLine +BABEL_OP2_207_13792_20130725_131748_inLine +BABEL_OP2_207_13792_20130725_131748_outLine +BABEL_OP2_207_14137_20130702_122633_inLine +BABEL_OP2_207_14137_20130702_122633_outLine +BABEL_OP2_207_14179_20130905_113236_inLine +BABEL_OP2_207_14179_20130905_113236_outLine +BABEL_OP2_207_14972_20130821_111242_inLine +BABEL_OP2_207_14972_20130821_111242_outLine +BABEL_OP2_207_15024_20130820_131419_inLine +BABEL_OP2_207_15024_20130820_131419_outLine +BABEL_OP2_207_15382_20130827_130728_inLine +BABEL_OP2_207_15382_20130827_130728_outLine 
+BABEL_OP2_207_15730_20130627_154012_inLine +BABEL_OP2_207_15730_20130627_154012_outLine +BABEL_OP2_207_16149_20130720_115211_inLine +BABEL_OP2_207_16149_20130720_115211_outLine +BABEL_OP2_207_16749_20130830_154859_inLine +BABEL_OP2_207_16749_20130830_154859_outLine +BABEL_OP2_207_16839_20130923_202105_inLine +BABEL_OP2_207_16839_20130923_202105_outLine +BABEL_OP2_207_17032_20130906_140931_inLine +BABEL_OP2_207_17032_20130906_140931_outLine +BABEL_OP2_207_17420_20130925_143517_inLine +BABEL_OP2_207_17420_20130925_143517_outLine +BABEL_OP2_207_17440_20130911_132642_inLine +BABEL_OP2_207_17440_20130911_132642_outLine +BABEL_OP2_207_17472_20130910_165052_inLine +BABEL_OP2_207_17472_20130910_165052_outLine +BABEL_OP2_207_17496_20130827_154835_inLine +BABEL_OP2_207_17496_20130827_154835_outLine +BABEL_OP2_207_17520_20130820_160316_inLine +BABEL_OP2_207_17520_20130820_160316_outLine +BABEL_OP2_207_17615_20130903_123606_inLine +BABEL_OP2_207_17615_20130903_123606_outLine +BABEL_OP2_207_18078_20130920_135919_inLine +BABEL_OP2_207_18078_20130920_135919_outLine +BABEL_OP2_207_18297_20130828_161347_inLine +BABEL_OP2_207_18297_20130828_161347_outLine +BABEL_OP2_207_18370_20131205_182514_inLine +BABEL_OP2_207_18370_20131205_182514_outLine +BABEL_OP2_207_19134_20130822_145954_inLine +BABEL_OP2_207_19134_20130822_145954_outLine +BABEL_OP2_207_19703_20130720_154219_inLine +BABEL_OP2_207_19703_20130720_154219_outLine +BABEL_OP2_207_19818_20130826_134257_inLine +BABEL_OP2_207_19818_20130826_134257_outLine +BABEL_OP2_207_19877_20130912_151401_inLine +BABEL_OP2_207_19877_20130912_151401_outLine +BABEL_OP2_207_20437_20131030_165858_inLine +BABEL_OP2_207_20437_20131030_165858_outLine +BABEL_OP2_207_20985_20130905_145111_inLine +BABEL_OP2_207_20985_20130905_145111_outLine +BABEL_OP2_207_21004_20130909_140247_inLine +BABEL_OP2_207_21004_20130909_140247_outLine +BABEL_OP2_207_21004_20130909_141426_inLine +BABEL_OP2_207_21004_20130909_141426_outLine +BABEL_OP2_207_21206_20130630_201617_inLine +BABEL_OP2_207_21206_20130630_201617_outLine +BABEL_OP2_207_21327_20130912_132010_inLine +BABEL_OP2_207_21327_20130912_132010_outLine +BABEL_OP2_207_22021_20131220_151707_inLine +BABEL_OP2_207_22021_20131220_151707_outLine +BABEL_OP2_207_22446_20130725_155758_inLine +BABEL_OP2_207_22446_20130725_155758_outLine +BABEL_OP2_207_23006_20130722_133014_inLine +BABEL_OP2_207_23006_20130722_133014_outLine +BABEL_OP2_207_23046_20130729_122607_inLine +BABEL_OP2_207_23046_20130729_122607_outLine +BABEL_OP2_207_23092_20130911_151410_inLine +BABEL_OP2_207_23092_20130911_151410_outLine +BABEL_OP2_207_24532_20130626_162254_inLine +BABEL_OP2_207_24532_20130626_162254_outLine +BABEL_OP2_207_24586_20130930_115553_inLine +BABEL_OP2_207_24586_20130930_115553_outLine +BABEL_OP2_207_24590_20130807_162732_inLine +BABEL_OP2_207_24590_20130807_162732_outLine +BABEL_OP2_207_24679_20130625_144735_inLine +BABEL_OP2_207_24679_20130625_144735_outLine +BABEL_OP2_207_24982_20130729_152422_inLine +BABEL_OP2_207_24982_20130729_152422_outLine +BABEL_OP2_207_25767_20130628_220921_inLine +BABEL_OP2_207_25767_20130628_220921_outLine +BABEL_OP2_207_26388_20130722_152932_inLine +BABEL_OP2_207_26388_20130722_152932_outLine +BABEL_OP2_207_26999_20130903_135935_inLine +BABEL_OP2_207_26999_20130903_135935_outLine +BABEL_OP2_207_27590_20130912_155435_inLine +BABEL_OP2_207_27590_20130912_155435_outLine +BABEL_OP2_207_28012_20130920_162354_inLine +BABEL_OP2_207_28012_20130920_162354_outLine +BABEL_OP2_207_28303_20130731_132124_inLine 
+BABEL_OP2_207_28303_20130731_132124_outLine +BABEL_OP2_207_28522_20130906_172331_inLine +BABEL_OP2_207_28522_20130906_172331_outLine +BABEL_OP2_207_28595_20131022_154118_inLine +BABEL_OP2_207_28595_20131022_154118_outLine +BABEL_OP2_207_28945_20130719_160541_inLine +BABEL_OP2_207_28945_20130719_160541_outLine +BABEL_OP2_207_29023_20130702_110704_inLine +BABEL_OP2_207_29023_20130702_110704_outLine +BABEL_OP2_207_29168_20130624_215131_inLine +BABEL_OP2_207_29168_20130624_215131_outLine +BABEL_OP2_207_29404_20130930_154214_inLine +BABEL_OP2_207_29404_20130930_154214_outLine +BABEL_OP2_207_29633_20131001_114745_inLine +BABEL_OP2_207_29633_20131001_114745_outLine +BABEL_OP2_207_30058_20130927_094530_inLine +BABEL_OP2_207_30058_20130927_094530_outLine +BABEL_OP2_207_30180_20130725_150836_inLine +BABEL_OP2_207_30180_20130725_150836_outLine +BABEL_OP2_207_30180_20130725_152116_inLine +BABEL_OP2_207_30180_20130725_152116_outLine +BABEL_OP2_207_30395_20130701_130920_inLine +BABEL_OP2_207_30395_20130701_130920_outLine +BABEL_OP2_207_30576_20131003_141444_inLine +BABEL_OP2_207_30576_20131003_141444_outLine +BABEL_OP2_207_31039_20131219_232002_inLine +BABEL_OP2_207_31039_20131219_232002_outLine +BABEL_OP2_207_31074_20131206_183901_inLine +BABEL_OP2_207_31074_20131206_183901_outLine +BABEL_OP2_207_31490_20130626_143343_inLine +BABEL_OP2_207_31490_20130626_143343_outLine +BABEL_OP2_207_31624_20130722_163153_inLine +BABEL_OP2_207_31624_20130722_163153_outLine +BABEL_OP2_207_32122_20130725_140342_inLine +BABEL_OP2_207_32122_20130725_140342_outLine +BABEL_OP2_207_32727_20130910_153130_inLine +BABEL_OP2_207_32727_20130910_153130_outLine +BABEL_OP2_207_33355_20130626_141603_inLine +BABEL_OP2_207_33355_20130626_141603_outLine +BABEL_OP2_207_33951_20130812_152815_inLine +BABEL_OP2_207_33951_20130812_152815_outLine +BABEL_OP2_207_34197_20130625_162431_inLine +BABEL_OP2_207_34197_20130625_162431_outLine +BABEL_OP2_207_34486_20131009_154321_inLine +BABEL_OP2_207_34486_20131009_154321_outLine +BABEL_OP2_207_34679_20130722_131020_inLine +BABEL_OP2_207_34679_20130722_131020_outLine +BABEL_OP2_207_34860_20131031_170619_inLine +BABEL_OP2_207_34860_20131031_170619_outLine +BABEL_OP2_207_35008_20130909_114545_inLine +BABEL_OP2_207_35008_20130909_114545_outLine +BABEL_OP2_207_35139_20130701_113506_inLine +BABEL_OP2_207_35139_20130701_113506_outLine +BABEL_OP2_207_35467_20130627_092105_inLine +BABEL_OP2_207_35467_20130627_092105_outLine +BABEL_OP2_207_35467_20130627_093134_inLine +BABEL_OP2_207_35467_20130627_093134_outLine +BABEL_OP2_207_36293_20130722_173251_inLine +BABEL_OP2_207_36293_20130722_173251_outLine +BABEL_OP2_207_36642_20131007_171446_inLine +BABEL_OP2_207_36642_20131007_171446_outLine +BABEL_OP2_207_37285_20130906_152635_inLine +BABEL_OP2_207_37285_20130906_152635_outLine +BABEL_OP2_207_38741_20130702_112110_inLine +BABEL_OP2_207_38741_20130702_112110_outLine +BABEL_OP2_207_39307_20130625_162418_inLine +BABEL_OP2_207_39307_20130625_162418_outLine +BABEL_OP2_207_41542_20130925_125258_inLine +BABEL_OP2_207_41542_20130925_125258_outLine +BABEL_OP2_207_41680_20130621_172501_inLine +BABEL_OP2_207_41680_20130621_172501_outLine +BABEL_OP2_207_41720_20131031_110123_inLine +BABEL_OP2_207_41720_20131031_110123_outLine +BABEL_OP2_207_42497_20130628_234333_inLine +BABEL_OP2_207_42497_20130628_234333_outLine +BABEL_OP2_207_42834_20130828_121531_inLine +BABEL_OP2_207_42834_20130828_121531_outLine +BABEL_OP2_207_43794_20131010_152749_inLine +BABEL_OP2_207_43794_20131010_152749_outLine 
+BABEL_OP2_207_44029_20131224_183902_inLine +BABEL_OP2_207_44029_20131224_183902_outLine +BABEL_OP2_207_44619_20130720_150103_inLine +BABEL_OP2_207_44619_20130720_150103_outLine +BABEL_OP2_207_46268_20130626_132448_inLine +BABEL_OP2_207_46268_20130626_132448_outLine +BABEL_OP2_207_46550_20130720_181026_inLine +BABEL_OP2_207_46550_20130720_181026_outLine +BABEL_OP2_207_46558_20130622_140751_inLine +BABEL_OP2_207_46558_20130622_140751_outLine +BABEL_OP2_207_46589_20130904_135639_inLine +BABEL_OP2_207_46589_20130904_135639_outLine +BABEL_OP2_207_46681_20130702_082940_inLine +BABEL_OP2_207_46681_20130702_082940_outLine +BABEL_OP2_207_47283_20130719_175044_inLine +BABEL_OP2_207_47283_20130719_175044_outLine +BABEL_OP2_207_47451_20130909_142242_inLine +BABEL_OP2_207_47451_20130909_142242_outLine +BABEL_OP2_207_47637_20131212_210756_inLine +BABEL_OP2_207_47637_20131212_210756_outLine +BABEL_OP2_207_48610_20130627_142410_inLine +BABEL_OP2_207_48610_20130627_142410_outLine +BABEL_OP2_207_48844_20130712_140038_inLine +BABEL_OP2_207_48844_20130712_140038_outLine +BABEL_OP2_207_49768_20130722_145407_inLine +BABEL_OP2_207_49768_20130722_145407_outLine +BABEL_OP2_207_50175_20130627_131732_inLine +BABEL_OP2_207_50175_20130627_131732_outLine +BABEL_OP2_207_50427_20130820_120507_inLine +BABEL_OP2_207_50427_20130820_120507_outLine +BABEL_OP2_207_50565_20130625_145121_inLine +BABEL_OP2_207_50565_20130625_145121_outLine +BABEL_OP2_207_51185_20131025_171803_inLine +BABEL_OP2_207_51185_20131025_171803_outLine +BABEL_OP2_207_51955_20130702_113003_inLine +BABEL_OP2_207_51955_20130702_113003_outLine +BABEL_OP2_207_51955_20130702_113703_inLine +BABEL_OP2_207_51955_20130702_113703_outLine +BABEL_OP2_207_52272_20130729_145134_inLine +BABEL_OP2_207_52272_20130729_145134_outLine +BABEL_OP2_207_52322_20131022_130920_inLine +BABEL_OP2_207_52322_20131022_130920_outLine +BABEL_OP2_207_52404_20130903_132311_inLine +BABEL_OP2_207_52404_20130903_132311_outLine +BABEL_OP2_207_52490_20130731_141151_inLine +BABEL_OP2_207_52490_20130731_141151_outLine +BABEL_OP2_207_52499_20131224_143602_inLine +BABEL_OP2_207_52499_20131224_143602_outLine +BABEL_OP2_207_52804_20130729_144756_inLine +BABEL_OP2_207_52804_20130729_144756_outLine +BABEL_OP2_207_52932_20130712_142557_inLine +BABEL_OP2_207_52932_20130712_142557_outLine +BABEL_OP2_207_52932_20130712_143902_inLine +BABEL_OP2_207_52932_20130712_143902_outLine +BABEL_OP2_207_53063_20130915_191541_inLine +BABEL_OP2_207_53063_20130915_191541_outLine +BABEL_OP2_207_53917_20130926_150707_inLine +BABEL_OP2_207_53917_20130926_150707_outLine +BABEL_OP2_207_53957_20130914_133951_inLine +BABEL_OP2_207_53957_20130914_133951_outLine +BABEL_OP2_207_54390_20130720_163619_inLine +BABEL_OP2_207_54390_20130720_163619_outLine +BABEL_OP2_207_54530_20130914_111523_inLine +BABEL_OP2_207_54530_20130914_111523_outLine +BABEL_OP2_207_54953_20130725_154539_inLine +BABEL_OP2_207_54953_20130725_154539_outLine +BABEL_OP2_207_55902_20131026_192303_inLine +BABEL_OP2_207_55902_20131026_192303_outLine +BABEL_OP2_207_56198_20130702_120906_inLine +BABEL_OP2_207_56198_20130702_120906_outLine +BABEL_OP2_207_56326_20131105_180513_inLine +BABEL_OP2_207_56326_20131105_180513_outLine +BABEL_OP2_207_58006_20131001_163445_inLine +BABEL_OP2_207_58006_20131001_163445_outLine +BABEL_OP2_207_58926_20130720_155800_inLine +BABEL_OP2_207_58926_20130720_155800_outLine +BABEL_OP2_207_58926_20130720_162011_inLine +BABEL_OP2_207_58926_20130720_162011_outLine +BABEL_OP2_207_59720_20130723_144903_inLine 
+BABEL_OP2_207_59720_20130723_144903_outLine
+BABEL_OP2_207_60115_20130905_120839_inLine
+BABEL_OP2_207_60115_20130905_120839_outLine
+BABEL_OP2_207_60474_20130724_150210_inLine
+BABEL_OP2_207_60474_20130724_150210_outLine
+BABEL_OP2_207_60661_20130719_154858_inLine
+BABEL_OP2_207_60661_20130719_154858_outLine
+BABEL_OP2_207_60661_20130719_160027_inLine
+BABEL_OP2_207_60661_20130719_160027_outLine
+BABEL_OP2_207_62289_20130828_152328_inLine
+BABEL_OP2_207_62289_20130828_152328_outLine
+BABEL_OP2_207_62734_20130724_141406_inLine
+BABEL_OP2_207_62734_20130724_141406_outLine
+BABEL_OP2_207_62800_20130625_222225_inLine
+BABEL_OP2_207_62800_20130625_222225_outLine
+BABEL_OP2_207_62810_20130628_195519_inLine
+BABEL_OP2_207_62810_20130628_195519_outLine
+BABEL_OP2_207_63787_20130628_150319_inLine
+BABEL_OP2_207_63787_20130628_150319_outLine
+BABEL_OP2_207_64065_20130711_144127_inLine
+BABEL_OP2_207_64065_20130711_144127_outLine
+BABEL_OP2_207_64768_20130722_132745_inLine
+BABEL_OP2_207_64768_20130722_132745_outLine
+BABEL_OP2_207_65723_20130628_225606_inLine
+BABEL_OP2_207_65723_20130628_225606_outLine
+BABEL_OP2_207_65882_20130711_131739_inLine
+BABEL_OP2_207_65882_20130711_131739_outLine
+BABEL_OP2_207_66001_20130627_130307_inLine
+BABEL_OP2_207_66001_20130627_130307_outLine
+BABEL_OP2_207_66916_20130625_141125_inLine
+BABEL_OP2_207_66916_20130625_141125_outLine
+BABEL_OP2_207_67283_20130626_165836_inLine
+BABEL_OP2_207_67283_20130626_165836_outLine
+BABEL_OP2_207_67659_20130730_103326_inLine
+BABEL_OP2_207_67659_20130730_103326_outLine
+BABEL_OP2_207_67659_20130730_104313_inLine
+BABEL_OP2_207_67659_20130730_104313_outLine
+BABEL_OP2_207_67726_20131212_115926_inLine
+BABEL_OP2_207_67726_20131212_115926_outLine
+BABEL_OP2_207_68924_20130824_111816_inLine
+BABEL_OP2_207_68924_20130824_111816_outLine
+BABEL_OP2_207_69574_20130624_154052_inLine
+BABEL_OP2_207_69574_20130624_154052_outLine
+BABEL_OP2_207_69574_20130624_162442_inLine
+BABEL_OP2_207_69574_20130624_162442_outLine
+BABEL_OP2_207_69636_20130903_113702_inLine
+BABEL_OP2_207_69636_20130903_113702_outLine
+BABEL_OP2_207_69992_20130628_145720_inLine
+BABEL_OP2_207_69992_20130628_145720_outLine
+BABEL_OP2_207_69992_20130628_151110_inLine
+BABEL_OP2_207_69992_20130628_151110_outLine
+BABEL_OP2_207_70216_20131212_112351_inLine
+BABEL_OP2_207_70216_20131212_112351_outLine
+BABEL_OP2_207_70452_20130719_143347_inLine
+BABEL_OP2_207_70452_20130719_143347_outLine
+BABEL_OP2_207_70716_20131005_160013_inLine
+BABEL_OP2_207_70716_20131005_160013_outLine
+BABEL_OP2_207_70794_20130622_150717_inLine
+BABEL_OP2_207_70794_20130622_150717_outLine
+BABEL_OP2_207_71038_20130831_112716_inLine
+BABEL_OP2_207_71038_20130831_112716_outLine
+BABEL_OP2_207_71121_20131212_125525_inLine
+BABEL_OP2_207_71121_20131212_125525_outLine
+BABEL_OP2_207_71404_20130712_141658_inLine
+BABEL_OP2_207_71404_20130712_141658_outLine
+BABEL_OP2_207_72587_20130826_152730_inLine
+BABEL_OP2_207_72587_20130826_152730_outLine
+BABEL_OP2_207_73022_20130924_132328_inLine
+BABEL_OP2_207_73022_20130924_132328_outLine
+BABEL_OP2_207_73591_20130625_194125_inLine
+BABEL_OP2_207_73591_20130625_194125_outLine
+BABEL_OP2_207_73814_20130822_124306_inLine
+BABEL_OP2_207_73814_20130822_124306_outLine
+BABEL_OP2_207_73990_20131029_162659_inLine
+BABEL_OP2_207_73990_20131029_162659_outLine
+BABEL_OP2_207_74280_20130623_173429_inLine
+BABEL_OP2_207_74280_20130623_173429_outLine
+BABEL_OP2_207_74667_20130808_161304_inLine
+BABEL_OP2_207_74667_20130808_161304_outLine
+BABEL_OP2_207_75064_20130720_134326_inLine
+BABEL_OP2_207_75064_20130720_134326_outLine
+BABEL_OP2_207_75505_20130627_155926_inLine
+BABEL_OP2_207_75505_20130627_155926_outLine
+BABEL_OP2_207_77146_20130625_205452_inLine
+BABEL_OP2_207_77146_20130625_205452_outLine
+BABEL_OP2_207_77744_20130720_130633_inLine
+BABEL_OP2_207_77744_20130720_130633_outLine
+BABEL_OP2_207_77803_20130626_144156_inLine
+BABEL_OP2_207_77803_20130626_144156_outLine
+BABEL_OP2_207_77990_20130701_144426_inLine
+BABEL_OP2_207_77990_20130701_144426_outLine
+BABEL_OP2_207_78194_20130622_152343_inLine
+BABEL_OP2_207_78194_20130622_152343_outLine
+BABEL_OP2_207_78482_20130919_144242_inLine
+BABEL_OP2_207_78482_20130919_144242_outLine
+BABEL_OP2_207_78604_20130629_143534_inLine
+BABEL_OP2_207_78604_20130629_143534_outLine
+BABEL_OP2_207_78943_20130701_150832_inLine
+BABEL_OP2_207_78943_20130701_150832_outLine
+BABEL_OP2_207_79080_20130922_214849_inLine
+BABEL_OP2_207_79080_20130922_214849_outLine
+BABEL_OP2_207_79367_20130626_150601_inLine
+BABEL_OP2_207_79367_20130626_150601_outLine
+BABEL_OP2_207_79451_20130712_135228_inLine
+BABEL_OP2_207_79451_20130712_135228_outLine
+BABEL_OP2_207_80439_20130722_161436_inLine
+BABEL_OP2_207_80439_20130722_161436_outLine
+BABEL_OP2_207_80559_20130712_144234_inLine
+BABEL_OP2_207_80559_20130712_144234_outLine
+BABEL_OP2_207_81971_20130623_113232_inLine
+BABEL_OP2_207_81971_20130623_113232_outLine
+BABEL_OP2_207_82425_20130626_153351_inLine
+BABEL_OP2_207_82425_20130626_153351_outLine
+BABEL_OP2_207_84547_20130626_230549_inLine
+BABEL_OP2_207_84547_20130626_230549_outLine
+BABEL_OP2_207_84611_20130630_210848_inLine
+BABEL_OP2_207_84611_20130630_210848_outLine
+BABEL_OP2_207_84768_20130627_204526_inLine
+BABEL_OP2_207_84768_20130627_204526_outLine
+BABEL_OP2_207_84805_20130922_111910_inLine
+BABEL_OP2_207_84805_20130922_111910_outLine
+BABEL_OP2_207_85010_20131031_114820_inLine
+BABEL_OP2_207_85010_20131031_114820_outLine
+BABEL_OP2_207_85340_20130731_141136_inLine
+BABEL_OP2_207_85340_20130731_141136_outLine
+BABEL_OP2_207_86191_20130720_132952_inLine
+BABEL_OP2_207_86191_20130720_132952_outLine
+BABEL_OP2_207_86467_20130621_164129_inLine
+BABEL_OP2_207_86467_20130621_164129_outLine
+BABEL_OP2_207_86628_20131011_145244_inLine
+BABEL_OP2_207_86628_20131011_145244_outLine
+BABEL_OP2_207_86713_20130924_095726_inLine
+BABEL_OP2_207_86713_20130924_095726_outLine
+BABEL_OP2_207_86722_20130723_173932_inLine
+BABEL_OP2_207_86722_20130723_173932_outLine
+BABEL_OP2_207_86826_20131010_131452_inLine
+BABEL_OP2_207_86826_20131010_131452_outLine
+BABEL_OP2_207_87074_20130702_114658_inLine
+BABEL_OP2_207_87074_20130702_114658_outLine
+BABEL_OP2_207_87298_20130722_163007_inLine
+BABEL_OP2_207_87298_20130722_163007_outLine
+BABEL_OP2_207_87298_20130722_164947_inLine
+BABEL_OP2_207_87298_20130722_164947_outLine
+BABEL_OP2_207_87489_20130925_122043_inLine
+BABEL_OP2_207_87489_20130925_122043_outLine
+BABEL_OP2_207_87777_20130827_113252_inLine
+BABEL_OP2_207_87777_20130827_113252_outLine
+BABEL_OP2_207_87884_20130911_154713_inLine
+BABEL_OP2_207_87884_20130911_154713_outLine
+BABEL_OP2_207_87921_20130909_222741_inLine
+BABEL_OP2_207_87921_20130909_222741_outLine
+BABEL_OP2_207_88776_20130628_223035_inLine
+BABEL_OP2_207_88776_20130628_223035_outLine
+BABEL_OP2_207_89059_20130830_150700_inLine
+BABEL_OP2_207_89059_20130830_150700_outLine
+BABEL_OP2_207_89650_20131220_191027_inLine
+BABEL_OP2_207_89650_20131220_191027_outLine
+BABEL_OP2_207_89877_20130822_133155_inLine
+BABEL_OP2_207_89877_20130822_133155_outLine
+BABEL_OP2_207_90572_20130927_112514_inLine
+BABEL_OP2_207_90572_20130927_112514_outLine
+BABEL_OP2_207_91125_20130622_154739_inLine
+BABEL_OP2_207_91125_20130622_154739_outLine
+BABEL_OP2_207_91383_20131017_164250_inLine
+BABEL_OP2_207_91383_20131017_164250_outLine
+BABEL_OP2_207_91760_20131008_175549_inLine
+BABEL_OP2_207_91760_20131008_175549_outLine
+BABEL_OP2_207_91888_20131002_140054_inLine
+BABEL_OP2_207_91888_20131002_140054_outLine
+BABEL_OP2_207_92736_20130913_142730_inLine
+BABEL_OP2_207_92736_20130913_142730_outLine
+BABEL_OP2_207_93475_20130712_141154_inLine
+BABEL_OP2_207_93475_20130712_141154_outLine
+BABEL_OP2_207_94262_20130912_223931_inLine
+BABEL_OP2_207_94262_20130912_223931_outLine
+BABEL_OP2_207_94869_20130627_162540_inLine
+BABEL_OP2_207_94869_20130627_162540_outLine
+BABEL_OP2_207_95077_20130910_113448_inLine
+BABEL_OP2_207_95077_20130910_113448_outLine
+BABEL_OP2_207_95231_20131029_145824_inLine
+BABEL_OP2_207_95231_20131029_145824_outLine
+BABEL_OP2_207_95269_20130725_140512_inLine
+BABEL_OP2_207_95269_20130725_140512_outLine
+BABEL_OP2_207_95663_20130626_085943_inLine
+BABEL_OP2_207_95663_20130626_085943_outLine
+BABEL_OP2_207_96190_20130730_105000_inLine
+BABEL_OP2_207_96190_20130730_105000_outLine
+BABEL_OP2_207_96525_20130919_151001_inLine
+BABEL_OP2_207_96525_20130919_151001_outLine
+BABEL_OP2_207_96690_20130808_133431_inLine
+BABEL_OP2_207_96690_20130808_133431_outLine
+BABEL_OP2_207_96808_20131007_222455_inLine
+BABEL_OP2_207_96808_20131007_222455_outLine
+BABEL_OP2_207_96820_20130815_171850_inLine
+BABEL_OP2_207_96820_20130815_171850_outLine
+BABEL_OP2_207_96820_20130815_172511_inLine
+BABEL_OP2_207_96820_20130815_172511_outLine
+BABEL_OP2_207_96910_20130723_132125_inLine
+BABEL_OP2_207_96910_20130723_132125_outLine
+BABEL_OP2_207_97220_20131015_210228_inLine
+BABEL_OP2_207_97220_20131015_210228_outLine
+BABEL_OP2_207_97557_20130824_125158_inLine
+BABEL_OP2_207_97557_20130824_125158_outLine
+BABEL_OP2_207_97588_20130720_172415_inLine
+BABEL_OP2_207_97588_20130720_172415_outLine
+BABEL_OP2_207_97731_20130920_141703_inLine
+BABEL_OP2_207_97731_20130920_141703_outLine
+BABEL_OP2_207_97836_20130930_145119_inLine
+BABEL_OP2_207_97836_20130930_145119_outLine
+BABEL_OP2_207_97896_20130807_165056_inLine
+BABEL_OP2_207_97896_20130807_165056_outLine
+BABEL_OP2_207_97911_20131017_134323_inLine
+BABEL_OP2_207_97911_20131017_134323_outLine
+BABEL_OP2_207_98390_20130630_121753_inLine
+BABEL_OP2_207_98390_20130630_121753_outLine
+BABEL_OP2_207_98489_20130712_001025_inLine
+BABEL_OP2_207_98489_20130712_001025_outLine
+BABEL_OP2_207_98565_20131220_143328_inLine
+BABEL_OP2_207_98565_20131220_143328_outLine
+BABEL_OP2_207_99289_20130930_212352_inLine
+BABEL_OP2_207_99289_20130930_212352_outLine
+BABEL_OP2_207_99998_20130730_104201_inLine
+BABEL_OP2_207_99998_20130730_104201_outLine
diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/untranscribed-training.list b/egs/babel/s5d/conf/lists/207-tokpisin/untranscribed-training.list
new file mode 100644
index 00000000000..bd95fc6c89a
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/207-tokpisin/untranscribed-training.list
@@ -0,0 +1,539 @@
+BABEL_OP2_207_11096_20131010_155716_inLine
+BABEL_OP2_207_11096_20131010_155716_outLine
+BABEL_OP2_207_12635_20130926_134703_inLine
+BABEL_OP2_207_12635_20130926_134703_outLine
+BABEL_OP2_207_13189_20130924_113930_inLine
+BABEL_OP2_207_13189_20130924_113930_outLine
+BABEL_OP2_207_14097_20131211_145352_inLine
+BABEL_OP2_207_14097_20131211_145352_outLine
+BABEL_OP2_207_15324_20130824_114737_inLine
+BABEL_OP2_207_15324_20130824_114737_outLine
+BABEL_OP2_207_15324_20130824_115222_inLine
+BABEL_OP2_207_15324_20130824_115222_outLine
+BABEL_OP2_207_15324_20130824_120315_inLine
+BABEL_OP2_207_15324_20130824_120315_outLine
+BABEL_OP2_207_16787_20130807_141736_inLine
+BABEL_OP2_207_16787_20130807_141736_outLine
+BABEL_OP2_207_17165_20130811_161522_inLine
+BABEL_OP2_207_17165_20130811_161522_outLine
+BABEL_OP2_207_17582_20131014_140754_inLine
+BABEL_OP2_207_17582_20131014_140754_outLine
+BABEL_OP2_207_17881_20130927_103059_inLine
+BABEL_OP2_207_17881_20130927_103059_outLine
+BABEL_OP2_207_17890_20130905_165333_inLine
+BABEL_OP2_207_17890_20130905_165333_outLine
+BABEL_OP2_207_17914_20130926_134141_inLine
+BABEL_OP2_207_17914_20130926_134141_outLine
+BABEL_OP2_207_18118_20130912_105508_inLine
+BABEL_OP2_207_18118_20130912_105508_outLine
+BABEL_OP2_207_18380_20130811_091120_inLine
+BABEL_OP2_207_18380_20130811_091120_outLine
+BABEL_OP2_207_18766_20131007_145032_inLine
+BABEL_OP2_207_18766_20131007_145032_outLine
+BABEL_OP2_207_19120_20131001_112430_inLine
+BABEL_OP2_207_19120_20131001_113821_inLine
+BABEL_OP2_207_19130_20130915_130323_inLine
+BABEL_OP2_207_19130_20130915_130323_outLine
+BABEL_OP2_207_19130_20130915_170627_inLine
+BABEL_OP2_207_19130_20130915_170627_outLine
+BABEL_OP2_207_19444_20131027_115915_inLine
+BABEL_OP2_207_19444_20131027_115915_outLine
+BABEL_OP2_207_19621_20130820_123522_inLine
+BABEL_OP2_207_19621_20130820_123522_outLine
+BABEL_OP2_207_19663_20130808_130208_inLine
+BABEL_OP2_207_19663_20130808_130208_outLine
+BABEL_OP2_207_19832_20131023_131334_inLine
+BABEL_OP2_207_19832_20131023_131334_outLine
+BABEL_OP2_207_20738_20130925_150141_inLine
+BABEL_OP2_207_20738_20130925_150141_outLine
+BABEL_OP2_207_20768_20130918_153000_inLine
+BABEL_OP2_207_20768_20130918_153000_outLine
+BABEL_OP2_207_21393_20131009_171742_inLine
+BABEL_OP2_207_21393_20131009_172913_inLine
+BABEL_OP2_207_21435_20130930_113048_inLine
+BABEL_OP2_207_21435_20130930_113048_outLine
+BABEL_OP2_207_23355_20131028_195808_inLine
+BABEL_OP2_207_23355_20131028_195808_outLine
+BABEL_OP2_207_23395_20130819_172407_inLine
+BABEL_OP2_207_23395_20130819_172407_outLine
+BABEL_OP2_207_24017_20130920_143300_inLine
+BABEL_OP2_207_24017_20130920_143300_outLine
+BABEL_OP2_207_24231_20131004_142046_inLine
+BABEL_OP2_207_24231_20131004_142046_outLine
+BABEL_OP2_207_24241_20131022_163927_inLine
+BABEL_OP2_207_24241_20131022_163927_outLine
+BABEL_OP2_207_24587_20131028_181902_inLine
+BABEL_OP2_207_24587_20131028_181902_outLine
+BABEL_OP2_207_25068_20131128_155214_inLine
+BABEL_OP2_207_25068_20131128_155214_outLine
+BABEL_OP2_207_25198_20131009_144048_inLine
+BABEL_OP2_207_25198_20131009_144048_outLine
+BABEL_OP2_207_26206_20130905_123052_inLine
+BABEL_OP2_207_26206_20130905_123052_outLine
+BABEL_OP2_207_26398_20131007_122710_inLine
+BABEL_OP2_207_26398_20131007_122710_outLine
+BABEL_OP2_207_27042_20130913_145438_inLine
+BABEL_OP2_207_27042_20130913_145438_outLine
+BABEL_OP2_207_28538_20130809_210336_inLine
+BABEL_OP2_207_28538_20130809_210336_outLine
+BABEL_OP2_207_28585_20130921_125721_inLine
+BABEL_OP2_207_28585_20130921_125721_outLine
+BABEL_OP2_207_29021_20131002_153001_inLine
+BABEL_OP2_207_29021_20131002_153001_outLine
+BABEL_OP2_207_29208_20130808_130335_inLine
+BABEL_OP2_207_29208_20130808_130335_outLine
+BABEL_OP2_207_29643_20131010_214342_inLine
+BABEL_OP2_207_29643_20131010_214342_outLine
+BABEL_OP2_207_30497_20131001_130218_inLine
+BABEL_OP2_207_30497_20131001_130218_outLine
+BABEL_OP2_207_30869_20130920_162014_inLine
+BABEL_OP2_207_30869_20130920_162014_outLine
+BABEL_OP2_207_31182_20130917_210449_inLine
+BABEL_OP2_207_31182_20130917_210449_outLine
+BABEL_OP2_207_31184_20130809_153124_inLine
+BABEL_OP2_207_31184_20130809_153124_outLine
+BABEL_OP2_207_31583_20130916_202055_inLine
+BABEL_OP2_207_31583_20130916_202055_outLine
+BABEL_OP2_207_31628_20130829_135440_inLine
+BABEL_OP2_207_31628_20130829_135440_outLine
+BABEL_OP2_207_31979_20130808_164711_inLine
+BABEL_OP2_207_31979_20130808_164711_outLine
+BABEL_OP2_207_31979_20130808_165705_inLine
+BABEL_OP2_207_31979_20130808_165705_outLine
+BABEL_OP2_207_32301_20130905_171450_inLine
+BABEL_OP2_207_32301_20130905_171450_outLine
+BABEL_OP2_207_32861_20131001_201155_inLine
+BABEL_OP2_207_32861_20131001_201155_outLine
+BABEL_OP2_207_32872_20131007_132753_inLine
+BABEL_OP2_207_32872_20131007_132753_outLine
+BABEL_OP2_207_32914_20130828_143138_inLine
+BABEL_OP2_207_32914_20130828_143138_outLine
+BABEL_OP2_207_33251_20130826_113656_inLine
+BABEL_OP2_207_33251_20130826_113656_outLine
+BABEL_OP2_207_33635_20130810_084448_inLine
+BABEL_OP2_207_33635_20130810_084448_outLine
+BABEL_OP2_207_34336_20130723_154022_inLine
+BABEL_OP2_207_34336_20130723_154022_outLine
+BABEL_OP2_207_34903_20130826_111451_inLine
+BABEL_OP2_207_34903_20130826_111451_outLine
+BABEL_OP2_207_34903_20130826_112452_inLine
+BABEL_OP2_207_34903_20130826_112452_outLine
+BABEL_OP2_207_35202_20130904_140235_inLine
+BABEL_OP2_207_35202_20130904_140235_outLine
+BABEL_OP2_207_36059_20130828_142450_inLine
+BABEL_OP2_207_36059_20130828_142450_outLine
+BABEL_OP2_207_36147_20131128_144158_inLine
+BABEL_OP2_207_36147_20131128_144158_outLine
+BABEL_OP2_207_36219_20130801_142236_inLine
+BABEL_OP2_207_36219_20130801_142236_outLine
+BABEL_OP2_207_36990_20130813_203843_inLine
+BABEL_OP2_207_36990_20130813_203843_outLine
+BABEL_OP2_207_36990_20130813_205054_inLine
+BABEL_OP2_207_36990_20130813_205054_outLine
+BABEL_OP2_207_37064_20130802_163007_inLine
+BABEL_OP2_207_37064_20130802_163007_outLine
+BABEL_OP2_207_37229_20131014_133555_inLine
+BABEL_OP2_207_37229_20131014_133555_outLine
+BABEL_OP2_207_37281_20130809_155629_inLine
+BABEL_OP2_207_37281_20130809_155629_outLine
+BABEL_OP2_207_37598_20130822_115445_inLine
+BABEL_OP2_207_37598_20130822_115445_outLine
+BABEL_OP2_207_38076_20130828_114052_inLine
+BABEL_OP2_207_38076_20130828_114052_outLine
+BABEL_OP2_207_38750_20130912_115957_inLine
+BABEL_OP2_207_38750_20130912_115957_outLine
+BABEL_OP2_207_38979_20130925_142422_inLine
+BABEL_OP2_207_38979_20130925_142422_outLine
+BABEL_OP2_207_39059_20130924_141830_inLine
+BABEL_OP2_207_39059_20130924_141830_outLine
+BABEL_OP2_207_39159_20130802_184611_inLine
+BABEL_OP2_207_39159_20130802_184611_outLine
+BABEL_OP2_207_39680_20130924_150026_inLine
+BABEL_OP2_207_39680_20130924_150026_outLine
+BABEL_OP2_207_41097_20130826_120511_inLine
+BABEL_OP2_207_41097_20130826_120511_outLine
+BABEL_OP2_207_41233_20130919_151406_inLine
+BABEL_OP2_207_41233_20130919_151406_outLine
+BABEL_OP2_207_41692_20131008_115554_inLine
+BABEL_OP2_207_41692_20131008_115554_outLine
+BABEL_OP2_207_42155_20130820_153344_inLine
+BABEL_OP2_207_42155_20130820_153344_outLine
+BABEL_OP2_207_42155_20130820_155002_inLine
+BABEL_OP2_207_42155_20130820_155002_outLine
+BABEL_OP2_207_42243_20130730_171007_inLine
+BABEL_OP2_207_42243_20130730_171620_inLine
+BABEL_OP2_207_42526_20130903_162134_inLine
+BABEL_OP2_207_42526_20130903_162134_outLine
+BABEL_OP2_207_42526_20130903_163434_inLine
+BABEL_OP2_207_42526_20130903_163434_outLine
+BABEL_OP2_207_42718_20130828_165932_inLine
+BABEL_OP2_207_42718_20130828_165932_outLine
+BABEL_OP2_207_42848_20131010_143925_inLine
+BABEL_OP2_207_42848_20131010_143925_outLine
+BABEL_OP2_207_42883_20131008_131439_inLine
+BABEL_OP2_207_42883_20131008_131439_outLine
+BABEL_OP2_207_43074_20131213_105423_inLine
+BABEL_OP2_207_43074_20131213_105423_outLine
+BABEL_OP2_207_43285_20130905_162602_inLine
+BABEL_OP2_207_43285_20130905_162602_outLine
+BABEL_OP2_207_43388_20130809_194529_inLine
+BABEL_OP2_207_43388_20130809_194529_outLine
+BABEL_OP2_207_43789_20130809_213917_inLine
+BABEL_OP2_207_43789_20130809_213917_outLine
+BABEL_OP2_207_43990_20131027_190409_inLine
+BABEL_OP2_207_43990_20131027_190409_outLine
+BABEL_OP2_207_44290_20131002_160104_inLine
+BABEL_OP2_207_44290_20131002_160104_outLine
+BABEL_OP2_207_44847_20130827_155200_inLine
+BABEL_OP2_207_44847_20130827_155200_outLine
+BABEL_OP2_207_44868_20130904_135956_inLine
+BABEL_OP2_207_44868_20130904_135956_outLine
+BABEL_OP2_207_45106_20130823_154724_inLine
+BABEL_OP2_207_45106_20130823_154724_outLine
+BABEL_OP2_207_46315_20130905_150622_inLine
+BABEL_OP2_207_46315_20130905_150622_outLine
+BABEL_OP2_207_47799_20131023_123730_inLine
+BABEL_OP2_207_47799_20131023_123730_outLine
+BABEL_OP2_207_47877_20130902_143454_inLine
+BABEL_OP2_207_47877_20130902_143454_outLine
+BABEL_OP2_207_48200_20130921_155444_inLine
+BABEL_OP2_207_48200_20130921_155444_outLine
+BABEL_OP2_207_48789_20130812_134605_inLine
+BABEL_OP2_207_48789_20130812_134605_outLine
+BABEL_OP2_207_49118_20130920_121936_inLine
+BABEL_OP2_207_49118_20130920_121936_outLine
+BABEL_OP2_207_49197_20130807_131817_inLine
+BABEL_OP2_207_49197_20130807_131817_outLine
+BABEL_OP2_207_49812_20130922_204620_inLine
+BABEL_OP2_207_49812_20130922_204620_outLine
+BABEL_OP2_207_49902_20130724_154629_inLine
+BABEL_OP2_207_49902_20130724_154629_outLine
+BABEL_OP2_207_50630_20130905_150725_inLine
+BABEL_OP2_207_50630_20130905_150725_outLine
+BABEL_OP2_207_50745_20130930_091255_inLine
+BABEL_OP2_207_50745_20130930_091255_outLine
+BABEL_OP2_207_50810_20130625_080815_outLine
+BABEL_OP2_207_50940_20131212_122606_inLine
+BABEL_OP2_207_50940_20131212_122606_outLine
+BABEL_OP2_207_50958_20130808_153539_inLine
+BABEL_OP2_207_50958_20130808_155452_inLine
+BABEL_OP2_207_51414_20131008_124320_inLine
+BABEL_OP2_207_51414_20131008_124320_outLine
+BABEL_OP2_207_51540_20130920_151858_inLine
+BABEL_OP2_207_51540_20130920_151858_outLine
+BABEL_OP2_207_52222_20131101_145127_inLine
+BABEL_OP2_207_52222_20131101_145127_outLine
+BABEL_OP2_207_52442_20130814_204040_inLine
+BABEL_OP2_207_52442_20130814_204040_outLine
+BABEL_OP2_207_52483_20131023_121543_inLine
+BABEL_OP2_207_52483_20131023_121543_outLine
+BABEL_OP2_207_53072_20131007_135601_inLine
+BABEL_OP2_207_53072_20131007_135601_outLine
+BABEL_OP2_207_53665_20131001_124434_inLine
+BABEL_OP2_207_53665_20131001_124434_outLine
+BABEL_OP2_207_54634_20131106_133052_inLine
+BABEL_OP2_207_54634_20131106_133052_outLine
+BABEL_OP2_207_56023_20130922_114453_inLine
+BABEL_OP2_207_56023_20130922_114453_outLine
+BABEL_OP2_207_56213_20130911_155753_inLine
+BABEL_OP2_207_56213_20130911_155753_outLine
+BABEL_OP2_207_56306_20130903_120057_inLine
+BABEL_OP2_207_56306_20130903_120057_outLine
+BABEL_OP2_207_56345_20131030_214035_inLine
+BABEL_OP2_207_56345_20131030_214035_outLine
+BABEL_OP2_207_56677_20130911_181638_inLine
+BABEL_OP2_207_56677_20130911_181638_outLine
+BABEL_OP2_207_56720_20130910_114920_inLine
+BABEL_OP2_207_56720_20130910_114920_outLine
+BABEL_OP2_207_57065_20130903_113823_inLine
+BABEL_OP2_207_57065_20130903_113823_outLine
+BABEL_OP2_207_57219_20131016_161014_inLine
+BABEL_OP2_207_57219_20131016_161014_outLine
+BABEL_OP2_207_57464_20131002_141306_inLine
+BABEL_OP2_207_57464_20131002_141306_outLine
+BABEL_OP2_207_57566_20130921_125810_inLine
+BABEL_OP2_207_57566_20130921_125810_outLine
+BABEL_OP2_207_57609_20130819_123817_inLine
+BABEL_OP2_207_57609_20130819_123817_outLine
+BABEL_OP2_207_57678_20130802_172845_inLine
+BABEL_OP2_207_57678_20130802_172845_outLine
+BABEL_OP2_207_57919_20131130_111652_inLine
+BABEL_OP2_207_57919_20131130_111652_outLine
+BABEL_OP2_207_58026_20131017_142517_inLine
+BABEL_OP2_207_58026_20131017_142517_outLine
+BABEL_OP2_207_58717_20130821_131155_inLine
+BABEL_OP2_207_58717_20130821_131155_outLine
+BABEL_OP2_207_59928_20130701_151227_inLine
+BABEL_OP2_207_60310_20130915_115206_inLine
+BABEL_OP2_207_60310_20130915_115206_outLine
+BABEL_OP2_207_61040_20130915_174923_inLine
+BABEL_OP2_207_61040_20130915_174923_outLine
+BABEL_OP2_207_61225_20130624_075839_inLine
+BABEL_OP2_207_61225_20130624_075839_outLine
+BABEL_OP2_207_61225_20130816_102014_inLine
+BABEL_OP2_207_61225_20130816_102014_outLine
+BABEL_OP2_207_61435_20130920_151632_inLine
+BABEL_OP2_207_61435_20130920_151632_outLine
+BABEL_OP2_207_61888_20130925_153044_inLine
+BABEL_OP2_207_61888_20130925_153044_outLine
+BABEL_OP2_207_61971_20131002_122934_inLine
+BABEL_OP2_207_61971_20131002_122934_outLine
+BABEL_OP2_207_61971_20131002_124937_inLine
+BABEL_OP2_207_61971_20131002_124937_outLine
+BABEL_OP2_207_62286_20130808_152914_inLine
+BABEL_OP2_207_62286_20130808_152914_outLine
+BABEL_OP2_207_62456_20131008_120833_inLine
+BABEL_OP2_207_62456_20131008_120833_outLine
+BABEL_OP2_207_62835_20130813_200412_inLine
+BABEL_OP2_207_62835_20130813_200412_outLine
+BABEL_OP2_207_63220_20130826_161151_inLine
+BABEL_OP2_207_63220_20130826_161151_outLine
+BABEL_OP2_207_63309_20131218_175444_inLine
+BABEL_OP2_207_63309_20131218_175444_outLine
+BABEL_OP2_207_63425_20130829_145909_inLine
+BABEL_OP2_207_63425_20130829_145909_outLine
+BABEL_OP2_207_63523_20130829_130857_inLine
+BABEL_OP2_207_63523_20130829_130857_outLine
+BABEL_OP2_207_63523_20130829_131711_inLine
+BABEL_OP2_207_63523_20130829_131711_outLine
+BABEL_OP2_207_63730_20131015_214600_inLine
+BABEL_OP2_207_63730_20131015_214600_outLine
+BABEL_OP2_207_63938_20130926_154144_inLine
+BABEL_OP2_207_63938_20130926_154144_outLine
+BABEL_OP2_207_63938_20130926_155144_inLine
+BABEL_OP2_207_63938_20130926_155144_outLine
+BABEL_OP2_207_64014_20130926_150824_inLine
+BABEL_OP2_207_64014_20130926_150824_outLine
+BABEL_OP2_207_64259_20131102_110911_inLine
+BABEL_OP2_207_64259_20131102_110911_outLine
+BABEL_OP2_207_64638_20130829_133013_inLine
+BABEL_OP2_207_64638_20130829_133013_outLine
+BABEL_OP2_207_64902_20130930_143110_inLine
+BABEL_OP2_207_64902_20130930_143110_outLine
+BABEL_OP2_207_65064_20130820_141717_inLine
+BABEL_OP2_207_65064_20130820_141717_outLine
+BABEL_OP2_207_65477_20130807_163701_inLine
+BABEL_OP2_207_65640_20131002_143110_inLine
+BABEL_OP2_207_65640_20131002_143110_outLine
+BABEL_OP2_207_66026_20130911_163013_inLine
+BABEL_OP2_207_66026_20130911_163013_outLine
+BABEL_OP2_207_66959_20130910_082006_inLine
+BABEL_OP2_207_66959_20130910_082006_outLine
+BABEL_OP2_207_66959_20130910_082705_inLine
+BABEL_OP2_207_66959_20130910_082705_outLine
+BABEL_OP2_207_66959_20130910_083542_inLine
+BABEL_OP2_207_66959_20130910_083542_outLine
+BABEL_OP2_207_66975_20131203_124359_inLine
+BABEL_OP2_207_66975_20131203_124359_outLine
+BABEL_OP2_207_67085_20131004_122616_inLine
+BABEL_OP2_207_67085_20131004_122616_outLine
+BABEL_OP2_207_67552_20130904_171052_inLine
+BABEL_OP2_207_67552_20130904_171052_outLine
+BABEL_OP2_207_67964_20131003_163118_inLine
+BABEL_OP2_207_67964_20131003_163118_outLine
+BABEL_OP2_207_68306_20130906_161631_inLine
+BABEL_OP2_207_68306_20130906_161631_outLine
+BABEL_OP2_207_69107_20130821_115813_inLine
+BABEL_OP2_207_69107_20130821_115813_outLine
+BABEL_OP2_207_69107_20130821_120807_inLine
+BABEL_OP2_207_69107_20130821_120807_outLine
+BABEL_OP2_207_69153_20130912_183854_inLine
+BABEL_OP2_207_69153_20130912_183854_outLine
+BABEL_OP2_207_69885_20130907_114201_inLine
+BABEL_OP2_207_69885_20130907_114201_outLine
+BABEL_OP2_207_69982_20131018_120252_inLine
+BABEL_OP2_207_69982_20131018_120252_outLine
+BABEL_OP2_207_70182_20131014_163540_inLine
+BABEL_OP2_207_70182_20131014_163540_outLine
+BABEL_OP2_207_70343_20130907_114751_inLine
+BABEL_OP2_207_70343_20130907_114751_outLine
+BABEL_OP2_207_70460_20130925_151332_inLine
+BABEL_OP2_207_70460_20130925_151332_outLine
+BABEL_OP2_207_70460_20130925_152713_inLine
+BABEL_OP2_207_70460_20130925_152713_outLine
+BABEL_OP2_207_70526_20130908_193512_inLine
+BABEL_OP2_207_70526_20130908_193512_outLine
+BABEL_OP2_207_70986_20131030_190232_inLine
+BABEL_OP2_207_70986_20131030_190232_outLine
+BABEL_OP2_207_71189_20130930_121030_inLine
+BABEL_OP2_207_71189_20130930_121030_outLine
+BABEL_OP2_207_71460_20131128_152217_inLine
+BABEL_OP2_207_71460_20131128_152217_outLine
+BABEL_OP2_207_72007_20130906_152449_inLine
+BABEL_OP2_207_72007_20130906_152449_outLine
+BABEL_OP2_207_72349_20131002_145602_inLine
+BABEL_OP2_207_72349_20131002_145602_outLine
+BABEL_OP2_207_73301_20130801_133004_inLine
+BABEL_OP2_207_73301_20130801_133004_outLine
+BABEL_OP2_207_73485_20130907_132923_inLine
+BABEL_OP2_207_73485_20130907_132923_outLine
+BABEL_OP2_207_73757_20130813_005856_inLine
+BABEL_OP2_207_73757_20130813_005856_outLine
+BABEL_OP2_207_73757_20130813_011142_inLine
+BABEL_OP2_207_73757_20130813_011142_outLine
+BABEL_OP2_207_75342_20130906_143544_inLine
+BABEL_OP2_207_75342_20130906_143544_outLine
+BABEL_OP2_207_75460_20131014_160822_inLine
+BABEL_OP2_207_75460_20131014_160822_outLine
+BABEL_OP2_207_76793_20131028_174027_inLine
+BABEL_OP2_207_76793_20131028_174027_outLine
+BABEL_OP2_207_76970_20131018_142728_inLine
+BABEL_OP2_207_76970_20131018_142728_outLine
+BABEL_OP2_207_77242_20131015_210438_inLine
+BABEL_OP2_207_77242_20131015_210438_outLine
+BABEL_OP2_207_78016_20130725_161812_outLine
+BABEL_OP2_207_78116_20130906_165511_inLine
+BABEL_OP2_207_78116_20130906_165511_outLine
+BABEL_OP2_207_78360_20130926_154542_inLine
+BABEL_OP2_207_78360_20130926_154542_outLine
+BABEL_OP2_207_78544_20130829_140559_inLine
+BABEL_OP2_207_78544_20130829_140559_outLine
+BABEL_OP2_207_79139_20130811_111254_inLine
+BABEL_OP2_207_79139_20130811_111254_outLine
+BABEL_OP2_207_80622_20130824_120649_inLine
+BABEL_OP2_207_80622_20130824_120649_outLine
+BABEL_OP2_207_80897_20130824_111625_inLine
+BABEL_OP2_207_80897_20130824_111625_outLine
+BABEL_OP2_207_81229_20130807_135935_inLine
+BABEL_OP2_207_81229_20130807_135935_outLine
+BABEL_OP2_207_81810_20130831_144019_inLine
+BABEL_OP2_207_81810_20130831_144019_outLine
+BABEL_OP2_207_81810_20130831_145233_inLine
+BABEL_OP2_207_81810_20130831_145233_outLine
+BABEL_OP2_207_82089_20130809_131053_inLine
+BABEL_OP2_207_82089_20130809_131053_outLine
+BABEL_OP2_207_82224_20130923_132931_inLine
+BABEL_OP2_207_82361_20131001_152932_inLine
+BABEL_OP2_207_82361_20131001_152932_outLine
+BABEL_OP2_207_82473_20130702_072644_inLine
+BABEL_OP2_207_82966_20130918_130322_inLine
+BABEL_OP2_207_82966_20130918_130322_outLine
+BABEL_OP2_207_83062_20131002_115950_inLine
+BABEL_OP2_207_83062_20131002_115950_outLine
+BABEL_OP2_207_83929_20130621_172038_outLine
+BABEL_OP2_207_83935_20130906_160858_inLine
+BABEL_OP2_207_83935_20130906_160858_outLine
+BABEL_OP2_207_84055_20130926_130321_inLine
+BABEL_OP2_207_84055_20130926_130321_outLine
+BABEL_OP2_207_84055_20130926_131535_inLine
+BABEL_OP2_207_84055_20130926_131535_outLine
+BABEL_OP2_207_84061_20130725_161035_inLine
+BABEL_OP2_207_84061_20130725_161035_outLine
+BABEL_OP2_207_84327_20130907_112232_inLine
+BABEL_OP2_207_84327_20130907_112232_outLine
+BABEL_OP2_207_84339_20130908_213808_inLine
+BABEL_OP2_207_84339_20130908_213808_outLine
+BABEL_OP2_207_84370_20131017_114407_inLine
+BABEL_OP2_207_84370_20131017_114407_outLine
+BABEL_OP2_207_84458_20130911_150603_inLine
+BABEL_OP2_207_84458_20130911_150603_outLine
+BABEL_OP2_207_84469_20130911_152956_inLine
+BABEL_OP2_207_84469_20130911_152956_outLine
+BABEL_OP2_207_84709_20131025_164240_inLine
+BABEL_OP2_207_84709_20131025_164240_outLine
+BABEL_OP2_207_84737_20130924_104520_inLine
+BABEL_OP2_207_84737_20130924_104520_outLine
+BABEL_OP2_207_84838_20130918_142125_inLine
+BABEL_OP2_207_84838_20130918_142125_outLine
+BABEL_OP2_207_85254_20131016_163511_inLine
+BABEL_OP2_207_85254_20131016_163511_outLine
+BABEL_OP2_207_85325_20130908_204430_inLine
+BABEL_OP2_207_85325_20130908_204430_outLine
+BABEL_OP2_207_86597_20131015_223953_inLine
+BABEL_OP2_207_86597_20131015_223953_outLine
+BABEL_OP2_207_86888_20130823_120853_inLine
+BABEL_OP2_207_86888_20130823_120853_outLine
+BABEL_OP2_207_86888_20130823_122304_inLine
+BABEL_OP2_207_86888_20130823_122304_outLine
+BABEL_OP2_207_87545_20131004_134332_inLine
+BABEL_OP2_207_87545_20131004_134332_outLine
+BABEL_OP2_207_87889_20130828_123340_inLine
+BABEL_OP2_207_87889_20130828_123340_outLine
+BABEL_OP2_207_88372_20130927_123913_inLine
+BABEL_OP2_207_88372_20130927_123913_outLine
+BABEL_OP2_207_88550_20131002_133933_inLine
+BABEL_OP2_207_88550_20131002_133933_outLine
+BABEL_OP2_207_88601_20130812_143956_inLine
+BABEL_OP2_207_88601_20130812_143956_outLine
+BABEL_OP2_207_88669_20130823_134613_inLine
+BABEL_OP2_207_88669_20130823_134613_outLine
+BABEL_OP2_207_89358_20130820_133904_inLine
+BABEL_OP2_207_89358_20130820_133904_outLine
+BABEL_OP2_207_89794_20130828_091302_inLine
+BABEL_OP2_207_89794_20130828_091302_outLine
+BABEL_OP2_207_90080_20131003_143629_inLine
+BABEL_OP2_207_90080_20131003_143629_outLine
+BABEL_OP2_207_90440_20131027_175417_inLine
+BABEL_OP2_207_90440_20131027_175417_outLine
+BABEL_OP2_207_90709_20130627_182820_inLine
+BABEL_OP2_207_90709_20130627_182820_outLine
+BABEL_OP2_207_90739_20130807_151133_inLine
+BABEL_OP2_207_90739_20130807_151133_outLine
+BABEL_OP2_207_90760_20131016_111829_inLine
+BABEL_OP2_207_90760_20131016_111829_outLine
+BABEL_OP2_207_91189_20131011_125932_inLine
+BABEL_OP2_207_91189_20131011_125932_outLine
+BABEL_OP2_207_91372_20130909_134637_inLine
+BABEL_OP2_207_91372_20130909_134637_outLine
+BABEL_OP2_207_91930_20131001_222834_inLine
+BABEL_OP2_207_91930_20131001_222834_outLine
+BABEL_OP2_207_91930_20131001_223632_inLine
+BABEL_OP2_207_91930_20131001_223632_outLine
+BABEL_OP2_207_92077_20131007_163003_inLine
+BABEL_OP2_207_92077_20131007_163003_outLine
+BABEL_OP2_207_92176_20130813_133457_inLine
+BABEL_OP2_207_92176_20130813_133457_outLine
+BABEL_OP2_207_92557_20130924_134800_inLine
+BABEL_OP2_207_92557_20130924_134800_outLine
+BABEL_OP2_207_92643_20131007_150231_inLine
+BABEL_OP2_207_92643_20131007_150231_outLine
+BABEL_OP2_207_92698_20130812_235059_inLine
+BABEL_OP2_207_92698_20130812_235059_outLine
+BABEL_OP2_207_92757_20130902_145657_inLine
+BABEL_OP2_207_92757_20130902_145657_outLine
+BABEL_OP2_207_92757_20130902_151025_inLine
+BABEL_OP2_207_92757_20130902_151025_outLine
+BABEL_OP2_207_92757_20130902_152031_inLine
+BABEL_OP2_207_92757_20130902_152031_outLine
+BABEL_OP2_207_93469_20131004_145605_inLine
+BABEL_OP2_207_93469_20131004_145605_outLine
+BABEL_OP2_207_94002_20130813_140301_inLine
+BABEL_OP2_207_94002_20130813_140301_outLine
+BABEL_OP2_207_94025_20130904_125944_inLine
+BABEL_OP2_207_94025_20130904_125944_outLine
+BABEL_OP2_207_94025_20130904_130959_inLine
+BABEL_OP2_207_94025_20130904_130959_outLine
+BABEL_OP2_207_94166_20130925_152248_inLine
+BABEL_OP2_207_94166_20130925_152248_outLine
+BABEL_OP2_207_94237_20131004_202859_inLine
+BABEL_OP2_207_94237_20131004_202859_outLine
+BABEL_OP2_207_94409_20130809_220412_inLine
+BABEL_OP2_207_94409_20130809_220412_outLine
+BABEL_OP2_207_94465_20130909_125729_inLine
+BABEL_OP2_207_94465_20130909_125729_outLine
+BABEL_OP2_207_94465_20130909_130933_inLine
+BABEL_OP2_207_94465_20130909_130933_outLine
+BABEL_OP2_207_94745_20130829_131647_inLine
+BABEL_OP2_207_94745_20130829_131647_outLine
+BABEL_OP2_207_94803_20131101_171456_inLine
+BABEL_OP2_207_94803_20131101_171456_outLine
+BABEL_OP2_207_95670_20130801_184732_inLine
+BABEL_OP2_207_95670_20130801_184732_outLine
+BABEL_OP2_207_95670_20130801_185813_inLine
+BABEL_OP2_207_95670_20130801_185813_outLine
+BABEL_OP2_207_95903_20130927_143755_inLine
+BABEL_OP2_207_95903_20130927_143755_outLine
+BABEL_OP2_207_96088_20131002_131712_inLine
+BABEL_OP2_207_96088_20131002_131712_outLine
+BABEL_OP2_207_96205_20130820_122740_inLine
+BABEL_OP2_207_96205_20130820_122740_outLine
+BABEL_OP2_207_96405_20130802_164853_inLine
+BABEL_OP2_207_96405_20130802_164853_outLine
+BABEL_OP2_207_96504_20130719_153914_inLine
+BABEL_OP2_207_96504_20130719_155023_inLine
+BABEL_OP2_207_96504_20130802_132920_inLine
+BABEL_OP2_207_96584_20130926_074218_inLine
+BABEL_OP2_207_96584_20130926_074218_outLine
+BABEL_OP2_207_97097_20131001_132614_inLine
+BABEL_OP2_207_97097_20131001_132614_outLine
+BABEL_OP2_207_99887_20130924_102355_inLine
diff --git a/egs/babel/s5d/conf/lists/301-cebuano/dev.list b/egs/babel/s5d/conf/lists/301-cebuano/dev.list
new file mode 100644
index 00000000000..ecf3753ee7d
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/301-cebuano/dev.list
@@ -0,0 +1,134 @@
+BABEL_OP2_301_13792_20131111_122219_inLine
+BABEL_OP2_301_13792_20131111_122219_outLine
+BABEL_OP2_301_14141_20140118_202248_inLine
+BABEL_OP2_301_14141_20140118_202248_outLine
+BABEL_OP2_301_15262_20131105_213812_inLine
+BABEL_OP2_301_15262_20131105_213812_outLine
+BABEL_OP2_301_15262_20131105_230709_inLine
+BABEL_OP2_301_15262_20131105_230709_outLine
+BABEL_OP2_301_15638_20131210_131327_inLine
+BABEL_OP2_301_15638_20131210_131327_outLine
+BABEL_OP2_301_17127_20140106_175906_inLine
+BABEL_OP2_301_17127_20140106_175906_outLine
+BABEL_OP2_301_17881_20140122_201653_inLine
+BABEL_OP2_301_17881_20140122_201653_outLine
+BABEL_OP2_301_18078_20131226_153406_inLine
+BABEL_OP2_301_18078_20131226_153406_outLine
+BABEL_OP2_301_21109_20140102_180619_inLine
+BABEL_OP2_301_21109_20140102_180619_outLine
+BABEL_OP2_301_22280_20140206_202952_inLine
+BABEL_OP2_301_22280_20140206_202952_outLine
+BABEL_OP2_301_22466_20131015_174457_inLine
+BABEL_OP2_301_22466_20131015_174457_outLine
+BABEL_OP2_301_22612_20131217_202720_inLine
+BABEL_OP2_301_22612_20131217_202720_outLine
+BABEL_OP2_301_23505_20131023_135517_inLine
+BABEL_OP2_301_23505_20131023_135517_outLine
+BABEL_OP2_301_24241_20140214_170629_inLine
+BABEL_OP2_301_24241_20140214_170629_outLine
+BABEL_OP2_301_27082_20131209_203149_inLine
+BABEL_OP2_301_27082_20131209_203149_outLine
+BABEL_OP2_301_29685_20131203_182746_inLine
+BABEL_OP2_301_29685_20131203_182746_outLine
+BABEL_OP2_301_29685_20131203_184526_inLine
+BABEL_OP2_301_29685_20131203_184526_outLine
+BABEL_OP2_301_36059_20140118_204512_inLine
+BABEL_OP2_301_36059_20140118_204512_outLine
+BABEL_OP2_301_37281_20131205_190107_inLine
+BABEL_OP2_301_37281_20131205_190107_outLine
+BABEL_OP2_301_38340_20131128_145618_inLine
+BABEL_OP2_301_38340_20131128_145618_outLine
+BABEL_OP2_301_40713_20131126_193850_inLine
+BABEL_OP2_301_40713_20131126_193850_outLine
+BABEL_OP2_301_41958_20131127_145018_inLine
+BABEL_OP2_301_41958_20131127_145018_outLine
+BABEL_OP2_301_43239_20140102_190746_inLine
+BABEL_OP2_301_43239_20140102_190746_outLine
+BABEL_OP2_301_43646_20131019_165638_inLine
+BABEL_OP2_301_43646_20131019_165638_outLine
+BABEL_OP2_301_46008_20140126_192930_inLine
+BABEL_OP2_301_46008_20140126_192930_outLine
+BABEL_OP2_301_46333_20131027_181031_inLine
+BABEL_OP2_301_46333_20131027_181031_outLine
+BABEL_OP2_301_48789_20131209_181711_inLine
+BABEL_OP2_301_48789_20131209_181711_outLine
+BABEL_OP2_301_49902_20131127_180426_inLine
+BABEL_OP2_301_49902_20131127_180426_outLine
+BABEL_OP2_301_50565_20131025_202729_inLine
+BABEL_OP2_301_50565_20131025_202729_outLine
+BABEL_OP2_301_51530_20140125_195307_inLine
+BABEL_OP2_301_51530_20140125_195307_outLine
+BABEL_OP2_301_51955_20131125_182037_inLine
+BABEL_OP2_301_51955_20131125_182037_outLine
+BABEL_OP2_301_52301_20131107_133036_inLine
+BABEL_OP2_301_52301_20131107_133036_outLine
+BABEL_OP2_301_52301_20131107_135543_inLine
+BABEL_OP2_301_52301_20131107_135543_outLine
+BABEL_OP2_301_54744_20131202_184432_inLine
+BABEL_OP2_301_54744_20131202_184432_outLine
+BABEL_OP2_301_56370_20131101_175739_inLine
+BABEL_OP2_301_56370_20131101_175739_outLine
+BABEL_OP2_301_60299_20140202_130806_inLine
+BABEL_OP2_301_60299_20140202_130806_outLine
+BABEL_OP2_301_62362_20140129_154002_inLine
+BABEL_OP2_301_62362_20140129_154002_outLine
+BABEL_OP2_301_63425_20131213_184303_inLine
+BABEL_OP2_301_63425_20131213_184303_outLine
+BABEL_OP2_301_64759_20131103_154236_inLine
+BABEL_OP2_301_64759_20131103_154236_outLine
+BABEL_OP2_301_64870_20131226_133240_inLine
+BABEL_OP2_301_64870_20131226_133240_outLine
+BABEL_OP2_301_65252_20140126_190555_inLine
+BABEL_OP2_301_65252_20140126_190555_outLine
+BABEL_OP2_301_66026_20131216_194850_inLine
+BABEL_OP2_301_66026_20131216_194850_outLine
+BABEL_OP2_301_67085_20140126_181613_inLine
+BABEL_OP2_301_67085_20140126_181613_outLine
+BABEL_OP2_301_68306_20131212_171648_inLine
+BABEL_OP2_301_68306_20131212_171648_outLine
+BABEL_OP2_301_71404_20131112_205323_inLine
+BABEL_OP2_301_71404_20131112_205323_outLine
+BABEL_OP2_301_71404_20131112_211451_inLine
+BABEL_OP2_301_71404_20131112_211451_outLine
+BABEL_OP2_301_74226_20131213_195309_inLine
+BABEL_OP2_301_74226_20131213_195309_outLine
+BABEL_OP2_301_74455_20140115_152935_inLine
+BABEL_OP2_301_74455_20140115_152935_outLine
+BABEL_OP2_301_78194_20131015_181857_inLine
+BABEL_OP2_301_78194_20131015_181857_outLine
+BABEL_OP2_301_78194_20131015_183910_inLine
+BABEL_OP2_301_78194_20131015_183910_outLine
+BABEL_OP2_301_78360_20140110_190526_inLine
+BABEL_OP2_301_78360_20140110_190526_outLine
+BABEL_OP2_301_79660_20140201_160331_inLine
+BABEL_OP2_301_79660_20140201_160331_outLine
+BABEL_OP2_301_79820_20131127_235459_inLine
+BABEL_OP2_301_79820_20131127_235459_outLine
+BABEL_OP2_301_80897_20140206_142309_inLine
+BABEL_OP2_301_80897_20140206_142309_outLine
+BABEL_OP2_301_81427_20131126_151401_inLine
+BABEL_OP2_301_81427_20131126_151401_outLine
+BABEL_OP2_301_84611_20131125_193454_inLine
+BABEL_OP2_301_84611_20131125_193454_outLine
+BABEL_OP2_301_84709_20140220_141332_inLine
+BABEL_OP2_301_84709_20140220_141332_outLine
+BABEL_OP2_301_85179_20131227_172225_inLine
+BABEL_OP2_301_85179_20131227_172225_outLine
+BABEL_OP2_301_86467_20131112_182159_inLine
+BABEL_OP2_301_86467_20131112_182159_outLine
+BABEL_OP2_301_86467_20131112_193636_inLine
+BABEL_OP2_301_86467_20131112_193636_outLine
+BABEL_OP2_301_88550_20140128_150822_inLine
+BABEL_OP2_301_88550_20140128_150822_outLine
+BABEL_OP2_301_88873_20131202_130910_inLine
+BABEL_OP2_301_88873_20131202_130910_outLine
+BABEL_OP2_301_92792_20140123_104047_inLine
+BABEL_OP2_301_92792_20140123_104047_outLine
+BABEL_OP2_301_96985_20131021_164130_inLine
+BABEL_OP2_301_96985_20131021_164130_outLine
+BABEL_OP2_301_98489_20131123_232017_inLine
+BABEL_OP2_301_98489_20131123_232017_outLine
+BABEL_OP2_301_98489_20131123_233440_inLine
+BABEL_OP2_301_98489_20131123_233440_outLine
+BABEL_OP2_301_99516_20131022_111915_inLine
+BABEL_OP2_301_99516_20131022_111915_outLine
diff --git a/egs/babel/s5d/conf/lists/301-cebuano/eval.list b/egs/babel/s5d/conf/lists/301-cebuano/eval.list
new file mode 100644
index 00000000000..6958122726d
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/301-cebuano/eval.list
@@ -0,0 +1,190 @@
+BABEL_OP2_301_10019_20131127_165625_inLine
+BABEL_OP2_301_10019_20131127_165625_outLine
+BABEL_OP2_301_10416_20131203_193332_inLine
+BABEL_OP2_301_10416_20131203_193332_outLine
+BABEL_OP2_301_12767_20131122_204245_inLine
+BABEL_OP2_301_12767_20131122_204245_outLine
+BABEL_OP2_301_13427_20131226_153605_inLine
+BABEL_OP2_301_13427_20131226_153605_outLine
+BABEL_OP2_301_13490_20131209_200441_inLine
+BABEL_OP2_301_13490_20131209_200441_outLine
+BABEL_OP2_301_14440_20131217_152957_inLine
+BABEL_OP2_301_14440_20131217_152957_outLine
+BABEL_OP2_301_14537_20140126_192700_inLine
+BABEL_OP2_301_14537_20140126_192700_outLine
+BABEL_OP2_301_16056_20131112_135620_inLine
+BABEL_OP2_301_16056_20131112_135620_outLine
+BABEL_OP2_301_16056_20131112_140413_inLine
+BABEL_OP2_301_16056_20131112_140413_outLine
+BABEL_OP2_301_16184_20131018_004611_inLine
+BABEL_OP2_301_16184_20131018_004611_outLine
+BABEL_OP2_301_16839_20140106_195749_inLine
+BABEL_OP2_301_16839_20140106_195749_outLine
+BABEL_OP2_301_17165_20131203_150708_inLine
+BABEL_OP2_301_17165_20131203_150708_outLine
+BABEL_OP2_301_18766_20140127_140851_inLine
+BABEL_OP2_301_18766_20140127_140851_outLine
+BABEL_OP2_301_19782_20131220_143639_inLine
+BABEL_OP2_301_19782_20131220_143639_outLine
+BABEL_OP2_301_19832_20140214_144414_inLine
+BABEL_OP2_301_19832_20140214_144414_outLine
+BABEL_OP2_301_20800_20131119_233324_inLine
+BABEL_OP2_301_20800_20131119_233324_outLine
+BABEL_OP2_301_22641_20131112_223928_inLine
+BABEL_OP2_301_22641_20131112_223928_outLine
+BABEL_OP2_301_23196_20140224_145440_inLine
+BABEL_OP2_301_23196_20140224_145440_outLine
+BABEL_OP2_301_23628_20131121_202709_inLine
+BABEL_OP2_301_23628_20131121_202709_outLine
+BABEL_OP2_301_26074_20140214_150738_inLine
+BABEL_OP2_301_26074_20140214_150738_outLine
+BABEL_OP2_301_28585_20140103_174051_inLine
+BABEL_OP2_301_28585_20140103_174051_outLine
+BABEL_OP2_301_29777_20131227_175745_inLine
+BABEL_OP2_301_29777_20131227_175745_outLine
+BABEL_OP2_301_32914_20140102_183534_inLine
+BABEL_OP2_301_32914_20140102_183534_outLine
+BABEL_OP2_301_33992_20140128_153304_inLine
+BABEL_OP2_301_33992_20140128_153304_outLine
+BABEL_OP2_301_35069_20140104_210141_inLine
+BABEL_OP2_301_35069_20140104_210141_outLine
+BABEL_OP2_301_36219_20131125_140227_inLine
+BABEL_OP2_301_36219_20131125_140227_outLine
+BABEL_OP2_301_36219_20131125_141324_inLine
+BABEL_OP2_301_36219_20131125_141324_outLine
+BABEL_OP2_301_36341_20131024_131700_inLine
+BABEL_OP2_301_36341_20131024_131700_outLine
+BABEL_OP2_301_36341_20131025_165924_inLine
+BABEL_OP2_301_36341_20131025_165924_outLine
+BABEL_OP2_301_37499_20140129_153724_inLine
+BABEL_OP2_301_37499_20140129_153724_outLine
+BABEL_OP2_301_40740_20140106_203616_inLine
+BABEL_OP2_301_40740_20140106_203616_outLine
+BABEL_OP2_301_41493_20131025_161722_inLine
+BABEL_OP2_301_41493_20131025_161722_outLine
+BABEL_OP2_301_41920_20131110_141258_inLine
+BABEL_OP2_301_41920_20131110_141258_outLine
+BABEL_OP2_301_41920_20131110_142621_inLine
+BABEL_OP2_301_41920_20131110_142621_outLine
+BABEL_OP2_301_42600_20131125_184712_inLine
+BABEL_OP2_301_42600_20131125_184712_outLine
+BABEL_OP2_301_42600_20131125_185254_inLine
+BABEL_OP2_301_42600_20131125_185254_outLine
+BABEL_OP2_301_43789_20131205_204932_inLine
+BABEL_OP2_301_43789_20131205_204932_outLine
+BABEL_OP2_301_45777_20131129_214116_inLine
+BABEL_OP2_301_45777_20131129_214116_outLine
+BABEL_OP2_301_47877_20140109_182631_inLine
+BABEL_OP2_301_47877_20140109_182631_outLine
+BABEL_OP2_301_48399_20131115_184608_inLine
+BABEL_OP2_301_48399_20131115_184608_outLine
+BABEL_OP2_301_48422_20140104_203017_inLine
+BABEL_OP2_301_48422_20140104_203017_outLine
+BABEL_OP2_301_49287_20140110_233951_inLine
+BABEL_OP2_301_49287_20140110_233951_outLine
+BABEL_OP2_301_49502_20131025_191447_inLine
+BABEL_OP2_301_49502_20131025_191447_outLine
+BABEL_OP2_301_49812_20140108_153912_inLine
+BABEL_OP2_301_49812_20140108_153912_outLine
+BABEL_OP2_301_51417_20140104_191034_inLine
+BABEL_OP2_301_51417_20140104_191034_outLine
+BABEL_OP2_301_52447_20140128_140241_inLine
+BABEL_OP2_301_52447_20140128_140241_outLine
+BABEL_OP2_301_58145_20140205_195241_inLine
+BABEL_OP2_301_58145_20140205_195241_outLine
+BABEL_OP2_301_58815_20131219_183200_inLine
+BABEL_OP2_301_58815_20131219_183200_outLine
+BABEL_OP2_301_58915_20140204_180046_inLine
+BABEL_OP2_301_58915_20140204_180046_outLine
+BABEL_OP2_301_60508_20131023_221321_inLine
+BABEL_OP2_301_60508_20131023_221321_outLine
+BABEL_OP2_301_61348_20131210_184944_inLine
+BABEL_OP2_301_61348_20131210_184944_outLine
+BABEL_OP2_301_61357_20140205_203135_inLine
+BABEL_OP2_301_61357_20140205_203135_outLine
+BABEL_OP2_301_61678_20131130_195119_inLine
+BABEL_OP2_301_61678_20131130_195119_outLine
+BABEL_OP2_301_61684_20140224_141104_inLine
+BABEL_OP2_301_61684_20140224_141104_outLine
+BABEL_OP2_301_62434_20131027_204412_inLine
+BABEL_OP2_301_62434_20131027_204412_outLine
+BABEL_OP2_301_62835_20131205_201607_inLine
+BABEL_OP2_301_62835_20131205_201607_outLine
+BABEL_OP2_301_62852_20131112_145306_inLine
+BABEL_OP2_301_62852_20131112_145306_outLine
+BABEL_OP2_301_63445_20131017_163305_inLine
+BABEL_OP2_301_63445_20131017_163305_outLine
+BABEL_OP2_301_63481_20131018_205953_inLine
+BABEL_OP2_301_63481_20131018_205953_outLine
+BABEL_OP2_301_63523_20140127_032850_inLine
+BABEL_OP2_301_63523_20140127_032850_outLine
+BABEL_OP2_301_65268_20140224_143314_inLine
+BABEL_OP2_301_65268_20140224_143314_outLine
+BABEL_OP2_301_66967_20131119_230046_inLine
+BABEL_OP2_301_66967_20131119_230046_outLine
+BABEL_OP2_301_67592_20131223_194021_inLine
+BABEL_OP2_301_67592_20131223_194021_outLine
+BABEL_OP2_301_69885_20140126_142648_inLine
+BABEL_OP2_301_69885_20140126_142648_outLine
+BABEL_OP2_301_71282_20140115_180924_inLine
+BABEL_OP2_301_71282_20140115_180924_outLine
+BABEL_OP2_301_71333_20131126_155505_inLine
+BABEL_OP2_301_71333_20131126_155505_outLine
+BABEL_OP2_301_73622_20131030_201514_inLine
+BABEL_OP2_301_73622_20131030_201514_outLine
+BABEL_OP2_301_75359_20140127_022948_inLine
+BABEL_OP2_301_75359_20140127_022948_outLine
+BABEL_OP2_301_75460_20140130_145829_inLine
+BABEL_OP2_301_75460_20140130_145829_outLine
+BABEL_OP2_301_76218_20131205_183037_inLine
+BABEL_OP2_301_76218_20131205_183037_outLine
+BABEL_OP2_301_77139_20131112_164236_inLine
+BABEL_OP2_301_77139_20131112_164236_outLine
+BABEL_OP2_301_78454_20140206_205852_inLine
+BABEL_OP2_301_78454_20140206_205852_outLine
+BABEL_OP2_301_78630_20131125_133236_inLine
+BABEL_OP2_301_78630_20131125_133236_outLine
+BABEL_OP2_301_79590_20131204_214240_inLine
+BABEL_OP2_301_79590_20131204_214240_outLine
+BABEL_OP2_301_80881_20131106_185321_inLine
+BABEL_OP2_301_80881_20131106_185321_outLine
+BABEL_OP2_301_83775_20131124_022216_inLine
+BABEL_OP2_301_83775_20131124_022216_outLine
+BABEL_OP2_301_84370_20140204_202527_inLine
+BABEL_OP2_301_84370_20140204_202527_outLine
+BABEL_OP2_301_85439_20140126_191119_inLine
+BABEL_OP2_301_85439_20140126_191119_outLine
+BABEL_OP2_301_86748_20140112_204921_inLine
+BABEL_OP2_301_86748_20140112_204921_outLine
+BABEL_OP2_301_87693_20131204_010154_inLine
+BABEL_OP2_301_87693_20131204_010154_outLine
+BABEL_OP2_301_88601_20131208_212307_inLine
+BABEL_OP2_301_88601_20131208_212307_outLine
+BABEL_OP2_301_88686_20131023_165851_inLine
+BABEL_OP2_301_88686_20131023_165851_outLine
+BABEL_OP2_301_89457_20131206_124818_inLine
+BABEL_OP2_301_89457_20131206_124818_outLine
+BABEL_OP2_301_90777_20131126_025413_inLine
+BABEL_OP2_301_90777_20131126_025413_outLine
+BABEL_OP2_301_92060_20140126_194852_inLine
+BABEL_OP2_301_92060_20140126_194852_outLine
+BABEL_OP2_301_92281_20140214_190838_inLine
+BABEL_OP2_301_92281_20140214_190838_outLine
+BABEL_OP2_301_92509_20131019_131304_inLine
+BABEL_OP2_301_92509_20131019_131304_outLine
+BABEL_OP2_301_92698_20131203_135210_inLine
+BABEL_OP2_301_92698_20131203_135210_outLine
+BABEL_OP2_301_93604_20140125_212930_inLine
+BABEL_OP2_301_93604_20140125_212930_outLine
+BABEL_OP2_301_94587_20131213_182558_inLine
+BABEL_OP2_301_94587_20131213_182558_outLine
+BABEL_OP2_301_95598_20131020_194214_inLine
+BABEL_OP2_301_95598_20131020_194214_outLine
+BABEL_OP2_301_95966_20131205_151956_inLine
+BABEL_OP2_301_95966_20131205_151956_outLine
+BABEL_OP2_301_96088_20140128_155726_inLine
+BABEL_OP2_301_96088_20140128_155726_outLine
+BABEL_OP2_301_96808_20140127_174411_inLine
+BABEL_OP2_301_96808_20140127_174411_outLine
+BABEL_OP2_301_98580_20131204_210023_inLine
+BABEL_OP2_301_98580_20131204_210023_outLine
diff --git a/egs/babel/s5d/conf/lists/301-cebuano/evalpart1.list b/egs/babel/s5d/conf/lists/301-cebuano/evalpart1.list
new file mode 100644
index 00000000000..31455174b8e
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/301-cebuano/evalpart1.list
@@ -0,0 +1,62 @@
+BABEL_OP2_301_13427_20131226_153605_inLine
+BABEL_OP2_301_13427_20131226_153605_outLine
+BABEL_OP2_301_18766_20140127_140851_inLine
+BABEL_OP2_301_18766_20140127_140851_outLine
+BABEL_OP2_301_19832_20140214_144414_inLine
+BABEL_OP2_301_19832_20140214_144414_outLine
+BABEL_OP2_301_23628_20131121_202709_inLine
+BABEL_OP2_301_23628_20131121_202709_outLine
+BABEL_OP2_301_26074_20140214_150738_inLine
+BABEL_OP2_301_26074_20140214_150738_outLine
+BABEL_OP2_301_28585_20140103_174051_inLine
+BABEL_OP2_301_28585_20140103_174051_outLine
+BABEL_OP2_301_33992_20140128_153304_inLine
+BABEL_OP2_301_33992_20140128_153304_outLine
+BABEL_OP2_301_42600_20131125_184712_inLine
+BABEL_OP2_301_42600_20131125_184712_outLine
+BABEL_OP2_301_42600_20131125_185254_inLine
+BABEL_OP2_301_42600_20131125_185254_outLine
+BABEL_OP2_301_60508_20131023_221321_inLine
+BABEL_OP2_301_60508_20131023_221321_outLine
+BABEL_OP2_301_61357_20140205_203135_inLine
+BABEL_OP2_301_61357_20140205_203135_outLine
+BABEL_OP2_301_62434_20131027_204412_inLine
+BABEL_OP2_301_62434_20131027_204412_outLine
+BABEL_OP2_301_62835_20131205_201607_inLine
+BABEL_OP2_301_62835_20131205_201607_outLine
+BABEL_OP2_301_62852_20131112_145306_inLine
+BABEL_OP2_301_62852_20131112_145306_outLine
+BABEL_OP2_301_63481_20131018_205953_inLine
+BABEL_OP2_301_63481_20131018_205953_outLine
+BABEL_OP2_301_63523_20140127_032850_inLine
+BABEL_OP2_301_63523_20140127_032850_outLine
+BABEL_OP2_301_71282_20140115_180924_inLine
+BABEL_OP2_301_71282_20140115_180924_outLine
+BABEL_OP2_301_71333_20131126_155505_inLine
+BABEL_OP2_301_71333_20131126_155505_outLine
+BABEL_OP2_301_75359_20140127_022948_inLine
+BABEL_OP2_301_75359_20140127_022948_outLine
+BABEL_OP2_301_75460_20140130_145829_inLine
+BABEL_OP2_301_75460_20140130_145829_outLine
+BABEL_OP2_301_78630_20131125_133236_inLine
+BABEL_OP2_301_78630_20131125_133236_outLine
+BABEL_OP2_301_83775_20131124_022216_inLine
+BABEL_OP2_301_83775_20131124_022216_outLine
+BABEL_OP2_301_86748_20140112_204921_inLine
+BABEL_OP2_301_86748_20140112_204921_outLine
+BABEL_OP2_301_88601_20131208_212307_inLine
+BABEL_OP2_301_88601_20131208_212307_outLine
+BABEL_OP2_301_92060_20140126_194852_inLine
+BABEL_OP2_301_92060_20140126_194852_outLine
+BABEL_OP2_301_92281_20140214_190838_inLine
+BABEL_OP2_301_92281_20140214_190838_outLine
+BABEL_OP2_301_93604_20140125_212930_inLine
+BABEL_OP2_301_93604_20140125_212930_outLine
+BABEL_OP2_301_94587_20131213_182558_inLine
+BABEL_OP2_301_94587_20131213_182558_outLine
+BABEL_OP2_301_95966_20131205_151956_inLine
+BABEL_OP2_301_95966_20131205_151956_outLine
+BABEL_OP2_301_96808_20140127_174411_inLine
+BABEL_OP2_301_96808_20140127_174411_outLine
+BABEL_OP2_301_98580_20131204_210023_inLine
+BABEL_OP2_301_98580_20131204_210023_outLine
diff --git a/egs/babel/s5d/conf/lists/301-cebuano/sub-train.list b/egs/babel/s5d/conf/lists/301-cebuano/sub-train.list
new file mode 100644
index 00000000000..8347770b847
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/301-cebuano/sub-train.list
@@ -0,0 +1,126 @@
+BABEL_OP2_301_10482_20131213_185259_inLine
+BABEL_OP2_301_10482_20131213_185259_outLine
+BABEL_OP2_301_11681_20131121_134611_inLine
+BABEL_OP2_301_11681_20131121_134611_outLine
+BABEL_OP2_301_12220_20131205_210711_inLine
+BABEL_OP2_301_12220_20131205_210711_outLine
+BABEL_OP2_301_14229_20131129_210206_inLine
+BABEL_OP2_301_14229_20131129_210206_outLine
+BABEL_OP2_301_14807_20140214_134654_inLine
+BABEL_OP2_301_14807_20140214_134654_outLine
+BABEL_OP2_301_15163_20131203_221053_inLine
+BABEL_OP2_301_15163_20131203_221053_outLine
+BABEL_OP2_301_17113_20140202_140244_inLine
+BABEL_OP2_301_17113_20140202_140244_outLine
+BABEL_OP2_301_18380_20131208_205543_inLine
+BABEL_OP2_301_18380_20131208_205543_outLine
+BABEL_OP2_301_20437_20140223_171247_inLine
+BABEL_OP2_301_20437_20140223_171247_outLine
+BABEL_OP2_301_22216_20131024_101416_inLine
+BABEL_OP2_301_22216_20131024_101416_outLine
+BABEL_OP2_301_28595_20140214_164503_inLine
+BABEL_OP2_301_28595_20140214_164503_outLine
+BABEL_OP2_301_28945_20131123_183004_inLine
+BABEL_OP2_301_28945_20131123_183004_outLine
+BABEL_OP2_301_32708_20131122_134009_inLine
+BABEL_OP2_301_32708_20131122_134009_outLine
+BABEL_OP2_301_32708_20131122_134900_inLine
+BABEL_OP2_301_32708_20131122_134900_outLine
+BABEL_OP2_301_33175_20131019_231650_inLine
+BABEL_OP2_301_33175_20131019_231650_outLine
+BABEL_OP2_301_33216_20140131_183344_inLine
+BABEL_OP2_301_33216_20140131_183344_outLine
+BABEL_OP2_301_33355_20131021_130538_inLine
+BABEL_OP2_301_33355_20131021_130538_outLine
+BABEL_OP2_301_34106_20131020_192105_inLine
+BABEL_OP2_301_34106_20131020_192105_outLine
+BABEL_OP2_301_34811_20131204_195646_inLine
+BABEL_OP2_301_34811_20131204_195646_outLine
+BABEL_OP2_301_37228_20140109_190716_inLine
+BABEL_OP2_301_37228_20140109_190716_outLine
+BABEL_OP2_301_38554_20131024_134203_inLine
+BABEL_OP2_301_38554_20131024_134203_outLine
+BABEL_OP2_301_39680_20140115_193747_inLine
+BABEL_OP2_301_39680_20140115_193747_outLine
+BABEL_OP2_301_41680_20131016_202751_inLine
+BABEL_OP2_301_41680_20131016_202751_outLine
+BABEL_OP2_301_43388_20131203_204504_inLine
+BABEL_OP2_301_43388_20131203_204504_outLine
+BABEL_OP2_301_45559_20140127_145550_inLine
+BABEL_OP2_301_45559_20140127_145550_outLine
+BABEL_OP2_301_45560_20131104_171401_inLine
+BABEL_OP2_301_45560_20131104_171401_outLine
+BABEL_OP2_301_46066_20140110_170456_inLine
+BABEL_OP2_301_46066_20140110_170456_outLine
+BABEL_OP2_301_46268_20131021_142020_inLine
+BABEL_OP2_301_46268_20131021_142020_outLine
+BABEL_OP2_301_50810_20131025_174542_inLine
+BABEL_OP2_301_50810_20131025_174542_outLine
+BABEL_OP2_301_52265_20140216_163445_inLine
+BABEL_OP2_301_52265_20140216_163445_outLine
+BABEL_OP2_301_54162_20131210_170602_inLine
+BABEL_OP2_301_54162_20131210_170602_outLine
+BABEL_OP2_301_54953_20131127_005926_inLine
+BABEL_OP2_301_54953_20131127_005926_outLine
+BABEL_OP2_301_55818_20131110_111534_inLine
+BABEL_OP2_301_55818_20131110_111534_outLine
+BABEL_OP2_301_55818_20131110_121457_inLine
+BABEL_OP2_301_55818_20131110_121457_outLine
+BABEL_OP2_301_56306_20140108_175350_inLine
+BABEL_OP2_301_56306_20140108_175350_outLine
+BABEL_OP2_301_64902_20140123_130547_inLine
+BABEL_OP2_301_64902_20140123_130547_outLine
+BABEL_OP2_301_65298_20140115_174724_inLine
+BABEL_OP2_301_65298_20140115_174724_outLine
+BABEL_OP2_301_67213_20140220_182122_inLine
+BABEL_OP2_301_67213_20140220_182122_outLine
+BABEL_OP2_301_67622_20131023_150210_inLine
+BABEL_OP2_301_67622_20131023_150210_outLine
+BABEL_OP2_301_68924_20131210_145459_inLine
+BABEL_OP2_301_68924_20131210_145459_outLine
+BABEL_OP2_301_69746_20140108_182845_inLine
+BABEL_OP2_301_69746_20140108_182845_outLine
+BABEL_OP2_301_71263_20140205_210654_inLine
+BABEL_OP2_301_71263_20140205_210654_outLine
+BABEL_OP2_301_72733_20140126_155036_inLine
+BABEL_OP2_301_72733_20140126_155036_outLine
+BABEL_OP2_301_73042_20131114_135827_inLine
+BABEL_OP2_301_73042_20131114_135827_outLine
+BABEL_OP2_301_73591_20131016_200144_inLine
+BABEL_OP2_301_73591_20131016_200144_outLine
+BABEL_OP2_301_73591_20131016_201810_inLine
+BABEL_OP2_301_73591_20131016_201810_outLine
+BABEL_OP2_301_75869_20140122_141000_inLine
+BABEL_OP2_301_75869_20140122_141000_outLine
+BABEL_OP2_301_78482_20131227_163840_inLine
+BABEL_OP2_301_78482_20131227_163840_outLine
+BABEL_OP2_301_81810_20131214_030628_inLine
+BABEL_OP2_301_81810_20131214_030628_outLine
+BABEL_OP2_301_81854_20140127_151841_inLine
+BABEL_OP2_301_81854_20140127_151841_outLine
+BABEL_OP2_301_84547_20131025_143053_inLine
+BABEL_OP2_301_84547_20131025_143053_outLine
+BABEL_OP2_301_85248_20140115_144605_inLine
+BABEL_OP2_301_85248_20140115_144605_outLine
+BABEL_OP2_301_87545_20140125_194128_inLine
+BABEL_OP2_301_87545_20140125_194128_outLine
+BABEL_OP2_301_91372_20140126_145526_inLine
+BABEL_OP2_301_91372_20140126_145526_outLine
+BABEL_OP2_301_91463_20140206_144651_inLine
+BABEL_OP2_301_91463_20140206_144651_outLine
+BABEL_OP2_301_91884_20140118_220510_inLine
+BABEL_OP2_301_91884_20140118_220510_outLine
+BABEL_OP2_301_93475_20131119_183619_inLine
+BABEL_OP2_301_93475_20131119_183619_outLine
+BABEL_OP2_301_93515_20140125_212344_inLine
+BABEL_OP2_301_93515_20140125_212344_outLine
+BABEL_OP2_301_94409_20131204_145545_inLine
+BABEL_OP2_301_94409_20131204_145545_outLine
+BABEL_OP2_301_95399_20131206_150920_inLine
+BABEL_OP2_301_95399_20131206_150920_outLine
+BABEL_OP2_301_96190_20131122_024403_inLine
+BABEL_OP2_301_96190_20131122_024403_outLine
+BABEL_OP2_301_99202_20131226_202006_inLine
+BABEL_OP2_301_99202_20131226_202006_outLine
+BABEL_OP2_301_99955_20140110_162703_inLine
+BABEL_OP2_301_99955_20140110_162703_outLine
diff --git a/egs/babel/s5d/conf/lists/301-cebuano/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/301-cebuano/sub-train.untranscribed.list
new file mode 100644
index 00000000000..690d88bbe06
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/301-cebuano/sub-train.untranscribed.list
@@ -0,0 +1,376 @@
+BABEL_OP2_301_10647_20140122_182555_inLine
+BABEL_OP2_301_10647_20140122_182555_outLine
+BABEL_OP2_301_11581_20140214_131627_inLine
+BABEL_OP2_301_11581_20140214_131627_outLine
+BABEL_OP2_301_11673_20131025_130227_inLine
+BABEL_OP2_301_11673_20131025_130227_outLine
+BABEL_OP2_301_12846_20140130_151205_inLine
+BABEL_OP2_301_12846_20140130_151205_outLine
+BABEL_OP2_301_13184_20140106_154520_inLine
+BABEL_OP2_301_13184_20140106_154520_outLine
+BABEL_OP2_301_13776_20140129_150321_inLine
+BABEL_OP2_301_13776_20140129_150321_outLine
+BABEL_OP2_301_14137_20131129_173610_inLine
+BABEL_OP2_301_14137_20131129_173610_outLine
+BABEL_OP2_301_14729_20140120_213036_inLine
+BABEL_OP2_301_14729_20140120_213036_outLine
+BABEL_OP2_301_14929_20131204_221803_inLine
+BABEL_OP2_301_14929_20131204_221803_outLine
+BABEL_OP2_301_15466_20140220_163311_inLine
+BABEL_OP2_301_15466_20140220_163311_outLine
+BABEL_OP2_301_15617_20140216_200848_inLine
+BABEL_OP2_301_15617_20140216_200848_outLine
+BABEL_OP2_301_15730_20131103_191134_inLine
+BABEL_OP2_301_15730_20131103_191134_outLine
+BABEL_OP2_301_16149_20131122_182440_inLine
+BABEL_OP2_301_16149_20131122_182440_outLine
+BABEL_OP2_301_16749_20140109_145017_inLine
+BABEL_OP2_301_16749_20140109_145017_outLine
+BABEL_OP2_301_18118_20140214_195210_inLine
+BABEL_OP2_301_18118_20140214_195210_outLine
+BABEL_OP2_301_19589_20140126_182029_inLine
+BABEL_OP2_301_19589_20140126_182029_outLine
+BABEL_OP2_301_20133_20131017_002355_inLine
+BABEL_OP2_301_20133_20131017_002355_outLine
+BABEL_OP2_301_20922_20140103_201925_inLine
+BABEL_OP2_301_20922_20140103_201925_outLine
+BABEL_OP2_301_21206_20131113_200040_inLine +BABEL_OP2_301_21206_20131113_200040_outLine +BABEL_OP2_301_21435_20140123_135222_inLine +BABEL_OP2_301_21435_20140123_135222_outLine +BABEL_OP2_301_22321_20131101_141744_inLine +BABEL_OP2_301_22321_20131101_141744_outLine +BABEL_OP2_301_22494_20131210_134219_inLine +BABEL_OP2_301_22494_20131210_134219_outLine +BABEL_OP2_301_23893_20140214_163728_inLine +BABEL_OP2_301_23893_20140214_163728_outLine +BABEL_OP2_301_24239_20140126_152805_inLine +BABEL_OP2_301_24239_20140126_152805_outLine +BABEL_OP2_301_24679_20131023_181905_inLine +BABEL_OP2_301_24679_20131023_181905_outLine +BABEL_OP2_301_24982_20131125_001842_inLine +BABEL_OP2_301_24982_20131125_001842_outLine +BABEL_OP2_301_25719_20140104_183539_inLine +BABEL_OP2_301_25719_20140104_183539_outLine +BABEL_OP2_301_25767_20131112_191047_inLine +BABEL_OP2_301_25767_20131112_191047_outLine +BABEL_OP2_301_26072_20140110_183220_inLine +BABEL_OP2_301_26072_20140110_183220_outLine +BABEL_OP2_301_28012_20140103_194242_inLine +BABEL_OP2_301_28012_20140103_194242_outLine +BABEL_OP2_301_28303_20131125_142043_inLine +BABEL_OP2_301_28303_20131125_142043_outLine +BABEL_OP2_301_28600_20131220_191009_inLine +BABEL_OP2_301_28600_20131220_191009_outLine +BABEL_OP2_301_28814_20140109_152108_inLine +BABEL_OP2_301_28814_20140109_152108_outLine +BABEL_OP2_301_29023_20131123_171357_inLine +BABEL_OP2_301_29023_20131123_171357_outLine +BABEL_OP2_301_29023_20131123_173406_inLine +BABEL_OP2_301_29023_20131123_173406_outLine +BABEL_OP2_301_29168_20131018_141724_inLine +BABEL_OP2_301_29168_20131018_141724_outLine +BABEL_OP2_301_29323_20140112_184600_inLine +BABEL_OP2_301_29323_20140112_184600_outLine +BABEL_OP2_301_29404_20140123_123004_inLine +BABEL_OP2_301_29404_20140123_123004_outLine +BABEL_OP2_301_30645_20131116_183545_inLine +BABEL_OP2_301_30645_20131116_183545_outLine +BABEL_OP2_301_31490_20131130_190602_inLine +BABEL_OP2_301_31490_20131130_190602_outLine +BABEL_OP2_301_32630_20140127_181615_inLine +BABEL_OP2_301_32630_20140127_181615_outLine +BABEL_OP2_301_32998_20140206_212018_inLine +BABEL_OP2_301_32998_20140206_212018_outLine +BABEL_OP2_301_33672_20131112_192407_inLine +BABEL_OP2_301_33672_20131112_192407_outLine +BABEL_OP2_301_33672_20131112_194343_inLine +BABEL_OP2_301_33672_20131112_194343_outLine +BABEL_OP2_301_33806_20140204_204611_inLine +BABEL_OP2_301_33806_20140204_204611_outLine +BABEL_OP2_301_34019_20140220_175645_inLine +BABEL_OP2_301_34019_20140220_175645_outLine +BABEL_OP2_301_34629_20140224_174043_inLine +BABEL_OP2_301_34629_20140224_174043_outLine +BABEL_OP2_301_34860_20140224_170732_inLine +BABEL_OP2_301_34860_20140224_170732_outLine +BABEL_OP2_301_36039_20140121_002746_inLine +BABEL_OP2_301_36039_20140121_002746_outLine +BABEL_OP2_301_36669_20131208_191649_inLine +BABEL_OP2_301_36669_20131208_191649_outLine +BABEL_OP2_301_37598_20140206_190701_inLine +BABEL_OP2_301_37598_20140206_190701_outLine +BABEL_OP2_301_37682_20131128_161814_inLine +BABEL_OP2_301_37682_20131128_161814_outLine +BABEL_OP2_301_38323_20140205_184350_inLine +BABEL_OP2_301_38323_20140205_184350_outLine +BABEL_OP2_301_39006_20140204_195257_inLine +BABEL_OP2_301_39006_20140204_195257_outLine +BABEL_OP2_301_39099_20140127_003852_inLine +BABEL_OP2_301_39099_20140127_003852_outLine +BABEL_OP2_301_39307_20131024_204807_inLine +BABEL_OP2_301_39307_20131024_204807_outLine +BABEL_OP2_301_39638_20140224_155231_inLine +BABEL_OP2_301_39638_20140224_155231_outLine +BABEL_OP2_301_39848_20131204_163640_inLine 
+BABEL_OP2_301_39848_20131204_163640_outLine +BABEL_OP2_301_41469_20131123_114935_inLine +BABEL_OP2_301_41469_20131123_114935_outLine +BABEL_OP2_301_41469_20131123_115625_inLine +BABEL_OP2_301_41469_20131123_115625_outLine +BABEL_OP2_301_41685_20140223_155438_inLine +BABEL_OP2_301_41685_20140223_155438_outLine +BABEL_OP2_301_42029_20140115_163832_inLine +BABEL_OP2_301_42029_20140115_163832_outLine +BABEL_OP2_301_42434_20131127_190632_inLine +BABEL_OP2_301_42434_20131127_190632_outLine +BABEL_OP2_301_43115_20140125_145846_inLine +BABEL_OP2_301_43115_20140125_145846_outLine +BABEL_OP2_301_43323_20140223_191949_inLine +BABEL_OP2_301_43323_20140223_191949_outLine +BABEL_OP2_301_44347_20140102_122651_inLine +BABEL_OP2_301_44347_20140102_122651_outLine +BABEL_OP2_301_44619_20131122_014112_inLine +BABEL_OP2_301_44619_20131122_014112_outLine +BABEL_OP2_301_45121_20140127_190059_inLine +BABEL_OP2_301_45121_20140127_190059_outLine +BABEL_OP2_301_45851_20140127_224015_inLine +BABEL_OP2_301_45851_20140127_224015_outLine +BABEL_OP2_301_46310_20131104_001007_inLine +BABEL_OP2_301_46310_20131104_001007_outLine +BABEL_OP2_301_49001_20131126_004357_inLine +BABEL_OP2_301_49001_20131126_004357_outLine +BABEL_OP2_301_49216_20131020_181355_inLine +BABEL_OP2_301_49216_20131020_181355_outLine +BABEL_OP2_301_49945_20140127_184032_inLine +BABEL_OP2_301_49945_20140127_184032_outLine +BABEL_OP2_301_50175_20131019_212339_inLine +BABEL_OP2_301_50175_20131019_212339_outLine +BABEL_OP2_301_51484_20131220_211835_inLine +BABEL_OP2_301_51484_20131220_211835_outLine +BABEL_OP2_301_51540_20140106_172711_inLine +BABEL_OP2_301_51540_20140106_172711_outLine +BABEL_OP2_301_51701_20140205_193018_inLine +BABEL_OP2_301_51701_20140205_193018_outLine +BABEL_OP2_301_51968_20131204_190129_inLine +BABEL_OP2_301_51968_20131204_190129_outLine +BABEL_OP2_301_52272_20131030_202958_inLine +BABEL_OP2_301_52272_20131030_202958_outLine +BABEL_OP2_301_52381_20140109_155159_inLine +BABEL_OP2_301_52381_20140109_155159_outLine +BABEL_OP2_301_52404_20131211_192143_inLine +BABEL_OP2_301_52404_20131211_192143_outLine +BABEL_OP2_301_52804_20131122_192606_inLine +BABEL_OP2_301_52804_20131122_192606_outLine +BABEL_OP2_301_53842_20131205_212824_inLine +BABEL_OP2_301_53842_20131205_212824_outLine +BABEL_OP2_301_53842_20131205_214030_inLine +BABEL_OP2_301_53842_20131205_214030_outLine +BABEL_OP2_301_54074_20131204_200954_inLine +BABEL_OP2_301_54074_20131204_200954_outLine +BABEL_OP2_301_54530_20131218_184644_inLine +BABEL_OP2_301_54530_20131218_184644_outLine +BABEL_OP2_301_54567_20131205_193927_inLine +BABEL_OP2_301_54567_20131205_193927_outLine +BABEL_OP2_301_54827_20140126_184228_inLine +BABEL_OP2_301_54827_20140126_184228_outLine +BABEL_OP2_301_55106_20140119_161343_inLine +BABEL_OP2_301_55106_20140119_161343_outLine +BABEL_OP2_301_55349_20140121_152059_inLine +BABEL_OP2_301_55349_20140121_152059_outLine +BABEL_OP2_301_55381_20140103_163729_inLine +BABEL_OP2_301_55381_20140103_163729_outLine +BABEL_OP2_301_57116_20131129_012420_inLine +BABEL_OP2_301_57116_20131129_012420_outLine +BABEL_OP2_301_57233_20140224_172256_inLine +BABEL_OP2_301_57233_20140224_172256_outLine +BABEL_OP2_301_57542_20140122_150942_inLine +BABEL_OP2_301_57542_20140122_150942_outLine +BABEL_OP2_301_57566_20140106_150720_inLine +BABEL_OP2_301_57566_20140106_150720_outLine +BABEL_OP2_301_58006_20140122_203731_inLine +BABEL_OP2_301_58006_20140122_203731_outLine +BABEL_OP2_301_58313_20140207_172512_inLine +BABEL_OP2_301_58313_20140207_172512_outLine 
+BABEL_OP2_301_58926_20131124_131005_inLine +BABEL_OP2_301_58926_20131124_131005_outLine +BABEL_OP2_301_59039_20140220_172820_inLine +BABEL_OP2_301_59039_20140220_172820_outLine +BABEL_OP2_301_59078_20140206_221105_inLine +BABEL_OP2_301_59078_20140206_221105_outLine +BABEL_OP2_301_59549_20131115_144344_inLine +BABEL_OP2_301_59549_20131115_144344_outLine +BABEL_OP2_301_59549_20131115_145934_inLine +BABEL_OP2_301_59549_20131115_145934_outLine +BABEL_OP2_301_59928_20131208_181057_inLine +BABEL_OP2_301_59928_20131208_181057_outLine +BABEL_OP2_301_60436_20140126_184303_inLine +BABEL_OP2_301_60436_20140126_184303_outLine +BABEL_OP2_301_60458_20140127_174755_inLine +BABEL_OP2_301_60458_20140127_174755_outLine +BABEL_OP2_301_60474_20131125_202818_inLine +BABEL_OP2_301_60474_20131125_202818_outLine +BABEL_OP2_301_60477_20140131_142240_inLine +BABEL_OP2_301_60477_20140131_142240_outLine +BABEL_OP2_301_60498_20140128_144917_inLine +BABEL_OP2_301_60498_20140128_144917_outLine +BABEL_OP2_301_60626_20131123_194530_inLine +BABEL_OP2_301_60626_20131123_194530_outLine +BABEL_OP2_301_61440_20140129_162338_inLine +BABEL_OP2_301_61440_20140129_162338_outLine +BABEL_OP2_301_62047_20131223_201629_inLine +BABEL_OP2_301_62047_20131223_201629_outLine +BABEL_OP2_301_62734_20131127_125913_inLine +BABEL_OP2_301_62734_20131127_125913_outLine +BABEL_OP2_301_62800_20131023_133254_inLine +BABEL_OP2_301_62800_20131023_133254_outLine +BABEL_OP2_301_63787_20131112_234133_inLine +BABEL_OP2_301_63787_20131112_234133_outLine +BABEL_OP2_301_63906_20140122_195218_inLine +BABEL_OP2_301_63906_20140122_195218_outLine +BABEL_OP2_301_64768_20131129_183309_inLine +BABEL_OP2_301_64768_20131129_183309_outLine +BABEL_OP2_301_65466_20140122_211719_inLine +BABEL_OP2_301_65466_20140122_211719_outLine +BABEL_OP2_301_66045_20131203_142944_inLine +BABEL_OP2_301_66045_20131203_142944_outLine +BABEL_OP2_301_66361_20140223_153258_inLine +BABEL_OP2_301_66361_20140223_153258_outLine +BABEL_OP2_301_66916_20131023_223807_inLine +BABEL_OP2_301_66916_20131023_223807_outLine +BABEL_OP2_301_67152_20140119_212917_inLine +BABEL_OP2_301_67152_20140119_212917_outLine +BABEL_OP2_301_68182_20140115_183030_inLine +BABEL_OP2_301_68182_20140115_183030_outLine +BABEL_OP2_301_69096_20140128_171512_inLine +BABEL_OP2_301_69096_20140128_171512_outLine +BABEL_OP2_301_69937_20140131_181058_inLine +BABEL_OP2_301_69937_20140131_181058_outLine +BABEL_OP2_301_69992_20131110_135349_inLine +BABEL_OP2_301_69992_20131110_135349_outLine +BABEL_OP2_301_70386_20140102_173141_inLine +BABEL_OP2_301_70386_20140102_173141_outLine +BABEL_OP2_301_71121_20140223_161906_inLine +BABEL_OP2_301_71121_20140223_161906_outLine +BABEL_OP2_301_72844_20131023_180119_inLine +BABEL_OP2_301_72844_20131023_180119_outLine +BABEL_OP2_301_73005_20140126_193903_inLine +BABEL_OP2_301_73005_20140126_193903_outLine +BABEL_OP2_301_73258_20131203_200331_inLine +BABEL_OP2_301_73258_20131203_200331_outLine +BABEL_OP2_301_73485_20140128_210522_inLine +BABEL_OP2_301_73485_20140128_210522_outLine +BABEL_OP2_301_73549_20140131_160208_inLine +BABEL_OP2_301_73549_20140131_160208_outLine +BABEL_OP2_301_73964_20140214_161434_inLine +BABEL_OP2_301_73964_20140214_161434_outLine +BABEL_OP2_301_74886_20131102_122938_inLine +BABEL_OP2_301_74886_20131102_122938_outLine +BABEL_OP2_301_75261_20131226_160602_inLine +BABEL_OP2_301_75261_20131226_160602_outLine +BABEL_OP2_301_75981_20140127_143431_inLine +BABEL_OP2_301_75981_20140127_143431_outLine +BABEL_OP2_301_76155_20131203_185301_inLine 
+BABEL_OP2_301_76155_20131203_185301_outLine +BABEL_OP2_301_77146_20131023_185146_inLine +BABEL_OP2_301_77146_20131023_185146_outLine +BABEL_OP2_301_77427_20131124_013134_inLine +BABEL_OP2_301_77427_20131124_013134_outLine +BABEL_OP2_301_77427_20131124_014748_inLine +BABEL_OP2_301_77427_20131124_014748_outLine +BABEL_OP2_301_77744_20131117_154739_inLine +BABEL_OP2_301_77744_20131117_154739_outLine +BABEL_OP2_301_78543_20140131_010053_inLine +BABEL_OP2_301_78543_20140131_010053_outLine +BABEL_OP2_301_78743_20131220_201406_inLine +BABEL_OP2_301_78743_20131220_201406_outLine +BABEL_OP2_301_78943_20131120_175430_inLine +BABEL_OP2_301_78943_20131120_175430_outLine +BABEL_OP2_301_79451_20131125_114859_inLine +BABEL_OP2_301_79451_20131125_114859_outLine +BABEL_OP2_301_81622_20131204_193304_inLine +BABEL_OP2_301_81622_20131204_193304_outLine +BABEL_OP2_301_82089_20131208_202028_inLine +BABEL_OP2_301_82089_20131208_202028_outLine +BABEL_OP2_301_82425_20131113_010203_inLine +BABEL_OP2_301_82425_20131113_010203_outLine +BABEL_OP2_301_82626_20140131_233635_inLine +BABEL_OP2_301_82626_20140131_233635_outLine +BABEL_OP2_301_83436_20131116_194233_inLine +BABEL_OP2_301_83436_20131116_194233_outLine +BABEL_OP2_301_83455_20131129_211537_inLine +BABEL_OP2_301_83455_20131129_211537_outLine +BABEL_OP2_301_83455_20131129_212747_inLine +BABEL_OP2_301_83455_20131129_212747_outLine +BABEL_OP2_301_83625_20140224_161632_inLine +BABEL_OP2_301_83625_20140224_161632_outLine +BABEL_OP2_301_84458_20131216_193109_inLine +BABEL_OP2_301_84458_20131216_193109_outLine +BABEL_OP2_301_85322_20131112_183356_inLine +BABEL_OP2_301_85322_20131112_183356_outLine +BABEL_OP2_301_85519_20140103_170652_inLine +BABEL_OP2_301_85519_20140103_170652_outLine +BABEL_OP2_301_86156_20140122_185516_inLine +BABEL_OP2_301_86156_20140122_185516_outLine +BABEL_OP2_301_87470_20131128_003454_inLine +BABEL_OP2_301_87470_20131128_003454_outLine +BABEL_OP2_301_88812_20140126_203311_inLine +BABEL_OP2_301_88812_20140126_203311_outLine +BABEL_OP2_301_88925_20131220_151054_inLine +BABEL_OP2_301_88925_20131220_151054_outLine +BABEL_OP2_301_88938_20140104_195418_inLine +BABEL_OP2_301_88938_20140104_195418_outLine +BABEL_OP2_301_89059_20140109_141228_inLine +BABEL_OP2_301_89059_20140109_141228_outLine +BABEL_OP2_301_89358_20131209_174055_inLine +BABEL_OP2_301_89358_20131209_174055_outLine +BABEL_OP2_301_89665_20131206_143535_inLine +BABEL_OP2_301_89665_20131206_143535_outLine +BABEL_OP2_301_89695_20131203_225429_inLine +BABEL_OP2_301_89695_20131203_225429_outLine +BABEL_OP2_301_89877_20140205_200816_inLine +BABEL_OP2_301_89877_20140205_200816_outLine +BABEL_OP2_301_90709_20131109_170505_inLine +BABEL_OP2_301_90709_20131109_170505_outLine +BABEL_OP2_301_90737_20131206_160650_inLine +BABEL_OP2_301_90737_20131206_160650_outLine +BABEL_OP2_301_91478_20140224_170543_inLine +BABEL_OP2_301_91478_20140224_170543_outLine +BABEL_OP2_301_91760_20140127_183930_inLine +BABEL_OP2_301_91760_20140127_183930_outLine +BABEL_OP2_301_91891_20131213_192340_inLine +BABEL_OP2_301_91891_20131213_192340_outLine +BABEL_OP2_301_91944_20131114_123915_inLine +BABEL_OP2_301_91944_20131114_123915_outLine +BABEL_OP2_301_92809_20131124_142340_inLine +BABEL_OP2_301_92809_20131124_142340_outLine +BABEL_OP2_301_92809_20131124_143817_inLine +BABEL_OP2_301_92809_20131124_143817_outLine +BABEL_OP2_301_92942_20140206_180304_inLine +BABEL_OP2_301_92942_20140206_180304_outLine +BABEL_OP2_301_93153_20131114_144733_inLine +BABEL_OP2_301_93153_20131114_144733_outLine 
+BABEL_OP2_301_93153_20131114_151704_inLine +BABEL_OP2_301_93153_20131114_151704_outLine +BABEL_OP2_301_93964_20131130_172431_inLine +BABEL_OP2_301_93964_20131130_172431_outLine +BABEL_OP2_301_94978_20140119_185149_inLine +BABEL_OP2_301_94978_20140119_185149_outLine +BABEL_OP2_301_95338_20140127_192317_inLine +BABEL_OP2_301_95338_20140127_192317_outLine +BABEL_OP2_301_95583_20131029_002312_inLine +BABEL_OP2_301_95583_20131029_002312_outLine +BABEL_OP2_301_95663_20131025_134113_inLine +BABEL_OP2_301_95663_20131025_134113_outLine +BABEL_OP2_301_95935_20140103_190515_inLine +BABEL_OP2_301_95935_20140103_190515_outLine +BABEL_OP2_301_96324_20131026_023101_inLine +BABEL_OP2_301_96324_20131026_023101_outLine +BABEL_OP2_301_96376_20140126_140015_inLine +BABEL_OP2_301_96376_20140126_140015_outLine +BABEL_OP2_301_96910_20131124_183403_inLine +BABEL_OP2_301_96910_20131124_183403_outLine +BABEL_OP2_301_97136_20140120_235804_inLine +BABEL_OP2_301_97136_20140120_235804_outLine +BABEL_OP2_301_97588_20131025_185012_inLine +BABEL_OP2_301_97588_20131025_185012_outLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/training.list b/egs/babel/s5d/conf/lists/301-cebuano/training.list new file mode 100644 index 00000000000..e6ea8dcfeff --- /dev/null +++ b/egs/babel/s5d/conf/lists/301-cebuano/training.list @@ -0,0 +1,502 @@ +BABEL_OP2_301_10482_20131213_185259_inLine +BABEL_OP2_301_10482_20131213_185259_outLine +BABEL_OP2_301_10647_20140122_182555_inLine +BABEL_OP2_301_10647_20140122_182555_outLine +BABEL_OP2_301_11581_20140214_131627_inLine +BABEL_OP2_301_11581_20140214_131627_outLine +BABEL_OP2_301_11673_20131025_130227_inLine +BABEL_OP2_301_11673_20131025_130227_outLine +BABEL_OP2_301_11681_20131121_134611_inLine +BABEL_OP2_301_11681_20131121_134611_outLine +BABEL_OP2_301_12220_20131205_210711_inLine +BABEL_OP2_301_12220_20131205_210711_outLine +BABEL_OP2_301_12846_20140130_151205_inLine +BABEL_OP2_301_12846_20140130_151205_outLine +BABEL_OP2_301_13184_20140106_154520_inLine +BABEL_OP2_301_13184_20140106_154520_outLine +BABEL_OP2_301_13776_20140129_150321_inLine +BABEL_OP2_301_13776_20140129_150321_outLine +BABEL_OP2_301_14137_20131129_173610_inLine +BABEL_OP2_301_14137_20131129_173610_outLine +BABEL_OP2_301_14229_20131129_210206_inLine +BABEL_OP2_301_14229_20131129_210206_outLine +BABEL_OP2_301_14729_20140120_213036_inLine +BABEL_OP2_301_14729_20140120_213036_outLine +BABEL_OP2_301_14807_20140214_134654_inLine +BABEL_OP2_301_14807_20140214_134654_outLine +BABEL_OP2_301_14929_20131204_221803_inLine +BABEL_OP2_301_14929_20131204_221803_outLine +BABEL_OP2_301_15163_20131203_221053_inLine +BABEL_OP2_301_15163_20131203_221053_outLine +BABEL_OP2_301_15466_20140220_163311_inLine +BABEL_OP2_301_15466_20140220_163311_outLine +BABEL_OP2_301_15617_20140216_200848_inLine +BABEL_OP2_301_15617_20140216_200848_outLine +BABEL_OP2_301_15730_20131103_191134_inLine +BABEL_OP2_301_15730_20131103_191134_outLine +BABEL_OP2_301_16149_20131122_182440_inLine +BABEL_OP2_301_16149_20131122_182440_outLine +BABEL_OP2_301_16749_20140109_145017_inLine +BABEL_OP2_301_16749_20140109_145017_outLine +BABEL_OP2_301_17113_20140202_140244_inLine +BABEL_OP2_301_17113_20140202_140244_outLine +BABEL_OP2_301_18118_20140214_195210_inLine +BABEL_OP2_301_18118_20140214_195210_outLine +BABEL_OP2_301_18380_20131208_205543_inLine +BABEL_OP2_301_18380_20131208_205543_outLine +BABEL_OP2_301_19589_20140126_182029_inLine +BABEL_OP2_301_19589_20140126_182029_outLine +BABEL_OP2_301_20133_20131017_002355_inLine +BABEL_OP2_301_20133_20131017_002355_outLine 
+BABEL_OP2_301_20437_20140223_171247_inLine +BABEL_OP2_301_20437_20140223_171247_outLine +BABEL_OP2_301_20922_20140103_201925_inLine +BABEL_OP2_301_20922_20140103_201925_outLine +BABEL_OP2_301_21206_20131113_200040_inLine +BABEL_OP2_301_21206_20131113_200040_outLine +BABEL_OP2_301_21435_20140123_135222_inLine +BABEL_OP2_301_21435_20140123_135222_outLine +BABEL_OP2_301_22216_20131024_101416_inLine +BABEL_OP2_301_22216_20131024_101416_outLine +BABEL_OP2_301_22321_20131101_141744_inLine +BABEL_OP2_301_22321_20131101_141744_outLine +BABEL_OP2_301_22494_20131210_134219_inLine +BABEL_OP2_301_22494_20131210_134219_outLine +BABEL_OP2_301_23893_20140214_163728_inLine +BABEL_OP2_301_23893_20140214_163728_outLine +BABEL_OP2_301_24239_20140126_152805_inLine +BABEL_OP2_301_24239_20140126_152805_outLine +BABEL_OP2_301_24679_20131023_181905_inLine +BABEL_OP2_301_24679_20131023_181905_outLine +BABEL_OP2_301_24982_20131125_001842_inLine +BABEL_OP2_301_24982_20131125_001842_outLine +BABEL_OP2_301_25719_20140104_183539_inLine +BABEL_OP2_301_25719_20140104_183539_outLine +BABEL_OP2_301_25767_20131112_191047_inLine +BABEL_OP2_301_25767_20131112_191047_outLine +BABEL_OP2_301_26072_20140110_183220_inLine +BABEL_OP2_301_26072_20140110_183220_outLine +BABEL_OP2_301_28012_20140103_194242_inLine +BABEL_OP2_301_28012_20140103_194242_outLine +BABEL_OP2_301_28303_20131125_142043_inLine +BABEL_OP2_301_28303_20131125_142043_outLine +BABEL_OP2_301_28595_20140214_164503_inLine +BABEL_OP2_301_28595_20140214_164503_outLine +BABEL_OP2_301_28600_20131220_191009_inLine +BABEL_OP2_301_28600_20131220_191009_outLine +BABEL_OP2_301_28814_20140109_152108_inLine +BABEL_OP2_301_28814_20140109_152108_outLine +BABEL_OP2_301_28945_20131123_183004_inLine +BABEL_OP2_301_28945_20131123_183004_outLine +BABEL_OP2_301_29023_20131123_171357_inLine +BABEL_OP2_301_29023_20131123_171357_outLine +BABEL_OP2_301_29023_20131123_173406_inLine +BABEL_OP2_301_29023_20131123_173406_outLine +BABEL_OP2_301_29168_20131018_141724_inLine +BABEL_OP2_301_29168_20131018_141724_outLine +BABEL_OP2_301_29323_20140112_184600_inLine +BABEL_OP2_301_29323_20140112_184600_outLine +BABEL_OP2_301_29404_20140123_123004_inLine +BABEL_OP2_301_29404_20140123_123004_outLine +BABEL_OP2_301_30645_20131116_183545_inLine +BABEL_OP2_301_30645_20131116_183545_outLine +BABEL_OP2_301_31490_20131130_190602_inLine +BABEL_OP2_301_31490_20131130_190602_outLine +BABEL_OP2_301_32630_20140127_181615_inLine +BABEL_OP2_301_32630_20140127_181615_outLine +BABEL_OP2_301_32708_20131122_134009_inLine +BABEL_OP2_301_32708_20131122_134009_outLine +BABEL_OP2_301_32708_20131122_134900_inLine +BABEL_OP2_301_32708_20131122_134900_outLine +BABEL_OP2_301_32998_20140206_212018_inLine +BABEL_OP2_301_32998_20140206_212018_outLine +BABEL_OP2_301_33175_20131019_231650_inLine +BABEL_OP2_301_33175_20131019_231650_outLine +BABEL_OP2_301_33216_20140131_183344_inLine +BABEL_OP2_301_33216_20140131_183344_outLine +BABEL_OP2_301_33355_20131021_130538_inLine +BABEL_OP2_301_33355_20131021_130538_outLine +BABEL_OP2_301_33672_20131112_192407_inLine +BABEL_OP2_301_33672_20131112_192407_outLine +BABEL_OP2_301_33672_20131112_194343_inLine +BABEL_OP2_301_33672_20131112_194343_outLine +BABEL_OP2_301_33806_20140204_204611_inLine +BABEL_OP2_301_33806_20140204_204611_outLine +BABEL_OP2_301_34019_20140220_175645_inLine +BABEL_OP2_301_34019_20140220_175645_outLine +BABEL_OP2_301_34106_20131020_192105_inLine +BABEL_OP2_301_34106_20131020_192105_outLine +BABEL_OP2_301_34629_20140224_174043_inLine 
+BABEL_OP2_301_34629_20140224_174043_outLine +BABEL_OP2_301_34811_20131204_195646_inLine +BABEL_OP2_301_34811_20131204_195646_outLine +BABEL_OP2_301_34860_20140224_170732_inLine +BABEL_OP2_301_34860_20140224_170732_outLine +BABEL_OP2_301_36039_20140121_002746_inLine +BABEL_OP2_301_36039_20140121_002746_outLine +BABEL_OP2_301_36669_20131208_191649_inLine +BABEL_OP2_301_36669_20131208_191649_outLine +BABEL_OP2_301_37228_20140109_190716_inLine +BABEL_OP2_301_37228_20140109_190716_outLine +BABEL_OP2_301_37598_20140206_190701_inLine +BABEL_OP2_301_37598_20140206_190701_outLine +BABEL_OP2_301_37682_20131128_161814_inLine +BABEL_OP2_301_37682_20131128_161814_outLine +BABEL_OP2_301_38323_20140205_184350_inLine +BABEL_OP2_301_38323_20140205_184350_outLine +BABEL_OP2_301_38554_20131024_134203_inLine +BABEL_OP2_301_38554_20131024_134203_outLine +BABEL_OP2_301_39006_20140204_195257_inLine +BABEL_OP2_301_39006_20140204_195257_outLine +BABEL_OP2_301_39099_20140127_003852_inLine +BABEL_OP2_301_39099_20140127_003852_outLine +BABEL_OP2_301_39307_20131024_204807_inLine +BABEL_OP2_301_39307_20131024_204807_outLine +BABEL_OP2_301_39638_20140224_155231_inLine +BABEL_OP2_301_39638_20140224_155231_outLine +BABEL_OP2_301_39680_20140115_193747_inLine +BABEL_OP2_301_39680_20140115_193747_outLine +BABEL_OP2_301_39848_20131204_163640_inLine +BABEL_OP2_301_39848_20131204_163640_outLine +BABEL_OP2_301_41469_20131123_114935_inLine +BABEL_OP2_301_41469_20131123_114935_outLine +BABEL_OP2_301_41469_20131123_115625_inLine +BABEL_OP2_301_41469_20131123_115625_outLine +BABEL_OP2_301_41680_20131016_202751_inLine +BABEL_OP2_301_41680_20131016_202751_outLine +BABEL_OP2_301_41685_20140223_155438_inLine +BABEL_OP2_301_41685_20140223_155438_outLine +BABEL_OP2_301_42029_20140115_163832_inLine +BABEL_OP2_301_42029_20140115_163832_outLine +BABEL_OP2_301_42434_20131127_190632_inLine +BABEL_OP2_301_42434_20131127_190632_outLine +BABEL_OP2_301_43115_20140125_145846_inLine +BABEL_OP2_301_43115_20140125_145846_outLine +BABEL_OP2_301_43323_20140223_191949_inLine +BABEL_OP2_301_43323_20140223_191949_outLine +BABEL_OP2_301_43388_20131203_204504_inLine +BABEL_OP2_301_43388_20131203_204504_outLine +BABEL_OP2_301_44347_20140102_122651_inLine +BABEL_OP2_301_44347_20140102_122651_outLine +BABEL_OP2_301_44619_20131122_014112_inLine +BABEL_OP2_301_44619_20131122_014112_outLine +BABEL_OP2_301_45121_20140127_190059_inLine +BABEL_OP2_301_45121_20140127_190059_outLine +BABEL_OP2_301_45559_20140127_145550_inLine +BABEL_OP2_301_45559_20140127_145550_outLine +BABEL_OP2_301_45560_20131104_171401_inLine +BABEL_OP2_301_45560_20131104_171401_outLine +BABEL_OP2_301_45851_20140127_224015_inLine +BABEL_OP2_301_45851_20140127_224015_outLine +BABEL_OP2_301_46066_20140110_170456_inLine +BABEL_OP2_301_46066_20140110_170456_outLine +BABEL_OP2_301_46268_20131021_142020_inLine +BABEL_OP2_301_46268_20131021_142020_outLine +BABEL_OP2_301_46310_20131104_001007_inLine +BABEL_OP2_301_46310_20131104_001007_outLine +BABEL_OP2_301_49001_20131126_004357_inLine +BABEL_OP2_301_49001_20131126_004357_outLine +BABEL_OP2_301_49216_20131020_181355_inLine +BABEL_OP2_301_49216_20131020_181355_outLine +BABEL_OP2_301_49945_20140127_184032_inLine +BABEL_OP2_301_49945_20140127_184032_outLine +BABEL_OP2_301_50175_20131019_212339_inLine +BABEL_OP2_301_50175_20131019_212339_outLine +BABEL_OP2_301_50810_20131025_174542_inLine +BABEL_OP2_301_50810_20131025_174542_outLine +BABEL_OP2_301_51484_20131220_211835_inLine +BABEL_OP2_301_51484_20131220_211835_outLine 
+BABEL_OP2_301_51540_20140106_172711_inLine +BABEL_OP2_301_51540_20140106_172711_outLine +BABEL_OP2_301_51701_20140205_193018_inLine +BABEL_OP2_301_51701_20140205_193018_outLine +BABEL_OP2_301_51968_20131204_190129_inLine +BABEL_OP2_301_51968_20131204_190129_outLine +BABEL_OP2_301_52265_20140216_163445_inLine +BABEL_OP2_301_52265_20140216_163445_outLine +BABEL_OP2_301_52272_20131030_202958_inLine +BABEL_OP2_301_52272_20131030_202958_outLine +BABEL_OP2_301_52381_20140109_155159_inLine +BABEL_OP2_301_52381_20140109_155159_outLine +BABEL_OP2_301_52404_20131211_192143_inLine +BABEL_OP2_301_52404_20131211_192143_outLine +BABEL_OP2_301_52804_20131122_192606_inLine +BABEL_OP2_301_52804_20131122_192606_outLine +BABEL_OP2_301_53842_20131205_212824_inLine +BABEL_OP2_301_53842_20131205_212824_outLine +BABEL_OP2_301_53842_20131205_214030_inLine +BABEL_OP2_301_53842_20131205_214030_outLine +BABEL_OP2_301_54074_20131204_200954_inLine +BABEL_OP2_301_54074_20131204_200954_outLine +BABEL_OP2_301_54162_20131210_170602_inLine +BABEL_OP2_301_54162_20131210_170602_outLine +BABEL_OP2_301_54530_20131218_184644_inLine +BABEL_OP2_301_54530_20131218_184644_outLine +BABEL_OP2_301_54567_20131205_193927_inLine +BABEL_OP2_301_54567_20131205_193927_outLine +BABEL_OP2_301_54827_20140126_184228_inLine +BABEL_OP2_301_54827_20140126_184228_outLine +BABEL_OP2_301_54953_20131127_005926_inLine +BABEL_OP2_301_54953_20131127_005926_outLine +BABEL_OP2_301_55106_20140119_161343_inLine +BABEL_OP2_301_55106_20140119_161343_outLine +BABEL_OP2_301_55349_20140121_152059_inLine +BABEL_OP2_301_55349_20140121_152059_outLine +BABEL_OP2_301_55381_20140103_163729_inLine +BABEL_OP2_301_55381_20140103_163729_outLine +BABEL_OP2_301_55818_20131110_111534_inLine +BABEL_OP2_301_55818_20131110_111534_outLine +BABEL_OP2_301_55818_20131110_121457_inLine +BABEL_OP2_301_55818_20131110_121457_outLine +BABEL_OP2_301_56306_20140108_175350_inLine +BABEL_OP2_301_56306_20140108_175350_outLine +BABEL_OP2_301_57116_20131129_012420_inLine +BABEL_OP2_301_57116_20131129_012420_outLine +BABEL_OP2_301_57233_20140224_172256_inLine +BABEL_OP2_301_57233_20140224_172256_outLine +BABEL_OP2_301_57542_20140122_150942_inLine +BABEL_OP2_301_57542_20140122_150942_outLine +BABEL_OP2_301_57566_20140106_150720_inLine +BABEL_OP2_301_57566_20140106_150720_outLine +BABEL_OP2_301_58006_20140122_203731_inLine +BABEL_OP2_301_58006_20140122_203731_outLine +BABEL_OP2_301_58313_20140207_172512_inLine +BABEL_OP2_301_58313_20140207_172512_outLine +BABEL_OP2_301_58926_20131124_131005_inLine +BABEL_OP2_301_58926_20131124_131005_outLine +BABEL_OP2_301_59039_20140220_172820_inLine +BABEL_OP2_301_59039_20140220_172820_outLine +BABEL_OP2_301_59078_20140206_221105_inLine +BABEL_OP2_301_59078_20140206_221105_outLine +BABEL_OP2_301_59549_20131115_144344_inLine +BABEL_OP2_301_59549_20131115_144344_outLine +BABEL_OP2_301_59549_20131115_145934_inLine +BABEL_OP2_301_59549_20131115_145934_outLine +BABEL_OP2_301_59928_20131208_181057_inLine +BABEL_OP2_301_59928_20131208_181057_outLine +BABEL_OP2_301_60436_20140126_184303_inLine +BABEL_OP2_301_60436_20140126_184303_outLine +BABEL_OP2_301_60458_20140127_174755_inLine +BABEL_OP2_301_60458_20140127_174755_outLine +BABEL_OP2_301_60474_20131125_202818_inLine +BABEL_OP2_301_60474_20131125_202818_outLine +BABEL_OP2_301_60477_20140131_142240_inLine +BABEL_OP2_301_60477_20140131_142240_outLine +BABEL_OP2_301_60498_20140128_144917_inLine +BABEL_OP2_301_60498_20140128_144917_outLine +BABEL_OP2_301_60626_20131123_194530_inLine 
+BABEL_OP2_301_60626_20131123_194530_outLine +BABEL_OP2_301_61440_20140129_162338_inLine +BABEL_OP2_301_61440_20140129_162338_outLine +BABEL_OP2_301_62047_20131223_201629_inLine +BABEL_OP2_301_62047_20131223_201629_outLine +BABEL_OP2_301_62734_20131127_125913_inLine +BABEL_OP2_301_62734_20131127_125913_outLine +BABEL_OP2_301_62800_20131023_133254_inLine +BABEL_OP2_301_62800_20131023_133254_outLine +BABEL_OP2_301_63787_20131112_234133_inLine +BABEL_OP2_301_63787_20131112_234133_outLine +BABEL_OP2_301_63906_20140122_195218_inLine +BABEL_OP2_301_63906_20140122_195218_outLine +BABEL_OP2_301_64768_20131129_183309_inLine +BABEL_OP2_301_64768_20131129_183309_outLine +BABEL_OP2_301_64902_20140123_130547_inLine +BABEL_OP2_301_64902_20140123_130547_outLine +BABEL_OP2_301_65298_20140115_174724_inLine +BABEL_OP2_301_65298_20140115_174724_outLine +BABEL_OP2_301_65466_20140122_211719_inLine +BABEL_OP2_301_65466_20140122_211719_outLine +BABEL_OP2_301_66045_20131203_142944_inLine +BABEL_OP2_301_66045_20131203_142944_outLine +BABEL_OP2_301_66361_20140223_153258_inLine +BABEL_OP2_301_66361_20140223_153258_outLine +BABEL_OP2_301_66916_20131023_223807_inLine +BABEL_OP2_301_66916_20131023_223807_outLine +BABEL_OP2_301_67152_20140119_212917_inLine +BABEL_OP2_301_67152_20140119_212917_outLine +BABEL_OP2_301_67213_20140220_182122_inLine +BABEL_OP2_301_67213_20140220_182122_outLine +BABEL_OP2_301_67622_20131023_150210_inLine +BABEL_OP2_301_67622_20131023_150210_outLine +BABEL_OP2_301_68182_20140115_183030_inLine +BABEL_OP2_301_68182_20140115_183030_outLine +BABEL_OP2_301_68924_20131210_145459_inLine +BABEL_OP2_301_68924_20131210_145459_outLine +BABEL_OP2_301_69096_20140128_171512_inLine +BABEL_OP2_301_69096_20140128_171512_outLine +BABEL_OP2_301_69746_20140108_182845_inLine +BABEL_OP2_301_69746_20140108_182845_outLine +BABEL_OP2_301_69937_20140131_181058_inLine +BABEL_OP2_301_69937_20140131_181058_outLine +BABEL_OP2_301_69992_20131110_135349_inLine +BABEL_OP2_301_69992_20131110_135349_outLine +BABEL_OP2_301_70386_20140102_173141_inLine +BABEL_OP2_301_70386_20140102_173141_outLine +BABEL_OP2_301_71121_20140223_161906_inLine +BABEL_OP2_301_71121_20140223_161906_outLine +BABEL_OP2_301_71263_20140205_210654_inLine +BABEL_OP2_301_71263_20140205_210654_outLine +BABEL_OP2_301_72733_20140126_155036_inLine +BABEL_OP2_301_72733_20140126_155036_outLine +BABEL_OP2_301_72844_20131023_180119_inLine +BABEL_OP2_301_72844_20131023_180119_outLine +BABEL_OP2_301_73005_20140126_193903_inLine +BABEL_OP2_301_73005_20140126_193903_outLine +BABEL_OP2_301_73042_20131114_135827_inLine +BABEL_OP2_301_73042_20131114_135827_outLine +BABEL_OP2_301_73258_20131203_200331_inLine +BABEL_OP2_301_73258_20131203_200331_outLine +BABEL_OP2_301_73485_20140128_210522_inLine +BABEL_OP2_301_73485_20140128_210522_outLine +BABEL_OP2_301_73549_20140131_160208_inLine +BABEL_OP2_301_73549_20140131_160208_outLine +BABEL_OP2_301_73591_20131016_200144_inLine +BABEL_OP2_301_73591_20131016_200144_outLine +BABEL_OP2_301_73591_20131016_201810_inLine +BABEL_OP2_301_73591_20131016_201810_outLine +BABEL_OP2_301_73964_20140214_161434_inLine +BABEL_OP2_301_73964_20140214_161434_outLine +BABEL_OP2_301_74886_20131102_122938_inLine +BABEL_OP2_301_74886_20131102_122938_outLine +BABEL_OP2_301_75261_20131226_160602_inLine +BABEL_OP2_301_75261_20131226_160602_outLine +BABEL_OP2_301_75869_20140122_141000_inLine +BABEL_OP2_301_75869_20140122_141000_outLine +BABEL_OP2_301_75981_20140127_143431_inLine +BABEL_OP2_301_75981_20140127_143431_outLine 
+BABEL_OP2_301_76155_20131203_185301_inLine +BABEL_OP2_301_76155_20131203_185301_outLine +BABEL_OP2_301_77146_20131023_185146_inLine +BABEL_OP2_301_77146_20131023_185146_outLine +BABEL_OP2_301_77427_20131124_013134_inLine +BABEL_OP2_301_77427_20131124_013134_outLine +BABEL_OP2_301_77427_20131124_014748_inLine +BABEL_OP2_301_77427_20131124_014748_outLine +BABEL_OP2_301_77744_20131117_154739_inLine +BABEL_OP2_301_77744_20131117_154739_outLine +BABEL_OP2_301_78482_20131227_163840_inLine +BABEL_OP2_301_78482_20131227_163840_outLine +BABEL_OP2_301_78543_20140131_010053_inLine +BABEL_OP2_301_78543_20140131_010053_outLine +BABEL_OP2_301_78743_20131220_201406_inLine +BABEL_OP2_301_78743_20131220_201406_outLine +BABEL_OP2_301_78943_20131120_175430_inLine +BABEL_OP2_301_78943_20131120_175430_outLine +BABEL_OP2_301_79451_20131125_114859_inLine +BABEL_OP2_301_79451_20131125_114859_outLine +BABEL_OP2_301_81622_20131204_193304_inLine +BABEL_OP2_301_81622_20131204_193304_outLine +BABEL_OP2_301_81810_20131214_030628_inLine +BABEL_OP2_301_81810_20131214_030628_outLine +BABEL_OP2_301_81854_20140127_151841_inLine +BABEL_OP2_301_81854_20140127_151841_outLine +BABEL_OP2_301_82089_20131208_202028_inLine +BABEL_OP2_301_82089_20131208_202028_outLine +BABEL_OP2_301_82425_20131113_010203_inLine +BABEL_OP2_301_82425_20131113_010203_outLine +BABEL_OP2_301_82626_20140131_233635_inLine +BABEL_OP2_301_82626_20140131_233635_outLine +BABEL_OP2_301_83436_20131116_194233_inLine +BABEL_OP2_301_83436_20131116_194233_outLine +BABEL_OP2_301_83455_20131129_211537_inLine +BABEL_OP2_301_83455_20131129_211537_outLine +BABEL_OP2_301_83455_20131129_212747_inLine +BABEL_OP2_301_83455_20131129_212747_outLine +BABEL_OP2_301_83625_20140224_161632_inLine +BABEL_OP2_301_83625_20140224_161632_outLine +BABEL_OP2_301_84458_20131216_193109_inLine +BABEL_OP2_301_84458_20131216_193109_outLine +BABEL_OP2_301_84547_20131025_143053_inLine +BABEL_OP2_301_84547_20131025_143053_outLine +BABEL_OP2_301_85248_20140115_144605_inLine +BABEL_OP2_301_85248_20140115_144605_outLine +BABEL_OP2_301_85322_20131112_183356_inLine +BABEL_OP2_301_85322_20131112_183356_outLine +BABEL_OP2_301_85519_20140103_170652_inLine +BABEL_OP2_301_85519_20140103_170652_outLine +BABEL_OP2_301_86156_20140122_185516_inLine +BABEL_OP2_301_86156_20140122_185516_outLine +BABEL_OP2_301_87470_20131128_003454_inLine +BABEL_OP2_301_87470_20131128_003454_outLine +BABEL_OP2_301_87545_20140125_194128_inLine +BABEL_OP2_301_87545_20140125_194128_outLine +BABEL_OP2_301_88812_20140126_203311_inLine +BABEL_OP2_301_88812_20140126_203311_outLine +BABEL_OP2_301_88925_20131220_151054_inLine +BABEL_OP2_301_88925_20131220_151054_outLine +BABEL_OP2_301_88938_20140104_195418_inLine +BABEL_OP2_301_88938_20140104_195418_outLine +BABEL_OP2_301_89059_20140109_141228_inLine +BABEL_OP2_301_89059_20140109_141228_outLine +BABEL_OP2_301_89358_20131209_174055_inLine +BABEL_OP2_301_89358_20131209_174055_outLine +BABEL_OP2_301_89665_20131206_143535_inLine +BABEL_OP2_301_89665_20131206_143535_outLine +BABEL_OP2_301_89695_20131203_225429_inLine +BABEL_OP2_301_89695_20131203_225429_outLine +BABEL_OP2_301_89877_20140205_200816_inLine +BABEL_OP2_301_89877_20140205_200816_outLine +BABEL_OP2_301_90709_20131109_170505_inLine +BABEL_OP2_301_90709_20131109_170505_outLine +BABEL_OP2_301_90737_20131206_160650_inLine +BABEL_OP2_301_90737_20131206_160650_outLine +BABEL_OP2_301_91372_20140126_145526_inLine +BABEL_OP2_301_91372_20140126_145526_outLine +BABEL_OP2_301_91463_20140206_144651_inLine 
+BABEL_OP2_301_91463_20140206_144651_outLine +BABEL_OP2_301_91478_20140224_170543_inLine +BABEL_OP2_301_91478_20140224_170543_outLine +BABEL_OP2_301_91760_20140127_183930_inLine +BABEL_OP2_301_91760_20140127_183930_outLine +BABEL_OP2_301_91884_20140118_220510_inLine +BABEL_OP2_301_91884_20140118_220510_outLine +BABEL_OP2_301_91891_20131213_192340_inLine +BABEL_OP2_301_91891_20131213_192340_outLine +BABEL_OP2_301_91944_20131114_123915_inLine +BABEL_OP2_301_91944_20131114_123915_outLine +BABEL_OP2_301_92809_20131124_142340_inLine +BABEL_OP2_301_92809_20131124_142340_outLine +BABEL_OP2_301_92809_20131124_143817_inLine +BABEL_OP2_301_92809_20131124_143817_outLine +BABEL_OP2_301_92942_20140206_180304_inLine +BABEL_OP2_301_92942_20140206_180304_outLine +BABEL_OP2_301_93153_20131114_144733_inLine +BABEL_OP2_301_93153_20131114_144733_outLine +BABEL_OP2_301_93153_20131114_151704_inLine +BABEL_OP2_301_93153_20131114_151704_outLine +BABEL_OP2_301_93475_20131119_183619_inLine +BABEL_OP2_301_93475_20131119_183619_outLine +BABEL_OP2_301_93515_20140125_212344_inLine +BABEL_OP2_301_93515_20140125_212344_outLine +BABEL_OP2_301_93964_20131130_172431_inLine +BABEL_OP2_301_93964_20131130_172431_outLine +BABEL_OP2_301_94409_20131204_145545_inLine +BABEL_OP2_301_94409_20131204_145545_outLine +BABEL_OP2_301_94978_20140119_185149_inLine +BABEL_OP2_301_94978_20140119_185149_outLine +BABEL_OP2_301_95338_20140127_192317_inLine +BABEL_OP2_301_95338_20140127_192317_outLine +BABEL_OP2_301_95399_20131206_150920_inLine +BABEL_OP2_301_95399_20131206_150920_outLine +BABEL_OP2_301_95583_20131029_002312_inLine +BABEL_OP2_301_95583_20131029_002312_outLine +BABEL_OP2_301_95663_20131025_134113_inLine +BABEL_OP2_301_95663_20131025_134113_outLine +BABEL_OP2_301_95935_20140103_190515_inLine +BABEL_OP2_301_95935_20140103_190515_outLine +BABEL_OP2_301_96190_20131122_024403_inLine +BABEL_OP2_301_96190_20131122_024403_outLine +BABEL_OP2_301_96324_20131026_023101_inLine +BABEL_OP2_301_96324_20131026_023101_outLine +BABEL_OP2_301_96376_20140126_140015_inLine +BABEL_OP2_301_96376_20140126_140015_outLine +BABEL_OP2_301_96910_20131124_183403_inLine +BABEL_OP2_301_96910_20131124_183403_outLine +BABEL_OP2_301_97136_20140120_235804_inLine +BABEL_OP2_301_97136_20140120_235804_outLine +BABEL_OP2_301_97588_20131025_185012_inLine +BABEL_OP2_301_97588_20131025_185012_outLine +BABEL_OP2_301_99202_20131226_202006_inLine +BABEL_OP2_301_99202_20131226_202006_outLine +BABEL_OP2_301_99955_20140110_162703_inLine +BABEL_OP2_301_99955_20140110_162703_outLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/untranscribed-training.list b/egs/babel/s5d/conf/lists/301-cebuano/untranscribed-training.list new file mode 100644 index 00000000000..f0033cd47ec --- /dev/null +++ b/egs/babel/s5d/conf/lists/301-cebuano/untranscribed-training.list @@ -0,0 +1,548 @@ +BABEL_OP2_301_10188_20131015_200722_inLine +BABEL_OP2_301_10188_20131015_200722_outLine +BABEL_OP2_301_10188_20131015_201921_inLine +BABEL_OP2_301_10188_20131015_201921_outLine +BABEL_OP2_301_10974_20131226_183511_inLine +BABEL_OP2_301_10974_20131226_183511_outLine +BABEL_OP2_301_11096_20140129_200046_inLine +BABEL_OP2_301_11096_20140129_200046_outLine +BABEL_OP2_301_11663_20140206_183134_inLine +BABEL_OP2_301_11663_20140206_183134_outLine +BABEL_OP2_301_12851_20131019_233929_inLine +BABEL_OP2_301_12851_20131019_233929_outLine +BABEL_OP2_301_12851_20131026_182349_inLine +BABEL_OP2_301_12851_20131026_182349_outLine +BABEL_OP2_301_13030_20131128_165148_inLine +BABEL_OP2_301_13030_20131128_165148_outLine 
+BABEL_OP2_301_13040_20131113_202409_outLine +BABEL_OP2_301_13744_20131020_151336_inLine +BABEL_OP2_301_13744_20131020_160305_inLine +BABEL_OP2_301_13909_20140201_154926_inLine +BABEL_OP2_301_13909_20140201_154926_outLine +BABEL_OP2_301_14228_20131223_203905_outLine +BABEL_OP2_301_15024_20131206_182911_inLine +BABEL_OP2_301_15024_20131206_182911_outLine +BABEL_OP2_301_15535_20131212_140356_inLine +BABEL_OP2_301_15535_20131212_140356_outLine +BABEL_OP2_301_15749_20131226_201016_inLine +BABEL_OP2_301_15749_20131226_201016_outLine +BABEL_OP2_301_15902_20131116_134056_inLine +BABEL_OP2_301_15902_20131116_134056_outLine +BABEL_OP2_301_17567_20131223_202218_inLine +BABEL_OP2_301_17567_20131223_202218_outLine +BABEL_OP2_301_17573_20131221_234136_inLine +BABEL_OP2_301_17573_20131221_234136_outLine +BABEL_OP2_301_17751_20140201_163439_inLine +BABEL_OP2_301_17751_20140201_163439_outLine +BABEL_OP2_301_17914_20140114_191137_inLine +BABEL_OP2_301_17914_20140114_191137_outLine +BABEL_OP2_301_18863_20140103_143408_inLine +BABEL_OP2_301_18863_20140103_145207_inLine +BABEL_OP2_301_18939_20131108_215217_outLine +BABEL_OP2_301_19120_20140129_153621_inLine +BABEL_OP2_301_19120_20140129_153621_outLine +BABEL_OP2_301_19545_20131223_225812_inLine +BABEL_OP2_301_19545_20131223_225812_outLine +BABEL_OP2_301_19703_20131203_181434_inLine +BABEL_OP2_301_19703_20131203_181434_outLine +BABEL_OP2_301_20682_20131220_204542_inLine +BABEL_OP2_301_20682_20131220_204542_outLine +BABEL_OP2_301_20738_20140115_135411_inLine +BABEL_OP2_301_20738_20140115_135411_outLine +BABEL_OP2_301_21426_20140216_181528_outLine +BABEL_OP2_301_21426_20140216_182606_outLine +BABEL_OP2_301_21581_20131128_151038_inLine +BABEL_OP2_301_21581_20131128_151038_outLine +BABEL_OP2_301_21794_20131203_210336_inLine +BABEL_OP2_301_21794_20131203_210336_outLine +BABEL_OP2_301_21794_20131203_211241_inLine +BABEL_OP2_301_21794_20131203_211241_outLine +BABEL_OP2_301_21794_20131203_212201_inLine +BABEL_OP2_301_21794_20131203_212201_outLine +BABEL_OP2_301_22170_20140119_235310_inLine +BABEL_OP2_301_22170_20140119_235310_outLine +BABEL_OP2_301_23151_20140115_191742_inLine +BABEL_OP2_301_23151_20140115_191742_outLine +BABEL_OP2_301_23260_20140123_165218_inLine +BABEL_OP2_301_23260_20140123_165218_outLine +BABEL_OP2_301_23681_20140129_150558_inLine +BABEL_OP2_301_23681_20140129_150558_outLine +BABEL_OP2_301_23983_20140125_164849_inLine +BABEL_OP2_301_23983_20140125_164849_outLine +BABEL_OP2_301_24033_20140108_160013_inLine +BABEL_OP2_301_24033_20140108_160013_outLine +BABEL_OP2_301_24470_20140206_191002_inLine +BABEL_OP2_301_24470_20140206_191002_outLine +BABEL_OP2_301_25085_20140204_170633_inLine +BABEL_OP2_301_25085_20140204_170633_outLine +BABEL_OP2_301_25220_20140202_012113_inLine +BABEL_OP2_301_25220_20140202_012113_outLine +BABEL_OP2_301_25698_20140202_155327_inLine +BABEL_OP2_301_25698_20140202_155327_outLine +BABEL_OP2_301_26398_20140125_202344_inLine +BABEL_OP2_301_26398_20140125_202344_outLine +BABEL_OP2_301_26574_20131226_194917_inLine +BABEL_OP2_301_26574_20131226_194917_outLine +BABEL_OP2_301_27203_20140205_212839_inLine +BABEL_OP2_301_27203_20140205_212839_outLine +BABEL_OP2_301_27478_20140120_183015_inLine +BABEL_OP2_301_27478_20140120_183015_outLine +BABEL_OP2_301_28190_20140103_204548_inLine +BABEL_OP2_301_28190_20140103_204548_outLine +BABEL_OP2_301_28190_20140103_211418_inLine +BABEL_OP2_301_28190_20140103_211418_outLine +BABEL_OP2_301_28538_20131206_201510_inLine +BABEL_OP2_301_28538_20131206_201510_outLine 
+BABEL_OP2_301_28775_20131117_184047_inLine +BABEL_OP2_301_28775_20131117_184047_outLine +BABEL_OP2_301_28775_20131117_184742_inLine +BABEL_OP2_301_28775_20131117_184742_outLine +BABEL_OP2_301_28775_20131117_190311_inLine +BABEL_OP2_301_28775_20131117_190311_outLine +BABEL_OP2_301_29072_20131212_183347_inLine +BABEL_OP2_301_29072_20131212_183347_outLine +BABEL_OP2_301_29076_20140207_194512_inLine +BABEL_OP2_301_29076_20140207_194512_outLine +BABEL_OP2_301_29352_20140131_181124_inLine +BABEL_OP2_301_29352_20140131_181124_outLine +BABEL_OP2_301_29633_20140121_164509_inLine +BABEL_OP2_301_29633_20140121_164509_outLine +BABEL_OP2_301_29643_20140129_200354_outLine +BABEL_OP2_301_29765_20140131_185401_inLine +BABEL_OP2_301_29765_20140131_185401_outLine +BABEL_OP2_301_30253_20131217_200910_inLine +BABEL_OP2_301_30253_20131217_200910_outLine +BABEL_OP2_301_30280_20140210_171213_inLine +BABEL_OP2_301_30280_20140210_171213_outLine +BABEL_OP2_301_30497_20140123_162323_inLine +BABEL_OP2_301_30497_20140123_162323_outLine +BABEL_OP2_301_31109_20131225_234903_inLine +BABEL_OP2_301_31109_20131225_234903_outLine +BABEL_OP2_301_31182_20140102_125318_inLine +BABEL_OP2_301_31182_20140102_125318_outLine +BABEL_OP2_301_31182_20140102_130533_inLine +BABEL_OP2_301_31182_20140102_130533_outLine +BABEL_OP2_301_31184_20131208_184700_inLine +BABEL_OP2_301_31184_20131208_184700_outLine +BABEL_OP2_301_31484_20131210_183412_inLine +BABEL_OP2_301_31484_20131210_183412_outLine +BABEL_OP2_301_31583_20131220_145426_inLine +BABEL_OP2_301_31583_20131220_145426_outLine +BABEL_OP2_301_32048_20140107_184712_inLine +BABEL_OP2_301_32048_20140107_184712_outLine +BABEL_OP2_301_32861_20140110_193920_outLine +BABEL_OP2_301_32872_20140126_181540_inLine +BABEL_OP2_301_32872_20140126_181540_outLine +BABEL_OP2_301_32959_20131218_155238_inLine +BABEL_OP2_301_32959_20131218_155238_outLine +BABEL_OP2_301_33229_20140112_190633_inLine +BABEL_OP2_301_33229_20140112_190633_outLine +BABEL_OP2_301_33251_20140206_154015_inLine +BABEL_OP2_301_33251_20140206_154015_outLine +BABEL_OP2_301_33659_20140223_185752_outLine +BABEL_OP2_301_34336_20131125_162020_inLine +BABEL_OP2_301_34336_20131125_162020_outLine +BABEL_OP2_301_34336_20131125_163318_inLine +BABEL_OP2_301_34336_20131125_163318_outLine +BABEL_OP2_301_34477_20131129_201317_inLine +BABEL_OP2_301_34477_20131129_201317_outLine +BABEL_OP2_301_34688_20131107_151905_inLine +BABEL_OP2_301_34713_20140216_184756_outLine +BABEL_OP2_301_34899_20140201_183710_inLine +BABEL_OP2_301_34899_20140201_183710_outLine +BABEL_OP2_301_36017_20140123_220745_inLine +BABEL_OP2_301_36017_20140123_220745_outLine +BABEL_OP2_301_36894_20131113_201325_inLine +BABEL_OP2_301_38750_20131218_210138_inLine +BABEL_OP2_301_38750_20131218_210138_outLine +BABEL_OP2_301_39059_20140115_160435_inLine +BABEL_OP2_301_39059_20140115_160435_outLine +BABEL_OP2_301_39059_20140115_161237_inLine +BABEL_OP2_301_39059_20140115_161237_outLine +BABEL_OP2_301_39893_20140201_164926_inLine +BABEL_OP2_301_39893_20140201_164926_outLine +BABEL_OP2_301_41097_20131218_194351_inLine +BABEL_OP2_301_41097_20131218_194351_outLine +BABEL_OP2_301_41100_20131130_204102_inLine +BABEL_OP2_301_41100_20131130_204102_outLine +BABEL_OP2_301_41100_20131130_204814_inLine +BABEL_OP2_301_41100_20131130_204814_outLine +BABEL_OP2_301_41109_20140107_200127_inLine +BABEL_OP2_301_41109_20140107_200127_outLine +BABEL_OP2_301_41272_20140126_163911_inLine +BABEL_OP2_301_41272_20140126_163911_outLine +BABEL_OP2_301_41442_20131220_182530_inLine 
+BABEL_OP2_301_41442_20131220_182530_outLine +BABEL_OP2_301_41442_20131220_183940_inLine +BABEL_OP2_301_41442_20131220_183940_outLine +BABEL_OP2_301_42231_20131213_161445_inLine +BABEL_OP2_301_42231_20131213_161445_outLine +BABEL_OP2_301_42243_20131124_191210_inLine +BABEL_OP2_301_42243_20131124_191210_outLine +BABEL_OP2_301_42718_20140126_222724_inLine +BABEL_OP2_301_42718_20140126_222724_outLine +BABEL_OP2_301_43074_20140213_170948_inLine +BABEL_OP2_301_43074_20140213_170948_outLine +BABEL_OP2_301_43157_20140214_155422_inLine +BABEL_OP2_301_43157_20140214_155422_outLine +BABEL_OP2_301_43588_20140128_173254_inLine +BABEL_OP2_301_43588_20140128_173254_outLine +BABEL_OP2_301_43588_20140128_174720_inLine +BABEL_OP2_301_43588_20140128_174720_outLine +BABEL_OP2_301_43990_20140220_141338_inLine +BABEL_OP2_301_43990_20140220_141338_outLine +BABEL_OP2_301_44255_20140115_001546_inLine +BABEL_OP2_301_44255_20140115_001546_outLine +BABEL_OP2_301_44290_20140126_145048_inLine +BABEL_OP2_301_44290_20140126_145048_outLine +BABEL_OP2_301_44531_20140118_212803_inLine +BABEL_OP2_301_44531_20140118_212803_outLine +BABEL_OP2_301_44847_20131214_204251_inLine +BABEL_OP2_301_44847_20131214_204251_outLine +BABEL_OP2_301_44847_20131214_230118_inLine +BABEL_OP2_301_44847_20131214_230118_outLine +BABEL_OP2_301_45697_20140214_220139_inLine +BABEL_OP2_301_45697_20140214_220139_outLine +BABEL_OP2_301_46169_20131220_162551_inLine +BABEL_OP2_301_46169_20131220_162551_outLine +BABEL_OP2_301_46202_20140224_155801_inLine +BABEL_OP2_301_46202_20140224_155801_outLine +BABEL_OP2_301_46315_20131211_204949_inLine +BABEL_OP2_301_46315_20131211_204949_outLine +BABEL_OP2_301_46625_20131026_225140_outLine +BABEL_OP2_301_46974_20131211_200449_inLine +BABEL_OP2_301_46974_20131211_200449_outLine +BABEL_OP2_301_47637_20140213_164701_inLine +BABEL_OP2_301_47637_20140213_164701_outLine +BABEL_OP2_301_47799_20140216_165643_inLine +BABEL_OP2_301_47799_20140216_165643_outLine +BABEL_OP2_301_48016_20140205_174755_inLine +BABEL_OP2_301_48016_20140205_174755_outLine +BABEL_OP2_301_48299_20140224_163951_inLine +BABEL_OP2_301_48299_20140224_163951_outLine +BABEL_OP2_301_48610_20131113_182547_outLine +BABEL_OP2_301_48663_20140126_210156_inLine +BABEL_OP2_301_48663_20140126_210156_outLine +BABEL_OP2_301_48758_20140122_144530_inLine +BABEL_OP2_301_48758_20140122_144530_outLine +BABEL_OP2_301_48758_20140122_155747_inLine +BABEL_OP2_301_48758_20140122_155747_outLine +BABEL_OP2_301_48907_20140127_134337_inLine +BABEL_OP2_301_48907_20140127_134337_outLine +BABEL_OP2_301_49637_20131030_211145_inLine +BABEL_OP2_301_49767_20140131_135142_inLine +BABEL_OP2_301_49767_20140131_135142_outLine +BABEL_OP2_301_50779_20140207_191951_inLine +BABEL_OP2_301_50779_20140207_191951_outLine +BABEL_OP2_301_50940_20140220_201041_inLine +BABEL_OP2_301_50940_20140220_201041_outLine +BABEL_OP2_301_51858_20140220_170150_inLine +BABEL_OP2_301_51858_20140220_170150_outLine +BABEL_OP2_301_52222_20140224_160657_inLine +BABEL_OP2_301_52222_20140224_160657_outLine +BABEL_OP2_301_52483_20140214_142008_inLine +BABEL_OP2_301_52483_20140214_142008_outLine +BABEL_OP2_301_52854_20131015_224412_inLine +BABEL_OP2_301_52854_20131015_224412_outLine +BABEL_OP2_301_52854_20131015_225109_inLine +BABEL_OP2_301_52854_20131015_225109_outLine +BABEL_OP2_301_52854_20131015_230437_inLine +BABEL_OP2_301_52854_20131015_230437_outLine +BABEL_OP2_301_53072_20140128_162233_inLine +BABEL_OP2_301_53072_20140128_162233_outLine +BABEL_OP2_301_53415_20140119_182758_inLine 
+BABEL_OP2_301_53415_20140119_182758_outLine +BABEL_OP2_301_53419_20131222_184412_inLine +BABEL_OP2_301_53419_20131222_184412_outLine +BABEL_OP2_301_53492_20140122_223158_inLine +BABEL_OP2_301_53492_20140122_223158_outLine +BABEL_OP2_301_53492_20140122_223724_inLine +BABEL_OP2_301_53492_20140122_223724_outLine +BABEL_OP2_301_54040_20140102_113546_inLine +BABEL_OP2_301_54040_20140102_113546_outLine +BABEL_OP2_301_54066_20140214_153112_inLine +BABEL_OP2_301_54066_20140214_153112_outLine +BABEL_OP2_301_54405_20131227_152052_inLine +BABEL_OP2_301_54405_20131227_152052_outLine +BABEL_OP2_301_54634_20140225_214816_inLine +BABEL_OP2_301_54634_20140225_214816_outLine +BABEL_OP2_301_54923_20140201_161814_inLine +BABEL_OP2_301_54923_20140201_161814_outLine +BABEL_OP2_301_55013_20140214_165830_inLine +BABEL_OP2_301_55013_20140214_165830_outLine +BABEL_OP2_301_56019_20140117_192119_inLine +BABEL_OP2_301_56019_20140117_192119_outLine +BABEL_OP2_301_56090_20131016_191346_inLine +BABEL_OP2_301_56090_20131016_191346_outLine +BABEL_OP2_301_56213_20131216_202911_inLine +BABEL_OP2_301_56213_20131216_202911_outLine +BABEL_OP2_301_56345_20140223_203712_inLine +BABEL_OP2_301_56345_20140223_203712_outLine +BABEL_OP2_301_56429_20131112_172026_inLine +BABEL_OP2_301_56429_20131112_172026_outLine +BABEL_OP2_301_56465_20140205_174245_inLine +BABEL_OP2_301_56465_20140205_174245_outLine +BABEL_OP2_301_56468_20140225_204233_inLine +BABEL_OP2_301_56468_20140225_204233_outLine +BABEL_OP2_301_56677_20131217_201344_inLine +BABEL_OP2_301_56677_20131217_201344_outLine +BABEL_OP2_301_56684_20140105_193720_inLine +BABEL_OP2_301_56684_20140105_193720_outLine +BABEL_OP2_301_57067_20140107_192621_inLine +BABEL_OP2_301_57067_20140107_192621_outLine +BABEL_OP2_301_57219_20140205_155125_outLine +BABEL_OP2_301_57219_20140205_160417_outLine +BABEL_OP2_301_57529_20131217_195013_inLine +BABEL_OP2_301_57529_20131217_195013_outLine +BABEL_OP2_301_57609_20131224_152505_inLine +BABEL_OP2_301_57609_20131224_152505_outLine +BABEL_OP2_301_57650_20140114_203646_inLine +BABEL_OP2_301_57650_20140114_203646_outLine +BABEL_OP2_301_57654_20131123_151724_inLine +BABEL_OP2_301_57654_20131123_151724_outLine +BABEL_OP2_301_57654_20131123_152356_inLine +BABEL_OP2_301_57654_20131123_152356_outLine +BABEL_OP2_301_57654_20131123_154603_inLine +BABEL_OP2_301_57654_20131123_154603_outLine +BABEL_OP2_301_58717_20131223_213724_inLine +BABEL_OP2_301_58717_20131223_213724_outLine +BABEL_OP2_301_59028_20140201_153656_inLine +BABEL_OP2_301_59028_20140201_153656_outLine +BABEL_OP2_301_59645_20131224_162758_inLine +BABEL_OP2_301_59645_20131224_162758_outLine +BABEL_OP2_301_60307_20140213_205247_inLine +BABEL_OP2_301_60307_20140213_205247_outLine +BABEL_OP2_301_61011_20131020_212453_inLine +BABEL_OP2_301_61011_20131020_212453_outLine +BABEL_OP2_301_62155_20140121_235400_inLine +BABEL_OP2_301_62155_20140121_235400_outLine +BABEL_OP2_301_62430_20140123_160035_inLine +BABEL_OP2_301_62430_20140123_160035_outLine +BABEL_OP2_301_63094_20140129_205122_inLine +BABEL_OP2_301_63094_20140129_205122_outLine +BABEL_OP2_301_63220_20131218_184307_inLine +BABEL_OP2_301_63220_20131218_184307_outLine +BABEL_OP2_301_63511_20140214_161858_inLine +BABEL_OP2_301_63511_20140214_161858_outLine +BABEL_OP2_301_63670_20131216_201258_inLine +BABEL_OP2_301_63670_20131216_201258_outLine +BABEL_OP2_301_63730_20140204_182322_inLine +BABEL_OP2_301_63730_20140204_182322_outLine +BABEL_OP2_301_63757_20140206_214404_inLine +BABEL_OP2_301_63757_20140206_214404_outLine 
+BABEL_OP2_301_64014_20140114_133546_inLine +BABEL_OP2_301_64014_20140114_133546_outLine +BABEL_OP2_301_64259_20140225_211407_inLine +BABEL_OP2_301_64259_20140225_211407_outLine +BABEL_OP2_301_64398_20131213_201128_inLine +BABEL_OP2_301_64398_20131213_201128_outLine +BABEL_OP2_301_65064_20140207_185319_inLine +BABEL_OP2_301_65064_20140207_185319_outLine +BABEL_OP2_301_65370_20140201_200500_inLine +BABEL_OP2_301_65370_20140201_200500_outLine +BABEL_OP2_301_65640_20140123_140233_inLine +BABEL_OP2_301_65640_20140123_140233_outLine +BABEL_OP2_301_66001_20131115_220236_inLine +BABEL_OP2_301_66001_20131115_220236_outLine +BABEL_OP2_301_66519_20131128_144732_inLine +BABEL_OP2_301_66519_20131128_144732_outLine +BABEL_OP2_301_66519_20131128_150056_inLine +BABEL_OP2_301_66519_20131128_150056_outLine +BABEL_OP2_301_67283_20131023_173705_inLine +BABEL_OP2_301_67283_20131023_173705_outLine +BABEL_OP2_301_67389_20140219_142647_inLine +BABEL_OP2_301_67389_20140219_142647_outLine +BABEL_OP2_301_67401_20140207_182426_inLine +BABEL_OP2_301_67401_20140207_182426_outLine +BABEL_OP2_301_68385_20131016_193158_inLine +BABEL_OP2_301_69153_20131216_202419_inLine +BABEL_OP2_301_69153_20131216_202419_outLine +BABEL_OP2_301_69578_20131201_211250_inLine +BABEL_OP2_301_69578_20131201_211250_outLine +BABEL_OP2_301_69578_20131201_212353_inLine +BABEL_OP2_301_69578_20131201_212353_outLine +BABEL_OP2_301_70221_20131223_190148_inLine +BABEL_OP2_301_70221_20131223_190148_outLine +BABEL_OP2_301_70343_20131212_181613_inLine +BABEL_OP2_301_70343_20131212_181613_outLine +BABEL_OP2_301_70526_20140127_161237_inLine +BABEL_OP2_301_70526_20140127_161237_outLine +BABEL_OP2_301_70986_20140223_164925_inLine +BABEL_OP2_301_70986_20140223_164925_outLine +BABEL_OP2_301_72110_20131218_192930_inLine +BABEL_OP2_301_72110_20131218_192930_outLine +BABEL_OP2_301_72110_20131220_163212_inLine +BABEL_OP2_301_72110_20131220_163212_outLine +BABEL_OP2_301_73301_20131208_194427_inLine +BABEL_OP2_301_73301_20131208_194427_outLine +BABEL_OP2_301_73408_20140213_184704_inLine +BABEL_OP2_301_73408_20140213_184704_outLine +BABEL_OP2_301_73822_20140216_175714_inLine +BABEL_OP2_301_73822_20140216_175714_outLine +BABEL_OP2_301_73837_20131124_173546_inLine +BABEL_OP2_301_74728_20140214_203632_outLine +BABEL_OP2_301_75064_20131129_123930_inLine +BABEL_OP2_301_75064_20131129_123930_outLine +BABEL_OP2_301_75064_20131129_124541_inLine +BABEL_OP2_301_75064_20131129_124541_outLine +BABEL_OP2_301_75342_20131217_201144_inLine +BABEL_OP2_301_75342_20131217_201144_outLine +BABEL_OP2_301_75366_20140131_192045_inLine +BABEL_OP2_301_75366_20140131_192045_outLine +BABEL_OP2_301_75465_20131221_182948_inLine +BABEL_OP2_301_75465_20131221_182948_outLine +BABEL_OP2_301_77242_20140204_192041_inLine +BABEL_OP2_301_77242_20140204_192041_outLine +BABEL_OP2_301_77803_20131024_201026_inLine +BABEL_OP2_301_77803_20131024_201026_outLine +BABEL_OP2_301_79107_20140204_212236_inLine +BABEL_OP2_301_79107_20140204_212236_outLine +BABEL_OP2_301_79139_20131203_165343_outLine +BABEL_OP2_301_79429_20140220_203629_inLine +BABEL_OP2_301_79429_20140220_203629_outLine +BABEL_OP2_301_79858_20131024_220616_outLine +BABEL_OP2_301_80306_20131203_161230_inLine +BABEL_OP2_301_80306_20131203_161230_outLine +BABEL_OP2_301_80306_20131203_162810_inLine +BABEL_OP2_301_80306_20131203_162810_outLine +BABEL_OP2_301_80439_20131202_210809_inLine +BABEL_OP2_301_80439_20131202_210809_outLine +BABEL_OP2_301_80655_20140123_205823_inLine +BABEL_OP2_301_80655_20140123_205823_outLine 
+BABEL_OP2_301_80721_20131225_182955_inLine +BABEL_OP2_301_80721_20131225_182955_outLine +BABEL_OP2_301_81213_20131114_184213_outLine +BABEL_OP2_301_81213_20131114_190753_outLine +BABEL_OP2_301_81404_20131206_140303_outLine +BABEL_OP2_301_81971_20131029_141333_outLine +BABEL_OP2_301_82030_20140126_162146_inLine +BABEL_OP2_301_82030_20140126_162146_outLine +BABEL_OP2_301_82140_20131201_202210_inLine +BABEL_OP2_301_82140_20131201_202210_outLine +BABEL_OP2_301_82224_20140108_145115_inLine +BABEL_OP2_301_82224_20140108_145115_outLine +BABEL_OP2_301_82361_20140123_185800_outLine +BABEL_OP2_301_82966_20131229_171324_inLine +BABEL_OP2_301_82966_20131229_171324_outLine +BABEL_OP2_301_83062_20140123_143457_inLine +BABEL_OP2_301_83062_20140123_143457_outLine +BABEL_OP2_301_83935_20131216_140532_inLine +BABEL_OP2_301_83935_20131216_140532_outLine +BABEL_OP2_301_84327_20131217_123632_inLine +BABEL_OP2_301_84327_20131217_123632_outLine +BABEL_OP2_301_84823_20131218_180840_inLine +BABEL_OP2_301_84823_20131218_180840_outLine +BABEL_OP2_301_85246_20140216_194331_inLine +BABEL_OP2_301_85246_20140216_194331_outLine +BABEL_OP2_301_85254_20140131_161411_inLine +BABEL_OP2_301_85254_20140131_161411_outLine +BABEL_OP2_301_85254_20140131_162620_inLine +BABEL_OP2_301_85254_20140131_162620_outLine +BABEL_OP2_301_86557_20131019_195730_outLine +BABEL_OP2_301_86597_20140204_185521_inLine +BABEL_OP2_301_86597_20140204_185521_outLine +BABEL_OP2_301_86715_20140201_181648_inLine +BABEL_OP2_301_86715_20140201_181648_outLine +BABEL_OP2_301_86826_20140129_155917_inLine +BABEL_OP2_301_86826_20140129_155917_outLine +BABEL_OP2_301_87280_20131220_194114_inLine +BABEL_OP2_301_87280_20131220_194114_outLine +BABEL_OP2_301_87731_20140220_185807_inLine +BABEL_OP2_301_87731_20140220_185807_outLine +BABEL_OP2_301_87777_20140208_173157_inLine +BABEL_OP2_301_87777_20140208_173157_outLine +BABEL_OP2_301_89045_20131025_163532_inLine +BABEL_OP2_301_89045_20131025_163532_outLine +BABEL_OP2_301_90347_20140206_160505_inLine +BABEL_OP2_301_90347_20140206_160505_outLine +BABEL_OP2_301_90417_20140202_164404_inLine +BABEL_OP2_301_90417_20140202_164404_outLine +BABEL_OP2_301_90760_20140204_173413_inLine +BABEL_OP2_301_90760_20140204_173413_outLine +BABEL_OP2_301_91189_20140130_134130_inLine +BABEL_OP2_301_91189_20140130_134130_outLine +BABEL_OP2_301_91581_20131218_193124_inLine +BABEL_OP2_301_91581_20131218_193124_outLine +BABEL_OP2_301_91593_20140201_174423_inLine +BABEL_OP2_301_91593_20140201_174423_outLine +BABEL_OP2_301_91888_20140128_153319_inLine +BABEL_OP2_301_91888_20140128_153319_outLine +BABEL_OP2_301_92077_20140127_141441_inLine +BABEL_OP2_301_92077_20140127_141441_outLine +BABEL_OP2_301_92356_20140112_181929_inLine +BABEL_OP2_301_92356_20140112_181929_outLine +BABEL_OP2_301_92557_20140115_150859_inLine +BABEL_OP2_301_92557_20140115_150859_outLine +BABEL_OP2_301_92643_20140127_134733_inLine +BABEL_OP2_301_92643_20140127_134733_outLine +BABEL_OP2_301_93007_20140201_215259_inLine +BABEL_OP2_301_93007_20140201_215259_outLine +BABEL_OP2_301_93222_20140224_152228_inLine +BABEL_OP2_301_93222_20140224_152228_outLine +BABEL_OP2_301_93681_20131129_223439_inLine +BABEL_OP2_301_93681_20131129_223439_outLine +BABEL_OP2_301_93858_20140202_152245_inLine +BABEL_OP2_301_93858_20140202_152245_outLine +BABEL_OP2_301_94044_20140225_200641_inLine +BABEL_OP2_301_94044_20140225_200641_outLine +BABEL_OP2_301_94141_20140214_174210_inLine +BABEL_OP2_301_94141_20140214_174210_outLine +BABEL_OP2_301_94141_20140214_174838_inLine 
+BABEL_OP2_301_94141_20140214_174838_outLine +BABEL_OP2_301_94166_20140114_223757_inLine +BABEL_OP2_301_94166_20140114_223757_outLine +BABEL_OP2_301_94237_20140125_154005_inLine +BABEL_OP2_301_94237_20140125_154005_outLine +BABEL_OP2_301_94487_20140214_181548_inLine +BABEL_OP2_301_94487_20140214_181548_outLine +BABEL_OP2_301_94969_20140216_191950_inLine +BABEL_OP2_301_94969_20140216_191950_outLine +BABEL_OP2_301_95467_20140204_202122_inLine +BABEL_OP2_301_95467_20140204_202122_outLine +BABEL_OP2_301_95490_20131019_201427_inLine +BABEL_OP2_301_95571_20140225_185558_inLine +BABEL_OP2_301_95571_20140225_185558_outLine +BABEL_OP2_301_95670_20131119_163101_inLine +BABEL_OP2_301_95670_20131119_163101_outLine +BABEL_OP2_301_95670_20131119_163931_inLine +BABEL_OP2_301_95670_20131119_163931_outLine +BABEL_OP2_301_96205_20131208_194017_inLine +BABEL_OP2_301_96205_20131208_194017_outLine +BABEL_OP2_301_96205_20131208_195213_inLine +BABEL_OP2_301_96205_20131208_195213_outLine +BABEL_OP2_301_96446_20131030_214504_inLine +BABEL_OP2_301_96446_20131030_214504_outLine +BABEL_OP2_301_96584_20140114_144108_inLine +BABEL_OP2_301_96584_20140114_144108_outLine +BABEL_OP2_301_96934_20131202_185517_inLine +BABEL_OP2_301_96934_20131202_185517_outLine +BABEL_OP2_301_96940_20140223_150250_inLine +BABEL_OP2_301_96940_20140223_150250_outLine +BABEL_OP2_301_97097_20140122_232217_inLine +BABEL_OP2_301_97097_20140122_232217_outLine +BABEL_OP2_301_97220_20140204_184737_inLine +BABEL_OP2_301_97220_20140204_184737_outLine +BABEL_OP2_301_97604_20140112_103548_inLine +BABEL_OP2_301_97604_20140112_103548_outLine +BABEL_OP2_301_97849_20140123_174235_inLine +BABEL_OP2_301_97849_20140123_174235_outLine +BABEL_OP2_301_97911_20140131_152336_inLine +BABEL_OP2_301_97911_20140131_152336_outLine +BABEL_OP2_301_97911_20140131_153328_inLine +BABEL_OP2_301_97911_20140131_153328_outLine +BABEL_OP2_301_97988_20131219_190252_inLine +BABEL_OP2_301_97988_20131219_190252_outLine +BABEL_OP2_301_97988_20140114_012737_inLine +BABEL_OP2_301_97988_20140114_012737_outLine +BABEL_OP2_301_98192_20140205_153043_inLine +BABEL_OP2_301_98192_20140205_153043_outLine +BABEL_OP2_301_98506_20140122_174742_inLine +BABEL_OP2_301_98506_20140122_174742_outLine +BABEL_OP2_301_98678_20140122_124908_inLine +BABEL_OP2_301_98678_20140122_124908_outLine +BABEL_OP2_301_99401_20131024_225414_inLine +BABEL_OP2_301_99401_20131024_225414_outLine +BABEL_OP2_301_99732_20131220_214613_inLine +BABEL_OP2_301_99732_20131220_214613_outLine +BABEL_OP2_301_99813_20131216_151916_inLine +BABEL_OP2_301_99813_20131216_151916_outLine diff --git a/egs/babel/s5d/conf/lists/302-kazakh/dev.list b/egs/babel/s5d/conf/lists/302-kazakh/dev.list new file mode 100644 index 00000000000..31a554efeef --- /dev/null +++ b/egs/babel/s5d/conf/lists/302-kazakh/dev.list @@ -0,0 +1,140 @@ +BABEL_OP2_302_10002_20140316_215637_inLine +BABEL_OP2_302_10002_20140316_215637_outLine +BABEL_OP2_302_10188_20131030_194100_inLine +BABEL_OP2_302_10188_20131030_194100_outLine +BABEL_OP2_302_11673_20131104_223908_inLine +BABEL_OP2_302_11673_20131104_223908_outLine +BABEL_OP2_302_13324_20131115_220718_inLine +BABEL_OP2_302_13324_20131115_220718_outLine +BABEL_OP2_302_17440_20140218_204311_inLine +BABEL_OP2_302_17440_20140218_204311_outLine +BABEL_OP2_302_17573_20140312_030325_inLine +BABEL_OP2_302_17573_20140312_030325_outLine +BABEL_OP2_302_17914_20140126_234956_inLine +BABEL_OP2_302_17914_20140126_234956_outLine +BABEL_OP2_302_17923_20131116_222221_inLine +BABEL_OP2_302_17923_20131116_222221_outLine 
+BABEL_OP2_302_18939_20131111_213325_inLine +BABEL_OP2_302_18939_20131111_213325_outLine +BABEL_OP2_302_19663_20131212_235807_inLine +BABEL_OP2_302_19663_20131212_235807_outLine +BABEL_OP2_302_19703_20131202_234704_inLine +BABEL_OP2_302_19703_20131202_234704_outLine +BABEL_OP2_302_20682_20140114_221052_inLine +BABEL_OP2_302_20682_20140114_221052_outLine +BABEL_OP2_302_20768_20140203_185125_inLine +BABEL_OP2_302_20768_20140203_185125_outLine +BABEL_OP2_302_20768_20140203_190423_inLine +BABEL_OP2_302_20768_20140203_190423_outLine +BABEL_OP2_302_21109_20140111_215428_inLine +BABEL_OP2_302_21109_20140111_215428_outLine +BABEL_OP2_302_21581_20131217_222306_inLine +BABEL_OP2_302_21581_20131217_222306_outLine +BABEL_OP2_302_22216_20131104_153600_inLine +BABEL_OP2_302_22216_20131104_153600_outLine +BABEL_OP2_302_23355_20140317_191841_inLine +BABEL_OP2_302_23355_20140317_191841_outLine +BABEL_OP2_302_24589_20131129_215929_inLine +BABEL_OP2_302_24589_20131129_215929_outLine +BABEL_OP2_302_26072_20140131_184053_inLine +BABEL_OP2_302_26072_20140131_184053_outLine +BABEL_OP2_302_33175_20131105_201906_inLine +BABEL_OP2_302_33175_20131105_201906_outLine +BABEL_OP2_302_33355_20131112_211255_inLine +BABEL_OP2_302_33355_20131112_211255_outLine +BABEL_OP2_302_33355_20131112_213746_inLine +BABEL_OP2_302_33355_20131112_213746_outLine +BABEL_OP2_302_34328_20131219_023407_inLine +BABEL_OP2_302_34328_20131219_023407_outLine +BABEL_OP2_302_36341_20131101_170216_inLine +BABEL_OP2_302_36341_20131101_170216_outLine +BABEL_OP2_302_36341_20131101_171111_inLine +BABEL_OP2_302_36341_20131101_171111_outLine +BABEL_OP2_302_36669_20131206_164229_inLine +BABEL_OP2_302_36669_20131206_164229_outLine +BABEL_OP2_302_41174_20131212_200450_inLine +BABEL_OP2_302_41174_20131212_200450_outLine +BABEL_OP2_302_41442_20140125_220923_inLine +BABEL_OP2_302_41442_20140125_220923_outLine +BABEL_OP2_302_42497_20131116_001033_inLine +BABEL_OP2_302_42497_20131116_001033_outLine +BABEL_OP2_302_42497_20131116_002236_inLine +BABEL_OP2_302_42497_20131116_002236_outLine +BABEL_OP2_302_43789_20140108_210806_inLine +BABEL_OP2_302_43789_20140108_210806_outLine +BABEL_OP2_302_44868_20131217_205108_inLine +BABEL_OP2_302_44868_20131217_205108_outLine +BABEL_OP2_302_44868_20131217_205716_inLine +BABEL_OP2_302_44868_20131217_205716_outLine +BABEL_OP2_302_44868_20131217_211035_inLine +BABEL_OP2_302_44868_20131217_211035_outLine +BABEL_OP2_302_45642_20131114_014119_inLine +BABEL_OP2_302_45642_20131114_014119_outLine +BABEL_OP2_302_47156_20140313_011009_inLine +BABEL_OP2_302_47156_20140313_011009_outLine +BABEL_OP2_302_49502_20131104_181501_inLine +BABEL_OP2_302_49502_20131104_181501_outLine +BABEL_OP2_302_50565_20131103_225947_inLine +BABEL_OP2_302_50565_20131103_225947_outLine +BABEL_OP2_302_50726_20131118_025621_inLine +BABEL_OP2_302_50726_20131118_025621_outLine +BABEL_OP2_302_50745_20140214_021844_inLine +BABEL_OP2_302_50745_20140214_021844_outLine +BABEL_OP2_302_60830_20131205_223823_inLine +BABEL_OP2_302_60830_20131205_223823_outLine +BABEL_OP2_302_60830_20131205_225122_inLine +BABEL_OP2_302_60830_20131205_225122_outLine +BABEL_OP2_302_61011_20131110_191134_inLine +BABEL_OP2_302_61011_20131110_191134_outLine +BABEL_OP2_302_61040_20140123_215906_inLine +BABEL_OP2_302_61040_20140123_215906_outLine +BABEL_OP2_302_61963_20140119_184816_inLine +BABEL_OP2_302_61963_20140119_184816_outLine +BABEL_OP2_302_66916_20131121_223838_inLine +BABEL_OP2_302_66916_20131121_223838_outLine +BABEL_OP2_302_70110_20131109_190313_inLine 
+BABEL_OP2_302_70110_20131109_190313_outLine +BABEL_OP2_302_70182_20140214_185232_inLine +BABEL_OP2_302_70182_20140214_185232_outLine +BABEL_OP2_302_72654_20131207_162604_inLine +BABEL_OP2_302_72654_20131207_162604_outLine +BABEL_OP2_302_77730_20131114_223327_inLine +BABEL_OP2_302_77730_20131114_223327_outLine +BABEL_OP2_302_77730_20131114_230511_inLine +BABEL_OP2_302_77730_20131114_230511_outLine +BABEL_OP2_302_77730_20131114_231344_inLine +BABEL_OP2_302_77730_20131114_231344_outLine +BABEL_OP2_302_79080_20140203_192545_inLine +BABEL_OP2_302_79080_20140203_192545_outLine +BABEL_OP2_302_80577_20140126_190012_inLine +BABEL_OP2_302_80577_20140126_190012_outLine +BABEL_OP2_302_81854_20140203_161410_inLine +BABEL_OP2_302_81854_20140203_161410_outLine +BABEL_OP2_302_81971_20131101_194252_inLine +BABEL_OP2_302_81971_20131101_194252_outLine +BABEL_OP2_302_81971_20131101_195016_inLine +BABEL_OP2_302_81971_20131101_195016_outLine +BABEL_OP2_302_84823_20140213_015014_inLine +BABEL_OP2_302_84823_20140213_015014_outLine +BABEL_OP2_302_85248_20140123_204317_inLine +BABEL_OP2_302_85248_20140123_204317_outLine +BABEL_OP2_302_85322_20131108_161437_inLine +BABEL_OP2_302_85322_20131108_161437_outLine +BABEL_OP2_302_86557_20131121_000022_inLine +BABEL_OP2_302_86557_20131121_000022_outLine +BABEL_OP2_302_87889_20140119_163150_inLine +BABEL_OP2_302_87889_20140119_163150_outLine +BABEL_OP2_302_90080_20140120_230635_inLine +BABEL_OP2_302_90080_20140120_230635_outLine +BABEL_OP2_302_91593_20140215_175049_inLine +BABEL_OP2_302_91593_20140215_175049_outLine +BABEL_OP2_302_92509_20131114_030809_inLine +BABEL_OP2_302_92509_20131114_030809_outLine +BABEL_OP2_302_93320_20140218_173001_inLine +BABEL_OP2_302_93320_20140218_173001_outLine +BABEL_OP2_302_93475_20131115_203137_inLine +BABEL_OP2_302_93475_20131115_203137_outLine +BABEL_OP2_302_95583_20131112_203137_inLine +BABEL_OP2_302_95583_20131112_203137_outLine +BABEL_OP2_302_96842_20140131_154710_inLine +BABEL_OP2_302_96842_20140131_154710_outLine diff --git a/egs/babel/s5d/conf/lists/302-kazakh/eval.list b/egs/babel/s5d/conf/lists/302-kazakh/eval.list new file mode 100644 index 00000000000..cf23788087e --- /dev/null +++ b/egs/babel/s5d/conf/lists/302-kazakh/eval.list @@ -0,0 +1,191 @@ +BABEL_OP2_302_10416_20131210_035651_inLine +BABEL_OP2_302_10416_20131210_035651_outLine +BABEL_OP2_302_11096_20140219_220112_inLine +BABEL_OP2_302_11096_20140219_220112_outLine +BABEL_OP2_302_12916_20131107_171154_inLine +BABEL_OP2_302_12916_20131107_171154_outLine +BABEL_OP2_302_15216_20140219_211720_inLine +BABEL_OP2_302_15216_20140219_211720_outLine +BABEL_OP2_302_16787_20131207_203127_inLine +BABEL_OP2_302_16787_20131207_203127_outLine +BABEL_OP2_302_17582_20140215_204647_inLine +BABEL_OP2_302_17582_20140215_204647_outLine +BABEL_OP2_302_17751_20140216_211124_inLine +BABEL_OP2_302_17751_20140216_211124_outLine +BABEL_OP2_302_18291_20140215_182410_inLine +BABEL_OP2_302_18291_20140215_182410_outLine +BABEL_OP2_302_18863_20140118_154802_inLine +BABEL_OP2_302_18863_20140118_154802_outLine +BABEL_OP2_302_19545_20131213_220625_inLine +BABEL_OP2_302_19545_20131213_220625_outLine +BABEL_OP2_302_19672_20131217_215636_inLine +BABEL_OP2_302_19672_20131217_215636_outLine +BABEL_OP2_302_19782_20140125_222442_inLine +BABEL_OP2_302_19782_20140125_222442_outLine +BABEL_OP2_302_20738_20140126_201239_inLine +BABEL_OP2_302_20738_20140126_201239_outLine +BABEL_OP2_302_22624_20140116_163601_inLine +BABEL_OP2_302_22624_20140116_163601_outLine +BABEL_OP2_302_22641_20131104_232148_inLine 
+BABEL_OP2_302_22641_20131104_232148_outLine +BABEL_OP2_302_23628_20131206_185035_inLine +BABEL_OP2_302_23628_20131206_185035_outLine +BABEL_OP2_302_23731_20131211_000104_inLine +BABEL_OP2_302_23731_20131211_000104_outLine +BABEL_OP2_302_23893_20140314_000251_inLine +BABEL_OP2_302_23893_20140314_000251_outLine +BABEL_OP2_302_24924_20140219_171405_inLine +BABEL_OP2_302_24924_20140219_171405_outLine +BABEL_OP2_302_28422_20131224_204108_inLine +BABEL_OP2_302_28422_20131224_204108_outLine +BABEL_OP2_302_28871_20131030_171711_inLine +BABEL_OP2_302_28871_20131030_171711_outLine +BABEL_OP2_302_29352_20140304_201752_inLine +BABEL_OP2_302_29352_20140304_201752_outLine +BABEL_OP2_302_29777_20140114_172507_inLine +BABEL_OP2_302_29777_20140114_172507_outLine +BABEL_OP2_302_31979_20131206_224314_inLine +BABEL_OP2_302_31979_20131206_224314_outLine +BABEL_OP2_302_32914_20140106_220002_inLine +BABEL_OP2_302_32914_20140106_220002_outLine +BABEL_OP2_302_33635_20131206_225838_inLine +BABEL_OP2_302_33635_20131206_225838_outLine +BABEL_OP2_302_33672_20131111_153638_inLine +BABEL_OP2_302_33672_20131111_153638_outLine +BABEL_OP2_302_37064_20131207_191407_inLine +BABEL_OP2_302_37064_20131207_191407_outLine +BABEL_OP2_302_37499_20140225_222508_inLine +BABEL_OP2_302_37499_20140225_222508_outLine +BABEL_OP2_302_38139_20140315_230332_inLine +BABEL_OP2_302_38139_20140315_230332_outLine +BABEL_OP2_302_38979_20140126_212312_inLine +BABEL_OP2_302_38979_20140126_212312_outLine +BABEL_OP2_302_41493_20131031_190908_inLine +BABEL_OP2_302_41493_20131031_190908_outLine +BABEL_OP2_302_42299_20140216_142852_inLine +BABEL_OP2_302_42299_20140216_142852_outLine +BABEL_OP2_302_42942_20131207_000752_inLine +BABEL_OP2_302_42942_20131207_000752_outLine +BABEL_OP2_302_43388_20131222_214138_inLine +BABEL_OP2_302_43388_20131222_214138_outLine +BABEL_OP2_302_45777_20131209_205207_inLine +BABEL_OP2_302_45777_20131209_205207_outLine +BABEL_OP2_302_46974_20140108_014337_inLine +BABEL_OP2_302_46974_20140108_014337_outLine +BABEL_OP2_302_47877_20140118_204004_inLine +BABEL_OP2_302_47877_20140118_204004_outLine +BABEL_OP2_302_48016_20140220_174426_inLine +BABEL_OP2_302_48016_20140220_174426_outLine +BABEL_OP2_302_49775_20131103_031204_inLine +BABEL_OP2_302_49775_20131103_031204_outLine +BABEL_OP2_302_49902_20131218_203252_inLine +BABEL_OP2_302_49902_20131218_203252_outLine +BABEL_OP2_302_52025_20131108_191032_inLine +BABEL_OP2_302_52025_20131108_191032_outLine +BABEL_OP2_302_52025_20131108_193401_inLine +BABEL_OP2_302_52025_20131108_193401_outLine +BABEL_OP2_302_54744_20131111_235401_inLine +BABEL_OP2_302_54744_20131111_235401_outLine +BABEL_OP2_302_55742_20131118_154051_inLine +BABEL_OP2_302_55742_20131118_154051_outLine +BABEL_OP2_302_56019_20140226_155123_inLine +BABEL_OP2_302_56019_20140226_155123_outLine +BABEL_OP2_302_56370_20131120_230147_inLine +BABEL_OP2_302_56370_20131120_230147_outLine +BABEL_OP2_302_56429_20131117_181816_inLine +BABEL_OP2_302_56429_20131117_181816_outLine +BABEL_OP2_302_56523_20131215_162313_inLine +BABEL_OP2_302_56523_20131215_162313_outLine +BABEL_OP2_302_57219_20140218_190044_inLine +BABEL_OP2_302_57219_20140218_190044_outLine +BABEL_OP2_302_57650_20140126_224015_inLine +BABEL_OP2_302_57650_20140126_224015_outLine +BABEL_OP2_302_58815_20140125_201759_inLine +BABEL_OP2_302_58815_20140125_201759_outLine +BABEL_OP2_302_60836_20131115_015627_inLine +BABEL_OP2_302_60836_20131115_015627_outLine +BABEL_OP2_302_61219_20131128_233326_inLine +BABEL_OP2_302_61219_20131128_233326_outLine 
+BABEL_OP2_302_62286_20131214_174209_inLine +BABEL_OP2_302_62286_20131214_174209_outLine +BABEL_OP2_302_63481_20131105_213305_inLine +BABEL_OP2_302_63481_20131105_213305_outLine +BABEL_OP2_302_64759_20131107_153706_inLine +BABEL_OP2_302_66967_20131125_200431_inLine +BABEL_OP2_302_66967_20131125_200431_outLine +BABEL_OP2_302_66967_20131125_201605_inLine +BABEL_OP2_302_66967_20131125_201605_outLine +BABEL_OP2_302_66967_20131125_202216_inLine +BABEL_OP2_302_66967_20131125_202216_outLine +BABEL_OP2_302_67066_20140215_220827_inLine +BABEL_OP2_302_67066_20140215_220827_outLine +BABEL_OP2_302_71404_20131128_225018_inLine +BABEL_OP2_302_71404_20131128_225018_outLine +BABEL_OP2_302_71780_20131121_222518_inLine +BABEL_OP2_302_71780_20131121_222518_outLine +BABEL_OP2_302_73042_20131115_165006_inLine +BABEL_OP2_302_73042_20131115_165006_outLine +BABEL_OP2_302_73119_20131128_222112_inLine +BABEL_OP2_302_73119_20131128_222112_outLine +BABEL_OP2_302_73622_20131117_223750_inLine +BABEL_OP2_302_73622_20131117_223750_outLine +BABEL_OP2_302_73622_20131117_230514_inLine +BABEL_OP2_302_73622_20131117_230514_outLine +BABEL_OP2_302_76372_20140121_204025_inLine +BABEL_OP2_302_76372_20140121_204025_outLine +BABEL_OP2_302_76773_20131117_001202_inLine +BABEL_OP2_302_76773_20131117_001202_outLine +BABEL_OP2_302_77112_20131127_221650_inLine +BABEL_OP2_302_77112_20131127_221650_outLine +BABEL_OP2_302_78604_20131117_205614_inLine +BABEL_OP2_302_78604_20131117_205614_outLine +BABEL_OP2_302_78604_20131117_210914_inLine +BABEL_OP2_302_78604_20131117_210914_outLine +BABEL_OP2_302_78749_20140305_221314_inLine +BABEL_OP2_302_78749_20140305_221314_outLine +BABEL_OP2_302_79107_20140223_160949_inLine +BABEL_OP2_302_79107_20140223_160949_outLine +BABEL_OP2_302_79505_20140221_191940_inLine +BABEL_OP2_302_79505_20140221_191940_outLine +BABEL_OP2_302_79571_20131224_210857_inLine +BABEL_OP2_302_79571_20131224_210857_outLine +BABEL_OP2_302_80881_20131130_200459_inLine +BABEL_OP2_302_80881_20131130_200459_outLine +BABEL_OP2_302_80897_20131226_221806_inLine +BABEL_OP2_302_80897_20131226_221806_outLine +BABEL_OP2_302_82966_20140203_200450_inLine +BABEL_OP2_302_82966_20140203_200450_outLine +BABEL_OP2_302_85179_20140113_180639_inLine +BABEL_OP2_302_85179_20140113_180639_outLine +BABEL_OP2_302_87280_20140123_211738_inLine +BABEL_OP2_302_87280_20140123_211738_outLine +BABEL_OP2_302_88686_20140131_165805_inLine +BABEL_OP2_302_88686_20140131_165805_outLine +BABEL_OP2_302_89372_20131106_214629_inLine +BABEL_OP2_302_89372_20131106_214629_outLine +BABEL_OP2_302_90417_20140215_195110_inLine +BABEL_OP2_302_90417_20140215_195110_outLine +BABEL_OP2_302_90935_20131207_172013_inLine +BABEL_OP2_302_90935_20131207_172013_outLine +BABEL_OP2_302_92281_20140312_223937_inLine +BABEL_OP2_302_92281_20140312_223937_outLine +BABEL_OP2_302_93224_20131219_004305_inLine +BABEL_OP2_302_93224_20131219_004305_outLine +BABEL_OP2_302_93861_20131208_195418_inLine +BABEL_OP2_302_93861_20131208_195418_outLine +BABEL_OP2_302_95663_20131031_164153_inLine +BABEL_OP2_302_95663_20131031_164153_outLine +BABEL_OP2_302_97097_20140121_214508_inLine +BABEL_OP2_302_97097_20140121_214508_outLine +BABEL_OP2_302_97220_20140216_214954_inLine +BABEL_OP2_302_97220_20140216_214954_outLine +BABEL_OP2_302_97264_20140203_010930_inLine +BABEL_OP2_302_97264_20140203_010930_outLine +BABEL_OP2_302_97988_20140226_180453_inLine +BABEL_OP2_302_97988_20140226_180453_outLine +BABEL_OP2_302_98888_20140224_195320_inLine +BABEL_OP2_302_98888_20140224_195320_outLine 
+BABEL_OP2_302_99344_20140317_184547_inLine +BABEL_OP2_302_99344_20140317_184547_outLine +BABEL_OP2_302_99516_20131109_182628_inLine +BABEL_OP2_302_99516_20131109_182628_outLine diff --git a/egs/babel/s5d/conf/lists/302-kazakh/evalpart1.list b/egs/babel/s5d/conf/lists/302-kazakh/evalpart1.list new file mode 100644 index 00000000000..402c6ca4cb0 --- /dev/null +++ b/egs/babel/s5d/conf/lists/302-kazakh/evalpart1.list @@ -0,0 +1,61 @@ +BABEL_OP2_302_10416_20131210_035651_inLine +BABEL_OP2_302_10416_20131210_035651_outLine +BABEL_OP2_302_16787_20131207_203127_inLine +BABEL_OP2_302_16787_20131207_203127_outLine +BABEL_OP2_302_18863_20140118_154802_inLine +BABEL_OP2_302_18863_20140118_154802_outLine +BABEL_OP2_302_19672_20131217_215636_inLine +BABEL_OP2_302_19672_20131217_215636_outLine +BABEL_OP2_302_23628_20131206_185035_inLine +BABEL_OP2_302_23628_20131206_185035_outLine +BABEL_OP2_302_23731_20131211_000104_inLine +BABEL_OP2_302_23731_20131211_000104_outLine +BABEL_OP2_302_33635_20131206_225838_inLine +BABEL_OP2_302_33635_20131206_225838_outLine +BABEL_OP2_302_42942_20131207_000752_inLine +BABEL_OP2_302_42942_20131207_000752_outLine +BABEL_OP2_302_45777_20131209_205207_inLine +BABEL_OP2_302_45777_20131209_205207_outLine +BABEL_OP2_302_46974_20140108_014337_inLine +BABEL_OP2_302_46974_20140108_014337_outLine +BABEL_OP2_302_48016_20140220_174426_inLine +BABEL_OP2_302_48016_20140220_174426_outLine +BABEL_OP2_302_49775_20131103_031204_inLine +BABEL_OP2_302_49775_20131103_031204_outLine +BABEL_OP2_302_54744_20131111_235401_inLine +BABEL_OP2_302_54744_20131111_235401_outLine +BABEL_OP2_302_55742_20131118_154051_inLine +BABEL_OP2_302_55742_20131118_154051_outLine +BABEL_OP2_302_56019_20140226_155123_inLine +BABEL_OP2_302_56019_20140226_155123_outLine +BABEL_OP2_302_56429_20131117_181816_inLine +BABEL_OP2_302_56429_20131117_181816_outLine +BABEL_OP2_302_57650_20140126_224015_inLine +BABEL_OP2_302_57650_20140126_224015_outLine +BABEL_OP2_302_58815_20140125_201759_inLine +BABEL_OP2_302_58815_20140125_201759_outLine +BABEL_OP2_302_63481_20131105_213305_inLine +BABEL_OP2_302_63481_20131105_213305_outLine +BABEL_OP2_302_64759_20131107_153706_inLine +BABEL_OP2_302_71780_20131121_222518_inLine +BABEL_OP2_302_71780_20131121_222518_outLine +BABEL_OP2_302_73042_20131115_165006_inLine +BABEL_OP2_302_73042_20131115_165006_outLine +BABEL_OP2_302_73119_20131128_222112_inLine +BABEL_OP2_302_73119_20131128_222112_outLine +BABEL_OP2_302_76773_20131117_001202_inLine +BABEL_OP2_302_76773_20131117_001202_outLine +BABEL_OP2_302_78604_20131117_205614_inLine +BABEL_OP2_302_78604_20131117_205614_outLine +BABEL_OP2_302_78604_20131117_210914_inLine +BABEL_OP2_302_78604_20131117_210914_outLine +BABEL_OP2_302_80897_20131226_221806_inLine +BABEL_OP2_302_80897_20131226_221806_outLine +BABEL_OP2_302_89372_20131106_214629_inLine +BABEL_OP2_302_89372_20131106_214629_outLine +BABEL_OP2_302_92281_20140312_223937_inLine +BABEL_OP2_302_92281_20140312_223937_outLine +BABEL_OP2_302_97097_20140121_214508_inLine +BABEL_OP2_302_97097_20140121_214508_outLine +BABEL_OP2_302_98888_20140224_195320_inLine +BABEL_OP2_302_98888_20140224_195320_outLine diff --git a/egs/babel/s5d/conf/lists/302-kazakh/sub-train.list b/egs/babel/s5d/conf/lists/302-kazakh/sub-train.list new file mode 100644 index 00000000000..ef82fb8fc17 --- /dev/null +++ b/egs/babel/s5d/conf/lists/302-kazakh/sub-train.list @@ -0,0 +1,130 @@ +BABEL_OP2_302_13483_20140111_145619_inLine +BABEL_OP2_302_13483_20140111_145619_outLine +BABEL_OP2_302_13792_20131105_160713_inLine 
+BABEL_OP2_302_13792_20131105_160713_outLine +BABEL_OP2_302_14137_20131205_201718_inLine +BABEL_OP2_302_14137_20131205_201718_outLine +BABEL_OP2_302_15638_20131227_190456_inLine +BABEL_OP2_302_15638_20131227_190456_outLine +BABEL_OP2_302_16467_20140125_193127_inLine +BABEL_OP2_302_16467_20140125_193127_outLine +BABEL_OP2_302_16886_20131209_211339_inLine +BABEL_OP2_302_16886_20131209_211339_outLine +BABEL_OP2_302_17113_20140216_165407_inLine +BABEL_OP2_302_17113_20140216_165407_outLine +BABEL_OP2_302_17567_20131227_223417_inLine +BABEL_OP2_302_17567_20131227_223417_outLine +BABEL_OP2_302_18118_20140312_010735_inLine +BABEL_OP2_302_18118_20140312_010735_outLine +BABEL_OP2_302_19722_20131106_001542_inLine +BABEL_OP2_302_19722_20131106_001542_outLine +BABEL_OP2_302_22280_20131214_220249_inLine +BABEL_OP2_302_22280_20131214_220249_outLine +BABEL_OP2_302_23505_20131113_214234_inLine +BABEL_OP2_302_23505_20131113_214234_outLine +BABEL_OP2_302_23505_20131113_215736_inLine +BABEL_OP2_302_23505_20131113_215736_outLine +BABEL_OP2_302_24323_20131207_212641_inLine +BABEL_OP2_302_24323_20131207_212641_outLine +BABEL_OP2_302_25085_20140216_161934_inLine +BABEL_OP2_302_25085_20140216_161934_outLine +BABEL_OP2_302_29135_20131031_201509_inLine +BABEL_OP2_302_29135_20131031_201509_outLine +BABEL_OP2_302_29416_20140125_222019_inLine +BABEL_OP2_302_29416_20140125_222019_outLine +BABEL_OP2_302_31490_20131120_230743_inLine +BABEL_OP2_302_31490_20131120_230743_outLine +BABEL_OP2_302_32287_20140316_185534_inLine +BABEL_OP2_302_32287_20140316_185534_outLine +BABEL_OP2_302_32301_20140108_212650_inLine +BABEL_OP2_302_32301_20140108_212650_outLine +BABEL_OP2_302_34197_20131203_173358_inLine +BABEL_OP2_302_34197_20131203_173358_outLine +BABEL_OP2_302_34477_20131205_030548_inLine +BABEL_OP2_302_34477_20131205_030548_outLine +BABEL_OP2_302_34477_20131205_035623_inLine +BABEL_OP2_302_34477_20131205_035623_outLine +BABEL_OP2_302_37598_20131218_200535_inLine +BABEL_OP2_302_37598_20131218_200535_outLine +BABEL_OP2_302_38588_20131216_211052_inLine +BABEL_OP2_302_38588_20131216_211052_outLine +BABEL_OP2_302_39744_20131031_182731_inLine +BABEL_OP2_302_39744_20131031_182731_outLine +BABEL_OP2_302_41233_20140111_195838_inLine +BABEL_OP2_302_41233_20140111_195838_outLine +BABEL_OP2_302_43646_20131204_185430_inLine +BABEL_OP2_302_43646_20131204_185430_outLine +BABEL_OP2_302_43920_20140312_031242_inLine +BABEL_OP2_302_43920_20140312_031242_outLine +BABEL_OP2_302_44619_20131212_234348_inLine +BABEL_OP2_302_44619_20131212_234348_outLine +BABEL_OP2_302_46763_20140225_183302_inLine +BABEL_OP2_302_46763_20140225_183302_outLine +BABEL_OP2_302_48243_20131128_221311_inLine +BABEL_OP2_302_48243_20131128_221311_outLine +BABEL_OP2_302_49912_20140217_201647_inLine +BABEL_OP2_302_49912_20140217_201647_outLine +BABEL_OP2_302_50779_20131219_172746_inLine +BABEL_OP2_302_50779_20131219_172746_outLine +BABEL_OP2_302_53492_20140124_221354_inLine +BABEL_OP2_302_53492_20140124_221354_outLine +BABEL_OP2_302_53492_20140124_231722_inLine +BABEL_OP2_302_53492_20140124_231722_outLine +BABEL_OP2_302_56306_20140115_190808_inLine +BABEL_OP2_302_56306_20140115_190808_outLine +BABEL_OP2_302_58850_20131209_231304_inLine +BABEL_OP2_302_58850_20131209_231304_outLine +BABEL_OP2_302_61888_20140127_161005_inLine +BABEL_OP2_302_61888_20140127_161005_outLine +BABEL_OP2_302_70386_20131203_030837_inLine +BABEL_OP2_302_70386_20131203_030837_outLine +BABEL_OP2_302_70452_20131219_032729_inLine +BABEL_OP2_302_70452_20131219_032729_outLine 
+BABEL_OP2_302_71038_20140119_172132_inLine +BABEL_OP2_302_71038_20140119_172132_outLine +BABEL_OP2_302_71067_20140130_194954_inLine +BABEL_OP2_302_71067_20140130_194954_outLine +BABEL_OP2_302_75223_20131130_211714_inLine +BABEL_OP2_302_75223_20131130_211714_outLine +BABEL_OP2_302_75223_20131130_212825_inLine +BABEL_OP2_302_75223_20131130_212825_outLine +BABEL_OP2_302_77126_20131111_012344_inLine +BABEL_OP2_302_77126_20131111_012344_outLine +BABEL_OP2_302_77242_20140217_184823_inLine +BABEL_OP2_302_77242_20140217_184823_outLine +BABEL_OP2_302_79898_20140310_200258_inLine +BABEL_OP2_302_79898_20140310_200258_outLine +BABEL_OP2_302_80781_20131207_183741_inLine +BABEL_OP2_302_80781_20131207_183741_outLine +BABEL_OP2_302_81213_20131118_175514_inLine +BABEL_OP2_302_81213_20131118_175514_outLine +BABEL_OP2_302_82138_20131206_045140_inLine +BABEL_OP2_302_82138_20131206_045140_outLine +BABEL_OP2_302_82145_20140301_225354_inLine +BABEL_OP2_302_82145_20140301_225354_outLine +BABEL_OP2_302_82224_20140203_014024_inLine +BABEL_OP2_302_82224_20140203_014024_outLine +BABEL_OP2_302_83436_20131106_170059_inLine +BABEL_OP2_302_83436_20131106_170059_outLine +BABEL_OP2_302_84408_20131207_204020_inLine +BABEL_OP2_302_84408_20131207_204020_outLine +BABEL_OP2_302_85010_20140316_222754_inLine +BABEL_OP2_302_85010_20140316_222754_outLine +BABEL_OP2_302_87298_20140130_191447_inLine +BABEL_OP2_302_87298_20140130_191447_outLine +BABEL_OP2_302_87693_20131121_041057_inLine +BABEL_OP2_302_87693_20131121_041057_outLine +BABEL_OP2_302_94803_20140313_225823_inLine +BABEL_OP2_302_94803_20140313_225823_outLine +BABEL_OP2_302_95598_20131101_172634_inLine +BABEL_OP2_302_95598_20131101_172634_outLine +BABEL_OP2_302_95598_20131101_175037_inLine +BABEL_OP2_302_95598_20131101_175037_outLine +BABEL_OP2_302_95903_20140303_002203_inLine +BABEL_OP2_302_95903_20140303_002203_outLine +BABEL_OP2_302_97731_20140114_201001_inLine +BABEL_OP2_302_97731_20140114_201001_outLine +BABEL_OP2_302_97772_20131107_223232_inLine +BABEL_OP2_302_97772_20131107_223232_outLine +BABEL_OP2_302_98489_20131204_181216_inLine +BABEL_OP2_302_98489_20131204_181216_outLine diff --git a/egs/babel/s5d/conf/lists/302-kazakh/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/302-kazakh/sub-train.untranscribed.list new file mode 100644 index 00000000000..668576c2888 --- /dev/null +++ b/egs/babel/s5d/conf/lists/302-kazakh/sub-train.untranscribed.list @@ -0,0 +1,398 @@ +BABEL_OP2_302_10036_20131223_231808_inLine +BABEL_OP2_302_10036_20131223_231808_outLine +BABEL_OP2_302_10313_20140319_000910_inLine +BABEL_OP2_302_10313_20140319_000910_outLine +BABEL_OP2_302_10938_20140110_231500_inLine +BABEL_OP2_302_10938_20140110_231500_outLine +BABEL_OP2_302_10966_20131201_171745_inLine +BABEL_OP2_302_10966_20131201_171745_outLine +BABEL_OP2_302_11486_20140327_014542_inLine +BABEL_OP2_302_11486_20140327_014542_outLine +BABEL_OP2_302_11528_20140313_172050_inLine +BABEL_OP2_302_11528_20140313_172050_outLine +BABEL_OP2_302_11581_20131224_173459_inLine +BABEL_OP2_302_11581_20131224_173459_outLine +BABEL_OP2_302_11797_20131123_210739_inLine +BABEL_OP2_302_11797_20131123_210739_outLine +BABEL_OP2_302_12220_20131208_170136_inLine +BABEL_OP2_302_12220_20131208_170136_outLine +BABEL_OP2_302_12606_20140203_201955_inLine +BABEL_OP2_302_12606_20140203_201955_outLine +BABEL_OP2_302_12609_20140213_010711_inLine +BABEL_OP2_302_12609_20140213_010711_outLine +BABEL_OP2_302_12767_20131109_202610_inLine +BABEL_OP2_302_12767_20131109_202610_outLine +BABEL_OP2_302_12846_20140216_173021_inLine 
+BABEL_OP2_302_12846_20140216_173021_outLine +BABEL_OP2_302_12851_20131030_220616_inLine +BABEL_OP2_302_12851_20131030_220616_outLine +BABEL_OP2_302_13664_20131030_032900_inLine +BABEL_OP2_302_13664_20131030_032900_outLine +BABEL_OP2_302_13709_20140126_163818_inLine +BABEL_OP2_302_13709_20140126_163818_outLine +BABEL_OP2_302_14725_20131106_204535_inLine +BABEL_OP2_302_14725_20131106_204535_outLine +BABEL_OP2_302_14807_20131220_203507_inLine +BABEL_OP2_302_14807_20131220_203507_outLine +BABEL_OP2_302_14814_20131206_165156_inLine +BABEL_OP2_302_14814_20131206_165156_outLine +BABEL_OP2_302_14899_20131101_223556_inLine +BABEL_OP2_302_14899_20131101_223556_outLine +BABEL_OP2_302_14972_20131220_203939_inLine +BABEL_OP2_302_14972_20131220_203939_outLine +BABEL_OP2_302_15535_20131227_221937_inLine +BABEL_OP2_302_15535_20131227_221937_outLine +BABEL_OP2_302_15617_20140312_215158_inLine +BABEL_OP2_302_15617_20140312_215158_outLine +BABEL_OP2_302_15730_20131121_044516_inLine +BABEL_OP2_302_15730_20131121_044516_outLine +BABEL_OP2_302_16839_20140203_151410_inLine +BABEL_OP2_302_16839_20140203_151410_outLine +BABEL_OP2_302_17032_20140108_211239_inLine +BABEL_OP2_302_17032_20140108_211239_outLine +BABEL_OP2_302_17097_20140310_234246_inLine +BABEL_OP2_302_17097_20140310_234246_outLine +BABEL_OP2_302_17280_20131214_140641_inLine +BABEL_OP2_302_17280_20131214_140641_outLine +BABEL_OP2_302_17320_20140203_165125_inLine +BABEL_OP2_302_17320_20140203_165125_outLine +BABEL_OP2_302_18078_20140219_195739_inLine +BABEL_OP2_302_18078_20140219_195739_outLine +BABEL_OP2_302_18242_20140219_185647_inLine +BABEL_OP2_302_18242_20140219_185647_outLine +BABEL_OP2_302_19773_20140113_201049_inLine +BABEL_OP2_302_19773_20140113_201049_outLine +BABEL_OP2_302_20133_20131225_003913_inLine +BABEL_OP2_302_20133_20131225_003913_outLine +BABEL_OP2_302_20367_20140220_000514_inLine +BABEL_OP2_302_20367_20140220_000514_outLine +BABEL_OP2_302_20437_20140317_015757_inLine +BABEL_OP2_302_20437_20140317_015757_outLine +BABEL_OP2_302_20916_20131031_232512_inLine +BABEL_OP2_302_20916_20131031_232512_outLine +BABEL_OP2_302_20922_20140115_174224_inLine +BABEL_OP2_302_20922_20140115_174224_outLine +BABEL_OP2_302_20972_20140301_200910_inLine +BABEL_OP2_302_20972_20140301_200910_outLine +BABEL_OP2_302_20985_20131227_225613_inLine +BABEL_OP2_302_20985_20131227_225613_outLine +BABEL_OP2_302_20985_20131227_230755_inLine +BABEL_OP2_302_20985_20131227_230755_outLine +BABEL_OP2_302_21206_20131209_212818_inLine +BABEL_OP2_302_21206_20131209_212818_outLine +BABEL_OP2_302_21206_20131209_214221_inLine +BABEL_OP2_302_21206_20131209_214221_outLine +BABEL_OP2_302_21435_20140201_181751_inLine +BABEL_OP2_302_21435_20140201_181751_outLine +BABEL_OP2_302_21624_20140302_191929_inLine +BABEL_OP2_302_21624_20140302_191929_outLine +BABEL_OP2_302_21807_20131215_163416_inLine +BABEL_OP2_302_21807_20131215_163416_outLine +BABEL_OP2_302_22321_20131204_001445_inLine +BABEL_OP2_302_22321_20131204_001445_outLine +BABEL_OP2_302_22321_20131204_002854_inLine +BABEL_OP2_302_22321_20131204_002854_outLine +BABEL_OP2_302_22446_20131107_221527_inLine +BABEL_OP2_302_22446_20131107_221527_outLine +BABEL_OP2_302_23239_20131227_213345_inLine +BABEL_OP2_302_23239_20131227_213345_outLine +BABEL_OP2_302_23239_20131227_214733_inLine +BABEL_OP2_302_23239_20131227_214733_outLine +BABEL_OP2_302_23980_20131206_213027_inLine +BABEL_OP2_302_23980_20131206_213027_outLine +BABEL_OP2_302_24239_20140314_185042_inLine +BABEL_OP2_302_24239_20140314_185042_outLine 
+BABEL_OP2_302_24241_20140312_211507_inLine +BABEL_OP2_302_24241_20140312_211507_outLine +BABEL_OP2_302_24270_20131218_184807_inLine +BABEL_OP2_302_24270_20131218_184807_outLine +BABEL_OP2_302_24586_20140301_162559_inLine +BABEL_OP2_302_24586_20140301_162559_outLine +BABEL_OP2_302_24605_20131109_160432_inLine +BABEL_OP2_302_24605_20131109_160432_outLine +BABEL_OP2_302_24648_20140313_194015_inLine +BABEL_OP2_302_24648_20140313_194015_outLine +BABEL_OP2_302_24679_20131101_171953_inLine +BABEL_OP2_302_24679_20131101_171953_outLine +BABEL_OP2_302_24679_20131101_173035_inLine +BABEL_OP2_302_24679_20131101_173035_outLine +BABEL_OP2_302_24982_20131128_202029_inLine +BABEL_OP2_302_24982_20131128_202029_outLine +BABEL_OP2_302_24982_20131128_202537_inLine +BABEL_OP2_302_24982_20131128_202537_outLine +BABEL_OP2_302_24982_20131128_203436_inLine +BABEL_OP2_302_24982_20131128_203436_outLine +BABEL_OP2_302_25496_20140228_212748_inLine +BABEL_OP2_302_25496_20140228_212748_outLine +BABEL_OP2_302_25767_20131108_203252_inLine +BABEL_OP2_302_25767_20131108_203252_outLine +BABEL_OP2_302_25767_20131108_205755_inLine +BABEL_OP2_302_25767_20131108_205755_outLine +BABEL_OP2_302_25961_20131122_214450_inLine +BABEL_OP2_302_25961_20131122_214450_outLine +BABEL_OP2_302_26388_20140203_173156_inLine +BABEL_OP2_302_26388_20140203_173156_outLine +BABEL_OP2_302_26836_20131207_194346_inLine +BABEL_OP2_302_26836_20131207_194346_outLine +BABEL_OP2_302_27367_20140317_000858_inLine +BABEL_OP2_302_27367_20140317_000858_outLine +BABEL_OP2_302_28012_20140115_155940_inLine +BABEL_OP2_302_28012_20140115_155940_outLine +BABEL_OP2_302_28477_20140127_173004_inLine +BABEL_OP2_302_28477_20140127_173004_outLine +BABEL_OP2_302_28595_20140312_200036_inLine +BABEL_OP2_302_28595_20140312_200036_outLine +BABEL_OP2_302_28814_20140115_202820_inLine +BABEL_OP2_302_28814_20140115_202820_outLine +BABEL_OP2_302_29072_20131224_215008_inLine +BABEL_OP2_302_29072_20131224_215008_outLine +BABEL_OP2_302_29439_20140226_160155_inLine +BABEL_OP2_302_29439_20140226_160155_outLine +BABEL_OP2_302_30013_20140111_202103_inLine +BABEL_OP2_302_30013_20140111_202103_outLine +BABEL_OP2_302_30345_20140113_154634_inLine +BABEL_OP2_302_30345_20140113_154634_outLine +BABEL_OP2_302_30461_20140305_205327_inLine +BABEL_OP2_302_30461_20140305_205327_outLine +BABEL_OP2_302_30720_20140312_002038_inLine +BABEL_OP2_302_30720_20140312_002038_outLine +BABEL_OP2_302_31267_20140221_194733_inLine +BABEL_OP2_302_31267_20140221_194733_outLine +BABEL_OP2_302_32097_20131106_232714_inLine +BABEL_OP2_302_32097_20131106_232714_outLine +BABEL_OP2_302_32097_20131106_233937_inLine +BABEL_OP2_302_32097_20131106_233937_outLine +BABEL_OP2_302_34106_20131118_201548_inLine +BABEL_OP2_302_34106_20131118_201548_outLine +BABEL_OP2_302_34486_20140313_003302_inLine +BABEL_OP2_302_34486_20140313_003302_outLine +BABEL_OP2_302_34811_20131210_202739_inLine +BABEL_OP2_302_34811_20131210_202739_outLine +BABEL_OP2_302_34826_20140127_205243_inLine +BABEL_OP2_302_34826_20140127_205243_outLine +BABEL_OP2_302_35000_20140126_011711_inLine +BABEL_OP2_302_35000_20140126_011711_outLine +BABEL_OP2_302_35139_20131117_174341_inLine +BABEL_OP2_302_35139_20131117_174341_outLine +BABEL_OP2_302_36894_20131113_172242_inLine +BABEL_OP2_302_36894_20131113_172242_outLine +BABEL_OP2_302_37271_20131228_201109_inLine +BABEL_OP2_302_37271_20131228_201109_outLine +BABEL_OP2_302_37682_20131218_170241_inLine +BABEL_OP2_302_37682_20131218_170241_outLine +BABEL_OP2_302_39006_20140220_200207_inLine 
+BABEL_OP2_302_39006_20140220_200207_outLine +BABEL_OP2_302_39555_20140110_211809_inLine +BABEL_OP2_302_39555_20140110_211809_outLine +BABEL_OP2_302_39848_20131210_214951_inLine +BABEL_OP2_302_39848_20131210_214951_outLine +BABEL_OP2_302_41680_20131031_034941_inLine +BABEL_OP2_302_41680_20131031_034941_outLine +BABEL_OP2_302_42526_20140119_151507_inLine +BABEL_OP2_302_42526_20140119_151507_outLine +BABEL_OP2_302_42771_20131210_163330_inLine +BABEL_OP2_302_42771_20131210_163330_outLine +BABEL_OP2_302_43286_20131105_180949_inLine +BABEL_OP2_302_43286_20131105_180949_outLine +BABEL_OP2_302_43784_20131128_211932_inLine +BABEL_OP2_302_43784_20131128_211932_outLine +BABEL_OP2_302_43788_20131225_201206_inLine +BABEL_OP2_302_43788_20131225_201206_outLine +BABEL_OP2_302_44961_20131111_223956_inLine +BABEL_OP2_302_44961_20131111_223956_outLine +BABEL_OP2_302_45770_20131107_181651_inLine +BABEL_OP2_302_45770_20131107_181651_outLine +BABEL_OP2_302_46268_20131113_230054_inLine +BABEL_OP2_302_46268_20131113_230054_outLine +BABEL_OP2_302_46558_20131204_011205_inLine +BABEL_OP2_302_46558_20131204_011205_outLine +BABEL_OP2_302_46688_20131114_195414_inLine +BABEL_OP2_302_46688_20131114_195414_outLine +BABEL_OP2_302_46757_20131227_210756_inLine +BABEL_OP2_302_46757_20131227_210756_outLine +BABEL_OP2_302_46976_20131212_235226_inLine +BABEL_OP2_302_46976_20131212_235226_outLine +BABEL_OP2_302_47866_20140122_200544_inLine +BABEL_OP2_302_47866_20140122_200544_outLine +BABEL_OP2_302_48610_20131112_232839_inLine +BABEL_OP2_302_48610_20131112_232839_outLine +BABEL_OP2_302_50549_20140130_172816_inLine +BABEL_OP2_302_50549_20140130_172816_outLine +BABEL_OP2_302_50810_20131031_180316_inLine +BABEL_OP2_302_50810_20131031_180316_outLine +BABEL_OP2_302_51955_20131117_200909_inLine +BABEL_OP2_302_51955_20131117_200909_outLine +BABEL_OP2_302_52272_20131106_175931_inLine +BABEL_OP2_302_52272_20131106_175931_outLine +BABEL_OP2_302_52381_20140118_163935_inLine +BABEL_OP2_302_52381_20140118_163935_outLine +BABEL_OP2_302_52404_20131226_172656_inLine +BABEL_OP2_302_52404_20131226_172656_outLine +BABEL_OP2_302_53063_20140219_175252_inLine +BABEL_OP2_302_53063_20140219_175252_outLine +BABEL_OP2_302_54074_20131213_143818_inLine +BABEL_OP2_302_54074_20131213_143818_outLine +BABEL_OP2_302_54104_20131108_172927_inLine +BABEL_OP2_302_54104_20131108_172927_outLine +BABEL_OP2_302_54697_20140125_210815_inLine +BABEL_OP2_302_54697_20140125_210815_outLine +BABEL_OP2_302_54953_20131207_170139_inLine +BABEL_OP2_302_54953_20131207_170139_outLine +BABEL_OP2_302_55106_20140226_210229_inLine +BABEL_OP2_302_55106_20140226_210229_outLine +BABEL_OP2_302_57065_20140105_155451_inLine +BABEL_OP2_302_57065_20140105_155451_outLine +BABEL_OP2_302_57548_20131220_025554_inLine +BABEL_OP2_302_57548_20131220_025554_outLine +BABEL_OP2_302_57566_20140129_021108_inLine +BABEL_OP2_302_57566_20140129_021108_outLine +BABEL_OP2_302_58926_20131121_050015_inLine +BABEL_OP2_302_58926_20131121_050015_outLine +BABEL_OP2_302_59509_20140203_215611_inLine +BABEL_OP2_302_59509_20140203_215611_outLine +BABEL_OP2_302_59635_20140124_183551_inLine +BABEL_OP2_302_59635_20140124_183551_outLine +BABEL_OP2_302_59720_20131206_145023_inLine +BABEL_OP2_302_59720_20131206_145023_outLine +BABEL_OP2_302_59747_20131104_164054_inLine +BABEL_OP2_302_59747_20131104_164054_outLine +BABEL_OP2_302_60299_20140220_162147_inLine +BABEL_OP2_302_60299_20140220_162147_outLine +BABEL_OP2_302_63307_20131213_180556_inLine +BABEL_OP2_302_63307_20131213_180556_outLine 
+BABEL_OP2_302_63999_20140214_214838_inLine +BABEL_OP2_302_63999_20140214_214838_outLine +BABEL_OP2_302_64870_20131226_033837_inLine +BABEL_OP2_302_64870_20131226_033837_outLine +BABEL_OP2_302_66001_20131114_212059_inLine +BABEL_OP2_302_66001_20131114_212059_outLine +BABEL_OP2_302_66822_20131207_210025_inLine +BABEL_OP2_302_66822_20131207_210025_outLine +BABEL_OP2_302_67401_20131220_213115_inLine +BABEL_OP2_302_67401_20131220_213115_outLine +BABEL_OP2_302_67622_20131109_180009_inLine +BABEL_OP2_302_67622_20131109_180009_outLine +BABEL_OP2_302_67659_20131130_221743_inLine +BABEL_OP2_302_67659_20131130_221743_outLine +BABEL_OP2_302_67773_20140318_210730_inLine +BABEL_OP2_302_67773_20140318_210730_outLine +BABEL_OP2_302_68244_20140131_200512_inLine +BABEL_OP2_302_68244_20140131_200512_outLine +BABEL_OP2_302_68289_20140126_231552_inLine +BABEL_OP2_302_68289_20140126_231552_outLine +BABEL_OP2_302_68924_20140130_230433_inLine +BABEL_OP2_302_68924_20140130_230433_outLine +BABEL_OP2_302_69578_20131207_134320_inLine +BABEL_OP2_302_69578_20131207_134320_outLine +BABEL_OP2_302_69746_20140118_173307_inLine +BABEL_OP2_302_69746_20140118_173307_outLine +BABEL_OP2_302_70121_20131209_204053_inLine +BABEL_OP2_302_70121_20131209_204053_outLine +BABEL_OP2_302_70251_20131109_191513_inLine +BABEL_OP2_302_70251_20131109_191513_outLine +BABEL_OP2_302_70460_20140222_223545_inLine +BABEL_OP2_302_70460_20140222_223545_outLine +BABEL_OP2_302_70713_20140126_171356_inLine +BABEL_OP2_302_70713_20140126_171356_outLine +BABEL_OP2_302_70794_20131107_185831_inLine +BABEL_OP2_302_70794_20131107_185831_outLine +BABEL_OP2_302_72952_20140215_000239_inLine +BABEL_OP2_302_72952_20140215_000239_outLine +BABEL_OP2_302_73518_20140123_190347_inLine +BABEL_OP2_302_73518_20140123_190347_outLine +BABEL_OP2_302_74921_20131225_190044_inLine +BABEL_OP2_302_74921_20131225_190044_outLine +BABEL_OP2_302_75505_20131104_234450_inLine +BABEL_OP2_302_75505_20131104_234450_outLine +BABEL_OP2_302_75505_20131104_235752_inLine +BABEL_OP2_302_75505_20131104_235752_outLine +BABEL_OP2_302_76756_20131219_183439_inLine +BABEL_OP2_302_76756_20131219_183439_outLine +BABEL_OP2_302_77033_20140127_183412_inLine +BABEL_OP2_302_77033_20140127_183412_outLine +BABEL_OP2_302_77990_20131117_222127_inLine +BABEL_OP2_302_77990_20131117_222127_outLine +BABEL_OP2_302_78398_20131116_213051_inLine +BABEL_OP2_302_78398_20131116_213051_outLine +BABEL_OP2_302_78943_20131206_035541_inLine +BABEL_OP2_302_78943_20131206_035541_outLine +BABEL_OP2_302_78943_20131206_042746_inLine +BABEL_OP2_302_78943_20131206_042746_outLine +BABEL_OP2_302_79995_20140125_225240_inLine +BABEL_OP2_302_79995_20140125_225240_outLine +BABEL_OP2_302_80306_20131206_235538_inLine +BABEL_OP2_302_80306_20131206_235538_outLine +BABEL_OP2_302_81287_20140115_190105_inLine +BABEL_OP2_302_81287_20140115_190105_outLine +BABEL_OP2_302_81671_20140131_220121_inLine +BABEL_OP2_302_81671_20140131_220121_outLine +BABEL_OP2_302_82089_20131206_191938_inLine +BABEL_OP2_302_82089_20131206_191938_outLine +BABEL_OP2_302_82425_20131115_005742_inLine +BABEL_OP2_302_82425_20131115_005742_outLine +BABEL_OP2_302_82935_20140114_204802_inLine +BABEL_OP2_302_82935_20140114_204802_outLine +BABEL_OP2_302_83430_20140315_203750_inLine +BABEL_OP2_302_83430_20140315_203750_outLine +BABEL_OP2_302_83455_20131208_201956_inLine +BABEL_OP2_302_83455_20131208_201956_outLine +BABEL_OP2_302_84469_20140107_205046_inLine +BABEL_OP2_302_84469_20140107_205046_outLine +BABEL_OP2_302_84715_20140127_201518_inLine 
+BABEL_OP2_302_84715_20140127_201518_outLine +BABEL_OP2_302_84936_20140108_204108_inLine +BABEL_OP2_302_84936_20140108_204108_outLine +BABEL_OP2_302_86628_20140215_171431_inLine +BABEL_OP2_302_86628_20140215_171431_outLine +BABEL_OP2_302_86715_20140215_174540_inLine +BABEL_OP2_302_86715_20140215_174540_outLine +BABEL_OP2_302_87305_20140214_225515_inLine +BABEL_OP2_302_87305_20140214_225515_outLine +BABEL_OP2_302_87777_20140127_145958_inLine +BABEL_OP2_302_87777_20140127_145958_outLine +BABEL_OP2_302_88661_20131225_211835_inLine +BABEL_OP2_302_88661_20131225_211835_outLine +BABEL_OP2_302_88938_20140202_215623_inLine +BABEL_OP2_302_88938_20140202_215623_outLine +BABEL_OP2_302_89059_20140115_214308_inLine +BABEL_OP2_302_89059_20140115_214308_outLine +BABEL_OP2_302_90709_20131111_143953_inLine +BABEL_OP2_302_90709_20131111_143953_outLine +BABEL_OP2_302_92557_20140125_230505_inLine +BABEL_OP2_302_92557_20140125_230505_outLine +BABEL_OP2_302_92736_20140119_170328_inLine +BABEL_OP2_302_92736_20140119_170328_outLine +BABEL_OP2_302_92809_20131109_182045_inLine +BABEL_OP2_302_92809_20131109_182045_outLine +BABEL_OP2_302_92942_20131219_014744_inLine +BABEL_OP2_302_92942_20131219_014744_outLine +BABEL_OP2_302_93632_20140203_154221_inLine +BABEL_OP2_302_93632_20140203_154221_outLine +BABEL_OP2_302_93964_20131208_002934_inLine +BABEL_OP2_302_93964_20131208_002934_outLine +BABEL_OP2_302_94253_20131114_215945_inLine +BABEL_OP2_302_94253_20131114_215945_outLine +BABEL_OP2_302_94449_20140314_185933_inLine +BABEL_OP2_302_94449_20140314_185933_outLine +BABEL_OP2_302_95670_20131130_185901_inLine +BABEL_OP2_302_95670_20131130_185901_outLine +BABEL_OP2_302_96525_20140110_201817_inLine +BABEL_OP2_302_96525_20140110_201817_outLine +BABEL_OP2_302_96690_20131204_221739_inLine +BABEL_OP2_302_96690_20131204_221739_outLine +BABEL_OP2_302_96910_20140130_210316_inLine +BABEL_OP2_302_96910_20140130_210316_outLine +BABEL_OP2_302_97461_20131211_211339_inLine +BABEL_OP2_302_97461_20131211_211339_outLine +BABEL_OP2_302_97557_20131219_192714_inLine +BABEL_OP2_302_97557_20131219_192714_outLine +BABEL_OP2_302_97588_20131101_161834_inLine +BABEL_OP2_302_97588_20131101_161834_outLine +BABEL_OP2_302_97588_20131101_163947_inLine +BABEL_OP2_302_97588_20131101_163947_outLine +BABEL_OP2_302_98909_20131117_153948_inLine +BABEL_OP2_302_98909_20131117_153948_outLine +BABEL_OP2_302_99401_20131114_221114_inLine +BABEL_OP2_302_99401_20131114_221114_outLine +BABEL_OP2_302_99887_20140129_162421_inLine +BABEL_OP2_302_99887_20140129_162421_outLine diff --git a/egs/babel/s5d/conf/lists/302-kazakh/training.list b/egs/babel/s5d/conf/lists/302-kazakh/training.list new file mode 100644 index 00000000000..c2026850026 --- /dev/null +++ b/egs/babel/s5d/conf/lists/302-kazakh/training.list @@ -0,0 +1,528 @@ +BABEL_OP2_302_10036_20131223_231808_inLine +BABEL_OP2_302_10036_20131223_231808_outLine +BABEL_OP2_302_10313_20140319_000910_inLine +BABEL_OP2_302_10313_20140319_000910_outLine +BABEL_OP2_302_10938_20140110_231500_inLine +BABEL_OP2_302_10938_20140110_231500_outLine +BABEL_OP2_302_10966_20131201_171745_inLine +BABEL_OP2_302_10966_20131201_171745_outLine +BABEL_OP2_302_11486_20140327_014542_inLine +BABEL_OP2_302_11486_20140327_014542_outLine +BABEL_OP2_302_11528_20140313_172050_inLine +BABEL_OP2_302_11528_20140313_172050_outLine +BABEL_OP2_302_11581_20131224_173459_inLine +BABEL_OP2_302_11581_20131224_173459_outLine +BABEL_OP2_302_11797_20131123_210739_inLine +BABEL_OP2_302_11797_20131123_210739_outLine +BABEL_OP2_302_12220_20131208_170136_inLine 
+BABEL_OP2_302_12220_20131208_170136_outLine +BABEL_OP2_302_12606_20140203_201955_inLine +BABEL_OP2_302_12606_20140203_201955_outLine +BABEL_OP2_302_12609_20140213_010711_inLine +BABEL_OP2_302_12609_20140213_010711_outLine +BABEL_OP2_302_12767_20131109_202610_inLine +BABEL_OP2_302_12767_20131109_202610_outLine +BABEL_OP2_302_12846_20140216_173021_inLine +BABEL_OP2_302_12846_20140216_173021_outLine +BABEL_OP2_302_12851_20131030_220616_inLine +BABEL_OP2_302_12851_20131030_220616_outLine +BABEL_OP2_302_13483_20140111_145619_inLine +BABEL_OP2_302_13483_20140111_145619_outLine +BABEL_OP2_302_13664_20131030_032900_inLine +BABEL_OP2_302_13664_20131030_032900_outLine +BABEL_OP2_302_13709_20140126_163818_inLine +BABEL_OP2_302_13709_20140126_163818_outLine +BABEL_OP2_302_13792_20131105_160713_inLine +BABEL_OP2_302_13792_20131105_160713_outLine +BABEL_OP2_302_14137_20131205_201718_inLine +BABEL_OP2_302_14137_20131205_201718_outLine +BABEL_OP2_302_14725_20131106_204535_inLine +BABEL_OP2_302_14725_20131106_204535_outLine +BABEL_OP2_302_14807_20131220_203507_inLine +BABEL_OP2_302_14807_20131220_203507_outLine +BABEL_OP2_302_14814_20131206_165156_inLine +BABEL_OP2_302_14814_20131206_165156_outLine +BABEL_OP2_302_14899_20131101_223556_inLine +BABEL_OP2_302_14899_20131101_223556_outLine +BABEL_OP2_302_14972_20131220_203939_inLine +BABEL_OP2_302_14972_20131220_203939_outLine +BABEL_OP2_302_15535_20131227_221937_inLine +BABEL_OP2_302_15535_20131227_221937_outLine +BABEL_OP2_302_15617_20140312_215158_inLine +BABEL_OP2_302_15617_20140312_215158_outLine +BABEL_OP2_302_15638_20131227_190456_inLine +BABEL_OP2_302_15638_20131227_190456_outLine +BABEL_OP2_302_15730_20131121_044516_inLine +BABEL_OP2_302_15730_20131121_044516_outLine +BABEL_OP2_302_16467_20140125_193127_inLine +BABEL_OP2_302_16467_20140125_193127_outLine +BABEL_OP2_302_16839_20140203_151410_inLine +BABEL_OP2_302_16839_20140203_151410_outLine +BABEL_OP2_302_16886_20131209_211339_inLine +BABEL_OP2_302_16886_20131209_211339_outLine +BABEL_OP2_302_17032_20140108_211239_inLine +BABEL_OP2_302_17032_20140108_211239_outLine +BABEL_OP2_302_17097_20140310_234246_inLine +BABEL_OP2_302_17097_20140310_234246_outLine +BABEL_OP2_302_17113_20140216_165407_inLine +BABEL_OP2_302_17113_20140216_165407_outLine +BABEL_OP2_302_17280_20131214_140641_inLine +BABEL_OP2_302_17280_20131214_140641_outLine +BABEL_OP2_302_17320_20140203_165125_inLine +BABEL_OP2_302_17320_20140203_165125_outLine +BABEL_OP2_302_17567_20131227_223417_inLine +BABEL_OP2_302_17567_20131227_223417_outLine +BABEL_OP2_302_18078_20140219_195739_inLine +BABEL_OP2_302_18078_20140219_195739_outLine +BABEL_OP2_302_18118_20140312_010735_inLine +BABEL_OP2_302_18118_20140312_010735_outLine +BABEL_OP2_302_18242_20140219_185647_inLine +BABEL_OP2_302_18242_20140219_185647_outLine +BABEL_OP2_302_19722_20131106_001542_inLine +BABEL_OP2_302_19722_20131106_001542_outLine +BABEL_OP2_302_19773_20140113_201049_inLine +BABEL_OP2_302_19773_20140113_201049_outLine +BABEL_OP2_302_20133_20131225_003913_inLine +BABEL_OP2_302_20133_20131225_003913_outLine +BABEL_OP2_302_20367_20140220_000514_inLine +BABEL_OP2_302_20367_20140220_000514_outLine +BABEL_OP2_302_20437_20140317_015757_inLine +BABEL_OP2_302_20437_20140317_015757_outLine +BABEL_OP2_302_20916_20131031_232512_inLine +BABEL_OP2_302_20916_20131031_232512_outLine +BABEL_OP2_302_20922_20140115_174224_inLine +BABEL_OP2_302_20922_20140115_174224_outLine +BABEL_OP2_302_20972_20140301_200910_inLine +BABEL_OP2_302_20972_20140301_200910_outLine 
+BABEL_OP2_302_20985_20131227_225613_inLine +BABEL_OP2_302_20985_20131227_225613_outLine +BABEL_OP2_302_20985_20131227_230755_inLine +BABEL_OP2_302_20985_20131227_230755_outLine +BABEL_OP2_302_21206_20131209_212818_inLine +BABEL_OP2_302_21206_20131209_212818_outLine +BABEL_OP2_302_21206_20131209_214221_inLine +BABEL_OP2_302_21206_20131209_214221_outLine +BABEL_OP2_302_21435_20140201_181751_inLine +BABEL_OP2_302_21435_20140201_181751_outLine +BABEL_OP2_302_21624_20140302_191929_inLine +BABEL_OP2_302_21624_20140302_191929_outLine +BABEL_OP2_302_21807_20131215_163416_inLine +BABEL_OP2_302_21807_20131215_163416_outLine +BABEL_OP2_302_22280_20131214_220249_inLine +BABEL_OP2_302_22280_20131214_220249_outLine +BABEL_OP2_302_22321_20131204_001445_inLine +BABEL_OP2_302_22321_20131204_001445_outLine +BABEL_OP2_302_22321_20131204_002854_inLine +BABEL_OP2_302_22321_20131204_002854_outLine +BABEL_OP2_302_22446_20131107_221527_inLine +BABEL_OP2_302_22446_20131107_221527_outLine +BABEL_OP2_302_23239_20131227_213345_inLine +BABEL_OP2_302_23239_20131227_213345_outLine +BABEL_OP2_302_23239_20131227_214733_inLine +BABEL_OP2_302_23239_20131227_214733_outLine +BABEL_OP2_302_23505_20131113_214234_inLine +BABEL_OP2_302_23505_20131113_214234_outLine +BABEL_OP2_302_23505_20131113_215736_inLine +BABEL_OP2_302_23505_20131113_215736_outLine +BABEL_OP2_302_23980_20131206_213027_inLine +BABEL_OP2_302_23980_20131206_213027_outLine +BABEL_OP2_302_24239_20140314_185042_inLine +BABEL_OP2_302_24239_20140314_185042_outLine +BABEL_OP2_302_24241_20140312_211507_inLine +BABEL_OP2_302_24241_20140312_211507_outLine +BABEL_OP2_302_24270_20131218_184807_inLine +BABEL_OP2_302_24270_20131218_184807_outLine +BABEL_OP2_302_24323_20131207_212641_inLine +BABEL_OP2_302_24323_20131207_212641_outLine +BABEL_OP2_302_24586_20140301_162559_inLine +BABEL_OP2_302_24586_20140301_162559_outLine +BABEL_OP2_302_24605_20131109_160432_inLine +BABEL_OP2_302_24605_20131109_160432_outLine +BABEL_OP2_302_24648_20140313_194015_inLine +BABEL_OP2_302_24648_20140313_194015_outLine +BABEL_OP2_302_24679_20131101_171953_inLine +BABEL_OP2_302_24679_20131101_171953_outLine +BABEL_OP2_302_24679_20131101_173035_inLine +BABEL_OP2_302_24679_20131101_173035_outLine +BABEL_OP2_302_24982_20131128_202029_inLine +BABEL_OP2_302_24982_20131128_202029_outLine +BABEL_OP2_302_24982_20131128_202537_inLine +BABEL_OP2_302_24982_20131128_202537_outLine +BABEL_OP2_302_24982_20131128_203436_inLine +BABEL_OP2_302_24982_20131128_203436_outLine +BABEL_OP2_302_25085_20140216_161934_inLine +BABEL_OP2_302_25085_20140216_161934_outLine +BABEL_OP2_302_25496_20140228_212748_inLine +BABEL_OP2_302_25496_20140228_212748_outLine +BABEL_OP2_302_25767_20131108_203252_inLine +BABEL_OP2_302_25767_20131108_203252_outLine +BABEL_OP2_302_25767_20131108_205755_inLine +BABEL_OP2_302_25767_20131108_205755_outLine +BABEL_OP2_302_25961_20131122_214450_inLine +BABEL_OP2_302_25961_20131122_214450_outLine +BABEL_OP2_302_26388_20140203_173156_inLine +BABEL_OP2_302_26388_20140203_173156_outLine +BABEL_OP2_302_26836_20131207_194346_inLine +BABEL_OP2_302_26836_20131207_194346_outLine +BABEL_OP2_302_27367_20140317_000858_inLine +BABEL_OP2_302_27367_20140317_000858_outLine +BABEL_OP2_302_28012_20140115_155940_inLine +BABEL_OP2_302_28012_20140115_155940_outLine +BABEL_OP2_302_28477_20140127_173004_inLine +BABEL_OP2_302_28477_20140127_173004_outLine +BABEL_OP2_302_28595_20140312_200036_inLine +BABEL_OP2_302_28595_20140312_200036_outLine +BABEL_OP2_302_28814_20140115_202820_inLine 
+BABEL_OP2_302_28814_20140115_202820_outLine +BABEL_OP2_302_29072_20131224_215008_inLine +BABEL_OP2_302_29072_20131224_215008_outLine +BABEL_OP2_302_29135_20131031_201509_inLine +BABEL_OP2_302_29135_20131031_201509_outLine +BABEL_OP2_302_29416_20140125_222019_inLine +BABEL_OP2_302_29416_20140125_222019_outLine +BABEL_OP2_302_29439_20140226_160155_inLine +BABEL_OP2_302_29439_20140226_160155_outLine +BABEL_OP2_302_30013_20140111_202103_inLine +BABEL_OP2_302_30013_20140111_202103_outLine +BABEL_OP2_302_30345_20140113_154634_inLine +BABEL_OP2_302_30345_20140113_154634_outLine +BABEL_OP2_302_30461_20140305_205327_inLine +BABEL_OP2_302_30461_20140305_205327_outLine +BABEL_OP2_302_30720_20140312_002038_inLine +BABEL_OP2_302_30720_20140312_002038_outLine +BABEL_OP2_302_31267_20140221_194733_inLine +BABEL_OP2_302_31267_20140221_194733_outLine +BABEL_OP2_302_31490_20131120_230743_inLine +BABEL_OP2_302_31490_20131120_230743_outLine +BABEL_OP2_302_32097_20131106_232714_inLine +BABEL_OP2_302_32097_20131106_232714_outLine +BABEL_OP2_302_32097_20131106_233937_inLine +BABEL_OP2_302_32097_20131106_233937_outLine +BABEL_OP2_302_32287_20140316_185534_inLine +BABEL_OP2_302_32287_20140316_185534_outLine +BABEL_OP2_302_32301_20140108_212650_inLine +BABEL_OP2_302_32301_20140108_212650_outLine +BABEL_OP2_302_34106_20131118_201548_inLine +BABEL_OP2_302_34106_20131118_201548_outLine +BABEL_OP2_302_34197_20131203_173358_inLine +BABEL_OP2_302_34197_20131203_173358_outLine +BABEL_OP2_302_34477_20131205_030548_inLine +BABEL_OP2_302_34477_20131205_030548_outLine +BABEL_OP2_302_34477_20131205_035623_inLine +BABEL_OP2_302_34477_20131205_035623_outLine +BABEL_OP2_302_34486_20140313_003302_inLine +BABEL_OP2_302_34486_20140313_003302_outLine +BABEL_OP2_302_34811_20131210_202739_inLine +BABEL_OP2_302_34811_20131210_202739_outLine +BABEL_OP2_302_34826_20140127_205243_inLine +BABEL_OP2_302_34826_20140127_205243_outLine +BABEL_OP2_302_35000_20140126_011711_inLine +BABEL_OP2_302_35000_20140126_011711_outLine +BABEL_OP2_302_35139_20131117_174341_inLine +BABEL_OP2_302_35139_20131117_174341_outLine +BABEL_OP2_302_36894_20131113_172242_inLine +BABEL_OP2_302_36894_20131113_172242_outLine +BABEL_OP2_302_37271_20131228_201109_inLine +BABEL_OP2_302_37271_20131228_201109_outLine +BABEL_OP2_302_37598_20131218_200535_inLine +BABEL_OP2_302_37598_20131218_200535_outLine +BABEL_OP2_302_37682_20131218_170241_inLine +BABEL_OP2_302_37682_20131218_170241_outLine +BABEL_OP2_302_38588_20131216_211052_inLine +BABEL_OP2_302_38588_20131216_211052_outLine +BABEL_OP2_302_39006_20140220_200207_inLine +BABEL_OP2_302_39006_20140220_200207_outLine +BABEL_OP2_302_39555_20140110_211809_inLine +BABEL_OP2_302_39555_20140110_211809_outLine +BABEL_OP2_302_39744_20131031_182731_inLine +BABEL_OP2_302_39744_20131031_182731_outLine +BABEL_OP2_302_39848_20131210_214951_inLine +BABEL_OP2_302_39848_20131210_214951_outLine +BABEL_OP2_302_41233_20140111_195838_inLine +BABEL_OP2_302_41233_20140111_195838_outLine +BABEL_OP2_302_41680_20131031_034941_inLine +BABEL_OP2_302_41680_20131031_034941_outLine +BABEL_OP2_302_42526_20140119_151507_inLine +BABEL_OP2_302_42526_20140119_151507_outLine +BABEL_OP2_302_42771_20131210_163330_inLine +BABEL_OP2_302_42771_20131210_163330_outLine +BABEL_OP2_302_43286_20131105_180949_inLine +BABEL_OP2_302_43286_20131105_180949_outLine +BABEL_OP2_302_43646_20131204_185430_inLine +BABEL_OP2_302_43646_20131204_185430_outLine +BABEL_OP2_302_43784_20131128_211932_inLine +BABEL_OP2_302_43784_20131128_211932_outLine 
+BABEL_OP2_302_43788_20131225_201206_inLine +BABEL_OP2_302_43788_20131225_201206_outLine +BABEL_OP2_302_43920_20140312_031242_inLine +BABEL_OP2_302_43920_20140312_031242_outLine +BABEL_OP2_302_44619_20131212_234348_inLine +BABEL_OP2_302_44619_20131212_234348_outLine +BABEL_OP2_302_44961_20131111_223956_inLine +BABEL_OP2_302_44961_20131111_223956_outLine +BABEL_OP2_302_45770_20131107_181651_inLine +BABEL_OP2_302_45770_20131107_181651_outLine +BABEL_OP2_302_46268_20131113_230054_inLine +BABEL_OP2_302_46268_20131113_230054_outLine +BABEL_OP2_302_46558_20131204_011205_inLine +BABEL_OP2_302_46558_20131204_011205_outLine +BABEL_OP2_302_46688_20131114_195414_inLine +BABEL_OP2_302_46688_20131114_195414_outLine +BABEL_OP2_302_46757_20131227_210756_inLine +BABEL_OP2_302_46757_20131227_210756_outLine +BABEL_OP2_302_46763_20140225_183302_inLine +BABEL_OP2_302_46763_20140225_183302_outLine +BABEL_OP2_302_46976_20131212_235226_inLine +BABEL_OP2_302_46976_20131212_235226_outLine +BABEL_OP2_302_47866_20140122_200544_inLine +BABEL_OP2_302_47866_20140122_200544_outLine +BABEL_OP2_302_48243_20131128_221311_inLine +BABEL_OP2_302_48243_20131128_221311_outLine +BABEL_OP2_302_48610_20131112_232839_inLine +BABEL_OP2_302_48610_20131112_232839_outLine +BABEL_OP2_302_49912_20140217_201647_inLine +BABEL_OP2_302_49912_20140217_201647_outLine +BABEL_OP2_302_50549_20140130_172816_inLine +BABEL_OP2_302_50549_20140130_172816_outLine +BABEL_OP2_302_50779_20131219_172746_inLine +BABEL_OP2_302_50779_20131219_172746_outLine +BABEL_OP2_302_50810_20131031_180316_inLine +BABEL_OP2_302_50810_20131031_180316_outLine +BABEL_OP2_302_51955_20131117_200909_inLine +BABEL_OP2_302_51955_20131117_200909_outLine +BABEL_OP2_302_52272_20131106_175931_inLine +BABEL_OP2_302_52272_20131106_175931_outLine +BABEL_OP2_302_52381_20140118_163935_inLine +BABEL_OP2_302_52381_20140118_163935_outLine +BABEL_OP2_302_52404_20131226_172656_inLine +BABEL_OP2_302_52404_20131226_172656_outLine +BABEL_OP2_302_53063_20140219_175252_inLine +BABEL_OP2_302_53063_20140219_175252_outLine +BABEL_OP2_302_53492_20140124_221354_inLine +BABEL_OP2_302_53492_20140124_221354_outLine +BABEL_OP2_302_53492_20140124_231722_inLine +BABEL_OP2_302_53492_20140124_231722_outLine +BABEL_OP2_302_54074_20131213_143818_inLine +BABEL_OP2_302_54074_20131213_143818_outLine +BABEL_OP2_302_54104_20131108_172927_inLine +BABEL_OP2_302_54104_20131108_172927_outLine +BABEL_OP2_302_54697_20140125_210815_inLine +BABEL_OP2_302_54697_20140125_210815_outLine +BABEL_OP2_302_54953_20131207_170139_inLine +BABEL_OP2_302_54953_20131207_170139_outLine +BABEL_OP2_302_55106_20140226_210229_inLine +BABEL_OP2_302_55106_20140226_210229_outLine +BABEL_OP2_302_56306_20140115_190808_inLine +BABEL_OP2_302_56306_20140115_190808_outLine +BABEL_OP2_302_57065_20140105_155451_inLine +BABEL_OP2_302_57065_20140105_155451_outLine +BABEL_OP2_302_57548_20131220_025554_inLine +BABEL_OP2_302_57548_20131220_025554_outLine +BABEL_OP2_302_57566_20140129_021108_inLine +BABEL_OP2_302_57566_20140129_021108_outLine +BABEL_OP2_302_58850_20131209_231304_inLine +BABEL_OP2_302_58850_20131209_231304_outLine +BABEL_OP2_302_58926_20131121_050015_inLine +BABEL_OP2_302_58926_20131121_050015_outLine +BABEL_OP2_302_59509_20140203_215611_inLine +BABEL_OP2_302_59509_20140203_215611_outLine +BABEL_OP2_302_59635_20140124_183551_inLine +BABEL_OP2_302_59635_20140124_183551_outLine +BABEL_OP2_302_59720_20131206_145023_inLine +BABEL_OP2_302_59720_20131206_145023_outLine +BABEL_OP2_302_59747_20131104_164054_inLine 
+BABEL_OP2_302_59747_20131104_164054_outLine +BABEL_OP2_302_60299_20140220_162147_inLine +BABEL_OP2_302_60299_20140220_162147_outLine +BABEL_OP2_302_61888_20140127_161005_inLine +BABEL_OP2_302_61888_20140127_161005_outLine +BABEL_OP2_302_63307_20131213_180556_inLine +BABEL_OP2_302_63307_20131213_180556_outLine +BABEL_OP2_302_63999_20140214_214838_inLine +BABEL_OP2_302_63999_20140214_214838_outLine +BABEL_OP2_302_64870_20131226_033837_inLine +BABEL_OP2_302_64870_20131226_033837_outLine +BABEL_OP2_302_66001_20131114_212059_inLine +BABEL_OP2_302_66001_20131114_212059_outLine +BABEL_OP2_302_66822_20131207_210025_inLine +BABEL_OP2_302_66822_20131207_210025_outLine +BABEL_OP2_302_67401_20131220_213115_inLine +BABEL_OP2_302_67401_20131220_213115_outLine +BABEL_OP2_302_67622_20131109_180009_inLine +BABEL_OP2_302_67622_20131109_180009_outLine +BABEL_OP2_302_67659_20131130_221743_inLine +BABEL_OP2_302_67659_20131130_221743_outLine +BABEL_OP2_302_67773_20140318_210730_inLine +BABEL_OP2_302_67773_20140318_210730_outLine +BABEL_OP2_302_68244_20140131_200512_inLine +BABEL_OP2_302_68244_20140131_200512_outLine +BABEL_OP2_302_68289_20140126_231552_inLine +BABEL_OP2_302_68289_20140126_231552_outLine +BABEL_OP2_302_68924_20140130_230433_inLine +BABEL_OP2_302_68924_20140130_230433_outLine +BABEL_OP2_302_69578_20131207_134320_inLine +BABEL_OP2_302_69578_20131207_134320_outLine +BABEL_OP2_302_69746_20140118_173307_inLine +BABEL_OP2_302_69746_20140118_173307_outLine +BABEL_OP2_302_70121_20131209_204053_inLine +BABEL_OP2_302_70121_20131209_204053_outLine +BABEL_OP2_302_70251_20131109_191513_inLine +BABEL_OP2_302_70251_20131109_191513_outLine +BABEL_OP2_302_70386_20131203_030837_inLine +BABEL_OP2_302_70386_20131203_030837_outLine +BABEL_OP2_302_70452_20131219_032729_inLine +BABEL_OP2_302_70452_20131219_032729_outLine +BABEL_OP2_302_70460_20140222_223545_inLine +BABEL_OP2_302_70460_20140222_223545_outLine +BABEL_OP2_302_70713_20140126_171356_inLine +BABEL_OP2_302_70713_20140126_171356_outLine +BABEL_OP2_302_70794_20131107_185831_inLine +BABEL_OP2_302_70794_20131107_185831_outLine +BABEL_OP2_302_71038_20140119_172132_inLine +BABEL_OP2_302_71038_20140119_172132_outLine +BABEL_OP2_302_71067_20140130_194954_inLine +BABEL_OP2_302_71067_20140130_194954_outLine +BABEL_OP2_302_72952_20140215_000239_inLine +BABEL_OP2_302_72952_20140215_000239_outLine +BABEL_OP2_302_73518_20140123_190347_inLine +BABEL_OP2_302_73518_20140123_190347_outLine +BABEL_OP2_302_74921_20131225_190044_inLine +BABEL_OP2_302_74921_20131225_190044_outLine +BABEL_OP2_302_75223_20131130_211714_inLine +BABEL_OP2_302_75223_20131130_211714_outLine +BABEL_OP2_302_75223_20131130_212825_inLine +BABEL_OP2_302_75223_20131130_212825_outLine +BABEL_OP2_302_75505_20131104_234450_inLine +BABEL_OP2_302_75505_20131104_234450_outLine +BABEL_OP2_302_75505_20131104_235752_inLine +BABEL_OP2_302_75505_20131104_235752_outLine +BABEL_OP2_302_76756_20131219_183439_inLine +BABEL_OP2_302_76756_20131219_183439_outLine +BABEL_OP2_302_77033_20140127_183412_inLine +BABEL_OP2_302_77033_20140127_183412_outLine +BABEL_OP2_302_77126_20131111_012344_inLine +BABEL_OP2_302_77126_20131111_012344_outLine +BABEL_OP2_302_77242_20140217_184823_inLine +BABEL_OP2_302_77242_20140217_184823_outLine +BABEL_OP2_302_77990_20131117_222127_inLine +BABEL_OP2_302_77990_20131117_222127_outLine +BABEL_OP2_302_78398_20131116_213051_inLine +BABEL_OP2_302_78398_20131116_213051_outLine +BABEL_OP2_302_78943_20131206_035541_inLine +BABEL_OP2_302_78943_20131206_035541_outLine 
+BABEL_OP2_302_78943_20131206_042746_inLine +BABEL_OP2_302_78943_20131206_042746_outLine +BABEL_OP2_302_79898_20140310_200258_inLine +BABEL_OP2_302_79898_20140310_200258_outLine +BABEL_OP2_302_79995_20140125_225240_inLine +BABEL_OP2_302_79995_20140125_225240_outLine +BABEL_OP2_302_80306_20131206_235538_inLine +BABEL_OP2_302_80306_20131206_235538_outLine +BABEL_OP2_302_80781_20131207_183741_inLine +BABEL_OP2_302_80781_20131207_183741_outLine +BABEL_OP2_302_81213_20131118_175514_inLine +BABEL_OP2_302_81213_20131118_175514_outLine +BABEL_OP2_302_81287_20140115_190105_inLine +BABEL_OP2_302_81287_20140115_190105_outLine +BABEL_OP2_302_81671_20140131_220121_inLine +BABEL_OP2_302_81671_20140131_220121_outLine +BABEL_OP2_302_82089_20131206_191938_inLine +BABEL_OP2_302_82089_20131206_191938_outLine +BABEL_OP2_302_82138_20131206_045140_inLine +BABEL_OP2_302_82138_20131206_045140_outLine +BABEL_OP2_302_82145_20140301_225354_inLine +BABEL_OP2_302_82145_20140301_225354_outLine +BABEL_OP2_302_82224_20140203_014024_inLine +BABEL_OP2_302_82224_20140203_014024_outLine +BABEL_OP2_302_82425_20131115_005742_inLine +BABEL_OP2_302_82425_20131115_005742_outLine +BABEL_OP2_302_82935_20140114_204802_inLine +BABEL_OP2_302_82935_20140114_204802_outLine +BABEL_OP2_302_83430_20140315_203750_inLine +BABEL_OP2_302_83430_20140315_203750_outLine +BABEL_OP2_302_83436_20131106_170059_inLine +BABEL_OP2_302_83436_20131106_170059_outLine +BABEL_OP2_302_83455_20131208_201956_inLine +BABEL_OP2_302_83455_20131208_201956_outLine +BABEL_OP2_302_84408_20131207_204020_inLine +BABEL_OP2_302_84408_20131207_204020_outLine +BABEL_OP2_302_84469_20140107_205046_inLine +BABEL_OP2_302_84469_20140107_205046_outLine +BABEL_OP2_302_84715_20140127_201518_inLine +BABEL_OP2_302_84715_20140127_201518_outLine +BABEL_OP2_302_84936_20140108_204108_inLine +BABEL_OP2_302_84936_20140108_204108_outLine +BABEL_OP2_302_85010_20140316_222754_inLine +BABEL_OP2_302_85010_20140316_222754_outLine +BABEL_OP2_302_86628_20140215_171431_inLine +BABEL_OP2_302_86628_20140215_171431_outLine +BABEL_OP2_302_86715_20140215_174540_inLine +BABEL_OP2_302_86715_20140215_174540_outLine +BABEL_OP2_302_87298_20140130_191447_inLine +BABEL_OP2_302_87298_20140130_191447_outLine +BABEL_OP2_302_87305_20140214_225515_inLine +BABEL_OP2_302_87305_20140214_225515_outLine +BABEL_OP2_302_87693_20131121_041057_inLine +BABEL_OP2_302_87693_20131121_041057_outLine +BABEL_OP2_302_87777_20140127_145958_inLine +BABEL_OP2_302_87777_20140127_145958_outLine +BABEL_OP2_302_88661_20131225_211835_inLine +BABEL_OP2_302_88661_20131225_211835_outLine +BABEL_OP2_302_88938_20140202_215623_inLine +BABEL_OP2_302_88938_20140202_215623_outLine +BABEL_OP2_302_89059_20140115_214308_inLine +BABEL_OP2_302_89059_20140115_214308_outLine +BABEL_OP2_302_90709_20131111_143953_inLine +BABEL_OP2_302_90709_20131111_143953_outLine +BABEL_OP2_302_92557_20140125_230505_inLine +BABEL_OP2_302_92557_20140125_230505_outLine +BABEL_OP2_302_92736_20140119_170328_inLine +BABEL_OP2_302_92736_20140119_170328_outLine +BABEL_OP2_302_92809_20131109_182045_inLine +BABEL_OP2_302_92809_20131109_182045_outLine +BABEL_OP2_302_92942_20131219_014744_inLine +BABEL_OP2_302_92942_20131219_014744_outLine +BABEL_OP2_302_93632_20140203_154221_inLine +BABEL_OP2_302_93632_20140203_154221_outLine +BABEL_OP2_302_93964_20131208_002934_inLine +BABEL_OP2_302_93964_20131208_002934_outLine +BABEL_OP2_302_94253_20131114_215945_inLine +BABEL_OP2_302_94253_20131114_215945_outLine +BABEL_OP2_302_94449_20140314_185933_inLine 
+BABEL_OP2_302_94449_20140314_185933_outLine +BABEL_OP2_302_94803_20140313_225823_inLine +BABEL_OP2_302_94803_20140313_225823_outLine +BABEL_OP2_302_95598_20131101_172634_inLine +BABEL_OP2_302_95598_20131101_172634_outLine +BABEL_OP2_302_95598_20131101_175037_inLine +BABEL_OP2_302_95598_20131101_175037_outLine +BABEL_OP2_302_95670_20131130_185901_inLine +BABEL_OP2_302_95670_20131130_185901_outLine +BABEL_OP2_302_95903_20140303_002203_inLine +BABEL_OP2_302_95903_20140303_002203_outLine +BABEL_OP2_302_96525_20140110_201817_inLine +BABEL_OP2_302_96525_20140110_201817_outLine +BABEL_OP2_302_96690_20131204_221739_inLine +BABEL_OP2_302_96690_20131204_221739_outLine +BABEL_OP2_302_96910_20140130_210316_inLine +BABEL_OP2_302_96910_20140130_210316_outLine +BABEL_OP2_302_97461_20131211_211339_inLine +BABEL_OP2_302_97461_20131211_211339_outLine +BABEL_OP2_302_97557_20131219_192714_inLine +BABEL_OP2_302_97557_20131219_192714_outLine +BABEL_OP2_302_97588_20131101_161834_inLine +BABEL_OP2_302_97588_20131101_161834_outLine +BABEL_OP2_302_97588_20131101_163947_inLine +BABEL_OP2_302_97588_20131101_163947_outLine +BABEL_OP2_302_97731_20140114_201001_inLine +BABEL_OP2_302_97731_20140114_201001_outLine +BABEL_OP2_302_97772_20131107_223232_inLine +BABEL_OP2_302_97772_20131107_223232_outLine +BABEL_OP2_302_98489_20131204_181216_inLine +BABEL_OP2_302_98489_20131204_181216_outLine +BABEL_OP2_302_98909_20131117_153948_inLine +BABEL_OP2_302_98909_20131117_153948_outLine +BABEL_OP2_302_99401_20131114_221114_inLine +BABEL_OP2_302_99401_20131114_221114_outLine +BABEL_OP2_302_99887_20140129_162421_inLine +BABEL_OP2_302_99887_20140129_162421_outLine diff --git a/egs/babel/s5d/conf/lists/302-kazakh/untranscribed-training.list b/egs/babel/s5d/conf/lists/302-kazakh/untranscribed-training.list new file mode 100644 index 00000000000..52a1f686ddc --- /dev/null +++ b/egs/babel/s5d/conf/lists/302-kazakh/untranscribed-training.list @@ -0,0 +1,569 @@ +BABEL_OP2_302_11723_20140320_021007_inLine +BABEL_OP2_302_11723_20140320_021007_outLine +BABEL_OP2_302_11723_20140320_030801_inLine +BABEL_OP2_302_11723_20140320_030801_outLine +BABEL_OP2_302_11768_20140319_010224_inLine +BABEL_OP2_302_11768_20140319_010224_outLine +BABEL_OP2_302_13776_20140225_201416_inLine +BABEL_OP2_302_13776_20140225_201416_outLine +BABEL_OP2_302_13776_20140225_203014_inLine +BABEL_OP2_302_13776_20140225_203014_outLine +BABEL_OP2_302_14179_20131218_222613_inLine +BABEL_OP2_302_14179_20131218_222613_outLine +BABEL_OP2_302_14179_20131218_223829_inLine +BABEL_OP2_302_14179_20131218_223829_outLine +BABEL_OP2_302_14179_20131218_224616_inLine +BABEL_OP2_302_14179_20131218_224616_outLine +BABEL_OP2_302_14537_20140204_020213_inLine +BABEL_OP2_302_14537_20140204_020213_outLine +BABEL_OP2_302_14575_20140317_000954_inLine +BABEL_OP2_302_14575_20140317_000954_outLine +BABEL_OP2_302_15322_20140223_032907_inLine +BABEL_OP2_302_15322_20140223_032907_outLine +BABEL_OP2_302_15466_20140311_190939_inLine +BABEL_OP2_302_15466_20140311_190939_outLine +BABEL_OP2_302_15749_20140106_224305_inLine +BABEL_OP2_302_15749_20140106_224305_outLine +BABEL_OP2_302_15869_20140319_024910_inLine +BABEL_OP2_302_15869_20140319_024910_outLine +BABEL_OP2_302_15926_20131223_153308_inLine +BABEL_OP2_302_15926_20131223_153308_outLine +BABEL_OP2_302_15926_20131223_154557_inLine +BABEL_OP2_302_15926_20131223_154557_outLine +BABEL_OP2_302_15926_20131223_155325_inLine +BABEL_OP2_302_15926_20131223_155325_outLine +BABEL_OP2_302_15926_20131223_160015_inLine +BABEL_OP2_302_15926_20131223_160015_outLine 
+BABEL_OP2_302_15926_20131223_160509_inLine +BABEL_OP2_302_15926_20131223_160509_outLine +BABEL_OP2_302_15926_20131223_161316_inLine +BABEL_OP2_302_15926_20131223_161316_outLine +BABEL_OP2_302_17115_20140326_194730_inLine +BABEL_OP2_302_17115_20140326_194730_outLine +BABEL_OP2_302_17420_20140222_233405_inLine +BABEL_OP2_302_17420_20140222_233405_outLine +BABEL_OP2_302_17615_20140107_025352_inLine +BABEL_OP2_302_17615_20140107_025352_outLine +BABEL_OP2_302_19440_20140328_012334_inLine +BABEL_OP2_302_19440_20140328_012334_outLine +BABEL_OP2_302_20800_20131116_165644_inLine +BABEL_OP2_302_20800_20131116_165644_outLine +BABEL_OP2_302_20896_20140319_002117_inLine +BABEL_OP2_302_20896_20140319_002117_outLine +BABEL_OP2_302_21244_20140221_181738_inLine +BABEL_OP2_302_21244_20140221_181738_outLine +BABEL_OP2_302_21244_20140221_185615_inLine +BABEL_OP2_302_21244_20140221_185615_outLine +BABEL_OP2_302_21315_20140304_163357_inLine +BABEL_OP2_302_21315_20140304_163357_outLine +BABEL_OP2_302_21393_20140303_153741_inLine +BABEL_OP2_302_21393_20140303_153741_outLine +BABEL_OP2_302_22170_20140203_213129_inLine +BABEL_OP2_302_22170_20140203_213129_outLine +BABEL_OP2_302_22494_20131227_200715_inLine +BABEL_OP2_302_22918_20140203_004320_inLine +BABEL_OP2_302_22918_20140203_004320_outLine +BABEL_OP2_302_23092_20140218_224204_inLine +BABEL_OP2_302_23092_20140218_224204_outLine +BABEL_OP2_302_24033_20140131_000701_inLine +BABEL_OP2_302_24033_20140131_000701_outLine +BABEL_OP2_302_24209_20140328_231409_inLine +BABEL_OP2_302_24209_20140328_231409_outLine +BABEL_OP2_302_24587_20140318_225950_inLine +BABEL_OP2_302_24587_20140318_225950_outLine +BABEL_OP2_302_25719_20140129_155329_inLine +BABEL_OP2_302_25719_20140129_155329_outLine +BABEL_OP2_302_26507_20140308_203259_inLine +BABEL_OP2_302_26507_20140308_203259_outLine +BABEL_OP2_302_26574_20140201_202449_inLine +BABEL_OP2_302_26574_20140201_202449_outLine +BABEL_OP2_302_27218_20131115_211108_inLine +BABEL_OP2_302_27218_20131115_211108_outLine +BABEL_OP2_302_27841_20140124_002040_inLine +BABEL_OP2_302_27841_20140124_002040_outLine +BABEL_OP2_302_27841_20140124_003521_inLine +BABEL_OP2_302_27841_20140124_003521_outLine +BABEL_OP2_302_28538_20131218_015743_inLine +BABEL_OP2_302_28600_20140111_172326_inLine +BABEL_OP2_302_28600_20140111_172326_outLine +BABEL_OP2_302_28600_20140111_173213_inLine +BABEL_OP2_302_28600_20140111_173213_outLine +BABEL_OP2_302_28606_20140108_222927_inLine +BABEL_OP2_302_28606_20140108_222927_outLine +BABEL_OP2_302_28606_20140108_223822_inLine +BABEL_OP2_302_28606_20140108_223822_outLine +BABEL_OP2_302_28775_20131116_210510_outLine +BABEL_OP2_302_29076_20131214_155845_inLine +BABEL_OP2_302_29076_20131214_155845_outLine +BABEL_OP2_302_29076_20131214_160555_inLine +BABEL_OP2_302_29076_20131214_160555_outLine +BABEL_OP2_302_29323_20140130_190425_inLine +BABEL_OP2_302_29323_20140130_190425_outLine +BABEL_OP2_302_29404_20140214_174021_inLine +BABEL_OP2_302_29404_20140214_174021_outLine +BABEL_OP2_302_29643_20140213_000617_inLine +BABEL_OP2_302_29643_20140213_000617_outLine +BABEL_OP2_302_30974_20140315_192921_inLine +BABEL_OP2_302_30974_20140315_192921_outLine +BABEL_OP2_302_31182_20140107_213519_outLine +BABEL_OP2_302_31346_20131230_220709_inLine +BABEL_OP2_302_31346_20131230_221548_inLine +BABEL_OP2_302_31919_20140120_211340_inLine +BABEL_OP2_302_31919_20140120_211340_outLine +BABEL_OP2_302_32727_20140304_200331_inLine +BABEL_OP2_302_32727_20140304_200331_outLine +BABEL_OP2_302_32832_20140114_180910_inLine 
+BABEL_OP2_302_32832_20140114_180910_outLine +BABEL_OP2_302_32872_20140311_203050_inLine +BABEL_OP2_302_32872_20140311_203050_outLine +BABEL_OP2_302_32872_20140311_204413_inLine +BABEL_OP2_302_32872_20140311_204413_outLine +BABEL_OP2_302_32959_20140301_192344_inLine +BABEL_OP2_302_32959_20140301_192344_outLine +BABEL_OP2_302_33273_20131214_184246_inLine +BABEL_OP2_302_33273_20131214_184246_outLine +BABEL_OP2_302_33273_20131214_191106_inLine +BABEL_OP2_302_33273_20131214_191106_outLine +BABEL_OP2_302_34410_20140321_182956_inLine +BABEL_OP2_302_34410_20140321_182956_outLine +BABEL_OP2_302_34629_20140315_192329_inLine +BABEL_OP2_302_34629_20140315_192329_outLine +BABEL_OP2_302_35202_20131221_211514_inLine +BABEL_OP2_302_35202_20131221_211514_outLine +BABEL_OP2_302_35202_20131221_220228_inLine +BABEL_OP2_302_35202_20131221_220228_outLine +BABEL_OP2_302_35609_20140326_155717_inLine +BABEL_OP2_302_35609_20140326_155717_outLine +BABEL_OP2_302_35786_20140201_193528_inLine +BABEL_OP2_302_35786_20140201_193528_outLine +BABEL_OP2_302_36147_20140314_230249_inLine +BABEL_OP2_302_36147_20140314_230249_outLine +BABEL_OP2_302_36632_20140316_220512_inLine +BABEL_OP2_302_36632_20140316_220512_outLine +BABEL_OP2_302_36642_20140203_204149_inLine +BABEL_OP2_302_36642_20140203_204149_outLine +BABEL_OP2_302_37281_20131223_193947_inLine +BABEL_OP2_302_37281_20131223_193947_outLine +BABEL_OP2_302_38554_20131107_231324_inLine +BABEL_OP2_302_38554_20131107_231324_outLine +BABEL_OP2_302_39159_20131101_181154_inLine +BABEL_OP2_302_39159_20131101_181154_outLine +BABEL_OP2_302_39159_20131101_182621_inLine +BABEL_OP2_302_39159_20131101_182621_outLine +BABEL_OP2_302_39277_20140313_204841_inLine +BABEL_OP2_302_39277_20140313_204841_outLine +BABEL_OP2_302_39920_20140308_212443_inLine +BABEL_OP2_302_39920_20140308_212443_outLine +BABEL_OP2_302_40196_20140316_214624_inLine +BABEL_OP2_302_40196_20140316_214624_outLine +BABEL_OP2_302_40740_20140203_175842_inLine +BABEL_OP2_302_40740_20140203_175842_outLine +BABEL_OP2_302_41109_20140129_151014_outLine +BABEL_OP2_302_41109_20140129_151930_outLine +BABEL_OP2_302_41682_20140316_184028_inLine +BABEL_OP2_302_41682_20140316_184028_outLine +BABEL_OP2_302_41685_20140319_224731_inLine +BABEL_OP2_302_41685_20140319_224731_outLine +BABEL_OP2_302_41745_20131216_195331_inLine +BABEL_OP2_302_41745_20131216_195331_outLine +BABEL_OP2_302_42126_20140319_211544_inLine +BABEL_OP2_302_42126_20140319_211544_outLine +BABEL_OP2_302_42243_20131104_193524_inLine +BABEL_OP2_302_42243_20131104_193524_outLine +BABEL_OP2_302_42718_20140303_235926_inLine +BABEL_OP2_302_42718_20140303_235926_outLine +BABEL_OP2_302_42883_20140301_215140_inLine +BABEL_OP2_302_42883_20140301_215140_outLine +BABEL_OP2_302_42883_20140301_220413_inLine +BABEL_OP2_302_42883_20140301_220413_outLine +BABEL_OP2_302_43285_20131218_173818_inLine +BABEL_OP2_302_43285_20131218_173818_outLine +BABEL_OP2_302_43285_20131218_175248_inLine +BABEL_OP2_302_43285_20131218_175248_outLine +BABEL_OP2_302_43323_20140320_012405_inLine +BABEL_OP2_302_43323_20140320_012405_outLine +BABEL_OP2_302_43990_20140319_003408_inLine +BABEL_OP2_302_44255_20140203_221612_inLine +BABEL_OP2_302_44255_20140203_221612_outLine +BABEL_OP2_302_44681_20140316_231417_inLine +BABEL_OP2_302_44681_20140316_231417_outLine +BABEL_OP2_302_45106_20140130_195527_inLine +BABEL_OP2_302_45106_20140130_195527_outLine +BABEL_OP2_302_45140_20140204_000835_inLine +BABEL_OP2_302_45140_20140204_000835_outLine +BABEL_OP2_302_45374_20140316_190302_inLine 
+BABEL_OP2_302_45374_20140316_190302_outLine +BABEL_OP2_302_46066_20140131_180512_inLine +BABEL_OP2_302_46066_20140131_180512_outLine +BABEL_OP2_302_46315_20140112_051606_inLine +BABEL_OP2_302_46315_20140112_051606_outLine +BABEL_OP2_302_46315_20140112_053032_inLine +BABEL_OP2_302_46315_20140112_053032_outLine +BABEL_OP2_302_46333_20131106_193911_inLine +BABEL_OP2_302_46333_20131106_193911_outLine +BABEL_OP2_302_46389_20140317_000314_inLine +BABEL_OP2_302_46389_20140317_000314_outLine +BABEL_OP2_302_46589_20131218_200246_inLine +BABEL_OP2_302_47799_20140310_191802_inLine +BABEL_OP2_302_47799_20140310_191802_outLine +BABEL_OP2_302_48200_20140111_171610_inLine +BABEL_OP2_302_48200_20140111_171610_outLine +BABEL_OP2_302_48758_20140222_204731_inLine +BABEL_OP2_302_48758_20140222_204731_outLine +BABEL_OP2_302_49027_20140307_172629_inLine +BABEL_OP2_302_49027_20140307_172629_outLine +BABEL_OP2_302_49118_20140114_164903_inLine +BABEL_OP2_302_49118_20140114_164903_outLine +BABEL_OP2_302_49437_20140123_224810_inLine +BABEL_OP2_302_49437_20140123_224810_outLine +BABEL_OP2_302_49739_20140314_204410_inLine +BABEL_OP2_302_49739_20140314_204410_outLine +BABEL_OP2_302_51015_20131225_194000_inLine +BABEL_OP2_302_51015_20131225_194000_outLine +BABEL_OP2_302_51407_20131210_182141_inLine +BABEL_OP2_302_51407_20131210_182141_outLine +BABEL_OP2_302_51414_20140301_231945_inLine +BABEL_OP2_302_51414_20140301_231945_outLine +BABEL_OP2_302_51414_20140301_232951_inLine +BABEL_OP2_302_51414_20140301_232951_outLine +BABEL_OP2_302_51530_20140303_173734_inLine +BABEL_OP2_302_51530_20140303_173734_outLine +BABEL_OP2_302_51693_20140317_180609_inLine +BABEL_OP2_302_51693_20140317_180609_outLine +BABEL_OP2_302_51819_20140108_231905_inLine +BABEL_OP2_302_51819_20140108_231905_outLine +BABEL_OP2_302_51819_20140108_232624_inLine +BABEL_OP2_302_51819_20140108_232624_outLine +BABEL_OP2_302_51858_20140314_235721_inLine +BABEL_OP2_302_51858_20140314_235721_outLine +BABEL_OP2_302_52070_20140320_231722_inLine +BABEL_OP2_302_52070_20140320_231722_outLine +BABEL_OP2_302_52222_20140314_185604_inLine +BABEL_OP2_302_52222_20140314_185604_outLine +BABEL_OP2_302_52265_20140317_214257_inLine +BABEL_OP2_302_52265_20140317_214257_outLine +BABEL_OP2_302_52483_20140318_192930_inLine +BABEL_OP2_302_52483_20140318_192930_outLine +BABEL_OP2_302_52490_20131204_173409_inLine +BABEL_OP2_302_52490_20131204_173409_outLine +BABEL_OP2_302_52725_20140224_182027_inLine +BABEL_OP2_302_52725_20140224_182027_outLine +BABEL_OP2_302_53072_20140307_191159_inLine +BABEL_OP2_302_53072_20140307_191159_outLine +BABEL_OP2_302_53415_20140301_180358_inLine +BABEL_OP2_302_53415_20140301_180358_outLine +BABEL_OP2_302_53917_20140214_214823_inLine +BABEL_OP2_302_53917_20140214_214823_outLine +BABEL_OP2_302_54046_20140111_191512_inLine +BABEL_OP2_302_54046_20140111_191512_outLine +BABEL_OP2_302_54160_20131105_233517_inLine +BABEL_OP2_302_54160_20131105_233517_outLine +BABEL_OP2_302_54405_20140111_185837_inLine +BABEL_OP2_302_54405_20140111_185837_outLine +BABEL_OP2_302_54477_20140108_182424_inLine +BABEL_OP2_302_54477_20140108_182424_outLine +BABEL_OP2_302_54923_20140216_224345_inLine +BABEL_OP2_302_54923_20140216_224345_outLine +BABEL_OP2_302_55259_20140110_235646_inLine +BABEL_OP2_302_55259_20140110_235646_outLine +BABEL_OP2_302_56331_20140116_230347_inLine +BABEL_OP2_302_56331_20140116_230347_outLine +BABEL_OP2_302_56345_20140316_214007_inLine +BABEL_OP2_302_56345_20140316_214007_outLine +BABEL_OP2_302_56468_20140313_205811_inLine 
+BABEL_OP2_302_56468_20140313_205811_outLine +BABEL_OP2_302_56743_20131216_222343_inLine +BABEL_OP2_302_56743_20131216_222343_outLine +BABEL_OP2_302_56925_20140324_224547_inLine +BABEL_OP2_302_56925_20140324_224547_outLine +BABEL_OP2_302_57116_20131030_223921_inLine +BABEL_OP2_302_57116_20131030_223921_outLine +BABEL_OP2_302_57542_20140122_203736_inLine +BABEL_OP2_302_57542_20140122_203736_outLine +BABEL_OP2_302_57654_20131117_191902_inLine +BABEL_OP2_302_57654_20131117_191902_outLine +BABEL_OP2_302_57654_20131117_192605_inLine +BABEL_OP2_302_57654_20131117_192605_outLine +BABEL_OP2_302_57678_20131219_025602_inLine +BABEL_OP2_302_57678_20131219_025602_outLine +BABEL_OP2_302_58047_20131218_204521_inLine +BABEL_OP2_302_58047_20131218_204521_outLine +BABEL_OP2_302_58734_20131113_233358_inLine +BABEL_OP2_302_58734_20131113_233358_outLine +BABEL_OP2_302_59091_20140128_234107_inLine +BABEL_OP2_302_59091_20140128_234107_outLine +BABEL_OP2_302_59301_20140114_221332_inLine +BABEL_OP2_302_59301_20140114_221332_outLine +BABEL_OP2_302_60115_20131217_170350_inLine +BABEL_OP2_302_60115_20131217_170350_outLine +BABEL_OP2_302_60661_20131116_191211_inLine +BABEL_OP2_302_60661_20131116_191211_outLine +BABEL_OP2_302_62155_20140201_185809_inLine +BABEL_OP2_302_62155_20140201_185809_outLine +BABEL_OP2_302_62158_20140319_223940_inLine +BABEL_OP2_302_62158_20140319_223940_outLine +BABEL_OP2_302_62200_20131209_215237_inLine +BABEL_OP2_302_62200_20131209_215237_outLine +BABEL_OP2_302_62362_20140225_004754_inLine +BABEL_OP2_302_62362_20140225_004754_outLine +BABEL_OP2_302_62430_20140301_152214_inLine +BABEL_OP2_302_62430_20140301_152214_outLine +BABEL_OP2_302_62724_20140304_224111_inLine +BABEL_OP2_302_62724_20140304_224111_outLine +BABEL_OP2_302_62835_20131223_201212_inLine +BABEL_OP2_302_63220_20140127_003053_inLine +BABEL_OP2_302_63220_20140127_003053_outLine +BABEL_OP2_302_63425_20140119_000855_inLine +BABEL_OP2_302_63425_20140119_000855_outLine +BABEL_OP2_302_63445_20131121_234555_inLine +BABEL_OP2_302_63511_20140311_232611_inLine +BABEL_OP2_302_63511_20140311_232611_outLine +BABEL_OP2_302_63523_20140219_180149_inLine +BABEL_OP2_302_63523_20140219_180149_outLine +BABEL_OP2_302_63906_20140120_224621_inLine +BABEL_OP2_302_63906_20140120_224621_outLine +BABEL_OP2_302_63938_20140129_205148_inLine +BABEL_OP2_302_63938_20140129_205148_outLine +BABEL_OP2_302_64350_20131117_225845_inLine +BABEL_OP2_302_64350_20131117_225845_outLine +BABEL_OP2_302_64350_20131117_232849_inLine +BABEL_OP2_302_64350_20131117_232849_outLine +BABEL_OP2_302_65639_20140320_150018_inLine +BABEL_OP2_302_65639_20140320_150018_outLine +BABEL_OP2_302_66026_20140326_234154_inLine +BABEL_OP2_302_66026_20140326_234154_outLine +BABEL_OP2_302_66361_20140319_194108_inLine +BABEL_OP2_302_66361_20140319_194108_outLine +BABEL_OP2_302_67213_20140327_183232_inLine +BABEL_OP2_302_67213_20140327_183232_outLine +BABEL_OP2_302_67304_20140327_170105_inLine +BABEL_OP2_302_67304_20140327_170105_outLine +BABEL_OP2_302_67389_20140316_224805_inLine +BABEL_OP2_302_67389_20140316_224805_outLine +BABEL_OP2_302_67389_20140316_230159_inLine +BABEL_OP2_302_67389_20140316_230159_outLine +BABEL_OP2_302_67552_20131225_215450_inLine +BABEL_OP2_302_67552_20131225_215450_outLine +BABEL_OP2_302_67592_20140113_211110_inLine +BABEL_OP2_302_67592_20140113_211110_outLine +BABEL_OP2_302_67726_20140319_013401_outLine +BABEL_OP2_302_67726_20140319_014304_outLine +BABEL_OP2_302_67794_20131117_183019_inLine +BABEL_OP2_302_67794_20131117_183019_outLine 
+BABEL_OP2_302_68402_20140327_221916_inLine +BABEL_OP2_302_68402_20140327_221916_outLine +BABEL_OP2_302_68627_20131204_225115_inLine +BABEL_OP2_302_68627_20131204_225115_outLine +BABEL_OP2_302_68748_20131217_195520_inLine +BABEL_OP2_302_68748_20131217_195520_outLine +BABEL_OP2_302_68748_20131217_201343_inLine +BABEL_OP2_302_68748_20131217_201343_outLine +BABEL_OP2_302_68908_20140320_022955_inLine +BABEL_OP2_302_68908_20140320_022955_outLine +BABEL_OP2_302_69982_20140311_005531_inLine +BABEL_OP2_302_69982_20140311_005531_outLine +BABEL_OP2_302_70282_20131220_160010_inLine +BABEL_OP2_302_70282_20131220_160010_outLine +BABEL_OP2_302_70639_20140318_221840_inLine +BABEL_OP2_302_70639_20140318_221840_outLine +BABEL_OP2_302_70726_20140319_183341_inLine +BABEL_OP2_302_70726_20140319_183341_outLine +BABEL_OP2_302_71419_20140314_222627_inLine +BABEL_OP2_302_71419_20140314_222627_outLine +BABEL_OP2_302_73408_20140326_185144_inLine +BABEL_OP2_302_73408_20140326_185144_outLine +BABEL_OP2_302_73408_20140326_190631_inLine +BABEL_OP2_302_73408_20140326_190631_outLine +BABEL_OP2_302_73591_20131029_231600_inLine +BABEL_OP2_302_73814_20131226_180746_inLine +BABEL_OP2_302_73814_20131226_180746_outLine +BABEL_OP2_302_73814_20131226_181941_inLine +BABEL_OP2_302_73814_20131226_181941_outLine +BABEL_OP2_302_73964_20140303_232725_inLine +BABEL_OP2_302_73964_20140303_232725_outLine +BABEL_OP2_302_74078_20140324_220859_outLine +BABEL_OP2_302_74121_20131206_165002_inLine +BABEL_OP2_302_74253_20140203_174833_inLine +BABEL_OP2_302_74253_20140203_174833_outLine +BABEL_OP2_302_74728_20140307_151132_inLine +BABEL_OP2_302_74728_20140307_151132_outLine +BABEL_OP2_302_75064_20131205_015445_inLine +BABEL_OP2_302_75261_20140218_213238_inLine +BABEL_OP2_302_75261_20140218_213238_outLine +BABEL_OP2_302_75359_20140220_000334_inLine +BABEL_OP2_302_75359_20140220_000334_outLine +BABEL_OP2_302_75366_20140310_224545_inLine +BABEL_OP2_302_75366_20140310_224545_outLine +BABEL_OP2_302_75465_20140125_194816_inLine +BABEL_OP2_302_75465_20140125_194816_outLine +BABEL_OP2_302_75764_20140123_173321_inLine +BABEL_OP2_302_75764_20140123_173321_outLine +BABEL_OP2_302_75869_20140118_180045_inLine +BABEL_OP2_302_75869_20140118_180045_outLine +BABEL_OP2_302_75993_20131118_164850_inLine +BABEL_OP2_302_75993_20131118_164850_outLine +BABEL_OP2_302_76444_20140304_213108_inLine +BABEL_OP2_302_76444_20140304_213108_outLine +BABEL_OP2_302_76970_20140327_002045_inLine +BABEL_OP2_302_76970_20140327_002045_outLine +BABEL_OP2_302_77904_20140316_204739_inLine +BABEL_OP2_302_77904_20140316_204739_outLine +BABEL_OP2_302_78360_20140131_201120_inLine +BABEL_OP2_302_78360_20140131_201120_outLine +BABEL_OP2_302_78630_20131115_232537_inLine +BABEL_OP2_302_78630_20131115_232537_outLine +BABEL_OP2_302_78976_20131128_230615_inLine +BABEL_OP2_302_78976_20131128_230615_outLine +BABEL_OP2_302_79167_20131225_175926_inLine +BABEL_OP2_302_79167_20131225_175926_outLine +BABEL_OP2_302_79367_20131112_222137_inLine +BABEL_OP2_302_79367_20131112_222137_outLine +BABEL_OP2_302_79858_20131116_000426_inLine +BABEL_OP2_302_81404_20131213_230929_inLine +BABEL_OP2_302_81404_20131213_230929_outLine +BABEL_OP2_302_81427_20131211_221442_inLine +BABEL_OP2_302_81427_20131211_221442_outLine +BABEL_OP2_302_81674_20140315_024749_inLine +BABEL_OP2_302_81674_20140315_024749_outLine +BABEL_OP2_302_82140_20131206_055551_inLine +BABEL_OP2_302_82140_20131206_055551_outLine +BABEL_OP2_302_82361_20140204_014603_inLine +BABEL_OP2_302_82361_20140204_014603_outLine 
+BABEL_OP2_302_82622_20131105_002634_inLine +BABEL_OP2_302_82622_20131105_002634_outLine +BABEL_OP2_302_82904_20140203_194011_inLine +BABEL_OP2_302_82904_20140203_194011_outLine +BABEL_OP2_302_83366_20131223_172753_inLine +BABEL_OP2_302_83366_20131223_172753_outLine +BABEL_OP2_302_83775_20131203_184707_inLine +BABEL_OP2_302_83775_20131203_184707_outLine +BABEL_OP2_302_83783_20131218_212844_inLine +BABEL_OP2_302_83783_20131218_212844_outLine +BABEL_OP2_302_84327_20140112_031943_inLine +BABEL_OP2_302_84327_20140112_031943_outLine +BABEL_OP2_302_84327_20140112_033431_inLine +BABEL_OP2_302_84327_20140112_033431_outLine +BABEL_OP2_302_84458_20140226_001547_inLine +BABEL_OP2_302_84458_20140226_001547_outLine +BABEL_OP2_302_84583_20131220_210443_inLine +BABEL_OP2_302_84583_20131220_210443_outLine +BABEL_OP2_302_84838_20140112_004851_inLine +BABEL_OP2_302_84838_20140112_004851_outLine +BABEL_OP2_302_84838_20140112_011030_inLine +BABEL_OP2_302_84838_20140112_011030_outLine +BABEL_OP2_302_85028_20140106_232649_inLine +BABEL_OP2_302_85028_20140106_232649_outLine +BABEL_OP2_302_85260_20140318_235730_inLine +BABEL_OP2_302_85260_20140318_235730_outLine +BABEL_OP2_302_85260_20140319_021618_inLine +BABEL_OP2_302_85260_20140319_021618_outLine +BABEL_OP2_302_85519_20140111_210933_inLine +BABEL_OP2_302_85519_20140111_210933_outLine +BABEL_OP2_302_85651_20140108_220631_inLine +BABEL_OP2_302_85651_20140108_220631_outLine +BABEL_OP2_302_85651_20140108_221652_inLine +BABEL_OP2_302_85651_20140108_221652_outLine +BABEL_OP2_302_85651_20140108_222943_inLine +BABEL_OP2_302_85651_20140108_222943_outLine +BABEL_OP2_302_86321_20140304_184505_outLine +BABEL_OP2_302_86321_20140304_190052_outLine +BABEL_OP2_302_86676_20131221_194024_inLine +BABEL_OP2_302_86676_20131221_194024_outLine +BABEL_OP2_302_86826_20140221_213850_inLine +BABEL_OP2_302_86826_20140221_213850_outLine +BABEL_OP2_302_86885_20140319_172338_inLine +BABEL_OP2_302_86885_20140319_172338_outLine +BABEL_OP2_302_86888_20131221_183239_inLine +BABEL_OP2_302_86888_20131221_183239_outLine +BABEL_OP2_302_86952_20131105_224050_inLine +BABEL_OP2_302_86952_20131105_224050_outLine +BABEL_OP2_302_87073_20131108_174654_inLine +BABEL_OP2_302_87073_20131108_174654_outLine +BABEL_OP2_302_87545_20140303_174324_inLine +BABEL_OP2_302_87545_20140303_174324_outLine +BABEL_OP2_302_87629_20131226_030820_inLine +BABEL_OP2_302_87629_20131226_030820_outLine +BABEL_OP2_302_90440_20140314_190637_inLine +BABEL_OP2_302_90440_20140314_190637_outLine +BABEL_OP2_302_90740_20140312_213002_inLine +BABEL_OP2_302_90740_20140312_213002_outLine +BABEL_OP2_302_91606_20140223_185235_inLine +BABEL_OP2_302_91606_20140223_185235_outLine +BABEL_OP2_302_91808_20140315_215351_inLine +BABEL_OP2_302_91808_20140315_215351_outLine +BABEL_OP2_302_91977_20131217_185642_inLine +BABEL_OP2_302_91977_20131217_185642_outLine +BABEL_OP2_302_92096_20140121_222052_inLine +BABEL_OP2_302_92096_20140121_222052_outLine +BABEL_OP2_302_92096_20140121_222833_inLine +BABEL_OP2_302_92096_20140121_222833_outLine +BABEL_OP2_302_92096_20140121_223620_inLine +BABEL_OP2_302_92096_20140121_223620_outLine +BABEL_OP2_302_92252_20140317_205453_outLine +BABEL_OP2_302_92740_20131217_174305_inLine +BABEL_OP2_302_92740_20131217_174305_outLine +BABEL_OP2_302_92886_20131204_201740_inLine +BABEL_OP2_302_93153_20131115_225858_inLine +BABEL_OP2_302_93153_20131115_225858_outLine +BABEL_OP2_302_93443_20140320_025047_inLine +BABEL_OP2_302_93443_20140320_025047_outLine +BABEL_OP2_302_93490_20140113_184331_inLine 
+BABEL_OP2_302_93490_20140113_184331_outLine +BABEL_OP2_302_93946_20140225_213901_inLine +BABEL_OP2_302_93946_20140225_213901_outLine +BABEL_OP2_302_94044_20140314_194724_inLine +BABEL_OP2_302_94044_20140314_194724_outLine +BABEL_OP2_302_94044_20140314_195844_inLine +BABEL_OP2_302_94044_20140314_195844_outLine +BABEL_OP2_302_94141_20140220_002237_inLine +BABEL_OP2_302_94141_20140220_002237_outLine +BABEL_OP2_302_94212_20140313_231224_inLine +BABEL_OP2_302_94212_20140313_231224_outLine +BABEL_OP2_302_94487_20140303_234032_inLine +BABEL_OP2_302_94487_20140303_234032_outLine +BABEL_OP2_302_94587_20140115_224719_inLine +BABEL_OP2_302_94587_20140115_224719_outLine +BABEL_OP2_302_94587_20140115_225600_inLine +BABEL_OP2_302_94587_20140115_225600_outLine +BABEL_OP2_302_94587_20140115_230344_inLine +BABEL_OP2_302_94587_20140115_230344_outLine +BABEL_OP2_302_95467_20140217_181554_inLine +BABEL_OP2_302_95467_20140217_181554_outLine +BABEL_OP2_302_95490_20131119_220530_inLine +BABEL_OP2_302_95490_20131119_220530_outLine +BABEL_OP2_302_95490_20131119_221754_inLine +BABEL_OP2_302_95490_20131119_221754_outLine +BABEL_OP2_302_95571_20140315_172644_inLine +BABEL_OP2_302_95571_20140315_172644_outLine +BABEL_OP2_302_95966_20131213_023122_inLine +BABEL_OP2_302_95966_20131213_023122_outLine +BABEL_OP2_302_96088_20140307_165731_inLine +BABEL_OP2_302_96088_20140307_165731_outLine +BABEL_OP2_302_96247_20140120_221340_inLine +BABEL_OP2_302_96247_20140120_221340_outLine +BABEL_OP2_302_96247_20140120_224135_inLine +BABEL_OP2_302_96247_20140120_224135_outLine +BABEL_OP2_302_96584_20140127_164106_inLine +BABEL_OP2_302_96584_20140127_164106_outLine +BABEL_OP2_302_96934_20131203_232255_inLine +BABEL_OP2_302_97570_20131223_175908_inLine +BABEL_OP2_302_97570_20131223_175908_outLine +BABEL_OP2_302_97570_20131223_180949_inLine +BABEL_OP2_302_97570_20131223_180949_outLine +BABEL_OP2_302_97849_20140203_203804_inLine +BABEL_OP2_302_97849_20140203_203804_outLine +BABEL_OP2_302_97896_20131212_155943_inLine +BABEL_OP2_302_97896_20131212_155943_outLine +BABEL_OP2_302_98165_20131218_211431_inLine +BABEL_OP2_302_98165_20131218_211431_outLine +BABEL_OP2_302_99202_20131226_015321_inLine +BABEL_OP2_302_99202_20131226_015321_outLine +BABEL_OP2_302_99264_20140111_215716_inLine +BABEL_OP2_302_99264_20140111_215716_outLine +BABEL_OP2_302_99487_20131109_171503_inLine +BABEL_OP2_302_99718_20131113_003931_inLine +BABEL_OP2_302_99718_20131113_003931_outLine +BABEL_OP2_302_99952_20140203_225818_inLine +BABEL_OP2_302_99955_20140127_230118_inLine +BABEL_OP2_302_99975_20140317_202757_inLine +BABEL_OP2_302_99975_20140317_202757_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/dev.2h.list b/egs/babel/s5d/conf/lists/303-telugu/dev.2h.list new file mode 100644 index 00000000000..2109ba73287 --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/dev.2h.list @@ -0,0 +1,126 @@ +BABEL_OP2_303_12220_20131108_232918_inLine +BABEL_OP2_303_12220_20131108_232918_outLine +BABEL_OP2_303_13040_20131115_232722_inLine +BABEL_OP2_303_13040_20131115_232722_outLine +BABEL_OP2_303_14158_20131204_203458_inLine +BABEL_OP2_303_14158_20131204_203458_outLine +BABEL_OP2_303_15466_20140204_191250_inLine +BABEL_OP2_303_15466_20140204_191250_outLine +BABEL_OP2_303_16056_20131030_201705_inLine +BABEL_OP2_303_16056_20131030_201705_outLine +BABEL_OP2_303_16475_20131109_024735_inLine +BABEL_OP2_303_16475_20131109_024735_outLine +BABEL_OP2_303_17280_20131105_033157_inLine +BABEL_OP2_303_17280_20131105_033157_outLine +BABEL_OP2_303_19703_20131114_213952_inLine 
+BABEL_OP2_303_19703_20131114_213952_outLine +BABEL_OP2_303_21029_20131112_180205_inLine +BABEL_OP2_303_21029_20131112_180205_outLine +BABEL_OP2_303_22965_20131114_213605_inLine +BABEL_OP2_303_22965_20131114_213605_outLine +BABEL_OP2_303_28585_20131204_042033_inLine +BABEL_OP2_303_28585_20131204_042033_outLine +BABEL_OP2_303_28600_20131201_001853_inLine +BABEL_OP2_303_28600_20131201_001853_outLine +BABEL_OP2_303_28945_20131111_210924_inLine +BABEL_OP2_303_28945_20131111_210924_outLine +BABEL_OP2_303_34197_20131105_003635_inLine +BABEL_OP2_303_34197_20131105_003635_outLine +BABEL_OP2_303_34336_20131114_162157_inLine +BABEL_OP2_303_34336_20131114_162157_outLine +BABEL_OP2_303_36059_20131218_034050_inLine +BABEL_OP2_303_36059_20131218_034050_outLine +BABEL_OP2_303_37499_20140129_194730_inLine +BABEL_OP2_303_37499_20140129_194730_outLine +BABEL_OP2_303_37499_20140130_010436_inLine +BABEL_OP2_303_37499_20140130_010436_outLine +BABEL_OP2_303_38554_20131024_205502_inLine +BABEL_OP2_303_38554_20131024_205502_outLine +BABEL_OP2_303_39848_20131113_195552_inLine +BABEL_OP2_303_39848_20131113_195552_outLine +BABEL_OP2_303_40713_20131111_182733_inLine +BABEL_OP2_303_40713_20131111_182733_outLine +BABEL_OP2_303_40740_20131205_003945_inLine +BABEL_OP2_303_40740_20131205_003945_outLine +BABEL_OP2_303_41272_20140204_204727_inLine +BABEL_OP2_303_41272_20140204_204727_outLine +BABEL_OP2_303_41400_20140222_205655_inLine +BABEL_OP2_303_41400_20140222_205655_outLine +BABEL_OP2_303_43794_20140131_221611_inLine +BABEL_OP2_303_43794_20140131_221611_outLine +BABEL_OP2_303_45560_20131029_184514_inLine +BABEL_OP2_303_45560_20131029_184514_outLine +BABEL_OP2_303_46333_20131102_160049_inLine +BABEL_OP2_303_46333_20131102_160049_outLine +BABEL_OP2_303_46702_20131023_225137_inLine +BABEL_OP2_303_46702_20131023_225137_outLine +BABEL_OP2_303_49287_20131115_193114_inLine +BABEL_OP2_303_49287_20131115_193114_outLine +BABEL_OP2_303_49306_20140204_203901_inLine +BABEL_OP2_303_49306_20140204_203901_outLine +BABEL_OP2_303_51858_20140219_183931_inLine +BABEL_OP2_303_51858_20140219_183931_outLine +BABEL_OP2_303_52854_20131105_013802_inLine +BABEL_OP2_303_52854_20131105_013802_outLine +BABEL_OP2_303_55818_20131027_191439_inLine +BABEL_OP2_303_55818_20131027_191439_outLine +BABEL_OP2_303_56684_20131205_182944_inLine +BABEL_OP2_303_56684_20131205_182944_outLine +BABEL_OP2_303_56720_20131122_215343_inLine +BABEL_OP2_303_56720_20131122_215343_outLine +BABEL_OP2_303_58734_20131109_181122_inLine +BABEL_OP2_303_58734_20131109_181122_outLine +BABEL_OP2_303_60474_20131113_232723_inLine +BABEL_OP2_303_60474_20131113_232723_outLine +BABEL_OP2_303_61167_20131104_210455_inLine +BABEL_OP2_303_61167_20131104_210455_outLine +BABEL_OP2_303_62289_20140222_212804_inLine +BABEL_OP2_303_62289_20140222_212804_outLine +BABEL_OP2_303_64759_20131104_194712_inLine +BABEL_OP2_303_64759_20131104_194712_outLine +BABEL_OP2_303_64759_20131104_195356_inLine +BABEL_OP2_303_64759_20131104_195356_outLine +BABEL_OP2_303_65370_20140222_225324_inLine +BABEL_OP2_303_65370_20140222_225324_outLine +BABEL_OP2_303_69574_20131027_004044_inLine +BABEL_OP2_303_69574_20131027_004044_outLine +BABEL_OP2_303_70110_20131025_151421_inLine +BABEL_OP2_303_70110_20131025_151421_outLine +BABEL_OP2_303_73119_20131115_162847_inLine +BABEL_OP2_303_73119_20131115_162847_outLine +BABEL_OP2_303_73119_20131115_164236_inLine +BABEL_OP2_303_73119_20131115_164236_outLine +BABEL_OP2_303_73446_20140111_183215_inLine +BABEL_OP2_303_73446_20140111_183215_outLine 
+BABEL_OP2_303_74280_20131025_160420_inLine +BABEL_OP2_303_74280_20131025_160420_outLine +BABEL_OP2_303_75064_20131114_174949_inLine +BABEL_OP2_303_75064_20131114_174949_outLine +BABEL_OP2_303_77112_20131114_020655_inLine +BABEL_OP2_303_77112_20131114_020655_outLine +BABEL_OP2_303_82089_20131111_003358_inLine +BABEL_OP2_303_82089_20131111_003358_outLine +BABEL_OP2_303_83455_20131115_205335_inLine +BABEL_OP2_303_83455_20131115_205335_outLine +BABEL_OP2_303_84709_20140205_175937_inLine +BABEL_OP2_303_84709_20140205_175937_outLine +BABEL_OP2_303_86472_20131204_195705_inLine +BABEL_OP2_303_86472_20131204_195705_outLine +BABEL_OP2_303_86557_20131025_175510_inLine +BABEL_OP2_303_86557_20131025_175510_outLine +BABEL_OP2_303_87073_20131027_001213_inLine +BABEL_OP2_303_87073_20131027_001213_outLine +BABEL_OP2_303_87629_20131114_030529_inLine +BABEL_OP2_303_87629_20131114_030529_outLine +BABEL_OP2_303_88988_20140218_203032_inLine +BABEL_OP2_303_88988_20140218_203032_outLine +BABEL_OP2_303_91825_20131025_170933_inLine +BABEL_OP2_303_91825_20131025_170933_outLine +BABEL_OP2_303_91977_20131130_190309_inLine +BABEL_OP2_303_91977_20131130_190309_outLine +BABEL_OP2_303_92096_20131226_204359_inLine +BABEL_OP2_303_92096_20131226_204359_outLine +BABEL_OP2_303_92509_20131027_003447_inLine +BABEL_OP2_303_92509_20131027_003447_outLine +BABEL_OP2_303_99487_20131027_195100_inLine +BABEL_OP2_303_99487_20131027_195100_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/dev.list b/egs/babel/s5d/conf/lists/303-telugu/dev.list new file mode 100644 index 00000000000..2109ba73287 --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/dev.list @@ -0,0 +1,126 @@ +BABEL_OP2_303_12220_20131108_232918_inLine +BABEL_OP2_303_12220_20131108_232918_outLine +BABEL_OP2_303_13040_20131115_232722_inLine +BABEL_OP2_303_13040_20131115_232722_outLine +BABEL_OP2_303_14158_20131204_203458_inLine +BABEL_OP2_303_14158_20131204_203458_outLine +BABEL_OP2_303_15466_20140204_191250_inLine +BABEL_OP2_303_15466_20140204_191250_outLine +BABEL_OP2_303_16056_20131030_201705_inLine +BABEL_OP2_303_16056_20131030_201705_outLine +BABEL_OP2_303_16475_20131109_024735_inLine +BABEL_OP2_303_16475_20131109_024735_outLine +BABEL_OP2_303_17280_20131105_033157_inLine +BABEL_OP2_303_17280_20131105_033157_outLine +BABEL_OP2_303_19703_20131114_213952_inLine +BABEL_OP2_303_19703_20131114_213952_outLine +BABEL_OP2_303_21029_20131112_180205_inLine +BABEL_OP2_303_21029_20131112_180205_outLine +BABEL_OP2_303_22965_20131114_213605_inLine +BABEL_OP2_303_22965_20131114_213605_outLine +BABEL_OP2_303_28585_20131204_042033_inLine +BABEL_OP2_303_28585_20131204_042033_outLine +BABEL_OP2_303_28600_20131201_001853_inLine +BABEL_OP2_303_28600_20131201_001853_outLine +BABEL_OP2_303_28945_20131111_210924_inLine +BABEL_OP2_303_28945_20131111_210924_outLine +BABEL_OP2_303_34197_20131105_003635_inLine +BABEL_OP2_303_34197_20131105_003635_outLine +BABEL_OP2_303_34336_20131114_162157_inLine +BABEL_OP2_303_34336_20131114_162157_outLine +BABEL_OP2_303_36059_20131218_034050_inLine +BABEL_OP2_303_36059_20131218_034050_outLine +BABEL_OP2_303_37499_20140129_194730_inLine +BABEL_OP2_303_37499_20140129_194730_outLine +BABEL_OP2_303_37499_20140130_010436_inLine +BABEL_OP2_303_37499_20140130_010436_outLine +BABEL_OP2_303_38554_20131024_205502_inLine +BABEL_OP2_303_38554_20131024_205502_outLine +BABEL_OP2_303_39848_20131113_195552_inLine +BABEL_OP2_303_39848_20131113_195552_outLine +BABEL_OP2_303_40713_20131111_182733_inLine +BABEL_OP2_303_40713_20131111_182733_outLine 
+BABEL_OP2_303_40740_20131205_003945_inLine +BABEL_OP2_303_40740_20131205_003945_outLine +BABEL_OP2_303_41272_20140204_204727_inLine +BABEL_OP2_303_41272_20140204_204727_outLine +BABEL_OP2_303_41400_20140222_205655_inLine +BABEL_OP2_303_41400_20140222_205655_outLine +BABEL_OP2_303_43794_20140131_221611_inLine +BABEL_OP2_303_43794_20140131_221611_outLine +BABEL_OP2_303_45560_20131029_184514_inLine +BABEL_OP2_303_45560_20131029_184514_outLine +BABEL_OP2_303_46333_20131102_160049_inLine +BABEL_OP2_303_46333_20131102_160049_outLine +BABEL_OP2_303_46702_20131023_225137_inLine +BABEL_OP2_303_46702_20131023_225137_outLine +BABEL_OP2_303_49287_20131115_193114_inLine +BABEL_OP2_303_49287_20131115_193114_outLine +BABEL_OP2_303_49306_20140204_203901_inLine +BABEL_OP2_303_49306_20140204_203901_outLine +BABEL_OP2_303_51858_20140219_183931_inLine +BABEL_OP2_303_51858_20140219_183931_outLine +BABEL_OP2_303_52854_20131105_013802_inLine +BABEL_OP2_303_52854_20131105_013802_outLine +BABEL_OP2_303_55818_20131027_191439_inLine +BABEL_OP2_303_55818_20131027_191439_outLine +BABEL_OP2_303_56684_20131205_182944_inLine +BABEL_OP2_303_56684_20131205_182944_outLine +BABEL_OP2_303_56720_20131122_215343_inLine +BABEL_OP2_303_56720_20131122_215343_outLine +BABEL_OP2_303_58734_20131109_181122_inLine +BABEL_OP2_303_58734_20131109_181122_outLine +BABEL_OP2_303_60474_20131113_232723_inLine +BABEL_OP2_303_60474_20131113_232723_outLine +BABEL_OP2_303_61167_20131104_210455_inLine +BABEL_OP2_303_61167_20131104_210455_outLine +BABEL_OP2_303_62289_20140222_212804_inLine +BABEL_OP2_303_62289_20140222_212804_outLine +BABEL_OP2_303_64759_20131104_194712_inLine +BABEL_OP2_303_64759_20131104_194712_outLine +BABEL_OP2_303_64759_20131104_195356_inLine +BABEL_OP2_303_64759_20131104_195356_outLine +BABEL_OP2_303_65370_20140222_225324_inLine +BABEL_OP2_303_65370_20140222_225324_outLine +BABEL_OP2_303_69574_20131027_004044_inLine +BABEL_OP2_303_69574_20131027_004044_outLine +BABEL_OP2_303_70110_20131025_151421_inLine +BABEL_OP2_303_70110_20131025_151421_outLine +BABEL_OP2_303_73119_20131115_162847_inLine +BABEL_OP2_303_73119_20131115_162847_outLine +BABEL_OP2_303_73119_20131115_164236_inLine +BABEL_OP2_303_73119_20131115_164236_outLine +BABEL_OP2_303_73446_20140111_183215_inLine +BABEL_OP2_303_73446_20140111_183215_outLine +BABEL_OP2_303_74280_20131025_160420_inLine +BABEL_OP2_303_74280_20131025_160420_outLine +BABEL_OP2_303_75064_20131114_174949_inLine +BABEL_OP2_303_75064_20131114_174949_outLine +BABEL_OP2_303_77112_20131114_020655_inLine +BABEL_OP2_303_77112_20131114_020655_outLine +BABEL_OP2_303_82089_20131111_003358_inLine +BABEL_OP2_303_82089_20131111_003358_outLine +BABEL_OP2_303_83455_20131115_205335_inLine +BABEL_OP2_303_83455_20131115_205335_outLine +BABEL_OP2_303_84709_20140205_175937_inLine +BABEL_OP2_303_84709_20140205_175937_outLine +BABEL_OP2_303_86472_20131204_195705_inLine +BABEL_OP2_303_86472_20131204_195705_outLine +BABEL_OP2_303_86557_20131025_175510_inLine +BABEL_OP2_303_86557_20131025_175510_outLine +BABEL_OP2_303_87073_20131027_001213_inLine +BABEL_OP2_303_87073_20131027_001213_outLine +BABEL_OP2_303_87629_20131114_030529_inLine +BABEL_OP2_303_87629_20131114_030529_outLine +BABEL_OP2_303_88988_20140218_203032_inLine +BABEL_OP2_303_88988_20140218_203032_outLine +BABEL_OP2_303_91825_20131025_170933_inLine +BABEL_OP2_303_91825_20131025_170933_outLine +BABEL_OP2_303_91977_20131130_190309_inLine +BABEL_OP2_303_91977_20131130_190309_outLine +BABEL_OP2_303_92096_20131226_204359_inLine 
+BABEL_OP2_303_92096_20131226_204359_outLine +BABEL_OP2_303_92509_20131027_003447_inLine +BABEL_OP2_303_92509_20131027_003447_outLine +BABEL_OP2_303_99487_20131027_195100_inLine +BABEL_OP2_303_99487_20131027_195100_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/eval.list b/egs/babel/s5d/conf/lists/303-telugu/eval.list new file mode 100644 index 00000000000..e40856e3e9d --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/eval.list @@ -0,0 +1,192 @@ +BABEL_OP2_303_10416_20131110_200711_inLine +BABEL_OP2_303_10416_20131110_200711_outLine +BABEL_OP2_303_10416_20131110_202619_inLine +BABEL_OP2_303_10416_20131110_202619_outLine +BABEL_OP2_303_10974_20131115_172420_inLine +BABEL_OP2_303_10974_20131115_172420_outLine +BABEL_OP2_303_10974_20131115_193956_inLine +BABEL_OP2_303_10974_20131115_193956_outLine +BABEL_OP2_303_11096_20140214_163324_inLine +BABEL_OP2_303_11096_20140214_163324_outLine +BABEL_OP2_303_12321_20131129_164832_inLine +BABEL_OP2_303_12321_20131129_164832_outLine +BABEL_OP2_303_12635_20131211_184855_inLine +BABEL_OP2_303_12635_20131211_184855_outLine +BABEL_OP2_303_12916_20131029_201419_inLine +BABEL_OP2_303_12916_20131029_201419_outLine +BABEL_OP2_303_14729_20131215_013912_inLine +BABEL_OP2_303_14729_20131215_013912_outLine +BABEL_OP2_303_17115_20140211_231649_inLine +BABEL_OP2_303_17115_20140211_231649_outLine +BABEL_OP2_303_17165_20131113_202255_inLine +BABEL_OP2_303_17165_20131113_202255_outLine +BABEL_OP2_303_19120_20131224_010850_inLine +BABEL_OP2_303_19120_20131224_010850_outLine +BABEL_OP2_303_23151_20131206_220005_inLine +BABEL_OP2_303_23151_20131206_220005_outLine +BABEL_OP2_303_23983_20140201_224449_inLine +BABEL_OP2_303_23983_20140201_224449_outLine +BABEL_OP2_303_24033_20131205_013346_inLine +BABEL_OP2_303_24033_20131205_013346_outLine +BABEL_OP2_303_26206_20131116_212034_inLine +BABEL_OP2_303_26206_20131116_212034_outLine +BABEL_OP2_303_27218_20131101_202112_inLine +BABEL_OP2_303_27218_20131101_202112_outLine +BABEL_OP2_303_28422_20131130_210214_inLine +BABEL_OP2_303_28422_20131130_210214_outLine +BABEL_OP2_303_29168_20131105_002039_inLine +BABEL_OP2_303_29168_20131105_002039_outLine +BABEL_OP2_303_31668_20140204_210838_inLine +BABEL_OP2_303_31668_20140204_210838_outLine +BABEL_OP2_303_31992_20131107_183659_inLine +BABEL_OP2_303_31992_20131107_183659_outLine +BABEL_OP2_303_32872_20140127_213914_inLine +BABEL_OP2_303_32872_20140127_213914_outLine +BABEL_OP2_303_32961_20140218_193151_inLine +BABEL_OP2_303_32961_20140218_193151_outLine +BABEL_OP2_303_33635_20131109_185729_inLine +BABEL_OP2_303_33635_20131109_185729_outLine +BABEL_OP2_303_34019_20140219_191126_inLine +BABEL_OP2_303_34019_20140219_191126_outLine +BABEL_OP2_303_34019_20140219_192321_inLine +BABEL_OP2_303_34019_20140219_192321_outLine +BABEL_OP2_303_34688_20131031_000954_inLine +BABEL_OP2_303_34688_20131031_000954_outLine +BABEL_OP2_303_37853_20131230_224659_inLine +BABEL_OP2_303_37853_20131230_224659_outLine +BABEL_OP2_303_39159_20131024_202413_inLine +BABEL_OP2_303_39159_20131024_202413_outLine +BABEL_OP2_303_42600_20131114_231539_inLine +BABEL_OP2_303_42600_20131114_231539_outLine +BABEL_OP2_303_43990_20140204_202831_inLine +BABEL_OP2_303_43990_20140204_202831_outLine +BABEL_OP2_303_44290_20140204_193649_inLine +BABEL_OP2_303_44290_20140204_193649_outLine +BABEL_OP2_303_45642_20131114_201049_inLine +BABEL_OP2_303_45642_20131114_201049_outLine +BABEL_OP2_303_45642_20131114_203559_inLine +BABEL_OP2_303_45642_20131114_203559_outLine +BABEL_OP2_303_45770_20131029_180305_inLine 
+BABEL_OP2_303_45770_20131029_180305_outLine +BABEL_OP2_303_45908_20140211_224100_inLine +BABEL_OP2_303_45908_20140211_224100_outLine +BABEL_OP2_303_46974_20131116_205026_inLine +BABEL_OP2_303_46974_20131116_205026_outLine +BABEL_OP2_303_47959_20131113_020835_inLine +BABEL_OP2_303_47959_20131113_020835_outLine +BABEL_OP2_303_48610_20131031_175448_inLine +BABEL_OP2_303_48610_20131031_175448_outLine +BABEL_OP2_303_49775_20131029_201844_inLine +BABEL_OP2_303_49775_20131029_201844_outLine +BABEL_OP2_303_49812_20131208_222038_inLine +BABEL_OP2_303_49812_20131208_222038_outLine +BABEL_OP2_303_51530_20140203_190540_inLine +BABEL_OP2_303_51530_20140203_190540_outLine +BABEL_OP2_303_52025_20131025_023135_inLine +BABEL_OP2_303_52025_20131025_023135_outLine +BABEL_OP2_303_52422_20131205_220934_inLine +BABEL_OP2_303_52422_20131205_220934_outLine +BABEL_OP2_303_52442_20131115_192454_inLine +BABEL_OP2_303_52442_20131115_192454_outLine +BABEL_OP2_303_52614_20140204_214212_inLine +BABEL_OP2_303_52614_20140204_214212_outLine +BABEL_OP2_303_53072_20140116_175409_inLine +BABEL_OP2_303_53072_20140116_175409_outLine +BABEL_OP2_303_56090_20131108_182022_inLine +BABEL_OP2_303_56090_20131108_182022_outLine +BABEL_OP2_303_57678_20131112_230248_inLine +BABEL_OP2_303_57678_20131112_230248_outLine +BABEL_OP2_303_58061_20140219_230114_inLine +BABEL_OP2_303_58061_20140219_230114_outLine +BABEL_OP2_303_59898_20131101_004202_inLine +BABEL_OP2_303_59898_20131101_004202_outLine +BABEL_OP2_303_59928_20131113_223724_inLine +BABEL_OP2_303_59928_20131113_223724_outLine +BABEL_OP2_303_59928_20131113_225824_inLine +BABEL_OP2_303_59928_20131113_225824_outLine +BABEL_OP2_303_60026_20131107_170611_inLine +BABEL_OP2_303_60026_20131107_170611_outLine +BABEL_OP2_303_60626_20131111_190013_inLine +BABEL_OP2_303_60626_20131111_190013_outLine +BABEL_OP2_303_62852_20131105_205005_inLine +BABEL_OP2_303_62852_20131105_205005_outLine +BABEL_OP2_303_63481_20131028_222923_inLine +BABEL_OP2_303_63481_20131028_222923_outLine +BABEL_OP2_303_63523_20140211_213504_inLine +BABEL_OP2_303_63523_20140211_213504_outLine +BABEL_OP2_303_64638_20131202_192509_inLine +BABEL_OP2_303_64638_20131202_192509_outLine +BABEL_OP2_303_64796_20131114_235122_inLine +BABEL_OP2_303_64796_20131114_235122_outLine +BABEL_OP2_303_65640_20140203_210724_inLine +BABEL_OP2_303_65640_20140203_210724_outLine +BABEL_OP2_303_66026_20131201_225144_inLine +BABEL_OP2_303_66026_20131201_225144_outLine +BABEL_OP2_303_66837_20131116_170219_inLine +BABEL_OP2_303_66837_20131116_170219_outLine +BABEL_OP2_303_66959_20131201_000211_inLine +BABEL_OP2_303_66959_20131201_000211_outLine +BABEL_OP2_303_66967_20131026_202801_inLine +BABEL_OP2_303_66967_20131026_202801_outLine +BABEL_OP2_303_67373_20131115_001228_inLine +BABEL_OP2_303_67373_20131115_001228_outLine +BABEL_OP2_303_67389_20140205_200604_inLine +BABEL_OP2_303_67389_20140205_200604_outLine +BABEL_OP2_303_67389_20140205_201314_inLine +BABEL_OP2_303_67389_20140205_201314_outLine +BABEL_OP2_303_70282_20131115_224940_inLine +BABEL_OP2_303_70282_20131115_224940_outLine +BABEL_OP2_303_73301_20131113_213007_inLine +BABEL_OP2_303_73301_20131113_213007_outLine +BABEL_OP2_303_74253_20131118_232619_inLine +BABEL_OP2_303_74253_20131118_232619_outLine +BABEL_OP2_303_75359_20140222_204832_inLine +BABEL_OP2_303_75359_20140222_204832_outLine +BABEL_OP2_303_77567_20131107_170005_inLine +BABEL_OP2_303_77567_20131107_170005_outLine +BABEL_OP2_303_79139_20131113_181752_inLine +BABEL_OP2_303_79139_20131113_181752_outLine 
+BABEL_OP2_303_79858_20131109_210103_inLine +BABEL_OP2_303_79858_20131109_210103_outLine +BABEL_OP2_303_81229_20131115_205519_inLine +BABEL_OP2_303_81229_20131115_205519_outLine +BABEL_OP2_303_81392_20131118_201348_inLine +BABEL_OP2_303_81392_20131118_201348_outLine +BABEL_OP2_303_81404_20131105_042501_inLine +BABEL_OP2_303_81404_20131105_042501_outLine +BABEL_OP2_303_83436_20131027_190144_inLine +BABEL_OP2_303_83436_20131027_190144_outLine +BABEL_OP2_303_84055_20131215_032429_inLine +BABEL_OP2_303_84055_20131215_032429_outLine +BABEL_OP2_303_84583_20131114_154624_inLine +BABEL_OP2_303_84583_20131114_154624_outLine +BABEL_OP2_303_87545_20140203_185743_inLine +BABEL_OP2_303_87545_20140203_185743_outLine +BABEL_OP2_303_87921_20131204_182122_inLine +BABEL_OP2_303_87921_20131204_182122_outLine +BABEL_OP2_303_89330_20140219_012432_inLine +BABEL_OP2_303_89330_20140219_012432_outLine +BABEL_OP2_303_93224_20131114_192358_inLine +BABEL_OP2_303_93224_20131114_192358_outLine +BABEL_OP2_303_94587_20131120_180235_inLine +BABEL_OP2_303_94587_20131120_180235_outLine +BABEL_OP2_303_95294_20131204_200315_inLine +BABEL_OP2_303_95294_20131204_200315_outLine +BABEL_OP2_303_95571_20140219_211426_inLine +BABEL_OP2_303_95571_20140219_211426_outLine +BABEL_OP2_303_96405_20131113_205241_inLine +BABEL_OP2_303_96405_20131113_205241_outLine +BABEL_OP2_303_96504_20131113_192045_inLine +BABEL_OP2_303_96504_20131113_192045_outLine +BABEL_OP2_303_96934_20131115_014431_inLine +BABEL_OP2_303_96934_20131115_014431_outLine +BABEL_OP2_303_96985_20131030_204329_inLine +BABEL_OP2_303_96985_20131030_204329_outLine +BABEL_OP2_303_97570_20131115_235518_inLine +BABEL_OP2_303_97570_20131115_235518_outLine +BABEL_OP2_303_97849_20140203_203326_inLine +BABEL_OP2_303_97849_20140203_203326_outLine +BABEL_OP2_303_99516_20131026_193835_inLine +BABEL_OP2_303_99516_20131026_193835_outLine +BABEL_OP2_303_99718_20131031_171828_inLine +BABEL_OP2_303_99718_20131031_171828_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/evalpart1.list b/egs/babel/s5d/conf/lists/303-telugu/evalpart1.list new file mode 100644 index 00000000000..528cd0840d9 --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/evalpart1.list @@ -0,0 +1,62 @@ +BABEL_OP2_303_11096_20140214_163324_inLine +BABEL_OP2_303_11096_20140214_163324_outLine +BABEL_OP2_303_14729_20131215_013912_inLine +BABEL_OP2_303_14729_20131215_013912_outLine +BABEL_OP2_303_17165_20131113_202255_inLine +BABEL_OP2_303_17165_20131113_202255_outLine +BABEL_OP2_303_23983_20140201_224449_inLine +BABEL_OP2_303_23983_20140201_224449_outLine +BABEL_OP2_303_24033_20131205_013346_inLine +BABEL_OP2_303_24033_20131205_013346_outLine +BABEL_OP2_303_29168_20131105_002039_inLine +BABEL_OP2_303_29168_20131105_002039_outLine +BABEL_OP2_303_32872_20140127_213914_inLine +BABEL_OP2_303_32872_20140127_213914_outLine +BABEL_OP2_303_33635_20131109_185729_inLine +BABEL_OP2_303_33635_20131109_185729_outLine +BABEL_OP2_303_34019_20140219_191126_inLine +BABEL_OP2_303_34019_20140219_191126_outLine +BABEL_OP2_303_34019_20140219_192321_inLine +BABEL_OP2_303_34019_20140219_192321_outLine +BABEL_OP2_303_44290_20140204_193649_inLine +BABEL_OP2_303_44290_20140204_193649_outLine +BABEL_OP2_303_47959_20131113_020835_inLine +BABEL_OP2_303_47959_20131113_020835_outLine +BABEL_OP2_303_49775_20131029_201844_inLine +BABEL_OP2_303_49775_20131029_201844_outLine +BABEL_OP2_303_52442_20131115_192454_inLine +BABEL_OP2_303_52442_20131115_192454_outLine +BABEL_OP2_303_56090_20131108_182022_inLine +BABEL_OP2_303_56090_20131108_182022_outLine 
+BABEL_OP2_303_60626_20131111_190013_inLine +BABEL_OP2_303_60626_20131111_190013_outLine +BABEL_OP2_303_63481_20131028_222923_inLine +BABEL_OP2_303_63481_20131028_222923_outLine +BABEL_OP2_303_63523_20140211_213504_inLine +BABEL_OP2_303_63523_20140211_213504_outLine +BABEL_OP2_303_66959_20131201_000211_inLine +BABEL_OP2_303_66959_20131201_000211_outLine +BABEL_OP2_303_66967_20131026_202801_inLine +BABEL_OP2_303_66967_20131026_202801_outLine +BABEL_OP2_303_74253_20131118_232619_inLine +BABEL_OP2_303_74253_20131118_232619_outLine +BABEL_OP2_303_75359_20140222_204832_inLine +BABEL_OP2_303_75359_20140222_204832_outLine +BABEL_OP2_303_77567_20131107_170005_inLine +BABEL_OP2_303_77567_20131107_170005_outLine +BABEL_OP2_303_79858_20131109_210103_inLine +BABEL_OP2_303_79858_20131109_210103_outLine +BABEL_OP2_303_81229_20131115_205519_inLine +BABEL_OP2_303_81229_20131115_205519_outLine +BABEL_OP2_303_84583_20131114_154624_inLine +BABEL_OP2_303_84583_20131114_154624_outLine +BABEL_OP2_303_89330_20140219_012432_inLine +BABEL_OP2_303_89330_20140219_012432_outLine +BABEL_OP2_303_95294_20131204_200315_inLine +BABEL_OP2_303_95294_20131204_200315_outLine +BABEL_OP2_303_95571_20140219_211426_inLine +BABEL_OP2_303_95571_20140219_211426_outLine +BABEL_OP2_303_96934_20131115_014431_inLine +BABEL_OP2_303_96934_20131115_014431_outLine +BABEL_OP2_303_97570_20131115_235518_inLine +BABEL_OP2_303_97570_20131115_235518_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/sub-train.list b/egs/babel/s5d/conf/lists/303-telugu/sub-train.list new file mode 100644 index 00000000000..3694701cd97 --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/sub-train.list @@ -0,0 +1,134 @@ +BABEL_OP2_303_10188_20131108_175933_inLine +BABEL_OP2_303_10188_20131108_175933_outLine +BABEL_OP2_303_11673_20131026_034803_inLine +BABEL_OP2_303_11673_20131026_034803_outLine +BABEL_OP2_303_13030_20131109_023950_inLine +BABEL_OP2_303_13030_20131109_023950_outLine +BABEL_OP2_303_14875_20131112_211504_inLine +BABEL_OP2_303_14875_20131112_211504_outLine +BABEL_OP2_303_14929_20131112_164303_inLine +BABEL_OP2_303_14929_20131112_164303_outLine +BABEL_OP2_303_14929_20131112_165202_inLine +BABEL_OP2_303_14929_20131112_165202_outLine +BABEL_OP2_303_14929_20131112_171242_inLine +BABEL_OP2_303_14929_20131112_171242_outLine +BABEL_OP2_303_17127_20131224_002728_inLine +BABEL_OP2_303_17127_20131224_002728_outLine +BABEL_OP2_303_18380_20131111_015535_inLine +BABEL_OP2_303_18380_20131111_015535_outLine +BABEL_OP2_303_18380_20131119_224151_inLine +BABEL_OP2_303_18380_20131119_224151_outLine +BABEL_OP2_303_21435_20131226_175809_inLine +BABEL_OP2_303_21435_20131226_175809_outLine +BABEL_OP2_303_21435_20131226_181138_inLine +BABEL_OP2_303_21435_20131226_181138_outLine +BABEL_OP2_303_23681_20140119_223006_inLine +BABEL_OP2_303_23681_20140119_223006_outLine +BABEL_OP2_303_24231_20140201_230638_inLine +BABEL_OP2_303_24231_20140201_230638_outLine +BABEL_OP2_303_24589_20131114_182843_inLine +BABEL_OP2_303_24589_20131114_182843_outLine +BABEL_OP2_303_25767_20131028_161454_inLine +BABEL_OP2_303_25767_20131028_161454_outLine +BABEL_OP2_303_25961_20131030_225755_inLine +BABEL_OP2_303_25961_20131030_225755_outLine +BABEL_OP2_303_31490_20131105_010342_inLine +BABEL_OP2_303_31490_20131105_010342_outLine +BABEL_OP2_303_31490_20131105_011345_inLine +BABEL_OP2_303_31490_20131105_011345_outLine +BABEL_OP2_303_32861_20131216_223500_inLine +BABEL_OP2_303_32861_20131216_223500_outLine +BABEL_OP2_303_33704_20131210_195453_inLine +BABEL_OP2_303_33704_20131210_195453_outLine 
+BABEL_OP2_303_35069_20131205_165127_inLine +BABEL_OP2_303_35069_20131205_165127_outLine +BABEL_OP2_303_36341_20131024_221132_inLine +BABEL_OP2_303_36341_20131024_221132_outLine +BABEL_OP2_303_36669_20131110_155909_inLine +BABEL_OP2_303_36669_20131110_155909_outLine +BABEL_OP2_303_37682_20131105_023703_inLine +BABEL_OP2_303_37682_20131105_023703_outLine +BABEL_OP2_303_39307_20131027_043600_inLine +BABEL_OP2_303_39307_20131027_043600_outLine +BABEL_OP2_303_40565_20131116_182747_inLine +BABEL_OP2_303_40565_20131116_182747_outLine +BABEL_OP2_303_41493_20131027_155001_inLine +BABEL_OP2_303_41493_20131027_155001_outLine +BABEL_OP2_303_42718_20140118_201247_inLine +BABEL_OP2_303_42718_20140118_201247_outLine +BABEL_OP2_303_43115_20140201_195115_inLine +BABEL_OP2_303_43115_20140201_195115_outLine +BABEL_OP2_303_43789_20131111_163502_inLine +BABEL_OP2_303_43789_20131111_163502_outLine +BABEL_OP2_303_46550_20131111_233520_inLine +BABEL_OP2_303_46550_20131111_233520_outLine +BABEL_OP2_303_46558_20131028_190003_inLine +BABEL_OP2_303_46558_20131028_190003_outLine +BABEL_OP2_303_47823_20131201_004209_inLine +BABEL_OP2_303_47823_20131201_004209_outLine +BABEL_OP2_303_50726_20131028_210641_inLine +BABEL_OP2_303_50726_20131028_210641_outLine +BABEL_OP2_303_51540_20131204_041920_inLine +BABEL_OP2_303_51540_20131204_041920_outLine +BABEL_OP2_303_60538_20131111_200459_inLine +BABEL_OP2_303_60538_20131111_200459_outLine +BABEL_OP2_303_63084_20131115_202655_inLine +BABEL_OP2_303_63084_20131115_202655_outLine +BABEL_OP2_303_64768_20131113_203120_inLine +BABEL_OP2_303_64768_20131113_203120_outLine +BABEL_OP2_303_65077_20131024_174953_inLine +BABEL_OP2_303_65077_20131024_174953_outLine +BABEL_OP2_303_67964_20140222_211658_inLine +BABEL_OP2_303_67964_20140222_211658_outLine +BABEL_OP2_303_69107_20131113_222827_inLine +BABEL_OP2_303_69107_20131113_222827_outLine +BABEL_OP2_303_69633_20131130_193122_inLine +BABEL_OP2_303_69633_20131130_193122_outLine +BABEL_OP2_303_72587_20131115_221128_inLine +BABEL_OP2_303_72587_20131115_221128_outLine +BABEL_OP2_303_73990_20140219_201105_inLine +BABEL_OP2_303_73990_20140219_201105_outLine +BABEL_OP2_303_73990_20140219_202300_inLine +BABEL_OP2_303_73990_20140219_202300_outLine +BABEL_OP2_303_74886_20131101_194728_inLine +BABEL_OP2_303_74886_20131101_194728_outLine +BABEL_OP2_303_75365_20140218_173521_inLine +BABEL_OP2_303_75365_20140218_173521_outLine +BABEL_OP2_303_76756_20131115_182926_inLine +BABEL_OP2_303_76756_20131115_182926_outLine +BABEL_OP2_303_78454_20131114_230026_inLine +BABEL_OP2_303_78454_20131114_230026_outLine +BABEL_OP2_303_79820_20131114_181827_inLine +BABEL_OP2_303_79820_20131114_181827_outLine +BABEL_OP2_303_80881_20131027_165716_inLine +BABEL_OP2_303_80881_20131027_165716_outLine +BABEL_OP2_303_81424_20131120_192659_inLine +BABEL_OP2_303_81424_20131120_192659_outLine +BABEL_OP2_303_83935_20131122_222948_inLine +BABEL_OP2_303_83935_20131122_222948_outLine +BABEL_OP2_303_84061_20131104_224830_inLine +BABEL_OP2_303_84061_20131104_224830_outLine +BABEL_OP2_303_84327_20131122_203936_inLine +BABEL_OP2_303_84327_20131122_203936_outLine +BABEL_OP2_303_85248_20131206_184028_inLine +BABEL_OP2_303_85248_20131206_184028_outLine +BABEL_OP2_303_86952_20131105_173230_inLine +BABEL_OP2_303_86952_20131105_173230_outLine +BABEL_OP2_303_87884_20131206_022424_inLine +BABEL_OP2_303_87884_20131206_022424_outLine +BABEL_OP2_303_87889_20131213_215703_inLine +BABEL_OP2_303_87889_20131213_215703_outLine +BABEL_OP2_303_88982_20131115_181618_inLine 
+BABEL_OP2_303_88982_20131115_181618_outLine +BABEL_OP2_303_90080_20131228_233334_inLine +BABEL_OP2_303_90080_20131228_233334_outLine +BABEL_OP2_303_90740_20140221_220031_inLine +BABEL_OP2_303_90740_20140221_220031_outLine +BABEL_OP2_303_92176_20131115_153306_inLine +BABEL_OP2_303_92176_20131115_153306_outLine +BABEL_OP2_303_96324_20131107_162546_inLine +BABEL_OP2_303_96324_20131107_162546_outLine +BABEL_OP2_303_97988_20131204_195626_inLine +BABEL_OP2_303_97988_20131204_195626_outLine +BABEL_OP2_303_97988_20131204_211137_inLine +BABEL_OP2_303_97988_20131204_211137_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/303-telugu/sub-train.untranscribed.list new file mode 100644 index 00000000000..7d4ce3b8a3d --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/sub-train.untranscribed.list @@ -0,0 +1,380 @@ +BABEL_OP2_303_10058_20140205_001109_inLine +BABEL_OP2_303_10058_20140205_001109_outLine +BABEL_OP2_303_10638_20140218_213711_inLine +BABEL_OP2_303_10638_20140218_213711_outLine +BABEL_OP2_303_10938_20131104_204555_inLine +BABEL_OP2_303_10938_20131104_204555_outLine +BABEL_OP2_303_11352_20131224_005439_inLine +BABEL_OP2_303_11352_20131224_005439_outLine +BABEL_OP2_303_12036_20131101_174653_inLine +BABEL_OP2_303_12036_20131101_174653_outLine +BABEL_OP2_303_12242_20131113_222307_inLine +BABEL_OP2_303_12242_20131113_222307_outLine +BABEL_OP2_303_13324_20131107_211806_inLine +BABEL_OP2_303_13324_20131107_211806_outLine +BABEL_OP2_303_13586_20131115_180921_inLine +BABEL_OP2_303_13586_20131115_180921_outLine +BABEL_OP2_303_13664_20131108_184651_inLine +BABEL_OP2_303_13664_20131108_184651_outLine +BABEL_OP2_303_13744_20131026_234054_inLine +BABEL_OP2_303_13744_20131026_234054_outLine +BABEL_OP2_303_14229_20131114_032214_inLine +BABEL_OP2_303_14229_20131114_032214_outLine +BABEL_OP2_303_14350_20131105_195640_inLine +BABEL_OP2_303_14350_20131105_195640_outLine +BABEL_OP2_303_14899_20131102_204324_inLine +BABEL_OP2_303_14899_20131102_204324_outLine +BABEL_OP2_303_14972_20131114_023627_inLine +BABEL_OP2_303_14972_20131114_023627_outLine +BABEL_OP2_303_15702_20131206_225729_inLine +BABEL_OP2_303_15702_20131206_225729_outLine +BABEL_OP2_303_15730_20131101_163118_inLine +BABEL_OP2_303_15730_20131101_163118_outLine +BABEL_OP2_303_16184_20131204_033225_inLine +BABEL_OP2_303_16184_20131204_033225_outLine +BABEL_OP2_303_16839_20131223_215734_inLine +BABEL_OP2_303_16839_20131223_215734_outLine +BABEL_OP2_303_16886_20131108_204525_inLine +BABEL_OP2_303_16886_20131108_204525_outLine +BABEL_OP2_303_16938_20131112_015544_inLine +BABEL_OP2_303_16938_20131112_015544_outLine +BABEL_OP2_303_17520_20131114_164811_inLine +BABEL_OP2_303_17520_20131114_164811_outLine +BABEL_OP2_303_18242_20140218_014910_inLine +BABEL_OP2_303_18242_20140218_014910_outLine +BABEL_OP2_303_18924_20131112_001935_inLine +BABEL_OP2_303_18924_20131112_001935_outLine +BABEL_OP2_303_20437_20140202_232910_inLine +BABEL_OP2_303_20437_20140202_232910_outLine +BABEL_OP2_303_20437_20140202_234756_inLine +BABEL_OP2_303_20437_20140202_234756_outLine +BABEL_OP2_303_20985_20131122_183435_inLine +BABEL_OP2_303_20985_20131122_183435_outLine +BABEL_OP2_303_23006_20131113_150924_inLine +BABEL_OP2_303_23006_20131113_150924_outLine +BABEL_OP2_303_23046_20131114_171927_inLine +BABEL_OP2_303_23046_20131114_171927_outLine +BABEL_OP2_303_23239_20131206_181414_inLine +BABEL_OP2_303_23239_20131206_181414_outLine +BABEL_OP2_303_23260_20140203_194817_inLine +BABEL_OP2_303_23260_20140203_194817_outLine 
+BABEL_OP2_303_23505_20131109_184015_inLine +BABEL_OP2_303_23505_20131109_184015_outLine +BABEL_OP2_303_23980_20131114_202648_inLine +BABEL_OP2_303_23980_20131114_202648_outLine +BABEL_OP2_303_24010_20140218_224141_inLine +BABEL_OP2_303_24010_20140218_224141_outLine +BABEL_OP2_303_24323_20131201_180512_inLine +BABEL_OP2_303_24323_20131201_180512_outLine +BABEL_OP2_303_24470_20131204_174323_inLine +BABEL_OP2_303_24470_20131204_174323_outLine +BABEL_OP2_303_24982_20131114_012226_inLine +BABEL_OP2_303_24982_20131114_012226_outLine +BABEL_OP2_303_25198_20140121_180931_inLine +BABEL_OP2_303_25198_20140121_180931_outLine +BABEL_OP2_303_25719_20131205_191053_inLine +BABEL_OP2_303_25719_20131205_191053_outLine +BABEL_OP2_303_26072_20131216_221839_inLine +BABEL_OP2_303_26072_20131216_221839_outLine +BABEL_OP2_303_26388_20131113_034454_inLine +BABEL_OP2_303_26388_20131113_034454_outLine +BABEL_OP2_303_27125_20131024_195716_inLine +BABEL_OP2_303_27125_20131024_195716_outLine +BABEL_OP2_303_27590_20131118_222641_inLine +BABEL_OP2_303_27590_20131118_222641_outLine +BABEL_OP2_303_28419_20131113_195258_inLine +BABEL_OP2_303_28419_20131113_195258_outLine +BABEL_OP2_303_29404_20131224_014921_inLine +BABEL_OP2_303_29404_20131224_014921_outLine +BABEL_OP2_303_29482_20140219_221449_inLine +BABEL_OP2_303_29482_20140219_221449_outLine +BABEL_OP2_303_29685_20131105_180851_inLine +BABEL_OP2_303_29685_20131105_180851_outLine +BABEL_OP2_303_30013_20131116_185844_inLine +BABEL_OP2_303_30013_20131116_185844_outLine +BABEL_OP2_303_30345_20131224_005453_inLine +BABEL_OP2_303_30345_20131224_005453_outLine +BABEL_OP2_303_30395_20131112_004350_inLine +BABEL_OP2_303_30395_20131112_004350_outLine +BABEL_OP2_303_30645_20131029_193530_inLine +BABEL_OP2_303_30645_20131029_193530_outLine +BABEL_OP2_303_32048_20131204_223219_inLine +BABEL_OP2_303_32048_20131204_223219_outLine +BABEL_OP2_303_32171_20140203_203242_inLine +BABEL_OP2_303_32171_20140203_203242_outLine +BABEL_OP2_303_32301_20131120_212820_inLine +BABEL_OP2_303_32301_20131120_212820_outLine +BABEL_OP2_303_33229_20131206_220332_inLine +BABEL_OP2_303_33229_20131206_220332_outLine +BABEL_OP2_303_33424_20140129_211552_inLine +BABEL_OP2_303_33424_20140129_211552_outLine +BABEL_OP2_303_33672_20131029_201146_inLine +BABEL_OP2_303_33672_20131029_201146_outLine +BABEL_OP2_303_33913_20131116_003805_inLine +BABEL_OP2_303_33913_20131116_003805_outLine +BABEL_OP2_303_34106_20131027_203150_inLine +BABEL_OP2_303_34106_20131027_203150_outLine +BABEL_OP2_303_34811_20131115_235931_inLine +BABEL_OP2_303_34811_20131115_235931_outLine +BABEL_OP2_303_35000_20131210_184313_inLine +BABEL_OP2_303_35000_20131210_184313_outLine +BABEL_OP2_303_35008_20131120_185919_inLine +BABEL_OP2_303_35008_20131120_185919_outLine +BABEL_OP2_303_35143_20131206_023320_inLine +BABEL_OP2_303_35143_20131206_023320_outLine +BABEL_OP2_303_36594_20131215_014334_inLine +BABEL_OP2_303_36594_20131215_014334_outLine +BABEL_OP2_303_36594_20131215_022952_inLine +BABEL_OP2_303_36594_20131215_022952_outLine +BABEL_OP2_303_37228_20131216_171725_inLine +BABEL_OP2_303_37228_20131216_171725_outLine +BABEL_OP2_303_41469_20131025_210607_inLine +BABEL_OP2_303_41469_20131025_210607_outLine +BABEL_OP2_303_41609_20131031_164009_inLine +BABEL_OP2_303_41609_20131031_164009_outLine +BABEL_OP2_303_41680_20131108_184050_inLine +BABEL_OP2_303_41680_20131108_184050_outLine +BABEL_OP2_303_41692_20140119_000215_inLine +BABEL_OP2_303_41692_20140119_000215_outLine +BABEL_OP2_303_41692_20140120_002447_inLine 
+BABEL_OP2_303_41692_20140120_002447_outLine +BABEL_OP2_303_42526_20131216_190003_inLine +BABEL_OP2_303_42526_20131216_190003_outLine +BABEL_OP2_303_43784_20131115_013454_inLine +BABEL_OP2_303_43784_20131115_013454_outLine +BABEL_OP2_303_43784_20131115_014528_inLine +BABEL_OP2_303_43784_20131115_014528_outLine +BABEL_OP2_303_43788_20131202_222520_inLine +BABEL_OP2_303_43788_20131202_222520_outLine +BABEL_OP2_303_43920_20131130_143746_inLine +BABEL_OP2_303_43920_20131130_143746_outLine +BABEL_OP2_303_45459_20140201_203718_inLine +BABEL_OP2_303_45459_20140201_203718_outLine +BABEL_OP2_303_46330_20131210_212701_inLine +BABEL_OP2_303_46330_20131210_212701_outLine +BABEL_OP2_303_46688_20131108_184839_inLine +BABEL_OP2_303_46688_20131108_184839_outLine +BABEL_OP2_303_46757_20131116_193234_inLine +BABEL_OP2_303_46757_20131116_193234_outLine +BABEL_OP2_303_47215_20131108_200333_inLine +BABEL_OP2_303_47215_20131108_200333_outLine +BABEL_OP2_303_47487_20131104_200239_inLine +BABEL_OP2_303_47487_20131104_200239_outLine +BABEL_OP2_303_47637_20140222_233717_inLine +BABEL_OP2_303_47637_20140222_233717_outLine +BABEL_OP2_303_47866_20131230_165319_inLine +BABEL_OP2_303_47866_20131230_165319_outLine +BABEL_OP2_303_47878_20131116_184454_inLine +BABEL_OP2_303_47878_20131116_184454_outLine +BABEL_OP2_303_48844_20131030_014630_inLine +BABEL_OP2_303_48844_20131030_014630_outLine +BABEL_OP2_303_49027_20140127_225946_inLine +BABEL_OP2_303_49027_20140127_225946_outLine +BABEL_OP2_303_49197_20131115_221049_inLine +BABEL_OP2_303_49197_20131115_221049_outLine +BABEL_OP2_303_49216_20131031_011232_inLine +BABEL_OP2_303_49216_20131031_011232_outLine +BABEL_OP2_303_49437_20131211_205647_inLine +BABEL_OP2_303_49437_20131211_205647_outLine +BABEL_OP2_303_50565_20131102_213418_inLine +BABEL_OP2_303_50565_20131102_213418_outLine +BABEL_OP2_303_50779_20131215_002945_inLine +BABEL_OP2_303_50779_20131215_002945_outLine +BABEL_OP2_303_51015_20131121_004617_inLine +BABEL_OP2_303_51015_20131121_004617_outLine +BABEL_OP2_303_51968_20131113_214616_inLine +BABEL_OP2_303_51968_20131113_214616_outLine +BABEL_OP2_303_51968_20131113_220135_inLine +BABEL_OP2_303_51968_20131113_220135_outLine +BABEL_OP2_303_52272_20131027_195752_inLine +BABEL_OP2_303_52272_20131027_195752_outLine +BABEL_OP2_303_52381_20131216_174822_inLine +BABEL_OP2_303_52381_20131216_174822_outLine +BABEL_OP2_303_52490_20131027_172351_inLine +BABEL_OP2_303_52490_20131027_172351_outLine +BABEL_OP2_303_52804_20131105_185205_inLine +BABEL_OP2_303_52804_20131105_185205_outLine +BABEL_OP2_303_53144_20131227_024859_inLine +BABEL_OP2_303_53144_20131227_024859_outLine +BABEL_OP2_303_53665_20140204_194114_inLine +BABEL_OP2_303_53665_20140204_194114_outLine +BABEL_OP2_303_54104_20131030_190134_inLine +BABEL_OP2_303_54104_20131030_190134_outLine +BABEL_OP2_303_54162_20131114_015157_inLine +BABEL_OP2_303_54162_20131114_015157_outLine +BABEL_OP2_303_54744_20131101_012632_inLine +BABEL_OP2_303_54744_20131101_012632_outLine +BABEL_OP2_303_55968_20131027_154130_inLine +BABEL_OP2_303_55968_20131027_154130_outLine +BABEL_OP2_303_57141_20131129_191059_inLine +BABEL_OP2_303_57141_20131129_191059_outLine +BABEL_OP2_303_57464_20140204_205308_inLine +BABEL_OP2_303_57464_20140204_205308_outLine +BABEL_OP2_303_57464_20140204_220733_inLine +BABEL_OP2_303_57464_20140204_220733_outLine +BABEL_OP2_303_57566_20131205_002558_inLine +BABEL_OP2_303_57566_20131205_002558_outLine +BABEL_OP2_303_57782_20140222_210824_inLine +BABEL_OP2_303_57782_20140222_210824_outLine 
+BABEL_OP2_303_58313_20131114_234055_inLine +BABEL_OP2_303_58313_20131114_234055_outLine +BABEL_OP2_303_58821_20131121_205344_inLine +BABEL_OP2_303_58821_20131121_205344_outLine +BABEL_OP2_303_59509_20131130_021844_inLine +BABEL_OP2_303_59509_20131130_021844_outLine +BABEL_OP2_303_59635_20131205_021406_inLine +BABEL_OP2_303_59635_20131205_021406_outLine +BABEL_OP2_303_62014_20131114_203925_inLine +BABEL_OP2_303_62014_20131114_203925_outLine +BABEL_OP2_303_62714_20131228_155020_inLine +BABEL_OP2_303_62714_20131228_155020_outLine +BABEL_OP2_303_62810_20131028_225346_inLine +BABEL_OP2_303_62810_20131028_225346_outLine +BABEL_OP2_303_63604_20131101_000901_inLine +BABEL_OP2_303_63604_20131101_000901_outLine +BABEL_OP2_303_63730_20140218_210748_inLine +BABEL_OP2_303_63730_20140218_210748_outLine +BABEL_OP2_303_64014_20131229_214739_inLine +BABEL_OP2_303_64014_20131229_214739_outLine +BABEL_OP2_303_64065_20131111_230551_inLine +BABEL_OP2_303_64065_20131111_230551_outLine +BABEL_OP2_303_65561_20131122_180110_inLine +BABEL_OP2_303_65561_20131122_180110_outLine +BABEL_OP2_303_66001_20131031_192905_inLine +BABEL_OP2_303_66001_20131031_192905_outLine +BABEL_OP2_303_66361_20140203_182323_inLine +BABEL_OP2_303_66361_20140203_182323_outLine +BABEL_OP2_303_67283_20131109_213605_inLine +BABEL_OP2_303_67283_20131109_213605_outLine +BABEL_OP2_303_67401_20131114_215749_inLine +BABEL_OP2_303_67401_20131114_215749_outLine +BABEL_OP2_303_67401_20131114_221127_inLine +BABEL_OP2_303_67401_20131114_221127_outLine +BABEL_OP2_303_68068_20131204_212345_inLine +BABEL_OP2_303_68068_20131204_212345_outLine +BABEL_OP2_303_69153_20131204_184008_inLine +BABEL_OP2_303_69153_20131204_184008_outLine +BABEL_OP2_303_69992_20131030_011814_inLine +BABEL_OP2_303_69992_20131030_011814_outLine +BABEL_OP2_303_70221_20131124_180244_inLine +BABEL_OP2_303_70221_20131124_180244_outLine +BABEL_OP2_303_70251_20131027_201724_inLine +BABEL_OP2_303_70251_20131027_201724_outLine +BABEL_OP2_303_70452_20131115_202651_inLine +BABEL_OP2_303_70452_20131115_202651_outLine +BABEL_OP2_303_71067_20131115_221146_inLine +BABEL_OP2_303_71067_20131115_221146_outLine +BABEL_OP2_303_71189_20131225_050235_inLine +BABEL_OP2_303_71189_20131225_050235_outLine +BABEL_OP2_303_72040_20131112_173033_inLine +BABEL_OP2_303_72040_20131112_173033_outLine +BABEL_OP2_303_72844_20131111_192144_inLine +BABEL_OP2_303_72844_20131111_192144_outLine +BABEL_OP2_303_73022_20131216_173848_inLine +BABEL_OP2_303_73022_20131216_173848_outLine +BABEL_OP2_303_73299_20140217_173212_inLine +BABEL_OP2_303_73299_20140217_173212_outLine +BABEL_OP2_303_73591_20131020_193026_inLine +BABEL_OP2_303_73591_20131020_193026_outLine +BABEL_OP2_303_75342_20131122_191140_inLine +BABEL_OP2_303_75342_20131122_191140_outLine +BABEL_OP2_303_75505_20131102_220904_inLine +BABEL_OP2_303_75505_20131102_220904_outLine +BABEL_OP2_303_76902_20140205_233041_inLine +BABEL_OP2_303_76902_20140205_233041_outLine +BABEL_OP2_303_77730_20131107_221840_inLine +BABEL_OP2_303_77730_20131107_221840_outLine +BABEL_OP2_303_77744_20131113_232408_inLine +BABEL_OP2_303_77744_20131113_232408_outLine +BABEL_OP2_303_78544_20131204_194704_inLine +BABEL_OP2_303_78544_20131204_194704_outLine +BABEL_OP2_303_78604_20131101_194153_inLine +BABEL_OP2_303_78604_20131101_194153_outLine +BABEL_OP2_303_78943_20131115_213626_inLine +BABEL_OP2_303_78943_20131115_213626_outLine +BABEL_OP2_303_79451_20131114_213026_inLine +BABEL_OP2_303_79451_20131114_213026_outLine +BABEL_OP2_303_79590_20131113_222157_inLine 
+BABEL_OP2_303_79590_20131113_222157_outLine +BABEL_OP2_303_79751_20131105_025908_inLine +BABEL_OP2_303_79751_20131105_025908_outLine +BABEL_OP2_303_80559_20131101_190006_inLine +BABEL_OP2_303_80559_20131101_190006_outLine +BABEL_OP2_303_80622_20131130_040503_inLine +BABEL_OP2_303_80622_20131130_040503_outLine +BABEL_OP2_303_81149_20140203_201343_inLine +BABEL_OP2_303_81149_20140203_201343_outLine +BABEL_OP2_303_81287_20131121_184328_inLine +BABEL_OP2_303_81287_20131121_184328_outLine +BABEL_OP2_303_81671_20131205_004357_inLine +BABEL_OP2_303_81671_20131205_004357_outLine +BABEL_OP2_303_82622_20131029_212941_inLine +BABEL_OP2_303_82622_20131029_212941_outLine +BABEL_OP2_303_82935_20131205_024033_inLine +BABEL_OP2_303_82935_20131205_024033_outLine +BABEL_OP2_303_82935_20131205_025919_inLine +BABEL_OP2_303_82935_20131205_025919_outLine +BABEL_OP2_303_83771_20140119_181859_inLine +BABEL_OP2_303_83771_20140119_181859_outLine +BABEL_OP2_303_84458_20131204_213157_inLine +BABEL_OP2_303_84458_20131204_213157_outLine +BABEL_OP2_303_84547_20131026_230544_inLine +BABEL_OP2_303_84547_20131026_230544_outLine +BABEL_OP2_303_84605_20131112_192034_inLine +BABEL_OP2_303_84605_20131112_192034_outLine +BABEL_OP2_303_84805_20131204_153317_inLine +BABEL_OP2_303_84805_20131204_153317_outLine +BABEL_OP2_303_84936_20131115_204004_inLine +BABEL_OP2_303_84936_20131115_204004_outLine +BABEL_OP2_303_85340_20131111_215301_inLine +BABEL_OP2_303_85340_20131111_215301_outLine +BABEL_OP2_303_86191_20131114_221742_inLine +BABEL_OP2_303_86191_20131114_221742_outLine +BABEL_OP2_303_86321_20131204_175915_inLine +BABEL_OP2_303_86321_20131204_175915_outLine +BABEL_OP2_303_86467_20131025_013235_inLine +BABEL_OP2_303_86467_20131025_013235_outLine +BABEL_OP2_303_86676_20131204_185429_inLine +BABEL_OP2_303_86676_20131204_185429_outLine +BABEL_OP2_303_86713_20131206_165123_inLine +BABEL_OP2_303_86713_20131206_165123_outLine +BABEL_OP2_303_86891_20140222_195106_inLine +BABEL_OP2_303_86891_20140222_195106_outLine +BABEL_OP2_303_87313_20131116_193233_inLine +BABEL_OP2_303_87313_20131116_193233_outLine +BABEL_OP2_303_88776_20131031_184652_inLine +BABEL_OP2_303_88776_20131031_184652_outLine +BABEL_OP2_303_91125_20131102_191721_inLine +BABEL_OP2_303_91125_20131102_191721_outLine +BABEL_OP2_303_91944_20131107_214314_inLine +BABEL_OP2_303_91944_20131107_214314_outLine +BABEL_OP2_303_92605_20140205_192703_inLine +BABEL_OP2_303_92605_20140205_192703_outLine +BABEL_OP2_303_92757_20140211_221207_inLine +BABEL_OP2_303_92757_20140211_221207_outLine +BABEL_OP2_303_92792_20131223_042728_inLine +BABEL_OP2_303_92792_20131223_042728_outLine +BABEL_OP2_303_94025_20131211_211933_inLine +BABEL_OP2_303_94025_20131211_211933_outLine +BABEL_OP2_303_94333_20131029_193545_inLine +BABEL_OP2_303_94333_20131029_193545_outLine +BABEL_OP2_303_94745_20131204_205747_inLine +BABEL_OP2_303_94745_20131204_205747_outLine +BABEL_OP2_303_94869_20131101_184934_inLine +BABEL_OP2_303_94869_20131101_184934_outLine +BABEL_OP2_303_96690_20131114_194453_inLine +BABEL_OP2_303_96690_20131114_194453_outLine +BABEL_OP2_303_97286_20140205_223354_inLine +BABEL_OP2_303_97286_20140205_223354_outLine +BABEL_OP2_303_97772_20131024_230426_inLine +BABEL_OP2_303_97772_20131024_230426_outLine +BABEL_OP2_303_98311_20131107_224445_inLine +BABEL_OP2_303_98311_20131107_224445_outLine +BABEL_OP2_303_98356_20131121_191712_inLine +BABEL_OP2_303_98356_20131121_191712_outLine +BABEL_OP2_303_98390_20131029_164425_inLine +BABEL_OP2_303_98390_20131029_164425_outLine 
+BABEL_OP2_303_99955_20131215_222330_inLine +BABEL_OP2_303_99955_20131215_222330_outLine +BABEL_OP2_303_99955_20131216_231047_inLine +BABEL_OP2_303_99955_20131216_231047_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/training.list b/egs/babel/s5d/conf/lists/303-telugu/training.list new file mode 100644 index 00000000000..fec579c4325 --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/training.list @@ -0,0 +1,514 @@ +BABEL_OP2_303_10058_20140205_001109_inLine +BABEL_OP2_303_10058_20140205_001109_outLine +BABEL_OP2_303_10188_20131108_175933_inLine +BABEL_OP2_303_10188_20131108_175933_outLine +BABEL_OP2_303_10638_20140218_213711_inLine +BABEL_OP2_303_10638_20140218_213711_outLine +BABEL_OP2_303_10938_20131104_204555_inLine +BABEL_OP2_303_10938_20131104_204555_outLine +BABEL_OP2_303_11352_20131224_005439_inLine +BABEL_OP2_303_11352_20131224_005439_outLine +BABEL_OP2_303_11673_20131026_034803_inLine +BABEL_OP2_303_11673_20131026_034803_outLine +BABEL_OP2_303_12036_20131101_174653_inLine +BABEL_OP2_303_12036_20131101_174653_outLine +BABEL_OP2_303_12242_20131113_222307_inLine +BABEL_OP2_303_12242_20131113_222307_outLine +BABEL_OP2_303_13030_20131109_023950_inLine +BABEL_OP2_303_13030_20131109_023950_outLine +BABEL_OP2_303_13324_20131107_211806_inLine +BABEL_OP2_303_13324_20131107_211806_outLine +BABEL_OP2_303_13586_20131115_180921_inLine +BABEL_OP2_303_13586_20131115_180921_outLine +BABEL_OP2_303_13664_20131108_184651_inLine +BABEL_OP2_303_13664_20131108_184651_outLine +BABEL_OP2_303_13744_20131026_234054_inLine +BABEL_OP2_303_13744_20131026_234054_outLine +BABEL_OP2_303_14229_20131114_032214_inLine +BABEL_OP2_303_14229_20131114_032214_outLine +BABEL_OP2_303_14350_20131105_195640_inLine +BABEL_OP2_303_14350_20131105_195640_outLine +BABEL_OP2_303_14875_20131112_211504_inLine +BABEL_OP2_303_14875_20131112_211504_outLine +BABEL_OP2_303_14899_20131102_204324_inLine +BABEL_OP2_303_14899_20131102_204324_outLine +BABEL_OP2_303_14929_20131112_164303_inLine +BABEL_OP2_303_14929_20131112_164303_outLine +BABEL_OP2_303_14929_20131112_165202_inLine +BABEL_OP2_303_14929_20131112_165202_outLine +BABEL_OP2_303_14929_20131112_171242_inLine +BABEL_OP2_303_14929_20131112_171242_outLine +BABEL_OP2_303_14972_20131114_023627_inLine +BABEL_OP2_303_14972_20131114_023627_outLine +BABEL_OP2_303_15702_20131206_225729_inLine +BABEL_OP2_303_15702_20131206_225729_outLine +BABEL_OP2_303_15730_20131101_163118_inLine +BABEL_OP2_303_15730_20131101_163118_outLine +BABEL_OP2_303_16184_20131204_033225_inLine +BABEL_OP2_303_16184_20131204_033225_outLine +BABEL_OP2_303_16839_20131223_215734_inLine +BABEL_OP2_303_16839_20131223_215734_outLine +BABEL_OP2_303_16886_20131108_204525_inLine +BABEL_OP2_303_16886_20131108_204525_outLine +BABEL_OP2_303_16938_20131112_015544_inLine +BABEL_OP2_303_16938_20131112_015544_outLine +BABEL_OP2_303_17127_20131224_002728_inLine +BABEL_OP2_303_17127_20131224_002728_outLine +BABEL_OP2_303_17520_20131114_164811_inLine +BABEL_OP2_303_17520_20131114_164811_outLine +BABEL_OP2_303_18242_20140218_014910_inLine +BABEL_OP2_303_18242_20140218_014910_outLine +BABEL_OP2_303_18380_20131111_015535_inLine +BABEL_OP2_303_18380_20131111_015535_outLine +BABEL_OP2_303_18380_20131119_224151_inLine +BABEL_OP2_303_18380_20131119_224151_outLine +BABEL_OP2_303_18924_20131112_001935_inLine +BABEL_OP2_303_18924_20131112_001935_outLine +BABEL_OP2_303_20437_20140202_232910_inLine +BABEL_OP2_303_20437_20140202_232910_outLine +BABEL_OP2_303_20437_20140202_234756_inLine +BABEL_OP2_303_20437_20140202_234756_outLine 
+BABEL_OP2_303_20985_20131122_183435_inLine +BABEL_OP2_303_20985_20131122_183435_outLine +BABEL_OP2_303_21435_20131226_175809_inLine +BABEL_OP2_303_21435_20131226_175809_outLine +BABEL_OP2_303_21435_20131226_181138_inLine +BABEL_OP2_303_21435_20131226_181138_outLine +BABEL_OP2_303_23006_20131113_150924_inLine +BABEL_OP2_303_23006_20131113_150924_outLine +BABEL_OP2_303_23046_20131114_171927_inLine +BABEL_OP2_303_23046_20131114_171927_outLine +BABEL_OP2_303_23239_20131206_181414_inLine +BABEL_OP2_303_23239_20131206_181414_outLine +BABEL_OP2_303_23260_20140203_194817_inLine +BABEL_OP2_303_23260_20140203_194817_outLine +BABEL_OP2_303_23505_20131109_184015_inLine +BABEL_OP2_303_23505_20131109_184015_outLine +BABEL_OP2_303_23681_20140119_223006_inLine +BABEL_OP2_303_23681_20140119_223006_outLine +BABEL_OP2_303_23980_20131114_202648_inLine +BABEL_OP2_303_23980_20131114_202648_outLine +BABEL_OP2_303_24010_20140218_224141_inLine +BABEL_OP2_303_24010_20140218_224141_outLine +BABEL_OP2_303_24231_20140201_230638_inLine +BABEL_OP2_303_24231_20140201_230638_outLine +BABEL_OP2_303_24323_20131201_180512_inLine +BABEL_OP2_303_24323_20131201_180512_outLine +BABEL_OP2_303_24470_20131204_174323_inLine +BABEL_OP2_303_24470_20131204_174323_outLine +BABEL_OP2_303_24589_20131114_182843_inLine +BABEL_OP2_303_24589_20131114_182843_outLine +BABEL_OP2_303_24982_20131114_012226_inLine +BABEL_OP2_303_24982_20131114_012226_outLine +BABEL_OP2_303_25198_20140121_180931_inLine +BABEL_OP2_303_25198_20140121_180931_outLine +BABEL_OP2_303_25719_20131205_191053_inLine +BABEL_OP2_303_25719_20131205_191053_outLine +BABEL_OP2_303_25767_20131028_161454_inLine +BABEL_OP2_303_25767_20131028_161454_outLine +BABEL_OP2_303_25961_20131030_225755_inLine +BABEL_OP2_303_25961_20131030_225755_outLine +BABEL_OP2_303_26072_20131216_221839_inLine +BABEL_OP2_303_26072_20131216_221839_outLine +BABEL_OP2_303_26388_20131113_034454_inLine +BABEL_OP2_303_26388_20131113_034454_outLine +BABEL_OP2_303_27125_20131024_195716_inLine +BABEL_OP2_303_27125_20131024_195716_outLine +BABEL_OP2_303_27590_20131118_222641_inLine +BABEL_OP2_303_27590_20131118_222641_outLine +BABEL_OP2_303_28419_20131113_195258_inLine +BABEL_OP2_303_28419_20131113_195258_outLine +BABEL_OP2_303_29404_20131224_014921_inLine +BABEL_OP2_303_29404_20131224_014921_outLine +BABEL_OP2_303_29482_20140219_221449_inLine +BABEL_OP2_303_29482_20140219_221449_outLine +BABEL_OP2_303_29685_20131105_180851_inLine +BABEL_OP2_303_29685_20131105_180851_outLine +BABEL_OP2_303_30013_20131116_185844_inLine +BABEL_OP2_303_30013_20131116_185844_outLine +BABEL_OP2_303_30345_20131224_005453_inLine +BABEL_OP2_303_30345_20131224_005453_outLine +BABEL_OP2_303_30395_20131112_004350_inLine +BABEL_OP2_303_30395_20131112_004350_outLine +BABEL_OP2_303_30645_20131029_193530_inLine +BABEL_OP2_303_30645_20131029_193530_outLine +BABEL_OP2_303_31490_20131105_010342_inLine +BABEL_OP2_303_31490_20131105_010342_outLine +BABEL_OP2_303_31490_20131105_011345_inLine +BABEL_OP2_303_31490_20131105_011345_outLine +BABEL_OP2_303_32048_20131204_223219_inLine +BABEL_OP2_303_32048_20131204_223219_outLine +BABEL_OP2_303_32171_20140203_203242_inLine +BABEL_OP2_303_32171_20140203_203242_outLine +BABEL_OP2_303_32301_20131120_212820_inLine +BABEL_OP2_303_32301_20131120_212820_outLine +BABEL_OP2_303_32861_20131216_223500_inLine +BABEL_OP2_303_32861_20131216_223500_outLine +BABEL_OP2_303_33229_20131206_220332_inLine +BABEL_OP2_303_33229_20131206_220332_outLine +BABEL_OP2_303_33424_20140129_211552_inLine 
+BABEL_OP2_303_33424_20140129_211552_outLine +BABEL_OP2_303_33672_20131029_201146_inLine +BABEL_OP2_303_33672_20131029_201146_outLine +BABEL_OP2_303_33704_20131210_195453_inLine +BABEL_OP2_303_33704_20131210_195453_outLine +BABEL_OP2_303_33913_20131116_003805_inLine +BABEL_OP2_303_33913_20131116_003805_outLine +BABEL_OP2_303_34106_20131027_203150_inLine +BABEL_OP2_303_34106_20131027_203150_outLine +BABEL_OP2_303_34811_20131115_235931_inLine +BABEL_OP2_303_34811_20131115_235931_outLine +BABEL_OP2_303_35000_20131210_184313_inLine +BABEL_OP2_303_35000_20131210_184313_outLine +BABEL_OP2_303_35008_20131120_185919_inLine +BABEL_OP2_303_35008_20131120_185919_outLine +BABEL_OP2_303_35069_20131205_165127_inLine +BABEL_OP2_303_35069_20131205_165127_outLine +BABEL_OP2_303_35143_20131206_023320_inLine +BABEL_OP2_303_35143_20131206_023320_outLine +BABEL_OP2_303_36341_20131024_221132_inLine +BABEL_OP2_303_36341_20131024_221132_outLine +BABEL_OP2_303_36594_20131215_014334_inLine +BABEL_OP2_303_36594_20131215_014334_outLine +BABEL_OP2_303_36594_20131215_022952_inLine +BABEL_OP2_303_36594_20131215_022952_outLine +BABEL_OP2_303_36669_20131110_155909_inLine +BABEL_OP2_303_36669_20131110_155909_outLine +BABEL_OP2_303_37228_20131216_171725_inLine +BABEL_OP2_303_37228_20131216_171725_outLine +BABEL_OP2_303_37682_20131105_023703_inLine +BABEL_OP2_303_37682_20131105_023703_outLine +BABEL_OP2_303_39307_20131027_043600_inLine +BABEL_OP2_303_39307_20131027_043600_outLine +BABEL_OP2_303_40565_20131116_182747_inLine +BABEL_OP2_303_40565_20131116_182747_outLine +BABEL_OP2_303_41469_20131025_210607_inLine +BABEL_OP2_303_41469_20131025_210607_outLine +BABEL_OP2_303_41493_20131027_155001_inLine +BABEL_OP2_303_41493_20131027_155001_outLine +BABEL_OP2_303_41609_20131031_164009_inLine +BABEL_OP2_303_41609_20131031_164009_outLine +BABEL_OP2_303_41680_20131108_184050_inLine +BABEL_OP2_303_41680_20131108_184050_outLine +BABEL_OP2_303_41692_20140119_000215_inLine +BABEL_OP2_303_41692_20140119_000215_outLine +BABEL_OP2_303_41692_20140120_002447_inLine +BABEL_OP2_303_41692_20140120_002447_outLine +BABEL_OP2_303_42526_20131216_190003_inLine +BABEL_OP2_303_42526_20131216_190003_outLine +BABEL_OP2_303_42718_20140118_201247_inLine +BABEL_OP2_303_42718_20140118_201247_outLine +BABEL_OP2_303_43115_20140201_195115_inLine +BABEL_OP2_303_43115_20140201_195115_outLine +BABEL_OP2_303_43784_20131115_013454_inLine +BABEL_OP2_303_43784_20131115_013454_outLine +BABEL_OP2_303_43784_20131115_014528_inLine +BABEL_OP2_303_43784_20131115_014528_outLine +BABEL_OP2_303_43788_20131202_222520_inLine +BABEL_OP2_303_43788_20131202_222520_outLine +BABEL_OP2_303_43789_20131111_163502_inLine +BABEL_OP2_303_43789_20131111_163502_outLine +BABEL_OP2_303_43920_20131130_143746_inLine +BABEL_OP2_303_43920_20131130_143746_outLine +BABEL_OP2_303_45459_20140201_203718_inLine +BABEL_OP2_303_45459_20140201_203718_outLine +BABEL_OP2_303_46330_20131210_212701_inLine +BABEL_OP2_303_46330_20131210_212701_outLine +BABEL_OP2_303_46550_20131111_233520_inLine +BABEL_OP2_303_46550_20131111_233520_outLine +BABEL_OP2_303_46558_20131028_190003_inLine +BABEL_OP2_303_46558_20131028_190003_outLine +BABEL_OP2_303_46688_20131108_184839_inLine +BABEL_OP2_303_46688_20131108_184839_outLine +BABEL_OP2_303_46757_20131116_193234_inLine +BABEL_OP2_303_46757_20131116_193234_outLine +BABEL_OP2_303_47215_20131108_200333_inLine +BABEL_OP2_303_47215_20131108_200333_outLine +BABEL_OP2_303_47487_20131104_200239_inLine +BABEL_OP2_303_47487_20131104_200239_outLine 
+BABEL_OP2_303_47637_20140222_233717_inLine +BABEL_OP2_303_47637_20140222_233717_outLine +BABEL_OP2_303_47823_20131201_004209_inLine +BABEL_OP2_303_47823_20131201_004209_outLine +BABEL_OP2_303_47866_20131230_165319_inLine +BABEL_OP2_303_47866_20131230_165319_outLine +BABEL_OP2_303_47878_20131116_184454_inLine +BABEL_OP2_303_47878_20131116_184454_outLine +BABEL_OP2_303_48844_20131030_014630_inLine +BABEL_OP2_303_48844_20131030_014630_outLine +BABEL_OP2_303_49027_20140127_225946_inLine +BABEL_OP2_303_49027_20140127_225946_outLine +BABEL_OP2_303_49197_20131115_221049_inLine +BABEL_OP2_303_49197_20131115_221049_outLine +BABEL_OP2_303_49216_20131031_011232_inLine +BABEL_OP2_303_49216_20131031_011232_outLine +BABEL_OP2_303_49437_20131211_205647_inLine +BABEL_OP2_303_49437_20131211_205647_outLine +BABEL_OP2_303_50565_20131102_213418_inLine +BABEL_OP2_303_50565_20131102_213418_outLine +BABEL_OP2_303_50726_20131028_210641_inLine +BABEL_OP2_303_50726_20131028_210641_outLine +BABEL_OP2_303_50779_20131215_002945_inLine +BABEL_OP2_303_50779_20131215_002945_outLine +BABEL_OP2_303_51015_20131121_004617_inLine +BABEL_OP2_303_51015_20131121_004617_outLine +BABEL_OP2_303_51540_20131204_041920_inLine +BABEL_OP2_303_51540_20131204_041920_outLine +BABEL_OP2_303_51968_20131113_214616_inLine +BABEL_OP2_303_51968_20131113_214616_outLine +BABEL_OP2_303_51968_20131113_220135_inLine +BABEL_OP2_303_51968_20131113_220135_outLine +BABEL_OP2_303_52272_20131027_195752_inLine +BABEL_OP2_303_52272_20131027_195752_outLine +BABEL_OP2_303_52381_20131216_174822_inLine +BABEL_OP2_303_52381_20131216_174822_outLine +BABEL_OP2_303_52490_20131027_172351_inLine +BABEL_OP2_303_52490_20131027_172351_outLine +BABEL_OP2_303_52804_20131105_185205_inLine +BABEL_OP2_303_52804_20131105_185205_outLine +BABEL_OP2_303_53144_20131227_024859_inLine +BABEL_OP2_303_53144_20131227_024859_outLine +BABEL_OP2_303_53665_20140204_194114_inLine +BABEL_OP2_303_53665_20140204_194114_outLine +BABEL_OP2_303_54104_20131030_190134_inLine +BABEL_OP2_303_54104_20131030_190134_outLine +BABEL_OP2_303_54162_20131114_015157_inLine +BABEL_OP2_303_54162_20131114_015157_outLine +BABEL_OP2_303_54744_20131101_012632_inLine +BABEL_OP2_303_54744_20131101_012632_outLine +BABEL_OP2_303_55968_20131027_154130_inLine +BABEL_OP2_303_55968_20131027_154130_outLine +BABEL_OP2_303_57141_20131129_191059_inLine +BABEL_OP2_303_57141_20131129_191059_outLine +BABEL_OP2_303_57464_20140204_205308_inLine +BABEL_OP2_303_57464_20140204_205308_outLine +BABEL_OP2_303_57464_20140204_220733_inLine +BABEL_OP2_303_57464_20140204_220733_outLine +BABEL_OP2_303_57566_20131205_002558_inLine +BABEL_OP2_303_57566_20131205_002558_outLine +BABEL_OP2_303_57782_20140222_210824_inLine +BABEL_OP2_303_57782_20140222_210824_outLine +BABEL_OP2_303_58313_20131114_234055_inLine +BABEL_OP2_303_58313_20131114_234055_outLine +BABEL_OP2_303_58821_20131121_205344_inLine +BABEL_OP2_303_58821_20131121_205344_outLine +BABEL_OP2_303_59509_20131130_021844_inLine +BABEL_OP2_303_59509_20131130_021844_outLine +BABEL_OP2_303_59635_20131205_021406_inLine +BABEL_OP2_303_59635_20131205_021406_outLine +BABEL_OP2_303_60538_20131111_200459_inLine +BABEL_OP2_303_60538_20131111_200459_outLine +BABEL_OP2_303_62014_20131114_203925_inLine +BABEL_OP2_303_62014_20131114_203925_outLine +BABEL_OP2_303_62714_20131228_155020_inLine +BABEL_OP2_303_62714_20131228_155020_outLine +BABEL_OP2_303_62810_20131028_225346_inLine +BABEL_OP2_303_62810_20131028_225346_outLine +BABEL_OP2_303_63084_20131115_202655_inLine 
+BABEL_OP2_303_63084_20131115_202655_outLine +BABEL_OP2_303_63604_20131101_000901_inLine +BABEL_OP2_303_63604_20131101_000901_outLine +BABEL_OP2_303_63730_20140218_210748_inLine +BABEL_OP2_303_63730_20140218_210748_outLine +BABEL_OP2_303_64014_20131229_214739_inLine +BABEL_OP2_303_64014_20131229_214739_outLine +BABEL_OP2_303_64065_20131111_230551_inLine +BABEL_OP2_303_64065_20131111_230551_outLine +BABEL_OP2_303_64768_20131113_203120_inLine +BABEL_OP2_303_64768_20131113_203120_outLine +BABEL_OP2_303_65077_20131024_174953_inLine +BABEL_OP2_303_65077_20131024_174953_outLine +BABEL_OP2_303_65561_20131122_180110_inLine +BABEL_OP2_303_65561_20131122_180110_outLine +BABEL_OP2_303_66001_20131031_192905_inLine +BABEL_OP2_303_66001_20131031_192905_outLine +BABEL_OP2_303_66361_20140203_182323_inLine +BABEL_OP2_303_66361_20140203_182323_outLine +BABEL_OP2_303_67283_20131109_213605_inLine +BABEL_OP2_303_67283_20131109_213605_outLine +BABEL_OP2_303_67401_20131114_215749_inLine +BABEL_OP2_303_67401_20131114_215749_outLine +BABEL_OP2_303_67401_20131114_221127_inLine +BABEL_OP2_303_67401_20131114_221127_outLine +BABEL_OP2_303_67964_20140222_211658_inLine +BABEL_OP2_303_67964_20140222_211658_outLine +BABEL_OP2_303_68068_20131204_212345_inLine +BABEL_OP2_303_68068_20131204_212345_outLine +BABEL_OP2_303_69107_20131113_222827_inLine +BABEL_OP2_303_69107_20131113_222827_outLine +BABEL_OP2_303_69153_20131204_184008_inLine +BABEL_OP2_303_69153_20131204_184008_outLine +BABEL_OP2_303_69633_20131130_193122_inLine +BABEL_OP2_303_69633_20131130_193122_outLine +BABEL_OP2_303_69992_20131030_011814_inLine +BABEL_OP2_303_69992_20131030_011814_outLine +BABEL_OP2_303_70221_20131124_180244_inLine +BABEL_OP2_303_70221_20131124_180244_outLine +BABEL_OP2_303_70251_20131027_201724_inLine +BABEL_OP2_303_70251_20131027_201724_outLine +BABEL_OP2_303_70452_20131115_202651_inLine +BABEL_OP2_303_70452_20131115_202651_outLine +BABEL_OP2_303_71067_20131115_221146_inLine +BABEL_OP2_303_71067_20131115_221146_outLine +BABEL_OP2_303_71189_20131225_050235_inLine +BABEL_OP2_303_71189_20131225_050235_outLine +BABEL_OP2_303_72040_20131112_173033_inLine +BABEL_OP2_303_72040_20131112_173033_outLine +BABEL_OP2_303_72587_20131115_221128_inLine +BABEL_OP2_303_72587_20131115_221128_outLine +BABEL_OP2_303_72844_20131111_192144_inLine +BABEL_OP2_303_72844_20131111_192144_outLine +BABEL_OP2_303_73022_20131216_173848_inLine +BABEL_OP2_303_73022_20131216_173848_outLine +BABEL_OP2_303_73299_20140217_173212_inLine +BABEL_OP2_303_73299_20140217_173212_outLine +BABEL_OP2_303_73591_20131020_193026_inLine +BABEL_OP2_303_73591_20131020_193026_outLine +BABEL_OP2_303_73990_20140219_201105_inLine +BABEL_OP2_303_73990_20140219_201105_outLine +BABEL_OP2_303_73990_20140219_202300_inLine +BABEL_OP2_303_73990_20140219_202300_outLine +BABEL_OP2_303_74886_20131101_194728_inLine +BABEL_OP2_303_74886_20131101_194728_outLine +BABEL_OP2_303_75342_20131122_191140_inLine +BABEL_OP2_303_75342_20131122_191140_outLine +BABEL_OP2_303_75365_20140218_173521_inLine +BABEL_OP2_303_75365_20140218_173521_outLine +BABEL_OP2_303_75505_20131102_220904_inLine +BABEL_OP2_303_75505_20131102_220904_outLine +BABEL_OP2_303_76756_20131115_182926_inLine +BABEL_OP2_303_76756_20131115_182926_outLine +BABEL_OP2_303_76902_20140205_233041_inLine +BABEL_OP2_303_76902_20140205_233041_outLine +BABEL_OP2_303_77730_20131107_221840_inLine +BABEL_OP2_303_77730_20131107_221840_outLine +BABEL_OP2_303_77744_20131113_232408_inLine +BABEL_OP2_303_77744_20131113_232408_outLine 
+BABEL_OP2_303_78454_20131114_230026_inLine +BABEL_OP2_303_78454_20131114_230026_outLine +BABEL_OP2_303_78544_20131204_194704_inLine +BABEL_OP2_303_78544_20131204_194704_outLine +BABEL_OP2_303_78604_20131101_194153_inLine +BABEL_OP2_303_78604_20131101_194153_outLine +BABEL_OP2_303_78943_20131115_213626_inLine +BABEL_OP2_303_78943_20131115_213626_outLine +BABEL_OP2_303_79451_20131114_213026_inLine +BABEL_OP2_303_79451_20131114_213026_outLine +BABEL_OP2_303_79590_20131113_222157_inLine +BABEL_OP2_303_79590_20131113_222157_outLine +BABEL_OP2_303_79751_20131105_025908_inLine +BABEL_OP2_303_79751_20131105_025908_outLine +BABEL_OP2_303_79820_20131114_181827_inLine +BABEL_OP2_303_79820_20131114_181827_outLine +BABEL_OP2_303_80559_20131101_190006_inLine +BABEL_OP2_303_80559_20131101_190006_outLine +BABEL_OP2_303_80622_20131130_040503_inLine +BABEL_OP2_303_80622_20131130_040503_outLine +BABEL_OP2_303_80881_20131027_165716_inLine +BABEL_OP2_303_80881_20131027_165716_outLine +BABEL_OP2_303_81149_20140203_201343_inLine +BABEL_OP2_303_81149_20140203_201343_outLine +BABEL_OP2_303_81287_20131121_184328_inLine +BABEL_OP2_303_81287_20131121_184328_outLine +BABEL_OP2_303_81424_20131120_192659_inLine +BABEL_OP2_303_81424_20131120_192659_outLine +BABEL_OP2_303_81671_20131205_004357_inLine +BABEL_OP2_303_81671_20131205_004357_outLine +BABEL_OP2_303_82622_20131029_212941_inLine +BABEL_OP2_303_82622_20131029_212941_outLine +BABEL_OP2_303_82935_20131205_024033_inLine +BABEL_OP2_303_82935_20131205_024033_outLine +BABEL_OP2_303_82935_20131205_025919_inLine +BABEL_OP2_303_82935_20131205_025919_outLine +BABEL_OP2_303_83771_20140119_181859_inLine +BABEL_OP2_303_83771_20140119_181859_outLine +BABEL_OP2_303_83935_20131122_222948_inLine +BABEL_OP2_303_83935_20131122_222948_outLine +BABEL_OP2_303_84061_20131104_224830_inLine +BABEL_OP2_303_84061_20131104_224830_outLine +BABEL_OP2_303_84327_20131122_203936_inLine +BABEL_OP2_303_84327_20131122_203936_outLine +BABEL_OP2_303_84458_20131204_213157_inLine +BABEL_OP2_303_84458_20131204_213157_outLine +BABEL_OP2_303_84547_20131026_230544_inLine +BABEL_OP2_303_84547_20131026_230544_outLine +BABEL_OP2_303_84605_20131112_192034_inLine +BABEL_OP2_303_84605_20131112_192034_outLine +BABEL_OP2_303_84805_20131204_153317_inLine +BABEL_OP2_303_84805_20131204_153317_outLine +BABEL_OP2_303_84936_20131115_204004_inLine +BABEL_OP2_303_84936_20131115_204004_outLine +BABEL_OP2_303_85248_20131206_184028_inLine +BABEL_OP2_303_85248_20131206_184028_outLine +BABEL_OP2_303_85340_20131111_215301_inLine +BABEL_OP2_303_85340_20131111_215301_outLine +BABEL_OP2_303_86191_20131114_221742_inLine +BABEL_OP2_303_86191_20131114_221742_outLine +BABEL_OP2_303_86321_20131204_175915_inLine +BABEL_OP2_303_86321_20131204_175915_outLine +BABEL_OP2_303_86467_20131025_013235_inLine +BABEL_OP2_303_86467_20131025_013235_outLine +BABEL_OP2_303_86676_20131204_185429_inLine +BABEL_OP2_303_86676_20131204_185429_outLine +BABEL_OP2_303_86713_20131206_165123_inLine +BABEL_OP2_303_86713_20131206_165123_outLine +BABEL_OP2_303_86891_20140222_195106_inLine +BABEL_OP2_303_86891_20140222_195106_outLine +BABEL_OP2_303_86952_20131105_173230_inLine +BABEL_OP2_303_86952_20131105_173230_outLine +BABEL_OP2_303_87313_20131116_193233_inLine +BABEL_OP2_303_87313_20131116_193233_outLine +BABEL_OP2_303_87884_20131206_022424_inLine +BABEL_OP2_303_87884_20131206_022424_outLine +BABEL_OP2_303_87889_20131213_215703_inLine +BABEL_OP2_303_87889_20131213_215703_outLine +BABEL_OP2_303_88776_20131031_184652_inLine 
+BABEL_OP2_303_88776_20131031_184652_outLine +BABEL_OP2_303_88982_20131115_181618_inLine +BABEL_OP2_303_88982_20131115_181618_outLine +BABEL_OP2_303_90080_20131228_233334_inLine +BABEL_OP2_303_90080_20131228_233334_outLine +BABEL_OP2_303_90740_20140221_220031_inLine +BABEL_OP2_303_90740_20140221_220031_outLine +BABEL_OP2_303_91125_20131102_191721_inLine +BABEL_OP2_303_91125_20131102_191721_outLine +BABEL_OP2_303_91944_20131107_214314_inLine +BABEL_OP2_303_91944_20131107_214314_outLine +BABEL_OP2_303_92176_20131115_153306_inLine +BABEL_OP2_303_92176_20131115_153306_outLine +BABEL_OP2_303_92605_20140205_192703_inLine +BABEL_OP2_303_92605_20140205_192703_outLine +BABEL_OP2_303_92757_20140211_221207_inLine +BABEL_OP2_303_92757_20140211_221207_outLine +BABEL_OP2_303_92792_20131223_042728_inLine +BABEL_OP2_303_92792_20131223_042728_outLine +BABEL_OP2_303_94025_20131211_211933_inLine +BABEL_OP2_303_94025_20131211_211933_outLine +BABEL_OP2_303_94333_20131029_193545_inLine +BABEL_OP2_303_94333_20131029_193545_outLine +BABEL_OP2_303_94745_20131204_205747_inLine +BABEL_OP2_303_94745_20131204_205747_outLine +BABEL_OP2_303_94869_20131101_184934_inLine +BABEL_OP2_303_94869_20131101_184934_outLine +BABEL_OP2_303_96324_20131107_162546_inLine +BABEL_OP2_303_96324_20131107_162546_outLine +BABEL_OP2_303_96690_20131114_194453_inLine +BABEL_OP2_303_96690_20131114_194453_outLine +BABEL_OP2_303_97286_20140205_223354_inLine +BABEL_OP2_303_97286_20140205_223354_outLine +BABEL_OP2_303_97772_20131024_230426_inLine +BABEL_OP2_303_97772_20131024_230426_outLine +BABEL_OP2_303_97988_20131204_195626_inLine +BABEL_OP2_303_97988_20131204_195626_outLine +BABEL_OP2_303_97988_20131204_211137_inLine +BABEL_OP2_303_97988_20131204_211137_outLine +BABEL_OP2_303_98311_20131107_224445_inLine +BABEL_OP2_303_98311_20131107_224445_outLine +BABEL_OP2_303_98356_20131121_191712_inLine +BABEL_OP2_303_98356_20131121_191712_outLine +BABEL_OP2_303_98390_20131029_164425_inLine +BABEL_OP2_303_98390_20131029_164425_outLine +BABEL_OP2_303_99955_20131215_222330_inLine +BABEL_OP2_303_99955_20131215_222330_outLine +BABEL_OP2_303_99955_20131216_231047_inLine +BABEL_OP2_303_99955_20131216_231047_outLine diff --git a/egs/babel/s5d/conf/lists/303-telugu/untranscribed-training.list b/egs/babel/s5d/conf/lists/303-telugu/untranscribed-training.list new file mode 100644 index 00000000000..29a7e3f80b4 --- /dev/null +++ b/egs/babel/s5d/conf/lists/303-telugu/untranscribed-training.list @@ -0,0 +1,501 @@ +BABEL_OP2_303_11310_20140309_202017_inLine +BABEL_OP2_303_11310_20140309_202017_outLine +BABEL_OP2_303_11723_20140320_222729_inLine +BABEL_OP2_303_11723_20140320_222729_outLine +BABEL_OP2_303_11723_20140320_223508_inLine +BABEL_OP2_303_11723_20140320_223508_outLine +BABEL_OP2_303_13184_20131204_011559_inLine +BABEL_OP2_303_13184_20131204_011559_outLine +BABEL_OP2_303_13189_20131211_195308_inLine +BABEL_OP2_303_13189_20131211_195308_outLine +BABEL_OP2_303_13792_20131029_222536_inLine +BABEL_OP2_303_13792_20131029_222536_outLine +BABEL_OP2_303_13929_20140327_182253_inLine +BABEL_OP2_303_13929_20140327_182253_outLine +BABEL_OP2_303_14575_20140328_215314_inLine +BABEL_OP2_303_14575_20140328_215314_outLine +BABEL_OP2_303_14723_20140327_220200_inLine +BABEL_OP2_303_14723_20140327_220200_outLine +BABEL_OP2_303_14884_20140320_193514_inLine +BABEL_OP2_303_14884_20140320_193514_outLine +BABEL_OP2_303_14884_20140320_195858_inLine +BABEL_OP2_303_14884_20140320_195858_outLine +BABEL_OP2_303_15926_20131130_215154_inLine +BABEL_OP2_303_15926_20131130_215154_outLine 
+BABEL_OP2_303_16351_20140309_193931_inLine +BABEL_OP2_303_16351_20140309_193931_outLine +BABEL_OP2_303_16726_20140328_174353_inLine +BABEL_OP2_303_16726_20140328_174353_outLine +BABEL_OP2_303_17511_20140327_212725_inLine +BABEL_OP2_303_17511_20140327_212725_outLine +BABEL_OP2_303_17751_20140130_221610_inLine +BABEL_OP2_303_17751_20140130_221610_outLine +BABEL_OP2_303_17890_20131116_201518_inLine +BABEL_OP2_303_17890_20131116_201518_outLine +BABEL_OP2_303_17914_20131229_223237_inLine +BABEL_OP2_303_17914_20131229_223237_outLine +BABEL_OP2_303_17937_20140319_174736_inLine +BABEL_OP2_303_17937_20140319_174736_outLine +BABEL_OP2_303_18280_20140328_223246_inLine +BABEL_OP2_303_18280_20140328_223246_outLine +BABEL_OP2_303_18297_20140125_191248_inLine +BABEL_OP2_303_18297_20140125_191248_outLine +BABEL_OP2_303_18566_20131228_173117_inLine +BABEL_OP2_303_18566_20131228_173117_outLine +BABEL_OP2_303_19101_20131114_161754_inLine +BABEL_OP2_303_19101_20131114_161754_outLine +BABEL_OP2_303_19440_20140325_010253_inLine +BABEL_OP2_303_19440_20140325_010253_outLine +BABEL_OP2_303_19444_20140324_030047_inLine +BABEL_OP2_303_19444_20140324_030047_outLine +BABEL_OP2_303_19621_20131117_014609_inLine +BABEL_OP2_303_19621_20131117_014609_outLine +BABEL_OP2_303_20682_20131128_201847_inLine +BABEL_OP2_303_20682_20131128_201847_outLine +BABEL_OP2_303_20738_20131230_225647_inLine +BABEL_OP2_303_20738_20131230_225647_outLine +BABEL_OP2_303_20896_20140328_234931_inLine +BABEL_OP2_303_20896_20140328_234931_outLine +BABEL_OP2_303_21159_20140318_195039_inLine +BABEL_OP2_303_21159_20140318_195039_outLine +BABEL_OP2_303_21244_20140129_215632_inLine +BABEL_OP2_303_21244_20140129_215632_outLine +BABEL_OP2_303_21315_20140405_194002_inLine +BABEL_OP2_303_21315_20140405_194002_outLine +BABEL_OP2_303_22021_20140413_225936_inLine +BABEL_OP2_303_22021_20140413_225936_outLine +BABEL_OP2_303_22591_20140404_023216_inLine +BABEL_OP2_303_22591_20140404_023216_outLine +BABEL_OP2_303_22641_20131025_191802_inLine +BABEL_OP2_303_22641_20131025_191802_outLine +BABEL_OP2_303_22643_20140319_183843_inLine +BABEL_OP2_303_22643_20140319_183843_outLine +BABEL_OP2_303_23355_20140324_163413_inLine +BABEL_OP2_303_23355_20140324_163413_outLine +BABEL_OP2_303_23628_20131114_233248_inLine +BABEL_OP2_303_23628_20131114_233248_outLine +BABEL_OP2_303_23700_20140330_203130_inLine +BABEL_OP2_303_23700_20140330_203130_outLine +BABEL_OP2_303_24587_20140324_011441_inLine +BABEL_OP2_303_24587_20140324_011441_outLine +BABEL_OP2_303_24587_20140324_035935_inLine +BABEL_OP2_303_24587_20140324_035935_outLine +BABEL_OP2_303_24648_20140324_212818_inLine +BABEL_OP2_303_24648_20140324_212818_outLine +BABEL_OP2_303_25012_20140309_203215_inLine +BABEL_OP2_303_25012_20140309_203215_outLine +BABEL_OP2_303_25085_20140213_175133_inLine +BABEL_OP2_303_25085_20140213_175133_outLine +BABEL_OP2_303_25242_20140308_200459_inLine +BABEL_OP2_303_25242_20140308_200459_outLine +BABEL_OP2_303_25496_20140325_025625_inLine +BABEL_OP2_303_25496_20140325_025625_outLine +BABEL_OP2_303_26074_20131114_211040_inLine +BABEL_OP2_303_26074_20131114_211040_outLine +BABEL_OP2_303_27014_20140309_212535_inLine +BABEL_OP2_303_27014_20140309_212535_outLine +BABEL_OP2_303_27478_20131228_145746_inLine +BABEL_OP2_303_27478_20131228_145746_outLine +BABEL_OP2_303_28303_20131030_203335_inLine +BABEL_OP2_303_28303_20131030_203335_outLine +BABEL_OP2_303_28814_20131216_215127_inLine +BABEL_OP2_303_28814_20131216_215127_outLine +BABEL_OP2_303_29072_20131118_191936_outLine 
+BABEL_OP2_303_29563_20140327_193023_inLine +BABEL_OP2_303_29563_20140327_193023_outLine +BABEL_OP2_303_29643_20140131_234915_inLine +BABEL_OP2_303_29643_20140131_234915_outLine +BABEL_OP2_303_29765_20140317_141957_inLine +BABEL_OP2_303_29765_20140317_141957_outLine +BABEL_OP2_303_30084_20140212_191819_inLine +BABEL_OP2_303_30084_20140212_191819_outLine +BABEL_OP2_303_30250_20131105_004442_inLine +BABEL_OP2_303_30250_20131105_004442_outLine +BABEL_OP2_303_32832_20131204_034501_inLine +BABEL_OP2_303_32832_20131204_034501_outLine +BABEL_OP2_303_33273_20131106_231154_inLine +BABEL_OP2_303_33273_20131106_231154_outLine +BABEL_OP2_303_33774_20140325_031929_inLine +BABEL_OP2_303_33774_20140325_031929_outLine +BABEL_OP2_303_34064_20140324_183744_inLine +BABEL_OP2_303_34064_20140324_183744_outLine +BABEL_OP2_303_34208_20140404_030609_inLine +BABEL_OP2_303_34208_20140404_030609_outLine +BABEL_OP2_303_34477_20131113_195424_inLine +BABEL_OP2_303_34477_20131113_195424_outLine +BABEL_OP2_303_35420_20140318_214611_inLine +BABEL_OP2_303_35420_20140318_214611_outLine +BABEL_OP2_303_35467_20131114_210333_inLine +BABEL_OP2_303_35467_20131114_210333_outLine +BABEL_OP2_303_35885_20131225_181427_inLine +BABEL_OP2_303_35885_20131225_181427_outLine +BABEL_OP2_303_36017_20140204_222306_inLine +BABEL_OP2_303_36017_20140204_222306_outLine +BABEL_OP2_303_36147_20140402_224231_inLine +BABEL_OP2_303_36147_20140402_224231_outLine +BABEL_OP2_303_36900_20131223_225105_inLine +BABEL_OP2_303_36900_20131223_225105_outLine +BABEL_OP2_303_36990_20131111_022257_inLine +BABEL_OP2_303_36990_20131111_022257_outLine +BABEL_OP2_303_37290_20131114_034451_inLine +BABEL_OP2_303_37290_20131114_034451_outLine +BABEL_OP2_303_38340_20131114_184816_inLine +BABEL_OP2_303_38340_20131114_184816_outLine +BABEL_OP2_303_39099_20140127_233334_inLine +BABEL_OP2_303_39099_20140127_233334_outLine +BABEL_OP2_303_39277_20140324_193505_inLine +BABEL_OP2_303_39277_20140324_193505_outLine +BABEL_OP2_303_39579_20140327_191248_inLine +BABEL_OP2_303_39579_20140327_191248_outLine +BABEL_OP2_303_39680_20131211_183650_inLine +BABEL_OP2_303_39680_20131211_183650_outLine +BABEL_OP2_303_40092_20140329_200501_inLine +BABEL_OP2_303_40092_20140329_200501_outLine +BABEL_OP2_303_40092_20140329_201239_inLine +BABEL_OP2_303_40092_20140329_201239_outLine +BABEL_OP2_303_40092_20140329_202122_inLine +BABEL_OP2_303_40092_20140329_202122_outLine +BABEL_OP2_303_40648_20140319_195523_inLine +BABEL_OP2_303_40648_20140319_195523_outLine +BABEL_OP2_303_40939_20140415_195416_inLine +BABEL_OP2_303_40939_20140415_195416_outLine +BABEL_OP2_303_41745_20131109_041340_inLine +BABEL_OP2_303_41745_20131109_041340_outLine +BABEL_OP2_303_42155_20131114_053239_inLine +BABEL_OP2_303_42155_20131114_053239_outLine +BABEL_OP2_303_42243_20131025_222121_inLine +BABEL_OP2_303_42243_20131025_222121_outLine +BABEL_OP2_303_42619_20131124_172939_inLine +BABEL_OP2_303_42619_20131124_172939_outLine +BABEL_OP2_303_42834_20131115_023812_inLine +BABEL_OP2_303_42834_20131115_023812_outLine +BABEL_OP2_303_43395_20140405_161423_inLine +BABEL_OP2_303_43395_20140405_161423_outLine +BABEL_OP2_303_44114_20140405_145238_inLine +BABEL_OP2_303_44114_20140405_145238_outLine +BABEL_OP2_303_44619_20131109_201926_inLine +BABEL_OP2_303_44619_20131109_201926_outLine +BABEL_OP2_303_44678_20140320_185927_inLine +BABEL_OP2_303_44678_20140320_185927_outLine +BABEL_OP2_303_44898_20140309_220734_inLine +BABEL_OP2_303_44898_20140309_220734_outLine +BABEL_OP2_303_45121_20140207_012357_inLine 
+BABEL_OP2_303_45121_20140207_012357_outLine +BABEL_OP2_303_45140_20140205_001649_inLine +BABEL_OP2_303_45140_20140205_001649_outLine +BABEL_OP2_303_45777_20131116_041840_inLine +BABEL_OP2_303_45777_20131116_041840_outLine +BABEL_OP2_303_46535_20140404_014728_inLine +BABEL_OP2_303_46535_20140404_014728_outLine +BABEL_OP2_303_46712_20131114_191120_inLine +BABEL_OP2_303_46712_20131114_191120_outLine +BABEL_OP2_303_47877_20131218_041443_inLine +BABEL_OP2_303_47877_20131218_041443_outLine +BABEL_OP2_303_47882_20140309_225723_inLine +BABEL_OP2_303_47882_20140309_225723_outLine +BABEL_OP2_303_48024_20140324_154856_inLine +BABEL_OP2_303_48024_20140324_154856_outLine +BABEL_OP2_303_49001_20131114_194536_inLine +BABEL_OP2_303_49001_20131114_194536_outLine +BABEL_OP2_303_49870_20140330_002407_inLine +BABEL_OP2_303_49870_20140330_002407_outLine +BABEL_OP2_303_49870_20140330_003441_inLine +BABEL_OP2_303_49870_20140330_003441_outLine +BABEL_OP2_303_49902_20131104_154633_inLine +BABEL_OP2_303_49902_20131104_154633_outLine +BABEL_OP2_303_49907_20131114_011516_inLine +BABEL_OP2_303_49907_20131114_011516_outLine +BABEL_OP2_303_50427_20131113_234859_inLine +BABEL_OP2_303_50427_20131113_234859_outLine +BABEL_OP2_303_50630_20131130_231747_inLine +BABEL_OP2_303_50630_20131130_231747_outLine +BABEL_OP2_303_50940_20140203_224023_inLine +BABEL_OP2_303_50940_20140203_224023_outLine +BABEL_OP2_303_50958_20131110_200903_inLine +BABEL_OP2_303_50958_20131110_200903_outLine +BABEL_OP2_303_51414_20140118_210505_inLine +BABEL_OP2_303_51414_20140118_210505_outLine +BABEL_OP2_303_51417_20131205_015949_inLine +BABEL_OP2_303_51417_20131205_015949_outLine +BABEL_OP2_303_52058_20140318_223046_inLine +BABEL_OP2_303_52058_20140318_223046_outLine +BABEL_OP2_303_52058_20140318_223719_inLine +BABEL_OP2_303_52058_20140318_223719_outLine +BABEL_OP2_303_52322_20140319_164229_inLine +BABEL_OP2_303_52322_20140319_164229_outLine +BABEL_OP2_303_52818_20131115_053831_inLine +BABEL_OP2_303_52818_20131115_053831_outLine +BABEL_OP2_303_53010_20140403_235230_inLine +BABEL_OP2_303_53010_20140403_235230_outLine +BABEL_OP2_303_53068_20140321_041556_inLine +BABEL_OP2_303_53068_20140321_041556_outLine +BABEL_OP2_303_53206_20140308_201930_inLine +BABEL_OP2_303_53206_20140308_201930_outLine +BABEL_OP2_303_54405_20131113_021212_inLine +BABEL_OP2_303_54405_20131113_021212_outLine +BABEL_OP2_303_54953_20131109_030545_inLine +BABEL_OP2_303_54953_20131109_030545_outLine +BABEL_OP2_303_55013_20140204_205447_inLine +BABEL_OP2_303_55013_20140204_205447_outLine +BABEL_OP2_303_55742_20131114_230121_inLine +BABEL_OP2_303_55742_20131114_230121_outLine +BABEL_OP2_303_56306_20131206_164521_inLine +BABEL_OP2_303_56306_20131206_164521_outLine +BABEL_OP2_303_56326_20140309_213505_inLine +BABEL_OP2_303_56326_20140309_213505_outLine +BABEL_OP2_303_56370_20131030_191610_inLine +BABEL_OP2_303_56370_20131030_191610_outLine +BABEL_OP2_303_56523_20131109_044230_inLine +BABEL_OP2_303_56523_20131109_044230_outLine +BABEL_OP2_303_56743_20131109_043328_inLine +BABEL_OP2_303_56743_20131109_043328_outLine +BABEL_OP2_303_57065_20131204_193037_inLine +BABEL_OP2_303_57065_20131204_193037_outLine +BABEL_OP2_303_57650_20131230_182126_inLine +BABEL_OP2_303_57650_20131230_182126_outLine +BABEL_OP2_303_58717_20131115_231922_inLine +BABEL_OP2_303_58717_20131115_231922_outLine +BABEL_OP2_303_59039_20140219_180738_inLine +BABEL_OP2_303_59039_20140219_180738_outLine +BABEL_OP2_303_59091_20131206_183149_inLine +BABEL_OP2_303_59091_20131206_183149_outLine 
+BABEL_OP2_303_59163_20140416_164729_inLine +BABEL_OP2_303_59163_20140416_164729_outLine +BABEL_OP2_303_59301_20131205_012957_inLine +BABEL_OP2_303_59301_20131205_012957_outLine +BABEL_OP2_303_59747_20131114_224542_inLine +BABEL_OP2_303_59747_20131114_224542_outLine +BABEL_OP2_303_60352_20131115_205920_inLine +BABEL_OP2_303_60352_20131115_205920_outLine +BABEL_OP2_303_60352_20131115_210809_inLine +BABEL_OP2_303_60352_20131115_210809_outLine +BABEL_OP2_303_60418_20131115_210956_inLine +BABEL_OP2_303_60418_20131115_210956_outLine +BABEL_OP2_303_60508_20131101_185756_inLine +BABEL_OP2_303_60508_20131101_185756_outLine +BABEL_OP2_303_60650_20140319_182240_inLine +BABEL_OP2_303_60650_20140319_182240_outLine +BABEL_OP2_303_60836_20131112_201953_inLine +BABEL_OP2_303_60836_20131112_201953_outLine +BABEL_OP2_303_61219_20131114_181005_inLine +BABEL_OP2_303_61219_20131114_181005_outLine +BABEL_OP2_303_61435_20131123_235604_inLine +BABEL_OP2_303_61435_20131123_235604_outLine +BABEL_OP2_303_61684_20140220_032432_inLine +BABEL_OP2_303_61684_20140220_032432_outLine +BABEL_OP2_303_61873_20131114_011706_inLine +BABEL_OP2_303_61873_20131114_011706_outLine +BABEL_OP2_303_61971_20131228_000329_inLine +BABEL_OP2_303_61971_20131228_000329_outLine +BABEL_OP2_303_62286_20131129_203236_inLine +BABEL_OP2_303_62286_20131129_203236_outLine +BABEL_OP2_303_62362_20140129_183345_inLine +BABEL_OP2_303_62362_20140129_183345_outLine +BABEL_OP2_303_62471_20140328_192801_inLine +BABEL_OP2_303_62471_20140328_192801_outLine +BABEL_OP2_303_62734_20131108_203310_inLine +BABEL_OP2_303_62734_20131108_203310_outLine +BABEL_OP2_303_63445_20131101_180928_inLine +BABEL_OP2_303_63445_20131101_180928_outLine +BABEL_OP2_303_63787_20131029_232219_inLine +BABEL_OP2_303_63787_20131029_232219_outLine +BABEL_OP2_303_63938_20131225_194045_inLine +BABEL_OP2_303_63938_20131225_194045_outLine +BABEL_OP2_303_65298_20140222_213911_inLine +BABEL_OP2_303_65298_20140222_213911_outLine +BABEL_OP2_303_65639_20140320_184458_inLine +BABEL_OP2_303_65639_20140320_184458_outLine +BABEL_OP2_303_65723_20131106_221517_inLine +BABEL_OP2_303_65723_20131106_221517_outLine +BABEL_OP2_303_66305_20131224_012218_inLine +BABEL_OP2_303_66305_20131224_022308_inLine +BABEL_OP2_303_67085_20140223_030002_inLine +BABEL_OP2_303_67085_20140223_030002_outLine +BABEL_OP2_303_67304_20140319_193543_inLine +BABEL_OP2_303_67304_20140319_193543_outLine +BABEL_OP2_303_67794_20131111_173553_inLine +BABEL_OP2_303_67794_20131111_173553_outLine +BABEL_OP2_303_68040_20131116_041049_inLine +BABEL_OP2_303_68040_20131116_041049_outLine +BABEL_OP2_303_68182_20131206_203404_inLine +BABEL_OP2_303_68182_20131206_203404_outLine +BABEL_OP2_303_68402_20140319_235557_inLine +BABEL_OP2_303_68402_20140319_235557_outLine +BABEL_OP2_303_68854_20140125_191013_inLine +BABEL_OP2_303_68854_20140125_191013_outLine +BABEL_OP2_303_69090_20140322_190538_inLine +BABEL_OP2_303_69090_20140322_190538_outLine +BABEL_OP2_303_69964_20140201_215153_inLine +BABEL_OP2_303_69964_20140201_215153_outLine +BABEL_OP2_303_69972_20140412_213250_inLine +BABEL_OP2_303_69972_20140412_213250_outLine +BABEL_OP2_303_70182_20140131_021121_inLine +BABEL_OP2_303_70182_20140131_021121_outLine +BABEL_OP2_303_70216_20140309_212242_inLine +BABEL_OP2_303_70216_20140309_212242_outLine +BABEL_OP2_303_70526_20140121_191817_inLine +BABEL_OP2_303_70526_20140121_191817_outLine +BABEL_OP2_303_71282_20131206_205821_inLine +BABEL_OP2_303_71282_20131206_205821_outLine +BABEL_OP2_303_71333_20131114_201026_inLine 
+BABEL_OP2_303_71333_20131114_201026_outLine +BABEL_OP2_303_71704_20131107_231553_inLine +BABEL_OP2_303_71704_20131107_231553_outLine +BABEL_OP2_303_71754_20140327_221321_inLine +BABEL_OP2_303_71754_20140327_221321_outLine +BABEL_OP2_303_73258_20131110_190632_inLine +BABEL_OP2_303_73258_20131110_190632_outLine +BABEL_OP2_303_73305_20140219_214719_inLine +BABEL_OP2_303_73305_20140219_214719_outLine +BABEL_OP2_303_73408_20140222_222505_inLine +BABEL_OP2_303_73408_20140222_222505_outLine +BABEL_OP2_303_73837_20131114_035127_inLine +BABEL_OP2_303_73837_20131114_035127_outLine +BABEL_OP2_303_74121_20131109_193228_inLine +BABEL_OP2_303_74121_20131109_193228_outLine +BABEL_OP2_303_75366_20140222_194703_inLine +BABEL_OP2_303_75366_20140222_194703_outLine +BABEL_OP2_303_75460_20140211_182910_inLine +BABEL_OP2_303_75460_20140211_182910_outLine +BABEL_OP2_303_77139_20131105_210350_inLine +BABEL_OP2_303_77139_20131105_210350_outLine +BABEL_OP2_303_79028_20140416_181014_inLine +BABEL_OP2_303_79028_20140416_181014_outLine +BABEL_OP2_303_79080_20131208_203223_inLine +BABEL_OP2_303_79080_20131208_203223_outLine +BABEL_OP2_303_79129_20131114_034645_outLine +BABEL_OP2_303_79723_20140413_221551_inLine +BABEL_OP2_303_79723_20140413_221551_outLine +BABEL_OP2_303_79898_20140309_211140_inLine +BABEL_OP2_303_79898_20140309_211140_outLine +BABEL_OP2_303_80721_20131201_171555_inLine +BABEL_OP2_303_80721_20131201_171555_outLine +BABEL_OP2_303_81427_20131105_001654_inLine +BABEL_OP2_303_81427_20131105_001654_outLine +BABEL_OP2_303_81674_20140202_220306_inLine +BABEL_OP2_303_81674_20140202_220306_outLine +BABEL_OP2_303_82361_20140204_232359_inLine +BABEL_OP2_303_82361_20140204_232359_outLine +BABEL_OP2_303_82626_20140315_024235_inLine +BABEL_OP2_303_82626_20140315_024235_outLine +BABEL_OP2_303_82863_20131111_030006_inLine +BABEL_OP2_303_82863_20131111_030006_outLine +BABEL_OP2_303_82904_20140204_205103_inLine +BABEL_OP2_303_83062_20140204_210837_inLine +BABEL_OP2_303_83062_20140204_210837_outLine +BABEL_OP2_303_83813_20140320_010221_inLine +BABEL_OP2_303_83813_20140320_010221_outLine +BABEL_OP2_303_84125_20131025_195026_inLine +BABEL_OP2_303_84125_20131025_195026_outLine +BABEL_OP2_303_84339_20140111_180841_inLine +BABEL_OP2_303_84339_20140111_180841_outLine +BABEL_OP2_303_84815_20131204_190755_inLine +BABEL_OP2_303_84815_20131204_190755_outLine +BABEL_OP2_303_85048_20131114_222244_inLine +BABEL_OP2_303_85048_20131114_222244_outLine +BABEL_OP2_303_85260_20140327_224114_inLine +BABEL_OP2_303_85260_20140327_224114_outLine +BABEL_OP2_303_86715_20140312_223757_inLine +BABEL_OP2_303_86715_20140312_223757_outLine +BABEL_OP2_303_86748_20131206_231713_inLine +BABEL_OP2_303_86748_20131206_231713_outLine +BABEL_OP2_303_86830_20131211_192459_inLine +BABEL_OP2_303_86830_20131211_192459_outLine +BABEL_OP2_303_86860_20140204_194637_inLine +BABEL_OP2_303_86860_20140204_194637_outLine +BABEL_OP2_303_88686_20131028_192526_inLine +BABEL_OP2_303_88686_20131028_192526_outLine +BABEL_OP2_303_88812_20140125_200044_inLine +BABEL_OP2_303_88812_20140125_200044_outLine +BABEL_OP2_303_88873_20131112_214623_inLine +BABEL_OP2_303_88873_20131112_214623_outLine +BABEL_OP2_303_89045_20131024_213611_inLine +BABEL_OP2_303_89045_20131024_213611_outLine +BABEL_OP2_303_89372_20131025_175446_inLine +BABEL_OP2_303_89372_20131025_175446_outLine +BABEL_OP2_303_89457_20131113_185151_inLine +BABEL_OP2_303_89457_20131113_185151_outLine +BABEL_OP2_303_89575_20131129_162850_inLine +BABEL_OP2_303_89575_20131129_162850_outLine 
+BABEL_OP2_303_89650_20140414_003815_inLine +BABEL_OP2_303_89650_20140414_003815_outLine +BABEL_OP2_303_90417_20140206_194432_inLine +BABEL_OP2_303_90417_20140206_194432_outLine +BABEL_OP2_303_91189_20140214_204530_inLine +BABEL_OP2_303_91189_20140214_204530_outLine +BABEL_OP2_303_91336_20131114_155601_inLine +BABEL_OP2_303_91336_20131114_155601_outLine +BABEL_OP2_303_91411_20140318_210954_inLine +BABEL_OP2_303_91411_20140318_210954_outLine +BABEL_OP2_303_91463_20131115_000512_inLine +BABEL_OP2_303_91463_20131115_000512_outLine +BABEL_OP2_303_91581_20131202_041422_inLine +BABEL_OP2_303_91581_20131202_041422_outLine +BABEL_OP2_303_91593_20140209_040916_inLine +BABEL_OP2_303_91593_20140209_040916_outLine +BABEL_OP2_303_91606_20140325_030918_inLine +BABEL_OP2_303_91606_20140325_030918_outLine +BABEL_OP2_303_91760_20140129_023507_inLine +BABEL_OP2_303_91760_20140129_023507_outLine +BABEL_OP2_303_91808_20140324_180442_inLine +BABEL_OP2_303_91808_20140324_180442_outLine +BABEL_OP2_303_91884_20131224_170738_inLine +BABEL_OP2_303_91884_20131224_170738_outLine +BABEL_OP2_303_91971_20140401_140304_inLine +BABEL_OP2_303_91971_20140401_140304_outLine +BABEL_OP2_303_92281_20140225_212826_inLine +BABEL_OP2_303_92281_20140225_212826_outLine +BABEL_OP2_303_93007_20140325_033131_inLine +BABEL_OP2_303_93007_20140325_033131_outLine +BABEL_OP2_303_93443_20140320_235342_inLine +BABEL_OP2_303_93443_20140320_235342_outLine +BABEL_OP2_303_93681_20140322_200153_inLine +BABEL_OP2_303_93681_20140322_200153_outLine +BABEL_OP2_303_93861_20131114_005221_inLine +BABEL_OP2_303_93861_20131114_005221_outLine +BABEL_OP2_303_93861_20131114_011200_inLine +BABEL_OP2_303_93861_20131114_011200_outLine +BABEL_OP2_303_93937_20140312_225604_inLine +BABEL_OP2_303_93937_20140312_225604_outLine +BABEL_OP2_303_93946_20131204_180611_inLine +BABEL_OP2_303_93946_20131204_180611_outLine +BABEL_OP2_303_94002_20131113_163221_inLine +BABEL_OP2_303_94002_20131113_163221_outLine +BABEL_OP2_303_94035_20140320_015111_inLine +BABEL_OP2_303_94035_20140320_015111_outLine +BABEL_OP2_303_94044_20140221_020012_inLine +BABEL_OP2_303_94044_20140221_020012_outLine +BABEL_OP2_303_94212_20140328_202919_inLine +BABEL_OP2_303_94212_20140328_202919_outLine +BABEL_OP2_303_94713_20140319_231311_inLine +BABEL_OP2_303_94713_20140319_231311_outLine +BABEL_OP2_303_95028_20140206_001106_inLine +BABEL_OP2_303_95028_20140206_001106_outLine +BABEL_OP2_303_95028_20140320_001627_inLine +BABEL_OP2_303_95028_20140320_001627_outLine +BABEL_OP2_303_95467_20140218_202005_inLine +BABEL_OP2_303_95467_20140218_202005_outLine +BABEL_OP2_303_95490_20131101_164715_inLine +BABEL_OP2_303_95490_20131101_164715_outLine +BABEL_OP2_303_95663_20131116_161029_inLine +BABEL_OP2_303_95663_20131116_161029_outLine +BABEL_OP2_303_95935_20131204_145738_inLine +BABEL_OP2_303_95935_20131204_145738_outLine +BABEL_OP2_303_96088_20140127_224534_inLine +BABEL_OP2_303_96088_20140127_224534_outLine +BABEL_OP2_303_97097_20140122_030319_inLine +BABEL_OP2_303_97097_20140122_030319_outLine +BABEL_OP2_303_97264_20131205_020902_inLine +BABEL_OP2_303_97264_20131205_020902_outLine +BABEL_OP2_303_97588_20131027_011205_inLine +BABEL_OP2_303_97588_20131027_011205_outLine +BABEL_OP2_303_98255_20140322_200157_inLine +BABEL_OP2_303_98255_20140322_200157_outLine +BABEL_OP2_303_98580_20131112_204407_inLine +BABEL_OP2_303_98580_20131112_204407_outLine +BABEL_OP2_303_99813_20131115_032632_inLine +BABEL_OP2_303_99813_20131115_032632_outLine +BABEL_OP2_303_99883_20140326_192513_inLine 
+BABEL_OP2_303_99883_20140326_192513_outLine +BABEL_OP2_303_99952_20140212_000327_inLine +BABEL_OP2_303_99952_20140212_000327_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/dev.2h.list b/egs/babel/s5d/conf/lists/304-lithuanian/dev.2h.list new file mode 100644 index 00000000000..37f27ef3750 --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/dev.2h.list @@ -0,0 +1,122 @@ +BABEL_OP2_304_13040_20131130_214521_inLine +BABEL_OP2_304_13040_20131130_214521_outLine +BABEL_OP2_304_14158_20140115_023605_inLine +BABEL_OP2_304_14158_20140115_023605_outLine +BABEL_OP2_304_14575_20131024_232334_inLine +BABEL_OP2_304_14575_20131024_232334_outLine +BABEL_OP2_304_14575_20131024_235230_inLine +BABEL_OP2_304_14575_20131024_235230_outLine +BABEL_OP2_304_15163_20140108_001236_inLine +BABEL_OP2_304_15163_20140108_001236_outLine +BABEL_OP2_304_15262_20131210_004932_inLine +BABEL_OP2_304_15262_20131210_004932_outLine +BABEL_OP2_304_16056_20140123_070422_inLine +BABEL_OP2_304_16056_20140123_070422_outLine +BABEL_OP2_304_16787_20131206_025653_inLine +BABEL_OP2_304_16787_20131206_025653_outLine +BABEL_OP2_304_17511_20131126_055458_inLine +BABEL_OP2_304_17511_20131126_055458_outLine +BABEL_OP2_304_17573_20140203_230300_inLine +BABEL_OP2_304_17573_20140203_230300_outLine +BABEL_OP2_304_17914_20140228_184910_inLine +BABEL_OP2_304_17914_20140228_184910_outLine +BABEL_OP2_304_21581_20131216_220706_inLine +BABEL_OP2_304_21581_20131216_220706_outLine +BABEL_OP2_304_22021_20131023_221926_inLine +BABEL_OP2_304_22021_20131023_221926_outLine +BABEL_OP2_304_22288_20131112_035653_inLine +BABEL_OP2_304_22288_20131112_035653_outLine +BABEL_OP2_304_26206_20140120_022753_inLine +BABEL_OP2_304_26206_20140120_022753_outLine +BABEL_OP2_304_29777_20140217_064220_inLine +BABEL_OP2_304_29777_20140217_064220_outLine +BABEL_OP2_304_31500_20131109_033149_inLine +BABEL_OP2_304_31500_20131109_033149_outLine +BABEL_OP2_304_31979_20140109_015624_inLine +BABEL_OP2_304_31979_20140109_015624_outLine +BABEL_OP2_304_32959_20140210_005641_inLine +BABEL_OP2_304_32959_20140210_005641_outLine +BABEL_OP2_304_33800_20131023_012145_inLine +BABEL_OP2_304_33800_20131023_012145_outLine +BABEL_OP2_304_34208_20131031_044912_inLine +BABEL_OP2_304_34208_20131031_044912_outLine +BABEL_OP2_304_35069_20140304_002856_inLine +BABEL_OP2_304_35069_20140304_002856_outLine +BABEL_OP2_304_35202_20140111_000728_inLine +BABEL_OP2_304_35202_20140111_000728_outLine +BABEL_OP2_304_37064_20131129_035959_inLine +BABEL_OP2_304_37064_20131129_035959_outLine +BABEL_OP2_304_37068_20131023_011604_inLine +BABEL_OP2_304_37068_20131023_011604_outLine +BABEL_OP2_304_39927_20131021_221542_inLine +BABEL_OP2_304_39927_20131021_221542_outLine +BABEL_OP2_304_40330_20131109_021648_inLine +BABEL_OP2_304_40330_20131109_021648_outLine +BABEL_OP2_304_42877_20131022_230033_inLine +BABEL_OP2_304_42877_20131022_230033_outLine +BABEL_OP2_304_44420_20131214_233135_inLine +BABEL_OP2_304_44420_20131214_233135_outLine +BABEL_OP2_304_46702_20131115_213311_inLine +BABEL_OP2_304_46702_20131115_213311_outLine +BABEL_OP2_304_46712_20131209_044650_inLine +BABEL_OP2_304_46712_20131209_044650_outLine +BABEL_OP2_304_46974_20140220_023915_inLine +BABEL_OP2_304_46974_20140220_023915_outLine +BABEL_OP2_304_54735_20131112_025013_inLine +BABEL_OP2_304_54735_20131112_025013_outLine +BABEL_OP2_304_63265_20131108_044545_inLine +BABEL_OP2_304_63265_20131108_044545_outLine +BABEL_OP2_304_63307_20140121_215145_inLine +BABEL_OP2_304_63307_20140121_215145_outLine +BABEL_OP2_304_63938_20140303_232624_inLine 
+BABEL_OP2_304_63938_20140303_232624_outLine +BABEL_OP2_304_64494_20131212_025147_inLine +BABEL_OP2_304_64494_20131212_025147_outLine +BABEL_OP2_304_67671_20131106_030834_inLine +BABEL_OP2_304_67671_20131106_030834_outLine +BABEL_OP2_304_70110_20131118_222225_inLine +BABEL_OP2_304_70110_20131118_222225_outLine +BABEL_OP2_304_70282_20140114_194359_inLine +BABEL_OP2_304_70282_20140114_194359_outLine +BABEL_OP2_304_71704_20131215_005510_inLine +BABEL_OP2_304_71704_20131215_005510_outLine +BABEL_OP2_304_73622_20131216_061333_inLine +BABEL_OP2_304_73622_20131216_061333_outLine +BABEL_OP2_304_76837_20131020_200525_inLine +BABEL_OP2_304_76837_20131020_200525_outLine +BABEL_OP2_304_78877_20131023_202733_inLine +BABEL_OP2_304_78877_20131023_202733_outLine +BABEL_OP2_304_84079_20131112_195009_inLine +BABEL_OP2_304_84079_20131112_195009_outLine +BABEL_OP2_304_86878_20131129_043842_inLine +BABEL_OP2_304_86878_20131129_043842_outLine +BABEL_OP2_304_87629_20140121_223247_inLine +BABEL_OP2_304_87629_20140121_223247_outLine +BABEL_OP2_304_87693_20131214_012505_inLine +BABEL_OP2_304_87693_20131214_012505_outLine +BABEL_OP2_304_88394_20131030_012001_inLine +BABEL_OP2_304_88394_20131030_012001_outLine +BABEL_OP2_304_88873_20131215_052029_inLine +BABEL_OP2_304_88873_20131215_052029_outLine +BABEL_OP2_304_89457_20140107_011232_inLine +BABEL_OP2_304_89457_20140107_011232_outLine +BABEL_OP2_304_91411_20140214_045051_inLine +BABEL_OP2_304_91411_20140214_045051_outLine +BABEL_OP2_304_94002_20140106_061517_inLine +BABEL_OP2_304_94002_20140106_061517_outLine +BABEL_OP2_304_94035_20131028_044307_inLine +BABEL_OP2_304_94035_20131028_044307_outLine +BABEL_OP2_304_94166_20140222_223654_inLine +BABEL_OP2_304_94166_20140222_223654_outLine +BABEL_OP2_304_94587_20140203_223943_inLine +BABEL_OP2_304_94587_20140203_223943_outLine +BABEL_OP2_304_95966_20140116_013030_inLine +BABEL_OP2_304_95966_20140116_013030_outLine +BABEL_OP2_304_96041_20131110_011619_inLine +BABEL_OP2_304_96041_20131110_011619_outLine +BABEL_OP2_304_96934_20131207_231603_inLine +BABEL_OP2_304_96934_20131207_231603_outLine +BABEL_OP2_304_97604_20140221_172005_inLine +BABEL_OP2_304_97604_20140221_172005_outLine +BABEL_OP2_304_99732_20140213_211724_inLine +BABEL_OP2_304_99732_20140213_211724_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/dev.list b/egs/babel/s5d/conf/lists/304-lithuanian/dev.list new file mode 100644 index 00000000000..37f27ef3750 --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/dev.list @@ -0,0 +1,122 @@ +BABEL_OP2_304_13040_20131130_214521_inLine +BABEL_OP2_304_13040_20131130_214521_outLine +BABEL_OP2_304_14158_20140115_023605_inLine +BABEL_OP2_304_14158_20140115_023605_outLine +BABEL_OP2_304_14575_20131024_232334_inLine +BABEL_OP2_304_14575_20131024_232334_outLine +BABEL_OP2_304_14575_20131024_235230_inLine +BABEL_OP2_304_14575_20131024_235230_outLine +BABEL_OP2_304_15163_20140108_001236_inLine +BABEL_OP2_304_15163_20140108_001236_outLine +BABEL_OP2_304_15262_20131210_004932_inLine +BABEL_OP2_304_15262_20131210_004932_outLine +BABEL_OP2_304_16056_20140123_070422_inLine +BABEL_OP2_304_16056_20140123_070422_outLine +BABEL_OP2_304_16787_20131206_025653_inLine +BABEL_OP2_304_16787_20131206_025653_outLine +BABEL_OP2_304_17511_20131126_055458_inLine +BABEL_OP2_304_17511_20131126_055458_outLine +BABEL_OP2_304_17573_20140203_230300_inLine +BABEL_OP2_304_17573_20140203_230300_outLine +BABEL_OP2_304_17914_20140228_184910_inLine +BABEL_OP2_304_17914_20140228_184910_outLine +BABEL_OP2_304_21581_20131216_220706_inLine 
+BABEL_OP2_304_21581_20131216_220706_outLine +BABEL_OP2_304_22021_20131023_221926_inLine +BABEL_OP2_304_22021_20131023_221926_outLine +BABEL_OP2_304_22288_20131112_035653_inLine +BABEL_OP2_304_22288_20131112_035653_outLine +BABEL_OP2_304_26206_20140120_022753_inLine +BABEL_OP2_304_26206_20140120_022753_outLine +BABEL_OP2_304_29777_20140217_064220_inLine +BABEL_OP2_304_29777_20140217_064220_outLine +BABEL_OP2_304_31500_20131109_033149_inLine +BABEL_OP2_304_31500_20131109_033149_outLine +BABEL_OP2_304_31979_20140109_015624_inLine +BABEL_OP2_304_31979_20140109_015624_outLine +BABEL_OP2_304_32959_20140210_005641_inLine +BABEL_OP2_304_32959_20140210_005641_outLine +BABEL_OP2_304_33800_20131023_012145_inLine +BABEL_OP2_304_33800_20131023_012145_outLine +BABEL_OP2_304_34208_20131031_044912_inLine +BABEL_OP2_304_34208_20131031_044912_outLine +BABEL_OP2_304_35069_20140304_002856_inLine +BABEL_OP2_304_35069_20140304_002856_outLine +BABEL_OP2_304_35202_20140111_000728_inLine +BABEL_OP2_304_35202_20140111_000728_outLine +BABEL_OP2_304_37064_20131129_035959_inLine +BABEL_OP2_304_37064_20131129_035959_outLine +BABEL_OP2_304_37068_20131023_011604_inLine +BABEL_OP2_304_37068_20131023_011604_outLine +BABEL_OP2_304_39927_20131021_221542_inLine +BABEL_OP2_304_39927_20131021_221542_outLine +BABEL_OP2_304_40330_20131109_021648_inLine +BABEL_OP2_304_40330_20131109_021648_outLine +BABEL_OP2_304_42877_20131022_230033_inLine +BABEL_OP2_304_42877_20131022_230033_outLine +BABEL_OP2_304_44420_20131214_233135_inLine +BABEL_OP2_304_44420_20131214_233135_outLine +BABEL_OP2_304_46702_20131115_213311_inLine +BABEL_OP2_304_46702_20131115_213311_outLine +BABEL_OP2_304_46712_20131209_044650_inLine +BABEL_OP2_304_46712_20131209_044650_outLine +BABEL_OP2_304_46974_20140220_023915_inLine +BABEL_OP2_304_46974_20140220_023915_outLine +BABEL_OP2_304_54735_20131112_025013_inLine +BABEL_OP2_304_54735_20131112_025013_outLine +BABEL_OP2_304_63265_20131108_044545_inLine +BABEL_OP2_304_63265_20131108_044545_outLine +BABEL_OP2_304_63307_20140121_215145_inLine +BABEL_OP2_304_63307_20140121_215145_outLine +BABEL_OP2_304_63938_20140303_232624_inLine +BABEL_OP2_304_63938_20140303_232624_outLine +BABEL_OP2_304_64494_20131212_025147_inLine +BABEL_OP2_304_64494_20131212_025147_outLine +BABEL_OP2_304_67671_20131106_030834_inLine +BABEL_OP2_304_67671_20131106_030834_outLine +BABEL_OP2_304_70110_20131118_222225_inLine +BABEL_OP2_304_70110_20131118_222225_outLine +BABEL_OP2_304_70282_20140114_194359_inLine +BABEL_OP2_304_70282_20140114_194359_outLine +BABEL_OP2_304_71704_20131215_005510_inLine +BABEL_OP2_304_71704_20131215_005510_outLine +BABEL_OP2_304_73622_20131216_061333_inLine +BABEL_OP2_304_73622_20131216_061333_outLine +BABEL_OP2_304_76837_20131020_200525_inLine +BABEL_OP2_304_76837_20131020_200525_outLine +BABEL_OP2_304_78877_20131023_202733_inLine +BABEL_OP2_304_78877_20131023_202733_outLine +BABEL_OP2_304_84079_20131112_195009_inLine +BABEL_OP2_304_84079_20131112_195009_outLine +BABEL_OP2_304_86878_20131129_043842_inLine +BABEL_OP2_304_86878_20131129_043842_outLine +BABEL_OP2_304_87629_20140121_223247_inLine +BABEL_OP2_304_87629_20140121_223247_outLine +BABEL_OP2_304_87693_20131214_012505_inLine +BABEL_OP2_304_87693_20131214_012505_outLine +BABEL_OP2_304_88394_20131030_012001_inLine +BABEL_OP2_304_88394_20131030_012001_outLine +BABEL_OP2_304_88873_20131215_052029_inLine +BABEL_OP2_304_88873_20131215_052029_outLine +BABEL_OP2_304_89457_20140107_011232_inLine +BABEL_OP2_304_89457_20140107_011232_outLine 
+BABEL_OP2_304_91411_20140214_045051_inLine +BABEL_OP2_304_91411_20140214_045051_outLine +BABEL_OP2_304_94002_20140106_061517_inLine +BABEL_OP2_304_94002_20140106_061517_outLine +BABEL_OP2_304_94035_20131028_044307_inLine +BABEL_OP2_304_94035_20131028_044307_outLine +BABEL_OP2_304_94166_20140222_223654_inLine +BABEL_OP2_304_94166_20140222_223654_outLine +BABEL_OP2_304_94587_20140203_223943_inLine +BABEL_OP2_304_94587_20140203_223943_outLine +BABEL_OP2_304_95966_20140116_013030_inLine +BABEL_OP2_304_95966_20140116_013030_outLine +BABEL_OP2_304_96041_20131110_011619_inLine +BABEL_OP2_304_96041_20131110_011619_outLine +BABEL_OP2_304_96934_20131207_231603_inLine +BABEL_OP2_304_96934_20131207_231603_outLine +BABEL_OP2_304_97604_20140221_172005_inLine +BABEL_OP2_304_97604_20140221_172005_outLine +BABEL_OP2_304_99732_20140213_211724_inLine +BABEL_OP2_304_99732_20140213_211724_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/eval.list b/egs/babel/s5d/conf/lists/304-lithuanian/eval.list new file mode 100644 index 00000000000..506241eadc5 --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/eval.list @@ -0,0 +1,192 @@ +BABEL_OP2_304_10416_20140107_061620_inLine +BABEL_OP2_304_10416_20140107_061620_outLine +BABEL_OP2_304_14723_20131125_042706_inLine +BABEL_OP2_304_14723_20131125_042706_outLine +BABEL_OP2_304_16351_20131027_201533_inLine +BABEL_OP2_304_16351_20131027_201533_outLine +BABEL_OP2_304_16802_20131108_055143_inLine +BABEL_OP2_304_16802_20131108_055143_outLine +BABEL_OP2_304_18863_20140222_035802_inLine +BABEL_OP2_304_18863_20140222_035802_outLine +BABEL_OP2_304_20724_20131109_014600_inLine +BABEL_OP2_304_20724_20131109_014600_outLine +BABEL_OP2_304_22641_20131201_215149_inLine +BABEL_OP2_304_22641_20131201_215149_outLine +BABEL_OP2_304_23355_20131126_211038_inLine +BABEL_OP2_304_23355_20131126_211038_outLine +BABEL_OP2_304_23395_20140214_042808_inLine +BABEL_OP2_304_23395_20140214_042808_outLine +BABEL_OP2_304_23628_20131208_203311_inLine +BABEL_OP2_304_23628_20131208_203311_outLine +BABEL_OP2_304_23700_20131025_204511_inLine +BABEL_OP2_304_23700_20131025_204511_outLine +BABEL_OP2_304_23731_20140111_003449_inLine +BABEL_OP2_304_23731_20140111_003449_outLine +BABEL_OP2_304_24033_20140304_045137_inLine +BABEL_OP2_304_24033_20140304_045137_outLine +BABEL_OP2_304_24209_20131022_193019_inLine +BABEL_OP2_304_24209_20131022_193019_outLine +BABEL_OP2_304_24209_20131022_193936_inLine +BABEL_OP2_304_24209_20131022_193936_outLine +BABEL_OP2_304_25068_20131019_030524_inLine +BABEL_OP2_304_25068_20131019_030524_outLine +BABEL_OP2_304_26869_20131031_215636_inLine +BABEL_OP2_304_26869_20131031_215636_outLine +BABEL_OP2_304_28422_20140112_043550_inLine +BABEL_OP2_304_28422_20140112_043550_outLine +BABEL_OP2_304_28538_20140106_011449_inLine +BABEL_OP2_304_28538_20140106_011449_outLine +BABEL_OP2_304_28585_20140225_043733_inLine +BABEL_OP2_304_28585_20140225_043733_outLine +BABEL_OP2_304_30250_20140120_020901_inLine +BABEL_OP2_304_30250_20140120_020901_outLine +BABEL_OP2_304_36219_20131216_035438_inLine +BABEL_OP2_304_36219_20131216_035438_outLine +BABEL_OP2_304_36632_20131024_201211_inLine +BABEL_OP2_304_36632_20131024_201211_outLine +BABEL_OP2_304_39159_20131208_045854_inLine +BABEL_OP2_304_39159_20131208_045854_outLine +BABEL_OP2_304_39277_20131020_204845_inLine +BABEL_OP2_304_39277_20131020_204845_outLine +BABEL_OP2_304_41109_20140220_021208_inLine +BABEL_OP2_304_41109_20140220_021208_outLine +BABEL_OP2_304_43285_20140124_012117_inLine +BABEL_OP2_304_43285_20140124_012117_outLine 
+BABEL_OP2_304_44255_20140222_010712_inLine +BABEL_OP2_304_44255_20140222_010712_outLine +BABEL_OP2_304_44681_20131023_205447_inLine +BABEL_OP2_304_44681_20131023_205447_outLine +BABEL_OP2_304_45106_20140117_233013_inLine +BABEL_OP2_304_45106_20140117_233013_outLine +BABEL_OP2_304_45699_20131022_213702_inLine +BABEL_OP2_304_45699_20131022_213702_outLine +BABEL_OP2_304_46905_20131025_213636_inLine +BABEL_OP2_304_46905_20131025_213636_outLine +BABEL_OP2_304_47882_20131027_194825_inLine +BABEL_OP2_304_47882_20131027_194825_outLine +BABEL_OP2_304_48200_20140221_015225_inLine +BABEL_OP2_304_48200_20140221_015225_outLine +BABEL_OP2_304_49641_20131112_211903_inLine +BABEL_OP2_304_49641_20131112_211903_outLine +BABEL_OP2_304_49775_20131114_210107_inLine +BABEL_OP2_304_49775_20131114_210107_outLine +BABEL_OP2_304_50962_20131206_052346_inLine +BABEL_OP2_304_50962_20131206_052346_outLine +BABEL_OP2_304_53206_20131021_231814_inLine +BABEL_OP2_304_53206_20131021_231814_outLine +BABEL_OP2_304_53441_20131026_001731_inLine +BABEL_OP2_304_53441_20131026_001731_outLine +BABEL_OP2_304_53758_20131110_023501_inLine +BABEL_OP2_304_53758_20131110_023501_outLine +BABEL_OP2_304_54040_20140207_031046_inLine +BABEL_OP2_304_54040_20140207_031046_outLine +BABEL_OP2_304_55742_20131210_035616_inLine +BABEL_OP2_304_55742_20131210_035616_outLine +BABEL_OP2_304_57650_20140228_212617_inLine +BABEL_OP2_304_57650_20140228_212617_outLine +BABEL_OP2_304_57654_20131129_021919_inLine +BABEL_OP2_304_57654_20131129_021919_outLine +BABEL_OP2_304_57922_20140212_234031_inLine +BABEL_OP2_304_57922_20140212_234031_outLine +BABEL_OP2_304_60508_20131213_013224_inLine +BABEL_OP2_304_60508_20131213_013224_outLine +BABEL_OP2_304_62434_20131204_015115_inLine +BABEL_OP2_304_62434_20131204_015115_outLine +BABEL_OP2_304_63481_20131218_054343_inLine +BABEL_OP2_304_63481_20131218_054343_outLine +BABEL_OP2_304_63484_20131108_002450_inLine +BABEL_OP2_304_63484_20131108_002450_outLine +BABEL_OP2_304_65339_20131108_025612_inLine +BABEL_OP2_304_65339_20131108_025612_outLine +BABEL_OP2_304_66967_20131211_212833_inLine +BABEL_OP2_304_66967_20131211_212833_outLine +BABEL_OP2_304_67373_20131213_035431_inLine +BABEL_OP2_304_67373_20131213_035431_outLine +BABEL_OP2_304_67726_20131021_224218_inLine +BABEL_OP2_304_67726_20131021_224218_outLine +BABEL_OP2_304_67794_20131211_225335_inLine +BABEL_OP2_304_67794_20131211_225335_outLine +BABEL_OP2_304_68823_20131020_204717_inLine +BABEL_OP2_304_68823_20131020_204717_outLine +BABEL_OP2_304_69090_20131028_014204_inLine +BABEL_OP2_304_69090_20131028_014204_outLine +BABEL_OP2_304_69574_20131114_192607_inLine +BABEL_OP2_304_69574_20131114_192607_outLine +BABEL_OP2_304_70726_20131024_044755_inLine +BABEL_OP2_304_70726_20131024_044755_outLine +BABEL_OP2_304_71278_20131021_222320_inLine +BABEL_OP2_304_71278_20131021_222320_outLine +BABEL_OP2_304_73837_20131203_050134_inLine +BABEL_OP2_304_73837_20131203_050134_outLine +BABEL_OP2_304_74111_20140214_221515_inLine +BABEL_OP2_304_74111_20140214_221515_outLine +BABEL_OP2_304_74280_20131114_221312_inLine +BABEL_OP2_304_74280_20131114_221312_outLine +BABEL_OP2_304_75465_20140214_020356_inLine +BABEL_OP2_304_75465_20140214_020356_outLine +BABEL_OP2_304_76773_20131201_022925_inLine +BABEL_OP2_304_76773_20131201_022925_outLine +BABEL_OP2_304_77904_20131023_031446_inLine +BABEL_OP2_304_77904_20131023_031446_outLine +BABEL_OP2_304_77990_20131201_021431_inLine +BABEL_OP2_304_77990_20131201_021431_outLine +BABEL_OP2_304_78609_20140215_083334_inLine 
+BABEL_OP2_304_78609_20140215_083334_outLine +BABEL_OP2_304_78630_20131216_203357_inLine +BABEL_OP2_304_78630_20131216_203357_outLine +BABEL_OP2_304_78958_20131106_193325_inLine +BABEL_OP2_304_78958_20131106_193325_outLine +BABEL_OP2_304_78976_20131207_040932_inLine +BABEL_OP2_304_78976_20131207_040932_outLine +BABEL_OP2_304_80241_20131031_000650_inLine +BABEL_OP2_304_80241_20131031_000650_outLine +BABEL_OP2_304_83366_20140114_021841_inLine +BABEL_OP2_304_83366_20140114_021841_outLine +BABEL_OP2_304_83643_20131112_015611_inLine +BABEL_OP2_304_83643_20131112_015611_outLine +BABEL_OP2_304_83775_20140106_012027_inLine +BABEL_OP2_304_83775_20140106_012027_outLine +BABEL_OP2_304_83783_20140123_015127_inLine +BABEL_OP2_304_83783_20140123_015127_outLine +BABEL_OP2_304_84029_20131107_051843_inLine +BABEL_OP2_304_84029_20131107_051843_outLine +BABEL_OP2_304_85260_20131024_194755_inLine +BABEL_OP2_304_85260_20131024_194755_outLine +BABEL_OP2_304_86885_20131024_233222_inLine +BABEL_OP2_304_86885_20131024_233222_outLine +BABEL_OP2_304_89045_20131115_232122_inLine +BABEL_OP2_304_89045_20131115_232122_outLine +BABEL_OP2_304_89226_20131024_203728_inLine +BABEL_OP2_304_89226_20131024_203728_outLine +BABEL_OP2_304_89372_20131115_002102_inLine +BABEL_OP2_304_89372_20131115_002102_outLine +BABEL_OP2_304_90930_20131020_000019_inLine +BABEL_OP2_304_90930_20131020_000019_outLine +BABEL_OP2_304_90935_20131204_230914_inLine +BABEL_OP2_304_90935_20131204_230914_outLine +BABEL_OP2_304_91971_20131023_230515_inLine +BABEL_OP2_304_91971_20131023_230515_outLine +BABEL_OP2_304_92509_20131210_214423_inLine +BABEL_OP2_304_92509_20131210_214423_outLine +BABEL_OP2_304_92698_20140118_013836_inLine +BABEL_OP2_304_92698_20140118_013836_outLine +BABEL_OP2_304_93946_20140213_192924_inLine +BABEL_OP2_304_93946_20140213_192924_outLine +BABEL_OP2_304_94869_20131114_004423_inLine +BABEL_OP2_304_94869_20131114_004423_outLine +BABEL_OP2_304_95077_20140213_032447_inLine +BABEL_OP2_304_95077_20140213_032447_outLine +BABEL_OP2_304_96504_20131215_211136_inLine +BABEL_OP2_304_96504_20131215_211136_outLine +BABEL_OP2_304_96504_20131215_212158_inLine +BABEL_OP2_304_96504_20131215_212158_outLine +BABEL_OP2_304_97448_20131109_203008_inLine +BABEL_OP2_304_97448_20131109_203008_outLine +BABEL_OP2_304_97570_20140114_012633_inLine +BABEL_OP2_304_97570_20140114_012633_outLine +BABEL_OP2_304_97772_20131115_013811_inLine +BABEL_OP2_304_97772_20131115_013811_outLine +BABEL_OP2_304_98255_20131126_040940_inLine +BABEL_OP2_304_98255_20131126_040940_outLine +BABEL_OP2_304_98888_20140116_000206_inLine +BABEL_OP2_304_98888_20140116_000206_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/evalpart1.list b/egs/babel/s5d/conf/lists/304-lithuanian/evalpart1.list new file mode 100644 index 00000000000..bf4691f0f34 --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/evalpart1.list @@ -0,0 +1,60 @@ +BABEL_OP2_304_10416_20140107_061620_inLine +BABEL_OP2_304_10416_20140107_061620_outLine +BABEL_OP2_304_14723_20131125_042706_inLine +BABEL_OP2_304_14723_20131125_042706_outLine +BABEL_OP2_304_16351_20131027_201533_inLine +BABEL_OP2_304_16351_20131027_201533_outLine +BABEL_OP2_304_18863_20140222_035802_inLine +BABEL_OP2_304_18863_20140222_035802_outLine +BABEL_OP2_304_22641_20131201_215149_inLine +BABEL_OP2_304_22641_20131201_215149_outLine +BABEL_OP2_304_25068_20131019_030524_inLine +BABEL_OP2_304_25068_20131019_030524_outLine +BABEL_OP2_304_28422_20140112_043550_inLine +BABEL_OP2_304_28422_20140112_043550_outLine 
+BABEL_OP2_304_28585_20140225_043733_inLine +BABEL_OP2_304_28585_20140225_043733_outLine +BABEL_OP2_304_30250_20140120_020901_inLine +BABEL_OP2_304_30250_20140120_020901_outLine +BABEL_OP2_304_36219_20131216_035438_inLine +BABEL_OP2_304_36219_20131216_035438_outLine +BABEL_OP2_304_39159_20131208_045854_inLine +BABEL_OP2_304_39159_20131208_045854_outLine +BABEL_OP2_304_41109_20140220_021208_inLine +BABEL_OP2_304_41109_20140220_021208_outLine +BABEL_OP2_304_43285_20140124_012117_inLine +BABEL_OP2_304_43285_20140124_012117_outLine +BABEL_OP2_304_44255_20140222_010712_inLine +BABEL_OP2_304_44255_20140222_010712_outLine +BABEL_OP2_304_44681_20131023_205447_inLine +BABEL_OP2_304_44681_20131023_205447_outLine +BABEL_OP2_304_45106_20140117_233013_inLine +BABEL_OP2_304_45106_20140117_233013_outLine +BABEL_OP2_304_45699_20131022_213702_inLine +BABEL_OP2_304_45699_20131022_213702_outLine +BABEL_OP2_304_53206_20131021_231814_inLine +BABEL_OP2_304_53206_20131021_231814_outLine +BABEL_OP2_304_57922_20140212_234031_inLine +BABEL_OP2_304_57922_20140212_234031_outLine +BABEL_OP2_304_60508_20131213_013224_inLine +BABEL_OP2_304_60508_20131213_013224_outLine +BABEL_OP2_304_63481_20131218_054343_inLine +BABEL_OP2_304_63481_20131218_054343_outLine +BABEL_OP2_304_65339_20131108_025612_inLine +BABEL_OP2_304_65339_20131108_025612_outLine +BABEL_OP2_304_66967_20131211_212833_inLine +BABEL_OP2_304_66967_20131211_212833_outLine +BABEL_OP2_304_70726_20131024_044755_inLine +BABEL_OP2_304_70726_20131024_044755_outLine +BABEL_OP2_304_78609_20140215_083334_inLine +BABEL_OP2_304_78609_20140215_083334_outLine +BABEL_OP2_304_83366_20140114_021841_inLine +BABEL_OP2_304_83366_20140114_021841_outLine +BABEL_OP2_304_83775_20140106_012027_inLine +BABEL_OP2_304_83775_20140106_012027_outLine +BABEL_OP2_304_85260_20131024_194755_inLine +BABEL_OP2_304_85260_20131024_194755_outLine +BABEL_OP2_304_97448_20131109_203008_inLine +BABEL_OP2_304_97448_20131109_203008_outLine +BABEL_OP2_304_98888_20140116_000206_inLine +BABEL_OP2_304_98888_20140116_000206_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.list b/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.list new file mode 100644 index 00000000000..858a278660f --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.list @@ -0,0 +1,120 @@ +BABEL_OP2_304_10019_20131215_000700_inLine +BABEL_OP2_304_10019_20131215_000700_outLine +BABEL_OP2_304_11768_20131025_195124_inLine +BABEL_OP2_304_11768_20131025_195124_outLine +BABEL_OP2_304_13929_20131020_015822_inLine +BABEL_OP2_304_13929_20131020_015822_outLine +BABEL_OP2_304_15420_20131207_024154_inLine +BABEL_OP2_304_15420_20131207_024154_outLine +BABEL_OP2_304_17937_20131127_033509_inLine +BABEL_OP2_304_17937_20131127_033509_outLine +BABEL_OP2_304_18037_20131024_213803_inLine +BABEL_OP2_304_18037_20131024_213803_outLine +BABEL_OP2_304_18731_20131023_003305_inLine +BABEL_OP2_304_18731_20131023_003305_outLine +BABEL_OP2_304_20916_20131114_013626_inLine +BABEL_OP2_304_20916_20131114_013626_outLine +BABEL_OP2_304_21029_20131212_035937_inLine +BABEL_OP2_304_21029_20131212_035937_outLine +BABEL_OP2_304_22170_20140304_071139_inLine +BABEL_OP2_304_22170_20140304_071139_outLine +BABEL_OP2_304_23098_20131107_033644_inLine +BABEL_OP2_304_23098_20131107_033644_outLine +BABEL_OP2_304_26074_20140112_023253_inLine +BABEL_OP2_304_26074_20140112_023253_outLine +BABEL_OP2_304_34564_20140213_195420_inLine +BABEL_OP2_304_34564_20140213_195420_outLine +BABEL_OP2_304_35420_20131029_043734_inLine 
+BABEL_OP2_304_35420_20131029_043734_outLine +BABEL_OP2_304_35838_20131024_211303_inLine +BABEL_OP2_304_35838_20131024_211303_outLine +BABEL_OP2_304_36147_20131019_040800_inLine +BABEL_OP2_304_36147_20131019_040800_outLine +BABEL_OP2_304_39688_20131109_222248_inLine +BABEL_OP2_304_39688_20131109_222248_outLine +BABEL_OP2_304_40092_20131031_014914_inLine +BABEL_OP2_304_40092_20131031_014914_outLine +BABEL_OP2_304_41493_20131113_221501_inLine +BABEL_OP2_304_41493_20131113_221501_outLine +BABEL_OP2_304_42126_20131024_215636_inLine +BABEL_OP2_304_42126_20131024_215636_outLine +BABEL_OP2_304_46333_20131204_195151_inLine +BABEL_OP2_304_46333_20131204_195151_outLine +BABEL_OP2_304_47877_20140227_065455_inLine +BABEL_OP2_304_47877_20140227_065455_outLine +BABEL_OP2_304_48789_20140108_012933_inLine +BABEL_OP2_304_48789_20140108_012933_outLine +BABEL_OP2_304_51417_20140228_011906_inLine +BABEL_OP2_304_51417_20140228_011906_outLine +BABEL_OP2_304_52025_20131116_004427_inLine +BABEL_OP2_304_52025_20131116_004427_outLine +BABEL_OP2_304_56429_20131129_223408_inLine +BABEL_OP2_304_56429_20131129_223408_outLine +BABEL_OP2_304_56684_20140223_001031_inLine +BABEL_OP2_304_56684_20140223_001031_outLine +BABEL_OP2_304_56720_20140119_005254_inLine +BABEL_OP2_304_56720_20140119_005254_outLine +BABEL_OP2_304_56743_20131218_042118_inLine +BABEL_OP2_304_56743_20131218_042118_outLine +BABEL_OP2_304_57609_20140121_202504_inLine +BABEL_OP2_304_57609_20140121_202504_outLine +BABEL_OP2_304_58103_20131212_013517_inLine +BABEL_OP2_304_58103_20131212_013517_outLine +BABEL_OP2_304_59291_20140207_213735_inLine +BABEL_OP2_304_59291_20140207_213735_outLine +BABEL_OP2_304_60418_20140111_062723_inLine +BABEL_OP2_304_60418_20140111_062723_outLine +BABEL_OP2_304_61219_20131206_061726_inLine +BABEL_OP2_304_61219_20131206_061726_outLine +BABEL_OP2_304_61357_20140113_232629_inLine +BABEL_OP2_304_61357_20140113_232629_outLine +BABEL_OP2_304_61963_20140226_192451_inLine +BABEL_OP2_304_61963_20140226_192451_outLine +BABEL_OP2_304_62323_20131113_001039_inLine +BABEL_OP2_304_62323_20131113_001039_outLine +BABEL_OP2_304_63445_20131127_005349_inLine +BABEL_OP2_304_63445_20131127_005349_outLine +BABEL_OP2_304_64759_20140118_203442_inLine +BABEL_OP2_304_64759_20140118_203442_outLine +BABEL_OP2_304_64796_20131128_060852_inLine +BABEL_OP2_304_64796_20131128_060852_outLine +BABEL_OP2_304_65077_20131115_005739_inLine +BABEL_OP2_304_65077_20131115_005739_outLine +BABEL_OP2_304_66026_20140212_224055_inLine +BABEL_OP2_304_66026_20140212_224055_outLine +BABEL_OP2_304_68910_20131101_042132_inLine +BABEL_OP2_304_68910_20131101_042132_outLine +BABEL_OP2_304_72903_20131113_023457_inLine +BABEL_OP2_304_72903_20131113_023457_outLine +BABEL_OP2_304_73042_20131214_052022_inLine +BABEL_OP2_304_73042_20131214_052022_outLine +BABEL_OP2_304_74455_20140224_013111_inLine +BABEL_OP2_304_74455_20140224_013111_outLine +BABEL_OP2_304_78360_20140301_020449_inLine +BABEL_OP2_304_78360_20140301_020449_outLine +BABEL_OP2_304_79723_20131023_023756_inLine +BABEL_OP2_304_79723_20131023_023756_outLine +BABEL_OP2_304_79820_20131214_042918_inLine +BABEL_OP2_304_79820_20131214_042918_outLine +BABEL_OP2_304_80721_20140213_051749_inLine +BABEL_OP2_304_80721_20140213_051749_outLine +BABEL_OP2_304_81427_20131211_012524_inLine +BABEL_OP2_304_81427_20131211_012524_outLine +BABEL_OP2_304_83813_20131028_033118_inLine +BABEL_OP2_304_83813_20131028_033118_outLine +BABEL_OP2_304_83851_20131203_212613_inLine +BABEL_OP2_304_83851_20131203_212613_outLine 
+BABEL_OP2_304_84125_20131115_235931_inLine +BABEL_OP2_304_84125_20131115_235931_outLine +BABEL_OP2_304_85179_20140214_071121_inLine +BABEL_OP2_304_85179_20140214_071121_outLine +BABEL_OP2_304_92252_20131022_042600_inLine +BABEL_OP2_304_92252_20131022_042600_outLine +BABEL_OP2_304_93443_20131127_032037_inLine +BABEL_OP2_304_93443_20131127_032037_outLine +BABEL_OP2_304_96205_20140107_233946_inLine +BABEL_OP2_304_96205_20140107_233946_outLine +BABEL_OP2_304_98565_20131023_235505_inLine +BABEL_OP2_304_98565_20131023_235505_outLine +BABEL_OP2_304_99920_20140211_023914_inLine +BABEL_OP2_304_99920_20140211_023914_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.untranscribed.list new file mode 100644 index 00000000000..5ddd7320c00 --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.untranscribed.list @@ -0,0 +1,364 @@ +BABEL_OP2_304_10313_20131021_235202_inLine +BABEL_OP2_304_10313_20131021_235202_outLine +BABEL_OP2_304_10469_20131103_031709_inLine +BABEL_OP2_304_10469_20131103_031709_outLine +BABEL_OP2_304_11419_20131022_014303_inLine +BABEL_OP2_304_11419_20131022_014303_outLine +BABEL_OP2_304_11681_20131213_001647_inLine +BABEL_OP2_304_11681_20131213_001647_outLine +BABEL_OP2_304_12220_20131217_183010_inLine +BABEL_OP2_304_12220_20131217_183010_outLine +BABEL_OP2_304_13030_20131214_223348_inLine +BABEL_OP2_304_13030_20131214_223348_outLine +BABEL_OP2_304_13483_20140121_014427_inLine +BABEL_OP2_304_13483_20140121_014427_outLine +BABEL_OP2_304_13547_20131025_230206_inLine +BABEL_OP2_304_13547_20131025_230206_outLine +BABEL_OP2_304_14229_20131203_213430_inLine +BABEL_OP2_304_14229_20131203_213430_outLine +BABEL_OP2_304_14440_20140116_035720_inLine +BABEL_OP2_304_14440_20140116_035720_outLine +BABEL_OP2_304_14875_20131215_025538_inLine +BABEL_OP2_304_14875_20131215_025538_outLine +BABEL_OP2_304_15535_20140120_031512_inLine +BABEL_OP2_304_15535_20140120_031512_outLine +BABEL_OP2_304_15869_20131024_035059_inLine +BABEL_OP2_304_15869_20131024_035059_outLine +BABEL_OP2_304_16249_20131019_215021_inLine +BABEL_OP2_304_16249_20131019_215021_outLine +BABEL_OP2_304_16938_20140117_232323_inLine +BABEL_OP2_304_16938_20140117_232323_outLine +BABEL_OP2_304_17032_20140121_010326_inLine +BABEL_OP2_304_17032_20140121_010326_outLine +BABEL_OP2_304_17923_20140112_012407_inLine +BABEL_OP2_304_17923_20140112_012407_outLine +BABEL_OP2_304_18033_20131019_011702_inLine +BABEL_OP2_304_18033_20131019_011702_outLine +BABEL_OP2_304_19440_20131022_001353_inLine +BABEL_OP2_304_19440_20131022_001353_outLine +BABEL_OP2_304_19782_20140214_025658_inLine +BABEL_OP2_304_19782_20140214_025658_outLine +BABEL_OP2_304_20330_20140222_024609_inLine +BABEL_OP2_304_20330_20140222_024609_outLine +BABEL_OP2_304_20454_20131022_030532_inLine +BABEL_OP2_304_20454_20131022_030532_outLine +BABEL_OP2_304_20800_20140109_021508_inLine +BABEL_OP2_304_20800_20140109_021508_outLine +BABEL_OP2_304_21109_20140217_041609_inLine +BABEL_OP2_304_21109_20140217_041609_outLine +BABEL_OP2_304_22629_20131106_052813_inLine +BABEL_OP2_304_22629_20131106_052813_outLine +BABEL_OP2_304_23995_20140221_014044_inLine +BABEL_OP2_304_23995_20140221_014044_outLine +BABEL_OP2_304_24532_20131115_041442_inLine +BABEL_OP2_304_24532_20131115_041442_outLine +BABEL_OP2_304_24589_20131211_034826_inLine +BABEL_OP2_304_24589_20131211_034826_outLine +BABEL_OP2_304_24648_20131023_232628_inLine +BABEL_OP2_304_24648_20131023_232628_outLine 
+BABEL_OP2_304_25895_20131106_022638_inLine +BABEL_OP2_304_25895_20131106_022638_outLine +BABEL_OP2_304_26602_20140220_035529_inLine +BABEL_OP2_304_26602_20140220_035529_outLine +BABEL_OP2_304_27042_20140214_003359_inLine +BABEL_OP2_304_27042_20140214_003359_outLine +BABEL_OP2_304_27125_20131115_034249_inLine +BABEL_OP2_304_27125_20131115_034249_outLine +BABEL_OP2_304_27189_20131107_042859_inLine +BABEL_OP2_304_27189_20131107_042859_outLine +BABEL_OP2_304_27218_20131214_022520_inLine +BABEL_OP2_304_27218_20131214_022520_outLine +BABEL_OP2_304_28945_20131208_071533_inLine +BABEL_OP2_304_28945_20131208_071533_outLine +BABEL_OP2_304_29135_20131211_225441_inLine +BABEL_OP2_304_29135_20131211_225441_outLine +BABEL_OP2_304_29168_20131210_211235_inLine +BABEL_OP2_304_29168_20131210_211235_outLine +BABEL_OP2_304_29208_20140108_013943_inLine +BABEL_OP2_304_29208_20140108_013943_outLine +BABEL_OP2_304_29663_20131101_024202_inLine +BABEL_OP2_304_29663_20131101_024202_outLine +BABEL_OP2_304_29746_20131020_192452_inLine +BABEL_OP2_304_29746_20131020_192452_outLine +BABEL_OP2_304_30253_20140210_055904_inLine +BABEL_OP2_304_30253_20140210_055904_outLine +BABEL_OP2_304_30426_20131108_232120_inLine +BABEL_OP2_304_30426_20131108_232120_outLine +BABEL_OP2_304_31624_20131216_054258_inLine +BABEL_OP2_304_31624_20131216_054258_outLine +BABEL_OP2_304_32169_20131101_054038_inLine +BABEL_OP2_304_32169_20131101_054038_outLine +BABEL_OP2_304_32832_20140223_005017_inLine +BABEL_OP2_304_32832_20140223_005017_outLine +BABEL_OP2_304_32861_20140303_235600_inLine +BABEL_OP2_304_32861_20140303_235600_outLine +BABEL_OP2_304_33111_20140304_043553_inLine +BABEL_OP2_304_33111_20140304_043553_outLine +BABEL_OP2_304_33149_20131127_000224_inLine +BABEL_OP2_304_33149_20131127_000224_outLine +BABEL_OP2_304_34064_20131020_210038_inLine +BABEL_OP2_304_34064_20131020_210038_outLine +BABEL_OP2_304_34064_20131021_223728_inLine +BABEL_OP2_304_34064_20131021_223728_outLine +BABEL_OP2_304_34328_20140106_031822_inLine +BABEL_OP2_304_34328_20140106_031822_outLine +BABEL_OP2_304_34336_20131210_042513_inLine +BABEL_OP2_304_34336_20131210_042513_outLine +BABEL_OP2_304_34647_20131109_231717_inLine +BABEL_OP2_304_34647_20131109_231717_outLine +BABEL_OP2_304_36300_20131030_210103_inLine +BABEL_OP2_304_36300_20131030_210103_outLine +BABEL_OP2_304_36341_20131208_040050_inLine +BABEL_OP2_304_36341_20131208_040050_outLine +BABEL_OP2_304_36990_20140106_050927_inLine +BABEL_OP2_304_36990_20140106_050927_outLine +BABEL_OP2_304_37684_20131019_020843_inLine +BABEL_OP2_304_37684_20131019_020843_outLine +BABEL_OP2_304_38963_20131109_190146_inLine +BABEL_OP2_304_38963_20131109_190146_outLine +BABEL_OP2_304_39680_20140226_002516_inLine +BABEL_OP2_304_39680_20140226_002516_outLine +BABEL_OP2_304_40624_20131107_024514_inLine +BABEL_OP2_304_40624_20131107_024514_outLine +BABEL_OP2_304_40713_20131210_063734_inLine +BABEL_OP2_304_40713_20131210_063734_outLine +BABEL_OP2_304_41233_20140222_034336_inLine +BABEL_OP2_304_41233_20140222_034336_outLine +BABEL_OP2_304_41442_20140214_035912_inLine +BABEL_OP2_304_41442_20140214_035912_outLine +BABEL_OP2_304_41741_20131215_020846_inLine +BABEL_OP2_304_41741_20131215_020846_outLine +BABEL_OP2_304_42243_20131218_052141_inLine +BABEL_OP2_304_42243_20131218_052141_outLine +BABEL_OP2_304_42497_20131130_034031_inLine +BABEL_OP2_304_42497_20131130_034031_outLine +BABEL_OP2_304_44868_20140110_204822_inLine +BABEL_OP2_304_44868_20140110_204822_outLine +BABEL_OP2_304_45374_20131019_200425_inLine 
+BABEL_OP2_304_45374_20131019_200425_outLine +BABEL_OP2_304_45642_20140114_234140_inLine +BABEL_OP2_304_45642_20140114_234140_outLine +BABEL_OP2_304_45843_20140114_205141_inLine +BABEL_OP2_304_45843_20140114_205141_outLine +BABEL_OP2_304_46315_20140122_004043_inLine +BABEL_OP2_304_46315_20140122_004043_outLine +BABEL_OP2_304_46389_20131022_050904_inLine +BABEL_OP2_304_46389_20131022_050904_outLine +BABEL_OP2_304_47110_20131023_015940_inLine +BABEL_OP2_304_47110_20131023_015940_outLine +BABEL_OP2_304_47270_20140222_021820_inLine +BABEL_OP2_304_47270_20140222_021820_outLine +BABEL_OP2_304_47451_20140203_224639_inLine +BABEL_OP2_304_47451_20140203_224639_outLine +BABEL_OP2_304_48024_20131031_215347_inLine +BABEL_OP2_304_48024_20131031_215347_outLine +BABEL_OP2_304_49001_20131214_003327_inLine +BABEL_OP2_304_49001_20131214_003327_outLine +BABEL_OP2_304_49287_20140118_013355_inLine +BABEL_OP2_304_49287_20140118_013355_outLine +BABEL_OP2_304_50175_20131124_033223_inLine +BABEL_OP2_304_50175_20131124_033223_outLine +BABEL_OP2_304_50175_20131124_035833_inLine +BABEL_OP2_304_50175_20131124_035833_outLine +BABEL_OP2_304_50565_20140124_052942_inLine +BABEL_OP2_304_50565_20140124_052942_outLine +BABEL_OP2_304_50726_20131213_031251_inLine +BABEL_OP2_304_50726_20131213_031251_outLine +BABEL_OP2_304_51540_20140304_011452_inLine +BABEL_OP2_304_51540_20140304_011452_outLine +BABEL_OP2_304_52058_20131022_055536_inLine +BABEL_OP2_304_52058_20131022_055536_outLine +BABEL_OP2_304_52438_20131206_043319_inLine +BABEL_OP2_304_52438_20131206_043319_outLine +BABEL_OP2_304_52818_20140112_011936_inLine +BABEL_OP2_304_52818_20140112_011936_outLine +BABEL_OP2_304_53419_20140213_061844_inLine +BABEL_OP2_304_53419_20140213_061844_outLine +BABEL_OP2_304_53842_20140109_012849_inLine +BABEL_OP2_304_53842_20140109_012849_outLine +BABEL_OP2_304_54744_20131205_024818_inLine +BABEL_OP2_304_54744_20131205_024818_outLine +BABEL_OP2_304_55042_20131112_051412_inLine +BABEL_OP2_304_55042_20131112_051412_outLine +BABEL_OP2_304_55381_20140217_005926_inLine +BABEL_OP2_304_55381_20140217_005926_outLine +BABEL_OP2_304_55818_20131218_020051_inLine +BABEL_OP2_304_55818_20131218_020051_outLine +BABEL_OP2_304_56057_20131112_043401_inLine +BABEL_OP2_304_56057_20131112_043401_outLine +BABEL_OP2_304_56117_20131023_035134_inLine +BABEL_OP2_304_56117_20131023_035134_outLine +BABEL_OP2_304_56674_20131024_233415_inLine +BABEL_OP2_304_56674_20131024_233415_outLine +BABEL_OP2_304_57035_20131106_183242_inLine +BABEL_OP2_304_57035_20131106_183242_outLine +BABEL_OP2_304_57566_20140227_000622_inLine +BABEL_OP2_304_57566_20140227_000622_outLine +BABEL_OP2_304_57935_20140211_015542_inLine +BABEL_OP2_304_57935_20140211_015542_outLine +BABEL_OP2_304_58585_20140226_022746_inLine +BABEL_OP2_304_58585_20140226_022746_outLine +BABEL_OP2_304_58717_20140112_000351_inLine +BABEL_OP2_304_58717_20140112_000351_outLine +BABEL_OP2_304_59163_20131022_033947_inLine +BABEL_OP2_304_59163_20131022_033947_outLine +BABEL_OP2_304_59645_20140110_210530_inLine +BABEL_OP2_304_59645_20140110_210530_outLine +BABEL_OP2_304_60282_20131031_040356_inLine +BABEL_OP2_304_60282_20131031_040356_outLine +BABEL_OP2_304_60397_20131024_183527_inLine +BABEL_OP2_304_60397_20131024_183527_outLine +BABEL_OP2_304_60538_20131211_043030_inLine +BABEL_OP2_304_60538_20131211_043030_outLine +BABEL_OP2_304_60830_20140106_224130_inLine +BABEL_OP2_304_60830_20140106_224130_outLine +BABEL_OP2_304_61011_20131206_213833_inLine +BABEL_OP2_304_61011_20131206_213833_outLine 
+BABEL_OP2_304_61225_20131113_052324_inLine +BABEL_OP2_304_61225_20131113_052324_outLine +BABEL_OP2_304_61831_20131201_042817_inLine +BABEL_OP2_304_61831_20131201_042817_outLine +BABEL_OP2_304_61888_20140228_181648_inLine +BABEL_OP2_304_61888_20140228_181648_outLine +BABEL_OP2_304_62177_20140227_184207_inLine +BABEL_OP2_304_62177_20140227_184207_outLine +BABEL_OP2_304_63081_20131206_195135_inLine +BABEL_OP2_304_63081_20131206_195135_outLine +BABEL_OP2_304_63671_20131024_002535_inLine +BABEL_OP2_304_63671_20131024_002535_outLine +BABEL_OP2_304_63920_20131108_182401_inLine +BABEL_OP2_304_63920_20131108_182401_outLine +BABEL_OP2_304_64014_20140228_033939_inLine +BABEL_OP2_304_64014_20140228_033939_outLine +BABEL_OP2_304_64469_20131023_182630_inLine +BABEL_OP2_304_64469_20131023_182630_outLine +BABEL_OP2_304_64688_20131109_040635_inLine +BABEL_OP2_304_64688_20131109_040635_outLine +BABEL_OP2_304_65336_20131109_051329_inLine +BABEL_OP2_304_65336_20131109_051329_outLine +BABEL_OP2_304_65723_20131201_233928_inLine +BABEL_OP2_304_65723_20131201_233928_outLine +BABEL_OP2_304_65882_20131128_220533_inLine +BABEL_OP2_304_65882_20131128_220533_outLine +BABEL_OP2_304_66001_20131208_023839_inLine +BABEL_OP2_304_66001_20131208_023839_outLine +BABEL_OP2_304_66350_20131022_021812_inLine +BABEL_OP2_304_66350_20131022_021812_outLine +BABEL_OP2_304_66837_20140213_053859_inLine +BABEL_OP2_304_66837_20140213_053859_outLine +BABEL_OP2_304_67304_20140216_025015_inLine +BABEL_OP2_304_67304_20140216_025015_outLine +BABEL_OP2_304_67552_20140114_011538_inLine +BABEL_OP2_304_67552_20140114_011538_outLine +BABEL_OP2_304_67894_20131112_060500_inLine +BABEL_OP2_304_67894_20131112_060500_outLine +BABEL_OP2_304_68059_20140111_025607_inLine +BABEL_OP2_304_68059_20140111_025607_outLine +BABEL_OP2_304_68908_20131127_032840_inLine +BABEL_OP2_304_68908_20131127_032840_outLine +BABEL_OP2_304_69107_20140123_192506_inLine +BABEL_OP2_304_69107_20140123_192506_outLine +BABEL_OP2_304_69153_20140212_204658_inLine +BABEL_OP2_304_69153_20140212_204658_outLine +BABEL_OP2_304_69992_20131213_195450_inLine +BABEL_OP2_304_69992_20131213_195450_outLine +BABEL_OP2_304_70216_20131020_173420_inLine +BABEL_OP2_304_70216_20131020_173420_outLine +BABEL_OP2_304_71263_20140113_223556_inLine +BABEL_OP2_304_71263_20140113_223556_outLine +BABEL_OP2_304_71401_20131020_005620_inLine +BABEL_OP2_304_71401_20131020_005620_outLine +BABEL_OP2_304_72844_20131115_202958_inLine +BABEL_OP2_304_72844_20131115_202958_outLine +BABEL_OP2_304_74226_20140217_044122_inLine +BABEL_OP2_304_74226_20140217_044122_outLine +BABEL_OP2_304_75064_20131209_035217_inLine +BABEL_OP2_304_75064_20131209_035217_outLine +BABEL_OP2_304_75223_20131205_012248_inLine +BABEL_OP2_304_75223_20131205_012248_outLine +BABEL_OP2_304_75930_20131020_013042_inLine +BABEL_OP2_304_75930_20131020_013042_outLine +BABEL_OP2_304_75975_20131019_054431_inLine +BABEL_OP2_304_75975_20131019_054431_outLine +BABEL_OP2_304_76069_20131113_042346_inLine +BABEL_OP2_304_76069_20131113_042346_outLine +BABEL_OP2_304_76730_20131025_213853_inLine +BABEL_OP2_304_76730_20131025_213853_outLine +BABEL_OP2_304_76793_20131126_013011_inLine +BABEL_OP2_304_76793_20131126_013011_outLine +BABEL_OP2_304_77033_20140228_043125_inLine +BABEL_OP2_304_77033_20140228_043125_outLine +BABEL_OP2_304_77730_20131217_042107_inLine +BABEL_OP2_304_77730_20131217_042107_outLine +BABEL_OP2_304_78943_20131208_222716_inLine +BABEL_OP2_304_78943_20131208_222716_outLine +BABEL_OP2_304_79028_20131022_221243_inLine 
+BABEL_OP2_304_79028_20131022_221243_outLine +BABEL_OP2_304_79045_20140214_202301_inLine +BABEL_OP2_304_79045_20140214_202301_outLine +BABEL_OP2_304_80209_20131112_232041_inLine +BABEL_OP2_304_80209_20131112_232041_outLine +BABEL_OP2_304_80383_20131107_233543_inLine +BABEL_OP2_304_80383_20131107_233543_outLine +BABEL_OP2_304_80577_20140301_014201_inLine +BABEL_OP2_304_80577_20140301_014201_outLine +BABEL_OP2_304_80881_20131205_175435_inLine +BABEL_OP2_304_80881_20131205_175435_outLine +BABEL_OP2_304_81404_20131213_041501_inLine +BABEL_OP2_304_81404_20131213_041501_outLine +BABEL_OP2_304_81769_20140105_005749_inLine +BABEL_OP2_304_81769_20140105_005749_outLine +BABEL_OP2_304_82863_20140106_054346_inLine +BABEL_OP2_304_82863_20140106_054346_outLine +BABEL_OP2_304_83436_20131211_025218_inLine +BABEL_OP2_304_83436_20131211_025218_outLine +BABEL_OP2_304_83935_20140123_034100_inLine +BABEL_OP2_304_83935_20140123_034100_outLine +BABEL_OP2_304_84194_20131129_040805_inLine +BABEL_OP2_304_84194_20131129_040805_outLine +BABEL_OP2_304_84605_20131215_005949_inLine +BABEL_OP2_304_84605_20131215_005949_outLine +BABEL_OP2_304_85248_20140222_235016_inLine +BABEL_OP2_304_85248_20140222_235016_outLine +BABEL_OP2_304_86100_20131112_221929_inLine +BABEL_OP2_304_86100_20131112_221929_outLine +BABEL_OP2_304_86472_20140116_050058_inLine +BABEL_OP2_304_86472_20140116_050058_outLine +BABEL_OP2_304_86557_20131130_234925_inLine +BABEL_OP2_304_86557_20131130_234925_outLine +BABEL_OP2_304_86829_20131107_180321_inLine +BABEL_OP2_304_86829_20131107_180321_outLine +BABEL_OP2_304_86830_20140228_051058_inLine +BABEL_OP2_304_86830_20140228_051058_outLine +BABEL_OP2_304_87280_20140207_030432_inLine +BABEL_OP2_304_87280_20140207_030432_outLine +BABEL_OP2_304_87866_20131106_002751_inLine +BABEL_OP2_304_87866_20131106_002751_outLine +BABEL_OP2_304_88982_20140111_233039_inLine +BABEL_OP2_304_88982_20140111_233039_outLine +BABEL_OP2_304_89650_20131024_023031_inLine +BABEL_OP2_304_89650_20131024_023031_outLine +BABEL_OP2_304_91581_20140203_231410_inLine +BABEL_OP2_304_91581_20140203_231410_outLine +BABEL_OP2_304_92440_20131109_003559_inLine +BABEL_OP2_304_92440_20131109_003559_outLine +BABEL_OP2_304_92886_20131202_233808_inLine +BABEL_OP2_304_92886_20131202_233808_outLine +BABEL_OP2_304_93224_20140121_191942_inLine +BABEL_OP2_304_93224_20140121_191942_outLine +BABEL_OP2_304_93475_20131213_025105_inLine +BABEL_OP2_304_93475_20131213_025105_outLine +BABEL_OP2_304_93681_20131202_212236_inLine +BABEL_OP2_304_93681_20131202_212236_outLine +BABEL_OP2_304_94465_20140213_013300_inLine +BABEL_OP2_304_94465_20140213_013300_outLine +BABEL_OP2_304_94923_20140212_021923_inLine +BABEL_OP2_304_94923_20140212_021923_outLine +BABEL_OP2_304_95677_20131024_031406_inLine +BABEL_OP2_304_95677_20131024_031406_outLine +BABEL_OP2_304_96405_20131214_205112_inLine +BABEL_OP2_304_96405_20131214_205112_outLine +BABEL_OP2_304_98165_20131218_234422_inLine +BABEL_OP2_304_98165_20131218_234422_outLine +BABEL_OP2_304_99264_20140222_211846_inLine +BABEL_OP2_304_99264_20140222_211846_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/training.list b/egs/babel/s5d/conf/lists/304-lithuanian/training.list new file mode 100644 index 00000000000..72d421bf1a9 --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/training.list @@ -0,0 +1,484 @@ +BABEL_OP2_304_10019_20131215_000700_inLine +BABEL_OP2_304_10019_20131215_000700_outLine +BABEL_OP2_304_10313_20131021_235202_inLine +BABEL_OP2_304_10313_20131021_235202_outLine 
+BABEL_OP2_304_10469_20131103_031709_inLine +BABEL_OP2_304_10469_20131103_031709_outLine +BABEL_OP2_304_11419_20131022_014303_inLine +BABEL_OP2_304_11419_20131022_014303_outLine +BABEL_OP2_304_11681_20131213_001647_inLine +BABEL_OP2_304_11681_20131213_001647_outLine +BABEL_OP2_304_11768_20131025_195124_inLine +BABEL_OP2_304_11768_20131025_195124_outLine +BABEL_OP2_304_12220_20131217_183010_inLine +BABEL_OP2_304_12220_20131217_183010_outLine +BABEL_OP2_304_13030_20131214_223348_inLine +BABEL_OP2_304_13030_20131214_223348_outLine +BABEL_OP2_304_13483_20140121_014427_inLine +BABEL_OP2_304_13483_20140121_014427_outLine +BABEL_OP2_304_13547_20131025_230206_inLine +BABEL_OP2_304_13547_20131025_230206_outLine +BABEL_OP2_304_13929_20131020_015822_inLine +BABEL_OP2_304_13929_20131020_015822_outLine +BABEL_OP2_304_14229_20131203_213430_inLine +BABEL_OP2_304_14229_20131203_213430_outLine +BABEL_OP2_304_14440_20140116_035720_inLine +BABEL_OP2_304_14440_20140116_035720_outLine +BABEL_OP2_304_14875_20131215_025538_inLine +BABEL_OP2_304_14875_20131215_025538_outLine +BABEL_OP2_304_15420_20131207_024154_inLine +BABEL_OP2_304_15420_20131207_024154_outLine +BABEL_OP2_304_15535_20140120_031512_inLine +BABEL_OP2_304_15535_20140120_031512_outLine +BABEL_OP2_304_15869_20131024_035059_inLine +BABEL_OP2_304_15869_20131024_035059_outLine +BABEL_OP2_304_16249_20131019_215021_inLine +BABEL_OP2_304_16249_20131019_215021_outLine +BABEL_OP2_304_16938_20140117_232323_inLine +BABEL_OP2_304_16938_20140117_232323_outLine +BABEL_OP2_304_17032_20140121_010326_inLine +BABEL_OP2_304_17032_20140121_010326_outLine +BABEL_OP2_304_17923_20140112_012407_inLine +BABEL_OP2_304_17923_20140112_012407_outLine +BABEL_OP2_304_17937_20131127_033509_inLine +BABEL_OP2_304_17937_20131127_033509_outLine +BABEL_OP2_304_18033_20131019_011702_inLine +BABEL_OP2_304_18033_20131019_011702_outLine +BABEL_OP2_304_18037_20131024_213803_inLine +BABEL_OP2_304_18037_20131024_213803_outLine +BABEL_OP2_304_18731_20131023_003305_inLine +BABEL_OP2_304_18731_20131023_003305_outLine +BABEL_OP2_304_19440_20131022_001353_inLine +BABEL_OP2_304_19440_20131022_001353_outLine +BABEL_OP2_304_19782_20140214_025658_inLine +BABEL_OP2_304_19782_20140214_025658_outLine +BABEL_OP2_304_20330_20140222_024609_inLine +BABEL_OP2_304_20330_20140222_024609_outLine +BABEL_OP2_304_20454_20131022_030532_inLine +BABEL_OP2_304_20454_20131022_030532_outLine +BABEL_OP2_304_20800_20140109_021508_inLine +BABEL_OP2_304_20800_20140109_021508_outLine +BABEL_OP2_304_20916_20131114_013626_inLine +BABEL_OP2_304_20916_20131114_013626_outLine +BABEL_OP2_304_21029_20131212_035937_inLine +BABEL_OP2_304_21029_20131212_035937_outLine +BABEL_OP2_304_21109_20140217_041609_inLine +BABEL_OP2_304_21109_20140217_041609_outLine +BABEL_OP2_304_22170_20140304_071139_inLine +BABEL_OP2_304_22170_20140304_071139_outLine +BABEL_OP2_304_22629_20131106_052813_inLine +BABEL_OP2_304_22629_20131106_052813_outLine +BABEL_OP2_304_23098_20131107_033644_inLine +BABEL_OP2_304_23098_20131107_033644_outLine +BABEL_OP2_304_23995_20140221_014044_inLine +BABEL_OP2_304_23995_20140221_014044_outLine +BABEL_OP2_304_24532_20131115_041442_inLine +BABEL_OP2_304_24532_20131115_041442_outLine +BABEL_OP2_304_24589_20131211_034826_inLine +BABEL_OP2_304_24589_20131211_034826_outLine +BABEL_OP2_304_24648_20131023_232628_inLine +BABEL_OP2_304_24648_20131023_232628_outLine +BABEL_OP2_304_25895_20131106_022638_inLine +BABEL_OP2_304_25895_20131106_022638_outLine +BABEL_OP2_304_26074_20140112_023253_inLine 
+BABEL_OP2_304_26074_20140112_023253_outLine +BABEL_OP2_304_26602_20140220_035529_inLine +BABEL_OP2_304_26602_20140220_035529_outLine +BABEL_OP2_304_27042_20140214_003359_inLine +BABEL_OP2_304_27042_20140214_003359_outLine +BABEL_OP2_304_27125_20131115_034249_inLine +BABEL_OP2_304_27125_20131115_034249_outLine +BABEL_OP2_304_27189_20131107_042859_inLine +BABEL_OP2_304_27189_20131107_042859_outLine +BABEL_OP2_304_27218_20131214_022520_inLine +BABEL_OP2_304_27218_20131214_022520_outLine +BABEL_OP2_304_28945_20131208_071533_inLine +BABEL_OP2_304_28945_20131208_071533_outLine +BABEL_OP2_304_29135_20131211_225441_inLine +BABEL_OP2_304_29135_20131211_225441_outLine +BABEL_OP2_304_29168_20131210_211235_inLine +BABEL_OP2_304_29168_20131210_211235_outLine +BABEL_OP2_304_29208_20140108_013943_inLine +BABEL_OP2_304_29208_20140108_013943_outLine +BABEL_OP2_304_29663_20131101_024202_inLine +BABEL_OP2_304_29663_20131101_024202_outLine +BABEL_OP2_304_29746_20131020_192452_inLine +BABEL_OP2_304_29746_20131020_192452_outLine +BABEL_OP2_304_30253_20140210_055904_inLine +BABEL_OP2_304_30253_20140210_055904_outLine +BABEL_OP2_304_30426_20131108_232120_inLine +BABEL_OP2_304_30426_20131108_232120_outLine +BABEL_OP2_304_31624_20131216_054258_inLine +BABEL_OP2_304_31624_20131216_054258_outLine +BABEL_OP2_304_32169_20131101_054038_inLine +BABEL_OP2_304_32169_20131101_054038_outLine +BABEL_OP2_304_32832_20140223_005017_inLine +BABEL_OP2_304_32832_20140223_005017_outLine +BABEL_OP2_304_32861_20140303_235600_inLine +BABEL_OP2_304_32861_20140303_235600_outLine +BABEL_OP2_304_33111_20140304_043553_inLine +BABEL_OP2_304_33111_20140304_043553_outLine +BABEL_OP2_304_33149_20131127_000224_inLine +BABEL_OP2_304_33149_20131127_000224_outLine +BABEL_OP2_304_34064_20131020_210038_inLine +BABEL_OP2_304_34064_20131020_210038_outLine +BABEL_OP2_304_34064_20131021_223728_inLine +BABEL_OP2_304_34064_20131021_223728_outLine +BABEL_OP2_304_34328_20140106_031822_inLine +BABEL_OP2_304_34328_20140106_031822_outLine +BABEL_OP2_304_34336_20131210_042513_inLine +BABEL_OP2_304_34336_20131210_042513_outLine +BABEL_OP2_304_34564_20140213_195420_inLine +BABEL_OP2_304_34564_20140213_195420_outLine +BABEL_OP2_304_34647_20131109_231717_inLine +BABEL_OP2_304_34647_20131109_231717_outLine +BABEL_OP2_304_35420_20131029_043734_inLine +BABEL_OP2_304_35420_20131029_043734_outLine +BABEL_OP2_304_35838_20131024_211303_inLine +BABEL_OP2_304_35838_20131024_211303_outLine +BABEL_OP2_304_36147_20131019_040800_inLine +BABEL_OP2_304_36147_20131019_040800_outLine +BABEL_OP2_304_36300_20131030_210103_inLine +BABEL_OP2_304_36300_20131030_210103_outLine +BABEL_OP2_304_36341_20131208_040050_inLine +BABEL_OP2_304_36341_20131208_040050_outLine +BABEL_OP2_304_36990_20140106_050927_inLine +BABEL_OP2_304_36990_20140106_050927_outLine +BABEL_OP2_304_37684_20131019_020843_inLine +BABEL_OP2_304_37684_20131019_020843_outLine +BABEL_OP2_304_38963_20131109_190146_inLine +BABEL_OP2_304_38963_20131109_190146_outLine +BABEL_OP2_304_39680_20140226_002516_inLine +BABEL_OP2_304_39680_20140226_002516_outLine +BABEL_OP2_304_39688_20131109_222248_inLine +BABEL_OP2_304_39688_20131109_222248_outLine +BABEL_OP2_304_40092_20131031_014914_inLine +BABEL_OP2_304_40092_20131031_014914_outLine +BABEL_OP2_304_40624_20131107_024514_inLine +BABEL_OP2_304_40624_20131107_024514_outLine +BABEL_OP2_304_40713_20131210_063734_inLine +BABEL_OP2_304_40713_20131210_063734_outLine +BABEL_OP2_304_41233_20140222_034336_inLine +BABEL_OP2_304_41233_20140222_034336_outLine 
+BABEL_OP2_304_41442_20140214_035912_inLine +BABEL_OP2_304_41442_20140214_035912_outLine +BABEL_OP2_304_41493_20131113_221501_inLine +BABEL_OP2_304_41493_20131113_221501_outLine +BABEL_OP2_304_41741_20131215_020846_inLine +BABEL_OP2_304_41741_20131215_020846_outLine +BABEL_OP2_304_42126_20131024_215636_inLine +BABEL_OP2_304_42126_20131024_215636_outLine +BABEL_OP2_304_42243_20131218_052141_inLine +BABEL_OP2_304_42243_20131218_052141_outLine +BABEL_OP2_304_42497_20131130_034031_inLine +BABEL_OP2_304_42497_20131130_034031_outLine +BABEL_OP2_304_44868_20140110_204822_inLine +BABEL_OP2_304_44868_20140110_204822_outLine +BABEL_OP2_304_45374_20131019_200425_inLine +BABEL_OP2_304_45374_20131019_200425_outLine +BABEL_OP2_304_45642_20140114_234140_inLine +BABEL_OP2_304_45642_20140114_234140_outLine +BABEL_OP2_304_45843_20140114_205141_inLine +BABEL_OP2_304_45843_20140114_205141_outLine +BABEL_OP2_304_46315_20140122_004043_inLine +BABEL_OP2_304_46315_20140122_004043_outLine +BABEL_OP2_304_46333_20131204_195151_inLine +BABEL_OP2_304_46333_20131204_195151_outLine +BABEL_OP2_304_46389_20131022_050904_inLine +BABEL_OP2_304_46389_20131022_050904_outLine +BABEL_OP2_304_47110_20131023_015940_inLine +BABEL_OP2_304_47110_20131023_015940_outLine +BABEL_OP2_304_47270_20140222_021820_inLine +BABEL_OP2_304_47270_20140222_021820_outLine +BABEL_OP2_304_47451_20140203_224639_inLine +BABEL_OP2_304_47451_20140203_224639_outLine +BABEL_OP2_304_47877_20140227_065455_inLine +BABEL_OP2_304_47877_20140227_065455_outLine +BABEL_OP2_304_48024_20131031_215347_inLine +BABEL_OP2_304_48024_20131031_215347_outLine +BABEL_OP2_304_48789_20140108_012933_inLine +BABEL_OP2_304_48789_20140108_012933_outLine +BABEL_OP2_304_49001_20131214_003327_inLine +BABEL_OP2_304_49001_20131214_003327_outLine +BABEL_OP2_304_49287_20140118_013355_inLine +BABEL_OP2_304_49287_20140118_013355_outLine +BABEL_OP2_304_50175_20131124_033223_inLine +BABEL_OP2_304_50175_20131124_033223_outLine +BABEL_OP2_304_50175_20131124_035833_inLine +BABEL_OP2_304_50175_20131124_035833_outLine +BABEL_OP2_304_50565_20140124_052942_inLine +BABEL_OP2_304_50565_20140124_052942_outLine +BABEL_OP2_304_50726_20131213_031251_inLine +BABEL_OP2_304_50726_20131213_031251_outLine +BABEL_OP2_304_51417_20140228_011906_inLine +BABEL_OP2_304_51417_20140228_011906_outLine +BABEL_OP2_304_51540_20140304_011452_inLine +BABEL_OP2_304_51540_20140304_011452_outLine +BABEL_OP2_304_52025_20131116_004427_inLine +BABEL_OP2_304_52025_20131116_004427_outLine +BABEL_OP2_304_52058_20131022_055536_inLine +BABEL_OP2_304_52058_20131022_055536_outLine +BABEL_OP2_304_52438_20131206_043319_inLine +BABEL_OP2_304_52438_20131206_043319_outLine +BABEL_OP2_304_52818_20140112_011936_inLine +BABEL_OP2_304_52818_20140112_011936_outLine +BABEL_OP2_304_53419_20140213_061844_inLine +BABEL_OP2_304_53419_20140213_061844_outLine +BABEL_OP2_304_53842_20140109_012849_inLine +BABEL_OP2_304_53842_20140109_012849_outLine +BABEL_OP2_304_54744_20131205_024818_inLine +BABEL_OP2_304_54744_20131205_024818_outLine +BABEL_OP2_304_55042_20131112_051412_inLine +BABEL_OP2_304_55042_20131112_051412_outLine +BABEL_OP2_304_55381_20140217_005926_inLine +BABEL_OP2_304_55381_20140217_005926_outLine +BABEL_OP2_304_55818_20131218_020051_inLine +BABEL_OP2_304_55818_20131218_020051_outLine +BABEL_OP2_304_56057_20131112_043401_inLine +BABEL_OP2_304_56057_20131112_043401_outLine +BABEL_OP2_304_56117_20131023_035134_inLine +BABEL_OP2_304_56117_20131023_035134_outLine +BABEL_OP2_304_56429_20131129_223408_inLine 
+BABEL_OP2_304_56429_20131129_223408_outLine +BABEL_OP2_304_56674_20131024_233415_inLine +BABEL_OP2_304_56674_20131024_233415_outLine +BABEL_OP2_304_56684_20140223_001031_inLine +BABEL_OP2_304_56684_20140223_001031_outLine +BABEL_OP2_304_56720_20140119_005254_inLine +BABEL_OP2_304_56720_20140119_005254_outLine +BABEL_OP2_304_56743_20131218_042118_inLine +BABEL_OP2_304_56743_20131218_042118_outLine +BABEL_OP2_304_57035_20131106_183242_inLine +BABEL_OP2_304_57035_20131106_183242_outLine +BABEL_OP2_304_57566_20140227_000622_inLine +BABEL_OP2_304_57566_20140227_000622_outLine +BABEL_OP2_304_57609_20140121_202504_inLine +BABEL_OP2_304_57609_20140121_202504_outLine +BABEL_OP2_304_57935_20140211_015542_inLine +BABEL_OP2_304_57935_20140211_015542_outLine +BABEL_OP2_304_58103_20131212_013517_inLine +BABEL_OP2_304_58103_20131212_013517_outLine +BABEL_OP2_304_58585_20140226_022746_inLine +BABEL_OP2_304_58585_20140226_022746_outLine +BABEL_OP2_304_58717_20140112_000351_inLine +BABEL_OP2_304_58717_20140112_000351_outLine +BABEL_OP2_304_59163_20131022_033947_inLine +BABEL_OP2_304_59163_20131022_033947_outLine +BABEL_OP2_304_59291_20140207_213735_inLine +BABEL_OP2_304_59291_20140207_213735_outLine +BABEL_OP2_304_59645_20140110_210530_inLine +BABEL_OP2_304_59645_20140110_210530_outLine +BABEL_OP2_304_60282_20131031_040356_inLine +BABEL_OP2_304_60282_20131031_040356_outLine +BABEL_OP2_304_60397_20131024_183527_inLine +BABEL_OP2_304_60397_20131024_183527_outLine +BABEL_OP2_304_60418_20140111_062723_inLine +BABEL_OP2_304_60418_20140111_062723_outLine +BABEL_OP2_304_60538_20131211_043030_inLine +BABEL_OP2_304_60538_20131211_043030_outLine +BABEL_OP2_304_60830_20140106_224130_inLine +BABEL_OP2_304_60830_20140106_224130_outLine +BABEL_OP2_304_61011_20131206_213833_inLine +BABEL_OP2_304_61011_20131206_213833_outLine +BABEL_OP2_304_61219_20131206_061726_inLine +BABEL_OP2_304_61219_20131206_061726_outLine +BABEL_OP2_304_61225_20131113_052324_inLine +BABEL_OP2_304_61225_20131113_052324_outLine +BABEL_OP2_304_61357_20140113_232629_inLine +BABEL_OP2_304_61357_20140113_232629_outLine +BABEL_OP2_304_61831_20131201_042817_inLine +BABEL_OP2_304_61831_20131201_042817_outLine +BABEL_OP2_304_61888_20140228_181648_inLine +BABEL_OP2_304_61888_20140228_181648_outLine +BABEL_OP2_304_61963_20140226_192451_inLine +BABEL_OP2_304_61963_20140226_192451_outLine +BABEL_OP2_304_62177_20140227_184207_inLine +BABEL_OP2_304_62177_20140227_184207_outLine +BABEL_OP2_304_62323_20131113_001039_inLine +BABEL_OP2_304_62323_20131113_001039_outLine +BABEL_OP2_304_63081_20131206_195135_inLine +BABEL_OP2_304_63081_20131206_195135_outLine +BABEL_OP2_304_63445_20131127_005349_inLine +BABEL_OP2_304_63445_20131127_005349_outLine +BABEL_OP2_304_63671_20131024_002535_inLine +BABEL_OP2_304_63671_20131024_002535_outLine +BABEL_OP2_304_63920_20131108_182401_inLine +BABEL_OP2_304_63920_20131108_182401_outLine +BABEL_OP2_304_64014_20140228_033939_inLine +BABEL_OP2_304_64014_20140228_033939_outLine +BABEL_OP2_304_64469_20131023_182630_inLine +BABEL_OP2_304_64469_20131023_182630_outLine +BABEL_OP2_304_64688_20131109_040635_inLine +BABEL_OP2_304_64688_20131109_040635_outLine +BABEL_OP2_304_64759_20140118_203442_inLine +BABEL_OP2_304_64759_20140118_203442_outLine +BABEL_OP2_304_64796_20131128_060852_inLine +BABEL_OP2_304_64796_20131128_060852_outLine +BABEL_OP2_304_65077_20131115_005739_inLine +BABEL_OP2_304_65077_20131115_005739_outLine +BABEL_OP2_304_65336_20131109_051329_inLine +BABEL_OP2_304_65336_20131109_051329_outLine 
+BABEL_OP2_304_65723_20131201_233928_inLine +BABEL_OP2_304_65723_20131201_233928_outLine +BABEL_OP2_304_65882_20131128_220533_inLine +BABEL_OP2_304_65882_20131128_220533_outLine +BABEL_OP2_304_66001_20131208_023839_inLine +BABEL_OP2_304_66001_20131208_023839_outLine +BABEL_OP2_304_66026_20140212_224055_inLine +BABEL_OP2_304_66026_20140212_224055_outLine +BABEL_OP2_304_66350_20131022_021812_inLine +BABEL_OP2_304_66350_20131022_021812_outLine +BABEL_OP2_304_66837_20140213_053859_inLine +BABEL_OP2_304_66837_20140213_053859_outLine +BABEL_OP2_304_67304_20140216_025015_inLine +BABEL_OP2_304_67304_20140216_025015_outLine +BABEL_OP2_304_67552_20140114_011538_inLine +BABEL_OP2_304_67552_20140114_011538_outLine +BABEL_OP2_304_67894_20131112_060500_inLine +BABEL_OP2_304_67894_20131112_060500_outLine +BABEL_OP2_304_68059_20140111_025607_inLine +BABEL_OP2_304_68059_20140111_025607_outLine +BABEL_OP2_304_68908_20131127_032840_inLine +BABEL_OP2_304_68908_20131127_032840_outLine +BABEL_OP2_304_68910_20131101_042132_inLine +BABEL_OP2_304_68910_20131101_042132_outLine +BABEL_OP2_304_69107_20140123_192506_inLine +BABEL_OP2_304_69107_20140123_192506_outLine +BABEL_OP2_304_69153_20140212_204658_inLine +BABEL_OP2_304_69153_20140212_204658_outLine +BABEL_OP2_304_69992_20131213_195450_inLine +BABEL_OP2_304_69992_20131213_195450_outLine +BABEL_OP2_304_70216_20131020_173420_inLine +BABEL_OP2_304_70216_20131020_173420_outLine +BABEL_OP2_304_71263_20140113_223556_inLine +BABEL_OP2_304_71263_20140113_223556_outLine +BABEL_OP2_304_71401_20131020_005620_inLine +BABEL_OP2_304_71401_20131020_005620_outLine +BABEL_OP2_304_72844_20131115_202958_inLine +BABEL_OP2_304_72844_20131115_202958_outLine +BABEL_OP2_304_72903_20131113_023457_inLine +BABEL_OP2_304_72903_20131113_023457_outLine +BABEL_OP2_304_73042_20131214_052022_inLine +BABEL_OP2_304_73042_20131214_052022_outLine +BABEL_OP2_304_74226_20140217_044122_inLine +BABEL_OP2_304_74226_20140217_044122_outLine +BABEL_OP2_304_74455_20140224_013111_inLine +BABEL_OP2_304_74455_20140224_013111_outLine +BABEL_OP2_304_75064_20131209_035217_inLine +BABEL_OP2_304_75064_20131209_035217_outLine +BABEL_OP2_304_75223_20131205_012248_inLine +BABEL_OP2_304_75223_20131205_012248_outLine +BABEL_OP2_304_75930_20131020_013042_inLine +BABEL_OP2_304_75930_20131020_013042_outLine +BABEL_OP2_304_75975_20131019_054431_inLine +BABEL_OP2_304_75975_20131019_054431_outLine +BABEL_OP2_304_76069_20131113_042346_inLine +BABEL_OP2_304_76069_20131113_042346_outLine +BABEL_OP2_304_76730_20131025_213853_inLine +BABEL_OP2_304_76730_20131025_213853_outLine +BABEL_OP2_304_76793_20131126_013011_inLine +BABEL_OP2_304_76793_20131126_013011_outLine +BABEL_OP2_304_77033_20140228_043125_inLine +BABEL_OP2_304_77033_20140228_043125_outLine +BABEL_OP2_304_77730_20131217_042107_inLine +BABEL_OP2_304_77730_20131217_042107_outLine +BABEL_OP2_304_78360_20140301_020449_inLine +BABEL_OP2_304_78360_20140301_020449_outLine +BABEL_OP2_304_78943_20131208_222716_inLine +BABEL_OP2_304_78943_20131208_222716_outLine +BABEL_OP2_304_79028_20131022_221243_inLine +BABEL_OP2_304_79028_20131022_221243_outLine +BABEL_OP2_304_79045_20140214_202301_inLine +BABEL_OP2_304_79045_20140214_202301_outLine +BABEL_OP2_304_79723_20131023_023756_inLine +BABEL_OP2_304_79723_20131023_023756_outLine +BABEL_OP2_304_79820_20131214_042918_inLine +BABEL_OP2_304_79820_20131214_042918_outLine +BABEL_OP2_304_80209_20131112_232041_inLine +BABEL_OP2_304_80209_20131112_232041_outLine +BABEL_OP2_304_80383_20131107_233543_inLine 
+BABEL_OP2_304_80383_20131107_233543_outLine +BABEL_OP2_304_80577_20140301_014201_inLine +BABEL_OP2_304_80577_20140301_014201_outLine +BABEL_OP2_304_80721_20140213_051749_inLine +BABEL_OP2_304_80721_20140213_051749_outLine +BABEL_OP2_304_80881_20131205_175435_inLine +BABEL_OP2_304_80881_20131205_175435_outLine +BABEL_OP2_304_81404_20131213_041501_inLine +BABEL_OP2_304_81404_20131213_041501_outLine +BABEL_OP2_304_81427_20131211_012524_inLine +BABEL_OP2_304_81427_20131211_012524_outLine +BABEL_OP2_304_81769_20140105_005749_inLine +BABEL_OP2_304_81769_20140105_005749_outLine +BABEL_OP2_304_82863_20140106_054346_inLine +BABEL_OP2_304_82863_20140106_054346_outLine +BABEL_OP2_304_83436_20131211_025218_inLine +BABEL_OP2_304_83436_20131211_025218_outLine +BABEL_OP2_304_83813_20131028_033118_inLine +BABEL_OP2_304_83813_20131028_033118_outLine +BABEL_OP2_304_83851_20131203_212613_inLine +BABEL_OP2_304_83851_20131203_212613_outLine +BABEL_OP2_304_83935_20140123_034100_inLine +BABEL_OP2_304_83935_20140123_034100_outLine +BABEL_OP2_304_84125_20131115_235931_inLine +BABEL_OP2_304_84125_20131115_235931_outLine +BABEL_OP2_304_84194_20131129_040805_inLine +BABEL_OP2_304_84194_20131129_040805_outLine +BABEL_OP2_304_84605_20131215_005949_inLine +BABEL_OP2_304_84605_20131215_005949_outLine +BABEL_OP2_304_85179_20140214_071121_inLine +BABEL_OP2_304_85179_20140214_071121_outLine +BABEL_OP2_304_85248_20140222_235016_inLine +BABEL_OP2_304_85248_20140222_235016_outLine +BABEL_OP2_304_86100_20131112_221929_inLine +BABEL_OP2_304_86100_20131112_221929_outLine +BABEL_OP2_304_86472_20140116_050058_inLine +BABEL_OP2_304_86472_20140116_050058_outLine +BABEL_OP2_304_86557_20131130_234925_inLine +BABEL_OP2_304_86557_20131130_234925_outLine +BABEL_OP2_304_86829_20131107_180321_inLine +BABEL_OP2_304_86829_20131107_180321_outLine +BABEL_OP2_304_86830_20140228_051058_inLine +BABEL_OP2_304_86830_20140228_051058_outLine +BABEL_OP2_304_87280_20140207_030432_inLine +BABEL_OP2_304_87280_20140207_030432_outLine +BABEL_OP2_304_87866_20131106_002751_inLine +BABEL_OP2_304_87866_20131106_002751_outLine +BABEL_OP2_304_88982_20140111_233039_inLine +BABEL_OP2_304_88982_20140111_233039_outLine +BABEL_OP2_304_89650_20131024_023031_inLine +BABEL_OP2_304_89650_20131024_023031_outLine +BABEL_OP2_304_91581_20140203_231410_inLine +BABEL_OP2_304_91581_20140203_231410_outLine +BABEL_OP2_304_92252_20131022_042600_inLine +BABEL_OP2_304_92252_20131022_042600_outLine +BABEL_OP2_304_92440_20131109_003559_inLine +BABEL_OP2_304_92440_20131109_003559_outLine +BABEL_OP2_304_92886_20131202_233808_inLine +BABEL_OP2_304_92886_20131202_233808_outLine +BABEL_OP2_304_93224_20140121_191942_inLine +BABEL_OP2_304_93224_20140121_191942_outLine +BABEL_OP2_304_93443_20131127_032037_inLine +BABEL_OP2_304_93443_20131127_032037_outLine +BABEL_OP2_304_93475_20131213_025105_inLine +BABEL_OP2_304_93475_20131213_025105_outLine +BABEL_OP2_304_93681_20131202_212236_inLine +BABEL_OP2_304_93681_20131202_212236_outLine +BABEL_OP2_304_94465_20140213_013300_inLine +BABEL_OP2_304_94465_20140213_013300_outLine +BABEL_OP2_304_94923_20140212_021923_inLine +BABEL_OP2_304_94923_20140212_021923_outLine +BABEL_OP2_304_95677_20131024_031406_inLine +BABEL_OP2_304_95677_20131024_031406_outLine +BABEL_OP2_304_96205_20140107_233946_inLine +BABEL_OP2_304_96205_20140107_233946_outLine +BABEL_OP2_304_96405_20131214_205112_inLine +BABEL_OP2_304_96405_20131214_205112_outLine +BABEL_OP2_304_98165_20131218_234422_inLine +BABEL_OP2_304_98165_20131218_234422_outLine 
+BABEL_OP2_304_98565_20131023_235505_inLine +BABEL_OP2_304_98565_20131023_235505_outLine +BABEL_OP2_304_99264_20140222_211846_inLine +BABEL_OP2_304_99264_20140222_211846_outLine +BABEL_OP2_304_99920_20140211_023914_inLine +BABEL_OP2_304_99920_20140211_023914_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/untranscribed-training.list b/egs/babel/s5d/conf/lists/304-lithuanian/untranscribed-training.list new file mode 100644 index 00000000000..fe18640b4ca --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/untranscribed-training.list @@ -0,0 +1,524 @@ +BABEL_OP2_304_10319_20131123_212421_inLine +BABEL_OP2_304_10938_20131210_232654_outLine +BABEL_OP2_304_10974_20140112_002642_inLine +BABEL_OP2_304_10974_20140112_002642_outLine +BABEL_OP2_304_11663_20140228_001249_inLine +BABEL_OP2_304_11663_20140228_001249_outLine +BABEL_OP2_304_12036_20140212_031355_inLine +BABEL_OP2_304_12036_20140212_031355_outLine +BABEL_OP2_304_13184_20140226_230154_inLine +BABEL_OP2_304_13184_20140226_230154_outLine +BABEL_OP2_304_13490_20140115_234942_inLine +BABEL_OP2_304_13490_20140115_234942_outLine +BABEL_OP2_304_13709_20140301_011301_inLine +BABEL_OP2_304_13709_20140301_011301_outLine +BABEL_OP2_304_14179_20140213_202716_inLine +BABEL_OP2_304_14179_20140213_202716_outLine +BABEL_OP2_304_14228_20140214_035430_inLine +BABEL_OP2_304_14228_20140214_035430_outLine +BABEL_OP2_304_14350_20131215_015450_outLine +BABEL_OP2_304_14884_20131019_215509_outLine +BABEL_OP2_304_14929_20140120_043551_inLine +BABEL_OP2_304_15324_20140111_022619_inLine +BABEL_OP2_304_15324_20140111_022619_outLine +BABEL_OP2_304_15702_20140121_182136_outLine +BABEL_OP2_304_15926_20140114_014611_inLine +BABEL_OP2_304_15926_20140114_014611_outLine +BABEL_OP2_304_16149_20131218_020646_inLine +BABEL_OP2_304_16149_20131218_020646_outLine +BABEL_OP2_304_17165_20140115_221908_inLine +BABEL_OP2_304_17165_20140115_221908_outLine +BABEL_OP2_304_17440_20140210_030642_inLine +BABEL_OP2_304_17440_20140210_030642_outLine +BABEL_OP2_304_17496_20140124_200649_inLine +BABEL_OP2_304_17496_20140124_200649_outLine +BABEL_OP2_304_17567_20140112_001028_inLine +BABEL_OP2_304_17567_20140112_001028_outLine +BABEL_OP2_304_18370_20131019_230544_inLine +BABEL_OP2_304_18924_20140122_013011_inLine +BABEL_OP2_304_19134_20140211_192844_inLine +BABEL_OP2_304_19134_20140211_192844_outLine +BABEL_OP2_304_19621_20140111_004245_inLine +BABEL_OP2_304_19621_20140111_004245_outLine +BABEL_OP2_304_19672_20140111_213853_inLine +BABEL_OP2_304_19672_20140111_213853_outLine +BABEL_OP2_304_20682_20140212_225527_inLine +BABEL_OP2_304_20682_20140212_225527_outLine +BABEL_OP2_304_20721_20131126_051609_inLine +BABEL_OP2_304_20896_20131024_162213_outLine +BABEL_OP2_304_21004_20140202_211736_inLine +BABEL_OP2_304_21004_20140202_211736_outLine +BABEL_OP2_304_21794_20140107_223934_inLine +BABEL_OP2_304_21794_20140107_223934_outLine +BABEL_OP2_304_22280_20140112_040339_inLine +BABEL_OP2_304_22280_20140112_040339_outLine +BABEL_OP2_304_22612_20140214_015154_inLine +BABEL_OP2_304_22612_20140214_015154_outLine +BABEL_OP2_304_23092_20140210_000452_inLine +BABEL_OP2_304_23092_20140210_000452_outLine +BABEL_OP2_304_23151_20140226_015000_inLine +BABEL_OP2_304_23151_20140226_015000_outLine +BABEL_OP2_304_23153_20140108_004710_inLine +BABEL_OP2_304_23153_20140108_004710_outLine +BABEL_OP2_304_23239_20140113_023420_inLine +BABEL_OP2_304_23239_20140113_023420_outLine +BABEL_OP2_304_23505_20140207_012218_inLine +BABEL_OP2_304_23505_20140207_012218_outLine 
+BABEL_OP2_304_24270_20140112_035305_inLine +BABEL_OP2_304_24270_20140112_035305_outLine +BABEL_OP2_304_24470_20140213_013507_inLine +BABEL_OP2_304_24470_20140213_013507_outLine +BABEL_OP2_304_26072_20140303_191300_inLine +BABEL_OP2_304_26072_20140303_191300_outLine +BABEL_OP2_304_26836_20131203_035109_inLine +BABEL_OP2_304_27203_20140114_184748_inLine +BABEL_OP2_304_27203_20140114_184748_outLine +BABEL_OP2_304_27590_20140120_000746_inLine +BABEL_OP2_304_27590_20140120_000746_outLine +BABEL_OP2_304_27841_20140224_192246_inLine +BABEL_OP2_304_27841_20140224_192246_outLine +BABEL_OP2_304_28190_20140216_014935_inLine +BABEL_OP2_304_28190_20140216_014935_outLine +BABEL_OP2_304_28303_20131204_205256_inLine +BABEL_OP2_304_28303_20131204_205256_outLine +BABEL_OP2_304_28606_20140118_214444_inLine +BABEL_OP2_304_28606_20140118_214444_outLine +BABEL_OP2_304_28814_20140221_030045_inLine +BABEL_OP2_304_28814_20140221_030045_outLine +BABEL_OP2_304_29076_20140214_031718_inLine +BABEL_OP2_304_29076_20140214_031718_outLine +BABEL_OP2_304_29323_20140224_220848_inLine +BABEL_OP2_304_29323_20140224_220848_outLine +BABEL_OP2_304_29416_20140209_221011_inLine +BABEL_OP2_304_29416_20140209_221011_outLine +BABEL_OP2_304_29685_20131203_214940_inLine +BABEL_OP2_304_29685_20131203_214940_outLine +BABEL_OP2_304_30345_20140220_044057_inLine +BABEL_OP2_304_30345_20140220_044057_outLine +BABEL_OP2_304_30432_20140114_200919_inLine +BABEL_OP2_304_30432_20140114_200919_outLine +BABEL_OP2_304_31109_20140117_222519_outLine +BABEL_OP2_304_31184_20140107_010323_inLine +BABEL_OP2_304_31184_20140107_010323_outLine +BABEL_OP2_304_31346_20140214_012447_inLine +BABEL_OP2_304_31346_20140214_012447_outLine +BABEL_OP2_304_31583_20140217_070401_inLine +BABEL_OP2_304_31583_20140217_070401_outLine +BABEL_OP2_304_31628_20140113_222259_inLine +BABEL_OP2_304_31628_20140113_222259_outLine +BABEL_OP2_304_31728_20131026_182200_outLine +BABEL_OP2_304_32380_20131202_212228_inLine +BABEL_OP2_304_32708_20131214_184617_inLine +BABEL_OP2_304_33175_20131218_055325_inLine +BABEL_OP2_304_33175_20131218_055325_outLine +BABEL_OP2_304_33229_20140224_173431_inLine +BABEL_OP2_304_33229_20140224_173431_outLine +BABEL_OP2_304_33635_20140108_055209_inLine +BABEL_OP2_304_33635_20140108_055209_outLine +BABEL_OP2_304_33913_20140208_001736_outLine +BABEL_OP2_304_33951_20140107_014839_inLine +BABEL_OP2_304_33951_20140107_014839_outLine +BABEL_OP2_304_34197_20140120_003604_inLine +BABEL_OP2_304_34197_20140120_010000_inLine +BABEL_OP2_304_34477_20131207_235614_inLine +BABEL_OP2_304_34477_20131207_235614_outLine +BABEL_OP2_304_34811_20140111_213202_inLine +BABEL_OP2_304_34811_20140111_213202_outLine +BABEL_OP2_304_34903_20140114_055610_outLine +BABEL_OP2_304_35000_20140214_054306_inLine +BABEL_OP2_304_35000_20140214_054306_outLine +BABEL_OP2_304_35143_20140213_194234_inLine +BABEL_OP2_304_35143_20140213_194234_outLine +BABEL_OP2_304_35583_20140221_183221_inLine +BABEL_OP2_304_35583_20140221_183221_outLine +BABEL_OP2_304_36669_20140107_024312_inLine +BABEL_OP2_304_36669_20140107_024312_outLine +BABEL_OP2_304_36894_20131214_210131_inLine +BABEL_OP2_304_36894_20131214_210131_outLine +BABEL_OP2_304_37271_20140214_210456_inLine +BABEL_OP2_304_37271_20140214_210456_outLine +BABEL_OP2_304_37281_20140112_044135_outLine +BABEL_OP2_304_37776_20131029_063539_inLine +BABEL_OP2_304_37853_20140228_011410_outLine +BABEL_OP2_304_38689_20140122_053200_inLine +BABEL_OP2_304_38689_20140122_053200_outLine +BABEL_OP2_304_39059_20140223_225026_inLine 
+BABEL_OP2_304_39059_20140223_225026_outLine +BABEL_OP2_304_39307_20131212_235627_outLine +BABEL_OP2_304_39426_20140222_001315_inLine +BABEL_OP2_304_39426_20140222_001315_outLine +BABEL_OP2_304_39555_20140217_001722_inLine +BABEL_OP2_304_39555_20140217_001722_outLine +BABEL_OP2_304_39744_20131205_204607_inLine +BABEL_OP2_304_39744_20131205_204607_outLine +BABEL_OP2_304_40557_20140214_015226_inLine +BABEL_OP2_304_40557_20140214_015226_outLine +BABEL_OP2_304_40648_20140105_023520_inLine +BABEL_OP2_304_40648_20140105_023520_outLine +BABEL_OP2_304_40740_20140303_034635_inLine +BABEL_OP2_304_40740_20140303_034635_outLine +BABEL_OP2_304_41038_20140120_031808_inLine +BABEL_OP2_304_41038_20140120_031808_outLine +BABEL_OP2_304_41097_20140114_065635_inLine +BABEL_OP2_304_41097_20140114_065635_outLine +BABEL_OP2_304_41100_20131212_035211_outLine +BABEL_OP2_304_41682_20131027_233404_outLine +BABEL_OP2_304_42029_20140225_042923_outLine +BABEL_OP2_304_42600_20131203_001235_inLine +BABEL_OP2_304_42619_20140124_050222_inLine +BABEL_OP2_304_42619_20140124_050222_outLine +BABEL_OP2_304_42942_20140122_011934_inLine +BABEL_OP2_304_42942_20140122_011934_outLine +BABEL_OP2_304_43788_20140113_035220_inLine +BABEL_OP2_304_43788_20140113_035220_outLine +BABEL_OP2_304_43789_20140108_205751_inLine +BABEL_OP2_304_43789_20140108_205751_outLine +BABEL_OP2_304_44477_20140116_062617_outLine +BABEL_OP2_304_44709_20140111_061727_inLine +BABEL_OP2_304_44709_20140111_061727_outLine +BABEL_OP2_304_44847_20140123_043546_inLine +BABEL_OP2_304_44847_20140123_043546_outLine +BABEL_OP2_304_46041_20140216_224708_inLine +BABEL_OP2_304_46041_20140216_224708_outLine +BABEL_OP2_304_46066_20140303_215904_inLine +BABEL_OP2_304_46066_20140303_215904_outLine +BABEL_OP2_304_46757_20140123_053540_outLine +BABEL_OP2_304_47186_20140213_022646_inLine +BABEL_OP2_304_47186_20140213_022646_outLine +BABEL_OP2_304_47283_20131208_055437_inLine +BABEL_OP2_304_47283_20131208_055437_outLine +BABEL_OP2_304_47823_20140213_200425_inLine +BABEL_OP2_304_47823_20140213_200425_outLine +BABEL_OP2_304_47959_20131206_002421_inLine +BABEL_OP2_304_47959_20131206_002421_outLine +BABEL_OP2_304_48243_20131130_191603_inLine +BABEL_OP2_304_48243_20131130_191603_outLine +BABEL_OP2_304_48422_20140226_030714_inLine +BABEL_OP2_304_48422_20140226_030714_outLine +BABEL_OP2_304_49118_20140224_035159_inLine +BABEL_OP2_304_49118_20140224_035159_outLine +BABEL_OP2_304_49502_20131128_050926_inLine +BABEL_OP2_304_49502_20131128_050926_outLine +BABEL_OP2_304_49637_20131204_225149_outLine +BABEL_OP2_304_49739_20131025_221623_outLine +BABEL_OP2_304_49812_20140221_004200_inLine +BABEL_OP2_304_49812_20140221_004200_outLine +BABEL_OP2_304_49870_20131026_011340_outLine +BABEL_OP2_304_49902_20131215_011726_inLine +BABEL_OP2_304_49902_20131215_011726_outLine +BABEL_OP2_304_50427_20140121_041440_inLine +BABEL_OP2_304_50427_20140121_041440_outLine +BABEL_OP2_304_50549_20140225_015442_inLine +BABEL_OP2_304_50549_20140225_015442_outLine +BABEL_OP2_304_50630_20140115_225817_outLine +BABEL_OP2_304_50958_20140107_035922_inLine +BABEL_OP2_304_50958_20140107_035922_outLine +BABEL_OP2_304_51156_20131108_034329_outLine +BABEL_OP2_304_51611_20131215_000933_outLine +BABEL_OP2_304_51611_20131215_001818_outLine +BABEL_OP2_304_51955_20131130_031610_inLine +BABEL_OP2_304_51955_20131130_031610_outLine +BABEL_OP2_304_51968_20140115_014540_outLine +BABEL_OP2_304_52404_20140111_203352_inLine +BABEL_OP2_304_52404_20140111_203352_outLine +BABEL_OP2_304_52422_20140227_022646_inLine 
+BABEL_OP2_304_52422_20140227_022646_outLine +BABEL_OP2_304_52442_20140111_220136_inLine +BABEL_OP2_304_52442_20140111_220136_outLine +BABEL_OP2_304_52717_20131212_005407_inLine +BABEL_OP2_304_52717_20131212_005407_outLine +BABEL_OP2_304_52932_20131203_060700_inLine +BABEL_OP2_304_53957_20140213_050235_inLine +BABEL_OP2_304_53957_20140213_050235_outLine +BABEL_OP2_304_54104_20140122_065051_inLine +BABEL_OP2_304_54162_20140117_004147_outLine +BABEL_OP2_304_54405_20140111_033306_inLine +BABEL_OP2_304_54405_20140111_033306_outLine +BABEL_OP2_304_54477_20140214_014521_outLine +BABEL_OP2_304_56198_20131207_044103_inLine +BABEL_OP2_304_56198_20131207_044103_outLine +BABEL_OP2_304_56198_20131207_044824_inLine +BABEL_OP2_304_56198_20131207_044824_outLine +BABEL_OP2_304_56370_20131210_222152_inLine +BABEL_OP2_304_56370_20131210_222152_outLine +BABEL_OP2_304_56606_20131029_001934_inLine +BABEL_OP2_304_57067_20140226_203014_inLine +BABEL_OP2_304_57067_20140226_203014_outLine +BABEL_OP2_304_57093_20140108_195135_inLine +BABEL_OP2_304_57093_20140108_195135_outLine +BABEL_OP2_304_57529_20140213_194042_inLine +BABEL_OP2_304_57529_20140213_194042_outLine +BABEL_OP2_304_57919_20131019_181730_inLine +BABEL_OP2_304_58489_20140213_201405_inLine +BABEL_OP2_304_58489_20140213_201405_outLine +BABEL_OP2_304_58821_20140122_043719_inLine +BABEL_OP2_304_58821_20140122_043719_outLine +BABEL_OP2_304_59078_20140114_043013_inLine +BABEL_OP2_304_59078_20140114_043013_outLine +BABEL_OP2_304_59301_20140220_055528_inLine +BABEL_OP2_304_59301_20140220_055528_outLine +BABEL_OP2_304_59301_20140220_061405_inLine +BABEL_OP2_304_59301_20140220_061405_outLine +BABEL_OP2_304_59509_20140111_012159_inLine +BABEL_OP2_304_59509_20140111_012159_outLine +BABEL_OP2_304_59993_20131207_052409_inLine +BABEL_OP2_304_59993_20131207_052409_outLine +BABEL_OP2_304_60650_20131104_000431_outLine +BABEL_OP2_304_61040_20140214_024448_inLine +BABEL_OP2_304_61040_20140214_024448_outLine +BABEL_OP2_304_61167_20131218_000849_inLine +BABEL_OP2_304_61167_20131218_000849_outLine +BABEL_OP2_304_61190_20131202_233122_inLine +BABEL_OP2_304_61190_20131202_233122_outLine +BABEL_OP2_304_61435_20140220_043508_inLine +BABEL_OP2_304_61435_20140220_043508_outLine +BABEL_OP2_304_62014_20140110_225736_inLine +BABEL_OP2_304_62014_20140110_225736_outLine +BABEL_OP2_304_62286_20140107_221925_inLine +BABEL_OP2_304_62286_20140107_221925_outLine +BABEL_OP2_304_62471_20131023_045947_inLine +BABEL_OP2_304_62471_20131023_045947_outLine +BABEL_OP2_304_62491_20131024_043538_outLine +BABEL_OP2_304_62734_20131211_050743_inLine +BABEL_OP2_304_62734_20131211_050743_outLine +BABEL_OP2_304_62810_20131205_000409_outLine +BABEL_OP2_304_62810_20131205_001411_outLine +BABEL_OP2_304_62835_20140111_023213_inLine +BABEL_OP2_304_62835_20140111_023213_outLine +BABEL_OP2_304_62852_20131217_062508_outLine +BABEL_OP2_304_63220_20140118_205257_outLine +BABEL_OP2_304_63757_20140112_050031_inLine +BABEL_OP2_304_64065_20131208_042849_inLine +BABEL_OP2_304_64065_20131208_042849_outLine +BABEL_OP2_304_64350_20140112_002927_inLine +BABEL_OP2_304_64350_20140112_002927_outLine +BABEL_OP2_304_64398_20140114_001457_inLine +BABEL_OP2_304_64635_20131031_005941_outLine +BABEL_OP2_304_64768_20131205_215037_inLine +BABEL_OP2_304_64768_20131205_215037_outLine +BABEL_OP2_304_64870_20140121_052458_inLine +BABEL_OP2_304_64870_20140121_052458_outLine +BABEL_OP2_304_65064_20140211_004709_inLine +BABEL_OP2_304_65064_20140211_004709_outLine +BABEL_OP2_304_65298_20140225_223150_outLine 
+BABEL_OP2_304_65367_20140223_221652_inLine +BABEL_OP2_304_65367_20140223_221652_outLine +BABEL_OP2_304_66519_20131217_221237_inLine +BABEL_OP2_304_66519_20131217_221237_outLine +BABEL_OP2_304_66959_20140214_010021_inLine +BABEL_OP2_304_66959_20140214_010021_outLine +BABEL_OP2_304_67659_20131210_070128_inLine +BABEL_OP2_304_67659_20131210_070128_outLine +BABEL_OP2_304_67842_20131209_002442_inLine +BABEL_OP2_304_67842_20131209_002442_outLine +BABEL_OP2_304_68244_20140112_003451_inLine +BABEL_OP2_304_68244_20140112_003451_outLine +BABEL_OP2_304_69578_20140116_015102_inLine +BABEL_OP2_304_69578_20140116_015102_outLine +BABEL_OP2_304_69633_20140112_001408_inLine +BABEL_OP2_304_69633_20140112_001408_outLine +BABEL_OP2_304_69636_20140114_002409_outLine +BABEL_OP2_304_70221_20140114_051222_inLine +BABEL_OP2_304_70221_20140114_051222_outLine +BABEL_OP2_304_70293_20131019_050801_inLine +BABEL_OP2_304_70343_20140123_031245_inLine +BABEL_OP2_304_70343_20140123_031245_outLine +BABEL_OP2_304_70713_20140304_052610_inLine +BABEL_OP2_304_70713_20140304_052610_outLine +BABEL_OP2_304_71038_20140227_011955_inLine +BABEL_OP2_304_71038_20140227_011955_outLine +BABEL_OP2_304_71067_20140111_025531_inLine +BABEL_OP2_304_71067_20140111_025531_outLine +BABEL_OP2_304_71282_20140303_003653_inLine +BABEL_OP2_304_71282_20140303_003653_outLine +BABEL_OP2_304_71559_20140217_031954_inLine +BABEL_OP2_304_71559_20140217_031954_outLine +BABEL_OP2_304_72324_20140111_214356_inLine +BABEL_OP2_304_72324_20140111_214356_outLine +BABEL_OP2_304_73072_20131211_203538_inLine +BABEL_OP2_304_73119_20131207_030241_inLine +BABEL_OP2_304_73119_20131207_030241_outLine +BABEL_OP2_304_73518_20140227_044044_inLine +BABEL_OP2_304_73518_20140227_044044_outLine +BABEL_OP2_304_74253_20140209_020556_outLine +BABEL_OP2_304_75261_20140214_002012_inLine +BABEL_OP2_304_75261_20140214_002012_outLine +BABEL_OP2_304_76218_20140112_030818_inLine +BABEL_OP2_304_76238_20140213_065615_outLine +BABEL_OP2_304_77391_20131206_031416_inLine +BABEL_OP2_304_77391_20131206_031416_outLine +BABEL_OP2_304_77567_20131217_032300_inLine +BABEL_OP2_304_78016_20131211_033559_inLine +BABEL_OP2_304_78016_20131211_033559_outLine +BABEL_OP2_304_78016_20131211_034555_inLine +BABEL_OP2_304_78016_20131211_034555_outLine +BABEL_OP2_304_78016_20131211_035830_inLine +BABEL_OP2_304_78016_20131211_035830_outLine +BABEL_OP2_304_78544_20140118_220548_inLine +BABEL_OP2_304_78544_20140118_220548_outLine +BABEL_OP2_304_78544_20140118_221258_inLine +BABEL_OP2_304_78544_20140118_221258_outLine +BABEL_OP2_304_78544_20140118_222525_inLine +BABEL_OP2_304_78544_20140118_222525_outLine +BABEL_OP2_304_78833_20131024_214927_outLine +BABEL_OP2_304_79129_20140303_004430_inLine +BABEL_OP2_304_79129_20140303_004430_outLine +BABEL_OP2_304_79139_20140117_233824_inLine +BABEL_OP2_304_79139_20140117_233824_outLine +BABEL_OP2_304_79167_20140113_043213_inLine +BABEL_OP2_304_79167_20140113_043213_outLine +BABEL_OP2_304_79571_20140115_210036_inLine +BABEL_OP2_304_79571_20140115_210036_outLine +BABEL_OP2_304_79590_20140115_194001_inLine +BABEL_OP2_304_79590_20140115_194001_outLine +BABEL_OP2_304_80136_20140221_210907_inLine +BABEL_OP2_304_80136_20140221_210907_outLine +BABEL_OP2_304_80306_20140113_211243_inLine +BABEL_OP2_304_80306_20140113_211243_outLine +BABEL_OP2_304_80781_20131207_214652_inLine +BABEL_OP2_304_80781_20131207_214652_outLine +BABEL_OP2_304_81392_20140120_040823_inLine +BABEL_OP2_304_81392_20140120_040823_outLine +BABEL_OP2_304_81435_20140122_044047_inLine 
+BABEL_OP2_304_81435_20140122_044047_outLine +BABEL_OP2_304_81553_20140221_190721_inLine +BABEL_OP2_304_81553_20140221_190721_outLine +BABEL_OP2_304_81622_20140115_191114_inLine +BABEL_OP2_304_81622_20140115_191114_outLine +BABEL_OP2_304_81671_20140303_000114_inLine +BABEL_OP2_304_81671_20140303_000114_outLine +BABEL_OP2_304_82138_20140108_210521_inLine +BABEL_OP2_304_82138_20140108_210521_outLine +BABEL_OP2_304_82140_20140109_010030_inLine +BABEL_OP2_304_82140_20140109_010030_outLine +BABEL_OP2_304_82966_20140212_003555_outLine +BABEL_OP2_304_82979_20131206_030414_inLine +BABEL_OP2_304_82979_20131206_030414_outLine +BABEL_OP2_304_83238_20140121_050333_inLine +BABEL_OP2_304_83238_20140121_050333_outLine +BABEL_OP2_304_83609_20131031_045140_inLine +BABEL_OP2_304_83609_20131031_045140_outLine +BABEL_OP2_304_84055_20140304_014209_inLine +BABEL_OP2_304_84055_20140304_014209_outLine +BABEL_OP2_304_84327_20140119_004436_inLine +BABEL_OP2_304_84327_20140119_004436_outLine +BABEL_OP2_304_84430_20131024_015151_inLine +BABEL_OP2_304_84467_20131030_000051_outLine +BABEL_OP2_304_84541_20131113_030920_inLine +BABEL_OP2_304_84583_20140122_023451_inLine +BABEL_OP2_304_84583_20140122_023451_outLine +BABEL_OP2_304_84715_20140225_204018_inLine +BABEL_OP2_304_84715_20140225_204018_outLine +BABEL_OP2_304_84823_20140213_070220_inLine +BABEL_OP2_304_84823_20140213_070220_outLine +BABEL_OP2_304_84936_20140115_025845_inLine +BABEL_OP2_304_84936_20140115_025845_outLine +BABEL_OP2_304_85028_20140216_043545_inLine +BABEL_OP2_304_85028_20140216_043545_outLine +BABEL_OP2_304_85048_20140213_194500_inLine +BABEL_OP2_304_85048_20140213_194500_outLine +BABEL_OP2_304_85651_20131213_231614_inLine +BABEL_OP2_304_85651_20131213_231614_outLine +BABEL_OP2_304_86191_20131208_035829_inLine +BABEL_OP2_304_86191_20131208_035829_outLine +BABEL_OP2_304_86433_20140122_053030_inLine +BABEL_OP2_304_86433_20140122_053030_outLine +BABEL_OP2_304_86676_20140109_025931_inLine +BABEL_OP2_304_86676_20140109_025931_outLine +BABEL_OP2_304_86748_20140225_213348_inLine +BABEL_OP2_304_86748_20140225_213348_outLine +BABEL_OP2_304_87073_20131114_031449_outLine +BABEL_OP2_304_87074_20131216_043306_inLine +BABEL_OP2_304_87074_20131216_043306_outLine +BABEL_OP2_304_87313_20140115_011909_inLine +BABEL_OP2_304_87313_20140115_011909_outLine +BABEL_OP2_304_87889_20140225_010303_inLine +BABEL_OP2_304_87889_20140225_010303_outLine +BABEL_OP2_304_88445_20140112_004454_inLine +BABEL_OP2_304_88445_20140112_004454_outLine +BABEL_OP2_304_88661_20140109_034129_inLine +BABEL_OP2_304_88661_20140109_034129_outLine +BABEL_OP2_304_88674_20131029_180931_outLine +BABEL_OP2_304_88686_20131124_012926_outLine +BABEL_OP2_304_88938_20140226_001937_inLine +BABEL_OP2_304_89575_20140213_180857_inLine +BABEL_OP2_304_89575_20140213_180857_outLine +BABEL_OP2_304_89695_20140107_232450_inLine +BABEL_OP2_304_89695_20140107_232450_outLine +BABEL_OP2_304_89718_20131112_202612_outLine +BABEL_OP2_304_89877_20140114_202105_inLine +BABEL_OP2_304_89877_20140114_202105_outLine +BABEL_OP2_304_89888_20131215_015024_outLine +BABEL_OP2_304_89943_20131214_030426_inLine +BABEL_OP2_304_89943_20131214_030426_outLine +BABEL_OP2_304_91252_20131113_035252_inLine +BABEL_OP2_304_91319_20140222_040737_inLine +BABEL_OP2_304_91319_20140222_040737_outLine +BABEL_OP2_304_91336_20140109_002119_inLine +BABEL_OP2_304_91336_20140109_002119_outLine +BABEL_OP2_304_91463_20140212_010126_inLine +BABEL_OP2_304_91463_20140212_012624_inLine +BABEL_OP2_304_91825_20131214_190413_inLine 
+BABEL_OP2_304_91825_20131214_190413_outLine +BABEL_OP2_304_91825_20131214_191357_inLine +BABEL_OP2_304_91825_20131214_191357_outLine +BABEL_OP2_304_92065_20140213_212712_inLine +BABEL_OP2_304_92065_20140213_212712_outLine +BABEL_OP2_304_92065_20140213_213512_inLine +BABEL_OP2_304_92065_20140213_213512_outLine +BABEL_OP2_304_92065_20140213_214440_inLine +BABEL_OP2_304_92065_20140213_214440_outLine +BABEL_OP2_304_92176_20140114_011149_inLine +BABEL_OP2_304_92176_20140114_011149_outLine +BABEL_OP2_304_92356_20140225_005836_inLine +BABEL_OP2_304_92356_20140225_005836_outLine +BABEL_OP2_304_92459_20131207_233730_outLine +BABEL_OP2_304_92527_20131217_074850_inLine +BABEL_OP2_304_92527_20131217_074850_outLine +BABEL_OP2_304_92740_20140112_014905_inLine +BABEL_OP2_304_92740_20140112_014905_outLine +BABEL_OP2_304_92809_20131213_011040_inLine +BABEL_OP2_304_93153_20140207_015821_inLine +BABEL_OP2_304_93153_20140207_015821_outLine +BABEL_OP2_304_93490_20140216_035543_inLine +BABEL_OP2_304_93490_20140216_035543_outLine +BABEL_OP2_304_93964_20140108_222311_inLine +BABEL_OP2_304_94212_20131020_203106_inLine +BABEL_OP2_304_94253_20131216_233242_inLine +BABEL_OP2_304_94253_20131216_233242_outLine +BABEL_OP2_304_94713_20131125_035926_outLine +BABEL_OP2_304_95446_20140221_224816_inLine +BABEL_OP2_304_95446_20140221_224816_outLine +BABEL_OP2_304_95937_20131108_201706_inLine +BABEL_OP2_304_96077_20131107_020023_outLine +BABEL_OP2_304_96446_20131204_220739_inLine +BABEL_OP2_304_96446_20131204_220739_outLine +BABEL_OP2_304_96525_20140216_231544_inLine +BABEL_OP2_304_96525_20140216_231544_outLine +BABEL_OP2_304_96584_20140228_045227_inLine +BABEL_OP2_304_96584_20140228_045227_outLine +BABEL_OP2_304_96910_20131203_185444_inLine +BABEL_OP2_304_96910_20131203_185444_outLine +BABEL_OP2_304_97264_20140225_193258_inLine +BABEL_OP2_304_97264_20140225_193258_outLine +BABEL_OP2_304_97376_20140121_204102_inLine +BABEL_OP2_304_97376_20140121_204102_outLine +BABEL_OP2_304_97461_20140111_022155_inLine +BABEL_OP2_304_97461_20140111_022155_outLine +BABEL_OP2_304_97988_20140212_022710_inLine +BABEL_OP2_304_97988_20140212_022710_outLine +BABEL_OP2_304_98390_20140123_203258_inLine +BABEL_OP2_304_98390_20140123_203258_outLine +BABEL_OP2_304_98580_20140115_202801_inLine +BABEL_OP2_304_98580_20140115_202801_outLine +BABEL_OP2_304_99202_20140121_200458_outLine +BABEL_OP2_304_99487_20131211_225837_outLine +BABEL_OP2_304_99516_20131202_003142_inLine +BABEL_OP2_304_99594_20140107_024518_inLine +BABEL_OP2_304_99594_20140107_024518_outLine +BABEL_OP2_304_99813_20140115_223643_inLine +BABEL_OP2_304_99813_20140115_223643_outLine +BABEL_OP2_304_99887_20140220_032712_inLine +BABEL_OP2_304_99887_20140220_032712_outLine +BABEL_OP2_304_99955_20140303_223109_inLine +BABEL_OP2_304_99955_20140303_223109_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/dev.2h.list b/egs/babel/s5d/conf/lists/305-guarani/dev.2h.list new file mode 100644 index 00000000000..4e8210eeac3 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/dev.2h.list @@ -0,0 +1,124 @@ +BABEL_OP3_305_13483_20150218_082518_inLine +BABEL_OP3_305_13483_20150218_082518_outLine +BABEL_OP3_305_18992_20140612_060247_inLine +BABEL_OP3_305_18992_20140612_060247_outLine +BABEL_OP3_305_20721_20150114_090748_inLine +BABEL_OP3_305_20721_20150114_090748_outLine +BABEL_OP3_305_21004_20150217_083755_inLine +BABEL_OP3_305_21004_20150217_083755_outLine +BABEL_OP3_305_21624_20150222_054542_inLine +BABEL_OP3_305_21624_20150222_054542_outLine +BABEL_OP3_305_22034_20141017_000534_inLine 
+BABEL_OP3_305_22034_20141017_000534_outLine +BABEL_OP3_305_22288_20140611_014728_inLine +BABEL_OP3_305_22288_20140611_014728_outLine +BABEL_OP3_305_22446_20140619_021336_inLine +BABEL_OP3_305_22446_20140619_021336_outLine +BABEL_OP3_305_23006_20140807_062702_inLine +BABEL_OP3_305_23006_20140807_062702_outLine +BABEL_OP3_305_23239_20150208_054506_inLine +BABEL_OP3_305_23239_20150208_054506_outLine +BABEL_OP3_305_24253_20150219_085207_inLine +BABEL_OP3_305_24253_20150219_085207_outLine +BABEL_OP3_305_27046_20140614_013755_inLine +BABEL_OP3_305_27046_20140614_013755_outLine +BABEL_OP3_305_30645_20140619_062447_inLine +BABEL_OP3_305_30645_20140619_062447_outLine +BABEL_OP3_305_32097_20140615_023706_inLine +BABEL_OP3_305_32097_20140615_023706_outLine +BABEL_OP3_305_32169_20140612_043749_inLine +BABEL_OP3_305_32169_20140612_043749_outLine +BABEL_OP3_305_34208_20140612_034755_inLine +BABEL_OP3_305_34208_20140612_034755_outLine +BABEL_OP3_305_37064_20140917_032644_inLine +BABEL_OP3_305_37064_20140917_032644_outLine +BABEL_OP3_305_38963_20140611_064935_inLine +BABEL_OP3_305_38963_20140611_064935_outLine +BABEL_OP3_305_39307_20140823_040640_inLine +BABEL_OP3_305_39307_20140823_040640_outLine +BABEL_OP3_305_39555_20141022_235815_inLine +BABEL_OP3_305_39555_20141022_235815_outLine +BABEL_OP3_305_39555_20141023_010258_inLine +BABEL_OP3_305_39555_20141023_010258_outLine +BABEL_OP3_305_41685_20150320_083024_inLine +BABEL_OP3_305_41685_20150320_083024_outLine +BABEL_OP3_305_43395_20150303_092614_inLine +BABEL_OP3_305_43395_20150303_092614_outLine +BABEL_OP3_305_44619_20140621_050143_inLine +BABEL_OP3_305_44619_20140621_050143_outLine +BABEL_OP3_305_45235_20141022_025027_inLine +BABEL_OP3_305_45235_20141022_025027_outLine +BABEL_OP3_305_46169_20150122_044028_inLine +BABEL_OP3_305_46169_20150122_044028_outLine +BABEL_OP3_305_46389_20141017_013950_inLine +BABEL_OP3_305_46389_20141017_013950_outLine +BABEL_OP3_305_46550_20140906_022304_inLine +BABEL_OP3_305_46550_20140906_022304_outLine +BABEL_OP3_305_46550_20140906_023533_inLine +BABEL_OP3_305_46550_20140906_023533_outLine +BABEL_OP3_305_46808_20140613_063242_inLine +BABEL_OP3_305_46808_20140613_063242_outLine +BABEL_OP3_305_47283_20140827_041341_inLine +BABEL_OP3_305_47283_20140827_041341_outLine +BABEL_OP3_305_49870_20140612_044921_inLine +BABEL_OP3_305_49870_20140612_044921_outLine +BABEL_OP3_305_50090_20150206_002321_inLine +BABEL_OP3_305_50090_20150206_002321_outLine +BABEL_OP3_305_50810_20140619_063147_inLine +BABEL_OP3_305_50810_20140619_063147_outLine +BABEL_OP3_305_50962_20140621_015129_inLine +BABEL_OP3_305_50962_20140621_015129_outLine +BABEL_OP3_305_51156_20140613_063549_inLine +BABEL_OP3_305_51156_20140613_063549_outLine +BABEL_OP3_305_52717_20140619_062206_inLine +BABEL_OP3_305_52717_20140619_062206_outLine +BABEL_OP3_305_53441_20140612_055846_inLine +BABEL_OP3_305_53441_20140612_055846_outLine +BABEL_OP3_305_56019_20150221_084856_inLine +BABEL_OP3_305_56019_20150221_084856_outLine +BABEL_OP3_305_58107_20150201_050424_inLine +BABEL_OP3_305_58107_20150201_050424_outLine +BABEL_OP3_305_58717_20150201_022141_inLine +BABEL_OP3_305_58717_20150201_022141_outLine +BABEL_OP3_305_61971_20150328_064233_inLine +BABEL_OP3_305_61971_20150328_064233_outLine +BABEL_OP3_305_66305_20150220_030810_inLine +BABEL_OP3_305_66305_20150220_030810_outLine +BABEL_OP3_305_67659_20140808_040651_inLine +BABEL_OP3_305_67659_20140808_040651_outLine +BABEL_OP3_305_73430_20150218_080038_inLine +BABEL_OP3_305_73430_20150218_080038_outLine 
+BABEL_OP3_305_73511_20150213_081754_inLine +BABEL_OP3_305_73511_20150213_081754_outLine +BABEL_OP3_305_76756_20150206_024436_inLine +BABEL_OP3_305_76756_20150206_024436_outLine +BABEL_OP3_305_78161_20150312_093559_inLine +BABEL_OP3_305_78161_20150312_093559_outLine +BABEL_OP3_305_78609_20141021_002844_inLine +BABEL_OP3_305_78609_20141021_002844_outLine +BABEL_OP3_305_81229_20140904_012832_inLine +BABEL_OP3_305_81229_20140904_012832_outLine +BABEL_OP3_305_81287_20150215_053321_inLine +BABEL_OP3_305_81287_20150215_053321_outLine +BABEL_OP3_305_81424_20150213_073659_inLine +BABEL_OP3_305_81424_20150213_073659_outLine +BABEL_OP3_305_84029_20140613_050741_inLine +BABEL_OP3_305_84029_20140613_050741_outLine +BABEL_OP3_305_84541_20140612_075946_inLine +BABEL_OP3_305_84541_20140612_075946_outLine +BABEL_OP3_305_84768_20140619_061958_inLine +BABEL_OP3_305_84768_20140619_061958_outLine +BABEL_OP3_305_86885_20140612_074001_inLine +BABEL_OP3_305_86885_20140612_074001_outLine +BABEL_OP3_305_88686_20140906_002505_inLine +BABEL_OP3_305_88686_20140906_002505_outLine +BABEL_OP3_305_90737_20141020_235210_inLine +BABEL_OP3_305_90737_20141020_235210_outLine +BABEL_OP3_305_91383_20150307_051712_inLine +BABEL_OP3_305_91383_20150307_051712_outLine +BABEL_OP3_305_96446_20140620_020014_inLine +BABEL_OP3_305_96446_20140620_020014_outLine +BABEL_OP3_305_97588_20140806_063029_inLine +BABEL_OP3_305_97588_20140806_063029_outLine +BABEL_OP3_305_97911_20150304_082443_inLine +BABEL_OP3_305_97911_20150304_082443_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/dev.list b/egs/babel/s5d/conf/lists/305-guarani/dev.list new file mode 100644 index 00000000000..4e8210eeac3 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/dev.list @@ -0,0 +1,124 @@ +BABEL_OP3_305_13483_20150218_082518_inLine +BABEL_OP3_305_13483_20150218_082518_outLine +BABEL_OP3_305_18992_20140612_060247_inLine +BABEL_OP3_305_18992_20140612_060247_outLine +BABEL_OP3_305_20721_20150114_090748_inLine +BABEL_OP3_305_20721_20150114_090748_outLine +BABEL_OP3_305_21004_20150217_083755_inLine +BABEL_OP3_305_21004_20150217_083755_outLine +BABEL_OP3_305_21624_20150222_054542_inLine +BABEL_OP3_305_21624_20150222_054542_outLine +BABEL_OP3_305_22034_20141017_000534_inLine +BABEL_OP3_305_22034_20141017_000534_outLine +BABEL_OP3_305_22288_20140611_014728_inLine +BABEL_OP3_305_22288_20140611_014728_outLine +BABEL_OP3_305_22446_20140619_021336_inLine +BABEL_OP3_305_22446_20140619_021336_outLine +BABEL_OP3_305_23006_20140807_062702_inLine +BABEL_OP3_305_23006_20140807_062702_outLine +BABEL_OP3_305_23239_20150208_054506_inLine +BABEL_OP3_305_23239_20150208_054506_outLine +BABEL_OP3_305_24253_20150219_085207_inLine +BABEL_OP3_305_24253_20150219_085207_outLine +BABEL_OP3_305_27046_20140614_013755_inLine +BABEL_OP3_305_27046_20140614_013755_outLine +BABEL_OP3_305_30645_20140619_062447_inLine +BABEL_OP3_305_30645_20140619_062447_outLine +BABEL_OP3_305_32097_20140615_023706_inLine +BABEL_OP3_305_32097_20140615_023706_outLine +BABEL_OP3_305_32169_20140612_043749_inLine +BABEL_OP3_305_32169_20140612_043749_outLine +BABEL_OP3_305_34208_20140612_034755_inLine +BABEL_OP3_305_34208_20140612_034755_outLine +BABEL_OP3_305_37064_20140917_032644_inLine +BABEL_OP3_305_37064_20140917_032644_outLine +BABEL_OP3_305_38963_20140611_064935_inLine +BABEL_OP3_305_38963_20140611_064935_outLine +BABEL_OP3_305_39307_20140823_040640_inLine +BABEL_OP3_305_39307_20140823_040640_outLine +BABEL_OP3_305_39555_20141022_235815_inLine +BABEL_OP3_305_39555_20141022_235815_outLine 
+BABEL_OP3_305_39555_20141023_010258_inLine +BABEL_OP3_305_39555_20141023_010258_outLine +BABEL_OP3_305_41685_20150320_083024_inLine +BABEL_OP3_305_41685_20150320_083024_outLine +BABEL_OP3_305_43395_20150303_092614_inLine +BABEL_OP3_305_43395_20150303_092614_outLine +BABEL_OP3_305_44619_20140621_050143_inLine +BABEL_OP3_305_44619_20140621_050143_outLine +BABEL_OP3_305_45235_20141022_025027_inLine +BABEL_OP3_305_45235_20141022_025027_outLine +BABEL_OP3_305_46169_20150122_044028_inLine +BABEL_OP3_305_46169_20150122_044028_outLine +BABEL_OP3_305_46389_20141017_013950_inLine +BABEL_OP3_305_46389_20141017_013950_outLine +BABEL_OP3_305_46550_20140906_022304_inLine +BABEL_OP3_305_46550_20140906_022304_outLine +BABEL_OP3_305_46550_20140906_023533_inLine +BABEL_OP3_305_46550_20140906_023533_outLine +BABEL_OP3_305_46808_20140613_063242_inLine +BABEL_OP3_305_46808_20140613_063242_outLine +BABEL_OP3_305_47283_20140827_041341_inLine +BABEL_OP3_305_47283_20140827_041341_outLine +BABEL_OP3_305_49870_20140612_044921_inLine +BABEL_OP3_305_49870_20140612_044921_outLine +BABEL_OP3_305_50090_20150206_002321_inLine +BABEL_OP3_305_50090_20150206_002321_outLine +BABEL_OP3_305_50810_20140619_063147_inLine +BABEL_OP3_305_50810_20140619_063147_outLine +BABEL_OP3_305_50962_20140621_015129_inLine +BABEL_OP3_305_50962_20140621_015129_outLine +BABEL_OP3_305_51156_20140613_063549_inLine +BABEL_OP3_305_51156_20140613_063549_outLine +BABEL_OP3_305_52717_20140619_062206_inLine +BABEL_OP3_305_52717_20140619_062206_outLine +BABEL_OP3_305_53441_20140612_055846_inLine +BABEL_OP3_305_53441_20140612_055846_outLine +BABEL_OP3_305_56019_20150221_084856_inLine +BABEL_OP3_305_56019_20150221_084856_outLine +BABEL_OP3_305_58107_20150201_050424_inLine +BABEL_OP3_305_58107_20150201_050424_outLine +BABEL_OP3_305_58717_20150201_022141_inLine +BABEL_OP3_305_58717_20150201_022141_outLine +BABEL_OP3_305_61971_20150328_064233_inLine +BABEL_OP3_305_61971_20150328_064233_outLine +BABEL_OP3_305_66305_20150220_030810_inLine +BABEL_OP3_305_66305_20150220_030810_outLine +BABEL_OP3_305_67659_20140808_040651_inLine +BABEL_OP3_305_67659_20140808_040651_outLine +BABEL_OP3_305_73430_20150218_080038_inLine +BABEL_OP3_305_73430_20150218_080038_outLine +BABEL_OP3_305_73511_20150213_081754_inLine +BABEL_OP3_305_73511_20150213_081754_outLine +BABEL_OP3_305_76756_20150206_024436_inLine +BABEL_OP3_305_76756_20150206_024436_outLine +BABEL_OP3_305_78161_20150312_093559_inLine +BABEL_OP3_305_78161_20150312_093559_outLine +BABEL_OP3_305_78609_20141021_002844_inLine +BABEL_OP3_305_78609_20141021_002844_outLine +BABEL_OP3_305_81229_20140904_012832_inLine +BABEL_OP3_305_81229_20140904_012832_outLine +BABEL_OP3_305_81287_20150215_053321_inLine +BABEL_OP3_305_81287_20150215_053321_outLine +BABEL_OP3_305_81424_20150213_073659_inLine +BABEL_OP3_305_81424_20150213_073659_outLine +BABEL_OP3_305_84029_20140613_050741_inLine +BABEL_OP3_305_84029_20140613_050741_outLine +BABEL_OP3_305_84541_20140612_075946_inLine +BABEL_OP3_305_84541_20140612_075946_outLine +BABEL_OP3_305_84768_20140619_061958_inLine +BABEL_OP3_305_84768_20140619_061958_outLine +BABEL_OP3_305_86885_20140612_074001_inLine +BABEL_OP3_305_86885_20140612_074001_outLine +BABEL_OP3_305_88686_20140906_002505_inLine +BABEL_OP3_305_88686_20140906_002505_outLine +BABEL_OP3_305_90737_20141020_235210_inLine +BABEL_OP3_305_90737_20141020_235210_outLine +BABEL_OP3_305_91383_20150307_051712_inLine +BABEL_OP3_305_91383_20150307_051712_outLine +BABEL_OP3_305_96446_20140620_020014_inLine 
+BABEL_OP3_305_96446_20140620_020014_outLine +BABEL_OP3_305_97588_20140806_063029_inLine +BABEL_OP3_305_97588_20140806_063029_outLine +BABEL_OP3_305_97911_20150304_082443_inLine +BABEL_OP3_305_97911_20150304_082443_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/eval.list b/egs/babel/s5d/conf/lists/305-guarani/eval.list new file mode 100644 index 00000000000..517ff94f450 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/eval.list @@ -0,0 +1,186 @@ +BABEL_OP3_305_10036_20140807_033554_inLine +BABEL_OP3_305_10036_20140807_033554_outLine +BABEL_OP3_305_10188_20140614_030926_inLine +BABEL_OP3_305_10188_20140614_030926_outLine +BABEL_OP3_305_10482_20150214_093450_inLine +BABEL_OP3_305_10482_20150214_093450_outLine +BABEL_OP3_305_10638_20150318_093445_inLine +BABEL_OP3_305_10638_20150318_093445_outLine +BABEL_OP3_305_11352_20150219_080531_inLine +BABEL_OP3_305_11352_20150219_080531_outLine +BABEL_OP3_305_11528_20150328_055924_inLine +BABEL_OP3_305_11528_20150328_055924_outLine +BABEL_OP3_305_13126_20150222_063432_inLine +BABEL_OP3_305_13126_20150222_063432_outLine +BABEL_OP3_305_13586_20150122_061859_inLine +BABEL_OP3_305_13586_20150122_061859_outLine +BABEL_OP3_305_13744_20140615_004815_inLine +BABEL_OP3_305_13744_20140615_004815_outLine +BABEL_OP3_305_15163_20141021_042732_inLine +BABEL_OP3_305_15163_20141021_042732_outLine +BABEL_OP3_305_16475_20140910_050557_inLine +BABEL_OP3_305_16475_20140910_050557_outLine +BABEL_OP3_305_16726_20140620_054123_inLine +BABEL_OP3_305_16726_20140620_054123_outLine +BABEL_OP3_305_20724_20140612_032059_inLine +BABEL_OP3_305_20724_20140612_032059_outLine +BABEL_OP3_305_22494_20150210_082201_inLine +BABEL_OP3_305_22494_20150210_082201_outLine +BABEL_OP3_305_22643_20150331_062123_inLine +BABEL_OP3_305_22643_20150331_062123_outLine +BABEL_OP3_305_25895_20140611_072609_inLine +BABEL_OP3_305_25895_20140611_072609_outLine +BABEL_OP3_305_26206_20150212_091700_inLine +BABEL_OP3_305_26206_20150212_091700_outLine +BABEL_OP3_305_26388_20140731_014108_inLine +BABEL_OP3_305_26388_20140731_014108_outLine +BABEL_OP3_305_27082_20141016_051726_inLine +BABEL_OP3_305_27082_20141016_051726_outLine +BABEL_OP3_305_28775_20140621_055220_inLine +BABEL_OP3_305_28775_20140621_055220_outLine +BABEL_OP3_305_28945_20140906_010750_inLine +BABEL_OP3_305_28945_20140906_010750_outLine +BABEL_OP3_305_29023_20140821_052317_inLine +BABEL_OP3_305_29023_20140821_052317_outLine +BABEL_OP3_305_29023_20140821_053525_inLine +BABEL_OP3_305_29023_20140821_053525_outLine +BABEL_OP3_305_29765_20150307_085516_inLine +BABEL_OP3_305_29765_20150307_085516_outLine +BABEL_OP3_305_30461_20150310_062851_inLine +BABEL_OP3_305_30461_20150310_062851_outLine +BABEL_OP3_305_30653_20150219_091045_inLine +BABEL_OP3_305_30653_20150219_091045_outLine +BABEL_OP3_305_31484_20150212_074454_inLine +BABEL_OP3_305_31484_20150212_074454_outLine +BABEL_OP3_305_31628_20150208_021858_inLine +BABEL_OP3_305_31628_20150208_021858_outLine +BABEL_OP3_305_32301_20150212_011150_inLine +BABEL_OP3_305_32301_20150212_011150_outLine +BABEL_OP3_305_32328_20150122_041147_inLine +BABEL_OP3_305_32328_20150122_041147_outLine +BABEL_OP3_305_35139_20140822_065230_inLine +BABEL_OP3_305_35139_20140822_065230_outLine +BABEL_OP3_305_35181_20150221_032331_inLine +BABEL_OP3_305_35181_20150221_032331_outLine +BABEL_OP3_305_36505_20141211_012908_inLine +BABEL_OP3_305_36505_20141211_012908_outLine +BABEL_OP3_305_36505_20141211_014026_inLine +BABEL_OP3_305_36505_20141211_014026_outLine +BABEL_OP3_305_37684_20140612_021940_inLine 
+BABEL_OP3_305_37684_20140612_021940_outLine +BABEL_OP3_305_41174_20140904_033334_inLine +BABEL_OP3_305_41174_20140904_033334_outLine +BABEL_OP3_305_41920_20140618_052053_inLine +BABEL_OP3_305_41920_20140618_052053_outLine +BABEL_OP3_305_42600_20140731_005108_inLine +BABEL_OP3_305_42600_20140731_005108_outLine +BABEL_OP3_305_43788_20150208_044657_inLine +BABEL_OP3_305_43788_20150208_044657_outLine +BABEL_OP3_305_46315_20150214_012323_inLine +BABEL_OP3_305_46315_20150214_012323_outLine +BABEL_OP3_305_46625_20140618_065851_inLine +BABEL_OP3_305_46625_20140618_065851_outLine +BABEL_OP3_305_48758_20150220_092254_inLine +BABEL_OP3_305_48758_20150220_092254_outLine +BABEL_OP3_305_49216_20140615_041916_inLine +BABEL_OP3_305_49216_20140615_041916_outLine +BABEL_OP3_305_49637_20140619_051340_inLine +BABEL_OP3_305_49637_20140619_051340_outLine +BABEL_OP3_305_50175_20140620_063847_inLine +BABEL_OP3_305_50175_20140620_063847_outLine +BABEL_OP3_305_50630_20150211_101833_inLine +BABEL_OP3_305_50630_20150211_101833_outLine +BABEL_OP3_305_51417_20141028_072402_inLine +BABEL_OP3_305_51417_20141028_072402_outLine +BABEL_OP3_305_52438_20140621_014747_inLine +BABEL_OP3_305_52438_20140621_014747_outLine +BABEL_OP3_305_52804_20140822_074104_inLine +BABEL_OP3_305_52804_20140822_074104_outLine +BABEL_OP3_305_53758_20140611_060640_inLine +BABEL_OP3_305_53758_20140611_060640_outLine +BABEL_OP3_305_56468_20150327_024417_inLine +BABEL_OP3_305_56468_20150327_024417_outLine +BABEL_OP3_305_56677_20150226_094545_inLine +BABEL_OP3_305_56677_20150226_094545_outLine +BABEL_OP3_305_58821_20150217_093203_inLine +BABEL_OP3_305_58821_20150217_093203_outLine +BABEL_OP3_305_59163_20140614_065953_inLine +BABEL_OP3_305_59163_20140614_065953_outLine +BABEL_OP3_305_60538_20140619_023839_inLine +BABEL_OP3_305_60538_20140619_023839_outLine +BABEL_OP3_305_60661_20140822_055802_inLine +BABEL_OP3_305_60661_20140822_055802_outLine +BABEL_OP3_305_61011_20140911_035151_inLine +BABEL_OP3_305_61011_20140911_035151_outLine +BABEL_OP3_305_63484_20140614_064915_inLine +BABEL_OP3_305_63484_20140614_064915_outLine +BABEL_OP3_305_66916_20141021_002433_inLine +BABEL_OP3_305_66916_20141021_002433_outLine +BABEL_OP3_305_67152_20150228_091753_inLine +BABEL_OP3_305_67152_20150228_091753_outLine +BABEL_OP3_305_67894_20140614_021409_inLine +BABEL_OP3_305_67894_20140614_021409_outLine +BABEL_OP3_305_69633_20150211_042219_inLine +BABEL_OP3_305_69633_20150211_042219_outLine +BABEL_OP3_305_70386_20140823_041550_inLine +BABEL_OP3_305_70386_20140823_041550_outLine +BABEL_OP3_305_71614_20150220_005206_inLine +BABEL_OP3_305_71614_20150220_005206_outLine +BABEL_OP3_305_72007_20150218_073351_inLine +BABEL_OP3_305_72007_20150218_073351_outLine +BABEL_OP3_305_73072_20140620_003027_inLine +BABEL_OP3_305_73072_20140620_003027_outLine +BABEL_OP3_305_73622_20140731_060846_inLine +BABEL_OP3_305_73622_20140731_060846_outLine +BABEL_OP3_305_75930_20140613_043849_inLine +BABEL_OP3_305_75930_20140613_043849_outLine +BABEL_OP3_305_75930_20140613_045058_inLine +BABEL_OP3_305_75930_20140613_045058_outLine +BABEL_OP3_305_76218_20140912_034653_inLine +BABEL_OP3_305_76218_20140912_034653_outLine +BABEL_OP3_305_77112_20141017_061539_inLine +BABEL_OP3_305_77112_20141017_061539_outLine +BABEL_OP3_305_78398_20140807_031509_inLine +BABEL_OP3_305_78398_20140807_031509_outLine +BABEL_OP3_305_78543_20150307_102417_inLine +BABEL_OP3_305_78543_20150307_102417_outLine +BABEL_OP3_305_78604_20140801_052426_inLine +BABEL_OP3_305_78604_20140801_052426_outLine 
+BABEL_OP3_305_79107_20150301_081556_inLine +BABEL_OP3_305_79107_20150301_081556_outLine +BABEL_OP3_305_80383_20140612_015419_inLine +BABEL_OP3_305_80383_20140612_015419_outLine +BABEL_OP3_305_81404_20140821_230151_inLine +BABEL_OP3_305_81404_20140821_230151_outLine +BABEL_OP3_305_83775_20140808_011711_inLine +BABEL_OP3_305_83775_20140808_011711_outLine +BABEL_OP3_305_84370_20150301_074935_inLine +BABEL_OP3_305_84370_20150301_074935_outLine +BABEL_OP3_305_84466_20150220_080109_inLine +BABEL_OP3_305_84466_20150220_080109_outLine +BABEL_OP3_305_86676_20150207_083733_inLine +BABEL_OP3_305_86676_20150207_083733_outLine +BABEL_OP3_305_87074_20140821_062711_inLine +BABEL_OP3_305_87074_20140821_062711_outLine +BABEL_OP3_305_87693_20140913_020630_inLine +BABEL_OP3_305_87693_20140913_020630_outLine +BABEL_OP3_305_88372_20150301_060900_inLine +BABEL_OP3_305_88372_20150301_060900_outLine +BABEL_OP3_305_88661_20150207_095906_inLine +BABEL_OP3_305_88661_20150207_095906_outLine +BABEL_OP3_305_88982_20150122_052417_inLine +BABEL_OP3_305_88982_20150122_052417_outLine +BABEL_OP3_305_88988_20150318_090506_inLine +BABEL_OP3_305_88988_20150318_090506_outLine +BABEL_OP3_305_89059_20141028_073001_inLine +BABEL_OP3_305_89059_20141028_073001_outLine +BABEL_OP3_305_89226_20140614_041314_inLine +BABEL_OP3_305_89226_20140614_041314_outLine +BABEL_OP3_305_90935_20140808_010525_inLine +BABEL_OP3_305_90935_20140808_010525_outLine +BABEL_OP3_305_93007_20150311_015743_inLine +BABEL_OP3_305_93007_20150311_015743_outLine +BABEL_OP3_305_95663_20140806_050031_inLine +BABEL_OP3_305_95663_20140806_050031_outLine +BABEL_OP3_305_96910_20140911_043108_inLine +BABEL_OP3_305_96910_20140911_043108_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/sub-train.list b/egs/babel/s5d/conf/lists/305-guarani/sub-train.list new file mode 100644 index 00000000000..ff3a375b9e6 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/sub-train.list @@ -0,0 +1,134 @@ +BABEL_OP3_305_11419_20140620_004343_inLine +BABEL_OP3_305_11419_20140620_004343_outLine +BABEL_OP3_305_12242_20140808_034042_inLine +BABEL_OP3_305_12242_20140808_034042_outLine +BABEL_OP3_305_12242_20140808_035409_inLine +BABEL_OP3_305_12242_20140808_035409_outLine +BABEL_OP3_305_14814_20140621_005436_inLine +BABEL_OP3_305_14814_20140621_005436_outLine +BABEL_OP3_305_14814_20140621_011333_inLine +BABEL_OP3_305_14814_20140621_011333_outLine +BABEL_OP3_305_15926_20150211_090843_inLine +BABEL_OP3_305_15926_20150211_090843_outLine +BABEL_OP3_305_17032_20150213_094305_inLine +BABEL_OP3_305_17032_20150213_094305_outLine +BABEL_OP3_305_17032_20150213_095552_inLine +BABEL_OP3_305_17032_20150213_095552_outLine +BABEL_OP3_305_20454_20140619_022112_inLine +BABEL_OP3_305_20454_20140619_022112_outLine +BABEL_OP3_305_21543_20141018_050405_inLine +BABEL_OP3_305_21543_20141018_050405_outLine +BABEL_OP3_305_21794_20141021_022208_inLine +BABEL_OP3_305_21794_20141021_022208_outLine +BABEL_OP3_305_30180_20140906_231005_inLine +BABEL_OP3_305_30180_20140906_231005_outLine +BABEL_OP3_305_33111_20150228_023906_inLine +BABEL_OP3_305_33111_20150228_023906_outLine +BABEL_OP3_305_33149_20141021_034616_inLine +BABEL_OP3_305_33149_20141021_034616_outLine +BABEL_OP3_305_33251_20150130_021517_inLine +BABEL_OP3_305_33251_20150130_021517_outLine +BABEL_OP3_305_34629_20150327_010455_inLine +BABEL_OP3_305_34629_20150327_010455_outLine +BABEL_OP3_305_35467_20140806_032442_inLine +BABEL_OP3_305_35467_20140806_032442_outLine +BABEL_OP3_305_35706_20150221_093541_inLine +BABEL_OP3_305_35706_20150221_093541_outLine 
+BABEL_OP3_305_37007_20150331_081658_inLine +BABEL_OP3_305_37007_20150331_081658_outLine +BABEL_OP3_305_38664_20140807_042817_inLine +BABEL_OP3_305_38664_20140807_042817_outLine +BABEL_OP3_305_43368_20140822_071919_inLine +BABEL_OP3_305_43368_20140822_071919_outLine +BABEL_OP3_305_45486_20150331_070439_inLine +BABEL_OP3_305_45486_20150331_070439_outLine +BABEL_OP3_305_46681_20140729_053142_inLine +BABEL_OP3_305_46681_20140729_053142_outLine +BABEL_OP3_305_46688_20140620_060408_inLine +BABEL_OP3_305_46688_20140620_060408_outLine +BABEL_OP3_305_46757_20150211_011836_inLine +BABEL_OP3_305_46757_20150211_011836_outLine +BABEL_OP3_305_46757_20150211_013224_inLine +BABEL_OP3_305_46757_20150211_013224_outLine +BABEL_OP3_305_48844_20140621_034908_inLine +BABEL_OP3_305_48844_20140621_034908_outLine +BABEL_OP3_305_48844_20140621_035628_inLine +BABEL_OP3_305_48844_20140621_035628_outLine +BABEL_OP3_305_49768_20140731_031152_inLine +BABEL_OP3_305_49768_20140731_031152_outLine +BABEL_OP3_305_51015_20150211_235649_inLine +BABEL_OP3_305_51015_20150211_235649_outLine +BABEL_OP3_305_51611_20140619_070031_inLine +BABEL_OP3_305_51611_20140619_070031_outLine +BABEL_OP3_305_51611_20140619_071006_inLine +BABEL_OP3_305_51611_20140619_071006_outLine +BABEL_OP3_305_52725_20150227_111722_inLine +BABEL_OP3_305_52725_20150227_111722_outLine +BABEL_OP3_305_55815_20140612_000452_inLine +BABEL_OP3_305_55815_20140612_000452_outLine +BABEL_OP3_305_55818_20140620_003329_inLine +BABEL_OP3_305_55818_20140620_003329_outLine +BABEL_OP3_305_56198_20140904_224843_inLine +BABEL_OP3_305_56198_20140904_224843_outLine +BABEL_OP3_305_57116_20140618_021028_inLine +BABEL_OP3_305_57116_20140618_021028_outLine +BABEL_OP3_305_57654_20140917_034820_inLine +BABEL_OP3_305_57654_20140917_034820_outLine +BABEL_OP3_305_58061_20150326_103607_inLine +BABEL_OP3_305_58061_20150326_103607_outLine +BABEL_OP3_305_58734_20140620_003259_inLine +BABEL_OP3_305_58734_20140620_003259_outLine +BABEL_OP3_305_62158_20150313_013514_inLine +BABEL_OP3_305_62158_20150313_013514_outLine +BABEL_OP3_305_62734_20140821_221916_inLine +BABEL_OP3_305_62734_20140821_221916_outLine +BABEL_OP3_305_62852_20140618_072924_inLine +BABEL_OP3_305_62852_20140618_072924_outLine +BABEL_OP3_305_65466_20150222_074001_inLine +BABEL_OP3_305_65466_20150222_074001_outLine +BABEL_OP3_305_66967_20140618_044613_inLine +BABEL_OP3_305_66967_20140618_044613_outLine +BABEL_OP3_305_67373_20140822_005349_inLine +BABEL_OP3_305_67373_20140822_005349_outLine +BABEL_OP3_305_67389_20150317_083510_inLine +BABEL_OP3_305_67389_20150317_083510_outLine +BABEL_OP3_305_68068_20150206_100103_inLine +BABEL_OP3_305_68068_20150206_100103_outLine +BABEL_OP3_305_69090_20141018_010121_inLine +BABEL_OP3_305_69090_20141018_010121_outLine +BABEL_OP3_305_70251_20140618_233739_inLine +BABEL_OP3_305_70251_20140618_233739_outLine +BABEL_OP3_305_71333_20140808_025232_inLine +BABEL_OP3_305_71333_20140808_025232_outLine +BABEL_OP3_305_73301_20140808_235747_inLine +BABEL_OP3_305_73301_20140808_235747_outLine +BABEL_OP3_305_77225_20140612_003002_inLine +BABEL_OP3_305_77225_20140612_003002_outLine +BABEL_OP3_305_79028_20140621_005114_inLine +BABEL_OP3_305_79028_20140621_005114_outLine +BABEL_OP3_305_82626_20150307_100633_inLine +BABEL_OP3_305_82626_20150307_100633_outLine +BABEL_OP3_305_83436_20140619_060309_inLine +BABEL_OP3_305_83436_20140619_060309_outLine +BABEL_OP3_305_83935_20150213_091523_inLine +BABEL_OP3_305_83935_20150213_091523_outLine +BABEL_OP3_305_84055_20150221_083133_inLine 
+BABEL_OP3_305_84055_20150221_083133_outLine +BABEL_OP3_305_84079_20140613_053813_inLine +BABEL_OP3_305_84079_20140613_053813_outLine +BABEL_OP3_305_84605_20140903_033325_inLine +BABEL_OP3_305_84605_20140903_033325_outLine +BABEL_OP3_305_84605_20140903_034415_inLine +BABEL_OP3_305_84605_20140903_034415_outLine +BABEL_OP3_305_86433_20150211_094926_inLine +BABEL_OP3_305_86433_20150211_094926_outLine +BABEL_OP3_305_92941_20140911_002247_inLine +BABEL_OP3_305_92941_20140911_002247_outLine +BABEL_OP3_305_95269_20140912_000910_inLine +BABEL_OP3_305_95269_20140912_000910_outLine +BABEL_OP3_305_96041_20140611_065313_inLine +BABEL_OP3_305_96041_20140611_065313_outLine +BABEL_OP3_305_97220_20150303_234352_inLine +BABEL_OP3_305_97220_20150303_234352_outLine +BABEL_OP3_305_98192_20150306_053152_inLine +BABEL_OP3_305_98192_20150306_053152_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/305-guarani/sub-train.untranscribed.list new file mode 100644 index 00000000000..165c7e95f06 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/sub-train.untranscribed.list @@ -0,0 +1,392 @@ +BABEL_OP3_305_10901_20141017_014336_inLine +BABEL_OP3_305_10901_20141017_014336_outLine +BABEL_OP3_305_11723_20141021_053536_inLine +BABEL_OP3_305_11723_20141021_053536_outLine +BABEL_OP3_305_12851_20140618_061651_inLine +BABEL_OP3_305_12851_20140618_061651_outLine +BABEL_OP3_305_13040_20140621_000510_inLine +BABEL_OP3_305_13040_20140621_000510_outLine +BABEL_OP3_305_13189_20141107_012921_inLine +BABEL_OP3_305_13189_20141107_012921_outLine +BABEL_OP3_305_13664_20140615_011412_inLine +BABEL_OP3_305_13664_20140615_011412_outLine +BABEL_OP3_305_14158_20150207_011013_inLine +BABEL_OP3_305_14158_20150207_011013_outLine +BABEL_OP3_305_15617_20150318_012704_inLine +BABEL_OP3_305_15617_20150318_012704_outLine +BABEL_OP3_305_15638_20150212_081118_inLine +BABEL_OP3_305_15638_20150212_081118_outLine +BABEL_OP3_305_16249_20140615_022748_inLine +BABEL_OP3_305_16249_20140615_022748_outLine +BABEL_OP3_305_16886_20141016_232346_inLine +BABEL_OP3_305_16886_20141016_232346_outLine +BABEL_OP3_305_17115_20150310_055940_inLine +BABEL_OP3_305_17115_20150310_055940_outLine +BABEL_OP3_305_17511_20150118_093132_inLine +BABEL_OP3_305_17511_20150118_093132_outLine +BABEL_OP3_305_17511_20150118_094117_inLine +BABEL_OP3_305_17511_20150118_094117_outLine +BABEL_OP3_305_17881_20150220_094906_inLine +BABEL_OP3_305_17881_20150220_094906_outLine +BABEL_OP3_305_17890_20150212_094355_inLine +BABEL_OP3_305_17890_20150212_094355_outLine +BABEL_OP3_305_17923_20140801_003933_inLine +BABEL_OP3_305_17923_20140801_003933_outLine +BABEL_OP3_305_18037_20140611_044623_inLine +BABEL_OP3_305_18037_20140611_044623_outLine +BABEL_OP3_305_18566_20150219_072100_inLine +BABEL_OP3_305_18566_20150219_072100_outLine +BABEL_OP3_305_19101_20150123_042130_inLine +BABEL_OP3_305_19101_20150123_042130_outLine +BABEL_OP3_305_19101_20150123_043206_inLine +BABEL_OP3_305_19101_20150123_043206_outLine +BABEL_OP3_305_19621_20150122_072624_inLine +BABEL_OP3_305_19621_20150122_072624_outLine +BABEL_OP3_305_20922_20141107_000604_inLine +BABEL_OP3_305_20922_20141107_000604_outLine +BABEL_OP3_305_21581_20140822_033738_inLine +BABEL_OP3_305_21581_20140822_033738_outLine +BABEL_OP3_305_22624_20150215_050752_inLine +BABEL_OP3_305_22624_20150215_050752_outLine +BABEL_OP3_305_22624_20150215_051632_inLine +BABEL_OP3_305_22624_20150215_051632_outLine +BABEL_OP3_305_23190_20140907_002648_inLine +BABEL_OP3_305_23190_20140907_002648_outLine 
+BABEL_OP3_305_23195_20150328_071332_inLine +BABEL_OP3_305_23195_20150328_071332_outLine +BABEL_OP3_305_23752_20150319_043326_inLine +BABEL_OP3_305_23752_20150319_043326_outLine +BABEL_OP3_305_24323_20141021_014706_inLine +BABEL_OP3_305_24323_20141021_014706_outLine +BABEL_OP3_305_24586_20150227_100127_inLine +BABEL_OP3_305_24586_20150227_100127_outLine +BABEL_OP3_305_24589_20140822_030512_inLine +BABEL_OP3_305_24589_20140822_030512_outLine +BABEL_OP3_305_24924_20150306_061542_inLine +BABEL_OP3_305_24924_20150306_061542_outLine +BABEL_OP3_305_25220_20150311_004737_inLine +BABEL_OP3_305_25220_20150311_004737_outLine +BABEL_OP3_305_25412_20150123_041255_inLine +BABEL_OP3_305_25412_20150123_041255_outLine +BABEL_OP3_305_27042_20150124_044459_inLine +BABEL_OP3_305_27042_20150124_044459_outLine +BABEL_OP3_305_27125_20140618_065021_inLine +BABEL_OP3_305_27125_20140618_065021_outLine +BABEL_OP3_305_28303_20140806_030759_inLine +BABEL_OP3_305_28303_20140806_030759_outLine +BABEL_OP3_305_28477_20141107_050727_inLine +BABEL_OP3_305_28477_20141107_050727_outLine +BABEL_OP3_305_28606_20150213_101119_inLine +BABEL_OP3_305_28606_20150213_101119_outLine +BABEL_OP3_305_29072_20150212_084053_inLine +BABEL_OP3_305_29072_20150212_084053_outLine +BABEL_OP3_305_30280_20150310_080905_inLine +BABEL_OP3_305_30280_20150310_080905_outLine +BABEL_OP3_305_30869_20141030_043630_inLine +BABEL_OP3_305_30869_20141030_043630_outLine +BABEL_OP3_305_31668_20150313_021804_inLine +BABEL_OP3_305_31668_20150313_021804_outLine +BABEL_OP3_305_32708_20140822_052506_inLine +BABEL_OP3_305_32708_20140822_052506_outLine +BABEL_OP3_305_33355_20140619_231328_inLine +BABEL_OP3_305_33355_20140619_231328_outLine +BABEL_OP3_305_33635_20141021_015047_inLine +BABEL_OP3_305_33635_20141021_015047_outLine +BABEL_OP3_305_34145_20150211_103633_inLine +BABEL_OP3_305_34145_20150211_103633_outLine +BABEL_OP3_305_34410_20150319_085843_inLine +BABEL_OP3_305_34410_20150319_085843_outLine +BABEL_OP3_305_35008_20150214_095953_inLine +BABEL_OP3_305_35008_20150214_095953_outLine +BABEL_OP3_305_35609_20150310_091253_inLine +BABEL_OP3_305_35609_20150310_091253_outLine +BABEL_OP3_305_36147_20140612_063038_inLine +BABEL_OP3_305_36147_20140612_063038_outLine +BABEL_OP3_305_37285_20150213_015416_inLine +BABEL_OP3_305_37285_20150213_015416_outLine +BABEL_OP3_305_38741_20140906_040000_inLine +BABEL_OP3_305_38741_20140906_040000_outLine +BABEL_OP3_305_39638_20150328_073733_inLine +BABEL_OP3_305_39638_20150328_073733_outLine +BABEL_OP3_305_39920_20150301_070243_inLine +BABEL_OP3_305_39920_20150301_070243_outLine +BABEL_OP3_305_40092_20140611_040031_inLine +BABEL_OP3_305_40092_20140611_040031_outLine +BABEL_OP3_305_40565_20150210_092106_inLine +BABEL_OP3_305_40565_20150210_092106_outLine +BABEL_OP3_305_41334_20150305_082911_inLine +BABEL_OP3_305_41334_20150305_082911_outLine +BABEL_OP3_305_42231_20150217_080721_inLine +BABEL_OP3_305_42231_20150217_080721_outLine +BABEL_OP3_305_42434_20140822_053733_inLine +BABEL_OP3_305_42434_20140822_053733_outLine +BABEL_OP3_305_42497_20140823_034443_inLine +BABEL_OP3_305_42497_20140823_034443_outLine +BABEL_OP3_305_43789_20141017_015101_inLine +BABEL_OP3_305_43789_20141017_015101_outLine +BABEL_OP3_305_43990_20150312_102420_inLine +BABEL_OP3_305_43990_20150312_102420_outLine +BABEL_OP3_305_44868_20150206_083108_inLine +BABEL_OP3_305_44868_20150206_083108_outLine +BABEL_OP3_305_44961_20140619_013154_inLine +BABEL_OP3_305_44961_20140619_013154_outLine +BABEL_OP3_305_46558_20140905_012017_inLine 
+BABEL_OP3_305_46558_20140905_012017_outLine +BABEL_OP3_305_46558_20140905_013000_inLine +BABEL_OP3_305_46558_20140905_013000_outLine +BABEL_OP3_305_46589_20150207_091824_inLine +BABEL_OP3_305_46589_20150207_091824_outLine +BABEL_OP3_305_46702_20140619_050719_inLine +BABEL_OP3_305_46702_20140619_050719_outLine +BABEL_OP3_305_47823_20150214_081513_inLine +BABEL_OP3_305_47823_20150214_081513_outLine +BABEL_OP3_305_49641_20140613_041400_inLine +BABEL_OP3_305_49641_20140613_041400_outLine +BABEL_OP3_305_49902_20140809_050813_inLine +BABEL_OP3_305_49902_20140809_050813_outLine +BABEL_OP3_305_50186_20140619_044546_inLine +BABEL_OP3_305_50186_20140619_044546_outLine +BABEL_OP3_305_50186_20140619_045904_inLine +BABEL_OP3_305_50186_20140619_045904_outLine +BABEL_OP3_305_50565_20140612_072129_inLine +BABEL_OP3_305_50565_20140612_072129_outLine +BABEL_OP3_305_50745_20150219_082842_inLine +BABEL_OP3_305_50745_20150219_082842_outLine +BABEL_OP3_305_51819_20150210_085538_inLine +BABEL_OP3_305_51819_20150210_085538_outLine +BABEL_OP3_305_52404_20150208_070706_inLine +BABEL_OP3_305_52404_20150208_070706_outLine +BABEL_OP3_305_52818_20150206_104316_inLine +BABEL_OP3_305_52818_20150206_104316_outLine +BABEL_OP3_305_52854_20140620_010725_inLine +BABEL_OP3_305_52854_20140620_010725_outLine +BABEL_OP3_305_53144_20150220_084533_inLine +BABEL_OP3_305_53144_20150220_084533_outLine +BABEL_OP3_305_54594_20150114_073509_inLine +BABEL_OP3_305_54594_20150114_073509_outLine +BABEL_OP3_305_55042_20140614_022059_inLine +BABEL_OP3_305_55042_20140614_022059_outLine +BABEL_OP3_305_55106_20150221_080452_inLine +BABEL_OP3_305_55106_20150221_080452_outLine +BABEL_OP3_305_57609_20150127_040742_inLine +BABEL_OP3_305_57609_20150127_040742_outLine +BABEL_OP3_305_57935_20150203_072757_inLine +BABEL_OP3_305_57935_20150203_072757_outLine +BABEL_OP3_305_59549_20140620_001253_inLine +BABEL_OP3_305_59549_20140620_001253_outLine +BABEL_OP3_305_59720_20140807_043323_inLine +BABEL_OP3_305_59720_20140807_043323_outLine +BABEL_OP3_305_60115_20150211_025109_inLine +BABEL_OP3_305_60115_20150211_025109_outLine +BABEL_OP3_305_60282_20140612_025229_inLine +BABEL_OP3_305_60282_20140612_025229_outLine +BABEL_OP3_305_60477_20150304_092057_inLine +BABEL_OP3_305_60477_20150304_092057_outLine +BABEL_OP3_305_60626_20141018_012739_inLine +BABEL_OP3_305_60626_20141018_012739_outLine +BABEL_OP3_305_60650_20150331_055502_inLine +BABEL_OP3_305_60650_20150331_055502_outLine +BABEL_OP3_305_60830_20141017_004525_inLine +BABEL_OP3_305_60830_20141017_004525_outLine +BABEL_OP3_305_60830_20141017_053807_inLine +BABEL_OP3_305_60830_20141017_053807_outLine +BABEL_OP3_305_61348_20141017_014818_inLine +BABEL_OP3_305_61348_20141017_014818_outLine +BABEL_OP3_305_61348_20141017_060653_inLine +BABEL_OP3_305_61348_20141017_060653_outLine +BABEL_OP3_305_61873_20150123_024415_inLine +BABEL_OP3_305_61873_20150123_024415_outLine +BABEL_OP3_305_62200_20141017_014602_inLine +BABEL_OP3_305_62200_20141017_014602_outLine +BABEL_OP3_305_62471_20140619_072350_inLine +BABEL_OP3_305_62471_20140619_072350_outLine +BABEL_OP3_305_63084_20150207_074116_inLine +BABEL_OP3_305_63084_20150207_074116_outLine +BABEL_OP3_305_64469_20140620_063122_inLine +BABEL_OP3_305_64469_20140620_063122_outLine +BABEL_OP3_305_64768_20140822_043008_inLine +BABEL_OP3_305_64768_20140822_043008_outLine +BABEL_OP3_305_64902_20150220_102326_inLine +BABEL_OP3_305_64902_20150220_102326_outLine +BABEL_OP3_305_65477_20141016_234600_inLine +BABEL_OP3_305_65477_20141016_234600_outLine 
+BABEL_OP3_305_65477_20141016_235812_inLine +BABEL_OP3_305_65477_20141016_235812_outLine +BABEL_OP3_305_65692_20150127_044937_inLine +BABEL_OP3_305_65692_20150127_044937_outLine +BABEL_OP3_305_66045_20140822_062953_inLine +BABEL_OP3_305_66045_20140822_062953_outLine +BABEL_OP3_305_66177_20150221_091456_inLine +BABEL_OP3_305_66177_20150221_091456_outLine +BABEL_OP3_305_66975_20140615_024703_inLine +BABEL_OP3_305_66975_20140615_024703_outLine +BABEL_OP3_305_67053_20150312_031258_inLine +BABEL_OP3_305_67053_20150312_031258_outLine +BABEL_OP3_305_67283_20140618_075016_inLine +BABEL_OP3_305_67283_20140618_075016_outLine +BABEL_OP3_305_67842_20140906_014501_inLine +BABEL_OP3_305_67842_20140906_014501_outLine +BABEL_OP3_305_68244_20150208_045135_inLine +BABEL_OP3_305_68244_20150208_045135_outLine +BABEL_OP3_305_68668_20140614_053023_inLine +BABEL_OP3_305_68668_20140614_053023_outLine +BABEL_OP3_305_69574_20140618_231512_inLine +BABEL_OP3_305_69574_20140618_231512_outLine +BABEL_OP3_305_70282_20150127_012555_inLine +BABEL_OP3_305_70282_20150127_012555_outLine +BABEL_OP3_305_70794_20140614_073231_inLine +BABEL_OP3_305_70794_20140614_073231_outLine +BABEL_OP3_305_70986_20150320_092518_inLine +BABEL_OP3_305_70986_20150320_092518_outLine +BABEL_OP3_305_71189_20150227_092723_inLine +BABEL_OP3_305_71189_20150227_092723_outLine +BABEL_OP3_305_71278_20140614_040622_inLine +BABEL_OP3_305_71278_20140614_040622_outLine +BABEL_OP3_305_71282_20141028_054244_inLine +BABEL_OP3_305_71282_20141028_054244_outLine +BABEL_OP3_305_71566_20150217_074338_inLine +BABEL_OP3_305_71566_20150217_074338_outLine +BABEL_OP3_305_72110_20150214_074424_inLine +BABEL_OP3_305_72110_20150214_074424_outLine +BABEL_OP3_305_72903_20140612_021516_inLine +BABEL_OP3_305_72903_20140612_021516_outLine +BABEL_OP3_305_74667_20140904_050532_inLine +BABEL_OP3_305_74667_20140904_050532_outLine +BABEL_OP3_305_74763_20140612_011204_inLine +BABEL_OP3_305_74763_20140612_011204_outLine +BABEL_OP3_305_74921_20150208_081422_inLine +BABEL_OP3_305_74921_20150208_081422_outLine +BABEL_OP3_305_75064_20140621_012128_inLine +BABEL_OP3_305_75064_20140621_012128_outLine +BABEL_OP3_305_75223_20140618_232223_inLine +BABEL_OP3_305_75223_20140618_232223_outLine +BABEL_OP3_305_76126_20150122_051345_inLine +BABEL_OP3_305_76126_20150122_051345_outLine +BABEL_OP3_305_76437_20140615_023448_inLine +BABEL_OP3_305_76437_20140615_023448_outLine +BABEL_OP3_305_77744_20140821_052246_inLine +BABEL_OP3_305_77744_20140821_052246_outLine +BABEL_OP3_305_77909_20140613_035103_inLine +BABEL_OP3_305_77909_20140613_035103_outLine +BABEL_OP3_305_78016_20140821_222210_inLine +BABEL_OP3_305_78016_20140821_222210_outLine +BABEL_OP3_305_79167_20150208_063508_inLine +BABEL_OP3_305_79167_20150208_063508_outLine +BABEL_OP3_305_79571_20150211_095226_inLine +BABEL_OP3_305_79571_20150211_095226_outLine +BABEL_OP3_305_79751_20140821_233858_inLine +BABEL_OP3_305_79751_20140821_233858_outLine +BABEL_OP3_305_80241_20140612_015921_inLine +BABEL_OP3_305_80241_20140612_015921_outLine +BABEL_OP3_305_80577_20150221_073930_inLine +BABEL_OP3_305_80577_20150221_073930_outLine +BABEL_OP3_305_81213_20140822_005322_inLine +BABEL_OP3_305_81213_20140822_005322_outLine +BABEL_OP3_305_82622_20140619_013825_inLine +BABEL_OP3_305_82622_20140619_013825_outLine +BABEL_OP3_305_84061_20140807_063818_inLine +BABEL_OP3_305_84061_20140807_063818_outLine +BABEL_OP3_305_84177_20141021_041721_inLine +BABEL_OP3_305_84177_20141021_041721_outLine +BABEL_OP3_305_84177_20141021_043314_inLine 
+BABEL_OP3_305_84177_20141021_043314_outLine +BABEL_OP3_305_84327_20150213_084928_inLine +BABEL_OP3_305_84327_20150213_084928_outLine +BABEL_OP3_305_84823_20150122_033347_inLine +BABEL_OP3_305_84823_20150122_033347_outLine +BABEL_OP3_305_84838_20141022_010706_inLine +BABEL_OP3_305_84838_20141022_010706_outLine +BABEL_OP3_305_85047_20150122_030457_inLine +BABEL_OP3_305_85047_20150122_030457_outLine +BABEL_OP3_305_85254_20150303_094409_inLine +BABEL_OP3_305_85254_20150303_094409_outLine +BABEL_OP3_305_85260_20140611_042042_inLine +BABEL_OP3_305_85260_20140611_042042_outLine +BABEL_OP3_305_85322_20140822_031748_inLine +BABEL_OP3_305_85322_20140822_031748_outLine +BABEL_OP3_305_85340_20140826_232921_inLine +BABEL_OP3_305_85340_20140826_232921_outLine +BABEL_OP3_305_85519_20141022_033125_inLine +BABEL_OP3_305_85519_20141022_033125_outLine +BABEL_OP3_305_86100_20140612_024853_inLine +BABEL_OP3_305_86100_20140612_024853_outLine +BABEL_OP3_305_86321_20150218_084806_inLine +BABEL_OP3_305_86321_20150218_084806_outLine +BABEL_OP3_305_86597_20150305_020834_inLine +BABEL_OP3_305_86597_20150305_020834_outLine +BABEL_OP3_305_86830_20141028_051738_inLine +BABEL_OP3_305_86830_20141028_051738_outLine +BABEL_OP3_305_86878_20150115_115301_inLine +BABEL_OP3_305_86878_20150115_115301_outLine +BABEL_OP3_305_87179_20141021_010357_inLine +BABEL_OP3_305_87179_20141021_010357_outLine +BABEL_OP3_305_88669_20150226_082958_inLine +BABEL_OP3_305_88669_20150226_082958_outLine +BABEL_OP3_305_89330_20150305_075359_inLine +BABEL_OP3_305_89330_20150305_075359_outLine +BABEL_OP3_305_89372_20140806_054633_inLine +BABEL_OP3_305_89372_20140806_054633_outLine +BABEL_OP3_305_89650_20140606_064449_inLine +BABEL_OP3_305_89650_20140606_064449_outLine +BABEL_OP3_305_90572_20150221_011508_inLine +BABEL_OP3_305_90572_20150221_011508_outLine +BABEL_OP3_305_90739_20140910_010202_inLine +BABEL_OP3_305_90739_20140910_010202_outLine +BABEL_OP3_305_90739_20140910_011127_inLine +BABEL_OP3_305_90739_20140910_011127_outLine +BABEL_OP3_305_90777_20140910_031558_inLine +BABEL_OP3_305_90777_20140910_031558_outLine +BABEL_OP3_305_90930_20140612_073132_inLine +BABEL_OP3_305_90930_20140612_073132_outLine +BABEL_OP3_305_91252_20140612_013640_inLine +BABEL_OP3_305_91252_20140612_013640_outLine +BABEL_OP3_305_91463_20150204_001924_inLine +BABEL_OP3_305_91463_20150204_001924_outLine +BABEL_OP3_305_91977_20150210_012536_inLine +BABEL_OP3_305_91977_20150210_012536_outLine +BABEL_OP3_305_92281_20150312_104117_inLine +BABEL_OP3_305_92281_20150312_104117_outLine +BABEL_OP3_305_92509_20140620_020408_inLine +BABEL_OP3_305_92509_20140620_020408_outLine +BABEL_OP3_305_92605_20150312_090817_inLine +BABEL_OP3_305_92605_20150312_090817_outLine +BABEL_OP3_305_92740_20150210_020753_inLine +BABEL_OP3_305_92740_20150210_020753_outLine +BABEL_OP3_305_93320_20150305_072620_inLine +BABEL_OP3_305_93320_20150305_072620_outLine +BABEL_OP3_305_93681_20141018_044334_inLine +BABEL_OP3_305_93681_20141018_044334_outLine +BABEL_OP3_305_93861_20150123_004904_inLine +BABEL_OP3_305_93861_20150123_004904_outLine +BABEL_OP3_305_94253_20140902_015125_inLine +BABEL_OP3_305_94253_20140902_015125_outLine +BABEL_OP3_305_94587_20150214_091538_inLine +BABEL_OP3_305_94587_20150214_091538_outLine +BABEL_OP3_305_94713_20150114_082431_inLine +BABEL_OP3_305_94713_20150114_082431_outLine +BABEL_OP3_305_95598_20140615_025323_inLine +BABEL_OP3_305_95598_20140615_025323_outLine +BABEL_OP3_305_95903_20150222_060503_inLine +BABEL_OP3_305_95903_20150222_060503_outLine 
+BABEL_OP3_305_95942_20150227_105233_inLine +BABEL_OP3_305_95942_20150227_105233_outLine +BABEL_OP3_305_96504_20140808_230422_inLine +BABEL_OP3_305_96504_20140808_230422_outLine +BABEL_OP3_305_96504_20140808_231336_inLine +BABEL_OP3_305_96504_20140808_231336_outLine +BABEL_OP3_305_97772_20140618_074519_inLine +BABEL_OP3_305_97772_20140618_074519_outLine +BABEL_OP3_305_97896_20140904_071346_inLine +BABEL_OP3_305_97896_20140904_071346_outLine +BABEL_OP3_305_98255_20150115_095803_inLine +BABEL_OP3_305_98255_20150115_095803_outLine +BABEL_OP3_305_98255_20150115_101856_inLine +BABEL_OP3_305_98255_20150115_101856_outLine +BABEL_OP3_305_98909_20140730_054930_inLine +BABEL_OP3_305_98909_20140730_054930_outLine +BABEL_OP3_305_98909_20140730_055859_inLine +BABEL_OP3_305_98909_20140730_055859_outLine +BABEL_OP3_305_99289_20150227_102036_inLine +BABEL_OP3_305_99289_20150227_102036_outLine +BABEL_OP3_305_99516_20140620_054149_inLine +BABEL_OP3_305_99516_20140620_054149_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/training.list b/egs/babel/s5d/conf/lists/305-guarani/training.list new file mode 100644 index 00000000000..d191e6ac974 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/training.list @@ -0,0 +1,526 @@ +BABEL_OP3_305_10901_20141017_014336_inLine +BABEL_OP3_305_10901_20141017_014336_outLine +BABEL_OP3_305_11419_20140620_004343_inLine +BABEL_OP3_305_11419_20140620_004343_outLine +BABEL_OP3_305_11723_20141021_053536_inLine +BABEL_OP3_305_11723_20141021_053536_outLine +BABEL_OP3_305_12242_20140808_034042_inLine +BABEL_OP3_305_12242_20140808_034042_outLine +BABEL_OP3_305_12242_20140808_035409_inLine +BABEL_OP3_305_12242_20140808_035409_outLine +BABEL_OP3_305_12851_20140618_061651_inLine +BABEL_OP3_305_12851_20140618_061651_outLine +BABEL_OP3_305_13040_20140621_000510_inLine +BABEL_OP3_305_13040_20140621_000510_outLine +BABEL_OP3_305_13189_20141107_012921_inLine +BABEL_OP3_305_13189_20141107_012921_outLine +BABEL_OP3_305_13664_20140615_011412_inLine +BABEL_OP3_305_13664_20140615_011412_outLine +BABEL_OP3_305_14158_20150207_011013_inLine +BABEL_OP3_305_14158_20150207_011013_outLine +BABEL_OP3_305_14814_20140621_005436_inLine +BABEL_OP3_305_14814_20140621_005436_outLine +BABEL_OP3_305_14814_20140621_011333_inLine +BABEL_OP3_305_14814_20140621_011333_outLine +BABEL_OP3_305_15617_20150318_012704_inLine +BABEL_OP3_305_15617_20150318_012704_outLine +BABEL_OP3_305_15638_20150212_081118_inLine +BABEL_OP3_305_15638_20150212_081118_outLine +BABEL_OP3_305_15926_20150211_090843_inLine +BABEL_OP3_305_15926_20150211_090843_outLine +BABEL_OP3_305_16249_20140615_022748_inLine +BABEL_OP3_305_16249_20140615_022748_outLine +BABEL_OP3_305_16886_20141016_232346_inLine +BABEL_OP3_305_16886_20141016_232346_outLine +BABEL_OP3_305_17032_20150213_094305_inLine +BABEL_OP3_305_17032_20150213_094305_outLine +BABEL_OP3_305_17032_20150213_095552_inLine +BABEL_OP3_305_17032_20150213_095552_outLine +BABEL_OP3_305_17115_20150310_055940_inLine +BABEL_OP3_305_17115_20150310_055940_outLine +BABEL_OP3_305_17511_20150118_093132_inLine +BABEL_OP3_305_17511_20150118_093132_outLine +BABEL_OP3_305_17511_20150118_094117_inLine +BABEL_OP3_305_17511_20150118_094117_outLine +BABEL_OP3_305_17881_20150220_094906_inLine +BABEL_OP3_305_17881_20150220_094906_outLine +BABEL_OP3_305_17890_20150212_094355_inLine +BABEL_OP3_305_17890_20150212_094355_outLine +BABEL_OP3_305_17923_20140801_003933_inLine +BABEL_OP3_305_17923_20140801_003933_outLine +BABEL_OP3_305_18037_20140611_044623_inLine +BABEL_OP3_305_18037_20140611_044623_outLine 
+BABEL_OP3_305_18566_20150219_072100_inLine +BABEL_OP3_305_18566_20150219_072100_outLine +BABEL_OP3_305_19101_20150123_042130_inLine +BABEL_OP3_305_19101_20150123_042130_outLine +BABEL_OP3_305_19101_20150123_043206_inLine +BABEL_OP3_305_19101_20150123_043206_outLine +BABEL_OP3_305_19621_20150122_072624_inLine +BABEL_OP3_305_19621_20150122_072624_outLine +BABEL_OP3_305_20454_20140619_022112_inLine +BABEL_OP3_305_20454_20140619_022112_outLine +BABEL_OP3_305_20922_20141107_000604_inLine +BABEL_OP3_305_20922_20141107_000604_outLine +BABEL_OP3_305_21543_20141018_050405_inLine +BABEL_OP3_305_21543_20141018_050405_outLine +BABEL_OP3_305_21581_20140822_033738_inLine +BABEL_OP3_305_21581_20140822_033738_outLine +BABEL_OP3_305_21794_20141021_022208_inLine +BABEL_OP3_305_21794_20141021_022208_outLine +BABEL_OP3_305_22624_20150215_050752_inLine +BABEL_OP3_305_22624_20150215_050752_outLine +BABEL_OP3_305_22624_20150215_051632_inLine +BABEL_OP3_305_22624_20150215_051632_outLine +BABEL_OP3_305_23190_20140907_002648_inLine +BABEL_OP3_305_23190_20140907_002648_outLine +BABEL_OP3_305_23195_20150328_071332_inLine +BABEL_OP3_305_23195_20150328_071332_outLine +BABEL_OP3_305_23752_20150319_043326_inLine +BABEL_OP3_305_23752_20150319_043326_outLine +BABEL_OP3_305_24323_20141021_014706_inLine +BABEL_OP3_305_24323_20141021_014706_outLine +BABEL_OP3_305_24586_20150227_100127_inLine +BABEL_OP3_305_24586_20150227_100127_outLine +BABEL_OP3_305_24589_20140822_030512_inLine +BABEL_OP3_305_24589_20140822_030512_outLine +BABEL_OP3_305_24924_20150306_061542_inLine +BABEL_OP3_305_24924_20150306_061542_outLine +BABEL_OP3_305_25220_20150311_004737_inLine +BABEL_OP3_305_25220_20150311_004737_outLine +BABEL_OP3_305_25412_20150123_041255_inLine +BABEL_OP3_305_25412_20150123_041255_outLine +BABEL_OP3_305_27042_20150124_044459_inLine +BABEL_OP3_305_27042_20150124_044459_outLine +BABEL_OP3_305_27125_20140618_065021_inLine +BABEL_OP3_305_27125_20140618_065021_outLine +BABEL_OP3_305_28303_20140806_030759_inLine +BABEL_OP3_305_28303_20140806_030759_outLine +BABEL_OP3_305_28477_20141107_050727_inLine +BABEL_OP3_305_28477_20141107_050727_outLine +BABEL_OP3_305_28606_20150213_101119_inLine +BABEL_OP3_305_28606_20150213_101119_outLine +BABEL_OP3_305_29072_20150212_084053_inLine +BABEL_OP3_305_29072_20150212_084053_outLine +BABEL_OP3_305_30180_20140906_231005_inLine +BABEL_OP3_305_30180_20140906_231005_outLine +BABEL_OP3_305_30280_20150310_080905_inLine +BABEL_OP3_305_30280_20150310_080905_outLine +BABEL_OP3_305_30869_20141030_043630_inLine +BABEL_OP3_305_30869_20141030_043630_outLine +BABEL_OP3_305_31668_20150313_021804_inLine +BABEL_OP3_305_31668_20150313_021804_outLine +BABEL_OP3_305_32708_20140822_052506_inLine +BABEL_OP3_305_32708_20140822_052506_outLine +BABEL_OP3_305_33111_20150228_023906_inLine +BABEL_OP3_305_33111_20150228_023906_outLine +BABEL_OP3_305_33149_20141021_034616_inLine +BABEL_OP3_305_33149_20141021_034616_outLine +BABEL_OP3_305_33251_20150130_021517_inLine +BABEL_OP3_305_33251_20150130_021517_outLine +BABEL_OP3_305_33355_20140619_231328_inLine +BABEL_OP3_305_33355_20140619_231328_outLine +BABEL_OP3_305_33635_20141021_015047_inLine +BABEL_OP3_305_33635_20141021_015047_outLine +BABEL_OP3_305_34145_20150211_103633_inLine +BABEL_OP3_305_34145_20150211_103633_outLine +BABEL_OP3_305_34410_20150319_085843_inLine +BABEL_OP3_305_34410_20150319_085843_outLine +BABEL_OP3_305_34629_20150327_010455_inLine +BABEL_OP3_305_34629_20150327_010455_outLine +BABEL_OP3_305_35008_20150214_095953_inLine 
+BABEL_OP3_305_35008_20150214_095953_outLine +BABEL_OP3_305_35467_20140806_032442_inLine +BABEL_OP3_305_35467_20140806_032442_outLine +BABEL_OP3_305_35609_20150310_091253_inLine +BABEL_OP3_305_35609_20150310_091253_outLine +BABEL_OP3_305_35706_20150221_093541_inLine +BABEL_OP3_305_35706_20150221_093541_outLine +BABEL_OP3_305_36147_20140612_063038_inLine +BABEL_OP3_305_36147_20140612_063038_outLine +BABEL_OP3_305_37007_20150331_081658_inLine +BABEL_OP3_305_37007_20150331_081658_outLine +BABEL_OP3_305_37285_20150213_015416_inLine +BABEL_OP3_305_37285_20150213_015416_outLine +BABEL_OP3_305_38664_20140807_042817_inLine +BABEL_OP3_305_38664_20140807_042817_outLine +BABEL_OP3_305_38741_20140906_040000_inLine +BABEL_OP3_305_38741_20140906_040000_outLine +BABEL_OP3_305_39638_20150328_073733_inLine +BABEL_OP3_305_39638_20150328_073733_outLine +BABEL_OP3_305_39920_20150301_070243_inLine +BABEL_OP3_305_39920_20150301_070243_outLine +BABEL_OP3_305_40092_20140611_040031_inLine +BABEL_OP3_305_40092_20140611_040031_outLine +BABEL_OP3_305_40565_20150210_092106_inLine +BABEL_OP3_305_40565_20150210_092106_outLine +BABEL_OP3_305_41334_20150305_082911_inLine +BABEL_OP3_305_41334_20150305_082911_outLine +BABEL_OP3_305_42231_20150217_080721_inLine +BABEL_OP3_305_42231_20150217_080721_outLine +BABEL_OP3_305_42434_20140822_053733_inLine +BABEL_OP3_305_42434_20140822_053733_outLine +BABEL_OP3_305_42497_20140823_034443_inLine +BABEL_OP3_305_42497_20140823_034443_outLine +BABEL_OP3_305_43368_20140822_071919_inLine +BABEL_OP3_305_43368_20140822_071919_outLine +BABEL_OP3_305_43789_20141017_015101_inLine +BABEL_OP3_305_43789_20141017_015101_outLine +BABEL_OP3_305_43990_20150312_102420_inLine +BABEL_OP3_305_43990_20150312_102420_outLine +BABEL_OP3_305_44868_20150206_083108_inLine +BABEL_OP3_305_44868_20150206_083108_outLine +BABEL_OP3_305_44961_20140619_013154_inLine +BABEL_OP3_305_44961_20140619_013154_outLine +BABEL_OP3_305_45486_20150331_070439_inLine +BABEL_OP3_305_45486_20150331_070439_outLine +BABEL_OP3_305_46558_20140905_012017_inLine +BABEL_OP3_305_46558_20140905_012017_outLine +BABEL_OP3_305_46558_20140905_013000_inLine +BABEL_OP3_305_46558_20140905_013000_outLine +BABEL_OP3_305_46589_20150207_091824_inLine +BABEL_OP3_305_46589_20150207_091824_outLine +BABEL_OP3_305_46681_20140729_053142_inLine +BABEL_OP3_305_46681_20140729_053142_outLine +BABEL_OP3_305_46688_20140620_060408_inLine +BABEL_OP3_305_46688_20140620_060408_outLine +BABEL_OP3_305_46702_20140619_050719_inLine +BABEL_OP3_305_46702_20140619_050719_outLine +BABEL_OP3_305_46757_20150211_011836_inLine +BABEL_OP3_305_46757_20150211_011836_outLine +BABEL_OP3_305_46757_20150211_013224_inLine +BABEL_OP3_305_46757_20150211_013224_outLine +BABEL_OP3_305_47823_20150214_081513_inLine +BABEL_OP3_305_47823_20150214_081513_outLine +BABEL_OP3_305_48844_20140621_034908_inLine +BABEL_OP3_305_48844_20140621_034908_outLine +BABEL_OP3_305_48844_20140621_035628_inLine +BABEL_OP3_305_48844_20140621_035628_outLine +BABEL_OP3_305_49641_20140613_041400_inLine +BABEL_OP3_305_49641_20140613_041400_outLine +BABEL_OP3_305_49768_20140731_031152_inLine +BABEL_OP3_305_49768_20140731_031152_outLine +BABEL_OP3_305_49902_20140809_050813_inLine +BABEL_OP3_305_49902_20140809_050813_outLine +BABEL_OP3_305_50186_20140619_044546_inLine +BABEL_OP3_305_50186_20140619_044546_outLine +BABEL_OP3_305_50186_20140619_045904_inLine +BABEL_OP3_305_50186_20140619_045904_outLine +BABEL_OP3_305_50565_20140612_072129_inLine +BABEL_OP3_305_50565_20140612_072129_outLine 
+BABEL_OP3_305_50745_20150219_082842_inLine +BABEL_OP3_305_50745_20150219_082842_outLine +BABEL_OP3_305_51015_20150211_235649_inLine +BABEL_OP3_305_51015_20150211_235649_outLine +BABEL_OP3_305_51611_20140619_070031_inLine +BABEL_OP3_305_51611_20140619_070031_outLine +BABEL_OP3_305_51611_20140619_071006_inLine +BABEL_OP3_305_51611_20140619_071006_outLine +BABEL_OP3_305_51819_20150210_085538_inLine +BABEL_OP3_305_51819_20150210_085538_outLine +BABEL_OP3_305_52404_20150208_070706_inLine +BABEL_OP3_305_52404_20150208_070706_outLine +BABEL_OP3_305_52725_20150227_111722_inLine +BABEL_OP3_305_52725_20150227_111722_outLine +BABEL_OP3_305_52818_20150206_104316_inLine +BABEL_OP3_305_52818_20150206_104316_outLine +BABEL_OP3_305_52854_20140620_010725_inLine +BABEL_OP3_305_52854_20140620_010725_outLine +BABEL_OP3_305_53144_20150220_084533_inLine +BABEL_OP3_305_53144_20150220_084533_outLine +BABEL_OP3_305_54594_20150114_073509_inLine +BABEL_OP3_305_54594_20150114_073509_outLine +BABEL_OP3_305_55042_20140614_022059_inLine +BABEL_OP3_305_55042_20140614_022059_outLine +BABEL_OP3_305_55106_20150221_080452_inLine +BABEL_OP3_305_55106_20150221_080452_outLine +BABEL_OP3_305_55815_20140612_000452_inLine +BABEL_OP3_305_55815_20140612_000452_outLine +BABEL_OP3_305_55818_20140620_003329_inLine +BABEL_OP3_305_55818_20140620_003329_outLine +BABEL_OP3_305_56198_20140904_224843_inLine +BABEL_OP3_305_56198_20140904_224843_outLine +BABEL_OP3_305_57116_20140618_021028_inLine +BABEL_OP3_305_57116_20140618_021028_outLine +BABEL_OP3_305_57609_20150127_040742_inLine +BABEL_OP3_305_57609_20150127_040742_outLine +BABEL_OP3_305_57654_20140917_034820_inLine +BABEL_OP3_305_57654_20140917_034820_outLine +BABEL_OP3_305_57935_20150203_072757_inLine +BABEL_OP3_305_57935_20150203_072757_outLine +BABEL_OP3_305_58061_20150326_103607_inLine +BABEL_OP3_305_58061_20150326_103607_outLine +BABEL_OP3_305_58734_20140620_003259_inLine +BABEL_OP3_305_58734_20140620_003259_outLine +BABEL_OP3_305_59549_20140620_001253_inLine +BABEL_OP3_305_59549_20140620_001253_outLine +BABEL_OP3_305_59720_20140807_043323_inLine +BABEL_OP3_305_59720_20140807_043323_outLine +BABEL_OP3_305_60115_20150211_025109_inLine +BABEL_OP3_305_60115_20150211_025109_outLine +BABEL_OP3_305_60282_20140612_025229_inLine +BABEL_OP3_305_60282_20140612_025229_outLine +BABEL_OP3_305_60477_20150304_092057_inLine +BABEL_OP3_305_60477_20150304_092057_outLine +BABEL_OP3_305_60626_20141018_012739_inLine +BABEL_OP3_305_60626_20141018_012739_outLine +BABEL_OP3_305_60650_20150331_055502_inLine +BABEL_OP3_305_60650_20150331_055502_outLine +BABEL_OP3_305_60830_20141017_004525_inLine +BABEL_OP3_305_60830_20141017_004525_outLine +BABEL_OP3_305_60830_20141017_053807_inLine +BABEL_OP3_305_60830_20141017_053807_outLine +BABEL_OP3_305_61348_20141017_014818_inLine +BABEL_OP3_305_61348_20141017_014818_outLine +BABEL_OP3_305_61348_20141017_060653_inLine +BABEL_OP3_305_61348_20141017_060653_outLine +BABEL_OP3_305_61873_20150123_024415_inLine +BABEL_OP3_305_61873_20150123_024415_outLine +BABEL_OP3_305_62158_20150313_013514_inLine +BABEL_OP3_305_62158_20150313_013514_outLine +BABEL_OP3_305_62200_20141017_014602_inLine +BABEL_OP3_305_62200_20141017_014602_outLine +BABEL_OP3_305_62471_20140619_072350_inLine +BABEL_OP3_305_62471_20140619_072350_outLine +BABEL_OP3_305_62734_20140821_221916_inLine +BABEL_OP3_305_62734_20140821_221916_outLine +BABEL_OP3_305_62852_20140618_072924_inLine +BABEL_OP3_305_62852_20140618_072924_outLine +BABEL_OP3_305_63084_20150207_074116_inLine 
+BABEL_OP3_305_63084_20150207_074116_outLine +BABEL_OP3_305_64469_20140620_063122_inLine +BABEL_OP3_305_64469_20140620_063122_outLine +BABEL_OP3_305_64768_20140822_043008_inLine +BABEL_OP3_305_64768_20140822_043008_outLine +BABEL_OP3_305_64902_20150220_102326_inLine +BABEL_OP3_305_64902_20150220_102326_outLine +BABEL_OP3_305_65466_20150222_074001_inLine +BABEL_OP3_305_65466_20150222_074001_outLine +BABEL_OP3_305_65477_20141016_234600_inLine +BABEL_OP3_305_65477_20141016_234600_outLine +BABEL_OP3_305_65477_20141016_235812_inLine +BABEL_OP3_305_65477_20141016_235812_outLine +BABEL_OP3_305_65692_20150127_044937_inLine +BABEL_OP3_305_65692_20150127_044937_outLine +BABEL_OP3_305_66045_20140822_062953_inLine +BABEL_OP3_305_66045_20140822_062953_outLine +BABEL_OP3_305_66177_20150221_091456_inLine +BABEL_OP3_305_66177_20150221_091456_outLine +BABEL_OP3_305_66967_20140618_044613_inLine +BABEL_OP3_305_66967_20140618_044613_outLine +BABEL_OP3_305_66975_20140615_024703_inLine +BABEL_OP3_305_66975_20140615_024703_outLine +BABEL_OP3_305_67053_20150312_031258_inLine +BABEL_OP3_305_67053_20150312_031258_outLine +BABEL_OP3_305_67283_20140618_075016_inLine +BABEL_OP3_305_67283_20140618_075016_outLine +BABEL_OP3_305_67373_20140822_005349_inLine +BABEL_OP3_305_67373_20140822_005349_outLine +BABEL_OP3_305_67389_20150317_083510_inLine +BABEL_OP3_305_67389_20150317_083510_outLine +BABEL_OP3_305_67842_20140906_014501_inLine +BABEL_OP3_305_67842_20140906_014501_outLine +BABEL_OP3_305_68068_20150206_100103_inLine +BABEL_OP3_305_68068_20150206_100103_outLine +BABEL_OP3_305_68244_20150208_045135_inLine +BABEL_OP3_305_68244_20150208_045135_outLine +BABEL_OP3_305_68668_20140614_053023_inLine +BABEL_OP3_305_68668_20140614_053023_outLine +BABEL_OP3_305_69090_20141018_010121_inLine +BABEL_OP3_305_69090_20141018_010121_outLine +BABEL_OP3_305_69574_20140618_231512_inLine +BABEL_OP3_305_69574_20140618_231512_outLine +BABEL_OP3_305_70251_20140618_233739_inLine +BABEL_OP3_305_70251_20140618_233739_outLine +BABEL_OP3_305_70282_20150127_012555_inLine +BABEL_OP3_305_70282_20150127_012555_outLine +BABEL_OP3_305_70794_20140614_073231_inLine +BABEL_OP3_305_70794_20140614_073231_outLine +BABEL_OP3_305_70986_20150320_092518_inLine +BABEL_OP3_305_70986_20150320_092518_outLine +BABEL_OP3_305_71189_20150227_092723_inLine +BABEL_OP3_305_71189_20150227_092723_outLine +BABEL_OP3_305_71278_20140614_040622_inLine +BABEL_OP3_305_71278_20140614_040622_outLine +BABEL_OP3_305_71282_20141028_054244_inLine +BABEL_OP3_305_71282_20141028_054244_outLine +BABEL_OP3_305_71333_20140808_025232_inLine +BABEL_OP3_305_71333_20140808_025232_outLine +BABEL_OP3_305_71566_20150217_074338_inLine +BABEL_OP3_305_71566_20150217_074338_outLine +BABEL_OP3_305_72110_20150214_074424_inLine +BABEL_OP3_305_72110_20150214_074424_outLine +BABEL_OP3_305_72903_20140612_021516_inLine +BABEL_OP3_305_72903_20140612_021516_outLine +BABEL_OP3_305_73301_20140808_235747_inLine +BABEL_OP3_305_73301_20140808_235747_outLine +BABEL_OP3_305_74667_20140904_050532_inLine +BABEL_OP3_305_74667_20140904_050532_outLine +BABEL_OP3_305_74763_20140612_011204_inLine +BABEL_OP3_305_74763_20140612_011204_outLine +BABEL_OP3_305_74921_20150208_081422_inLine +BABEL_OP3_305_74921_20150208_081422_outLine +BABEL_OP3_305_75064_20140621_012128_inLine +BABEL_OP3_305_75064_20140621_012128_outLine +BABEL_OP3_305_75223_20140618_232223_inLine +BABEL_OP3_305_75223_20140618_232223_outLine +BABEL_OP3_305_76126_20150122_051345_inLine +BABEL_OP3_305_76126_20150122_051345_outLine 
+BABEL_OP3_305_76437_20140615_023448_inLine +BABEL_OP3_305_76437_20140615_023448_outLine +BABEL_OP3_305_77225_20140612_003002_inLine +BABEL_OP3_305_77225_20140612_003002_outLine +BABEL_OP3_305_77744_20140821_052246_inLine +BABEL_OP3_305_77744_20140821_052246_outLine +BABEL_OP3_305_77909_20140613_035103_inLine +BABEL_OP3_305_77909_20140613_035103_outLine +BABEL_OP3_305_78016_20140821_222210_inLine +BABEL_OP3_305_78016_20140821_222210_outLine +BABEL_OP3_305_79028_20140621_005114_inLine +BABEL_OP3_305_79028_20140621_005114_outLine +BABEL_OP3_305_79167_20150208_063508_inLine +BABEL_OP3_305_79167_20150208_063508_outLine +BABEL_OP3_305_79571_20150211_095226_inLine +BABEL_OP3_305_79571_20150211_095226_outLine +BABEL_OP3_305_79751_20140821_233858_inLine +BABEL_OP3_305_79751_20140821_233858_outLine +BABEL_OP3_305_80241_20140612_015921_inLine +BABEL_OP3_305_80241_20140612_015921_outLine +BABEL_OP3_305_80577_20150221_073930_inLine +BABEL_OP3_305_80577_20150221_073930_outLine +BABEL_OP3_305_81213_20140822_005322_inLine +BABEL_OP3_305_81213_20140822_005322_outLine +BABEL_OP3_305_82622_20140619_013825_inLine +BABEL_OP3_305_82622_20140619_013825_outLine +BABEL_OP3_305_82626_20150307_100633_inLine +BABEL_OP3_305_82626_20150307_100633_outLine +BABEL_OP3_305_83436_20140619_060309_inLine +BABEL_OP3_305_83436_20140619_060309_outLine +BABEL_OP3_305_83935_20150213_091523_inLine +BABEL_OP3_305_83935_20150213_091523_outLine +BABEL_OP3_305_84055_20150221_083133_inLine +BABEL_OP3_305_84055_20150221_083133_outLine +BABEL_OP3_305_84061_20140807_063818_inLine +BABEL_OP3_305_84061_20140807_063818_outLine +BABEL_OP3_305_84079_20140613_053813_inLine +BABEL_OP3_305_84079_20140613_053813_outLine +BABEL_OP3_305_84177_20141021_041721_inLine +BABEL_OP3_305_84177_20141021_041721_outLine +BABEL_OP3_305_84177_20141021_043314_inLine +BABEL_OP3_305_84177_20141021_043314_outLine +BABEL_OP3_305_84327_20150213_084928_inLine +BABEL_OP3_305_84327_20150213_084928_outLine +BABEL_OP3_305_84605_20140903_033325_inLine +BABEL_OP3_305_84605_20140903_033325_outLine +BABEL_OP3_305_84605_20140903_034415_inLine +BABEL_OP3_305_84605_20140903_034415_outLine +BABEL_OP3_305_84823_20150122_033347_inLine +BABEL_OP3_305_84823_20150122_033347_outLine +BABEL_OP3_305_84838_20141022_010706_inLine +BABEL_OP3_305_84838_20141022_010706_outLine +BABEL_OP3_305_85047_20150122_030457_inLine +BABEL_OP3_305_85047_20150122_030457_outLine +BABEL_OP3_305_85254_20150303_094409_inLine +BABEL_OP3_305_85254_20150303_094409_outLine +BABEL_OP3_305_85260_20140611_042042_inLine +BABEL_OP3_305_85260_20140611_042042_outLine +BABEL_OP3_305_85322_20140822_031748_inLine +BABEL_OP3_305_85322_20140822_031748_outLine +BABEL_OP3_305_85340_20140826_232921_inLine +BABEL_OP3_305_85340_20140826_232921_outLine +BABEL_OP3_305_85519_20141022_033125_inLine +BABEL_OP3_305_85519_20141022_033125_outLine +BABEL_OP3_305_86100_20140612_024853_inLine +BABEL_OP3_305_86100_20140612_024853_outLine +BABEL_OP3_305_86321_20150218_084806_inLine +BABEL_OP3_305_86321_20150218_084806_outLine +BABEL_OP3_305_86433_20150211_094926_inLine +BABEL_OP3_305_86433_20150211_094926_outLine +BABEL_OP3_305_86597_20150305_020834_inLine +BABEL_OP3_305_86597_20150305_020834_outLine +BABEL_OP3_305_86830_20141028_051738_inLine +BABEL_OP3_305_86830_20141028_051738_outLine +BABEL_OP3_305_86878_20150115_115301_inLine +BABEL_OP3_305_86878_20150115_115301_outLine +BABEL_OP3_305_87179_20141021_010357_inLine +BABEL_OP3_305_87179_20141021_010357_outLine +BABEL_OP3_305_88669_20150226_082958_inLine 
+BABEL_OP3_305_88669_20150226_082958_outLine +BABEL_OP3_305_89330_20150305_075359_inLine +BABEL_OP3_305_89330_20150305_075359_outLine +BABEL_OP3_305_89372_20140806_054633_inLine +BABEL_OP3_305_89372_20140806_054633_outLine +BABEL_OP3_305_89650_20140606_064449_inLine +BABEL_OP3_305_89650_20140606_064449_outLine +BABEL_OP3_305_90572_20150221_011508_inLine +BABEL_OP3_305_90572_20150221_011508_outLine +BABEL_OP3_305_90739_20140910_010202_inLine +BABEL_OP3_305_90739_20140910_010202_outLine +BABEL_OP3_305_90739_20140910_011127_inLine +BABEL_OP3_305_90739_20140910_011127_outLine +BABEL_OP3_305_90777_20140910_031558_inLine +BABEL_OP3_305_90777_20140910_031558_outLine +BABEL_OP3_305_90930_20140612_073132_inLine +BABEL_OP3_305_90930_20140612_073132_outLine +BABEL_OP3_305_91252_20140612_013640_inLine +BABEL_OP3_305_91252_20140612_013640_outLine +BABEL_OP3_305_91463_20150204_001924_inLine +BABEL_OP3_305_91463_20150204_001924_outLine +BABEL_OP3_305_91977_20150210_012536_inLine +BABEL_OP3_305_91977_20150210_012536_outLine +BABEL_OP3_305_92281_20150312_104117_inLine +BABEL_OP3_305_92281_20150312_104117_outLine +BABEL_OP3_305_92509_20140620_020408_inLine +BABEL_OP3_305_92509_20140620_020408_outLine +BABEL_OP3_305_92605_20150312_090817_inLine +BABEL_OP3_305_92605_20150312_090817_outLine +BABEL_OP3_305_92740_20150210_020753_inLine +BABEL_OP3_305_92740_20150210_020753_outLine +BABEL_OP3_305_92941_20140911_002247_inLine +BABEL_OP3_305_92941_20140911_002247_outLine +BABEL_OP3_305_93320_20150305_072620_inLine +BABEL_OP3_305_93320_20150305_072620_outLine +BABEL_OP3_305_93681_20141018_044334_inLine +BABEL_OP3_305_93681_20141018_044334_outLine +BABEL_OP3_305_93861_20150123_004904_inLine +BABEL_OP3_305_93861_20150123_004904_outLine +BABEL_OP3_305_94253_20140902_015125_inLine +BABEL_OP3_305_94253_20140902_015125_outLine +BABEL_OP3_305_94587_20150214_091538_inLine +BABEL_OP3_305_94587_20150214_091538_outLine +BABEL_OP3_305_94713_20150114_082431_inLine +BABEL_OP3_305_94713_20150114_082431_outLine +BABEL_OP3_305_95269_20140912_000910_inLine +BABEL_OP3_305_95269_20140912_000910_outLine +BABEL_OP3_305_95598_20140615_025323_inLine +BABEL_OP3_305_95598_20140615_025323_outLine +BABEL_OP3_305_95903_20150222_060503_inLine +BABEL_OP3_305_95903_20150222_060503_outLine +BABEL_OP3_305_95942_20150227_105233_inLine +BABEL_OP3_305_95942_20150227_105233_outLine +BABEL_OP3_305_96041_20140611_065313_inLine +BABEL_OP3_305_96041_20140611_065313_outLine +BABEL_OP3_305_96504_20140808_230422_inLine +BABEL_OP3_305_96504_20140808_230422_outLine +BABEL_OP3_305_96504_20140808_231336_inLine +BABEL_OP3_305_96504_20140808_231336_outLine +BABEL_OP3_305_97220_20150303_234352_inLine +BABEL_OP3_305_97220_20150303_234352_outLine +BABEL_OP3_305_97772_20140618_074519_inLine +BABEL_OP3_305_97772_20140618_074519_outLine +BABEL_OP3_305_97896_20140904_071346_inLine +BABEL_OP3_305_97896_20140904_071346_outLine +BABEL_OP3_305_98192_20150306_053152_inLine +BABEL_OP3_305_98192_20150306_053152_outLine +BABEL_OP3_305_98255_20150115_095803_inLine +BABEL_OP3_305_98255_20150115_095803_outLine +BABEL_OP3_305_98255_20150115_101856_inLine +BABEL_OP3_305_98255_20150115_101856_outLine +BABEL_OP3_305_98909_20140730_054930_inLine +BABEL_OP3_305_98909_20140730_054930_outLine +BABEL_OP3_305_98909_20140730_055859_inLine +BABEL_OP3_305_98909_20140730_055859_outLine +BABEL_OP3_305_99289_20150227_102036_inLine +BABEL_OP3_305_99289_20150227_102036_outLine +BABEL_OP3_305_99516_20140620_054149_inLine +BABEL_OP3_305_99516_20140620_054149_outLine diff --git 
a/egs/babel/s5d/conf/lists/305-guarani/untranscribed-training.list b/egs/babel/s5d/conf/lists/305-guarani/untranscribed-training.list new file mode 100644 index 00000000000..3b4e995995f --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/untranscribed-training.list @@ -0,0 +1,525 @@ +BABEL_OP3_305_10002_20150327_045715_inLine +BABEL_OP3_305_10002_20150327_045715_outLine +BABEL_OP3_305_12846_20150711_092831_inLine +BABEL_OP3_305_12846_20150711_092831_outLine +BABEL_OP3_305_13561_20150122_000259_inLine +BABEL_OP3_305_13561_20150122_000259_outLine +BABEL_OP3_305_13792_20140619_010014_inLine +BABEL_OP3_305_13792_20140619_010014_outLine +BABEL_OP3_305_13909_20150709_071634_inLine +BABEL_OP3_305_13909_20150709_071634_outLine +BABEL_OP3_305_13929_20150429_060818_inLine +BABEL_OP3_305_13929_20150429_060818_outLine +BABEL_OP3_305_14179_20150211_104346_inLine +BABEL_OP3_305_14537_20150507_004514_inLine +BABEL_OP3_305_14537_20150507_004514_outLine +BABEL_OP3_305_14560_20150208_054722_inLine +BABEL_OP3_305_14575_20150501_043914_inLine +BABEL_OP3_305_14575_20150501_043914_outLine +BABEL_OP3_305_14807_20150124_062928_inLine +BABEL_OP3_305_14807_20150124_062928_outLine +BABEL_OP3_305_14875_20140808_063210_inLine +BABEL_OP3_305_14972_20150123_045130_inLine +BABEL_OP3_305_14972_20150123_045130_outLine +BABEL_OP3_305_15324_20150226_034700_inLine +BABEL_OP3_305_15324_20150226_034700_outLine +BABEL_OP3_305_15382_20150211_004401_inLine +BABEL_OP3_305_15382_20150211_004401_outLine +BABEL_OP3_305_15466_20150319_020617_inLine +BABEL_OP3_305_15466_20150319_020617_outLine +BABEL_OP3_305_15702_20150207_022910_inLine +BABEL_OP3_305_15869_20140613_063410_inLine +BABEL_OP3_305_15869_20140613_063410_outLine +BABEL_OP3_305_15985_20150712_053914_inLine +BABEL_OP3_305_15985_20150712_053914_outLine +BABEL_OP3_305_16056_20140618_060252_inLine +BABEL_OP3_305_16056_20140618_060252_outLine +BABEL_OP3_305_16802_20140613_064802_outLine +BABEL_OP3_305_16838_20150428_014210_inLine +BABEL_OP3_305_16838_20150428_014210_outLine +BABEL_OP3_305_16938_20150127_074437_inLine +BABEL_OP3_305_16938_20150127_074437_outLine +BABEL_OP3_305_17472_20150226_001559_inLine +BABEL_OP3_305_17472_20150226_001559_outLine +BABEL_OP3_305_17520_20150123_072609_inLine +BABEL_OP3_305_17520_20150123_072609_outLine +BABEL_OP3_305_17573_20150225_054303_inLine +BABEL_OP3_305_17573_20150225_054303_outLine +BABEL_OP3_305_17751_20150709_064430_inLine +BABEL_OP3_305_17751_20150709_064430_outLine +BABEL_OP3_305_19545_20150224_095516_inLine +BABEL_OP3_305_19545_20150224_095516_outLine +BABEL_OP3_305_19589_20150605_040559_inLine +BABEL_OP3_305_19589_20150605_040559_outLine +BABEL_OP3_305_19722_20140620_011143_inLine +BABEL_OP3_305_19722_20140620_011143_outLine +BABEL_OP3_305_19722_20140620_012427_inLine +BABEL_OP3_305_19722_20140620_012427_outLine +BABEL_OP3_305_20738_20150303_004715_inLine +BABEL_OP3_305_20738_20150303_004715_outLine +BABEL_OP3_305_21029_20140823_005012_outLine +BABEL_OP3_305_21426_20150317_013855_inLine +BABEL_OP3_305_21426_20150317_013855_outLine +BABEL_OP3_305_22170_20150219_024431_inLine +BABEL_OP3_305_22170_20150219_024431_outLine +BABEL_OP3_305_23395_20150124_020906_inLine +BABEL_OP3_305_23395_20150124_020906_outLine +BABEL_OP3_305_24037_20150408_020032_inLine +BABEL_OP3_305_24037_20150408_020032_outLine +BABEL_OP3_305_24270_20150127_065231_inLine +BABEL_OP3_305_24270_20150127_065231_outLine +BABEL_OP3_305_24648_20150720_024919_inLine +BABEL_OP3_305_24648_20150720_024919_outLine +BABEL_OP3_305_25698_20150713_041848_inLine 
+BABEL_OP3_305_25698_20150713_041848_outLine +BABEL_OP3_305_26074_20150123_054227_inLine +BABEL_OP3_305_26074_20150123_054227_outLine +BABEL_OP3_305_26507_20150228_085010_inLine +BABEL_OP3_305_26507_20150228_085010_outLine +BABEL_OP3_305_26869_20140611_062738_inLine +BABEL_OP3_305_26869_20140611_062738_outLine +BABEL_OP3_305_26999_20150211_213027_outLine +BABEL_OP3_305_27203_20150203_021148_inLine +BABEL_OP3_305_27203_20150203_021148_outLine +BABEL_OP3_305_28522_20150210_024545_inLine +BABEL_OP3_305_28522_20150210_024545_outLine +BABEL_OP3_305_28595_20150311_092304_inLine +BABEL_OP3_305_28595_20150311_092304_outLine +BABEL_OP3_305_28644_20150501_021643_inLine +BABEL_OP3_305_28644_20150501_021643_outLine +BABEL_OP3_305_28814_20141028_061920_inLine +BABEL_OP3_305_28814_20141028_061920_outLine +BABEL_OP3_305_29039_20150225_033135_inLine +BABEL_OP3_305_29039_20150225_033135_outLine +BABEL_OP3_305_29135_20140620_020910_inLine +BABEL_OP3_305_29135_20140620_020910_outLine +BABEL_OP3_305_29643_20150712_020443_inLine +BABEL_OP3_305_29643_20150712_020443_outLine +BABEL_OP3_305_29911_20150425_022101_inLine +BABEL_OP3_305_29911_20150425_022101_outLine +BABEL_OP3_305_30084_20150711_110851_inLine +BABEL_OP3_305_30084_20150711_110851_outLine +BABEL_OP3_305_30253_20150226_074731_inLine +BABEL_OP3_305_30345_20150801_030841_inLine +BABEL_OP3_305_30345_20150801_030841_outLine +BABEL_OP3_305_30395_20140913_031713_inLine +BABEL_OP3_305_30395_20140913_031713_outLine +BABEL_OP3_305_30395_20140913_033401_inLine +BABEL_OP3_305_30395_20140913_033401_outLine +BABEL_OP3_305_31109_20150201_061030_inLine +BABEL_OP3_305_31131_20150318_083818_inLine +BABEL_OP3_305_31184_20141016_042343_inLine +BABEL_OP3_305_31184_20141016_042343_outLine +BABEL_OP3_305_31490_20140618_043106_inLine +BABEL_OP3_305_31490_20140618_043106_outLine +BABEL_OP3_305_32171_20150313_090240_inLine +BABEL_OP3_305_32171_20150313_090240_outLine +BABEL_OP3_305_32244_20150508_010834_inLine +BABEL_OP3_305_32244_20150508_010834_outLine +BABEL_OP3_305_32630_20150508_025319_inLine +BABEL_OP3_305_32630_20150508_025319_outLine +BABEL_OP3_305_32959_20150218_010038_inLine +BABEL_OP3_305_32959_20150218_010038_outLine +BABEL_OP3_305_32961_20150312_083747_inLine +BABEL_OP3_305_32961_20150312_083747_outLine +BABEL_OP3_305_33216_20150305_093049_outLine +BABEL_OP3_305_34482_20140612_002439_inLine +BABEL_OP3_305_34482_20140612_002439_outLine +BABEL_OP3_305_34688_20140620_051303_inLine +BABEL_OP3_305_34688_20140620_051303_outLine +BABEL_OP3_305_34899_20150708_044950_inLine +BABEL_OP3_305_34899_20150708_044950_outLine +BABEL_OP3_305_34903_20150513_000213_inLine +BABEL_OP3_305_34903_20150513_000213_outLine +BABEL_OP3_305_35838_20150505_025409_inLine +BABEL_OP3_305_35838_20150505_025409_outLine +BABEL_OP3_305_36642_20150529_004314_inLine +BABEL_OP3_305_36642_20150529_004314_outLine +BABEL_OP3_305_37229_20150711_062628_inLine +BABEL_OP3_305_37229_20150711_062628_outLine +BABEL_OP3_305_37776_20141021_051359_inLine +BABEL_OP3_305_37776_20141021_051359_outLine +BABEL_OP3_305_38554_20140618_050525_inLine +BABEL_OP3_305_38554_20140618_050525_outLine +BABEL_OP3_305_38689_20150215_061537_inLine +BABEL_OP3_305_38689_20150215_061537_outLine +BABEL_OP3_305_38750_20150512_033350_outLine +BABEL_OP3_305_38878_20150226_001924_inLine +BABEL_OP3_305_38878_20150226_001924_outLine +BABEL_OP3_305_38979_20150222_070549_inLine +BABEL_OP3_305_38979_20150222_071202_inLine +BABEL_OP3_305_39006_20150305_001413_inLine +BABEL_OP3_305_39006_20150305_001413_outLine 
+BABEL_OP3_305_40330_20140613_044545_inLine +BABEL_OP3_305_40330_20140613_044545_outLine +BABEL_OP3_305_40648_20150425_034647_inLine +BABEL_OP3_305_40648_20150425_034647_outLine +BABEL_OP3_305_41720_20150327_013143_inLine +BABEL_OP3_305_42029_20141107_005557_inLine +BABEL_OP3_305_42029_20141107_005557_outLine +BABEL_OP3_305_42126_20150428_014342_inLine +BABEL_OP3_305_42126_20150428_014342_outLine +BABEL_OP3_305_42126_20150428_021652_inLine +BABEL_OP3_305_42126_20150428_021652_outLine +BABEL_OP3_305_42619_20150211_044149_inLine +BABEL_OP3_305_42619_20150211_044149_outLine +BABEL_OP3_305_42834_20150212_100155_inLine +BABEL_OP3_305_42834_20150212_100155_outLine +BABEL_OP3_305_42848_20150711_053624_inLine +BABEL_OP3_305_42848_20150711_053624_outLine +BABEL_OP3_305_43157_20150313_015446_inLine +BABEL_OP3_305_43157_20150313_015446_outLine +BABEL_OP3_305_43285_20150210_022647_inLine +BABEL_OP3_305_43323_20150719_100142_inLine +BABEL_OP3_305_43323_20150719_100142_outLine +BABEL_OP3_305_43794_20150712_055921_inLine +BABEL_OP3_305_43794_20150712_055921_outLine +BABEL_OP3_305_44309_20150221_054810_inLine +BABEL_OP3_305_44309_20150221_054810_outLine +BABEL_OP3_305_44681_20150506_011354_inLine +BABEL_OP3_305_44681_20150506_011354_outLine +BABEL_OP3_305_45699_20140621_010650_inLine +BABEL_OP3_305_45699_20140621_010650_outLine +BABEL_OP3_305_45771_20150509_034615_inLine +BABEL_OP3_305_45771_20150509_034615_outLine +BABEL_OP3_305_46974_20150214_020116_inLine +BABEL_OP3_305_46974_20150214_020116_outLine +BABEL_OP3_305_47309_20150409_072623_inLine +BABEL_OP3_305_47309_20150409_072623_outLine +BABEL_OP3_305_47405_20140612_010358_inLine +BABEL_OP3_305_47405_20140612_010358_outLine +BABEL_OP3_305_47451_20150226_004537_inLine +BABEL_OP3_305_47451_20150226_004537_outLine +BABEL_OP3_305_47866_20150221_013305_inLine +BABEL_OP3_305_47866_20150221_013305_outLine +BABEL_OP3_305_47866_20150221_014014_inLine +BABEL_OP3_305_47866_20150221_014014_outLine +BABEL_OP3_305_48016_20150306_064336_inLine +BABEL_OP3_305_48016_20150306_064336_outLine +BABEL_OP3_305_48299_20150325_094035_inLine +BABEL_OP3_305_48299_20150325_094035_outLine +BABEL_OP3_305_49775_20140618_071800_inLine +BABEL_OP3_305_49775_20140618_071800_outLine +BABEL_OP3_305_49912_20150713_052104_inLine +BABEL_OP3_305_49912_20150713_052104_outLine +BABEL_OP3_305_49945_20150507_042152_inLine +BABEL_OP3_305_49945_20150507_042152_outLine +BABEL_OP3_305_50726_20140620_231413_inLine +BABEL_OP3_305_50726_20140620_231413_outLine +BABEL_OP3_305_50779_20150124_073920_inLine +BABEL_OP3_305_50779_20150124_073920_outLine +BABEL_OP3_305_51414_20150508_035339_inLine +BABEL_OP3_305_51414_20150508_035339_outLine +BABEL_OP3_305_52058_20150425_021345_inLine +BABEL_OP3_305_52058_20150425_021345_outLine +BABEL_OP3_305_52070_20150708_053057_inLine +BABEL_OP3_305_52070_20150708_053057_outLine +BABEL_OP3_305_53063_20150227_005949_inLine +BABEL_OP3_305_53063_20150227_005949_outLine +BABEL_OP3_305_54066_20150314_023944_inLine +BABEL_OP3_305_55136_20150720_024100_inLine +BABEL_OP3_305_55136_20150720_024100_outLine +BABEL_OP3_305_56057_20140614_044506_inLine +BABEL_OP3_305_56057_20140614_044506_outLine +BABEL_OP3_305_56326_20150422_010519_inLine +BABEL_OP3_305_56326_20150422_010519_outLine +BABEL_OP3_305_56345_20150327_043440_inLine +BABEL_OP3_305_56465_20150306_050918_inLine +BABEL_OP3_305_56465_20150306_050918_outLine +BABEL_OP3_305_56674_20150501_040501_inLine +BABEL_OP3_305_56674_20150501_040501_outLine +BABEL_OP3_305_56684_20150801_003245_inLine 
+BABEL_OP3_305_56684_20150801_003245_outLine +BABEL_OP3_305_56951_20150501_022425_inLine +BABEL_OP3_305_56951_20150501_022425_outLine +BABEL_OP3_305_57093_20150122_014446_inLine +BABEL_OP3_305_57093_20150122_021223_inLine +BABEL_OP3_305_57782_20150310_044823_inLine +BABEL_OP3_305_57782_20150310_044823_outLine +BABEL_OP3_305_58047_20150124_055910_inLine +BABEL_OP3_305_58047_20150124_055910_outLine +BABEL_OP3_305_58313_20150124_015438_inLine +BABEL_OP3_305_58313_20150124_015438_outLine +BABEL_OP3_305_58489_20150217_090604_inLine +BABEL_OP3_305_58850_20141017_005516_inLine +BABEL_OP3_305_58850_20141017_010823_inLine +BABEL_OP3_305_59028_20150709_062445_inLine +BABEL_OP3_305_59028_20150709_062445_outLine +BABEL_OP3_305_59028_20150712_043120_inLine +BABEL_OP3_305_59028_20150712_043120_outLine +BABEL_OP3_305_59078_20150127_073310_inLine +BABEL_OP3_305_59078_20150127_073310_outLine +BABEL_OP3_305_59509_20150206_012130_inLine +BABEL_OP3_305_59509_20150206_013211_inLine +BABEL_OP3_305_59747_20140620_004831_inLine +BABEL_OP3_305_59747_20140620_004831_outLine +BABEL_OP3_305_60307_20150310_094538_inLine +BABEL_OP3_305_60307_20150310_094538_outLine +BABEL_OP3_305_60436_20150529_012621_inLine +BABEL_OP3_305_60436_20150529_012621_outLine +BABEL_OP3_305_60458_20150508_023847_inLine +BABEL_OP3_305_60458_20150508_023847_outLine +BABEL_OP3_305_60498_20150508_031033_inLine +BABEL_OP3_305_60498_20150508_031033_outLine +BABEL_OP3_305_60508_20140822_014453_inLine +BABEL_OP3_305_60778_20150425_005047_inLine +BABEL_OP3_305_60778_20150425_005047_outLine +BABEL_OP3_305_60836_20140809_005847_inLine +BABEL_OP3_305_61219_20140730_063954_inLine +BABEL_OP3_305_61219_20140730_063954_outLine +BABEL_OP3_305_61225_20140620_001221_inLine +BABEL_OP3_305_61225_20140620_001221_outLine +BABEL_OP3_305_61438_20150423_020808_inLine +BABEL_OP3_305_61438_20150423_020808_outLine +BABEL_OP3_305_61731_20140621_035703_inLine +BABEL_OP3_305_61731_20140621_035703_outLine +BABEL_OP3_305_61731_20140621_041145_inLine +BABEL_OP3_305_61731_20140621_041145_outLine +BABEL_OP3_305_62323_20140612_010032_inLine +BABEL_OP3_305_62323_20140612_010032_outLine +BABEL_OP3_305_62362_20150712_082552_inLine +BABEL_OP3_305_62362_20150712_082552_outLine +BABEL_OP3_305_62430_20150219_045422_inLine +BABEL_OP3_305_62430_20150219_045422_outLine +BABEL_OP3_305_62545_20150424_004115_inLine +BABEL_OP3_305_62545_20150424_004115_outLine +BABEL_OP3_305_63094_20150712_100827_inLine +BABEL_OP3_305_63094_20150712_100827_outLine +BABEL_OP3_305_63265_20140611_234727_inLine +BABEL_OP3_305_63265_20140611_234727_outLine +BABEL_OP3_305_63265_20140611_235803_inLine +BABEL_OP3_305_63265_20140611_235803_outLine +BABEL_OP3_305_63307_20150122_063418_inLine +BABEL_OP3_305_63307_20150122_063418_outLine +BABEL_OP3_305_63307_20150122_065933_inLine +BABEL_OP3_305_63307_20150122_065933_outLine +BABEL_OP3_305_63309_20150319_003832_inLine +BABEL_OP3_305_63309_20150319_003832_outLine +BABEL_OP3_305_63336_20140614_051945_inLine +BABEL_OP3_305_63336_20140614_051945_outLine +BABEL_OP3_305_63490_20150408_025018_inLine +BABEL_OP3_305_63490_20150408_025018_outLine +BABEL_OP3_305_63490_20150408_025711_inLine +BABEL_OP3_305_63490_20150408_025711_outLine +BABEL_OP3_305_63730_20150305_010517_inLine +BABEL_OP3_305_63730_20150305_010517_outLine +BABEL_OP3_305_63906_20150221_045610_inLine +BABEL_OP3_305_64259_20150719_092713_inLine +BABEL_OP3_305_64259_20150719_092713_outLine +BABEL_OP3_305_65077_20140801_012944_inLine +BABEL_OP3_305_65077_20140801_012944_outLine +BABEL_OP3_305_65561_20150214_033031_inLine 
+BABEL_OP3_305_65561_20150214_033031_outLine +BABEL_OP3_305_65639_20150428_024614_inLine +BABEL_OP3_305_65639_20150428_024614_outLine +BABEL_OP3_305_66001_20140620_042612_inLine +BABEL_OP3_305_66001_20140620_042612_outLine +BABEL_OP3_305_66361_20150320_085921_inLine +BABEL_OP3_305_66361_20150320_085921_outLine +BABEL_OP3_305_66959_20150225_060511_inLine +BABEL_OP3_305_66959_20150225_060511_outLine +BABEL_OP3_305_66971_20150507_025406_inLine +BABEL_OP3_305_66971_20150507_025406_outLine +BABEL_OP3_305_67085_20150522_035734_inLine +BABEL_OP3_305_67085_20150522_035734_outLine +BABEL_OP3_305_68924_20150203_052345_inLine +BABEL_OP3_305_68924_20150203_052345_outLine +BABEL_OP3_305_69107_20150123_023939_inLine +BABEL_OP3_305_69107_20150123_023939_outLine +BABEL_OP3_305_69474_20150217_095752_inLine +BABEL_OP3_305_69474_20150217_095752_outLine +BABEL_OP3_305_70216_20150418_044143_inLine +BABEL_OP3_305_70216_20150418_044143_outLine +BABEL_OP3_305_70216_20150418_045222_inLine +BABEL_OP3_305_70216_20150418_045222_outLine +BABEL_OP3_305_70343_20150213_010739_inLine +BABEL_OP3_305_70343_20150213_010739_outLine +BABEL_OP3_305_71067_20150206_022645_inLine +BABEL_OP3_305_71067_20150206_022645_outLine +BABEL_OP3_305_71704_20140730_042541_inLine +BABEL_OP3_305_71704_20140730_042541_outLine +BABEL_OP3_305_72040_20140905_002224_inLine +BABEL_OP3_305_72040_20140905_002224_outLine +BABEL_OP3_305_72952_20150712_063306_inLine +BABEL_OP3_305_72952_20150712_063306_outLine +BABEL_OP3_305_73299_20150712_044814_inLine +BABEL_OP3_305_73299_20150712_044814_outLine +BABEL_OP3_305_73305_20150328_030752_inLine +BABEL_OP3_305_73305_20150328_030752_outLine +BABEL_OP3_305_73814_20150207_014107_inLine +BABEL_OP3_305_74226_20150211_232229_inLine +BABEL_OP3_305_74226_20150211_232229_outLine +BABEL_OP3_305_74886_20140620_052822_inLine +BABEL_OP3_305_74886_20140620_052822_outLine +BABEL_OP3_305_75342_20150513_235657_inLine +BABEL_OP3_305_75342_20150513_235657_outLine +BABEL_OP3_305_75366_20150310_042904_inLine +BABEL_OP3_305_75366_20150310_042904_outLine +BABEL_OP3_305_75460_20150711_021713_inLine +BABEL_OP3_305_75460_20150711_021713_outLine +BABEL_OP3_305_76730_20140729_052201_inLine +BABEL_OP3_305_76773_20140823_031314_inLine +BABEL_OP3_305_76773_20140823_031314_outLine +BABEL_OP3_305_76902_20150320_043734_inLine +BABEL_OP3_305_76902_20150320_043734_outLine +BABEL_OP3_305_77730_20140730_051628_inLine +BABEL_OP3_305_77730_20140730_051628_outLine +BABEL_OP3_305_77832_20150317_003741_inLine +BABEL_OP3_305_77832_20150317_003741_outLine +BABEL_OP3_305_78116_20150213_013547_inLine +BABEL_OP3_305_78116_20150213_013547_outLine +BABEL_OP3_305_78194_20140618_010449_inLine +BABEL_OP3_305_78194_20140618_010449_outLine +BABEL_OP3_305_78254_20140801_003005_inLine +BABEL_OP3_305_78254_20140801_003005_outLine +BABEL_OP3_305_78454_20150127_025616_inLine +BABEL_OP3_305_78454_20150127_025616_outLine +BABEL_OP3_305_78511_20150225_034550_inLine +BABEL_OP3_305_78511_20150225_034550_outLine +BABEL_OP3_305_78877_20150428_004749_inLine +BABEL_OP3_305_78877_20150428_004749_outLine +BABEL_OP3_305_79429_20150319_013246_inLine +BABEL_OP3_305_79429_20150319_013246_outLine +BABEL_OP3_305_79660_20150712_042549_inLine +BABEL_OP3_305_79660_20150712_042549_outLine +BABEL_OP3_305_79898_20150307_091426_inLine +BABEL_OP3_305_79898_20150307_091426_outLine +BABEL_OP3_305_79898_20150307_093317_inLine +BABEL_OP3_305_79898_20150307_093317_outLine +BABEL_OP3_305_80559_20140731_042258_inLine +BABEL_OP3_305_80559_20140731_042258_outLine 
+BABEL_OP3_305_80989_20150712_091615_inLine +BABEL_OP3_305_80989_20150712_091615_outLine +BABEL_OP3_305_81433_20150127_070550_inLine +BABEL_OP3_305_81433_20150127_070550_outLine +BABEL_OP3_305_81810_20150208_075542_inLine +BABEL_OP3_305_81810_20150208_075542_outLine +BABEL_OP3_305_82145_20150301_063108_inLine +BABEL_OP3_305_82145_20150301_063108_outLine +BABEL_OP3_305_82145_20150301_064502_inLine +BABEL_OP3_305_82145_20150301_064502_outLine +BABEL_OP3_305_82425_20140620_053637_inLine +BABEL_OP3_305_82425_20140620_053637_outLine +BABEL_OP3_305_82742_20150124_054325_inLine +BABEL_OP3_305_82863_20141021_023356_inLine +BABEL_OP3_305_82863_20141021_023356_outLine +BABEL_OP3_305_83545_20150605_042852_inLine +BABEL_OP3_305_83545_20150605_042852_outLine +BABEL_OP3_305_83771_20150509_012937_inLine +BABEL_OP3_305_83771_20150509_012937_outLine +BABEL_OP3_305_83771_20150509_013635_inLine +BABEL_OP3_305_83771_20150509_013635_outLine +BABEL_OP3_305_83813_20150429_053518_inLine +BABEL_OP3_305_83813_20150429_053518_outLine +BABEL_OP3_305_84125_20140614_072153_inLine +BABEL_OP3_305_84125_20140614_072153_outLine +BABEL_OP3_305_84583_20150123_062012_inLine +BABEL_OP3_305_84583_20150123_062012_outLine +BABEL_OP3_305_85010_20150327_022501_inLine +BABEL_OP3_305_85010_20150327_022501_outLine +BABEL_OP3_305_85048_20150124_074706_inLine +BABEL_OP3_305_85048_20150124_074706_outLine +BABEL_OP3_305_85246_20150317_090655_inLine +BABEL_OP3_305_85246_20150317_090655_outLine +BABEL_OP3_305_85246_20150317_091545_inLine +BABEL_OP3_305_85246_20150317_091545_outLine +BABEL_OP3_305_85647_20150124_020413_inLine +BABEL_OP3_305_85647_20150124_020413_outLine +BABEL_OP3_305_85647_20150124_021612_inLine +BABEL_OP3_305_85647_20150124_021612_outLine +BABEL_OP3_305_86628_20150709_074216_inLine +BABEL_OP3_305_86628_20150709_074216_outLine +BABEL_OP3_305_86826_20150711_081659_inLine +BABEL_OP3_305_86826_20150711_081659_outLine +BABEL_OP3_305_87629_20150123_042545_inLine +BABEL_OP3_305_87629_20150123_042545_outLine +BABEL_OP3_305_87731_20150720_063702_inLine +BABEL_OP3_305_87731_20150720_063702_outLine +BABEL_OP3_305_87884_20150218_232216_inLine +BABEL_OP3_305_87884_20150218_232216_outLine +BABEL_OP3_305_88445_20150208_035054_inLine +BABEL_OP3_305_88673_20150719_085433_inLine +BABEL_OP3_305_88673_20150719_085433_outLine +BABEL_OP3_305_89203_20150802_011814_inLine +BABEL_OP3_305_89203_20150802_011814_outLine +BABEL_OP3_305_89888_20140730_034633_inLine +BABEL_OP3_305_89888_20140730_034633_outLine +BABEL_OP3_305_90417_20150721_002233_inLine +BABEL_OP3_305_90417_20150721_002233_outLine +BABEL_OP3_305_90740_20150326_035521_inLine +BABEL_OP3_305_90740_20150326_035521_outLine +BABEL_OP3_305_91189_20150708_000127_inLine +BABEL_OP3_305_91189_20150708_000127_outLine +BABEL_OP3_305_91336_20150122_061156_inLine +BABEL_OP3_305_91336_20150122_061156_outLine +BABEL_OP3_305_91411_20150425_040553_inLine +BABEL_OP3_305_91411_20150425_040553_outLine +BABEL_OP3_305_91760_20150508_021256_inLine +BABEL_OP3_305_91760_20150508_021256_outLine +BABEL_OP3_305_91891_20150214_084516_inLine +BABEL_OP3_305_91891_20150214_084516_outLine +BABEL_OP3_305_91930_20150219_015305_inLine +BABEL_OP3_305_91930_20150219_015305_outLine +BABEL_OP3_305_91930_20150219_015722_inLine +BABEL_OP3_305_91930_20150219_015722_outLine +BABEL_OP3_305_92065_20150227_020241_inLine +BABEL_OP3_305_92077_20150528_054240_inLine +BABEL_OP3_305_92077_20150528_054240_outLine +BABEL_OP3_305_92096_20150224_003248_inLine +BABEL_OP3_305_92096_20150224_003248_outLine 
+BABEL_OP3_305_92252_20140619_005416_inLine +BABEL_OP3_305_92252_20140619_005416_outLine +BABEL_OP3_305_92792_20150220_033116_inLine +BABEL_OP3_305_93222_20150325_030317_inLine +BABEL_OP3_305_93222_20150325_030317_outLine +BABEL_OP3_305_93515_20150528_011902_inLine +BABEL_OP3_305_93515_20150528_011902_outLine +BABEL_OP3_305_93604_20150522_043957_inLine +BABEL_OP3_305_93604_20150522_043957_outLine +BABEL_OP3_305_93632_20150730_105129_inLine +BABEL_OP3_305_93632_20150730_105129_outLine +BABEL_OP3_305_93964_20150122_021516_inLine +BABEL_OP3_305_93964_20150122_021516_outLine +BABEL_OP3_305_93964_20150122_024514_inLine +BABEL_OP3_305_93964_20150122_024514_outLine +BABEL_OP3_305_93964_20150122_025759_inLine +BABEL_OP3_305_93964_20150122_025759_outLine +BABEL_OP3_305_94035_20150429_013519_inLine +BABEL_OP3_305_94035_20150429_013519_outLine +BABEL_OP3_305_94212_20150425_011456_inLine +BABEL_OP3_305_94212_20150425_011456_outLine +BABEL_OP3_305_94442_20150507_034412_inLine +BABEL_OP3_305_94442_20150507_034412_outLine +BABEL_OP3_305_94803_20150317_093455_inLine +BABEL_OP3_305_94803_20150317_093455_outLine +BABEL_OP3_305_94869_20140619_054259_inLine +BABEL_OP3_305_94891_20150720_080832_inLine +BABEL_OP3_305_94891_20150720_080832_outLine +BABEL_OP3_305_94923_20150123_064032_inLine +BABEL_OP3_305_94923_20150123_064032_outLine +BABEL_OP3_305_95028_20150320_013045_inLine +BABEL_OP3_305_95028_20150320_013944_inLine +BABEL_OP3_305_95294_20150207_015416_inLine +BABEL_OP3_305_95294_20150207_020517_inLine +BABEL_OP3_305_95571_20150326_084852_inLine +BABEL_OP3_305_95571_20150326_084852_outLine +BABEL_OP3_305_96405_20140621_013139_inLine +BABEL_OP3_305_96405_20140621_013139_outLine +BABEL_OP3_305_96405_20140621_015225_inLine +BABEL_OP3_305_96405_20140621_015225_outLine +BABEL_OP3_305_96584_20141107_045031_inLine +BABEL_OP3_305_96584_20141107_045031_outLine +BABEL_OP3_305_96808_20150507_011006_inLine +BABEL_OP3_305_96808_20150507_011006_outLine +BABEL_OP3_305_96940_20150320_051125_inLine +BABEL_OP3_305_96940_20150320_051125_outLine +BABEL_OP3_305_97136_20150224_085912_inLine +BABEL_OP3_305_97136_20150224_085912_outLine +BABEL_OP3_305_97731_20150731_083617_inLine +BABEL_OP3_305_97731_20150731_083617_outLine +BABEL_OP3_305_98390_20140619_004932_inLine +BABEL_OP3_305_98390_20140619_004932_outLine +BABEL_OP3_305_99813_20150127_075030_inLine +BABEL_OP3_305_99813_20150127_075030_outLine +BABEL_OP3_305_99887_20141028_055805_inLine +BABEL_OP3_305_99887_20141028_055805_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/dev.2h.list b/egs/babel/s5d/conf/lists/306-igbo/dev.2h.list new file mode 100644 index 00000000000..cf0824db01d --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/dev.2h.list @@ -0,0 +1,136 @@ +BABEL_OP3_306_10036_20140729_233849_inLine +BABEL_OP3_306_10036_20140729_233849_outLine +BABEL_OP3_306_10036_20140729_234612_inLine +BABEL_OP3_306_10036_20140729_234612_outLine +BABEL_OP3_306_11681_20140620_015031_inLine +BABEL_OP3_306_11681_20140620_015031_outLine +BABEL_OP3_306_11681_20140620_020405_inLine +BABEL_OP3_306_11681_20140620_020405_outLine +BABEL_OP3_306_13427_20140810_232413_inLine +BABEL_OP3_306_13427_20140810_232413_outLine +BABEL_OP3_306_13744_20150303_033441_inLine +BABEL_OP3_306_13744_20150303_033441_outLine +BABEL_OP3_306_19722_20150304_045710_inLine +BABEL_OP3_306_19782_20141026_011352_inLine +BABEL_OP3_306_19782_20141026_011352_outLine +BABEL_OP3_306_19818_20140801_211130_inLine +BABEL_OP3_306_19818_20140801_211130_outLine +BABEL_OP3_306_21807_20150310_215245_inLine 
+BABEL_OP3_306_21807_20150310_215245_outLine +BABEL_OP3_306_23098_20150410_035508_inLine +BABEL_OP3_306_23098_20150410_035508_outLine +BABEL_OP3_306_25961_20140607_021757_inLine +BABEL_OP3_306_25961_20140607_021757_outLine +BABEL_OP3_306_28419_20140606_201307_inLine +BABEL_OP3_306_28419_20140606_201307_outLine +BABEL_OP3_306_29023_20140614_002447_inLine +BABEL_OP3_306_29023_20140614_002447_outLine +BABEL_OP3_306_33497_20140730_031414_inLine +BABEL_OP3_306_33497_20140730_031414_outLine +BABEL_OP3_306_33497_20140803_034655_inLine +BABEL_OP3_306_33497_20140803_034655_outLine +BABEL_OP3_306_34197_20140520_215059_inLine +BABEL_OP3_306_34197_20140520_215059_outLine +BABEL_OP3_306_35420_20140527_001314_inLine +BABEL_OP3_306_35420_20140527_001314_outLine +BABEL_OP3_306_36990_20140803_235016_inLine +BABEL_OP3_306_36990_20140803_235016_outLine +BABEL_OP3_306_36990_20140804_000605_inLine +BABEL_OP3_306_36990_20140804_000605_outLine +BABEL_OP3_306_39744_20140514_001627_inLine +BABEL_OP3_306_39744_20140514_001627_outLine +BABEL_OP3_306_40740_20141030_012619_inLine +BABEL_OP3_306_40740_20141030_012619_outLine +BABEL_OP3_306_44347_20141028_001614_inLine +BABEL_OP3_306_44347_20141028_001614_outLine +BABEL_OP3_306_47882_20140524_204056_inLine +BABEL_OP3_306_47882_20140524_204056_outLine +BABEL_OP3_306_50427_20140805_190819_inLine +BABEL_OP3_306_50427_20140805_190819_outLine +BABEL_OP3_306_50726_20140521_235356_inLine +BABEL_OP3_306_50726_20140521_235356_outLine +BABEL_OP3_306_51417_20141103_210924_inLine +BABEL_OP3_306_51417_20141103_210924_outLine +BABEL_OP3_306_52301_20140607_003158_inLine +BABEL_OP3_306_52301_20140607_003158_outLine +BABEL_OP3_306_53842_20140905_005627_inLine +BABEL_OP3_306_53842_20140905_005627_outLine +BABEL_OP3_306_54530_20141006_030910_inLine +BABEL_OP3_306_54530_20141006_030910_outLine +BABEL_OP3_306_56677_20141007_020945_inLine +BABEL_OP3_306_56677_20141007_020945_outLine +BABEL_OP3_306_57141_20141026_224125_inLine +BABEL_OP3_306_57141_20141026_224125_outLine +BABEL_OP3_306_58107_20140805_204322_inLine +BABEL_OP3_306_58107_20140805_204322_outLine +BABEL_OP3_306_58585_20141028_233305_inLine +BABEL_OP3_306_58585_20141028_233305_outLine +BABEL_OP3_306_59635_20141031_194036_inLine +BABEL_OP3_306_59635_20141031_194036_outLine +BABEL_OP3_306_60508_20140521_055301_inLine +BABEL_OP3_306_60508_20140521_055301_outLine +BABEL_OP3_306_60778_20140527_195205_inLine +BABEL_OP3_306_60778_20140527_195205_outLine +BABEL_OP3_306_63334_20150216_005033_inLine +BABEL_OP3_306_63334_20150216_005033_outLine +BABEL_OP3_306_63490_20140524_215813_inLine +BABEL_OP3_306_63490_20140524_215813_outLine +BABEL_OP3_306_64722_20141223_013811_inLine +BABEL_OP3_306_64722_20141223_013811_outLine +BABEL_OP3_306_66959_20141031_215547_inLine +BABEL_OP3_306_66959_20141031_215547_outLine +BABEL_OP3_306_68289_20141113_024309_inLine +BABEL_OP3_306_68289_20141113_024309_outLine +BABEL_OP3_306_69636_20140804_020846_inLine +BABEL_OP3_306_69636_20140804_020846_outLine +BABEL_OP3_306_71047_20141028_021029_inLine +BABEL_OP3_306_71047_20141028_021029_outLine +BABEL_OP3_306_71460_20150215_025120_inLine +BABEL_OP3_306_71460_20150215_025120_outLine +BABEL_OP3_306_76756_20140803_011009_inLine +BABEL_OP3_306_76756_20140803_011009_outLine +BABEL_OP3_306_76756_20140803_011841_inLine +BABEL_OP3_306_76756_20140803_011841_outLine +BABEL_OP3_306_76756_20140803_012244_inLine +BABEL_OP3_306_76756_20140803_012244_outLine +BABEL_OP3_306_77112_20140609_224704_inLine +BABEL_OP3_306_77112_20140609_224704_outLine 
+BABEL_OP3_306_77803_20140517_202422_inLine +BABEL_OP3_306_77803_20140517_202422_outLine +BABEL_OP3_306_79451_20140608_012042_inLine +BABEL_OP3_306_79451_20140608_012042_outLine +BABEL_OP3_306_79723_20150331_184104_inLine +BABEL_OP3_306_79723_20150331_184104_outLine +BABEL_OP3_306_79995_20141025_230126_inLine +BABEL_OP3_306_79995_20141025_230126_outLine +BABEL_OP3_306_82145_20141223_031926_inLine +BABEL_OP3_306_82145_20141223_031926_outLine +BABEL_OP3_306_83455_20140804_235008_inLine +BABEL_OP3_306_83455_20140804_235008_outLine +BABEL_OP3_306_83643_20150404_031037_inLine +BABEL_OP3_306_83643_20150404_031037_outLine +BABEL_OP3_306_84079_20150402_221122_inLine +BABEL_OP3_306_84079_20150402_221122_outLine +BABEL_OP3_306_87280_20141026_002639_inLine +BABEL_OP3_306_87280_20141026_002639_outLine +BABEL_OP3_306_87298_20140609_033909_inLine +BABEL_OP3_306_87298_20140609_033909_outLine +BABEL_OP3_306_87313_20140802_000850_inLine +BABEL_OP3_306_87313_20140802_000850_outLine +BABEL_OP3_306_87313_20140802_001509_inLine +BABEL_OP3_306_87313_20140802_001509_outLine +BABEL_OP3_306_87313_20140802_002411_inLine +BABEL_OP3_306_87313_20140802_002411_outLine +BABEL_OP3_306_88925_20141025_235636_inLine +BABEL_OP3_306_88925_20141025_235636_outLine +BABEL_OP3_306_92176_20140803_000102_inLine +BABEL_OP3_306_92176_20140803_000102_outLine +BABEL_OP3_306_94035_20140528_224527_inLine +BABEL_OP3_306_94035_20140528_224527_outLine +BABEL_OP3_306_94212_20140525_012758_inLine +BABEL_OP3_306_94212_20140525_012758_outLine +BABEL_OP3_306_95077_20141031_230550_inLine +BABEL_OP3_306_95077_20141031_230550_outLine +BABEL_OP3_306_95294_20140808_012803_inLine +BABEL_OP3_306_95663_20140513_213124_inLine +BABEL_OP3_306_95663_20140513_213124_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/dev.list b/egs/babel/s5d/conf/lists/306-igbo/dev.list new file mode 100644 index 00000000000..cf0824db01d --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/dev.list @@ -0,0 +1,136 @@ +BABEL_OP3_306_10036_20140729_233849_inLine +BABEL_OP3_306_10036_20140729_233849_outLine +BABEL_OP3_306_10036_20140729_234612_inLine +BABEL_OP3_306_10036_20140729_234612_outLine +BABEL_OP3_306_11681_20140620_015031_inLine +BABEL_OP3_306_11681_20140620_015031_outLine +BABEL_OP3_306_11681_20140620_020405_inLine +BABEL_OP3_306_11681_20140620_020405_outLine +BABEL_OP3_306_13427_20140810_232413_inLine +BABEL_OP3_306_13427_20140810_232413_outLine +BABEL_OP3_306_13744_20150303_033441_inLine +BABEL_OP3_306_13744_20150303_033441_outLine +BABEL_OP3_306_19722_20150304_045710_inLine +BABEL_OP3_306_19782_20141026_011352_inLine +BABEL_OP3_306_19782_20141026_011352_outLine +BABEL_OP3_306_19818_20140801_211130_inLine +BABEL_OP3_306_19818_20140801_211130_outLine +BABEL_OP3_306_21807_20150310_215245_inLine +BABEL_OP3_306_21807_20150310_215245_outLine +BABEL_OP3_306_23098_20150410_035508_inLine +BABEL_OP3_306_23098_20150410_035508_outLine +BABEL_OP3_306_25961_20140607_021757_inLine +BABEL_OP3_306_25961_20140607_021757_outLine +BABEL_OP3_306_28419_20140606_201307_inLine +BABEL_OP3_306_28419_20140606_201307_outLine +BABEL_OP3_306_29023_20140614_002447_inLine +BABEL_OP3_306_29023_20140614_002447_outLine +BABEL_OP3_306_33497_20140730_031414_inLine +BABEL_OP3_306_33497_20140730_031414_outLine +BABEL_OP3_306_33497_20140803_034655_inLine +BABEL_OP3_306_33497_20140803_034655_outLine +BABEL_OP3_306_34197_20140520_215059_inLine +BABEL_OP3_306_34197_20140520_215059_outLine +BABEL_OP3_306_35420_20140527_001314_inLine +BABEL_OP3_306_35420_20140527_001314_outLine 
+BABEL_OP3_306_36990_20140803_235016_inLine +BABEL_OP3_306_36990_20140803_235016_outLine +BABEL_OP3_306_36990_20140804_000605_inLine +BABEL_OP3_306_36990_20140804_000605_outLine +BABEL_OP3_306_39744_20140514_001627_inLine +BABEL_OP3_306_39744_20140514_001627_outLine +BABEL_OP3_306_40740_20141030_012619_inLine +BABEL_OP3_306_40740_20141030_012619_outLine +BABEL_OP3_306_44347_20141028_001614_inLine +BABEL_OP3_306_44347_20141028_001614_outLine +BABEL_OP3_306_47882_20140524_204056_inLine +BABEL_OP3_306_47882_20140524_204056_outLine +BABEL_OP3_306_50427_20140805_190819_inLine +BABEL_OP3_306_50427_20140805_190819_outLine +BABEL_OP3_306_50726_20140521_235356_inLine +BABEL_OP3_306_50726_20140521_235356_outLine +BABEL_OP3_306_51417_20141103_210924_inLine +BABEL_OP3_306_51417_20141103_210924_outLine +BABEL_OP3_306_52301_20140607_003158_inLine +BABEL_OP3_306_52301_20140607_003158_outLine +BABEL_OP3_306_53842_20140905_005627_inLine +BABEL_OP3_306_53842_20140905_005627_outLine +BABEL_OP3_306_54530_20141006_030910_inLine +BABEL_OP3_306_54530_20141006_030910_outLine +BABEL_OP3_306_56677_20141007_020945_inLine +BABEL_OP3_306_56677_20141007_020945_outLine +BABEL_OP3_306_57141_20141026_224125_inLine +BABEL_OP3_306_57141_20141026_224125_outLine +BABEL_OP3_306_58107_20140805_204322_inLine +BABEL_OP3_306_58107_20140805_204322_outLine +BABEL_OP3_306_58585_20141028_233305_inLine +BABEL_OP3_306_58585_20141028_233305_outLine +BABEL_OP3_306_59635_20141031_194036_inLine +BABEL_OP3_306_59635_20141031_194036_outLine +BABEL_OP3_306_60508_20140521_055301_inLine +BABEL_OP3_306_60508_20140521_055301_outLine +BABEL_OP3_306_60778_20140527_195205_inLine +BABEL_OP3_306_60778_20140527_195205_outLine +BABEL_OP3_306_63334_20150216_005033_inLine +BABEL_OP3_306_63334_20150216_005033_outLine +BABEL_OP3_306_63490_20140524_215813_inLine +BABEL_OP3_306_63490_20140524_215813_outLine +BABEL_OP3_306_64722_20141223_013811_inLine +BABEL_OP3_306_64722_20141223_013811_outLine +BABEL_OP3_306_66959_20141031_215547_inLine +BABEL_OP3_306_66959_20141031_215547_outLine +BABEL_OP3_306_68289_20141113_024309_inLine +BABEL_OP3_306_68289_20141113_024309_outLine +BABEL_OP3_306_69636_20140804_020846_inLine +BABEL_OP3_306_69636_20140804_020846_outLine +BABEL_OP3_306_71047_20141028_021029_inLine +BABEL_OP3_306_71047_20141028_021029_outLine +BABEL_OP3_306_71460_20150215_025120_inLine +BABEL_OP3_306_71460_20150215_025120_outLine +BABEL_OP3_306_76756_20140803_011009_inLine +BABEL_OP3_306_76756_20140803_011009_outLine +BABEL_OP3_306_76756_20140803_011841_inLine +BABEL_OP3_306_76756_20140803_011841_outLine +BABEL_OP3_306_76756_20140803_012244_inLine +BABEL_OP3_306_76756_20140803_012244_outLine +BABEL_OP3_306_77112_20140609_224704_inLine +BABEL_OP3_306_77112_20140609_224704_outLine +BABEL_OP3_306_77803_20140517_202422_inLine +BABEL_OP3_306_77803_20140517_202422_outLine +BABEL_OP3_306_79451_20140608_012042_inLine +BABEL_OP3_306_79451_20140608_012042_outLine +BABEL_OP3_306_79723_20150331_184104_inLine +BABEL_OP3_306_79723_20150331_184104_outLine +BABEL_OP3_306_79995_20141025_230126_inLine +BABEL_OP3_306_79995_20141025_230126_outLine +BABEL_OP3_306_82145_20141223_031926_inLine +BABEL_OP3_306_82145_20141223_031926_outLine +BABEL_OP3_306_83455_20140804_235008_inLine +BABEL_OP3_306_83455_20140804_235008_outLine +BABEL_OP3_306_83643_20150404_031037_inLine +BABEL_OP3_306_83643_20150404_031037_outLine +BABEL_OP3_306_84079_20150402_221122_inLine +BABEL_OP3_306_84079_20150402_221122_outLine +BABEL_OP3_306_87280_20141026_002639_inLine 
+BABEL_OP3_306_87280_20141026_002639_outLine +BABEL_OP3_306_87298_20140609_033909_inLine +BABEL_OP3_306_87298_20140609_033909_outLine +BABEL_OP3_306_87313_20140802_000850_inLine +BABEL_OP3_306_87313_20140802_000850_outLine +BABEL_OP3_306_87313_20140802_001509_inLine +BABEL_OP3_306_87313_20140802_001509_outLine +BABEL_OP3_306_87313_20140802_002411_inLine +BABEL_OP3_306_87313_20140802_002411_outLine +BABEL_OP3_306_88925_20141025_235636_inLine +BABEL_OP3_306_88925_20141025_235636_outLine +BABEL_OP3_306_92176_20140803_000102_inLine +BABEL_OP3_306_92176_20140803_000102_outLine +BABEL_OP3_306_94035_20140528_224527_inLine +BABEL_OP3_306_94035_20140528_224527_outLine +BABEL_OP3_306_94212_20140525_012758_inLine +BABEL_OP3_306_94212_20140525_012758_outLine +BABEL_OP3_306_95077_20141031_230550_inLine +BABEL_OP3_306_95077_20141031_230550_outLine +BABEL_OP3_306_95294_20140808_012803_inLine +BABEL_OP3_306_95663_20140513_213124_inLine +BABEL_OP3_306_95663_20140513_213124_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/eval.list b/egs/babel/s5d/conf/lists/306-igbo/eval.list new file mode 100644 index 00000000000..c9db48fb2e1 --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/eval.list @@ -0,0 +1,194 @@ +BABEL_OP3_306_11673_20140513_040551_inLine +BABEL_OP3_306_11673_20140513_040551_outLine +BABEL_OP3_306_12321_20141027_232351_inLine +BABEL_OP3_306_12321_20141027_232351_outLine +BABEL_OP3_306_12635_20141101_005451_inLine +BABEL_OP3_306_12635_20141101_005451_outLine +BABEL_OP3_306_13490_20140802_230433_inLine +BABEL_OP3_306_13490_20140802_230433_outLine +BABEL_OP3_306_13490_20140802_232130_inLine +BABEL_OP3_306_13490_20140802_232130_outLine +BABEL_OP3_306_13586_20140802_035824_inLine +BABEL_OP3_306_13586_20140802_035824_outLine +BABEL_OP3_306_13792_20140531_014010_inLine +BABEL_OP3_306_13792_20140531_014010_outLine +BABEL_OP3_306_14537_20150311_192951_inLine +BABEL_OP3_306_14537_20150311_192951_outLine +BABEL_OP3_306_15730_20140521_222017_inLine +BABEL_OP3_306_15730_20140521_222017_outLine +BABEL_OP3_306_15848_20140510_004027_inLine +BABEL_OP3_306_15848_20140510_004027_outLine +BABEL_OP3_306_18924_20140814_021546_inLine +BABEL_OP3_306_18924_20140814_021546_outLine +BABEL_OP3_306_20916_20140520_205947_inLine +BABEL_OP3_306_20916_20140520_205947_outLine +BABEL_OP3_306_21206_20140621_194701_inLine +BABEL_OP3_306_21206_20140621_194701_outLine +BABEL_OP3_306_22641_20150312_020316_inLine +BABEL_OP3_306_22641_20150312_020316_outLine +BABEL_OP3_306_23628_20140603_213715_inLine +BABEL_OP3_306_23628_20140603_213715_outLine +BABEL_OP3_306_26999_20140729_223316_inLine +BABEL_OP3_306_26999_20140729_223316_outLine +BABEL_OP3_306_28775_20140620_234019_inLine +BABEL_OP3_306_28775_20140620_234019_outLine +BABEL_OP3_306_29135_20140509_234939_inLine +BABEL_OP3_306_29135_20140509_234939_outLine +BABEL_OP3_306_29352_20150316_234927_inLine +BABEL_OP3_306_29352_20150316_234927_outLine +BABEL_OP3_306_30058_20141221_102805_inLine +BABEL_OP3_306_30058_20141221_102805_outLine +BABEL_OP3_306_30345_20141029_013617_inLine +BABEL_OP3_306_30345_20141029_013617_outLine +BABEL_OP3_306_31490_20150416_203824_inLine +BABEL_OP3_306_31490_20150416_203824_outLine +BABEL_OP3_306_32301_20140924_003519_inLine +BABEL_OP3_306_32301_20140924_003519_outLine +BABEL_OP3_306_32328_20141029_221831_inLine +BABEL_OP3_306_32328_20141029_221831_outLine +BABEL_OP3_306_33273_20141016_012203_inLine +BABEL_OP3_306_33273_20141016_012203_outLine +BABEL_OP3_306_34903_20140812_000146_inLine +BABEL_OP3_306_34903_20140812_000146_outLine 
+BABEL_OP3_306_35788_20150410_005320_inLine +BABEL_OP3_306_35788_20150410_005320_outLine +BABEL_OP3_306_36341_20140509_022205_inLine +BABEL_OP3_306_36341_20140509_022205_outLine +BABEL_OP3_306_36341_20140509_022936_inLine +BABEL_OP3_306_36341_20140509_022936_outLine +BABEL_OP3_306_37064_20140606_222758_inLine +BABEL_OP3_306_37064_20140606_222758_outLine +BABEL_OP3_306_38689_20141009_214007_inLine +BABEL_OP3_306_38689_20141009_214007_outLine +BABEL_OP3_306_39159_20140509_230506_inLine +BABEL_OP3_306_39159_20140509_230506_outLine +BABEL_OP3_306_39927_20150216_011520_inLine +BABEL_OP3_306_39927_20150216_011520_outLine +BABEL_OP3_306_41174_20140730_214115_inLine +BABEL_OP3_306_41174_20140730_214115_outLine +BABEL_OP3_306_41542_20141031_044512_inLine +BABEL_OP3_306_41542_20141031_044512_outLine +BABEL_OP3_306_42834_20140813_025421_inLine +BABEL_OP3_306_42834_20140813_025421_outLine +BABEL_OP3_306_42942_20141010_020223_inLine +BABEL_OP3_306_42942_20141010_020223_outLine +BABEL_OP3_306_43646_20140510_012702_inLine +BABEL_OP3_306_43646_20140510_012702_outLine +BABEL_OP3_306_46333_20150412_023828_inLine +BABEL_OP3_306_46333_20150412_023828_outLine +BABEL_OP3_306_47215_20140714_024322_inLine +BABEL_OP3_306_47215_20140714_024322_outLine +BABEL_OP3_306_48399_20140531_222338_inLine +BABEL_OP3_306_48399_20140531_222338_outLine +BABEL_OP3_306_49216_20140512_234713_inLine +BABEL_OP3_306_49216_20140512_234713_outLine +BABEL_OP3_306_51407_20140808_210301_inLine +BABEL_OP3_306_51407_20140808_210301_outLine +BABEL_OP3_306_51407_20140808_211334_inLine +BABEL_OP3_306_51407_20140808_211334_outLine +BABEL_OP3_306_51955_20140604_224650_inLine +BABEL_OP3_306_51955_20140604_224650_outLine +BABEL_OP3_306_52694_20140811_233144_inLine +BABEL_OP3_306_52694_20140811_233144_outLine +BABEL_OP3_306_53917_20141031_222826_inLine +BABEL_OP3_306_53917_20141031_222826_outLine +BABEL_OP3_306_56429_20140622_011257_inLine +BABEL_OP3_306_56429_20140622_011257_outLine +BABEL_OP3_306_56606_20150403_212810_inLine +BABEL_OP3_306_56606_20150403_212810_outLine +BABEL_OP3_306_56743_20140802_030717_inLine +BABEL_OP3_306_56743_20140802_030717_outLine +BABEL_OP3_306_57035_20150410_033837_inLine +BABEL_OP3_306_57035_20150410_033837_outLine +BABEL_OP3_306_57093_20140728_215709_inLine +BABEL_OP3_306_57093_20140728_215709_outLine +BABEL_OP3_306_57093_20140728_221243_inLine +BABEL_OP3_306_57093_20140728_221243_outLine +BABEL_OP3_306_57093_20140729_003342_inLine +BABEL_OP3_306_57093_20140729_003342_outLine +BABEL_OP3_306_57116_20140517_222852_inLine +BABEL_OP3_306_57116_20140517_222852_outLine +BABEL_OP3_306_59928_20140610_024019_inLine +BABEL_OP3_306_59928_20140610_024019_outLine +BABEL_OP3_306_60706_20140531_003048_inLine +BABEL_OP3_306_60706_20140531_003048_outLine +BABEL_OP3_306_61684_20150420_023032_inLine +BABEL_OP3_306_61684_20150420_023032_outLine +BABEL_OP3_306_62545_20140527_204602_inLine +BABEL_OP3_306_62545_20140527_204602_outLine +BABEL_OP3_306_62835_20140905_002934_inLine +BABEL_OP3_306_62835_20140905_002934_outLine +BABEL_OP3_306_63081_20140509_000544_inLine +BABEL_OP3_306_63081_20140509_000544_outLine +BABEL_OP3_306_63445_20140521_030723_inLine +BABEL_OP3_306_63445_20140521_030723_outLine +BABEL_OP3_306_63481_20140522_195610_inLine +BABEL_OP3_306_63481_20140522_195610_outLine +BABEL_OP3_306_64494_20140605_043852_inLine +BABEL_OP3_306_64494_20140605_043852_outLine +BABEL_OP3_306_66026_20141101_233612_inLine +BABEL_OP3_306_66026_20141101_233612_outLine +BABEL_OP3_306_67283_20140606_231809_inLine 
+BABEL_OP3_306_67283_20140606_231809_outLine +BABEL_OP3_306_69992_20150421_045903_inLine +BABEL_OP3_306_69992_20150421_045903_outLine +BABEL_OP3_306_70452_20140531_022425_inLine +BABEL_OP3_306_70452_20140531_022425_outLine +BABEL_OP3_306_72073_20150220_210400_inLine +BABEL_OP3_306_72073_20150220_210400_outLine +BABEL_OP3_306_73518_20141028_214326_inLine +BABEL_OP3_306_73518_20141028_214326_outLine +BABEL_OP3_306_73591_20140510_022335_inLine +BABEL_OP3_306_73591_20140510_022335_outLine +BABEL_OP3_306_73814_20140724_034710_inLine +BABEL_OP3_306_73814_20140724_034710_outLine +BABEL_OP3_306_75342_20141006_210132_inLine +BABEL_OP3_306_75342_20141006_210132_outLine +BABEL_OP3_306_75342_20141006_211900_inLine +BABEL_OP3_306_75342_20141006_211900_outLine +BABEL_OP3_306_76499_20140729_230952_inLine +BABEL_OP3_306_76499_20140729_230952_outLine +BABEL_OP3_306_78877_20140527_221925_inLine +BABEL_OP3_306_78877_20140527_221925_outLine +BABEL_OP3_306_79107_20150418_021409_inLine +BABEL_OP3_306_79107_20150418_021409_outLine +BABEL_OP3_306_84029_20150415_035216_inLine +BABEL_OP3_306_84029_20150415_035216_outLine +BABEL_OP3_306_84125_20140519_232101_inLine +BABEL_OP3_306_84125_20140519_232101_outLine +BABEL_OP3_306_84547_20140514_224528_inLine +BABEL_OP3_306_84547_20140514_224528_outLine +BABEL_OP3_306_87693_20140620_002643_inLine +BABEL_OP3_306_87693_20140620_002643_outLine +BABEL_OP3_306_88686_20150402_213711_inLine +BABEL_OP3_306_88686_20150402_213711_outLine +BABEL_OP3_306_88988_20150317_002311_inLine +BABEL_OP3_306_88988_20150317_002311_outLine +BABEL_OP3_306_90935_20140725_035705_inLine +BABEL_OP3_306_90935_20140725_035705_outLine +BABEL_OP3_306_92942_20140723_005927_inLine +BABEL_OP3_306_92942_20140723_005927_outLine +BABEL_OP3_306_93937_20150317_060204_inLine +BABEL_OP3_306_93937_20150317_060204_outLine +BABEL_OP3_306_94713_20140529_005611_inLine +BABEL_OP3_306_94713_20140529_005611_outLine +BABEL_OP3_306_95490_20140521_225751_inLine +BABEL_OP3_306_95490_20140521_225751_outLine +BABEL_OP3_306_95935_20141028_222645_inLine +BABEL_OP3_306_95935_20141028_222645_outLine +BABEL_OP3_306_96324_20140531_010613_inLine +BABEL_OP3_306_96324_20140531_010613_outLine +BABEL_OP3_306_96405_20140606_005741_inLine +BABEL_OP3_306_96405_20140606_005741_outLine +BABEL_OP3_306_96680_20140528_005805_inLine +BABEL_OP3_306_96680_20140528_005805_outLine +BABEL_OP3_306_96910_20140605_201948_inLine +BABEL_OP3_306_96910_20140605_201948_outLine +BABEL_OP3_306_96934_20140604_223915_inLine +BABEL_OP3_306_96934_20140604_223915_outLine +BABEL_OP3_306_98489_20140612_194947_inLine +BABEL_OP3_306_98489_20140612_194947_outLine +BABEL_OP3_306_98489_20140612_195637_inLine +BABEL_OP3_306_98489_20140612_195637_outLine +BABEL_OP3_306_99401_20140714_020007_inLine +BABEL_OP3_306_99401_20140714_020007_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/sub-train.list b/egs/babel/s5d/conf/lists/306-igbo/sub-train.list new file mode 100644 index 00000000000..f72794f4c94 --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/sub-train.list @@ -0,0 +1,132 @@ +BABEL_OP3_306_10524_20150307_210859_inLine +BABEL_OP3_306_10524_20150307_210859_outLine +BABEL_OP3_306_14575_20140530_194144_inLine +BABEL_OP3_306_14575_20140530_194144_outLine +BABEL_OP3_306_15926_20140815_011013_inLine +BABEL_OP3_306_18490_20150402_010442_inLine +BABEL_OP3_306_18490_20150402_010442_outLine +BABEL_OP3_306_19444_20150214_045709_inLine +BABEL_OP3_306_19444_20150214_045709_outLine +BABEL_OP3_306_20721_20140529_000851_inLine +BABEL_OP3_306_20721_20140529_000851_outLine 
+BABEL_OP3_306_21581_20140724_022000_inLine +BABEL_OP3_306_21581_20140724_022000_outLine +BABEL_OP3_306_24037_20140524_232238_inLine +BABEL_OP3_306_24037_20140524_232238_outLine +BABEL_OP3_306_26074_20140815_015119_inLine +BABEL_OP3_306_26074_20140815_015119_outLine +BABEL_OP3_306_26478_20150317_053650_inLine +BABEL_OP3_306_26478_20150317_053650_outLine +BABEL_OP3_306_27218_20140625_013736_inLine +BABEL_OP3_306_28538_20140919_192901_inLine +BABEL_OP3_306_28538_20140919_192901_outLine +BABEL_OP3_306_28945_20140610_222125_inLine +BABEL_OP3_306_28945_20140610_222125_outLine +BABEL_OP3_306_31182_20141028_015316_inLine +BABEL_OP3_306_31182_20141028_015316_outLine +BABEL_OP3_306_31346_20141029_183248_inLine +BABEL_OP3_306_31346_20141029_183248_outLine +BABEL_OP3_306_33840_20141031_013533_inLine +BABEL_OP3_306_33840_20141031_013533_outLine +BABEL_OP3_306_33840_20141031_014151_inLine +BABEL_OP3_306_33840_20141031_014151_outLine +BABEL_OP3_306_36293_20140521_011821_inLine +BABEL_OP3_306_36293_20140521_011821_outLine +BABEL_OP3_306_40686_20140523_014206_inLine +BABEL_OP3_306_40686_20140523_014206_outLine +BABEL_OP3_306_44709_20140728_212605_inLine +BABEL_OP3_306_44709_20140728_212605_outLine +BABEL_OP3_306_48610_20140604_003825_inLine +BABEL_OP3_306_48610_20140604_003825_outLine +BABEL_OP3_306_49437_20141029_030600_inLine +BABEL_OP3_306_49437_20141029_030600_outLine +BABEL_OP3_306_50175_20150402_210041_inLine +BABEL_OP3_306_50175_20150402_210041_outLine +BABEL_OP3_306_50962_20140605_232213_inLine +BABEL_OP3_306_50962_20140605_232213_outLine +BABEL_OP3_306_55818_20140603_031605_inLine +BABEL_OP3_306_55818_20140603_031605_outLine +BABEL_OP3_306_55902_20150313_043244_inLine +BABEL_OP3_306_55902_20150313_043244_outLine +BABEL_OP3_306_55968_20140515_005800_inLine +BABEL_OP3_306_55968_20140515_005800_outLine +BABEL_OP3_306_56925_20150214_231609_inLine +BABEL_OP3_306_56925_20150214_231609_outLine +BABEL_OP3_306_59898_20150411_024935_inLine +BABEL_OP3_306_59898_20150411_024935_outLine +BABEL_OP3_306_62491_20140528_021234_inLine +BABEL_OP3_306_62491_20140528_021234_outLine +BABEL_OP3_306_62724_20141031_231843_inLine +BABEL_OP3_306_62724_20141031_231843_outLine +BABEL_OP3_306_63265_20150115_213217_inLine +BABEL_OP3_306_63265_20150115_213217_outLine +BABEL_OP3_306_63671_20150420_041005_inLine +BABEL_OP3_306_63671_20150420_041005_outLine +BABEL_OP3_306_66641_20150422_025109_inLine +BABEL_OP3_306_66641_20150422_025109_outLine +BABEL_OP3_306_67622_20140521_015356_inLine +BABEL_OP3_306_67622_20140521_015356_outLine +BABEL_OP3_306_70110_20140514_211101_inLine +BABEL_OP3_306_70110_20140514_211101_outLine +BABEL_OP3_306_70110_20140514_221144_inLine +BABEL_OP3_306_70110_20140514_221144_outLine +BABEL_OP3_306_72324_20140724_022916_inLine +BABEL_OP3_306_72324_20140724_022916_outLine +BABEL_OP3_306_72324_20140724_024048_inLine +BABEL_OP3_306_72324_20140724_024048_outLine +BABEL_OP3_306_73119_20140603_013443_inLine +BABEL_OP3_306_73119_20140603_013443_outLine +BABEL_OP3_306_74121_20140920_001224_inLine +BABEL_OP3_306_74121_20140920_001224_outLine +BABEL_OP3_306_74280_20140515_234933_inLine +BABEL_OP3_306_74280_20140515_234933_outLine +BABEL_OP3_306_78398_20140604_220522_inLine +BABEL_OP3_306_78398_20140604_220522_outLine +BABEL_OP3_306_78511_20141030_232402_inLine +BABEL_OP3_306_78511_20141030_232402_outLine +BABEL_OP3_306_80306_20140729_235651_inLine +BABEL_OP3_306_80306_20140729_235651_outLine +BABEL_OP3_306_81287_20141009_184932_inLine +BABEL_OP3_306_81287_20141009_184932_outLine 
+BABEL_OP3_306_82035_20140812_211933_inLine +BABEL_OP3_306_82035_20140812_211933_outLine +BABEL_OP3_306_82935_20141027_220108_inLine +BABEL_OP3_306_82935_20141027_220108_outLine +BABEL_OP3_306_82935_20141027_221034_inLine +BABEL_OP3_306_82935_20141027_221034_outLine +BABEL_OP3_306_83651_20140606_023153_inLine +BABEL_OP3_306_83651_20140606_023153_outLine +BABEL_OP3_306_84768_20150416_212057_inLine +BABEL_OP3_306_84768_20150416_212057_outLine +BABEL_OP3_306_85028_20141029_200629_inLine +BABEL_OP3_306_85028_20141029_200629_outLine +BABEL_OP3_306_85647_20140805_005301_inLine +BABEL_OP3_306_85647_20140805_005301_outLine +BABEL_OP3_306_86888_20140801_232454_inLine +BABEL_OP3_306_86888_20140801_232454_outLine +BABEL_OP3_306_89358_20141003_194649_inLine +BABEL_OP3_306_89358_20141003_194649_outLine +BABEL_OP3_306_90737_20140903_235501_inLine +BABEL_OP3_306_90737_20140903_235501_outLine +BABEL_OP3_306_91266_20150215_015545_inLine +BABEL_OP3_306_91266_20150215_015545_outLine +BABEL_OP3_306_91266_20150215_022001_inLine +BABEL_OP3_306_91266_20150215_022001_outLine +BABEL_OP3_306_92941_20140607_001711_inLine +BABEL_OP3_306_92941_20140607_001711_outLine +BABEL_OP3_306_92941_20140607_003034_inLine +BABEL_OP3_306_92941_20140607_003034_outLine +BABEL_OP3_306_93632_20141103_184555_inLine +BABEL_OP3_306_93632_20141103_184555_outLine +BABEL_OP3_306_93946_20141101_211743_inLine +BABEL_OP3_306_93946_20141101_211743_outLine +BABEL_OP3_306_93964_20140730_022556_inLine +BABEL_OP3_306_93964_20140730_022556_outLine +BABEL_OP3_306_94409_20141006_205245_inLine +BABEL_OP3_306_94409_20141006_205245_outLine +BABEL_OP3_306_95399_20140905_005504_inLine +BABEL_OP3_306_95399_20140905_005504_outLine +BABEL_OP3_306_97588_20140521_051503_inLine +BABEL_OP3_306_97588_20140521_051503_outLine +BABEL_OP3_306_99344_20140801_002154_inLine +BABEL_OP3_306_99344_20140801_002154_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/306-igbo/sub-train.untranscribed.list new file mode 100644 index 00000000000..7ca400d26e5 --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/sub-train.untranscribed.list @@ -0,0 +1,380 @@ +BABEL_OP3_306_10188_20140511_001332_inLine +BABEL_OP3_306_10188_20140511_001332_outLine +BABEL_OP3_306_10313_20140523_024428_inLine +BABEL_OP3_306_10313_20140523_024428_outLine +BABEL_OP3_306_10319_20140522_015112_inLine +BABEL_OP3_306_10319_20140522_015112_outLine +BABEL_OP3_306_10416_20140802_195508_inLine +BABEL_OP3_306_10416_20140802_195508_outLine +BABEL_OP3_306_10974_20140805_011808_inLine +BABEL_OP3_306_10974_20140805_011808_outLine +BABEL_OP3_306_12036_20140604_193658_inLine +BABEL_OP3_306_12036_20140604_193658_outLine +BABEL_OP3_306_12242_20140601_233200_inLine +BABEL_OP3_306_12242_20140601_233200_outLine +BABEL_OP3_306_13324_20140625_222242_inLine +BABEL_OP3_306_13324_20140625_223418_inLine +BABEL_OP3_306_13561_20140802_043219_inLine +BABEL_OP3_306_13561_20140802_043219_outLine +BABEL_OP3_306_14141_20141223_040734_inLine +BABEL_OP3_306_14141_20141223_040734_outLine +BABEL_OP3_306_14229_20150304_204617_inLine +BABEL_OP3_306_14237_20140531_215051_inLine +BABEL_OP3_306_14237_20140531_215051_outLine +BABEL_OP3_306_14814_20140602_011013_inLine +BABEL_OP3_306_14814_20140602_011013_outLine +BABEL_OP3_306_15024_20140904_235714_inLine +BABEL_OP3_306_15024_20140904_235714_outLine +BABEL_OP3_306_15163_20141006_024649_inLine +BABEL_OP3_306_15163_20141006_024649_outLine +BABEL_OP3_306_15382_20140730_010226_inLine +BABEL_OP3_306_15382_20140730_010226_outLine 
+BABEL_OP3_306_16184_20140519_222131_inLine +BABEL_OP3_306_16184_20140519_222131_outLine +BABEL_OP3_306_16351_20140524_195830_inLine +BABEL_OP3_306_16351_20140524_195830_outLine +BABEL_OP3_306_16787_20140802_223754_inLine +BABEL_OP3_306_16787_20140802_223754_outLine +BABEL_OP3_306_16839_20141030_003721_inLine +BABEL_OP3_306_16839_20141030_003721_outLine +BABEL_OP3_306_16938_20140809_233743_inLine +BABEL_OP3_306_17472_20150318_193931_inLine +BABEL_OP3_306_17472_20150318_193931_outLine +BABEL_OP3_306_17511_20150116_221327_inLine +BABEL_OP3_306_17511_20150116_221327_outLine +BABEL_OP3_306_17881_20150304_004415_inLine +BABEL_OP3_306_17881_20150304_004415_outLine +BABEL_OP3_306_18280_20150223_175908_inLine +BABEL_OP3_306_18280_20150223_175908_outLine +BABEL_OP3_306_18370_20150223_190452_inLine +BABEL_OP3_306_18370_20150223_190452_outLine +BABEL_OP3_306_18863_20141103_232200_inLine +BABEL_OP3_306_18863_20141103_232200_outLine +BABEL_OP3_306_19767_20150317_173511_inLine +BABEL_OP3_306_19767_20150317_173511_outLine +BABEL_OP3_306_21244_20150303_021843_inLine +BABEL_OP3_306_21244_20150303_021843_outLine +BABEL_OP3_306_21892_20141031_004104_inLine +BABEL_OP3_306_21892_20141031_004104_outLine +BABEL_OP3_306_22021_20150421_200500_inLine +BABEL_OP3_306_22021_20150421_200500_outLine +BABEL_OP3_306_22494_20141004_000311_inLine +BABEL_OP3_306_22494_20141004_000311_outLine +BABEL_OP3_306_22643_20140526_192640_inLine +BABEL_OP3_306_22643_20140526_192640_outLine +BABEL_OP3_306_23355_20150306_040413_inLine +BABEL_OP3_306_23355_20150306_040413_outLine +BABEL_OP3_306_23395_20140815_012335_inLine +BABEL_OP3_306_23395_20140815_012335_outLine +BABEL_OP3_306_24270_20141009_010150_inLine +BABEL_OP3_306_24270_20141009_010150_outLine +BABEL_OP3_306_24679_20140521_043344_inLine +BABEL_OP3_306_24679_20140521_043344_outLine +BABEL_OP3_306_25767_20140603_022935_inLine +BABEL_OP3_306_25767_20140603_022935_outLine +BABEL_OP3_306_26388_20140605_212825_inLine +BABEL_OP3_306_26388_20140605_212825_outLine +BABEL_OP3_306_26574_20141028_193409_inLine +BABEL_OP3_306_26574_20141028_193409_outLine +BABEL_OP3_306_26836_20140606_012758_inLine +BABEL_OP3_306_26836_20140606_012758_outLine +BABEL_OP3_306_26869_20150311_010234_inLine +BABEL_OP3_306_26869_20150311_010234_outLine +BABEL_OP3_306_27014_20140525_005218_inLine +BABEL_OP3_306_27014_20140525_005218_outLine +BABEL_OP3_306_27367_20140524_212214_inLine +BABEL_OP3_306_27367_20140524_212214_outLine +BABEL_OP3_306_30250_20140520_201955_inLine +BABEL_OP3_306_30250_20140520_201955_outLine +BABEL_OP3_306_30395_20140620_010240_inLine +BABEL_OP3_306_30395_20140620_010240_outLine +BABEL_OP3_306_30395_20140620_011044_inLine +BABEL_OP3_306_30395_20140620_011044_outLine +BABEL_OP3_306_31074_20150120_001644_inLine +BABEL_OP3_306_31074_20150120_001644_outLine +BABEL_OP3_306_31184_20141006_222942_inLine +BABEL_OP3_306_31184_20141006_222942_outLine +BABEL_OP3_306_31992_20140714_213448_inLine +BABEL_OP3_306_31992_20140714_213448_outLine +BABEL_OP3_306_32169_20150311_001538_inLine +BABEL_OP3_306_32169_20150311_001538_outLine +BABEL_OP3_306_32832_20141027_221739_inLine +BABEL_OP3_306_32832_20141027_221739_outLine +BABEL_OP3_306_33251_20140725_025307_inLine +BABEL_OP3_306_33251_20140725_025307_outLine +BABEL_OP3_306_33476_20140730_232844_inLine +BABEL_OP3_306_33476_20140730_232844_outLine +BABEL_OP3_306_33951_20140725_061646_inLine +BABEL_OP3_306_33951_20140725_061646_outLine +BABEL_OP3_306_34564_20141026_225715_inLine +BABEL_OP3_306_34564_20141026_225715_outLine 
+BABEL_OP3_306_34564_20141026_230434_inLine +BABEL_OP3_306_34564_20141026_230434_outLine +BABEL_OP3_306_36059_20141223_034056_inLine +BABEL_OP3_306_36059_20141223_034056_outLine +BABEL_OP3_306_36147_20150215_051814_inLine +BABEL_OP3_306_36147_20150215_051814_outLine +BABEL_OP3_306_36364_20150123_012425_inLine +BABEL_OP3_306_36364_20150123_012425_outLine +BABEL_OP3_306_36505_20141027_211503_inLine +BABEL_OP3_306_36505_20141027_211503_outLine +BABEL_OP3_306_37007_20140527_013428_inLine +BABEL_OP3_306_37007_20140527_013428_outLine +BABEL_OP3_306_38323_20150418_203354_inLine +BABEL_OP3_306_38323_20150418_203354_outLine +BABEL_OP3_306_38554_20140517_054801_inLine +BABEL_OP3_306_38554_20140517_054801_outLine +BABEL_OP3_306_38554_20140517_055631_inLine +BABEL_OP3_306_38554_20140517_055631_outLine +BABEL_OP3_306_39555_20141030_012732_inLine +BABEL_OP3_306_39555_20141030_012732_outLine +BABEL_OP3_306_40330_20150418_213611_inLine +BABEL_OP3_306_40330_20150418_213611_outLine +BABEL_OP3_306_40713_20140605_205025_inLine +BABEL_OP3_306_40713_20140605_205025_outLine +BABEL_OP3_306_41233_20141029_235039_inLine +BABEL_OP3_306_41233_20141029_235039_outLine +BABEL_OP3_306_41233_20141030_004714_inLine +BABEL_OP3_306_41233_20141030_004714_outLine +BABEL_OP3_306_41469_20150405_025457_inLine +BABEL_OP3_306_41469_20150405_025457_outLine +BABEL_OP3_306_41592_20140731_180118_inLine +BABEL_OP3_306_41592_20140731_180118_outLine +BABEL_OP3_306_41920_20140531_032613_inLine +BABEL_OP3_306_41920_20140531_032613_outLine +BABEL_OP3_306_42126_20140528_024621_inLine +BABEL_OP3_306_42126_20140528_024621_outLine +BABEL_OP3_306_42231_20141009_191123_inLine +BABEL_OP3_306_42231_20141009_191123_outLine +BABEL_OP3_306_43286_20140522_203724_inLine +BABEL_OP3_306_43286_20140522_203724_outLine +BABEL_OP3_306_43388_20140802_221518_inLine +BABEL_OP3_306_43388_20140802_221518_outLine +BABEL_OP3_306_43388_20140802_222040_inLine +BABEL_OP3_306_43388_20140802_222040_outLine +BABEL_OP3_306_43388_20140802_222715_inLine +BABEL_OP3_306_43388_20140802_222715_outLine +BABEL_OP3_306_43784_20140608_022047_inLine +BABEL_OP3_306_43784_20140608_022047_outLine +BABEL_OP3_306_46066_20141027_233339_inLine +BABEL_OP3_306_46066_20141027_233339_outLine +BABEL_OP3_306_46310_20140602_230134_inLine +BABEL_OP3_306_46310_20140602_230134_outLine +BABEL_OP3_306_46550_20140605_222807_inLine +BABEL_OP3_306_46550_20140605_222807_outLine +BABEL_OP3_306_46625_20140606_202920_inLine +BABEL_OP3_306_46625_20140606_202920_outLine +BABEL_OP3_306_46757_20140920_030716_inLine +BABEL_OP3_306_46757_20140920_030716_outLine +BABEL_OP3_306_46905_20140528_215718_inLine +BABEL_OP3_306_46905_20140528_215718_outLine +BABEL_OP3_306_47923_20150131_000157_inLine +BABEL_OP3_306_47923_20150131_000157_outLine +BABEL_OP3_306_49502_20150403_222234_inLine +BABEL_OP3_306_49502_20150403_222234_outLine +BABEL_OP3_306_50565_20140521_040110_inLine +BABEL_OP3_306_50565_20140521_040110_outLine +BABEL_OP3_306_51156_20150116_191446_inLine +BABEL_OP3_306_51156_20150116_191446_outLine +BABEL_OP3_306_52058_20140526_231450_inLine +BABEL_OP3_306_52058_20140526_231450_outLine +BABEL_OP3_306_52265_20150320_030911_inLine +BABEL_OP3_306_52265_20150320_030911_outLine +BABEL_OP3_306_52932_20140608_003800_inLine +BABEL_OP3_306_52932_20140608_003800_outLine +BABEL_OP3_306_53206_20140523_191711_inLine +BABEL_OP3_306_53206_20140523_191711_outLine +BABEL_OP3_306_53758_20150227_224132_inLine +BABEL_OP3_306_53758_20150227_224132_outLine +BABEL_OP3_306_54160_20140602_201949_inLine 
+BABEL_OP3_306_54160_20140602_201949_outLine +BABEL_OP3_306_54594_20140528_232952_inLine +BABEL_OP3_306_54594_20140528_232952_outLine +BABEL_OP3_306_54697_20141027_014534_inLine +BABEL_OP3_306_54697_20141027_014534_outLine +BABEL_OP3_306_54697_20141027_015651_inLine +BABEL_OP3_306_54697_20141027_015651_outLine +BABEL_OP3_306_56306_20141111_210052_inLine +BABEL_OP3_306_56306_20141111_210052_outLine +BABEL_OP3_306_56523_20140729_211409_inLine +BABEL_OP3_306_56523_20140729_211409_outLine +BABEL_OP3_306_56826_20141005_005430_inLine +BABEL_OP3_306_56826_20141005_005430_outLine +BABEL_OP3_306_57065_20140813_021110_inLine +BABEL_OP3_306_57065_20140813_021110_outLine +BABEL_OP3_306_57654_20140622_013309_inLine +BABEL_OP3_306_57654_20140622_013309_outLine +BABEL_OP3_306_58145_20140724_045437_inLine +BABEL_OP3_306_58145_20140724_045437_outLine +BABEL_OP3_306_58489_20141026_005336_inLine +BABEL_OP3_306_58489_20141026_005336_outLine +BABEL_OP3_306_59078_20141009_004020_inLine +BABEL_OP3_306_59078_20141009_004020_outLine +BABEL_OP3_306_59509_20140805_224009_inLine +BABEL_OP3_306_59509_20140805_224009_outLine +BABEL_OP3_306_59509_20140805_224625_inLine +BABEL_OP3_306_59509_20140805_224625_outLine +BABEL_OP3_306_60310_20141004_230555_inLine +BABEL_OP3_306_60310_20141004_230555_outLine +BABEL_OP3_306_60352_20140806_021626_inLine +BABEL_OP3_306_60352_20140806_021626_outLine +BABEL_OP3_306_61219_20140603_003614_inLine +BABEL_OP3_306_61219_20140603_003614_outLine +BABEL_OP3_306_61225_20140515_013438_inLine +BABEL_OP3_306_61225_20140515_013438_outLine +BABEL_OP3_306_61435_20141029_014344_inLine +BABEL_OP3_306_61435_20141029_014344_outLine +BABEL_OP3_306_61438_20140527_213221_inLine +BABEL_OP3_306_61438_20140527_213221_outLine +BABEL_OP3_306_61888_20141104_013244_inLine +BABEL_OP3_306_61888_20141104_013244_outLine +BABEL_OP3_306_63220_20140811_231222_inLine +BABEL_OP3_306_63220_20140811_231222_outLine +BABEL_OP3_306_63766_20150226_043203_inLine +BABEL_OP3_306_63766_20150226_043203_outLine +BABEL_OP3_306_64350_20140621_190438_inLine +BABEL_OP3_306_64350_20140621_190438_outLine +BABEL_OP3_306_64398_20140731_012313_inLine +BABEL_OP3_306_64398_20140731_012313_outLine +BABEL_OP3_306_64796_20150407_014947_inLine +BABEL_OP3_306_64796_20150407_014947_outLine +BABEL_OP3_306_65723_20140604_235722_inLine +BABEL_OP3_306_65723_20140604_235722_outLine +BABEL_OP3_306_66350_20150225_020558_inLine +BABEL_OP3_306_66350_20150225_020558_outLine +BABEL_OP3_306_66916_20140522_002931_inLine +BABEL_OP3_306_66916_20140522_002931_outLine +BABEL_OP3_306_66967_20140606_235110_inLine +BABEL_OP3_306_66967_20140606_235110_outLine +BABEL_OP3_306_67373_20140624_224214_inLine +BABEL_OP3_306_67373_20140624_224214_outLine +BABEL_OP3_306_67373_20140624_225314_inLine +BABEL_OP3_306_67373_20140624_225314_outLine +BABEL_OP3_306_67401_20140815_032242_inLine +BABEL_OP3_306_67401_20140815_032242_outLine +BABEL_OP3_306_67592_20141028_032006_inLine +BABEL_OP3_306_67592_20141028_032006_outLine +BABEL_OP3_306_67726_20140523_010156_inLine +BABEL_OP3_306_67726_20140523_010156_outLine +BABEL_OP3_306_67999_20141031_215535_inLine +BABEL_OP3_306_67999_20141031_215535_outLine +BABEL_OP3_306_68059_20140802_023600_inLine +BABEL_OP3_306_68059_20140802_023600_outLine +BABEL_OP3_306_68068_20141004_180553_inLine +BABEL_OP3_306_68068_20141004_180553_outLine +BABEL_OP3_306_69578_20140729_234354_inLine +BABEL_OP3_306_69578_20140729_234354_outLine +BABEL_OP3_306_70221_20140801_213304_inLine +BABEL_OP3_306_70221_20140801_213304_outLine 
+BABEL_OP3_306_70293_20150216_013540_inLine +BABEL_OP3_306_70293_20150216_013540_outLine +BABEL_OP3_306_70639_20140528_035113_inLine +BABEL_OP3_306_70639_20140528_035113_outLine +BABEL_OP3_306_70794_20140520_000549_inLine +BABEL_OP3_306_70794_20140520_000549_outLine +BABEL_OP3_306_71401_20150221_015039_inLine +BABEL_OP3_306_71401_20150221_015039_outLine +BABEL_OP3_306_71976_20140529_033557_inLine +BABEL_OP3_306_71976_20140529_033557_outLine +BABEL_OP3_306_72587_20140811_040036_inLine +BABEL_OP3_306_72587_20140811_040036_outLine +BABEL_OP3_306_72844_20140518_010610_inLine +BABEL_OP3_306_72844_20140518_010610_outLine +BABEL_OP3_306_73511_20141005_215627_inLine +BABEL_OP3_306_73511_20141005_215627_outLine +BABEL_OP3_306_75505_20140512_201003_inLine +BABEL_OP3_306_75505_20140512_201003_outLine +BABEL_OP3_306_77146_20140521_002843_inLine +BABEL_OP3_306_77146_20140521_002843_outLine +BABEL_OP3_306_77225_20150327_022842_inLine +BABEL_OP3_306_77225_20150327_022842_outLine +BABEL_OP3_306_78194_20140611_232911_inLine +BABEL_OP3_306_78194_20140611_232911_outLine +BABEL_OP3_306_78254_20140602_034556_inLine +BABEL_OP3_306_78254_20140602_034556_outLine +BABEL_OP3_306_78604_20140625_020654_inLine +BABEL_OP3_306_78604_20140625_020654_outLine +BABEL_OP3_306_79139_20140725_061931_inLine +BABEL_OP3_306_79139_20140725_061931_outLine +BABEL_OP3_306_80383_20150325_015939_inLine +BABEL_OP3_306_80383_20150325_015939_outLine +BABEL_OP3_306_80781_20140729_014618_inLine +BABEL_OP3_306_80781_20140729_014618_outLine +BABEL_OP3_306_81427_20140723_232926_inLine +BABEL_OP3_306_81427_20140723_232926_outLine +BABEL_OP3_306_81581_20140529_022004_inLine +BABEL_OP3_306_81581_20140529_022004_outLine +BABEL_OP3_306_82303_20150313_044844_inLine +BABEL_OP3_306_82303_20150313_044844_outLine +BABEL_OP3_306_82637_20140514_233142_inLine +BABEL_OP3_306_82637_20140514_233142_outLine +BABEL_OP3_306_84430_20150331_191720_inLine +BABEL_OP3_306_84430_20150331_191720_outLine +BABEL_OP3_306_84609_20150401_222657_inLine +BABEL_OP3_306_84609_20150401_222657_outLine +BABEL_OP3_306_84611_20140605_003243_inLine +BABEL_OP3_306_84611_20140605_003243_outLine +BABEL_OP3_306_84815_20141101_002538_inLine +BABEL_OP3_306_84815_20141101_002538_outLine +BABEL_OP3_306_86191_20140603_042134_inLine +BABEL_OP3_306_86191_20140603_042134_outLine +BABEL_OP3_306_86433_20140816_072513_inLine +BABEL_OP3_306_86433_20140816_072513_outLine +BABEL_OP3_306_86472_20140730_223950_inLine +BABEL_OP3_306_86472_20140730_223950_outLine +BABEL_OP3_306_86845_20140524_192542_inLine +BABEL_OP3_306_86845_20140524_192542_outLine +BABEL_OP3_306_86952_20140531_040557_inLine +BABEL_OP3_306_86952_20140531_040557_outLine +BABEL_OP3_306_87179_20141029_021040_inLine +BABEL_OP3_306_87179_20141029_021040_outLine +BABEL_OP3_306_87353_20150327_191436_inLine +BABEL_OP3_306_87353_20150327_191436_outLine +BABEL_OP3_306_87884_20141101_223809_inLine +BABEL_OP3_306_87884_20141101_223809_outLine +BABEL_OP3_306_88601_20141003_171755_inLine +BABEL_OP3_306_88601_20141003_171755_outLine +BABEL_OP3_306_88661_20141005_225341_inLine +BABEL_OP3_306_88661_20141005_225341_outLine +BABEL_OP3_306_89045_20140517_213454_inLine +BABEL_OP3_306_89045_20140517_213454_outLine +BABEL_OP3_306_89059_20141104_210223_inLine +BABEL_OP3_306_89059_20141104_210223_outLine +BABEL_OP3_306_89059_20141104_211433_inLine +BABEL_OP3_306_89059_20141104_211433_outLine +BABEL_OP3_306_89457_20140730_002520_inLine +BABEL_OP3_306_89457_20140730_002520_outLine +BABEL_OP3_306_90440_20150312_002806_inLine 
+BABEL_OP3_306_90440_20150312_002806_outLine +BABEL_OP3_306_91463_20140729_001809_inLine +BABEL_OP3_306_91463_20140729_001809_outLine +BABEL_OP3_306_91825_20150408_204309_inLine +BABEL_OP3_306_91825_20150408_204309_outLine +BABEL_OP3_306_91891_20141009_203853_inLine +BABEL_OP3_306_91891_20141009_203853_outLine +BABEL_OP3_306_92440_20150326_232645_inLine +BABEL_OP3_306_92440_20150326_232645_outLine +BABEL_OP3_306_92509_20140521_023136_inLine +BABEL_OP3_306_92509_20140521_023136_outLine +BABEL_OP3_306_92809_20150419_011906_inLine +BABEL_OP3_306_92809_20150419_011906_outLine +BABEL_OP3_306_94253_20140606_032103_inLine +BABEL_OP3_306_94253_20140606_032103_outLine +BABEL_OP3_306_94869_20140515_230712_inLine +BABEL_OP3_306_94869_20140515_230712_outLine +BABEL_OP3_306_94978_20141115_234420_inLine +BABEL_OP3_306_94978_20141115_234420_outLine +BABEL_OP3_306_95124_20150416_012109_inLine +BABEL_OP3_306_95124_20150416_012109_outLine +BABEL_OP3_306_95598_20140509_043406_inLine +BABEL_OP3_306_95598_20140509_043406_outLine +BABEL_OP3_306_96730_20141028_230035_inLine +BABEL_OP3_306_96730_20141028_230035_outLine +BABEL_OP3_306_96820_20140802_051525_inLine +BABEL_OP3_306_96820_20140802_051525_outLine +BABEL_OP3_306_97570_20140801_224422_inLine +BABEL_OP3_306_97570_20140801_224422_outLine +BABEL_OP3_306_98311_20140604_201838_inLine +BABEL_OP3_306_98311_20140604_201838_outLine +BABEL_OP3_306_99920_20140604_212052_inLine +BABEL_OP3_306_99920_20140604_212052_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/training.list b/egs/babel/s5d/conf/lists/306-igbo/training.list new file mode 100644 index 00000000000..0504de58fb1 --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/training.list @@ -0,0 +1,512 @@ +BABEL_OP3_306_10188_20140511_001332_inLine +BABEL_OP3_306_10188_20140511_001332_outLine +BABEL_OP3_306_10313_20140523_024428_inLine +BABEL_OP3_306_10313_20140523_024428_outLine +BABEL_OP3_306_10319_20140522_015112_inLine +BABEL_OP3_306_10319_20140522_015112_outLine +BABEL_OP3_306_10416_20140802_195508_inLine +BABEL_OP3_306_10416_20140802_195508_outLine +BABEL_OP3_306_10524_20150307_210859_inLine +BABEL_OP3_306_10524_20150307_210859_outLine +BABEL_OP3_306_10974_20140805_011808_inLine +BABEL_OP3_306_10974_20140805_011808_outLine +BABEL_OP3_306_12036_20140604_193658_inLine +BABEL_OP3_306_12036_20140604_193658_outLine +BABEL_OP3_306_12242_20140601_233200_inLine +BABEL_OP3_306_12242_20140601_233200_outLine +BABEL_OP3_306_13324_20140625_222242_inLine +BABEL_OP3_306_13324_20140625_223418_inLine +BABEL_OP3_306_13561_20140802_043219_inLine +BABEL_OP3_306_13561_20140802_043219_outLine +BABEL_OP3_306_14141_20141223_040734_inLine +BABEL_OP3_306_14141_20141223_040734_outLine +BABEL_OP3_306_14229_20150304_204617_inLine +BABEL_OP3_306_14237_20140531_215051_inLine +BABEL_OP3_306_14237_20140531_215051_outLine +BABEL_OP3_306_14575_20140530_194144_inLine +BABEL_OP3_306_14575_20140530_194144_outLine +BABEL_OP3_306_14814_20140602_011013_inLine +BABEL_OP3_306_14814_20140602_011013_outLine +BABEL_OP3_306_15024_20140904_235714_inLine +BABEL_OP3_306_15024_20140904_235714_outLine +BABEL_OP3_306_15163_20141006_024649_inLine +BABEL_OP3_306_15163_20141006_024649_outLine +BABEL_OP3_306_15382_20140730_010226_inLine +BABEL_OP3_306_15382_20140730_010226_outLine +BABEL_OP3_306_15926_20140815_011013_inLine +BABEL_OP3_306_16184_20140519_222131_inLine +BABEL_OP3_306_16184_20140519_222131_outLine +BABEL_OP3_306_16351_20140524_195830_inLine +BABEL_OP3_306_16351_20140524_195830_outLine +BABEL_OP3_306_16787_20140802_223754_inLine 
+BABEL_OP3_306_16787_20140802_223754_outLine +BABEL_OP3_306_16839_20141030_003721_inLine +BABEL_OP3_306_16839_20141030_003721_outLine +BABEL_OP3_306_16938_20140809_233743_inLine +BABEL_OP3_306_17472_20150318_193931_inLine +BABEL_OP3_306_17472_20150318_193931_outLine +BABEL_OP3_306_17511_20150116_221327_inLine +BABEL_OP3_306_17511_20150116_221327_outLine +BABEL_OP3_306_17881_20150304_004415_inLine +BABEL_OP3_306_17881_20150304_004415_outLine +BABEL_OP3_306_18280_20150223_175908_inLine +BABEL_OP3_306_18280_20150223_175908_outLine +BABEL_OP3_306_18370_20150223_190452_inLine +BABEL_OP3_306_18370_20150223_190452_outLine +BABEL_OP3_306_18490_20150402_010442_inLine +BABEL_OP3_306_18490_20150402_010442_outLine +BABEL_OP3_306_18863_20141103_232200_inLine +BABEL_OP3_306_18863_20141103_232200_outLine +BABEL_OP3_306_19444_20150214_045709_inLine +BABEL_OP3_306_19444_20150214_045709_outLine +BABEL_OP3_306_19767_20150317_173511_inLine +BABEL_OP3_306_19767_20150317_173511_outLine +BABEL_OP3_306_20721_20140529_000851_inLine +BABEL_OP3_306_20721_20140529_000851_outLine +BABEL_OP3_306_21244_20150303_021843_inLine +BABEL_OP3_306_21244_20150303_021843_outLine +BABEL_OP3_306_21581_20140724_022000_inLine +BABEL_OP3_306_21581_20140724_022000_outLine +BABEL_OP3_306_21892_20141031_004104_inLine +BABEL_OP3_306_21892_20141031_004104_outLine +BABEL_OP3_306_22021_20150421_200500_inLine +BABEL_OP3_306_22021_20150421_200500_outLine +BABEL_OP3_306_22494_20141004_000311_inLine +BABEL_OP3_306_22494_20141004_000311_outLine +BABEL_OP3_306_22643_20140526_192640_inLine +BABEL_OP3_306_22643_20140526_192640_outLine +BABEL_OP3_306_23355_20150306_040413_inLine +BABEL_OP3_306_23355_20150306_040413_outLine +BABEL_OP3_306_23395_20140815_012335_inLine +BABEL_OP3_306_23395_20140815_012335_outLine +BABEL_OP3_306_24037_20140524_232238_inLine +BABEL_OP3_306_24037_20140524_232238_outLine +BABEL_OP3_306_24270_20141009_010150_inLine +BABEL_OP3_306_24270_20141009_010150_outLine +BABEL_OP3_306_24679_20140521_043344_inLine +BABEL_OP3_306_24679_20140521_043344_outLine +BABEL_OP3_306_25767_20140603_022935_inLine +BABEL_OP3_306_25767_20140603_022935_outLine +BABEL_OP3_306_26074_20140815_015119_inLine +BABEL_OP3_306_26074_20140815_015119_outLine +BABEL_OP3_306_26388_20140605_212825_inLine +BABEL_OP3_306_26388_20140605_212825_outLine +BABEL_OP3_306_26478_20150317_053650_inLine +BABEL_OP3_306_26478_20150317_053650_outLine +BABEL_OP3_306_26574_20141028_193409_inLine +BABEL_OP3_306_26574_20141028_193409_outLine +BABEL_OP3_306_26836_20140606_012758_inLine +BABEL_OP3_306_26836_20140606_012758_outLine +BABEL_OP3_306_26869_20150311_010234_inLine +BABEL_OP3_306_26869_20150311_010234_outLine +BABEL_OP3_306_27014_20140525_005218_inLine +BABEL_OP3_306_27014_20140525_005218_outLine +BABEL_OP3_306_27218_20140625_013736_inLine +BABEL_OP3_306_27367_20140524_212214_inLine +BABEL_OP3_306_27367_20140524_212214_outLine +BABEL_OP3_306_28538_20140919_192901_inLine +BABEL_OP3_306_28538_20140919_192901_outLine +BABEL_OP3_306_28945_20140610_222125_inLine +BABEL_OP3_306_28945_20140610_222125_outLine +BABEL_OP3_306_30250_20140520_201955_inLine +BABEL_OP3_306_30250_20140520_201955_outLine +BABEL_OP3_306_30395_20140620_010240_inLine +BABEL_OP3_306_30395_20140620_010240_outLine +BABEL_OP3_306_30395_20140620_011044_inLine +BABEL_OP3_306_30395_20140620_011044_outLine +BABEL_OP3_306_31074_20150120_001644_inLine +BABEL_OP3_306_31074_20150120_001644_outLine +BABEL_OP3_306_31182_20141028_015316_inLine +BABEL_OP3_306_31182_20141028_015316_outLine 
+BABEL_OP3_306_31184_20141006_222942_inLine +BABEL_OP3_306_31184_20141006_222942_outLine +BABEL_OP3_306_31346_20141029_183248_inLine +BABEL_OP3_306_31346_20141029_183248_outLine +BABEL_OP3_306_31992_20140714_213448_inLine +BABEL_OP3_306_31992_20140714_213448_outLine +BABEL_OP3_306_32169_20150311_001538_inLine +BABEL_OP3_306_32169_20150311_001538_outLine +BABEL_OP3_306_32832_20141027_221739_inLine +BABEL_OP3_306_32832_20141027_221739_outLine +BABEL_OP3_306_33251_20140725_025307_inLine +BABEL_OP3_306_33251_20140725_025307_outLine +BABEL_OP3_306_33476_20140730_232844_inLine +BABEL_OP3_306_33476_20140730_232844_outLine +BABEL_OP3_306_33840_20141031_013533_inLine +BABEL_OP3_306_33840_20141031_013533_outLine +BABEL_OP3_306_33840_20141031_014151_inLine +BABEL_OP3_306_33840_20141031_014151_outLine +BABEL_OP3_306_33951_20140725_061646_inLine +BABEL_OP3_306_33951_20140725_061646_outLine +BABEL_OP3_306_34564_20141026_225715_inLine +BABEL_OP3_306_34564_20141026_225715_outLine +BABEL_OP3_306_34564_20141026_230434_inLine +BABEL_OP3_306_34564_20141026_230434_outLine +BABEL_OP3_306_36059_20141223_034056_inLine +BABEL_OP3_306_36059_20141223_034056_outLine +BABEL_OP3_306_36147_20150215_051814_inLine +BABEL_OP3_306_36147_20150215_051814_outLine +BABEL_OP3_306_36293_20140521_011821_inLine +BABEL_OP3_306_36293_20140521_011821_outLine +BABEL_OP3_306_36364_20150123_012425_inLine +BABEL_OP3_306_36364_20150123_012425_outLine +BABEL_OP3_306_36505_20141027_211503_inLine +BABEL_OP3_306_36505_20141027_211503_outLine +BABEL_OP3_306_37007_20140527_013428_inLine +BABEL_OP3_306_37007_20140527_013428_outLine +BABEL_OP3_306_38323_20150418_203354_inLine +BABEL_OP3_306_38323_20150418_203354_outLine +BABEL_OP3_306_38554_20140517_054801_inLine +BABEL_OP3_306_38554_20140517_054801_outLine +BABEL_OP3_306_38554_20140517_055631_inLine +BABEL_OP3_306_38554_20140517_055631_outLine +BABEL_OP3_306_39555_20141030_012732_inLine +BABEL_OP3_306_39555_20141030_012732_outLine +BABEL_OP3_306_40330_20150418_213611_inLine +BABEL_OP3_306_40330_20150418_213611_outLine +BABEL_OP3_306_40686_20140523_014206_inLine +BABEL_OP3_306_40686_20140523_014206_outLine +BABEL_OP3_306_40713_20140605_205025_inLine +BABEL_OP3_306_40713_20140605_205025_outLine +BABEL_OP3_306_41233_20141029_235039_inLine +BABEL_OP3_306_41233_20141029_235039_outLine +BABEL_OP3_306_41233_20141030_004714_inLine +BABEL_OP3_306_41233_20141030_004714_outLine +BABEL_OP3_306_41469_20150405_025457_inLine +BABEL_OP3_306_41469_20150405_025457_outLine +BABEL_OP3_306_41592_20140731_180118_inLine +BABEL_OP3_306_41592_20140731_180118_outLine +BABEL_OP3_306_41920_20140531_032613_inLine +BABEL_OP3_306_41920_20140531_032613_outLine +BABEL_OP3_306_42126_20140528_024621_inLine +BABEL_OP3_306_42126_20140528_024621_outLine +BABEL_OP3_306_42231_20141009_191123_inLine +BABEL_OP3_306_42231_20141009_191123_outLine +BABEL_OP3_306_43286_20140522_203724_inLine +BABEL_OP3_306_43286_20140522_203724_outLine +BABEL_OP3_306_43388_20140802_221518_inLine +BABEL_OP3_306_43388_20140802_221518_outLine +BABEL_OP3_306_43388_20140802_222040_inLine +BABEL_OP3_306_43388_20140802_222040_outLine +BABEL_OP3_306_43388_20140802_222715_inLine +BABEL_OP3_306_43388_20140802_222715_outLine +BABEL_OP3_306_43784_20140608_022047_inLine +BABEL_OP3_306_43784_20140608_022047_outLine +BABEL_OP3_306_44709_20140728_212605_inLine +BABEL_OP3_306_44709_20140728_212605_outLine +BABEL_OP3_306_46066_20141027_233339_inLine +BABEL_OP3_306_46066_20141027_233339_outLine +BABEL_OP3_306_46310_20140602_230134_inLine 
+BABEL_OP3_306_46310_20140602_230134_outLine +BABEL_OP3_306_46550_20140605_222807_inLine +BABEL_OP3_306_46550_20140605_222807_outLine +BABEL_OP3_306_46625_20140606_202920_inLine +BABEL_OP3_306_46625_20140606_202920_outLine +BABEL_OP3_306_46757_20140920_030716_inLine +BABEL_OP3_306_46757_20140920_030716_outLine +BABEL_OP3_306_46905_20140528_215718_inLine +BABEL_OP3_306_46905_20140528_215718_outLine +BABEL_OP3_306_47923_20150131_000157_inLine +BABEL_OP3_306_47923_20150131_000157_outLine +BABEL_OP3_306_48610_20140604_003825_inLine +BABEL_OP3_306_48610_20140604_003825_outLine +BABEL_OP3_306_49437_20141029_030600_inLine +BABEL_OP3_306_49437_20141029_030600_outLine +BABEL_OP3_306_49502_20150403_222234_inLine +BABEL_OP3_306_49502_20150403_222234_outLine +BABEL_OP3_306_50175_20150402_210041_inLine +BABEL_OP3_306_50175_20150402_210041_outLine +BABEL_OP3_306_50565_20140521_040110_inLine +BABEL_OP3_306_50565_20140521_040110_outLine +BABEL_OP3_306_50962_20140605_232213_inLine +BABEL_OP3_306_50962_20140605_232213_outLine +BABEL_OP3_306_51156_20150116_191446_inLine +BABEL_OP3_306_51156_20150116_191446_outLine +BABEL_OP3_306_52058_20140526_231450_inLine +BABEL_OP3_306_52058_20140526_231450_outLine +BABEL_OP3_306_52265_20150320_030911_inLine +BABEL_OP3_306_52265_20150320_030911_outLine +BABEL_OP3_306_52932_20140608_003800_inLine +BABEL_OP3_306_52932_20140608_003800_outLine +BABEL_OP3_306_53206_20140523_191711_inLine +BABEL_OP3_306_53206_20140523_191711_outLine +BABEL_OP3_306_53758_20150227_224132_inLine +BABEL_OP3_306_53758_20150227_224132_outLine +BABEL_OP3_306_54160_20140602_201949_inLine +BABEL_OP3_306_54160_20140602_201949_outLine +BABEL_OP3_306_54594_20140528_232952_inLine +BABEL_OP3_306_54594_20140528_232952_outLine +BABEL_OP3_306_54697_20141027_014534_inLine +BABEL_OP3_306_54697_20141027_014534_outLine +BABEL_OP3_306_54697_20141027_015651_inLine +BABEL_OP3_306_54697_20141027_015651_outLine +BABEL_OP3_306_55818_20140603_031605_inLine +BABEL_OP3_306_55818_20140603_031605_outLine +BABEL_OP3_306_55902_20150313_043244_inLine +BABEL_OP3_306_55902_20150313_043244_outLine +BABEL_OP3_306_55968_20140515_005800_inLine +BABEL_OP3_306_55968_20140515_005800_outLine +BABEL_OP3_306_56306_20141111_210052_inLine +BABEL_OP3_306_56306_20141111_210052_outLine +BABEL_OP3_306_56523_20140729_211409_inLine +BABEL_OP3_306_56523_20140729_211409_outLine +BABEL_OP3_306_56826_20141005_005430_inLine +BABEL_OP3_306_56826_20141005_005430_outLine +BABEL_OP3_306_56925_20150214_231609_inLine +BABEL_OP3_306_56925_20150214_231609_outLine +BABEL_OP3_306_57065_20140813_021110_inLine +BABEL_OP3_306_57065_20140813_021110_outLine +BABEL_OP3_306_57654_20140622_013309_inLine +BABEL_OP3_306_57654_20140622_013309_outLine +BABEL_OP3_306_58145_20140724_045437_inLine +BABEL_OP3_306_58145_20140724_045437_outLine +BABEL_OP3_306_58489_20141026_005336_inLine +BABEL_OP3_306_58489_20141026_005336_outLine +BABEL_OP3_306_59078_20141009_004020_inLine +BABEL_OP3_306_59078_20141009_004020_outLine +BABEL_OP3_306_59509_20140805_224009_inLine +BABEL_OP3_306_59509_20140805_224009_outLine +BABEL_OP3_306_59509_20140805_224625_inLine +BABEL_OP3_306_59509_20140805_224625_outLine +BABEL_OP3_306_59898_20150411_024935_inLine +BABEL_OP3_306_59898_20150411_024935_outLine +BABEL_OP3_306_60310_20141004_230555_inLine +BABEL_OP3_306_60310_20141004_230555_outLine +BABEL_OP3_306_60352_20140806_021626_inLine +BABEL_OP3_306_60352_20140806_021626_outLine +BABEL_OP3_306_61219_20140603_003614_inLine +BABEL_OP3_306_61219_20140603_003614_outLine 
+BABEL_OP3_306_61225_20140515_013438_inLine +BABEL_OP3_306_61225_20140515_013438_outLine +BABEL_OP3_306_61435_20141029_014344_inLine +BABEL_OP3_306_61435_20141029_014344_outLine +BABEL_OP3_306_61438_20140527_213221_inLine +BABEL_OP3_306_61438_20140527_213221_outLine +BABEL_OP3_306_61888_20141104_013244_inLine +BABEL_OP3_306_61888_20141104_013244_outLine +BABEL_OP3_306_62491_20140528_021234_inLine +BABEL_OP3_306_62491_20140528_021234_outLine +BABEL_OP3_306_62724_20141031_231843_inLine +BABEL_OP3_306_62724_20141031_231843_outLine +BABEL_OP3_306_63220_20140811_231222_inLine +BABEL_OP3_306_63220_20140811_231222_outLine +BABEL_OP3_306_63265_20150115_213217_inLine +BABEL_OP3_306_63265_20150115_213217_outLine +BABEL_OP3_306_63671_20150420_041005_inLine +BABEL_OP3_306_63671_20150420_041005_outLine +BABEL_OP3_306_63766_20150226_043203_inLine +BABEL_OP3_306_63766_20150226_043203_outLine +BABEL_OP3_306_64350_20140621_190438_inLine +BABEL_OP3_306_64350_20140621_190438_outLine +BABEL_OP3_306_64398_20140731_012313_inLine +BABEL_OP3_306_64398_20140731_012313_outLine +BABEL_OP3_306_64796_20150407_014947_inLine +BABEL_OP3_306_64796_20150407_014947_outLine +BABEL_OP3_306_65723_20140604_235722_inLine +BABEL_OP3_306_65723_20140604_235722_outLine +BABEL_OP3_306_66350_20150225_020558_inLine +BABEL_OP3_306_66350_20150225_020558_outLine +BABEL_OP3_306_66641_20150422_025109_inLine +BABEL_OP3_306_66641_20150422_025109_outLine +BABEL_OP3_306_66916_20140522_002931_inLine +BABEL_OP3_306_66916_20140522_002931_outLine +BABEL_OP3_306_66967_20140606_235110_inLine +BABEL_OP3_306_66967_20140606_235110_outLine +BABEL_OP3_306_67373_20140624_224214_inLine +BABEL_OP3_306_67373_20140624_224214_outLine +BABEL_OP3_306_67373_20140624_225314_inLine +BABEL_OP3_306_67373_20140624_225314_outLine +BABEL_OP3_306_67401_20140815_032242_inLine +BABEL_OP3_306_67401_20140815_032242_outLine +BABEL_OP3_306_67592_20141028_032006_inLine +BABEL_OP3_306_67592_20141028_032006_outLine +BABEL_OP3_306_67622_20140521_015356_inLine +BABEL_OP3_306_67622_20140521_015356_outLine +BABEL_OP3_306_67726_20140523_010156_inLine +BABEL_OP3_306_67726_20140523_010156_outLine +BABEL_OP3_306_67999_20141031_215535_inLine +BABEL_OP3_306_67999_20141031_215535_outLine +BABEL_OP3_306_68059_20140802_023600_inLine +BABEL_OP3_306_68059_20140802_023600_outLine +BABEL_OP3_306_68068_20141004_180553_inLine +BABEL_OP3_306_68068_20141004_180553_outLine +BABEL_OP3_306_69578_20140729_234354_inLine +BABEL_OP3_306_69578_20140729_234354_outLine +BABEL_OP3_306_70110_20140514_211101_inLine +BABEL_OP3_306_70110_20140514_211101_outLine +BABEL_OP3_306_70110_20140514_221144_inLine +BABEL_OP3_306_70110_20140514_221144_outLine +BABEL_OP3_306_70221_20140801_213304_inLine +BABEL_OP3_306_70221_20140801_213304_outLine +BABEL_OP3_306_70293_20150216_013540_inLine +BABEL_OP3_306_70293_20150216_013540_outLine +BABEL_OP3_306_70639_20140528_035113_inLine +BABEL_OP3_306_70639_20140528_035113_outLine +BABEL_OP3_306_70794_20140520_000549_inLine +BABEL_OP3_306_70794_20140520_000549_outLine +BABEL_OP3_306_71401_20150221_015039_inLine +BABEL_OP3_306_71401_20150221_015039_outLine +BABEL_OP3_306_71976_20140529_033557_inLine +BABEL_OP3_306_71976_20140529_033557_outLine +BABEL_OP3_306_72324_20140724_022916_inLine +BABEL_OP3_306_72324_20140724_022916_outLine +BABEL_OP3_306_72324_20140724_024048_inLine +BABEL_OP3_306_72324_20140724_024048_outLine +BABEL_OP3_306_72587_20140811_040036_inLine +BABEL_OP3_306_72587_20140811_040036_outLine +BABEL_OP3_306_72844_20140518_010610_inLine 
+BABEL_OP3_306_72844_20140518_010610_outLine +BABEL_OP3_306_73119_20140603_013443_inLine +BABEL_OP3_306_73119_20140603_013443_outLine +BABEL_OP3_306_73511_20141005_215627_inLine +BABEL_OP3_306_73511_20141005_215627_outLine +BABEL_OP3_306_74121_20140920_001224_inLine +BABEL_OP3_306_74121_20140920_001224_outLine +BABEL_OP3_306_74280_20140515_234933_inLine +BABEL_OP3_306_74280_20140515_234933_outLine +BABEL_OP3_306_75505_20140512_201003_inLine +BABEL_OP3_306_75505_20140512_201003_outLine +BABEL_OP3_306_77146_20140521_002843_inLine +BABEL_OP3_306_77146_20140521_002843_outLine +BABEL_OP3_306_77225_20150327_022842_inLine +BABEL_OP3_306_77225_20150327_022842_outLine +BABEL_OP3_306_78194_20140611_232911_inLine +BABEL_OP3_306_78194_20140611_232911_outLine +BABEL_OP3_306_78254_20140602_034556_inLine +BABEL_OP3_306_78254_20140602_034556_outLine +BABEL_OP3_306_78398_20140604_220522_inLine +BABEL_OP3_306_78398_20140604_220522_outLine +BABEL_OP3_306_78511_20141030_232402_inLine +BABEL_OP3_306_78511_20141030_232402_outLine +BABEL_OP3_306_78604_20140625_020654_inLine +BABEL_OP3_306_78604_20140625_020654_outLine +BABEL_OP3_306_79139_20140725_061931_inLine +BABEL_OP3_306_79139_20140725_061931_outLine +BABEL_OP3_306_80306_20140729_235651_inLine +BABEL_OP3_306_80306_20140729_235651_outLine +BABEL_OP3_306_80383_20150325_015939_inLine +BABEL_OP3_306_80383_20150325_015939_outLine +BABEL_OP3_306_80781_20140729_014618_inLine +BABEL_OP3_306_80781_20140729_014618_outLine +BABEL_OP3_306_81287_20141009_184932_inLine +BABEL_OP3_306_81287_20141009_184932_outLine +BABEL_OP3_306_81427_20140723_232926_inLine +BABEL_OP3_306_81427_20140723_232926_outLine +BABEL_OP3_306_81581_20140529_022004_inLine +BABEL_OP3_306_81581_20140529_022004_outLine +BABEL_OP3_306_82035_20140812_211933_inLine +BABEL_OP3_306_82035_20140812_211933_outLine +BABEL_OP3_306_82303_20150313_044844_inLine +BABEL_OP3_306_82303_20150313_044844_outLine +BABEL_OP3_306_82637_20140514_233142_inLine +BABEL_OP3_306_82637_20140514_233142_outLine +BABEL_OP3_306_82935_20141027_220108_inLine +BABEL_OP3_306_82935_20141027_220108_outLine +BABEL_OP3_306_82935_20141027_221034_inLine +BABEL_OP3_306_82935_20141027_221034_outLine +BABEL_OP3_306_83651_20140606_023153_inLine +BABEL_OP3_306_83651_20140606_023153_outLine +BABEL_OP3_306_84430_20150331_191720_inLine +BABEL_OP3_306_84430_20150331_191720_outLine +BABEL_OP3_306_84609_20150401_222657_inLine +BABEL_OP3_306_84609_20150401_222657_outLine +BABEL_OP3_306_84611_20140605_003243_inLine +BABEL_OP3_306_84611_20140605_003243_outLine +BABEL_OP3_306_84768_20150416_212057_inLine +BABEL_OP3_306_84768_20150416_212057_outLine +BABEL_OP3_306_84815_20141101_002538_inLine +BABEL_OP3_306_84815_20141101_002538_outLine +BABEL_OP3_306_85028_20141029_200629_inLine +BABEL_OP3_306_85028_20141029_200629_outLine +BABEL_OP3_306_85647_20140805_005301_inLine +BABEL_OP3_306_85647_20140805_005301_outLine +BABEL_OP3_306_86191_20140603_042134_inLine +BABEL_OP3_306_86191_20140603_042134_outLine +BABEL_OP3_306_86433_20140816_072513_inLine +BABEL_OP3_306_86433_20140816_072513_outLine +BABEL_OP3_306_86472_20140730_223950_inLine +BABEL_OP3_306_86472_20140730_223950_outLine +BABEL_OP3_306_86845_20140524_192542_inLine +BABEL_OP3_306_86845_20140524_192542_outLine +BABEL_OP3_306_86888_20140801_232454_inLine +BABEL_OP3_306_86888_20140801_232454_outLine +BABEL_OP3_306_86952_20140531_040557_inLine +BABEL_OP3_306_86952_20140531_040557_outLine +BABEL_OP3_306_87179_20141029_021040_inLine +BABEL_OP3_306_87179_20141029_021040_outLine 
+BABEL_OP3_306_87353_20150327_191436_inLine +BABEL_OP3_306_87353_20150327_191436_outLine +BABEL_OP3_306_87884_20141101_223809_inLine +BABEL_OP3_306_87884_20141101_223809_outLine +BABEL_OP3_306_88601_20141003_171755_inLine +BABEL_OP3_306_88601_20141003_171755_outLine +BABEL_OP3_306_88661_20141005_225341_inLine +BABEL_OP3_306_88661_20141005_225341_outLine +BABEL_OP3_306_89045_20140517_213454_inLine +BABEL_OP3_306_89045_20140517_213454_outLine +BABEL_OP3_306_89059_20141104_210223_inLine +BABEL_OP3_306_89059_20141104_210223_outLine +BABEL_OP3_306_89059_20141104_211433_inLine +BABEL_OP3_306_89059_20141104_211433_outLine +BABEL_OP3_306_89358_20141003_194649_inLine +BABEL_OP3_306_89358_20141003_194649_outLine +BABEL_OP3_306_89457_20140730_002520_inLine +BABEL_OP3_306_89457_20140730_002520_outLine +BABEL_OP3_306_90440_20150312_002806_inLine +BABEL_OP3_306_90440_20150312_002806_outLine +BABEL_OP3_306_90737_20140903_235501_inLine +BABEL_OP3_306_90737_20140903_235501_outLine +BABEL_OP3_306_91266_20150215_015545_inLine +BABEL_OP3_306_91266_20150215_015545_outLine +BABEL_OP3_306_91266_20150215_022001_inLine +BABEL_OP3_306_91266_20150215_022001_outLine +BABEL_OP3_306_91463_20140729_001809_inLine +BABEL_OP3_306_91463_20140729_001809_outLine +BABEL_OP3_306_91825_20150408_204309_inLine +BABEL_OP3_306_91825_20150408_204309_outLine +BABEL_OP3_306_91891_20141009_203853_inLine +BABEL_OP3_306_91891_20141009_203853_outLine +BABEL_OP3_306_92440_20150326_232645_inLine +BABEL_OP3_306_92440_20150326_232645_outLine +BABEL_OP3_306_92509_20140521_023136_inLine +BABEL_OP3_306_92509_20140521_023136_outLine +BABEL_OP3_306_92809_20150419_011906_inLine +BABEL_OP3_306_92809_20150419_011906_outLine +BABEL_OP3_306_92941_20140607_001711_inLine +BABEL_OP3_306_92941_20140607_001711_outLine +BABEL_OP3_306_92941_20140607_003034_inLine +BABEL_OP3_306_92941_20140607_003034_outLine +BABEL_OP3_306_93632_20141103_184555_inLine +BABEL_OP3_306_93632_20141103_184555_outLine +BABEL_OP3_306_93946_20141101_211743_inLine +BABEL_OP3_306_93946_20141101_211743_outLine +BABEL_OP3_306_93964_20140730_022556_inLine +BABEL_OP3_306_93964_20140730_022556_outLine +BABEL_OP3_306_94253_20140606_032103_inLine +BABEL_OP3_306_94253_20140606_032103_outLine +BABEL_OP3_306_94409_20141006_205245_inLine +BABEL_OP3_306_94409_20141006_205245_outLine +BABEL_OP3_306_94869_20140515_230712_inLine +BABEL_OP3_306_94869_20140515_230712_outLine +BABEL_OP3_306_94978_20141115_234420_inLine +BABEL_OP3_306_94978_20141115_234420_outLine +BABEL_OP3_306_95124_20150416_012109_inLine +BABEL_OP3_306_95124_20150416_012109_outLine +BABEL_OP3_306_95399_20140905_005504_inLine +BABEL_OP3_306_95399_20140905_005504_outLine +BABEL_OP3_306_95598_20140509_043406_inLine +BABEL_OP3_306_95598_20140509_043406_outLine +BABEL_OP3_306_96730_20141028_230035_inLine +BABEL_OP3_306_96730_20141028_230035_outLine +BABEL_OP3_306_96820_20140802_051525_inLine +BABEL_OP3_306_96820_20140802_051525_outLine +BABEL_OP3_306_97570_20140801_224422_inLine +BABEL_OP3_306_97570_20140801_224422_outLine +BABEL_OP3_306_97588_20140521_051503_inLine +BABEL_OP3_306_97588_20140521_051503_outLine +BABEL_OP3_306_98311_20140604_201838_inLine +BABEL_OP3_306_98311_20140604_201838_outLine +BABEL_OP3_306_99344_20140801_002154_inLine +BABEL_OP3_306_99344_20140801_002154_outLine +BABEL_OP3_306_99920_20140604_212052_inLine +BABEL_OP3_306_99920_20140604_212052_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/untranscribed-training.list b/egs/babel/s5d/conf/lists/306-igbo/untranscribed-training.list new file mode 100644 index 
00000000000..0369662c6a8 --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/untranscribed-training.list @@ -0,0 +1,537 @@ +BABEL_OP3_306_10647_20150310_235644_inLine +BABEL_OP3_306_10647_20150310_235644_outLine +BABEL_OP3_306_11310_20140523_212606_inLine +BABEL_OP3_306_11859_20150210_222000_inLine +BABEL_OP3_306_11859_20150210_222000_outLine +BABEL_OP3_306_12220_20140729_004621_inLine +BABEL_OP3_306_12220_20140729_004621_outLine +BABEL_OP3_306_13040_20140621_040421_inLine +BABEL_OP3_306_13040_20140621_040421_outLine +BABEL_OP3_306_13126_20141222_235100_inLine +BABEL_OP3_306_13126_20141222_235100_outLine +BABEL_OP3_306_13189_20141031_050830_inLine +BABEL_OP3_306_13189_20141031_050830_outLine +BABEL_OP3_306_13483_20141007_044436_inLine +BABEL_OP3_306_13483_20141007_044436_outLine +BABEL_OP3_306_14137_20140602_222616_inLine +BABEL_OP3_306_14137_20140602_222616_outLine +BABEL_OP3_306_14179_20140814_232925_inLine +BABEL_OP3_306_14179_20140814_232925_outLine +BABEL_OP3_306_14179_20140814_233948_inLine +BABEL_OP3_306_14179_20140814_233948_outLine +BABEL_OP3_306_15869_20150421_011752_inLine +BABEL_OP3_306_15869_20150421_011752_outLine +BABEL_OP3_306_16249_20150113_220219_inLine +BABEL_OP3_306_16249_20150113_220219_outLine +BABEL_OP3_306_16407_20150227_040000_inLine +BABEL_OP3_306_16407_20150227_040000_outLine +BABEL_OP3_306_16467_20141025_211050_inLine +BABEL_OP3_306_16467_20141025_211050_outLine +BABEL_OP3_306_16802_20150115_213527_inLine +BABEL_OP3_306_16802_20150115_213527_outLine +BABEL_OP3_306_16886_20140731_022140_inLine +BABEL_OP3_306_16886_20140731_022140_outLine +BABEL_OP3_306_17032_20150311_185659_inLine +BABEL_OP3_306_17032_20150311_185659_outLine +BABEL_OP3_306_17496_20140803_035835_inLine +BABEL_OP3_306_17496_20140803_035835_outLine +BABEL_OP3_306_17567_20140803_225538_inLine +BABEL_OP3_306_17567_20140803_225538_outLine +BABEL_OP3_306_17890_20140919_221004_inLine +BABEL_OP3_306_17890_20140919_221004_outLine +BABEL_OP3_306_17923_20140625_232123_inLine +BABEL_OP3_306_17923_20140625_232123_outLine +BABEL_OP3_306_18992_20150303_000227_inLine +BABEL_OP3_306_18992_20150303_000227_outLine +BABEL_OP3_306_19461_20140527_041640_inLine +BABEL_OP3_306_19621_20140803_005045_inLine +BABEL_OP3_306_19621_20140803_005045_outLine +BABEL_OP3_306_19672_20141005_040103_inLine +BABEL_OP3_306_19672_20141005_040103_outLine +BABEL_OP3_306_19672_20141005_040626_inLine +BABEL_OP3_306_19672_20141005_040626_outLine +BABEL_OP3_306_19688_20140524_223141_inLine +BABEL_OP3_306_19703_20140602_002345_inLine +BABEL_OP3_306_19703_20140602_002345_outLine +BABEL_OP3_306_20133_20140514_195807_inLine +BABEL_OP3_306_20133_20140514_195807_outLine +BABEL_OP3_306_20133_20140514_202548_inLine +BABEL_OP3_306_20133_20140514_202548_outLine +BABEL_OP3_306_20682_20141027_012441_inLine +BABEL_OP3_306_20768_20150306_035010_inLine +BABEL_OP3_306_20768_20150306_035010_outLine +BABEL_OP3_306_20800_20140625_005044_inLine +BABEL_OP3_306_20800_20140625_005044_outLine +BABEL_OP3_306_20800_20140625_005605_inLine +BABEL_OP3_306_20800_20140625_005605_outLine +BABEL_OP3_306_21004_20141026_004641_inLine +BABEL_OP3_306_21004_20141026_004641_outLine +BABEL_OP3_306_21029_20140614_210102_outLine +BABEL_OP3_306_21159_20150210_210334_inLine +BABEL_OP3_306_21159_20150210_210334_outLine +BABEL_OP3_306_22216_20140530_234149_inLine +BABEL_OP3_306_22280_20141009_011742_inLine +BABEL_OP3_306_22280_20141009_011742_outLine +BABEL_OP3_306_22321_20140602_210645_inLine +BABEL_OP3_306_22321_20140602_212529_inLine +BABEL_OP3_306_22446_20140531_021922_inLine 
+BABEL_OP3_306_22466_20140510_200019_inLine +BABEL_OP3_306_22466_20140510_200019_outLine +BABEL_OP3_306_22918_20141101_233512_inLine +BABEL_OP3_306_22918_20141101_233512_outLine +BABEL_OP3_306_22965_20140612_215959_outLine +BABEL_OP3_306_22965_20140612_220852_outLine +BABEL_OP3_306_23151_20150211_025354_inLine +BABEL_OP3_306_23151_20150211_025354_outLine +BABEL_OP3_306_23190_20140729_204900_inLine +BABEL_OP3_306_23190_20140729_204900_outLine +BABEL_OP3_306_23731_20140804_002220_inLine +BABEL_OP3_306_23731_20140804_002220_outLine +BABEL_OP3_306_24470_20141008_233522_inLine +BABEL_OP3_306_24470_20141008_233522_outLine +BABEL_OP3_306_24569_20141101_214133_inLine +BABEL_OP3_306_24569_20141101_214133_outLine +BABEL_OP3_306_24982_20140606_004556_inLine +BABEL_OP3_306_24982_20140606_004556_outLine +BABEL_OP3_306_25012_20140523_200250_inLine +BABEL_OP3_306_25012_20140523_200250_outLine +BABEL_OP3_306_25068_20150203_020803_inLine +BABEL_OP3_306_25068_20150203_020803_outLine +BABEL_OP3_306_25412_20140815_220223_inLine +BABEL_OP3_306_25412_20140815_220223_outLine +BABEL_OP3_306_25895_20150311_050131_outLine +BABEL_OP3_306_26602_20141029_024837_inLine +BABEL_OP3_306_26602_20141029_024837_outLine +BABEL_OP3_306_27203_20140725_002808_inLine +BABEL_OP3_306_27203_20140725_002808_outLine +BABEL_OP3_306_28190_20141031_000818_inLine +BABEL_OP3_306_28190_20141031_000818_outLine +BABEL_OP3_306_28280_20150316_040438_inLine +BABEL_OP3_306_28280_20150316_040438_outLine +BABEL_OP3_306_28522_20140814_224406_inLine +BABEL_OP3_306_28522_20140814_224406_outLine +BABEL_OP3_306_28585_20141028_213521_inLine +BABEL_OP3_306_28585_20141028_213521_outLine +BABEL_OP3_306_28600_20141025_233550_inLine +BABEL_OP3_306_28600_20141025_233550_outLine +BABEL_OP3_306_29076_20140815_025044_inLine +BABEL_OP3_306_29076_20140815_025044_outLine +BABEL_OP3_306_29076_20140815_030534_inLine +BABEL_OP3_306_29076_20140815_030534_outLine +BABEL_OP3_306_29168_20140520_233011_inLine +BABEL_OP3_306_29168_20140520_233011_outLine +BABEL_OP3_306_29168_20140520_234151_inLine +BABEL_OP3_306_29168_20140520_234151_outLine +BABEL_OP3_306_29168_20140520_235529_inLine +BABEL_OP3_306_29168_20140520_235529_outLine +BABEL_OP3_306_29416_20141026_232903_inLine +BABEL_OP3_306_29416_20141026_232903_outLine +BABEL_OP3_306_29482_20150314_051634_inLine +BABEL_OP3_306_29482_20150314_051634_outLine +BABEL_OP3_306_29663_20150311_033510_outLine +BABEL_OP3_306_29911_20140527_033850_inLine +BABEL_OP3_306_29911_20140527_033850_outLine +BABEL_OP3_306_30180_20140728_235122_inLine +BABEL_OP3_306_30180_20140728_235122_outLine +BABEL_OP3_306_30645_20150420_223611_inLine +BABEL_OP3_306_30645_20150420_223611_outLine +BABEL_OP3_306_31624_20140605_214151_inLine +BABEL_OP3_306_31624_20140605_215209_inLine +BABEL_OP3_306_31728_20150401_203654_inLine +BABEL_OP3_306_31728_20150401_203654_outLine +BABEL_OP3_306_32048_20141028_234758_inLine +BABEL_OP3_306_32048_20141028_234758_outLine +BABEL_OP3_306_32380_20150131_012448_inLine +BABEL_OP3_306_32380_20150131_012448_outLine +BABEL_OP3_306_32837_20141031_024422_inLine +BABEL_OP3_306_32837_20141031_024422_outLine +BABEL_OP3_306_33216_20150314_024409_inLine +BABEL_OP3_306_33229_20141029_230937_inLine +BABEL_OP3_306_33229_20141029_230937_outLine +BABEL_OP3_306_34482_20150223_031106_inLine +BABEL_OP3_306_34482_20150223_031106_outLine +BABEL_OP3_306_34811_20140803_015207_inLine +BABEL_OP3_306_34811_20140803_015207_outLine +BABEL_OP3_306_34826_20141028_005224_inLine +BABEL_OP3_306_34826_20141028_005224_outLine 
+BABEL_OP3_306_35143_20141031_235658_inLine +BABEL_OP3_306_35143_20141031_235658_outLine +BABEL_OP3_306_36039_20141116_001002_outLine +BABEL_OP3_306_37682_20140725_003616_inLine +BABEL_OP3_306_37682_20140725_003616_outLine +BABEL_OP3_306_37853_20141101_023348_inLine +BABEL_OP3_306_37853_20141101_023348_outLine +BABEL_OP3_306_38340_20140611_234929_inLine +BABEL_OP3_306_38340_20140611_234929_outLine +BABEL_OP3_306_38340_20140611_235849_inLine +BABEL_OP3_306_38340_20140611_235849_outLine +BABEL_OP3_306_38588_20140728_230958_inLine +BABEL_OP3_306_38588_20140728_230958_outLine +BABEL_OP3_306_38664_20140730_025027_inLine +BABEL_OP3_306_38664_20140730_025027_outLine +BABEL_OP3_306_38750_20141101_221241_inLine +BABEL_OP3_306_38750_20141101_221241_outLine +BABEL_OP3_306_39307_20140522_010101_inLine +BABEL_OP3_306_39426_20141102_040515_inLine +BABEL_OP3_306_39426_20141102_040515_outLine +BABEL_OP3_306_39579_20150123_014947_inLine +BABEL_OP3_306_39579_20150123_014947_outLine +BABEL_OP3_306_39638_20150418_005151_inLine +BABEL_OP3_306_39638_20150418_005151_outLine +BABEL_OP3_306_39848_20141006_034744_inLine +BABEL_OP3_306_39848_20141006_034744_outLine +BABEL_OP3_306_40557_20141101_025253_inLine +BABEL_OP3_306_40557_20141101_025253_outLine +BABEL_OP3_306_41038_20140812_205140_inLine +BABEL_OP3_306_41038_20140812_205140_outLine +BABEL_OP3_306_41100_20140718_034152_inLine +BABEL_OP3_306_41100_20140718_034152_outLine +BABEL_OP3_306_41100_20140718_040923_inLine +BABEL_OP3_306_41100_20140718_040923_outLine +BABEL_OP3_306_41442_20141026_003328_inLine +BABEL_OP3_306_41442_20141026_003328_outLine +BABEL_OP3_306_41493_20140515_044422_inLine +BABEL_OP3_306_41493_20140515_044422_outLine +BABEL_OP3_306_41609_20150418_225730_inLine +BABEL_OP3_306_41609_20150418_225730_outLine +BABEL_OP3_306_42243_20150408_003626_inLine +BABEL_OP3_306_42243_20150408_003626_outLine +BABEL_OP3_306_42434_20140724_015333_inLine +BABEL_OP3_306_42434_20140724_015333_outLine +BABEL_OP3_306_42497_20140622_023839_inLine +BABEL_OP3_306_42497_20140622_023839_outLine +BABEL_OP3_306_42991_20140808_231227_inLine +BABEL_OP3_306_42991_20140808_231227_outLine +BABEL_OP3_306_42991_20140809_015233_inLine +BABEL_OP3_306_42991_20140809_015233_outLine +BABEL_OP3_306_43285_20140814_222223_inLine +BABEL_OP3_306_43920_20141031_035638_inLine +BABEL_OP3_306_43920_20141031_035638_outLine +BABEL_OP3_306_44477_20140804_041338_inLine +BABEL_OP3_306_44477_20140804_041338_outLine +BABEL_OP3_306_44681_20140528_001629_inLine +BABEL_OP3_306_44681_20140528_001629_outLine +BABEL_OP3_306_44898_20140524_184833_inLine +BABEL_OP3_306_44898_20140524_184833_outLine +BABEL_OP3_306_45374_20150120_011233_inLine +BABEL_OP3_306_45374_20150120_011233_outLine +BABEL_OP3_306_45697_20141031_035336_inLine +BABEL_OP3_306_45697_20141031_035336_outLine +BABEL_OP3_306_46041_20141029_203936_inLine +BABEL_OP3_306_46041_20141029_203936_outLine +BABEL_OP3_306_46041_20141029_210843_inLine +BABEL_OP3_306_46041_20141029_210843_outLine +BABEL_OP3_306_46261_20141009_055048_inLine +BABEL_OP3_306_46261_20141009_055048_outLine +BABEL_OP3_306_46268_20150417_042038_inLine +BABEL_OP3_306_46268_20150417_042038_outLine +BABEL_OP3_306_46589_20141005_224403_inLine +BABEL_OP3_306_46589_20141005_224403_outLine +BABEL_OP3_306_46702_20140517_231741_inLine +BABEL_OP3_306_46702_20140517_231741_outLine +BABEL_OP3_306_46712_20140606_212859_inLine +BABEL_OP3_306_46712_20140606_212859_outLine +BABEL_OP3_306_46712_20140606_214018_inLine +BABEL_OP3_306_46712_20140606_214018_outLine 
+BABEL_OP3_306_46881_20150403_054836_inLine +BABEL_OP3_306_46881_20150403_054836_outLine +BABEL_OP3_306_48200_20141028_004545_inLine +BABEL_OP3_306_48200_20141028_004545_outLine +BABEL_OP3_306_48422_20141102_004117_inLine +BABEL_OP3_306_48422_20141102_004117_outLine +BABEL_OP3_306_48789_20141015_223422_inLine +BABEL_OP3_306_48789_20141015_223422_outLine +BABEL_OP3_306_48844_20150421_064019_inLine +BABEL_OP3_306_48844_20150421_064019_outLine +BABEL_OP3_306_49118_20141028_000839_inLine +BABEL_OP3_306_49118_20141028_000839_outLine +BABEL_OP3_306_49767_20150312_012314_inLine +BABEL_OP3_306_49767_20150312_012314_outLine +BABEL_OP3_306_49812_20141105_222036_inLine +BABEL_OP3_306_49812_20141105_222036_outLine +BABEL_OP3_306_49907_20140606_231957_inLine +BABEL_OP3_306_49907_20140606_231957_outLine +BABEL_OP3_306_50090_20140804_031708_inLine +BABEL_OP3_306_50090_20140804_031708_outLine +BABEL_OP3_306_50810_20140514_184240_inLine +BABEL_OP3_306_50958_20141016_004240_inLine +BABEL_OP3_306_50958_20141016_004240_outLine +BABEL_OP3_306_50958_20141016_005618_inLine +BABEL_OP3_306_50958_20141016_005618_outLine +BABEL_OP3_306_51484_20141026_005632_inLine +BABEL_OP3_306_51819_20140923_230818_inLine +BABEL_OP3_306_51819_20140923_230818_outLine +BABEL_OP3_306_51858_20150416_031524_inLine +BABEL_OP3_306_51858_20150416_031524_outLine +BABEL_OP3_306_52246_20140730_012314_inLine +BABEL_OP3_306_52246_20140730_012314_outLine +BABEL_OP3_306_52717_20140717_164851_inLine +BABEL_OP3_306_52717_20140717_164851_outLine +BABEL_OP3_306_52818_20140812_202317_inLine +BABEL_OP3_306_52818_20140812_202317_outLine +BABEL_OP3_306_53063_20141102_002734_inLine +BABEL_OP3_306_53063_20141102_002734_outLine +BABEL_OP3_306_54046_20141030_225348_inLine +BABEL_OP3_306_54046_20141030_225348_outLine +BABEL_OP3_306_54074_20140919_194620_inLine +BABEL_OP3_306_54074_20140919_194620_outLine +BABEL_OP3_306_54074_20140919_195619_inLine +BABEL_OP3_306_54074_20140919_195619_outLine +BABEL_OP3_306_54104_20150420_015927_inLine +BABEL_OP3_306_54104_20150420_015927_outLine +BABEL_OP3_306_54953_20140730_001818_inLine +BABEL_OP3_306_54953_20140730_001818_outLine +BABEL_OP3_306_56023_20141029_001317_inLine +BABEL_OP3_306_56023_20141029_001317_outLine +BABEL_OP3_306_56023_20141029_002053_inLine +BABEL_OP3_306_56023_20141029_002053_outLine +BABEL_OP3_306_56023_20141029_003640_inLine +BABEL_OP3_306_56023_20141029_003640_outLine +BABEL_OP3_306_56090_20140511_020343_inLine +BABEL_OP3_306_56090_20140511_020343_outLine +BABEL_OP3_306_56198_20140612_204109_inLine +BABEL_OP3_306_56198_20140612_204109_outLine +BABEL_OP3_306_56326_20140523_001458_inLine +BABEL_OP3_306_56326_20140523_001458_outLine +BABEL_OP3_306_56720_20141006_203142_inLine +BABEL_OP3_306_56720_20141006_203142_outLine +BABEL_OP3_306_57609_20140808_220254_inLine +BABEL_OP3_306_57609_20140808_220254_outLine +BABEL_OP3_306_58850_20140731_002418_inLine +BABEL_OP3_306_58850_20140731_002418_outLine +BABEL_OP3_306_58926_20140605_035534_inLine +BABEL_OP3_306_59549_20140621_221900_inLine +BABEL_OP3_306_59549_20140621_221900_outLine +BABEL_OP3_306_59549_20140621_223133_inLine +BABEL_OP3_306_59549_20140621_223133_outLine +BABEL_OP3_306_59747_20140530_225826_inLine +BABEL_OP3_306_59747_20140530_225826_outLine +BABEL_OP3_306_59747_20140530_231320_inLine +BABEL_OP3_306_59747_20140530_231320_outLine +BABEL_OP3_306_59993_20140606_000233_inLine +BABEL_OP3_306_60626_20140614_202445_inLine +BABEL_OP3_306_60626_20140614_202445_outLine +BABEL_OP3_306_60830_20141006_215349_inLine 
+BABEL_OP3_306_60830_20141006_215349_outLine +BABEL_OP3_306_61011_20140515_030617_inLine +BABEL_OP3_306_61963_20141028_202812_inLine +BABEL_OP3_306_61963_20141028_202812_outLine +BABEL_OP3_306_62014_20140804_205329_inLine +BABEL_OP3_306_62014_20140804_205329_outLine +BABEL_OP3_306_62047_20141028_035724_inLine +BABEL_OP3_306_62047_20141028_035724_outLine +BABEL_OP3_306_62434_20150414_000517_outLine +BABEL_OP3_306_62810_20150409_183507_inLine +BABEL_OP3_306_62810_20150409_183507_outLine +BABEL_OP3_306_62976_20140811_223219_inLine +BABEL_OP3_306_63084_20140809_013406_inLine +BABEL_OP3_306_63084_20140809_013406_outLine +BABEL_OP3_306_63309_20150417_061125_inLine +BABEL_OP3_306_63309_20150417_061125_outLine +BABEL_OP3_306_63336_20150221_022703_inLine +BABEL_OP3_306_63336_20150221_022703_outLine +BABEL_OP3_306_64065_20140610_210016_inLine +BABEL_OP3_306_64065_20140610_210016_outLine +BABEL_OP3_306_64768_20140604_000427_inLine +BABEL_OP3_306_64768_20140604_000427_outLine +BABEL_OP3_306_65077_20140516_204250_inLine +BABEL_OP3_306_65077_20140516_204250_outLine +BABEL_OP3_306_65692_20140802_044543_inLine +BABEL_OP3_306_65692_20140802_044543_outLine +BABEL_OP3_306_66177_20141104_024434_inLine +BABEL_OP3_306_66177_20141104_024434_outLine +BABEL_OP3_306_67659_20140602_021238_inLine +BABEL_OP3_306_68040_20140802_182145_inLine +BABEL_OP3_306_68385_20140511_024349_inLine +BABEL_OP3_306_68385_20140511_024349_outLine +BABEL_OP3_306_68385_20140511_025326_inLine +BABEL_OP3_306_68385_20140511_025326_outLine +BABEL_OP3_306_68823_20150123_213140_inLine +BABEL_OP3_306_68823_20150123_213140_outLine +BABEL_OP3_306_68910_20150311_040225_inLine +BABEL_OP3_306_68910_20150311_040225_outLine +BABEL_OP3_306_69574_20140517_001243_inLine +BABEL_OP3_306_69574_20140517_001243_outLine +BABEL_OP3_306_70601_20140725_010325_inLine +BABEL_OP3_306_70601_20140725_011335_inLine +BABEL_OP3_306_70726_20150220_234954_inLine +BABEL_OP3_306_70726_20150220_234954_outLine +BABEL_OP3_306_71419_20140526_222116_inLine +BABEL_OP3_306_71419_20140526_222116_outLine +BABEL_OP3_306_71566_20141026_022020_inLine +BABEL_OP3_306_71566_20141026_022020_outLine +BABEL_OP3_306_73022_20141102_005954_inLine +BABEL_OP3_306_73022_20141102_005954_outLine +BABEL_OP3_306_73022_20141102_010949_inLine +BABEL_OP3_306_73022_20141102_010949_outLine +BABEL_OP3_306_73072_20140603_222119_inLine +BABEL_OP3_306_73549_20150312_223219_inLine +BABEL_OP3_306_73549_20150312_223219_outLine +BABEL_OP3_306_74455_20141030_231535_inLine +BABEL_OP3_306_74455_20141030_231535_outLine +BABEL_OP3_306_74667_20140730_220428_inLine +BABEL_OP3_306_74667_20140730_220428_outLine +BABEL_OP3_306_74763_20150422_000233_inLine +BABEL_OP3_306_74763_20150422_000233_outLine +BABEL_OP3_306_74799_20141016_010127_inLine +BABEL_OP3_306_74799_20141016_010127_outLine +BABEL_OP3_306_74921_20140804_005230_inLine +BABEL_OP3_306_74921_20140804_005230_outLine +BABEL_OP3_306_75465_20141025_231951_inLine +BABEL_OP3_306_75465_20141025_231951_outLine +BABEL_OP3_306_76069_20150223_021350_inLine +BABEL_OP3_306_76069_20150223_021350_outLine +BABEL_OP3_306_76238_20141007_011009_inLine +BABEL_OP3_306_76238_20141007_011009_outLine +BABEL_OP3_306_76683_20140813_015005_inLine +BABEL_OP3_306_76683_20140813_015005_outLine +BABEL_OP3_306_76773_20140621_234123_inLine +BABEL_OP3_306_76773_20140621_234123_outLine +BABEL_OP3_306_77033_20141102_032017_inLine +BABEL_OP3_306_77033_20141102_032017_outLine +BABEL_OP3_306_77427_20140803_024549_inLine +BABEL_OP3_306_77427_20140803_024549_outLine 
+BABEL_OP3_306_78609_20141029_012144_inLine +BABEL_OP3_306_78609_20141029_012144_outLine +BABEL_OP3_306_79571_20140814_212942_inLine +BABEL_OP3_306_79571_20140814_212942_outLine +BABEL_OP3_306_79590_20141006_195244_inLine +BABEL_OP3_306_79590_20141006_195244_outLine +BABEL_OP3_306_79590_20141006_200315_inLine +BABEL_OP3_306_79590_20141006_200315_outLine +BABEL_OP3_306_80439_20140605_000944_inLine +BABEL_OP3_306_80439_20140605_000944_outLine +BABEL_OP3_306_80559_20140625_032329_inLine +BABEL_OP3_306_80559_20140625_032329_outLine +BABEL_OP3_306_80897_20140725_020057_inLine +BABEL_OP3_306_80897_20140725_020057_outLine +BABEL_OP3_306_81229_20140730_223530_inLine +BABEL_OP3_306_81229_20140730_223530_outLine +BABEL_OP3_306_81404_20140725_025731_inLine +BABEL_OP3_306_81810_20140728_223725_inLine +BABEL_OP3_306_81971_20140509_013738_inLine +BABEL_OP3_306_82138_20140730_174109_inLine +BABEL_OP3_306_82138_20140730_174109_outLine +BABEL_OP3_306_82425_20140714_035045_inLine +BABEL_OP3_306_82425_20140714_035045_outLine +BABEL_OP3_306_82496_20150418_234759_inLine +BABEL_OP3_306_82496_20150418_234759_outLine +BABEL_OP3_306_82622_20150411_050327_inLine +BABEL_OP3_306_82622_20150411_050327_outLine +BABEL_OP3_306_82979_20140612_012812_outLine +BABEL_OP3_306_83238_20140809_203535_inLine +BABEL_OP3_306_83238_20140809_203535_outLine +BABEL_OP3_306_83238_20140809_205023_inLine +BABEL_OP3_306_83238_20140809_205023_outLine +BABEL_OP3_306_83775_20140724_231716_inLine +BABEL_OP3_306_83813_20140528_211112_inLine +BABEL_OP3_306_83813_20140528_211112_outLine +BABEL_OP3_306_84061_20140730_005053_inLine +BABEL_OP3_306_84061_20140730_005053_outLine +BABEL_OP3_306_84177_20150214_011945_inLine +BABEL_OP3_306_84177_20150214_011945_outLine +BABEL_OP3_306_84327_20141006_211803_inLine +BABEL_OP3_306_84327_20141006_211803_outLine +BABEL_OP3_306_84408_20140729_231948_inLine +BABEL_OP3_306_84408_20140729_231948_outLine +BABEL_OP3_306_84737_20141031_010833_inLine +BABEL_OP3_306_84737_20141031_010833_outLine +BABEL_OP3_306_84823_20141006_034008_inLine +BABEL_OP3_306_84823_20141006_034008_outLine +BABEL_OP3_306_84838_20141029_023621_inLine +BABEL_OP3_306_84838_20141029_023621_outLine +BABEL_OP3_306_85179_20141101_012428_inLine +BABEL_OP3_306_85179_20141101_012428_outLine +BABEL_OP3_306_85322_20150420_034604_inLine +BABEL_OP3_306_85322_20150420_034604_outLine +BABEL_OP3_306_86100_20150328_002625_inLine +BABEL_OP3_306_86100_20150328_002625_outLine +BABEL_OP3_306_86830_20141031_030135_inLine +BABEL_OP3_306_86830_20141031_030135_outLine +BABEL_OP3_306_87073_20140516_232026_inLine +BABEL_OP3_306_87073_20140516_232026_outLine +BABEL_OP3_306_87470_20140729_214135_inLine +BABEL_OP3_306_87470_20140729_214135_outLine +BABEL_OP3_306_87796_20140816_000301_inLine +BABEL_OP3_306_87796_20140816_000301_outLine +BABEL_OP3_306_88260_20140725_033250_inLine +BABEL_OP3_306_88260_20140725_033250_outLine +BABEL_OP3_306_88394_20140525_002127_inLine +BABEL_OP3_306_88394_20140525_002127_outLine +BABEL_OP3_306_88669_20140802_011238_inLine +BABEL_OP3_306_88669_20140802_011238_outLine +BABEL_OP3_306_88669_20140802_011732_inLine +BABEL_OP3_306_88669_20140802_011732_outLine +BABEL_OP3_306_88669_20140802_012458_inLine +BABEL_OP3_306_88669_20140802_012458_outLine +BABEL_OP3_306_88673_20140731_231306_inLine +BABEL_OP3_306_88673_20140731_231306_outLine +BABEL_OP3_306_88783_20141031_212634_inLine +BABEL_OP3_306_88783_20141031_212634_outLine +BABEL_OP3_306_88938_20141102_003357_inLine +BABEL_OP3_306_88938_20141102_003357_outLine 
+BABEL_OP3_306_89372_20140516_004539_inLine +BABEL_OP3_306_89372_20140516_004539_outLine +BABEL_OP3_306_89560_20141029_203632_inLine +BABEL_OP3_306_89560_20141029_203632_outLine +BABEL_OP3_306_89650_20150331_011100_inLine +BABEL_OP3_306_89650_20150331_011100_outLine +BABEL_OP3_306_89665_20140725_015846_inLine +BABEL_OP3_306_89665_20140725_015846_outLine +BABEL_OP3_306_89695_20141006_020223_inLine +BABEL_OP3_306_89695_20141006_020223_outLine +BABEL_OP3_306_89794_20140813_221738_inLine +BABEL_OP3_306_89794_20140813_221738_outLine +BABEL_OP3_306_89943_20140607_005926_inLine +BABEL_OP3_306_89943_20140607_005926_outLine +BABEL_OP3_306_90347_20140814_172652_inLine +BABEL_OP3_306_90347_20140814_172652_outLine +BABEL_OP3_306_91125_20140522_213937_inLine +BABEL_OP3_306_91125_20140522_213937_outLine +BABEL_OP3_306_91125_20140522_214703_inLine +BABEL_OP3_306_91125_20140522_214703_outLine +BABEL_OP3_306_91319_20141028_013449_inLine +BABEL_OP3_306_91319_20141028_013449_outLine +BABEL_OP3_306_91971_20150331_203936_inLine +BABEL_OP3_306_91971_20150331_203936_outLine +BABEL_OP3_306_91977_20141004_202232_inLine +BABEL_OP3_306_91977_20141004_202232_outLine +BABEL_OP3_306_92252_20150327_024334_inLine +BABEL_OP3_306_92557_20141031_213221_inLine +BABEL_OP3_306_92557_20141031_213221_outLine +BABEL_OP3_306_92740_20141004_182215_inLine +BABEL_OP3_306_92740_20141004_182215_outLine +BABEL_OP3_306_92886_20140611_015551_inLine +BABEL_OP3_306_92886_20140611_015551_outLine +BABEL_OP3_306_93007_20150314_033427_outLine +BABEL_OP3_306_93475_20140625_235211_inLine +BABEL_OP3_306_93475_20140625_235211_outLine +BABEL_OP3_306_94166_20141102_014755_inLine +BABEL_OP3_306_94166_20141102_014755_outLine +BABEL_OP3_306_94333_20150418_031427_inLine +BABEL_OP3_306_94333_20150418_031427_outLine +BABEL_OP3_306_95446_20141028_001455_inLine +BABEL_OP3_306_95446_20141028_001455_outLine +BABEL_OP3_306_96077_20150327_033005_inLine +BABEL_OP3_306_96077_20150327_033005_outLine +BABEL_OP3_306_96190_20140614_223920_inLine +BABEL_OP3_306_96190_20140614_223920_outLine +BABEL_OP3_306_96584_20141104_034807_inLine +BABEL_OP3_306_97264_20141028_220710_inLine +BABEL_OP3_306_97264_20141028_220710_outLine +BABEL_OP3_306_97363_20140612_224303_inLine +BABEL_OP3_306_97363_20140612_224303_outLine +BABEL_OP3_306_97557_20140802_234323_inLine +BABEL_OP3_306_97557_20140802_234323_outLine +BABEL_OP3_306_97557_20140802_235634_inLine +BABEL_OP3_306_97557_20140802_235634_outLine +BABEL_OP3_306_97896_20140731_015336_inLine +BABEL_OP3_306_97896_20140731_015336_outLine +BABEL_OP3_306_97988_20141101_013315_inLine +BABEL_OP3_306_97988_20141101_013315_outLine +BABEL_OP3_306_98356_20140929_235521_inLine +BABEL_OP3_306_98356_20140929_235521_outLine +BABEL_OP3_306_98565_20150327_040438_inLine +BABEL_OP3_306_98565_20150327_040438_outLine +BABEL_OP3_306_98888_20141006_032811_inLine +BABEL_OP3_306_98888_20141006_032811_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/dev.2h.list b/egs/babel/s5d/conf/lists/307-amharic/dev.2h.list new file mode 100644 index 00000000000..933a75246bc --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/dev.2h.list @@ -0,0 +1,123 @@ +BABEL_OP3_307_11096_20140823_004817_inLine +BABEL_OP3_307_11096_20140823_004817_outLine +BABEL_OP3_307_13030_20140510_014335_inLine +BABEL_OP3_307_13030_20140510_014335_outLine +BABEL_OP3_307_14440_20140601_192635_inLine +BABEL_OP3_307_14440_20140601_192635_outLine +BABEL_OP3_307_15324_20140531_195640_inLine +BABEL_OP3_307_15324_20140531_195640_outLine +BABEL_OP3_307_15848_20140414_191259_inLine 
+BABEL_OP3_307_15848_20140414_191259_outLine +BABEL_OP3_307_16601_20140616_191918_inLine +BABEL_OP3_307_16601_20140616_191918_outLine +BABEL_OP3_307_17280_20140509_005048_inLine +BABEL_OP3_307_17280_20140509_005048_outLine +BABEL_OP3_307_17881_20140721_204147_inLine +BABEL_OP3_307_17881_20140721_204147_outLine +BABEL_OP3_307_18766_20140725_193025_inLine +BABEL_OP3_307_18766_20140725_193025_outLine +BABEL_OP3_307_19621_20140517_232031_inLine +BABEL_OP3_307_19621_20140517_232031_outLine +BABEL_OP3_307_19782_20140702_230513_inLine +BABEL_OP3_307_19782_20140702_230513_outLine +BABEL_OP3_307_21029_20140430_192710_inLine +BABEL_OP3_307_21029_20140430_192710_outLine +BABEL_OP3_307_28871_20140414_214155_inLine +BABEL_OP3_307_28871_20140414_214155_outLine +BABEL_OP3_307_29168_20140415_202128_inLine +BABEL_OP3_307_29168_20140415_202128_outLine +BABEL_OP3_307_29765_20140823_220912_inLine +BABEL_OP3_307_29765_20140823_220912_outLine +BABEL_OP3_307_30280_20140909_000751_outLine +BABEL_OP3_307_32048_20140705_013312_inLine +BABEL_OP3_307_32048_20140705_013312_outLine +BABEL_OP3_307_32708_20140429_224318_inLine +BABEL_OP3_307_32708_20140429_224318_outLine +BABEL_OP3_307_36219_20140405_235707_inLine +BABEL_OP3_307_36219_20140405_235707_outLine +BABEL_OP3_307_37285_20140618_224046_inLine +BABEL_OP3_307_37285_20140618_224046_outLine +BABEL_OP3_307_41741_20140422_000845_inLine +BABEL_OP3_307_41741_20140422_000845_outLine +BABEL_OP3_307_42848_20140822_203249_inLine +BABEL_OP3_307_42848_20140822_203249_outLine +BABEL_OP3_307_42883_20140823_230118_inLine +BABEL_OP3_307_42883_20140823_230118_outLine +BABEL_OP3_307_44619_20140405_193041_inLine +BABEL_OP3_307_44619_20140405_193041_outLine +BABEL_OP3_307_44961_20140421_215913_inLine +BABEL_OP3_307_44961_20140421_215913_outLine +BABEL_OP3_307_46625_20140414_224528_inLine +BABEL_OP3_307_46625_20140414_224528_outLine +BABEL_OP3_307_47799_20140902_200301_inLine +BABEL_OP3_307_47799_20140902_200301_outLine +BABEL_OP3_307_49902_20140510_004310_inLine +BABEL_OP3_307_49902_20140510_004310_outLine +BABEL_OP3_307_50090_20140531_225332_inLine +BABEL_OP3_307_50090_20140531_225332_outLine +BABEL_OP3_307_52438_20140429_232836_inLine +BABEL_OP3_307_52438_20140429_232836_outLine +BABEL_OP3_307_54160_20140402_232820_inLine +BABEL_OP3_307_54160_20140402_232820_outLine +BABEL_OP3_307_58717_20140518_204047_inLine +BABEL_OP3_307_58717_20140518_204047_outLine +BABEL_OP3_307_60498_20140823_192847_inLine +BABEL_OP3_307_60498_20140823_192847_outLine +BABEL_OP3_307_61011_20140415_180846_inLine +BABEL_OP3_307_61011_20140415_180846_outLine +BABEL_OP3_307_61011_20140415_181727_inLine +BABEL_OP3_307_61011_20140415_181727_outLine +BABEL_OP3_307_61357_20140602_184817_inLine +BABEL_OP3_307_61357_20140602_184817_outLine +BABEL_OP3_307_62200_20140505_000149_inLine +BABEL_OP3_307_62200_20140505_000149_outLine +BABEL_OP3_307_62286_20140503_220651_inLine +BABEL_OP3_307_62286_20140503_220651_outLine +BABEL_OP3_307_64870_20140518_011602_inLine +BABEL_OP3_307_64870_20140518_011602_outLine +BABEL_OP3_307_65692_20140517_182352_inLine +BABEL_OP3_307_65692_20140517_182352_outLine +BABEL_OP3_307_66519_20140510_212511_inLine +BABEL_OP3_307_66519_20140510_212511_outLine +BABEL_OP3_307_69153_20140624_193324_inLine +BABEL_OP3_307_69153_20140624_193324_outLine +BABEL_OP3_307_69633_20140607_233440_inLine +BABEL_OP3_307_69633_20140607_233440_outLine +BABEL_OP3_307_71038_20140712_000601_inLine +BABEL_OP3_307_71038_20140712_000601_outLine +BABEL_OP3_307_73757_20140512_231155_inLine 
+BABEL_OP3_307_73757_20140512_231155_outLine +BABEL_OP3_307_76372_20140721_231708_inLine +BABEL_OP3_307_76372_20140721_231708_outLine +BABEL_OP3_307_81553_20140707_003952_inLine +BABEL_OP3_307_81553_20140707_003952_outLine +BABEL_OP3_307_85439_20140814_215435_inLine +BABEL_OP3_307_85439_20140814_215435_outLine +BABEL_OP3_307_88550_20140809_212521_inLine +BABEL_OP3_307_88550_20140809_212521_outLine +BABEL_OP3_307_88601_20140512_171733_inLine +BABEL_OP3_307_88601_20140512_171733_outLine +BABEL_OP3_307_89888_20140520_191659_inLine +BABEL_OP3_307_89888_20140520_191659_outLine +BABEL_OP3_307_90777_20140507_231811_inLine +BABEL_OP3_307_90777_20140507_231811_outLine +BABEL_OP3_307_92176_20140515_231853_inLine +BABEL_OP3_307_92176_20140515_231853_outLine +BABEL_OP3_307_92643_20140806_220922_inLine +BABEL_OP3_307_92643_20140806_220922_outLine +BABEL_OP3_307_92886_20140430_194417_inLine +BABEL_OP3_307_92886_20140430_194417_outLine +BABEL_OP3_307_93320_20140823_214255_inLine +BABEL_OP3_307_93320_20140823_214255_outLine +BABEL_OP3_307_94002_20140511_172143_inLine +BABEL_OP3_307_94002_20140511_172143_outLine +BABEL_OP3_307_94237_20140814_181922_inLine +BABEL_OP3_307_94237_20140814_181922_outLine +BABEL_OP3_307_95124_20140828_224047_inLine +BABEL_OP3_307_95124_20140828_224047_outLine +BABEL_OP3_307_96940_20140901_181148_inLine +BABEL_OP3_307_96940_20140901_181148_outLine +BABEL_OP3_307_96985_20140503_190037_inLine +BABEL_OP3_307_96985_20140503_190037_outLine +BABEL_OP3_307_98506_20140807_170934_inLine +BABEL_OP3_307_98506_20140807_170934_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/dev.list b/egs/babel/s5d/conf/lists/307-amharic/dev.list new file mode 100644 index 00000000000..933a75246bc --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/dev.list @@ -0,0 +1,123 @@ +BABEL_OP3_307_11096_20140823_004817_inLine +BABEL_OP3_307_11096_20140823_004817_outLine +BABEL_OP3_307_13030_20140510_014335_inLine +BABEL_OP3_307_13030_20140510_014335_outLine +BABEL_OP3_307_14440_20140601_192635_inLine +BABEL_OP3_307_14440_20140601_192635_outLine +BABEL_OP3_307_15324_20140531_195640_inLine +BABEL_OP3_307_15324_20140531_195640_outLine +BABEL_OP3_307_15848_20140414_191259_inLine +BABEL_OP3_307_15848_20140414_191259_outLine +BABEL_OP3_307_16601_20140616_191918_inLine +BABEL_OP3_307_16601_20140616_191918_outLine +BABEL_OP3_307_17280_20140509_005048_inLine +BABEL_OP3_307_17280_20140509_005048_outLine +BABEL_OP3_307_17881_20140721_204147_inLine +BABEL_OP3_307_17881_20140721_204147_outLine +BABEL_OP3_307_18766_20140725_193025_inLine +BABEL_OP3_307_18766_20140725_193025_outLine +BABEL_OP3_307_19621_20140517_232031_inLine +BABEL_OP3_307_19621_20140517_232031_outLine +BABEL_OP3_307_19782_20140702_230513_inLine +BABEL_OP3_307_19782_20140702_230513_outLine +BABEL_OP3_307_21029_20140430_192710_inLine +BABEL_OP3_307_21029_20140430_192710_outLine +BABEL_OP3_307_28871_20140414_214155_inLine +BABEL_OP3_307_28871_20140414_214155_outLine +BABEL_OP3_307_29168_20140415_202128_inLine +BABEL_OP3_307_29168_20140415_202128_outLine +BABEL_OP3_307_29765_20140823_220912_inLine +BABEL_OP3_307_29765_20140823_220912_outLine +BABEL_OP3_307_30280_20140909_000751_outLine +BABEL_OP3_307_32048_20140705_013312_inLine +BABEL_OP3_307_32048_20140705_013312_outLine +BABEL_OP3_307_32708_20140429_224318_inLine +BABEL_OP3_307_32708_20140429_224318_outLine +BABEL_OP3_307_36219_20140405_235707_inLine +BABEL_OP3_307_36219_20140405_235707_outLine +BABEL_OP3_307_37285_20140618_224046_inLine +BABEL_OP3_307_37285_20140618_224046_outLine 
+BABEL_OP3_307_41741_20140422_000845_inLine +BABEL_OP3_307_41741_20140422_000845_outLine +BABEL_OP3_307_42848_20140822_203249_inLine +BABEL_OP3_307_42848_20140822_203249_outLine +BABEL_OP3_307_42883_20140823_230118_inLine +BABEL_OP3_307_42883_20140823_230118_outLine +BABEL_OP3_307_44619_20140405_193041_inLine +BABEL_OP3_307_44619_20140405_193041_outLine +BABEL_OP3_307_44961_20140421_215913_inLine +BABEL_OP3_307_44961_20140421_215913_outLine +BABEL_OP3_307_46625_20140414_224528_inLine +BABEL_OP3_307_46625_20140414_224528_outLine +BABEL_OP3_307_47799_20140902_200301_inLine +BABEL_OP3_307_47799_20140902_200301_outLine +BABEL_OP3_307_49902_20140510_004310_inLine +BABEL_OP3_307_49902_20140510_004310_outLine +BABEL_OP3_307_50090_20140531_225332_inLine +BABEL_OP3_307_50090_20140531_225332_outLine +BABEL_OP3_307_52438_20140429_232836_inLine +BABEL_OP3_307_52438_20140429_232836_outLine +BABEL_OP3_307_54160_20140402_232820_inLine +BABEL_OP3_307_54160_20140402_232820_outLine +BABEL_OP3_307_58717_20140518_204047_inLine +BABEL_OP3_307_58717_20140518_204047_outLine +BABEL_OP3_307_60498_20140823_192847_inLine +BABEL_OP3_307_60498_20140823_192847_outLine +BABEL_OP3_307_61011_20140415_180846_inLine +BABEL_OP3_307_61011_20140415_180846_outLine +BABEL_OP3_307_61011_20140415_181727_inLine +BABEL_OP3_307_61011_20140415_181727_outLine +BABEL_OP3_307_61357_20140602_184817_inLine +BABEL_OP3_307_61357_20140602_184817_outLine +BABEL_OP3_307_62200_20140505_000149_inLine +BABEL_OP3_307_62200_20140505_000149_outLine +BABEL_OP3_307_62286_20140503_220651_inLine +BABEL_OP3_307_62286_20140503_220651_outLine +BABEL_OP3_307_64870_20140518_011602_inLine +BABEL_OP3_307_64870_20140518_011602_outLine +BABEL_OP3_307_65692_20140517_182352_inLine +BABEL_OP3_307_65692_20140517_182352_outLine +BABEL_OP3_307_66519_20140510_212511_inLine +BABEL_OP3_307_66519_20140510_212511_outLine +BABEL_OP3_307_69153_20140624_193324_inLine +BABEL_OP3_307_69153_20140624_193324_outLine +BABEL_OP3_307_69633_20140607_233440_inLine +BABEL_OP3_307_69633_20140607_233440_outLine +BABEL_OP3_307_71038_20140712_000601_inLine +BABEL_OP3_307_71038_20140712_000601_outLine +BABEL_OP3_307_73757_20140512_231155_inLine +BABEL_OP3_307_73757_20140512_231155_outLine +BABEL_OP3_307_76372_20140721_231708_inLine +BABEL_OP3_307_76372_20140721_231708_outLine +BABEL_OP3_307_81553_20140707_003952_inLine +BABEL_OP3_307_81553_20140707_003952_outLine +BABEL_OP3_307_85439_20140814_215435_inLine +BABEL_OP3_307_85439_20140814_215435_outLine +BABEL_OP3_307_88550_20140809_212521_inLine +BABEL_OP3_307_88550_20140809_212521_outLine +BABEL_OP3_307_88601_20140512_171733_inLine +BABEL_OP3_307_88601_20140512_171733_outLine +BABEL_OP3_307_89888_20140520_191659_inLine +BABEL_OP3_307_89888_20140520_191659_outLine +BABEL_OP3_307_90777_20140507_231811_inLine +BABEL_OP3_307_90777_20140507_231811_outLine +BABEL_OP3_307_92176_20140515_231853_inLine +BABEL_OP3_307_92176_20140515_231853_outLine +BABEL_OP3_307_92643_20140806_220922_inLine +BABEL_OP3_307_92643_20140806_220922_outLine +BABEL_OP3_307_92886_20140430_194417_inLine +BABEL_OP3_307_92886_20140430_194417_outLine +BABEL_OP3_307_93320_20140823_214255_inLine +BABEL_OP3_307_93320_20140823_214255_outLine +BABEL_OP3_307_94002_20140511_172143_inLine +BABEL_OP3_307_94002_20140511_172143_outLine +BABEL_OP3_307_94237_20140814_181922_inLine +BABEL_OP3_307_94237_20140814_181922_outLine +BABEL_OP3_307_95124_20140828_224047_inLine +BABEL_OP3_307_95124_20140828_224047_outLine +BABEL_OP3_307_96940_20140901_181148_inLine 
+BABEL_OP3_307_96940_20140901_181148_outLine +BABEL_OP3_307_96985_20140503_190037_inLine +BABEL_OP3_307_96985_20140503_190037_outLine +BABEL_OP3_307_98506_20140807_170934_inLine +BABEL_OP3_307_98506_20140807_170934_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/eval.list b/egs/babel/s5d/conf/lists/307-amharic/eval.list new file mode 100644 index 00000000000..9687fe69738 --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/eval.list @@ -0,0 +1,186 @@ +BABEL_OP3_307_10319_20140417_201202_inLine +BABEL_OP3_307_10319_20140417_201202_outLine +BABEL_OP3_307_12846_20140820_004747_inLine +BABEL_OP3_307_12846_20140820_004747_outLine +BABEL_OP3_307_13040_20140519_010732_inLine +BABEL_OP3_307_13040_20140519_010732_outLine +BABEL_OP3_307_13427_20140517_185634_inLine +BABEL_OP3_307_13427_20140517_185634_outLine +BABEL_OP3_307_15617_20140902_211446_inLine +BABEL_OP3_307_15617_20140902_211446_outLine +BABEL_OP3_307_16056_20140403_224737_inLine +BABEL_OP3_307_16056_20140403_224737_outLine +BABEL_OP3_307_16787_20140504_192345_inLine +BABEL_OP3_307_16787_20140504_192345_outLine +BABEL_OP3_307_16787_20140504_193044_inLine +BABEL_OP3_307_16787_20140504_193044_outLine +BABEL_OP3_307_18242_20140822_194420_inLine +BABEL_OP3_307_18242_20140822_194420_outLine +BABEL_OP3_307_19672_20140610_182836_inLine +BABEL_OP3_307_19672_20140610_182836_outLine +BABEL_OP3_307_20738_20140714_223501_inLine +BABEL_OP3_307_20738_20140714_223501_outLine +BABEL_OP3_307_20800_20140501_001836_inLine +BABEL_OP3_307_20800_20140501_001836_outLine +BABEL_OP3_307_21581_20140510_172450_inLine +BABEL_OP3_307_21581_20140510_172450_outLine +BABEL_OP3_307_22641_20140417_190251_inLine +BABEL_OP3_307_22641_20140417_190251_outLine +BABEL_OP3_307_23260_20140809_221233_inLine +BABEL_OP3_307_23260_20140809_221233_outLine +BABEL_OP3_307_23983_20140814_233159_inLine +BABEL_OP3_307_23983_20140814_233159_outLine +BABEL_OP3_307_24033_20140705_202406_inLine +BABEL_OP3_307_24033_20140705_202406_outLine +BABEL_OP3_307_26072_20140707_234609_inLine +BABEL_OP3_307_26072_20140707_234609_outLine +BABEL_OP3_307_28585_20140703_170913_inLine +BABEL_OP3_307_28585_20140703_170913_outLine +BABEL_OP3_307_28606_20140617_001826_inLine +BABEL_OP3_307_28606_20140617_001826_outLine +BABEL_OP3_307_31668_20140827_172922_inLine +BABEL_OP3_307_31668_20140827_172922_outLine +BABEL_OP3_307_33635_20140508_230911_inLine +BABEL_OP3_307_33635_20140508_230911_outLine +BABEL_OP3_307_33659_20140824_234408_inLine +BABEL_OP3_307_33659_20140824_234408_outLine +BABEL_OP3_307_34486_20140824_163426_inLine +BABEL_OP3_307_34486_20140824_163426_outLine +BABEL_OP3_307_34564_20140703_183530_inLine +BABEL_OP3_307_34564_20140703_183530_outLine +BABEL_OP3_307_34713_20140903_004337_inLine +BABEL_OP3_307_34713_20140903_004337_outLine +BABEL_OP3_307_35202_20140609_172217_inLine +BABEL_OP3_307_35202_20140609_172217_outLine +BABEL_OP3_307_35609_20140907_195928_inLine +BABEL_OP3_307_35609_20140907_195928_outLine +BABEL_OP3_307_36017_20140811_180507_inLine +BABEL_OP3_307_36017_20140811_180507_outLine +BABEL_OP3_307_37064_20140405_195726_inLine +BABEL_OP3_307_37064_20140405_195726_outLine +BABEL_OP3_307_41745_20140508_193418_inLine +BABEL_OP3_307_41745_20140508_193418_outLine +BABEL_OP3_307_42231_20140616_222234_inLine +BABEL_OP3_307_42231_20140616_222234_outLine +BABEL_OP3_307_43285_20140607_212542_inLine +BABEL_OP3_307_43285_20140607_212542_outLine +BABEL_OP3_307_44420_20140503_221325_inLine +BABEL_OP3_307_44420_20140503_221325_outLine +BABEL_OP3_307_44847_20140527_221753_inLine 
+BABEL_OP3_307_44847_20140527_221753_outLine +BABEL_OP3_307_45106_20140530_183351_inLine +BABEL_OP3_307_45106_20140530_183351_outLine +BABEL_OP3_307_45777_20140506_181506_inLine +BABEL_OP3_307_45777_20140506_181506_outLine +BABEL_OP3_307_47877_20140705_224331_inLine +BABEL_OP3_307_47877_20140705_224331_outLine +BABEL_OP3_307_47959_20140505_185302_inLine +BABEL_OP3_307_47959_20140505_185302_outLine +BABEL_OP3_307_48399_20140403_003150_inLine +BABEL_OP3_307_48399_20140403_003150_outLine +BABEL_OP3_307_49637_20140417_211134_inLine +BABEL_OP3_307_49637_20140417_211134_outLine +BABEL_OP3_307_50175_20140415_222418_inLine +BABEL_OP3_307_50175_20140415_222418_outLine +BABEL_OP3_307_50630_20140609_215223_inLine +BABEL_OP3_307_50630_20140609_215223_outLine +BABEL_OP3_307_51858_20140829_174031_inLine +BABEL_OP3_307_51858_20140829_174031_outLine +BABEL_OP3_307_52694_20140519_182152_inLine +BABEL_OP3_307_52694_20140519_182152_outLine +BABEL_OP3_307_53072_20140810_001530_inLine +BABEL_OP3_307_53072_20140810_001530_outLine +BABEL_OP3_307_54405_20140517_202903_inLine +BABEL_OP3_307_54405_20140517_202903_outLine +BABEL_OP3_307_57609_20140519_194402_inLine +BABEL_OP3_307_57609_20140519_194402_outLine +BABEL_OP3_307_60307_20140907_225330_inLine +BABEL_OP3_307_60307_20140907_225330_outLine +BABEL_OP3_307_60538_20140423_174547_inLine +BABEL_OP3_307_60538_20140423_174547_outLine +BABEL_OP3_307_62362_20140824_175404_inLine +BABEL_OP3_307_62362_20140824_175404_outLine +BABEL_OP3_307_62852_20140416_014025_inLine +BABEL_OP3_307_62852_20140416_014025_outLine +BABEL_OP3_307_63309_20140828_003208_inLine +BABEL_OP3_307_63309_20140828_003208_outLine +BABEL_OP3_307_63445_20140401_225339_inLine +BABEL_OP3_307_63445_20140401_225339_outLine +BABEL_OP3_307_64494_20140430_224138_inLine +BABEL_OP3_307_64494_20140430_224138_outLine +BABEL_OP3_307_64638_20140609_213059_inLine +BABEL_OP3_307_64638_20140609_213059_outLine +BABEL_OP3_307_65252_20140813_202634_inLine +BABEL_OP3_307_65252_20140813_202634_outLine +BABEL_OP3_307_65370_20140907_174141_inLine +BABEL_OP3_307_65370_20140907_174141_outLine +BABEL_OP3_307_67794_20140430_211624_inLine +BABEL_OP3_307_67794_20140430_211624_outLine +BABEL_OP3_307_67794_20140430_212806_inLine +BABEL_OP3_307_67794_20140430_212806_outLine +BABEL_OP3_307_70110_20140414_223000_inLine +BABEL_OP3_307_70110_20140414_223000_outLine +BABEL_OP3_307_73042_20140403_013739_inLine +BABEL_OP3_307_73042_20140403_013739_outLine +BABEL_OP3_307_75460_20140821_232032_inLine +BABEL_OP3_307_75460_20140821_232032_outLine +BABEL_OP3_307_76773_20140403_224239_inLine +BABEL_OP3_307_76773_20140403_224239_outLine +BABEL_OP3_307_77112_20140405_232547_inLine +BABEL_OP3_307_77112_20140405_232547_outLine +BABEL_OP3_307_77391_20140404_205514_inLine +BABEL_OP3_307_77391_20140404_205514_outLine +BABEL_OP3_307_79820_20140404_235700_inLine +BABEL_OP3_307_79820_20140404_235700_outLine +BABEL_OP3_307_80897_20140605_185417_inLine +BABEL_OP3_307_80897_20140605_185417_outLine +BABEL_OP3_307_82361_20140811_190547_inLine +BABEL_OP3_307_82361_20140811_190547_outLine +BABEL_OP3_307_82966_20140704_224020_inLine +BABEL_OP3_307_82966_20140704_224020_outLine +BABEL_OP3_307_83062_20140730_214025_inLine +BABEL_OP3_307_83062_20140730_214025_outLine +BABEL_OP3_307_83366_20140529_193250_inLine +BABEL_OP3_307_83366_20140529_193250_outLine +BABEL_OP3_307_83545_20140813_230842_inLine +BABEL_OP3_307_83545_20140813_230842_outLine +BABEL_OP3_307_83775_20140510_215248_inLine +BABEL_OP3_307_83775_20140510_215248_outLine 
+BABEL_OP3_307_83775_20140510_220305_inLine +BABEL_OP3_307_83775_20140510_220305_outLine +BABEL_OP3_307_83851_20140404_202207_inLine +BABEL_OP3_307_83851_20140404_202207_outLine +BABEL_OP3_307_86748_20140707_202225_inLine +BABEL_OP3_307_86748_20140707_202225_outLine +BABEL_OP3_307_87073_20140327_221923_inLine +BABEL_OP3_307_87073_20140327_221923_outLine +BABEL_OP3_307_87693_20140503_194632_inLine +BABEL_OP3_307_87693_20140503_194632_outLine +BABEL_OP3_307_89045_20140519_191547_inLine +BABEL_OP3_307_89045_20140519_191547_outLine +BABEL_OP3_307_89330_20140821_234229_inLine +BABEL_OP3_307_89330_20140821_234229_outLine +BABEL_OP3_307_89794_20140531_224759_inLine +BABEL_OP3_307_89794_20140531_224759_outLine +BABEL_OP3_307_90440_20140829_001435_inLine +BABEL_OP3_307_90440_20140829_001435_outLine +BABEL_OP3_307_90935_20140508_183907_inLine +BABEL_OP3_307_90935_20140508_183907_outLine +BABEL_OP3_307_91463_20140603_203737_inLine +BABEL_OP3_307_91463_20140603_203737_outLine +BABEL_OP3_307_92060_20140814_230458_inLine +BABEL_OP3_307_92060_20140814_230458_outLine +BABEL_OP3_307_92698_20140510_215147_inLine +BABEL_OP3_307_92698_20140510_215147_outLine +BABEL_OP3_307_94587_20140614_000734_inLine +BABEL_OP3_307_94587_20140614_000734_outLine +BABEL_OP3_307_96205_20140512_195746_inLine +BABEL_OP3_307_96205_20140512_195746_outLine +BABEL_OP3_307_97264_20140705_170053_inLine +BABEL_OP3_307_97264_20140705_170053_outLine +BABEL_OP3_307_98580_20140504_195655_inLine +BABEL_OP3_307_98580_20140504_195655_outLine +BABEL_OP3_307_99487_20140518_212249_inLine +BABEL_OP3_307_99487_20140518_212249_outLine +BABEL_OP3_307_99952_20140822_185201_inLine +BABEL_OP3_307_99952_20140822_185201_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/sub-train.list b/egs/babel/s5d/conf/lists/307-amharic/sub-train.list new file mode 100644 index 00000000000..a21532c03d7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/sub-train.list @@ -0,0 +1,122 @@ +BABEL_OP3_307_14229_20140503_233516_inLine +BABEL_OP3_307_14229_20140503_233516_outLine +BABEL_OP3_307_14725_20140421_212856_inLine +BABEL_OP3_307_14725_20140421_212856_outLine +BABEL_OP3_307_15216_20140628_231525_inLine +BABEL_OP3_307_15216_20140628_231525_outLine +BABEL_OP3_307_15902_20140422_235151_inLine +BABEL_OP3_307_15902_20140422_235151_outLine +BABEL_OP3_307_16475_20140511_014949_inLine +BABEL_OP3_307_16475_20140511_014949_outLine +BABEL_OP3_307_17496_20140530_181532_inLine +BABEL_OP3_307_17496_20140530_181532_outLine +BABEL_OP3_307_22321_20140417_205436_inLine +BABEL_OP3_307_22321_20140417_205436_outLine +BABEL_OP3_307_22612_20140624_171814_inLine +BABEL_OP3_307_22612_20140624_171814_outLine +BABEL_OP3_307_23006_20140506_191811_inLine +BABEL_OP3_307_23006_20140506_191811_outLine +BABEL_OP3_307_25767_20140403_234644_inLine +BABEL_OP3_307_25767_20140403_234644_outLine +BABEL_OP3_307_26602_20140702_235542_inLine +BABEL_OP3_307_26602_20140702_235542_outLine +BABEL_OP3_307_27125_20140414_222204_inLine +BABEL_OP3_307_27125_20140414_222204_outLine +BABEL_OP3_307_28190_20140703_190209_inLine +BABEL_OP3_307_28190_20140703_190209_outLine +BABEL_OP3_307_29076_20140605_214715_inLine +BABEL_OP3_307_29076_20140605_214715_outLine +BABEL_OP3_307_33251_20140603_185012_inLine +BABEL_OP3_307_33251_20140603_185012_outLine +BABEL_OP3_307_34197_20140401_235309_inLine +BABEL_OP3_307_34197_20140401_235309_outLine +BABEL_OP3_307_34336_20140405_010509_inLine +BABEL_OP3_307_34336_20140405_010509_outLine +BABEL_OP3_307_35583_20140706_224724_inLine +BABEL_OP3_307_35583_20140706_224724_outLine 
+BABEL_OP3_307_38076_20140531_001406_inLine +BABEL_OP3_307_38076_20140531_001406_outLine +BABEL_OP3_307_39059_20140717_183250_inLine +BABEL_OP3_307_39059_20140717_183250_outLine +BABEL_OP3_307_41097_20140531_181736_inLine +BABEL_OP3_307_41097_20140531_181736_outLine +BABEL_OP3_307_41685_20140825_205956_inLine +BABEL_OP3_307_41685_20140825_205956_outLine +BABEL_OP3_307_44446_20140827_003250_inLine +BABEL_OP3_307_44446_20140827_003250_outLine +BABEL_OP3_307_49502_20140415_220754_inLine +BABEL_OP3_307_49502_20140415_220754_outLine +BABEL_OP3_307_51611_20140423_232011_inLine +BABEL_OP3_307_51611_20140423_232011_outLine +BABEL_OP3_307_53842_20140513_184522_inLine +BABEL_OP3_307_53842_20140513_184522_outLine +BABEL_OP3_307_56198_20140501_005036_inLine +BABEL_OP3_307_56198_20140501_005036_outLine +BABEL_OP3_307_57678_20140405_000739_inLine +BABEL_OP3_307_57678_20140405_000739_outLine +BABEL_OP3_307_61971_20140811_182130_inLine +BABEL_OP3_307_61971_20140811_182130_outLine +BABEL_OP3_307_64350_20140403_011744_inLine +BABEL_OP3_307_64350_20140403_011744_outLine +BABEL_OP3_307_64768_20140404_233306_inLine +BABEL_OP3_307_64768_20140404_233306_outLine +BABEL_OP3_307_67552_20140611_194432_inLine +BABEL_OP3_307_67552_20140611_194432_outLine +BABEL_OP3_307_70986_20140825_003434_inLine +BABEL_OP3_307_70986_20140825_003434_outLine +BABEL_OP3_307_71263_20140602_180728_inLine +BABEL_OP3_307_71263_20140602_180728_outLine +BABEL_OP3_307_73446_20140809_165436_inLine +BABEL_OP3_307_73446_20140809_165436_outLine +BABEL_OP3_307_74799_20140602_191429_inLine +BABEL_OP3_307_74799_20140602_191429_outLine +BABEL_OP3_307_77139_20140416_004159_inLine +BABEL_OP3_307_77139_20140416_004159_outLine +BABEL_OP3_307_77803_20140402_001929_inLine +BABEL_OP3_307_77803_20140402_001929_outLine +BABEL_OP3_307_78161_20140828_164656_inLine +BABEL_OP3_307_78161_20140828_164656_outLine +BABEL_OP3_307_78194_20140411_164649_inLine +BABEL_OP3_307_78194_20140411_164649_outLine +BABEL_OP3_307_79167_20140606_224734_inLine +BABEL_OP3_307_79167_20140606_224734_outLine +BABEL_OP3_307_79429_20140826_212728_inLine +BABEL_OP3_307_79429_20140826_212728_outLine +BABEL_OP3_307_80069_20140821_213402_inLine +BABEL_OP3_307_80069_20140821_213402_outLine +BABEL_OP3_307_82140_20140513_191321_inLine +BABEL_OP3_307_82140_20140513_191321_outLine +BABEL_OP3_307_82863_20140511_183302_inLine +BABEL_OP3_307_82863_20140511_183302_outLine +BABEL_OP3_307_82904_20140730_002106_inLine +BABEL_OP3_307_82904_20140730_002106_outLine +BABEL_OP3_307_86472_20140609_222936_inLine +BABEL_OP3_307_86472_20140609_222936_outLine +BABEL_OP3_307_86888_20140530_190736_inLine +BABEL_OP3_307_86888_20140530_190736_outLine +BABEL_OP3_307_87074_20140429_185857_inLine +BABEL_OP3_307_87074_20140429_185857_outLine +BABEL_OP3_307_90417_20140822_223028_inLine +BABEL_OP3_307_90417_20140822_223028_outLine +BABEL_OP3_307_90417_20140822_224049_inLine +BABEL_OP3_307_90417_20140822_224049_outLine +BABEL_OP3_307_90709_20140421_235753_inLine +BABEL_OP3_307_90709_20140421_235753_outLine +BABEL_OP3_307_91189_20140821_210308_inLine +BABEL_OP3_307_91189_20140821_210308_outLine +BABEL_OP3_307_91581_20140623_234855_inLine +BABEL_OP3_307_91581_20140623_234855_outLine +BABEL_OP3_307_91884_20140723_193506_inLine +BABEL_OP3_307_91884_20140723_193506_outLine +BABEL_OP3_307_91888_20140813_180920_inLine +BABEL_OP3_307_91888_20140813_180920_outLine +BABEL_OP3_307_92757_20140809_200327_inLine +BABEL_OP3_307_92757_20140809_200327_outLine +BABEL_OP3_307_93469_20140813_214657_inLine 
+BABEL_OP3_307_93469_20140813_214657_outLine +BABEL_OP3_307_94465_20140622_180637_inLine +BABEL_OP3_307_94465_20140622_180637_outLine +BABEL_OP3_307_94891_20140830_193021_inLine +BABEL_OP3_307_94891_20140830_193021_outLine +BABEL_OP3_307_97588_20140415_223241_inLine +BABEL_OP3_307_97588_20140415_223241_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/307-amharic/sub-train.untranscribed.list new file mode 100644 index 00000000000..fce3045a1ed --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/sub-train.untranscribed.list @@ -0,0 +1,364 @@ +BABEL_OP3_307_10638_20140902_000559_inLine +BABEL_OP3_307_10638_20140902_000559_outLine +BABEL_OP3_307_10647_20140721_185220_inLine +BABEL_OP3_307_10647_20140721_185220_outLine +BABEL_OP3_307_10938_20140511_203436_inLine +BABEL_OP3_307_10938_20140511_203436_outLine +BABEL_OP3_307_11673_20140403_181549_inLine +BABEL_OP3_307_11673_20140403_181549_outLine +BABEL_OP3_307_11797_20140403_212832_inLine +BABEL_OP3_307_11797_20140403_212832_outLine +BABEL_OP3_307_12767_20140403_010841_inLine +BABEL_OP3_307_12767_20140403_010841_outLine +BABEL_OP3_307_13490_20140511_183719_inLine +BABEL_OP3_307_13490_20140511_183719_outLine +BABEL_OP3_307_13664_20140414_233828_inLine +BABEL_OP3_307_13664_20140414_233828_outLine +BABEL_OP3_307_13709_20140712_220945_inLine +BABEL_OP3_307_13709_20140712_220945_outLine +BABEL_OP3_307_13776_20140824_184628_inLine +BABEL_OP3_307_13776_20140824_184628_outLine +BABEL_OP3_307_14237_20140417_200235_inLine +BABEL_OP3_307_14237_20140417_200235_outLine +BABEL_OP3_307_14814_20140505_232452_inLine +BABEL_OP3_307_14814_20140505_232452_outLine +BABEL_OP3_307_15227_20140821_214005_inLine +BABEL_OP3_307_15227_20140821_214005_outLine +BABEL_OP3_307_15227_20140822_215614_inLine +BABEL_OP3_307_15227_20140822_215614_outLine +BABEL_OP3_307_15535_20140614_181940_inLine +BABEL_OP3_307_15535_20140614_181940_outLine +BABEL_OP3_307_15730_20140520_180833_inLine +BABEL_OP3_307_15730_20140520_180833_outLine +BABEL_OP3_307_16149_20140403_005747_inLine +BABEL_OP3_307_16149_20140403_005747_outLine +BABEL_OP3_307_17520_20140518_010259_inLine +BABEL_OP3_307_17520_20140518_010259_outLine +BABEL_OP3_307_18566_20140730_203138_inLine +BABEL_OP3_307_18566_20140730_203138_outLine +BABEL_OP3_307_18939_20140417_155733_inLine +BABEL_OP3_307_18939_20140417_155733_outLine +BABEL_OP3_307_18939_20140417_160632_inLine +BABEL_OP3_307_18939_20140417_160632_outLine +BABEL_OP3_307_19818_20140529_184253_inLine +BABEL_OP3_307_19818_20140529_184253_outLine +BABEL_OP3_307_20437_20140825_181004_inLine +BABEL_OP3_307_20437_20140825_181004_outLine +BABEL_OP3_307_20916_20140415_014115_inLine +BABEL_OP3_307_20916_20140415_014115_outLine +BABEL_OP3_307_20972_20140821_181210_inLine +BABEL_OP3_307_20972_20140821_181210_outLine +BABEL_OP3_307_21327_20140624_183416_inLine +BABEL_OP3_307_21327_20140624_183416_outLine +BABEL_OP3_307_21435_20140715_021926_inLine +BABEL_OP3_307_21435_20140715_021926_outLine +BABEL_OP3_307_23980_20140508_223043_inLine +BABEL_OP3_307_23980_20140508_223043_outLine +BABEL_OP3_307_24010_20140903_194143_inLine +BABEL_OP3_307_24010_20140903_194143_outLine +BABEL_OP3_307_24017_20140630_191336_inLine +BABEL_OP3_307_24017_20140630_191336_outLine +BABEL_OP3_307_24270_20140602_192257_inLine +BABEL_OP3_307_24270_20140602_192257_outLine +BABEL_OP3_307_24470_20140604_230747_inLine +BABEL_OP3_307_24470_20140604_230747_outLine +BABEL_OP3_307_26388_20140504_193621_inLine +BABEL_OP3_307_26388_20140504_193621_outLine 
+BABEL_OP3_307_29021_20140725_002551_inLine +BABEL_OP3_307_29021_20140725_002551_outLine +BABEL_OP3_307_29072_20140613_182323_inLine +BABEL_OP3_307_29072_20140613_182323_outLine +BABEL_OP3_307_29633_20140722_010644_inLine +BABEL_OP3_307_29633_20140722_010644_outLine +BABEL_OP3_307_30098_20140725_200446_inLine +BABEL_OP3_307_30098_20140725_200446_outLine +BABEL_OP3_307_31346_20140704_220402_inLine +BABEL_OP3_307_31346_20140704_220402_outLine +BABEL_OP3_307_32122_20140510_002050_inLine +BABEL_OP3_307_32122_20140510_002050_outLine +BABEL_OP3_307_32171_20140827_233808_inLine +BABEL_OP3_307_32171_20140827_233808_outLine +BABEL_OP3_307_32301_20140618_175857_inLine +BABEL_OP3_307_32301_20140618_175857_outLine +BABEL_OP3_307_32328_20140701_173938_inLine +BABEL_OP3_307_32328_20140701_173938_outLine +BABEL_OP3_307_32837_20140628_224152_inLine +BABEL_OP3_307_32837_20140628_224152_outLine +BABEL_OP3_307_33175_20140416_211640_inLine +BABEL_OP3_307_33175_20140416_211640_outLine +BABEL_OP3_307_33229_20140717_222336_inLine +BABEL_OP3_307_33229_20140717_222336_outLine +BABEL_OP3_307_33273_20140504_190501_inLine +BABEL_OP3_307_33273_20140504_190501_outLine +BABEL_OP3_307_34679_20140405_000658_inLine +BABEL_OP3_307_34679_20140405_000658_outLine +BABEL_OP3_307_34811_20140517_012722_inLine +BABEL_OP3_307_34811_20140517_012722_outLine +BABEL_OP3_307_35139_20140403_212641_inLine +BABEL_OP3_307_35139_20140403_212641_outLine +BABEL_OP3_307_35181_20140719_185816_inLine +BABEL_OP3_307_35181_20140719_185816_outLine +BABEL_OP3_307_36669_20140512_181519_inLine +BABEL_OP3_307_36669_20140512_181519_outLine +BABEL_OP3_307_37228_20140706_173354_inLine +BABEL_OP3_307_37228_20140706_173354_outLine +BABEL_OP3_307_38588_20140505_183744_inLine +BABEL_OP3_307_38588_20140505_183744_outLine +BABEL_OP3_307_38664_20140508_003821_inLine +BABEL_OP3_307_38664_20140508_003821_outLine +BABEL_OP3_307_41720_20140824_215221_inLine +BABEL_OP3_307_41720_20140824_215221_outLine +BABEL_OP3_307_43286_20140519_004615_inLine +BABEL_OP3_307_43286_20140519_004615_outLine +BABEL_OP3_307_43323_20140824_230200_inLine +BABEL_OP3_307_43323_20140824_230200_outLine +BABEL_OP3_307_43784_20140430_225016_inLine +BABEL_OP3_307_43784_20140430_225016_outLine +BABEL_OP3_307_43794_20140902_183511_inLine +BABEL_OP3_307_43794_20140902_183511_outLine +BABEL_OP3_307_43920_20140622_222232_inLine +BABEL_OP3_307_43920_20140622_222232_outLine +BABEL_OP3_307_44477_20140611_180941_inLine +BABEL_OP3_307_44477_20140611_180941_outLine +BABEL_OP3_307_45771_20140824_012354_inLine +BABEL_OP3_307_45771_20140824_012354_outLine +BABEL_OP3_307_46041_20140705_175737_inLine +BABEL_OP3_307_46041_20140705_175737_outLine +BABEL_OP3_307_46310_20140417_192000_inLine +BABEL_OP3_307_46310_20140417_192000_outLine +BABEL_OP3_307_46589_20140606_191357_inLine +BABEL_OP3_307_46589_20140606_191357_outLine +BABEL_OP3_307_46681_20140403_002233_inLine +BABEL_OP3_307_46681_20140403_002233_outLine +BABEL_OP3_307_46770_20140706_002306_inLine +BABEL_OP3_307_46770_20140706_002306_outLine +BABEL_OP3_307_46976_20140516_234604_inLine +BABEL_OP3_307_46976_20140516_234604_outLine +BABEL_OP3_307_47451_20140624_234108_inLine +BABEL_OP3_307_47451_20140624_234108_outLine +BABEL_OP3_307_48243_20140423_214726_inLine +BABEL_OP3_307_48243_20140423_214726_outLine +BABEL_OP3_307_49027_20140811_191512_inLine +BABEL_OP3_307_49027_20140811_191512_outLine +BABEL_OP3_307_49287_20140527_215142_inLine +BABEL_OP3_307_49287_20140527_215142_outLine +BABEL_OP3_307_49768_20140505_000629_inLine 
+BABEL_OP3_307_49768_20140505_000629_outLine +BABEL_OP3_307_49907_20140429_214231_inLine +BABEL_OP3_307_49907_20140429_214231_outLine +BABEL_OP3_307_50427_20140519_180652_inLine +BABEL_OP3_307_50427_20140519_180652_outLine +BABEL_OP3_307_50940_20140902_173543_inLine +BABEL_OP3_307_50940_20140902_173543_outLine +BABEL_OP3_307_51185_20140901_232033_inLine +BABEL_OP3_307_51185_20140901_232033_outLine +BABEL_OP3_307_51484_20140703_181343_inLine +BABEL_OP3_307_51484_20140703_181343_outLine +BABEL_OP3_307_51968_20140503_185322_inLine +BABEL_OP3_307_51968_20140503_185322_outLine +BABEL_OP3_307_51968_20140503_185916_inLine +BABEL_OP3_307_51968_20140503_185916_outLine +BABEL_OP3_307_52301_20140423_210352_inLine +BABEL_OP3_307_52301_20140423_210352_outLine +BABEL_OP3_307_52381_20140705_233901_inLine +BABEL_OP3_307_52381_20140705_233901_outLine +BABEL_OP3_307_52404_20140607_181619_inLine +BABEL_OP3_307_52404_20140607_181619_outLine +BABEL_OP3_307_52422_20140707_220639_inLine +BABEL_OP3_307_52422_20140707_220639_outLine +BABEL_OP3_307_54104_20140503_183514_inLine +BABEL_OP3_307_54104_20140503_183514_outLine +BABEL_OP3_307_54477_20140705_174757_inLine +BABEL_OP3_307_54477_20140705_174757_outLine +BABEL_OP3_307_54827_20140814_180107_inLine +BABEL_OP3_307_54827_20140814_180107_outLine +BABEL_OP3_307_54841_20140713_170956_inLine +BABEL_OP3_307_54841_20140713_170956_outLine +BABEL_OP3_307_55902_20140829_192235_inLine +BABEL_OP3_307_55902_20140829_192235_outLine +BABEL_OP3_307_56023_20140704_191158_inLine +BABEL_OP3_307_56023_20140704_191158_outLine +BABEL_OP3_307_57464_20140728_215432_inLine +BABEL_OP3_307_57464_20140728_215432_outLine +BABEL_OP3_307_58103_20140511_191956_inLine +BABEL_OP3_307_58103_20140511_191956_outLine +BABEL_OP3_307_58145_20140605_175238_inLine +BABEL_OP3_307_58145_20140605_175238_outLine +BABEL_OP3_307_58313_20140605_235938_inLine +BABEL_OP3_307_58313_20140605_235938_outLine +BABEL_OP3_307_58585_20140717_221803_inLine +BABEL_OP3_307_58585_20140717_221803_outLine +BABEL_OP3_307_58734_20140422_182501_inLine +BABEL_OP3_307_58734_20140422_182501_outLine +BABEL_OP3_307_59028_20140820_184151_inLine +BABEL_OP3_307_59028_20140820_184151_outLine +BABEL_OP3_307_59091_20140706_233018_inLine +BABEL_OP3_307_59091_20140706_233018_outLine +BABEL_OP3_307_59307_20140730_225719_inLine +BABEL_OP3_307_59307_20140730_225719_outLine +BABEL_OP3_307_59635_20140705_193327_inLine +BABEL_OP3_307_59635_20140705_193327_outLine +BABEL_OP3_307_60026_20140416_210913_inLine +BABEL_OP3_307_60026_20140416_210913_outLine +BABEL_OP3_307_60474_20140503_215918_inLine +BABEL_OP3_307_60474_20140503_215918_outLine +BABEL_OP3_307_61167_20140511_204037_inLine +BABEL_OP3_307_61167_20140511_204037_outLine +BABEL_OP3_307_61731_20140407_191634_inLine +BABEL_OP3_307_61731_20140407_191634_outLine +BABEL_OP3_307_62158_20140907_190726_inLine +BABEL_OP3_307_62158_20140907_190726_outLine +BABEL_OP3_307_64065_20140502_190738_inLine +BABEL_OP3_307_64065_20140502_190738_outLine +BABEL_OP3_307_65064_20140604_223702_inLine +BABEL_OP3_307_65064_20140604_223702_outLine +BABEL_OP3_307_65367_20140706_182846_inLine +BABEL_OP3_307_65367_20140706_182846_outLine +BABEL_OP3_307_66001_20140518_232707_inLine +BABEL_OP3_307_66001_20140518_232707_outLine +BABEL_OP3_307_66305_20140807_184053_inLine +BABEL_OP3_307_66305_20140807_184053_outLine +BABEL_OP3_307_66822_20140504_164117_inLine +BABEL_OP3_307_66822_20140504_164117_outLine +BABEL_OP3_307_67283_20140421_213932_inLine +BABEL_OP3_307_67283_20140421_213932_outLine 
+BABEL_OP3_307_67659_20140503_214825_inLine +BABEL_OP3_307_67659_20140503_214825_outLine +BABEL_OP3_307_68748_20140609_212915_inLine +BABEL_OP3_307_68748_20140609_212915_outLine +BABEL_OP3_307_69096_20140813_192001_inLine +BABEL_OP3_307_69096_20140813_192001_outLine +BABEL_OP3_307_69992_20140502_183707_inLine +BABEL_OP3_307_69992_20140502_183707_outLine +BABEL_OP3_307_70452_20140504_180340_inLine +BABEL_OP3_307_70452_20140504_180340_outLine +BABEL_OP3_307_71189_20140715_012540_inLine +BABEL_OP3_307_71189_20140715_012540_outLine +BABEL_OP3_307_71404_20140423_203052_inLine +BABEL_OP3_307_71404_20140423_203052_outLine +BABEL_OP3_307_72587_20140529_225152_inLine +BABEL_OP3_307_72587_20140529_225152_outLine +BABEL_OP3_307_72952_20140819_214300_inLine +BABEL_OP3_307_72952_20140819_214300_outLine +BABEL_OP3_307_73005_20140815_000302_inLine +BABEL_OP3_307_73005_20140815_000302_outLine +BABEL_OP3_307_73258_20140508_180508_inLine +BABEL_OP3_307_73258_20140508_180508_outLine +BABEL_OP3_307_73299_20140822_002656_inLine +BABEL_OP3_307_73299_20140822_002656_outLine +BABEL_OP3_307_73511_20140614_171020_inLine +BABEL_OP3_307_73511_20140614_171020_outLine +BABEL_OP3_307_74667_20140508_225904_inLine +BABEL_OP3_307_74667_20140508_225904_outLine +BABEL_OP3_307_75365_20140821_220730_inLine +BABEL_OP3_307_75365_20140821_220730_outLine +BABEL_OP3_307_75993_20140404_202655_inLine +BABEL_OP3_307_75993_20140404_202655_outLine +BABEL_OP3_307_76238_20140623_222754_inLine +BABEL_OP3_307_76238_20140623_222754_outLine +BABEL_OP3_307_76499_20140512_232123_inLine +BABEL_OP3_307_76499_20140512_232123_outLine +BABEL_OP3_307_76902_20140829_203049_inLine +BABEL_OP3_307_76902_20140829_203049_outLine +BABEL_OP3_307_77427_20140508_024629_inLine +BABEL_OP3_307_77427_20140508_024629_outLine +BABEL_OP3_307_77832_20140903_183557_inLine +BABEL_OP3_307_77832_20140903_183557_outLine +BABEL_OP3_307_78943_20140505_000428_inLine +BABEL_OP3_307_78943_20140505_000428_outLine +BABEL_OP3_307_79451_20140417_185927_inLine +BABEL_OP3_307_79451_20140417_185927_outLine +BABEL_OP3_307_79660_20140820_174118_inLine +BABEL_OP3_307_79660_20140820_174118_outLine +BABEL_OP3_307_80136_20140706_191530_inLine +BABEL_OP3_307_80136_20140706_191530_outLine +BABEL_OP3_307_80306_20140510_220902_inLine +BABEL_OP3_307_80306_20140510_220902_outLine +BABEL_OP3_307_81213_20140501_002133_inLine +BABEL_OP3_307_81213_20140501_002133_outLine +BABEL_OP3_307_81287_20140616_182444_inLine +BABEL_OP3_307_81287_20140616_182444_outLine +BABEL_OP3_307_81424_20140614_215540_inLine +BABEL_OP3_307_81424_20140614_215540_outLine +BABEL_OP3_307_81435_20140529_235732_inLine +BABEL_OP3_307_81435_20140529_235732_outLine +BABEL_OP3_307_81671_20140704_213446_inLine +BABEL_OP3_307_81671_20140704_213446_outLine +BABEL_OP3_307_82496_20140429_221502_inLine +BABEL_OP3_307_82496_20140429_221502_outLine +BABEL_OP3_307_82626_20140825_181202_inLine +BABEL_OP3_307_82626_20140825_181202_outLine +BABEL_OP3_307_82935_20140702_173347_inLine +BABEL_OP3_307_82935_20140702_173347_outLine +BABEL_OP3_307_86191_20140505_200151_inLine +BABEL_OP3_307_86191_20140505_200151_outLine +BABEL_OP3_307_86433_20140601_173214_inLine +BABEL_OP3_307_86433_20140601_173214_outLine +BABEL_OP3_307_86713_20140704_201850_inLine +BABEL_OP3_307_86713_20140704_201850_outLine +BABEL_OP3_307_86715_20140820_191201_inLine +BABEL_OP3_307_86715_20140820_191201_outLine +BABEL_OP3_307_86722_20140404_001449_inLine +BABEL_OP3_307_86722_20140404_001449_outLine +BABEL_OP3_307_88756_20140908_011014_inLine 
+BABEL_OP3_307_88756_20140908_011014_outLine +BABEL_OP3_307_88776_20140417_180154_inLine +BABEL_OP3_307_88776_20140417_180154_outLine +BABEL_OP3_307_88783_20140623_173406_inLine +BABEL_OP3_307_88783_20140623_173406_outLine +BABEL_OP3_307_89203_20140705_004511_inLine +BABEL_OP3_307_89203_20140705_004511_outLine +BABEL_OP3_307_89358_20140513_014405_inLine +BABEL_OP3_307_89358_20140513_014405_outLine +BABEL_OP3_307_89575_20140705_220326_inLine +BABEL_OP3_307_89575_20140705_220326_outLine +BABEL_OP3_307_89877_20140602_225948_inLine +BABEL_OP3_307_89877_20140602_225948_outLine +BABEL_OP3_307_90572_20140723_230358_inLine +BABEL_OP3_307_90572_20140723_230358_outLine +BABEL_OP3_307_90739_20140503_223700_inLine +BABEL_OP3_307_90739_20140503_223700_outLine +BABEL_OP3_307_91944_20140430_182005_inLine +BABEL_OP3_307_91944_20140430_182005_outLine +BABEL_OP3_307_92605_20140902_013736_inLine +BABEL_OP3_307_92605_20140902_013736_outLine +BABEL_OP3_307_92942_20140603_223928_inLine +BABEL_OP3_307_92942_20140603_223928_outLine +BABEL_OP3_307_93490_20140704_173442_inLine +BABEL_OP3_307_93490_20140704_173442_outLine +BABEL_OP3_307_93604_20140814_210305_inLine +BABEL_OP3_307_93604_20140814_210305_outLine +BABEL_OP3_307_93858_20140822_215929_inLine +BABEL_OP3_307_93858_20140822_215929_outLine +BABEL_OP3_307_94025_20140606_214625_inLine +BABEL_OP3_307_94025_20140606_214625_outLine +BABEL_OP3_307_94253_20140423_183534_inLine +BABEL_OP3_307_94253_20140423_183534_outLine +BABEL_OP3_307_94316_20140814_001643_inLine +BABEL_OP3_307_94316_20140814_001643_outLine +BABEL_OP3_307_94333_20140417_212859_inLine +BABEL_OP3_307_94333_20140417_212859_outLine +BABEL_OP3_307_94409_20140506_174815_inLine +BABEL_OP3_307_94409_20140506_174815_outLine +BABEL_OP3_307_94442_20140725_195152_inLine +BABEL_OP3_307_94442_20140725_195152_outLine +BABEL_OP3_307_94969_20140903_171944_inLine +BABEL_OP3_307_94969_20140903_171944_outLine +BABEL_OP3_307_95077_20140622_221523_inLine +BABEL_OP3_307_95077_20140622_221523_outLine +BABEL_OP3_307_95670_20140417_201744_inLine +BABEL_OP3_307_95670_20140417_201744_outLine +BABEL_OP3_307_96690_20140507_212636_inLine +BABEL_OP3_307_96690_20140507_212636_outLine +BABEL_OP3_307_96820_20140517_194553_inLine +BABEL_OP3_307_96820_20140517_194553_outLine +BABEL_OP3_307_96910_20140504_223516_inLine +BABEL_OP3_307_96910_20140504_223516_outLine +BABEL_OP3_307_98192_20140823_224529_inLine +BABEL_OP3_307_98192_20140823_224529_outLine +BABEL_OP3_307_98365_20140606_004323_inLine +BABEL_OP3_307_98365_20140606_004323_outLine +BABEL_OP3_307_99202_20140519_213506_inLine +BABEL_OP3_307_99202_20140519_213506_outLine +BABEL_OP3_307_99594_20140508_192558_inLine +BABEL_OP3_307_99594_20140508_192558_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/training.list b/egs/babel/s5d/conf/lists/307-amharic/training.list new file mode 100644 index 00000000000..e58883c0fc7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/training.list @@ -0,0 +1,486 @@ +BABEL_OP3_307_10638_20140902_000559_inLine +BABEL_OP3_307_10638_20140902_000559_outLine +BABEL_OP3_307_10647_20140721_185220_inLine +BABEL_OP3_307_10647_20140721_185220_outLine +BABEL_OP3_307_10938_20140511_203436_inLine +BABEL_OP3_307_10938_20140511_203436_outLine +BABEL_OP3_307_11673_20140403_181549_inLine +BABEL_OP3_307_11673_20140403_181549_outLine +BABEL_OP3_307_11797_20140403_212832_inLine +BABEL_OP3_307_11797_20140403_212832_outLine +BABEL_OP3_307_12767_20140403_010841_inLine +BABEL_OP3_307_12767_20140403_010841_outLine +BABEL_OP3_307_13490_20140511_183719_inLine 
+BABEL_OP3_307_13490_20140511_183719_outLine +BABEL_OP3_307_13664_20140414_233828_inLine +BABEL_OP3_307_13664_20140414_233828_outLine +BABEL_OP3_307_13709_20140712_220945_inLine +BABEL_OP3_307_13709_20140712_220945_outLine +BABEL_OP3_307_13776_20140824_184628_inLine +BABEL_OP3_307_13776_20140824_184628_outLine +BABEL_OP3_307_14229_20140503_233516_inLine +BABEL_OP3_307_14229_20140503_233516_outLine +BABEL_OP3_307_14237_20140417_200235_inLine +BABEL_OP3_307_14237_20140417_200235_outLine +BABEL_OP3_307_14725_20140421_212856_inLine +BABEL_OP3_307_14725_20140421_212856_outLine +BABEL_OP3_307_14814_20140505_232452_inLine +BABEL_OP3_307_14814_20140505_232452_outLine +BABEL_OP3_307_15216_20140628_231525_inLine +BABEL_OP3_307_15216_20140628_231525_outLine +BABEL_OP3_307_15227_20140821_214005_inLine +BABEL_OP3_307_15227_20140821_214005_outLine +BABEL_OP3_307_15227_20140822_215614_inLine +BABEL_OP3_307_15227_20140822_215614_outLine +BABEL_OP3_307_15535_20140614_181940_inLine +BABEL_OP3_307_15535_20140614_181940_outLine +BABEL_OP3_307_15730_20140520_180833_inLine +BABEL_OP3_307_15730_20140520_180833_outLine +BABEL_OP3_307_15902_20140422_235151_inLine +BABEL_OP3_307_15902_20140422_235151_outLine +BABEL_OP3_307_16149_20140403_005747_inLine +BABEL_OP3_307_16149_20140403_005747_outLine +BABEL_OP3_307_16475_20140511_014949_inLine +BABEL_OP3_307_16475_20140511_014949_outLine +BABEL_OP3_307_17496_20140530_181532_inLine +BABEL_OP3_307_17496_20140530_181532_outLine +BABEL_OP3_307_17520_20140518_010259_inLine +BABEL_OP3_307_17520_20140518_010259_outLine +BABEL_OP3_307_18566_20140730_203138_inLine +BABEL_OP3_307_18566_20140730_203138_outLine +BABEL_OP3_307_18939_20140417_155733_inLine +BABEL_OP3_307_18939_20140417_155733_outLine +BABEL_OP3_307_18939_20140417_160632_inLine +BABEL_OP3_307_18939_20140417_160632_outLine +BABEL_OP3_307_19818_20140529_184253_inLine +BABEL_OP3_307_19818_20140529_184253_outLine +BABEL_OP3_307_20437_20140825_181004_inLine +BABEL_OP3_307_20437_20140825_181004_outLine +BABEL_OP3_307_20916_20140415_014115_inLine +BABEL_OP3_307_20916_20140415_014115_outLine +BABEL_OP3_307_20972_20140821_181210_inLine +BABEL_OP3_307_20972_20140821_181210_outLine +BABEL_OP3_307_21327_20140624_183416_inLine +BABEL_OP3_307_21327_20140624_183416_outLine +BABEL_OP3_307_21435_20140715_021926_inLine +BABEL_OP3_307_21435_20140715_021926_outLine +BABEL_OP3_307_22321_20140417_205436_inLine +BABEL_OP3_307_22321_20140417_205436_outLine +BABEL_OP3_307_22612_20140624_171814_inLine +BABEL_OP3_307_22612_20140624_171814_outLine +BABEL_OP3_307_23006_20140506_191811_inLine +BABEL_OP3_307_23006_20140506_191811_outLine +BABEL_OP3_307_23980_20140508_223043_inLine +BABEL_OP3_307_23980_20140508_223043_outLine +BABEL_OP3_307_24010_20140903_194143_inLine +BABEL_OP3_307_24010_20140903_194143_outLine +BABEL_OP3_307_24017_20140630_191336_inLine +BABEL_OP3_307_24017_20140630_191336_outLine +BABEL_OP3_307_24270_20140602_192257_inLine +BABEL_OP3_307_24270_20140602_192257_outLine +BABEL_OP3_307_24470_20140604_230747_inLine +BABEL_OP3_307_24470_20140604_230747_outLine +BABEL_OP3_307_25767_20140403_234644_inLine +BABEL_OP3_307_25767_20140403_234644_outLine +BABEL_OP3_307_26388_20140504_193621_inLine +BABEL_OP3_307_26388_20140504_193621_outLine +BABEL_OP3_307_26602_20140702_235542_inLine +BABEL_OP3_307_26602_20140702_235542_outLine +BABEL_OP3_307_27125_20140414_222204_inLine +BABEL_OP3_307_27125_20140414_222204_outLine +BABEL_OP3_307_28190_20140703_190209_inLine +BABEL_OP3_307_28190_20140703_190209_outLine 
+BABEL_OP3_307_29021_20140725_002551_inLine +BABEL_OP3_307_29021_20140725_002551_outLine +BABEL_OP3_307_29072_20140613_182323_inLine +BABEL_OP3_307_29072_20140613_182323_outLine +BABEL_OP3_307_29076_20140605_214715_inLine +BABEL_OP3_307_29076_20140605_214715_outLine +BABEL_OP3_307_29633_20140722_010644_inLine +BABEL_OP3_307_29633_20140722_010644_outLine +BABEL_OP3_307_30098_20140725_200446_inLine +BABEL_OP3_307_30098_20140725_200446_outLine +BABEL_OP3_307_31346_20140704_220402_inLine +BABEL_OP3_307_31346_20140704_220402_outLine +BABEL_OP3_307_32122_20140510_002050_inLine +BABEL_OP3_307_32122_20140510_002050_outLine +BABEL_OP3_307_32171_20140827_233808_inLine +BABEL_OP3_307_32171_20140827_233808_outLine +BABEL_OP3_307_32301_20140618_175857_inLine +BABEL_OP3_307_32301_20140618_175857_outLine +BABEL_OP3_307_32328_20140701_173938_inLine +BABEL_OP3_307_32328_20140701_173938_outLine +BABEL_OP3_307_32837_20140628_224152_inLine +BABEL_OP3_307_32837_20140628_224152_outLine +BABEL_OP3_307_33175_20140416_211640_inLine +BABEL_OP3_307_33175_20140416_211640_outLine +BABEL_OP3_307_33229_20140717_222336_inLine +BABEL_OP3_307_33229_20140717_222336_outLine +BABEL_OP3_307_33251_20140603_185012_inLine +BABEL_OP3_307_33251_20140603_185012_outLine +BABEL_OP3_307_33273_20140504_190501_inLine +BABEL_OP3_307_33273_20140504_190501_outLine +BABEL_OP3_307_34197_20140401_235309_inLine +BABEL_OP3_307_34197_20140401_235309_outLine +BABEL_OP3_307_34336_20140405_010509_inLine +BABEL_OP3_307_34336_20140405_010509_outLine +BABEL_OP3_307_34679_20140405_000658_inLine +BABEL_OP3_307_34679_20140405_000658_outLine +BABEL_OP3_307_34811_20140517_012722_inLine +BABEL_OP3_307_34811_20140517_012722_outLine +BABEL_OP3_307_35139_20140403_212641_inLine +BABEL_OP3_307_35139_20140403_212641_outLine +BABEL_OP3_307_35181_20140719_185816_inLine +BABEL_OP3_307_35181_20140719_185816_outLine +BABEL_OP3_307_35583_20140706_224724_inLine +BABEL_OP3_307_35583_20140706_224724_outLine +BABEL_OP3_307_36669_20140512_181519_inLine +BABEL_OP3_307_36669_20140512_181519_outLine +BABEL_OP3_307_37228_20140706_173354_inLine +BABEL_OP3_307_37228_20140706_173354_outLine +BABEL_OP3_307_38076_20140531_001406_inLine +BABEL_OP3_307_38076_20140531_001406_outLine +BABEL_OP3_307_38588_20140505_183744_inLine +BABEL_OP3_307_38588_20140505_183744_outLine +BABEL_OP3_307_38664_20140508_003821_inLine +BABEL_OP3_307_38664_20140508_003821_outLine +BABEL_OP3_307_39059_20140717_183250_inLine +BABEL_OP3_307_39059_20140717_183250_outLine +BABEL_OP3_307_41097_20140531_181736_inLine +BABEL_OP3_307_41097_20140531_181736_outLine +BABEL_OP3_307_41685_20140825_205956_inLine +BABEL_OP3_307_41685_20140825_205956_outLine +BABEL_OP3_307_41720_20140824_215221_inLine +BABEL_OP3_307_41720_20140824_215221_outLine +BABEL_OP3_307_43286_20140519_004615_inLine +BABEL_OP3_307_43286_20140519_004615_outLine +BABEL_OP3_307_43323_20140824_230200_inLine +BABEL_OP3_307_43323_20140824_230200_outLine +BABEL_OP3_307_43784_20140430_225016_inLine +BABEL_OP3_307_43784_20140430_225016_outLine +BABEL_OP3_307_43794_20140902_183511_inLine +BABEL_OP3_307_43794_20140902_183511_outLine +BABEL_OP3_307_43920_20140622_222232_inLine +BABEL_OP3_307_43920_20140622_222232_outLine +BABEL_OP3_307_44446_20140827_003250_inLine +BABEL_OP3_307_44446_20140827_003250_outLine +BABEL_OP3_307_44477_20140611_180941_inLine +BABEL_OP3_307_44477_20140611_180941_outLine +BABEL_OP3_307_45771_20140824_012354_inLine +BABEL_OP3_307_45771_20140824_012354_outLine +BABEL_OP3_307_46041_20140705_175737_inLine 
+BABEL_OP3_307_46041_20140705_175737_outLine +BABEL_OP3_307_46310_20140417_192000_inLine +BABEL_OP3_307_46310_20140417_192000_outLine +BABEL_OP3_307_46589_20140606_191357_inLine +BABEL_OP3_307_46589_20140606_191357_outLine +BABEL_OP3_307_46681_20140403_002233_inLine +BABEL_OP3_307_46681_20140403_002233_outLine +BABEL_OP3_307_46770_20140706_002306_inLine +BABEL_OP3_307_46770_20140706_002306_outLine +BABEL_OP3_307_46976_20140516_234604_inLine +BABEL_OP3_307_46976_20140516_234604_outLine +BABEL_OP3_307_47451_20140624_234108_inLine +BABEL_OP3_307_47451_20140624_234108_outLine +BABEL_OP3_307_48243_20140423_214726_inLine +BABEL_OP3_307_48243_20140423_214726_outLine +BABEL_OP3_307_49027_20140811_191512_inLine +BABEL_OP3_307_49027_20140811_191512_outLine +BABEL_OP3_307_49287_20140527_215142_inLine +BABEL_OP3_307_49287_20140527_215142_outLine +BABEL_OP3_307_49502_20140415_220754_inLine +BABEL_OP3_307_49502_20140415_220754_outLine +BABEL_OP3_307_49768_20140505_000629_inLine +BABEL_OP3_307_49768_20140505_000629_outLine +BABEL_OP3_307_49907_20140429_214231_inLine +BABEL_OP3_307_49907_20140429_214231_outLine +BABEL_OP3_307_50427_20140519_180652_inLine +BABEL_OP3_307_50427_20140519_180652_outLine +BABEL_OP3_307_50940_20140902_173543_inLine +BABEL_OP3_307_50940_20140902_173543_outLine +BABEL_OP3_307_51185_20140901_232033_inLine +BABEL_OP3_307_51185_20140901_232033_outLine +BABEL_OP3_307_51484_20140703_181343_inLine +BABEL_OP3_307_51484_20140703_181343_outLine +BABEL_OP3_307_51611_20140423_232011_inLine +BABEL_OP3_307_51611_20140423_232011_outLine +BABEL_OP3_307_51968_20140503_185322_inLine +BABEL_OP3_307_51968_20140503_185322_outLine +BABEL_OP3_307_51968_20140503_185916_inLine +BABEL_OP3_307_51968_20140503_185916_outLine +BABEL_OP3_307_52301_20140423_210352_inLine +BABEL_OP3_307_52301_20140423_210352_outLine +BABEL_OP3_307_52381_20140705_233901_inLine +BABEL_OP3_307_52381_20140705_233901_outLine +BABEL_OP3_307_52404_20140607_181619_inLine +BABEL_OP3_307_52404_20140607_181619_outLine +BABEL_OP3_307_52422_20140707_220639_inLine +BABEL_OP3_307_52422_20140707_220639_outLine +BABEL_OP3_307_53842_20140513_184522_inLine +BABEL_OP3_307_53842_20140513_184522_outLine +BABEL_OP3_307_54104_20140503_183514_inLine +BABEL_OP3_307_54104_20140503_183514_outLine +BABEL_OP3_307_54477_20140705_174757_inLine +BABEL_OP3_307_54477_20140705_174757_outLine +BABEL_OP3_307_54827_20140814_180107_inLine +BABEL_OP3_307_54827_20140814_180107_outLine +BABEL_OP3_307_54841_20140713_170956_inLine +BABEL_OP3_307_54841_20140713_170956_outLine +BABEL_OP3_307_55902_20140829_192235_inLine +BABEL_OP3_307_55902_20140829_192235_outLine +BABEL_OP3_307_56023_20140704_191158_inLine +BABEL_OP3_307_56023_20140704_191158_outLine +BABEL_OP3_307_56198_20140501_005036_inLine +BABEL_OP3_307_56198_20140501_005036_outLine +BABEL_OP3_307_57464_20140728_215432_inLine +BABEL_OP3_307_57464_20140728_215432_outLine +BABEL_OP3_307_57678_20140405_000739_inLine +BABEL_OP3_307_57678_20140405_000739_outLine +BABEL_OP3_307_58103_20140511_191956_inLine +BABEL_OP3_307_58103_20140511_191956_outLine +BABEL_OP3_307_58145_20140605_175238_inLine +BABEL_OP3_307_58145_20140605_175238_outLine +BABEL_OP3_307_58313_20140605_235938_inLine +BABEL_OP3_307_58313_20140605_235938_outLine +BABEL_OP3_307_58585_20140717_221803_inLine +BABEL_OP3_307_58585_20140717_221803_outLine +BABEL_OP3_307_58734_20140422_182501_inLine +BABEL_OP3_307_58734_20140422_182501_outLine +BABEL_OP3_307_59028_20140820_184151_inLine +BABEL_OP3_307_59028_20140820_184151_outLine 
+BABEL_OP3_307_59091_20140706_233018_inLine +BABEL_OP3_307_59091_20140706_233018_outLine +BABEL_OP3_307_59307_20140730_225719_inLine +BABEL_OP3_307_59307_20140730_225719_outLine +BABEL_OP3_307_59635_20140705_193327_inLine +BABEL_OP3_307_59635_20140705_193327_outLine +BABEL_OP3_307_60026_20140416_210913_inLine +BABEL_OP3_307_60026_20140416_210913_outLine +BABEL_OP3_307_60474_20140503_215918_inLine +BABEL_OP3_307_60474_20140503_215918_outLine +BABEL_OP3_307_61167_20140511_204037_inLine +BABEL_OP3_307_61167_20140511_204037_outLine +BABEL_OP3_307_61731_20140407_191634_inLine +BABEL_OP3_307_61731_20140407_191634_outLine +BABEL_OP3_307_61971_20140811_182130_inLine +BABEL_OP3_307_61971_20140811_182130_outLine +BABEL_OP3_307_62158_20140907_190726_inLine +BABEL_OP3_307_62158_20140907_190726_outLine +BABEL_OP3_307_64065_20140502_190738_inLine +BABEL_OP3_307_64065_20140502_190738_outLine +BABEL_OP3_307_64350_20140403_011744_inLine +BABEL_OP3_307_64350_20140403_011744_outLine +BABEL_OP3_307_64768_20140404_233306_inLine +BABEL_OP3_307_64768_20140404_233306_outLine +BABEL_OP3_307_65064_20140604_223702_inLine +BABEL_OP3_307_65064_20140604_223702_outLine +BABEL_OP3_307_65367_20140706_182846_inLine +BABEL_OP3_307_65367_20140706_182846_outLine +BABEL_OP3_307_66001_20140518_232707_inLine +BABEL_OP3_307_66001_20140518_232707_outLine +BABEL_OP3_307_66305_20140807_184053_inLine +BABEL_OP3_307_66305_20140807_184053_outLine +BABEL_OP3_307_66822_20140504_164117_inLine +BABEL_OP3_307_66822_20140504_164117_outLine +BABEL_OP3_307_67283_20140421_213932_inLine +BABEL_OP3_307_67283_20140421_213932_outLine +BABEL_OP3_307_67552_20140611_194432_inLine +BABEL_OP3_307_67552_20140611_194432_outLine +BABEL_OP3_307_67659_20140503_214825_inLine +BABEL_OP3_307_67659_20140503_214825_outLine +BABEL_OP3_307_68748_20140609_212915_inLine +BABEL_OP3_307_68748_20140609_212915_outLine +BABEL_OP3_307_69096_20140813_192001_inLine +BABEL_OP3_307_69096_20140813_192001_outLine +BABEL_OP3_307_69992_20140502_183707_inLine +BABEL_OP3_307_69992_20140502_183707_outLine +BABEL_OP3_307_70452_20140504_180340_inLine +BABEL_OP3_307_70452_20140504_180340_outLine +BABEL_OP3_307_70986_20140825_003434_inLine +BABEL_OP3_307_70986_20140825_003434_outLine +BABEL_OP3_307_71189_20140715_012540_inLine +BABEL_OP3_307_71189_20140715_012540_outLine +BABEL_OP3_307_71263_20140602_180728_inLine +BABEL_OP3_307_71263_20140602_180728_outLine +BABEL_OP3_307_71404_20140423_203052_inLine +BABEL_OP3_307_71404_20140423_203052_outLine +BABEL_OP3_307_72587_20140529_225152_inLine +BABEL_OP3_307_72587_20140529_225152_outLine +BABEL_OP3_307_72952_20140819_214300_inLine +BABEL_OP3_307_72952_20140819_214300_outLine +BABEL_OP3_307_73005_20140815_000302_inLine +BABEL_OP3_307_73005_20140815_000302_outLine +BABEL_OP3_307_73258_20140508_180508_inLine +BABEL_OP3_307_73258_20140508_180508_outLine +BABEL_OP3_307_73299_20140822_002656_inLine +BABEL_OP3_307_73299_20140822_002656_outLine +BABEL_OP3_307_73446_20140809_165436_inLine +BABEL_OP3_307_73446_20140809_165436_outLine +BABEL_OP3_307_73511_20140614_171020_inLine +BABEL_OP3_307_73511_20140614_171020_outLine +BABEL_OP3_307_74667_20140508_225904_inLine +BABEL_OP3_307_74667_20140508_225904_outLine +BABEL_OP3_307_74799_20140602_191429_inLine +BABEL_OP3_307_74799_20140602_191429_outLine +BABEL_OP3_307_75365_20140821_220730_inLine +BABEL_OP3_307_75365_20140821_220730_outLine +BABEL_OP3_307_75993_20140404_202655_inLine +BABEL_OP3_307_75993_20140404_202655_outLine +BABEL_OP3_307_76238_20140623_222754_inLine 
+BABEL_OP3_307_76238_20140623_222754_outLine +BABEL_OP3_307_76499_20140512_232123_inLine +BABEL_OP3_307_76499_20140512_232123_outLine +BABEL_OP3_307_76902_20140829_203049_inLine +BABEL_OP3_307_76902_20140829_203049_outLine +BABEL_OP3_307_77139_20140416_004159_inLine +BABEL_OP3_307_77139_20140416_004159_outLine +BABEL_OP3_307_77427_20140508_024629_inLine +BABEL_OP3_307_77427_20140508_024629_outLine +BABEL_OP3_307_77803_20140402_001929_inLine +BABEL_OP3_307_77803_20140402_001929_outLine +BABEL_OP3_307_77832_20140903_183557_inLine +BABEL_OP3_307_77832_20140903_183557_outLine +BABEL_OP3_307_78161_20140828_164656_inLine +BABEL_OP3_307_78161_20140828_164656_outLine +BABEL_OP3_307_78194_20140411_164649_inLine +BABEL_OP3_307_78194_20140411_164649_outLine +BABEL_OP3_307_78943_20140505_000428_inLine +BABEL_OP3_307_78943_20140505_000428_outLine +BABEL_OP3_307_79167_20140606_224734_inLine +BABEL_OP3_307_79167_20140606_224734_outLine +BABEL_OP3_307_79429_20140826_212728_inLine +BABEL_OP3_307_79429_20140826_212728_outLine +BABEL_OP3_307_79451_20140417_185927_inLine +BABEL_OP3_307_79451_20140417_185927_outLine +BABEL_OP3_307_79660_20140820_174118_inLine +BABEL_OP3_307_79660_20140820_174118_outLine +BABEL_OP3_307_80069_20140821_213402_inLine +BABEL_OP3_307_80069_20140821_213402_outLine +BABEL_OP3_307_80136_20140706_191530_inLine +BABEL_OP3_307_80136_20140706_191530_outLine +BABEL_OP3_307_80306_20140510_220902_inLine +BABEL_OP3_307_80306_20140510_220902_outLine +BABEL_OP3_307_81213_20140501_002133_inLine +BABEL_OP3_307_81213_20140501_002133_outLine +BABEL_OP3_307_81287_20140616_182444_inLine +BABEL_OP3_307_81287_20140616_182444_outLine +BABEL_OP3_307_81424_20140614_215540_inLine +BABEL_OP3_307_81424_20140614_215540_outLine +BABEL_OP3_307_81435_20140529_235732_inLine +BABEL_OP3_307_81435_20140529_235732_outLine +BABEL_OP3_307_81671_20140704_213446_inLine +BABEL_OP3_307_81671_20140704_213446_outLine +BABEL_OP3_307_82140_20140513_191321_inLine +BABEL_OP3_307_82140_20140513_191321_outLine +BABEL_OP3_307_82496_20140429_221502_inLine +BABEL_OP3_307_82496_20140429_221502_outLine +BABEL_OP3_307_82626_20140825_181202_inLine +BABEL_OP3_307_82626_20140825_181202_outLine +BABEL_OP3_307_82863_20140511_183302_inLine +BABEL_OP3_307_82863_20140511_183302_outLine +BABEL_OP3_307_82904_20140730_002106_inLine +BABEL_OP3_307_82904_20140730_002106_outLine +BABEL_OP3_307_82935_20140702_173347_inLine +BABEL_OP3_307_82935_20140702_173347_outLine +BABEL_OP3_307_86191_20140505_200151_inLine +BABEL_OP3_307_86191_20140505_200151_outLine +BABEL_OP3_307_86433_20140601_173214_inLine +BABEL_OP3_307_86433_20140601_173214_outLine +BABEL_OP3_307_86472_20140609_222936_inLine +BABEL_OP3_307_86472_20140609_222936_outLine +BABEL_OP3_307_86713_20140704_201850_inLine +BABEL_OP3_307_86713_20140704_201850_outLine +BABEL_OP3_307_86715_20140820_191201_inLine +BABEL_OP3_307_86715_20140820_191201_outLine +BABEL_OP3_307_86722_20140404_001449_inLine +BABEL_OP3_307_86722_20140404_001449_outLine +BABEL_OP3_307_86888_20140530_190736_inLine +BABEL_OP3_307_86888_20140530_190736_outLine +BABEL_OP3_307_87074_20140429_185857_inLine +BABEL_OP3_307_87074_20140429_185857_outLine +BABEL_OP3_307_88756_20140908_011014_inLine +BABEL_OP3_307_88756_20140908_011014_outLine +BABEL_OP3_307_88776_20140417_180154_inLine +BABEL_OP3_307_88776_20140417_180154_outLine +BABEL_OP3_307_88783_20140623_173406_inLine +BABEL_OP3_307_88783_20140623_173406_outLine +BABEL_OP3_307_89203_20140705_004511_inLine +BABEL_OP3_307_89203_20140705_004511_outLine 
+BABEL_OP3_307_89358_20140513_014405_inLine +BABEL_OP3_307_89358_20140513_014405_outLine +BABEL_OP3_307_89575_20140705_220326_inLine +BABEL_OP3_307_89575_20140705_220326_outLine +BABEL_OP3_307_89877_20140602_225948_inLine +BABEL_OP3_307_89877_20140602_225948_outLine +BABEL_OP3_307_90417_20140822_223028_inLine +BABEL_OP3_307_90417_20140822_223028_outLine +BABEL_OP3_307_90417_20140822_224049_inLine +BABEL_OP3_307_90417_20140822_224049_outLine +BABEL_OP3_307_90572_20140723_230358_inLine +BABEL_OP3_307_90572_20140723_230358_outLine +BABEL_OP3_307_90709_20140421_235753_inLine +BABEL_OP3_307_90709_20140421_235753_outLine +BABEL_OP3_307_90739_20140503_223700_inLine +BABEL_OP3_307_90739_20140503_223700_outLine +BABEL_OP3_307_91189_20140821_210308_inLine +BABEL_OP3_307_91189_20140821_210308_outLine +BABEL_OP3_307_91581_20140623_234855_inLine +BABEL_OP3_307_91581_20140623_234855_outLine +BABEL_OP3_307_91884_20140723_193506_inLine +BABEL_OP3_307_91884_20140723_193506_outLine +BABEL_OP3_307_91888_20140813_180920_inLine +BABEL_OP3_307_91888_20140813_180920_outLine +BABEL_OP3_307_91944_20140430_182005_inLine +BABEL_OP3_307_91944_20140430_182005_outLine +BABEL_OP3_307_92605_20140902_013736_inLine +BABEL_OP3_307_92605_20140902_013736_outLine +BABEL_OP3_307_92757_20140809_200327_inLine +BABEL_OP3_307_92757_20140809_200327_outLine +BABEL_OP3_307_92942_20140603_223928_inLine +BABEL_OP3_307_92942_20140603_223928_outLine +BABEL_OP3_307_93469_20140813_214657_inLine +BABEL_OP3_307_93469_20140813_214657_outLine +BABEL_OP3_307_93490_20140704_173442_inLine +BABEL_OP3_307_93490_20140704_173442_outLine +BABEL_OP3_307_93604_20140814_210305_inLine +BABEL_OP3_307_93604_20140814_210305_outLine +BABEL_OP3_307_93858_20140822_215929_inLine +BABEL_OP3_307_93858_20140822_215929_outLine +BABEL_OP3_307_94025_20140606_214625_inLine +BABEL_OP3_307_94025_20140606_214625_outLine +BABEL_OP3_307_94253_20140423_183534_inLine +BABEL_OP3_307_94253_20140423_183534_outLine +BABEL_OP3_307_94316_20140814_001643_inLine +BABEL_OP3_307_94316_20140814_001643_outLine +BABEL_OP3_307_94333_20140417_212859_inLine +BABEL_OP3_307_94333_20140417_212859_outLine +BABEL_OP3_307_94409_20140506_174815_inLine +BABEL_OP3_307_94409_20140506_174815_outLine +BABEL_OP3_307_94442_20140725_195152_inLine +BABEL_OP3_307_94442_20140725_195152_outLine +BABEL_OP3_307_94465_20140622_180637_inLine +BABEL_OP3_307_94465_20140622_180637_outLine +BABEL_OP3_307_94891_20140830_193021_inLine +BABEL_OP3_307_94891_20140830_193021_outLine +BABEL_OP3_307_94969_20140903_171944_inLine +BABEL_OP3_307_94969_20140903_171944_outLine +BABEL_OP3_307_95077_20140622_221523_inLine +BABEL_OP3_307_95077_20140622_221523_outLine +BABEL_OP3_307_95670_20140417_201744_inLine +BABEL_OP3_307_95670_20140417_201744_outLine +BABEL_OP3_307_96690_20140507_212636_inLine +BABEL_OP3_307_96690_20140507_212636_outLine +BABEL_OP3_307_96820_20140517_194553_inLine +BABEL_OP3_307_96820_20140517_194553_outLine +BABEL_OP3_307_96910_20140504_223516_inLine +BABEL_OP3_307_96910_20140504_223516_outLine +BABEL_OP3_307_97588_20140415_223241_inLine +BABEL_OP3_307_97588_20140415_223241_outLine +BABEL_OP3_307_98192_20140823_224529_inLine +BABEL_OP3_307_98192_20140823_224529_outLine +BABEL_OP3_307_98365_20140606_004323_inLine +BABEL_OP3_307_98365_20140606_004323_outLine +BABEL_OP3_307_99202_20140519_213506_inLine +BABEL_OP3_307_99202_20140519_213506_outLine +BABEL_OP3_307_99594_20140508_192558_inLine +BABEL_OP3_307_99594_20140508_192558_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/untranscribed-training.list 
b/egs/babel/s5d/conf/lists/307-amharic/untranscribed-training.list new file mode 100644 index 00000000000..2015539e910 --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/untranscribed-training.list @@ -0,0 +1,568 @@ +BABEL_OP3_307_10019_20140510_215248_inLine +BABEL_OP3_307_10019_20140510_215248_outLine +BABEL_OP3_307_10019_20140510_220549_inLine +BABEL_OP3_307_10019_20140510_220549_outLine +BABEL_OP3_307_10188_20140414_190900_inLine +BABEL_OP3_307_10188_20140414_190900_outLine +BABEL_OP3_307_10974_20140518_232844_inLine +BABEL_OP3_307_10974_20140518_232844_outLine +BABEL_OP3_307_13586_20140517_192301_inLine +BABEL_OP3_307_13586_20140517_192301_outLine +BABEL_OP3_307_14137_20140504_224411_inLine +BABEL_OP3_307_14137_20140504_224411_outLine +BABEL_OP3_307_14141_20140729_225447_inLine +BABEL_OP3_307_14141_20140729_225447_outLine +BABEL_OP3_307_14158_20140609_183923_inLine +BABEL_OP3_307_14158_20140609_183923_outLine +BABEL_OP3_307_14719_20140630_214352_inLine +BABEL_OP3_307_14719_20140630_214352_outLine +BABEL_OP3_307_14719_20140630_215754_inLine +BABEL_OP3_307_14719_20140630_215754_outLine +BABEL_OP3_307_14807_20140603_161507_inLine +BABEL_OP3_307_14807_20140603_161507_outLine +BABEL_OP3_307_14807_20140603_163538_inLine +BABEL_OP3_307_14807_20140603_163538_outLine +BABEL_OP3_307_15163_20140505_213531_inLine +BABEL_OP3_307_15163_20140505_213531_outLine +BABEL_OP3_307_15466_20140829_012731_inLine +BABEL_OP3_307_15466_20140829_012731_outLine +BABEL_OP3_307_15638_20140613_232945_inLine +BABEL_OP3_307_15638_20140613_232945_outLine +BABEL_OP3_307_16886_20140507_225852_inLine +BABEL_OP3_307_16886_20140507_225852_outLine +BABEL_OP3_307_16938_20140518_195229_inLine +BABEL_OP3_307_16938_20140518_195229_outLine +BABEL_OP3_307_17113_20140822_183518_inLine +BABEL_OP3_307_17113_20140822_183518_outLine +BABEL_OP3_307_17127_20140710_180949_inLine +BABEL_OP3_307_17127_20140710_180949_outLine +BABEL_OP3_307_17165_20140510_233034_inLine +BABEL_OP3_307_17165_20140510_233034_outLine +BABEL_OP3_307_17165_20140520_203751_inLine +BABEL_OP3_307_17165_20140520_203751_outLine +BABEL_OP3_307_17567_20140518_203832_inLine +BABEL_OP3_307_17567_20140518_203832_outLine +BABEL_OP3_307_17582_20140822_232433_inLine +BABEL_OP3_307_17582_20140822_232433_outLine +BABEL_OP3_307_17890_20140617_183508_inLine +BABEL_OP3_307_17890_20140617_183508_outLine +BABEL_OP3_307_18863_20140629_183439_inLine +BABEL_OP3_307_18863_20140629_183439_outLine +BABEL_OP3_307_19120_20140730_221602_inLine +BABEL_OP3_307_19120_20140730_221602_outLine +BABEL_OP3_307_19703_20140504_214945_inLine +BABEL_OP3_307_19703_20140504_214945_outLine +BABEL_OP3_307_19749_20140718_000521_inLine +BABEL_OP3_307_19749_20140718_000521_outLine +BABEL_OP3_307_19767_20140811_181547_inLine +BABEL_OP3_307_19767_20140811_181547_outLine +BABEL_OP3_307_20330_20140716_175203_inLine +BABEL_OP3_307_20330_20140716_175203_outLine +BABEL_OP3_307_21109_20140703_171502_inLine +BABEL_OP3_307_21109_20140703_171502_outLine +BABEL_OP3_307_21109_20140703_180309_inLine +BABEL_OP3_307_21109_20140703_180309_outLine +BABEL_OP3_307_21159_20140824_225236_inLine +BABEL_OP3_307_21159_20140824_225236_outLine +BABEL_OP3_307_21159_20140901_165658_inLine +BABEL_OP3_307_21159_20140901_165658_outLine +BABEL_OP3_307_21393_20140814_190327_inLine +BABEL_OP3_307_21393_20140814_190327_outLine +BABEL_OP3_307_23395_20140605_170532_inLine +BABEL_OP3_307_23395_20140605_170532_outLine +BABEL_OP3_307_23395_20140605_171255_inLine +BABEL_OP3_307_23395_20140605_171255_outLine 
+BABEL_OP3_307_23681_20140823_184904_inLine +BABEL_OP3_307_23681_20140823_184904_outLine +BABEL_OP3_307_23681_20140823_190005_inLine +BABEL_OP3_307_23681_20140823_190005_outLine +BABEL_OP3_307_24290_20140717_001151_inLine +BABEL_OP3_307_24290_20140717_001151_outLine +BABEL_OP3_307_24323_20140508_012148_inLine +BABEL_OP3_307_24323_20140508_012148_outLine +BABEL_OP3_307_24323_20140508_020931_inLine +BABEL_OP3_307_24323_20140508_020931_outLine +BABEL_OP3_307_24323_20140508_022325_inLine +BABEL_OP3_307_24323_20140508_022325_outLine +BABEL_OP3_307_24605_20140403_000212_inLine +BABEL_OP3_307_24605_20140403_000212_outLine +BABEL_OP3_307_24779_20140905_004858_inLine +BABEL_OP3_307_24779_20140905_004858_outLine +BABEL_OP3_307_25085_20140822_212709_inLine +BABEL_OP3_307_25085_20140822_212709_outLine +BABEL_OP3_307_25220_20140905_000706_inLine +BABEL_OP3_307_25220_20140905_000706_outLine +BABEL_OP3_307_25412_20140604_234923_inLine +BABEL_OP3_307_25412_20140604_234923_outLine +BABEL_OP3_307_25412_20140605_000418_inLine +BABEL_OP3_307_25412_20140605_000418_outLine +BABEL_OP3_307_25961_20140403_011918_inLine +BABEL_OP3_307_25961_20140403_011918_outLine +BABEL_OP3_307_26074_20140604_165342_inLine +BABEL_OP3_307_26074_20140604_165342_outLine +BABEL_OP3_307_26478_20140824_185710_inLine +BABEL_OP3_307_26478_20140824_185710_outLine +BABEL_OP3_307_26999_20140530_190830_inLine +BABEL_OP3_307_26999_20140530_190830_outLine +BABEL_OP3_307_27042_20140630_180037_inLine +BABEL_OP3_307_27042_20140630_180037_outLine +BABEL_OP3_307_27218_20140518_235212_inLine +BABEL_OP3_307_27218_20140518_235212_outLine +BABEL_OP3_307_27478_20140808_231848_inLine +BABEL_OP3_307_27478_20140808_231848_outLine +BABEL_OP3_307_27590_20140618_185748_inLine +BABEL_OP3_307_27590_20140618_185748_outLine +BABEL_OP3_307_27590_20140618_190731_inLine +BABEL_OP3_307_27590_20140618_190731_outLine +BABEL_OP3_307_28303_20140503_225229_inLine +BABEL_OP3_307_28303_20140503_225229_outLine +BABEL_OP3_307_28422_20140607_215944_inLine +BABEL_OP3_307_28422_20140607_215944_outLine +BABEL_OP3_307_28538_20140513_205210_inLine +BABEL_OP3_307_28538_20140513_205210_outLine +BABEL_OP3_307_29352_20140824_172023_inLine +BABEL_OP3_307_29352_20140824_172023_outLine +BABEL_OP3_307_29416_20140705_233008_inLine +BABEL_OP3_307_29416_20140705_233008_outLine +BABEL_OP3_307_29777_20140705_173903_inLine +BABEL_OP3_307_29777_20140705_173903_outLine +BABEL_OP3_307_30253_20140624_003258_inLine +BABEL_OP3_307_30253_20140624_003258_outLine +BABEL_OP3_307_30497_20140809_181809_inLine +BABEL_OP3_307_30497_20140809_181809_outLine +BABEL_OP3_307_30869_20140630_183404_inLine +BABEL_OP3_307_30869_20140630_183404_outLine +BABEL_OP3_307_30869_20140630_184229_inLine +BABEL_OP3_307_30869_20140630_184229_outLine +BABEL_OP3_307_31109_20140518_201149_inLine +BABEL_OP3_307_31109_20140518_201149_outLine +BABEL_OP3_307_31182_20140701_221449_inLine +BABEL_OP3_307_31182_20140701_221449_outLine +BABEL_OP3_307_31182_20140702_223108_inLine +BABEL_OP3_307_31182_20140702_223108_outLine +BABEL_OP3_307_31628_20140610_230053_inLine +BABEL_OP3_307_31628_20140610_230053_outLine +BABEL_OP3_307_31979_20140512_015136_inLine +BABEL_OP3_307_31979_20140512_015136_outLine +BABEL_OP3_307_32630_20140821_180259_inLine +BABEL_OP3_307_32630_20140821_180259_outLine +BABEL_OP3_307_32630_20140821_181033_inLine +BABEL_OP3_307_32630_20140821_181033_outLine +BABEL_OP3_307_32630_20140821_182004_inLine +BABEL_OP3_307_32630_20140821_182004_outLine +BABEL_OP3_307_32959_20140621_191212_inLine 
+BABEL_OP3_307_32959_20140621_191212_outLine +BABEL_OP3_307_33216_20140904_190946_inLine +BABEL_OP3_307_33216_20140904_190946_outLine +BABEL_OP3_307_33704_20140705_224629_inLine +BABEL_OP3_307_33704_20140705_224629_outLine +BABEL_OP3_307_33840_20140628_232051_inLine +BABEL_OP3_307_33840_20140628_232051_outLine +BABEL_OP3_307_34328_20140511_224229_inLine +BABEL_OP3_307_34328_20140511_224229_outLine +BABEL_OP3_307_34899_20140824_175928_inLine +BABEL_OP3_307_34899_20140824_175928_outLine +BABEL_OP3_307_35885_20140717_212149_inLine +BABEL_OP3_307_35885_20140717_212149_outLine +BABEL_OP3_307_36059_20140725_170553_inLine +BABEL_OP3_307_36059_20140725_170553_outLine +BABEL_OP3_307_36059_20140725_171011_inLine +BABEL_OP3_307_36059_20140725_171011_outLine +BABEL_OP3_307_36341_20140414_233501_inLine +BABEL_OP3_307_36341_20140414_233501_outLine +BABEL_OP3_307_36341_20140415_224118_inLine +BABEL_OP3_307_36341_20140415_224118_outLine +BABEL_OP3_307_37229_20140820_214115_inLine +BABEL_OP3_307_37229_20140820_214115_outLine +BABEL_OP3_307_37229_20140820_215332_inLine +BABEL_OP3_307_37229_20140820_215332_outLine +BABEL_OP3_307_37499_20140823_233015_inLine +BABEL_OP3_307_37499_20140823_233015_outLine +BABEL_OP3_307_37598_20140602_183825_inLine +BABEL_OP3_307_37598_20140602_183825_outLine +BABEL_OP3_307_38554_20140414_233433_inLine +BABEL_OP3_307_38554_20140414_233433_outLine +BABEL_OP3_307_38979_20140711_231114_inLine +BABEL_OP3_307_38979_20140711_231114_outLine +BABEL_OP3_307_38979_20140711_232222_inLine +BABEL_OP3_307_38979_20140711_232222_outLine +BABEL_OP3_307_39099_20140814_210148_inLine +BABEL_OP3_307_39099_20140814_210148_outLine +BABEL_OP3_307_39307_20140422_002337_inLine +BABEL_OP3_307_39307_20140422_002337_outLine +BABEL_OP3_307_39680_20140710_174506_inLine +BABEL_OP3_307_39680_20140710_174506_outLine +BABEL_OP3_307_41038_20140611_180425_inLine +BABEL_OP3_307_41038_20140611_180425_outLine +BABEL_OP3_307_41233_20140629_192647_inLine +BABEL_OP3_307_41233_20140629_192647_outLine +BABEL_OP3_307_41400_20140811_174054_inLine +BABEL_OP3_307_41400_20140811_174054_outLine +BABEL_OP3_307_41692_20140824_185620_inLine +BABEL_OP3_307_41692_20140824_185620_outLine +BABEL_OP3_307_42600_20140504_183919_inLine +BABEL_OP3_307_42600_20140504_183919_outLine +BABEL_OP3_307_42619_20140527_230410_inLine +BABEL_OP3_307_42619_20140527_230410_outLine +BABEL_OP3_307_42718_20140809_190046_inLine +BABEL_OP3_307_42718_20140809_190046_outLine +BABEL_OP3_307_42942_20140508_233559_inLine +BABEL_OP3_307_42942_20140508_233559_outLine +BABEL_OP3_307_42991_20140611_192649_inLine +BABEL_OP3_307_42991_20140611_192649_outLine +BABEL_OP3_307_42991_20140611_193641_inLine +BABEL_OP3_307_42991_20140611_193641_outLine +BABEL_OP3_307_43239_20140703_231849_inLine +BABEL_OP3_307_43239_20140703_231849_outLine +BABEL_OP3_307_43368_20140510_180418_inLine +BABEL_OP3_307_43368_20140510_180418_outLine +BABEL_OP3_307_43368_20140510_183109_inLine +BABEL_OP3_307_43368_20140510_183109_outLine +BABEL_OP3_307_43395_20140825_200133_inLine +BABEL_OP3_307_43395_20140825_200133_outLine +BABEL_OP3_307_43646_20140414_232607_inLine +BABEL_OP3_307_43646_20140414_232607_outLine +BABEL_OP3_307_44255_20140716_232402_inLine +BABEL_OP3_307_44255_20140716_232402_outLine +BABEL_OP3_307_44255_20140716_233533_inLine +BABEL_OP3_307_44255_20140716_233533_outLine +BABEL_OP3_307_44347_20140707_214025_inLine +BABEL_OP3_307_44347_20140707_214025_outLine +BABEL_OP3_307_44531_20140724_233405_inLine +BABEL_OP3_307_44531_20140724_233405_outLine 
+BABEL_OP3_307_44868_20140609_224633_inLine +BABEL_OP3_307_44868_20140609_224633_outLine +BABEL_OP3_307_45201_20140811_170742_inLine +BABEL_OP3_307_45201_20140811_170742_outLine +BABEL_OP3_307_45235_20140705_174005_inLine +BABEL_OP3_307_45235_20140705_174005_outLine +BABEL_OP3_307_45559_20140725_180337_inLine +BABEL_OP3_307_45559_20140725_180337_outLine +BABEL_OP3_307_45560_20140403_200140_inLine +BABEL_OP3_307_45560_20140403_200140_outLine +BABEL_OP3_307_45642_20140417_165148_inLine +BABEL_OP3_307_45642_20140417_165148_outLine +BABEL_OP3_307_45642_20140417_171209_inLine +BABEL_OP3_307_45642_20140417_171209_outLine +BABEL_OP3_307_45697_20140723_174904_inLine +BABEL_OP3_307_45697_20140723_174904_outLine +BABEL_OP3_307_45770_20140417_234713_inLine +BABEL_OP3_307_45770_20140417_234713_outLine +BABEL_OP3_307_45770_20140418_001050_inLine +BABEL_OP3_307_45770_20140418_001050_outLine +BABEL_OP3_307_45908_20140811_183550_inLine +BABEL_OP3_307_45908_20140811_183550_outLine +BABEL_OP3_307_46008_20140811_173939_inLine +BABEL_OP3_307_46008_20140811_173939_outLine +BABEL_OP3_307_46169_20140628_213057_inLine +BABEL_OP3_307_46169_20140628_213057_outLine +BABEL_OP3_307_46315_20140613_173444_inLine +BABEL_OP3_307_46315_20140613_173444_outLine +BABEL_OP3_307_46688_20140422_234341_inLine +BABEL_OP3_307_46688_20140422_234341_outLine +BABEL_OP3_307_46881_20140416_183617_inLine +BABEL_OP3_307_46881_20140416_183617_outLine +BABEL_OP3_307_46881_20140416_184809_inLine +BABEL_OP3_307_46881_20140416_184809_outLine +BABEL_OP3_307_47215_20140421_184514_inLine +BABEL_OP3_307_47215_20140421_184514_outLine +BABEL_OP3_307_48016_20140820_165510_inLine +BABEL_OP3_307_48016_20140820_165510_outLine +BABEL_OP3_307_48663_20140811_165120_inLine +BABEL_OP3_307_48663_20140811_165120_outLine +BABEL_OP3_307_48844_20140430_185608_inLine +BABEL_OP3_307_48844_20140430_185608_outLine +BABEL_OP3_307_49197_20140503_171214_inLine +BABEL_OP3_307_49197_20140503_171214_outLine +BABEL_OP3_307_49767_20140904_203629_inLine +BABEL_OP3_307_49767_20140904_203629_outLine +BABEL_OP3_307_49775_20140329_000415_inLine +BABEL_OP3_307_49775_20140329_000415_outLine +BABEL_OP3_307_49775_20140329_002350_inLine +BABEL_OP3_307_49775_20140329_002350_outLine +BABEL_OP3_307_50726_20140404_005620_inLine +BABEL_OP3_307_50726_20140404_005620_outLine +BABEL_OP3_307_50958_20140508_000931_inLine +BABEL_OP3_307_50958_20140508_000931_outLine +BABEL_OP3_307_50962_20140430_182307_inLine +BABEL_OP3_307_50962_20140430_182307_outLine +BABEL_OP3_307_51015_20140619_234356_inLine +BABEL_OP3_307_51015_20140619_234356_outLine +BABEL_OP3_307_51414_20140824_173748_inLine +BABEL_OP3_307_51414_20140824_173748_outLine +BABEL_OP3_307_51414_20140824_175004_inLine +BABEL_OP3_307_51414_20140824_175004_outLine +BABEL_OP3_307_51530_20140813_230739_inLine +BABEL_OP3_307_51530_20140813_230739_outLine +BABEL_OP3_307_52025_20140520_191824_inLine +BABEL_OP3_307_52025_20140520_191824_outLine +BABEL_OP3_307_52070_20140904_203955_inLine +BABEL_OP3_307_52070_20140904_203955_outLine +BABEL_OP3_307_52442_20140516_214027_inLine +BABEL_OP3_307_52442_20140516_214027_outLine +BABEL_OP3_307_52447_20140822_171509_inLine +BABEL_OP3_307_52447_20140822_171509_outLine +BABEL_OP3_307_52447_20140822_172455_inLine +BABEL_OP3_307_52447_20140822_172455_outLine +BABEL_OP3_307_52725_20140725_190010_inLine +BABEL_OP3_307_52725_20140725_190010_outLine +BABEL_OP3_307_52725_20140725_190854_inLine +BABEL_OP3_307_52725_20140725_190854_outLine +BABEL_OP3_307_52804_20140502_184324_inLine 
+BABEL_OP3_307_52804_20140502_184324_outLine +BABEL_OP3_307_53144_20140807_225121_inLine +BABEL_OP3_307_53144_20140807_225121_outLine +BABEL_OP3_307_53492_20140730_174335_inLine +BABEL_OP3_307_53492_20140730_174335_outLine +BABEL_OP3_307_53665_20140809_225603_inLine +BABEL_OP3_307_53665_20140809_225603_outLine +BABEL_OP3_307_55818_20140403_203355_inLine +BABEL_OP3_307_55818_20140403_203355_outLine +BABEL_OP3_307_56019_20140716_230530_inLine +BABEL_OP3_307_56019_20140716_230530_outLine +BABEL_OP3_307_56076_20140810_005108_inLine +BABEL_OP3_307_56076_20140810_005108_outLine +BABEL_OP3_307_56213_20140621_172222_inLine +BABEL_OP3_307_56213_20140621_172222_outLine +BABEL_OP3_307_56306_20140705_225134_inLine +BABEL_OP3_307_56306_20140705_225134_outLine +BABEL_OP3_307_56684_20140630_231811_inLine +BABEL_OP3_307_56684_20140630_231811_outLine +BABEL_OP3_307_56720_20140616_224418_inLine +BABEL_OP3_307_56720_20140616_224418_outLine +BABEL_OP3_307_57542_20140720_222540_inLine +BABEL_OP3_307_57542_20140720_222540_outLine +BABEL_OP3_307_57650_20140712_172810_inLine +BABEL_OP3_307_57650_20140712_172810_outLine +BABEL_OP3_307_57922_20140603_234523_inLine +BABEL_OP3_307_57922_20140603_234523_outLine +BABEL_OP3_307_60310_20140628_231715_inLine +BABEL_OP3_307_60310_20140628_231715_outLine +BABEL_OP3_307_60436_20140730_191522_inLine +BABEL_OP3_307_60436_20140730_191522_outLine +BABEL_OP3_307_60706_20140401_190403_inLine +BABEL_OP3_307_60706_20140401_190403_outLine +BABEL_OP3_307_60836_20140405_213236_inLine +BABEL_OP3_307_60836_20140405_213236_outLine +BABEL_OP3_307_60836_20140406_002450_inLine +BABEL_OP3_307_60836_20140406_002450_outLine +BABEL_OP3_307_61219_20140404_200459_inLine +BABEL_OP3_307_61219_20140404_200459_outLine +BABEL_OP3_307_61225_20140414_220024_inLine +BABEL_OP3_307_61225_20140414_220024_outLine +BABEL_OP3_307_61963_20140710_174351_inLine +BABEL_OP3_307_61963_20140710_174351_outLine +BABEL_OP3_307_62155_20140721_233109_inLine +BABEL_OP3_307_62155_20140721_233109_outLine +BABEL_OP3_307_62289_20140811_205629_inLine +BABEL_OP3_307_62289_20140811_205629_outLine +BABEL_OP3_307_63906_20140807_235743_inLine +BABEL_OP3_307_63906_20140807_235743_outLine +BABEL_OP3_307_63938_20140715_225113_inLine +BABEL_OP3_307_63938_20140715_225113_outLine +BABEL_OP3_307_64014_20140717_232855_inLine +BABEL_OP3_307_64014_20140717_232855_outLine +BABEL_OP3_307_65298_20140718_004934_inLine +BABEL_OP3_307_65298_20140718_004934_outLine +BABEL_OP3_307_65477_20140507_213725_inLine +BABEL_OP3_307_65477_20140507_213725_outLine +BABEL_OP3_307_65477_20140507_214428_inLine +BABEL_OP3_307_65477_20140507_214428_outLine +BABEL_OP3_307_65477_20140507_215852_inLine +BABEL_OP3_307_65477_20140507_215852_outLine +BABEL_OP3_307_65913_20140811_185916_inLine +BABEL_OP3_307_65913_20140811_185916_outLine +BABEL_OP3_307_66026_20140622_001323_inLine +BABEL_OP3_307_66026_20140622_001323_outLine +BABEL_OP3_307_66026_20140622_003222_inLine +BABEL_OP3_307_66026_20140622_003222_outLine +BABEL_OP3_307_66837_20140622_193057_inLine +BABEL_OP3_307_66837_20140622_193057_outLine +BABEL_OP3_307_68182_20140712_230018_inLine +BABEL_OP3_307_68182_20140712_230018_outLine +BABEL_OP3_307_68306_20140619_234111_inLine +BABEL_OP3_307_68306_20140619_234111_outLine +BABEL_OP3_307_69746_20140708_002605_inLine +BABEL_OP3_307_69746_20140708_002605_outLine +BABEL_OP3_307_69885_20140809_214354_inLine +BABEL_OP3_307_69885_20140809_214354_outLine +BABEL_OP3_307_69885_20140809_221241_inLine +BABEL_OP3_307_69885_20140809_221241_outLine 
+BABEL_OP3_307_70221_20140531_232511_inLine +BABEL_OP3_307_70221_20140531_232511_outLine +BABEL_OP3_307_71121_20140827_212105_inLine +BABEL_OP3_307_71121_20140827_212105_outLine +BABEL_OP3_307_71282_20140712_184618_inLine +BABEL_OP3_307_71282_20140712_184618_outLine +BABEL_OP3_307_72349_20140811_213219_inLine +BABEL_OP3_307_72349_20140811_213219_outLine +BABEL_OP3_307_72844_20140414_222309_inLine +BABEL_OP3_307_72844_20140414_222309_outLine +BABEL_OP3_307_72844_20140414_223414_inLine +BABEL_OP3_307_72844_20140414_223414_outLine +BABEL_OP3_307_73549_20140905_002803_inLine +BABEL_OP3_307_73549_20140905_002803_outLine +BABEL_OP3_307_73964_20140809_233453_inLine +BABEL_OP3_307_73964_20140809_233453_outLine +BABEL_OP3_307_73964_20140809_234749_inLine +BABEL_OP3_307_73964_20140809_234749_outLine +BABEL_OP3_307_74111_20140630_190239_inLine +BABEL_OP3_307_74111_20140630_190239_outLine +BABEL_OP3_307_74253_20140621_235240_inLine +BABEL_OP3_307_74253_20140621_235240_outLine +BABEL_OP3_307_74280_20140414_183758_inLine +BABEL_OP3_307_74280_20140414_183758_outLine +BABEL_OP3_307_74455_20140715_191928_inLine +BABEL_OP3_307_74455_20140715_191928_outLine +BABEL_OP3_307_75223_20140401_234318_inLine +BABEL_OP3_307_75223_20140401_234318_outLine +BABEL_OP3_307_75223_20140401_235025_inLine +BABEL_OP3_307_75223_20140401_235025_outLine +BABEL_OP3_307_75261_20140630_231504_inLine +BABEL_OP3_307_75261_20140630_231504_outLine +BABEL_OP3_307_75342_20140617_225740_inLine +BABEL_OP3_307_75342_20140617_225740_outLine +BABEL_OP3_307_75342_20140617_231149_inLine +BABEL_OP3_307_75342_20140617_231149_outLine +BABEL_OP3_307_75359_20140812_195810_inLine +BABEL_OP3_307_75359_20140812_195810_outLine +BABEL_OP3_307_75366_20140905_004427_inLine +BABEL_OP3_307_75366_20140905_004427_outLine +BABEL_OP3_307_75465_20140629_190739_inLine +BABEL_OP3_307_75465_20140629_190739_outLine +BABEL_OP3_307_75869_20140722_003619_inLine +BABEL_OP3_307_75869_20140722_003619_outLine +BABEL_OP3_307_75981_20140730_202631_inLine +BABEL_OP3_307_75981_20140730_202631_outLine +BABEL_OP3_307_78544_20140610_183736_inLine +BABEL_OP3_307_78544_20140610_183736_outLine +BABEL_OP3_307_78609_20140702_235349_inLine +BABEL_OP3_307_78609_20140702_235349_outLine +BABEL_OP3_307_78749_20140904_210224_inLine +BABEL_OP3_307_78749_20140904_210224_outLine +BABEL_OP3_307_79139_20140510_225328_inLine +BABEL_OP3_307_79139_20140510_225328_outLine +BABEL_OP3_307_79898_20140904_214416_inLine +BABEL_OP3_307_79898_20140904_214416_outLine +BABEL_OP3_307_80577_20140715_181331_inLine +BABEL_OP3_307_80577_20140715_181331_outLine +BABEL_OP3_307_80655_20140812_230923_inLine +BABEL_OP3_307_80655_20140812_230923_outLine +BABEL_OP3_307_80655_20140812_233001_inLine +BABEL_OP3_307_80655_20140812_233001_outLine +BABEL_OP3_307_80721_20140621_190505_inLine +BABEL_OP3_307_80721_20140621_190505_outLine +BABEL_OP3_307_80881_20140422_202404_inLine +BABEL_OP3_307_80881_20140422_202404_outLine +BABEL_OP3_307_81674_20140826_223550_inLine +BABEL_OP3_307_81674_20140826_223550_outLine +BABEL_OP3_307_83783_20140605_230854_inLine +BABEL_OP3_307_83783_20140605_230854_outLine +BABEL_OP3_307_83783_20140605_231912_inLine +BABEL_OP3_307_83783_20140605_231912_outLine +BABEL_OP3_307_83935_20140614_224802_inLine +BABEL_OP3_307_83935_20140614_224802_outLine +BABEL_OP3_307_84061_20140511_233610_inLine +BABEL_OP3_307_84061_20140511_233610_outLine +BABEL_OP3_307_84125_20140331_234215_inLine +BABEL_OP3_307_84125_20140331_234215_outLine +BABEL_OP3_307_84370_20140820_212437_inLine 
+BABEL_OP3_307_84370_20140820_212437_outLine +BABEL_OP3_307_84408_20140503_212710_inLine +BABEL_OP3_307_84408_20140503_212710_outLine +BABEL_OP3_307_84737_20140708_221232_inLine +BABEL_OP3_307_84737_20140708_221232_outLine +BABEL_OP3_307_84823_20140630_223225_inLine +BABEL_OP3_307_84823_20140630_223225_outLine +BABEL_OP3_307_84936_20140531_001856_inLine +BABEL_OP3_307_84936_20140531_001856_outLine +BABEL_OP3_307_84936_20140531_002943_inLine +BABEL_OP3_307_84936_20140531_002943_outLine +BABEL_OP3_307_85048_20140605_171622_inLine +BABEL_OP3_307_85048_20140605_171622_outLine +BABEL_OP3_307_85254_20140827_191205_inLine +BABEL_OP3_307_85254_20140827_191205_outLine +BABEL_OP3_307_85340_20140430_212442_inLine +BABEL_OP3_307_85340_20140430_212442_outLine +BABEL_OP3_307_87298_20140404_234437_inLine +BABEL_OP3_307_87298_20140404_234437_outLine +BABEL_OP3_307_88982_20140517_225450_inLine +BABEL_OP3_307_88982_20140517_225450_outLine +BABEL_OP3_307_89560_20140708_181828_inLine +BABEL_OP3_307_89560_20140708_181828_outLine +BABEL_OP3_307_90080_20140730_192002_inLine +BABEL_OP3_307_90080_20140730_192002_outLine +BABEL_OP3_307_90760_20140822_233431_inLine +BABEL_OP3_307_90760_20140822_233431_outLine +BABEL_OP3_307_91125_20140417_193326_inLine +BABEL_OP3_307_91125_20140417_193326_outLine +BABEL_OP3_307_91930_20140723_214657_inLine +BABEL_OP3_307_91930_20140723_214657_outLine +BABEL_OP3_307_91977_20140609_172756_inLine +BABEL_OP3_307_91977_20140609_172756_outLine +BABEL_OP3_307_92077_20140725_182941_inLine +BABEL_OP3_307_92077_20140725_182941_outLine +BABEL_OP3_307_92096_20140720_214645_inLine +BABEL_OP3_307_92096_20140720_214645_outLine +BABEL_OP3_307_92356_20140710_165331_inLine +BABEL_OP3_307_92356_20140710_165331_outLine +BABEL_OP3_307_92736_20140628_222129_inLine +BABEL_OP3_307_92736_20140628_222129_outLine +BABEL_OP3_307_92792_20140806_183000_inLine +BABEL_OP3_307_92792_20140806_183000_outLine +BABEL_OP3_307_92792_20140806_184601_inLine +BABEL_OP3_307_92792_20140806_184601_outLine +BABEL_OP3_307_93411_20140511_171810_inLine +BABEL_OP3_307_93411_20140511_171810_outLine +BABEL_OP3_307_93411_20140511_172906_inLine +BABEL_OP3_307_93411_20140511_172906_outLine +BABEL_OP3_307_93861_20140513_195727_inLine +BABEL_OP3_307_94141_20140813_184047_inLine +BABEL_OP3_307_94141_20140813_184047_outLine +BABEL_OP3_307_94166_20140717_182459_inLine +BABEL_OP3_307_94666_20140517_180258_inLine +BABEL_OP3_307_94666_20140517_180258_outLine +BABEL_OP3_307_95399_20140514_005142_inLine +BABEL_OP3_307_95399_20140514_005142_outLine +BABEL_OP3_307_95467_20140822_201531_inLine +BABEL_OP3_307_95467_20140822_201531_outLine +BABEL_OP3_307_95598_20140415_012206_inLine +BABEL_OP3_307_95598_20140415_012206_outLine +BABEL_OP3_307_95935_20140702_232733_inLine +BABEL_OP3_307_95935_20140702_232733_outLine +BABEL_OP3_307_95966_20140504_202018_inLine +BABEL_OP3_307_95966_20140504_202018_outLine +BABEL_OP3_307_96247_20140721_235224_inLine +BABEL_OP3_307_96247_20140721_235224_outLine +BABEL_OP3_307_96584_20140717_173523_inLine +BABEL_OP3_307_96584_20140717_173523_outLine +BABEL_OP3_307_96842_20140725_185113_inLine +BABEL_OP3_307_96842_20140725_185113_outLine +BABEL_OP3_307_96934_20140407_232228_inLine +BABEL_OP3_307_96934_20140407_232228_outLine +BABEL_OP3_307_97136_20140731_173922_inLine +BABEL_OP3_307_97136_20140731_173922_outLine +BABEL_OP3_307_97570_20140529_233742_inLine +BABEL_OP3_307_97570_20140529_233742_outLine +BABEL_OP3_307_97836_20140730_225750_inLine +BABEL_OP3_307_97836_20140730_225750_outLine 
+BABEL_OP3_307_97849_20140813_181409_inLine +BABEL_OP3_307_97849_20140813_181409_outLine +BABEL_OP3_307_97911_20140904_224017_inLine +BABEL_OP3_307_97911_20140904_224017_outLine +BABEL_OP3_307_97988_20140620_223625_inLine +BABEL_OP3_307_97988_20140620_223625_outLine +BABEL_OP3_307_98489_20140404_222049_inLine +BABEL_OP3_307_98489_20140404_222049_outLine +BABEL_OP3_307_98678_20140721_224047_inLine +BABEL_OP3_307_98678_20140721_224047_outLine +BABEL_OP3_307_99401_20140422_215803_inLine +BABEL_OP3_307_99401_20140422_215803_outLine +BABEL_OP3_307_99718_20140417_190158_inLine +BABEL_OP3_307_99718_20140417_190158_outLine +BABEL_OP3_307_99732_20140630_175525_inLine +BABEL_OP3_307_99732_20140630_175525_outLine +BABEL_OP3_307_99813_20140516_235439_inLine +BABEL_OP3_307_99813_20140516_235439_outLine +BABEL_OP3_307_99920_20140404_002016_inLine +BABEL_OP3_307_99920_20140404_002016_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/dev.2h.list b/egs/babel/s5d/conf/lists/401-mongolian/dev.2h.list new file mode 100644 index 00000000000..47596e1204d --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/dev.2h.list @@ -0,0 +1,124 @@ +BABEL_OP3_401_10319_20140923_150904_inLine +BABEL_OP3_401_10319_20140923_150904_outLine +BABEL_OP3_401_12916_20140930_182205_inLine +BABEL_OP3_401_12916_20140930_182205_outLine +BABEL_OP3_401_14229_20141015_145028_inLine +BABEL_OP3_401_14229_20141015_145028_outLine +BABEL_OP3_401_14350_20141002_192854_inLine +BABEL_OP3_401_14350_20141002_192854_outLine +BABEL_OP3_401_14875_20141013_220929_inLine +BABEL_OP3_401_14875_20141013_220929_outLine +BABEL_OP3_401_14875_20141013_222027_inLine +BABEL_OP3_401_14875_20141013_222027_outLine +BABEL_OP3_401_15163_20141020_201846_inLine +BABEL_OP3_401_15163_20141020_201846_outLine +BABEL_OP3_401_15216_20141104_171637_inLine +BABEL_OP3_401_15216_20141104_171637_outLine +BABEL_OP3_401_15324_20141031_194259_inLine +BABEL_OP3_401_15324_20141031_194259_outLine +BABEL_OP3_401_15848_20140916_174516_inLine +BABEL_OP3_401_15848_20140916_174516_outLine +BABEL_OP3_401_16184_20140924_115115_inLine +BABEL_OP3_401_16184_20140924_115115_outLine +BABEL_OP3_401_17440_20141014_172206_inLine +BABEL_OP3_401_17440_20141014_172206_outLine +BABEL_OP3_401_19621_20141027_174015_inLine +BABEL_OP3_401_19621_20141027_174015_outLine +BABEL_OP3_401_21109_20141102_133420_inLine +BABEL_OP3_401_21109_20141102_133420_outLine +BABEL_OP3_401_23505_20140930_172516_inLine +BABEL_OP3_401_23505_20140930_172516_outLine +BABEL_OP3_401_26074_20141031_001437_inLine +BABEL_OP3_401_26074_20141031_001437_outLine +BABEL_OP3_401_27125_20140916_141748_inLine +BABEL_OP3_401_27125_20140916_141748_outLine +BABEL_OP3_401_27478_20141119_222255_inLine +BABEL_OP3_401_27478_20141119_222255_outLine +BABEL_OP3_401_28606_20140930_180938_inLine +BABEL_OP3_401_28606_20140930_180938_outLine +BABEL_OP3_401_29023_20141021_134200_inLine +BABEL_OP3_401_29023_20141021_134200_outLine +BABEL_OP3_401_29135_20140919_181952_inLine +BABEL_OP3_401_29135_20140919_181952_outLine +BABEL_OP3_401_29208_20141018_152040_inLine +BABEL_OP3_401_29208_20141018_152040_outLine +BABEL_OP3_401_29777_20141105_172935_inLine +BABEL_OP3_401_29777_20141105_172935_outLine +BABEL_OP3_401_31490_20141001_195242_inLine +BABEL_OP3_401_31490_20141001_195242_outLine +BABEL_OP3_401_32301_20140927_150237_inLine +BABEL_OP3_401_32301_20140927_150237_outLine +BABEL_OP3_401_32727_20141014_193244_inLine +BABEL_OP3_401_32727_20141014_193244_outLine +BABEL_OP3_401_32861_20141112_183418_inLine +BABEL_OP3_401_32861_20141112_183418_outLine 
+BABEL_OP3_401_32914_20141101_192546_inLine +BABEL_OP3_401_32914_20141101_192546_outLine +BABEL_OP3_401_36219_20141014_150115_inLine +BABEL_OP3_401_36219_20141014_150115_outLine +BABEL_OP3_401_36505_20141104_134657_inLine +BABEL_OP3_401_36505_20141104_134657_outLine +BABEL_OP3_401_38554_20140917_124843_inLine +BABEL_OP3_401_38554_20140917_124843_outLine +BABEL_OP3_401_41100_20141001_131139_inLine +BABEL_OP3_401_41100_20141001_131139_outLine +BABEL_OP3_401_41741_20141002_230232_inLine +BABEL_OP3_401_41741_20141002_230232_outLine +BABEL_OP3_401_42243_20140924_154551_inLine +BABEL_OP3_401_42243_20140924_154551_outLine +BABEL_OP3_401_43368_20141016_160322_inLine +BABEL_OP3_401_43368_20141016_160322_outLine +BABEL_OP3_401_43388_20141019_234056_inLine +BABEL_OP3_401_43388_20141019_234056_outLine +BABEL_OP3_401_43789_20141020_153059_inLine +BABEL_OP3_401_43789_20141020_153059_outLine +BABEL_OP3_401_44347_20141103_201828_inLine +BABEL_OP3_401_44347_20141103_201828_outLine +BABEL_OP3_401_44420_20141014_143409_inLine +BABEL_OP3_401_44420_20141014_143409_outLine +BABEL_OP3_401_44531_20141122_231122_inLine +BABEL_OP3_401_44531_20141122_231122_outLine +BABEL_OP3_401_44619_20141003_141028_inLine +BABEL_OP3_401_44619_20141003_141028_outLine +BABEL_OP3_401_44868_20140925_153133_inLine +BABEL_OP3_401_44868_20140925_153133_outLine +BABEL_OP3_401_46625_20140919_144521_inLine +BABEL_OP3_401_46625_20140919_144521_outLine +BABEL_OP3_401_47215_20141001_143242_inLine +BABEL_OP3_401_47215_20141001_143242_outLine +BABEL_OP3_401_48200_20141104_174608_inLine +BABEL_OP3_401_48200_20141104_174608_outLine +BABEL_OP3_401_52025_20140917_170707_inLine +BABEL_OP3_401_52025_20140917_170707_outLine +BABEL_OP3_401_54046_20141105_192438_inLine +BABEL_OP3_401_54046_20141105_192438_outLine +BABEL_OP3_401_54744_20141001_143512_inLine +BABEL_OP3_401_54744_20141001_143512_outLine +BABEL_OP3_401_56090_20140917_155639_inLine +BABEL_OP3_401_56090_20140917_155639_outLine +BABEL_OP3_401_59898_20140930_142511_inLine +BABEL_OP3_401_59898_20140930_142511_outLine +BABEL_OP3_401_61011_20140919_134829_inLine +BABEL_OP3_401_61011_20140919_134829_outLine +BABEL_OP3_401_61011_20140919_141527_inLine +BABEL_OP3_401_61011_20140919_141527_outLine +BABEL_OP3_401_61678_20140919_183209_inLine +BABEL_OP3_401_61678_20140919_183209_outLine +BABEL_OP3_401_62724_20141016_200105_inLine +BABEL_OP3_401_62724_20141016_200105_outLine +BABEL_OP3_401_63081_20140919_142151_inLine +BABEL_OP3_401_63081_20140919_142151_outLine +BABEL_OP3_401_72007_20140930_173109_inLine +BABEL_OP3_401_72007_20140930_173109_outLine +BABEL_OP3_401_78544_20140924_155131_inLine +BABEL_OP3_401_78544_20140924_155131_outLine +BABEL_OP3_401_81424_20140927_134153_inLine +BABEL_OP3_401_81424_20140927_134153_outLine +BABEL_OP3_401_81553_20141112_153426_inLine +BABEL_OP3_401_81553_20141112_153426_outLine +BABEL_OP3_401_84815_20141014_163920_inLine +BABEL_OP3_401_84815_20141014_163920_outLine +BABEL_OP3_401_87884_20141014_190149_inLine +BABEL_OP3_401_87884_20141014_190149_outLine +BABEL_OP3_401_98506_20141124_133100_inLine +BABEL_OP3_401_98506_20141124_133100_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/dev.list b/egs/babel/s5d/conf/lists/401-mongolian/dev.list new file mode 100644 index 00000000000..47596e1204d --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/dev.list @@ -0,0 +1,124 @@ +BABEL_OP3_401_10319_20140923_150904_inLine +BABEL_OP3_401_10319_20140923_150904_outLine +BABEL_OP3_401_12916_20140930_182205_inLine +BABEL_OP3_401_12916_20140930_182205_outLine 
+BABEL_OP3_401_14229_20141015_145028_inLine +BABEL_OP3_401_14229_20141015_145028_outLine +BABEL_OP3_401_14350_20141002_192854_inLine +BABEL_OP3_401_14350_20141002_192854_outLine +BABEL_OP3_401_14875_20141013_220929_inLine +BABEL_OP3_401_14875_20141013_220929_outLine +BABEL_OP3_401_14875_20141013_222027_inLine +BABEL_OP3_401_14875_20141013_222027_outLine +BABEL_OP3_401_15163_20141020_201846_inLine +BABEL_OP3_401_15163_20141020_201846_outLine +BABEL_OP3_401_15216_20141104_171637_inLine +BABEL_OP3_401_15216_20141104_171637_outLine +BABEL_OP3_401_15324_20141031_194259_inLine +BABEL_OP3_401_15324_20141031_194259_outLine +BABEL_OP3_401_15848_20140916_174516_inLine +BABEL_OP3_401_15848_20140916_174516_outLine +BABEL_OP3_401_16184_20140924_115115_inLine +BABEL_OP3_401_16184_20140924_115115_outLine +BABEL_OP3_401_17440_20141014_172206_inLine +BABEL_OP3_401_17440_20141014_172206_outLine +BABEL_OP3_401_19621_20141027_174015_inLine +BABEL_OP3_401_19621_20141027_174015_outLine +BABEL_OP3_401_21109_20141102_133420_inLine +BABEL_OP3_401_21109_20141102_133420_outLine +BABEL_OP3_401_23505_20140930_172516_inLine +BABEL_OP3_401_23505_20140930_172516_outLine +BABEL_OP3_401_26074_20141031_001437_inLine +BABEL_OP3_401_26074_20141031_001437_outLine +BABEL_OP3_401_27125_20140916_141748_inLine +BABEL_OP3_401_27125_20140916_141748_outLine +BABEL_OP3_401_27478_20141119_222255_inLine +BABEL_OP3_401_27478_20141119_222255_outLine +BABEL_OP3_401_28606_20140930_180938_inLine +BABEL_OP3_401_28606_20140930_180938_outLine +BABEL_OP3_401_29023_20141021_134200_inLine +BABEL_OP3_401_29023_20141021_134200_outLine +BABEL_OP3_401_29135_20140919_181952_inLine +BABEL_OP3_401_29135_20140919_181952_outLine +BABEL_OP3_401_29208_20141018_152040_inLine +BABEL_OP3_401_29208_20141018_152040_outLine +BABEL_OP3_401_29777_20141105_172935_inLine +BABEL_OP3_401_29777_20141105_172935_outLine +BABEL_OP3_401_31490_20141001_195242_inLine +BABEL_OP3_401_31490_20141001_195242_outLine +BABEL_OP3_401_32301_20140927_150237_inLine +BABEL_OP3_401_32301_20140927_150237_outLine +BABEL_OP3_401_32727_20141014_193244_inLine +BABEL_OP3_401_32727_20141014_193244_outLine +BABEL_OP3_401_32861_20141112_183418_inLine +BABEL_OP3_401_32861_20141112_183418_outLine +BABEL_OP3_401_32914_20141101_192546_inLine +BABEL_OP3_401_32914_20141101_192546_outLine +BABEL_OP3_401_36219_20141014_150115_inLine +BABEL_OP3_401_36219_20141014_150115_outLine +BABEL_OP3_401_36505_20141104_134657_inLine +BABEL_OP3_401_36505_20141104_134657_outLine +BABEL_OP3_401_38554_20140917_124843_inLine +BABEL_OP3_401_38554_20140917_124843_outLine +BABEL_OP3_401_41100_20141001_131139_inLine +BABEL_OP3_401_41100_20141001_131139_outLine +BABEL_OP3_401_41741_20141002_230232_inLine +BABEL_OP3_401_41741_20141002_230232_outLine +BABEL_OP3_401_42243_20140924_154551_inLine +BABEL_OP3_401_42243_20140924_154551_outLine +BABEL_OP3_401_43368_20141016_160322_inLine +BABEL_OP3_401_43368_20141016_160322_outLine +BABEL_OP3_401_43388_20141019_234056_inLine +BABEL_OP3_401_43388_20141019_234056_outLine +BABEL_OP3_401_43789_20141020_153059_inLine +BABEL_OP3_401_43789_20141020_153059_outLine +BABEL_OP3_401_44347_20141103_201828_inLine +BABEL_OP3_401_44347_20141103_201828_outLine +BABEL_OP3_401_44420_20141014_143409_inLine +BABEL_OP3_401_44420_20141014_143409_outLine +BABEL_OP3_401_44531_20141122_231122_inLine +BABEL_OP3_401_44531_20141122_231122_outLine +BABEL_OP3_401_44619_20141003_141028_inLine +BABEL_OP3_401_44619_20141003_141028_outLine +BABEL_OP3_401_44868_20140925_153133_inLine 
+BABEL_OP3_401_44868_20140925_153133_outLine +BABEL_OP3_401_46625_20140919_144521_inLine +BABEL_OP3_401_46625_20140919_144521_outLine +BABEL_OP3_401_47215_20141001_143242_inLine +BABEL_OP3_401_47215_20141001_143242_outLine +BABEL_OP3_401_48200_20141104_174608_inLine +BABEL_OP3_401_48200_20141104_174608_outLine +BABEL_OP3_401_52025_20140917_170707_inLine +BABEL_OP3_401_52025_20140917_170707_outLine +BABEL_OP3_401_54046_20141105_192438_inLine +BABEL_OP3_401_54046_20141105_192438_outLine +BABEL_OP3_401_54744_20141001_143512_inLine +BABEL_OP3_401_54744_20141001_143512_outLine +BABEL_OP3_401_56090_20140917_155639_inLine +BABEL_OP3_401_56090_20140917_155639_outLine +BABEL_OP3_401_59898_20140930_142511_inLine +BABEL_OP3_401_59898_20140930_142511_outLine +BABEL_OP3_401_61011_20140919_134829_inLine +BABEL_OP3_401_61011_20140919_134829_outLine +BABEL_OP3_401_61011_20140919_141527_inLine +BABEL_OP3_401_61011_20140919_141527_outLine +BABEL_OP3_401_61678_20140919_183209_inLine +BABEL_OP3_401_61678_20140919_183209_outLine +BABEL_OP3_401_62724_20141016_200105_inLine +BABEL_OP3_401_62724_20141016_200105_outLine +BABEL_OP3_401_63081_20140919_142151_inLine +BABEL_OP3_401_63081_20140919_142151_outLine +BABEL_OP3_401_72007_20140930_173109_inLine +BABEL_OP3_401_72007_20140930_173109_outLine +BABEL_OP3_401_78544_20140924_155131_inLine +BABEL_OP3_401_78544_20140924_155131_outLine +BABEL_OP3_401_81424_20140927_134153_inLine +BABEL_OP3_401_81424_20140927_134153_outLine +BABEL_OP3_401_81553_20141112_153426_inLine +BABEL_OP3_401_81553_20141112_153426_outLine +BABEL_OP3_401_84815_20141014_163920_inLine +BABEL_OP3_401_84815_20141014_163920_outLine +BABEL_OP3_401_87884_20141014_190149_inLine +BABEL_OP3_401_87884_20141014_190149_outLine +BABEL_OP3_401_98506_20141124_133100_inLine +BABEL_OP3_401_98506_20141124_133100_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/eval.list b/egs/babel/s5d/conf/lists/401-mongolian/eval.list new file mode 100644 index 00000000000..d6756127490 --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/eval.list @@ -0,0 +1,186 @@ +BABEL_OP3_401_10416_20141019_182621_inLine +BABEL_OP3_401_10416_20141019_182621_outLine +BABEL_OP3_401_10974_20141027_160927_inLine +BABEL_OP3_401_10974_20141027_160927_outLine +BABEL_OP3_401_13040_20141003_135314_inLine +BABEL_OP3_401_13040_20141003_135314_outLine +BABEL_OP3_401_14158_20140923_184703_inLine +BABEL_OP3_401_14158_20140923_184703_outLine +BABEL_OP3_401_15262_20141001_153004_inLine +BABEL_OP3_401_15262_20141001_153004_outLine +BABEL_OP3_401_16056_20140930_142245_inLine +BABEL_OP3_401_16056_20140930_142245_outLine +BABEL_OP3_401_16601_20140930_160550_inLine +BABEL_OP3_401_16601_20140930_160550_outLine +BABEL_OP3_401_16787_20141017_144614_inLine +BABEL_OP3_401_16787_20141017_144614_outLine +BABEL_OP3_401_17573_20141001_155309_inLine +BABEL_OP3_401_17573_20141001_155309_outLine +BABEL_OP3_401_18863_20141104_193022_inLine +BABEL_OP3_401_18863_20141104_193022_outLine +BABEL_OP3_401_19120_20141119_194530_inLine +BABEL_OP3_401_19120_20141119_194530_outLine +BABEL_OP3_401_21029_20141003_135457_inLine +BABEL_OP3_401_21029_20141003_135457_outLine +BABEL_OP3_401_21581_20141019_224155_inLine +BABEL_OP3_401_21581_20141019_224155_outLine +BABEL_OP3_401_21794_20141020_182533_inLine +BABEL_OP3_401_21794_20141020_182533_outLine +BABEL_OP3_401_22216_20140922_180034_inLine +BABEL_OP3_401_22216_20140922_180034_outLine +BABEL_OP3_401_22612_20141020_164557_inLine +BABEL_OP3_401_22612_20141020_164557_outLine +BABEL_OP3_401_22641_20140930_120916_inLine 
+BABEL_OP3_401_22641_20140930_120916_outLine +BABEL_OP3_401_23395_20141029_191310_inLine +BABEL_OP3_401_23395_20141029_191310_outLine +BABEL_OP3_401_23731_20141027_182446_inLine +BABEL_OP3_401_23731_20141027_182446_outLine +BABEL_OP3_401_23983_20141204_001919_inLine +BABEL_OP3_401_23983_20141204_001919_outLine +BABEL_OP3_401_24589_20141014_140038_inLine +BABEL_OP3_401_24589_20141014_140038_outLine +BABEL_OP3_401_27082_20141110_145114_inLine +BABEL_OP3_401_27082_20141110_145114_outLine +BABEL_OP3_401_27218_20141002_130035_inLine +BABEL_OP3_401_27218_20141002_130035_outLine +BABEL_OP3_401_28538_20141020_154852_inLine +BABEL_OP3_401_28538_20141020_154852_outLine +BABEL_OP3_401_28585_20141205_234451_inLine +BABEL_OP3_401_28585_20141205_234451_outLine +BABEL_OP3_401_28945_20141013_144506_inLine +BABEL_OP3_401_28945_20141013_144506_outLine +BABEL_OP3_401_30250_20140919_185656_inLine +BABEL_OP3_401_30250_20140919_185656_outLine +BABEL_OP3_401_30497_20141124_222137_inLine +BABEL_OP3_401_30497_20141124_222137_outLine +BABEL_OP3_401_31979_20141018_172147_inLine +BABEL_OP3_401_31979_20141018_172147_outLine +BABEL_OP3_401_32959_20141010_150730_inLine +BABEL_OP3_401_32959_20141010_150730_outLine +BABEL_OP3_401_34688_20140930_171613_inLine +BABEL_OP3_401_34688_20140930_171613_outLine +BABEL_OP3_401_35069_20141111_153956_inLine +BABEL_OP3_401_35069_20141111_153956_outLine +BABEL_OP3_401_36341_20140919_151216_inLine +BABEL_OP3_401_36341_20140919_151216_outLine +BABEL_OP3_401_37281_20141021_145639_inLine +BABEL_OP3_401_37281_20141021_145639_outLine +BABEL_OP3_401_38431_20141017_210308_inLine +BABEL_OP3_401_38431_20141017_210308_outLine +BABEL_OP3_401_39059_20141113_134730_inLine +BABEL_OP3_401_39059_20141113_134730_outLine +BABEL_OP3_401_39159_20140919_165446_inLine +BABEL_OP3_401_39159_20140919_165446_outLine +BABEL_OP3_401_39680_20141114_221332_inLine +BABEL_OP3_401_39680_20141114_221332_outLine +BABEL_OP3_401_41400_20141201_173539_inLine +BABEL_OP3_401_41400_20141201_173539_outLine +BABEL_OP3_401_41542_20141117_192728_inLine +BABEL_OP3_401_41542_20141117_192728_outLine +BABEL_OP3_401_41920_20141001_131923_inLine +BABEL_OP3_401_41920_20141001_131923_outLine +BABEL_OP3_401_42600_20141015_152342_inLine +BABEL_OP3_401_42600_20141015_152342_outLine +BABEL_OP3_401_42991_20140922_191649_inLine +BABEL_OP3_401_42991_20140922_191649_outLine +BABEL_OP3_401_43920_20141001_185918_inLine +BABEL_OP3_401_43920_20141001_185918_outLine +BABEL_OP3_401_44847_20141101_215443_inLine +BABEL_OP3_401_44847_20141101_215443_outLine +BABEL_OP3_401_45106_20141103_141740_inLine +BABEL_OP3_401_45106_20141103_141740_outLine +BABEL_OP3_401_45106_20141103_142537_inLine +BABEL_OP3_401_45106_20141103_142537_outLine +BABEL_OP3_401_45642_20141001_205602_inLine +BABEL_OP3_401_45642_20141001_205602_outLine +BABEL_OP3_401_45777_20141015_141952_inLine +BABEL_OP3_401_45777_20141015_141952_outLine +BABEL_OP3_401_46333_20140929_163346_inLine +BABEL_OP3_401_46333_20140929_163346_outLine +BABEL_OP3_401_46702_20140917_183418_inLine +BABEL_OP3_401_46702_20140917_183418_outLine +BABEL_OP3_401_47877_20141111_151410_inLine +BABEL_OP3_401_47877_20141111_151410_outLine +BABEL_OP3_401_48789_20141020_160327_inLine +BABEL_OP3_401_48789_20141020_160327_outLine +BABEL_OP3_401_50630_20140926_164312_inLine +BABEL_OP3_401_50630_20140926_164312_outLine +BABEL_OP3_401_50726_20140922_142113_inLine +BABEL_OP3_401_50726_20140922_142113_outLine +BABEL_OP3_401_50962_20141009_174305_inLine +BABEL_OP3_401_50962_20141009_174305_outLine 
+BABEL_OP3_401_51540_20141110_152608_inLine +BABEL_OP3_401_51540_20141110_152608_outLine +BABEL_OP3_401_52438_20141014_155319_inLine +BABEL_OP3_401_52438_20141014_155319_outLine +BABEL_OP3_401_52442_20141023_165129_inLine +BABEL_OP3_401_52442_20141023_165129_outLine +BABEL_OP3_401_53063_20141016_144707_inLine +BABEL_OP3_401_53063_20141016_144707_outLine +BABEL_OP3_401_53419_20141018_182244_inLine +BABEL_OP3_401_53419_20141018_182244_outLine +BABEL_OP3_401_56213_20141016_153651_inLine +BABEL_OP3_401_56213_20141016_153651_outLine +BABEL_OP3_401_57922_20141030_180727_inLine +BABEL_OP3_401_57922_20141030_180727_outLine +BABEL_OP3_401_58047_20141030_203452_inLine +BABEL_OP3_401_58047_20141030_203452_outLine +BABEL_OP3_401_58489_20141001_175646_inLine +BABEL_OP3_401_58489_20141001_175646_outLine +BABEL_OP3_401_59993_20141006_214918_inLine +BABEL_OP3_401_59993_20141006_214918_outLine +BABEL_OP3_401_62155_20141124_185836_inLine +BABEL_OP3_401_62155_20141124_185836_outLine +BABEL_OP3_401_62852_20140922_125106_inLine +BABEL_OP3_401_62852_20140922_125106_outLine +BABEL_OP3_401_63670_20141016_165949_inLine +BABEL_OP3_401_63670_20141016_165949_outLine +BABEL_OP3_401_64494_20141007_112731_inLine +BABEL_OP3_401_64494_20141007_112731_outLine +BABEL_OP3_401_66026_20141016_173200_inLine +BABEL_OP3_401_66026_20141016_173200_outLine +BABEL_OP3_401_67842_20141006_131608_inLine +BABEL_OP3_401_67842_20141006_131608_outLine +BABEL_OP3_401_68306_20140929_200051_inLine +BABEL_OP3_401_68306_20140929_200051_outLine +BABEL_OP3_401_69153_20141016_161457_inLine +BABEL_OP3_401_69153_20141016_161457_outLine +BABEL_OP3_401_70282_20141030_201700_inLine +BABEL_OP3_401_70282_20141030_201700_outLine +BABEL_OP3_401_77567_20140921_154030_inLine +BABEL_OP3_401_77567_20140921_154030_outLine +BABEL_OP3_401_78511_20141001_195118_inLine +BABEL_OP3_401_78511_20141001_195118_outLine +BABEL_OP3_401_79139_20141110_182604_inLine +BABEL_OP3_401_79139_20141110_182604_outLine +BABEL_OP3_401_80897_20141030_171507_inLine +BABEL_OP3_401_80897_20141030_171507_outLine +BABEL_OP3_401_81392_20140929_161849_inLine +BABEL_OP3_401_81392_20140929_161849_outLine +BABEL_OP3_401_81404_20141016_154459_inLine +BABEL_OP3_401_81404_20141016_154459_outLine +BABEL_OP3_401_81404_20141016_155649_inLine +BABEL_OP3_401_81404_20141016_155649_outLine +BABEL_OP3_401_83935_20140930_153105_inLine +BABEL_OP3_401_83935_20140930_153105_outLine +BABEL_OP3_401_84327_20140930_203221_inLine +BABEL_OP3_401_84327_20140930_203221_outLine +BABEL_OP3_401_84823_20141018_193727_inLine +BABEL_OP3_401_84823_20141018_193727_outLine +BABEL_OP3_401_87280_20141021_202831_inLine +BABEL_OP3_401_87280_20141021_202831_outLine +BABEL_OP3_401_88550_20141127_184443_inLine +BABEL_OP3_401_88550_20141127_184443_outLine +BABEL_OP3_401_89372_20140921_132733_inLine +BABEL_OP3_401_89372_20140921_132733_outLine +BABEL_OP3_401_91581_20141001_163329_inLine +BABEL_OP3_401_91581_20141001_163329_outLine +BABEL_OP3_401_93946_20141016_192913_inLine +BABEL_OP3_401_93946_20141016_192913_outLine +BABEL_OP3_401_94002_20141020_150022_inLine +BABEL_OP3_401_94002_20141020_150022_outLine +BABEL_OP3_401_96504_20141014_144817_inLine +BABEL_OP3_401_96504_20141014_144817_outLine +BABEL_OP3_401_99732_20141018_150700_inLine +BABEL_OP3_401_99732_20141018_150700_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/sub-train.list b/egs/babel/s5d/conf/lists/401-mongolian/sub-train.list new file mode 100644 index 00000000000..f4b87dcaef8 --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/sub-train.list @@ -0,0 +1,126 @@
+BABEL_OP3_401_13030_20141015_163112_inLine +BABEL_OP3_401_13030_20141015_163112_outLine +BABEL_OP3_401_13324_20141002_165637_inLine +BABEL_OP3_401_13324_20141002_165637_outLine +BABEL_OP3_401_13586_20141023_193242_inLine +BABEL_OP3_401_13586_20141023_193242_outLine +BABEL_OP3_401_14560_20140922_140509_inLine +BABEL_OP3_401_14560_20140922_140509_outLine +BABEL_OP3_401_15902_20140930_144526_inLine +BABEL_OP3_401_15902_20140930_144526_outLine +BABEL_OP3_401_16475_20141016_143941_inLine +BABEL_OP3_401_16475_20141016_143941_outLine +BABEL_OP3_401_17567_20141023_213629_inLine +BABEL_OP3_401_17567_20141023_213629_outLine +BABEL_OP3_401_17923_20141002_172711_inLine +BABEL_OP3_401_17923_20141002_172711_outLine +BABEL_OP3_401_19101_20141029_183652_inLine +BABEL_OP3_401_19101_20141029_183652_outLine +BABEL_OP3_401_19722_20140930_200553_inLine +BABEL_OP3_401_19722_20140930_200553_outLine +BABEL_OP3_401_20916_20140922_174215_inLine +BABEL_OP3_401_20916_20140922_174215_outLine +BABEL_OP3_401_22321_20140929_180456_inLine +BABEL_OP3_401_22321_20140929_180456_outLine +BABEL_OP3_401_23893_20141125_213344_inLine +BABEL_OP3_401_23893_20141125_213344_outLine +BABEL_OP3_401_24290_20141124_184351_inLine +BABEL_OP3_401_24290_20141124_184351_outLine +BABEL_OP3_401_24323_20141017_151036_inLine +BABEL_OP3_401_24323_20141017_151036_outLine +BABEL_OP3_401_24470_20141029_145653_inLine +BABEL_OP3_401_24470_20141029_145653_outLine +BABEL_OP3_401_24605_20141001_142727_inLine +BABEL_OP3_401_24605_20141001_142727_outLine +BABEL_OP3_401_25961_20140929_183632_inLine +BABEL_OP3_401_25961_20140929_183632_outLine +BABEL_OP3_401_26072_20141112_173131_inLine +BABEL_OP3_401_26072_20141112_173131_outLine +BABEL_OP3_401_26398_20141204_001557_inLine +BABEL_OP3_401_26398_20141204_001557_outLine +BABEL_OP3_401_26574_20141103_163656_inLine +BABEL_OP3_401_26574_20141103_163656_outLine +BABEL_OP3_401_26999_20141101_213851_inLine +BABEL_OP3_401_26999_20141101_213851_outLine +BABEL_OP3_401_27042_20141017_184608_inLine +BABEL_OP3_401_27042_20141017_184608_outLine +BABEL_OP3_401_27841_20141113_200006_inLine +BABEL_OP3_401_27841_20141113_200006_outLine +BABEL_OP3_401_28775_20141003_162126_inLine +BABEL_OP3_401_28775_20141003_162126_outLine +BABEL_OP3_401_29076_20141031_003943_inLine +BABEL_OP3_401_29076_20141031_003943_outLine +BABEL_OP3_401_29404_20141121_153054_inLine +BABEL_OP3_401_29404_20141121_153054_outLine +BABEL_OP3_401_29685_20141019_210404_inLine +BABEL_OP3_401_29685_20141019_210404_outLine +BABEL_OP3_401_29685_20141019_210959_inLine +BABEL_OP3_401_29685_20141019_210959_outLine +BABEL_OP3_401_31624_20141003_192655_inLine +BABEL_OP3_401_31624_20141003_192655_outLine +BABEL_OP3_401_31628_20140923_145349_inLine +BABEL_OP3_401_31628_20140923_145349_outLine +BABEL_OP3_401_32708_20141003_200927_inLine +BABEL_OP3_401_32708_20141003_200927_outLine +BABEL_OP3_401_33111_20141122_223105_inLine +BABEL_OP3_401_33111_20141122_223105_outLine +BABEL_OP3_401_33672_20140930_132456_inLine +BABEL_OP3_401_33672_20140930_132456_outLine +BABEL_OP3_401_33672_20140930_133426_inLine +BABEL_OP3_401_33672_20140930_133426_outLine +BABEL_OP3_401_35143_20141010_163440_inLine +BABEL_OP3_401_35143_20141010_163440_outLine +BABEL_OP3_401_38878_20141031_201014_inLine +BABEL_OP3_401_38878_20141031_201014_outLine +BABEL_OP3_401_40713_20141003_155735_inLine +BABEL_OP3_401_40713_20141003_155735_outLine +BABEL_OP3_401_41618_20141028_201644_inLine +BABEL_OP3_401_41618_20141028_201644_outLine +BABEL_OP3_401_42619_20141104_204106_inLine
+BABEL_OP3_401_42619_20141104_204106_outLine +BABEL_OP3_401_42834_20141103_204826_inLine +BABEL_OP3_401_42834_20141103_204826_outLine +BABEL_OP3_401_43646_20140917_164218_inLine +BABEL_OP3_401_43646_20140917_164218_outLine +BABEL_OP3_401_45560_20140930_190100_inLine +BABEL_OP3_401_45560_20140930_190100_outLine +BABEL_OP3_401_46881_20140922_175212_inLine +BABEL_OP3_401_46881_20140922_175212_outLine +BABEL_OP3_401_47283_20141006_193958_inLine +BABEL_OP3_401_47283_20141006_193958_outLine +BABEL_OP3_401_51407_20141027_182114_inLine +BABEL_OP3_401_51407_20141027_182114_outLine +BABEL_OP3_401_52725_20141123_224942_inLine +BABEL_OP3_401_52725_20141123_224942_outLine +BABEL_OP3_401_52818_20140922_184227_inLine +BABEL_OP3_401_52818_20140922_184227_outLine +BABEL_OP3_401_54162_20141107_221207_inLine +BABEL_OP3_401_54162_20141107_221207_outLine +BABEL_OP3_401_56677_20141020_160804_inLine +BABEL_OP3_401_56677_20141020_160804_outLine +BABEL_OP3_401_57065_20140924_135508_inLine +BABEL_OP3_401_57065_20140924_135508_outLine +BABEL_OP3_401_60310_20141017_165419_inLine +BABEL_OP3_401_60310_20141017_165419_outLine +BABEL_OP3_401_63906_20141124_212323_inLine +BABEL_OP3_401_63906_20141124_212323_outLine +BABEL_OP3_401_64398_20140922_165727_inLine +BABEL_OP3_401_64398_20140922_165727_outLine +BABEL_OP3_401_73022_20141111_173204_inLine +BABEL_OP3_401_73022_20141111_173204_outLine +BABEL_OP3_401_74921_20140924_165937_inLine +BABEL_OP3_401_74921_20140924_165937_outLine +BABEL_OP3_401_77744_20141014_125609_inLine +BABEL_OP3_401_77744_20141014_125609_outLine +BABEL_OP3_401_77744_20141014_140124_inLine +BABEL_OP3_401_77744_20141014_140124_outLine +BABEL_OP3_401_79167_20140925_132420_inLine +BABEL_OP3_401_79167_20140925_132420_outLine +BABEL_OP3_401_81287_20141001_145404_inLine +BABEL_OP3_401_81287_20141001_145404_outLine +BABEL_OP3_401_82224_20141111_175445_inLine +BABEL_OP3_401_82224_20141111_175445_outLine +BABEL_OP3_401_87073_20140917_201716_inLine +BABEL_OP3_401_87073_20140917_201716_outLine +BABEL_OP3_401_91977_20140925_184203_inLine +BABEL_OP3_401_91977_20140925_184203_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/401-mongolian/sub-train.untranscribed.list new file mode 100644 index 00000000000..550224d6e16 --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/sub-train.untranscribed.list @@ -0,0 +1,392 @@ +BABEL_OP3_401_10524_20141119_213218_inLine +BABEL_OP3_401_10524_20141119_213218_outLine +BABEL_OP3_401_10647_20141119_154922_inLine +BABEL_OP3_401_10647_20141119_154922_outLine +BABEL_OP3_401_10901_20141021_124158_inLine +BABEL_OP3_401_10901_20141021_124158_outLine +BABEL_OP3_401_10966_20141019_192715_inLine +BABEL_OP3_401_10966_20141019_192715_outLine +BABEL_OP3_401_11581_20141030_214939_inLine +BABEL_OP3_401_11581_20141030_214939_outLine +BABEL_OP3_401_11663_20141105_143103_inLine +BABEL_OP3_401_11663_20141105_143103_outLine +BABEL_OP3_401_11673_20140917_163413_inLine +BABEL_OP3_401_11673_20140917_163413_outLine +BABEL_OP3_401_11797_20140929_205622_inLine +BABEL_OP3_401_11797_20140929_205622_outLine +BABEL_OP3_401_12036_20141002_134817_inLine +BABEL_OP3_401_12036_20141002_134817_outLine +BABEL_OP3_401_12242_20141014_143019_inLine +BABEL_OP3_401_12242_20141014_143019_outLine +BABEL_OP3_401_12635_20141117_185400_inLine +BABEL_OP3_401_12635_20141117_185400_outLine +BABEL_OP3_401_12767_20141001_130658_inLine +BABEL_OP3_401_12767_20141001_130658_outLine +BABEL_OP3_401_12851_20140919_135242_inLine 
+BABEL_OP3_401_12851_20140919_135242_outLine +BABEL_OP3_401_13184_20141110_163330_inLine +BABEL_OP3_401_13184_20141110_163330_outLine +BABEL_OP3_401_13184_20141110_163902_inLine +BABEL_OP3_401_13184_20141110_163902_outLine +BABEL_OP3_401_13490_20141110_152643_inLine +BABEL_OP3_401_13490_20141110_152643_outLine +BABEL_OP3_401_13561_20141027_154606_inLine +BABEL_OP3_401_13561_20141027_154606_outLine +BABEL_OP3_401_13664_20140922_131741_inLine +BABEL_OP3_401_13664_20140922_131741_outLine +BABEL_OP3_401_13709_20141118_170840_inLine +BABEL_OP3_401_13709_20141118_170840_outLine +BABEL_OP3_401_13744_20140919_122844_inLine +BABEL_OP3_401_13744_20140919_122844_outLine +BABEL_OP3_401_14719_20141017_215720_inLine +BABEL_OP3_401_14719_20141017_215720_outLine +BABEL_OP3_401_14725_20140929_155627_inLine +BABEL_OP3_401_14725_20140929_155627_outLine +BABEL_OP3_401_14807_20141030_232134_inLine +BABEL_OP3_401_14807_20141030_232134_outLine +BABEL_OP3_401_14814_20141014_184415_inLine +BABEL_OP3_401_14814_20141014_184415_outLine +BABEL_OP3_401_14972_20141028_200051_inLine +BABEL_OP3_401_14972_20141028_200051_outLine +BABEL_OP3_401_15702_20140923_180447_inLine +BABEL_OP3_401_15702_20140923_180447_outLine +BABEL_OP3_401_15730_20140924_135900_inLine +BABEL_OP3_401_15730_20140924_135900_outLine +BABEL_OP3_401_16749_20141112_193028_inLine +BABEL_OP3_401_16749_20141112_193028_outLine +BABEL_OP3_401_16839_20141110_174923_inLine +BABEL_OP3_401_16839_20141110_174923_outLine +BABEL_OP3_401_16886_20141017_152623_inLine +BABEL_OP3_401_16886_20141017_152623_outLine +BABEL_OP3_401_16924_20140923_164321_inLine +BABEL_OP3_401_16924_20140923_164321_outLine +BABEL_OP3_401_17320_20141125_170435_inLine +BABEL_OP3_401_17320_20141125_170435_outLine +BABEL_OP3_401_17420_20141118_190621_inLine +BABEL_OP3_401_17420_20141118_190621_outLine +BABEL_OP3_401_17615_20140924_144400_inLine +BABEL_OP3_401_17615_20140924_144400_outLine +BABEL_OP3_401_18078_20141113_162556_inLine +BABEL_OP3_401_18078_20141113_162556_outLine +BABEL_OP3_401_18380_20141023_154240_inLine +BABEL_OP3_401_18380_20141023_154240_outLine +BABEL_OP3_401_18566_20141120_004140_inLine +BABEL_OP3_401_18566_20141120_004140_outLine +BABEL_OP3_401_18924_20141030_205111_inLine +BABEL_OP3_401_18924_20141030_205111_outLine +BABEL_OP3_401_18939_20141001_200418_inLine +BABEL_OP3_401_18939_20141001_200418_outLine +BABEL_OP3_401_19134_20141030_191814_inLine +BABEL_OP3_401_19134_20141030_191814_outLine +BABEL_OP3_401_19134_20141030_192931_inLine +BABEL_OP3_401_19134_20141030_192931_outLine +BABEL_OP3_401_19773_20141101_211403_inLine +BABEL_OP3_401_19773_20141101_211403_outLine +BABEL_OP3_401_19818_20141103_184746_inLine +BABEL_OP3_401_19818_20141103_184746_outLine +BABEL_OP3_401_19818_20141103_185728_inLine +BABEL_OP3_401_19818_20141103_185728_outLine +BABEL_OP3_401_20133_20140919_173858_inLine +BABEL_OP3_401_20133_20140919_173858_outLine +BABEL_OP3_401_20922_20141110_190444_inLine +BABEL_OP3_401_20922_20141110_190444_outLine +BABEL_OP3_401_21206_20141003_120941_inLine +BABEL_OP3_401_21206_20141003_120941_outLine +BABEL_OP3_401_21206_20141003_122457_inLine +BABEL_OP3_401_21206_20141003_122457_outLine +BABEL_OP3_401_21327_20141020_204038_inLine +BABEL_OP3_401_21327_20141020_204038_outLine +BABEL_OP3_401_21807_20141029_214508_inLine +BABEL_OP3_401_21807_20141029_214508_outLine +BABEL_OP3_401_22446_20140929_133647_inLine +BABEL_OP3_401_22446_20140929_133647_outLine +BABEL_OP3_401_22624_20141001_141008_inLine +BABEL_OP3_401_22624_20141001_141008_outLine 
+BABEL_OP3_401_22918_20141114_145920_inLine +BABEL_OP3_401_22918_20141114_145920_outLine +BABEL_OP3_401_23006_20141014_190149_inLine +BABEL_OP3_401_23006_20141014_190149_outLine +BABEL_OP3_401_23046_20141014_150823_inLine +BABEL_OP3_401_23046_20141014_150823_outLine +BABEL_OP3_401_23092_20141010_141138_inLine +BABEL_OP3_401_23092_20141010_141138_outLine +BABEL_OP3_401_23153_20141018_201630_inLine +BABEL_OP3_401_23153_20141018_201630_outLine +BABEL_OP3_401_23980_20141018_192714_inLine +BABEL_OP3_401_23980_20141018_192714_outLine +BABEL_OP3_401_24270_20141030_195323_inLine +BABEL_OP3_401_24270_20141030_195323_outLine +BABEL_OP3_401_24569_20141016_182323_inLine +BABEL_OP3_401_24569_20141016_182323_outLine +BABEL_OP3_401_24586_20141117_160948_inLine +BABEL_OP3_401_24586_20141117_160948_outLine +BABEL_OP3_401_24590_20141017_175757_inLine +BABEL_OP3_401_24590_20141017_175757_outLine +BABEL_OP3_401_24679_20140919_185323_inLine +BABEL_OP3_401_24679_20140919_185323_outLine +BABEL_OP3_401_24982_20141008_150245_inLine +BABEL_OP3_401_24982_20141008_150245_outLine +BABEL_OP3_401_25412_20141031_171749_inLine +BABEL_OP3_401_25412_20141031_171749_outLine +BABEL_OP3_401_25719_20141110_191042_inLine +BABEL_OP3_401_25719_20141110_191042_outLine +BABEL_OP3_401_26507_20141118_210109_inLine +BABEL_OP3_401_26507_20141118_210109_outLine +BABEL_OP3_401_27203_20141030_164916_inLine +BABEL_OP3_401_27203_20141030_164916_outLine +BABEL_OP3_401_28522_20140927_172947_inLine +BABEL_OP3_401_28522_20140927_172947_outLine +BABEL_OP3_401_28600_20141021_194818_inLine +BABEL_OP3_401_28600_20141021_194818_outLine +BABEL_OP3_401_28814_20141112_190902_inLine +BABEL_OP3_401_28814_20141112_190902_outLine +BABEL_OP3_401_29021_20141118_205619_inLine +BABEL_OP3_401_29021_20141118_205619_outLine +BABEL_OP3_401_29323_20141113_190829_inLine +BABEL_OP3_401_29323_20141113_190829_outLine +BABEL_OP3_401_30013_20140927_141830_inLine +BABEL_OP3_401_30013_20140927_141830_outLine +BABEL_OP3_401_30058_20141118_221622_inLine +BABEL_OP3_401_30058_20141118_221622_outLine +BABEL_OP3_401_31346_20141103_145401_inLine +BABEL_OP3_401_31346_20141103_145401_outLine +BABEL_OP3_401_31992_20141001_135942_inLine +BABEL_OP3_401_31992_20141001_135942_outLine +BABEL_OP3_401_32122_20141016_212210_inLine +BABEL_OP3_401_32122_20141016_212210_outLine +BABEL_OP3_401_32328_20141018_200856_inLine +BABEL_OP3_401_32328_20141018_200856_outLine +BABEL_OP3_401_33273_20141021_153659_inLine +BABEL_OP3_401_33273_20141021_153659_outLine +BABEL_OP3_401_33355_20141001_174510_inLine +BABEL_OP3_401_33355_20141001_174510_outLine +BABEL_OP3_401_33497_20141106_201923_inLine +BABEL_OP3_401_33497_20141106_201923_outLine +BABEL_OP3_401_33913_20141020_135517_inLine +BABEL_OP3_401_33913_20141020_135517_outLine +BABEL_OP3_401_34197_20140919_193654_inLine +BABEL_OP3_401_34197_20140919_193654_outLine +BABEL_OP3_401_34328_20141020_142248_inLine +BABEL_OP3_401_34328_20141020_142248_outLine +BABEL_OP3_401_34679_20141006_155637_inLine +BABEL_OP3_401_34679_20141006_155637_outLine +BABEL_OP3_401_35139_20141002_182038_inLine +BABEL_OP3_401_35139_20141002_182038_outLine +BABEL_OP3_401_35467_20140919_155737_inLine +BABEL_OP3_401_35467_20140919_155737_outLine +BABEL_OP3_401_35467_20140919_162819_inLine +BABEL_OP3_401_35467_20140919_162819_outLine +BABEL_OP3_401_36894_20140921_162105_inLine +BABEL_OP3_401_36894_20140921_162105_outLine +BABEL_OP3_401_37285_20140929_192149_inLine +BABEL_OP3_401_37285_20140929_192149_outLine +BABEL_OP3_401_37290_20141031_174340_inLine 
+BABEL_OP3_401_37290_20141031_174340_outLine +BABEL_OP3_401_37598_20141031_155805_inLine +BABEL_OP3_401_37598_20141031_155805_outLine +BABEL_OP3_401_38340_20141003_182953_inLine +BABEL_OP3_401_38340_20141003_182953_outLine +BABEL_OP3_401_39307_20140922_113434_inLine +BABEL_OP3_401_39307_20140922_113434_outLine +BABEL_OP3_401_39426_20141114_165136_inLine +BABEL_OP3_401_39426_20141114_165136_outLine +BABEL_OP3_401_39920_20141118_215327_inLine +BABEL_OP3_401_39920_20141118_215327_outLine +BABEL_OP3_401_40557_20141014_182351_inLine +BABEL_OP3_401_40557_20141014_182351_outLine +BABEL_OP3_401_41592_20141020_140853_inLine +BABEL_OP3_401_41592_20141020_140853_outLine +BABEL_OP3_401_41598_20141113_151053_inLine +BABEL_OP3_401_41598_20141113_151053_outLine +BABEL_OP3_401_42029_20141113_160852_inLine +BABEL_OP3_401_42029_20141113_160852_outLine +BABEL_OP3_401_42155_20141028_185638_inLine +BABEL_OP3_401_42155_20141028_185638_outLine +BABEL_OP3_401_42434_20141019_233012_inLine +BABEL_OP3_401_42434_20141019_233012_outLine +BABEL_OP3_401_42497_20141002_144745_inLine +BABEL_OP3_401_42497_20141002_144745_outLine +BABEL_OP3_401_42771_20141028_135131_inLine +BABEL_OP3_401_42771_20141028_135131_outLine +BABEL_OP3_401_42942_20141018_160034_inLine +BABEL_OP3_401_42942_20141018_160034_outLine +BABEL_OP3_401_43286_20140923_144213_inLine +BABEL_OP3_401_43286_20140923_144213_outLine +BABEL_OP3_401_43784_20141008_215339_inLine +BABEL_OP3_401_43784_20141008_215339_outLine +BABEL_OP3_401_43788_20140925_172756_inLine +BABEL_OP3_401_43788_20140925_172756_outLine +BABEL_OP3_401_45201_20141127_132656_inLine +BABEL_OP3_401_45201_20141127_132656_outLine +BABEL_OP3_401_46261_20141021_185026_inLine +BABEL_OP3_401_46261_20141021_185026_outLine +BABEL_OP3_401_46310_20140930_153138_inLine +BABEL_OP3_401_46310_20140930_153138_outLine +BABEL_OP3_401_46550_20141006_181152_inLine +BABEL_OP3_401_46550_20141006_181152_outLine +BABEL_OP3_401_46558_20140924_164642_inLine +BABEL_OP3_401_46558_20140924_164642_outLine +BABEL_OP3_401_46589_20140924_191634_inLine +BABEL_OP3_401_46589_20140924_191634_outLine +BABEL_OP3_401_46681_20141002_163836_inLine +BABEL_OP3_401_46681_20141002_163836_outLine +BABEL_OP3_401_46688_20141001_201358_inLine +BABEL_OP3_401_46688_20141001_201358_outLine +BABEL_OP3_401_46770_20141111_221929_inLine +BABEL_OP3_401_46770_20141111_221929_outLine +BABEL_OP3_401_47487_20141016_162401_inLine +BABEL_OP3_401_47487_20141016_162401_outLine +BABEL_OP3_401_47866_20141124_164427_inLine +BABEL_OP3_401_47866_20141124_164427_outLine +BABEL_OP3_401_47878_20141030_173221_inLine +BABEL_OP3_401_47878_20141030_173221_outLine +BABEL_OP3_401_48243_20141006_175215_inLine +BABEL_OP3_401_48243_20141006_175215_outLine +BABEL_OP3_401_48610_20141001_225254_inLine +BABEL_OP3_401_48610_20141001_225254_outLine +BABEL_OP3_401_49001_20141014_165716_inLine +BABEL_OP3_401_49001_20141014_165716_outLine +BABEL_OP3_401_49306_20141124_193818_inLine +BABEL_OP3_401_49306_20141124_193818_outLine +BABEL_OP3_401_50427_20141028_152244_inLine +BABEL_OP3_401_50427_20141028_152244_outLine +BABEL_OP3_401_51968_20141019_151724_inLine +BABEL_OP3_401_51968_20141019_151724_outLine +BABEL_OP3_401_52404_20140924_182534_inLine +BABEL_OP3_401_52404_20140924_182534_outLine +BABEL_OP3_401_53957_20141020_142913_inLine +BABEL_OP3_401_53957_20141020_142913_outLine +BABEL_OP3_401_54074_20141021_142528_inLine +BABEL_OP3_401_54074_20141021_142528_outLine +BABEL_OP3_401_56331_20141124_184702_inLine +BABEL_OP3_401_56331_20141124_184702_outLine 
+BABEL_OP3_401_57529_20141017_181551_inLine +BABEL_OP3_401_57529_20141017_181551_outLine +BABEL_OP3_401_57542_20141122_182629_inLine +BABEL_OP3_401_57542_20141122_182629_outLine +BABEL_OP3_401_58006_20141124_153854_inLine +BABEL_OP3_401_58006_20141124_153854_outLine +BABEL_OP3_401_58006_20141124_155107_inLine +BABEL_OP3_401_58006_20141124_155107_outLine +BABEL_OP3_401_58734_20140930_173126_inLine +BABEL_OP3_401_58734_20140930_173126_outLine +BABEL_OP3_401_58821_20140930_211254_inLine +BABEL_OP3_401_58821_20140930_211254_outLine +BABEL_OP3_401_59078_20141030_203852_inLine +BABEL_OP3_401_59078_20141030_203852_outLine +BABEL_OP3_401_59078_20141030_205139_inLine +BABEL_OP3_401_59078_20141030_205139_outLine +BABEL_OP3_401_60026_20141002_115024_inLine +BABEL_OP3_401_60026_20141002_115024_outLine +BABEL_OP3_401_60474_20141015_154855_inLine +BABEL_OP3_401_60474_20141015_154855_outLine +BABEL_OP3_401_65077_20140917_151315_inLine +BABEL_OP3_401_65077_20140917_151315_outLine +BABEL_OP3_401_65367_20141111_163221_inLine +BABEL_OP3_401_65367_20141111_163221_outLine +BABEL_OP3_401_66472_20141027_173935_inLine +BABEL_OP3_401_66472_20141027_173935_outLine +BABEL_OP3_401_68068_20140925_140055_inLine +BABEL_OP3_401_68068_20140925_140055_outLine +BABEL_OP3_401_68384_20141020_225435_inLine +BABEL_OP3_401_68384_20141020_225435_outLine +BABEL_OP3_401_68385_20140919_175351_inLine +BABEL_OP3_401_68385_20140919_175351_outLine +BABEL_OP3_401_68748_20140925_160756_inLine +BABEL_OP3_401_68748_20140925_160756_outLine +BABEL_OP3_401_69474_20140930_190551_inLine +BABEL_OP3_401_69474_20140930_190551_outLine +BABEL_OP3_401_69636_20140924_174446_inLine +BABEL_OP3_401_69636_20140924_174446_outLine +BABEL_OP3_401_71566_20141001_171842_inLine +BABEL_OP3_401_71566_20141001_171842_outLine +BABEL_OP3_401_72040_20141009_171306_inLine +BABEL_OP3_401_72040_20141009_171306_outLine +BABEL_OP3_401_72110_20141001_122146_inLine +BABEL_OP3_401_72110_20141001_122146_outLine +BABEL_OP3_401_72844_20140919_154733_inLine +BABEL_OP3_401_72844_20140919_154733_outLine +BABEL_OP3_401_72844_20140919_162600_inLine +BABEL_OP3_401_72844_20140919_162600_outLine +BABEL_OP3_401_73430_20140930_142250_inLine +BABEL_OP3_401_73430_20140930_142250_outLine +BABEL_OP3_401_73591_20140904_190044_inLine +BABEL_OP3_401_73591_20140904_190044_outLine +BABEL_OP3_401_74667_20141017_173017_inLine +BABEL_OP3_401_74667_20141017_173017_outLine +BABEL_OP3_401_74799_20141030_203910_inLine +BABEL_OP3_401_74799_20141030_203910_outLine +BABEL_OP3_401_75505_20140917_155231_inLine +BABEL_OP3_401_75505_20140917_155231_outLine +BABEL_OP3_401_76126_20141018_171804_inLine +BABEL_OP3_401_76126_20141018_171804_outLine +BABEL_OP3_401_76437_20140904_161741_inLine +BABEL_OP3_401_76437_20140904_161741_outLine +BABEL_OP3_401_76444_20141014_203500_inLine +BABEL_OP3_401_76444_20141014_203500_outLine +BABEL_OP3_401_76499_20141022_151625_inLine +BABEL_OP3_401_76499_20141022_151625_outLine +BABEL_OP3_401_78482_20141104_155857_inLine +BABEL_OP3_401_78482_20141104_155857_outLine +BABEL_OP3_401_79080_20141112_120644_inLine +BABEL_OP3_401_79080_20141112_120644_outLine +BABEL_OP3_401_79131_20141125_193444_inLine +BABEL_OP3_401_79131_20141125_193444_outLine +BABEL_OP3_401_79995_20141020_232746_inLine +BABEL_OP3_401_79995_20141020_232746_outLine +BABEL_OP3_401_80136_20141112_134414_inLine +BABEL_OP3_401_80136_20141112_134414_outLine +BABEL_OP3_401_80306_20141110_184642_inLine +BABEL_OP3_401_80306_20141110_184642_outLine +BABEL_OP3_401_80439_20141015_141847_inLine 
+BABEL_OP3_401_80439_20141015_141847_outLine +BABEL_OP3_401_80559_20141003_131820_inLine +BABEL_OP3_401_80559_20141003_131820_outLine +BABEL_OP3_401_81433_20141027_184533_inLine +BABEL_OP3_401_81433_20141027_184533_outLine +BABEL_OP3_401_81622_20141021_162012_inLine +BABEL_OP3_401_81622_20141021_162012_outLine +BABEL_OP3_401_82035_20141030_173356_inLine +BABEL_OP3_401_82035_20141030_173356_outLine +BABEL_OP3_401_82035_20141030_174442_inLine +BABEL_OP3_401_82035_20141030_174442_outLine +BABEL_OP3_401_84547_20140917_192745_inLine +BABEL_OP3_401_84547_20140917_192745_outLine +BABEL_OP3_401_84547_20140917_194346_inLine +BABEL_OP3_401_84547_20140917_194346_outLine +BABEL_OP3_401_86676_20140924_200749_inLine +BABEL_OP3_401_86676_20140924_200749_outLine +BABEL_OP3_401_87871_20141018_185934_inLine +BABEL_OP3_401_87871_20141018_185934_outLine +BABEL_OP3_401_87921_20141010_173551_inLine +BABEL_OP3_401_87921_20141010_173551_outLine +BABEL_OP3_401_88783_20141022_171250_inLine +BABEL_OP3_401_88783_20141022_171250_outLine +BABEL_OP3_401_90737_20141020_180826_inLine +BABEL_OP3_401_90737_20141020_180826_outLine +BABEL_OP3_401_91891_20141001_130023_inLine +BABEL_OP3_401_91891_20141001_130023_outLine +BABEL_OP3_401_92065_20141017_191557_inLine +BABEL_OP3_401_92065_20141017_191557_outLine +BABEL_OP3_401_92736_20141017_194915_inLine +BABEL_OP3_401_92736_20141017_194915_outLine +BABEL_OP3_401_92740_20140926_150615_inLine +BABEL_OP3_401_92740_20140926_150615_outLine +BABEL_OP3_401_93490_20141106_171428_inLine +BABEL_OP3_401_93490_20141106_171428_outLine +BABEL_OP3_401_94745_20140923_154933_inLine +BABEL_OP3_401_94745_20140923_154933_outLine +BABEL_OP3_401_95077_20141010_153959_inLine +BABEL_OP3_401_95077_20141010_153959_outLine +BABEL_OP3_401_95294_20140923_173007_inLine +BABEL_OP3_401_95294_20140923_173007_outLine +BABEL_OP3_401_95446_20141112_154248_inLine +BABEL_OP3_401_95446_20141112_154248_outLine +BABEL_OP3_401_95663_20140917_182410_inLine +BABEL_OP3_401_95663_20140917_182410_outLine +BABEL_OP3_401_96324_20140921_170922_inLine +BABEL_OP3_401_96324_20140921_170922_outLine +BABEL_OP3_401_97376_20140929_154000_inLine +BABEL_OP3_401_97376_20140929_154000_outLine +BABEL_OP3_401_97772_20140917_144539_inLine +BABEL_OP3_401_97772_20140917_144539_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/training.list b/egs/babel/s5d/conf/lists/401-mongolian/training.list new file mode 100644 index 00000000000..ebad291922b --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/training.list @@ -0,0 +1,518 @@ +BABEL_OP3_401_10524_20141119_213218_inLine +BABEL_OP3_401_10524_20141119_213218_outLine +BABEL_OP3_401_10647_20141119_154922_inLine +BABEL_OP3_401_10647_20141119_154922_outLine +BABEL_OP3_401_10901_20141021_124158_inLine +BABEL_OP3_401_10901_20141021_124158_outLine +BABEL_OP3_401_10966_20141019_192715_inLine +BABEL_OP3_401_10966_20141019_192715_outLine +BABEL_OP3_401_11581_20141030_214939_inLine +BABEL_OP3_401_11581_20141030_214939_outLine +BABEL_OP3_401_11663_20141105_143103_inLine +BABEL_OP3_401_11663_20141105_143103_outLine +BABEL_OP3_401_11673_20140917_163413_inLine +BABEL_OP3_401_11673_20140917_163413_outLine +BABEL_OP3_401_11797_20140929_205622_inLine +BABEL_OP3_401_11797_20140929_205622_outLine +BABEL_OP3_401_12036_20141002_134817_inLine +BABEL_OP3_401_12036_20141002_134817_outLine +BABEL_OP3_401_12242_20141014_143019_inLine +BABEL_OP3_401_12242_20141014_143019_outLine +BABEL_OP3_401_12635_20141117_185400_inLine +BABEL_OP3_401_12635_20141117_185400_outLine 
+BABEL_OP3_401_12767_20141001_130658_inLine +BABEL_OP3_401_12767_20141001_130658_outLine +BABEL_OP3_401_12851_20140919_135242_inLine +BABEL_OP3_401_12851_20140919_135242_outLine +BABEL_OP3_401_13030_20141015_163112_inLine +BABEL_OP3_401_13030_20141015_163112_outLine +BABEL_OP3_401_13184_20141110_163330_inLine +BABEL_OP3_401_13184_20141110_163330_outLine +BABEL_OP3_401_13184_20141110_163902_inLine +BABEL_OP3_401_13184_20141110_163902_outLine +BABEL_OP3_401_13324_20141002_165637_inLine +BABEL_OP3_401_13324_20141002_165637_outLine +BABEL_OP3_401_13490_20141110_152643_inLine +BABEL_OP3_401_13490_20141110_152643_outLine +BABEL_OP3_401_13561_20141027_154606_inLine +BABEL_OP3_401_13561_20141027_154606_outLine +BABEL_OP3_401_13586_20141023_193242_inLine +BABEL_OP3_401_13586_20141023_193242_outLine +BABEL_OP3_401_13664_20140922_131741_inLine +BABEL_OP3_401_13664_20140922_131741_outLine +BABEL_OP3_401_13709_20141118_170840_inLine +BABEL_OP3_401_13709_20141118_170840_outLine +BABEL_OP3_401_13744_20140919_122844_inLine +BABEL_OP3_401_13744_20140919_122844_outLine +BABEL_OP3_401_14560_20140922_140509_inLine +BABEL_OP3_401_14560_20140922_140509_outLine +BABEL_OP3_401_14719_20141017_215720_inLine +BABEL_OP3_401_14719_20141017_215720_outLine +BABEL_OP3_401_14725_20140929_155627_inLine +BABEL_OP3_401_14725_20140929_155627_outLine +BABEL_OP3_401_14807_20141030_232134_inLine +BABEL_OP3_401_14807_20141030_232134_outLine +BABEL_OP3_401_14814_20141014_184415_inLine +BABEL_OP3_401_14814_20141014_184415_outLine +BABEL_OP3_401_14972_20141028_200051_inLine +BABEL_OP3_401_14972_20141028_200051_outLine +BABEL_OP3_401_15702_20140923_180447_inLine +BABEL_OP3_401_15702_20140923_180447_outLine +BABEL_OP3_401_15730_20140924_135900_inLine +BABEL_OP3_401_15730_20140924_135900_outLine +BABEL_OP3_401_15902_20140930_144526_inLine +BABEL_OP3_401_15902_20140930_144526_outLine +BABEL_OP3_401_16475_20141016_143941_inLine +BABEL_OP3_401_16475_20141016_143941_outLine +BABEL_OP3_401_16749_20141112_193028_inLine +BABEL_OP3_401_16749_20141112_193028_outLine +BABEL_OP3_401_16839_20141110_174923_inLine +BABEL_OP3_401_16839_20141110_174923_outLine +BABEL_OP3_401_16886_20141017_152623_inLine +BABEL_OP3_401_16886_20141017_152623_outLine +BABEL_OP3_401_16924_20140923_164321_inLine +BABEL_OP3_401_16924_20140923_164321_outLine +BABEL_OP3_401_17320_20141125_170435_inLine +BABEL_OP3_401_17320_20141125_170435_outLine +BABEL_OP3_401_17420_20141118_190621_inLine +BABEL_OP3_401_17420_20141118_190621_outLine +BABEL_OP3_401_17567_20141023_213629_inLine +BABEL_OP3_401_17567_20141023_213629_outLine +BABEL_OP3_401_17615_20140924_144400_inLine +BABEL_OP3_401_17615_20140924_144400_outLine +BABEL_OP3_401_17923_20141002_172711_inLine +BABEL_OP3_401_17923_20141002_172711_outLine +BABEL_OP3_401_18078_20141113_162556_inLine +BABEL_OP3_401_18078_20141113_162556_outLine +BABEL_OP3_401_18380_20141023_154240_inLine +BABEL_OP3_401_18380_20141023_154240_outLine +BABEL_OP3_401_18566_20141120_004140_inLine +BABEL_OP3_401_18566_20141120_004140_outLine +BABEL_OP3_401_18924_20141030_205111_inLine +BABEL_OP3_401_18924_20141030_205111_outLine +BABEL_OP3_401_18939_20141001_200418_inLine +BABEL_OP3_401_18939_20141001_200418_outLine +BABEL_OP3_401_19101_20141029_183652_inLine +BABEL_OP3_401_19101_20141029_183652_outLine +BABEL_OP3_401_19134_20141030_191814_inLine +BABEL_OP3_401_19134_20141030_191814_outLine +BABEL_OP3_401_19134_20141030_192931_inLine +BABEL_OP3_401_19134_20141030_192931_outLine +BABEL_OP3_401_19722_20140930_200553_inLine 
+BABEL_OP3_401_19722_20140930_200553_outLine +BABEL_OP3_401_19773_20141101_211403_inLine +BABEL_OP3_401_19773_20141101_211403_outLine +BABEL_OP3_401_19818_20141103_184746_inLine +BABEL_OP3_401_19818_20141103_184746_outLine +BABEL_OP3_401_19818_20141103_185728_inLine +BABEL_OP3_401_19818_20141103_185728_outLine +BABEL_OP3_401_20133_20140919_173858_inLine +BABEL_OP3_401_20133_20140919_173858_outLine +BABEL_OP3_401_20916_20140922_174215_inLine +BABEL_OP3_401_20916_20140922_174215_outLine +BABEL_OP3_401_20922_20141110_190444_inLine +BABEL_OP3_401_20922_20141110_190444_outLine +BABEL_OP3_401_21206_20141003_120941_inLine +BABEL_OP3_401_21206_20141003_120941_outLine +BABEL_OP3_401_21206_20141003_122457_inLine +BABEL_OP3_401_21206_20141003_122457_outLine +BABEL_OP3_401_21327_20141020_204038_inLine +BABEL_OP3_401_21327_20141020_204038_outLine +BABEL_OP3_401_21807_20141029_214508_inLine +BABEL_OP3_401_21807_20141029_214508_outLine +BABEL_OP3_401_22321_20140929_180456_inLine +BABEL_OP3_401_22321_20140929_180456_outLine +BABEL_OP3_401_22446_20140929_133647_inLine +BABEL_OP3_401_22446_20140929_133647_outLine +BABEL_OP3_401_22624_20141001_141008_inLine +BABEL_OP3_401_22624_20141001_141008_outLine +BABEL_OP3_401_22918_20141114_145920_inLine +BABEL_OP3_401_22918_20141114_145920_outLine +BABEL_OP3_401_23006_20141014_190149_inLine +BABEL_OP3_401_23006_20141014_190149_outLine +BABEL_OP3_401_23046_20141014_150823_inLine +BABEL_OP3_401_23046_20141014_150823_outLine +BABEL_OP3_401_23092_20141010_141138_inLine +BABEL_OP3_401_23092_20141010_141138_outLine +BABEL_OP3_401_23153_20141018_201630_inLine +BABEL_OP3_401_23153_20141018_201630_outLine +BABEL_OP3_401_23893_20141125_213344_inLine +BABEL_OP3_401_23893_20141125_213344_outLine +BABEL_OP3_401_23980_20141018_192714_inLine +BABEL_OP3_401_23980_20141018_192714_outLine +BABEL_OP3_401_24270_20141030_195323_inLine +BABEL_OP3_401_24270_20141030_195323_outLine +BABEL_OP3_401_24290_20141124_184351_inLine +BABEL_OP3_401_24290_20141124_184351_outLine +BABEL_OP3_401_24323_20141017_151036_inLine +BABEL_OP3_401_24323_20141017_151036_outLine +BABEL_OP3_401_24470_20141029_145653_inLine +BABEL_OP3_401_24470_20141029_145653_outLine +BABEL_OP3_401_24569_20141016_182323_inLine +BABEL_OP3_401_24569_20141016_182323_outLine +BABEL_OP3_401_24586_20141117_160948_inLine +BABEL_OP3_401_24586_20141117_160948_outLine +BABEL_OP3_401_24590_20141017_175757_inLine +BABEL_OP3_401_24590_20141017_175757_outLine +BABEL_OP3_401_24605_20141001_142727_inLine +BABEL_OP3_401_24605_20141001_142727_outLine +BABEL_OP3_401_24679_20140919_185323_inLine +BABEL_OP3_401_24679_20140919_185323_outLine +BABEL_OP3_401_24982_20141008_150245_inLine +BABEL_OP3_401_24982_20141008_150245_outLine +BABEL_OP3_401_25412_20141031_171749_inLine +BABEL_OP3_401_25412_20141031_171749_outLine +BABEL_OP3_401_25719_20141110_191042_inLine +BABEL_OP3_401_25719_20141110_191042_outLine +BABEL_OP3_401_25961_20140929_183632_inLine +BABEL_OP3_401_25961_20140929_183632_outLine +BABEL_OP3_401_26072_20141112_173131_inLine +BABEL_OP3_401_26072_20141112_173131_outLine +BABEL_OP3_401_26398_20141204_001557_inLine +BABEL_OP3_401_26398_20141204_001557_outLine +BABEL_OP3_401_26507_20141118_210109_inLine +BABEL_OP3_401_26507_20141118_210109_outLine +BABEL_OP3_401_26574_20141103_163656_inLine +BABEL_OP3_401_26574_20141103_163656_outLine +BABEL_OP3_401_26999_20141101_213851_inLine +BABEL_OP3_401_26999_20141101_213851_outLine +BABEL_OP3_401_27042_20141017_184608_inLine +BABEL_OP3_401_27042_20141017_184608_outLine 
+BABEL_OP3_401_27203_20141030_164916_inLine +BABEL_OP3_401_27203_20141030_164916_outLine +BABEL_OP3_401_27841_20141113_200006_inLine +BABEL_OP3_401_27841_20141113_200006_outLine +BABEL_OP3_401_28522_20140927_172947_inLine +BABEL_OP3_401_28522_20140927_172947_outLine +BABEL_OP3_401_28600_20141021_194818_inLine +BABEL_OP3_401_28600_20141021_194818_outLine +BABEL_OP3_401_28775_20141003_162126_inLine +BABEL_OP3_401_28775_20141003_162126_outLine +BABEL_OP3_401_28814_20141112_190902_inLine +BABEL_OP3_401_28814_20141112_190902_outLine +BABEL_OP3_401_29021_20141118_205619_inLine +BABEL_OP3_401_29021_20141118_205619_outLine +BABEL_OP3_401_29076_20141031_003943_inLine +BABEL_OP3_401_29076_20141031_003943_outLine +BABEL_OP3_401_29323_20141113_190829_inLine +BABEL_OP3_401_29323_20141113_190829_outLine +BABEL_OP3_401_29404_20141121_153054_inLine +BABEL_OP3_401_29404_20141121_153054_outLine +BABEL_OP3_401_29685_20141019_210404_inLine +BABEL_OP3_401_29685_20141019_210404_outLine +BABEL_OP3_401_29685_20141019_210959_inLine +BABEL_OP3_401_29685_20141019_210959_outLine +BABEL_OP3_401_30013_20140927_141830_inLine +BABEL_OP3_401_30013_20140927_141830_outLine +BABEL_OP3_401_30058_20141118_221622_inLine +BABEL_OP3_401_30058_20141118_221622_outLine +BABEL_OP3_401_31346_20141103_145401_inLine +BABEL_OP3_401_31346_20141103_145401_outLine +BABEL_OP3_401_31624_20141003_192655_inLine +BABEL_OP3_401_31624_20141003_192655_outLine +BABEL_OP3_401_31628_20140923_145349_inLine +BABEL_OP3_401_31628_20140923_145349_outLine +BABEL_OP3_401_31992_20141001_135942_inLine +BABEL_OP3_401_31992_20141001_135942_outLine +BABEL_OP3_401_32122_20141016_212210_inLine +BABEL_OP3_401_32122_20141016_212210_outLine +BABEL_OP3_401_32328_20141018_200856_inLine +BABEL_OP3_401_32328_20141018_200856_outLine +BABEL_OP3_401_32708_20141003_200927_inLine +BABEL_OP3_401_32708_20141003_200927_outLine +BABEL_OP3_401_33111_20141122_223105_inLine +BABEL_OP3_401_33111_20141122_223105_outLine +BABEL_OP3_401_33273_20141021_153659_inLine +BABEL_OP3_401_33273_20141021_153659_outLine +BABEL_OP3_401_33355_20141001_174510_inLine +BABEL_OP3_401_33355_20141001_174510_outLine +BABEL_OP3_401_33497_20141106_201923_inLine +BABEL_OP3_401_33497_20141106_201923_outLine +BABEL_OP3_401_33672_20140930_132456_inLine +BABEL_OP3_401_33672_20140930_132456_outLine +BABEL_OP3_401_33672_20140930_133426_inLine +BABEL_OP3_401_33672_20140930_133426_outLine +BABEL_OP3_401_33913_20141020_135517_inLine +BABEL_OP3_401_33913_20141020_135517_outLine +BABEL_OP3_401_34197_20140919_193654_inLine +BABEL_OP3_401_34197_20140919_193654_outLine +BABEL_OP3_401_34328_20141020_142248_inLine +BABEL_OP3_401_34328_20141020_142248_outLine +BABEL_OP3_401_34679_20141006_155637_inLine +BABEL_OP3_401_34679_20141006_155637_outLine +BABEL_OP3_401_35139_20141002_182038_inLine +BABEL_OP3_401_35139_20141002_182038_outLine +BABEL_OP3_401_35143_20141010_163440_inLine +BABEL_OP3_401_35143_20141010_163440_outLine +BABEL_OP3_401_35467_20140919_155737_inLine +BABEL_OP3_401_35467_20140919_155737_outLine +BABEL_OP3_401_35467_20140919_162819_inLine +BABEL_OP3_401_35467_20140919_162819_outLine +BABEL_OP3_401_36894_20140921_162105_inLine +BABEL_OP3_401_36894_20140921_162105_outLine +BABEL_OP3_401_37285_20140929_192149_inLine +BABEL_OP3_401_37285_20140929_192149_outLine +BABEL_OP3_401_37290_20141031_174340_inLine +BABEL_OP3_401_37290_20141031_174340_outLine +BABEL_OP3_401_37598_20141031_155805_inLine +BABEL_OP3_401_37598_20141031_155805_outLine +BABEL_OP3_401_38340_20141003_182953_inLine 
+BABEL_OP3_401_38340_20141003_182953_outLine +BABEL_OP3_401_38878_20141031_201014_inLine +BABEL_OP3_401_38878_20141031_201014_outLine +BABEL_OP3_401_39307_20140922_113434_inLine +BABEL_OP3_401_39307_20140922_113434_outLine +BABEL_OP3_401_39426_20141114_165136_inLine +BABEL_OP3_401_39426_20141114_165136_outLine +BABEL_OP3_401_39920_20141118_215327_inLine +BABEL_OP3_401_39920_20141118_215327_outLine +BABEL_OP3_401_40557_20141014_182351_inLine +BABEL_OP3_401_40557_20141014_182351_outLine +BABEL_OP3_401_40713_20141003_155735_inLine +BABEL_OP3_401_40713_20141003_155735_outLine +BABEL_OP3_401_41592_20141020_140853_inLine +BABEL_OP3_401_41592_20141020_140853_outLine +BABEL_OP3_401_41598_20141113_151053_inLine +BABEL_OP3_401_41598_20141113_151053_outLine +BABEL_OP3_401_41618_20141028_201644_inLine +BABEL_OP3_401_41618_20141028_201644_outLine +BABEL_OP3_401_42029_20141113_160852_inLine +BABEL_OP3_401_42029_20141113_160852_outLine +BABEL_OP3_401_42155_20141028_185638_inLine +BABEL_OP3_401_42155_20141028_185638_outLine +BABEL_OP3_401_42434_20141019_233012_inLine +BABEL_OP3_401_42434_20141019_233012_outLine +BABEL_OP3_401_42497_20141002_144745_inLine +BABEL_OP3_401_42497_20141002_144745_outLine +BABEL_OP3_401_42619_20141104_204106_inLine +BABEL_OP3_401_42619_20141104_204106_outLine +BABEL_OP3_401_42771_20141028_135131_inLine +BABEL_OP3_401_42771_20141028_135131_outLine +BABEL_OP3_401_42834_20141103_204826_inLine +BABEL_OP3_401_42834_20141103_204826_outLine +BABEL_OP3_401_42942_20141018_160034_inLine +BABEL_OP3_401_42942_20141018_160034_outLine +BABEL_OP3_401_43286_20140923_144213_inLine +BABEL_OP3_401_43286_20140923_144213_outLine +BABEL_OP3_401_43646_20140917_164218_inLine +BABEL_OP3_401_43646_20140917_164218_outLine +BABEL_OP3_401_43784_20141008_215339_inLine +BABEL_OP3_401_43784_20141008_215339_outLine +BABEL_OP3_401_43788_20140925_172756_inLine +BABEL_OP3_401_43788_20140925_172756_outLine +BABEL_OP3_401_45201_20141127_132656_inLine +BABEL_OP3_401_45201_20141127_132656_outLine +BABEL_OP3_401_45560_20140930_190100_inLine +BABEL_OP3_401_45560_20140930_190100_outLine +BABEL_OP3_401_46261_20141021_185026_inLine +BABEL_OP3_401_46261_20141021_185026_outLine +BABEL_OP3_401_46310_20140930_153138_inLine +BABEL_OP3_401_46310_20140930_153138_outLine +BABEL_OP3_401_46550_20141006_181152_inLine +BABEL_OP3_401_46550_20141006_181152_outLine +BABEL_OP3_401_46558_20140924_164642_inLine +BABEL_OP3_401_46558_20140924_164642_outLine +BABEL_OP3_401_46589_20140924_191634_inLine +BABEL_OP3_401_46589_20140924_191634_outLine +BABEL_OP3_401_46681_20141002_163836_inLine +BABEL_OP3_401_46681_20141002_163836_outLine +BABEL_OP3_401_46688_20141001_201358_inLine +BABEL_OP3_401_46688_20141001_201358_outLine +BABEL_OP3_401_46770_20141111_221929_inLine +BABEL_OP3_401_46770_20141111_221929_outLine +BABEL_OP3_401_46881_20140922_175212_inLine +BABEL_OP3_401_46881_20140922_175212_outLine +BABEL_OP3_401_47283_20141006_193958_inLine +BABEL_OP3_401_47283_20141006_193958_outLine +BABEL_OP3_401_47487_20141016_162401_inLine +BABEL_OP3_401_47487_20141016_162401_outLine +BABEL_OP3_401_47866_20141124_164427_inLine +BABEL_OP3_401_47866_20141124_164427_outLine +BABEL_OP3_401_47878_20141030_173221_inLine +BABEL_OP3_401_47878_20141030_173221_outLine +BABEL_OP3_401_48243_20141006_175215_inLine +BABEL_OP3_401_48243_20141006_175215_outLine +BABEL_OP3_401_48610_20141001_225254_inLine +BABEL_OP3_401_48610_20141001_225254_outLine +BABEL_OP3_401_49001_20141014_165716_inLine +BABEL_OP3_401_49001_20141014_165716_outLine 
+BABEL_OP3_401_49306_20141124_193818_inLine +BABEL_OP3_401_49306_20141124_193818_outLine +BABEL_OP3_401_50427_20141028_152244_inLine +BABEL_OP3_401_50427_20141028_152244_outLine +BABEL_OP3_401_51407_20141027_182114_inLine +BABEL_OP3_401_51407_20141027_182114_outLine +BABEL_OP3_401_51968_20141019_151724_inLine +BABEL_OP3_401_51968_20141019_151724_outLine +BABEL_OP3_401_52404_20140924_182534_inLine +BABEL_OP3_401_52404_20140924_182534_outLine +BABEL_OP3_401_52725_20141123_224942_inLine +BABEL_OP3_401_52725_20141123_224942_outLine +BABEL_OP3_401_52818_20140922_184227_inLine +BABEL_OP3_401_52818_20140922_184227_outLine +BABEL_OP3_401_53957_20141020_142913_inLine +BABEL_OP3_401_53957_20141020_142913_outLine +BABEL_OP3_401_54074_20141021_142528_inLine +BABEL_OP3_401_54074_20141021_142528_outLine +BABEL_OP3_401_54162_20141107_221207_inLine +BABEL_OP3_401_54162_20141107_221207_outLine +BABEL_OP3_401_56331_20141124_184702_inLine +BABEL_OP3_401_56331_20141124_184702_outLine +BABEL_OP3_401_56677_20141020_160804_inLine +BABEL_OP3_401_56677_20141020_160804_outLine +BABEL_OP3_401_57065_20140924_135508_inLine +BABEL_OP3_401_57065_20140924_135508_outLine +BABEL_OP3_401_57529_20141017_181551_inLine +BABEL_OP3_401_57529_20141017_181551_outLine +BABEL_OP3_401_57542_20141122_182629_inLine +BABEL_OP3_401_57542_20141122_182629_outLine +BABEL_OP3_401_58006_20141124_153854_inLine +BABEL_OP3_401_58006_20141124_153854_outLine +BABEL_OP3_401_58006_20141124_155107_inLine +BABEL_OP3_401_58006_20141124_155107_outLine +BABEL_OP3_401_58734_20140930_173126_inLine +BABEL_OP3_401_58734_20140930_173126_outLine +BABEL_OP3_401_58821_20140930_211254_inLine +BABEL_OP3_401_58821_20140930_211254_outLine +BABEL_OP3_401_59078_20141030_203852_inLine +BABEL_OP3_401_59078_20141030_203852_outLine +BABEL_OP3_401_59078_20141030_205139_inLine +BABEL_OP3_401_59078_20141030_205139_outLine +BABEL_OP3_401_60026_20141002_115024_inLine +BABEL_OP3_401_60026_20141002_115024_outLine +BABEL_OP3_401_60310_20141017_165419_inLine +BABEL_OP3_401_60310_20141017_165419_outLine +BABEL_OP3_401_60474_20141015_154855_inLine +BABEL_OP3_401_60474_20141015_154855_outLine +BABEL_OP3_401_63906_20141124_212323_inLine +BABEL_OP3_401_63906_20141124_212323_outLine +BABEL_OP3_401_64398_20140922_165727_inLine +BABEL_OP3_401_64398_20140922_165727_outLine +BABEL_OP3_401_65077_20140917_151315_inLine +BABEL_OP3_401_65077_20140917_151315_outLine +BABEL_OP3_401_65367_20141111_163221_inLine +BABEL_OP3_401_65367_20141111_163221_outLine +BABEL_OP3_401_66472_20141027_173935_inLine +BABEL_OP3_401_66472_20141027_173935_outLine +BABEL_OP3_401_68068_20140925_140055_inLine +BABEL_OP3_401_68068_20140925_140055_outLine +BABEL_OP3_401_68384_20141020_225435_inLine +BABEL_OP3_401_68384_20141020_225435_outLine +BABEL_OP3_401_68385_20140919_175351_inLine +BABEL_OP3_401_68385_20140919_175351_outLine +BABEL_OP3_401_68748_20140925_160756_inLine +BABEL_OP3_401_68748_20140925_160756_outLine +BABEL_OP3_401_69474_20140930_190551_inLine +BABEL_OP3_401_69474_20140930_190551_outLine +BABEL_OP3_401_69636_20140924_174446_inLine +BABEL_OP3_401_69636_20140924_174446_outLine +BABEL_OP3_401_71566_20141001_171842_inLine +BABEL_OP3_401_71566_20141001_171842_outLine +BABEL_OP3_401_72040_20141009_171306_inLine +BABEL_OP3_401_72040_20141009_171306_outLine +BABEL_OP3_401_72110_20141001_122146_inLine +BABEL_OP3_401_72110_20141001_122146_outLine +BABEL_OP3_401_72844_20140919_154733_inLine +BABEL_OP3_401_72844_20140919_154733_outLine +BABEL_OP3_401_72844_20140919_162600_inLine 
+BABEL_OP3_401_72844_20140919_162600_outLine +BABEL_OP3_401_73022_20141111_173204_inLine +BABEL_OP3_401_73022_20141111_173204_outLine +BABEL_OP3_401_73430_20140930_142250_inLine +BABEL_OP3_401_73430_20140930_142250_outLine +BABEL_OP3_401_73591_20140904_190044_inLine +BABEL_OP3_401_73591_20140904_190044_outLine +BABEL_OP3_401_74667_20141017_173017_inLine +BABEL_OP3_401_74667_20141017_173017_outLine +BABEL_OP3_401_74799_20141030_203910_inLine +BABEL_OP3_401_74799_20141030_203910_outLine +BABEL_OP3_401_74921_20140924_165937_inLine +BABEL_OP3_401_74921_20140924_165937_outLine +BABEL_OP3_401_75505_20140917_155231_inLine +BABEL_OP3_401_75505_20140917_155231_outLine +BABEL_OP3_401_76126_20141018_171804_inLine +BABEL_OP3_401_76126_20141018_171804_outLine +BABEL_OP3_401_76437_20140904_161741_inLine +BABEL_OP3_401_76437_20140904_161741_outLine +BABEL_OP3_401_76444_20141014_203500_inLine +BABEL_OP3_401_76444_20141014_203500_outLine +BABEL_OP3_401_76499_20141022_151625_inLine +BABEL_OP3_401_76499_20141022_151625_outLine +BABEL_OP3_401_77744_20141014_125609_inLine +BABEL_OP3_401_77744_20141014_125609_outLine +BABEL_OP3_401_77744_20141014_140124_inLine +BABEL_OP3_401_77744_20141014_140124_outLine +BABEL_OP3_401_78482_20141104_155857_inLine +BABEL_OP3_401_78482_20141104_155857_outLine +BABEL_OP3_401_79080_20141112_120644_inLine +BABEL_OP3_401_79080_20141112_120644_outLine +BABEL_OP3_401_79131_20141125_193444_inLine +BABEL_OP3_401_79131_20141125_193444_outLine +BABEL_OP3_401_79167_20140925_132420_inLine +BABEL_OP3_401_79167_20140925_132420_outLine +BABEL_OP3_401_79995_20141020_232746_inLine +BABEL_OP3_401_79995_20141020_232746_outLine +BABEL_OP3_401_80136_20141112_134414_inLine +BABEL_OP3_401_80136_20141112_134414_outLine +BABEL_OP3_401_80306_20141110_184642_inLine +BABEL_OP3_401_80306_20141110_184642_outLine +BABEL_OP3_401_80439_20141015_141847_inLine +BABEL_OP3_401_80439_20141015_141847_outLine +BABEL_OP3_401_80559_20141003_131820_inLine +BABEL_OP3_401_80559_20141003_131820_outLine +BABEL_OP3_401_81287_20141001_145404_inLine +BABEL_OP3_401_81287_20141001_145404_outLine +BABEL_OP3_401_81433_20141027_184533_inLine +BABEL_OP3_401_81433_20141027_184533_outLine +BABEL_OP3_401_81622_20141021_162012_inLine +BABEL_OP3_401_81622_20141021_162012_outLine +BABEL_OP3_401_82035_20141030_173356_inLine +BABEL_OP3_401_82035_20141030_173356_outLine +BABEL_OP3_401_82035_20141030_174442_inLine +BABEL_OP3_401_82035_20141030_174442_outLine +BABEL_OP3_401_82224_20141111_175445_inLine +BABEL_OP3_401_82224_20141111_175445_outLine +BABEL_OP3_401_84547_20140917_192745_inLine +BABEL_OP3_401_84547_20140917_192745_outLine +BABEL_OP3_401_84547_20140917_194346_inLine +BABEL_OP3_401_84547_20140917_194346_outLine +BABEL_OP3_401_86676_20140924_200749_inLine +BABEL_OP3_401_86676_20140924_200749_outLine +BABEL_OP3_401_87073_20140917_201716_inLine +BABEL_OP3_401_87073_20140917_201716_outLine +BABEL_OP3_401_87871_20141018_185934_inLine +BABEL_OP3_401_87871_20141018_185934_outLine +BABEL_OP3_401_87921_20141010_173551_inLine +BABEL_OP3_401_87921_20141010_173551_outLine +BABEL_OP3_401_88783_20141022_171250_inLine +BABEL_OP3_401_88783_20141022_171250_outLine +BABEL_OP3_401_90737_20141020_180826_inLine +BABEL_OP3_401_90737_20141020_180826_outLine +BABEL_OP3_401_91891_20141001_130023_inLine +BABEL_OP3_401_91891_20141001_130023_outLine +BABEL_OP3_401_91977_20140925_184203_inLine +BABEL_OP3_401_91977_20140925_184203_outLine +BABEL_OP3_401_92065_20141017_191557_inLine +BABEL_OP3_401_92065_20141017_191557_outLine 
+BABEL_OP3_401_92736_20141017_194915_inLine +BABEL_OP3_401_92736_20141017_194915_outLine +BABEL_OP3_401_92740_20140926_150615_inLine +BABEL_OP3_401_92740_20140926_150615_outLine +BABEL_OP3_401_93490_20141106_171428_inLine +BABEL_OP3_401_93490_20141106_171428_outLine +BABEL_OP3_401_94745_20140923_154933_inLine +BABEL_OP3_401_94745_20140923_154933_outLine +BABEL_OP3_401_95077_20141010_153959_inLine +BABEL_OP3_401_95077_20141010_153959_outLine +BABEL_OP3_401_95294_20140923_173007_inLine +BABEL_OP3_401_95294_20140923_173007_outLine +BABEL_OP3_401_95446_20141112_154248_inLine +BABEL_OP3_401_95446_20141112_154248_outLine +BABEL_OP3_401_95663_20140917_182410_inLine +BABEL_OP3_401_95663_20140917_182410_outLine +BABEL_OP3_401_96324_20140921_170922_inLine +BABEL_OP3_401_96324_20140921_170922_outLine +BABEL_OP3_401_97376_20140929_154000_inLine +BABEL_OP3_401_97376_20140929_154000_outLine +BABEL_OP3_401_97772_20140917_144539_inLine +BABEL_OP3_401_97772_20140917_144539_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/untranscribed-training.list b/egs/babel/s5d/conf/lists/401-mongolian/untranscribed-training.list new file mode 100644 index 00000000000..45d13cc017e --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/untranscribed-training.list @@ -0,0 +1,530 @@ +BABEL_OP3_401_10184_20141027_150129_inLine +BABEL_OP3_401_10184_20141027_150129_outLine +BABEL_OP3_401_12321_20141101_210546_inLine +BABEL_OP3_401_12321_20141101_210546_outLine +BABEL_OP3_401_13178_20141101_181249_inLine +BABEL_OP3_401_13178_20141101_181249_outLine +BABEL_OP3_401_13189_20141114_170101_inLine +BABEL_OP3_401_13189_20141114_170101_outLine +BABEL_OP3_401_13189_20141114_174825_inLine +BABEL_OP3_401_13189_20141114_174825_outLine +BABEL_OP3_401_13427_20141027_145236_inLine +BABEL_OP3_401_13427_20141027_145236_outLine +BABEL_OP3_401_13792_20141001_135314_inLine +BABEL_OP3_401_13792_20141001_135314_outLine +BABEL_OP3_401_14440_20141101_191122_inLine +BABEL_OP3_401_14440_20141101_191122_outLine +BABEL_OP3_401_15042_20141125_181147_inLine +BABEL_OP3_401_15042_20141125_181147_outLine +BABEL_OP3_401_17280_20141016_160258_inLine +BABEL_OP3_401_17280_20141016_160258_outLine +BABEL_OP3_401_17496_20141103_155636_inLine +BABEL_OP3_401_17496_20141103_155636_outLine +BABEL_OP3_401_17496_20141103_160636_inLine +BABEL_OP3_401_17496_20141103_160636_outLine +BABEL_OP3_401_18118_20141125_212628_inLine +BABEL_OP3_401_18118_20141125_212628_outLine +BABEL_OP3_401_19130_20141125_202758_inLine +BABEL_OP3_401_19130_20141125_202758_outLine +BABEL_OP3_401_20738_20141118_152747_inLine +BABEL_OP3_401_20738_20141118_152747_outLine +BABEL_OP3_401_20800_20141002_124944_inLine +BABEL_OP3_401_20800_20141002_124944_outLine +BABEL_OP3_401_21435_20141121_182922_inLine +BABEL_OP3_401_21435_20141121_182922_outLine +BABEL_OP3_401_21624_20141124_180637_inLine +BABEL_OP3_401_21624_20141124_180637_outLine +BABEL_OP3_401_22170_20141118_234144_inLine +BABEL_OP3_401_22170_20141118_234144_outLine +BABEL_OP3_401_22280_20141029_152053_inLine +BABEL_OP3_401_22280_20141029_152053_outLine +BABEL_OP3_401_23151_20141113_164930_inLine +BABEL_OP3_401_23151_20141113_164930_outLine +BABEL_OP3_401_23151_20141113_165631_inLine +BABEL_OP3_401_23151_20141113_165631_outLine +BABEL_OP3_401_24017_20141104_165843_inLine +BABEL_OP3_401_24017_20141104_165843_outLine +BABEL_OP3_401_24017_20141104_170453_inLine +BABEL_OP3_401_24017_20141104_170453_outLine +BABEL_OP3_401_24033_20141111_211829_inLine +BABEL_OP3_401_24033_20141111_211829_outLine +BABEL_OP3_401_24239_20141207_015922_inLine 
+BABEL_OP3_401_24239_20141207_015922_outLine +BABEL_OP3_401_24501_20141117_164716_inLine +BABEL_OP3_401_24501_20141117_164716_outLine +BABEL_OP3_401_25015_20141125_191714_inLine +BABEL_OP3_401_25015_20141125_191714_outLine +BABEL_OP3_401_30653_20141119_225659_inLine +BABEL_OP3_401_30653_20141119_225659_outLine +BABEL_OP3_401_30869_20141105_163908_inLine +BABEL_OP3_401_30869_20141105_163908_outLine +BABEL_OP3_401_30869_20141105_165054_inLine +BABEL_OP3_401_30869_20141105_165054_outLine +BABEL_OP3_401_31182_20141103_193721_inLine +BABEL_OP3_401_31182_20141103_193721_outLine +BABEL_OP3_401_31583_20141017_202348_inLine +BABEL_OP3_401_31583_20141017_202348_outLine +BABEL_OP3_401_32832_20141106_155802_inLine +BABEL_OP3_401_32832_20141106_155802_outLine +BABEL_OP3_401_33635_20141018_183504_inLine +BABEL_OP3_401_33635_20141018_183504_outLine +BABEL_OP3_401_33635_20141018_204625_inLine +BABEL_OP3_401_33635_20141018_204625_outLine +BABEL_OP3_401_33840_20141105_190509_inLine +BABEL_OP3_401_33840_20141105_190509_outLine +BABEL_OP3_401_34336_20141015_173115_inLine +BABEL_OP3_401_34336_20141015_173115_outLine +BABEL_OP3_401_35706_20141123_232430_inLine +BABEL_OP3_401_35706_20141123_232430_outLine +BABEL_OP3_401_36059_20141120_202614_inLine +BABEL_OP3_401_36059_20141120_202614_outLine +BABEL_OP3_401_36594_20141118_225937_inLine +BABEL_OP3_401_36594_20141118_225937_outLine +BABEL_OP3_401_36669_20141028_143332_inLine +BABEL_OP3_401_36669_20141028_143332_outLine +BABEL_OP3_401_36900_20141105_173543_inLine +BABEL_OP3_401_36900_20141105_173543_outLine +BABEL_OP3_401_38076_20141101_181606_inLine +BABEL_OP3_401_38076_20141101_181606_outLine +BABEL_OP3_401_38125_20141207_005829_inLine +BABEL_OP3_401_38125_20141207_005829_outLine +BABEL_OP3_401_38125_20141207_010858_inLine +BABEL_OP3_401_38125_20141207_010858_outLine +BABEL_OP3_401_38588_20141016_200521_inLine +BABEL_OP3_401_38588_20141016_200521_outLine +BABEL_OP3_401_39099_20141204_002759_inLine +BABEL_OP3_401_39099_20141204_002759_outLine +BABEL_OP3_401_40740_20141110_180540_inLine +BABEL_OP3_401_40740_20141110_180540_outLine +BABEL_OP3_401_41109_20141111_174909_inLine +BABEL_OP3_401_41109_20141111_174909_outLine +BABEL_OP3_401_41233_20141104_180556_inLine +BABEL_OP3_401_41233_20141104_180556_outLine +BABEL_OP3_401_41609_20140930_160252_inLine +BABEL_OP3_401_41609_20140930_160252_outLine +BABEL_OP3_401_42718_20141203_164339_inLine +BABEL_OP3_401_42718_20141203_164339_outLine +BABEL_OP3_401_42718_20141203_165811_inLine +BABEL_OP3_401_42718_20141203_165811_outLine +BABEL_OP3_401_44255_20141114_190226_inLine +BABEL_OP3_401_44255_20141114_190226_outLine +BABEL_OP3_401_44290_20141125_182137_inLine +BABEL_OP3_401_44290_20141125_182137_outLine +BABEL_OP3_401_45770_20140930_173734_inLine +BABEL_OP3_401_45770_20140930_173734_outLine +BABEL_OP3_401_46008_20141127_210910_inLine +BABEL_OP3_401_46008_20141127_210910_outLine +BABEL_OP3_401_46763_20141119_173306_inLine +BABEL_OP3_401_46763_20141119_173306_outLine +BABEL_OP3_401_48907_20141125_203242_inLine +BABEL_OP3_401_48907_20141125_203242_outLine +BABEL_OP3_401_49287_20141102_150144_inLine +BABEL_OP3_401_49287_20141102_150144_outLine +BABEL_OP3_401_49502_20140924_135047_inLine +BABEL_OP3_401_49502_20140924_135047_outLine +BABEL_OP3_401_49637_20140929_203313_inLine +BABEL_OP3_401_49637_20140929_203313_outLine +BABEL_OP3_401_49775_20140917_162425_inLine +BABEL_OP3_401_49775_20140917_162425_outLine +BABEL_OP3_401_49812_20141111_182212_inLine +BABEL_OP3_401_49812_20141111_182212_outLine 
+BABEL_OP3_401_49902_20141015_154547_inLine +BABEL_OP3_401_49902_20141015_154547_outLine +BABEL_OP3_401_50090_20141031_162652_inLine +BABEL_OP3_401_50090_20141031_162652_outLine +BABEL_OP3_401_50175_20140923_130231_inLine +BABEL_OP3_401_50175_20140923_130231_outLine +BABEL_OP3_401_50958_20141018_223514_inLine +BABEL_OP3_401_50958_20141018_223514_outLine +BABEL_OP3_401_51417_20141110_191727_inLine +BABEL_OP3_401_51417_20141110_191727_outLine +BABEL_OP3_401_51530_20141204_001348_inLine +BABEL_OP3_401_51530_20141204_001348_outLine +BABEL_OP3_401_53072_20141127_201357_inLine +BABEL_OP3_401_53072_20141127_201357_outLine +BABEL_OP3_401_53415_20141118_232010_inLine +BABEL_OP3_401_53415_20141118_232010_outLine +BABEL_OP3_401_53492_20141124_201111_inLine +BABEL_OP3_401_53492_20141124_201111_outLine +BABEL_OP3_401_53665_20141125_180322_inLine +BABEL_OP3_401_53665_20141125_180322_outLine +BABEL_OP3_401_54160_20140930_215406_inLine +BABEL_OP3_401_54160_20140930_215406_outLine +BABEL_OP3_401_54405_20141027_133437_inLine +BABEL_OP3_401_54405_20141027_133437_outLine +BABEL_OP3_401_55742_20141003_153216_inLine +BABEL_OP3_401_55742_20141003_153216_outLine +BABEL_OP3_401_55818_20140930_191724_inLine +BABEL_OP3_401_55818_20140930_191724_outLine +BABEL_OP3_401_55950_20141125_195752_inLine +BABEL_OP3_401_55950_20141125_195752_outLine +BABEL_OP3_401_56019_20141118_211141_inLine +BABEL_OP3_401_56019_20141118_211141_outLine +BABEL_OP3_401_56523_20141017_152325_inLine +BABEL_OP3_401_56523_20141017_152325_outLine +BABEL_OP3_401_56743_20141016_193127_inLine +BABEL_OP3_401_56743_20141016_193127_outLine +BABEL_OP3_401_57067_20141110_211445_inLine +BABEL_OP3_401_57067_20141110_211445_outLine +BABEL_OP3_401_57609_20141028_162956_inLine +BABEL_OP3_401_57609_20141028_162956_outLine +BABEL_OP3_401_57650_20141117_142921_inLine +BABEL_OP3_401_57650_20141117_142921_outLine +BABEL_OP3_401_57654_20141002_120228_inLine +BABEL_OP3_401_57654_20141002_120228_outLine +BABEL_OP3_401_57678_20141015_161604_inLine +BABEL_OP3_401_57678_20141015_161604_outLine +BABEL_OP3_401_58585_20141112_192259_inLine +BABEL_OP3_401_58585_20141112_192259_outLine +BABEL_OP3_401_58850_20141017_141308_inLine +BABEL_OP3_401_58850_20141017_141308_outLine +BABEL_OP3_401_58926_20141003_143419_inLine +BABEL_OP3_401_58926_20141003_143419_outLine +BABEL_OP3_401_59291_20141017_162350_inLine +BABEL_OP3_401_59291_20141017_162350_outLine +BABEL_OP3_401_59864_20141206_195010_inLine +BABEL_OP3_401_59864_20141206_195010_outLine +BABEL_OP3_401_60626_20141003_151111_inLine +BABEL_OP3_401_60626_20141003_151111_outLine +BABEL_OP3_401_60661_20141002_182507_inLine +BABEL_OP3_401_60661_20141002_182507_outLine +BABEL_OP3_401_60836_20141013_164932_inLine +BABEL_OP3_401_60836_20141013_164932_outLine +BABEL_OP3_401_61219_20141015_175439_inLine +BABEL_OP3_401_61219_20141015_175439_outLine +BABEL_OP3_401_61357_20141113_164017_inLine +BABEL_OP3_401_61357_20141113_164017_outLine +BABEL_OP3_401_61435_20141104_205806_inLine +BABEL_OP3_401_61435_20141104_205806_outLine +BABEL_OP3_401_62177_20141114_161832_inLine +BABEL_OP3_401_62177_20141114_161832_outLine +BABEL_OP3_401_62289_20141204_011459_inLine +BABEL_OP3_401_62289_20141204_011459_outLine +BABEL_OP3_401_62289_20141204_012356_inLine +BABEL_OP3_401_62289_20141204_012356_outLine +BABEL_OP3_401_62430_20141117_174830_inLine +BABEL_OP3_401_62430_20141117_174830_outLine +BABEL_OP3_401_62835_20141020_153234_inLine +BABEL_OP3_401_62835_20141020_153234_outLine +BABEL_OP3_401_63220_20141101_205612_inLine 
+BABEL_OP3_401_63220_20141101_205612_outLine +BABEL_OP3_401_63523_20141204_010313_inLine +BABEL_OP3_401_63523_20141204_010313_outLine +BABEL_OP3_401_63757_20141029_150937_inLine +BABEL_OP3_401_63757_20141029_150937_outLine +BABEL_OP3_401_63938_20141114_163623_inLine +BABEL_OP3_401_63938_20141114_163623_outLine +BABEL_OP3_401_64350_20141002_131743_inLine +BABEL_OP3_401_64350_20141002_131743_outLine +BABEL_OP3_401_64638_20140923_193255_inLine +BABEL_OP3_401_64638_20140923_193255_outLine +BABEL_OP3_401_64759_20140930_133630_inLine +BABEL_OP3_401_64759_20140930_133630_outLine +BABEL_OP3_401_64768_20141015_185430_inLine +BABEL_OP3_401_64768_20141015_185430_outLine +BABEL_OP3_401_64796_20140922_122936_inLine +BABEL_OP3_401_64796_20140922_122936_outLine +BABEL_OP3_401_65298_20141113_154021_inLine +BABEL_OP3_401_65298_20141113_154021_outLine +BABEL_OP3_401_65477_20141017_155857_inLine +BABEL_OP3_401_65477_20141017_155857_outLine +BABEL_OP3_401_65882_20141003_133913_inLine +BABEL_OP3_401_65882_20141003_133913_outLine +BABEL_OP3_401_66045_20141023_123024_inLine +BABEL_OP3_401_66045_20141023_123024_outLine +BABEL_OP3_401_66177_20141118_200110_inLine +BABEL_OP3_401_66177_20141118_200110_outLine +BABEL_OP3_401_66967_20140929_190454_inLine +BABEL_OP3_401_66967_20140929_190454_outLine +BABEL_OP3_401_67373_20141003_140545_inLine +BABEL_OP3_401_67373_20141003_140545_outLine +BABEL_OP3_401_67592_20141102_134846_outLine +BABEL_OP3_401_67794_20141003_133705_inLine +BABEL_OP3_401_67794_20141003_133705_outLine +BABEL_OP3_401_67964_20141201_174143_inLine +BABEL_OP3_401_67964_20141201_174143_outLine +BABEL_OP3_401_67999_20141111_153758_inLine +BABEL_OP3_401_67999_20141111_153758_outLine +BABEL_OP3_401_68182_20141119_114536_inLine +BABEL_OP3_401_68182_20141119_114536_outLine +BABEL_OP3_401_68182_20141119_115542_inLine +BABEL_OP3_401_68182_20141119_115542_outLine +BABEL_OP3_401_69992_20140930_195445_inLine +BABEL_OP3_401_69992_20140930_195445_outLine +BABEL_OP3_401_70110_20140917_141249_inLine +BABEL_OP3_401_70110_20140917_141249_outLine +BABEL_OP3_401_70386_20141015_182629_inLine +BABEL_OP3_401_70386_20141015_182629_outLine +BABEL_OP3_401_70601_20141016_160902_inLine +BABEL_OP3_401_70601_20141016_160902_outLine +BABEL_OP3_401_70713_20141118_164200_inLine +BABEL_OP3_401_70713_20141118_164200_outLine +BABEL_OP3_401_71038_20141112_182205_inLine +BABEL_OP3_401_71038_20141112_182205_outLine +BABEL_OP3_401_71038_20141112_183801_inLine +BABEL_OP3_401_71038_20141112_183801_outLine +BABEL_OP3_401_71038_20141112_184910_inLine +BABEL_OP3_401_71038_20141112_184910_outLine +BABEL_OP3_401_71282_20141113_172102_inLine +BABEL_OP3_401_71282_20141113_172102_outLine +BABEL_OP3_401_71333_20141014_190834_inLine +BABEL_OP3_401_71333_20141014_190834_outLine +BABEL_OP3_401_71704_20141002_173424_inLine +BABEL_OP3_401_71704_20141002_173424_outLine +BABEL_OP3_401_71780_20141006_202842_inLine +BABEL_OP3_401_71780_20141006_202842_outLine +BABEL_OP3_401_72349_20141125_020034_inLine +BABEL_OP3_401_72349_20141125_020034_outLine +BABEL_OP3_401_72587_20141107_174322_inLine +BABEL_OP3_401_72587_20141107_174322_outLine +BABEL_OP3_401_72733_20141126_185701_inLine +BABEL_OP3_401_72733_20141126_185701_outLine +BABEL_OP3_401_73072_20141001_214124_inLine +BABEL_OP3_401_73072_20141001_214124_outLine +BABEL_OP3_401_73119_20141016_201748_inLine +BABEL_OP3_401_73119_20141016_201748_outLine +BABEL_OP3_401_73301_20141014_154044_inLine +BABEL_OP3_401_73301_20141014_154044_outLine +BABEL_OP3_401_73622_20141001_214706_inLine 
+BABEL_OP3_401_73622_20141001_214706_outLine +BABEL_OP3_401_73757_20141022_145713_inLine +BABEL_OP3_401_73757_20141022_145713_outLine +BABEL_OP3_401_73837_20141014_174244_inLine +BABEL_OP3_401_73837_20141014_174244_outLine +BABEL_OP3_401_74111_20141102_152314_inLine +BABEL_OP3_401_74280_20140917_171519_inLine +BABEL_OP3_401_74280_20140917_171519_outLine +BABEL_OP3_401_74455_20141113_142847_inLine +BABEL_OP3_401_74455_20141113_142847_outLine +BABEL_OP3_401_74641_20141029_170835_inLine +BABEL_OP3_401_74641_20141029_170835_outLine +BABEL_OP3_401_74728_20141125_185810_inLine +BABEL_OP3_401_74728_20141125_185810_outLine +BABEL_OP3_401_75223_20140929_144010_inLine +BABEL_OP3_401_75223_20140929_144010_outLine +BABEL_OP3_401_75869_20141122_162915_inLine +BABEL_OP3_401_75869_20141122_162915_outLine +BABEL_OP3_401_75869_20141122_163817_inLine +BABEL_OP3_401_75869_20141122_163817_outLine +BABEL_OP3_401_75993_20141003_155108_inLine +BABEL_OP3_401_75993_20141003_155108_outLine +BABEL_OP3_401_76155_20141018_235119_inLine +BABEL_OP3_401_76155_20141018_235119_outLine +BABEL_OP3_401_76372_20141122_205123_inLine +BABEL_OP3_401_76372_20141122_205123_outLine +BABEL_OP3_401_76756_20141031_190329_inLine +BABEL_OP3_401_76756_20141031_190329_outLine +BABEL_OP3_401_76773_20141002_161621_inLine +BABEL_OP3_401_76773_20141002_161621_outLine +BABEL_OP3_401_77112_20141008_135410_inLine +BABEL_OP3_401_77112_20141008_135410_outLine +BABEL_OP3_401_77391_20141014_202916_inLine +BABEL_OP3_401_77391_20141014_202916_outLine +BABEL_OP3_401_77391_20141014_204156_inLine +BABEL_OP3_401_77391_20141014_204156_outLine +BABEL_OP3_401_77427_20141019_151638_inLine +BABEL_OP3_401_77427_20141019_151638_outLine +BABEL_OP3_401_78360_20141112_174704_inLine +BABEL_OP3_401_78360_20141112_174704_outLine +BABEL_OP3_401_78454_20141030_190417_inLine +BABEL_OP3_401_78454_20141030_190417_outLine +BABEL_OP3_401_78609_20141101_190650_inLine +BABEL_OP3_401_78609_20141101_190650_outLine +BABEL_OP3_401_78609_20141101_191730_inLine +BABEL_OP3_401_78609_20141101_191730_outLine +BABEL_OP3_401_78943_20141015_141252_inLine +BABEL_OP3_401_78943_20141015_141252_outLine +BABEL_OP3_401_78976_20141016_202006_inLine +BABEL_OP3_401_78976_20141016_202006_outLine +BABEL_OP3_401_79505_20141125_151308_inLine +BABEL_OP3_401_79505_20141125_151308_outLine +BABEL_OP3_401_79590_20141019_151813_inLine +BABEL_OP3_401_79590_20141019_151813_outLine +BABEL_OP3_401_79820_20141015_191402_inLine +BABEL_OP3_401_79820_20141015_191402_outLine +BABEL_OP3_401_79858_20140930_180452_inLine +BABEL_OP3_401_79858_20140930_180452_outLine +BABEL_OP3_401_80577_20141124_151617_inLine +BABEL_OP3_401_80577_20141124_151617_outLine +BABEL_OP3_401_80622_20141031_193633_inLine +BABEL_OP3_401_80622_20141031_193633_outLine +BABEL_OP3_401_81229_20141017_145439_inLine +BABEL_OP3_401_81229_20141017_145439_outLine +BABEL_OP3_401_82030_20141126_190214_inLine +BABEL_OP3_401_82030_20141126_190214_outLine +BABEL_OP3_401_82637_20140922_152004_inLine +BABEL_OP3_401_82637_20140922_152004_outLine +BABEL_OP3_401_82863_20141020_125644_inLine +BABEL_OP3_401_82863_20141020_125644_outLine +BABEL_OP3_401_82979_20141016_150329_inLine +BABEL_OP3_401_82979_20141016_150329_outLine +BABEL_OP3_401_83062_20141124_210713_inLine +BABEL_OP3_401_83062_20141124_210713_outLine +BABEL_OP3_401_83366_20141107_185153_inLine +BABEL_OP3_401_83366_20141107_185153_outLine +BABEL_OP3_401_83775_20141016_165202_inLine +BABEL_OP3_401_83775_20141016_165202_outLine +BABEL_OP3_401_83783_20141029_142056_inLine 
+BABEL_OP3_401_83783_20141029_142056_outLine +BABEL_OP3_401_84055_20141118_213900_inLine +BABEL_OP3_401_84055_20141118_213900_outLine +BABEL_OP3_401_84061_20141019_160653_inLine +BABEL_OP3_401_84061_20141019_160653_outLine +BABEL_OP3_401_84125_20140919_142411_inLine +BABEL_OP3_401_84125_20140919_142411_outLine +BABEL_OP3_401_84583_20141028_135606_inLine +BABEL_OP3_401_84583_20141028_135606_outLine +BABEL_OP3_401_84605_20141013_223927_inLine +BABEL_OP3_401_84605_20141013_223927_outLine +BABEL_OP3_401_84737_20141114_223714_inLine +BABEL_OP3_401_84737_20141114_223714_outLine +BABEL_OP3_401_84768_20141001_160652_inLine +BABEL_OP3_401_84768_20141001_160652_outLine +BABEL_OP3_401_85048_20141030_163324_inLine +BABEL_OP3_401_85048_20141030_163324_outLine +BABEL_OP3_401_85179_20141105_155540_inLine +BABEL_OP3_401_85179_20141105_155540_outLine +BABEL_OP3_401_85248_20141114_150825_inLine +BABEL_OP3_401_85248_20141114_150825_outLine +BABEL_OP3_401_85248_20141114_152742_inLine +BABEL_OP3_401_85248_20141114_152742_outLine +BABEL_OP3_401_85325_20141127_141209_inLine +BABEL_OP3_401_85325_20141127_141209_outLine +BABEL_OP3_401_85340_20141006_165058_inLine +BABEL_OP3_401_85340_20141006_165058_outLine +BABEL_OP3_401_86472_20140924_120802_inLine +BABEL_OP3_401_86472_20140924_120802_outLine +BABEL_OP3_401_86748_20141117_205420_inLine +BABEL_OP3_401_86748_20141117_205420_outLine +BABEL_OP3_401_86860_20141204_001000_inLine +BABEL_OP3_401_86860_20141204_001000_outLine +BABEL_OP3_401_86888_20141101_175833_inLine +BABEL_OP3_401_86888_20141101_175833_outLine +BABEL_OP3_401_86952_20141003_103859_inLine +BABEL_OP3_401_86952_20141003_103859_outLine +BABEL_OP3_401_87074_20141006_143605_inLine +BABEL_OP3_401_87074_20141006_143605_outLine +BABEL_OP3_401_87489_20141118_173238_inLine +BABEL_OP3_401_87489_20141118_173238_outLine +BABEL_OP3_401_87545_20141204_001833_inLine +BABEL_OP3_401_87545_20141204_001833_outLine +BABEL_OP3_401_87629_20141028_191608_inLine +BABEL_OP3_401_87629_20141028_191608_outLine +BABEL_OP3_401_87693_20141003_190102_inLine +BABEL_OP3_401_87693_20141003_190102_outLine +BABEL_OP3_401_88372_20141125_142302_inLine +BABEL_OP3_401_88372_20141125_142302_outLine +BABEL_OP3_401_88601_20141023_164043_inLine +BABEL_OP3_401_88601_20141023_164043_outLine +BABEL_OP3_401_88669_20141031_182135_inLine +BABEL_OP3_401_88669_20141031_182135_outLine +BABEL_OP3_401_88812_20141203_173638_inLine +BABEL_OP3_401_88812_20141203_173638_outLine +BABEL_OP3_401_88812_20141203_180453_inLine +BABEL_OP3_401_88812_20141203_180453_outLine +BABEL_OP3_401_89045_20140917_131337_inLine +BABEL_OP3_401_89045_20140917_131337_outLine +BABEL_OP3_401_89059_20141111_185303_inLine +BABEL_OP3_401_89059_20141111_185303_outLine +BABEL_OP3_401_89457_20141020_143004_inLine +BABEL_OP3_401_89457_20141020_143004_outLine +BABEL_OP3_401_89560_20141102_161259_inLine +BABEL_OP3_401_89560_20141102_161259_outLine +BABEL_OP3_401_89888_20141002_173642_inLine +BABEL_OP3_401_89888_20141002_173642_outLine +BABEL_OP3_401_89888_20141002_175247_inLine +BABEL_OP3_401_89888_20141002_175247_outLine +BABEL_OP3_401_89943_20141014_163254_inLine +BABEL_OP3_401_89943_20141014_163254_outLine +BABEL_OP3_401_89943_20141014_165144_inLine +BABEL_OP3_401_89943_20141014_165144_outLine +BABEL_OP3_401_90080_20141124_210928_inLine +BABEL_OP3_401_90080_20141124_210928_outLine +BABEL_OP3_401_91080_20141107_184614_inLine +BABEL_OP3_401_91080_20141107_184614_outLine +BABEL_OP3_401_91336_20141022_164858_inLine +BABEL_OP3_401_91336_20141022_164858_outLine 
+BABEL_OP3_401_91372_20141126_174359_inLine +BABEL_OP3_401_91372_20141126_174359_outLine +BABEL_OP3_401_91825_20140930_140910_inLine +BABEL_OP3_401_91825_20140930_140910_outLine +BABEL_OP3_401_91825_20140930_142615_inLine +BABEL_OP3_401_91825_20140930_142615_outLine +BABEL_OP3_401_91930_20141117_203237_inLine +BABEL_OP3_401_91930_20141117_203237_outLine +BABEL_OP3_401_91944_20141002_002457_inLine +BABEL_OP3_401_91944_20141002_002457_outLine +BABEL_OP3_401_92096_20141122_181058_inLine +BABEL_OP3_401_92096_20141122_181058_outLine +BABEL_OP3_401_92176_20141022_194334_inLine +BABEL_OP3_401_92176_20141022_194334_outLine +BABEL_OP3_401_92356_20141113_184902_inLine +BABEL_OP3_401_92356_20141113_184902_outLine +BABEL_OP3_401_92509_20140919_170134_inLine +BABEL_OP3_401_92509_20140919_170134_outLine +BABEL_OP3_401_92557_20141113_141949_inLine +BABEL_OP3_401_92557_20141113_141949_outLine +BABEL_OP3_401_92886_20141008_194243_inLine +BABEL_OP3_401_92886_20141008_194243_outLine +BABEL_OP3_401_92942_20141031_154005_inLine +BABEL_OP3_401_92942_20141031_154005_outLine +BABEL_OP3_401_93469_20141204_000050_inLine +BABEL_OP3_401_93469_20141204_000050_outLine +BABEL_OP3_401_93515_20141207_011722_inLine +BABEL_OP3_401_93515_20141207_011722_outLine +BABEL_OP3_401_93604_20141206_154822_inLine +BABEL_OP3_401_93604_20141206_154822_outLine +BABEL_OP3_401_93861_20141022_174829_inLine +BABEL_OP3_401_93861_20141022_174829_outLine +BABEL_OP3_401_94141_20141125_195408_inLine +BABEL_OP3_401_94141_20141125_195408_outLine +BABEL_OP3_401_94409_20141019_155250_inLine +BABEL_OP3_401_94409_20141019_155250_outLine +BABEL_OP3_401_95269_20141016_175058_inLine +BABEL_OP3_401_95269_20141016_175058_outLine +BABEL_OP3_401_95269_20141016_175950_inLine +BABEL_OP3_401_95269_20141016_175950_outLine +BABEL_OP3_401_95399_20141021_140337_inLine +BABEL_OP3_401_95399_20141021_140337_outLine +BABEL_OP3_401_96059_20141201_200308_inLine +BABEL_OP3_401_96059_20141201_200308_outLine +BABEL_OP3_401_96190_20141013_142533_inLine +BABEL_OP3_401_96190_20141013_142533_outLine +BABEL_OP3_401_96405_20141013_185112_inLine +BABEL_OP3_401_96405_20141013_185112_outLine +BABEL_OP3_401_96405_20141013_195512_inLine +BABEL_OP3_401_96405_20141013_195512_outLine +BABEL_OP3_401_96584_20141114_205949_inLine +BABEL_OP3_401_96584_20141114_205949_outLine +BABEL_OP3_401_96934_20141015_153021_inLine +BABEL_OP3_401_96934_20141015_153021_outLine +BABEL_OP3_401_97097_20141122_194201_inLine +BABEL_OP3_401_97097_20141122_194201_outLine +BABEL_OP3_401_97731_20141105_135405_inLine +BABEL_OP3_401_97731_20141105_135405_outLine +BABEL_OP3_401_97896_20141021_124204_inLine +BABEL_OP3_401_97896_20141021_124204_outLine +BABEL_OP3_401_98365_20141029_133629_inLine +BABEL_OP3_401_98365_20141029_133629_outLine +BABEL_OP3_401_98580_20141021_140835_inLine +BABEL_OP3_401_98580_20141021_140835_outLine +BABEL_OP3_401_98888_20141019_153225_inLine +BABEL_OP3_401_98888_20141019_153225_outLine +BABEL_OP3_401_98888_20141019_160421_inLine +BABEL_OP3_401_98888_20141019_160421_outLine +BABEL_OP3_401_99264_20141104_195940_inLine +BABEL_OP3_401_99264_20141104_195940_outLine +BABEL_OP3_401_99264_20141104_200707_inLine +BABEL_OP3_401_99264_20141104_200707_outLine +BABEL_OP3_401_99289_20141122_150548_inLine +BABEL_OP3_401_99289_20141122_150548_outLine +BABEL_OP3_401_99487_20141001_154915_inLine +BABEL_OP3_401_99487_20141001_154915_outLine +BABEL_OP3_401_99487_20141001_155922_inLine +BABEL_OP3_401_99487_20141001_155922_outLine +BABEL_OP3_401_99516_20140924_152057_inLine 
+BABEL_OP3_401_99516_20140924_152057_outLine +BABEL_OP3_401_99718_20141003_130643_inLine +BABEL_OP3_401_99718_20141003_130643_outLine +BABEL_OP3_401_99813_20141027_183714_inLine +BABEL_OP3_401_99813_20141027_183714_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/dev.2h.list b/egs/babel/s5d/conf/lists/402-javanese/dev.2h.list new file mode 100644 index 00000000000..46233026964 --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/dev.2h.list @@ -0,0 +1,122 @@ +BABEL_OP3_402_10184_20141119_194233_inLine +BABEL_OP3_402_10184_20141119_194233_outLine +BABEL_OP3_402_11581_20141124_181058_inLine +BABEL_OP3_402_11581_20141124_181058_outLine +BABEL_OP3_402_15535_20150104_232347_inLine +BABEL_OP3_402_15535_20150104_232347_outLine +BABEL_OP3_402_20133_20140911_170812_inLine +BABEL_OP3_402_20133_20140911_170812_outLine +BABEL_OP3_402_21393_20150304_163256_inLine +BABEL_OP3_402_21393_20150304_163256_outLine +BABEL_OP3_402_21581_20141107_151147_inLine +BABEL_OP3_402_21581_20141107_151147_outLine +BABEL_OP3_402_21807_20141125_194924_inLine +BABEL_OP3_402_21807_20141125_194924_outLine +BABEL_OP3_402_23046_20141103_212247_inLine +BABEL_OP3_402_23046_20141103_212247_outLine +BABEL_OP3_402_23505_20141029_003347_inLine +BABEL_OP3_402_23505_20141029_003347_outLine +BABEL_OP3_402_24982_20141027_223126_inLine +BABEL_OP3_402_24982_20141027_223126_outLine +BABEL_OP3_402_27590_20141227_191710_inLine +BABEL_OP3_402_27590_20141227_191710_outLine +BABEL_OP3_402_27841_20150112_180404_inLine +BABEL_OP3_402_27841_20150112_180404_outLine +BABEL_OP3_402_28012_20150105_215005_inLine +BABEL_OP3_402_28012_20150105_215005_outLine +BABEL_OP3_402_36293_20141001_145552_inLine +BABEL_OP3_402_36293_20141001_145552_outLine +BABEL_OP3_402_36505_20150106_201700_inLine +BABEL_OP3_402_36505_20150106_201700_outLine +BABEL_OP3_402_36894_20140919_222930_inLine +BABEL_OP3_402_36894_20140919_222930_outLine +BABEL_OP3_402_41592_20141118_011026_inLine +BABEL_OP3_402_41592_20141118_011026_outLine +BABEL_OP3_402_41598_20150201_142509_inLine +BABEL_OP3_402_41598_20150201_142509_outLine +BABEL_OP3_402_41745_20141108_162338_inLine +BABEL_OP3_402_41745_20141108_162338_outLine +BABEL_OP3_402_46261_20141112_161528_inLine +BABEL_OP3_402_46261_20141112_161528_outLine +BABEL_OP3_402_49118_20150201_023112_inLine +BABEL_OP3_402_49118_20150201_023112_outLine +BABEL_OP3_402_49437_20150112_204645_inLine +BABEL_OP3_402_49437_20150112_204645_outLine +BABEL_OP3_402_50427_20141119_174123_inLine +BABEL_OP3_402_50427_20141119_174123_outLine +BABEL_OP3_402_50549_20150113_123204_inLine +BABEL_OP3_402_50549_20150113_123204_outLine +BABEL_OP3_402_52490_20140916_192446_inLine +BABEL_OP3_402_52490_20140916_192446_outLine +BABEL_OP3_402_52717_20140923_130849_inLine +BABEL_OP3_402_52717_20140923_130849_outLine +BABEL_OP3_402_54162_20141116_183833_inLine +BABEL_OP3_402_54162_20141116_183833_outLine +BABEL_OP3_402_55968_20140912_204820_inLine +BABEL_OP3_402_55968_20140912_204820_outLine +BABEL_OP3_402_56306_20150103_203751_inLine +BABEL_OP3_402_56306_20150103_203751_outLine +BABEL_OP3_402_61731_20141008_152133_inLine +BABEL_OP3_402_61731_20141008_152133_outLine +BABEL_OP3_402_64494_20141012_193548_inLine +BABEL_OP3_402_64494_20141012_193548_outLine +BABEL_OP3_402_65882_20141102_005627_inLine +BABEL_OP3_402_65882_20141102_005627_outLine +BABEL_OP3_402_66519_20141107_200757_inLine +BABEL_OP3_402_66519_20141107_200757_outLine +BABEL_OP3_402_68068_20150119_135822_inLine +BABEL_OP3_402_68068_20150119_135822_outLine +BABEL_OP3_402_68182_20150111_002528_inLine 
+BABEL_OP3_402_68182_20150111_002528_outLine +BABEL_OP3_402_68289_20150216_010725_inLine +BABEL_OP3_402_68289_20150216_010725_outLine +BABEL_OP3_402_68385_20140911_143047_inLine +BABEL_OP3_402_68385_20140911_143047_outLine +BABEL_OP3_402_69746_20150110_165836_inLine +BABEL_OP3_402_69746_20150110_165836_outLine +BABEL_OP3_402_70343_20150212_004248_inLine +BABEL_OP3_402_70343_20150212_004248_outLine +BABEL_OP3_402_70386_20141116_170547_inLine +BABEL_OP3_402_70386_20141116_170547_outLine +BABEL_OP3_402_72324_20141201_191618_inLine +BABEL_OP3_402_72324_20141201_191618_outLine +BABEL_OP3_402_73511_20141226_133330_inLine +BABEL_OP3_402_73511_20141226_133330_outLine +BABEL_OP3_402_73837_20141101_183259_inLine +BABEL_OP3_402_73837_20141101_183259_outLine +BABEL_OP3_402_78398_20141107_225319_inLine +BABEL_OP3_402_78398_20141107_225319_outLine +BABEL_OP3_402_78454_20141128_203259_inLine +BABEL_OP3_402_78454_20141128_203259_outLine +BABEL_OP3_402_78604_20141031_181612_inLine +BABEL_OP3_402_78604_20141031_181612_outLine +BABEL_OP3_402_81433_20141121_014829_inLine +BABEL_OP3_402_81433_20141121_014829_outLine +BABEL_OP3_402_81553_20150124_004852_inLine +BABEL_OP3_402_81553_20150124_004852_outLine +BABEL_OP3_402_82935_20150104_005835_inLine +BABEL_OP3_402_82935_20150104_005835_outLine +BABEL_OP3_402_86467_20140920_125939_inLine +BABEL_OP3_402_86467_20140920_125939_outLine +BABEL_OP3_402_86748_20150131_001317_inLine +BABEL_OP3_402_86748_20150131_001317_outLine +BABEL_OP3_402_87921_20141225_203350_inLine +BABEL_OP3_402_87921_20141225_203350_outLine +BABEL_OP3_402_88445_20141205_204305_inLine +BABEL_OP3_402_88445_20141205_204305_outLine +BABEL_OP3_402_89203_20150131_215344_inLine +BABEL_OP3_402_89203_20150131_215344_outLine +BABEL_OP3_402_89457_20141117_212710_inLine +BABEL_OP3_402_89457_20141117_212710_outLine +BABEL_OP3_402_92176_20141216_022926_inLine +BABEL_OP3_402_92176_20141216_022926_outLine +BABEL_OP3_402_92176_20141222_021733_inLine +BABEL_OP3_402_92176_20141222_021733_outLine +BABEL_OP3_402_93632_20150119_150118_inLine +BABEL_OP3_402_93632_20150119_150118_outLine +BABEL_OP3_402_95399_20141111_162356_inLine +BABEL_OP3_402_95399_20141111_162356_outLine +BABEL_OP3_402_96584_20150107_184515_inLine +BABEL_OP3_402_96584_20150107_184515_outLine +BABEL_OP3_402_99401_20141024_202205_inLine +BABEL_OP3_402_99401_20141024_202205_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/dev.list b/egs/babel/s5d/conf/lists/402-javanese/dev.list new file mode 100644 index 00000000000..46233026964 --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/dev.list @@ -0,0 +1,122 @@ +BABEL_OP3_402_10184_20141119_194233_inLine +BABEL_OP3_402_10184_20141119_194233_outLine +BABEL_OP3_402_11581_20141124_181058_inLine +BABEL_OP3_402_11581_20141124_181058_outLine +BABEL_OP3_402_15535_20150104_232347_inLine +BABEL_OP3_402_15535_20150104_232347_outLine +BABEL_OP3_402_20133_20140911_170812_inLine +BABEL_OP3_402_20133_20140911_170812_outLine +BABEL_OP3_402_21393_20150304_163256_inLine +BABEL_OP3_402_21393_20150304_163256_outLine +BABEL_OP3_402_21581_20141107_151147_inLine +BABEL_OP3_402_21581_20141107_151147_outLine +BABEL_OP3_402_21807_20141125_194924_inLine +BABEL_OP3_402_21807_20141125_194924_outLine +BABEL_OP3_402_23046_20141103_212247_inLine +BABEL_OP3_402_23046_20141103_212247_outLine +BABEL_OP3_402_23505_20141029_003347_inLine +BABEL_OP3_402_23505_20141029_003347_outLine +BABEL_OP3_402_24982_20141027_223126_inLine +BABEL_OP3_402_24982_20141027_223126_outLine +BABEL_OP3_402_27590_20141227_191710_inLine 
+BABEL_OP3_402_27590_20141227_191710_outLine +BABEL_OP3_402_27841_20150112_180404_inLine +BABEL_OP3_402_27841_20150112_180404_outLine +BABEL_OP3_402_28012_20150105_215005_inLine +BABEL_OP3_402_28012_20150105_215005_outLine +BABEL_OP3_402_36293_20141001_145552_inLine +BABEL_OP3_402_36293_20141001_145552_outLine +BABEL_OP3_402_36505_20150106_201700_inLine +BABEL_OP3_402_36505_20150106_201700_outLine +BABEL_OP3_402_36894_20140919_222930_inLine +BABEL_OP3_402_36894_20140919_222930_outLine +BABEL_OP3_402_41592_20141118_011026_inLine +BABEL_OP3_402_41592_20141118_011026_outLine +BABEL_OP3_402_41598_20150201_142509_inLine +BABEL_OP3_402_41598_20150201_142509_outLine +BABEL_OP3_402_41745_20141108_162338_inLine +BABEL_OP3_402_41745_20141108_162338_outLine +BABEL_OP3_402_46261_20141112_161528_inLine +BABEL_OP3_402_46261_20141112_161528_outLine +BABEL_OP3_402_49118_20150201_023112_inLine +BABEL_OP3_402_49118_20150201_023112_outLine +BABEL_OP3_402_49437_20150112_204645_inLine +BABEL_OP3_402_49437_20150112_204645_outLine +BABEL_OP3_402_50427_20141119_174123_inLine +BABEL_OP3_402_50427_20141119_174123_outLine +BABEL_OP3_402_50549_20150113_123204_inLine +BABEL_OP3_402_50549_20150113_123204_outLine +BABEL_OP3_402_52490_20140916_192446_inLine +BABEL_OP3_402_52490_20140916_192446_outLine +BABEL_OP3_402_52717_20140923_130849_inLine +BABEL_OP3_402_52717_20140923_130849_outLine +BABEL_OP3_402_54162_20141116_183833_inLine +BABEL_OP3_402_54162_20141116_183833_outLine +BABEL_OP3_402_55968_20140912_204820_inLine +BABEL_OP3_402_55968_20140912_204820_outLine +BABEL_OP3_402_56306_20150103_203751_inLine +BABEL_OP3_402_56306_20150103_203751_outLine +BABEL_OP3_402_61731_20141008_152133_inLine +BABEL_OP3_402_61731_20141008_152133_outLine +BABEL_OP3_402_64494_20141012_193548_inLine +BABEL_OP3_402_64494_20141012_193548_outLine +BABEL_OP3_402_65882_20141102_005627_inLine +BABEL_OP3_402_65882_20141102_005627_outLine +BABEL_OP3_402_66519_20141107_200757_inLine +BABEL_OP3_402_66519_20141107_200757_outLine +BABEL_OP3_402_68068_20150119_135822_inLine +BABEL_OP3_402_68068_20150119_135822_outLine +BABEL_OP3_402_68182_20150111_002528_inLine +BABEL_OP3_402_68182_20150111_002528_outLine +BABEL_OP3_402_68289_20150216_010725_inLine +BABEL_OP3_402_68289_20150216_010725_outLine +BABEL_OP3_402_68385_20140911_143047_inLine +BABEL_OP3_402_68385_20140911_143047_outLine +BABEL_OP3_402_69746_20150110_165836_inLine +BABEL_OP3_402_69746_20150110_165836_outLine +BABEL_OP3_402_70343_20150212_004248_inLine +BABEL_OP3_402_70343_20150212_004248_outLine +BABEL_OP3_402_70386_20141116_170547_inLine +BABEL_OP3_402_70386_20141116_170547_outLine +BABEL_OP3_402_72324_20141201_191618_inLine +BABEL_OP3_402_72324_20141201_191618_outLine +BABEL_OP3_402_73511_20141226_133330_inLine +BABEL_OP3_402_73511_20141226_133330_outLine +BABEL_OP3_402_73837_20141101_183259_inLine +BABEL_OP3_402_73837_20141101_183259_outLine +BABEL_OP3_402_78398_20141107_225319_inLine +BABEL_OP3_402_78398_20141107_225319_outLine +BABEL_OP3_402_78454_20141128_203259_inLine +BABEL_OP3_402_78454_20141128_203259_outLine +BABEL_OP3_402_78604_20141031_181612_inLine +BABEL_OP3_402_78604_20141031_181612_outLine +BABEL_OP3_402_81433_20141121_014829_inLine +BABEL_OP3_402_81433_20141121_014829_outLine +BABEL_OP3_402_81553_20150124_004852_inLine +BABEL_OP3_402_81553_20150124_004852_outLine +BABEL_OP3_402_82935_20150104_005835_inLine +BABEL_OP3_402_82935_20150104_005835_outLine +BABEL_OP3_402_86467_20140920_125939_inLine +BABEL_OP3_402_86467_20140920_125939_outLine 
+BABEL_OP3_402_86748_20150131_001317_inLine +BABEL_OP3_402_86748_20150131_001317_outLine +BABEL_OP3_402_87921_20141225_203350_inLine +BABEL_OP3_402_87921_20141225_203350_outLine +BABEL_OP3_402_88445_20141205_204305_inLine +BABEL_OP3_402_88445_20141205_204305_outLine +BABEL_OP3_402_89203_20150131_215344_inLine +BABEL_OP3_402_89203_20150131_215344_outLine +BABEL_OP3_402_89457_20141117_212710_inLine +BABEL_OP3_402_89457_20141117_212710_outLine +BABEL_OP3_402_92176_20141216_022926_inLine +BABEL_OP3_402_92176_20141216_022926_outLine +BABEL_OP3_402_92176_20141222_021733_inLine +BABEL_OP3_402_92176_20141222_021733_outLine +BABEL_OP3_402_93632_20150119_150118_inLine +BABEL_OP3_402_93632_20150119_150118_outLine +BABEL_OP3_402_95399_20141111_162356_inLine +BABEL_OP3_402_95399_20141111_162356_outLine +BABEL_OP3_402_96584_20150107_184515_inLine +BABEL_OP3_402_96584_20150107_184515_outLine +BABEL_OP3_402_99401_20141024_202205_inLine +BABEL_OP3_402_99401_20141024_202205_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/eval.list b/egs/babel/s5d/conf/lists/402-javanese/eval.list new file mode 100644 index 00000000000..e0b81487a54 --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/eval.list @@ -0,0 +1,188 @@ +BABEL_OP3_402_10036_20141124_025321_inLine +BABEL_OP3_402_10036_20141124_025321_outLine +BABEL_OP3_402_10974_20141130_234329_inLine +BABEL_OP3_402_10974_20141130_234329_outLine +BABEL_OP3_402_12036_20141009_181351_inLine +BABEL_OP3_402_12036_20141009_181351_outLine +BABEL_OP3_402_12321_20150111_184045_inLine +BABEL_OP3_402_12321_20150111_184045_outLine +BABEL_OP3_402_12321_20150111_185205_inLine +BABEL_OP3_402_12321_20150111_185205_outLine +BABEL_OP3_402_13040_20141030_172740_inLine +BABEL_OP3_402_13040_20141030_172740_outLine +BABEL_OP3_402_13490_20141201_021241_inLine +BABEL_OP3_402_13490_20141201_021241_outLine +BABEL_OP3_402_13490_20141204_021416_inLine +BABEL_OP3_402_13490_20141204_021416_outLine +BABEL_OP3_402_14719_20150114_153747_inLine +BABEL_OP3_402_14719_20150114_153747_outLine +BABEL_OP3_402_15024_20141112_173834_inLine +BABEL_OP3_402_15024_20141112_173834_outLine +BABEL_OP3_402_15730_20141001_154550_inLine +BABEL_OP3_402_15730_20141001_154550_outLine +BABEL_OP3_402_16938_20141118_194456_inLine +BABEL_OP3_402_16938_20141118_194456_outLine +BABEL_OP3_402_17165_20141115_171729_inLine +BABEL_OP3_402_17165_20141115_171729_outLine +BABEL_OP3_402_19749_20150130_162450_inLine +BABEL_OP3_402_19749_20150130_162450_outLine +BABEL_OP3_402_19818_20141213_194147_inLine +BABEL_OP3_402_19818_20141213_194147_outLine +BABEL_OP3_402_21206_20141019_210210_inLine +BABEL_OP3_402_21206_20141019_210210_outLine +BABEL_OP3_402_23395_20141120_192928_inLine +BABEL_OP3_402_23395_20141120_192928_outLine +BABEL_OP3_402_23628_20141123_183457_inLine +BABEL_OP3_402_23628_20141123_183457_outLine +BABEL_OP3_402_26388_20141031_184504_inLine +BABEL_OP3_402_26388_20141031_184504_outLine +BABEL_OP3_402_28419_20141107_004309_inLine +BABEL_OP3_402_28419_20141107_004309_outLine +BABEL_OP3_402_28814_20150108_133247_inLine +BABEL_OP3_402_28814_20150108_133247_outLine +BABEL_OP3_402_29023_20141016_155119_inLine +BABEL_OP3_402_29023_20141016_155119_outLine +BABEL_OP3_402_30395_20141126_165430_inLine +BABEL_OP3_402_30395_20141126_165430_outLine +BABEL_OP3_402_30653_20150301_200332_inLine +BABEL_OP3_402_30653_20150301_200332_outLine +BABEL_OP3_402_33497_20141228_021512_inLine +BABEL_OP3_402_33497_20141228_021512_outLine +BABEL_OP3_402_33497_20141228_022418_inLine +BABEL_OP3_402_33497_20141228_022418_outLine 
+BABEL_OP3_402_34197_20140926_180505_inLine +BABEL_OP3_402_34197_20140926_180505_outLine +BABEL_OP3_402_35202_20150201_211802_inLine +BABEL_OP3_402_35202_20150201_211802_outLine +BABEL_OP3_402_36669_20141112_195148_inLine +BABEL_OP3_402_36669_20141112_195148_outLine +BABEL_OP3_402_36990_20141114_221330_inLine +BABEL_OP3_402_36990_20141114_221330_outLine +BABEL_OP3_402_38664_20141123_163506_inLine +BABEL_OP3_402_38664_20141123_163506_outLine +BABEL_OP3_402_38741_20141020_160936_inLine +BABEL_OP3_402_38741_20141020_160936_outLine +BABEL_OP3_402_40713_20141019_145210_inLine +BABEL_OP3_402_40713_20141019_145210_outLine +BABEL_OP3_402_44347_20150111_142153_inLine +BABEL_OP3_402_44347_20150111_142153_outLine +BABEL_OP3_402_44420_20141031_175058_inLine +BABEL_OP3_402_44420_20141031_175058_outLine +BABEL_OP3_402_44531_20150302_195023_inLine +BABEL_OP3_402_44531_20150302_195023_outLine +BABEL_OP3_402_45642_20140923_154729_inLine +BABEL_OP3_402_45642_20140923_154729_outLine +BABEL_OP3_402_46681_20141013_161421_inLine +BABEL_OP3_402_46681_20141013_161421_outLine +BABEL_OP3_402_46976_20141119_183300_inLine +BABEL_OP3_402_46976_20141119_183300_outLine +BABEL_OP3_402_49775_20140915_151515_inLine +BABEL_OP3_402_49775_20140915_151515_outLine +BABEL_OP3_402_51407_20141228_213554_inLine +BABEL_OP3_402_51407_20141228_213554_outLine +BABEL_OP3_402_51955_20141103_200423_inLine +BABEL_OP3_402_51955_20141103_200423_outLine +BABEL_OP3_402_52694_20141123_140609_inLine +BABEL_OP3_402_52694_20141123_140609_outLine +BABEL_OP3_402_53419_20141226_140523_inLine +BABEL_OP3_402_53419_20141226_140523_outLine +BABEL_OP3_402_53917_20150201_201004_inLine +BABEL_OP3_402_53917_20150201_201004_outLine +BABEL_OP3_402_54841_20150108_004608_inLine +BABEL_OP3_402_54841_20150108_004608_outLine +BABEL_OP3_402_56743_20141108_140926_inLine +BABEL_OP3_402_56743_20141108_140926_outLine +BABEL_OP3_402_56826_20141224_134149_inLine +BABEL_OP3_402_56826_20141224_134149_outLine +BABEL_OP3_402_58103_20141104_192009_inLine +BABEL_OP3_402_58103_20141104_192009_outLine +BABEL_OP3_402_58926_20141014_174318_inLine +BABEL_OP3_402_58926_20141014_174318_outLine +BABEL_OP3_402_59091_20150104_000026_inLine +BABEL_OP3_402_59091_20150104_000026_outLine +BABEL_OP3_402_59928_20140929_174836_inLine +BABEL_OP3_402_59928_20140929_174836_outLine +BABEL_OP3_402_59993_20141103_183340_inLine +BABEL_OP3_402_59993_20141103_183340_outLine +BABEL_OP3_402_60626_20141019_135020_inLine +BABEL_OP3_402_60626_20141019_135020_outLine +BABEL_OP3_402_61011_20141003_131410_inLine +BABEL_OP3_402_61011_20141003_131410_outLine +BABEL_OP3_402_61190_20141102_132003_inLine +BABEL_OP3_402_61190_20141102_132003_outLine +BABEL_OP3_402_61225_20140912_171906_inLine +BABEL_OP3_402_61225_20140912_171906_outLine +BABEL_OP3_402_63604_20141101_235656_inLine +BABEL_OP3_402_63604_20141101_235656_outLine +BABEL_OP3_402_64638_20141214_234141_inLine +BABEL_OP3_402_64638_20141214_234141_outLine +BABEL_OP3_402_66967_20140917_153139_inLine +BABEL_OP3_402_66967_20140917_153139_outLine +BABEL_OP3_402_69474_20150111_235831_inLine +BABEL_OP3_402_69474_20150111_235831_outLine +BABEL_OP3_402_71047_20150107_194822_inLine +BABEL_OP3_402_71047_20150107_194822_outLine +BABEL_OP3_402_72007_20141219_183621_inLine +BABEL_OP3_402_72007_20141219_183621_outLine +BABEL_OP3_402_73042_20141013_175542_inLine +BABEL_OP3_402_73042_20141013_175542_outLine +BABEL_OP3_402_73072_20140923_135906_inLine +BABEL_OP3_402_73072_20140923_135906_outLine +BABEL_OP3_402_74226_20141220_000133_inLine 
+BABEL_OP3_402_74226_20141220_000133_outLine +BABEL_OP3_402_74280_20140915_174124_inLine +BABEL_OP3_402_74280_20140915_174124_outLine +BABEL_OP3_402_76126_20141224_141342_inLine +BABEL_OP3_402_76126_20141224_141342_outLine +BABEL_OP3_402_77033_20150108_180731_inLine +BABEL_OP3_402_77033_20150108_180731_outLine +BABEL_OP3_402_77112_20140929_201352_inLine +BABEL_OP3_402_77112_20140929_201352_outLine +BABEL_OP3_402_77391_20141102_204007_inLine +BABEL_OP3_402_77391_20141102_204007_outLine +BABEL_OP3_402_77567_20140920_134449_inLine +BABEL_OP3_402_77567_20140920_134449_outLine +BABEL_OP3_402_77730_20141021_174646_inLine +BABEL_OP3_402_77730_20141021_174646_outLine +BABEL_OP3_402_78544_20141215_000405_inLine +BABEL_OP3_402_78544_20141215_000405_outLine +BABEL_OP3_402_79505_20150227_172147_inLine +BABEL_OP3_402_79505_20150227_172147_outLine +BABEL_OP3_402_81622_20141115_215444_inLine +BABEL_OP3_402_81622_20141115_215444_outLine +BABEL_OP3_402_82145_20150108_195326_inLine +BABEL_OP3_402_82145_20150108_195326_outLine +BABEL_OP3_402_82863_20141114_212757_inLine +BABEL_OP3_402_82863_20141114_212757_outLine +BABEL_OP3_402_84583_20141123_201337_inLine +BABEL_OP3_402_84583_20141123_201337_outLine +BABEL_OP3_402_87074_20141030_183257_inLine +BABEL_OP3_402_87074_20141030_183257_outLine +BABEL_OP3_402_87298_20141103_203537_inLine +BABEL_OP3_402_87298_20141103_203537_outLine +BABEL_OP3_402_88372_20150201_000904_inLine +BABEL_OP3_402_88372_20150201_000904_outLine +BABEL_OP3_402_88982_20141130_182335_inLine +BABEL_OP3_402_88982_20141130_182335_outLine +BABEL_OP3_402_91336_20141122_023555_inLine +BABEL_OP3_402_91336_20141122_023555_outLine +BABEL_OP3_402_92792_20150227_162129_inLine +BABEL_OP3_402_92792_20150227_162129_outLine +BABEL_OP3_402_93411_20141120_155834_inLine +BABEL_OP3_402_93411_20141120_155834_outLine +BABEL_OP3_402_94978_20150107_204930_inLine +BABEL_OP3_402_94978_20150107_204930_outLine +BABEL_OP3_402_95663_20141103_142815_inLine +BABEL_OP3_402_95663_20141103_142815_outLine +BABEL_OP3_402_96405_20141006_202624_inLine +BABEL_OP3_402_96405_20141006_202624_outLine +BABEL_OP3_402_96730_20150110_161027_inLine +BABEL_OP3_402_96730_20150110_161027_outLine +BABEL_OP3_402_96934_20141101_192258_inLine +BABEL_OP3_402_96934_20141101_192258_outLine +BABEL_OP3_402_97376_20141221_191608_inLine +BABEL_OP3_402_97376_20141221_191608_outLine +BABEL_OP3_402_97604_20150121_010739_inLine +BABEL_OP3_402_97604_20150121_010739_outLine +BABEL_OP3_402_98489_20141028_122528_inLine +BABEL_OP3_402_98489_20141028_122528_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/sub-train.list b/egs/babel/s5d/conf/lists/402-javanese/sub-train.list new file mode 100644 index 00000000000..58306104f42 --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/sub-train.list @@ -0,0 +1,122 @@ +BABEL_OP3_402_16184_20141120_143943_inLine +BABEL_OP3_402_16184_20141120_143943_outLine +BABEL_OP3_402_16749_20150110_182247_inLine +BABEL_OP3_402_16749_20150110_182247_outLine +BABEL_OP3_402_17914_20150107_192833_inLine +BABEL_OP3_402_17914_20150107_192833_outLine +BABEL_OP3_402_20738_20150201_004014_inLine +BABEL_OP3_402_20738_20150201_004014_outLine +BABEL_OP3_402_20768_20150110_125415_inLine +BABEL_OP3_402_20768_20150110_125415_outLine +BABEL_OP3_402_20985_20141209_223858_inLine +BABEL_OP3_402_20985_20141209_223858_outLine +BABEL_OP3_402_21794_20141110_000434_inLine +BABEL_OP3_402_21794_20141110_000434_outLine +BABEL_OP3_402_22494_20150127_212514_inLine +BABEL_OP3_402_22494_20150127_212514_outLine 
+BABEL_OP3_402_24270_20141127_181536_inLine +BABEL_OP3_402_24270_20141127_181536_outLine +BABEL_OP3_402_31346_20150106_163812_inLine +BABEL_OP3_402_31346_20150106_163812_outLine +BABEL_OP3_402_31346_20150107_000948_inLine +BABEL_OP3_402_31346_20150107_000948_outLine +BABEL_OP3_402_31992_20141104_154739_inLine +BABEL_OP3_402_31992_20141104_154739_outLine +BABEL_OP3_402_34336_20141101_214014_inLine +BABEL_OP3_402_34336_20141101_214014_outLine +BABEL_OP3_402_34477_20141103_012729_inLine +BABEL_OP3_402_34477_20141103_012729_outLine +BABEL_OP3_402_34564_20150110_174105_inLine +BABEL_OP3_402_34564_20150110_174105_outLine +BABEL_OP3_402_38431_20150104_193523_inLine +BABEL_OP3_402_38431_20150104_193523_outLine +BABEL_OP3_402_39059_20150201_151819_inLine +BABEL_OP3_402_39059_20150201_151819_outLine +BABEL_OP3_402_41680_20140911_133458_inLine +BABEL_OP3_402_41680_20140911_133458_outLine +BABEL_OP3_402_43784_20141027_205748_inLine +BABEL_OP3_402_43784_20141027_205748_outLine +BABEL_OP3_402_45536_20150131_234119_inLine +BABEL_OP3_402_45536_20150131_234119_outLine +BABEL_OP3_402_46688_20140927_210143_inLine +BABEL_OP3_402_46688_20140927_210143_outLine +BABEL_OP3_402_48243_20141031_160102_inLine +BABEL_OP3_402_48243_20141031_160102_outLine +BABEL_OP3_402_49197_20141123_183541_inLine +BABEL_OP3_402_49197_20141123_183541_outLine +BABEL_OP3_402_49502_20150201_200343_inLine +BABEL_OP3_402_49502_20150201_200343_outLine +BABEL_OP3_402_50779_20141124_211935_inLine +BABEL_OP3_402_50779_20141124_211935_outLine +BABEL_OP3_402_50962_20141004_143222_inLine +BABEL_OP3_402_50962_20141004_143222_outLine +BABEL_OP3_402_51015_20141209_214156_inLine +BABEL_OP3_402_51015_20141209_214156_outLine +BABEL_OP3_402_52246_20141115_174547_inLine +BABEL_OP3_402_52246_20141115_174547_outLine +BABEL_OP3_402_54074_20141110_001507_inLine +BABEL_OP3_402_54074_20141110_001507_outLine +BABEL_OP3_402_56198_20141103_152946_inLine +BABEL_OP3_402_56198_20141103_152946_outLine +BABEL_OP3_402_57065_20141213_175712_inLine +BABEL_OP3_402_57065_20141213_175712_outLine +BABEL_OP3_402_58313_20141121_191107_inLine +BABEL_OP3_402_58313_20141121_191107_outLine +BABEL_OP3_402_58489_20150110_155118_inLine +BABEL_OP3_402_58489_20150110_155118_outLine +BABEL_OP3_402_59078_20141127_201549_inLine +BABEL_OP3_402_59078_20141127_201549_outLine +BABEL_OP3_402_64768_20141116_180927_inLine +BABEL_OP3_402_64768_20141116_180927_outLine +BABEL_OP3_402_64796_20141122_163640_inLine +BABEL_OP3_402_64796_20141122_163640_outLine +BABEL_OP3_402_65367_20150103_224736_inLine +BABEL_OP3_402_65367_20150103_224736_outLine +BABEL_OP3_402_65692_20141228_202914_inLine +BABEL_OP3_402_65692_20141228_202914_outLine +BABEL_OP3_402_66177_20150131_201057_inLine +BABEL_OP3_402_66177_20150131_201057_outLine +BABEL_OP3_402_70221_20141222_002645_inLine +BABEL_OP3_402_70221_20141222_002645_outLine +BABEL_OP3_402_73119_20141031_182314_inLine +BABEL_OP3_402_73119_20141031_182314_outLine +BABEL_OP3_402_73301_20141117_004450_inLine +BABEL_OP3_402_73301_20141117_004450_outLine +BABEL_OP3_402_76444_20141227_143452_inLine +BABEL_OP3_402_76444_20141227_143452_outLine +BABEL_OP3_402_76683_20141128_201732_inLine +BABEL_OP3_402_76683_20141128_201732_outLine +BABEL_OP3_402_78116_20141229_210212_inLine +BABEL_OP3_402_78116_20141229_210212_outLine +BABEL_OP3_402_78254_20141101_235022_inLine +BABEL_OP3_402_78254_20141101_235022_outLine +BABEL_OP3_402_79139_20141115_153558_inLine +BABEL_OP3_402_79139_20141115_153558_outLine +BABEL_OP3_402_81229_20141116_224932_inLine 
+BABEL_OP3_402_81229_20141116_224932_outLine +BABEL_OP3_402_81427_20141110_165047_inLine +BABEL_OP3_402_81427_20141110_165047_outLine +BABEL_OP3_402_82089_20141113_162038_inLine +BABEL_OP3_402_82089_20141113_162038_outLine +BABEL_OP3_402_83651_20141009_145412_inLine +BABEL_OP3_402_83651_20141009_145412_outLine +BABEL_OP3_402_85048_20141204_194855_inLine +BABEL_OP3_402_85048_20141204_194855_outLine +BABEL_OP3_402_85340_20141021_182050_inLine +BABEL_OP3_402_85340_20141021_182050_outLine +BABEL_OP3_402_86713_20150101_014831_inLine +BABEL_OP3_402_86713_20150101_014831_outLine +BABEL_OP3_402_87073_20140915_154336_inLine +BABEL_OP3_402_87073_20140915_154336_outLine +BABEL_OP3_402_87871_20141224_130949_inLine +BABEL_OP3_402_87871_20141224_130949_outLine +BABEL_OP3_402_88601_20141209_160621_inLine +BABEL_OP3_402_88601_20141209_160621_outLine +BABEL_OP3_402_93604_20150304_152208_inLine +BABEL_OP3_402_93604_20150304_152208_outLine +BABEL_OP3_402_93964_20141216_021155_inLine +BABEL_OP3_402_93964_20141216_021155_outLine +BABEL_OP3_402_94869_20140912_195117_inLine +BABEL_OP3_402_94869_20140912_195117_outLine +BABEL_OP3_402_95446_20150110_150658_inLine +BABEL_OP3_402_95446_20150110_150658_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/402-javanese/sub-train.untranscribed.list new file mode 100644 index 00000000000..4f81d9daca4 --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/sub-train.untranscribed.list @@ -0,0 +1,370 @@ +BABEL_OP3_402_10416_20141126_133029_inLine +BABEL_OP3_402_10416_20141126_133029_outLine +BABEL_OP3_402_10901_20141116_141701_inLine +BABEL_OP3_402_10901_20141116_141701_outLine +BABEL_OP3_402_12220_20141106_021950_inLine +BABEL_OP3_402_12220_20141106_021950_outLine +BABEL_OP3_402_12767_20140924_184905_inLine +BABEL_OP3_402_12767_20140924_184905_outLine +BABEL_OP3_402_13030_20141107_173701_inLine +BABEL_OP3_402_13030_20141107_173701_outLine +BABEL_OP3_402_13664_20140911_160207_inLine +BABEL_OP3_402_13664_20140911_160207_outLine +BABEL_OP3_402_13709_20150131_161040_inLine +BABEL_OP3_402_13709_20150131_161040_outLine +BABEL_OP3_402_14141_20150215_162503_inLine +BABEL_OP3_402_14141_20150215_162503_outLine +BABEL_OP3_402_14229_20141108_200257_inLine +BABEL_OP3_402_14229_20141108_200257_outLine +BABEL_OP3_402_14350_20141104_165111_inLine +BABEL_OP3_402_14350_20141104_165111_outLine +BABEL_OP3_402_14807_20141126_174048_inLine +BABEL_OP3_402_14807_20141126_174048_outLine +BABEL_OP3_402_14875_20140929_193054_inLine +BABEL_OP3_402_14875_20140929_193054_outLine +BABEL_OP3_402_14899_20140925_165651_inLine +BABEL_OP3_402_14899_20140925_165651_outLine +BABEL_OP3_402_14929_20141110_005633_inLine +BABEL_OP3_402_14929_20141110_005633_outLine +BABEL_OP3_402_14972_20141123_182012_inLine +BABEL_OP3_402_14972_20141123_182012_outLine +BABEL_OP3_402_15163_20141123_152731_inLine +BABEL_OP3_402_15163_20141123_152731_outLine +BABEL_OP3_402_15262_20140922_152302_inLine +BABEL_OP3_402_15262_20140922_152302_outLine +BABEL_OP3_402_15749_20150105_125933_inLine +BABEL_OP3_402_15749_20150105_125933_outLine +BABEL_OP3_402_16787_20141107_025835_inLine +BABEL_OP3_402_16787_20141107_025835_outLine +BABEL_OP3_402_17520_20141123_170854_inLine +BABEL_OP3_402_17520_20141123_170854_outLine +BABEL_OP3_402_17890_20150108_163627_inLine +BABEL_OP3_402_17890_20150108_163627_outLine +BABEL_OP3_402_18380_20141113_173424_inLine +BABEL_OP3_402_18380_20141113_173424_outLine +BABEL_OP3_402_19134_20141130_163804_inLine +BABEL_OP3_402_19134_20141130_163804_outLine 
+BABEL_OP3_402_19621_20141123_181810_inLine +BABEL_OP3_402_19621_20141123_181810_outLine +BABEL_OP3_402_19672_20141208_162907_inLine +BABEL_OP3_402_19672_20141208_162907_outLine +BABEL_OP3_402_19703_20141102_190851_inLine +BABEL_OP3_402_19703_20141102_190851_outLine +BABEL_OP3_402_20330_20150131_162055_inLine +BABEL_OP3_402_20330_20150131_162055_outLine +BABEL_OP3_402_20800_20141013_185736_inLine +BABEL_OP3_402_20800_20141013_185736_outLine +BABEL_OP3_402_20922_20150131_235414_inLine +BABEL_OP3_402_20922_20150131_235414_outLine +BABEL_OP3_402_21004_20150108_210410_inLine +BABEL_OP3_402_21004_20150108_210410_outLine +BABEL_OP3_402_22170_20150108_185847_inLine +BABEL_OP3_402_22170_20150108_185847_outLine +BABEL_OP3_402_23151_20150110_202409_inLine +BABEL_OP3_402_23151_20150110_202409_outLine +BABEL_OP3_402_23731_20141120_162409_inLine +BABEL_OP3_402_23731_20141120_162409_outLine +BABEL_OP3_402_23731_20141120_163618_inLine +BABEL_OP3_402_23731_20141120_163618_outLine +BABEL_OP3_402_24323_20141111_182649_inLine +BABEL_OP3_402_24323_20141111_182649_outLine +BABEL_OP3_402_24470_20141205_154028_inLine +BABEL_OP3_402_24470_20141205_154028_outLine +BABEL_OP3_402_24589_20141106_144156_inLine +BABEL_OP3_402_24589_20141106_144156_outLine +BABEL_OP3_402_25412_20141128_212603_inLine +BABEL_OP3_402_25412_20141128_212603_outLine +BABEL_OP3_402_26072_20150131_110154_inLine +BABEL_OP3_402_26072_20150131_110154_outLine +BABEL_OP3_402_26398_20150304_162600_inLine +BABEL_OP3_402_26398_20150304_162600_outLine +BABEL_OP3_402_28303_20141122_153440_inLine +BABEL_OP3_402_28303_20141122_153440_outLine +BABEL_OP3_402_29021_20150131_010036_inLine +BABEL_OP3_402_29021_20150131_010036_outLine +BABEL_OP3_402_29076_20141121_164742_inLine +BABEL_OP3_402_29076_20141121_164742_outLine +BABEL_OP3_402_29168_20140926_164602_inLine +BABEL_OP3_402_29168_20140926_164602_outLine +BABEL_OP3_402_29323_20150108_000937_inLine +BABEL_OP3_402_29323_20150108_000937_outLine +BABEL_OP3_402_30250_20140929_162020_inLine +BABEL_OP3_402_30250_20140929_162020_outLine +BABEL_OP3_402_31184_20141112_204308_inLine +BABEL_OP3_402_31184_20141112_204308_outLine +BABEL_OP3_402_31624_20141017_204521_inLine +BABEL_OP3_402_31624_20141017_204521_outLine +BABEL_OP3_402_32708_20141127_210435_inLine +BABEL_OP3_402_32708_20141127_210435_outLine +BABEL_OP3_402_32832_20150214_160609_inLine +BABEL_OP3_402_32832_20150214_160609_outLine +BABEL_OP3_402_32837_20150114_173357_inLine +BABEL_OP3_402_32837_20150114_173357_outLine +BABEL_OP3_402_33175_20141011_151643_inLine +BABEL_OP3_402_33175_20141011_151643_outLine +BABEL_OP3_402_33355_20141222_030242_inLine +BABEL_OP3_402_33355_20141222_030242_outLine +BABEL_OP3_402_33704_20150108_121853_inLine +BABEL_OP3_402_33704_20150108_121853_outLine +BABEL_OP3_402_33951_20141115_015656_inLine +BABEL_OP3_402_33951_20141115_015656_outLine +BABEL_OP3_402_34679_20141012_230850_inLine +BABEL_OP3_402_34679_20141012_230850_outLine +BABEL_OP3_402_34688_20141027_170150_inLine +BABEL_OP3_402_34688_20141027_170150_outLine +BABEL_OP3_402_35069_20150216_023523_inLine +BABEL_OP3_402_35069_20150216_023523_outLine +BABEL_OP3_402_35583_20150121_013548_inLine +BABEL_OP3_402_35583_20150121_013548_outLine +BABEL_OP3_402_37228_20150120_211131_inLine +BABEL_OP3_402_37228_20150120_211131_outLine +BABEL_OP3_402_37281_20141110_214558_inLine +BABEL_OP3_402_37281_20141110_214558_outLine +BABEL_OP3_402_37682_20141103_210556_inLine +BABEL_OP3_402_37682_20141103_210556_outLine +BABEL_OP3_402_37853_20150107_154609_inLine 
+BABEL_OP3_402_37853_20150107_154609_outLine +BABEL_OP3_402_38340_20141020_170141_inLine +BABEL_OP3_402_38340_20141020_170141_outLine +BABEL_OP3_402_39159_20140930_201318_inLine +BABEL_OP3_402_39159_20140930_201318_outLine +BABEL_OP3_402_39426_20150202_103633_inLine +BABEL_OP3_402_39426_20150202_103633_outLine +BABEL_OP3_402_39680_20150131_151358_inLine +BABEL_OP3_402_39680_20150131_151358_outLine +BABEL_OP3_402_39920_20150216_014707_inLine +BABEL_OP3_402_39920_20150216_014707_outLine +BABEL_OP3_402_41109_20150101_021923_inLine +BABEL_OP3_402_41109_20150101_021923_outLine +BABEL_OP3_402_43239_20150205_011521_inLine +BABEL_OP3_402_43239_20150205_011521_outLine +BABEL_OP3_402_43368_20141107_210043_inLine +BABEL_OP3_402_43368_20141107_210043_outLine +BABEL_OP3_402_43920_20141228_001637_inLine +BABEL_OP3_402_43920_20141228_001637_outLine +BABEL_OP3_402_44255_20150131_183155_inLine +BABEL_OP3_402_44255_20150131_183155_outLine +BABEL_OP3_402_44961_20140921_154533_inLine +BABEL_OP3_402_44961_20140921_154533_outLine +BABEL_OP3_402_46702_20140929_141902_inLine +BABEL_OP3_402_46702_20140929_141902_outLine +BABEL_OP3_402_46770_20150124_001351_inLine +BABEL_OP3_402_46770_20150124_001351_outLine +BABEL_OP3_402_46881_20141028_192343_inLine +BABEL_OP3_402_46881_20141028_192343_outLine +BABEL_OP3_402_47270_20150128_163211_inLine +BABEL_OP3_402_47270_20150128_163211_outLine +BABEL_OP3_402_48422_20150101_193320_inLine +BABEL_OP3_402_48422_20150101_193320_outLine +BABEL_OP3_402_48422_20150101_194803_inLine +BABEL_OP3_402_48422_20150101_194803_outLine +BABEL_OP3_402_48610_20140920_172026_inLine +BABEL_OP3_402_48610_20140920_172026_outLine +BABEL_OP3_402_48789_20141113_181720_inLine +BABEL_OP3_402_48789_20141113_181720_outLine +BABEL_OP3_402_49001_20141010_142908_inLine +BABEL_OP3_402_49001_20141010_142908_outLine +BABEL_OP3_402_49001_20141010_152312_inLine +BABEL_OP3_402_49001_20141010_152312_outLine +BABEL_OP3_402_49907_20141006_162735_inLine +BABEL_OP3_402_49907_20141006_162735_outLine +BABEL_OP3_402_50601_20141121_182643_inLine +BABEL_OP3_402_50601_20141121_182643_outLine +BABEL_OP3_402_50810_20140912_181008_inLine +BABEL_OP3_402_50810_20140912_181008_outLine +BABEL_OP3_402_51540_20150131_203108_inLine +BABEL_OP3_402_51540_20150131_203108_outLine +BABEL_OP3_402_51611_20141010_163542_inLine +BABEL_OP3_402_51611_20141010_163542_outLine +BABEL_OP3_402_51968_20141109_154701_inLine +BABEL_OP3_402_51968_20141109_154701_outLine +BABEL_OP3_402_52422_20150128_142229_inLine +BABEL_OP3_402_52422_20150128_142229_outLine +BABEL_OP3_402_52854_20140910_200850_inLine +BABEL_OP3_402_52854_20140910_200850_outLine +BABEL_OP3_402_52932_20141007_182635_inLine +BABEL_OP3_402_52932_20141007_182635_outLine +BABEL_OP3_402_54104_20141104_173741_inLine +BABEL_OP3_402_54104_20141104_173741_outLine +BABEL_OP3_402_54405_20141123_173044_inLine +BABEL_OP3_402_54405_20141123_173044_outLine +BABEL_OP3_402_55267_20141221_184118_inLine +BABEL_OP3_402_55267_20141221_184118_outLine +BABEL_OP3_402_56720_20141228_190653_inLine +BABEL_OP3_402_56720_20141228_190653_outLine +BABEL_OP3_402_57650_20150107_171335_inLine +BABEL_OP3_402_57650_20150107_171335_outLine +BABEL_OP3_402_57654_20141031_172711_inLine +BABEL_OP3_402_57654_20141031_172711_outLine +BABEL_OP3_402_57922_20141130_172609_inLine +BABEL_OP3_402_57922_20141130_172609_outLine +BABEL_OP3_402_58850_20141115_223848_inLine +BABEL_OP3_402_58850_20141115_223848_outLine +BABEL_OP3_402_59402_20150103_181612_inLine +BABEL_OP3_402_59402_20150103_181612_outLine 
+BABEL_OP3_402_60418_20141219_231820_inLine +BABEL_OP3_402_60418_20141219_231820_outLine +BABEL_OP3_402_60474_20141101_195523_inLine +BABEL_OP3_402_60474_20141101_195523_outLine +BABEL_OP3_402_61167_20141106_195710_inLine +BABEL_OP3_402_61167_20141106_195710_outLine +BABEL_OP3_402_61219_20141101_192955_inLine +BABEL_OP3_402_61219_20141101_192955_outLine +BABEL_OP3_402_61888_20150108_210230_inLine +BABEL_OP3_402_61888_20150108_210230_outLine +BABEL_OP3_402_62456_20141203_005134_inLine +BABEL_OP3_402_62456_20141203_005134_outLine +BABEL_OP3_402_62800_20141028_170241_inLine +BABEL_OP3_402_62800_20141028_170241_outLine +BABEL_OP3_402_62810_20140917_184635_inLine +BABEL_OP3_402_62810_20140917_184635_outLine +BABEL_OP3_402_63081_20141003_151638_inLine +BABEL_OP3_402_63081_20141003_151638_outLine +BABEL_OP3_402_64014_20150108_162849_inLine +BABEL_OP3_402_64014_20150108_162849_outLine +BABEL_OP3_402_64065_20141020_152452_inLine +BABEL_OP3_402_64065_20141020_152452_outLine +BABEL_OP3_402_64870_20141228_184201_inLine +BABEL_OP3_402_64870_20141228_184201_outLine +BABEL_OP3_402_65064_20141125_162638_inLine +BABEL_OP3_402_65064_20141125_162638_outLine +BABEL_OP3_402_65298_20150130_232120_inLine +BABEL_OP3_402_65298_20150130_232120_outLine +BABEL_OP3_402_65723_20141022_231832_inLine +BABEL_OP3_402_65723_20141022_231832_outLine +BABEL_OP3_402_66001_20140921_123931_inLine +BABEL_OP3_402_66001_20140921_123931_outLine +BABEL_OP3_402_66045_20141115_162944_inLine +BABEL_OP3_402_66045_20141115_162944_outLine +BABEL_OP3_402_67152_20150107_163104_inLine +BABEL_OP3_402_67152_20150107_163104_outLine +BABEL_OP3_402_67373_20141014_152719_inLine +BABEL_OP3_402_67373_20141014_152719_outLine +BABEL_OP3_402_68627_20141107_033600_inLine +BABEL_OP3_402_68627_20141107_033600_outLine +BABEL_OP3_402_69107_20141123_145802_inLine +BABEL_OP3_402_69107_20141123_145802_outLine +BABEL_OP3_402_69574_20140915_170204_inLine +BABEL_OP3_402_69574_20140915_170204_outLine +BABEL_OP3_402_70282_20141128_162640_inLine +BABEL_OP3_402_70282_20141128_162640_outLine +BABEL_OP3_402_70601_20141104_190522_inLine +BABEL_OP3_402_70601_20141104_190522_outLine +BABEL_OP3_402_70794_20141122_201302_inLine +BABEL_OP3_402_70794_20141122_201302_outLine +BABEL_OP3_402_71566_20150109_002519_inLine +BABEL_OP3_402_71566_20150109_002519_outLine +BABEL_OP3_402_71704_20141030_192615_inLine +BABEL_OP3_402_71704_20141030_192615_outLine +BABEL_OP3_402_72844_20150216_194719_inLine +BABEL_OP3_402_72844_20150216_194719_outLine +BABEL_OP3_402_73022_20150103_135209_inLine +BABEL_OP3_402_73022_20150103_135209_outLine +BABEL_OP3_402_73757_20141115_190524_inLine +BABEL_OP3_402_73757_20141115_190524_outLine +BABEL_OP3_402_74111_20150102_112305_inLine +BABEL_OP3_402_74111_20150102_112305_outLine +BABEL_OP3_402_74455_20150201_180158_inLine +BABEL_OP3_402_74455_20150201_180158_outLine +BABEL_OP3_402_74799_20141129_202734_inLine +BABEL_OP3_402_74799_20141129_202734_outLine +BABEL_OP3_402_75764_20150202_000719_inLine +BABEL_OP3_402_75764_20150202_000719_outLine +BABEL_OP3_402_75993_20141021_183118_inLine +BABEL_OP3_402_75993_20141021_183118_outLine +BABEL_OP3_402_78360_20150131_163647_inLine +BABEL_OP3_402_78360_20150131_163647_outLine +BABEL_OP3_402_78630_20140930_135924_inLine +BABEL_OP3_402_78630_20140930_135924_outLine +BABEL_OP3_402_79751_20141104_200346_inLine +BABEL_OP3_402_79751_20141104_200346_outLine +BABEL_OP3_402_79751_20141104_201600_inLine +BABEL_OP3_402_79751_20141104_201600_outLine +BABEL_OP3_402_80439_20141104_195124_inLine 
+BABEL_OP3_402_80439_20141104_195124_outLine +BABEL_OP3_402_82224_20150101_162311_inLine +BABEL_OP3_402_82224_20150101_162311_outLine +BABEL_OP3_402_82637_20141006_173314_inLine +BABEL_OP3_402_82637_20141006_173314_outLine +BABEL_OP3_402_83238_20141122_140740_inLine +BABEL_OP3_402_83238_20141122_140740_outLine +BABEL_OP3_402_83436_20141017_162042_inLine +BABEL_OP3_402_83436_20141017_162042_outLine +BABEL_OP3_402_84061_20141107_162356_inLine +BABEL_OP3_402_84061_20141107_162356_outLine +BABEL_OP3_402_84611_20141023_205020_inLine +BABEL_OP3_402_84611_20141023_205020_outLine +BABEL_OP3_402_84737_20150129_233418_inLine +BABEL_OP3_402_84737_20150129_233418_outLine +BABEL_OP3_402_84815_20141225_185456_inLine +BABEL_OP3_402_84815_20141225_185456_outLine +BABEL_OP3_402_85248_20150109_001722_inLine +BABEL_OP3_402_85248_20150109_001722_outLine +BABEL_OP3_402_86191_20141105_130254_inLine +BABEL_OP3_402_86191_20141105_130254_outLine +BABEL_OP3_402_86722_20141101_204411_inLine +BABEL_OP3_402_86722_20141101_204411_outLine +BABEL_OP3_402_86952_20141105_144737_inLine +BABEL_OP3_402_86952_20141105_144737_outLine +BABEL_OP3_402_87179_20150203_020351_inLine +BABEL_OP3_402_87179_20150203_020351_outLine +BABEL_OP3_402_88776_20140921_133554_inLine +BABEL_OP3_402_88776_20140921_133554_outLine +BABEL_OP3_402_88873_20140930_131622_inLine +BABEL_OP3_402_88873_20140930_131622_outLine +BABEL_OP3_402_89794_20141213_211839_inLine +BABEL_OP3_402_89794_20141213_211839_outLine +BABEL_OP3_402_89877_20150107_013739_inLine +BABEL_OP3_402_89877_20150107_013739_outLine +BABEL_OP3_402_89877_20150107_014426_inLine +BABEL_OP3_402_89877_20150107_014426_outLine +BABEL_OP3_402_90777_20141106_234557_inLine +BABEL_OP3_402_90777_20141106_234557_outLine +BABEL_OP3_402_91884_20150302_183207_inLine +BABEL_OP3_402_91884_20150302_183207_outLine +BABEL_OP3_402_91891_20150108_203636_inLine +BABEL_OP3_402_91891_20150108_203636_outLine +BABEL_OP3_402_91977_20141225_143539_inLine +BABEL_OP3_402_91977_20141225_143539_outLine +BABEL_OP3_402_92356_20150109_005846_inLine +BABEL_OP3_402_92356_20150109_005846_outLine +BABEL_OP3_402_92459_20141102_124516_inLine +BABEL_OP3_402_92459_20141102_124516_outLine +BABEL_OP3_402_92557_20150201_205110_inLine +BABEL_OP3_402_92557_20150201_205110_outLine +BABEL_OP3_402_92698_20141115_182138_inLine +BABEL_OP3_402_92698_20141115_182138_outLine +BABEL_OP3_402_93475_20141119_140615_inLine +BABEL_OP3_402_93475_20141119_140615_outLine +BABEL_OP3_402_93490_20150106_174211_inLine +BABEL_OP3_402_93490_20150106_174211_outLine +BABEL_OP3_402_94002_20141216_015659_inLine +BABEL_OP3_402_94002_20141216_015659_outLine +BABEL_OP3_402_94166_20150128_151103_inLine +BABEL_OP3_402_94166_20150128_151103_outLine +BABEL_OP3_402_94409_20141214_185032_inLine +BABEL_OP3_402_94409_20141214_185032_outLine +BABEL_OP3_402_94923_20141201_154601_inLine +BABEL_OP3_402_94923_20141201_154601_outLine +BABEL_OP3_402_96190_20141103_161533_inLine +BABEL_OP3_402_96190_20141103_161533_outLine +BABEL_OP3_402_96205_20141126_152921_inLine +BABEL_OP3_402_96205_20141126_152921_outLine +BABEL_OP3_402_97264_20150131_205411_inLine +BABEL_OP3_402_97264_20150131_205411_outLine +BABEL_OP3_402_97772_20140915_200919_inLine +BABEL_OP3_402_97772_20140915_200919_outLine +BABEL_OP3_402_97896_20141122_161128_inLine +BABEL_OP3_402_97896_20141122_161128_outLine +BABEL_OP3_402_98165_20141106_191239_inLine +BABEL_OP3_402_98165_20141106_191239_outLine +BABEL_OP3_402_98888_20141108_211953_inLine +BABEL_OP3_402_98888_20141108_211953_outLine 
+BABEL_OP3_402_99202_20141123_162817_inLine +BABEL_OP3_402_99202_20141123_162817_outLine +BABEL_OP3_402_99516_20140917_174712_inLine +BABEL_OP3_402_99516_20140917_174712_outLine +BABEL_OP3_402_99594_20141111_170413_inLine +BABEL_OP3_402_99594_20141111_170413_outLine +BABEL_OP3_402_99887_20150104_230431_inLine +BABEL_OP3_402_99887_20150104_230431_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/training.list b/egs/babel/s5d/conf/lists/402-javanese/training.list new file mode 100644 index 00000000000..ce7313fceeb --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/training.list @@ -0,0 +1,492 @@ +BABEL_OP3_402_10416_20141126_133029_inLine +BABEL_OP3_402_10416_20141126_133029_outLine +BABEL_OP3_402_10901_20141116_141701_inLine +BABEL_OP3_402_10901_20141116_141701_outLine +BABEL_OP3_402_12220_20141106_021950_inLine +BABEL_OP3_402_12220_20141106_021950_outLine +BABEL_OP3_402_12767_20140924_184905_inLine +BABEL_OP3_402_12767_20140924_184905_outLine +BABEL_OP3_402_13030_20141107_173701_inLine +BABEL_OP3_402_13030_20141107_173701_outLine +BABEL_OP3_402_13664_20140911_160207_inLine +BABEL_OP3_402_13664_20140911_160207_outLine +BABEL_OP3_402_13709_20150131_161040_inLine +BABEL_OP3_402_13709_20150131_161040_outLine +BABEL_OP3_402_14141_20150215_162503_inLine +BABEL_OP3_402_14141_20150215_162503_outLine +BABEL_OP3_402_14229_20141108_200257_inLine +BABEL_OP3_402_14229_20141108_200257_outLine +BABEL_OP3_402_14350_20141104_165111_inLine +BABEL_OP3_402_14350_20141104_165111_outLine +BABEL_OP3_402_14807_20141126_174048_inLine +BABEL_OP3_402_14807_20141126_174048_outLine +BABEL_OP3_402_14875_20140929_193054_inLine +BABEL_OP3_402_14875_20140929_193054_outLine +BABEL_OP3_402_14899_20140925_165651_inLine +BABEL_OP3_402_14899_20140925_165651_outLine +BABEL_OP3_402_14929_20141110_005633_inLine +BABEL_OP3_402_14929_20141110_005633_outLine +BABEL_OP3_402_14972_20141123_182012_inLine +BABEL_OP3_402_14972_20141123_182012_outLine +BABEL_OP3_402_15163_20141123_152731_inLine +BABEL_OP3_402_15163_20141123_152731_outLine +BABEL_OP3_402_15262_20140922_152302_inLine +BABEL_OP3_402_15262_20140922_152302_outLine +BABEL_OP3_402_15749_20150105_125933_inLine +BABEL_OP3_402_15749_20150105_125933_outLine +BABEL_OP3_402_16184_20141120_143943_inLine +BABEL_OP3_402_16184_20141120_143943_outLine +BABEL_OP3_402_16749_20150110_182247_inLine +BABEL_OP3_402_16749_20150110_182247_outLine +BABEL_OP3_402_16787_20141107_025835_inLine +BABEL_OP3_402_16787_20141107_025835_outLine +BABEL_OP3_402_17520_20141123_170854_inLine +BABEL_OP3_402_17520_20141123_170854_outLine +BABEL_OP3_402_17890_20150108_163627_inLine +BABEL_OP3_402_17890_20150108_163627_outLine +BABEL_OP3_402_17914_20150107_192833_inLine +BABEL_OP3_402_17914_20150107_192833_outLine +BABEL_OP3_402_18380_20141113_173424_inLine +BABEL_OP3_402_18380_20141113_173424_outLine +BABEL_OP3_402_19134_20141130_163804_inLine +BABEL_OP3_402_19134_20141130_163804_outLine +BABEL_OP3_402_19621_20141123_181810_inLine +BABEL_OP3_402_19621_20141123_181810_outLine +BABEL_OP3_402_19672_20141208_162907_inLine +BABEL_OP3_402_19672_20141208_162907_outLine +BABEL_OP3_402_19703_20141102_190851_inLine +BABEL_OP3_402_19703_20141102_190851_outLine +BABEL_OP3_402_20330_20150131_162055_inLine +BABEL_OP3_402_20330_20150131_162055_outLine +BABEL_OP3_402_20738_20150201_004014_inLine +BABEL_OP3_402_20738_20150201_004014_outLine +BABEL_OP3_402_20768_20150110_125415_inLine +BABEL_OP3_402_20768_20150110_125415_outLine +BABEL_OP3_402_20800_20141013_185736_inLine +BABEL_OP3_402_20800_20141013_185736_outLine 
+BABEL_OP3_402_20922_20150131_235414_inLine +BABEL_OP3_402_20922_20150131_235414_outLine +BABEL_OP3_402_20985_20141209_223858_inLine +BABEL_OP3_402_20985_20141209_223858_outLine +BABEL_OP3_402_21004_20150108_210410_inLine +BABEL_OP3_402_21004_20150108_210410_outLine +BABEL_OP3_402_21794_20141110_000434_inLine +BABEL_OP3_402_21794_20141110_000434_outLine +BABEL_OP3_402_22170_20150108_185847_inLine +BABEL_OP3_402_22170_20150108_185847_outLine +BABEL_OP3_402_22494_20150127_212514_inLine +BABEL_OP3_402_22494_20150127_212514_outLine +BABEL_OP3_402_23151_20150110_202409_inLine +BABEL_OP3_402_23151_20150110_202409_outLine +BABEL_OP3_402_23731_20141120_162409_inLine +BABEL_OP3_402_23731_20141120_162409_outLine +BABEL_OP3_402_23731_20141120_163618_inLine +BABEL_OP3_402_23731_20141120_163618_outLine +BABEL_OP3_402_24270_20141127_181536_inLine +BABEL_OP3_402_24270_20141127_181536_outLine +BABEL_OP3_402_24323_20141111_182649_inLine +BABEL_OP3_402_24323_20141111_182649_outLine +BABEL_OP3_402_24470_20141205_154028_inLine +BABEL_OP3_402_24470_20141205_154028_outLine +BABEL_OP3_402_24589_20141106_144156_inLine +BABEL_OP3_402_24589_20141106_144156_outLine +BABEL_OP3_402_25412_20141128_212603_inLine +BABEL_OP3_402_25412_20141128_212603_outLine +BABEL_OP3_402_26072_20150131_110154_inLine +BABEL_OP3_402_26072_20150131_110154_outLine +BABEL_OP3_402_26398_20150304_162600_inLine +BABEL_OP3_402_26398_20150304_162600_outLine +BABEL_OP3_402_28303_20141122_153440_inLine +BABEL_OP3_402_28303_20141122_153440_outLine +BABEL_OP3_402_29021_20150131_010036_inLine +BABEL_OP3_402_29021_20150131_010036_outLine +BABEL_OP3_402_29076_20141121_164742_inLine +BABEL_OP3_402_29076_20141121_164742_outLine +BABEL_OP3_402_29168_20140926_164602_inLine +BABEL_OP3_402_29168_20140926_164602_outLine +BABEL_OP3_402_29323_20150108_000937_inLine +BABEL_OP3_402_29323_20150108_000937_outLine +BABEL_OP3_402_30250_20140929_162020_inLine +BABEL_OP3_402_30250_20140929_162020_outLine +BABEL_OP3_402_31184_20141112_204308_inLine +BABEL_OP3_402_31184_20141112_204308_outLine +BABEL_OP3_402_31346_20150106_163812_inLine +BABEL_OP3_402_31346_20150106_163812_outLine +BABEL_OP3_402_31346_20150107_000948_inLine +BABEL_OP3_402_31346_20150107_000948_outLine +BABEL_OP3_402_31624_20141017_204521_inLine +BABEL_OP3_402_31624_20141017_204521_outLine +BABEL_OP3_402_31992_20141104_154739_inLine +BABEL_OP3_402_31992_20141104_154739_outLine +BABEL_OP3_402_32708_20141127_210435_inLine +BABEL_OP3_402_32708_20141127_210435_outLine +BABEL_OP3_402_32832_20150214_160609_inLine +BABEL_OP3_402_32832_20150214_160609_outLine +BABEL_OP3_402_32837_20150114_173357_inLine +BABEL_OP3_402_32837_20150114_173357_outLine +BABEL_OP3_402_33175_20141011_151643_inLine +BABEL_OP3_402_33175_20141011_151643_outLine +BABEL_OP3_402_33355_20141222_030242_inLine +BABEL_OP3_402_33355_20141222_030242_outLine +BABEL_OP3_402_33704_20150108_121853_inLine +BABEL_OP3_402_33704_20150108_121853_outLine +BABEL_OP3_402_33951_20141115_015656_inLine +BABEL_OP3_402_33951_20141115_015656_outLine +BABEL_OP3_402_34336_20141101_214014_inLine +BABEL_OP3_402_34336_20141101_214014_outLine +BABEL_OP3_402_34477_20141103_012729_inLine +BABEL_OP3_402_34477_20141103_012729_outLine +BABEL_OP3_402_34564_20150110_174105_inLine +BABEL_OP3_402_34564_20150110_174105_outLine +BABEL_OP3_402_34679_20141012_230850_inLine +BABEL_OP3_402_34679_20141012_230850_outLine +BABEL_OP3_402_34688_20141027_170150_inLine +BABEL_OP3_402_34688_20141027_170150_outLine +BABEL_OP3_402_35069_20150216_023523_inLine 
+BABEL_OP3_402_35069_20150216_023523_outLine +BABEL_OP3_402_35583_20150121_013548_inLine +BABEL_OP3_402_35583_20150121_013548_outLine +BABEL_OP3_402_37228_20150120_211131_inLine +BABEL_OP3_402_37228_20150120_211131_outLine +BABEL_OP3_402_37281_20141110_214558_inLine +BABEL_OP3_402_37281_20141110_214558_outLine +BABEL_OP3_402_37682_20141103_210556_inLine +BABEL_OP3_402_37682_20141103_210556_outLine +BABEL_OP3_402_37853_20150107_154609_inLine +BABEL_OP3_402_37853_20150107_154609_outLine +BABEL_OP3_402_38340_20141020_170141_inLine +BABEL_OP3_402_38340_20141020_170141_outLine +BABEL_OP3_402_38431_20150104_193523_inLine +BABEL_OP3_402_38431_20150104_193523_outLine +BABEL_OP3_402_39059_20150201_151819_inLine +BABEL_OP3_402_39059_20150201_151819_outLine +BABEL_OP3_402_39159_20140930_201318_inLine +BABEL_OP3_402_39159_20140930_201318_outLine +BABEL_OP3_402_39426_20150202_103633_inLine +BABEL_OP3_402_39426_20150202_103633_outLine +BABEL_OP3_402_39680_20150131_151358_inLine +BABEL_OP3_402_39680_20150131_151358_outLine +BABEL_OP3_402_39920_20150216_014707_inLine +BABEL_OP3_402_39920_20150216_014707_outLine +BABEL_OP3_402_41109_20150101_021923_inLine +BABEL_OP3_402_41109_20150101_021923_outLine +BABEL_OP3_402_41680_20140911_133458_inLine +BABEL_OP3_402_41680_20140911_133458_outLine +BABEL_OP3_402_43239_20150205_011521_inLine +BABEL_OP3_402_43239_20150205_011521_outLine +BABEL_OP3_402_43368_20141107_210043_inLine +BABEL_OP3_402_43368_20141107_210043_outLine +BABEL_OP3_402_43784_20141027_205748_inLine +BABEL_OP3_402_43784_20141027_205748_outLine +BABEL_OP3_402_43920_20141228_001637_inLine +BABEL_OP3_402_43920_20141228_001637_outLine +BABEL_OP3_402_44255_20150131_183155_inLine +BABEL_OP3_402_44255_20150131_183155_outLine +BABEL_OP3_402_44961_20140921_154533_inLine +BABEL_OP3_402_44961_20140921_154533_outLine +BABEL_OP3_402_45536_20150131_234119_inLine +BABEL_OP3_402_45536_20150131_234119_outLine +BABEL_OP3_402_46688_20140927_210143_inLine +BABEL_OP3_402_46688_20140927_210143_outLine +BABEL_OP3_402_46702_20140929_141902_inLine +BABEL_OP3_402_46702_20140929_141902_outLine +BABEL_OP3_402_46770_20150124_001351_inLine +BABEL_OP3_402_46770_20150124_001351_outLine +BABEL_OP3_402_46881_20141028_192343_inLine +BABEL_OP3_402_46881_20141028_192343_outLine +BABEL_OP3_402_47270_20150128_163211_inLine +BABEL_OP3_402_47270_20150128_163211_outLine +BABEL_OP3_402_48243_20141031_160102_inLine +BABEL_OP3_402_48243_20141031_160102_outLine +BABEL_OP3_402_48422_20150101_193320_inLine +BABEL_OP3_402_48422_20150101_193320_outLine +BABEL_OP3_402_48422_20150101_194803_inLine +BABEL_OP3_402_48422_20150101_194803_outLine +BABEL_OP3_402_48610_20140920_172026_inLine +BABEL_OP3_402_48610_20140920_172026_outLine +BABEL_OP3_402_48789_20141113_181720_inLine +BABEL_OP3_402_48789_20141113_181720_outLine +BABEL_OP3_402_49001_20141010_142908_inLine +BABEL_OP3_402_49001_20141010_142908_outLine +BABEL_OP3_402_49001_20141010_152312_inLine +BABEL_OP3_402_49001_20141010_152312_outLine +BABEL_OP3_402_49197_20141123_183541_inLine +BABEL_OP3_402_49197_20141123_183541_outLine +BABEL_OP3_402_49502_20150201_200343_inLine +BABEL_OP3_402_49502_20150201_200343_outLine +BABEL_OP3_402_49907_20141006_162735_inLine +BABEL_OP3_402_49907_20141006_162735_outLine +BABEL_OP3_402_50601_20141121_182643_inLine +BABEL_OP3_402_50601_20141121_182643_outLine +BABEL_OP3_402_50779_20141124_211935_inLine +BABEL_OP3_402_50779_20141124_211935_outLine +BABEL_OP3_402_50810_20140912_181008_inLine +BABEL_OP3_402_50810_20140912_181008_outLine 
+BABEL_OP3_402_50962_20141004_143222_inLine +BABEL_OP3_402_50962_20141004_143222_outLine +BABEL_OP3_402_51015_20141209_214156_inLine +BABEL_OP3_402_51015_20141209_214156_outLine +BABEL_OP3_402_51540_20150131_203108_inLine +BABEL_OP3_402_51540_20150131_203108_outLine +BABEL_OP3_402_51611_20141010_163542_inLine +BABEL_OP3_402_51611_20141010_163542_outLine +BABEL_OP3_402_51968_20141109_154701_inLine +BABEL_OP3_402_51968_20141109_154701_outLine +BABEL_OP3_402_52246_20141115_174547_inLine +BABEL_OP3_402_52246_20141115_174547_outLine +BABEL_OP3_402_52422_20150128_142229_inLine +BABEL_OP3_402_52422_20150128_142229_outLine +BABEL_OP3_402_52854_20140910_200850_inLine +BABEL_OP3_402_52854_20140910_200850_outLine +BABEL_OP3_402_52932_20141007_182635_inLine +BABEL_OP3_402_52932_20141007_182635_outLine +BABEL_OP3_402_54074_20141110_001507_inLine +BABEL_OP3_402_54074_20141110_001507_outLine +BABEL_OP3_402_54104_20141104_173741_inLine +BABEL_OP3_402_54104_20141104_173741_outLine +BABEL_OP3_402_54405_20141123_173044_inLine +BABEL_OP3_402_54405_20141123_173044_outLine +BABEL_OP3_402_55267_20141221_184118_inLine +BABEL_OP3_402_55267_20141221_184118_outLine +BABEL_OP3_402_56198_20141103_152946_inLine +BABEL_OP3_402_56198_20141103_152946_outLine +BABEL_OP3_402_56720_20141228_190653_inLine +BABEL_OP3_402_56720_20141228_190653_outLine +BABEL_OP3_402_57065_20141213_175712_inLine +BABEL_OP3_402_57065_20141213_175712_outLine +BABEL_OP3_402_57650_20150107_171335_inLine +BABEL_OP3_402_57650_20150107_171335_outLine +BABEL_OP3_402_57654_20141031_172711_inLine +BABEL_OP3_402_57654_20141031_172711_outLine +BABEL_OP3_402_57922_20141130_172609_inLine +BABEL_OP3_402_57922_20141130_172609_outLine +BABEL_OP3_402_58313_20141121_191107_inLine +BABEL_OP3_402_58313_20141121_191107_outLine +BABEL_OP3_402_58489_20150110_155118_inLine +BABEL_OP3_402_58489_20150110_155118_outLine +BABEL_OP3_402_58850_20141115_223848_inLine +BABEL_OP3_402_58850_20141115_223848_outLine +BABEL_OP3_402_59078_20141127_201549_inLine +BABEL_OP3_402_59078_20141127_201549_outLine +BABEL_OP3_402_59402_20150103_181612_inLine +BABEL_OP3_402_59402_20150103_181612_outLine +BABEL_OP3_402_60418_20141219_231820_inLine +BABEL_OP3_402_60418_20141219_231820_outLine +BABEL_OP3_402_60474_20141101_195523_inLine +BABEL_OP3_402_60474_20141101_195523_outLine +BABEL_OP3_402_61167_20141106_195710_inLine +BABEL_OP3_402_61167_20141106_195710_outLine +BABEL_OP3_402_61219_20141101_192955_inLine +BABEL_OP3_402_61219_20141101_192955_outLine +BABEL_OP3_402_61888_20150108_210230_inLine +BABEL_OP3_402_61888_20150108_210230_outLine +BABEL_OP3_402_62456_20141203_005134_inLine +BABEL_OP3_402_62456_20141203_005134_outLine +BABEL_OP3_402_62800_20141028_170241_inLine +BABEL_OP3_402_62800_20141028_170241_outLine +BABEL_OP3_402_62810_20140917_184635_inLine +BABEL_OP3_402_62810_20140917_184635_outLine +BABEL_OP3_402_63081_20141003_151638_inLine +BABEL_OP3_402_63081_20141003_151638_outLine +BABEL_OP3_402_64014_20150108_162849_inLine +BABEL_OP3_402_64014_20150108_162849_outLine +BABEL_OP3_402_64065_20141020_152452_inLine +BABEL_OP3_402_64065_20141020_152452_outLine +BABEL_OP3_402_64768_20141116_180927_inLine +BABEL_OP3_402_64768_20141116_180927_outLine +BABEL_OP3_402_64796_20141122_163640_inLine +BABEL_OP3_402_64796_20141122_163640_outLine +BABEL_OP3_402_64870_20141228_184201_inLine +BABEL_OP3_402_64870_20141228_184201_outLine +BABEL_OP3_402_65064_20141125_162638_inLine +BABEL_OP3_402_65064_20141125_162638_outLine +BABEL_OP3_402_65298_20150130_232120_inLine 
+BABEL_OP3_402_65298_20150130_232120_outLine +BABEL_OP3_402_65367_20150103_224736_inLine +BABEL_OP3_402_65367_20150103_224736_outLine +BABEL_OP3_402_65692_20141228_202914_inLine +BABEL_OP3_402_65692_20141228_202914_outLine +BABEL_OP3_402_65723_20141022_231832_inLine +BABEL_OP3_402_65723_20141022_231832_outLine +BABEL_OP3_402_66001_20140921_123931_inLine +BABEL_OP3_402_66001_20140921_123931_outLine +BABEL_OP3_402_66045_20141115_162944_inLine +BABEL_OP3_402_66045_20141115_162944_outLine +BABEL_OP3_402_66177_20150131_201057_inLine +BABEL_OP3_402_66177_20150131_201057_outLine +BABEL_OP3_402_67152_20150107_163104_inLine +BABEL_OP3_402_67152_20150107_163104_outLine +BABEL_OP3_402_67373_20141014_152719_inLine +BABEL_OP3_402_67373_20141014_152719_outLine +BABEL_OP3_402_68627_20141107_033600_inLine +BABEL_OP3_402_68627_20141107_033600_outLine +BABEL_OP3_402_69107_20141123_145802_inLine +BABEL_OP3_402_69107_20141123_145802_outLine +BABEL_OP3_402_69574_20140915_170204_inLine +BABEL_OP3_402_69574_20140915_170204_outLine +BABEL_OP3_402_70221_20141222_002645_inLine +BABEL_OP3_402_70221_20141222_002645_outLine +BABEL_OP3_402_70282_20141128_162640_inLine +BABEL_OP3_402_70282_20141128_162640_outLine +BABEL_OP3_402_70601_20141104_190522_inLine +BABEL_OP3_402_70601_20141104_190522_outLine +BABEL_OP3_402_70794_20141122_201302_inLine +BABEL_OP3_402_70794_20141122_201302_outLine +BABEL_OP3_402_71566_20150109_002519_inLine +BABEL_OP3_402_71566_20150109_002519_outLine +BABEL_OP3_402_71704_20141030_192615_inLine +BABEL_OP3_402_71704_20141030_192615_outLine +BABEL_OP3_402_72844_20150216_194719_inLine +BABEL_OP3_402_72844_20150216_194719_outLine +BABEL_OP3_402_73022_20150103_135209_inLine +BABEL_OP3_402_73022_20150103_135209_outLine +BABEL_OP3_402_73119_20141031_182314_inLine +BABEL_OP3_402_73119_20141031_182314_outLine +BABEL_OP3_402_73301_20141117_004450_inLine +BABEL_OP3_402_73301_20141117_004450_outLine +BABEL_OP3_402_73757_20141115_190524_inLine +BABEL_OP3_402_73757_20141115_190524_outLine +BABEL_OP3_402_74111_20150102_112305_inLine +BABEL_OP3_402_74111_20150102_112305_outLine +BABEL_OP3_402_74455_20150201_180158_inLine +BABEL_OP3_402_74455_20150201_180158_outLine +BABEL_OP3_402_74799_20141129_202734_inLine +BABEL_OP3_402_74799_20141129_202734_outLine +BABEL_OP3_402_75764_20150202_000719_inLine +BABEL_OP3_402_75764_20150202_000719_outLine +BABEL_OP3_402_75993_20141021_183118_inLine +BABEL_OP3_402_75993_20141021_183118_outLine +BABEL_OP3_402_76444_20141227_143452_inLine +BABEL_OP3_402_76444_20141227_143452_outLine +BABEL_OP3_402_76683_20141128_201732_inLine +BABEL_OP3_402_76683_20141128_201732_outLine +BABEL_OP3_402_78116_20141229_210212_inLine +BABEL_OP3_402_78116_20141229_210212_outLine +BABEL_OP3_402_78254_20141101_235022_inLine +BABEL_OP3_402_78254_20141101_235022_outLine +BABEL_OP3_402_78360_20150131_163647_inLine +BABEL_OP3_402_78360_20150131_163647_outLine +BABEL_OP3_402_78630_20140930_135924_inLine +BABEL_OP3_402_78630_20140930_135924_outLine +BABEL_OP3_402_79139_20141115_153558_inLine +BABEL_OP3_402_79139_20141115_153558_outLine +BABEL_OP3_402_79751_20141104_200346_inLine +BABEL_OP3_402_79751_20141104_200346_outLine +BABEL_OP3_402_79751_20141104_201600_inLine +BABEL_OP3_402_79751_20141104_201600_outLine +BABEL_OP3_402_80439_20141104_195124_inLine +BABEL_OP3_402_80439_20141104_195124_outLine +BABEL_OP3_402_81229_20141116_224932_inLine +BABEL_OP3_402_81229_20141116_224932_outLine +BABEL_OP3_402_81427_20141110_165047_inLine +BABEL_OP3_402_81427_20141110_165047_outLine 
+BABEL_OP3_402_82089_20141113_162038_inLine +BABEL_OP3_402_82089_20141113_162038_outLine +BABEL_OP3_402_82224_20150101_162311_inLine +BABEL_OP3_402_82224_20150101_162311_outLine +BABEL_OP3_402_82637_20141006_173314_inLine +BABEL_OP3_402_82637_20141006_173314_outLine +BABEL_OP3_402_83238_20141122_140740_inLine +BABEL_OP3_402_83238_20141122_140740_outLine +BABEL_OP3_402_83436_20141017_162042_inLine +BABEL_OP3_402_83436_20141017_162042_outLine +BABEL_OP3_402_83651_20141009_145412_inLine +BABEL_OP3_402_83651_20141009_145412_outLine +BABEL_OP3_402_84061_20141107_162356_inLine +BABEL_OP3_402_84061_20141107_162356_outLine +BABEL_OP3_402_84611_20141023_205020_inLine +BABEL_OP3_402_84611_20141023_205020_outLine +BABEL_OP3_402_84737_20150129_233418_inLine +BABEL_OP3_402_84737_20150129_233418_outLine +BABEL_OP3_402_84815_20141225_185456_inLine +BABEL_OP3_402_84815_20141225_185456_outLine +BABEL_OP3_402_85048_20141204_194855_inLine +BABEL_OP3_402_85048_20141204_194855_outLine +BABEL_OP3_402_85248_20150109_001722_inLine +BABEL_OP3_402_85248_20150109_001722_outLine +BABEL_OP3_402_85340_20141021_182050_inLine +BABEL_OP3_402_85340_20141021_182050_outLine +BABEL_OP3_402_86191_20141105_130254_inLine +BABEL_OP3_402_86191_20141105_130254_outLine +BABEL_OP3_402_86713_20150101_014831_inLine +BABEL_OP3_402_86713_20150101_014831_outLine +BABEL_OP3_402_86722_20141101_204411_inLine +BABEL_OP3_402_86722_20141101_204411_outLine +BABEL_OP3_402_86952_20141105_144737_inLine +BABEL_OP3_402_86952_20141105_144737_outLine +BABEL_OP3_402_87073_20140915_154336_inLine +BABEL_OP3_402_87073_20140915_154336_outLine +BABEL_OP3_402_87179_20150203_020351_inLine +BABEL_OP3_402_87179_20150203_020351_outLine +BABEL_OP3_402_87871_20141224_130949_inLine +BABEL_OP3_402_87871_20141224_130949_outLine +BABEL_OP3_402_88601_20141209_160621_inLine +BABEL_OP3_402_88601_20141209_160621_outLine +BABEL_OP3_402_88776_20140921_133554_inLine +BABEL_OP3_402_88776_20140921_133554_outLine +BABEL_OP3_402_88873_20140930_131622_inLine +BABEL_OP3_402_88873_20140930_131622_outLine +BABEL_OP3_402_89794_20141213_211839_inLine +BABEL_OP3_402_89794_20141213_211839_outLine +BABEL_OP3_402_89877_20150107_013739_inLine +BABEL_OP3_402_89877_20150107_013739_outLine +BABEL_OP3_402_89877_20150107_014426_inLine +BABEL_OP3_402_89877_20150107_014426_outLine +BABEL_OP3_402_90777_20141106_234557_inLine +BABEL_OP3_402_90777_20141106_234557_outLine +BABEL_OP3_402_91884_20150302_183207_inLine +BABEL_OP3_402_91884_20150302_183207_outLine +BABEL_OP3_402_91891_20150108_203636_inLine +BABEL_OP3_402_91891_20150108_203636_outLine +BABEL_OP3_402_91977_20141225_143539_inLine +BABEL_OP3_402_91977_20141225_143539_outLine +BABEL_OP3_402_92356_20150109_005846_inLine +BABEL_OP3_402_92356_20150109_005846_outLine +BABEL_OP3_402_92459_20141102_124516_inLine +BABEL_OP3_402_92459_20141102_124516_outLine +BABEL_OP3_402_92557_20150201_205110_inLine +BABEL_OP3_402_92557_20150201_205110_outLine +BABEL_OP3_402_92698_20141115_182138_inLine +BABEL_OP3_402_92698_20141115_182138_outLine +BABEL_OP3_402_93475_20141119_140615_inLine +BABEL_OP3_402_93475_20141119_140615_outLine +BABEL_OP3_402_93490_20150106_174211_inLine +BABEL_OP3_402_93490_20150106_174211_outLine +BABEL_OP3_402_93604_20150304_152208_inLine +BABEL_OP3_402_93604_20150304_152208_outLine +BABEL_OP3_402_93964_20141216_021155_inLine +BABEL_OP3_402_93964_20141216_021155_outLine +BABEL_OP3_402_94002_20141216_015659_inLine +BABEL_OP3_402_94002_20141216_015659_outLine +BABEL_OP3_402_94166_20150128_151103_inLine 
+BABEL_OP3_402_94166_20150128_151103_outLine +BABEL_OP3_402_94409_20141214_185032_inLine +BABEL_OP3_402_94409_20141214_185032_outLine +BABEL_OP3_402_94869_20140912_195117_inLine +BABEL_OP3_402_94869_20140912_195117_outLine +BABEL_OP3_402_94923_20141201_154601_inLine +BABEL_OP3_402_94923_20141201_154601_outLine +BABEL_OP3_402_95446_20150110_150658_inLine +BABEL_OP3_402_95446_20150110_150658_outLine +BABEL_OP3_402_96190_20141103_161533_inLine +BABEL_OP3_402_96190_20141103_161533_outLine +BABEL_OP3_402_96205_20141126_152921_inLine +BABEL_OP3_402_96205_20141126_152921_outLine +BABEL_OP3_402_97264_20150131_205411_inLine +BABEL_OP3_402_97264_20150131_205411_outLine +BABEL_OP3_402_97772_20140915_200919_inLine +BABEL_OP3_402_97772_20140915_200919_outLine +BABEL_OP3_402_97896_20141122_161128_inLine +BABEL_OP3_402_97896_20141122_161128_outLine +BABEL_OP3_402_98165_20141106_191239_inLine +BABEL_OP3_402_98165_20141106_191239_outLine +BABEL_OP3_402_98888_20141108_211953_inLine +BABEL_OP3_402_98888_20141108_211953_outLine +BABEL_OP3_402_99202_20141123_162817_inLine +BABEL_OP3_402_99202_20141123_162817_outLine +BABEL_OP3_402_99516_20140917_174712_inLine +BABEL_OP3_402_99516_20140917_174712_outLine +BABEL_OP3_402_99594_20141111_170413_inLine +BABEL_OP3_402_99594_20141111_170413_outLine +BABEL_OP3_402_99887_20150104_230431_inLine +BABEL_OP3_402_99887_20150104_230431_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/untranscribed-training.list b/egs/babel/s5d/conf/lists/402-javanese/untranscribed-training.list new file mode 100644 index 00000000000..f37a27dda8a --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/untranscribed-training.list @@ -0,0 +1,519 @@ +BABEL_OP3_402_10188_20140910_192244_inLine +BABEL_OP3_402_10411_20150414_130427_inLine +BABEL_OP3_402_10411_20150414_130427_outLine +BABEL_OP3_402_11352_20150313_163143_inLine +BABEL_OP3_402_11352_20150313_163143_outLine +BABEL_OP3_402_11797_20141023_002654_inLine +BABEL_OP3_402_11859_20150414_112255_inLine +BABEL_OP3_402_11859_20150414_112255_outLine +BABEL_OP3_402_12846_20150402_131845_inLine +BABEL_OP3_402_12846_20150402_131845_outLine +BABEL_OP3_402_13189_20150131_234926_inLine +BABEL_OP3_402_13189_20150131_234926_outLine +BABEL_OP3_402_13427_20141119_154114_outLine +BABEL_OP3_402_13483_20150212_013903_inLine +BABEL_OP3_402_13561_20141117_221410_inLine +BABEL_OP3_402_13561_20141117_221410_outLine +BABEL_OP3_402_13776_20150415_193538_inLine +BABEL_OP3_402_13776_20150415_193538_outLine +BABEL_OP3_402_13909_20150330_234000_inLine +BABEL_OP3_402_13909_20150330_234000_outLine +BABEL_OP3_402_14537_20150403_143039_inLine +BABEL_OP3_402_14537_20150403_143039_outLine +BABEL_OP3_402_14539_20150313_172051_inLine +BABEL_OP3_402_14539_20150313_172051_outLine +BABEL_OP3_402_15042_20150315_155219_inLine +BABEL_OP3_402_15042_20150315_155219_outLine +BABEL_OP3_402_15926_20141206_172139_inLine +BABEL_OP3_402_15926_20141206_172139_outLine +BABEL_OP3_402_17032_20141227_164236_inLine +BABEL_OP3_402_17032_20141227_164236_outLine +BABEL_OP3_402_17280_20141104_204141_inLine +BABEL_OP3_402_17440_20141226_140345_inLine +BABEL_OP3_402_17440_20141226_140345_outLine +BABEL_OP3_402_17615_20141214_223314_inLine +BABEL_OP3_402_17615_20141214_223314_outLine +BABEL_OP3_402_17615_20141214_231451_inLine +BABEL_OP3_402_17615_20141214_231451_outLine +BABEL_OP3_402_17923_20141107_150744_inLine +BABEL_OP3_402_18291_20150414_140952_inLine +BABEL_OP3_402_18291_20150414_140952_outLine +BABEL_OP3_402_18759_20150310_160837_inLine +BABEL_OP3_402_18759_20150310_160837_outLine 
+BABEL_OP3_402_19120_20150310_162359_inLine +BABEL_OP3_402_19120_20150310_162359_outLine +BABEL_OP3_402_19589_20150305_151023_inLine +BABEL_OP3_402_19589_20150305_151023_outLine +BABEL_OP3_402_19722_20141017_143042_inLine +BABEL_OP3_402_19722_20141017_143042_outLine +BABEL_OP3_402_19767_20150308_132354_inLine +BABEL_OP3_402_19767_20150308_132354_outLine +BABEL_OP3_402_19767_20150308_133241_inLine +BABEL_OP3_402_19767_20150308_133241_outLine +BABEL_OP3_402_19877_20150316_132035_inLine +BABEL_OP3_402_19877_20150316_132035_outLine +BABEL_OP3_402_19877_20150317_111220_inLine +BABEL_OP3_402_19877_20150317_111220_outLine +BABEL_OP3_402_21029_20141015_150255_inLine +BABEL_OP3_402_21029_20141015_150255_outLine +BABEL_OP3_402_21244_20150415_115154_inLine +BABEL_OP3_402_21244_20150415_115154_outLine +BABEL_OP3_402_22216_20141027_200224_inLine +BABEL_OP3_402_22612_20150106_171601_inLine +BABEL_OP3_402_22612_20150106_171601_outLine +BABEL_OP3_402_22641_20140919_145256_inLine +BABEL_OP3_402_22641_20140919_145256_outLine +BABEL_OP3_402_22826_20150316_130545_inLine +BABEL_OP3_402_22826_20150316_130545_outLine +BABEL_OP3_402_22965_20141011_161936_inLine +BABEL_OP3_402_23006_20141031_183901_inLine +BABEL_OP3_402_23006_20141031_185939_inLine +BABEL_OP3_402_23190_20141124_020320_inLine +BABEL_OP3_402_23980_20141115_204429_inLine +BABEL_OP3_402_23980_20141115_204429_outLine +BABEL_OP3_402_24231_20150302_210042_inLine +BABEL_OP3_402_24231_20150302_210042_outLine +BABEL_OP3_402_24569_20141226_184242_inLine +BABEL_OP3_402_24569_20141226_184242_outLine +BABEL_OP3_402_24586_20150315_000448_inLine +BABEL_OP3_402_24586_20150315_000448_outLine +BABEL_OP3_402_25961_20140916_191139_inLine +BABEL_OP3_402_25961_20140916_191139_outLine +BABEL_OP3_402_26574_20150105_134517_inLine +BABEL_OP3_402_26836_20141007_171841_inLine +BABEL_OP3_402_26836_20141007_171841_outLine +BABEL_OP3_402_27082_20141116_214244_inLine +BABEL_OP3_402_27082_20141116_214244_outLine +BABEL_OP3_402_27203_20150131_212241_inLine +BABEL_OP3_402_27478_20150310_153450_inLine +BABEL_OP3_402_27478_20150310_153450_outLine +BABEL_OP3_402_27478_20150310_154447_inLine +BABEL_OP3_402_27478_20150310_154447_outLine +BABEL_OP3_402_27490_20150312_182925_inLine +BABEL_OP3_402_27490_20150312_182925_outLine +BABEL_OP3_402_28422_20141206_152718_inLine +BABEL_OP3_402_28422_20141206_152718_outLine +BABEL_OP3_402_28477_20150108_171338_inLine +BABEL_OP3_402_28477_20150108_171338_outLine +BABEL_OP3_402_28522_20141208_162113_inLine +BABEL_OP3_402_28522_20141208_162113_outLine +BABEL_OP3_402_28585_20150103_145903_inLine +BABEL_OP3_402_28585_20150103_145903_outLine +BABEL_OP3_402_28775_20141028_172233_inLine +BABEL_OP3_402_28775_20141028_172233_outLine +BABEL_OP3_402_29039_20141226_192341_inLine +BABEL_OP3_402_29039_20141226_192341_outLine +BABEL_OP3_402_29352_20150416_231130_inLine +BABEL_OP3_402_29352_20150416_231130_outLine +BABEL_OP3_402_29416_20150111_173045_inLine +BABEL_OP3_402_29416_20150111_173045_outLine +BABEL_OP3_402_29439_20150309_224912_inLine +BABEL_OP3_402_29439_20150309_224912_outLine +BABEL_OP3_402_30058_20150214_030757_inLine +BABEL_OP3_402_30058_20150214_030757_outLine +BABEL_OP3_402_30084_20150330_183021_inLine +BABEL_OP3_402_30084_20150330_183021_outLine +BABEL_OP3_402_30180_20141122_150213_inLine +BABEL_OP3_402_30497_20150315_020936_inLine +BABEL_OP3_402_30497_20150315_020936_outLine +BABEL_OP3_402_31484_20150108_183810_inLine +BABEL_OP3_402_31484_20150108_183810_outLine +BABEL_OP3_402_32122_20141113_151807_inLine 
+BABEL_OP3_402_32122_20141113_151807_outLine
+BABEL_OP3_402_32861_20150131_115104_inLine
+BABEL_OP3_402_32861_20150131_115104_outLine
+BABEL_OP3_402_33216_20150417_123910_inLine
+BABEL_OP3_402_33216_20150417_123910_outLine
+BABEL_OP3_402_33424_20150330_235558_inLine
+BABEL_OP3_402_33424_20150330_235558_outLine
+BABEL_OP3_402_33424_20150331_001041_inLine
+BABEL_OP3_402_33424_20150331_001041_outLine
+BABEL_OP3_402_33476_20141115_210108_inLine
+BABEL_OP3_402_33476_20141115_210108_outLine
+BABEL_OP3_402_34486_20150412_192213_inLine
+BABEL_OP3_402_34486_20150412_192213_outLine
+BABEL_OP3_402_34826_20150112_001042_inLine
+BABEL_OP3_402_34826_20150112_001042_outLine
+BABEL_OP3_402_35467_20141028_184845_inLine
+BABEL_OP3_402_35467_20141028_184845_outLine
+BABEL_OP3_402_35885_20150323_112854_inLine
+BABEL_OP3_402_35885_20150323_112854_outLine
+BABEL_OP3_402_35885_20150323_114745_inLine
+BABEL_OP3_402_35885_20150323_114745_outLine
+BABEL_OP3_402_36017_20150314_194053_inLine
+BABEL_OP3_402_36017_20150314_194053_outLine
+BABEL_OP3_402_36039_20150216_122319_inLine
+BABEL_OP3_402_36039_20150216_122319_outLine
+BABEL_OP3_402_36059_20150314_212955_inLine
+BABEL_OP3_402_36059_20150314_212955_outLine
+BABEL_OP3_402_36594_20150215_003738_inLine
+BABEL_OP3_402_36594_20150215_003738_outLine
+BABEL_OP3_402_36642_20150319_114619_inLine
+BABEL_OP3_402_36642_20150319_114619_outLine
+BABEL_OP3_402_38689_20141217_212559_inLine
+BABEL_OP3_402_38689_20141217_212559_outLine
+BABEL_OP3_402_38979_20150215_215359_inLine
+BABEL_OP3_402_38979_20150215_215359_outLine
+BABEL_OP3_402_41174_20141115_214537_inLine
+BABEL_OP3_402_41174_20141115_214537_outLine
+BABEL_OP3_402_41272_20150314_043848_inLine
+BABEL_OP3_402_41272_20150314_043848_outLine
+BABEL_OP3_402_41469_20140919_165056_inLine
+BABEL_OP3_402_41469_20140919_165056_outLine
+BABEL_OP3_402_41542_20150131_173858_inLine
+BABEL_OP3_402_41890_20150304_001538_inLine
+BABEL_OP3_402_41890_20150304_001538_outLine
+BABEL_OP3_402_42155_20141204_014246_inLine
+BABEL_OP3_402_42155_20141204_014246_outLine
+BABEL_OP3_402_42231_20150108_013906_inLine
+BABEL_OP3_402_42231_20150108_013906_outLine
+BABEL_OP3_402_42299_20150415_172418_inLine
+BABEL_OP3_402_42299_20150415_172418_outLine
+BABEL_OP3_402_42497_20141011_142043_inLine
+BABEL_OP3_402_42771_20141125_154524_inLine
+BABEL_OP3_402_42771_20141125_154524_outLine
+BABEL_OP3_402_43285_20141208_172008_inLine
+BABEL_OP3_402_43285_20141208_172008_outLine
+BABEL_OP3_402_43388_20141124_193332_inLine
+BABEL_OP3_402_43388_20141124_193332_outLine
+BABEL_OP3_402_44114_20150416_181819_inLine
+BABEL_OP3_402_44114_20150416_181819_outLine
+BABEL_OP3_402_44114_20150416_183630_inLine
+BABEL_OP3_402_44114_20150416_183630_outLine
+BABEL_OP3_402_45140_20150314_190952_inLine
+BABEL_OP3_402_45140_20150314_190952_outLine
+BABEL_OP3_402_45235_20150201_004752_inLine
+BABEL_OP3_402_45235_20150201_004752_outLine
+BABEL_OP3_402_45771_20150412_195546_inLine
+BABEL_OP3_402_45771_20150412_195546_outLine
+BABEL_OP3_402_45777_20141106_211401_inLine
+BABEL_OP3_402_45777_20141106_211401_outLine
+BABEL_OP3_402_45851_20150315_161428_inLine
+BABEL_OP3_402_45851_20150315_161428_outLine
+BABEL_OP3_402_45851_20150315_162642_inLine
+BABEL_OP3_402_45851_20150315_162642_outLine
+BABEL_OP3_402_46066_20150103_140632_inLine
+BABEL_OP3_402_46066_20150103_140632_outLine
+BABEL_OP3_402_46169_20141229_163719_inLine
+BABEL_OP3_402_46169_20141229_163719_outLine
+BABEL_OP3_402_46315_20141229_191221_inLine
+BABEL_OP3_402_46315_20141229_191221_outLine
+BABEL_OP3_402_46330_20150112_002124_inLine
+BABEL_OP3_402_46330_20150112_002124_outLine
+BABEL_OP3_402_46589_20141217_181108_inLine
+BABEL_OP3_402_46589_20141217_181108_outLine
+BABEL_OP3_402_47215_20141013_000842_inLine
+BABEL_OP3_402_47487_20141110_190705_inLine
+BABEL_OP3_402_47487_20141110_190705_outLine
+BABEL_OP3_402_47802_20141201_001110_inLine
+BABEL_OP3_402_47802_20141201_001110_outLine
+BABEL_OP3_402_47878_20141124_200607_inLine
+BABEL_OP3_402_47878_20141124_200607_outLine
+BABEL_OP3_402_48016_20150417_192509_inLine
+BABEL_OP3_402_48016_20150417_192509_outLine
+BABEL_OP3_402_48758_20150313_180048_inLine
+BABEL_OP3_402_48758_20150313_180048_outLine
+BABEL_OP3_402_48907_20150308_125109_inLine
+BABEL_OP3_402_48907_20150308_125109_outLine
+BABEL_OP3_402_49216_20141102_152914_inLine
+BABEL_OP3_402_49216_20141102_152914_outLine
+BABEL_OP3_402_49767_20150416_181833_inLine
+BABEL_OP3_402_49767_20150416_181833_outLine
+BABEL_OP3_402_49945_20150326_155920_inLine
+BABEL_OP3_402_49945_20150326_155920_outLine
+BABEL_OP3_402_50745_20150314_184529_inLine
+BABEL_OP3_402_50745_20150314_184529_outLine
+BABEL_OP3_402_51417_20150106_233844_outLine
+BABEL_OP3_402_51819_20150108_181637_inLine
+BABEL_OP3_402_51819_20150108_181637_outLine
+BABEL_OP3_402_52381_20150117_151106_inLine
+BABEL_OP3_402_52447_20150331_155658_inLine
+BABEL_OP3_402_52447_20150331_155658_outLine
+BABEL_OP3_402_52614_20150305_202702_inLine
+BABEL_OP3_402_52614_20150305_202702_outLine
+BABEL_OP3_402_53665_20150305_203655_inLine
+BABEL_OP3_402_53665_20150305_203655_outLine
+BABEL_OP3_402_54390_20141022_220633_inLine
+BABEL_OP3_402_54477_20141224_161244_inLine
+BABEL_OP3_402_54567_20141116_185111_inLine
+BABEL_OP3_402_54567_20141116_185111_outLine
+BABEL_OP3_402_54827_20150316_003319_inLine
+BABEL_OP3_402_54827_20150316_003319_outLine
+BABEL_OP3_402_54827_20150316_134423_inLine
+BABEL_OP3_402_54827_20150316_134423_outLine
+BABEL_OP3_402_55013_20150305_194735_inLine
+BABEL_OP3_402_55013_20150305_194735_outLine
+BABEL_OP3_402_55106_20150203_162853_inLine
+BABEL_OP3_402_55106_20150203_162853_outLine
+BABEL_OP3_402_55259_20141105_180934_inLine
+BABEL_OP3_402_55259_20141105_180934_outLine
+BABEL_OP3_402_55349_20150310_153012_inLine
+BABEL_OP3_402_55349_20150310_153012_outLine
+BABEL_OP3_402_55381_20150204_201519_inLine
+BABEL_OP3_402_55381_20150204_203110_inLine
+BABEL_OP3_402_56076_20150306_190854_inLine
+BABEL_OP3_402_56076_20150306_190854_outLine
+BABEL_OP3_402_56307_20141206_163118_inLine
+BABEL_OP3_402_56307_20141206_163118_outLine
+BABEL_OP3_402_56523_20141119_203619_inLine
+BABEL_OP3_402_56523_20141119_203619_outLine
+BABEL_OP3_402_57067_20150103_190024_inLine
+BABEL_OP3_402_57067_20150103_190024_outLine
+BABEL_OP3_402_57464_20150314_153140_inLine
+BABEL_OP3_402_57464_20150314_153140_outLine
+BABEL_OP3_402_57548_20141121_144924_inLine
+BABEL_OP3_402_57548_20141121_144924_outLine
+BABEL_OP3_402_57566_20150215_212628_inLine
+BABEL_OP3_402_57566_20150215_212628_outLine
+BABEL_OP3_402_58047_20141120_201112_inLine
+BABEL_OP3_402_58047_20141120_201112_outLine
+BABEL_OP3_402_58107_20141228_011533_inLine
+BABEL_OP3_402_58107_20141228_014953_inLine
+BABEL_OP3_402_58145_20141207_150852_inLine
+BABEL_OP3_402_58145_20141207_150852_outLine
+BABEL_OP3_402_58585_20150131_223219_inLine
+BABEL_OP3_402_58585_20150131_223219_outLine
+BABEL_OP3_402_58717_20150204_003429_inLine
+BABEL_OP3_402_58717_20150204_003429_outLine
+BABEL_OP3_402_58815_20150106_203552_inLine
+BABEL_OP3_402_58815_20150106_203552_outLine
+BABEL_OP3_402_58821_20150112_000647_inLine
+BABEL_OP3_402_58821_20150112_000647_outLine
+BABEL_OP3_402_59028_20150331_004006_inLine
+BABEL_OP3_402_59028_20150331_004006_outLine
+BABEL_OP3_402_59291_20150114_175706_inLine
+BABEL_OP3_402_59291_20150114_175706_outLine
+BABEL_OP3_402_59635_20150101_154832_inLine
+BABEL_OP3_402_59635_20150101_154832_outLine
+BABEL_OP3_402_59898_20141103_202730_outLine
+BABEL_OP3_402_60115_20141206_190510_inLine
+BABEL_OP3_402_60115_20141206_190510_outLine
+BABEL_OP3_402_60299_20150413_191144_inLine
+BABEL_OP3_402_60299_20150413_191144_outLine
+BABEL_OP3_402_60310_20141224_122329_inLine
+BABEL_OP3_402_60310_20141224_122329_outLine
+BABEL_OP3_402_60436_20150305_163917_inLine
+BABEL_OP3_402_60436_20150305_163917_outLine
+BABEL_OP3_402_60498_20150402_122035_inLine
+BABEL_OP3_402_60498_20150402_122035_outLine
+BABEL_OP3_402_61348_20141112_165406_outLine
+BABEL_OP3_402_61348_20141116_174305_outLine
+BABEL_OP3_402_61348_20141116_175022_outLine
+BABEL_OP3_402_61678_20141126_211128_inLine
+BABEL_OP3_402_61963_20150201_203302_inLine
+BABEL_OP3_402_61963_20150201_203302_outLine
+BABEL_OP3_402_61971_20150311_025217_inLine
+BABEL_OP3_402_61971_20150311_025217_outLine
+BABEL_OP3_402_61971_20150311_032439_inLine
+BABEL_OP3_402_61971_20150311_032439_outLine
+BABEL_OP3_402_62362_20150415_200843_inLine
+BABEL_OP3_402_62362_20150415_200843_outLine
+BABEL_OP3_402_62724_20141225_182011_inLine
+BABEL_OP3_402_62724_20141225_182011_outLine
+BABEL_OP3_402_62734_20141110_172820_inLine
+BABEL_OP3_402_62734_20141110_172820_outLine
+BABEL_OP3_402_63307_20141122_114633_outLine
+BABEL_OP3_402_63445_20140925_160334_inLine
+BABEL_OP3_402_63445_20140925_160334_outLine
+BABEL_OP3_402_63523_20150409_150241_inLine
+BABEL_OP3_402_63523_20150409_150241_outLine
+BABEL_OP3_402_63648_20150317_142220_inLine
+BABEL_OP3_402_63648_20150317_142220_outLine
+BABEL_OP3_402_63648_20150317_143418_inLine
+BABEL_OP3_402_63648_20150317_143418_outLine
+BABEL_OP3_402_63757_20141127_190053_inLine
+BABEL_OP3_402_63938_20150216_054808_inLine
+BABEL_OP3_402_63938_20150216_054808_outLine
+BABEL_OP3_402_63999_20150329_140522_inLine
+BABEL_OP3_402_63999_20150329_140522_outLine
+BABEL_OP3_402_63999_20150329_144023_inLine
+BABEL_OP3_402_63999_20150329_144023_outLine
+BABEL_OP3_402_64722_20150409_172232_inLine
+BABEL_OP3_402_64722_20150409_172232_outLine
+BABEL_OP3_402_65077_20140915_211109_inLine
+BABEL_OP3_402_65561_20150108_014921_inLine
+BABEL_OP3_402_65561_20150108_014921_outLine
+BABEL_OP3_402_65640_20150313_234015_inLine
+BABEL_OP3_402_65640_20150313_234015_outLine
+BABEL_OP3_402_66305_20150314_195357_inLine
+BABEL_OP3_402_66305_20150314_195357_outLine
+BABEL_OP3_402_66971_20150327_134302_inLine
+BABEL_OP3_402_66971_20150327_134302_outLine
+BABEL_OP3_402_67085_20150307_155234_inLine
+BABEL_OP3_402_67085_20150307_155234_outLine
+BABEL_OP3_402_67622_20141001_173720_inLine
+BABEL_OP3_402_67622_20141001_173720_outLine
+BABEL_OP3_402_67659_20141102_162850_inLine
+BABEL_OP3_402_67659_20141102_162850_outLine
+BABEL_OP3_402_67964_20150313_144207_inLine
+BABEL_OP3_402_67964_20150313_144207_outLine
+BABEL_OP3_402_67999_20150103_202040_outLine
+BABEL_OP3_402_68748_20150131_213425_inLine
+BABEL_OP3_402_68748_20150131_213425_outLine
+BABEL_OP3_402_68924_20141213_175705_inLine
+BABEL_OP3_402_68924_20141213_175705_outLine
+BABEL_OP3_402_69633_20150103_195020_inLine
+BABEL_OP3_402_69633_20150103_195020_outLine
+BABEL_OP3_402_70110_20140927_010427_inLine
+BABEL_OP3_402_70110_20140927_010427_outLine
+BABEL_OP3_402_71038_20150203_000908_inLine
+BABEL_OP3_402_71038_20150203_000908_outLine
+BABEL_OP3_402_71189_20150411_001925_inLine
+BABEL_OP3_402_71189_20150411_001925_outLine
+BABEL_OP3_402_71333_20141103_014203_inLine
+BABEL_OP3_402_71333_20141103_014203_outLine
+BABEL_OP3_402_71780_20141019_173617_inLine
+BABEL_OP3_402_71780_20141019_173617_outLine
+BABEL_OP3_402_71850_20150415_111357_inLine
+BABEL_OP3_402_71850_20150415_111357_outLine
+BABEL_OP3_402_73005_20150307_164753_inLine
+BABEL_OP3_402_73005_20150307_164753_outLine
+BABEL_OP3_402_73430_20150208_181645_inLine
+BABEL_OP3_402_73430_20150208_181645_outLine
+BABEL_OP3_402_73446_20150412_180706_inLine
+BABEL_OP3_402_73446_20150412_180706_outLine
+BABEL_OP3_402_73518_20150103_211617_inLine
+BABEL_OP3_402_73518_20150103_211617_outLine
+BABEL_OP3_402_74641_20141124_185314_inLine
+BABEL_OP3_402_74641_20141124_185314_outLine
+BABEL_OP3_402_75981_20150327_130110_inLine
+BABEL_OP3_402_75981_20150327_130110_outLine
+BABEL_OP3_402_76218_20141110_195047_inLine
+BABEL_OP3_402_76218_20141110_195047_outLine
+BABEL_OP3_402_77744_20141105_195905_inLine
+BABEL_OP3_402_77744_20141105_195905_outLine
+BABEL_OP3_402_77974_20150305_190614_inLine
+BABEL_OP3_402_78016_20141104_182140_outLine
+BABEL_OP3_402_78016_20141104_194136_outLine
+BABEL_OP3_402_78511_20141228_004040_inLine
+BABEL_OP3_402_78511_20141228_010153_inLine
+BABEL_OP3_402_78976_20141030_185556_inLine
+BABEL_OP3_402_78976_20141030_185556_outLine
+BABEL_OP3_402_79045_20150114_155912_inLine
+BABEL_OP3_402_79045_20150114_155912_outLine
+BABEL_OP3_402_79080_20150110_190015_inLine
+BABEL_OP3_402_79080_20150110_190015_outLine
+BABEL_OP3_402_79129_20141130_180012_outLine
+BABEL_OP3_402_79660_20150407_192210_inLine
+BABEL_OP3_402_79660_20150407_192210_outLine
+BABEL_OP3_402_80655_20150314_181243_inLine
+BABEL_OP3_402_80655_20150314_181243_outLine
+BABEL_OP3_402_81149_20150314_060925_inLine
+BABEL_OP3_402_81149_20150314_060925_outLine
+BABEL_OP3_402_81287_20141217_002122_inLine
+BABEL_OP3_402_81287_20141217_002122_outLine
+BABEL_OP3_402_81671_20141230_233802_inLine
+BABEL_OP3_402_81671_20141230_233802_outLine
+BABEL_OP3_402_82035_20141221_140850_inLine
+BABEL_OP3_402_82035_20141221_140850_outLine
+BABEL_OP3_402_82391_20150105_161651_inLine
+BABEL_OP3_402_82391_20150105_161651_outLine
+BABEL_OP3_402_82473_20141013_175410_inLine
+BABEL_OP3_402_82473_20141013_175410_outLine
+BABEL_OP3_402_82622_20141104_150303_inLine
+BABEL_OP3_402_82742_20141224_153706_inLine
+BABEL_OP3_402_82742_20141224_153706_outLine
+BABEL_OP3_402_82904_20150314_144007_inLine
+BABEL_OP3_402_82904_20150314_144007_outLine
+BABEL_OP3_402_82979_20141007_015257_inLine
+BABEL_OP3_402_82979_20141007_015257_outLine
+BABEL_OP3_402_83394_20150413_180513_inLine
+BABEL_OP3_402_83394_20150413_180513_outLine
+BABEL_OP3_402_83455_20141222_014307_inLine
+BABEL_OP3_402_83455_20141222_014307_outLine
+BABEL_OP3_402_83545_20150306_214611_inLine
+BABEL_OP3_402_83545_20150306_214611_outLine
+BABEL_OP3_402_83851_20141101_203855_outLine
+BABEL_OP3_402_84339_20150309_193354_inLine
+BABEL_OP3_402_84339_20150309_193354_outLine
+BABEL_OP3_402_84466_20150311_162506_inLine
+BABEL_OP3_402_84466_20150311_162506_outLine
+BABEL_OP3_402_84466_20150311_164841_inLine
+BABEL_OP3_402_84466_20150311_164841_outLine
+BABEL_OP3_402_84547_20141031_011002_inLine
+BABEL_OP3_402_84547_20141031_011002_outLine
+BABEL_OP3_402_85028_20150203_222949_inLine
+BABEL_OP3_402_85028_20150203_222949_outLine
+BABEL_OP3_402_85519_20150205_150346_inLine
+BABEL_OP3_402_85519_20150205_150346_outLine
+BABEL_OP3_402_85647_20141202_010613_inLine
+BABEL_OP3_402_85647_20141202_010613_outLine
+BABEL_OP3_402_85651_20150111_195212_inLine
+BABEL_OP3_402_85651_20150111_195212_outLine
+BABEL_OP3_402_86557_20140919_134613_inLine
+BABEL_OP3_402_86557_20140919_134613_outLine
+BABEL_OP3_402_86597_20150415_233059_inLine
+BABEL_OP3_402_86597_20150415_233059_outLine
+BABEL_OP3_402_86628_20150406_142110_inLine
+BABEL_OP3_402_86628_20150406_142110_outLine
+BABEL_OP3_402_86676_20141208_175123_inLine
+BABEL_OP3_402_86676_20141208_175123_outLine
+BABEL_OP3_402_86826_20150414_193901_inLine
+BABEL_OP3_402_86826_20150414_193901_outLine
+BABEL_OP3_402_86830_20150131_191140_inLine
+BABEL_OP3_402_87305_20150415_152636_outLine
+BABEL_OP3_402_87545_20150308_173713_inLine
+BABEL_OP3_402_87545_20150308_173713_outLine
+BABEL_OP3_402_87693_20141105_154104_inLine
+BABEL_OP3_402_87693_20141105_154104_outLine
+BABEL_OP3_402_88661_20141206_194640_inLine
+BABEL_OP3_402_88661_20141206_194640_outLine
+BABEL_OP3_402_88661_20141206_195854_inLine
+BABEL_OP3_402_88661_20141206_195854_outLine
+BABEL_OP3_402_88661_20141206_200827_inLine
+BABEL_OP3_402_88661_20141206_200827_outLine
+BABEL_OP3_402_88783_20141228_150438_inLine
+BABEL_OP3_402_88783_20141228_150438_outLine
+BABEL_OP3_402_88865_20150316_142749_inLine
+BABEL_OP3_402_88865_20150316_142749_outLine
+BABEL_OP3_402_88938_20150104_171742_inLine
+BABEL_OP3_402_88938_20150104_171742_outLine
+BABEL_OP3_402_89560_20150106_231355_inLine
+BABEL_OP3_402_89560_20150106_231355_outLine
+BABEL_OP3_402_89695_20141115_012527_outLine
+BABEL_OP3_402_91372_20150306_193038_inLine
+BABEL_OP3_402_91372_20150306_193038_outLine
+BABEL_OP3_402_92077_20150313_145153_inLine
+BABEL_OP3_402_92077_20150313_145153_outLine
+BABEL_OP3_402_92736_20150106_183108_inLine
+BABEL_OP3_402_92736_20150106_183108_outLine
+BABEL_OP3_402_92809_20140924_164438_inLine
+BABEL_OP3_402_93469_20150308_223956_inLine
+BABEL_OP3_402_93469_20150308_223956_outLine
+BABEL_OP3_402_93515_20150318_184223_inLine
+BABEL_OP3_402_93515_20150318_184223_outLine
+BABEL_OP3_402_93861_20141126_021459_outLine
+BABEL_OP3_402_93861_20141202_013129_outLine
+BABEL_OP3_402_94141_20150312_184456_inLine
+BABEL_OP3_402_94141_20150312_184456_outLine
+BABEL_OP3_402_94237_20150319_141146_inLine
+BABEL_OP3_402_94237_20150319_141146_outLine
+BABEL_OP3_402_94262_20150308_140603_inLine
+BABEL_OP3_402_94262_20150308_140603_outLine
+BABEL_OP3_402_94442_20150326_164734_inLine
+BABEL_OP3_402_94442_20150326_164734_outLine
+BABEL_OP3_402_94449_20150315_122812_inLine
+BABEL_OP3_402_94449_20150315_122812_outLine
+BABEL_OP3_402_94465_20141227_155756_inLine
+BABEL_OP3_402_94465_20141227_155756_outLine
+BABEL_OP3_402_94487_20150312_163837_inLine
+BABEL_OP3_402_94487_20150312_163837_outLine
+BABEL_OP3_402_94587_20150128_234118_inLine
+BABEL_OP3_402_94587_20150128_234118_outLine
+BABEL_OP3_402_94745_20141214_225333_inLine
+BABEL_OP3_402_94745_20141214_225333_outLine
+BABEL_OP3_402_95935_20150106_123341_inLine
+BABEL_OP3_402_95935_20150106_123341_outLine
+BABEL_OP3_402_95966_20141110_203915_inLine
+BABEL_OP3_402_95966_20141110_203915_outLine
+BABEL_OP3_402_96446_20141106_013329_inLine
+BABEL_OP3_402_96446_20141106_013329_outLine
+BABEL_OP3_402_96525_20150102_120919_inLine
+BABEL_OP3_402_96842_20150327_193159_inLine
+BABEL_OP3_402_96842_20150327_193159_outLine
+BABEL_OP3_402_97363_20140929_125711_outLine
+BABEL_OP3_402_97731_20150102_215016_outLine
+BABEL_OP3_402_98365_20141120_164222_inLine
+BABEL_OP3_402_98506_20150314_191311_inLine
+BABEL_OP3_402_98506_20150314_191311_outLine
+BABEL_OP3_402_99732_20141224_145056_outLine
diff --git a/egs/babel/s5d/conf/lists/403-dholuo/dev.2h.list b/egs/babel/s5d/conf/lists/403-dholuo/dev.2h.list
new file mode 100644
index 00000000000..195f3e16bf3
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/403-dholuo/dev.2h.list
@@ -0,0 +1,122 @@
+BABEL_OP3_403_10019_20141027_010545_inLine
+BABEL_OP3_403_10019_20141027_010545_outLine
+BABEL_OP3_403_12220_20141026_204025_inLine
+BABEL_OP3_403_12220_20141026_204025_outLine
+BABEL_OP3_403_13178_20141128_223039_inLine
+BABEL_OP3_403_13178_20141128_223039_outLine
+BABEL_OP3_403_14440_20141129_004855_inLine
+BABEL_OP3_403_14440_20141129_004855_outLine
+BABEL_OP3_403_15042_20150313_165638_inLine
+BABEL_OP3_403_15042_20150313_165638_outLine
+BABEL_OP3_403_17440_20141210_204026_inLine
+BABEL_OP3_403_17440_20141210_204026_outLine
+BABEL_OP3_403_17440_20141210_204535_inLine
+BABEL_OP3_403_17440_20141210_204535_outLine
+BABEL_OP3_403_19663_20141029_190739_inLine
+BABEL_OP3_403_19663_20141029_190739_outLine
+BABEL_OP3_403_19782_20141216_211916_inLine
+BABEL_OP3_403_19782_20141216_211916_outLine
+BABEL_OP3_403_22216_20141014_202442_inLine
+BABEL_OP3_403_22216_20141014_202442_outLine
+BABEL_OP3_403_23151_20150108_032700_inLine
+BABEL_OP3_403_23151_20150108_032700_outLine
+BABEL_OP3_403_25012_20150201_000040_inLine
+BABEL_OP3_403_25012_20150201_000040_outLine
+BABEL_OP3_403_28606_20141205_184257_inLine
+BABEL_OP3_403_28606_20141205_184257_outLine
+BABEL_OP3_403_32727_20141210_200505_inLine
+BABEL_OP3_403_32727_20141210_200505_outLine
+BABEL_OP3_403_33175_20141014_202944_inLine
+BABEL_OP3_403_33175_20141014_202944_outLine
+BABEL_OP3_403_33251_20141118_224420_inLine
+BABEL_OP3_403_33251_20141118_224420_outLine
+BABEL_OP3_403_34564_20141212_001647_inLine
+BABEL_OP3_403_34564_20141212_001647_outLine
+BABEL_OP3_403_36341_20141013_224204_inLine
+BABEL_OP3_403_36341_20141013_224204_outLine
+BABEL_OP3_403_41100_20141006_230147_inLine
+BABEL_OP3_403_41100_20141006_230147_outLine
+BABEL_OP3_403_42243_20141016_231219_inLine
+BABEL_OP3_403_42243_20141016_231219_outLine
+BABEL_OP3_403_42497_20141004_235231_inLine
+BABEL_OP3_403_42497_20141004_235231_outLine
+BABEL_OP3_403_43388_20141028_212938_inLine
+BABEL_OP3_403_43388_20141028_212938_outLine
+BABEL_OP3_403_44847_20141127_190752_inLine
+BABEL_OP3_403_44847_20141127_190752_outLine
+BABEL_OP3_403_45560_20141012_204242_inLine
+BABEL_OP3_403_45560_20141012_204242_outLine
+BABEL_OP3_403_45697_20150211_181356_inLine
+BABEL_OP3_403_45697_20150211_181356_outLine
+BABEL_OP3_403_46881_20141014_210231_inLine
+BABEL_OP3_403_46881_20141014_210231_outLine
+BABEL_OP3_403_47877_20150105_200005_inLine
+BABEL_OP3_403_47877_20150105_200005_outLine
+BABEL_OP3_403_47882_20150131_215134_inLine
+BABEL_OP3_403_47882_20150131_215134_outLine
+BABEL_OP3_403_48789_20141031_205407_inLine
+BABEL_OP3_403_48789_20141031_205407_outLine
+BABEL_OP3_403_49502_20141013_230428_inLine
+BABEL_OP3_403_49502_20141013_230428_outLine
+BABEL_OP3_403_49902_20141025_214609_inLine
+BABEL_OP3_403_49902_20141025_214609_outLine
+BABEL_OP3_403_50726_20141015_222945_inLine
+BABEL_OP3_403_50726_20141015_222945_outLine
+BABEL_OP3_403_52438_20141005_211825_inLine
+BABEL_OP3_403_52438_20141005_211825_outLine
+BABEL_OP3_403_54160_20141012_225050_inLine
+BABEL_OP3_403_54160_20141012_225050_outLine
+BABEL_OP3_403_56090_20141001_220534_inLine
+BABEL_OP3_403_56090_20141001_220534_outLine
+BABEL_OP3_403_58850_20141030_190407_inLine
+BABEL_OP3_403_58850_20141030_190407_outLine
+BABEL_OP3_403_60538_20141007_015704_inLine
+BABEL_OP3_403_60538_20141007_015704_outLine
+BABEL_OP3_403_60706_20141014_225721_inLine
+BABEL_OP3_403_60706_20141014_225721_outLine
+BABEL_OP3_403_61225_20141014_225524_inLine
+BABEL_OP3_403_61225_20141014_225524_outLine
+BABEL_OP3_403_62456_20141107_224816_inLine
+BABEL_OP3_403_62456_20141107_224816_outLine
+BABEL_OP3_403_62545_20150203_205015_inLine
+BABEL_OP3_403_62545_20150203_205015_outLine
+BABEL_OP3_403_63081_20141013_184721_inLine
+BABEL_OP3_403_63081_20141013_184721_outLine
+BABEL_OP3_403_63938_20150304_184136_inLine
+BABEL_OP3_403_63938_20150304_184136_outLine
+BABEL_OP3_403_65723_20141004_231950_inLine
+BABEL_OP3_403_65723_20141004_231950_outLine
+BABEL_OP3_403_65882_20141005_214649_inLine
+BABEL_OP3_403_65882_20141005_214649_outLine
+BABEL_OP3_403_66026_20141207_212517_inLine
+BABEL_OP3_403_66026_20141207_212517_outLine
+BABEL_OP3_403_68306_20141206_183801_inLine
+BABEL_OP3_403_68306_20141206_183801_outLine
+BABEL_OP3_403_70110_20141016_195210_inLine
+BABEL_OP3_403_70110_20141016_195210_outLine
+BABEL_OP3_403_71780_20141006_005652_inLine
+BABEL_OP3_403_71780_20141006_005652_outLine
+BABEL_OP3_403_72349_20150313_194307_inLine
+BABEL_OP3_403_72349_20150313_194307_outLine
+BABEL_OP3_403_78877_20150203_012549_inLine
+BABEL_OP3_403_78877_20150203_012549_outLine
+BABEL_OP3_403_79820_20141005_212016_inLine
+BABEL_OP3_403_79820_20141005_212016_outLine
+BABEL_OP3_403_87280_20141217_230121_inLine
+BABEL_OP3_403_87280_20141217_230121_outLine
+BABEL_OP3_403_88938_20141219_211017_inLine
+BABEL_OP3_403_88938_20141219_211017_outLine
+BABEL_OP3_403_90777_20141028_012959_inLine
+BABEL_OP3_403_90777_20141028_012959_outLine
+BABEL_OP3_403_92356_20150305_033040_inLine
+BABEL_OP3_403_92356_20150305_033040_outLine
+BABEL_OP3_403_94035_20150201_183321_inLine
+BABEL_OP3_403_94035_20150201_183321_outLine
+BABEL_OP3_403_96446_20141013_215249_inLine
+BABEL_OP3_403_96446_20141013_215249_outLine
+BABEL_OP3_403_97264_20141220_220653_inLine
+BABEL_OP3_403_97264_20141220_220653_outLine
+BABEL_OP3_403_97849_20150313_175528_inLine
+BABEL_OP3_403_97849_20150313_175528_outLine
+BABEL_OP3_403_99813_20141106_211637_inLine
+BABEL_OP3_403_99813_20141106_211637_outLine
diff --git a/egs/babel/s5d/conf/lists/403-dholuo/dev.list b/egs/babel/s5d/conf/lists/403-dholuo/dev.list
new file mode 100644
index 00000000000..195f3e16bf3
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/403-dholuo/dev.list
@@ -0,0 +1,122 @@
+BABEL_OP3_403_10019_20141027_010545_inLine
+BABEL_OP3_403_10019_20141027_010545_outLine
+BABEL_OP3_403_12220_20141026_204025_inLine
+BABEL_OP3_403_12220_20141026_204025_outLine
+BABEL_OP3_403_13178_20141128_223039_inLine
+BABEL_OP3_403_13178_20141128_223039_outLine
+BABEL_OP3_403_14440_20141129_004855_inLine
+BABEL_OP3_403_14440_20141129_004855_outLine
+BABEL_OP3_403_15042_20150313_165638_inLine
+BABEL_OP3_403_15042_20150313_165638_outLine
+BABEL_OP3_403_17440_20141210_204026_inLine
+BABEL_OP3_403_17440_20141210_204026_outLine
+BABEL_OP3_403_17440_20141210_204535_inLine
+BABEL_OP3_403_17440_20141210_204535_outLine
+BABEL_OP3_403_19663_20141029_190739_inLine
+BABEL_OP3_403_19663_20141029_190739_outLine
+BABEL_OP3_403_19782_20141216_211916_inLine
+BABEL_OP3_403_19782_20141216_211916_outLine
+BABEL_OP3_403_22216_20141014_202442_inLine
+BABEL_OP3_403_22216_20141014_202442_outLine
+BABEL_OP3_403_23151_20150108_032700_inLine
+BABEL_OP3_403_23151_20150108_032700_outLine
+BABEL_OP3_403_25012_20150201_000040_inLine
+BABEL_OP3_403_25012_20150201_000040_outLine
+BABEL_OP3_403_28606_20141205_184257_inLine
+BABEL_OP3_403_28606_20141205_184257_outLine
+BABEL_OP3_403_32727_20141210_200505_inLine
+BABEL_OP3_403_32727_20141210_200505_outLine
+BABEL_OP3_403_33175_20141014_202944_inLine
+BABEL_OP3_403_33175_20141014_202944_outLine
+BABEL_OP3_403_33251_20141118_224420_inLine
+BABEL_OP3_403_33251_20141118_224420_outLine
+BABEL_OP3_403_34564_20141212_001647_inLine
+BABEL_OP3_403_34564_20141212_001647_outLine
+BABEL_OP3_403_36341_20141013_224204_inLine
+BABEL_OP3_403_36341_20141013_224204_outLine
+BABEL_OP3_403_41100_20141006_230147_inLine
+BABEL_OP3_403_41100_20141006_230147_outLine
+BABEL_OP3_403_42243_20141016_231219_inLine
+BABEL_OP3_403_42243_20141016_231219_outLine
+BABEL_OP3_403_42497_20141004_235231_inLine
+BABEL_OP3_403_42497_20141004_235231_outLine
+BABEL_OP3_403_43388_20141028_212938_inLine
+BABEL_OP3_403_43388_20141028_212938_outLine
+BABEL_OP3_403_44847_20141127_190752_inLine
+BABEL_OP3_403_44847_20141127_190752_outLine
+BABEL_OP3_403_45560_20141012_204242_inLine
+BABEL_OP3_403_45560_20141012_204242_outLine
+BABEL_OP3_403_45697_20150211_181356_inLine
+BABEL_OP3_403_45697_20150211_181356_outLine
+BABEL_OP3_403_46881_20141014_210231_inLine
+BABEL_OP3_403_46881_20141014_210231_outLine
+BABEL_OP3_403_47877_20150105_200005_inLine
+BABEL_OP3_403_47877_20150105_200005_outLine
+BABEL_OP3_403_47882_20150131_215134_inLine
+BABEL_OP3_403_47882_20150131_215134_outLine
+BABEL_OP3_403_48789_20141031_205407_inLine
+BABEL_OP3_403_48789_20141031_205407_outLine
+BABEL_OP3_403_49502_20141013_230428_inLine
+BABEL_OP3_403_49502_20141013_230428_outLine
+BABEL_OP3_403_49902_20141025_214609_inLine
+BABEL_OP3_403_49902_20141025_214609_outLine
+BABEL_OP3_403_50726_20141015_222945_inLine
+BABEL_OP3_403_50726_20141015_222945_outLine
+BABEL_OP3_403_52438_20141005_211825_inLine
+BABEL_OP3_403_52438_20141005_211825_outLine
+BABEL_OP3_403_54160_20141012_225050_inLine
+BABEL_OP3_403_54160_20141012_225050_outLine
+BABEL_OP3_403_56090_20141001_220534_inLine
+BABEL_OP3_403_56090_20141001_220534_outLine
+BABEL_OP3_403_58850_20141030_190407_inLine
+BABEL_OP3_403_58850_20141030_190407_outLine
+BABEL_OP3_403_60538_20141007_015704_inLine
+BABEL_OP3_403_60538_20141007_015704_outLine
+BABEL_OP3_403_60706_20141014_225721_inLine
+BABEL_OP3_403_60706_20141014_225721_outLine
+BABEL_OP3_403_61225_20141014_225524_inLine
+BABEL_OP3_403_61225_20141014_225524_outLine
+BABEL_OP3_403_62456_20141107_224816_inLine
+BABEL_OP3_403_62456_20141107_224816_outLine
+BABEL_OP3_403_62545_20150203_205015_inLine
+BABEL_OP3_403_62545_20150203_205015_outLine
+BABEL_OP3_403_63081_20141013_184721_inLine
+BABEL_OP3_403_63081_20141013_184721_outLine
+BABEL_OP3_403_63938_20150304_184136_inLine
+BABEL_OP3_403_63938_20150304_184136_outLine
+BABEL_OP3_403_65723_20141004_231950_inLine
+BABEL_OP3_403_65723_20141004_231950_outLine
+BABEL_OP3_403_65882_20141005_214649_inLine
+BABEL_OP3_403_65882_20141005_214649_outLine
+BABEL_OP3_403_66026_20141207_212517_inLine
+BABEL_OP3_403_66026_20141207_212517_outLine
+BABEL_OP3_403_68306_20141206_183801_inLine
+BABEL_OP3_403_68306_20141206_183801_outLine
+BABEL_OP3_403_70110_20141016_195210_inLine
+BABEL_OP3_403_70110_20141016_195210_outLine
+BABEL_OP3_403_71780_20141006_005652_inLine
+BABEL_OP3_403_71780_20141006_005652_outLine
+BABEL_OP3_403_72349_20150313_194307_inLine
+BABEL_OP3_403_72349_20150313_194307_outLine
+BABEL_OP3_403_78877_20150203_012549_inLine
+BABEL_OP3_403_78877_20150203_012549_outLine
+BABEL_OP3_403_79820_20141005_212016_inLine
+BABEL_OP3_403_79820_20141005_212016_outLine
+BABEL_OP3_403_87280_20141217_230121_inLine
+BABEL_OP3_403_87280_20141217_230121_outLine
+BABEL_OP3_403_88938_20141219_211017_inLine
+BABEL_OP3_403_88938_20141219_211017_outLine
+BABEL_OP3_403_90777_20141028_012959_inLine
+BABEL_OP3_403_90777_20141028_012959_outLine
+BABEL_OP3_403_92356_20150305_033040_inLine
+BABEL_OP3_403_92356_20150305_033040_outLine
+BABEL_OP3_403_94035_20150201_183321_inLine
+BABEL_OP3_403_94035_20150201_183321_outLine
+BABEL_OP3_403_96446_20141013_215249_inLine
+BABEL_OP3_403_96446_20141013_215249_outLine
+BABEL_OP3_403_97264_20141220_220653_inLine
+BABEL_OP3_403_97264_20141220_220653_outLine
+BABEL_OP3_403_97849_20150313_175528_inLine
+BABEL_OP3_403_97849_20150313_175528_outLine
+BABEL_OP3_403_99813_20141106_211637_inLine
+BABEL_OP3_403_99813_20141106_211637_outLine
diff --git a/egs/babel/s5d/conf/lists/403-dholuo/eval.list b/egs/babel/s5d/conf/lists/403-dholuo/eval.list
new file mode 100644
index 00000000000..4fc564e5b78
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/403-dholuo/eval.list
@@ -0,0 +1,182 @@
+BABEL_OP3_403_13040_20141004_235933_inLine
+BABEL_OP3_403_13040_20141004_235933_outLine
+BABEL_OP3_403_13929_20150204_022153_inLine
+BABEL_OP3_403_13929_20150204_022153_outLine
+BABEL_OP3_403_14350_20141007_001036_inLine
+BABEL_OP3_403_14350_20141007_001036_outLine
+BABEL_OP3_403_14575_20150205_194428_inLine
+BABEL_OP3_403_14575_20150205_194428_outLine
+BABEL_OP3_403_15262_20141008_011520_inLine
+BABEL_OP3_403_15262_20141008_011520_outLine
+BABEL_OP3_403_15848_20141001_223454_inLine
+BABEL_OP3_403_15848_20141001_223454_outLine
+BABEL_OP3_403_15902_20141006_235206_inLine
+BABEL_OP3_403_15902_20141006_235206_outLine
+BABEL_OP3_403_16056_20141007_015057_inLine
+BABEL_OP3_403_16056_20141007_015057_outLine
+BABEL_OP3_403_16184_20141003_220544_inLine
+BABEL_OP3_403_16184_20141003_220544_outLine
+BABEL_OP3_403_17165_20141103_175355_inLine
+BABEL_OP3_403_17165_20141103_175355_outLine
+BABEL_OP3_403_19120_20150320_014910_inLine
+BABEL_OP3_403_19120_20150320_014910_outLine
+BABEL_OP3_403_19545_20141107_193534_inLine
+BABEL_OP3_403_19545_20141107_193534_outLine
+BABEL_OP3_403_21029_20141010_220724_inLine
+BABEL_OP3_403_21029_20141010_220724_outLine
+BABEL_OP3_403_21581_20141026_010129_inLine
+BABEL_OP3_403_21581_20141026_010129_outLine
+BABEL_OP3_403_23260_20150313_211958_inLine
+BABEL_OP3_403_23260_20150313_211958_outLine
+BABEL_OP3_403_29777_20141218_221709_inLine
+BABEL_OP3_403_29777_20141218_221709_outLine
+BABEL_OP3_403_30497_20150314_160011_inLine
+BABEL_OP3_403_30497_20150314_160011_outLine
+BABEL_OP3_403_31583_20141216_190359_inLine
+BABEL_OP3_403_31583_20141216_190359_outLine
+BABEL_OP3_403_32048_20141219_213429_inLine
+BABEL_OP3_403_32048_20141219_213429_outLine
+BABEL_OP3_403_32959_20141208_210738_inLine
+BABEL_OP3_403_32959_20141208_210738_outLine
+BABEL_OP3_403_33635_20141029_220701_inLine
+BABEL_OP3_403_33635_20141029_220701_outLine
+BABEL_OP3_403_35069_20141219_230111_inLine
+BABEL_OP3_403_35069_20141219_230111_outLine
+BABEL_OP3_403_35885_20150319_180147_inLine
+BABEL_OP3_403_35885_20150319_180147_outLine
+BABEL_OP3_403_36219_20141024_182040_inLine
+BABEL_OP3_403_36219_20141024_182040_outLine
+BABEL_OP3_403_37281_20141028_212708_inLine
+BABEL_OP3_403_37281_20141028_212708_outLine
+BABEL_OP3_403_39277_20150204_013404_inLine
+BABEL_OP3_403_39277_20150204_013404_outLine
+BABEL_OP3_403_44290_20150313_161518_inLine
+BABEL_OP3_403_44290_20150313_161518_outLine
+BABEL_OP3_403_44681_20150202_013205_inLine
+BABEL_OP3_403_44681_20150202_013205_outLine
+BABEL_OP3_403_45140_20150314_202244_inLine
+BABEL_OP3_403_45140_20150314_202244_outLine
+BABEL_OP3_403_47270_20150305_004557_inLine
+BABEL_OP3_403_47270_20150305_004557_outLine
+BABEL_OP3_403_47309_20150131_232140_inLine
+BABEL_OP3_403_47309_20150131_232140_outLine
+BABEL_OP3_403_50090_20141128_005549_inLine
+BABEL_OP3_403_50090_20141128_005549_outLine
+BABEL_OP3_403_52025_20141016_194738_inLine
+BABEL_OP3_403_52025_20141016_194738_outLine
+BABEL_OP3_403_52381_20150106_000156_inLine
+BABEL_OP3_403_52381_20150106_000156_outLine
+BABEL_OP3_403_53419_20141216_202007_inLine
+BABEL_OP3_403_53419_20141216_202007_outLine
+BABEL_OP3_403_54046_20141221_013345_inLine
+BABEL_OP3_403_54046_20141221_013345_outLine
+BABEL_OP3_403_54405_20141105_215311_inLine
+BABEL_OP3_403_54405_20141105_215311_outLine
+BABEL_OP3_403_56429_20141004_212928_inLine
+BABEL_OP3_403_56429_20141004_212928_outLine
+BABEL_OP3_403_56523_20141027_234249_inLine
+BABEL_OP3_403_56523_20141027_234249_outLine
+BABEL_OP3_403_56720_20141204_213606_inLine
+BABEL_OP3_403_56720_20141204_213606_outLine
+BABEL_OP3_403_56743_20141027_224527_inLine
+BABEL_OP3_403_56743_20141027_224527_outLine
+BABEL_OP3_403_57654_20141004_222740_inLine
+BABEL_OP3_403_57654_20141004_222740_outLine
+BABEL_OP3_403_57922_20141119_003457_inLine
+BABEL_OP3_403_57922_20141119_003457_outLine
+BABEL_OP3_403_60508_20141015_194223_inLine
+BABEL_OP3_403_60508_20141015_194223_outLine
+BABEL_OP3_403_60626_20141007_020141_inLine
+BABEL_OP3_403_60626_20141007_020141_outLine
+BABEL_OP3_403_61219_20141025_212855_inLine
+BABEL_OP3_403_61219_20141025_212855_outLine
+BABEL_OP3_403_62286_20141029_183256_inLine
+BABEL_OP3_403_62286_20141029_183256_outLine
+BABEL_OP3_403_62852_20141016_194911_inLine
+BABEL_OP3_403_62852_20141016_194911_outLine
+BABEL_OP3_403_63445_20141016_201418_inLine
+BABEL_OP3_403_63445_20141016_201418_outLine
+BABEL_OP3_403_63481_20141014_201444_inLine
+BABEL_OP3_403_63481_20141014_201444_outLine
+BABEL_OP3_403_64494_20141005_003938_inLine
+BABEL_OP3_403_64494_20141005_003938_outLine
+BABEL_OP3_403_64796_20141014_213212_inLine
+BABEL_OP3_403_64796_20141014_213212_outLine
+BABEL_OP3_403_64902_20150319_231944_inLine
+BABEL_OP3_403_64902_20150319_231944_outLine
+BABEL_OP3_403_65477_20141029_190115_inLine
+BABEL_OP3_403_65477_20141029_190115_outLine
+BABEL_OP3_403_66519_20141026_200412_inLine
+BABEL_OP3_403_66519_20141026_200412_outLine
+BABEL_OP3_403_67552_20141204_235240_inLine
+BABEL_OP3_403_67552_20141204_235240_outLine
+BABEL_OP3_403_67842_20141005_213633_inLine
+BABEL_OP3_403_67842_20141005_213633_outLine
+BABEL_OP3_403_70639_20150201_224933_inLine
+BABEL_OP3_403_70639_20150201_224933_outLine
+BABEL_OP3_403_71282_20150304_001933_inLine
+BABEL_OP3_403_71282_20150304_001933_outLine
+BABEL_OP3_403_71566_20141210_221853_inLine
+BABEL_OP3_403_71566_20141210_221853_outLine
+BABEL_OP3_403_71704_20141005_194010_inLine
+BABEL_OP3_403_71704_20141005_194010_outLine
+BABEL_OP3_403_73042_20141004_213024_inLine
+BABEL_OP3_403_73042_20141004_213024_outLine
+BABEL_OP3_403_73119_20141024_013927_inLine
+BABEL_OP3_403_73119_20141024_013927_outLine
+BABEL_OP3_403_74641_20141104_204017_inLine
+BABEL_OP3_403_74641_20141104_204017_outLine
+BABEL_OP3_403_75359_20150306_233416_inLine
+BABEL_OP3_403_75359_20150306_233416_outLine
+BABEL_OP3_403_77567_20141016_212214_inLine
+BABEL_OP3_403_77567_20141016_212214_outLine
+BABEL_OP3_403_80655_20150313_202935_inLine
+BABEL_OP3_403_80655_20150313_202935_outLine
+BABEL_OP3_403_81229_20141028_221835_inLine
+BABEL_OP3_403_81229_20141028_221835_outLine
+BABEL_OP3_403_81404_20141027_225835_inLine
+BABEL_OP3_403_81404_20141027_225835_outLine
+BABEL_OP3_403_81427_20141025_192229_inLine
+BABEL_OP3_403_81427_20141025_192229_outLine
+BABEL_OP3_403_81581_20150205_214253_inLine
+BABEL_OP3_403_81581_20150205_214253_outLine
+BABEL_OP3_403_82966_20141215_232026_inLine
+BABEL_OP3_403_82966_20141215_232026_outLine
+BABEL_OP3_403_83062_20150314_182244_inLine
+BABEL_OP3_403_83062_20150314_182244_outLine
+BABEL_OP3_403_84715_20150106_201437_inLine
+BABEL_OP3_403_84715_20150106_201437_outLine
+BABEL_OP3_403_86748_20150305_041204_inLine
+BABEL_OP3_403_86748_20150305_041204_outLine
+BABEL_OP3_403_87629_20141107_235904_inLine
+BABEL_OP3_403_87629_20141107_235904_outLine
+BABEL_OP3_403_88686_20141014_185730_inLine
+BABEL_OP3_403_88686_20141014_185730_outLine
+BABEL_OP3_403_88873_20141005_183048_inLine
+BABEL_OP3_403_88873_20141005_183048_outLine
+BABEL_OP3_403_90080_20150305_215921_inLine
+BABEL_OP3_403_90080_20150305_215921_outLine
+BABEL_OP3_403_91825_20141016_185730_inLine
+BABEL_OP3_403_91825_20141016_185730_outLine
+BABEL_OP3_403_94166_20150304_233340_inLine
+BABEL_OP3_403_94166_20150304_233340_outLine
+BABEL_OP3_403_94212_20150203_035128_inLine
+BABEL_OP3_403_94212_20150203_035128_outLine
+BABEL_OP3_403_94587_20141206_200001_inLine
+BABEL_OP3_403_94587_20141206_200001_outLine
+BABEL_OP3_403_95077_20141211_172737_inLine
+BABEL_OP3_403_95077_20141211_172737_outLine
+BABEL_OP3_403_95490_20141015_192814_inLine
+BABEL_OP3_403_95490_20141015_192814_outLine
+BABEL_OP3_403_96088_20150307_205122_inLine
+BABEL_OP3_403_96088_20150307_205122_outLine
+BABEL_OP3_403_96934_20141025_215407_inLine
+BABEL_OP3_403_96934_20141025_215407_outLine
+BABEL_OP3_403_98255_20150204_194911_inLine
+BABEL_OP3_403_98255_20150204_194911_outLine
+BABEL_OP3_403_98580_20141029_181611_inLine
+BABEL_OP3_403_98580_20141029_181611_outLine
+BABEL_OP3_403_98888_20141028_214127_inLine
+BABEL_OP3_403_98888_20141028_214127_outLine
+BABEL_OP3_403_99264_20141216_011902_inLine
+BABEL_OP3_403_99264_20141216_011902_outLine
diff --git a/egs/babel/s5d/conf/lists/403-dholuo/sub-train.list b/egs/babel/s5d/conf/lists/403-dholuo/sub-train.list
new file mode 100644
index 00000000000..138a27efd31
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/403-dholuo/sub-train.list
@@ -0,0 +1,122 @@
+BABEL_OP3_403_11681_20141010_203514_inLine
+BABEL_OP3_403_11681_20141010_203514_outLine
+BABEL_OP3_403_13324_20141004_204835_inLine
+BABEL_OP3_403_13324_20141004_204835_outLine
+BABEL_OP3_403_13490_20141103_182258_inLine
+BABEL_OP3_403_13490_20141103_182258_outLine
+BABEL_OP3_403_16475_20141027_225406_inLine
+BABEL_OP3_403_16475_20141027_225406_outLine
+BABEL_OP3_403_16938_20141117_002438_inLine
+BABEL_OP3_403_16938_20141117_002438_outLine
+BABEL_OP3_403_17280_20141027_223651_inLine
+BABEL_OP3_403_17280_20141027_223651_outLine
+BABEL_OP3_403_19722_20141013_214859_inLine
+BABEL_OP3_403_19722_20141013_214859_outLine
+BABEL_OP3_403_19749_20150211_210545_inLine
+BABEL_OP3_403_19749_20150211_210545_outLine
+BABEL_OP3_403_22321_20141012_234503_inLine
+BABEL_OP3_403_22321_20141012_234503_outLine
+BABEL_OP3_403_23893_20150311_211416_inLine
+BABEL_OP3_403_23893_20150311_211416_outLine
+BABEL_OP3_403_24589_20141023_173937_inLine
+BABEL_OP3_403_24589_20141023_173937_outLine
+BABEL_OP3_403_25961_20141016_201537_inLine
+BABEL_OP3_403_25961_20141016_201537_outLine
+BABEL_OP3_403_28190_20141218_232404_inLine
+BABEL_OP3_403_28190_20141218_232404_outLine
+BABEL_OP3_403_28775_20141005_002735_inLine
+BABEL_OP3_403_28775_20141005_002735_outLine
+BABEL_OP3_403_28945_20141006_202723_inLine
+BABEL_OP3_403_28945_20141006_202723_outLine
+BABEL_OP3_403_29168_20141013_195745_inLine
+BABEL_OP3_403_29168_20141013_195745_outLine
+BABEL_OP3_403_29323_20150303_223419_inLine
+BABEL_OP3_403_29323_20150303_223419_outLine
+BABEL_OP3_403_31109_20141107_213704_inLine
+BABEL_OP3_403_31109_20141107_213704_outLine
+BABEL_OP3_403_31490_20141006_210241_inLine
+BABEL_OP3_403_31490_20141006_210241_outLine
+BABEL_OP3_403_31624_20141014_211203_inLine
+BABEL_OP3_403_31624_20141014_211203_outLine
+BABEL_OP3_403_32861_20150107_160647_inLine
+BABEL_OP3_403_32861_20150107_160647_outLine
+BABEL_OP3_403_33840_20141219_200146_inLine
+BABEL_OP3_403_33840_20141219_200146_outLine
+BABEL_OP3_403_36632_20150206_041325_inLine
+BABEL_OP3_403_36632_20150206_041325_outLine
+BABEL_OP3_403_36990_20141030_231441_inLine
+BABEL_OP3_403_36990_20141030_231441_outLine
+BABEL_OP3_403_38878_20141121_184540_inLine
+BABEL_OP3_403_38878_20141121_184540_outLine
+BABEL_OP3_403_42718_20150306_202240_inLine
+BABEL_OP3_403_42718_20150306_202240_outLine
+BABEL_OP3_403_43368_20141026_000458_inLine
+BABEL_OP3_403_43368_20141026_000458_outLine
+BABEL_OP3_403_44868_20141129_220211_inLine
+BABEL_OP3_403_44868_20141129_220211_outLine
+BABEL_OP3_403_44961_20141006_233622_inLine
+BABEL_OP3_403_44961_20141006_233622_outLine
+BABEL_OP3_403_44961_20141006_235203_inLine
+BABEL_OP3_403_44961_20141006_235203_outLine
+BABEL_OP3_403_47215_20141007_230222_inLine
+BABEL_OP3_403_47215_20141007_230222_outLine
+BABEL_OP3_403_48663_20150306_181741_inLine
+BABEL_OP3_403_48663_20150306_181741_outLine
+BABEL_OP3_403_51955_20141004_212210_inLine
+BABEL_OP3_403_51955_20141004_212210_outLine
+BABEL_OP3_403_53842_20141031_193507_inLine
+BABEL_OP3_403_53842_20141031_193507_outLine
+BABEL_OP3_403_55950_20150312_174125_inLine
+BABEL_OP3_403_55950_20150312_174125_outLine
+BABEL_OP3_403_56198_20141005_222956_inLine
+BABEL_OP3_403_56198_20141005_222956_outLine
+BABEL_OP3_403_58047_20141118_184454_inLine
+BABEL_OP3_403_58047_20141118_184454_outLine
+BABEL_OP3_403_58585_20150106_172737_inLine
+BABEL_OP3_403_58585_20150106_172737_outLine
+BABEL_OP3_403_60310_20141217_205059_inLine
+BABEL_OP3_403_60310_20141217_205059_outLine
+BABEL_OP3_403_60418_20141129_235907_inLine
+BABEL_OP3_403_60418_20141129_235907_outLine
+BABEL_OP3_403_61348_20141103_230857_inLine
+BABEL_OP3_403_61348_20141103_230857_outLine
+BABEL_OP3_403_65640_20150314_163101_inLine
+BABEL_OP3_403_65640_20150314_163101_outLine
+BABEL_OP3_403_69107_20141106_000151_inLine
+BABEL_OP3_403_69107_20141106_000151_outLine
+BABEL_OP3_403_69746_20141220_191513_inLine
+BABEL_OP3_403_69746_20141220_191513_outLine
+BABEL_OP3_403_72007_20141205_002010_inLine
+BABEL_OP3_403_72007_20141205_002010_outLine
+BABEL_OP3_403_72110_20141210_212045_inLine
+BABEL_OP3_403_72110_20141210_212045_outLine
+BABEL_OP3_403_72844_20141004_005248_inLine
+BABEL_OP3_403_72844_20141004_005248_outLine
+BABEL_OP3_403_75223_20141016_194054_inLine
+BABEL_OP3_403_75223_20141016_194054_outLine
+BABEL_OP3_403_77974_20150312_200046_inLine
+BABEL_OP3_403_77974_20150312_200046_outLine
+BABEL_OP3_403_78360_20150107_231519_inLine
+BABEL_OP3_403_78360_20150107_231519_outLine
+BABEL_OP3_403_78544_20141201_192016_inLine
+BABEL_OP3_403_78544_20141201_192016_outLine
+BABEL_OP3_403_82391_20141206_001207_inLine
+BABEL_OP3_403_82391_20141206_001207_outLine
+BABEL_OP3_403_83436_20141012_221126_inLine
+BABEL_OP3_403_83436_20141012_221126_outLine
+BABEL_OP3_403_84469_20141211_002526_inLine
+BABEL_OP3_403_84469_20141211_002526_outLine
+BABEL_OP3_403_84605_20141005_214529_inLine
+BABEL_OP3_403_84605_20141005_214529_outLine
+BABEL_OP3_403_87921_20141210_233414_inLine
+BABEL_OP3_403_87921_20141210_233414_outLine
+BABEL_OP3_403_92509_20141014_232528_inLine
+BABEL_OP3_403_92509_20141014_232528_outLine
+BABEL_OP3_403_95269_20141026_235206_inLine
+BABEL_OP3_403_95269_20141026_235206_outLine
+BABEL_OP3_403_96324_20141014_194024_inLine
+BABEL_OP3_403_96324_20141014_194024_outLine
+BABEL_OP3_403_97588_20141015_193851_inLine
+BABEL_OP3_403_97588_20141015_193851_outLine
+BABEL_OP3_403_98506_20150319_151741_inLine
+BABEL_OP3_403_98506_20150319_151741_outLine
diff --git a/egs/babel/s5d/conf/lists/403-dholuo/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/403-dholuo/sub-train.untranscribed.list
new file mode 100644
index 00000000000..b22e404cf6c
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/403-dholuo/sub-train.untranscribed.list
@@ -0,0 +1,380 @@
+BABEL_OP3_403_10313_20150130_193605_inLine
+BABEL_OP3_403_10313_20150130_193605_outLine
+BABEL_OP3_403_10469_20150130_211522_inLine
+BABEL_OP3_403_10469_20150130_211522_outLine
+BABEL_OP3_403_10966_20141025_191612_inLine
+BABEL_OP3_403_10966_20141025_191612_outLine
+BABEL_OP3_403_11663_20141205_201130_inLine
+BABEL_OP3_403_11663_20141205_201130_outLine
+BABEL_OP3_403_11663_20141205_204332_inLine
+BABEL_OP3_403_11663_20141205_204332_outLine
+BABEL_OP3_403_11797_20141013_012556_inLine
+BABEL_OP3_403_11797_20141013_012556_outLine
+BABEL_OP3_403_12606_20150314_170830_inLine
+BABEL_OP3_403_12606_20150314_170830_outLine
+BABEL_OP3_403_12609_20150313_183914_inLine
+BABEL_OP3_403_12609_20150313_183914_outLine
+BABEL_OP3_403_13483_20141205_201059_inLine
+BABEL_OP3_403_13483_20141205_201059_outLine
+BABEL_OP3_403_14807_20141117_201842_inLine
+BABEL_OP3_403_14807_20141117_201842_outLine
+BABEL_OP3_403_14814_20141023_004823_inLine
+BABEL_OP3_403_14814_20141023_004823_outLine
+BABEL_OP3_403_14929_20141029_222741_inLine
+BABEL_OP3_403_14929_20141029_222741_outLine
+BABEL_OP3_403_15281_20150307_201454_inLine
+BABEL_OP3_403_15281_20150307_201454_outLine
+BABEL_OP3_403_15322_20150319_184826_inLine
+BABEL_OP3_403_15322_20150319_184826_outLine
+BABEL_OP3_403_15702_20141202_010728_inLine
+BABEL_OP3_403_15702_20141202_010728_outLine
+BABEL_OP3_403_16749_20141221_021003_inLine
+BABEL_OP3_403_16749_20141221_021003_outLine
+BABEL_OP3_403_16800_20141216_181241_inLine
+BABEL_OP3_403_16800_20141216_181241_outLine
+BABEL_OP3_403_16839_20141219_222837_inLine
+BABEL_OP3_403_16839_20141219_222837_outLine
+BABEL_OP3_403_16924_20141202_004432_inLine
+BABEL_OP3_403_16924_20141202_004432_outLine
+BABEL_OP3_403_17496_20141202_004936_inLine
+BABEL_OP3_403_17496_20141202_004936_outLine
+BABEL_OP3_403_18924_20141117_193058_inLine
+BABEL_OP3_403_18924_20141117_193058_outLine
+BABEL_OP3_403_18939_20141007_221546_inLine
+BABEL_OP3_403_18939_20141007_221546_outLine
+BABEL_OP3_403_19688_20150205_181600_inLine
+BABEL_OP3_403_19688_20150205_181600_outLine
+BABEL_OP3_403_19703_20141024_194336_inLine
+BABEL_OP3_403_19703_20141024_194336_outLine
+BABEL_OP3_403_20133_20141001_221247_inLine
+BABEL_OP3_403_20133_20141001_221247_outLine
+BABEL_OP3_403_20916_20141003_230543_inLine
+BABEL_OP3_403_20916_20141003_230543_outLine
+BABEL_OP3_403_21004_20141210_214422_inLine
+BABEL_OP3_403_21004_20141210_214422_outLine
+BABEL_OP3_403_21206_20141004_231905_inLine
+BABEL_OP3_403_21206_20141004_231905_outLine
+BABEL_OP3_403_21327_20141207_194922_inLine
+BABEL_OP3_403_21327_20141207_194922_outLine
+BABEL_OP3_403_21892_20141216_205644_inLine
+BABEL_OP3_403_21892_20141216_205644_outLine
+BABEL_OP3_403_22643_20150131_005325_inLine
+BABEL_OP3_403_22643_20150131_005325_outLine
+BABEL_OP3_403_22918_20150305_185811_inLine
+BABEL_OP3_403_22918_20150305_185811_outLine
+BABEL_OP3_403_22965_20141005_232023_inLine
+BABEL_OP3_403_22965_20141005_232023_outLine
+BABEL_OP3_403_23006_20141024_182721_inLine
+BABEL_OP3_403_23006_20141024_182721_outLine
+BABEL_OP3_403_23046_20141023_002436_inLine
+BABEL_OP3_403_23046_20141023_002436_outLine
+BABEL_OP3_403_24017_20141218_235010_inLine
+BABEL_OP3_403_24017_20141218_235010_outLine
+BABEL_OP3_403_24290_20150319_170027_inLine
+BABEL_OP3_403_24290_20150319_170027_outLine
+BABEL_OP3_403_25015_20150312_185754_inLine
+BABEL_OP3_403_25015_20150312_185754_outLine
+BABEL_OP3_403_25242_20150129_211027_inLine
+BABEL_OP3_403_25242_20150129_211027_outLine
+BABEL_OP3_403_25767_20141007_010749_inLine
+BABEL_OP3_403_25767_20141007_010749_outLine
+BABEL_OP3_403_26072_20150107_183553_inLine
+BABEL_OP3_403_26072_20150107_183553_outLine
+BABEL_OP3_403_27125_20141005_201825_inLine
+BABEL_OP3_403_27125_20141005_201825_outLine
+BABEL_OP3_403_27367_20150201_011720_inLine
+BABEL_OP3_403_27367_20150201_011720_outLine
+BABEL_OP3_403_28522_20141129_232715_inLine
+BABEL_OP3_403_28522_20141129_232715_outLine
+BABEL_OP3_403_28814_20141220_183346_inLine
+BABEL_OP3_403_28814_20141220_183346_outLine
+BABEL_OP3_403_31346_20141217_195511_inLine
+BABEL_OP3_403_31346_20141217_195511_outLine
+BABEL_OP3_403_31919_20150306_024725_inLine
+BABEL_OP3_403_31919_20150306_024725_outLine
+BABEL_OP3_403_32301_20141202_224639_inLine
+BABEL_OP3_403_32301_20141202_224639_outLine
+BABEL_OP3_403_32328_20141218_000510_inLine
+BABEL_OP3_403_32328_20141218_000510_outLine
+BABEL_OP3_403_33913_20141211_183339_inLine
+BABEL_OP3_403_33913_20141211_183339_outLine
+BABEL_OP3_403_34197_20141117_004710_inLine
+BABEL_OP3_403_34197_20141117_004710_outLine
+BABEL_OP3_403_34477_20141023_231734_inLine
+BABEL_OP3_403_34477_20141023_231734_outLine
+BABEL_OP3_403_35008_20141204_201241_inLine
+BABEL_OP3_403_35008_20141204_201241_outLine
+BABEL_OP3_403_35139_20141004_205621_inLine
+BABEL_OP3_403_35139_20141004_205621_outLine
+BABEL_OP3_403_35143_20141210_230830_inLine
+BABEL_OP3_403_35143_20141210_230830_outLine
+BABEL_OP3_403_35467_20141004_175833_inLine
+BABEL_OP3_403_35467_20141004_175833_outLine
+BABEL_OP3_403_35583_20150108_024439_inLine
+BABEL_OP3_403_35583_20150108_024439_outLine
+BABEL_OP3_403_36293_20141015_192540_inLine
+BABEL_OP3_403_36293_20141015_192540_outLine
+BABEL_OP3_403_37228_20150108_000217_inLine
+BABEL_OP3_403_37228_20150108_000217_outLine
+BABEL_OP3_403_37682_20141028_002656_inLine
+BABEL_OP3_403_37682_20141028_002656_outLine
+BABEL_OP3_403_37853_20150303_020840_inLine
+BABEL_OP3_403_37853_20150303_020840_outLine
+BABEL_OP3_403_38588_20141026_220103_inLine
+BABEL_OP3_403_38588_20141026_220103_outLine
+BABEL_OP3_403_38664_20141028_005703_inLine
+BABEL_OP3_403_38664_20141028_005703_outLine
+BABEL_OP3_403_38689_20141204_215950_inLine
+BABEL_OP3_403_38689_20141204_215950_outLine
+BABEL_OP3_403_38741_20141005_205401_inLine
+BABEL_OP3_403_38741_20141005_205401_outLine
+BABEL_OP3_403_39099_20150306_193032_inLine
+BABEL_OP3_403_39099_20150306_193032_outLine
+BABEL_OP3_403_39307_20141014_234344_inLine
+BABEL_OP3_403_39307_20141014_234344_outLine
+BABEL_OP3_403_39555_20141217_205213_inLine
+BABEL_OP3_403_39555_20141217_205213_outLine
+BABEL_OP3_403_41442_20141216_224519_inLine
+BABEL_OP3_403_41442_20141216_224519_outLine
+BABEL_OP3_403_41958_20141026_202739_inLine
+BABEL_OP3_403_41958_20141026_202739_outLine
+BABEL_OP3_403_42434_20141026_001223_inLine
+BABEL_OP3_403_42434_20141026_001223_outLine
+BABEL_OP3_403_42771_20141104_215437_inLine
+BABEL_OP3_403_42771_20141104_215437_outLine
+BABEL_OP3_403_43784_20141005_193431_inLine
+BABEL_OP3_403_43784_20141005_193431_outLine
+BABEL_OP3_403_43788_20141202_235051_inLine
+BABEL_OP3_403_43788_20141202_235051_outLine
+BABEL_OP3_403_44309_20150305_200025_inLine
+BABEL_OP3_403_44309_20150305_200025_outLine
+BABEL_OP3_403_44478_20150307_223313_inLine
+BABEL_OP3_403_44478_20150307_223313_outLine
+BABEL_OP3_403_45486_20150130_235157_inLine
+BABEL_OP3_403_45486_20150130_235157_outLine
+BABEL_OP3_403_45536_20141219_005329_inLine
+BABEL_OP3_403_45536_20141219_005329_outLine
+BABEL_OP3_403_46008_20150307_190844_inLine
+BABEL_OP3_403_46008_20150307_190844_outLine
+BABEL_OP3_403_46041_20141217_222544_inLine
+BABEL_OP3_403_46041_20141217_222544_outLine
+BABEL_OP3_403_46310_20141012_204940_inLine
+BABEL_OP3_403_46310_20141012_204940_outLine
+BABEL_OP3_403_46757_20141202_212733_inLine
+BABEL_OP3_403_46757_20141202_212733_outLine
+BABEL_OP3_403_47283_20141005_204650_inLine
+BABEL_OP3_403_47283_20141005_204650_outLine
+BABEL_OP3_403_47487_20141025_235747_inLine
+BABEL_OP3_403_47487_20141025_235747_outLine
+BABEL_OP3_403_47866_20150317_213617_inLine
+BABEL_OP3_403_47866_20150317_213617_outLine
+BABEL_OP3_403_47878_20141118_193135_inLine
+BABEL_OP3_403_47878_20141118_193135_outLine
+BABEL_OP3_403_48243_20141004_221542_inLine
+BABEL_OP3_403_48243_20141004_221542_outLine
+BABEL_OP3_403_48610_20141007_225901_inLine
+BABEL_OP3_403_48610_20141007_225901_outLine
+BABEL_OP3_403_48844_20141007_004947_inLine
+BABEL_OP3_403_48844_20141007_004947_outLine
+BABEL_OP3_403_48844_20141007_011027_inLine
+BABEL_OP3_403_48844_20141007_011027_outLine
+BABEL_OP3_403_49027_20150307_230828_inLine
+BABEL_OP3_403_49027_20150307_230828_outLine
+BABEL_OP3_403_49630_20141205_233804_inLine
+BABEL_OP3_403_49630_20141205_233804_outLine
+BABEL_OP3_403_49768_20141026_000059_inLine
+BABEL_OP3_403_49768_20141026_000059_outLine
+BABEL_OP3_403_49907_20141005_215057_inLine
+BABEL_OP3_403_49907_20141005_215057_outLine
+BABEL_OP3_403_50427_20141116_233807_inLine
+BABEL_OP3_403_50427_20141116_233807_outLine
+BABEL_OP3_403_50549_20150304_014353_inLine
+BABEL_OP3_403_50549_20150304_014353_outLine
+BABEL_OP3_403_50779_20141118_221929_inLine
+BABEL_OP3_403_50779_20141118_221929_outLine
+BABEL_OP3_403_50779_20141118_230132_inLine
+BABEL_OP3_403_50779_20141118_230132_outLine
+BABEL_OP3_403_52490_20141016_230923_inLine
+BABEL_OP3_403_52490_20141016_230923_outLine
+BABEL_OP3_403_52717_20141008_003843_inLine
+BABEL_OP3_403_52717_20141008_003843_outLine
+BABEL_OP3_403_53063_20141207_192558_inLine
+BABEL_OP3_403_53063_20141207_192558_outLine
+BABEL_OP3_403_53063_20141207_194007_inLine
+BABEL_OP3_403_53063_20141207_194007_outLine
+BABEL_OP3_403_54104_20141006_230139_inLine
+BABEL_OP3_403_54104_20141006_230139_outLine
+BABEL_OP3_403_54104_20141006_230643_inLine
+BABEL_OP3_403_54104_20141006_230643_outLine
+BABEL_OP3_403_54162_20141103_190601_inLine
+BABEL_OP3_403_54162_20141103_190601_outLine
+BABEL_OP3_403_54477_20141216_200349_inLine
+BABEL_OP3_403_54477_20141216_200349_outLine
+BABEL_OP3_403_54477_20141216_213534_inLine
+BABEL_OP3_403_54477_20141216_213534_outLine
+BABEL_OP3_403_54530_20141217_220934_inLine
+BABEL_OP3_403_54530_20141217_220934_outLine
+BABEL_OP3_403_54594_20150204_003149_inLine
+BABEL_OP3_403_54594_20150204_003149_outLine
+BABEL_OP3_403_55259_20141025_175845_inLine
+BABEL_OP3_403_55259_20141025_175845_outLine
+BABEL_OP3_403_55968_20141004_005950_inLine
+BABEL_OP3_403_55968_20141004_005950_outLine
+BABEL_OP3_403_56326_20150129_020103_inLine
+BABEL_OP3_403_56326_20150129_020103_outLine
+BABEL_OP3_403_57093_20141103_221842_inLine
+BABEL_OP3_403_57093_20141103_221842_outLine
+BABEL_OP3_403_57141_20141215_224302_inLine
+BABEL_OP3_403_57141_20141215_224302_outLine
+BABEL_OP3_403_57529_20141207_002135_inLine
+BABEL_OP3_403_57529_20141207_002135_outLine
+BABEL_OP3_403_59262_20141216_193024_inLine
+BABEL_OP3_403_59262_20141216_193024_outLine
+BABEL_OP3_403_60115_20141129_235248_inLine
+BABEL_OP3_403_60115_20141129_235248_outLine
+BABEL_OP3_403_60650_20150131_013236_inLine
+BABEL_OP3_403_60650_20150131_013236_outLine
+BABEL_OP3_403_61678_20141003_231023_inLine
+BABEL_OP3_403_61678_20141003_231023_outLine
+BABEL_OP3_403_61731_20141005_201612_inLine
+BABEL_OP3_403_61731_20141005_201612_outLine
+BABEL_OP3_403_61971_20150307_004145_inLine
+BABEL_OP3_403_61971_20150307_004145_outLine
+BABEL_OP3_403_62014_20141127_180004_inLine
+BABEL_OP3_403_62014_20141127_180004_outLine
+BABEL_OP3_403_62734_20141025_192117_inLine
+BABEL_OP3_403_62734_20141025_192117_outLine
+BABEL_OP3_403_62810_20141016_191619_inLine
+BABEL_OP3_403_62810_20141016_191619_outLine
+BABEL_OP3_403_63670_20141215_221926_inLine
+BABEL_OP3_403_63670_20141215_221926_outLine
+BABEL_OP3_403_63787_20141006_214400_inLine
+BABEL_OP3_403_63787_20141006_214400_outLine
+BABEL_OP3_403_63906_20150305_205105_inLine
+BABEL_OP3_403_63906_20150305_205105_outLine
+BABEL_OP3_403_65367_20150108_004325_inLine
+BABEL_OP3_403_65367_20150108_004325_outLine
+BABEL_OP3_403_66001_20141007_230508_inLine
+BABEL_OP3_403_66001_20141007_230508_outLine
+BABEL_OP3_403_66822_20141029_224921_inLine
+BABEL_OP3_403_66822_20141029_224921_outLine
+BABEL_OP3_403_66916_20141015_215414_inLine
+BABEL_OP3_403_66916_20141015_215414_outLine
+BABEL_OP3_403_67622_20141014_193846_inLine
+BABEL_OP3_403_67622_20141014_193846_outLine
+BABEL_OP3_403_67659_20141023_013756_inLine
+BABEL_OP3_403_67659_20141023_013756_outLine
+BABEL_OP3_403_68384_20141216_000507_inLine
+BABEL_OP3_403_68384_20141216_000507_outLine
+BABEL_OP3_403_68748_20141130_014650_inLine
+BABEL_OP3_403_68748_20141130_014650_outLine
+BABEL_OP3_403_68854_20150306_195508_inLine
+BABEL_OP3_403_68854_20150306_195508_outLine
+BABEL_OP3_403_69096_20150309_190140_inLine
+BABEL_OP3_403_69096_20150309_190140_outLine
+BABEL_OP3_403_70121_20141026_225432_inLine
+BABEL_OP3_403_70121_20141026_225432_outLine
+BABEL_OP3_403_70216_20150128_234110_inLine
+BABEL_OP3_403_70216_20150128_234110_outLine
+BABEL_OP3_403_70257_20150204_032020_inLine
+BABEL_OP3_403_70257_20150204_032020_outLine
+BABEL_OP3_403_70343_20141205_225856_inLine
+BABEL_OP3_403_70343_20141205_225856_outLine
+BABEL_OP3_403_71047_20150106_190413_inLine
+BABEL_OP3_403_71047_20150106_190413_outLine
+BABEL_OP3_403_72040_20141006_004959_inLine
+BABEL_OP3_403_72040_20141006_004959_outLine
+BABEL_OP3_403_73430_20141205_233006_inLine
+BABEL_OP3_403_73430_20141205_233006_outLine
+BABEL_OP3_403_73591_20140930_234521_inLine
+BABEL_OP3_403_73591_20140930_234521_outLine
+BABEL_OP3_403_74728_20150312_182026_inLine
+BABEL_OP3_403_74728_20150312_182026_outLine
+BABEL_OP3_403_75064_20141022_225629_inLine
+BABEL_OP3_403_75064_20141022_225629_outLine
+BABEL_OP3_403_76499_20141103_232220_inLine
+BABEL_OP3_403_76499_20141103_232220_outLine
+BABEL_OP3_403_77427_20141027_223134_inLine
+BABEL_OP3_403_77427_20141027_223134_outLine
+BABEL_OP3_403_77990_20141004_201020_inLine
+BABEL_OP3_403_77990_20141004_201020_outLine
+BABEL_OP3_403_78116_20141208_213333_inLine
+BABEL_OP3_403_78116_20141208_213333_outLine
+BABEL_OP3_403_78116_20141208_214155_inLine
+BABEL_OP3_403_78116_20141208_214155_outLine
+BABEL_OP3_403_78254_20141024_234037_inLine
+BABEL_OP3_403_78254_20141024_234037_outLine
+BABEL_OP3_403_78604_20141006_193457_inLine
+BABEL_OP3_403_78604_20141006_193457_outLine
+BABEL_OP3_403_78833_20150205_204459_inLine
+BABEL_OP3_403_78833_20150205_204459_outLine
+BABEL_OP3_403_80439_20141023_195331_inLine
+BABEL_OP3_403_80439_20141023_195331_outLine
+BABEL_OP3_403_80781_20141026_214157_inLine
+BABEL_OP3_403_80781_20141026_214157_outLine
+BABEL_OP3_403_81149_20150313_000213_inLine
+BABEL_OP3_403_81149_20150313_000213_outLine
+BABEL_OP3_403_81213_20141004_213211_inLine
+BABEL_OP3_403_81213_20141004_213211_outLine
+BABEL_OP3_403_82425_20141007_231028_inLine
+BABEL_OP3_403_82425_20141007_231028_outLine
+BABEL_OP3_403_83238_20141107_233257_inLine
+BABEL_OP3_403_83238_20141107_233257_outLine
+BABEL_OP3_403_83455_20141103_225146_inLine
+BABEL_OP3_403_83455_20141103_225146_outLine
+BABEL_OP3_403_83651_20141005_194737_inLine
+BABEL_OP3_403_83651_20141005_194737_outLine
+BABEL_OP3_403_84194_20150204_213858_inLine
+BABEL_OP3_403_84194_20150204_213858_outLine
+BABEL_OP3_403_84458_20141208_005012_inLine
+BABEL_OP3_403_84458_20141208_005012_outLine
+BABEL_OP3_403_84547_20141013_223556_inLine
+BABEL_OP3_403_84547_20141013_223556_outLine
+BABEL_OP3_403_84737_20150303_195506_inLine
+BABEL_OP3_403_84737_20150303_195506_outLine
+BABEL_OP3_403_85647_20141103_192225_inLine
+BABEL_OP3_403_85647_20141103_192225_outLine
+BABEL_OP3_403_86845_20150201_015753_inLine
+BABEL_OP3_403_86845_20150201_015753_outLine
+BABEL_OP3_403_87889_20150107_001827_inLine
+BABEL_OP3_403_87889_20150107_001827_outLine
+BABEL_OP3_403_88260_20141029_205951_inLine
+BABEL_OP3_403_88260_20141029_205951_outLine
+BABEL_OP3_403_88812_20150307_181013_inLine
+BABEL_OP3_403_88812_20150307_181013_outLine
+BABEL_OP3_403_89059_20141220_191342_inLine
+BABEL_OP3_403_89059_20141220_191342_outLine
+BABEL_OP3_403_89358_20141030_231758_inLine
+BABEL_OP3_403_89358_20141030_231758_outLine
+BABEL_OP3_403_90709_20141007_234900_inLine
+BABEL_OP3_403_90709_20141007_234900_outLine
+BABEL_OP3_403_90739_20141028_224009_inLine
+BABEL_OP3_403_90739_20141028_224009_outLine
+BABEL_OP3_403_92527_20141026_192704_inLine
+BABEL_OP3_403_92527_20141026_192704_outLine
+BABEL_OP3_403_92740_20141130_011740_inLine
+BABEL_OP3_403_92740_20141130_011740_outLine
+BABEL_OP3_403_94409_20141028_214356_inLine
+BABEL_OP3_403_94409_20141028_214356_outLine
+BABEL_OP3_403_94449_20150309_193606_inLine
+BABEL_OP3_403_94449_20150309_193606_outLine
+BABEL_OP3_403_94487_20150312_014136_inLine
+BABEL_OP3_403_94487_20150312_014136_outLine
+BABEL_OP3_403_95583_20141013_184937_inLine
+BABEL_OP3_403_95583_20141013_184937_outLine
+BABEL_OP3_403_95670_20141016_214958_inLine
+BABEL_OP3_403_95670_20141016_214958_outLine
+BABEL_OP3_403_96525_20141217_223842_inLine
+BABEL_OP3_403_96525_20141217_223842_outLine
+BABEL_OP3_403_96910_20141024_195822_inLine
+BABEL_OP3_403_96910_20141024_195822_outLine
+BABEL_OP3_403_97376_20141206_215930_inLine
+BABEL_OP3_403_97376_20141206_215930_outLine
+BABEL_OP3_403_97772_20141003_213919_inLine
+BABEL_OP3_403_97772_20141003_213919_outLine
+BABEL_OP3_403_98311_20141005_195843_inLine
+BABEL_OP3_403_98311_20141005_195843_outLine
+BABEL_OP3_403_99202_20141108_002737_inLine
+BABEL_OP3_403_99202_20141108_002737_outLine
+BABEL_OP3_403_99955_20150107_213836_inLine
+BABEL_OP3_403_99955_20150107_213836_outLine
diff --git a/egs/babel/s5d/conf/lists/403-dholuo/training.list b/egs/babel/s5d/conf/lists/403-dholuo/training.list
new file mode 100644
index 00000000000..3b32ed92b92
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/403-dholuo/training.list
@@ -0,0 +1,502 @@
+BABEL_OP3_403_10313_20150130_193605_inLine
+BABEL_OP3_403_10313_20150130_193605_outLine
+BABEL_OP3_403_10469_20150130_211522_inLine
+BABEL_OP3_403_10469_20150130_211522_outLine
+BABEL_OP3_403_10966_20141025_191612_inLine
+BABEL_OP3_403_10966_20141025_191612_outLine
+BABEL_OP3_403_11663_20141205_201130_inLine
+BABEL_OP3_403_11663_20141205_201130_outLine
+BABEL_OP3_403_11663_20141205_204332_inLine
+BABEL_OP3_403_11663_20141205_204332_outLine
+BABEL_OP3_403_11681_20141010_203514_inLine
+BABEL_OP3_403_11681_20141010_203514_outLine
+BABEL_OP3_403_11797_20141013_012556_inLine
+BABEL_OP3_403_11797_20141013_012556_outLine
+BABEL_OP3_403_12606_20150314_170830_inLine
+BABEL_OP3_403_12606_20150314_170830_outLine
+BABEL_OP3_403_12609_20150313_183914_inLine
+BABEL_OP3_403_12609_20150313_183914_outLine
+BABEL_OP3_403_13324_20141004_204835_inLine
+BABEL_OP3_403_13324_20141004_204835_outLine
+BABEL_OP3_403_13483_20141205_201059_inLine
+BABEL_OP3_403_13483_20141205_201059_outLine
+BABEL_OP3_403_13490_20141103_182258_inLine
+BABEL_OP3_403_13490_20141103_182258_outLine
+BABEL_OP3_403_14807_20141117_201842_inLine
+BABEL_OP3_403_14807_20141117_201842_outLine
+BABEL_OP3_403_14814_20141023_004823_inLine
+BABEL_OP3_403_14814_20141023_004823_outLine
+BABEL_OP3_403_14929_20141029_222741_inLine
+BABEL_OP3_403_14929_20141029_222741_outLine
+BABEL_OP3_403_15281_20150307_201454_inLine
+BABEL_OP3_403_15281_20150307_201454_outLine
+BABEL_OP3_403_15322_20150319_184826_inLine
+BABEL_OP3_403_15322_20150319_184826_outLine
+BABEL_OP3_403_15702_20141202_010728_inLine
+BABEL_OP3_403_15702_20141202_010728_outLine
+BABEL_OP3_403_16475_20141027_225406_inLine
+BABEL_OP3_403_16475_20141027_225406_outLine
+BABEL_OP3_403_16749_20141221_021003_inLine
+BABEL_OP3_403_16749_20141221_021003_outLine
+BABEL_OP3_403_16800_20141216_181241_inLine
+BABEL_OP3_403_16800_20141216_181241_outLine
+BABEL_OP3_403_16839_20141219_222837_inLine
+BABEL_OP3_403_16839_20141219_222837_outLine
+BABEL_OP3_403_16924_20141202_004432_inLine
+BABEL_OP3_403_16924_20141202_004432_outLine
+BABEL_OP3_403_16938_20141117_002438_inLine
+BABEL_OP3_403_16938_20141117_002438_outLine
+BABEL_OP3_403_17280_20141027_223651_inLine
+BABEL_OP3_403_17280_20141027_223651_outLine
+BABEL_OP3_403_17496_20141202_004936_inLine
+BABEL_OP3_403_17496_20141202_004936_outLine
+BABEL_OP3_403_18924_20141117_193058_inLine
+BABEL_OP3_403_18924_20141117_193058_outLine
+BABEL_OP3_403_18939_20141007_221546_inLine
+BABEL_OP3_403_18939_20141007_221546_outLine
+BABEL_OP3_403_19688_20150205_181600_inLine
+BABEL_OP3_403_19688_20150205_181600_outLine
+BABEL_OP3_403_19703_20141024_194336_inLine
+BABEL_OP3_403_19703_20141024_194336_outLine
+BABEL_OP3_403_19722_20141013_214859_inLine
+BABEL_OP3_403_19722_20141013_214859_outLine
+BABEL_OP3_403_19749_20150211_210545_inLine
+BABEL_OP3_403_19749_20150211_210545_outLine
+BABEL_OP3_403_20133_20141001_221247_inLine
+BABEL_OP3_403_20133_20141001_221247_outLine
+BABEL_OP3_403_20916_20141003_230543_inLine
+BABEL_OP3_403_20916_20141003_230543_outLine
+BABEL_OP3_403_21004_20141210_214422_inLine
+BABEL_OP3_403_21004_20141210_214422_outLine
+BABEL_OP3_403_21206_20141004_231905_inLine
+BABEL_OP3_403_21206_20141004_231905_outLine
+BABEL_OP3_403_21327_20141207_194922_inLine
+BABEL_OP3_403_21327_20141207_194922_outLine
+BABEL_OP3_403_21892_20141216_205644_inLine
+BABEL_OP3_403_21892_20141216_205644_outLine
+BABEL_OP3_403_22321_20141012_234503_inLine
+BABEL_OP3_403_22321_20141012_234503_outLine
+BABEL_OP3_403_22643_20150131_005325_inLine
+BABEL_OP3_403_22643_20150131_005325_outLine
+BABEL_OP3_403_22918_20150305_185811_inLine
+BABEL_OP3_403_22918_20150305_185811_outLine
+BABEL_OP3_403_22965_20141005_232023_inLine
+BABEL_OP3_403_22965_20141005_232023_outLine
+BABEL_OP3_403_23006_20141024_182721_inLine
+BABEL_OP3_403_23006_20141024_182721_outLine
+BABEL_OP3_403_23046_20141023_002436_inLine
+BABEL_OP3_403_23046_20141023_002436_outLine
+BABEL_OP3_403_23893_20150311_211416_inLine
+BABEL_OP3_403_23893_20150311_211416_outLine
+BABEL_OP3_403_24017_20141218_235010_inLine
+BABEL_OP3_403_24017_20141218_235010_outLine
+BABEL_OP3_403_24290_20150319_170027_inLine
+BABEL_OP3_403_24290_20150319_170027_outLine
+BABEL_OP3_403_24589_20141023_173937_inLine
+BABEL_OP3_403_24589_20141023_173937_outLine
+BABEL_OP3_403_25015_20150312_185754_inLine
+BABEL_OP3_403_25015_20150312_185754_outLine
+BABEL_OP3_403_25242_20150129_211027_inLine
+BABEL_OP3_403_25242_20150129_211027_outLine
+BABEL_OP3_403_25767_20141007_010749_inLine
+BABEL_OP3_403_25767_20141007_010749_outLine
+BABEL_OP3_403_25961_20141016_201537_inLine
+BABEL_OP3_403_25961_20141016_201537_outLine
+BABEL_OP3_403_26072_20150107_183553_inLine
+BABEL_OP3_403_26072_20150107_183553_outLine
+BABEL_OP3_403_27125_20141005_201825_inLine
+BABEL_OP3_403_27125_20141005_201825_outLine
+BABEL_OP3_403_27367_20150201_011720_inLine
+BABEL_OP3_403_27367_20150201_011720_outLine
+BABEL_OP3_403_28190_20141218_232404_inLine
+BABEL_OP3_403_28190_20141218_232404_outLine
+BABEL_OP3_403_28522_20141129_232715_inLine
+BABEL_OP3_403_28522_20141129_232715_outLine
+BABEL_OP3_403_28775_20141005_002735_inLine
+BABEL_OP3_403_28775_20141005_002735_outLine
+BABEL_OP3_403_28814_20141220_183346_inLine
+BABEL_OP3_403_28814_20141220_183346_outLine
+BABEL_OP3_403_28945_20141006_202723_inLine
+BABEL_OP3_403_28945_20141006_202723_outLine
+BABEL_OP3_403_29168_20141013_195745_inLine
+BABEL_OP3_403_29168_20141013_195745_outLine
+BABEL_OP3_403_29323_20150303_223419_inLine
+BABEL_OP3_403_29323_20150303_223419_outLine
+BABEL_OP3_403_31109_20141107_213704_inLine
+BABEL_OP3_403_31109_20141107_213704_outLine
+BABEL_OP3_403_31346_20141217_195511_inLine
+BABEL_OP3_403_31346_20141217_195511_outLine
+BABEL_OP3_403_31490_20141006_210241_inLine
+BABEL_OP3_403_31490_20141006_210241_outLine
+BABEL_OP3_403_31624_20141014_211203_inLine
+BABEL_OP3_403_31624_20141014_211203_outLine
+BABEL_OP3_403_31919_20150306_024725_inLine
+BABEL_OP3_403_31919_20150306_024725_outLine
+BABEL_OP3_403_32301_20141202_224639_inLine
+BABEL_OP3_403_32301_20141202_224639_outLine
+BABEL_OP3_403_32328_20141218_000510_inLine
+BABEL_OP3_403_32328_20141218_000510_outLine
+BABEL_OP3_403_32861_20150107_160647_inLine
+BABEL_OP3_403_32861_20150107_160647_outLine
+BABEL_OP3_403_33840_20141219_200146_inLine
+BABEL_OP3_403_33840_20141219_200146_outLine
+BABEL_OP3_403_33913_20141211_183339_inLine
+BABEL_OP3_403_33913_20141211_183339_outLine
+BABEL_OP3_403_34197_20141117_004710_inLine
+BABEL_OP3_403_34197_20141117_004710_outLine
+BABEL_OP3_403_34477_20141023_231734_inLine
+BABEL_OP3_403_34477_20141023_231734_outLine
+BABEL_OP3_403_35008_20141204_201241_inLine
+BABEL_OP3_403_35008_20141204_201241_outLine
+BABEL_OP3_403_35139_20141004_205621_inLine
+BABEL_OP3_403_35139_20141004_205621_outLine
+BABEL_OP3_403_35143_20141210_230830_inLine
+BABEL_OP3_403_35143_20141210_230830_outLine
+BABEL_OP3_403_35467_20141004_175833_inLine
+BABEL_OP3_403_35467_20141004_175833_outLine
+BABEL_OP3_403_35583_20150108_024439_inLine
+BABEL_OP3_403_35583_20150108_024439_outLine
+BABEL_OP3_403_36293_20141015_192540_inLine
+BABEL_OP3_403_36293_20141015_192540_outLine
+BABEL_OP3_403_36632_20150206_041325_inLine
+BABEL_OP3_403_36632_20150206_041325_outLine
+BABEL_OP3_403_36990_20141030_231441_inLine
+BABEL_OP3_403_36990_20141030_231441_outLine
+BABEL_OP3_403_37228_20150108_000217_inLine
+BABEL_OP3_403_37228_20150108_000217_outLine
+BABEL_OP3_403_37682_20141028_002656_inLine
+BABEL_OP3_403_37682_20141028_002656_outLine
+BABEL_OP3_403_37853_20150303_020840_inLine
+BABEL_OP3_403_37853_20150303_020840_outLine
+BABEL_OP3_403_38588_20141026_220103_inLine
+BABEL_OP3_403_38588_20141026_220103_outLine
+BABEL_OP3_403_38664_20141028_005703_inLine
+BABEL_OP3_403_38664_20141028_005703_outLine
+BABEL_OP3_403_38689_20141204_215950_inLine
+BABEL_OP3_403_38689_20141204_215950_outLine
+BABEL_OP3_403_38741_20141005_205401_inLine
+BABEL_OP3_403_38741_20141005_205401_outLine
+BABEL_OP3_403_38878_20141121_184540_inLine
+BABEL_OP3_403_38878_20141121_184540_outLine
+BABEL_OP3_403_39099_20150306_193032_inLine
+BABEL_OP3_403_39099_20150306_193032_outLine
+BABEL_OP3_403_39307_20141014_234344_inLine
+BABEL_OP3_403_39307_20141014_234344_outLine
+BABEL_OP3_403_39555_20141217_205213_inLine
+BABEL_OP3_403_39555_20141217_205213_outLine
+BABEL_OP3_403_41442_20141216_224519_inLine
+BABEL_OP3_403_41442_20141216_224519_outLine
+BABEL_OP3_403_41958_20141026_202739_inLine
+BABEL_OP3_403_41958_20141026_202739_outLine
+BABEL_OP3_403_42434_20141026_001223_inLine
+BABEL_OP3_403_42434_20141026_001223_outLine
+BABEL_OP3_403_42718_20150306_202240_inLine
+BABEL_OP3_403_42718_20150306_202240_outLine
+BABEL_OP3_403_42771_20141104_215437_inLine
+BABEL_OP3_403_42771_20141104_215437_outLine
+BABEL_OP3_403_43368_20141026_000458_inLine
+BABEL_OP3_403_43368_20141026_000458_outLine
+BABEL_OP3_403_43784_20141005_193431_inLine
+BABEL_OP3_403_43784_20141005_193431_outLine
+BABEL_OP3_403_43788_20141202_235051_inLine
+BABEL_OP3_403_43788_20141202_235051_outLine
+BABEL_OP3_403_44309_20150305_200025_inLine
+BABEL_OP3_403_44309_20150305_200025_outLine
+BABEL_OP3_403_44478_20150307_223313_inLine
+BABEL_OP3_403_44478_20150307_223313_outLine
+BABEL_OP3_403_44868_20141129_220211_inLine
+BABEL_OP3_403_44868_20141129_220211_outLine
+BABEL_OP3_403_44961_20141006_233622_inLine
+BABEL_OP3_403_44961_20141006_233622_outLine
+BABEL_OP3_403_44961_20141006_235203_inLine
+BABEL_OP3_403_44961_20141006_235203_outLine
+BABEL_OP3_403_45486_20150130_235157_inLine
+BABEL_OP3_403_45486_20150130_235157_outLine
+BABEL_OP3_403_45536_20141219_005329_inLine
+BABEL_OP3_403_45536_20141219_005329_outLine
+BABEL_OP3_403_46008_20150307_190844_inLine
+BABEL_OP3_403_46008_20150307_190844_outLine
+BABEL_OP3_403_46041_20141217_222544_inLine
+BABEL_OP3_403_46041_20141217_222544_outLine
+BABEL_OP3_403_46310_20141012_204940_inLine
+BABEL_OP3_403_46310_20141012_204940_outLine
+BABEL_OP3_403_46757_20141202_212733_inLine
+BABEL_OP3_403_46757_20141202_212733_outLine
+BABEL_OP3_403_47215_20141007_230222_inLine
+BABEL_OP3_403_47215_20141007_230222_outLine
+BABEL_OP3_403_47283_20141005_204650_inLine
+BABEL_OP3_403_47283_20141005_204650_outLine
+BABEL_OP3_403_47487_20141025_235747_inLine
+BABEL_OP3_403_47487_20141025_235747_outLine
+BABEL_OP3_403_47866_20150317_213617_inLine
+BABEL_OP3_403_47866_20150317_213617_outLine
+BABEL_OP3_403_47878_20141118_193135_inLine
+BABEL_OP3_403_47878_20141118_193135_outLine
+BABEL_OP3_403_48243_20141004_221542_inLine
+BABEL_OP3_403_48243_20141004_221542_outLine
+BABEL_OP3_403_48610_20141007_225901_inLine
+BABEL_OP3_403_48610_20141007_225901_outLine
+BABEL_OP3_403_48663_20150306_181741_inLine
+BABEL_OP3_403_48663_20150306_181741_outLine
+BABEL_OP3_403_48844_20141007_004947_inLine
+BABEL_OP3_403_48844_20141007_004947_outLine
+BABEL_OP3_403_48844_20141007_011027_inLine
+BABEL_OP3_403_48844_20141007_011027_outLine
+BABEL_OP3_403_49027_20150307_230828_inLine
+BABEL_OP3_403_49027_20150307_230828_outLine
+BABEL_OP3_403_49630_20141205_233804_inLine
+BABEL_OP3_403_49630_20141205_233804_outLine
+BABEL_OP3_403_49768_20141026_000059_inLine
+BABEL_OP3_403_49768_20141026_000059_outLine
+BABEL_OP3_403_49907_20141005_215057_inLine
+BABEL_OP3_403_49907_20141005_215057_outLine
+BABEL_OP3_403_50427_20141116_233807_inLine
+BABEL_OP3_403_50427_20141116_233807_outLine
+BABEL_OP3_403_50549_20150304_014353_inLine
+BABEL_OP3_403_50549_20150304_014353_outLine
+BABEL_OP3_403_50779_20141118_221929_inLine
+BABEL_OP3_403_50779_20141118_221929_outLine
+BABEL_OP3_403_50779_20141118_230132_inLine
+BABEL_OP3_403_50779_20141118_230132_outLine
+BABEL_OP3_403_51955_20141004_212210_inLine
+BABEL_OP3_403_51955_20141004_212210_outLine
+BABEL_OP3_403_52490_20141016_230923_inLine
+BABEL_OP3_403_52490_20141016_230923_outLine
+BABEL_OP3_403_52717_20141008_003843_inLine
+BABEL_OP3_403_52717_20141008_003843_outLine
+BABEL_OP3_403_53063_20141207_192558_inLine
+BABEL_OP3_403_53063_20141207_192558_outLine
+BABEL_OP3_403_53063_20141207_194007_inLine
+BABEL_OP3_403_53063_20141207_194007_outLine
+BABEL_OP3_403_53842_20141031_193507_inLine
+BABEL_OP3_403_53842_20141031_193507_outLine
+BABEL_OP3_403_54104_20141006_230139_inLine
+BABEL_OP3_403_54104_20141006_230139_outLine
+BABEL_OP3_403_54104_20141006_230643_inLine
+BABEL_OP3_403_54104_20141006_230643_outLine
+BABEL_OP3_403_54162_20141103_190601_inLine
+BABEL_OP3_403_54162_20141103_190601_outLine
+BABEL_OP3_403_54477_20141216_200349_inLine
+BABEL_OP3_403_54477_20141216_200349_outLine
+BABEL_OP3_403_54477_20141216_213534_inLine
+BABEL_OP3_403_54477_20141216_213534_outLine
+BABEL_OP3_403_54530_20141217_220934_inLine
+BABEL_OP3_403_54530_20141217_220934_outLine
+BABEL_OP3_403_54594_20150204_003149_inLine
+BABEL_OP3_403_54594_20150204_003149_outLine
+BABEL_OP3_403_55259_20141025_175845_inLine
+BABEL_OP3_403_55259_20141025_175845_outLine
+BABEL_OP3_403_55950_20150312_174125_inLine
+BABEL_OP3_403_55950_20150312_174125_outLine
+BABEL_OP3_403_55968_20141004_005950_inLine
+BABEL_OP3_403_55968_20141004_005950_outLine
+BABEL_OP3_403_56198_20141005_222956_inLine
+BABEL_OP3_403_56198_20141005_222956_outLine
+BABEL_OP3_403_56326_20150129_020103_inLine
+BABEL_OP3_403_56326_20150129_020103_outLine
+BABEL_OP3_403_57093_20141103_221842_inLine
+BABEL_OP3_403_57093_20141103_221842_outLine
+BABEL_OP3_403_57141_20141215_224302_inLine
+BABEL_OP3_403_57141_20141215_224302_outLine
+BABEL_OP3_403_57529_20141207_002135_inLine
+BABEL_OP3_403_57529_20141207_002135_outLine
+BABEL_OP3_403_58047_20141118_184454_inLine +BABEL_OP3_403_58047_20141118_184454_outLine +BABEL_OP3_403_58585_20150106_172737_inLine +BABEL_OP3_403_58585_20150106_172737_outLine +BABEL_OP3_403_59262_20141216_193024_inLine +BABEL_OP3_403_59262_20141216_193024_outLine +BABEL_OP3_403_60115_20141129_235248_inLine +BABEL_OP3_403_60115_20141129_235248_outLine +BABEL_OP3_403_60310_20141217_205059_inLine +BABEL_OP3_403_60310_20141217_205059_outLine +BABEL_OP3_403_60418_20141129_235907_inLine +BABEL_OP3_403_60418_20141129_235907_outLine +BABEL_OP3_403_60650_20150131_013236_inLine +BABEL_OP3_403_60650_20150131_013236_outLine +BABEL_OP3_403_61348_20141103_230857_inLine +BABEL_OP3_403_61348_20141103_230857_outLine +BABEL_OP3_403_61678_20141003_231023_inLine +BABEL_OP3_403_61678_20141003_231023_outLine +BABEL_OP3_403_61731_20141005_201612_inLine +BABEL_OP3_403_61731_20141005_201612_outLine +BABEL_OP3_403_61971_20150307_004145_inLine +BABEL_OP3_403_61971_20150307_004145_outLine +BABEL_OP3_403_62014_20141127_180004_inLine +BABEL_OP3_403_62014_20141127_180004_outLine +BABEL_OP3_403_62734_20141025_192117_inLine +BABEL_OP3_403_62734_20141025_192117_outLine +BABEL_OP3_403_62810_20141016_191619_inLine +BABEL_OP3_403_62810_20141016_191619_outLine +BABEL_OP3_403_63670_20141215_221926_inLine +BABEL_OP3_403_63670_20141215_221926_outLine +BABEL_OP3_403_63787_20141006_214400_inLine +BABEL_OP3_403_63787_20141006_214400_outLine +BABEL_OP3_403_63906_20150305_205105_inLine +BABEL_OP3_403_63906_20150305_205105_outLine +BABEL_OP3_403_65367_20150108_004325_inLine +BABEL_OP3_403_65367_20150108_004325_outLine +BABEL_OP3_403_65640_20150314_163101_inLine +BABEL_OP3_403_65640_20150314_163101_outLine +BABEL_OP3_403_66001_20141007_230508_inLine +BABEL_OP3_403_66001_20141007_230508_outLine +BABEL_OP3_403_66822_20141029_224921_inLine +BABEL_OP3_403_66822_20141029_224921_outLine +BABEL_OP3_403_66916_20141015_215414_inLine +BABEL_OP3_403_66916_20141015_215414_outLine +BABEL_OP3_403_67622_20141014_193846_inLine +BABEL_OP3_403_67622_20141014_193846_outLine +BABEL_OP3_403_67659_20141023_013756_inLine +BABEL_OP3_403_67659_20141023_013756_outLine +BABEL_OP3_403_68384_20141216_000507_inLine +BABEL_OP3_403_68384_20141216_000507_outLine +BABEL_OP3_403_68748_20141130_014650_inLine +BABEL_OP3_403_68748_20141130_014650_outLine +BABEL_OP3_403_68854_20150306_195508_inLine +BABEL_OP3_403_68854_20150306_195508_outLine +BABEL_OP3_403_69096_20150309_190140_inLine +BABEL_OP3_403_69096_20150309_190140_outLine +BABEL_OP3_403_69107_20141106_000151_inLine +BABEL_OP3_403_69107_20141106_000151_outLine +BABEL_OP3_403_69746_20141220_191513_inLine +BABEL_OP3_403_69746_20141220_191513_outLine +BABEL_OP3_403_70121_20141026_225432_inLine +BABEL_OP3_403_70121_20141026_225432_outLine +BABEL_OP3_403_70216_20150128_234110_inLine +BABEL_OP3_403_70216_20150128_234110_outLine +BABEL_OP3_403_70257_20150204_032020_inLine +BABEL_OP3_403_70257_20150204_032020_outLine +BABEL_OP3_403_70343_20141205_225856_inLine +BABEL_OP3_403_70343_20141205_225856_outLine +BABEL_OP3_403_71047_20150106_190413_inLine +BABEL_OP3_403_71047_20150106_190413_outLine +BABEL_OP3_403_72007_20141205_002010_inLine +BABEL_OP3_403_72007_20141205_002010_outLine +BABEL_OP3_403_72040_20141006_004959_inLine +BABEL_OP3_403_72040_20141006_004959_outLine +BABEL_OP3_403_72110_20141210_212045_inLine +BABEL_OP3_403_72110_20141210_212045_outLine +BABEL_OP3_403_72844_20141004_005248_inLine +BABEL_OP3_403_72844_20141004_005248_outLine +BABEL_OP3_403_73430_20141205_233006_inLine 
+BABEL_OP3_403_73430_20141205_233006_outLine +BABEL_OP3_403_73591_20140930_234521_inLine +BABEL_OP3_403_73591_20140930_234521_outLine +BABEL_OP3_403_74728_20150312_182026_inLine +BABEL_OP3_403_74728_20150312_182026_outLine +BABEL_OP3_403_75064_20141022_225629_inLine +BABEL_OP3_403_75064_20141022_225629_outLine +BABEL_OP3_403_75223_20141016_194054_inLine +BABEL_OP3_403_75223_20141016_194054_outLine +BABEL_OP3_403_76499_20141103_232220_inLine +BABEL_OP3_403_76499_20141103_232220_outLine +BABEL_OP3_403_77427_20141027_223134_inLine +BABEL_OP3_403_77427_20141027_223134_outLine +BABEL_OP3_403_77974_20150312_200046_inLine +BABEL_OP3_403_77974_20150312_200046_outLine +BABEL_OP3_403_77990_20141004_201020_inLine +BABEL_OP3_403_77990_20141004_201020_outLine +BABEL_OP3_403_78116_20141208_213333_inLine +BABEL_OP3_403_78116_20141208_213333_outLine +BABEL_OP3_403_78116_20141208_214155_inLine +BABEL_OP3_403_78116_20141208_214155_outLine +BABEL_OP3_403_78254_20141024_234037_inLine +BABEL_OP3_403_78254_20141024_234037_outLine +BABEL_OP3_403_78360_20150107_231519_inLine +BABEL_OP3_403_78360_20150107_231519_outLine +BABEL_OP3_403_78544_20141201_192016_inLine +BABEL_OP3_403_78544_20141201_192016_outLine +BABEL_OP3_403_78604_20141006_193457_inLine +BABEL_OP3_403_78604_20141006_193457_outLine +BABEL_OP3_403_78833_20150205_204459_inLine +BABEL_OP3_403_78833_20150205_204459_outLine +BABEL_OP3_403_80439_20141023_195331_inLine +BABEL_OP3_403_80439_20141023_195331_outLine +BABEL_OP3_403_80781_20141026_214157_inLine +BABEL_OP3_403_80781_20141026_214157_outLine +BABEL_OP3_403_81149_20150313_000213_inLine +BABEL_OP3_403_81149_20150313_000213_outLine +BABEL_OP3_403_81213_20141004_213211_inLine +BABEL_OP3_403_81213_20141004_213211_outLine +BABEL_OP3_403_82391_20141206_001207_inLine +BABEL_OP3_403_82391_20141206_001207_outLine +BABEL_OP3_403_82425_20141007_231028_inLine +BABEL_OP3_403_82425_20141007_231028_outLine +BABEL_OP3_403_83238_20141107_233257_inLine +BABEL_OP3_403_83238_20141107_233257_outLine +BABEL_OP3_403_83436_20141012_221126_inLine +BABEL_OP3_403_83436_20141012_221126_outLine +BABEL_OP3_403_83455_20141103_225146_inLine +BABEL_OP3_403_83455_20141103_225146_outLine +BABEL_OP3_403_83651_20141005_194737_inLine +BABEL_OP3_403_83651_20141005_194737_outLine +BABEL_OP3_403_84194_20150204_213858_inLine +BABEL_OP3_403_84194_20150204_213858_outLine +BABEL_OP3_403_84458_20141208_005012_inLine +BABEL_OP3_403_84458_20141208_005012_outLine +BABEL_OP3_403_84469_20141211_002526_inLine +BABEL_OP3_403_84469_20141211_002526_outLine +BABEL_OP3_403_84547_20141013_223556_inLine +BABEL_OP3_403_84547_20141013_223556_outLine +BABEL_OP3_403_84605_20141005_214529_inLine +BABEL_OP3_403_84605_20141005_214529_outLine +BABEL_OP3_403_84737_20150303_195506_inLine +BABEL_OP3_403_84737_20150303_195506_outLine +BABEL_OP3_403_85647_20141103_192225_inLine +BABEL_OP3_403_85647_20141103_192225_outLine +BABEL_OP3_403_86845_20150201_015753_inLine +BABEL_OP3_403_86845_20150201_015753_outLine +BABEL_OP3_403_87889_20150107_001827_inLine +BABEL_OP3_403_87889_20150107_001827_outLine +BABEL_OP3_403_87921_20141210_233414_inLine +BABEL_OP3_403_87921_20141210_233414_outLine +BABEL_OP3_403_88260_20141029_205951_inLine +BABEL_OP3_403_88260_20141029_205951_outLine +BABEL_OP3_403_88812_20150307_181013_inLine +BABEL_OP3_403_88812_20150307_181013_outLine +BABEL_OP3_403_89059_20141220_191342_inLine +BABEL_OP3_403_89059_20141220_191342_outLine +BABEL_OP3_403_89358_20141030_231758_inLine +BABEL_OP3_403_89358_20141030_231758_outLine 
+BABEL_OP3_403_90709_20141007_234900_inLine +BABEL_OP3_403_90709_20141007_234900_outLine +BABEL_OP3_403_90739_20141028_224009_inLine +BABEL_OP3_403_90739_20141028_224009_outLine +BABEL_OP3_403_92509_20141014_232528_inLine +BABEL_OP3_403_92509_20141014_232528_outLine +BABEL_OP3_403_92527_20141026_192704_inLine +BABEL_OP3_403_92527_20141026_192704_outLine +BABEL_OP3_403_92740_20141130_011740_inLine +BABEL_OP3_403_92740_20141130_011740_outLine +BABEL_OP3_403_94409_20141028_214356_inLine +BABEL_OP3_403_94409_20141028_214356_outLine +BABEL_OP3_403_94449_20150309_193606_inLine +BABEL_OP3_403_94449_20150309_193606_outLine +BABEL_OP3_403_94487_20150312_014136_inLine +BABEL_OP3_403_94487_20150312_014136_outLine +BABEL_OP3_403_95269_20141026_235206_inLine +BABEL_OP3_403_95269_20141026_235206_outLine +BABEL_OP3_403_95583_20141013_184937_inLine +BABEL_OP3_403_95583_20141013_184937_outLine +BABEL_OP3_403_95670_20141016_214958_inLine +BABEL_OP3_403_95670_20141016_214958_outLine +BABEL_OP3_403_96324_20141014_194024_inLine +BABEL_OP3_403_96324_20141014_194024_outLine +BABEL_OP3_403_96525_20141217_223842_inLine +BABEL_OP3_403_96525_20141217_223842_outLine +BABEL_OP3_403_96910_20141024_195822_inLine +BABEL_OP3_403_96910_20141024_195822_outLine +BABEL_OP3_403_97376_20141206_215930_inLine +BABEL_OP3_403_97376_20141206_215930_outLine +BABEL_OP3_403_97588_20141015_193851_inLine +BABEL_OP3_403_97588_20141015_193851_outLine +BABEL_OP3_403_97772_20141003_213919_inLine +BABEL_OP3_403_97772_20141003_213919_outLine +BABEL_OP3_403_98311_20141005_195843_inLine +BABEL_OP3_403_98311_20141005_195843_outLine +BABEL_OP3_403_98506_20150319_151741_inLine +BABEL_OP3_403_98506_20150319_151741_outLine +BABEL_OP3_403_99202_20141108_002737_inLine +BABEL_OP3_403_99202_20141108_002737_outLine +BABEL_OP3_403_99955_20150107_213836_inLine +BABEL_OP3_403_99955_20150107_213836_outLine diff --git a/egs/babel/s5d/conf/lists/403-dholuo/untranscribed-training.list b/egs/babel/s5d/conf/lists/403-dholuo/untranscribed-training.list new file mode 100644 index 00000000000..21ec3e2d9b4 --- /dev/null +++ b/egs/babel/s5d/conf/lists/403-dholuo/untranscribed-training.list @@ -0,0 +1,533 @@ +BABEL_OP3_403_10319_20141014_223750_inLine +BABEL_OP3_403_10319_20141014_223750_outLine +BABEL_OP3_403_10901_20141103_193402_inLine +BABEL_OP3_403_10901_20141103_193402_outLine +BABEL_OP3_403_10974_20141107_215600_inLine +BABEL_OP3_403_10974_20141107_215600_outLine +BABEL_OP3_403_11673_20141013_203235_inLine +BABEL_OP3_403_11673_20141013_203235_outLine +BABEL_OP3_403_12767_20141006_233130_inLine +BABEL_OP3_403_12767_20141006_233130_outLine +BABEL_OP3_403_13561_20141106_192514_inLine +BABEL_OP3_403_13561_20141106_192514_outLine +BABEL_OP3_403_13664_20141002_224345_inLine +BABEL_OP3_403_13664_20141002_224345_outLine +BABEL_OP3_403_14028_20150407_193852_inLine +BABEL_OP3_403_14028_20150407_193852_outLine +BABEL_OP3_403_14158_20141201_232657_inLine +BABEL_OP3_403_14179_20141129_222039_inLine +BABEL_OP3_403_14179_20141129_222039_outLine +BABEL_OP3_403_14719_20141215_230523_inLine +BABEL_OP3_403_14719_20141215_230523_outLine +BABEL_OP3_403_14723_20150205_004549_inLine +BABEL_OP3_403_14723_20150205_004549_outLine +BABEL_OP3_403_14875_20141005_213932_inLine +BABEL_OP3_403_14875_20141005_213932_outLine +BABEL_OP3_403_14972_20141116_221959_inLine +BABEL_OP3_403_14972_20141116_221959_outLine +BABEL_OP3_403_15216_20141219_215848_inLine +BABEL_OP3_403_15216_20141219_215848_outLine +BABEL_OP3_403_15382_20141127_202406_inLine +BABEL_OP3_403_15382_20141127_202406_outLine 
+BABEL_OP3_403_15749_20141218_230749_inLine +BABEL_OP3_403_15749_20141218_230749_outLine +BABEL_OP3_403_16787_20141029_182118_inLine +BABEL_OP3_403_16787_20141029_182118_outLine +BABEL_OP3_403_17472_20141210_175708_inLine +BABEL_OP3_403_17472_20141210_175708_outLine +BABEL_OP3_403_17472_20141210_180354_inLine +BABEL_OP3_403_17472_20141210_180354_outLine +BABEL_OP3_403_17615_20141201_004945_inLine +BABEL_OP3_403_17615_20141201_004945_outLine +BABEL_OP3_403_17890_20141203_003513_inLine +BABEL_OP3_403_17890_20141203_003513_outLine +BABEL_OP3_403_18037_20150418_210458_inLine +BABEL_OP3_403_18037_20150418_210458_outLine +BABEL_OP3_403_18380_20141031_010759_inLine +BABEL_OP3_403_18380_20141031_010759_outLine +BABEL_OP3_403_19101_20141106_010120_inLine +BABEL_OP3_403_19101_20141106_010120_outLine +BABEL_OP3_403_19134_20141120_182846_inLine +BABEL_OP3_403_19134_20141120_182846_outLine +BABEL_OP3_403_19672_20141130_215944_outLine +BABEL_OP3_403_20330_20150304_212548_inLine +BABEL_OP3_403_20330_20150304_212548_outLine +BABEL_OP3_403_20724_20150414_221749_inLine +BABEL_OP3_403_20724_20150414_221749_outLine +BABEL_OP3_403_20768_20141215_213303_inLine +BABEL_OP3_403_20768_20141215_213303_outLine +BABEL_OP3_403_22641_20141016_232215_inLine +BABEL_OP3_403_22641_20141016_232215_outLine +BABEL_OP3_403_23098_20150413_232746_inLine +BABEL_OP3_403_23098_20150413_232746_outLine +BABEL_OP3_403_23395_20141121_205256_outLine +BABEL_OP3_403_23505_20141008_003349_inLine +BABEL_OP3_403_23505_20141008_003349_outLine +BABEL_OP3_403_23628_20141023_001612_outLine +BABEL_OP3_403_23980_20141029_203114_inLine +BABEL_OP3_403_23980_20141029_203114_outLine +BABEL_OP3_403_24239_20150310_212947_inLine +BABEL_OP3_403_24239_20150310_212947_outLine +BABEL_OP3_403_24239_20150310_213506_inLine +BABEL_OP3_403_24239_20150310_213506_outLine +BABEL_OP3_403_24239_20150310_214027_inLine +BABEL_OP3_403_24239_20150310_214027_outLine +BABEL_OP3_403_24270_20141118_203034_inLine +BABEL_OP3_403_24270_20141118_203034_outLine +BABEL_OP3_403_25719_20141220_201504_inLine +BABEL_OP3_403_25719_20141220_201504_outLine +BABEL_OP3_403_25895_20150413_214536_inLine +BABEL_OP3_403_25895_20150413_214536_outLine +BABEL_OP3_403_26836_20141005_221645_inLine +BABEL_OP3_403_26836_20141005_221645_outLine +BABEL_OP3_403_26869_20150416_200345_inLine +BABEL_OP3_403_26869_20150416_201016_inLine +BABEL_OP3_403_27046_20150406_230902_inLine +BABEL_OP3_403_27046_20150406_230902_outLine +BABEL_OP3_403_27082_20141103_211126_inLine +BABEL_OP3_403_27082_20141103_211126_outLine +BABEL_OP3_403_28422_20141130_174755_inLine +BABEL_OP3_403_28422_20141130_174755_outLine +BABEL_OP3_403_28538_20141031_223359_inLine +BABEL_OP3_403_28538_20141031_223359_outLine +BABEL_OP3_403_29072_20141202_234748_inLine +BABEL_OP3_403_29072_20141202_234748_outLine +BABEL_OP3_403_29663_20150415_221129_inLine +BABEL_OP3_403_29663_20150415_221129_outLine +BABEL_OP3_403_29663_20150415_221719_inLine +BABEL_OP3_403_29663_20150415_221719_outLine +BABEL_OP3_403_29685_20141026_200131_inLine +BABEL_OP3_403_29685_20141026_200131_outLine +BABEL_OP3_403_30013_20141203_194552_inLine +BABEL_OP3_403_30013_20141203_194552_outLine +BABEL_OP3_403_30426_20150412_233250_inLine +BABEL_OP3_403_30426_20150412_233250_outLine +BABEL_OP3_403_30645_20141006_230955_inLine +BABEL_OP3_403_30645_20141006_230955_outLine +BABEL_OP3_403_30653_20150318_184710_inLine +BABEL_OP3_403_30653_20150318_184710_outLine +BABEL_OP3_403_30869_20141220_202310_inLine +BABEL_OP3_403_30869_20141220_202310_outLine 
+BABEL_OP3_403_31182_20141218_211632_outLine +BABEL_OP3_403_31628_20141201_204314_inLine +BABEL_OP3_403_31628_20141201_204314_outLine +BABEL_OP3_403_32169_20150415_234749_inLine +BABEL_OP3_403_32169_20150415_234749_outLine +BABEL_OP3_403_32914_20141219_204204_inLine +BABEL_OP3_403_32914_20141219_204204_outLine +BABEL_OP3_403_33704_20141216_231752_inLine +BABEL_OP3_403_33704_20141216_231752_outLine +BABEL_OP3_403_33933_20150426_001110_inLine +BABEL_OP3_403_33933_20150426_001110_outLine +BABEL_OP3_403_34145_20141129_000102_inLine +BABEL_OP3_403_34145_20141129_000102_outLine +BABEL_OP3_403_34328_20141031_214721_inLine +BABEL_OP3_403_34328_20141031_214721_outLine +BABEL_OP3_403_34903_20141127_193345_inLine +BABEL_OP3_403_34903_20141127_193345_outLine +BABEL_OP3_403_35202_20141202_231605_inLine +BABEL_OP3_403_35202_20141202_231605_outLine +BABEL_OP3_403_36894_20141014_211920_inLine +BABEL_OP3_403_36894_20141014_211920_outLine +BABEL_OP3_403_37064_20141005_205701_inLine +BABEL_OP3_403_37064_20141005_205701_outLine +BABEL_OP3_403_37271_20141217_183739_inLine +BABEL_OP3_403_37271_20141217_183739_outLine +BABEL_OP3_403_38076_20141128_204027_inLine +BABEL_OP3_403_38076_20141128_204027_outLine +BABEL_OP3_403_38431_20141215_205449_inLine +BABEL_OP3_403_38431_20141215_205449_outLine +BABEL_OP3_403_38554_20141003_222444_outLine +BABEL_OP3_403_39426_20150304_222409_inLine +BABEL_OP3_403_39426_20150304_222409_outLine +BABEL_OP3_403_39426_20150304_223342_inLine +BABEL_OP3_403_39426_20150304_223342_outLine +BABEL_OP3_403_39744_20141013_213656_inLine +BABEL_OP3_403_39744_20141013_213656_outLine +BABEL_OP3_403_40565_20141203_003932_inLine +BABEL_OP3_403_40565_20141203_003932_outLine +BABEL_OP3_403_40740_20141220_223558_inLine +BABEL_OP3_403_40740_20141220_223558_outLine +BABEL_OP3_403_41038_20141201_222108_inLine +BABEL_OP3_403_41038_20141201_222108_outLine +BABEL_OP3_403_41174_20141028_231225_inLine +BABEL_OP3_403_41174_20141028_231225_outLine +BABEL_OP3_403_41272_20150312_024107_inLine +BABEL_OP3_403_41272_20150312_024107_outLine +BABEL_OP3_403_41493_20141006_010651_inLine +BABEL_OP3_403_41493_20141006_010651_outLine +BABEL_OP3_403_41745_20141027_234835_inLine +BABEL_OP3_403_41745_20141027_234835_outLine +BABEL_OP3_403_42155_20141116_204154_inLine +BABEL_OP3_403_42155_20141116_204154_outLine +BABEL_OP3_403_42526_20150106_224056_inLine +BABEL_OP3_403_42526_20150106_224056_outLine +BABEL_OP3_403_42834_20141128_220047_inLine +BABEL_OP3_403_42834_20141128_220047_outLine +BABEL_OP3_403_42942_20141030_235147_inLine +BABEL_OP3_403_42942_20141030_235147_outLine +BABEL_OP3_403_44255_20150305_013502_inLine +BABEL_OP3_403_44347_20141219_190407_inLine +BABEL_OP3_403_44347_20141219_190407_outLine +BABEL_OP3_403_44420_20141023_214836_inLine +BABEL_OP3_403_44420_20141023_214836_outLine +BABEL_OP3_403_44477_20141201_010216_inLine +BABEL_OP3_403_44477_20141201_010216_outLine +BABEL_OP3_403_44709_20141201_213014_inLine +BABEL_OP3_403_44709_20141201_213014_outLine +BABEL_OP3_403_45106_20141120_182301_inLine +BABEL_OP3_403_45106_20141120_182301_outLine +BABEL_OP3_403_46169_20141217_192351_inLine +BABEL_OP3_403_46169_20141217_192351_outLine +BABEL_OP3_403_46333_20141014_002918_inLine +BABEL_OP3_403_46333_20141014_002918_outLine +BABEL_OP3_403_46702_20141003_234833_inLine +BABEL_OP3_403_46702_20141003_234833_outLine +BABEL_OP3_403_46712_20141023_221319_inLine +BABEL_OP3_403_46712_20141023_221319_outLine +BABEL_OP3_403_46763_20150318_203035_inLine +BABEL_OP3_403_46763_20150318_203035_outLine 
+BABEL_OP3_403_47959_20141024_223125_inLine +BABEL_OP3_403_47959_20141024_223125_outLine +BABEL_OP3_403_49001_20141006_010425_inLine +BABEL_OP3_403_49001_20141006_010425_outLine +BABEL_OP3_403_49641_20150402_214738_outLine +BABEL_OP3_403_49775_20141004_211924_inLine +BABEL_OP3_403_49775_20141004_211924_outLine +BABEL_OP3_403_50962_20141005_192714_outLine +BABEL_OP3_403_51156_20150416_171911_inLine +BABEL_OP3_403_51156_20150416_171911_outLine +BABEL_OP3_403_51417_20141220_002122_inLine +BABEL_OP3_403_51417_20141220_002122_outLine +BABEL_OP3_403_51540_20141219_215956_inLine +BABEL_OP3_403_51540_20141219_215956_outLine +BABEL_OP3_403_51968_20141028_223645_inLine +BABEL_OP3_403_51968_20141028_223645_outLine +BABEL_OP3_403_52442_20141105_011029_outLine +BABEL_OP3_403_52818_20141203_184905_inLine +BABEL_OP3_403_52818_20141203_184905_outLine +BABEL_OP3_403_53010_20150418_185722_inLine +BABEL_OP3_403_53010_20150418_185722_outLine +BABEL_OP3_403_53068_20150426_230124_inLine +BABEL_OP3_403_53068_20150426_230124_outLine +BABEL_OP3_403_53144_20150319_193813_inLine +BABEL_OP3_403_53144_20150319_193813_outLine +BABEL_OP3_403_54040_20141219_003109_inLine +BABEL_OP3_403_54040_20141219_003109_outLine +BABEL_OP3_403_54390_20141006_214754_inLine +BABEL_OP3_403_54390_20141006_214754_outLine +BABEL_OP3_403_54697_20141215_211116_inLine +BABEL_OP3_403_54697_20141215_211116_outLine +BABEL_OP3_403_54953_20141027_223433_inLine +BABEL_OP3_403_54953_20141027_223433_outLine +BABEL_OP3_403_55042_20150331_225750_inLine +BABEL_OP3_403_55042_20150331_225750_outLine +BABEL_OP3_403_55381_20141218_191630_inLine +BABEL_OP3_403_55381_20141218_191630_outLine +BABEL_OP3_403_55742_20141004_201921_inLine +BABEL_OP3_403_55742_20141004_204835_inLine +BABEL_OP3_403_55818_20141006_220912_inLine +BABEL_OP3_403_55818_20141006_220912_outLine +BABEL_OP3_403_56370_20141014_185314_inLine +BABEL_OP3_403_56370_20141014_185314_outLine +BABEL_OP3_403_56677_20141208_200823_inLine +BABEL_OP3_403_56677_20141208_200823_outLine +BABEL_OP3_403_56826_20141217_233213_inLine +BABEL_OP3_403_56826_20141217_233213_outLine +BABEL_OP3_403_57919_20150418_200246_inLine +BABEL_OP3_403_57919_20150418_200246_outLine +BABEL_OP3_403_57919_20150418_201847_inLine +BABEL_OP3_403_57919_20150418_201847_outLine +BABEL_OP3_403_58107_20141106_011114_inLine +BABEL_OP3_403_58107_20141106_011114_outLine +BABEL_OP3_403_58145_20141120_190441_inLine +BABEL_OP3_403_58145_20141120_190441_outLine +BABEL_OP3_403_58636_20150426_202602_inLine +BABEL_OP3_403_58636_20150426_202602_outLine +BABEL_OP3_403_58717_20141104_223801_inLine +BABEL_OP3_403_58717_20141104_223801_outLine +BABEL_OP3_403_59301_20141220_230943_inLine +BABEL_OP3_403_59301_20141220_230943_outLine +BABEL_OP3_403_59549_20141004_234422_inLine +BABEL_OP3_403_59549_20141004_234422_outLine +BABEL_OP3_403_59747_20141014_191829_inLine +BABEL_OP3_403_59747_20141014_191829_outLine +BABEL_OP3_403_59864_20150306_210405_inLine +BABEL_OP3_403_59864_20150306_210405_outLine +BABEL_OP3_403_59993_20141005_224301_inLine +BABEL_OP3_403_59993_20141005_224301_outLine +BABEL_OP3_403_59993_20141005_225220_inLine +BABEL_OP3_403_59993_20141005_225220_outLine +BABEL_OP3_403_59993_20141005_230254_inLine +BABEL_OP3_403_59993_20141005_230254_outLine +BABEL_OP3_403_60026_20141006_002312_inLine +BABEL_OP3_403_60026_20141006_002312_outLine +BABEL_OP3_403_61011_20141013_233414_inLine +BABEL_OP3_403_61011_20141013_233414_outLine +BABEL_OP3_403_61040_20141217_000352_inLine +BABEL_OP3_403_61040_20141217_000352_outLine 
+BABEL_OP3_403_61167_20141026_233020_inLine +BABEL_OP3_403_61167_20141026_233020_outLine +BABEL_OP3_403_61435_20141216_223049_inLine +BABEL_OP3_403_61435_20141216_223049_outLine +BABEL_OP3_403_61963_20150106_232605_outLine +BABEL_OP3_403_62155_20150318_181046_inLine +BABEL_OP3_403_62155_20150318_181046_outLine +BABEL_OP3_403_62177_20150303_192318_inLine +BABEL_OP3_403_62177_20150303_192318_outLine +BABEL_OP3_403_62177_20150303_192933_inLine +BABEL_OP3_403_62177_20150303_192933_outLine +BABEL_OP3_403_62835_20141031_215252_inLine +BABEL_OP3_403_62976_20141120_211316_inLine +BABEL_OP3_403_62976_20141120_211316_outLine +BABEL_OP3_403_63220_20141128_003242_inLine +BABEL_OP3_403_63220_20141128_003242_outLine +BABEL_OP3_403_63265_20150416_213544_inLine +BABEL_OP3_403_63265_20150416_214859_inLine +BABEL_OP3_403_63307_20141116_205038_inLine +BABEL_OP3_403_63307_20141116_205038_outLine +BABEL_OP3_403_63484_20150413_210246_inLine +BABEL_OP3_403_63484_20150413_210246_outLine +BABEL_OP3_403_63757_20141118_001039_inLine +BABEL_OP3_403_63757_20141118_001039_outLine +BABEL_OP3_403_63920_20150413_175014_inLine +BABEL_OP3_403_63920_20150413_175014_outLine +BABEL_OP3_403_64635_20150418_171656_inLine +BABEL_OP3_403_64635_20150418_171656_outLine +BABEL_OP3_403_64638_20141130_205142_inLine +BABEL_OP3_403_64638_20141130_205142_outLine +BABEL_OP3_403_64688_20150327_215407_inLine +BABEL_OP3_403_64688_20150327_215407_outLine +BABEL_OP3_403_64759_20141012_211953_inLine +BABEL_OP3_403_64759_20141012_211953_outLine +BABEL_OP3_403_65064_20141120_180442_inLine +BABEL_OP3_403_65064_20141120_180442_outLine +BABEL_OP3_403_65561_20141206_000223_inLine +BABEL_OP3_403_65561_20141206_000223_outLine +BABEL_OP3_403_66959_20141211_191140_inLine +BABEL_OP3_403_66959_20141211_191140_outLine +BABEL_OP3_403_66967_20141016_233136_inLine +BABEL_OP3_403_66967_20141016_233136_outLine +BABEL_OP3_403_67373_20141010_191456_inLine +BABEL_OP3_403_67373_20141010_191456_outLine +BABEL_OP3_403_67401_20141118_192332_inLine +BABEL_OP3_403_67401_20141118_192332_outLine +BABEL_OP3_403_68823_20150416_201411_inLine +BABEL_OP3_403_68823_20150416_201411_outLine +BABEL_OP3_403_69153_20141207_201546_inLine +BABEL_OP3_403_69153_20141207_201546_outLine +BABEL_OP3_403_69153_20141207_202942_inLine +BABEL_OP3_403_69153_20141207_202942_outLine +BABEL_OP3_403_69474_20141204_202057_inLine +BABEL_OP3_403_69474_20141204_202057_outLine +BABEL_OP3_403_69992_20141006_215605_inLine +BABEL_OP3_403_69992_20141006_215605_outLine +BABEL_OP3_403_70526_20150317_192457_inLine +BABEL_OP3_403_70526_20150317_192457_outLine +BABEL_OP3_403_71038_20150106_205857_inLine +BABEL_OP3_403_71038_20150106_205857_outLine +BABEL_OP3_403_71067_20141120_212952_inLine +BABEL_OP3_403_71067_20141120_212952_outLine +BABEL_OP3_403_71067_20141120_214426_inLine +BABEL_OP3_403_71067_20141120_214426_outLine +BABEL_OP3_403_71189_20150318_162559_inLine +BABEL_OP3_403_71189_20150318_162559_outLine +BABEL_OP3_403_71263_20141120_195808_inLine +BABEL_OP3_403_71263_20141120_195808_outLine +BABEL_OP3_403_71263_20141120_200524_inLine +BABEL_OP3_403_71263_20141120_200524_outLine +BABEL_OP3_403_71263_20141120_201201_inLine +BABEL_OP3_403_71263_20141120_201201_outLine +BABEL_OP3_403_71419_20150130_163036_inLine +BABEL_OP3_403_71419_20150130_163036_outLine +BABEL_OP3_403_71419_20150130_170259_inLine +BABEL_OP3_403_71419_20150130_170259_outLine +BABEL_OP3_403_71850_20150317_201433_inLine +BABEL_OP3_403_71850_20150317_201433_outLine +BABEL_OP3_403_71850_20150317_204336_inLine 
+BABEL_OP3_403_71850_20150317_204336_outLine +BABEL_OP3_403_72587_20141127_221927_inLine +BABEL_OP3_403_72587_20141127_221927_outLine +BABEL_OP3_403_72587_20141127_222705_inLine +BABEL_OP3_403_72587_20141127_222705_outLine +BABEL_OP3_403_73446_20150317_233038_inLine +BABEL_OP3_403_73446_20150317_233038_outLine +BABEL_OP3_403_73757_20141103_184243_inLine +BABEL_OP3_403_74121_20141029_192619_inLine +BABEL_OP3_403_74121_20141029_192619_outLine +BABEL_OP3_403_74455_20150304_010648_inLine +BABEL_OP3_403_74455_20150304_010648_outLine +BABEL_OP3_403_74763_20150412_222934_inLine +BABEL_OP3_403_74763_20150412_222934_outLine +BABEL_OP3_403_75465_20141216_203010_inLine +BABEL_OP3_403_75764_20150106_010413_inLine +BABEL_OP3_403_75764_20150106_010413_outLine +BABEL_OP3_403_76238_20141207_205931_inLine +BABEL_OP3_403_76238_20141207_205931_outLine +BABEL_OP3_403_76238_20141207_211123_inLine +BABEL_OP3_403_76238_20141207_211123_outLine +BABEL_OP3_403_76756_20141121_192227_inLine +BABEL_OP3_403_76756_20141121_192227_outLine +BABEL_OP3_403_77146_20141013_203551_inLine +BABEL_OP3_403_77146_20141013_203551_outLine +BABEL_OP3_403_77391_20141025_014416_inLine +BABEL_OP3_403_77391_20141025_014416_outLine +BABEL_OP3_403_77803_20141013_223521_inLine +BABEL_OP3_403_77803_20141013_223521_outLine +BABEL_OP3_403_77904_20150426_181110_inLine +BABEL_OP3_403_77904_20150426_181110_outLine +BABEL_OP3_403_77909_20150330_191417_inLine +BABEL_OP3_403_77909_20150330_191417_outLine +BABEL_OP3_403_78609_20141217_215450_inLine +BABEL_OP3_403_78609_20141217_215450_outLine +BABEL_OP3_403_78743_20141216_183731_inLine +BABEL_OP3_403_78743_20141216_183731_outLine +BABEL_OP3_403_78976_20141025_002547_inLine +BABEL_OP3_403_78976_20141025_002547_outLine +BABEL_OP3_403_79045_20141219_213058_inLine +BABEL_OP3_403_79045_20141219_213058_outLine +BABEL_OP3_403_79129_20141117_210821_inLine +BABEL_OP3_403_79129_20141117_210821_outLine +BABEL_OP3_403_79139_20141103_204223_inLine +BABEL_OP3_403_79139_20141103_204223_outLine +BABEL_OP3_403_80881_20141016_231419_inLine +BABEL_OP3_403_80881_20141016_231419_outLine +BABEL_OP3_403_80897_20141118_205921_inLine +BABEL_OP3_403_81392_20141202_223505_inLine +BABEL_OP3_403_81392_20141202_223505_outLine +BABEL_OP3_403_81553_20150108_011830_inLine +BABEL_OP3_403_81553_20150108_011830_outLine +BABEL_OP3_403_81971_20141013_202229_inLine +BABEL_OP3_403_81971_20141013_202229_outLine +BABEL_OP3_403_82089_20141103_180402_inLine +BABEL_OP3_403_82089_20141103_180402_outLine +BABEL_OP3_403_82138_20141103_203306_inLine +BABEL_OP3_403_82138_20141103_203306_outLine +BABEL_OP3_403_82140_20141103_203606_inLine +BABEL_OP3_403_82140_20141103_203606_outLine +BABEL_OP3_403_82224_20141221_020512_inLine +BABEL_OP3_403_82224_20141221_020512_outLine +BABEL_OP3_403_82361_20150313_215812_inLine +BABEL_OP3_403_82361_20150313_215812_outLine +BABEL_OP3_403_82637_20141013_202558_inLine +BABEL_OP3_403_82637_20141013_202558_outLine +BABEL_OP3_403_82742_20141217_192623_inLine +BABEL_OP3_403_82742_20141217_192623_outLine +BABEL_OP3_403_82742_20141217_193955_inLine +BABEL_OP3_403_82742_20141217_193955_outLine +BABEL_OP3_403_82935_20141220_194756_inLine +BABEL_OP3_403_82935_20141220_194756_outLine +BABEL_OP3_403_83783_20141117_201033_inLine +BABEL_OP3_403_83783_20141117_201033_outLine +BABEL_OP3_403_83813_20150201_234438_inLine +BABEL_OP3_403_83813_20150201_234438_outLine +BABEL_OP3_403_83929_20140926_001811_inLine +BABEL_OP3_403_83929_20140926_001811_outLine +BABEL_OP3_403_83935_20141205_002539_inLine 
+BABEL_OP3_403_83935_20141205_002539_outLine +BABEL_OP3_403_83935_20141205_223342_inLine +BABEL_OP3_403_83935_20141205_223342_outLine +BABEL_OP3_403_84061_20141027_225533_inLine +BABEL_OP3_403_84061_20141027_225533_outLine +BABEL_OP3_403_84125_20141005_234430_inLine +BABEL_OP3_403_84125_20141005_234430_outLine +BABEL_OP3_403_84408_20141026_210154_inLine +BABEL_OP3_403_84408_20141026_210154_outLine +BABEL_OP3_403_84936_20141127_181420_inLine +BABEL_OP3_403_84936_20141127_181420_outLine +BABEL_OP3_403_85047_20141031_202048_inLine +BABEL_OP3_403_85047_20141031_202048_outLine +BABEL_OP3_403_85322_20141006_225220_inLine +BABEL_OP3_403_85322_20141006_225220_outLine +BABEL_OP3_403_85340_20141005_204959_inLine +BABEL_OP3_403_85340_20141005_204959_outLine +BABEL_OP3_403_86321_20141208_193101_inLine +BABEL_OP3_403_86321_20141208_193101_outLine +BABEL_OP3_403_86557_20141016_213938_inLine +BABEL_OP3_403_86557_20141016_213938_outLine +BABEL_OP3_403_86829_20150413_201100_inLine +BABEL_OP3_403_86829_20150413_201100_outLine +BABEL_OP3_403_87298_20141024_181414_inLine +BABEL_OP3_403_87298_20141024_181414_outLine +BABEL_OP3_403_87796_20141116_204525_inLine +BABEL_OP3_403_87796_20141116_204525_outLine +BABEL_OP3_403_87871_20141217_212127_inLine +BABEL_OP3_403_87871_20141217_212127_outLine +BABEL_OP3_403_88550_20150307_215430_inLine +BABEL_OP3_403_88550_20150307_215430_outLine +BABEL_OP3_403_88550_20150307_221516_inLine +BABEL_OP3_403_88550_20150307_221516_outLine +BABEL_OP3_403_88661_20141201_185938_inLine +BABEL_OP3_403_88661_20141201_185938_outLine +BABEL_OP3_403_88661_20141201_192152_inLine +BABEL_OP3_403_88661_20141201_192152_outLine +BABEL_OP3_403_88674_20150418_221617_inLine +BABEL_OP3_403_88674_20150418_221617_outLine +BABEL_OP3_403_89045_20141003_224541_outLine +BABEL_OP3_403_89372_20141003_233243_inLine +BABEL_OP3_403_89372_20141004_235806_inLine +BABEL_OP3_403_89560_20141217_191117_inLine +BABEL_OP3_403_89560_20141217_191117_outLine +BABEL_OP3_403_89794_20141129_193030_inLine +BABEL_OP3_403_89794_20141129_193030_outLine +BABEL_OP3_403_89877_20141120_182454_inLine +BABEL_OP3_403_89877_20141120_182454_outLine +BABEL_OP3_403_90347_20141207_221437_inLine +BABEL_OP3_403_90347_20141207_221437_outLine +BABEL_OP3_403_90935_20141026_200818_inLine +BABEL_OP3_403_91336_20141103_203505_inLine +BABEL_OP3_403_91336_20141103_203505_outLine +BABEL_OP3_403_91411_20150130_181331_inLine +BABEL_OP3_403_91411_20150130_181331_outLine +BABEL_OP3_403_91411_20150130_185140_inLine +BABEL_OP3_403_91411_20150130_185140_outLine +BABEL_OP3_403_91891_20141205_223437_inLine +BABEL_OP3_403_91891_20141205_223437_outLine +BABEL_OP3_403_91891_20141205_224513_inLine +BABEL_OP3_403_91891_20141205_224513_outLine +BABEL_OP3_403_92440_20150413_001701_inLine +BABEL_OP3_403_92440_20150413_001701_outLine +BABEL_OP3_403_92698_20141104_003927_inLine +BABEL_OP3_403_92698_20141104_003927_outLine +BABEL_OP3_403_92757_20150307_000144_inLine +BABEL_OP3_403_92757_20150307_000144_outLine +BABEL_OP3_403_92757_20150307_001520_inLine +BABEL_OP3_403_92757_20150307_001520_outLine +BABEL_OP3_403_92792_20150319_214450_inLine +BABEL_OP3_403_92792_20150319_214450_outLine +BABEL_OP3_403_93861_20141031_233412_inLine +BABEL_OP3_403_93946_20141208_202019_inLine +BABEL_OP3_403_93946_20141208_202019_outLine +BABEL_OP3_403_94002_20141120_194833_inLine +BABEL_OP3_403_94002_20141120_194833_outLine +BABEL_OP3_403_94141_20150311_224536_inLine +BABEL_OP3_403_94141_20150311_224536_outLine +BABEL_OP3_403_94666_20141106_230027_inLine 
+BABEL_OP3_403_94666_20141106_230027_outLine +BABEL_OP3_403_94745_20141202_235317_inLine +BABEL_OP3_403_94745_20141202_235317_outLine +BABEL_OP3_403_95294_20141202_001855_inLine +BABEL_OP3_403_95294_20141202_001855_outLine +BABEL_OP3_403_95598_20141004_012914_outLine +BABEL_OP3_403_95663_20141013_194657_inLine +BABEL_OP3_403_95663_20141013_194657_outLine +BABEL_OP3_403_95966_20141028_211011_inLine +BABEL_OP3_403_95966_20141028_211011_outLine +BABEL_OP3_403_96820_20141105_001821_inLine +BABEL_OP3_403_96820_20141105_001821_outLine +BABEL_OP3_403_97448_20150330_211249_inLine +BABEL_OP3_403_97448_20150330_211249_outLine +BABEL_OP3_403_97896_20141031_234221_inLine +BABEL_OP3_403_97896_20141031_234221_outLine +BABEL_OP3_403_97988_20141211_193604_inLine +BABEL_OP3_403_97988_20141211_193604_outLine +BABEL_OP3_403_98165_20141026_210536_inLine +BABEL_OP3_403_98165_20141026_210536_outLine +BABEL_OP3_403_98365_20141117_210300_inLine +BABEL_OP3_403_98365_20141117_210300_outLine +BABEL_OP3_403_98489_20141007_213814_inLine +BABEL_OP3_403_98489_20141007_213814_outLine +BABEL_OP3_403_99516_20141016_194316_inLine +BABEL_OP3_403_99516_20141016_194316_outLine +BABEL_OP3_403_99732_20141217_232949_inLine +BABEL_OP3_403_99732_20141217_232949_outLine diff --git a/egs/babel/s5d/conf/mfcc.conf b/egs/babel/s5d/conf/mfcc.conf new file mode 100644 index 00000000000..45280a4e3a0 --- /dev/null +++ b/egs/babel/s5d/conf/mfcc.conf @@ -0,0 +1,2 @@ +--use-energy=false # only non-default option. +--sample-frequency=8000 # Switchboard is sampled at 8kHz diff --git a/egs/babel/s5d/conf/mfcc_hires.conf b/egs/babel/s5d/conf/mfcc_hires.conf new file mode 100644 index 00000000000..d870ab04c38 --- /dev/null +++ b/egs/babel/s5d/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--sample-frequency=8000 # Switchboard is sampled at 8kHz +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. 
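+# (with --num-mel-bins=40 and --num-ceps=40 the cepstral transform is a square,
+# invertible DCT, which is what makes the "same info as filterbank" note above hold)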
+--low-freq=40 # low cutoff frequency for mel bins
+--high-freq=-200 # high cutoff frequency, relative to Nyquist of 4000 (=3800)
diff --git a/egs/babel/s5d/conf/online_cmvn.conf b/egs/babel/s5d/conf/online_cmvn.conf
new file mode 100644
index 00000000000..7748a4a4dd3
--- /dev/null
+++ b/egs/babel/s5d/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/babel/s5d/conf/pitch.conf b/egs/babel/s5d/conf/pitch.conf
new file mode 100644
index 00000000000..926bcfca92a
--- /dev/null
+++ b/egs/babel/s5d/conf/pitch.conf
@@ -0,0 +1 @@
+--sample-frequency=8000
diff --git a/egs/babel/s5d/conf/plp.conf b/egs/babel/s5d/conf/plp.conf
new file mode 100644
index 00000000000..926bcfca92a
--- /dev/null
+++ b/egs/babel/s5d/conf/plp.conf
@@ -0,0 +1 @@
+--sample-frequency=8000
diff --git a/egs/babel/s5d/conf/slurm.bluecrab.conf b/egs/babel/s5d/conf/slurm.bluecrab.conf
new file mode 100644
index 00000000000..d0c5fd1f904
--- /dev/null
+++ b/egs/babel/s5d/conf/slurm.bluecrab.conf
@@ -0,0 +1,11 @@
+command sbatch --export=PATH --ntasks-per-node=1 --exclude=compute[0001-0014,0017,0021,0022,0038]
+option time=* --time=$0
+option mem=* --mem-per-cpu=$0
+option mem=0 # Do not add anything to qsub_opts
+option num_threads=* --cpus-per-task=$0 --ntasks-per-node=1
+option num_threads=1 --cpus-per-task=1 --ntasks-per-node=1 # Do not add anything to qsub_opts
+option max_jobs_run=* # Do nothing
+default gpu=0
+# option gpu=0 -p scavenger --qos=scavenger
+option gpu=0 -p shared
+option gpu=* -p gpu --gres=gpu:$0 --cpus-per-task=6 --exclude=gpu[019,026] --time=4:0:0 # in reality, we probably should have --cpus-per-task=$((6*$0))
diff --git a/egs/babel/s5d/local/ali_to_rttm.sh b/egs/babel/s5d/local/ali_to_rttm.sh
new file mode 100755
index 00000000000..60d0598f007
--- /dev/null
+++ b/egs/babel/s5d/local/ali_to_rttm.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# Copyright 2012-2013 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
+# Apache 2.0.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+#This script will take the ali directory and create the corresponding rttm file
+#Example
+#steps/align_sgmm2.sh --nj 20 --cmd "$decode_cmd" \
+# --transform-dir exp/tri5/decode_dev2h.uem \
+# data/dev2h.uem data/lang exp/sgmm5 exp/sgmm5/align_dev2h.uem
+#local/ali_to_rttm.sh data/dev2h data/lang exp/sgmm5/align_dev2h/
+
+cmd=run.pl
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+beam=10
+retry_beam=40
+boost_silence=1.0
+
+if [ -f path.sh ]; then . path.sh; fi
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+ echo "This script takes an ali directory and creates the corresponding RTTM file"
+ echo ""
+ echo "Usage: ali_to_rttm.sh <data-dir> <lang-dir> <ali-dir>"
+ echo " e.g.: ali_to_rttm.sh data/heldout data/lang exp/heldout_ali"
+ echo "main options (for others, see top of script file)"
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+
+ exit 1;
+fi
+
+set -e
+set -o pipefail
+set -u
+
+data=$1
+lang=$2
+dir=$3
+
+oov=`cat $lang/oov.txt`
+mkdir -p $dir/log
+
+echo "$0: writing alignments."
+wbegin=`grep "#1" $lang/phones.txt | head -1 | awk '{print $2}'`
+wend=`grep "#2" $lang/phones.txt | head -1 | awk '{print $2}'`
+
+if [ ! -f $lang/L_align.fst ]; then
+ echo "$0: generating $lang/L_align.fst"
+ local/make_L_align.sh data/local/tmp.lang/ $lang $lang 2>&1 | tee $dir/log/L_align.log
+fi
+
+$cmd $dir/log/align_to_words.log \
+ ali-to-phones $dir/final.mdl "ark:gunzip -c $dir/ali.*.gz|" ark,t:- \| \
+ phones-to-prons $lang/L_align.fst $wbegin $wend ark:- "ark,s:utils/sym2int.pl -f 2- --map-oov '$oov' $lang/words.txt <$data/text|" ark,t:- \| \
+ prons-to-wordali ark:- "ark:ali-to-phones --write-lengths=true $dir/final.mdl 'ark:gunzip -c $dir/ali.*.gz|' ark,t:- |" ark,t:$dir/align.txt
+
+echo "$0: done writing alignments."
+
+echo "$0: writing rttm."
+[ ! -x local/txt_to_rttm.pl ] && \
+ echo "Not creating rttm because local/txt_to_rttm.pl does not exist or is not executable." && exit 1;
+
+local/txt_to_rttm.pl --symtab=$lang/words.txt --segment=$data/segments $dir/align.txt $dir/rttm 2>$dir/log/rttm.log
+local/txt_to_rttm.pl --symtab=$lang/words.txt $dir/align.txt $dir/rttm.per-utt 2>$dir/log/rttm.per-utt.log
+echo "$0: done writing rttm."
+
+exit 0;
diff --git a/egs/babel/s5d/local/annotated_kwlist_to_KWs.pl b/egs/babel/s5d/local/annotated_kwlist_to_KWs.pl
new file mode 100755
index 00000000000..a4c80cef345
--- /dev/null
+++ b/egs/babel/s5d/local/annotated_kwlist_to_KWs.pl
@@ -0,0 +1,124 @@
+#!/usr/bin/env perl
+
+# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: annotated_kwlist_to_KWs.pl [options] <kwlist.annot.xml> <keywords.list> [category]
+ e.g.: annotated_kwlist_to_KWs.pl kwlist.annot.list keywords.list "NGram Order:2,3,4"
+
+This script reads an annotated kwlist xml file and writes a list of keywords, according
+to the given categories. The "category" is a "key:value" pair in the annotated kwlist xml
+file. For example:
+1. "NGram Order:2,3,4"
+2. "NGram Order:2"
+3. "NGram Order:-"
+where "NGram Order" is the category name. The first line means print keywords that are
+bigram, trigram and 4gram; the second line means print keywords only for bigram; the last
+line means print all possible ngram keywords.
+If no "category" is specified, the script will print out the possible categories.
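+
+A typical session (the file names here are purely illustrative) runs the script
+twice: once with no category, to list the available category names on stderr,
+and then again with the chosen category:
+ annotated_kwlist_to_KWs.pl kwlist.annot.list - # prints "Possible categories are: ..."
+ annotated_kwlist_to_KWs.pl kwlist.annot.list keywords.list "NGram Order:-"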
+
+Allowed options:
+EOU
+
+GetOptions();
+
+@ARGV >= 2 || die $Usage;
+
+# Work out the input/output source
+my $kwlist_filename = shift @ARGV;
+my $kws_filename = shift @ARGV;
+
+my $source = "STDIN";
+if ($kwlist_filename ne "-") {
+ open(KWLIST, "<$kwlist_filename") || die "Failed to open kwlist file: $kwlist_filename\n";
+ $source = "KWLIST";
+}
+
+# Process kwlist.annot.xml
+my %attr;
+my %attr_kws;
+my $kwid="";
+my $name="";
+my $value="";
+while (<$source>) {
+ chomp;
+ if (m/<kw /) {($kwid) = /kwid="(\S+)"/; next;}
+ if (m/<name>/) {($name) = /<name>(.*)<\/name>/; next;}
+ if (m/<value>/) {
+ ($value) = /<value>(.*)<\/value>/;
+ if (defined($attr{$name})) {
+ $attr{"$name"}->{"$value"} = 1;
+ } else {
+ $attr{"$name"} = {"$value", 1};
+ }
+ if (defined($attr_kws{"${name}_$value"})) {
+ $attr_kws{"${name}_$value"}->{"$kwid"} = 1;
+ } else {
+ $attr_kws{"${name}_$value"} = {"$kwid", 1};
+ }
+ }
+}
+
+my $output = "";
+if (@ARGV == 0) {
+ # If no category provided, print out the possible categories
+ $output .= "Possible categories are:\n\n";
+ foreach my $name (keys %attr) {
+ $output .= "$name:";
+ my $count = 0;
+ foreach my $value (keys %{$attr{$name}}) {
+ if ($value eq "") {$value = "\"\"";}
+ if ($count == 0) {
+ $output .= "$value";
+ $count ++; next;
+ }
+ if ($count == 6) {
+ $output .= ", ...";
+ last;
+ }
+ $output .= ",$value"; $count ++;
+ }
+ $output .= "\n";
+ }
+ print STDERR $output;
+ $output = "";
+} else {
+ my %keywords;
+ while (@ARGV > 0) {
+ my $category = shift @ARGV;
+ my @col = split(/:/, $category);
+ @col == 2 || die "Bad category \"$category\"\n";
+ $name = $col[0];
+ if ($col[1] eq "-") {
+ foreach my $value (keys %{$attr{$name}}) {
+ foreach my $kw (keys %{$attr_kws{"${name}_$value"}}) {
+ $keywords{$kw} = 1;
+ }
+ }
+ } else {
+ my @col1 = split(/,/, $col[1]);
+ foreach my $value (@col1) {
+ foreach my $kw (keys %{$attr_kws{"${name}_$value"}}) {
+ $keywords{$kw} = 1;
+ }
+ }
+ }
+ }
+ foreach my $kw (keys %keywords) {
+ $output .= "$kw\n";
+ }
+}
+
+if ($kwlist_filename ne "-") {close(KWLIST);}
+if ($kws_filename eq "-") { print $output;}
+else {
+ open(O, ">$kws_filename") || die "Failed to open file $kws_filename\n";
+ print O $output;
+ close(O);
+}
diff --git a/egs/babel/s5d/local/apply_g2p.sh b/egs/babel/s5d/local/apply_g2p.sh
new file mode 100755
index 00000000000..385b1f3536e
--- /dev/null
+++ b/egs/babel/s5d/local/apply_g2p.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+
+# Begin configuration section.
+iters=5
+stage=0
+encoding='utf-8'
+remove_tags=true
+only_words=true
+icu_transform="Any-Lower"
+var_counts=3 #Generate up to N variants
+var_mass=0.9 #Generate enough variants to cover 90% of the probability mass
+cmd=run.pl
+nj=10 #Split the task into several parallel jobs, to speed things up
+model=
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+set -u
+set -e
+
+if [ $# != 3 ]; then
+ echo "Usage: $0 [options] <word-list> <g2p-model-dir> <output-dir>"
+ echo "... where <word-list> is a list of words whose pronunciation is to be generated"
+ echo " <g2p-model-dir> is a directory used as a target during training of G2P"
+ echo " <output-dir> is the directory where the output lexicon should be stored"
+ echo "e.g.: $0 oov_words exp/g2p exp/g2p/oov_lex"
+ echo ""
+ echo "main options (for others, see top of script file)"
+ echo " --nj <nj> # How many tasks should be spawned (to speed things up)"
+ echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
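+ # (the defaults below mirror the configuration section at the top of this
+ # script; like --nj and --cmd they can be overridden via parse_options.sh)
+ echo " --var-counts <n> # Generate up to <n> pronunciation variants per word (default: 3)"
+ echo " --var-mass <p> # Generate variants covering probability mass <p> (default: 0.9)"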
+ exit 1;
+fi
+
+wordlist=$1
+modeldir=$2
+output=$3
+
+
+mkdir -p $output/log
+
+model=$modeldir/g2p.model.final
+[ ! -f ${model:-} ] && echo "File $model not found in the directory $modeldir." && exit 1
+#[ ! -x $wordlist ] && echo "File $wordlist not found!" && exit 1
+
+cp $wordlist $output/wordlist.orig.txt
+
+if [ ! -z $icu_transform ] ; then
+ #we have to keep a correspondence map A -> transform(A)
+ paste \
+ <(cat $output/wordlist.orig.txt | uconv -f $encoding -t $encoding -x $icu_transform) \
+ $output/wordlist.orig.txt \
+ > $output/transform_map.txt
+ cut -f 1 $output/transform_map.txt | sort -u > $output/wordlist.txt
+else
+ cp $output/wordlist.orig.txt $output/wordlist.txt
+fi
+
+if ! g2p=`which g2p.py` ; then
+ echo "The Sequitur tool was not found!"
+ echo "Go to $KALDI_ROOT/tools and execute extras/install_sequitur.sh"
+ exit 1
+fi
+
+
+echo "Applying the G2P model to wordlist $wordlist"
+
+if [ $stage -le 0 ]; then
+ $cmd JOBS=1:$nj $output/log/apply.JOBS.log \
+ split -n l/JOBS/$nj $output/wordlist.txt \| \
+ g2p.py -V $var_mass --variants-number $var_counts --encoding $encoding \
+ --model $modeldir/g2p.model.final --apply - \
+ \> $output/output.JOBS
+fi
+cat $output/output.* > $output/output
+
+#Remap the words from the output file back to the original casing.
+#Conversion of some of them might have failed, so we have to be careful
+#and use the transform_map file we generated beforehand.
+#Also, because the sequitur output is not readily usable as a lexicon (it adds
+#one more column with the ordering of the pron. variants), convert it into the proper lexicon form.
+output_lex=$output/lexicon.lex
+if [ ! -z $icu_transform ] ; then
+ #also, the transform is generally N -> 1, i.e. we have to take
+ #extra care of words that might have been mapped into the same one
+ perl -e 'open(WORDS, $ARGV[0]) or die "Could not open file $ARGV[0]";
+ while(<WORDS>) { chomp; @F=split;
+ if ($MAP{$F[0]} ) { push @{$MAP{$F[0]}}, $F[1]; }
+ else { $MAP{$F[0]} = [$F[1]]; }
+ }
+ close(WORDS);
+ open(LEX, $ARGV[1]) or die "Could not open file $ARGV[1]";
+ while(<LEX>) {chomp; @F=split /\t/;
+ if ( $#F != 3 ) {
+ print STDERR "WARNING: Non-acceptable entry \"" . join(" ", @F) . "\" ($#F splits)\n";
+ next;
+ }
+ foreach $word (@{$MAP{$F[0]}} ) {
+ print "$word\t$F[2]\t$F[3]\n";
+ }
+ }
+ close(LEX);
+ ' \
+ $output/transform_map.txt $output/output | sort -u > $output_lex
+else
+ #Just convert it to a proper lexicon format
+ cut -f 1,3,4 $output/output > $output_lex
+fi
+
+#Some words might have been removed or skipped during the process,
+#let's check it and warn the user if so...
+nlex=`cut -f 1 $output_lex | sort -u | wc -l`
+nwlist=`cut -f 1 $output/wordlist.orig.txt | sort -u | wc -l`
+if [ $nlex -ne $nwlist ] ; then
+ echo "WARNING: Unable to generate pronunciations for all words."
+ echo "WARNING: Wordlist: $nwlist words"
+ echo "WARNING: Lexicon : $nlex words"
+ echo "WARNING: Diff example:"
+ diff <(cut -f 1 $output_lex | sort -u ) \
+ <(cut -f 1 $output/wordlist.orig.txt | sort -u ) || true
fi
+exit 0
diff --git a/egs/babel/s5d/local/apply_map_tab_preserving.pl b/egs/babel/s5d/local/apply_map_tab_preserving.pl
new file mode 100755
index 00000000000..b57262f1930
--- /dev/null
+++ b/egs/babel/s5d/local/apply_map_tab_preserving.pl
@@ -0,0 +1,94 @@
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
+
+# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0.
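+#
+# Worked example (map file and input invented for illustration): given a map
+# file with the lines "A a1 a2" and "B b", the input line "A B<TAB>B A" comes
+# out as "a1 a2 b<TAB>b a1 a2": the map is applied inside every tab-separated
+# field, while the tab layout of the line is kept intact.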
+
+
+# This program is a bit like ./sym2int.pl in that it applies a map
+# to things in a file, but it's a bit more general in that it doesn't
+# assume the things being mapped to are single tokens, they could
+# be sequences of tokens. See the usage message.
+# this version preserves tabs.
+
+if (@ARGV > 0 && $ARGV[0] eq "-f") {
+ shift @ARGV;
+ $field_spec = shift @ARGV;
+ if ($field_spec =~ m/^\d+$/) {
+ $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
+ }
+ if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
+ if ($1 ne "") {
+ $field_begin = $1 - 1; # Change to zero-based indexing.
+ }
+ if ($2 ne "") {
+ $field_end = $2 - 1; # Change to zero-based indexing.
+ }
+ }
+ if (!defined $field_begin && !defined $field_end) {
+ die "Bad argument to -f option: $field_spec";
+ }
+}
+
+# Mapping is obligatory
+$permissive = 0;
+if (@ARGV > 0 && $ARGV[0] eq '--permissive') {
+ shift @ARGV;
+ # Mapping is optional (missing key is printed to output)
+ $permissive = 1;
+}
+
+if(@ARGV != 1) {
+ print STDERR "Usage: apply_map_tab_preserving.pl [options] map <input >output\n" .
+ "options: [-f <field-range>]\n" .
+ "Applies the map 'map' to all input text, where each line of the map\n" .
+ "is interpreted as a map from the first field to the list of the other fields\n" .
+ "Note: <field-range> can look like 4-5, or 4-, or 5-, or 1, it means the field\n" .
+ "range in the input to apply the map to.\n" .
+ "e.g.: echo A B | apply_map_tab_preserving.pl a.txt\n" .
+ "where a.txt is:\n" .
+ "A a1 a2\n" .
+ "B b\n" .
+ "will produce:\n" .
+ "a1 a2 b\n";
+ exit(1);
+}
+
+($map) = @ARGV;
+open(M, "<$map") || die "Error opening map file $map: $!";
+
+while (<M>) {
+ @A = split(" ", $_);
+ @A >= 1 || die "apply_map_tab_preserving.pl: empty line.";
+ $i = shift @A;
+ $o = join(" ", @A);
+ $map{$i} = $o;
+}
+
+while(<STDIN>) {
+ @A = split("\t", $_);
+ $field_offset = 0;
+ for ($n = 0; $n < @A; $n++) {
+ @B = split(" ", $A[$n]);
+
+ for ($x = 0; $x < @B; $x++) {
+ $y = $x + $field_offset;
+ if ( (!defined $field_begin || $y >= $field_begin)
+ && (!defined $field_end || $y <= $field_end)) {
+ $b = $B[$x];
+ if (!defined $map{$b}) {
+ if (!$permissive) {
+ die "apply_map_tab_preserving.pl: undefined key $b\n";
+ } else {
+ print STDERR "apply_map_tab_preserving.pl: warning! missing key $b\n";
+ }
+ } else {
+ $B[$x] = $map{$b};
+ }
+ }
+ }
+ $field_offset += @B;
+ $A[$n] = join(" ", @B);
+ }
+ print join("\t", @A) . "\n";
+}
diff --git a/egs/babel/s5d/local/arpa2G.sh b/egs/babel/s5d/local/arpa2G.sh
new file mode 100755
index 00000000000..40c269fbb22
--- /dev/null
+++ b/egs/babel/s5d/local/arpa2G.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# Copyright 2013-2014 Johns Hopkins University (authors: Yenda Trmal, Daniel Povey)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+#Simple utility script to convert the gzipped ARPA lm into a G.fst file
+
+
+oov_prob_file=
+unk_fraction=
+cleanup=true
+#end configuration section.
+
+
+
+echo $0 $@
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+. parse_options.sh || exit 1;
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <arpa-lm-file> <lang-dir> <dest-dir>"
+  echo "Options: --oov-prob-file <file>   # e.g. data/local/oov2prob"
+  echo "         # with this option it will replace <unk> with OOVs in G.fst."
+  exit 1;
+fi
+
+set -e          #Exit on non-zero return code from any command
+set -o pipefail #Exit if any of the commands in the pipeline will
+                #return non-zero return code
+
+lmfile=$1
+langdir=$2
+destdir=$3
+
+mkdir $destdir 2>/dev/null || true
+
+
+if [ ! -z "$oov_prob_file" ]; then
+  if [ ! -s "$oov_prob_file" ]; then
+    echo "$0: oov-prob file $oov_prob_file does not exist"
+    exit 1;
+  fi
+  if [ -z "$unk_fraction" ]; then
+    echo "--oov-prob option requires --unk-fraction option";
+    exit 1;
+  fi
+
+  min_prob=$(gunzip -c $lmfile | perl -e ' $minlogprob = 0.0;
+     while(<STDIN>) { if (m/\\(\d)-grams:/) { $order = $1; }
+       if ($order == 1) { @A = split;
+         if ($A[0] < $minlogprob && $A[0] != -99) { $minlogprob = $A[0]; }}} print $minlogprob')
+  echo "Minimum prob in LM file is $min_prob"
+
+  echo "$0: creating LM file with unk words, using $oov_prob_file, in $destdir/lm_tmp.gz"
+  gunzip -c $lmfile | \
+    perl -e ' ($oov_prob_file,$min_prob,$unk_fraction) = @ARGV; $ceilinged=0;
+    $min_prob < 0.0 || die "Bad min_prob";      # this is a log-prob
+    $unk_fraction > 0.0 || die "Bad unk_fraction"; # this is a prob
+    open(F, "<$oov_prob_file") || die "opening oov file";
+    while (<F>) { push @OOVS, $_; }
+    $num_oovs = @OOVS;
+    while(<STDIN>) {
+      if (m/^ngram 1=(\d+)/) { $n = $1 + $num_oovs; print "ngram 1=$n\n"; }
+      else { print; } # print all lines unchanged except the one that says ngram 1=X.
+      if (m/^\\1-grams:$/) {
+        foreach $l (@OOVS) {
+          @A = split(" ", $l);
+          @A == 2 || die "bad line in oov2prob: $_;";
+          ($word, $prob) = @A;
+          $log10prob = (log($prob * $unk_fraction) / log(10.0));
+          if ($log10prob > $min_prob) { $log10prob = $min_prob; $ceilinged++;}
+          print "$log10prob $word\n";
+        }
+      }} print STDERR "Ceilinged $ceilinged unk-probs\n";' \
+     $oov_prob_file $min_prob $unk_fraction | gzip -c > $destdir/lm_tmp.gz
+  lmfile=$destdir/lm_tmp.gz
+fi
+
+if [[ $lmfile == *.bz2 ]] ; then
+  decompress="bunzip2 -c $lmfile"
+elif [[ $lmfile == *.gz ]] ; then
+  decompress="gunzip -c $lmfile"
+else
+  decompress="cat $lmfile"
+fi
+
+$decompress | \
+   grep -v '<s> <s>' | grep -v '</s> <s>' | grep -v '</s> </s>' | \
+   arpa2fst - | \
+   fstprint | \
+   utils/eps2disambig.pl | \
+   utils/s2eps.pl | \
+   fstcompile --isymbols=$langdir/words.txt \
+     --osymbols=$langdir/words.txt --keep_isymbols=false --keep_osymbols=false | \
+   fstrmepsilon | fstarcsort --sort_type=ilabel > $destdir/G.fst || exit 1
+fstisstochastic $destdir/G.fst || true;
+
+if $cleanup; then
+  rm $destdir/lm_tmp.gz 2>/dev/null || true;
+fi
+
+exit 0
diff --git a/egs/babel/s5d/local/augment_original_stm.pl b/egs/babel/s5d/local/augment_original_stm.pl
new file mode 100755
index 00000000000..c5ad87fd286
--- /dev/null
+++ b/egs/babel/s5d/local/augment_original_stm.pl
@@ -0,0 +1,110 @@
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
+# Copyright 2012  Johns Hopkins University (Author: Jan Trmal)
+# Apache 2.0.
+
+#This script takes the original BABEL STM file (part of the IndusDB)
+#and replaces the "Aggregated" field with the correct speaker ID.
+#As a result, the scoring will be done on a per-speaker basis as well.
+#As the segmentation of the original STM file generally does not correspond
+#to the segmentation in the segments file, the script combines the files
+#segments and utt2spk to work out the correct speaker ID for
+#the reference segment.
+#In case of overlap, it will either use the previous speaker or
+#print out an error message.
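+#
+#Sketch of the rewrite this script performs (hypothetical values): an input
+#STM line
+#  BABEL_OP1_file_A 1 Aggregated 12.3 15.6 some reference text
+#whose start time 12.3 falls into a segments-file entry belonging to speaker
+#BABEL_OP1_file_A_spk01 is written out as
+#  BABEL_OP1_file_A 1 BABEL_OP1_file_A_spk01 12.3 15.6 some reference text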
+
+use strict;
+use warnings;
+
+use Data::Dumper;
+
+@ARGV == 2 || die "$0 <original-stm-file> <data-dir>\n";
+
+my $warn_count = 0;
+my $warn_max = 10;
+my $stm_file = shift @ARGV;
+my $data_dir = shift @ARGV;
+my %utt2spk;
+my %segments;
+
+open(F_u, "<$data_dir/utt2spk") || die "Could not open the file $data_dir/utt2spk\n";
+while(<F_u>) {
+  chop;
+  (my $utt, my $spk) = split;
+  $utt2spk{$utt} = $spk;
+}
+close(F_u);
+
+open(F_s, "<$data_dir/segments") || die "Could not open the file $data_dir/segments\n";
+while(<F_s>) {
+  chop;
+  (my $utt, my $file, my $seg_start, my $seg_end) = split;
+  push @{$segments{$file}}, [ $seg_start, $seg_end, $utt2spk{$utt}];
+}
+close(F_s);
+
+open(STM, "<$stm_file") || die "Could not open the STM file $stm_file";
+open(STMOUT, ">$data_dir/stm") || die "Could not open the output STM file $data_dir/stm";
+open(RECO, ">$data_dir/reco2file_and_channel") or die "Could not create the output file $data_dir/reco2file_and_channel";
+
+my $prev_filename = "";
+my @timestamps;
+my $i = 0;
+while(<STM>) {
+  chop;
+  (my $filename, my $line, my $aggregated, my $seg_start, my $seg_end, my $text) = split(/\s+/, $_, 6);
+  #print "$filename, $seg_start, $seg_end, $text\n";
+  $line="1";
+  if (( $prev_filename ne $filename ) && ( ";;$prev_filename" ne $filename)){
+    my $_filename = $filename;
+    $_filename =~ s/^;;//g;
+    next if not exists $segments{$_filename};
+    #print $filename, "\n";
+    $prev_filename = $_filename;
+    @timestamps = @{$segments{$_filename}};
+    #print Dumper(\@timestamps);
+    $i=0;
+    print RECO "$_filename $_filename $line\n";
+  }
+
+  my $max_i=@timestamps;
+  while ( ($i < $max_i ) && ($seg_start > @{$timestamps[$i]}[0] ) ) {
+    $i+= 1;
+  }
+
+  if (($i >= $max_i ) && ($timestamps[$i-1][1]) <= $seg_start ){
+    #We are past the start of the last segment -> we assign the last speaker ID
+    if ($warn_count < $warn_max) {
+      print STDERR "Warning: $prev_filename: the segment from the STM file starts after the last segment from the segments file ends\n";
+      print STDERR "Warning: Additional info: STM: ($seg_start, $seg_end), segments file: ($timestamps[$i-1][0] $timestamps[$i-1][1])\n";
+      $warn_count += 1;
+
+      if ($warn_count >= $warn_max) {
+        print STDERR "Warning: Maximum number of warnings reached, not warning anymore...\n"
+      }
+    }
+    #print "$i, $filename, $timestamps[$max_i - 1][2]\n";
+    print STMOUT "$filename $line $timestamps[$max_i - 1][2] $seg_start $seg_end $text\n";
+  } elsif ( $i == 0 ) {
+    if ($warn_count < $warn_max) {
+      print STDERR "Warning: $prev_filename: The segment from the STM file starts before the first segment from the segments file\n";
+      print STDERR "Warning: Additional info: STM: ($seg_start, $seg_end), segments file: ($timestamps[$i][0] $timestamps[$i][1])\n";
+      $warn_count += 1;
+
+      if ($warn_count >= $warn_max) {
+        print STDERR "Warning: Maximum number of warnings reached, not warning anymore...\n"
+      }
+    }
+    #Even the first segment's start time was higher than the stm segment start time.
+    #That means we do not really know which speaker the stm segment belongs to.
+    print STMOUT "$filename $line $timestamps[$i][2] $seg_start $seg_end $text\n";
+    #print "$i, $filename, $timestamps[$i][2]\n";
+  } else {
+    print STMOUT "$filename $line $timestamps[$i-1][2] $seg_start $seg_end $text\n";
+    #print "$i, $filename, $timestamps[$i-1][2]\n";
+  }
+}
+
+close(STMOUT);
+close(STM);
+close(RECO);
diff --git a/egs/babel/s5d/local/best_path_weights.sh b/egs/babel/s5d/local/best_path_weights.sh
new file mode 100755
index 00000000000..52782ee3655
--- /dev/null
+++ b/egs/babel/s5d/local/best_path_weights.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+# Copyright 2014  Vimal Manohar
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This script combines frame-level posteriors from different decode
+# directories. The first decode directory is assumed to be the primary
+# one and is used to get the best path. The posteriors from the other decode
+# directories are interpolated with the posteriors of the best path.
+# The output is a new directory with final.mdl and tree from the primary
+# decode-dir, and the best path alignments and weights in a decode-directory
+# with the same basename as the primary directory.
+# This is typically used to get better posteriors for semi-supervised training
+# of DNNs,
+# e.g. local/best_path_weights.sh exp/tri6_nnet/decode_train_unt.seg
+#   exp/sgmm_mmi_b0.1/decode_fmllr_train_unt.seg_it4 exp/combine_dnn_sgmm
+# Here the final.mdl and tree are copied from exp/tri6_nnet to
+# exp/combine_dnn_sgmm. best_path_ali.*.gz obtained from the primary dir and
+# the interpolated posteriors in weights.*.gz are placed in
+# exp/combine_dnn_sgmm/decode_train_unt.seg
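+#
+# A worked example (hypothetical weights): invoked with decode dirs given as
+# d1:0.5 d2:0.25 d3:0.25, the weight written out for frame t is
+#   w(t) = (0.5*p1(t) + 0.25*p2(t) + 0.25*p3(t)) / (0.5 + 0.25 + 0.25)
+# where pk(t) is system k's posterior, at frame t, of the pdf on the primary
+# system's best path.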
+
+set -e
+
+# begin configuration section.
+cmd=run.pl
+stage=-10
+#end configuration section.
+
+help_message="Usage: "$(basename $0)" [options] <data-dir> <lang-dir> <decode-dir1>[:weight] <decode-dir2>[:weight] [<decode-dir3>[:weight] ... ] <out-dir>
+ E.g. "$(basename $0)" data/train_unt.seg data/lang exp/tri1/decode:0.5 exp/tri2/decode:0.25 exp/tri3/decode:0.25 exp/combine
+Options:
+  --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes.
+";
+
+[ -f ./path.sh ] && . ./path.sh
+. 
parse_options.sh || exit 1; + + +if [ $# -lt 4 ]; then + printf "$help_message\n"; + exit 1; +fi + +data=$1 +lang=$2 +dir=${@: -1} # last argument to the script +shift 2; +decode_dirs=( $@ ) # read the remaining arguments into an array +unset decode_dirs[${#decode_dirs[@]}-1] # 'pop' the last argument which is odir +num_sys=${#decode_dirs[@]} # number of systems to combine + +mkdir -p $dir +mkdir -p $dir/log + +decode_dir=`echo ${decode_dirs[0]} | cut -d: -f1` +nj=`cat $decode_dir/num_jobs` + +out_decode=$dir/`basename $decode_dir` +mkdir -p $out_decode + +if [ $stage -lt -1 ]; then + mkdir -p $out_decode/log + $cmd JOB=1:$nj $out_decode/log/best_path.JOB.log \ + lattice-best-path --acoustic-scale=0.1 \ + "ark,s,cs:gunzip -c $decode_dir/lat.JOB.gz |" \ + ark:/dev/null "ark:| gzip -c > $out_decode/best_path_ali.JOB.gz" || exit 1 +fi + +weights_sum=0.0 + +for i in `seq 0 $[num_sys-1]`; do + decode_dir=${decode_dirs[$i]} + + weight=`echo $decode_dir | cut -d: -s -f2` + [ -z "$weight" ] && weight=1.0 + + if [ $i -eq 0 ]; then + file_list="\"ark,s,cs:gunzip -c $out_decode/weights.$i.JOB.gz | vector-scale --scale=$weight ark:- ark:- |\"" + else + file_list="$file_list \"ark,s,cs:gunzip -c $out_decode/weights.$i.JOB.gz | vector-scale --scale=$weight ark:- ark:- |\"" + fi + + weights_sum=`perl -e "print STDOUT $weights_sum + $weight"` +done + +inv_weights_sum=`perl -e "print STDOUT 1.0/$weights_sum"` + +for i in `seq 0 $[num_sys-1]`; do + if [ $stage -lt $i ]; then + decode_dir=`echo ${decode_dirs[$i]} | cut -d: -f1` + + model=`dirname $decode_dir`/final.mdl # model one level up from decode dir + tree=`dirname $decode_dir`/tree # tree one level up from decode dir + + for f in $model $decode_dir/lat.1.gz $tree; do + [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; + done + if [ $i -eq 0 ]; then + nj=`cat $decode_dir/num_jobs` || exit 1; + cp $model $dir || exit 1 + cp $tree $dir || exit 1 + echo $nj > $out_decode/num_jobs + else + if [ $nj != `cat $decode_dir/num_jobs` ]; then + echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" + exit 1; + fi + fi + + $cmd JOB=1:$nj $dir/log/get_post.$i.JOB.log \ + lattice-to-post --acoustic-scale=0.1 \ + "ark,s,cs:gunzip -c $decode_dir/lat.JOB.gz|" ark:- \| \ + post-to-pdf-post $model ark,s,cs:- ark:- \| \ + get-post-on-ali ark,s,cs:- "ark,s,cs:gunzip -c $out_decode/best_path_ali.JOB.gz | convert-ali $dir/final.mdl $model $tree ark,s,cs:- ark:- | ali-to-pdf $model ark,s,cs:- ark:- |" "ark:| gzip -c > $out_decode/weights.$i.JOB.gz" || exit 1 + fi +done + +if [ $stage -lt $num_sys ]; then + if [ "$num_sys" -eq 1 ]; then + $cmd JOB=1:$nj $dir/log/move_post.JOB.log \ + mv $out_decode/weights.0.JOB.gz $out_decode/weights.JOB.gz || exit 1 + else + $cmd JOB=1:$nj $dir/log/interpolate_post.JOB.log \ + vector-sum $file_list \ + "ark:| vector-scale --scale=$inv_weights_sum ark:- ark:- | gzip -c > $out_decode/weights.JOB.gz" || exit 1 + fi +fi + +exit 0 diff --git a/egs/babel/s5d/local/best_scores.sh b/egs/babel/s5d/local/best_scores.sh new file mode 100755 index 00000000000..a3b2af187e1 --- /dev/null +++ b/egs/babel/s5d/local/best_scores.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +# End configuration section +set -o nounset # Treat unset variables as an error + + +if [ ! -x results ] ; then + data=$(readlink -f ./local) + data=$(dirname $data) + mkdir -p $data/results + ln -s $data/results results +fi + +if [ ! 
-e ./RESULTS ] ; then + p=$(basename `readlink -f lang.conf`) + p=${p##.*} + filename=results.${p}.${USER}.$(date --iso-8601=seconds) + echo "#Created on $(date --iso-8601=seconds) by $0" >> results/$filename + ln -sf results/$filename RESULTS +fi + + +set -f +export mydirs=( `find exp/ exp_bnf/ exp_psx/ -name "decode*dev10h.pem*" -type d | sed 's/it[0-9]/*/g;s/epoch[0-9]/*/g' | sort -u` ) +set +f +( + echo -e "#\n# STT Task performance (WER), evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + for f in "${mydirs[@]}"; do + find $f -name "*.sys" -not -name "*char*" | xargs grep Avg | utils/best_wer.sh + done | column -t +) >> RESULTS + +( + ls exp/tri5/decode*dev10h*/score_*/*char*sys >/dev/null 2>&1 || exit 0 + echo -e "#\n# STT Task performance (CER), evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + for f in "${mydirs[@]}"; do + find $f -name "*.sys" -name "*char*" | xargs grep Avg | utils/best_wer.sh + done | column -t +) >> RESULTS + diff --git a/egs/babel/s5d/local/best_scores_kws.sh b/egs/babel/s5d/local/best_scores_kws.sh new file mode 100755 index 00000000000..dcf4508d5e1 --- /dev/null +++ b/egs/babel/s5d/local/best_scores_kws.sh @@ -0,0 +1,179 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +# End configuration section +set -o nounset # Treat unset variables as an error + + +if [ ! -x results ] ; then + data=$(readlink -f ./local) + data=$(dirname $data) + mkdir -p $data/results + ln -s $data/results results +fi + +if [ ! -e ./RESULTS.kws ] ; then + p=$(basename `readlink -f lang.conf`) + p=${p##.*} + filename=kws_results.${p}.${USER}.$(date --iso-8601=seconds) + echo "#Created on $(date --iso-8601=seconds) by $0" >> results/$filename + ln -sf results/$filename RESULTS.kws +fi + + +set -f +export mydirs=( `find exp/ exp_bnf/ exp_psx/ -name "decode*dev10h.pem*" -type d | sed 's/it[0-9]/*/g;s/epoch[0-9]/*/g' | sort -u` ) +set +f +export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) +( + #### Word search (converted lattices) + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + done + + #### Syllab search (converted word lattices) + export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" 
-ipath "*syllabs*" | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), syllabic search for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -ipath "*syllabs*" | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + done + + + #### Phone search (converted word lattices) + export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" -ipath "*phones*" | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), phonetic search for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -ipath "*phones*" | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + + done + + + set -f + export mydirs=( `find exp/ exp_bnf/ exp_psx/ -name "decode*dev10h.syll.pem*" -type d | sed 's/it[0-9]/*/g;s/epoch[0-9]/*/g' | sort -u` ) + set +f + if [ ! 
-z ${mydirs+x} ] ; then + export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) + #declare -p kwsets + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), syllabic decode+search for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + + done + fi + + set -f + export mydirs=( `find exp/ exp_bnf/ exp_psx/ -name "decode*dev10h.phn.pem*" -type d | sed 's/it[0-9]/*/g;s/epoch[0-9]/*/g' | sort -u` ) + set +f + if [ ! -z ${mydirs+x} ] ; then + export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) + #declare -p kwsets + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), phonetic decode+search for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + + done + fi +) | tee RESULTS.kws diff --git a/egs/babel/s5d/local/build_edit_distance_fst.pl b/egs/babel/s5d/local/build_edit_distance_fst.pl new file mode 100755 index 00000000000..51c46667727 --- /dev/null +++ b/egs/babel/s5d/local/build_edit_distance_fst.pl @@ -0,0 +1,127 @@ +#!/usr/bin/env perl + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0. 
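+#
+# Output format sketch (an illustration, assuming a hypothetical two-phone
+# list "a" and "b", default costs and --boundary-off true): the FST is
+# printed in OpenFst text format over phone labels, e.g.
+#   1 1 a <eps> 1     (deletion of "a")
+#   1 1 <eps> a 1     (insertion of "a")
+#   1 1 a b 1         (substitution of "b" for "a")
+#   1 1 a a 0         (match)
+#   1                 (final state)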
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: build_edit_distance_fst.pl [options] <phone-list> <fst-out>
+ Build an edit distance FST at the phone level.
+
+Allowed options:
+  --confusion-matrix  : Matrix for insertion, deletion and substitution. (string, default="")
+  --ins-cost          : Insertion cost (double, default=1 )
+  --del-cost          : Deletion cost (double, default=1 )
+  --subs-cost         : Substitution cost (double, default=1 )
+  --boundary-ins-cost : Cost for insertions at word boundary (double, default=0.1)
+  --boundary-off      : No insertions at word boundary (boolean, default=true)
+EOU

+my $confusion_matrix = "";
+my $insertion_cost = 1;
+my $deletion_cost = 1;
+my $substitution_cost = 1;
+my $boundary_ins_cost = 0.1;
+my $boundary_off="true";
+GetOptions('confusion-matrix=s' => \$confusion_matrix,
+  'ins-cost=f' => \$insertion_cost,
+  'del-cost=f' => \$deletion_cost,
+  'subs-cost=f' => \$substitution_cost,
+  'boundary-ins-cost=f' => \$boundary_ins_cost,
+  'boundary-off=s' => \$boundary_off);
+
+@ARGV == 2 || die $Usage;
+
+$boundary_off eq "true" || $boundary_off eq "false" || die "$0: Bad value for option --boundary-off\n";
+
+# Work out the input and output parameters
+my $phone_in = shift @ARGV;
+my $fst_out = shift @ARGV;
+
+open(I, "<$phone_in") || die "$0: Fail to open phone list $phone_in\n";
+open(O, ">$fst_out") || die "$0: Fail to write FST $fst_out\n";
+
+# Read confusion matrix
+my %confusion;
+if ($confusion_matrix ne "") {
+  open(M, "<$confusion_matrix") || die "$0: Fail to open confusion matrix $confusion_matrix\n";
+  while (<M>) {
+    chomp;
+    my @col = split();
+    @col == 3 || die "$0: Bad line in confusion matrix \"$_\"\n";
+    $confusion{"$col[0]_$col[1]"} = $col[2];
+  }
+  close(M);
+}
+
+# Start processing
+my @phones;
+while (<I>) {
+  chomp;
+  my @col = split();
+  @col == 1 || die "$0: Bad number of columns in phone list \"$_\"\n";
+  if ($col[0] eq "<eps>") {next;}
+  push(@phones, $col[0]);
+}
+
+# Add insertions, deletions
+my $fst = "";
+foreach my $p (@phones) {
+  if ($confusion_matrix eq "") {
+    $fst .= "1 1 $p <eps> $deletion_cost\n";     # Deletions
+    $fst .= "1 1 <eps> $p $insertion_cost\n";    # Insertions
+    if ($boundary_off eq "false") {
+      $fst .= "0 0 <eps> $p $boundary_ins_cost\n";
+      $fst .= "0 1 <eps> $p $boundary_ins_cost\n";
+      $fst .= "2 2 <eps> $p $boundary_ins_cost\n";
+      $fst .= "1 2 <eps> $p $boundary_ins_cost\n";
+    }
+  } else {
+    my $key = "${p}_<eps>";
+    if (defined($confusion{$key})) {
+      $fst .= "1 1 $p <eps> $confusion{$key}\n";
+    }
+    $key = "<eps>_${p}";
+    if (defined($confusion{$key})) {
+      $fst .= "1 1 <eps> $p $confusion{$key}\n";
+      if ($boundary_off eq "false") {
+        $fst .= "0 0 <eps> $p $confusion{$key}\n";
+        $fst .= "0 1 <eps> $p $confusion{$key}\n";
+        $fst .= "2 2 <eps> $p $confusion{$key}\n";
+        $fst .= "1 2 <eps> $p $confusion{$key}\n";
+      }
+    }
+  }
+}
+foreach my $p1 (@phones) {
+  foreach my $p2 (@phones) {
+    if ($p1 eq $p2) {
+      $fst .= "1 1 $p1 $p2 0\n";
+    } else {
+      if ($confusion_matrix eq "") {
+        $fst .= "1 1 $p1 $p2 $substitution_cost\n";
+      } else {
+        my $key = "${p1}_${p2}";
+        if (defined($confusion{$key})) {
+          $fst .= "1 1 $p1 $p2 $confusion{$key}\n";
+        }
+      }
+    }
+  }
+}
+if ($boundary_off eq "false") {
+  $fst .= "0 1 <eps> <eps> 0\n";
+  $fst .= "1 2 <eps> <eps> 0\n";
+  $fst .= "2\n";
+} else {
+  $fst .= "1\n";
+}
+
+print O $fst;
+
+close(I);
+close(O);
diff --git a/egs/babel/s5d/local/chain/run_blstm.sh b/egs/babel/s5d/local/chain/run_blstm.sh
new file mode 100755
index 00000000000..6d13c55fc7d
--- /dev/null
+++ b/egs/babel/s5d/local/chain/run_blstm.sh
@@ -0,0 +1,180 @@
+#!/bin/bash
+
+
+# by default, with cleanup:
+# local/chain/run_blstm.sh
+# %WER 46.8 | 19252 60586 | 57.6 28.5 13.8 4.5 46.8 31.7 | 
-0.643 | exp/chain_cleaned/blstm_sp_bi/decode_dev10h.pem/score_8/penalty_0.25/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=-2 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 128 \ + --non-recurrent-projection-dim 128 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
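+
+  # Sizing note: with --egs.chunk-width 150 and the frame-subsampling factor
+  # of 3 used for the tree above, each chunk supervises 150/3 = 50 output
+  # frames, and --trainer.frames-per-iter 1500000 corresponds to roughly
+  # 1500000/150 = 10000 chunks per training iteration.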
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab1.sh b/egs/babel/s5d/local/chain/run_blstm_bab1.sh new file mode 100755 index 00000000000..ba8da0e14bc --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab1.sh @@ -0,0 +1,180 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh +# %WER 45.5 | 19252 60586 | 58.9 27.5 13.5 4.5 45.5 31.4 | -0.660 | exp/chain_cleaned/blstmbab1_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab1 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 128 \ + --non-recurrent-projection-dim 128 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab2.sh b/egs/babel/s5d/local/chain/run_blstm_bab2.sh new file mode 100755 index 00000000000..f5d698e262c --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab2.sh @@ -0,0 +1,180 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh +# %WER 46.7 | 19252 60586 | 57.1 26.1 16.8 3.8 46.7 31.9 | -0.692 | exp/chain_cleaned/blstmbab2_sp_bi/decode_dev10h.pem/score_10/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. 
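+# Note: common_egs_dir below reuses the egs dumped by the baseline
+# blstm_sp_bi run; this is only safe while the tree, the chunk width and the
+# frame-subsampling factor stay identical between the two runs (the script
+# itself does not check this).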
+train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab2 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 128 \ + --non-recurrent-projection-dim 128 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 6 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab3.sh b/egs/babel/s5d/local/chain/run_blstm_bab3.sh new file mode 100755 index 00000000000..7ad51204c6f --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab3.sh @@ -0,0 +1,180 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh +# %WER 45.9 | 19252 60586 | 58.7 28.0 13.3 4.6 45.9 31.6 | -0.668 | exp/chain_cleaned/blstmbab3_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab3 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 256 \ + --non-recurrent-projection-dim 256 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab4.sh b/egs/babel/s5d/local/chain/run_blstm_bab4.sh new file mode 100755 index 00000000000..72aaeb8778f --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab4.sh @@ -0,0 +1,179 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab4 #affix for TDNN directory, e.g. 
"a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 128 \ + --non-recurrent-projection-dim 128 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.25 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab5.sh b/egs/babel/s5d/local/chain/run_blstm_bab5.sh new file mode 100755 index 00000000000..1bae225022e --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab5.sh @@ -0,0 +1,179 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab5 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 256 \ + --non-recurrent-projection-dim 256 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.25 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_ivector_common.sh b/egs/babel/s5d/local/chain/run_ivector_common.sh new file mode 100755 index 00000000000..7354d59465b --- /dev/null +++ b/egs/babel/s5d/local/chain/run_ivector_common.sh @@ -0,0 +1,240 @@ +#!/bin/bash + +set -e -o pipefail + + +# This script is called from local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh (and may eventually +# be called by more scripts). It contains the common feature preparation and iVector-related parts +# of the script. See those scripts for examples of usage. + + +stage=0 +nj=30 +min_seg_len=1.55 # min length in seconds... we do this because chain training + # will discard segments shorter than 1.5 seconds. Must remain in sync + # with the same option given to prepare_lores_feats_and_alignments.sh +train_set=train_cleaned # you might set this to e.g. train. +gmm=tri5_cleaned # This specifies a GMM-dir from the features of the type you're training the system on; + # it should contain alignments for 'train_set'. +langdir=data/langp/tri5_ali + +num_threads_ubm=12 +nnet3_affix=_cleaned # affix for exp/nnet3 directory to put iVector stuff in, so it + # becomes exp/nnet3_cleaned or whatever. +add_pitch=false + +. ./cmd.sh +. ./path.sh + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh + + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp_comb + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + + + +if [ $stage -le 2 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then + echo "$0: data/${train_set}_sp_hires/feats.scp already exists." + echo " ... Please either remove it, or rerun this script with stage > 2." 
+ exit 1 +fi + + +if [ $stage -le 1 ]; then + echo "$0: preparing directory for speed-perturbed data" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: creating high-resolution MFCC features" + + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations. You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + utils/copy_data_dir.sh data/${train_set}_sp data/${train_set}_sp_hires + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires + + for datadir in ${train_set}_sp ; do + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires + steps/compute_cmvn_stats.sh data/${datadir}_hires + utils/fix_data_dir.sh data/${datadir}_hires + done +fi + +if [ $stage -le 3 ]; then + echo "$0: combining short segments of speed-perturbed high-resolution MFCC training data" + # we have to combine short segments or we won't be able to train chain models + # on those segments. + utils/data/combine_short_segments.sh \ + data/${train_set}_sp_hires $min_seg_len data/${train_set}_sp_hires_comb + + # just copy over the CMVN to avoid having to recompute it. + cp data/${train_set}_sp_hires/cmvn.scp data/${train_set}_sp_hires_comb/ + utils/fix_data_dir.sh data/${train_set}_sp_hires_comb/ +fi + +if [ $stage -le 4 ]; then + echo "$0: selecting segments of hires training data that were also present in the" + echo " ... original training data." + + # note, these data-dirs are temporary; we put them in a sub-directory + # of the place where we'll make the alignments. + temp_data_root=exp/nnet3${nnet3_affix}/tri5 + mkdir -p $temp_data_root + + utils/data/subset_data_dir.sh --utt-list data/${train_set}/feats.scp \ + data/${train_set}_sp_hires $temp_data_root/${train_set}_hires + + # note: essentially all the original segments should be in the hires data. + n1=$(wc -l 4 option." + exit 1; + fi + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 7 --mllt-iters "2 4 6" \ + --splice-opts "--left-context=3 --right-context=3" \ + --boost-silence $boost_sil \ + $numLeavesMLLT $numGaussMLLT $temp_data_root/${train_set}_hires $langdir \ + $gmm_dir exp/nnet3${nnet3_affix}/tri5 +fi + + +if [ $stage -le 5 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + + mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + # train a diagonal UBM using a subset of about a quarter of the data + # we don't use the _comb data for this as there is no need for compatibility with + # the alignments, and using the non-combined data is more efficient for I/O + # (no messing about with piped commands). + num_utts_total=$(wc -l 11 option." 
+ exit 1 + fi + echo "$0: aligning with the perturbed, short-segment-combined low-resolution data" + steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set}_sp_comb $langdir $gmm_dir $ali_dir +fi + + +exit 0; diff --git a/egs/babel/s5d/local/chain/run_tdnn.sh b/egs/babel/s5d/local/chain/run_tdnn.sh new file mode 100755 index 00000000000..3ce53fa9292 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn.sh @@ -0,0 +1,177 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh +# %WER 47.0 | 19252 60586 | 58.0 28.0 14.0 5.0 47.0 31.6 | -0.540 | exp/chain_cleaned/tdnn_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
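+  # Note on the --trainer.optimization.*-effective-lrate options below:
+  # roughly speaking, in nnet3 the learning rate each job actually uses is
+  # the effective rate multiplied by the current number of jobs, so an
+  # initial effective lrate of 0.001 with 2 initial jobs corresponds to an
+  # actual rate of about 0.002.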
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab1.sh b/egs/babel/s5d/local/chain/run_tdnn_bab1.sh new file mode 100755 index 00000000000..db82c0f358a --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_bab1.sh @@ -0,0 +1,177 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh +# %WER 48.2 | 19252 60586 | 56.9 28.9 14.2 5.1 48.2 32.1 | -0.662 | exp/chain_cleaned/tdnnbab1_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=bab1 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
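+    # (remove $tree_dir, or change --tree-affix, if you really do want to
+    # rebuild the tree)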
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab2.sh b/egs/babel/s5d/local/chain/run_tdnn_bab2.sh new file mode 100755 index 00000000000..51387901683 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_bab2.sh @@ -0,0 +1,177 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh +# %WER 47.7 | 19252 60586 | 56.5 27.2 16.3 4.3 47.7 31.8 | -0.468 | exp/chain_cleaned/tdnnbab2_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. 
"a" or "b", in case we change the configuration. +tdnn_affix=bab2 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab3.sh b/egs/babel/s5d/local/chain/run_tdnn_bab3.sh new file mode 100755 index 00000000000..098c3de0482 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_bab3.sh @@ -0,0 +1,178 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh + +# %WER 46.7 | 19252 60586 | 57.4 26.4 16.2 4.0 46.7 31.6 | -0.469 | exp/chain_cleaned/tdnnbab3_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=bab3 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
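+  # Relative to run_tdnn.sh, this variant re-uses the egs dumped by
+  # tdnn_sp_bi (see common_egs_dir above), raises --chain.xent-regularize
+  # from 0.1 to 0.25, trains for 2 epochs instead of 4, and ends with 6
+  # jobs rather than 12.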
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.25 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab4.sh b/egs/babel/s5d/local/chain/run_tdnn_bab4.sh new file mode 100755 index 00000000000..5831cfc28f0 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_bab4.sh @@ -0,0 +1,177 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh +# %WER 47.3 | 19252 60586 | 57.5 29.1 13.4 4.8 47.3 31.7 | -0.595 | exp/chain_cleaned/tdnnbab4_sp_bi/decode_dev10h.pem/score_8/penalty_0.25/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=bab4 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 400 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.25 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/check_models.sh b/egs/babel/s5d/local/check_models.sh new file mode 100755 index 00000000000..88b3dacc94b --- /dev/null +++ b/egs/babel/s5d/local/check_models.sh @@ -0,0 +1,34 @@ +#!/bin/bash + + +check_model () { + model=$1 + if [ -s $model ]; then echo $model + else + dir=`dirname $model` + latest_model=`ls -lt $dir/{?,??}.mdl 2>/dev/null | head -1 | awk '{print $9}'` + echo "*$model is not there, latest is: $latest_model" + fi +} + +for model in exp/mono/final.mdl exp/tri{1,2,3}/final.mdl; do + check_model $model +done + +if [ ! -f exp/tri4/final.mdl ]; then + echo "*exp/tri4/final.mdl is not there*" + exit 1 +fi + +if [ -f exp/tri4/trans.1 ]; then # This is LimitedLP. 
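+  # In the LimitedLP case the last triphone stage is tri4 and the nnet system
+  # is tri5_nnet; the FullLP setup has an extra tri5 stage and uses tri6_nnet.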
+  models="exp/tri4/final.alimdl exp/sgmm5/final.alimdl exp/sgmm5_mmi_b0.1/final.mdl exp/tri5_nnet/final.mdl"
+else
+  models="exp/tri4/final.mdl exp/tri5/final.alimdl exp/sgmm5/final.alimdl exp/sgmm5_mmi_b0.1/final.mdl exp/tri6_nnet/final.mdl"
+fi
+models="$models exp_BNF/tri5/final.mdl exp_BNF/tri6/final.alimdl exp_BNF/sgmm7/final.alimdl"
+
+for model in $models; do
+  check_model $model
+done
+
diff --git a/egs/babel/s5d/local/check_tools.sh b/egs/babel/s5d/local/check_tools.sh
new file mode 100755
index 00000000000..ca8800def41
--- /dev/null
+++ b/egs/babel/s5d/local/check_tools.sh
@@ -0,0 +1,40 @@
+#!/bin/bash -u
+
+# Copyright 2015 (c) Johns Hopkins University (Jan Trmal )
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+[ -f ./path.sh ] && . ./path.sh
+
+sph2pipe=`command -v sph2pipe 2>/dev/null` \
+  || { echo >&2 "sph2pipe not found on PATH. Did you run make in the $KALDI_ROOT/tools directory?"; exit 1; }
+
+srilm=`command -v ngram 2>/dev/null` \
+  || { echo >&2 "srilm not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_srilm.sh"; exit 1; }
+
+sox=`command -v sox 2>/dev/null` \
+  || { echo >&2 "sox not found on PATH. Please install it manually (you will need version 14.4.0 or higher)."; exit 1; }
+
+# If sox is found on PATH, check if the version is correct
+if [ ! -z "$sox" ]; then
+  sox_version=`$sox --version 2>&1| head -1 | sed -e 's?.*: ??' -e 's?.* ??'`
+  if [[ ! $sox_version =~ v14.4.* ]]; then
+    echo "Unsupported sox version $sox_version found on PATH. You will need version v14.4.0 or higher."
+    exit 1
+  fi
+fi
+
+exit 0
+
+
diff --git a/egs/babel/s5d/local/check_wers.sh b/egs/babel/s5d/local/check_wers.sh
new file mode 100755
index 00000000000..10e1a89ee3a
--- /dev/null
+++ b/egs/babel/s5d/local/check_wers.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+
+
+check_wer () {
+  dir=$1
+  if [ -d $dir ]; then
+    seen_dir=false
+    for ddir in $dir/decode*; do
+      if [ -d $ddir ]; then
+        seen_dir=true
+        printf " % -40s " $ddir
+        line=`grep Sum $ddir/score_*/*.sys 2>/dev/null | $char_command | utils/best_wer.sh`
+        if [ -z "$line" ]; then echo "------"
+        else echo $line | cut -c 1-65; fi
+      fi
+    done
+    ! $seen_dir && echo "$dir ********** no decode dirs"
+  fi
+
+}
+
+final=false
+char_command="grep -v char"
+
+for n in `seq 10`; do
+  if [ "$1" == "--final" ]; then
+    final=true
+    shift
+  fi
+  if [ "$1" == "--char" ]; then
+    char_command="grep char"
+    shift
+  fi
+done
+
+if [ $# != 0 ]; then
+  echo "Usage: local/check_wers.sh [--final] [--char]"
+  exit 1;
+fi
+
+if $final; then
+  for dir in exp/sgmm5_mmi_b0.1 exp/tri5_nnet exp/tri6_nnet exp_BNF/sgmm7 exp_BNF/sgmm7_mmi_b0.1 exp/combine*; do
+    check_wer $dir
+  done
+else
+  for dir in exp/tri{2,3,4,5} exp/sgmm5 exp/sgmm5_mmi_b0.1 exp/tri5_nnet exp/tri6_nnet exp_BNF/* exp/combine_*; do
+    check_wer $dir
+  done
+fi
diff --git a/egs/babel/s5d/local/cmu_uem2kaldi_dir.sh b/egs/babel/s5d/local/cmu_uem2kaldi_dir.sh
new file mode 100755
index 00000000000..f320cfa19cd
--- /dev/null
+++ b/egs/babel/s5d/local/cmu_uem2kaldi_dir.sh
@@ -0,0 +1,124 @@
+#!/bin/bash -e
+
+# Creating a UEM decoding setup with CMU segmentation from Florian (Feb 15, 2013).
+dummy_text=true
+text=
+filelist=
+#end of configuration
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ] ; then
+  echo "$0: Converts the CMU segmentation database file into a kaldi data directory for UEM decoding"
+  echo ""
+  echo "cmu_uem2kaldi_dir.sh <database> <audio-path> <data-dir>"
+  echo "example: cmu_uem2kaldi_dir.sh db-tag-eval-utt.dat /export/babel/data/106-tagalog/audio data/eval.uem"
+  echo "Was called with: $*"
+  exit 1;
+fi
+
+database=$1
+audiopath=$2
+datadir=$3
+
+echo $0 $@
+mkdir -p $datadir
+# 1. Create the segments file:
+[ ! -f $database ] && echo "Database file $1 does not exist!" && exit 1;
+
+echo "Converting `basename $database` to kaldi directory $datadir "
+cat $database | perl -pe 's:.+(BABEL):BABEL:; s:\}\s+\{FROM\s+: :; s:\}\s+\{TO\s+: :; s:\}.+::;' | \
+  perl -ne '@K = split;
+    $utteranceID = @K[0];
+    $utteranceID =~ s:[^_]+_[^_]+_[^_]+_::;
+    $utteranceID =~ s:([^_]+)_(.+)_(inLine|scripted):${1}_A_${2}:;
+    $utteranceID =~ s:([^_]+)_(.+)_outLine:${1}_B_${2}:;
+    $utteranceID .= sprintf ("_%06i", (100*@K[2]));
+    printf("%s %s %.2f %.2f\n", $utteranceID, @K[0], @K[1], @K[2]);' | sort > $datadir/segments
+
+if [ ! -z $filelist ] ; then
+  mv $datadir/segments $datadir/segments.full
+  grep -F -f $filelist $datadir/segments.full > $datadir/segments
+
+  l=`grep -v -F -f $filelist $datadir/segments.full | cut -f 2 -d ' ' | sort -u | wc -l`
+  echo "Because of using filelist, $l files omitted"
+fi
+
+
+# 2. Create the utt2spk file:
+
+echo "Creating the $datadir/utt2spk file"
+cut -f1 -d' ' $datadir/segments | \
+  perl -ne 'chomp; m:([^_]+_[AB]).*:; print "$_ $1\n";' | \
+  sort > $datadir/utt2spk
+
+# 3. Create the spk2utt file:
+
+echo "Creating the $datadir/spk2utt file"
+perl -ne '{chomp; @K=split; $utt{@K[1]}.=" @K[0]";}
+  END{foreach $spk (sort keys %utt) {
+    printf("%s%s\n", $spk, $utt{$spk});
+    }
+  }' < $datadir/utt2spk | sort > $datadir/spk2utt
+
+# 4. Create the wav.scp file:
+sph2pipe=`which sph2pipe || which $KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe`
+if [ $? -ne 0 ] ; then
+  echo "Could not find sph2pipe binary. Add it to PATH"
+  exit 1;
+fi
+sox=`which sox`
+if [ $? -ne 0 ] ; then
+  echo "Could not find sox binary. Add it to PATH"
Add it to PATH" + exit 1; +fi + +echo "Creating the $datadir/wav.scp file" +( + set -o pipefail + for file in `cut -f 2 -d ' ' $datadir/segments` ; do + if [ -f $audiopath/audio/$file.sph ] ; then + echo "$file $sph2pipe -f wav -p -c 1 $audiopath/audio/$file.sph |" + elif [ -f $audiopath/audio/$file.wav ] ; then + echo "$file $sox $audiopath/audio/$file.wav -r 8000 -c 1 -b 16 -t wav - downsample |" + else + echo "Audio file $audiopath/audio/$file.sph does not exist!" >&2 + exit 1 + fi + done | sort -u > $datadir/wav.scp + if [ $? -ne 0 ] ; then + echo "Error producing the wav.scp file" + exit 1 + fi +) || exit 1 + +l1=`wc -l $datadir/wav.scp | cut -f 1 -d ' ' ` +echo "wav.scp contains $l1 files" +if [ ! -z $filelist ] ; then + l2=`wc -l $filelist | cut -f 1 -d ' '` + echo "filelist `basename $filelist` contains $l2 files" + + if [ "$l1" -ne "$l2" ] ; then + echo "WARNING: Not all files from the specified fileset made their way into wav.scp" + fi +fi + +# 5. Create the text file: +echo "Creating the $datadir/text file" +if [ ! -z $text ] ; then + cp $text $datadir/text || echo "Could not copy the source text file \"$text\" " && exit 1 +elif $dummy_text ; then + cut -f1 -d' ' $datadir/segments | \ + sed -e 's/$/ IGNORE_TIME_SEGMENT_IN_SCORING/' | \ + sort > $datadir/text +fi + +# 6. reco2file_and_channel +echo "Creating the $datadir/reco2file_and_channel file" +(for f in $( cut -f 1 -d ' ' $datadir/wav.scp ) ; do echo $f $f "1"; done) > $datadir/reco2file_and_channel +echo "Everything done" + + + diff --git a/egs/babel/s5d/local/count_to_logprob.pl b/egs/babel/s5d/local/count_to_logprob.pl new file mode 100755 index 00000000000..7d779321810 --- /dev/null +++ b/egs/babel/s5d/local/count_to_logprob.pl @@ -0,0 +1,94 @@ +#!/usr/bin/env perl + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0. +# + +use strict; +use warnings; +use Getopt::Long; + +my $Usage = < + This script takes in the confusion phone pair counts and converts + the counts into negated log probabilities. 
+  the following format:
+    p1 p2 count1      // For substitution
+    p3 <eps> count2   // For deletion
+    <eps> p4 count3   // For insertion
+
+Allowed options:
+  --cutoff        : Minimal count to be considered (int, default=1)
+EOU
+
+my $cutoff = 1;
+GetOptions('cutoff=i' => \$cutoff);
+
+@ARGV == 2 || die $Usage;
+
+# Work out the input and output parameters
+my $cm_in = shift @ARGV;
+my $cm_out = shift @ARGV;
+
+open(I, "<$cm_in") || die "$0: Fail to open counts file $cm_in\n";
+open(O, ">$cm_out") || die "$0: Fail to write confusion matrix $cm_out\n";
+
+# Collect counts
+my %ins;
+my %del;
+my %subs;
+my %phone_count;
+my $ins_count = 0;
+my $del_count = 0;
+while (<I>) {
+  chomp;
+  my @col = split();
+  @col == 3 || die "$0: Bad line in confusion matrix file: $_\n";
+  my ($p1, $p2, $count) = ($col[0], $col[1], $col[2]);
+  $count >= $cutoff || next;
+  if ($p1 eq "<eps>" && $p2 ne "<eps>") {
+    $ins{$p2} = $count;
+    $ins_count += $count;
+  } elsif ($p1 ne "<eps>" && $p2 eq "<eps>") {
+    $del{$p1} = $count;
+    $del_count += $count;
+  } elsif ($p1 ne "<eps>" && $p2 ne "<eps>") {
+    $p1 ne $p2 || next;    # Skip mapping a phone to itself
+    $subs{"${p1}_$p2"} = $count;
+    if (defined($phone_count{$p1})) {
+      $phone_count{$p1} += $count;
+    } else {
+      $phone_count{$p1} = $count;
+    }
+  }
+}
+
+# Compute negated log probability
+foreach my $key (keys %ins) {
+  $ins{$key} = -log($ins{$key}/$ins_count);
+}
+foreach my $key (keys %del) {
+  $del{$key} = -log($del{$key}/$del_count);
+}
+foreach my $key (keys %subs) {
+  my @col = split(/_/, $key);
+  $subs{$key} = -log($subs{$key}/$phone_count{$col[0]});
+}
+
+# print results
+my $output = "";
+foreach my $key (keys %ins) {
+  $output .= "<eps> $key $ins{$key}\n";
+}
+foreach my $key (keys %del) {
+  $output .= "$key <eps> $del{$key}\n";
+}
+foreach my $key (keys %subs) {
+  my @col = split(/_/, $key);
+  $output .= "$col[0] $col[1] $subs{$key}\n";
+}
+
+print O $output;
+
+close(I);
+close(O);
diff --git a/egs/babel/s5d/local/create_shadow_dataset.sh b/egs/babel/s5d/local/create_shadow_dataset.sh
new file mode 100755
index 00000000000..49467ed28c1
--- /dev/null
+++ b/egs/babel/s5d/local/create_shadow_dataset.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+# Copyright 2012  Johns Hopkins University
+# Apache 2.0.
+
+stage=0
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+[ -f /export/babel/data/software/env.sh ] && . /export/babel/data/software/env.sh
+
+. utils/parse_options.sh
+
+if [ $# -ne 3 ]; then
+  echo "Usage: create_shadow_dataset.sh <dest-dir> <src-dir-1> <src-dir-2>"
+  exit 1
+fi
+
+dest=$1
+src1=$2
+src2=$3
+
+mkdir -p $dest/kws
+
+if [ $stage -le 0 ] ; then
+  utils/combine_data.sh $dest $src1 $src2 || exit 1
+fi
+
+if [ $stage -le 1 ] ; then
+  # combine the two ecf.xml files
+  echo "Combining ECF files..."
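+  # The inline perl below merges the two ecf.xml files: the excerpt lists
+  # are concatenated, duplicate audio filenames produce a warning, and
+  # source_signal_duration is recomputed as the sum over the unique files.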
+
+  perl -e '
+    #binmode STDIN, ":utf8";
+    binmode STDOUT, ":utf8";
+
+    use XML::Simple;
+    use Data::Dumper;
+
+    use strict;
+    use warnings;
+
+
+    my $src1 = XMLin($ARGV[0]);
+    my $src2 = XMLin($ARGV[1]);
+    my $tgt={};
+    my %filename_hash;
+
+    my $expected_duration=0.0;
+    my $duration=0.0;
+
+    if ( $src1->{language} ne $src2->{language} ) {
+      die "ECF languages differ in the source ecf.xml files"
+    }
+    $expected_duration=$src1->{source_signal_duration} + $src2->{source_signal_duration};
+
+    $tgt->{source_signal_duration} = $expected_duration;
+    $tgt->{language}=$src1->{language};
+    $tgt->{version}="Generated automatically by the shadow_set.sh script";
+    $tgt->{excerpt}= [];
+
+    #print Dumper(\$src1);
+    foreach my $excerpt ( @{$src1->{excerpt}} ) {
+      push @{$tgt->{excerpt}}, $excerpt;
+      if ( exists $filename_hash{$excerpt->{audio_filename}} ) {
+        print STDERR "[WARN]: Duplicate filename $excerpt->{audio_filename} \n"
+      } else {
+        $duration += $excerpt->{dur} ;
+        $filename_hash{$excerpt->{audio_filename}} = $excerpt;
+      }
+    }
+    foreach my $excerpt ( @{$src2->{excerpt}} ) {
+      push @{$tgt->{excerpt}}, $excerpt;
+      if ( exists $filename_hash{$excerpt->{audio_filename}} ) {
+        print STDERR "[WARN]: Duplicate filename $excerpt->{audio_filename} \n"
+      } else {
+        $duration += $excerpt->{dur} ;
+        $filename_hash{$excerpt->{audio_filename}} = $excerpt;
+      }
+    }
+    $tgt->{source_signal_duration} = $duration;
+
+    my $tgtxml = XMLout($tgt, RootName=>"ecf");
+    print $tgtxml;
+  ' $src1/kws/ecf.xml $src2/kws/ecf.xml > $dest/kws/ecf.xml
+fi
+
+if [ $stage -le 2 ] ; then
+  # combine the kwlist files
+  echo "Combining the KWLIST files"
+  perl -e '
+    #binmode STDIN, ":utf8";
+    binmode STDOUT, ":utf8";
+
+    use XML::Simple;
+    use Data::Dumper;
+
+    use strict;
+    use warnings;
+
+    my $src1 = XMLin($ARGV[0], ForceArray => 1);
+    my $src2 = XMLin($ARGV[1], ForceArray => 1);
+    my $tgt={};
+    my %kwid_hash;
+
+    if ( $src1->{compareNormalize} ne $src2->{compareNormalize} ) {
+      die "KWLIST compareNormalize attributes differ in the source kwlist.xml files";
+    }
+    if ( $src1->{language} ne $src2->{language} ) {
+      die "KWLIST languages differ in the source kwlist.xml files";
+    }
+
+    $tgt->{ecf_filename} = "";
+    $tgt->{language}=$src1->{language};
+    $tgt->{compareNormalize}=$src1->{compareNormalize};
+    $tgt->{encoding}=$src1->{encoding};
+    $tgt->{version}="1";
+    $tgt->{kw}= [];
+
+
+    foreach my $kw ( @{$src1->{kw}} ) {
+      $kw->{kwid} = $kw->{kwid} . "-A";
+      if ( exists $kwid_hash{$kw->{kwid}} ) {
+        print STDERR "[WARN]: Duplicate kwid $kw->{kwid}\n";
+      } else {
+        $kwid_hash{$kw->{kwid}} = $kw;
+      }
+      push @{$tgt->{kw}}, $kw;
+    }
+    foreach my $kw ( @{$src2->{kw}} ) {
+      $kw->{kwid} = $kw->{kwid} . "-B";
"-B"; + if ( exists $kwid_hash{$kw->{kwid}} ) { + print STDERR "[WARN]: Duplicate kwid $kw->{kwid}\n"; + } else { + $kwid_hash{$kw->{kwid}} = $kw; + } + push @{$tgt->{kw}}, $kw; + } + + my $tgtxml = XMLout($tgt, RootName=>"kwlist", KeyAttr=>""); + print $tgtxml; + ' $src1/kws/kwlist.xml $src2/kws/kwlist.xml > $dest/kws/kwlist.xml || exit 1 +fi + +if [ $stage -le 3 ] ; then + echo "Making KWLIST maps" + perl -e ' + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; + + use XML::Simple; + use Data::Dumper; + + use strict; + use warnings; + + my $src1 = XMLin($ARGV[0], ForceArray => 1); + open TGT_DEV, ">", $ARGV[1] or die $!; + open TGT_TST, ">", $ARGV[2] or die $!; + + foreach my $kw ( @{$src1->{kw}} ) { + if ( $kw->{kwid} =~ "KW.+-A\$" ) { + my $new_kw = $kw->{kwid}; + my $old_kw = substr $new_kw, 0, -2; + print TGT_DEV "$old_kw\t$new_kw\n"; + } elsif ( $kw->{kwid} =~ "KW.+-B\$" ) { + my $new_kw = $kw->{kwid}; + my $old_kw = substr $new_kw, 0, -2; + print TGT_TST "$old_kw\t$new_kw\n"; + } else { + die "Unsupported or unknown KW ID: $kw->{kwid}\n"; + } + } + ' $dest/kws/kwlist.xml $dest/kws/kws_map.dev.txt $dest/kws/kws_map.test.txt || exit 1 +fi + +exit 0 + diff --git a/egs/babel/s5d/local/cstr_ndx2flist.pl b/egs/babel/s5d/local/cstr_ndx2flist.pl new file mode 100755 index 00000000000..79daa1a99db --- /dev/null +++ b/egs/babel/s5d/local/cstr_ndx2flist.pl @@ -0,0 +1,54 @@ +#!/usr/bin/env perl + +# Copyright 2010-2011 Microsoft Corporation + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +# This is modified from the script in standard Kaldi recipe to account +# for the way the WSJ data is structured on the Edinburgh systems. +# - Arnab Ghoshal, 12/1/12 + +# This program takes as its standard input an .ndx file from the WSJ corpus that looks +# like this: +#;; File: tr_s_wv1.ndx, updated 04/26/94 +#;; +#;; Index for WSJ0 SI-short Sennheiser training data +#;; Data is read WSJ sentences, Sennheiser mic. +#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts +#;; per speaker TI) = 7236 utts +#;; +#11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 +#11_1_1:wsj0/si_tr_s/01i/01ic0202.wv1 +#11_1_1:wsj0/si_tr_s/01i/01ic0203.wv1 + +# and as command-line argument it takes the names of the WSJ disk locations, e.g.: +# /group/corpora/public/wsjcam0/data on DICE machines. +# It outputs a list of absolute pathnames. + +$wsj_dir = $ARGV[0]; + +while(){ + if(m/^;/){ next; } # Comment. Ignore it. + else { + m/^([0-9_]+):\s*(\S+)$/ || die "Could not parse line $_"; + $filename = $2; # as a subdirectory of the distributed disk. 
+    if ($filename !~ m/\.wv1$/) { $filename .= ".wv1"; }
+    $filename = "$wsj_dir/$filename";
+    if (-e $filename) {
+      print "$filename\n";
+    } else {
+      print STDERR "File $filename found in the index but not on disk\n";
+    }
+  }
+}
diff --git a/egs/babel/s5d/local/ctm2segments.pl b/egs/babel/s5d/local/ctm2segments.pl
new file mode 100755
index 00000000000..55a8bd84fc8
--- /dev/null
+++ b/egs/babel/s5d/local/ctm2segments.pl
@@ -0,0 +1,159 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $cf_needed = 0.9;
+my $cf_needed_upper = 1;
+my $extend_segments = 0.0 ;
+
+my $Usage = <<EOU;
+Usage: ctm2segments.pl [options] <ctm-file> <output-dir>
+
+Allowed options:
+  --min-cf          : Minimum CF to include the word (float, default = 0.9)
+  --max-cf          : Maximum CF to include the word (float, default = 1.0)
+  --extend-segments : Add this delta to the boundaries of the segments (float, default = 0.0)
+EOU
+
+GetOptions('min-cf=f' => \$cf_needed,
+           'max-cf=f' => \$cf_needed_upper,
+           'extend-segments=f' => \$extend_segments,
+          );
+
+
+# Get parameters
+my $filein = shift @ARGV;
+my $dirout = shift @ARGV;
+
+
+my @segments;
+my @utterances;
+my @text;
+
+my $words = "";
+my $seg_end = -1;
+my $seg_start = -1;
+my $filename;
+
+my $total_seconds=0;
+my $extracted_seconds=0;
+open(FILEIN, $filein);
+while (my $line = <FILEIN>) {
+  chop $line;
+  my @entries = split(/ /, $line);
+  die "Cannot parse line \"$line\"" if scalar @entries != 6;
+
+  ($filename, my $chann_id, my $beg, my $end, my $word, my $conf) = @entries;
+
+  $total_seconds += $end * 1.0;
+
+  if ($conf >= $cf_needed ) {
+    if ( $words ne "" ) {
+      #print "Extend segment\n";
+      $words .= " $word";
+      $seg_end = $beg * 1.0 + $end*1.0;
+    } else {
+      #start a new segment
+      #print "Start segment\n";
+      $seg_start = $beg;
+      $seg_end = $beg * 1.0 + $end*1.0;
+      $words = $word;
+    }
+  } else {
+    #flush the segment
+    if ( $words ) {
+      my @filename_parts = split(/_/, $filename);
+      my $channel="C";
+      if ($filename_parts[6] eq "inLine" ) {
+        $channel="A";
+      } elsif ($filename_parts[6] eq "outLine" ) {
+        $channel="B";
+      }
+
+      $extracted_seconds+= ($seg_end - $seg_start);
+      $seg_start -= $extend_segments;
+      $seg_end += $extend_segments;
+
+      my $spk_id=$filename_parts[3] . "_" . $channel;
+      my $utt_id = $spk_id . "_" . join("_", @filename_parts[4..5]);
+      my $last_part = sprintf("%06d", $seg_start * 100);
+      $utt_id .= "_" . $last_part;
+      #print $utt_id . " $beg \n";
+
+      #14350_A_20121123_042710_001337
+
+      #10901_A_20121128_230024_000227 BABEL_OP1_206_10901_20121128_230024_inLine 2.275 3.265
+      my $segment = "$utt_id $filename $seg_start $seg_end";
+      #14350_A_20121123_042710_001337 14350_A
+      my $utt2spk = "$utt_id $spk_id";
+      #10901_A_20121128_230024_000227 hayi Lovemore
+      my $text = "$utt_id $words";
+      push @segments, $segment;
+      push @utterances, $utt2spk;
+      push @text, $text;
+      $words = "";
+    }
+
+  }
+}
+if ( $words ) {
+  #print "Flush.\n";
+  my @filename_parts = split(/_/, $filename);
+  my $channel="C";
+  if ($filename_parts[6] eq "inLine" ) {
+    $channel="A";
+  } elsif ($filename_parts[6] eq "outLine" ) {
+    $channel="B";
+  }
+
+  $extracted_seconds+= ($seg_end - $seg_start);
+  $seg_start -= $extend_segments;
+  $seg_end += $extend_segments;
+
+  my $spk_id=$filename_parts[3] . "_" . $channel;
+  my $utt_id = $spk_id . "_" . join("_", @filename_parts[4..5]);
+  my $last_part = sprintf("%06d", $seg_start * 100);
+  $utt_id .= "_" . $last_part;
+  #print $utt_id . " $beg \n";
" $beg \n"; + + #14350_A_20121123_042710_001337 + + #10901_A_20121128_230024_000227 BABEL_OP1_206_10901_20121128_230024_inLine 2.275 3.265 + my $segment = "$utt_id $filename $seg_start $seg_end"; + #14350_A_20121123_042710_001337 14350_A + my $utt2spk = "$utt_id $spk_id"; + #10901_A_20121128_230024_000227 hayi Lovemore + my $text = "$utt_id $words"; + push @segments, $segment; + push @utterances, $utt2spk; + push @text, $text; + $words = ""; +} + +open(SEGMENTS, "> $dirout/segments"); +foreach my $line (@segments) { + print SEGMENTS "$line\n"; +} +close(SEGMENTS); + +open(TEXT, "> $dirout/text"); +foreach my $line (@text) { + print TEXT "$line\n"; +} +close(TEXT); + +open(UTT, "> $dirout/utt2spk"); +foreach my $line (@utterances) { + print UTT "$line\n"; +} +close(UTT); + +my $total_hours=sprintf("%.2f", $total_seconds/3600); +my $extracted_hours=sprintf("%.2f", $extracted_seconds/3600); +my $s_ex_secs=sprintf("%d", $extracted_seconds); + +print "Fragments extracted: $s_ex_secs seconds ($extracted_hours hours) out of $total_hours hours\n"; + diff --git a/egs/babel/s5d/local/datasets/basic_kws.sh b/egs/babel/s5d/local/datasets/basic_kws.sh new file mode 100644 index 00000000000..cff34eba69c --- /dev/null +++ b/egs/babel/s5d/local/datasets/basic_kws.sh @@ -0,0 +1,28 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. + +if [ "${dataset_kind}" == "supervised" ] ; then + mandatory_variables="my_ecf_file my_kwlists my_rttm_file" + optional_variables="my_subset_ecf" +else + mandatory_variables="my_ecf_file my_kwlists" + optional_variables="my_subset_ecf" +fi + +check_variables_are_set + +if [ ! -f ${dataset_dir}/kws/.done ] ; then + kws_flags=( --use-icu true ) + if [ "${dataset_kind}" == "supervised" ] || [ !-z "$my_rttm_file" ] ; then + kws_flags+=(--rttm-file $my_rttm_file ) + fi + if $my_subset_ecf ; then + kws_flags+=(--subset-ecf $my_data_list) + fi + local/kws_setup.sh --case_insensitive $case_insensitive \ + "${kws_flags[@]}" "${icu_opt[@]}" \ + $my_ecf_file $my_kwlist_file $lang ${dataset_dir} || exit 1 + touch ${dataset_dir}/kws/.done +fi diff --git a/egs/babel/s5d/local/datasets/extra_kws.sh b/egs/babel/s5d/local/datasets/extra_kws.sh new file mode 100644 index 00000000000..d00eab1b06f --- /dev/null +++ b/egs/babel/s5d/local/datasets/extra_kws.sh @@ -0,0 +1,137 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. 
+
+if [ "${dataset_kind}" == "supervised" ] ; then
+  mandatory_variables="my_ecf_file my_kwlists my_rttm_file"
+  optional_variables="my_subset_ecf"
+else
+  mandatory_variables="my_ecf_file my_kwlists"
+  optional_variables="my_subset_ecf"
+fi
+
+check_variables_are_set
+
+function register_extraid {
+  local dataset_dir=$1
+  local extraid=$2
+  echo "Registering $extraid"
+  echo $extraid >> $dataset_dir/extra_kws_tasks;
+  sort -u $dataset_dir/extra_kws_tasks -o $dataset_dir/extra_kws_tasks
+}
+
+function setup_oov_search {
+  local phone_cutoff=0
+
+  local g2p_nbest=10
+  local g2p_mass=0.95
+
+
+  local data_dir=$1
+  local source_dir=$2
+  local extraid=$3
+
+  local kwsdatadir=$data_dir/${extraid}_kws
+
+  mkdir -p $kwsdatadir
+
+  for file in $source_dir/rttm ; do
+    [ -f $file ] && cp -f $file $kwsdatadir
+  done
+
+  for file in $source_dir/utter_* $source_dir/kwlist*.xml $source_dir/ecf.xml ; do
+    cp -f $file $kwsdatadir
+  done
+
+  kwlist=$source_dir/kwlist_outvocab.xml
+  #Get the KW list
+  paste \
+    <(cat $kwlist | grep -o -P "(?<=kwid=\").*(?=\")") \
+    <(cat $kwlist | grep -o -P "(?<=<kwtext>).*(?=</kwtext>)" | uconv -f utf-8 -t utf-8 -x Any-Lower) \
+    >$kwsdatadir/keywords.txt
+  cut -f 2 $kwsdatadir/keywords.txt | \
+    sed 's/\s\s*/\n/g' | sort -u > $kwsdatadir/oov.txt
+
+
+  #Generate the confusion matrix
+  #NB, this has to be done only once, as it is training corpora dependent,
+  #instead of search collection dependent
+  if [ ! -f exp/conf_matrix/.done ] ; then
+    local/generate_confusion_matrix.sh --cmd "$decode_cmd" --nj $my_nj \
+      exp/sgmm5_denlats/dengraph exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix || return 1
+    touch exp/conf_matrix/.done
+  fi
+  confusion=exp/conf_matrix/confusions.txt
+
+  if [ ! -f exp/g2p/.done ] ; then
+    if [ -f data/.extlex ]; then
+      local/train_g2p.sh data/local/lexicon_orig.txt exp/g2p || return 1;
+    else
+      local/train_g2p.sh data/local/lexicon.txt exp/g2p || return 1;
+    fi
+    touch exp/g2p/.done
+  fi
+  local/apply_g2p.sh --nj $my_nj --cmd "$decode_cmd" \
+    --var-counts $g2p_nbest --var-mass $g2p_mass \
+    $kwsdatadir/oov.txt exp/g2p $kwsdatadir/g2p || return 1
+  L2_lex=$kwsdatadir/g2p/lexicon.lex
+
+  if [ -z "$L1_lex" ] ; then
+    L1_lex=data/local/lexiconp.txt
+  fi
+
+  local/kws_data_prep_proxy.sh \
+    --cmd "$decode_cmd" --nj $my_nj \
+    --case-insensitive true \
+    --confusion-matrix $confusion \
+    --phone-cutoff $phone_cutoff \
+    --pron-probs true --beam $proxy_beam --nbest $proxy_nbest \
+    --phone-beam $proxy_phone_beam --phone-nbest $proxy_phone_nbest \
+    $lang $data_dir $L1_lex $L2_lex $kwsdatadir
+
+}
+
+
+kws_flags=( --use-icu true )
+if [ "${dataset_kind}" == "supervised" ] || [ ! -z "$my_rttm_file" ]; then
+  #The presence of the file had been already verified, so just
+  #add the correct switches
+  kws_flags+=(--rttm-file $my_rttm_file )
+fi
+if $my_subset_ecf ; then
+  kws_flags+=(--subset-ecf $my_data_list)
+fi
+
+if [ ${#my_kwlists[@]} -ne 0 ] ; then
+
+  touch $dataset_dir/extra_kws_tasks
+
+  for extraid in "${!my_kwlists[@]}" ; do
+    #The next line will help us in running only one. We don't really
+    #know in which directory the KWS setup will reside in, so we will
+    #place the .done file directly into the data directory
+    [ -f $dataset_dir/.done.kws.$extraid ] && continue;
+    kwlist=${my_kwlists[$extraid]}
+
+    local/kws_setup.sh --extraid $extraid --case_insensitive $case_insensitive \
+      "${kws_flags[@]}" "${icu_opt[@]}" \
+      $my_ecf_file $kwlist $lang ${dataset_dir} || exit 1
+
+    #Register the dataset for default running...
+ #We can do it without any problem here -- the kws_stt_tasks will not + #run it, unless called with --run-extra-tasks true switch + register_extraid $dataset_dir $extraid + touch $dataset_dir/.done.kws.$extraid + done + for extraid in "${!my_kwlists[@]}" ; do + #The next line will help us in running only one. We don't really + #know in which directory the KWS setup will reside in, so we will + #place the .done file directly into the data directory + [ -f $dataset_dir/.done.kws.${extraid}_oov ] && continue; + setup_oov_search $dataset_dir $dataset_dir/${extraid}_kws ${extraid}_oov || exit 1 + register_extraid $dataset_dir ${extraid}_oov + touch $dataset_dir/.done.kws.${extraid}_oov + done +fi + diff --git a/egs/babel/s5d/local/datasets/supervised_pem.sh b/egs/babel/s5d/local/datasets/supervised_pem.sh new file mode 100644 index 00000000000..e131fae40fa --- /dev/null +++ b/egs/babel/s5d/local/datasets/supervised_pem.sh @@ -0,0 +1,35 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. +if [ "${dataset_type}" != "supervised" ] ; then + mandatory_variables="my_data_dir my_data_list my_nj " + optional_variables="" +else + mandatory_variables="my_data_dir my_data_list my_nj " + optional_variables="my_stm_file " +fi + +check_variables_are_set + + +if [[ ! -f ${dataset_dir}/wav.scp || ${dataset_dir}/wav.scp -ot "$my_data_dir" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_type} data lists in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + mkdir -p ${dataset_dir} + local/prepare_acoustic_training_data.pl --fragmentMarkers \-\*\~ \ + $my_data_dir ${dataset_dir} > ${dataset_dir}/skipped_utts.log || exit 1 +fi + +if [ "$dataset_kind" == "supervised" ]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_type} stm files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + if [ ! -z $my_stm_file ] ; then + local/augment_original_stm.pl $my_stm_file ${dataset_dir} + else + local/prepare_stm.pl --fragmentMarkers \-\*\~ ${dataset_dir} + fi +fi + diff --git a/egs/babel/s5d/local/datasets/supervised_seg.sh b/egs/babel/s5d/local/datasets/supervised_seg.sh new file mode 100644 index 00000000000..45cc7f28593 --- /dev/null +++ b/egs/babel/s5d/local/datasets/supervised_seg.sh @@ -0,0 +1,90 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. +if [ ${dataset_type} != "supervised" ] ; then + mandatory_variables="my_data_dir my_data_list my_nj" + optional_variables="" +else + mandatory_variables="my_data_dir my_data_list my_nj" + optional_variables="my_stm_file" +fi + +check_variables_are_set + +segmentation_opts="--isolated-resegmentation \ + --min-inter-utt-silence-length 1.0 \ + --silence-proportion 0.05 " + +workdir=exp/make_seg/${dataset_id} +unseg_dir=$workdir +mkdir -p $unseg_dir +# 4. Create the wav.scp file: +sph2pipe=`which sph2pipe || which $KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe` +if [ $? -ne 0 ] ; then + echo "Could not find sph2pipe binary. Add it to PATH" + exit 1; +fi +sox=`which sox` +if [ $? 
-ne 0 ] ; then + echo "Could not find sox binary. Add it to PATH" + exit 1; +fi + +echo "Creating the $unseg_dir/wav.scp file" +audiodir=$my_data_dir/audio +for file in `cat $my_data_list | sort -u` ; do + if [ -f $audiodir/$file.sph ] ; then + echo "$file $sph2pipe -f wav -p -c 1 $audiodir/$file.sph |" + elif [ -f $audiodir/$file.wav ] ; then + echo "$file $sox $audiodir/$file.wav -r 8000 -c 1 -b 16 -t wav - downsample |" + else + echo "Audio file $audiodir/$file.(sph|wav) does not exist!" >&2 + exit 1 + fi +done | sort -u > $unseg_dir/wav.scp + +l1=`cat $unseg_dir/wav.scp | wc -l ` +l2=`cat $my_data_list | wc -l ` +if [ "$l1" -ne "$l2" ] ; then + echo "wav.scp number of files: $l1" + echo "filelist number of files: $l2" + echo "Not all files from the list $my_data_list found their way into wav.scp" + exit 1 +fi + +echo "Creating the $unseg_dir/reco2file_and_channel file" +cat $unseg_dir/wav.scp | awk '{print $1, $1, "A";}' > $unseg_dir/reco2file_and_channel +cat $unseg_dir/wav.scp | awk '{print $1, $1;}' > $unseg_dir/utt2spk +utils/utt2spk_to_spk2utt.pl $unseg_dir/utt2spk > $unseg_dir/spk2utt + +make_plp $unseg_dir $workdir/make_plp $workdir/plp || exit 1 + +local/resegment/generate_segments.sh --nj $my_nj --cmd "$decode_cmd" \ + --noise_oov false --segmentation_opts "$segmentation_opts" \ + $unseg_dir data/lang exp/tri4b_seg \ + $workdir $dataset_dir || exit 1 + +num_hours=`cat ${dataset_dir}/segments | \ + awk '{secs+= $4-$3;} END{print(secs/3600);}'` + +echo "Number of hours of the newly segmented data: $num_hours" + +if [ "$dataset_kind" == "supervised" ]; then + echo --------------------------------------------------------------------- + echo "preparing ${dataset_id} stm files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + if [ ! -z $my_stm_file ] ; then + local/augment_original_stm.pl $my_stm_file ${dataset_dir} + else + local/prepare_stm.pl --fragmentmarkers \-\*\~ ${dataset_dir} + fi +else + echo --------------------------------------------------------------------- + echo "preparing ${dataset_id} stm files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + if [ ! -z $my_stm_file ] ; then + local/augment_original_stm.pl $my_stm_file ${dataset_dir} + fi +fi + diff --git a/egs/babel/s5d/local/datasets/supervised_uem.sh b/egs/babel/s5d/local/datasets/supervised_uem.sh new file mode 100644 index 00000000000..5ac1e003d5d --- /dev/null +++ b/egs/babel/s5d/local/datasets/supervised_uem.sh @@ -0,0 +1,36 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. + +eval my_data_cmudb=\$${dataset_type}_data_cmudb + +if [ "${dataset_kind}" != "supervised" ] ; then + mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" + optional_variables="" +else + mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" + optional_variables="my_stm_file" +fi + +check_variables_are_set + +if [[ ! 
-f ${dataset_dir}/wav.scp || ${dataset_dir}/wav.scp -ot "$my_data_cmudb" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_type} data lists in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + mkdir -p ${dataset_dir} + local/cmu_uem2kaldi_dir.sh --filelist $my_data_list \ + $my_data_cmudb $my_data_dir ${dataset_dir} +fi + +if [ "$dataset_kind" == "supervised" ]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_type} stm files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + if [ ! -z $my_stm_file ] ; then + local/augment_original_stm.pl $my_stm_file ${dataset_dir} + else + local/prepare_stm.pl --fragmentMarkers \-\*\~ ${dataset_dir} + fi +fi diff --git a/egs/babel/s5d/local/datasets/unsupervised_seg.sh b/egs/babel/s5d/local/datasets/unsupervised_seg.sh new file mode 120000 index 00000000000..9e2e12b5bad --- /dev/null +++ b/egs/babel/s5d/local/datasets/unsupervised_seg.sh @@ -0,0 +1 @@ +supervised_seg.sh \ No newline at end of file diff --git a/egs/babel/s5d/local/datasets/unsupervised_uem.sh b/egs/babel/s5d/local/datasets/unsupervised_uem.sh new file mode 120000 index 00000000000..81440969d5c --- /dev/null +++ b/egs/babel/s5d/local/datasets/unsupervised_uem.sh @@ -0,0 +1 @@ +supervised_uem.sh \ No newline at end of file diff --git a/egs/babel/s5d/local/datasets/vocab_kws.sh b/egs/babel/s5d/local/datasets/vocab_kws.sh new file mode 100644 index 00000000000..d161fc77b67 --- /dev/null +++ b/egs/babel/s5d/local/datasets/vocab_kws.sh @@ -0,0 +1,51 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. + +if [ "${dataset_kind}" == "supervised" ] ; then + mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" + optional_variables="my_subset_ecf" +else + mandatory_variables="my_ecf_file my_kwlist_file" + optional_variables="my_subset_ecf" +fi + +check_variables_are_set + +if [ "$dataset_kind" == "shadow" ]; then + true #we do not support multiple kw lists for shadow set system + +elif [ ! -f $dataset_dir/.done.kws.fullvocab ] ; then + #a This will work for both supervised and unsupervised dataset kinds + kws_flags=() + if [ "$dataset_kind" == "supervised" ] || [ ! -z "$my_rttm_file" ] ; then + kws_flags+=(--rttm-file $my_rttm_file ) + fi + if $my_subset_ecf ; then + kws_flags+=(--subset-ecf $my_data_list) + fi + + #We just could come with some bogus naming scheme, + #but as long as the audio files can tell the iarpa lang id, we will use that + langid=`ls -1 $my_data_dir/audio/ | head -n 1| cut -d '_' -f 3` + + #NB: we assume the default KWS search is already done and will "borrow" + #the rttm and ecf files. + #We could easily generate the ecf file, but the RTTM assumes the decoding + #had been already done. That could be done + #Ideally, these files should be generated here! 
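+  #The <(...) process substitution below turns every entry of words.txt,
+  #except the symbols in angle brackets (<eps>, <unk>, ...) and the #0-style
+  #disambiguation symbols, into a keyword named KWID${langid}-FULLVOCAB-NNNNN.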
+
+  local/kws_setup.sh --kwlist-wordlist true "${kws_flags[@]}" \
+    --extraid fullvocab $my_ecf_file \
+    <(cat $lang/words.txt | \
+      grep -v "^<" | grep -v "^#" | \
+      awk "{printf \"KWID$langid-FULLVOCAB-%05d %s\\n\", \$2, \$1 }" ) \
+    $lang ${dataset_dir} || exit 1
+
+  echo fullvocab >> $dataset_dir/extra_kws_tasks;
+  sort -u $dataset_dir/extra_kws_tasks -o $dataset_dir/extra_kws_tasks
+  touch $dataset_dir/.done.kws.fullvocab
+fi
+
+
diff --git a/egs/babel/s5d/local/decode_helper.sh b/egs/babel/s5d/local/decode_helper.sh
new file mode 100755
index 00000000000..d2bed774c68
--- /dev/null
+++ b/egs/babel/s5d/local/decode_helper.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. ./cmd.sh
+
+TYPE=$1
+LANGDIR=$2
+MODELDIR=$3
+DEVDIR=$4
+TRANSFORMDIR=$5
+
+echo "$@"
+
+if [ "$TYPE" == "SI" ]; then
+  utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1
+  steps/decode.sh --nj 20 --cmd "$decode_cmd" \
+    $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1
+elif [ "$TYPE" == "FMLLR" ]; then
+  utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1
+  steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
+    $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1
+elif [ "$TYPE" == "SGMM" ]; then
+  utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1
+
+  steps/decode_sgmm.sh --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR \
+    $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1;
+
+  steps/decode_sgmm.sh --use-fmllr true --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR \
+    $MODELDIR/graph $DEVDIR $MODELDIR/decode_fmllr || exit 1;
+
+fi
+
+
diff --git a/egs/babel/s5d/local/eval_kw_subsets.sh b/egs/babel/s5d/local/eval_kw_subsets.sh
new file mode 100755
index 00000000000..8a67225da52
--- /dev/null
+++ b/egs/babel/s5d/local/eval_kw_subsets.sh
@@ -0,0 +1,4 @@
+KWSEval -e ecf.xml -r rttm -t keyword_outvocab.xml -s kwslist.xml -c -o -b -d -f ./kws/outvocab
+KWSEval -e ecf.xml -r rttm -t keyword_invocab.xml -s kwslist.xml -c -o -b -d -f ./kws/invocab
+KWSEval -e ecf.xml -r rttm -t kws.xml -s kwslist.xml -c -o -b -d -f ./kws/fullvocab
+
diff --git a/egs/babel/s5d/local/extend_lexicon.sh b/egs/babel/s5d/local/extend_lexicon.sh
new file mode 100755
index 00000000000..c930b1729e0
--- /dev/null
+++ b/egs/babel/s5d/local/extend_lexicon.sh
@@ -0,0 +1,572 @@
+#!/bin/bash
+
+# Copyright 2014  Johns Hopkins University (authors: Daniel Povey, Yenda Trmal)
+#           2014  Guoguo Chen
+#           2015  MIT Lincoln Labs (author: Fred Richardson)
+# Apache 2.0.
+
+# This script takes an input lexicon (e.g. lexicon.txt) and generates likely
+# out of vocabulary words from it, with their associated spellings.  It outputs
+# two files: lexiconp.txt (this is the lexicon format that has pronunciation
+# probabilities; the words in the original lexicon have probability one), and
+# oov2prob, which says how the OOV mass is distributed among the new OOV words
+# in the lexicon.
+
+# It assumes that the syllables in pronunciations in the input lexicon.txt are
+# separated by tabs, as is normal for the BABEL setup; the syllable boundaries
+# are necessary for the method that this script uses.
+
+# We use SRILM to train an LM (lm.gz) by treating the sequence of syllables in a
+# pronunciation like the sequence of words in a sentence; we use a 3-gram
+# Kneser-Ney smoothed model, as this seemed to work best.  We then generate
+# "sentences" (really, pronunciations) from this LM using the "ngram" command
+# from SRILM with the "-gen" option.
+# We do this in parallel, and also use SRILM
+# to compute the probabilities of these "sentences".  Then the "--num-prons"
+# most likely generated pronunciations are selected (by default: one million).
+
+# Next, we use the g2p tool from "Sequitur" to learn a mapping from
+# pronunciations of words to their spellings.  This is the opposite of the normal
+# direction of prediction, so we refer to the models as "p2g".  To do this, we
+# give g2p a reversed version of the input lexicon, so while the input lexicon
+# might have entries like
+#  Hi  h ay
+# the reversed lexicon would have entries like
+#  hay  H i
+# We were concerned that depending on the way the phones are represented as
+# letters, there might be a lot of ambiguity introduced when we get rid of the
+# spaces (e.g. does "hay" come from h+ay, or h+a+y?), and that this might hurt
+# the accuracy of the g2p prediction.  We did not want to introduce a separator
+# because we felt that this would make the mapping harder for g2p to learn.
+# Instead we mapped the phones to unique letters; this is what the "phone_map"
+# file is about.  Furthermore, in BABEL we have the concept of tags on the
+# phones, e.g. in a tonal language, ay_3 might be the phone "ay" with tone 3.
+# As far as Kaldi is concerned, ay_3 is a single phone.  To avoid the number of
+# letters blowing up too much, we make these tags separate letters when generating
+# phone_map, so ay_3 might be mapped to kX with ay mapping to k and 3 mapping to
+# X.  To avoid ambiguity being introduced, we ensure that the alphabets for the
+# phones and the tags are distinct (and in general, we allow multiple tags, with
+# the tags in different positions having distinct alphabets).
+
+# Once we have our g2p models trained (and the g2p training is the most time
+# consuming aspect of this script), we apply g2p to all of our generated
+# pronunciations to give us likely spelling variants.  The number of
+# alternatives is controlled by the options --var-mass (default: 0.8, meaning we
+# generate 0.8 of the entire probability mass), and --var-counts (default: 3,
+# meaning we generate at most 3 alternative spellings per pronunciation).  We
+# take the probabilities of the OOVs (as assigned by the syllable-level LM) and
+# multiply them by the spelling probabilities assigned by g2p, to give us the
+# probability of the (pronunciation, word) pair.  From these pairs we strip out
+# those with words (spellings) that were in the original lexicon, and those with
+# pronunciations shorter than a specified minimum --min-phones (default: 3).  We
+# then limit the total number of pairs to --num-prons (default: one million) and
+# rescale the probabilities of the pairs so that they sum to one overall.
+
+# We format this information as two pieces: a lexicon with probabilities
+# (lexiconp.txt) and a file that gives us the probability of each OOV word
+# (oov2prob).  The probabilities in lexiconp.txt are normalized so that the most
+# probable pronunciation of each word is 1; the probabilities in oov2prob are
+# normalized such that if we multiply by the pronunciation probability in
+# lexiconp.txt, we would get the probability we assigned to that (pronunciation,
+# word) pair.
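+# A small worked example of that normalization (all numbers made up):
+# suppose the pairs (pron1, "ukuzi") = 0.0012 and (pron2, "ukuzi") = 0.0006
+# survive the filtering.  Then lexiconp.txt gets "ukuzi 1.0 pron1" and
+# "ukuzi 0.5 pron2", while oov2prob gets "ukuzi 0.0012" (after the global
+# renormalization), so multiplying the pron-prob by the word-prob recovers
+# the pair probability.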
+# These outputs are used as follows: lexiconp.txt will be used by
+# utils/prepare_lang.sh to generate L.fst and L_disambig.fst in the lang/
+# directory, so the lexicon FSTs and words.txt will include the generated OOVs.
+# oov2prob will be used when generating the grammar transducer G.fst by
+# local/arpa2G.sh.  For example, if you call arpa2G.sh with the options
+# --oov-prob-file some/dir/oov2prob --unk-fraction 0.33, it will put all the OOVs
+# listed in some/dir/oov2prob as if they were unigrams in G.fst, with probability
+# equal to 0.33 times the probability listed in oov2prob.  However, that script
+# will not allow the unigram probability of any OOV word to be more probable than
+# the least probable word which was originally in the ARPA file (not counting <s>,
+# which generally has probability -99); this is applied as a ceiling on the
+# unknown-word probabilities.  Note: the --unk-fraction should probably be
+# similar to the OOV rate in that language.  Calculating the OOV rate on some
+# dev data is one reasonable way to set this; see the commands at the very
+# bottom of this file for an example of how we can compute the OOV rate.
+# (Arguably, one should give an even higher fraction than this, because given the
+# unigram state, the probability of seeing an unknown word is higher).
+# It might seem appropriate to use as "unk-fraction" the probability of
+# the unknown word (<unk>) in the LM itself.  However, this depends on how
+# the LM was estimated; I think in the BABEL setup, <unk> appears as
+# an actual word in the transcripts, and the probability that the LM assigns
+# to it seems to be lower than appropriate.
+
+stage=-5
+g2p_iters=5
+num_prons=1000000     # number of prons to generate.
+num_sent_gen=12000000 # number of sents to generate. this should
+                      # exceed num_prons by a factor of at least
+                      # several.
+nj=40  # number of jobs to use for generation.
+encoding='utf-8'  # option for g2p; leave this as it is.
+# the following two options are used in g2p generation.
+var_counts=3  #Generate up to N variants in g2p
+var_mass=0.8  #Generate enough variants to produce 80 % of the prob mass
+min_phones=3  # minimum number of phones we allow in generated words
+              # (very short generated words could contribute to graph blowup,
+              # and might hurt the decoding accuracy also).
+skip_done=false # if true, allows us to skip over done g2p stages.
+cmd=run.pl
+cleanup=true
+
+echo "$0 $@"  # Print the command line for logging
+
+. utils/parse_options.sh
+. path.sh
+
+if [ $# -ne 2 ] && [ $# -ne 3 ]; then
+  echo "$0: usage: extend_lexicon.sh [options] <input-lexicon> <work-dir> [dev_text]"
+  echo " e.g.: $0 data/local/lexicon_orig.txt data/local/extend/"
+  echo "Will create in <work-dir> the files lexiconp.txt and oov2prob,"
+  echo "where lexiconp.txt is an extended lexicon with pronunciation"
+  echo "probabilities, and oov2prob has lines <word> <prob> which divide"
+  echo "the OOV probability mass among the introduced OOV words."
+  echo "Important options:"
+  echo "  --cmd <command>      # how to run jobs, default run.pl"
+  echo "  --num-prons <int>    # how many prons to generate, default 1000000"
+  exit 1;
+fi
+
+
+input_lexicon=$1
+toplevel_dir=$2   # e.g. data/local/extend
+dev_text=
+if [ $# -eq 3 ]; then
+  dev_text=$3
+fi
+
+dir=$2/tmp  # most of our work happens in this "tmp" directory.
+
+mkdir -p $dir
+
+if [ ! -s $input_lexicon ]; then
+  echo "$0: expected input lexicon $input_lexicon to exist"
+  exit 1;
+fi
+
+cp $input_lexicon $toplevel_dir/input_lexicon.txt  # just to have a record of what we started with.
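+# For reference, an input lexicon line looks something like this (the word
+# and phones are invented; <TAB> marks the tab characters that separate
+# the syllables):
+#   sawubona<TAB>s a<TAB>w u<TAB>b o<TAB>n a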
+
+loc=`which ngram-count`;
+if [ -z $loc ]; then
+  if uname -a | grep 64 >/dev/null; then # some kind of 64 bit...
+    sdir=`pwd`/../../../tools/srilm/bin/i686-m64
+  else
+    sdir=`pwd`/../../../tools/srilm/bin/i686
+  fi
+  if [ -f $sdir/ngram-count ]; then
+    echo Using SRILM tools from $sdir
+    export PATH=$PATH:$sdir
+  else
+    echo You appear not to have SRILM tools installed, either on your path,
+    echo or installed in $sdir.  See tools/install_srilm.sh for installation
+    echo instructions.
+    exit 1
+  fi
+fi
+
+
+if ! which g2p.py >&/dev/null; then
+  if [ ! -d $KALDI_ROOT/tools/sequitur ]; then
+    echo "Sequitur was not found!"
+    echo "Go to $KALDI_ROOT/tools and execute extras/install_sequitur.sh"
+  else
+    echo "Problems running sequitur. Check that your path.sh is putting it on the path."
+    echo "e.g. that it is sourcing KALDI_ROOT/tools/env.sh and that that env.sh file exists"
+  fi
+  exit 1;
+fi
+
+if ! which g2p.py >/dev/null ; then
+  exit 1
+fi
+
+
+if [ $stage -le -5 ]; then
+  # Map the phones to a more unambiguous representation so that when we
+  # concatenate the letters of them, we won't lose information.  This will
+  # also make g2p's life easier because each phone goes to a single letter,
+  # which g2p will treat as a single symbol (remember, g2p is designed
+  # to produce graphemes, so the tokens it produces are letters).
+
+  cat $toplevel_dir/input_lexicon.txt | \
+    awk '{for(n=2;n<=NF;n++) seen[$n]=1;} END{for (key in seen) print key;}' >$dir/phonelist
+
+  cat $dir/phonelist | perl -e ' @ids = ("a".."z", "A".."Z", "0".."9", ":", "=", "?", "@", "[", "]", "^", "+", "\$", "%", "&", "#", "*", "!", "(", ")", "{", "}" );
+    @map = (); while(<>) {
+    chomp; $output = "$_ ";
+    @col = split("_");
+    # Loop over different positions.
+    for ($p = 0; $p < @col; $p++) {
+      # New position that has not been assigned a hash.
+      if (@map <= $p) { push(@map, {}); }
+      # Assign map for each position.
+      if (!defined($map[$p]->{$col[$p]})) {
+        if (@ids == 0) {  # We have used all the ids... die here.
+          die "Used up all the un-mapped ids, cannot continue\n";
+        }
+        $map[$p]->{$col[$p]} = shift @ids;
+      }
+      $output .= "$map[$p]->{$col[$p]}";
+    }
+    print "$output\n"; }' > $dir/phone_map
+  cat $dir/phone_map | awk '{print $2, $1}' > $dir/phone_map.reverse
+
+  cat $toplevel_dir/input_lexicon.txt | \
+    local/apply_map_tab_preserving.pl -f 2- $dir/phone_map > $dir/lexicon_in.txt
+fi
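+# To illustrate the phone_map idea (the letter assignments below are
+# invented; the real ones depend on the order the phones are first seen):
+# a tonal phone set {ay_3, ay_4, s} could produce a phone_map like
+#   ay_3 aC
+#   ay_4 aD
+#   s    b
+# i.e. one letter per phone plus one letter per tag position, so the
+# concatenated form can be split back unambiguously.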
+
+if [ $stage -le -4 ]; then
+  cat $dir/lexicon_in.txt | perl -ane 'if (! m/^\<\S+\>\s/) { print; } ' > $dir/lexicon_in_nosil.txt
+
+  cat $dir/lexicon_in.txt | perl -ane 's/^(\S+\s+)/${1}1.0\t/;print;' > $dir/lexiconp_in.txt
+fi
+
+
+
+
+if [ $stage -le -3 ]; then
+  # Each syllable will be given a "word" representation; we join the phones using comma ","
+  perl -e 'while(<>) { s/^\S+\s*//; s/ /,/g; print }' <$dir/lexicon_in_nosil.txt >$dir/syllable_text.txt
+
+  echo "$0: using SRILM to train syllable LM"
+
+  ngram-count -lm $dir/3gram.kn022.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $dir/syllable_text.txt -sort
+
+  rm $dir/lm.gz 2>/dev/null
+  ln -s 3gram.kn022.gz $dir/lm.gz
+fi
+
+
+ngram=$(which ngram)
+
+if [ $stage -le -2 ]; then
+  mkdir -p $dir/log
+  echo "$0: generating words from the syllable LM"
+
+  per_job_num_sent_gen=$[$num_sent_gen/$nj]
+
+  $cmd JOB=1:$nj $dir/log/gen.JOB.log \
+    $ngram -lm $dir/lm.gz -gen $per_job_num_sent_gen -seed JOB \| \
+    sort -u \> $dir/sents.JOB || exit 1;
+fi
+
+if [ $stage -le -1 ]; then
+  echo "$0: computing probs for the generated sentences"
+  rm $dir/probs.* 2>/dev/null
+
+  echo '#!/usr/bin/perl
+while(1) {
+  $sent = <>; $line=<>; if ($line !~ m/sentences/) { $sent =~ m/^file/ || die "Bad sent $sent"; exit(0); }
+  $line = <>; if ($line !~ m/logprob= (\S+)/) { die "Bad line $line"; } print "$1 $sent";
+  $line = <>; $line eq "\n" || die "expected blank line"; }' >$dir/temp.pl
+  chmod +x $dir/temp.pl
+
+  $cmd JOB=1:$nj $dir/log/compute_prob.JOB.log \
+    $ngram -debug 1 -lm $dir/lm.gz -ppl $dir/sents.JOB \| $dir/temp.pl \| sort -gr \> $dir/probs.JOB || exit 1;
+
+  if $cleanup; then
+    rm $dir/sents.*;
+  fi
+  sort -m -gr $dir/probs.* > $dir/probs.all
+  uniq $dir/probs.all | head -n $num_prons > $dir/probs || true
+  if $cleanup; then
+    rm $dir/probs.*;
+  fi
+
+  mass=$(cat $dir/probs | awk '{x += exp($1 * log(10));} END{print x}')
+
+  echo "$0: total probability mass in generated words is $mass"
+  echo " this should ideally be close to 1 (although we lose a little due to the"
+  echo " empty sentence).  You can get closer by increasing --num-sent-gen and/or"
+  echo " --nj"
+
+  nl=$(cat $dir/probs | wc -l)
+  if [ $nl -lt $num_prons ]; then
+    echo "$0: Number of generated lines $nl is less than number of requested words $num_prons:"
+    echo " please run with larger --nj, currently $nj "
+    exit 1;
+  fi
+fi
+
+
+# Next we train a reverse g2p, which is really p2g.  Suppose a line in the lexicon is
+#  sugar s uh g ax r
+# The basic idea is that we'd transform it to the following reversed entry
+#  suhgaxr s u g a r
+# We may lose a little information by doing this, though, because the segmentation
+# into phonemes may be ambiguous.  So we create a mapping from the original phonemes
+# and tags to letters of the alphabet.  Note: tags are things like s_3 for a phone: here
+# s is the phone and _3 is the tag.
+
+
+if [ $stage -le 0 ]; then
+  cat $dir/lexicon_in_nosil.txt | perl -ane '
+    use Encode qw(decode encode);
+    @A = split; $w = shift @A;
+    $w = Encode::decode("'$encoding'", $w);
+    $w = join(" ", split("", $w));
+    $w = Encode::encode("'$encoding'", $w);
+    print join("", @A) . "\t" . $w . "\n";' > $dir/lexicon_reverse.txt
+
+  echo "$0: Training the G2P model (iter 0)"
+  if ! $skip_done || [ ! -f $dir/p2g.model.0 ]; then
+    $cmd $dir/log/g2p.0.log \
+      g2p.py -S --encoding $encoding --train $dir/lexicon_reverse.txt --devel 5% --write-model $dir/p2g.model.0 || exit 1;
+  else
+    echo "$0: $dir/p2g.model.0 already exists: skipping it since --skip-done is true"
+  fi
+fi
+
+for i in `seq 0 $(($g2p_iters-2))`; do
+  if [ $stage -le $[i+1] ]; then
+    if ! $skip_done || [ ! -f $dir/p2g.model.$[$i+1] ]; then
+      echo "$0: Training the G2P model (iter $[$i + 1] )"
+      $cmd $dir/log/g2p.$[$i+1].log \
+        g2p.py -S --encoding $encoding --model $dir/p2g.model.$i --ramp-up \
+          --train $dir/lexicon_reverse.txt --devel 5% \
+          --write-model $dir/p2g.model.$(($i+1))
+    else
+      ii=$[$i+1];
+      echo "$0: $dir/p2g.model.$ii already exists: skipping it since --skip-done is true"
+    fi
+  fi
+  rm -f $dir/p2g.model.final
+  ln -s p2g.model.$(($i+1)) $dir/p2g.model.final
+done
+
+
+
+if [ $stage -le $g2p_iters ]; then
+  # get the word-list to apply g2p to; each one is just a sequence
+  # of phones, formed by appending the syllables in the "generated sentences"
+  # (really generated syllable-sequences) in $dir/probs, and removing the
+  # separator.
+
+  cat $dir/probs | head -n $num_prons | awk '{$1=""; print $0}' | \
+    sed "s/,//g;s/ //g;" | sort | uniq > $dir/fake_word_list.txt
+
+  echo "$0: Applying the G2P model to wordlist $dir/fake_word_list.txt"
+
+  $cmd JOB=1:$nj $dir/log/apply_p2g.JOB.log \
+    split -n l/JOB/$nj $dir/fake_word_list.txt \| \
+    g2p.py -V $var_mass --variants-number $var_counts --encoding $encoding \
+      --model $dir/p2g.model.final --apply - \
+    \> $dir/p2g_output.JOB || exit 1;
+  perl -wlne 'use strict;
+    our %P;
+    my ($prn,$num,$prb,$spl)=m/^(\S+)\s+(\S+)\s+(\S+)\s+(.*)$/;
+    my $tok=$prn."=".$spl;
+    $P{$tok} = [ $num, $prb ] unless (defined($P{$tok}) && $P{$tok}[1] < $prb);
+    END {
+      map{ my ($prn,$spl)=m/^(.*)=(.*)$/;
+           my ($num, $prb) = @{$P{$_}};
+           print join("\t",$prn,$num,$prb,$spl)
+         } sort keys %P
+    }' $dir/p2g_output.* > $dir/p2g_output
+  rm $dir/p2g_output.*
+fi
+
+if [ $stage -le $[$g2p_iters+1] ]; then
+
+  # the NF >= 4 is about pruning out any empty spellings, that would
+  # produce an empty word.
+  # pron2spelling contains lines like:
+  # ak>a 0.957937 aka
+  cat $dir/p2g_output | \
+    awk '{if (NF >= 4) {printf("%s %s ", $1, $3); for (n=4;n<=NF;n++) {printf("%s", $n);} printf("\n"); }}' | \
+    sort | uniq > $dir/pron2spelling
+
+  # Now remove from pron2spelling, any words that appear in $dir/lexiconp_in.txt
+  # (this also contains the excluded <...> words, e.g. the silence word).
+  cat $dir/pron2spelling | \
+    perl -e 'open(F, $ARGV[0]) || die "opening $ARGV[0]"; while(<F>) { @A=split; $seen_word{$A[0]}=1; }
+     while(<STDIN>) { @A=split; if (! $seen_word{$A[2]}) { print; }} ' $dir/lexiconp_in.txt > $dir/pron2spelling.excluded
+  # $dir/pron2spelling.excluded contains lines like
+  # <pron> <prob> <spelling>
+  # e.g.
+  # Kuku 0.000002642 k>&u k>&u
+
+  cat $dir/probs | \
+    perl -e ' while(<>){ @A = split; $prob = shift @A; $pron=join("", @A);
+     $pron =~ tr/,//d; print "$pron $_"; } '> $dir/probs.with_pron
+  # $dir/probs.with_pron contains lines like the following:
+  # ak>a -2.43244 a &k>&a
+  # This is so we can get the pronunciation in the same form that we put it in, for
+  # the p2g training, for easier comparison with the lines in $dir/pron2spelling.excluded
+
+  perl -e ' ($p2s, $probs_with_pron) = @ARGV;
+    open(P2S, "<$p2s") || die; open(PROBS, "<$probs_with_pron") || die;
+    while (<P2S>) {
+      @A = split;
+      ($pron,$pronprob,$spelling) = @A;
+      if (!defined $prons{$pron}) { $prons{$pron} = [ ]; } # new anonymous array
+      $ref = $prons{$pron};
+      push @$ref, "$pronprob $spelling";
+    }
+    $log10 = log(10.0);
+    while (<PROBS>) {
+      @A = split;
+      $pron = shift @A;  # pron in same format as used by p2g model.
+      $logprob = shift @A;
+      $syllable_pron = join(" ", @A);  # pron separated by syllable
+      $p = exp($logprob * $log10);
+      $ref = $prons{$pron};
+      if (defined $ref) {
+        foreach $str (@$ref) {
+          @B = split(" ", $str);
+          ($pronprob,$spelling) = @B;
+          $pair_prob = $p * $pronprob;
+          print "$spelling $pair_prob $syllable_pron\n";
+        }
+      }
+    } ' $dir/pron2spelling.excluded $dir/probs.with_pron > $dir/lexicon.oov.raw
+
+  # $dir/lexicon.oov.raw contains lines like:
+  # ukuzi 0.000342399163717093 u &k>&u &z&i
+
+  mass=$(cat $dir/lexicon.oov.raw | awk '{x+=$2;} END{print x}')
+  echo "$0: Total probability mass of unseen words (before removing prons"
+  echo " shorter than $min_phones phones) is $mass"
+
+  # the next stage does 3 things: (1) it converts the pronunciations to be
+  # tab-separated lists of syllables and removes the separator ","; (2) it limits us
+  # to prons containing at least $min_phones phones; and (3) it limits to the
+  # most likely $num_prons pairs of (spelling, pron)
+  perl -e ' while (<>) {
+    @A = split;
+    $spelling = shift @A;
+    $prob = shift @A;
+    for ($n = 0; $n < @A; $n++) {  # replace separator in syllable with space.
+      $A[$n] =~ tr/,/ /d;  # replace the separator with space.
+    }
+    $final_pron = join("\t", @A);
+    print "$spelling\t$prob\t$final_pron\n";
+  } ' <$dir/lexicon.oov.raw | sort -k2,2 -gr | \
+   awk -v min=$min_phones '{if(NF>=min+2){print;}}' | head -n $num_prons >$dir/lexicon.oov
+
+
+  mass=$(cat $dir/lexicon.oov | awk '{x+=$2;} END{print x}')
+  echo "$0: Total probability mass of unseen words (after removing prons"
+  echo " shorter than $min_phones phones) is $mass."
+
+
+  # $dir/lexicon.oov contains lines like the following:
+  # ngisa 0.00340513074018366 N g i s a
+  # where the multiple-spaces are actually tabs.
+
+  # Now renormalize the probability to sum to one, decompose $dir/lexicon.oov
+  # into two pieces: a lexicon $dir/lexiconp_oov.txt, which contains the
+  # probabilities of different spellings of words (with the most likely one at
+  # 1.0), and $dir/oov2prob which contains the probabilities of the words
+  # (we'll use it later to adjust the LM).
+
+  # the uniq here shouldn't be needed, actually.  [relates to a bug in a previous
+  # step that is now fixed.]  This script relies on the fact that lexicon.oov
+  # is sorted in reverse order of probability.
+  cat $dir/lexicon.oov | awk -v mass=$mass 'BEGIN{OFS=FS="\t";} {$2 = $2/mass; print;}' | uniq | \
+    perl -e ' ($lexiconp,$words_probs) = @ARGV;
+     open(L, "|sort -u >$lexiconp") || die "opening lexicon $lexiconp";
+     open(W, "|sort -u >$words_probs") || die "opening probs file $words_probs";
+     while (<STDIN>) {
+       @A = split("\t", $_);
+       $word = shift @A; $prob = shift @A; $pron = join("\t", @A);
+       if (!defined $maxprob{$word}) {  # max prob is always the first.
+         $maxprob{$word} = $prob;
+         print W "$word $prob\n";
+       }
+       $pronprob = $prob / $maxprob{$word};
+       $pronprob <= 1 || die "bad pronprob $pronprob\n";
+       print L "$word\t$pronprob\t$pron";
+     } close(L); close(W);  # wait for sort to finish. ' \
+       $dir/lexiconp_oov.txt $dir/oov2prob
+
+  # lexiconp_oov.txt contains lines like:
+  #leyanga 0.96471840417664 l 3 j_" a_" N a
+  #leyanga 1 l 3 j_" a_" N g a
+
+  # oov2prob looks like this:
+  #-Uni 8.77716315938887e-07
+  #Adlule 9.62418179264897e-08
+  #Afuna 2.23048402109824e-06
+fi
+
+if [ $stage -le $[$g2p_iters+2] ]; then
+  # put it to the output directory $toplevel_dir, e.g. data/local/extend
+  cat $dir/lexiconp_in.txt $dir/lexiconp_oov.txt | \
+    local/apply_map_tab_preserving.pl -f 3- $dir/phone_map.reverse | sort -u > $toplevel_dir/lexiconp.txt
+  cp $dir/oov2prob $toplevel_dir/oov2prob
+fi
+
+# Finally, if $dev_text is not empty, print out OOV rate.  We assume $dev_text is
+# in the following format:
+# 14350_A_20121123_042710_001717 yebo yini
+# where "14350_A_20121123_042710_001717" is the utterance id and "yebo yini" is
+# the actual words.
+if [ ! -z $dev_text ]; then
+  # Original token OOV rate
+  cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' |\
+    perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+    while(<>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+    $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+    printf("Seen $invoc out of $tot tokens; token OOV rate is %.2f\n", $oov_rate);' \
+    $toplevel_dir/input_lexicon.txt > $toplevel_dir/original_oov_rates
+
+  # New token OOV rate
+  cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' |\
+    perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+    while(<>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+    $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+    printf("Seen $invoc out of $tot tokens; token OOV rate is %.2f\n", $oov_rate);' \
+    $toplevel_dir/lexiconp.txt > $toplevel_dir/new_oov_rates
+
+  # Original type OOV rate
+  cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | sort -u |\
+    perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+    while(<>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+    $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+    printf("Seen $invoc out of $tot types; type OOV rate is %.2f\n", $oov_rate);' \
+    $toplevel_dir/input_lexicon.txt >> $toplevel_dir/original_oov_rates
+
+  # New type OOV rate
+  cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | sort -u |\
+    perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+    while(<>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+    $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+    printf("Seen $invoc out of $tot types; type OOV rate is %.2f\n", $oov_rate);' \
+    $toplevel_dir/lexiconp.txt >> $toplevel_dir/new_oov_rates
+fi
+
+exit 0;
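+# Example usage (the paths below are hypothetical):
+#   local/extend_lexicon.sh --num-prons 1000000 \
+#     data/local/lexicon_orig.txt data/local/extend data/dev10h/text
+# and later, when making G.fst (the options are the ones discussed above):
+#   local/arpa2G.sh --oov-prob-file data/local/extend/oov2prob \
+#     --unk-fraction 0.33 ...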
+
+###BELOW HERE IS JUST COMMENTS ###########
+
+#cat /export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt | \
+for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do
+cat /export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt | \
+  perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+   while(<>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+   $oov_rate = 100.0 * (1.0 - ($invoc / $tot)); printf("Seen $invoc out of $tot tokens; OOV rate is %.2f\n", $oov_rate); ' $x
+done
+# OOV rate measured on the words in the FullLP lexicon.
+#Seen 13675 out of 60613 tokens; OOV rate is 77.44
+#Seen 26936 out of 60613 tokens; OOV rate is 55.56
+
+for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do
+cat data/dev10h/text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | \
+  perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+   while(<>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+   $oov_rate = 100.0 * (1.0 - ($invoc / $tot)); printf("Seen $invoc out of $tot tokens; OOV rate is %.2f\n", $oov_rate); ' $x
+done
+# zulu limitedlp, dev10h:
+# With the million-word lexicon we more than halve the per-token OOV rate of dev10h.
+#Seen 44680 out of 66891 tokens; OOV rate is 33.20
+#Seen 57095 out of 66891 tokens; OOV rate is 14.64
diff --git a/egs/babel/s5d/local/extract_oov_words.pl b/egs/babel/s5d/local/extract_oov_words.pl
new file mode 100755
index 00000000000..08f8f5d1436
--- /dev/null
+++ b/egs/babel/s5d/local/extract_oov_words.pl
@@ -0,0 +1,70 @@
+#!/usr/bin/env perl
+# Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0.
+
+use Data::Dumper;
+$Data::Dumper::Indent = 1;
+
+binmode STDOUT, ":utf8";
+binmode STDIN, ":utf8";
+
+$ignore_oov = 0;
+$ignore_first_field = 0;
+for($x = 0; $x < 2; $x++) {
+  if ($ARGV[0] eq "-f") {
+    shift @ARGV;
+    $field_spec = shift @ARGV;
+    if ($field_spec =~ m/^\d+$/) {
+      $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
+    }
+    if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
+      if ($1 ne "") {
+        $field_begin = $1 - 1;  # Change to zero-based indexing.
+      }
+      if ($2 ne "") {
+        $field_end = $2 - 1;  # Change to zero-based indexing.
+      }
+    }
+    if (!defined $field_begin && !defined $field_end) {
+      die "Bad argument to -f option: $field_spec";
+    }
+  }
+}
+
+$symtab = shift @ARGV;
+if (!defined $symtab) {
+  print STDERR "Usage: extract_oov_words.pl [options] symtab [input transcriptions] > output transcriptions\n" .
+    "options: [--map-oov <oov-symbol>] [-f <field-range>]\n" .
+    "note: <field-range> can look like 4-5, or 4-, or 5-, or 1.\n";
+}
+
+
+open(F, "<:encoding(UTF-8)", $symtab) || die "Error opening symbol table file $symtab";
+while(<F>) {
+  @A = split(" ", $_);
+  @A == 2 || die "bad line in symbol table file: $_";
+
+  if ( not defined( $sym2int{$A[0]} ) ) {
+    $sym2int{$A[0]} = [];
+  }
+  push @{ $sym2int{$A[0]} }, $A[1] + 0;
+}
+
+
+$lines=0;
+while (<>) {
+  @A = split(" ", $_);
+  @B = ();
+  for ($n = 0; $n < @A; $n++) {
+    if ( (!defined $field_begin || $n >= $field_begin)
+         && (!defined $field_end || $n <= $field_end)) {
+      $a = $A[$n];
+      $i = $sym2int{$a};
+      if (!defined ($i)) {
+        print $a . "\n";
+      }
+    }
+  }
+}
+
+
diff --git a/egs/babel/s5d/local/filter_keywords.pl b/egs/babel/s5d/local/filter_keywords.pl
new file mode 100755
index 00000000000..a724ad77f1a
--- /dev/null
+++ b/egs/babel/s5d/local/filter_keywords.pl
@@ -0,0 +1,68 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use Encode;
+
+my $Usage = <<EOU;
+Usage: filter_keywords.pl <dict-file> <in-file|-> <out-file|->
+
+EOU
+
+if(@ARGV != 3) {
+  die $Usage;
+}
+
+# Get parameters
+my $dictin = shift @ARGV;
+my $filein = shift @ARGV;
+my $fileout = shift @ARGV;
+
+# Open dictionary
+if (!open(D, "<$dictin")) {print "Fail to open dictionary: $dictin\n"; exit 1;}
+
+# Get input source
+my $source = "";
+if ($filein eq "-") {
+  $source = "STDIN";
+} else {
+  if (!open(I, "<$filein")) {print "Fail to open input file: $filein\n"; exit 1;}
+  $source = "I";
+}
+
+# Open output fst list
+my $sourceout = "";
+if ($fileout ne "-") {
+  if (!open(O, ">$fileout")) {print "Fail to open output file: $fileout\n"; exit 1;}
+  $sourceout = "O";
+}
+
+# Read in the dictionary
+my %dict = ();
+while (<D>) {
+  chomp;
+  my @col = split(" ", $_);
+  my $word = shift @col;
+  my $original_w = $word;
+  $word =~ tr/a-z/A-Z/;
+  $dict{$word} = $original_w;
+}
+
+# Process the queries
+my $word;
+while (<$source>) {
+  chomp;
+  my @col = split(" ", $_);
+  foreach $word (@col) {
+    if (defined($dict{$word})) {
+      eval "print $sourceout \"$dict{$word} \"";
+    } else {
+      eval "print $sourceout \"$word \"";
+    }
+  }
+  eval "print $sourceout \"\n\"";
+}
+
+close(D);
+if ($filein ne "-") {close(I);}
+if ($fileout ne "-") {close(O);}
diff --git a/egs/babel/s5d/local/filter_kwslist.pl b/egs/babel/s5d/local/filter_kwslist.pl
new file mode 100755
index 00000000000..7c57b62517a
--- /dev/null
+++ b/egs/babel/s5d/local/filter_kwslist.pl
@@ -0,0 +1,55 @@
+#!/usr/bin/env perl
+
+# Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0.
+#
+use strict;
+use warnings;
+use Getopt::Long;
+use XML::Simple;
+
+my $data = XMLin(\*STDIN);
+my $duptime = $ARGV[0];
+
+#print Dumper($data);
+
+# Filters duplicate keyword hits that have the same keyword id and about the same time.
+# Relies on the fact that its input is sorted from largest to smallest score.
+
+foreach my $kwentry (@{$data->{detected_kwlist}}) {
+  #print "$kwentry->{kwid}\n";
+  my $prev_time;
+  my $prev_file;
+
+  if(ref($kwentry->{kw}) eq 'ARRAY'){
+    my @arr = @{$kwentry->{kw}};
+    my @newarray = ();
+
+    push @newarray, $arr[0];
+    #$arr[0]->{tbeg} . "\n";
+    for (my $i = 1; $i < scalar(@arr); $i +=1) {
+
+      my $found = 0;
+      foreach my $kw (@newarray) {
+        if (( abs($arr[$i]->{tbeg} - $kw->{tbeg}) < $duptime ) &&
+            ( $arr[$i]->{channel} == $kw->{channel}) &&
+            ( $arr[$i]->{file} eq $kw->{file}) ) {
+
+          $found = 1;
+
+          #print $arr[$i]->{tbeg} . "\n";
+        }
+      }
+      if ( $found == 0 ) {
+        push @newarray, $arr[$i];
+      }
+    }
+
+    $kwentry->{kw} = \@newarray;
+  }else{
+    #print $kwentry->{kw}->{tbeg} . "\n";
+  }
+#  print "$kwentry->{kwid}\t$kwentry->{kwtext}\n";
+}
+my $xml = XMLout($data, RootName => "kwslist", NoSort=>1);
+print $xml;
diff --git a/egs/babel/s5d/local/fix_kwslist.pl b/egs/babel/s5d/local/fix_kwslist.pl
new file mode 100755
index 00000000000..33c6dc30e82
--- /dev/null
+++ b/egs/babel/s5d/local/fix_kwslist.pl
@@ -0,0 +1,89 @@
+#!/usr/bin/env perl
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Jan Trmal)
+# Apache 2.0.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+use XML::Simple;
+use Data::Dumper;
+use File::Basename;
+
+sub mysort {
+  if ($a->{kwid} =~ m/[0-9]+$/ and $b->{kwid} =~ m/[0-9]+$/) {
+    ($a->{kwid} =~ /([0-9]*)$/)[0] <=> ($b->{kwid} =~ /([0-9]*)$/)[0]
+  } else {
+    $a->{kwid} cmp $b->{kwid};
+  }
+}
+
+my $Usage = <<EOU;
+Usage: fix_kwslist.pl [options] <kwlist.xml> <kwslist-in.xml> <fixed-kwslist-out.xml|->
+ e.g.: fix_kwslist.pl --kwlist-filename=kwlist.xml kwlist.xml kwslist.xml fixed_kwslist.xml
+
+Allowed options:
+  --kwlist-filename  : Kwlist filename with version info (string, default = "")
+
+EOU
+
+my $kwlist_filename="";
+GetOptions('kwlist-filename=s' => \$kwlist_filename);
+
+if (@ARGV != 3) {
+  die $Usage;
+}
+
+# Work out the input/output source
+my $kwlist_in = shift @ARGV;
+my $kwslist_in = shift @ARGV;
+my $fixed_kwslist_out = shift @ARGV;
+
+my $KW = XMLin($kwlist_in);
+my $KWS = XMLin($kwslist_in);
+
+# Extract keywords from kwlist.xml
+my %kwlist;
+my $language = $KW->{language};
+foreach my $kwentry (@{$KW->{kw}}) {
+  $kwlist{$kwentry->{kwid}} = 1;
+}
+
+# Now work on the kwslist
+$KWS->{language} = $language;
+if ($kwlist_filename ne "") {
+  $KWS->{kwlist_filename} = basename($kwlist_filename);
+} elsif ($KWS->{kwlist_filename} eq "") {
+  $KWS->{kwlist_filename} = basename($kwlist_in);
+}
+foreach my $kwentry (@{$KWS->{detected_kwlist}}) {
+  if (defined($kwlist{$kwentry->{kwid}})) {
+    delete $kwlist{$kwentry->{kwid}};
+  }
+}
+
+# Empty entries...
+foreach my $kw (keys %kwlist) {
+  my %empty;
+  my @tmp = [];
+  $empty{search_time} = 1;
+  $empty{kwid} = $kw;
+  $empty{oov_count} = 0;
+  push(@{$KWS->{detected_kwlist}}, \%empty);
+}
+
+my @sorted = sort mysort @{$KWS->{detected_kwlist}};
+$KWS->{detected_kwlist} = \@sorted;
+
+my $xml = XMLout($KWS, RootName => "kwslist", NoSort=>0);
+if ($fixed_kwslist_out eq "-") {
+  print $xml;
+} else {
+  if (!open(O, ">$fixed_kwslist_out")) {
+    print "Fail to open output file: $fixed_kwslist_out\n";
+    exit 1;
+  }
+  print O $xml;
+  close(O);
+}
diff --git a/egs/babel/s5d/local/generate_confusion_matrix.sh b/egs/babel/s5d/local/generate_confusion_matrix.sh
new file mode 100755
index 00000000000..48263e729de
--- /dev/null
+++ b/egs/babel/s5d/local/generate_confusion_matrix.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+# Copyright 2014  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+acwt=0.1
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 5 ]; then
+  echo "Usage: $0 [options] <data-or-graph-dir> <model-dir> <ali-dir> <lat-dir> <work-dir>"
+  echo " e.g.: local/generate_confusion_matrix.sh --nj 32 exp/sgmm5/graph exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix"
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --nj <nj>        # number of parallel jobs"
+  echo "  --cmd <cmd>      # Command to run in parallel with"
+  echo "  --acwt <acwt>    # Acoustic model weight. Value will be used for 1-best path decoding of the lattices"
+  echo ""
+  echo "Please note that the output confusion matrix will be phoneme-based"
+  echo "and all the phone contexts (singleton, intra, begin, end) or phoneme"
+  echo "tags (such as tone or stress) will be collapsed into a single monophone"
+  echo ""
+  echo "The output format is line oriented."
+  echo "Each line can have one of these four formats (A, B being different phones, <eps> a special symbol):"
+  echo "  A A count      #Number of hits, i.e. correctly determined phones"
+  echo "  A B count      #Number of substitutions of A with B"
+  echo "  A <eps> count  #Number of deletions"
+  echo "  <eps> A count  #Number of insertions"
+  exit 1;
+fi
+
+set -u
+set -e
+set -o pipefail
+
+data=$1; shift
+modeldir=$1; shift
+alidir=$1; shift
+latdir=$1; shift
+wdir=$1; shift
+
+model=$modeldir/final.mdl
+[ ! -f $model ] && echo "File $model does not exist!" && exit 1
+phones=$data/phones.txt
+[ ! -f $phones ] && echo "File $phones does not exist!" && exit 1
+
+! ali_nj=`cat $alidir/num_jobs` && echo "Could not open the file $alidir/num_jobs" && exit 1
+! lat_nj=`cat $latdir/num_jobs` && echo "Could not open the file $latdir/num_jobs" && exit 1
+if [ $ali_nj -ne $lat_nj ] ; then
+  echo "Alignments num_jobs and lattices num_jobs mismatch!"
+  exit 1
+fi
+[ ! $nj -le $ali_nj ] && echo "Number of jobs is too high (max is $ali_nj)." && nj=$ali_nj
+
+mkdir -p $wdir/log
+
+cat $data/phones.txt | sed 's/_[B|E|I|S]//g' |\
+  sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g' > $wdir/phones.txt
+
+echo "Converting alignments to phone sequences..."
+$cmd JOB=1:$nj $wdir/log/ali_to_phones.JOB.log \
+  align-text \
+    ark:\<\( \
+      ali-to-phones $model ark:"gunzip -c $alidir/ali.JOB.gz|" ark,t:- \|\
+      int2sym.pl -f 2- $wdir/phones.txt - \) \
+    ark:\<\( \
+      lattice-to-phone-lattice $model ark:"gunzip -c $latdir/lat.JOB.gz|" ark:- \| \
+      lattice-best-path --acoustic-scale=$acwt ark:- ark,t:- ark:/dev/null \| \
+      int2sym.pl -f 2- $wdir/phones.txt - \) \
+    ark:$wdir/confusions.JOB.txt || exit 1
+
+confusion_files=""
+for i in `seq 1 $nj` ; do
+  confusion_files="$confusion_files $wdir/confusions.$i.txt"
+done
+
+echo "Converting statistics..."
+cat $confusion_files | cut -f 2- -d ' ' | sed 's/ *; */\n/g'| sort | uniq -c | \
+  grep -v -E '<eps>|<s>|</s>|SIL' | \
+  perl -ane '
+    die unless scalar @F == 3;
+    print "$F[1] $F[2] $F[0]\n";
+  ' > $wdir/confusions.txt
+
+exit 0
+#-echo "Converting alignments to phone sequences..."
+#-$cmd JOB=1:$nj $wdir/log/ali_to_phones.JOB.log \
+#-  ali-to-phones $model ark:"gunzip -c $alidir/ali.JOB.gz|" ark,t:- \|\
+#-  int2sym.pl -f 2- $wdir/phones.txt - \> $wdir/ali.JOB.txt
+#-
+#-echo "Converting lattices to phone sequences..."
+#-$cmd JOB=1:$nj $wdir/log/lat_to_phones.JOB.log \
#-  lattice-to-phone-lattice $model ark:"gunzip -c $latdir/lat.JOB.gz|" ark:- \| \
+#-  lattice-best-path --acoustic-scale=$acwt ark:- ark,t:- ark:/dev/null \| \
+#-  int2sym.pl -f 2- $wdir/phones.txt - \> $wdir/lat.JOB.txt
+
diff --git a/egs/babel/s5d/local/generate_example_kws.sh b/egs/babel/s5d/local/generate_example_kws.sh
new file mode 100755
index 00000000000..e90752926b3
--- /dev/null
+++ b/egs/babel/s5d/local/generate_example_kws.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+
+
+if [ $# -ne 2 ]; then
+  echo "Usage: local/generate_example_kws.sh <data-dir> <kws-data-dir>"
+  echo " e.g.: local/generate_example_kws.sh data/test_eval92/ data/kws/"
+  exit 1;
+fi
+
+datadir=$1;
+kwsdatadir=$2;
+text=$datadir/text;
+
+mkdir -p $kwsdatadir;
+
+# Generate keywords; we generate 20 unigram keywords with at least 20 counts,
+# 20 bigram keywords with at least 4 counts and 10 trigram keywords with at
+# least 3 counts.
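+# (To make the selection rule in the perl snippet below concrete -- the
+# counts here are invented: with min_count=20 for unigrams, words occurring
+# exactly 20 times are taken first; if fewer than 20 keywords are found,
+# the threshold is raised to 21, 22, ... up to max_count=100 until the
+# quota is filled.  The same sweep starts at 4 for bigrams and 3 for
+# trigrams.)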
+cat $text | perl -e '
+  %unigram = ();
+  %bigram = ();
+  %trigram = ();
+  while(<>) {
+    chomp;
+    @col=split(" ", $_);
+    shift @col;
+    for($i = 0; $i < @col; $i++) {
+      # unigram case
+      if (!defined($unigram{$col[$i]})) {
+        $unigram{$col[$i]} = 0;
+      }
+      $unigram{$col[$i]}++;
+
+      # bigram case
+      if ($i < @col-1) {
+        $word = $col[$i] . " " . $col[$i+1];
+        if (!defined($bigram{$word})) {
+          $bigram{$word} = 0;
+        }
+        $bigram{$word}++;
+      }
+
+      # trigram case
+      if ($i < @col-2) {
+        $word = $col[$i] . " " . $col[$i+1] . " " . $col[$i+2];
+        if (!defined($trigram{$word})) {
+          $trigram{$word} = 0;
+        }
+        $trigram{$word}++;
+      }
+    }
+  }
+
+  $max_count = 100;
+  $total = 20;
+  $current = 0;
+  $min_count = 20;
+  while ($current < $total && $min_count <= $max_count) {
+    foreach $x (keys %unigram) {
+      if ($unigram{$x} == $min_count) {
+        print "$x\n";
+        $unigram{$x} = 0;
+        $current++;
+      }
+      if ($current == $total) {
+        last;
+      }
+    }
+    $min_count++;
+  }
+
+  $total = 20;
+  $current = 0;
+  $min_count = 4;
+  while ($current < $total && $min_count <= $max_count) {
+    foreach $x (keys %bigram) {
+      if ($bigram{$x} == $min_count) {
+        print "$x\n";
+        $bigram{$x} = 0;
+        $current++;
+      }
+      if ($current == $total) {
+        last;
+      }
+    }
+    $min_count++;
+  }
+
+  $total = 10;
+  $current = 0;
+  $min_count = 3;
+  while ($current < $total && $min_count <= $max_count) {
+    foreach $x (keys %trigram) {
+      if ($trigram{$x} == $min_count) {
+        print "$x\n";
+        $trigram{$x} = 0;
+        $current++;
+      }
+      if ($current == $total) {
+        last;
+      }
+    }
+    $min_count++;
+  }
+  ' > $kwsdatadir/raw_keywords.txt
+
+echo "Keywords generation succeeded"
diff --git a/egs/babel/s5d/local/generate_phoneme_transcription.sh b/egs/babel/s5d/local/generate_phoneme_transcription.sh
new file mode 100755
index 00000000000..4ef0e556277
--- /dev/null
+++ b/egs/babel/s5d/local/generate_phoneme_transcription.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# Copyright 2014  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+acwt=0.1
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. utils/parse_options.sh || exit 1;
+
+if [ $# != 4 ]; then
+  echo "Usage: $0 [options] <data-or-graph-dir> <model-dir> <lat-dir> <work-dir>"
+  echo " e.g.: local/generate_phoneme_transcription.sh --nj 32 exp/sgmm5/graph exp/sgmm5 exp/sgmm5_denlats exp/phone_transcription"
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --nj <nj>        # number of parallel jobs"
+  echo "  --cmd <cmd>      # Command to run in parallel with"
+  echo "  --acwt <acwt>    # Acoustic model weight. Value will be used for 1-best path decoding of the lattices"
+  echo ""
+  echo "The output (in <work-dir>/phones.txt) is the 1-best phoneme"
+  echo "transcription of the lattices; all the phone contexts (singleton,"
+  echo "intra, begin, end) and phoneme tags (such as tone or stress) are"
+  echo "collapsed into single monophones."
+  exit 1;
+fi
+
+set -u
+set -e
+set -o pipefail
+
+data=$1; shift
+modeldir=$1; shift
+latdir=$1; shift
+wdir=$1; shift
+
+model=$modeldir/final.mdl
+[ ! -f $model ] && echo "File $model does not exist!" && exit 1
+phones=$data/phones.txt
+[ ! -f $phones ] && echo "File $phones does not exist!" && exit 1
+
+! lat_nj=`cat $latdir/num_jobs` && echo "Could not open the file $latdir/num_jobs" && exit 1
+[ ! $nj -le $lat_nj ] && echo "Number of jobs is too high (max is $lat_nj)." && nj=$lat_nj
+
+mkdir -p $wdir/log
+
+cat $data/phones.txt | sed 's/_[B|E|I|S]//g' |\
+  sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g' > $wdir/phone_map
+
+echo "Converting lattices to phone sequences..."
+$cmd JOB=1:$nj $wdir/log/phones.JOB.log \
+  lattice-to-phone-lattice $model ark:"gunzip -c $latdir/lat.JOB.gz|" ark:- \|\
+  lattice-best-path --acoustic-scale=$acwt ark:- ark,t:- ark:/dev/null \|\
+  int2sym.pl -f 2- $wdir/phone_map - \> $wdir/phones.JOB.txt || exit 1
+
+transcription_files=""
+for i in `seq 1 $nj` ; do
+  transcription_files="$transcription_files $wdir/phones.$i.txt"
+done
+
+echo "Merging the per-job outputs..."
+cat $transcription_files | sort > $wdir/phones.txt
+
+exit 0
+#-echo "Converting alignments to phone sequences..."
+#-$cmd JOB=1:$nj $wdir/log/ali_to_phones.JOB.log \
+#-  ali-to-phones $model ark:"gunzip -c $alidir/ali.JOB.gz|" ark,t:- \|\
+#-  int2sym.pl -f 2- $wdir/phones.txt - \> $wdir/ali.JOB.txt
+#-
+#-echo "Converting lattices to phone sequences..."
+#-$cmd JOB=1:$nj $wdir/log/lat_to_phones.JOB.log \
+#-  lattice-to-phone-lattice $model ark:"gunzip -c $latdir/lat.JOB.gz|" ark:- \| \
+#-  lattice-best-path --acoustic-scale=$acwt ark:- ark,t:- ark:/dev/null \| \
+#-  int2sym.pl -f 2- $wdir/phones.txt - \> $wdir/lat.JOB.txt
+
diff --git a/egs/babel/s5d/local/generate_proxy_keywords.sh b/egs/babel/s5d/local/generate_proxy_keywords.sh
new file mode 100755
index 00000000000..584f7d7902e
--- /dev/null
+++ b/egs/babel/s5d/local/generate_proxy_keywords.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+
+# Copyright 2012-2014  Guoguo Chen
+# Apache 2.0.
+
+# Begin configuration section.
+nj=8
+cmd=run.pl
+beam=-1             # Beam for proxy FST, -1 means no prune
+phone_beam=-1       # Beam for KxL2xE FST, -1 means no prune
+nbest=-1            # Use top n best proxy keywords in proxy FST, -1 means all
+                    # proxies
+phone_nbest=50      # Use top n best phone sequences in KxL2xE, -1 means all
+                    # phone sequences
+confusion_matrix=   # If supplied, using corresponding E transducer
+count_cutoff=1      # Minimal count to be considered in the confusion matrix;
+                    # will ignore phone pairs that have count less than this.
+pron_probs=false    # If true, then lexicon looks like:
+                    # Word Prob Phone1 Phone2...
+# End configuration section.
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+echo "$0 " "$@"
+. parse_options.sh || exit 1;
+
+if [ $# -ne 1 ]; then
+  echo "Generate proxy keywords for IV/OOV keywords. Phone confusions will be"
+  echo "used when generating the proxies if the confusion matrix is supplied."
+  echo "If you are going to use the confusion matrix, please use the following"
+  echo "format for the file \$confusion_matrix:"
+  echo "  p1 p2 count1        // For substitution"
+  echo "  p3 <eps> count2     // For deletion"
+  echo "  <eps> p4 count3     // For insertion"
+  echo ""
+  echo "Proxy keywords are generated using:"
+  echo "K x L2 x E x L1'"
+  echo "where K is a keyword FST, L2 is a lexicon that contains pronunciations"
+  echo "of keywords in K, E is an edit distance FST that contains the phone"
+  echo "confusions and L1 is the original lexicon."
+  echo ""
+  echo "The script assumes that L1.lex, L2.lex, words.txt and keywords.txt have"
+  echo "been prepared and stored in the directory <kws-data-dir>."
+  echo ""
+  echo "Usage: local/generate_proxy_keywords.sh <kws-data-dir>"
+  echo " e.g.: local/generate_proxy_keywords.sh data/dev10h/kws_proxy/"
+  exit 1;
+fi
+
+set -e
+set -o pipefail
+
+kwsdatadir=$1
+
+# Checks some files.
+for f in $kwsdatadir/L1.lex $kwsdatadir/L2.lex \
+  $kwsdatadir/words.txt $kwsdatadir/keywords.txt; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1
+done
+
+# Gets phone symbols
+phone_start=2
+if $pron_probs; then
+  phone_start=3
+fi
+
+pron_probs_param="";
+if $pron_probs; then
+  pron_probs_param="--pron-probs";
+fi
+
+cat $kwsdatadir/L1.lex | \
+  perl -e '
+  while ( $line = <STDIN> ) {
+    chomp $line;
+    ($word, $pron) = split " ", $line, 2;
+    $pron = join(" ", split(" ", $pron));
+    push @{$LEX{$pron}}, $word;
+  }
+
+  open(L1, "| sort -u > $ARGV[0]") or die "Cannot open $ARGV[0]\n";
+  open(MAP, "| sort -u > $ARGV[1]") or die "Cannot open $ARGV[1]\n";
+  foreach $pron (keys %LEX) {
+    $head = $LEX{$pron}->[0];
+    print L1 "$head $pron\n";
+    foreach $alt (@{$LEX{$pron}}) {
+      print MAP "0 0 $alt $head\n";
+    }
+  }
+  print MAP "0\n";
+  close(L1);
+  close(MAP);
+' $kwsdatadir/L1_dedup.lex $kwsdatadir/L1.revdup.fst.txt
+
+fstcompile --isymbols=$kwsdatadir/words.txt --osymbols=$kwsdatadir/words.txt $kwsdatadir/L1.revdup.fst.txt | \
+  fstarcsort --sort_type=olabel - $kwsdatadir/L1.revdup.fst
+
+ndisambig=`utils/add_lex_disambig.pl \
+  $pron_probs_param $kwsdatadir/L1_dedup.lex $kwsdatadir/L1_disambig.lex`
+ndisambig=$[$ndisambig+1]; # add one disambig symbol for silence in lexicon FST.
+( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $kwsdatadir/disambig.txt
+
+cat $kwsdatadir/L2.lex $kwsdatadir/L1.lex |\
+  awk '{for(i='$phone_start'; i <= NF; i++) {print $i;}}' |\
+  sort -u | sed '1i\<eps>' |\
+  cat - $kwsdatadir/disambig.txt | awk 'BEGIN{x=0} {print $0"\t"x; x++;}' \
+  > $kwsdatadir/phones.txt
+
+# Compiles lexicon into FST
+cat $kwsdatadir/L2.lex |\
+  utils/make_lexicon_fst.pl $pron_probs_param - |\
+  fstcompile --isymbols=$kwsdatadir/phones.txt \
+    --osymbols=$kwsdatadir/words.txt - |\
+  fstinvert | fstarcsort --sort_type=olabel > $kwsdatadir/L2.fst
+
+echo $kwsdatadir/phones.txt
+phone_disambig_symbol=`grep \#0 $kwsdatadir/phones.txt | awk '{print $2}'`
+word_disambig_symbol=`grep \#0 $kwsdatadir/words.txt | awk '{print $2}'`
+phone_disambig_symbols=`grep "^#" $kwsdatadir/phones.txt |\
+  awk '{print $2}' | tr "\n" " "`
+word_disambig_symbols=`grep "^#" $kwsdatadir/words.txt |\
+  awk '{print $2}' | tr "\n" " "`
+cat $kwsdatadir/L1_disambig.lex |\
+  utils/make_lexicon_fst.pl $pron_probs_param - |\
+  fstcompile --isymbols=$kwsdatadir/phones.txt \
+    --osymbols=$kwsdatadir/words.txt - |\
+  fstaddselfloops "echo $phone_disambig_symbol |" \
+    "echo $word_disambig_symbol |" |\
+  fstdeterminize | fstrmsymbols "echo $phone_disambig_symbols|" |\
+  fstrmsymbols --remove-from-output=true "echo $word_disambig_symbols|" |\
+  fstarcsort --sort_type=ilabel > $kwsdatadir/L1.fst
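+# (A toy illustration of the dedup step above -- the words are invented: if
+# "cat" and "kat" share the pron "k ae t", L1_dedup.lex keeps the single
+# entry "cat k ae t", while L1.revdup.fst gets the arcs cat:cat and kat:cat,
+# so proxies computed for the kept head entry are re-attached to every
+# alternative spelling at the very end of this script.)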
+# Compiles E.fst
+confusion_matrix_param=""
+if [ ! -z $confusion_matrix ]; then
+  echo "$0: Using confusion matrix, normalizing"
+  local/count_to_logprob.pl --cutoff $count_cutoff \
+    $confusion_matrix $kwsdatadir/confusion.txt
+  confusion_matrix_param="--confusion-matrix $kwsdatadir/confusion.txt"
+fi
+cat $kwsdatadir/phones.txt |\
+  grep -v -E "<.*>" | grep -v "SIL" | awk '{print $1;}' |\
+  local/build_edit_distance_fst.pl --boundary-off=true \
+    $confusion_matrix_param - - |\
+  fstcompile --isymbols=$kwsdatadir/phones.txt \
+    --osymbols=$kwsdatadir/phones.txt - $kwsdatadir/E.fst
+
+# Pre-composes L2 and E, for the sake of efficiency
+fstcompose $kwsdatadir/L2.fst $kwsdatadir/E.fst |\
+  fstarcsort --sort_type=ilabel > $kwsdatadir/L2xE.fst
+
+keywords=$kwsdatadir/keywords.int
+# Prepares for parallelization
+cat $kwsdatadir/keywords.txt |\
+  utils/sym2int.pl -f 2- $kwsdatadir/words.txt | sort -R > $keywords
+
+nof_keywords=`cat $keywords|wc -l`
+if [ $nj -gt $nof_keywords ]; then
+  nj=$nof_keywords
+  echo "$0: Number of jobs too large, using $nj instead"
+fi
+
+# Generates the proxy keywords
+mkdir -p $kwsdatadir/split/log
+$cmd JOB=1:$nj $kwsdatadir/split/log/proxy.JOB.log \
+  split -n r/JOB/$nj $keywords \| \
+  generate-proxy-keywords --verbose=1 \
+    --proxy-beam=$beam --proxy-nbest=$nbest \
+    --phone-beam=$phone_beam --phone-nbest=$phone_nbest \
+    $kwsdatadir/L2xE.fst $kwsdatadir/L1.fst ark:- ark,t:$kwsdatadir/split/proxy.JOB.fsts
+
+proxy_fsts=""
+for j in `seq 1 $nj`; do
+  proxy_fsts="$proxy_fsts $kwsdatadir/split/proxy.$j.fsts"
+done
+cat $proxy_fsts | fsttablecompose $kwsdatadir/L1.revdup.fst ark:- ark:- | \
+  fsts-project ark:- ark:$kwsdatadir/keywords.fsts
diff --git a/egs/babel/s5d/local/kaldi_dir2uem.py b/egs/babel/s5d/local/kaldi_dir2uem.py
new file mode 100755
index 00000000000..26b4ec1aaba
--- /dev/null
+++ b/egs/babel/s5d/local/kaldi_dir2uem.py
@@ -0,0 +1,101 @@
+#! /usr/bin/env python
+
+import argparse, sys
+from argparse import ArgumentParser
+import re
+
+def main():
+    parser = ArgumentParser(description='Convert kaldi data directory to uem dat files',
+                            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--verbose', type=int, \
+                        dest='verbose', default=0, \
+                        help='Give higher verbose for more logging')
+    parser.add_argument('--get-text', action='store_true', \
+                        help='Get text in dat file')
+    parser.add_argument('--prefix', type=str, \
+                        help='Add db file name as db-<prefix>-{utt/spk}.dat')
+    parser.add_argument('kaldi_dir', \
+                        help='Kaldi data directory')
+    parser.add_argument('output_dir', \
+                        help='Directory to store uem dat files')
+    parser.usage=':'.join(parser.format_usage().split(':')[1:]) \
+        + 'e.g. : %(prog)s --prefix 203-lao-v0 data/dev10h.seg CMU_db'
+    options = parser.parse_args()
+
+    if options.get_text:
+        try:
+            text_file = open(options.kaldi_dir+'/text', 'r')
+        except IOError as e:
+            repr(e)
+            sys.stderr.write("%s: No such file %s\n" % (sys.argv[0], options.kaldi_dir+'/text'))
+            sys.exit(1)
+
+    try:
+        segments_file = open(options.kaldi_dir+'/segments', 'r')
+    except IOError as e:
+        repr(e)
+        sys.stderr.write("%s: No such file %s\n" % (sys.argv[0], options.kaldi_dir+'/segments'))
+        sys.exit(1)
+
+    try:
+        scp_file = open(options.kaldi_dir+'/wav.scp', 'r')
+    except IOError as e:
+        repr(e)
+        sys.stderr.write("%s: No such file %s\n" % (sys.argv[0], options.kaldi_dir+'/wav.scp'))
+        sys.exit(1)
+
+    reco2file_map = {}
+    for line in scp_file.readlines():
+        splits = line.strip().split()
+        m = re.search(r".*/(?P<file_name>[0-9A-Za-z_]*\.(sph|wav)).*", line)
+        if not m:
+            sys.stderr.write("%s does not contain a valid speech file (.wav or .sph)\n" % line.strip())
+            sys.exit(1)
+        reco2file_map[splits[0]] = m.group('file_name')
+    # End for
+
+    spk2utt_map = {}
+
+    if options.prefix == None:
+        prefix = options.kaldi_dir.split('/')[-1].split('.')[0]
+    else:
+        prefix = options.prefix
+
+    try:
+        utt_dat = open(options.output_dir+'/db-'+prefix+'-utt.dat', 'w')
+        spk_dat = open(options.output_dir+'/db-'+prefix+'-spk.dat', 'w')
+    except IOError as e:
+        repr(e)
+        sys.stderr.write("%s: Could not write dat files in %s\n" % (sys.argv[0], options.output_dir))
+        sys.exit(1)
+
+    for line in segments_file.readlines():
+        utt_id, file_id, start, end = line.strip().split()
+
+        if (options.get_text):
+            splits = text_file.readline().split()
+            while splits[0] < utt_id:
+                splits = text_file.readline().split()
+            text = ' '.join(splits[1:])
+        else:
+            text = ""
+
+        utt_dat.write("{UTTID %s} {UTT %s} {SPK %s} {FROM %s} {TO %s} {TEXT %s}\n" % (utt_id, utt_id, file_id, start, end, text))
+        spk2utt_map.setdefault(file_id, [])
+        spk2utt_map[file_id].append(utt_id)
+
+    for spk, utts in spk2utt_map.items():
+        try:
+            spk_dat.write("{SEGS %s} {ADC %s} {CONV %s.wav} {CHANNEL 1} {DUR }\n" % (' '.join(utts), reco2file_map[spk], spk))
+        except KeyError as e:
+            repr(e)
+            sys.stderr.write("%s: Error in getting file for %s\n" % (sys.argv[0], spk))
+            sys.exit(1)
+    # End for
+
+    segments_file.close()
+    utt_dat.close()
+    spk_dat.close()
+
+if __name__ == '__main__':
+    main()
diff --git a/egs/babel/s5d/local/kwords2indices.pl b/egs/babel/s5d/local/kwords2indices.pl
new file mode 100755
index 00000000000..5d5f0a3ad45
--- /dev/null
+++ b/egs/babel/s5d/local/kwords2indices.pl
@@ -0,0 +1,123 @@
+#!/usr/bin/env perl
+# Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0.
+
+use Data::Dumper;
+$Data::Dumper::Indent = 1;
+
+binmode STDOUT, ":utf8";
+binmode STDIN, ":utf8";
+
+sub permute {
+
+  my $last = pop @_;
+
+  unless(@_) {
+    return map([$_], @$last);
+  }
+
+  return map {
+    my $left = $_;
+    map([@$left, $_], @$last)
+  }
+  permute(@_);
+}
+
+$oov_count=0;
+
+$ignore_oov = 0;
+$ignore_first_field = 0;
+for($x = 0; $x < 2; $x++) {
+  if ($ARGV[0] eq "--map-oov") {
+    shift @ARGV; $map_oov = shift @ARGV;
+  }
+  if ($ARGV[0] eq "-f") {
+    shift @ARGV;
+    $field_spec = shift @ARGV;
+    if ($field_spec =~ m/^\d+$/) {
+      $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
+    }
+    if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
+      if ($1 ne "") {
+        $field_begin = $1 - 1;  # Change to zero-based indexing.
+      }
+      if ($2 ne "") {
+        $field_end = $2 - 1;  # Change to zero-based indexing.
+      }
+    }
+    if (!defined $field_begin && !defined $field_end) {
+      die "Bad argument to -f option: $field_spec";
+    }
+  }
+}
+
+$symtab = shift @ARGV;
+if (!defined $symtab) {
+  print STDERR "Usage: kwords2indices.pl [options] symtab [input transcriptions] > output transcriptions\n" .
+    "options: [--map-oov <oov-symbol>] [-f <field-range>]\n" .
+    "note: <field-range> can look like 4-5, or 4-, or 5-, or 1.\n";
+}
+open(F, "<:encoding(UTF-8)", $symtab) || die "Error opening symbol table file $symtab";
+while(<F>) {
+  @A = split(" ", $_);
+  @A == 2 || die "bad line in symbol table file: $_";
+
+  if ( not defined( $sym2int{$A[0]} ) ) {
+    $sym2int{$A[0]} = [];
+  }
+  push @{ $sym2int{$A[0]} }, $A[1] + 0;
+}
+#print Dumper(\%sym2int);
+
+if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric -> look it up
+  if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; }
+  $map_oov = $sym2int{$map_oov};
+}
+
+$max_warning = 20;
+$lines=0;
+while (<>) {
+  @A = split(" ", $_);
+  @B = ();
+  $lines = $lines + 1;
+  $undefined_words = 0;
+  for ($n = 1; $n < @A; $n++) {
+    $a = $A[$n];
+    $i = $sym2int{$a};
+    if (!defined ($i)) {
+      if (defined $map_oov) {
+        if ($num_warning++ < $max_warning) {
+          print STDERR "kwords2indices.pl: replacing $a with $map_oov\n";
+          if ($num_warning == $max_warning) {
+            print STDERR "kwords2indices.pl: not warning for OOVs any more times\n";
+          }
+        }
+        $i = [ $map_oov ];
+      } else {
+        $pos = $n+1;
+        die "kwords2indices.pl: undefined symbol $a (in position $pos)\n";
+      }
+      $undefined_words = $undefined_words + 1;
+    }
+    $a = $i;
+    push @B, $a;
+  }
+  #if ( defined $sym2int{$A[$n]} ) {
+  #  push @B, $sym2int{$A[$n]};
+  #} else {
+  #  push @B, [0];
+  #}
+  if ($undefined_words > 0) {
+    $oov_count = $oov_count + 1;
+  }
+  @C = permute @B;
+  #print Dumper(\@B);
+  #print Dumper(\@C);
+  foreach $phrase ( @C ) {
+    print "$A[0] ";
+    print join(" ", @{$phrase});
+    print "\n";
+  }
+}
+
+print STDERR "Remapped/ignored $oov_count phrases...\n";
+
diff --git a/egs/babel/s5d/local/kws_combine.sh b/egs/babel/s5d/local/kws_combine.sh
new file mode 100755
index 00000000000..8934faf7d30
--- /dev/null
+++ b/egs/babel/s5d/local/kws_combine.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+
+# Copyright 2013-2014  Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Script for system combination of the KWS posting lists
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+# Ntrue-scale
+ntrue_scale=1.1
+min_lmw=8
+max_lmw=12
+extraid=
+skip_scoring=false
+optimize_weights=false
+#end of configuration section.
+
+help_message="Usage: $(basename $0) [options] <data-dir> <lang-dir> <decode-dir1>[:lmwt-bias] <decode-dir2>[:lmwt-bias] [<decode-dir3>[:lmwt-bias] ... ] <out-dir>
+E.g.: $(basename $0) data/dev10h.pem data/lang exp/tri6_nnet/decode_dev10h.pem/kws_10/ exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/ exp/combine/dev10hx.pem
+"
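+# For example (the directories here are invented): passing
+#   exp/A/decode/kws_10:2 exp/B/decode/kws_10
+# gives system A the weight 2/3 and system B the weight 1/3 in the
+# combination below -- each trailing ":<number>" is divided by the sum of
+# all of them, and a missing one counts as 1.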
parse_options.sh || exit 1; + +if [ $# -lt 5 ]; then + printf "$help_message\n"; + exit 1; +fi + +datadir=$1 +lang=$2 +odir=${@: -1} # last argument to the script +shift 2; +decode_dirs=( $@ ) # read the remaining arguments into an array +unset decode_dirs[${#decode_dirs[@]}-1] # 'pop' the last argument which is odir +num_sys=${#decode_dirs[@]} # number of systems to combine + + +if [ -z "$extraid" ] ; then + kwsdatadir=$datadir/kws + kwsoutputdir="$odir/kws" +else + kwsdatadir=$datadir/${extraid}_kws + kwsoutputdir="$odir/${extraid}_kws" +fi + +for f in $kwsdatadir/ecf.xml $kwsdatadir/kwlist.xml ; do + [ ! -f $f ] && echo "$0: file $f does not exist" && exit 1; +done +ecf=$kwsdatadir/ecf.xml +kwlist=$kwsdatadir/kwlist.xml + +# Duration +duration=`head -1 $ecf |\ + grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\ + perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2;}'` + +mkdir -p $kwsoutputdir/log + +total_sum=0 +for i in `seq 0 $[num_sys-1]`; do + decode_dir=${decode_dirs[$i]} + offset=`echo $decode_dir | cut -d: -s -f2` # add this to the lm-weight. + [ -z "$offset" ] && offset=1 + total_sum=$(($total_sum+$offset)) +done + +systems="" +for i in `seq 0 $[num_sys-1]`; do + decode_dir=${decode_dirs[$i]} + offset=`echo $decode_dir | cut -d: -s -f2` # add this to the lm-weight. + decode_dir=`echo $decode_dir | cut -d: -f1` + [ -z "$offset" ] && offset=1 + + weight=$(perl -e "print ($offset/$total_sum);") + if [ -f $decode_dir ] ; then + systems+="$weight $decode_dir " + else + kwsfile=$decode_dir/kwslist.unnormalized.xml + [ ! -f ${kwsfile} ] && echo "The file ${kwsfile} does not exist!" && exit 1 + systems+="$weight ${kwsfile} " + fi +done + +echo $systems + +# Combination of the weighted sum and power rule +$cmd PWR=1:9 $kwsoutputdir/log/combine_kws.PWR.log \ + mkdir -p ${kwsoutputdir}_PWR '&&' \ + local/naive_comb.pl --method=2 --power=0.PWR \ + $systems ${kwsoutputdir}_PWR/kwslist.unnormalized.xml || exit 1 + +$cmd PWR=1:9 $kwsoutputdir/log/postprocess_kws.PWR.log \ + utils/kwslist_post_process.pl --duration=${duration} --digits=3 \ + --normalize=true --Ntrue-scale=${ntrue_scale} \ + ${kwsoutputdir}_PWR/kwslist.unnormalized.xml \ + ${kwsoutputdir}_PWR/kwslist.xml || exit 1 + +echo "Scoring..." +if ! $skip_scoring ; then +$cmd PWR=1:9 $kwsoutputdir/log/score_kws.PWR.log \ + local/kws_score.sh --extraid "${extraid}" $datadir ${kwsoutputdir}_PWR || exit 1 +fi + + diff --git a/egs/babel/s5d/local/kws_data_prep.sh b/egs/babel/s5d/local/kws_data_prep.sh new file mode 100755 index 00000000000..3882c99ce6d --- /dev/null +++ b/egs/babel/s5d/local/kws_data_prep.sh @@ -0,0 +1,142 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0. + +# Begin configuration section. +case_insensitive=true +use_icu=true +icu_transform="Any-Lower" +silence_word= # Optional silence word to insert (once) between words of the transcript. +# End configuration section. + +echo $0 "$@" + +help_message=" + Usage: local/kws_data_prep.sh + e.g.: local/kws_data_prep.sh data/lang/ data/eval/ data/kws/ + Input is in : kwlist.xml, ecf.xml (rttm file not needed). + Output is in : keywords.txt, keywords_all.int, kwlist_invocab.xml, + kwlist_outvocab.xml, keywords.fsts + Note: most important output is keywords.fsts + allowed switches: + --case-sensitive # Shall we be case-sensitive or not? + # Please not the case-sensitivness depends + # on the shell locale! 
+ --use-uconv # Use the ICU uconv binary to normalize casing + --icu-transform # When using ICU, use this transliteration + +" + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + + +if [ $# -ne 3 ]; then + printf "FATAL: invalid number of arguments.\n\n" + printf "$help_message\n" + exit 1; +fi + +set -u +set -e +set -o pipefail + +langdir=$1; +datadir=$2; +kwsdatadir=$3; +keywords=$kwsdatadir/kwlist.xml + + +mkdir -p $kwsdatadir; + +cat $keywords | perl -e ' + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; + + use XML::Simple; + use Data::Dumper; + + my $data = XMLin(\*STDIN); + + #print Dumper($data->{kw}); + foreach $kwentry (@{$data->{kw}}) { + #print Dumper($kwentry); + print "$kwentry->{kwid}\t$kwentry->{kwtext}\n"; + } +' > $kwsdatadir/keywords.txt + + +# Map the keywords to integers; note that we remove the keywords that +# are not in our $langdir/words.txt, as we won't find them anyway... +#cat $kwsdatadir/keywords.txt | babel/filter_keywords.pl $langdir/words.txt - - | \ +# sym2int.pl --map-oov 0 -f 2- $langdir/words.txt | \ +if $case_insensitive && ! $use_icu ; then + echo "$0: Running case insensitive processing" + cat $langdir/words.txt | tr '[:lower:]' '[:upper:]' > $kwsdatadir/words.txt + [ `cut -f 1 -d ' ' $kwsdatadir/words.txt | sort -u | wc -l` -ne `cat $kwsdatadir/words.txt | wc -l` ] && \ + echo "$0: Warning, multiple words in dictionary differ only in case: " + + + cat $kwsdatadir/keywords.txt | tr '[:lower:]' '[:upper:]' | \ + sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt > $kwsdatadir/keywords_all.int +elif $case_insensitive && $use_icu ; then + echo "$0: Running case insensitive processing (using ICU with transform \"$icu_transform\")" + cat $langdir/words.txt | uconv -f utf8 -t utf8 -x "${icu_transform}" > $kwsdatadir/words.txt + [ `cut -f 1 -d ' ' $kwsdatadir/words.txt | sort -u | wc -l` -ne `cat $kwsdatadir/words.txt | wc -l` ] && \ + echo "$0: Warning, multiple words in dictionary differ only in case: " + + paste <(cut -f 1 $kwsdatadir/keywords.txt ) \ + <(cut -f 2 $kwsdatadir/keywords.txt | uconv -f utf8 -t utf8 -x "${icu_transform}" ) |\ + local/kwords2indices.pl --map-oov 0 $kwsdatadir/words.txt > $kwsdatadir/keywords_all.int +else + cp $langdir/words.txt $kwsdatadir/words.txt + cat $kwsdatadir/keywords.txt | \ + sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt > $kwsdatadir/keywords_all.int +fi + +(cat $kwsdatadir/keywords_all.int | \ + grep -v " 0 " | grep -v " 0$" > $kwsdatadir/keywords.int ) || true + +(cut -f 1 -d ' ' $kwsdatadir/keywords.int | \ + local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_invocab.xml) || true + +(cat $kwsdatadir/keywords_all.int | \ + egrep " 0 | 0$" | cut -f 1 -d ' ' | \ + local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_outvocab.xml) || true + + +# Compile keywords into FSTs +if [ -s $kwsdatadir/keywords.int ]; then + if [ -z $silence_word ]; then + transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:$kwsdatadir/keywords.fsts + else + silence_int=`grep -w $silence_word $langdir/words.txt | awk '{print $2}'` + [ -z $silence_int ] && \ + echo "$0: Error: could not find integer representation of silence word $silence_word" && exit 1; + transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:- | \ + awk -v 'OFS=\t' -v silint=$silence_int '{if (NF == 4 && $1 != 0) { print $1, $1, silint, silint; } print; }' \ + > $kwsdatadir/keywords.fsts + fi +else + echo "WARNING: $kwsdatadir/keywords.int is zero-size. That means no keyword" + echo "WARNING: was found in the dictionary. 
That might be OK -- or not."
+  touch $kwsdatadir/keywords.fsts
+fi
+
+# Create an utterance id for each utterance.
+cat $datadir/segments | \
+  awk '{print $1}' | \
+  sort | uniq | perl -e '
+  $idx=1;
+  while(<>) {
+    chomp;
+    print "$_ $idx\n";
+    $idx++;
+  }' > $kwsdatadir/utter_id
+
+# Map utterances to the names that will appear in the rttm file. You may have
+# to modify the commands below according to your rttm file.
+cat $datadir/segments | awk '{print $1" "$2}' | sort | uniq > $kwsdatadir/utter_map;
+
+echo "$0: Kws data preparation succeeded"
diff --git a/egs/babel/s5d/local/kws_data_prep_proxy.sh b/egs/babel/s5d/local/kws_data_prep_proxy.sh
new file mode 100755
index 00000000000..04cc59b6499
--- /dev/null
+++ b/egs/babel/s5d/local/kws_data_prep_proxy.sh
@@ -0,0 +1,270 @@
+#!/bin/bash
+
+# Copyright 2014  Guoguo Chen
+# Apache 2.0.
+
+# Begin configuration section.
+nj=8
+cmd=run.pl
+beam=-1             # Beam for proxy FST; -1 means no pruning
+phone_beam=-1       # Beam for KxL2xE FST; -1 means no pruning
+nbest=-1            # Use top n best proxy keywords in proxy FST; -1 means all
+                    # proxies
+phone_nbest=50      # Use top n best phone sequences in KxL2xE; -1 means all
+                    # phone sequences
+phone_cutoff=5      # We don't generate proxy keywords for OOV keywords that
+                    # have fewer phones than the specified cutoff, as they may
+                    # introduce a lot of false alarms
+max_phone_cutoff=9990  # We don't generate proxy keywords for OOV keywords that
+                    # have more phones than this. This can be used when
+                    # we need to use different parameters for keywords of
+                    # different lengths.
+confusion_matrix=   # If supplied, use the corresponding E transducer
+count_cutoff=1      # Minimal count to be considered in the confusion matrix;
+                    # phone pairs with a count less than this are ignored.
+pron_probs=false    # If true, then the lexicon looks like:
+                    # Word Prob Phone1 Phone2...
+case_insensitive=true
+icu_transform="Any-Lower"
+proxy_set=          # List of keywords to generate proxies for, one KWID per
+                    # line. If empty, then by default generate proxies for all
+                    # OOV keywords.
+# End configuration section.
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+echo $0 "$@"
+. parse_options.sh || exit 1;
+
+if [ $# -ne 5 ]; then
+  echo "Usage: local/kws_data_prep_proxy.sh \\"
+  echo "  <lang-dir> <data-dir> <l1-lexicon> <l2-lexicon> <kws-data-dir>"
+  echo " e.g.: local/kws_data_prep_proxy.sh data/lang/ data/dev10h/ \\"
+  echo "  data/local/tmp.lang/lexiconp.txt oov_lexicon.txt data/dev10h/kws/"
+  echo "allowed options:"
+  echo "  --case-sensitive <true|false>  # Being case-sensitive or not"
+  echo "  --icu-transform <transform>    # Transliteration for upper/lower case"
+  echo "                                 # mapping"
+  echo "  --proxy-set <kwid-list>        # Keyword set for generating proxies"
+  exit 1
+fi
+
+set -e
+set -o pipefail
+
+langdir=$1
+datadir=$2
+l1_lexicon=$3
+l2_lexicon=$4
+kwsdatadir=$5
+
+# Checks some files.
+for f in $langdir/words.txt $kwsdatadir/kwlist.xml $l1_lexicon $l2_lexicon; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1
+done
+
+keywords=$kwsdatadir/kwlist.xml
+mkdir -p $kwsdatadir/tmp/
+
+cat $keywords | perl -e '
+  #binmode STDIN, ":utf8";
+  binmode STDOUT, ":utf8";
+
+  use XML::Simple;
+  use Data::Dumper;
+
+  my $data = XMLin(\*STDIN);
+
+  #print Dumper($data->{kw});
+  foreach $kwentry (@{$data->{kw}}) {
+    #print Dumper($kwentry);
+    print "$kwentry->{kwid}\t$kwentry->{kwtext}\n";
+  }' > $kwsdatadir/raw_keywords_all.txt
+
+# Takes care of upper/lower case.
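A note on the case handling that follows: uconv applies ICU's Any-Lower transliteration to both the symbol table and the keywords, so the two are folded identically regardless of the shell locale. A minimal Python sketch of the same operation; the PyICU package here is an assumption of this illustration, not a dependency of the recipe:

    import icu  # PyICU; assumed installed for this sketch only

    # Same transform as: uconv -f utf8 -t utf8 -x "Any-Lower"
    to_lower = icu.Transliterator.createInstance("Any-Lower")

    def fold(s):
        # Locale-independent lower-casing, unlike tr '[:lower:]' '[:upper:]'
        return to_lower.transliterate(s)

    print(fold(u"KEYWORD"))  # -> keyword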
+cp $langdir/words.txt $kwsdatadir/words.txt
+cat $l1_lexicon | sed 's/\s/ /g' > $kwsdatadir/tmp/L1.tmp.lex
+if $case_insensitive; then
+  echo "$0: Running case insensitive processing"
+  echo "$0: Using ICU with transform \"$icu_transform\""
+
+  # Processing words.txt
+  cat $kwsdatadir/words.txt |\
+    uconv -f utf8 -t utf8 -x "${icu_transform}" > $kwsdatadir/words.norm.txt
+
+  # Processing lexicon
+  cat $l2_lexicon | sed 's/\s/ /g' | cut -d ' ' -f 1 |\
+    uconv -f utf8 -t utf8 -x "${icu_transform}" |\
+    paste -d ' ' - <(cat $l2_lexicon | sed 's/\s/ /g' | cut -d ' ' -f 2-) \
+    > $kwsdatadir/tmp/L2.tmp.lex
+
+  paste <(cut -f 1 $kwsdatadir/raw_keywords_all.txt) \
+    <(cut -f 2 $kwsdatadir/raw_keywords_all.txt |\
+    uconv -f utf8 -t utf8 -x "${icu_transform}") \
+    > $kwsdatadir/keywords_all.txt
+  cat $kwsdatadir/keywords_all.txt |\
+    local/kwords2indices.pl --map-oov 0 $kwsdatadir/words.norm.txt \
+    > $kwsdatadir/keywords_all.int
+else
+  cat $l2_lexicon | sed 's/\s/ /g' > $kwsdatadir/tmp/L2.tmp.lex
+  cp $kwsdatadir/raw_keywords_all.txt $kwsdatadir/keywords_all.txt
+
+  cat $kwsdatadir/keywords_all.txt | \
+    sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt \
+    > $kwsdatadir/keywords_all.int
+fi
+
+# Writes some scoring-related files.
+cat $kwsdatadir/keywords_all.int |\
+  (grep -E -v " 0 | 0$" || true) | cut -f 1 -d ' ' |\
+  local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_invocab.xml
+
+cat $kwsdatadir/keywords_all.int |\
+  (grep -E " 0 | 0$" || true) | cut -f 1 -d ' ' |\
+  local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_outvocab.xml
+
+# Selects a set to generate proxies for. By default, generate proxies for OOV
+# keywords.
+if [ -z $proxy_set ]; then
+  cat $kwsdatadir/keywords_all.int |\
+    (grep -E " 0 | 0$" || true) | awk '{print $1;}' | sort -u \
+    > $kwsdatadir/keywords_proxy.list
+else
+  cp $proxy_set $kwsdatadir/keywords_proxy.list
+fi
+cat $kwsdatadir/keywords_all.txt |\
+  grep -f $kwsdatadir/keywords_proxy.list > $kwsdatadir/keywords_proxy.txt
+cat $kwsdatadir/keywords_proxy.txt |\
+  cut -f 2- | awk '{for(x=1;x<=NF;x++) {print $x;}}' |\
+  sort -u > $kwsdatadir/keywords_proxy_words.list
+
+# Maps the original phone set to a "reduced" phone set. We limit L2 to cover
+# only the words that are actually used in keywords_proxy.txt, for efficiency
+# purposes. Besides, if L1 and L2 contain the same words, we use the
+# pronunciation from L1, since that is the lexicon used for the LVCSR training.
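The Perl pipeline that follows implements exactly the rule described in the comment above. Its gist, restated as a standalone Python sketch (the function and variable names are illustrative, not part of the recipe):

    def merge_lexicons(l1, l2, wanted_words):
        # l1, l2: dict mapping word -> list of pronunciation lines;
        # wanted_words: the words that occur in the proxy keywords.
        merged = {}
        for word in wanted_words:
            if word in l1:
                merged[word] = l1[word]   # L1 (the LVCSR lexicon) wins
            elif word in l2:
                merged[word] = l2[word]   # fall back to L2 for L1's OOVs
        return merged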
+cat $kwsdatadir/tmp/L1.tmp.lex | cut -d ' ' -f 1 |\
+  paste -d ' ' - <(cat $kwsdatadir/tmp/L1.tmp.lex | cut -d ' ' -f 2-|\
+  sed 's/_[BEIS]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\
+  awk '{if(NF>=2) {print $0}}' > $kwsdatadir/tmp/L1.lex
+cat $kwsdatadir/tmp/L2.tmp.lex | cut -d ' ' -f 1 |\
+  paste -d ' ' - <(cat $kwsdatadir/tmp/L2.tmp.lex | cut -d ' ' -f 2-|\
+  sed 's/_[BEIS]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\
+  awk '{if(NF>=2) {print $0}}' | perl -e '
+  ($lex1, $words) = @ARGV;
+  open(L, "<$lex1") || die "Fail to open $lex1.\n";
+  open(W, "<$words") || die "Fail to open $words.\n";
+  while (<L>) {
+    chomp;
+    @col = split;
+    @col >= 2 || die "Too few columns in \"$_\".\n";
+    $w = $col[0];
+    $w_p = $_;
+    if (defined($lex1{$w})) {
+      push(@{$lex1{$w}}, $w_p);
+    } else {
+      $lex1{$w} = [$w_p];
+    }
+  }
+  close(L);
+  while (<STDIN>) {
+    chomp;
+    @col = split;
+    @col >= 2 || die "Too few columns in \"$_\".\n";
+    $w = $col[0];
+    $w_p = $_;
+    if (defined($lex1{$w})) {
+      next;
+    }
+    if (defined($lex2{$w})) {
+      push(@{$lex2{$w}}, $w_p);
+    } else {
+      $lex2{$w} = [$w_p];
+    }
+  }
+  %lex = (%lex1, %lex2);
+  while (<W>) {
+    chomp;
+    if (defined($lex{$_})) {
+      foreach $x (@{$lex{$_}}) {
+        print "$x\n";
+      }
+    }
+  }
+  close(W);
+  ' $kwsdatadir/tmp/L1.lex $kwsdatadir/keywords_proxy_words.list \
+  > $kwsdatadir/tmp/L2.lex
+rm -f $kwsdatadir/tmp/L1.tmp.lex $kwsdatadir/tmp/L2.tmp.lex
+
+# Creates a words.txt that covers all the words in L1.lex and L2.lex. We append
+# new words to the original word symbol table.
+max_id=`cat $kwsdatadir/words.txt | awk '{print $2}' | sort -n | tail -1`;
+cat $kwsdatadir/keywords_proxy.txt |\
+  awk '{for(i=2; i <= NF; i++) {print $i;}}' |\
+  cat - <(cat $kwsdatadir/tmp/L2.lex | awk '{print $1;}') |\
+  cat - <(cat $kwsdatadir/tmp/L1.lex | awk '{print $1;}') |\
+  sort -u | \
+  (grep -F -v -x -f <(cat $kwsdatadir/words.txt | awk '{print $1;}') || true)|\
+  awk 'BEGIN{x='$max_id'+1}{print $0"\t"x; x++;}' |\
+  cat $kwsdatadir/words.txt - > $kwsdatadir/tmp/words.txt
+
+# Creates the list of keywords that we need to generate proxies for.
+cat $kwsdatadir/keywords_proxy.txt | perl -e '
+  open(W, "<'$kwsdatadir/tmp/L2.lex'") ||
+    die "Fail to open L2 lexicon: '$kwsdatadir/tmp/L2.lex'\n";
+  my %lexicon;
+  while (<W>) {
+    chomp;
+    my @col = split();
+    @col >= 2 || die "'$0': Bad line in lexicon: $_\n";
+    if ('$pron_probs' eq "false") {
+      $lexicon{$col[0]} = scalar(@col)-1;
+    } else {
+      $lexicon{$col[0]} = scalar(@col)-2;
+    }
+  }
+  while (<>) {
+    chomp;
+    my $line = $_;
+    my @col = split();
+    @col >= 2 || die "Bad line in keywords file: $_\n";
+    my $len = 0;
+    for (my $i = 1; $i < scalar(@col); $i ++) {
+      if (defined($lexicon{$col[$i]})) {
+        $len += $lexicon{$col[$i]};
+      } else {
+        print STDERR "'$0': No pronunciation found for word: $col[$i]\n";
+      }
+    }
+    if (($len >= '$phone_cutoff') && ($len <= '$max_phone_cutoff')){
+      print "$line\n";
+    } elsif ($len > '$max_phone_cutoff'){
+      print STDERR "'$0': Keyword $col[0] is too long, not generating proxy\n";
+    } else {
+      print STDERR "'$0': Keyword $col[0] is too short, not generating proxy\n";
+    }
+  }' > $kwsdatadir/tmp/keywords.txt
+
+# Creates proxy keywords.
+local/generate_proxy_keywords.sh \
+  --cmd "$cmd" --nj "$nj" --beam "$beam" --nbest "$nbest" \
+  --phone-beam $phone_beam --phone-nbest $phone_nbest \
+  --confusion-matrix "$confusion_matrix" --count-cutoff "$count_cutoff" \
+  --pron-probs "$pron_probs" $kwsdatadir/tmp/
+cp $kwsdatadir/tmp/keywords.fsts $kwsdatadir
+
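To restate the keyword filter implemented by the inline Perl above: a keyword is kept for proxy generation only when the total phone count of its pronunciation lies within [phone_cutoff, max_phone_cutoff], since very short keywords tend to produce too many false alarms. A compact sketch of that rule (names illustrative):

    def keep_keyword(words, lexicon_len, phone_cutoff=5, max_phone_cutoff=9990):
        # lexicon_len: dict word -> number of phones in its pronunciation;
        # words: the tokens of one keyword, defaults match the config above.
        n_phones = sum(lexicon_len.get(w, 0) for w in words)
        return phone_cutoff <= n_phones <= max_phone_cutoff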
+# Creates an integer id for each utterance.
+cat $datadir/segments | \
+  awk '{print $1}' | \
+  sort | uniq | perl -e '
+  $idx=1;
+  while(<>) {
+    chomp;
+    print "$_ $idx\n";
+    $idx++;
+  }' > $kwsdatadir/utter_id
+
+# Map utterances to the names that will appear in the rttm file. You may have
+# to modify the commands below according to your rttm file.
+cat $datadir/segments | awk '{print $1" "$2}' |\
+  sort | uniq > $kwsdatadir/utter_map;
+
+echo "$0: Kws data preparation succeeded"
diff --git a/egs/babel/s5d/local/kws_gen_oracle_lattices.sh b/egs/babel/s5d/local/kws_gen_oracle_lattices.sh
new file mode 100755
index 00000000000..b73112b191d
--- /dev/null
+++ b/egs/babel/s5d/local/kws_gen_oracle_lattices.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+
+# Begin configuration section.
+cmd=run.pl
+duptime=0.5
+model=final.mdl
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+  echo "Usage: $0 [options] <lang-dir> <data-dir> <decode-dir>"
+  echo ""
+  echo "Main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo ""
+  exit 1;
+fi
+
+lang=$1;
+data=$2;
+decodedir=$3;
+
+
+kwsdatadir=$data/kws
+oracledir=$decodedir/kws_oracle
+mkdir -p $oracledir/log
+
+for filename in $lang/words.txt $decodedir/num_jobs \
+                $data/text $decodedir/lat.1.gz \
+                $decodedir/../$model ; do
+  if [[ ! -f $filename ]] ; then
+    echo "FATAL: File $filename does not exist!"
+    exit 1;
+  fi
+done
+
+nj=`cat $decodedir/num_jobs`
+
+(cd $decodedir; ln -s ../$model final.mdl )
+(cd $oracledir; echo "$nj" > num_jobs )
+
+$cmd LAT=1:$nj $oracledir/log/lat.LAT.log \
+  cat $data/text \| \
+  sed 's/- / /g' \| \
+  sym2int.pl --map-oov '""' -f 2- $lang/words.txt \| \
+  lattice-oracle --word-symbol-table=$lang/words.txt \
+    --write-lattices="ark:|gzip -c > $oracledir/lat.LAT.gz" \
+    "ark:gzip -cdf $decodedir/lat.LAT.gz|" ark:- ark,t:$oracledir/lat.LAT.tra;
+
diff --git a/egs/babel/s5d/local/kws_oracle.sh b/egs/babel/s5d/local/kws_oracle.sh
new file mode 100755
index 00000000000..c7aa661664f
--- /dev/null
+++ b/egs/babel/s5d/local/kws_oracle.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Jan Trmal)
+#                 2013  Johns Hopkins University
+# Apache 2.0.
+
+. ./path.sh
+. ./cmd.sh
+
+# Begin configuration section.
+cmd=run.pl
+acwt=0.09091 #Acoustic weight -- should not be necessary for oracle lattices
+duptime=0.6  #Max time difference within which occurrences of the same KW are treated as duplicates
+text=    # an alternative reference text to use; when not specified, <data-dir>/text will be used
+model=   # acoustic model to use
+extraid= # kws setup extra ID (kws task was set up using kws_setup.sh --extraid)
+stage=0  # to resume the computation from a different stage
+# End configuration section.
+
+set -e
+set -o pipefail
+
+echo "$0 $@"  # Print the command line for logging
+
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+  echo "Usage: $0 [options] <lang-dir> <data-dir> <decode-dir>"
+  echo ""
+  echo "Main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+ echo " --text #The alternative text file in the format SEGMENT W1 W2 W3..., " + echo " #The default text file is taken from /text" + echo "" + exit 1; +fi + +lang=$1; +data=$2; +decodedir=$3; + +if [ -z $text ] ; then + text=$data/text +fi + +if [ -z "$model" ]; then # if --model was not specified on the command line... + srcdir=`dirname $decodedir`; # The model directory is one level up from decoding directory. + model=$srcdir/final.mdl; +fi + +if [ -z $extraid ] ; then # the same logic as with kws_setup.sh + kwsdatadir=$data/kws +else + kwsdatadir=$data/${extraid}_kws +fi + +nj=`cat $decodedir/num_jobs`; + +oracledir=$decodedir/kws_oracle +mkdir -p $oracledir +mkdir -p $oracledir/log + +if [ $stage -le 0 ] ; then + echo "$nj" > $oracledir/num_jobs + $cmd LAT=1:$nj $oracledir/log/oracle_lat.LAT.log \ + cat $text \| \ + sed 's/- / /g' \| \ + sym2int.pl --map-oov '""' -f 2- $lang/words.txt \| \ + lattice-oracle --word-symbol-table=$lang/words.txt \ + --write-lattices="ark:|gzip -c > $oracledir/lat.LAT.gz" \ + "ark:gzip -cdf $decodedir/lat.LAT.gz|" ark:- ark,t:$oracledir/lat.LAT.tra; +fi + +if [ $stage -le 1 ] ; then + steps/make_index.sh --cmd "$cmd" --acwt $acwt --model $model \ + $kwsdatadir $lang $oracledir $oracledir +fi + +if [ $stage -le 2 ] ; then + steps/search_index.sh --cmd "$cmd" $kwsdatadir $oracledir +fi + +if [ $stage -le 3 ]; then + + #TODO: this stage should be probably moved in a single script file + # and used accross all the kw search scripts + duration=`head -1 $kwsdatadir/ecf.xml |\ + grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\ + grep -o -E "[0-9]*[\.]*[0-9]*" |\ + perl -e 'while(<>) {print $_/2;}'` + + + cat $oracledir/result.* | \ + utils/write_kwslist.pl --flen=0.01 --duration=$duration \ + --segments=$data/segments --normalize=true --duptime=$duptime\ + --map-utter=$kwsdatadir/utter_map --remove-dup=true \ + - $oracledir/kwslist_orig.xml + + #This does not do much -- just adds empty entries for keywords for which + #not even one occurence has not been found + local/fix_kwslist.pl $kwsdatadir/kwlist.xml $oracledir/kwslist_orig.xml $oracledir/kwslist.xml +fi + + +if [ $stage -le 4 ]; then + #As there is a missing functionality in the F4DE for scoring + #subsets of the original set, lets keep this commented out. 
+  #Alternatively: TODO: write a filter_kwslist.pl script
+  #that will produce a kwslist on the basis of a given kwlist.xml subset
+
+  local/kws_score_f4de.sh `dirname $kwsdatadir` $oracledir
+  #-local/kws_score_f4de.sh --kwlist $kwsdatadir/kwlist_outvocab.xml \
+  #-  --f4de-prefix outvocab `dirname $kwsdatadir` $oracledir || exit 1
+  #-local/kws_score_f4de.sh --kwlist $kwsdatadir/kwlist_invocab.xml \
+  #-  --f4de-prefix invocab `dirname $kwsdatadir` $oracledir || exit 1
+
+  echo "======================================================="
+  (
+  echo -n "ATWV-full "
+  grep Occurrence $oracledir/sum.txt | cut -d '|' -f 13
+  )
+
+  #-(
+  #-echo -n "ATWV-invocab "
+  #-grep Occurrence $oracledir/invocab.sum.txt | cut -d '|' -f 13
+  #-) || echo "Error occurred getting the invocab results"
+
+  #-(
+  #-echo -n "ATWV-outvocab "
+  #-grep Occurrence $oracledir/outvocab.sum.txt | cut -d '|' -f 13
+  #-) || echo "Error occurred getting the outvocab results"
+
+  echo "======================================================="
fi
diff --git a/egs/babel/s5d/local/kws_oracle_threshold.pl b/egs/babel/s5d/local/kws_oracle_threshold.pl
new file mode 100755
index 00000000000..e1dc153767e
--- /dev/null
+++ b/egs/babel/s5d/local/kws_oracle_threshold.pl
@@ -0,0 +1,200 @@
+#!/usr/bin/env perl

+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+sub KeywordSort {
+  if ($a->[0] ne $b->[0]) {
+    $b->[0] <=> $a->[0];
+  } else {
+    $b->[1] <=> $a->[1];
+  }
+}
+
+my $Usage = <<EOU;
+Usage: kws_oracle_threshold.pl [options] <alignment.csv>
+ e.g.: kws_oracle_threshold.pl alignment.csv
+
+Allowed options:
+  --beta     : Beta value when computing ATWV            (float, default = 999.9)
+  --duration : Duration of all audio, you must set this  (float, default = 999.9)
+
+EOU
+
+my $beta = 999.9;
+my $duration = 999.9;
+GetOptions(
+  'beta=f' => \$beta,
+  'duration=f' => \$duration);
+
+@ARGV == 1 || die $Usage;
+
+# Works out the input/output source.
+my $alignment_in = shift @ARGV;
+
+# Hash the alignment file. For each instance we store a 3-dimensional vector:
+#   [score, ref, res]
+# where "score" is the confidence of that instance; "ref" equals 0 if there is
+# no reference at that place and 1 if there is a corresponding reference; "res"
+# is 0 if the instance is not considered when scoring, 1 if it is a false
+# alarm and 2 if it is a true hit.
+open(A, "<$alignment_in") || die "$0: Fail to open alignment file: $alignment_in\n";
+my %Ntrue;
+my %keywords;
+my %alignment;
+my $lattice_miss = 0;
+my $lattice_ref = 0;
+my %keywords_lattice_miss;
+my %keywords_lattice_ref;
+while (<A>) {
+  chomp;
+  my @col = split(',');
+  @col == 12 || die "$0: Bad number of columns in $alignment_in: $_\n";
+
+  # First line of the csv file.
+  if ($col[11] eq "alignment") {next;}
+
+  # Instances that do not have corresponding references.
+  if ($col[11] eq "CORR!DET" || $col[11] eq "FA") {
+    if (!defined($alignment{$col[3]})) {
+      $alignment{$col[3]} = [];
+    }
+    my $ref = 0;
+    my $res = 0;
+    if ($col[11] eq "FA") {
+      $res = 1;
+    }
+    push(@{$alignment{$col[3]}}, [$col[9], $ref, $res]);
+    next;
+  }
+
+  # Instances that have corresponding references.
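Before the reference-bearing instances are handled below, here is the whole parse in rough Python form. It assumes the usual F4DE alignment.csv layout (column 4 the KWID, column 10 the score, column 11 the system's YES/NO decision, empty when there was no detection, column 12 the alignment label); the function name is illustrative:

    import csv

    def load_alignment(path):
        # kwid -> list of (score, ref, res) triples, mirroring the Perl
        triples = {}
        with open(path) as f:
            for row in csv.reader(f):
                label = row[11]
                if label == "alignment":          # header line
                    continue
                score = float(row[9]) if row[9] else 0.0
                if label in ("CORR!DET", "FA"):   # no reference at this spot
                    res = 1 if label == "FA" else 0
                    triples.setdefault(row[3], []).append((score, 0, res))
                elif label in ("CORR", "MISS") and row[10] != "":
                    res = 2 if label == "CORR" else 0
                    triples.setdefault(row[3], []).append((score, 1, res))
        return triples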
+ if ($col[11] eq "CORR" || $col[11] eq "MISS") { + if (!defined($alignment{$col[3]})) { + $alignment{$col[3]} = []; + $Ntrue{$col[3]} = 0; + $keywords_lattice_miss{$col[3]} = 0; + $keywords_lattice_ref{$col[3]} = 0; + } + my $ref = 1; + my $res = 0; + if ($col[10] ne "") { + if ($col[11] eq "CORR") { + $res = 2; + } + push(@{$alignment{$col[3]}}, [$col[9], $ref, $res]); + } + $Ntrue{$col[3]} += 1; + $keywords{$col[3]} = 1; + + # The following is for lattice recall and STWV. + $lattice_ref ++; + $keywords_lattice_ref{$col[3]} ++; + if ($col[11] eq "MISS" && $col[10] eq "") { + $lattice_miss ++; + $keywords_lattice_miss{$col[3]} ++; + } + next; + } +} +close(A); + +# Works out the oracle ATWV by sweeping the threshold. +my $atwv = 0.0; +my $otwv = 0.0; +my %mtwv_sweep; +foreach my $kwid (keys %keywords) { + # Sort the instances by confidence score. + my @instances = sort KeywordSort @{$alignment{$kwid}}; + my $local_otwv = 0.0; + my $max_local_otwv = 0.0; + my $local_atwv = 0.0; + my $active_otwv_threshold = ""; + foreach my $instance (@instances) { + my @ins = @{$instance}; + my $gain = 1.0 / $Ntrue{$kwid}; + my $cost = $beta / ($duration - $Ntrue{$kwid}); + # OTWV. + if ($local_otwv > $max_local_otwv && + $active_otwv_threshold ne "" && $active_otwv_threshold != $ins[0]) { + $max_local_otwv = $local_otwv; + } + if ($ins[1] == 1) { + $local_otwv += $gain; + } else { + $local_otwv -= $cost; + } + $active_otwv_threshold = $ins[0]; + if ($active_otwv_threshold == 1.0) { + # If score = 1.0, we always accept the instance as YES. + $max_local_otwv = $local_otwv; + } + + # ATWV. + if ($ins[2] == 1) { + $local_atwv -= $cost; + } elsif ($ins[2] == 2) { + $local_atwv += $gain; + } + + # MTWV. + for (my $threshold = 0.000; $threshold <= $ins[0]; $threshold += 0.001) { + if ($ins[1] == 1) { + $mtwv_sweep{$threshold} += $gain; + } else { + $mtwv_sweep{$threshold} -= $cost; + } + } + } + if ($local_otwv > $max_local_otwv) { + $max_local_otwv = $local_otwv; + } + $atwv += $local_atwv; + $otwv += $max_local_otwv; +} + +# Works out the MTWV. +my $mtwv = 0.0; +my $mtwv_threshold = 0.0; +for my $threshold (keys %mtwv_sweep) { + if ($mtwv_sweep{$threshold} > $mtwv) { + $mtwv = $mtwv_sweep{$threshold}; + $mtwv_threshold = $threshold; + } +} + +# Works out the STWV. +my $stwv = 0.0; +for my $kw (keys %keywords_lattice_miss) { + $stwv += $keywords_lattice_miss{$kw} / $keywords_lattice_ref{$kw}; +} +$stwv = 1 - $stwv / scalar(keys %keywords); + +$atwv /= scalar(keys %keywords); +$atwv = sprintf("%.4f", $atwv); +$otwv /= scalar(keys %keywords); +$otwv = sprintf("%.4f", $otwv); +$mtwv /= scalar(keys %keywords); +$mtwv = sprintf("%.4f", $mtwv); +my $lattice_recall = 1 - $lattice_miss / $lattice_ref; +$lattice_recall = sprintf("%.4f", $lattice_recall); +$stwv = sprintf("%.4f", $stwv); +print "ATWV = $atwv\n"; +print "OTWV = $otwv\n"; +print "STWV = $stwv\n"; +print "MTWV = $mtwv, THRESHOLD = $mtwv_threshold\n"; +print "Lattice Recall = $lattice_recall\n"; diff --git a/egs/babel/s5d/local/kws_score.sh b/egs/babel/s5d/local/kws_score.sh new file mode 120000 index 00000000000..9b896c530a7 --- /dev/null +++ b/egs/babel/s5d/local/kws_score.sh @@ -0,0 +1 @@ +kws_score_f4de.sh \ No newline at end of file diff --git a/egs/babel/s5d/local/kws_score_f4de.sh b/egs/babel/s5d/local/kws_score_f4de.sh new file mode 100755 index 00000000000..4f79e1925a9 --- /dev/null +++ b/egs/babel/s5d/local/kws_score_f4de.sh @@ -0,0 +1,96 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal) +# Apache 2.0. 
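For reference while reading this scorer and kws_oracle_threshold.pl above: every TWV variant there follows one formula. Per keyword, a hit is worth 1/Ntrue and a false alarm costs beta/(T - Ntrue), where T is the scored duration in seconds (the callers divide the ECF duration by two, presumably for two-channel audio) and beta = 999.9; ATWV averages the per-keyword sums. A sketch, with illustrative names:

    def atwv(per_kw, duration, beta=999.9):
        # per_kw: dict kwid -> (n_hits, n_false_alarms, n_true_occurrences)
        total = 0.0
        for n_hit, n_fa, n_true in per_kw.values():
            gain = 1.0 / n_true                # value of one correct hit
            cost = beta / (duration - n_true)  # cost of one false alarm
            total += n_hit * gain - n_fa * cost
        return total / len(per_kw)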
+ +# Begin configuration section. +# case_insensitive=true +extraid= +kwlist= +ecf= +rttm= +f4de_prefix= +# End configuration section. + +help_message="$0: score the kwslist using the F4DE scorer from NIST + Example: + $0 [additional-parameters] + where the most important additional parameters can be: + --extraid #for using, when a non-default kws tasks are setup + (using the kws_setup.sh --extraid) for a kaldi-single data-dir + --kwlist #allows for an alternative kwlist -- if not set, the default + kwlist is taken from + --f4de-prefix #allows for scoring the same results using + different kwlists and storing them in the same dir " + +echo $0 $@ +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + + +if [ $# -ne 2 ]; then + printf "FATAL: incorrect number of variables given to the script\n\n" + printf "$help_message\n" + exit 1; +fi + +if [ -z $extraid ] ; then + kwsdatadir=$1/kws +else + kwsdatadir=$1/${extraid}_kws +fi +kwsoutputdir="$2/" + +if [ -z $kwlist ] ; then + kwlist=$kwsdatadir/kwlist.xml +fi + +if [ -z $rttm ] ; then + rttm=$kwsdatadir/rttm +fi + +if [ -z $ecf ] ; then + ecf=$kwsdatadir/ecf.xml +fi + +if [ ! -z ${f4de_prefix} ] ; then + f4de_prefix="/${f4de_prefix}" +fi + +if [[ ! -d "$kwsdatadir" ]] ; then + echo "FATAL: the KWS input data directory does not exist!" + exit 1; +fi + +for file in $ecf $rttm $kwlist ; do + if [[ ! -f "$file" ]] ; then + echo "FATAL: file $file does not exist!" + exit 1; + fi +done + +echo KWSEval -e $ecf -r $rttm -t $kwlist \ + -s $kwsoutputdir/kwslist.xml -c -o -b -d -f $kwsoutputdir + +if [ -f $kwsdatadir/categories ]; then + if ! grep -q "NGramOrder" "$kwlist"; then + cat $kwlist | local/search/annotate_kwlist.pl $kwsdatadir/categories > $kwsoutputdir/kwlist.xml + kwlist=$kwsoutputdir/kwlist.xml + elif ! grep -q "Characters" "$kwlist"; then + cat $kwlist | local/search/annotate_kwlist.pl $kwsdatadir/categories > $kwsoutputdir/kwlist.xml + kwlist=$kwsoutputdir/kwlist.xml + fi +fi + +KWSEval -e $ecf -r $rttm -t $kwlist -a --zGlobalMeasures MAP \ + --zGlobalMeasures MAPpct --zGlobalMeasures Optimum --zGlobalMeasures Supremum \ + -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \ + -s $kwsoutputdir/kwslist.xml -c -o -b -d -f ${kwsoutputdir}${f4de_prefix} || exit 1; + +duration=`cat ${kwsoutputdir}${f4de_prefix}/sum.txt | grep TotDur | cut -f 3 -d '|' | sed "s/\s*//g"` + +local/kws_oracle_threshold.pl --duration $duration ${kwsoutputdir}${f4de_prefix}/alignment.csv > ${kwsoutputdir}${f4de_prefix}/metrics.txt + +exit 0; + + diff --git a/egs/babel/s5d/local/kws_search.sh b/egs/babel/s5d/local/kws_search.sh new file mode 100755 index 00000000000..39177e8a4c5 --- /dev/null +++ b/egs/babel/s5d/local/kws_search.sh @@ -0,0 +1,230 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal) +# Apache 2.0. + + +help_message="$(basename $0): do keyword indexing and search. data-dir is assumed to have + kws/ subdirectory that specifies the terms to search for. Output is in + decode-dir/kws/ + Usage: + $(basename $0) " + +# Begin configuration section. +#acwt=0.0909091 +min_lmwt=7 +max_lmwt=17 +duptime=0.6 +cmd=run.pl +model= +skip_scoring=false +skip_optimization=false # true can speed it up if #keywords is small. +max_states=150000 +indices_dir= +kwsout_dir= +stage=0 +word_ins_penalty=0 +extraid= +silence_word= # specify this if you did to in kws_setup.sh, it's more accurate. +ntrue_scale=1.0 +nbest=900 +max_silence_frames=50 +# End configuration section. 
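kws_search.sh below sweeps the language-model weight from min_lmwt to max_lmwt and, for each value, builds an index with the reciprocal acoustic scale (acwt = 1/lmwt). In outline (illustration only, using the defaults above):

    min_lmwt, max_lmwt = 7, 17
    for lmwt in range(min_lmwt, max_lmwt + 1):
        acwt = 1.0 / lmwt  # matches: acwt=`perl -e "print (1.0/$lmwt);"`
        print("kws_%d: acwt=%.6f" % (lmwt, acwt))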
+ +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +set -u +set -e +set -o pipefail + + +if [[ "$#" -ne "3" ]] ; then + echo -e "$0: FATAL: wrong number of script parameters!\n\n" + printf "$help_message\n\n" + exit 1; +fi + +silence_opt= + +langdir=$1 +datadir=$2 +decodedir=$3 + +if [ -z $extraid ] ; then + kwsdatadir=$datadir/kws +else + kwsdatadir=$datadir/${extraid}_kws +fi + +if [ -z $kwsout_dir ] ; then + if [ -z $extraid ] ; then + kwsoutdir=$decodedir/kws + else + kwsoutdir=$decodedir/${extraid}_kws + fi +else + kwsoutdir=$kwsout_dir +fi +mkdir -p $kwsoutdir + +if [ -z $indices_dir ]; then + indices_dir=$kwsoutdir +fi + +for d in "$datadir" "$kwsdatadir" "$langdir" "$decodedir"; do + if [ ! -d "$d" ]; then + echo "$0: FATAL: expected directory $d to exist" + exit 1; + fi +done +if [[ ! -f "$kwsdatadir/ecf.xml" ]] ; then + echo "$0: FATAL: the $kwsdatadir does not contain the ecf.xml file" + exit 1; +fi + +echo $kwsdatadir +duration=`head -1 $kwsdatadir/ecf.xml |\ + grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\ + perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2;}'` + +#duration=`head -1 $kwsdatadir/ecf.xml |\ +# grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\ +# grep -o -E "[0-9]*[\.]*[0-9]*" |\ +# perl -e 'while(<>) {print $_/2;}'` + +echo "Duration: $duration" + +if [ ! -z "$model" ]; then + model_flags="--model $model" +else + model_flags= +fi + +frame_subsampling_factor=1 +if [ -f $decodedir/../frame_subsampling_factor ] ; then + frame_subsampling_factor=$(cat $decodedir/../frame_subsampling_factor) + echo "Frame subsampling factor autodetected: $frame_subsampling_factor" +fi + +if [ $stage -le 0 ] ; then + if [ ! -f $indices_dir/.done.index ] ; then + [ ! -d $indices_dir ] && mkdir $indices_dir + for lmwt in `seq $min_lmwt $max_lmwt` ; do + indices=${indices_dir}_$lmwt + mkdir -p $indices + + acwt=`perl -e "print (1.0/$lmwt);"` + [ ! -z $silence_word ] && silence_opt="--silence-word $silence_word" + steps/make_index.sh $silence_opt --cmd "$cmd" --acwt $acwt $model_flags\ + --skip-optimization $skip_optimization --max-states $max_states \ + --word-ins-penalty $word_ins_penalty --max-silence-frames $max_silence_frames\ + --frame-subsampling-factor ${frame_subsampling_factor} \ + $kwsdatadir $langdir $decodedir $indices || exit 1 + done + touch $indices_dir/.done.index + else + echo "Assuming indexing has been aready done. 
If you really need to re-run " + echo "the indexing again, delete the file $indices_dir/.done.index" + fi +fi + + +if [ $stage -le 1 ]; then + for lmwt in `seq $min_lmwt $max_lmwt` ; do + kwsoutput=${kwsoutdir}_$lmwt + indices=${indices_dir}_$lmwt + mkdir -p $kwsoutdir + local/search_index.sh --cmd "$cmd" --indices-dir $indices \ + --strict false --frame-subsampling-factor ${frame_subsampling_factor}\ + $kwsdatadir $kwsoutput || exit 1 + + nj=`cat $indices/num_jobs` + #this will truncate the file + rm -f $kwsoutput/results; touch $kwsoutput/results + + # This is a memory-efficient way how to do the filtration + # we do this in this way because the result.* files can be fairly big + # and we do not want to run into troubles with memory + #% files="" + #% for job in `seq 1 $nj`; do + #% if [ -f $kwsoutput/results.${job}.gz ] ; then + #% files="$files <(gunzip -c $kwsoutput/results.${job}.gz)" + #% elif[ -f $kwsoutput/results.${job} ] ; then + #% files="$files $kwsoutput/results.${job}" + #% else + #% echo >&2 "The file $kwsoutput/results.${job}[.gz] does not exist" + #% return 1 + #% fi + #% done + #% sort -m -u $files | local/search/filter_kws_results.pl --nbest $nbest |\ + #% sort -u > $kwsoutput/results + + # this is similar to the previous code -- should produce the same + # results (albeit more slowly as it's relying on temporary files + # the memory requirements are extremely limited + # I decided to go for this as the previous code does rely + # on the assumption the partial result files are sorted. + # that is not true for the older generation of pipeline + for job in `seq 1 $nj`; do + { + if [ -f $kwsoutput/result.${job}.gz ]; then + gunzip -c $kwsoutput/result.${job}.gz + else + cat $kwsoutput/result.${job} + fi + } | cat - $kwsoutput/results | \ + local/search/filter_kws_results.pl --nbest $nbest | \ + sort -u > $kwsoutput/results.${job} + mv $kwsoutput/results.${job} $kwsoutput/results + done + + done + + +fi + +if [ $stage -le 2 ]; then + echo "Writing unnormalized results" + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutdir/write_unnormalized.LMWT.log \ + set -e ';' set -o pipefail ';'\ + cat ${kwsoutdir}_LMWT/results \| sort -u \| \ + utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$duration \ + --segments=$datadir/segments --normalize=false --duptime=$duptime --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map\ + - ${kwsoutdir}_LMWT/kwslist.unnormalized.xml || exit 1; +fi + +if [ $stage -le 3 ]; then + echo "Writing normalized results" + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutdir/write_normalized.LMWT.log \ + set -e ';' set -o pipefail ';'\ + cat ${kwsoutdir}_LMWT/results \| \ + utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$duration \ + --segments=$datadir/segments --normalize=true --duptime=$duptime --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map --digits=3\ + - ${kwsoutdir}_LMWT/kwslist.xml || exit 1 +fi + + +if [ -z $extraid ] ; then + extraid_flags= +else + extraid_flags=" --extraid ""$extraid"" " +fi + +if [ $stage -le 4 ]; then + if [[ (! 
-x local/kws_score.sh ) ]] ; then + echo "Not scoring, because the file local/kws_score.sh is not present" + elif [[ $skip_scoring == true ]] ; then + echo "Not scoring, because --skip-scoring true was issued" + else + echo "Scoring KWS results" + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutdir/scoring.LMWT.log \ + local/kws_score.sh $extraid_flags $datadir ${kwsoutdir}_LMWT || exit 1; + fi +fi + +exit 0 diff --git a/egs/babel/s5d/local/kws_setup.sh b/egs/babel/s5d/local/kws_setup.sh new file mode 100755 index 00000000000..93513a56d94 --- /dev/null +++ b/egs/babel/s5d/local/kws_setup.sh @@ -0,0 +1,158 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal) +# Apache 2.0. + +# Begin configuration section. +cmd=run.pl +case_insensitive=true +subset_ecf= +rttm_file= +extraid= +use_icu=true +icu_transform="Any-Lower" +kwlist_wordlist=false +langid=107 +annotate=true +silence_word= # Optional silence word to insert (once) between words of the transcript. +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +set -e +set -u +set -o pipefail + +help_message="$0: Initialize and setup the KWS task directory +Usage: + $0 [rttm-file] +allowed switches: + --subset-ecf /path/to/filelist # The script will subset the ecf file + # to contain only the files from the filelist + --rttm-file /path/to/rttm # the preferred way how to specify the rttm + # the older way (as an in-line parameter is + # obsolete and will be removed in near future + --case-insensitive # Shall we be case-sensitive or not? + # Please not the case-sensitivness depends + # on the shell locale! + --annotate + --use-icu # Use the ICU uconv binary to normalize casing + --icu-transform # When using ICU, use this transliteration + --kwlist-wordlist # The file with the list of words is not an xml + " + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. utils/parse_options.sh || exit 1; + +if [ "$#" -ne "5" ] && [ "$#" -ne "4" ] ; then + printf "FATAL: invalid number of arguments.\n\n" + printf "$help_message\n" + exit 1 +fi + +ecf_file=$1 +kwlist_file=$2 +if [ "$#" -eq "5" ] ; then + rttm_file=$3 + langdir=$4 + datadir=$5 +else + langdir=$3 + datadir=$4 +fi + +# don't quote rttm_file as it's valid for it to be empty. +for filename in "$ecf_file" "$kwlist_file" $rttm_file; do + echo $filename + if [ ! -e $filename ] ; then + printf "FATAL: filename \'$filename\' does not refer to a valid file\n" + printf "$help_message\n" + exit 1; + fi +done +for dirname in "$langdir" "$datadir" ; do + if [ ! -d $dirname ] ; then + printf "FATAL: dirname \'$dirname\' does not refer to a valid directory\n" + printf "$help_message\n" + exit 1; + fi +done + +if [ ! 
-z $extraid ]; then + kwsdatadir=$datadir/${extraid}_kws +else + kwsdatadir=$datadir/kws +fi + +mkdir -p $kwsdatadir + +if [ -z $subset_ecf ] ; then + test -f $kwsdatadir/ecf.xml && rm -f $kwsdatadir/ecf.xml + cp "$ecf_file" $kwsdatadir/ecf.xml || exit 1 +else + local/make_ecf_subset.sh $subset_ecf $ecf_file > $kwsdatadir/ecf.xml +fi + +if $kwlist_wordlist ; then +( + echo '' + awk '{ printf(" \n", $1); + printf(" "); for (n=2;n<=NF;n++){ printf("%s", $n); if(n\n"); + printf(" \n"); }' < ${kwlist_file} + # while read line; do + # id_str=`echo $line | cut -f 1 -d ' '` + # kw_str=`echo $line | cut -f 2- -d ' '` + # echo " " + # echo " $kw_str" + # echo " " + # done < ${kwlist_file} + echo '' +) > $kwsdatadir/kwlist.xml || exit 1 +else + test -f $kwsdatadir/kwlist.xml && rm -f $kwsdatadir/kwlist.xml + cp "$kwlist_file" $kwsdatadir/kwlist.xml || exit 1 +fi + +if [ ! -z $rttm_file ] ; then + test -f $kwsdatadir/rttm && rm -f $kwsdatadir/rttm + cp "$rttm_file" $kwsdatadir/rttm || exit 1 +fi + +sil_opt= +[ ! -z $silence_word ] && sil_opt="--silence-word $silence_word" +local/kws_data_prep.sh --case-insensitive ${case_insensitive} \ + $sil_opt --use_icu ${use_icu} --icu-transform "${icu_transform}" \ + $langdir $datadir $kwsdatadir || exit 1 + +if $annotate ; then + set -x + rm -f $kwsdatadir/kwlist.xml + cat $kwsdatadir/keywords.txt | local/search/create_categories.pl | local/search/normalize_categories.pl > $kwsdatadir/categories + cat "$kwlist_file" | local/search/annotate_kwlist.pl $kwsdatadir/categories > $kwsdatadir/kwlist.xml || exit 1 +fi +#~ ( +#~ echo '' +#~ while read line; do +#~ id_str=`echo $line | cut -f 1 -d ' '` +#~ kw_str=`echo $line | cut -f 2- -d ' '` +#~ echo " " +#~ echo " $kw_str" +#~ echo " " +#~ done < ${kwlist_file} +#~ echo '' +#~ ) > $kwsdatadir/kwlist.xml || exit 1 +#~ +#-( +#-echo '' +#-id=1 +#-while read line; do +#- id_str=$( printf "KWS$langid-%04d\n" $id ) +#- echo " " +#- echo " $line" +#- echo " " +#- id=$(( $id + 1 )) +#-done < ${kwlist_file} +#-echo '' +#-) > $kwsdatadir/kwlist.xml || exit 1 +#- diff --git a/egs/babel/s5d/local/lattice_to_ctm.sh b/egs/babel/s5d/local/lattice_to_ctm.sh new file mode 100755 index 00000000000..5fbde42d237 --- /dev/null +++ b/egs/babel/s5d/local/lattice_to_ctm.sh @@ -0,0 +1,109 @@ +#!/bin/bash +# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. + +# begin configuration section. +cmd=run.pl +stage=0 +decode_mbr=true +beam=5 +word_ins_penalty=0.5 +min_lmwt=7 +max_lmwt=17 +model= + +#end configuration section. + +#debugging stuff +echo $0 $@ + +[ -f ./path.sh ] && . ./path.sh +[ -f ./cmd.sh ] && . ./cmd.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: $0 [options] " && exit; + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1) # (createCTM | filterCTM )." + exit 1; +fi + +data=$1 +lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. +dir=$3 + +if [ -z "$model" ] ; then + model=`dirname $dir`/final.mdl # Relative path does not work in some cases + #model=$dir/../final.mdl # assume model one level up from decoding dir. + #[ ! -f $model ] && model=`(set +P; cd $dir/../; pwd)`/final.mdl +fi + + +for f in $lang/words.txt $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do + [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; +done + +name=`basename $data`; # e.g. eval2000 + +mkdir -p $dir/scoring/log + +if [ $stage -le 0 ]; then + if [ ! 
-f $lang/phones/word_boundary.int ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ + set -e -o pipefail \; \ + mkdir -p $dir/score_LMWT/ '&&' \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ + lattice-prune --beam=$beam ark:- ark:- \| \ + lattice-align-words-lexicon $lang/phones/align_lexicon.int $model ark:- ark:- \| \ + lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \ + utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ + '>' $dir/score_LMWT/$name.ctm || exit 1; + else + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ + set -e -o pipefail \; \ + mkdir -p $dir/score_LMWT/ '&&' \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ + lattice-prune --beam=$beam ark:- ark:- \| \ + lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ + lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \ + utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ + '>' $dir/score_LMWT/$name.ctm || exit 1; + fi +fi + +if [ $stage -le 1 ]; then + # Remove some stuff we don't want to score, from the ctm. + for x in $dir/score_*/$name.ctm; do + cp $x $x.bkup1; + cat $x.bkup1 | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ + grep -v -E '|%HESITATION|\(\(\)\)' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + perl -e '@list = (); %list = (); + while(<>) { + chomp; + @col = split(" ", $_); + push(@list, $_); + $key = "$col[0]" . " $col[1]"; + $list{$key} = 1; + } + foreach(sort keys %list) { + $key = $_; + foreach(grep(/$key/, @list)) { + print "$_\n"; + } + }' > $x; + done +fi + + +echo "Lattice2CTM finished on " `date` +exit 0 diff --git a/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py b/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py new file mode 100755 index 00000000000..b6d4b9ab944 --- /dev/null +++ b/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py @@ -0,0 +1,469 @@ +#!/usr/bin/env python + +# Copyright 2016 Johns Hopkins University (Author: Matthew Wiesner) +# Apache 2.0 + +# ============ Make unicode-based graphemic lexicon ============= +# +# This script takes a list of either words or words and corresponding +# morphemes and returns a kaldi format lexicon. 
+# =============================================================== + +# Import Statements + +from __future__ import print_function +import codecs +import argparse +import unicodedata +import os +import re +import sys +import numpy as np + + +def main(): + args = parse_input() + baseforms = get_word_list(args.lex_in, args.fmt) + unicode_transcription = baseform2unicode(baseforms) + encoded_transcription, table = encode(unicode_transcription, + args.tag_percentage, + log=args.verbose) + write_table(table, args.lex_out) + + # Extract dictionary of nonspeech pronunciations + try: + nonspeech = {} + with codecs.open(args.nonspeech, "r", "utf-8") as f: + for line in f: + line_vals = line.strip().split() + nonspeech[line_vals[0]] = line_vals[1] + except (IOError, TypeError): + pass + + # Extract dictionary of extraspeech pronunciations (normally ) + try: + extraspeech = {} + with codecs.open(args.extraspeech, "r", "utf-8") as f: + for line in f: + line_vals = line.strip().split() + extraspeech[line_vals[0]] = line_vals[1] + except (IOError, TypeError): + pass + + write_lexicon(baseforms, encoded_transcription, args.lex_out, + nonspeech=nonspeech, extraspeech=extraspeech) + + +def parse_input(): + ''' + Parse commandline input. + ''' + if len(sys.argv[1:]) == 0: + print("Usage: ./make_unicode_lexicon.py [opts] lex_in lex_out") + sys.exit(1) + + parser = argparse.ArgumentParser() + parser.add_argument("lex_in", help="Path of input word list optionally " + "paired with a baseform. 1 word per line with the " + "baseform separated by a tab") + parser.add_argument("lex_out", help="Path of output output " + "graphemc lexicon") + parser.add_argument("-F", "--fmt", help="Format of input word list", + action="store", default="word_list") + parser.add_argument("-T", "--tag_percentage", help="Percentage of least" + " frequently occurring graphemes to be tagged", + type=float, action="store", default=0.1) + parser.add_argument("--nonspeech", help="File with map of nonspeech words" + " and pronunciations", action="store", default=None) + parser.add_argument("--extraspeech", help="File with map of extra speech" + " words", action="store", default=None) + parser.add_argument("-V", "--verbose", help="Include useful print outs", + action="store_true") + args = parser.parse_args() + return args + + +def _read_word_list_line(line): + try: + count, word = line.strip().split(None, 1) + float(count) + return word + except ValueError: + return line.strip() + + +def get_word_list(input_file, fmt): + ''' + Read from input file the words and potential baseforms. + + Arguments: input_file -- path to the input word list + fmt -- format of input word list ["word_list", "morfessor"] + Output: + words -- list of tuples (word, baseform) + ''' + with codecs.open(input_file, "r", "utf-8") as f: + if fmt == "word_list" or fmt is None: + words = [] + for line in f: + w = _read_word_list_line(line) + words.append((w, w)) + assert "." not in w, "FORMAT ERROR. Use --fmt [-F] morfessor" + elif fmt == "morfessor": + words = [] + for line in f: + w, bf = line.strip().split(None, 1) + words.append((w, bf)) + else: + sys.exit("Error: Bad input format name") + + return words + + +def baseform2unicode(baseforms): + ''' + Convert each baseform in the list, baseforms, to a parsed unicode + description stored as a list of lists of dictionaries. 
+ + unicode_transcription = [ + [{'NAME':'word1_grapheme1','FIELD1':'FIELD1_VAL',...}, + {'NAME':'word1_grapheme2','FIELD1':'FIELD1_VAL',...},...], + [{'NAME':'word2_grapheme1,'FIELD1:'FIELD1_VAL',...}, + {},...] + ,...,[]] + + Arguments: + baseforms -- List of tuples (word, baseform) + e.g. baseforms = get_word_list() + + Output: + unicode_transcription -- See above description + ''' + + # Regular expression for parsing unicode descriptions + pattern = re.compile( + r"(?P[^\s]+)\s" + r"(?PSMALL\s|CAPITAL\s)?(?P" + "(?:SUBJOINED )?LETTER |(?:INDEPENDENT VOWEL )" + r"|(?:VOWEL SIGN )|VOWEL |SIGN " + r"|CHARACTER |JONGSEONG |CHOSEONG |SYMBOL |MARK |DIGIT " + r"|SEMIVOWEL |TONE |SYLLABLE |LIGATURE |KATAKANA )" + r"(?P((?!WITH).)+)" + r"(?PWITH .+)?" + ) + + # For each graphemic baseform generate a parsed unicode description + unicode_transcription = [] + for w, bf in baseforms: + # Initialize empty list of words + baseform_transcription = [] + # For each grapheme parse the unicode description + for graph in bf: + unicode_desc = unicodedata.name(graph) + # Use the canonical unicode decomposition + tags = unicodedata.normalize('NFD', graph) + match_obj = pattern.match(unicode_desc) + + # Grapheme's unicode description is non-standard + if(not match_obj): + # Underscore, dash, hastag have special meaning + if(graph in ("_", "-", "#")): + graph_dict = { + 'CHAR_TYPE': 'LINK', + 'SYMBOL': graph, + 'NAME': graph + } + # The grapheme is whitespace + elif(unicode_desc in ("ZERO WIDTH SPACE", + "ZERO WIDTH NON-JOINER", + "ZERO WIDTH JOINER", + "SPACE")): + # Ignore whitespace + continue + else: + graph_dict = {'SYMBOL': graph, 'NAME': 'NOT_FOUND'} + + # Grapheme's unicode description is standard + else: + graph_dict = match_obj.groupdict() + graph_dict["SYMBOL"] = graph + # Add tags to dictionary (The first element of tags is actually + # the base grapheme, so we only check all tags after the first. + if(len(tags) > 1): + for i, t in enumerate(tags[1:]): + graph_dict["TAG" + str(i)] = unicodedata.name(t) + + # Add grapheme unicode description dictionary to baseform list + baseform_transcription.append(graph_dict) + # Add baseform transcription to unicode transcription list + unicode_transcription.append(baseform_transcription) + return unicode_transcription + + +def encode(unicode_transcription, tag_percentage, log=False): + ''' + Arguments: + unicode_transcription -- a list of words whose graphemes are + respresented as a list of dictionaries whose + fields contain information about parsed + unicode descriptions. + + tag_percentage -- percent of least frequent graphemes to tag + log -- optional printing + + Outputs: + Lexicon -- Encoded baseforms + ''' + # Constants + VOWELS = "AEIOU" + SKIP = "/()" + + graphemes = [] + table = [] + encoded_transcription = [] + # Accumulate grapheme statistics over corpus at some point. For now just + # use the lexicon word list. For estimating grapheme frequency this is + # probably sufficient since we have many words each with many + # graphemes. We do unfortunately have to assume that case does not matter. + # We do not count dashes, underscores, parentheses, etc. . Just letters. 
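The bookkeeping in the next stretch of code feeds one simple rule: the rarest tag_percentage of graphemes keep their diacritics only as appended tags (see _backoff_diacritics further down). The thresholding logic, reduced to a numpy-free sketch with illustrative names:

    from collections import Counter

    def rare_grapheme_threshold(graphemes, tag_percentage=0.1):
        # graphemes: flat list of lower-cased grapheme occurrences
        counts = Counter(graphemes)
        total = float(sum(counts.values()))
        freqs = sorted(c / total for c in counts.values())
        bottom_idx = int(tag_percentage * len(freqs))
        return freqs[bottom_idx]  # frequencies at or below this get tagged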
+ graph_list = [] + for w in unicode_transcription: + for graph in w: + if graph["SYMBOL"] not in "()\/,-_#.": + graph_list.append(graph["SYMBOL"].lower()) + + graph2int = {v: k for k, v in enumerate(set(graph_list))} + int2graph = {v: k for k, v in graph2int.items()} + graph_list_int = [graph2int[g] for g in graph_list] + bin_edges = range(0, len(int2graph.keys()) + 1) + graph_counts = np.histogram(graph_list_int, bins=bin_edges)[0] / float(len(graph_list_int)) + # Set count threshold to frequency that tags the bottom 10% of graphemes + bottom_idx = int(np.floor(tag_percentage * len(graph_counts))) + count_thresh = sorted(graph_counts)[bottom_idx] + graph_counts_dict = {} + for i, count in enumerate(graph_counts): + graph_counts_dict[int2graph[i]] = count + + graph_counts = graph_counts_dict + + # Print grapheme counts to histogram + if log: + graph_counts_sorted = sorted(graph_counts, reverse=True, + key=graph_counts.get) + if not os.path.exists("lex_log"): + os.makedirs("lex_log") + with codecs.open("lex_log/grapheme_histogram.txt", "w", "utf-8") as fp: + fp.write("Graphemes (Count Threshold = %.6f)\n" % count_thresh) + for g in graph_counts_sorted: + weight = ("-" * int(np.ceil(500.0 * graph_counts[g])) + + " %.6f\n" % graph_counts[g]) + fp.write("%s -" % (g) + weight) + + # Find a new baseform for each word + for w in unicode_transcription: + word_transcription = "" + + # Find a "pronunciation" for each grapheme in the word + for graph in w: + # Case 1: Check that the grapheme has a unicode description type + # --------------------------------------------------------------- + if("CHAR_TYPE" not in [k.strip() for k in graph.keys()]): + if(graph["SYMBOL"] == "."): + graph["MAP0"] = "\t" + if word_transcription[-1] == " ": + word_transcription = word_transcription[:-1] + "\t" + + elif(graph["SYMBOL"] not in SKIP): + graph["MAP0"] = graph["SYMBOL"].lower() + word_transcription += graph["MAP0"] + " " + + # Case 2: Standard Grapheme + # --------------------------------------------------------------- + elif(graph["CHAR_TYPE"].strip() in + ("LETTER", "VOWEL", "VOWEL SIGN", "SIGN")): + # Backoff diacritics + base_grapheme = graph["NAME"].strip().replace(" ", "-").lower() + graph["MAP0"] = _backoff_diacritics(graph["SYMBOL"].lower(), + base_grapheme, + graph_counts, + count_thresh) + # Add final space + word_transcription += graph["MAP0"] + " " + + # Case 3: Syllable (Assume consonant vowel pattern) + # This is basically just here for Amharic + # ---------------------------------------------------------------- + elif(graph["CHAR_TYPE"].strip() == "SYLLABLE"): + # Multi-word description + if(len(graph["NAME"].strip().split(' ')) > 1): + g_name = graph["NAME"].strip().replace(" ", "-").lower() + graph["MAP0"] = g_name + word_transcription += graph["MAP0"] + "\t" + + # Consonant Vowel Pattern + else: + cv_pattern = (r"(?P[^%s]*)(?P[%s]+)" % + (VOWELS, VOWELS)) + parsed_graph = re.match(cv_pattern, graph["NAME"]) + if(not parsed_graph): + sys.exit("Syllable did not obey" + "consonant-vowel pattern.") + + graph_dict = parsed_graph.groupdict() + + # Get consonant if it exists + if("CONSONANT" in graph_dict.keys() and + graph_dict["CONSONANT"]): + graph["MAP0"] = graph_dict["CONSONANT"].lower() + word_transcription += graph["MAP0"] + " " + + # Get vowel if it exists + if("VOWEL" in graph_dict.keys() and graph_dict["VOWEL"]): + graph["MAP1"] = graph_dict["VOWEL"].lower() + word_transcription += graph["MAP1"] + "\t" + + # Case 4: Commonly occurring symbols + # 
---------------------------------------------------------------- + elif(graph["CHAR_TYPE"].strip() == "LINK"): + # Add tab for underscores (kaldi lexicon format) + if(graph["SYMBOL"] in ("_", "#")): + graph["MAP0"] = "\t" + if(len(word_transcription) >= 3 and + word_transcription[-2] == "\t"): + word_transcription = word_transcription[:-3] + "\t" + elif(len(word_transcription) >= 1): + word_transcription += "\t" + else: + sys.exit("Unknown rule for initial underscore") + elif(graph["SYMBOL"] == "-"): + graph["MAP0"] = "" + continue + else: + sys.exit("Unknown linking symbol found.") + sys.exit(1) + + # Update table of observed graphemes + if(graph["SYMBOL"] not in graphemes): + table.append(graph) + graphemes.append(graph["SYMBOL"]) + + # Append the newly transcribed word + encoded_transcription.append(word_transcription.strip()) + return encoded_transcription, table + + +def _backoff_diacritics(grapheme, base_grapheme, graph_counts, count_thresh): + ''' + Add diacritics as tags if the grapheme with diacritics occurs + infrequently. The grapheme built by successively peeling away + diacritics until a frequent grapheme in the lexicon is discovered. + This grapheme is then considered a distinct unit and all peeled off + diacritics are added as kaldi style tags + + Arguments: + grapheme -- the raw grapheme to be processed + base_grapheme -- the grapheme with no combining marks + (see unicode normalization NFD for more details) + graph_counts -- A dictionary of all seen graphemes as keys with + counts as values + count_thresh -- The frequency threshold below which diacritics + should be peeled away + ''' + # Initialize variables before loop + new_grapheme = grapheme + removed = [] + parts = unicodedata.normalize("NFD", new_grapheme) + # Find a backed-off (in terms of number of diacritics) grapheme with count + # above the frequency threshold (count_thresh) + while(len(parts) > 1 and + (graph_counts[new_grapheme] <= count_thresh)): + new_grapheme = unicodedata.normalize("NFC", parts[0:-1]) + tag = unicodedata.name(parts[-1]).strip().replace(" ", "").lower() + removed.append(tag) + parts = unicodedata.normalize("NFD", new_grapheme) + + # Collect all diactritics that will not be added as tags + split_tags = [] + for p in parts[1:]: + split_tag = unicodedata.name(p).strip().replace(" ", "").lower() + split_tags.append(split_tag) + + # Append non-tag diacritics to the base grapheme + base_grapheme = "".join([base_grapheme] + split_tags) + # Return the tagged grapheme + return "_".join([base_grapheme] + removed) + + +def write_table(table, outfile): + ''' + Creates table of graphemes and fields of each grapheme's corresponding + unicode description. 
+
+    Arguments:
+        table -- table to write
+        outfile -- name of the output lexicon file
+    '''
+
+    # Create output table name
+    outfile = os.path.splitext(outfile)[0] + "_table.txt"
+    # Sort keys for convenience
+    table_sorted = sorted(table, key=lambda k: k["NAME"])
+    # Start writing to output
+    with codecs.open(outfile, "w", "utf-8") as fo:
+        # Get header names
+        header_names = sorted(set().union(*[d.keys() for d in table]))
+        # Write headers
+        for h in header_names[:-1]:
+            fo.write("%s\t" % h)
+
+        fo.write("%s\n" % header_names[-1])
+
+        # Write values if present
+        for t in table_sorted:
+            for h in header_names[:-1]:
+                if(h in t.keys() and t[h]):
+                    fo.write("%s\t" % t[h])
+                else:
+                    fo.write("''\t")
+            if(header_names[-1] in t.keys() and t[header_names[-1]]):
+                fo.write("%s\n" % t[header_names[-1]])
+            else:
+                fo.write("''\n")
+
+
+def write_lexicon(baseforms, encoded_transcription, outfile, nonspeech=None,
+                  extraspeech=None):
+    '''
+    Write out the encoded transcription of words
+
+    Arguments:
+        baseforms -- list of word entries from a word list; the first field
+                     of each entry is the word itself
+        encoded_transcription -- input encoded lexicon
+        outfile -- output lexicon
+        nonspeech -- optional dictionary of non-speech words to write first
+        extraspeech -- optional dictionary of extra-speech words to write next
+    '''
+    # Write Lexicon File
+    with codecs.open(outfile, "w", "utf-8") as f:
+        # First write the non-speech words
+        try:
+            for w in nonspeech.iterkeys():
+                f.write("%s\t%s\n" % (w, nonspeech[w]))
+        except AttributeError:
+            pass
+
+        # Then write extra-speech words
+        try:
+            for w in extraspeech.iterkeys():
+                f.write("%s\t%s\n" % (w, extraspeech[w]))
+        except AttributeError:
+            pass
+
+        # Then write the rest of the words
+        for idx, w in enumerate(baseforms):
+            # This is really just for BABEL in case <hes> is written as a word
+            if(w[0].lower() == "<hes>"):
+                f.write("%s\t<hes>\n" % (unicode(w[0])))
+            else:
+                f.write("%s\t%s\n" % (unicode(w[0]),
+                                      encoded_transcription[idx]))
+
+if __name__ == "__main__":
+    main()
diff --git a/egs/babel/s5d/local/lexicon/make_word_list.py b/egs/babel/s5d/local/lexicon/make_word_list.py
new file mode 100755
index 00000000000..a1ff385a035
--- /dev/null
+++ b/egs/babel/s5d/local/lexicon/make_word_list.py
@@ -0,0 +1,93 @@
+#!/usr/bin/python
+
+from __future__ import print_function
+import sys
+import os
+import codecs
+import argparse
+import unicodedata
+import pdb
+
+
+def process_transcripts(transcripts_dir, transcripts_list):
+    '''
+    This looks through each transcript file and collects the words.
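+    Returns a list of (word, count) pairs sorted by word, together with a
+    similar list for words that were marked as mispronounced (flanked
+    by '*').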
+    Arguments:
+        transcripts_dir -- directory holding the training transcripts
+        transcripts_list -- file listing the transcript files to read
+    '''
+    transcripts = os.path.join(transcripts_dir, transcripts_list)
+    with open(transcripts, "r") as f:
+        transcript_files = []
+        for l in f:
+            l_path = os.path.join(transcripts_dir, l.strip() + ".txt")
+            transcript_files.append(l_path)
+
+    word_list = {}
+    misprons = {}
+    for i_f, f in enumerate(transcript_files):
+        print("\rFile ", i_f + 1, "of ", len(transcript_files), end="")
+        with codecs.open(f, "r", "utf-8") as fp:
+            for line in fp:
+                # Don't use the lines with time markers
+                if not line.startswith("["):
+                    words = line.strip().split(" ")
+                    for w in words:
+                        if (not w.startswith("<") and not
+                                w.startswith("(") and not
+                                w.endswith("-") and not w.startswith("-")):
+                            # Get rid of mispronunciation markings
+                            if (not w.startswith("*") and not
+                                    w.endswith("*") and
+                                    w != "~"):
+                                try:
+                                    word_list[w] += 1
+                                except KeyError:
+                                    word_list[w] = 1
+                            else:
+                                w = w.replace("*", "")
+                                if(w != "~"):
+                                    try:
+                                        misprons[w] += 1
+                                    except KeyError:
+                                        misprons[w] = 1
+
+    word_list = sorted(word_list.items(), key=lambda x: x[0])
+    misprons = sorted(misprons.items(), key=lambda x: x[0])
+    print("")
+
+    return word_list, misprons
+
+
+def main():
+    if len(sys.argv[1:]) == 0:
+        print("Usage: ./make_word_list.py"
+              " <transcripts_list> <transcripts_dir> <word_list>"
+              " [--misprons <misprons>]")
+        sys.exit(1)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("transcripts_list", help="Path to list of training "
+                        "transcripts")
+    parser.add_argument("transcripts_dir", help="Path to the training "
+                        "transcripts directory")
+    parser.add_argument("word_list", help="Path to the generated word list"
+                        " of training words")
+    parser.add_argument("--misprons", help="Path to the generated word list"
+                        " of mispronounced words",
+                        action="store", default=None)
+    args = parser.parse_args()
+
+    # Collect words
+    words, misprons = process_transcripts(args.transcripts_dir,
+                                          args.transcripts_list)
+
+    # Print the word list
+    with codecs.open(args.word_list, "w", "utf-8") as f:
+        for word, count in words:
+            f.write("%d %s\n" % (count, unicode(word)))
+
+    if args.misprons is not None:
+        with codecs.open(args.misprons, "w", "utf-8") as f:
+            for word, count in misprons:
+                f.write("%d %s\n" % (count, word))
+
+if __name__ == "__main__":
+    main()
diff --git a/egs/babel/s5d/local/lonestar.py b/egs/babel/s5d/local/lonestar.py
new file mode 100755
index 00000000000..e1594e55ada
--- /dev/null
+++ b/egs/babel/s5d/local/lonestar.py
@@ -0,0 +1,333 @@
+#!/usr/bin/env python
+from pylauncher import *
+import pylauncher
+import sys
+
+import os
+import errno
+
+def make_path(path):
+    try:
+        os.makedirs(path)
+    except OSError as exception:
+        if exception.errno != errno.EEXIST:
+            raise
+        elif not os.path.isdir(path):
+            raise
+
+def tail(n, filename):
+    import subprocess
+    p=subprocess.Popen(['tail','-n',str(n),filename], stdout=subprocess.PIPE)
+    soutput,sinput=p.communicate()
+    soutput=soutput.split("\n")
+    return soutput
+
+def KaldiLauncher(lo, **kwargs):
+    import time;
+    jobid = JobId()
+    debug = kwargs.pop("debug","")
+    qdir= os.path.join(lo.qdir, lo.taskname);
+    cores = lo.nof_threads;
+
+    ce=SSHExecutor(workdir=qdir, debug=debug, force_workdir=True, catch_output=True)
+    ce.outstring="out."
+    ce.execstring=lo.taskname + "."
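+
+    # Rough picture of the pylauncher wiring below: the SSHExecutor above
+    # runs each generated command from the work directory qdir on a host of
+    # the pool, the HostPool spreads the tasks over the hosts returned by
+    # HostListByName(), and a task counts as finished once its stamp file
+    # "done.<taskid>" appears in qdir (which is what FileCompletion polls).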
+ + hostpool=HostPool(hostlist=HostListByName(), commandexecutor=ce ) + + completion=lambda x:FileCompletion( taskid=x, stamproot="done.", stampdir=qdir) + + logfiles = list() + commands = list() + for q in xrange(lo.jobstart, lo.jobend+1): + s = "bash " + lo.queue_scriptfile + " " + str(q) + commands.append(s) + + logfile = lo.logfile.replace("${PY_LAUNCHER_ID}", str(q)) + logfiles.append(logfile) + + generator=ListCommandlineGenerator(list=commands, cores=cores) + tasks = TaskGenerator(generator, completion=completion, debug=debug ) + + job = LauncherJob( hostpool=hostpool, taskgenerator=tasks, debug=debug,**kwargs) + + job.run() + #At this point all the .done files should exist and everything should be finalized. + num_failed=0; + time.sleep(1); #Lets wait for a while to give the shared fs time to sync + error_pending=True + for logfile in logfiles: + import time + sched_rate=[0, 0.5, 1, 2, 4, 8, 15, 32 ]; + for delay in sched_rate: + time.sleep(delay); + if os.path.isfile(logfile): + break; + if not os.path.isfile(logfile): + sys.stderr.write("ERROR: " + "The following file is missing:\n") + sys.stderr.write("ERROR: " + "\t" + logfile + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n"); + sys.exit(-1); + + error_pending=True; + for delay in sched_rate: + time.sleep(delay); + + lines=tail(10, logfile) + with_status=filter(lambda x:re.search(r'with status (\d+)', x), lines) + + if len(with_status) == 0: + sys.stderr.write("The last line(s) of the log-file " + logfile + " does not seem" + " to indicate return status as expected\n"); + elif len(with_status) > 1: + sys.stderr.write("The last line(s) of the log-file " + logfile + " does seem" + " to indicate multiple return statuses \n"); + else: + status_re=re.search(r'with status (\d+)', with_status[0]); + status=status_re.group(1); + if status == '0': + error_pending=False; + break; + sys.stderr.write("INFO: Waiting for status in files, sleeping %d seconds\n" % (delay,)) + if error_pending: + num_failed+=1; + + if num_failed != 0: + sys.stderr.write(sys.argv[0] + ": " + str(num_failed) + "/" + str(len(logfiles)) + " failed \n"); + sys.stderr.write(sys.argv[0] + ": See " + lo.logfile.replace("${PY_LAUNCHER_ID}", "*" ) + " for details\n"); + sys.exit(-1); + + #Remove service files. Be careful not to remove something that might be needed in problem diagnostics + for i in xrange(len(commands)): + out_file=os.path.join(qdir, ce.outstring+str(i)) + + #First, let's wait on files missing (it might be that those are missing + #just because of slow shared filesystem synchronization + if not os.path.isfile(out_file): + import time + sched_rate=[0.5, 1, 2, 4, 8 ]; + for delay in sched_rate: + time.sleep(delay); + if os.path.isfile(out_file): + break; + if not os.path.isfile(out_file): + sys.stderr.write("ERROR: " + "The following file is missing:\n") + sys.stderr.write("ERROR: " + "\t" + out_file + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n"); + sys.exit(-1); + + if os.stat(out_file).st_size != 0: + sys.stderr.write("ERROR: " + "The following file has non-zero size:\n") + sys.stderr.write("ERROR: " + "\t" + out_file + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. 
Try to figure out what and fix it\n"); + sys.exit(-1); + else: + exec_file=os.path.join(qdir, ce.execstring+str(i)) + done_file=os.path.join(qdir, "done."+str(i)) + if (not os.path.isfile(exec_file) ) or (not os.path.isfile(done_file)): + sys.stderr.write("ERROR: " + "One of the following files is missing:\n") + sys.stderr.write("ERROR: " + "\t" + exec_file + "\n") + sys.stderr.write("ERROR: " + "\t" + done_file + "\n") + sys.stderr.write("ERROR: " + "\t" + out_file + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n"); + sys.exit(-1); + elif os.stat(done_file).st_size != 0: + sys.stderr.write("ERROR: " + "The following file has non-zero size:\n") + sys.stderr.write("ERROR: " + "\t" + done_file + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n"); + sys.exit(-1); + else: + os.remove(exec_file) + os.remove(done_file) + os.remove(out_file) + try: + os.rmdir(qdir) + except OSError: + sys.stderr.write("ERROR: " + "Failed to remove the pylauncher task dir " + qdir + "\n"); + sys.stderr.write("ERROR: " + "Find out what is wrong and fix it\n") + sys.exit(-1); + + #print job.final_report() + +class LauncherOpts: + def __init__(self): + self.sync=0 + self.nof_threads = 1 + self.qsub_opts = None + + self.jobname=None + self.jobstart=None + self.jobend=None + pass + +def CmdLineParser(argv): + import re; + sync=0 + qsub_opts='' + nof_threads=1 + + while len(argv) >= 2 and argv[0].startswith('-'): + switch = argv.pop(0); + + if switch == '-V': + qsub_opts += switch + ' '; + else: + option = argv.pop(0) + + if switch == "-sync" and (option in ['Y', 'y']): + sync=1; + qsub_opts += switch + ' ' + option + ' '; + if switch == "-pe": + option2 = argv.pop(0); + qsub_opts += option2 + ' '; + nof_threads = int(option2); + + #Now we have to parse the JOB specifier + jobname = "" + jobstart = 0 + jobend = 0 + if (re.match( r"^[A-Za-z_]\w*=\d+:\d+$", argv[0])): + m=re.match( r"^([A-Za-z_]\w*)=(\d+):(\d+)$", argv[0]) + jobname=m.group(1) + jobstart=int(m.group(2)) + jobend=int(m.group(3)) + argv.pop(0) + elif(re.match( r"^[A-Za-z_]\w*=\d+$", argv[0])): + m=re.match( r"^([A-Za-z_]\w*)=(\d+)$", argv[0]) + jobname=m.group(1) + jobstart=int(m.group(2)) + jobend=int(m.group(2)) + argv.pop(0) + elif re.match("^.+=.*:.*$", argv[0]): + print >> sys.stderr, "warning: suspicious JOB argument " + argv[0]; + + if jobstart > jobend: + sys.stderr.write("lonestar.py: JOBSTART("+ str(jobstart) + ") must be lower than JOBEND(" + str(jobend) + ")\n") + sys.exit(1) + + logfile=argv.pop(0) + + opts=LauncherOpts() + opts.sync = sync + opts.nof_threads=nof_threads; + opts.qsub_opts=qsub_opts + opts.varname=jobname + opts.jobstart=jobstart + opts.jobend=jobend + opts.logfile=logfile + + opts.cmd = escape_cmd(argv); + + return (opts, argv) + +def escape_cmd(argv): + cmd ="" + for x in argv: + #print x + " -> ", + if re.search("^\S+$", x): + #print " A -> ", + cmd += x + " " + elif '"' in x: + cmd += "'''" + x + "''' " + else: + cmd += "\"" + x + "\" " + #print cmd + return cmd + +def setup_paths_and_vars(opts): + cwd = os.getcwd() + + if opts.varname and (opts.varname not in opts.logfile ) and (opts.jobstart != opts.jobend): + print >>sys.stderr, "lonestar.py: you are trying to run a parallel job" \ + "but you are putting the output into just one log file (" + opts.logfile + ")"; + sys.exit(1) + + if not os.path.isabs(opts.logfile): + opts.logfile = os.path.join(cwd, 
opts.logfile); + logfile=opts.logfile + + dir = os.path.dirname(logfile) + base = os.path.basename(logfile) + qdir = os.path.join(dir, "q"); + + if re.search("log/*q", qdir, flags=re.IGNORECASE): + qdir = re.sub("log/*q", "/q", qdir, flags=re.IGNORECASE) + + + queue_logfile= os.path.join(qdir, base) + if opts.varname: + queue_logfile = re.sub("\.?"+opts.varname, "", queue_logfile) + + taskname=os.path.basename(queue_logfile) + taskname = taskname.replace(".log", ""); + if taskname == "": + print >> sys.stderr, "lonestar.py: you specified the log file name in such form " \ + "that leads to an empty task name ("+logfile + ")"; + sys.exit(1) + + if not os.path.isabs(queue_logfile): + queue_logfile= os.path.join(cwd, queue_logfile) + + if opts.varname: + opts.logfile = opts.logfile.replace(opts.varname, "${PY_LAUNCHER_ID}") + opts.cmd = opts.cmd.replace(opts.varname, "${PY_LAUNCHER_ID}"); + + queue_scriptfile=queue_logfile; + if re.search("\.[a-zA-Z]{1,5}$", queue_scriptfile): + queue_scriptfile = re.sub("\.[a-zA-Z]{1,5}$", ".sh", queue_scriptfile); + if not os.path.isabs(queue_scriptfile): + queue_scriptfile= os.path.join(cwd, queue_scriptfile) + + + make_path(qdir) + make_path(dir) + + opts.qdir = qdir + opts.log_dir = dir + opts.queue_scriptfile = queue_scriptfile + opts.queue_logfile = queue_logfile + opts.taskname = taskname + + return opts + + + +def create_scriptfile(scriptname, opts): + import os + logfile = opts.logfile + cmd = opts.cmd + nof_threads=opts.nof_threads; + cwd = os.getcwd() + #print scriptname + f = open(scriptname, "wb") + f.write("#!/bin/bash\n") + f.write("export PY_LAUNCHER_ID=$1; shift;\n") + f.write("cd " + cwd + "\n") + f.write(". ./path.sh\n") + f.write("( echo '#' Running on `hostname`\n") + f.write(" echo '#' Started at `date`\n") + f.write(" echo -n '# '; cat < " +logfile + "\n") + f.write("time1=`date +\"%s\"`\n") + f.write("( " + cmd + ") 2>>" + logfile + " >>" + logfile + " \n") + f.write("ret=$?\n") + f.write("time2=`date +\"%s\"`\n") + f.write("echo '#' Accounting time=$(($time2 - $time1)) threads=" + str(nof_threads) + " >> " + logfile + "\n") + + f.write("echo '#' Finished at `date` with status $ret >>" + logfile + "\n") + f.write("exit $ret \n") + f.close() + + + +if __name__ == "__main__": + (opts, cmd) = CmdLineParser(sys.argv[1:]); + setup_paths_and_vars(opts) + create_scriptfile(opts.queue_scriptfile, opts); + + #pylauncher.ClassicLauncher(["true && sleep 10s", "false || sleep 1s" ], debug="job+host+task+exec+ssh") + KaldiLauncher(opts, debug="") + + diff --git a/egs/babel/s5d/local/make_L_align.sh b/egs/babel/s5d/local/make_L_align.sh new file mode 100755 index 00000000000..50e46a00493 --- /dev/null +++ b/egs/babel/s5d/local/make_L_align.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright 2013 Johns Hopkins University (authors: Guoguo Chen, Yenda Trmal) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. 
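+
+# Each lexicon entry is wrapped in the word-boundary markers #1 and #2 by
+# the awk snippets below, e.g. an entry "word p1 p2" becomes
+# "word #1 p1 p2 #2", so that word begin/end points can be recovered from
+# alignments produced with L_align.fst.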
+
+
+set -o pipefail
+set -e
+
+if [ $# -ne 3 ]; then
+  echo "This is a simple script that will generate the L_align.fst"
+  echo "The FST L_align.fst is used for getting the force-aligned "
+  echo "utterances"
+  echo "The script automatically recognizes whether the probabilistic"
+  echo "lexicon is used and will pick the correct file"
+  echo ""
+  echo "usage: local/make_L_align.sh <lang-local-dir> <lang-dir> <output-dir>"
+  echo "e.g.: local/make_L_align.sh data/local/lang data/lang data/lang"
+  exit 1;
+fi
+
+tmpdir=$1
+dir=$2
+outdir=$3
+
+silphone=`cat $dir/phones/optional_silence.txt` || exit 1;
+
+# Create lexicon with alignment info
+if [ -f $tmpdir/lexicon.txt ] ; then
+  cat $tmpdir/lexicon.txt | \
+    awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
+elif [ -f $tmpdir/lexiconp.txt ] ; then
+  cat $tmpdir/lexiconp.txt | \
+    awk '{printf("%s #1 ", $1); for (n=3; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
+else
+  echo "Neither $tmpdir/lexicon.txt nor $tmpdir/lexiconp.txt exists"
+  exit 1
+fi | utils/make_lexicon_fst.pl - 0.5 $silphone | \
+fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \
+  --keep_isymbols=false --keep_osymbols=false | \
+fstarcsort --sort_type=olabel > $outdir/L_align.fst
+
+exit 0;
diff --git a/egs/babel/s5d/local/make_corpus_subset.sh b/egs/babel/s5d/local/make_corpus_subset.sh
new file mode 100755
index 00000000000..12925830268
--- /dev/null
+++ b/egs/babel/s5d/local/make_corpus_subset.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0.
+
+#Begin configuration
+ignore_missing_txt=false  #If the reference transcript txt is missing, \
+                          #shall we ignore it or treat it as a fatal error?
+#End configuration
+echo "$0 $@"  # Print the command line for logging
+
+help_message="$0: create subset of the input directory (specified as the first directory).
+                 The subset is specified by the second parameter.
+                 The directory in which the subset should be created is the third parameter
+             Example:
+                 $0 <source-dir1> <source-list1> [<source-dir2> <source-list2> ...] <target-dir>"
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [[ "$#" -lt "3" ]] ; then
+  echo -e "FATAL: wrong number of script parameters!\n\n"
+  printf "$help_message\n\n"
+  exit 1;
+fi
+
+output_data_dir=${@: -1}  # last argument to the script
+sources=( $@ )
+unset sources[${#sources[@]}-1]  # 'pop' the last argument which is odir
+num_src=${#sources[@]}  # number of systems to combine
+
+if [ $(( $num_src % 2 )) -ne 0 ]; then
+  echo -e "FATAL: wrong number of script parameters!"
+  echo -e "     : The input directories are not in pairs!\n\n"
+  printf "$help_message\n\n"
+  exit 1;
+fi
+
+mkdir -p $output_data_dir/transcription
+mkdir -p $output_data_dir/audio
+
+num_warns_def=3;
+
+rm -f $output_data_dir/filelist.list
+
+for i in `seq 0 $(( $num_src / 2 - 1))` ; do
+  num_warns=$num_warns_def;
+  input_data_dir=${sources[ $[2 * $i] ]}
+  input_data_list=${sources[ $((2 * $i + 1)) ]}
+
+  abs_src_dir=`readlink -f $input_data_dir`
+  abs_tgt_dir=`readlink -f $output_data_dir`
+
+  if [[ ! -d "$input_data_dir" ]] ; then
+    echo "FATAL: input data directory does not exist";
+    exit 1;
+  fi
+  if [[ ! 
-f "$input_data_list" ]] ; then + echo "FATAL: input data list file does not exist!"; + exit 1; + fi + + idl=`basename $input_data_list` + echo "Making subsets from $input_data_dir according to $idl" + + for file_basename in `cat $input_data_list`; do + if [[ -e $abs_src_dir/audio/$file_basename.sph ]] ; then + ln -sf $abs_src_dir/audio/$file_basename.sph $abs_tgt_dir/audio || exit 1 + else + if [[ -e $abs_src_dir/audio/$file_basename.wav ]] ; then + ln -sf $abs_src_dir/audio/$file_basename.wav $abs_tgt_dir/audio || exit 1 + else + echo "File $abs_src_dir/audio/$file_basename.sph|wav does not exist!" >&2 + exit 1 + fi + fi + + if [[ -e $abs_src_dir/transcription/$file_basename.txt ]] ; then + ln -sf $abs_src_dir/transcription/$file_basename.txt $abs_tgt_dir/transcription || exit 1 + else + if ! $ignore_missing_txt ; then + echo "File $abs_src_dir/transcription/$file_basename.txt does not exist!" + exit 1; + elif [ $num_warns -gt 0 ]; then + echo "WARNING: File $file_basename.txt does not exist!" + num_warns=$(($num_warns - 1)) + elif [ $num_warns -eq 0 ]; then + echo "Not warning anymore" + num_warns=$(($num_warns - 1)) + fi + fi + done + cat $input_data_list >> $output_data_dir/filelist.list +done + + diff --git a/egs/babel/s5d/local/make_ecf_subset.sh b/egs/babel/s5d/local/make_ecf_subset.sh new file mode 100755 index 00000000000..9bdd95c3e27 --- /dev/null +++ b/egs/babel/s5d/local/make_ecf_subset.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0. + +echo "$0 $@" 1>&2 # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +help_message="$0: generates an subset ecf file for spoken term detection evaluation. + The first parameter specifies the descriptor of the subset, + the second parameter specifies the original ecf file. + The file will be generated in the kws subdirectory of the directory + given as a third parameter and will be named ecf.xml + Output goes to stdout. + Usage: + $0 " + + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [[ "$#" -ne "2" ]] ; then + echo -e "FATAL: wrong number of script parameters!\n\n" 1>&2 + printf "$help_message\n\n" 1>&2 + exit 1; +fi + +list_file=$1 +src_ecf_file=$2 + +if [[ ! -f "$list_file" ]]; then + echo -e "FATAL: The list file does not exist! \n\n" 1>&2 + printf "$help_message\n" 1>&2 + exit 1; +fi +if [[ ! -f "$src_ecf_file" ]]; then + echo -e "FATAL: The source ecf file does not exist! \n\n" 1>&2 + printf "$help_message\n" 1>&2 + exit -1 +fi + + +duration=`grep -F -f $list_file $src_ecf_file | sed "s/.*dur=\"\([0-9.][0-9.]*\).*/\1 /g" | awk '{x += $1;} END{print x;}'` + +# Output is produced here: +( + grep "" +) diff --git a/egs/babel/s5d/local/make_lexicon_fst_special.pl b/egs/babel/s5d/local/make_lexicon_fst_special.pl new file mode 100755 index 00000000000..3df6e7a9527 --- /dev/null +++ b/egs/babel/s5d/local/make_lexicon_fst_special.pl @@ -0,0 +1,53 @@ +#!/usr/bin/env perl +use warnings; #sed replacement for -w perl parameter +# Copyright 2012 Johns Hopkins University (author: Daniel Povey) + +# makes lexicon FST -- special version only for use in keyword search +# for allowing optional silences between words. This version has +# no pron-probs involved, and +# does support an optional silence, but this silence is only allowed +# between words (where it may occur an arbitrary number of times), +# not at the beginning or end of the file. 
+
+if(@ARGV != 2) {
+    die "Usage: make_lexicon_fst_special.pl lexicon.txt silphone >lexiconfst.txt"
+}
+
+$lexfn = shift @ARGV;
+$silphone = shift @ARGV;
+
+open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
+
+
+$startstate = 0;
+$silstate = 1;
+$endstate = 2;
+$nextstate = 3;
+
+sub create_wseq {
+  my $init_state = shift @_;
+  my $end_state = shift @_;
+  my $word_or_eps = shift @_;
+  my @phones = @_;
+  if (@phones == 0) { push @phones, "<eps>"; }
+  my $x;
+  my $curstate = $init_state;
+  for ($x = 0; $x + 1 < @phones; $x++) {
+    print "$curstate\t$nextstate\t$phones[$x]\t$word_or_eps\n";
+    $word_or_eps = "<eps>";
+    $curstate = $nextstate;
+    $nextstate++;
+  }
+  print "$curstate\t$end_state\t$phones[$x]\t$word_or_eps\n";
+}
+
+
+while(<L>) {
+  @A = split(" ", $_);
+  $w = shift @A;
+  create_wseq($startstate, $endstate, $w, @A);
+  create_wseq($endstate, $endstate, $w, @A);
+  create_wseq($silstate, $endstate, $w, @A);
+}
+print "$endstate\t$silstate\t$silphone\t<eps>\n";
+print "$endstate\t0\n"; # final-cost.
diff --git a/egs/babel/s5d/local/make_lexicon_subset.sh b/egs/babel/s5d/local/make_lexicon_subset.sh
new file mode 100755
index 00000000000..1e77fcaa2b9
--- /dev/null
+++ b/egs/babel/s5d/local/make_lexicon_subset.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+transcriptions=$1
+input_lexicon_file=$2
+output_lexicon_file=$3
+
+(
+  #find $dev_data_dir/transcription/ -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g'
+  find $transcriptions -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g'
+) | sort -u | awk '
+  BEGIN {
+    while(( getline line< ARGV[2] ) > 0 ) {
+      split(line, e, "\t")
+      LEXICON[ e[1] ]=line
+    }
+    FILENAME="-"
+    i=0
+
+    while(( getline word< ARGV[1] ) > 0 ) {
+      if (word in LEXICON)
+        print LEXICON[word]
+    }
+  }
+' - $input_lexicon_file | sort -u > $output_lexicon_file
+
diff --git a/egs/babel/s5d/local/make_wordlist.sh b/egs/babel/s5d/local/make_wordlist.sh
new file mode 100644
index 00000000000..ef589b917cb
--- /dev/null
+++ b/egs/babel/s5d/local/make_wordlist.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+transcriptions=$1
+wordlist=$2
+
+(
+  find $transcriptions -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g'
+) | sort -u | grep -v -E '.*\*.*|<.*>|\(\(\)\)|^-.*|.*-$' > $wordlist
+
diff --git a/egs/babel/s5d/local/map_lang.sh b/egs/babel/s5d/local/map_lang.sh
new file mode 100755
index 00000000000..998a11d0cd0
--- /dev/null
+++ b/egs/babel/s5d/local/map_lang.sh
@@ -0,0 +1,81 @@
+#! /usr/bin/bash
+
+VARIABLES=`diff <(compgen -A variable) <(. ./lang.conf.orig; compgen -A variable) | grep '^>'| sed 's/^> *//g'`
+
+. ./conf/common_vars.sh
+. 
./lang.conf.orig + +for variable in $VARIABLES ; do + + eval VAL=\$${variable} + if [[ $VAL =~ /export/babel/data/ ]] ; then + eval $variable=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/} + #declare -x $variable + declare -p $variable + fi +done + +for kwlist in $( (compgen -A variable) | grep _data_list ) ; do + declare -p $kwlist + eval KEYS="\${!${kwlist}[@]}" + #declare -p my_more_kwlist_keys + for key in $KEYS # make sure you include the quotes there + do + #echo $key + eval VAL="\${${kwlist}[$key]}" + #echo $my_more_kwlist_val + if [[ $VAL =~ /export/babel/data/ ]] ; then + eval $kwlist["$key"]=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/} + fi + done + declare -p $kwlist +done +unset VAL +unset KEYS + +for kwlist in $( (compgen -A variable) | grep _data_dir ) ; do + declare -p $kwlist + eval KEYS="\${!${kwlist}[@]}" + #declare -p my_more_kwlist_keys + for key in $KEYS # make sure you include the quotes there + do + #echo $key + eval VAL="\${${kwlist}[$key]}" + #echo $my_more_kwlist_val + if [[ $VAL =~ /export/babel/data/ ]] ; then + eval $kwlist["$key"]=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/} + fi + done + declare -p $kwlist +done +unset VAL +unset KEYS + +for kwlist in $( (compgen -A variable) | grep _more_kwlists ) ; do + declare -p $kwlist + eval KEYS="\${!${kwlist}[@]}" + #declare -p my_more_kwlist_keys + for key in $KEYS # make sure you include the quotes there + do + #echo $key + eval VAL="\${${kwlist}[$key]}" + #echo $my_more_kwlist_val + if [[ $VAL =~ /export/babel/data/ ]] ; then + eval $kwlist["$key"]=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/} + fi + done + declare -p $kwlist +done +unset VAL +unset KEYS + +if [ "$babel_type" == "limited" ] ; then + train_nj=32 +else + train_nj=64 +fi +dev10h_nj=60 +unsup_nj=120 +shadow_nj=60 +shadow2_nj=120 +eval_nj=120 diff --git a/egs/babel/s5d/local/naive_comb.pl b/egs/babel/s5d/local/naive_comb.pl new file mode 100755 index 00000000000..74ad20d84e3 --- /dev/null +++ b/egs/babel/s5d/local/naive_comb.pl @@ -0,0 +1,234 @@ +#!/usr/bin/env perl + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0. +# + +use strict; +use warnings; +use Getopt::Long; +use XML::Simple; +use Data::Dumper; +use File::Basename; + +my $tolerance = 0.5; + +sub ReadKwslist { + my $kwslist_in = shift @_; + + my $source = "STDIN"; + if ($kwslist_in ne "-") { + open(I, "<$kwslist_in") || die "Fail to open kwslist $kwslist_in.\n"; + $source = "I"; + } + + # Read in the kwslist and parse it. 
Note that this is a naive parse -- I simply + # assume that the kwslist is "properly" generated + my @KWS; + my (@info, $kwid, $tbeg, $dur, $file, $score, $channel); + my ($kwlist_filename, $language, $system_id) = ("", "", ""); + while (<$source>) { + chomp; + + if (/[0]\" language=\"$info->[1]\" system_id=\"$info->[2]\">\n"; + my $prev_kw = ""; + foreach my $kwentry (@{$KWS}) { + if ($prev_kw ne $kwentry->[0]) { + if ($prev_kw ne "") {$kwslist .= " \n";} + $kwslist .= " [0]\" oov_count=\"0\">\n"; + $prev_kw = $kwentry->[0]; + } + $kwslist .= " [1]\" channel=\"$kwentry->[2]\" tbeg=\"$kwentry->[3]\" dur=\"$kwentry->[4]\" score=\"$kwentry->[5]\" decision=\"$kwentry->[6]\""; + if (defined($kwentry->[7])) {$kwslist .= " threshold=\"$kwentry->[7]\"";} + if (defined($kwentry->[8])) {$kwslist .= " raw_score=\"$kwentry->[8]\"";} + $kwslist .= "/>\n"; + } + $kwslist .= " \n"; + $kwslist .= "\n"; + + return $kwslist; +} + +sub KwslistTimeCompare { + my ($a, $b) = @_; + + if ($a->[0] eq $b->[0]) { + if ($a->[1] eq $b->[1]) { + if (abs($a->[3]-$b->[3]) <= $tolerance) { + if (abs($a->[3]+$a->[4]-$b->[3]-$b->[4]) <= $tolerance) { + return 0; + } else { + return ($a->[3]+$a->[4]) <=> ($b->[3]+$b->[4]); + } + } else { + return $a->[3] <=> $b->[3]; + } + } else { + return $a->[1] cmp $b->[1]; + } + } else { + $a->[0] cmp $b->[0]; + } +} + +sub KwslistTimeSort { + return KwslistTimeCompare($a, $b); +} + +my $Usage = < w2 ... + e.g.: naive_comb.pl 0.5 kwslist1.xml 0.5 kwslist2.xml ... kwslist_comb.xml + +Allowed options: + --method : Use different combination method (int, default = 1) + 1 -- Weighted sum + 2 -- Weighted "powered" + --power : The power of method 2 (float, default = 0.5) + --tolerance : Tolerance for being the same hits (float, default = 0.5) + +EOU + +my $method = 1; +my $power = 0.5; +GetOptions('tolerance=f' => \$tolerance, + 'method=i' => \$method, + 'power=f' => \$power, + 'inv-power=f' => sub { (my $opt, my $val) = @_; $power = 1.0/$val;}); + +@ARGV >= 3 || die $Usage; + +# Workout the input/output source +@ARGV % 2 == 1 || die "Bad number of (weight, kwslist) pair.\n"; +my @kwslist_file = (); +my @weight = (); +while (@ARGV != 1) { + my $w = shift @ARGV; + $w =~ m/^[0-9.]*$/ || die "Bad weight: $w.\n"; + push(@weight, $w); + push(@kwslist_file, shift @ARGV); +} +my $output = shift @ARGV; + +# Open the first kwslist +my ($info, $KWS) = @{ReadKwslist($kwslist_file[0])}; + +# Open the rest kwslists +my @kwslist = (); +for (my $i = 1; $i < @kwslist_file; $i ++) { + push(@kwslist, @{ReadKwslist($kwslist_file[$i])}[1]); +} + +# Process the first kwslist +my @KWS = sort KwslistTimeSort @{$KWS}; +my $w = shift @weight; +foreach my $kwentry (@$KWS) { + if ($method == 1) { + $kwentry->[5] = $kwentry->[5] * $w; + } elsif ($method == 2) { + $kwentry->[5] = ($kwentry->[5]**$power) * $w; + } else { + die "Method not defined.\n"; + } +} + +# Start merging the rest kwslists +while (@kwslist > 0) { + my $w = shift @weight; + my @kws = sort KwslistTimeSort @{shift @kwslist}; + + # We'll take time information from the first system + my ($i, $j) = (0, 0); + my @from_kws; + while ($i < @KWS and $j < @kws) { + my $cmp = KwslistTimeCompare($KWS[$i], $kws[$j]); + if ($cmp == 0) { + if ($method == 1) { + $KWS[$i]->[5] += $kws[$j]->[5] * $w; + } elsif ($method == 2) { + $KWS[$i]->[5] += ($kws[$j]->[5]**$power) * $w; + } else { + die "Method not defined.\n"; + } + $i ++; + $j ++; + } elsif ($cmp == -1) { + $i ++; + } else { + if ($method == 1) { + $kws[$j]->[5] = $kws[$j]->[5] * $w; + } elsif ($method == 2) { + 
$kws[$j]->[5] = ($kws[$j]->[5]**$power) * $w;
+      } else {
+        die "Method not defined.\n";
+      }
+      push(@from_kws, $kws[$j]);
+      $j ++;
+    }
+  }
+  while ($j < @kws) {
+    if ($method == 1) {
+      $kws[$j]->[5] = $kws[$j]->[5] * $w;
+    } elsif ($method == 2) {
+      $kws[$j]->[5] = ($kws[$j]->[5]**$power) * $w;
+    } else {
+      die "Method not defined.\n";
+    }
+    push(@from_kws, $kws[$j]);
+    $j ++;
+  }
+
+  # Sort again
+  @from_kws = (@KWS, @from_kws);
+  @KWS = sort KwslistTimeSort @from_kws;
+}
+
+if ($method == 2) {
+  foreach my $kwentry (@KWS) {
+    $kwentry->[5] = $kwentry->[5]**(1.0/$power);
+  }
+}
+
+# Sorting and printing
+my $kwslist = PrintKwslist(\@{$info}, \@KWS);
+
+if ($output eq "-") {
+  print $kwslist;
+} else {
+  open(O, ">$output") || die "Fail to open output file: $output\n";
+  print O $kwslist;
+  close(O);
+}
diff --git a/egs/babel/s5d/local/nist_eval/create_compound_set.sh b/egs/babel/s5d/local/nist_eval/create_compound_set.sh
new file mode 100755
index 00000000000..737f7a0fcd9
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/create_compound_set.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+
+#Simple script to create compound set info that will allow for more automated
+#work with the shadow set.
+#
+#The notion of shadow data set came from the need to be able to verify
+#the output of the recognizer during decoding the evaluation data.
+#The idea is simple -- instead of decoding just the eval data, decode both
+#the eval data plus the dev data (or at least some portion of it),
+#interleaved randomly.
+#After decoding, we can isolate (split) the output from the decoding (and kws)
+#so that we can score the dev data subset, and if the score is identical to
+#the score obtained by decoding the dev set previously, we can be a little
+#bit more sure that the eval set results are correct.
+
+. ./path.sh
+
+flen=0.01
+
+[ ! -f lang.conf ] && echo "File lang.conf must exist (and contain a valid config)"
+. ./lang.conf
+
+devset=dev10h.pem
+evlset=eval.seg
+tgtset=shadow.seg
+tgtdir=
+
+. 
utils/parse_options.sh +[ -z $tgtdir ] && tgtdir=data/$tgtset + +devset_basename=${devset%%.*} +devset_segments=${devset#*.} + +evlset_basename=${evlset%%.*} +evlset_segments=${evlset#*.} + +eval devset_flist=\$${devset_basename}_data_list +eval devset_ecf=\$${devset_basename}_ecf_file +eval devset_rttm=\$${devset_basename}_rttm_file +eval devset_stm=\$${devset_basename}_stm_file + +eval evlset_flist=\$${evlset_basename}_data_list +eval evlset_ecf=\$${evlset_basename}_ecf_file +eval evlset_rttm=\$${evlset_basename}_rttm_file +eval evlset_stm=\$${evlset_basename}_stm_file + +rm -rf $tgtdir/compounds +mkdir -p $tgtdir/compounds +mkdir -p $tgtdir/compounds/$devset +mkdir -p $tgtdir/compounds/$evlset + +echo "Creating compound $tgtdir/compounds/$devset" +( + echo "DEVSET file list: $devset_flist" + cat `readlink -f $devset_flist` > $tgtdir/compounds/$devset/files.list + echo "DEVSET ECF file : $devset_ecf" + cat `readlink -f $devset_ecf` > $tgtdir/compounds/$devset/ecf.xml + echo "DEVSET RTTM file: $devset_rttm" + cat `readlink -f $devset_rttm` > $tgtdir/compounds/$devset/rttm + echo "DEVSET STM file : $devset_stm" + cat `readlink -f $devset_stm` | sed 's/ 1 / A /g' > $tgtdir/compounds/$devset/stm + + cat $tgtdir/segments | grep -w -F -f $tgtdir/compounds/$devset/files.list > $tgtdir/compounds/$devset/segments + awk '{print $1}' $tgtdir/compounds/$devset/segments > $tgtdir/compounds/$devset/utterances + + for kwset_path in $tgtdir/kwset_*; do + kwset=`basename $kwset_path` + output=$tgtdir/compounds/$devset/$kwset + + mkdir -p $output/tmp + cp $tgtdir/$kwset/kwlist.xml $output/ + cp $tgtdir/$kwset/utt.map $output/ + cp $tgtdir/compounds/$devset/ecf.xml $output/ + cp $tgtdir/compounds/$devset/rttm $output/ + local/search/rttm_to_hitlists.sh --segments $tgtdir/segments \ + --utt-table $tgtdir/$kwset/utt.map $tgtdir/compounds/$devset/rttm \ + $tgtdir/$kwset/kwlist.xml $tgtdir/compounds/$devset/ecf.xml \ + $output/tmp $output/hitlist 2> $output/hitlist.fails + + n1=`cat $output/hitlist.fails | wc -l` + n2=`awk '{print $13}' $output/hitlist.fails | sort |uniq -c | wc -l` + + echo "INFO: For kwlist $kwset, $n2 KW types won't be found ($n1 tokens in total)" + + duration=$(cat $devset_ecf | perl -ne 'BEGIN{$dur=0;}{next unless $_ =~ /dur\=/; s/.*dur="([^"]*)".*/$1/; $dur+=$_;}END{print $dur/2}') + + echo $duration > $output/trials + echo $flen > $output/frame_length + + echo "Number of trials: `cat $output/trials`" + echo "Frame lengths: `cat $output/frame_length`" + { + cat $tgtdir/$kwset/f4de_attribs | grep kwlist_name + language=$(grep kwlist $tgtdir/$kwset/kwlist.xml | head -n 1 | sed -E 's/.*language="([^"]*)".*/\1/g') + echo "language=$language" + echo "flen=$flen" + } > $output/f4de_attribs + + cp $tgtdir/$kwset/categories $output/ + done +) + +echo "Creating compound $tgtdir/compounds/$evlset" +( + echo "EVLSET file list: $evlset_flist" + cat `readlink -f $evlset_flist` > $tgtdir/compounds/$evlset/files.list + echo "EVLSET ECF file : $evlset_ecf" + cat `readlink -f $evlset_ecf` > $tgtdir/compounds/$evlset/ecf.xml + if [ ! -z "$evlset_rttm" ]; then + echo "EVLSET RTTM file: $evlset_rttm" + cat `readlink -f $evlset_rttm` > $tgtdir/compounds/$evlset/rttm + fi + if [ ! 
-z "$evlset_stm" ]; then + echo "EVLSET STM file : $evlset_stm" + cat `readlink -f $evlset_stm` | sed 's/ 1 / A /g' > $tgtdir/compounds/$evlset/stm + fi + + cat $tgtdir/segments | \ + grep -w -F -f $tgtdir/compounds/$evlset/files.list > $tgtdir/compounds/$evlset/segments + awk '{print $1}' $tgtdir/compounds/$evlset/segments > $tgtdir/compounds/$evlset/utterances + + for kwset_path in $tgtdir/kwset_*; do + kwset=`basename $kwset_path` + output=$tgtdir/compounds/$evlset/$kwset + + mkdir -p $output/tmp + cp $tgtdir/$kwset/kwlist.xml $output/ + cp $tgtdir/$kwset/utt.map $output/ + cp $tgtdir/compounds/$evlset/ecf.xml $output/ + + if [ -f "$tgtdir/compounds/$evlset/rttm" ]; then + cp $tgtdir/compounds/$evlset/rttm $output/ + local/search/rttm_to_hitlists.sh --segments $tgtdir/segments \ + --utt-table $tgtdir/$kwset/utt.map $tgtdir/compounds/$evlset/rttm \ + $tgtdir/$kwset/kwlist.xml $tgtdir/compounds/$evlset/ecf.xml \ + $output/tmp $output/hitlist 2> $output/hitlist.fails + + n1=`cat $output/hitlist.fails | wc -l` + n2=`awk '{print $13}' $output/hitlist.fails | sort |uniq -c | wc -l` + + echo "INFO: For kwlist $kwset, $n2 KW types won't be found ($n1 tokens in total)" + fi + + duration=$(cat $evlset_ecf | perl -ne 'BEGIN{$dur=0;}{next unless $_ =~ /dur\=/; s/.*dur="([^"]*)".*/$1/; $dur+=$_;}END{print $dur/2}') + + echo $duration > $output/trials + echo $flen > $output/frame_length + + echo "Number of trials: `cat $output/trials`" + echo "Frame lengths: `cat $output/frame_length`" + { + cat $tgtdir/$kwset/f4de_attribs | grep kwlist_name + language=$(grep kwlist $tgtdir/$kwset/kwlist.xml | head -n 1 | sed -E 's/.*language="([^"]*)".*/\1/g') + echo "language=$language" + echo "flen=$flen" + } > $output/f4de_attribs + + cp $tgtdir/$kwset/categories $output/ + done +) + +echo "Compound creation OK." + + diff --git a/egs/babel/s5d/local/nist_eval/create_new_language_configs.FLP.sh b/egs/babel/s5d/local/nist_eval/create_new_language_configs.FLP.sh new file mode 100755 index 00000000000..2af8dc9e410 --- /dev/null +++ b/egs/babel/s5d/local/nist_eval/create_new_language_configs.FLP.sh @@ -0,0 +1,236 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +language="201-haitian" +corpus=/export/babel/data/ +indus=/export/babel/data/scoring/IndusDB +# End configuration section +. ./utils/parse_options.sh + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +corpus=$corpus/$language +lists=./conf/lists/$language/ + +corpusdir=$(find $corpus -maxdepth 1 \( -name "release-current" -o -name "release-current-b" \) -type d) || exit 1 +[ -z "$corpusdir" ] && corpusdir=$(find $corpus -maxdepth 1 -name "*-build" -type d) +[ -z "$corpusdir" ] && echo >&2 "Corpus directory for $language not found!" && exit 1 + +train_dir=$(find $corpusdir -ipath "*/conversational/*" -name "training" -type d) || exit 1 +[ -z "$train_dir" ] && echo >&2 "Corpus directory $corpusdir/*/training/ not found!" && exit 1 + +train_rom_dir=$(find $train_dir -name "transcript_roman" -type d) || exit 1 +echo "# include common settings for fullLP systems." +echo ". 
conf/common.fullLP || exit 1;" +echo -e "\n" + +echo "#speech corpora files location" +echo "train_data_dir=$train_dir" +if [ -f "$lists/training.list" ] ; then + echo "train_data_list=$lists/training.list" +elif [ -f "$lists/train.FullLP.list" ] ; then + echo "train_data_list=$lists/train.FullLP.list" +else + echo >&2 "Training list $lists/training.list not found" +fi + +echo "train_nj=32" +echo -e "\n" + + +indusid=$(find $corpus -name "IARPA*-build" -type d) +[ -z $indusid ] && indusid=$(find $corpus \( -name "release-current" -o -name "release-current-b" \) -type d) +[ -z $indusid ] && echo >&2 "Didn't find anything that could be used as IndusDB id" && exit 1 + +indusid=$(basename ${indusid}) +indusid=${indusid%%-build} +dataset=dev10h +dev10h_dir=$(find $corpusdir -ipath "*/conversational/*" -name "dev" -type d) || exit 1 +indusdev10=$(find $indus/ -maxdepth 1 -name "$indusid*dev" -type d) +if [ -z "$indusdev10" ] ; then + echo >&2 "IndusDB entry \"$indusid*dev\" not found -- removing the version and retrying" + indusid=${indusid%%-v*} + indusdev10=$(find $indus/ -maxdepth 1 -name "$indusid*dev" -type d) + if [ -z "$indusdev10" ] ; then + echo >&2 "IndusDB entry \"$indusid*dev\" not found -- keeping only the language code and retrying" + indusid=${language%%-*} + indusdev10=$(find $indus/ -maxdepth 1 -name "*${indusid}*dev" -type d) + if [ -z "$indusdev10" ] ; then + echo >&2 "IndusDB configuration for the language code $indusid not found" + exit 1 + fi + fi +fi + +if [ -z "$indusdev10" ] ; then + echo "" +else + dev10h_rttm=$(find $indusdev10/ -name "*mitllfa3.rttm" ) + dev10h_ecf=$(find $indusdev10/ -name "*ecf.xml" ) + dev10h_stm=$(find $indusdev10/ -name "*stm" -not -name "*cond-speaker*" ) + kwlists1=$(find $indusdev10/ -name "*.kwlist.xml" | sort -V ) + kwlists2=$(find $indusdev10/ -name "*.kwlist?*.xml" | sort -V ) + kwlists="$kwlists1 $kwlists2" + dev10h_kwlists="$kwlists" +fi + +echo "#Radical reduced DEV corpora files location" +echo "dev2h_data_dir=$dev10h_dir" +echo "dev2h_data_list=$lists/dev.2h.list" +[ ! -z ${dev10h_rttm:-} ] && echo "dev2h_rttm_file=$dev10h_rttm" +[ ! -z ${dev10h_ecf:-} ] && echo "dev2h_ecf_file=$dev10h_ecf" +[ ! -z ${dev10h_stm:-} ] && echo "dev2h_stm_file=$dev10h_stm" +if [ ! -z "${kwlists:-}" ] ; then + echo "dev2h_kwlists=(" + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # dev2h_kwlists" +fi +echo "dev2h_nj=16" +echo "dev2h_subset_ecf=true" +echo -e "\n" + +echo "#Official DEV corpora files location" +echo "dev10h_data_dir=$dev10h_dir" +echo "dev10h_data_list=$lists/dev.list" +[ ! -z ${dev10h_rttm:-} ] && echo "dev10h_rttm_file=$dev10h_rttm" +[ ! -z ${dev10h_ecf:-} ] && echo "dev10h_ecf_file=$dev10h_ecf" +[ ! -z ${dev10h_stm:-} ] && echo "dev10h_stm_file=$dev10h_stm" +if [ ! -z "${kwlists:-}" ] ; then + echo "dev10h_kwlists=(" + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # dev10h_kwlists" +fi +echo "dev10h_nj=32" +echo -e "\n" + +dataset="eval" +eval_dir=$(find $corpus -ipath "*-eval/*/conversational/*" -name "$dataset" -type d) || exit 1 +[ -z "$eval_dir" ] && { eval_dir=$(find $corpusdir -ipath "*/conversational/*" -name "eval" -type d) || exit 1; } +if [ ! 
-z "$eval_dir" ] ; then + indus_set=$(find $indus/ -maxdepth 1 -name "$indusid*$dataset" -type d) + if [ -z "$indus_set" ] ; then + eval_ecf=$(find $indus/ -maxdepth 1 -type f -name "*$indusid*${dataset}.ecf.xml" ) + eval_kwlists1=$(find $indus -name "*$indusid*${dataset}.kwlist*.xml" | sort -V) + eval_kwlists2=$(find $indus -name "*$indusid*${dataset}.kwlist?*.xml" | sort -V) + eval_kwlists="$kwlists1 $kwlists2" + else + eval_rttm=$(find $indus_set/ -name "*mitllfa3.rttm" ) + eval_ecf=$(find $indus_set/ -name "*ecf.xml" ) + eval_stm=$(find $indus_set/ -name "*stm" -not -name "*cond-speaker*" ) + eval_kwlists1=$(find $indus -name "*.kwlist.xml" | sort -V) + eval_kwlists2=$(find $indus -name "*.kwlist?*.xml" | sort -V) + eval_kwlists="$kwlist1 $kwlist2" + fi + echo "#Official EVAL period evaluation data files" + echo "eval_data_dir=$eval_dir" + echo "eval_data_list=$lists/eval.list" + echo "${dataset}_ecf_file=$eval_ecf" + echo "${dataset}_kwlists=(" + for list in $eval_kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # ${dataset}_kwlists" + echo "eval_nj=32" + echo -e "\n" + + dataset=evalpart1 + indus_set=$(find $indus/ -maxdepth 1 -name "$indusid*$dataset" -type d) + if [ -z "$indus_set" ] ; then + echo >&2 "IndusDB entry \"$indusid*$dataset\" not found -- keeping only the language code and retrying" + indusid=${language%%-*} + indus_set=$(find $indus/ -maxdepth 1 -name "*${indusid}*$dataset" -type d) + if [ -z "$indus_set" ] ; then + echo >&2 "IndusDB configuration for the language code $indus_set not found" + fi + fi + if [ ! -z "$indus_set" ] ; then + evalpart1_rttm=$(find $indus_set/ -name "*mitllfa3.rttm" ) + evalpart1_ecf=$(find $indus_set/ -name "*ecf.xml" ) + evalpart1_stm=$(find $indus_set/ -name "*stm" -not -name "*cond-speaker*" ) + kwlists1=$(find $indus_set/ -name "*.kwlist.xml" | sort -V) + kwlists2=$(find $indus_set/ -name "*.kwlist?*.xml" | sort -V) + kwlists="$kwlists1 $kwlists2" + + kwlists="$dev10h_kwlists $eval_kwlists $kwlists" + echo "#Official post-EVAL period data files" + echo "${dataset}_data_dir=$eval_dir" + echo "${dataset}_data_list=$lists/${dataset}.list" + echo "${dataset}_rttm_file=$evalpart1_rttm" + echo "${dataset}_ecf_file=$evalpart1_ecf" + echo "${dataset}_stm_file=$evalpart1_stm" + echo "${dataset}_kwlists=(" + declare -A tmp_kwlists; + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + tmp_kwlists[$id]="$list" + done + + indices=$( + for id in "${!tmp_kwlists[@]}"; do + echo $id + done | sort -V | paste -s + ) + for id in $indices; do + echo " [$id]=${tmp_kwlists[$id]}" + done + echo ") # ${dataset}_kwlists" + echo "${dataset}_nj=32" + echo -e "\n" + fi + + dataset=shadow + echo "#Shadow data files" + echo "shadow_data_dir=(" + echo " $dev10h_dir" + echo " $eval_dir" + echo ") # shadow_data_dir" + echo "shadow_data_list=(" + echo " $lists/dev.list" + echo " $lists/eval.list" + echo ") # shadow_data_dir" + echo "shadow_ecf_file=$dev10h_ecf" + echo "shadow_rttm_file=$dev10h_rttm" + echo "shadow_stm_file=$dev10h_stm" + echo "shadow_kwlists=(" + for list in $eval_kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # shadow_kwlists" + echo "shadow_nj=32" + echo -e "\n" +fi + +dataset=untranscribed-training +unsup_dir=$(find $corpusdir -ipath "*/conversational/*" -name "$dataset" -type d) || exit 1 +unsup_list=$lists/untranscribed-training.list +[ ! 
-f $unsup_list ] && echo >&2 "Unsupervised training set not found $unsup_list"
+if [ -f $unsup_list ] ; then
+  echo "#Unsupervised dataset for FullLP condition"
+  echo "unsup_data_dir=$unsup_dir"
+  echo "unsup_data_list=$unsup_list"
+  echo "unsup_nj=32"
+  echo -e "\n"
+else
+  echo "#Unsupervised training set file ($unsup_list) not found."
+fi
+
+lexicon=$(find $corpusdir -ipath "*/conversational/*" -name "lexicon.txt" -type f) || exit 1
+echo "lexicon_file=$lexicon"
+
+if [ ! -z "$train_rom_dir" ] ; then
+  echo "lexiconFlags=\"--romanized --oov <unk>\""
+fi
+echo -e "\n\n"
+
+
diff --git a/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh b/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh
new file mode 100755
index 00000000000..2ffb73810e3
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh
@@ -0,0 +1,204 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+language="201-haitian"
+# End configuration section
+. ./utils/parse_options.sh
+
+set -e -o pipefail
+set -o nounset   # Treat unset variables as an error
+
+corpus=/export/babel/data/$language
+lists=./conf/lists/$language/
+indus=/export/babel/data/scoring/IndusDB
+
+corpusdir=$(find $corpus -maxdepth 1 -name "*-build" -type d) || exit 1
+[ -z "$corpusdir" ] && echo >&2 "Corpus directory for $language not found!" && exit 1
+
+train_dir=$(find $corpusdir -ipath "*/conversational/*" -name "training" -type d) || exit 1
+[ -z "$train_dir" ] && echo >&2 "Corpus directory $corpusdir/*/training/ not found!" && exit 1
+
+train_rom_dir=$(find $train_dir -name "transcript_roman" -type d) || exit 1
+echo "# include common settings for limitedLP systems."
+echo ". conf/common.limitedLP || exit 1;"
+echo -e "\n"
+
+echo "#speech corpora files location"
+echo "train_data_dir=$train_dir"
+echo "train_data_list=$lists/sub-train.list"
+echo "train_nj=32"
+echo -e "\n"
+
+
+indusid=$(find $corpus -name "IARPA*-build" -type d)
+indusid=$(basename ${indusid})
+indusid=${indusid%%-build}
+dataset=dev10h
+dev10h_dir=$(find $corpusdir -ipath "*/conversational/*" -name "dev" -type d) || exit 1
+indusdev10=$(find $indus/ -maxdepth 1 -name "$indusid*dev" -type d)
+if [ -z "$indusdev10" ] ; then
+  echo >&2 "IndusDB entry \"$indusid*dev\" not found -- removing the version and retrying"
+  indusid=${indusid%%-v*}
+  indusdev10=$(find $indus/ -maxdepth 1 -name "$indusid*dev" -type d)
+fi
+
+if [ -z "$indusdev10" ] ; then
+  echo ""
+else
+  dev10h_rttm=$(find $indusdev10/ -name "*mitllfa3.rttm" )
+  dev10h_ecf=$(find $indusdev10/ -name "*ecf.xml" )
+  dev10h_stm=$(find $indusdev10/ -name "*stm" -not -name "*cond-speaker*" )
+  kwlists1=$(find $indusdev10/ -name "*.kwlist.xml" | sort -V )
+  kwlists2=$(find $indusdev10/ -name "*.kwlist?*.xml" | sort -V )
+  kwlists="$kwlists1 $kwlists2"
+fi
+
+echo "#Radical reduced DEV corpora files location"
+echo "dev2h_data_dir=$dev10h_dir"
+echo "dev2h_data_list=$lists/dev.2h.list"
+[ ! -z ${dev10h_rttm:-} ] && echo "dev2h_rttm_file=$dev10h_rttm"
+[ ! -z ${dev10h_ecf:-} ] && echo "dev2h_ecf_file=$dev10h_ecf"
+[ ! -z ${dev10h_stm:-} ] && echo "dev2h_stm_file=$dev10h_stm"
+if [ ! 
-z "${kwlists:-}" ] ; then + echo "dev2h_kwlists=(" + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # dev2h_kwlists" +fi +echo "dev2h_nj=16" +echo "dev2h_subset_ecf=true" +echo -e "\n" + +echo "#Official DEV corpora files location" +echo "dev10h_data_dir=$dev10h_dir" +echo "dev10h_data_list=$lists/dev.list" +[ ! -z ${dev10h_rttm:-} ] && echo "dev10h_rttm_file=$dev10h_rttm" +[ ! -z ${dev10h_ecf:-} ] && echo "dev10h_ecf_file=$dev10h_ecf" +[ ! -z ${dev10h_stm:-} ] && echo "dev10h_stm_file=$dev10h_stm" +if [ ! -z "${kwlists:-}" ] ; then + echo "dev10h_kwlists=(" + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # dev10h_kwlists" +fi +echo "dev10h_nj=32" +echo -e "\n" + +dataset="eval" +eval_dir=$(find $corpus -ipath "*-eval/*/conversational/*" -name "$dataset" -type d) || exit 1 +if [ ! -z "$eval_dir" ] ; then + indus_set=$(find $indus/ -maxdepth 1 -name "$indusid*$dataset" -type d) + if [ -z "$indus_set" ] ; then + eval_ecf=$(find $indus/ -maxdepth 1 -type f -name "*$indusid*${dataset}.ecf.xml" ) + eval_kwlists1=$(find $indus -name "*$indusid*${dataset}.kwlist*.xml" | sort -V) + eval_kwlists2=$(find $indus -name "*$indusid*${dataset}.kwlist?*.xml" | sort -V) + eval_kwlists="$kwlists1 $kwlists2" + else + eval_rttm=$(find $indus_set/ -name "*mitllfa3.rttm" ) + eval_ecf=$(find $indus_set/ -name "*ecf.xml" ) + eval_stm=$(find $indus_set/ -name "*stm" -not -name "*cond-speaker*" ) + eval_kwlists1=$(find $indus -name "*.kwlist.xml" | sort -V) + eval_kwlists2=$(find $indus -name "*.kwlist?*.xml" | sort -V) + eval_kwlists="$kwlist1 $kwlist2" + fi + echo "#Official EVAL period evaluation data files" + echo "eval_data_dir=$eval_dir" + echo "eval_data_list=$lists/eval.list" + echo "${dataset}_ecf_file=$eval_ecf" + echo "${dataset}_kwlists=(" + for list in $eval_kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # ${dataset}_kwlists" + echo "eval_nj=32" + echo -e "\n" + + dataset=evalpart1 + indus_set=$(find $indus/ -maxdepth 1 -name "$indusid*$dataset" -type d) + if [ -z "$indus_set" ] ; then + echo "" + else + evalpart1_rttm=$(find $indus_set/ -name "*mitllfa3.rttm" ) + evalpart1_ecf=$(find $indus_set/ -name "*ecf.xml" ) + evalpart1_stm=$(find $indus_set/ -name "*stm" -not -name "*cond-speaker*" ) + kwlists1=$(find $indus_set/ -name "*.kwlist.xml" | sort -V) + kwlists2=$(find $indus_set/ -name "*.kwlist?*.xml" | sort -V) + kwlists="$kwlists1 $kwlists2" + fi + echo "#Official post-EVAL period data files" + echo "${dataset}_data_dir=$eval_dir" + echo "${dataset}_data_list=$lists/${dataset}.list" + echo "${dataset}_rttm_file=$evalpart1_rttm" + echo "${dataset}_ecf_file=$evalpart1_ecf" + echo "${dataset}_stm_file=$evalpart1_stm" + echo "${dataset}_kwlists=(" + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # ${dataset}_kwlists" + echo "${dataset}_nj=32" + echo -e "\n" + + + dataset=shadow + echo "#Shadow data files" + echo "shadow_data_dir=(" + echo " $dev10h_dir" + echo " $eval_dir" + echo ") # shadow_data_dir" + echo "shadow_data_list=(" + echo " $lists/dev.list" + echo " $lists/eval.lists" + echo ") # shadow_data_dir" + echo "shadow_ecf_file=$dev10h_ecf" + echo "shadow_rttm_file=$dev10h_rttm" + echo "shadow_stm_file=$dev10h_stm" + echo "shadow_kwlists=(" + for list in $eval_kwlists; do + id=$(echo $list | sed 
's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # shadow_kwlists" + echo "shadow_nj=32" + echo -e "\n" +fi + +dataset=untranscribed-training +unsup_dir=$(find $corpusdir -ipath "*/conversational/*" -name "$dataset" -type d) || exit 1 +unsup_lists=( $lists/untranscribed-training.list $lists/sub-train.untranscribed.list) +unsup_dirs=( $unsup_dir $train_dir ) +echo "#Unsupervised dataset for LimitedLP condition" +echo "unsup_data_list=(" +for list in ${unsup_lists[*]}; do + [ ! -f $list ] && echo "Unsupervised training set not found $list" + echo " $list"; +done +echo ") # unsup_data_list" + +echo "unsup_data_dir=(" +for dir in ${unsup_dirs[*]}; do + [ ! -d $dir ] && echo "Unsupervised training data dir not found $dir" + echo " $dir"; +done +echo ") # unsup_data_dir" + +echo "unsup_nj=32" +echo -e "\n" + +lexicon=$(find $corpusdir -ipath "*/conversational/*" -name "lexicon.sub-train.txt" -type f) || exit 1 +echo "lexicon_file=$lexicon" + +if [ ! -z "$train_rom_dir" ] ; then + echo "lexiconFlags=\"--romanized --oov \"" +fi +echo -e "\n\n" + + diff --git a/egs/babel/s5d/local/nist_eval/export_systems.sh b/egs/babel/s5d/local/nist_eval/export_systems.sh new file mode 100755 index 00000000000..d0af608416c --- /dev/null +++ b/egs/babel/s5d/local/nist_eval/export_systems.sh @@ -0,0 +1,33 @@ +#!/bin/bash +set -e +set -o pipefail + +. ./cmd.sh; . ./path.sh; + + +#( +#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* +#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it* +#) & +#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/tri6*_nnet*/decode_shadow.uem* +#wait + +( +bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it* +#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* +) & +bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/tri6*_nnet*/decode_shadow.uem +wait + +wait +exit + +bash make_release.sh --dryrun false --dir exp/sgmm5_mmi_b0.1 --data data/shadow.uem --master dev10h.uem lang.conf ./release +bash make_release.sh --dryrun false --dir exp/tri6b_nnet --data data/shadow.uem --master dev10h.uem lang.conf ./release +bash make_release.sh --dryrun false --dir exp_bnf/sgmm7_mmi_b0.1 --data data/shadow.uem --master dev10h.uem lang.conf ./release + +bash make_release.sh --dryrun false --dir exp/sgmm5_mmi_b0.1 --extrasys "NEWJHU" --data data/dev10h.uem --master dev10h.uem lang.conf ./release +bash make_release.sh --dryrun false --dir exp/tri6b_nnet --extrasys "NEWJHU" --data data/dev10h.uem --master dev10h.uem lang.conf ./release +bash make_release.sh --dryrun false --dir exp_bnf/sgmm7_mmi_b0.1 --extrasys "NEWJHU" --data data/dev10h.uem --master dev10h.uem lang.conf ./release + + diff --git a/egs/babel/s5d/local/nist_eval/filter_data.sh b/egs/babel/s5d/local/nist_eval/filter_data.sh new file mode 100755 index 00000000000..143102032c2 --- /dev/null +++ b/egs/babel/s5d/local/nist_eval/filter_data.sh @@ -0,0 +1,152 @@ +. ./path.sh + +min_lmwt=5 +max_lmwt=25 +cer=0 +nbest=-1 +cmd=run.pl +ntrue_from= +. ./utils/parse_options.sh + +min_lmwt_start=$min_lmwt +max_lmwt_start=$max_lmwt + +datadir=$1; shift +name=$1; shift +. ./lang.conf + +set -e +set -o pipefail + +[ ! 
+
+[ ! -d $datadir/compounds/$name ] && echo "Compound called $name does not exist in $datadir/compounds/" && exit 1
+ecf=$datadir/compounds/$name/ecf.xml
+cat $ecf | grep -P -o '(?<=audio_filename\=")[^"]*' > $datadir/compounds/$name/files.list
+filelist=$datadir/compounds/$name/files.list
+[ -f $datadir/compounds/$name/rttm ] && rttm=$datadir/compounds/$name/rttm
+[ -f $datadir/compounds/$name/stm ] && stm=$datadir/compounds/$name/stm
+
+if [ -f $ecf ] ; then
+  # the "duration" attribute of the ECF counts both channels of the
+  # conversational audio, hence the division by two
+  duration=`head -1 $ecf |\
+    grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\
+    perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2.0;}'`
+  echo "INFO: Using duration $duration seconds (from ECF)."
+else
+  echo "WARNING: Using default duration. ECF wasn't specified?"
+  duration=9999
+fi
+
+inputname=`basename $datadir`
+outputname=$name
+
+while (( "$#" )); do
+  resultdir=$1; shift
+  echo "Processing data directory $resultdir"
+
+  [ ! -d $resultdir ] && echo "Decode dir $resultdir does not exist!" && exit 1;
+
+  targetdir=$resultdir/$outputname
+
+
+  min_existing=
+  max_existing=
+  for lmw in `seq $min_lmwt_start $max_lmwt_start`; do
+    [ -d $resultdir/score_$lmw ] && [ -z $min_existing ] && min_existing=$lmw
+    [ -d $resultdir/score_$lmw ] && [ ! -z $min_existing ] && max_existing=$lmw
+  done
+  if [ -z $min_existing ] || [ -z $max_existing ] ; then
+    for lmw in `seq $min_lmwt_start $max_lmwt_start`; do
+      [ -d $resultdir/kwset_kwlist_$lmw ] && [ -z $min_existing ] && min_existing=$lmw
+      [ -d $resultdir/kwset_kwlist_$lmw ] && [ ! -z $min_existing ] && max_existing=$lmw
+    done
+  fi
+  [ -z $min_existing ] && echo "Data directories to be scored could not be found!" && exit 1
+  [ -z $max_existing ] && echo "Data directories to be scored could not be found!" && exit 1
+  min_lmwt=$min_existing
+  max_lmwt=$max_existing
+  echo "Found data directories for range LMWT=$min_lmwt:$max_lmwt"
+
+  if [ -d $resultdir/score_${min_lmwt} ] ; then
+    $cmd LMWT=$min_lmwt:$max_lmwt $targetdir/scoring/filter.LMWT.log \
+      set -e';' set -o pipefail';' \
+      mkdir -p $targetdir/score_LMWT/';'\
+      test -f $resultdir/score_LMWT/$inputname.ctm '&&' \
+      utils/filter_scp.pl $filelist $resultdir/score_LMWT/$inputname.ctm '>' \
+        $targetdir/score_LMWT/$outputname.ctm || exit 1
+
+    if [ ! -z $stm ] && [ -f $stm ] ; then
+      echo "For scoring CTMs, this STM is used: $stm"
+      local/score_stm.sh --min-lmwt $min_lmwt --max-lmwt $max_lmwt --cer $cer --cmd "$cmd" $datadir/compounds/$name data/lang $targetdir
+    else
+      echo "Not running scoring, $datadir/compounds/$name/stm does not exist"
+    fi
+  fi
+
+
+  kws_tasks=""
+
+  for kws in $datadir/kwset_*; do
+    kws=`basename $kws`
+    echo $kws
+    kws_tasks+=" $kws"
+  done
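+
+  # At this point kws_tasks holds one entry per kwset_* subdirectory of
+  # $datadir, e.g. " kwset_kwlist kwset_kwlist2" (names illustrative).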
$kws LMWT=$min_lmwt:$max_lmwt" + + indices_dir=$resultdir/kws_indices + for lmwt in $(seq $min_lmwt $max_lmwt) ; do + kwsoutput=${targetdir}/${kws}_${lmwt} + indices=${indices_dir}_$lmwt + nj=$(cat $indices/num_jobs) + + # This is a memory-efficient way how to do the filtration + # we do this in this way because the result.* files can be fairly big + # and we do not want to run into troubles with memory + files="" + for job in $(seq 1 $nj); do + if [ -f $resultdir/${kws}_${lmwt}/result.${job}.gz ] ; then + files="$files <(gunzip -c $resultdir/${kws}_${lmwt}/result.${job}.gz)" + elif [ -f $resultdir/${kws}_${lmwt}/result.${job} ] ; then + files="$files $resultdir/${kws}_${lmwt}/result.${job} " + else + echo >&2 "The file $resultdir/${$kws}_${lmwt}/result.${job}[.gz] does not exist" + exit 1 + fi + done + # we have to call it using eval as we need the bash to interpret + # the (possible) command substitution in case of gz files + # bash -c would probably work as well, but would spawn another + # shell instance + echo $kwsoutput + echo $datadir/compounds/$name/utterances + mkdir -p $kwsoutput + eval "sort -m -u $files" |\ + int2sym.pl -f 2 $datadir/$kws/utt.map | \ + utils/filter_scp.pl -f 2 $datadir/compounds/$name/utterances |\ + sym2int.pl -f 2 $datadir/$kws/utt.map |\ + local/search/filter_kws_results.pl --likes --nbest $nbest > $kwsoutput/results || exit 1 + done + + ntrue_from_args="" + if [ ! -z "$ntrue_from" ]; then + echo "Using $resultdir/$ntrue_from/$kws for NTRUE" + ntrue_from_args=" --ntrue-from $resultdir/$ntrue_from/$kws" + fi + if [ ! -z $rttm ] ; then + local/search/score.sh --cmd "$cmd" --extraid ${kws##kwset_}\ + --min-lmwt $min_lmwt --max-lmwt $max_lmwt $ntrue_from_args \ + data/lang $datadir/compounds/$name ${targetdir}/${kws} || exit 1; + elif [ ! -z $ntrue_from ] ; then + local/search/normalize.sh --cmd "$cmd" --extraid ${kws##kwset_}\ + --min-lmwt $min_lmwt --max-lmwt $max_lmwt $ntrue_from_args \ + data/lang $datadir/compounds/$name ${targetdir}/${kws} || exit 1; + else + echo >&2 "Cannot score and don't know which compound set to use to inherit the config" + exit 1 + fi + done + +done diff --git a/egs/babel/s5d/local/nist_eval/get_training_times.sh b/egs/babel/s5d/local/nist_eval/get_training_times.sh new file mode 100755 index 00000000000..f5b0012c2f2 --- /dev/null +++ b/egs/babel/s5d/local/nist_eval/get_training_times.sh @@ -0,0 +1,229 @@ +if [ -z $1 ] ; then + dir=`pwd` +else + dir=$1 +fi +echo $dir + + +convertsecs() { + h=$(($1/3600)) + m=$((($1/60)%60)) + s=$(($1%60)) + printf "%02d:%02d:%02d\n" $h $m $s +} + +function process { + count=1 + if [ ! 
+
+function process {
+  count=1
+  if [ ! -z $1 ]; then
+    count=$1
+  fi
+
+  replace=""
+  for a in `seq 1 $count` ; do
+    replace+="\t"
+  done
+
+  (
+  eval `grep "group=all"`
+  echo -n "threads=$total_threads"
+  echo -n " cpu_time=$total_cpu_time wall_time=$clock_time"
+  echo -n " human_cpu_time="`convertsecs $total_cpu_time`
+  echo -n " human_wall_time="`convertsecs $clock_time`
+  echo ""
+  ) | sed 's/^/'$replace'/g'
+}
+
+function legend {
+  echo -ne '"'"$@"'" '
+}
+
+legend Parameterization dev/train
+local/summarize_logs.pl $dir/exp/make_*/*train*/ | process
+
+if [ -d $dir/data/local/extend ] ; then
+  legend "Extending the lexicon"
+  local/summarize_logs.pl $dir/data/local/extend/tmp/log | process
+fi
+
+legend "Training up to stage tri5"
+local/summarize_logs.pl $dir/exp/mono*/log $dir/exp/tri{1..5}/log $dir/exp/tri{1..4}_ali*/log | process
+
+legend "SGMM2 stage training"
+local/summarize_logs.pl $dir/exp/ubm5/log $dir/exp/sgmm5/log $dir/exp/tri5_ali/log | process
+
+legend "SGMM2+bMMI stage training"
+local/summarize_logs.pl $dir/exp/sgmm5_*/log $dir/exp/ubm5/log $dir/exp/sgmm5_denlats/log/* | process
+
+nnet=tri6_nnet
+[ ! -d $dir/exp/$nnet ] && nnet=tri6b_nnet
+
+legend "DNN stage training GPU"
+local/summarize_logs.pl $dir/exp/$nnet/log | process
+
+legend "BNF stage training GPU"
+local/summarize_logs.pl $dir/exp_bnf/tri6_bnf/log | process
+
+legend "BNF stage training"
+local/summarize_logs.pl $dir/exp_bnf/tri{5,6}/log $dir/exp_bnf/sgmm7*/log \
+  $dir/exp_bnf/sgmm7_denlats/log/* $dir/exp_bnf/ubm7 | process
+
+legend "SEGMENTATION TRAINING: "
+local/summarize_logs.pl $dir/exp/tri4_train_seg_ali/log \
+  $dir/exp/make_plp_pitch/train_seg/ \
+  $dir/exp/tri4b_seg/log | process
+
+semisup=exp_bnf_semisup2
+if [ -d $dir/param_bnf_semisup ] || [ -d $dir/param_bnf_semisup2 ] ; then
+  [ ! -d $dir/$semisup ] && semisup=exp_bnf_semisup
+
+  decode=unsup.seg
+  legend "BNF_SEMISUP training, segmentation "
+  local/summarize_logs.pl $dir/exp/make_seg/$decode/log \
+    $dir/exp/make_seg/$decode/make_plp/ \
+    $dir/exp/tri4b_seg/decode_${decode}/log \
+    $dir/exp/make_plp/$decode | process
+
+  legend "BNF_SEMISUP training, decode unsup.seg TRI5 "
+  local/summarize_logs.pl $dir/exp/tri5/decode_*${decode}*/log | process
+  legend "BNF_SEMISUP training, decode unsup.seg PLP "
+  local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process
+  legend "BNF_SEMISUP training, decode unsup.seg DNN "
+  local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/log | process
+  legend "BNF_SEMISUP training, data preparation for BNF_SEMISUP "
+  local/summarize_logs.pl $dir/exp/combine2_post/unsup.seg/log \
+    $dir/exp/combine2_post/unsup.seg/decode_unsup.seg/log\
+    $dir/exp/tri6_nnet_ali/log | process
+
+  legend "BNF_SEMISUP training, TRAIN BNF_SEMISUP BNF GPU "
+  local/summarize_logs.pl $dir/$semisup/tri6_bnf/log | process
+  legend "BNF_SEMISUP training, TRAIN BNF_SEMISUP BNF "
+  local/summarize_logs.pl $dir/$semisup/tri{5,6}/log $dir/exp_bnf/sgmm7*/log \
+    $dir/exp_bnf/sgmm7_denlats/log/* $dir/exp_bnf/ubm7 | process
+fi
+
+if [ -d $dir/exp/tri6_nnet_mpe ] ; then
+  legend "DNN_MPE stage CPU training"
+  local/summarize_logs.pl $dir/exp/tri6_nnet_ali/log/ \
+    $dir/exp/tri6_nnet_denlats/log/* | process
+
+  legend "DNN_MPE stage GPU training"
+  local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/log/ | process
+fi
+
+#~decode=dev10h.seg
+#~legend "DEV10H.SEG decoding"
+#~legend "Segmentation: "
+#~local/summarize_logs.pl $dir/exp/make_seg/$decode/log \
+#~  $dir/exp/make_seg/$decode/make_plp/ \
+#~  $dir/exp/tri4b_seg/decode_${decode}/log \
+#~ 
$dir/exp/make_plp/$decode | process +#~legend "Decode $decode TRI5: " +#~local/summarize_logs.pl $dir/exp/tri5/decode_*${decode}*/log | process +#~legend "Decode $decode PLP: " +#~local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process +#~legend "Decode $decode DNN: " +#~local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/log | process +#~legend "Decode $decode PLP: " +#~local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process + +legend "G2P and confusion matrix: " +local/summarize_logs.pl $dir/exp/conf_matrix/log $dir/exp/g2p/log | process +if [ -d $dir/data/shadow2.uem ]; then + decode=shadow2.uem +else + decode=shadow.uem +fi + +legend "Segmentation $decode: provided..." +echo +#--legend "Segmentation: " +#--local/summarize_logs.pl $dir/exp/make_seg/$decode/log \ +#-- $dir/exp/make_seg/$decode/make_plp/ \ +#-- $dir/exp/tri4b_seg/decode_${decode}/log \ +#-- $dir/exp/make_plp/$decode | process +legend "Parametrization: " +local/summarize_logs.pl $dir/exp/make_plp/$decode | process +legend "Decode $decode TRI5: " +local/summarize_logs.pl $dir/exp/tri5/decode_*${decode}*/log | process +legend "Decode $decode PLP: " +local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process +legend "Decode $decode DNN: " +local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/log | process +legend "Decode $decode BNF: " +local/summarize_logs.pl $dir/exp_bnf/{tri6,sgmm7,sgmm7_mmi_b0.1}/decode_*${decode}*/log | process +if [ -d $dir/$semisup ] ; then + legend "Decode $decode BNF_SEMISUP: " + local/summarize_logs.pl $dir/$semisup/{tri6,sgmm7,sgmm7_mmi_b0.1}/decode_*${decode}*/log | process +fi +if [ -d $dir/exp/tri6_nnet_mpe ] ; then + legend "Decode $decode DNN_MPE: " + local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/log | process +fi + +legend "Indexing $decode PLP: " +local/summarize_logs.pl $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/kws_indices*/log | process +legend "Indexing $decode DNN: " +local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/kws_indices*/log | process +legend "Indexing $decode BNF: " +local/summarize_logs.pl $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/kws_indices*/log | process +if [ -d $dir/$semisup ] ; then + legend "Indexing $decode BNF_SEMISUP: " + local/summarize_logs.pl $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/kws_indices*/log | process +fi +if [ -d $dir/exp/tri6_nnet_mpe ] ; then + legend "Indexing $decode DNN_MPE: " + local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/kws_indices*/log | process +fi + +legend "Search $decode PLP: " +local/summarize_logs.pl $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_kws \ + $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_kws_*/log | process +legend "Search $decode DNN: " +local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/evalKW_kws \ + $dir/exp/$nnet/decode_*${decode}*/evalKW_kws_*/log | process +legend "Search $decode BNF: " +local/summarize_logs.pl $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws \ + $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws_*/log | process +if [ -d $dir/$semisup ] ; then + legend "Search $decode BNF_SEMISUP: " + local/summarize_logs.pl $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws/ \ + $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws*/log | process +fi +if [ -d $dir/exp/tri6_nnet_mpe ] ; then + legend "Search $decode DNN_MPE: " + local/summarize_logs.pl 
$dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_kws \ + $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_kws*/log | process +fi + +legend "Proxies generation: " +local/summarize_logs.pl $dir/data/$decode/evalKW_oov_kws/g2p/log \ + $dir/data/$decode/evalKW_oov_kws/tmp/split/log | process +legend "Search $decode PLP: " +local/summarize_logs.pl $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws \ + $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws_*/log | process +legend "Search $decode DNN: " +local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/evalKW_oov_kws \ + $dir/exp/$nnet/decode_*${decode}*/evalKW_oov_kws_*/log | process +legend "Search $decode BNF: " +local/summarize_logs.pl $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws \ + $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws_*/log | process + +if [ -d $dir/$semisup ] ; then + legend "Search $decode BNF_SEMISUP: " + local/summarize_logs.pl $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws/ \ + $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws*/log | process +fi + + +if [ -d $dir/exp/tri6_nnet_mpe ] ; then + legend "Search $decode DNN_MPE: " + local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_oov_kws \ + $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_oov_kws*/log | process +fi + + + + + + diff --git a/egs/babel/s5d/local/nist_eval/make_release.sh b/egs/babel/s5d/local/nist_eval/make_release.sh new file mode 100755 index 00000000000..aff89f92846 --- /dev/null +++ b/egs/babel/s5d/local/nist_eval/make_release.sh @@ -0,0 +1,356 @@ +#!/bin/bash + +team=RADICAL +corpusid= +partition= +scase=BaEval #BaDev|BaEval +master= +version=1 +sysid= +prim=c +cer=0 +dryrun=true +dir="exp/sgmm5_mmi_b0.1/" +data=data/dev10h.seg +master=dev10h +extrasys= +final=false + +#end of configuration + + +echo $0 " " "$@" + +[ -f ./cmd.sh ] && . ./cmd.sh +[ -f ./path.sh ] && . ./path.sh +. ./utils/parse_options.sh + +. $1 +outputdir=$2 + +set -e +set -o pipefail + +function submit_to_google { + SYSPATH=$1 + #curl 'https://docs.google.com/forms/d/1MV4gf-iVOX79ZEAekEiLIo7L_UVrJnoPjdtICK5F-nc/formResponse' \ + # --data 'entry.1721972547='$MTWV'&entry.485509816='$ATWV'&entry.694031153='$RESPATH'&entry.1851048707='$(whoami)'&submit=Submit' \ + # --compressed + curl -sS 'https://docs.google.com/forms/d/1MV4gf-iVOX79ZEAekEiLIo7L_UVrJnoPjdtICK5F-nc/formResponse' \ + --data 'entry.1721972547='$MTWV'&entry.485509816='$ATWV'&entry.694031153='$SYSPATH'&entry.1851048707='$(whoami)'&entry.880350279='$STWV'&entry.60995624='$OTWV'&entry.1338769660='$LatticeRecall'&entry.1333349334='$THRESHOLD'&entry.1423358838='$(pwd)'&submit=Submit' --compressed |\ + grep --color "Your response has been recorded." || return 1 + return 0 +} + +function export_file { + #set -x + source_file=$1 + target_file=$2 + if [ ! -f $source_file ] ; then + echo "The file $source_file does not exist!" + exit 1 + else + if [ ! -f $target_file ] ; then + if ! $dryrun ; then + ln -s `readlink -f $source_file` $target_file || exit 1 + ls -al $target_file + else + echo "$source_file -> $target_file" + fi + + else + echo "The file is already there, not doing anything. 
Either change the version (using --version), or delete that file manually."
+    exit 1
+  fi
+  fi
+  #set +x
+  return 0
+}
+
+function export_kws_file {
+  source_xml=$1
+  fixed_xml=$2
+  kwlist=$3
+  export_xml=$4
+
+  echo "Exporting KWS $source_xml as `basename $export_xml`"
+  if [ -f $source_xml ] ; then
+    cp $source_xml $fixed_xml.bak
+    fdate=`stat --printf='%y' $source_xml`
+    echo "The source file $source_xml has timestamp of $fdate"
+    echo "Authorizing empty terms from `basename $kwlist`..."
+    if ! $dryrun ; then
+      local/fix_kwslist.pl $kwlist $source_xml $fixed_xml || exit 1
+    else
+      fixed_xml=$source_xml
+    fi
+    echo "Exporting...export_file $fixed_xml $export_xml "
+    export_file $fixed_xml $export_xml || exit 1
+  else
+    echo "The file $source_xml does not exist. Exiting..."
+    exit 1
+  fi
+  echo "Export done successfully..."
+  return 0
+}
+
+function find_best_kws_result {
+  local dir=$1
+  local mask=$2
+  local record=`(find $dir -name "sum.txt" -path "$mask" -not -ipath "*rescored*" | xargs grep "^| *Occ") | cut -f 1,13,17 -d '|' | sed 's/|//g' | column -t | sort -r -n -k 3 | head -n 1`
+  echo $record >&2
+  local file=`echo $record | awk -F ":" '{print $1}'`
+  #echo $file >&2
+  local path=`dirname $file`
+  #echo $path >&2
+  echo $path
+}
+
+function find_best_stt_result {
+  local dir=$1
+  local mask=$2
+  local record=`(find $dir -name "*.ctm.sys" -path "$mask" -not -ipath "*rescore*" | xargs grep Avg) | sed 's/|//g' | column -t | sort -n -k 9 | head -n 1`
+
+  echo $record >&2
+  local file=`echo $record | awk -F ":" '{print $1}'`
+  #echo $file >&2
+  local path=`dirname $file`
+  #echo $path >&2
+  echo $path
+}
+
+function create_sysid {
+  local best_one=$1
+  local sysid=
+  local taskid=`basename $best_one`
+  local system_path=`dirname $best_one`
+  if [[ $system_path =~ .*sgmm5.* ]] ; then
+    sysid=PLP
+  elif [[ $system_path =~ .*nnet.* ]] ; then
+    sysid=DNN
+  elif [[ $system_path =~ .*sgmm7.* ]] ; then
+    sysid=BNF
+  elif [[ $system_path =~ .*4way.* ]] ; then
+    sysid=4way-comb
+  else
+    echo "Unknown system path ($system_path), cannot deduce the systemID" >&2
+    exit 1
+  fi
+  if [[ $taskid == *kws_* ]] ; then
+    local kwsid=${taskid//kws_*/}
+    kwsid=${kwsid//_/}
+    if [ -z $kwsid ]; then
+      echo ${sysid}
+    else
+      echo ${sysid}-$kwsid
+    fi
+  else
+    echo ${sysid}
+  fi
+}
+
+
+function get_ecf_name {
+  local best_one=$1
+  local taskid=`basename $best_one`
+  local kwstask=${taskid//kws_*/kws}
+  local kwlist=
+  #echo $kwstask
+  if [ -z $kwstask ] ; then
+    #echo $data/kws/kwlist.xml
+    kwlist=`readlink -f $data/kws/kwlist.xml`
+  else
+    #echo $data/$kwstask/kwlist.xml
+    kwlist=`readlink -f $data/$kwstask/kwlist.xml`
+  fi
+  ecf=`head -n 1 $kwlist | grep -Po "(?<=ecf_filename=\")[^\"]*"`
+  echo -e "\tFound ECF: $ecf" >&2
+  echo $ecf
+  return 0
+}
+
+function compose_expid {
+  local task=$1
+  local best_one=$2
+  local extraid=$3
+  echo "TASK: $task" >&2
+  echo "BEST ONE: $best_one" >&2
+  echo "EXTRA ID: $extraid" >&2
+
+  [ ! -z $extraid ] && extraid="-$extraid"
+  local sysid=`create_sysid $best_one`
+  echo "SYS ID: $sysid" >&2
+  if [ "$task" == "KWS" ]; then
+    ext="kwslist.xml"
+  elif [ "$task" == "STT" ]; then
+    ext="ctm"
+  else
+    echo "Incorrect task ID ($task) given to compose_expid function!" 
>&2 + exit 1 + fi + echo "${corpusid}" >&2 + echo "${partition}" >&2 + echo "${scase}" >&2 + echo "KWS14_${team}_${corpusid}_${partition}_${scase}_${task}_${prim}-${sysid}${extraid}_$version.$ext" + return 0 +} + +function figure_out_scase { + local ecf=`basename $1` + if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then + local basnam=${ecf%%.ecf.xml} + local scase=`echo $basnam | awk -F _ '{print $2}'` + + if [[ $scase =~ conv-dev(\..*)? ]]; then + echo "BaDev" + elif [[ $scase =~ conv-eval(\..*)? ]]; then + echo "BaEval" + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + echo "WARNING: Does not contain conv-dev|conv-eval ($scase)" >&2 + echo "BaDev" + return 1 + fi + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + echo "WARNING: Does not match the mask IARPA-babel.*.ecf.xml" >&2 + echo "BaDev" + return 1 + fi + return 0 +} + +function figure_out_partition { + local ecf=`basename $1` + if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then + local basnam=${ecf%%.ecf.xml} + local scase=`echo $basnam | awk -F _ '{print $2}'` + + if [[ $scase =~ conv-dev(\..*)? ]]; then + echo "conv-dev" + elif [[ $scase =~ conv-eval(\..*)? ]]; then + echo "conv-eval" + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + echo "conv-dev" + return 1 + fi + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + echo "conv-dev" + return 1 + fi + return 0 +} + +function figure_out_corpusid { + local ecf=`basename $1` + if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then + local basnam=${ecf%%.ecf.xml} + local corpusid=`echo $basnam | awk -F _ '{print $1}'` + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + local corpusid=${ecf%%.*} + fi + echo $corpusid +} + +mkdir -p $outputdir +extrasys_unnorm="unnorm" +if [ ! -z $extrasys ] ; then + extrasys_unnorm="${extrasys}-unnorm" +fi + +#data=data/shadow.uem +dirid=`basename $data` +kws_tasks="kws " +[ -f $data/extra_kws_tasks ] && kws_tasks+=`cat $data/extra_kws_tasks | awk '{print $1"_kws"}'` +[ -d $data/compounds ] && compounds=`ls $data/compounds` + +if [ -z "$compounds" ] ; then + for kws in $kws_tasks ; do + echo $kws + best_one=`find_best_kws_result "$dir/decode_*${dirid}*/${kws}_*" "*"` + sysid=`create_sysid $best_one` + ecf=`get_ecf_name $best_one` + scase=`figure_out_scase $ecf` || break + partition=`figure_out_partition $ecf` || break + corpusid=`figure_out_corpusid $ecf` + + expid=`compose_expid KWS $best_one "$extrasys"` + echo -e "\tEXPORT NORMALIZED as: $expid" + expid_unnormalized=`compose_expid KWS $best_one "$extrasys_unnorm"` + echo -e "\tEXPORT UNNORMALIZED as: $expid_unnormalized" + + export_kws_file $best_one/kwslist.xml $best_one/kwslist.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid + export_kws_file $best_one/kwslist.unnormalized.xml $best_one/kwslist.unnormalized.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid_unnormalized + done +else + [ -z $master ] && echo "You must choose the master compound (--master ) for compound data set" && exit 1 + for kws in $kws_tasks ; do + echo $kws + best_one=`find_best_kws_result "$dir/decode_*${dirid}*/$master/${kws}_*" "*"` + ( + eval "`cat $best_one/metrics.txt | sed 's/ *= */=/g' | sed 's/,/;/g' | sed 's/Lattice Recall/LatticeRecall/g' `" + submit_to_google $best_one $ATWV $MTWV + ) || echo "Submission failed!" 
+
+
+    for compound in $compounds ; do
+      compound_best_one=`echo $best_one | sed "s:$master/${kws}_:$compound/${kws}_:g"`
+      echo "From ($kws) $best_one going to $compound_best_one"
+      echo -e "\tPREPARE EXPORT: $compound_best_one"
+      sysid=`create_sysid $compound_best_one`
+      #ecf=`get_ecf_name $best_one`
+      ecf=`readlink -f $data/compounds/$compound/ecf.xml`
+      scase=`figure_out_scase $ecf`
+      partition=`figure_out_partition $ecf`
+      corpusid=`figure_out_corpusid $ecf`
+      expid=`compose_expid KWS $compound_best_one "$extrasys"`
+      echo -e "\tEXPORT NORMALIZED as: $expid"
+      expid_unnormalized=`compose_expid KWS $compound_best_one "$extrasys_unnorm"`
+      echo -e "\tEXPORT UNNORMALIZED as: $expid_unnormalized"
+
+      export_kws_file $compound_best_one/kwslist.xml $compound_best_one/kwslist.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid
+      export_kws_file $compound_best_one/kwslist.unnormalized.xml $compound_best_one/kwslist.unnormalized.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid_unnormalized
+    done
+  done
+fi
+
+## Exporting STT -- more straightforward, because there is only one task
+if [ -z "$compounds" ] ; then
+  #best_one=`find_best_stt_result "$dir/decode_*${dirid}*/score_*" "*"`
+  best_one=`find_best_stt_result "$dir/*${dirid}*/score_*" "*"`
+  echo -e "\tERROR: I don't know how to do this, yet"
+  ecf=`get_ecf_name kws`
+  sysid=`create_sysid $best_one`
+  scase=`figure_out_scase $ecf` || exit 1
+  partition=`figure_out_partition $ecf`
+  corpusid=`figure_out_corpusid $ecf`
+  expid=`compose_expid STT $best_one "$extrasys"`
+  echo -e "\tEXPORT NORMALIZED as: $expid"
+  export_file $best_one/${dirid}.ctm $outputdir/$expid
+else
+  [ -z $master ] && echo "You must choose the master compound (--master ) for compound data set" && exit 1
+  #best_one=`find_best_stt_result "$dir/decode_*${dirid}*/$master/score_*" "*"`
+  best_one=`find_best_stt_result "$dir/*${dirid}*/$master/score_*" "*"`
+
+  for compound in $compounds ; do
+    compound_best_one=`echo $best_one | sed "s:$master/score_:$compound/score_:g"`
+    echo -e "\tPREPARE EXPORT: $compound_best_one"
+    sysid=`create_sysid $compound_best_one`
+    #ecf=`get_ecf_name $best_one`
+    ecf=`readlink -f $data/compounds/$compound/ecf.xml`
+    scase=`figure_out_scase $ecf`
+    partition=`figure_out_partition $ecf`
+    corpusid=`figure_out_corpusid $ecf`
+    expid=`compose_expid STT $compound_best_one $extrasys`
+    echo -e "\tEXPORT NORMALIZED as: $expid"
+
+    export_file $compound_best_one/${compound}.ctm $outputdir/$expid
+  done
+fi
+
+echo "Everything looks fine, good luck!"
+exit 0
+
diff --git a/egs/babel/s5d/local/nist_eval/split_compound_set.sh b/egs/babel/s5d/local/nist_eval/split_compound_set.sh
new file mode 100755
index 00000000000..59ea4c162d7
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/split_compound_set.sh
+#!/bin/bash
+# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+# End configuration section
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+. ./cmd.sh;
+
+devset=dev10h.pem
+evalset=eval.seg
+cmd="$decode_cmd"
+
+
+rootdir=exp/nnet3/lstm_bidirectional_sp/decode_shadow.seg
+combinedir=exp/combine/lstm_bidirectional_sp/shadow.seg
+
+[ ! -d data/shadow.seg/compounds/$devset ] && \
+  echo >&2 "data/shadow.seg/compounds/$devset does not exist!" && exit 1
+[ ! -d data/shadow.seg/compounds/$evalset ] && \
+  echo >&2 "data/shadow.seg/compounds/$evalset does not exist!" && exit 1
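+
+# Note: the brace expansion $rootdir/{,phones,syllabs} below expands to the
+# word-level decode directory plus its phones/ and syllabs/ subdirectories.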
-d $decode ] && \ + echo >&2 "$decode does not exist!" && exit 1 + local/nist_eval/filter_data.sh \ + data/shadow.seg ${devset} $decode + local/nist_eval/filter_data.sh --ntrue-from ${devset} \ + data/shadow.seg ${evalset} $decode +done + + + +for kwset in data/shadow.seg/compounds/$devset/kwset_* ; do + kwsetdir=$(basename $kwset) + kwsetid=${kwsetdir#*_} + + echo "Processing kwset id=$kwsetid" + local/search/combine.sh --extraid "$kwsetid" --cmd "$cmd" \ + data/shadow.seg/compounds/${devset}/ data/langp_test \ + $rootdir/{,syllabs/,phones/}${devset}/${kwsetdir} $combinedir/${devset} + + local/search/combine_special.sh --extraid "$kwsetid" --cmd "$cmd" \ + data/shadow.seg/compounds/${evalset}/ data/langp_test \ + $combinedir/${devset}/${kwsetdir}/ \ + $rootdir/{,syllabs/,phones/}${evalset}/${kwsetdir} $combinedir/${evalset} +done + + + + diff --git a/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh b/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh new file mode 100755 index 00000000000..3b12222e13a --- /dev/null +++ b/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh @@ -0,0 +1,374 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Copyright 2014 Vimal Manohar +# Apache 2.0. + +# This script, which will generally be called from other neural-net training +# scripts, extracts the training examples used to train the neural net (and also +# the validation examples used for diagnostics), and puts them in separate archives. +# This is similar to the script steps/nnet2/get_egs.sh, but this also extracts +# frames from unsupervsied data. Decode directory for unsupervised data which +# has the best path done along with posteriors (can be done using local/combine_posteriors.sh) + +set -o pipefail + +# Begin configuration section. +cmd=run.pl +feat_type= +num_utts_subset=300 # number of utterances in validation and training + # subsets used for shrinkage and diagnostics +num_valid_frames_combine=0 # #valid frames for combination weights at the very end. +num_train_frames_combine=10000 # # train frames for the above. +num_frames_diagnostic=4000 # number of frames for "compute_prob" jobs +samples_per_iter=400000 # each iteration of training, see this many samples + # per job. This is just a guideline; it will pick a number + # that divides the number of samples in the entire data. +transform_dir_sup= # If supplied, overrides alidir +transform_dir_unsup= +num_jobs_nnet=16 # Number of neural net jobs to run in parallel +stage=-10 +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. +splice_width=4 # meaning +- 4 frames on each side for second LDA +spk_vecs_dir_sup= +spk_vecs_dir_unsup= +random_copy=false +weight_threshold=0.7 # Threshold on confidence factor of an unsupervised data + # frame for it to not be ignored +supervised_copies=3 # Make x copies of supervised data. +use_frame_selection=true +use_frame_weights=false # TODO: Not coded + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + + +if [ $# != 6 ]; then + echo "Usage: steps/nnet2/get_egs_semi_supervised.sh [opts] " + echo " e.g.: steps/nnet2/get_egs_semi_supervised.sh data/train data/train_unt data/lang exp/tri3_ali exp/tri3/dev_unt exp/tri4_nnet" + echo "" + echo "Main options (for others, see top of script file)" + echo " --config # config file containing options" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 
+ echo " --num-jobs-nnet # Number of parallel jobs to use for main neural net" + echo " # training (will affect results as well as speed; try 8, 16)" + echo " # Note: if you increase this, you may want to also increase" + echo " # the learning rate." + echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per" + echo " # process." + echo " --splice-width # Number of frames on each side to append for feature input" + echo " # (note: we splice processed, typically 40-dimensional frames" + echo " --num-frames-diagnostic <#frames|4000> # Number of frames used in computing (train,valid) diagnostics" + echo " --num-valid-frames-combine <#frames|10000> # Number of frames used in getting combination weights at the" + echo " # very end." + echo " --stage # Used to run a partially-completed training process from somewhere in" + echo " # the middle." + echo " --weight-threshold # Threshold on the confidence factor below which the " + echo " # a frame of unsupervised training data is ignored." + echo " --supervised-copies <#copies|3> # Make copies of supervised data" + echo " --transform-dir-sup # Directory with transforms for supervised training data" + echo " --transform-dir-unsup # Directory with transforms for unsupervised training data" + + exit 1; +fi + +data_sup=$1 +data_unsup=$2 +lang=$3 +alidir=$4 +latdir=$5 +dir=$6 + +# Check some files. +for f in $data_sup/feats.scp $data_unsup/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $alidir/tree $latdir/best_path_ali.1.gz $latdir/weights.1.gz $latdir/../final.mdl $latdir/../tree; do + [ ! -f $f ] && echo "$0: no such file $f" && exit 1; +done + +# Set some variables. +oov=`cat $lang/oov.int` +num_leaves=`tree-info $alidir/tree 2>/dev/null | awk '{print $2}'` || exit 1; +silphonelist=`cat $lang/phones/silence.csl` || exit 1; +[ -z $num_leaves ] && echo "\$num_leaves is unset" && exit 1 +[ "$num_leaves" -eq "0" ] && echo "\$num_leaves is 0" && exit 1 + +nj_sup=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir... +nj_unsup=`cat $latdir/num_jobs` || exit 1; # number of jobs in decode dir +# in this dir we'll have just one job. +sdata_sup=$data_sup/split$nj_sup +sdata_unsup=$data_unsup/split$nj_unsup +utils/split_data.sh $data_sup $nj_sup +utils/split_data.sh $data_unsup $nj_unsup + +mkdir -p $dir/log +echo $nj_sup > $dir/num_jobs_sup +echo $nj_unsup > $dir/num_jobs_unsup + +cp $alidir/tree $dir + +awk '{print $1}' $data_sup/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset > $dir/valid_uttlist + +# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately +if [ -f $data_sup/utt2uniq ]; then + echo "File $data_sup/utt2uniq exists, so augmenting valid_uttlist to" + echo "include all perturbed versions of the same 'real' utterances." 
+ mv $dir/valid_uttlist $dir/valid_uttlist.tmp + utils/utt2spk_to_spk2utt.pl $data_sup/utt2uniq > $dir/uniq2utt + cat $dir/valid_uttlist.tmp | utils/apply_map.pl $data_sup/utt2uniq | \ + sort | uniq | utils/apply_map.pl $dir/uniq2utt | \ + awk '{for(n=1;n<=NF;n++) print $n;}' | sort > $dir/valid_uttlist + rm $dir/uniq2utt $dir/valid_uttlist.tmp +fi + +# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately +awk '{print $1}' $data_sup/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \ + head -$num_utts_subset > $dir/train_subset_uttlist + +[ -z "$transform_dir_sup" ] && transform_dir_sup=$alidir +[ -z "$transform_dir_unsup" ] && transform_dir_unsup=$latdir +norm_vars=`cat $alidir/norm_vars 2>/dev/null` || norm_vars=false # cmn/cmvn option, default false. +norm_vars_unsup=`cat $latdir/norm_vars 2>/dev/null` || norm_vars_unsup=false + +if [ "$norm_vars" != "$norm_vars_unsup" ]; then + echo "ERROR: Features mismatch for supervised and unsupervised data!" + echo "Variance normalization $norm_vars for supervised data vs $norm_vars_unsup for unsupervised data" + exit 1 +fi +cp $alidir/norm_vars $dir 2>/dev/null + +## Set up features. +if [ -z $feat_type ]; then + if [ -f $alidir/final.mat ] && [ ! -f $transform_dir_sup/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi +fi + +echo "$0: feature type is $feat_type" + +case $feat_type in + raw) feats_sup="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata_sup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_sup/JOB/utt2spk scp:$sdata_sup/JOB/cmvn.scp scp:- ark:- |" + feats_unsup="ark,s,cs:cat $sdata_unsup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_unsup/JOB/utt2spk scp:$sdata_unsup/JOB/cmvn.scp scp:- ark:- |" + valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- |" + train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- |" + ;; + lda) + splice_opts=`cat $alidir/splice_opts 2>/dev/null` + #splice_opts_unsup=`cat $latdir/../splice_opts 2>/dev/null` + #if [ "$splice_opts" -ne "$splice_opts_unsup" ]; then + # echo "ERROR: Features mismatch for supervised and unsupervised data!" + # echo "Splice options $splice_opts for supervised data vs $splice_opts_unsup for unsupervised data" + # exit 1 + #fi + cp $alidir/splice_opts $dir/splice_opts 2>/dev/null + + #if [ "`diff $alidir/final.mat $latdir/../final.mat &> /dev/null; echo $?`" -ne "0" ]; then + # echo "ERROR: Features mismatch for supervised and unsupervised data!" 
+ # echo "LDA matrices $alidir/final.mat for supervised data and $latdir/../final.mat for unsupervised data don't match" + # exit 1 + #fi + + cp $alidir/final.mat $dir + feats_sup="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata_sup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_sup/JOB/utt2spk scp:$sdata_sup/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + feats_unsup="ark,s,cs:cat $sdata_unsup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_unsup/JOB/utt2spk scp:$sdata_unsup/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" + ;; + *) echo "$0: invalid feature type $feat_type" && exit 1; +esac + +if [ -f $transform_dir_sup/trans.1 ] && [ $feat_type != "raw" ]; then + echo "$0: using transforms from $transform_dir_sup for supervised data" + feats_sup="$feats_sup transform-feats --utt2spk=ark:$sdata_sup/JOB/utt2spk ark:$transform_dir_sup/trans.JOB ark:- ark:- |" + valid_feats="$valid_feats transform-feats --utt2spk=ark:$data_sup/utt2spk 'ark:cat $transform_dir_sup/trans.*|' ark:- ark:- |" + train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data_sup/utt2spk 'ark:cat $transform_dir_sup/trans.*|' ark:- ark:- |" +fi +if [ -f $transform_dir_sup/raw_trans.1 ] && [ $feat_type == "raw" ]; then + echo "$0: using raw-fMLLR transforms from $transform_dir_sup" + feats_sup="$feats_sup transform-feats --utt2spk=ark:$sdata_sup/JOB/utt2spk ark:$transform_dir_sup/raw_trans.JOB ark:- ark:- |" + valid_feats="$valid_feats transform-feats --utt2spk=ark:$data_sup/utt2spk 'ark:cat $transform_dir_sup/raw_trans.*|' ark:- ark:- |" + train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data_sup/utt2spk 'ark:cat $transform_dir_sup/raw_trans.*|' ark:- ark:- |" +fi + +if [ -f $transform_dir_unsup/trans.1 ] && [ $feat_type != "raw" ]; then + echo "$0: using transforms from $transform_dir_unsup for supervised data" + feats_unsup="$feats_unsup transform-feats --utt2spk=ark:$sdata_unsup/JOB/utt2spk ark:$transform_dir_unsup/trans.JOB ark:- ark:- |" +fi +if [ -f $transform_dir_unsup/raw_trans.1 ] && [ $feat_type == "raw" ]; then + echo "$0: using raw-fMLLR transforms from $transform_dir_unsup" + feats_unsup="$feats_unsup transform-feats --utt2spk=ark:$sdata_unsup/JOB/utt2spk ark:$transform_dir_unsup/raw_trans.JOB ark:- ark:- |" +fi + +if [ $stage -le 0 ]; then + echo "$0: working out number of frames of training data" + num_sup_frames=`feat-to-len scp:$data_sup/feats.scp ark,t:- | awk '{x += $2;} END{print x;}'` || exit 1; + num_unsup_frames=`feat-to-len scp:$data_unsup/feats.scp ark,t:- | awk '{x += $2;} END{print x;}'` || exit 1; + num_frames=$(perl -e "print STDOUT ($num_sup_frames * $supervised_copies + $num_unsup_frames)") + echo $num_frames > $dir/num_frames +else + num_frames=`cat $dir/num_frames` || exit 1; +fi + +# Working out number of 
+# Working out number of iterations per epoch.
+iters_per_epoch=`perl -e "print int($num_frames/($samples_per_iter * $num_jobs_nnet) + 0.5);"` || exit 1;
+[ $iters_per_epoch -eq 0 ] && iters_per_epoch=1
+samples_per_iter_real=$[$num_frames/($num_jobs_nnet*$iters_per_epoch)]
+echo "$0: Every epoch, splitting the data up into $iters_per_epoch iterations,"
+echo "$0: giving samples-per-iteration of $samples_per_iter_real (you requested $samples_per_iter)."
+
+# Making soft links to storage directories.
+for x in `seq 1 $num_jobs_nnet`; do
+  for y in `seq 0 $[$iters_per_epoch-1]`; do
+    utils/create_data_link.pl $dir/egs/egs.$x.$y.ark
+    utils/create_data_link.pl $dir/egs/egs_tmp.$x.$y.ark
+  done
+  for y in `seq 1 $nj_sup`; do
+    utils/create_data_link.pl $dir/egs/egs_orig.$x.$y.ark
+  done
+done
+
+nnet_context_opts="--left-context=$splice_width --right-context=$splice_width"
+mkdir -p $dir/egs
+
+if [ ! -z $spk_vecs_dir_sup ]; then
+  [ ! -f $spk_vecs_dir_sup/vecs.1 ] && echo "No such file $spk_vecs_dir_sup/vecs.1" && exit 1;
+  spk_vecs_opt_sup=("--spk-vecs=ark:cat $spk_vecs_dir_sup/vecs.*|" "--utt2spk=ark:$data_sup/utt2spk")
+else
+  spk_vecs_opt_sup=()
+fi
+
+if [ ! -z $spk_vecs_dir_unsup ]; then
+  [ ! -f $spk_vecs_dir_unsup/vecs.1 ] && echo "No such file $spk_vecs_dir_unsup/vecs.1" && exit 1;
+  spk_vecs_opt_unsup=("--spk-vecs=ark:cat $spk_vecs_dir_unsup/vecs.*|" "--utt2spk=ark:$data_unsup/utt2spk")
+else
+  spk_vecs_opt_unsup=()
+fi
+
+if [ $stage -le 2 ]; then
+  echo "Getting validation and training subset examples."
+  rm $dir/.error 2>/dev/null
+  $cmd $dir/log/create_valid_subset.log \
+    nnet-get-egs $nnet_context_opts "${spk_vecs_opt_sup[@]}" "$valid_feats" \
+    "ark,cs:gunzip -c $alidir/ali.*.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
+    "ark:$dir/egs/valid_all.egs" || touch $dir/.error &
+  $cmd $dir/log/create_train_subset.log \
+    nnet-get-egs $nnet_context_opts "${spk_vecs_opt_sup[@]}" "$train_subset_feats" \
+    "ark,cs:gunzip -c $alidir/ali.*.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
+    "ark:$dir/egs/train_subset_all.egs" || touch $dir/.error &
+  wait;
+  [ -f $dir/.error ] && exit 1;
+  echo "Getting subsets of validation and training examples for diagnostics and combination."
+  $cmd $dir/log/create_valid_subset_combine.log \
+    nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/egs/valid_all.egs \
+    ark:$dir/egs/valid_combine.egs || touch $dir/.error &
+  $cmd $dir/log/create_valid_subset_diagnostic.log \
+    nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/egs/valid_all.egs \
+    ark:$dir/egs/valid_diagnostic.egs || touch $dir/.error &
+
+  $cmd $dir/log/create_train_subset_combine.log \
+    nnet-subset-egs --n=$num_train_frames_combine ark:$dir/egs/train_subset_all.egs \
+    ark:$dir/egs/train_combine.egs || touch $dir/.error &
+  $cmd $dir/log/create_train_subset_diagnostic.log \
+    nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/egs/train_subset_all.egs \
+    ark:$dir/egs/train_diagnostic.egs || touch $dir/.error &
+  wait
+  cat $dir/egs/valid_combine.egs $dir/egs/train_combine.egs > $dir/egs/combine.egs
+
+  for f in $dir/egs/{combine,train_diagnostic,valid_diagnostic}.egs; do
+    [ ! -s $f ] && echo "No examples in file $f" && exit 1;
+  done
+  rm $dir/egs/valid_all.egs $dir/egs/train_subset_all.egs $dir/egs/{train,valid}_combine.egs
+fi
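+
+# For orientation (names as used in this script): stage 3 below writes
+# per-training-job archives $dir/egs/egs_orig.<n>.<job>.ark, stage 4 merges
+# them into egs_tmp.<n>.<iter>.ark, and stage 5 shuffles those into the final
+# egs.<n>.<iter>.ark archives consumed by training.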
-s $f ] && echo "No examples in file $f" && exit 1; + done + rm $dir/egs/valid_all.egs $dir/egs/train_subset_all.egs $dir/egs/{train,valid}_combine.egs +fi + +if [ $stage -le 3 ]; then + mkdir -p $dir/temp + + # Other scripts might need to know the following info: + echo $num_jobs_nnet >$dir/egs/num_jobs_nnet + echo $iters_per_epoch >$dir/egs/iters_per_epoch + echo $samples_per_iter_real >$dir/egs/samples_per_iter + + echo "Creating training examples"; + # in $dir/egs, create $num_jobs_nnet separate files with training examples. + # The order is not randomized at this point. + + echo "Generating training examples on disk" + # The examples will go round-robin to egs_list. + + egs_list= + for n in `seq 1 $num_jobs_nnet`; do + egs_list="$egs_list ark:$dir/egs/egs_orig.$n.JOB.ark" + done + + $cmd $io_opts JOB=1:$nj_unsup $dir/log/get_weighted_egs.JOB.log \ + nnet-get-weighted-egs $nnet_context_opts "${spk_vecs_opt_unsup[@]}" \ + --weight-threshold=$weight_threshold --use-frame-weights=$use_frame_weights \ + --use-frame-selection=$use_frame_selection "$feats_unsup" \ + "ark,s,cs:gunzip -c $latdir/best_path_ali.JOB.gz | convert-ali $latdir/../final.mdl $alidir/final.mdl $dir/tree ark:- ark:- | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \ + "ark,s,cs:gunzip -c $latdir/weights.JOB.gz |" ark:- \| \ + nnet-copy-egs ark:- $egs_list || exit 1; + + for (( i=0; i $dir/egs/egs_tmp.$n.0.ark || exit 1; + rm $dir/egs/egs_orig.$n.*.ark # don't "|| exit 1", due to NFS bugs... + done + else # We'll have to split it up using nnet-copy-egs. + egs_list= + for n in `seq 0 $[$iters_per_epoch-1]`; do + egs_list="$egs_list ark:$dir/egs/egs_tmp.JOB.$n.ark" + done + # note, the "|| true" below is a workaround for NFS bugs + # we encountered running this script with Debian-7, NFS-v4. + $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/split_egs.JOB.log \ + nnet-copy-egs --random=$random_copy --srand=JOB \ + "ark:cat $dir/egs/egs_orig.JOB.*.ark|" $egs_list '&&' \ + '(' rm $dir/egs/egs_orig.JOB.*.ark '||' true ')' || exit 1; + fi +fi + +if [ $stage -le 5 ]; then + # Next, shuffle the order of the examples in each of those files. + # Each one should not be too large, so we can do this in memory. + echo "Shuffling the order of training examples" + echo "(in order to avoid stressing the disk, these won't all run at once)." + + + # note, the "|| true" below is a workaround for NFS bugs + # we encountered running this script with Debian-7, NFS-v4. + for n in `seq 0 $[$iters_per_epoch-1]`; do + $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$n.JOB.log \ + nnet-shuffle-egs "--srand=\$[JOB+($num_jobs_nnet*$n)]" \ + ark:$dir/egs/egs_tmp.JOB.$n.ark ark:$dir/egs/egs.JOB.$n.ark '&&' \ + '(' rm $dir/egs/egs_tmp.JOB.$n.ark '||' true ')' || exit 1; + done +fi + +echo "$0: Finished preparing training examples" diff --git a/egs/babel/s5d/local/nnet3/run_blstm.sh b/egs/babel/s5d/local/nnet3/run_blstm.sh new file mode 100755 index 00000000000..6833baa0d72 --- /dev/null +++ b/egs/babel/s5d/local/nnet3/run_blstm.sh @@ -0,0 +1,29 @@ + +stage=0 +train_stage=-10 +cell_dim=512 +rp_dim=128 +nrp_dim=128 +affix=bidirectional +multicondition=true +common_egs_dir= +num_epochs=8 + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. 
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+local/nnet3/run_lstm.sh --affix $affix \
+  --stage $stage \
+  --train-stage $train_stage \
+  --num-epochs $num_epochs \
+  --lstm-delay " [-1,1] [-2,2] [-3,3] " \
+  --label-delay 0 \
+  --cell-dim $cell_dim \
+  --recurrent-projection-dim $rp_dim \
+  --non-recurrent-projection-dim $nrp_dim \
+  --common-egs-dir "$common_egs_dir" \
+  --multicondition $multicondition \
+  --chunk-left-context 40 \
+  --chunk-right-context 40
diff --git a/egs/babel/s5d/local/nnet3/run_blstm_realigned.sh b/egs/babel/s5d/local/nnet3/run_blstm_realigned.sh
new file mode 100755
index 00000000000..05c9a057512
--- /dev/null
+++ b/egs/babel/s5d/local/nnet3/run_blstm_realigned.sh
+#!/bin/bash
+
+stage=0
+train_stage=-10
+cell_dim=512
+rp_dim=128
+nrp_dim=128
+affix=bidirectional
+multicondition=false
+common_egs_dir=
+num_epochs=8
+align_model_dir=exp/nnet3/tdnn_sp
+extra_align_opts=
+
+echo "$0 $@" # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+local/nnet3/run_lstm_realigned.sh --affix $affix \
+  --stage $stage \
+  --train-stage $train_stage \
+  --num-epochs $num_epochs \
+  --lstm-delay " [-1,1] [-2,2] [-3,3] " \
+  --label-delay 0 \
+  --cell-dim $cell_dim \
+  --recurrent-projection-dim $rp_dim \
+  --non-recurrent-projection-dim $nrp_dim \
+  --common-egs-dir "$common_egs_dir" \
+  --multicondition $multicondition \
+  --chunk-left-context 40 \
+  --chunk-right-context 40 \
+  --extra-align-opts "$extra_align_opts" \
+  --align-model-dir "$align_model_dir"
diff --git a/egs/babel/s5d/local/nnet3/run_ivector_common.sh b/egs/babel/s5d/local/nnet3/run_ivector_common.sh
new file mode 100755
index 00000000000..bfe66d13f76
--- /dev/null
+++ b/egs/babel/s5d/local/nnet3/run_ivector_common.sh
+#!/bin/bash
+
+. ./cmd.sh
+set -e
+stage=1
+generate_alignments=true # false if doing ctc training
+speed_perturb=true
+
+[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1
+[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+[ -f local.conf ] && . ./local.conf
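+
+# The speed perturbation below creates 0.9x and 1.1x copies of the data and
+# combines them with an sp1.0- prefixed copy of the original, so train_sp ends
+# up roughly three times the size of train.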
+
+. ./utils/parse_options.sh
+
+# perturbed data preparation
+train_set=train
+if [ "$speed_perturb" == "true" ]; then
+  if [ $stage -le 1 ]; then
+    # Although the nnet will be trained on high-resolution data, we still have
+    # to perturb the normal data to get the alignments.
+    # _sp stands for speed-perturbed
+    for datadir in train; do
+      utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1
+      utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2
+      utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
+      utils/validate_data_dir.sh --no-feats data/${datadir}_tmp
+      rm -r data/temp1 data/temp2
+
+      featdir=plp_perturbed
+      if $use_pitch; then
+        steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/${datadir}_tmp exp/make_plp_pitch/${datadir}_tmp $featdir
+      else
+        steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/${datadir}_tmp exp/make_plp/${datadir}_tmp $featdir
+      fi
+
+      steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_plp/${datadir}_tmp $featdir || exit 1;
+      utils/fix_data_dir.sh data/${datadir}_tmp
+
+      utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0
+      utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0
+      utils/fix_data_dir.sh data/${datadir}_sp
+      rm -r data/temp0 data/${datadir}_tmp
+    done
+  fi
+
+  train_set=train_sp
+  if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ]; then
+    # obtain the alignment of the perturbed data
+    steps/align_fmllr.sh \
+      --nj 70 --cmd "$train_cmd" \
+      --boost-silence $boost_sil \
+      data/$train_set data/langp/tri5_ali exp/tri5 exp/tri5_ali_sp || exit 1
+    touch exp/tri5_ali_sp/.done
+  fi
+fi
+
+if [ $stage -le 3 ]; then
+  mfccdir=mfcc_hires
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+    date=$(date +'%m_%d_%H_%M')
+    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/swbd-$date/s5b/$mfccdir/storage $mfccdir/storage
+  fi
+
+  # the 100k_nodup directory is copied separately, as
+  # we want to use exp/tri2_ali_100k_nodup for lda_mllt training
+  # the main train directory might be speed_perturbed
+  for dataset in $train_set ; do
+    utils/copy_data_dir.sh data/$dataset data/${dataset}_hires
+
+    # scale the waveforms, this is useful as we don't use CMVN
+    data_dir=data/${dataset}_hires
+    cat $data_dir/wav.scp | python -c "
+import sys, os, subprocess, re, random
+scale_low = 1.0/8
+scale_high = 2.0
+for line in sys.stdin.readlines():
+  if len(line.strip()) == 0:
+    continue
+  print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high))
+"| sort -k1,1 -u > $data_dir/wav.scp_scaled || exit 1;
+    mv $data_dir/wav.scp_scaled $data_dir/wav.scp
+
+    steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
+        --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
+    steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/${dataset} $mfccdir;
+
+    # Remove the small number of utterances that couldn't be extracted for some
+    # reason (e.g. too short; no such file).
+    utils/fix_data_dir.sh data/${dataset}_hires;
+  done
+
+fi
+
+# ivector extractor training
+if [ $stage -le 5 ]; then
+  # We need to build a small system just because we need the LDA+MLLT transform
+  # to train the diag-UBM on top of. We use --num-iters 13 because after we get
+  # the transform (12th iter is the last), any further training is pointless.
+ # this decision is based on fisher_english + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ + --splice-opts "--left-context=3 --right-context=3" \ + --boost-silence $boost_sil \ + $numLeavesMLLT $numGaussMLLT data/${train_set}_hires \ + data/langp/tri5_ali/ exp/tri5_ali_sp exp/nnet3/tri3b +fi + +if [ $stage -le 6 ]; then + # To train a diagonal UBM we don't need very much data, so use the smallest subset. + steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-threads 12 --num-frames 200000 \ + data/${train_set}_hires 512 exp/nnet3/tri3b exp/nnet3/diag_ubm +fi + +if [ $stage -le 7 ]; then + # iVector extractors can be sensitive to the amount of data, but this one has a + # fairly small dim (defaults to 100) so we don't use all of it, we use just the + # 100k subset (just under half the data). + steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ + data/${train_set}_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1; +fi + +if [ $stage -le 8 ]; then + # We extract iVectors on all the train_nodup data, which will be what we + # train the system on. + + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). + steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ + data/${train_set}_max2_hires exp/nnet3/extractor exp/nnet3/ivectors_$train_set || exit 1; + +fi + +exit 0; diff --git a/egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh b/egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh new file mode 100755 index 00000000000..8d3973e65bc --- /dev/null +++ b/egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh @@ -0,0 +1,208 @@ +#!/bin/bash + +. ./cmd.sh +set -e +stage=1 +train_stage=-10 +generate_alignments=true # false if doing ctc training +speed_perturb=true +snrs="20:15:10" +num_data_reps=3 +ali_dir=exp/ +db_string="'air' 'rwcp' 'rvb2014'" # RIR dbs to be used in the experiment + # only dbs used for ASpIRE submission system have been used here +RIR_home=db/RIR_databases/ # parent directory of the RIR databases files +download_rirs=true # download the RIR databases from the urls or assume they are present in the RIR_home directory + + + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. 
+
+. ./utils/parse_options.sh
+
+# perturbed data preparation
+train_set=train
+if [ "$speed_perturb" == "true" ]; then
+  if [ $stage -le 1 ]; then
+    # Although the nnet will be trained on high-resolution data, we still have
+    # to perturb the normal data to get the alignments.
+    # _sp stands for speed-perturbed
+    for datadir in train; do
+      utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1
+      utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2
+      utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
+      utils/validate_data_dir.sh --no-feats data/${datadir}_tmp
+      rm -r data/temp1 data/temp2
+
+      featdir=plp_perturbed
+      if $use_pitch; then
+        steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/${datadir}_tmp exp/make_plp_pitch/${datadir}_tmp $featdir
+      else
+        steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/${datadir}_tmp exp/make_plp/${datadir}_tmp $featdir
+      fi
+
+      steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_plp/${datadir}_tmp $featdir || exit 1;
+      utils/fix_data_dir.sh data/${datadir}_tmp
+
+      utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0
+      utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0
+      utils/fix_data_dir.sh data/${datadir}_sp
+      rm -r data/temp0 data/${datadir}_tmp
+    done
+  fi
+
+  train_set=train_sp
+  if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ]; then
+    # obtain the alignment of the perturbed data
+    steps/align_fmllr.sh \
+      --nj 70 --cmd "$train_cmd" \
+      --boost-silence $boost_sil \
+      data/$train_set data/langp/tri5_ali exp/tri5 exp/tri5_ali_sp || exit 1
+    touch exp/tri5_ali_sp/.done
+  fi
+fi
+
+if [ $stage -le 3 ]; then
+  mfccdir=mfcc_hires
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+    date=$(date +'%m_%d_%H_%M')
+    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/swbd-$date/s5b/$mfccdir/storage $mfccdir/storage
+  fi
+
+  # the 100k_nodup directory is copied separately, as
+  # we want to use exp/tri2_ali_100k_nodup for lda_mllt training
+  # the main train directory might be speed_perturbed
+  for dataset in $train_set ; do
+    utils/copy_data_dir.sh data/$dataset data/${dataset}_hires
+
+    # scale the waveforms, this is useful as we don't use CMVN
+    data_dir=data/${dataset}_hires
+    cat $data_dir/wav.scp | python -c "
+import sys, os, subprocess, re, random
+scale_low = 1.0/8
+scale_high = 2.0
+for line in sys.stdin.readlines():
+  if len(line.strip()) == 0:
+    continue
+  print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high))
+"| sort -k1,1 -u > $data_dir/wav.scp_scaled || exit 1;
+    mv $data_dir/wav.scp_scaled $data_dir/wav.scp
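+    # After the rewrite each wav.scp entry gains a sox stage at the end, e.g.
+    # (assuming pipeline-style entries; the volume factor is random per line):
+    #   utt1 sph2pipe -f wav file.sph | sox --vol 0.83 -t wav - -t wav - |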
+
+    steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
+        --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
+    steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/${dataset} $mfccdir;
+
+    # Remove the small number of utterances that couldn't be extracted for some
+    # reason (e.g. too short; no such file).
+    utils/fix_data_dir.sh data/${dataset}_hires;
+  done
+
+fi
+
+# check if the required tools are present
+$KALDI_ROOT/egs/aspire/s5/local/multi_condition/check_version.sh || exit 1;
+mkdir -p exp/nnet3_multicondition
+if [ $stage -le 4 ]; then
+  # prepare the impulse responses
+  local/multi_condition/prepare_impulses_noises.sh --log-dir exp/make_reverb/log \
+    --db-string "$db_string" \
+    --download-rirs $download_rirs \
+    --RIR-home $RIR_home \
+    data/impulses_noises || exit 1;
+fi
+
+if [ $stage -le 5 ]; then
+  # corrupt the training data to generate multi-condition data
+  for data_dir in train_sp; do
+    num_reps=$num_data_reps
+    reverb_data_dirs=
+    for i in `seq 1 $num_reps`; do
+      cur_dest_dir="data/temp_${data_dir}_${i}"
+      $KALDI_ROOT/egs/aspire/s5/local/multi_condition/reverberate_data_dir.sh --random-seed $i \
+        --snrs "$snrs" --log-dir exp/make_corrupted_wav \
+        data/${data_dir} data/impulses_noises $cur_dest_dir
+      reverb_data_dirs+=" $cur_dest_dir"
+    done
+    utils/combine_data.sh --extra-files utt2uniq data/${data_dir}_mc data/${data_dir} $reverb_data_dirs
+    rm -rf $reverb_data_dirs
+  done
+fi
+
+if [ $stage -le 6 ]; then
+  # copy the alignments for the newly created utterance ids
+  ali_dirs=
+  for i in `seq 1 $num_data_reps`; do
+    local/multi_condition/copy_ali_dir.sh --utt-prefix "rev${i}_" exp/tri5_ali_sp exp/tri5_ali_sp_temp_$i || exit 1;
+    ali_dirs+=" exp/tri5_ali_sp_temp_$i"
+  done
+  local/multi_condition/copy_ali_dir.sh exp/tri5_ali_sp exp/tri5_ali_sp_copy || exit 1;
+  ali_dirs+=" exp/tri5_ali_sp_copy"
+  utils/combine_ali_dirs.sh --num-jobs 32 \
+    data/train_sp_mc exp/tri5_ali_sp_mc $ali_dirs || exit 1;
+  rm -rf $ali_dirs
+fi
+
+train_set=train_sp_mc
+if [ $stage -le 7 ]; then
+  mfccdir=mfcc_reverb
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+    date=$(date +'%m_%d_%H_%M')
+    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/babel_reverb-$date/s5/$mfccdir/storage $mfccdir/storage
+  fi
+  for data_dir in $train_set; do
+    utils/copy_data_dir.sh data/$data_dir data/${data_dir}_hires
+    steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
+      --cmd "$train_cmd" data/${data_dir}_hires \
+      exp/make_reverb_hires/${data_dir} $mfccdir || exit 1;
+    steps/compute_cmvn_stats.sh data/${data_dir}_hires exp/make_reverb_hires/${data_dir} $mfccdir || exit 1;
+    utils/fix_data_dir.sh data/${data_dir}_hires
+    utils/validate_data_dir.sh data/${data_dir}_hires
+  done
+fi
+
+# ivector extractor training
+if [ $stage -le 8 ]; then
+  # We need to build a small system just because we need the LDA+MLLT transform
+  # to train the diag-UBM on top of. We use --num-iters 13 because after we get
+  # the transform (12th iter is the last), any further training is pointless.
+  steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \
+    --splice-opts "--left-context=3 --right-context=3" \
+    --boost-silence $boost_sil \
+    $numLeavesMLLT $numGaussMLLT data/${train_set}_hires \
+    data/langp/tri5_ali exp/tri5_ali_sp_mc exp/nnet3_multicondition/tri3b
+fi
+
+if [ $stage -le 9 ]; then
+  # To train a diagonal UBM we don't need very much data, so use the smallest subset. 
+ steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \ + data/${train_set}_hires 512 exp/nnet3_multicondition/tri3b exp/nnet3_multicondition/diag_ubm +fi + +if [ $stage -le 10 ]; then + # iVector extractors can be sensitive to the amount of data, but this one has a + # fairly small dim (defaults to 100) so we don't use all of it, we use just the + # 100k subset (just under half the data). + steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ + data/${train_set}_hires exp/nnet3_multicondition/diag_ubm exp/nnet3_multicondition/extractor || exit 1; +fi + +if [ $stage -le 11 ]; then + # We extract iVectors on all the train_nodup data, which will be what we + # train the system on. + + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). + steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ + data/${train_set}_max2_hires exp/nnet3_multicondition/extractor exp/nnet3_multicondition/ivectors_$train_set || exit 1; + +fi + +exit 0; diff --git a/egs/babel/s5d/local/nnet3/run_lstm.sh b/egs/babel/s5d/local/nnet3/run_lstm.sh new file mode 100755 index 00000000000..8105cfda387 --- /dev/null +++ b/egs/babel/s5d/local/nnet3/run_lstm.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +# Copyright 2015 Johns Hopkins University (Author: Daniel Povey). +# 2015 Vijayaditya Peddinti +# 2015 Xingyu Na +# 2015 Pegah Ghahrmani +# 2016 Xiaohui Zhang +# Apache 2.0. + + +# this is a basic lstm script +# LSTM script runs for more epochs than the TDNN script +# and each epoch takes twice the time + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call lstm/train.sh with --gpu false + +stage=0 +train_stage=-10 +has_fisher=true +affix= +speed_perturb=true +multicondition=true +common_egs_dir= +reporting_email= + +# LSTM options +splice_indexes="-2,-1,0,1,2 0 0" +lstm_delay=" -1 -2 -3 " +label_delay=5 +num_lstm_layers=3 +cell_dim=1024 +hidden_dim=1024 +recurrent_projection_dim=256 +non_recurrent_projection_dim=256 +chunk_width=20 +chunk_left_context=40 +chunk_right_context=0 + + +# training options +num_epochs=8 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=2 +num_jobs_final=6 +momentum=0.5 +num_chunk_per_minibatch=100 +samples_per_iter=20000 +remove_egs=true + +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh +. ./cmd.sh + +if ! cuda-compiled; then + cat < transcript2"; +$noise_word = shift @ARGV; + +while() { + $_ =~ m:^(\S+) (.+): || die "bad line $_"; + $utt = $1; + $trans = $2; + print "$utt"; + foreach $w (split (" ",$trans)) { + $w =~ tr:a-z:A-Z:; # Upcase everything to match the CMU dictionary. . + $w =~ s:\\::g; # Remove backslashes. We don't need the quoting. + $w =~ s:^\%PERCENT$:PERCENT:; # Normalization for Nov'93 test transcripts. + $w =~ s:^\.POINT$:POINT:; # Normalization for Nov'93 test transcripts. + if($w =~ m:^\[\<\w+\]$: || # E.g. 
[\]$: || # E.g. [door_slam>], this means a door slammed in the next word. Delete. + $w =~ m:\[\w+/\]$: || # E.g. [phone_ring/], which indicates the start of this phenomenon. + $w =~ m:\[\/\w+]$: || # E.g. [/phone_ring], which indicates the end of this phenomenon. + $w eq "~" || # This is used to indicate truncation of an utterance. Not a word. + $w eq ".") { # "." is used to indicate a pause. Silence is optional anyway so not much + # point including this in the transcript. + next; # we won't print this word. + } elsif($w =~ m:\[\w+\]:) { # Other noises, e.g. [loud_breath]. + print " $noise_word"; + } elsif($w =~ m:^\<([\w\']+)\>$:) { + # e.g. replace with and. (the <> means verbal deletion of a word).. but it's pronounced. + print " $1"; + } elsif($w eq "--DASH") { + print " -DASH"; # This is a common issue; the CMU dictionary has it as -DASH. +# } elsif($w =~ m:(.+)\-DASH$:) { # E.g. INCORPORATED-DASH... seems the DASH gets combined with previous word +# print " $1 -DASH"; + } else { + print " $w"; + } + } + print "\n"; +} diff --git a/egs/babel/s5d/local/optimize/OptimizeParams.pm b/egs/babel/s5d/local/optimize/OptimizeParams.pm new file mode 100644 index 00000000000..d9fb3647ddd --- /dev/null +++ b/egs/babel/s5d/local/optimize/OptimizeParams.pm @@ -0,0 +1,631 @@ +# Author: Jason Eisner, Univ. of Pennsylvania +# +# $Revision: 3.11 $ of $Date: 2006/04/12 08:53:23 $ + +# !!! should add root-finding methods with derivative (newton-raphson: +# use rtsafe, section 9.4) and in multiple dimensions (sections 9.5, 9.6). + +package OptimizeParams; +use strict; + +BEGIN { + use Exporter (); + use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); + $VERSION = do { my @r = (q$Revision: 3.11 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # must be all one line, for MakeMaker + + @ISA = qw(Exporter); + @EXPORT_OK = qw(&powell &easybrent &easydbrent &easyzbrent + &mnbrak &brent &dbrent &zbrent + $machine_epsilon $inf &basisvectors); +} + +# A sample program with simple examples on a one-dimensional function. +# +# #!/usr/local/bin/perl5 -w +# +# use OptimizeParams qw(&powell &easybrent &easydbrent &zbrent); +# use strict 'vars'; +# +# sub f { sin(($_[0]-12.34567)/8)**2-0.5 } # function +# sub df { sin(2*($_[0]-12.34567)/8)/8 } # derivative +# sub fdf { my($temp)=($_[0]-12.34567)/8; # (function, derivative) computed at one go +# (sin($temp)**2-0.5, sin(2*$temp)/8) } +# +# # Three ways to find (x,f(x)) at minimum of function, namely (12.34567,-0.5) +# print join(" ",easybrent(0,1,\&f)), "\n"; +# print join(" ",easydbrent(0,1,\&f,\&df)), "\n"; +# print join(" ",easydbrent(0,1,\&fdf)), "\n"; +# +# # A fourth way, using a multidimensional optimizer even though f happens +# # to be 1-dimensional. The vector [0] is our starting guess. +# my($xvec,$fx) = powell(\&f,[0]); +# print join(" ",@$xvec,$fx), "\n"; +# +# # Find zero of function, namely 6.06 +# my($x)=zbrent(\&f,0,13); print $x," ",&f($x),"\n"; + +# ---------------------------------------------------------------------- + +use vars @EXPORT_OK; +$inf=exp(1e307); # could just use the bareword inf, which seems to work but generates warnings with -w +$machine_epsilon = 1; $machine_epsilon /= 2 while 1 + $machine_epsilon/2 > 1; + +sub FMAX { # (maximum) + $_[0] > $_[1] ? $_[0] : $_[1]; +} + +sub SIGN { + $_[1] >= 0 ? abs($_[0]) : -abs($_[0]); +} + + +# Direction Set (Powell's) Methods in Multidimensions +# From Numerical Recipes in C, Section 10.5, p. 417ff. Ported to Perl. +# +# Minimization of a function of n variables [for which the gradient is +# not known]. 
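+# (For instance, started from [0,0] on sub { ($_[0]-1)**2 + ($_[1]+2)**2 },
+# it should converge to roughly (1,-2); the function is made up purely for
+# illustration, and no gradient is ever evaluated.)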
Required arguments are (a reference to) the function +# and (a reference to) a length-n vector holding the coordinates of +# the starting point. Optional arguments are a fractional tolerance in +# the output value (used as a stopping criterion), a fractional +# tolerance in the input value (used as a stopping criterion on +# one-dimensional searches), and (a reference to) a list of n +# (references to) such vectors, holding an initial set of directions. +# Return values are a reference to a vector holding the coordinates at +# the minimum; the value of the function at that minimum; the number +# of iterations taken; and the final set of directions. +# +# This Perl version has a few different representational conventions. +# It's now the ROWS of $xi (not the columns) that hold the direction vectors. +# And the coordinates are 0-indexed, not 1-indexed. +# The $itol argument is new. + +sub powell { + my($funcref,$p,$ftol,$iftol,$xi) = @_; + my($n) = scalar @$p; # Number of dimensions. + my($ITMAX)=200; # Maximum allowed iterations. + + # Defaults for optional arguments + $ftol = $machine_epsilon unless defined $ftol; + $iftol = 2.0e-4 unless defined $iftol; # in the C version, this is TOL (defined at linmin) + $xi = &basisvectors($n) unless (defined $xi); + + my($fret) = &$funcref(@$p); + my(@pt) = @$p; # Save the initial point. + my($iter); + for($iter=1;;++$iter) { + my($fp) = $fret; + my($ibig) = 0; + my($del) = 0; # Will be the biggest function decrease. + my($i); + for ($i=0;$i<$n;$i++) { # In each iteration, loop over all directions in the set. + my($xit) = \@{$xi->[$i]}; # Copy the direction, + my($fptt) = $fret; + $fret = &linmin($p,$xit,$funcref,$iftol); # minimize along it, + if (abs($fptt-$fret) > $del) { # and record it if it is the largest decrease so far. + $del=abs($fptt-$fret); + $ibig=$i; + } + } + if (2*abs($fp-$fret) <= $ftol*(abs($fp)+abs($fret))) { # Termination criterion. + return($p,$fret,$iter,$xi); + } + die "$0: powell exceeding maximum of $ITMAX iterations" if ($iter==$ITMAX); + + { + my($xit); + my(@ptt); + my($j); + for ($j=0;$j<$n;$j++) { # Construct the extrapolated point and the average direction moved. Save the old starting point. + $ptt[$j] = 2*$p->[$j] - $pt[$j]; + $xit->[$j] = $p->[$j] - $pt[$j]; + $pt[$j] = $p->[$j]; + } + my($fptt) = &$funcref(@ptt); + if ($fptt < $fp) { + my($t) = 2 * ($fp-2*$fret+$fptt) * ($fp-$fret-$del)**2 - $del*($fp-$fptt)**2; + if ($t < 0) { + $fret = &linmin($p,$xit,$funcref); + $xi->[$ibig] = $xi->[$n-1]; + $xi->[$n-1] = $xit; + } + } + } + } # Back for another iteration + + die "$0: internal error in powell: should never have reached this line"; +} + +sub basisvectors { # returns the basis vectors in the given dimension (a reference to a list of references to lists) + my($n) = @_; + my($vects); + my($i,$j); + for ($i=0;$i<$n;$i++) { + for ($j=0;$j<$n;$j++) { + $vects->[$i][$j] = ($i==$j ? 1 : 0); + } + } + return $vects; +} + + + +{ + my($ncom); # "Global" variables for linmin to communicate with f1dim. + my(@pcom, @xicom, $nrfuncref); + + # Routine called by powell. + # From Numerical Recipes in C, Section 10.5, p. 419. Ported to Perl. + # + # Given an n-dimensional point $p and an n-dimensional direction + # vector $xi (both references to lists), moves and resets $p to + # where the function $funcref takes on a minimum along the direction + # $xi from $p, and replaces $xi by the actual vector displacement that + # $p was moved. Returns the value of $funcref at $p. 
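+  # (In effect this is the one-dimensional line search that powell runs
+  # once per direction on every iteration.)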
+  # This is actually
+  # all accomplished by calling the routines mnbrak and brent.
+  # $iftol is a tolerance on the input value, passed to brent.
+
+  sub linmin {
+    my($p,$xi,$funcref,$iftol) = @_;
+
+    print STDERR "$0: linmin: searching from (",join(", ",@$p),") in direction (",join(", ",@$xi),")\n";
+
+    $ncom = @$p; # Define the global variables.
+    $nrfuncref = $funcref;
+    @pcom = @$p;
+    @xicom = @$xi;
+
+    my($ax) = 0; # Initial guess for brackets.
+    my($xx) = 1;
+    my($bx);
+    ($ax,$xx,$bx) = &mnbrak($ax,$xx,\&f1dim);
+    my($xmin,$fret) = &brent($ax,$xx,$bx,\&f1dim,$iftol);
+    my($j);
+    for ($j=0;$j<$ncom;$j++) {
+      $p->[$j] += ($xi->[$j] *= $xmin);
+    }
+    return $fret;
+  }
+
+  # Function minimized by linmin.
+
+  sub f1dim {
+    my($x) = @_;
+    my(@xt);
+    my($j);
+    for($j=0; $j<$ncom;$j++) {
+      $xt[$j] = $pcom[$j] + $x * $xicom[$j];
+    }
+    return &$nrfuncref(@xt);
+  }
+}
+
+
+
+# Easy way to call mnbrak and brent together in order to minimize
+# a function.
+#
+# ax and bx are any distinct points; we'll look for a minimum in the
+# downhill direction on the line through (ax,f(ax)) and (bx,f(bx)).
+#
+# Return value is the same as brent, namely (x,f(x)). But we might
+# fail to find a minimum! If the function never increases again so
+# far as we can tell -- it plateaus, or decreases toward infinity, or
+# increases in a range that mnbrak doesn't sample -- then we'll return
+# (+/-inf, minimum value we found). Here the +/- is according to
+# which direction we searched in, and the minimum value is f(x) for
+# the last finite x we considered; this value may or may not be
+# finite, but should indicate the asymptotic behavior of the function.
+#
+# Just as in brent, the tolerance $tol can be omitted.
+
+sub easybrent {
+  my($ax,$bx,$funcref,$tol) = @_;
+  my($newa,$newb,$newc,$fa,$fb,$fc) = &mnbrak($ax,$bx,$funcref);
+  return ($newc,$fb) if ($newc==$inf || $newc==-$inf);
+  &brent($newa,$newb,$newc,$funcref,$tol);
+}
+
+# Easy way to call mnbrak and dbrent together in order to minimize
+# a function whose derivative is known.
+# ax and bx are any distinct points; we'll look for a minimum in the
+# downhill direction on the line through (ax,f(ax)) and (bx,f(bx)).
+#
+# See easybrent for the return value convention when we fail.
+#
+# Just as in dbrent, the tolerance $tol can be omitted. So can
+# $dfuncref, if $funcref returns a pair of values -- both the function
+# and its derivative.
+
+sub easydbrent {
+  my($ax,$bx,$funcref,$dfuncref,$tol) = @_;
+  my($newa,$newb,$newc,$fa,$fb,$fc) = &mnbrak($ax,$bx,$funcref);
+  return ($newc,$fb) if ($newc==$inf || $newc==-$inf);
+  &dbrent($newa,$newb,$newc,$funcref,$dfuncref,$tol);
+  # If we want to check output against brent:
+  # my(@ans1)=&dbrent($newa,$newb,$newc,$funcref,$dfuncref);
+  # my(@ans2)=&brent($newa,$newb,$newc,$funcref);
+  # die "dbrent $ans1[0], brent $ans2[0]\n" unless &main::near($ans1[0]+1e6,$ans2[0]+1e6);
+  # @ans1;
+}
+
+# Easy way to TRY to bracket a root and then call zbrent to find the
+# root. The calling convention is similar to easybrent: we are given
+# two starting points. If the function values there have different
+# signs, we just call zbrent. If they have the same sign and are both
+# positive, we search in the downhill direction for a negative value
+# (using mnbrak together with a modified golden-section minimizer
+# (section 10.1) that stops as soon as it crosses zero). Similarly, if
+# they have the same sign and are both negative, we search uphill for
+# a positive
+# value.
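+#
+# A minimal usage sketch of the easy* minimizers above (the toy function
+# is made up purely for illustration):
+#
+#   sub g { ($_[0]-3.0)**2 }               # minimum at x=3
+#   my($x,$fx) = easybrent(0, 1, \&g);     # expect x ~ 3, fx ~ 0
+#   warn "no minimum bracketed" if ($x == $inf || $x == -$inf);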
+ +sub easyzbrent { + my($ax,$bx,$funcref) = @_; + die "Not implemented yet; must call zbrent directly" +} + + +# Parabolic Interpolation and Brent's Method in one dimension +# From Numerical Recipes in C, Section 10.2, p. 404. Ported to Perl. +# +# Given a continuous function of one variable referenced by $funcref, +# and given a bracketing triplet of abcissas $ax, $bx, $cx as returned +# by mnbrak, this routine isolates the minimum to a fractional +# precision of about $tol using Brent's method. Returns (x, f(x)) at +# the minimum. $tol is set to a good default if omitted. +# +# See easybrent for an easier way to call this. + +sub brent { + my($ax, $bx, $cx, $funcref, $tol) = @_; + $tol = sqrt($machine_epsilon) unless defined $tol; + my($e) = 0.0; # This will be the distance moved on the step before last. + my($ITMAX) = 100; # The maximum allowed number of iterations. + my($CGOLD) = 0.3819660; # The golden ratio. [Actually, 1-golden ratio.] + my($ZEPS) = 1.0e-10; + + my($a) =($ax < $cx ? $ax : $cx); # a and b must be in ascending order, but input abscissas need not be. + my($b) =($ax > $cx ? $ax : $cx); + my($x,$w,$v); $x=$w=$v=$bx; # Initializations ... + die "brent: inputs out of order\n" unless $a < $x && $x < $b; # probably should also check f(x) < f(a),f(b) + my($fw,$fv,$fx); ($fw)=($fv)=($fx)=&$funcref($x); + my($d,$u,$fu); + + my($iter); + for ($iter=1; $iter<=$ITMAX; $iter++) { # Main program loop. + my($xm) = 0.5*($a+$b); + my($tol1)=$tol*abs($x)+$ZEPS; + my($tol2)=2.0*$tol1; + return ($x,$fx) if (abs($x-$xm) <= ($tol2-0.5*($b-$a))); # Test for done here. + if (abs($e) > $tol1) { # Construct a trial parabolic fit. + my($r) = ($x-$w)*($fx-$fv); + my($q) = ($x-$v)*($fx-$fw); + my($p) = ($x-$v)*$q - ($x-$w)*$r; + $q=2.0*($q-$r); + $p = -$p if $q > 0; + $q = abs($q); + my($etemp)=$e; + $e=$d; + if (abs($p) >= abs(0.5*$q*$etemp) || $p <= $q*($a-$x) || $p >= $q*($b-$x)) { + $d = $CGOLD*($e = ($x >= $xm ? $a-$x : $b-$x)); + } + # The above conditions determine the acceptability of the parabolic + # fit. Here we take the golden section step into the larger of the two + # segments. + else { + $d=$p/$q; # Take the parabolic step. + $u=$x+$d; + $d = &SIGN($tol1,$xm-$x) if ($u-$a < $tol2 || $b-$u < $tol2); + } + } else { + $d=$CGOLD*($e=($x >= $xm ? $a-$x : $b-$x)); + } + $u = (abs($d) >= $tol1 ? $x+$d : $x+&SIGN($tol1,$d)); + ($fu) = &$funcref($u); # This is the one function evaluation per iteration. + if ($fu <= $fx) { # Now decide what to do with our function evaluation. + ($u >= $x ? $a : $b) = $x; + ($v, $w, $x) = ($w, $x, $u); # Housekeeping follows: + ($fv, $fw, $fx) = ($fw, $fx, $fu); + } else { + ($u < $x ? $a : $b) = $u; + if ($fu <= $fw || $w == $x) { + $v=$w; + $w=$u; + $fv=$fw; + $fw=$fu; + } elsif ($fu <= $fv || $v == $x || $v == $w) { + $v = $u; + $fv = $fu; + } + } # Done with housekeeping. Back for another iteration. + } + die "$0: brent: Maximum number of iterations ($ITMAX) exceeded"; +} + +# One-Dimensional Search with First Derivatives +# From Numerical Recipes in C, Section 10.3, p. 405. Ported to Perl. +# +# Given a continuous function of one variable referenced by $funcref, +# and its derivative referenced by $dfuncref, and given a bracketing +# triplet of abcissas $ax, $bx, $cx as returned by mnbrak, this +# routine isolates the minimum to a fractional precision of about $tol +# using a modification of Brent's method that uses derivatives. +# Returns (x, f(x)) at the minimum. $tol is set to a good default if +# omitted. 
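+# (Unlike brent, each iteration here costs one evaluation of the function
+# and one of its derivative, so arguably it only pays off when the
+# derivative is cheap to compute.)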
+# +# See easydbrent for an easier way to call this. + +sub dbrent { + my($ax, $bx, $cx, $funcref, $dfuncref, $tol) = @_; + $tol = sqrt($machine_epsilon) unless defined $tol; + + my($e) = 0.0; # This will be the distance moved on the step before last. + my($ITMAX) = 100; # The maximum allowed number of iterations. + my($ZEPS) = 1.0e-10; + + my($a) =($ax < $cx ? $ax : $cx); # a and b must be in ascending order, but input abscissas need not be. + my($b) =($ax > $cx ? $ax : $cx); + my($w,$v,$x,$u); $w=$v=$x=$bx; # Initializations ... + die "dbrent: inputs out of order\n" unless $a < $x && $x < $b; # probably should also check f(x) < f(a),f(b) + my($fx,$dx)=&$funcref($x); + $dx=&$dfuncref($x) unless defined $dx; # if $funcref only returned one value in previous line + my($fw,$fv,$fu); $fw=$fv=$fx; + my($dw,$dv,$du); $dw=$dv=$dx; # All our housekeeping chores are doubled by the necessity of moving derivative values around as well as function values. + my($d); + + my($iter); + for ($iter=1; $iter<=$ITMAX; $iter++) { # Main program loop. + my($xm) = 0.5*($a+$b); + my($tol1)=$tol*abs($x)+$ZEPS; + my($tol2)=2.0*$tol1; + # print "a $a b $b x $x xm $xm\n"; + return ($x,$fx) if (abs($x-$xm) <= ($tol2-0.5*($b-$a))); # Test for done here. + if (abs($e) > $tol1) { # Construct a trial parabolic fit. + my($d1)=2.0*($b-$a); # Initialize these d's to an out-of-bracket value + my($d2)=$d1; + $d1 = ($w-$x)*$dx/($dx-$dw) if ($dw != $dx); # Secant method with one point. + $d2 = ($v-$x)*$dx/($dx-$dv) if ($dv != $dx); # And the other. + # Which of these two estimates of d shall we take? + # We will insist that they be within the bracket, and on + # the side pointed to by the derivative at x: + my($u1)=$x+$d1; + my($u2)=$x+$d2; + my($ok1) = ($a-$u1)*($u1-$b) > 0 && $dx*$d1 <= 0; + my($ok2) = ($a-$u2)*($u2-$b) > 0 && $dx*$d2 <= 0; + my($olde) = $e; # Movement on the step before last. + $e = $d; + if ($ok1 || $ok2) { # Take only an acceptable d, and if both are acceptable, then take the smallest one. + if ($ok1 && $ok2) { + $d=(abs($d1) < abs($d2) ? $d1 : $d2); + } elsif ($ok1) { + $d=$d1; + } else { + $d=$d2; + } + if (abs($d) <= abs(0.5*$olde)) { + $u=$x+$d; + $d=&SIGN($tol1,$xm-$x) if ($u-$a < $tol2 || $b-$u < $tol2); + } else { # Bisect, not golden section. + $d=0.5*($e=($dx >= 0 ? $a-$x : $b-$x)); # Decide which segment by the sign of the derivative. + } + } else { + $d=0.5*($e=($dx >= 0 ? $a-$x : $b-$x)); + } + } else { + $d=0.5*($e=($dx >= 0 ? $a-$x : $b-$x)); + } + if (abs($d) >= $tol1) { + $u=$x+$d; + ($fu,$du)=&$funcref($u); + } else { + $u=$x+&SIGN($tol1,$d); + ($fu,$du)=&$funcref($u); + return ($x,$fx) if ($fu > $fx); # If the minimum step in the downhill direction takes us uphill, then we are done. + } + # Now all the housekeeping, sigh. + $du=&$dfuncref($u) unless defined $du; # if $funcref only returned one value just above + if ($fu <= $fx) { + ($u >= $x ? $a : $b) = $x; + ($v,$fv,$dv)=($w,$fw,$dw); + ($w,$fw,$dw)=($x,$fx,$dx); + ($x,$fx,$dx)=($u,$fu,$du); + } else { + ($u < $x ? $a : $b) = $u; + if ($fu <= $fw || $w==$x) { + ($v,$fv,$dv)=($w,$fw,$dw); + ($w,$fw,$dw)=($u,$fu,$du); + } elsif ($fu < $fv || $v == $x || $v == $w) { + ($v,$fv,$dv)=($u,$fu,$du); + } + } + } + die "$0: dbrent: Maximum number of iterations ($ITMAX) exceeded\n"; + # Alternative: + # warn "$0: dbrent: Maximum number of iterations ($ITMAX) exceeded. Trying brent ...\n"; + # &brent($ax,$bx,$cx,$funcref,$tol); +} + + +# Routine for Initially Bracketing a Minimum. +# From Numerical Recipes in C, Section 10.1, p. 400. 
Ported to Perl. +# +# Given a continuous function referenced by $funcref, and distinct +# initial points $ax and $bx, this routine searches in the downhill +# direction (defined by the function as evaluated at the initial +# points) and returns new points $ax, $bx, $cx that bracket a minimum +# of the function [in the sense that b is between a and c, and f(b) is +# less than both f(a) and f(c)]. Also returned are the function values +# at the three points, $fa, $fb, and $fc. +# +# JME: If $cx is +inf (resp. -inf), this means that we searched in the +# positive (resp. negative) direction and the function just decreased +# forever (either to a plateau or without bound - look at $fb to see +# the last finite value). At least, it decreased at all the points +# where we sampled it - we might have skipped right over a spike. So +# either there is no minimum in the direction we searched, or we +# missed it; in either case our return values won't bracket any minimum +# and the caller should either give up or try something else! +# +# JME: Note that it's also possible that $cx remains finite, but that +# the minimum $fb that we bracket is -$inf (and typically $fc will be +# -$inf too). +# +# JME: f(b) is now required to be STRICTLY less than f(a) and f(c). +# This avoids counting an "extended" point of inflection as a minimum. +# I imagine the minimization routines would nonetheless be willing to +# find such if it's in the interval (should check...), but requiring +# us to search past it here is important for the previous paragraph: +# if the function value is eventually -inf forever due to overflow, we +# still keep searching forever until the abcissa is also +/- inf, +# rather than saying we've hit a plateau and that's enough to stop. +# +# It's ok if &$funcref returns multiple values; we'll evaluate it in +# list context and use only the first value. This is useful because +# of the calling convention for dbrent; e.g., easydbrent relies on it. + +sub mnbrak { + my($ax, $bx, $funcref) = @_; + my($GOLD) = 1.618034; + my($GLIMIT) = 100.0; + my($TINY) = 1.0e-20; + + die "mnbrak: $ax and $bx must be different\n" if $ax==$bx; # JME: added + my($fa) = &$funcref($ax); + my($fb) = &$funcref($bx); + if ($fb > $fa) { + # Switch roles of a and b so that we can go downhill in the direction + # from a to b. + ($ax, $bx) = ($bx, $ax); + ($fa, $fb) = ($fb, $fa); + } + + my($cx) = $bx + $GOLD*($bx-$ax); # First guess for c. + my($fc) = &$funcref($cx); + + # Keep looping here until we bracket. + while ($fb >= $fc && $cx != $inf && $cx != -$inf) { # JME: added the inf tests, and changed >= to > to make sure we keep searching all the way to inf if necessary in order to get $ax $bx $cx strictly in order + # print("ax $ax bx $bx cx $cx // fa $fa fb $fb fc $fc\n"), + + # Compute u by parabolic extrapolation from a, b, c. + # $TINY is used to prevent any possible division by zero. + my($r) = ($bx-$ax)*($fb-$fc); + my($q) = ($bx-$cx)*($fb-$fa); + my($u) = $bx -(($bx-$cx)*$q - ($bx-$ax)*$r)/(2.0*&SIGN(&FMAX(abs($q-$r),$TINY),$q-$r)); + my($ulim) = $bx + $GLIMIT*($cx-$bx); + my($fu); + # We won't go farther than this. Test various possibilities: + if (($bx - $u)*($u - $cx) > 0) { # Parabolic u is (strictly) between b and c: try it. + ($fu) = &$funcref($u); + if ($fu < $fc) { # Got a minimum between b and c. 
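+        # Promote (b, u) to the new (a, b); c stays put, so the smallest
+        # function value sits strictly inside the bracket.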
+ ($ax,$bx) = ($bx,$u); + ($fa,$fb) = ($fb,$fu); + return($ax, $bx, $cx, $fa, $fb, $fc) if ($ax-$bx)*($bx-$cx)>0 && $fb < $fa && $fb < $fc; + die "mnbrak: oops, trying to return $ax $bx $cx out of order, or else middle value of $fa $fb $fc is not smallest\n"; + } elsif ($fu > $fb) { # Got a minimum between a and u. + $cx = $u; + $fc = $fu; + return($ax, $bx, $cx, $fa, $fb, $fc) if ($ax-$bx)*($bx-$cx)>0 && $fb < $fa && $fb < $fc; + die "mnbrak: oops, trying to return $ax $bx $cx out of order, or else middle value of $fa $fb $fc is not smallest\n"; + } + $u = $cx + $GOLD*($cx-$bx); # Parabolic fit was no use. Use default magnification. + ($fu) = &$funcref($u); + } elsif (($cx-$u)*($u-$ulim) > 0) { # Parabolic fit is between c and its allowed limit + ($fu) = &$funcref($u); + if ($fu < $fc) { + ($bx, $cx, $u) = ($cx, $u, $u+$GOLD*($u-$cx)); # JME: formerly $cx+$GOLD*($cx-$bx), but that seems to have been a bug since the new u might not be beyond the new cx. + ($fb, $fc, $fu) = ($fc, $fu, &$funcref($u)); + } + } elsif (($u-$ulim)*($ulim-$cx) > 0) { # Limit parabolic u to maximum allowed value. JME: Changed >= to > so that we are guaranteed $u > $cx strictly. See comment at top of loop. + $u=$ulim; + ($fu) = &$funcref($u); + } else { # Reject parabolic u, use default magnification. + $u=$cx+$GOLD*($cx-$bx); + ($fu)=&$funcref($u); + } + ($ax,$bx,$cx) = ($bx,$cx,$u); # Eliminate oldest point and continue. + ($fa,$fb,$fc) = ($fb,$fc,$fu); + } + return($ax, $bx, $cx, $fa, $fb, $fc) if ($ax-$bx)*($bx-$cx)>0 && $fb <= $fa && ($fb <= $fc || $cx==$inf || $cx==-$inf); + die "mnbrak: oops, trying to return $ax $bx $cx out of order, or else middle value of $fa $fb $fc is not smallest but we didn't run into infinity with cx=$fc\n"; +} + + +# Using the Van Wijngaarden-Dekker-Brent method, find the root of a +# function f (referenced by $funcref) between x1 and x2, where f(x1) +# and f(x2) must have different signs. The root will be refined until +# its accuracy is $tol (which defaults to the machine epsilon if +# omitted). +# +# See easyzbrent for a sometimes easier way to call this. + +sub zbrent { + my($funcref, $x1, $x2, $tol) = @_; + $tol = $machine_epsilon unless defined $tol; + + my($ITMAX) = 100; # The maximum allowed number of iterations. + my($EPS) = $machine_epsilon; # Machine floating-point precision. (Defined as 3.0e-8 in C version.) + + my($a,$b,$c)=($x1,$x2,$x2); + my($d,$e,$min1,$min2); + my($fa,$fb) = (&$funcref($a), &$funcref($b)); + my($p,$q,$r,$s,$tol1,$xm); + + die "zbrent: root must be bracketed between x1=$x1 and x2=$x2, but f(x1)=$fa, f(x2)=$fb" if $fb*$fa > 0; + + my($fc)=$fb; + my($iter); + for ($iter=1;$iter<=$ITMAX;$iter++) { + if ($fb*$fc > 0) { + $c=$a; # Rename a, b, c and adjust bounding interval d. + $fc=$fa; + $e=$d=$b-$a; + } + if (abs($fc) < abs($fb)) { + $a=$b; + $b=$c; + $c=$a; + $fa=$fb; + $fb=$fc; + $fc=$fa; + } + $tol1=2*$EPS*abs($b)+0.5*$tol; # Convergence check. + $xm=0.5*($c-$b); + return $b if (abs($xm) <= $tol1 || $fb == 0); + if (abs($e) >= $tol1 && abs($fa) > abs($fb)) { + $s=$fb/$fa; # Attempt inverse quadratic interpolation. + if ($a == $c) { + $p=2*$xm*$s; + $q=1-$s; + } else { + $q=$fa/$fc; + $r=$fb/$fc; + $p=$s*(2*$xm*$q*($q-$r)-($b-$a)*($r-1)); + $q=($q-1)*($r-1)*($s-1); + } + $q = -$q if ($p > 0); # Check whether in bounds. + $p=abs($p); + $min1=3*$xm*$q-abs($tol1*$q); + $min2=abs($e*$q); + if (2*$p < ($min1 < $min2 ? $min1 : $min2)) { + $e=$d; # Accept interpolation. + $d=$p/$q; + } else { + $d=$xm; # Interpolation failed, use bisection. 
+        $e=$d;
+      }
+    } else { # Bounds decreasing too slowly, use bisection.
+      $d=$xm;
+      $e=$d;
+    }
+    $a=$b; # Move last best guess to $a.
+    $fa=$fb;
+    if (abs($d) > $tol1) { # Evaluate new trial root.
+      $b += $d;
+    } else {
+      $b += ($xm > 0 ? abs($tol1) : -abs($tol1));
+    }
+    $fb=&$funcref($b);
+  }
+  die "$0: zbrent: Maximum number of iterations ($ITMAX) exceeded";
+}
+
+1;
diff --git a/egs/babel/s5d/local/optimize2.pl b/egs/babel/s5d/local/optimize2.pl
new file mode 100755
index 00000000000..ead70129ec8
--- /dev/null
+++ b/egs/babel/s5d/local/optimize2.pl
@@ -0,0 +1,152 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+
+use FindBin;
+use lib "$FindBin::RealBin/optimize/";
+use OptimizeParams qw(&powell &easybrent &easydbrent &zbrent);
+use Data::Dumper;
+use Scalar::Util qw(looks_like_number);
+
+use 5.010;
+
+my @cmd_array = ();
+my %opts = ();
+my $output_dir = "";
+my $result_regexp = "(.*)";
+my $cmd;
+my $ftol = 3e-2;
+my $iftol = 1e-1;
+
+while (@ARGV) {
+  my $parm = shift @ARGV;
+  if ($parm eq "--var") {
+    my $var = shift;
+    die "$0: The variable $var does not contain a starting value" unless $var =~ /.*=.*/;
+    my @F = split "=", $var;
+    die "$0: The variable $var has more than one assignment" unless @F == 2;
+    die "$0: Multiple definitions of the variable $F[0]" if defined $opts{$F[0]};
+    $opts{$F[0]} = $F[1];
+  } elsif ($parm eq "--output-dir") {
+    $output_dir = shift;
+  } elsif ($parm eq "--ftol") {
+    $ftol = shift;
+    die "$0: The ftol parameter has to be a floating-point number" unless looks_like_number($ftol);
+  } elsif ($parm eq "--iftol") {
+    $iftol = shift;
+    die "$0: The iftol parameter has to be a floating-point number" unless looks_like_number($iftol);
+  } elsif ($parm eq "--result-regexp") {
+    $result_regexp = shift;
+  } else {
+    push @cmd_array, $parm;
+    while (@ARGV) {
+      push @cmd_array, shift @ARGV;
+    }
+  }
+
+}
+
+
+sub substitute {
+  my $cmd_proto = $_[0];
+  my %valhash = %{$_[1]};
+
+
+  my $cmd_out = $cmd_proto;
+
+  foreach my $key (keys %valhash) {
+    my $prev_cmd_out = $cmd_out;
+    $cmd_out =~ s/\b$key\b/$valhash{$key}/g;
+    die "$0: The variable $key is not used in the command." if $prev_cmd_out eq $cmd_out;
+  }
+
+  return $cmd_out;
+}
+
+sub f {
+  state $iter = 0;
+  my @params = @_;
+  my $i = 0;
+
+  my %curr_opts;
+  foreach my $v (sort keys %opts) {
+    $curr_opts{$v} = abs($params[$i]);
+    $i += 1;
+  }
+
+  my $result;
+  my $k = join(" ", substitute( $cmd, \%curr_opts));
+  print "$0: Debug: $k\n";
+  open(my $fh, '-|', "(set -e -o pipefail; $k) 2>&1") or die $!;
+  while (my $line=<$fh>) {
+    print $line;
+    chomp $line;
+    if ($line =~ /$result_regexp/) {
+      print "$0: Line $line matches the regexp \"$result_regexp\"\n";
+      $result = $line;
+      $result =~ s/$result_regexp/$1/g;
+    }
+  }
+  close($fh) or die "$0: The command didn't finish successfully: $!\n";
+
+  my $exit = $? >> 8;
+  if ( $exit != 0) {
+    die "$0: The command return status indicates failure: $exit\n";
+  }
+
+  if (not defined $result) {
+    die "$0: Matching the regexp against the command output didn't yield any result";
+  }
+  print "$0: Iteration $iter: " . join(" ", "[", @params, "] =>", $result) . "\n";
+
+  $iter += 1;
+  return -1.0 * $result+0.0;
+}
+
+
+print "$0: Optimizing with " . join(" ", %opts) . 
"\n"; +#print Dumper(\@cmd_array); + +$cmd = join(" ", @cmd_array); + +die "$0: Empty command \"$cmd\"" unless $cmd; +die "$0: Empty command \"$cmd\"" if $cmd =~ /^\s*$/; + +my @params; +foreach my $key (sort keys %opts) { + push @params, $opts{$key}; +} + +#my($xvec,$fx) = (\@params, 1); +my($xvec,$fx) = powell(\&f,\@params, $ftol, $iftol); +print "$0: Optimization finished with: " . join(" ",@$xvec, -$fx), "\n"; + + +@params=@{$xvec}; +foreach my $v (sort keys %opts) { + $opts{$v} = abs(shift @params); +} +$cmd=substitute($cmd, \%opts); + +{ + open(my $param_file, "> $output_dir/params") || die "Cannot open file $output_dir/params: $!"; + print $param_file "$_=$opts{$_}\n" for (sort keys %opts); + print $param_file "criterion=", -$fx; + close($param_file); +} + +{ + open(my $param_file, "> $output_dir/command.sh"); + print $param_file "$cmd\n"; + close($param_file); +} + +{ + open(my $param_file, "> $output_dir/params.sh"); + print $param_file "declare -A params;\n"; + print $param_file "params[$_]=$opts{$_}\n" for (sort keys %opts); + close($param_file); +} + diff --git a/egs/babel/s5d/local/prepare_acoustic_training_data.pl b/egs/babel/s5d/local/prepare_acoustic_training_data.pl new file mode 100755 index 00000000000..bc7c2812831 --- /dev/null +++ b/egs/babel/s5d/local/prepare_acoustic_training_data.pl @@ -0,0 +1,484 @@ +#!/usr/bin/env perl +use Getopt::Long; + +######################################################################## +# +# Script to prepare the Babel acoustic training data for Kaldi. +# +# - Place transcripts in a file named "text" +# Each line contains: utteranceID word1 word2 ... +# +# - Place the utterance-to-speaker map in a file named "utt2spk" +# Each line contains: utteranceID speakerID +# speakerID MUST BE be a prefix of the utteranceID +# Kaldi code does not require it, but some training scripts do. +# +# - Place the utterance-to-segment map in a file named "segments" +# Each line contains: utteranceID recordingID startTime endTime +# +# - Place the recordingID-to-waveformFile map in "wav.scp" +# Each line contains: recordingIB Input_pipe_for_reading_waveform| +# +# - Place the speaker-utterance map in a file named "spk2utt" +# Each line contains: speakerID utteranceID_1 utteranceID_2 ... +# This is the inverse of the utt2spk mapping +# +# Note 1: the utteranceIDs in the first 3 files must match exactly, and +# the recordingIDSs in the last 2 files must match exactly. +# +# Note 2: Babel data formats and file-naming conventions are assumed. +# +# - The transcriptions and waveforms are in subdirectories named +# audio/.sph +# transcription/.txt +# There is 1 pair of files per recording, with extensions as above +# +# - The audio is in NIST sphere format, so shp2pipe may be used, e.g. +# BABEL_BP_101_11694_20111204_205320_inLine \ +# /export/babel/sanjeev/kaldi-trunk/tools/sph2pipe_v2.5/sph2pipe \ +# -f wav -p -c 1 \ +# BABEL_BP_101_11694_20111204_205320_inLine.sph| +# +# - The filename contains speaker information, e.g. +# BABEL_BP_101_37210_20111102_170037_O1_scripted.sph -> 37210_A +# BABEL_BP_101_37210_20111102_172955_inLine.sph -> 37210_A +# BABEL_BP_101_37210_20111102_172955_outLine.sph -> 37210_B +# Specifically, the inLine speaker is the same as scripted +# +# - The transcription file has time marks in square brackets, e.g. 
+#     [0.0]
+#     <no-speech>
+#     [7.05]
+#     啊 听 听唔听到 啊 你 而家 仲未 上课 系 嘛
+#     [14.07]
+#     <no-speech>
+#
+# - If a vocabulary is provided, map all OOV tokens to an OOV symbol,
+#   and write out an OOV list with counts to a file named "oovCounts"
+#
+#   If one or more word-fragment markers are provided, this script
+#   checks if an OOV token can be made in-vocabulary by stripping off
+#   the markers one by one from either end of the token.
+#
+#   The default settings are
+#
+    $vocabFile = "";          # No vocab file; nothing is mapped to OOV
+    $OOV_symbol = "<unk>";    # Default OOV symbol
+    $fragMarkers = "";        # No characters are word-fragment markers
+#
+# - Babel transcriptions contain 4 kinds of untranscribed words
+#
+#     (())         designates unintelligible words
+#     <foreign>    designates a word in another language
+#     <prompt>     designates a sequence of pre-recorded words
+#     <overlap>    designates two simultaneous foreground speakers
+#
+#   This script maps them to OOV. They are not included in oovCounts
+#
+# - Babel transcriptions also contain a few non-linguistic tokens
+#
+#     <breath>     map to a vocal noise symbol
+#     <cough>      map to a vocal noise symbol
+#     <laugh>      map to a vocal noise symbol
+#     <lipsmack>   map to a vocal noise symbol
+#
+#     <click>      map to a nonvocal noise symbol
+#     <dtmf>       map to a nonvocal noise symbol
+#     <int>        map to a nonvocal noise symbol
+#     <ring>       map to a nonvocal noise symbol
+#
+#     <no-speech>  designates silence > 1 sec.
+#
+    $vocalNoise = "<v-noise>";
+    $nVoclNoise = "<noise>";
+    $silence = "<silence>";
+    $icu_transform="";
+    $get_whole_transcripts = "false";
+#
+########################################################################
+
+print STDERR "$0 " . join(" ", @ARGV) . "\n";
+GetOptions("fragmentMarkers=s" => \$fragMarkers,
+           "oov=s" => \$OOV_symbol,
+           "vocab=s" => \$vocabFile,
+           "icu-transform=s" => \$icu_transform,
+           "get-whole-transcripts=s" => \$get_whole_transcripts
+           );
+
+if ($#ARGV == 1) {
+    $inDir = $ARGV[0];
+    $outDir = $ARGV[1];
+    print STDERR ("$0: $inDir $outDir\n");
+    if($vocabFile) {
+        print STDERR ("\tLimiting transcriptions to words in $vocabFile\n");
+        print STDERR ("\tMapping OOV tokens to \"$OOV_symbol\"\n");
+        print STDERR ("\tif they remain OOV even after removing [$fragMarkers] from either end\n") if ($fragMarkers);
+    }
+    print STDERR ("$0 ADVICE: Use full path for the Input Directory\n") unless ($inDir=~m:^/:);
+} else {
+    print STDERR ("Usage: $0 [--options] InputDir OutputDir\n");
+    print STDERR ("\t--vocab <file>  File containing the permitted vocabulary\n");
+    print STDERR ("\t--oov <symbol>  Use this symbol for OOV words (default <unk>)\n");
+    print STDERR ("\t--fragmentMarkers <chars>  Remove these from ends of words to minimize OOVs (default none)\n");
+    print STDERR ("\t--get-whole-transcripts (true|false)  Do not remove utterances containing no speech\n");
+    exit(1);
+}
+
+########################################################################
+# Read and save the vocabulary and map anything not in the vocab
+########################################################################
+
+if ($vocabFile) {
+    open (VOCAB, $vocabFile)
+        || die "Unable to open vocabulary file $vocabFile";
+    $numWords = 0;
+    while (<VOCAB>) {
+        next unless (m:^([^\s]+):);
+        $numWords++ unless (exists $inVocab{$1}); # Don't count word repetitions
+        $inVocab{$1} = 1;                         # commonly found in lexicons
+    }
+    close(VOCAB);
+    print STDERR ("Read $numWords unique words from $vocabFile\n");
+}
+
+########################################################################
+# First read segmentation information from all the transcription files
+########################################################################
+
+$TranscriptionDir = "$inDir/transcription";
+if (-d $TranscriptionDir) {
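+  # Worked example of the utteranceID convention constructed in the loop
+  # below (the file name is hypothetical but follows the Babel pattern):
+  #   BABEL_BP_101_37210_20111102_172955_inLine.txt, segment at 12.34 sec
+  #     =>  utteranceID  37210_A_20111102_172955_001234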
+  @TranscriptionFiles = `ls ${TranscriptionDir}/*.txt`;
+  if ($#TranscriptionFiles >= 0) {
+    printf STDERR ("$0: Found %d .txt files in $TranscriptionDir\n", ($#TranscriptionFiles +1));
+    $numFiles = $numUtterances = $numWords = $numOOV = $numSilence = 0;
+    while ($filename = shift @TranscriptionFiles) {
+      $fileID = $filename;        # To capture the base file name
+      $fileID =~ s:.+/::;         # remove path prefix
+      $fileID =~ s:\.txt\s*$::;   # remove file extension
+      # For each transcription file, extract and save segmentation data
+      $numUtterancesThisFile = 0;
+      $prevTimeMark = -1.0;
+      $text = "";
+      if ( $icu_transform ) {
+        $inputspec="uconv -f utf8 -t utf8 -x \"$icu_transform\" $filename |";
+      } else {
+        $inputspec=$filename;
+      }
+      open (TRANSCRIPT, $inputspec) || die "Unable to open $filename";
+      while ($line=<TRANSCRIPT>) {
+        chomp $line;
+        if ($line =~ m:^\s*\[([0-9]+\.*[0-9]*)\]\s*$:) {
+          $thisTimeMark = $1;
+          if ($thisTimeMark < $prevTimeMark) {
+            print STDERR ("$0 ERROR: Found segment with negative duration in $filename\n");
+            print STDERR ("\tStart time = $prevTimeMark, End time = $thisTimeMark\n");
+            print STDERR ("\tThis could be a sign of something seriously wrong!\n");
+            print STDERR ("\tFix the file by hand or remove it from the directory, and retry.\n");
+            exit(1);
+          }
+          if ($prevTimeMark<0) {
+            # Record the first timemark and continue
+            $prevTimeMark = $thisTimeMark;
+            next;
+          }
+          ##################################################
+          # Create an utteranceID using fileID & start time
+          #  - Assume Babel file naming conventions
+          #  - Remove prefix: program_phase_language
+          #  - inLine = scripted = spkr A, outLine = B
+          #  - Move A/B so that utteranceIDs sort by spkr
+          #  - Assume utterance start time < 10000 sec.
+          ##################################################
+          $utteranceID = $fileID;
+          $utteranceID =~ s:[^_]+_[^_]+_[^_]+_::;
+          $utteranceID =~ s:([^_]+)_(.+)_(inLine|scripted):${1}_A_${2}:;
+          $utteranceID =~ s:([^_]+)_(.+)_outLine:${1}_B_${2}:;
+          $utteranceID .= sprintf ("_%06i", (100*$prevTimeMark));
+          ##################################################
+          # Then save segmentation, transcription, speakerID
+          ##################################################
+          if (exists $transcription{$utteranceID}) {
+            # utteranceIDs should be unique, but this one is not!
+            # Either time marks in the transcription file are bad,
+            # or something went wrong in generating the utteranceID
+            print STDERR ("$0 WARNING: Skipping duplicate utterance $utteranceID\n");
+          }
+          elsif ($text eq "") {
+            # Could be due to text filtering done below
+            # Output information to STDOUT to enable > /dev/null
+            print STDOUT ("$0: Skipping empty transcription $utteranceID\n");
+          } else {
+            $transcription{$utteranceID} = $text;
+            $startTime{$utteranceID} = $prevTimeMark;
+            $endTime{$utteranceID} = $thisTimeMark;
+            if ($utteranceID =~ m:([^_]+_[AB]).*:) {
+              $speakerID{$utteranceID} = $1;
+            } else {
+              # default: one speaker per audio file
+              $speakerID{$utteranceID} = $fileID;
+            }
+            $baseFileID{$utteranceID} = $fileID;
+            $numUtterancesThisFile++;
+            $numUtterances++;
+            $text = "";
+          }
+          $prevTimeMark = $thisTimeMark;
+        } else {
+          @tokens = split(/\s+/, $line);
+          $text = "";
+          while ($w = shift(@tokens)) {
+            # First, some Babel-specific transcription filtering
+            if (($w eq "<sta>")||($w eq "<male-to-female>")||($w eq "<female-to-male>")||($w eq "~")) {
+              next;
+            } elsif (($w eq "<breath>")||($w eq "<cough>")||($w eq "<laugh>")||($w eq "<lipsmack>")) {
+              $text .= " $vocalNoise";
+              $numWords++;
+            } elsif (($w eq "<click>")||($w eq "<dtmf>")||($w eq "<int>")||($w eq "<ring>")) {
+              $text .= " $nVoclNoise";
+              $numWords++;
+            } elsif (($w eq "(())")||($w eq "<foreign>")||($w eq "<overlap>")||($w eq "<prompt>")) {
+              $text .= " $OOV_symbol";
+              $oovCount{$w}++;
+              $numOOV++;
+              $numWords++;
+            } elsif ($w eq "<no-speech>") {
+              $text .= " $silence";
+              $numSilence++;
+            } else {
+              # This is just a regular spoken word
+              if ($vocabFile && (! $inVocab{$w}) && $fragMarkers) {
+                print "Not in vocab: $w\n";
+                # $w is a potential OOV token
+                # Remove fragMarkers to see if $w becomes in-vocabulary
+                while ($w =~ m:^(\S+[$fragMarkers]|[$fragMarkers]\S+)$:) {
+                  if ($w =~ m:^(\S+)[$fragMarkers]$:) {
+                    $w = $1;
+                    last if ($inVocab{$w});
+                  } elsif ($w =~m:^[$fragMarkers](\S+)$:) {
+                    $w = $1;
+                    last if ($inVocab{$w});
+                  } else {
+                    die "Logically, the program should never reach here!";
+                  }
+                }
+              }
+              # If still an OOV, replace $w by $OOV_symbol
+              if ($vocabFile && (! $inVocab{$w})) {
+                # $w is definitely an OOV token
+                if (exists $oovCount{$w}) {
+                  $oovCount{$w}++;
+                } else {
+                  $oovCount{$w} = 1;
+                }
+                $w = $OOV_symbol;
+                $numOOV++;
+              }
+              $text .= " $w";
+              $numWords++;
+            }
+          }
+          $text =~ s:^\s+::; # Remove leading white space, if any
+          # Transcriptions must contain real words to be useful in training
+          if ($get_whole_transcripts ne "true") {
+            $text =~ s:^(($OOV_symbol|$vocalNoise|$nVoclNoise|$silence)[ ]{0,1})+$::;
+          }
+        }
+      }
+      close(TRANSCRIPT);
+      if ($numUtterancesThisFile>0) {
+        $lastTimeMarkInFile{$fileID} = $prevTimeMark;
+        $numUtterancesInFile{$fileID} = $numUtterancesThisFile;
+        $numUtterancesThisFile = 0;
+      }
+      $numFiles++;
+    }
+    print STDERR ("$0: Recorded $numUtterances non-empty utterances from $numFiles files\n");
+  } else {
+    print STDERR ("$0 ERROR: No .txt files found in $TranscriptionDir\n");
+    exit(1);
+  }
+} else {
+  print STDERR ("$0 ERROR: No directory named $TranscriptionDir\n");
+  exit(1);
+}
+
+########################################################################
+# Then verify existence of corresponding audio files and their durations
+########################################################################
+
+$AudioDir = "$inDir/audio";
+if (-d $AudioDir) {
+  @AudioFiles = `ls ${AudioDir}/*.sph`;
+  if ($#AudioFiles >= 0) {
+    printf STDERR ("$0: Found %d .sph files in $AudioDir\n", ($#AudioFiles +1));
+    $numFiles = 0;
+    while ($filename = shift @AudioFiles) {
+      $fileID = $filename;
+      $fileID =~ s:.+/::;      # remove path prefix
+      $fileID =~ s:\.sph\s*::; # remove file extension
+      if (exists $numUtterancesInFile{$fileID}) {
+        # Some portion of this file has training transcriptions
+        @Info = `head $filename`;
+        $SampleCount = -1;
+        $SampleRate = 8000; #default
+        while ($#Info>=0) {
+          $line = shift @Info;
+          $SampleCount = $1 if ($line =~ m:sample_count -i (\d+):);
+          $SampleRate = $1 if ($line =~ m:sample_rate -i (\d+):);
+        }
+        if ($SampleCount<0) {
+          # Unable to extract a valid duration from the sphere header
+          print STDERR ("Unable to extract duration: skipping file $filename");
+        } else {
+          $waveformName{$fileID} = $filename; chomp $waveformName{$fileID};
+          $duration{$fileID} = $SampleCount/$SampleRate;
+          $numFiles++;
+        }
+      } else {
+        # Could be due to text filtering resulting in an empty transcription
+        # Output information to STDOUT to enable > /dev/null
+        print STDOUT ("$0: No transcriptions for audio file ${fileID}.sph\n");
+      }
+    }
+    print STDERR ("$0: Recorded durations from headers of $numFiles .sph files\n");
+  } else {
+    print STDERR ("$0 NOTICE: No .sph files in $AudioDir\n");
+  }
+
+  @AudioFiles = `ls ${AudioDir}/*.wav`;
+  if ($#AudioFiles >= 0) {
+    $soxi=`which soxi` or die "$0: Could not find soxi binary -- do you have sox installed?\n";
+    chomp $soxi;
+    printf STDERR ("$0: Found %d .wav files in $AudioDir\n", ($#AudioFiles +1));
+    $numFiles = 0;
+    while ($filename = shift @AudioFiles) {
+      $fileID = $filename;
+      $fileID =~ s:.+/::;      # remove path prefix
+      $fileID =~ s:\.wav\s*::; # remove file extension
+      if (exists $numUtterancesInFile{$fileID}) {
+        # Some portion of this file has training transcriptions
+        $duration = `$soxi -D $filename`;
+        if ($duration <=0) {
+          # Unable to extract a valid duration from the wav header
+          print STDERR ("Unable to extract duration: skipping file $filename");
+        } else {
+          if (exists $waveformName{$fileID} ) {
+            print STDERR ("$0 ERROR: duplicate fileID \"$fileID\" for files \"$filename\" and \"" .
+                          $waveformName{$fileID} ."\"\n");
+            exit(1);
+          }
+          $waveformName{$fileID} = $filename; chomp $waveformName{$fileID};
+          $duration{$fileID} = $duration;
+          $numFiles++;
+        }
+      } else {
+        # Could be due to text filtering resulting in an empty transcription
+        # Output information to STDOUT to enable > /dev/null
+        print STDOUT ("$0: No transcriptions for audio file ${fileID}.wav\n");
+      }
+    }
+    print STDERR ("$0: Recorded durations from headers of $numFiles .wav files\n");
+  } else {
+    print STDERR ("$0 NOTICE: No .wav files in $AudioDir\n");
+  }
+
+  if ( scalar(keys %waveformName) == 0 ) {
+    print STDERR ("$0 ERROR: No audio files found!");
+  }
+} else {
+  print STDERR ("$0 ERROR: No directory named $AudioDir\n");
+  exit(1);
+}
+
+########################################################################
+# Now all the needed information is available. Write out the output files.
+########################################################################
+
+unless (-d $outDir) {
+  print STDERR ("$0: Creating output directory $outDir\n");
+  die "Failed to create output directory" if (`mkdir -p $outDir`); # i.e. if the exit status is not zero.
+}
+print STDERR ("$0: Writing 6 output files to $outDir\n");
+
+$textFileName = "$outDir/text";
+open (TEXT, "> $textFileName") || die "$0 ERROR: Unable to write text file $textFileName\n";
+
+$utt2spkFileName = "$outDir/utt2spk";
+open (UTT2SPK, "> $utt2spkFileName") || die "$0 ERROR: Unable to write utt2spk file $utt2spkFileName\n";
+
+$segmentsFileName = "$outDir/segments";
+open (SEGMENTS, "> $segmentsFileName") || die "$0 ERROR: Unable to write segments file $segmentsFileName\n";
+
+$scpFileName = "$outDir/wav.scp";
+open (SCP, "| sort -u > $scpFileName") || die "$0 ERROR: Unable to write wav.scp file $scpFileName\n";
+my $binary=`which sph2pipe` or die "Could not find the sph2pipe command"; chomp $binary;
+$SPH2PIPE ="$binary -f wav -p -c 1";
+my $SOXBINARY =`which sox` or die "Could not find the sox command"; chomp $SOXBINARY;
+$SOXFLAGS ="-r 8000 -c 1 -b 16 -t wav - downsample";
+
+$spk2uttFileName = "$outDir/spk2utt";
+open (SPK2UTT, "> $spk2uttFileName") || die "$0 ERROR: Unable to write spk2utt file $spk2uttFileName\n";
+
+$oovFileName = "$outDir/oovCounts";
+open (OOV, "| sort -nrk2 > $oovFileName") || die "$0 ERROR: Unable to write oov file $oovFileName\n";
+
+$numUtterances = $numSpeakers = $numWaveforms = 0;
+$totalSpeech = $totalSpeechSq = 0.0;
+foreach $utteranceID (sort keys %transcription) {
+  $fileID = $baseFileID{$utteranceID};
+  if (exists $waveformName{$fileID}) {
+    # There are matching transcriptions and audio
+    $numUtterances++;
+    $totalSpeech += ($endTime{$utteranceID} - $startTime{$utteranceID});
+    $totalSpeechSq += (($endTime{$utteranceID} - $startTime{$utteranceID})
+                       *($endTime{$utteranceID} - $startTime{$utteranceID}));
+    print TEXT ("$utteranceID $transcription{$utteranceID}\n");
+    print UTT2SPK ("$utteranceID $speakerID{$utteranceID}\n");
+    print SEGMENTS ("$utteranceID $fileID $startTime{$utteranceID} $endTime{$utteranceID}\n");
+    if (exists $uttList{$speakerID{$utteranceID}}) {
+      $uttList{$speakerID{$utteranceID}} .= " $utteranceID";
+    } else {
+      $numSpeakers++;
+      $uttList{$speakerID{$utteranceID}} = "$utteranceID";
+    }
+    next if (exists $scpEntry{$fileID});
+    $numWaveforms++;
+    if ($waveformName{$fileID} =~ /.*\.sph/ ) {
+      $scpEntry{$fileID} = "$SPH2PIPE $waveformName{$fileID} |";
+    } else {
+      $scpEntry{$fileID} = "$SOXBINARY $waveformName{$fileID} $SOXFLAGS |";
+    }
+  } else {
+    print STDERR ("$0 WARNING: No audio file for transcription 
$utteranceID\n"); + } +} +foreach $fileID (sort keys %scpEntry) { + print SCP ("$fileID $scpEntry{$fileID}\n"); +} +foreach $speakerID (sort keys %uttList) { + print SPK2UTT ("$speakerID $uttList{$speakerID}\n"); +} +foreach $w (sort keys %oovCount) { + print OOV ("$w\t$oovCount{$w}\n"); +} +exit(1) unless (close(TEXT) && close(UTT2SPK) && close(SEGMENTS) && close(SCP) && close(SPK2UTT) && close(OOV)); + +print STDERR ("$0: Summary\n"); +print STDERR ("\tWrote $numUtterances lines each to text, utt2spk and segments\n"); +print STDERR ("\tWrote $numWaveforms lines to wav.scp\n"); +print STDERR ("\tWrote $numSpeakers lines to spk2utt\n"); +print STDERR ("\tHmmm ... $numSpeakers distinct speakers in this corpus? Unusual!\n") + if (($numSpeakers<($numUtterances/500.0)) || ($numSpeakers>($numUtterances/2.0))); +print STDERR ("\tTotal # words = $numWords (including $numOOV OOVs) + $numSilence $silence\n") + if ($vocabFile); +printf STDERR ("\tAmount of speech = %.2f hours (including some due to $silence)\n", $totalSpeech/3600.0); +if ($numUtterances>0) { + printf STDERR ("\tAverage utterance length = %.2f sec +/- %.2f sec, and %.2f words\n", + $totalSpeech /= $numUtterances, + sqrt(($totalSpeechSq/$numUtterances)-($totalSpeech*$totalSpeech)), + $numWords/$numUtterances); +} + +exit(0); + +######################################################################## +# Done! +######################################################################## diff --git a/egs/babel/s5d/local/prepare_extended_lexicon.sh b/egs/babel/s5d/local/prepare_extended_lexicon.sh new file mode 100644 index 00000000000..3cc5ca6c21f --- /dev/null +++ b/egs/babel/s5d/local/prepare_extended_lexicon.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +unk_fraction_boost=1.0 +num_sent_gen=12000000 +num_prons=1000000 +# End configuration section +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +# Extend the original lexicon. +# Will creates the files data/local/extend/{lexiconp.txt,oov2prob}. +local/extend_lexicon.sh --cmd "$train_cmd" --cleanup false \ + --num-sent-gen $num_sent_gen --num-prons $num_prons \ + data/local/lexicon.txt data/local/lang_ext data/dev2h/text + + +extend_lexicon_param=() +[ -f data/local/extend/original_oov_rates ] || exit 1; +unk_fraction=`cat data/local/extend/original_oov_rates |\ + grep "token" | awk -v x=$unk_fraction_boost '{print $NF/100.0*x}'` +extend_lexicon_param=(--cleanup false --unk-fraction $unk_fraction \ + --oov-prob-file data/local/lang_ext/oov2prob) + +cp -r data/lang data/lang_ext +local/arpa2G.sh ${extend_lexicon_param[@]} \ + data/srilm/lm.gz data/lang_ext data/lang_ext + diff --git a/egs/babel/s5d/local/prepare_lexicon.pl b/egs/babel/s5d/local/prepare_lexicon.pl new file mode 100755 index 00000000000..ff128f07637 --- /dev/null +++ b/egs/babel/s5d/local/prepare_lexicon.pl @@ -0,0 +1,404 @@ +#!/usr/bin/env perl +use Getopt::Long; +use Data::Dumper; + +############################################################################### +# +# Convert a Babel-formatted dictionary to work with Kaldi, and optionally +# add non-speech "words" that appear in the transcription. e.g. +# +# Convert dictionary from entries of the form +# +# WORD Romanization pronunciation1 pronunciation2 ... +# +# where each pronunciation has syllable boundaries [.#] and tags _X, " or % +# +# Phone1 Phone2 _TAG . Phone1 Phone2 Phone3 _TAG +# +# and so on, e.g. 
+# +# 㓤 gat1 g 6 t _1 h O: t _3 k i: t _1 +# 兄妹 hing1mui2 h i: N _1 . m u:j _2 h i: N _1 . m u:j _6 +# +# to entries of the form +# +# 㓤 g_1 6_1 t_1 +# 㓤 h_3 O:_3 t_3 +# 㓤 k_1 i:_1 t_1 +# 兄妹 h_1 i:_1 N_1 m_2 u:j_2 +# 兄妹 h_1 i:_1 N_1 m_6 u:j_6 +# +# +# Write only one pronunciation per line +# Transfer any tags, prefixed by underscores, to phones in the syllable +# Remove the syllable boundary markers, given by periods or pound signs +# +# NOTE: The Romainzation is present only for some languages. See -r option. +# +# This script will create 5 new files +# +# - lexicon.txt: words from the original lexicon + some non-speech "words" +# + $OOV_symbol = ""; # Default OOV symbol: pronunciation + $vocalNoise = ""; # Vocal noise symvol: pronunciation + $nVoclNoise = ""; # Nonvocal noise: pronunciation + $silence = ""; # Silence > 1 second: pronunciation $sil + $icu_transform = ""; + $phonemap=""; +# +# - nonsilence_phones.txt: tagged phones from the new lexicon +# +# - optional_silence.txt: phones used to model silence in acoustic training +# + $sil = "SIL"; # Also the pronunciation of the word token $silence +# +# - silence_phones.txt: $sil and special phones for non-speech "words" +# +# - extra_questions.txt: sets of phones of the form *_TAG, one set per line +# +# The last file provides sets of phones that share a tag, so that questions can +# effectively be asked about the tag of a neighboring phone during clustering. +# +############################################################################### + +GetOptions("add=s" => \$nsWordsFile, + "oov=s" => \$OOV_symbol, + "romanized!" => \$romanized, + "sil=s" => \$sil, + "icu-transform=s" => \$icu_transform, + "phonemap=s" => \$phonemap + ); + +if ($#ARGV == 1) { + $inDict = $ARGV[0]; + $outDir = $ARGV[1]; + print STDERR ("$0: $inDict $outDir\n"); + print STDERR ("\tNon-speech words will be added from $nsWordsFile\n") if ($nsWordsFile); + print STDERR ("\tUnknown words will be represented by \"$OOV_symbol\"\n") unless ($OOV_symbol eq ""); + print STDERR ("\tRomanized forms of words expected in the dictionary\n") if ($romanized); + print STDERR ("\tThe optional silence phone will be \"$OOV_symbol\"\n") unless ($sil eq "SIL"); + print STDERR ("\tThe ICU transform for case-conversion will be: \"$icu_transform\"\n") if ($icu_transform); +} else { + print STDERR ("Usage: $0 [--options] BabelDictionary OutputDir\n"); + print STDERR ("\t--add Add these nonspeech words to lexicon\n"); + print STDERR ("\t--oov Use this symbol for OOV words (default )\n"); + print STDERR ("\t--romanized Dictionary contains (omissible) romanized word-forms\n"); + print STDERR ("\t--phonemap During reading the dictionary, perform the specified \n"); + print STDERR ("\t phoneme mapping. The format is: p1=p1' p2' p3';p2=p4'\n"); + print STDERR ("\t where p1 and p2 are existing phonemes and p1'..p4' are\n"); + print STDERR ("\t either new or existing phonemes\n"); + print STDERR ("\t--icu-transform ICU transform to be used during the ICU transliteration\n"); + exit(1); +} + +unless (-d $outDir) { + print STDERR ("$0: Creating output directory $outDir\n"); + die "Unable to create output directory $outDir" + if system("mkdir -p $outDir"); # mkdir returned with status != 0 +} +$outLex = "$outDir/lexicon.txt"; +$nspFile = "$outDir/nonsilence_phones.txt"; +$spFile = "$outDir/silence_phones.txt"; +$osFile = "$outDir/optional_silence.txt"; +$exqFile = "$outDir/extra_questions.txt"; + + +#The phonemap is in the form of "ph1=a b c;ph2=a f g;...." 
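+# For example, --phonemap "ph1=a b c;ph2=a f g" rewrites every occurrence of
+# phoneme ph1 in a pronunciation as the sequence "a b c", and every ph2 as
+# "a f g" (ph1, ph2, a, b, c, f, g are placeholder phone names).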
+%phonemap_hash; +if ($phonemap) { + $phonemap=join(" ", split(/\s+/, $phonemap)); + print $phonemap . "\n"; + @phone_map_instances=split(/;/, $phonemap); + foreach $instance (@phone_map_instances) { + ($phoneme, $tgt) = split(/=/, $instance); + $phoneme =~ s/^\s+|\s+$//g; + $tgt =~ s/^\s+|\s+$//g; + #print "$phoneme=>$tgt\n"; + @tgtseq=split(/\s+/,$tgt); + $phonemap_hash{$phoneme} = []; + push @{$phonemap_hash{$phoneme}}, @tgtseq; + } +} + +#print Dumper(\%phonemap_hash); + +############################################################################### +# Read input lexicon, write output lexicon, and save the set of phones & tags. +############################################################################### + + +open (INLEX, $inDict) + || die "Unable to open input dictionary $inDict"; + +open (OUTLEX, "| sort -u > $outLex") + || die "Unable to open output dictionary $outLex"; + +$numWords = $numProns = 0; +while ($line=) { + chomp; + ############################################### + # Romainzed forms necessitate \t\S+ below, else + # if ($line =~ m:^([^\t]+)(\t[^\t]+)+$:) { + ############################################### + if ( ($romanized && ($line =~ m:^([^\t]+)\t\S+((\t[^\t]+)+)$:)) || + ((!$romanized) && ($line =~ m:^([^\t]+)((\t[^\t]+)+)$:)) ) { + $word = $1; + + if ( $icu_transform ) { + $xform_word=`echo \"$word\" | uconv -f utf8 -t utf8 -x \"$icu_transform\"`; + chop $xform_word; + #print $xform_word; + #$xform_word="[$word]$xform_word"; + } else { + $xform_word=$word; + } + $prons = $2; + $prons =~ s:^\s+::; # Remove leading white-space + $prons =~ s:\s+$::; # Remove trailing white-space + @pron = split("\t", $prons); + for ($p=0; $p<=$#pron; ++$p) { + $new_pron = ""; + while ($pron[$p] =~ s:^([^\.\#]+)[\.\#]{0,1}::) { push (@syllables, $1); } + while ($syllable = shift @syllables) { + $syllable =~ s:^\s+::; + $syllable =~ s:\s+$::; + $syllable =~ s:\s+: :g; + @original_phones = split(" ", $syllable); + @substituted_original_phones=(); + + foreach $phone (@original_phones) { + if (defined $phonemap_hash{$phone} ) { + #print "Sub: $phone => " . join (' ', @{$phonemap_hash{$phone}}) . "\n"; + push @substituted_original_phones, @{$phonemap_hash{$phone}}; + } else { + push @substituted_original_phones, $phone; + } + } + #print join(' ', @original_phones) . "=>" . join(' ',@substituted_original_phones) . 
"\n"; + @original_phones = @substituted_original_phones; + + $sylTag = ""; + $new_phones = ""; + while ($phone = shift @original_phones) { + if ($phone =~ m:^\_\S+:) { + # It is a tag; save it for later + $is_original_tag{$phone} = 1; + $sylTag .= $phone; + } elsif ($phone =~ m:^[\"\%]$:) { + # It is a stress marker; save it like a tag + $phone = "_$phone"; + $is_original_tag{$phone} = 1; + $sylTag .= $phone; + } elsif ( $phone =~ m:_:) { + # It is a phone containing "_" (underscore) + $new_phone=$phone; + $new_phone=~ s/\_//g; + if (( $is_original_phone{$phone} ) and not defined( $substituted_phones{phone}) ) { + die "ERROR, the $new_phone and $phone are both existing phones, so we cannot do automatic map!"; + } else { + print STDERR "WARNING, phone $phone was replaced with $new_phone\n" unless $substituted_phones{$phone}; + } + $is_original_phone{$new_phone} = "$new_phone"; + $substituted_phones{$phone} = $new_phone; + $new_phones .= " $new_phone"; + } else { + # It is a phone + if ( $substituted_phones{phone} ) { + die "ERROR, the $new_phone and $phone are both existing phones, so we cannot do automatic map!"; + } + $is_original_phone{$phone} = "$phone"; + $new_phones .= " $phone"; + } + } + $new_phones =~ s:(\S+):$1${sylTag}:g; + $new_pron .= $new_phones . "\t"; # the tab added by Dan, to keep track of + # syllable boundaries. + $is_compound_tag{$sylTag} = 1; + while ($new_phones =~ s:^\s*(\S+)::) { $is_new_phone{$1} = 1; } + } + $new_pron =~ s:^\s+::; + print OUTLEX ("$xform_word\t$new_pron\n"); + $numProns++; + } + @pron = (); + $numWords++; + } else { + print STDERR ("$0 WARNING: Skipping unparsable line $. in $inDict\n"); + } +} +close(INLEX) + && print STDERR ("$0: Read $numWords entries from $inDict\n"); + +############################################################################### +# Read a list of non-speech words if given, and write their "pronunciations" +# - Such lexicon entries are typically created for , etc. +# - If provided explicitly, they each get their own private phone models +# - Otherwise, they are mapped to an OOV symbol with a shared phone +# - All such phones are grouped with the $sil phone for clustering purposes, +# which means that they remain context-independent and form a question set. 
+###############################################################################
+
+if ($nsWordsFile) {
+    open (NSW, $nsWordsFile)
+        || die "Unable to open non-speech words file $nsWordsFile";
+    $numNSWords = 0;
+    while ($line=<NSW>) {
+        next unless ($line =~ m:^\s*([^\s]+)\s*:); # Take the first word if present
+        print OUTLEX ("$1\t$1\n"); # The word itself is its pronunciation
+        $is_silence_phone{$1} = 1; # Add it to the list of silence phones
+        $numProns++;
+        $numNSWords++;
+    }
+    close(NSW)
+        && print STDERR ("$0: Adding $numNSWords non-speech words from $nsWordsFile to $outLex\n");
+}
+
+# Add the OOV symbol to the lexicon
+print OUTLEX ("$OOV_symbol\t<oov>\n"); # The <oov> symbol is assumed not to be
+$is_silence_phone{"<oov>"} = 1;        # a phone in the original lexicon :-)
+$numProns++;
+
+# Add the vocal noise symbol to the lexicon
+print OUTLEX ("$vocalNoise\t<vns>\n"); # The <vns> symbol is assumed not to be
+$is_silence_phone{"<vns>"} = 1;        # a phone in the original lexicon :-)
+$numProns++;
+
+# Add the nonvocal noise symbol to the lexicon
+print OUTLEX ("$nVoclNoise\t<sss>\n"); # The <sss> symbol is assumed not to be
+$is_silence_phone{"<sss>"} = 1;        # a phone in the original lexicon :-)
+$numProns++;
+
+# Finally, add the silence symbol to the lexicon
+print OUTLEX ("$silence\t$sil\n");
+$is_silence_phone{$sil} = 1;
+$numProns++;
+
+close(OUTLEX)
+    && print STDERR ("$0: Wrote $numProns pronunciations to $outLex\n");
+
+###############################################################################
+# - nonsilence_phones.txt: tagged phones from the new lexicon, 1 phone/line
+###############################################################################
+
+foreach $phone (sort keys %is_new_phone) {
+    $tagLess_phone = $phone;
+    $tagLess_phone =~ s:^([^_]+).*:$1:; # underscore marks tag beginnings
+    if ($is_original_phone{$tagLess_phone}) {
+        # save $phone for writing later to the NSP file
+        $is_original_phone{$tagLess_phone} .= " $phone";
+    } else {
+        print STDERR ("$0 WARNING: Skipping unexpected tagged phone $phone.\n");
+        print STDERR ("\tCheck if original lexicon has phones containing \"\_\"\n");
+        die "Cannot continue";
+    }
+}
+
+open (NSP, "| sort > $nspFile")
+    || die "Unable to write nonsilence phones to $nspFile";
+$p = 0;
+foreach $phone (sort keys %is_original_phone) {
+    $tagged_phones = $is_original_phone{$phone};
+    $tagged_phones =~ s:^\S+\s*::; # Remove the original (untagged) phone
+    unless ($phone eq "") {
+        print NSP ("$tagged_phones\n"); # Write out the remaining (tagged) phones
+        $p++;
+    }
+}
+
+close(NSP)
+    && print STDERR ("$0: Wrote $p (sets of) nonsilence phones to $nspFile\n");
+
+if ( $p > (0.5*$numWords) ) {
+    print STDERR ("$0 WARNING: Original dictionary had $numWords words, and\n");
+    print STDERR ("\t\t$p nonsilence phones were found! 
This is highly unusual.\n"); + print STDERR ("\t\tCheck if the dictionary contains other tab-separated values\n"); + print STDERR ("\t\tthat are being mistaken for pronunciations by this script.\n"); + print STDERR ("$0 ADVICE: Use --romanized for omitting romanized word forms\n") unless ($romanized); +} + +############################################################################### +# - silence_phones.txt: $sil and special phones for non-speech "words" +############################################################################### + +open (SPF, "| sort > $spFile") + || die "Unable to write silence phones to $spFile"; +$p = 0; +foreach $phone (keys %is_silence_phone) { + print SPF ("$phone\n"); + $p++; +} +close(SPF) + && print STDERR ("$0: Wrote $p silence phones to $spFile\n"); + +############################################################################### +# - optional_silence.txt: the reserved (?) phone +############################################################################### + +$is_optional_silence{$sil} = 1; +open (OSF, "| sort > $osFile") + || die "Unable to write optional silence phones to $osFile"; +$p = 0; +foreach $phone (keys %is_optional_silence) { + print OSF ("$phone\n"); + $p++; +} +close(OSF) + && print STDERR ("$0: Wrote $p optional silence phones to $osFile\n"); + +############################################################################### +# - extra_questions.txt: sets of phones of the form *_TAG, one set per line +############################################################################### + +open (EXQ, "| sort > $exqFile") + || die "Unable to write the extra questions file $exqFile"; + +# First make sets of all tagged phones that share the (single) original tags + +$numExtraQs = 0; +foreach $tag (sort keys %is_original_tag) { + $question = ""; + foreach $phone (sort keys %is_new_phone) { + $question .= " $phone" if ($phone =~ m:$tag:); + } + $question =~ s:^\s+::; + print EXQ ("$question\n") unless ($question eq ""); + $numExtraQs++; +} +print STDERR ("$0: Found $numExtraQs unique individual tags in $inDict\n"); + +# It is possible to go overboard by creating questions with all 2^K possible +# subsets of the original tags. E.g. ($phone=~m:$tag1:)||($phone=~m:$tag2:) +# Do this by hand if it is linguistically meaningful for some language +# It is not worth doing this generically for all languages and tag sets. + +# If each syllable has only one tag, then questions with conjunctions of tags +# such as ($phone=~m:$tag1:)&&($phone=~m:$tag2:) will yield empty questions +# However, if syllables carry multiple tags, e.g. tone and stress, then one +# could similarly go overboard with conjunctions of overlapping tags. +# This too is not worth doing generically for all languages and tag sets. 
+
+# Instead, just make sets of all tagged phones with the same new (compound) tag
+
+foreach $tag (sort keys %is_compound_tag) {
+    next if ($is_original_tag{$tag});
+    $question = "";
+    foreach $phone (sort keys %is_new_phone) {
+        $question .= " $phone" if ($phone =~ m:$tag:);
+    }
+    $question =~ s:^\s+::;
+    print EXQ ("$question\n") unless ($question eq "");
+    $numExtraQs++;
+}
+
+# Finally, add the silence phones as a set for use as a clustering question
+
+$question = "";
+foreach $phone (sort keys %is_silence_phone) {
+    $question .= " $phone";
+}
+$question =~ s:^\s+::;
+print EXQ ("$question\n") unless ($question eq "");
+$numExtraQs++;
+
+close(EXQ)
+    && print STDERR ("$0: Wrote $numExtraQs extra questions (incl compound tags and sil) to $exqFile\n");
diff --git a/egs/babel/s5d/local/prepare_stm.pl b/egs/babel/s5d/local/prepare_stm.pl
new file mode 100755
index 00000000000..b4daec585e3
--- /dev/null
+++ b/egs/babel/s5d/local/prepare_stm.pl
@@ -0,0 +1,345 @@
+#!/usr/bin/env perl
+use Getopt::Long;
+use Encode;
+
+################################################################################
+#
+# Script to prepare a NIST .stm file for scoring ASR output. Based on the files
+# that are naturally created for Kaldi acoustic training:
+#
+# - data/segments: contains segmentID, recordingID, start-time & end-time
+#
+# - data/wav.scp: contains recordingID & waveform-name (or sph2pipe command)
+#
+# - data/utt2spk: contains segmentID & speakerID
+#
+# - data/text: contains segment ID and transcription
+#
+# The .stm file has lines of the form
+#
+# waveform-name channel speakerID start-time end-time [<attributes>] transcription
+#
+# Clearly, most of the information needed for creating the STM file is present
+# in the four Kaldi files mentioned above, except channel --- its value will be
+# obtained from the sph2pipe command if present, or will default to "1" --- and
+# speaker attributes, which would have to come from a separate demographics.tsv
+# file. (A feature to add later?)
+#
+# Note: Some text filtering is done by this script, such as removing non-speech
+# tokens (of the form <...>) from the transcription.
+
+ $fragMarkers = ""; # If given by the user, they are stripped from words
+
+# But two types of tokens are retained as is, if present.
+#
+ $Hesitation = "<hes>"; # which captures hesitations, filled pauses, etc.
+ $OOV_symbol = "<unk>"; # which our system outputs occasionally.
+#
+# Note: The .stm file must be sorted by filename and channel in ASCII order and
+# by the start-time in numerical order. NIST recommends the unix command
+# "sort +0 -1 +1 -2 +3nb -4"
+#
+# This script will also produce an auxiliary file named reco2file_and_channel
+# which is used by Kaldi scripts to produce output in .ctm format for scoring.
+# So any channel ID assigned here will be consistent between ref and output.
+#
+# If the training text is Viterbi-aligned to the speech to obtain time marks,
+# it should be straightforward to modify this script to produce a .ctm file:
+#
+# waveform-file channel start-time duration word
+#
+# which lists the transcriptions with word-level time marks. 
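+#
+# For example, a single such .ctm line could look like (hypothetical values):
+#
+# fileA 1 7.25 0.40 hello
+#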
+#
+# Note: A .ctm file must be sorted via "sort +0 -1 +1 -2 +2nb -3"
+#
+################################################################################
+GetOptions("fragmentMarkers=s" => \$fragMarkers, "hesitationToken=s" => \$Hesitation,"oovToken=s" => \$OOV_symbol);
+
+if ($#ARGV == 0) {
+    $inDir = $ARGV[0];
+    print STDERR ("$0: Making stm file from information in $inDir\n");
+    print STDERR ("\tRemoving [$fragMarkers]+ from ends of tokens\n") if ($fragMarkers);
+    print STDERR ("\tPreserving hesitation tokens $Hesitation\n") unless ($Hesitation eq "<hes>");
+    print STDERR ("\tUsing $OOV_symbol as the OOV symbol\n") unless ($OOV_symbol eq "<unk>");
+} else {
+    print STDERR ("Usage: $0 [--options] DataDir\n");
+    print STDERR ("\t--fragmentMarkers Strip these from ends of each token (default: none)\n");
+    print STDERR ("\t--hesitationToken Preserve when deleting non-speech tokens (default: <hes>)\n");
+    print STDERR ("\t--oovToken Use to replace hard-coded OOVs (default: <unk>)\n");
+    exit(1);
+}
+
+$segmentsFile = "$inDir/segments";
+$scpFile = "$inDir/wav.scp";
+$utt2spkFile = "$inDir/utt2spk";
+$textFile = "$inDir/text";
+$stmFile = "$inDir/stm";
+$charStmFile = "$inDir/char.stm";
+$reco2ctmFile = "$inDir/reco2file_and_channel";
+
+################################################################################
+# Read the segmentIDs, file-IDs, start- and end-times from the segments file
+################################################################################
+
+my $num_failed_parses=0;
+my $num_failed_parses_max=10;
+
+die "Current version of script requires a segments file" unless (-e $segmentsFile);
+
+open(SEGMENTS, $segmentsFile)
+    || die "Unable to read segments file $segmentsFile";
+$numSegments = 0;
+while ($line=<SEGMENTS>) {
+    @tokens = split(/\s+/, $line);
+    unless ($#tokens == 3) {
+        $num_failed_parses+=1;
+        print STDERR "$0: Couldn't parse line $. in $segmentsFile\n"
+            if ($num_failed_parses == 1);
+        print STDERR ("\tLine: $line")
+            if ($num_failed_parses <= $num_failed_parses_max);
+        print STDERR "$0: Maximal threshold for failed line parses reached. Not warning anymore\n"
+            if ($num_failed_parses == $num_failed_parses_max);
+        next;
+    }
+    $segmentID = shift @tokens;
+    if (exists $fileID{$segmentID}) {
+        print STDERR ("$0: Skipping duplicate segment ID $segmentID in $segmentsFile\n");
+        next;
+    }
+    $fileID{$segmentID} = shift @tokens;
+    $startTime{$segmentID} = shift @tokens;
+    $endTime{$segmentID} = shift @tokens;
+    ++$numSegments;
+}
+close(SEGMENTS);
+print STDERR ("$0: Read info about $numSegments segment IDs from $segmentsFile\n");
+print STDERR ("$0: In total $num_failed_parses lines failed to be parsed.\n");
+
+################################################################################
+# Read the waveform filenames from the wav.scp file. (Parse sph2pipe command.)
+################################################################################
+
+open(SCP, $scpFile)
+    || die "Unable to open scp file $scpFile\n";
+$numRecordings = 0;
+$num_failed_parses=0;
+while ($line=<SCP>) {
+    chomp $line;
+    if ($line =~ m:^\s*(\S+)\s+(.+)$:) {
+        $recordingID = $1;
+        $waveformFile = $2;
+    } else {
+        $num_failed_parses+=1;
+        print STDERR ("$0: Couldn't parse line $. in $scpFile\n")
+            if ($num_failed_parses == 1);
+        print STDERR ("\tLine: $line")
+            if ($num_failed_parses <= $num_failed_parses_max);
+        print STDERR "$0: Maximal threshold for failed line parses reached. 
Not warning anymore\n"
+            if ($num_failed_parses == $num_failed_parses_max);
+        next;
+    }
+    if (exists $waveform{$recordingID}) {
+        print STDERR ("$0: Skipping duplicate recording ID $recordingID in $scpFile\n");
+        # BUG ALERT: This check may need to be turned off for multi-channel recordings,
+        # since the same recording may appear with different channels?
+        next;
+    }
+    if ($waveformFile =~ m:^\S+$:) {
+        # This is a single filename, no sph2pipe or gunzip for reading waveforms
+        $waveform{$recordingID} = $waveformFile;
+    } elsif (($waveformFile =~ m:(sph2pipe|gunzip|gzip|cat|zcat)\s+:) &&
+             ($waveformFile =~ m:\s+(\S+)\s*\|$:)) {
+        # HACK ALERT: the filename is *assumed* to be at the END of the command
+        $waveform{$recordingID} = $1;
+        $channel{$recordingID} = $1 if ($waveformFile =~ m:sph2pipe\s+.*\-c\s+(\S+)\s+.+:);
+    } elsif (($waveformFile =~ m:(sox)\s+:) &&
+             ($waveformFile =~ m:\s+(\S+)\s*\|$:)) {
+        # HACK ALERT: the first element that ends with '.wav' is assumed to
+        # be the original filename
+        undef $filename; # reset, so a value from a previous line cannot leak through
+        @elems=split(/\s+/, $waveformFile);
+        foreach $elem (@elems) {
+            if ($elem =~ m/.*\.wav/) {
+                $filename=$elem;
+                last;
+            }
+        }
+        die ("$0: Couldn't parse waveform filename on line $. in $scpFile\n\t$line\n") if not defined $filename;
+        die ("$0: Filename $filename does not exist: in $scpFile\n\t$line\n") unless (-e $filename);
+
+        $waveform{$recordingID} = $filename;
+        #$channel{$recordingID} = $filename;
+    } else {
+        print STDERR ("$0: Couldn't parse waveform filename on line $. in $scpFile\n\t$line\n");
+        next;
+    }
+    $waveform{$recordingID} =~ s:.+/::; # remove path prefix
+    $waveform{$recordingID} =~ s:\.(sph|wav)\s*$::; # remove file extension
+    $channel{$recordingID} = 1 # Default
+        unless (exists $channel{$recordingID});
+    ++$numRecordings;
+}
+close(SCP);
+print STDERR ("$0: Read filenames for $numRecordings recording IDs from $scpFile\n");
+print STDERR ("$0: In total $num_failed_parses lines failed to be parsed.\n");
+
+################################################################################
+# Read speaker information from the utt2spk file
+################################################################################
+
+open(UTT2SPK, $utt2spkFile)
+    || die "Unable to read utt2spk file $utt2spkFile";
+$numSegments = 0;
+$num_failed_parses = 0;
+while ($line=<UTT2SPK>) {
+    @tokens = split(/\s+/, $line);
+    if (! ($#tokens == 1)) {
+        $num_failed_parses+=1;
+        print STDERR ("$0: Couldn't parse line $. in $utt2spkFile\n")
+            if ($num_failed_parses == 1);
+        print STDERR ("\tLine: $line")
+            if ($num_failed_parses <= $num_failed_parses_max);
+        print STDERR "$0: Maximal threshold for failed line parses reached. 
Not warning anymore\n"
+            if ($num_failed_parses == $num_failed_parses_max);
+        next;
+    }
+    $segmentID = shift @tokens;
+    if (exists $speakerID{$segmentID}) {
+        print STDERR ("$0: Skipping duplicate segment ID $segmentID in $utt2spkFile\n");
+        next;
+    }
+    $speakerID{$segmentID} = shift @tokens;
+    ++$numSegments;
+}
+close(UTT2SPK);
+print STDERR ("$0: Read speaker IDs for $numSegments segments from $utt2spkFile\n");
+print STDERR ("$0: In total $num_failed_parses lines failed to be parsed.\n");
+
+################################################################################
+# Read the transcriptions from the text file
+################################################################################
+
+open(TEXT, $textFile)
+    || die "Unable to read text file $textFile";
+$numSegments = $numWords = 0;
+$num_failed_parses = 0;
+while ($line=<TEXT>) {
+    chomp $line;
+    if ($line =~ m:^(\S+)\s+(.+)$:) {
+        $segmentID = $1;
+        $text = $2;
+    } else {
+        $num_failed_parses+=1;
+        print STDERR ("$0: Couldn't parse line $. in $textFile\n")
+            if ($num_failed_parses == 1);
+        print STDERR ("\tLine: $line")
+            if ($num_failed_parses <= $num_failed_parses_max);
+        print STDERR "$0: Maximal threshold for failed line parses reached ($num_failed_parses/$num_failed_parses_max). Not warning anymore\n"
+            if ($num_failed_parses == $num_failed_parses_max);
+        next;
+    }
+    if (exists $transcription{$segmentID}) {
+        print STDERR ("$0: Skipping duplicate segment ID $segmentID in $textFile\n");
+        next;
+    }
+    $transcription{$segmentID} = "";
+    @tokens = split(/\s+/, $text);
+    # This is where one could filter the transcription as necessary.
+    # E.g. remove noise tokens, mark non-scoring segments, etc.
+    # HACK ALERT: Current version does this in an ad hoc manner!
+    while ($w = shift(@tokens)) {
+        # Substitute OOV tokens specific to the Babel data
+        $w = $OOV_symbol if ($w eq "(())");
+        # Remove fragMarkers, if provided, from either end of the word
+        $w =~ s:(^[$fragMarkers]|[$fragMarkers]$)::g if ($fragMarkers);
+        # Omit non-speech symbols, i.e. tokens of the form <...>
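+        # (e.g. a hypothetical token "<breath>" matches the ^<[^>]+>$ pattern
+        # below and is deleted, while ordinary words pass through unchanged)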
+        $w =~ s:^<[^>]+>$:: unless (($w eq $OOV_symbol) || ($w eq $Hesitation));
+        next if ($w eq "");
+        $transcription{$segmentID} .= " $w";
+        $numWords++;
+    }
+    $transcription{$segmentID} =~ s:^\s+::;  # Remove leading white space
+    $transcription{$segmentID} =~ s:\s+$::;  # Remove trailing white space
+    $transcription{$segmentID} =~ s:\s+: :g; # Normalize remaining white space
+    # Transcriptions containing no words, or only OOVs and hesitations are not scored
+    $transcription{$segmentID} = "IGNORE_TIME_SEGMENT_IN_SCORING"
+        if (($transcription{$segmentID} eq "") ||
+            ($transcription{$segmentID} =~ m:^(($OOV_symbol|$Hesitation)\s*)+$:));
+    ++$numSegments;
+}
+close(TEXT);
+print STDERR ("$0: Read transcriptions for $numSegments segments ($numWords words) from $textFile\n");
+print STDERR ("$0: In total $num_failed_parses lines failed to be parsed.\n");
+
+################################################################################
+# Write the transcriptions in stm format to a file named stm
+################################################################################
+
+print STDERR ("$0: Overwriting existing stm file $stmFile\n")
+    if (-s $stmFile);
+open(STM, "| sort +0 -1 +1 -2 +3nb -4 > $stmFile")
+    || die "Unable to write to stm file $stmFile";
+$numSegments = 0;
+foreach $segmentID (sort keys %fileID) {
+    if (exists $waveform{$fileID{$segmentID}}) {
+        printf STM ("%s %s %s %.2f %.2f",
+                    $waveform{$fileID{$segmentID}},
+                    $channel{$fileID{$segmentID}},
+                    $speakerID{$segmentID},
+                    $startTime{$segmentID},
+                    $endTime{$segmentID});
+        printf STM (" <%s>", $attributes{$segmentID}) if (exists $attributes{$segmentID});
+        printf STM (" %s\n", $transcription{$segmentID});
+        ++$numSegments;
+    } else {
+        print STDERR ("$0: No waveform found for segment $segmentID, file $fileID{$segmentID}\n");
+    }
+}
+close(STM);
+print STDERR ("$0: Wrote reference transcriptions for $numSegments segments to $stmFile\n");
+
+################################################################################
+# Write a character-separated stm file as well, for CER computation
+################################################################################
+
+print STDERR ("$0: Overwriting existing char.stm file $charStmFile\n")
+    if (-s $charStmFile);
+open(STM, "$stmFile")
+    || die "Unable to read back stm file $stmFile";
+binmode STM,":encoding(utf8)";
+open(CHARSTM, "> $charStmFile")
+    || die "Unable to write to char.stm file $charStmFile";
+binmode CHARSTM,":encoding(utf8)";
+while ($line=<STM>) {
+    @tokens = split(/\s+/, $line);
+    # The first 5 tokens are filename, channel, speaker, start- and end-time
+    for ($n=0; $n<5; $n++) {
+        $w = shift @tokens;
+        print CHARSTM ("$w ");
+    }
+    # CER is used only for some scripts, e.g. CJK. So only non-ASCII characters
+    # in the remaining tokens should be split into individual tokens. 
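+    # For example (hypothetical transcription): "你好 ok" becomes "你 好 ok";
+    # adjacent non-ASCII characters are separated while ASCII words stay intact.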
+    $w = join (" ", @tokens);
+    $w =~ s:([^\x00-\x7F])(?=[^\x00-\x7F]):$1 :g; # split adjacent non-ASCII chars
+    print CHARSTM ("$w\n");
+}
+close(CHARSTM);
+close(STM);
+print STDERR ("$0: Wrote char.stm file $charStmFile\n");
+
+################################################################################
+# Write the reco2file_and_channel file for use by Kaldi scripts
+################################################################################
+
+print STDERR ("$0: Overwriting existing reco2file_and_channel file $reco2ctmFile\n")
+    if (-s $reco2ctmFile);
+open(RECO2CTM, "| sort > $reco2ctmFile")
+    || die "Unable to write to reco2file_and_channel file $reco2ctmFile";
+$numRecordings = 0;
+foreach $recordingID (sort keys %waveform) {
+    printf RECO2CTM ("%s %s %s\n", $recordingID, $waveform{$recordingID}, $channel{$recordingID});
+    ++$numRecordings;
+}
+close(RECO2CTM);
+print STDERR ("$0: Wrote file_and_channel info for $numRecordings recordings to $reco2ctmFile\n");
+
+print STDERR ("$0: Done!\n");
+exit(0);
diff --git a/egs/babel/s5d/local/prepare_unicode_lexicon.py b/egs/babel/s5d/local/prepare_unicode_lexicon.py
new file mode 100755
index 00000000000..ec2d9e64c37
--- /dev/null
+++ b/egs/babel/s5d/local/prepare_unicode_lexicon.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python
+
+# Copyright 2016 Johns Hopkins University (Author: Matthew Wiesner)
+# Apache 2.0
+
+# ======= Prepare data/local directory for babel data with unicode tags ======
+# This script creates all files in the data/local directory for babel formats,
+# except for the filtered_lexicon.txt file which is created by the
+# make_lexicon_subset.sh script.
+#
+# This script basically takes the place of the prepare_lexicon.pl script. It
+# creates the following files.
+#
+# 1. lexicon.txt (via local/lexicon/make_unicode_lexicon.py, which happens
+#    prior to running this script)
+# 2. nonsilence_phones.txt
+# 3. silence_phones.txt
+# 4. optional_silence.txt
+# 5. extra_questions.txt
+# ============================================================================
+
+from __future__ import print_function
+import codecs
+import sys
+import os
+import argparse
+
+SKIP = ("", "''", "<", ">", "#")
+
+
+# Extract a sorted set of distinct unicode graphemes from the lexicon
+def extract_graphemes(table):
+    '''
+    Extract a sorted set of distinct unicode graphemes from the lexicon.
+
+    Usage: extract_graphemes(PATH_TO_LEXICON_TABLE)
+
+    Arguments:
+        table -- path to the lexicon table output by make_unicode_lexicon.py
+
+    Output:
+        unicode_graphemes -- the sorted set of distinct unicode graphemes
+                             that occurred in the lexicon.
+    '''
+    with codecs.open(table, "r", "utf-8") as fp:
+
+        # Get relevant header columns for extracting graphemes used in lexicon
+        # --------------------------------------------------------------------
+        header = fp.readline()
+        idx = []
+        for i, j in enumerate(header.strip().split('\t')):
+            if j.startswith("MAP"):
+                idx.append(i)
+
+        # --------------------------------------------------------------------
+        # Extract all unique graphemes. Place into formats ...
+        # 1. unicode_graphemes = [g1, g2, g3, ... , gN]
+        #
+        # 2. Grapheme dict as keys for each base (without tags) grapheme along
+        #    with all distinct graphemes starting with the base grapheme.
+        #    phones_dict = {p1: p1_with_tags_1, p1_with_tags_2, ... , p2: ... 
}
+        # --------------------------------------------------------------------
+        unicode_graphemes = []
+        graphemes_dict = {}
+        for line in fp:
+            for i in idx:
+                grapheme = line.strip().split('\t')[i]
+                if grapheme not in SKIP:
+                    unicode_graphemes.append(grapheme)
+
+        # Create the sorted set of distinct unicode graphemes in the lexicon
+        unicode_graphemes = sorted(set(unicode_graphemes))
+        for g in unicode_graphemes:
+            base_graph = g.split("_")[0]
+            if(base_graph not in graphemes_dict.keys()):
+                graphemes_dict[base_graph] = []
+
+            graphemes_dict[base_graph].append(g)
+
+    return unicode_graphemes, graphemes_dict
+
+
+def write_nonsilence_phones(graphemes_dict, nonsilence_phones,
+                            extraspeech=None):
+    with codecs.open(nonsilence_phones, "w", "utf-8") as fp:
+        try:
+            with codecs.open(extraspeech, "r", "utf-8") as f:
+                for line in f:
+                    line_vals = line.strip().split()
+                    fp.write("%s\n" % line_vals[1])
+        except (IOError, TypeError):
+            pass
+
+        # Write each base grapheme with all tags on the same line
+        for base_grapheme in sorted(graphemes_dict.keys()):
+            line = ""
+            for grapheme in graphemes_dict[base_grapheme]:
+                line += grapheme + " "
+            fp.write("%s\n" % line.strip())
+
+
+def write_extra_questions(unicode_graphemes, graphemes_dict, tags,
+                          extra_questions, nonspeech=None, extraspeech=None):
+    with codecs.open(extra_questions, "w", "utf-8") as fp:
+        # Write all unique "phones" (graphemes in this case), plus the
+        # extra-speech words, to a single line.
+
+        # Write the extraspeech
+        try:
+            with codecs.open(extraspeech, "r", "utf-8") as f:
+                for line in f:
+                    line_vals = line.strip().split()
+                    fp.write("%s " % line_vals[1])
+        except (IOError, TypeError):
+            pass
+
+        for g in unicode_graphemes:
+            fp.write("%s " % g)
+        fp.write("\n")
+
+        # Write the nonspeech
+        try:
+            with codecs.open(nonspeech, "r", "utf-8") as f:
+                for line in f:
+                    line_vals = line.strip().split()
+                    fp.write("%s " % line_vals[1])
+                fp.write("\n")
+        except (IOError, TypeError):
+            pass
+
+        # Write all possible phone_tag combinations that occur in the lexicon
+        for tag in tags:
+            for g in graphemes_dict.keys():
+                tagged_grapheme = "_".join([g, tag])
+                if(tagged_grapheme in graphemes_dict[g]):
+                    fp.write("%s " % tagged_grapheme)
+            fp.write("\n")
+
+
+def main():
+    # --------------- Extract unicode_graphemes from the table --------------
+    if(len(sys.argv[1:]) == 0):
+        print("Usage: local/prepare_unicode_lexicon.py <table> <lex_dir>")
+        sys.exit(1)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("table", help="Table containing all information about"
+                        " how to map unicode graphemes to unicode descriptors")
+    parser.add_argument("lex_dir", help="Directory to which all files"
+                        " should be written")
+    parser.add_argument("--nonspeech", help="File with map of nonspeech words",
+                        action="store", default=None)
+    parser.add_argument("--extraspeech", help="File with map of extraspeech"
+                        " words", action="store", default=None)
+    args = parser.parse_args()
+    unicode_graphemes, graphemes_dict = extract_graphemes(args.table)
+
+    # ---------------- Prepare the directory data/local and a few files ------
+    # Create the data/local directory if it does not yet exist
+    if not os.path.exists(args.lex_dir):
+        os.makedirs(args.lex_dir)
+
+    # Write the silence_phones.txt file
+    with open(os.path.join(args.lex_dir, "silence_phones.txt"), "w") as fo:
+        with open(args.nonspeech, "r") as fi:
+            for line in fi:
+                line_vals = line.strip().split()
+                fo.write("%s\n" % line_vals[1])
+
+    # Write the optional_silence.txt file
+    with open(os.path.join(args.lex_dir, 
"optional_silence.txt"), "w") as fp: + fp.write("SIL\n") + + # --------------- Write the nonsilence_phones.txt file ------------------- + write_nonsilence_phones(graphemes_dict, + os.path.join(args.lex_dir, "nonsilence_phones.txt"), + extraspeech=args.extraspeech) + + # ------------------------- Extract tags --------------------------------- + tags = [] + for g in unicode_graphemes: + # Only consider graphemes with tags + g_tags = g.split("_") + if(len(g_tags) > 1): + tag = "_".join(g_tags[1:]) + if(tag not in tags): + tags.append(tag) + + # --------------- Write the extra questions file ------------------------- + write_extra_questions(unicode_graphemes, graphemes_dict, tags, + os.path.join(args.lex_dir, "extra_questions.txt"), + nonspeech=args.nonspeech, + extraspeech=args.extraspeech) + + +if __name__ == "__main__": + main() diff --git a/egs/babel/s5d/local/reestimate_langp.sh b/egs/babel/s5d/local/reestimate_langp.sh new file mode 100755 index 00000000000..059fba52043 --- /dev/null +++ b/egs/babel/s5d/local/reestimate_langp.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +cmd=run.pl +unk="" +# End configuration section +. ./utils/parse_options.sh + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +datadir=$1 +langdir=$2 +idict=$3 +amdir=$4 +odict=$5 +olocallang=$6 +olang=$7 + + +mkdir -p $odict +mkdir -p $olang +mkdir -p $olocallang +steps/get_prons.sh --cmd "$train_cmd" $datadir $langdir $amdir +utils/dict_dir_add_pronprobs.sh --max-normalize true $idict \ + $amdir/pron_counts_nowb.txt $amdir/sil_counts_nowb.txt \ + $amdir/pron_bigram_counts_nowb.txt $odict + +utils/prepare_lang.sh --phone-symbol-table $langdir/phones.txt \ + $odict "$unk" $olocallang $olang + diff --git a/egs/babel/s5d/local/resegment/evaluate_segmentation.pl b/egs/babel/s5d/local/resegment/evaluate_segmentation.pl new file mode 100755 index 00000000000..9d865cca8c9 --- /dev/null +++ b/egs/babel/s5d/local/resegment/evaluate_segmentation.pl @@ -0,0 +1,198 @@ +#!/usr/bin/env perl + +# Copyright 2014 Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar +# Apache 2.0 + +################################################################################ +# +# This script was written to check the goodness of automatic segmentation tools +# It assumes input in the form of two Kaldi segments files, i.e. a file each of +# whose lines contain four space-separated values: +# +# UtteranceID FileID StartTime EndTime +# +# It computes # missed frames, # false positives and # overlapping frames. +# +################################################################################ + +if ($#ARGV == 1) { + $ReferenceSegmentation = $ARGV[0]; + $HypothesizedSegmentation = $ARGV[1]; + printf STDERR ("Comparing reference segmentation\n\t%s\nwith proposed segmentation\n\t%s\n", + $ReferenceSegmentation, + $HypothesizedSegmentation); +} else { + printf STDERR "This program compares the reference segmenation with the proposted segmentation\n"; + printf STDERR "Usage: $0 reference_segments_filename proposed_segments_filename\n"; + printf STDERR "e.g. $0 data/dev10h/segments data/dev10h.seg/segments\n"; + exit (0); +} + +################################################################################ +# First read the reference segmentation, and +# store the start- and end-times of all segments in each file. 
+################################################################################
+
+open (SEGMENTS, "cat $ReferenceSegmentation | sort -k2,2 -k3n,3 -k4n,4 |")
+    || die "Unable to open $ReferenceSegmentation";
+$numLines = 0;
+while ($line=<SEGMENTS>) {
+    chomp $line;
+    @field = split("[ \t]+", $line);
+    unless ($#field == 3) {
+        printf STDERR "Unparseable line in file $ReferenceSegmentation\n\t$line\n";
+        exit (1);
+    }
+    $fileID = $field[1];
+    unless (exists $firstSeg{$fileID}) {
+        $firstSeg{$fileID} = $numLines;
+        $actualSpeech{$fileID} = 0.0;
+        $hypothesizedSpeech{$fileID} = 0.0;
+        $foundSpeech{$fileID} = 0.0;
+        $falseAlarm{$fileID} = 0.0;
+        $minStartTime{$fileID} = 0.0;
+        $maxEndTime{$fileID} = 0.0;
+    }
+    $refSegName[$numLines] = $field[0];
+    $refSegStart[$numLines] = $field[2];
+    $refSegEnd[$numLines] = $field[3];
+    $actualSpeech{$fileID} += ($field[3]-$field[2]);
+    $minStartTime{$fileID} = $field[2] if ($minStartTime{$fileID}>$field[2]);
+    $maxEndTime{$fileID} = $field[3] if ($maxEndTime{$fileID}<$field[3]);
+    $lastSeg{$fileID} = $numLines;
+    ++$numLines;
+}
+close(SEGMENTS);
+print STDERR "Read $numLines segments from $ReferenceSegmentation\n";
+
+################################################################################
+# Process hypothesized segments sequentially, and gather speech/nonspeech stats
+################################################################################
+
+open (SEGMENTS, "cat $HypothesizedSegmentation | sort -k2,2 -k1,1 |")
+    # Kaldi segments files are sorted by UtteranceID, but we re-sort them here
+    # so that all segments of a file are read together, sorted by start-time.
+    || die "Unable to open $HypothesizedSegmentation";
+$numLines = 0;
+$totalHypSpeech = 0.0;
+$totalFoundSpeech = 0.0;
+$totalFalseAlarm = 0.0;
+$numShortSegs = 0;
+$numLongSegs = 0;
+while ($line=<SEGMENTS>) {
+    chomp $line;
+    @field = split("[ \t]+", $line);
+    unless ($#field == 3) {
+        printf STDERR "Unparseable line in file $HypothesizedSegmentation\n\t$line\n";
+        exit (1);
+    }
+    $fileID = $field[1];
+    $segStart = $field[2];
+    $segEnd = $field[3];
+    if (exists $firstSeg{$fileID}) {
+        # This FileID exists in the reference segmentation
+        # So gather statistics for this UtteranceID
+        $hypothesizedSpeech{$fileID} += ($segEnd-$segStart);
+        $totalHypSpeech += ($segEnd-$segStart);
+        if (($segStart>=$maxEndTime{$fileID}) || ($segEnd<=$minStartTime{$fileID})) {
+            # This entire segment is a false alarm
+            $falseAlarm{$fileID} += ($segEnd-$segStart);
+            $totalFalseAlarm += ($segEnd-$segStart);
+        } else {
+            # This segment may overlap one or more reference segments
+            $p = $firstSeg{$fileID};
+            while ($refSegEnd[$p]<=$segStart) {
+                ++$p;
+            }
+            # The overlap, if any, begins at the reference segment p
+            $q = $lastSeg{$fileID};
+            while ($refSegStart[$q]>=$segEnd) {
+                --$q;
+            }
+            # The overlap, if any, ends at the reference segment q
+            if ($q<$p) {
+                # This segment sits entirely in the nonspeech region
+                # between the two reference speech segments q and p
+                $falseAlarm{$fileID} += ($segEnd-$segStart);
+                $totalFalseAlarm += ($segEnd-$segStart);
+            } else {
+                if (($segEnd-$segStart)<0.20) {
+                    # For diagnosing Pascal's VAD segmentation
+                    print STDOUT "Found short speech region $line\n";
+                    ++$numShortSegs;
+                } elsif (($segEnd-$segStart)>60.0) {
+                    ++$numLongSegs;
+                    # For diagnosing Pascal's VAD segmentation
+                    print STDOUT "Found long speech region $line\n";
+                }
+                # There is some overlap with segments p through q
+                for ($s=$p; $s<=$q; ++$s) {
+                    if ($segStart<$refSegStart[$s]) {
+                        # There 
is a leading false alarm portion before s
+                        $falseAlarm{$fileID} += ($refSegStart[$s]-$segStart);
+                        $totalFalseAlarm += ($refSegStart[$s]-$segStart);
+                        $segStart=$refSegStart[$s];
+                    }
+                    $speechPortion = ($refSegEnd[$s]<$segEnd) ?
+                        ($refSegEnd[$s]-$segStart) : ($segEnd-$segStart);
+                    $foundSpeech{$fileID} += $speechPortion;
+                    $totalFoundSpeech += $speechPortion;
+                    $segStart=$refSegEnd[$s];
+                }
+                if ($segEnd>$segStart) {
+                    # There is a trailing false alarm portion after q
+                    $falseAlarm{$fileID} += ($segEnd-$segStart);
+                    $totalFalseAlarm += ($segEnd-$segStart);
+                }
+            }
+        }
+    } else {
+        # This FileID does not exist in the reference segmentation
+        # So all this speech counts as a false alarm
+        printf STDERR ("Unexpected fileID in hypothesized segments: %s", $fileID);
+        exit (1);
+        $totalFalseAlarm += ($segEnd-$segStart);
+    }
+    ++$numLines;
+}
+close(SEGMENTS);
+print STDERR "Read $numLines segments from $HypothesizedSegmentation\n";
+
+################################################################################
+# Now that all hypothesized segments have been processed, compute needed stats
+################################################################################
+
+$totalActualSpeech = 0.0;
+$totalNonSpeechEst = 0.0; # This is just a crude estimate of total nonspeech.
+foreach $fileID (sort keys %actualSpeech) {
+    $totalActualSpeech += $actualSpeech{$fileID};
+    $totalNonSpeechEst += $maxEndTime{$fileID} - $actualSpeech{$fileID};
+    #######################################################################
+    # Print file-wise statistics to STDOUT; can pipe to /dev/null if needed
+    #######################################################################
+    printf STDOUT ("%s: %.2f min actual speech, %.2f min hypothesized: %.2f min overlap (%d%%), %.2f min false alarm (~%d%%)\n",
+                   $fileID,
+                   ($actualSpeech{$fileID}/60.0),
+                   ($hypothesizedSpeech{$fileID}/60.0),
+                   ($foundSpeech{$fileID}/60.0),
+                   ($foundSpeech{$fileID}*100/($actualSpeech{$fileID}+0.01)),
+                   ($falseAlarm{$fileID}/60.0),
+                   ($falseAlarm{$fileID}*100/($maxEndTime{$fileID}-$actualSpeech{$fileID}+0.01)));
+}
+
+################################################################################
+# Finally, we have everything needed to report the segmentation statistics. 
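+# The percentages below follow directly from the accumulated counters:
+#   overlap%     = 100 * totalFoundSpeech / totalActualSpeech
+#   false-alarm% = 100 * totalFalseAlarm  / totalNonSpeechEst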
+################################################################################
+
+printf STDERR ("------------------------------------------------------------------------\n");
+printf STDERR ("TOTAL: %.2f hrs actual speech, %.2f hrs hypothesized: %.2f hrs overlap (%d%%), %.2f hrs false alarm (~%d%%)\n",
+               ($totalActualSpeech/3600.0),
+               ($totalHypSpeech/3600.0),
+               ($totalFoundSpeech/3600.0),
+               ($totalFoundSpeech*100/($totalActualSpeech+0.000001)),
+               ($totalFalseAlarm/3600.0),
+               ($totalFalseAlarm*100/($totalNonSpeechEst+0.000001)));
+printf STDERR ("\t$numShortSegs segments < 0.2 sec and $numLongSegs segments > 60.0 sec\n");
+printf STDERR ("------------------------------------------------------------------------\n");
diff --git a/egs/babel/s5d/local/resegment/generate_segments.sh b/egs/babel/s5d/local/resegment/generate_segments.sh
new file mode 100755
index 00000000000..95e88deb87d
--- /dev/null
+++ b/egs/babel/s5d/local/resegment/generate_segments.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+
+# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
+# Apache 2.0
+
+set -o pipefail
+set -e
+
+nj=8
+cmd=run.pl
+stage=0
+segmentation_opts="--isolated-resegmentation --min-inter-utt-silence-length 1.0 --silence-proportion 0.05"
+decode_extra_opts=""
+reference_rttm=
+get_text=false  # Get text corresponding to new segments in ${output_dir}
+                # Assuming text is in $data/$type directory.
+                # Does not work very well because the data does not get aligned to many training transcriptions.
+noise_oov=false # Treat <oov> as noise instead of speech
+beam=7.0
+max_active=1000
+
+#debugging stuff
+echo $0 $@
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+set -u
+
+if [ $# -ne 5 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang> <model-dir> <temp-dir> <output-dir>"
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)  # specify how to run the sub-processes."
+  echo "    --nj <nj>  # Number of parallel jobs. "
+  echo "               # For the standard data directories of dev10h, dev2h and eval"
+  echo "               # this is taken from the lang.conf file"
+  echo "    --segmentation-opts '--opt1 opt1val --opt2 opt2val' # options for segmentation.py"
+  echo "    --reference-rttm <rttm-file>  # Reference RTTM file that will be used for analysis of the segmentation"
+  echo "    --get-text (true|false)  # Convert text from base data directory to correspond to the new segments"
+  echo
+  echo "e.g.:"
+  echo "$0 data/dev10h data/lang exp/tri4b_seg exp/tri4b_resegment_dev10h"
+  exit 1
+fi
+
+datadir=$1    # The base data directory that contains at least the files wav.scp and reco2file_and_channel
+lang=$2
+model_dir=$3  # Segmentation model directory created using local/resegment/run_segmentation_train.sh
+temp_dir=$4   # Temporary directory to store some intermediate files during segmentation
+output_dir=$5 # The target directory
+
+###############################################################################
+#
+# Phone Decoder
+#
+###############################################################################
+
+mkdir -p $temp_dir
+dirid=`basename $datadir`
+total_time=0
+t1=$(date +%s)
+
+if [ $stage -le 0 ] ; then
+  steps/decode_nolats.sh $decode_extra_opts --write-words false --write-alignments true \
+    --cmd "$cmd" --nj $nj --beam $beam --max-active $max_active \
+    $model_dir/phone_graph $datadir $model_dir/decode_${dirid} || exit 1
+fi
+
+if [ $stage -le 1 ]; then
+  [ ! -f $model_dir/decode_${dirid}/ali.1.gz ] && echo "File $model_dir/decode_${dirid}/ali.1.gz does not exist!" 
&& exit 1
+  $cmd JOB=1:$nj $model_dir/decode_${dirid}/log/predict.JOB.log \
+    gunzip -c $model_dir/decode_${dirid}/ali.JOB.gz \| \
+    ali-to-phones --per-frame=true $model_dir/final.mdl ark:- ark,t:- \| \
+    utils/int2sym.pl -f 2- $lang/phones.txt \| \
+    gzip -c '>' $temp_dir/pred.JOB.gz || exit 1
+
+  mkdir -p $temp_dir/pred
+  gunzip -c $temp_dir/pred.*.gz | \
+    perl -ne '($file, $phones)=split / /, $_, 2;
+    open($fh, ">'$temp_dir/pred/'$file.pred" ) or die $!;
+    print {$fh} "$file $phones";
+    close($fh);' || exit 1
+
+fi
+t2=$(date +%s)
+total_time=$((total_time + t2 - t1))
+echo "SI decoding done in $((t2-t1)) seconds"
+
+
+###############################################################################
+#
+# Resegmenter
+#
+###############################################################################
+
+if ! [ `cat $lang/phones/optional_silence.txt | wc -w` -eq 1 ]; then
+  echo "Error: this script only works if $lang/phones/optional_silence.txt contains exactly one entry.";
+  echo "You'd have to modify the script to handle other cases."
+  exit 1;
+fi
+
+silphone=`cat $lang/phones/optional_silence.txt`
+# silphone will typically be "sil" or "SIL".
+
+# 3 sets of phones: 0 is silence, 1 is noise, 2 is speech.
+(
+echo "$silphone 0"
+if ! $noise_oov; then
+  grep -v -w $silphone $lang/phones/silence.txt \
+    | awk '{print $1, 1;}' \
+    | sed 's/SIL\(.*\)1/SIL\10/' \
+    | sed 's/\(.*\)1/\12/'
+else
+  grep -v -w $silphone $lang/phones/silence.txt \
+    | awk '{print $1, 1;}' \
+    | sed 's/SIL\(.*\)1/SIL\10/'
+fi
+cat $lang/phones/nonsilence.txt | awk '{print $1, 2;}' | sed 's/\(<.*>.*\)2/\11/' | sed 's/\(.*\)1/\12/'
+) > $temp_dir/phone_map.txt
+
+mkdir -p $output_dir
+mkdir -p $temp_dir/log
+
+local/resegment/segmentation.py --verbose 2 $segmentation_opts \
+  $temp_dir/pred $temp_dir/phone_map.txt 2>$temp_dir/log/resegment.log | \
+  sort > $output_dir/segments || exit 1
+
+if [ ! -s $output_dir/segments ] ; then
+  echo "Zero segments created during segmentation process."
+  echo "That means something failed. Find the cause and re-run!"
+  exit 1
+fi
+
+t2=$(date +%s)
+total_time=$((total_time + t2 - t1))
+echo "Resegment data done in $((t2-t1)) seconds"
+
+for file in reco2file_and_channel wav.scp ; do
+  [ ! -f $datadir/$file ] && echo "Expected file $datadir/$file to exist" && exit 1
+  cp $datadir/$file $output_dir/$file
+done
+
+# We'll make the speaker-ids be the same as the recording-ids (e.g. conversation
+# sides). This will normally be OK for telephone data.
+cat $output_dir/segments | awk '{print $1, $2}' > $output_dir/utt2spk || exit 1
+utils/utt2spk_to_spk2utt.pl ${output_dir}/utt2spk > $output_dir/spk2utt || exit 1
+
+
+dur_hours=`cat ${output_dir}/segments | awk '{num_secs += $4 - $3;} END{print (num_secs/3600);}'`
+echo "Extracted segments of total length of $dur_hours hours of audio"
+
+echo ---------------------------------------------------------------------
+echo "Resegment data Finished successfully on" `date`
+echo ---------------------------------------------------------------------
+
+exit 0
diff --git a/egs/babel/s5d/local/resegment/segmentation.py b/egs/babel/s5d/local/resegment/segmentation.py
new file mode 100755
index 00000000000..7c5c8665a16
--- /dev/null
+++ b/egs/babel/s5d/local/resegment/segmentation.py
@@ -0,0 +1,1508 @@
+#!
/usr/bin/env python
+
+# Copyright 2014 Vimal Manohar
+# Apache 2.0
+
+import os, glob, argparse, sys, re, time
+from argparse import ArgumentParser
+
+use_numpy = True
+try:
+    import numpy as np
+except ImportError:
+    use_numpy = False
+
+# Global stats for analysis taking RTTM file as reference
+global_analysis_get_initial_segments = None
+global_analysis_set_nonspeech_proportion = None
+global_analysis_final = None
+
+def mean(l):
+    if len(l) > 0:
+        return float(sum(l)) / len(l)
+    return 0
+
+# Analysis class
+# Stores statistics like the confusion matrix, length of the segments etc.
+class Analysis:
+    def __init__(self, file_id, frame_shift, prefix):
+        self.confusion_matrix = [0] * 9
+        self.type_counts = [ [[] for j in range(0,9)] for i in range(0,3) ]
+        self.state_count = [ [] for i in range(0,9) ]
+        self.markers = [ [] for i in range(0,9) ]
+        self.phones = [ [] for i in range(0,9) ]
+        self.min_length = [0] * 9
+        self.max_length = [0] * 9
+        self.mean_length = [0] * 9
+        self.percentile25 = [0] * 9
+        self.percentile50 = [0] * 9
+        self.percentile75 = [0] * 9
+        self.file_id = file_id
+        self.frame_shift = frame_shift
+        self.prefix = prefix
+
+    # Add the statistics of this object to another object a
+    # Typically used in a global object to accumulate stats
+    # from local objects
+    def add(self, a):
+        for i in range(0,9):
+            self.confusion_matrix[i] += a.confusion_matrix[i]
+            self.state_count[i] += a.state_count[i]
+
+    # Print the confusion matrix
+    # The interpretation of 'speech', 'noise' and 'silence' is bound to change
+    # through the different post-processing stages. E.g. at the end, speech and silence
+    # correspond respectively to 'in segment' and 'out of segment'
+    def write_confusion_matrix(self, write_hours = False, file_handle = sys.stderr):
+        file_handle.write("Total counts: \n")
+
+        name = ['Silence as silence', \
+                'Silence as noise', \
+                'Silence as speech', \
+                'Noise as silence', \
+                'Noise as noise', \
+                'Noise as speech', \
+                'Speech as silence', \
+                'Speech as noise', \
+                'Speech as speech']
+
+        for j in range(0,9):
+            if self.frame_shift != None:
+                # The conventional usage is for frame_shift to have a value.
+                # But this function can handle other counts like the number of frames.
+                # This function is called to print in counts instead of seconds in
+                # functions like merge_segments
+                if write_hours:
+                    # Write stats in hours instead of seconds
+                    file_handle.write("File %s: %s : %s : %8.3f hrs\n" %
+                                      (self.file_id, self.prefix, name[j],
+                                       self.confusion_matrix[j] * self.frame_shift / 3600.0))
+                else:
+                    file_handle.write("File %s: %s : %s : %8.3f seconds\n" %
+                                      (self.file_id, self.prefix, name[j],
+                                       self.confusion_matrix[j] * self.frame_shift))
+                # End if write_hours
+            else:
+                file_handle.write("File %s: %s : Confusion: Type %d : %8.3f counts\n" %
+                                  (self.file_id, self.prefix, j, self.confusion_matrix[j]))
+            # End if
+        # End for loop over 9 cells of confusion matrix
+
+    # Print the total stats that are just row and column sums of
+    # 3x3 confusion matrix
+    def write_total_stats(self, write_hours = True, file_handle = sys.stderr):
+        file_handle.write("Total Stats: \n")
+
+        name = ['Actual Silence', \
+                'Actual Noise', \
+                'Actual Speech']
+
+        for j in [0,1,2]:
+            if self.frame_shift != None:
+                # The conventional usage is for frame_shift to have a value.
+                # But this function can handle other counts like the number of frames. 
+                # This function is called to print in counts instead of seconds in
+                # functions like merge_segments
+                if write_hours:
+                    # Write stats in hours instead of seconds
+                    file_handle.write("File %s: %s : %s : %8.3f hrs\n" %
+                                      (self.file_id, self.prefix, name[j],
+                                       sum(self.confusion_matrix[3*j:3*j+3]) * self.frame_shift / 3600.0))
+                else:
+                    file_handle.write("File %s: %s : %s : %8.3f seconds\n" %
+                                      (self.file_id, self.prefix, name[j],
+                                       sum(self.confusion_matrix[3*j:3*j+3]) * self.frame_shift))
+                # End if write_hours
+            else:
+                file_handle.write("File %s: %s : %s : %8.3f counts\n" %
+                                  (self.file_id, self.prefix, name[j],
+                                   sum(self.confusion_matrix[3*j:3*j+3])))
+            # End if
+        # End for loop over 3 rows of confusion matrix
+
+        name = ['Predicted Silence', \
+                'Predicted Noise', \
+                'Predicted Speech']
+
+        for j in [0,1,2]:
+            if self.frame_shift != None:
+                # The conventional usage is for frame_shift to have a value.
+                # But this function can handle other counts like the number of frames.
+                # This function is called to print in counts instead of seconds in
+                # functions like merge_segments
+                if write_hours:
+                    # Write stats in hours instead of seconds
+                    file_handle.write("File %s: %s : %s : %8.3f hrs\n" %
+                                      (self.file_id, self.prefix, name[j],
+                                       sum(self.confusion_matrix[j:7+j:3]) * self.frame_shift / 3600.0))
+                else:
+                    file_handle.write("File %s: %s : %s : %8.3f seconds\n" %
+                                      (self.file_id, self.prefix, name[j],
+                                       sum(self.confusion_matrix[j:7+j:3]) * self.frame_shift))
+                # End if write_hours
+            else:
+                file_handle.write("File %s: %s : %s : %8.3f counts\n" %
+                                  (self.file_id, self.prefix, name[j],
+                                   sum(self.confusion_matrix[j:7+j:3])))
+            # End if
+        # End for loop over 3 columns of confusion matrix
+
+    # Print detailed stats of lengths of each of the 3 types of frames
+    # in 8 kinds of segments
+    def write_type_stats(self, file_handle = sys.stderr):
+        for j in range(0,3):
+            # 3 types of frames. Silence, noise, speech.
+            # Typically, we store the number of frames of each type here.
+            for i in range(0,9):
+                # 2^3 = 8 kinds of segments like 'segment contains only silence',
+                # 'segment contains only noise', 'segment contains noise and speech'.
+                # For compatibility with the rest of the analysis code,
+                # the for loop is over 9 kinds.
+                max_length = max([0]+self.type_counts[j][i])
+                min_length = min([10000]+self.type_counts[j][i])
+                mean_length = mean(self.type_counts[j][i])
+                if use_numpy:
+                    try:
+                        percentile25 = np.percentile(self.type_counts[j][i], 25)
+                    except ValueError:
+                        percentile25 = 0
+                    try:
+                        percentile50 = np.percentile(self.type_counts[j][i], 50)
+                    except ValueError:
+                        percentile50 = 0
+                    try:
+                        percentile75 = np.percentile(self.type_counts[j][i], 75)
+                    except ValueError:
+                        percentile75 = 0
+
+                file_handle.write("File %s: %s : TypeStats: Type %d %d: Min: %4d Max: %4d Mean: %4d percentile25: %4d percentile50: %4d percentile75: %4d\n" % (self.file_id, self.prefix, j, i, min_length, max_length, mean_length, percentile25, percentile50, percentile75))
+            # End for loop over 9 different kinds of segments
+        # End for loop over 3 types of frames
+
+    # Print detailed stats of each cell of the confusion matrix. 
+    # The stats include different statistical measures like mean, max, min
+    # and median of the length of continuous regions of frames in
+    # each of the 9 cells of the confusion matrix
+    def write_length_stats(self, file_handle = sys.stderr):
+        for i in range(0,9):
+            self.max_length[i] = max([0]+self.state_count[i])
+            self.min_length[i] = min([10000]+self.state_count[i])
+            self.mean_length[i] = mean(self.state_count[i])
+            if use_numpy:
+                try:
+                    self.percentile25[i] = np.percentile(self.state_count[i], 25)
+                except ValueError:
+                    self.percentile25[i] = 0
+                try:
+                    self.percentile50[i] = np.percentile(self.state_count[i], 50)
+                except ValueError:
+                    self.percentile50[i] = 0
+                try:
+                    self.percentile75[i] = np.percentile(self.state_count[i], 75)
+                except ValueError:
+                    self.percentile75[i] = 0
+
+            file_handle.write("File %s: %s : Length: Type %d: Min: %4d Max: %4d Mean: %4d percentile25: %4d percentile50: %4d percentile75: %4d\n" % (self.file_id, self.prefix, i, self.min_length[i], self.max_length[i], self.mean_length[i], self.percentile25[i], self.percentile50[i], self.percentile75[i]))
+        # End for loop over 9 cells
+
+    # Print detailed stats of each cell of the confusion matrix.
+    # Similar structure to the above function. But this also prints additional
+    # details. The format is:
+    #   Markers: Type <type>: <start_frame> (<num_of_frames>) (<hypothesized_phones>)
+    # The hypothesized_phones can be looked at to see what phones are
+    # present in the hypothesis from start_frame for num_of_frames frames.
+    def write_markers(self, file_handle = sys.stderr):
+        file_handle.write("Start frames of different segments:\n")
+        for j in range(0,9):
+            if self.phones[j] == []:
+                file_handle.write("File %s: %s : Markers: Type %d: %s\n" % (self.file_id, self.prefix, j, str(sorted([str(self.markers[j][i])+' ('+ str(self.state_count[j][i])+ ')' for i in range(0, len(self.state_count[j]))],key=lambda x:int(x.split()[0])))))
+            else:
+                file_handle.write("File %s: %s : Markers: Type %d: %s\n" % (self.file_id, self.prefix, j, str(sorted([str(self.markers[j][i])+' ('+ str(self.state_count[j][i])+') ( ' + str(self.phones[j][i]) + ')' for i in range(0, len(self.state_count[j]))],key=lambda x:int(x.split()[0])))))
+        # End for loop over 9 cells
+
+# Function to read a standard IARPA Babel RTTM file
+# as structured on Jan 16, 2014
+def read_rttm_file(rttm_file, temp_dir, frame_shift):
+    file_id = None
+    this_file = []
+    ref_file_handle = None
+    reference = {}
+    for line in open(rttm_file).readlines():
+        splits = line.strip().split()
+        type1 = splits[0]
+        if type1 == "SPEAKER":
+            continue
+        if splits[1] != file_id:
+            # A different file_id. Need to open a different file to write
+            if this_file != []:
+                # If this_file is empty, no reference RTTM corresponding to the file_id
+                # is read. This will happen at the start of the file_id. Otherwise it means a
+                # contiguous segment of previous file_id is processed. So write it to the
+                # file corresponding to the previous file_id.
+                try:
+                    ref_file_handle.write(' '.join(this_file))
+                    # Close the previous file if any
+                    ref_file_handle.close()
+                    this_file = []
+                except AttributeError:
+                    # Ignore AttributeError. It is expected.
+                    pass
+            # End if
+
+            file_id = splits[1]
+            if (file_id not in reference):
+                # First time seeing this file_id. Open a new file for writing. 
+                reference[file_id] = 1
+                try:
+                    ref_file_handle = open(temp_dir+"/"+file_id+".ref", 'w')
+                except IOError:
+                    sys.stderr.write("Unable to open " + temp_dir+"/"+file_id+".ref for writing\n")
+                    sys.exit(1)
+                ref_file_handle.write(file_id + "\t")
+            else:
+                # This file has been seen before but not in the previous iteration.
+                # The file has already been closed. So open it for append.
+                try:
+                    this_file = open(temp_dir+"/"+file_id+".ref").readline().strip().split()[1:]
+                    ref_file_handle = open(temp_dir+"/"+file_id+".ref", 'a')
+                except IOError:
+                    sys.stderr.write("Unable to open " + temp_dir+"/"+file_id+".ref for appending\n")
+                    sys.exit(1)
+            # End if
+        # End if
+
+        i = len(this_file)
+        category = splits[6]
+        word = splits[5]
+        start_time = int(float(splits[3])/frame_shift + 0.5)
+        duration = int(float(splits[4])/frame_shift + 0.5)
+        if i < start_time:
+            this_file.extend(["0"]*(start_time - i))
+        if type1 == "NON-LEX":
+            if category == "other":
+                # the "other" category is taken as Silence
+                this_file.extend(["0"]*duration)
+            else:
+                this_file.extend(["1"]*duration)
+        if type1 == "LEXEME":
+            this_file.extend(["2"]*duration)
+        if type1 == "NON-SPEECH":
+            this_file.extend(["1"]*duration)
+
+    ref_file_handle.write(' '.join(this_file))
+    ref_file_handle.close()
+
+# Stats class to store some basic stats about the number of
+# times the post-processor goes through particular loops or blocks
+# of code in the algorithm. This is just for debugging.
+class Stats:
+    def __init__(self):
+        self.inter_utt_nonspeech = 0
+        self.merge_nonspeech_segment = 0
+        self.merge_segments = 0
+        self.split_segments = 0
+        self.silence_only = 0
+        self.noise_only = 0
+
+    def print_stats(self):
+        sys.stderr.write("Inter-utt nonspeech: %d\n" % self.inter_utt_nonspeech)
+        sys.stderr.write("Merge nonspeech segment: %d\n" % self.merge_nonspeech_segment)
+        sys.stderr.write("Merge segment: %d\n" % self.merge_segments)
+        sys.stderr.write("Split segments: %d\n" % self.split_segments)
+        sys.stderr.write("Noise only: %d\n" % self.noise_only)
+        sys.stderr.write("Silence only: %d\n" % self.silence_only)
+
+    def reset(self):
+        self.inter_utt_nonspeech = 0
+        self.merge_nonspeech_segment = 0
+        self.merge_segments = 0
+        self.split_segments = 0
+        self.silence_only = 0
+        self.noise_only = 0
+
+# Timer class to time functions
+class Timer:
+    def __enter__(self):
+        self.start = time.clock()
+        return self
+    def __exit__(self, *args):
+        self.end = time.clock()
+        self.interval = self.end - self.start
+
+# The main class for post-processing a file. 
+# This does the segmentation either looking at the file isolated +# or by looking at both classes simultaneously +class JointResegmenter: + def __init__(self, P, A, f, options, phone_map, stats = None, reference = None): + + # Pointers to prediction arrays and Initialization + self.P = P # Predicted phones + self.B = [ i for i in A ] # Original predicted classes + self.A = A # Predicted classes + self.file_id = f # File name + self.N = len(A) # Length of the prediction (= Num of frames in the audio file) + self.S = [False] * self.N # Array of Start boundary markers + self.E = [False] * (self.N+1) # Array of End boundary markers + + self.phone_map = phone_map + self.options = options + + # Configuration + + self.frame_shift = options.frame_shift + # Convert length in seconds to frames + self.max_frames = int(options.max_segment_length / options.frame_shift) + self.hard_max_frames = int(options.hard_max_segment_length / options.frame_shift) + self.min_inter_utt_nonspeech_length = int(options.min_inter_utt_silence_length / options.frame_shift) + if ( options.remove_noise_only_segments == "false" ): + self.remove_noise_segments = False + elif ( options.remove_noise_only_segments == "true" ): + self.remove_noise_segments = True + + # End of Configuration + + # Define Frame Type Constants + self.THIS_SILENCE = ("0","1","2") + self.THIS_NOISE = ("3","4","5") + self.THIS_SPEECH = ("6", "7", "8") + self.THIS_SPEECH_THAT_SIL = ("6",) + self.THIS_SPEECH_THAT_NOISE = ("7",) + self.THIS_SIL_CONVERT_THAT_SIL = ("9",) + self.THIS_SIL_CONVERT_THAT_NOISE = ("10",) + self.THIS_SIL_CONVERT = ("9","10","11") + self.THIS_SILENCE_CONVERT = ("9","10","11") + self.THIS_NOISE_CONVERT_THAT_SIL = ("12",) + self.THIS_NOISE_CONVERT_THAT_NOISE = ("13",) + self.THIS_NOISE_CONVERT = ("12","13","14") + self.THIS_NOISE_OR_SILENCE = self.THIS_NOISE + self.THIS_SILENCE + self.THIS_SILENCE_OR_NOISE = self.THIS_NOISE + self.THIS_SILENCE + self.THIS_CONVERT = self.THIS_SILENCE_CONVERT + self.THIS_NOISE_CONVERT + self.THIS_SILENCE_PLUS = self.THIS_SILENCE + self.THIS_SILENCE_CONVERT + self.THIS_NOISE_PLUS = self.THIS_NOISE + self.THIS_NOISE_CONVERT + self.THIS_SPEECH_PLUS = self.THIS_SPEECH + self.THIS_CONVERT + + if stats != None: + self.stats = stats + + self.reference = None + if reference != None: + if len(reference) < self.N: + self.reference = reference + ["0"] * (self.N - len(reference)) + assert (len(self.reference) == self.N) + else: + self.reference = reference + + # This function restricts the output to length N + def restrict(self, N): + self.B = self.B[0:N] + self.A = self.A[0:N] + self.S = self.S[0:N] + self.E = self.E[0:N+1] + if sum(self.S) == sum(self.E) + 1: + self.E[N] = True + self.N = N + + # Main resegment function that calls other functions + def resegment(self): + with Timer() as t: + self.get_initial_segments() + if self.options.verbose > 1: + sys.stderr.write("For %s: get_initial_segments took %f sec\n" % (self.file_id, t.interval)) + with Timer() as t: + self.set_nonspeech_proportion() + if self.options.verbose > 1: + sys.stderr.write("For %s: set_nonspeech_proportion took %f sec\n" % (self.file_id, t.interval)) + with Timer() as t: + self.merge_segments() + if self.options.verbose > 1: + sys.stderr.write("For %s: merge took %f sec\n" % (self.file_id, t.interval)) + with Timer() as t: + self.split_long_segments() + if self.options.verbose > 1: + sys.stderr.write("For %s: split took %f sec\n" % (self.file_id, t.interval)) + if self.remove_noise_segments: + with Timer() as t: + 
self.remove_noise_only_segments() + if self.options.verbose > 1: + sys.stderr.write("For %s: remove took %f sec\n" % (self.file_id, t.interval)) + elif self.min_inter_utt_nonspeech_length > 0.0: + # This is the typical one with augmented training setup + self.remove_silence_only_segments() + + if self.options.verbose > 1: + sys.stderr.write("For file %s\n" % self.file_id) + self.stats.print_stats() + sys.stderr.write("\n") + self.stats.reset() + + def get_initial_segments(self): + for i in range(0, self.N): + if (i > 0) and self.A[i-1] != self.A[i]: + # This frame is different from the previous frame. + if self.A[i] in self.THIS_SPEECH: + # This frame is speech. + if self.A[i-1] in self.THIS_SPEECH: + # Both this and the previous frames are speech + # But they are different. e.g. "8 7" + # So this is the end of the previous region and + # the beginning of the next region + self.S[i] = True + self.E[i] = True + else: + # The previous frame is non-speech, but not this one. + # So this frame is the beginning of a new segment + self.S[i] = True + else: + # This frame is non-speech + if self.A[i-1] in self.THIS_SPEECH: + # Previous frame is speech, but this one is not. + # So this frame is the end of the previous segment + self.E[i] = True + elif i == 0 and self.A[i] in self.THIS_SPEECH: + # The frame is speech. So this is the start of a new segment. + self.S[i] = True + if self.A[self.N-1] in self.THIS_SPEECH: + # Handle the special case where the last frame of file is not nonspeech + self.E[self.N] = True + assert(sum(self.S) == sum(self.E)) + + ########################################################################### + # Analysis section + self.C = ["0"] * self.N + C = self.C + a = Analysis(self.file_id, self.frame_shift,"Analysis after get_initial_segments") + + if self.reference != None: + count = 0 + for i in range(0,self.N): + if self.reference[i] == "0" and self.A[i] in self.THIS_SILENCE: + C[i] = "0" + elif self.reference[i] == "0" and self.A[i] in self.THIS_NOISE: + C[i] = "1" + elif self.reference[i] == "0" and self.A[i] in self.THIS_SPEECH: + C[i] = "2" + elif self.reference[i] == "1" and self.A[i] in self.THIS_SILENCE: + C[i] = "3" + elif self.reference[i] == "1" and self.A[i] in self.THIS_NOISE: + C[i] = "4" + elif self.reference[i] == "1" and self.A[i] in self.THIS_SPEECH: + C[i] = "5" + elif self.reference[i] == "2" and self.A[i] in self.THIS_SILENCE: + C[i] = "6" + elif self.reference[i] == "2" and self.A[i] in self.THIS_NOISE: + C[i] = "7" + elif self.reference[i] == "2" and self.A[i] in self.THIS_SPEECH: + C[i] = "8" + if i > 0 and C[i-1] != C[i]: + a.state_count[int(C[i-1])].append(count) + a.markers[int(C[i-1])].append(i - count) + a.phones[int(C[i-1])].append(' '.join(set(self.P[i-count:i]))) + count = 1 + else: + count += 1 + + for j in range(0,9): + a.confusion_matrix[j] = sum([C[i] == str(j) for i in range(0,self.N)]) + + global_analysis_get_initial_segments.add(a) + + if self.reference != None and self.options.verbose > 0: + a.write_confusion_matrix() + a.write_length_stats() + if self.reference != None and self.options.verbose > 1: + a.write_markers() + ########################################################################### + + def set_nonspeech_proportion(self): + num_speech_frames = 0 + in_segment = False + + # Active frames are the frames that are either segment starts + # or segment ends + active_frames = [] + for n in range(0, self.N + 1): + if self.E[n]: + assert(in_segment) + in_segment = False + active_frames.append(n) + if n < self.N and self.S[n]: + 
assert(not in_segment)
+                in_segment = True
+                active_frames.append(n)
+            if n < self.N:
+                if in_segment:
+                    # Count the number of speech frames
+                    num_speech_frames += 1
+        assert (not in_segment)
+        if num_speech_frames == 0:
+            sys.stderr.write("%s: Warning: no speech found for recording %s\n" % (sys.argv[0], self.file_id))
+
+        # Set the number of non-speech frames to be added depending on the
+        # silence proportion. The target number of frames in the segments
+        # is computed as below:
+        target_segment_frames = int(num_speech_frames / (1.0 - self.options.silence_proportion))
+
+        # The number of frames currently in the segments
+        num_segment_frames = num_speech_frames
+
+        count = 0
+        while num_segment_frames < target_segment_frames:
+            count += 1
+            changed = False
+            for i in range(0, len(active_frames)):
+                # At each active frame, try to include a nonspeech frame in
+                # the segment, thus padding the speech segments with some
+                # non-speech frames. These converted non-speech frames are
+                # labelled 9...14 depending on whether they were originally
+                # 0...5 respectively
+                n = active_frames[i]
+                if self.E[n] and n < self.N and not self.S[n]:
+                    # This must be the beginning of a non-speech region.
+                    # Include some of this non-speech in the segments
+                    assert (self.A[n] not in self.THIS_SPEECH)
+
+                    # Convert the non-speech frame to be included in segment
+                    self.A[n] = str(int(self.B[n]) + 9)
+                    if self.B[n-1] != self.B[n]:
+                        # At this frame there is a transition from
+                        # one type of non-speech (0, 1 ... 5) to another.
+                        # So it's the start of a segment. Also add it to the
+                        # end of the active frames list
+                        self.S[n] = True
+                        active_frames.append(n+1)
+                    else:
+                        # We need to extend the segment end since we have
+                        # included a non-speech frame. Remove the current
+                        # segment end mark and add one at the next frame
+                        self.E[n] = False
+                        active_frames[i] = n + 1
+                        self.E[n+1] = True
+                    # Increment the number of frames in the segments
+                    num_segment_frames += 1
+                    changed = True
+                if n < self.N and self.S[n] and n > 0 and not self.E[n]:
+                    # This must be the beginning of a speech region.
+                    # Include some non-speech before it into the segments
+                    assert (self.A[n-1] not in self.THIS_SPEECH)
+                    self.A[n-1] = str(int(self.B[n-1]) + 9)
+                    if self.B[n-1] != self.B[n]:
+                        self.E[n] = True
+                        active_frames.append(n-1)
+                    else:
+                        self.S[n] = False
+                        active_frames[i] = n - 1
+                        self.S[n-1] = True
+                    num_segment_frames += 1
+                    changed = True
+                if num_segment_frames >= target_segment_frames:
+                    break
+            if not changed: # avoid an infinite loop. if no changes, then break.
+ break + if num_segment_frames < target_segment_frames: + proportion = float(num_segment_frames - num_speech_frames) / num_segment_frames + sys.stderr.write("%s: Warning: for recording %s, only got a proportion %f of non-speech frames, versus target %f\n" % (sys.argv[0], self.file_id, proportion, self.options.silence_proportion)) + + ########################################################################### + # Analysis section + self.C = ["0"] * self.N + C = self.C + a = Analysis(self.file_id, self.frame_shift,"Analysis after set_nonspeech_proportion") + + if self.reference != None: + count = 0 + for i in range(0,self.N): + if self.reference[i] == "0" and self.A[i] in (self.THIS_SILENCE + self.THIS_NOISE): + C[i] = "0" + elif self.reference[i] == "0" and self.A[i] in self.THIS_CONVERT: + C[i] = "1" + elif self.reference[i] == "0" and self.A[i] in self.THIS_SPEECH: + C[i] = "2" + elif self.reference[i] == "1" and self.A[i] in (self.THIS_SILENCE + self.THIS_NOISE): + C[i] = "3" + elif self.reference[i] == "1" and self.A[i] in self.THIS_CONVERT: + C[i] = "4" + elif self.reference[i] == "1" and self.A[i] in self.THIS_SPEECH: + C[i] = "5" + elif self.reference[i] == "2" and self.A[i] in (self.THIS_SILENCE + self.THIS_NOISE): + C[i] = "6" + elif self.reference[i] == "2" and self.A[i] in self.THIS_CONVERT: + C[i] = "7" + elif self.reference[i] == "2" and self.A[i] in self.THIS_SPEECH: + C[i] = "8" + if i > 0 and C[i-1] != C[i]: + a.state_count[int(C[i-1])].append(count) + a.markers[int(C[i-1])].append(i - count) + a.phones[int(C[i-1])].append(' '.join(set(self.P[i-count:i]))) + count = 1 + else: + count += 1 + + for j in range(0,9): + a.confusion_matrix[j] = sum([C[i] == str(j) for i in range(0,self.N)]) + + global_analysis_set_nonspeech_proportion.add(a) + + if self.reference != None and self.options.verbose > 0: + a.write_confusion_matrix() + a.write_length_stats() + if self.reference != None and self.options.verbose > 1: + a.write_markers() + ########################################################################### + + def merge_segments(self): + # Get list of frames which have segment start and segment end + # markers into separate lists + segment_starts = [i for i, val in enumerate(self.S) if val] + segment_ends = [i for i, val in enumerate(self.E) if val] + assert (sum(self.S) == sum(self.E)) + + if self.options.verbose > 3: + sys.stderr.write("Length of segment starts before non-speech adding: %d\n" % len(segment_starts)) + + if self.min_inter_utt_nonspeech_length > 0.0: + segment_starts = list(set([0] + segment_starts + segment_ends + [self.N])) + segment_starts.sort() + segment_starts.pop() + segment_ends= list(set([0] + segment_starts + segment_ends + [self.N])) + segment_ends.sort() + segment_ends.pop(0) + if self.options.verbose > 3: + sys.stderr.write("Length of segment starts after non-speech adding: %d\n" % len(segment_starts)) + for i in segment_starts: + self.S[i] = True + for i in segment_ends: + self.E[i] = True + + # Just a check. 
There must always be an equal number of segment starts
+        # and segment ends
+        assert (len(segment_starts) == len(segment_ends))
+
+        # A boundary is a frame which is both a segment start and a segment end.
+        # The list of boundaries is obtained in the following step along with
+        # a few statistics like the type of segment on either side of the boundary
+        # and the length of the segment on either side of it
+        boundaries = []
+        i = 0
+        j = 0
+        while i < len(segment_starts) and j < len(segment_ends):
+            if segment_ends[j] < segment_starts[i]:
+                # The segment end marker is before the segment start marker.
+                # This means that this segment end marker corresponds to a segment
+                # that is before the one indicated by the segment start marker.
+                # So advance the segment end pointer to the next segment end to
+                # check if that is a 'boundary'
+                j += 1
+            elif segment_ends[j] > segment_starts[i]:
+                # The segment end marker is after the segment start marker.
+                # This means that this segment end marker would correspond
+                # to the segment indicated by the segment start marker.
+                # So advance the segment start pointer to the next segment start to
+                # check if that is a 'boundary'
+                i += 1
+            else:
+                assert(i < len(segment_starts) and j < len(segment_ends))
+                # A boundary:
+                # Find the segment score as the min of the lengths of the
+                # segments to the left and to the right.
+                # This segment score will be used to prioritize merging of
+                # the segment with its neighbor
+                assert ((j + 1) < len(segment_ends))
+                segment_score = min(segment_starts[i] - segment_starts[i-1], \
+                        segment_ends[j+1] - segment_ends[j])
+                # Also find the type of transition of the segments at the boundary.
+                # This is also used to prioritize the merging of the segment
+                boundaries.append((segment_ends[j], segment_score, \
+                        self.transition_type(segment_ends[j])))
+
+                # Sort the boundaries based on segment score
+                boundaries.sort(key = lambda x: x[1])
+                # Then sort based on the type of transition; since the sort is
+                # stable, entries within each transition type stay sorted by
+                # segment score
+                boundaries.sort(key = lambda x: x[2])
+                i += 1
+                j += 1
+            # End if
+        # End while loop
+
+        # Begin merging of segments by removing the start and end mark
+        # at the boundary to be merged
+        count = 0
+        for b in boundaries:
+            count += 1
+            segment_length = 0
+
+            if self.min_inter_utt_nonspeech_length > 0.0 and not self.E[b[0]]:
+                # This will happen only if the boundary is at the end of
+                # a non-speech region that has already been merged or removed.
+                # b[0] will then not be an end mark.
+                continue
+
+            # Count the number of frames in the segment to the
+            # left of the boundary
+            p = b[0] - 1
+            while p >= 0:
+                if self.S[p]:
+                    break
+                p -= 1
+                # End if
+            # End while loop
+            p_left = p
+            segment_length += b[0] - p
+
+            # Count the number of frames in the segment to the
+            # right of the boundary
+            p = b[0] + 1
+            while p <= self.N:
+                if self.E[p]:
+                    break
+                p += 1
+            assert (self.min_inter_utt_nonspeech_length == 0 or p == self.N or self.S[p] or self.A[p] in self.THIS_SILENCE_OR_NOISE)
+
+            if self.min_inter_utt_nonspeech_length > 0 and self.A[b[0]] in self.THIS_SILENCE_OR_NOISE:
+                assert(b[2] == 6 or b[2] == 7)
+                if (p - b[0]) > self.min_inter_utt_nonspeech_length:
+                    # This is a non-speech segment that is longer than the minimum
+                    # inter-utterance non-speech length.
+                    # Therefore treat this non-speech as inter-utterance
+                    # non-speech and remove it from the segments
+                    self.S[b[0]] = False
+                    self.E[p] = False
+
+                    # Count the number of times the inter-utt non-speech
+                    # length is greater than the set threshold.
+                    # This is the number of times the silence is
+                    # not merged with adjacent speech
+                    self.stats.inter_utt_nonspeech += 1
+
+                    # This boundary is no longer valid,
+                    # so we can continue to the next boundary
+                    continue
+                # End if
+
+                # This non-speech segment is shorter than the minimum
+                # inter-utterance non-speech length. It is possible to merge
+                # this segment with the adjacent ones, as long as the length
+                # of the segment after merging stays within limits.
+                p_temp = p
+                p += 1
+                while p <= self.N:
+                    if self.E[p]:
+                        break
+                    p += 1
+                # End while loop
+                segment_length += p - b[0]
+                if segment_length < self.max_frames:
+                    # Merge the non-speech segment with the segments
+                    # on either side
+
+                    # Count the number of times segment merge happens
+                    self.stats.merge_nonspeech_segment += 1
+
+                    if p_temp < self.N:
+                        self.S[p_temp] = False
+                        self.E[p_temp] = False
+                    self.S[b[0]] = False
+                    self.E[b[0]] = False
+                    continue
+                else:
+                    # The merged segment length is longer than max_frames.
+                    # Therefore treat this non-speech as inter-utterance
+                    # non-speech and remove it from the segments
+                    self.S[b[0]] = False
+                    self.E[p_temp] = False
+                    continue
+                # End if
+            elif self.min_inter_utt_nonspeech_length > 0 and (b[2] == 8 or b[2] == 9):
+                assert(p_left == 0)
+                if b[0] - p_left > self.min_inter_utt_nonspeech_length:
+                    self.S[p_left] = False
+                    self.E[b[0]] = False
+                    continue
+                # End if
+            # End if
+            segment_length += p - b[0]
+
+            if segment_length < self.max_frames:
+                self.stats.merge_segments += 1
+                self.S[b[0]] = False
+                self.E[b[0]] = False
+            # End if
+        # End for loop over boundaries
+
+        assert (sum(self.S) == sum(self.E))
+
+        ###########################################################################
+        # Analysis section
+
+        if self.reference != None and self.options.verbose > 3:
+            a = self.segmentation_analysis("Analysis after merge_segments")
+            a.write_confusion_matrix()
+
+            if self.reference != None and self.options.verbose > 4:
+                a.write_type_stats()
+            # End if
+
+            if self.reference != None and self.options.verbose > 4:
+                a.write_markers()
+            # End if
+        # End if
+        ###########################################################################
+    # End function merge_segments
+
+    def split_long_segments(self):
+        assert (sum(self.S) == sum(self.E))
+        for n in range(0, self.N):
+            if self.S[n]:
+                p = n + 1
+                while p <= self.N:
+                    if self.E[p]:
+                        break
+                    p += 1
+                segment_length = p - n
+                if segment_length > self.hard_max_frames:
+                    # Count the number of times long segments are split
+                    self.stats.split_segments += 1
+
+                    num_pieces = int((float(segment_length) / self.hard_max_frames) + 0.99999)
+                    sys.stderr.write("%s: Warning: for recording %s, " \
+                            % (sys.argv[0], self.file_id) \
+                            + "splitting segment of length %f seconds into %d pieces " \
+                            % (segment_length * self.frame_shift, num_pieces) \
+                            + "(--hard-max-segment-length %f)\n" \
+                            % self.options.hard_max_segment_length)
+                    frames_per_piece = int(segment_length / num_pieces)
+                    for i in range(1,num_pieces):
+                        q = n + i * frames_per_piece
+                        self.S[q] = True
+                        self.E[q] = True
+                if p - 1 > n:
+                    n = p - 1
+        assert (sum(self.S) == sum(self.E))
+    # End function split_long_segments
+
+    def remove_silence_only_segments(self):
+        for n in range(0, self.N):
+            # Run through to find a segment start
+            if self.S[n]:
+                p = n
+                
saw_nonsilence = False + # From the segment start, go till the segment end to see + # if there is speech in it + while p <= self.N: + if self.E[p] and p != n: + break + if p < self.N and self.A[p] not in self.THIS_SILENCE: + saw_nonsilence = True + p += 1 + # End of while loop through the segment + assert (p > self.N or self.E[p]) + if not saw_nonsilence: + # Count the number of silence only segments + self.stats.silence_only += 1 + + self.S[n] = False + self.E[p] = False + # End if + if p - 1 > n: + # Go to the end of the segment since that segment is + # already processed + n = p - 1 + # End if + if self.reference != None and self.options.verbose > 3: + a = self.segmentation_analysis("Analysis after remove_silence_only_segments") + a.write_confusion_matrix() + + if self.reference != None and self.options.verbose > 4: + a.write_type_stats() + # End if + + if self.reference != None and self.options.verbose > 4: + a.write_markers() + # End if + # End if + # End function remove_silence_only_segments + + def remove_noise_only_segments(self): + for n in range(0, self.N): + if self.S[n]: + p = n + saw_speech = False + while p <= self.N: + if self.E[p] and p != n: + break + if self.A[p] in self.THIS_SPEECH: + saw_speech = True + p += 1 + assert (self.E[p]) + if not saw_speech: + # Count the number of segments with no speech + self.stats.noise_only += 1 + self.S[n] = False + self.E[p] = False + # End if + if p - 1 > n: + n = p - 1 + # End if + # End if + # End for loop over frames + + ########################################################################### + # Analysis section + + if self.reference != None and self.options.verbose > 3: + a = self.segmentation_analysis("Analysis after remove_noise_only_segments") + a.write_confusion_matrix() + + if self.reference != None and self.options.verbose > 4: + a.write_type_stats() + # End if + + if self.reference != None and self.options.verbose > 4: + a.write_markers() + # End if + # End if + ########################################################################### + # End function remove_noise_only_segments + + # Return the transition type from frame j-1 to frame j + def transition_type(self, j): + assert (j > 0) + assert (self.A[j-1] != self.A[j] or self.A[j] in self.THIS_CONVERT) + if self.A[j-1] in (self.THIS_SPEECH_THAT_NOISE + self.THIS_SPEECH_THAT_SIL) and self.A[j] in (self.THIS_SPEECH_THAT_NOISE + self.THIS_SPEECH_THAT_SIL): + return 0 + if self.A[j-1] in self.THIS_SPEECH and self.A[j] in self.THIS_SPEECH: + return 1 + if self.A[j-1] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT_THAT_SIL + self.THIS_NOISE_CONVERT_THAT_NOISE) and self.A[j] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT_THAT_SIL + self.THIS_NOISE_CONVERT_THAT_NOISE): + return 2 + if self.A[j-1] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT) and self.A[j] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT): + return 3 + if self.A[j-1] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT + self.THIS_SIL_CONVERT_THAT_SIL + self.THIS_SIL_CONVERT_THAT_NOISE) and self.A[j] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT + self.THIS_SIL_CONVERT_THAT_SIL + self.THIS_SIL_CONVERT_THAT_NOISE): + return 4 + if self.A[j-1] in (self.THIS_SPEECH + self.THIS_CONVERT) and self.A[j] in (self.THIS_SPEECH + self.THIS_CONVERT): + return 5 + if self.A[j-1] in self.THIS_SPEECH_PLUS and self.A[j] in (self.THIS_SPEECH_PLUS + self.THIS_NOISE): + return 6 + if self.A[j-1] in self.THIS_SPEECH_PLUS and self.A[j] in (self.THIS_SPEECH_PLUS + self.THIS_SILENCE): + return 7 + if self.A[j-1] in (self.THIS_SPEECH_PLUS 
+ self.THIS_NOISE) and self.A[j] in self.THIS_SPEECH_PLUS: + return 8 + if self.A[j-1] in (self.THIS_SPEECH_PLUS + self.THIS_SILENCE) and self.A[j] in self.THIS_SPEECH_PLUS: + return 9 + assert (False) + + # Output the final segments + def print_segments(self, out_file_handle = sys.stdout): + # We also do some sanity checking here. + segments = [] + + assert (self.N == len(self.S)) + assert (self.N + 1 == len(self.E)) + + max_end_time = 0 + n = 0 + while n < self.N: + if self.E[n] and not self.S[n]: + sys.stderr.write("%s: Error: Ending segment before starting it: n=%d\n" % (sys.argv[0], n)) + if self.S[n]: + p = n + 1 + while p < self.N and not self.E[p]: + assert (not self.S[p]) + p += 1 + assert (p == self.N or self.E[p]) + segments.append((n,p)) + max_end_time = p + if p < self.N and self.S[p]: + n = p - 1 + else: + n = p + n += 1 + + if len(segments) == 0: + sys.stderr.write("%s: Warning: no segments for recording %s\n" % (sys.argv[0], self.file_id)) + sys.exit(1) + + ############################################################################ + # Analysis section + + self.C = ["0"] * self.N + C = self.C + a = Analysis(self.file_id, self.frame_shift,"Analysis final") + + if self.reference != None: + count = 0 + in_seg = False + for i in range(0,self.N): + if in_seg and self.E[i]: + in_seg = False + if i == 0 and self.S[i]: + in_seg = True + if not in_seg and self.S[i]: + in_seg = True + if self.reference[i] == "0" and not in_seg: + C[i] = "0" + elif self.reference[i] == "0" and in_seg: + C[i] = "2" + elif self.reference[i] == "1" and not in_seg: + C[i] = "3" + elif self.reference[i] == "1" and in_seg: + C[i] = "5" + elif self.reference[i] == "2" and not in_seg: + C[i] = "6" + elif self.reference[i] == "2" and in_seg: + C[i] = "8" + if i > 0 and C[i-1] != C[i]: + a.state_count[int(C[i-1])].append(count) + a.markers[int(C[i-1])].append(i - count) + a.phones[int(C[i-1])].append(' '.join(set(self.P[i-count:i]))) + count = 1 + else: + count += 1 + + for j in range(0,9): + a.confusion_matrix[j] = sum([C[i] == str(j) for i in range(0,self.N)]) + + if self.options.verbose > 0: + a.write_confusion_matrix() + a.write_length_stats() + if self.options.verbose > 1: + a.write_markers() + + global_analysis_final.add(a) + ############################################################################ + + # we'll be printing the times out in hundredths of a second (regardless of the + # value of $frame_shift), and first need to know how many digits we need (we'll be + # printing with "%05d" or similar, for zero-padding. + max_end_time_hundredths_second = int(100.0 * self.frame_shift * max_end_time) + num_digits = 1 + i = 1 + while i < max_end_time_hundredths_second: + i *= 10 + num_digits += 1 + format_str = r"%0" + "%d" % num_digits + "d" # e.g. 
"%05d" + + for start, end in segments: + assert (end > start) + start_seconds = "%.2f" % (self.frame_shift * start) + end_seconds = "%.2f" % (self.frame_shift * end) + start_str = format_str % (start * self.frame_shift * 100.0) + end_str = format_str % (end * self.frame_shift * 100.0) + utterance_id = "%s%s%s%s%s" % (self.file_id, self.options.first_separator, start_str, self.options.second_separator, end_str) + # Output: + out_file_handle.write("%s %s %s %s\n" % (utterance_id, self.file_id, start_seconds, end_seconds)) + + # Some intermediate stage analysis of the segmentation + def segmentation_analysis(self, title = "Analysis"): + # In this analysis, we are trying to find in each segment, + # the number of frames that are speech, noise and silence + # in the reference RTTM + + # First get the segment start and segment ends + # Note that they are in sync by construction + segment_starts = [i for i in range(0,self.N) if self.S[i]] + segment_ends = [i for i in range(0,self.N+1) if self.E[i]] + + D = {} + for i,st in enumerate(segment_starts): + en = segment_ends[i] + types = {} + for val in self.reference[st:en]: + # The segment is defined by the indices st:en + # Count the number of frames in the segment that + # are silence, speech and noise in the reference. + types[val] = types.get(val,0) + 1 + # End for loop over a particular segment + # Make a tuple out of the counts of the types of frames + D[st] = (en, types.get("0",0), types.get("1", 0), types.get("2", 0)) + # End for loop over all segments + + a = Analysis(self.file_id, None, title) + for st, info in D.items(): + en = info[0] + + if info[1] > 0 and info[2] == 0 and info[3] == 0: + # All frames silence + a.confusion_matrix[0] += 1 + a.state_count[0].append((en-st,)+info[1:]) + a.type_counts[0][0].append(info[1]) + a.type_counts[1][0].append(info[2]) + a.type_counts[2][0].append(info[3]) + a.markers[0].append(st) + elif info[1] == 0 and info[2] > 0 and info[3] == 0: + # All frames noise + a.confusion_matrix[1] += 1 + a.state_count[1].append((en-st,)+info[1:]) + a.type_counts[0][1].append(info[1]) + a.type_counts[1][1].append(info[2]) + a.type_counts[2][1].append(info[3]) + a.markers[1].append(st) + elif info[1] == 0 and info[2] == 0 and info[3] > 0: + # All frames speech + a.confusion_matrix[2] += 1 + a.state_count[2].append((en-st,)+info[1:]) + a.type_counts[0][2].append(info[1]) + a.type_counts[1][2].append(info[2]) + a.type_counts[2][2].append(info[3]) + a.markers[2].append(st) + elif info[1] > 0 and info[2] > 0 and info[3] == 0: + # Segment contains both silence and noise + a.confusion_matrix[3] += 1 + a.state_count[3].append((en-st,)+info[1:]) + a.type_counts[0][3].append(info[1]) + a.type_counts[1][3].append(info[2]) + a.type_counts[2][3].append(info[3]) + a.markers[3].append(st) + elif info[1] > 0 and info[2] == 0 and info[3] > 0: + # Segment contains both silence and speech + a.confusion_matrix[4] += 1 + a.type_counts[0][4].append(info[1]) + a.type_counts[1][4].append(info[2]) + a.type_counts[2][4].append(info[3]) + a.state_count[4].append((en-st,)+info[1:]) + a.markers[4].append(st) + elif info[1] == 0 and info[2] > 0 and info[3] > 0: + # Segment contains both noise and speech + a.confusion_matrix[5] += 1 + a.state_count[5].append((en-st,)+info[1:]) + a.type_counts[0][5].append(info[1]) + a.type_counts[1][5].append(info[2]) + a.type_counts[2][5].append(info[3]) + a.markers[5].append(st) + elif info[1] > 0 and info[2] > 0 and info[3] > 0: + # Segment contains silence, noise and speech + a.confusion_matrix[6] += 1 + 
a.state_count[6].append((en-st,)+info[1:]) + a.type_counts[0][6].append(info[1]) + a.type_counts[1][6].append(info[2]) + a.type_counts[2][6].append(info[3]) + a.markers[6].append(st) + else: + # Should never be here + assert (False) + # End if + # End for loop over all stats + return a + # End function segmentation_analysis + +def map_prediction(A1, A2, phone_map, speech_cap = None, f = None): + if A2 == None: + B = [] + # Isolated segmentation + prev_x = None + len_x = 0 + i = 0 + for x in A1: + if prev_x == None or x == prev_x: + len_x += 1 + else: + assert (len_x > 0) + #sys.stderr.write("PHONE_LENGTH %s %d %s %d\n" % (prev_x, len_x, f, i - len_x)) + if phone_map[prev_x] == "0": + B.extend(["0"] * len_x) + elif (speech_cap != None and len_x > speech_cap) or phone_map[prev_x] == "1": + B.extend(["4"] * len_x) + elif phone_map[prev_x] == "2": + B.extend(["8"] * len_x) + # End if + len_x = 1 + # End if + prev_x = x + i += 1 + # End for + try: + assert (len_x > 0) + except AssertionError as e: + repr(e) + sys.stderr.write("In file %s\n" % f) + sys.exit(1) + + if phone_map[prev_x] == "0": + B.extend(["0"] * len_x) + elif (speech_cap != None and len_x > speech_cap) or phone_map[prev_x] == "1": + B.extend(["4"] * len_x) + elif phone_map[prev_x] == "2": + B.extend(["8"] * len_x) + # End if + return B + # End if (isolated segmentation) + + # Assuming len(A1) > len(A2) + # Otherwise A1 and A2 must be interchanged before + # passing to this function + B1 = [] + B2 = [] + for i in range(0, len(A2)): + if phone_map[A1[i]] == "0" and phone_map[A2[i]] == "0": + B1.append("0") + B2.append("0") + if phone_map[A1[i]] == "0" and phone_map[A2[i]] == "1": + B1.append("1") + B2.append("3") + if phone_map[A1[i]] == "0" and phone_map[A2[i]] == "2": + B1.append("2") + B2.append("6") + if phone_map[A1[i]] == "1" and phone_map[A2[i]] == "0": + B1.append("3") + B2.append("1") + if phone_map[A1[i]] == "1" and phone_map[A2[i]] == "1": + B1.append("4") + B2.append("4") + if phone_map[A1[i]] == "1" and phone_map[A2[i]] == "2": + B1.append("5") + B2.append("7") + if phone_map[A1[i]] == "2" and phone_map[A2[i]] == "0": + B1.append("6") + B2.append("2") + if phone_map[A1[i]] == "2" and phone_map[A2[i]] == "1": + B1.append("7") + B2.append("5") + if phone_map[A1[i]] == "2" and phone_map[A2[i]] == "2": + B1.append("8") + B2.append("8") + for i in range(len(A2), len(A1)): + if phone_map[A1[i]] == "0": + B1.append("0") + B2.append("0") + if phone_map[A1[i]] == "1": + B1.append("3") + B2.append("1") + if phone_map[A1[i]] == "2": + B1.append("6") + B2.append("2") + return (B1, B2) + +def main(): + parser = ArgumentParser(description='Get segmentation arguments') + parser.add_argument('--verbose', type=int, \ + dest='verbose', default=0, \ + help='Give higher verbose for more logging (default: %(default)s)') + parser.add_argument('--silence-proportion', type=float, \ + dest='silence_proportion', default=0.05, \ + help="The amount of silence at the sides of segments is " \ + + "tuned to give this proportion of silence. 
(default: %(default)s)")
+    parser.add_argument('--frame-shift', type=float, \
+        dest='frame_shift', default=0.01, \
+        help="Time difference in seconds between adjacent frames (default: %(default)s)")
+    parser.add_argument('--max-segment-length', type=float, \
+        dest='max_segment_length', default=10.0, \
+        help="Maximum segment length while we are merging segments (default: %(default)s)")
+    parser.add_argument('--hard-max-segment-length', type=float, \
+        dest='hard_max_segment_length', default=15.0, \
+        help="Hard maximum on the segment length above which the segment " \
+            + "will be broken even if in the middle of speech (default: %(default)s)")
+    parser.add_argument('--first-separator', type=str, \
+        dest='first_separator', default="-", \
+        help="Separator between recording-id and start-time (default: %(default)s)")
+    parser.add_argument('--second-separator', type=str, \
+        dest='second_separator', default="-", \
+        help="Separator between start-time and end-time (default: %(default)s)")
+    parser.add_argument('--remove-noise-only-segments', type=str, \
+        dest='remove_noise_only_segments', default="true", choices=("true", "false"), \
+        help="Remove segments that have only noise. (default: %(default)s)")
+    parser.add_argument('--min-inter-utt-silence-length', type=float, \
+        dest='min_inter_utt_silence_length', default=1.0, \
+        help="Minimum silence that must exist between two separate utterances (default: %(default)s)")
+    parser.add_argument('--channel1-file', type=str, \
+        dest='channel1_file', default="inLine", \
+        help="String that matches with the channel 1 file (default: %(default)s)")
+    parser.add_argument('--channel2-file', type=str, \
+        dest='channel2_file', default="outLine", \
+        help="String that matches with the channel 2 file (default: %(default)s)")
+    parser.add_argument('--isolated-resegmentation', \
+        dest='isolated_resegmentation', \
+        action='store_true', help="Do not do joint segmentation (default: %(default)s)")
+    parser.add_argument('--max-length-diff', type=float, \
+        dest='max_length_diff', default=1.0, \
+        help="Maximum difference in the lengths of the two channels for joint " \
+            + "segmentation to be done (default: %(default)s)")
+    parser.add_argument('--reference-rttm', dest='reference_rttm', \
+        help="RTTM file to compare and get statistics (default: %(default)s)")
+    parser.add_argument('--speech-cap-length', type=float, default=None, \
+        help="Maximum length in seconds of a particular speech phone prediction." \
+            + "\nAny length above this will be considered as noise")
+    parser.add_argument('prediction_dir', \
+        help='Directory where the predicted phones (.pred files) are found')
+    parser.add_argument('phone_map', \
+        help='Phone Map file that maps from phones to classes')
+    parser.add_argument('output_segments', nargs='?', default="-", \
+        help='Output segments file')
+    parser.usage=':'.join(parser.format_usage().split(':')[1:]) \
+        + 'e.g. : %(prog)s exp/tri4b_whole_resegment_dev10h/pred exp/tri4b_whole_resegment_dev10h/phone_map.txt data/dev10h.seg/segments'
+    options = parser.parse_args()
+
+    sys.stderr.write(' '.join(sys.argv) + "\n")
+    if not ( options.silence_proportion \
+            > 0.01 and options.silence_proportion < 0.99 ):
+        sys.stderr.write("%s: Error: Invalid silence-proportion value %f\n" \
+            % (sys.argv[0], options.silence_proportion))
+        sys.exit(1)
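+    # To make the effect of --silence-proportion concrete (hypothetical
+    # numbers): with silence_proportion = 0.2 and 800 speech frames in a
+    # recording, set_nonspeech_proportion() pads the segments towards a
+    # target of int(800 / (1 - 0.2)) = 1000 frames, i.e. up to 200 converted
+    # non-speech frames.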
+    if not ( options.remove_noise_only_segments == "false" or options.remove_noise_only_segments == "true" ):
+        sys.stderr.write("%s: Error: Invalid value %s for --remove-noise-only-segments. Must be true or false.\n" \
+            % (sys.argv[0], options.remove_noise_only_segments))
+        sys.exit(1)
+
+    if options.output_segments == '-':
+        out_file = sys.stdout
+    else:
+        try:
+            out_file = open(options.output_segments, 'w')
+        except IOError as e:
+            sys.stderr.write("%s: %s: Unable to open file %s\n" % (sys.argv[0], e, options.output_segments))
+            sys.exit(1)
+    # End if
+
+    phone_map = {}
+    try:
+        for line in open(options.phone_map).readlines():
+            phone, cls = line.strip().split()
+            phone_map[phone] = cls
+    except IOError as e:
+        repr(e)
+        sys.exit(1)
+
+    prediction_dir = options.prediction_dir
+    channel1_file = options.channel1_file
+    channel2_file = options.channel2_file
+
+    temp_dir = prediction_dir + "/../rttm_classes"
+    os.system("mkdir -p %s" % temp_dir)
+    if options.reference_rttm != None:
+        read_rttm_file(options.reference_rttm, temp_dir, options.frame_shift)
+    else:
+        temp_dir = None
+
+    stats = Stats()
+
+    pred_files = dict([ (f.split('/')[-1][0:-5], False) \
+            for f in glob.glob(os.path.join(prediction_dir, "*.pred")) ])
+
+    global global_analysis_get_initial_segments
+    global_analysis_get_initial_segments = Analysis("TOTAL_Get_Initial_Segments", options.frame_shift, "Global Analysis after get_initial_segments")
+
+    global global_analysis_set_nonspeech_proportion
+    global_analysis_set_nonspeech_proportion = Analysis("TOTAL_set_nonspeech_proportion", options.frame_shift, "Global Analysis after set_nonspeech_proportion")
+
+    global global_analysis_final
+    global_analysis_final = Analysis("TOTAL_Final", options.frame_shift, "Global Analysis Final")
+
+    speech_cap = None
+    if options.speech_cap_length != None:
+        speech_cap = int( options.speech_cap_length / options.frame_shift )
+    # End if
+
+    for f in pred_files:
+        if pred_files[f]:
+            continue
+        if re.match(".*_"+channel1_file, f) is None:
+            if re.match(".*_"+channel2_file, f) is None:
+                sys.stderr.write("%s does not match pattern .*_%s or .*_%s\n" \
+                        % (f, channel1_file, channel2_file))
+                sys.exit(1)
+            else:
+                f1 = f
+                f2 = f
+                f1 = re.sub("(.*_)"+channel2_file, r"\1"+channel1_file, f1)
+        else:
+            f1 = f
+            f2 = f
+            f2 = re.sub("(.*_)"+channel1_file, r"\1"+channel2_file, f2)
+
+        if options.isolated_resegmentation or f2 not in pred_files or f1 not in pred_files:
+            pred_files[f] = True
+            try:
+                A = open(os.path.join(prediction_dir, f+".pred")).readline().strip().split()[1:]
+            except IndexError:
+                sys.stderr.write("Incorrect format of file %s/%s.pred\n" % (prediction_dir, f))
+                sys.exit(1)
+
+            B = map_prediction(A, None, phone_map, speech_cap, f)
+
+            if temp_dir != None:
+                try:
+                    reference = open(os.path.join(temp_dir, f+".ref")).readline().strip().split()[1:]
+                except IOError:
+                    reference = None
+            else:
+                reference = None
+            r = JointResegmenter(A, B, f, options, phone_map, stats, reference)
+            r.resegment()
+            r.print_segments(out_file)
+        else:
+            if pred_files[f1] and pred_files[f2]:
+                continue
+            pred_files[f1] = True
+            pred_files[f2] = True
+            try:
+                A1 = open(os.path.join(prediction_dir, f1+".pred")).readline().strip().split()[1:]
+            except IndexError:
+                sys.stderr.write("Incorrect format of file %s/%s.pred\n" % (prediction_dir, f1))
+                sys.exit(1)
+            try:
+                A2 = open(os.path.join(prediction_dir, f2+".pred")).readline().strip().split()[1:]
+            except IndexError:
+                sys.stderr.write("Incorrect format of file %s/%s.pred\n" % (prediction_dir, f2))
+                sys.exit(1)
+
+            if len(A1) < len(A2):
+                A3 = A1
+                A1 = A2
+                A2 = A3
+
+                f3 = f1
+                f1 = f2
+                f2 = f3
+            # End if
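+            # From here on, A1/f1 refer to the longer channel and A2/f2 to the
+            # shorter one; map_prediction() below relies on len(A1) >= len(A2).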
+            if (len(A1) - len(A2)) > options.max_length_diff / options.frame_shift:
+                sys.stderr.write( \
+                    "%s: Warning: Lengths of %s and %s differ by more than %f. " \
+                    % (sys.argv[0], f1, f2, options.max_length_diff) \
+                    + "So using isolated resegmentation\n")
+                B1 = map_prediction(A1, None, phone_map, speech_cap)
+                B2 = map_prediction(A2, None, phone_map, speech_cap)
+            else:
+                B1,B2 = map_prediction(A1, A2, phone_map, speech_cap)
+            # End if
+
+            if temp_dir != None:
+                try:
+                    reference1 = open(os.path.join(temp_dir, f1+".ref")).readline().strip().split()[1:]
+                except IOError:
+                    reference1 = None
+            else:
+                reference1 = None
+            r1 = JointResegmenter(A1, B1, f1, options, phone_map, stats, reference1)
+            r1.resegment()
+            r1.print_segments(out_file)
+
+            if temp_dir != None:
+                try:
+                    reference2 = open(os.path.join(temp_dir, f2+".ref")).readline().strip().split()[1:]
+                except IOError:
+                    reference2 = None
+            else:
+                reference2 = None
+            r2 = JointResegmenter(A1, B2, f2, options, phone_map, stats, reference2)
+            r2.resegment()
+            r2.restrict(len(A2))
+            r2.print_segments(out_file)
+        # End if
+    # End for loop over files
+
+    if options.reference_rttm != None:
+        global_analysis_get_initial_segments.write_confusion_matrix(True)
+        global_analysis_get_initial_segments.write_total_stats(True)
+        global_analysis_get_initial_segments.write_length_stats()
+        global_analysis_set_nonspeech_proportion.write_confusion_matrix(True)
+        global_analysis_set_nonspeech_proportion.write_total_stats(True)
+        global_analysis_set_nonspeech_proportion.write_length_stats()
+        global_analysis_final.write_confusion_matrix(True)
+        global_analysis_final.write_total_stats(True)
+        global_analysis_final.write_length_stats()
+
+if __name__ == '__main__':
+    with Timer() as t:
+        main()
+    sys.stderr.write("\nSegmentation done!\nTook %f sec\n" % t.interval)
+
diff --git a/egs/babel/s5d/local/resegment/train_segmentation.sh b/egs/babel/s5d/local/resegment/train_segmentation.sh
new file mode 100755
index 00000000000..511c451993e
--- /dev/null
+++ b/egs/babel/s5d/local/resegment/train_segmentation.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
+# Apache 2.0
+
+nj=16        # nj for training subset of whole
+cmd=run.pl   # How to run the parallel tasks
+boost_sil=1.0
+ext_alidir=  # Use this alignment directory instead for getting a new one
+
+# End of configuration
+
+. utils/parse_options.sh
+
+set -o pipefail
+set -e
+set -u
+if [ $# -ne 4 ]; then
+  echo "Usage: $0 [options] <in-model-dir> <train-data-dir> <lang-dir> <out-model-dir>"
+  echo " e.g.:"
+  echo "$0 exp/tri4 data/train data/lang exp/tri4b_seg"
+  echo " Options (a selection; for full options, see the script itself):"
+  echo "    --nj <nj>         # Number of parallel jobs"
+  echo "    --cmd <cmd>       # How to run the parallel tasks"
+  exit 1
+fi
+
+in_model_dir=$1   # Model used for alignment
+train_data_dir=$2
+lang=$3
+out_model_dir=$4
+
+if [ ! -d $train_data_dir ] ; then
+  echo "$0: Unable to find directory $train_data_dir."
+  echo "$0: Run run-0-fillers.sh or run-1-main.sh first to prepare data directory"
+  exit 1
+fi
+
+# Align the training data using the input models and train an LDA + MLLT
+# model on it.
+alidir=${in_model_dir}_train_seg_ali
+
+if [ ! -z $ext_alidir ] && [ -s $ext_alidir/ali.1.gz ]; then
+  alidir=$ext_alidir
+elif [ ! -f $alidir/.done ]; then
+  steps/align_fmllr.sh --nj $nj --cmd "$cmd" --boost-silence $boost_sil \
+    $train_data_dir $lang $in_model_dir $alidir || exit 1;
+  touch $alidir/.done
+fi
+
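+# Train an LDA+MLLT system on those alignments; the two numeric arguments
+# below follow steps/train_lda_mllt.sh's usual <num-leaves> <tot-gauss>
+# ordering (1000 leaves, 10000 total Gaussians).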
+if [ ! -f $out_model_dir/.done ]; then
+  steps/train_lda_mllt.sh --cmd "$cmd" --realign-iters "" --boost-silence $boost_sil \
+    1000 10000 $train_data_dir $lang $alidir $out_model_dir || exit 1;
+  touch $out_model_dir/.done
+fi
+
+if [ ! -f $out_model_dir/graph.done ]; then
+  # Make the phone decoding-graph.
+  steps/make_phone_graph.sh $lang $alidir $out_model_dir || exit 1;
+  utils/mkgraph.sh $lang $out_model_dir $out_model_dir/graph | \
+    tee $out_model_dir/mkgraph.log || exit 1
+  touch $out_model_dir/graph.done
+fi
diff --git a/egs/babel/s5d/local/rttm_to_text.pl b/egs/babel/s5d/local/rttm_to_text.pl
new file mode 100755
index 00000000000..d33c71e2f17
--- /dev/null
+++ b/egs/babel/s5d/local/rttm_to_text.pl
@@ -0,0 +1,151 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+use utf8;
+
+use Data::Dumper;
+
+sub float_gt {
+  my ($A, $B) = @_;
+  #print Dumper(\@_);
+
+  if ( ($A - $B) < 1e-12 ) {
+    return 0;
+  } elsif ($A > $B ) {
+    return 1;
+  } else {
+    return 0;
+  }
+}
+
+binmode(STDOUT, ":utf8");
+binmode(STDERR, ":utf8");
+
+my $datadir=$ARGV[0];
+my $rttm_filename=$ARGV[1];
+
+
+my $filename="";
+my %rttm;
+my @times;
+
+
+open(rttm_f, "<:utf8", $rttm_filename) or die "Cannot open the RTTM file";
+while ( <rttm_f> ) {
+  chop;
+  my @elems = split;
+  my $_filename= $elems[1];
+  my $_time=$elems[3];
+  my $_dur=$elems[4];
+  my $_text=$elems[5];
+
+  #We could simply pull out the vector of times
+  #from the hash, but in case the RTTM is not sorted,
+  #there might be some other problem somewhere
+  #(as the RTTMs are normally sorted). So instead of being
+  #"smart", let's make the user notice!
+  if ( exists($rttm{$_filename}) ) {
+    die "The RTTM file is not sorted!";
+  }
+
+  if ( $filename ne $_filename ) {
+    if ( $filename ne "" ) {
+      #print $filename . "\n";
+      my @tmp = @times;
+      $rttm{$filename} = \@tmp;
+      #if ($filename eq "BABEL_BP_101_10470_20111118_172644_inLine" ) {
+      #  print "$filename\n";
+      #  print Dumper($rttm{$filename});
+      #}
+      #print Dumper($rttm{"BABEL_BP_101_10470_20111118_172644_inLine"});
+    }
+
+    @times = ();
+    $filename = $_filename;
+  }
+
+  #I don't really know what the distinction between all of these is.
+  #Let's throw away the SPEAKER entries, as they do not really contain
+  #information that is to be found in the transcript, and keep the others.
+  if ( $elems[0] eq "LEXEME") {
+    push @times, [$_time, $_time + $_dur, $_text];
+  } elsif ( $elems[0] eq "NON-SPEECH" ) {
+    push @times, [$_time, $_time + $_dur, $_text];
+  } elsif ( $elems[0] eq "NON-LEX" ) {
+    push @times, [$_time, $_time + $_dur, $_text];
+  } elsif ( $elems[0] eq "SPEAKER") {
+    ;
+  } else {
+    #This is just a safety precaution if a new flag/type appears.
+    die "Unknown first element $elems[0] of line '" . join(" ", @elems) . "'\n";
+  }
+
+  #We compare the two last entries of the @times vector to see if they
+  #are ordered properly. Again, this is just a safety precaution.
+  #In a well-formed RTTM, this is normal.
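+  #As a hypothetical example: if the two last entries are [2.0, 3.5] and
+  #[3.0, 4.0], the four boundaries sort to (2.0, 3.0, 3.5, 4.0) and the block
+  #below reassigns the last entry to [2.0, 3.5] and the one before it to
+  #[3.5, 4.0], so the overlap disappears.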
"\n"; + #print "\n"; + + my @sorted = sort {$a <=> $b} ($A, $B, $Aend, $Bend); + #print Dumper(\@sorted); + $times[-1][0] = $sorted[0]; + $times[-1][1] = $sorted[2]; #We omit the gap between these two words + $times[-2][0] = $sorted[2]; + $times[-2][1] = $sorted[3]; + + } +} +if ( $filename ne "" ) { + #print $filename . "\n"; + $rttm{$filename} = \@times; +} +close(rttm_f); + +open(segments_f, "<:utf8", "$datadir/segments") or die "Cannot open file $datadir/segments"; +while ( ) { + chop; + my ($segmentname, $filename, $start, $end) = split; + + if (! exists $rttm{$filename} ) { + print "Filename $filename does not exists in the RTTM file\n"; + die; + } + my @times = @{$rttm{$filename}}; + my $i; + my $j; + + + #if ($segmentname ne "10470_A_20111118_172644_000000" ) { + # next; + #} + + #print $filename . "\n"; + + #print Dumper(\@times); + $i = 0; + #print $start . " " . $times[$i][0] . " " . $times[$i][1] . "\n"; + while (($i < @times) && ( $times[$i][1] < $start ) ) { $i += 1; }; + $j = $i; + while (($j < @times) && ( $times[$j][0] < $end ) ) { $j += 1; }; + + print $segmentname . " "; + while ( $i < $j ) { + #print Dumper($times[$i]); + print $times[$i][2] . " "; + $i += 1; + } + print "\n"; + #die +} +close(segments_f); diff --git a/egs/babel/s5d/local/run_cleanup_segmentation.sh b/egs/babel/s5d/local/run_cleanup_segmentation.sh new file mode 100755 index 00000000000..324d796b1b1 --- /dev/null +++ b/egs/babel/s5d/local/run_cleanup_segmentation.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Copyright 2016 Vimal Manohar +# 2016 Johns Hopkins University (author: Daniel Povey) +# Apache 2.0 + +# This script demonstrates how to re-segment training data selecting only the +# "good" audio that matches the transcripts. +# The basic idea is to decode with an existing in-domain acoustic model, and a +# biased language model built from the reference, and then work out the +# segmentation from a ctm like file. + +# For nnet3 and chain results after cleanup, see the scripts in +# local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh + +# GMM Results for speaker-independent (SI) and speaker adaptive training (SAT) systems on dev and test sets +# [will add these later]. + +set -e +set -o pipefail +set -u + +stage=0 +cleanup_stage=0 +data=data/train +cleanup_affix=cleaned +srcdir=exp/tri5 +langdir=data/langp/tri5 +nj=100 +decode_nj=16 +decode_num_threads=4 + +. ./path.sh +. ./cmd.sh +. utils/parse_options.sh + +cleaned_data=${data}_${cleanup_affix} + +dir=${srcdir}_${cleanup_affix}_work +cleaned_dir=${srcdir}_${cleanup_affix} + +if [ $stage -le 1 ]; then + # This does the actual data cleanup. + steps/cleanup/clean_and_segment_data.sh --stage $cleanup_stage --nj $nj --cmd "$train_cmd" \ + $data $langdir $srcdir $dir $cleaned_data +fi + +if [ $stage -le 2 ]; then + steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ + $cleaned_data $langdir $srcdir ${srcdir}_ali_${cleanup_affix} +fi + +if [ $stage -le 3 ]; then + steps/train_sat.sh --cmd "$train_cmd" \ + 5000 100000 $cleaned_data $langdir ${srcdir}_ali_${cleanup_affix} ${cleaned_dir} +fi diff --git a/egs/babel/s5d/local/run_kws_stt_task.sh b/egs/babel/s5d/local/run_kws_stt_task.sh new file mode 100755 index 00000000000..71981a5641b --- /dev/null +++ b/egs/babel/s5d/local/run_kws_stt_task.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+#Simple BABEL-only script to be run on generated lattices (to produce the
+#files for scoring and for NIST submission)
+
+set -e
+set -o pipefail
+set -u
+
+#Begin options
+min_lmwt=8
+max_lmwt=12
+cer=0
+skip_kws=false
+skip_stt=false
+skip_scoring=false
+extra_kws=false
+cmd=run.pl
+max_states=150000
+wip=0.5  #Word insertion penalty
+#End of options
+
+if [ $(basename $0) == score.sh ]; then
+  skip_kws=true
+fi
+
+echo $0 "$@"
+. utils/parse_options.sh
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir> <decode-dir>"
+  echo " e.g.: $0 data/dev10h data/lang exp/tri6/decode_dev10h"
+  exit 1;
+fi
+
+data_dir=$1;
+lang_dir=$2;
+decode_dir=$3;
+
+##NB: The first ".done" files are used for backward compatibility only
+##NB: they should be removed in the near future...
+if ! $skip_stt ; then
+  if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then
+    local/lattice_to_ctm.sh --cmd "$cmd" --word-ins-penalty $wip \
+      --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+      $data_dir $lang_dir $decode_dir
+
+    if ! $skip_scoring ; then
+      local/score_stm.sh --cmd "$cmd" --cer $cer \
+        --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt}\
+        $data_dir $lang_dir $decode_dir
+    fi
+    touch $decode_dir/.done.score
+  fi
+fi
+
+if ! $skip_kws ; then
+  [ ! -f $data_dir/extra_kws_tasks ] && exit 0
+
+  for extraid in `cat $data_dir/extra_kws_tasks` ; do
+    if [ ! -f $decode_dir/.done.kws.$extraid ] ; then
+      local/kws_search.sh --cmd "$cmd" --extraid $extraid \
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --skip-scoring true\
+        --max-lmwt ${max_lmwt} --indices-dir $decode_dir/kws_indices \
+        $lang_dir $data_dir $decode_dir
+      touch $decode_dir/.done.kws.$extraid
+    fi
+    if [[ ! $extraid =~ .*oov.* ]] && [ ! -f $decode_dir/.done.kwset.$extraid ] ; then
+      local/search/search.sh --cmd "$decode_cmd" --extraid ${extraid} \
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+        --indices-dir $decode_dir/kws_indices --skip-scoring $skip_scoring \
+        $lang_dir $data_dir $decode_dir
+      touch $decode_dir/.done.kwset.$extraid
+    fi
+
+    if ! $skip_scoring ; then
+      [ -f $decode_dir/.done.kws.${extraid}.scored ] && continue;
+      local/kws_search.sh --cmd "$cmd" --extraid $extraid --stage 4 \
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --skip-scoring false\
+        --max-lmwt ${max_lmwt} --indices-dir $decode_dir/kws_indices \
+        $lang_dir $data_dir $decode_dir
+      touch $decode_dir/.done.kws.${extraid}.scored
+    fi
+  done
+fi
diff --git a/egs/babel/s5d/local/run_kws_stt_task2.sh b/egs/babel/s5d/local/run_kws_stt_task2.sh
new file mode 100755
index 00000000000..6007baa1756
--- /dev/null
+++ b/egs/babel/s5d/local/run_kws_stt_task2.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+#Simple BABEL-only script to be run on generated lattices (to produce the
+#files for scoring and for NIST submission)
+
+set -e
+set -o pipefail
+set -u
+
+#Begin options
+min_lmwt=8
+max_lmwt=12
+cer=0
+skip_kws=false
+skip_stt=false
+skip_scoring=false
+extra_kws=false
+cmd=run.pl
+max_states=150000
+wip=0.5  #Word insertion penalty
+#End of options
+
+if [ $(basename $0) == score.sh ]; then
+  skip_kws=true
+fi
+
+echo $0 "$@"
+. utils/parse_options.sh
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir> <decode-dir>"
+  echo " e.g.: $0 data/dev10h data/lang exp/tri6/decode_dev10h"
+  exit 1;
+fi
+
+data_dir=$1;
+lang_dir=$(echo "$2" | perl -pe 's/\/$//g')
+decode_dir=$3;
+
+##NB: The first ".done" files are used for backward compatibility only
+##NB: they should be removed in the near future...
+if ! $skip_stt ; then
+  if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then
+    local/lattice_to_ctm.sh --cmd "$cmd" --word-ins-penalty $wip \
+      --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+      $data_dir $lang_dir $decode_dir
+
+    if ! $skip_scoring ; then
+      local/score_stm.sh --cmd "$cmd" --cer $cer \
+        --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt}\
+        $data_dir $lang_dir $decode_dir
+    fi
+    touch $decode_dir/.done.score
+  fi
+fi
+
+if ! $skip_kws ; then
+  [ ! -f $data_dir/extra_kws_tasks ] && exit 0
+
+  syll_data_dir=$(echo $data_dir | perl -pe 's/\.(pem|seg)$/.syll.$1/g' )
+  if [ -d ${syll_data_dir} ] && [ ! -f ${decode_dir}/syllabs/.done ] ; then
+    local/syllab/lattice_word2syll.sh --cmd "$cmd --mem 8G" \
+      $data_dir $lang_dir ${lang_dir}.syll $decode_dir ${decode_dir}/syllabs
+    touch ${decode_dir}/syllabs/.done
+  fi
+
+  phn_data_dir=$(echo $data_dir | perl -pe 's/\.(pem|seg)$/.phn.$1/g' )
+  if [ -d ${phn_data_dir} ] && [ ! -f ${decode_dir}/phones/.done ] ; then
+    local/syllab/lattice_word2syll.sh --cmd "$cmd --mem 8G" \
+      $data_dir $lang_dir ${lang_dir}.phn $decode_dir ${decode_dir}/phones
+    touch ${decode_dir}/phones/.done
+  fi
+
+
+
+  for extraid in `cat $data_dir/extra_kws_tasks | grep -v oov` ; do
+    if [ ! -f $decode_dir/.done.kwset.$extraid ] ; then
+      local/search/search.sh --cmd "$decode_cmd" --extraid ${extraid} \
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+        --indices-dir $decode_dir/kws_indices --skip-scoring $skip_scoring \
+        $lang_dir $data_dir $decode_dir
+      touch $decode_dir/.done.kwset.$extraid
+    fi
+
+    if [ -f ${decode_dir}/syllabs/kwset_${extraid}_${min_lmwt}/f4de/metrics.txt ]; then
+      touch $decode_dir/syllabs/.done.kwset.$extraid
+    fi
+
+    if [ -f ${decode_dir}/phones/kwset_${extraid}_${min_lmwt}/f4de/metrics.txt ]; then
+      touch $decode_dir/phones/.done.kwset.$extraid
+    fi
+
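+    # The word-level search above is always run; the two blocks below redo
+    # the search over the syllable- and phone-level indices, reusing the
+    # acoustic model one level up from the decode directory (../final.mdl).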
+    if [ -f ${decode_dir}/syllabs/.done ] && [ ! -f $decode_dir/syllabs/.done.kwset.$extraid ] ; then
+      local/search/search.sh --cmd "$cmd" --extraid ${extraid} --model $decode_dir/../final.mdl\
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+        --indices-dir $decode_dir/syllabs/kws_indices --skip-scoring $skip_scoring \
+        ${lang_dir}.syll $syll_data_dir $decode_dir/syllabs
+      touch $decode_dir/syllabs/.done.kwset.$extraid
+    fi
+
+
+    if [ -f ${decode_dir}/phones/.done ] && [ ! -f $decode_dir/phones/.done.kwset.$extraid ] ; then
+      local/search/search.sh --cmd "$cmd" --extraid ${extraid} --model $decode_dir/../final.mdl\
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+        --indices-dir $decode_dir/phones/kws_indices --skip-scoring $skip_scoring \
+        ${lang_dir}.phn $phn_data_dir $decode_dir/phones
+      touch $decode_dir/phones/.done.kwset.$extraid
+    fi
+  done
+fi
diff --git a/egs/babel/s5d/local/score.sh b/egs/babel/s5d/local/score.sh
new file mode 120000
index 00000000000..7a34ba5b0d7
--- /dev/null
+++ b/egs/babel/s5d/local/score.sh
@@ -0,0 +1 @@
+run_kws_stt_task.sh
\ No newline at end of file
diff --git a/egs/babel/s5d/local/score_combine.sh b/egs/babel/s5d/local/score_combine.sh
new file mode 100755
index 00000000000..7e8af85b2d8
--- /dev/null
+++ b/egs/babel/s5d/local/score_combine.sh
@@ -0,0 +1,181 @@
+#!/bin/bash
+
+# Copyright 2012-2013  Arnab Ghoshal
+#                      Johns Hopkins University (authors: Daniel Povey, Sanjeev Khudanpur)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Script for system combination using minimum Bayes risk decoding.
+# This calls lattice-combine to create a union of lattices that have been
+# normalized by removing the total forward cost from them. The resulting lattice
+# is used as input to lattice-mbr-decode. This should not be put in steps/ or
+# utils/ since the scores on the combined lattice must not be scaled.
+
+# begin configuration section.
+cmd=run.pl
+beam=4  # prune the lattices prior to MBR decoding, for speed.
+stage=0
+cer=0
+decode_mbr=true
+lat_weights=
+word_ins_penalty=0.0
+min_lmwt=7
+max_lmwt=17
+parallel_opts="-pe smp 3"
+skip_scoring=false
+ctm_name=
+#end configuration section.
+
+help_message="Usage: "$(basename $0)" [options] <data-dir> <lang-dir|graph-dir> <decode-dir1>[:lmwt-bias] <decode-dir2>[:lmwt-bias] [<decode-dir3>[:lmwt-bias] ... ] <out-dir>
+     E.g. "$(basename $0)" data/test data/lang exp/tri1/decode exp/tri2/decode exp/tri3/decode exp/combine
+     or:  "$(basename $0)" data/test data/lang exp/tri1/decode exp/tri2/decode:18 exp/tri3/decode:13 exp/combine
+Options:
+  --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes.
+  --min-lmwt INT                  # minimum LM-weight for lattice rescoring
+  --max-lmwt INT                  # maximum LM-weight for lattice rescoring
+  --lat-weights STR               # colon-separated string of lattice weights
+  --stage (0|1|2)                 # (createCTM | filterCTM | runSclite).
+# begin configuration section.
+cmd=run.pl
+beam=4  # prune the lattices prior to MBR decoding, for speed.
+stage=0
+cer=0
+decode_mbr=true
+lat_weights=
+word_ins_penalty=0.0
+min_lmwt=7
+max_lmwt=17
+parallel_opts="-pe smp 3"
+skip_scoring=false
+ctm_name=
+#end configuration section.
+
+help_message="Usage: $(basename $0) [options] <data-dir> <lang-dir> <decode-dir1>[:lmwt-bias] <decode-dir2>[:lmwt-bias] [<decode-dir3>[:lmwt-bias] ... ] <out-dir>
+  E.g. $(basename $0) data/test data/lang exp/tri1/decode exp/tri2/decode exp/tri3/decode exp/combine
+  or:  $(basename $0) data/test data/lang exp/tri1/decode exp/tri2/decode:18 exp/tri3/decode:13 exp/combine
+Options:
+  --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes.
+  --min-lmwt INT                  # minimum LM-weight for lattice rescoring
+  --max-lmwt INT                  # maximum LM-weight for lattice rescoring
+  --lat-weights STR               # colon-separated string of lattice weights
+  --stage (0|1|2)                 # (createCTM | filterCTM | runSclite).
+  --parallel-opts <opts>          # extra options to command for combination stage,
+                                  # default '-pe smp 3'
+  --cer (0|1)                     # compute CER in addition to WER
+";
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -lt 5 ]; then
+  printf "$help_message\n";
+  exit 1;
+fi
+
+data=$1
+lang=$2
+dir=${@: -1}  # last argument to the script
+shift 2;
+decode_dirs=( $@ )  # read the remaining arguments into an array
+unset decode_dirs[${#decode_dirs[@]}-1]  # 'pop' the last argument, which is the output dir
+num_sys=${#decode_dirs[@]}  # number of systems to combine
+
+# Let the user set the CTM file name;
+# use the data-dir name in case the user doesn't care.
+if [ -z ${ctm_name} ] ; then
+  ctm_name=`basename $data`
+fi
+
+for f in $lang/words.txt $lang/phones/word_boundary.int ; do
+  [ ! -f $f ] && echo "$0: file $f does not exist" && exit 1;
+done
+if ! $skip_scoring ; then
+  for f in $data/stm; do
+    [ ! -f $f ] && echo "$0: file $f does not exist" && exit 1;
+  done
+fi
+
+mkdir -p $dir/log
+
+for i in `seq 0 $[num_sys-1]`; do
+  decode_dir=${decode_dirs[$i]}
+  offset=`echo $decode_dir | cut -d: -s -f2`  # add this to the lm-weight.
+  decode_dir=`echo $decode_dir | cut -d: -f1`
+  [ -z "$offset" ] && offset=0
+
+  model=`dirname $decode_dir`/final.mdl  # model one level up from decode dir
+  for f in $model $decode_dir/lat.1.gz ; do
+    [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
+  done
+  if [ $i -eq 0 ]; then
+    nj=`cat $decode_dir/num_jobs` || exit 1;
+  else
+    if [ $nj != `cat $decode_dir/num_jobs` ]; then
+      echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`"
+      exit 1;
+    fi
+  fi
+  file_list=""
+  # I want to get the files in the correct order so we can use ",s,cs" to avoid
+  # memory blowup. I first tried a pattern like file.{1,2,3,4}.gz, but if the
+  # system default shell is not bash (e.g. dash, in debian) this will not work,
+  # so we enumerate all the input files. This tends to make the command lines
+  # very long.
+  # (E.g., with nj=4 the list is "lat.1.gz lat.2.gz lat.3.gz lat.4.gz", in job
+  # order, which is what the ",s,cs" sorted, called-sequentially reading needs.)
+  for j in `seq $nj`; do file_list="$file_list $decode_dir/lat.$j.gz"; done
+
+  lats[$i]="ark,s,cs:lattice-scale --inv-acoustic-scale=\$[$offset+LMWT] 'ark:gunzip -c $file_list|' ark:- | \
+    lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- | \
+    lattice-prune --beam=$beam ark:- ark:- | \
+    lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- |"
+done
+
+mkdir -p $dir/scoring/log
+
+if [ -z "$lat_weights" ]; then
+  lat_weights=1.0
+  for i in `seq $[$num_sys-1]`; do lat_weights="$lat_weights:1.0"; done
+fi
+
+if [ $stage -le 0 ]; then
+  $cmd $parallel_opts LMWT=$min_lmwt:$max_lmwt $dir/log/combine_lats.LMWT.log \
+    mkdir -p $dir/score_LMWT/ '&&' \
+    lattice-combine --lat-weights=$lat_weights "${lats[@]}" ark:- \| \
+    lattice-to-ctm-conf --decode-mbr=true ark:- - \| \
+    utils/int2sym.pl -f 5 $lang/words.txt \| \
+    utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \
+    '>' $dir/score_LMWT/${ctm_name}.ctm || exit 1;
+fi
+
+if [ $stage -le 1 ]; then
+  # Remove some stuff we don't want to score, from the ctm.
+  for lmwt in `seq $min_lmwt $max_lmwt`; do
+    x=$dir/score_${lmwt}/${ctm_name}.ctm
+    [ ! -f $x ] && echo "File $x does not exist! Exiting..." && exit 1
+    cp $x $x.bkup1;
+    cat $x.bkup1 | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \
+      grep -v -E '<UNK>|%HESITATION|\(\(\)\)' | \
+      grep -v -E '<eps>' | \
+      grep -v -E '<noise>' | \
+      grep -v -E '<silence>' | \
+      grep -v -E '<unk>' | \
+      grep -v -E '<v-noise>' | \
+      grep -v -E '<hes>' | \
+      perl -e '@list = (); %list = ();
+        while(<>) {
+          chomp;
+          @col = split(" ", $_);
+          push(@list, $_);
+          $key = "$col[0]" . " $col[1]";
+          $list{$key} = 1;
+        }
+        foreach(sort keys %list) {
+          $key = $_;
+          foreach(grep(/$key/, @list)) {
+            print "$_\n";
+          }
+        }' > $x;
+    cp $x $x.bkup2;
+  done
+fi
+
+if ! $skip_scoring ; then
+  if [ $stage -le 2 ]; then
+    local/score_stm.sh --min-lmwt $min_lmwt --max-lmwt $max_lmwt $data $lang $dir || exit 1
+  fi
+fi
+
+exit 0
diff --git a/egs/babel/s5d/local/score_map.sh b/egs/babel/s5d/local/score_map.sh
new file mode 100755
index 00000000000..ecc528ec909
--- /dev/null
+++ b/egs/babel/s5d/local/score_map.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0
+
+[ -f ./path.sh ] && . ./path.sh
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+decode_mbr=true
+reverse=false
+min_lmwt=9
+max_lmwt=20
+#end configuration section.
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: local/score_map.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
+  echo "    --stage (0|1|2)                 # start scoring script from part-way through."
+  echo "    --decode_mbr (true/false)       # maximum bayes risk decoding (confusion network)."
+  echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
+  echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
+  echo "    --reverse (true/false)          # score with time reversed features "
+  exit 1;
+fi
+
+data=$1
+lang_or_graph=$2
+dir=$3
+
+symtab=$lang_or_graph/words.txt
+
+for f in $symtab $dir/lat.1.gz $data/text; do
+  [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
+done
+
+mkdir -p $dir/scoring/log
+
+cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
+
+$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \
+  lattice-best-path --lm-scale=LMWT --word-symbol-table=$symtab \
+    "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/LMWT.tra || exit 1;
+
+# When --reverse is true the features were time-reversed, so the 1-best word
+# sequence comes out backwards; the block below flips each hypothesis back
+# (keeping the utterance id first).
+if $reverse; then
+  for lmwt in `seq $min_lmwt $max_lmwt`; do
+    mv $dir/scoring/$lmwt.tra $dir/scoring/$lmwt.tra.orig
+    awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \
+      <$dir/scoring/$lmwt.tra.orig >$dir/scoring/$lmwt.tra
+  done
+fi
+
+# Note: the double level of quoting for the sed command
+$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
+  cat $dir/scoring/LMWT.tra \| \
+  utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
+  compute-wer --text --mode=present \
+  ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1;
+
+exit 0;
diff --git a/egs/babel/s5d/local/score_mbr.sh b/egs/babel/s5d/local/score_mbr.sh
new file mode 100755
index 00000000000..b2fcaf5cdf9
--- /dev/null
+++ b/egs/babel/s5d/local/score_mbr.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Script for minimum bayes risk decoding.
+
+[ -f ./path.sh ] && . ./path.sh;
+
+# begin configuration section.
+cmd=run.pl
+min_lmwt=9
+max_lmwt=20
+#end configuration section.
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: local/score_mbr.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
+  echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
+  echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
+  exit 1;
+fi
+
+data=$1
+lang_or_graph=$2
+dir=$3
+
+symtab=$lang_or_graph/words.txt
+
+for f in $symtab $dir/lat.1.gz $data/text; do
+  [ ! -f $f ] && echo "score_mbr.sh: no such file $f" && exit 1;
+done
+
+mkdir -p $dir/scoring/log
+
+cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
+
+# We submit the jobs separately, not as an array, because it's hard
+# to get the inverse of the LM scales.
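+# For example, inv_acwt=10 gives --acoustic-scale=0.1 and inv_acwt=12 gives
+# --acoustic-scale=0.0833...; submitting one job per inv_acwt lets us compute
+# that inverse here in the driver script.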
+rm $dir/.error 2>/dev/null
+for inv_acwt in `seq $min_lmwt $max_lmwt`; do
+  acwt=`perl -e "print (1.0/$inv_acwt);"`
+  $cmd $dir/scoring/rescore_mbr.${inv_acwt}.log \
+    lattice-mbr-decode --acoustic-scale=$acwt --word-symbol-table=$symtab \
+      "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/${inv_acwt}.tra \
+    || touch $dir/.error &
+done
+wait;
+[ -f $dir/.error ] && echo "score_mbr.sh: error getting MBR output.";
+
+$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
+  cat $dir/scoring/LMWT.tra \| \
+  utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
+  compute-wer --text --mode=present \
+  ark:$dir/scoring/test_filt.txt ark,p:- ">" $dir/wer_LMWT || exit 1;
+
diff --git a/egs/babel/s5d/local/score_sctk_prune.sh b/egs/babel/s5d/local/score_sctk_prune.sh
new file mode 100755
index 00000000000..09662af57c8
--- /dev/null
+++ b/egs/babel/s5d/local/score_sctk_prune.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+# Copyright Johns Hopkins University (Authors: Daniel Povey, Sanjeev Khudanpur) 2012-2013. Apache 2.0.
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+cer=0
+decode_mbr=true
+beam=5
+word_ins_penalty=0
+min_lmwt=7
+max_lmwt=17
+model=
+#end configuration section.
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir|graph-dir> <decode-dir>" && exit;
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
+  echo "    --stage (0|1|2)                 # (createCTM | filterCTM | runSclite)."
+  echo "    --cer (0|1)                     # compute CER in addition to WER"
+  exit 1;
+fi
+
+data=$1
+lang=$2  # Note: may be graph directory not lang directory, but has the necessary stuff copied.
+dir=$3
+
+if [ -z "$model" ] ; then
+  model=$dir/../final.mdl  # assume model one level up from decoding dir.
+fi
+
+ScoringProgram=$KALDI_ROOT/tools/sctk/bin/sclite
+[ ! -f $ScoringProgram ] && echo "Cannot find scoring program at $ScoringProgram" && exit 1;
+
+for f in $data/char.stm $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \
+    $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do
+  [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
+done
+
+name=`basename $data`;  # e.g. eval2000
+
+mkdir -p $dir/scoring/log
+
+if [ $stage -le 0 ]; then
+  $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \
+    mkdir -p $dir/score_LMWT/ '&&' \
+    lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+    lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \
+    lattice-prune --beam=$beam ark:- ark:- \| \
+    lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \
+    lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \
+    utils/int2sym.pl -f 5 $lang/words.txt \| \
+    utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \
+    '>' $dir/score_LMWT/$name.ctm || exit 1;
+fi
+
+if [ $stage -le 1 ]; then
+  # Remove some stuff we don't want to score, from the ctm.
+  for x in $dir/score_*/$name.ctm; do
+    cp $x $x.bkup1;
+    cat $x.bkup1 | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \
+      grep -v -E '<UNK>|%HESITATION|\(\(\)\)' | \
+      grep -v -E '<eps>' | \
+      grep -v -E '<noise>' | \
+      grep -v -E '<silence>' | \
+      grep -v -E '<unk>' | \
+      grep -v -E '<v-noise>' | \
+      perl -e '@list = (); %list = ();
+        while(<>) {
+          chomp;
+          @col = split(" ", $_);
+          push(@list, $_);
+          $key = "$col[0]" . " $col[1]";
+          $list{$key} = 1;
+        }
+        foreach(sort keys %list) {
+          $key = $_;
+          foreach(grep(/$key/, @list)) {
+            print "$_\n";
+          }
+        }' > $x;
+    cp $x $x.bkup2;
+    y=${x%.ctm};
+    cat $x.bkup2 | \
+      perl -e '
+        use Encode;
+        while(<>) {
+          chomp;
+          @col = split(" ", $_);
+          @col == 6 || die "Bad number of columns!";
+          if ($col[4] =~ m/[\x80-\xff]{2}/) {
+            $word = decode("UTF8", $col[4]);
+            @char = split(//, $word);
+            $start = $col[2];
+            $dur = $col[3]/@char;
+            $start -= $dur;
+            foreach (@char) {
+              $char = encode("UTF8", $_);
+              $start += $dur;
+              # printf "$col[0] $col[1] $start $dur $char\n";
+              printf "%s %s %.2f %.2f %s %s\n", $col[0], $col[1], $start, $dur, $char, $col[5];
+            }
+          }
+        }' > $y.char.ctm
+    cp $y.char.ctm $y.char.ctm.bkup1
+  done
+fi
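+# The perl snippet above splits multi-byte (e.g. CJK) words into per-character
+# ctm entries for CER scoring, dividing the duration evenly. E.g. (illustrative
+# line) a two-character word XY in
+#   file1 1 1.00 0.30 XY 0.95
+# becomes
+#   file1 1 1.00 0.15 X 0.95
+#   file1 1 1.15 0.15 Y 0.95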
+
+if [ $stage -le 2 ]; then
+  $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
+    cp $data/stm $dir/score_LMWT/ '&&' cp $data/glm $dir/score_LMWT/ '&&'\
+    $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm -o all -o dtl;
+
+  if [ $cer -eq 1 ]; then
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.char.log \
+      cp $data/char.stm $dir/score_LMWT/ '&&'\
+      $ScoringProgram -s -r $dir/score_LMWT/char.stm stm -h $dir/score_LMWT/${name}.char.ctm ctm -o all -o dtl;
+  fi
+
+#  for x in $dir/score_*/*.ctm; do
+#    mv $x.filt $x;
+#    rm -f $x.filt*;
+#  done
+
+#  for x in $dir/score_*/*stm; do
+#    mv $x.filt $x;
+#    rm -f $x.filt*;
+#  done
+fi
+
+echo "Finished scoring on" `date`
+exit 0
diff --git a/egs/babel/s5d/local/score_stm.sh b/egs/babel/s5d/local/score_stm.sh
new file mode 100755
index 00000000000..56835109722
--- /dev/null
+++ b/egs/babel/s5d/local/score_stm.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# Copyright 2013  Johns Hopkins University (authors: Yenda Trmal)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# This is a scoring script for the CTMs in <decode-dir>/score_<lmwt>/${name}.ctm;
+# it tries to mimic the NIST scoring setup as much as possible (and usually does a good job)
+
+# begin configuration section.
+cmd=run.pl
+cer=0
+min_lmwt=7
+max_lmwt=17
+model=
+stage=0
+ctm_name=
+case_insensitive=true
+use_icu=true
+icu_transform='Any-Lower'
+#end configuration section.
+
+echo $0 $@
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir> <decode-dir>" && exit;
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
+  echo "    --cer (0|1)                     # compute CER in addition to WER"
+  exit 1;
+fi
+
+data=$1
+lang=$2  # This parameter is not used -- kept only for backwards compatibility
+dir=$3
+
+set -e
+set -o pipefail
+set -u
+
+ScoringProgram=`which sclite` || ScoringProgram=$KALDI_ROOT/tools/sctk/bin/sclite
+[ ! -x $ScoringProgram ] && echo "Cannot find scoring program at $ScoringProgram" && exit 1;
+SortingProgram=`which hubscr.pl` || SortingProgram=$KALDI_ROOT/tools/sctk/bin/hubscr.pl
+[ ! -x $SortingProgram ] && echo "Cannot find sorting program at $SortingProgram" && exit 1;
+
+for f in $data/stm ; do
+  [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
+done
+
+if [ -z $ctm_name ] ; then
+  name=`basename $data`;  # e.g. eval2000
+else
+  name=$ctm_name
+fi
+
+mkdir -p $dir/scoring/log
+if [ $stage -le 0 ] ; then
+  $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cp -f $data/stm $dir/score_LMWT/stm.unsorted '&&' \
+    cp -f $dir/score_LMWT/${name}.ctm $dir/score_LMWT/${name}.ctm.unsorted '&&' \
+    $SortingProgram sortSTM \<$dir/score_LMWT/stm.unsorted \>$dir/score_LMWT/stm.sorted '&&' \
+    $SortingProgram sortCTM \<$dir/score_LMWT/${name}.ctm.unsorted \>$dir/score_LMWT/${name}.ctm.sorted '&&' \
+    paste -d ' ' \<\(cut -f 1-5 -d ' ' $dir/score_LMWT/stm.sorted \) \
+                 \<\(cut -f 6- -d ' ' $dir/score_LMWT/stm.sorted \| uconv -f utf8 -t utf8 -x "$icu_transform" \) \
+                 \> $dir/score_LMWT/stm '&&' \
+    paste -d ' ' \<\(cut -f 1-4 -d ' ' $dir/score_LMWT/${name}.ctm.sorted \) \
+                 \<\(cut -f 5- -d ' ' $dir/score_LMWT/${name}.ctm.sorted \| uconv -f utf8 -t utf8 -x "$icu_transform" \) \
+                 \> $dir/score_LMWT/${name}.ctm.sorted2 '&&' \
+    utils/fix_ctm.sh $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm.sorted2 '&&' \
+    $SortingProgram sortCTM \<$dir/score_LMWT/${name}.ctm.sorted2 \>$dir/score_LMWT/${name}.ctm '&&' \
+    $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm \
+      -n "$name.ctm" -f 0 -D -F -o sum rsum prf dtl sgml -e utf-8 || exit 1
+fi
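+# Note on the case handling above: with the default icu_transform 'Any-Lower',
+# uconv rewrites the transcript fields of both the stm and the ctm to
+# lowercase (e.g. "Hello WORLD" -> "hello world") before sclite compares them,
+# which is what makes the scoring effectively case-insensitive.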
+
+if [ $stage -le 1 ]; then
+  if [ $cer -eq 1 ]; then
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.char.log \
+      $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm \
+        -n "$name.char.ctm" -o sum rsum prf dtl sgml -f 0 -D -F -c NOASCII DH -e utf-8 || exit 1
+  fi
+fi
+
+echo "Finished scoring on" `date`
+exit 0
+
diff --git a/egs/babel/s5d/local/search/analyze_stats.pl b/egs/babel/s5d/local/search/analyze_stats.pl
new file mode 100755
index 00000000000..fd09f9c92a7
--- /dev/null
+++ b/egs/babel/s5d/local/search/analyze_stats.pl
@@ -0,0 +1,219 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+my $Usage = <<EOU;
+Usage: gunzip -c <stats-files> | $0 [options] <data-dir> <alignment.csv> <keywords-out>
+ e.g.: gunzip -c exp/tri5/decode_dev10h.pem/kws/stats.*.gz | \
+       $0 --trials 36000 data/dev10h.pem alignment.csv keywords_stats
+
+Allowed options:
+  --trials  : number of trials (length of the search collection) for ATWV computation
+EOU
+
+use strict;
+use warnings;
+use utf8;
+use Data::Dumper;
+use Getopt::Long;
+
+my $T = 36212.6725;
+
+GetOptions ("trials=i" => \$T) or do
+  {
+    print STDERR "Cannot parse the command-line parameters.\n";
+    print STDERR "$Usage\n";
+    die "Cannot continue\n";
+  };
+
+if (@ARGV != 3) {
+  print STDERR "Incorrect number of command-line parameters\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+}
+
+my $data = $ARGV[0];
+my $align = $ARGV[1];
+my $keywords = $ARGV[2];
+
+my %SEGMENTS;
+open(my $seg_file, "$data/segments") or
+  die "Cannot open the segments file in $data/segments";
+
+while (my $line = <$seg_file>) {
+  (my $seg_id, my $file_id, my $tstart, my $tend) = split(" ", $line);
+  $SEGMENTS{$seg_id} = [$file_id, $tstart, $tend];
+}
+
+my %ALIGNMENT;
+my %TWVSTATS;
+open(my $align_file, $align) or
+  die "Cannot open the alignment file in $align";
+
+print "Reading alignment...\n";
+my $dummy=<$align_file>;
+while (my $line = <$align_file>) {
+  chomp $line;
+  my @entries = split(/\s*,\s*/, $line);
+  my $kw_id = $entries[3];
+  my $file_id = $entries[1];
+  my $kw_time = $entries[7];
+  my $op_id = join(",", @entries[10 .. 11]);  # 'YES,CORR' | 'YES,FA' | 'NO,MISS' | 'NO,CORR!DET' | ',MISS'
+
+  $TWVSTATS{$kw_id}{$op_id} += 1;
+  next if $op_id eq ",MISS";
+
+  my $key = sprintf "%s,%s", $kw_id, $file_id;
+
+  if ( grep { abs($_ - $kw_time) <= 0.5 } @{$ALIGNMENT{$key}} ) {
+    die "The key $key is not unique\n";
+  }
+  push @{$ALIGNMENT{$key}}, \@entries;
+}
+
+#print Dumper(\%TWVSTATS);
+print "Done reading alignment...\n";
+
+my %HITCACHE;
+
+print "Reading stats\n";
+while (my $line = <STDIN>) {
+  my @entries = split(" ", $line);
+
+  my $wav = $SEGMENTS{$entries[1]}[0];
+  my $seg_start = $SEGMENTS{$entries[1]}[1];
+  my $seg_end = $SEGMENTS{$entries[1]}[2];
+
+  my $kw = $entries[0];
+  # frame indices -> seconds (the frame shift is 10 ms)
+  my $kw_start = $seg_start + $entries[2]/100.00000;
+  my $kw_stop = $seg_start + $entries[3]/100.00000;
+  my $kw_center = ($kw_start + $kw_stop) / 2.0;
+  #print Dumper($kw_start, $kw_stop, $kw_center);
+  my $kw_wav = $wav;
+
+  my $key = sprintf "%s,%s", $kw, $kw_wav;
+
+  if ( not grep { abs( (@{$_}[7] + @{$_}[8])/2.0 - $kw_center) <= 0.1 } @{$ALIGNMENT{$key}} ) {
+    ##print "The key $key, $kw_center does not exist in the alignment\n";
+    ##print join(" ", @entries) . "\n";
+    #print Dumper($ALIGNMENT{$key});
+    #die;
+  } else {
+    my @tmp = @{$ALIGNMENT{$key}};
+    my ($index) = grep { abs( (@{$tmp[$_]}[7] + @{$tmp[$_]}[8]) / 2.0 - $kw_center) <= 0.1 } (0 .. @{$ALIGNMENT{$key}}-1);
+    die unless defined $index;
+    my @ali = @{@{$ALIGNMENT{$key}}[$index]};
+    my $diff = abs($ali[7] - $kw_start);
+
+    #die "Weird hit " . Dumper(\@entries) if $entries[5] != 0;
+
+    my $hit_id = join(" ", @entries[5 .. @entries-1]);
+    $hit_id =~ s/\b0\b//g;
+    $hit_id =~ s/^\s+//g;
+    $hit_id =~ s/\s+/ /g;
+    $hit_id =~ s/\s+$//g;
+    #print $hit_id . "\n";
+    #print Dumper(\@ali, $kw_wav, $diff) if $diff > 0.1;
+    #print Dumper(\@entries);
+
+    my $op_id = join(",", @ali[10 .. 11]);  # 'YES,CORR' | 'YES,FA' | 'NO,MISS' | 'NO,CORR!DET'
+    $HITCACHE{$kw}{$hit_id}{$op_id} += 1;
+    #push @{$HITCACHE{$hit_id}{join(",", @ali[10 .. 11])}}, $entries[4];
+  }
+  #print Dumper(\@entries, $kw_start, $kw_wav);
+  #exit
+}
+#print Dumper(\%HITCACHE);
+print "Done reading stats\n";
+
+open(my $KW, "> $keywords");
+
+print "Analyzing\n";
+my $TWV = 0;
+my $NEW_TWV = 0;
+my $N_KW = 0;
+foreach my $kwid (sort keys %HITCACHE) {
+  my %old_stats = %{$TWVSTATS{$kwid}};
+  #print Dumper($kwid, \%old_stats);
+
+  $old_stats{"YES,CORR"} = 0 unless defined $old_stats{"YES,CORR"};
+  $old_stats{",MISS"} = 0 unless defined $old_stats{",MISS"};
+  $old_stats{"NO,MISS"} = 0 unless defined $old_stats{"NO,MISS"};
+  $old_stats{"YES,FA"} = 0 unless defined $old_stats{"YES,FA"};
+
+  my $n_kw = $old_stats{"YES,CORR"} +
+             $old_stats{",MISS"} +
+             $old_stats{"NO,MISS"};
+
+  my $n_trials = $T - $n_kw;
+
+  next if $n_kw == 0;
+
+  my $p_miss = 0;
+  $p_miss = 1 - $old_stats{"YES,CORR"} / $n_kw unless $n_kw == 0;
+  my $p_fa = $old_stats{"YES,FA"} / $n_trials;
+
+  my $twv = 1 - $p_miss - 999.9 * $p_fa;
+  print "$kwid $n_kw $p_miss $p_fa $twv\n";
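+  # This is the NIST term-weighted value at the decision threshold:
+  #   TWV = 1 - P_miss - beta * P_FA,  with beta = 999.9,
+  # where P_miss = 1 - #(YES,CORR)/n_kw and P_FA = #(YES,FA)/n_trials,
+  # n_trials = T - n_kw. E.g. (illustrative numbers): with n_kw = 10,
+  # 8 correct YES hits and 2 false alarms over 36000 trials,
+  #   TWV = 1 - 0.2 - 999.9 * 2/36000 ~= 0.744.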
+
+  foreach my $kwpath (sort keys %{ $HITCACHE{$kwid} }) {
+    my $weight = 0;
+
+    my %new_stats = %{$HITCACHE{$kwid}{$kwpath}};
+    $new_stats{"YES,CORR"} = 0 unless defined $new_stats{"YES,CORR"};
+    $new_stats{"YES,FA"} = 0 unless defined $new_stats{"YES,FA"};
+
+    my $new_p_miss = 1 - ($old_stats{"YES,CORR"} - $new_stats{"YES,CORR"}) / $n_kw;
+    my $new_p_fa = ($old_stats{"YES,FA"} - $new_stats{"YES,FA"}) / $n_trials;
+    my $new_twv = 1 - $new_p_miss - 999.9 * $new_p_fa;
+    if ($new_twv > $twv) {
+      #print "keep: $kwid $kwpath $twv - $new_twv\n";
+      if ((defined $HITCACHE{$kwid}{$kwpath}->{"YES,FA"}) ||
+          (defined $HITCACHE{$kwid}{$kwpath}->{"NO,MISS"}) ||
+          (defined $HITCACHE{$kwid}{$kwpath}->{"YES,CORR"})) {
+        print Dumper($kwid, $kwpath, $HITCACHE{$kwid}{$kwpath});
+      }
+      $old_stats{"YES,CORR"} -= $new_stats{"YES,CORR"};
+      $old_stats{"YES,FA"} -= $new_stats{"YES,FA"};
+    } else {
+      print $KW "$kwid $kwpath\n";
+      #print "remove: $kwid $kwpath $twv - $new_twv\n";
+    }
+    # print $W "$kwid $weight\n";
+  }
+
+  my $new_p_miss = 1 - $old_stats{"YES,CORR"} / $n_kw;
+  my $new_p_fa = $old_stats{"YES,FA"} / $n_trials;
+
+  my $new_twv = 1 - $new_p_miss - 999.9 * $new_p_fa;
+
+  # running averages over keywords (incremental mean)
+  $NEW_TWV = $N_KW/($N_KW+1) * $NEW_TWV + $new_twv / ($N_KW+1);
+  $TWV = $N_KW/($N_KW+1) * $TWV + $twv / ($N_KW+1);
+  $N_KW += 1;
+}
+close($KW);
+#print "ATWV: $TWV $NEW_TWV\n";
diff --git a/egs/babel/s5d/local/search/annotate_kwlist.pl b/egs/babel/s5d/local/search/annotate_kwlist.pl
new file mode 100755
index 00000000000..fbbdc0c119e
--- /dev/null
+++ b/egs/babel/s5d/local/search/annotate_kwlist.pl
@@ -0,0 +1,166 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2016  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+my $Usage = <<EOU;
+Usage: cat <kwlist.xml> | $0 <categories> > output.kwlist.xml
+ e.g.: cat kwlist.xml | $0 data/dev10h.pem/kwset_kwlist/categories > output.kwlist.xml
+
+EOU
+use strict;
+use warnings "FATAL";
+use utf8;
+use XML::Parser;
+use Data::Dumper;
+
+binmode STDERR, ":utf8";
+binmode STDOUT, ":utf8";
+
+my $IN_KWTEXT=0;
+my $KWTEXT='';
+my $KWID='';
+my %CATEGORIES;
+
+sub kwlist {
+  my @entries = @_;
+  shift @entries;
+  shift @entries;
+
+  my $header="";
+  while (@entries) {
+    my $k = shift @entries;
+    my $w = shift @entries;
+
+    $header .= " $k=\"$w\"";
+  }
+  print "<kwlist$header>\n";
+}
+
+sub kwlist_ {
+  print "</kwlist>\n";
+}
+
+sub kw {
+  my @entries = @_;
+  shift @entries;
+  shift @entries;
+  #print Dumper(@entries);
+  my %params = @entries;
+  $KWID = $params{kwid};
+}
+
+sub kwtext {
+  my @entries = @_;
+  shift @entries;
+  $IN_KWTEXT=1;
+  #print Dumper(@entries);
+}
+sub char {
+  my @entries = @_;
+  shift @entries;
+  $KWTEXT=$entries[0] if $IN_KWTEXT eq 1;
+}
+
+sub kwtext_ {
+  my @entries = @_;
+  shift @entries;
+  $IN_KWTEXT=0;
+  if ($KWTEXT) {
+    if (exists $CATEGORIES{$KWID}) {
+      print "  <kw kwid=\"$KWID\">\n";
+      print "    <kwtext>$KWTEXT</kwtext>\n";
+      print "    <kwinfo>\n";
+      print "      <attr>\n";
+      print "        <name>ALL</name>\n";
+      print "        <value>1</value>\n";
+      print "      </attr>\n";
+      foreach my $cat (sort keys %{$CATEGORIES{$KWID}} ) {
+        my @entries = split("=", $cat);
+        my $name;
+        my $value;
+
+        if (scalar @entries == 2) {
+          $name = $entries[0];
+          $value = $entries[1];
+        } else {
+          $name = $cat;
+          $value = 1;
+        }
+        print "      <attr>\n";
+        print "        <name>$name</name>\n";
+        print "        <value>$value</value>\n";
+        print "      </attr>\n";
+      }
+      print "    </kwinfo>\n";
+      print "  </kw>\n";
+    } else {
+      my $n = scalar split " ", $KWTEXT;
+      my $l = length join("", split(" ", $KWTEXT));
+
+      $n = sprintf "%02d", $n;
+      $l = sprintf "%02d", $l;
+
+      print "  <kw kwid=\"$KWID\">\n";
+      print "    <kwtext>$KWTEXT</kwtext>\n";
+      print "    <kwinfo>\n";
+      print "      <attr>\n";
+      print "        <name>Characters</name>\n";
+      print "        <value>$l</value>\n";
+      print "      </attr>\n";
+      print "      <attr>\n";
+      print "        <name>NGramOrder</name>\n";
+      print "        <value>$n</value>\n";
+      print "      </attr>\n";
+      print "      <attr>\n";
+      print "        <name>NGram Order</name>\n";
+      print "        <value>$n</value>\n";
+      print "      </attr>\n";
+      print "    </kwinfo>\n";
+      print "  </kw>\n";
+    }
+  }
+}
+
+if (@ARGV != 1) {
+  print STDERR "Incorrect number of command-line parameters\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+}
+
+#Read the categories table
+open(G, $ARGV[0]) or die "Cannot open the categories table $ARGV[0]";
+while (my $line = <G>) {
+  my @entries = split(" ", $line);
+  my $kwid = shift @entries;
+
+  foreach my $group (@entries) {
+    $CATEGORIES{$kwid}->{$group} = 1;
+  }
+}
+close(G);
+
+my $p1 = new XML::Parser(Style => 'Subs');
+$p1->setHandlers(Char => \&char);
+$p1->parse(*STDIN);
+
diff --git a/egs/babel/s5d/local/search/combine.sh b/egs/babel/s5d/local/search/combine.sh
new file mode 100755
index 00000000000..4f77c0f0f7c
--- /dev/null
+++ b/egs/babel/s5d/local/search/combine.sh
@@ -0,0 +1,258 @@
+#!/bin/bash
+# Copyright 2013-2014  Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
+# Copyright (c) 2016, Johns Hopkins University (Yenda Trmal <jtrmal@gmail.com>)
+# License: Apache 2.0
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+nbest_final=900
+nbest_small=20
+extraid=
+skip_scoring=false
+optimize=true
+duptime=52
+power=1.1
+ntrue_scale=
+#end of configuration section
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+set -e -o pipefail
+set -o nounset  # Treat unset variables as an error
+
+help_message="Usage: $0 [options] <data-dir> <lang-dir> <decode-dir1> [<decode-dir2> ... <decode-dirN>] <output-dir>
+E.g.: $0 data/dev10h.pem data/lang exp/tri6_nnet/decode_dev10h.pem/kws_10/ exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/ exp/combine/dev10hx.pem
+"
+if [ $# -lt 5 ]; then
+  printf "$help_message\n";
+  exit 1;
+fi
+
+data=$1; shift;
+lang=$1; shift;
+output=${@: -1}  # last argument to the script
+decode_dirs=( $@ )  # read the remaining arguments into an array
+unset decode_dirs[${#decode_dirs[@]}-1]  # 'pop' the last argument, which is the output dir
+num_sys=${#decode_dirs[@]}  # number of systems to combine
+
+if [ -z "$extraid" ] ; then
+  data="$data/kws"
+  output="$output/kws"
+else
+  data="$data/kwset_${extraid}"
+  output="$output/kwset_${extraid}"
+fi
+
+if [ -z "$ntrue_scale" ] ; then
+  ntrue_scale=$num_sys
+fi
+
+declare -A params=([PWR]=$power [NTRUE]=$ntrue_scale)
+declare -A files
+declare -A files_reduced
+
+mkdir -p $output
+mkdir -p $output/log
+
+echo "$0: Combination config (id, weight, results) -- initial"
+
+i=1
+nsystems=0
+for elem in ${decode_dirs[@]} ; do
+  params[W$i]="0.5"
+  if [ -f $elem ] ; then
+    f=$(echo $elem | cut -d: -f1)
+    w=$(echo $elem | cut -d: -s -f2)
+
+    [ ! -z "$w" ] && params[W$i]="$w"
+    files[W$i]=$f
+    files_reduced[W$i]=$output/results.reduced.$i
+  elif [ -d $elem ] && [ -d $elem/details ] ; then
+    mtwv=$(cat $elem/details/score.txt | grep "MTWV *=" | cut -f 2 -d '=' | sed 's/ //g')
+    params[W$i]="$mtwv"
+    files[W$i]=$elem/details/results
+    files_reduced[W$i]=$output/results.reduced.$i
+  elif [ -d $elem ] ; then
+    best_dir=$(find ${elem}_* -name "score.txt" \
+                    -path "*$extraid*" \
+                    -path "*/details/*" |\
+               xargs grep "MTWV *=" | \
+               sort -k2,2g -t '=' |
+               tail -n 1 | \
+               cut -f 1 -d ':' | \
+               xargs dirname \
+              )
+    mtwv=$(cat $best_dir/score.txt | grep "MTWV *=" | cut -f 2 -d '=' | sed 's/ //g')
+    params[W$i]="$mtwv"
+    files[W$i]=$best_dir/results
+    files_reduced[W$i]=$output/results.reduced.$i
+  else
+    echo >&2 "$0: The parameter \"$elem\" is neither a file nor a directory"
+  fi
+
+  echo "  $i W$i=${params[W$i]} ${files[W$i]}"
+  echo "${files[W$i]}" > $output/results_W$i
+
+  cat ${files[W$i]} | \
+    local/search/filter_kws_results.pl --probs --nbest $nbest_small > ${files_reduced[W$i]}
+
+  nsystems=$i
+  i=$(($i+1))
+done
+
+if [ $nsystems -le 0 ] ; then
+  echo >&2 "No acoustic system found"
+  exit 1
+fi
+
+trials=$(cat $data/trials)
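+
+# The combination is tuned in two stages: first, all parameters (the system
+# weights W1..Wn, PWR and NTRUE) are optimized jointly on the nbest-reduced
+# result lists created above (cheap to re-score); afterwards only NTRUE is
+# re-tuned on the full-size results. local/optimize2.pl appears to act as a
+# black-box optimizer here: it re-runs the given command template with
+# different values substituted for the declared --var variables and follows
+# the value matched by --result-regexp (the ATWV printed by compute-atwv).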
+
+if $optimize ; then
+  cmdline=
+
+  opt_vars=""
+  opt_task_params=""
+  for w in "${!params[@]}" ; do
+    opt_vars="$opt_vars --var $w=${params[$w]}"
+
+    if [ ${files_reduced[$w]+isset} ] ; then
+      opt_task_params="$opt_task_params $w ${files_reduced[$w]}"
+    fi
+  done
+
+  echo "$0: Optimization -- first stage (reduced size results)"
+  mkdir -p $output/opt
+  local/optimize2.pl --result-regexp '.*ATWV *= *(.*)' --ftol 0.01 --iftol 0.01 \
+    --output-dir $output/opt $opt_vars \
+    local/search/combine_results.pl --probs --power PWR $opt_task_params - \| \
+    local/search/normalize_results_kst.pl --duration $trials --ntrue-scale NTRUE \| \
+    local/search/filter_kws_results.pl --nbest 100 \| \
+    compute-atwv $trials ark:$data/hitlist ark:- | \
+    tee $output/log/optimize.log | grep -i "Iter" || {
+      echo >&2 "$0: Optimization failed (see $output/log/optimize.log for errors)"; exit 1
+    }
+
+  # override the default parameters
+  if [ -f $output/opt/params.sh ] ; then
+    . $output/opt/params.sh
+  else
+    echo >&2 "$0: Optimization output in $output/opt/params.sh not found";
+    exit 1;
+  fi
+
+  # Second round of optimization -- this time, only the NTRUE
+  comb_task_params=""
+  for w in "${!params[@]}" ; do
+    if [ ${files[$w]+isset} ] ; then
+      comb_task_params="$comb_task_params ${params[$w]} ${files[$w]}"
+    fi
+  done
+
+  echo "$0: Optimization -- second stage (full size results)"
+  mkdir -p $output/opt_ntrue
+  local/optimize2.pl --result-regexp '.*ATWV *= *(.*)' \
+    --output-dir $output/opt_ntrue --var NTRUE=${params[NTRUE]} \
+    local/search/combine_results.pl --probs --tolerance $duptime --power ${params[PWR]} $comb_task_params - \| \
+    local/search/normalize_results_kst.pl --duration $trials --ntrue-scale NTRUE \| \
+    local/search/filter_kws_results.pl --probs --duptime $duptime \| \
+    compute-atwv $trials ark:$data/hitlist ark:- | \
+    tee $output/log/optimize_ntrue.log | grep -i "Iteration" || {
+      echo >&2 "$0: Optimization failed (see $output/log/optimize_ntrue.log for errors)"; exit 1
+    }
+  # override the default parameters
+  if [ -f $output/opt_ntrue/params.sh ] ; then
+    . $output/opt_ntrue/params.sh
+  else
+    echo >&2 "$0: Optimization output in $output/opt_ntrue/params.sh not found";
+    exit 1;
+  fi
+fi
+
+echo "$0: Combination config (final)"
+echo -n "$0: params=["
+comb_task_params=""
+for w in "${!params[@]}" ; do
+  echo -n " $w=${params[$w]}"
+  if [ ${files[$w]+isset} ] ; then
+    comb_task_params="$comb_task_params ${params[$w]} ${files[$w]}"
+  fi
+done
+echo "]"
+
+mkdir -p $output/details
+
+echo "$0: Doing final combination"
+local/search/combine_results.pl \
+  --probs --tolerance $duptime --power ${params[PWR]} $comb_task_params - | \
+  local/search/normalize_results_kst.pl \
+    --duration $trials --ntrue-scale ${params[NTRUE]} |\
+  local/search/filter_kws_results.pl --probs --duptime $duptime > $output/details/results
+
+#Write the parameters
+echo "declare -A params" > $output/details/params.sh
+for w in "${!params[@]}" ; do
+  echo "params[$w]=${params[$w]}"
+done >> $output/details/params.sh
+echo "${params[NTRUE]}" > $output/details/ntrue
+echo "${params[PWR]}" > $output/details/power
+
+if ! $skip_scoring ; then
+  echo "$0: Scoring..."
+  cat $output/details/results |\
+    compute-atwv $trials ark,t:$data/hitlist ark:- \
+      ${output}/details/alignment.csv \
+      > ${output}/details/score.txt \
+      2> ${output}/log/score.log
+
+  cat ${output}/details/alignment.csv |\
+    perl local/search/per_category_stats.pl \
+      --sweep-step 0.005 $trials $data/categories \
+      > ${output}/details/per-category-score.txt \
+      2> ${output}/log/per-category-score.log
+
+  cp $output/details/score.txt $output/score.txt
+fi
+
+if [ $stage -le 2 ]; then
+  if [ -f $data/f4de_attribs ] ; then
+    language=""
+    flen=0.01
+    kwlist_name=""
+    . $data/f4de_attribs  # override the previous variables
+
+    ecf=$data/ecf.xml
+    rttm=$data/rttm
+    kwlist=$data/kwlist.xml
+
+    mkdir -p ${output}/f4de/
+
+    cat $kwlist | local/search/annotate_kwlist.pl $data/categories > ${output}/f4de/kwlist.xml
+    kwlist=${output}/f4de/kwlist.xml
+
+    cat ${output}/details/results | \
+      utils/int2sym.pl -f 2 $data/utt.map | \
+      local/search/utt_to_files.pl --flen "$flen" $data/../segments |\
+      local/search/write_kwslist.pl --flen "$flen" --language "$language" \
+        --kwlist-id "$kwlist_name" > ${output}/f4de/kwslist.xml
+
+    KWSEval -e $ecf -r $rttm -t $kwlist -a \
+      --zGlobalMeasures Optimum --zGlobalMeasures Supremum \
+      -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \
+      -O -B -q 'OOV:regex=.*' -q 'BaseOOV:regex=.*' \
+      -s ${output}/f4de/kwslist.xml -c -o -b -d -f ${output}/f4de/
+
+    local/kws_oracle_threshold.pl --duration $trials \
+      ${output}/f4de/alignment.csv > ${output}/f4de/metrics.txt
+  fi
+fi
+
+echo "$0: All OK"
diff --git a/egs/babel/s5d/local/search/combine_results.pl b/egs/babel/s5d/local/search/combine_results.pl
new file mode 100755
index 00000000000..694ee47c2cd
--- /dev/null
+++ b/egs/babel/s5d/local/search/combine_results.pl
@@ -0,0 +1,422 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2016  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+my $Usage = <<EOU;
+Usage: $0 [options] <w1> <results1> <w2> <results2> ... <output|->
+ e.g.: $0 0.5 kwslist1.xml 0.5 kwslist2.xml ... kwslist_comb.xml
+
+Allowed options:
+  --probs       : The input scores are probabilities, not negative log-likelihoods
+  --method      : Use different combination method (int, default = 1)
+                  0 -- CombSUM
+                  1 -- CombMNZ
+  --input-norm  : how the input data should be normalized (int, default = 0)
+                  0 -- Saturate
+                  1 -- NormSTO
+                  2 -- source-wise NormSTO
+  --output-norm : how the output data should be normalized (int, default = 0)
+                  0 -- Saturate
+                  1 -- NormSTO
+  --power       : The weighted power mean p-coefficient (float, default = 0.5)
+  --inv-power   : Set the power coefficient as 1/value (float)
+  --gamma       : The gamma coefficient for CombMNZ (float, default = 0.0)
+  --tolerance   : Tolerance (in frames) for being the same hits (float, default = 50)
+
+EOU
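+
+# Each input results file is expected to contain one detection per line, in
+# the format the search scripts here produce:
+#   <kw-id> <utt-or-file-id> <t-beg> <t-end> <score>
+# e.g. (illustrative): KW204-0001 10001 123 145 0.87
+# Times are in frames (hence the default tolerance of 50); with --probs the
+# scores are probabilities, and PrintResults() below writes them back out as
+# negative log-probabilities.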
+use strict;
+use warnings "FATAL";
+use utf8;
+use POSIX;
+use Data::Dumper;
+use Getopt::Long;
+use File::Basename;
+use Scalar::Util qw(looks_like_number);
+
+$Data::Dumper::Indent = 2;
+
+my $TOL = 50;
+my $LIKES = 0;
+
+sub OpenResults {
+  my $list = shift @_;
+
+  my $source = \*STDIN;
+  if ($list ne "-") {
+    open(my $i, "<$list") || die "Fail to open file $list.\n";
+    return $i;
+  }
+  return $source
+}
+
+sub PrintResults {
+  my $KWS = shift @_;
+
+  # Start printing
+  my $result = "";
+  foreach my $kwentry (@{$KWS}) {
+    my ($kwid, $file, $tbeg, $tend, $score, $dummy) = @{$kwentry};
+    if ($score > 0) {
+      $score = -log($score);
+    } elsif ($score == 0) {
+      $score = 9999;
+    } else {
+      die "Cannot take logarithm of a negative number\n" . join(" ", @{$kwentry}) . "\n";
+    }
+    $result .= "$kwid $file $tbeg $tend $score\n";
+  }
+
+  return $result;
+}
+
+sub KwslistTimeCompare {
+  my ($a, $b) = @_;
+
+  if ($a->[0] eq $b->[0]) {  # KWID
+    if ($a->[1] eq $b->[1]) {  # FILEID
+      if (abs($a->[2] - $b->[2]) <= $TOL) {  # KW START
+        if (abs($a->[3] - $b->[3]) <= $TOL) {  # KW END
+          return 0;
+        } else {
+          return ($a->[3] <=> $b->[3]);
+        }
+      } else {
+        return $a->[2] <=> $b->[2];
+      }
+    } else {
+      return $a->[1] cmp $b->[1];
+    }
+  } else {
+    return $a->[0] cmp $b->[0];
+  }
+}
+
+sub KwslistTimeSort {
+  my $a = shift;
+  my $b = shift;
+  return KwslistTimeCompare($a, $b);
+}
+
+sub ReadLines {
+  my $kwid = shift @_;
+  my %files = %{shift @_};
+  my @lines = ();
+
+  foreach my $id (sort keys %files) {
+    my $l = readline $files{$id};
+    next unless $l;
+    chomp $l;
+    my @entries = split " ", $l;
+    while ($kwid eq $entries[0]) {
+      push @entries, $id;
+      push @lines, [@entries];
+
+      $l = readline $files{$id};
+      last unless $l;
+      chomp $l;
+      @entries = split " ", $l;
+    }
+    next unless defined $l;
+    push @entries, $id;
+    push @lines, [@entries];
+  }
+  return @lines;
+}
+
+sub ReadFirstLines {
+  my %files = %{shift @_};
+  my @lines = ();
+
+  foreach my $id (sort keys %files) {
+    my $l = readline $files{$id};
+    next unless $l;
+    chomp $l;
+
+    my @entries = split " ", $l;
+    push @entries, $id;
+    push @lines, [@entries];
+  }
+  return @lines;
+}
+
+sub MergeCombPwrSum {
+  my @results = @{shift @_};
+  my %weights = %{shift @_};
+  my $pwr = shift @_;
+  my @output = ();
+
+  return \@output if not @results;
+
+  while (@results) {
+    my @mergelist = ();
+    push @mergelist, shift @results;
+    while ((@results) && (KwslistTimeCompare($mergelist[0], $results[0]) == 0)) {
+      push @mergelist, shift @results;
+    }
+
+    my $best_score = -9999;
+    my $tend;
+    my $tbegin;
+    my $out_score = 0;
+    foreach my $elem (@mergelist) {
+      my $score = $elem->[4];
+      my $id = $elem->[5];
+      if ($score > $best_score) {
+        $best_score = $score;
+        $tend = $elem->[3];
+        $tbegin = $elem->[2];
+      }
+      #print "$out_score += $weights{$id} * $score\n";
+      $out_score += $weights{$id} * ($score ** $pwr);
+    }
+    $out_score = $out_score**(1.0/$pwr);
+    #print "$out_score \n\n\n";
+    my $KWID = $mergelist[0]->[0];
+    my $UTT = $mergelist[0]->[1];
+    push @output, [$KWID, $UTT, $tbegin, $tend, $out_score, ""];
+  }
+
+  return \@output;
+}
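+
+## The merge above computes a weighted power mean of the overlapping hits'
+## scores: out = (sum_i w_i * s_i^p)^(1/p), with p given by --power.
+## E.g. (illustrative): two overlapping hits with scores 0.9 and 0.5 and
+## weights 0.5 each combine, for p = 1.0, to 0.5*0.9 + 0.5*0.5 = 0.7;
+## larger p pushes the combined score toward the best single-system score.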
+
+## More generic version of the combMNZ method
+sub MergeCombPwrMNZ {
+  my @results = @{shift @_};
+  my %weights = %{shift @_};
+  my $pwr = shift @_;
+  my $gamma = shift @_;
+  my @output = ();
+
+  $gamma = 0 unless defined $gamma;
+  return \@output if not @results;
+
+  while (@results) {
+    my @mergelist = ();
+    push @mergelist, shift @results;
+    while ((@results) && (KwslistTimeCompare($mergelist[0], $results[0]) == 0)) {
+      push @mergelist, shift @results;
+    }
+
+    my $best_score = -9999;
+    my $tend;
+    my $tbegin;
+    my $out_score = 0;
+    foreach my $elem (@mergelist) {
+      my $score = $elem->[4];
+      my $id = $elem->[5];
+      if ($score > $best_score) {
+        $best_score = $score;
+        $tend = $elem->[3];
+        $tbegin = $elem->[2];
+      }
+      #print "$out_score += $weights{$id} * $score\n";
+      $out_score += $weights{$id} * ($score ** $pwr);
+    }
+    $out_score = (@mergelist ** $gamma) * $out_score**(1.0/$pwr);
+    #print "$out_score \n\n\n";
+    my $KWID = $mergelist[0]->[0];
+    my $UTT = $mergelist[0]->[1];
+    push @output, [$KWID, $UTT, $tbegin, $tend, $out_score, "out"];
+  }
+
+  return \@output;
+}
+
+### Sum-to-one normalization
+sub NormalizeSTO {
+  my @results = @{shift @_};
+  my @output = ();
+  my $sum = 0;
+  foreach my $elem (@results) {
+    $sum += $elem->[4];
+  }
+  foreach my $elem (@results) {
+    $elem->[4] = $elem->[4]/$sum;
+    push @output, $elem;
+  }
+  return \@output;
+}
+
+### This will STO normalize all entries in the @results according
+### to the id, so that entries with the same id will sum to one
+sub NormalizeSTOMulti {
+  my @results = @{shift @_};
+  my @output = ();
+  my $sum = 0;
+  my %sums = ();
+  foreach my $elem (@results) {
+    $sums{$elem->[5]} += $elem->[4];
+  }
+  foreach my $elem (@results) {
+    $elem->[4] = $elem->[4]/$sums{$elem->[5]};
+    push @output, $elem;
+  }
+  return \@output;
+}
+
+### Simple normalization of probabilities/scores
+### Everything larger than 1 will be set to 1
+sub NormalizeSaturate {
+  my @results = @{shift @_};
+  my @output = ();
+  my $sum = 0;
+  foreach my $elem (@results) {
+    $elem->[4] = $elem->[4] > 1.0 ? 1.0 : $elem->[4];
+    push @output, $elem;
+  }
+  return \@output;
+}
+
+my $method = 1;
+my $input_norm = 0;
+my $output_norm = 0;
+my $gamma = 0;
+my $power = 0.5;
+GetOptions('tolerance=f' => \$TOL,
+           'method=i' => sub { shift; $method = shift;
+                               if (($method < 0) || ($method > 1)) {
+                                 die "Unknown method $method\n\n$Usage\n";
+                               }
+                             },
+           'input-norm=i' => sub { shift; my $n = shift;
+                                   $input_norm = $n;
+                                   if (($n < 0) || ($n > 2)) {
+                                     die "Unknown input-norm $n\n\n$Usage\n";
+                                   }
+                                 },
+           'output-norm=i' => sub { shift; my $n = shift;
+                                    $output_norm = $n;
+                                    if (($n != 0) && ($n != 1)) {
+                                      die "Unknown output-norm $n\n\n$Usage\n";
+                                    }
+                                  },
+           'power=f' => \$power,
+           'gamma=f' => \$gamma,
+           'inv-power=f' => sub {
+             shift; my $val = shift;
+             $power = 1.0/$val;
+           },
+           'probs' => sub {
+             $LIKES = 0;
+           }
+          ) || do {
+  print STDERR "Cannot parse the command-line parameters.\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+};
+
+if (@ARGV % 2 != 1) {
+  print STDERR "Bad number of (weight, results_list) pairs.\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+}
+
+# Work out the input/output sources
+my %results_files = ();
+my %results_w = ();
+
+my $i = 0;
+while (@ARGV != 1) {
+  my $w = shift @ARGV;
+  looks_like_number($w) || die "$0: Bad weight: $w.\n";
+  $results_w{$i} = $w;
+  $results_files{$i} = OpenResults(shift @ARGV);
+  $i += 1;
+}
+
+my $sumw=0;
+foreach my $val (values %results_w ) {
+  $sumw += $val;
+}
+#foreach my $val (keys %results_w ) {
+#  $results_w{$val} = $results_w{$val}/$sumw;
+#}
+
+my $output = shift @ARGV;
+
+my $deb = 0;
+my @lines = ();
+@lines = ReadFirstLines(\%results_files);
+@lines = sort { KwslistTimeSort($a, $b) } @lines;
+push @lines, ReadLines($lines[0]->[0], \%results_files);
+@lines = sort { KwslistTimeSort($a, $b) } @lines;
+
+while (@lines) {
+  my @res = ();
+
+  push @res, shift @lines;
+  while ((@lines) && ($lines[0]->[0] eq $res[0]->[0])) {
+    push @res, shift @lines;
+  }
+  #print PrintResults(\@res);
+  #print PrintResults(NormalizeSTO(MergeCombMNZ(\@res, \%results_w)));
+  #print PrintResults(NormalizeCutoff(MergeCombPwrSum(\@res, \%results_w, $power)));
+  #print PrintResults(NormalizeSaturate(MergeCombPwrMNZ(\@res, \%results_w, $power, $gamma)));
+  #print PrintResults(NormalizeSTO(MergeCombPwrMNZ(NormalizeSTO(\@res), \%results_w, $power, $gamma)));
+
+  my $data = undef;
+  if ($input_norm == 1) {
+    $data = NormalizeSTO(\@res);
+  } elsif ($input_norm == 2) {
+    $data = NormalizeSTOMulti(\@res);
+  } else {
+    $data = NormalizeSaturate(\@res);
+  }
+
+  if ($method == 0) {
+    $data = MergeCombPwrSum($data, \%results_w, $power);
+  } else {
+    $data = MergeCombPwrMNZ($data, \%results_w, $power, $gamma);
+  }
+
+  if ($output_norm == 1) {
+    $data = NormalizeSTO($data);
+  } else {
+    $data = NormalizeSaturate($data);
+  }
+
+  print PrintResults($data);
+
+  #exit if $deb > 3;
+  #$deb += 1 if $deb;
+  #if ($res[0]->[0] eq "KW305-02318") {
+  #  $deb = 1;
+  #  print Dumper("START", \@res, \@lines) if $deb;
+  #}
+
+  my @tmp = ();
+  if (@lines) {
+    @tmp = ReadLines($lines[0]->[0], \%results_files);
+  } else {
+    # this is probably not necessary -- the ReadLines() call
+    # will always read one line _past_ the current KW,
+    # so we should always have an extra KW in @lines
+    @tmp = ReadFirstLines(\%results_files);
+  }
+
+  #print Dumper("TMP", \@tmp) if $deb;
+  if (@tmp > 0) {
+    #print Dumper("XXX", \@res, \@lines) if $deb;
+    push @lines, @tmp;
+    @lines = sort { KwslistTimeSort($a, $b) } @lines;
+  }
+
+  #print Dumper(\@res, \@lines) if $deb;
+}
diff --git a/egs/babel/s5d/local/search/combine_special.sh b/egs/babel/s5d/local/search/combine_special.sh
new file mode 100755
index 00000000000..5802f49be06
--- /dev/null
+++ b/egs/babel/s5d/local/search/combine_special.sh
@@ -0,0 +1,200 @@
+#!/bin/bash
+# Copyright 2013-2014  Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
+# Copyright (c) 2016, Johns Hopkins University (Yenda Trmal <jtrmal@gmail.com>)
+# License: Apache 2.0
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+nbest_final=900
+nbest_small=20
+extraid=
+skip_scoring=false
+optimize=true
+duptime=52
+power=1.1
+ntrue_scale=
+#end of configuration section
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+set -e -o pipefail
+set -o nounset  # Treat unset variables as an error
+
+help_message="Usage: $0 [options] <data-dir> <lang-dir> <template-dir> <decode-dir1> [<decode-dir2> ... <decode-dirN>] <output-dir>
+E.g.: $0 data/dev10h.pem data/lang exp/tri6_nnet/decode_dev10h.pem/kws_10/ exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/ exp/combine/dev10hx.pem
+"
+if [ $# -lt 5 ]; then
+  printf "$help_message\n";
+  exit 1;
+fi
+
+data=$1; shift;
+lang=$1; shift;
+template=$1; shift;
+output=${@: -1}  # last argument to the script
+decode_dirs=( $@ )  # read the remaining arguments into an array
+unset decode_dirs[${#decode_dirs[@]}-1]  # 'pop' the last argument, which is the output dir
+num_sys=${#decode_dirs[@]}  # number of systems to combine
+
+if [ -z "$extraid" ] ; then
+  data="$data/kws"
+  output="$output/kws"
+else
+  data="$data/kwset_${extraid}"
+  output="$output/kwset_${extraid}"
+fi
+
+if [ -z "$ntrue_scale" ] ; then
+  ntrue_scale=$num_sys
+fi
+
+declare -A params=([PWR]=$power [NTRUE]=$ntrue_scale)
+declare -A files
+declare -A files_reduced
+
+mkdir -p $output
+mkdir -p $output/log
+
+if [ -f $template/details/params.sh ] ; then
+  . $template/details/params.sh
+else
+  echo >&2 "$0: Optimization output in $template/details/params.sh not found";
+  exit 1;
+fi
+
+echo "$0: Combination config (id, weight, results) -- initial"
+
+i=1
+for elem in ${decode_dirs[@]} ; do
+  if [ -f $elem ] ; then
+    files[W$i]=$elem
+  elif [ -d $elem ] && [ -d $elem/details ] ; then
+    files[W$i]=$elem/details/results
+  elif [ -d $elem ] ; then
+    tmpl=`cat $template/results_W${i}`
+    echo $tmpl
+    # e.g. exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist4_10/details/results
+    if [[ "$tmpl" == */details/results ]] ; then
+      base=`echo $tmpl | sed 's:/details/results::g'`
+      base=`basename $base`
+      lmwt=${base##*_}
+      tmpl_kwset=${base%_*}
+      tmpl_kwset=${tmpl_kwset##*_}
+    else
+      echo >&2 "The template results file does not follow the naming pattern"
+      exit 1
+    fi
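+    # E.g. (using the hypothetical path from the comment above):
+    #   tmpl       = exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist4_10/details/results
+    #   base       = kwset_kwlist4_10
+    #   lmwt       = 10
+    #   tmpl_kwset = kwlist4
+    # and the matching results for this system are ${elem}_10/details/results.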
+    f=${elem}_${lmwt}/details/results
+    if [ ! -f $f ]; then
+      echo >&2 "The file $f does not exist (check template or $template/results_W${i})"
+      exit 1
+    fi
+    kwset=${elem##*_}
+    if [ "$kwset" != "$tmpl_kwset" ] ; then
+      echo >&2 "WARNING: The kwset and the template kwset do not match! ($kwset vs $tmpl_kwset)"
+    fi
+
+    files[W$i]=$f
+  else
+    echo >&2 "$0: The parameter \"$elem\" is neither a file nor a directory"
+  fi
+  echo "  $i W$i=${params[W$i]} ${files[W$i]}"
+
+  i=$(($i+1))
+done
+
+trials=$(cat $data/trials)
+
+echo "$0: Combination config (final)"
+echo -n "$0: params=["
+comb_task_params=""
+for w in "${!params[@]}" ; do
+  echo -n " $w=${params[$w]}"
+  if [ ${files[$w]+isset} ] ; then
+    comb_task_params="$comb_task_params ${params[$w]} ${files[$w]}"
+  fi
+done
+echo "]"
+
+mkdir -p $output/details
+
+echo "$0: Doing final combination"
+local/search/combine_results.pl \
+  --probs --tolerance $duptime --power ${params[PWR]} $comb_task_params - | \
+  local/search/normalize_results_kst.pl \
+    --duration $trials --ntrue-scale ${params[NTRUE]} |\
+  local/search/filter_kws_results.pl --probs --duptime $duptime > $output/details/results
+
+#Write the parameters
+echo "declare -A params" > $output/details/params.sh
+for w in "${!params[@]}" ; do
+  echo "params[$w]=${params[$w]}"
+done >> $output/details/params.sh
+echo "${params[NTRUE]}" > $output/details/ntrue
+echo "${params[PWR]}" > $output/details/power
+
+echo "DATA: $data"
+if ! $skip_scoring && [ -f $data/hitlist ] ; then
+  echo "$0: Scoring..."
+  cat $output/details/results |\
+    compute-atwv $trials ark,t:$data/hitlist ark:- \
+      ${output}/details/alignment.csv \
+      > ${output}/details/score.txt \
+      2> ${output}/log/score.log
+
+  cat ${output}/details/alignment.csv |\
+    perl local/search/per_category_stats.pl \
+      --sweep-step 0.005 $trials $data/categories \
+      > ${output}/details/per-category-score.txt \
+      2> ${output}/log/per-category-score.log
+
+  cp $output/details/score.txt $output/score.txt
+fi
+
+if [ $stage -le 2 ]; then
+  if [ -f $data/f4de_attribs ] ; then
+    language=""
+    flen=0.01
+    kwlist_name=""
+    . $data/f4de_attribs  # override the previous variables
+
+    ecf=$data/ecf.xml
+    rttm=$data/rttm
+    kwlist=$data/kwlist.xml
+
+    mkdir -p ${output}/f4de/
+
+    cat ${output}/details/results | \
+      utils/int2sym.pl -f 2 $data/utt.map | \
+      local/search/utt_to_files.pl --flen "$flen" $data/../segments |\
+      local/search/write_kwslist.pl --flen "$flen" --language "$language" \
+        --kwlist-id "$kwlist_name" > ${output}/f4de/kwslist.xml
+
+    if [ -f $rttm ] ; then
+      cat $kwlist | local/search/annotate_kwlist.pl $data/categories > ${output}/f4de/kwlist.xml
+      kwlist=${output}/f4de/kwlist.xml
+
+      KWSEval -e $ecf -r $rttm -t $kwlist -a \
+        --zGlobalMeasures Optimum --zGlobalMeasures Supremum \
+        -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \
+        -O -B -q 'OOV:regex=.*' -q 'BaseOOV:regex=.*' \
+        -s ${output}/f4de/kwslist.xml -c -o -b -d -f ${output}/f4de/
+
+      local/kws_oracle_threshold.pl --duration $trials \
+        ${output}/f4de/alignment.csv > ${output}/f4de/metrics.txt
+    fi
+  fi
+fi
+
+echo "$0: All OK"
diff --git a/egs/babel/s5d/local/search/compile_keywords.sh b/egs/babel/s5d/local/search/compile_keywords.sh
new file mode 100755
index 00000000000..92dc4220a8e
--- /dev/null
+++ b/egs/babel/s5d/local/search/compile_keywords.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal <jtrmal@gmail.com>)
+# License: Apache 2.0
+
+# Begin configuration section.
+silence_word=
+filter='OOV=0'
+# End configuration section
+echo $0 "$@"
+. parse_options.sh || exit 1;
+
+set -e -o pipefail
+set -o nounset  # Treat unset variables as an error
+
+data=$1
+lang=$2
+workdir=$3
+
+mkdir -p $workdir
+cat $data/categories | \
+  local/search/filter_by_category.pl $data/categories "$filter" > $workdir/categories
+
+if [ ! -s $workdir/categories ]; then
+  echo "$0: WARNING: $workdir/categories is zero-size. That means no keyword"
+  echo "$0: WARNING: was found that fits the filter \"$filter\". That might be expected."
+  touch $workdir/keywords.int
+  touch $workdir/keywords.fsts
+  exit 0
+fi
+
+grep -w -F -f <(awk '{print $1}' $workdir/categories) \
+  $data/keywords.int > $workdir/keywords.int
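+
+# transcripts-to-fsts (below) turns each keyword's word-id sequence into a
+# linear acceptor. When --silence-word is given, the awk filter additionally
+# emits a self-loop on every non-initial state, so optional silence may occur
+# inside the keyword. E.g. (illustrative ids, silence id 3) the arc line
+#   1  2  7  7
+# is preceded by a new self-loop line
+#   1  1  3  3
+# before fstcopy re-compiles the table.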
+
+if [ -s $workdir/keywords.int ]; then
+  if [ -z $silence_word ]; then
+    transcripts-to-fsts ark:$workdir/keywords.int \
+      ark,scp,t:$workdir/keywords.fsts,- | sort -o $workdir/keywords.scp
+  else
+    silence_int=`grep -w $silence_word $lang/words.txt | awk '{print $2}'`
+    [ -z $silence_int ] && \
+      echo "$0: Error: could not find integer representation of silence word $silence_word" && exit 1;
+    transcripts-to-fsts ark:$workdir/keywords.int ark,t:- | \
+      awk -v 'OFS=\t' -v silint=$silence_int '{
+        if (NF == 4 && $1 != 0) { print $1, $1, silint, silint; } print;
+      }' | fstcopy ark:- ark,scp,t:$workdir/keywords.fsts,- | \
+      sort -o $workdir/keywords.scp
+  fi
+else
+  echo "$0: WARNING: $workdir/keywords.int is zero-size. That means no keyword"
+  echo "$0: WARNING: was found in the dictionary. That might be expected -- or not."
+  touch $workdir/keywords.fsts
+fi
+
diff --git a/egs/babel/s5d/local/search/compile_proxy_keywords.sh b/egs/babel/s5d/local/search/compile_proxy_keywords.sh
new file mode 100755
index 00000000000..a28105123f3
--- /dev/null
+++ b/egs/babel/s5d/local/search/compile_proxy_keywords.sh
@@ -0,0 +1,271 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal <jtrmal@gmail.com>)
+#               2012-2014  Guoguo Chen
+# License: Apache 2.0
+
+# Begin configuration section.
+nj=8
+cmd=run.pl
+beam=-1             # Beam for proxy FST, -1 means no prune
+phone_beam=-1       # Beam for KxL2xE FST, -1 means no prune
+nbest=-1            # Use top n best proxy keywords in proxy FST, -1 means all
+                    # proxies
+phone_nbest=-1      # Use top n best phone sequences in KxL2xE, -1 means all
+                    # phone sequences
+confusion_matrix=   # If supplied, using corresponding E transducer
+count_cutoff=1      # Minimal count to be considered in the confusion matrix;
+                    # will ignore phone pairs that have count less than this.
+pron_probs=true     # If true, then lexicon looks like:
+                    # Word Prob Phone1 Phone2...
+g_beam=10
+g_alpha=
+g_inv_alpha=
+g2p_nbest=10
+g2p_mass=0.95
+case_insensitive=true
+icu_transform="Any-Lower"
+filter="OOV=1"
+# End configuration section
+
+echo "$0 " "$@"
+. ./utils/parse_options.sh || exit 1;
+
+# Gets phone symbols
+phone_start=2
+if $pron_probs; then
+  phone_start=3
+fi
+
+set -e -o pipefail
+set -o nounset  # Treat unset variables as an error
+
+data=$1
+lang=$2
+l1lex=$3
+g2p=$4
+workdir=$5
+
+if [ ! -z "$g_inv_alpha" ] && [ $g_inv_alpha -ne 0 ] ; then
+  g_alpha=$(echo print 1.0/$g_inv_alpha | perl)
+fi
+
+# Checks some files.
+for f in $l1lex $data/categories $data/keywords.txt ; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1
+done
+
+mkdir -p $workdir
+cat $data/categories | \
+  local/search/filter_by_category.pl $data/categories "$filter" > $workdir/categories
+
+grep -w -F -f <(awk '{print $1}' $workdir/categories) $data/keywords.txt |\
+  sort -R > $workdir/keywords.filtered
+
+paste <(cut -f 1 $workdir/keywords.filtered) \
+      <(cut -f 2- $workdir/keywords.filtered | uconv -f utf-8 -t utf-8 -x "$icu_transform") > $workdir/keywords.txt
+
+cat $l1lex | perl -e '
+  while (<>) {
+    ($word, $prob, $pron) = split " ", $_, 3;
+    $pron =~ s/_[^\s]+//g;
+    $pron =~ s/\s+/ /g;
+    $pron =~ s/^\s+//g;
+    $pron =~ s/\s+$//g;
+    print "$word $prob $pron\n"
+  }
+' | sort -u > $workdir/L1.lex
+
+mkdir -p $workdir/lexicon
+
+cat $workdir/keywords.txt | perl -e '
+  open(f, shift @ARGV);
+  while(<f>) {
+    @F = split;
+    $lex{$F[0]} = 1;
+  }
+  close(f);
+
+  while(<STDIN>) {
+    @F = split;
+    foreach $w (@F[1..$#F]) {
+      print "$w\n" unless defined $lex{$w};
+    }
+  }
+' $workdir/L1.lex | sort -u > $workdir/lexicon/oov.txt
+
+local/apply_g2p.sh --nj $nj --cmd "$cmd" --icu-transform "$icu_transform" \
+  --var-counts $g2p_nbest --var-mass $g2p_mass \
+  $workdir/lexicon/oov.txt $g2p $workdir/lexicon || exit 1
+
+cat $workdir/L1.lex | \
+  perl -e '
+    while ( $line = <STDIN> ) {
+      chomp $line;
+      ($word, $pron) = split " ", $line, 2;
+      $pron = join(" ", split(" ", $pron));
+      push @{$LEX{$pron}}, $word;
+    }
+
+    open(L1, "| sort -u > $ARGV[0]") or die "Cannot open $ARGV[0]\n";
+    open(MAP, "| sort -u > $ARGV[1]") or die "Cannot open $ARGV[1]\n";
+    foreach $pron (keys %LEX) {
+      $head = $LEX{$pron}->[0];
+      print L1 "$head $pron\n";
+      foreach $alt (@{$LEX{$pron}}) {
+        print MAP "0 0 $alt $head\n";
+      }
+    }
+    print MAP "0\n";
+    close(L1);
+    close(MAP);
+' $workdir/L1.dedup.lex $workdir/L1.revdup.fst.txt
+
+pron_probs_param=""
+$pron_probs && pron_probs_param="--pron-probs"
+
+# Creates words.txt that covers all the words in L1.lex and L2.lex. We append
+# new words to the original word symbol table.
+cat $workdir/L1.lex $workdir/lexicon/lexicon.lex | \
+  perl -e '
+    binmode STDIN, ":utf8";
+    binmode STDOUT, ":utf8";
+    binmode STDERR, ":utf8";
+    $max_id=0;
+    %WORDS=();
+    open(F, "<:utf8", $ARGV[0]) or die "Cannot open $ARGV[0]";
+    while(<F>) {
+      ($word, $id) = split(" ", $_);
+      $WORDS{$word} = $id;
+      $max_id = $id > $max_id ? $id : $max_id;
+    }
+    close(F);
+    while (<STDIN>) {
+      @F = split(" ", $_);
+      if (not exists $WORDS{$F[0]}) {
+        $WORDS{$F[0]} = $max_id + 1;
+        $max_id += 1;
+      }
+    }
+    foreach $kw (keys %WORDS) {
+      print "$kw $WORDS{$kw}\n";
+    }
+  ' $lang/words.txt | sort -k2,2n > $workdir/words.txt
+
+cat $workdir/words.txt | \
+  uconv -f utf-8 -t utf-8 -x "$icu_transform" > $workdir/words.normalized.txt
+
+#--ndisambig=`utils/add_lex_disambig.pl \
+#--  $pron_probs_param $workdir/L1.dedup.lex $workdir/L1.disambig.lex`
+#--ndisambig=$[$ndisambig+1];  # add one disambig symbol for silence in lexicon FST.
+#--( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $workdir/disambig.txt
+
+# remove all position-dependent info and other tags
+awk '{print $1;}' $lang/phones.txt | sed 's/_[BEIS]//g' | sed 's/_.*//g' | \
+  grep -v '^#' | uniq |\
+  perl -ne 'BEGIN{$i=0;}; chomp; print $_ . " " . $i .
"\n"; $i+=1;' > $workdir/phones.txt + +#--cat $workdir/L2.lex $workdir/L1.lex |\ +#-- awk '{for(i='$phone_start'; i <= NF; i++) {print $i;}}' |\ +#-- sort -u | sed '1i\' |\ +#-- cat - $workdir/disambig.txt | awk 'BEGIN{x=0} {print $0"\t"x; x++;}' \ +#-- > $workdir/phones.txt + +cat $workdir/keywords.txt |\ + local/kwords2indices.pl --map-oov 0 $workdir/words.normalized.txt > $workdir/keywords.int + + +cat $workdir/L1.lex $workdir/lexicon/lexicon.lex | sed 's/\t/ /g' | \ + perl -ne 'chomp; + ($word, $pron) = split / /, $_, 2; + $pron =~ s/_[^ ]*//g; + print "$word $pron\n";' | \ + sort -u > $workdir/L2.lex + +cat $workdir/L1.revdup.fst.txt |\ + fstcompile --isymbols=$workdir/words.txt --osymbols=$workdir/words.txt - |\ + fstarcsort --sort_type=olabel - $workdir/L1.revdup.fst + +echo "" + +#--phone_disambig_symbol=`grep \#0 $workdir/phones.txt | awk '{print $2}'` +#--word_disambig_symbol=`grep \#0 $workdir/words.txt | awk '{print $2}'` +#--phone_disambig_symbols=`grep "^#" $workdir/phones.txt |\ +#-- awk '{print $2}' | tr "\n" " "` +#--word_disambig_symbols=`grep "^#" $workdir/words.txt |\ +#-- awk '{print $2}' | tr "\n" " "` +#-- +#--cat $workdir/L1.disambig.lex |\ +#-- utils/make_lexicon_fst.pl $pron_probs_param - |\ +#-- fstcompile --isymbols=$workdir/phones.txt \ +#-- --osymbols=$workdir/words.txt - |\ +#-- fstaddselfloops "echo $phone_disambig_symbol |" \ +#-- "echo $word_disambig_symbol |" |\ +#-- fstdeterminize | fstrmsymbols "echo $phone_disambig_symbols|" |\ +#-- fstrmsymbols --remove-from-output=true "echo $word_disambig_symbols|" |\ +#-- fstarcsort --sort_type=ilabel > $workdir/L1.fst + +cat $workdir/L1.dedup.lex |\ + utils/make_lexicon_fst.pl $pron_probs_param - |\ + fstcompile --isymbols=$workdir/phones.txt --osymbols=$workdir/words.txt - |\ + fstarcsort --sort_type=ilabel > $workdir/L1.fst + +echo "" +cat $workdir/L2.lex |\ + utils/make_lexicon_fst.pl $pron_probs_param - |\ + fstcompile --isymbols=$workdir/phones.txt --osymbols=$workdir/words.txt - |\ + fstinvert | fstarcsort --sort_type=olabel > $workdir/L2.fst + +# Compiles E.fst +conf_mat_param="" +if [ ! 
-z "$confusion_matrix" ]; then
+  echo "$0: Using confusion matrix, normalizing"
+  local/count_to_logprob.pl --cutoff $count_cutoff \
+    $confusion_matrix $workdir/confusion.txt
+  conf_mat_param="--confusion-matrix $workdir/confusion.txt"
+fi
+
+cat $workdir/phones.txt | \
+  grep -v -F -f $lang/phones/silence.txt | awk '{print $1;}' |\
+  local/build_edit_distance_fst.pl --boundary-off=true $conf_mat_param - - |\
+  fstcompile --isymbols=$workdir/phones.txt \
+  --osymbols=$workdir/phones.txt - $workdir/E.fst
+
+# Pre-composes L2 and E, for the sake of efficiency
+fstcompose $workdir/L2.fst $workdir/E.fst |\
+  fstarcsort --sort_type=ilabel > $workdir/L2xE.fst
+
+nof_keywords=`cat $workdir/keywords.txt |wc -l`
+if [ $nj -gt $nof_keywords ]; then
+  nj=$nof_keywords
+  echo "$0: More jobs requested than there are keywords; using $nj jobs instead"
+fi
+
+# Generates the proxy keywords
+mkdir -p $workdir/split/log
+if [ -z "$g_alpha" ] || [ "$g_inv_alpha" = "0" ] ; then  # NB: $g_inv_alpha can be empty
+  echo "$0: Generating proxies without G.fst"
+  $cmd JOB=1:$nj $workdir/split/log/proxy.JOB.log \
+    split -n r/JOB/$nj $workdir/keywords.int \| \
+    generate-proxy-keywords --verbose=1 \
+    --proxy-beam=$beam --proxy-nbest=$nbest \
+    --phone-beam=$phone_beam --phone-nbest=$phone_nbest \
+    $workdir/L2xE.fst $workdir/L1.fst ark:- ark,t:$workdir/split/proxy.JOB.fsts
+else
+  echo "$0: Generating proxies with G.fst"
+  $cmd JOB=1:$nj $workdir/split/log/proxy.JOB.log \
+    split -n r/JOB/$nj $workdir/keywords.int \| \
+    generate-proxy-keywords-ex --verbose=1 --g-beam=$g_beam --g-alpha=$g_alpha \
+    --proxy-beam=$beam --proxy-nbest=$nbest \
+    --phone-beam=$phone_beam --phone-nbest=$phone_nbest \
+    $workdir/L2xE.fst $workdir/L1.fst $lang/G.fst ark:- ark,t:$workdir/split/proxy.JOB.fsts
+fi
+
+
+proxy_fsts=""
+for j in `seq 1 $nj`; do
+  proxy_fsts="$proxy_fsts $workdir/split/proxy.$j.fsts"
+done
+cat $proxy_fsts | fsttablecompose $workdir/L1.revdup.fst ark:- ark:- |\
+  fsts-project ark:- ark,scp:$workdir/keywords.fsts,- |\
+  sort -o $workdir/keywords.scp
diff --git a/egs/babel/s5d/local/search/create_categories.pl b/egs/babel/s5d/local/search/create_categories.pl
new file mode 100755
index 00000000000..27703af20ca
--- /dev/null
+++ b/egs/babel/s5d/local/search/create_categories.pl
@@ -0,0 +1,112 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+my $Usage = <<EOU;
+Generates the categories table for a keyword list (or for search results).
+Usage: $0 [options] <keywords.txt|results>
+ e.g.: $0 keywords.txt
+   or  $0 --results results
+
+Allowed options:
+  --results  : instead of the keyword specification format, the keyword
+               search results format is assumed.
+
+NOTE:
+  If you need both kinds of information, you can call the script twice (with
+  different parameters) and use local/search/normalize_categories.pl to merge
+  (and normalize) the two tables together. 
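+
+  For example (the file names are illustrative), to generate both tables and
+  merge them into a single categories file:
+    $0 keywords.txt > categories.kwlist
+    $0 --results results > categories.results
+    cat categories.kwlist categories.results | local/search/normalize_categories.pl > categories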
+EOU + +use strict; +use warnings; +use utf8; +use POSIX; +use Data::Dumper; +use Getopt::Long; +use open qw(:std :utf8); + +binmode STDIN, ":utf8"; +binmode STDOUT, ":utf8"; +binmode STDERR, ":utf8"; + +my $result_format; +GetOptions("results", \$result_format) or do { + print STDERR "Cannot parse the command-line parameters.\n"; + print STDERR "$Usage\n"; + die "Cannot continue\n" +}; + +if ( @ARGV > 1 ) { + print STDERR "Incorrect number of command-line parameters\n"; + print STDERR "$Usage\n"; + die "Cannot continue\n" +} + +sub QuantizeCount { + my $count = shift @_; + + if ($count <= 0) { + return "0"; + } elsif ($count == 1) { + return "000-001"; + } elsif ($count <= 5) { + return "002-005"; + } elsif ($count <=10) { + return "006-010"; + } elsif ($count <=20) { + return "011-020"; + } elsif ($count <=100) { + return "021-100"; + } else { + return "101-inf"; + } +} + +if (not $result_format ) { + my $kwlist_name=$ARGV[0]; + while (my $line = <>) { + chomp $line; + my ($kwid, $text) = split " ", $line, 2; + + my @words = split " ", $text; + printf "$kwid NGramOrder=%03d\n", scalar @words; + printf "$kwid Characters=%03d\n", length(join("", @words)); + print "$kwid $kwid\n"; + } +} else { + my $prev_kwid = ""; + my $count = 0; + + while (my $line = <>) { + chomp $line; + my @entries = split " ", $line; + next unless @entries; + + if ($prev_kwid ne $entries[0]) { + if ($prev_kwid) { + print "$prev_kwid ResCount=$count\n"; + print "$prev_kwid ResCountQuant=" . QuantizeCount($count) . "\n"; + } + $count = 0; + $prev_kwid = $entries[0]; + } + $count += 1; + } +} + + diff --git a/egs/babel/s5d/local/search/filter_by_category.pl b/egs/babel/s5d/local/search/filter_by_category.pl new file mode 100755 index 00000000000..baef4f6ac2b --- /dev/null +++ b/egs/babel/s5d/local/search/filter_by_category.pl @@ -0,0 +1,360 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2016 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +my $Usage = < + e.g.: cat data/dev10h.pem/kws/keywords.int | \ + $0 data/dev10h.pem/kws/categories "Characters>10&&NGramOrder=2" + +Allowed options: + -f : assume the KWID (for which the filter expression is + evaluated) on k-th column (int, default 0) + +NOTE: + When the expression is empty (or missing), its evaluated as always true, + i.e. no entry will be removed from the input + +CAVEATS: + The operator '=' is equivalent to '=='. + + Do not use '-' character in the categories file if you want to use that + category in the filter expression. For example, the default setup adds + the KWID itself as a category. In case you will use the Babel-style KWIDS, + i.e. for example KW304-0008, you won't be able to use the KWID in + the expression itself (but you can still filter according to other categories) + i.e. 
for example
+  KW306-0008&&OOV=1 might be a valid expression, but it most probably won't
+  do what you want: it will get parsed as
+    (KW306 - 0008) && (OOV == 1),
+  which is most probably not what you intended.
+  Currently there is no way to make this work, other than renaming the
+  categories (for example, substituting '-' with '_'). While this could
+  probably be solved by taking the categories into account during parsing,
+  it's probably not that important.
+
+EOU
+
+use strict;
+use warnings FATAL => 'all';
+use utf8;
+use Switch;
+use Data::Dumper;
+use Scalar::Util qw(looks_like_number);
+use Getopt::Long;
+use POSIX;
+
+my $debug = '';
+my $field = 0;
+
+# NB: -f takes an integer argument (the column of the KWID), see the usage
+# message above.
+GetOptions("debug" => \$debug,
+           "f=i" => \$field) || do {
+  print STDERR "Cannot parse the command line parameters.\n\n";
+  print $Usage . "\n";
+  die "Cannot continue";
+};
+
+if ((@ARGV < 1) || (@ARGV > 2)) {
+  print STDERR "Incorrect number of parameters.\n\n";
+  print $Usage . "\n";
+  die "Cannot continue";
+}
+
+my $group_file = $ARGV[0];
+my $str_expr="";
+$str_expr=$ARGV[1] if defined($ARGV[1]);
+
+# Split the expression into tokens (might need some more attention
+# to make it really correct)
+sub tokenize_string {
+  my $s = shift;
+  $s =~ s/^\s+|\s+$//g;
+  my @tokens = split(/ *(\&\&|\|\||\>\=|\<\=|==|!=|[\+\-\=\(\)\<\>\*\/^!]) */, $s);
+  #print STDERR join(", ", @tokens) . "\n";
+  return @tokens;
+}
+
+
+
+# The precedence table should reflect the precedence of the operators in C
+my %precedence = (
+  #unary operators
+  'u+' => 11,
+  'u-' => 11,
+  'u!' => 11,
+
+  '^' => 10,
+  #'(' => 10,
+  #')' => 10,
+
+
+  #arithmetic operators
+  '*' => 8,
+  '/' => 8,
+  '%' => 8,
+
+  '+' => 7,
+  '-' => 7,
+
+  # logical operators
+  '<' => 5,
+  '>' => 5,
+  '>=' => 5,
+  '<=' => 5,
+  '=' => 4,
+  '==' => 4,
+  '!=' => 4,
+  '&&' => 3,
+  '||' => 2,
+);
+
+my %right=(
+  #unary operators
+  'u+' => 1,
+  'u-' => 1,
+  'u!' => 1,
+
+  # This contradicts Matlab, but it is the mathematician's interpretation:
+  # 2^3^4 = 2^(3^4), instead of Matlab's left associativity 2^3^4 = (2^3)^4.
+  # As always -- if the order is important, use parentheses.
+  '^' => 1,
+);
+
+sub assoc {
+  my $op = $_[0];
+  return (exists $right{$op}) ? $right{$op} : -1;
+}
+
+sub looks_like_variable {
+  return $_[0] =~ /^[A-Za-z_][A-Za-z_0-9]*$/;
+}
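+
+# Worked example (illustrative; the category values are made up): the filter
+# "Characters>10&&NGramOrder=2" tokenizes to
+#   [ Characters, >, 10, &&, NGramOrder, =, 2 ]
+# and to_postfix() below converts it to the postfix form
+#   [ Characters, 10, >, NGramOrder, 2, =, && ].
+# For a keyword with categories Characters=012 and NGramOrder=002,
+# evaluate_postfix() then reduces (12 10 >) -> 1, (2 2 =) -> 1 and
+# (1 1 &&) -> 1, i.e. the keyword passes the filter.
+
+sub unary_op {
+  my $token = shift;
+  my $op = shift;
+  my $res;
+
+  switch( $token ) {
+    case 'u+' {$res = $op}
+    case 'u-' {$res = -$op}
+    case 'u!' 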
{$res = !$op} + else {die "Unknown operator $token"} + } + + return $res; +} + +sub binary_op { + my $token = shift; + my $op2 = shift; + my $op1 = shift; + my $res; + + $op2 += 0.0; + $op1 += 0.0; + switch( $token ) { + case '^' {$res = $op1 ** $op2} + case '*' {$res = $op1 * $op2} + case '/' {$res = $op1 / $op2} + case '%' {$res = $op1 % $op2} + case '+' {$res = $op1 + $op2} + case '-' {$res = $op1 - $op2} + case '<' {$res = $op1 < $op2} + case '>' {$res = $op1 > $op2} + case '>=' {$res = $op1 >= $op2} + case '<=' {$res = $op1 <= $op2} + case '=' {$res = $op1 == $op2} + case '==' {$res = $op1 == $op2} + case '!=' {$res = $op1 != $op2} + case '&&' {$res = $op1 && $op2} + case '||' {$res = $op1 || $op2} + else {die "Unknown operator $token"} + } + + return $res; +} + +# refer to https://en.wikipedia.org/wiki/Shunting-yard_algorithm +# plus perl implementation in http://en.literateprograms.org/Shunting_yard_algorithm_(Perl) +sub to_postfix { + my @stack; + my @output = (); + my $last = ""; + + my @tokens=tokenize_string(shift); + + foreach my $token (@tokens) { + next unless $token ne ''; + + # detection of an unary operators + # not sure if this heuristics is complete + if (($token =~ /^[-+!]$/) && + (defined($precedence{$last}) || ($last eq '') || ($last eq ')'))) { + #print "Unary op: $token\n"; + $token="u$token"; + } + + if (looks_like_number($token)) { + if (looks_like_number($last) || looks_like_variable($last)) { + die "Value tokens must be separated by an operator"; + } + push @output, $token; + } elsif (looks_like_variable($token)) { + if (looks_like_number($last) || looks_like_variable($last)) { + die "Value tokens must be separated by an operator"; + } + push @output, $token; + } elsif (defined $precedence{$token}) { + my $p = $precedence{$token}; + + while (@stack) { + my $old_p = $precedence{$stack[-1]}; + last if $p > $old_p; + last if $p == $old_p and (assoc($token) >= 0); + push @output, pop @stack; + } + push @stack, $token; + } elsif ($token eq '(') { + push @stack, $token; + } elsif ($token eq ')') { + my $t; + do { + $t=pop @stack; + push @output, $t unless $t eq '(' + } while ($t && ($t ne '(')); + die "No matching (" unless $t eq '('; + #print "stack=[" . join(", ", @stack) . "] output=[" . join(", ", @output) . "]\n" ; + } else { + print "stack=[" . join(", ", @stack) . "] output=[" . join(", ", @output) . "]\n" ; + die "Unknown token \"$token\" during parsing the expression"; + } + $last=$token; + } + + # dump the rest of the operators + while (@stack) { + my $t = pop @stack; + die "No matching )" if $t eq '('; + push @output, $t; + } + + # final postfix expression + return @output; +} + +# this follows the standard RPM (postfix) expression evaluation +# the only possibly slightly confusing part is that when we encounter +# a variable, we lookup it's value in %vars. By default, (i.e. 
if the variable +# is not preset in the dict), the variable evaluates to 0 (false) +sub evaluate_postfix { + my @expression = @{$_[0]}; + my %vars= %{$_[1]}; + + my @stack = (); + foreach my $token (@expression) { + if (looks_like_number($token)) { + push @stack, $token; + } elsif (looks_like_variable($token)) { + my $val = 0; + if (defined $vars{$token}) { + $val = $vars{$token}; + } + push @stack, $val; + } elsif (defined $precedence{$token}) { + my $res; + if ( $token =~ /^u.*$/) { + my $op = pop @stack; + $res = unary_op($token, $op); + } else { + my $op1 = pop @stack; + my $op2 = pop @stack; + $res = binary_op($token, $op1, $op2); + } + push @stack, $res; + } else { + die "Unknown token: $token, expression=[" . join(" ", @expression) . "]\n"; + } + #print STDERR "token = $token; stack = [" . join(' ', @stack) . "]\n"; + + } + if (@stack != 1) { + my $expr = join(" ", @expression); + print STDERR "expression = [$expr]; stack = [" . join(' ', @stack) . "]\n"; + die "The operators did not reduce the stack completely!" if @stack != 1; + } + return pop @stack; +} + + +#--print "infix = [" . join(' ', @tokens) . "]\n"; +#--my @exp = to_postfix(@tokens); +#--my %vals = (A=>50, C => -3); +#--print "output = [" . join(' ', @exp) . "]\n"; +#-- +#--print evaluate_postfix(\@exp, \%vals); + + +my @expression = to_postfix($str_expr); + +my %GROUPS; +#Read the groups table +open(G, $ARGV[0]) or die "Cannot open the group table $ARGV[0]"; +while (my $line = ) { + my @entries = split(" ", $line); + my $kwid = shift @entries; + + foreach my $group (@entries) { + my @entries = split "=", $group; + if (@entries == 2) { + $GROUPS{$kwid}->{$entries[0]} = $entries[1]; + } elsif (@entries ==1 ) { + $GROUPS{$kwid}->{$group} = 1; + } else { + die "Unknown format of the category $group"; + } + } +} +close(G); + +my $let_all_pass=0; +if (not @expression) { + $let_all_pass=1; +} + +while (my $line = ) { + #shortcut if the "ALL" groups is used + if ($let_all_pass == 1) { + print $line; + next; + } + + my @entries = split(" ", $line); + my $kwid = $entries[$field]; + + my $res = evaluate_postfix(\@expression, $GROUPS{$kwid}); + if ($res) { + print $line; + } else { + print STDERR "Not keeping: $line" if $debug; + } + +} + + diff --git a/egs/babel/s5d/local/search/filter_kws_results.pl b/egs/babel/s5d/local/search/filter_kws_results.pl new file mode 100755 index 00000000000..f4e6589c50a --- /dev/null +++ b/egs/babel/s5d/local/search/filter_kws_results.pl @@ -0,0 +1,189 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2015 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +my $Usage = < > output + e.g.: gunzip -c exp/tri5/kws/result.*.gz | $0 > exp/tri5/kws/results + +Allowed options: + --nbest : how many best results (for each KWID) should be printed + (int, default -1, i.e. 
no limit) + --duptime : duplicates detection, tolerance (in frames) for being + the same hits (int, default = 50) + --likes + --probs + +CAVEATS: + The script tries to be memory-effective. The impact of this is that we + assume the results are sorted by KWID (i.e. all entries with the same KWID + are in a continuous block). The user is responsible for sorting it. +EOU + +use strict; +use warnings; +use utf8; +use POSIX; +use Data::Dumper; +use Getopt::Long; + +# if parameter nbest > 0, then filters the result list so that there is no +# more than nbest hits in the output for each of the KWID +# + +my $nbest = -1; +my $duptime = 50; +my $likes = 0; + +#print STDERR join(" ", $0, @ARGV) . "\n"; +GetOptions ("nbest=f" => \$nbest, + "likes" => \$likes, + "probs" => sub{ $likes = 0}, + "duptime=i" => \$duptime) || do { + print STDERR "Cannot parse the command-line parameters.\n"; + print STDERR "$Usage\n"; + die "Cannot continue\n" +}; + +if (@ARGV != 0) { + print STDERR "Incorrect number of command-line parameters\n"; + print STDERR "$Usage\n"; + die "Cannot continue\n" +} + +# Function for sorting +sub KwslistOutputSort { + if ($a->[0] ne $b->[0]) { + if ($a->[0] =~ m/[0-9]+$/ && $b->[0] =~ m/[0-9]+$/) { + ($a->[0] =~ /([0-9]*)$/)[0] <=> ($b->[0] =~ /([0-9]*)$/)[0] + } else { + $a->[0] cmp $b->[0]; + } + } elsif ($a->[5] ne $b->[5]) { + $b->[5] <=> $a->[5]; + } else { + $a->[1] cmp $b->[1]; + } +} + +sub KwslistDupSort { + my ($a, $b, $duptime) = @_; + if ($a->[1] ne $b->[1]) { + #file + $a->[1] cmp $b->[1]; + } elsif (abs($a->[2]-$b->[2]) >= $duptime){ + #start + $a->[2] <=> $b->[2]; + } elsif ($a->[4] ne $b->[4]) { + #score + $b->[4] <=> $a->[4]; + } else { + #end time + $b->[3] <=> $a->[3]; + } +} + +my @RESULTS; +my %SEEN_KWS; +my $kw = ""; + +while ( my $line = ) { + chomp $line; + my @F = split " ", $line; + @F == 5 || die "$0: Bad number of columns in raw results \"$line\"\n"; + + $F[4] = -$F[4] if $likes; + + if ($F[0] eq $kw) { + push @RESULTS, \@F; + } elsif ($kw eq "" ) { + @RESULTS = (); + push @RESULTS, \@F; + $kw = $F[0]; + } else { + + my @results; + my @tmp = sort { KwslistDupSort($a, $b, $duptime) } @RESULTS; + + @results = (); + if (@tmp >= 1) {push(@results, $tmp[0])}; + for (my $i = 1; $i < scalar(@tmp); $i ++) { + my $prev = $results[-1]; + my $curr = $tmp[$i]; + if ((abs($prev->[2]-$curr->[2]) < $duptime ) && + ($prev->[1] eq $curr->[1])) { + next; + } else { + push(@results, $curr); + } + } + + # this is probably needed only when nbest > 0 + @results = sort { ($b->[4] + 0.0) <=> ($a->[4] + 0.0) } @results; + + my $len; + if( $nbest > 0) { + $len = scalar @results < $nbest ? scalar @results : $nbest; + } else { + $len = scalar @results; + } + for (my $i=0; $i < $len; $i++) { + $results[$i]->[4] = -$results[$i]->[4] if $likes; + print join(" ", @{$results[$i]}) . "\n"; + } + + @RESULTS = (); + push @RESULTS, \@F; + $kw = $F[0]; + } +} +do { + my @results; + my @tmp = sort { KwslistDupSort($a, $b, $duptime) } @RESULTS; + + @results = (); + if (@tmp >= 1) {push(@results, $tmp[0])}; + for (my $i = 1; $i < scalar(@tmp); $i ++) { + my $prev = $results[-1]; + my $curr = $tmp[$i]; + if ((abs($prev->[2]-$curr->[2]) < $duptime ) && + ($prev->[1] eq $curr->[1])) { + next; + } else { + push(@results, $curr); + } + } + + # this is probably needed only when nbest > 0 + @results = sort { ($b->[4] + 0.0) <=> ($a->[4] + 0.0) } @results; + + my $len; + if( $nbest > 0) { + $len = scalar @results < $nbest ? 
scalar @results : $nbest; + } else { + $len = scalar @results; + } + for (my $i=0; $i < $len; $i++) { + $results[$i]->[4] = -$results[$i]->[4] if $likes; + print join(" ", @{$results[$i]}) . "\n"; + } +} + + diff --git a/egs/babel/s5d/local/search/normalize.sh b/egs/babel/s5d/local/search/normalize.sh new file mode 100755 index 00000000000..38054f75879 --- /dev/null +++ b/egs/babel/s5d/local/search/normalize.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal) +# Apache 2.0. + +# Begin configuration section. +# case_insensitive=true +extraid= +min_lmwt=8 +max_lmwt=12 +cmd=run.pl +stage=0 +ntrue_from= +# End configuration section. + +help_message="$0: score the kwslist using the F4DE scorer from NIST + Example: + $0 [additional-parameters] + where the most important additional parameters can be: + --extraid #for using, when a non-default kws tasks are setup + (using the kws_setup.sh --extraid) for a kaldi-single data-dir" + +echo $0 $@ +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + + +if [ $# -ne 3 ]; then + printf "FATAL: incorrect number of variables given to the script\n\n" + printf "$help_message\n" + exit 1; +fi + +set -e -o pipefail + +langdir=$1 +if [ -z $extraid ] ; then + kwsdatadir=$2/kws +else + kwsdatadir=$2/kwset_${extraid} +fi +kwsoutputdir="$3" + +trials=$(cat $kwsdatadir/trials) +mkdir -p $kwsoutputdir/log/ + +if [ $stage -le 0 ] ; then + for LMWT in $(seq $min_lmwt $max_lmwt) ; do + mkdir -p ${kwsoutputdir}_$LMWT/details/ + + cp ${ntrue_from}_$LMWT/details/ntrue ${kwsoutputdir}_$LMWT/details/ntrue + cp ${ntrue_from}_$LMWT/details/ntrue_raw ${kwsoutputdir}_$LMWT/details/ntrue_raw + echo "$ntrue_from" > ${kwsoutputdir}_$LMWT/details/ntrue_from + done +fi + +if [ $stage -le 1 ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/normalize.LMWT.log \ + cat ${kwsoutputdir}_LMWT/results \|\ + local/search/normalize_results_kst.pl --trials $trials --ntrue-scale \$\(cat ${kwsoutputdir}_LMWT/details/ntrue\)\ + \> ${kwsoutputdir}_LMWT/details/results + +fi + +if [ $stage -le 2 ]; then +if [ -f $kwsdatadir/f4de_attribs ] ; then + language="" + flen=0.01 + kwlist_name="" + . $kwsdatadir/f4de_attribs #override the previous variables + + ecf=$kwsdatadir/ecf.xml + kwlist=$kwsdatadir/kwlist.xml + + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_write_kwslist.LMWT.log \ + mkdir -p ${kwsoutputdir}_LMWT/f4de/\; \ + cat ${kwsoutputdir}_LMWT/details/results \| \ + utils/int2sym.pl -f 2 $kwsdatadir/utt.map \| \ + local/search/utt_to_files.pl --flen $flen $kwsdatadir/../segments \|\ + local/search/write_kwslist.pl --flen $flen --language $language \ + --kwlist-id $kwlist_name \> ${kwsoutputdir}_LMWT/f4de/kwslist.xml + +fi +fi + +echo "$0: Done" +exit 0; + + diff --git a/egs/babel/s5d/local/search/normalize_categories.pl b/egs/babel/s5d/local/search/normalize_categories.pl new file mode 100755 index 00000000000..f3354e8c4d4 --- /dev/null +++ b/egs/babel/s5d/local/search/normalize_categories.pl @@ -0,0 +1,89 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2015 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +my $Usage=< > categories + e.g.: cat partial_categories.* | $0 > categories + +Allowed options: + --one-per-line : by default, there will be only one line for each KWID + THis option changes the output format so that for + each pair "KWID CATEGORY" will be on a single line. + +Note: + Reads the stream of categories information in the format + + keyword-ID1 category category2 + keyword-ID2 category2 + keyword-ID1 category category2 + + The duplicities are allowed (and will be removed). + Multiple categories per line are allowed (and will be merged) + + The purpose of the script is to be able to merge the information from different + scripts. Each script can generate it's own information about categories + and this script can be then used to merge these partial tables into one global +EOU + +use strict; +use warnings; +use utf8; +use Getopt::Long; +use Data::Dumper; +use POSIX; + +my $one_per_line; + +GetOptions("one-per-line", \$one_per_line) or + do { + print STDERR "Cannot parse the command-line parameters.\n"; + print STDERR "$Usage\n"; + die "Cannot continue\n" +}; + +if (@ARGV != 0) { + print STDERR "Incorrect number of command-line parameters\n"; + print STDERR "$Usage\n"; + die "Cannot continue\n" +} + +my %GROUPS; + +while (my $line=) { + chomp $line; + my @entries = split " ", $line; + + die "The line \"$line\" does not have correct format" if @entries < 2; + + my $kwid=shift @entries; + for my $category (@entries) { + $GROUPS{$kwid}->{$category} = 1; + } +} + +for my $kwid (sort keys %GROUPS) { + if ($one_per_line) { + foreach my $category (sort keys %{$GROUPS{$kwid}} ) { + print $kwid . " " . $category . "\n"; + } + } else { + print $kwid . " " . join(" ", sort keys %{$GROUPS{$kwid}}) . "\n"; + } +} diff --git a/egs/babel/s5d/local/search/normalize_results_kst.pl b/egs/babel/s5d/local/search/normalize_results_kst.pl new file mode 100755 index 00000000000..e57b947f278 --- /dev/null +++ b/egs/babel/s5d/local/search/normalize_results_kst.pl @@ -0,0 +1,203 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2015 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. 
+#=============================================================================== +my $Usage = < results.normalized + +Allowed options: + --probs : the input is probabilities instead of neg-loglikelihoods + + --duration|--trials : size of the searched collectiona in seconds (float) + --beta : the FA vs MISS rate (float, default 999.9) + --ntrue-scale : scales for scaling the expected count of true hits (float, default 1.0) + --thr|--threshold : the decision threshold (float, default 0.5) +EOU + +use strict; +use warnings; +use utf8; +use POSIX; +use Data::Dumper; +use Getopt::Long; + +my $ntrue_scale = 1.0; +my $global_thr = 0.5; +my $beta = 999.9; +my $duration = 35785.578; +my $ntrue_table_filename; +my $probs=0; +my $bsum_filename; + +GetOptions("duration|trials=f" => \$duration, + "ntrue-scale=f" => \$ntrue_scale, + "beta=f" => \$beta, + "probs" => \$probs, + "thr|threshold=f" => \$global_thr, + "ntrue-table=s" => \$ntrue_table_filename, + "bsum-table=s" => \$bsum_filename) or do + { + print STDERR "$0: Cannot parse the command-line parameters.\n"; + print STDERR "$Usage\n"; + die "$0: Cannot continue\n" +}; + +if (@ARGV != 0) { + print STDERR "$0: Incorrect number of command-line parameters\n"; + print STDERR "$Usage\n"; + die "$0: Cannot continue\n" +} + +sub ComputeKST { + my @instances = @{shift @_}; + my $ntrue_scale = shift @_; + my %ntrue_table = %{shift @_}; + + + my $ntrue = 0; + foreach my $elem(@instances) { + $ntrue += $elem->[4]; + } + #$ntrue = $ntrue / @instances; + if (defined ($ntrue_table{$instances[0]->[0]})) { + #print STDERR "For KW " . $instances[0]->[0] . " using the value " . $ntrue_table{$instances[0]->[0]} . "\n"; + $ntrue = $ntrue * $ntrue_table{$instances[0]->[0]}; + } else { + #print STDERR "Using the default vsalue $ntrue_scale\n"; + $ntrue = $ntrue * $ntrue_scale; + } + + my $thr = $beta * $ntrue / ( $duration + $ntrue * ($beta - 1)); + return $thr; +} + +sub ComputeKSTWithExpected { + my @instances = @{shift @_}; + my %expected_table = %{shift @_}; + my $ntrue_scale = shift @_; + my %ntrue_table = %{shift @_}; + + + my $ntrue = $expected_table{$instances[0]->[0]}; + #$ntrue = $ntrue / @instances; + if (defined ($ntrue_table{$instances[0]->[0]})) { + #print STDERR "For KW " . $instances[0]->[0] . " using the value " . $ntrue_table{$instances[0]->[0]} . "\n"; + $ntrue = $ntrue * $ntrue_table{$instances[0]->[0]}; + } else { + #print STDERR "Using the default vsalue $ntrue_scale\n"; + $ntrue = $ntrue * $ntrue_scale; + } + + my $thr = $beta * $ntrue / ( $duration + $ntrue * ($beta - 1)); + return $thr; +} +sub NormalizeScores { + my @instances = @{shift @_}; + my $thr = shift @_; + my $global_thr = shift @_; + + + if ($thr == 0) { + $thr = 0.001; + } + my $q = log($global_thr)/log($thr); + + foreach my $elem(@instances) { + $elem->[4] = pow($elem->[4], $q); + } +} + +sub WriteResults { + my @instances = @{shift @_}; + + foreach my $elem(@instances) { + print join(" ", @{$elem}) . "\n"; + die "$0: " . join(" ", @{$elem}) . 
"\n" if $elem->[-1] > 1.0; + } + +} + +my $KWID; +my @putative_hits; +my %NTRUE_TABLE = (); + +my %BSUM=(); +if (defined $bsum_filename) { + open(BSUMF, $bsum_filename) or die "$0: Cannot open $bsum_filename"; + while (my $line = ) { + chomp $line; + next unless (($line =~ m/^\s*KW/) || ($line =~ m/^Keyword\s*KW/)); + $line =~ s/^Keyword//g; + $line =~ s/^\s+|\s+$//g; + my @entries = split /\s*\|\s*/, $line; + $BSUM{$entries[0]} = $entries[12]; + } + close(BSUMF); +} + +if ( defined $ntrue_table_filename) { + open (F, $ntrue_table_filename) or die "$0: Cannot open the Ntrue-table file\n"; + while (my $line = ) { + my @entries=split(" ", $line); + + die "$0: The Ntrue-table does not have expected format\n" if @entries != 2; + $NTRUE_TABLE{$entries[0]} = $entries[1] + 0.0; + } + close (F); +} + +while (my $line = ) { + chomp $line; + (my $kwid, my $file, my $start, my $end, my $score) = split " ", $line; + + if ($KWID && ($kwid ne $KWID)) { + + my $thr = ComputeKST(\@putative_hits, $ntrue_scale, \%NTRUE_TABLE ); + if ((defined $BSUM{$KWID}) && (scalar @putative_hits > 100)) { + print STDERR "$0: $KWID $thr $BSUM{$KWID} " . log($thr)/log($global_thr) . "\n"; + my $old_thr = $thr; + $thr = pow($BSUM{$KWID}, log($thr)/log($global_thr)); + } + if ($thr < 0.9999 ) { + NormalizeScores(\@putative_hits, $thr, $global_thr); + WriteResults(\@putative_hits); + } + + $KWID = $kwid; + @putative_hits = (); + } elsif ( not $KWID ) { + $KWID = $kwid; + } + + unless ($probs) { + $score = exp(-$score); + } + push @putative_hits, [$kwid, $file, $start, $end, $score]; +} + +if ($KWID) { + my $thr = ComputeKST(\@putative_hits, $ntrue_scale, \%NTRUE_TABLE ); + if ((defined $BSUM{$KWID}) && (scalar @putative_hits > 100)) { + $thr = pow($BSUM{$KWID}, log($thr)/log($global_thr)); + } + if ($thr < 0.9999 ) { + NormalizeScores(\@putative_hits, $thr, $global_thr); + WriteResults(\@putative_hits); + } +} + diff --git a/egs/babel/s5d/local/search/per_category_stats.pl b/egs/babel/s5d/local/search/per_category_stats.pl new file mode 100755 index 00000000000..d14636dcc0f --- /dev/null +++ b/egs/babel/s5d/local/search/per_category_stats.pl @@ -0,0 +1,326 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2015 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +# Takes the alignment.csv and the category tables and computes the per-category +# statistics including the oracle measures (OTWV, MTWV, STWV) +# Is not particulary effective (for example, it computes the oracle measures +# for each keyword several times (once for each category the keyword is in); +# To achieve at least partial speed-up, we cache some of the partial statistics +# The caching gave us speed improvement approx. 
from 22s down to 14s
+#
+# The lines in the output starting with '#' are intended as comments only --
+# you can filter them out using grep -v '^#'
+# The first comment line contains the header.
+# The second comment line contains column numbers (to make using cut -f
+#   easier) -- you don't have to count the fields, just use the printed
+#   number of the field
+#
+# Compatibility:
+# We tried to make the numbers comparable with the F4DE output. If there is
+# a large difference, something is probably wrong and you should report it.
+# The column names should be compatible (to a large extent) with the F4DE
+# output files (sum.txt, bsum.txt, cond.bsum.txt). Our intention was, however,
+# to make this file easily grepable/machine-processable, so we didn't honor
+# the original F4DE file format
+#
+# Usage:
+# It reads the alignment.csv from the STDIN.
+# Moreover, it expects exactly two arguments: the number of trials and
+# the category table
+# I.e.
+#   local/search/per_category_stats.pl <number-of-trials> <category-table>
+#
+# Example:
+# cat alignment.csv | perl local/search/per_category_stats.pl `cat data/dev10h.pem/extra_kws/trials` data/dev10h.pem/extra_kws/categories
+#
+# Additional parameters
+# --beta        # beta value (weight of FAs), default 999.9
+# --sweep-step  # sweep step for the oracle measures
+#
+# TODO
+# Document what each field means (might be slightly tricky, as even F4DE
+# does not document the exact meaning of some of the fields).
+#
+# ATWV - Actual Term-Weighted Value (TWV for the threshold 0.5)
+# MTWV - Maximum Term-Weighted Value (TWV for the threshold that maximizes
+#        the given category's TWV)
+# OTWV - Optimum Term-Weighted Value (TWV assuming the decision threshold
+#        for each Term/KW is determined optimally)
+# STWV - Supreme TWV - essentially Lattice Recall
+
+use strict;
+use warnings FATAL => 'all';
+use utf8;
+use List::Util;
+use Data::Dumper;
+use Getopt::Long;
+use Scalar::Util qw(looks_like_number);
+
+binmode STDIN, ":utf8";
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+
+my %CATEGORIES;
+my %STATS;
+my %K;
+
+my $beta=999.9;
+my $step_size=0.005;
+my $threshold = 0.5;
+my $enable_caching = 1;
+
+my $cat_maxlen = 9; #Must accommodate the string "#CATEGORY" in the header
+my $field_size = 9;
+
+my $L = int(1.0/$step_size) + 1;
+
+GetOptions("beta=f" => \$beta,
+           "sweep-step=f" => \$step_size,
+           "disable-caching" => sub{ $enable_caching=''; }
+          ) or die "Cannot process the input options (possibly unknown switch)";
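+
+# For reference, the per-keyword quantities computed below follow the usual
+# TWV definitions: for a keyword with Ntrue true tokens in T seconds of
+# searched audio,
+#   Pmiss(thr) = 1 - Ncorr(thr)/Ntrue
+#   Pfa(thr)   = Nfa(thr)/(T - Ntrue)
+#   TWV(thr)   = 1 - Pmiss(thr) - beta*Pfa(thr)
+# and the per-category numbers are running averages of these per-keyword
+# values (the "($K * ... + ...) / ($K + 1)" updates below).
+
+die "Unsupported number of arguments." 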
if @ARGV != 2; +if ( not looks_like_number($ARGV[0])) { + die "The first parameter must be a float number (number of trials) -- got $ARGV[0]"; +} + +my $T= 0.0 + $ARGV[0]; + + +open(CAT, $ARGV[1]) or die("Cannot open categories file $ARGV[1]"); +while(my $line = ) { + my @entries =split(" ", $line); + + die "Unknown format of category line: \"$line\"" if scalar @entries < 2; + my $kw = shift @entries; + + + if (not defined $STATS{$kw}->{fa_sweep}) { + $STATS{$kw}->{fa} = 0; + $STATS{$kw}->{corr} = 0; + $STATS{$kw}->{miss} = 0; + $STATS{$kw}->{lattice_miss} = 0; + $STATS{$kw}->{ntrue} = 0; + $STATS{$kw}->{count} = 0; + $STATS{$kw}->{corrndet} = 0; + + my @tmp1 = (0) x ($L+1); + $STATS{$kw}->{fa_sweep} = \@tmp1; + my @tmp2 = (0) x ($L+1); + $STATS{$kw}->{corr_sweep} = \@tmp2; + } + + push @entries, "ALL"; + foreach my $cat (@entries) { + $cat_maxlen = length($cat) if length($cat) > $cat_maxlen; + push @{$CATEGORIES{$cat}}, $kw; + $K{$cat} += 1; + } +} +close(CAT); +#print Dumper(\%CATEGORIES); + + +#print STDERR "Reading the whole CSV\n"; +my $i = 0; +my $dummy=; +while (my $line=) { + chomp $line; + my @entries = split(",", $line); + + die "Unknown format of category line: \"$line\"" if scalar @entries != 12; + + + my $termid = $entries[3]; + my $ref_time = $entries[5]; + my $score = $entries[9]; + my $decision=$entries[10]; + my $ref = $entries[11]; + + if (not defined($STATS{$termid}->{ntrue})) { + print STDERR "Term $termid not present in the category table, skipping\n"; + next + } + #print "$termid, ref_time=$ref_time, score=$score, start=" . int($score/$step_size + 0.5) . ", L=$L\n" if $termid eq "KW303-00025"; + if ($score) { + $score = 1.0 if $score > 1.0; + my $q = int($score/$step_size) + 1; + for (my $i = 0; $i < $q ; $i += 1) { + if ($ref_time) { + $STATS{$termid}->{corr_sweep}->[$i] += 1; + } else { + $STATS{$termid}->{fa_sweep}->[$i] += 1; + } + } + } + + #print STDERR "$line "; + $STATS{$termid}->{count} += 1 if $score; + + #print Dumper($ref_time, $score, $STATS{$termid}) if ($ref_time); + if (($decision eq "YES") && ($ref eq "FA")) { + $STATS{$termid}->{fa} += 1; + } elsif (($decision eq "YES") && ($ref eq "CORR")) { + $STATS{$termid}->{corr} += 1; + $STATS{$termid}->{ntrue} += 1; + } elsif ($ref eq "MISS") { + $STATS{$termid}->{lattice_miss} += 1 unless $decision; + $STATS{$termid}->{miss} += 1; + $STATS{$termid}->{ntrue} += 1; + } elsif ($ref eq "CORR!DET") { + $STATS{$termid}->{corrndet} += 1; + } + #print STDERR "Done\n"; + +} + +#print STDERR "Read the whole CSV\n"; + +# Create the header +my $H=sprintf "%*s", $cat_maxlen-1, "CATEGORY"; +my @int_vals = map{ sprintf("%*s", $field_size, $_) } (split " ", "#KW #Targ #NTarg #Sys #CorrDet #CorrNDet #FA #MISS"); +my @float_vals = map{ sprintf("%*s", $field_size, $_) } (split " ", "ATWV MTWV OTWV STWV PFA MPFA OPFA PMISS MPMISS OPMISS THR MTHR OTHR"); +print "#" . join(" ", $H, @int_vals, @float_vals) . "\n"; +# Create secondary header with column numbers (to make cut'ing easier +my @col_nrs = map { sprintf "%*d", $field_size, $_ } (2.. 1+@int_vals + @float_vals); +print "#" . join(" ", sprintf("%*d", $cat_maxlen-1, 1), @col_nrs) . 
"\n"; +# End of the header + +my %CACHE = (); + +foreach my $cat (sort keys %CATEGORIES) { + my $K = 0; + my $ATWV = 0; + my $STWV = 0; + my $PMISS = 0; + my $PFA = 0; + + my $OTWV = 0; + my $OPMISS = 0; + my $OPFA = 0; + my $OTHR = 0; + + my $NTRUE = 0; + my $CORR = 0; + my $FA = 0; + my $MISS = 0; + my $COUNT = 0; + my $CORRNDET = 0; + + my @MTWV_SWEEP = (0) x ($L+1); + my @MPMISS_SWEEP = (0) x ($L+1); + my @MPFA_SWEEP = (0) x ($L+1); + #print Dumper($cat, $CATEGORIES{$cat}); + foreach my $kw (sort @{$CATEGORIES{$cat}}) { + #print Dumper($kw, $STATS{$kw}); + next unless defined $STATS{$kw}->{ntrue}; + next if $STATS{$kw}->{ntrue} == 0; + my $pmiss = 1 - $STATS{$kw}->{corr}/$STATS{$kw}->{ntrue}; + my $pfa = $STATS{$kw}->{fa}/($T - $STATS{$kw}->{ntrue}); + my $twv = 1 - $pmiss - $beta * $pfa; + my $stwv = 1 - $STATS{$kw}->{lattice_miss}/$STATS{$kw}->{ntrue}; + + $NTRUE += $STATS{$kw}->{ntrue}; + $CORR += $STATS{$kw}->{corr}; + $CORRNDET += $STATS{$kw}->{corrndet}; + $FA += $STATS{$kw}->{fa}; + $MISS += $STATS{$kw}->{miss}; + $COUNT += $STATS{$kw}->{count} if $STATS{$kw}->{ntrue} > 0; + + $ATWV = ($K * $ATWV + $twv) / ($K + 1); + $PMISS = ($K * $PMISS + $pmiss) / ($K + 1); + $PFA = ($K * $PFA + $pfa) / ($K + 1); + + $STWV = ($K * $STWV + $stwv ) / ($K + 1); + + $pmiss = 0; + $pfa = 0; + $twv = -99999; + my $othr = -0.1; + #print Dumper($kw, $STATS{$kw}); + if (($enable_caching) && (defined $CACHE{$kw})) { + ($pfa, $pmiss, $twv, $OTHR, my $twv_sweep_cache, my $pfa_sweep_cache, my $pmiss_sweep_cache) = @{$CACHE{$kw}}; + @MTWV_SWEEP = map {($K * $MTWV_SWEEP[$_] + $twv_sweep_cache->[$_]) / ($K + 1)} (0..$L); + @MPFA_SWEEP = map {($K * $MPFA_SWEEP[$_] + $pfa_sweep_cache->[$_]) / ($K + 1)} (0..$L); + @MPMISS_SWEEP = map{($K * $MPMISS_SWEEP[$_] + $pmiss_sweep_cache->[$_]) / ($K + 1)} (0..$L); + } else { + my @twv_sweep_cache = (0) x ($L+1); + my @pmiss_sweep_cache = (0) x ($L+1); + my @pfa_sweep_cache = (0) x ($L+1); + + for (my $i = 0; $i <= $L; $i += 1) { + my $sweep_pmiss = 1 - $STATS{$kw}->{corr_sweep}->[$i]/$STATS{$kw}->{ntrue}; + my $sweep_pfa = $STATS{$kw}->{fa_sweep}->[$i]/($T - $STATS{$kw}->{ntrue}); + my $sweep_twv = 1 - $sweep_pmiss - $beta * $sweep_pfa; + if ($twv < $sweep_twv) { + $pfa = $sweep_pfa; + $pmiss = $sweep_pmiss; + $twv = $sweep_twv; + $OTHR = ($i - 1) * $step_size; + } + $pmiss_sweep_cache[$i] = $sweep_pmiss; + $pfa_sweep_cache[$i] = $sweep_pfa; + $twv_sweep_cache[$i] = $sweep_twv; + + #print "$i $sweep_pmiss $sweep_pfa $sweep_twv\n"; + $MTWV_SWEEP[$i] = ($K * $MTWV_SWEEP[$i] + $sweep_twv) / ($K + 1); + $MPFA_SWEEP[$i] = ($K * $MPFA_SWEEP[$i] + $sweep_pfa) / ($K + 1); + $MPMISS_SWEEP[$i] = ($K * $MPMISS_SWEEP[$i] + $sweep_pmiss) / ($K + 1); + } + $CACHE{$kw} = [$pfa, $pmiss, $twv, $OTHR, \@twv_sweep_cache, \@pfa_sweep_cache, \@pmiss_sweep_cache]; + } + + $OTWV = ($K * $OTWV + $twv) / ($K + 1); + $OPMISS = ($K * $OPMISS + $pmiss) / ($K + 1); + $OPFA = ($K * $OPFA + $pfa) / ($K + 1); + $K += 1; + } + + my $max_idx = 0; + my $MTWV = $MTWV_SWEEP[0]; + my $MPMISS = $MPMISS_SWEEP[0]; + my $MPFA = $MPFA_SWEEP[0]; + my $MTHR = 0; + for(my $i = 1; $i <= $L; $i += 1) { + if ($MTWV_SWEEP[$i] > $MTWV) { + $max_idx = $i; + $MTWV = $MTWV_SWEEP[$i]; + $MPMISS = $MPMISS_SWEEP[$i]; + $MPFA = $MPFA_SWEEP[$i]; + $MTHR = ($i - 1) * $step_size; + } + } + + if ($K > 1) { + $OTHR = "NA"; + } + + my $ntarg = $CORRNDET + $FA; + + my @abs_nrs = ($K, $NTRUE, $ntarg, $COUNT, $CORR, $CORRNDET, $FA, $MISS); + @abs_nrs = map { sprintf "%*d", $field_size, $_ } @abs_nrs; + my @flt_nrs = map { $_ eq "NA" ? 
sprintf "%6s", $_ : sprintf "% 6.3g", $_ } ($ATWV, $MTWV, $OTWV, $STWV, $PFA, $MPFA, $OPFA, $PMISS, $MPMISS, $OPMISS, 0.5, $MTHR, $OTHR); + @flt_nrs = map {sprintf "%*s", $field_size, $_} @flt_nrs; + + my $nrs = join(" ", @abs_nrs, @flt_nrs); + + $cat = sprintf("%*s", $cat_maxlen, $cat); + print "$cat $nrs \n"; +} + + diff --git a/egs/babel/s5d/local/search/rttm_to_hitlists.sh b/egs/babel/s5d/local/search/rttm_to_hitlists.sh new file mode 100755 index 00000000000..6d4af6fb916 --- /dev/null +++ b/egs/babel/s5d/local/search/rttm_to_hitlists.sh @@ -0,0 +1,107 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +flen=0.01 +segments= +utt_table= +# End configuration section +echo $0 "$@" +. ./utils/parse_options.sh || exit 1; + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +if [ $# -ne 5 ] ; then + echo "Usage: " + exit 1 +fi + +rttm=$1 +kwlist=$2 +ecf=$3 +workdir=$4 +output=$5 + +for f in $rttm $kwlist $ecf ; do + [ ! -f $f ] && echo "File \"$f\" does not exist." && exit 1 +done + +mkdir -p $workdir + +{ + echo '' + echo '' +} > $workdir/kwslist.xml + +kwseval=`which KWSEval` +if [ -z "$kwseval" ] ; then + echo >&2 "KWSEval from F4DE tools not found" + exit 1 +fi + +bash -x $kwseval -c -r $rttm -e $ecf -t $kwlist -s $workdir/kwslist.xml -f $workdir/ +grep -E ",,MISS" $workdir/alignment.csv | \ + perl -e ' + binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; + binmode STDERR, ":utf8"; + + use Data::Dumper; + $flen='$flen'; + %SEGMENTS=(); + if ((defined $ARGV[0]) && ( $ARGV[0] ne "" )) { + open(F, $ARGV[0]) or die "Cannot open \"$ARGV[0]\""; + while() { + @entries = split(" ", $_); + $entries[2] = int($entries[2]/$flen+0.5); + $entries[3] = int($entries[3]/$flen+0.5); + push @{$SEGMENTS{$entries[1]}}, [@entries]; + } + close(F); + } + + while() { + chomp; + @entries_tmp = split(",", $_); + @entries = ($entries_tmp[3], + $entries_tmp[1], + int($entries_tmp[5]/$flen + 0.5), + int($entries_tmp[6]/$flen + 0.5), + 1.0 + ); + + $fid = $entries[1]; + $start = $entries[2]; + $end = $entries[3]; + + if ((defined $ARGV[0]) && ( $ARGV[0] ne "" )) { + $found = 0; + foreach $entry ( @{$SEGMENTS{$fid}} ) { + if (($start >= $entry->[2]) && ($end <= $entry->[3])) { + $relstart = $start - $entry->[2]; + $relend = $end - $entry->[2]; + print join(" ", $entries[0], $entry->[0], $relstart, $relend, 1.0) . "\n"; + if ($found eq 1) { + print STDERR "WARNING: Segments file generates duplicate hits for the entry"; + print STDERR join(" ", @entries_tmp) . "\n"; + } + $found = 1; + } + } + if ($found eq 0) { + print STDERR "WARNING: Segments file does not allow for finding entry "; + print STDERR join(" ", @entries_tmp) . "\n"; + } + } else { + print join(" ", @entries) . "\n"; + } + } + ' "$segments" | sort | { + if [ -z "$utt_table" ]; then + cat - + else + utils/sym2int.pl -f 2 $utt_table + fi +} > $output diff --git a/egs/babel/s5d/local/search/run_phn_search.sh b/egs/babel/s5d/local/search/run_phn_search.sh new file mode 100755 index 00000000000..44587699a38 --- /dev/null +++ b/egs/babel/s5d/local/search/run_phn_search.sh @@ -0,0 +1,135 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +stage=2 +dir=dev10h.pem +# End configuration section +. ./conf/common_vars.sh +. ./utils/parse_options.sh +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +. 
./lang.conf + +#Example script how to run keyword search using the Kaldi-native pipeline + + +if [ $stage -le 0 ]; then + local/generate_confusion_matrix.sh --nj 64 --cmd "$decode_cmd" \ + exp/sgmm5_denlats/dengraph/ exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix +fi + +if [ $stage -le 1 ] ; then + local/train_g2p.sh --cmd "$decode_cmd" data/local/lexicon.txt exp/g2p +fi + +dataset=${dir%%.*} +datatype=${dir#*.} + +lang=data/lang.phn +data=data/${dataset}.phn.${datatype} + +set +o nounset +eval kwsets=${!dataset_kwlists[@]} +eval my_ecf_file=\$${dataset}_ecf_file +eval my_rttm_file=\$${dataset}_rttm_file +set -o nounset + +my_array_name=${dataset}_kwlists + +eval kwsets=\( \${!$my_array_name[@]} \) +declare -p kwsets +for set in ${kwsets[@]} ; do + eval my_kwlist=\${$my_array_name[$set]} + declare -p my_kwlist +done +declare -p my_ecf_file +declare -p my_rttm_file + +if [ $stage -le 2 ] ; then + + for set in ${kwsets[@]} ; do + + eval my_kwlist=\${$my_array_name[$set]} + + #This will set up the basic files and converts the F4DE files into Kaldi-native format + local/search/setup.sh $my_ecf_file $my_rttm_file "${my_kwlist}" \ + $data $lang $data/kwset_${set} + + # we will search for the IV words normally (i.e. will look for the specificsequence + # of the words + local/search/compile_keywords.sh --filter "OOV=0&&Characters>2"\ + $data/kwset_${set} $lang $data/kwset_${set}/tmp.2 + + # in addition to the direct search of the IV words, we will set up the proxy + # search as well -- we will use lower nbest, compared to OOV=1 + #-- local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --category "OOV=0" \ + #-- --beam 5 --nbest 10 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \ + #-- ${data}/kwset_${set} ${lang} ${data}/${set}_oov_kws/tmp/L1.lex \ + #-- ${data}/${set}_oov_kws/tmp/L1.lex ${data}/kwset_${set}/tmp.3 + + local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --filter "OOV=1&&Characters>4"\ + --beam 5 --nbest 100 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \ + ${data}/kwset_${set} ${lang} data/local/dict.phn/lexiconp.txt exp/g2p \ + ${data}/kwset_${set}/tmp.4 + + # and finally, replace the categories by the word-level categories + cp data/$dir/kwset_${set}/categories $data/kwset_${set}/categories + done +fi + +if [ $stage -le 3 ] ; then + for set in ${kwsets[@]} ; do + fsts-union scp:<(sort $data/kwset_${set}/tmp*/keywords.scp) \ + ark,t:"|gzip -c >$data/kwset_${set}/keywords.fsts.gz" + done +fi + + +echo "Directories are set up -- running run-4-phn-anydecode.sh will take care of the rest" +exit 0 + +if [ $stage -le 4 ] ; then + for set in $kwsets ; do + for it in $(seq 1 4); do + system=exp/sgmm5_mmi_b0.1/decode_fmllr_$(basename $data)_it$it + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 9 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices ${lang} ${data} $system + done + done +fi + +if [ $stage -le 5 ] ; then + for set in $kwsets ; do + system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + +if [ $stage -le 6 ] ; then + for set in $kwsets ; do + system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem_17_8.5 + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + +if [ $stage -le 7 ] ; then + for set in $kwsets ; do + 
system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.bg + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + +if [ $stage -le 8 ] ; then + for set in $kwsets ; do + system=exp/tri6_nnet/decode_dev10h.phn.pem + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + diff --git a/egs/babel/s5d/local/search/run_search.sh b/egs/babel/s5d/local/search/run_search.sh new file mode 100755 index 00000000000..2cb40cabb59 --- /dev/null +++ b/egs/babel/s5d/local/search/run_search.sh @@ -0,0 +1,136 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +stage=2 +dir=dev10h.pem +# End configuration section +. ./utils/parse_options.sh +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +. ./conf/common_vars.sh +. ./lang.conf + +#Example script how to run keyword search using the Kaldi-native pipeline + + +if [ $stage -le 0 ]; then + local/generate_confusion_matrix.sh --nj 64 --cmd "$decode_cmd" \ + exp/sgmm5_denlats/dengraph/ exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix +fi + +if [ $stage -le 1 ] ; then + local/train_g2p.sh --cmd "$decode_cmd" data/local/lexicon.txt exp/g2p +fi + +dataset=${dir%%.*} + +set +o nounset +eval kwsets=${!dataset_kwlists[@]} +eval my_ecf_file=\$${dataset}_ecf_file +eval my_rttm_file=\$${dataset}_rttm_file +set -o nounset + +my_array_name=${dataset}_kwlists + +eval kwsets=\( \${!$my_array_name[@]} \) +declare -p kwsets +for set in ${kwsets[@]} ; do + eval my_kwlist=\${$my_array_name[$set]} + declare -p my_kwlist +done +declare -p my_ecf_file +declare -p my_rttm_file + +if [ $stage -le 2 ] ; then + + for set in ${kwsets[@]} ; do + + eval my_kwlist=\${$my_array_name[$set]} + + #This will set up the basic files and converts the F4DE files into Kaldi-native format + local/search/setup.sh $my_ecf_file $my_rttm_file "${my_kwlist}" \ + data/$dir/ data/lang/ data/$dir/kwset_${set} + + # we will search for the IV words normally (i.e. will look for the specificsequence + # of the words + local/search/compile_keywords.sh --filter "OOV=0&&Characters>2"\ + data/$dir/kwset_${set} data/lang data/$dir/kwset_${set}/tmp.2 + + # in addition to the direct search of the IV words, we will set up the proxy + # search as well -- we will use lower nbest, compared to OOV=1 + #-- local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --category "OOV=0" \ + #-- --beam 5 --nbest 10 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \ + #-- data/dev10h.pem/kwset_${set} data/lang data/dev10h.pem/${set}_oov_kws/tmp/L1.lex \ + #-- data/dev10h.pem/${set}_oov_kws/tmp/L1.lex data/dev10h.pem/kwset_${set}/tmp.3 + if [ -d data/local/extend ]; then + echo "Detected extended lexicon system..." 
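+      # The extended lexicon is much larger, so generating the proxies gets
+      # much more expensive; hence (presumably) the harder phone-level pruning
+      # (--phone-beam 5 --phone-nbest 300) and the larger per-job memory
+      # request (--mem 12G) in this branch.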
+ local/search/compile_proxy_keywords.sh --cmd "$decode_cmd --mem 12G" --filter "OOV=1&&Characters>2"\ + --beam 5 --nbest 50 --nj 64 --phone-beam 5 --phone-nbest 300 --confusion-matrix exp/conf_matrix/confusions.txt \ + data/$dir/kwset_${set} data/lang data/local/lexiconp.txt exp/g2p \ + data/$dir/kwset_${set}/tmp.4 + else + local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --filter "OOV=1&&Characters>2"\ + --beam 5 --nbest 50 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \ + data/$dir/kwset_${set} data/lang data/local/lexiconp.txt exp/g2p \ + data/$dir/kwset_${set}/tmp.4 + fi + + cut -f 1 data/local/filtered_lexicon.txt | uconv -f utf8 -t utf8 -x Any-Lower | sort -u | \ + nl | awk '{print $2, $1;}' > data/$dir/kwset_${set}/base_words.txt + paste <(cut -f 1 data/$dir/kwset_${set}/keywords.txt ) \ + <(cut -f 2 data/$dir/kwset_${set}/keywords.txt | \ + uconv -f utf8 -t utf8 -x Any-Lower ) | \ + local/kwords2indices.pl --map-oov 0 data/$dir/kwset_${set}/base_words.txt |\ + perl -ane ' + if (grep (/^0$/, @F[1..$#F])) {print "$F[0] BaseOOV=1\n";} + else { print "$F[0] BaseOOV=0\n";}' |\ + cat - data/$dir/kwset_${set}/categories | sort -u |\ + local/search/normalize_categories.pl > data/$dir/kwset_${set}/categories.2 + mv data/$dir/kwset_${set}/categories data/$dir/kwset_${set}/categories.bak + mv data/$dir/kwset_${set}/categories.2 data/$dir/kwset_${set}/categories + + echo >&2 "Kwset $set processed successfully..." + done +fi + +if [ $stage -le 3 ] ; then + for set in ${kwsets[@]} ; do + fsts-union scp:<(sort data/$dir/kwset_${set}/tmp*/keywords.scp) \ + ark,t:"|gzip -c >data/$dir/kwset_${set}/keywords.fsts.gz" + done +fi + + +exit + +if [ $stage -le 4 ] ; then + for set in $kwsets ; do + for it in $(seq 1 4); do + system=exp/sgmm5_mmi_b0.1/decode_fmllr_$dir_it$it + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 9 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices \ + data/lang data/$dir $system + done + done +fi + +if [ $stage -le 5 ] ; then + for set in $kwsets ; do + system=exp/nnet3/lstm_bidirectional_sp/decode_$dir + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 9 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices \ + data/lang data/$dir $system + done +fi + +if [ $stage -le 6 ] ; then + for set in $kwsets ; do + system=exp/nnet3/lstm_sp/decode_$dir + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices \ + data/lang data/$dir $system + done +fi diff --git a/egs/babel/s5d/local/search/run_syll_search.sh b/egs/babel/s5d/local/search/run_syll_search.sh new file mode 100755 index 00000000000..eb48d836e77 --- /dev/null +++ b/egs/babel/s5d/local/search/run_syll_search.sh @@ -0,0 +1,135 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +stage=2 +dir=dev10h.pem +# End configuration section +. ./conf/common_vars.sh +. ./utils/parse_options.sh +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +. 
./lang.conf + +#Example script how to run keyword search using the Kaldi-native pipeline + + +if [ $stage -le 0 ]; then + local/generate_confusion_matrix.sh --nj 64 --cmd "$decode_cmd" \ + exp/sgmm5_denlats/dengraph/ exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix +fi + +if [ $stage -le 1 ] ; then + local/train_g2p.sh --cmd "$decode_cmd" data/local/lexicon.txt exp/g2p +fi + +dataset=${dir%%.*} +datatype=${dir#*.} + +lang=data/lang.syll +data=data/${dataset}.syll.${datatype} + +set +o nounset +eval kwsets=${!dataset_kwlists[@]} +eval my_ecf_file=\$${dataset}_ecf_file +eval my_rttm_file=\$${dataset}_rttm_file +set -o nounset + +my_array_name=${dataset}_kwlists + +eval kwsets=\( \${!$my_array_name[@]} \) +declare -p kwsets +for set in ${kwsets[@]} ; do + eval my_kwlist=\${$my_array_name[$set]} + declare -p my_kwlist +done +declare -p my_ecf_file +declare -p my_rttm_file + +if [ $stage -le 2 ] ; then + + for set in ${kwsets[@]} ; do + + eval my_kwlist=\${$my_array_name[$set]} + + #This will set up the basic files and converts the F4DE files into Kaldi-native format + local/search/setup.sh $my_ecf_file $my_rttm_file "${my_kwlist}" \ + $data $lang $data/kwset_${set} + + # we will search for the IV words normally (i.e. will look for the specificsequence + # of the words + local/search/compile_keywords.sh --filter "OOV=0&&Characters>2"\ + $data/kwset_${set} $lang $data/kwset_${set}/tmp.2 + + # in addition to the direct search of the IV words, we will set up the proxy + # search as well -- we will use lower nbest, compared to OOV=1 + #-- local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --category "OOV=0" \ + #-- --beam 5 --nbest 10 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \ + #-- ${data}/kwset_${set} ${lang} ${data}/${set}_oov_kws/tmp/L1.lex \ + #-- ${data}/${set}_oov_kws/tmp/L1.lex ${data}/kwset_${set}/tmp.3 + + local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --filter "OOV=1&&Characters>4"\ + --beam 5 --nbest 100 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \ + ${data}/kwset_${set} ${lang} data/local/dict.syll/lexiconp.txt exp/g2p \ + ${data}/kwset_${set}/tmp.4 + + # and finally, replace the categories by the word-level categories + cp data/${dir}/kwset_${set}/categories $data/kwset_${set}/categories + done +fi + +if [ $stage -le 3 ] ; then + for set in ${kwsets[@]} ; do + fsts-union scp:<(sort $data/kwset_${set}/tmp*/keywords.scp) \ + ark,t:"|gzip -c >$data/kwset_${set}/keywords.fsts.gz" + done +fi + + +echo "Directories are set up -- running run-4-syll-anydecode.sh will take care of the rest" +exit 0 + +if [ $stage -le 4 ] ; then + for set in $kwsets ; do + for it in $(seq 1 4); do + system=exp/sgmm5_mmi_b0.1/decode_fmllr_$(basename $data)_it$it + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 9 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices ${lang} ${data} $system + done + done +fi + +if [ $stage -le 5 ] ; then + for set in $kwsets ; do + system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + +if [ $stage -le 6 ] ; then + for set in $kwsets ; do + system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem_17_8.5 + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + +if [ $stage -le 7 ] ; then + for set in $kwsets ; do + 
+for set in ${kwsets[@]} ; do
+  eval my_kwlist=\${$my_array_name[$set]}
+  declare -p my_kwlist
+done
+declare -p my_ecf_file
+declare -p my_rttm_file
+
+if [ $stage -le 2 ] ; then
+
+  for set in ${kwsets[@]} ; do
+
+    eval my_kwlist=\${$my_array_name[$set]}
+
+    # This will set up the basic files and convert the F4DE files into the Kaldi-native format
+    local/search/setup.sh $my_ecf_file $my_rttm_file "${my_kwlist}" \
+      $data $lang $data/kwset_${set}
+
+    # we will search for the IV words normally (i.e. we will look for the
+    # specific sequence of the words)
+    local/search/compile_keywords.sh --filter "OOV=0&&Characters>2"\
+      $data/kwset_${set} $lang $data/kwset_${set}/tmp.2
+
+    # in addition to the direct search for the IV words, we will set up the proxy
+    # search as well -- we will use a lower nbest, compared to OOV=1
+    #-- local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --category "OOV=0" \
+    #--   --beam 5 --nbest 10 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \
+    #--   ${data}/kwset_${set} ${lang} ${data}/${set}_oov_kws/tmp/L1.lex \
+    #--   ${data}/${set}_oov_kws/tmp/L1.lex ${data}/kwset_${set}/tmp.3
+
+    local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --filter "OOV=1&&Characters>4"\
+      --beam 5 --nbest 100 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \
+      ${data}/kwset_${set} ${lang} data/local/dict.syll/lexiconp.txt exp/g2p \
+      ${data}/kwset_${set}/tmp.4
+
+    # and finally, replace the categories by the word-level categories
+    cp data/${dir}/kwset_${set}/categories $data/kwset_${set}/categories
+  done
+fi
+
+if [ $stage -le 3 ] ; then
+  for set in ${kwsets[@]} ; do
+    fsts-union scp:<(sort $data/kwset_${set}/tmp*/keywords.scp) \
+      ark,t:"|gzip -c >$data/kwset_${set}/keywords.fsts.gz"
+  done
+fi
+
+
+echo "Directories are set up -- running run-4-syll-anydecode.sh will take care of the rest"
+exit 0
+
+if [ $stage -le 4 ] ; then
+  for set in ${kwsets[@]} ; do
+    for it in $(seq 1 4); do
+      system=exp/sgmm5_mmi_b0.1/decode_fmllr_$(basename $data)_it$it
+      local/search/search.sh --cmd "$decode_cmd" --min-lmwt 9 --max-lmwt 12 \
+        --extraid ${set} --indices-dir $system/kws_indices ${lang} ${data} $system
+    done
+  done
+fi
+
+if [ $stage -le 5 ] ; then
+  for set in ${kwsets[@]} ; do
+    system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices $lang $data $system
+  done
+fi
+
+if [ $stage -le 6 ] ; then
+  for set in ${kwsets[@]} ; do
+    system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem_17_8.5
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices $lang $data $system
+  done
+fi
+
+if [ $stage -le 7 ] ; then
+  for set in ${kwsets[@]} ; do
+    system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.bg
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices $lang $data $system
+  done
+fi
+
+if [ $stage -le 8 ] ; then
+  for set in ${kwsets[@]} ; do
+    system=exp/tri6_nnet/decode_dev10h.syll.pem
+    local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \
+      --extraid ${set} --indices-dir $system/kws_indices $lang $data $system
+  done
+fi
+
diff --git a/egs/babel/s5d/local/search/score.sh b/egs/babel/s5d/local/search/score.sh
new file mode 100755
index 00000000000..e429b1da030
--- /dev/null
+++ b/egs/babel/s5d/local/search/score.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
+# Apache 2.0.
+
+# Begin configuration section.
+# case_insensitive=true
+extraid=
+min_lmwt=8
+max_lmwt=12
+cmd=run.pl
+stage=0
+ntrue_from=
+# End configuration section.
+
+help_message="$0: score the kwslist using the F4DE scorer from NIST
+  Example:
+    $0 [additional-parameters] <lang-dir> <data-dir> <kws-output-dir>
+  where the most important additional parameters can be:
+    --extraid <extraid>  # for use when a non-default kws task was set up
+                         # (using kws_setup.sh --extraid) for a single kaldi data-dir"
+
+echo $0 $@
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+
+if [ $# -ne 3 ]; then
+  printf "FATAL: incorrect number of variables given to the script\n\n"
+  printf "$help_message\n"
+  exit 1;
+fi
+
+set -e -o pipefail
+
+langdir=$1
+if [ -z $extraid ] ; then
+  kwsdatadir=$2/kws
+else
+  kwsdatadir=$2/kwset_${extraid}
+fi
+kwsoutputdir="$3"
+
+trials=$(cat $kwsdatadir/trials)
+mkdir -p $kwsoutputdir/log/
+
+if [ $stage -le 0 ] ; then
+  if [ -z "$ntrue_from" ]; then
+    for LMWT in $(seq $min_lmwt $max_lmwt) ; do
+      mkdir -p ${kwsoutputdir}_$LMWT/details/
+      mkdir -p ${kwsoutputdir}_$LMWT/scoring/
+
+      # As we need to sweep through different ntrue-scales, we do it in one
+      # parallel command -- that is more efficient than sweeping in a loop
+      # and parallelizing only over the lmwts (usually there will be just a
+      # couple of different lmwts, but the ntrue-scale has a larger dynamic
+      # range).
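+      # For illustration, the perl expression below maps the task index as
+      #   NTRUE=1 -> ntrue=1.0, NTRUE=6 -> ntrue=2.0, ..., NTRUE=21 -> ntrue=5.0,
+      # i.e. the sweep covers ntrue-scales 1.0 to 5.0 in steps of 0.2.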
+      $cmd NTRUE=1:21 $kwsoutputdir/log/score.${LMWT}.NTRUE.log \
+        ntrue=\$\(perl -e 'print 1+(NTRUE-1)/5.0' \) '&&' \
+        cat ${kwsoutputdir}_$LMWT/results \|\
+        local/search/normalize_results_kst.pl --trials $trials --ntrue-scale \$ntrue \|\
+        local/search/filter_kws_results.pl --probs --nbest 200 \|\
+        compute-atwv $trials ark,t:$kwsdatadir/hitlist ark:- \
+        \> ${kwsoutputdir}_$LMWT/scoring/score.NTRUE.txt
+
+      ntrue=$(grep ATWV ${kwsoutputdir}_$LMWT/scoring/score.*.txt | \
+              sort -k2,2nr -t '=' | head -n 1 | \
+              sed 's/.*score\.\([0-9][0-9]*\)\.txt.*/\1/g')
+      # The calculation of ntrue must be the same as in the command above
+      echo "$ntrue" > ${kwsoutputdir}_$LMWT/details/ntrue_raw
+      ntrue=$(perl -e "print 1+($ntrue-1)/5.0")
+      echo "$ntrue" > ${kwsoutputdir}_$LMWT/details/ntrue
+    done
+  else
+    for LMWT in $(seq $min_lmwt $max_lmwt) ; do
+      mkdir -p ${kwsoutputdir}_$LMWT/details/
+      mkdir -p ${kwsoutputdir}_$LMWT/scoring/
+
+      cp ${ntrue_from}_${LMWT}/details/ntrue ${kwsoutputdir}_${LMWT}/details/ntrue
+      [ -f ${ntrue_from}_${LMWT}/details/ntrue_raw ] && \
+        cp ${ntrue_from}_${LMWT}/details/ntrue_raw ${kwsoutputdir}_${LMWT}/details/ntrue_raw
+      echo "$ntrue_from" > ${kwsoutputdir}_${LMWT}/details/ntrue_from
+    done
+  fi
+fi
+
+if [ $stage -le 1 ] ; then
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/normalize.LMWT.log \
+    cat ${kwsoutputdir}_LMWT/results \|\
+    local/search/normalize_results_kst.pl --trials $trials --ntrue-scale \$\(cat ${kwsoutputdir}_LMWT/details/ntrue\)\
+    \> ${kwsoutputdir}_LMWT/details/results
+
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/score.final.LMWT.log \
+    cat ${kwsoutputdir}_LMWT/details/results \|\
+    compute-atwv $trials ark,t:$kwsdatadir/hitlist ark:- \
+    ${kwsoutputdir}_LMWT/details/alignment.csv \> ${kwsoutputdir}_LMWT/details/score.txt '&&' \
+    cp ${kwsoutputdir}_LMWT/details/score.txt ${kwsoutputdir}_LMWT/score.txt
+
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/per-category-stats.LMWT.log \
+    cat ${kwsoutputdir}_LMWT/details/alignment.csv \|\
+    perl local/search/per_category_stats.pl --sweep-step 0.005 $trials \
+    $kwsdatadir/categories \> ${kwsoutputdir}_LMWT/details/per-category-score.txt
+fi
+
+if [ $stage -le 2 ]; then
+  if [ -f $kwsdatadir/f4de_attribs ] ; then
+    language=""
+    flen=0.01
+    kwlist_name=""
+    . $kwsdatadir/f4de_attribs # override the previous variables
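+    # f4de_attribs is a small shell snippet written by local/search/setup.sh;
+    # it typically looks like this (values shown are just an example):
+    #   kwlist_name=kwlist.xml
+    #   language=vietnamese
+    #   flen=0.01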
+    ecf=$kwsdatadir/ecf.xml
+    rttm=$kwsdatadir/rttm
+    kwlist=$kwsdatadir/kwlist.xml
+
+    $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_prepare.LMWT.log \
+      mkdir -p ${kwsoutputdir}_LMWT/f4de/ '&&' cat $kwlist \| \
+      local/search/annotate_kwlist.pl $kwsdatadir/categories \> ${kwsoutputdir}_LMWT/f4de/kwlist.xml
+
+    $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_write_kwslist.LMWT.log \
+      cat ${kwsoutputdir}_LMWT/details/results \| \
+      utils/int2sym.pl -f 2 $kwsdatadir/utt.map \| \
+      local/search/utt_to_files.pl --flen $flen $kwsdatadir/../segments \|\
+      local/search/write_kwslist.pl --flen $flen --language $language \
+        --kwlist-id $kwlist_name \> ${kwsoutputdir}_LMWT/f4de/kwslist.xml
+
+    $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_score.LMWT.log \
+      KWSEval -e $ecf -r $rttm -t ${kwsoutputdir}_LMWT/f4de/kwlist.xml -a \
+        --zGlobalMeasures Optimum --zGlobalMeasures Supremum \
+        -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \
+        -O -B -q 'OOV:regex=.*' -q 'BaseOOV:regex=.*' \
+        -s ${kwsoutputdir}_LMWT/f4de/kwslist.xml -c -o -b -d -f ${kwsoutputdir}_LMWT/f4de/
+
+    $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_report.LMWT.log \
+      local/kws_oracle_threshold.pl --duration $trials \
+        ${kwsoutputdir}_LMWT/f4de/alignment.csv \> ${kwsoutputdir}_LMWT/f4de/metrics.txt
+  fi
+fi
+
+echo "$0: Done"
+exit 0;
+
+
diff --git a/egs/babel/s5d/local/search/search.sh b/egs/babel/s5d/local/search/search.sh
new file mode 100755
index 00000000000..200a49d8e86
--- /dev/null
+++ b/egs/babel/s5d/local/search/search.sh
@@ -0,0 +1,206 @@
+#!/bin/bash
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
+# License: Apache 2.0
+
+
+help_message="$(basename $0): do keyword indexing and search.  data-dir is assumed to have
+  kws/ subdirectory that specifies the terms to search for.  Output is in
+  decode-dir/kws/
+  Usage:
+    $(basename $0) <lang-dir> <data-dir> <decode-dir>"
+
+# Begin configuration section.
+min_lmwt=8
+max_lmwt=12
+cmd=run.pl
+model=
+skip_scoring=false
+skip_optimization=false # true can speed it up if #keywords is small.
+max_states=350000
+indices_dir=
+kwsout_dir=
+stage=0
+word_ins_penalty=0
+extraid=
+silence_word=  # specify this if you did so in kws_setup.sh; it's more accurate.
+strict=false
+duptime=0.6
+ntrue_scale=1.0
+nbest=-1
+max_silence_frames=50
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+set -u
+set -e
+set -o pipefail
+
+
+if [[ "$#" -ne "3" ]] ; then
+  echo -e "$0: FATAL: wrong number of script parameters!\n\n"
+  printf "$help_message\n\n"
+  exit 1;
+fi
+
+silence_opt=
+
+langdir=$1
+datadir=$2
+decodedir=$3
+
+if [ -z $extraid ] ; then
+  kwsdatadir=$datadir/kws
+else
+  kwsdatadir=$datadir/kwset_${extraid}
+fi
+
+if [ -z $extraid ] ; then
+  kwsoutdir=$decodedir/kws
+else
+  kwsoutdir=$decodedir/kwset_${extraid}
+fi
+
+
+if [ -z $indices_dir ]; then
+  indices_dir=$kwsoutdir
+fi
+
+if [ ! -z "$model" ]; then
+  model_flags="--model $model"
+else
+  model_flags=
+fi
+
+mkdir -p $kwsoutdir
+for d in "$datadir" "$kwsdatadir" "$langdir" "$decodedir"; do
+  if [ ! -d "$d" ]; then
+    echo "$0: FATAL: expected directory $d to exist"
+    exit 1;
+  fi
+done
+
+echo "$0: Searching: $kwsdatadir"
+duration=$(cat $kwsdatadir/trials)
+echo "$0: Duration: $duration"
+
+
+frame_subsampling_factor=1
+if [ -f $decodedir/../frame_subsampling_factor ] ; then
+  frame_subsampling_factor=$(cat $decodedir/../frame_subsampling_factor)
+  echo "$0: Frame subsampling factor autodetected: $frame_subsampling_factor"
+elif [ -f $decodedir/../../frame_subsampling_factor ] ; then
+  frame_subsampling_factor=$(cat $decodedir/../../frame_subsampling_factor)
+  echo "$0: Frame subsampling factor autodetected: $frame_subsampling_factor"
+fi
+
+if [ $stage -le 0 ] ; then
+  if [ ! -f $indices_dir/.done.index ] ; then
+    [ ! -d $indices_dir ] && mkdir $indices_dir
+    for lmwt in $(seq $min_lmwt $max_lmwt) ; do
+      indices=${indices_dir}_$lmwt
+      mkdir -p $indices
+
+      acwt=$(perl -e "print 1.0/$lmwt")
+      [ ! -z $silence_word ] && silence_opt="--silence-word $silence_word"
+      steps/make_index.sh $silence_opt --cmd "$cmd" --acwt $acwt $model_flags\
+        --skip-optimization $skip_optimization --max-states $max_states \
+        --word-ins-penalty $word_ins_penalty --max-silence-frames $max_silence_frames\
+        --frame-subsampling-factor ${frame_subsampling_factor} \
+        $kwsdatadir $langdir $decodedir $indices || exit 1
+    done
+    touch $indices_dir/.done.index
+  else
+    echo "$0: Assuming indexing has been already done. If you really need to re-run "
+    echo "$0: the indexing again, delete the file $indices_dir/.done.index"
+  fi
+fi
+
+keywords=$kwsdatadir/keywords.fsts
+if [ -f $keywords ] ; then
+  echo "$0: Using ${keywords} for search"
+  keywords="ark:$keywords"
+elif [ -f ${keywords}.gz ] ; then
+  echo "$0: Using ${keywords}.gz for search"
+  keywords="ark:gunzip -c ${keywords}.gz |"
+else
+  echo "$0: The keyword file ${keywords}[.gz] does not exist"
+  exit 1
+fi
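+# At this point $keywords holds a Kaldi rspecifier, i.e. one of (illustrative)
+#   ark:data/dev10h.pem/kwset_kwlist/keywords.fsts
+#   ark:gunzip -c data/dev10h.pem/kwset_kwlist/keywords.fsts.gz |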
+
+if [ $stage -le 1 ]; then
+  for lmwt in $(seq $min_lmwt $max_lmwt) ; do
+    kwsoutput=${kwsoutdir}_$lmwt
+    indices=${indices_dir}_$lmwt
+    nj=$(cat $indices/num_jobs)
+
+
+    for f in $indices/index.1.gz ; do
+      [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+    done
+
+    mkdir -p $kwsoutput/log
+    $cmd JOB=1:$nj $kwsoutput/log/search.JOB.log \
+      set -e -o pipefail '&&' \
+      kws-search --strict=$strict --negative-tolerance=-1 \
+        --frame-subsampling-factor=${frame_subsampling_factor} \
+        "ark:gzip -cdf $indices/index.JOB.gz|" "$keywords" \
+        "ark,t:| sort -u | gzip -c > $kwsoutput/result.JOB.gz" \
+        "ark,t:| sort -u | gzip -c > $kwsoutput/stats.JOB.gz" || exit 1;
+  done
+fi
+
+if [ $stage -le 2 ]; then
+  for lmwt in $(seq $min_lmwt $max_lmwt) ; do
+    kwsoutput=${kwsoutdir}_$lmwt
+    indices=${indices_dir}_$lmwt
+    nj=$(cat $indices/num_jobs)
+
+    # This is a memory-efficient way to do the filtering.
+    # We do it this way because the result.* files can be fairly big
+    # and we do not want to run into trouble with memory.
+    files=""
+    for job in $(seq 1 $nj); do
+      if [ -f $kwsoutput/result.${job}.gz ] ; then
+        files="$files <(gunzip -c $kwsoutput/result.${job}.gz)"
+      elif [ -f $kwsoutput/result.${job} ] ; then
+        files="$files $kwsoutput/result.${job}"
+      else
+        echo >&2 "The file $kwsoutput/result.${job}[.gz] does not exist"
+        exit 1
+      fi
+    done
+    # we have to call it using eval, as we need bash to interpret
+    # the (possible) process substitutions in the case of gz files;
+    # bash -c would probably work as well, but would spawn another
+    # shell instance
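+    # e.g. with nj=2 and gzipped results, $files expands to (illustrative)
+    #   " <(gunzip -c .../result.1.gz) <(gunzip -c .../result.2.gz)"
+    # so the eval below effectively runs: sort -m -u <(...) <(...)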
+    eval "sort -m -u $files" |\
+      local/search/filter_kws_results.pl --likes --nbest $nbest > $kwsoutput/results || exit 1
+  done
+fi
+
+if [ -z $extraid ] ; then
+  extraid_flags=
+else
+  extraid_flags="  --extraid ""$extraid"" "
+fi
+
+if [ $stage -le 4 ]; then
+  if $skip_scoring ; then
+    echo "$0: Not scoring, because --skip-scoring true was issued"
+  elif [ ! -x local/kws_score.sh ] ; then
+    echo "$0: Not scoring, because the file local/kws_score.sh is not present"
+  else
+    echo "$0: Scoring KWS results"
+    local/search/score.sh --cmd "$decode_cmd" \
+      --min-lmwt $min_lmwt --max-lmwt $max_lmwt $extraid_flags \
+      $langdir $datadir ${kwsoutdir} || exit 1;
+  fi
+fi
+
+echo "$0: Done"
+exit 0
+
diff --git a/egs/babel/s5d/local/search/setup.sh b/egs/babel/s5d/local/search/setup.sh
new file mode 100755
index 00000000000..d4e2013a443
--- /dev/null
+++ b/egs/babel/s5d/local/search/setup.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+flen=0.01
+icu_transform="Any-Lower"
+# End configuration section
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+
+if [ $# -eq 6 ]; then
+  ecf=$1
+  rttm=$2
+  kwlist=$3
+  data=$4
+  lang=$5
+  output=$6
+elif [ $# -eq 5 ]; then
+  ecf=$1
+  rttm=""
+  kwlist=$2
+  data=$3
+  lang=$4
+  output=$5
+else
+  echo >&2 "Incorrect number of script parameters!"
+  exit 1
+fi
+
+mkdir -p $output
+for f in $ecf $kwlist; do
+  [ ! -f $f ] && echo "Mandatory file \"$f\" does not exist." && exit 1
+done
+
+
+# The first way of computing the duration produced numbers significantly
+# different from the numbers reported by F4DE. I'm leaving it here to document
+# the fact that the signal_duration field is not the same number as the sum
+# of the individual durations (the dur field in each <excerpt>)
+#duration=`head -n 1 $ecf | sed 's/.*signal_duration=\"\([0-9.][0-9.]*\)\".*/\1/g'`
+#duration=`echo print $duration/2.0 | perl`
+
+duration=$(cat $ecf | perl -ne 'BEGIN{$dur=0;}{next unless $_ =~ /dur\=/; s/.*dur="([^"]*)".*/$1/; $dur+=$_;}END{print $dur/2}')
+
+echo $duration > $output/trials
+echo $flen > $output/frame_length
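+# Sanity example (illustrative, not new behavior): an ecf with two entries
+#   <excerpt ... dur="300.0"/> and <excerpt ... dur="100.0"/>
+# makes the one-liner above sum 400.0 and write 200.0 into the trials file;
+# the division by two mirrors the commented-out variant, which also halved
+# the signal_duration value.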
+echo "Number of trials: `cat $output/trials`"
+echo "Frame lengths: `cat $output/frame_length`"
+
+echo "Generating map files"
+cat $data/segments | awk 'BEGIN{i=1}; {print $1, i; i+=1;}' > $output/utt.map
+cat $data/wav.scp | awk 'BEGIN{i=1}; {print $1, i; i+=1;}' > $output/wav.map
+
+# This does not work: cp --no-preserve=all $ecf $output/ecf.xml
+cat $ecf > $output/ecf.xml
+cat $kwlist > $output/kwlist.xml
+[ ! -z "$rttm" ] && cat $rttm > $output/rttm
+
+{
+  echo "kwlist_name=`basename $kwlist`"
+  language=$(grep kwlist $kwlist | head -n 1 | sed -E 's/.*language="([^"]*)".*/\1/g')
+  echo "language=$language"
+  echo "flen=$flen"
+} > $output/f4de_attribs
+
+cat ${kwlist} | \
+  perl -ne '{
+    chomp;
+    next unless (m/<kwtext>/ || m/kwid/);
+    if ($_ =~ m/<kwtext>/) {
+      s/.*<kwtext>(.*)<\/kwtext>.*/$1/g;
+      die "Undefined format of the kwlist file!" unless defined $kwid;
+      print $kwid . "\t" . $_ . "\n"; }
+    else {
+      s/.*kwid="(.*)".*/$1/g; $kwid=$_;};
+  }' > $output/keywords.txt
+
+
+command -v uconv >/dev/null 2>&1 || {
+  echo >&2 "I require uconv but it's not installed. Use $KALDI_ROOT/tools/extras/install_icu.sh to install it (or use the system packager)";
+  exit 1;
+}
+
+if [ -z "$icu_transform" ]; then
+  cp $lang/words.txt $output/words.txt
+else
+  uconv -f utf8 -t utf8 -x "${icu_transform}" -o $output/words.txt $lang/words.txt
+fi
+
+if [ -z "$icu_transform" ]; then
+  cat $output/keywords.txt
+else
+  paste <(cut -f 1 $output/keywords.txt ) \
+        <(cut -f 2 $output/keywords.txt | \
+          uconv -f utf8 -t utf8 -x "${icu_transform}" )
+fi | local/kwords2indices.pl --map-oov 0 $output/words.txt |\
+  sort -u > $output/keywords.int
+
+
+echo "Generating categories"
+{
+  local/search/create_categories.pl $output/keywords.txt
+  cat $output/keywords.int | perl -ane '
+    if (grep (/^0$/, @F[1..$#F])) {print "$F[0] OOV=1\n";}
+    else { print "$F[0] OOV=0\n";}'
+} | local/search/normalize_categories.pl > $output/categories
+
+if [ ! -z "$rttm" ] && [ -f $rttm ] ; then
+  local/search/rttm_to_hitlists.sh --segments $data/segments --utt-table $output/utt.map\
+    $rttm $kwlist $ecf $output/tmp $output/hitlist
+else
+  echo "Not generating hitlist, scoring won't be possible"
+fi
+echo "Done"
+
+
diff --git a/egs/babel/s5d/local/search/utt_to_files.pl b/egs/babel/s5d/local/search/utt_to_files.pl
new file mode 100755
index 00000000000..ad5da8a50bf
--- /dev/null
+++ b/egs/babel/s5d/local/search/utt_to_files.pl
@@ -0,0 +1,62 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+# Converts the kws results with utterance-relative timing into whole-file offsets
+use strict;
+use warnings;
+use utf8;
+use Data::Dumper;
+use Getopt::Long;
+
+my $flen = 0.01;
+
+GetOptions ("flen=f" => \$flen) or die "$0: Cannot parse command-line options\n";
+
+my $segments=$ARGV[0];
+my %SEGMENTS;
+
+open(SEG, $segments) or die "Cannot open segment file $segments";
+while(my $line = <SEG>) {
+  chomp $line;
+  my @entries = split(" ", $line);
+  die "The format of line \"$line\" does not conform to the segments file format" if @entries != 4;
+
+  $SEGMENTS{$entries[0]} = \@entries;
+}
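+# A worked example (illustrative numbers): with the segments entry
+#   utt-001 file-A 7.5 12.5
+# and flen=0.01, the loop below turns a result line "KW1 utt-001 100 200 0.9"
+# into "KW1 file-A 850 950 0.9" (both frame offsets shifted by 7.5/0.01=750).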
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my @entries = split(" ", $line);
+  die "The format of line \"$line\" does not conform to the result.* file format" if @entries != 5;
+
+  my $kw = $entries[0];
+  my $utt = $entries[1];
+  my $start = $entries[2];
+  my $end = $entries[3];
+  my $score = $entries[4];
+
+  die "The utterance $utt is not in the segments file" unless exists $SEGMENTS{$utt};
+  my $file = $SEGMENTS{$utt}->[1];
+  my $utt_start = int( 0.5 + $SEGMENTS{$utt}->[2] / $flen);
+  my $utt_end = int(0.5 + $SEGMENTS{$utt}->[3] / $flen);
+
+  $start += $utt_start;
+  $end += $utt_start;
+  print "$kw $file $start $end $score\n";
+}
diff --git a/egs/babel/s5d/local/search/write_kwslist.pl b/egs/babel/s5d/local/search/write_kwslist.pl
new file mode 100755
index 00000000000..ade87212829
--- /dev/null
+++ b/egs/babel/s5d/local/search/write_kwslist.pl
@@ -0,0 +1,134 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+my $Usage = <<EOU;
+Usage: cat results | $0 [options] > kwslist.xml
+
+Allowed options:
+  --flen        : duration (in seconds) of one audio/feature frame
+  --language    : language (string, default "")
+  --kwlist-id   : kwlist.xml name (string, default "")
+  --system-id   : name of the system (string, default "")
+  --digits      : how many digits should the scores be rounded to?
+                  (int, default 2). Sometimes F4DE gets extremely slow
+                  when the scores have too many digits (perhaps some sweeping
+                  issue). This switch can be used to prevent it.
+EOU
+
+use strict;
+use warnings;
+use utf8;
+
+use POSIX;
+use Data::Dumper;
+use Getopt::Long;
+
+my $flen = 0.01;
+my $language="";
+my $kwlist_filename="";
+my $system_id="";
+my $digits = 2;
+
+GetOptions("flen=f" => \$flen,
+           "language=s" => \$language,
+           "kwlist-id=s" => \$kwlist_filename,
+           "system-id=s" => \$system_id,
+           "digits=i" => \$digits) or do {
+  print STDERR "Cannot parse the command-line options.\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue.\n";
+};
+
+if (@ARGV != 0) {
+  print STDERR "Incorrect number of command-line arguments\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue.\n";
+}
+
+sub KwsOutputSort {
+  my $a = shift @_;
+  my $b = shift @_;
+
+  if ($a->[4] != $b->[4]) {
+    #score
+    return $b->[4] <=> $a->[4];
+  } elsif ($a->[1] ne $b->[1]) {
+    return $a->[1] cmp $b->[1];
+  } else {
+    return $a->[2] <=> $b->[2];
+  }
+}
+
+sub PrettyPrint {
+  my @instances = sort {KwsOutputSort($a, $b)} @{shift @_};
+
+  return if @instances <= 0;
+  my $kwid=$instances[0]->[0];
+
+  print "  <detected_kwlist kwid=\"$kwid\" search_time=\"1\" oov_count=\"0\">\n";
+  foreach my $elem(@instances) {
+    (my $kwidx, my $file, my $start, my $end, my $score) = @{$elem};
+    my $filename="file=\"$file\"";
+
+    # The decision has to be made on the already rounded number (otherwise it
+    # can confuse F4DE). If we made the decision on the non-rounded score,
+    # F4DE would still see only the rounded score, so the decision would not
+    # be correctly aligned with the score (especially, some hits with printed
+    # score 0.5 would carry the decision "YES" and others with the same
+    # printed score the decision "NO").
+    $score = sprintf "%.${digits}f", $score;
+    my $decision=$score >= 0.5 ? "decision=\"YES\"" : "decision=\"NO\"";
+    my $tbeg = $start * $flen;
+    my $dur = $end * $flen - $tbeg;
+
+    $tbeg=sprintf "tbeg=\"%.${digits}f\"", $tbeg;
+    $dur=sprintf "dur=\"%.${digits}f\"", $dur;
+    $score=sprintf "score=\"%.${digits}f\"", $score;
+    my $channel="channel=\"1\"";
+
+    print "    <kw $filename $channel $tbeg $dur $score $decision/>\n";
+  }
+  print "  </detected_kwlist>\n";
+}
+
+my $KWID="";
+my @putative_hits;
+
+print "<kwslist kwlist_filename=\"$kwlist_filename\" language=\"$language\" system_id=\"$system_id\">\n";
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  (my $kwid, my $file, my $start, my $end, my $score) = split " ", $line;
+
+  if ($kwid ne $KWID) {
+    PrettyPrint(\@putative_hits) if $KWID;
+    $KWID=$kwid;
+    @putative_hits = ();
+  }
+
+  push @putative_hits, [$kwid, $file, $start, $end, $score];
+
+}
+PrettyPrint(\@putative_hits) if $KWID;
+
+print "</kwslist>\n"
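+# E.g. a (hypothetical) input line "KW204-0001 file-A 850 950 0.87" is thus
+# rendered, with the default digits=2, roughly as
+#   <detected_kwlist kwid="KW204-0001" search_time="1" oov_count="0">
+#     <kw file="file-A" channel="1" tbeg="8.50" dur="1.00" score="0.87" decision="YES"/>
+#   </detected_kwlist>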
diff --git a/egs/babel/s5d/local/search_index.sh b/egs/babel/s5d/local/search_index.sh
new file mode 100755
index 00000000000..9e7cdb77f3d
--- /dev/null
+++ b/egs/babel/s5d/local/search_index.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0
+
+# Begin configuration section.
+cmd=run.pl
+nbest=-1
+strict=true
+indices_dir=
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 2 ]; then
+  echo "Usage: steps/search_index.sh [options] <kws-data-dir> <kws-dir>"
+  echo " e.g.: steps/search_index.sh data/kws exp/sgmm2_5a_mmi/decode/kws/"
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --nbest <int>             # return n best results. (-1 means all)"
+  echo "  --indices-dir <dir>       # where the indices should be stored; by default they will be in <kws-dir>"
+  exit 1;
+fi
+
+
+kwsdatadir=$1;
+kwsdir=$2;
+
+if [ -z $indices_dir ] ; then
+  indices_dir=$kwsdir
+fi
+
+mkdir -p $kwsdir/log;
+nj=`cat $indices_dir/num_jobs` || exit 1;
+keywords=$kwsdatadir/keywords.fsts;
+
+for f in $indices_dir/index.1.gz $keywords; do
+  [ ! -f $f ] && echo "make_index.sh: no such file $f" && exit 1;
+done
+
+$cmd JOB=1:$nj $kwsdir/log/search.JOB.log \
+  kws-search --strict=$strict --negative-tolerance=-1 \
+   "ark:gzip -cdf $indices_dir/index.JOB.gz|" ark:$keywords \
+   "ark,t:|int2sym.pl -f 2 $kwsdatadir/utter_id | sort -u | gzip > $kwsdir/result.JOB.gz" \
+   "ark,t:|int2sym.pl -f 2 $kwsdatadir/utter_id | sort -u | gzip > $kwsdir/stats.JOB.gz" || exit 1;
+
+exit 0;
diff --git a/egs/babel/s5d/local/setup_categories.sh b/egs/babel/s5d/local/setup_categories.sh
new file mode 100644
index 00000000000..ffc65173786
--- /dev/null
+++ b/egs/babel/s5d/local/setup_categories.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+# End configuration section
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+set=kwlist
+output=data/dev10h.pem/kwset_${set}/
+
+{
+  local/search/create_categories.pl $output/keywords.txt
+  cat $output/keywords.int | perl -ane '
+    if (grep (/^0$/, @F[1..$#F])) {print "$F[0] OOV=1\n";}
+    else { print "$F[0] OOV=0\n";}'
+} | local/search/normalize_categories.pl > $output/categories
+cut -f 1 data/local/filtered_lexicon.txt | uconv -f utf8 -t utf8 -x Any-Lower | sort -u | \
+  nl | awk '{print $2, $1;}' > data/dev10h.pem/kwset_${set}/base_words.txt
+paste <(cut -f 1 data/dev10h.pem/kwset_${set}/keywords.txt ) \
+      <(cut -f 2 data/dev10h.pem/kwset_${set}/keywords.txt | \
+        uconv -f utf8 -t utf8 -x Any-Lower ) | \
+  local/kwords2indices.pl --map-oov 0 data/dev10h.pem/kwset_${set}/base_words.txt |\
+  perl -ane '
+    if (grep (/^0$/, @F[1..$#F])) {print "$F[0] BaseOOV=1\n";}
+    else { print "$F[0] BaseOOV=0\n";}' |\
+  cat - data/dev10h.pem/kwset_${set}/categories | sort -u |\
+  local/search/normalize_categories.pl > data/dev10h.pem/kwset_${set}/categories.2
+mv data/dev10h.pem/kwset_${set}/categories data/dev10h.pem/kwset_${set}/categories.bak
+mv data/dev10h.pem/kwset_${set}/categories.2 data/dev10h.pem/kwset_${set}/categories
+
+cp data/dev10h.pem/kwset_kwlist/categories data/dev10h.phn.pem/kwset_kwlist/categories
+cp data/dev10h.pem/kwset_kwlist/categories data/dev10h.syll.pem/kwset_kwlist/categories
+find exp/ -name ".done.kwset.kwlist" | xargs rm
+
diff --git a/egs/babel/s5d/local/shadow_set_kws_search.sh b/egs/babel/s5d/local/shadow_set_kws_search.sh
new file mode 100755
index 00000000000..a67a3a57f6a
--- /dev/null
+++ b/egs/babel/s5d/local/shadow_set_kws_search.sh
@@ -0,0 +1,265 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
+# Apache 2.0.
+
+# Fail at any unhandled non-zero error code
+set -e
+set -o pipefail
+
+help_message="$0: run keyword search on a shadow data set (several data sets decoded
+  together as one directory) and split the results back to the individual sets.
+  The first three parameters are the shadow data dir, the lang dir and the decode
+  dir; the remaining parameters are the original (single-set) data dirs.
+  Example:
+    $0 <data-dir> <lang-dir> <decode-dir> <data-dir1> [data-dir2 [data-dir3 [ ...] ]]"
+
+# Begin configuration section.
+#acwt=0.0909091
+min_lmwt=7
+max_lmwt=17
+duptime=0.6
+cmd=run.pl
+model=
+skip_scoring=false
+stage=0
+strict=true
+skip_optimization=false
+max_states=150000
+word_ins_penalty=0
+index_only=false
+ntrue_scale=0.1
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [[ "$#" -le "2" ]] ; then
+  echo -e "FATAL: wrong number of script parameters!\n\n"
+  printf "$help_message\n\n"
+  exit 1;
+fi
+
+
+datadir=$1
+langdir=$2
+decodedir=$3
+shift; shift; shift;
+datasetA=$1
+datasetB=$2
+
+
+if [[ ! -d "$langdir" ]] ; then
+  echo "FATAL: the lang directory does not exist"
+  exit 1;
+fi
+if [[ ! -d "$decodedir" ]] ; then
+  echo "FATAL: the directory with decoded files does not exist"
+  exit 1;
+fi
+
+for splitdatadir in $@ ; do
+  kwsdatadir=$splitdatadir/kws
+  if [ ! -d "$splitdatadir" ] ; then
+    echo "FATAL: the data directory $splitdatadir does not exist"
+    exit 1;
+  fi
+  if [ ! -d "$kwsdatadir" ] ; then
+    echo "FATAL: the data directory $kwsdatadir does not exist"
+    exit 1;
+  fi
+  if [ ! -f "$kwsdatadir/ecf.xml" ] ; then
+    echo "FATAL: the $kwsdatadir does not contain the ecf.xml file"
+    exit 1;
+  fi
+done
+
+kwsdatadir=$datadir/kws
+
+! durationA=`head -1 $datasetA/kws/ecf.xml |\
+  grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\
+  perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2;}'` &&
+  echo "Error getting duration from $datasetA/kws/ecf.xml" && exit 1;
+
+
+! durationB=`head -1 $datasetB/kws/ecf.xml |\
+  grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\
+  perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2;}'` &&
+  echo "Error getting duration from $datasetB/kws/ecf.xml" && exit 1;
+
+[ -z $durationA ] && echo "Error getting duration from $datasetA/kws/ecf.xml" && exit 1;
+[ -z $durationB ] && echo "Error getting duration from $datasetB/kws/ecf.xml" && exit 1;
+
+if [ ! -z "$model" ]; then
+  model_flags="--model $model"
+fi
+
+mkdir -p $decodedir/kws/
+if [ $stage -le 0 ] ; then
+  echo "Making KWS indices..."
+  if [ ! -f $decodedir/kws/.done.index ] ; then
+    for lmwt in `seq $min_lmwt $max_lmwt` ; do
+      kwsoutdir=$decodedir/kws_$lmwt
+      mkdir -p $kwsoutdir
+
+      acwt=`perl -e "print (1.0/$lmwt);"`
+      steps/make_index.sh --strict $strict --cmd "$cmd" --max-states $max_states\
+        --acwt $acwt $model_flags --skip-optimization $skip_optimization \
+        --word_ins_penalty $word_ins_penalty \
+        $kwsdatadir $langdir $decodedir $kwsoutdir || exit 1
+    done
+    touch $decodedir/kws/.done.index
+  else
+    echo "Assuming indexing has been already done. If you really need to re-run "
+    echo "the indexing again, delete the file $decodedir/kws/.done.index"
+  fi
+fi
+
+if $index_only ; then
+  echo "Indexing only was requested, exiting now..."
+  exit 0
+fi
+
+if [ $stage -le 1 ] ; then
+  echo "Searching KWS indices..."
+  for lmwt in `seq $min_lmwt $max_lmwt` ; do
+    kwsoutdir=$decodedir/kws_$lmwt
+    dirA=$decodedir/`basename $datasetA`/kws_$lmwt
+    dirB=$decodedir/`basename $datasetB`/kws_$lmwt
+    mkdir -p $dirA
+    mkdir -p $dirB
+
+    steps/search_index.sh --cmd "$cmd" $kwsdatadir $kwsoutdir || exit 1
+
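+    # The keyword ids in a shadow setup carry a dataset suffix, so e.g. a
+    # (hypothetical) result line "KW305-0004-A utt17 10 50 0.9" belongs to
+    # dataset A; the sed below strips the suffix, leaving
+    # "KW305-0004 utt17 10 50 0.9" in that dataset's results file.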
+    [ ! -f $datasetA/kws/utter_id ] && echo "File $datasetA/kws/utter_id must exist!" && exit 1;
+    cat $kwsoutdir/result.* | \
+      grep -F -f <(cut -f 1 -d ' ' $datasetA/kws/utter_id ) |\
+      grep "^KW[-a-zA-Z0-9]*-A " | \
+      sed 's/^\(KW.*\)-A /\1 /g' > $dirA/results
+
+    [ ! -f $datasetB/kws/utter_id ] && echo "File $datasetB/kws/utter_id must exist!" && exit 1;
+    cat $kwsoutdir/result.* | \
+      grep -F -f <(cut -f 1 -d ' ' $datasetB/kws/utter_id ) |\
+      grep "^KW[-a-zA-Z0-9]*-B " | \
+      sed 's/^\(KW.*\)-B /\1 /g' > $dirB/results
+
+
+    dirA=$decodedir/`basename $datasetA`_`basename $datasetB`/kws_$lmwt
+    dirB=$decodedir/`basename $datasetB`_`basename $datasetA`/kws_$lmwt
+    mkdir -p $dirA
+    mkdir -p $dirB
+    [ ! -f $datasetA/kws/utter_id ] && echo "File $datasetA/kws/utter_id must exist!" && exit 1;
+    cat $kwsoutdir/result.* | \
+      grep -F -f <(cut -f 1 -d ' ' $datasetA/kws/utter_id ) |\
+      grep "^KW[-a-zA-Z0-9]*-B " | \
+      sed 's/^\(KW.*\)-B /\1 /g' > $dirA/results
+
+    [ ! -f $datasetB/kws/utter_id ] && echo "File $datasetB/kws/utter_id must exist!" && exit 1;
+    cat $kwsoutdir/result.* | \
+      grep -F -f <(cut -f 1 -d ' ' $datasetB/kws/utter_id ) |\
+      grep "^KW[-a-zA-Z0-9]*-A " | \
+      sed 's/^\(KW.*\)-A /\1 /g' > $dirB/results
+  done
+fi
+
+rootdirA=$decodedir/`basename $datasetA`
+rootdirB=$decodedir/`basename $datasetB`
+rootdirAB=$decodedir/`basename $datasetA`_`basename $datasetB`
+rootdirBA=$decodedir/`basename $datasetB`_`basename $datasetA`
+
+
+echo "Processing $datasetA"
+if [ $stage -le 2 ] ; then
+  $cmd LMWT=$min_lmwt:$max_lmwt $rootdirA/kws/kws_write_normalized.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cat $rootdirA/kws_LMWT/results \| \
+    utils/write_kwslist.pl --flen=0.01 --duration=$durationA \
+      --segments=$datadir/segments --normalize=true --remove-dup=true\
+      --map-utter=$kwsdatadir/utter_map --digits=3 - $rootdirA/kws_LMWT/kwslist.xml || exit 1
+
+  $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_write_normalized.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cat $rootdirAB/kws_LMWT/results \| \
+    utils/write_kwslist.pl --flen=0.01 --duration=$durationA \
+      --segments=$datadir/segments --normalize=true --remove-dup=true\
+      --map-utter=$kwsdatadir/utter_map --digits=3 - $rootdirAB/kws_LMWT/kwslist.xml || exit 1
+fi
+
+if [ $stage -le 3 ] ; then
+  $cmd LMWT=$min_lmwt:$max_lmwt $rootdirA/kws/kws_write_unnormalized.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cat $rootdirA/kws_LMWT/results \| \
+    utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationA \
+      --segments=$datadir/segments --normalize=false --remove-dup=true\
+      --map-utter=$kwsdatadir/utter_map - $rootdirA/kws_LMWT/kwslist.unnormalized.xml || exit 1
+
+  $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_write_unnormalized.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cat $rootdirAB/kws_LMWT/results \| \
+    utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationA \
+      --segments=$datadir/segments --normalize=false --remove-dup=true\
+      --map-utter=$kwsdatadir/utter_map - $rootdirAB/kws_LMWT/kwslist.unnormalized.xml || exit 1
+fi
+
+echo "Scoring $datasetA"
+if [ $stage -le 4 ] ; then
+  if [[ (! -x local/kws_score.sh ) || ($skip_scoring == true) ]] ; then
+    echo "Not scoring, because local/kws_score.sh is not present or --skip-scoring true was given"
+  elif [ ! -f $datasetA/kws/rttm ] ; then
+    echo "Not scoring, because the file $datasetA/kws/rttm is not present"
+  else
+    $cmd LMWT=$min_lmwt:$max_lmwt $rootdirA/kws/kws_scoring.LMWT.log \
+      local/kws_score.sh $datasetA $rootdirA/kws_LMWT
+    $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_scoring.LMWT.log \
+      local/kws_score.sh --kwlist $datasetB/kws/kwlist.xml $datasetA $rootdirAB/kws_LMWT
+  fi
+fi
+
+echo "Processing $datasetB"
+if [ $stage -le 5 ] ; then
+  $cmd LMWT=$min_lmwt:$max_lmwt $rootdirB/kws/kws_write_normalized.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cat $rootdirB/kws_LMWT/results \| \
+    utils/write_kwslist.pl --flen=0.01 --duration=$durationB \
+      --segments=$datadir/segments --normalize=true --digits=3 --remove-dup=true\
+      --map-utter=$kwsdatadir/utter_map - $rootdirB/kws_LMWT/kwslist.xml || exit 1
+  $cmd LMWT=$min_lmwt:$max_lmwt $rootdirBA/kws/kws_write_normalized.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cat $rootdirBA/kws_LMWT/results \| \
+    utils/write_kwslist.pl --flen=0.01 --duration=$durationB \
+      --segments=$datadir/segments --normalize=true --digits=3 --remove-dup=true\
+      --map-utter=$kwsdatadir/utter_map - $rootdirBA/kws_LMWT/kwslist.xml || exit 1
+fi
+
+if [ $stage -le 6 ] ; then
+  $cmd LMWT=$min_lmwt:$max_lmwt $rootdirB/kws/kws_write_unnormalized.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cat $rootdirB/kws_LMWT/results \| \
+    utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationB \
+      --segments=$datadir/segments --normalize=false --remove-dup=true\
+      --map-utter=$kwsdatadir/utter_map - $rootdirB/kws_LMWT/kwslist.unnormalized.xml || exit 1
+  $cmd LMWT=$min_lmwt:$max_lmwt $rootdirBA/kws/kws_write_unnormalized.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cat $rootdirBA/kws_LMWT/results \| \
+    utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationB \
+      --segments=$datadir/segments --normalize=false --remove-dup=true\
+      --map-utter=$kwsdatadir/utter_map - $rootdirBA/kws_LMWT/kwslist.unnormalized.xml || exit 1
+fi
+
+echo "Scoring $datasetB"
+if [ $stage -le 7 ] ; then
+  if [[ (! -x local/kws_score.sh ) || ($skip_scoring == true) ]] ; then
+    echo "Not scoring, because local/kws_score.sh is not present or --skip-scoring true was given"
+  elif [ ! -f $datasetB/kws/rttm ] ; then
+    echo "Not scoring, because the file $datasetB/kws/rttm is not present"
+  else
+    $cmd LMWT=$min_lmwt:$max_lmwt $rootdirB/kws/kws_scoring.LMWT.log \
+      local/kws_score.sh $datasetB $rootdirB/kws_LMWT || exit 1
+    $cmd LMWT=$min_lmwt:$max_lmwt $rootdirBA/kws/kws_scoring.LMWT.log \
+      local/kws_score.sh --kwlist $datasetA/kws/kwlist.xml $datasetB $rootdirBA/kws_LMWT || exit 1
+  fi
+fi
+
+echo "Done, everything seems fine"
+exit 0
diff --git a/egs/babel/s5d/local/show_lattice.sh b/egs/babel/s5d/local/show_lattice.sh
new file mode 100755
index 00000000000..f18132234ee
--- /dev/null
+++ b/egs/babel/s5d/local/show_lattice.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. path.sh
+
+format=pdf # pdf svg
+output=
+
+. utils/parse_options.sh
+
+if [ $# != 3 ]; then
+  echo "usage: $0 [--format pdf|svg] [--output <path>] <utterance-id> <lattice-archive> <word-symbol-table>"
+  echo "e.g.:  $0 utt-0001 \"test/lat.*.gz\" tri1/graph/words.txt"
+  exit 1;
+fi
+
+uttid=$1
+lat=$2
+words=$3
+
+tmpdir=$(mktemp -d /tmp/kaldi.XXXX); trap "rm -r $tmpdir" EXIT # cleanup
+
+gunzip -c $lat | lattice-to-fst ark:- ark,scp:$tmpdir/fst.ark,$tmpdir/fst.scp || exit 1
+! grep "^$uttid " $tmpdir/fst.scp && echo "ERROR : Missing utterance '$uttid' from gzipped lattice ark '$lat'" && exit 1
+fstcopy "scp:grep '^$uttid ' $tmpdir/fst.scp |" "scp:echo $uttid $tmpdir/$uttid.fst |" || exit 1
+fstdraw --portrait=true --osymbols=$words $tmpdir/$uttid.fst | dot -T${format} > $tmpdir/$uttid.${format}
+
+if [ ! -z $output ]; then
+  cp $tmpdir/$uttid.${format} $output
+fi
+
+[ $format == "pdf" ] && evince $tmpdir/$uttid.pdf
+[ $format == "svg" ] && eog $tmpdir/$uttid.svg
+
+exit 0
diff --git a/egs/babel/s5d/local/split_ctms.sh b/egs/babel/s5d/local/split_ctms.sh
new file mode 100755
index 00000000000..b24a1380111
--- /dev/null
+++ b/egs/babel/s5d/local/split_ctms.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Copyright 2013  Johns Hopkins University (authors: Yenda Trmal)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# begin configuration section.
+min_lmwt=7
+max_lmwt=17
+stage=0
+cer=0
+ctm_name=
+cmd=run.pl
+#end configuration section.
+
+echo "$0 $@"
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+. parse_options.sh || exit 1;
+
+set -e
+set -o pipefail
+
+data=$1;
+q=$2;
+shift; shift;
+
+if [ -z $ctm_name ] ; then
+  ctm_name=`basename $data`;
+fi
+
+name=$ctm_name
+
+for i in $@ ; do
+  p=$q/`basename $i`
+  [ ! -f $i/reco2file_and_channel ] && echo "The file reco2file_and_channel not present in the $i directory!" && exit 1
+  for lmw in $q/score_* ; do
+    test -d $lmw || exit 1; # this protects us from creating a directory "score_*" in case no real score_[something] directory exists
+    d=$p/`basename $lmw`
+    mkdir -p $d
+
+    [ ! -f $lmw/$name.ctm ] && echo "File $lmw/$name.ctm does not exist!" && exit 1
+    utils/filter_scp.pl <(cut -f 1 -d ' ' $i/reco2file_and_channel) $lmw/$name.ctm > $d/`basename $i`.ctm
+  done
+
+  if [ -f $i/stm ] ; then
+    local/score_stm.sh --min-lmwt $min_lmwt --max-lmwt $max_lmwt --cer $cer --cmd "$cmd" $i data/lang $p
+  else
+    echo "Not running scoring, file $i/stm does not exist"
+  fi
+
+done
+exit 0
+
diff --git a/egs/babel/s5d/local/stm2text.pl b/egs/babel/s5d/local/stm2text.pl
new file mode 100755
index 00000000000..3b069c63554
--- /dev/null
+++ b/egs/babel/s5d/local/stm2text.pl
@@ -0,0 +1,43 @@
+#!/usr/bin/env perl
+
+# Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0.
+
+# This script takes the source STM file and generates the *.txt files which
+# are usually part of the BABEL delivery
+# The *.txt files are not part of the delivery for the evalpart1 subset
+# The program works as a filter and the only parameter it expects is
+# the path to the output directory
+# The filenames are figured out from the STM file
+# example of usage:
+#   cat data/evalpart1/stm | local/stm2text.pl data/raw_evalpart1_data/transcriptions
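+# For example (illustrative): an stm line
+#   BABEL_BP_101_12345 1 spk001 0.00 5.20 hello world
+# appends to $output_dir/BABEL_BP_101_12345.txt the two lines
+#   [0.00]
+#   hello world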
+
+use strict;
+use warnings;
+
+use utf8;
+use Data::Dumper;
+
+binmode(STDIN, ":encoding(utf8)");
+binmode(STDOUT, ":encoding(utf8)");
+
+my $output_dir = $ARGV[0];
+my $prev_filename = "";
+my $OUTPUT;
+while (<STDIN>) {
+  chop;
+  my ($filename, $channel, $speaker, $start, $end, $text) = split(" ", $_, 6);
+  next if ( $filename =~ /;;.*/ );
+  #$filename =~ s/;;(.*)/$1/ if ( $filename =~ /;;.*/ );
+  $text = "" if not $text;
+
+  if ( $prev_filename ne $filename ) {
+    #close($OUTPUT) if ( tell(FH) != -1 );
+    print "$output_dir/$filename.txt\n";
+    open($OUTPUT, ">:encoding(UTF-8)", "$output_dir/$filename.txt") or die $!;
+    $prev_filename = $filename;
+  }
+
+  print $OUTPUT "[$start]\n";
+  print $OUTPUT "$text\n";
+}
diff --git a/egs/babel/s5d/local/subset_atwv.pl b/egs/babel/s5d/local/subset_atwv.pl
new file mode 100755
index 00000000000..ce6b7043116
--- /dev/null
+++ b/egs/babel/s5d/local/subset_atwv.pl
@@ -0,0 +1,120 @@
+#!/usr/bin/env perl
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: subset_atwv.pl [options] <keywords-list> <bsum-file>
+ e.g.: subset_atwv.pl keywords.list bsum.txt
+
+This script will compute the ATWV for a subset of the original keywords in bsum.txt.
+Note that bsum.txt is a file generated by the NIST scoring tool F4DE. keywords.list
+is a list of the keywords that you want to compute the ATWV for. For example:
+KW101-0001
+KW101-0002
+...
+
+Allowed options:
+  --subset-name : Name of the subset (string, default = "")
+  --width       : Width of the printed numbers (int, default = 5 )
+EOU
+
+my $subset_name = "";
+my $width = 5;
+GetOptions('subset-name=s' => \$subset_name,
+           'width=i' => \$width);
+
+@ARGV == 2 || die $Usage;
+
+# Work out the input/output source
+my $kws_filename = shift @ARGV;
+my $bsum_filename = shift @ARGV;
+
+my $source = "STDIN";
+if ($kws_filename ne "-") {
+  open(KWS, "<$kws_filename") || die "Fail to open keywords file: $kws_filename\n";
+  $source = "KWS";
+}
+open(BSUM, "<$bsum_filename") || die "Fail to open bsum file: $bsum_filename\n";
+
+# Read in the keywords.
+my $kws = "";
+while (<$source>) {
+  chomp;
+  my @col = split();
+  @col == 1 || die "Bad line $_\n";
+  if ($kws eq "") {
+    $kws = $col[0];
+  } else {
+    $kws .= "|$col[0]";
+  }
+}
+
+# Process bsum.txt
+my $targ_sum = 0;
+my $corr_sum = 0;
+my $fa_sum = 0;
+my $miss_sum = 0;
+my $twv_sum = 0;
+my $count = 0;
+my $subset_count = 0;
+my $flag = 0;
+if ($kws ne "") {
+  while (<BSUM>) {
+    chomp;
+    # Work out the total number of keywords that have occurrences in the search collection
+    if (/^Summary Totals/) {$flag = 0;}
+    if (/^Keyword/) {$flag = 1;}
+    my @col;
+    if ($flag == 1) {
+      # Figure out keywords that don't have occurrences in the search collection
+      @col = split(/\|/, $_);
+      $col[2] =~ s/^\s+//;
+      $col[2] =~ s/\s+$//;
+      $col[2] ne "" || next;
+      $count ++;
+    } else {
+      next;
+    }
+
+    # Only collect statistics for the given subset
+    m/$kws/ || next;
+
+    # Keywords that are in the given subset, and have occurrences
+    $targ_sum += $col[2];
+    $corr_sum += $col[3];
+    $fa_sum += $col[4];
+    $miss_sum += $col[5];
+    $twv_sum += $col[6];
+    $subset_count ++;
+  }
+}
+
+# Compute ATWV
+my $subset_atwv = ($subset_count == 0) ? 0 : $twv_sum/$subset_count;
+my $atwv = ($count == 0) ? 0 : $twv_sum/$count;
+my $bp_atwv = ($count == 0) ? 0 : $subset_count/$count;
+
+# Format the numbers
+my $format = "%-${width}d";
+$subset_count = sprintf($format, $subset_count);
+$targ_sum = sprintf($format, $targ_sum);
+$corr_sum = sprintf($format, $corr_sum);
+$fa_sum = sprintf($format, $fa_sum);
+$miss_sum = sprintf($format, $miss_sum);
+$subset_atwv = sprintf("% .4f", $subset_atwv);
+$atwv = sprintf("% .4f", $atwv);
+$bp_atwv = sprintf("% .4f", $bp_atwv);
+
+# Print
+if ($subset_name ne "") {print "$subset_name: ";}
+print "#Keywords=$subset_count, #Targ=$targ_sum, #Corr=$corr_sum, #FA=$fa_sum, #Miss=$miss_sum, ";
+print "Contributed ATWV=$atwv, Best Possible Contributed ATWV=$bp_atwv, ATWV=$subset_atwv\n";
+
+if ($kws_filename ne "-") {close(KWS);}
+close(BSUM);
diff --git a/egs/babel/s5d/local/subset_kwslist.pl b/egs/babel/s5d/local/subset_kwslist.pl
new file mode 100755
index 00000000000..361291179ef
--- /dev/null
+++ b/egs/babel/s5d/local/subset_kwslist.pl
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+
+# Copyright 2012  Johns Hopkins University
+# Apache 2.0.
+#
+use strict;
+use warnings;
+use XML::Simple;
+use Data::Dumper;
+
+binmode STDOUT, ":utf8";
+
+my %seen;
+while (my $keyword = <STDIN>) {
+  chomp $keyword;
+  $seen{$keyword} = 1;
+}
+
+
+my $data = XMLin($ARGV[0], ForceArray => 1);
+
+#print Dumper($data->{kw});
+my @filtered_kws = ();
+
+foreach my $kwentry (@{$data->{kw}}) {
+  if (defined $seen{$kwentry->{kwid}}) {
+    push @filtered_kws, $kwentry;
+  }
+}
+$data->{kw} = \@filtered_kws;
+my $xml = XMLout($data, RootName=> "kwlist", KeyAttr=>'');
+print $xml;
+exit 0
diff --git a/egs/babel/s5d/local/summarize_logs.pl b/egs/babel/s5d/local/summarize_logs.pl
new file mode 100755
index 00000000000..e816d57d68f
--- /dev/null
+++ b/egs/babel/s5d/local/summarize_logs.pl
@@ -0,0 +1,121 @@
+#!/usr/bin/env perl
+
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+
+#scalar(@ARGV) >= 1 && print STDERR "Usage: summarize_warnings.pl <log-dir>\n" && exit 1;
+
+sub split_hundreds { # split list of filenames into groups of 100.
+  my $names = shift @_;
+  my @A = split(" ", $names);
+  my @ans = ();
+  while (@A > 0) {
+    my $group = "";
+    for ($x = 0; $x < 100 && @A > 0; $x++) {
+      $fname = pop @A;
+      $group .= "$fname ";
+    }
+    push @ans, $group;
+  }
+  return @ans;
+}
+
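+# The accounting entries parsed below come from the "# Accounting: ..." lines
+# that utils/run.pl and utils/queue.pl append to their logs; e.g. the
+# (illustrative) line "# Accounting: time=42 threads=1" yields
+# ($time, $threads) = (42, 1).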
+sub parse_accounting_entry {
+  $entry= shift @_;
+
+  @elems = split " ", $entry;
+
+  $time=undef;
+  $threads=undef;
+  foreach $elem (@elems) {
+    if ( $elem=~ m/time=(\d+)/ ) {
+      $elem =~ s/time=(\d+)/$1/;
+      $time = $elem;
+    } elsif ( $elem=~ m/threads=(\d+)/ ) {
+      $elem =~ s/threads=(\d+)/$1/g;
+      $threads = $elem;
+    } else {
+      die "Unknown entry \"$elem\" when parsing \"$entry\" \n";
+    }
+  }
+
+  if (defined($time) and defined($threads) ) {
+    return ($time, $threads);
+  } else {
+    die "The accounting entry \"$entry\" did not contain all necessary attributes";
+  }
+}
+
+foreach $dir (@ARGV) {
+
+  #$dir = $ARGV[0];
+  print "$dir\n";
+
+  ! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" ;
+
+  $dir =~ s:/$::; # Remove trailing slash.
+
+
+  # Group the files into categories where all have the same base-name.
+  foreach $f (glob ("$dir/*.log")) {
+    $f_category = $f;
+    # do next expression twice; s///g doesn't work as they overlap.
+    $f_category =~ s:\.\d+\.(?!\d+):.*.:;
+    #$f_category =~ s:\.\d+\.:.*.:;
+    $fmap{$f_category} .= " $f";
+  }
+}
+
+foreach $c (sort (keys %fmap) ) {
+  $n = 0;
+  foreach $fgroup (split_hundreds($fmap{$c})) {
+    $n += `grep -w WARNING $fgroup | wc -l`;
+  }
+  if ($n != 0) {
+    print "$n warnings in $c\n"
+  }
+}
+foreach $c (sort (keys %fmap)) {
+  $n = 0;
+  foreach $fgroup (split_hundreds($fmap{$c})) {
+    $n += `grep -w ERROR $fgroup | wc -l`;
+  }
+  if ($n != 0) {
+    print "$n errors in $c\n"
+  }
+}
+
+$supertotal_cpu_time=0.0;
+$supertotal_clock_time=0.0;
+$supertotal_threads=0.0;
+
+foreach $c (sort (keys %fmap)) {
+  $n = 0;
+
+  $total_cpu_time=0.0;
+  $total_clock_time=0.0;
+  $total_threads=0.0;
+  foreach $fgroup (split_hundreds($fmap{$c})) {
+    $lines=`grep -P "# Accounting:? " $fgroup |sed 's/.* Accounting:* *//g'`;
+
+    #print $lines ."\n";
+
+    @entries = split "\n", $lines;
+
+    foreach $line (@entries) {
+      ($time, $threads) = parse_accounting_entry($line);
+
+      $total_cpu_time += $time * $threads;
+      $total_threads += $threads;
+      if ( $time > $total_clock_time ) {
+        $total_clock_time = $time;
+      }
+    }
+  }
+  print "total_cpu_time=$total_cpu_time clock_time=$total_clock_time total_threads=$total_threads group=$c\n";
+
+  $supertotal_cpu_time += $total_cpu_time;
+  $supertotal_clock_time += $total_clock_time;
+  $supertotal_threads += $total_threads;
+}
+print "total_cpu_time=$supertotal_cpu_time clock_time=$supertotal_clock_time total_threads=$supertotal_threads group=all\n";
+
diff --git a/egs/babel/s5d/local/syllab/ali_to_syllabs.sh b/egs/babel/s5d/local/syllab/ali_to_syllabs.sh
new file mode 100755
index 00000000000..8f0cb88771a
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/ali_to_syllabs.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+cmd=run.pl
+# End configuration section
+. ./utils/parse_options.sh
+
+if [ -f ./path.sh ]; then . ./path.sh; fi
+
+if [ $# != 4 ]; then
+  echo "This script takes an alignment directory and a syllable lang dir and generates"
+  echo "a syllable-level transcription of the alignments"
+  echo ""
+  echo "Usage: $0 <data-dir> <syll-lang-dir> <ali-dir> <output-dir>"
+  echo " e.g.: $0 data/train data/lang_syll exp/tri5_ali exp/tri5_ali_syll"
+  echo "main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>)"
+
+  exit 1;
+fi
+
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+
+data=$1
+lang=$2
+ali=$3
+out=$4
+
+
+for f in real_words.txt lex.words2syllabs.fst ; do
+  [ ! -f $lang/$f ] && \
+    echo "The given lang directory is probably not a syllable lang dir" && \
+    echo "The file $lang/$f is missing" && \
+    exit 1
+done
+
+for f in words.txt L.fst ; do
+  [ ! -f $lang/$f ] && \
+    echo "The given lang directory does not contain the $f file" && \
+    exit 1
+done
+
+for f in $ali/num_jobs $ali/final.mdl $ali/ali.1.gz ; do
+  [ ! -f $f ] && \
+    echo "The given alignment directory does not contain the $f file" && \
+    exit 1
+done
+
+nj=$(cat $ali/num_jobs)
+echo "Extracting phoneme sequences"
+$cmd JOB=1:$nj $out/log/ali-to-phones.JOB.log \
+  ali-to-phones $ali/final.mdl ark:"gunzip -c $ali/ali.JOB.gz|" ark:- \| \
+  transcripts-to-fsts ark:- ark:$out/phones.JOB.fst || exit 1
+
+echo "Composing with files in $lang to get syllable sequences"
+$cmd JOB=1:$nj $out/log/get-syll-text.JOB.log \
+  cat $data/split$nj/JOB/text \| sym2int.pl -f 2- --map-oov '\' $lang/real_words.txt \| \
+  transcripts-to-fsts ark,t:- ark:- \|\
+  fsttablecompose $lang/lex.words2syllabs.fst ark:- ark:-\| \
+  fsts-project ark:- ark:-\| \
+  fsttablecompose $lang/L.fst ark:- ark:- \|\
+  fsttablecompose ark:$out/phones.JOB.fst ark:- ark:- \| \
+  fsts-to-transcripts ark:- ark,t:"|int2sym.pl -f 2- $lang/words.txt > $out/text.JOB"
+cat $out/text.* | sort > $out/text
+
+echo "Done"
+
diff --git a/egs/babel/s5d/local/syllab/create_syll_datadir.sh b/egs/babel/s5d/local/syllab/create_syll_datadir.sh
new file mode 100755
index 00000000000..4c014285619
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/create_syll_datadir.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+help_message="Converts a normal data dir (with word-level transcriptions) into a syllable-level one.\nExpects 4 parameters: <input-data-dir> <word-lang-dir> <syll-lang-dir> <output-data-dir>\n"
+# Begin configuration section.
+boost_sil=1.0
+cmd=run.pl
+nj=4
+# End configuration section
+. ./utils/parse_options.sh
+
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+. ./cmd.sh
+. ./path.sh
+
+if [ $# -ne 4 ] ; then
+  echo "$#"
+  echo -e "$help_message"
+  exit 1;
+fi
+
+input=$1
+word_lang=$2
+syll_lang=$3
+output=$4
+
+[ ! -f exp/tri5/final.mdl ] && \
+  echo "File exp/tri5/final.mdl must exist" && exit 1;
+
+[ ! -d $input/split$nj ] && utils/split_data.sh $input $nj
+
+utils/copy_data_dir.sh $input $output
+touch $output/.plp.done
+touch $output/.done
+
+if [ -f $input/text ] ; then
+  steps/align_fmllr.sh \
+    --boost-silence $boost_sil --nj $nj --cmd "$cmd" \
+    $input $word_lang exp/tri5 exp/tri5_ali/align_$(basename $input)
+
+  local/syllab/ali_to_syllabs.sh \
+    --cmd "$cmd" \
+    $input $syll_lang exp/tri5_ali/align_$(basename $input) \
+    exp/tri5_ali_syll/align_$(basename $output)
+
+  cp exp/tri5_ali_syll/align_$(basename $output)/text $output/text
+fi
+
+exit 0
+
+
+
diff --git a/egs/babel/s5d/local/syllab/create_syllables.pl b/egs/babel/s5d/local/syllab/create_syllables.pl
new file mode 100755
index 00000000000..29a0a67dc8d
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/create_syllables.pl
@@ -0,0 +1,154 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  Johns Hopkins University (Author: Yenda Trmal)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+use Getopt::Long;
+use Data::Dumper;
+
+my $with_probs;
+my $position_independent_phones;
+
+GetOptions("with-probs" => \$with_probs,
+           "position-independent-phones" => \$position_independent_phones
+);
+
+my %SYLLS;
+my %LEXICON;
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my $word; my $prob; my $pron;
+  if ($with_probs) {
+    ($word, $prob, $pron) = split(" ", $line, 3);
+  } else {
+    ($word, $pron) = split(" ", $line, 2);
+  }
+  my @syllabs = split(/\s*\t\s*/, $pron);
+
+  my $pronlen= scalar @syllabs;
+  my @extended_syllabs;
+  if (( $syllabs[0] =~ /x\<.*\>/) || ($word eq "SIL")) {
+    $SYLLS{$pron} +=1;
+    push @extended_syllabs, $pron;
+  } elsif ($pronlen == 1) {
+    my $syl;
+    my @phones=split " ", $syllabs[0];
+
+    if ($position_independent_phones) {
+      $syl = join(" ", @phones);
+    } else {
+      my @phones2 = map { $_ . "_I" } @phones;
+
+      if (scalar(@phones) == 1 ) {
+        $syl = "$phones[0]_S";
+      } else {
+        $phones2[0] = $phones[0] . "_B" unless $position_independent_phones;
+        $phones2[-1] = $phones[-1] ."_E" unless $position_independent_phones;
+        $syl = join(" ", @phones2);
+      }
+    }
+    $SYLLS{$syl} += 1;
+    push @extended_syllabs, $syl;
+  } else {
+    for (my $i = 0; $i < $pronlen; $i+=1) {
+      my $syl;
+      my @phones=split " ", $syllabs[$i];
+      my $first_index = 0;
+      my $last_index = scalar(@phones)-1;
+
+      if ($position_independent_phones) {
+        $syl = join(" ", @phones);
+      } else {
+        my @phones2 = map { $_ . "_I" } @phones;
+
+        if ($i == 0) {
+          $phones2[$first_index] = $phones[$first_index] . "_B";
+        } elsif ( $i == ($pronlen - 1)) {
+          $phones2[$last_index] = $phones[$last_index] . 
"_E"; + } + $syl = join(" ", @phones2); + } + + push @extended_syllabs, $syl; + $SYLLS{$syl} += 1; + } + } + push @{$LEXICON{$word}}, \@extended_syllabs; +} + + +my %VOCAB; +my %COUNTS; +my %REV_VOCAB; +foreach my $syl (keys %SYLLS) { + my $seq=1; + my $word=$syl; + $word =~ s/_[^\s]*//g; + $word =~ s/ //g; + $word =~ s/[^a-zA-Z0-9<>-|\/]//g; + + my $wordx=$word; + $wordx .= "#$seq"; + while (exists $COUNTS{$wordx}) { + $seq += 1; + $wordx = "$word#$seq"; + } + + $COUNTS{$wordx} += $SYLLS{$syl}; + push @{$VOCAB{$wordx}}, $syl; + $REV_VOCAB{$syl} = $wordx; +} + +open(my $lex_f, "|sort -u > $ARGV[0]") or +die "Cannot open the file\"$ARGV[0]\" for writing"; + +foreach my $word (keys %VOCAB) { + print $lex_f "$word\t" . join("\t", @{$VOCAB{$word}}) . "\n"; +} + +close($lex_f); + +open(my $word2syll_f, "|sort -u > $ARGV[1]") or +die "Cannot open the file\"$ARGV[1]\" for writing"; + +foreach my $word (keys %LEXICON) { + foreach my $pron (@{$LEXICON{$word}}) { + my @pron_in_syllabs; + foreach my $syl (@{$pron}) { + die "In word $word, pronunciation $pron: syllable $syl not in the lexicon!" unless exists $REV_VOCAB{$syl}; + push @pron_in_syllabs, $REV_VOCAB{$syl}; + } + print $word2syll_f "$word\t" . join(" ", @pron_in_syllabs) . "\n"; + } +} + +close($word2syll_f); + +open(my $word2ali_f, "|sort -u > $ARGV[2]") or +die "Cannot open the file\"$ARGV[2]\" for writing"; + +foreach my $word (keys %LEXICON) { + foreach my $pron (@{$LEXICON{$word}}) { + print $word2ali_f "$word\t$word\t" . join(" ", @{$pron}) . "\n"; + } +} + +close($word2ali_f); + diff --git a/egs/babel/s5d/local/syllab/generate_phone_lang.sh b/egs/babel/s5d/local/syllab/generate_phone_lang.sh new file mode 100755 index 00000000000..fc21a23231b --- /dev/null +++ b/egs/babel/s5d/local/syllab/generate_phone_lang.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +cmd=run.pl +# End configuration section +. ./utils/parse_options.sh +. ./path.sh + + + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +data=$1 +llang=$2 +lang=$3 +out=$4 +lout=$5 + +test -d $lout && rm -rf $lout +mkdir -p $lout +test -d $out && rm -rf $out +cp -R $lang $out +rm -rf $out/tmp $out/L.fst $out/L_disambig.fst $out/G.fst $out/words.txt +rm -rf $out/phones/word_boundary.{int,txt} + +echo "Generating lexicons.." +if [ -f $lang/phones/word_boundary.int ] ; then + echo "Position dependent phones system..." + if [ -f $llang/lexiconp.txt ] ; then + echo "Using probabilistic lexicon..." + cat $llang/lexiconp.txt | sed 's/ /\t/g' | local/syllab/create_syllables.pl --with-probs\ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + else + echo "Using plain lexicon..." + cat $llang/lexicon.txt | sed 's/ /\t/g' | local/syllab/create_syllables.pl \ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + fi +else + echo "Position independent phones system..." + if [ -f $llang/lexiconp.txt ] ; then + echo "Using probabilistic lexicon..." + cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs --position-independent-phones\ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + else + echo "Using plain lexicon..." 
+    cat $llang/lexicon.txt | local/syllab/create_syllables.pl --position-independent-phones\
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  fi
+fi
+cp $lout/lex.{syllabs2phones,words2syllabs,words2phones}.txt $out
+
+#We will fake the words.txt file
+(
+  echo "<eps>";
+  cut -f 1 $out/lex.syllabs2phones.txt;
+  echo -e "#0\n<s>\n</s>";
+) | nl -v 0 | awk '{print $2, $1}' > $out/syllabs.txt
+ln -s syllabs.txt $out/words.txt
+cp $lang/words.txt $out/real_words.txt
+
+
+#Figure out the "OOV" token
+oovword=$(cat $lang/oov.txt)
+oovsyl=$(grep -w -F "$oovword" $out/lex.words2syllabs.txt | \
+         awk '{if (NF == 2) { print $2;}
+               else {print "Error, oov word has more than one syllable "; exit 1;}}')
+
+echo $oovsyl > $out/oov.txt
+grep -w -F "$oovsyl" $out/words.txt | awk '{print $2}' > $out/oov.int
+
+phone_disambig_symbol=$(grep '#0' $out/phones.txt | awk '{print $2}')
+word_disambig_symbol=$(grep '#0' $out/words.txt | awk '{print $2}')
+
+if [ -f $out/phones/wdisambig_words.int ]; then
+  echo $word_disambig_symbol > $out/phones/wdisambig_words.int
+fi
+
+optional_sil=$(cat $out/phones/optional_silence.txt)
+utils/add_lex_disambig.pl $out/lex.syllabs2phones.txt $out/lex.syllabs2phones.disambig.txt > /dev/null
+cat $out/lex.syllabs2phones.disambig.txt | sort -u > $lout/lexicon.txt
+
+echo "<eps> SIL" | cat - $lout/lexicon.txt | perl -ane 'print $F[0], " ", join(" ", @F), "\n";' | \
+  sed 's/ #[0-9]$//g' > $out/phones/align_lexicon.txt
+cat $lout/lexicon.txt | perl -ane 'print $F[0], "\t1.0\t", join(" ", @F[1..$#F]), "\n";' \
+  > $lout/lexiconp.txt
+
+cat $out/phones/align_lexicon.txt |\
+  sym2int.pl -f 3- $out/phones.txt |\
+  sym2int.pl -f 1-2 $out/words.txt \
+  > $out/phones/align_lexicon.int
+
+ndisambig=$(cat $out/phones/disambig.int | wc -l)
+ndisambig=$[$ndisambig-1]
+
+
+#Compile the lexicons
+echo "Compiling words2syllables FST"
+utils/make_lexicon_fst.pl $out/lex.words2syllabs.txt | \
+  fstcompile --isymbols=$out/syllabs.txt --osymbols=$lang/words.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.words2syllabs.fst
+
+echo "Compiling L.fst and L_disambig.fst"
+sil=$(cat $lang/phones/optional_silence.txt)
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.txt 0.5 $sil | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.fst
+ln -s lex.syllabs2phones.fst $out/L.fst
+
+
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.disambig.txt 0.5 $sil '#'$ndisambig | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstaddselfloops "echo $phone_disambig_symbol |" "echo $word_disambig_symbol |"|\
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.disambig.fst
+ln -s lex.syllabs2phones.disambig.fst $out/L_disambig.fst
+
+echo "Validating the output lang dir"
+utils/validate_lang.pl $out || exit 1
+
+sed -i'' 's/#1$//g' $lout/lexicon.txt
+sed -i'' 's/#1$//g' $lout/lexiconp.txt
+
+echo "Done OK."
+exit 0
diff --git a/egs/babel/s5d/local/syllab/generate_syllable_lang.sh b/egs/babel/s5d/local/syllab/generate_syllable_lang.sh
new file mode 100755
index 00000000000..db7b0902425
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/generate_syllable_lang.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal <jtrmal@gmail.com>)
+# License: Apache 2.0
+
+# Begin configuration section.
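+# A usage sketch (this mirrors the invocation in local/syllab/run_syllabs.sh;
+# the directory names are that script's choices, not requirements):
+#   local/syllab/generate_syllable_lang.sh \
+#     data/train data/local/ data/lang data/lang.syll data/local/dict.syll
+# with the five positional arguments being: data dir, local dict dir, input
+# lang dir, output (syllable) lang dir, and output (syllable) dict dir.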
+cmd=run.pl
+# End configuration section
+. ./utils/parse_options.sh
+. ./path.sh
+
+
+
+set -e -o pipefail
+set -o nounset  # Treat unset variables as an error
+
+data=$1
+llang=$2
+lang=$3
+out=$4
+lout=$5
+
+test -d $lout && rm -rf $lout
+mkdir -p $lout
+test -d $out && rm -rf $out
+cp -R $lang $out
+rm -rf $out/tmp $out/L.fst $out/L_disambig.fst $out/G.fst $out/words.txt
+rm -rf $out/phones/word_boundary.{int,txt}
+
+echo "Generating lexicons.."
+if [ -f $lang/phones/word_boundary.int ] ; then
+  echo "Position dependent phones system..."
+  if [ -f $llang/lexiconp.txt ] ; then
+    echo "Using probabilistic lexicon..."
+    cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs\
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  else
+    echo "Using plain lexicon..."
+    cat $llang/lexicon.txt | local/syllab/create_syllables.pl \
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  fi
+else
+  echo "Position independent phones system..."
+  if [ -f $llang/lexiconp.txt ] ; then
+    echo "Using probabilistic lexicon..."
+    cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs --position-independent-phones\
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  else
+    echo "Using plain lexicon..."
+    cat $llang/lexicon.txt | local/syllab/create_syllables.pl --position-independent-phones\
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  fi
+fi
+cp $lout/lex.{syllabs2phones,words2syllabs,words2phones}.txt $out
+
+#We will fake the words.txt file
+(
+  echo "<eps>";
+  cut -f 1 $out/lex.syllabs2phones.txt;
+  echo -e "#0\n<s>\n</s>";
+) | nl -v 0 | awk '{print $2, $1}' > $out/syllabs.txt
+ln -s syllabs.txt $out/words.txt
+cp $lang/words.txt $out/real_words.txt
+
+
+#Figure out the "OOV" token
+oovword=$(cat $lang/oov.txt)
+oovsyl=$(grep -w -F "$oovword" $out/lex.words2syllabs.txt | \
+         awk '{if (NF == 2) { print $2;}
+               else {print "Error, oov word has more than one syllable "; exit 1;}}')
+
+echo $oovsyl > $out/oov.txt
+grep -w -F "$oovsyl" $out/words.txt | awk '{print $2}' > $out/oov.int
+
+phone_disambig_symbol=$(grep '#0' $out/phones.txt | awk '{print $2}')
+word_disambig_symbol=$(grep '#0' $out/words.txt | awk '{print $2}')
+
+optional_sil=$(cat $out/phones/optional_silence.txt)
+utils/add_lex_disambig.pl $out/lex.syllabs2phones.txt $out/lex.syllabs2phones.disambig.txt > /dev/null
+cat $out/lex.syllabs2phones.disambig.txt | sort -u > $lout/lexicon.txt
+
+if [ -f $out/phones/wdisambig_words.int ]; then
+  echo $word_disambig_symbol > $out/phones/wdisambig_words.int
+fi
+
+echo "<eps> SIL" | cat - $lout/lexicon.txt | perl -ane 'print $F[0], " ", join(" ", @F), "\n";' | \
+  sed 's/ #[0-9]$//g' > $out/phones/align_lexicon.txt
+cat $lout/lexicon.txt | perl -ane 'print $F[0], "\t1.0\t", join(" ", @F[1..$#F]), "\n";' \
+  > $lout/lexiconp.txt
+
+cat $out/phones/align_lexicon.txt |\
+  sym2int.pl -f 3- $out/phones.txt |\
+  sym2int.pl -f 1-2 $out/words.txt \
+  > $out/phones/align_lexicon.int
+
+ndisambig=$(cat $out/phones/disambig.int | wc -l)
+ndisambig=$[$ndisambig-1]
+
+
+#Compile the lexicons
+echo "Compiling words2syllables FST"
+utils/make_lexicon_fst.pl $out/lex.words2syllabs.txt | \
+  fstcompile --isymbols=$out/syllabs.txt --osymbols=$lang/words.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.words2syllabs.fst
+
+echo "Compiling L.fst and L_disambig.fst"
+sil=$(cat $lang/phones/optional_silence.txt)
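+# Note: utils/make_lexicon_fst.pl puts an optional silence phone between
+# entries; the literal 0.5 below is the probability of emitting that
+# silence after each syllable.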
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.txt 0.5 $sil | \ + fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \ + --keep_isymbols=false --keep_osymbols=false| \ + fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.fst +ln -s lex.syllabs2phones.fst $out/L.fst + + +utils/make_lexicon_fst.pl $out/lex.syllabs2phones.disambig.txt 0.5 $sil '#'$ndisambig | \ + fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \ + --keep_isymbols=false --keep_osymbols=false| \ + fstaddselfloops "echo $phone_disambig_symbol |" "echo $word_disambig_symbol |"|\ + fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.disambig.fst +ln -s lex.syllabs2phones.disambig.fst $out/L_disambig.fst + +echo "Validating the output lang dir" +utils/validate_lang.pl $out || exit 1 + +sed -i'' 's/#1$//g' $lout/lexicon.txt +sed -i'' 's/#1$//g' $lout/lexiconp.txt + +echo "Done OK." +exit 0 diff --git a/egs/babel/s5d/local/syllab/lattice_word2syll.sh b/egs/babel/s5d/local/syllab/lattice_word2syll.sh new file mode 100755 index 00000000000..b81bf9d18d4 --- /dev/null +++ b/egs/babel/s5d/local/syllab/lattice_word2syll.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +cmd=run.pl +acwt=0.1 +beam=8 +# End configuration section +echo $0 "$@" +. ./utils/parse_options.sh + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +data=$1; shift; +ilang=$1; shift; +olang=$1; shift; +input=$1; shift +output=$1; shift + +nj=$(cat $input/num_jobs) + +mkdir -p $output/log + + +if [ -f $olang/lex.words2syllabs.fst ] ; then + fstinvert $olang/lex.words2syllabs.fst | fstreverse | \ + fstminimize | fstreverse > $output/L.fst + + $cmd JOB=1:$nj $output/log/convert.JOB.log \ + lattice-push --push-strings ark:"gunzip -c $input/lat.JOB.gz|" ark:- \| \ + lattice-lmrescore --lm-scale=-1.0 ark:- "fstproject --project_output=true $ilang/G.fst|" ark:- \| \ + lattice-compose ark:- $output/L.fst ark:- \| \ + lattice-determinize-pruned --beam=8 --acoustic-scale=0.1 ark:- ark:- \| \ + lattice-minimize ark:- "ark:|gzip -c > $output/lat.JOB.gz" + #lattice-minimize ark:- ark:- \| \ + #lattice-lmrescore --lm-scale=1.0 ark:- "fstproject --project_output=true $olang/G.fst|" "ark:|gzip -c > $output/lat.JOB.gz" +else + #for phonemes.... 
(IIRC)
+  fstreverse $olang/L.fst | fstminimize | fstreverse > $output/L.fst
+  $cmd JOB=1:$nj $output/log/convert.JOB.log \
+    lattice-push --push-strings ark:"gunzip -c $input/lat.JOB.gz|" ark:- \| \
+    lattice-lmrescore --lm-scale=-1.0 ark:- "fstproject --project_output=true $ilang/G.fst|" ark:- \| \
+    lattice-align-words $ilang/phones/word_boundary.int $input/../final.mdl ark:- ark:- \| \
+    lattice-to-phone-lattice --replace-words $input/../final.mdl ark:- ark:- \| \
+    lattice-align-phones $input/../final.mdl ark:- ark:- \| \
+    lattice-compose ark:- $output/L.fst ark:- \|\
+    lattice-determinize-pruned --beam=$beam --acoustic-scale=$acwt ark:- ark:-\| \
+    lattice-minimize ark:- "ark:|gzip -c > $output/lat.JOB.gz"
+    #lattice-lmrescore --lm-scale=1.0 ark:- "fstproject --project_output=true $olang/G.fst|" ark:"|gzip -c > $output/lat.JOB.gz"
+fi
+
+  #lattice-1best ark:- ark:-| nbest-to-linear ark:- ark:/dev/null ark,t:- \
+  #utils/int2sym.pl -f 2- $olang/words.txt | head
+cp $input/num_jobs $output/num_jobs
+
diff --git a/egs/babel/s5d/local/syllab/map_prons_to_syllables.pl b/egs/babel/s5d/local/syllab/map_prons_to_syllables.pl
new file mode 100755
index 00000000000..df3ce93ce4e
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/map_prons_to_syllables.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+use Getopt::Long;
+
+my $probs;
+
+GetOptions("with-probs" => \$probs);
+
+my $syllab_lexicon = $ARGV[0];
+
+my %PRON2SYL;
+
+
+open(my $f, $syllab_lexicon) or die "Cannot open file $syllab_lexicon\n";
+while (my $line = <$f>) {
+  chomp $line;
+
+  my $syll;
+  my $pron;
+  my $prob;
+
+  if ($probs) {
+    ($syll, $prob, $pron) = split " ", $line, 3;
+  } else {
+    ($syll, $pron) = split " ", $line, 2;
+  }
+  $PRON2SYL{$pron} = $syll;
+}
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my ($word, $pron) = split(/\s*\t\s*/, $line, 2);
+  my @syllabs = split(/\s*\t\s*/, $pron);
+
+  my @syl_pron;
+  foreach my $syl (@syllabs) {
+    die "in $line unknown syllable $syl" unless exists $PRON2SYL{$syl};
+    push @syl_pron, $PRON2SYL{$syl};
+  }
+  print "$word\t" . join(" ", @syl_pron) . "\n";
+
+}
diff --git a/egs/babel/s5d/local/syllab/run_phones.sh b/egs/babel/s5d/local/syllab/run_phones.sh
new file mode 100755
index 00000000000..6f3c7be4cef
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/run_phones.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal <jtrmal@gmail.com> )
+# License: Apache 2.0
+
+# Begin configuration section.
+stage=0
+# End configuration section
+. ./utils/parse_options.sh
+set -e -o pipefail
+set -o nounset  # Treat unset variables as an error
+
+. ./cmd.sh
+. ./path.sh
+
+. ./conf/common_vars.sh
+. 
./lang.conf + +if [ $# -ne 1 ] ; then + echo "Invalid number of parameters" + exit 1 +fi + +idir=$1 +idata=${idir##*/} + + +odata=${idata%%.*}.phn.${idata#*.} + +if [ $stage -le -1 ] ; then + local/syllab/generate_phone_lang.sh \ + data/train data/local/ data/lang data/lang.phn data/local/dict.phn + + local/syllab/ali_to_syllabs.sh \ + data/train data/lang.phn exp/tri5_ali exp/tri5_ali_phn + + + utils/copy_data_dir.sh data/train data/train.phn + cp exp/tri5_ali_phn/text data/train.phn/text + + #Create syllab LM + local/train_lms_srilm.sh \ + --words-file data/lang.phn/words.txt --train-text data/train.phn/text \ + --oov-symbol "`cat data/lang.phn/oov.txt`" data data/srilm.phn + + local/arpa2G.sh data/srilm.phn/lm.gz data/lang.phn/ data/lang.phn/ +fi + +if [ $stage -le 0 ] && [ -f "$idir/text" ] ; then + #Create dev10h.phn.pem dir + steps/align_fmllr.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + $idir data/lang exp/tri5 exp/tri5_ali/align_$idata + + local/syllab/ali_to_syllabs.sh \ + --cmd "$decode_cmd" \ + $idir data/lang.phn exp/tri5_ali/align_$idata exp/tri5_ali_phn/align_$idata +fi + +if [ $stage -le 1 ] ; then + utils/copy_data_dir.sh data/$idata data/$odata + [ -f exp/tri5_ali_phn/align_$idata/text ] && \ + cp exp/tri5_ali_phn/align_$idata/text data/$odata/text + touch data/$odata/.plp.done + touch data/$odata/.done +fi + + diff --git a/egs/babel/s5d/local/syllab/run_syllabs.sh b/egs/babel/s5d/local/syllab/run_syllabs.sh new file mode 100755 index 00000000000..a2ec82f3033 --- /dev/null +++ b/egs/babel/s5d/local/syllab/run_syllabs.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +stage=0 +# End configuration section +. ./utils/parse_options.sh +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +. ./cmd.sh +. ./path.sh + +. ./conf/common_vars.sh +. 
./lang.conf
+
+if [ $# -ne 1 ] ; then
+  echo "Invalid number of parameters"
+  exit 1
+fi
+
+idir=$1
+idata=${idir##*/}
+
+
+odata=${idata%%.*}.syll.${idata#*.}
+
+if [ $stage -le -1 ] ; then
+  local/syllab/generate_syllable_lang.sh \
+    data/train data/local/ data/lang data/lang.syll data/local/dict.syll
+
+  local/syllab/ali_to_syllabs.sh \
+    data/train data/lang.syll exp/tri5_ali exp/tri5_ali_syll
+
+
+  utils/copy_data_dir.sh data/train data/train.syll
+  cp exp/tri5_ali_syll/text data/train.syll/text
+
+  #Create syllab LM
+  local/train_lms_srilm.sh \
+    --words-file data/lang.syll/words.txt --train-text data/train.syll/text \
+    --oov-symbol "`cat data/lang.syll/oov.txt`" data data/srilm.syll
+
+  local/arpa2G.sh data/srilm.syll/lm.gz data/lang.syll/ data/lang.syll/
+fi
+
+if [ $stage -le 0 ] && [ -f "$idir/text" ] ; then
+  #Create dev10h.syll.pem dir
+  steps/align_fmllr.sh \
+    --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \
+    $idir data/lang exp/tri5 exp/tri5_ali/align_$idata
+
+  local/syllab/ali_to_syllabs.sh \
+    --cmd "$decode_cmd" \
+    $idir data/lang.syll exp/tri5_ali/align_$idata exp/tri5_ali_syll/align_$idata
+fi
+
+if [ $stage -le 1 ] ; then
+  utils/copy_data_dir.sh data/$idata data/$odata
+  [ -f exp/tri5_ali_syll/align_$idata/text ] && \
+    cp exp/tri5_ali_syll/align_$idata/text data/$odata/text
+  touch data/$odata/.plp.done
+  touch data/$odata/.done
+fi
+
+
diff --git a/egs/babel/s5d/local/train_g2p.sh b/egs/babel/s5d/local/train_g2p.sh
new file mode 100755
index 00000000000..08be0014656
--- /dev/null
+++ b/egs/babel/s5d/local/train_g2p.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+# Copyright 2014  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+
+# Begin configuration section.
+iters=5
+stage=0
+encoding='utf-8'
+remove_tags=true
+only_words=true
+icu_transform="Any-Lower"
+cmd=run.pl
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+set -u
+set -e
+
+if [ $# != 2 ]; then
+  echo "Usage: $0 [options] <lexicon-file> <work-dir>"
+  echo " where <lexicon-file> is the training lexicon (one pronunciation per "
+  echo " word per line) and <work-dir> is directory where the models will "
+  echo " be stored"
+  echo "e.g.: train_g2p.sh data/local/lexicon.txt exp/g2p/"
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --iters <num-iters>                              # How many iterations. Relates to the n-gram order"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  exit 1;
+fi
+
+lexicon=$1
+wdir=$2
+
+
+mkdir -p $wdir/log
+
+[ ! -f $lexicon ] && echo "$0: Training lexicon does not exist." && exit 1
+
+if $only_words ; then
+  cat $lexicon | sed 's/^<.*>.*$//g' | sed 's/^#.*//g' > $wdir/lexicon_onlywords.txt
+  lexicon=$wdir/lexicon_onlywords.txt
+fi
+
+if $remove_tags ; then
+  cat $lexicon |\
+    sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g' > $wdir/lexicon_notags.txt
+  lexicon=$wdir/lexicon_notags.txt
+fi
+
+if [ ! -z $icu_transform ] ; then
+  paste \
+    <(cat $lexicon | awk '{print $1}' | uconv -f $encoding -t $encoding -x "$icu_transform") \
+    <(cat $lexicon | sed 's/^[^ \t][^ \t]*[ \t]//g') \
+    > $wdir/lexicon_transformed.txt
+  lexicon=$wdir/lexicon_transformed.txt
+fi
+
+if ! g2p=`which g2p.py` ; then
+  echo "Sequitur was not found !"
+  echo "Go to $KALDI_ROOT/tools and execute extras/install_sequitur.sh"
+  exit 1
+fi
+
+echo "Training the G2P model (iter 0)"
+
+if [ $stage -le 0 ]; then
+  $cmd $wdir/log/g2p.0.log \
+    g2p.py -S --encoding $encoding --train $lexicon --devel 5% --write-model $wdir/g2p.model.0
+fi
+
+for i in `seq 0 $(($iters-2))`; do
+
+  echo "Training the G2P model (iter $[$i + 1] )"
+
+  if [ $stage -le $i ]; then
+    $cmd $wdir/log/g2p.$(($i + 1)).log \
+      g2p.py -S --encoding $encoding --model $wdir/g2p.model.$i --ramp-up --train $lexicon --devel 5% --write-model $wdir/g2p.model.$(($i+1))
+  fi
+
+done
+
+! (set -e; cd $wdir; ln -sf g2p.model.$[$iters-1] g2p.model.final ) && echo "Problem finalizing training... " && exit 1
+
+if [ $stage -le $(($i + 2)) ]; then
+  echo "Running test..."
+  $cmd $wdir/log/test.log \
+    g2p.py --encoding $encoding --model $wdir/g2p.model.final --test $lexicon
+fi
+
diff --git a/egs/babel/s5d/local/train_lms_srilm.sh b/egs/babel/s5d/local/train_lms_srilm.sh
new file mode 100755
index 00000000000..cf357260d8c
--- /dev/null
+++ b/egs/babel/s5d/local/train_lms_srilm.sh
@@ -0,0 +1,229 @@
+#!/bin/bash
+export LC_ALL=C
+
+words_file=
+train_text=
+dev_text=
+oov_symbol="<UNK>"
+
+echo "$0 $@"
+
+[ -f path.sh ] && . ./path.sh
+. ./utils/parse_options.sh || exit 1
+
+echo "-------------------------------------"
+echo "Building an SRILM language model     "
+echo "-------------------------------------"
+
+if [ $# -ne 2 ] ; then
+  echo "Incorrect number of parameters. "
+  echo "Script has to be called like this:"
+  echo "  $0 [switches] <datadir> <tgtdir>"
+  echo "For example: "
+  echo "  $0 data data/srilm"
+  echo "The allowed switches are: "
+  echo "    words_file=<words-file>       word list file -- data/lang/words.txt by default"
+  echo "    train_text=<text-file>        data/train/text is used in case when not specified"
+  echo "    dev_text=<text-file>          last 10 % of the train text is used by default"
+  echo "    oov_symbol=<unk-symbol>       symbol to use for oov modeling -- <UNK> by default"
+  exit 1
+fi
+
+datadir=$1
+tgtdir=$2
+outlm=lm.gz
+
+
+##End of configuration
+loc=`which ngram-count`;
+if [ -z $loc ]; then
+  if uname -a | grep 64 >/dev/null; then # some kind of 64 bit...
+    sdir=`pwd`/../../../tools/srilm/bin/i686-m64
+  else
+    sdir=`pwd`/../../../tools/srilm/bin/i686
+  fi
+  if [ -f $sdir/ngram-count ]; then
+    echo Using SRILM tools from $sdir
+    export PATH=$PATH:$sdir
+  else
+    echo You appear to not have SRILM tools installed, either on your path,
+    echo or installed in $sdir.  See tools/install_srilm.sh for installation
+    echo instructions.
+    exit 1
+  fi
+fi
+
+# Prepare the destination directory
+mkdir -p $tgtdir
+
+for f in $words_file $train_text $dev_text; do
+  [ ! -s $f ] && echo "No such file $f" && exit 1;
+done
+
+[ -z $words_file ] && words_file=$datadir/lang/words.txt
+if [ ! -z "$train_text" ] && [ -z "$dev_text" ] ; then
+  nr=`cat $train_text | wc -l`
+  nr_dev=$(($nr / 10 ))
+  nr_train=$(( $nr - $nr_dev ))
+  orig_train_text=$train_text
+  head -n $nr_train $train_text > $tgtdir/train_text
+  tail -n $nr_dev $train_text > $tgtdir/dev_text
+
+  train_text=$tgtdir/train_text
+  dev_text=$tgtdir/dev_text
+  echo "Using words file: $words_file"
+  echo "Using train text: 9/10 of $orig_train_text"
+  echo "Using dev text  : 1/10 of $orig_train_text"
+elif [ ! -z "$train_text" ] && [ ! -z "$dev_text" ] ; then
+  echo "Using words file: $words_file"
+  echo "Using train text: $train_text"
+  echo "Using dev text  : $dev_text"
+  train_text=$train_text
+  dev_text=$dev_text
+else
+  train_text=$datadir/train/text
+  dev_text=$datadir/dev2h/text
+  echo "Using words file: $words_file"
+  echo "Using train text: $train_text"
+  echo "Using dev text  : $dev_text"
+fi
+
+
+
+# Extract the word list from the training dictionary; exclude special symbols
+sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '<eps>' | grep -v -F "$oov_symbol" > $tgtdir/vocab
+if (($?)); then
+  echo "Failed to create vocab from $words_file"
+  exit 1
+else
+  # wc vocab # doesn't work due to some encoding issues
+  echo vocab contains `cat $tgtdir/vocab | perl -ne 'BEGIN{$l=$w=0;}{split; $w+=$#_; $w++; $l++;}END{print "$l lines, $w words\n";}'`
+fi
+
+# Kaldi transcript files contain Utterance_ID as the first word; remove it
+cat $train_text | cut -f2- -d' ' > $tgtdir/train.txt
+if (($?)); then
+  echo "Failed to create $tgtdir/train.txt from $train_text"
+  exit 1
+else
+  echo "Removed first word (uid) from every line of $train_text"
+  # wc text.train train.txt # doesn't work due to some encoding issues
+  echo $train_text contains `cat $train_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'`
+  echo train.txt contains `cat $tgtdir/train.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'`
+fi
+
+# Kaldi transcript files contain Utterance_ID as the first word; remove it
+cat $dev_text | cut -f2- -d' ' > $tgtdir/dev.txt
+if (($?)); then
+  echo "Failed to create $tgtdir/dev.txt from $dev_text"
+  exit 1
+else
+  echo "Removed first word (uid) from every line of $dev_text"
+  # wc text.train train.txt # doesn't work due to some encoding issues
+  echo $dev_text contains `cat $dev_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'`
+  echo $tgtdir/dev.txt contains `cat $tgtdir/dev.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'`
+fi
+
+echo "-------------------"
+echo "Good-Turing 2grams"
+echo "-------------------"
+ngram-count -lm $tgtdir/2gram.gt01.gz -gt1min 0 -gt2min 1 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/2gram.gt02.gz -gt1min 0 -gt2min 2 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+
+echo "-------------------"
+echo "Kneser-Ney 2grams"
+echo "-------------------"
+ngram-count -lm $tgtdir/2gram.kn01.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/2gram.kn02.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+
+echo "-------------------"
+echo "Good-Turing 3grams"
+echo "-------------------"
+ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+
+echo "-------------------"
+echo "Kneser-Ney 3grams"
+echo "-------------------"
+ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+
+
+echo "-------------------"
+echo "Good-Turing 4grams"
+echo "-------------------"
+ngram-count -lm $tgtdir/4gram.gt0111.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.gt0112.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.gt0122.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.gt0123.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.gt0113.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.gt0222.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.gt0223.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+
+echo "-------------------"
+echo "Kneser-Ney 4grams"
+echo "-------------------"
+ngram-count -lm $tgtdir/4gram.kn0111.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.kn0112.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.kn0113.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.kn0122.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.kn0123.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.kn0222.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.kn0223.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+
+if [ ! -z ${LIBLBFGS} ]; then
+  #please note that if the switch -map-unk "$oov_symbol" is used with -maxent-convert-to-arpa, ngram-count will segfault
+  #instead of that, we simply output the model in the maxent format and convert it using the "ngram"
+  echo "-------------------"
+  echo "Maxent 2grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 2 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/2gram.me.gz || exit 1
+
+  echo "-------------------"
+  echo "Maxent 3grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 3 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/3gram.me.gz || exit 1
+
+  echo "-------------------"
+  echo "Maxent 4grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 4 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/4gram.me.gz || exit 1
+
+fi
+
+
+echo "--------------------"
+echo "Computing perplexity"
+echo "--------------------"
+(
+  for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
+  for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
+)  | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt
+
+echo "The perplexity scores report is stored in $tgtdir/perplexities.txt "
+
+#This will link the lowest perplexity LM as the output LM.
+#ln -sf $tgtdir/`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '` $outlm
+
+#A slight modification of the previous approach:
+#We look at the two lowest perplexity LMs and use the 3gram LM if it is one of the two, even if the 4gram has lower perplexity
+nof_trigram_lm=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | wc -l`
+if [[ $nof_trigram_lm -eq 0 ]] ; then
+  lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
+elif [[ $nof_trigram_lm -eq 2 ]] ; then
+  lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
+else  #exactly one 3gram LM
+  lmfilename=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | cut -f 1 -d ' '`
+fi
+(cd $tgtdir; ln -sf `basename $lmfilename` $outlm )
+
diff --git a/egs/babel/s5d/local/txt_to_rttm.pl b/egs/babel/s5d/local/txt_to_rttm.pl
new file mode 100755
index 00000000000..0e128520880
--- /dev/null
+++ b/egs/babel/s5d/local/txt_to_rttm.pl
@@ -0,0 +1,108 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: txt_to_rttm.pl [options] <txt-in|-> <rttm-out|->
+
+Allowed options:
+  --flen        : Frame length (float, default = 0.01)
+  --symtab      : Symbol table (string, default = "")
+  --segment     : Segment file from Kaldi (string, default = "")
+EOU
+
+my $symtab = "";
+my $segment = "";
+my $flen = 0.01;
+GetOptions('symtab=s' => \$symtab,
+  'segment=s' => \$segment,
+  'flen=f' => \$flen);
+
+if ($symtab) {
+  if (!open(S, "<$symtab")) {print "Fail to open symbol table: $symtab\n"; exit 1;}
+}
+
+if ($segment) {
+  if (!open(SEG, "<$segment")) {print "Fail to open segment file: $segment\n"; exit 1;}
+}
+
+if(@ARGV != 2) {
+  die $Usage;
+}
+
+# Get parameters
+my $filein = shift @ARGV;
+my $fileout = shift @ARGV;
+
+# Get input source
+my $source = "";
+if ($filein eq "-") {
+  $source = "STDIN";
+} else {
+  if (!open(I, "<$filein")) {print "Fail to open input file: $filein\n"; exit 1;}
+  $source = "I";
+}
+
+# Open output file
+my $sourceout = "";
+if ($fileout ne "-") {
+  if (!open(O, ">$fileout")) {print "Fail to open output file: $fileout\n"; exit 1;}
+  $sourceout = "O";
+}
+
+# Get symbol table and start time
+my %sym = ();
+my %tbeg = ();
+my %uid2utt = ();
+if ($symtab) {
+  while(<S>) {
+    chomp;
+    my @col = split(" ", $_);
+    @col == 2 || die "Bad number of columns in $symtab\n";
+    $sym{$col[1]} = $col[0];
+  }
+}
+
+if ($segment) {
+  while(<SEG>) {
+    chomp;
+    my @col = split(" ", $_);
+    @col == 4 || die "Bad number of columns in $segment\n";
+    $tbeg{$col[0]} = $col[2];
+    $uid2utt{$col[0]} = $col[1];
+  }
+}
+
+# Processing
+while (<$source>) {
+  chomp;
+  my @col = split(" ", $_);
+  my $uid = shift @col;
+  my $words = join(" ", @col);
+  @col = split(/;/, $words);
+
+  my $utt = $uid;
+  my $sta = 0;
+  if ($segment) {
+    $utt = $uid2utt{$uid};
+    $sta = $tbeg{$uid};
+  }
+  foreach (@col) {
+    my @subcol = split(" ", $_);
+    @subcol == 2 || die "Bad number of columns in word-frame pair\n";
+    my $word = $subcol[0];
+    my $dur = $subcol[1]*$flen;
+    my $lex = "LEXEME";
+    if ($symtab) {$word = $sym{$word};}
+    if ($word =~ m/^<.*>$/) {$lex = "NON-LEX";}
+    eval "print $sourceout \"$lex $utt 1 $sta $dur $word \n\"";
+    $sta += $dur;
+  }
+}
+
+if ($symtab) {close(S);}
+if ($segment) {close(SEG);}
+if ($filein ne "-") {close(I);}
+if ($fileout ne "-") {close(O);}
diff --git a/egs/babel/s5d/local/uem_ctm2segments.pl b/egs/babel/s5d/local/uem_ctm2segments.pl
new file mode 100755
index 00000000000..658690172c8
--- /dev/null
+++ b/egs/babel/s5d/local/uem_ctm2segments.pl
@@ -0,0 +1,232 @@
+#!/usr/bin/env perl
+use Getopt::Long;
+
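+# Example of the expected input (hypothetical values): a raw CTM line such as
+#     rec01 1 12.34 0.45 word 0.97
+# i.e. audioFile channel startTime duration word [confidence]; gaps between
+# consecutive tokens are filled in below as silence, and the recording is
+# later cut into segments at sufficiently long silences.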
+################################################################################
+# Convert a CTM file produced by decoding a long segment, typically several min
+# long, into a sequence of shorter segments of duration 10-15 seconds. Produce
+# a segments file of the form used for Kaldi training/decoding
+#
+#     utteranceID recordingID startTime endTime
+#
+# The desired outcome is that the long (input) segment will be recursively cut
+# into shorter segments at the location of long silences, leaving (say) 0.5 sec
+# of silence at each end of the two resulting shorter segments, until all the
+# segments are of the desired duration.
+#
+# NOTE: It is assumed that the CTM file provides time information at 0.01 sec
+# resolution, and that any missing segments in the CTM correspond to the
+# optional silence model, whose output token was removed by the sequence
+#
+#     lattice-align-words --> lattice-to-ctm-conf --> raw CTM file
+#
+     $ctmTimeStep = 0.01; # Could be changed if needed by --ctmTimeStep
+#
+# It is further assumed that the explicit silence token (word) is
+#
+     $silence = "<silence>";
+#
+# This could be changed using the --silence option if needed.
+#
+# Another option is the minimum silence duration to permit segmentation
+#
+     $minSilence = 1.02; # seconds
+#
+# Maximum allowed segment length, could be changed through --maxSegLen
+#
+     $maxSegLen = 30; #seconds
+#
+# Default segment length, used when the ctm segment is too long
+#
+     $defaultSegLen = 10; # seconds
+################################################################################
+
+GetOptions("ctmTimeStep=f" => \$ctmTimeStep,
+           "minSilence=f" => \$minSilence,
+           "silence=s" => \$silence,
+           "maxSegLen=f" => \$maxSegLen,
+           "defaultSegLen=f" => \$defaultSegLen);
+
+if ($#ARGV == 1) {
+  $ctmFile = $ARGV[0];
+  $segmentsFile = $ARGV[1];
+  print STDERR ("$0: $ctmFile $segmentsFile\n");
+  print STDERR ("\t--ctmTimeStep = $ctmTimeStep\n") unless ($ctmTimeStep == 0.01);
+  print STDERR ("\t--silence = $silence\n") unless ($silence eq "<silence>");
+  print STDERR ("\t--maxSegLen = $maxSegLen\n") unless ($maxSegLen == 30);
+  print STDERR ("\t--defaultSegLen = $defaultSegLen\n") unless ($defaultSegLen == 10);
+
+} else {
+  print STDERR ("Usage: $0 [--options] inputCTM outputSegments\n");
+  print STDERR ("\t--ctmTimeStep %f   Time resolution of CTM file (default 0.01 sec)\n");
+  print STDERR ("\t--silence %s       Word token for silence (default <silence>)\n");
+  print STDERR ("\t--maxSegLen %f     Max allowed segment length (default 30 sec)\n");
+  print STDERR ("\t--defaultSegLen %f Default segment length (default 10 sec)\n");
+  exit(1);
+}
+
+open (CTM, $ctmFile)
+  || die "Unable to open input CTM file $ctmFile for reading";
+$numRecordings = $numWords = $n = 0;
+$prevFileName = "";
+$prevChannel = "";
+$prevEndTime = 0.00;
+$prevConfidence = 0.00;
+while ($line=<CTM>) {
+  @token = split(/\s+/, $line);
+  unless (($#token==4)||($#token==5)) {
+    # CTM should have 5 or 6 tokens per line
+    #     audioFile channel startTime duration word [confidence]
+    print STDERR ("$0 WARNING: unparsable line $. in ctm file: $line");
+    next;
+  }
+  if ( ( ($token[0] ne $prevFileName) || ($token[1] ne $prevChannel) ) && ($prevFileName ne "") ) {
+    last if ($n==0);
+    ########################################################################
+    # This is the next audio file; create segments for the previous file
+    ########################################################################
+    print STDERR ("Audio file $prevFileName contains $n word tokens\n");
+    printf STDERR ("\t%d alternating speech/silence segments after mergers\n", &process_this_audio_file);
+    ########################################################################
+    # Done writing out the segments for the previous audio recording
+    ########################################################################
+    $numRecordings++;
+    # Reset to process the next file
+    $prevFileName = "";
+    $prevChannel = "";
+    $prevEndTime = 0.00;
+    $prevConfidence = 0.00;
+    $n=0;
+  }
+  # Otherwise, this is the next word in the same (i.e. previous) audio file
+  if ( ($token[2]-$prevEndTime) > $ctmTimeStep ) {
+    # There is a missing segment in the CTM, presumably silence
+    $fileName[$n] = $token[0];
+    $channel[$n] = $token[1];
+    $startTime[$n] = $prevEndTime;
+    $endTime[$n] = $token[2];
+    $wordToken[$n] = $silence;
+    $confidence[$n]= $prevConfidence;
+    $n++;
+  }
+  # Record this token for processing later
+  $prevFileName = $fileName[$n] = $token[0];
+  $prevChannel = $channel[$n] = $token[1];
+  $startTime[$n] = $token[2];
+  $prevEndTime = $endTime[$n] = ($token[2]+$token[3]);
+  $wordToken[$n] = $token[4];
+  $prevConfidence = $confidence[$n] = $token[5] if ($#token==5);
+  $n++;
+  $numWords++;
+}
+close(CTM);
+if ($n>0) {
+  # This is the last audio file; create segments for the file
+  print STDERR ("Audio file $prevFileName contains $n word tokens\n");
+  printf STDERR ("\t%d alternating speech/silence segments after mergers\n", &process_this_audio_file);
+  # Done writing out the segments for the last audio recording
+  $numRecordings++;
+}
+print STDERR ("Read $numRecordings filenames containing $numWords words from $ctmFile\n");
+
+
+sub process_this_audio_file {
+  # Merge consecutive speech/silence tokens to create candidate "segments"
+  $s=0;
+  $segmentStart[$s] = 0.00;
+  $segmentType[$s] = $silence;
+  $segmentEnd[$s] = -1.0;
+  for ($i=0; $i<$n; $i++) {
+    $sTime = $startTime[$i];
+    $word = $wordToken[$i];
+    $eTime = $endTime[$i];
+    if ( ($word eq $silence) && ($segmentType[$s] ne $silence)
+        || ($word ne $silence) && ($segmentType[$s] eq $silence) ) {
+      $segmentEnd[$s] = $sTime;
+      $s++;
+      $segmentStart[$s] = $sTime;
+      $segmentType[$s] = ($word eq $silence) ? $silence : "<speech>" ;
+    }
+    $segmentEnd[$s] = $eTime;
+  }
+  # Merge speech segments separated by silence of less than some minimum duration
+  # Note: there must be at least two segments for mergers to be an option, i.e. $s>0.
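+  # Worked example (illustrative numbers, not from the source): with the
+  # default minSilence of 1.02 sec, the candidate sequence
+  #     <speech>[0.0-3.0] <silence>[3.0-3.5] <speech>[3.5-6.0]
+  # is merged into the single segment <speech>[0.0-6.0], because the internal
+  # 0.5 sec silence is below the merge threshold.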
+  if ($s>0) {
+    if ( ($segmentType[0] eq $silence)
+        && ( ($segmentEnd[0]-$segmentStart[0]) < $minSilence) ) {
+      die "Something wrong: initial silence segment must have a speech segment following it"
+        unless ($segmentType[1] eq "<speech>");
+      $segmentType[0] = $segmentType[1];
+      $segmentEnd[0] = $segmentEnd[1];
+      for ($j=2; $j<=$s; $j++) {
+        $segmentStart[$j-1] = $segmentStart[$j];
+        $segmentType[$j-1] = $segmentType[$j];
+        $segmentEnd[$j-1] = $segmentEnd[$j];
+      }
+      $s--; # one silence segment removed
+    }
+    for ($i=1; $i<$s; $i++) {
+      if ( ($segmentType[$i] eq $silence)
+          && ( ($segmentEnd[$i]-$segmentStart[$i]) < $minSilence) ) {
+        die "Something wrong: internal silence segment must have speech segments on either side"
+          unless ( ($segmentType[$i-1] eq "<speech>") && ($segmentType[$i+1] eq "<speech>") );
+        $segmentEnd[$i-1] = $segmentEnd[$i+1];
+        for ($j=$i+2; $j<=$s; $j++) {
+          $segmentStart[$j-2] = $segmentStart[$j];
+          $segmentType[$j-2] = $segmentType[$j];
+          $segmentEnd[$j-2] = $segmentEnd[$j];
+        }
+        $s -= 2; # one silence removed, two speech segments merged
+        $i--;    # backtrack, to process the segment that just moved into position $i
+      }
+    }
+    if ( ($segmentType[$s] eq $silence)
+        && ( ($segmentEnd[$s]-$segmentStart[$s]) < $minSilence) ) {
+      die "Something wrong: final silence segment must have a speech segment preceding it"
+        unless ($segmentType[$s-1] eq "<speech>");
+      $segmentEnd[$s-1] = $segmentEnd[$s];
+      $s--; # one silence segment removed
+    }
+  }
+  # Print segment markers for debugging
+  $num = $s + 1;
+  for ($i=0; $i<=$s; $i++) {
+#    printf STDOUT ("%s %s %.2f %.2f %s\n",
+#    printf STDOUT ("%s %s %.2f %.2f\n",
+#           sprintf ("%s_%06i",$prevFileName,(100*$segmentStart[$i])),
+#           $prevFileName,
+#           $segmentStart[$i],
+#           $segmentEnd[$i], $segmentType[$i]);
+#           ($segmentStart[$i] - (($i==0) ? 0.0 : 0.5)),
+#           ($segmentEnd[$i] + (($i==$s) ? 0.0 : 0.5))) unless ($segmentType[$i] eq $silence);
+    if ($segmentType[$i] ne $silence) {
+      if (($segmentEnd[$i] - $segmentStart[$i]) > $maxSegLen) {
+        $fakeStart = $segmentStart[$i] - (($i==0) ? 0.0 : 0.5);
+        while (($segmentEnd[$i] - $fakeStart) > $defaultSegLen) {
+          printf STDOUT ("%s %s %.2f %.2f\n",
+                 sprintf ("%s_%06i",$prevFileName,(100*$fakeStart)),
+                 $prevFileName,
+                 $fakeStart,
+                 $fakeStart + $defaultSegLen);
+          $fakeStart += $defaultSegLen;
+          $num += 2;
+        }
+        if (($segmentEnd[$i] - $fakeStart) > 0) {
+          printf STDOUT ("%s %s %.2f %.2f\n",
+                 sprintf ("%s_%06i",$prevFileName,(100*$fakeStart)),
+                 $prevFileName,
+                 $fakeStart,
+                 ($segmentEnd[$i] + (($i==$s) ? 
0.0 : 0.5))); + } + } + } + $num; +} diff --git a/egs/babel/s5d/nnet3_examples.sh b/egs/babel/s5d/nnet3_examples.sh new file mode 100644 index 00000000000..82661140d3c --- /dev/null +++ b/egs/babel/s5d/nnet3_examples.sh @@ -0,0 +1,32 @@ +# The results shown below are for Telugu fullLP condition +#TDNN + local/nnet3/run_tdnn.sh \ + --affix "6layer_r512" \ + --splice-indexes "-2,-1,0,1,2 -1,2 -3,3 -7,2 0 0 " \ + --relu-dim 512 || exit 1; + + # I modified the TDNN scripts to run for 5 epochs, however these results are with 3 epoch training + ./run-4-anydecode.sh --skip-kws true --dir dev10h.seg --nnet3-model nnet3/tdnn_6layer_r512_sp + #%WER 68.4 | 22131 40145 | 36.3 45.9 17.9 4.7 68.4 31.9 | -1.082 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.seg/score_10/dev10h.seg.ctm.sys + ./run-4-anydecode.sh --skip-kws true --dir dev10h.pem --nnet3-model nnet3/tdnn_6layer_r512_sp + #%WER 67.1 | 22131 40145 | 36.4 45.9 17.8 3.5 67.1 29.6 | -0.902 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys + + + + +#LSTM + local/nnet3/run_lstm.sh + + ./run-4-anydecode.sh --skip-kws true --dir dev10h.seg --is-rnn true --nnet3-model nnet3/lstm_sp --extra-left-context 40 --frames-per-chunk 20 + #%WER 68.0 | 22131 40145 | 38.2 44.8 17.0 6.2 68.0 33.5 | -1.491 | exp/nnet3/lstm_sp/decode_dev10h.seg/score_10/dev10h.seg.ctm.sys + ./run-4-anydecode.sh --skip-kws true --dir dev10h.pem --is-rnn true --nnet3-model nnet3/lstm_sp --extra-left-context 40 --frames-per-chunk 20 + #%WER 65.1 | 22131 40145 | 39.2 45.9 14.9 4.3 65.1 28.8 | -1.299 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys + + +#BLSTM + local/nnet3/run_blstm.sh + ./run-4-anydecode.sh --skip-kws true --dir dev10h.seg --is-rnn true --nnet3-model nnet3/lstm_bidirectional_sp --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 + #%WER 67.1 | 22131 40145 | 38.8 44.9 16.3 5.9 67.1 33.6 | -1.737 | exp/nnet3/lstm_birectional_cell512_sp/decode_dev10h.seg/score_10/dev10h.seg.ctm.sys + ./run-4-anydecode.sh --skip-kws true --dir dev10h.pem --is-rnn true --nnet3-model nnet3/lstm_bidirectional_sp --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 + #%WER 64.2 | 22131 40145 | 39.8 46.0 14.2 4.0 64.2 29.0 | -1.548 | exp/nnet3/lstm_birectional_cell512_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys + diff --git a/egs/babel/s5d/path.sh b/egs/babel/s5d/path.sh new file mode 100755 index 00000000000..212c5e15d55 --- /dev/null +++ b/egs/babel/s5d/path.sh @@ -0,0 +1,7 @@ +export KALDI_ROOT=/export/a09/jtrmal/kaldi/ +. $KALDI_ROOT/tools/env.sh +. /export/a09/jtrmal/kaldi-current/tools/env.sh +. 
/export/babel/data/software/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/online2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +export LC_ALL=C + diff --git a/egs/babel/s5d/results/RESULTS.105-turkish.flp b/egs/babel/s5d/results/RESULTS.105-turkish.flp new file mode 100644 index 00000000000..737d0893abe --- /dev/null +++ b/egs/babel/s5d/results/RESULTS.105-turkish.flp @@ -0,0 +1,29 @@ +%WER 57.5 | 22070 54382 | 49.0 41.7 9.2 6.5 57.5 30.8 | -1.255 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.8 | 22070 54382 | 57.3 34.1 8.6 5.1 47.8 29.0 | -0.605 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 22070 54382 | 59.0 32.7 8.3 4.8 45.8 28.7 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 22070 54382 | 59.0 32.4 8.5 4.8 45.8 28.4 | -0.630 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_17/dev10h.pem.ctm.sys +%WER 47.1 | 22070 54382 | 56.5 32.7 10.8 3.6 47.1 28.7 | -0.430 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_8/metrics.txt:MTWV = 0.5930, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/kws_12/metrics.txt:MTWV = 0.6426, THRESHOLD = 0.384 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_16/metrics.txt:MTWV = 0.6214, THRESHOLD = 0.447 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.6270, THRESHOLD = 0.595 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_kws_8/metrics.txt:MTWV = 0.5930, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_12/metrics.txt:MTWV = 0.6426, THRESHOLD = 0.384 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_kws_16/metrics.txt:MTWV = 0.6214, THRESHOLD = 0.447 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.6270, THRESHOLD = 0.595 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_11/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.807000000000001 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_oov_kws_21/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.547 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_18/metrics.txt:MTWV = 0.0071, THRESHOLD = 0.666 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_9/metrics.txt:MTWV = 0.5003, THRESHOLD = 0.555 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_13/metrics.txt:MTWV = 0.5339, THRESHOLD = 0.581 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_19/metrics.txt:MTWV = 0.5203, THRESHOLD = 0.553 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.5078, THRESHOLD = 0.553 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_12/metrics.txt:MTWV = 0.0045, THRESHOLD = 0.891000000000001 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_11/metrics.txt:MTWV = 0.0066, THRESHOLD = 0.720000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_18/metrics.txt:MTWV = 0.0058, THRESHOLD = 0.867000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_20/metrics.txt:MTWV = 0.0072, THRESHOLD = 0.785000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_11/metrics.txt:MTWV = 
0.0070, THRESHOLD = 0.807000000000001 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/oov_kws_21/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.547 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_18/metrics.txt:MTWV = 0.0071, THRESHOLD = 0.666 diff --git a/egs/babel/s5d/results/RESULTS.106-tagalog.flp b/egs/babel/s5d/results/RESULTS.106-tagalog.flp new file mode 100644 index 00000000000..72568cebf81 --- /dev/null +++ b/egs/babel/s5d/results/RESULTS.106-tagalog.flp @@ -0,0 +1,34 @@ +%WER 56.7 | 25332 63009 | 50.6 38.5 10.9 7.3 56.7 32.1 | -1.361 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.4 | 25332 63009 | 57.4 32.7 9.9 5.8 48.4 30.3 | -0.891 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 46.9 | 25332 63009 | 57.4 30.5 12.1 4.3 46.9 30.3 | -0.517 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.7 | 25332 63009 | 58.2 31.1 10.7 4.9 46.7 29.9 | -0.737 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_18/dev10h.pem.ctm.sys +%WER 47.7 | 25332 63009 | 56.1 30.5 13.4 3.9 47.7 30.2 | -0.548 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 56.7 | 25332 63009 | 50.6 38.5 10.9 7.3 56.7 32.1 | -1.361 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.4 | 25332 63009 | 57.4 32.7 9.9 5.8 48.4 30.3 | -0.891 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 46.9 | 25332 63009 | 57.4 30.5 12.1 4.3 46.9 30.3 | -0.517 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.7 | 25332 63009 | 58.2 31.1 10.7 4.9 46.7 29.9 | -0.737 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_18/dev10h.pem.ctm.sys +%WER 47.7 | 25332 63009 | 56.1 30.5 13.4 3.9 47.7 30.2 | -0.548 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kws_12/metrics.txt:MTWV = 0.4452, THRESHOLD = 0.577 +exp/tri6_nnet/decode_dev10h.pem/kws_11/metrics.txt:MTWV = 0.4778, THRESHOLD = 0.696000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kws_15/metrics.txt:MTWV = 0.4448, THRESHOLD = 0.770000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.4450, THRESHOLD = 0.730000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_12/metrics.txt:MTWV = 0.4452, THRESHOLD = 0.577 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_11/metrics.txt:MTWV = 0.4778, THRESHOLD = 0.696000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_15/metrics.txt:MTWV = 0.4448, THRESHOLD = 0.770000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.4450, THRESHOLD = 0.730000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_8/metrics.txt:MTWV = 0.0173, THRESHOLD = 0.809000000000001 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0310, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/dev_oov_kws_21/metrics.txt:MTWV = 0.0164, THRESHOLD = 0.309 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_20/metrics.txt:MTWV = 0.0183, THRESHOLD = 0.851000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_9/metrics.txt:MTWV = 0.5117, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_10/metrics.txt:MTWV = 0.5408, THRESHOLD = 0.504 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_17/metrics.txt:MTWV = 0.5221, THRESHOLD = 0.556 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 
0.5077, THRESHOLD = 0.648 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/eval_oov_kws_10/metrics.txt:MTWV = 0.0038, THRESHOLD = 0.900000000000001 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_10/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.659 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_17/metrics.txt:MTWV = 0.0047, THRESHOLD = 0.889000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_15/metrics.txt:MTWV = 0.0052, THRESHOLD = 0.522 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_8/metrics.txt:MTWV = 0.0173, THRESHOLD = 0.809000000000001 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/metrics.txt:MTWV = 0.0310, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/oov_kws_21/metrics.txt:MTWV = 0.0164, THRESHOLD = 0.309 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_20/metrics.txt:MTWV = 0.0183, THRESHOLD = 0.851000000000001 diff --git a/egs/babel/s5d/results/RESULTS.107-vietnamese.flp b/egs/babel/s5d/results/RESULTS.107-vietnamese.flp new file mode 100644 index 00000000000..e64bca74572 --- /dev/null +++ b/egs/babel/s5d/results/RESULTS.107-vietnamese.flp @@ -0,0 +1,50 @@ +%WER 57.9 | 21875 111957 | 45.4 42.3 12.3 3.2 57.9 36.7 | -1.203 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.3 | 21875 111957 | 53.2 37.3 9.5 3.5 50.3 35.8 | -0.917 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_9/dev10h.pem.ctm.sys +%WER 47.4 | 21875 111957 | 55.1 32.8 12.1 2.6 47.4 35.7 | -0.642 | exp/tri6_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 48.6 | 21875 111957 | 54.3 35.9 9.8 2.9 48.6 35.4 | -0.769 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +%WER 50.4 | 21875 111957 | 51.3 32.4 16.2 1.8 50.4 35.7 | -0.487 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys + +############################################################################################################################# + +#KWS on the dev kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_9/metrics.txt:MTWV = 0.4488, THRESHOLD = 0.601 +exp/tri6_nnet/decode_dev10h.pem/kws_10/metrics.txt:MTWV = 0.4926, THRESHOLD = 0.576 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_15/metrics.txt:MTWV = 0.4589, THRESHOLD = 0.635 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.4477, THRESHOLD = 0.591 + +#KWS on the dev kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/oov_kws_8/metrics.txt:MTWV = 0.0001, THRESHOLD = 0.778 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_11/metrics.txt:MTWV = 0.0024, THRESHOLD = 0.581 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_16/metrics.txt:MTWV = 0.0012, THRESHOLD = 0.596 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_15/metrics.txt:MTWV = 0.0017, THRESHOLD = 0.817 + +############################################################################################################################ + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist2.xml kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_8/metrics.txt:MTWV = 0.2886, THRESHOLD = 0.513 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_11/metrics.txt:MTWV = 0.3672, THRESHOLD = 0.693 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_15/metrics.txt:MTWV = 0.2999, THRESHOLD = 0.792 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.3041, THRESHOLD = 0.693 + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist2.xml kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_10/metrics.txt:MTWV = 0.0000, THRESHOLD = 0 
+exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.873
+exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_15/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.214
+exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_15/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.831
+
+############################################################################################################################
+
+#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist3.xml kwlist -- IV only
+exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/eval_kws_9/metrics.txt:MTWV = 0.3791, THRESHOLD = 0.564
+exp/tri6_nnet/decode_dev10h.pem/eval_kws_12/metrics.txt:MTWV = 0.4444, THRESHOLD = 0.406
+exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_15/metrics.txt:MTWV = 0.3780, THRESHOLD = 0.609
+exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.3904, THRESHOLD = 0.51
+
+#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist3.xml kwlist -- OOV only
+exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_10/metrics.txt:MTWV = 0.0021, THRESHOLD = 0.724
+exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_10/metrics.txt:MTWV = 0.0040, THRESHOLD = 0.491
+exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_15/metrics.txt:MTWV = 0.0032, THRESHOLD = 0.867
+exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_15/metrics.txt:MTWV = 0.0039, THRESHOLD = 0.105
+
+############################################################################################################################
+
diff --git a/egs/babel/s5d/results/kws_results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-03-31T11:34:24-04:00 b/egs/babel/s5d/results/kws_results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-03-31T11:34:24-04:00
new file mode 100644
index 00000000000..1fdad0615e1
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-03-31T11:34:24-04:00
@@ -0,0 +1,211 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:00:20-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.4114 OTWV=0.5171 STWV=0.6713 MTWV=0.4128 THRESHOLD=0.453 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3606 , #FA=1935 , #Miss=2988 , Contributed ATWV= 0.4114, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4121
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.4639 OTWV=0.5790 STWV=0.7779 MTWV=0.4639 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3948 , #FA=2450 , #Miss=2646 , Contributed ATWV= 0.4639, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4646
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.4670 OTWV=0.5932 STWV=0.7799 MTWV=0.4685 THRESHOLD=0.453 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3914 , #FA=2016 , #Miss=2680 , Contributed ATWV= 0.4670, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4677
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.4940 OTWV=0.6072 STWV=0.7751 MTWV=0.4940 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4297 , #FA=2623 , #Miss=2297 , Contributed ATWV= 0.4940, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4948
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.4970 OTWV=0.6016 STWV=0.7837 MTWV=0.4985 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4145 , #FA=2538 , #Miss=2449 , Contributed ATWV= 0.4970, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4977
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.5174 OTWV=0.6324 STWV=0.7958 MTWV=0.5183 THRESHOLD=0.433 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4312 , #FA=2156 , #Miss=2282 , Contributed ATWV= 0.5174, Best Possible Contributed ATWV= 0.9984, ATWV= 0.5182
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+#
+# KWS Task performance (TWV), for the set [kwlist2] evaluated on 2016-03-31T12:00:28-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.4371 OTWV=0.5527 STWV=0.6904 MTWV=0.4372 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=7695 , #FA=8671 , #Miss=6784 , Contributed ATWV= 0.4356, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4423
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=32 , #Miss=50 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0974
+ATWV=0.4822 OTWV=0.6082 STWV=0.7912 MTWV=0.4822 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8278 , #FA=9303 , #Miss=6201 , Contributed ATWV= 0.4808, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4882
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=12 , #FA=60 , #Miss=48 , Contributed ATWV= 0.0014, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0924
+ATWV=0.4920 OTWV=0.6156 STWV=0.7891 MTWV=0.4920 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist2_11/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8698 , #FA=10346, #Miss=5781 , Contributed ATWV= 0.4913, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4989
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=8 , #FA=59 , #Miss=52 , Contributed ATWV= 0.0006, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0427
+ATWV=0.5006 OTWV=0.6216 STWV=0.7975 MTWV=0.5006 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist2_11/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8552 , #FA=9419 , #Miss=5927 , Contributed ATWV= 0.4992, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5069
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=55 , #Miss=49 , Contributed ATWV= 0.0013, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0873
+ATWV=0.5077 OTWV=0.6291 STWV=0.7819 MTWV=0.5077 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist2_11/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=9060 , #FA=10188, #Miss=5419 , Contributed ATWV= 0.5073, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5150
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=7 , #FA=64 , #Miss=53 , Contributed ATWV= 0.0005, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0325
+ATWV=0.5203 OTWV=0.6486 STWV=0.7952 MTWV=0.5218 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=9144 , #FA=8922 , #Miss=5335 , Contributed ATWV= 0.5191, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5271
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=9 , #FA=44 , #Miss=51 , Contributed ATWV= 0.0012, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0821
+#
+# KWS Task performance (TWV), for the set [kwlist3] evaluated on 2016-03-31T12:00:40-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3527 OTWV=0.4568 STWV=0.6002 MTWV=0.3537 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=6954 , #FA=5353 , #Miss=7254 , Contributed ATWV= 0.3477, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3778
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=23 , #FA=232 , #Miss=223 , Contributed ATWV= 0.0049, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0605
+ATWV=0.3997 OTWV=0.5121 STWV=0.7021 MTWV=0.4002 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist3_12/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7407 , #FA=5449 , #Miss=6801 , Contributed ATWV= 0.3919, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4259
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=39 , #FA=307 , #Miss=207 , Contributed ATWV= 0.0076, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0939
+ATWV=0.4102 OTWV=0.5277 STWV=0.7047 MTWV=0.4102 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7795 , #FA=5927 , #Miss=6413 , Contributed ATWV= 0.4033, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4382
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=288 , #Miss=210 , Contributed ATWV= 0.0067, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0822
+ATWV=0.4222 OTWV=0.5278 STWV=0.7066 MTWV=0.4222 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7820 , #FA=5808 , #Miss=6388 , Contributed ATWV= 0.4152, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4511
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=326 , #Miss=210 , Contributed ATWV= 0.0068, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0839
+ATWV=0.4285 OTWV=0.5406 STWV=0.6965 MTWV=0.4286 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=8050 , #FA=5500 , #Miss=6158 , Contributed ATWV= 0.4213, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4578
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=34 , #FA=264 , #Miss=212 , Contributed ATWV= 0.0070, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0858
+ATWV=0.4361 OTWV=0.5517 STWV=0.7032 MTWV=0.4361 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=8487 , #FA=6339 , #Miss=5721 , Contributed ATWV= 0.4310, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4683
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=311 , #Miss=210 , Contributed ATWV= 0.0048, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0594
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:00:53-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2471 OTWV=0.2986 STWV=0.3521 MTWV=0.2471 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1536 , #FA=1187 , #Miss=5058 , Contributed ATWV= 0.2471, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2475
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.2738 OTWV=0.3312 STWV=0.3984 MTWV=0.2738 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1588 , #FA=1164 , #Miss=5006 , Contributed ATWV= 0.2738, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2742
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.2762 OTWV=0.3345 STWV=0.4011 MTWV=0.2762 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1613 , #FA=1156 , #Miss=4981 , Contributed ATWV= 0.2762, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2766
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.2932 OTWV=0.3415 STWV=0.3985 MTWV=0.2981 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1624 , #FA=1082 , #Miss=4970 , Contributed ATWV= 0.2934, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2938
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828
+ATWV=0.2970 OTWV=0.3432 STWV=0.4014 MTWV=0.2970 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1702 , #FA=1132 , #Miss=4892 , Contributed ATWV= 0.2970, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2975
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.2978 OTWV=0.3444 STWV=0.4035 MTWV=0.2978 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1683 , #FA=1050 , #Miss=4911 , Contributed ATWV= 0.2978, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2983
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+#
+# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:01:05-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2335 OTWV=0.2867 STWV=0.3609 MTWV=0.2337 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/phones/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1443 , #FA=1310 , #Miss=5151 , Contributed ATWV= 0.2336, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2339
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276
+ATWV=0.2513 OTWV=0.3174 STWV=0.4034 MTWV=0.2513 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1518 , #FA=1442 , #Miss=5076 , Contributed ATWV= 0.2515, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2519
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828
+ATWV=0.2525 OTWV=0.3188 STWV=0.4069 MTWV=0.2583 THRESHOLD=0.444 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1564 , #FA=1489 , #Miss=5030 , Contributed ATWV= 0.2526, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2530
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552
+ATWV=0.2575 OTWV=0.3184 STWV=0.3902 MTWV=0.2608 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1544 , #FA=1319 , #Miss=5050 , Contributed ATWV= 0.2575, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2579
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276
+ATWV=0.2759 OTWV=0.3294 STWV=0.4067 MTWV=0.2766 THRESHOLD=0.511 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1624 , #FA=1369 , #Miss=4970 , Contributed ATWV= 0.2760, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2764
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552
+ATWV=0.2793 OTWV=0.3306 STWV=0.4042 MTWV=0.2812 THRESHOLD=0.529 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1693 , #FA=1495 , #Miss=4901 , Contributed ATWV= 0.2785, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2790
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=0 , #Miss=1 , Contributed ATWV= 0.0008, Best Possible Contributed ATWV= 0.0016, ATWV= 0.5000
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:01:23-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2516 OTWV=0.2931 STWV=0.3457 MTWV=0.2518 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1433 , #FA=916 , #Miss=5161 , Contributed ATWV= 0.2516, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2520
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.2710 OTWV=0.3243 STWV=0.3971 MTWV=0.2720 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1527 , #FA=1006 , #Miss=5067 , Contributed ATWV= 0.2710, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2715
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552
+ATWV=0.2864 OTWV=0.3330 STWV=0.3928 MTWV=0.2864 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1673 , #FA=1135 , #Miss=4921 , Contributed ATWV= 0.2864, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2869
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.2874 OTWV=0.3386 STWV=0.4018 MTWV=0.2881 THRESHOLD=0.403 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch2/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1591 , #FA=1010 , #Miss=5003 , Contributed ATWV= 0.2874, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2879
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276
+ATWV=0.2946 OTWV=0.3463 STWV=0.4046 MTWV=0.2952 THRESHOLD=0.453 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1666 , #FA=1036 , #Miss=4928 , Contributed ATWV= 0.2946, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2951
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist2] evaluated on 2016-03-31T12:01:28-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3298 OTWV=0.4064 STWV=0.4925 MTWV=0.3305 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist2_8/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4881 , #FA=5838 , #Miss=9598 , Contributed ATWV= 0.3281, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3331
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=9 , #FA=23 , #Miss=51 , Contributed ATWV= 0.0017, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1105
+ATWV=0.3636 OTWV=0.4527 STWV=0.5672 MTWV=0.3638 THRESHOLD=0.453 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5215 , #FA=6311 , #Miss=9264 , Contributed ATWV= 0.3608, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3663
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=15 , #FA=26 , #Miss=45 , Contributed ATWV= 0.0028, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1873
+ATWV=0.3784 OTWV=0.4622 STWV=0.5703 MTWV=0.3792 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5416 , #FA=6432 , #Miss=9063 , Contributed ATWV= 0.3766, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3824
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=33 , #Miss=49 , Contributed ATWV= 0.0018, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1208
+ATWV=0.3795 OTWV=0.4643 STWV=0.5595 MTWV=0.3795 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5620 , #FA=6171 , #Miss=8859 , Contributed ATWV= 0.3781, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3839
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=8 , #FA=29 , #Miss=52 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0974
+ATWV=0.3973 OTWV=0.4799 STWV=0.5716 MTWV=0.4011 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5747 , #FA=5988 , #Miss=8732 , Contributed ATWV= 0.3952, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4013
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=26 , #Miss=49 , Contributed ATWV= 0.0020, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1346
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist3] evaluated on 2016-03-31T12:01:38-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2442 OTWV=0.2994 STWV=0.3760 MTWV=0.2442 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3580 , #FA=3520 , #Miss=10628, Contributed ATWV= 0.2378, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2584
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=28 , #FA=145 , #Miss=218 , Contributed ATWV= 0.0064, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0787
+ATWV=0.2681 OTWV=0.3407 STWV=0.4407 MTWV=0.2684 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3688 , #FA=3305 , #Miss=10520, Contributed ATWV= 0.2574, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2797
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=45 , #FA=195 , #Miss=201 , Contributed ATWV= 0.0106, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1306
+ATWV=0.2844 OTWV=0.3499 STWV=0.4441 MTWV=0.2857 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3840 , #FA=3340 , #Miss=10368, Contributed ATWV= 0.2733, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2970
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=44 , #FA=197 , #Miss=202 , Contributed ATWV= 0.0111, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1367
+ATWV=0.2946 OTWV=0.3581 STWV=0.4423 MTWV=0.2948 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3882 , #FA=2874 , #Miss=10326, Contributed ATWV= 0.2804, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3047
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=53 , #FA=138 , #Miss=193 , Contributed ATWV= 0.0142, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1751
+ATWV=0.2958 OTWV=0.3658 STWV=0.4485 MTWV=0.2988 THRESHOLD=0.453 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist3_11/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=4068 , #FA=3344 , #Miss=10140, Contributed ATWV= 0.2835, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3081
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=47 , #FA=136 , #Miss=199 , Contributed ATWV= 0.0122, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1504
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:01:55-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1869 OTWV=0.2380 STWV=0.3024 MTWV=0.1869 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it2/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1180 , #FA=1168 , #Miss=5414 , Contributed ATWV= 0.1870, Best Possible Contributed ATWV= 0.9984, ATWV= 0.1873
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828
+ATWV=0.2043 OTWV=0.2598 STWV=0.3427 MTWV=0.2043 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1281 , #FA=1263 , #Miss=5313 , Contributed ATWV= 0.2045, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2048
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=4 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1103
+ATWV=0.2055 OTWV=0.2591 STWV=0.3340 MTWV=0.2055 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1267 , #FA=1206 , #Miss=5327 , Contributed ATWV= 0.2057, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2060
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=5 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1379
+ATWV=0.2123 OTWV=0.2766 STWV=0.3581 MTWV=0.2149 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1333 , #FA=1274 , #Miss=5261 , Contributed ATWV= 0.2125, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2128
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=4 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1103
+ATWV=0.2216 OTWV=0.2852 STWV=0.3565 MTWV=0.2240 THRESHOLD=0.403 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1371 , #FA=1067 , #Miss=5223 , Contributed ATWV= 0.2209, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2213
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=2 , #Miss=1 , Contributed ATWV= 0.0007, Best Possible Contributed ATWV= 0.0016, ATWV= 0.4448
+ATWV=0.2532 OTWV=0.3121 STWV=0.3808 MTWV=0.2539 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1547 , #FA=1310 , #Miss=5047 , Contributed ATWV= 0.2524, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2528
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=0 , #Miss=1 , Contributed ATWV= 0.0008, Best Possible Contributed ATWV= 0.0016, ATWV= 0.5000
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist2] evaluated on 2016-03-31T12:02:01-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2686 OTWV=0.3459 STWV=0.4328 MTWV=0.2690 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=3870 , #FA=5258 , #Miss=10609, Contributed ATWV= 0.2670, Best Possible Contributed ATWV= 0.9849, ATWV= 0.2711
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=42 , #Miss=50 , Contributed ATWV= 0.0016, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1055
+ATWV=0.3044 OTWV=0.3970 STWV=0.5154 MTWV=0.3044 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4285 , #FA=5644 , #Miss=10194, Contributed ATWV= 0.3011, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3057
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=16 , #FA=54 , #Miss=44 , Contributed ATWV= 0.0033, Best Possible Contributed ATWV= 0.0151, ATWV= 0.2152
+ATWV=0.3073 OTWV=0.3944 STWV=0.4998 MTWV=0.3079 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch2/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4457 , #FA=6120 , #Miss=10022, Contributed ATWV= 0.3051, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3098
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=13 , #FA=55 , #Miss=47 , Contributed ATWV= 0.0022, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1487
+ATWV=0.3092 OTWV=0.4100 STWV=0.5226 MTWV=0.3125 THRESHOLD=0.465 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4431 , #FA=5723 , #Miss=10048, Contributed ATWV= 0.3078, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3125
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=69 , #Miss=49 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0977
+ATWV=0.3280 OTWV=0.4225 STWV=0.5216 MTWV=0.3291 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4940 , #FA=6266 , #Miss=9539 , Contributed ATWV= 0.3266, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3316
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=63 , #Miss=50 , Contributed ATWV= 0.0014, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0911
+ATWV=0.3586 OTWV=0.4552 STWV=0.5519 MTWV=0.3614 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5261 , #FA=6266 , #Miss=9218 , Contributed ATWV= 0.3563, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3618
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=14 , #FA=67 , #Miss=46 , Contributed ATWV= 0.0023, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1531
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist3] evaluated on 2016-03-31T12:02:11-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1931 OTWV=0.2569 STWV=0.3444 MTWV=0.1931 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist3_9/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3015 , #FA=3772 , #Miss=11193, Contributed ATWV= 0.1875, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2037
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=33 , #FA=303 , #Miss=213 , Contributed ATWV= 0.0062, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0759
+ATWV=0.2228 OTWV=0.2982 STWV=0.4154 MTWV=0.2231 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist3_11/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3232 , #FA=3853 , #Miss=10976, Contributed ATWV= 0.2092, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2273
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=57 , #FA=332 , #Miss=189 , Contributed ATWV= 0.0141, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1738
+ATWV=0.2247 OTWV=0.2962 STWV=0.4001 MTWV=0.2247 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch4/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3131 , #FA=3232 , #Miss=11077, Contributed ATWV= 0.2122, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2306
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=48 , #FA=278 , #Miss=198 , Contributed ATWV= 0.0131, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1606
+ATWV=0.2320 OTWV=0.3081 STWV=0.4229 MTWV=0.2326 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3378 , #FA=3831 , #Miss=10830, Contributed ATWV= 0.2194, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2384
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=53 , #FA=299 , #Miss=193 , Contributed ATWV= 0.0126, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1544
+ATWV=0.2474 OTWV=0.3186 STWV=0.4206 MTWV=0.2476 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3615 , #FA=3812 , #Miss=10593, Contributed ATWV= 0.2310, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2510
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=63 , #FA=306 , #Miss=183 , Contributed ATWV= 0.0165, Best Possible Contributed ATWV= 0.0814, ATWV= 0.2023
+ATWV=0.2668 OTWV=0.3433 STWV=0.4457 MTWV=0.2668 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3825 , #FA=3913 , #Miss=10383, Contributed ATWV= 0.2535, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2755
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=59 , #FA=305 , #Miss=187 , Contributed ATWV= 0.0138, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1699
diff --git a/egs/babel/s5d/results/kws_results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:04:03-04:00 b/egs/babel/s5d/results/kws_results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:04:03-04:00
new file mode 100644
index 00000000000..1bbdc7dc33a
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:04:03-04:00
@@ -0,0 +1,100 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:04:48-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.4230 OTWV=0.5203 STWV=0.6189 MTWV=0.4235 THRESHOLD=0.473 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6311 , #FA=6437 , #Miss=5805 , Contributed ATWV= 0.4023, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4691
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=89 , #FA=402 , #Miss=391 , Contributed ATWV= 0.0206, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1450
+ATWV=0.4491 OTWV=0.5597 STWV=0.7023 MTWV=0.4494 THRESHOLD=0.503 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6205 , #FA=5950 , #Miss=5911 , Contributed ATWV= 0.4196, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4893
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=107 , #FA=429 , #Miss=373 , Contributed ATWV= 0.0295, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2070
+ATWV=0.4529 OTWV=0.5702 STWV=0.7084 MTWV=0.4529 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6656 , #FA=7401 , #Miss=5460 , Contributed ATWV= 0.4228, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4929
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=120 , #FA=600 , #Miss=360 , Contributed ATWV= 0.0301, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2113
+ATWV=0.4606 OTWV=0.5758 STWV=0.7195 MTWV=0.4606 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6612 , #FA=6706 , #Miss=5504 , Contributed ATWV= 0.4292, Best Possible Contributed ATWV= 0.8576, ATWV= 0.5004
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=118 , #FA=517 , #Miss=362 , Contributed ATWV= 0.0314, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2204
+ATWV=0.4728 OTWV=0.5842 STWV=0.7081 MTWV=0.4728 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6938 , #FA=6197 , #Miss=5178 , Contributed ATWV= 0.4482, Best Possible Contributed ATWV= 0.8576, ATWV= 0.5226
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=96 , #FA=415 , #Miss=384 , Contributed ATWV= 0.0246, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1729
+ATWV=0.4845 OTWV=0.5929 STWV=0.7193 MTWV=0.4847 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=7106 , #FA=6592 , #Miss=5010 , Contributed ATWV= 0.4522, Best Possible Contributed ATWV= 0.8576, ATWV= 0.5273
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=116 , #FA=464 , #Miss=364 , Contributed ATWV= 0.0322, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2264
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:05:02-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3898 OTWV=0.4933 STWV=0.6145 MTWV=0.3899 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4970 , #FA=6619 , #Miss=7146 , Contributed ATWV= 0.3630, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4232
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=99 , #FA=372 , #Miss=381 , Contributed ATWV= 0.0268, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1882
+ATWV=0.4031 OTWV=0.5200 STWV=0.6682 MTWV=0.4031 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5121 , #FA=7137 , #Miss=6995 , Contributed ATWV= 0.3719, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4336
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=114 , #FA=463 , #Miss=366 , Contributed ATWV= 0.0312, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2194
+ATWV=0.4084 OTWV=0.5225 STWV=0.6694 MTWV=0.4094 THRESHOLD=0.465 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5236 , #FA=6998 , #Miss=6880 , Contributed ATWV= 0.3785, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4413
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=108 , #FA=459 , #Miss=372 , Contributed ATWV= 0.0299, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2103
+ATWV=0.4168 OTWV=0.5258 STWV=0.6705 MTWV=0.4171 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5251 , #FA=6798 , #Miss=6865 , Contributed ATWV= 0.3850, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4489
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=116 , #FA=437 , #Miss=364 , Contributed ATWV= 0.0318, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2231
+ATWV=0.4202 OTWV=0.5321 STWV=0.6687 MTWV=0.4209 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5526 , #FA=7152 , #Miss=6590 , Contributed ATWV= 0.3947, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4602
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=92 , #FA=418 , #Miss=388 , Contributed ATWV= 0.0254, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1788
+ATWV=0.4298 OTWV=0.5434 STWV=0.6798 MTWV=0.4315 THRESHOLD=0.453 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5351 , #FA=6564 , #Miss=6765 , Contributed ATWV= 0.3971, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4630
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=116 , #FA=433 , #Miss=364 , Contributed ATWV= 0.0327, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2296
+#
+# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:05:15-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3717 OTWV=0.4826 STWV=0.6206 MTWV=0.3717 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/phones/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4873 , #FA=7400 , #Miss=7243 , Contributed ATWV= 0.3453, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4026
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=102 , #FA=464 , #Miss=378 , Contributed ATWV= 0.0264, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1855
+ATWV=0.3794 OTWV=0.4921 STWV=0.6366 MTWV=0.3794 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4976 , #FA=7283 , #Miss=7140 , Contributed ATWV= 0.3504, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4086
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=107 , #FA=475 , #Miss=373 , Contributed ATWV= 0.0290, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2039
+ATWV=0.3803 OTWV=0.4989 STWV=0.6527 MTWV=0.3811 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4802 , #FA=7174 , #Miss=7314 , Contributed ATWV= 0.3507, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4089
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=108 , #FA=485 , #Miss=372 , Contributed ATWV= 0.0296, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2079
+ATWV=0.3865 OTWV=0.5032 STWV=0.6597 MTWV=0.3865 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5057 , #FA=7617 , #Miss=7059 , Contributed ATWV= 0.3569, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4161
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=111 , #FA=541 , #Miss=369 , Contributed ATWV= 0.0297, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2085
+ATWV=0.3987 OTWV=0.5141 STWV=0.6609 MTWV=0.4000 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5421 , #FA=7991 , #Miss=6695 , Contributed ATWV= 0.3758, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4382
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=89 , #FA=545 , #Miss=391 , Contributed ATWV= 0.0229, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1606
+ATWV=0.4089 OTWV=0.5226 STWV=0.6702 MTWV=0.4089 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5226 , #FA=7295 , #Miss=6890 , Contributed ATWV= 0.3793, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4423
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=109 , #FA=490 , #Miss=371 , Contributed ATWV= 0.0296, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2077
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:05:32-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3846 OTWV=0.4898 STWV=0.6140 MTWV=0.3849 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it1/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4698 , #FA=6363 , #Miss=7418 , Contributed ATWV= 0.3348, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3904
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=169 , #FA=376 , #Miss=311 , Contributed ATWV= 0.0498, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3497
+ATWV=0.4084 OTWV=0.5296 STWV=0.6808 MTWV=0.4084 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5112 , #FA=7507 , #Miss=7004 , Contributed ATWV= 0.3551, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4140
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=188 , #FA=531 , #Miss=292 , Contributed ATWV= 0.0533, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3747
+ATWV=0.4147 OTWV=0.5426 STWV=0.6942 MTWV=0.4164 THRESHOLD=0.453 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4903 , #FA=6600 , #Miss=7213 , Contributed ATWV= 0.3565, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4157
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=197 , #FA=506 , #Miss=283 , Contributed ATWV= 0.0582, Best Possible Contributed ATWV= 0.1424, ATWV= 0.4086
+ATWV=0.4205 OTWV=0.5421 STWV=0.6920 MTWV=0.4207 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5179 , #FA=7269 , #Miss=6937 , Contributed ATWV= 0.3621, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4222
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=199 , #FA=547 , #Miss=281 , Contributed ATWV= 0.0584, Best Possible Contributed ATWV= 0.1424, ATWV= 0.4099
+ATWV=0.4386 OTWV=0.5595 STWV=0.7003 MTWV=0.4400 THRESHOLD=0.484 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5245 , #FA=6308 , #Miss=6871 , Contributed ATWV= 0.3822, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4456
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=187 , #FA=418 , #Miss=293 , Contributed ATWV= 0.0564, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3962
+ATWV=0.4394 OTWV=0.5585 STWV=0.6927 MTWV=0.4397 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5554 , #FA=7486 , #Miss=6562 , Contributed ATWV= 0.3789, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4418
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=207 , #FA=548 , #Miss=273 , Contributed ATWV= 0.0604, Best Possible Contributed ATWV= 0.1424, ATWV= 0.4246
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:05:47-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1595 OTWV=0.2619 STWV=0.3850 MTWV=0.1602 THRESHOLD=0.503 exp/nnet3/tdnn_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=1949 , #FA=3706 , #Miss=10167, Contributed ATWV= 0.1404, Best Possible Contributed ATWV= 0.8576, ATWV= 0.1637
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=74 , #FA=313 , #Miss=406 , Contributed ATWV= 0.0190, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1338
+ATWV=0.3032 OTWV=0.4062 STWV=0.5289 MTWV=0.3032 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it2/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=3866 , #FA=6132 , #Miss=8250 , Contributed ATWV= 0.2606, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3039
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=149 , #FA=450 , #Miss=331 , Contributed ATWV= 0.0425, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2988
+ATWV=0.3355 OTWV=0.4619 STWV=0.6238 MTWV=0.3355 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4070 , #FA=6524 , #Miss=8046 , Contributed ATWV= 0.2849, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3322
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=176 , #FA=559 , #Miss=304 , Contributed ATWV= 0.0506, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3553
+ATWV=0.3368 OTWV=0.4568 STWV=0.6010 MTWV=0.3403 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=3997 , #FA=5729 , #Miss=8119 , Contributed ATWV= 0.2888, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3367
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=169 , #FA=513 , #Miss=311 , Contributed ATWV= 0.0480, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3371
+ATWV=0.3690 OTWV=0.4945 STWV=0.6419 MTWV=0.3701 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4757 , #FA=7077 , #Miss=7359 , Contributed ATWV= 0.3202, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3734
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=169 , #FA=521 , #Miss=311 , Contributed ATWV= 0.0488, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3428
+ATWV=0.3782 OTWV=0.4916 STWV=0.6313 MTWV=0.3786 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4496 , #FA=6091 , #Miss=7620 , Contributed ATWV= 0.3256, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3797
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=183 , #FA=480 , #Miss=297 , Contributed ATWV= 0.0525, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3691
diff --git a/egs/babel/s5d/results/kws_results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:12:45-04:00 b/egs/babel/s5d/results/kws_results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:12:45-04:00
new file mode 100644
index 00000000000..f218056412a
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:12:45-04:00
@@ -0,0 +1,100 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:13:21-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2072 OTWV=0.3242 STWV=0.4752 MTWV=0.2072 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=4225 , #FA=6109 , #Miss=10665, Contributed ATWV= 0.2011, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2290
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=38 , #FA=404 , #Miss=388 , Contributed ATWV= 0.0061, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0500
+ATWV=0.2234 OTWV=0.3660 STWV=0.5806 MTWV=0.2244 THRESHOLD=0.473 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=4305 , #FA=6650 , #Miss=10585, Contributed ATWV= 0.2101, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2393
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=62 , #FA=515 , #Miss=364 , Contributed ATWV= 0.0133, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1094
+ATWV=0.2386 OTWV=0.3711 STWV=0.5954 MTWV=0.2386 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=4267 , #FA=5993 , #Miss=10623, Contributed ATWV= 0.2234, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2544
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=66 , #FA=446 , #Miss=360 , Contributed ATWV= 0.0152, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1243
+ATWV=0.2461 OTWV=0.3869 STWV=0.6098 MTWV=0.2469 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=4423 , #FA=5883 , #Miss=10467, Contributed ATWV= 0.2327, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2651
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=63 , #FA=453 , #Miss=363 , Contributed ATWV= 0.0134, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1098
+ATWV=0.2654 OTWV=0.4100 STWV=0.6005 MTWV=0.2672 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=5273 , #FA=6428 , #Miss=9617 , Contributed ATWV= 0.2495, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2842
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=68 , #FA=450 , #Miss=358 , Contributed ATWV= 0.0159, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1302
+ATWV=0.2681 OTWV=0.4076 STWV=0.6090 MTWV=0.2697 THRESHOLD=0.473 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=5143 , #FA=6378 , #Miss=9747 , Contributed ATWV= 0.2519, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2868
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=69 , #FA=462 , #Miss=357 , Contributed ATWV= 0.0163, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1335
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:13:34-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1796 OTWV=0.2970 STWV=0.5017 MTWV=0.1796 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2117 , #FA=5124 , #Miss=12773, Contributed ATWV= 0.1716, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1954
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=45 , #FA=519 , #Miss=381 , Contributed ATWV= 0.0080, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0658
+ATWV=0.1946 OTWV=0.3201 STWV=0.5540 MTWV=0.1946 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2218 , #FA=5557 , #Miss=12672, Contributed ATWV= 0.1817, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2069
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=67 , #FA=599 , #Miss=359 , Contributed ATWV= 0.0129, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1059
+ATWV=0.2035 OTWV=0.3267 STWV=0.5663 MTWV=0.2035 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2384 , #FA=6085 , #Miss=12506, Contributed ATWV= 0.1902, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2167
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=70 , #FA=669 , #Miss=356 , Contributed ATWV= 0.0132, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1084
+ATWV=0.2125 OTWV=0.3434 STWV=0.5569 MTWV=0.2147 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2802 , #FA=6994 , #Miss=12088, Contributed ATWV= 0.2032, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2314
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=63 , #FA=692 , #Miss=363 , Contributed ATWV= 0.0092, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0758
+ATWV=0.2146 OTWV=0.3363 STWV=0.5757 MTWV=0.2146 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2243 , #FA=4805 , #Miss=12647, Contributed ATWV= 0.2025, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2306
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=60 , #FA=550 , #Miss=366 , Contributed ATWV= 0.0121, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0991
+ATWV=0.2233 OTWV=0.3537 STWV=0.5753 MTWV=0.2233 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2472 , #FA=5516 , #Miss=12418, Contributed ATWV= 0.2070, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2357
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=72 , #FA=548 , #Miss=354 , Contributed ATWV= 0.0164, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1343
+#
+# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:13:46-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1757 OTWV=0.2941 STWV=0.5188 MTWV=0.1757 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/phones/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2109 , #FA=5112 , #Miss=12781, Contributed ATWV= 0.1661, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1892
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=51 , #FA=566 , #Miss=375 , Contributed ATWV= 0.0096, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0787
+ATWV=0.1885 OTWV=0.3156 STWV=0.5586 MTWV=0.1885 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2171 , #FA=5603 , #Miss=12719, Contributed ATWV= 0.1751, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1994
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=72 , #FA=672 , #Miss=354 , Contributed ATWV= 0.0134, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1103
+ATWV=0.1935 OTWV=0.3237 STWV=0.5717 MTWV=0.1935 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2122 , #FA=4885 , #Miss=12768, Contributed ATWV= 0.1811, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2062
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=62 , #FA=595 , #Miss=364 , Contributed ATWV= 0.0124, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1019
+ATWV=0.2013 OTWV=0.3267 STWV=0.5641 MTWV=0.2014 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2195 , #FA=4829 , #Miss=12695, Contributed ATWV= 0.1891, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2153
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=60 , #FA=592 , #Miss=366 , Contributed ATWV= 0.0123, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1005
+ATWV=0.2087 OTWV=0.3368 STWV=0.5610 MTWV=0.2087 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2348 , #FA=5077 , #Miss=12542, Contributed ATWV= 0.1967, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2240
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=63 , #FA=579 , #Miss=363 , Contributed ATWV= 0.0120, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0984
+ATWV=0.2116 OTWV=0.3465 STWV=0.5804 MTWV=0.2116 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2398 , #FA=5507 , #Miss=12492, Contributed ATWV= 0.1960, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2232
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=69 , #FA=609 , #Miss=357 , Contributed ATWV= 0.0156, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1279
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:14:01-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1708 OTWV=0.2816 STWV=0.4795 MTWV=0.1711 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1913 , #FA=4978 , #Miss=12977, Contributed ATWV= 0.1615, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1839
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=50 , #FA=507 , #Miss=376 , Contributed ATWV= 0.0093, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0764
+ATWV=0.1926 OTWV=0.3156 STWV=0.5617 MTWV=0.1926 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1964 , #FA=4856 , #Miss=12926, Contributed ATWV= 0.1751, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1994
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=79 , #FA=566 , #Miss=347 , Contributed ATWV= 0.0176, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1442
+ATWV=0.1985 OTWV=0.3240 STWV=0.5820 MTWV=0.1985 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1945 , #FA=4804 , #Miss=12945, Contributed ATWV= 0.1794, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2044
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=82 , #FA=557 , #Miss=344 , Contributed ATWV= 0.0191, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1563
+ATWV=0.2054 OTWV=0.3342 STWV=0.5882 MTWV=0.2054 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch3/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2125 , #FA=5218 , #Miss=12765, Contributed ATWV= 0.1875, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2135
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=80 , #FA=626 , #Miss=346 , Contributed ATWV= 0.0179, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1466
+ATWV=0.2126 OTWV=0.3434 STWV=0.5827 MTWV=0.2126 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2207 , #FA=4958 , #Miss=12683, Contributed ATWV= 0.1920, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2186
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=81 , #FA=521 , #Miss=345 , Contributed ATWV= 0.0206, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1692
+ATWV=0.2148 OTWV=0.3452 STWV=0.5808 MTWV=0.2148 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2497 , #FA=6035 , #Miss=12393, Contributed ATWV= 0.1978, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2252
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=79 , #FA=661 , #Miss=347 , Contributed ATWV= 0.0170, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1397
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:14:14-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1379 OTWV=0.2528 STWV=0.4632 MTWV=0.1385 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1422 , #FA=3603 , #Miss=13468, Contributed ATWV= 0.1268, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1444
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=50 , #FA=431 , #Miss=376 , Contributed ATWV= 0.0111, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0907
+ATWV=0.1718 OTWV=0.2920 STWV=0.5386 MTWV=0.1718 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1789 , #FA=4843 , #Miss=13101, Contributed ATWV= 0.1564, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1781
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=71 , #FA=619 , #Miss=355 , Contributed ATWV= 0.0153, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1258
+ATWV=0.1754 OTWV=0.3002 STWV=0.5589 MTWV=0.1754 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1764 , #FA=4651 , #Miss=13126, Contributed ATWV= 0.1573, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1791
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=78 , #FA=592 , #Miss=348 , Contributed ATWV= 0.0181, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1484
+ATWV=0.1768 OTWV=0.3015 STWV=0.5378 MTWV=0.1768 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch1/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1823 , #FA=4605 , #Miss=13067, Contributed ATWV= 0.1624, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1849
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=68 , #FA=613 , #Miss=358 , Contributed ATWV= 0.0145, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1188
+ATWV=0.1851 OTWV=0.3170 STWV=0.5671 MTWV=0.1853 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1990 , #FA=4815 , #Miss=12900, Contributed ATWV= 0.1680, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1913
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=74 , #FA=576 , #Miss=352 , Contributed ATWV= 0.0171, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1406
+ATWV=0.1973 OTWV=0.3276 STWV=0.5722 MTWV=0.1981 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2073 , #FA=4741 , #Miss=12817, Contributed ATWV= 0.1803, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2053
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=77 , #FA=601 , #Miss=349 , Contributed ATWV= 0.0170, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1395
diff --git a/egs/babel/s5d/results/kws_results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:21:34-04:00 b/egs/babel/s5d/results/kws_results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:21:34-04:00
new file mode 100644
index 00000000000..4e20fac4f56
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:21:34-04:00
@@ -0,0 +1,100 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:22:17-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.4218 OTWV=0.5044 STWV=0.5838 MTWV=0.4218 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=5437 , #FA=4638 , #Miss=5683 , Contributed ATWV= 0.4123, Best Possible Contributed ATWV= 0.8474, ATWV= 0.4865
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=29 , #FA=122 , #Miss=445 , Contributed ATWV= 0.0096, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0627
+ATWV=0.4619 OTWV=0.5643 STWV=0.6680 MTWV=0.4626 THRESHOLD=0.465 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=6171 , #FA=5956 , #Miss=4949 , Contributed ATWV= 0.4498, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5308
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=41 , #FA=214 , #Miss=433 , Contributed ATWV= 0.0121, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0791
+ATWV=0.4641 OTWV=0.5581 STWV=0.6612 MTWV=0.4641 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=5856 , #FA=4921 , #Miss=5264 , Contributed ATWV= 0.4543, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5361
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=33 , #FA=191 , #Miss=441 , Contributed ATWV= 0.0098, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0644
+ATWV=0.4733 OTWV=0.5691 STWV=0.6747 MTWV=0.4733 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=5952 , #FA=4840 , #Miss=5168 , Contributed ATWV= 0.4608, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5438
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=38 , #FA=188 , #Miss=436 , Contributed ATWV= 0.0125, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0818
+ATWV=0.4843 OTWV=0.5738 STWV=0.6585 MTWV=0.4847 THRESHOLD=0.484 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=6469 , #FA=5159 , #Miss=4651 , Contributed ATWV= 0.4745, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5599
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=34 , #FA=141 , #Miss=440 , Contributed ATWV= 0.0098, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0644
+ATWV=0.4867 OTWV=0.5849 STWV=0.6767 MTWV=0.4879 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=6428 , #FA=4944 , #Miss=4692 , Contributed ATWV= 0.4746, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5601
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=37 , #FA=170 , #Miss=437 , Contributed ATWV= 0.0121, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0792
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:22:31-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2659 OTWV=0.3048 STWV=0.3360 MTWV=0.2665 THRESHOLD=0.444 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/syllabs/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2986 , #FA=2225 , #Miss=8134 , Contributed ATWV= 0.2583, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3048
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=25 , #FA=85 , #Miss=449 , Contributed ATWV= 0.0077, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0502
+ATWV=0.2858 OTWV=0.3350 STWV=0.3711 MTWV=0.2885 THRESHOLD=0.424 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3140 , #FA=2218 , #Miss=7980 , Contributed ATWV= 0.2774, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3274
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=27 , #FA=87 , #Miss=447 , Contributed ATWV= 0.0084, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0548
+ATWV=0.2876 OTWV=0.3334 STWV=0.3663 MTWV=0.2912 THRESHOLD=0.424 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3114 , #FA=2121 , #Miss=8006 , Contributed ATWV= 0.2819, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3327
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=20 , #FA=103 , #Miss=454 , Contributed ATWV= 0.0057, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0371
+ATWV=0.2912 OTWV=0.3367 STWV=0.3742 MTWV=0.2921 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3132 , #FA=2210 , #Miss=7988 , Contributed ATWV= 0.2844, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3357
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=22 , #FA=105 , #Miss=452 , Contributed ATWV= 0.0067, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0441
+ATWV=0.2984 OTWV=0.3436 STWV=0.3773 MTWV=0.2984 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3360 , #FA=2233 , #Miss=7760 , Contributed ATWV= 0.2906, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3429
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=26 , #FA=76 , #Miss=448 , Contributed ATWV= 0.0078, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0511
+ATWV=0.3002 OTWV=0.3415 STWV=0.3713 MTWV=0.3010 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3335 , #FA=2039 , #Miss=7785 , Contributed ATWV= 0.2924, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3451
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=28 , #FA=67 , #Miss=446 , Contributed ATWV= 0.0078, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0511
+#
+# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:22:44-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2620 OTWV=0.3049 STWV=0.3381 MTWV=0.2624 THRESHOLD=0.444 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/phones/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2896 , #FA=2268 , #Miss=8224 , Contributed ATWV= 0.2505, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2956
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=36 , #FA=98 , #Miss=438 , Contributed ATWV= 0.0115, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0756
+ATWV=0.2714 OTWV=0.3185 STWV=0.3513 MTWV=0.2753 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2951 , #FA=2075 , #Miss=8169 , Contributed ATWV= 0.2618, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3090
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=31 , #FA=101 , #Miss=443 , Contributed ATWV= 0.0096, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0632
+ATWV=0.2714 OTWV=0.3245 STWV=0.3588 MTWV=0.2765 THRESHOLD=0.365 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2956 , #FA=2157 , #Miss=8164 , Contributed ATWV= 0.2619, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3091
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=32 , #FA=106 , #Miss=442 , Contributed ATWV= 0.0095, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0626
+ATWV=0.2755 OTWV=0.3278 STWV=0.3617 MTWV=0.2787 THRESHOLD=0.41 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3044 , #FA=2236 , #Miss=8076 , Contributed ATWV= 0.2639, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3115
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=38 , #FA=104 , #Miss=436 , Contributed ATWV= 0.0116, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0761
+ATWV=0.2876 OTWV=0.3347 STWV=0.3658 MTWV=0.2941 THRESHOLD=0.41 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3271 , #FA=2252 , #Miss=7849 , Contributed ATWV= 0.2787, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3289
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=31 , #FA=80 , #Miss=443 , Contributed ATWV= 0.0089, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0582
+ATWV=0.2935 OTWV=0.3400 STWV=0.3723 MTWV=0.2953 THRESHOLD=0.473 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3273 , #FA=2245 , #Miss=7847 , Contributed ATWV= 0.2818, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3325
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=38 , #FA=104 , #Miss=436 , Contributed ATWV= 0.0117, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0768
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:23:03-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2646 OTWV=0.3049 STWV=0.3331 MTWV=0.2666 THRESHOLD=0.453 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it3/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2763 , #FA=2036 , #Miss=8357 , Contributed ATWV= 0.2363, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2789
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=86 , #FA=77 , #Miss=388 , Contributed ATWV= 0.0283, Best Possible Contributed ATWV= 0.1526, ATWV= 0.1853
+ATWV=0.3019 OTWV=0.3543 STWV=0.3889 MTWV=0.3067 THRESHOLD=0.453 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2933 , #FA=2017 , #Miss=8187 , Contributed ATWV= 0.2623, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3096
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=120 , #FA=120 , #Miss=354 , Contributed ATWV= 0.0395, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2589
+ATWV=0.3102 OTWV=0.3595 STWV=0.3949
MTWV=0.3138 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2986 , #FA=1904 , #Miss=8134 , Contributed ATWV= 0.2695, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3180 + OOV=1 #Keywords=350 , #Targ=474 , #Corr=123 , #FA=124 , #Miss=351 , Contributed ATWV= 0.0407, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2666 +ATWV=0.3108 OTWV=0.3586 STWV=0.3933 MTWV=0.3121 THRESHOLD=0.424 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3021 , #FA=2041 , #Miss=8099 , Contributed ATWV= 0.2674, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3156 + OOV=1 #Keywords=350 , #Targ=474 , #Corr=136 , #FA=103 , #Miss=338 , Contributed ATWV= 0.0434, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2841 +ATWV=0.3149 OTWV=0.3630 STWV=0.3931 MTWV=0.3198 THRESHOLD=0.399 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3228 , #FA=1870 , #Miss=7892 , Contributed ATWV= 0.2780, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3280 + OOV=1 #Keywords=350 , #Targ=474 , #Corr=118 , #FA=93 , #Miss=356 , Contributed ATWV= 0.0369, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2420 +ATWV=0.3200 OTWV=0.3670 STWV=0.3985 MTWV=0.3222 THRESHOLD=0.403 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3204 , #FA=2050 , #Miss=7916 , Contributed ATWV= 0.2783, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3285 + OOV=1 #Keywords=350 , #Targ=474 , #Corr=132 , #FA=113 , #Miss=342 , Contributed ATWV= 0.0416, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2729 +# +# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:23:18-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.1461 OTWV=0.1765 STWV=0.1935 MTWV=0.1477 THRESHOLD=0.444 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it1/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=1943 , #Targ=11120, #Corr=1504 , #FA=1378 , #Miss=9616 , Contributed ATWV= 0.1281, Best Possible Contributed ATWV= 0.8474, ATWV= 0.1512 + OOV=1 #Keywords=350 , #Targ=474 , #Corr=58 , #FA=79 , #Miss=416 , Contributed ATWV= 0.0180, Best Possible Contributed ATWV= 0.1526, ATWV= 0.1178 +ATWV=0.1866 OTWV=0.2378 STWV=0.2636 MTWV=0.1962 THRESHOLD=0.386 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch1/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1943 , #Targ=11120, #Corr=1677 , #FA=1381 , #Miss=9443 , Contributed ATWV= 0.1586, Best Possible Contributed ATWV= 0.8474, ATWV= 0.1872 + OOV=1 #Keywords=350 , #Targ=474 , #Corr=89 , #FA=93 , #Miss=385 , Contributed ATWV= 0.0281, Best Possible Contributed ATWV= 0.1526, ATWV= 0.1838 +ATWV=0.1946 OTWV=0.2484 STWV=0.2754 MTWV=0.2051 THRESHOLD=0.399 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1943 , #Targ=11120, #Corr=1704 , #FA=1422 , #Miss=9416 , Contributed ATWV= 0.1643, Best Possible Contributed ATWV= 0.8474, ATWV= 0.1939 + OOV=1 #Keywords=350 , #Targ=474 , #Corr=96 , #FA=110 , #Miss=378 , Contributed ATWV= 0.0303, Best Possible Contributed ATWV= 0.1526, ATWV= 0.1986 +ATWV=0.2026 OTWV=0.2545 STWV=0.2817 MTWV=0.2089 THRESHOLD=0.41 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1943 , #Targ=11120, #Corr=1821 , #FA=1441 , #Miss=9299 , Contributed ATWV= 0.1707, Best Possible 
Contributed ATWV= 0.8474, ATWV= 0.2014 + OOV=1 #Keywords=350 , #Targ=474 , #Corr=105 , #FA=121 , #Miss=369 , Contributed ATWV= 0.0319, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2088 +ATWV=0.2288 OTWV=0.2860 STWV=0.3121 MTWV=0.2409 THRESHOLD=0.328 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2169 , #FA=1525 , #Miss=8951 , Contributed ATWV= 0.1938, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2287 + OOV=1 #Keywords=350 , #Targ=474 , #Corr=107 , #FA=86 , #Miss=367 , Contributed ATWV= 0.0350, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2292 +ATWV=0.2408 OTWV=0.2959 STWV=0.3216 MTWV=0.2512 THRESHOLD=0.345 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2322 , #FA=1553 , #Miss=8798 , Contributed ATWV= 0.2054, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2423 + OOV=1 #Keywords=350 , #Targ=474 , #Corr=109 , #FA=92 , #Miss=365 , Contributed ATWV= 0.0355, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2323 diff --git a/egs/babel/s5d/results/kws_results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:25:02-04:00 b/egs/babel/s5d/results/kws_results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:25:02-04:00 new file mode 100644 index 00000000000..792b9ca097d --- /dev/null +++ b/egs/babel/s5d/results/kws_results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:25:02-04:00 @@ -0,0 +1,100 @@ +# +# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:25:49-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3254 OTWV=0.4191 STWV=0.5168 MTWV=0.3254 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5022 , #FA=5294 , #Miss=6940 , Contributed ATWV= 0.3223, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3690 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=26 , #FA=235 , #Miss=491 , Contributed ATWV= 0.0031, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0245 +ATWV=0.3668 OTWV=0.4878 STWV=0.6467 MTWV=0.3672 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5496 , #FA=6611 , #Miss=6466 , Contributed ATWV= 0.3598, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4119 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=48 , #FA=403 , #Miss=469 , Contributed ATWV= 0.0071, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0558 +ATWV=0.3767 OTWV=0.4957 STWV=0.6459 MTWV=0.3767 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5698 , #FA=6556 , #Miss=6264 , Contributed ATWV= 0.3715, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4253 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=40 , #FA=378 , #Miss=477 , Contributed ATWV= 0.0051, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0406 +ATWV=0.3866 OTWV=0.5082 STWV=0.6665 MTWV=0.3866 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5644 , #FA=6329 , #Miss=6318 , Contributed ATWV= 0.3801, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4352 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=48 , #FA=401 , #Miss=469 , Contributed ATWV= 0.0065, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0511 +ATWV=0.4033 OTWV=0.5188 STWV=0.6543 MTWV=0.4034 THRESHOLD=0.484 
exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=6255 , #FA=6413 , #Miss=5707 , Contributed ATWV= 0.3950, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4522 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=52 , #FA=364 , #Miss=465 , Contributed ATWV= 0.0083, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0655 +ATWV=0.4131 OTWV=0.5198 STWV=0.6353 MTWV=0.4131 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=6396 , #FA=6133 , #Miss=5566 , Contributed ATWV= 0.4068, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4657 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=41 , #FA=330 , #Miss=476 , Contributed ATWV= 0.0063, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0496 +# +# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:26:02-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3102 OTWV=0.4044 STWV=0.5008 MTWV=0.3102 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3950 , #FA=5521 , #Miss=8012 , Contributed ATWV= 0.3063, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3506 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=29 , #FA=261 , #Miss=488 , Contributed ATWV= 0.0039, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0311 +ATWV=0.3475 OTWV=0.4589 STWV=0.6057 MTWV=0.3482 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4245 , #FA=6029 , #Miss=7717 , Contributed ATWV= 0.3417, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3912 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=39 , #FA=325 , #Miss=478 , Contributed ATWV= 0.0058, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0461 +ATWV=0.3567 OTWV=0.4704 STWV=0.6093 MTWV=0.3575 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4372 , #FA=6005 , #Miss=7590 , Contributed ATWV= 0.3513, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4022 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=38 , #FA=329 , #Miss=479 , Contributed ATWV= 0.0054, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0429 +ATWV=0.3667 OTWV=0.4738 STWV=0.6193 MTWV=0.3674 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4281 , #FA=5292 , #Miss=7681 , Contributed ATWV= 0.3606, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4128 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=40 , #FA=311 , #Miss=477 , Contributed ATWV= 0.0061, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0481 +ATWV=0.3798 OTWV=0.4888 STWV=0.6133 MTWV=0.3799 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4820 , #FA=5800 , #Miss=7142 , Contributed ATWV= 0.3729, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4269 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=47 , #FA=314 , #Miss=470 , Contributed ATWV= 0.0070, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0551 +ATWV=0.3907 OTWV=0.4929 STWV=0.6005 MTWV=0.3907 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5147 , #FA=6646 , #Miss=6815 , Contributed ATWV= 0.3840, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4397 + OOV=1 
#Keywords=300 , #Targ=517 , #Corr=45 , #FA=314 , #Miss=472 , Contributed ATWV= 0.0066, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0523 +# +# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:26:15-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2937 OTWV=0.3986 STWV=0.5124 MTWV=0.2937 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/phones/kwset_kwlist_8/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4370 , #FA=6963 , #Miss=7592 , Contributed ATWV= 0.2878, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3294 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=40 , #FA=371 , #Miss=477 , Contributed ATWV= 0.0059, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0467 +ATWV=0.3182 OTWV=0.4371 STWV=0.5936 MTWV=0.3182 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4479 , #FA=7039 , #Miss=7483 , Contributed ATWV= 0.3118, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3570 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=44 , #FA=444 , #Miss=473 , Contributed ATWV= 0.0063, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0500 +ATWV=0.3274 OTWV=0.4496 STWV=0.6019 MTWV=0.3282 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4562 , #FA=6505 , #Miss=7400 , Contributed ATWV= 0.3206, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3671 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=42 , #FA=370 , #Miss=475 , Contributed ATWV= 0.0068, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0538 +ATWV=0.3444 OTWV=0.4580 STWV=0.6076 MTWV=0.3446 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4491 , #FA=6127 , #Miss=7471 , Contributed ATWV= 0.3361, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3848 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=49 , #FA=396 , #Miss=468 , Contributed ATWV= 0.0083, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0657 +ATWV=0.3515 OTWV=0.4684 STWV=0.6055 MTWV=0.3520 THRESHOLD=0.465 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4966 , #FA=6072 , #Miss=6996 , Contributed ATWV= 0.3427, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3923 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=51 , #FA=321 , #Miss=466 , Contributed ATWV= 0.0088, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0694 +ATWV=0.3624 OTWV=0.4732 STWV=0.5981 MTWV=0.3624 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5315 , #FA=6787 , #Miss=6647 , Contributed ATWV= 0.3542, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4055 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=51 , #FA=356 , #Miss=466 , Contributed ATWV= 0.0082, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0645 +# +# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:26:32-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2989 OTWV=0.3850 STWV=0.4696 MTWV=0.2989 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it2/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3952 , #FA=4049 , #Miss=8010 , Contributed ATWV= 0.2858, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3272 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=62 , #FA=188 , #Miss=455 , Contributed 
ATWV= 0.0132, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1041 +ATWV=0.3556 OTWV=0.4644 STWV=0.6019 MTWV=0.3556 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4743 , #FA=6603 , #Miss=7219 , Contributed ATWV= 0.3340, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3823 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=101 , #FA=434 , #Miss=416 , Contributed ATWV= 0.0216, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1708 +ATWV=0.3697 OTWV=0.4791 STWV=0.6110 MTWV=0.3711 THRESHOLD=0.503 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4891 , #FA=6482 , #Miss=7071 , Contributed ATWV= 0.3430, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3927 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=117 , #FA=391 , #Miss=400 , Contributed ATWV= 0.0267, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2109 +ATWV=0.3746 OTWV=0.4805 STWV=0.6185 MTWV=0.3746 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4733 , #FA=5416 , #Miss=7229 , Contributed ATWV= 0.3540, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4053 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=97 , #FA=371 , #Miss=420 , Contributed ATWV= 0.0206, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1629 +ATWV=0.3906 OTWV=0.4954 STWV=0.6114 MTWV=0.3933 THRESHOLD=0.453 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5223 , #FA=5433 , #Miss=6739 , Contributed ATWV= 0.3644, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4172 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=113 , #FA=300 , #Miss=404 , Contributed ATWV= 0.0262, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2072 +ATWV=0.4026 OTWV=0.5039 STWV=0.6123 MTWV=0.4026 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5625 , #FA=6694 , #Miss=6337 , Contributed ATWV= 0.3753, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4296 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=134 , #FA=378 , #Miss=383 , Contributed ATWV= 0.0273, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2160 +# +# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:26:46-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2248 OTWV=0.3051 STWV=0.3995 MTWV=0.2248 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3143 , #FA=4419 , #Miss=8819 , Contributed ATWV= 0.2067, Best Possible Contributed ATWV= 0.8735, ATWV= 0.2366 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=87 , #FA=253 , #Miss=430 , Contributed ATWV= 0.0181, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1433 +ATWV=0.2716 OTWV=0.3791 STWV=0.5246 MTWV=0.2724 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3613 , #FA=5664 , #Miss=8349 , Contributed ATWV= 0.2479, Best Possible Contributed ATWV= 0.8735, ATWV= 0.2838 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=110 , #FA=432 , #Miss=407 , Contributed ATWV= 0.0237, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1873 +ATWV=0.2862 OTWV=0.3915 STWV=0.5291 MTWV=0.2874 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch2/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , 
#Targ=11962, #Corr=3593 , #FA=5029 , #Miss=8369 , Contributed ATWV= 0.2615, Best Possible Contributed ATWV= 0.8735, ATWV= 0.2993 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=111 , #FA=379 , #Miss=406 , Contributed ATWV= 0.0248, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1959 +ATWV=0.2923 OTWV=0.4062 STWV=0.5508 MTWV=0.2923 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4069 , #FA=6808 , #Miss=7893 , Contributed ATWV= 0.2672, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3059 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=122 , #FA=492 , #Miss=395 , Contributed ATWV= 0.0251, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1987 +ATWV=0.3254 OTWV=0.4319 STWV=0.5579 MTWV=0.3254 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4524 , #FA=6093 , #Miss=7438 , Contributed ATWV= 0.2980, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3411 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=130 , #FA=445 , #Miss=387 , Contributed ATWV= 0.0274, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2169 +ATWV=0.3392 OTWV=0.4519 STWV=0.5786 MTWV=0.3392 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4870 , #FA=6666 , #Miss=7092 , Contributed ATWV= 0.3122, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3574 + OOV=1 #Keywords=300 , #Targ=517 , #Corr=133 , #FA=466 , #Miss=384 , Contributed ATWV= 0.0270, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2133 diff --git a/egs/babel/s5d/results/kws_results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:27:39-04:00 b/egs/babel/s5d/results/kws_results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:27:39-04:00 new file mode 100644 index 00000000000..1997692642e --- /dev/null +++ b/egs/babel/s5d/results/kws_results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:27:39-04:00 @@ -0,0 +1,100 @@ +# +# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:28:25-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2977 OTWV=0.3916 STWV=0.4944 MTWV=0.2977 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4395 , #FA=5271 , #Miss=7275 , Contributed ATWV= 0.2900, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3348 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=47 , #FA=376 , #Miss=465 , Contributed ATWV= 0.0077, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0574 +ATWV=0.3094 OTWV=0.4251 STWV=0.5824 MTWV=0.3094 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4362 , #FA=5310 , #Miss=7308 , Contributed ATWV= 0.2966, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3424 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=69 , #FA=394 , #Miss=443 , Contributed ATWV= 0.0128, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0957 +ATWV=0.3215 OTWV=0.4319 STWV=0.5834 MTWV=0.3228 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4438 , #FA=4879 , #Miss=7232 , Contributed ATWV= 0.3089, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3566 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=68 , #FA=381 , #Miss=444 , Contributed ATWV= 0.0126, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0942 +ATWV=0.3272 OTWV=0.4381 
STWV=0.5897 MTWV=0.3272 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4487 , #FA=4749 , #Miss=7183 , Contributed ATWV= 0.3106, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3585 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=83 , #FA=376 , #Miss=429 , Contributed ATWV= 0.0166, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1242 +ATWV=0.3477 OTWV=0.4611 STWV=0.5871 MTWV=0.3478 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=5253 , #FA=5693 , #Miss=6417 , Contributed ATWV= 0.3363, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3883 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=67 , #FA=450 , #Miss=445 , Contributed ATWV= 0.0114, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0849 +ATWV=0.3543 OTWV=0.4720 STWV=0.6040 MTWV=0.3543 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=5318 , #FA=5634 , #Miss=6352 , Contributed ATWV= 0.3414, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3942 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=72 , #FA=448 , #Miss=440 , Contributed ATWV= 0.0129, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0965 +# +# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:28:39-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2682 OTWV=0.3601 STWV=0.4696 MTWV=0.2690 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/syllabs/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3255 , #FA=5332 , #Miss=8415 , Contributed ATWV= 0.2627, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3032 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=40 , #FA=397 , #Miss=472 , Contributed ATWV= 0.0056, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0418 +ATWV=0.2748 OTWV=0.3792 STWV=0.5133 MTWV=0.2748 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3036 , #FA=4436 , #Miss=8634 , Contributed ATWV= 0.2646, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3055 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=54 , #FA=371 , #Miss=458 , Contributed ATWV= 0.0102, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0759 +ATWV=0.2823 OTWV=0.3883 STWV=0.5214 MTWV=0.2823 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3344 , #FA=5218 , #Miss=8326 , Contributed ATWV= 0.2711, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3130 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=61 , #FA=428 , #Miss=451 , Contributed ATWV= 0.0112, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0836 +ATWV=0.2874 OTWV=0.3903 STWV=0.5191 MTWV=0.2874 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3274 , #FA=4680 , #Miss=8396 , Contributed ATWV= 0.2740, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3163 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=66 , #FA=364 , #Miss=446 , Contributed ATWV= 0.0134, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1001 +ATWV=0.3076 OTWV=0.4101 STWV=0.5223 MTWV=0.3089 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3617 , #FA=4797 , #Miss=8053 , Contributed ATWV= 0.2968, Best Possible Contributed ATWV= 
0.8662, ATWV= 0.3427 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=61 , #FA=376 , #Miss=451 , Contributed ATWV= 0.0107, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0802 +ATWV=0.3083 OTWV=0.4154 STWV=0.5354 MTWV=0.3085 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3813 , #FA=5545 , #Miss=7857 , Contributed ATWV= 0.2996, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3459 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=64 , #FA=463 , #Miss=448 , Contributed ATWV= 0.0087, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0649 +# +# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:28:54-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2624 OTWV=0.3740 STWV=0.5112 MTWV=0.2624 THRESHOLD=0.503 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3562 , #FA=6509 , #Miss=8108 , Contributed ATWV= 0.2483, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2866 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=74 , #FA=531 , #Miss=438 , Contributed ATWV= 0.0141, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1056 +ATWV=0.2657 OTWV=0.3687 STWV=0.4899 MTWV=0.2657 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/phones/kwset_kwlist_8/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3151 , #FA=4641 , #Miss=8519 , Contributed ATWV= 0.2540, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2932 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=52 , #FA=319 , #Miss=460 , Contributed ATWV= 0.0117, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0876 +ATWV=0.2779 OTWV=0.3829 STWV=0.5065 MTWV=0.2779 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3384 , #FA=5003 , #Miss=8286 , Contributed ATWV= 0.2626, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3032 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=74 , #FA=397 , #Miss=438 , Contributed ATWV= 0.0153, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1140 +ATWV=0.2802 OTWV=0.3862 STWV=0.5240 MTWV=0.2802 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3436 , #FA=5211 , #Miss=8234 , Contributed ATWV= 0.2655, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3065 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=73 , #FA=443 , #Miss=439 , Contributed ATWV= 0.0147, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1099 +ATWV=0.2970 OTWV=0.4030 STWV=0.5268 MTWV=0.2974 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3835 , #FA=5650 , #Miss=7835 , Contributed ATWV= 0.2816, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3251 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=75 , #FA=434 , #Miss=437 , Contributed ATWV= 0.0154, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1153 +ATWV=0.2994 OTWV=0.4095 STWV=0.5369 MTWV=0.2994 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3723 , #FA=4888 , #Miss=7947 , Contributed ATWV= 0.2855, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3295 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=67 , #FA=390 , #Miss=445 , Contributed ATWV= 0.0139, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1042 +# +# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] 
evaluated on 2016-03-31T12:29:11-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2666 OTWV=0.3554 STWV=0.4513 MTWV=0.2666 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it1/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3223 , #FA=4631 , #Miss=8447 , Contributed ATWV= 0.2475, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2857 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=76 , #FA=317 , #Miss=436 , Contributed ATWV= 0.0190, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1423 +ATWV=0.2823 OTWV=0.3864 STWV=0.5230 MTWV=0.2823 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3220 , #FA=4390 , #Miss=8450 , Contributed ATWV= 0.2590, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2990 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=94 , #FA=356 , #Miss=418 , Contributed ATWV= 0.0233, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1743 +ATWV=0.2944 OTWV=0.3988 STWV=0.5324 MTWV=0.2946 THRESHOLD=0.511 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3523 , #FA=5214 , #Miss=8147 , Contributed ATWV= 0.2679, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3092 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=115 , #FA=439 , #Miss=397 , Contributed ATWV= 0.0265, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1981 +ATWV=0.2985 OTWV=0.4008 STWV=0.5278 MTWV=0.2995 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch2/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3476 , #FA=4605 , #Miss=8194 , Contributed ATWV= 0.2747, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3171 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=105 , #FA=395 , #Miss=407 , Contributed ATWV= 0.0238, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1776 +ATWV=0.3182 OTWV=0.4262 STWV=0.5392 MTWV=0.3205 THRESHOLD=0.465 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3855 , #FA=4523 , #Miss=7815 , Contributed ATWV= 0.2976, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3436 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=91 , #FA=395 , #Miss=421 , Contributed ATWV= 0.0205, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1535 +ATWV=0.3279 OTWV=0.4355 STWV=0.5492 MTWV=0.3295 THRESHOLD=0.484 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4086 , #FA=5301 , #Miss=7584 , Contributed ATWV= 0.3054, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3525 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=98 , #FA=407 , #Miss=414 , Contributed ATWV= 0.0225, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1685 +# +# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:29:26-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2158 OTWV=0.3080 STWV=0.4193 MTWV=0.2158 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it1/kwset_kwlist_8/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=2501 , #FA=4203 , #Miss=9169 , Contributed ATWV= 0.1991, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2298 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=80 , #FA=387 , #Miss=432 , Contributed ATWV= 0.0167, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1247 +ATWV=0.2176 OTWV=0.3302 STWV=0.4778 MTWV=0.2176 THRESHOLD=0.491 exp/nnet3/tdnn_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + 
OOV=0 #Keywords=2001 , #Targ=11670, #Corr=2243 , #FA=3837 , #Miss=9427 , Contributed ATWV= 0.1924, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2222 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=105 , #FA=385 , #Miss=407 , Contributed ATWV= 0.0252, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1882 +ATWV=0.2444 OTWV=0.3568 STWV=0.4926 MTWV=0.2444 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch1/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=2734 , #FA=4943 , #Miss=8936 , Contributed ATWV= 0.2189, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2527 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=110 , #FA=453 , #Miss=402 , Contributed ATWV= 0.0255, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1904 +ATWV=0.2464 OTWV=0.3638 STWV=0.5166 MTWV=0.2471 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=2730 , #FA=5051 , #Miss=8940 , Contributed ATWV= 0.2168, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2503 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=120 , #FA=492 , #Miss=392 , Contributed ATWV= 0.0296, Best Possible Contributed ATWV= 0.1338, ATWV= 0.2216 +ATWV=0.2765 OTWV=0.3905 STWV=0.5268 MTWV=0.2782 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3216 , #FA=4675 , #Miss=8454 , Contributed ATWV= 0.2526, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2916 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=109 , #FA=449 , #Miss=403 , Contributed ATWV= 0.0239, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1786 +ATWV=0.2787 OTWV=0.3901 STWV=0.5224 MTWV=0.2799 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3448 , #FA=5571 , #Miss=8222 , Contributed ATWV= 0.2547, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2940 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=106 , #FA=532 , #Miss=406 , Contributed ATWV= 0.0241, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1799 diff --git a/egs/babel/s5d/results/kws_results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:29:55-04:00 b/egs/babel/s5d/results/kws_results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:29:55-04:00 new file mode 100644 index 00000000000..87e1bef6be4 --- /dev/null +++ b/egs/babel/s5d/results/kws_results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:29:55-04:00 @@ -0,0 +1,100 @@ +# +# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:30:41-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.4364 OTWV=0.5200 STWV=0.6280 MTWV=0.4364 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=6368 , #FA=4863 , #Miss=6347 , Contributed ATWV= 0.4198, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4785 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=77 , #FA=402 , #Miss=401 , Contributed ATWV= 0.0167, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1359 +ATWV=0.4773 OTWV=0.5774 STWV=0.7209 MTWV=0.4782 THRESHOLD=0.444 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=6550 , #FA=4633 , #Miss=6165 , Contributed ATWV= 0.4556, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5193 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=95 , #FA=405 , #Miss=383 , Contributed ATWV= 0.0218, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1775 +ATWV=0.4854 OTWV=0.5811 
STWV=0.7340 MTWV=0.4860 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=6988 , #FA=5965 , #Miss=5727 , Contributed ATWV= 0.4637, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5287 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=107 , #FA=644 , #Miss=371 , Contributed ATWV= 0.0216, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1761 +ATWV=0.4866 OTWV=0.5909 STWV=0.7347 MTWV=0.4870 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=7147 , #FA=6632 , #Miss=5568 , Contributed ATWV= 0.4645, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5295 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=119 , #FA=751 , #Miss=359 , Contributed ATWV= 0.0221, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1800 +ATWV=0.5068 OTWV=0.6090 STWV=0.7323 MTWV=0.5068 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=7333 , #FA=5227 , #Miss=5382 , Contributed ATWV= 0.4791, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5462 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=120 , #FA=509 , #Miss=358 , Contributed ATWV= 0.0277, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2252 +ATWV=0.5099 OTWV=0.6070 STWV=0.7211 MTWV=0.5099 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=7727 , #FA=5750 , #Miss=4988 , Contributed ATWV= 0.4855, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5534 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=125 , #FA=706 , #Miss=353 , Contributed ATWV= 0.0244, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1986 +# +# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:30:54-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3801 OTWV=0.4607 STWV=0.5692 MTWV=0.3801 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4182 , #FA=4840 , #Miss=8533 , Contributed ATWV= 0.3608, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4113 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=89 , #FA=390 , #Miss=389 , Contributed ATWV= 0.0192, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1567 +ATWV=0.4019 OTWV=0.4952 STWV=0.6210 MTWV=0.4021 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4476 , #FA=5265 , #Miss=8239 , Contributed ATWV= 0.3789, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4319 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=104 , #FA=460 , #Miss=374 , Contributed ATWV= 0.0230, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1877 +ATWV=0.4112 OTWV=0.5090 STWV=0.6304 MTWV=0.4127 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4556 , #FA=5228 , #Miss=8159 , Contributed ATWV= 0.3877, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4420 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=111 , #FA=427 , #Miss=367 , Contributed ATWV= 0.0235, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1917 +ATWV=0.4120 OTWV=0.5003 STWV=0.6302 MTWV=0.4120 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/syllabs/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4705 , #FA=5876 , #Miss=8010 , Contributed ATWV= 0.3897, Best Possible Contributed ATWV= 
0.8772, ATWV= 0.4443 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=107 , #FA=541 , #Miss=371 , Contributed ATWV= 0.0223, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1819 +ATWV=0.4233 OTWV=0.5140 STWV=0.6209 MTWV=0.4233 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4715 , #FA=4781 , #Miss=8000 , Contributed ATWV= 0.3982, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4539 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=112 , #FA=375 , #Miss=366 , Contributed ATWV= 0.0251, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2045 +ATWV=0.4281 OTWV=0.5186 STWV=0.6279 MTWV=0.4284 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4813 , #FA=5241 , #Miss=7902 , Contributed ATWV= 0.4030, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4594 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=116 , #FA=452 , #Miss=362 , Contributed ATWV= 0.0251, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2042 +# +# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:31:07-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3739 OTWV=0.4614 STWV=0.5894 MTWV=0.3739 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/phones/kwset_kwlist_8/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4006 , #FA=4874 , #Miss=8709 , Contributed ATWV= 0.3525, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4018 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=95 , #FA=399 , #Miss=383 , Contributed ATWV= 0.0214, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1746 +ATWV=0.3912 OTWV=0.4914 STWV=0.6376 MTWV=0.3912 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4234 , #FA=5261 , #Miss=8481 , Contributed ATWV= 0.3664, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4177 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=112 , #FA=457 , #Miss=366 , Contributed ATWV= 0.0248, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2017 +ATWV=0.3944 OTWV=0.4934 STWV=0.6385 MTWV=0.3952 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4363 , #FA=5715 , #Miss=8352 , Contributed ATWV= 0.3691, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4208 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=116 , #FA=533 , #Miss=362 , Contributed ATWV= 0.0252, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2054 +ATWV=0.3992 OTWV=0.5032 STWV=0.6463 MTWV=0.3992 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4299 , #FA=5287 , #Miss=8416 , Contributed ATWV= 0.3731, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4254 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=116 , #FA=460 , #Miss=362 , Contributed ATWV= 0.0260, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2120 +ATWV=0.4131 OTWV=0.5074 STWV=0.6366 MTWV=0.4131 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4722 , #FA=5944 , #Miss=7993 , Contributed ATWV= 0.3883, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4427 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=118 , #FA=543 , #Miss=360 , Contributed ATWV= 0.0248, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2019 +ATWV=0.4192 OTWV=0.5136 STWV=0.6432 MTWV=0.4197 
THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4432 , #FA=4967 , #Miss=8283 , Contributed ATWV= 0.3911, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4458 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=117 , #FA=389 , #Miss=361 , Contributed ATWV= 0.0282, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2295 +# +# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:31:26-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3641 OTWV=0.4420 STWV=0.5488 MTWV=0.3641 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it2/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=3840 , #FA=4117 , #Miss=8875 , Contributed ATWV= 0.3404, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3880 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=102 , #FA=267 , #Miss=376 , Contributed ATWV= 0.0237, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1931 +ATWV=0.4029 OTWV=0.4987 STWV=0.6333 MTWV=0.4039 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4170 , #FA=4646 , #Miss=8545 , Contributed ATWV= 0.3693, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4210 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=144 , #FA=414 , #Miss=334 , Contributed ATWV= 0.0336, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2737 +ATWV=0.4079 OTWV=0.5034 STWV=0.6391 MTWV=0.4079 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch2/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4361 , #FA=5099 , #Miss=8354 , Contributed ATWV= 0.3750, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4275 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=144 , #FA=464 , #Miss=334 , Contributed ATWV= 0.0329, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2682 +ATWV=0.4153 OTWV=0.5120 STWV=0.6440 MTWV=0.4159 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4550 , #FA=5777 , #Miss=8165 , Contributed ATWV= 0.3786, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4316 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=159 , #FA=500 , #Miss=319 , Contributed ATWV= 0.0367, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2992 +ATWV=0.4222 OTWV=0.5174 STWV=0.6342 MTWV=0.4224 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4814 , #FA=5842 , #Miss=7901 , Contributed ATWV= 0.3888, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4432 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=147 , #FA=483 , #Miss=331 , Contributed ATWV= 0.0334, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2724 +ATWV=0.4288 OTWV=0.5196 STWV=0.6299 MTWV=0.4288 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4781 , #FA=5182 , #Miss=7934 , Contributed ATWV= 0.3956, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4509 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=146 , #FA=414 , #Miss=332 , Contributed ATWV= 0.0333, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2709 +# +# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:31:41-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3217 OTWV=0.4088 STWV=0.5365 MTWV=0.3225 THRESHOLD=0.484 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it1/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=3439 , #FA=4619 , #Miss=9276 , Contributed ATWV= 0.2980, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3397 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=107 , #FA=370 , #Miss=371 , Contributed ATWV= 0.0237, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1928 +ATWV=0.3625 OTWV=0.4603 STWV=0.6156 MTWV=0.3625 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch1/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=3584 , #FA=4514 , #Miss=9131 , Contributed ATWV= 0.3277, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3736 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=145 , #FA=365 , #Miss=333 , Contributed ATWV= 0.0348, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2837 +ATWV=0.3648 OTWV=0.4659 STWV=0.6258 MTWV=0.3650 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=3728 , #FA=4997 , #Miss=8987 , Contributed ATWV= 0.3283, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3742 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=153 , #FA=426 , #Miss=325 , Contributed ATWV= 0.0365, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2971 +ATWV=0.3776 OTWV=0.4805 STWV=0.6324 MTWV=0.3779 THRESHOLD=0.503 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4000 , #FA=5730 , #Miss=8715 , Contributed ATWV= 0.3437, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3918 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=150 , #FA=500 , #Miss=328 , Contributed ATWV= 0.0339, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2759 +ATWV=0.3885 OTWV=0.4943 STWV=0.6300 MTWV=0.3904 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4304 , #FA=5863 , #Miss=8411 , Contributed ATWV= 0.3553, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4051 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=149 , #FA=466 , #Miss=329 , Contributed ATWV= 0.0332, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2702 +ATWV=0.3993 OTWV=0.4998 STWV=0.6357 MTWV=0.4003 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4480 , #FA=5920 , #Miss=8235 , Contributed ATWV= 0.3667, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4180 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=146 , #FA=498 , #Miss=332 , Contributed ATWV= 0.0327, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2662 diff --git a/egs/babel/s5d/results/results.101-cantonese-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T12:15:22-0500 b/egs/babel/s5d/results/results.101-cantonese-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T12:15:22-0500 new file mode 100644 index 00000000000..0b03f645904 --- /dev/null +++ b/egs/babel/s5d/results/results.101-cantonese-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T12:15:22-0500 @@ -0,0 +1,28 @@ +#Created on 2016-02-18T12:15:22-0500 by local/best_scores.sh +# +# +# STT Task performance (WER), evaluated on 2016-02-18T12:20:23-0500 +%WER 50.4 | 10001 82932 | 57.3 32.0 10.7 7.6 50.4 79.0 | -1.280 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 59.2 | 10001 82932 | 50.9 37.9 11.1 10.1 59.2 81.7 | -1.687 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 51.0 | 10001 82932 | 55.5 30.2 14.3 6.5 51.0 80.0 | -0.722 | 
exp/tri6b_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 47.3 | 10001 82932 | 59.7 30.6 9.8 7.0 47.3 77.1 | -1.079 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (CER), evaluated on 2016-02-18T12:20:24-0500 +%WER 43.5 | 10001 104181 | 62.5 29.8 7.6 6.1 43.5 78.6 | -1.082 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.char.ctm.sys +%WER 52.3 | 10001 104181 | 55.5 35.9 8.6 7.8 52.3 81.5 | -1.384 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.char.ctm.sys +%WER 43.9 | 10001 104181 | 62.2 28.5 9.3 6.0 43.9 80.1 | -0.627 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.char.ctm.sys +%WER 40.5 | 10001 104181 | 64.3 28.2 7.5 4.8 40.5 76.7 | -0.854 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.char.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T22:47:05-0500 +%WER 50.4 | 10001 82932 | 57.3 32.0 10.7 7.6 50.4 79.0 | -1.280 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 59.2 | 10001 82932 | 50.9 37.9 11.1 10.1 59.2 81.7 | -1.687 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 51.0 | 10001 82932 | 55.5 30.2 14.3 6.5 51.0 80.0 | -0.722 | exp/tri6b_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 43.6 | 10001 82932 | 60.4 27.7 11.9 4.0 43.6 73.1 | -0.439 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 47.3 | 10001 82932 | 59.7 30.6 9.8 7.0 47.3 77.1 | -1.079 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (CER), evaluated on 2016-02-19T22:47:09-0500 +%WER 43.5 | 10001 104181 | 62.5 29.8 7.6 6.1 43.5 78.6 | -1.082 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.char.ctm.sys +%WER 52.3 | 10001 104181 | 55.5 35.9 8.6 7.8 52.3 81.5 | -1.384 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.char.ctm.sys +%WER 43.9 | 10001 104181 | 62.2 28.5 9.3 6.0 43.9 80.1 | -0.627 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.char.ctm.sys +%WER 37.0 | 10001 104181 | 65.6 25.3 9.1 2.6 37.0 72.6 | -0.301 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.char.ctm.sys +%WER 40.5 | 10001 104181 | 64.3 28.2 7.5 4.8 40.5 76.7 | -0.854 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.char.ctm.sys diff --git a/egs/babel/s5d/results/results.102-assamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:53:08-0500 b/egs/babel/s5d/results/results.102-assamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:53:08-0500 new file mode 100644 index 00000000000..00aa7af8149 --- /dev/null +++ b/egs/babel/s5d/results/results.102-assamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:53:08-0500 @@ -0,0 +1,27 @@ +#Created on 2015-11-27T17:53:08-0500 +# +# STT Task performance (WER) +%WER 61.1 | 22313 52407 | 44.9 44.0 11.1 6.0 61.1 29.4 | -1.466 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 52.7 | 22313 52407 | 52.0 37.3 10.6 4.8 52.7 28.0 | -0.904 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 52.6 | 22313 52407 | 52.3 37.1 10.7 4.9 52.6 28.2 | -0.763 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 50.5 | 22313 52407 | 53.8 35.3 10.8 4.3 50.5 27.2 | -0.860 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:20:59-0500 +%WER 52.7 | 22313 52407 | 52.0 37.3 10.6 4.8 52.7 28.0 | -0.904 | 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 61.1 | 22313 52407 | 44.9 44.0 11.1 6.0 61.1 29.4 | -1.466 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 52.6 | 22313 52407 | 51.6 36.6 11.8 4.2 52.6 28.1 | -0.671 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 50.5 | 22313 52407 | 53.5 34.9 11.6 4.0 50.5 27.3 | -0.803 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:34:05-0500 +%WER 52.7 | 22313 52407 | 52.0 37.3 10.6 4.8 52.7 28.0 | -0.904 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 61.1 | 22313 52407 | 44.9 44.0 11.1 6.0 61.1 29.4 | -1.466 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 52.6 | 22313 52407 | 51.6 36.6 11.8 4.2 52.6 28.1 | -0.671 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.9 | 22313 52407 | 53.3 33.1 13.6 3.2 49.9 27.4 | -0.580 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/score_10/dev10h.pem.ctm.sys +%WER 50.5 | 22313 52407 | 53.5 34.9 11.6 4.0 50.5 27.3 | -0.803 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T18:29:11-0500 +%WER 52.7 | 22313 52407 | 52.0 37.3 10.6 4.8 52.7 28.0 | -0.904 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 61.1 | 22313 52407 | 44.9 44.0 11.1 6.0 61.1 29.4 | -1.466 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 52.6 | 22313 52407 | 51.6 36.6 11.8 4.2 52.6 28.1 | -0.671 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.2 | 22313 52407 | 53.9 32.4 13.7 3.2 49.2 27.3 | -0.554 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 50.5 | 22313 52407 | 53.5 34.9 11.6 4.0 50.5 27.3 | -0.803 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.103-bengali-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:49:23-0500 b/egs/babel/s5d/results/results.103-bengali-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:49:23-0500 new file mode 100644 index 00000000000..64b03ac3178 --- /dev/null +++ b/egs/babel/s5d/results/results.103-bengali-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:49:23-0500 @@ -0,0 +1,22 @@ +#Created on 2015-12-01T16:49:23-0500 +# +# STT Task performance (WER) +%WER 63.4 | 22224 57152 | 41.7 46.0 12.3 5.1 63.4 31.3 | -1.288 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 55.8 | 22224 57152 | 48.8 39.6 11.6 4.6 55.8 30.1 | -0.794 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 55.4 | 22224 57152 | 48.3 38.4 13.3 3.7 55.4 30.0 | -0.540 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 54.0 | 22224 57152 | 49.6 37.0 13.5 3.6 54.0 29.7 | -0.713 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_17/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:21:33-0500 +%WER 58.7 | 22224 57152 | 44.9 40.1 15.0 3.6 58.7 30.5 | -0.491 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 55.8 | 22224 57152 | 48.3 38.9 12.8 4.1 55.8 30.1 | -0.723 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 63.4 | 22224 57152 | 41.7 46.0 12.3 5.1 63.4 31.3 | -1.288 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 55.4 | 22224 57152 | 48.3 38.4 13.3 3.7 55.4 30.0 | -0.540 | 
exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 54.0 | 22224 57152 | 49.3 36.5 14.2 3.3 54.0 29.7 | -0.676 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:34:29-0500 +%WER 58.7 | 22224 57152 | 44.9 40.1 15.0 3.6 58.7 30.5 | -0.491 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 55.8 | 22224 57152 | 48.3 38.9 12.8 4.1 55.8 30.1 | -0.723 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 63.4 | 22224 57152 | 41.7 46.0 12.3 5.1 63.4 31.3 | -1.288 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 55.4 | 22224 57152 | 48.3 38.4 13.3 3.7 55.4 30.0 | -0.540 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 52.2 | 22224 57152 | 50.7 33.9 15.3 2.9 52.2 29.6 | -0.453 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 54.0 | 22224 57152 | 49.3 36.5 14.2 3.3 54.0 29.7 | -0.676 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.104-pashto-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:48:47-0500 b/egs/babel/s5d/results/results.104-pashto-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:48:47-0500 new file mode 100644 index 00000000000..a085787d6d3 --- /dev/null +++ b/egs/babel/s5d/results/results.104-pashto-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:48:47-0500 @@ -0,0 +1,22 @@ +#Created on 2015-11-28T14:48:47-0500 +# +# STT Task performance (WER) +%WER 58.4 | 21825 101803 | 46.1 38.4 15.5 4.5 58.4 32.8 | -1.124 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.4 | 21825 101803 | 53.9 32.8 13.3 4.3 50.4 31.4 | -0.735 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 50.7 | 21825 101803 | 52.8 31.9 15.3 3.5 50.7 31.6 | -0.652 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 21825 101803 | 55.0 32.3 12.8 4.2 49.3 31.0 | -0.739 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:21:44-0500 +%WER 53.4 | 21825 101803 | 50.6 34.1 15.3 4.0 53.4 32.1 | -0.608 | exp/sgmm5/decode_fmllr_dev10h.pem/score_9/dev10h.pem.ctm.sys +%WER 50.4 | 21825 101803 | 53.9 32.8 13.3 4.3 50.4 31.4 | -0.735 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 58.4 | 21825 101803 | 46.1 38.4 15.5 4.5 58.4 32.8 | -1.124 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21825 101803 | 52.8 31.9 15.3 3.5 50.7 31.6 | -0.652 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 21825 101803 | 54.6 31.8 13.6 3.9 49.3 31.0 | -0.671 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:35:04-0500 +%WER 53.4 | 21825 101803 | 50.6 34.1 15.3 4.0 53.4 32.1 | -0.608 | exp/sgmm5/decode_fmllr_dev10h.pem/score_9/dev10h.pem.ctm.sys +%WER 50.4 | 21825 101803 | 53.9 32.8 13.3 4.3 50.4 31.4 | -0.735 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 58.4 | 21825 101803 | 46.1 38.4 15.5 4.5 58.4 32.8 | -1.124 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21825 101803 | 52.8 31.9 15.3 3.5 50.7 31.6 | -0.652 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 47.0 | 21825 101803 | 56.6 30.1 13.3 3.6 47.0 30.7 | -0.541 | 
exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 21825 101803 | 54.6 31.8 13.6 3.9 49.3 31.0 | -0.671 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-25T15:45:46-05:00 b/egs/babel/s5d/results/results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-25T15:45:46-05:00 new file mode 100644 index 00000000000..c1b66fb5daf --- /dev/null +++ b/egs/babel/s5d/results/results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-25T15:45:46-05:00 @@ -0,0 +1,242 @@ +#Created on 2016-02-25T15:45:46-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-25T15:45:46-05:00 +%WER 50.2 | 21825 101803 | 55.1 33.2 11.7 5.3 50.2 31.2 | -0.670 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-25T17:03:34-05:00 +%WER 50.2 | 21825 101803 | 55.1 33.2 11.7 5.3 50.2 31.2 | -0.670 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 21825 101803 | 48.5 39.7 11.8 6.6 58.1 32.6 | -1.226 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.1 | 21825 101803 | 56.4 31.2 12.3 4.6 48.1 31.0 | -0.638 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-03T19:48:53-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 50.4 | 21825 101803 | 55.5 34.1 10.4 5.9 50.4 31.0 | -0.669 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_16/dev10h.pem.ctm.sys +%WER 42.3 | 21825 101803 | 61.0 26.6 12.3 3.3 42.3 30.0 | -1.260 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 50.2 | 21825 101803 | 55.1 33.2 11.7 5.3 50.2 31.2 | -0.670 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 21825 101803 | 48.5 39.7 11.8 6.6 58.1 32.6 | -1.226 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.3 | 21825 101803 | 44.2 42.6 13.3 6.5 62.3 32.9 | -0.955 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 48.1 | 21825 101803 | 56.4 31.2 12.3 4.6 48.1 31.0 | -0.638 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.8 | 21825 101803 | 57.7 28.7 13.7 4.4 46.8 30.8 | -0.514 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-06T10:07:57-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 50.4 | 21825 101803 | 55.5 34.1 10.4 5.9 50.4 31.0 | -0.669 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_16/dev10h.pem.ctm.sys +%WER 42.3 | 21825 101803 | 61.0 26.6 12.3 3.3 42.3 30.0 | -1.260 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 43.7 | 21825 101803 | 60.3 27.8 12.0 4.0 43.7 30.3 | -1.051 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 46.6 | 21825 101803 | 57.7 29.7 12.6 4.3 46.6 30.8 | -0.740 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 50.2 | 21825 101803 | 55.1 33.2 11.7 5.3 50.2 31.2 | -0.670 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 21825 101803 | 48.5 39.7 11.8 6.6 58.1 32.6 | -1.226 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.3 | 21825 101803 | 44.2 42.6 13.3 6.5 62.3 32.9 | -0.955 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 48.1 | 21825 101803 | 
56.4 31.2 12.3 4.6 48.1 31.0 | -0.638 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.8 | 21825 101803 | 57.7 28.7 13.7 4.4 46.8 30.8 | -0.514 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +# +# KWS Task performance (TWV), for the set kwlist evaluated on 2016-03-31T11:30:04-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.4114 OTWV=0.5171 STWV=0.6713 MTWV=0.4128 THRESHOLD=0.453 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3606 , #FA=1935 , #Miss=2988 , Contributed ATWV= 0.4114, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4121 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4639 OTWV=0.5790 STWV=0.7779 MTWV=0.4639 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3948 , #FA=2450 , #Miss=2646 , Contributed ATWV= 0.4639, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4646 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4670 OTWV=0.5932 STWV=0.7799 MTWV=0.4685 THRESHOLD=0.453 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3914 , #FA=2016 , #Miss=2680 , Contributed ATWV= 0.4670, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4677 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4940 OTWV=0.6072 STWV=0.7751 MTWV=0.4940 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4297 , #FA=2623 , #Miss=2297 , Contributed ATWV= 0.4940, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4948 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4970 OTWV=0.6016 STWV=0.7837 MTWV=0.4985 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4145 , #FA=2538 , #Miss=2449 , Contributed ATWV= 0.4970, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4977 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.5174 OTWV=0.6324 STWV=0.7958 MTWV=0.5183 THRESHOLD=0.433 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4312 , #FA=2156 , #Miss=2282 , Contributed ATWV= 0.5174, Best Possible Contributed ATWV= 0.9984, ATWV= 0.5182 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +# +# KWS Task performance (TWV), for the set kwlist2 evaluated on 2016-03-31T11:30:12-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.4371 OTWV=0.5527 STWV=0.6904 MTWV=0.4372 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=7695 , #FA=8671 , #Miss=6784 , Contributed ATWV= 0.4356, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4423 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=32 , #Miss=50 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, 
ATWV= 0.0974 +ATWV=0.4822 OTWV=0.6082 STWV=0.7912 MTWV=0.4822 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8278 , #FA=9303 , #Miss=6201 , Contributed ATWV= 0.4808, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4882 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=12 , #FA=60 , #Miss=48 , Contributed ATWV= 0.0014, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0924 +ATWV=0.4920 OTWV=0.6156 STWV=0.7891 MTWV=0.4920 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8698 , #FA=10346, #Miss=5781 , Contributed ATWV= 0.4913, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4989 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=8 , #FA=59 , #Miss=52 , Contributed ATWV= 0.0006, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0427 +ATWV=0.5006 OTWV=0.6216 STWV=0.7975 MTWV=0.5006 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8552 , #FA=9419 , #Miss=5927 , Contributed ATWV= 0.4992, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5069 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=55 , #Miss=49 , Contributed ATWV= 0.0013, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0873 +ATWV=0.5077 OTWV=0.6291 STWV=0.7819 MTWV=0.5077 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=9060 , #FA=10188, #Miss=5419 , Contributed ATWV= 0.5073, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5150 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=7 , #FA=64 , #Miss=53 , Contributed ATWV= 0.0005, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0325 +ATWV=0.5203 OTWV=0.6486 STWV=0.7952 MTWV=0.5218 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=9144 , #FA=8922 , #Miss=5335 , Contributed ATWV= 0.5191, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5271 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=9 , #FA=44 , #Miss=51 , Contributed ATWV= 0.0012, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0821 +# +# KWS Task performance (TWV), for the set kwlist3 evaluated on 2016-03-31T11:30:24-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3527 OTWV=0.4568 STWV=0.6002 MTWV=0.3537 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=6954 , #FA=5353 , #Miss=7254 , Contributed ATWV= 0.3477, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3778 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=23 , #FA=232 , #Miss=223 , Contributed ATWV= 0.0049, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0605 +ATWV=0.3997 OTWV=0.5121 STWV=0.7021 MTWV=0.4002 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist3_12/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7407 , #FA=5449 , #Miss=6801 , Contributed ATWV= 0.3919, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4259 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=39 , #FA=307 , #Miss=207 , Contributed ATWV= 0.0076, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0939 +ATWV=0.4102 OTWV=0.5277 STWV=0.7047 MTWV=0.4102 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7795 , #FA=5927 , #Miss=6413 , Contributed ATWV= 0.4033, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4382 + OOV=1 #Keywords=143 , 
#Targ=246 , #Corr=36 , #FA=288 , #Miss=210 , Contributed ATWV= 0.0067, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0822 +ATWV=0.4222 OTWV=0.5278 STWV=0.7066 MTWV=0.4222 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7820 , #FA=5808 , #Miss=6388 , Contributed ATWV= 0.4152, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4511 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=326 , #Miss=210 , Contributed ATWV= 0.0068, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0839 +ATWV=0.4285 OTWV=0.5406 STWV=0.6965 MTWV=0.4286 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=8050 , #FA=5500 , #Miss=6158 , Contributed ATWV= 0.4213, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4578 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=34 , #FA=264 , #Miss=212 , Contributed ATWV= 0.0070, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0858 +ATWV=0.4361 OTWV=0.5517 STWV=0.7032 MTWV=0.4361 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=8487 , #FA=6339 , #Miss=5721 , Contributed ATWV= 0.4310, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4683 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=311 , #Miss=210 , Contributed ATWV= 0.0048, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0594 +# +# KWS Task performance (TWV), syllabic search for the set kwlist evaluated on 2016-03-31T11:30:38-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2471 OTWV=0.2986 STWV=0.3521 MTWV=0.2471 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1536 , #FA=1187 , #Miss=5058 , Contributed ATWV= 0.2471, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2475 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2738 OTWV=0.3312 STWV=0.3984 MTWV=0.2738 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1588 , #FA=1164 , #Miss=5006 , Contributed ATWV= 0.2738, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2742 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2762 OTWV=0.3345 STWV=0.4011 MTWV=0.2762 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1613 , #FA=1156 , #Miss=4981 , Contributed ATWV= 0.2762, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2766 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2932 OTWV=0.3415 STWV=0.3985 MTWV=0.2981 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1624 , #FA=1082 , #Miss=4970 , Contributed ATWV= 0.2934, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2938 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828 +ATWV=0.2970 OTWV=0.3432 STWV=0.4014 MTWV=0.2970 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , 
#Corr=1702 , #FA=1132 , #Miss=4892 , Contributed ATWV= 0.2970, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2975 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2978 OTWV=0.3444 STWV=0.4035 MTWV=0.2978 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1683 , #FA=1050 , #Miss=4911 , Contributed ATWV= 0.2978, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2983 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +# +# KWS Task performance (TWV), phonetic search for the set kwlist evaluated on 2016-03-31T11:30:51-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2335 OTWV=0.2867 STWV=0.3609 MTWV=0.2337 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/phones/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1443 , #FA=1310 , #Miss=5151 , Contributed ATWV= 0.2336, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2339 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +ATWV=0.2513 OTWV=0.3174 STWV=0.4034 MTWV=0.2513 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1518 , #FA=1442 , #Miss=5076 , Contributed ATWV= 0.2515, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2519 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828 +ATWV=0.2525 OTWV=0.3188 STWV=0.4069 MTWV=0.2583 THRESHOLD=0.444 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1564 , #FA=1489 , #Miss=5030 , Contributed ATWV= 0.2526, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2530 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552 +ATWV=0.2575 OTWV=0.3184 STWV=0.3902 MTWV=0.2608 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1544 , #FA=1319 , #Miss=5050 , Contributed ATWV= 0.2575, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2579 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +ATWV=0.2759 OTWV=0.3294 STWV=0.4067 MTWV=0.2766 THRESHOLD=0.511 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1624 , #FA=1369 , #Miss=4970 , Contributed ATWV= 0.2760, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2764 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552 +ATWV=0.2793 OTWV=0.3306 STWV=0.4042 MTWV=0.2812 THRESHOLD=0.529 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1693 , #FA=1495 , #Miss=4901 , Contributed ATWV= 0.2785, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2790 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=0 , #Miss=1 , Contributed ATWV= 0.0008, Best Possible Contributed ATWV= 0.0016, ATWV= 0.5000 +declare -ax kwsets='([0]="kwlist" [1]="kwlist2" 
[2]="kwlist3")' +# +# KWS Task performance (TWV), for the set kwlist evaluated on 2016-03-31T11:31:11-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2516 OTWV=0.2931 STWV=0.3457 MTWV=0.2518 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist_8/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1433 , #FA=916 , #Miss=5161 , Contributed ATWV= 0.2516, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2520 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2710 OTWV=0.3243 STWV=0.3971 MTWV=0.2720 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1527 , #FA=1006 , #Miss=5067 , Contributed ATWV= 0.2710, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2715 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552 +ATWV=0.2864 OTWV=0.3330 STWV=0.3928 MTWV=0.2864 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1673 , #FA=1135 , #Miss=4921 , Contributed ATWV= 0.2864, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2869 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2874 OTWV=0.3386 STWV=0.4018 MTWV=0.2881 THRESHOLD=0.403 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch2/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1591 , #FA=1010 , #Miss=5003 , Contributed ATWV= 0.2874, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2879 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +ATWV=0.2946 OTWV=0.3463 STWV=0.4046 MTWV=0.2952 THRESHOLD=0.453 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1666 , #FA=1036 , #Miss=4928 , Contributed ATWV= 0.2946, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2951 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +# +# KWS Task performance (TWV), for the set kwlist2 evaluated on 2016-03-31T11:31:16-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3298 OTWV=0.4064 STWV=0.4925 MTWV=0.3305 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist2_8/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4881 , #FA=5838 , #Miss=9598 , Contributed ATWV= 0.3281, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3331 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=9 , #FA=23 , #Miss=51 , Contributed ATWV= 0.0017, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1105 +ATWV=0.3636 OTWV=0.4527 STWV=0.5672 MTWV=0.3638 THRESHOLD=0.453 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5215 , #FA=6311 , #Miss=9264 , Contributed ATWV= 0.3608, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3663 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=15 , #FA=26 , #Miss=45 , Contributed ATWV= 0.0028, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1873 +ATWV=0.3784 OTWV=0.4622 STWV=0.5703 MTWV=0.3792 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , 
#Targ=14479, #Corr=5416 , #FA=6432 , #Miss=9063 , Contributed ATWV= 0.3766, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3824 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=33 , #Miss=49 , Contributed ATWV= 0.0018, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1208 +ATWV=0.3795 OTWV=0.4643 STWV=0.5595 MTWV=0.3795 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5620 , #FA=6171 , #Miss=8859 , Contributed ATWV= 0.3781, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3839 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=8 , #FA=29 , #Miss=52 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0974 +ATWV=0.3973 OTWV=0.4799 STWV=0.5716 MTWV=0.4011 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5747 , #FA=5988 , #Miss=8732 , Contributed ATWV= 0.3952, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4013 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=26 , #Miss=49 , Contributed ATWV= 0.0020, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1346 +# +# KWS Task performance (TWV), for the set kwlist3 evaluated on 2016-03-31T11:31:26-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2442 OTWV=0.2994 STWV=0.3760 MTWV=0.2442 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3580 , #FA=3520 , #Miss=10628, Contributed ATWV= 0.2378, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2584 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=28 , #FA=145 , #Miss=218 , Contributed ATWV= 0.0064, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0787 +ATWV=0.2681 OTWV=0.3407 STWV=0.4407 MTWV=0.2684 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3688 , #FA=3305 , #Miss=10520, Contributed ATWV= 0.2574, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2797 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=45 , #FA=195 , #Miss=201 , Contributed ATWV= 0.0106, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1306 +ATWV=0.2844 OTWV=0.3499 STWV=0.4441 MTWV=0.2857 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3840 , #FA=3340 , #Miss=10368, Contributed ATWV= 0.2733, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2970 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=44 , #FA=197 , #Miss=202 , Contributed ATWV= 0.0111, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1367 +ATWV=0.2946 OTWV=0.3581 STWV=0.4423 MTWV=0.2948 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3882 , #FA=2874 , #Miss=10326, Contributed ATWV= 0.2804, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3047 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=53 , #FA=138 , #Miss=193 , Contributed ATWV= 0.0142, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1751 +ATWV=0.2958 OTWV=0.3658 STWV=0.4485 MTWV=0.2988 THRESHOLD=0.453 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist3_11/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=4068 , #FA=3344 , #Miss=10140, Contributed ATWV= 0.2835, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3081 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=47 , #FA=136 , #Miss=199 , Contributed ATWV= 0.0122, Best Possible Contributed ATWV= 0.0814, 
ATWV= 0.1504 +declare -ax kwsets='([0]="kwlist" [1]="kwlist2" [2]="kwlist3")' +# +# KWS Task performance (TWV), for the set kwlist evaluated on 2016-03-31T11:31:47-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.1869 OTWV=0.2380 STWV=0.3024 MTWV=0.1869 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it2/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1180 , #FA=1168 , #Miss=5414 , Contributed ATWV= 0.1870, Best Possible Contributed ATWV= 0.9984, ATWV= 0.1873 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828 +ATWV=0.2043 OTWV=0.2598 STWV=0.3427 MTWV=0.2043 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1281 , #FA=1263 , #Miss=5313 , Contributed ATWV= 0.2045, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2048 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=4 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1103 +ATWV=0.2055 OTWV=0.2591 STWV=0.3340 MTWV=0.2055 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1267 , #FA=1206 , #Miss=5327 , Contributed ATWV= 0.2057, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2060 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=5 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1379 +ATWV=0.2123 OTWV=0.2766 STWV=0.3581 MTWV=0.2149 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1333 , #FA=1274 , #Miss=5261 , Contributed ATWV= 0.2125, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2128 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=4 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1103 +ATWV=0.2216 OTWV=0.2852 STWV=0.3565 MTWV=0.2240 THRESHOLD=0.403 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1371 , #FA=1067 , #Miss=5223 , Contributed ATWV= 0.2209, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2213 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=2 , #Miss=1 , Contributed ATWV= 0.0007, Best Possible Contributed ATWV= 0.0016, ATWV= 0.4448 +ATWV=0.2532 OTWV=0.3121 STWV=0.3808 MTWV=0.2539 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1547 , #FA=1310 , #Miss=5047 , Contributed ATWV= 0.2524, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2528 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=0 , #Miss=1 , Contributed ATWV= 0.0008, Best Possible Contributed ATWV= 0.0016, ATWV= 0.5000 +# +# KWS Task performance (TWV), for the set kwlist2 evaluated on 2016-03-31T11:31:53-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2686 OTWV=0.3459 STWV=0.4328 MTWV=0.2690 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=3870 , #FA=5258 , #Miss=10609, Contributed ATWV= 0.2670, Best Possible Contributed ATWV= 0.9849, ATWV= 0.2711 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=42 , #Miss=50 , Contributed ATWV= 0.0016, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1055 +ATWV=0.3044 OTWV=0.3970 STWV=0.5154 MTWV=0.3044 THRESHOLD=0.491 
exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4285 , #FA=5644 , #Miss=10194, Contributed ATWV= 0.3011, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3057 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=16 , #FA=54 , #Miss=44 , Contributed ATWV= 0.0033, Best Possible Contributed ATWV= 0.0151, ATWV= 0.2152 +ATWV=0.3073 OTWV=0.3944 STWV=0.4998 MTWV=0.3079 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch2/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4457 , #FA=6120 , #Miss=10022, Contributed ATWV= 0.3051, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3098 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=13 , #FA=55 , #Miss=47 , Contributed ATWV= 0.0022, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1487 +ATWV=0.3092 OTWV=0.4100 STWV=0.5226 MTWV=0.3125 THRESHOLD=0.465 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4431 , #FA=5723 , #Miss=10048, Contributed ATWV= 0.3078, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3125 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=69 , #Miss=49 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0977 +ATWV=0.3280 OTWV=0.4225 STWV=0.5216 MTWV=0.3291 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4940 , #FA=6266 , #Miss=9539 , Contributed ATWV= 0.3266, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3316 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=63 , #Miss=50 , Contributed ATWV= 0.0014, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0911 +ATWV=0.3586 OTWV=0.4552 STWV=0.5519 MTWV=0.3614 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5261 , #FA=6266 , #Miss=9218 , Contributed ATWV= 0.3563, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3618 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=14 , #FA=67 , #Miss=46 , Contributed ATWV= 0.0023, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1531 +# +# KWS Task performance (TWV), for the set kwlist3 evaluated on 2016-03-31T11:32:05-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.1931 OTWV=0.2569 STWV=0.3444 MTWV=0.1931 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist3_9/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3015 , #FA=3772 , #Miss=11193, Contributed ATWV= 0.1875, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2037 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=33 , #FA=303 , #Miss=213 , Contributed ATWV= 0.0062, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0759 +ATWV=0.2228 OTWV=0.2982 STWV=0.4154 MTWV=0.2231 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist3_11/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3232 , #FA=3853 , #Miss=10976, Contributed ATWV= 0.2092, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2273 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=57 , #FA=332 , #Miss=189 , Contributed ATWV= 0.0141, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1738 +ATWV=0.2247 OTWV=0.2962 STWV=0.4001 MTWV=0.2247 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3131 , #FA=3232 , #Miss=11077, Contributed ATWV= 0.2122, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2306 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=48 , #FA=278 , 
#Miss=198 , Contributed ATWV= 0.0131, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1606 +ATWV=0.2320 OTWV=0.3081 STWV=0.4229 MTWV=0.2326 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3378 , #FA=3831 , #Miss=10830, Contributed ATWV= 0.2194, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2384 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=53 , #FA=299 , #Miss=193 , Contributed ATWV= 0.0126, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1544 +ATWV=0.2474 OTWV=0.3186 STWV=0.4206 MTWV=0.2476 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3615 , #FA=3812 , #Miss=10593, Contributed ATWV= 0.2310, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2510 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=63 , #FA=306 , #Miss=183 , Contributed ATWV= 0.0165, Best Possible Contributed ATWV= 0.0814, ATWV= 0.2023 +ATWV=0.2668 OTWV=0.3433 STWV=0.4457 MTWV=0.2668 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3825 , #FA=3913 , #Miss=10383, Contributed ATWV= 0.2535, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2755 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=59 , #FA=305 , #Miss=187 , Contributed ATWV= 0.0138, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1699 diff --git a/egs/babel/s5d/results/results.105-turkish-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:43:17-0500 b/egs/babel/s5d/results/results.105-turkish-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:43:17-0500 new file mode 100644 index 00000000000..b76de49ffe3 --- /dev/null +++ b/egs/babel/s5d/results/results.105-turkish-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:43:17-0500 @@ -0,0 +1,22 @@ +#Created on 2015-11-28T14:43:17-0500 +# +# STT Task performance (WER) +%WER 57.6 | 22070 54382 | 47.9 41.3 10.8 5.4 57.6 30.8 | -1.174 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.7 | 22070 54382 | 57.0 34.0 9.0 4.7 47.7 29.1 | -0.571 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 47.3 | 22070 54382 | 56.9 33.4 9.6 4.2 47.3 29.1 | -0.489 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 46.2 | 22070 54382 | 58.2 32.6 9.2 4.3 46.2 28.5 | -0.560 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:21:55-0500 +%WER 50.7 | 22070 54382 | 53.6 35.5 10.9 4.2 50.7 29.9 | -0.382 | exp/sgmm5/decode_fmllr_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 47.7 | 22070 54382 | 56.5 33.6 9.9 4.2 47.7 29.1 | -0.506 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 57.6 | 22070 54382 | 47.9 41.3 10.8 5.4 57.6 30.8 | -1.174 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.3 | 22070 54382 | 56.5 33.0 10.5 3.7 47.3 29.1 | -0.389 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.2 | 22070 54382 | 58.2 32.6 9.2 4.3 46.2 28.5 | -0.560 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:35:31-0500 +%WER 50.7 | 22070 54382 | 53.6 35.5 10.9 4.2 50.7 29.9 | -0.382 | exp/sgmm5/decode_fmllr_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 47.7 | 22070 54382 | 56.5 33.6 9.9 4.2 47.7 29.1 | -0.506 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys 
+%WER 57.6 | 22070 54382 | 47.9 41.3 10.8 5.4 57.6 30.8 | -1.174 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.3 | 22070 54382 | 56.5 33.0 10.5 3.7 47.3 29.1 | -0.389 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 43.8 | 22070 54382 | 60.1 30.1 9.8 3.8 43.8 27.8 | -0.361 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 46.2 | 22070 54382 | 58.2 32.6 9.2 4.3 46.2 28.5 | -0.560 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.106-tagalog-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:50:17-0500 b/egs/babel/s5d/results/results.106-tagalog-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:50:17-0500 new file mode 100644 index 00000000000..efa5bc3288c --- /dev/null +++ b/egs/babel/s5d/results/results.106-tagalog-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:50:17-0500 @@ -0,0 +1,22 @@ +#Created on 2015-11-27T16:50:17-0500 +# +# STT Task performance (WER) +%WER 56.5 | 25332 63009 | 49.6 37.6 12.8 6.0 56.5 32.0 | -1.196 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.0 | 25332 63009 | 56.7 31.7 11.6 4.7 48.0 30.2 | -0.746 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 48.1 | 25332 63009 | 56.2 31.0 12.8 4.3 48.1 30.3 | -0.477 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.2 | 25332 63009 | 58.0 30.5 11.5 4.2 46.2 30.0 | -0.682 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:22:58-0500 +%WER 51.1 | 25332 63009 | 53.2 32.5 14.3 4.3 51.1 31.1 | -0.459 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 48.0 | 25332 63009 | 56.7 31.7 11.6 4.7 48.0 30.2 | -0.746 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 56.5 | 25332 63009 | 49.6 37.6 12.8 6.0 56.5 32.0 | -1.196 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.1 | 25332 63009 | 56.2 31.0 12.8 4.3 48.1 30.3 | -0.477 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.2 | 25332 63009 | 58.0 30.5 11.5 4.2 46.2 30.0 | -0.682 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:35:40-0500 +%WER 51.1 | 25332 63009 | 53.2 32.5 14.3 4.3 51.1 31.1 | -0.459 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 48.0 | 25332 63009 | 56.7 31.7 11.6 4.7 48.0 30.2 | -0.746 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 56.5 | 25332 63009 | 49.6 37.6 12.8 6.0 56.5 32.0 | -1.196 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.1 | 25332 63009 | 56.2 31.0 12.8 4.3 48.1 30.3 | -0.477 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 43.9 | 25332 63009 | 59.5 28.8 11.7 3.4 43.9 29.2 | -0.386 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 46.2 | 25332 63009 | 58.0 30.5 11.5 4.2 46.2 30.0 | -0.682 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.107-vietnamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:51:53-0500 b/egs/babel/s5d/results/results.107-vietnamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:51:53-0500 new file mode 100644 index 00000000000..7d5da8e0f39 --- /dev/null +++ 
b/egs/babel/s5d/results/results.107-vietnamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:51:53-0500 @@ -0,0 +1,21 @@ +#Created on 2015-11-27T16:51:53-0500 +# +# STT Task performance (WER) +%WER 58.0 | 21875 111957 | 45.0 42.3 12.7 3.0 58.0 36.6 | -1.024 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.4 | 21875 111957 | 52.5 36.5 11.0 2.9 50.4 35.8 | -0.644 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 49.0 | 21875 111957 | 53.4 33.4 13.3 2.4 49.0 35.8 | -0.442 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.6 | 21875 111957 | 53.1 36.2 10.7 2.7 49.6 35.4 | -0.606 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:23:27-0500 +%WER 53.6 | 21875 111957 | 49.4 38.4 12.2 3.0 53.6 36.4 | -0.501 | exp/sgmm5/decode_fmllr_dev10h.pem/score_9/dev10h.pem.ctm.sys +%WER 50.4 | 21875 111957 | 52.5 36.5 11.0 2.9 50.4 35.8 | -0.644 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 58.0 | 21875 111957 | 45.0 42.3 12.7 3.0 58.0 36.6 | -1.024 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 49.0 | 21875 111957 | 53.4 33.4 13.3 2.4 49.0 35.8 | -0.442 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.6 | 21875 111957 | 53.0 35.6 11.4 2.6 49.6 35.4 | -0.548 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:35:55-0500 +%WER 53.6 | 21875 111957 | 49.4 38.4 12.2 3.0 53.6 36.4 | -0.501 | exp/sgmm5/decode_fmllr_dev10h.pem/score_9/dev10h.pem.ctm.sys +%WER 50.4 | 21875 111957 | 52.5 36.5 11.0 2.9 50.4 35.8 | -0.644 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 58.0 | 21875 111957 | 45.0 42.3 12.7 3.0 58.0 36.6 | -1.024 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 49.0 | 21875 111957 | 53.4 33.4 13.3 2.4 49.0 35.8 | -0.442 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.6 | 21875 111957 | 53.0 35.6 11.4 2.6 49.6 35.4 | -0.548 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.201-haitian-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T11:46:09-0500 b/egs/babel/s5d/results/results.201-haitian-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T11:46:09-0500 new file mode 100644 index 00000000000..143944daa01 --- /dev/null +++ b/egs/babel/s5d/results/results.201-haitian-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T11:46:09-0500 @@ -0,0 +1,21 @@ +#Created on 2016-02-18T11:46:09-0500 +# +# STT Task performance (WER) +%WER 56.6 | 21530 83682 | 47.1 35.9 16.9 3.8 56.6 33.1 | -0.984 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 49.5 | 21530 83682 | 54.3 31.2 14.5 3.8 49.5 32.1 | -0.672 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 49.2 | 21530 83682 | 54.2 30.8 15.0 3.4 49.2 32.0 | -0.537 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.1 | 21530 83682 | 54.3 30.2 15.5 3.4 49.1 31.9 | -0.636 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T11:51:42-0500 +%WER 56.6 | 21530 83682 | 47.1 35.9 16.9 3.8 56.6 33.1 | -0.984 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 49.5 | 21530 83682 | 54.3 31.2 14.5 3.8 49.5 32.1 | -0.672 | 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 49.2 | 21530 83682 | 54.2 30.8 15.0 3.4 49.2 32.0 | -0.537 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 46.2 | 21530 83682 | 56.4 27.0 16.6 2.6 46.2 31.4 | -0.484 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 49.1 | 21530 83682 | 54.3 30.2 15.5 3.4 49.1 31.9 | -0.636 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:37:00-0500 +%WER 49.5 | 21530 83682 | 54.3 31.2 14.5 3.8 49.5 32.1 | -0.672 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 56.6 | 21530 83682 | 47.1 35.9 16.9 3.8 56.6 33.1 | -0.984 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 49.2 | 21530 83682 | 53.7 30.2 16.1 2.9 49.2 31.9 | -0.465 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.2 | 21530 83682 | 56.4 27.0 16.6 2.6 46.2 31.4 | -0.484 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 49.1 | 21530 83682 | 54.3 30.2 15.5 3.4 49.1 31.9 | -0.636 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.202-swahili.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-21T10:25:47-0500 b/egs/babel/s5d/results/results.202-swahili.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-21T10:25:47-0500 new file mode 100644 index 00000000000..faa73c05ecb --- /dev/null +++ b/egs/babel/s5d/results/results.202-swahili.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-21T10:25:47-0500 @@ -0,0 +1,27 @@ +#Created on 2016-02-21T10:25:47-0500 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-21T10:25:47-0500 +%WER 46.6 | 23781 62345 | 59.1 32.4 8.5 5.7 46.6 29.3 | -0.865 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T06:37:59-05:00 +%WER 46.6 | 23781 62345 | 59.1 32.4 8.5 5.7 46.6 29.3 | -0.865 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_12/dev10h.pem.ctm.sys +%WER 54.1 | 23781 62345 | 53.8 37.5 8.7 7.9 54.1 30.7 | -1.869 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 43.7 | 23781 62345 | 61.1 30.2 8.7 4.8 43.7 28.6 | -0.713 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.0 | 23781 62345 | 61.1 26.9 12.0 4.1 43.0 28.7 | -0.631 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T20:34:46-05:00 +%WER 46.6 | 23781 62345 | 59.1 32.4 8.5 5.7 46.6 29.3 | -0.865 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_12/dev10h.pem.ctm.sys +%WER 54.1 | 23781 62345 | 53.8 37.5 8.7 7.9 54.1 30.7 | -1.869 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 43.7 | 23781 62345 | 61.1 30.2 8.7 4.8 43.7 28.6 | -0.713 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.0 | 23781 62345 | 61.1 26.9 12.0 4.1 43.0 28.7 | -0.631 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_12/dev10h.pem.ctm.sys +%WER 44.8 | 23781 62345 | 60.1 30.8 9.2 4.9 44.8 28.6 | -0.702 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-05T22:36:11-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 44.8 | 23781 62345 | 60.1 30.8 9.2 4.9 44.8 28.6 | -0.702 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +%WER 60.0 | 23781 62345 | 43.2 
35.3 21.5 3.2 60.0 32.4 | -0.909 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 63.9 | 23781 62345 | 39.6 36.7 23.6 3.5 63.9 33.1 | -1.153 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 46.6 | 23781 62345 | 59.1 32.4 8.5 5.7 46.6 29.3 | -0.865 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_12/dev10h.pem.ctm.sys +%WER 54.1 | 23781 62345 | 53.8 37.5 8.7 7.9 54.1 30.7 | -1.869 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 58.7 | 23781 62345 | 47.9 40.2 11.9 6.6 58.7 31.8 | -1.355 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 43.7 | 23781 62345 | 61.1 30.2 8.7 4.8 43.7 28.6 | -0.713 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.0 | 23781 62345 | 61.1 26.9 12.0 4.1 43.0 28.7 | -0.631 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_12/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.203-lao-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:50:41-0500 b/egs/babel/s5d/results/results.203-lao-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:50:41-0500 new file mode 100644 index 00000000000..66d7a71f598 --- /dev/null +++ b/egs/babel/s5d/results/results.203-lao-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:50:41-0500 @@ -0,0 +1,14 @@ +#Created on 2015-12-01T16:50:41-0500 +# +# STT Task performance (WER) +%WER 53.4 | 25158 82801 | 51.8 35.4 12.7 5.2 53.4 34.4 | -1.131 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 46.6 | 25158 82801 | 58.2 31.2 10.6 4.8 46.6 33.2 | -0.792 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 47.0 | 25158 82801 | 57.3 30.6 12.2 4.3 47.0 33.5 | -0.645 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 44.3 | 25158 82801 | 59.9 30.1 10.0 4.2 44.3 32.6 | -0.740 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T11:51:37-0500 +%WER 53.4 | 25158 82801 | 51.8 35.4 12.7 5.2 53.4 34.4 | -1.131 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 46.6 | 25158 82801 | 58.2 31.2 10.6 4.8 46.6 33.2 | -0.792 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 47.0 | 25158 82801 | 57.3 30.6 12.2 4.3 47.0 33.5 | -0.645 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 42.9 | 25158 82801 | 60.2 27.5 12.2 3.1 42.9 32.5 | -0.492 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 44.3 | 25158 82801 | 59.9 30.1 10.0 4.2 44.3 32.6 | -0.740 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.204-tamil-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T19:55:42-0500 b/egs/babel/s5d/results/results.204-tamil-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T19:55:42-0500 new file mode 100644 index 00000000000..e4dfcd5a5c2 --- /dev/null +++ b/egs/babel/s5d/results/results.204-tamil-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T19:55:42-0500 @@ -0,0 +1,8 @@ +#Created on 2015-12-01T19:55:42-0500 +# +# STT Task performance (WER), evaluated on 2016-02-18T11:51:14-0500 +%WER 74.2 | 22178 60033 | 30.1 51.6 18.3 4.3 74.2 36.3 | -1.744 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 67.8 | 22178 60033 | 36.1 47.5 16.4 3.8 67.8 35.0 | -1.220 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_10/dev10h.pem.ctm.sys +%WER 68.1 | 22178 60033 | 35.2 46.5 18.2 3.3 68.1 35.5 | -0.900 | 
exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 65.1 | 22178 60033 | 38.0 44.1 18.0 3.1 65.1 34.6 | -0.759 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 66.8 | 22178 60033 | 37.2 46.9 16.0 4.0 66.8 34.8 | -1.137 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.205-kurmanji.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:24:13-0500 b/egs/babel/s5d/results/results.205-kurmanji.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:24:13-0500 new file mode 100644 index 00000000000..3196f08c26a --- /dev/null +++ b/egs/babel/s5d/results/results.205-kurmanji.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:24:13-0500 @@ -0,0 +1,96 @@ +#Created on 2016-02-21T10:24:13-0500 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-21T10:24:13-0500 +# +# STT Task performance (WER), evaluated on 2016-02-21T10:25:04-0500 +# +# STT Task performance (WER), evaluated on 2016-02-27T09:49:11-05:00 +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-27T17:16:07-05:00 +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T08:52:09-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 71.3 | 23078 60240 | 31.4 34.0 34.7 2.7 71.3 33.6 | -2.291 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 73.3 | 23078 60240 | 29.3 33.2 37.5 2.6 73.3 33.7 | -1.834 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 83.2 | 23078 60240 | 18.7 30.0 51.3 1.9 83.2 35.6 | -1.680 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 64.1 | 23078 60240 | 40.3 39.5 
20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T20:57:22-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 71.3 | 23078 60240 | 31.4 34.0 34.7 2.7 71.3 33.6 | -2.291 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 73.3 | 23078 60240 | 29.3 33.2 37.5 2.6 73.3 33.7 | -1.834 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 81.0 | 23078 60240 | 20.9 30.0 49.1 1.9 81.0 35.1 | -1.466 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 83.2 | 23078 60240 | 18.7 30.0 51.3 1.9 83.2 35.6 | -1.680 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-05T10:56:23-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 71.3 | 23078 60240 | 31.4 34.0 34.7 2.7 71.3 33.6 | -2.291 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 73.3 | 23078 60240 | 29.3 33.2 37.5 2.6 73.3 33.7 | -1.834 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 81.0 | 23078 60240 | 20.9 30.0 49.1 1.9 81.0 35.1 | -1.466 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 83.2 | 23078 60240 | 18.7 30.0 51.3 1.9 83.2 35.6 | -1.680 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-05T22:38:30-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +%WER 71.6 | 23078 60240 | 30.8 32.2 37.0 2.4 71.6 33.6 | -2.116 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 73.5 | 23078 60240 | 29.1 32.8 38.1 2.6 73.5 33.7 | -1.960 | 
exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-06T09:57:37-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 71.6 | 23078 60240 | 30.8 32.2 37.0 2.4 71.6 33.6 | -2.116 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 73.5 | 23078 60240 | 29.1 32.8 38.1 2.6 73.5 33.7 | -1.960 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 82.9 | 23078 60240 | 19.2 30.9 49.9 2.1 82.9 35.6 | -1.948 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-08T07:34:08-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +%WER 60.6 | 23078 60240 | 43.0 37.3 19.7 3.6 60.6 31.7 | -1.738 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 73.5 | 23078 60240 | 29.1 32.8 38.1 2.6 73.5 33.7 | -1.960 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 82.9 | 23078 60240 | 19.2 30.9 49.9 2.1 82.9 35.6 | -1.948 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-10T09:31:52-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 
32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +%WER 60.6 | 23078 60240 | 43.0 37.3 19.7 3.6 60.6 31.7 | -1.738 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 61.0 | 23078 60240 | 42.5 36.7 20.8 3.5 61.0 31.8 | -1.277 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 82.9 | 23078 60240 | 19.2 30.9 49.9 2.1 82.9 35.6 | -1.948 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.206-zulu-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:54:01-0500 b/egs/babel/s5d/results/results.206-zulu-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:54:01-0500 new file mode 100644 index 00000000000..1e98cf911ea --- /dev/null +++ b/egs/babel/s5d/results/results.206-zulu-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:54:01-0500 @@ -0,0 +1,14 @@ +#Created on 2015-11-27T17:54:01-0500 +# +# STT Task performance (WER) +%WER 66.0 | 22805 52162 | 38.4 47.5 14.1 4.4 66.0 33.2 | -2.078 | exp/tri5/decode_dev10h.pem/score_17/dev10h.pem.ctm.sys +%WER 60.4 | 22805 52162 | 44.4 44.1 11.5 4.8 60.4 32.3 | -1.189 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 59.1 | 22805 52162 | 44.2 41.8 14.0 3.3 59.1 32.0 | -0.746 | exp/tri6b_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 58.6 | 22805 52162 | 45.4 42.5 12.1 4.0 58.6 31.9 | -1.026 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T11:51:04-0500 +%WER 66.0 | 22805 52162 | 38.4 47.5 14.1 4.4 66.0 33.2 | -2.078 | exp/tri5/decode_dev10h.pem/score_17/dev10h.pem.ctm.sys +%WER 60.4 | 22805 52162 | 44.4 44.1 11.5 4.8 60.4 32.3 | -1.189 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 59.1 | 22805 52162 | 44.2 41.8 14.0 3.3 59.1 32.0 | -0.746 | exp/tri6b_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 56.4 | 22805 52162 | 46.7 40.0 13.3 3.1 56.4 31.4 | -0.682 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_12/dev10h.pem.ctm.sys +%WER 58.6 | 22805 52162 | 45.4 42.5 12.1 4.0 58.6 31.9 | -1.026 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.207-tokpisin.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:25:25-0500 b/egs/babel/s5d/results/results.207-tokpisin.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:25:25-0500 new file mode 100644 index 00000000000..3d0dc67e8e3 --- /dev/null +++ b/egs/babel/s5d/results/results.207-tokpisin.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:25:25-0500 @@ -0,0 +1,34 @@ +#Created on 2016-02-21T10:25:25-0500 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-21T10:25:25-0500 +%WER 38.5 | 24353 74481 | 67.0 24.2 8.8 5.5 38.5 28.4 | -0.703 | 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T22:19:14-05:00 +%WER 38.5 | 24353 74481 | 67.0 24.2 8.8 5.5 38.5 28.4 | -0.703 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 24353 74481 | 61.0 28.7 10.3 6.8 45.8 29.9 | -1.441 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 36.4 | 24353 74481 | 68.0 21.9 10.1 4.3 36.4 28.1 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 36.2 | 24353 74481 | 67.9 20.9 11.2 4.2 36.2 28.0 | -0.533 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys +%WER 36.8 | 24353 74481 | 68.2 23.0 8.8 5.0 36.8 27.8 | -0.602 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T08:49:10-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 36.8 | 24353 74481 | 68.2 23.0 8.8 5.0 36.8 27.8 | -0.602 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +%WER 33.2 | 24353 74481 | 70.6 20.4 9.0 3.8 33.2 26.7 | -1.367 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 33.3 | 24353 74481 | 70.9 20.5 8.7 4.2 33.3 26.7 | -1.038 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 40.1 | 24353 74481 | 64.7 23.9 11.4 4.8 40.1 29.2 | -0.825 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 38.5 | 24353 74481 | 67.0 24.2 8.8 5.5 38.5 28.4 | -0.703 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 24353 74481 | 61.0 28.7 10.3 6.8 45.8 29.9 | -1.441 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 51.5 | 24353 74481 | 55.3 31.7 13.0 6.8 51.5 30.7 | -1.076 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 36.4 | 24353 74481 | 68.0 21.9 10.1 4.3 36.4 28.1 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 36.2 | 24353 74481 | 67.9 20.9 11.2 4.2 36.2 28.0 | -0.533 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-05T08:07:38-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 36.8 | 24353 74481 | 68.2 23.0 8.8 5.0 36.8 27.8 | -0.602 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +%WER 33.2 | 24353 74481 | 70.6 20.4 9.0 3.8 33.2 26.7 | -1.367 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 33.3 | 24353 74481 | 70.9 20.5 8.7 4.2 33.3 26.7 | -1.038 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 37.6 | 24353 74481 | 66.9 22.5 10.5 4.5 37.6 28.5 | -0.642 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 40.1 | 24353 74481 | 64.7 23.9 11.4 4.8 40.1 29.2 | -0.825 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 38.5 | 24353 74481 | 67.0 24.2 8.8 5.5 38.5 28.4 | -0.703 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 24353 74481 | 61.0 28.7 10.3 6.8 45.8 29.9 | -1.441 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 51.5 | 24353 74481 | 55.3 31.7 13.0 6.8 51.5 30.7 | -1.076 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 36.4 | 24353 74481 | 68.0 21.9 10.1 4.3 36.4 28.1 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 36.2 | 24353 74481 | 67.9 20.9 11.2 4.2 36.2 28.0 | -0.533 | 
exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T10:45:54-05:00 b/egs/babel/s5d/results/results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T10:45:54-05:00 new file mode 100644 index 00000000000..e6af3c9f6f9 --- /dev/null +++ b/egs/babel/s5d/results/results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T10:45:54-05:00 @@ -0,0 +1,43 @@ +#Created on 2016-02-25T10:45:54-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-25T10:45:54-05:00 +# +# STT Task performance (WER), evaluated on 2016-02-25T22:40:27-05:00 +%WER 52.7 | 21519 61705 | 52.8 34.4 12.8 5.5 52.7 32.8 | -0.921 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 51.7 | 21519 61705 | 54.0 33.6 12.3 5.8 51.7 32.4 | -1.063 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 59.6 | 21519 61705 | 48.1 38.9 13.1 7.6 59.6 33.8 | -2.049 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21519 61705 | 53.9 31.7 14.3 4.6 50.7 32.3 | -0.810 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 50.3 | 21519 61705 | 54.2 29.1 16.7 4.5 50.3 32.2 | -0.736 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T20:35:22-05:00 +%WER 52.7 | 21519 61705 | 52.8 34.4 12.8 5.5 52.7 32.8 | -0.921 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 51.7 | 21519 61705 | 54.0 33.6 12.3 5.8 51.7 32.4 | -1.063 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 59.6 | 21519 61705 | 48.1 38.9 13.1 7.6 59.6 33.8 | -2.049 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21519 61705 | 53.9 31.7 14.3 4.6 50.7 32.3 | -0.810 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 50.3 | 21519 61705 | 54.2 29.1 16.7 4.5 50.3 32.2 | -0.736 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 50.9 | 21519 61705 | 54.5 33.1 12.5 5.4 50.9 32.1 | -0.813 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-02T08:22:19-05:00 +%WER 50.9 | 21519 61705 | 54.5 33.1 12.5 5.4 50.9 32.1 | -0.813 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +%WER 45.6 | 21519 61705 | 58.0 29.3 12.7 3.7 45.6 31.2 | -1.354 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.0 | 21519 61705 | 58.2 29.4 12.4 4.1 46.0 31.4 | -1.051 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 68.4 | 21519 61705 | 34.6 32.8 32.7 2.9 68.4 35.4 | -1.082 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 52.7 | 21519 61705 | 52.8 34.4 12.8 5.5 52.7 32.8 | -0.921 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 51.7 | 21519 61705 | 54.0 33.6 12.3 5.8 51.7 32.4 | -1.063 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 59.6 | 21519 61705 | 48.1 38.9 13.1 7.6 59.6 33.8 | -2.049 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 64.1 | 21519 61705 | 43.0 41.1 15.8 7.2 64.1 34.7 | -1.573 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21519 61705 | 53.9 31.7 14.3 4.6 50.7 32.3 | -0.810 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 50.3 | 21519 61705 | 54.2 29.1 16.7 4.5 50.3 32.2 | -0.736 | 
exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-03T07:26:39-05:00 +%WER 50.9 | 21519 61705 | 54.5 33.1 12.5 5.4 50.9 32.1 | -0.813 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys +%WER 45.6 | 21519 61705 | 58.0 29.3 12.7 3.7 45.6 31.2 | -1.354 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.0 | 21519 61705 | 58.2 29.4 12.4 4.1 46.0 31.4 | -1.051 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 51.2 | 21519 61705 | 53.1 31.1 15.8 4.3 51.2 32.4 | -0.826 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 68.4 | 21519 61705 | 34.6 32.8 32.7 2.9 68.4 35.4 | -1.082 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 52.7 | 21519 61705 | 52.8 34.4 12.8 5.5 52.7 32.8 | -0.921 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 51.7 | 21519 61705 | 54.0 33.6 12.3 5.8 51.7 32.4 | -1.063 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 59.6 | 21519 61705 | 48.1 38.9 13.1 7.6 59.6 33.8 | -2.049 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 64.1 | 21519 61705 | 43.0 41.1 15.8 7.2 64.1 34.7 | -1.573 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21519 61705 | 53.9 31.7 14.3 4.6 50.7 32.3 | -0.810 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 50.3 | 21519 61705 | 54.2 29.1 16.7 4.5 50.3 32.2 | -0.736 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-05T10:56:45-05:00 b/egs/babel/s5d/results/results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-05T10:56:45-05:00 new file mode 100644 index 00000000000..464362cf7e3 --- /dev/null +++ b/egs/babel/s5d/results/results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-05T10:56:45-05:00 @@ -0,0 +1,52 @@ +#Created on 2016-03-05T10:56:45-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-03-05T10:56:48-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-06T13:53:27-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 
2016-03-06T15:21:54-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-07T10:43:21-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 58.0 | 21958 102699 | 45.6 34.5 19.9 3.7 58.0 33.7 | -1.097 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 62.3 | 21958 102699 | 42.1 37.8 20.2 4.4 62.3 34.4 | -0.645 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-08T07:31:46-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 58.0 | 21958 102699 | 45.6 34.5 19.9 3.7 58.0 33.7 | -1.097 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 62.3 | 21958 102699 | 42.1 37.8 20.2 4.4 62.3 34.4 | -0.645 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 61.5 | 21958 102699 | 43.6 36.1 20.3 5.1 61.5 34.2 | -0.641 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-10T23:23:15-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 62.2 | 21958 102699 | 43.3 39.2 17.4 5.6 62.2 34.1 | -0.795 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_19/dev10h.pem.ctm.sys +%WER 58.0 | 21958 102699 | 45.6 34.5 19.9 3.7 58.0 33.7 | -1.097 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 
41.0 37.2 21.9 4.1 63.2 34.6 | -0.723 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 62.3 | 21958 102699 | 42.1 37.8 20.2 4.4 62.3 34.4 | -0.645 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 61.5 | 21958 102699 | 43.6 36.1 20.3 5.1 61.5 34.2 | -0.641 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T09:46:16-05:00 b/egs/babel/s5d/results/results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T09:46:16-05:00 new file mode 100644 index 00000000000..9950a8f11a0 --- /dev/null +++ b/egs/babel/s5d/results/results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T09:46:16-05:00 @@ -0,0 +1,48 @@ +#Created on 2016-02-25T09:46:16-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-25T09:46:16-05:00 +%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 48.8 | 21823 59749 | 55.6 34.0 10.3 4.4 48.8 30.5 | -0.773 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-25T15:01:39-05:00 +%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 47.2 | 21823 59749 | 55.9 30.1 14.0 3.1 47.2 30.3 | -0.514 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_11/dev10h.pem.ctm.sys +%WER 48.8 | 21823 59749 | 55.6 34.0 10.3 4.4 48.8 30.5 | -0.773 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T06:40:02-05:00 +%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | 
exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 47.2 | 21823 59749 | 55.9 30.1 14.0 3.1 47.2 30.3 | -0.514 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_11/dev10h.pem.ctm.sys +%WER 48.8 | 21823 59749 | 55.6 34.0 10.3 4.4 48.8 30.5 | -0.773 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T08:49:16-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 48.8 | 21823 59749 | 55.6 34.3 10.1 4.5 48.8 30.5 | -0.743 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_17/dev10h.pem.ctm.sys +%WER 43.9 | 21823 59749 | 59.3 31.0 9.7 3.2 43.9 29.5 | -0.869 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.5 | 21823 59749 | 56.0 32.9 11.2 3.5 47.5 30.3 | -0.655 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 63.0 | 21823 59749 | 42.7 42.8 14.5 5.7 63.0 33.3 | -1.302 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 47.2 | 21823 59749 | 56.0 30.4 13.6 3.2 47.2 30.3 | -0.552 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-05T08:07:40-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 48.8 | 21823 59749 | 55.6 34.3 10.1 4.5 48.8 30.5 | -0.743 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_17/dev10h.pem.ctm.sys +%WER 43.5 | 21823 59749 | 59.6 31.1 9.3 3.1 43.5 29.3 | -1.116 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 43.9 | 21823 59749 | 59.3 31.0 9.7 3.2 43.9 29.5 | -0.869 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.5 | 21823 59749 | 56.0 32.9 11.2 3.5 47.5 30.3 | -0.655 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 49.7 | 21823 59749 | 53.8 33.6 12.6 3.5 49.7 31.0 | -0.709 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 63.0 | 21823 59749 | 42.7 42.8 14.5 5.7 63.0 33.3 | -1.302 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 47.2 | 21823 59749 | 56.0 30.4 13.6 3.2 47.2 30.3 | -0.552 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/score_11/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T16:17:55-05:00 b/egs/babel/s5d/results/results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T16:17:55-05:00 new file mode 100644 index 00000000000..051d40b6f10 --- /dev/null 
+++ b/egs/babel/s5d/results/results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T16:17:55-05:00 @@ -0,0 +1,34 @@ +#Created on 2016-02-26T16:17:55-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-26T16:17:55-05:00 +%WER 59.9 | 23997 87709 | 44.9 36.6 18.5 4.8 59.9 36.0 | -0.664 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 23997 87709 | 48.0 36.7 15.3 6.1 58.1 35.8 | -0.932 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 65.6 | 23997 87709 | 40.8 41.2 18.0 6.5 65.6 36.9 | -1.703 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 56.1 | 23997 87709 | 47.9 33.7 18.4 4.0 56.1 35.3 | -0.545 | exp/tri6_nnet/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 54.0 | 23997 87709 | 50.5 32.3 17.2 4.5 54.0 35.0 | -0.502 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 56.9 | 23997 87709 | 48.7 35.2 16.1 5.7 56.9 35.3 | -0.747 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-03T10:32:48-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 56.9 | 23997 87709 | 48.7 35.2 16.1 5.7 56.9 35.3 | -0.747 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +%WER 48.3 | 23997 87709 | 55.7 30.1 14.2 4.1 48.3 33.9 | -1.338 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 23997 87709 | 55.0 31.0 13.9 4.4 49.3 34.0 | -1.017 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 77.7 | 23997 87709 | 24.3 32.5 43.2 2.0 77.7 40.1 | -1.550 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 59.9 | 23997 87709 | 44.9 36.6 18.5 4.8 59.9 36.0 | -0.664 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 23997 87709 | 48.0 36.7 15.3 6.1 58.1 35.8 | -0.932 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 65.6 | 23997 87709 | 40.8 41.2 18.0 6.5 65.6 36.9 | -1.703 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 69.9 | 23997 87709 | 35.7 42.4 21.9 5.5 69.9 37.7 | -1.140 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 56.1 | 23997 87709 | 47.9 33.7 18.4 4.0 56.1 35.3 | -0.545 | exp/tri6_nnet/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 54.0 | 23997 87709 | 50.5 32.3 17.2 4.5 54.0 35.0 | -0.502 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T08:49:22-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 56.9 | 23997 87709 | 48.7 35.2 16.1 5.7 56.9 35.3 | -0.747 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +%WER 48.3 | 23997 87709 | 55.7 30.1 14.2 4.1 48.3 33.9 | -1.338 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 23997 87709 | 55.0 31.0 13.9 4.4 49.3 34.0 | -1.017 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 54.4 | 23997 87709 | 49.7 33.8 16.5 4.2 54.4 35.5 | -0.684 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 77.7 | 23997 87709 | 24.3 32.5 43.2 2.0 77.7 40.1 | -1.550 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 59.9 | 23997 87709 | 44.9 36.6 18.5 4.8 59.9 36.0 | -0.664 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 23997 87709 | 48.0 36.7 15.3 6.1 58.1 35.8 | -0.932 | 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 65.6 | 23997 87709 | 40.8 41.2 18.0 6.5 65.6 36.9 | -1.703 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 69.9 | 23997 87709 | 35.7 42.4 21.9 5.5 69.9 37.7 | -1.140 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 56.1 | 23997 87709 | 47.9 33.7 18.4 4.0 56.1 35.3 | -0.545 | exp/tri6_nnet/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 54.0 | 23997 87709 | 50.5 32.3 17.2 4.5 54.0 35.0 | -0.502 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T06:40:39-05:00 b/egs/babel/s5d/results/results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T06:40:39-05:00 new file mode 100644 index 00000000000..9ad464aa2e7 --- /dev/null +++ b/egs/babel/s5d/results/results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T06:40:39-05:00 @@ -0,0 +1,41 @@ +#Created on 2016-02-26T06:40:39-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-26T06:40:39-05:00 +%WER 62.9 | 23669 65293 | 42.6 42.8 14.6 5.6 62.9 37.0 | -1.205 | exp/sgmm5/decode_fmllr_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.1 | 23669 65293 | 43.1 42.0 14.9 5.3 62.1 36.9 | -1.329 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_12/dev10h.pem.ctm.sys +%WER 69.0 | 23669 65293 | 39.0 48.1 12.9 8.0 69.0 37.9 | -2.509 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 60.4 | 23669 65293 | 43.6 39.6 16.8 4.0 60.4 36.7 | -1.005 | exp/tri6_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 60.3 | 23669 65293 | 43.2 35.6 21.2 3.5 60.3 36.8 | -0.819 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T12:50:32-05:00 +%WER 62.9 | 23669 65293 | 42.6 42.8 14.6 5.6 62.9 37.0 | -1.205 | exp/sgmm5/decode_fmllr_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.1 | 23669 65293 | 43.1 42.0 14.9 5.3 62.1 36.9 | -1.329 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_12/dev10h.pem.ctm.sys +%WER 69.0 | 23669 65293 | 39.0 48.1 12.9 8.0 69.0 37.9 | -2.509 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 60.4 | 23669 65293 | 43.6 39.6 16.8 4.0 60.4 36.7 | -1.005 | exp/tri6_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 60.3 | 23669 65293 | 43.2 35.6 21.2 3.5 60.3 36.8 | -0.819 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys +%WER 60.8 | 23669 65293 | 44.0 41.1 14.9 4.8 60.8 36.6 | -1.077 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-03T16:56:30-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 60.8 | 23669 65293 | 44.0 41.1 14.9 4.8 60.8 36.6 | -1.077 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +%WER 54.0 | 23669 65293 | 49.5 37.0 13.5 3.5 54.0 35.3 | -1.581 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 55.1 | 23669 65293 | 48.2 35.9 15.9 3.3 55.1 35.5 | -0.993 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 64.0 | 23669 65293 | 40.1 41.3 18.6 4.0 64.0 37.7 | -1.205 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 62.9 | 23669 65293 | 42.6 42.8 14.6 5.6 62.9 37.0 | -1.205 | exp/sgmm5/decode_fmllr_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.1 | 23669 65293 | 43.1 42.0 14.9 5.3 62.1 36.9 | -1.329 | 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_12/dev10h.pem.ctm.sys +%WER 69.0 | 23669 65293 | 39.0 48.1 12.9 8.0 69.0 37.9 | -2.509 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 72.5 | 23669 65293 | 34.8 49.2 16.1 7.3 72.5 38.6 | -1.941 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 60.4 | 23669 65293 | 43.6 39.6 16.8 4.0 60.4 36.7 | -1.005 | exp/tri6_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 60.3 | 23669 65293 | 43.2 35.6 21.2 3.5 60.3 36.8 | -0.819 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T08:51:39-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 60.8 | 23669 65293 | 44.0 41.1 14.9 4.8 60.8 36.6 | -1.077 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +%WER 54.0 | 23669 65293 | 49.5 37.0 13.5 3.5 54.0 35.3 | -1.581 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 55.1 | 23669 65293 | 48.2 35.9 15.9 3.3 55.1 35.5 | -0.993 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 61.5 | 23669 65293 | 42.1 38.8 19.1 3.6 61.5 36.9 | -0.881 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.0 | 23669 65293 | 40.1 41.3 18.6 4.0 64.0 37.7 | -1.205 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 62.9 | 23669 65293 | 42.6 42.8 14.6 5.6 62.9 37.0 | -1.205 | exp/sgmm5/decode_fmllr_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.1 | 23669 65293 | 43.1 42.0 14.9 5.3 62.1 36.9 | -1.329 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_12/dev10h.pem.ctm.sys +%WER 69.0 | 23669 65293 | 39.0 48.1 12.9 8.0 69.0 37.9 | -2.509 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 72.5 | 23669 65293 | 34.8 49.2 16.1 7.3 72.5 38.6 | -1.941 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 60.4 | 23669 65293 | 43.6 39.6 16.8 4.0 60.4 36.7 | -1.005 | exp/tri6_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 60.3 | 23669 65293 | 43.2 35.6 21.2 3.5 60.3 36.8 | -0.819 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T23:27:09-05:00 b/egs/babel/s5d/results/results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T23:27:09-05:00 new file mode 100644 index 00000000000..fc7382101b2 --- /dev/null +++ b/egs/babel/s5d/results/results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T23:27:09-05:00 @@ -0,0 +1,54 @@ +#Created on 2016-02-25T23:27:09-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-25T23:27:09-05:00 +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T20:37:15-05:00 +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | 
exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-02T00:59:22-05:00 +%WER 39.4 | 23451 78254 | 64.1 26.4 9.5 3.4 39.4 28.0 | -1.018 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-02T01:34:27-05:00 +%WER 39.4 | 23451 78254 | 64.1 26.4 9.5 3.4 39.4 28.0 | -1.018 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 40.4 | 23451 78254 | 62.7 26.6 10.7 3.1 40.4 28.1 | -0.618 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 58.6 | 23451 78254 | 47.4 40.4 12.3 5.9 58.6 31.4 | -0.991 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-02T08:19:30-05:00 +%WER 39.4 | 23451 78254 | 64.1 26.4 9.5 3.4 39.4 28.0 | -1.018 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 40.4 | 23451 78254 | 62.7 26.6 10.7 3.1 40.4 28.1 | -0.618 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 58.4 30.4 11.3 4.0 45.6 29.4 | -0.575 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 58.6 | 23451 78254 | 47.4 40.4 12.3 5.9 58.6 31.4 | -0.991 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 
45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-03T08:38:46-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +%WER 39.4 | 23451 78254 | 64.1 26.4 9.5 3.4 39.4 28.0 | -1.018 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 40.4 | 23451 78254 | 62.7 26.6 10.7 3.1 40.4 28.1 | -0.618 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 44.1 | 23451 78254 | 59.9 29.6 10.5 4.0 44.1 29.1 | -0.535 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 58.4 30.4 11.3 4.0 45.6 29.4 | -0.575 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 58.6 | 23451 78254 | 47.4 40.4 12.3 5.9 58.6 31.4 | -0.991 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/run-1-main-extend-lex.sh b/egs/babel/s5d/run-1-main-extend-lex.sh new file mode 100755 index 00000000000..035049dad9c --- /dev/null +++ b/egs/babel/s5d/run-1-main-extend-lex.sh @@ -0,0 +1,190 @@ +#!/bin/bash + +# Parameters for extended lexicon. +extend_lexicon=true +unk_fraction_boost=1.0 +num_sent_gen=12000000 +num_prons=1000000 + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh + +set -e #Exit on non-zero return code from any command +set -o pipefail #Exit if any of the commands in the pipeline will + #return non-zero return code +#set -u #Fail on an undefined variable + +lexicon=data/local/lexicon.txt +if $extend_lexicon; then + lexicon=data/local/lexiconp.txt +fi + +#Preparing dev2h and train directories +if [ ! -f data/raw_train_data/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the TRAIN set" + echo --------------------------------------------------------------------- + + local/make_corpus_subset.sh "$train_data_dir" "$train_data_list" ./data/raw_train_data + train_data_dir=`readlink -f ./data/raw_train_data` + touch data/raw_train_data/.done +fi +nj_max=`cat $train_data_list | wc -l` +if [[ "$nj_max" -lt "$train_nj" ]] ; then + echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)" + exit 1; + train_nj=$nj_max +fi +train_data_dir=`readlink -f ./data/raw_train_data` + +if [ ! 
-d data/raw_dev2h_data ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the DEV2H set" + echo --------------------------------------------------------------------- + local/make_corpus_subset.sh "$dev2h_data_dir" "$dev2h_data_list" ./data/raw_dev2h_data || exit 1 +fi + +if [ ! -d data/raw_dev10h_data ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the DEV10H set" + echo --------------------------------------------------------------------- + local/make_corpus_subset.sh "$dev10h_data_dir" "$dev10h_data_list" ./data/raw_dev10h_data || exit 1 +fi +nj_max=`cat $dev2h_data_list | wc -l` +if [[ "$nj_max" -lt "$decode_nj" ]] ; then + echo "The maximum reasonable number of jobs is $nj_max -- you have $decode_nj! (The training and decoding process has file-granularity)" + exit 1 + decode_nj=$nj_max +fi + +# Move data/dev2h preparation forward so we can get data/dev2h/text for +# diagnostic purpose when extending the lexicon. +if [[ ! -f data/dev2h/wav.scp || data/dev2h/wav.scp -ot ./data/raw_dev2h_data/audio ]]; then + echo --------------------------------------------------------------------- + echo "Preparing dev2h data lists in data/dev2h on" `date` + echo --------------------------------------------------------------------- + mkdir -p data/dev2h + local/prepare_acoustic_training_data.pl \ + --fragmentMarkers \-\*\~ \ + `pwd`/data/raw_dev2h_data data/dev2h > data/dev2h/skipped_utts.log || exit 1 +fi + +if [[ ! -f data/dev2h/glm || data/dev2h/glm -ot "$glmFile" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing dev2h stm files in data/dev2h on" `date` + echo --------------------------------------------------------------------- + if [ -z $dev2h_stm_file ]; then + echo "WARNING: You should define the variable stm_file pointing to the IndusDB stm" + echo "WARNING: Doing that, it will give you scoring close to the NIST scoring. " + local/prepare_stm.pl --fragmentMarkers \-\*\~ data/dev2h || exit 1 + else + local/augment_original_stm.pl $dev2h_stm_file data/dev2h || exit 1 + fi + [ ! -z $glmFile ] && cp $glmFile data/dev2h/glm + +fi + +mkdir -p data/local +if [[ ! -f $lexicon || $lexicon -ot "$lexicon_file" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing lexicon in data/local on" `date` + echo --------------------------------------------------------------------- + local/make_lexicon_subset.sh $train_data_dir/transcription $lexicon_file data/local/filtered_lexicon.txt + local/prepare_lexicon.pl --phonemap "$phoneme_mapping" \ + $lexiconFlags data/local/filtered_lexicon.txt data/local + if $extend_lexicon; then + # Extend the original lexicon. + # Will creates the files data/local/extend/{lexiconp.txt,oov2prob}. + mv data/local/lexicon.txt data/local/lexicon_orig.txt + local/extend_lexicon.sh --cmd "$train_cmd" --cleanup false \ + --num-sent-gen $num_sent_gen --num-prons $num_prons \ + data/local/lexicon_orig.txt data/local/extend data/dev2h/text + cp data/local/extend/lexiconp.txt data/local/ + fi +fi + +mkdir -p data/lang +if [[ ! 
-f data/lang/L.fst || data/lang/L.fst -ot $lexicon ]]; then + echo --------------------------------------------------------------------- + echo "Creating L.fst etc in data/lang on" `date` + echo --------------------------------------------------------------------- + utils/prepare_lang.sh \ + --share-silence-phones true \ + data/local $oovSymbol data/local/tmp.lang data/lang +fi + +if [[ ! -f data/train/wav.scp || data/train/wav.scp -ot "$train_data_dir" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing acoustic training lists in data/train on" `date` + echo --------------------------------------------------------------------- + mkdir -p data/train + local/prepare_acoustic_training_data.pl \ + --vocab $lexicon --fragmentMarkers \-\*\~ \ + $train_data_dir data/train > data/train/skipped_utts.log +fi + +if [[ ! -f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then + echo --------------------------------------------------------------------- + echo "Training SRILM language models on" `date` + echo --------------------------------------------------------------------- + # If extending the lexicon, use "--words-file data/local/lexicon_orig.txt" so + # that the LM is trained just on the vocab that appears in the text. Will add + # in the OOVs later. + words_file_param=() + if $extend_lexicon; then + words_file_param=(--words-file data/local/lexicon_orig.txt) + fi + local/train_lms_srilm.sh "${words_file_param[@]}" \ + --dev-text data/dev2h/text --oov-symbol "$oovSymbol"\ + --train-text data/train/text data data/srilm +fi + +if [[ ! -f data/lang/G.fst || data/lang/G.fst -ot data/srilm/lm.gz ||\ + ( -f data/local/extend/oov2prob &&\ + data/lang/G.fst -ot data/local/extend/oov2prob ) ]]; then + echo --------------------------------------------------------------------- + echo "Creating G.fst on " `date` + echo --------------------------------------------------------------------- + extend_lexicon_param=() + if $extend_lexicon; then + [ -f data/local/extend/original_oov_rates ] || exit 1; + unk_fraction=`cat data/local/extend/original_oov_rates |\ + grep "token" | awk -v x=$unk_fraction_boost '{print $NF/100.0*x}'` + extend_lexicon_param=(--cleanup false --unk-fraction $unk_fraction \ + --oov-prob-file data/local/extend/oov2prob) + fi + local/arpa2G.sh ${extend_lexicon_param[@]} \ + data/srilm/lm.gz data/lang data/lang +fi +decode_nj=$dev2h_nj + +echo --------------------------------------------------------------------- +echo "Starting plp feature extraction for data/train in plp on" `date` +echo --------------------------------------------------------------------- + +if [ ! -f data/train/.plp.done ]; then + if $use_pitch; then + steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp_pitch/train plp + else + steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp/train plp + fi + utils/fix_data_dir.sh data/train + steps/compute_cmvn_stats.sh data/train exp/make_plp/train plp + utils/fix_data_dir.sh data/train + touch data/train/.plp.done +fi + +touch data/.extlex + +echo ------------------------------------------------------------------------- +echo "Extended lexicon finished on" `date`. 
Now running script run-1-main.sh +echo ------------------------------------------------------------------------- +./run-1-main.sh +exit 0 diff --git a/egs/babel/s5d/run-1-main-unicode.sh b/egs/babel/s5d/run-1-main-unicode.sh new file mode 100755 index 00000000000..e3fb2486c83 --- /dev/null +++ b/egs/babel/s5d/run-1-main-unicode.sh @@ -0,0 +1,385 @@ +#!/bin/bash + +# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first. +tri5_only=false +sgmm5_only=false +denlats_only=false +data_only=false +morfessor=true +tag_percentage=0.1 + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh + +set -e #Exit on non-zero return code from any command +set -o pipefail #Exit if any of the commands in the pipeline will + #return non-zero return code +#set -u #Fail on an undefined variable + +lexicon=data/local/lexicon.txt +if $extend_lexicon; then + lexicon=data/local/lexiconp.txt +fi + +./local/check_tools.sh || exit 1 + +#Preparing dev2h and train directories +if [ ! -f data/raw_train_data/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the TRAIN set" + echo --------------------------------------------------------------------- + + local/make_corpus_subset.sh "$train_data_dir" "$train_data_list" ./data/raw_train_data + train_data_dir=`readlink -f ./data/raw_train_data` + touch data/raw_train_data/.done +fi +nj_max=`cat $train_data_list | wc -l` +if [[ "$nj_max" -lt "$train_nj" ]] ; then + echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)" + exit 1; + train_nj=$nj_max +fi +train_data_dir=`readlink -f ./data/raw_train_data` + +if [ ! -d data/raw_dev10h_data ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the DEV10H set" + echo --------------------------------------------------------------------- + local/make_corpus_subset.sh "$dev10h_data_dir" "$dev10h_data_list" ./data/raw_dev10h_data || exit 1 +fi + + +mkdir -p data/local +if [[ ! -f $lexicon || $lexicon -ot "$lexicon_file" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing lexicon in data/local on" `date` + echo --------------------------------------------------------------------- + + local/lexicon/make_word_list.py $train_data_dir/filelist.list $train_data_dir/transcription data/local/word_list.txt + echo -e " SIL\n \n \n " > data/local/nonspeech.txt + echo -e " " > data/local/extraspeech.txt + + fmt="word_list" + if $morfessor; then + fmt="morfessor" + morfessor-train --encoding=utf_8 --traindata-list -f"-_" -s data/local/morfessor.bin \ + data/local/word_list.txt + morfessor-segment --encoding=utf_8 --output-format-separator '.' 
--viterbi-maxlen 3 \ + -l data/local/morfessor.bin <(cut -d' ' -f2 data/local/word_list.txt) \ + | sed 's/\.[\_\-]\././g' > data/local/segments + cut -d' ' data/local/word_list.txt -f2 | paste -d' ' - data/local/segments > data/local/word_list_tmp.txt + mv data/local/word_list_tmp.txt data/local/word_list.txt + fi + + local/lexicon/make_unicode_lexicon.py --tag_percentage $tag_percentage --fmt $fmt \ + --nonspeech data/local/nonspeech.txt --extraspeech data/local/extraspeech.txt \ + --verbose data/local/word_list.txt data/local/lexicon.txt + local/prepare_unicode_lexicon.py --nonspeech data/local/nonspeech.txt \ + --extraspeech data/local/extraspeech.txt data/local/lexicon_table.txt data/local + cp data/local/lexicon.txt data/local/filtered_lexicon.txt +fi + +mkdir -p data/lang +if [[ ! -f data/lang/L.fst || data/lang/L.fst -ot $lexicon ]]; then + echo --------------------------------------------------------------------- + echo "Creating L.fst etc in data/lang on" `date` + echo --------------------------------------------------------------------- + utils/prepare_lang.sh \ + --share-silence-phones true \ + data/local $oovSymbol data/local/tmp.lang data/lang +fi + +if [[ ! -f data/train/wav.scp || data/train/wav.scp -ot "$train_data_dir" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing acoustic training lists in data/train on" `date` + echo --------------------------------------------------------------------- + mkdir -p data/train + local/prepare_acoustic_training_data.pl \ + --vocab $lexicon --fragmentMarkers \-\*\~ \ + $train_data_dir data/train > data/train/skipped_utts.log +fi + +if [[ ! -f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then + echo --------------------------------------------------------------------- + echo "Training SRILM language models on" `date` + echo --------------------------------------------------------------------- + local/train_lms_srilm.sh --oov-symbol "$oovSymbol"\ + --train-text data/train/text data data/srilm +fi + +if [[ ! -f data/lang/G.fst || data/lang/G.fst -ot data/srilm/lm.gz ]]; then + echo --------------------------------------------------------------------- + echo "Creating G.fst on " `date` + echo --------------------------------------------------------------------- + local/arpa2G.sh data/srilm/lm.gz data/lang data/lang +fi + +echo --------------------------------------------------------------------- +echo "Starting plp feature extraction for data/train in plp on" `date` +echo --------------------------------------------------------------------- + +if [ ! -f data/train/.plp.done ]; then + if $use_pitch; then + steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp_pitch/train plp + else + steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp/train plp + fi + utils/fix_data_dir.sh data/train + steps/compute_cmvn_stats.sh data/train exp/make_plp/train plp + utils/fix_data_dir.sh data/train + touch data/train/.plp.done +fi + +mkdir -p exp + +if [ ! 
-f data/train_sub3/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting monophone training data in data/train_sub[123] on" `date` + echo --------------------------------------------------------------------- + numutt=`cat data/train/feats.scp | wc -l`; + utils/subset_data_dir.sh data/train 5000 data/train_sub1 + if [ $numutt -gt 10000 ] ; then + utils/subset_data_dir.sh data/train 10000 data/train_sub2 + else + (cd data; ln -s train train_sub2 ) + fi + if [ $numutt -gt 20000 ] ; then + utils/subset_data_dir.sh data/train 20000 data/train_sub3 + else + (cd data; ln -s train train_sub3 ) + fi + + touch data/train_sub3/.done +fi + +if $data_only; then + echo "--data-only is true" && exit 0 +fi + +if [ ! -f exp/mono/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting (small) monophone training in exp/mono on" `date` + echo --------------------------------------------------------------------- + steps/train_mono.sh \ + --boost-silence $boost_sil --nj 8 --cmd "$train_cmd" \ + data/train_sub1 data/lang exp/mono + touch exp/mono/.done +fi + +if [ ! -f exp/tri1/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting (small) triphone training in exp/tri1 on" `date` + echo --------------------------------------------------------------------- + steps/align_si.sh \ + --boost-silence $boost_sil --nj 12 --cmd "$train_cmd" \ + data/train_sub2 data/lang exp/mono exp/mono_ali_sub2 + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" $numLeavesTri1 $numGaussTri1 \ + data/train_sub2 data/lang exp/mono_ali_sub2 exp/tri1 + + touch exp/tri1/.done +fi + + +echo --------------------------------------------------------------------- +echo "Starting (medium) triphone training in exp/tri2 on" `date` +echo --------------------------------------------------------------------- +if [ ! -f exp/tri2/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj 24 --cmd "$train_cmd" \ + data/train_sub3 data/lang exp/tri1 exp/tri1_ali_sub3 + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" $numLeavesTri2 $numGaussTri2 \ + data/train_sub3 data/lang exp/tri1_ali_sub3 exp/tri2 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train_sub3 data/lang data/local/ \ + exp/tri2 data/local/dictp/tri2 data/local/langp/tri2 data/langp/tri2 + + touch exp/tri2/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (full) triphone training in exp/tri3 on" `date` +echo --------------------------------------------------------------------- +if [ ! -f exp/tri3/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri2 exp/tri2 exp/tri2_ali + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesTri3 $numGaussTri3 data/train data/langp/tri2 exp/tri2_ali exp/tri3 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local/ \ + exp/tri3 data/local/dictp/tri3 data/local/langp/tri3 data/langp/tri3 + + touch exp/tri3/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (lda_mllt) triphone training in exp/tri4 on" `date` +echo --------------------------------------------------------------------- +if [ ! 
-f exp/tri4/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri3 exp/tri3 exp/tri3_ali + + steps/train_lda_mllt.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesMLLT $numGaussMLLT data/train data/langp/tri3 exp/tri3_ali exp/tri4 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri4 data/local/dictp/tri4 data/local/langp/tri4 data/langp/tri4 + + touch exp/tri4/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (SAT) triphone training in exp/tri5 on" `date` +echo --------------------------------------------------------------------- + +if [ ! -f exp/tri5/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri4 exp/tri4 exp/tri4_ali + + steps/train_sat.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesSAT $numGaussSAT data/train data/langp/tri4 exp/tri4_ali exp/tri5 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri5 data/local/dictp/tri5 data/local/langp/tri5 data/langp/tri5 + + touch exp/tri5/.done +fi + + +if [ ! -f exp/tri5_ali/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/tri5_ali on" `date` + echo --------------------------------------------------------------------- + steps/align_fmllr.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri5 exp/tri5 exp/tri5_ali + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri5_ali data/local/dictp/tri5_ali data/local/langp/tri5_ali data/langp/tri5_ali + + touch exp/tri5_ali/.done +fi + +if [ ! -f data/langp_test/.done ]; then + cp -R data/langp/tri5_ali/ data/langp_test + cp data/lang/G.fst data/langp_test + touch data/langp_test/.done +fi + +if $tri5_only ; then + echo "Exiting after stage TRI5, as requested. " + echo "Everything went fine. Done" + exit 0; +fi + +################################################################################ +# Ready to start SGMM training +################################################################################ + +if [ ! -f exp/ubm5/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/ubm5 on" `date` + echo --------------------------------------------------------------------- + steps/train_ubm.sh \ + --cmd "$train_cmd" $numGaussUBM \ + data/train data/langp/tri5_ali exp/tri5_ali exp/ubm5 + touch exp/ubm5/.done +fi + +if [ ! -f exp/sgmm5/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5 on" `date` + echo --------------------------------------------------------------------- + steps/train_sgmm2.sh \ + --cmd "$train_cmd" $numLeavesSGMM $numGaussSGMM \ + data/train data/langp/tri5_ali exp/tri5_ali exp/ubm5/final.ubm exp/sgmm5 + #steps/train_sgmm2_group.sh \ + # --cmd "$train_cmd" "${sgmm_group_extra_opts[@]-}" $numLeavesSGMM $numGaussSGMM \ + # data/train data/lang exp/tri5_ali exp/ubm5/final.ubm exp/sgmm5 + touch exp/sgmm5/.done +fi + +if $sgmm5_only ; then + echo "Exiting after stage SGMM5, as requested. " + echo "Everything went fine. 
Done" + exit 0; +fi +################################################################################ +# Ready to start discriminative SGMM training +################################################################################ + +if [ ! -f exp/sgmm5_ali/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_ali on" `date` + echo --------------------------------------------------------------------- + steps/align_sgmm2.sh \ + --nj $train_nj --cmd "$train_cmd" --transform-dir exp/tri5_ali \ + --use-graphs true --use-gselect true \ + data/train data/lang exp/sgmm5 exp/sgmm5_ali + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/sgmm5_ali data/local/dictp/sgmm5 data/local/langp/sgmm5 data/langp/sgmm5 + + touch exp/sgmm5_ali/.done +fi + + +if [ ! -f exp/sgmm5_denlats/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_denlats on" `date` + echo --------------------------------------------------------------------- + steps/make_denlats_sgmm2.sh \ + --nj $train_nj --sub-split $train_nj "${sgmm_denlats_extra_opts[@]}" \ + --beam 10.0 --lattice-beam 6 --cmd "$decode_cmd" --transform-dir exp/tri5_ali \ + data/train data/langp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats + touch exp/sgmm5_denlats/.done +fi + + +if $denlats_only ; then + echo "Exiting after generating denlats, as requested. " + echo "Everything went fine. Done" + exit 0; +fi + + +if [ ! -f exp/sgmm5_mmi_b0.1/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_mmi_b0.1 on" `date` + echo --------------------------------------------------------------------- + steps/train_mmi_sgmm2.sh \ + --cmd "$train_cmd" "${sgmm_mmi_extra_opts[@]}" \ + --drop-frames true --transform-dir exp/tri5_ali --boost 0.1 \ + data/train data/langp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats \ + exp/sgmm5_mmi_b0.1 + touch exp/sgmm5_mmi_b0.1/.done +fi + +echo --------------------------------------------------------------------- +echo "Finished successfully on" `date` +echo --------------------------------------------------------------------- + +exit 0 diff --git a/egs/babel/s5d/run-1-main.sh b/egs/babel/s5d/run-1-main.sh new file mode 100755 index 00000000000..d85407f8db4 --- /dev/null +++ b/egs/babel/s5d/run-1-main.sh @@ -0,0 +1,363 @@ +#!/bin/bash + +# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first. +tri5_only=false +sgmm5_only=false +denlats_only=false +data_only=false + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh + +set -e #Exit on non-zero return code from any command +set -o pipefail #Exit if any of the commands in the pipeline will + #return non-zero return code +#set -u #Fail on an undefined variable + +lexicon=data/local/lexicon.txt +if $extend_lexicon; then + lexicon=data/local/lexiconp.txt +fi + +./local/check_tools.sh || exit 1 + +#Preparing dev2h and train directories +if [ ! 
-f data/raw_train_data/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the TRAIN set"
+  echo ---------------------------------------------------------------------
+
+  local/make_corpus_subset.sh "$train_data_dir" "$train_data_list" ./data/raw_train_data
+  train_data_dir=`readlink -f ./data/raw_train_data`
+  touch data/raw_train_data/.done
+fi
+nj_max=`cat $train_data_list | wc -l`
+if [[ "$nj_max" -lt "$train_nj" ]] ; then
+  echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)"
+  exit 1;
+fi
+train_data_dir=`readlink -f ./data/raw_train_data`
+
+if [ ! -d data/raw_dev10h_data ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the DEV10H set"
+  echo ---------------------------------------------------------------------
+  local/make_corpus_subset.sh "$dev10h_data_dir" "$dev10h_data_list" ./data/raw_dev10h_data || exit 1
+fi
+
+
+mkdir -p data/local
+if [[ ! -f $lexicon || $lexicon -ot "$lexicon_file" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing lexicon in data/local on" `date`
+  echo ---------------------------------------------------------------------
+  local/make_lexicon_subset.sh $train_data_dir/transcription $lexicon_file data/local/filtered_lexicon.txt
+  local/prepare_lexicon.pl --phonemap "$phoneme_mapping" \
+    $lexiconFlags data/local/filtered_lexicon.txt data/local
+fi
+
+mkdir -p data/lang
+if [[ ! -f data/lang/L.fst || data/lang/L.fst -ot $lexicon ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Creating L.fst etc in data/lang on" `date`
+  echo ---------------------------------------------------------------------
+  utils/prepare_lang.sh \
+    --share-silence-phones true \
+    data/local $oovSymbol data/local/tmp.lang data/lang
+fi
+
+if [[ ! -f data/train/wav.scp || data/train/wav.scp -ot "$train_data_dir" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing acoustic training lists in data/train on" `date`
+  echo ---------------------------------------------------------------------
+  mkdir -p data/train
+  local/prepare_acoustic_training_data.pl \
+    --vocab $lexicon --fragmentMarkers \-\*\~ \
+    $train_data_dir data/train > data/train/skipped_utts.log
+fi
+
+if [[ ! -f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Training SRILM language models on" `date`
+  echo ---------------------------------------------------------------------
+  local/train_lms_srilm.sh --oov-symbol "$oovSymbol"\
+    --train-text data/train/text data data/srilm
+fi
+
+if [[ ! -f data/lang/G.fst || data/lang/G.fst -ot data/srilm/lm.gz ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Creating G.fst on " `date`
+  echo ---------------------------------------------------------------------
+  local/arpa2G.sh data/srilm/lm.gz data/lang data/lang
+fi
+
+echo ---------------------------------------------------------------------
+echo "Starting plp feature extraction for data/train in plp on" `date`
+echo ---------------------------------------------------------------------
+
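+# Note (added for clarity): steps/make_plp.sh produces 13-dimensional PLP
+# features; when $use_pitch is true, steps/make_plp_pitch.sh appends the
+# three Kaldi pitch features (POV, pitch, delta-pitch), i.e. 16 dimensions
+# in total. Both variants are followed by CMVN stats computation below.
+if [ ! 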
-f data/train/.plp.done ]; then + if $use_pitch; then + steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp_pitch/train plp + else + steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp/train plp + fi + utils/fix_data_dir.sh data/train + steps/compute_cmvn_stats.sh data/train exp/make_plp/train plp + utils/fix_data_dir.sh data/train + touch data/train/.plp.done +fi + +mkdir -p exp + +if [ ! -f data/train_sub3/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting monophone training data in data/train_sub[123] on" `date` + echo --------------------------------------------------------------------- + numutt=`cat data/train/feats.scp | wc -l`; + utils/subset_data_dir.sh data/train 5000 data/train_sub1 + if [ $numutt -gt 10000 ] ; then + utils/subset_data_dir.sh data/train 10000 data/train_sub2 + else + (cd data; ln -s train train_sub2 ) + fi + if [ $numutt -gt 20000 ] ; then + utils/subset_data_dir.sh data/train 20000 data/train_sub3 + else + (cd data; ln -s train train_sub3 ) + fi + + touch data/train_sub3/.done +fi + +if $data_only; then + echo "--data-only is true" && exit 0 +fi + +if [ ! -f exp/mono/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting (small) monophone training in exp/mono on" `date` + echo --------------------------------------------------------------------- + steps/train_mono.sh \ + --boost-silence $boost_sil --nj 8 --cmd "$train_cmd" \ + data/train_sub1 data/lang exp/mono + touch exp/mono/.done +fi + +if [ ! -f exp/tri1/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting (small) triphone training in exp/tri1 on" `date` + echo --------------------------------------------------------------------- + steps/align_si.sh \ + --boost-silence $boost_sil --nj 12 --cmd "$train_cmd" \ + data/train_sub2 data/lang exp/mono exp/mono_ali_sub2 + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" $numLeavesTri1 $numGaussTri1 \ + data/train_sub2 data/lang exp/mono_ali_sub2 exp/tri1 + + touch exp/tri1/.done +fi + + +echo --------------------------------------------------------------------- +echo "Starting (medium) triphone training in exp/tri2 on" `date` +echo --------------------------------------------------------------------- +if [ ! -f exp/tri2/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj 24 --cmd "$train_cmd" \ + data/train_sub3 data/lang exp/tri1 exp/tri1_ali_sub3 + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" $numLeavesTri2 $numGaussTri2 \ + data/train_sub3 data/lang exp/tri1_ali_sub3 exp/tri2 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train_sub3 data/lang data/local/ \ + exp/tri2 data/local/dictp/tri2 data/local/langp/tri2 data/langp/tri2 + + touch exp/tri2/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (full) triphone training in exp/tri3 on" `date` +echo --------------------------------------------------------------------- +if [ ! 
-f exp/tri3/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri2 exp/tri2 exp/tri2_ali + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesTri3 $numGaussTri3 data/train data/langp/tri2 exp/tri2_ali exp/tri3 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local/ \ + exp/tri3 data/local/dictp/tri3 data/local/langp/tri3 data/langp/tri3 + + touch exp/tri3/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (lda_mllt) triphone training in exp/tri4 on" `date` +echo --------------------------------------------------------------------- +if [ ! -f exp/tri4/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri3 exp/tri3 exp/tri3_ali + + steps/train_lda_mllt.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesMLLT $numGaussMLLT data/train data/langp/tri3 exp/tri3_ali exp/tri4 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri4 data/local/dictp/tri4 data/local/langp/tri4 data/langp/tri4 + + touch exp/tri4/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (SAT) triphone training in exp/tri5 on" `date` +echo --------------------------------------------------------------------- + +if [ ! -f exp/tri5/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri4 exp/tri4 exp/tri4_ali + + steps/train_sat.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesSAT $numGaussSAT data/train data/langp/tri4 exp/tri4_ali exp/tri5 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri5 data/local/dictp/tri5 data/local/langp/tri5 data/langp/tri5 + + touch exp/tri5/.done +fi + + +if [ ! -f exp/tri5_ali/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/tri5_ali on" `date` + echo --------------------------------------------------------------------- + steps/align_fmllr.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri5 exp/tri5 exp/tri5_ali + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri5_ali data/local/dictp/tri5_ali data/local/langp/tri5_ali data/langp/tri5_ali + + touch exp/tri5_ali/.done +fi + +if [ ! -f data/langp_test/.done ]; then + cp -R data/langp/tri5_ali/ data/langp_test + cp data/lang/G.fst data/langp_test + touch data/langp_test/.done +fi + +if $tri5_only ; then + echo "Exiting after stage TRI5, as requested. " + echo "Everything went fine. Done" + exit 0; +fi + +################################################################################ +# Ready to start SGMM training +################################################################################ + +if [ ! -f exp/ubm5/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/ubm5 on" `date` + echo --------------------------------------------------------------------- + steps/train_ubm.sh \ + --cmd "$train_cmd" $numGaussUBM \ + data/train data/langp/tri5_ali exp/tri5_ali exp/ubm5 + touch exp/ubm5/.done +fi + +if [ ! 
-f exp/sgmm5/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5 on" `date` + echo --------------------------------------------------------------------- + steps/train_sgmm2.sh \ + --cmd "$train_cmd" $numLeavesSGMM $numGaussSGMM \ + data/train data/langp/tri5_ali exp/tri5_ali exp/ubm5/final.ubm exp/sgmm5 + #steps/train_sgmm2_group.sh \ + # --cmd "$train_cmd" "${sgmm_group_extra_opts[@]-}" $numLeavesSGMM $numGaussSGMM \ + # data/train data/lang exp/tri5_ali exp/ubm5/final.ubm exp/sgmm5 + touch exp/sgmm5/.done +fi + +if $sgmm5_only ; then + echo "Exiting after stage SGMM5, as requested. " + echo "Everything went fine. Done" + exit 0; +fi +################################################################################ +# Ready to start discriminative SGMM training +################################################################################ + +if [ ! -f exp/sgmm5_ali/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_ali on" `date` + echo --------------------------------------------------------------------- + steps/align_sgmm2.sh \ + --nj $train_nj --cmd "$train_cmd" --transform-dir exp/tri5_ali \ + --use-graphs true --use-gselect true \ + data/train data/lang exp/sgmm5 exp/sgmm5_ali + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/sgmm5_ali data/local/dictp/sgmm5 data/local/langp/sgmm5 data/langp/sgmm5 + + touch exp/sgmm5_ali/.done +fi + + +if [ ! -f exp/sgmm5_denlats/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_denlats on" `date` + echo --------------------------------------------------------------------- + steps/make_denlats_sgmm2.sh \ + --nj $train_nj --sub-split $train_nj "${sgmm_denlats_extra_opts[@]}" \ + --beam 10.0 --lattice-beam 6 --cmd "$decode_cmd" --transform-dir exp/tri5_ali \ + data/train data/langp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats + touch exp/sgmm5_denlats/.done +fi + + +if $denlats_only ; then + echo "Exiting after generating denlats, as requested. " + echo "Everything went fine. Done" + exit 0; +fi + + +if [ ! -f exp/sgmm5_mmi_b0.1/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_mmi_b0.1 on" `date` + echo --------------------------------------------------------------------- + steps/train_mmi_sgmm2.sh \ + --cmd "$train_cmd" "${sgmm_mmi_extra_opts[@]}" \ + --drop-frames true --transform-dir exp/tri5_ali --boost 0.1 \ + data/train data/langp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats \ + exp/sgmm5_mmi_b0.1 + touch exp/sgmm5_mmi_b0.1/.done +fi + +echo --------------------------------------------------------------------- +echo "Finished successfully on" `date` +echo --------------------------------------------------------------------- + +exit 0 diff --git a/egs/babel/s5d/run-2-segmentation.sh b/egs/babel/s5d/run-2-segmentation.sh new file mode 100755 index 00000000000..0ced3ffabac --- /dev/null +++ b/egs/babel/s5d/run-2-segmentation.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal) +# Apache 2.0 + +#Begin configuration section + +silence_segment_fraction=1.0 # What fraction of segment we should keep + +#end configuration section + +# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first. +[ ! 
-f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1
+[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+[ -f local.conf ] && . ./local.conf
+
+. ./utils/parse_options.sh
+
+set -e #Exit on non-zero return code from any command
+set -o pipefail #Exit if any of the commands in the pipeline will
+                #return non-zero return code
+set -u #Fail on an undefined variable
+
+#Later in the script we assume run-1-main.sh was run (because we are using exp/tri4),
+#so let's make it mandatory instead of redoing the work on our own.
+[ ! -f data/raw_train_data/.done ] && echo "The source training data directory is not ready. Use the run-1-main.sh script to prepare it!" && exit 1
+
+nj_max=`cat $train_data_list | wc -l`
+if [[ "$nj_max" -lt "$train_nj" ]] ; then
+  echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)"
+  exit 1;
+fi
+train_data_dir=`readlink -f ./data/raw_train_data`
+
+if [ ! -f data/train_seg/.done ]; then
+
+  mkdir -p data/train_seg
+
+  echo ---------------------------------------------------------------------
+  echo "Preparing acoustic training lists in data/train_seg on" `date`
+  echo ---------------------------------------------------------------------
+  local/prepare_acoustic_training_data.pl --get-whole-transcripts "true" \
+    --vocab data/local/lexicon.txt --fragmentMarkers \-\*\~ \
+    $train_data_dir data/train_seg > data/train_seg/skipped_utts.log
+  mv data/train_seg/text data/train_seg/text_orig
+
+  num_silence_segments=$(cat data/train_seg/text_orig | awk '{if (NF == 2 && $2 == "<silence>") {print $0}}' | wc -l)
+  # e.g. 2000 silence segments with silence_segment_fraction=0.25 gives 500.
+  num_keep_silence_segments=`perl -e "printf '%d', ($num_silence_segments * $silence_segment_fraction)"`
+  if [ $num_silence_segments -eq $num_keep_silence_segments ]; then
+    # Keep all segments including silence segments
+    cat data/train_seg/text_orig | awk '{if (NF == 2 && $2 == "<silence>") {print $1} else {print $0}}' > data/train_seg/text
+  else
+    # Keep only a fraction of silence segments
+
+    cat data/train_seg/text_orig \
+      | awk 'BEGIN{i=0} \
+      { \
+        if (NF == 2 && $2 == "<silence>") { \
+          if (i<'$num_keep_silence_segments') { \
+            print $1; \
+            i++; \
+          } \
+        } else {print $0}\
+      }' > data/train_seg/text
+  fi
+  #rm data/train_seg/text_orig
+  utils/fix_data_dir.sh data/train_seg
+
+  echo ---------------------------------------------------------------------
+  echo "Starting plp feature extraction for data/train_seg in plp on" `date`
+  echo ---------------------------------------------------------------------
+
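+  # The whole-conversation data should get the same frontend (PLP, optionally
+  # plus pitch) as data/train, since the segmenter model built below in
+  # exp/tri4b_seg is bootstrapped from the exp/tri4 system trained on those
+  # features.
+  if [ ! 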
-f data/train_seg/.plp.done ]; then
+    if $use_pitch; then
+      steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj \
+        data/train_seg exp/make_plp_pitch/train_seg plp
+    else
+      steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj \
+        data/train_seg exp/make_plp/train_seg plp
+    fi
+
+    utils/fix_data_dir.sh data/train_seg
+    steps/compute_cmvn_stats.sh data/train_seg exp/make_plp/train_seg plp
+    utils/fix_data_dir.sh data/train_seg
+    touch data/train_seg/.plp.done
+  fi
+  touch data/train_seg/.done
+fi
+
+echo ---------------------------------------------------------------------
+echo "Training segmentation model in exp/tri4b_seg"
+echo ---------------------------------------------------------------------
+
+local/resegment/train_segmentation.sh \
+  --boost-sil 1.0 --nj $train_nj --cmd "$decode_cmd" \
+  exp/tri4 data/train_seg data/lang exp/tri4b_seg || exit 1
+
+echo ---------------------------------------------------------------------
+echo "Finished successfully on" `date`
+echo ---------------------------------------------------------------------
+
+exit 0
diff --git a/egs/babel/s5d/run-2a-nnet-cpu.sh b/egs/babel/s5d/run-2a-nnet-cpu.sh
new file mode 100755
index 00000000000..35e7d3ceab3
--- /dev/null
+++ b/egs/babel/s5d/run-2a-nnet-cpu.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. conf/common_vars.sh
+. ./lang.conf
+
+# This parameter lets you resume training if a previous run died at a certain point.
+train_stage=-100
+dir=exp/tri6_nnet
+. ./utils/parse_options.sh
+
+set -e
+set -o pipefail
+set -u
+
+# Wait till the main run.sh gets to the stage where it's
+# finished aligning the tri5 model.
+echo "Waiting till exp/tri5_ali/.done exists...."
+while [ ! -f exp/tri5_ali/.done ]; do sleep 30; done
+echo "...done waiting for exp/tri5_ali/.done"
+
+if [ ! -f $dir/.done ]; then
+  steps/nnet2/train_pnorm.sh \
+    --stage $train_stage --mix-up $dnn_mixup \
+    --initial-learning-rate $dnn_init_learning_rate \
+    --final-learning-rate $dnn_final_learning_rate \
+    --num-hidden-layers $dnn_num_hidden_layers \
+    --pnorm-input-dim $dnn_input_dim \
+    --pnorm-output-dim $dnn_output_dim \
+    --cmd "$train_cmd" \
+    "${dnn_cpu_parallel_opts[@]}" \
+    data/train data/lang exp/tri5_ali $dir || exit 1
+
+  touch $dir/.done
+fi
diff --git a/egs/babel/s5d/run-2a-nnet-ensemble-gpu.sh b/egs/babel/s5d/run-2a-nnet-ensemble-gpu.sh
new file mode 100755
index 00000000000..06c9a330295
--- /dev/null
+++ b/egs/babel/s5d/run-2a-nnet-ensemble-gpu.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. conf/common_vars.sh
+. ./lang.conf
+
+train_stage=-10
+dir=exp/tri6b_nnet
+
+. ./utils/parse_options.sh
+
+set -e
+set -o pipefail
+set -u
+
+dnn_num_hidden_layers=4
+dnn_pnorm_input_dim=3000
+dnn_pnorm_output_dim=300
+dnn_init_learning_rate=0.004
+dnn_final_learning_rate=0.001
+temp_dir=`pwd`/nnet_gpu_egs
+ensemble_size=4
+initial_beta=0.1
+final_beta=5
+egs_dir=
+
+# Wait till the main run.sh gets to the stage where it's
+# finished aligning the tri5 model.
+echo "Waiting till exp/tri5_ali/.done exists...."
+while [ ! -f exp/tri5_ali/.done ]; do sleep 30; done
+echo "...done waiting for exp/tri5_ali/.done"
+
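+# Roughly speaking, ensemble training runs $ensemble_size networks in
+# parallel and pulls each network's output towards the averaged posteriors
+# of the ensemble; the strength of that agreement term is annealed from
+# --initial-beta to --final-beta (see steps/nnet2/train_pnorm_ensemble.sh).
+if [ ! 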
-f $dir/.done ]; then + steps/nnet2/train_pnorm_ensemble.sh \ + --stage $train_stage --mix-up $dnn_mixup --egs-dir "$egs_dir" \ + --initial-learning-rate $dnn_init_learning_rate \ + --final-learning-rate $dnn_final_learning_rate \ + --num-hidden-layers $dnn_num_hidden_layers \ + --pnorm-input-dim $dnn_pnorm_input_dim \ + --pnorm-output-dim $dnn_pnorm_output_dim \ + --cmd "$train_cmd" \ + "${dnn_gpu_parallel_opts[@]}" \ + --ensemble-size $ensemble_size --initial-beta $initial_beta --final-beta $final_beta \ + data/train data/lang exp/tri5_ali $dir || exit 1 + touch $dir/.done +fi + diff --git a/egs/babel/s5d/run-2a-nnet-gpu.sh b/egs/babel/s5d/run-2a-nnet-gpu.sh new file mode 100755 index 00000000000..55733006d75 --- /dev/null +++ b/egs/babel/s5d/run-2a-nnet-gpu.sh @@ -0,0 +1,36 @@ +#!/bin/bash +dir=exp/tri6_nnet +train_stage=-10 + +. conf/common_vars.sh +. ./lang.conf + +# This parameter will be used when the training dies at a certain point. +train_stage=-100 +. ./utils/parse_options.sh + +set -e +set -o pipefail +set -u + +# Wait till the main run.sh gets to the stage where's it's +# finished aligning the tri5 model. +echo "Waiting till exp/tri5_ali/.done exists...." +while [ ! -f exp/tri5_ali/.done ]; do sleep 30; done +echo "...done waiting for exp/tri5_ali/.done" + +if [ ! -f $dir/.done ]; then + steps/nnet2/train_pnorm_fast.sh \ + --stage $train_stage --mix-up $dnn_mixup \ + --initial-learning-rate $dnn_init_learning_rate \ + --final-learning-rate $dnn_final_learning_rate \ + --num-hidden-layers $dnn_num_hidden_layers \ + --pnorm-input-dim $dnn_input_dim \ + --pnorm-output-dim $dnn_output_dim \ + --cmd "$train_cmd" \ + "${dnn_gpu_parallel_opts[@]}" \ + data/train data/langp/tri5_ali exp/tri5_ali $dir || exit 1 + + touch $dir/.done +fi + diff --git a/egs/babel/s5d/run-2a-nnet-mpe.sh b/egs/babel/s5d/run-2a-nnet-mpe.sh new file mode 100755 index 00000000000..6ddddb4cfda --- /dev/null +++ b/egs/babel/s5d/run-2a-nnet-mpe.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +. conf/common_vars.sh +. ./lang.conf + +set -e +set -o pipefail +set -u + +# Wait for cross-entropy training. +echo "Waiting till exp/tri6_nnet/.done exists...." +while [ ! -f exp/tri6_nnet/.done ]; do sleep 30; done +echo "...done waiting for exp/tri6_nnet/.done" + +# Generate denominator lattices. +if [ ! -f exp/tri6_nnet_denlats/.done ]; then + steps/nnet2/make_denlats.sh --cmd "$decode_cmd" \ + --nj $train_nj --sub-split $train_nj \ + "${dnn_denlats_extra_opts[@]}" \ + --transform-dir exp/tri5_ali \ + data/train data/lang exp/tri6_nnet exp/tri6_nnet_denlats || exit 1 + + touch exp/tri6_nnet_denlats/.done +fi + +# Generate alignment. +if [ ! -f exp/tri6_nnet_ali/.done ]; then + steps/nnet2/align.sh --use-gpu yes \ + --cmd "$decode_cmd $dnn_parallel_opts" \ + --transform-dir exp/tri5_ali --nj $train_nj \ + data/train data/lang exp/tri6_nnet exp/tri6_nnet_ali || exit 1 + + touch exp/tri6_nnet_ali/.done +fi + +train_stage=-100 +if [ ! 
-f exp/tri6_nnet_mpe/.done ]; then
+  steps/nnet2/train_discriminative.sh \
+    --stage $train_stage --cmd "$decode_cmd" \
+    --learning-rate $dnn_mpe_learning_rate \
+    --modify-learning-rates true \
+    --last-layer-factor $dnn_mpe_last_layer_factor \
+    --num-epochs 4 --cleanup true \
+    --retroactive $dnn_mpe_retroactive \
+    --transform-dir exp/tri5_ali \
+    "${dnn_gpu_mpe_parallel_opts[@]}" data/train data/lang \
+    exp/tri6_nnet_ali exp/tri6_nnet_denlats exp/tri6_nnet/final.mdl exp/tri6_nnet_mpe || exit 1
+
+  touch exp/tri6_nnet_mpe/.done
+fi
diff --git a/egs/babel/s5d/run-2b-bnf.sh b/egs/babel/s5d/run-2b-bnf.sh
new file mode 100755
index 00000000000..bdca049d941
--- /dev/null
+++ b/egs/babel/s5d/run-2b-bnf.sh
@@ -0,0 +1,150 @@
+#!/bin/bash
+
+# Copyright 2014  Pegah Ghahremani
+# Apache 2.0
+
+# Run supervised and semi-supervised BNF training;
+# this yields approx. 70 hours of data.
+
+set -e #Exit on non-zero return code from any command
+set -o pipefail #Exit if any of the commands in the pipeline will
+                #return non-zero return code
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+set -u #Fail on an undefined variable
+skip_kws=true
+skip_stt=false
+semisupervised=true
+unsup_string="_semisup"
+train_stage=-100
+bnf_weight_threshold=0.35
+ali_dir=
+ali_model=exp/tri6b_nnet/
+weights_dir=exp/best_path_weights/unsup.seg/decode_unsup.seg/
+
+. ./utils/parse_options.sh
+
+if [ $babel_type == "full" ] && $semisupervised; then
+  echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false "
+  exit 1
+fi
+
+
+if $semisupervised ; then
+  egs_string="--egs-dir exp_bnf${unsup_string}/tri6_bnf/egs"
+  dirid=unsup.seg
+else
+  unsup_string="" # "": supervised training, "_semisup": semi-supervised BNF training
+  egs_string=""
+  dirid=train
+fi
+
+[ ! -d $ali_model ] && echo "The alignment model $ali_model does not exist! Use --ali-model to specify it." && exit 1
+
+datadir=data/${dirid}
+exp_dir=exp_bnf${unsup_string}
+data_bnf_dir=data_bnf${unsup_string}
+param_bnf_dir=param_bnf${unsup_string}
+
+if [ -z "$ali_dir" ] ; then
+  # If no alignment directory was specified, use exp/tri6_nnet_ali as the
+  # alignment directory.
+  ali_dir=exp/tri6_nnet_ali
+fi
+
+if [ ! -f $ali_dir/.done ]; then
+  echo "$0: Aligning supervised training data in exp/tri6_nnet_ali"
+
+  [ ! -f $ali_model/final.mdl ] && echo -e "$ali_model/final.mdl not found!\nRun run-6-nnet.sh first!" && exit 1
+  steps/nnet2/align.sh  --cmd "$train_cmd " \
+    --use-gpu no --transform-dir exp/tri5_ali --nj $train_nj \
+    data/train data/langp/tri5_ali $ali_model $ali_dir || exit 1
+  touch $ali_dir/.done
+fi
+
+###############################################################################
+#
+# Semi-supervised BNF training
+#
+###############################################################################
+mkdir -p $exp_dir/tri6_bnf
+if [ ! -f $exp_dir/tri6_bnf/.done ]; then
+  if $semisupervised ; then
+
+    [ ! -d $datadir ] && echo "Error: $datadir is not available!" && exit 1;
+    echo "$0: Generate examples using unsupervised data in $exp_dir/tri6_bnf"
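+    # Roughly: the unsupervised utterances are weighted per-frame by
+    # best-path posteriors taken from $weights_dir, and frames whose weight
+    # falls below --weight-threshold ($bnf_weight_threshold) are discarded,
+    # so only reasonably confident automatic transcripts contribute examples.
+    if [ ! 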
-f $exp_dir/tri6_bnf/egs/.done ]; then + local/nnet2/get_egs_semi_supervised.sh \ + --cmd "$train_cmd" \ + "${dnn_update_egs_opts[@]}" \ + --transform-dir-sup exp/tri5_ali \ + --transform-dir-unsup exp/tri5/decode_${dirid} \ + --weight-threshold $bnf_weight_threshold \ + data/train $datadir data/langp/tri5_ali/ \ + $ali_dir $weights_dir $exp_dir/tri6_bnf || exit 1; + touch $exp_dir/tri6_bnf/egs/.done + fi + + fi + + echo "$0: Train Bottleneck network" + steps/nnet2/train_tanh_bottleneck.sh \ + --stage $train_stage --num-jobs-nnet $bnf_num_jobs \ + --num-threads $bnf_num_threads --mix-up $bnf_mixup \ + --minibatch-size $bnf_minibatch_size \ + --initial-learning-rate $bnf_init_learning_rate \ + --final-learning-rate $bnf_final_learning_rate \ + --num-hidden-layers $bnf_num_hidden_layers \ + --bottleneck-dim $bottleneck_dim --hidden-layer-dim $bnf_hidden_layer_dim \ + --cmd "$train_cmd --mem 4G" $egs_string \ + "${dnn_gpu_parallel_opts[@]}" \ + data/train data/langp/tri5_ali/ $ali_dir $exp_dir/tri6_bnf || exit 1 + + touch $exp_dir/tri6_bnf/.done +fi + +[ ! -d $param_bnf_dir ] && mkdir -p $param_bnf_dir +if [ ! -f $data_bnf_dir/train_bnf/.done ]; then + mkdir -p $data_bnf_dir + # put the archives in ${param_bnf_dir}/. + steps/nnet2/dump_bottleneck_features.sh --nj $train_nj --cmd "$train_cmd" \ + --transform-dir exp/tri5 data/train $data_bnf_dir/train_bnf \ + $exp_dir/tri6_bnf $param_bnf_dir $exp_dir/dump_bnf + touch $data_bnf_dir/train_bnf/.done +fi + +if [ ! $data_bnf_dir/train/.done -nt $data_bnf_dir/train_bnf/.done ]; then + steps/nnet/make_fmllr_feats.sh --cmd "$train_cmd --max-jobs-run 10" \ + --nj $train_nj --transform-dir exp/tri5_ali $data_bnf_dir/train_sat data/train \ + exp/tri5_ali $exp_dir/make_fmllr_feats/log $param_bnf_dir + + steps/append_feats.sh --cmd "$train_cmd" --nj 4 \ + $data_bnf_dir/train_bnf $data_bnf_dir/train_sat $data_bnf_dir/train \ + $exp_dir/append_feats/log $param_bnf_dir/ + steps/compute_cmvn_stats.sh --fake $data_bnf_dir/train \ + $exp_dir/make_fmllr_feats $param_bnf_dir + rm -r $data_bnf_dir/train_sat + + touch $data_bnf_dir/train/.done +fi + +if [ ! $exp_dir/tri5/.done -nt $data_bnf_dir/train/.done ]; then + steps/train_lda_mllt.sh --splice-opts "--left-context=1 --right-context=1" \ + --dim 60 --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesMLLT $numGaussMLLT $data_bnf_dir/train data/langp/tri5_ali/ exp/tri5_ali $exp_dir/tri5 ; + touch $exp_dir/tri5/.done +fi + +if [ ! $exp_dir/tri6/.done -nt $exp_dir/tri5/.done ]; then + steps/train_sat.sh --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesSAT $numGaussSAT $data_bnf_dir/train data/langp/tri5_ali \ + $exp_dir/tri5 $exp_dir/tri6 + touch $exp_dir/tri6/.done +fi + +echo --------------------------------------------------------------------- +echo "$0: next, run run-6-bnf-sgmm-semisupervised.sh" +echo --------------------------------------------------------------------- + +exit 0; diff --git a/egs/babel/s5d/run-3a-nnet-mpe.sh b/egs/babel/s5d/run-3a-nnet-mpe.sh new file mode 100755 index 00000000000..5271c58d816 --- /dev/null +++ b/egs/babel/s5d/run-3a-nnet-mpe.sh @@ -0,0 +1,54 @@ +#!/bin/bash + + +. conf/common_vars.sh +. ./lang.conf + +modeldir=exp/tri6_nnet + +. ./utils/parse_options.sh +set -e +set -o pipefail +set -u + +# Wait for cross-entropy training. +echo "Waiting till ${modeldir}/.done exists...." +while [ ! -f $modeldir/.done ]; do sleep 30; done +echo "...done waiting for ${modeldir}/.done" + +# Generate denominator lattices. +if [ ! 
-f exp/tri6_nnet_denlats/.done ]; then + steps/nnet2/make_denlats.sh --cmd "$decode_cmd" \ + --nj $train_nj --sub-split $train_nj \ + "${dnn_denlats_extra_opts[@]}" \ + --transform-dir exp/tri5_ali \ + data/train data/langp/tri5_ali ${modeldir} exp/tri6_nnet_denlats || exit 1 + + touch exp/tri6_nnet_denlats/.done +fi + +# Generate alignment. +if [ ! -f exp/tri6_nnet_ali/.done ]; then + steps/nnet2/align.sh --use-gpu yes \ + --cmd "$decode_cmd $dnn_parallel_opts" \ + --transform-dir exp/tri5_ali --nj $train_nj \ + data/train data/langp/tri5_ali ${modeldir} exp/tri6_nnet_ali || exit 1 + + touch exp/tri6_nnet_ali/.done +fi + +train_stage=-100 +if [ ! -f exp/tri6_nnet_mpe/.done ]; then + steps/nnet2/train_discriminative.sh \ + --stage $train_stage --cmd "$decode_cmd" \ + --learning-rate $dnn_mpe_learning_rate \ + --modify-learning-rates true \ + --last-layer-factor $dnn_mpe_last_layer_factor \ + --num-epochs 4 --cleanup true \ + --retroactive $dnn_mpe_retroactive \ + --transform-dir exp/tri5_ali \ + "${dnn_gpu_mpe_parallel_opts[@]}" data/train data/langp/tri5_ali/ \ + exp/tri6_nnet_ali exp/tri6_nnet_denlats ${modeldir}/final.mdl exp/tri6_nnet_mpe || exit 1 + + touch exp/tri6_nnet_mpe/.done +fi diff --git a/egs/babel/s5d/run-3b-bnf-nnet.sh b/egs/babel/s5d/run-3b-bnf-nnet.sh new file mode 100755 index 00000000000..169eec6f62f --- /dev/null +++ b/egs/babel/s5d/run-3b-bnf-nnet.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# Copyright 2014 Pegah Ghahremani +# 2014 Johns Hopkins (Yenda Trmal) + +# Apache 2.0 + +# This is really an alternative path to the BNF-SGMM, +# where we train a DNN instead of an SGMM. + + +. conf/common_vars.sh +. ./lang.conf +[ -f local.conf ] && . ./local.conf + +set -e +set -o pipefail +set -u + +semisupervised=true +dnn_train_stage=-100 +unsup_string= + +. ./utils/parse_options.sh + +if [ $babel_type == "full" ] && $semisupervised; then + echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false " + exit 1 +fi + +if [ -z "$unsup_string" ]; then + if $semisupervised ; then + unsup_string="_semisup" + else + unsup_string="" #" ": supervised training, _semi_supervised: unsupervised BNF training + fi +fi +exp_dir=exp_bnf${unsup_string} +data_bnf_dir=data_bnf${unsup_string} + +if [ ! -e $exp_dir/tri6/.done ]; then + echo "$0: $exp_dir/tri6/.done does not exist" + echo "$0: this script needs to be run _AFTER_ the script run-2b-bnf.sh" + echo "$0: with the appropriate parameters -- mostly the same to the parameters" + echo "$0: of this script" + exit 1 +fi + +# We create an alignment with a lot of jobs, because the LDA accumulation +# when training the pnorm network will be slow, due to the large dimension. +if [ ! $exp_dir/tri6_ali_50/.done -nt $exp_dir/tri6/.done ]; then + echo --------------------------------------------------------------------- + echo "Aligning fMLLR system with 50 jobs" + echo --------------------------------------------------------------------- + steps/align_fmllr.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + $data_bnf_dir/train data/lang $exp_dir/tri6 $exp_dir/tri6_ali_50 + touch $exp_dir/tri6_ali_50/.done +fi + + +if [ ! 
$exp_dir/tri7_nnet/.done -nt $exp_dir/tri6_ali_50/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting hybrid system building (over bottleneck features)" + echo --------------------------------------------------------------------- + steps/nnet2/train_pnorm.sh \ + --stage $dnn_train_stage --mix-up $dnn_mixup \ + --initial-learning-rate $dnn_init_learning_rate \ + --final-learning-rate $dnn_final_learning_rate \ + --num-hidden-layers $dnn_num_hidden_layers \ + --pnorm-input-dim $dnn_input_dim \ + --pnorm-output-dim $dnn_output_dim \ + --egs-opts "--feat-type raw" --lda-opts "--feat-type raw --lda-dim $dnn_output_dim" --splice-width 5 \ + "${dnn_gpu_parallel_opts[@]}" --cmd "$train_cmd" \ + $data_bnf_dir/train data/lang $exp_dir/tri6_ali_50 $exp_dir/tri7_nnet || exit 1 + + touch $exp_dir/tri7_nnet/.done +fi + + +echo --------------------------------------------------------------------- +echo "Finished successfully on" `date` +echo "To decode a data-set, use run-4b-anydecode-bnf.sh" +echo --------------------------------------------------------------------- + +exit 0 diff --git a/egs/babel/s5d/run-3b-bnf-sgmm.sh b/egs/babel/s5d/run-3b-bnf-sgmm.sh new file mode 100755 index 00000000000..341ea83565f --- /dev/null +++ b/egs/babel/s5d/run-3b-bnf-sgmm.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +# Copyright 2014 Pegah Ghahremani +# 2014 Johns Hopkins (Yenda Trmal) + +# Apache 2.0 + +# This script builds the SGMM system on top of the kaldi internal bottleneck features. +# It comes after run-6-bnf-semisupervised.sh. + + +. conf/common_vars.sh +. ./lang.conf +[ -f local.conf ] && . ./local.conf + +set -e +set -o pipefail +set -u +semisupervised=true +unsup_string= + +. ./utils/parse_options.sh + +if [ $babel_type == "full" ] && $semisupervised; then + echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false " + exit 1 +fi + +if [ -z "$unsup_string" ]; then + if $semisupervised ; then + unsup_string="_semisup" + else + unsup_string="" #" ": supervised training, _semi_supervised: unsupervised BNF training + fi +fi +exp_dir=exp_bnf${unsup_string} +data_bnf_dir=data_bnf${unsup_string} +param_bnf_dir=param_bnf${unsup_string} + +echo --------------------------------------------------------------------- +echo "Starting $exp_dir/ubm7 on" `date` +echo --------------------------------------------------------------------- +if [ ! $exp_dir/ubm7/.done -nt $exp_dir/tri6/.done ]; then + steps/train_ubm.sh --cmd "$train_cmd" \ + $bnf_num_gauss_ubm $data_bnf_dir/train data/lang $exp_dir/tri6 $exp_dir/ubm7 + touch $exp_dir/ubm7/.done +fi + +if [ ! $exp_dir/sgmm7/.done -nt $exp_dir/ubm7/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting $exp_dir/sgmm7 on" `date` + echo --------------------------------------------------------------------- + #steps/train_sgmm2_group.sh \ + steps/train_sgmm2.sh \ + --cmd "$train_cmd" "${sgmm_train_extra_opts[@]}"\ + $numLeavesSGMM $bnf_num_gauss_sgmm $data_bnf_dir/train data/lang \ + $exp_dir/tri6 $exp_dir/ubm7/final.ubm $exp_dir/sgmm7 + touch $exp_dir/sgmm7/.done +fi + +if [ ! 
$exp_dir/sgmm7_ali/.done -nt $exp_dir/sgmm7/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Starting $exp_dir/sgmm7_ali on" `date`
+  echo ---------------------------------------------------------------------
+  steps/align_sgmm2.sh \
+    --nj $train_nj --cmd "$train_cmd" --transform-dir $exp_dir/tri6 --use-graphs true \
+    $data_bnf_dir/train data/lang $exp_dir/sgmm7 $exp_dir/sgmm7_ali
+  touch $exp_dir/sgmm7_ali/.done
+fi
+
+if [ ! $exp_dir/sgmm7_denlats/.done -nt $exp_dir/sgmm7/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Starting $exp_dir/sgmm7_denlats on" `date`
+  echo ---------------------------------------------------------------------
+  steps/make_denlats_sgmm2.sh --cmd "$train_cmd" \
+    --nj $train_nj --sub-split $train_nj "${sgmm_denlats_extra_opts[@]}" \
+    --transform-dir $exp_dir/tri6 --beam 10.0 --acwt 0.06 --lattice-beam 6 \
+    $data_bnf_dir/train data/lang $exp_dir/sgmm7_ali $exp_dir/sgmm7_denlats
+  touch $exp_dir/sgmm7_denlats/.done
+fi
+
+if [ ! $exp_dir/sgmm7_mmi_b0.1/.done -nt $exp_dir/sgmm7_denlats/.done ]; then
+  steps/train_mmi_sgmm2.sh \
+    --cmd "$train_cmd" --acwt 0.06 \
+    --transform-dir $exp_dir/tri6 --boost 0.1 --drop-frames true \
+    $data_bnf_dir/train data/lang $exp_dir/sgmm7_ali $exp_dir/sgmm7_denlats \
+    $exp_dir/sgmm7_mmi_b0.1
+  touch $exp_dir/sgmm7_mmi_b0.1/.done;
+fi
+
+
+echo ---------------------------------------------------------------------
+echo "Finished successfully on" `date`
+echo "To decode a data-set, use run-4b-anydecode-bnf.sh"
+echo ---------------------------------------------------------------------
+
+exit 0
diff --git a/egs/babel/s5d/run-4-anydecode.sh b/egs/babel/s5d/run-4-anydecode.sh
new file mode 100755
index 00000000000..083ac7e9879
--- /dev/null
+++ b/egs/babel/s5d/run-4-anydecode.sh
@@ -0,0 +1,724 @@
+#!/bin/bash
+set -e
+set -o pipefail
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+
+dir=dev10h.pem
+kind=
+data_only=false
+fast_path=true
+skip_kws=false
+skip_stt=false
+skip_scoring=
+extra_kws=true
+vocab_kws=false
+tri5_only=false
+wip=0.5
+
+nnet3_model=nnet3/tdnn_sp
+chain_model=
+parent_dir_suffix=_cleaned
+is_rnn=false
+extra_left_context=40
+extra_right_context=40
+frames_per_chunk=20
+
+echo "$0 $@"
+
+. utils/parse_options.sh
+
+if [ $# -ne 0 ]; then
+  echo "Usage: $(basename $0) --dir (dev10h|dev2h|eval|shadow).(pem|uem|seg)"
+  exit 1
+fi
+
+echo "Dir: $dir"
+
+#This seems to be the only functioning way to ensure the complete
+#set of scripts will exit when sourcing several of them together.
+#Otherwise, CTRL-C seems to just terminate the deepest sourced script.
+# Let shell functions inherit ERR trap.  Same as `set -E'.
+set -o errtrace
+trap "echo Exited!; exit;" SIGINT SIGTERM
+
+./local/check_tools.sh || exit 1
+
+# Set proxy search parameters for the extended lexicon case.
+if [ -f data/.extlex ]; then
+  proxy_phone_beam=$extlex_proxy_phone_beam
+  proxy_phone_nbest=$extlex_proxy_phone_nbest
+  proxy_beam=$extlex_proxy_beam
+  proxy_nbest=$extlex_proxy_nbest
+fi
+
+dataset_segments=${dir##*.}
+dataset_dir=data/$dir
+dataset_id=$dir
+dataset_type=${dir%%.*}
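+# For example, with the default --dir dev10h.pem the lines above give
+# dataset_segments=pem, dataset_dir=data/dev10h.pem and dataset_type=dev10h.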
+#By default, we let the script work out how the dataset should be handled,
+#i.e. what kind of dataset it is.
+if [ -z ${kind} ] ; then
+  if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ]; then
+    dataset_kind=supervised
+  else
+    dataset_kind=unsupervised
+  fi
+else
+  dataset_kind=$kind
+fi
+
+if [ -z $dataset_segments ]; then
+  echo "You have to specify the segmentation type as well"
+  echo "If you are trying to decode the PEM segmentation dir"
+  echo "such as data/dev10h, specify dev10h.pem"
+  echo "The valid segmentation types are:"
+  echo -e "\tpem #PEM segmentation"
+  echo -e "\tuem #UEM segmentation in the CMU database format"
+  echo -e "\tseg #UEM segmentation (kaldi-native)"
+fi
+
+if [ -z "${skip_scoring}" ] ; then
+  if [ "$dataset_kind" == "unsupervised" ]; then
+    skip_scoring=true
+  else
+    skip_scoring=false
+  fi
+fi
+
+#The $dataset_type value will be the dataset name without any extension
+eval my_data_dir=( "\${${dataset_type}_data_dir[@]}" )
+eval my_data_list=( "\${${dataset_type}_data_list[@]}" )
+if [ -z $my_data_dir ] || [ -z $my_data_list ] ; then
+  echo "Error: The dir you specified ($dataset_id) does not have existing config";
+  exit 1
+fi
+
+eval my_stm_file=\$${dataset_type}_stm_file
+eval my_ecf_file=\$${dataset_type}_ecf_file
+eval my_rttm_file=\$${dataset_type}_rttm_file
+eval my_nj=\$${dataset_type}_nj  #for shadow, this will be re-set when appropriate
+
+if [ -z "$my_nj" ]; then
+  echo >&2 "You didn't specify the number of jobs -- variable \"${dataset_type}_nj\" not defined."
+  exit 1
+fi
+
+my_subset_ecf=false
+eval ind=\${${dataset_type}_subset_ecf+x}
+if [ "$ind" == "x" ] ; then
+  eval my_subset_ecf=\$${dataset_type}_subset_ecf
+fi
+
+declare -A my_kwlists=()
+eval my_kwlists_keys="\${!${dataset_type}_kwlists[@]}"
+for key in $my_kwlists_keys  # make sure you include the quotes there
+do
+  eval my_kwlists_val="\${${dataset_type}_kwlists[$key]}"
+  #index=`echo $my_kwlists_val | sed 's/.*\.\([^.][^.]*\)\.xml/\1/g'`
+  index=$key
+
+  my_kwlists["$index"]="${my_kwlists_val}"
+done
+declare -p my_kwlists
+export my_kwlists
+
+#Just a minor safety precaution to prevent using incorrect settings
+#The dataset_* variables should be used.
+set -e
+set -o pipefail
+set -u
+unset dir
+unset kind
+
+function make_plp {
+  target=$1
+  logdir=$2
+  output=$3
+  if $use_pitch; then
+    steps/make_plp_pitch.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  else
+    steps/make_plp.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  fi
+  utils/fix_data_dir.sh $target
+  steps/compute_cmvn_stats.sh $target $logdir $output
+  utils/fix_data_dir.sh $target
+}
+
+function check_variables_are_set {
+  for variable in $mandatory_variables ; do
+    if ! declare -p $variable ; then
+      echo "Mandatory variable ${variable/my/$dataset_type} is not set!"
+      echo "You should probably set the variable in the config file"
+      exit 1
+    else
+      declare -p $variable
+    fi
+  done
+
+  if [ ! -z ${optional_variables+x} ] ; then
+    for variable in $optional_variables ; do
+      eval my_variable=\$${variable}
+      echo "$variable=$my_variable"
+    done
+  fi
+}
+
+if [ ! -f data/raw_${dataset_type}_data/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the ${dataset_type} set"
+  echo ---------------------------------------------------------------------
+
+  l1=${#my_data_dir[*]}
+  l2=${#my_data_list[*]}
+  if [ "$l1" -ne "$l2" ]; then
+    echo "Error: the number of source file lists is not the same as the number of source dirs!"
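+    # (A mismatch here usually means that ${dataset_type}_data_dir and
+    # ${dataset_type}_data_list in lang.conf define different numbers of entries.)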
+ exit 1 + fi + + resource_string="" + if [ "$dataset_kind" == "unsupervised" ]; then + resource_string+=" --ignore-missing-txt true" + fi + + for i in `seq 0 $(($l1 - 1))`; do + resource_string+=" ${my_data_dir[$i]} " + resource_string+=" ${my_data_list[$i]} " + done + local/make_corpus_subset.sh $resource_string ./data/raw_${dataset_type}_data + touch data/raw_${dataset_type}_data/.done +fi +my_data_dir=`readlink -f ./data/raw_${dataset_type}_data` +[ -f $my_data_dir/filelist.list ] && my_data_list=$my_data_dir/filelist.list +nj_max=`cat $my_data_list | wc -l` || nj_max=`ls $my_data_dir/audio | wc -l` + +if [ "$nj_max" -lt "$my_nj" ] ; then + echo "Number of jobs ($my_nj) is too big!" + echo "The maximum reasonable number of jobs is $nj_max" + my_nj=$nj_max +fi + +##################################################################### +# +# Audio data directory preparation +# +##################################################################### +echo --------------------------------------------------------------------- +echo "Preparing ${dataset_kind} data files in ${dataset_dir} on" `date` +echo --------------------------------------------------------------------- +if [ ! -f $dataset_dir/.done ] ; then + if [ "$dataset_kind" == "supervised" ]; then + if [ "$dataset_segments" == "seg" ]; then + . ./local/datasets/supervised_seg.sh || exit 1 + elif [ "$dataset_segments" == "uem" ]; then + . ./local/datasets/supervised_uem.sh || exit 1 + elif [ "$dataset_segments" == "pem" ]; then + . ./local/datasets/supervised_pem.sh || exit 1 + else + echo "Unknown type of the dataset: \"$dataset_segments\"!"; + echo "Valid dataset types are: seg, uem, pem"; + exit 1 + fi + elif [ "$dataset_kind" == "unsupervised" ] ; then + if [ "$dataset_segments" == "seg" ] ; then + . ./local/datasets/unsupervised_seg.sh + elif [ "$dataset_segments" == "uem" ] ; then + . ./local/datasets/unsupervised_uem.sh + elif [ "$dataset_segments" == "pem" ] ; then + ##This combination does not really makes sense, + ##Because the PEM is that we get the segmentation + ##and because of the format of the segment files + ##the transcript as well + echo "ERROR: $dataset_segments combined with $dataset_type" + echo "does not really make any sense!" + exit 1 + #. ./local/datasets/unsupervised_pem.sh + else + echo "Unknown type of the dataset: \"$dataset_segments\"!"; + echo "Valid dataset types are: seg, uem, pem"; + exit 1 + fi + else + echo "Unknown kind of the dataset: \"$dataset_kind\"!"; + echo "Valid dataset kinds are: supervised, unsupervised, shadow"; + exit 1 + fi + + if [ ! -f ${dataset_dir}/.plp.done ]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_kind} parametrization files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + make_plp ${dataset_dir} exp/make_plp/${dataset_id} plp + touch ${dataset_dir}/.plp.done + fi + touch $dataset_dir/.done +fi + +if [ ! -f ${dataset_dir}_hires/.mfcc.done ]; then + dataset=$(basename $dataset_dir) + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_kind} MFCC features in ${dataset_dir}_hires and corresponding iVectors in exp/nnet3/ivectors_$dataset on" `date` + echo --------------------------------------------------------------------- + if [ ! 
-d ${dataset_dir}_hires ]; then + utils/copy_data_dir.sh data/$dataset data/${dataset}_hires + fi + + mfccdir=mfcc_hires + steps/make_mfcc.sh --nj $my_nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" ${dataset_dir}_hires exp/make_hires/$dataset $mfccdir; + steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/${dataset} $mfccdir; + utils/fix_data_dir.sh ${dataset_dir}_hires; + touch ${dataset_dir}_hires/.mfcc.done + + touch ${dataset_dir}_hires/.done +fi + +if [ -f exp/nnet3/extractor/final.ie ] && \ + [ ! -f exp/nnet3/ivectors_$(basename $dataset_dir)/.done ] ; then + dataset=$(basename $dataset_dir) + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $my_nj \ + ${dataset_dir}_hires exp/nnet3/extractor exp/nnet3/ivectors_$dataset || exit 1; + + touch exp/nnet3/ivectors_$dataset/.done +fi + +##################################################################### +# +# KWS data directory preparation +# +##################################################################### +echo --------------------------------------------------------------------- +echo "Preparing kws data files in ${dataset_dir} on" `date` +echo --------------------------------------------------------------------- +lang=data/lang +if ! $skip_kws ; then + if $extra_kws ; then + L1_lex=data/local/lexiconp.txt + . ./local/datasets/extra_kws.sh || exit 1 + fi + if $vocab_kws ; then + . ./local/datasets/vocab_kws.sh || exit 1 + fi + if [ ! -f data/lang.phn/G.fst ] ; then + ./local/syllab/run_phones.sh --stage -2 ${dataset_dir} + else + ./local/syllab/run_phones.sh ${dataset_dir} + fi + + if [ ! -f data/lang.syll/G.fst ] ; then + ./local/syllab/run_syllabs.sh --stage -2 ${dataset_dir} + else + ./local/syllab/run_syllabs.sh ${dataset_dir} + fi + + ./local/search/run_search.sh --dir ${dataset_dir##*/} + ./local/search/run_phn_search.sh --dir ${dataset_dir##*/} + ./local/search/run_syll_search.sh --dir ${dataset_dir##*/} +fi + +if $data_only ; then + echo "Exiting, as data-only was requested..." + exit 0; +fi + +#################################################################### +## +## FMLLR decoding +## +#################################################################### +if [ ! -f data/langp_test/.done ]; then + cp -R data/langp/tri5_ali/ data/langp_test + cp data/lang/G.fst data/langp_test + touch data/langp_test/.done +fi + +if [ ! -L ./data/langp_test.syll ]; then + ln -s lang.syll data/langp_test.syll +fi +if [ ! -L ./data/langp_test.phn ]; then + ln -s lang.phn data/langp_test.phn +fi + + +decode=exp/tri5/decode_${dataset_id} +if [ ! -f ${decode}/.done ]; then + echo --------------------------------------------------------------------- + echo "Spawning decoding with SAT models on" `date` + echo --------------------------------------------------------------------- + utils/mkgraph.sh \ + data/langp_test exp/tri5 exp/tri5/graph |tee exp/tri5/mkgraph.log + + mkdir -p $decode + #By default, we do not care about the lattices for this step -- we just want the transforms + #Therefore, we will reduce the beam sizes, to reduce the decoding times + steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4\ + --nj $my_nj --cmd "$decode_cmd" "${decode_extra_opts[@]}"\ + exp/tri5/graph ${dataset_dir} ${decode} |tee ${decode}/decode.log + touch ${decode}/.done +fi + +if ! 
$fast_path ; then
+  local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test ${decode}
+
+  local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test ${decode}.si
+fi
+
+if $tri5_only; then
+  echo "--tri5-only is true. So exiting."
+  exit 0
+fi
+
+####################################################################
+## SGMM2 decoding
+## We include the SGMM_MMI inside this, as we might only have the DNN systems
+## trained and not the PLP system. The DNN systems are built only on top of the tri5 stage
+####################################################################
+if [ -f exp/sgmm5/.done ]; then
+  decode=exp/sgmm5/decode_fmllr_${dataset_id}
+  if [ ! -f $decode/.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Spawning $decode on" `date`
+    echo ---------------------------------------------------------------------
+    utils/mkgraph.sh \
+      data/langp_test exp/sgmm5 exp/sgmm5/graph |tee exp/sgmm5/mkgraph.log
+
+    mkdir -p $decode
+    steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \
+      --cmd "$decode_cmd" --transform-dir exp/tri5/decode_${dataset_id} "${decode_extra_opts[@]}"\
+      exp/sgmm5/graph ${dataset_dir} $decode |tee $decode/decode.log
+    touch $decode/.done
+
+    if ! $fast_path ; then
+      local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+        --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+        --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+        "${lmwt_plp_extra_opts[@]}" \
+        ${dataset_dir} data/langp_test exp/sgmm5/decode_fmllr_${dataset_id}
+    fi
+  fi
+
+  ####################################################################
+  ##
+  ## SGMM_MMI rescoring
+  ##
+  ####################################################################
+
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    if [ -x exp/sgmm5_mmi_b0.1 ] && [ ! -f $decode/.done ]; then
+
+      mkdir -p $decode
+      steps/decode_sgmm2_rescore.sh --skip-scoring true \
+        --cmd "$decode_cmd" --iter $iter --transform-dir exp/tri5/decode_${dataset_id} \
+        data/langp_test ${dataset_dir} exp/sgmm5/decode_fmllr_${dataset_id} $decode | tee ${decode}/decode.log
+
+      touch $decode/.done
+    fi
+  done
+
+  #We are done -- all lattices have been generated. We still have to
+  #a) run MBR decoding
+  #b) run KW search
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    if [ -f $decode/.done ]; then
+      local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+        --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+        --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+        "${lmwt_plp_extra_opts[@]}" \
+        ${dataset_dir} data/langp_test $decode
+    fi
+  done
+fi
+
+
+
+####################################################################
+##
+## DNN ("compatibility") decoding -- also, just decode the "default" net
+##
+####################################################################
+if [ -f exp/tri6_nnet/.done ]; then
+  decode=exp/tri6_nnet/decode_${dataset_id}
+  if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi + +#################################################################### +## +## nnet3 model decoding +## +#################################################################### +if [ -f exp/nnet3/lstm_bidirectional_sp/.done ]; then + decode=exp/nnet3/lstm_bidirectional_sp/decode_${dataset_id} + rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 " + decode_script=steps/nnet3/decode.sh + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi + +if [ -f exp/nnet3/lstm_realigned_bidirectional_sp//.done ]; then + decode=exp/nnet3/lstm_realigned_bidirectional_sp//decode_${dataset_id} + rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 " + decode_script=steps/nnet3/decode.sh + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi +if [ -f exp/nnet3/lstm_sp/.done ]; then + decode=exp/nnet3/lstm_sp/decode_${dataset_id} + rnn_opts=" --extra-left-context 40 --extra-right-context 0 --frames-per-chunk 20 " + decode_script=steps/nnet3/decode.sh + if [ ! 
-f $decode/.done ]; then
+    mkdir -p $decode
+    $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \
+      --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+      --skip-scoring true \
+      --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \
+      exp/tri5/graph ${dataset_dir}_hires $decode | tee $decode/decode.log
+
+    touch $decode/.done
+  fi
+
+  local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_dnn_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test $decode
+fi
+
+if [ -f exp/$nnet3_model/.done ]; then
+  decode=exp/$nnet3_model/decode_${dataset_id}
+  rnn_opts=
+  decode_script=steps/nnet3/decode.sh
+  if [ "$is_rnn" == "true" ]; then
+    rnn_opts=" --extra-left-context $extra_left_context --extra-right-context $extra_right_context --frames-per-chunk $frames_per_chunk "
+  fi
+  if [ ! -f $decode/.done ]; then
+    mkdir -p $decode
+    $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \
+      --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+      --skip-scoring true \
+      --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \
+      exp/tri5/graph ${dataset_dir}_hires $decode | tee $decode/decode.log
+
+    touch $decode/.done
+  fi
+
+  local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_dnn_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test $decode
+fi
+
+####################################################################
+##
+## chain model decoding
+##
+####################################################################
+if [ -f exp/$chain_model/final.mdl ]; then
+  dir=exp/$chain_model
+
+  decode=$dir/decode_${dataset_id}
+  decode_script=steps/nnet3/decode.sh
+
+  if [ ! -f exp/nnet3$parent_dir_suffix/ivectors_${dataset_id}/.done ] ; then
+    steps/online/nnet2/extract_ivectors_online.sh --cmd "$decode_cmd" --nj $my_nj \
+      ${dataset_dir}_hires exp/nnet3$parent_dir_suffix/extractor exp/nnet3$parent_dir_suffix/ivectors_${dataset_id}/ || exit 1;
+    touch exp/nnet3$parent_dir_suffix/ivectors_${dataset_id}/.done
+  fi
+
+  rnn_opts=
+  if [ "$is_rnn" == "true" ]; then
+    rnn_opts=" --extra-left-context $extra_left_context --extra-right-context $extra_right_context --frames-per-chunk $frames_per_chunk "
+    echo "Modifying the number of jobs as this is an RNN and decoding can be extremely slow."
+    my_nj=`cat ${dataset_dir}_hires/spk2utt|wc -l`
+  fi
+  if [ ! -f $decode/.done ]; then
+    mkdir -p $decode
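+    # The chain model is decoded with --acwt 1.0 --post-decode-acwt 10.0 (see
+    # the command below), so the acoustic scores written into the lattices end
+    # up on the usual scale for LM-weight (lmwt) tuning and scoring.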
+    $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \
+      --acwt 1.0 --post-decode-acwt 10.0 \
+      --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+      --skip-scoring true \
+      --online-ivector-dir exp/nnet3$parent_dir_suffix/ivectors_${dataset_id} \
+      $dir/graph ${dataset_dir}_hires $decode | tee $decode/decode.log
+
+    touch $decode/.done
+  fi
+
+  local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_chain_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test $decode
+else
+  echo "No chain model exp/$chain_model, skipping it."
+fi
+
+####################################################################
+##
+## DNN (nextgen DNN) decoding
+##
+####################################################################
+if [ -f exp/tri6a_nnet/.done ]; then
+  decode=exp/tri6a_nnet/decode_${dataset_id}
+  if [ ! -f $decode/.done ]; then
+    mkdir -p $decode
+    steps/nnet2/decode.sh \
+      --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \
+      --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+      --skip-scoring true "${decode_extra_opts[@]}" \
+      --transform-dir exp/tri5/decode_${dataset_id} \
+      exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log
+
+    touch $decode/.done
+  fi
+
+  local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_dnn_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test $decode
+fi
+
+
+####################################################################
+##
+## DNN (ensemble) decoding
+##
+####################################################################
+if [ -f exp/tri6b_nnet/.done ]; then
+  decode=exp/tri6b_nnet/decode_${dataset_id}
+  if [ ! -f $decode/.done ]; then
+    mkdir -p $decode
+    steps/nnet2/decode.sh \
+      --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \
+      --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+      --skip-scoring true "${decode_extra_opts[@]}" \
+      --transform-dir exp/tri5/decode_${dataset_id} \
+      exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log
+
+    touch $decode/.done
+  fi
+
+  local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_dnn_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test $decode
+fi
+####################################################################
+##
+## DNN_MPE decoding
+##
+####################################################################
+if [ -f exp/tri6_nnet_mpe/.done ]; then
+  for epoch in 1 2 3 4; do
+    decode=exp/tri6_nnet_mpe/decode_${dataset_id}_epoch$epoch
+    if [ ! 
-f $decode/.done ]; then
+      mkdir -p $decode
+      steps/nnet2/decode.sh --minimize $minimize \
+        --cmd "$decode_cmd" --nj $my_nj --iter epoch$epoch \
+        --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+        --skip-scoring true "${decode_extra_opts[@]}" \
+        --transform-dir exp/tri5/decode_${dataset_id} \
+        exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log
+
+      touch $decode/.done
+    fi
+
+    local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+      --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+      --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+      "${lmwt_dnn_extra_opts[@]}" \
+      ${dataset_dir} data/langp_test $decode
+  done
+fi
+
+####################################################################
+##
+## DNN semi-supervised training decoding
+##
+####################################################################
+for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \
+    tri6_nnet_supervised_tuning tri6_nnet_supervised_tuning2 ; do
+  if [ -f exp/$dnn/.done ]; then
+    decode=exp/$dnn/decode_${dataset_id}
+    if [ ! -f $decode/.done ]; then
+      mkdir -p $decode
+      steps/nnet2/decode.sh \
+        --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \
+        --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+        --skip-scoring true "${decode_extra_opts[@]}" \
+        --transform-dir exp/tri5/decode_${dataset_id} \
+        exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log
+
+      touch $decode/.done
+    fi
+
+    local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+      --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+      --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+      "${lmwt_dnn_extra_opts[@]}" \
+      ${dataset_dir} data/langp_test $decode
+  fi
+done
+echo "Everything looking good...."
+exit 0
diff --git a/egs/babel/s5d/run-4-phn-anydecode.sh b/egs/babel/s5d/run-4-phn-anydecode.sh
new file mode 100755
index 00000000000..054a4665529
--- /dev/null
+++ b/egs/babel/s5d/run-4-phn-anydecode.sh
@@ -0,0 +1,613 @@
+#!/bin/bash
+set -e
+set -o pipefail
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+
+dir=dev10h.phn.pem
+kind=
+data_only=false
+fast_path=true
+skip_kws=false
+skip_stt=false
+skip_scoring=
+extra_kws=true
+vocab_kws=false
+tri5_only=false
+wip=0.5
+
+nnet3_model=nnet3/tdnn_sp
+is_rnn=false
+extra_left_context=0
+extra_right_context=0
+frames_per_chunk=0
+
+echo $0 "$@"
+
+. utils/parse_options.sh
+
+if [ $# -ne 0 ]; then
+  echo "Usage: $(basename $0) --type (dev10h.phn|dev2h.phn|eval.phn|shadow.phn)"
+  exit 1
+fi
+
+#This seems to be the only functioning way to ensure that the complete
+#set of scripts will exit when sourcing several of them together.
+#Otherwise, CTRL-C just terminates the deepest sourced script.
+# Let shell functions inherit ERR trap. Same as `set -E'.
+set -o errtrace
+trap "echo Exited!; exit;" SIGINT SIGTERM
+
+./local/check_tools.sh || exit 1
+
+# Set proxy search parameters for the extended lexicon case.
+if [ -f data/.extlex ]; then
+  proxy_phone_beam=$extlex_proxy_phone_beam
+  proxy_phone_nbest=$extlex_proxy_phone_nbest
+  proxy_beam=$extlex_proxy_beam
+  proxy_nbest=$extlex_proxy_nbest
+fi
+
+dataset_segments=${dir##*.}
+dataset_dir=data/$dir
+dataset_id=$dir
+dataset_type=${dir%%.phn.*}
+#By default, we want the script to accept how the dataset should be handled,
+#i.e. 
what kind the dataset is
+if [ -z ${kind} ] ; then
+  if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ]; then
+    dataset_kind=supervised
+  else
+    dataset_kind=unsupervised
+  fi
+else
+  dataset_kind=$kind
+fi
+
+if [ -z $dataset_segments ]; then
+  echo "You have to specify the segmentation type as well"
+  echo "If you are trying to decode the PEM segmentation dir"
+  echo "such as data/dev10h, specify dev10h.pem"
+  echo "The valid segmentation types are:"
+  echo -e "\tpem #PEM segmentation"
+  echo -e "\tuem #UEM segmentation in the CMU database format"
+  echo -e "\tseg #UEM segmentation (kaldi-native)"
+  exit 1
+fi
+
+if [ -z "${skip_scoring}" ] ; then
+  if [ "$dataset_kind" == "unsupervised" ]; then
+    skip_scoring=true
+  else
+    skip_scoring=false
+  fi
+fi
+
+#The $dataset_type value will be the dataset name without any extension
+eval my_data_dir=( "\${${dataset_type}_data_dir[@]}" )
+eval my_data_list=( "\${${dataset_type}_data_list[@]}" )
+if [ -z $my_data_dir ] || [ -z $my_data_list ] ; then
+  echo "Error: The dir you specified ($dataset_id) does not have an existing config";
+  exit 1
+fi
+
+eval my_stm_file=\$${dataset_type}_stm_file
+eval my_ecf_file=\$${dataset_type}_ecf_file
+eval my_rttm_file=\$${dataset_type}_rttm_file
+eval my_nj=\$${dataset_type}_nj  #for shadow, this will be re-set when appropriate
+
+if [ -z "$my_nj" ]; then
+  echo >&2 "You didn't specify the number of jobs -- variable \"${dataset_type}_nj\" not defined."
+  exit 1
+fi
+my_nj=$(($my_nj * 2))
+
+my_subset_ecf=false
+eval ind=\${${dataset_type}_subset_ecf+x}
+if [ "$ind" == "x" ] ; then
+  eval my_subset_ecf=\$${dataset_type}_subset_ecf
+fi
+
+declare -A my_kwlists=()
+eval my_kwlists_keys="\${!${dataset_type}_kwlists[@]}"
+for key in $my_kwlists_keys  # make sure you include the quotes there
+do
+  eval my_kwlists_val="\${${dataset_type}_kwlists[$key]}"
+  my_kwlists["$key"]="${my_kwlists_val}"
+done
+declare -p my_kwlists
+export my_kwlists
+
+#Just a minor safety precaution to prevent using incorrect settings
+#The dataset_* variables should be used.
+set -e
+set -o pipefail
+set -u
+unset dir
+unset kind
+
+function make_plp {
+  target=$1
+  logdir=$2
+  output=$3
+  if $use_pitch; then
+    steps/make_plp_pitch.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  else
+    steps/make_plp.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  fi
+  utils/fix_data_dir.sh $target
+  steps/compute_cmvn_stats.sh $target $logdir $output
+  utils/fix_data_dir.sh $target
+}
+
+function check_variables_are_set {
+  for variable in $mandatory_variables ; do
+    if ! declare -p $variable ; then
+      echo "Mandatory variable ${variable/my/$dataset_type} is not set! "
+      echo "You should probably set the variable in the config file "
+      exit 1
+    else
+      declare -p $variable
+    fi
+  done
+
+  if [ ! -z ${optional_variables+x} ] ; then
+    for variable in $optional_variables ; do
+      eval my_variable=\$${variable}
+      echo "$variable=$my_variable"
+    done
+  fi
+}
+
+if [ ! -f data/raw_${dataset_type}_data/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the ${dataset_type} set"
+  echo ---------------------------------------------------------------------
+
+  l1=${#my_data_dir[*]}
+  l2=${#my_data_list[*]}
+  if [ "$l1" -ne "$l2" ]; then
+    echo "Error: the number of source file lists is not the same as the number of source dirs!"
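+    # The two arrays must be parallel: the subsetting loop below pairs
+    # ${my_data_dir[$i]} with ${my_data_list[$i]}. A hypothetical matching
+    # configuration (values for illustration only) would be:
+    #   dev10h_data_dir=( /corpus/partA /corpus/partB )
+    #   dev10h_data_list=( conf/lists/partA.list conf/lists/partB.list )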
+    exit 1
+  fi
+
+  resource_string=""
+  if [ "$dataset_kind" == "unsupervised" ]; then
+    resource_string+=" --ignore-missing-txt true"
+  fi
+
+  for i in `seq 0 $(($l1 - 1))`; do
+    resource_string+=" ${my_data_dir[$i]} "
+    resource_string+=" ${my_data_list[$i]} "
+  done
+  local/make_corpus_subset.sh $resource_string ./data/raw_${dataset_type}_data
+  touch data/raw_${dataset_type}_data/.done
+fi
+my_data_dir=`readlink -f ./data/raw_${dataset_type}_data`
+[ -f $my_data_dir/filelist.list ] && my_data_list=$my_data_dir/filelist.list
+nj_max=`cat $my_data_list | wc -l` || nj_max=`ls $my_data_dir/audio | wc -l`
+
+if [ "$nj_max" -lt "$my_nj" ] ; then
+  echo "Number of jobs ($my_nj) is too big!"
+  echo "The maximum reasonable number of jobs is $nj_max"
+  my_nj=$nj_max
+fi
+
+#####################################################################
+#
+# Audio data directory preparation
+#
+#####################################################################
+echo ---------------------------------------------------------------------
+echo "Preparing ${dataset_kind} data files in ${dataset_dir} on" `date`
+echo ---------------------------------------------------------------------
+if [ ! -f $dataset_dir/.done ] ; then
+  if [ "$dataset_kind" == "supervised" ]; then
+    if [ "$dataset_segments" == "seg" ]; then
+      . ./local/datasets/supervised_seg.sh || exit 1
+    elif [ "$dataset_segments" == "uem" ]; then
+      . ./local/datasets/supervised_uem.sh || exit 1
+    elif [ "$dataset_segments" == "pem" ]; then
+      . ./local/datasets/supervised_pem.sh || exit 1
+    else
+      echo "Unknown type of the dataset: \"$dataset_segments\"!";
+      echo "Valid dataset types are: seg, uem, pem";
+      exit 1
+    fi
+  elif [ "$dataset_kind" == "unsupervised" ] ; then
+    if [ "$dataset_segments" == "seg" ] ; then
+      . ./local/datasets/unsupervised_seg.sh
+    elif [ "$dataset_segments" == "uem" ] ; then
+      . ./local/datasets/unsupervised_uem.sh
+    elif [ "$dataset_segments" == "pem" ] ; then
+      ##This combination does not really make sense:
+      ##with PEM we are given the segmentation and, because of the
+      ##format of the segment files, the transcript as well.
+      echo "ERROR: $dataset_segments combined with $dataset_type"
+      echo "does not really make any sense!"
+      exit 1
+      #. ./local/datasets/unsupervised_pem.sh
+    else
+      echo "Unknown type of the dataset: \"$dataset_segments\"!";
+      echo "Valid dataset types are: seg, uem, pem";
+      exit 1
+    fi
+  else
+    echo "Unknown kind of the dataset: \"$dataset_kind\"!";
+    echo "Valid dataset kinds are: supervised, unsupervised, shadow";
+    exit 1
+  fi
+
+  if [ ! -f ${dataset_dir}/.plp.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Preparing ${dataset_kind} parametrization files in ${dataset_dir} on" `date`
+    echo ---------------------------------------------------------------------
+    make_plp ${dataset_dir} exp/make_plp/${dataset_id} plp
+    touch ${dataset_dir}/.plp.done
+  fi
+  touch $dataset_dir/.done
+fi
+
+if [ -f exp/nnet3/extractor/final.ie ] && [ ! -f ${dataset_dir}_hires/.mfcc.done ]; then
+  dataset=$(basename $dataset_dir)
+  echo ---------------------------------------------------------------------
+  echo "Preparing ${dataset_kind} MFCC features in ${dataset_dir}_hires and corresponding iVectors in exp/nnet3/ivectors_$dataset on" `date`
+  echo ---------------------------------------------------------------------
+  if [ ! 
-d ${dataset_dir}_hires ]; then
+    utils/copy_data_dir.sh data/${dataset_type}.${dataset_segments}_hires data/${dataset}_hires
+  fi
+  ln -sf ivectors_${dataset_type}.${dataset_segments} exp/nnet3/ivectors_${dataset} || true
+  touch ${dataset_dir}_hires/.done
+fi
+set -x
+ln -sf ivectors_${dataset_type}.${dataset_segments} exp/nnet3/ivectors_${dataset} || true
+set +x
+
+#####################################################################
+#
+# KWS data directory preparation
+#
+#####################################################################
+echo ---------------------------------------------------------------------
+echo "Preparing kws data files in ${dataset_dir} on" `date`
+echo ---------------------------------------------------------------------
+lang=data/lang.phn
+if ! $skip_kws ; then
+  if $extra_kws ; then
+    L1_lex=data/local/dict.phn/lexiconp.txt
+    . ./local/datasets/extra_kws.sh || exit 1
+  fi
+  if $vocab_kws ; then
+    . ./local/datasets/vocab_kws.sh || exit 1
+  fi
+fi
+
+if $data_only ; then
+  echo "Exiting, as data-only was requested..."
+  exit 0;
+fi
+
+####################################################################
+##
+## FMLLR decoding
+##
+####################################################################
+if [ ! -f data/langp_test.phn/.done ]; then
+  ln -sf lang.phn data/langp_test.phn || true
+  touch data/langp_test.phn/.done
+fi
+
+decode=exp/tri5/decode_${dataset_id}
+if [ ! -f ${decode}/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Spawning decoding with SAT models on" `date`
+  echo ---------------------------------------------------------------------
+  utils/mkgraph.sh \
+    data/langp_test.phn exp/tri5 exp/tri5/graph.phn |tee exp/tri5/mkgraph.phn.log
+
+  mkdir -p $decode
+  #By default, we do not care about the lattices for this step -- we just want the transforms
+  #Therefore, we will reduce the beam sizes, to reduce the decoding times
+  steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4\
+    --nj $my_nj --cmd "$decode_cmd" "${decode_extra_opts[@]}"\
+    exp/tri5/graph.phn ${dataset_dir} ${decode} |tee ${decode}/decode.log
+  touch ${decode}/.done
+fi
+
+if ! $fast_path ; then
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.phn ${decode}
+
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.phn ${decode}.si
+fi
+
+if $tri5_only; then
+  echo "--tri5-only is true. So exiting."
+  exit 0
+fi
+
+####################################################################
+## SGMM2 decoding
+## We include the SGMM_MMI inside this, as we might only have the DNN systems
+## trained and not the PLP system. The DNN systems are built only on top of the tri5 stage
+####################################################################
+if [ -f exp/sgmm5/.done ]; then
+  decode=exp/sgmm5/decode_fmllr_${dataset_id}
+  if [ ! 
-f $decode/.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Spawning $decode on" `date`
+    echo ---------------------------------------------------------------------
+    utils/mkgraph.sh \
+      data/langp_test.phn exp/sgmm5 exp/sgmm5/graph.phn |tee exp/sgmm5/mkgraph.phn.log
+
+    mkdir -p $decode
+    steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \
+      --cmd "$decode_cmd" --transform-dir exp/tri5/decode_${dataset_id} "${decode_extra_opts[@]}"\
+      exp/sgmm5/graph.phn ${dataset_dir} $decode |tee $decode/decode.log
+    touch $decode/.done
+
+    if ! $fast_path ; then
+      local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+        --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+        --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+        "${lmwt_plp_extra_opts[@]}" \
+        ${dataset_dir} data/langp_test.phn exp/sgmm5/decode_fmllr_${dataset_id}
+    fi
+  fi
+
+  ####################################################################
+  ##
+  ## SGMM_MMI rescoring
+  ##
+  ####################################################################
+
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    if [ ! -f $decode/.done ]; then
+
+      mkdir -p $decode
+      steps/decode_sgmm2_rescore.sh --skip-scoring true \
+        --cmd "$decode_cmd" --iter $iter --transform-dir exp/tri5/decode_${dataset_id} \
+        data/langp_test.phn ${dataset_dir} exp/sgmm5/decode_fmllr_${dataset_id} $decode | tee ${decode}/decode.log
+
+      touch $decode/.done
+    fi
+  done
+
+  #We are done -- all lattices have been generated. We still have to
+  #a) run MBR decoding
+  #b) run KW search
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+      --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+      --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+      "${lmwt_plp_extra_opts[@]}" \
+      ${dataset_dir} data/langp_test.phn $decode
+  done
+fi
+
+
+
+####################################################################
+##
+## DNN ("compatibility") decoding -- also, just decode the "default" net
+##
+####################################################################
+if [ -f exp/tri6_nnet/.done ]; then
+  decode=exp/tri6_nnet/decode_${dataset_id}
+  if [ ! -f $decode/.done ]; then
+    mkdir -p $decode
+    steps/nnet2/decode.sh \
+      --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \
+      --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+      --skip-scoring true "${decode_extra_opts[@]}" \
+      --transform-dir exp/tri5/decode_${dataset_id} \
+      exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log
+
+    touch $decode/.done
+  fi
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_dnn_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.phn $decode
+fi
+
+####################################################################
+##
+## nnet3 model decoding
+##
+####################################################################
+if [ -f exp/nnet3/lstm_bidirectional_sp/.done ]; then
+  decode=exp/nnet3/lstm_bidirectional_sp/decode_${dataset_id}.phn
+  rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 "
+  decode_script=steps/nnet3/lstm/decode.sh
+  if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi + +if [ -f exp/nnet3/lstm_sp/.done ]; then + decode=exp/nnet3/lstm_sp/decode_${dataset_id}.phn + rnn_opts=" --extra-left-context 40 --extra-right-context 0 --frames-per-chunk 20 " + decode_script=steps/nnet3/lstm/decode.sh + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi + +if [ -f exp/$nnet3_model/.done ]; then + decode=exp/$nnet3_model/decode_${dataset_id}.phn + rnn_opts= + decode_script=steps/nnet3/decode.sh + if [ "$is_rnn" == "true" ]; then + rnn_opts=" --extra-left-context $extra_left_context --extra-right-context $extra_right_context --frames-per-chunk $frames_per_chunk " + decode_script=steps/nnet3/lstm/decode.sh + fi + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi + + +#################################################################### +## +## DNN (nextgen DNN) decoding +## +#################################################################### +if [ -f exp/tri6a_nnet/.done ]; then + decode=exp/tri6a_nnet/decode_${dataset_id} + if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi + + +#################################################################### +## +## DNN (ensemble) decoding +## +#################################################################### +if [ -f exp/tri6b_nnet/.done ]; then + decode=exp/tri6b_nnet/decode_${dataset_id} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi +#################################################################### +## +## DNN_MPE decoding +## +#################################################################### +if [ -f exp/tri6_nnet_mpe/.done ]; then + for epoch in 1 2 3 4; do + decode=exp/tri6_nnet_mpe/decode_${dataset_id}_epoch$epoch + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh --minimize $minimize \ + --cmd "$decode_cmd" --nj $my_nj --iter epoch$epoch \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode + done +fi + +#################################################################### +## +## DNN semi-supervised training decoding +## +#################################################################### +for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \ + tri6_nnet_supervised_tuning tri6_nnet_supervised_tuning2 ; do + if [ -f exp/$dnn/.done ]; then + decode=exp/$dnn/decode_${dataset_id} + if [ ! 
-f $decode/.done ]; then
+      mkdir -p $decode
+      steps/nnet2/decode.sh \
+        --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \
+        --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+        --skip-scoring true "${decode_extra_opts[@]}" \
+        --transform-dir exp/tri5/decode_${dataset_id} \
+        exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log
+
+      touch $decode/.done
+    fi
+
+    local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+      --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+      --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+      "${lmwt_dnn_extra_opts[@]}" \
+      ${dataset_dir} data/langp_test.phn $decode
+  fi
+done
+echo "Everything looking good...."
+exit 0
diff --git a/egs/babel/s5d/run-4-syll-anydecode.sh b/egs/babel/s5d/run-4-syll-anydecode.sh
new file mode 100755
index 00000000000..e69b168cf49
--- /dev/null
+++ b/egs/babel/s5d/run-4-syll-anydecode.sh
@@ -0,0 +1,613 @@
+#!/bin/bash
+set -e
+set -o pipefail
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+
+dir=dev10h.syll.pem
+kind=
+data_only=false
+fast_path=true
+skip_kws=false
+skip_stt=false
+skip_scoring=
+extra_kws=true
+vocab_kws=false
+tri5_only=false
+wip=0.5
+
+nnet3_model=nnet3/tdnn_sp
+is_rnn=false
+extra_left_context=0
+extra_right_context=0
+frames_per_chunk=0
+
+echo $0 "$@"
+
+. utils/parse_options.sh
+
+if [ $# -ne 0 ]; then
+  echo "Usage: $(basename $0) --type (dev10h.syll|dev2h.syll|eval.syll|shadow.syll)"
+  exit 1
+fi
+
+#This seems to be the only functioning way to ensure that the complete
+#set of scripts will exit when sourcing several of them together.
+#Otherwise, CTRL-C just terminates the deepest sourced script.
+# Let shell functions inherit ERR trap. Same as `set -E'.
+set -o errtrace
+trap "echo Exited!; exit;" SIGINT SIGTERM
+
+./local/check_tools.sh || exit 1
+
+# Set proxy search parameters for the extended lexicon case.
+if [ -f data/.extlex ]; then
+  proxy_phone_beam=$extlex_proxy_phone_beam
+  proxy_phone_nbest=$extlex_proxy_phone_nbest
+  proxy_beam=$extlex_proxy_beam
+  proxy_nbest=$extlex_proxy_nbest
+fi
+
+dataset_segments=${dir##*.}
+dataset_dir=data/$dir
+dataset_id=$dir
+dataset_type=${dir%%.syll.*}
+#By default, we want the script to accept how the dataset should be handled,
+#i.e. 
what kind the dataset is
+if [ -z ${kind} ] ; then
+  if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ]; then
+    dataset_kind=supervised
+  else
+    dataset_kind=unsupervised
+  fi
+else
+  dataset_kind=$kind
+fi
+
+if [ -z $dataset_segments ]; then
+  echo "You have to specify the segmentation type as well"
+  echo "If you are trying to decode the PEM segmentation dir"
+  echo "such as data/dev10h, specify dev10h.pem"
+  echo "The valid segmentation types are:"
+  echo -e "\tpem #PEM segmentation"
+  echo -e "\tuem #UEM segmentation in the CMU database format"
+  echo -e "\tseg #UEM segmentation (kaldi-native)"
+  exit 1
+fi
+
+if [ -z "${skip_scoring}" ] ; then
+  if [ "$dataset_kind" == "unsupervised" ]; then
+    skip_scoring=true
+  else
+    skip_scoring=false
+  fi
+fi
+
+#The $dataset_type value will be the dataset name without any extension
+eval my_data_dir=( "\${${dataset_type}_data_dir[@]}" )
+eval my_data_list=( "\${${dataset_type}_data_list[@]}" )
+if [ -z $my_data_dir ] || [ -z $my_data_list ] ; then
+  echo "Error: The dir you specified ($dataset_id) does not have an existing config";
+  exit 1
+fi
+
+eval my_stm_file=\$${dataset_type}_stm_file
+eval my_ecf_file=\$${dataset_type}_ecf_file
+eval my_rttm_file=\$${dataset_type}_rttm_file
+eval my_nj=\$${dataset_type}_nj  #for shadow, this will be re-set when appropriate
+
+if [ -z "$my_nj" ]; then
+  echo >&2 "You didn't specify the number of jobs -- variable \"${dataset_type}_nj\" not defined."
+  exit 1
+fi
+my_nj=$(($my_nj * 2))
+
+my_subset_ecf=false
+eval ind=\${${dataset_type}_subset_ecf+x}
+if [ "$ind" == "x" ] ; then
+  eval my_subset_ecf=\$${dataset_type}_subset_ecf
+fi
+
+declare -A my_kwlists=()
+eval my_kwlists_keys="\${!${dataset_type}_kwlists[@]}"
+for key in $my_kwlists_keys  # make sure you include the quotes there
+do
+  eval my_kwlists_val="\${${dataset_type}_kwlists[$key]}"
+  my_kwlists["$key"]="${my_kwlists_val}"
+done
+declare -p my_kwlists
+export my_kwlists
+
+#Just a minor safety precaution to prevent using incorrect settings
+#The dataset_* variables should be used.
+set -e
+set -o pipefail
+set -u
+unset dir
+unset kind
+
+function make_plp {
+  target=$1
+  logdir=$2
+  output=$3
+  if $use_pitch; then
+    steps/make_plp_pitch.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  else
+    steps/make_plp.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  fi
+  utils/fix_data_dir.sh $target
+  steps/compute_cmvn_stats.sh $target $logdir $output
+  utils/fix_data_dir.sh $target
+}
+
+function check_variables_are_set {
+  for variable in $mandatory_variables ; do
+    if ! declare -p $variable ; then
+      echo "Mandatory variable ${variable/my/$dataset_type} is not set! "
+      echo "You should probably set the variable in the config file "
+      exit 1
+    else
+      declare -p $variable
+    fi
+  done
+
+  if [ ! -z ${optional_variables+x} ] ; then
+    for variable in $optional_variables ; do
+      eval my_variable=\$${variable}
+      echo "$variable=$my_variable"
+    done
+  fi
+}
+
+if [ ! -f data/raw_${dataset_type}_data/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the ${dataset_type} set"
+  echo ---------------------------------------------------------------------
+
+  l1=${#my_data_dir[*]}
+  l2=${#my_data_list[*]}
+  if [ "$l1" -ne "$l2" ]; then
+    echo "Error: the number of source file lists is not the same as the number of source dirs!"
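+    # The two arrays must be parallel: the subsetting loop below pairs
+    # ${my_data_dir[$i]} with ${my_data_list[$i]}. A hypothetical matching
+    # configuration (values for illustration only) would be:
+    #   dev10h_data_dir=( /corpus/partA /corpus/partB )
+    #   dev10h_data_list=( conf/lists/partA.list conf/lists/partB.list )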
+    exit 1
+  fi
+
+  resource_string=""
+  if [ "$dataset_kind" == "unsupervised" ]; then
+    resource_string+=" --ignore-missing-txt true"
+  fi
+
+  for i in `seq 0 $(($l1 - 1))`; do
+    resource_string+=" ${my_data_dir[$i]} "
+    resource_string+=" ${my_data_list[$i]} "
+  done
+  local/make_corpus_subset.sh $resource_string ./data/raw_${dataset_type}_data
+  touch data/raw_${dataset_type}_data/.done
+fi
+my_data_dir=`readlink -f ./data/raw_${dataset_type}_data`
+[ -f $my_data_dir/filelist.list ] && my_data_list=$my_data_dir/filelist.list
+nj_max=`cat $my_data_list | wc -l` || nj_max=`ls $my_data_dir/audio | wc -l`
+
+if [ "$nj_max" -lt "$my_nj" ] ; then
+  echo "Number of jobs ($my_nj) is too big!"
+  echo "The maximum reasonable number of jobs is $nj_max"
+  my_nj=$nj_max
+fi
+
+#####################################################################
+#
+# Audio data directory preparation
+#
+#####################################################################
+echo ---------------------------------------------------------------------
+echo "Preparing ${dataset_kind} data files in ${dataset_dir} on" `date`
+echo ---------------------------------------------------------------------
+if [ ! -f $dataset_dir/.done ] ; then
+  if [ "$dataset_kind" == "supervised" ]; then
+    if [ "$dataset_segments" == "seg" ]; then
+      . ./local/datasets/supervised_seg.sh || exit 1
+    elif [ "$dataset_segments" == "uem" ]; then
+      . ./local/datasets/supervised_uem.sh || exit 1
+    elif [ "$dataset_segments" == "pem" ]; then
+      . ./local/datasets/supervised_pem.sh || exit 1
+    else
+      echo "Unknown type of the dataset: \"$dataset_segments\"!";
+      echo "Valid dataset types are: seg, uem, pem";
+      exit 1
+    fi
+  elif [ "$dataset_kind" == "unsupervised" ] ; then
+    if [ "$dataset_segments" == "seg" ] ; then
+      . ./local/datasets/unsupervised_seg.sh
+    elif [ "$dataset_segments" == "uem" ] ; then
+      . ./local/datasets/unsupervised_uem.sh
+    elif [ "$dataset_segments" == "pem" ] ; then
+      ##This combination does not really make sense:
+      ##with PEM we are given the segmentation and, because of the
+      ##format of the segment files, the transcript as well.
+      echo "ERROR: $dataset_segments combined with $dataset_type"
+      echo "does not really make any sense!"
+      exit 1
+      #. ./local/datasets/unsupervised_pem.sh
+    else
+      echo "Unknown type of the dataset: \"$dataset_segments\"!";
+      echo "Valid dataset types are: seg, uem, pem";
+      exit 1
+    fi
+  else
+    echo "Unknown kind of the dataset: \"$dataset_kind\"!";
+    echo "Valid dataset kinds are: supervised, unsupervised, shadow";
+    exit 1
+  fi
+
+  if [ ! -f ${dataset_dir}/.plp.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Preparing ${dataset_kind} parametrization files in ${dataset_dir} on" `date`
+    echo ---------------------------------------------------------------------
+    make_plp ${dataset_dir} exp/make_plp/${dataset_id} plp
+    touch ${dataset_dir}/.plp.done
+  fi
+  touch $dataset_dir/.done
+fi
+
+if [ -f exp/nnet3/extractor/final.ie ] && [ ! -f ${dataset_dir}_hires/.mfcc.done ]; then
+  dataset=$(basename $dataset_dir)
+  echo ---------------------------------------------------------------------
+  echo "Preparing ${dataset_kind} MFCC features in ${dataset_dir}_hires and corresponding iVectors in exp/nnet3/ivectors_$dataset on" `date`
+  echo ---------------------------------------------------------------------
+  if [ ! 
-d ${dataset_dir}_hires ]; then
+    utils/copy_data_dir.sh data/${dataset_type}.${dataset_segments}_hires data/${dataset}_hires
+  fi
+  ln -sf ivectors_${dataset_type}.${dataset_segments} exp/nnet3/ivectors_${dataset} || true
+  touch ${dataset_dir}_hires/.done
+fi
+set -x
+ln -sf ivectors_${dataset_type}.${dataset_segments} exp/nnet3/ivectors_${dataset} || true
+set +x
+
+#####################################################################
+#
+# KWS data directory preparation
+#
+#####################################################################
+echo ---------------------------------------------------------------------
+echo "Preparing kws data files in ${dataset_dir} on" `date`
+echo ---------------------------------------------------------------------
+lang=data/lang.syll
+if ! $skip_kws ; then
+  if $extra_kws ; then
+    L1_lex=data/local/dict.syll/lexiconp.txt
+    . ./local/datasets/extra_kws.sh || exit 1
+  fi
+  if $vocab_kws ; then
+    . ./local/datasets/vocab_kws.sh || exit 1
+  fi
+fi
+
+if $data_only ; then
+  echo "Exiting, as data-only was requested..."
+  exit 0;
+fi
+
+####################################################################
+##
+## FMLLR decoding
+##
+####################################################################
+if [ ! -f data/langp_test.syll/.done ]; then
+  ln -sf lang.syll data/langp_test.syll || true
+  touch data/langp_test.syll/.done
+fi
+
+decode=exp/tri5/decode_${dataset_id}
+if [ ! -f ${decode}/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Spawning decoding with SAT models on" `date`
+  echo ---------------------------------------------------------------------
+  utils/mkgraph.sh \
+    data/langp_test.syll exp/tri5 exp/tri5/graph.syll |tee exp/tri5/mkgraph.syll.log
+
+  mkdir -p $decode
+  #By default, we do not care about the lattices for this step -- we just want the transforms
+  #Therefore, we will reduce the beam sizes, to reduce the decoding times
+  steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4\
+    --nj $my_nj --cmd "$decode_cmd" "${decode_extra_opts[@]}"\
+    exp/tri5/graph.syll ${dataset_dir} ${decode} |tee ${decode}/decode.log
+  touch ${decode}/.done
+fi
+
+if ! $fast_path ; then
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.syll ${decode}
+
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.syll ${decode}.si
+fi
+
+if $tri5_only; then
+  echo "--tri5-only is true. So exiting."
+  exit 0
+fi
+
+####################################################################
+## SGMM2 decoding
+## We include the SGMM_MMI inside this, as we might only have the DNN systems
+## trained and not the PLP system. The DNN systems are built only on top of the tri5 stage
+####################################################################
+if [ -f exp/sgmm5/.done ]; then
+  decode=exp/sgmm5/decode_fmllr_${dataset_id}
+  if [ ! 
-f $decode/.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Spawning $decode on" `date`
+    echo ---------------------------------------------------------------------
+    utils/mkgraph.sh \
+      data/langp_test.syll exp/sgmm5 exp/sgmm5/graph.syll |tee exp/sgmm5/mkgraph.syll.log
+
+    mkdir -p $decode
+    steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \
+      --cmd "$decode_cmd" --transform-dir exp/tri5/decode_${dataset_id} "${decode_extra_opts[@]}"\
+      exp/sgmm5/graph.syll ${dataset_dir} $decode |tee $decode/decode.log
+    touch $decode/.done
+
+    if ! $fast_path ; then
+      local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+        --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+        --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+        "${lmwt_plp_extra_opts[@]}" \
+        ${dataset_dir} data/langp_test.syll exp/sgmm5/decode_fmllr_${dataset_id}
+    fi
+  fi
+
+  ####################################################################
+  ##
+  ## SGMM_MMI rescoring
+  ##
+  ####################################################################
+
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    if [ ! -f $decode/.done ]; then
+
+      mkdir -p $decode
+      steps/decode_sgmm2_rescore.sh --skip-scoring true \
+        --cmd "$decode_cmd" --iter $iter --transform-dir exp/tri5/decode_${dataset_id} \
+        data/langp_test.syll ${dataset_dir} exp/sgmm5/decode_fmllr_${dataset_id} $decode | tee ${decode}/decode.log
+
+      touch $decode/.done
+    fi
+  done
+
+  #We are done -- all lattices have been generated. We still have to
+  #a) run MBR decoding
+  #b) run KW search
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+      --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+      --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+      "${lmwt_plp_extra_opts[@]}" \
+      ${dataset_dir} data/langp_test.syll $decode
+  done
+fi
+
+
+
+####################################################################
+##
+## DNN ("compatibility") decoding -- also, just decode the "default" net
+##
+####################################################################
+if [ -f exp/tri6_nnet/.done ]; then
+  decode=exp/tri6_nnet/decode_${dataset_id}
+  if [ ! -f $decode/.done ]; then
+    mkdir -p $decode
+    steps/nnet2/decode.sh \
+      --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \
+      --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+      --skip-scoring true "${decode_extra_opts[@]}" \
+      --transform-dir exp/tri5/decode_${dataset_id} \
+      exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log
+
+    touch $decode/.done
+  fi
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_dnn_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.syll $decode
+fi
+
+####################################################################
+##
+## nnet3 model decoding
+##
+####################################################################
+if [ -f exp/nnet3/lstm_bidirectional_sp/.done ]; then
+  decode=exp/nnet3/lstm_bidirectional_sp/decode_${dataset_id}.syll
+  rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 "
+  decode_script=steps/nnet3/lstm/decode.sh
+  if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi + +if [ -f exp/nnet3/lstm_sp/.done ]; then + decode=exp/nnet3/lstm_sp/decode_${dataset_id}.syll + rnn_opts=" --extra-left-context 40 --extra-right-context 0 --frames-per-chunk 20 " + decode_script=steps/nnet3/lstm/decode.sh + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi + +if [ -f exp/$nnet3_model/.done ]; then + decode=exp/$nnet3_model/decode_${dataset_id}.syll + rnn_opts= + decode_script=steps/nnet3/decode.sh + if [ "$is_rnn" == "true" ]; then + rnn_opts=" --extra-left-context $extra_left_context --extra-right-context $extra_right_context --frames-per-chunk $frames_per_chunk " + decode_script=steps/nnet3/lstm/decode.sh + fi + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi + + +#################################################################### +## +## DNN (nextgen DNN) decoding +## +#################################################################### +if [ -f exp/tri6a_nnet/.done ]; then + decode=exp/tri6a_nnet/decode_${dataset_id} + if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi + + +#################################################################### +## +## DNN (ensemble) decoding +## +#################################################################### +if [ -f exp/tri6b_nnet/.done ]; then + decode=exp/tri6b_nnet/decode_${dataset_id} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi +#################################################################### +## +## DNN_MPE decoding +## +#################################################################### +if [ -f exp/tri6_nnet_mpe/.done ]; then + for epoch in 1 2 3 4; do + decode=exp/tri6_nnet_mpe/decode_${dataset_id}_epoch$epoch + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh --minimize $minimize \ + --cmd "$decode_cmd" --nj $my_nj --iter epoch$epoch \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode + done +fi + +#################################################################### +## +## DNN semi-supervised training decoding +## +#################################################################### +for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \ + tri6_nnet_supervised_tuning tri6_nnet_supervised_tuning2 ; do + if [ -f exp/$dnn/.done ]; then + decode=exp/$dnn/decode_${dataset_id} + if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode + fi +done +echo "Everything looking good...." +exit 0 diff --git a/egs/babel/s5d/run-4b-anydecode-bnf.sh b/egs/babel/s5d/run-4b-anydecode-bnf.sh new file mode 100755 index 00000000000..8298021feb3 --- /dev/null +++ b/egs/babel/s5d/run-4b-anydecode-bnf.sh @@ -0,0 +1,259 @@ +#!/bin/bash +# Copyright 2014 Pegah Ghahremani +# Apache 2.0 + +# decode BNF + sgmm_mmi system +set -e +set -o pipefail + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + + +dir=dev10h.pem +kind= +data_only=false +fast_path=true +skip_kws=false +extra_kws=false +skip_stt=false +skip_scoring=false +tmpdir=`pwd` +semisupervised=true +unsup_string= + +. utils/parse_options.sh + +type=$dir + +if [ $# -ne 0 ]; then + echo "Usage: $(basename $0) --type (dev10h|dev2h|eval|shadow)" + echo "--semisupervised #set to false to skip unsupervised training." + exit 1 +fi + +if [ $babel_type == "full" ] && $semisupervised; then + echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false " + exit 1 +fi + +if [ -z "$unsup_string" ] ; then + if $semisupervised ; then + unsup_string="_semisup" + else + unsup_string="" #" ": supervised training, _semi_supervised: unsupervised BNF training + fi +fi + +if ! echo {dev10h,dev2h,eval,unsup,shadow}{,.pem,.uem,.seg} | grep -w "$type" >/dev/null; then + # note: echo dev10.uem | grep -w dev10h will produce a match, but this + # doesn't matter because dev10h is also a valid value. + echo "Invalid variable type=${type}, valid values are " {dev10h,dev2h,eval,unsup}{,.uem,.seg} + exit 1; +fi + +dataset_segments=${dir##*.} +dataset_dir=data/$dir +dataset_id=$dir +dataset_type=${dir%%.*} +#By default, we want the script to accept how the dataset should be handled, +#i.e. of what kind is the dataset +if [ -z ${kind} ] ; then + if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ] ; then + dataset_kind=supervised + else + dataset_kind=unsupervised + fi +else + dataset_kind=$kind +fi + +if [ -z $dataset_segments ]; then + echo "You have to specify the segmentation type as well" + echo "If you are trying to decode the PEM segmentation dir" + echo "such as data/dev10h, specify dev10h.pem" + echo "The valid segmentations types are:" + echo "\tpem #PEM segmentation" + echo "\tuem #UEM segmentation in the CMU database format" + echo "\tseg #UEM segmentation (kaldi-native)" +fi + +if [ "$dataset_kind" == "unsupervised" ]; then + skip_scoring=true +fi + +dirid=${type} +exp_dir=exp_bnf${unsup_string} +data_bnf_dir=data_bnf${unsup_string} +param_bnf_dir=param_bnf${unsup_string} +datadir=$data_bnf_dir/${dirid} + +[ ! -d data/${dirid} ] && echo "No such directory data/${dirid}" && exit 1; +[ ! -d exp/tri5/decode_${dirid} ] && echo "No such directory exp/tri5/decode_${dirid}" && exit 1; + +# Set my_nj; typically 64. +my_nj=`cat exp/tri5/decode_${dirid}/num_jobs` || exit 1; + + +if [ ! 
$data_bnf_dir/${dirid}_bnf/.done -nt exp/tri5/decode_${dirid}/.done ] || \ + [ ! $data_bnf_dir/${dirid}_bnf/.done -nt $exp_dir/tri6_bnf/.done ]; then + # put the archives in $param_bnf_dir/. + steps/nnet2/dump_bottleneck_features.sh --nj $my_nj --cmd "$train_cmd" \ + --transform-dir exp/tri5/decode_${dirid} data/${dirid} $data_bnf_dir/${dirid}_bnf $exp_dir/tri6_bnf $param_bnf_dir $exp_dir/dump_bnf + touch $data_bnf_dir/${dirid}_bnf/.done +fi + +if [ ! $data_bnf_dir/${dirid}/.done -nt $data_bnf_dir/${dirid}_bnf/.done ]; then + steps/nnet/make_fmllr_feats.sh --cmd "$train_cmd --max-jobs-run 10" \ + --nj $train_nj --transform-dir exp/tri5/decode_${dirid} $data_bnf_dir/${dirid}_sat data/${dirid} \ + exp/tri5_ali $exp_dir/make_fmllr_feats/log $param_bnf_dir/ + + steps/append_feats.sh --cmd "$train_cmd" --nj 4 \ + $data_bnf_dir/${dirid}_bnf $data_bnf_dir/${dirid}_sat $data_bnf_dir/${dirid} \ + $exp_dir/append_feats/log $param_bnf_dir/ + steps/compute_cmvn_stats.sh --fake $data_bnf_dir/${dirid} $exp_dir/make_fmllr_feats $param_bnf_dir + rm -r $data_bnf_dir/${dirid}_sat + if ! $skip_kws ; then + cp -r data/${dirid}/*kws* $data_bnf_dir/${dirid}/ || true + fi + touch $data_bnf_dir/${dirid}/.done +fi +if ! $skip_kws ; then + rm -rf $data_bnf_dir/${dirid}/*kws* + cp -r data/${dirid}/*kws* $data_bnf_dir/${dirid}/ || true +fi + + +if $data_only ; then + echo "Exiting, as data-only was requested... " + exit 0 +fi + +#################################################################### +## +## FMLLR decoding +## +#################################################################### +decode=$exp_dir/tri6/decode_${dirid} +if [ ! -f ${decode}/.done ]; then + echo --------------------------------------------------------------------- + echo "Decoding with SAT models on top of bottleneck features on" `date` + echo --------------------------------------------------------------------- + utils/mkgraph.sh \ + data/langp_test $exp_dir/tri6 $exp_dir/tri6/graph |tee $exp_dir/tri6/mkgraph.log + + mkdir -p $decode + #By default, we do not care about the lattices for this step -- we just want the transforms. + #Therefore, we reduce the beam sizes to reduce the decoding time. + steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4 \ + --acwt $bnf_decode_acwt \ + --nj $my_nj --cmd "$decode_cmd" "${decode_extra_opts[@]}"\ + $exp_dir/tri6/graph ${datadir} ${decode} |tee ${decode}/decode.log + touch ${decode}/.done +fi + +if ! $fast_path ; then + local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip\ + "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \ + ${datadir} data/langp_test ${decode} + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \ + "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \ + ${datadir} data/langp_test ${decode}.si +fi + +#################################################################### +## SGMM2 decoding +#################################################################### +decode=$exp_dir/sgmm7/decode_fmllr_${dirid} +if [ ! -f $decode/.done ]; then
+ echo --------------------------------------------------------------------- + echo "Spawning $decode on" `date` + echo --------------------------------------------------------------------- + utils/mkgraph.sh \ + data/langp_test $exp_dir/sgmm7 $exp_dir/sgmm7/graph |tee $exp_dir/sgmm7/mkgraph.log + + mkdir -p $decode + steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \ + --acwt $bnf_decode_acwt \ + --cmd "$decode_cmd" --transform-dir $exp_dir/tri6/decode_${dirid} "${decode_extra_opts[@]}"\ + $exp_dir/sgmm7/graph ${datadir} $decode |tee $decode/decode.log + touch $decode/.done +fi + +if ! $fast_path ; then + local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \ + "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \ + ${datadir} data/langp_test $exp_dir/sgmm7/decode_fmllr_${dirid} +fi + +#################################################################### +## +## SGMM_MMI rescoring +## +#################################################################### + +for iter in 1 2 3 4; do + # Decode SGMM+MMI (via rescoring). + decode=$exp_dir/sgmm7_mmi_b0.1/decode_fmllr_${dirid}_it$iter + if [ ! -f $decode/.done ]; then + + mkdir -p $decode + steps/decode_sgmm2_rescore.sh --skip-scoring true \ + --cmd "$decode_cmd" --iter $iter --transform-dir $exp_dir/tri6/decode_${dirid} \ + data/langp_test ${datadir} $exp_dir/sgmm7/decode_fmllr_${dirid} $decode | tee ${decode}/decode.log + + touch $decode/.done + fi +done + +#We are done -- all lattices have been generated. We still have to: +#a) run MBR decoding +#b) run KW search +for iter in 1 2 3 4; do + # Decode SGMM+MMI (via rescoring). + decode=$exp_dir/sgmm7_mmi_b0.1/decode_fmllr_${dirid}_it$iter + local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \ + "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \ + ${datadir} data/langp_test $decode +done + + +if [ -f $exp_dir/tri7_nnet/.done ] && + [[ ( ! $exp_dir/tri7_nnet/decode_${dirid}/.done -nt $datadir/.done) || \ + (! $exp_dir/tri7_nnet/decode_${dirid}/.done -nt $exp_dir/tri7_nnet/.done ) ]]; then + + echo --------------------------------------------------------------------- + echo "Decoding hybrid system on top of bottleneck features on" `date` + echo --------------------------------------------------------------------- + + # We use the graph from tri6. + utils/mkgraph.sh \ + data/langp_test $exp_dir/tri6 $exp_dir/tri6/graph |tee $exp_dir/tri6/mkgraph.log + + decode=$exp_dir/tri7_nnet/decode_${dirid} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh --cmd "$decode_cmd" --nj $my_nj \ + --acwt $bnf_decode_acwt \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --feat-type raw \ + $exp_dir/tri6/graph ${datadir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + decode=$exp_dir/tri7_nnet/decode_${dirid} + local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \ + "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \ + ${datadir} data/langp_test $decode + +fi + +echo "$0: Everything looking good...."
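# For reference: every decode stage above (and in the other run-* scripts of
# this recipe) relies on the same stamp-file idiom so that a failed or
# interrupted run can be resumed safely. A minimal sketch of the idiom
# (directory name hypothetical):
#
#   decode=exp/mysystem/decode_dev10h
#   if [ ! -f $decode/.done ]; then   # skip stages that already finished
#     mkdir -p $decode
#     # ... run the actual decoding command here ...
#     touch $decode/.done             # stamp only after success
#   fi
#
# The "-nt" (newer-than) variant used for the BNF feature dumps re-runs a
# stage whenever one of its inputs has a fresher .done stamp than its output.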
+exit 0 diff --git a/egs/babel/s5d/run-6-combine.sh b/egs/babel/s5d/run-6-combine.sh new file mode 100755 index 00000000000..81dc42caca3 --- /dev/null +++ b/egs/babel/s5d/run-6-combine.sh @@ -0,0 +1,73 @@ +#!/bin/bash + + +. conf/common_vars.sh +. ./lang.conf +. ./cmd.sh + +set -e +set -o pipefail +set -u + +function best_system_path_kws { + path_to_outputs=$1 + + best_out=`(find $path_to_outputs -name "sum.txt" | xargs grep "^| *Occ") | cut -f 1,13,17 -d '|' | sed 's/|//g' | sort -r -n -k 3 | head -n 1| awk '{print $1}'` + echo `dirname $best_out` +} + +function best_system_path_stt { + path_to_outputs=$1 + best_out=` (find $path_to_outputs -name "*.ctm.sys" | xargs grep Avg) | sed 's/|//g' | column -t | sort -n -k 9 | head -n 1| awk '{print $1}' ` + echo `dirname $best_out` +} +# Wait till the main run.sh gets to the stage where it's +# finished aligning the tri5 model. + +function lm_offsets { + min=999 + for dir in "$@" ; do + lmw=${dir##*score_} + + [ $lmw -le $min ] && min=$lmw + done + + lat_offset_str="" + for dir in "$@" ; do + latdir_dir=`dirname $dir` + lmw=${dir##*score_} + + offset=$(( $lmw - $min )) + if [ $offset -gt 0 ] ; then + lat_offset_str="$lat_offset_str ${latdir_dir}:$offset " + else + lat_offset_str="$lat_offset_str ${latdir_dir} " + fi + done + + echo $lat_offset_str + +} + +plp_kws=`best_system_path_kws "exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h_it*/kws_*"` +plp_stt=`best_system_path_stt "exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h_it*"` + +dnn_kws=`best_system_path_kws "exp/tri6_nnet/decode_dev10h/kws_*"` +dnn_stt=`best_system_path_stt "exp/tri6_nnet/decode_dev10h/"` + +bnf_kws=`best_system_path_kws "exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h_it*/kws_*"` +bnf_stt=`best_system_path_stt "exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h_it*"` + + + +echo local/score_combine.sh --cmd "$decode_cmd" data/dev10h data/lang `lm_offsets $plp_stt $dnn_stt $bnf_stt` exp/combine/dev10h +#local/score_combine.sh --cmd "$decode_cmd" data/dev10h data/lang `lm_offsets $plp_stt $dnn_stt $bnf_stt` exp/combine/dev10h + +echo local/kws_combine.sh --cmd "$decode_cmd" data/dev10h data/lang $plp_kws $dnn_kws $bnf_kws +#local/kws_combine.sh --cmd "$decode_cmd" data/dev10h data/lang $plp_kws/kwslist.xml $dnn_kws/kwslist.xml $bnf_kws/kwslist.xml exp/combine/dev10h/ + +mkdir -p exp/combine/kws_rescore +#local/rescoring/rescore_repeats.sh --cmd "$decode_cmd" \ +# exp/combine/dev10h/ data/dev10h data/train/text exp/combine/kws_rescore + +exit 0 diff --git a/egs/babel/s5d/steps b/egs/babel/s5d/steps new file mode 120000 index 00000000000..6e99bf5b5ad --- /dev/null +++ b/egs/babel/s5d/steps @@ -0,0 +1 @@ +../../wsj/s5/steps \ No newline at end of file diff --git a/egs/babel/s5d/utils b/egs/babel/s5d/utils new file mode 120000 index 00000000000..b240885218f --- /dev/null +++ b/egs/babel/s5d/utils @@ -0,0 +1 @@ +../../wsj/s5/utils \ No newline at end of file diff --git a/egs/wsj/s5/steps/make_index.sh b/egs/wsj/s5/steps/make_index.sh index 5bc9af25241..6c29dbbe8b6 100755 --- a/egs/wsj/s5/steps/make_index.sh +++ b/egs/wsj/s5/steps/make_index.sh @@ -19,6 +19,9 @@ skip_optimization=false # If you only search for a few thousand keywords, you # can skip the optimization; but if you're going to search for # millions of keywords, you'd better set this option to # false and do the optimization on the final index. +frame_subsampling_factor= # We will try to autodetect this. You should specify + # the right value if your directory structure is + # non-standard. # End configuration section.
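# For reference: models that emit at a reduced frame rate (e.g. chain models
# with a subsampling factor of 3) produce lattices whose arcs advance several
# input frames per step, which is why the factor now has to reach
# lattice-to-kws-index and kws-search. A sketch of the frame arithmetic,
# assuming the conventional 10 ms frame shift (the paths are hypothetical):
#
#   factor=1
#   [ -f exp/chain/tdnn/frame_subsampling_factor ] && \
#     factor=$(cat exp/chain/tdnn/frame_subsampling_factor)
#   frame=376    # an end frame taken from a search hit
#   seconds=$(awk "BEGIN{print $frame * $factor * 0.01}")
#   echo "frame $frame -> $seconds seconds (subsampling factor $factor)"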
echo "$0 $@" # Print the command line for logging @@ -52,25 +55,39 @@ srcdir=`dirname $decodedir`; # The model directory is one level up from decoding mkdir -p $kwsdir/log; nj=`cat $decodedir/num_jobs` || exit 1; echo $nj > $kwsdir/num_jobs; + utter_id=$kwsdatadir/utter_id +if [ ! -f $utter_id ] ; then + utter_id=$kwsdatadir/utt.map +fi + if [ -z "$model" ]; then # if --model was not specified on the command line... model=$srcdir/final.mdl; fi for f in $model $decodedir/lat.1.gz $utter_id; do - [ ! -f $f ] && echo "make_index.sh: no such file $f" && exit 1; + [ ! -f $f ] && echo "$0: Error: no such file $f" && exit 1; done -echo "Using model: $model" +echo "$0: Using model: $model" if [ ! -z $silence_word ]; then silence_int=`grep -w $silence_word $langdir/words.txt | awk '{print $2}'` [ -z $silence_int ] && \ - echo "Error: could not find integer representation of silence word $silence_word" && exit 1; + echo "$0: Error: could not find integer representation of silence word $silence_word" && exit 1; silence_opt="--silence-label=$silence_int" fi +if [ -z "$frame_subsampling_factor" ]; then + if [ -f $decodedir/../frame_subsampling_factor ] ; then + frame_subsampling_factor=$(cat $decodedir/../frame_subsampling_factor) + else + frame_subsampling_factor=1 + fi + echo "$0: Frame subsampling factor autodetected: $frame_subsampling_factor" +fi + word_boundary=$langdir/phones/word_boundary.int align_lexicon=$langdir/phones/align_lexicon.int if [ -f $word_boundary ] ; then @@ -79,6 +96,7 @@ if [ -f $word_boundary ] ; then lattice-align-words $silence_opt --max-expand=$max_expand $word_boundary $model ark:- ark:- \| \ lattice-scale --acoustic-scale=$acwt --lm-scale=$lmwt ark:- ark:- \| \ lattice-to-kws-index --max-states-scale=$max_states_scale --allow-partial=true \ + --frame-subsampling-factor=$frame_subsampling_factor \ --max-silence-frames=$max_silence_frames --strict=$strict ark:$utter_id ark:- ark:- \| \ kws-index-union --skip-optimization=$skip_optimization --strict=$strict --max-states=$max_states \ ark:- "ark:|gzip -c > $kwsdir/index.JOB.gz" || exit 1 @@ -88,11 +106,12 @@ elif [ -f $align_lexicon ]; then lattice-align-words-lexicon $silence_opt --max-expand=$max_expand $align_lexicon $model ark:- ark:- \| \ lattice-scale --acoustic-scale=$acwt --lm-scale=$lmwt ark:- ark:- \| \ lattice-to-kws-index --max-states-scale=$max_states_scale --allow-partial=true \ + --frame-subsampling-factor=$frame_subsampling_factor \ --max-silence-frames=$max_silence_frames --strict=$strict ark:$utter_id ark:- ark:- \| \ kws-index-union --skip-optimization=$skip_optimization --strict=$strict --max-states=$max_states \ ark:- "ark:|gzip -c > $kwsdir/index.JOB.gz" || exit 1 else - echo "$0: cannot find either word-boundary file $word_boundary or alignment lexicon $align_lexicon" + echo "$0: Error: cannot find either word-boundary file $word_boundary or alignment lexicon $align_lexicon" exit 1 fi diff --git a/egs/wsj/s5/steps/make_plp_pitch.sh b/egs/wsj/s5/steps/make_plp_pitch.sh index ff6e83ef577..7a71942ed22 100755 --- a/egs/wsj/s5/steps/make_plp_pitch.sh +++ b/egs/wsj/s5/steps/make_plp_pitch.sh @@ -19,7 +19,7 @@ compress=true echo "$0 $@" # Print the command line for logging -if [ -f path.sh ]; then . ./path.sh; fi +if [ -f ./path.sh ]; then . ./path.sh; fi . 
parse_options.sh || exit 1; if [ $# -lt 1 ] || [ $# -gt 3 ]; then diff --git a/egs/wsj/s5/steps/nnet2/get_egs.sh b/egs/wsj/s5/steps/nnet2/get_egs.sh index de9c5ca85bd..2eac4d60f3f 100755 --- a/egs/wsj/s5/steps/nnet2/get_egs.sh +++ b/egs/wsj/s5/steps/nnet2/get_egs.sh @@ -19,7 +19,7 @@ samples_per_iter=200000 # each iteration of training, see this many samples transform_dir= # If supplied, overrides alidir num_jobs_nnet=16 # Number of neural net jobs to run in parallel stage=0 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. splice_width=4 # meaning +- 4 frames on each side for second LDA left_context= right_context= @@ -58,7 +58,7 @@ if [ $# != 4 ]; then echo " # very end." echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." - + exit 1; fi @@ -91,7 +91,7 @@ cp $alidir/tree $dir utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; cp $lang/phones.txt $dir || exit 1; -# Get list of validation utterances. +# Get list of validation utterances. awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \ > $dir/valid_uttlist || exit 1; @@ -111,7 +111,7 @@ awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlis [ -z "$transform_dir" ] && transform_dir=$alidir -## Set up features. +## Set up features. if [ -z $feat_type ]; then if [ -f $alidir/final.mat ] && [ ! -f $transform_dir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi fi @@ -123,7 +123,7 @@ case $feat_type in train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |" echo $cmvn_opts >$dir/cmvn_opts ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` cp $alidir/{splice_opts,cmvn_opts,final.mat} $dir || exit 1; [ ! -z "$cmvn_opts" ] && \ @@ -266,7 +266,7 @@ if [ $stage -le 4 ]; then echo "$0: Since iters-per-epoch == 1, just concatenating the data." for n in `seq 1 $num_jobs_nnet`; do cat $dir/egs/egs_orig.$n.*.ark > $dir/egs/egs_tmp.$n.0.ark || exit 1; - remove $dir/egs/egs_orig.$n.*.ark + remove $dir/egs/egs_orig.$n.*.ark done else # We'll have to split it up using nnet-copy-egs. egs_list= @@ -291,7 +291,7 @@ if [ $stage -le 5 ]; then for n in `seq 0 $[$iters_per_epoch-1]`; do $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$n.JOB.log \ nnet-shuffle-egs "--srand=\$[JOB+($num_jobs_nnet*$n)]" \ - ark:$dir/egs/egs_tmp.JOB.$n.ark ark:$dir/egs/egs.JOB.$n.ark + ark:$dir/egs/egs_tmp.JOB.$n.ark ark:$dir/egs/egs.JOB.$n.ark remove $dir/egs/egs_tmp.*.$n.ark done fi diff --git a/egs/wsj/s5/steps/nnet2/train_discriminative.sh b/egs/wsj/s5/steps/nnet2/train_discriminative.sh index a1a121345c2..fee51254f4f 100755 --- a/egs/wsj/s5/steps/nnet2/train_discriminative.sh +++ b/egs/wsj/s5/steps/nnet2/train_discriminative.sh @@ -33,7 +33,7 @@ shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of stage=-8 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. These don't num_threads=16 # this is the default but you may want to change it, e.g. to 1 if # using GPUs. @@ -74,7 +74,7 @@ if [ $# != 6 ]; then echo " # this, you may want to decrease the batch size." 
echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" echo " # use multiple threads... " - echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." + echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per" echo " # process." echo " --stage # Used to run a partially-completed training process from somewhere in" diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh index 5e25d7175c3..69ab4596f29 100755 --- a/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh +++ b/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh @@ -54,7 +54,7 @@ add_layers_period=2 # by default, add new layers every 2 iterations. num_hidden_layers=3 stage=-5 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. These don't splice_width=4 # meaning +- 4 frames on each side for second LDA randprune=4.0 # speeds up LDA. alpha=4.0 diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh index e78d9ab7f5c..d655f039e2f 100755 --- a/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh +++ b/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh @@ -55,7 +55,7 @@ add_layers_period=2 # by default, add new layers every 2 iterations. num_hidden_layers=3 stage=-5 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't +io_opts="--max-jobs-run 15" # for jobs with a lot of I/O, limits the number running at one time. These don't splice_width=4 # meaning +- 4 frames on each side for second LDA randprune=4.0 # speeds up LDA. alpha=4.0 # relates to preconditioning. diff --git a/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh b/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh index 54d7cf7ea9d..b296e95416b 100755 --- a/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh +++ b/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh @@ -55,7 +55,7 @@ last_layer_factor=0.1 # relates to modify_learning_rates. first_layer_factor=1.0 # relates to modify_learning_rates. stage=-5 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. These don't splice_width=4 # meaning +- 4 frames on each side for second LDA randprune=4.0 # speeds up LDA. alpha=4.0 @@ -65,6 +65,7 @@ mix_up=0 # Number of components to mix up to (should be > #tree leaves, if num_threads=16 parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know. # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads. 
+combine_opts="--mem 12G" cleanup=true egs_dir= lda_opts= @@ -371,7 +372,7 @@ if [ $stage -le $num_iters ]; then num_egs=`nnet-copy-egs ark:$egs_dir/combine.egs ark:/dev/null 2>&1 | tail -n 1 | awk '{print $NF}'` mb=$[($num_egs+$this_num_threads-1)/$this_num_threads] [ $mb -gt 512 ] && mb=512 - $cmd $parallel_opts $dir/log/combine.log \ + $cmd $parallel_opts $combine_opts $dir/log/combine.log \ nnet-combine-fast --use-gpu=no --num-threads=$this_num_threads \ --verbose=3 --minibatch-size=$mb "${nnets_list[@]}" ark:$egs_dir/combine.egs \ $dir/final.mdl || exit 1; diff --git a/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py b/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py index 8403c273a9d..471911906c5 100644 --- a/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py +++ b/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py @@ -90,7 +90,7 @@ right_context += int_list[-1] splice_array.append(int_list) except ValueError as e: - sys.exit("invalid --splice-indexes argument " + args.splice_indexes + e) + sys.exit("invalid --splice-indexes argument " + args.splice_indexes + str(e)) left_context = max(0, left_context) right_context = max(0, right_context) num_hidden_layers = len(splice_array) diff --git a/egs/wsj/s5/steps/search_index.sh b/egs/wsj/s5/steps/search_index.sh index 6d4c344c5db..5db3d39b15a 100755 --- a/egs/wsj/s5/steps/search_index.sh +++ b/egs/wsj/s5/steps/search_index.sh @@ -8,6 +8,7 @@ cmd=run.pl nbest=-1 strict=true indices_dir= +frame_subsampling_factor=1 # End configuration section. echo "$0 $@" # Print the command line for logging @@ -36,15 +37,23 @@ fi mkdir -p $kwsdir/log; nj=`cat $indices_dir/num_jobs` || exit 1; -keywords=$kwsdatadir/keywords.fsts; +if [ -f $kwsdatadir/keywords.fsts.gz ]; then + keywords="\"gunzip -c $kwsdatadir/keywords.fsts.gz|\"" +elif [ -f $kwsdatadir/keywords.fsts ]; then + keywords=$kwsdatadir/keywords.fsts; +else + echo "$0: no such file $kwsdatadir/keywords.fsts[.gz]" && exit 1; +fi -for f in $indices_dir/index.1.gz $keywords; do +for f in $indices_dir/index.1.gz ; do [ ! -f $f ] && echo "make_index.sh: no such file $f" && exit 1; done $cmd JOB=1:$nj $kwsdir/log/search.JOB.log \ kws-search --strict=$strict --negative-tolerance=-1 \ + --frame-subsampling-factor=${frame_subsampling_factor} \ "ark:gzip -cdf $indices_dir/index.JOB.gz|" ark:$keywords \ - "ark,t:|int2sym.pl -f 2 $kwsdatadir/utter_id > $kwsdir/result.JOB" || exit 1; + "ark,t:|gzip -c > $kwsdir/result.JOB.gz" \ + "ark,t:|gzip -c > $kwsdir/stats.JOB.gz" || exit 1; exit 0; diff --git a/egs/wsj/s5/utils/dict_dir_add_pronprobs.sh b/egs/wsj/s5/utils/dict_dir_add_pronprobs.sh index f9d2890ea39..6cb14068769 100755 --- a/egs/wsj/s5/utils/dict_dir_add_pronprobs.sh +++ b/egs/wsj/s5/utils/dict_dir_add_pronprobs.sh @@ -58,14 +58,14 @@ utils/validate_dict_dir.pl $srcdir; if [ -f $srcdir/lexicon.txt ]; then src_lex=$srcdir/lexicon.txt perl -ane 'print join(" ", split(" ", $_)) . "\n";' < $src_lex |\ - sort > $dir/lexicon.txt + sort -u > $dir/lexicon.txt elif [ -f $srcdir/lexiconp.txt ]; then echo "$0: removing the pron-probs from $srcdir/lexiconp.txt to create $dir/lexicon.txt" # the Perl command below normalizes the spaces (avoid double space). src_lex=$srcdir/lexiconp.txt awk '{$2 = ""; print $0;}' <$srcdir/lexiconp.txt |\ perl -ane 'print join(" ", split(" " ,$_)) . 
"\n";' |\ - sort > $dir/lexicon.txt || exit 1; + sort -u > $dir/lexicon.txt || exit 1; fi diff --git a/egs/wsj/s5/utils/make_lexicon_fst.pl b/egs/wsj/s5/utils/make_lexicon_fst.pl index bcf0f4df13a..f97129c05cb 100755 --- a/egs/wsj/s5/utils/make_lexicon_fst.pl +++ b/egs/wsj/s5/utils/make_lexicon_fst.pl @@ -21,21 +21,24 @@ $pron_probs = 0; -if ($ARGV[0] eq "--pron-probs") { +if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { $pron_probs = 1; shift @ARGV; } if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR - "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt -Creates a lexicon FST that transduces phones to words, and may allow optional silence. -Note: ordinarily, each line of lexicon.txt is: word phone1 phone2 ... phoneN; if the --pron-probs option is -used, each line is: word pronunciation-probability phone1 phone2 ... phoneN. The probability 'prob' will -typically be between zero and one, and note that it's generally helpful to normalize so the largest one -for each word is 1.0, but this is your responsibility. The silence disambiguation symbol, e.g. something -like #5, is used only when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst, and was -introduced to fix a particular case of non-determinism of decoding graphs.\n"; + print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; + print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; + print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; + print STDERR " word phone1 phone2 ... phoneN;\n"; + print STDERR "if the --pron-probs option is used, each line is:\n"; + print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; + print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; + print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; + print STDERR "this is your responsibility.\n\n"; + print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; + print STDERR "when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst,\n"; + print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; exit(1); } diff --git a/egs/wsj/s5/utils/slurm.pl b/egs/wsj/s5/utils/slurm.pl index a332e19cb1d..27e5fce9c01 100755 --- a/egs/wsj/s5/utils/slurm.pl +++ b/egs/wsj/s5/utils/slurm.pl @@ -397,9 +397,12 @@ sub exec_command { print Q " unset CUDA_VISIBLE_DEVICES.\n"; print Q "fi\n"; print Q "time1=\`date +\"%s\"\`\n"; -print Q " ( $cmd ) 2>>$logfile >>$logfile\n"; +print Q " ( $cmd ) &>>$logfile\n"; print Q "ret=\$?\n"; +print Q "sync || true"; print Q "time2=\`date +\"%s\"\`\n"; +print Q "echo '#' Accounting: begin_time=\$time1 >>$logfile\n"; +print Q "echo '#' Accounting: end_time=\$time2 >>$logfile\n"; print Q "echo '#' Accounting: time=\$((\$time2-\$time1)) threads=$num_threads >>$logfile\n"; print Q "echo '#' Finished at \`date\` with status \$ret >>$logfile\n"; print Q "[ \$ret -eq 137 ] && exit 100;\n"; # If process was killed (e.g. 
diff --git a/egs/wsj/s5/utils/write_kwslist.pl b/egs/wsj/s5/utils/write_kwslist.pl index b2f67815df9..18071fa7671 100755 --- a/egs/wsj/s5/utils/write_kwslist.pl +++ b/egs/wsj/s5/utils/write_kwslist.pl @@ -32,8 +32,9 @@ --remove-NO : Remove the "NO" decision instances (boolean, default = false) --segments : Segments file from Kaldi (string, default = "") --system-id : System ID (string, default = "") - --verbose : Verbose level (higher --> more kws section) (integer, default 0) - --YES-cutoff : Only keep "\$YES-cutoff" yeses for each kw (int, default = -1) + --verbose : Verbose level (higher --> more kws section) (integer, default = 0) + --YES-cutoff : Only keep "\$YES-cutoff" yeses for each kw (int, default = -1) + --nbest : Output up to nbest hits into the kwslist (int, default = -1) EOU @@ -55,6 +56,7 @@ my $remove_dup = "false"; my $remove_NO = "false"; my $YES_cutoff = -1; +my $nbest_max = -1; GetOptions('segments=s' => \$segment, 'flen=f' => \$flen, 'beta=f' => \$beta, @@ -72,7 +74,8 @@ 'duptime=f' => \$duptime, 'remove-dup=s' => \$remove_dup, 'YES-cutoff=i' => \$YES_cutoff, - 'remove-NO=s' => \$remove_NO); + 'remove-NO=s' => \$remove_NO, + 'nbest=i' => \$nbest_max) or die "Cannot continue\n"; ($normalize eq "true" || $normalize eq "false") || die "$0: Bad value for option --normalize\n"; ($remove_dup eq "true" || $remove_dup eq "false") || die "$0: Bad value for option --remove-dup\n"; @@ -134,12 +137,18 @@ sub PrintKwslist { # Start printing $kwslist .= "[0]\" language=\"$info->[1]\" system_id=\"$info->[2]\">\n"; my $prev_kw = ""; + my $nbest = $nbest_max; foreach my $kwentry (@{$KWS}) { + if (($prev_kw eq $kwentry->[0]) && ($nbest <= 0) && ($nbest_max > 0)) { + next; + } if ($prev_kw ne $kwentry->[0]) { if ($prev_kw ne "") {$kwslist .= " \n";} $kwslist .= " [0]\" search_time=\"1\" oov_count=\"0\">\n"; $prev_kw = $kwentry->[0]; + $nbest = $nbest_max; } + $nbest -= 1 if $nbest_max > 0; my $score = sprintf("%g", $kwentry->[5]); $kwslist .= " [1]\" channel=\"$kwentry->[2]\" tbeg=\"$kwentry->[3]\" dur=\"$kwentry->[4]\" score=\"$score\" decision=\"$kwentry->[6]\""; if (defined($kwentry->[7])) {$kwslist .= " threshold=\"$kwentry->[7]\"";} diff --git a/src/fstbin/Makefile b/src/fstbin/Makefile index a508ed95bd7..8d544e40ea0 100644 --- a/src/fstbin/Makefile +++ b/src/fstbin/Makefile @@ -15,7 +15,8 @@ BINFILES = fstdeterminizestar \ fstmakecontextsyms fstaddsubsequentialloop fstaddselfloops \ fstrmepslocal fstcomposecontext fsttablecompose fstrand fstfactor \ fstdeterminizelog fstphicompose fstrhocompose fstpropfinal fstcopy \ - fstpushspecial fsts-to-transcripts + fstpushspecial fsts-to-transcripts fsts-project fsts-union \ + fsts-scale fsts-difference OBJFILES = diff --git a/src/fstbin/fsts-project.cc b/src/fstbin/fsts-project.cc new file mode 100644 index 00000000000..015f1431725 --- /dev/null +++ b/src/fstbin/fsts-project.cc @@ -0,0 +1,82 @@ +// fstbin/fsts-project.cc + +// Copyright 2016 Johns Hopkins University (Authors: Jan "Yenda" Trmal) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "fstext/fstext-utils.h" +#include "fstext/kaldi-fst-io.h" + + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace fst; + typedef kaldi::int32 int32; + typedef kaldi::uint64 uint64; + + const char *usage = + "Reads kaldi archive of FSTs; for each element, performs the project\n" + "operation either on input (default) or on the output (if the option\n" + "--project-output is true).\n" + "\n" + "Usage: fsts-project [options] \n" + " e.g.: fsts-project ark:train.fsts ark,t:train.fsts\n" + "\n" + "see also: fstproject (from the OpenFst toolkit)\n"; + + ParseOptions po(usage); + + bool project_output = false; + + po.Register("project-output", &project_output, + "If true, project output vs input"); + + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string fsts_rspecifier = po.GetArg(1), + fsts_wspecifier = po.GetArg(2); + + + SequentialTableReader fst_reader(fsts_rspecifier); + TableWriter fst_writer(fsts_wspecifier); + + int32 n_done = 0; + for (; !fst_reader.Done(); fst_reader.Next()) { + std::string key = fst_reader.Key(); + VectorFst fst(fst_reader.Value()); + + Project(&fst, project_output ? PROJECT_OUTPUT : PROJECT_INPUT); + + fst_writer.Write(key, fst); + n_done++; + } + + KALDI_LOG << "Projected " << n_done << " FSTs"; + return (n_done != 0 ? 
0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} diff --git a/src/fstbin/fsts-to-transcripts.cc b/src/fstbin/fsts-to-transcripts.cc index 7c301e10390..ae74d5de6e9 100644 --- a/src/fstbin/fsts-to-transcripts.cc +++ b/src/fstbin/fsts-to-transcripts.cc @@ -1,6 +1,7 @@ // fstbin/fsts-to-transcripts.cc -// Copyright 2012-2013 Johns Hopkins University (Authors: Guoguo Chen, Daniel Povey) +// Copyright 2012-2013 Johns Hopkins University (Authors: Guoguo Chen, +// Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // @@ -32,12 +33,15 @@ int main(int argc, char *argv[]) { typedef kaldi::uint64 uint64; const char *usage = - "Reads a table of FSTs; for each element, finds the best path and prints out the\n" - "output-symbol sequence (if --output-side=true), or input-symbol sequence " - "otherwise.\n" + "Reads a table of FSTs; for each element, finds the best path and \n" + "prints out the output-symbol sequence (if --output-side=true), or \n" + "input-symbol sequence otherwise.\n" "\n" - "Usage: fsts-to-transcripts [options] \n" - " e.g.: fsts-to-transcripts ark:train.fsts ark,t:train.text\n"; + "Usage:\n" + " fsts-to-transcripts [options] " + " \n" + "e.g.:\n" + " fsts-to-transcripts ark:train.fsts ark,t:train.text\n"; ParseOptions po(usage); @@ -48,13 +52,13 @@ int main(int argc, char *argv[]) { po.Read(argc, argv); - if (po.NumArgs() < 2 || po.NumArgs() > 3) { + if (po.NumArgs() != 2) { po.PrintUsage(); exit(1); } std::string fst_rspecifier = po.GetArg(1), - transcript_wspecifier = po.GetOptArg(2); + transcript_wspecifier = po.GetArg(2); SequentialTableReader fst_reader(fst_rspecifier); @@ -67,11 +71,11 @@ int main(int argc, char *argv[]) { VectorFst shortest_path; - ShortestPath(fst, &shortest_path); // the OpenFst algorithm ShortestPath. + ShortestPath(fst, &shortest_path); // the OpenFst algorithm ShortestPath. if (shortest_path.NumStates() == 0) { - KALDI_WARN << "Input FST (after shortest path) was empty. Producing no " - << "output for key " << key; + KALDI_WARN << "Input FST (after shortest path) was empty. Producing " + << "no output for key " << key; n_err++; continue; } @@ -80,7 +84,8 @@ int main(int argc, char *argv[]) { bool ans; if (output_side) ans = fst::GetLinearSymbolSequence( shortest_path, NULL, &transcript, NULL); - else ans = fst::GetLinearSymbolSequence( + else + ans = fst::GetLinearSymbolSequence( shortest_path, &transcript, NULL, NULL); if (!ans) { KALDI_ERR << "GetLinearSymbolSequence returned false (code error);"; diff --git a/src/fstbin/fsts-union.cc b/src/fstbin/fsts-union.cc new file mode 100644 index 00000000000..489d7362453 --- /dev/null +++ b/src/fstbin/fsts-union.cc @@ -0,0 +1,98 @@ +// fstbin/fsts-union.cc + +// Copyright 2016 Johns Hopkins University (Authors: Jan "Yenda" Trmal) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+ + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "fstext/fstext-utils.h" +#include "fstext/kaldi-fst-io.h" + + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace fst; + typedef kaldi::int32 int32; + typedef kaldi::uint64 uint64; + + const char *usage = + "Reads a kaldi archive of FSTs. Performs the FST operation union on\n" + "all fsts sharing the same key. Assumes the archive is sorted by key.\n" + "\n" + "Usage: fsts-union [options] \n" + " e.g.: fsts-union ark:keywords_tmp.fsts ark,t:keywords.fsts\n" + "\n" + "see also: fstunion (from the OpenFst toolkit)\n"; + + ParseOptions po(usage); + + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string fsts_rspecifier = po.GetArg(1), + fsts_wspecifier = po.GetArg(2); + + + SequentialTableReader fst_reader(fsts_rspecifier); + TableWriter fst_writer(fsts_wspecifier); + + int32 n_out_done = 0, + n_in_done = 0; + std::string res_key = ""; + VectorFst res_fst; + + for (; !fst_reader.Done(); fst_reader.Next()) { + std::string key = fst_reader.Key(); + VectorFst fst(fst_reader.Value()); + + n_in_done++; + if (key == res_key) { + fst::Union(&res_fst, fst); + } else { + if (res_key != "") { + VectorFst out_fst; + fst::Minimize(&res_fst); + fst::RmEpsilon(&res_fst); + fst_writer.Write(res_key, res_fst); + n_out_done++; + } + res_fst = fst; + res_key = key; + } + } + if (res_key != "") { + VectorFst out_fst; + fst::Minimize(&res_fst); + fst::RmEpsilon(&res_fst); + fst_writer.Write(res_key, res_fst); + n_out_done++; + } + + KALDI_LOG << "Applied fst union on " << n_in_done + << " FSTs, produced " << n_out_done << " FSTs"; + return (n_out_done != 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} diff --git a/src/kws/kws-functions.cc b/src/kws/kws-functions.cc index 26645ee92cb..8cb82c7bb0f 100644 --- a/src/kws/kws-functions.cc +++ b/src/kws/kws-functions.cc @@ -17,6 +17,7 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. +#include #include "lat/lattice-functions.h" #include "kws/kws-functions.h" @@ -34,7 +35,6 @@ bool CompareInterval(const Interval &i1, return (i1.Start() < i2.Start() ? true : i1.Start() > i2.Start() ? false: i1.End() < i2.End() ? 
true: false); - } bool ClusterLattice(CompactLattice *clat, @@ -47,9 +47,11 @@ bool ClusterLattice(CompactLattice *clat, // Step 1: Iterate over the lattice to get the arcs StateId max_id = 0; - for (StateIterator siter(*clat); !siter.Done(); siter.Next()) { + for (StateIterator siter(*clat); !siter.Done(); + siter.Next()) { StateId state_id = siter.Value(); - for (ArcIterator aiter(*clat, state_id); !aiter.Done(); aiter.Next()) { + for (ArcIterator aiter(*clat, state_id); !aiter.Done(); + aiter.Next()) { CompactLatticeArc arc = aiter.Value(); if (state_id >= state_times.size() || arc.nextstate >= state_times.size()) return false; @@ -57,7 +59,8 @@ bool ClusterLattice(CompactLattice *clat, max_id = state_id; if (arc.nextstate > max_id) max_id = arc.nextstate; - head[arc.ilabel].push_back(Interval(state_times[state_id], state_times[arc.nextstate])); + head[arc.ilabel].push_back(Interval(state_times[state_id], + state_times[arc.nextstate])); } } // Check if alignments and the states match @@ -85,9 +88,11 @@ bool ClusterLattice(CompactLattice *clat, // Step 3: Cluster arcs according to the maximum overlap: attach // each arc to the cluster-head (as identified in Step 2) which // has the most temporal overlap with the current arc. - for (StateIterator siter(*clat); !siter.Done(); siter.Next()) { + for (StateIterator siter(*clat); !siter.Done(); + siter.Next()) { CompactLatticeArc::StateId state_id = siter.Value(); - for (MutableArcIterator aiter(clat, state_id); !aiter.Done(); aiter.Next()) { + for (MutableArcIterator aiter(clat, state_id); + !aiter.Done(); aiter.Next()) { CompactLatticeArc arc = aiter.Value(); // We don't cluster the epsilon arcs if (arc.ilabel == 0) @@ -100,7 +105,7 @@ bool ClusterLattice(CompactLattice *clat, int32 overlap = interval.Overlap(head[arc.ilabel][i]); if (overlap > max_overlap) { max_overlap = overlap; - olabel = i + 1; // need non-epsilon label. + olabel = i + 1; // need non-epsilon label. 
} } arc.olabel = olabel; @@ -134,13 +139,21 @@ class CompactLatticeToKwsProductFstMapper { arc.nextstate); } - fst::MapFinalAction FinalAction() const { return fst::MAP_NO_SUPERFINAL; } + fst::MapFinalAction FinalAction() const { + return fst::MAP_NO_SUPERFINAL; + } - fst::MapSymbolsAction InputSymbolsAction() const { return fst::MAP_COPY_SYMBOLS; } + fst::MapSymbolsAction InputSymbolsAction() const { + return fst::MAP_COPY_SYMBOLS; + } - fst::MapSymbolsAction OutputSymbolsAction() const { return fst::MAP_COPY_SYMBOLS;} + fst::MapSymbolsAction OutputSymbolsAction() const { + return fst::MAP_COPY_SYMBOLS; + } - uint64 Properties(uint64 props) const { return props; } + uint64 Properties(uint64 props) const { + return props; + } }; @@ -234,10 +247,12 @@ bool CreateFactorTransducer(const CompactLattice &clat, for (StateId s = 0; s < ns; s++) { // Add arcs from initial state to current state - if (!has_epsilon_property || (state_properties[s] & kStateHasNonEpsilonArcsLeaving)) + if (!has_epsilon_property || + (state_properties[s] & kStateHasNonEpsilonArcsLeaving)) factor_transducer->AddArc(ss, KwsProductArc(0, 0, KwsProductWeight(-alpha[s], StdXStdprimeWeight(state_times[s], ArcticWeight::One())), s)); // Add arcs from current state to final state - if (!has_epsilon_property || (state_properties[s] & kStateHasNonEpsilonArcsEntering)) + if (!has_epsilon_property || + (state_properties[s] & kStateHasNonEpsilonArcsEntering)) factor_transducer->AddArc(s, KwsProductArc(0, utterance_id, KwsProductWeight(0, StdXStdprimeWeight(TropicalWeight::One(), state_times[s])), fs)); // The old final state is not final any more if (factor_transducer->Final(s) != KwsProductWeight::Zero()) @@ -300,8 +315,8 @@ static void DifferenceWrapper(const fst::VectorFst &fst1, Decode(difference, encoder); } else { VectorFst fst2_copy(fst2); - RmEpsilon(&fst2_copy); // or Difference will crash. - RemoveWeights(&fst2_copy); // or Difference will crash. + RmEpsilon(&fst2_copy); // or Difference will crash. + RemoveWeights(&fst2_copy); // or Difference will crash. 
Difference(fst1, fst2_copy, difference); } } @@ -337,7 +352,8 @@ void MaybeDoSanityCheck(const KwsLexicographicFst &index_transducer) { for (size_t i = 0; i < isymbols.size(); i++) os2 << isymbols[i] << ' '; BaseFloat second_best_cost = weight.Value1().Value(); - KALDI_VLOG(3) << "Second-best path: " << isymbols.size() << " isymbols " << ", " + KALDI_VLOG(3) << "Second-best path: " << isymbols.size() + << " isymbols " << ", " << osymbols.size() << " osymbols, isymbols are " << os2.str() << ", second-best cost is " << second_best_cost; if (second_best_cost < -0.01) { @@ -349,10 +365,12 @@ void MaybeDoSanityCheck(const KwsLexicographicFst &index_transducer) { void MaybeDoSanityCheck(const KwsProductFst &product_transducer) { if (GetVerboseLevel() < 2) return; KwsLexicographicFst index_transducer; - Map(product_transducer, &index_transducer, KwsProductFstToKwsLexicographicFstMapper()); - MaybeDoSanityCheck(index_transducer); -} + Map(product_transducer, + &index_transducer, + KwsProductFstToKwsLexicographicFstMapper()); + MaybeDoSanityCheck(index_transducer); +} -} // end namespace kaldi +} // end namespace kaldi diff --git a/src/kws/kws-functions.h b/src/kws/kws-functions.h index e13e99f38ae..1558285e40d 100644 --- a/src/kws/kws-functions.h +++ b/src/kws/kws-functions.h @@ -21,6 +21,8 @@ #ifndef KALDI_KWS_KWS_FUNCTIONS_H_ #define KALDI_KWS_KWS_FUNCTIONS_H_ +#include + #include "lat/kaldi-lattice.h" #include "kws/kaldi-kws.h" @@ -62,7 +64,7 @@ bool CompareInterval(const Interval &i1, // It puts disambiguating symbols in the olabels, leaving the words on the // ilabels. bool ClusterLattice(CompactLattice *clat, - const vector &state_times); + const std::vector &state_times); // This function contains two steps: weight pushing and factor generation. The // original ShortestDistance() is not very efficient, so we do the weight @@ -70,7 +72,7 @@ bool ClusterLattice(CompactLattice *clat, // factor generation step expand the lattice to the LXTXT' semiring, with // additional start state and end state (and corresponding arcs) added. bool CreateFactorTransducer(const CompactLattice &clat, - const vector &state_times, + const std::vector &state_times, int32 utterance_id, KwsProductFst *factor_transducer); @@ -81,7 +83,7 @@ bool CreateFactorTransducer(const CompactLattice &clat, // step, so the "search area" is limited to the original arcs before factor // generation. void RemoveLongSilences(int32 max_silence_frames, - const vector &state_times, + const std::vector &state_times, KwsProductFst *factor_transducer); // Do the factor merging part: encode input and output, and apply weighted diff --git a/src/kwsbin/Makefile b/src/kwsbin/Makefile index 43028956e9a..5efb19f9c17 100644 --- a/src/kwsbin/Makefile +++ b/src/kwsbin/Makefile @@ -5,7 +5,8 @@ EXTRA_CXXFLAGS = -Wno-sign-compare include ../kaldi.mk BINFILES = lattice-to-kws-index kws-index-union transcripts-to-fsts \ - kws-search generate-proxy-keywords compute-atwv + kws-search generate-proxy-keywords compute-atwv print-proxy-keywords + OBJFILES = diff --git a/src/kwsbin/compute-atwv.cc b/src/kwsbin/compute-atwv.cc index c7c8e484f8d..6d9f6d2c2bb 100644 --- a/src/kwsbin/compute-atwv.cc +++ b/src/kwsbin/compute-atwv.cc @@ -37,29 +37,34 @@ int main(int argc, char *argv[]) { const char *usage = "Computes the Actual Term-Weighted Value and prints it." 
"\n" - "Usage: compute-atwv [options] [alignment-csv-filename]\n" - " e.g.: compute-atwv 32485.4 ark:ref.1 ark:hyp.1 ali.csv\n" - " or: compute-atwv 32485.4 ark:ref.1 ark:hyp.1\n" + "Usage: \n" + " compute-atwv [options] " + " [alignment-csv-filename]\n" + "e.g.: \n" + " compute-atwv 32485.4 ark:ref.1 ark:hyp.1 ali.csv\n" + "or: \n" + " compute-atwv 32485.4 ark:ref.1 ark:hyp.1\n" "\n" "NOTES: \n" " a) the number of trials is usually equal to the size of the searched\n" " collection in seconds\n" - " b the ref-rspecifier/hyp-rspecifier are the kaldi IO specifiers for both\n" - " the reference and the hypotheses (found hits), respectively.\n" - " The format is the same for both of them. Each line is of \n" - " the following format\n" + " b the ref-rspecifier/hyp-rspecifier are the kaldi IO specifiers \n" + " for both the reference and the hypotheses (found hits), " + " respectively The format is the same for both of them. Each line\n" + " is of the following format\n" "\n" " \n\n" " e.g.:\n\n" " KW106-189 348 459 560 0.8\n" "\n" - " b) the alignment-csv-filename is an optional parameter. If present,\n" - " the alignment i.e. detailed information about what hypotheses match\n" - " up with which reference entries will be generated. The alignemnt\n" - " file format is equivalent to the alignment file produced using\n" - " the F4DE tool. However, we do not set some fields and the utterance\n" - " identifiers are numeric. You can use the script utils/int2sym.pl\n" - " and the utterance/keyword maps to convert the numerical ids into text\n" + " b) the alignment-csv-filename is an optional parameter. \n" + " If present, the alignment i.e. detailed information about what \n" + " hypotheses match up with which reference entries will be \n" + " generated. The alignemnt file format is equivalent to \n" + " the alignment file produced using the F4DE tool. However, we do" + " not set some fields and the utterance identifiers are numeric.\n" + " You can use the script utils/int2sym.pl and the utterance and \n" + " keyword maps to convert the numerical ids into text form\n" " c) the scores are expected to be probabilities. Please note that\n" " the output from the kws-search is in -log(probability).\n" " d) compute-atwv does not perform any score normalization (it's just\n" @@ -79,7 +84,7 @@ int main(int argc, char *argv[]) { po.Read(argc, argv); - if ((po.NumArgs() < 3) || (po.NumArgs() > 4)) { + if (po.NumArgs() < 3 || po.NumArgs() > 4) { po.PrintUsage(); exit(1); } @@ -161,7 +166,6 @@ int main(int argc, char *argv[]) { std::cout << "aproximate OTWV = " << std::fixed << std::setprecision(4) << otwv << std::endl; - } catch(const std::exception &e) { std::cerr << e.what(); return -1; diff --git a/src/kwsbin/generate-proxy-keywords.cc b/src/kwsbin/generate-proxy-keywords.cc index 8495b8e3fe6..9c534abe816 100644 --- a/src/kwsbin/generate-proxy-keywords.cc +++ b/src/kwsbin/generate-proxy-keywords.cc @@ -52,7 +52,7 @@ bool PrintProxyFstPath(const VectorFst &proxy, return true; } -} +} // namespace fst int main(int argc, char *argv[]) { try { @@ -125,7 +125,7 @@ int main(int argc, char *argv[]) { L1_filename = po.GetArg(2), keyword_rspecifier = po.GetArg(3), proxy_wspecifier = po.GetArg(4), - kwlist_wspecifier = (po.NumArgs() == 5) ? 
po.GetArg(5) : ""; + kwlist_wspecifier = po.GetOptArg(5); VectorFst *L2xE = ReadFstKaldi(L2xE_filename); VectorFst *L1 = ReadFstKaldi(L1_filename); @@ -203,7 +203,7 @@ int main(int argc, char *argv[]) { if (proxy_nbest > 0) { KALDI_VLOG(1) << "ShortestPath(KxL2xExL1', " << proxy_nbest << ")"; proxy = tmp_proxy; - tmp_proxy.DeleteStates(); // Not needed for now. + tmp_proxy.DeleteStates(); // Not needed for now. RmEpsilon(&proxy); ShortestPath(proxy, &tmp_proxy, proxy_nbest, true, true); proxy.DeleteStates(); // Not needed for now. diff --git a/src/kwsbin/kws-index-union.cc b/src/kwsbin/kws-index-union.cc index 84e5db4beba..4a0f3ccea1d 100644 --- a/src/kwsbin/kws-index-union.cc +++ b/src/kwsbin/kws-index-union.cc @@ -32,8 +32,10 @@ int main(int argc, char *argv[]) { typedef kaldi::uint64 uint64; const char *usage = - "Take a union of the indexed lattices. The input index is in the T*T*T semiring and\n" - "the output index is also in the T*T*T semiring. At the end of this program, encoded\n" + "Take a union of the indexed lattices. The input index is in " + " the T*T*T semiring and\n" + "the output index is also in the T*T*T semiring. At the end of " + "this program, encoded\n" "epsilon removal, determinization and minimization will be applied.\n" "\n" "Usage: kws-index-union [options] index-rspecifier index-wspecifier\n" @@ -44,9 +46,12 @@ int main(int argc, char *argv[]) { bool strict = true; bool skip_opt = false; int32 max_states = -1; - po.Register("strict", &strict, "Will allow 0 lattice if it is set to false."); - po.Register("skip-optimization", &skip_opt, "Skip optimization if it's set to true."); - po.Register("max-states", &max_states, "Maximum states for DeterminizeStar."); + po.Register("strict", &strict, + "Will allow 0 lattice if it is set to false."); + po.Register("skip-optimization", &skip_opt, + "Skip optimization if it's set to true."); + po.Register("max-states", &max_states, + "Maximum states for DeterminizeStar."); po.Read(argc, argv); @@ -58,8 +63,10 @@ int main(int argc, char *argv[]) { std::string index_rspecifier = po.GetArg(1), index_wspecifier = po.GetOptArg(2); - SequentialTableReader< VectorFstTplHolder > index_reader(index_rspecifier); - TableWriter< VectorFstTplHolder > index_writer(index_wspecifier); + SequentialTableReader< VectorFstTplHolder > + index_reader(index_rspecifier); + TableWriter< VectorFstTplHolder > + index_writer(index_wspecifier); int32 n_done = 0; KwsLexicographicFst global_index; diff --git a/src/kwsbin/kws-search.cc b/src/kwsbin/kws-search.cc index 467a2ab1ccd..836f7b9a111 100644 --- a/src/kwsbin/kws-search.cc +++ b/src/kwsbin/kws-search.cc @@ -1,6 +1,8 @@ // kwsbin/kws-search.cc -// Copyright 2012-2013 Johns Hopkins University (Authors: Guoguo Chen, Daniel Povey) +// Copyright 2012-2015 Johns Hopkins University (Authors: Guoguo Chen, +// Daniel Povey. +// Yenda Trmal) // See ../../COPYING for clarification regarding multiple authors // @@ -29,17 +31,22 @@ typedef KwsLexicographicArc Arc; typedef Arc::Weight Weight; typedef Arc::StateId StateId; -uint64 EncodeLabel(StateId ilabel, - StateId olabel) { - return (((int64)olabel)<<32)+((int64)ilabel); - +// encode ilabel, olabel pair as a single 64bit (output) symbol +uint64 EncodeLabel(StateId ilabel, StateId olabel) { + return (static_cast(olabel) << 32) + static_cast(ilabel); } +// extract the osymbol from the 64bit symbol. 
+// Extract the osymbol from the 64-bit symbol. That represents the utterance id +// in this setup -- we throw away the isymbol, which is typically 0 or a +// disambiguation symbol. StateId DecodeLabelUid(uint64 osymbol) { - // We only need the utterance id - return ((StateId)(osymbol>>32)); + return static_cast<StateId>(osymbol >> 32); } +// This is a mapper adapter that helps convert a StdArc FST (i.e. a tropical- +// semiring FST) to the KwsLexicographic FST. The structure is kept; the +// weights are converted/recomputed. class VectorFstToKwsLexicographicFstMapper { public: typedef fst::StdArc FromArc; @@ -59,17 +66,95 @@ class VectorFstToKwsLexicographicFstMapper { arc.nextstate); } - fst::MapFinalAction FinalAction() const { return fst::MAP_NO_SUPERFINAL; } + fst::MapFinalAction FinalAction() const { + return fst::MAP_NO_SUPERFINAL; + } - fst::MapSymbolsAction InputSymbolsAction() const { return fst::MAP_COPY_SYMBOLS; } + fst::MapSymbolsAction InputSymbolsAction() const { + return fst::MAP_COPY_SYMBOLS; + } - fst::MapSymbolsAction OutputSymbolsAction() const { return fst::MAP_COPY_SYMBOLS;} + fst::MapSymbolsAction OutputSymbolsAction() const { + return fst::MAP_COPY_SYMBOLS; + } uint64 Properties(uint64 props) const { return props; } }; +struct ActivePath { + std::vector path; + KwsLexicographicArc::Weight weight; + KwsLexicographicArc::Label last; +}; + +bool GenerateActivePaths(const KwsLexicographicFst &proxy, + std::vector *paths, + KwsLexicographicFst::StateId cur_state, + std::vector cur_path, + KwsLexicographicArc::Weight cur_weight) { + for (fst::ArcIterator aiter(proxy, cur_state); + !aiter.Done(); aiter.Next()) { + const Arc &arc = aiter.Value(); + Weight temp_weight = Times(arc.weight, cur_weight); + + cur_path.push_back(arc.ilabel); + + if (arc.olabel != 0) { + ActivePath path; + path.path = cur_path; + path.weight = temp_weight; + path.last = arc.olabel; + paths->push_back(path); + } else { + GenerateActivePaths(proxy, paths, + arc.nextstate, cur_path, temp_weight); + } + cur_path.pop_back(); + } + + return true; +} +} // namespace kaldi + +typedef kaldi::TableWriter< kaldi::BasicVectorHolder > + VectorOfDoublesWriter; +void OutputDetailedStatistics(const std::string &kwid, + const kaldi::KwsLexicographicFst &keyword, + const unordered_map &label_decoder, + VectorOfDoublesWriter *output) { + std::vector paths; + + if (keyword.Start() == fst::kNoStateId) + return; + + kaldi::GenerateActivePaths(keyword, &paths, keyword.Start(), + std::vector(), + kaldi::KwsLexicographicArc::Weight::One()); + + for (int i = 0; i < paths.size(); ++i) { + std::vector out; + double score; + int32 tbeg, tend, uid; + + uint64 osymbol = label_decoder.find(paths[i].last)->second; + uid = kaldi::DecodeLabelUid(osymbol); + tbeg = paths[i].weight.Value2().Value1().Value(); + tend = paths[i].weight.Value2().Value2().Value(); + score = paths[i].weight.Value1().Value(); + + out.push_back(uid); + out.push_back(tbeg); + out.push_back(tend); + out.push_back(score); + + for (int j = 0; j < paths[i].path.size(); ++j) { + out.push_back(paths[i].path[j]); + } + output->Write(kwid, out); + } } + int main(int argc, char *argv[]) { try { using namespace kaldi; @@ -77,20 +162,33 @@ int main(int argc, char *argv[]) { typedef kaldi::int32 int32; typedef kaldi::uint32 uint32; typedef kaldi::uint64 uint64; - typedef KwsLexicographicArc Arc; - typedef Arc::Weight Weight; - typedef Arc::StateId StateId; const char *usage = - "Search the keywords over the index. 
-        "on the index side or the keywords side; we use a script to combine the final search\n"
-        "results. Note that the index archive has a only key \"global\".\n"
-        "The output file is in the format:\n"
-        "kw utterance_id beg_frame end_frame negated_log_probs\n"
-        " e.g.: KW1 1 23 67 0.6074219\n"
+        "Search the keywords over the index. This program can be executed\n"
+        "in parallel, either on the index side or the keywords side; we use\n"
+        "a script to combine the final search results. Note that the index\n"
+        "archive has a single key \"global\".\n\n"
+        "Search has one or two outputs. The first one is mandatory and will\n"
+        "contain the search output, i.e. the list of all found keyword\n"
+        "instances. The file is in the following format:\n"
+        "kw_id utt_id beg_frame end_frame neg_logprob\n"
+        " e.g.: \n"
+        "KW105-0198 7 335 376 1.91254\n\n"
+        "The second parameter is optional and allows the user to gather more\n"
+        "statistics about the individual instances from the posting list.\n"
+        "Remember \"keyword\" is an FST and as such, there can be multiple\n"
+        "paths matching in the keyword and in the lattice index in that given\n"
+        "time period. The stats output will provide all matching paths,\n"
+        "each with the appropriate score.\n"
+        "The format is as follows:\n"
+        "kw_id utt_id beg_frame end_frame neg_logprob 0 w_id1 w_id2 ... 0\n"
+        " e.g.: \n"
+        "KW105-0198 7 335 376 16.01254 0 5766 5659 0\n"
         "\n"
-        "Usage: kws-search [options] index-rspecifier keywords-rspecifier results-wspecifier\n"
-        " e.g.: kws-search ark:index.idx ark:keywords.fsts ark:results\n";
+        "Usage: kws-search [options] <index-rspecifier> <keywords-rspecifier>\n"
+        "                  <results-wspecifier> [<stats-wspecifier>]\n"
+        " e.g.: kws-search ark:index.idx ark:keywords.fsts "
+        "ark:results ark:stats\n";
 
     ParseOptions po(usage);
 
@@ -99,28 +197,33 @@ int main(int argc, char *argv[]) {
     bool strict = true;
     double negative_tolerance = -0.1;
     double keyword_beam = -1;
+    int32 frame_subsampling_factor = 1;
 
+    po.Register("frame-subsampling-factor", &frame_subsampling_factor,
+                "Frame subsampling factor. 
(Default value 1)"); po.Register("nbest", &n_best, "Return the best n hypotheses."); po.Register("keyword-nbest", &keyword_nbest, - "Pick the best n keywords if the FST contains multiple keywords."); + "Pick the best n keywords if the FST contains " + "multiple keywords."); po.Register("strict", &strict, "Affects the return status of the program."); po.Register("negative-tolerance", &negative_tolerance, - "The program will print a warning if we get negative score smaller " - "than this tolerance."); + "The program will print a warning if we get negative score " + "smaller than this tolerance."); po.Register("keyword-beam", &keyword_beam, - "Prune the FST with the given beam if the FST contains multiple keywords."); + "Prune the FST with the given beam if the FST contains " + "multiple keywords."); if (n_best < 0 && n_best != -1) { KALDI_ERR << "Bad number for nbest"; - exit (1); + exit(1); } if (keyword_nbest < 0 && keyword_nbest != -1) { KALDI_ERR << "Bad number for keyword-nbest"; - exit (1); + exit(1); } if (keyword_beam < 0 && keyword_beam != -1) { KALDI_ERR << "Bad number for keyword-beam"; - exit (1); + exit(1); } po.Read(argc, argv); @@ -131,12 +234,16 @@ int main(int argc, char *argv[]) { } std::string index_rspecifier = po.GetArg(1), - keyword_rspecifier = po.GetOptArg(2), - result_wspecifier = po.GetOptArg(3); + keyword_rspecifier = po.GetArg(2), + result_wspecifier = po.GetArg(3), + stats_wspecifier = po.GetOptArg(4); - RandomAccessTableReader< VectorFstTplHolder > index_reader(index_rspecifier); + RandomAccessTableReader< VectorFstTplHolder > + index_reader(index_rspecifier); SequentialTableReader keyword_reader(keyword_rspecifier); - TableWriter > result_writer(result_wspecifier); + VectorOfDoublesWriter result_writer(result_wspecifier); + VectorOfDoublesWriter stats_writer(stats_wspecifier); + // Index has key "global" KwsLexicographicFst index = index_reader.Value("global"); @@ -152,7 +259,8 @@ int main(int argc, char *argv[]) { int32 label_count = 1; unordered_map label_encoder; unordered_map label_decoder; - for (StateIterator siter(index); !siter.Done(); siter.Next()) { + for (StateIterator siter(index); + !siter.Done(); siter.Next()) { StateId state_id = siter.Value(); for (MutableArcIterator aiter(&index, state_id); !aiter.Done(); aiter.Next()) { @@ -175,7 +283,7 @@ int main(int argc, char *argv[]) { aiter.SetValue(arc); } } - ArcSort(&index, fst::ILabelCompare()); + ArcSort(&index, fst::ILabelCompare()); int32 n_done = 0; int32 n_fail = 0; @@ -198,6 +306,15 @@ int main(int argc, char *argv[]) { KwsLexicographicFst result_fst; Map(keyword, &keyword_fst, VectorFstToKwsLexicographicFstMapper()); Compose(keyword_fst, index, &result_fst); + + if (stats_wspecifier != "") { + KwsLexicographicFst matched_seq(result_fst); + OutputDetailedStatistics(key, + matched_seq, + label_decoder, + &stats_writer); + } + Project(&result_fst, PROJECT_OUTPUT); Minimize(&result_fst); ShortestPath(result_fst, &result_fst, n_best); @@ -216,13 +333,14 @@ int main(int argc, char *argv[]) { // We're expecting a two-state FST if (result_fst.Final(arc.nextstate) != Weight::One()) { - KALDI_WARN << "The resulting FST does not have the expected structure for key " << key; + KALDI_WARN << "The resulting FST does not have " + << "the expected structure for key " << key; n_fail++; continue; } uint64 osymbol = label_decoder[arc.olabel]; - uid = (int32)DecodeLabelUid(osymbol); + uid = static_cast(DecodeLabelUid(osymbol)); tbeg = arc.weight.Value2().Value1().Value(); tend = 
arc.weight.Value2().Value2().Value(); score = arc.weight.Value1().Value(); @@ -235,8 +353,8 @@ int main(int argc, char *argv[]) { } vector result; result.push_back(uid); - result.push_back(tbeg); - result.push_back(tend); + result.push_back(tbeg * frame_subsampling_factor); + result.push_back(tend * frame_subsampling_factor); result.push_back(score); result_writer.Write(key, result); } diff --git a/src/kwsbin/lattice-to-kws-index.cc b/src/kwsbin/lattice-to-kws-index.cc index c635fe63736..fcd6b82119d 100644 --- a/src/kwsbin/lattice-to-kws-index.cc +++ b/src/kwsbin/lattice-to-kws-index.cc @@ -36,23 +36,33 @@ int main(int argc, char *argv[]) { typedef kaldi::uint64 uint64; const char *usage = - "Create an inverted index of the given lattices. The output index is in the T*T*T\n" - "semiring. For details for the semiring, please refer to Dogan Can and Muran Saraclar's" - "lattice indexing paper." + "Create an inverted index of the given lattices. The output index is \n" + "in the T*T*T semiring. For details for the semiring, please refer to\n" + "Dogan Can and Murat Saraclar's paper named " + "\"Lattice Indexing for Spoken Term Detection\"\n" "\n" - "Usage: lattice-to-kws-index [options] utter-symtab-rspecifier lattice-rspecifier index-wspecifier\n" - " e.g.: lattice-to-kws-index ark:utter.symtab ark:1.lats ark:global.idx\n"; + "Usage: lattice-to-kws-index [options] " + " \n" + "e.g.: \n" + " lattice-to-kws-index ark:utter.symtab ark:1.lats ark:global.idx\n"; ParseOptions po(usage); + int32 frame_subsampling_factor = 1; int32 max_silence_frames = 50; bool strict = true; bool allow_partial = true; BaseFloat max_states_scale = 4; - po.Register("max-silence-frames", &max_silence_frames, "Maximum #frames for" - " silence arc."); - po.Register("strict", &strict, "Setting --strict=false will cause successful " - "termination even if we processed no lattices."); + po.Register("frame-subsampling-factor", &frame_subsampling_factor, + "Frame subsampling factor. (Default value 1)"); + po.Register("max-silence-frames", &max_silence_frames, + "If --frame-subsampling-factor is used, --max-silence-frames " + "is relative to the the input, not the output frame rate " + "(we divide by frame-subsampling-factor and round to " + "the closest integer, to get the number of symbols in the " + "lattice)."); + po.Register("strict", &strict, "Setting --strict=false will cause " + "successful termination even if we processed no lattices."); po.Register("max-states-scale", &max_states_scale, "Number of states in the" " original lattice times this scale is the number of states " "allowed when optimizing the index. Negative number means no " @@ -62,14 +72,16 @@ int main(int argc, char *argv[]) { po.Read(argc, argv); - if (po.NumArgs() < 3 || po.NumArgs() > 4) { + if (po.NumArgs() != 3) { po.PrintUsage(); exit(1); } + max_silence_frames = 0.5 + + max_silence_frames / static_cast(frame_subsampling_factor); std::string usymtab_rspecifier = po.GetOptArg(1), lats_rspecifier = po.GetArg(2), - index_wspecifier = po.GetOptArg(3); + index_wspecifier = po.GetArg(3); // We use RandomAccessInt32Reader to read the utterance symtab table. 
RandomAccessInt32Reader usymtab_reader(usymtab_rspecifier); @@ -77,7 +89,9 @@ int main(int argc, char *argv[]) { // We read the lattice in as CompactLattice; We need the CompactLattice // structure for the rest of the work SequentialCompactLatticeReader clat_reader(lats_rspecifier); - TableWriter< fst::VectorFstTplHolder > index_writer(index_wspecifier); + + TableWriter< fst::VectorFstTplHolder > + index_writer(index_wspecifier); int32 n_done = 0; int32 n_fail = 0; @@ -110,10 +124,10 @@ int main(int argc, char *argv[]) { n_fail++; continue; } - } + } // Get the alignments - vector state_times; + std::vector state_times; CompactLatticeStateTimes(clat, &state_times); // Cluster the arcs in the CompactLattice, write the cluster_id on the @@ -124,9 +138,10 @@ int main(int argc, char *argv[]) { // factor transducer. KALDI_VLOG(1) << "Arc clustering..."; bool success = false; - success = ClusterLattice(&clat, state_times); + success = kaldi::ClusterLattice(&clat, state_times); if (!success) { - KALDI_WARN << "State id's and alignments do not match for lattice " << key; + KALDI_WARN << "State id's and alignments do not match for lattice " + << key; n_fail++; continue; } @@ -145,9 +160,9 @@ int main(int argc, char *argv[]) { EnsureEpsilonProperty(&clat); fst::TopSort(&clat); // We have to recompute the state times because they will have changed. - CompactLatticeStateTimes(clat, &state_times); + CompactLatticeStateTimes(clat, &state_times); } - + // Generate factor transducer // CreateFactorTransducer() corresponds to the "Factor Generation" part of // Dogan and Murat's paper. But we also move the weight pushing step to @@ -155,10 +170,13 @@ int main(int argc, char *argv[]) { KALDI_VLOG(1) << "Generating factor transducer..."; KwsProductFst factor_transducer; int32 utterance_id = usymtab_reader.Value(key); - success = CreateFactorTransducer(clat, state_times, utterance_id, &factor_transducer); + success = kaldi::CreateFactorTransducer(clat, + state_times, + utterance_id, + &factor_transducer); if (!success) { KALDI_WARN << "Cannot generate factor transducer for lattice " << key; - n_fail++; + n_fail++; } MaybeDoSanityCheck(factor_transducer); @@ -178,7 +196,7 @@ int main(int argc, char *argv[]) { DoFactorMerging(&factor_transducer, &index_transducer); MaybeDoSanityCheck(index_transducer); - + // Do factor disambiguation. It corresponds to the "Factor Disambiguation" // step in Dogan and Murat's paper. KALDI_VLOG(1) << "Doing factor disambiguation..."; @@ -191,10 +209,10 @@ int main(int argc, char *argv[]) { KALDI_VLOG(1) << "Optimizing factor transducer..."; OptimizeFactorTransducer(&index_transducer, max_states, allow_partial); - MaybeDoSanityCheck(index_transducer); - + MaybeDoSanityCheck(index_transducer); + // Write result - index_writer.Write(key, index_transducer); + index_writer.Write(key, index_transducer); n_done++; } diff --git a/src/kwsbin/print-proxy-keywords.cc b/src/kwsbin/print-proxy-keywords.cc new file mode 100644 index 00000000000..7c75c4baf66 --- /dev/null +++ b/src/kwsbin/print-proxy-keywords.cc @@ -0,0 +1,134 @@ +// kwsbin/print-proxy-keywords.cc +// +// Copyright 2014-2016 Johns Hopkins University (Author: Guoguo Chen, +// Yenda Trmal) +// +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "fstext/fstext-utils.h"
+#include "fstext/kaldi-fst-io.h"
+
+namespace fst {
+
+bool PrintProxyFstPath(const VectorFst<StdArc> &proxy,
+                       vector<vector<StdArc::Label> > *path,
+                       vector<StdArc::Weight> *cost,
+                       StdArc::StateId cur_state,
+                       vector<StdArc::Label> cur_path,
+                       StdArc::Weight cur_cost) {
+  if (proxy.Final(cur_state) != StdArc::Weight::Zero()) {
+    // Assume only the final state has non-zero cost.
+    cur_cost = Times(proxy.Final(cur_state), cur_cost);
+    path->push_back(cur_path);
+    cost->push_back(cur_cost);
+    return true;
+  }
+
+  for (ArcIterator<VectorFst<StdArc> > aiter(proxy, cur_state);
+       !aiter.Done(); aiter.Next()) {
+    const StdArc &arc = aiter.Value();
+    StdArc::Weight temp_cost = Times(arc.weight, cur_cost);
+    cur_path.push_back(arc.ilabel);
+    PrintProxyFstPath(proxy, path, cost,
+                      arc.nextstate, cur_path, temp_cost);
+    cur_path.pop_back();
+  }
+
+  return true;
+}
+}  // namespace fst
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace fst;
+    typedef kaldi::int32 int32;
+    typedef kaldi::uint64 uint64;
+    typedef StdArc::StateId StateId;
+    typedef StdArc::Weight Weight;
+
+    const char *usage =
+        "Reads in the proxy keywords FSTs and prints them to a file where each\n"
+        "line is \"kwid w1 w2 .. wn\"\n"
+        "\n"
+        "Usage: \n"
+        " print-proxy-keywords [options] <proxy-rspecifier> "
+        "<kwlist-wspecifier> [<cost-wspecifier>]\n"
+        "e.g.:\n"
+        " print-proxy-keywords ark:proxy.fsts ark,t:kwlist.txt"
+        " ark,t:costs.txt\n";
+
+    ParseOptions po(usage);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() < 2 || po.NumArgs() > 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string proxy_rspecifier = po.GetArg(1),
+        kwlist_wspecifier = po.GetArg(2),
+        cost_wspecifier = po.GetOptArg(3);
+
+
+    SequentialTableReader<VectorFstHolder> proxy_reader(proxy_rspecifier);
+    TableWriter<BasicVectorHolder<int32> > kwlist_writer(kwlist_wspecifier);
+    TableWriter<BasicVectorHolder<double> > cost_writer(cost_wspecifier);
+
+    // Start processing the keywords
+    int32 n_done = 0;
+    for (; !proxy_reader.Done(); proxy_reader.Next()) {
+      std::string key = proxy_reader.Key();
+      VectorFst<StdArc> proxy = proxy_reader.Value();
+      proxy_reader.FreeCurrent();
+
+      if (proxy.Properties(kAcyclic, true) == 0) {
+        KALDI_WARN << "Proxy FST has cycles, skipping printing paths for " << key;
+        continue;
+      }
+
+      vector<vector<StdArc::Label> > paths;
+      vector<StdArc::Weight> costs;
+      PrintProxyFstPath(proxy, &paths, &costs, proxy.Start(),
+                        vector<StdArc::Label>(), StdArc::Weight::One());
+      KALDI_ASSERT(paths.size() == costs.size());
+      for (int32 i = 0; i < paths.size(); i++) {
+        vector<int32> kwlist;
+        vector<double> cost;
+        cost.push_back(costs[i].Value());
+        for (int32 j = 0; j < paths[i].size(); j++) {
+          kwlist.push_back(paths[i][j]);
+        }
+        kwlist_writer.Write(key, kwlist);
+        if (cost_wspecifier != "")
+          cost_writer.Write(key, cost);
+      }
+      n_done++;
+    }
+
+    KALDI_LOG << "Done " << n_done << " keywords";
+    return (n_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+
+
diff --git a/src/kwsbin/transcripts-to-fsts.cc b/src/kwsbin/transcripts-to-fsts.cc
index e1a99a29fa2..4e7787f2642 100644
--- a/src/kwsbin/transcripts-to-fsts.cc
+++ b/src/kwsbin/transcripts-to-fsts.cc
@@ -23,6 +23,20 @@
 #include "fstext/kaldi-fst-io.h"
 #include "fstext/fstext-utils.h"
 
+namespace kaldi {
+void SetLinearAcceptorWeight(double cost, fst::VectorFst<fst::StdArc> *fst) {
+  typedef fst::StdArc::Label Label;
+  typedef fst::StdArc::Weight Weight;
+  typedef fst::StdArc::StateId StateId;
+
+  StateId start = fst->Start();
+  fst::MutableArcIterator<fst::VectorFst<fst::StdArc> > aiter(fst, start);
+  fst::StdArc arc = aiter.Value();
+  arc.weight = cost;
+  aiter.SetValue(arc);
+}
+}  // namespace kaldi
+
 int main(int argc, char *argv[]) {
   try {
     using namespace kaldi;
@@ -31,11 +45,22 @@ int main(int argc, char *argv[]) {
     typedef kaldi::uint64 uint64;
 
     const char *usage =
-        "Build a linear acceptor for each transcription. Read in the transcriptions in archive\n"
-        "format and write out the linear acceptors in archive format with the same key.\n"
+        "Build a linear acceptor for each transcription in the archive.\n"
+        "Read in the transcriptions in archive format and write out the\n"
+        "linear acceptors in archive format with the same key. The costs of\n"
+        "the arcs are set to be zero. The cost of the acceptor can be changed\n"
+        "by supplying the costs archive. In that case, the first arc's cost\n"
+        "will be set to the value obtained from the archive, i.e. the total\n"
+        "cost will be equal to that value. The cost archive can be sparse, i.e.\n"
+        "it does not have to include zero-cost transcriptions. It is preferred\n"
+        "for the archive to be sorted (for efficiency).\n"
         "\n"
-        "Usage: transcripts-to-fsts [options] transcriptions-rspecifier fsts-wspecifier\n"
-        " e.g.: transcripts-to-fsts ark:train.tra ark:train.fsts\n";
+        "Usage: \n"
+        " transcripts-to-fsts [options] <transcriptions-rspecifier>\n"
+        "                     [<costs-rspecifier>] <fsts-wspecifier>\n"
+        "e.g.: \n"
+        " transcripts-to-fsts ark:train.tra ark,s,cs,t:costs.txt "
+        "  ark:train.fsts\n";
 
     ParseOptions po(usage);
 
@@ -44,10 +69,16 @@ int main(int argc, char *argv[]) {
     bool project_input = false;
     bool project_output = false;
 
-    po.Register("left-compose", &left_compose, "Compose the given FST to the left");
-    po.Register("right-compose", &right_compose, "Compose the given FST to the right");
-    po.Register("project-input", &project_input, "Project input labels if true");
-    po.Register("project-output", &project_output, "Project input labels if true");
+    po.Register("left-compose", &left_compose,
+                "Compose the given FST to the left");
+    po.Register("right-compose", &right_compose,
+                "Compose the given FST to the right");
+    po.Register("project-input", &project_input,
+                "Project input labels if true "
+                "(makes sense only in connection with left|right composition)");
+    po.Register("project-output", &project_output,
+                "Project output labels if true "
+                "(makes sense only in connection with left|right composition)");
 
     po.Read(argc, argv);
 
@@ -56,11 +87,22 @@ int main(int argc, char *argv[]) {
       exit(1);
     }
 
-    std::string transcript_rspecifier = po.GetArg(1),
-        fst_wspecifier = po.GetOptArg(2);
+    std::string transcript_rspecifier,
+        costs_rspecifier,
+        fst_wspecifier;
+
+    if (po.NumArgs() == 2) {
+      transcript_rspecifier = po.GetArg(1);
+      fst_wspecifier = po.GetArg(2);
+    } else {
+      transcript_rspecifier = po.GetArg(1);
+      costs_rspecifier = po.GetArg(2);
+      fst_wspecifier = po.GetArg(3);
+    }
 
     SequentialInt32VectorReader
transcript_reader(transcript_rspecifier); + RandomAccessDoubleReader costs_reader(costs_rspecifier); TableWriter fst_writer(fst_wspecifier); // Read the possible given FSTs @@ -81,13 +123,17 @@ int main(int argc, char *argv[]) { VectorFst fst; MakeLinearAcceptor(transcript, &fst); + if (costs_reader.HasKey(key)) { + double cost = costs_reader.Value(key); + SetLinearAcceptorWeight(cost, &fst); + } if (lfst != NULL) { VectorFst composed_fst; Compose(*lfst, fst, &composed_fst); fst = composed_fst; } - + if (rfst != NULL) { VectorFst composed_fst; Compose(fst, *rfst, &composed_fst); @@ -111,7 +157,7 @@ int main(int argc, char *argv[]) { delete rfst; KALDI_LOG << "Done " << n_done << " transcriptions"; - return (n_done != 0 ? 0 : 1); + return (n_done != 0 ? 0 : 1); } catch(const std::exception &e) { std::cerr << e.what(); return -1; From 9a1244ffb6d47ca58190a3db5d5c13e3f013730f Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Wed, 8 Feb 2017 23:29:26 -0500 Subject: [PATCH 412/530] [egs] removing empty files in BABEL recipe (#1406) These caused a problem on MacOS, as reported by @dogancan. --- egs/babel/s5/RESULTS | 0 egs/babel/s5b/RESULTS | 0 egs/babel/s5c/RESULTS | 0 egs/babel/s5d/RESULTS | 0 4 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 egs/babel/s5/RESULTS delete mode 100644 egs/babel/s5b/RESULTS delete mode 100644 egs/babel/s5c/RESULTS delete mode 100644 egs/babel/s5d/RESULTS diff --git a/egs/babel/s5/RESULTS b/egs/babel/s5/RESULTS deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/egs/babel/s5b/RESULTS b/egs/babel/s5b/RESULTS deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/egs/babel/s5c/RESULTS b/egs/babel/s5c/RESULTS deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/egs/babel/s5d/RESULTS b/egs/babel/s5d/RESULTS deleted file mode 100644 index e69de29bb2d..00000000000 From 001f605356cfca6553cb607b3e4679048dea33d8 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Thu, 9 Feb 2017 14:49:52 -0500 Subject: [PATCH 413/530] [scripts,egs] ivector compatibility checks; minor fixes in egs (#1395) --- egs/rm/s5/local/chain/run_tdnn_5g.sh | 2 +- egs/rm/s5/local/chain/run_tdnn_5n.sh | 5 +- egs/wsj/s5/steps/libs/common.py | 6 +++ egs/wsj/s5/steps/libs/nnet3/train/common.py | 47 ++++++++++++++----- .../steps/nnet2/check_ivectors_compatible.sh | 40 ++++++++++++++++ egs/wsj/s5/steps/nnet2/get_ivector_id.sh | 42 +++++++++++++++++ egs/wsj/s5/steps/nnet3/align.sh | 7 ++- egs/wsj/s5/steps/nnet3/chain/get_egs.sh | 1 + egs/wsj/s5/steps/nnet3/chain/train.py | 5 +- egs/wsj/s5/steps/nnet3/decode.sh | 7 ++- egs/wsj/s5/steps/nnet3/get_egs.sh | 1 + .../s5/steps/nnet3/get_egs_discriminative.sh | 1 + egs/wsj/s5/steps/nnet3/get_egs_targets.sh | 3 +- egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh | 6 +++ egs/wsj/s5/steps/nnet3/train_dnn.py | 4 +- egs/wsj/s5/steps/nnet3/train_raw_dnn.py | 4 +- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 7 ++- egs/wsj/s5/steps/nnet3/train_rnn.py | 4 +- .../s5/steps/online/nnet2/extract_ivectors.sh | 5 +- .../online/nnet2/extract_ivectors_online.sh | 5 ++ .../online/nnet2/train_ivector_extractor.sh | 5 ++ 21 files changed, 181 insertions(+), 26 deletions(-) create mode 100755 egs/wsj/s5/steps/nnet2/check_ivectors_compatible.sh create mode 100755 egs/wsj/s5/steps/nnet2/get_ivector_id.sh diff --git a/egs/rm/s5/local/chain/run_tdnn_5g.sh b/egs/rm/s5/local/chain/run_tdnn_5g.sh index f6fbe070763..088cb3ec778 100755 --- a/egs/rm/s5/local/chain/run_tdnn_5g.sh +++ b/egs/rm/s5/local/chain/run_tdnn_5g.sh @@ 
-120,7 +120,7 @@ if [ $stage -le 8 ]; then --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ --trainer.optimization.final-effective-lrate $final_effective_lrate \ --trainer.max-param-change $max_param_change \ - --cleanup.remove-egs true \ + --cleanup.remove-egs $remove_egs \ --feat-dir data/train \ --tree-dir $treedir \ --lat-dir exp/tri3b_lats \ diff --git a/egs/rm/s5/local/chain/run_tdnn_5n.sh b/egs/rm/s5/local/chain/run_tdnn_5n.sh index 7fd7b82aa1d..7a08becd57f 100755 --- a/egs/rm/s5/local/chain/run_tdnn_5n.sh +++ b/egs/rm/s5/local/chain/run_tdnn_5n.sh @@ -25,7 +25,8 @@ num_jobs_final=4 minibatch_size=128 frames_per_eg=150 remove_egs=false - +#common_egs_dir=exp/chain/tdnn_5g/egs/ +common_egs_dir= # End configuration section. echo "$0 $@" # Print the command line for logging @@ -121,7 +122,7 @@ if [ $stage -le 8 ]; then --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ --trainer.optimization.final-effective-lrate $final_effective_lrate \ --trainer.max-param-change $max_param_change \ - --cleanup.remove-egs true \ + --cleanup.remove-egs $remove_egs \ --feat-dir data/train \ --tree-dir $treedir \ --lat-dir exp/tri3b_lats \ diff --git a/egs/wsj/s5/steps/libs/common.py b/egs/wsj/s5/steps/libs/common.py index 1e0608525ba..66a02062e9c 100644 --- a/egs/wsj/s5/steps/libs/common.py +++ b/egs/wsj/s5/steps/libs/common.py @@ -294,6 +294,12 @@ def get_ivector_dim(ivector_dir=None): ivector_dim = int(stdout_val) return ivector_dim +def get_ivector_extractor_id(ivector_dir=None): + if ivector_dir is None: + return None + [stdout_val, stderr_val] = run_kaldi_command( + "steps/nnet2/get_ivector_id.sh {dir}".format(dir=ivector_dir)) + return stdout_val.strip() def get_feat_dim(feat_dir): [stdout_val, stderr_val] = run_kaldi_command( diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py b/egs/wsj/s5/steps/libs/nnet3/train/common.py index 977393c44b0..0a02c89de5f 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -57,7 +57,7 @@ def get_successful_models(num_models, log_file_pattern, for line_num in range(1, len(lines) + 1): # we search from the end as this would result in # lesser number of regex searches. Python regex is slow ! - mat_obj = parse_regex.search(lines[-1*line_num]) + mat_obj = parse_regex.search(lines[-1 * line_num]) if mat_obj is not None: this_objf = float(mat_obj.groups()[0]) break @@ -66,7 +66,7 @@ def get_successful_models(num_models, log_file_pattern, accepted_models = [] for i in range(num_models): if (objf[max_index] - objf[i]) <= difference_threshold: - accepted_models.append(i+1) + accepted_models.append(i + 1) if len(accepted_models) != num_models: logger.warn("Only {0}/{1} of the models have been accepted " @@ -74,7 +74,7 @@ def get_successful_models(num_models, log_file_pattern, len(accepted_models), num_models, log_file_pattern)) - return [accepted_models, max_index+1] + return [accepted_models, max_index + 1] def get_average_nnet_model(dir, iter, nnets_list, run_opts, @@ -143,7 +143,7 @@ def validate_chunk_width(chunk_width): or a comma-separated list of integers like '20,30,16'""" if not isinstance(chunk_width, str): return False - a = chunk_width.split(","); + a = chunk_width.split(",") assert len(a) != 0 # would be code error for elem in a: try: @@ -175,7 +175,7 @@ def validate_range_str(range_str): for r in ranges: # a range may be either e.g. 
'64', or '128-256' try: - c = [ int(x) for x in r.split(":") ] + c = [int(x) for x in r.split(":")] except: return False # c should be either e.g. [ 128 ], or [64,128]. @@ -190,7 +190,6 @@ def validate_range_str(range_str): return True - def validate_minibatch_size_str(minibatch_size_str): """Validate a minibatch-size string (returns bool). A minibatch-size string might either be an integer, like '256', @@ -242,7 +241,7 @@ def halve_range_str(range_str): halved_ranges = [] for r in ranges: # a range may be either e.g. '64', or '128:256' - c = [ str(max(1, int(x)/2)) for x in r.split(":") ] + c = [str(max(1, int(x)/2)) for x in r.split(":")] halved_ranges.append(":".join(c)) return ','.join(halved_ranges) @@ -271,7 +270,7 @@ def halve_minibatch_size_str(minibatch_size_str): def copy_egs_properties_to_exp_dir(egs_dir, dir): try: - for file in ['cmvn_opts', 'splice_opts', 'final.mat']: + for file in ['cmvn_opts', 'splice_opts', 'info/final.ie.id', 'final.mat']: file_name = '{dir}/{file}'.format(dir=egs_dir, file=file) if os.path.isfile(file_name): shutil.copy2(file_name, dir) @@ -305,12 +304,23 @@ def parse_generic_config_vars_file(var_file): raise Exception('Error while parsing the file {0}'.format(var_file)) -def verify_egs_dir(egs_dir, feat_dim, ivector_dim, +def verify_egs_dir(egs_dir, feat_dim, ivector_dim, ivector_extractor_id, left_context, right_context, left_context_initial=-1, right_context_final=-1): try: egs_feat_dim = int(open('{0}/info/feat_dim'.format( egs_dir)).readline()) + + egs_ivector_id = None + try: + egs_ivector_id = open('{0}/info/final.ie.id'.format( + egs_dir)).readline().strip() + except: + # it could actually happen that the file is not there + # for example in cases where the egs were dumped by + # an older version of the script + pass + egs_ivector_dim = int(open('{0}/info/ivector_dim'.format( egs_dir)).readline()) egs_left_context = int(open('{0}/info/left_context'.format( @@ -333,12 +343,26 @@ def verify_egs_dir(egs_dir, feat_dim, ivector_dim, "the current experiment and the provided " "egs directory") + if (((egs_ivector_id is None) and (ivector_extractor_id is not None)) or + ((egs_ivector_id is not None) and (ivector_extractor_id is None))): + logger.warning("The ivector ids are inconsistently used. It's your " + "responsibility to make sure the ivector extractor " + "has been used consistently") + elif (((egs_ivector_id is None) and (ivector_extractor_id is None))): + logger.warning("The ivector ids are not used. It's your " + "responsibility to make sure the ivector extractor " + "has been used consistently") + elif (ivector_extractor_id != egs_ivector_id): + raise Exception("The egs were generated using a different ivector " + "extractor. id1 = {0}, id2={1}".format( + ivector_extractor_id, egs_ivector_id)); + if (egs_left_context < left_context or egs_right_context < right_context): raise Exception('The egs have insufficient (l,r) context ({0},{1}) ' 'versus expected ({2},{3})'.format( - egs_left_context, egs_right_context, - left_context, right_context)) + egs_left_context, egs_right_context, + left_context, right_context)) # the condition on the initial/final context is an equality condition, # not an inequality condition, as there is no mechanism to 'correct' the @@ -569,6 +593,7 @@ def self_test(): assert validate_chunk_width('64') assert validate_chunk_width('64,25,128') + class CommonParser: """Parser for parsing common options related to nnet3 training. 
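The ID check in verify_egs_dir() above boils down to a four-way decision. A minimal standalone sketch of that logic, assuming only that each ID is either None or a hex string (the function name is illustrative, not part of the patch):

    def check_ivector_id_consistency(egs_ivector_id, extractor_ivector_id):
        # Warn when exactly one side carries an ID: nothing can be verified.
        if (egs_ivector_id is None) != (extractor_ivector_id is None):
            print("WARNING: ivector IDs are inconsistently used")
        # Warn when neither side carries an ID: compatibility is unchecked.
        elif egs_ivector_id is None and extractor_ivector_id is None:
            print("WARNING: ivector IDs are not used")
        # Fail only when both IDs exist and disagree.
        elif egs_ivector_id != extractor_ivector_id:
            raise Exception("egs were generated using a different ivector "
                            "extractor: {0} vs {1}".format(
                                extractor_ivector_id, egs_ivector_id))

Matching IDs, or either of the first two cases, let training proceed; only a hard mismatch aborts, which mirrors the behaviour of the diff above.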
diff --git a/egs/wsj/s5/steps/nnet2/check_ivectors_compatible.sh b/egs/wsj/s5/steps/nnet2/check_ivectors_compatible.sh
new file mode 100755
index 00000000000..40cc0d2c349
--- /dev/null
+++ b/egs/wsj/s5/steps/nnet2/check_ivectors_compatible.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Copyright (c) 2016, Johns Hopkins University (Yenda Trmal <jtrmal@gmail.com>)
+# License: Apache 2.0
+
+# Begin configuration section.
+# End configuration section
+
+#echo >&2 "$0 $@"  # Print the command line for logging
+if [ $# != 2 ] ; then
+  echo >&2 "Usage: $0 <ivector-dir-a> <ivector-dir-b>"
+  echo >&2 "  e.g.: $0 exp/nnet3/extractor exp/nnet3/ivectors_dev10h.pem"
+  exit 1
+fi
+
+dir_a=$1
+dir_b=$2
+
+id_a=$(steps/nnet2/get_ivector_id.sh $dir_a)
+ret_a=$?
+id_b=$(steps/nnet2/get_ivector_id.sh $dir_b)
+ret_b=$?
+
+if [ ! -z "$id_a" ] && [ ! -z "${id_b}" ] ; then
+  if [ "${id_a}" == "${id_b}" ]; then
+    exit 0
+  else
+    echo >&2 "$0: ERROR: the iVector id ${id_a} in ${dir_a} and the iVector id ${id_b} in ${dir_b} do not match"
+    echo >&2 "$0: ERROR: that means that the systems are not compatible."
+    exit 1
+  fi
+elif [ -z "$id_a" ] && [ -z "${id_b}" ] ; then
+  echo >&2 "$0: WARNING: The directories do not contain iVector IDs."
+  echo >&2 "$0: WARNING: That means it's you who's responsible for keeping "
+  echo >&2 "$0: WARNING: the directories compatible"
+  exit 0
+else
+  echo >&2 "$0: WARNING: One of the directories does not contain an iVector ID."
+  echo >&2 "$0: WARNING: That means it's you who's responsible for keeping "
+  echo >&2 "$0: WARNING: the directories compatible"
+  exit 0
+fi
diff --git a/egs/wsj/s5/steps/nnet2/get_ivector_id.sh b/egs/wsj/s5/steps/nnet2/get_ivector_id.sh
new file mode 100755
index 00000000000..d7be853349d
--- /dev/null
+++ b/egs/wsj/s5/steps/nnet2/get_ivector_id.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# Copyright (c) 2016, Johns Hopkins University (Yenda Trmal <jtrmal@gmail.com>)
+# License: Apache 2.0
+
+# Begin configuration section.
+# End configuration section
+set -e -o pipefail
+set -o nounset                              # Treat unset variables as an error
+
+# End configuration section.
+
+#echo >&2 "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+
+if [ $# != 1 ]; then
+  echo >&2 "Usage: $0 <ivector-extractor-dir>"
+  echo >&2 "  e.g.: $0 exp/nnet3/extractor"
+  exit 1
+fi
+
+ivecdir=$1
+
+if [ -f $ivecdir/final.ie.id ] ; then
+  cat $ivecdir/final.ie.id
+elif [ -f $ivecdir/final.ie ] ; then
+  # note the creation can fail in case the extractor directory
+  # is on read-only media or the user does not have write access --
+  # in that case we will just behave as if the id is not available
+  id=$(md5sum $ivecdir/final.ie | awk '{print $1}')
+  echo "$id" > $ivecdir/final.ie.id || exit 1
+  cat $ivecdir/final.ie.id
+else
+  exit 1
+fi
+
+exit 0
+
+
+
diff --git a/egs/wsj/s5/steps/nnet3/align.sh b/egs/wsj/s5/steps/nnet3/align.sh
index 4c3b0987562..1ae5218aa85 100755
--- a/egs/wsj/s5/steps/nnet3/align.sh
+++ b/egs/wsj/s5/steps/nnet3/align.sh
@@ -62,8 +62,11 @@ else
 fi
 
 extra_files=
-[ ! -z "$online_ivector_dir" ] && \
-  extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period"
+if [ ! -z "$online_ivector_dir" ]; then
+  steps/nnet2/check_ivectors_compatible.sh $srcdir $online_ivector_dir || exit 1
+  extra_files="$srcdir/final.ie.id $online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period"
+fi
+
 for f in $srcdir/tree $srcdir/${iter}.mdl $data/feats.scp $lang/L.fst $extra_files; do
  [ !
-f $f ] && echo "$0: no such file $f" && exit 1; done diff --git a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh index 0b1ddd1fbc7..4a61f8edaa7 100755 --- a/egs/wsj/s5/steps/nnet3/chain/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/chain/get_egs.sh @@ -215,6 +215,7 @@ fi if [ ! -z "$online_ivector_dir" ]; then ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; echo $ivector_dim > $dir/info/ivector_dim + steps/nnet2/get_ivector_id.sh $online_ivector_dir > $dir/info/final.ie.id || exit 1 ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" else diff --git a/egs/wsj/s5/steps/nnet3/chain/train.py b/egs/wsj/s5/steps/nnet3/chain/train.py index 8624dc947b9..19276817ea0 100755 --- a/egs/wsj/s5/steps/nnet3/chain/train.py +++ b/egs/wsj/s5/steps/nnet3/chain/train.py @@ -265,6 +265,7 @@ def train(args, run_opts, background_process_handler): num_jobs = common_lib.get_number_of_jobs(args.tree_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) + ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment @@ -357,7 +358,8 @@ def train(args, run_opts, background_process_handler): [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = ( - common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, + common_train_lib.verify_egs_dir(egs_dir, feat_dim, + ivector_dim, ivector_id, egs_left_context, egs_right_context, egs_left_context_initial, egs_right_context_final)) @@ -370,6 +372,7 @@ def train(args, run_opts, background_process_handler): # copy the properties of the egs to dir for # use during decoding + logger.info("Copying the properties from {0} to {1}".format(egs_dir, args.dir)) common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir) if (args.stage <= -2): diff --git a/egs/wsj/s5/steps/nnet3/decode.sh b/egs/wsj/s5/steps/nnet3/decode.sh index b97e7f415d7..8aa86e92dcb 100755 --- a/egs/wsj/s5/steps/nnet3/decode.sh +++ b/egs/wsj/s5/steps/nnet3/decode.sh @@ -65,8 +65,11 @@ srcdir=`dirname $dir`; # Assume model directory one level up from decoding direc model=$srcdir/$iter.mdl -[ ! -z "$online_ivector_dir" ] && \ - extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period" +extra_files= +if [ ! -z "$online_ivector_dir" ]; then + steps/nnet2/check_ivectors_compatible.sh $srcdir $online_ivector_dir || exit 1 + extra_files="$srcdir/final.ie.id $online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period" +fi for f in $graphdir/HCLG.fst $data/feats.scp $model $extra_files; do [ ! -f $f ] && echo "$0: no such file $f" && exit 1; diff --git a/egs/wsj/s5/steps/nnet3/get_egs.sh b/egs/wsj/s5/steps/nnet3/get_egs.sh index c47522fec7a..d72a3d23fe5 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs.sh @@ -188,6 +188,7 @@ fi if [ ! 
-z "$online_ivector_dir" ]; then ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; echo $ivector_dim > $dir/info/ivector_dim + steps/nnet2/get_ivector_id.sh $online_ivector_dir > $dir/info/final.ie.id || exit 1 ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" else diff --git a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh index 377c49fc5cb..f74b66b5fd2 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_discriminative.sh @@ -207,6 +207,7 @@ if [ ! -z $online_ivector_dir ]; then ivector_period=$(cat $online_ivector_dir/ivector_period) ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; echo $ivector_dim >$dir/info/ivector_dim + steps/nnet2/get_ivector_id.sh $online_ivector_dir > $dir/info/final.ie.id || exit 1 ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" else ivector_opts="" diff --git a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh index 4af10e2dde1..a2749b48fac 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh @@ -179,7 +179,8 @@ if [ -f $dir/trans.scp ]; then fi if [ ! -z "$online_ivector_dir" ]; then - ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; + steps/nnet2/get_ivector_id.sh $online_ivector_dir > $dir/info/final.ie.id || exit 1 + ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1 echo $ivector_dim > $dir/info/ivector_dim ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1; ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period" diff --git a/egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh b/egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh index 8fce9ae3831..8f3dac45315 100755 --- a/egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh +++ b/egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh @@ -141,6 +141,7 @@ if [ -z "$online_ivector_dir" ]; then ivector_dim=0 else ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1; + steps/nnet2/get_ivector_id.sh $online_ivector_dir > $dir/final.ie.id || exit 1 fi if [ ! -z "$configs_dir" ]; then @@ -213,6 +214,11 @@ fi [ -z $egs_dir ] && egs_dir=$dir/egs +if [ ! 
-z "$online_ivector_dir" ] ; then + steps/nnet2/check_ivectors_compatible.sh $online_ivector_dir $egs_dir/info || exit 1 +fi + + if [ "$feat_dim" != "$(cat $egs_dir/info/feat_dim)" ]; then echo "$0: feature dimension mismatch with egs, $feat_dim vs $(cat $egs_dir/info/feat_dim)"; exit 1; diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index 689450a80f0..164aee788fa 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -165,6 +165,7 @@ def train(args, run_opts, background_process_handler): num_jobs = common_lib.get_number_of_jobs(args.ali_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) + ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment @@ -231,7 +232,8 @@ def train(args, run_opts, background_process_handler): [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = ( - common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, + common_train_lib.verify_egs_dir(egs_dir, feat_dim, + ivector_dim, ivector_id, left_context, right_context)) assert(str(args.frames_per_eg) == frames_per_eg_str) diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index 21247e8c7c3..a10b7eb604a 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -164,6 +164,7 @@ def train(args, run_opts, background_process_handler): # Set some variables. feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) + ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) @@ -246,7 +247,8 @@ def train(args, run_opts, background_process_handler): [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = ( - common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, + common_train_lib.verify_egs_dir(egs_dir, feat_dim, + ivector_dim, ivector_id, left_context, right_context)) assert(str(args.frames_per_eg) == frames_per_eg_str) diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index 5a96d6020fa..272485b898a 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -221,6 +221,8 @@ def train(args, run_opts, background_process_handler): # Set some variables. 
feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) + ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) + config_dir = '{0}/configs'.format(args.dir) var_file = '{0}/vars'.format(config_dir) @@ -310,11 +312,12 @@ def train(args, run_opts, background_process_handler): [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = ( - common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, + common_train_lib.verify_egs_dir(egs_dir, feat_dim, + ivector_dim, ivector_id, left_context, right_context)) if args.chunk_width != frames_per_eg_str: raise Exception("mismatch between --egs.chunk-width and the frames_per_eg " - "in the egs dir {0} vs {1}".(args.chunk_width, + "in the egs dir {0} vs {1}".format(args.chunk_width, frames_per_eg_str)) if (args.num_jobs_final > num_archives): diff --git a/egs/wsj/s5/steps/nnet3/train_rnn.py b/egs/wsj/s5/steps/nnet3/train_rnn.py index 5824a77dbfe..6636513e03d 100755 --- a/egs/wsj/s5/steps/nnet3/train_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_rnn.py @@ -221,6 +221,7 @@ def train(args, run_opts, background_process_handler): num_jobs = common_lib.get_number_of_jobs(args.ali_dir) feat_dim = common_lib.get_feat_dim(args.feat_dir) ivector_dim = common_lib.get_ivector_dim(args.online_ivector_dir) + ivector_id = common_lib.get_ivector_extractor_id(args.online_ivector_dir) # split the training data into parts for individual jobs # we will use the same number of jobs as that used for alignment @@ -295,7 +296,8 @@ def train(args, run_opts, background_process_handler): [egs_left_context, egs_right_context, frames_per_eg_str, num_archives] = ( - common_train_lib.verify_egs_dir(egs_dir, feat_dim, ivector_dim, + common_train_lib.verify_egs_dir(egs_dir, feat_dim, + ivector_dim, ivector_id, left_context, right_context, left_context_initial, right_context_final)) if args.chunk_width != frames_per_eg_str: diff --git a/egs/wsj/s5/steps/online/nnet2/extract_ivectors.sh b/egs/wsj/s5/steps/online/nnet2/extract_ivectors.sh index f27baecd673..53026b840bd 100755 --- a/egs/wsj/s5/steps/online/nnet2/extract_ivectors.sh +++ b/egs/wsj/s5/steps/online/nnet2/extract_ivectors.sh @@ -277,4 +277,7 @@ if [ $stage -le 5 ]; then for j in $(seq $nj); do cat $dir/ivector_online.$j.scp; done >$dir/ivector_online.scp || exit 1; fi -echo "$0: done extracting (pseudo-online) iVectors" +steps/nnet2/get_ivector_id.sh $srcdir > $dir/final.ie.id || exit 1 + +echo "$0: done extracting (pseudo-online) iVectors to $dir using the extractor in $srcdir." + diff --git a/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh b/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh index b52de1f516b..f4d908e9446 100755 --- a/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh +++ b/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh @@ -127,3 +127,8 @@ if [ $stage -le 1 ]; then echo "$0: combining iVectors across jobs" for j in $(seq $nj); do cat $dir/ivector_online.$j.scp; done >$dir/ivector_online.scp || exit 1; fi + +steps/nnet2/get_ivector_id.sh $srcdir > $dir/final.ie.id || exit 1 + +echo "$0: done extracting (online) iVectors to $dir using the extractor in $srcdir." 
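The final.ie.id file that the two extraction scripts above now write is nothing more than the MD5 digest of the extractor model, as computed by get_ivector_id.sh earlier in this patch. A rough Python equivalent, with a hypothetical extractor path as the default argument:

    import hashlib

    def ivector_extractor_id(path="exp/nnet3/extractor/final.ie"):
        # Same fingerprint as `md5sum final.ie | awk '{print $1}'` in the
        # shell script: the hex MD5 digest of the raw model file.
        with open(path, "rb") as f:
            return hashlib.md5(f.read()).hexdigest()

Any byte-level change to final.ie changes the digest, so egs and features produced by different extractors can be told apart cheaply.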
+ diff --git a/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh b/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh index 9b354c0753e..67845b01c8a 100755 --- a/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh +++ b/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh @@ -184,3 +184,8 @@ done rm $dir/final.ie 2>/dev/null ln -s $x.ie $dir/final.ie + +# assign a unique id to this extractor +# we are not interested in the id itself, just pre-caching ... +steps/nnet2/get_ivector_id.sh $dir > /dev/null || exit 1 + From 2fd8da973bee1d5e2e278cfbf3f995073186c439 Mon Sep 17 00:00:00 2001 From: Peter Smit Date: Thu, 9 Feb 2017 22:07:01 +0200 Subject: [PATCH 414/530] [scripts] Give prepare_lang the option to generate more phone disambig symbols (#1408) --- egs/wsj/s5/utils/prepare_lang.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/egs/wsj/s5/utils/prepare_lang.sh b/egs/wsj/s5/utils/prepare_lang.sh index 054210cdd23..b80fae50277 100755 --- a/egs/wsj/s5/utils/prepare_lang.sh +++ b/egs/wsj/s5/utils/prepare_lang.sh @@ -64,6 +64,9 @@ unk_fst= # if you want to model the unknown-word () phone_symbol_table= # if set, use a specified phones.txt file. extra_word_disambig_syms= # if set, add disambiguation symbols from this file (one per line) # to phones/disambig.txt, phones/wdisambig.txt and words.txt +num_extra_phone_disambig_syms=1 # Standard one phone disambiguation symbol is used for optional silence. + # Increasing this number does not harm, but is only useful if you later + # want to introduce this labels to L_disambig.fst # end configuration sections echo "$0 $@" # Print the command line for logging @@ -284,7 +287,7 @@ if "$silprob"; then else ndisambig=$(utils/add_lex_disambig.pl $unk_opt --pron-probs $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt) fi -ndisambig=$[$ndisambig+1]; # add one disambig symbol for silence in lexicon FST. +ndisambig=$[$ndisambig+$num_extra_phone_disambig_syms]; # add (at least) one disambig symbol for silence in lexicon FST. echo $ndisambig > $tmpdir/lex_ndisambig # Format of lexiconp_disambig.txt: From 42114e6444a2b4a4f7fab6d9debd135dc8b95014 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Thu, 9 Feb 2017 15:08:00 -0500 Subject: [PATCH 415/530] [build] make the Makefile checks serial (#1409) --- src/Makefile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index fded748fbe5..c3346d873ef 100644 --- a/src/Makefile +++ b/src/Makefile @@ -31,9 +31,15 @@ include kaldi.mk # Reset the default goal, so that the all target will become default .DEFAULT_GOAL := -all: checkversion kaldi.mk mklibdir $(SUBDIRS) +all: + $(MAKE) checkversion + $(MAKE) kaldi.mk + $(MAKE) mklibdir + $(MAKE) subdirs -echo Done +subdirs: $(SUBDIRS) + mklibdir: test -d $(KALDILIBDIR) || mkdir $(KALDILIBDIR) @@ -51,8 +57,10 @@ checkversion: ifeq ($(shell ./configure --version),$(CONFIGURE_VERSION)) @echo "The version of configure script matches kaldi.mk version. Good." else + @echo "" @echo "The kaldi.mk file was generated using a different version of configure script. 
Please rerun the configure again" @test -f ./kaldi.mk && echo "Hint: Previous configure command line: " && head -n 2 ./kaldi.mk | grep configure | sed 's/^# *//g' + @echo "" @false endif From 0d5e4b1da05d3598a0a460bd3bca10ecf67b79e8 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Thu, 9 Feb 2017 12:08:50 -0800 Subject: [PATCH 416/530] Resolve merge conflicts and add "make ext" to travis build (#1407) * [build]: resolving OpenFst compilation issue with gcc-6.x (#1392) * [egs] Add new graphemic system for Gale Arabic, with newer nnet scripts (#1298) * [build] Windows build: generate missing base/version.h; cosmetic changes (#1397) * [build]: Enable cross compilation, including to android. (#726) If a user has a number of tool chains installed and they do not want to use the default, they must currently edit the kaldi.mk file after running configure to change the CC, CXX, AR, AS, and RANLIB variables. This is something that should be exposed via the configure script. This patch exposes an option to set the host triple for the desired tool chain in the configure script. Building Kaldi on my Raspberry Pi boards is not particularly fast. I have been using the following patch to build kaldi executables for use on the Pi boards for the better part of a year. A typical invocation for me is something like: $ ./configure --static --atlas-root=/opt/cross/armv8hf \ --fst-root=/opt/cross/armv8hf --host=armv8-rpi3-linux-gnueabihf \ --fst-version=1.4.1 This way I can build on my much faster x86 desktop, but still run experiments on ARM. I have included support for cross compiling for ppc64le and it works for me (at least it produces binaries for ppc64le I don't have a ppc64 machine to test it). Signed-off-by: Eric B Munson * Add mk file and configure options for building for Android Building for Android requires a toolchain that can be built using the Android NDK. It works similiarly to the linux build except that it only uses clang, only supports the openBLAS math library, and requires an additional include directory for the system C++ headers. A typical configure invocation looks like: ./configure --static --openblas-root=/opt/cross/arm-linux-androideabi \ --fst-root=/opt/cross/arm-linux-androideabi \ --host=arm-linux-androideabi --fst-version=1.4.1 \ --android-includes=/opt/cross/arm-linux-androideabi/sysroot/usr/include Signed-off-by: Eric B Munson * Make pthread cancel symbols noops for Android The Android C library does not support cancelling pthreads so the symbols PTHREAD_CANCEL_STATE and pthread_setcancelstate are undefined. Because a pthread cannot be cancelled in Android, it is reasonable to make the pthread_setcancelstate() call a noop. Signed-off-by: Eric B Munson * [build] fixing issue introduced in the previous win commit (#1399) * [egs] Fix to HKUST nnet2/3 scripts. (#1401) when training ubm, we should just use the 40 dimention mfcc so change the train directory for avoiding dimention mismatching this script won't get error when run after nnet2's scripts. * [egs,scripts,src] Add BABEL s5d recipe; various associated fixes (#1356) * Creating a new recipe directory * adding lists * Improvements in the pipeline, fixes, syllab search * Transplanting the diff to s5d * added TDNN, LSTM and BLSTM scripts. added Telugu conf files. 
* added blstm script and top level commands
* improved keyword search, new lang configs
* removing not needed scripts
* added blstm results
* some keyword-search optimization binaries
* removing some extra files + kwsearch pipeline improvement
* adding configs for the OP3 langs
* configs for the rest of the OP3 langs
* Added updated configs for IndusDB.20151208.Babel.tar.bz2
* fixes of the pipeline, added langp (re)estimation
* adding the kaldi-native search pipeline and a bunch of changes related to this
* removing extra files
* A couple of fixes
* KWS improvements and fixes
* Fixes of a couple of issues reported by Fred Richardson
* A separate script for lexicon expansion
* A couple of fixes and tweaks. Added checks for tools, especially sox.
* adding a couple of changes -- new style options and results for BP langs
* adding new results (still will need to be updated)
* added langp and some details tweaked
* updated STT results, new KWS results and a couple of small fixes all around
* adding file lists for dev languages
* miniature fixes and cleanups
* one more batch of small fixes -- mostly whitespace cleanup
* small fixes -- location of files and removal of trailing slash in the pathname
* enabling stage-2 KWS pipeline
* adding some directories to .gitignore
* some quick fixes
* latest fixes
* making the script split_compound_set conform to the naming
* some last minute fixes for the combination scoring
* do not attempt to score when the scoring data is not available
* bug fixes and --ntrue-from option
* another batch of fixes
* adding +x permission to split_compound_set.sh
* fixing whitespaces
* fixing whitespaces
* a couple of fixes
* adding the cleanup script and chain models training
* adding the graphemic/unicode lexicon feature
* adding the graphemic/unicode lexicon feature
* fixing the cc files headers, adding c info
* use the user-provided kwset id, not the filename
* use _cleaned affix
* fixes w.r.t. getting chain models independent of other systems
* small fixes as reported by Fred Richardson and Yenda
* another issue reported by Fred Richardson
* fixing KWS for the chain systems
* fixes in the KWS hitlist combination
* adding 40hrs pashto config and fixes for the unicode system
* fixing some bugs as reported by Ni Chongjia (I2R)
* fixing some bugs as reported by Fred Richardson
* adding 40hrs Pashto OP3 setup
* addressing Dan's comments, some further cleanup
* improving the make_index script
* remove fsts-scale
* adding 'see also' to some of the fst tools
* adding back accidentally removed svn check
* [egs] removing empty files in BABEL recipe (#1406)
These caused a problem on MacOS, as reported by @dogancan.
* Add online extension to travis build.
* Fix parallel online extension build.
Randomly choose between single and double precision BaseFloats in travis build.
* Remove parentheses that were unintentionally added to the travis script in
the previous commit.
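Looking back at the prepare_lang.sh change in patch 414: the arithmetic behind --num-extra-phone-disambig-syms is simply additive. A small sketch of the count (illustrative, not part of the scripts):

    def total_phone_disambig_syms(lexicon_ndisambig, num_extra=1):
        # add_lex_disambig.pl reports how many #N symbols the lexicon needs;
        # at least one extra is reserved for optional silence, and a larger
        # --num-extra-phone-disambig-syms reserves further symbols that can
        # later be introduced into L_disambig.fst.
        assert num_extra >= 1
        return lexicon_ndisambig + num_extra

    # e.g. a lexicon needing #1..#3 plus the default extra gives 4 symbols.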
--- .gitignore | 11 + egs/babel/s5/RESULTS | 0 egs/babel/s5/local/make_pitch.sh | 18 +- egs/babel/s5b/RESULTS | 0 egs/babel/s5c/RESULTS | 0 egs/babel/s5c/local/CHECKPOINT.sh | 18 +- egs/babel/s5c/local/ali_to_rttm.sh | 4 +- .../s5c/local/annotated_kwlist_to_KWs.pl | 4 +- egs/babel/s5c/local/apply_g2p.sh | 12 +- .../s5c/local/apply_map_tab_preserving.pl | 12 +- egs/babel/s5c/local/augment_original_stm.pl | 2 +- egs/babel/s5c/local/best_path_weights.sh | 18 +- egs/babel/s5c/local/check_models.sh | 2 +- egs/babel/s5c/local/check_wers.sh | 4 +- egs/babel/s5c/local/cmu_uem2kaldi_dir.sh | 28 +- egs/babel/s5c/local/create_shadow_dataset.sh | 16 +- egs/babel/s5c/local/cstr_ndx2flist.pl | 4 +- egs/babel/s5c/local/ctm2segments.pl | 20 +- egs/babel/s5c/local/datasets/basic_kws.sh | 10 +- egs/babel/s5c/local/datasets/extra_kws.sh | 37 +- .../s5c/local/datasets/supervised_pem.sh | 2 +- .../s5c/local/datasets/supervised_seg.sh | 4 +- .../s5c/local/datasets/supervised_uem.sh | 4 +- egs/babel/s5c/local/datasets/vocab_kws.sh | 22 +- egs/babel/s5c/local/extend_lexicon.sh | 42 +- egs/babel/s5c/local/extract_oov_words.pl | 14 +- egs/babel/s5c/local/filter_kwslist.pl | 8 +- egs/babel/s5c/local/find_transcripts.pl | 4 +- egs/babel/s5c/local/fix_kwslist.pl | 2 +- .../s5c/local/generate_confusion_matrix.sh | 4 +- egs/babel/s5c/local/generate_example_kws.sh | 4 +- .../s5c/local/generate_proxy_keywords.sh | 42 +- egs/babel/s5c/local/get_syllable_text.sh | 77 - egs/babel/s5c/local/gridsearch.pl | 12 +- egs/babel/s5c/local/gridsearch2.pl | 12 +- egs/babel/s5c/local/kwords2indices.pl | 18 +- egs/babel/s5c/local/kws_combine.sh | 6 +- egs/babel/s5c/local/kws_data_prep.sh | 42 +- egs/babel/s5c/local/kws_data_prep_proxy.sh | 26 +- .../s5c/local/kws_data_prep_syllables.sh | 144 -- .../s5c/local/kws_gen_oracle_lattices.sh | 8 +- egs/babel/s5c/local/kws_oracle.sh | 20 +- egs/babel/s5c/local/kws_score_f4de.sh | 9 +- egs/babel/s5c/local/kws_search.sh | 8 +- egs/babel/s5c/local/kws_setup.sh | 14 +- egs/babel/s5c/local/lattice_to_ctm.sh | 47 +- .../s5c/local/lattice_to_ctm_syllable.sh | 115 -- egs/babel/s5c/local/make_L_align.sh | 4 +- egs/babel/s5c/local/make_ecf_subset.sh | 4 +- .../s5c/local/make_lexicon_fst_special.pl | 2 +- egs/babel/s5c/local/make_lexicon_subset.sh | 6 +- egs/babel/s5c/local/make_syllable_lexicon.sh | 72 - egs/babel/s5c/local/naive_comb.pl | 4 +- egs/babel/s5c/local/ndx2flist.pl | 4 +- .../local/nist_eval/create_compound_set.sh | 2 +- .../s5c/local/nist_eval/export_systems.sh | 8 +- egs/babel/s5c/local/nist_eval/filter_data.sh | 2 +- .../s5c/local/nist_eval/get_training_times.sh | 12 +- egs/babel/s5c/local/nist_eval/make_release.sh | 18 +- .../local/nnet2/get_egs_semi_supervised.sh | 28 +- egs/babel/s5c/local/normalize_transcript.pl | 6 +- .../local/prepare_acoustic_training_data.pl | 16 +- egs/babel/s5c/local/prepare_lexicon.pl | 22 +- egs/babel/s5c/local/prepare_stm.pl | 6 +- .../local/resegment/evaluate_segmentation.pl | 2 +- .../s5c/local/resegment/generate_segments.sh | 24 +- egs/babel/s5c/local/rttm_to_text.pl | 8 +- egs/babel/s5c/local/run_kws_stt_task.sh | 30 +- egs/babel/s5c/local/score_combine.sh | 22 +- egs/babel/s5c/local/score_mbr.sh | 2 +- egs/babel/s5c/local/score_sctk_prune.sh | 14 +- egs/babel/s5c/local/score_stm.sh | 7 +- egs/babel/s5c/local/shadow_set_kws_search.sh | 20 +- egs/babel/s5c/local/split_ctms.sh | 6 +- egs/babel/s5c/local/stm2text.pl | 6 +- egs/babel/s5c/local/subset_atwv.pl | 6 +- egs/babel/s5c/local/subset_kwslist.pl | 2 +- egs/babel/s5c/local/summarize_logs.pl | 4 +- 
egs/babel/s5c/local/syllab/ali_to_syllabs.sh | 71 + .../s5c/local/syllab/create_syllables.pl | 154 ++ .../local/syllab/generate_syllable_lang.sh | 125 ++ .../local/syllab/map_prons_to_syllables.pl | 61 + egs/babel/s5c/local/train_g2p.sh | 4 +- egs/babel/s5c/local/train_lms_srilm.sh | 143 +- egs/babel/s5c/local/train_mmi_sgmm2.sh | 6 +- egs/babel/s5c/local/txt_to_rttm.pl | 4 +- egs/babel/s5c/local/uem_ctm2segments.pl | 8 +- egs/babel/s5c/results/RESULTS.105-turkish.flp | 29 + egs/babel/s5c/results/RESULTS.106-tagalog.flp | 34 + .../s5c/results/RESULTS.107-vietnamese.flp | 50 + egs/babel/s5c/run-1-main.sh | 2 +- egs/babel/s5c/run-4-anydecode.sh | 28 +- egs/babel/s5c/run-4b-anydecode-bnf.sh | 14 +- egs/babel/s5d/EXAMPLE.vietnamese | 116 ++ egs/babel/s5d/README.txt | 82 + egs/babel/s5d/RESULTS.txt | 8 + egs/babel/s5d/RUN_UNICODE_SYSTEM | 9 + egs/babel/s5d/UNICODE_README | 119 ++ egs/babel/s5d/babel.html | 788 +++++++++ egs/babel/s5d/cmd.sh | 29 + egs/babel/s5d/conf/bnf/config_full.py | 61 + egs/babel/s5d/conf/bnf/config_limited.py | 62 + egs/babel/s5d/conf/common.fullLP | 124 ++ egs/babel/s5d/conf/common.limitedLP | 128 ++ .../s5d/conf/common.semisupervised.limitedLP | 27 + egs/babel/s5d/conf/common_vars.sh | 28 + egs/babel/s5d/conf/glm | 13 + .../lang/101-cantonese-fullLP.official.conf | 104 ++ .../101-cantonese-limitedLP.official.conf | 112 ++ .../lang/102-assamese-fullLP.official.conf | 105 ++ .../lang/102-assamese-limitedLP.official.conf | 114 ++ .../lang/103-bengali-fullLP.official.conf | 105 ++ .../lang/103-bengali-limitedLP.official.conf | 114 ++ .../104-pashto-fullLP-40hrs.official.conf | 114 ++ .../conf/lang/104-pashto-fullLP.official.conf | 114 ++ .../lang/104-pashto-limitedLP.official.conf | 110 ++ .../lang/105-turkish-fullLP.official.conf | 111 ++ .../lang/105-turkish-limitedLP.official.conf | 111 ++ .../lang/106-tagalog-fullLP.official.conf | 108 ++ .../lang/106-tagalog-limitedLP.official.conf | 108 ++ .../lang/107-vietnamese-fullLP.official.conf | 107 ++ .../107-vietnamese-limitedLP.official.conf | 115 ++ .../lang/201-haitian-fullLP.official.conf | 80 + .../lang/201-haitian-limitedLP.official.conf | 89 + .../conf/lang/202-swahili.FLP.official.conf | 93 + .../conf/lang/202-swahili.LLP.official.conf | 99 ++ .../conf/lang/203-lao-fullLP.official.conf | 101 ++ .../conf/lang/203-lao-limitedLP.official.conf | 110 ++ .../conf/lang/204-tamil-fullLP.official.conf | 112 ++ .../lang/204-tamil-limitedLP.official.conf | 122 ++ .../conf/lang/205-kurmanji.FLP.official.conf | 94 + .../conf/lang/205-kurmanji.LLP.official.conf | 100 ++ .../conf/lang/206-zulu-fullLP.official.conf | 129 ++ .../lang/206-zulu-limitedLP.official.conf | 126 ++ .../conf/lang/207-tokpisin.FLP.official.conf | 93 + .../conf/lang/207-tokpisin.LLP.official.conf | 99 ++ .../conf/lang/301-cebuano.FLP.official.conf | 100 ++ .../conf/lang/301-cebuano.LLP.official.conf | 106 ++ .../conf/lang/302-kazakh.FLP.official.conf | 101 ++ .../conf/lang/302-kazakh.LLP.official.conf | 107 ++ .../conf/lang/303-telugu.FLP.official.conf | 100 ++ .../conf/lang/303-telugu.LLP.official.conf | 107 ++ .../lang/304-lithuanian.FLP.official.conf | 100 ++ .../lang/304-lithuanian.LLP.official.conf | 106 ++ .../conf/lang/305-guarani.FLP.official.conf | 45 + .../conf/lang/305-guarani.LLP.official.conf | 51 + .../s5d/conf/lang/306-igbo.FLP.official.conf | 45 + .../s5d/conf/lang/306-igbo.LLP.official.conf | 51 + .../conf/lang/307-amharic.FLP.official.conf | 46 + .../conf/lang/307-amharic.LLP.official.conf | 52 + .../conf/lang/401-mongolian.FLP.official.conf | 
46 + .../conf/lang/401-mongolian.LLP.official.conf | 52 + .../conf/lang/402-javanese.FLP.official.conf | 47 + .../conf/lang/402-javanese.LLP.official.conf | 51 + .../conf/lang/403-dholuo.FLP.official.conf | 45 + .../conf/lang/403-dholuo.LLP.official.conf | 51 + .../s5d/conf/lists/101-cantonese/dev.list | 120 ++ .../s5d/conf/lists/101-cantonese/eval.list | 220 +++ .../conf/lists/101-cantonese/evalpart1.list | 63 + .../lists/101-cantonese/train.FullLP.list | 965 +++++++++++ .../lists/101-cantonese/train.LimitedLP.list | 120 ++ .../s5d/conf/lists/102-assamese/dev.list | 126 ++ .../s5d/conf/lists/102-assamese/eval.list | 189 +++ .../conf/lists/102-assamese/evalpart1.list | 65 + .../conf/lists/102-assamese/train.FullLP.list | 790 +++++++++ .../lists/102-assamese/train.LimitedLP.list | 138 ++ .../train.LimitedLP.untranscribed.list | 652 +++++++ .../102-assamese/train.untranscribed.list | 259 +++ egs/babel/s5d/conf/lists/103-bengali/dev.list | 125 ++ .../s5d/conf/lists/103-bengali/eval.list | 193 +++ .../s5d/conf/lists/103-bengali/evalpart1.list | 66 + .../conf/lists/103-bengali/train.FullLP.list | 751 ++++++++ .../lists/103-bengali/train.LimitedLP.list | 124 ++ .../train.LimitedLP.untranscribed.list | 627 +++++++ .../103-bengali/train.untranscribed.list | 255 +++ egs/babel/s5d/conf/lists/104-pashto/dev.list | 143 ++ egs/babel/s5d/conf/lists/104-pashto/eval.list | 198 +++ .../s5d/conf/lists/104-pashto/evalpart1.list | 70 + .../conf/lists/104-pashto/train.40HrFLP.list | 512 ++++++ .../lists/104-pashto/train.LimitedLP.list | 131 ++ .../s5d/conf/lists/104-pashto/training.list | 1026 +++++++++++ egs/babel/s5d/conf/lists/105-turkish/dev.list | 127 ++ .../s5d/conf/lists/105-turkish/eval.list | 194 +++ .../s5d/conf/lists/105-turkish/evalpart1.list | 65 + .../conf/lists/105-turkish/train.FullLP.list | 993 +++++++++++ .../lists/105-turkish/train.LimitedLP.list | 128 ++ egs/babel/s5d/conf/lists/106-tagalog/dev.list | 146 ++ .../s5d/conf/lists/106-tagalog/eval.list | 241 +++ .../s5d/conf/lists/106-tagalog/evalpart1.list | 69 + .../conf/lists/106-tagalog/train.FullLP.list | 1138 +++++++++++++ .../lists/106-tagalog/train.LimitedLP.list | 134 ++ .../s5d/conf/lists/107-vietnamese/dev.list | 132 ++ .../s5d/conf/lists/107-vietnamese/eval.list | 981 +++++++++++ .../conf/lists/107-vietnamese/evalpart1.list | 194 +++ .../lists/107-vietnamese/train.FullLP.list | 1042 ++++++++++++ .../lists/107-vietnamese/train.LimitedLP.list | 126 ++ .../train.LimitedLP.untranscribed.list | 916 ++++++++++ egs/babel/s5d/conf/lists/201-haitian/dev.list | 126 ++ .../s5d/conf/lists/201-haitian/eval.list | 194 +++ .../s5d/conf/lists/201-haitian/evalpart1.list | 64 + .../conf/lists/201-haitian/train.FullLP.list | 760 +++++++++ .../lists/201-haitian/train.LimitedLP.list | 126 ++ .../train.LimitedLP.untranscribed.list | 634 +++++++ .../201-haitian/train.untranscribed.list | 270 +++ egs/babel/s5d/conf/lists/202-swahili/dev.list | 142 ++ .../s5d/conf/lists/202-swahili/eval.list | 963 +++++++++++ .../s5d/conf/lists/202-swahili/evalpart1.list | 196 +++ .../s5d/conf/lists/202-swahili/sub-train.list | 128 ++ .../202-swahili/sub-train.untranscribed.list | 397 +++++ .../s5d/conf/lists/202-swahili/training.list | 525 ++++++ .../202-swahili/untranscribed-training.list | 555 ++++++ egs/babel/s5d/conf/lists/203-lao/dev.list | 131 ++ egs/babel/s5d/conf/lists/203-lao/eval.list | 192 +++ .../s5d/conf/lists/203-lao/evalpart1.list | 70 + .../s5d/conf/lists/203-lao/train.FullLP.list | 781 +++++++++ .../conf/lists/203-lao/train.LimitedLP.list | 127 ++ 
.../train.LimitedLP.untranscribed.list | 654 +++++++ .../lists/203-lao/train.untranscribed.list | 257 +++ egs/babel/s5d/conf/lists/204-tamil/dev.list | 125 ++ egs/babel/s5d/conf/lists/204-tamil/eval.list | 947 +++++++++++ .../s5d/conf/lists/204-tamil/evalpart1.list | 186 ++ .../conf/lists/204-tamil/train.FullLP.list | 778 +++++++++ .../conf/lists/204-tamil/train.LimitedLP.list | 125 ++ .../train.LimitedLP.untranscribed.list | 653 +++++++ .../lists/204-tamil/train.untranscribed.list | 269 +++ .../s5d/conf/lists/205-kurmanji/dev.list | 132 ++ .../s5d/conf/lists/205-kurmanji/eval.list | 193 +++ .../conf/lists/205-kurmanji/evalpart1.list | 63 + .../conf/lists/205-kurmanji/sub-train.list | 133 ++ .../205-kurmanji/sub-train.untranscribed.list | 399 +++++ .../s5d/conf/lists/205-kurmanji/training.list | 532 ++++++ .../205-kurmanji/untranscribed-training.list | 521 ++++++ egs/babel/s5d/conf/lists/206-zulu/dev.list | 141 ++ egs/babel/s5d/conf/lists/206-zulu/eval.list | 202 +++ .../s5d/conf/lists/206-zulu/evalpart1.list | 72 + .../s5d/conf/lists/206-zulu/train.FullLP.list | 829 +++++++++ .../conf/lists/206-zulu/train.LimitedLP.list | 124 ++ .../train.LimitedLP.untranscribed.list | 705 ++++++++ .../lists/206-zulu/train.untranscribed.list | 285 ++++ .../s5d/conf/lists/207-tokpisin/dev.list | 132 ++ .../s5d/conf/lists/207-tokpisin/eval.list | 192 +++ .../conf/lists/207-tokpisin/evalpart1.list | 64 + .../conf/lists/207-tokpisin/sub-train.list | 126 ++ .../207-tokpisin/sub-train.untranscribed.list | 380 +++++ .../s5d/conf/lists/207-tokpisin/training.list | 506 ++++++ .../207-tokpisin/untranscribed-training.list | 539 ++++++ egs/babel/s5d/conf/lists/301-cebuano/dev.list | 134 ++ .../s5d/conf/lists/301-cebuano/eval.list | 190 +++ .../s5d/conf/lists/301-cebuano/evalpart1.list | 62 + .../s5d/conf/lists/301-cebuano/sub-train.list | 126 ++ .../301-cebuano/sub-train.untranscribed.list | 376 ++++ .../s5d/conf/lists/301-cebuano/training.list | 502 ++++++ .../301-cebuano/untranscribed-training.list | 548 ++++++ egs/babel/s5d/conf/lists/302-kazakh/dev.list | 140 ++ egs/babel/s5d/conf/lists/302-kazakh/eval.list | 191 +++ .../s5d/conf/lists/302-kazakh/evalpart1.list | 61 + .../s5d/conf/lists/302-kazakh/sub-train.list | 130 ++ .../302-kazakh/sub-train.untranscribed.list | 398 +++++ .../s5d/conf/lists/302-kazakh/training.list | 528 ++++++ .../302-kazakh/untranscribed-training.list | 569 +++++++ .../s5d/conf/lists/303-telugu/dev.2h.list | 126 ++ egs/babel/s5d/conf/lists/303-telugu/dev.list | 126 ++ egs/babel/s5d/conf/lists/303-telugu/eval.list | 192 +++ .../s5d/conf/lists/303-telugu/evalpart1.list | 62 + .../s5d/conf/lists/303-telugu/sub-train.list | 134 ++ .../303-telugu/sub-train.untranscribed.list | 380 +++++ .../s5d/conf/lists/303-telugu/training.list | 514 ++++++ .../303-telugu/untranscribed-training.list | 501 ++++++ .../s5d/conf/lists/304-lithuanian/dev.2h.list | 122 ++ .../s5d/conf/lists/304-lithuanian/dev.list | 122 ++ .../s5d/conf/lists/304-lithuanian/eval.list | 192 +++ .../conf/lists/304-lithuanian/evalpart1.list | 60 + .../conf/lists/304-lithuanian/sub-train.list | 120 ++ .../sub-train.untranscribed.list | 364 ++++ .../conf/lists/304-lithuanian/training.list | 484 ++++++ .../untranscribed-training.list | 524 ++++++ .../s5d/conf/lists/305-guarani/dev.2h.list | 124 ++ egs/babel/s5d/conf/lists/305-guarani/dev.list | 124 ++ .../s5d/conf/lists/305-guarani/eval.list | 186 ++ .../s5d/conf/lists/305-guarani/sub-train.list | 134 ++ .../305-guarani/sub-train.untranscribed.list | 392 +++++ 
.../s5d/conf/lists/305-guarani/training.list | 526 ++++++ .../305-guarani/untranscribed-training.list | 525 ++++++ egs/babel/s5d/conf/lists/306-igbo/dev.2h.list | 136 ++ egs/babel/s5d/conf/lists/306-igbo/dev.list | 136 ++ egs/babel/s5d/conf/lists/306-igbo/eval.list | 194 +++ .../s5d/conf/lists/306-igbo/sub-train.list | 132 ++ .../306-igbo/sub-train.untranscribed.list | 380 +++++ .../s5d/conf/lists/306-igbo/training.list | 512 ++++++ .../306-igbo/untranscribed-training.list | 537 ++++++ .../s5d/conf/lists/307-amharic/dev.2h.list | 123 ++ egs/babel/s5d/conf/lists/307-amharic/dev.list | 123 ++ .../s5d/conf/lists/307-amharic/eval.list | 186 ++ .../s5d/conf/lists/307-amharic/sub-train.list | 122 ++ .../307-amharic/sub-train.untranscribed.list | 364 ++++ .../s5d/conf/lists/307-amharic/training.list | 486 ++++++ .../307-amharic/untranscribed-training.list | 568 +++++++ .../s5d/conf/lists/401-mongolian/dev.2h.list | 124 ++ .../s5d/conf/lists/401-mongolian/dev.list | 124 ++ .../s5d/conf/lists/401-mongolian/eval.list | 186 ++ .../conf/lists/401-mongolian/sub-train.list | 126 ++ .../sub-train.untranscribed.list | 392 +++++ .../conf/lists/401-mongolian/training.list | 518 ++++++ .../401-mongolian/untranscribed-training.list | 530 ++++++ .../s5d/conf/lists/402-javanese/dev.2h.list | 122 ++ .../s5d/conf/lists/402-javanese/dev.list | 122 ++ .../s5d/conf/lists/402-javanese/eval.list | 188 ++ .../conf/lists/402-javanese/sub-train.list | 122 ++ .../402-javanese/sub-train.untranscribed.list | 370 ++++ .../s5d/conf/lists/402-javanese/training.list | 492 ++++++ .../402-javanese/untranscribed-training.list | 519 ++++++ .../s5d/conf/lists/403-dholuo/dev.2h.list | 122 ++ egs/babel/s5d/conf/lists/403-dholuo/dev.list | 122 ++ egs/babel/s5d/conf/lists/403-dholuo/eval.list | 182 ++ .../s5d/conf/lists/403-dholuo/sub-train.list | 122 ++ .../403-dholuo/sub-train.untranscribed.list | 380 +++++ .../s5d/conf/lists/403-dholuo/training.list | 502 ++++++ .../403-dholuo/untranscribed-training.list | 533 ++++++ egs/babel/s5d/conf/mfcc.conf | 2 + egs/babel/s5d/conf/mfcc_hires.conf | 10 + egs/babel/s5d/conf/online_cmvn.conf | 1 + egs/babel/s5d/conf/pitch.conf | 1 + egs/babel/s5d/conf/plp.conf | 1 + egs/babel/s5d/conf/slurm.bluecrab.conf | 11 + egs/babel/s5d/local/ali_to_rttm.sh | 80 + .../s5d/local/annotated_kwlist_to_KWs.pl | 124 ++ egs/babel/s5d/local/apply_g2p.sh | 127 ++ .../s5d/local/apply_map_tab_preserving.pl | 94 + egs/babel/s5d/local/arpa2G.sh | 115 ++ egs/babel/s5d/local/augment_original_stm.pl | 110 ++ egs/babel/s5d/local/best_path_weights.sh | 142 ++ egs/babel/s5d/local/best_scores.sh | 43 + egs/babel/s5d/local/best_scores_kws.sh | 179 ++ .../s5d/local/build_edit_distance_fst.pl | 127 ++ egs/babel/s5d/local/chain/run_blstm.sh | 180 ++ egs/babel/s5d/local/chain/run_blstm_bab1.sh | 180 ++ egs/babel/s5d/local/chain/run_blstm_bab2.sh | 180 ++ egs/babel/s5d/local/chain/run_blstm_bab3.sh | 180 ++ egs/babel/s5d/local/chain/run_blstm_bab4.sh | 179 ++ egs/babel/s5d/local/chain/run_blstm_bab5.sh | 179 ++ .../s5d/local/chain/run_ivector_common.sh | 240 +++ egs/babel/s5d/local/chain/run_tdnn.sh | 177 ++ egs/babel/s5d/local/chain/run_tdnn_bab1.sh | 177 ++ egs/babel/s5d/local/chain/run_tdnn_bab2.sh | 177 ++ egs/babel/s5d/local/chain/run_tdnn_bab3.sh | 178 ++ egs/babel/s5d/local/chain/run_tdnn_bab4.sh | 177 ++ egs/babel/s5d/local/check_models.sh | 34 + egs/babel/s5d/local/check_tools.sh | 40 + egs/babel/s5d/local/check_wers.sh | 50 + egs/babel/s5d/local/cmu_uem2kaldi_dir.sh | 124 ++ egs/babel/s5d/local/count_to_logprob.pl | 94 + 
egs/babel/s5d/local/create_shadow_dataset.sh | 176 ++ egs/babel/s5d/local/cstr_ndx2flist.pl | 54 + egs/babel/s5d/local/ctm2segments.pl | 159 ++ egs/babel/s5d/local/datasets/basic_kws.sh | 28 + egs/babel/s5d/local/datasets/extra_kws.sh | 137 ++ .../s5d/local/datasets/supervised_pem.sh | 35 + .../s5d/local/datasets/supervised_seg.sh | 90 + .../s5d/local/datasets/supervised_uem.sh | 36 + .../s5d/local/datasets/unsupervised_seg.sh | 1 + .../s5d/local/datasets/unsupervised_uem.sh | 1 + egs/babel/s5d/local/datasets/vocab_kws.sh | 51 + egs/babel/s5d/local/decode_helper.sh | 32 + egs/babel/s5d/local/eval_kw_subsets.sh | 4 + egs/babel/s5d/local/extend_lexicon.sh | 572 +++++++ egs/babel/s5d/local/extract_oov_words.pl | 70 + egs/babel/s5d/local/filter_keywords.pl | 68 + egs/babel/s5d/local/filter_kwslist.pl | 55 + egs/babel/s5d/local/fix_kwslist.pl | 89 + .../s5d/local/generate_confusion_matrix.sh | 102 ++ egs/babel/s5d/local/generate_example_kws.sh | 110 ++ .../local/generate_phoneme_transcription.sh | 86 + .../s5d/local/generate_proxy_keywords.sh | 176 ++ egs/babel/s5d/local/kaldi_dir2uem.py | 101 ++ egs/babel/s5d/local/kwords2indices.pl | 123 ++ egs/babel/s5d/local/kws_combine.sh | 119 ++ egs/babel/s5d/local/kws_data_prep.sh | 142 ++ egs/babel/s5d/local/kws_data_prep_proxy.sh | 270 +++ .../s5d/local/kws_gen_oracle_lattices.sh | 56 + egs/babel/s5d/local/kws_oracle.sh | 136 ++ egs/babel/s5d/local/kws_oracle_threshold.pl | 200 +++ egs/babel/s5d/local/kws_score.sh | 1 + egs/babel/s5d/local/kws_score_f4de.sh | 96 ++ egs/babel/s5d/local/kws_search.sh | 230 +++ egs/babel/s5d/local/kws_setup.sh | 158 ++ egs/babel/s5d/local/lattice_to_ctm.sh | 109 ++ .../s5d/local/lexicon/make_unicode_lexicon.py | 469 +++++ egs/babel/s5d/local/lexicon/make_word_list.py | 93 + egs/babel/s5d/local/lonestar.py | 333 ++++ egs/babel/s5d/local/make_L_align.sh | 54 + egs/babel/s5d/local/make_corpus_subset.sh | 96 ++ egs/babel/s5d/local/make_ecf_subset.sh | 52 + .../s5d/local/make_lexicon_fst_special.pl | 53 + egs/babel/s5d/local/make_lexicon_subset.sh | 30 + egs/babel/s5d/local/make_wordlist.sh | 14 + egs/babel/s5d/local/map_lang.sh | 81 + egs/babel/s5d/local/naive_comb.pl | 234 +++ .../local/nist_eval/create_compound_set.sh | 164 ++ .../create_new_language_configs.FLP.sh | 236 +++ .../create_new_language_configs.LLP.sh | 204 +++ .../s5d/local/nist_eval/export_systems.sh | 33 + egs/babel/s5d/local/nist_eval/filter_data.sh | 152 ++ .../s5d/local/nist_eval/get_training_times.sh | 229 +++ egs/babel/s5d/local/nist_eval/make_release.sh | 356 ++++ .../s5d/local/nist_eval/split_compound_set.sh | 53 + .../local/nnet2/get_egs_semi_supervised.sh | 374 ++++ egs/babel/s5d/local/nnet3/run_blstm.sh | 29 + .../s5d/local/nnet3/run_blstm_realigned.sh | 32 + .../s5d/local/nnet3/run_ivector_common.sh | 137 ++ .../run_ivector_multicondition_common.sh | 208 +++ egs/babel/s5d/local/nnet3/run_lstm.sh | 156 ++ .../s5d/local/nnet3/run_lstm_realigned.sh | 149 ++ egs/babel/s5d/local/nnet3/run_tdnn.sh | 83 + egs/babel/s5d/local/normalize_transcript.pl | 59 + .../s5d/local/optimize/OptimizeParams.pm | 631 +++++++ egs/babel/s5d/local/optimize2.pl | 152 ++ .../local/prepare_acoustic_training_data.pl | 484 ++++++ .../s5d/local/prepare_extended_lexicon.sh | 30 + egs/babel/s5d/local/prepare_lexicon.pl | 404 +++++ egs/babel/s5d/local/prepare_stm.pl | 345 ++++ .../s5d/local/prepare_unicode_lexicon.py | 198 +++ egs/babel/s5d/local/reestimate_langp.sh | 33 + .../local/resegment/evaluate_segmentation.pl | 198 +++ .../s5d/local/resegment/generate_segments.sh | 156 ++ 
egs/babel/s5d/local/resegment/segmentation.py | 1508 +++++++++++++++++ .../s5d/local/resegment/train_segmentation.sh | 63 + egs/babel/s5d/local/rttm_to_text.pl | 151 ++ .../s5d/local/run_cleanup_segmentation.sh | 56 + egs/babel/s5d/local/run_kws_stt_task.sh | 99 ++ egs/babel/s5d/local/run_kws_stt_task2.sh | 124 ++ egs/babel/s5d/local/score.sh | 1 + egs/babel/s5d/local/score_combine.sh | 181 ++ egs/babel/s5d/local/score_map.sh | 64 + egs/babel/s5d/local/score_mbr.sh | 58 + egs/babel/s5d/local/score_sctk_prune.sh | 138 ++ egs/babel/s5d/local/score_stm.sh | 103 ++ egs/babel/s5d/local/search/analyze_stats.pl | 219 +++ egs/babel/s5d/local/search/annotate_kwlist.pl | 166 ++ egs/babel/s5d/local/search/combine.sh | 258 +++ egs/babel/s5d/local/search/combine_results.pl | 422 +++++ egs/babel/s5d/local/search/combine_special.sh | 200 +++ .../s5d/local/search/compile_keywords.sh | 54 + .../local/search/compile_proxy_keywords.sh | 271 +++ .../s5d/local/search/create_categories.pl | 112 ++ .../s5d/local/search/filter_by_category.pl | 360 ++++ .../s5d/local/search/filter_kws_results.pl | 189 +++ egs/babel/s5d/local/search/normalize.sh | 89 + .../s5d/local/search/normalize_categories.pl | 89 + .../s5d/local/search/normalize_results_kst.pl | 203 +++ .../s5d/local/search/per_category_stats.pl | 326 ++++ .../s5d/local/search/rttm_to_hitlists.sh | 107 ++ egs/babel/s5d/local/search/run_phn_search.sh | 135 ++ egs/babel/s5d/local/search/run_search.sh | 136 ++ egs/babel/s5d/local/search/run_syll_search.sh | 135 ++ egs/babel/s5d/local/search/score.sh | 143 ++ egs/babel/s5d/local/search/search.sh | 206 +++ egs/babel/s5d/local/search/setup.sh | 118 ++ egs/babel/s5d/local/search/utt_to_files.pl | 62 + egs/babel/s5d/local/search/write_kwslist.pl | 134 ++ egs/babel/s5d/local/search_index.sh | 51 + egs/babel/s5d/local/setup_categories.sh | 36 + egs/babel/s5d/local/shadow_set_kws_search.sh | 265 +++ egs/babel/s5d/local/show_lattice.sh | 34 + egs/babel/s5d/local/split_ctms.sh | 65 + egs/babel/s5d/local/stm2text.pl | 43 + egs/babel/s5d/local/subset_atwv.pl | 120 ++ egs/babel/s5d/local/subset_kwslist.pl | 33 + egs/babel/s5d/local/summarize_logs.pl | 121 ++ egs/babel/s5d/local/syllab/ali_to_syllabs.sh | 71 + .../s5d/local/syllab/create_syll_datadir.sh | 55 + .../s5d/local/syllab/create_syllables.pl | 154 ++ .../s5d/local/syllab/generate_phone_lang.sh | 129 ++ .../local/syllab/generate_syllable_lang.sh | 129 ++ .../s5d/local/syllab/lattice_word2syll.sh | 57 + .../local/syllab/map_prons_to_syllables.pl | 61 + egs/babel/s5d/local/syllab/run_phones.sh | 67 + egs/babel/s5d/local/syllab/run_syllabs.sh | 67 + egs/babel/s5d/local/train_g2p.sh | 94 + egs/babel/s5d/local/train_lms_srilm.sh | 229 +++ egs/babel/s5d/local/txt_to_rttm.pl | 108 ++ egs/babel/s5d/local/uem_ctm2segments.pl | 232 +++ egs/babel/s5d/nnet3_examples.sh | 32 + egs/babel/s5d/path.sh | 7 + egs/babel/s5d/results/RESULTS.105-turkish.flp | 29 + egs/babel/s5d/results/RESULTS.106-tagalog.flp | 34 + .../s5d/results/RESULTS.107-vietnamese.flp | 50 + ....jtrmal1@jhu.edu.2016-03-31T11:34:24-04:00 | 211 +++ ....jtrmal1@jhu.edu.2016-03-31T12:04:03-04:00 | 100 ++ ....jtrmal1@jhu.edu.2016-03-31T12:12:45-04:00 | 100 ++ ....jtrmal1@jhu.edu.2016-03-31T12:21:34-04:00 | 100 ++ ....jtrmal1@jhu.edu.2016-03-31T12:25:02-04:00 | 100 ++ ....jtrmal1@jhu.edu.2016-03-31T12:27:39-04:00 | 100 ++ ....jtrmal1@jhu.edu.2016-03-31T12:29:55-04:00 | 100 ++ ...f.jtrmal1@jhu.edu.2016-02-18T12:15:22-0500 | 28 + ...f.jtrmal1@jhu.edu.2015-11-27T17:53:08-0500 | 27 + 
...f.jtrmal1@jhu.edu.2015-12-01T16:49:23-0500 | 22 + ...f.jtrmal1@jhu.edu.2015-11-28T14:48:47-0500 | 22 + ....jtrmal1@jhu.edu.2016-02-25T15:45:46-05:00 | 242 +++ ...f.jtrmal1@jhu.edu.2015-11-28T14:43:17-0500 | 22 + ...f.jtrmal1@jhu.edu.2015-11-27T16:50:17-0500 | 22 + ...f.jtrmal1@jhu.edu.2015-11-27T16:51:53-0500 | 21 + ...f.jtrmal1@jhu.edu.2016-02-18T11:46:09-0500 | 21 + ...f.jtrmal1@jhu.edu.2016-02-21T10:25:47-0500 | 27 + ...f.jtrmal1@jhu.edu.2015-12-01T16:50:41-0500 | 14 + ...f.jtrmal1@jhu.edu.2015-12-01T19:55:42-0500 | 8 + ...f.jtrmal1@jhu.edu.2016-02-21T10:24:13-0500 | 96 ++ ...f.jtrmal1@jhu.edu.2015-11-27T17:54:01-0500 | 14 + ...f.jtrmal1@jhu.edu.2016-02-21T10:25:25-0500 | 34 + ....jtrmal1@jhu.edu.2016-02-25T10:45:54-05:00 | 43 + ....jtrmal1@jhu.edu.2016-03-05T10:56:45-05:00 | 52 + ....jtrmal1@jhu.edu.2016-02-25T09:46:16-05:00 | 48 + ....jtrmal1@jhu.edu.2016-02-26T16:17:55-05:00 | 34 + ....jtrmal1@jhu.edu.2016-02-26T06:40:39-05:00 | 41 + ....jtrmal1@jhu.edu.2016-02-25T23:27:09-05:00 | 54 + egs/babel/s5d/run-1-main-extend-lex.sh | 190 +++ egs/babel/s5d/run-1-main-unicode.sh | 385 +++++ egs/babel/s5d/run-1-main.sh | 363 ++++ egs/babel/s5d/run-2-segmentation.sh | 107 ++ egs/babel/s5d/run-2a-nnet-cpu.sh | 34 + egs/babel/s5d/run-2a-nnet-ensemble-gpu.sh | 46 + egs/babel/s5d/run-2a-nnet-gpu.sh | 36 + egs/babel/s5d/run-2a-nnet-mpe.sh | 50 + egs/babel/s5d/run-2b-bnf.sh | 150 ++ egs/babel/s5d/run-3a-nnet-mpe.sh | 54 + egs/babel/s5d/run-3b-bnf-nnet.sh | 86 + egs/babel/s5d/run-3b-bnf-sgmm.sh | 97 ++ egs/babel/s5d/run-4-anydecode.sh | 724 ++++++++ egs/babel/s5d/run-4-phn-anydecode.sh | 613 +++++++ egs/babel/s5d/run-4-syll-anydecode.sh | 613 +++++++ egs/babel/s5d/run-4b-anydecode-bnf.sh | 259 +++ egs/babel/s5d/run-6-combine.sh | 73 + egs/babel/s5d/steps | 1 + egs/babel/s5d/utils | 1 + .../s5/local/nnet3/run_ivector_common.sh | 4 +- egs/wsj/s5/steps/make_index.sh | 27 +- egs/wsj/s5/steps/make_plp_pitch.sh | 2 +- egs/wsj/s5/steps/nnet2/get_egs.sh | 14 +- .../s5/steps/nnet2/train_discriminative.sh | 4 +- .../s5/steps/nnet2/train_pnorm_ensemble.sh | 2 +- egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh | 2 +- .../s5/steps/nnet2/train_tanh_bottleneck.sh | 5 +- egs/wsj/s5/steps/nnet3/make_tdnn_configs.py | 2 +- egs/wsj/s5/steps/search_index.sh | 15 +- egs/wsj/s5/utils/dict_dir_add_pronprobs.sh | 4 +- egs/wsj/s5/utils/make_lexicon_fst.pl | 23 +- egs/wsj/s5/utils/slurm.pl | 5 +- egs/wsj/s5/utils/write_kwslist.pl | 15 +- src/Makefile | 2 +- src/fstbin/Makefile | 3 +- src/fstbin/fsts-project.cc | 82 + src/fstbin/fsts-to-transcripts.cc | 29 +- src/fstbin/fsts-union.cc | 98 ++ src/kws/kws-functions.cc | 58 +- src/kws/kws-functions.h | 2 + src/kwsbin/Makefile | 3 +- src/kwsbin/compute-atwv.cc | 36 +- src/kwsbin/generate-proxy-keywords.cc | 6 +- src/kwsbin/kws-index-union.cc | 21 +- src/kwsbin/kws-search.cc | 194 ++- src/kwsbin/lattice-to-kws-index.cc | 48 +- src/kwsbin/print-proxy-keywords.cc | 134 ++ src/kwsbin/transcripts-to-fsts.cc | 70 +- src/nnet/nnet-various.h | 4 +- src/nnet3/nnet-example-utils.h | 2 +- tools/extras/travis_script.sh | 10 +- windows/get_version.pl | 6 +- 566 files changed, 88103 insertions(+), 1066 deletions(-) delete mode 100644 egs/babel/s5/RESULTS delete mode 100644 egs/babel/s5b/RESULTS delete mode 100644 egs/babel/s5c/RESULTS delete mode 100755 egs/babel/s5c/local/get_syllable_text.sh delete mode 100755 egs/babel/s5c/local/kws_data_prep_syllables.sh delete mode 100755 egs/babel/s5c/local/lattice_to_ctm_syllable.sh delete mode 100755 egs/babel/s5c/local/make_syllable_lexicon.sh create mode 
100755 egs/babel/s5c/local/syllab/ali_to_syllabs.sh create mode 100755 egs/babel/s5c/local/syllab/create_syllables.pl create mode 100755 egs/babel/s5c/local/syllab/generate_syllable_lang.sh create mode 100755 egs/babel/s5c/local/syllab/map_prons_to_syllables.pl create mode 100644 egs/babel/s5c/results/RESULTS.105-turkish.flp create mode 100644 egs/babel/s5c/results/RESULTS.106-tagalog.flp create mode 100644 egs/babel/s5c/results/RESULTS.107-vietnamese.flp create mode 100644 egs/babel/s5d/EXAMPLE.vietnamese create mode 100644 egs/babel/s5d/README.txt create mode 100644 egs/babel/s5d/RESULTS.txt create mode 100644 egs/babel/s5d/RUN_UNICODE_SYSTEM create mode 100644 egs/babel/s5d/UNICODE_README create mode 100644 egs/babel/s5d/babel.html create mode 100644 egs/babel/s5d/cmd.sh create mode 100755 egs/babel/s5d/conf/bnf/config_full.py create mode 100755 egs/babel/s5d/conf/bnf/config_limited.py create mode 100644 egs/babel/s5d/conf/common.fullLP create mode 100644 egs/babel/s5d/conf/common.limitedLP create mode 100644 egs/babel/s5d/conf/common.semisupervised.limitedLP create mode 100644 egs/babel/s5d/conf/common_vars.sh create mode 100644 egs/babel/s5d/conf/glm create mode 100644 egs/babel/s5d/conf/lang/101-cantonese-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/102-assamese-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/102-assamese-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/103-bengali-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/103-bengali-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/104-pashto-fullLP-40hrs.official.conf create mode 100644 egs/babel/s5d/conf/lang/104-pashto-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/104-pashto-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/105-turkish-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/105-turkish-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/106-tagalog-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/106-tagalog-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/107-vietnamese-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/107-vietnamese-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/201-haitian-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/201-haitian-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/202-swahili.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/202-swahili.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/203-lao-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/203-lao-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/204-tamil-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/204-tamil-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/205-kurmanji.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/205-kurmanji.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/206-zulu-fullLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/206-zulu-limitedLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/207-tokpisin.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/207-tokpisin.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/301-cebuano.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/301-cebuano.LLP.official.conf create mode 
100644 egs/babel/s5d/conf/lang/302-kazakh.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/302-kazakh.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/303-telugu.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/303-telugu.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/304-lithuanian.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/304-lithuanian.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/305-guarani.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/305-guarani.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/306-igbo.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/306-igbo.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/307-amharic.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/307-amharic.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/401-mongolian.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/401-mongolian.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/402-javanese.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/402-javanese.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/403-dholuo.FLP.official.conf create mode 100644 egs/babel/s5d/conf/lang/403-dholuo.LLP.official.conf create mode 100644 egs/babel/s5d/conf/lists/101-cantonese/dev.list create mode 100644 egs/babel/s5d/conf/lists/101-cantonese/eval.list create mode 100644 egs/babel/s5d/conf/lists/101-cantonese/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/101-cantonese/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/101-cantonese/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/102-assamese/dev.list create mode 100644 egs/babel/s5d/conf/lists/102-assamese/eval.list create mode 100644 egs/babel/s5d/conf/lists/102-assamese/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/102-assamese/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/102-assamese/train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/dev.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/eval.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/103-bengali/train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/104-pashto/dev.list create mode 100644 egs/babel/s5d/conf/lists/104-pashto/eval.list create mode 100644 egs/babel/s5d/conf/lists/104-pashto/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/104-pashto/train.40HrFLP.list create mode 100644 egs/babel/s5d/conf/lists/104-pashto/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/104-pashto/training.list create mode 100644 egs/babel/s5d/conf/lists/105-turkish/dev.list create mode 100644 egs/babel/s5d/conf/lists/105-turkish/eval.list create mode 100644 egs/babel/s5d/conf/lists/105-turkish/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/105-turkish/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/105-turkish/train.LimitedLP.list create mode 100644 
egs/babel/s5d/conf/lists/106-tagalog/dev.list create mode 100644 egs/babel/s5d/conf/lists/106-tagalog/eval.list create mode 100644 egs/babel/s5d/conf/lists/106-tagalog/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/106-tagalog/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/106-tagalog/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/107-vietnamese/dev.list create mode 100644 egs/babel/s5d/conf/lists/107-vietnamese/eval.list create mode 100644 egs/babel/s5d/conf/lists/107-vietnamese/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/107-vietnamese/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/dev.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/eval.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/201-haitian/train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/dev.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/eval.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/training.list create mode 100644 egs/babel/s5d/conf/lists/202-swahili/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/203-lao/dev.list create mode 100644 egs/babel/s5d/conf/lists/203-lao/eval.list create mode 100644 egs/babel/s5d/conf/lists/203-lao/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/203-lao/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/203-lao/train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/dev.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/eval.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/204-tamil/train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/dev.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/eval.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/training.list create mode 100644 egs/babel/s5d/conf/lists/205-kurmanji/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/206-zulu/dev.list create mode 100644 egs/babel/s5d/conf/lists/206-zulu/eval.list create mode 100644 egs/babel/s5d/conf/lists/206-zulu/evalpart1.list 
create mode 100644 egs/babel/s5d/conf/lists/206-zulu/train.FullLP.list create mode 100644 egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.list create mode 100644 egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/206-zulu/train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/dev.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/eval.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/training.list create mode 100644 egs/babel/s5d/conf/lists/207-tokpisin/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/dev.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/eval.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/training.list create mode 100644 egs/babel/s5d/conf/lists/301-cebuano/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/dev.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/eval.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/training.list create mode 100644 egs/babel/s5d/conf/lists/302-kazakh/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/dev.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/eval.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/training.list create mode 100644 egs/babel/s5d/conf/lists/303-telugu/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/dev.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/eval.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/evalpart1.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/training.list create mode 100644 egs/babel/s5d/conf/lists/304-lithuanian/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/dev.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/eval.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/training.list create mode 100644 egs/babel/s5d/conf/lists/305-guarani/untranscribed-training.list create mode 100644 
egs/babel/s5d/conf/lists/306-igbo/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/306-igbo/dev.list create mode 100644 egs/babel/s5d/conf/lists/306-igbo/eval.list create mode 100644 egs/babel/s5d/conf/lists/306-igbo/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/306-igbo/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/306-igbo/training.list create mode 100644 egs/babel/s5d/conf/lists/306-igbo/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/dev.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/eval.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/training.list create mode 100644 egs/babel/s5d/conf/lists/307-amharic/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/dev.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/eval.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/training.list create mode 100644 egs/babel/s5d/conf/lists/401-mongolian/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/402-javanese/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/402-javanese/dev.list create mode 100644 egs/babel/s5d/conf/lists/402-javanese/eval.list create mode 100644 egs/babel/s5d/conf/lists/402-javanese/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/402-javanese/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/402-javanese/training.list create mode 100644 egs/babel/s5d/conf/lists/402-javanese/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/dev.2h.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/dev.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/eval.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/sub-train.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/sub-train.untranscribed.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/training.list create mode 100644 egs/babel/s5d/conf/lists/403-dholuo/untranscribed-training.list create mode 100644 egs/babel/s5d/conf/mfcc.conf create mode 100644 egs/babel/s5d/conf/mfcc_hires.conf create mode 100644 egs/babel/s5d/conf/online_cmvn.conf create mode 100644 egs/babel/s5d/conf/pitch.conf create mode 100644 egs/babel/s5d/conf/plp.conf create mode 100644 egs/babel/s5d/conf/slurm.bluecrab.conf create mode 100755 egs/babel/s5d/local/ali_to_rttm.sh create mode 100755 egs/babel/s5d/local/annotated_kwlist_to_KWs.pl create mode 100755 egs/babel/s5d/local/apply_g2p.sh create mode 100755 egs/babel/s5d/local/apply_map_tab_preserving.pl create mode 100755 egs/babel/s5d/local/arpa2G.sh create mode 100755 egs/babel/s5d/local/augment_original_stm.pl create mode 100755 egs/babel/s5d/local/best_path_weights.sh create mode 100755 egs/babel/s5d/local/best_scores.sh create mode 100755 egs/babel/s5d/local/best_scores_kws.sh create mode 100755 egs/babel/s5d/local/build_edit_distance_fst.pl create mode 100755 egs/babel/s5d/local/chain/run_blstm.sh create mode 100755 egs/babel/s5d/local/chain/run_blstm_bab1.sh create mode 
100755 egs/babel/s5d/local/chain/run_blstm_bab2.sh create mode 100755 egs/babel/s5d/local/chain/run_blstm_bab3.sh create mode 100755 egs/babel/s5d/local/chain/run_blstm_bab4.sh create mode 100755 egs/babel/s5d/local/chain/run_blstm_bab5.sh create mode 100755 egs/babel/s5d/local/chain/run_ivector_common.sh create mode 100755 egs/babel/s5d/local/chain/run_tdnn.sh create mode 100755 egs/babel/s5d/local/chain/run_tdnn_bab1.sh create mode 100755 egs/babel/s5d/local/chain/run_tdnn_bab2.sh create mode 100755 egs/babel/s5d/local/chain/run_tdnn_bab3.sh create mode 100755 egs/babel/s5d/local/chain/run_tdnn_bab4.sh create mode 100755 egs/babel/s5d/local/check_models.sh create mode 100755 egs/babel/s5d/local/check_tools.sh create mode 100755 egs/babel/s5d/local/check_wers.sh create mode 100755 egs/babel/s5d/local/cmu_uem2kaldi_dir.sh create mode 100755 egs/babel/s5d/local/count_to_logprob.pl create mode 100755 egs/babel/s5d/local/create_shadow_dataset.sh create mode 100755 egs/babel/s5d/local/cstr_ndx2flist.pl create mode 100755 egs/babel/s5d/local/ctm2segments.pl create mode 100644 egs/babel/s5d/local/datasets/basic_kws.sh create mode 100644 egs/babel/s5d/local/datasets/extra_kws.sh create mode 100644 egs/babel/s5d/local/datasets/supervised_pem.sh create mode 100644 egs/babel/s5d/local/datasets/supervised_seg.sh create mode 100644 egs/babel/s5d/local/datasets/supervised_uem.sh create mode 120000 egs/babel/s5d/local/datasets/unsupervised_seg.sh create mode 120000 egs/babel/s5d/local/datasets/unsupervised_uem.sh create mode 100644 egs/babel/s5d/local/datasets/vocab_kws.sh create mode 100755 egs/babel/s5d/local/decode_helper.sh create mode 100755 egs/babel/s5d/local/eval_kw_subsets.sh create mode 100755 egs/babel/s5d/local/extend_lexicon.sh create mode 100755 egs/babel/s5d/local/extract_oov_words.pl create mode 100755 egs/babel/s5d/local/filter_keywords.pl create mode 100755 egs/babel/s5d/local/filter_kwslist.pl create mode 100755 egs/babel/s5d/local/fix_kwslist.pl create mode 100755 egs/babel/s5d/local/generate_confusion_matrix.sh create mode 100755 egs/babel/s5d/local/generate_example_kws.sh create mode 100755 egs/babel/s5d/local/generate_phoneme_transcription.sh create mode 100755 egs/babel/s5d/local/generate_proxy_keywords.sh create mode 100755 egs/babel/s5d/local/kaldi_dir2uem.py create mode 100755 egs/babel/s5d/local/kwords2indices.pl create mode 100755 egs/babel/s5d/local/kws_combine.sh create mode 100755 egs/babel/s5d/local/kws_data_prep.sh create mode 100755 egs/babel/s5d/local/kws_data_prep_proxy.sh create mode 100755 egs/babel/s5d/local/kws_gen_oracle_lattices.sh create mode 100755 egs/babel/s5d/local/kws_oracle.sh create mode 100755 egs/babel/s5d/local/kws_oracle_threshold.pl create mode 120000 egs/babel/s5d/local/kws_score.sh create mode 100755 egs/babel/s5d/local/kws_score_f4de.sh create mode 100755 egs/babel/s5d/local/kws_search.sh create mode 100755 egs/babel/s5d/local/kws_setup.sh create mode 100755 egs/babel/s5d/local/lattice_to_ctm.sh create mode 100755 egs/babel/s5d/local/lexicon/make_unicode_lexicon.py create mode 100755 egs/babel/s5d/local/lexicon/make_word_list.py create mode 100755 egs/babel/s5d/local/lonestar.py create mode 100755 egs/babel/s5d/local/make_L_align.sh create mode 100755 egs/babel/s5d/local/make_corpus_subset.sh create mode 100755 egs/babel/s5d/local/make_ecf_subset.sh create mode 100755 egs/babel/s5d/local/make_lexicon_fst_special.pl create mode 100755 egs/babel/s5d/local/make_lexicon_subset.sh create mode 100644 egs/babel/s5d/local/make_wordlist.sh create mode 
100755 egs/babel/s5d/local/map_lang.sh create mode 100755 egs/babel/s5d/local/naive_comb.pl create mode 100755 egs/babel/s5d/local/nist_eval/create_compound_set.sh create mode 100755 egs/babel/s5d/local/nist_eval/create_new_language_configs.FLP.sh create mode 100755 egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh create mode 100755 egs/babel/s5d/local/nist_eval/export_systems.sh create mode 100755 egs/babel/s5d/local/nist_eval/filter_data.sh create mode 100755 egs/babel/s5d/local/nist_eval/get_training_times.sh create mode 100755 egs/babel/s5d/local/nist_eval/make_release.sh create mode 100755 egs/babel/s5d/local/nist_eval/split_compound_set.sh create mode 100755 egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh create mode 100755 egs/babel/s5d/local/nnet3/run_blstm.sh create mode 100755 egs/babel/s5d/local/nnet3/run_blstm_realigned.sh create mode 100755 egs/babel/s5d/local/nnet3/run_ivector_common.sh create mode 100755 egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh create mode 100755 egs/babel/s5d/local/nnet3/run_lstm.sh create mode 100755 egs/babel/s5d/local/nnet3/run_lstm_realigned.sh create mode 100755 egs/babel/s5d/local/nnet3/run_tdnn.sh create mode 100755 egs/babel/s5d/local/normalize_transcript.pl create mode 100644 egs/babel/s5d/local/optimize/OptimizeParams.pm create mode 100755 egs/babel/s5d/local/optimize2.pl create mode 100755 egs/babel/s5d/local/prepare_acoustic_training_data.pl create mode 100644 egs/babel/s5d/local/prepare_extended_lexicon.sh create mode 100755 egs/babel/s5d/local/prepare_lexicon.pl create mode 100755 egs/babel/s5d/local/prepare_stm.pl create mode 100755 egs/babel/s5d/local/prepare_unicode_lexicon.py create mode 100755 egs/babel/s5d/local/reestimate_langp.sh create mode 100755 egs/babel/s5d/local/resegment/evaluate_segmentation.pl create mode 100755 egs/babel/s5d/local/resegment/generate_segments.sh create mode 100755 egs/babel/s5d/local/resegment/segmentation.py create mode 100755 egs/babel/s5d/local/resegment/train_segmentation.sh create mode 100755 egs/babel/s5d/local/rttm_to_text.pl create mode 100755 egs/babel/s5d/local/run_cleanup_segmentation.sh create mode 100755 egs/babel/s5d/local/run_kws_stt_task.sh create mode 100755 egs/babel/s5d/local/run_kws_stt_task2.sh create mode 120000 egs/babel/s5d/local/score.sh create mode 100755 egs/babel/s5d/local/score_combine.sh create mode 100755 egs/babel/s5d/local/score_map.sh create mode 100755 egs/babel/s5d/local/score_mbr.sh create mode 100755 egs/babel/s5d/local/score_sctk_prune.sh create mode 100755 egs/babel/s5d/local/score_stm.sh create mode 100755 egs/babel/s5d/local/search/analyze_stats.pl create mode 100755 egs/babel/s5d/local/search/annotate_kwlist.pl create mode 100755 egs/babel/s5d/local/search/combine.sh create mode 100755 egs/babel/s5d/local/search/combine_results.pl create mode 100755 egs/babel/s5d/local/search/combine_special.sh create mode 100755 egs/babel/s5d/local/search/compile_keywords.sh create mode 100755 egs/babel/s5d/local/search/compile_proxy_keywords.sh create mode 100755 egs/babel/s5d/local/search/create_categories.pl create mode 100755 egs/babel/s5d/local/search/filter_by_category.pl create mode 100755 egs/babel/s5d/local/search/filter_kws_results.pl create mode 100755 egs/babel/s5d/local/search/normalize.sh create mode 100755 egs/babel/s5d/local/search/normalize_categories.pl create mode 100755 egs/babel/s5d/local/search/normalize_results_kst.pl create mode 100755 egs/babel/s5d/local/search/per_category_stats.pl create mode 100755 
egs/babel/s5d/local/search/rttm_to_hitlists.sh create mode 100755 egs/babel/s5d/local/search/run_phn_search.sh create mode 100755 egs/babel/s5d/local/search/run_search.sh create mode 100755 egs/babel/s5d/local/search/run_syll_search.sh create mode 100755 egs/babel/s5d/local/search/score.sh create mode 100755 egs/babel/s5d/local/search/search.sh create mode 100755 egs/babel/s5d/local/search/setup.sh create mode 100755 egs/babel/s5d/local/search/utt_to_files.pl create mode 100755 egs/babel/s5d/local/search/write_kwslist.pl create mode 100755 egs/babel/s5d/local/search_index.sh create mode 100644 egs/babel/s5d/local/setup_categories.sh create mode 100755 egs/babel/s5d/local/shadow_set_kws_search.sh create mode 100755 egs/babel/s5d/local/show_lattice.sh create mode 100755 egs/babel/s5d/local/split_ctms.sh create mode 100755 egs/babel/s5d/local/stm2text.pl create mode 100755 egs/babel/s5d/local/subset_atwv.pl create mode 100755 egs/babel/s5d/local/subset_kwslist.pl create mode 100755 egs/babel/s5d/local/summarize_logs.pl create mode 100755 egs/babel/s5d/local/syllab/ali_to_syllabs.sh create mode 100755 egs/babel/s5d/local/syllab/create_syll_datadir.sh create mode 100755 egs/babel/s5d/local/syllab/create_syllables.pl create mode 100755 egs/babel/s5d/local/syllab/generate_phone_lang.sh create mode 100755 egs/babel/s5d/local/syllab/generate_syllable_lang.sh create mode 100755 egs/babel/s5d/local/syllab/lattice_word2syll.sh create mode 100755 egs/babel/s5d/local/syllab/map_prons_to_syllables.pl create mode 100755 egs/babel/s5d/local/syllab/run_phones.sh create mode 100755 egs/babel/s5d/local/syllab/run_syllabs.sh create mode 100755 egs/babel/s5d/local/train_g2p.sh create mode 100755 egs/babel/s5d/local/train_lms_srilm.sh create mode 100755 egs/babel/s5d/local/txt_to_rttm.pl create mode 100755 egs/babel/s5d/local/uem_ctm2segments.pl create mode 100644 egs/babel/s5d/nnet3_examples.sh create mode 100755 egs/babel/s5d/path.sh create mode 100644 egs/babel/s5d/results/RESULTS.105-turkish.flp create mode 100644 egs/babel/s5d/results/RESULTS.106-tagalog.flp create mode 100644 egs/babel/s5d/results/RESULTS.107-vietnamese.flp create mode 100644 egs/babel/s5d/results/kws_results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-03-31T11:34:24-04:00 create mode 100644 egs/babel/s5d/results/kws_results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:04:03-04:00 create mode 100644 egs/babel/s5d/results/kws_results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:12:45-04:00 create mode 100644 egs/babel/s5d/results/kws_results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:21:34-04:00 create mode 100644 egs/babel/s5d/results/kws_results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:25:02-04:00 create mode 100644 egs/babel/s5d/results/kws_results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:27:39-04:00 create mode 100644 egs/babel/s5d/results/kws_results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:29:55-04:00 create mode 100644 egs/babel/s5d/results/results.101-cantonese-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T12:15:22-0500 create mode 100644 egs/babel/s5d/results/results.102-assamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:53:08-0500 create mode 100644 egs/babel/s5d/results/results.103-bengali-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:49:23-0500 create mode 100644 egs/babel/s5d/results/results.104-pashto-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:48:47-0500 create mode 100644 
egs/babel/s5d/results/results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-25T15:45:46-05:00 create mode 100644 egs/babel/s5d/results/results.105-turkish-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:43:17-0500 create mode 100644 egs/babel/s5d/results/results.106-tagalog-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:50:17-0500 create mode 100644 egs/babel/s5d/results/results.107-vietnamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:51:53-0500 create mode 100644 egs/babel/s5d/results/results.201-haitian-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T11:46:09-0500 create mode 100644 egs/babel/s5d/results/results.202-swahili.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-21T10:25:47-0500 create mode 100644 egs/babel/s5d/results/results.203-lao-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:50:41-0500 create mode 100644 egs/babel/s5d/results/results.204-tamil-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T19:55:42-0500 create mode 100644 egs/babel/s5d/results/results.205-kurmanji.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:24:13-0500 create mode 100644 egs/babel/s5d/results/results.206-zulu-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:54:01-0500 create mode 100644 egs/babel/s5d/results/results.207-tokpisin.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:25:25-0500 create mode 100644 egs/babel/s5d/results/results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T10:45:54-05:00 create mode 100644 egs/babel/s5d/results/results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-05T10:56:45-05:00 create mode 100644 egs/babel/s5d/results/results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T09:46:16-05:00 create mode 100644 egs/babel/s5d/results/results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T16:17:55-05:00 create mode 100644 egs/babel/s5d/results/results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T06:40:39-05:00 create mode 100644 egs/babel/s5d/results/results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T23:27:09-05:00 create mode 100755 egs/babel/s5d/run-1-main-extend-lex.sh create mode 100755 egs/babel/s5d/run-1-main-unicode.sh create mode 100755 egs/babel/s5d/run-1-main.sh create mode 100755 egs/babel/s5d/run-2-segmentation.sh create mode 100755 egs/babel/s5d/run-2a-nnet-cpu.sh create mode 100755 egs/babel/s5d/run-2a-nnet-ensemble-gpu.sh create mode 100755 egs/babel/s5d/run-2a-nnet-gpu.sh create mode 100755 egs/babel/s5d/run-2a-nnet-mpe.sh create mode 100755 egs/babel/s5d/run-2b-bnf.sh create mode 100755 egs/babel/s5d/run-3a-nnet-mpe.sh create mode 100755 egs/babel/s5d/run-3b-bnf-nnet.sh create mode 100755 egs/babel/s5d/run-3b-bnf-sgmm.sh create mode 100755 egs/babel/s5d/run-4-anydecode.sh create mode 100755 egs/babel/s5d/run-4-phn-anydecode.sh create mode 100755 egs/babel/s5d/run-4-syll-anydecode.sh create mode 100755 egs/babel/s5d/run-4b-anydecode-bnf.sh create mode 100755 egs/babel/s5d/run-6-combine.sh create mode 120000 egs/babel/s5d/steps create mode 120000 egs/babel/s5d/utils create mode 100644 src/fstbin/fsts-project.cc create mode 100644 src/fstbin/fsts-union.cc create mode 100644 src/kwsbin/print-proxy-keywords.cc diff --git a/.gitignore b/.gitignore index d9c8d77600d..e6d9c0fd612 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,9 @@ GRTAGS GPATH GSYMS +# python compiled sources +*.pyc + # Make dependencies .depend.mk @@ -116,5 +119,13 @@ GSYMS /tools/pthreads*.zip /tools/sequitur /tools/srilm.tgz +/tools/liblbfgs-1.10.tar.gz +/tools/liblbfgs-1.10/ +/tools/openfst-1.5.0.tar.gz +/tools/openfst-1.5.0/ 
+/tools/srilm-1.7.2-beta.tar.gz +/tools/liblbfgs/ +/tools/sequitur-g2p/ /kaldiwin_vs* + diff --git a/egs/babel/s5/RESULTS b/egs/babel/s5/RESULTS deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/egs/babel/s5/local/make_pitch.sh b/egs/babel/s5/local/make_pitch.sh index 107016d78a9..f3597f504dd 100755 --- a/egs/babel/s5/local/make_pitch.sh +++ b/egs/babel/s5/local/make_pitch.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash # Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey) # Bagher BabaAli @@ -50,7 +50,7 @@ mkdir -p $expdir/log || exit 1; scp=$data/wav.scp -[ ! -s $KALDI_ROOT ] && KALDI_ROOT=../../.. +[ ! -s $KALDI_ROOT ] && KALDI_ROOT=../../.. ( # this is for back compatiblity: cd $KALDI_ROOT/tools @@ -92,7 +92,7 @@ done basename=`basename $data` wavdir=$pitchdir/temp_wav_$basename mkdir -p $wavdir - + if [ -f $data/segments ] || grep '|' $data/wav.scp >/dev/null; then wav_scp=$expdir/wav.scp cat $data/segments | awk -v dir=$wavdir '{key=$1; printf("%s %s/%s.wav\n", key, dir, key);}' \ @@ -104,7 +104,7 @@ if [ -f $data/segments ] || grep '|' $data/wav.scp >/dev/null; then else # create a fake segments file that takes the whole file; this is an easy way # to copy to static wav files. Note: probably this has not been tested. - cat $data/wav.scp | awk '{print $1, $1, 0.0, -1.0}' > $expdir/fake_segments + cat $data/wav.scp | awk '{print $1, $1, 0.0, -1.0}' > $expdir/fake_segments segments=$expdir/fake_segments fi if [ $stage -le 0 ]; then @@ -155,11 +155,11 @@ if [ $stage -le 1 ]; then fi # I don't want to put a separate script in svn just for this, so creating a temporary -# script file in the experimental directory. Quotes around 'EOF' disable any +# script file in the experimental directory. Quotes around 'EOF' disable any # interpretation in the here-doc. cat <<'EOF' > $expdir/convert.sh #!/bin/bash -sacc_flist=$1 +sacc_flist=$1 scpfile=$2 [ $# -ne 2 ] && echo "Usage: convert.sh " && exit 1; @@ -247,7 +247,7 @@ exit 0; # rm $expdir/.error 2>/dev/null # # for ((n=1; n<=nj; n++)); do -# # mkdir -p "$expdir/$n" +# # mkdir -p "$expdir/$n" # # done # # $cmd JOB=1:$nj $expdir/make_pitch.JOB.log \ @@ -297,8 +297,8 @@ exit 0; # rm $expdir/wav.*.scp $expdir/segments.* 2>/dev/null -# nf=`cat $data/pitchs.scp | wc -l` -# nu=`cat $data/utt2spk | wc -l` +# nf=`cat $data/pitchs.scp | wc -l` +# nu=`cat $data/utt2spk | wc -l` # if [ $nf -ne $nu ]; then # echo "It seems not all of the feature files were successfully ($nf != $nu);" # echo "consider using utils/fix_data_dir.sh $data" diff --git a/egs/babel/s5b/RESULTS b/egs/babel/s5b/RESULTS deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/egs/babel/s5c/RESULTS b/egs/babel/s5c/RESULTS deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/egs/babel/s5c/local/CHECKPOINT.sh b/egs/babel/s5c/local/CHECKPOINT.sh index 91b64d7fe1a..ed0ddd18399 100755 --- a/egs/babel/s5c/local/CHECKPOINT.sh +++ b/egs/babel/s5c/local/CHECKPOINT.sh @@ -1,11 +1,11 @@ #!/bin/bash function GETAPPROVAL { - until false ; do + until false ; do echo "Do you want to run the command (y/n)?" 
read -n 1 WISH - - if [ "$WISH" == "y" ]; then + + if [ "$WISH" == "y" ]; then return true; elif [ "$WISH" == "n" ]; then return false; @@ -21,11 +21,11 @@ function ESCAPE_PARAMS { if [[ "$v" == *"<"* ]]; then out="$out \"$v\"" - elif [[ "$v" == *">"* ]] ; then + elif [[ "$v" == *">"* ]] ; then out="$out \"$v\"" - elif [[ "$v" == *"|"* ]] ; then + elif [[ "$v" == *"|"* ]] ; then out="$out \'$v\'" - elif [[ "$v" == *" "* ]] ; then + elif [[ "$v" == *" "* ]] ; then out="$out \"$v\"" else out="$out $v" @@ -76,7 +76,7 @@ function CHECKPOINT { if [ !$INTERACTIVE_CHECKPOINT ] ; then eval `ESCAPE_PARAMS "$@"` - else + else APPROVAL=GETAPPROVAL if $APPROVAL ; then eval `ESCAPE_PARAMS $@` @@ -87,7 +87,7 @@ function CHECKPOINT { echo -e ${COLOR_RED}"CHECKPOINT FAILURE: The command returned non-zero status" >&2 echo -e " rerun the script with the parameter -c $LAST_GOOD_NAME=$COUNTER" >&2 echo -e "COMMAND">&2 - echo -e " " "$@" ${COLOR_RED} >&2 + echo -e " " "$@" ${COLOR_RED} >&2 exit 1 fi @@ -97,7 +97,7 @@ function CHECKPOINT { echo -e "$@"${COLOR_DEFAULT} >&2 fi - COUNTER=$(( $COUNTER + 1 )) + COUNTER=$(( $COUNTER + 1 )) eval export $COUNTER_NAME=$COUNTER } diff --git a/egs/babel/s5c/local/ali_to_rttm.sh b/egs/babel/s5c/local/ali_to_rttm.sh index 63cf8f44dc4..09df9a15805 100755 --- a/egs/babel/s5c/local/ali_to_rttm.sh +++ b/egs/babel/s5c/local/ali_to_rttm.sh @@ -42,7 +42,7 @@ if [ $# != 3 ]; then exit 1; fi -set -e +set -e set -o pipefail set -u @@ -65,7 +65,7 @@ fi $cmd $dir/log/align_to_words.log \ ali-to-phones $dir/final.mdl "ark:gunzip -c $dir/ali.*.gz|" ark,t:- \| \ phones-to-prons $lang/L_align.fst $wbegin $wend ark:- "ark,s:utils/sym2int.pl -f 2- --map-oov '$oov' $lang/words.txt <$data/text|" ark,t:- \| \ - prons-to-wordali ark:- "ark:ali-to-phones --write-lengths=true $dir/final.mdl 'ark:gunzip -c $dir/ali.*.gz|' ark,t:- |" ark,t:$dir/align.txt + prons-to-wordali ark:- "ark:ali-to-phones --write-lengths=true $dir/final.mdl 'ark:gunzip -c $dir/ali.*.gz|' ark,t:- |" ark,t:$dir/align.txt echo "$0: done writing alignments." diff --git a/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl b/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl index 198da36da5a..a4c80cef345 100755 --- a/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl +++ b/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl @@ -26,7 +26,7 @@ Allowed options: EOU -GetOptions(); +GetOptions(); @ARGV >= 2 || die $Usage; @@ -77,7 +77,7 @@ if ($count == 0) { $output .= "$value"; $count ++; next; - } + } if ($count == 6) { $output .= ", ..."; last; diff --git a/egs/babel/s5c/local/apply_g2p.sh b/egs/babel/s5c/local/apply_g2p.sh index f47274cb21c..385b1f3536e 100755 --- a/egs/babel/s5c/local/apply_g2p.sh +++ b/egs/babel/s5c/local/apply_g2p.sh @@ -2,7 +2,7 @@ # Copyright 2014 Johns Hopkins University (Author: Yenda Trmal) # Apache 2.0 -# Begin configuration section. +# Begin configuration section. iters=5 stage=0 encoding='utf-8' @@ -82,15 +82,15 @@ cat $output/output.* > $output/output #Remap the words from output file back to the original casing #Conversion of some of thems might have failed, so we have to be careful #and use the transform_map file we generated beforehand -#Also, because the sequitur output is not readily usable as lexicon (it adds +#Also, because the sequitur output is not readily usable as lexicon (it adds #one more column with ordering of the pron. variants) convert it into the proper lexicon form output_lex=$output/lexicon.lex if [ ! -z $icu_transform ] ; then #also, the transform is generally N -> 1, i.e. 
we have to take #extra care of words that might have been mapped into the same one - perl -e 'open(WORDS, $ARGV[0]) or die "Could not open file $ARGV[0]"; - while() { chomp; @F=split; - if ($MAP{$F[0]} ) { push @{$MAP{$F[0]}}, $F[1]; } + perl -e 'open(WORDS, $ARGV[0]) or die "Could not open file $ARGV[0]"; + while() { chomp; @F=split; + if ($MAP{$F[0]} ) { push @{$MAP{$F[0]}}, $F[1]; } else { $MAP{$F[0]} = [$F[1]]; } } close(WORDS); @@ -101,7 +101,7 @@ if [ ! -z $icu_transform ] ; then next; } foreach $word (@{$MAP{$F[0]}} ) { - print "$word\t$F[2]\t$F[3]\n"; + print "$word\t$F[2]\t$F[3]\n"; } } close(LEX); diff --git a/egs/babel/s5c/local/apply_map_tab_preserving.pl b/egs/babel/s5c/local/apply_map_tab_preserving.pl index 2a3238c04a3..b57262f1930 100755 --- a/egs/babel/s5c/local/apply_map_tab_preserving.pl +++ b/egs/babel/s5c/local/apply_map_tab_preserving.pl @@ -12,8 +12,8 @@ # this version preserves tabs. if (@ARGV > 0 && $ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; + shift @ARGV; + $field_spec = shift @ARGV; if ($field_spec =~ m/^\d+$/) { $field_begin = $field_spec - 1; $field_end = $field_spec - 1; } @@ -26,7 +26,7 @@ } } if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; + die "Bad argument to -f option: $field_spec"; } } @@ -70,7 +70,7 @@ $field_offset = 0; for ($n = 0; $n < @A; $n++) { @B = split(" ", $A[$n]); - + for ($x = 0; $x < @B; $x++) { $y = $x + $field_offset; if ( (!defined $field_begin || $y >= $field_begin) @@ -78,12 +78,12 @@ $b = $B[$x]; if (!defined $map{$b}) { if (!$permissive) { - die "apply_map.pl: undefined key $a\n"; + die "apply_map.pl: undefined key $a\n"; } else { print STDERR "apply_map.pl: warning! missing key $a\n"; } } else { - $B[$x] = $map{$b}; + $B[$x] = $map{$b}; } } } diff --git a/egs/babel/s5c/local/augment_original_stm.pl b/egs/babel/s5c/local/augment_original_stm.pl index 4c58ccc6271..c5ad87fd286 100755 --- a/egs/babel/s5c/local/augment_original_stm.pl +++ b/egs/babel/s5c/local/augment_original_stm.pl @@ -8,7 +8,7 @@ #As a result, the scoring will be done on per-speaker basis as well #As the segment from segment mapping generally do not correspond to #the segmentation of the original STM file, it combines the files -#segments and utt2spk to work out the correct speaker ID for +#segments and utt2spk to work out the correct speaker ID for #the reference segment #In case of overlay, it will either use the previous speaker or #prints out an error message diff --git a/egs/babel/s5c/local/best_path_weights.sh b/egs/babel/s5c/local/best_path_weights.sh index 8e88a3610a4..52782ee3655 100755 --- a/egs/babel/s5c/local/best_path_weights.sh +++ b/egs/babel/s5c/local/best_path_weights.sh @@ -16,19 +16,19 @@ # limitations under the License. -# This script combines frame-level posteriors from different decode -# directories. The first decode directory is assumed to be the primary +# This script combines frame-level posteriors from different decode +# directories. The first decode directory is assumed to be the primary # and is used to get the best path. The posteriors from other decode -# directories are interpolated with the posteriors of the best path. -# The output is a new directory with final.mdl, tree from the primary -# decode-dir and the best path alignments and weights in a decode-directory +# directories are interpolated with the posteriors of the best path. 
+# The output is a new directory with final.mdl, tree from the primary +# decode-dir and the best path alignments and weights in a decode-directory # with the same basename as the primary directory. # This is typically used to get better posteriors for semisupervised training # of DNN -# e.g. local/combine_posteriors.sh exp/tri6_nnet/decode_train_unt.seg +# e.g. local/combine_posteriors.sh exp/tri6_nnet/decode_train_unt.seg # exp/sgmm_mmi_b0.1/decode_fmllr_train_unt.seg_it4 exp/combine_dnn_sgmm -# Here the final.mdl and tree are copied from exp/tri6_nnet to -# exp/combine_dnn_sgmm. best_path_ali.*.gz obtained from the primary dir and +# Here the final.mdl and tree are copied from exp/tri6_nnet to +# exp/combine_dnn_sgmm. best_path_ali.*.gz obtained from the primary dir and # the interpolated posteriors in weights.*.gz are placed in # exp/combine_dnn_sgmm/decode_train_unt.seg @@ -115,7 +115,7 @@ for i in `seq 0 $[num_sys-1]`; do echo $nj > $out_decode/num_jobs else if [ $nj != `cat $decode_dir/num_jobs` ]; then - echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" + echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" exit 1; fi fi diff --git a/egs/babel/s5c/local/check_models.sh b/egs/babel/s5c/local/check_models.sh index d02fc4e561a..88b3dacc94b 100755 --- a/egs/babel/s5c/local/check_models.sh +++ b/egs/babel/s5c/local/check_models.sh @@ -4,7 +4,7 @@ check_model () { model=$1 if [ -s $model ]; then echo $model - else + else dir=`dirname $model` latest_model=`ls -lt $dir/{?,??}.mdl 2>/dev/null | head -1 | awk '{print $9}'` echo "*$model is not there, latest is: $latest_model" diff --git a/egs/babel/s5c/local/check_wers.sh b/egs/babel/s5c/local/check_wers.sh index ebd6bb28790..10e1a89ee3a 100755 --- a/egs/babel/s5c/local/check_wers.sh +++ b/egs/babel/s5c/local/check_wers.sh @@ -4,7 +4,7 @@ check_wer () { dir=$1 - if [ -d $dir ]; then + if [ -d $dir ]; then seen_dir=false for ddir in $dir/decode*; do if [ -d $ddir ]; then @@ -34,7 +34,7 @@ for n in `seq 10`; do fi done -if [ $# != 0 ]; then +if [ $# != 0 ]; then echo "Usage: local/check_wers.sh [--final] [--char]" exit 1; fi diff --git a/egs/babel/s5c/local/cmu_uem2kaldi_dir.sh b/egs/babel/s5c/local/cmu_uem2kaldi_dir.sh index a8fcc39eba5..f320cfa19cd 100755 --- a/egs/babel/s5c/local/cmu_uem2kaldi_dir.sh +++ b/egs/babel/s5c/local/cmu_uem2kaldi_dir.sh @@ -30,12 +30,12 @@ mkdir -p $datadir echo "Converting `basename $database` to kaldi directory $datadir " cat $database | perl -pe 's:.+(BABEL):BABEL:; s:\}\s+\{FROM\s+: :; s:\}\s+\{TO\s+: :; s:\}.+::;' | \ - perl -ne '@K = split; - $utteranceID = @K[0]; - $utteranceID =~ s:[^_]+_[^_]+_[^_]+_::; - $utteranceID =~ s:([^_]+)_(.+)_(inLine|scripted):${1}_A_${2}:; - $utteranceID =~ s:([^_]+)_(.+)_outLine:${1}_B_${2}:; - $utteranceID .= sprintf ("_%06i", (100*@K[2])); + perl -ne '@K = split; + $utteranceID = @K[0]; + $utteranceID =~ s:[^_]+_[^_]+_[^_]+_::; + $utteranceID =~ s:([^_]+)_(.+)_(inLine|scripted):${1}_A_${2}:; + $utteranceID =~ s:([^_]+)_(.+)_outLine:${1}_B_${2}:; + $utteranceID .= sprintf ("_%06i", (100*@K[2])); printf("%s %s %.2f %.2f\n", $utteranceID, @K[0], @K[1], @K[2]);' | sort > $datadir/segments if [ ! -z $filelist ] ; then @@ -66,12 +66,12 @@ perl -ne '{chomp; @K=split; $utt{@K[1]}.=" @K[0]";} # 4. Create the wav.scp file: sph2pipe=`which sph2pipe || which $KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe` if [ $? -ne 0 ] ; then - echo "Could not find sph2pipe binary. Add it to PATH" + echo "Could not find sph2pipe binary. 
Add it to PATH" exit 1; fi sox=`which sox` if [ $? -ne 0 ] ; then - echo "Could not find sox binary. Add it to PATH" + echo "Could not find sox binary. Add it to PATH" exit 1; fi @@ -84,19 +84,19 @@ echo "Creating the $datadir/wav.scp file" elif [ -f $audiopath/audio/$file.wav ] ; then echo "$file $sox $audiopath/audio/$file.wav -r 8000 -c 1 -b 16 -t wav - downsample |" else - echo "Audio file $audiopath/audio/$file.sph does not exist!" >&2 + echo "Audio file $audiopath/audio/$file.sph does not exist!" >&2 exit 1 fi - done | sort -u > $datadir/wav.scp - if [ $? -ne 0 ] ; then - echo "Error producing the wav.scp file" + done | sort -u > $datadir/wav.scp + if [ $? -ne 0 ] ; then + echo "Error producing the wav.scp file" exit 1 fi -) || exit 1 +) || exit 1 l1=`wc -l $datadir/wav.scp | cut -f 1 -d ' ' ` echo "wav.scp contains $l1 files" -if [ ! -z $filelist ] ; then +if [ ! -z $filelist ] ; then l2=`wc -l $filelist | cut -f 1 -d ' '` echo "filelist `basename $filelist` contains $l2 files" diff --git a/egs/babel/s5c/local/create_shadow_dataset.sh b/egs/babel/s5c/local/create_shadow_dataset.sh index 6783ee49770..49467ed28c1 100755 --- a/egs/babel/s5c/local/create_shadow_dataset.sh +++ b/egs/babel/s5c/local/create_shadow_dataset.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2012 Johns Hopkins University +# Copyright 2012 Johns Hopkins University # Apache 2.0. stage=0 @@ -29,8 +29,8 @@ if [ $stage -le 1 ] ; then #zkombinovat ecf echo "Combining ECF files..." perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; @@ -87,8 +87,8 @@ if [ $stage -le 2 ] ; then #zkombinovat kwlist echo "Combining the KWLIST files" perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; @@ -107,7 +107,7 @@ if [ $stage -le 2 ] ; then if ( $src1->{language} ne $src2->{language} ) { die "KWLIST languages differ in the source kwlist.xml files"; } - + $tgt->{ecf_filename} = ""; $tgt->{language}=$src1->{language}; $tgt->{compareNormalize}=$src1->{compareNormalize}; @@ -143,8 +143,8 @@ fi if [ $stage -le 3 ] ; then echo "Making KWLIST maps" perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; diff --git a/egs/babel/s5c/local/cstr_ndx2flist.pl b/egs/babel/s5c/local/cstr_ndx2flist.pl index d19db421a9f..79daa1a99db 100755 --- a/egs/babel/s5c/local/cstr_ndx2flist.pl +++ b/egs/babel/s5c/local/cstr_ndx2flist.pl @@ -16,7 +16,7 @@ # limitations under the License. # This is modified from the script in standard Kaldi recipe to account -# for the way the WSJ data is structured on the Edinburgh systems. +# for the way the WSJ data is structured on the Edinburgh systems. # - Arnab Ghoshal, 12/1/12 # This program takes as its standard input an .ndx file from the WSJ corpus that looks @@ -25,7 +25,7 @@ #;; #;; Index for WSJ0 SI-short Sennheiser training data #;; Data is read WSJ sentences, Sennheiser mic. 
-#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts +#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts #;; per speaker TI) = 7236 utts #;; #11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 diff --git a/egs/babel/s5c/local/ctm2segments.pl b/egs/babel/s5c/local/ctm2segments.pl index 26a786c88b9..55a8bd84fc8 100755 --- a/egs/babel/s5c/local/ctm2segments.pl +++ b/egs/babel/s5c/local/ctm2segments.pl @@ -45,21 +45,21 @@ chop $line; my @entries = split(/ /, $line); die "Cannot parse line \"$line\"" if scalar @entries != 6; - + ($filename, my $chann_id, my $beg, my $end, my $word, my $conf) = @entries; - - $total_seconds += $end * 1.0; - + + $total_seconds += $end * 1.0; + if ($conf >= $cf_needed ) { if ( $words ne "" ) { #print "Extend segment\n"; $words .= " $word"; - $seg_end = $beg * 1.0 + $end*1.0; + $seg_end = $beg * 1.0 + $end*1.0; } else { #start a new segment #print "Start segment\n"; $seg_start = $beg; - $seg_end = $beg * 1.0 + $end*1.0; + $seg_end = $beg * 1.0 + $end*1.0; $words = $word; } } else { @@ -75,14 +75,14 @@ $extracted_seconds+= ($seg_end - $seg_start); $seg_start -= $extend_segments; - $seg_end += $extend_segments; + $seg_end += $extend_segments; my $spk_id=$filename_parts[3] . "_" . $channel; my $utt_id = $spk_id . "_" . join("_", @filename_parts[4..5]); my $last_part = sprintf("%06d", $seg_start * 100); $utt_id .= "_" . $last_part; #print $utt_id . " $beg \n"; - + #14350_A_20121123_042710_001337 #10901_A_20121128_230024_000227 BABEL_OP1_206_10901_20121128_230024_inLine 2.275 3.265 @@ -111,14 +111,14 @@ $extracted_seconds+= ($seg_end - $seg_start); $seg_start -= $extend_segments; - $seg_end += $extend_segments; + $seg_end += $extend_segments; my $spk_id=$filename_parts[3] . "_" . $channel; my $utt_id = $spk_id . "_" . join("_", @filename_parts[4..5]); my $last_part = sprintf("%06d", $seg_start * 100); $utt_id .= "_" . $last_part; #print $utt_id . " $beg \n"; - + #14350_A_20121123_042710_001337 #10901_A_20121128_230024_000227 BABEL_OP1_206_10901_20121128_230024_inLine 2.275 3.265 diff --git a/egs/babel/s5c/local/datasets/basic_kws.sh b/egs/babel/s5c/local/datasets/basic_kws.sh index 35d6e379658..ed6995b3080 100644 --- a/egs/babel/s5c/local/datasets/basic_kws.sh +++ b/egs/babel/s5c/local/datasets/basic_kws.sh @@ -1,13 +1,13 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. if [ "${dataset_kind}" == "supervised" ] ; then - mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" + mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" optional_variables="my_subset_ecf" else - mandatory_variables="my_ecf_file my_kwlist_file" + mandatory_variables="my_ecf_file my_kwlist_file" optional_variables="my_subset_ecf" fi @@ -23,6 +23,6 @@ if [ ! 
-f ${dataset_dir}/kws/.done ] ; then fi local/kws_setup.sh --case_insensitive $case_insensitive \ "${kws_flags[@]}" "${icu_opt[@]}" \ - $my_ecf_file $my_kwlist_file data/lang ${dataset_dir} || exit 1 - touch ${dataset_dir}/kws/.done + $my_ecf_file $my_kwlist_file $lang ${dataset_dir} || exit 1 + touch ${dataset_dir}/kws/.done fi diff --git a/egs/babel/s5c/local/datasets/extra_kws.sh b/egs/babel/s5c/local/datasets/extra_kws.sh index cb90968a1dc..32031270b36 100644 --- a/egs/babel/s5c/local/datasets/extra_kws.sh +++ b/egs/babel/s5c/local/datasets/extra_kws.sh @@ -1,13 +1,13 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. if [ "${dataset_kind}" == "supervised" ] ; then - mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" + mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" optional_variables="my_subset_ecf" else - mandatory_variables="my_ecf_file my_kwlist_file" + mandatory_variables="my_ecf_file my_kwlist_file" optional_variables="my_subset_ecf" fi @@ -17,7 +17,7 @@ function register_extraid { local dataset_dir=$1 local extraid=$2 echo "Registering $extraid" - echo $extraid >> $dataset_dir/extra_kws_tasks; + echo $extraid >> $dataset_dir/extra_kws_tasks; sort -u $dataset_dir/extra_kws_tasks -o $dataset_dir/extra_kws_tasks } @@ -31,7 +31,7 @@ function setup_oov_search { local data_dir=$1 local source_dir=$2 local extraid=$3 - + local kwsdatadir=$data_dir/${extraid}_kws mkdir -p $kwsdatadir @@ -50,7 +50,7 @@ function setup_oov_search { paste \ <(cat $kwlist | grep -o -P "(?<=kwid=\").*(?=\")") \ <(cat $kwlist | grep -o -P "(?<=<kwtext>).*(?=</kwtext>)" | uconv -f utf-8 -t utf-8 -x Any-Lower) \ - >$kwsdatadir/keywords.txt + >$kwsdatadir/keywords.txt cut -f 2 $kwsdatadir/keywords.txt | \ sed 's/\s\s*/\n/g' | sort -u > $kwsdatadir/oov.txt @@ -61,7 +61,7 @@ function setup_oov_search { if [ ! -f exp/conf_matrix/.done ] ; then local/generate_confusion_matrix.sh --cmd "$decode_cmd" --nj $my_nj \ exp/sgmm5_denlats/dengraph exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix || return 1 - touch exp/conf_matrix/.done + touch exp/conf_matrix/.done fi confusion=exp/conf_matrix/confusions.txt @@ -75,10 +75,13 @@ function setup_oov_search { fi local/apply_g2p.sh --nj $my_nj --cmd "$decode_cmd" \ --var-counts $g2p_nbest --var-mass $g2p_mass \ - $kwsdatadir/oov.txt exp/g2p $kwsdatadir/g2p + $kwsdatadir/oov.txt exp/g2p $kwsdatadir/g2p || return 1 L2_lex=$kwsdatadir/g2p/lexicon.lex - L1_lex=data/local/lexiconp.txt + if [ -z "$L1_lex" ] ; then + L1_lex=data/local/lexiconp.txt + fi + local/kws_data_prep_proxy.sh \ --cmd "$decode_cmd" --nj $my_nj \ --case-insensitive true \ @@ -86,14 +89,14 @@ function setup_oov_search { --phone-cutoff $phone_cutoff \ --pron-probs true --beam $proxy_beam --nbest $proxy_nbest \ --phone-beam $proxy_phone_beam --phone-nbest $proxy_phone_nbest \ - data/lang $data_dir $L1_lex $L2_lex $kwsdatadir + $lang $data_dir $L1_lex $L2_lex $kwsdatadir } kws_flags=( --use-icu true ) if [ "${dataset_kind}" == "supervised" ] ; then - #The presence of the file had been already verified, so just + #The presence of the file had been already verified, so just #add the correct switches kws_flags+=(--rttm-file $my_rttm_file ) fi @@ -107,20 +110,20 @@ if [ !
-f $dataset_dir/.done.kws.oov ] ; then touch $dataset_dir/.done.kws.oov fi if [ ${#my_more_kwlists[@]} -ne 0 ] ; then - + touch $dataset_dir/extra_kws_tasks - + for extraid in "${!my_more_kwlists[@]}" ; do #The next line will help us in running only one. We don't really - #know in which directory the KWS setup will reside in, so we will + #know in which directory the KWS setup will reside in, so we will #place the .done file directly into the data directory [ -f $dataset_dir/.done.kws.$extraid ] && continue; kwlist=${my_more_kwlists[$extraid]} local/kws_setup.sh --extraid $extraid --case_insensitive $case_insensitive \ "${kws_flags[@]}" "${icu_opt[@]}" \ - $my_ecf_file $kwlist data/lang ${dataset_dir} || exit 1 - + $my_ecf_file $kwlist $lang ${dataset_dir} || exit 1 + #Register the dataset for default running... #We can do it without any problem here -- the kws_stt_tasks will not #run it, unless called with --run-extra-tasks true switch @@ -129,7 +132,7 @@ if [ ${#my_more_kwlists[@]} -ne 0 ] ; then done for extraid in "${!my_more_kwlists[@]}" ; do #The next line will help us in running only one. We don't really - #know in which directory the KWS setup will reside in, so we will + #know in which directory the KWS setup will reside in, so we will #place the .done file directly into the data directory [ -f $dataset_dir/.done.kws.${extraid}_oov ] && continue; setup_oov_search $dataset_dir $dataset_dir/${extraid}_kws ${extraid}_oov diff --git a/egs/babel/s5c/local/datasets/supervised_pem.sh b/egs/babel/s5c/local/datasets/supervised_pem.sh index c32d73e0718..e131fae40fa 100644 --- a/egs/babel/s5c/local/datasets/supervised_pem.sh +++ b/egs/babel/s5c/local/datasets/supervised_pem.sh @@ -1,4 +1,4 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. diff --git a/egs/babel/s5c/local/datasets/supervised_seg.sh b/egs/babel/s5c/local/datasets/supervised_seg.sh index a681688f480..a5ccd36211b 100644 --- a/egs/babel/s5c/local/datasets/supervised_seg.sh +++ b/egs/babel/s5c/local/datasets/supervised_seg.sh @@ -1,4 +1,4 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. @@ -57,7 +57,7 @@ echo "Creating the $unseg_dir/reco2file_and_channel file" cat $unseg_dir/wav.scp | awk '{print $1, $1, "A";}' > $unseg_dir/reco2file_and_channel cat $unseg_dir/wav.scp | awk '{print $1, $1;}' > $unseg_dir/utt2spk utils/utt2spk_to_spk2utt.pl $unseg_dir/utt2spk > $unseg_dir/spk2utt - + make_plp $unseg_dir $workdir/make_plp $workdir/plp || exit 1 local/resegment/generate_segments.sh --nj $my_nj --cmd "$decode_cmd" \ diff --git a/egs/babel/s5c/local/datasets/supervised_uem.sh b/egs/babel/s5c/local/datasets/supervised_uem.sh index 318518ad86e..5ac1e003d5d 100644 --- a/egs/babel/s5c/local/datasets/supervised_uem.sh +++ b/egs/babel/s5c/local/datasets/supervised_uem.sh @@ -1,4 +1,4 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. 
@@ -6,7 +6,7 @@ eval my_data_cmudb=\$${dataset_type}_data_cmudb if [ "${dataset_kind}" != "supervised" ] ; then - mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" + mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" optional_variables="" else mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" diff --git a/egs/babel/s5c/local/datasets/vocab_kws.sh b/egs/babel/s5c/local/datasets/vocab_kws.sh index 812122bd024..40c1d8e841d 100644 --- a/egs/babel/s5c/local/datasets/vocab_kws.sh +++ b/egs/babel/s5c/local/datasets/vocab_kws.sh @@ -1,13 +1,13 @@ -#This script is not really supposed to be run directly +#This script is not really supposed to be run directly #Instead, it should be sourced from the decoding script #It makes many assumption on existence of certain environmental #variables as well as certain directory structure. if [ "${dataset_kind}" == "supervised" ] ; then - mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" + mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" optional_variables="my_subset_ecf" else - mandatory_variables="my_ecf_file my_kwlist_file" + mandatory_variables="my_ecf_file my_kwlist_file" optional_variables="my_subset_ecf" fi @@ -15,7 +15,7 @@ check_variables_are_set if [ "$dataset_kind" == "shadow" ]; then true #we do not support multiple kw lists for shadow set system - + elif [ ! -f $dataset_dir/.done.kws.fullvocab ] ; then #a This will work for both supervised and unsupervised dataset kinds kws_flags=() @@ -25,25 +25,25 @@ elif [ ! -f $dataset_dir/.done.kws.fullvocab ] ; then if $my_subset_ecf ; then kws_flags+=(--subset-ecf $my_data_list) fi - + #We just could come with some bogus naming scheme, #but as long as the audio files can tell the iarpa lang id, we will use that langid=`ls -1 $my_data_dir/audio/ | head -n 1| cut -d '_' -f 3` - #NB: we assume the default KWS search is already done and will "borrow" + #NB: we assume the default KWS search is already done and will "borrow" #the rttm and ecf files. #We could easily generate the ecf file, but the RTTM assumes the decoding - #had been already done. That could be done + #had been already done. That could be done #Ideally, these files should be generated here! local/kws_setup.sh --kwlist-wordlist true "${kws_flags[@]}" \ --extraid fullvocab $my_ecf_file \ - <(cat data/lang/words.txt | \ - grep -v -F "<" | grep -v -F "#" | \ + <(cat $lang/words.txt | \ + grep -v "^<" | grep -v "^#" | \ awk "{printf \"KWID$langid-FULLVOCAB-%05d %s\\n\", \$2, \$1 }" ) \ - data/lang ${dataset_dir} || exit 1 + $lang ${dataset_dir} || exit 1 - echo fullvocab >> $dataset_dir/extra_kws_tasks; + echo fullvocab >> $dataset_dir/extra_kws_tasks; sort -u $dataset_dir/extra_kws_tasks -o $dataset_dir/extra_kws_tasks touch $dataset_dir/.done.kws.fullvocab fi diff --git a/egs/babel/s5c/local/extend_lexicon.sh b/egs/babel/s5c/local/extend_lexicon.sh index fd0b27a4172..48553dd6279 100755 --- a/egs/babel/s5c/local/extend_lexicon.sh +++ b/egs/babel/s5c/local/extend_lexicon.sh @@ -10,7 +10,7 @@ # two files: lexiconp.txt (this is the lexicon format that has pronunciation # probabilities; the words in the original lexicon have probability one), and # oov2prob, which says how the OOV mass is distributed among the new OOV words -# in the lexicon. +# in the lexicon. 
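# (For orientation, lexiconp.txt lines look like "word p(pron) syl1 syl2 ..."
# and oov2prob lines like "word prob"; concrete examples of both formats
# appear in the comments further below in this script.)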
# It assumes that the syllables in pronunciations in the input lexicon.txt are # separated by tabs, as is normal for the BABEL setup; the syllable boundaries @@ -39,7 +39,7 @@ # because we felt that this would make the mapping harder for g2p to learn. # Instead we mapped the phones to unique letters; this is what the "phone_map" # file is about. Furthermore, in BABEL we have the concept of tags on the -# phones, e.g. in a tonal language, ay_3 might be the phone "ay" with tone 3. +# phones, e.g. in a tonal language, ay_3 might be the phone "ay" with tone 3. # As far as Kaldi is concerned, ay_3 is a single phone. To avoid the number of # letters blowing up too much, we make these tags separate letters when generating # phone_map, so ay_3 might be mapped to kX with ay mapping to k and 3 mapping to @@ -79,7 +79,7 @@ # equal to 0.33 times the probability listed in oov2prob. However, that script # will not allow the unigram probability of any OOV word to be more probable than # the least probable word which was originally in the ARPA file (not counting , -# which generally has probability -99); this is applied as a ceiling on the +# which generally has probability -99); this is applied as a ceiling on the # unknown-word probabilities. Note: the --unk-fraction should probably be # similar to the OOV rate in that language. Calculating the OOV rate on some # dev data is one reasonable way to set this; see the commands at the very @@ -149,7 +149,7 @@ cp $input_lexicon $toplevel_dir/input_lexicon.txt # just to have a record of wh loc=`which ngram-count`; if [ -z $loc ]; then if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... - sdir=`pwd`/../../../tools/srilm/bin/i686-m64 + sdir=`pwd`/../../../tools/srilm/bin/i686-m64 else sdir=`pwd`/../../../tools/srilm/bin/i686 fi @@ -256,21 +256,21 @@ if [ $stage -le -1 ]; then rm $dir/probs.* 2>/dev/null echo '#!/usr/bin/perl -while(1) { +while(1) { $sent = <>; $line=<>; if ($line !~ m/sentences/) { $sent =~ m/^file/ || die "Bad sent $sent"; exit(0); } - $line = <>; if ($line !~ m/logprob= (\S+)/) { die "Bad line $line"; } print "$1 $sent"; + $line = <>; if ($line !~ m/logprob= (\S+)/) { die "Bad line $line"; } print "$1 $sent"; $line = <>; $line eq "\n" || die "expected blank line"; }' >$dir/temp.pl chmod +x $dir/temp.pl $cmd JOB=1:$nj $dir/log/compute_prob.JOB.log \ $ngram -debug 1 -lm $dir/lm.gz -ppl $dir/sents.JOB \| $dir/temp.pl \| sort -gr \> $dir/probs.JOB || exit 1; - if $cleanup; then - rm $dir/sents.*; + if $cleanup; then + rm $dir/sents.*; fi sort -m -gr $dir/probs.* | uniq | head -n $num_prons > $dir/probs - if $cleanup; then - rm $dir/probs.*; + if $cleanup; then + rm $dir/probs.*; fi mass=$(cat $dir/probs | awk '{x += exp($1 * log(10));} END{print x}') @@ -296,7 +296,7 @@ fi # We may lose a little information by doing this, though, because the segmentation # into phonemes may be ambiguous. So we create a mapping from the original phonemes # and tags to letters of the alphabet. Note: tags are things like s_3 for a phone: here -# s is the phone and _3 is the tag. +# s is the phone and _3 is the tag. if [ $stage -le 0 ]; then @@ -375,10 +375,10 @@ if [ $stage -le $[$g2p_iters+1] ]; then awk '{if (NF >= 4) {printf("%s %s ", $1, $3); for (n=4;n<=NF;n++) {printf("%s", $n);} printf("\n"); }}' | \ sort | uniq > $dir/pron2spelling - # Now remove from pron2spelling, any words that appear in $dir/lexiconp_in.txt + # Now remove from pron2spelling, any words that appear in $dir/lexiconp_in.txt # (this also contains the excluded words like ). 
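# (The perl below is the usual two-pass filter: first read the word field of
# each lexiconp_in.txt line into %seen_word, then print only those
# pron2spelling lines whose word -- the third column, $A[2] -- is unseen.)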
cat $dir/pron2spelling | \ - perl -e 'open(F, $ARGV[0]) || die "opening $ARGV[0]"; while() { @A=split; $seen_word{$A[0]}=1; } + perl -e 'open(F, $ARGV[0]) || die "opening $ARGV[0]"; while() { @A=split; $seen_word{$A[0]}=1; } while() { @A=split; if (! $seen_word{$A[2]}) { print; }} ' $dir/lexiconp_in.txt > $dir/pron2spelling.excluded # $dir/pron2spelling.excluded contains lines like #ab syllable1 syllable2 ... # e.g. # Kuku 0.000002642 k>&u k>&u - + cat $dir/probs | \ perl -e ' while(){ @A = split; $prob = shift @A; $pron=join("", @A); $pron =~ tr/,//d; print "$pron $_"; } '> $dir/probs.with_pron @@ -402,7 +402,7 @@ if [ $stage -le $[$g2p_iters+1] ]; then # This is so we can get the pronunciation in the same form that we put it in, for # the p2g training, for easier comparison with the lines in $dir/pron2spelling.excluded - perl -e ' ($p2s, $probs_with_pron) = @ARGV; + perl -e ' ($p2s, $probs_with_pron) = @ARGV; open(P2S, "<$p2s" || die); open(PROBS, "<$probs_with_pron")||die; while () { @A = split; @@ -487,7 +487,7 @@ if [ $stage -le $[$g2p_iters+1] ]; then print L "$word\t$pronprob\t$pron"; } close(L); close(W); # wait for sort to finish. ' \ $dir/lexiconp_oov.txt $dir/oov2prob - + # lexiconp_oov.txt contains lines like: #leyanga 0.96471840417664 l 3 j_" a_" N a #leyanga 1 l 3 j_" a_" N g a @@ -497,7 +497,7 @@ if [ $stage -le $[$g2p_iters+1] ]; then #Adlule 9.62418179264897e-08 #Afuna 2.23048402109824e-06 fi - + if [ $stage -le $[$g2p_iters+2] ]; then # put it to the output directory $localdir e.g. data/local/ cat $dir/lexiconp_in.txt $dir/lexiconp_oov.txt | \ @@ -526,7 +526,7 @@ if [ ! -z $dev_text ]; then $oov_rate = 100.0 * (1.0 - ($invoc / $tot)); printf("Seen $invoc out of $tot tokens; token OOV rate is %.2f\n", $oov_rate);' \ $toplevel_dir/lexiconp.txt > $toplevel_dir/new_oov_rates - + # Original type OOV rate cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | sort -u |\ perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(){ @A=split; $seen{$A[0]}=1;} @@ -549,7 +549,7 @@ exit 0; ###BELOW HERE IS JUST COMMENTS ########### #cat /export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt | \ -for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do +for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do cat /export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt | \ perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(){ @A=split; $seen{$A[0]}=1;} while() { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }} @@ -559,7 +559,7 @@ done #Seen 13675 out of 60613 tokens; OOV rate is 77.44 #Seen 26936 out of 60613 tokens; OOV rate is 55.56 -for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do +for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do cat data/dev10h/text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | \ perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(){ @A=split; $seen{$A[0]}=1;} while() { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }} diff --git a/egs/babel/s5c/local/extract_oov_words.pl b/egs/babel/s5c/local/extract_oov_words.pl index fbb6e95286d..08f8f5d1436 100755 --- a/egs/babel/s5c/local/extract_oov_words.pl +++ b/egs/babel/s5c/local/extract_oov_words.pl @@ -5,15 +5,15 @@ use Data::Dumper; $Data::Dumper::Indent = 1; -binmode STDOUT, ":utf8"; -binmode STDIN, ":utf8"; +binmode STDOUT, ":utf8"; +binmode STDIN, ":utf8"; $ignore_oov = 0; $ignore_first_field = 0; for($x = 0; $x < 
2; $x++) { if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; + shift @ARGV; + $field_spec = shift @ARGV; if ($field_spec =~ m/^\d+$/) { $field_begin = $field_spec - 1; $field_end = $field_spec - 1; } @@ -26,7 +26,7 @@ } } if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; + die "Bad argument to -f option: $field_spec"; } } } @@ -43,7 +43,7 @@ while() { @A = split(" ", $_); @A == 2 || die "bad line in symbol table file: $_"; - + if ( not defined( $sym2int{$A[0]} ) ) { $sym2int{$A[0]} = []; } @@ -62,7 +62,7 @@ $i = $sym2int{$a}; if (!defined ($i)) { print $a . "\n"; - } + } } } } diff --git a/egs/babel/s5c/local/filter_kwslist.pl b/egs/babel/s5c/local/filter_kwslist.pl index c84a5f6d3c9..7c57b62517a 100755 --- a/egs/babel/s5c/local/filter_kwslist.pl +++ b/egs/babel/s5c/local/filter_kwslist.pl @@ -24,19 +24,19 @@ if(ref($kwentry->{kw}) eq 'ARRAY'){ my @arr = @{$kwentry->{kw}}; my @newarray = (); - + push @newarray, $arr[0]; #$arr[0]->{tbeg} . "\n"; for (my $i = 1; $i < scalar(@arr); $i +=1) { - + my $found = 0; foreach my $kw (@newarray) { - if (( abs($arr[$i]->{tbeg} - $kw->{tbeg}) < $duptime ) && + if (( abs($arr[$i]->{tbeg} - $kw->{tbeg}) < $duptime ) && ( $arr[$i]->{channel} == $kw->{channel}) && ( $arr[$i]->{file} eq $kw->{file}) ) { $found = 1; - + #print $arr[$i]->{tbeg} . "\n"; } } diff --git a/egs/babel/s5c/local/find_transcripts.pl b/egs/babel/s5c/local/find_transcripts.pl index 6429411b864..d34b075e7ea 100755 --- a/egs/babel/s5c/local/find_transcripts.pl +++ b/egs/babel/s5c/local/find_transcripts.pl @@ -21,7 +21,7 @@ # It takes as # Extracts from the dot files the transcripts for a given # dataset (represented by a file list). -# +# @ARGV == 1 || die "find_transcripts.pl dot_files_flist < utterance_ids > transcripts"; $dot_flist = shift @ARGV; @@ -36,7 +36,7 @@ -while(){ +while(){ chop; $uttid = $_; $uttid =~ m:(\w{6})\w\w: || die "Bad utterance id $_"; diff --git a/egs/babel/s5c/local/fix_kwslist.pl b/egs/babel/s5c/local/fix_kwslist.pl index 29afc73e473..33c6dc30e82 100755 --- a/egs/babel/s5c/local/fix_kwslist.pl +++ b/egs/babel/s5c/local/fix_kwslist.pl @@ -81,7 +81,7 @@ sub mysort { print $xml; } else { if (!open(O, ">$fixed_kwslist_out")) { - print "Fail to open output file: $fixed_kwslist_out\n"; + print "Fail to open output file: $fixed_kwslist_out\n"; exit 1; } print O $xml; diff --git a/egs/babel/s5c/local/generate_confusion_matrix.sh b/egs/babel/s5c/local/generate_confusion_matrix.sh index 4bcbacb5ae9..e6b221f7cc0 100755 --- a/egs/babel/s5c/local/generate_confusion_matrix.sh +++ b/egs/babel/s5c/local/generate_confusion_matrix.sh @@ -2,7 +2,7 @@ # Copyright 2014 Johns Hopkins University (Author: Yenda Trmal) # Apache 2.0 -# Begin configuration section. +# Begin configuration section. nj=4 cmd=run.pl acwt=0.1 @@ -86,7 +86,7 @@ cat $confusion_files | cut -f 2- -d ' ' | sed 's/ *; */\n/g'| sort | uniq -c | \ perl -ane ' die unless scalar @F == 3; print "$F[1] $F[2] $F[0]\n"; - ' > $wdir/confusions.txt + ' > $wdir/confusions.txt exit 0 #-echo "Converting alignments to phone sequences..." 
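The confusions.txt written by the last command above is a plain three-column table, one aligned phone pair per line followed by its count; for example (hypothetical phones and counts):

    ax ah 42
    sil sil 913

kws_data_prep_proxy.sh can consume a file in this format through its confusion_matrix option when building the phone-confusion transducer E.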
diff --git a/egs/babel/s5c/local/generate_example_kws.sh b/egs/babel/s5c/local/generate_example_kws.sh index 2c849438192..e90752926b3 100755 --- a/egs/babel/s5c/local/generate_example_kws.sh +++ b/egs/babel/s5c/local/generate_example_kws.sh @@ -71,7 +71,7 @@ cat $text | perl -e ' } $min_count++; } - + $total = 20; $current = 0; $min_count = 4; @@ -88,7 +88,7 @@ cat $text | perl -e ' } $min_count++; } - + $total = 10; $current = 0; $min_count = 3; diff --git a/egs/babel/s5c/local/generate_proxy_keywords.sh b/egs/babel/s5c/local/generate_proxy_keywords.sh index 8562953efa4..584f7d7902e 100755 --- a/egs/babel/s5c/local/generate_proxy_keywords.sh +++ b/egs/babel/s5c/local/generate_proxy_keywords.sh @@ -3,7 +3,7 @@ # Copyright 2012-2014 Guoguo Chen # Apache 2.0. -# Begin configuration section. +# Begin configuration section. nj=8 cmd=run.pl beam=-1 # Beam for proxy FST, -1 means no prune @@ -46,7 +46,7 @@ if [ $# -ne 1 ]; then exit 1; fi -set -e +set -e set -o pipefail kwsdatadir=$1 @@ -68,8 +68,34 @@ if $pron_probs; then pron_probs_param="--pron-probs"; fi +cat $kwsdatadir/L1.lex | \ + perl -e ' + while ( $line = <STDIN> ) { + chomp $line; + ($word, $pron) = split " ", $line, 2; + $pron = join(" ", split(" ", $pron)); + push @{$LEX{$pron}}, $word; + } + + open(L1, "| sort -u > $ARGV[0]") or die "Cannot open $ARGV[0]\n"; + open(MAP, "| sort -u > $ARGV[1]") or die "Cannot open $ARGV[1]\n"; + foreach $pron (keys %LEX) { + $head = $LEX{$pron}->[0]; + print L1 "$head $pron\n"; + foreach $alt (@{$LEX{$pron}}) { + print MAP "0 0 $alt $head\n"; + } + } + print MAP "0\n"; + close(L1); + close(MAP); +' $kwsdatadir/L1_dedup.lex $kwsdatadir/L1.revdup.fst.txt + +fstcompile --isymbols=$kwsdatadir/words.txt --osymbols=$kwsdatadir/words.txt $kwsdatadir/L1.revdup.fst.txt | \ + fstarcsort --sort_type=olabel - $kwsdatadir/L1.revdup.fst + ndisambig=`utils/add_lex_disambig.pl \ - $pron_probs_param $kwsdatadir/L1.lex $kwsdatadir/L1_disambig.lex` + $pron_probs_param $kwsdatadir/L1_dedup.lex $kwsdatadir/L1_disambig.lex` ndisambig=$[$ndisambig+1]; # add one disambig symbol for silence in lexicon FST.
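# To make the de-duplication above concrete (hypothetical entries): if
# L1.lex held two spellings with an identical pronunciation,
#   COLOR k ah l er
#   COLOUR k ah l er
# then L1_dedup.lex keeps one head entry per pronunciation (the spelling
# encountered first), and L1.revdup.fst.txt maps every spelling to its head:
#   0 0 COLOR COLOR
#   0 0 COLOUR COLOR
#   0
# The composition with L1.revdup.fst at the end of this script then carries
# proxies generated for the head back to all original spellings.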
( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $kwsdatadir/disambig.txt @@ -86,11 +112,12 @@ cat $kwsdatadir/L2.lex |\ --osymbols=$kwsdatadir/words.txt - |\ fstinvert | fstarcsort --sort_type=olabel > $kwsdatadir/L2.fst +echo $kwsdatadir/phones.txt phone_disambig_symbol=`grep \#0 $kwsdatadir/phones.txt | awk '{print $2}'` word_disambig_symbol=`grep \#0 $kwsdatadir/words.txt | awk '{print $2}'` -phone_disambig_symbols=`grep \# $kwsdatadir/phones.txt |\ +phone_disambig_symbols=`grep "^#" $kwsdatadir/phones.txt |\ awk '{print $2}' | tr "\n" " "` -word_disambig_symbols=`grep \# $kwsdatadir/words.txt |\ +word_disambig_symbols=`grep "^#" $kwsdatadir/words.txt |\ awk '{print $2}' | tr "\n" " "` cat $kwsdatadir/L1_disambig.lex |\ utils/make_lexicon_fst.pl $pron_probs_param - |\ @@ -139,10 +166,11 @@ $cmd JOB=1:$nj $kwsdatadir/split/log/proxy.JOB.log \ generate-proxy-keywords --verbose=1 \ --proxy-beam=$beam --proxy-nbest=$nbest \ --phone-beam=$phone_beam --phone-nbest=$phone_nbest \ - $kwsdatadir/L2xE.fst $kwsdatadir/L1.fst ark:- ark:$kwsdatadir/split/proxy.JOB.fsts + $kwsdatadir/L2xE.fst $kwsdatadir/L1.fst ark:- ark,t:$kwsdatadir/split/proxy.JOB.fsts proxy_fsts="" for j in `seq 1 $nj`; do proxy_fsts="$proxy_fsts $kwsdatadir/split/proxy.$j.fsts" done -cat $proxy_fsts > $kwsdatadir/keywords.fsts +cat $proxy_fsts | fsttablecompose $kwsdatadir/L1.revdup.fst ark:- ark:- | \ + fsts-project ark:- ark:$kwsdatadir/keywords.fsts diff --git a/egs/babel/s5c/local/get_syllable_text.sh b/egs/babel/s5c/local/get_syllable_text.sh deleted file mode 100755 index 97d2af7ed65..00000000000 --- a/egs/babel/s5c/local/get_syllable_text.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash - -# Copyright Johns Hopkins University 2013 (author: Daniel Povey) -# Apache 2.0. - -if [ $# -ne 7 ]; then - echo "Usage: get_syllable_text.sh " - echo "e.g.: get_syllable_text.sh data/train data/lang ../s5-vietnamese-limited-syllables/data/lang_nopos \\" - echo " ../s5-vietnamese-limited-syllables/data/local/syllables/word2syllable_lexicon_unweighted.fst" - echo " exp/tri5h_ali exp/tri5_align_syllables ../s5-vietnamese-limited-syllables/data/train" - echo "This script copies the data-directory to but converts the text into syllable-level text." - echo "The inputs are as follows (those that are not self-explanatory):" - echo " is the syllable-level lang/ directory that has been built without" - echo " word-position dependency (we'll strip the suffixes from phones and expect them to be compatible with this)" - echo " is a kind of lexicon FST that describes words as syllable sequences." - echo " contains a word-level alignment of the data in " - echo " will be used to put temporary files and logs (make it somewhere in exp/)" - echo " is a data directory to put the syllable-level data; transcripts go to /text" - exit 1; -fi - -[ -f path.sh ] && . ./path.sh - -data=$1 -lang=$2 -lang_nopos=$3 -word2syllable_fst=$4 -alidir=$5 -dir=$6 -tgtdata=$7 - -for f in $data/text $lang/L.fst $lang_nopos/L.fst $word2syllable_fst $alidir/ali.1.gz \ - $alidir/final.mdl $alidir/num_jobs; do - if [ ! -f $f ]; then - echo "Expected file $f to exist" - exit 1; - fi -done - -mkdir -p $dir/log -nj=`cat $alidir/num_jobs` || exit 1; -sil=`cat data/lang/phones/optional_silence.txt` || exit 1 - -! 
( ( for n in `seq $nj`; do gunzip -c $alidir/ali.$n.gz; done ) | \ - ali-to-phones $alidir/final.mdl ark:- ark,t:- | \ - utils/int2sym.pl -f 2- $lang/phones.txt - | \ - sed -E 's/_I( |$)/ /g' | sed -E 's/_E( |$)/ /g' | sed -E 's/_B( |$)/ /g' | sed -E 's/_S( |$)/ /g' | \ - utils/sym2int.pl -f 2- $lang_nopos/phones.txt | \ - gzip -c > $dir/phones.ark.gz ) 2>&1 | tee $dir/log/align.log \ - && echo "Error getting phone-level (non-word-position-dependent) alignments" && exit 1; - -# Get an archive of syllable-level acceptors corresponding to the training data. -# transcripts. We don't have an fstproject program for archives so we use a line of awk. - -! ( cat $data/text | utils/sym2int.pl --map-oov `cat $lang/oov.int` -f 2- $lang/words.txt | \ - transcripts-to-fsts ark:- ark:- | \ - fsttablecompose $word2syllable_fst ark:- ark,t:- | \ - awk '{if (NF < 4) { print; } else { print $1, $2, $3, $3, $5; }}' | \ - gzip -c > $dir/syllables.ark.gz ) 2>&1 | tee $dir/log/get_syllable_fsts.log && \ - echo "Error getting syllable FSTs" && exit 1; - -cp -rT $data $tgtdata || exit 1; -rm -rf $tgtdata/split* - -# From the phone-level transcripts and the syllable-level acceptors, work out -# the syllable sequence for each . Remove consecutive silences. -! ( fsttablecompose $lang_nopos/L.fst "ark:gunzip -c $dir/syllables.ark.gz|" ark:- | \ - fsttablecompose "ark:gunzip -c $dir/phones.ark.gz | transcripts-to-fsts ark:- ark:- |" \ - ark,s,cs:- ark,t:- | fsts-to-transcripts ark:- ark,t:- | int2sym.pl -f 2- $lang_nopos/words.txt | \ - sed "s/$sil $sil/$sil/g" > $tgtdata/text ) && echo "Error getting text data" && exit 1; - -! utils/fix_data_dir.sh $tgtdata/ && echo "Error fixing data dir" && exit 1; - -exit 0; - - - diff --git a/egs/babel/s5c/local/gridsearch.pl b/egs/babel/s5c/local/gridsearch.pl index 7b2ad530fa4..937273286fe 100755 --- a/egs/babel/s5c/local/gridsearch.pl +++ b/egs/babel/s5c/local/gridsearch.pl @@ -78,7 +78,7 @@ sub substitute { sub escape { my @cmd_in = @{$_[0]}; my @cmd = (); - foreach my $x (@cmd_in) { + foreach my $x (@cmd_in) { if ($x =~ m/^\S+$/) { push @cmd, $x } # If string contains no spaces, take # as-is. @@ -100,11 +100,11 @@ sub escape { for (my $i=0; $i < scalar(@ARGV); $i++) { if ($ARGV[$i] eq "-var") { - + $i++; (my $name, my @range) = gen_sequence(split('=', $ARGV[$i])); $VARIABLES{$name}=\@range - + } elsif ($ARGV[$i] eq "-train") { if ( $cmdid ) { if ( $cmdid eq "-eval" ) { @@ -113,7 +113,7 @@ sub escape { @traincmd = @cmd; } } - + $cmdid = $ARGV[$i]; @cmd = (); @@ -167,12 +167,12 @@ sub escape { @out = substitute(\@traincmd, \%params); print "Running train:\n" . join(" ", @out) . "\n"; system(@out) == 0 or die "system @out failed: exit code $?"; - + @out = substitute(\@evalcmd, \%params); print "Running eval:\n" . join(" ", @out) . 
"\n"; system(@out) == 0 or die "system @out failed: exit code $?"; - + } diff --git a/egs/babel/s5c/local/gridsearch2.pl b/egs/babel/s5c/local/gridsearch2.pl index 6645743c114..d09d8b28f0a 100755 --- a/egs/babel/s5c/local/gridsearch2.pl +++ b/egs/babel/s5c/local/gridsearch2.pl @@ -91,17 +91,17 @@ sub substitute { for (my $i=0; $i < scalar(@ARGV); $i++) { if ($ARGV[$i] eq "-var") { - + $i++; (my $name, my @range) = gen_sequence(split('=', $ARGV[$i])); $VARIABLES{$name}=\@range - + } elsif (grep {$_ eq $ARGV[$i]} @known_switches) { if ($cmdid) { print "CMD: $cmdid\n"; my @tmp = @cmd; - $found_switches{$cmdid} = \@tmp; + $found_switches{$cmdid} = \@tmp; pp(%found_switches); } @@ -120,7 +120,7 @@ sub substitute { if ($cmdid) { print "CMD: $cmdid\n"; my @tmp = @cmd; - $found_switches{$cmdid} = \@tmp; + $found_switches{$cmdid} = \@tmp; } pp(%VARIABLES); @@ -136,11 +136,11 @@ sub substitute { my @out; @out = substitute(\@traincmd, \%params); system(@out) == 0 or die "system @out failed: exit code $?"; - + @out = substitute(\@evalcmd, \%params); system(@out) == 0 or die "system @out failed: exit code $?"; - + } diff --git a/egs/babel/s5c/local/kwords2indices.pl b/egs/babel/s5c/local/kwords2indices.pl index 47cc3dc2741..776f66c5951 100755 --- a/egs/babel/s5c/local/kwords2indices.pl +++ b/egs/babel/s5c/local/kwords2indices.pl @@ -5,8 +5,8 @@ use Data::Dumper; $Data::Dumper::Indent = 1; -binmode STDOUT, ":utf8"; -binmode STDIN, ":utf8"; +binmode STDOUT, ":utf8"; +binmode STDIN, ":utf8"; sub permute { @@ -16,10 +16,10 @@ sub permute { return map([$_], @$last); } - return map { - my $left = $_; + return map { + my $left = $_; map([@$left, $_], @$last) - } + } permute(@_); } @@ -32,8 +32,8 @@ sub permute { shift @ARGV; $map_oov = shift @ARGV; } if ($ARGV[0] eq "-f") { - shift @ARGV; - $field_spec = shift @ARGV; + shift @ARGV; + $field_spec = shift @ARGV; if ($field_spec =~ m/^\d+$/) { $field_begin = $field_spec - 1; $field_end = $field_spec - 1; } @@ -46,7 +46,7 @@ sub permute { } } if (!defined $field_begin && !defined $field_end) { - die "Bad argument to -f option: $field_spec"; + die "Bad argument to -f option: $field_spec"; } } } @@ -61,7 +61,7 @@ sub permute { while() { @A = split(" ", $_); @A == 2 || die "bad line in symbol table file: $_"; - + if ( not defined( $sym2int{$A[0]} ) ) { $sym2int{$A[0]} = []; } diff --git a/egs/babel/s5c/local/kws_combine.sh b/egs/babel/s5c/local/kws_combine.sh index 33446915eac..f795c63aad9 100755 --- a/egs/babel/s5c/local/kws_combine.sh +++ b/egs/babel/s5c/local/kws_combine.sh @@ -17,9 +17,9 @@ # Script for system combination using minimum Bayes risk decoding. -# This calls lattice-combine to create a union of lattices that have been +# This calls lattice-combine to create a union of lattices that have been # normalized by removing the total forward cost from them. The resulting lattice -# is used as input to lattice-mbr-decode. This should not be put in steps/ or +# is used as input to lattice-mbr-decode. This should not be put in steps/ or # utils/ since the scores on the combined lattice must not be scaled. # begin configuration section. @@ -71,7 +71,7 @@ for i in `seq 0 $[num_sys-1]`; do offset=`echo $decode_dir | cut -d: -s -f2` # add this to the lm-weight. 
decode_dir=`echo $decode_dir | cut -d: -f1` [ -z "$offset" ] && offset=1 - + weight=$(perl -e "print ($offset/$total_sum);") if [ -f $decode_dir ] ; then systems+="$weight $decode_dir " diff --git a/egs/babel/s5c/local/kws_data_prep.sh b/egs/babel/s5c/local/kws_data_prep.sh index 909e9b2596c..3882c99ce6d 100755 --- a/egs/babel/s5c/local/kws_data_prep.sh +++ b/egs/babel/s5c/local/kws_data_prep.sh @@ -3,7 +3,7 @@ # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) # Apache 2.0. -# Begin configuration section. +# Begin configuration section. case_insensitive=true use_icu=true icu_transform="Any-Lower" @@ -21,11 +21,11 @@ help_message=" Note: most important output is keywords.fsts allowed switches: --case-sensitive # Shall we be case-sensitive or not? - # Please not the case-sensitivness depends + # Please not the case-sensitivness depends # on the shell locale! --use-uconv # Use the ICU uconv binary to normalize casing --icu-transform # When using ICU, use this transliteration - + " [ -f ./path.sh ] && . ./path.sh; # source the path. @@ -39,7 +39,7 @@ if [ $# -ne 3 ]; then fi set -u -set -e +set -e set -o pipefail langdir=$1; @@ -51,8 +51,8 @@ keywords=$kwsdatadir/kwlist.xml mkdir -p $kwsdatadir; cat $keywords | perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; @@ -75,8 +75,8 @@ if $case_insensitive && ! $use_icu ; then echo "$0: Running case insensitive processing" cat $langdir/words.txt | tr '[:lower:]' '[:upper:]' > $kwsdatadir/words.txt [ `cut -f 1 -d ' ' $kwsdatadir/words.txt | sort -u | wc -l` -ne `cat $kwsdatadir/words.txt | wc -l` ] && \ - echo "$0: Warning, multiple words in dictionary differ only in case: " - + echo "$0: Warning, multiple words in dictionary differ only in case: " + cat $kwsdatadir/keywords.txt | tr '[:lower:]' '[:upper:]' | \ sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt > $kwsdatadir/keywords_all.int @@ -84,7 +84,7 @@ elif $case_insensitive && $use_icu ; then echo "$0: Running case insensitive processing (using ICU with transform \"$icu_transform\")" cat $langdir/words.txt | uconv -f utf8 -t utf8 -x "${icu_transform}" > $kwsdatadir/words.txt [ `cut -f 1 -d ' ' $kwsdatadir/words.txt | sort -u | wc -l` -ne `cat $kwsdatadir/words.txt | wc -l` ] && \ - echo "$0: Warning, multiple words in dictionary differ only in case: " + echo "$0: Warning, multiple words in dictionary differ only in case: " paste <(cut -f 1 $kwsdatadir/keywords.txt ) \ <(cut -f 2 $kwsdatadir/keywords.txt | uconv -f utf8 -t utf8 -x "${icu_transform}" ) |\ @@ -107,15 +107,21 @@ fi # Compile keywords into FSTs -if [ -z $silence_word ]; then - transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:$kwsdatadir/keywords.fsts +if [ -s $kwsdatadir/keywords.int ]; then + if [ -z $silence_word ]; then + transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:$kwsdatadir/keywords.fsts + else + silence_int=`grep -w $silence_word $langdir/words.txt | awk '{print $2}'` + [ -z $silence_int ] && \ + echo "$0: Error: could not find integer representation of silence word $silence_word" && exit 1; + transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:- | \ + awk -v 'OFS=\t' -v silint=$silence_int '{if (NF == 4 && $1 != 0) { print $1, $1, silint, silint; } print; }' \ + > $kwsdatadir/keywords.fsts + fi else - silence_int=`grep -w $silence_word $langdir/words.txt | awk '{print $2}'` - [ -z $silence_int ] && \ - echo "$0: Error: could not find integer representation of silence word $silence_word" && 
exit 1; - transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:- | \ - awk -v 'OFS=\t' -v silint=$silence_int '{if (NF == 4 && $1 != 0) { print $1, $1, silint, silint; } print; }' \ - > $kwsdatadir/keywords.fsts + echo "WARNING: $kwsdatadir/keywords.int is zero-size. That means no keyword" + echo "WARNING: was found in the dictionary. That might be OK -- or not." + touch $kwsdatadir/keywords.fsts fi # Create utterance id for each utterance @@ -129,7 +135,7 @@ cat $datadir/segments | \ $idx++; }' > $kwsdatadir/utter_id -# Map utterance to the names that will appear in the rttm file. You have +# Map utterance to the names that will appear in the rttm file. You have # to modify the commands below accoring to your rttm file cat $datadir/segments | awk '{print $1" "$2}' | sort | uniq > $kwsdatadir/utter_map; diff --git a/egs/babel/s5c/local/kws_data_prep_proxy.sh b/egs/babel/s5c/local/kws_data_prep_proxy.sh index 787cb009960..04cc59b6499 100755 --- a/egs/babel/s5c/local/kws_data_prep_proxy.sh +++ b/egs/babel/s5c/local/kws_data_prep_proxy.sh @@ -3,7 +3,7 @@ # Copyright 2014 Guoguo Chen # Apache 2.0. -# Begin configuration section. +# Begin configuration section. nj=8 cmd=run.pl beam=-1 # Beam for proxy FST, -1 means no prune @@ -15,6 +15,10 @@ phone_nbest=50 # Use top n best phone sequences in KxL2xE, -1 means all phone_cutoff=5 # We don't generate proxy keywords for OOV keywords that # have less phones than the specified cutoff as they may # introduce a lot false alarms +max_phone_cutoff=9990 # We don't generate proxy keywords for OOV keywords that + # have more than this phonemes. This can be used when + # we need to use different parameters for keywords of + # different lengths. confusion_matrix= # If supplied, using corresponding E transducer count_cutoff=1 # Minimal count to be considered in the confusion matrix; # will ignore phone pairs that have count less than this. @@ -38,13 +42,13 @@ if [ $# -ne 5 ]; then echo " data/local/tmp.lang/lexiconp.txt oov_lexicon.txt data/dev10h/kws/" echo "allowed options:" echo " --case-sensitive # Being case-sensitive or not" - echo " --icu-transform # Transliteration for upper/lower case" + echo " --icu-transform # Transliteration for upper/lower case" echo " # mapping" echo " --proxy-set # Keyword set for generating proxies" exit 1 fi -set -e +set -e set -o pipefail langdir=$1 @@ -62,8 +66,8 @@ keywords=$kwsdatadir/kwlist.xml mkdir -p $kwsdatadir/tmp/ cat $keywords | perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; use XML::Simple; use Data::Dumper; @@ -103,7 +107,7 @@ if $case_insensitive; then else cat $l2_lexicon | sed 's/\s/ /g' > $kwsdatadir/tmp/L2.tmp.lex cp $kwsdatadir/raw_keywords_all.txt $kwsdatadir/keywords_all.txt - + cat $kwsdatadir/keywords_all.txt | \ sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt \ > $kwsdatadir/keywords_all.int @@ -139,11 +143,11 @@ cat $kwsdatadir/keywords_proxy.txt |\ # L1 since it is the lexicon used for the LVCSR training. 
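# (The sed commands below strip the Kaldi word-position tags _B/_E/_I/_S and
# any stress/tone tags from each pronunciation, e.g. "k_B ah_I t_E" becomes
# "k ah t". Note the corrected character class _[BEIS]: in the old pattern
# _[B|E|I|S] the | characters inside [...] matched a literal pipe rather than
# acting as alternation.)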
cat $kwsdatadir/tmp/L1.tmp.lex | cut -d ' ' -f 1 |\ paste -d ' ' - <(cat $kwsdatadir/tmp/L1.tmp.lex | cut -d ' ' -f 2-|\ - sed 's/_[B|E|I|S]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\ + sed 's/_[BEIS]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\ awk '{if(NF>=2) {print $0}}' > $kwsdatadir/tmp/L1.lex cat $kwsdatadir/tmp/L2.tmp.lex | cut -d ' ' -f 1 |\ paste -d ' ' - <(cat $kwsdatadir/tmp/L2.tmp.lex | cut -d ' ' -f 2-|\ - sed 's/_[B|E|I|S]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\ + sed 's/_[BEIS]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\ awk '{if(NF>=2) {print $0}}' | perl -e ' ($lex1, $words) = @ARGV; open(L, "<$lex1") || die "Fail to open $lex1.\n"; @@ -230,8 +234,10 @@ cat $kwsdatadir/keywords_proxy.txt | perl -e ' print STEDRR "'$0': No pronunciation found for word: $col[$i]\n"; } } - if ($len >= '$phone_cutoff') { + if (($len >= '$phone_cutoff') && ($len <= '$max_phone_cutoff')){ print "$line\n"; + } elsif ($len > '$max_phone_cutoff'){ + print STDERR "'$0': Keyword $col[0] is too long, not generating proxy\n"; } else { print STDERR "'$0': Keyword $col[0] is too short, not generating proxy\n"; } @@ -256,7 +262,7 @@ cat $datadir/segments | \ $idx++; }' > $kwsdatadir/utter_id -# Map utterance to the names that will appear in the rttm file. You have +# Map utterance to the names that will appear in the rttm file. You have # to modify the commands below accoring to your rttm file cat $datadir/segments | awk '{print $1" "$2}' |\ sort | uniq > $kwsdatadir/utter_map; diff --git a/egs/babel/s5c/local/kws_data_prep_syllables.sh b/egs/babel/s5c/local/kws_data_prep_syllables.sh deleted file mode 100755 index c6245e52c9e..00000000000 --- a/egs/babel/s5c/local/kws_data_prep_syllables.sh +++ /dev/null @@ -1,144 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) -# Apache 2.0. - -# Begin configuration section. -silence_word= # Optional silence word to insert (once) between words of the transcript. -# End configuration section. - -echo $0 "$@" - -[ -f ./path.sh ] && . ./path.sh; # source the path. -. parse_options.sh || exit 1; - - -if [ $# -ne 4 ]; then - echo "Usage: local/kws_data_prep_syllables.sh [options] " - echo " e.g.: local/kws_data_prep_syllables.sh data/lang/ data/dev10h/ SIL data/kws/" - echo "Input is in : kwlist.xml, ecf.xml (rttm file not needed)." - echo "The lang directory is expected to be syllable-level. The syllable-lexicon " - echo "is a text file with lines of the form:" - echo "word syllable1 syllable2" - echo "This script is as kws_data_prep.sh, except that the output keywords.fsts" - echo "contains the various alternative syllable-level pronunciations of the input" - echo "words." - echo "Output is in : keywords.txt, kwlist_invocab.xml," - echo " kwlist_outvocab.xml, keywords.fsts; note that the only syllable-level" - echo " output (and the only one that really matters) is keywords.fsts" - echo "Note: most important output is keywords.fsts" - echo " Options:" - echo " --silence-word # Note, this is required. It is a word, e.g. SIL," - echo " # in the syllable lexicon, that's optional." 
- exit 1; -fi - -langdir=$1; -datadir=$2; -syllable_lexicon=$3 -kwsdatadir=$4 -keywords=$kwsdatadir/kwlist.xml - -[ -z $silence_word ] && echo "--silence-word option is required" && exit 1; - -mkdir -p $kwsdatadir; - -cat $keywords | perl -e ' - #binmode STDIN, ":utf8"; - binmode STDOUT, ":utf8"; - - use XML::Simple; - use Data::Dumper; - - my $data = XMLin(\*STDIN); - - #print Dumper($data->{kw}); - foreach $kwentry (@{$data->{kw}}) { - #print Dumper($kwentry); - print "$kwentry->{kwid}\t$kwentry->{kwtext}\n"; - } -' > $kwsdatadir/keywords.txt - -[ ! -s "$syllable_lexicon" ] && echo "No such file '$syllable_lexicon' (syllable lexicon), or empty file." && exit 1; - -# The word symbols on the first entry of $syllable_lexicon will be given a symbol-table -# file. We just use this symbol table in this script; the values will never appear -# elsewhere. - -mkdir -p $kwsdatadir/temp - -# Remove any lines with symbols we don't have in our symbol vocabulary. -temp_syllable_lexicon=$kwsdatadir/temp/syllable_lexicon.in -cat $syllable_lexicon | sym2int.pl --map-oov 123456789 -f 2- $langdir/words.txt | grep -v -w 123456789 | \ - int2sym.pl -f 2- $langdir/words.txt > $temp_syllable_lexicon - -n1=`cat $syllable_lexicon | wc -l` -n2=`cat $temp_syllable_lexicon | wc -l` -echo "After removing OOV symbols from word-to-syllable lexicon, #lines changed from $n1 to $n2" - - -if $case_insensitive; then - echo "Running case insensitive processing" - # we turn the first element of each line of $temp_syllable_lexicon into upper case. - tr '[:lower:]' '[:upper:]' < $temp_syllable_lexicon | awk '{print $1}' | \ - paste - <(awk '{for(n=2;n<=NF;n++) { printf("%s ", $n); } print ""; }' <$temp_syllable_lexicon) \ - > $kwsdatadir/temp/syllable_lexicon.txt || exit 1; - - # We turn all but the first element of each line in $kwsdatadir/keywords.txt - # into upper case. 
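# Roughly what the tr/awk/paste pipeline below does: upper-case everything
# after the first column while pasting the untouched KW ids back in as
# column 1.  With a made-up keywords.txt entry:
#
#   printf 'KW101-0001 okay google\n' > /tmp/kw.txt
#   paste <(awk '{print $1}' /tmp/kw.txt) \
#         <(tr '[:lower:]' '[:upper:]' </tmp/kw.txt | cut -d' ' -f2-)
#   # -> KW101-0001  OKAY GOOGLE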
- tr '[:lower:]' '[:upper:]' < $kwsdatadir/keywords.txt | \ - awk '{for(n=2;n<=NF;n++) { printf("%s ", $n); } print ""; }' | \ - paste <(awk '{print $1}' <$kwsdatadir/keywords.txt) - \ - > $kwsdatadir/temp/keywords.txt || exit 1; -else - cp $temp_syllable_lexicon $kwsdatadir/temp/syllable_lexicon.txt || exit 1; - cp $kwsdatadir/keywords.txt $kwsdatadir/temp/ || exit 1; -fi - -cat $kwsdatadir/temp/syllable_lexicon.txt | awk '{print $1}' | sort | uniq | \ - awk 'BEGIN{print " 0";} {print $1, NR;}' > $kwsdatadir/temp/words.txt - -sym2int.pl --map-oov 0 -f 2- $kwsdatadir/temp/words.txt < $kwsdatadir/temp/keywords.txt \ - > $kwsdatadir/temp/keywords_all.int - -cat $kwsdatadir/temp/keywords_all.int | \ - grep -v " 0 " | grep -v " 0$" > $kwsdatadir/keywords.int - -cut -f 1 -d ' ' $kwsdatadir/keywords.int | \ - local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_invocab.xml - -cat $kwsdatadir/temp/keywords_all.int | \ - egrep " 0 | 0$" | cut -f 1 -d ' ' | \ - local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_outvocab.xml - -local/make_lexicon_fst_special.pl $kwsdatadir/temp/syllable_lexicon.txt $silence_word | \ - sym2int.pl -f 4 $kwsdatadir/temp/words.txt | \ - sym2int.pl -f 3 $langdir/words.txt | \ - fstcompile | \ - fstarcsort --sort_type=olabel > $kwsdatadir/temp/L.fst || exit 1; - -# Compile keywords into FSTs, compose with lexicon to get syllables -# and project on the input (keeping only syllable labels), -# before writing to keywords.fsts - -transcripts-to-fsts ark:$kwsdatadir/keywords.int ark:- | \ - fsttablecompose $kwsdatadir/temp/L.fst ark:- ark,t:- | \ - awk '{if (NF < 4) { print; } else { print $1, $2, $3, $3, $5; }}' > \ - $kwsdatadir/keywords.fsts - -# Create utterance id for each utterance -cat $datadir/segments | \ - awk '{print $1}' | \ - sort | uniq | perl -e ' - $idx=1; - while(<>) { - chomp; - print "$_ $idx\n"; - $idx++; - }' > $kwsdatadir/utter_id - -# Map utterance to the names that will appear in the rttm file. You have -# to modify the commands below accoring to your rttm file -cat $datadir/segments | awk '{print $1" "$2}' | sort | uniq > $kwsdatadir/utter_map; - -echo "Kws data preparation succeeded" diff --git a/egs/babel/s5c/local/kws_gen_oracle_lattices.sh b/egs/babel/s5c/local/kws_gen_oracle_lattices.sh index aa9e22cca96..b73112b191d 100755 --- a/egs/babel/s5c/local/kws_gen_oracle_lattices.sh +++ b/egs/babel/s5c/local/kws_gen_oracle_lattices.sh @@ -3,7 +3,7 @@ # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) # Apache 2.0. -# Begin configuration section. +# Begin configuration section. cmd=run.pl duptime=0.5 model=final.mdl @@ -35,8 +35,8 @@ mkdir -p $oracledir/log for filename in $lang/words.txt $decodedir/num_jobs \ $data/text $decodedir/lat.1.gz \ $decodedir/../$model ; do - if [[ ! -f $filename ]] ; then - echo "FATAL: File $filename does not exist!" + if [[ ! -f $filename ]] ; then + echo "FATAL: File $filename does not exist!" 
exit 1;
   fi
 done

@@ -44,7 +44,7 @@ done
 nj=`cat $decodedir/num_jobs`

 (cd $decodedir; ln -s ../$model final.mdl )
-(cd $oracledir; echo "$nj" > num_jobs )
+(cd $oracledir; echo "$nj" > num_jobs )

 $cmd LAT=1:$nj $oracledir/log/lat.LAT.log \
   cat $data/text \| \
diff --git a/egs/babel/s5c/local/kws_oracle.sh b/egs/babel/s5c/local/kws_oracle.sh
index 44334ba1413..c7aa661664f 100755
--- a/egs/babel/s5c/local/kws_oracle.sh
+++ b/egs/babel/s5c/local/kws_oracle.sh
@@ -1,23 +1,23 @@
 #!/bin/bash

 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Jan Trmal)
-#                 2013  Johns Hopkins University
+#                 2013  Johns Hopkins University
 # Apache 2.0.

 . ./path.sh
 . ./cmd.sh

-# Begin configuration section.
+# Begin configuration section.
 cmd=run.pl
-acwt=0.09091 #Acoustic weight -- should not be necessary for oracle lattices
+acwt=0.09091 #Acoustic weight -- should not be necessary for oracle lattices
 duptime=0.6  #Max time difference in which the occurences of the same KW will be seen as duplicates
 text=  # an alternative reference text to use. when not specified, the <data-dir>/text will be used
-model= # acoustic model to use
+model= # acoustic model to use
 extraid= # kws setup extra ID (kws task was setup using kws_setup.sh --extraid
 stage=0 # to resume the computation from different stage
 # End configuration section.

-set -e
+set -e
 set -o pipefail

 echo "$0 $@"  # Print the command line for logging

@@ -47,7 +47,7 @@ fi
 if [ -z "$model" ]; then # if --model was not specified on the command line...
   srcdir=`dirname $decodedir`; # The model directory is one level up from decoding directory.
-  model=$srcdir/final.mdl;
+  model=$srcdir/final.mdl;
 fi

 if [ -z $extraid ] ; then # the same logic as with kws_setup.sh
@@ -59,7 +59,7 @@ fi
 nj=`cat $decodedir/num_jobs`;

 oracledir=$decodedir/kws_oracle
-mkdir -p $oracledir
+mkdir -p $oracledir
 mkdir -p $oracledir/log

 if [ $stage -le 0 ] ; then
@@ -119,17 +119,17 @@ if [ $stage -le 4 ]; then
   echo "======================================================="
   (
   echo -n "ATWV-full "
-  grep Occurrence $oracledir/sum.txt | cut -d '|' -f 13
+  grep Occurrence $oracledir/sum.txt | cut -d '|' -f 13
   )
   #-(
   #-echo -n "ATWV-invocab "
-  #-grep Occurrence $oracledir/invocab.sum.txt | cut -d '|' -f 13
+  #-grep Occurrence $oracledir/invocab.sum.txt | cut -d '|' -f 13
   #-) || echo "Error occured getting the invocab results"
   #-(
   #-echo -n "ATWV-outvocab "
-  #-grep Occurrence $oracledir/outvocab.sum.txt | cut -d '|' -f 13
+  #-grep Occurrence $oracledir/outvocab.sum.txt | cut -d '|' -f 13
   #-) || echo "Error occured getting the outvocab results"

   echo "======================================================="
diff --git a/egs/babel/s5c/local/kws_score_f4de.sh b/egs/babel/s5c/local/kws_score_f4de.sh
index d761e080c1c..cd6948a8a08 100755
--- a/egs/babel/s5c/local/kws_score_f4de.sh
+++ b/egs/babel/s5c/local/kws_score_f4de.sh
@@ -16,11 +16,11 @@ help_message="$0: score the kwslist using the F4DE scorer from NIST
   Example:
     $0 [additional-parameters]
   where the most important additional parameters can be:
-    --extraid          #for using, when a non-default kws tasks are setup
+    --extraid          #for use when a non-default kws task is set up
                         (using the kws_setup.sh --extraid) for a kaldi-single data-dir
     --kwlist           #allows for an alternative kwlist -- if not set, the
                         default kwlist is taken from
-    --f4de-prefix      #allows for scoring the same results using
+    --f4de-prefix      #allows for scoring the same results using
                         different kwlists and storing them in the same dir
 "
 echo $0 $@

@@ -72,8 +72,9 @@ done
 echo KWSEval -e $ecf -r $rttm -t $kwlist \
   -s $kwsoutputdir/kwslist.xml -c -o -b -d -f $kwsoutputdir

-KWSEval -e $ecf -r $rttm -t $kwlist \
-  -s $kwsoutputdir/kwslist.xml -c -o -b -d -f ${kwsoutputdir}${f4de_prefix} || exit 1;
+KWSEval -e $ecf -r $rttm -t $kwlist -a --zGlobalMeasures MAP \
+  --zGlobalMeasures MAPpct --zGlobalMeasures Optimum --zGlobalMeasures Supremum \
+  -s $kwsoutputdir/kwslist.xml -c -o -b -d -f ${kwsoutputdir}${f4de_prefix} || exit 1;

 duration=`cat ${kwsoutputdir}${f4de_prefix}/sum.txt | grep TotDur | cut -f 3 -d '|' | sed "s/\s*//g"`

diff --git a/egs/babel/s5c/local/kws_search.sh b/egs/babel/s5c/local/kws_search.sh
index 4b275048e0e..9e998d6c3f9 100755
--- a/egs/babel/s5c/local/kws_search.sh
+++ b/egs/babel/s5c/local/kws_search.sh
@@ -10,7 +10,7 @@ help_message="$(basename $0): do keyword indexing and search.  data-dir is assum
 Usage: $(basename $0) <lang-dir> <data-dir> <decode-dir>"

-# Begin configuration section.
+# Begin configuration section.
 #acwt=0.0909091
 min_lmwt=7
 max_lmwt=17
@@ -101,7 +101,7 @@ if [ ! -z "$model" ]; then
 else
   model_flags=
 fi
-
+
 if [ $stage -le 0 ] ; then

   if [ ! -f $indices_dir/.done.index ] ; then
@@ -109,8 +109,8 @@ if [ $stage -le 0 ] ; then
     for lmwt in `seq $min_lmwt $max_lmwt` ; do
       indices=${indices_dir}_$lmwt
       mkdir -p $indices
-
-      acwt=`perl -e "print (1.0/$lmwt);"`
+
+      acwt=`perl -e "print (1.0/$lmwt);"`
       [ ! -z $silence_word ] && silence_opt="--silence-word $silence_word"
       steps/make_index.sh $silence_opt --cmd "$cmd" --acwt $acwt $model_flags\
         --skip-optimization $skip_optimization --max-states $max_states \
diff --git a/egs/babel/s5c/local/kws_setup.sh b/egs/babel/s5c/local/kws_setup.sh
index f1036f100de..a6b87ef004f 100755
--- a/egs/babel/s5c/local/kws_setup.sh
+++ b/egs/babel/s5c/local/kws_setup.sh
@@ -3,7 +3,7 @@
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
 # Apache 2.0.

-# Begin configuration section.
+# Begin configuration section.
 cmd=run.pl
 case_insensitive=true
 subset_ecf=
@@ -18,7 +18,7 @@ silence_word=  # Optional silence word to insert (once) between words of the tra

 echo "$0 $@"  # Print the command line for logging

-set -e
+set -e
 set -u
 set -o pipefail

@@ -26,13 +26,13 @@ help_message="$0: Initialize and setup the KWS task directory
 Usage: $0 [rttm-file]
 allowed switches:
-    --subset-ecf /path/to/filelist  # The script will subset the ecf file
+    --subset-ecf /path/to/filelist  # The script will subset the ecf file
                                     # to contain only the files from the filelist
    --rttm-file /path/to/rttm        # the preferred way how to specify the rttm
-                                    # the older way (as an in-line parameter is
+                                    # the older way (as an in-line parameter) is
                                     # obsolete and will be removed in near future
    --case-insensitive               # Shall we be case-sensitive or not?
-                                    # Please not the case-sensitivness depends
+                                    # Please note that the case-sensitivity depends
                                     # on the shell locale!
--use-icu # Use the ICU uconv binary to normalize casing --icu-transform # When using ICU, use this transliteration @@ -85,13 +85,13 @@ fi mkdir -p $kwsdatadir if [ -z $subset_ecf ] ; then - test -f $kwsdatadir/ecf.xml && rm -f $kwsdatadir/ecf.xml + test -f $kwsdatadir/ecf.xml && rm -f $kwsdatadir/ecf.xml cp "$ecf_file" $kwsdatadir/ecf.xml || exit 1 else local/make_ecf_subset.sh $subset_ecf $ecf_file > $kwsdatadir/ecf.xml fi -if $kwlist_wordlist ; then +if $kwlist_wordlist ; then ( echo '' awk '{ printf(" \n", $1); diff --git a/egs/babel/s5c/local/lattice_to_ctm.sh b/egs/babel/s5c/local/lattice_to_ctm.sh index 08a1b5889a7..5fbde42d237 100755 --- a/egs/babel/s5c/local/lattice_to_ctm.sh +++ b/egs/babel/s5c/local/lattice_to_ctm.sh @@ -39,8 +39,7 @@ if [ -z "$model" ] ; then fi -for f in $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do +for f in $lang/words.txt $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; done @@ -49,17 +48,31 @@ name=`basename $data`; # e.g. eval2000 mkdir -p $dir/scoring/log if [ $stage -le 0 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - set -e -o pipefail \; \ - mkdir -p $dir/score_LMWT/ '&&' \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ - lattice-prune --beam=$beam ark:- ark:- \| \ - lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ - lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ - utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; + if [ ! -f $lang/phones/word_boundary.int ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ + set -e -o pipefail \; \ + mkdir -p $dir/score_LMWT/ '&&' \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ + lattice-prune --beam=$beam ark:- ark:- \| \ + lattice-align-words-lexicon $lang/phones/align_lexicon.int $model ark:- ark:- \| \ + lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \ + utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ + '>' $dir/score_LMWT/$name.ctm || exit 1; + else + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ + set -e -o pipefail \; \ + mkdir -p $dir/score_LMWT/ '&&' \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ + lattice-prune --beam=$beam ark:- ark:- \| \ + lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \ + lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ + utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \ + utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ + '>' $dir/score_LMWT/$name.ctm || exit 1; + fi fi if [ $stage -le 1 ]; then @@ -76,12 +89,12 @@ if [ $stage -le 1 ]; then grep -v -E '' | \ perl -e '@list = (); %list = (); while(<>) { - chomp; - @col = split(" ", $_); + chomp; + @col = split(" ", $_); push(@list, $_); - $key = "$col[0]" . " $col[1]"; + $key = "$col[0]" . 
" $col[1]"; $list{$key} = 1; - } + } foreach(sort keys %list) { $key = $_; foreach(grep(/$key/, @list)) { diff --git a/egs/babel/s5c/local/lattice_to_ctm_syllable.sh b/egs/babel/s5c/local/lattice_to_ctm_syllable.sh deleted file mode 100755 index 7165a7a04e5..00000000000 --- a/egs/babel/s5c/local/lattice_to_ctm_syllable.sh +++ /dev/null @@ -1,115 +0,0 @@ -#!/bin/bash -# Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0. - -# begin configuration section. -cmd=run.pl -stage=0 -decode_mbr=true -beam=4 # Use a fairly narrow beam because lattice-align-words is slow-ish. -word_ins_penalty=0.5 -min_lmwt=7 -max_lmwt=17 -cleanup=true -model= - -#end configuration section. - -#debugging stuff -echo $0 $@ - -[ -f ./path.sh ] && . ./path.sh -[ -f ./cmd.sh ] && . ./cmd.sh -. parse_options.sh || exit 1; - -if [ $# -ne 4 ]; then - echo "Usage: $0 [options] " && exit; - echo "This is as lattice_to_ctm.sh, but for syllable-based systems where we want to" - echo "obtain word-level ctms. Here, is a directory like data/local/w2s," - echo "as created by run-6-syllables.sh. It contains:" - echo " G.fst, Ldet.fst, words.txt, word_align_lexicon.int" - echo " Options:" - echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." - echo " --stage (0|1) # (createCTM | filterCTM )." - exit 1; -fi - -data=$1 -lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. -w2sdir=$3 -dir=$4 - -if [ -z "$model" ] ; then - model=`dirname $dir`/final.mdl # Relative path does not work in some cases - #model=$dir/../final.mdl # assume model one level up from decoding dir. - #[ ! -f $model ] && model=`(set +P; cd $dir/../; pwd)`/final.mdl -fi - -for f in $lang/words.txt $lang/phones/word_boundary.int \ - $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz \ - $w2sdir/{G.fst,Ldet.fst,words.txt,word_align_lexicon.int}; do - [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; -done - -name=`basename $data`; # e.g. eval2000 - -mkdir -p $dir/scoring/log - -# we are counting the LM twice since we have both the original, syllable-level LM -# and the new, word-level one, so we scale by 0.5 to get a reasonably scaled -# LM cost. - -if [ $stage -le 0 ]; then - nj=`cat $dir/num_jobs` || exit 1; - $cmd JOB=1:$nj $dir/scoring/log/get_word_lats.JOB.log \ - lattice-compose "ark:gunzip -c $dir/lat.JOB.gz|" $w2sdir/Ldet.fst ark:- \| \ - lattice-determinize ark:- ark:- \| \ - lattice-compose ark:- $w2sdir/G.fst ark:- \| \ - lattice-scale --lm-scale=0.5 ark:- "ark:|gzip -c >$dir/wlat.JOB.gz" || exit 1; -fi - -if [ $stage -le 1 ]; then - $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \ - mkdir -p $dir/score_LMWT/ '&&' \ - lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/wlat.*.gz|" ark:- \| \ - lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ - lattice-prune --beam=$beam ark:- ark:- \| \ - lattice-push ark:- ark:- \| \ - lattice-align-words-lexicon --max-expand=10 --output-if-empty=true $w2sdir/word_align_lexicon.int $model ark:- ark:- \| \ - lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ - utils/int2sym.pl -f 5 $w2sdir/words.txt \| \ - utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \ - '>' $dir/score_LMWT/$name.ctm || exit 1; -fi - -if [ $stage -le 2 ]; then - # Remove some stuff we don't want to score, from the ctm. 
-  for x in $dir/score_*/$name.ctm; do
-    cp $x $x.bkup1;
-    cat $x.bkup1 | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \
-      grep -v -E '|%HESITATION|\(\(\)\)' | \
-      grep -v -E '' | \
-      grep -v -E '' | \
-      grep -v -E '' | \
-      grep -v -E '' | \
-      grep -v -E '' | \
-      perl -e '@list = (); %list = ();
-      while(<>) {
-        chomp;
-        @col = split(" ", $_);
-        push(@list, $_);
-        $key = "$col[0]" . " $col[1]";
-        $list{$key} = 1;
-      }
-      foreach(sort keys %list) {
-        $key = $_;
-        foreach(grep(/$key/, @list)) {
-          print "$_\n";
-        }
-      }' > $x;
-  done
-fi
-
-$cleanup && rm $dir/wlat.*.gz
-
-echo "Lattice2CTM finished on " `date`
-exit 0
diff --git a/egs/babel/s5c/local/make_L_align.sh b/egs/babel/s5c/local/make_L_align.sh
index 03d1ad517fe..50e46a00493 100755
--- a/egs/babel/s5c/local/make_L_align.sh
+++ b/egs/babel/s5c/local/make_L_align.sh
@@ -20,7 +20,7 @@ set -e

 if [ $# -ne 3 ]; then
   echo "This is a simple script that will generate the L_align.fst"
-  echo "The FST L_align.fst is used for getting the force-aligned "
+  echo "The FST L_align.fst is used for getting the force-aligned "
   echo "utterances"
   echo "The script automaticky recognizes the probabilistic lexicon"
   echo "is used and will use the correct file"
@@ -39,7 +39,7 @@ silphone=`cat $dir/phones/optional_silence.txt` || exit 1;
 # Create lexicon with alignment info
 if [ -f $tmpdir/lexicon.txt ] ; then
   cat $tmpdir/lexicon.txt | \
-    awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
+    awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
 elif [ -f $tmpdir/lexiconp.txt ] ; then
   cat $tmpdir/lexiconp.txt | \
     awk '{printf("%s #1 ", $1); for (n=3; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
diff --git a/egs/babel/s5c/local/make_ecf_subset.sh b/egs/babel/s5c/local/make_ecf_subset.sh
index 53bddcbc839..9bdd95c3e27 100755
--- a/egs/babel/s5c/local/make_ecf_subset.sh
+++ b/egs/babel/s5c/local/make_ecf_subset.sh
@@ -8,7 +8,7 @@ echo "$0 $@" 1>&2 # Print the command line for logging
 [ -f ./path.sh ] && . ./path.sh; # source the path.
 . parse_options.sh || exit 1;
-help_message="$0: generates an subset ecf file for spoken term detection evaluation.
+help_message="$0: generates a subset ecf file for spoken term detection evaluation.
   The first parameter specifies the descriptor of the subset,
   the second parameter specifies the original ecf file.
   The file will be generated in the kws subdirectory of the directory
@@ -47,6 +47,6 @@ duration=`grep -F -f $list_file $src_ecf_file | sed "s/.*dur=\"\([0-9.][0-9.]*\)
 # Output is produced here:
 (
 grep ""
 )
diff --git a/egs/babel/s5c/local/make_lexicon_fst_special.pl b/egs/babel/s5c/local/make_lexicon_fst_special.pl
index 976c28c029c..3df6e7a9527 100755
--- a/egs/babel/s5c/local/make_lexicon_fst_special.pl
+++ b/egs/babel/s5c/local/make_lexicon_fst_special.pl
@@ -3,7 +3,7 @@
 # Copyright 2012 Johns Hopkins University (author: Daniel Povey)

 # makes lexicon FST -- special version only for use in keyword search
-# for allowing optional silences between words. This version has
+# for allowing optional silences between words.
This version has # no pron-probs involved, and # does support an optional silence, but this silence is only allowed # between words (where it may occur an arbitrary number of times), diff --git a/egs/babel/s5c/local/make_lexicon_subset.sh b/egs/babel/s5c/local/make_lexicon_subset.sh index c2bf0e21623..1e77fcaa2b9 100755 --- a/egs/babel/s5c/local/make_lexicon_subset.sh +++ b/egs/babel/s5c/local/make_lexicon_subset.sh @@ -10,9 +10,9 @@ input_lexicon_file=$2 output_lexicon_file=$3 ( - #find $dev_data_dir/transcription/ -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g' + #find $dev_data_dir/transcription/ -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g' find $transcriptions -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g' -) | sort -u | awk ' +) | sort -u | awk ' BEGIN { while(( getline line< ARGV[2] ) > 0 ) { split(line, e, "\t") @@ -20,7 +20,7 @@ output_lexicon_file=$3 } FILENAME="-" i=0 - + while(( getline word< ARGV[1] ) > 0 ) { if (word in LEXICON) print LEXICON[word] diff --git a/egs/babel/s5c/local/make_syllable_lexicon.sh b/egs/babel/s5c/local/make_syllable_lexicon.sh deleted file mode 100755 index 118845982b9..00000000000 --- a/egs/babel/s5c/local/make_syllable_lexicon.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash - - -help="Usage: $(basename $0) - E.g. $(basename $0) data/local/lexicon.txt word2syllable_lexicon.txt data/local/syllables/lexicon.txt - Here, is the text-form lexicon but with tabs separating the syllables, e.g. - WORD w o rr d - has entries of the form - WORD w/o rr/d - has entries of the form - w/o w o" - -# config vars: -pron_probs=false # If you set --pron-probs true, will expect pron-prob on input lexicon and produce - # pron-probs on word2syllable lexicon. -# end configs. -. utils/parse_options.sh - -if [ $# != 3 ]; then - echo $help 2>&1; - exit 1; -fi - -lex_in=$1 -w2s_lex_out=$2 -s2p_lex_out=$3 - -[ ! -f $lex_in ] && echo "No such file $lex_in" && exit 1; -mkdir -p `dirname $w2s_lex_out` -mkdir -p `dirname $s2p_lex_out` - -cat $lex_in | perl -e ' - ($w2s, $pron_probs) = @ARGV; - open(W2S, ">$w2s") || die "opening word to syllable lexicon"; - $saw_tabs = 0; - while() { - chop; - if ($pron_probs eq "true") { - m:(\S+)\s+(\S+)\s+(.+): || die "Bad line $_ (note: have pron probs)."; - $word = $1; - $prob = $2; - $pron = $3; - ($prob > 0.0 && $prob <= 1.0) || die "Bad pron-prob $prob in line $_"; - print W2S "$word $prob"; - } else { - m:(\S+)\s+(.+): || die "Bad line $_ (note: do not have pron probs)."; - $word = $1; - $pron = $2; - print W2S "$word"; - } - @A = split("\t", $pron); - @A >= 1 || die "Bad lexicon line $_\n"; - if (@A > 1) { $saw_tabs = 1; } - foreach $s (@A) { - $s =~ s/^\s+//; # Remove leading space. - $s =~ s/\s+$//; # Remove trailing space. - if ($s ne "") { - $s =~ m:/: && die "slash (/) present in syllable $s (not allowed)\n"; - $t = join("/", split(" ", $s)); # replace spaces with / - print W2S " $t"; - print "$t $s\n"; - } - } - print W2S "\n"; - } - if (! $saw_tabs) { - die "You seem to be using as input to this script, a lexicon that does not have " . 
- "syllables separated by tabs."; - } - ' $w2s_lex_out $pron_probs | sort | uniq > $s2p_lex_out || exit 1; - -exit 0; diff --git a/egs/babel/s5c/local/naive_comb.pl b/egs/babel/s5c/local/naive_comb.pl index e49ac972169..74ad20d84e3 100755 --- a/egs/babel/s5c/local/naive_comb.pl +++ b/egs/babel/s5c/local/naive_comb.pl @@ -102,7 +102,7 @@ sub KwslistTimeCompare { } } else { $a->[0] cmp $b->[0]; - } + } } sub KwslistTimeSort { @@ -124,7 +124,7 @@ sub KwslistTimeSort { my $method = 1; my $power = 0.5; -GetOptions('tolerance=f' => \$tolerance, +GetOptions('tolerance=f' => \$tolerance, 'method=i' => \$method, 'power=f' => \$power, 'inv-power=f' => sub { (my $opt, my $val) = @_; $power = 1.0/$val;}); diff --git a/egs/babel/s5c/local/ndx2flist.pl b/egs/babel/s5c/local/ndx2flist.pl index 48fc3dec101..c5f676affcd 100755 --- a/egs/babel/s5c/local/ndx2flist.pl +++ b/egs/babel/s5c/local/ndx2flist.pl @@ -21,7 +21,7 @@ #;; #;; Index for WSJ0 SI-short Sennheiser training data #;; Data is read WSJ sentences, Sennheiser mic. -#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts +#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts #;; per speaker TI) = 7236 utts #;; #11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 @@ -37,7 +37,7 @@ foreach $fn (@ARGV) { $fn =~ m:.+/([0-9\.\-]+)/?$: || die "Bad command-line argument $fn\n"; - $disk_id=$1; + $disk_id=$1; $disk_id =~ tr/-\./__/; # replace - and . with - so 11-10.1 becomes 11_10_1 $fn =~ s:/$::; # Remove final slash, just in case it is present. $disk2fn{$disk_id} = $fn; diff --git a/egs/babel/s5c/local/nist_eval/create_compound_set.sh b/egs/babel/s5c/local/nist_eval/create_compound_set.sh index 63de46f6106..1e745d1ebba 100755 --- a/egs/babel/s5c/local/nist_eval/create_compound_set.sh +++ b/egs/babel/s5c/local/nist_eval/create_compound_set.sh @@ -3,7 +3,7 @@ #Simple script to create compound set info that will allow for more automatized #work with the shadow set. # -#The notion of shadow data set came from the need to be able to verify +#The notion of shadow data set came from the need to be able to verify #the output of the recognizer during decoding the evaluation data. #The idea is simple -- instead of decoding just the eval data, decode both #eval data plus the dev data (or at least some portion of it) interleved diff --git a/egs/babel/s5c/local/nist_eval/export_systems.sh b/egs/babel/s5c/local/nist_eval/export_systems.sh index 7e514bcc077..d0af608416c 100755 --- a/egs/babel/s5c/local/nist_eval/export_systems.sh +++ b/egs/babel/s5c/local/nist_eval/export_systems.sh @@ -2,11 +2,11 @@ set -e set -o pipefail -. ./cmd.sh; . ./path.sh; +. ./cmd.sh; . 
./path.sh; #( -#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* +#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* #bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it* #) & #bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/tri6*_nnet*/decode_shadow.uem* @@ -14,9 +14,9 @@ set -o pipefail ( bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it* -#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* +#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it* ) & -bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/tri6*_nnet*/decode_shadow.uem +bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/tri6*_nnet*/decode_shadow.uem wait wait diff --git a/egs/babel/s5c/local/nist_eval/filter_data.sh b/egs/babel/s5c/local/nist_eval/filter_data.sh index f36903035b6..8576b93fef8 100755 --- a/egs/babel/s5c/local/nist_eval/filter_data.sh +++ b/egs/babel/s5c/local/nist_eval/filter_data.sh @@ -38,7 +38,7 @@ outputname=$name while (( "$#" )); do resultdir=$1;shift - echo "Processing data directory $resultdir" + echo "Processing data directory $resultdir" [ ! -d $resultdir ] && echo "Decode dir $resultdir does not exist!" && exit 1; diff --git a/egs/babel/s5c/local/nist_eval/get_training_times.sh b/egs/babel/s5c/local/nist_eval/get_training_times.sh index 2b92dcefcdc..f5b0012c2f2 100755 --- a/egs/babel/s5c/local/nist_eval/get_training_times.sh +++ b/egs/babel/s5c/local/nist_eval/get_training_times.sh @@ -24,8 +24,8 @@ function process { replace+="\t" done - ( - eval `grep "group=all"` + ( + eval `grep "group=all"` echo -n "threads=$total_threads" echo -n " cpu_time=$total_cpu_time wall_time=$clock_time" echo -n " human_cpu_time="`convertsecs $total_cpu_time` @@ -43,17 +43,17 @@ local/summarize_logs.pl $dir/exp/make_*/*train*/ | process if [ -d $dir/data/local/extend ] ; then legend "Extending the lexicon" - local/summarize_logs.pl $dir/data/local/extend/tmp/log | process + local/summarize_logs.pl $dir/data/local/extend/tmp/log | process fi legend "Training upto stage tri5" -local/summarize_logs.pl $dir/exp/mono*/log $dir/exp/tri{1..5}/log $dir/exp/tri{1..4}_ali*/log | process +local/summarize_logs.pl $dir/exp/mono*/log $dir/exp/tri{1..5}/log $dir/exp/tri{1..4}_ali*/log | process legend "SGMM2 stage training" -local/summarize_logs.pl $dir/exp/ubm5/log $dir/exp/sgmm5/log $dir/exp/tri5_ali/log | process +local/summarize_logs.pl $dir/exp/ubm5/log $dir/exp/sgmm5/log $dir/exp/tri5_ali/log | process legend "SGMM2+bMMI stage training" -local/summarize_logs.pl $dir/exp/sgmm5_*/log $dir/exp/ubm5/log $dir/exp/sgmm5_denlats/log/* | process +local/summarize_logs.pl $dir/exp/sgmm5_*/log $dir/exp/ubm5/log $dir/exp/sgmm5_denlats/log/* | process nnet=tri6_nnet [ ! -d $dir/exp/$nnet ] && nnet=tri6b_nnet diff --git a/egs/babel/s5c/local/nist_eval/make_release.sh b/egs/babel/s5c/local/nist_eval/make_release.sh index ce784431a5c..aff89f92846 100755 --- a/egs/babel/s5c/local/nist_eval/make_release.sh +++ b/egs/babel/s5c/local/nist_eval/make_release.sh @@ -57,7 +57,7 @@ function export_file { else echo "$source_file -> $target_file" fi - + else echo "The file is already there, not doing anything. 
Either change the version (using --version), or delete that file manually)" exit 1 @@ -72,7 +72,7 @@ function export_kws_file { fixed_xml=$2 kwlist=$3 export_xml=$4 - + echo "Exporting KWS $source_xml as `basename $export_xml`" if [ -f $source_xml ] ; then cp $source_xml $fixed_xml.bak @@ -110,7 +110,7 @@ function find_best_stt_result { local dir=$1 local mask=$2 local record=`(find $dir -name "*.ctm.sys" -path "$mask" -not -ipath "*rescore*" | xargs grep Avg) | sed 's/|//g' | column -t | sort -n -k 9 | head -n 1` - + echo $record >&2 local file=`echo $record | awk -F ":" '{print $1}'` #echo $file >&2 @@ -200,7 +200,7 @@ function figure_out_scase { if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then local basnam=${ecf%%.ecf.xml} local scase=`echo $basnam | awk -F _ '{print $2}'` - + if [[ $scase =~ conv-dev(\..*)? ]]; then echo "BaDev" elif [[ $scase =~ conv-eval(\..*)? ]]; then @@ -211,7 +211,7 @@ function figure_out_scase { echo "BaDev" return 1 fi - else + else echo "WARNING: The ECF file $ecf is probably not an official file" >&2 echo "WARNING: Does not match the mask IARPA-babel.*.ecf.xml" >&2 echo "BaDev" @@ -225,7 +225,7 @@ function figure_out_partition { if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then local basnam=${ecf%%.ecf.xml} local scase=`echo $basnam | awk -F _ '{print $2}'` - + if [[ $scase =~ conv-dev(\..*)? ]]; then echo "conv-dev" elif [[ $scase =~ conv-eval(\..*)? ]]; then @@ -235,7 +235,7 @@ function figure_out_partition { echo "conv-dev" return 1 fi - else + else echo "WARNING: The ECF file $ecf is probably not an official file" >&2 echo "conv-dev" return 1 @@ -264,7 +264,7 @@ fi #data=data/shadow.uem dirid=`basename $data` kws_tasks="kws " -[ -f $data/extra_kws_tasks ] && kws_tasks+=`cat $data/extra_kws_tasks | awk '{print $1"_kws"}'` +[ -f $data/extra_kws_tasks ] && kws_tasks+=`cat $data/extra_kws_tasks | awk '{print $1"_kws"}'` [ -d $data/compounds ] && compounds=`ls $data/compounds` if [ -z "$compounds" ] ; then @@ -295,7 +295,7 @@ else submit_to_google $best_one $ATWV $MTWV ) || echo "Submission failed!" - + for compound in $compounds ; do compound_best_one=`echo $best_one | sed "s:$master/${kws}_:$compound/${kws}_:g"` echo "From ($kws) $best_one going to $compound_best_one" diff --git a/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh b/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh index 760d7ee80d5..3b12222e13a 100755 --- a/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh +++ b/egs/babel/s5c/local/nnet2/get_egs_semi_supervised.sh @@ -7,7 +7,7 @@ # This script, which will generally be called from other neural-net training # scripts, extracts the training examples used to train the neural net (and also # the validation examples used for diagnostics), and puts them in separate archives. -# This is similar to the script steps/nnet2/get_egs.sh, but this also extracts +# This is similar to the script steps/nnet2/get_egs.sh, but this also extracts # frames from unsupervsied data. Decode directory for unsupervised data which # has the best path done along with posteriors (can be done using local/combine_posteriors.sh) @@ -25,15 +25,15 @@ samples_per_iter=400000 # each iteration of training, see this many samples # per job. This is just a guideline; it will pick a number # that divides the number of samples in the entire data. 
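# As a made-up example of the guideline above: 2,000,000 usable frames with
# samples_per_iter=400000 split cleanly into 5 chunks of 400,000 each; with
# 1,900,000 frames the script would instead settle on the nearest even split,
# i.e. 5 chunks of 380,000.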
transform_dir_sup= # If supplied, overrides alidir -transform_dir_unsup= +transform_dir_unsup= num_jobs_nnet=16 # Number of neural net jobs to run in parallel stage=-10 -io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. splice_width=4 # meaning +- 4 frames on each side for second LDA spk_vecs_dir_sup= spk_vecs_dir_unsup= random_copy=false -weight_threshold=0.7 # Threshold on confidence factor of an unsupervised data +weight_threshold=0.7 # Threshold on confidence factor of an unsupervised data # frame for it to not be ignored supervised_copies=3 # Make x copies of supervised data. use_frame_selection=true @@ -70,7 +70,7 @@ if [ $# != 6 ]; then echo " --supervised-copies <#copies|3> # Make copies of supervised data" echo " --transform-dir-sup # Directory with transforms for supervised training data" echo " --transform-dir-unsup # Directory with transforms for unsupervised training data" - + exit 1; fi @@ -109,7 +109,7 @@ cp $alidir/tree $dir awk '{print $1}' $data_sup/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset > $dir/valid_uttlist -# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately +# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately if [ -f $data_sup/utt2uniq ]; then echo "File $data_sup/utt2uniq exists, so augmenting valid_uttlist to" echo "include all perturbed versions of the same 'real' utterances." @@ -121,7 +121,7 @@ if [ -f $data_sup/utt2uniq ]; then rm $dir/uniq2utt $dir/valid_uttlist.tmp fi -# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately +# TODO (Vimal 22-Jan-14): Might need to deal unsupervised data separately awk '{print $1}' $data_sup/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \ head -$num_utts_subset > $dir/train_subset_uttlist @@ -137,7 +137,7 @@ if [ "$norm_vars" != "$norm_vars_unsup" ]; then fi cp $alidir/norm_vars $dir 2>/dev/null -## Set up features. +## Set up features. if [ -z $feat_type ]; then if [ -f $alidir/final.mat ] && [ ! -f $transform_dir_sup/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi fi @@ -150,7 +150,7 @@ case $feat_type in valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- |" train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- |" ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` #splice_opts_unsup=`cat $latdir/../splice_opts 2>/dev/null` #if [ "$splice_opts" -ne "$splice_opts_unsup" ]; then @@ -159,14 +159,14 @@ case $feat_type in # exit 1 #fi cp $alidir/splice_opts $dir/splice_opts 2>/dev/null - + #if [ "`diff $alidir/final.mat $latdir/../final.mat &> /dev/null; echo $?`" -ne "0" ]; then # echo "ERROR: Features mismatch for supervised and unsupervised data!" 
# echo "LDA matrices $alidir/final.mat for supervised data and $latdir/../final.mat for unsupervised data don't match" # exit 1 #fi - cp $alidir/final.mat $dir + cp $alidir/final.mat $dir feats_sup="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata_sup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_sup/JOB/utt2spk scp:$sdata_sup/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" feats_unsup="ark,s,cs:cat $sdata_unsup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_unsup/JOB/utt2spk scp:$sdata_unsup/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" @@ -309,18 +309,18 @@ if [ $stage -le 3 ]; then for (( i=0; i \$fragMarkers, - "oov=s" => \$OOV_symbol, + "oov=s" => \$OOV_symbol, "vocab=s" => \$vocabFile, "icu-transform=s" => \$icu_transform, "get-whole-transcripts=s" => \$get_whole_transcripts @@ -112,7 +112,7 @@ print STDERR ("\tLimiting transcriptions to words in $vocabFile\n"); print STDERR ("\tMapping OOV tokens to \"$OOV_symbol\"\n"); print STDERR ("\tif they remain OOV even after removing [$fragMarkers] from either end\n") if ($fragMarkers); - } + } print STDERR ("$0 ADVICE: Use full path for the Input Directory\n") unless ($inDir=~m:^/:); } else { print STDERR ("Usage: $0 [--options] InputDir OutputDir\n"); @@ -295,7 +295,7 @@ } else { print STDERR ("$0 ERROR: No .txt files found $TranscriptionDir\n"); exit(1); - } + } } else { print STDERR ("$0 ERROR: No directory named $TranscriptionDir\n"); exit(1); @@ -322,8 +322,8 @@ $SampleRate = 8000; #default while ($#Info>=0) { $line = shift @Info; - $SampleCount = $1 if ($line =~ m:sample_count -i (\d+):); - $SampleRate = $1 if ($line =~ m:sample_rate -i (\d+):); + $SampleCount = $1 if ($line =~ m:sample_count -i (\d+):); + $SampleRate = $1 if ($line =~ m:sample_rate -i (\d+):); } if ($SampleCount<0) { # Unable to extract a valid duration from the sphere header @@ -342,7 +342,7 @@ print STDERR ("$0: Recorded durations from headers of $numFiles .sph files\n"); } else { print STDERR ("$0 NOTICE: No .sph files in $AudioDir\n"); - } + } @AudioFiles = `ls ${AudioDir}/*.wav`; if ($#AudioFiles >= 0) { @@ -378,8 +378,8 @@ print STDERR ("$0: Recorded durations from headers of $numFiles .sph files\n"); } else { print STDERR ("$0 NOTICE: No .wav files in $AudioDir\n"); - } - + } + if ( $#waveformName == 0 ) { print STDERR ("$0 ERROR: No audio files found!"); } diff --git a/egs/babel/s5c/local/prepare_lexicon.pl b/egs/babel/s5c/local/prepare_lexicon.pl index 721e56a0dcf..ff128f07637 100755 --- a/egs/babel/s5c/local/prepare_lexicon.pl +++ b/egs/babel/s5c/local/prepare_lexicon.pl @@ -27,10 +27,10 @@ # 㓤 k_1 i:_1 t_1 # 兄妹 h_1 i:_1 N_1 m_2 u:j_2 # 兄妹 h_1 i:_1 N_1 m_6 u:j_6 -# +# # # Write only one pronunciation per line -# Transfer any tags, prefixed by underscores, to phones in the syllable +# Transfer any tags, prefixed by underscores, to phones in the syllable # Remove the syllable boundary markers, given by periods or pound signs # # NOTE: The Romainzation is present only for some languages. See -r option. 
@@ -46,7 +46,7 @@ $icu_transform = ""; $phonemap=""; # -# - nonsilence_phones.txt: tagged phones from the new lexicon +# - nonsilence_phones.txt: tagged phones from the new lexicon # # - optional_silence.txt: phones used to model silence in acoustic training # @@ -61,12 +61,12 @@ # ############################################################################### -GetOptions("add=s" => \$nsWordsFile, - "oov=s" => \$OOV_symbol, - "romanized!" => \$romanized, - "sil=s" => \$sil, +GetOptions("add=s" => \$nsWordsFile, + "oov=s" => \$OOV_symbol, + "romanized!" => \$romanized, + "sil=s" => \$sil, "icu-transform=s" => \$icu_transform, - "phonemap=s" => \$phonemap + "phonemap=s" => \$phonemap ); if ($#ARGV == 1) { @@ -165,7 +165,7 @@ $syllable =~ s:\s+: :g; @original_phones = split(" ", $syllable); @substituted_original_phones=(); - + foreach $phone (@original_phones) { if (defined $phonemap_hash{$phone} ) { #print "Sub: $phone => " . join (' ', @{$phonemap_hash{$phone}}) . "\n"; @@ -205,7 +205,7 @@ # It is a phone if ( $substituted_phones{phone} ) { die "ERROR, the $new_phone and $phone are both existing phones, so we cannot do automatic map!"; - } + } $is_original_phone{$phone} = "$phone"; $new_phones .= " $phone"; } @@ -277,7 +277,7 @@ && print STDERR ("$0: Wrote $numProns pronunciations to $outLex\n"); ############################################################################### -# - nonsilence_phones.txt: tagged phones from the new lexicon, 1 phone/line +# - nonsilence_phones.txt: tagged phones from the new lexicon, 1 phone/line ############################################################################### foreach $phone (sort keys %is_new_phone) { diff --git a/egs/babel/s5c/local/prepare_stm.pl b/egs/babel/s5c/local/prepare_stm.pl index edf1b43676d..b4daec585e3 100755 --- a/egs/babel/s5c/local/prepare_stm.pl +++ b/egs/babel/s5c/local/prepare_stm.pl @@ -92,7 +92,7 @@ @tokens = split(/\s+/, $line); unless ($#tokens == 3) { $num_failed_parses+=1; - print STDERR "$0: Couldn't parse line $. in $segmentsFile\n" + print STDERR "$0: Couldn't parse line $. 
in $segmentsFile\n" if ($num_failed_parses == 1); print STDERR ("\tLine: $line") if ($num_failed_parses le $num_failed_parses_max); @@ -174,7 +174,7 @@ $waveform{$recordingID} =~ s:.+/::; # remove path prefix $waveform{$recordingID} =~ s:\.(sph|wav)\s*$::; # remove file extension $channel{$recordingID} = 1 # Default - unless (exists $channel{$recordingID}); + unless (exists $channel{$recordingID}); ++$numRecordings; } close(SCP); @@ -321,7 +321,7 @@ $w =~ s:([^\x00-\x7F])(?=[^\x00-\x7F]):$1 :g; # split adjacent non-ASCII chars print CHARSTM ("$w\n"); } -close(CHARSTM); +close(CHARSTM); close(STM); print STDERR ("$0: Wrote char.stm file $charStmFile\n"); diff --git a/egs/babel/s5c/local/resegment/evaluate_segmentation.pl b/egs/babel/s5c/local/resegment/evaluate_segmentation.pl index 06a762d7762..9d865cca8c9 100755 --- a/egs/babel/s5c/local/resegment/evaluate_segmentation.pl +++ b/egs/babel/s5c/local/resegment/evaluate_segmentation.pl @@ -1,6 +1,6 @@ #!/usr/bin/env perl -# Copyright 2014 Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar +# Copyright 2014 Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar # Apache 2.0 ################################################################################ diff --git a/egs/babel/s5c/local/resegment/generate_segments.sh b/egs/babel/s5c/local/resegment/generate_segments.sh index 01917c3d4e9..95e88deb87d 100755 --- a/egs/babel/s5c/local/resegment/generate_segments.sh +++ b/egs/babel/s5c/local/resegment/generate_segments.sh @@ -37,14 +37,14 @@ if [ $# -ne 5 ]; then echo " --segmentation-opts '--opt1 opt1val --opt2 opt2val' # options for segmentation.py" echo " --reference-rttm # Reference RTTM file that will be used for analysis of the segmentation" echo " --get-text (true|false) # Convert text from base data directory to correspond to the new segments" - echo + echo echo "e.g.:" echo "$0 data/dev10h data/lang exp/tri4b_seg exp/tri4b_resegment_dev10h" exit 1 fi datadir=$1 # The base data directory that contains at least the files wav.scp and reco2file_and_channel -lang=$2 +lang=$2 model_dir=$3 # Segmentation model directory created using local/resegment/run_segmentation_train.sh temp_dir=$4 # Temporary directory to store some intermediate files during segmentation output_dir=$5 # The target directory @@ -73,18 +73,18 @@ if [ $stage -le 1 ]; then ali-to-phones --per-frame=true $model_dir/final.mdl ark:- ark,t:- \| \ utils/int2sym.pl -f 2- $lang/phones.txt \| \ gzip -c '>' $temp_dir/pred.JOB.gz || exit 1 - + mkdir -p $temp_dir/pred gunzip -c $temp_dir/pred.*.gz | \ - perl -ne '($file, $phones)=split / /, $_, 2; - open($fh, ">'$temp_dir/pred/'$file.pred" ) or die $!; - print {$fh} "$file $phones"; + perl -ne '($file, $phones)=split / /, $_, 2; + open($fh, ">'$temp_dir/pred/'$file.pred" ) or die $!; + print {$fh} "$file $phones"; close($fh);' || exit 1 fi t2=$(date +%s) total_time=$((total_time + t2 - t1)) -echo "SI decoding done in $((t2-t1)) seconds" +echo "SI decoding done in $((t2-t1)) seconds" ############################################################################### @@ -99,8 +99,8 @@ if ! [ `cat $lang/phones/optional_silence.txt | wc -w` -eq 1 ]; then exit 1; fi -silphone=`cat $lang/phones/optional_silence.txt` -# silphone will typically be "sil" or "SIL". +silphone=`cat $lang/phones/optional_silence.txt` +# silphone will typically be "sil" or "SIL". # 3 sets of phones: 0 is silence, 1 is noise, 2 is speech., ( @@ -127,15 +127,15 @@ local/resegment/segmentation.py --verbose 2 $segmentation_opts \ if [ ! 
-s $output_dir/segments ] ; then echo "Zero segments created during segmentation process." - echo "That means something failed. Try the cause and re-run!" + echo "That means something failed. Try the cause and re-run!" exit 1 fi t2=$(date +%s) total_time=$((total_time + t2 - t1)) -echo "Resegment data done in $((t2-t1)) seconds" +echo "Resegment data done in $((t2-t1)) seconds" -for file in reco2file_and_channel wav.scp ; do +for file in reco2file_and_channel wav.scp ; do [ ! -f $datadir/$file ] && echo "Expected file $datadir/$file to exist" && exit 1 cp $datadir/$file $output_dir/$file done diff --git a/egs/babel/s5c/local/rttm_to_text.pl b/egs/babel/s5c/local/rttm_to_text.pl index 7312acdb886..d33c71e2f17 100755 --- a/egs/babel/s5c/local/rttm_to_text.pl +++ b/egs/babel/s5c/local/rttm_to_text.pl @@ -64,7 +64,7 @@ sub float_gt { @times = (); $filename = $_filename; } - + #I don't really know what is the distinction between all #of these. Let's throw away the SPEAKER, as it does not #really contain information that is to be found in the transcript @@ -91,12 +91,12 @@ sub float_gt { my $B = $times[-1][0]; my $Aend = $times[-2][1]; my $Bend = $times[-1][1]; - + #print "WARNING: Elements in the RTTM file are not sorted for FILENAME $filename!\n"; #print $times[-2][0] . " " . $times[-2][1] - $times[-2][0]. " " . $times[-2][2] . "\n"; #print $times[-1][0] . " " . $times[-1][1] - $times[-1][0]. " " . $times[-1][2] . "\n"; #print "\n"; - + my @sorted = sort {$a <=> $b} ($A, $B, $Aend, $Bend); #print Dumper(\@sorted); $times[-1][0] = $sorted[0]; @@ -129,7 +129,7 @@ sub float_gt { #if ($segmentname ne "10470_A_20111118_172644_000000" ) { # next; #} - + #print $filename . "\n"; #print Dumper(\@times); diff --git a/egs/babel/s5c/local/run_kws_stt_task.sh b/egs/babel/s5c/local/run_kws_stt_task.sh index 50c96e41035..d622aac9442 100755 --- a/egs/babel/s5c/local/run_kws_stt_task.sh +++ b/egs/babel/s5c/local/run_kws_stt_task.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash # Copyright 2013 Johns Hopkins University (authors: Yenda Trmal) # Licensed under the Apache License, Version 2.0 (the "License"); @@ -39,7 +39,7 @@ if [ $(basename $0) == score.sh ]; then fi echo $0 "$@" -. utils/parse_options.sh +. utils/parse_options.sh if [ $# -ne 3 ]; then echo "Usage: $0 [options] " @@ -47,27 +47,29 @@ if [ $# -ne 3 ]; then exit 1; fi -data_dir=$1; +data_dir=$1; lang_dir=$2; -decode_dir=$3; +decode_dir=$3; ##NB: The first ".done" files are used for backward compatibility only ##NB: should be removed in a near future... -if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then - local/lattice_to_ctm.sh --cmd "$cmd" --word-ins-penalty $wip \ - --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \ - $data_dir $lang_dir $decode_dir - - if ! $skip_scoring ; then - local/score_stm.sh --cmd "$cmd" --cer $cer \ - --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt}\ +if ! $skip_stt ; then + if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then + local/lattice_to_ctm.sh --cmd "$cmd" --word-ins-penalty $wip \ + --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \ $data_dir $lang_dir $decode_dir + + if ! $skip_scoring ; then + local/score_stm.sh --cmd "$cmd" --cer $cer \ + --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt}\ + $data_dir $lang_dir $decode_dir + fi + touch $decode_dir/.done.score fi - touch $decode_dir/.done.score fi if ! $skip_kws ; then - if [ ! -f $decode_dir/.kws.done ] && [ ! -f $decode_dir/.done.kws ]; then + if [ ! -f $decode_dir/.kws.done ] && [ ! 
-f $decode_dir/.done.kws ]; then
     local/kws_search.sh --cmd "$cmd" --max-states ${max_states} \
       --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} --skip-scoring $skip_scoring\
       --indices-dir $decode_dir/kws_indices $lang_dir $data_dir $decode_dir
diff --git a/egs/babel/s5c/local/score_combine.sh b/egs/babel/s5c/local/score_combine.sh
index f425b5afc68..7e8af85b2d8 100755
--- a/egs/babel/s5c/local/score_combine.sh
+++ b/egs/babel/s5c/local/score_combine.sh
@@ -18,9 +18,9 @@

 # Script for system combination using minimum Bayes risk decoding.
-# This calls lattice-combine to create a union of lattices that have been
+# This calls lattice-combine to create a union of lattices that have been
 # normalized by removing the total forward cost from them. The resulting lattice
-# is used as input to lattice-mbr-decode. This should not be put in steps/ or
+# is used as input to lattice-mbr-decode. This should not be put in steps/ or
 # utils/ since the scores on the combined lattice must not be scaled.

 # begin configuration section.
@@ -43,7 +43,7 @@ help_message="Usage: "$(basename $0)" [options] <data> <lang> <decode-dir1>[:lmwt-bias] <decode-dir2>[:lmwt-bias] [...] <out-dir>
        or: "$(basename $0)" data/test data/lang exp/tri1/decode exp/tri2/decode:18 exp/tri3/decode:13 exp/combine
 Options:
   --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes.
-  --min-lmwt INT                  # minumum LM-weight for lattice rescoring
+  --min-lmwt INT                  # minimum LM-weight for lattice rescoring
   --max-lmwt INT                  # maximum LM-weight for lattice rescoring
   --lat-weights STR               # colon-separated string of lattice weights
   --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes.
@@ -70,7 +70,7 @@ decode_dirs=( $@ )  # read the remaining arguments into an array
 unset decode_dirs[${#decode_dirs[@]}-1]  # 'pop' the last argument which is odir
 num_sys=${#decode_dirs[@]}  # number of systems to combine

-#Let the user to set the CTM file name
+#Let the user set the CTM file name
 #use the data-dir name in case the user doesn't care
 if [ -z ${ctm_name} ] ; then
   ctm_name=`basename $data`
@@ -94,7 +94,7 @@ for i in `seq 0 $[num_sys-1]`; do
   offset=`echo $decode_dir | cut -d: -s -f2` # add this to the lm-weight.
   decode_dir=`echo $decode_dir | cut -d: -f1`
   [ -z "$offset" ] && offset=0
-
+
   model=`dirname $decode_dir`/final.mdl  # model one level up from decode dir
   for f in $model $decode_dir/lat.1.gz ; do
     [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
@@ -103,7 +103,7 @@ for i in `seq 0 $[num_sys-1]`; do
     nj=`cat $decode_dir/num_jobs` || exit 1;
   else
     if [ $nj != `cat $decode_dir/num_jobs` ]; then
-      echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`"
+      echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`"
       exit 1;
     fi
   fi
@@ -128,7 +128,7 @@ if [ -z "$lat_weights" ]; then
   for i in `seq $[$num_sys-1]`; do lat_weights="$lat_weights:1.0"; done
 fi

-if [ $stage -le 0 ]; then
+if [ $stage -le 0 ]; then
   $cmd $parallel_opts LMWT=$min_lmwt:$max_lmwt $dir/log/combine_lats.LMWT.log \
     mkdir -p $dir/score_LMWT/ '&&' \
     lattice-combine --lat-weights=$lat_weights "${lats[@]}" ark:- \| \
@@ -155,12 +155,12 @@ if [ $stage -le 1 ]; then
       grep -v -E '' | \
       perl -e '@list = (); %list = ();
       while(<>) {
-        chomp;
-        @col = split(" ", $_);
+        chomp;
+        @col = split(" ", $_);
         push(@list, $_);
-        $key = "$col[0]" . " $col[1]";
+        $key = "$col[0]" . 
" $col[1]"; $list{$key} = 1; - } + } foreach(sort keys %list) { $key = $_; foreach(grep(/$key/, @list)) { diff --git a/egs/babel/s5c/local/score_mbr.sh b/egs/babel/s5c/local/score_mbr.sh index 1c39830b4c7..a86dd5c3f71 100755 --- a/egs/babel/s5c/local/score_mbr.sh +++ b/egs/babel/s5c/local/score_mbr.sh @@ -48,7 +48,7 @@ for inv_acwt in `seq $min_lmwt $max_lmwt`; do done wait; [ -f $dir/.error ] && echo "score_mbr.sh: errror getting MBR outout."; - + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ cat $dir/scoring/LMWT.tra \| \ diff --git a/egs/babel/s5c/local/score_sctk_prune.sh b/egs/babel/s5c/local/score_sctk_prune.sh index a6eca9fd071..09662af57c8 100755 --- a/egs/babel/s5c/local/score_sctk_prune.sh +++ b/egs/babel/s5c/local/score_sctk_prune.sh @@ -73,12 +73,12 @@ if [ $stage -le 1 ]; then grep -v -E '' | \ perl -e '@list = (); %list = (); while(<>) { - chomp; - @col = split(" ", $_); + chomp; + @col = split(" ", $_); push(@list, $_); - $key = "$col[0]" . " $col[1]"; + $key = "$col[0]" . " $col[1]"; $list{$key} = 1; - } + } foreach(sort keys %list) { $key = $_; foreach(grep(/$key/, @list)) { @@ -103,8 +103,8 @@ if [ $stage -le 1 ]; then foreach (@char) { $char = encode("UTF8", $_); $start += $dur; - # printf "$col[0] $col[1] $start $dur $char\n"; - printf "%s %s %.2f %.2f %s %s\n", $col[0], $col[1], $start, $dur, $char, $col[5]; + # printf "$col[0] $col[1] $start $dur $char\n"; + printf "%s %s %.2f %.2f %s %s\n", $col[0], $col[1], $start, $dur, $char, $col[5]; } } }' > $y.char.ctm @@ -122,7 +122,7 @@ if [ $stage -le 2 ]; then cp $data/char.stm $dir/score_LMWT/'&&'\ $ScoringProgram -s -r $dir/score_LMWT/char.stm stm -h $dir/score_LMWT/${name}.char.ctm ctm -o all -o dtl; fi - + # for x in $dir/score_*/*.ctm; do # mv $x.filt $x; # rm -f $x.filt*; diff --git a/egs/babel/s5c/local/score_stm.sh b/egs/babel/s5c/local/score_stm.sh index 2406af4e726..56835109722 100755 --- a/egs/babel/s5c/local/score_stm.sh +++ b/egs/babel/s5c/local/score_stm.sh @@ -48,7 +48,7 @@ data=$1 lang=$2 # This parameter is not used -- kept only for backwards compatibility dir=$3 -set -e +set -e set -o pipefail set -u @@ -82,8 +82,9 @@ if [ $stage -le 0 ] ; then \> $dir/score_LMWT/stm '&&' \ paste -d ' ' \<\(cut -f 1-4 -d ' ' $dir/score_LMWT/${name}.ctm.sorted \) \ \<\(cut -f 5- -d ' ' $dir/score_LMWT/${name}.ctm.sorted \| uconv -f utf8 -t utf8 -x "$icu_transform" \) \ - \> $dir/score_LMWT/${name}.ctm '&&' \ - utils/fix_ctm.sh $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm '&&' \ + \> $dir/score_LMWT/${name}.ctm.sorted2 '&&' \ + utils/fix_ctm.sh $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm.sorted2 '&&' \ + $SortingProgram sortCTM \<$dir/score_LMWT/${name}.ctm.sorted2 \>$dir/score_LMWT/${name}.ctm '&&' \ $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm \ -n "$name.ctm" -f 0 -D -F -o sum rsum prf dtl sgml -e utf-8 || exit 1 fi diff --git a/egs/babel/s5c/local/shadow_set_kws_search.sh b/egs/babel/s5c/local/shadow_set_kws_search.sh index 76521fda9b6..a67a3a57f6a 100755 --- a/egs/babel/s5c/local/shadow_set_kws_search.sh +++ b/egs/babel/s5c/local/shadow_set_kws_search.sh @@ -13,7 +13,7 @@ help_message="$0: create subset of the input directory (specified as the first d Example: $0 [data-dir2 [data-dir3 [ ...] ]" -# Begin configuration section. +# Begin configuration section. 
#acwt=0.0909091 min_lmwt=7 max_lmwt=17 @@ -101,8 +101,8 @@ if [ $stage -le 0 ] ; then for lmwt in `seq $min_lmwt $max_lmwt` ; do kwsoutdir=$decodedir/kws_$lmwt mkdir -p $kwsoutdir - - acwt=`perl -e "print (1.0/$lmwt);"` + + acwt=`perl -e "print (1.0/$lmwt);"` steps/make_index.sh --strict $strict --cmd "$cmd" --max-states $max_states\ --acwt $acwt $model_flags --skip-optimization $skip_optimization \ --word_ins_penalty $word_ins_penalty \ @@ -128,14 +128,14 @@ if [ $stage -le 1 ] ; then dirB=$decodedir/`basename $datasetB`/kws_$lmwt mkdir -p $dirA mkdir -p $dirB - + steps/search_index.sh --cmd "$cmd" $kwsdatadir $kwsoutdir || exit 1 [ ! -f $datasetA/kws/utter_id ] && echo "File $datasetA/kws/utter_id must exist!" && exit 1; cat $kwsoutdir/result.* | \ grep -F -f <(cut -f 1 -d ' ' $datasetA/kws/utter_id ) |\ grep "^KW[-a-zA-Z0-9]*-A " | \ - sed 's/^\(KW.*\)-A /\1 /g' > $dirA/results + sed 's/^\(KW.*\)-A /\1 /g' > $dirA/results [ ! -f $datasetB/kws/utter_id ] && echo "File $datasetB/kws/utter_id must exist!" && exit 1; cat $kwsoutdir/result.* | \ @@ -152,7 +152,7 @@ if [ $stage -le 1 ] ; then cat $kwsoutdir/result.* | \ grep -F -f <(cut -f 1 -d ' ' $datasetA/kws/utter_id ) |\ grep "^KW[-a-zA-Z0-9]*-B " | \ - sed 's/^\(KW.*\)-B /\1 /g' > $dirA/results + sed 's/^\(KW.*\)-B /\1 /g' > $dirA/results [ ! -f $datasetB/kws/utter_id ] && echo "File $datasetB/kws/utter_id must exist!" && exit 1; cat $kwsoutdir/result.* | \ @@ -192,7 +192,7 @@ if [ $stage -le 3 ] ; then utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationA \ --segments=$datadir/segments --normalize=false --remove-dup=true\ --map-utter=$kwsdatadir/utter_map - $rootdirA/kws_LMWT/kwslist.unnormalized.xml || exit 1 - + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_write_unnormalized.LMWT.log \ set -e';' set -o pipefail';' \ cat $rootdirAB/kws_LMWT/results \| \ @@ -204,15 +204,15 @@ fi echo "Scoring $datasetA" if [ $stage -le 4 ] ; then if [[ (! -x local/kws_score.sh ) || ($skip_scoring == true) ]] ; then - echo "Not scoring, because the file local/kws_score.sh is not present" + echo "Not scoring, because the file local/kws_score.sh is not present" exit 1 elif [ ! -f $datasetA/kws/rttm ] ; then echo "Not scoring, because the file $datasetA/kws/rttm is not present" else $cmd LMWT=$min_lmwt:$max_lmwt $rootdirA/kws/kws_scoring.LMWT.log \ - local/kws_score.sh $datasetA $rootdirA/kws_LMWT + local/kws_score.sh $datasetA $rootdirA/kws_LMWT $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_scoring.LMWT.log \ - local/kws_score.sh --kwlist $datasetB/kws/kwlist.xml $datasetA $rootdirAB/kws_LMWT + local/kws_score.sh --kwlist $datasetB/kws/kwlist.xml $datasetA $rootdirAB/kws_LMWT fi fi diff --git a/egs/babel/s5c/local/split_ctms.sh b/egs/babel/s5c/local/split_ctms.sh index efba126a5dd..b24a1380111 100755 --- a/egs/babel/s5c/local/split_ctms.sh +++ b/egs/babel/s5c/local/split_ctms.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash # Copyright 2013 Johns Hopkins University (authors: Yenda Trmal) # Licensed under the Apache License, Version 2.0 (the "License"); @@ -32,8 +32,8 @@ echo "$0 $@" set -e set -o pipefail -data=$1; -q=$2; +data=$1; +q=$2; shift; shift; if [ -z $ctm_name ] ; then diff --git a/egs/babel/s5c/local/stm2text.pl b/egs/babel/s5c/local/stm2text.pl index 3ec3806238a..3b069c63554 100755 --- a/egs/babel/s5c/local/stm2text.pl +++ b/egs/babel/s5c/local/stm2text.pl @@ -3,7 +3,7 @@ # Copyright 2012 Johns Hopkins University (Author: Yenda Trmal) # Apache 2.0. 
-#This script takes the source STM file and generates the *.txt files which +#This script takes the source STM file and generates the *.txt files which #are usually part of the BABEL delivery #The *.txt files are not the part of the delivery for the evalpart1 subset #The program works as a filter and the only parameter it expects is @@ -12,7 +12,7 @@ #example of usage: # cat data/evalpart1/stm local/stm2text.pl data/raw_evalpart1_data/transcriptions -use strict; +use strict; use warnings; use utf8; @@ -30,7 +30,7 @@ next if ( $filename =~ /;;.*/ ); #$filename =~ s/;;(.*)/$1/ if ( $filename =~ /;;.*/ ); $text = "" if not $text; - + if ( $prev_filename ne $filename ) { #close($OUTPUT) if ( tell(FH) != -1 ); print "$output_dir/$filename.txt\n"; diff --git a/egs/babel/s5c/local/subset_atwv.pl b/egs/babel/s5c/local/subset_atwv.pl index 910703db996..ce6b7043116 100755 --- a/egs/babel/s5c/local/subset_atwv.pl +++ b/egs/babel/s5c/local/subset_atwv.pl @@ -13,7 +13,7 @@ e.g.: subset_atwv.pl keywords.list bsum.txt This script will compute the ATWV for a subset of the original keywords in bsum.txt. -Note that bsum.txt is a file generated by the NIST scoring tool F4DE. keywords.list +Note that bsum.txt is a file generated by the NIST scoring tool F4DE. keywords.list is a list of the keywords that you want to compute the ATWV for. For example: KW101-0001 KW101-0002 @@ -27,7 +27,7 @@ my $subset_name = ""; my $width = 5; GetOptions('subset-name=s' => \$subset_name, - 'width=i' => \$width); + 'width=i' => \$width); @ARGV == 2 || die $Usage; @@ -72,7 +72,7 @@ if (/^Keyword/) {$flag = 1;} my @col; if ($flag == 1) { - # Figure out keywords that don't have occurrences in the search collection + # Figure out keywords that don't have occurrences in the search collection @col = split(/\|/, $_); $col[2] =~ s/^\s+//; $col[2] =~ s/\s+$//; diff --git a/egs/babel/s5c/local/subset_kwslist.pl b/egs/babel/s5c/local/subset_kwslist.pl index 96c2c7a7fdd..361291179ef 100755 --- a/egs/babel/s5c/local/subset_kwslist.pl +++ b/egs/babel/s5c/local/subset_kwslist.pl @@ -29,5 +29,5 @@ } $data->{kw} = \@filtered_kws; my $xml = XMLout($data, RootName=> "kwlist", KeyAttr=>''); -print $xml; +print $xml; exit 0 diff --git a/egs/babel/s5c/local/summarize_logs.pl b/egs/babel/s5c/local/summarize_logs.pl index 4f7fc058f96..e816d57d68f 100755 --- a/egs/babel/s5c/local/summarize_logs.pl +++ b/egs/babel/s5c/local/summarize_logs.pl @@ -23,7 +23,7 @@ sub parse_accounting_entry { $entry= shift @_; @elems = split " ", $entry; - + $time=undef; $threads=undef; foreach $elem (@elems) { @@ -96,7 +96,7 @@ sub parse_accounting_entry { $total_threads=0.0; foreach $fgroup (split_hundreds($fmap{$c})) { $lines=`grep -P "# Accounting:? " $fgroup |sed 's/.* Accounting:* *//g'`; - + #print $lines ."\n"; @entries = split "\n", $lines; diff --git a/egs/babel/s5c/local/syllab/ali_to_syllabs.sh b/egs/babel/s5c/local/syllab/ali_to_syllabs.sh new file mode 100755 index 00000000000..8f0cb88771a --- /dev/null +++ b/egs/babel/s5c/local/syllab/ali_to_syllabs.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +cmd=run.pl +# End configuration section +. ./utils/parse_options.sh + +if [ -f ./path.sh ]; then . 
./path.sh; fi
+
+if [ $# != 4 ]; then
+  echo "This script takes an ali directory and syllab lang dir and generates"
+  echo "syllabic transcription of the alignment"
+  echo ""
+  echo "Usage: $0 <data-dir> <syll-lang-dir> <ali-dir> <out-dir>"
+  echo " e.g.: $0 data/train data/lang_syll exp/tri5_ali exp/tri5_ali_syll"
+  echo "main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) "
+
+  exit 1;
+fi
+
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+
+data=$1
+lang=$2
+ali=$3
+out=$4
+
+
+for f in real_words.txt lex.words2syllabs.fst ; do
+  [ ! -f $lang/$f ] && \
+    echo "The given lang directory is probably not a syllable lang dir" && \
+    echo "The file $lang/$f is missing" && \
+    exit 1
+done
+
+for f in words.txt L.fst ; do
+  [ ! -f $lang/$f ] && \
+    echo "The given lang directory does not contain the $f file" && \
+    exit 1
+done
+
+for f in $ali/num_jobs $ali/final.mdl $ali/ali.1.gz ; do
+  [ ! -f $f ] && \
+    echo "The given ali directory does not contain the $f file" && \
+    exit 1
+done
+
+nj=$(cat $ali/num_jobs)
+echo "Extracting phoneme sequences"
+$cmd JOB=1:$nj $out/log/ali-to-phones.JOB.log \
+  ali-to-phones $ali/final.mdl ark:"gunzip -c $ali/ali.JOB.gz|" ark:- \| \
+  transcripts-to-fsts ark:- ark:$out/phones.JOB.fst || exit 1
+
+echo "Composing with files in $lang to get syllable sequences"
+$cmd JOB=1:$nj $out/log/get-syll-text.JOB.log \
+  cat $data/split$nj/JOB/text \| sym2int.pl -f 2- --map-oov '\<unk\>' $lang/real_words.txt \| \
+  transcripts-to-fsts ark,t:- ark:- \|\
+  fsttablecompose $lang/lex.words2syllabs.fst ark:- ark:-\| \
+  fsts-project ark:- ark:-\| \
+  fsttablecompose $lang/L.fst ark:- ark:- \|\
+  fsttablecompose ark:$out/phones.JOB.fst ark:- ark:- \| \
+  fsts-to-transcripts ark:- ark,t:"|int2sym.pl -f 2- $lang/words.txt > $out/text.JOB"
+cat $out/text.* | sort > $out/text
+
+echo "Done"
+
diff --git a/egs/babel/s5c/local/syllab/create_syllables.pl b/egs/babel/s5c/local/syllab/create_syllables.pl
new file mode 100755
index 00000000000..29a0a67dc8d
--- /dev/null
+++ b/egs/babel/s5c/local/syllab/create_syllables.pl
@@ -0,0 +1,154 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015 Johns Hopkins University (Author: Yenda Trmal)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+use Getopt::Long;
+use Data::Dumper;
+
+my $with_probs;
+my $position_independent_phones;
+
+GetOptions("with-probs" => \$with_probs,
+           "position-independent-phones" => \$position_independent_phones
+);
+
+my %SYLLS;
+my %LEXICON;
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my $word; my $prob; my $pron;
+  if ($with_probs) {
+    ($word, $prob, $pron) = split(" ", $line, 3);
+  } else {
+    ($word, $pron) = split(" ", $line, 2);
+  }
+  my @syllabs = split(/\s*\t\s*/, $pron);
+
+  my $pronlen= scalar @syllabs;
+  my @extended_syllabs;
+  if (( $syllabs[0] =~ /x\<.*\>/) || ($word eq "SIL")) {
+    $SYLLS{$pron} +=1;
+    push @extended_syllabs, $pron;
+  } elsif ($pronlen == 1) {
+    my $syl;
+    my @phones=split " ", $syllabs[0];
+
+    if ($position_independent_phones) {
+      $syl = join(" ", @phones);
+    } else {
+      my @phones2 = map { $_ . "_I" } @phones;
+
+      if (scalar(@phones) == 1 ) {
+        $syl = "$phones[0]_S";
+      } else {
+        $phones2[0] = $phones[0] . "_B" unless $position_independent_phones;
+        $phones2[-1] = $phones[-1] ."_E" unless $position_independent_phones;
+        $syl = join(" ", @phones2);
+      }
+    }
+    $SYLLS{$syl} += 1;
+    push @extended_syllabs, $syl;
+  } else {
+    for (my $i = 0; $i < $pronlen; $i+=1) {
+      my $syl;
+      my @phones=split " ", $syllabs[$i];
+      my $first_index = 0;
+      my $last_index = scalar(@phones)-1;
+
+      if ($position_independent_phones) {
+        $syl = join(" ", @phones);
+      } else {
+        my @phones2 = map { $_ . "_I" } @phones;
+
+        if ($i == 0) {
+          $phones2[$first_index] = $phones[$first_index] . "_B";
+        } elsif ( $i == ($pronlen - 1)) {
+          $phones2[$last_index] = $phones[$last_index] . "_E";
+        }
+        $syl = join(" ", @phones2);
+      }
+
+      push @extended_syllabs, $syl;
+      $SYLLS{$syl} += 1;
+    }
+  }
+  push @{$LEXICON{$word}}, \@extended_syllabs;
+}
+
+
+my %VOCAB;
+my %COUNTS;
+my %REV_VOCAB;
+foreach my $syl (keys %SYLLS) {
+  my $seq=1;
+  my $word=$syl;
+  $word =~ s/_[^\s]*//g;
+  $word =~ s/ //g;
+  $word =~ s/[^a-zA-Z0-9<>\-|\/]//g;
+
+  my $wordx=$word;
+  $wordx .= "#$seq";
+  while (exists $COUNTS{$wordx}) {
+    $seq += 1;
+    $wordx = "$word#$seq";
+  }
+
+  $COUNTS{$wordx} += $SYLLS{$syl};
+  push @{$VOCAB{$wordx}}, $syl;
+  $REV_VOCAB{$syl} = $wordx;
+}
+
+open(my $lex_f, "|sort -u > $ARGV[0]") or
+die "Cannot open the file\"$ARGV[0]\" for writing";
+
+foreach my $word (keys %VOCAB) {
+  print $lex_f "$word\t" . join("\t", @{$VOCAB{$word}}) . "\n";
+}
+
+close($lex_f);
+
+open(my $word2syll_f, "|sort -u > $ARGV[1]") or
+die "Cannot open the file\"$ARGV[1]\" for writing";
+
+foreach my $word (keys %LEXICON) {
+  foreach my $pron (@{$LEXICON{$word}}) {
+    my @pron_in_syllabs;
+    foreach my $syl (@{$pron}) {
+      die "In word $word, pronunciation $pron: syllable $syl not in the lexicon!" unless exists $REV_VOCAB{$syl};
+      push @pron_in_syllabs, $REV_VOCAB{$syl};
+    }
+    print $word2syll_f "$word\t" . join(" ", @pron_in_syllabs) . "\n";
+  }
+}
+
+close($word2syll_f);
+
+open(my $word2ali_f, "|sort -u > $ARGV[2]") or
+die "Cannot open the file\"$ARGV[2]\" for writing";
+
+foreach my $word (keys %LEXICON) {
+  foreach my $pron (@{$LEXICON{$word}}) {
+    print $word2ali_f "$word\t$word\t" . join(" ", @{$pron}) . 
"\n"; + } +} + +close($word2ali_f); + diff --git a/egs/babel/s5c/local/syllab/generate_syllable_lang.sh b/egs/babel/s5c/local/syllab/generate_syllable_lang.sh new file mode 100755 index 00000000000..2d1fcb2259e --- /dev/null +++ b/egs/babel/s5c/local/syllab/generate_syllable_lang.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +cmd=run.pl +# End configuration section +. ./utils/parse_options.sh +. ./path.sh + + + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +data=$1 +llang=$2 +lang=$3 +out=$4 +lout=$5 + +test -d $lout && rm -rf $lout +mkdir -p $lout +test -d $out && rm -rf $out +cp -R $lang $out +rm -rf $out/tmp $out/L.fst $out/L_disambig.fst $out/G.fst $out/words.txt +rm -rf $out/phones/word_boundary.{int,txt} + +echo "Generating lexicons.." +if [ -f $lang/phones/word_boundary.int ] ; then + echo "Position dependent phones system..." + if [ -f $llang/lexiconp.txt ] ; then + echo "Using probabilistic lexicon..." + cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs\ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + else + echo "Using plain lexicon..." + cat $llang/lexicon.txt | local/syllab/create_syllables.pl \ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + fi +else + echo "Position independent phones system..." + if [ -f $llang/lexiconp.txt ] ; then + echo "Using probabilistic lexicon..." + cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs --position-independent-phones\ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + else + echo "Using plain lexicon..." + cat $llang/lexicon.txt | local/syllab/create_syllables.pl --position_independent_phones\ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + fi +fi +cp $lout/lex.{syllabs2phones,words2syllabs,words2phones}.txt $out + +#We will fake the words.txt file +( + echo ""; + cut -f 1 $out/lex.syllabs2phones.txt; + echo -e "#0\n\n"; +) | nl -v 0 | awk '{print $2, $1}' > $out/syllabs.txt +ln -s syllabs.txt $out/words.txt +cp $lang/words.txt $out/real_words.txt + + +#Figure out the "OOV" token +oovword=$(cat $lang/oov.txt) +oovsyl=$(grep -w -F "$oovword" $out/lex.words2syllabs.txt | \ + awk '{if (NF == 2) { print $2;} + else {print "Error, oov word has more than one syllable "; exit 1;}}') + +echo $oovsyl > $out/oov.txt +grep -w -F "$oovsyl" $out/words.txt | awk '{print $2}' > $out/oov.int + +phone_disambig_symbol=$(grep '#0' $out/phones.txt | awk '{print $2}') +word_disambig_symbol=$(grep '#0' $out/words.txt | awk '{print $2}') + +optional_sil=$(cat $out/phones/optional_silence.txt) +utils/add_lex_disambig.pl $out/lex.syllabs2phones.txt $out/lex.syllabs2phones.disambig.txt > /dev/null +cat $out/lex.syllabs2phones.disambig.txt | sort -u > $lout/lexicon.txt + +echo " SIL" | cat - $lout/lexicon.txt | perl -ane 'print $F[0], " ", join(" ", @F), "\n";' | \ + sed 's/ #[0-9]$//g' > $out/phones/align_lexicon.txt +cat $lout/lexicon.txt | perl -ane 'print $F[0], "\t1.0\t", join(" ", @F[1..$#F]), "\n";' \ + > $lout/lexiconp.txt + +cat $out/phones/align_lexicon.txt |\ + sym2int.pl -f 3- $out/phones.txt |\ + sym2int.pl -f 1-2 $out/words.txt \ + > $out/phones/align_lexicon.int + +ndisambig=$(cat $out/phones/disambig.int | wc -l) +ndisambig=$[$ndisambig-1] + + +#Compile the lexicons +echo "Compiling words2syllables FST" 
+utils/make_lexicon_fst.pl $out/lex.words2syllabs.txt | \
+  fstcompile --isymbols=$out/syllabs.txt --osymbols=$lang/words.txt \
+   --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.words2syllabs.fst
+
+echo "Compiling L.fst and L_disambig.fst"
+sil=$(cat $lang/phones/optional_silence.txt)
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.txt 0.5 $sil | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+  --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.fst
+ln -s lex.syllabs2phones.fst $out/L.fst
+
+
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.disambig.txt 0.5 $sil '#'$ndisambig | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+  --keep_isymbols=false --keep_osymbols=false| \
+  fstaddselfloops "echo $phone_disambig_symbol |" "echo $word_disambig_symbol |"|\
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.disambig.fst
+ln -s lex.syllabs2phones.disambig.fst $out/L_disambig.fst
+
+echo "Validating the output lang dir"
+utils/validate_lang.pl $out || exit 1
+
+sed -i'' 's/#1$//g' $lout/lexicon.txt
+sed -i'' 's/#1$//g' $lout/lexiconp.txt
+
+echo "Done OK."
+exit 0
diff --git a/egs/babel/s5c/local/syllab/map_prons_to_syllables.pl b/egs/babel/s5c/local/syllab/map_prons_to_syllables.pl
new file mode 100755
index 00000000000..df3ce93ce4e
--- /dev/null
+++ b/egs/babel/s5c/local/syllab/map_prons_to_syllables.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015 (Author: Yenda Trmal )
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+use Getopt::Long;
+
+my $probs;
+
+GetOptions("with-probs" => \$probs);
+
+my $syllab_lexicon=$ARGV[0];
+
+my %PRON2SYL;
+
+
+open(my $f, $syllab_lexicon) or die "Cannot open file $syllab_lexicon\n";
+while (my $line = <$f>) {
+  chomp $line;
+
+  my $syll;
+  my $pron;
+  my $prob;
+
+  if ($probs) {
+    ($syll, $prob, $pron) = split " ", $line, 3;
+  } else {
+    ($syll, $pron) = split " ", $line, 2;
+  }
+  $PRON2SYL{$pron} = $syll;
+}
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my ($word, $pron) = split(/\s*\t\s*/, $line, 2);
+  my @syllabs = split(/\s*\t\s*/, $pron);
+
+  my @syl_pron;
+  foreach my $syl (@syllabs) {
+    die "in $line unknown syllable $syl" unless exists $PRON2SYL{$syl};
+    push @syl_pron, $PRON2SYL{$syl};
+  }
+  print "$word\t" . join(" ", @syl_pron) . "\n";
+
+}
diff --git a/egs/babel/s5c/local/train_g2p.sh b/egs/babel/s5c/local/train_g2p.sh
index d608d084ac2..08be0014656 100755
--- a/egs/babel/s5c/local/train_g2p.sh
+++ b/egs/babel/s5c/local/train_g2p.sh
@@ -2,7 +2,7 @@
# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
# Apache 2.0
-# Begin configuration section.
+# Begin configuration section.
iters=5
stage=0
encoding='utf-8'
@@ -74,7 +74,7 @@ if [ $stage -le 0 ]; then
fi
for i in `seq 0 $(($iters-2))`; do
-
+
  echo "Training the G2P model (iter $[$i + 1] )"
  if [ $stage -le $i ]; then
diff --git a/egs/babel/s5c/local/train_lms_srilm.sh b/egs/babel/s5c/local/train_lms_srilm.sh
index 5bb1bfaa760..be2b0247aeb 100755
--- a/egs/babel/s5c/local/train_lms_srilm.sh
+++ b/egs/babel/s5c/local/train_lms_srilm.sh
@@ -4,22 +4,41 @@ export LC_ALL=C
words_file=
train_text=
dev_text=
+oov_symbol="<unk>"
-. ./utils/parse_options.sh
+echo "$0 $@"
+
+[ -f path.sh ] && . ./path.sh
+. ./utils/parse_options.sh || exit 1
echo "-------------------------------------"
echo "Building an SRILM language model "
echo "-------------------------------------"
+if [ $# -ne 2 ] ; then
+  echo "Incorrect number of parameters. "
+  echo "Script has to be called like this:"
+  echo "  $0 [switches] <datadir> <tgtdir>"
+  echo "For example: "
+  echo "  $0 data data/srilm"
+  echo "The allowed switches are: "
+  echo "    words_file=       word list file -- data/lang/words.txt by default"
+  echo "    train_text=       data/train/text is used in case when not specified"
+  echo "    dev_text=         last 10 % of the train text is used by default"
+  echo "    oov_symbol=<unk>  symbol to use for oov modeling -- <unk> by default"
+  exit 1
+fi
+
datadir=$1
tgtdir=$2
outlm=lm.gz
+
##End of configuration
loc=`which ngram-count`;
if [ -z $loc ]; then
  if uname -a | grep 64 >/dev/null; then # some kind of 64 bit...
-    sdir=`pwd`/../../../tools/srilm/bin/i686-m64
+    sdir=`pwd`/../../../tools/srilm/bin/i686-m64
  else
    sdir=`pwd`/../../../tools/srilm/bin/i686
  fi
@@ -34,23 +53,39 @@ if [ -z $loc ]; then
  fi
fi
-[ -z $words_file ] && words_file=$datadir/lang/words.txt
-[ -z $train_text ] && train_text=$datadir/train/text
-[ -z $dev_text ] && dev_text=$datadir/dev2h/text
-
-echo "Using words file: $words_file"
-echo "Using train text: $train_text"
-echo "Using dev text : $dev_text"
+# Prepare the destination directory
+mkdir -p $tgtdir
for f in $words_file $train_text $dev_text; do [ ! -s $f ] && echo "No such file $f" && exit 1; done
-# Prepare the destination directory
-mkdir -p $tgtdir
+[ -z $words_file ] && words_file=$datadir/lang/words.txt
+if [ ! -z "$train_text" ] && [ -z "$dev_text" ] ; then
-z "$train_text" ] && [ -z "$dev_text" ] ; then + nr=`cat $train_text | wc -l` + nr_dev=$(($nr / 10 )) + nr_train=$(( $nr - $nr_dev )) + orig_train_text=$train_text + head -n $nr_train $train_text > $tgtdir/train_text + tail -n $nr_dev $train_text > $tgtdir/dev_text + + train_text=$tgtdir/train_text + dev_text=$tgtdir/dev_text + echo "Using words file: $words_file" + echo "Using train text: 9/10 of $orig_train_text" + echo "Using dev text : 1/10 of $orig_train_text" +else + echo "Using words file: $words_file" + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" + train_text=$datadir/train/text + dev_text=$datadir/dev2h/text +fi + + # Extract the word list from the training dictionary; exclude special symbols -sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '' > $tgtdir/vocab +sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '' | grep -v -F "$oov_symbol" > $tgtdir/vocab if (($?)); then echo "Failed to create vocab from $words_file" exit 1 @@ -67,8 +102,8 @@ if (($?)); then else echo "Removed first word (uid) from every line of $train_text" # wc text.train train.txt # doesn't work due to some encoding issues - echo $train_text contains `cat $train_text | perl -ne 'BEGIN{$w=$s=0;}{split; $w+=$#_; $w++; $s++;}END{print "$w words, $s sentences\n";}'` - echo train.txt contains `cat $tgtdir/train.txt | perl -ne 'BEGIN{$w=$s=0;}{split; $w+=$#_; $w++; $s++;}END{print "$w words, $s sentences\n";}'` + echo $train_text contains `cat $train_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo train.txt contains `cat $tgtdir/train.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` fi # Kaldi transcript files contain Utterance_ID as the first word; remove it @@ -79,56 +114,76 @@ if (($?)); then else echo "Removed first word (uid) from every line of $dev_text" # wc text.train train.txt # doesn't work due to some encoding issues - echo $train_text contains `cat $dev_text | perl -ne 'BEGIN{$w=$s=0;}{split; $w+=$#_; $w++; $s++;}END{print "$w words, $s sentences\n";}'` - echo $tgtdir/dev.txt contains `cat $tgtdir/dev.txt | perl -ne 'BEGIN{$w=$s=0;}{split; $w+=$#_; $w++; $s++;}END{print "$w words, $s sentences\n";}'` + echo $dev_text contains `cat $dev_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo $tgtdir/dev.txt contains `cat $tgtdir/dev.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` fi - echo "-------------------" echo "Good-Turing 3grams" echo "-------------------" -ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort +ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2 
-order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" echo "-------------------" echo "Kneser-Ney 3grams" echo "-------------------" -ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort +ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" echo "-------------------" echo "Good-Turing 4grams" echo "-------------------" -ngram-count -lm $tgtdir/4gram.gt0111.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0112.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0122.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0123.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0113.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0222.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.gt0223.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort +ngram-count -lm $tgtdir/4gram.gt0111.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0112.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0122.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0123.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab 
-unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0113.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0222.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0223.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" echo "-------------------" echo "Kneser-Ney 4grams" echo "-------------------" -ngram-count -lm $tgtdir/4gram.kn0111.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0112.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0113.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0122.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0123.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0222.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -ngram-count -lm $tgtdir/4gram.kn0223.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort +ngram-count -lm $tgtdir/4gram.kn0111.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0112.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0113.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0122.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0123.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0222.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0223.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + 
+if [ ! -z ${LIBLBFGS} ]; then
+  set -x
+  #please note that if the switch -map-unk "$oov_symbol" is used with -maxent-convert-to-arpa, ngram-count will segfault
+  #instead of that, we simply output the model in the maxent format and convert it using the "ngram"
+  echo "-------------------"
+  echo "Maxent 3grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 3 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/3gram.me.gz || exit 1
+
+  echo "-------------------"
+  echo "Maxent 4grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 4 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/4gram.me.gz || exit 1
+
+fi
+
echo "--------------------"
echo "Computing perplexity"
echo "--------------------"
(
-  for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
-  for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
-) | sort -r -n -k 13 | column -t | tee $tgtdir/perplexities.txt
+  for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
+  for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
+) | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt

echo "The perplexity scores report is stored in $tgtdir/perplexities.txt "

@@ -141,9 +196,9 @@ nof_trigram_lm=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | wc -l`
if [[ $nof_trigram_lm -eq 0 ]] ; then
  lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
elif [[ $nof_trigram_lm -eq 2 ]] ; then
-  lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
+  lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
else #exactly one 3gram LM
-  lmfilename=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | cut -f 1 -d ' '`
+  lmfilename=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | cut -f 1 -d ' '`
fi
(cd $tgtdir; ln -sf `basename $lmfilename` $outlm )
diff --git a/egs/babel/s5c/local/train_mmi_sgmm2.sh b/egs/babel/s5c/local/train_mmi_sgmm2.sh
index 2d3d0b5bf49..cdf9e28b1bf 100755
--- a/egs/babel/s5c/local/train_mmi_sgmm2.sh
+++ b/egs/babel/s5c/local/train_mmi_sgmm2.sh
@@ -30,7 +30,7 @@ if [ $# -ne 5 ]; then
  echo " --cancel (true|false) # cancel stats (true by default)"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo " --config <config-file> # config containing options"
-  echo " --stage <stage> # stage to do partial re-run from."
+  echo " --stage <stage> # stage to do partial re-run from."
  echo " --transform-dir <transform-dir> # directory to find fMLLR transforms." 
exit 1; fi @@ -68,7 +68,7 @@ echo "$0: feature type is $feat_type" case $feat_type in delta) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; lda) feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |" - cp $alidir/final.mat $dir + cp $alidir/final.mat $dir ;; *) echo "Invalid feature type $feat_type" && exit 1; esac @@ -152,7 +152,7 @@ while [ $x -lt $num_iters ]; do $cmd $dir/log/num_acc_sum.$x.log \ sgmm2-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1; rm $dir/num_acc.$x.*.acc - + $cmd $dir/log/update.$x.log \ sgmm2-est-ebw $update_opts $cur_mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1; fi diff --git a/egs/babel/s5c/local/txt_to_rttm.pl b/egs/babel/s5c/local/txt_to_rttm.pl index 659d3c593d7..0e128520880 100755 --- a/egs/babel/s5c/local/txt_to_rttm.pl +++ b/egs/babel/s5c/local/txt_to_rttm.pl @@ -18,7 +18,7 @@ my $flen = 0.01; GetOptions('symtab=s' => \$symtab, 'segment=s' => \$segment, - 'flen=f' => \$flen); + 'flen=f' => \$flen); if ($symtab) { if (!open(S, "<$symtab")) {print "Fail to open symbol table: $symtab\n"; exit 1;} @@ -82,7 +82,7 @@ my $uid = shift @col; my $words = join(" ", @col); @col = split(/;/, $words); - + my $utt = $uid; my $sta = 0; if ($segment) { diff --git a/egs/babel/s5c/local/uem_ctm2segments.pl b/egs/babel/s5c/local/uem_ctm2segments.pl index ab560639c06..658690172c8 100755 --- a/egs/babel/s5c/local/uem_ctm2segments.pl +++ b/egs/babel/s5c/local/uem_ctm2segments.pl @@ -40,10 +40,10 @@ $defaultSegLen = 10; # seconds ################################################################################ -GetOptions("ctmTimeStep=f" => \$ctmTimeStep, - "minSilence=f" => \$minSilence, - "silence=s" => \$silence, - "maxSegLen=f" => \$maxSegLen, +GetOptions("ctmTimeStep=f" => \$ctmTimeStep, + "minSilence=f" => \$minSilence, + "silence=s" => \$silence, + "maxSegLen=f" => \$maxSegLen, "defaultSegLen=f" => \$defaultSegLen); if ($#ARGV == 1) { diff --git a/egs/babel/s5c/results/RESULTS.105-turkish.flp b/egs/babel/s5c/results/RESULTS.105-turkish.flp new file mode 100644 index 00000000000..737d0893abe --- /dev/null +++ b/egs/babel/s5c/results/RESULTS.105-turkish.flp @@ -0,0 +1,29 @@ +%WER 57.5 | 22070 54382 | 49.0 41.7 9.2 6.5 57.5 30.8 | -1.255 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.8 | 22070 54382 | 57.3 34.1 8.6 5.1 47.8 29.0 | -0.605 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 22070 54382 | 59.0 32.7 8.3 4.8 45.8 28.7 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 22070 54382 | 59.0 32.4 8.5 4.8 45.8 28.4 | -0.630 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_17/dev10h.pem.ctm.sys +%WER 47.1 | 22070 54382 | 56.5 32.7 10.8 3.6 47.1 28.7 | -0.430 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_8/metrics.txt:MTWV = 0.5930, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/kws_12/metrics.txt:MTWV = 0.6426, THRESHOLD = 0.384 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_16/metrics.txt:MTWV = 0.6214, THRESHOLD = 0.447 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.6270, THRESHOLD = 0.595 
+exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_kws_8/metrics.txt:MTWV = 0.5930, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_12/metrics.txt:MTWV = 0.6426, THRESHOLD = 0.384 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_kws_16/metrics.txt:MTWV = 0.6214, THRESHOLD = 0.447 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.6270, THRESHOLD = 0.595 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_11/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.807000000000001 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_oov_kws_21/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.547 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_18/metrics.txt:MTWV = 0.0071, THRESHOLD = 0.666 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_9/metrics.txt:MTWV = 0.5003, THRESHOLD = 0.555 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_13/metrics.txt:MTWV = 0.5339, THRESHOLD = 0.581 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_19/metrics.txt:MTWV = 0.5203, THRESHOLD = 0.553 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.5078, THRESHOLD = 0.553 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_12/metrics.txt:MTWV = 0.0045, THRESHOLD = 0.891000000000001 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_11/metrics.txt:MTWV = 0.0066, THRESHOLD = 0.720000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_18/metrics.txt:MTWV = 0.0058, THRESHOLD = 0.867000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_20/metrics.txt:MTWV = 0.0072, THRESHOLD = 0.785000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_11/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.807000000000001 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/oov_kws_21/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.547 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_18/metrics.txt:MTWV = 0.0071, THRESHOLD = 0.666 diff --git a/egs/babel/s5c/results/RESULTS.106-tagalog.flp b/egs/babel/s5c/results/RESULTS.106-tagalog.flp new file mode 100644 index 00000000000..72568cebf81 --- /dev/null +++ b/egs/babel/s5c/results/RESULTS.106-tagalog.flp @@ -0,0 +1,34 @@ +%WER 56.7 | 25332 63009 | 50.6 38.5 10.9 7.3 56.7 32.1 | -1.361 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.4 | 25332 63009 | 57.4 32.7 9.9 5.8 48.4 30.3 | -0.891 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 46.9 | 25332 63009 | 57.4 30.5 12.1 4.3 46.9 30.3 | -0.517 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.7 | 25332 63009 | 58.2 31.1 10.7 4.9 46.7 29.9 | -0.737 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_18/dev10h.pem.ctm.sys +%WER 47.7 | 25332 63009 | 56.1 30.5 13.4 3.9 47.7 30.2 | -0.548 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 56.7 | 25332 63009 | 50.6 38.5 10.9 7.3 56.7 32.1 | -1.361 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.4 | 25332 63009 | 57.4 32.7 9.9 5.8 48.4 30.3 | -0.891 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 46.9 | 25332 63009 | 57.4 30.5 12.1 4.3 46.9 30.3 | -0.517 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.7 | 25332 63009 | 58.2 31.1 10.7 4.9 46.7 29.9 | -0.737 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_18/dev10h.pem.ctm.sys +%WER 47.7 | 25332 63009 | 
56.1 30.5 13.4 3.9 47.7 30.2 | -0.548 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kws_12/metrics.txt:MTWV = 0.4452, THRESHOLD = 0.577 +exp/tri6_nnet/decode_dev10h.pem/kws_11/metrics.txt:MTWV = 0.4778, THRESHOLD = 0.696000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kws_15/metrics.txt:MTWV = 0.4448, THRESHOLD = 0.770000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.4450, THRESHOLD = 0.730000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_12/metrics.txt:MTWV = 0.4452, THRESHOLD = 0.577 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_11/metrics.txt:MTWV = 0.4778, THRESHOLD = 0.696000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_15/metrics.txt:MTWV = 0.4448, THRESHOLD = 0.770000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.4450, THRESHOLD = 0.730000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_8/metrics.txt:MTWV = 0.0173, THRESHOLD = 0.809000000000001 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0310, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/dev_oov_kws_21/metrics.txt:MTWV = 0.0164, THRESHOLD = 0.309 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_20/metrics.txt:MTWV = 0.0183, THRESHOLD = 0.851000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_9/metrics.txt:MTWV = 0.5117, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_10/metrics.txt:MTWV = 0.5408, THRESHOLD = 0.504 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_17/metrics.txt:MTWV = 0.5221, THRESHOLD = 0.556 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.5077, THRESHOLD = 0.648 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/eval_oov_kws_10/metrics.txt:MTWV = 0.0038, THRESHOLD = 0.900000000000001 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_10/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.659 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_17/metrics.txt:MTWV = 0.0047, THRESHOLD = 0.889000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_15/metrics.txt:MTWV = 0.0052, THRESHOLD = 0.522 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_8/metrics.txt:MTWV = 0.0173, THRESHOLD = 0.809000000000001 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/metrics.txt:MTWV = 0.0310, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/oov_kws_21/metrics.txt:MTWV = 0.0164, THRESHOLD = 0.309 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_20/metrics.txt:MTWV = 0.0183, THRESHOLD = 0.851000000000001 diff --git a/egs/babel/s5c/results/RESULTS.107-vietnamese.flp b/egs/babel/s5c/results/RESULTS.107-vietnamese.flp new file mode 100644 index 00000000000..e64bca74572 --- /dev/null +++ b/egs/babel/s5c/results/RESULTS.107-vietnamese.flp @@ -0,0 +1,50 @@ +%WER 57.9 | 21875 111957 | 45.4 42.3 12.3 3.2 57.9 36.7 | -1.203 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.3 | 21875 111957 | 53.2 37.3 9.5 3.5 50.3 35.8 | -0.917 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_9/dev10h.pem.ctm.sys +%WER 47.4 | 21875 111957 | 55.1 32.8 12.1 2.6 47.4 35.7 | -0.642 | exp/tri6_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 48.6 | 21875 111957 | 54.3 35.9 9.8 2.9 48.6 35.4 | -0.769 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +%WER 50.4 | 21875 111957 | 51.3 32.4 16.2 1.8 50.4 35.7 | -0.487 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys 
+ +############################################################################################################################# + +#KWS on the dev kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_9/metrics.txt:MTWV = 0.4488, THRESHOLD = 0.601 +exp/tri6_nnet/decode_dev10h.pem/kws_10/metrics.txt:MTWV = 0.4926, THRESHOLD = 0.576 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_15/metrics.txt:MTWV = 0.4589, THRESHOLD = 0.635 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.4477, THRESHOLD = 0.591 + +#KWS on the dev kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/oov_kws_8/metrics.txt:MTWV = 0.0001, THRESHOLD = 0.778 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_11/metrics.txt:MTWV = 0.0024, THRESHOLD = 0.581 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_16/metrics.txt:MTWV = 0.0012, THRESHOLD = 0.596 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_15/metrics.txt:MTWV = 0.0017, THRESHOLD = 0.817 + +############################################################################################################################ + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist2.xml kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_8/metrics.txt:MTWV = 0.2886, THRESHOLD = 0.513 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_11/metrics.txt:MTWV = 0.3672, THRESHOLD = 0.693 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_15/metrics.txt:MTWV = 0.2999, THRESHOLD = 0.792 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.3041, THRESHOLD = 0.693 + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist2.xml kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_10/metrics.txt:MTWV = 0.0000, THRESHOLD = 0 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.873 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_15/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.214 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_15/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.831 + +############################################################################################################################ + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist3.xml kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/eval_kws_9/metrics.txt:MTWV = 0.3791, THRESHOLD = 0.564 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_12/metrics.txt:MTWV = 0.4444, THRESHOLD = 0.406 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_15/metrics.txt:MTWV = 0.3780, THRESHOLD = 0.609 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.3904, THRESHOLD = 0.51 + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist3.xml kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_10/metrics.txt:MTWV = 0.0021, THRESHOLD = 0.724 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_10/metrics.txt:MTWV = 0.0040, THRESHOLD = 0.491 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_15/metrics.txt:MTWV = 0.0032, THRESHOLD = 0.867 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_15/metrics.txt:MTWV = 0.0039, THRESHOLD = 0.105 + +############################################################################################################################ + diff --git a/egs/babel/s5c/run-1-main.sh b/egs/babel/s5c/run-1-main.sh index e01910ffac0..99d74069087 100755 --- a/egs/babel/s5c/run-1-main.sh +++ b/egs/babel/s5c/run-1-main.sh @@ -119,7 +119,7 @@ if [[ ! 
-f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then echo --------------------------------------------------------------------- echo "Training SRILM language models on" `date` echo --------------------------------------------------------------------- - local/train_lms_srilm.sh --dev-text data/dev2h/text \ + local/train_lms_srilm.sh --oov-symbol $oovSymbol --dev-text data/dev2h/text \ --train-text data/train/text data data/srilm fi diff --git a/egs/babel/s5c/run-4-anydecode.sh b/egs/babel/s5c/run-4-anydecode.sh index 68b87ea1e27..312d26911df 100755 --- a/egs/babel/s5c/run-4-anydecode.sh +++ b/egs/babel/s5c/run-4-anydecode.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash set -e set -o pipefail @@ -13,7 +13,6 @@ fast_path=true skip_kws=false skip_stt=false skip_scoring=false -max_states=150000 extra_kws=true vocab_kws=false tri5_only=false @@ -32,7 +31,7 @@ fi #set of scripts will exit when sourcing several of them together #Otherwise, the CTRL-C just terminates the deepest sourced script ? # Let shell functions inherit ERR trap. Same as `set -E'. -set -o errtrace +set -o errtrace trap "echo Exited!; exit;" SIGINT SIGTERM # Set proxy search parameters for the extended lexicon case. @@ -82,8 +81,8 @@ if [ -z $my_data_dir ] || [ -z $my_data_list ] ; then fi eval my_stm_file=\$${dataset_type}_stm_file -eval my_ecf_file=\$${dataset_type}_ecf_file -eval my_kwlist_file=\$${dataset_type}_kwlist_file +eval my_ecf_file=\$${dataset_type}_ecf_file +eval my_kwlist_file=\$${dataset_type}_kwlist_file eval my_rttm_file=\$${dataset_type}_rttm_file eval my_nj=\$${dataset_type}_nj #for shadow, this will be re-set when appropriate @@ -200,12 +199,12 @@ if [ ! -f $dataset_dir/.done ] ; then fi elif [ "$dataset_kind" == "unsupervised" ] ; then if [ "$dataset_segments" == "seg" ] ; then - . ./local/datasets/unsupervised_seg.sh + . ./local/datasets/unsupervised_seg.sh elif [ "$dataset_segments" == "uem" ] ; then . ./local/datasets/unsupervised_uem.sh elif [ "$dataset_segments" == "pem" ] ; then ##This combination does not really makes sense, - ##Because the PEM is that we get the segmentation + ##Because the PEM is that we get the segmentation ##and because of the format of the segment files ##the transcript as well echo "ERROR: $dataset_segments combined with $dataset_type" @@ -230,7 +229,7 @@ if [ ! -f $dataset_dir/.done ] ; then make_plp ${dataset_dir} exp/make_plp/${dataset_id} plp touch ${dataset_dir}/.plp.done fi - touch $dataset_dir/.done + touch $dataset_dir/.done fi ##################################################################### # @@ -240,12 +239,15 @@ fi echo --------------------------------------------------------------------- echo "Preparing kws data files in ${dataset_dir} on" `date` echo --------------------------------------------------------------------- +lang=data/lang +set -x if ! $skip_kws ; then . ./local/datasets/basic_kws.sh || exit 1 - if $extra_kws ; then + if $extra_kws ; then + L1_lex=data/local/lexiconp.txt . ./local/datasets/extra_kws.sh || exit 1 fi - if $vocab_kws ; then + if $vocab_kws ; then . 
./local/datasets/vocab_kws.sh || exit 1
  fi
fi
@@ -257,7 +259,7 @@ fi
####################################################################
##
-## FMLLR decoding
+## FMLLR decoding
##
####################################################################
decode=exp/tri5/decode_${dataset_id}
@@ -297,7 +299,7 @@ if $tri5_only; then
fi

####################################################################
-## SGMM2 decoding
+## SGMM2 decoding
## We Include the SGMM_MMI inside this, as we might only have the DNN systems
## trained and not the PLP system. The DNN systems build only on top of the tri5 stage
####################################################################
@@ -493,5 +495,5 @@ for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \
      ${dataset_dir} data/lang $decode
  fi
done
-echo "Everything looking good...."
+echo "Everything looking good...."
exit 0
diff --git a/egs/babel/s5c/run-4b-anydecode-bnf.sh b/egs/babel/s5c/run-4b-anydecode-bnf.sh
index 27c68bacfd8..205f37b46d9 100755
--- a/egs/babel/s5c/run-4b-anydecode-bnf.sh
+++ b/egs/babel/s5c/run-4b-anydecode-bnf.sh
@@ -45,7 +45,7 @@ if [ -z "$unsup_string" ] ; then
  fi
fi

-if ! echo {dev10h,dev2h,eval,unsup,shadow}{,.uem,.seg} | grep -w "$type" >/dev/null; then
+if ! echo {dev10h,dev2h,eval,unsup,shadow}{,.pem,.uem,.seg} | grep -w "$type" >/dev/null; then
  # note: echo dev10.uem | grep -w dev10h will produce a match, but this
  # doesn't matter because dev10h is also a valid value.
  echo "Invalid variable type=${type}, valid values are " {dev10h,dev2h,eval,unsup}{,.uem,.seg}
@@ -247,11 +249,13 @@ if [ -f $exp_dir/tri7_nnet/.done ] &&
    touch $decode/.done
  fi

-  local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
-    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \
-    "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
-    ${datadir} data/lang $decode
fi
-echo "$0: Everything looking good...."
+decode=$exp_dir/tri7_nnet/decode_${dirid}
+local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
+  --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \
+  "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
+  ${datadir} data/lang $decode
+
+echo "$0: Everything looking good...."
exit 0
diff --git a/egs/babel/s5d/EXAMPLE.vietnamese b/egs/babel/s5d/EXAMPLE.vietnamese
new file mode 100644
index 00000000000..f5dde82c364
--- /dev/null
+++ b/egs/babel/s5d/EXAMPLE.vietnamese
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+#This is an example sequence of commands for running the default Kaldi Babel OP1 system
+#It is not assumed that you will run it as a script, even though you can try :)
+
+./run-1-main.sh
+./run-2a-nnet-ensemble-gpu.sh
+./run-2b-bnf.sh --semisupervised false --ali-dir exp/tri5_ali/
+./run-3b-bnf-sgmm.sh --semisupervised false
+./run-3b-bnf-nnet.sh --semisupervised false
+
+##Training of the automatic segmenter
+./run-2-segmentation.sh
+
+##Decoding the automatic segmentation of dev2h subset. dev2h.pem would mean decoding
+##the dev2h subset using the officially provided segmentation.
+##Also possible to run dev10h.pem, dev10h.uem, dev10h.seg and so on...
+./run-4-anydecode.sh --dir dev2h.seg
+./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised false --extra-kws true
+
+##Decoding of the unsupervised data
+./run-4-anydecode.sh --dir unsup.seg --skip-kws true --skip-stt true
+./run-4b-anydecode-bnf.sh --dir unsup.seg --skip-kws true --skip-stt true --semisupervised false
+
+##Get the one-best path and the weights for frame-weighting of posteriors
+./local/best_path_weights.sh --cmd "$train_cmd" data/unsup.seg/ data/lang \
+  exp/tri6b_nnet/decode_unsup.seg/ \
+  exp/sgmm5_mmi_b0.1/decode_fmllr_unsup.seg_it1/ \
+  exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_unsup.seg_it1 \
+  exp_bnf/tri7_nnet/decode_unsup.seg \
+  exp_bnf_semisup/best_path_weights/unsup.seg
+
+##Semisupervised bottleneck system training (initial setup)
+./run-2b-bnf.sh --semisupervised true --ali-model exp/tri6b_nnet/ \
+  --weights-dir exp/best_path_weights/unsup.seg/decode_unsup.seg/
+
+##Semisup training, SGMM+bMMI on top of the BN features
+./run-3b-bnf-sgmm.sh --semisupervised true
+##Semisup training, pNorm DNN on top of the BN features
+./run-3b-bnf-nnet.sh --semisupervised true
+
+##And decoding again. We decode the unsup.seg again to do the second run of the
+##semisupervised training
+./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised true --extra-kws true
+./run-4b-anydecode-bnf.sh --dir unsup.seg --skip-kws true --skip-stt true --semisupervised true
+
+##One-best output and frame weights for the second run of the semisup training
+./local/best_path_weights.sh --cmd "$train_cmd" data/unsup.seg/ data/lang \
+  exp_bnf_semisup/sgmm7_mmi_b0.1/decode_fmllr_unsup.seg_it1 \
+  exp_bnf_semisup/tri7_nnet/decode_unsup.seg \
+  exp/tri6b_nnet/decode_unsup.seg/ \
+  exp/sgmm5_mmi_b0.1/decode_fmllr_unsup.seg_it1/ \
+  exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_unsup.seg_it1 \
+  exp_bnf/tri7_nnet/decode_unsup.seg \
+  exp_bnf_semisup2/best_path_weights/unsup.seg
+
+##Second run of the semisup training
+./run-2b-bnf.sh --unsup-string "_semisup2" --semisupervised true --ali-model exp/tri6b_nnet/ \
+  --weights-dir exp_bnf_semisup2/best_path_weights/unsup.seg/decode_fmllr_unsup.seg_it1/
+
+./run-3b-bnf-sgmm.sh --semisupervised true --unsup_string "_semisup2"
+./run-3b-bnf-nnet.sh --semisupervised true --unsup_string "_semisup2"
+
+##Decode again to see if we got an improvement
+./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised true --unsup_string "_semisup2" --extra-kws true
+
+
+##Decoding of the dev10h (all systems, all stages)
+./run-4-anydecode.sh --dir dev10h.seg --extra-kws true
+./run-4b-anydecode-bnf.sh --dir dev10h.seg --semisupervised false --extra-kws true
+./run-4b-anydecode-bnf.sh --dir dev10h.seg --semisupervised true --extra-kws true
+./run-4b-anydecode-bnf.sh --dir dev10h.seg --semisupervised true --extra-kws true --unsup_string "_semisup2"
+
+##Decoding of the shadow.seg (combination of dev10h.seg and eval.seg)
+##We did this for eval run as a kind of "sanity check" -- we check the shadow.seg/dev10h.seg subset
+##performance vs the standalone dev10h.seg performance to catch (hopefully) possible problems
+./run-4-anydecode.sh --dir shadow.seg --extra-kws true
+./run-4b-anydecode-bnf.sh --dir shadow.seg --semisupervised false --extra-kws true
+./run-4b-anydecode-bnf.sh --dir shadow.seg --semisupervised true --extra-kws true
+./run-4b-anydecode-bnf.sh --dir shadow.seg --semisupervised true --extra-kws true --unsup_string "_semisup2"
+
+
+
+#This prepares for separation/split of the shadow dataset into the devset, which we can evaluate
+# and the eval set, which 
we will submit
+#Note: we do this only once, for ./data, as we do not really need anything else
+#just the file lists...
+#NB: there was an oversight in one of the scripts that was causing the ctm files to contain
+#NB: incorrect channel info (A instead of 1)
+#NB: To fix that, you can run something like this:
+#NB: find exp/ -name "shadow.seg.ctm" | xargs -t -n 1 sed -i'.bakx' 's/ A / 1 /g'
+./local/nist_eval/create_compound_set.sh --evlset eval.seg --devset dev10h.seg --tgtdir data/shadow.seg
+
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp/tri6b_nnet/decode_shadow.seg
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp/tri6b_nnet/decode_shadow.seg
+
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp/sgmm5_mmi_b0.1/decode_*shadow.seg*
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp/sgmm5_mmi_b0.1/decode_*shadow.seg*
+
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.seg*
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.seg*
+
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp_bnf_semisup/sgmm7_mmi_b0.1/decode_*shadow.seg*
+./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp_bnf_semisup/sgmm7_mmi_b0.1/decode_*shadow.seg*
+
+#The following commands will actually do two things
+#a) looking at the performance on the dataset given as --master, they will figure out the correct LMWT
+#b) symlink the appropriate evaluation result file under the correct EXPID into the ./release directory
+#Warning: it's a lot of files so it's easy to get confused!
+./local/nist_eval/make_release.sh --dryrun false --dir exp/sgmm5_mmi_b0.1 --data data/shadow.seg --master dev10h.seg lang.conf ./release
+./local/nist_eval/make_release.sh --dryrun false --dir exp/tri6b_nnet --data data/shadow.seg --master dev10h.seg lang.conf ./release
+./local/nist_eval/make_release.sh --dryrun false --dir exp_bnf/sgmm7_mmi_b0.1 --data data/shadow.seg --master dev10h.seg lang.conf ./release
+./local/nist_eval/make_release.sh --dryrun false --dir exp_bnf_semisup/sgmm7_mmi_b0.1 --extrasys SEMISUPX --data data/shadow.seg --master dev10h.seg lang.conf ./release
+
+#Combine results (what we call 4way-combo)
+
diff --git a/egs/babel/s5d/README.txt b/egs/babel/s5d/README.txt
new file mode 100644
index 00000000000..6bc3ddacba7
--- /dev/null
+++ b/egs/babel/s5d/README.txt
@@ -0,0 +1,82 @@
+How to setup the BABEL database training environment
+====================================================
+a) Preparation: you need to make sure the BABEL data and the F4DE scoring software
+   are set up as they are in JHU, or change this setup accordingly. This will probably
+   be hard and will involve some trial and error. Some relevant pathnames can be
+   found in conf/lang/* and ./path.sh
+
+   Link one of the config files in conf/languages to ./lang.conf. E.g.:
+   ln -s conf/languages/105-turkish-limitedLP.official.conf lang.conf
+
+
+b) If you plan to work on one or more languages, the following approach is advised.
+   aa) create an empty directory somewhere, of your choice
+       (
+        mkdir 206-zulu-llp; cd 206-zulu-llp
+       )
+
+   ab) copy cmd.sh and path.sh (you will probably need to make some changes in these);
+       especially pay attention to KALDI_ROOT in path.sh and possibly switch to using
+       run.pl in cmd.sh
+       (
+        cp /path/to/kaldi/egs/babel/s5b/{cmd.sh,path.sh} .
+       )
+
+   ac) symlink all the necessary directories into that directory
+       (
+        ln -s /path/to/kaldi/egs/babel/s5b/{conf,steps,utils,local} .
+       )
+   ad) link the necessary scripts (see below)
+       {
+        ln -s /path/to/kaldi/egs/babel/s5b/run-1-main.sh .
+       }
+   ae) link the appropriate language-specific config file to lang.conf in
+       each directory.
+       (
+        206-zulu-llp$ ln -s conf/lang/206-zulu-limitedLP.official.conf lang.conf
+       )
+
+
+Running the training scripts
+===================================================
+
+You run the scripts in order, i.e.
+  run-1-main.sh
+  run-2a-nnet.sh and run-2-bnf.sh may be run in parallel, but run-2-bnf.sh should be
+    run on a machine that has a GPU.
+  run-3-bnf-system.sh trains an SGMM system on top of bottleneck features from run-2-bnf.sh
+  run-4-test.sh is decoding with the provided segmentation (we get this from CMU)
+  run-5-anydecode.sh seems to be decoding with the segmentation provided
+
+
+
+Official NIST submission preparation
+==================================================
+The make_release.sh script might come in handy.
+The script evaluates the performance of the sgmm2_mmi_b.0.1 system on
+the eval.uem dataset and uses the same set of parameters to
+determine the path inside the test.uem dataset.
+
+./make_release.sh --relname defaultJHU --lp FullLP --lr BaseLR --ar NTAR \
+  conf/languages/106-tagalog-fullLP.official.conf /export/babel/data/releases
+
+
+
+
+
+./run-1-main.sh
+./run-2a-nnet-ensemble-gpu.sh
+./run-2b-bnf.sh --semisupervised false --ali-dir exp/tri5_ali/
+./run-3b-bnf-sgmm.sh --semisupervised false
+./run-3b-bnf-nnet.sh --semisupervised false
+
+./run-2-segmentation.sh
+
+./run-4-anydecode.sh --dir dev2h.seg
+./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised false --extra-kws true
+
+
+
+./run-4-anydecode.sh --dir unsup.seg --skip-kws true --skip-stt true
+./run-4b-anydecode-bnf.sh --dir unsup.seg --skip-kws true --skip-stt true --semisupervised false
+
diff --git a/egs/babel/s5d/RESULTS.txt b/egs/babel/s5d/RESULTS.txt
new file mode 100644
index 00000000000..c87bf7f2b8b
--- /dev/null
+++ b/egs/babel/s5d/RESULTS.txt
@@ -0,0 +1,8 @@
+The results are by default to be found in /decode_*, where the individual decode_* directories correspond to the language model weight.
+
+An aesthetically pleasing table with the results can be obtained, for example, like this (YMMV, as may your aesthetic feelings):
+find exp/sgmm5_mmi_b0.1 -name "*.ctm.sys" -not -name "*char.ctm.sys" -ipath "*fmllr_eval.pem*" | xargs grep 'Sum/Avg' | sed 's/:* *| */ /g' | sed 's/ */ /g' | sort -n -k 9 | column -t
+
+Similarly, for the kws outputs, the same kind of table can be obtained as
+find exp/sgmm5_mmi_b0.1 -name "sum.txt" -ipath "*fmllr_eval.pem*" | xargs grep "| Occurrence" | cut -f 1,13 -d '|'| sed 's/:|//g' | column -t | sort -k 2 -n -r
+
diff --git a/egs/babel/s5d/RUN_UNICODE_SYSTEM b/egs/babel/s5d/RUN_UNICODE_SYSTEM
new file mode 100644
index 00000000000..79168d4c3bc
--- /dev/null
+++ b/egs/babel/s5d/RUN_UNICODE_SYSTEM
@@ -0,0 +1,9 @@
+./run-1-main-unicode.sh --unicode-lexicon true --morfessor true --tri5-only true
+
+# For tri5 only
+./run-4-anydecode.sh --fast-path false --tri5-only true --skip-kws true
+
+# For lstm
+./run-4-anydecode.sh --fast-path false --tri5-only true --skip-kws true --data-only true
+./local/nnet3/run_lstm.sh
+./run-4-anydecode.sh --nnet3-model nnet3/lstm_sp --is-rnn true --dir dev10h.pem --skip-kws true
diff --git a/egs/babel/s5d/UNICODE_README b/egs/babel/s5d/UNICODE_README
new file mode 100644
index 00000000000..b8b2358436f
--- /dev/null
+++ b/egs/babel/s5d/UNICODE_README
@@ -0,0 +1,119 @@
+Graphemic Lexicon from Unicode
+================================================================================
+
+General Description
+----------------------------------------
+Given some form of word list in an unknown language, we must find pronunciations
+for each word. When the language is written alphabetically, the letters
+themselves can be used as word pronunciations. In English for instance there
+would be 26 phones, and possibly a few extra for the rarely occurring letters,
+
+  "ö","é","è","â", ...
+
+which occur primarily in foreign loan words.
+
+Some languages use syllabic systems or partially alphabetic scripts, for which
+nothing close to a 1-1 mapping from graphemes to phonemes exists. Examples of
+such are Abugidas and Abjads.
+
+The premise of this system is that for most languages, there exists a unicode
+description of the graphemes from which the phonetics may be recovered.
+
+While non-alphabetic scripts present an obvious challenge, we find that even
+for languages such as English and French, the issue of whether or not to treat
+each accented character as a separate phone presents a problem. After all,
+pâté, pâte, and pate are all English words with different pronunciations.
+Resume and résumé are also examples. And this for a language that is generally
+considered unaccented. In French, which is known to have many diacritics
+affecting pronunciation, we nonetheless find words such as forêt and bosquet,
+with essentially the same meaning, whose "e" sounds have very much the same
+pronunciation. In some scripts, such diacritics are vowel markers, indicators
+of tone, or stress, and probably many other linguistic phenomena we have not
+yet encountered.
+
+Fortunately, the unicode representation of such graphemes has an alternate
+normalization, "NFD", which decomposes a grapheme into its constituent parts.
+In this implementation we treat such marks as modifying the preceding grapheme.
+When the grapheme occurs frequently enough, the accented grapheme is
+automatically considered a separate phoneme. For infrequent accented graphemes
+we treat the accent as a tag and use the tag as an extra question in the tree
+building step.
+
+The issue of syllable boundaries in words is mostly important for keyword search.
+Syllables can be created by training a morphological analyser on the
+conversational transcripts, and then segmenting each word into its learned
+morphemes.
+
+Usage
+----------------------------------------
+All the scripts for creating the graphemic lexicon are located in local/lexicon,
+except for prepare_unicode_lexicon.py. Run ...
+
+./run-1-main-unicode.sh --unicode-lexicon true --morfessor true
+
+for a full system run using a unicode-lexicon and morfessor.
+
+The general structure is:
+
+1. Generate a list of unique words in the training data. Just use the word
+   entries of the filtered_lexicon if available. Do not include words present
+   in conversation transcriptions such as , etc.
+
+local/lexicon/phone2morph_lexicon.py
+
+2. Use morfessor to create somewhat logical syllabic units. Train the system
+   on the conversational transcriptions for instance, though any body of text
+   in the language should do. The conversational transcriptions were used in
+   this script, however.
+
+3. Segment each word in the word list into its morphemes. Represent this as
+   a lexicon of sorts.
+
+local/lexicon/make_unicode_lexicon.py
+
+4. Use the morphemic lexicon created in step 3 as input.
+
+5. Get the unicode representation for each grapheme in each word.
+
+local/lexicon/methods/blind_tags_counts.py
+
+6. Convert the unicode representation of each word into actual units with
+   which we derive an entry in the lexicon. This function is actually imported
+   into make_unicode_lexicon.py. It's written this way to allow for more
+   flexibility in processing the unicode descriptions of graphemes.
+
+local/prepare_unicode_lexicon.py
+7. This creates the rest of the data/local directory. It also adds the extra
+   questions derived from the unicode-derived tags to extra_questions.txt.
+
+
+Script Descriptions
+------------------------------------------------------------------------------
+In local/lexicon,
+make_unicode_lexicon.py :
+
+  This script takes as arguments: a lexicon, word-list, or file with distinct
+  space separated words; a path to an output lexicon that will be generated; a
+  directory containing all possible methods of processing the unicode
+  character descriptions; and the name of the method in the directory to use.
+  Options exist for specifying the type of input file, whether to treat the
+  input lexicon entries as morphemes, etc.
+
+In local/lexicon/methods,
+blind_tags_counts.py
+
+  Each method in the methods directory is supposed to follow a strict format:
+  1. Must have a fmt global specifying the output lexicon format
+     (normally kaldi).
+  2. Must have an encode function which maps the structure in which the
+     unicode character descriptions are stored to entries in the
+     new lexicon we are creating.
+  3. Certain input arguments, especially a table argument for the table
+     containing the mapping between unicode graphemes and lexical entries.
+
+
+In local/lexicon/methods,
+phone2morph_lexicon.py
+
+  This script takes an input word list, and outputs a morphemic dictionary.
+
diff --git a/egs/babel/s5d/babel.html b/egs/babel/s5d/babel.html
new file mode 100644
index 00000000000..9848e6566f8
--- /dev/null
+++ b/egs/babel/s5d/babel.html
@@ -0,0 +1,788 @@
+
+
+
+
+

Description of Kaldi subsystems

+  This is a description of the complete Kaldi sub-system, covering all of its
+  components. It will be referred to from the system descriptions of the various
+  Kaldi sub-systems, and from the top-level system description of the RADICAL team.
+

1. Abstract

+ +

+  The Kaldi keyword search system is based mostly on a conventional LVCSR pipeline.
+  We have three main sub-systems, which separately decode the data;
+  we then use conventional system combination techniques. The three main systems are:
+

    +
  • SGMM+BMMI. This is a Subspace Gaussian Mixture Model (SGMM) of the type described in [2], + discriminatively trained with Boosted MMI [3]. +
  • DNN. This is a Deep Neural Network with p-norm activations as described in [8]. + For LimitedLP systems we improve performance with an ensemble method which we will + describe below. +
  • Bottleneck SGMM+BMMI system. In this system we train a DNN with a bottleneck layer + of dimension 42, and use it to extract features which we train an SGMM+BMMI system on. +
+  For LimitedLP we add a fourth system, which is a version of the bottleneck system where
+  the DNN used to extract the bottleneck features is trained on automatically transcribed data as
+  well as the LimitedLP data. For FullLP we add a different fourth system, which is
+  a "sequence-trained" version of the DNN, trained with the State-level Minimum Bayes
+  Risk criterion (a variant of MPE).
+
+  We also include a fifth, less conventional sub-system, based on the "Point Process Model" (PPM),
+  that uses phone-level posteriors from a DNN trained for one of the systems above.
+  This will be described in Section 4.16. Its outputs are combined with our systems above
+  for keyword spotting but not for transcription.

+  Our keyword search pipeline is based on lattice-indexing as described in [5]; the lattices
+  are generated using the "exact" lattice generation method described in [6].
+  To handle out of vocabulary (OOV) keywords, we use the method of [4], which constructs,
+  for an OOV keyword sequence, proxy keyword sequences consisting of phonetically similar
+  word sequences. This year we added a "lexicon expansion" method, in which we generate plausible
+  new words using a syllable-level language model and add them to the lexicon and language model
+  when decoding (see Section 4.4). (This even slightly improves the WER). We actually add
+  the original and expanded-lexicon versions of each system to the final system combination,
+  but including non-expanded decodings in the system combination is not really necessary.

+ The code and scripts used for the main Kaldi system are available as part of Kaldi; + see svn://svn.code.sf.net/p/kaldi/code/trunk/. The scripts we used this year are + located in the directory egs/babel/s5b. + + +

2. Notable features

+  A new feature of our system that is shared by all the sub-systems is our
+  pitch features. We describe these in more detail in [7]. This is a
+  pitch extraction algorithm based on the old "getf0" method, but which naturally
+  ensures continuity of the pitch contours even in unvoiced regions. We also
+  derive a continuous-valued voicing feature from the algorithm. Finally we get
+  a three-dimensional feature consisting of pitch, delta-pitch, and a feature
+  derived from probability of voicing (POV). These are appended to the PLP
+  features, giving us consistent gains across languages compared with our
+  previous pitch features (other teams have also reported gains using our
+  features).

+  Something else that is new is the p-norm neural networks [8]. This
+  is a new nonlinearity type that is related to maxout (in that it is a
+  dimension-reducing nonlinearity). This gave us around 1% absolute improvement
+  compared with our old, tanh-based networks. On top of this, for LimitedLP
+  we introduce an ensemble training method. Imagine training four
+  networks from different random seeds. We can average the scores from all
+  of them to get an improvement (around 2% absolute). But we don't like to have
+  to use multiple networks at test time. Our ensemble method introduces a term in
+  the objective function to train the networks' outputs towards each other, to make
+  them more similar, so that at test time we can pick just one of the networks to test with.
+  This gives us three quarters of the improvement from the simple method of averaging the scores,
+  but does not slow us down at test time. We only do this for LimitedLP because it
+  slows down training too much to be practical for FullLP.

+  Our bottleneck feature system is heavily modified since last year, and
+  has improved.
+  Firstly, we implemented it all in Kaldi, as opposed to last year's system which was a
+  hybrid between Kaldi and Theano. This makes the training faster, since Kaldi
+  supports parallelized neural network training, using multiple GPUs. The
+  recipe is basically the same as last year's -- a DNN with a 42-dimensional bottleneck, appending
+  these features to the baseline fMLLR features, splicing across 3 frames and doing
+  LDA dimension reduction to 60 dimensions, then training an SGMM system on these features.
+  However, results seemed a little better with the Kaldi implementation, perhaps 0.5%
+  absolute. It's hard to say why, as there are too many differences. The thing that is
+  new is that we implemented semi-supervised training in the LimitedLP case. We
+  use the 1-best output from decoding as supervision for the untranscribed data, but only
+  train on a frame if the state-level posterior is above a threshold (we use a low threshold
+  of 0.35 for this case).

+  Our point process model system (Section 4.16), while it gets only around half
+  the ATWV of our conventional system by itself, gives us large improvements in
+  combination with our conventional system, of around 3 to 4% ATWV. This is an
+  unconventional "exemplar-based" approach.

+  Our expanded lexicon (Section 4.4) is also new. This method takes
+  as input the provided lexicon, and uses it to hypothesize likely new words
+  and their pronunciations, along with their probabilities. We generate 2 million
+  extra words, with associated probabilities, and we allocate the "unknown-word"
+  probability mass of our language model to these words. Our method is
+  "backwards", in that we first generate the phonetic sequences, and then
+  work out the spellings. The improvement this gives is extremely variable.
+  For Bengali and Assamese, it makes essentially no difference. But for Zulu
+  LimitedLP using the development keywords on the development data, it improved
+  the Kaldi-only ATWV from 0.20 to 0.28.

3. Extra resources

+ + For the submitted Kaldi systems we did not use any linguistic or other + resources outside of the language development pack. For our LimitedLP + submissions, we did use the FullLP and "untranscribed" data for unsupervised + training, without using the transcriptions. (This is allowed even in the + BaseLR condition). + +

4. System description

+ +

4.1 Low level features

+  Our basic features are standard 13-dimensional PLP features. To these we
+  append 3-dimensional features derived from our "Kaldi" pitch tracker, giving a
+  16-dimensional "base feature". Our pitch tracker and the configuration we used
+  are described in [7]. These features were extremely helpful on tonal languages:
+  on Cantonese and Vietnamese last year, our tests showed as much as 6% absolute
+  WER improvement compared with no pitch features. In general our new "Kaldi"
+  pitch features give us about twice as much improvement as our old features from
+  last year that were based on SAcC.

4.2 Segmentation

+ + Our segmentation is performed via decoding the whole-conversation data using a + GMM-based model. The model is trained in the normal way for an LVCSR system, + but the decoding graph is derived from a phone bigram language model (unsmoothed, + to avoid blowup due to context dependency). We do a single pass of decoding, + without adaptation; the features are processed as spliced-frames+LDA+STC. The + model used for segmentation is trained on transcripts that included certain + data we would normally exclude: segments containing only non-speech events such + as noise are included in the transcripts. +

+  The output of the decoding above is used as the input to the following algorithm.
+  First we map the frames of the decoder best path to one of three classes: speech,
+  noise or silence. The segmentation algorithm is as follows (a code sketch is
+  given after the list):
+

    +
  • Get initial segments: Contiguous regions consisting of speech and/or noise are marked as the initial segments.
  • Pad the initial segments: Non-speech frames on either side of the initial segments are included in the segments one at a time until there are no more non-speech frames adjacent to any segments (unlikely) or until the non-speech frames make up about 5% of the total frames in the conversation.
  • Merge segments: Two segments are merged if the length of non-speech frames between the two segments is less than about 1 second and the merged segment is not longer than 10 seconds.
  • Split long segments: Segments that are longer than 10s are split into equal pieces, each shorter than 10s.
  • Remove segments containing only non-speech frames, i.e. only silence and noise.
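As a rough illustration only -- this is a sketch in Python, not the actual local/ scripts, and the function name, class labels and frame-shift assumption are ours -- the steps above could look like this:

def frames_to_segments(classes, pad_frac=0.05, merge_gap=100, max_len=1000):
    # classes: per-frame labels "speech", "noise" or "silence";
    # 100 frames is about 1 second at a 10ms frame shift.
    n = len(classes)
    # 1) Initial segments: contiguous runs of speech and/or noise.
    segs, start = [], None
    for i, c in enumerate(classes + ["silence"]):   # sentinel closes a final run
        if c in ("speech", "noise"):
            if start is None:
                start = i
        elif start is not None:
            segs.append([start, i])                 # half-open interval [start, end)
            start = None
    # 2) Pad with adjacent silence frames, one at a time, up to ~5% of all frames.
    budget, grew = int(pad_frac * n), True
    while budget > 0 and grew:
        grew = False
        for s in segs:
            if budget > 0 and s[0] > 0 and classes[s[0] - 1] == "silence":
                s[0] -= 1; budget -= 1; grew = True
            if budget > 0 and s[1] < n and classes[s[1]] == "silence":
                s[1] += 1; budget -= 1; grew = True
    # 3) Merge segments separated by a short gap, if the result stays short enough.
    merged = []
    for s in segs:
        if merged and s[0] - merged[-1][1] < merge_gap and s[1] - merged[-1][0] <= max_len:
            merged[-1][1] = s[1]
        else:
            merged.append(s)
    # 4) Split over-long segments into equal pieces, each shorter than max_len.
    out = []
    for s, e in merged:
        pieces = -(-(e - s) // max_len)             # ceiling division
        step = float(e - s) / pieces
        out.extend([int(s + k * step), int(s + (k + 1) * step)] for k in range(pieces))
    # 5) Drop segments that contain no speech frames at all.
    return [[s, e] for s, e in out if "speech" in classes[s:e]]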

4.3 Lexicon (non-expanded)

+  Here we describe our basic lexicon, before expansion. The BABEL lexicon
+  comes with syllable boundaries marked using tabs, and syllable-level tags
+  marking tone. We attach the tone tags to the phones, so that a syllable
+  k a t _1 would become the phone sequence k_1 a_1 t_1 (see the code sketch below).
+  Formally, each tone version of a phone is a separate phone, but see
+  our explanation of context dependency below.
+  We noticed that in some languages, the original lexicon seemed to have been expanded
+  with some kind of script where some original phone was mapped to two alternative
+  phones. That was the case for Vietnamese last year and Zulu this year, and it
+  was helpful to reverse this mapping. Our mapping for Zulu is as follows:
+
k_> g_<
3 e
R l
o O
b_< b
t_> th
+ After generating a lexicon as described above, we perform the standard procedure + in Kaldi training scripts, to add word-position dependency. Each phone is mapped + to five versions of the phone depending on whether it's at the beginning, middle + or end of a word, or is a singleton phone, or is a nonword phone (e.g. optional + silence in the lexicon). By this point the phone set is quite large, but again, + see our explanation of context dependency below . +

+  We have four phones in our inventory apart from those that appear in words;
+  they are all modeled in a context-independent way, using a different topology
+  (5 states, where the middle 3 states all have transitions to each other). These are
+  for silence, noise, vocalized-noise and unknown-words. The difference between
+  vocalized noise and unknown-words is that vocalized noise models things like coughs
+  and laughs, whereas the unknown-word phone models words whose pronunciation is not
+  known (mainly so we can align them during training).

4.4 Lexicon (expanded)

+ + As mentioned above, we perform lexicon expansion to improve our ability to decode + OOV words. The lexicon expansion procedure produces pronunciations and probabilities + for the generated words, so that we know how to allocate the "unknown-word" probability + mass in the language model. The unknown words are introduced as unigrams into our + ARPA language model, with probabilities equal to the probabilities we estimated, + times the unknown-word fraction (equal to the token OOV rate). +

+  The lexicon expansion procedure works as follows (but note that lexicon expansion is
+  not the only thing we do to handle OOVs; see also Section 4.15). We first take all the entries
+  in our original lexicon and view them as sentences, where each syllable corresponds to
+  one symbol (we ignore the spelling). We train an ARPA language model on this with
+  SRILM; a 3-gram "modified Kneser-Ney with interpolation" seemed to work the best.
+  We then generate a large number of "sentences" from this language model: 20 million or so.
+  For each unique sentence among those generated, we compute its language model
+  probability; we then exclude the sentences that correspond to words in the original
+  lexicon, take the 2 million best ones, and these will become the pronunciations of
+  our lexicon entries.

+  A lexicon entry needs a spelling as well as a pronunciation, and to get the spellings
+  we use the g2p tool from Sequitur in reverse to produce the most likely
+  spellings for each pronunciation. We reverse it by taking each lexicon entry, e.g.
+
+    hi   h iy
+
+  and reversing it to produce something like
+
+    hiy  h i
+
+  Actually we don't do it exactly this way, because we want iy to appear as a single
+  symbol on the left, rather than as a sequence of two symbols. So we map the phones
+  to ASCII symbols first (see the sketch below). When doing so we treat tags (e.g. tones)
+  separately, so each tag has its own ASCII symbol, and a phone with a tag would be
+  rendered as two ASCII symbols.
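A minimal sketch of this reversal, under the assumption of a simple in-memory lexicon (the file handling and symbol inventory are invented for illustration): each phone gets a single ASCII symbol, the pronunciation becomes the g2p "word", and the spelling, split into letters, becomes the g2p "pronunciation".

import string

def reverse_lexicon(entries):
    phone2sym = {}                      # each phone (or tag) -> one ASCII symbol
    symbols = iter(string.ascii_uppercase + string.ascii_lowercase)
    out = []
    for word, phones in entries:
        for p in phones:
            phone2sym.setdefault(p, next(symbols))
        out.append(("".join(phone2sym[p] for p in phones),  # pronunciation as "word"
                    " ".join(word)))                        # spelling as "phones"
    return out, phone2sym

print(reverse_lexicon([("hi", ["h", "iy"])])[0])   # [('AB', 'h i')]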

+  We use g2p to generate a list of the top few likely spellings for each of the generated
+  pronunciations. We take the pronunciations we generated and the probabilities of their spellings,
+  and convert them into a list of words with probabilities on the words, and a list of
+  pronunciations for each word with associated pronunciation probabilities. This is the output
+  of the lexicon expansion and it is used to create the lexicon and language model that we
+  decode with.

+ We ran two versions of each system, one with and one without the lexicon + expansion, because we wanted to see how much effect it was having. Because we + had them both available, we decided to combine both versions for the final + system combination, but this combination made very little difference to the + results and we could equally well have submitted just the expanded-lexicon + systems. + + +

4.5 Phonetic context dependency

+ + Our phonetic context dependency is a fairly standard setup based on triphone context + and a phonetic decision tree with questions about the phonetic context. However, + we should mention how we handle tone and word-position-dependent phones. The number + of actual phone symbols is quite large; it consists of the number of "base phones" + times five (from word-position dependency), times the number of tones. Firstly, + the decision-tree roots are not separate for each phone symbol, but we have one per + "base phone", with all states sharing a root. The questions can be about the state + of the HMM, or about the left phone, the central phone, or the right phone. + Each question is simply a set of phone symbols. However, in constructing the questions + we make use of the structure of the phone symbols. Each question is either about + the tone (or some other tag), about the word-position, or about the "base-phone", + and the questions about the base phone consist of sets of base-phones that are derived + from a binary tree clustering of the acoustic statistics from the central HMM-states + of all the phones. + +

4.6 Language models

+ + Our language models are created using SRILM using the training transcripts. + We automatically select the best one from among a range of smoothing rules and + count cutoffs, using perplexity on held-out data as the criterion; a typical + chosen language model is a good-Turing smoothed 3-gram. + +

4.7 Feature processing and adaptation

+  Our base features, as described above, are 16-dimensional (PLP + pitch) features.
+  We process these by splicing with 3 frames of left and right context, doing
+  LDA (with the context-dependent states as the classes), and then estimating
+  an STC/MLLT transform [13] along with our models. We then use speaker adaptation
+  based on fMLLR, which is also done during training (i.e. our models are speaker adaptive).
+  At test time the transforms are obtained by decoding with a GMM-based model.
+  Our SGMM models use speaker vectors as an additional form of adaptation on top of
+  this.

4.8 Subspace Gaussian Mixture Models (SGMMs)

+ + Two of the branches of our systems are based on SGMMs [14], as mentioned in the + introduction. Our SGMMs are the "SGMM2" recipe of Kaldi; this uses + the "symmetric" extension of SGMMs as described in [2], and also a substate-tying + scheme that uses a two-level version of the phonetic decision tree, and is similar + in spirit to the Gaussian tying used in BBN's Byblos system. +

+ The main tunable parameters of the SGMM training are given below: + + + + +
Num-gauss-UBM Num-leaves Num-substates
LimitedLP 750 5000 18000
FullLP 800 10000 80000
+ The number of "leaves per group" in the substate-tying scheme is set at its normal value, which + is 5. + + +

4.9 Deep Neural Networks

+  The deep neural network training setup we use in Kaldi is one of two parallel setups that
+  we maintain, "Karel's setup" and "Dan's setup". This system uses "Dan's setup". The
+  training procedure differs in a number of ways from previously published methods, and
+  for reasons of time and space we can't document it fully here; see the Kaldi
+  documentation for more information.
+  The most salient point is that the setup allows us to train a neural network in parallel
+  on multiple GPUs, which substantially decreases the training time. For example, for Zulu, the
+  FullLP system took 11 hours to train for 25 epochs on 8 GPUs.
+  The LimitedLP system took 7 hours to train for 25 epochs on 4 GPUs, but note that we
+  were training 4 networks at the same time, which slowed down the training by roughly a factor
+  of 4.
4.9.1 p-norm nonlinearities
+  Our major improvement to our DNN system was the introduction of "p-norm" nonlinearities.
+  This is described in [8]. The inputs to our DNNs are 40-dimensional fMLLR features, obtained
+  via first-pass decoding with our GMM system. These are spliced across a 9-frame context window
+  (4 frames on each side), and processed with an LDA-like transform to decorrelate them.
+  The FullLP system has four hidden layers with 4000 as the input dimension to the nonlinearity
+  and 400 as the output dimension (so the group size is 10). There are 12000 output neurons
+  in the softmax layer; this is more than the number of context-dependent states (which is
+  about 5000), because of the "mixing-up" as described in the Kaldi documentation.
+  For the LimitedLP system the input/output dimensions are 3000/300 and the softmax layer dimension
+  is 5000 (versus about 2000 context-dependent states).
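A minimal numpy sketch of the p-norm nonlinearity of [8]: the layer input is split into groups (group size 10 here, matching the 4000 -> 400 dimensions quoted above) and each output is the p-norm of its group; p = 2 is the value reported to work well in [8].

import numpy as np

def pnorm(x, group_size=10, p=2.0):
    groups = x.reshape(-1, group_size)                    # (out_dim, group_size)
    return (np.abs(groups) ** p).sum(axis=1) ** (1.0 / p)

y = pnorm(np.random.randn(4000))   # 400-dimensional layer output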
4.9.2 Ensemble training
+  For the LimitedLP system we improve our system via a novel "ensemble training" method.
+  This involves training four versions of the neural network in parallel. We initialize
+  four networks using four different random seeds. During training, we train them
+  towards each other by adding a term in the objective function which penalizes the
+  K-L divergence between their outputs. Practically speaking, this means interpolating
+  the "hard label" for each frame with a "soft label" derived from the averaged
+  output of all four neural nets. The amount of
+  the "soft label" we add to the "hard" label is determined by a constant that we vary
+  from about 3 to 5 during training, so the extent of "training towards each other" gets
+  stronger as we train.

+ During decoding, we pick just one of the systems arbitrarily. Since it has been + trained towards the other networks, it acts a little bit like an ensemble of + networks, even though it is just one network. This gives us about 1.5% WER + improvement. + +
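A hedged sketch of the ensemble targets (the weight schedule here is illustrative; the text above only says the strength of the soft part is increased during training):

import numpy as np

def ensemble_target(posteriors, label, soft_weight):
    # posteriors: list of per-network posterior vectors for one frame
    hard = np.zeros_like(posteriors[0])
    hard[label] = 1.0                        # one-hot "hard" label
    soft = np.mean(posteriors, axis=0)       # averaged ensemble output
    return (1.0 - soft_weight) * hard + soft_weight * soft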

4.9.3 Sequence training
+  For the FullLP system only, we do discriminative training ("sequence training")
+  on our DNN. Our discriminative training is based on a state-level variant of
+  the Minimum Phone Error (MPE) criterion, called sMBR [15]. We are mostly following
+  the recipe described in [16], although modified for our parallel-training method.
+
+  The training is based on Stochastic Gradient Descent (SGD), although modified by our
+  "preconditioning method", which has not yet been properly written up
+  (till then, see the code).
+  We use a learning rate of 9E-5, but one tenth that value for the output layer.
+  Training is for four epochs.
+  Instead of frame-level randomization we use segment-level randomization, where the
+  segments are the smallest pieces we could chop our lattices into while still being
+  able to accurately evaluate the objective function. The training is in parallel
+  using 4 GPUs and periodically averaging the parameters, just like for our basic training.
+  (Note that the "effective learning rate" is as a result four times lower than what
+  we mentioned above).

4.10 Bottleneck features

+ + Our bottleneck system is based on the same code and methods as our DNN system, + except that we use tanh rather than p-norm nonlinearities, and the DNN has a bottleneck + layer. For the LimitedLP system we use four hidden layers with 1024 neurons, then + a bottleneck layer with 42 neurons, then one hidden layer with 1024 neurons, then the + output layer. For the FullLP system, replace (4, 1024) with (5, 2048). As before, + the input to the network is 40-dimensional LDA+STC+fMLLR features, spliced across 9 frames. +

+  For feature extraction we remove the part of the network after the 42-dimensional
+  bottleneck, including the tanh nonlinearity, and append the bottleneck output to the
+  baseline 40-dimensional features, giving
+  an 82-dimensional feature vector. This is spliced across ±1 frame and the dimension
+  is reduced with LDA to 60 dimensions. (Note: we don't commence training on these features
+  from scratch but start with alignments from our SAT-trained GMM-based system).
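A sketch of this feature pipeline in numpy (the LDA matrix here is a random stand-in for the estimated one, and the splicing helper is our own):

import numpy as np

def splice(feats, context=1):
    idx = np.clip(np.arange(len(feats))[:, None]
                  + np.arange(-context, context + 1), 0, len(feats) - 1)
    return feats[idx].reshape(len(feats), -1)       # edge frames are clamped

T = 500
base = np.random.randn(T, 40)    # LDA+STC+fMLLR baseline features
bn = np.random.randn(T, 42)      # bottleneck outputs (tanh removed)
lda = np.random.randn(3 * 82, 60)
feats60 = splice(np.hstack([base, bn])).dot(lda)    # shape (T, 60)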

+ From this point we train an SGMM+BMMI system. Because the feature dimension is higher the + number of parameters would increase if we left the rest of the configuration of the system + the same, so we use the following reduced configuration values: + + + + +
Num-gauss-UBM Num-leaves Num-substates
LimitedLP 500 5000 10000
FullLP 550 10000 50000
+ Because the features are much "stronger" than normal features (i.e. more informative about the + class), and more correlated, we need to decode with a different acoustic scale than normal. + We normally decode SGMM systems with an acoustic scale of 0.1. For this system we decode with + an acoustic scale of 1/15 = 0.06666. Note: the more finely tuned acoustic scale is determined + by best WER or ATWV on the development data, after rescoring the lattices with different weights; + this value is just to get us in the right ballpark during lattice generation. + + +

4.11 Build order

+ + In order to clarify the relationship between the various systems, we document here the + order of system building. The initial stages, when the dependency graph is just a linear + sequence, are as follows: + + + + + + + + + +
Stage Num-leaves/gauss Num-leaves/gauss Feature type
(LimitedLP) (FullLP)
mono n/a n/a delta+delta-delta
tri1 1000/10k 1000/10k delta+delta-delta
tri2 2500/36k 1000/20k delta+delta-delta
tri3 2500/36k 6000/75k delta+delta-delta
tri4 2500/36k 6000/75k LDA+STC
tri5 2500/36k 6000/75k LDA+STC+fMLLR
+After the tri5 stage, the build graph "branches out", and the training of the SGMM system, the +DNN system and the DNN that includes the bottleneck features, all depend on the alignments and +transforms obtained from the tri5 system. We have documented the number of parameters of those +other systems separately. + +

4.12 Decoding order

+  After training the tri5 system, we obtain via single-pass retraining a version of the system that
+  is trained on speaker-independent features. This model is used in the first, speaker-independent pass
+  of recognition (not counting segmentation, which we have documented separately). All decoding
+  passes are with WFST decoders that output lattices. Starting from a raw,
+  state-level lattice we use the determinization algorithm of [6] to produce
+  a word-level lattice, although this year we extended the determinization algorithm slightly to
+  enable the generation of deeper lattices, by first doing a phone-level determinization before
+  the word-level determinization. This keeps the determinization from "blowing up" when the
+  beam is too large.

+ The lattices from the speaker-independent decoding are used with the speaker-adapted "tri5" model to compute initial + fMLLR transforms, which are used with the speaker-adapted model to rescore the lattices to get + better posteriors and estimate the fMLLR transforms a second time. + Then another lattice generation pass is done with the speaker-adapted model and adapted features, + and the fMLLR transforms are estimated a third time and the lattices rescored with those features. +

+ Note: we don't include silence frames in the fMLLR computation. Since the + lattice generates soft counts, this is accomplished via per-frame weights, + not a hard cutoff. +

+ The decoding of the later models-- the SGMMs, DNNs and bottleneck feature based SGMMs-- + all depend on the "tri5" decoding because they use the fMLLR transforms generated there. +

+ Once we have these transforms, the DNN decoding is single-pass, but for the discriminatively + trained DNNs we first decode with the basic DNN and then rescore the lattices with + four different versions of the final DNN system, one for each epoch. This is so that we + can choose the best epoch to use. +

+ The SGMM decoding naturally has two passes: one using a speaker-independent version of + the SGMM system (speaker-independent because it doesn't have speaker vectors, although + we do have fMLLR features), and then another pass of decoding after estimating the + speaker vectors. However, we only generate the lattice once. In order to ensure + an accurate final lattice, we dump the state-level lattice from the first pass of + decoding and don't do the final lattice-determinization until after estimating the + speaker vectors. See [6] if the term "state-level lattice" is confusing. + +

4.13 Keyword index generation

+ + The keyword index generation uses Finite State Transducer concepts, and is based on [5]. + It relies on the fact that our lattices are determinized at the word level, which + is an essential part of our lattice generation procedure. This method constructs + an index such that for any given keyword sequence (of any length), one can do a simple + lookup in a finite state transducer and find a list of all the occurrences of that keyword + sequence in the set of lattices that were indexed. + The number of potential word sequences grows exponentially with the sequence + length, and the index does not blow up even though it allows us to look up arbitrarily long + sequences. This is accomplished through the magic of determinization, together with + some clever choices of semirings. +

+ We build a separate index for each language model scale in a predetermined range (e.g. 10, 12, 13, 14, 15), + so that we can separately run the keyword search pipeline for each scale, and pick the + scale with the best ATWV on the dev data. (Note: since there is only one dev set, all our + numbers reported on the dev set have these scales optimized on that set, and the same + applies for WER numbers). + +

4.14 Keyword search

+ + Once the index is built, keyword search is very simple and fast: we look up + the sequence in the index generated above, and it returns a list of the hit locations + (utterance-ids and start and end times) and the associated lattice posteriors. + In this document, we assume that by "keyword" we mean some given sequence of words, possibly + of length one. +

+ The most non-obvious aspect of this is the per-keyword normalization of the scores. + The Term Weighted Value (TWV) metric, after ignoring constant terms and doing + a few manipulations, may be expressed as follows: +

+ TWV = const + sum-over-keywords ( 1/K ( Ntrue-hit / Ntrue - beta/duration NFA ) ) +

+ Here, sum-over-keywords is taken over all keywords that were actually seen in + the test set being considered. The values in the equation may be defined as follows: + + + + + + + + +
Name Definition
K Number of keywords that appear in this test set
Ntrue-hit Number of occurrences of this keyword that we correctly spotted.
Ntrue Number of times this keyword actually occurred in this test set.
NFA Number of incorrect hits of this keyword that we produced.
beta A constant equal to exactly 999.9 (don't ask)
duration The total number of seconds of audio in the test set: a constant we know exactly.
+  I believe the following analysis comes from [17]. In statistical systems, if we assume
+  model correctness we can generally trust marginals even of very noisy and unreliable things.
+  So for instance, even if our individual recognitions of a word are very inaccurate, the sum
+  of the posteriors may be reasonably accurate if the system was well trained. At least, we can hope so.
+  So if we take the sum of posteriors of the hits of a keyword over our entire test set, we can form
+  a reasonable estimate of Ntrue. In what follows, let Ntrue-estimate be simply
+  the sum of the lattice posteriors of this keyword, over all our test set. We will use Ntrue-estimate
+  in place of Ntrue. So for some keyword, the TWV contribution from that keyword is:

+ TWV-contribution = 1/K ( Ntrue-hit / Ntrue-estimate - beta/duration NFA ) +

+ Here, Ntrue-estimate and beta/duration are both known quantities. Consider one putative hit, + i.e. one location in time where we have a nonzero posterior and we might want to produce a hit. Let + the posterior of the keyword in the lattice be p. Let's assume that p is a reasonable estimate of the + probability that the keyword actually exists there, which is reasonable assuming model correctness. + As an aside, note that we scale down the acoustics in our lattices while computing the posteriors, so the probabilities + are quite well calibrated; also, we have plotted the (posterior in our lattice) versus + (probability that the word was actually there) and it's within spitting distance of a straight line. + Anyway, back to the task at hand. We can write, for this putative hit, +

+ expected-TWV-contribution = 1/K ( p / Ntrue-estimate - beta/duration (1-p) ) . +

+  Here, all but one of the quantities in the equation are known. K is not known, because we don't know
+  how many keywords were actually seen in the test set, but because we only care about the sign of this quantity
+  we don't actually need to know K. For a putative hit, the equation above gives us all we need to know
+  in order to know whether to say "yes" or "no": if it's positive, "yes", else "no". We want to
+  keep the hit if this is positive, i.e. if:

+ p / Ntrue-estimate - beta/duration (1-p) > 0
+ p (1/Ntrue-estimate + beta/duration) - beta/duration > 0
+ p > (beta/duration) / (1/Ntrue-estimate + beta/duration)
+ p > Ntrue-estimate / (duration/beta + Ntrue-estimate) +

+ Let's call the value above the "threshold", i.e.
+threshold = Ntrue-estimate / (duration/beta + Ntrue-estimate)

+  (there is a different threshold for each keyword). In order to make it easier to choose
+  the cutoff point for when to stop producing hits, we would like to produce the output
+  as normalized scores that are all somehow comparable to each other. That way we can tune a global threshold.
+  We would like to normalize our scores in such a way that they are still all between zero and one.
+  We do this by converting p to a log-ratio, i.e. q = log(p / (1-p)), computing a similar log-ratio for the
+  threshold, i.e. t = log(threshold / (1-threshold)), and then subtracting t from q,
+  i.e. q' = q - t, to produce a normalized log-ratio q' (so if q' > 0, then p > threshold).
+  Then we convert back from a log-ratio to an actual
+  probability, call this p'. When we work out the equations for this, it comes out to
+ p' = (1-threshold) * p / ((1-threshold)*p + (1-p)*threshold) + + +
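The per-keyword normalization above, as a small Python function (the example numbers are invented):

def normalize_score(p, ntrue_estimate, duration, beta=999.9):
    threshold = ntrue_estimate / (duration / beta + ntrue_estimate)
    # q' = log(p/(1-p)) - log(threshold/(1-threshold)), mapped back to [0, 1]:
    return (1 - threshold) * p / ((1 - threshold) * p + (1 - p) * threshold)

# A putative hit with lattice posterior 0.3, Ntrue-estimate 5, in 10 hours
# (36000 seconds) of audio; the normalized score is > 0.5 iff p > threshold.
print(normalize_score(0.3, 5.0, 36000.0))   # about 0.755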

4.15 Out of vocabulary (OOV) keyword search

+ + In this section we describe how we perform the keyword search when the keyword is + OOV-- i.e. when at least one of the words in the sequence is not in our lexicon. + Note that this is a separate thing from the lexicon expansion described above. + If we are using the lexicon-expanded decoding graph, then this procedure is only applied + if the keyword is OOV with respect to the expanded lexicon. +

+ We have described our basic proxy search procedure in [4] so we will not repeat + it at length here. The basic idea is to use a learned phone confusion matrix + to find a list of in-vocabulary word sequences that are phonetically close to + the sequence we want, with associated penalties for being too distant. As a + special case, we don't penalize the proxy sequences for having extra phones at + their beginning and end (so, for instance, if the pronunciation of a + searched-for word appeared as part of a longer word, we would allow that + without penalty). +

+ As background, our index lookup is actually done by FST composition, where one + of the things to be composed is the "query FST" (normally with a linear structure) + and one is the huge index. In our proxy search method, we represent the set of + proxy keywords, and their associated weights, as an FST, and to the keyword + search pipeline it looks no different from a linear sequence (since the input + is just an FST). +

+ There is something new about our proxy keyword search pipeline this + year. After implementing the "expanded lexicon", we noticed that the process + of generating proxy keywords was very slow. This procedure involves various + operations of composition and determinization, where the inputs are a linear + sequence consisting of the OOV keyword (as phones), a phone-edit-distance FST, + and a lexicon. When we made the lexicon much bigger, it became slow. In order + to make it fast again, we had to rearrange the order of composition and + determinization, and implement an "on-demand" FST pruning procedure for OpenFST + (as part of the Kaldi extensions to OpenFST). + + + +

4.16. Point Process Models for Keyword Search

+ +

The point process model (PPM) for keyword search [9] is a
+whole-word, event-based acoustic modeling and phonetic search technique.
+It operates on sparse phonetic event streams extracted from the speech
+signal using a frame-level subword acoustic model. In our Babel system,
+we use our Kaldi Deep Neural Network acoustic models described above to
+generate posteriorgrams over context-dependent states. We subsequently
+sum posterior dimensions sharing the same center phone to produce
+monophone posteriorgrams for each utterance. After applying the matched
+filter smoothing of [10], local maxima of each posterior trajectory
+define phonetic event times. The set of phonetic events for the search
+collection defines the index for subsequent keyword search; this
+construction, which is performed entirely independently of the keyword
+set, is our only use of the test audio.

+The next stage is point process model construction. For +in-vocabulary words, we perform MAP estimation of the Poisson rate +parameters for each word in the lexicon [11]. This takes advantage of +any exemplars present in the training data, but falls back on +dictionary-based model priors (the simple variant, see [11] for details) +if no exemplars are available. For OOV keywords, we use Sequitur G2P +pronunciations to construct the dictionary models. Multi-word keyword +models are constructed by concatenating MAP estimated unigram PPMs, with +the overall duration distributions derived using the Monte Carlo +techniques from [12]. Search for each keyword is performed using an +optimized detection function calculation scheme that is 500,000 times +faster than realtime. We consider the PPM system performance both in +isolation and in combination (at the kwslist level) with the Kaldi LVCSR +search engine outputs. + +
+

4.17. Class-based language model

+Due to the sparsity of the Tamil data, a combination of different smoothing techniques was used to train a trigram for LimitedLP and FullLP:
+
  • 1. a class-based language model, where the class is derived from the first three characters of the Tamil word
  • 2. a class-based LM using the first six characters
  • 3. one using the last three characters
  • 4. a skip bigram
  • 5. a word trigram where the absolute discounting parameter depends on the count level using a rational function
  • 6. the original trigram (KN as implemented in SRILM)
+
+Models 1-5 were implemented in LSVLM. In order to map them to ARPA format, an artificial corpus of 30 million tokens was sampled using model 5. A trigram tree was constructed and the probabilities of models 1-5 were written to the leaves of that tree. In the end, models 1-6 were combined using linear interpolation. Model 2 had the largest contribution in all experiments.
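A minimal sketch of that final interpolation step (prob_fns stands in for the six component models; the weights, which model 2 dominated, would be tuned on held-out data):

def interpolated_prob(prob_fns, weights, word, history):
    assert abs(sum(weights) - 1.0) < 1e-9
    return sum(w * f(word, history) for w, f in zip(weights, prob_fns))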
+ +

4.18. Segment-level decoding

+
+ +

4.19 System combination methods

+ +
4.19.1 System combination for transcription
+ + Here we describe the system combination methods that are used in the "Kaldi-only" + submissions. For the overall RADICAL combination, which is based on ROVER, we + provide both the individual Kaldi sub-systems, and the overall combined system + which we combine as described in this section. +

+ Our systems are not cross-adapted, unless you count the fact that they all use + the fMLLR transforms from the shared "tri5" stage. For transcription purposes, + the only form of combination we use in the Kaldi sub-system is a combination + procedure based on Minimum Bayes Risk decoding, as described in [1]. We view + this as a more principled way to do confusion network combination (CNC) [18], + without the various heuristics that are used to produce confusion networks. + There is one aspect of this that we should explain, which relates to the + language-model weight. Normally when decoding, we do a linear sweep over the + language model weights over some range (e.g. 10, 11, 12, ... 18), and select + the best one. We do the same when combining systems, except that sometimes the + different systems will require substantially different language model weights + and there is no one weight that is good for all of them; it's not practical to + try all possible combinations of weights. When combining systems, we apply a + different offset to the language-model weights for each system. This offset is + determined by the beginning of the language-model-weight range that we sweep + for each system, which in turn was determined by us when setting up the + configuration files for our system. So for instance, if we start the regular + SGMM system at offset 10, and the bottleneck+SGMM system at 15, then there would + be an offset of 5 between the two systems when we do the combination. +

+ We don't bother applying weights to the different systems when combining, but + on occasion we do leave out some of the worse systems from the combination. + This is decided by a human operator, based on trying different combinations on + the dev set. The identities of the systems that were combined will be noted + in the individual submissions. + +

4.19.2 System combination for keyword search
+ + In this section we describe the Kaldi-internal method of system combination for + keyword search. For the overall RADICAL system combination, we provide the kwslists + for both the individual Kaldi subsystems, and their combination as described in this + section. +

+  The Kaldi-internal combination for keyword search is based on averaging across systems the
+  unnormalized putative hits (i.e. the lattice posteriors extracted from the index),
+  before normalizing the averaged posteriors using the normalization method described
+  in Section 4.14. Note that in order to do this averaging, we have
+  to have some notion of when multiple hits are "at the same time". This is pretty obvious
+  (hits are the same if they overlap in time), so we won't refer to it further. If one
+  system did not have a hit at a particular time, it's identical to it having a posterior of
+  zero.

+  We do not do a conventional average (i.e. a mean).
+  We wanted to implement something that was in between a mean and a geometric mean. We
+  used the notion that a geometric mean is a mean of logs, and a log is like a power of
+  x, (1/p) x^p, as p approaches zero. So if we take the mean of x^p
+  for some power p between zero and one, and take the result to the power 1/p,
+  this is somewhere between a mean and a geometric mean. So this is what we do.
+  Suppose we have three scores: a, b and c. We choose a power p (say, p=0.5, but it's tuned
+  per language). Then we let
+
+  average = (a^p + b^p + c^p)^(1/p).
+
+  Actually we extend this to a weighted average, i.e.
+
+  average = (w_a a^p + w_b b^p + w_c c^p)^(1/p)
+where the weights sum to one. The weights are determined manually in small scale +experiments on one of the languages, as the result is not very sensitive to the +weights. We used weights that are fairly close to each other, but with better +systems having larger weights. +
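The weighted "power mean" above as code; p -> 1 gives a plain weighted mean and p -> 0 approaches a weighted geometric mean. A system with no hit at a given time contributes a score of zero (example weights are invented):

def power_mean(scores, weights, p=0.5):
    assert abs(sum(weights) - 1.0) < 1e-9
    return sum(w * s ** p for w, s in zip(weights, scores)) ** (1.0 / p)

print(power_mean([0.9, 0.4, 0.0], [0.4, 0.3, 0.3]))   # about 0.32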

+We apply the normalization method of Section 4.14 after taking the
+weighted mean.

5. Hardware

+A variable number of 16-core (Intel(R) Xeon(R) CPU E5-2680) machines was used. The amount
+of per-core memory was 2 GB. The training of the LimitedLP system was done using 32 cores (2 nodes);
+the training of the FullLP system was done using 64 cores (4 nodes). Each of the nodes was
+equipped with one GPU card (Tesla K20m); however, these cards weren't used for training, with
+the exception of the neural networks (DNN and BNF systems). The detailed timing info will be
+provided in the next section. The maximum total storage capacity used was approximately 5 TB.
+The typical size of a complete system (including lattices) is around 300 GB. The lattice
+generation of the shadow dataset (combined dev10h and eval) was done on 96 cores (6 nodes).
+Indexing and search were done on 64 CPUs (4 nodes).

6. Timing

+
+DATADEF:==BaseLR{204LimitedLP}:AM{204LimitedLP},LM{204LimitedLP},PRON{204LimitedLP},AR{None}
+
+
+Ingestion Elapsed Time (hh:mm:ss) - 151:29:03
+Ingestion Total CPU Time (hh:mm:ss) - 9546:33:38
+Ingestion Total GPU Time (hh:mm:ss) - 92:23:16
+
+Ingestion Maximum CPU Memory (gbytes) - 192
+Ingestion Maximum GPU Memory (gbytes) - 16
+
+Search Elapsed Time (hh:mm:ss) - 12:39:08
+Search Total CPU Time (hh:mm:ss) - 427:17:22
+Search Total GPU Time (hh:mm:ss) - 0:00:00
+
+Search Maximum CPU Memory (gbytes) - 32
+Search Maximum GPU Memory (gbytes) - 16
+
+ + +

7. References

+ + +
    + +
  • [1] "Minimum Bayes Risk decoding and system combination based on a recursion for edit distance",
    Haihua Xu, Daniel Povey, Lidia Mangu and Jie Zhu, Computer Speech and Language, 2011.
  • [2] "A Symmetrization of the Subspace Gaussian Mixture Model",
    Daniel Povey, Martin Karafiat, Arnab Ghoshal, Petr Schwarz, ICASSP 2011.
  • [3] "Boosted MMI for Model and Feature Space Discriminative Training",
    Daniel Povey, Dimitri Kanevsky, Brian Kingsbury, Bhuvana Ramabhadran, George Saon and Karthik Visweswariah.
  • [4] "Using Proxies for OOV keywords in the Keyword Search Task",
    Guoguo Chen, Oguz Yilmaz, Jan Trmal, Daniel Povey, and Sanjeev Khudanpur, ASRU 2013.
  • [5] "Lattice Indexing for Spoken Term Detection",
    Dogan Can and Murat Saraclar, IEEE Transactions on Audio, Speech and Language Processing.
  • [6] "Generating exact lattices in the WFST framework",
    D. Povey, M. Hannemann et al., ICASSP 2012.
  • [7] "A Pitch Extraction Algorithm Tuned for Automatic Speech Recognition",
    Pegah Ghahremani, Bagher BabaAli, Daniel Povey, Korbinian Riedhammer, Jan Trmal and Sanjeev Khudanpur, ICASSP 2014.
  • [8] "Improving Deep Neural Network Acoustic Models using Generalized Maxout Networks",
    Xiaohui Zhang, Jan Trmal, Daniel Povey and Sanjeev Khudanpur, ICASSP 2014.
  • [9] "Point process models for spotting keywords in continuous speech",
    Jansen, A. and Niyogi, P., IEEE Trans. Audio, Speech and Language Proc., 17(8), pp. 1457-1470, 2009.
  • [10] "Event Selection from Phone Posteriorgrams Using Matched Filters",
    Kintzley, K., Jansen, A., and Hermansky, H., in Proc. of INTERSPEECH, 2011.
  • [11] "MAP Estimation of Whole-Word Acoustic Models with Dictionary Priors",
    Kintzley, K., Jansen, A., and Hermansky, H., in Proc. of INTERSPEECH, 2012.
  • [12] "Featherweight Phonetic Keyword Search for Conversational Speech",
    Kintzley, K., Jansen, A., and Hermansky, H., in Proc. of ICASSP, 2014.
  • [13] "Semi-Tied Covariance Matrices for Hidden Markov Models",
    Mark Gales, IEEE Trans. SAP, 1999.
  • [14] "The Subspace Gaussian Mixture Model – a Structured Model for Speech Recognition",
    Daniel Povey, Lukas Burget et al., Computer Speech and Language, 2011.
  • [15] "Minimum Bayes risk acoustic model estimation and adaptation",
    Gibson, Matthew, Dissertation, University of Sheffield, 2008.
  • [16] "Sequence-discriminative training of deep neural networks",
    K. Vesely, A. Ghoshal, L. Burget and D. Povey, Proc. Interspeech 2013.
  • [17] "Score normalization and system combination for improved keyword spotting",
    Damianos Karakos et al., ASRU 2013.
  • [18] "Posterior probability decoding, confidence estimation and system combination",
    Evermann, Gunnar, and P. C. Woodland, Proc. Speech Transcription Workshop, Vol. 27, 2000.
+
+
diff --git a/egs/babel/s5d/cmd.sh b/egs/babel/s5d/cmd.sh
new file mode 100644
index 00000000000..a4a11bef039
--- /dev/null
+++ b/egs/babel/s5d/cmd.sh
@@ -0,0 +1,29 @@
+# "queue.pl" uses qsub. The options to it are
+# options to qsub. If you have GridEngine installed,
+# change this to a queue you have access to.
+# Otherwise, use "run.pl", which will run jobs locally
+# (make sure your --num-jobs options are no more than
+# the number of CPUs on your machine.)
+
+#a) JHU cluster options
+export train_cmd="queue.pl -l arch=*64"
+export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
+export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
+
+#export cuda_cmd="..."
+
+
+#b) BUT cluster options
+#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
+#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M"
+#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G"
+
+#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
+#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
+#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G"
+
+#c) run it locally...
+#export train_cmd=run.pl
+#export decode_cmd=run.pl
+#export cuda_cmd=run.pl
+#export mkgraph_cmd=run.pl
diff --git a/egs/babel/s5d/conf/bnf/config_full.py b/egs/babel/s5d/conf/bnf/config_full.py
new file mode 100755
index 00000000000..5ea3ddbb1d9
--- /dev/null
+++ b/egs/babel/s5d/conf/bnf/config_full.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+#################################################
+## PTDNN - Python Toolkit for Deep Neural Network
+## Author: Yajie Miao
+#################################################
+
+import os
+import sys
+
+from utils.learn_rates import LearningRateExpDecay
+
+
+class BnfExpConfig(object):
+
+    def __init__(self):
+
+        # working directory; by default, the pfiles should be here
+        self.wdir = "WORK/"
+        self.pretrain_data = self.wdir + 'train.pfile.gz'  # pretraining data
+        self.pretrain_output = self.wdir + "rbm.ptr"       # pretraining output
+
+        # finetuning data
+        self.finetune_train_data = self.wdir + 'train.pfile.gz'  # finetune training data
+        self.finetune_valid_data = self.wdir + 'valid.pfile.gz'  # finetune validation data
+        self.finetune_output = self.wdir + "final.nnet.raw"      # finetune output
+        self.nnet_kaldi_fmt = self.wdir + "final.nnet"
+
+        # global config for nnet topo
+        self.n_ins=250       # size of input data
+        self.n_outs=N_OUTS   # number of output targets; we'll replace this with
+                             # the correct number when we move this to the right place.
+        self.hidden_layers_sizes=[1024, 1024, 1024, 1024, 1024, 42, 1024] # hidden layer sizes
+        self.bnf_layer_index = 6 # the index of the Bottleneck layer
+        self.pretrain_layer_num = 5 # number of hidden layers to be pretrained
+
+        # global config for data
+        self.shuffle = True
+        self.chunk_size = '200m'
+
+        # pretraining batch size
+        self.pretrain_batch_size = 128 # batch-size in pretraining
+
+        # pretraining schedule
+        self.pretrain_gbrbm_lr = 0.005 # learning rate for Gaussian-Bernoulli RBM
+        self.pretrain_rbm_lr = 0.08 # learning rate for Bernoulli-Bernoulli RBM
+        self.initial_momentum = 0.5 # initial momentum
+        self.final_momentum = 0.9 # final momentum
+        self.initial_momentum_epoch = 2 # for how many epochs do we use initial_momentum
+        self.pretraining_epochs = 4 # total epochs
+
+        # finetuning batch size
+        self.finetune_batch_size = 256 # batch-size for finetuning
+
+        # finetuning schedule
+        self.finetune_momentum = 0.5 # momentum for finetuning
+        self.lrate = LearningRateExpDecay(start_rate=0.04, # starting learning rate
+                                          scale_by = 0.5, # decaying factor in ramping
+                                          max_epochs = 1000, # 'dump' epoch limit, can never be reached
+                                          min_derror_ramp_start = 0.01, # min validation error difference to trigger ramping
+                                          min_derror_stop = 0.01, # min validation error difference to stop finetuning, after ramping
+                                          init_error = 100)
diff --git a/egs/babel/s5d/conf/bnf/config_limited.py b/egs/babel/s5d/conf/bnf/config_limited.py
new file mode 100755
index 00000000000..f63c3640d68
--- /dev/null
+++ b/egs/babel/s5d/conf/bnf/config_limited.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+#################################################
+## PTDNN - Python Toolkit for Deep Neural Network
+## Author: Yajie Miao
+#################################################
+
+import os
+import sys
+
+from utils.learn_rates import LearningRateExpDecay
+
+
+class BnfExpConfig(object):
+
+    def __init__(self):
+
+        # working directory; by default, the pfiles should be here
+        self.wdir = "WORK/" # Note: we'll replace CWD with the current directory
+                            # when we move this to the right place.
+        self.pretrain_data = self.wdir + 'train.pfile.gz' # pretraining data
+        self.pretrain_output = self.wdir + "rbm.ptr" # pretraining output
+
+        # finetuning data
+        self.finetune_train_data = self.wdir + 'train.pfile.gz' # finetune training data
+        self.finetune_valid_data = self.wdir + 'valid.pfile.gz' # finetune validation data
+        self.finetune_output = self.wdir + "final.nnet.raw" # finetune output
+        self.nnet_kaldi_fmt = self.wdir + "final.nnet"
+
+        # global config for nnet topo
+        self.n_ins=250 # size of input data
+        self.n_outs=N_OUTS # number of output targets; we'll replace this with
+                           # the correct number when we move this to the right place.
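        # A sketch (not in the original file) of the schedule that
        # LearningRateExpDecay appears to implement, judging only from the
        # argument names used in these configs: hold the rate steady until the
        # per-epoch drop in validation error falls below min_derror_ramp_start,
        # then multiply the rate by scale_by each epoch, and stop finetuning
        # once the drop falls below min_derror_stop while ramping.
        #
        #     def next_rate(rate, derror, ramping,
        #                   scale_by=0.5, ramp_start=0.01, stop=0.01):
        #         if ramping and derror < stop:
        #             return None, ramping      # signal: stop finetuning
        #         if derror < ramp_start:
        #             ramping = True
        #         if ramping:
        #             rate *= scale_by
        #         return rate, ramping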
+        self.hidden_layers_sizes=[1024, 1024, 1024, 1024, 42, 1024] # hidden layer sizes
+        self.bnf_layer_index = 5 # the index of the Bottleneck layer
+        self.pretrain_layer_num = 4 # number of hidden layers to be pretrained
+
+        # global config for data
+        self.shuffle = True
+        self.chunk_size = '200m'
+
+        # pretraining batch size
+        self.pretrain_batch_size = 128 # batch-size in pretraining
+
+        # pretraining schedule
+        self.pretrain_gbrbm_lr = 0.005 # learning rate for Gaussian-Bernoulli RBM
+        self.pretrain_rbm_lr = 0.08 # learning rate for Bernoulli-Bernoulli RBM
+        self.initial_momentum = 0.5 # initial momentum
+        self.final_momentum = 0.9 # final momentum
+        self.initial_momentum_epoch = 5 # for how many epochs do we use initial_momentum
+        self.pretraining_epochs=10 # total epochs
+
+        # finetuning batch size
+        self.finetune_batch_size = 256 # batch-size for finetuning
+
+        # finetuning schedule
+        self.finetune_momentum = 0.5 # momentum for finetuning
+        self.lrate = LearningRateExpDecay(start_rate=0.08, # starting learning rate
+                                          scale_by = 0.5, # decaying factor in ramping
+                                          max_epochs = 1000, # 'dump' epoch limit, can never be reached
+                                          min_derror_ramp_start = 0.01, # min validation error difference to trigger ramping
+                                          min_derror_stop = 0.01, # min validation error difference to stop finetuning, after ramping
+                                          init_error = 100)
diff --git a/egs/babel/s5d/conf/common.fullLP b/egs/babel/s5d/conf/common.fullLP
new file mode 100644
index 00000000000..d203908d3e0
--- /dev/null
+++ b/egs/babel/s5d/conf/common.fullLP
@@ -0,0 +1,124 @@
+# BNF training parameters
+bnf_num_hidden_layers=6
+bottleneck_dim=42
+bnf_hidden_layer_dim=2048
+bnf_minibatch_size=512
+bnf_init_learning_rate=0.008
+bnf_final_learning_rate=0.0008
+bnf_max_change=40
+bnf_num_jobs=4
+bnf_num_threads=1
+bnf_mixup=10000
+bnf_mpe_learning_rate=0.00009
+bnf_mpe_last_layer_factor=0.1
+bnf_num_gauss_ubm=550 # use fewer UBM Gaussians than the
+                      # non-bottleneck system (which has 800)
+bnf_num_gauss_sgmm=50000 # use fewer SGMM sub-states than the
+                         # non-bottleneck system (which has 80000).
+bnf_decode_acwt=0.066666
+
+
+# DNN hybrid system training parameters
+dnn_num_hidden_layers=4
+dnn_input_dim=4000
+dnn_output_dim=400
+dnn_init_learning_rate=0.008
+dnn_final_learning_rate=0.0008
+dnn_mixup=12000
+
+dnn_mpe_learning_rate=0.00008
+dnn_mpe_last_layer_factor=0.1
+dnn_mpe_retroactive=true
+
+bnf_every_nth_frame=2 # take every 2nd frame.
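# An illustration (not part of the original file) of what
# bnf_every_nth_frame=2 above means when applied to a matrix of frames;
# Python/numpy is used only for the sketch:
#
#     import numpy as np
#     feats = np.random.randn(100, 42)  # (num_frames, feature_dim)
#     subsampled = feats[::2]           # every 2nd frame -> 50 frames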
+babel_type=full + +use_pitch=true + +lmwt_plp_extra_opts=( --min-lmwt 8 --max-lmwt 18 ) +lmwt_bnf_extra_opts=( --min-lmwt 15 --max-lmwt 22 ) +lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 15 ) +lmwt_chain_extra_opts=( --min-lmwt 4 --max-lmwt 22 ) + +dnn_beam=16.0 +dnn_lat_beam=8.5 + +icu_opt=(--use-icu true --icu-transform Any-Lower) + +if [[ `hostname` == *.tacc.utexas.edu ]] ; then + decode_extra_opts=( --num-threads 4 --parallel-opts "-pe smp 4" ) + sgmm_train_extra_opts=( ) + sgmm_group_extra_opts=( --num_iters 25 ) + sgmm_denlats_extra_opts=( --num-threads 2 ) + sgmm_mmi_extra_opts=(--cmd "local/lonestar.py -pe smp 2") + dnn_denlats_extra_opts=( --num-threads 2 ) + + dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "-pe smp 16" ) + dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1) + + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1) + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1) + dnn_parallel_opts="-l gpu=1" +else + decode_extra_opts=(--num-threads 6 --parallel-opts "--num-threads 6 --mem 4G") + sgmm_train_extra_opts=( --num-iters 25 ) + sgmm_group_extra_opts=(--group 3 --parallel-opts "--num-threads 7 --mem 6G") + sgmm_denlats_extra_opts=(--num-threads 4 --parallel-opts "--num-threads 4" ) + sgmm_mmi_extra_opts=() + dnn_denlats_extra_opts=(--num-threads 4 --parallel-opts "--num-threads 4") + + dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "--num-threads 16") + dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1 \ + --parallel-opts "--gpu 1" ) + dnn_parallel_opts="--gpu 1" + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1 \ + --parallel-opts "--gpu 1") +fi + +icu_transform="Any-Lower" +case_insensitive=true + + +max_states=150000 +wip=0.5 + + +phoneme_mapping= + +minimize=true + +proxy_phone_beam=-1 +proxy_phone_nbest=-1 +proxy_beam=5 +proxy_nbest=500 + +extlex_proxy_phone_beam=5 +extlex_proxy_phone_nbest=300 +extlex_proxy_beam=-1 +extlex_proxy_nbest=-1 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--oov " + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/common.limitedLP b/egs/babel/s5d/conf/common.limitedLP new file mode 100644 index 00000000000..a73080a5b65 --- /dev/null +++ b/egs/babel/s5d/conf/common.limitedLP @@ -0,0 +1,128 @@ +# BNF training parameters +bnf_num_hidden_layers=5 +bottleneck_dim=42 +bnf_hidden_layer_dim=1024 +bnf_minibatch_size=512 +bnf_init_learning_rate=0.008 +bnf_final_learning_rate=0.0008 +bnf_max_change=40 +bnf_num_jobs=4 +bnf_num_threads=1 +bnf_mixup=5000 +bnf_mpe_learning_rate=0.00009 +bnf_mpe_last_layer_factor=0.1 +bnf_num_gauss_ubm=500 # use fewer UBM Gaussians than the + # non-bottleneck system (which has 750) +bnf_num_gauss_sgmm=10000 # use fewer SGMM sub-states than the + # non-bottleneck system (which has 18000). 
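# Note (not part of the original file): the bnf_decode_acwt value just below
# is 1/15, i.e. the acoustic scores are scaled for decoding as if using a
# language-model weight of 15:
#
#     >>> 1.0 / 15
#     0.06666666666666667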
+bnf_decode_acwt=0.066666 + + +## DNN hybrid system training parameters +dnn_num_hidden_layers=3 +dnn_input_dim=2000 +dnn_output_dim=200 +dnn_init_learning_rate=0.008 +dnn_final_learning_rate=0.0008 +dnn_mixup=5000 + +dnn_mpe_learning_rate=0.00009 +dnn_mpe_last_layer_factor=0.1 +dnn_mpe_retroactive=true + +bnf_every_nth_frame=1 # take all frames. +babel_type=limited + +use_pitch=true + +lmwt_plp_extra_opts=( --min-lmwt 8 --max-lmwt 12 ) +lmwt_bnf_extra_opts=( --min-lmwt 15 --max-lmwt 22 ) +lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 15 ) + +dnn_beam=16.0 +dnn_lat_beam=8.5 + +icu_opt=(--use-icu true --icu-transform Any-Lower) + +# Semi-supervised examples options +dnn_update_egs_opts=(--weight-threshold 0.7 --splice-width 4 --samples-per-iter 200000 --num-jobs-nnet 4 --io-opts "-tc 5" ) + +if [[ `hostname` == *.tacc.utexas.edu ]] ; then + decode_extra_opts=( --num-threads 4 --parallel-opts "-pe smp 4" ) + sgmm_train_extra_opts=( --num-iters 25 ) + sgmm_group_extra_opts=( ) + sgmm_denlats_extra_opts=( --num-threads 1 ) + dnn_denlats_extra_opts=( --num-threads 1 ) + + dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "-pe smp 16" ) + dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 + --parallel-opts "-pe smp 16" ) + + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 4 --num-threads 1) + + dnn_update_parallel_opts=( --num-epochs 15 --num-epochs-extra 5 --num-iters-final 20 ) +else + decode_extra_opts=(--num-threads 6 --parallel-opts "-pe smp 6 -l mem_free=4G,ram_free=4.0G") + sgmm_train_extra_opts=( --num-iters 25 ) + sgmm_group_extra_opts=(--group 3 --parallel-opts "-pe smp 3 -l mem_free=7G,ram_free=7.0G" --cmd "queue.pl -l arch=*64 -l mem_free=2.0G,ram_free=2.0G") + sgmm_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2.0G") + sgmm_mmi_extra_opts=(--cmd "queue.pl -l arch=*64 -l mem_free=1.5G,ram_free=1.5G") + dnn_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2.0G") + + dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") + dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 \ + --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") + dnn_parallel_opts="-l gpu=1" + dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 4 --num-threads 1 \ + --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") + + dnn_update_parallel_opts=( --num-epochs 15 --num-epochs-extra 5 --num-iters-final 20 ) +fi + +icu_transform="Any-Lower" +case_insensitive=true + + +max_states=150000 +wip=0.5 + + +phoneme_mapping= + +minimize=true + +proxy_phone_beam=-1 +proxy_phone_nbest=-1 +proxy_beam=5 +proxy_nbest=500 + +extlex_proxy_phone_beam=5 +extlex_proxy_phone_nbest=300 +extlex_proxy_beam=-1 +extlex_proxy_nbest=-1 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--oov " + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/common.semisupervised.limitedLP 
b/egs/babel/s5d/conf/common.semisupervised.limitedLP new file mode 100644 index 00000000000..63118af268c --- /dev/null +++ b/egs/babel/s5d/conf/common.semisupervised.limitedLP @@ -0,0 +1,27 @@ +## DNN hybrid system training parameters +dnn_num_hidden_layers=3 +dnn_input_dim=2000 +dnn_output_dim=200 +dnn_init_learning_rate=0.008 +dnn_final_learning_rate=0.0008 +dnn_mixup=5000 +num_epochs=15 +num_epochs_extra=5 +num_iters_final=20 + +babel_type=limited + +# Supervised tuning options +# To update only the last layer using only the supervised data after +# semi-supervised training is done +do_supervised_tuning=true +dnn_update_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \ + --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") +dnn_update_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 \ + --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") + +# Semi-supervised examples options +egs_gpu_opts=(--splice-width 4 --samples-per-iter 200000 --num-jobs-nnet 4 --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") +egs_cpu_opts=(--splice-width 4 --samples-per-iter 200000 --num-jobs-nnet 8 --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G") +egs_io_opts="-tc 5" +weight_threshold=0.7 diff --git a/egs/babel/s5d/conf/common_vars.sh b/egs/babel/s5d/conf/common_vars.sh new file mode 100644 index 00000000000..4a48d2577a8 --- /dev/null +++ b/egs/babel/s5d/conf/common_vars.sh @@ -0,0 +1,28 @@ +#keyword search default +glmFile=conf/glm +duptime=0.5 +case_insensitive=false +use_pitch=true +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="-oov " +boost_sil=1.5 # note from Dan: I expect 1.0 might be better (equivalent to not + # having the option)... should test. +cer=0 + +#Declaring here to make the definition inside the language conf files more +# transparent and nice +declare -A dev10h_kwlists +declare -A dev2h_kwlists +declare -A evalpart1_kwlists +declare -A eval_kwlists +declare -A shadow_kwlists + +# just for back-compatibility +declare -A dev10h_more_kwlists +declare -A dev2h_more_kwlists +declare -A evalpart1_more_kwlists +declare -A eval_more_kwlists +declare -A shadow_more_kwlists +[ -f ./path.sh ] && . ./path.sh; # source the path. +[ -f ./cmd.sh ] && . ./cmd.sh; # source train and decode cmds. diff --git a/egs/babel/s5d/conf/glm b/egs/babel/s5d/conf/glm new file mode 100644 index 00000000000..cdf9c42feaa --- /dev/null +++ b/egs/babel/s5d/conf/glm @@ -0,0 +1,13 @@ +;; +;; File: ma970904.glm +;; Desc: This file contains the transcript filtering rules for the ARPA +;; Mandarin Hub5-NE Evaluation. +;; +;; Date: 970904 +;; - initial creation +;; +;; Hesitation mappings + => %HESITATION / [ ] __ [ ] + => %HESITATION / [ ] __ [ ] + => %HESITATION / [ ] __ [ ] + diff --git a/egs/babel/s5d/conf/lang/101-cantonese-fullLP.official.conf b/egs/babel/s5d/conf/lang/101-cantonese-fullLP.official.conf new file mode 100644 index 00000000000..7d2da3715fb --- /dev/null +++ b/egs/babel/s5d/conf/lang/101-cantonese-fullLP.official.conf @@ -0,0 +1,104 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/101-cantonese/release-current/conversational/training +train_data_list=/export/babel/data/splits/Cantonese_Babel101/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.3hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm +dev2h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=20 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm +dev10h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval +eval_data_list=/export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.kwlist.xml +eval_nj=64 + +evalpart1_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval +evalpart1_data_list=conf/lists/101-cantonese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +evalpart1_nj=64 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/101-cantonese/release-current/conversational/dev + /export/babel/data/101-cantonese/release-current/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Cantonese_Babel101/dev.list + 
/export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm +lexicon_file=/export/babel/data/101-cantonese/release-current/conversational/reference_materials/lexicon.txt +cer=1 + +max_index_states=150000 +word_ins_penalty=0.5 + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf b/egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf new file mode 100644 index 00000000000..66347522065 --- /dev/null +++ b/egs/babel/s5d/conf/lang/101-cantonese-limitedLP.official.conf @@ -0,0 +1,112 @@ +# include common settings for limitedLP systems. +. conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/101-cantonese/release-current/conversational/training +train_data_list=/export/babel/data/splits/Cantonese_Babel101/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.3hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm +dev2h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=20 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm +dev10h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period 
evaluation data files +eval_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval +eval_data_list=/export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.kwlist.xml +eval_nj=64 + +evalpart1_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval +evalpart1_data_list=conf/lists/101-cantonese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml +) +evalpart1_nj=64 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/101-cantonese/release-current/conversational/dev + /export/babel/data/101-cantonese/release-current/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Cantonese_Babel101/dev.list + /export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + +unsup_data_dir=( + /export/babel/data/104-pashto/release-current/conversational/training/ + ) +unsup_data_list=( + /export/babel/data/splits/Pashto_Babel104/train.LimitedLP.untranscribed.list + ) +unsup_nj=64 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm +lexicon_file=/export/babel/data/101-cantonese/release-babel101b-v0.4c_sub-train1/conversational/reference_materials/lexicon.sub-train1.txt +cer=1 + +max_index_states=150000 +word_ins_penalty=0.5 + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/102-assamese-fullLP.official.conf b/egs/babel/s5d/conf/lang/102-assamese-fullLP.official.conf new file mode 100644 index 00000000000..f00afb53454 --- /dev/null +++ b/egs/babel/s5d/conf/lang/102-assamese-fullLP.official.conf @@ -0,0 +1,105 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/102-assamese/release-current/conversational/training +train_data_list=/export/babel/data/splits/Assamese_Babel102/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/102-assamese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Assamese_Babel102/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=24 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/102-assamese/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Assamese_Babel102//dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/ +eval_data_list=/export/babel/data/splits/Assamese_Babel102/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/102-assamese/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/102-assamese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist2.xml + [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist4.xml + ) 
+evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/102-assamese/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/ + ) +shadow_data_list=( + /export/babel/data/splits/Assamese_Babel102/dev.list + /export/babel/data/splits/Assamese_Babel102/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + + + +lexicon_file=/export/babel/data/102-assamese/release-current/conversational/reference_materials/lexicon.txt +cer=0 + +max_index_states=150000 +word_ins_penalty=0.5 + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/102-assamese-limitedLP.official.conf b/egs/babel/s5d/conf/lang/102-assamese-limitedLP.official.conf new file mode 100644 index 00000000000..937166caf7d --- /dev/null +++ b/egs/babel/s5d/conf/lang/102-assamese-limitedLP.official.conf @@ -0,0 +1,114 @@ +# include common settings for limitedLP systems. +. conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/102-assamese/release-current/conversational/training +train_data_list=/export/babel/data/splits/Assamese_Babel102/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/102-assamese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Assamese_Babel102/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=24 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/102-assamese/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Assamese_Babel102//dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev/IARPA-babel102b-v0.5a_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml + 
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/ +eval_data_list=/export/babel/data/splits/Assamese_Babel102/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/102-assamese/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/102-assamese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist2.xml + [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-evalpart1/IARPA-babel102b-v0.5a_conv-evalpart1.annot.kwlist4.xml + ) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/102-assamese/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_102/conversational/eval/ + ) +shadow_data_list=( + /export/babel/data/splits/Assamese_Babel102/dev.list + /export/babel/data/splits/Assamese_Babel102/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel102b-v0.5a_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + +unsup_data_dir=(/export/babel/data/102-assamese//release-current/conversational/training/ + /export/babel/data/102-assamese//release-current/conversational/untranscribed-training/ + ) +unsup_data_list=( + /export/babel/data/splits/Assamese_Babel102/train.LimitedLP.untranscribed.list + /export/babel/data/splits/Assamese_Babel102/train.untranscribed.list + ) +unsup_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + + + +lexicon_file=/export/babel/data/102-assamese/release-current/conversational/reference_materials/lexicon.sub-train.txt +cer=0 + +max_index_states=150000 +word_ins_penalty=0.5 + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/103-bengali-fullLP.official.conf 
b/egs/babel/s5d/conf/lang/103-bengali-fullLP.official.conf new file mode 100644 index 00000000000..d283be30d16 --- /dev/null +++ b/egs/babel/s5d/conf/lang/103-bengali-fullLP.official.conf @@ -0,0 +1,105 @@ +# include common settings for fullLP systems. +. conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/103-bengali/release-current/conversational/training +train_data_list=/export/babel/data/splits/Bengali_Babel103/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=12 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval +eval_data_list=/export/babel/data/splits/Bengali_Babel103//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/103-bengali/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/103-bengali/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist2.xml + 
[llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist4.xml + ) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/103-bengali/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval/ + ) +shadow_data_list=( + /export/babel/data/splits/Bengali_Babel103/dev.list + /export/babel/data/splits/Bengali_Babel103/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + + + +lexicon_file=/export/babel/data/103-bengali/release-current/conversational/reference_materials/lexicon.txt +cer=0 + +max_index_states=150000 +word_ins_penalty=0.5 + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/103-bengali-limitedLP.official.conf b/egs/babel/s5d/conf/lang/103-bengali-limitedLP.official.conf new file mode 100644 index 00000000000..3799653db68 --- /dev/null +++ b/egs/babel/s5d/conf/lang/103-bengali-limitedLP.official.conf @@ -0,0 +1,114 @@ +# include common settings for limitedLP systems. +. 
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/103-bengali//release-current/conversational/training +train_data_list=/export/babel/data/splits/Bengali_Babel103/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=12 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev/IARPA-babel103b-v0.4b_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval +eval_data_list=/export/babel/data/splits/Bengali_Babel103//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/103-bengali/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/103-bengali/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist2.xml + [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-evalpart1/IARPA-babel103b-v0.4b_conv-evalpart1.annot.kwlist4.xml + ) 
+evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/103-bengali/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_103/conversational/eval/ + ) +shadow_data_list=( + /export/babel/data/splits/Bengali_Babel103/dev.list + /export/babel/data/splits/Bengali_Babel103/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel103b-v0.4b_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + +unsup_data_dir=(/export/babel/data/103-bengali/release-current/conversational/training/ + /export/babel/data/103-bengali/release-current/conversational/untranscribed-training/ + ) +unsup_data_list=( + /export/babel/data/splits/Bengali_Babel103/train.LimitedLP.untranscribed.list + /export/babel/data/splits/Bengali_Babel103/train.untranscribed.list + ) +unsup_nj=64 + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=2500 +numGaussTri2=36000 +numLeavesTri3=2500 +numGaussTri3=36000 +numLeavesMLLT=2500 +numGaussMLLT=36000 +numLeavesSAT=2500 +numGaussSAT=36000 +numGaussUBM=750 +numLeavesSGMM=5000 +numGaussSGMM=18000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + + + +lexicon_file=/export/babel/data/103-bengali/release-current/conversational/reference_materials/lexicon.sub-train.txt +cer=0 + +max_index_states=150000 +word_ins_penalty=0.5 + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/104-pashto-fullLP-40hrs.official.conf b/egs/babel/s5d/conf/lang/104-pashto-fullLP-40hrs.official.conf new file mode 100644 index 00000000000..9fbaf629935 --- /dev/null +++ b/egs/babel/s5d/conf/lang/104-pashto-fullLP-40hrs.official.conf @@ -0,0 +1,114 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/104-pashto/release-current/conversational/training +train_data_list=./conf/lists/104-pashto/train.40HrFLP.list +train_nj=32 + +#RADICAL DEV2H data files +dev2h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Pashto_Babel104/dev2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev +dev10h_data_list=./conf/lists/104-pashto/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +dev10h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml + [eval16]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist4.xml +) +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml + [eval16]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist4.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.kwlist2.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval +evalpart1_data_list=conf/lists/104-pashto/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.mitllfa3.rttm 
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml +evalpart1_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml +) +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml +) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/104-pashto/release-current/conversational/dev + /export/babel/data/104-pashto/release-current/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Pashto_Babel104/dev.list + /export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/104-pashto/release-current/conversational/reference_materials/lexicon.txt + + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/104-pashto-fullLP.official.conf b/egs/babel/s5d/conf/lang/104-pashto-fullLP.official.conf new file mode 100644 index 00000000000..08f849b7605 --- /dev/null +++ b/egs/babel/s5d/conf/lang/104-pashto-fullLP.official.conf @@ -0,0 +1,114 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/104-pashto/release-current/conversational/training +train_data_list=/export/babel/data/splits/Pashto_Babel104/train.FullLP.list +train_nj=32 + +#RADICAL DEV2H data files +dev2h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Pashto_Babel104/dev2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Pashto_Babel104/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +dev10h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml + [eval16]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist4.xml +) +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist3.xml + [eval16]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist4.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.kwlist2.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval +evalpart1_data_list=conf/lists/104-pashto/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.mitllfa3.rttm 
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml +evalpart1_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml +) +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml +) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/104-pashto/release-current/conversational/dev + /export/babel/data/104-pashto/release-current/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Pashto_Babel104/dev.list + /export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml +shadow_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +shadow_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml + + ) +shadow_nj=64 + + +# Acoustic model parameters +numLeavesTri1=1000 +numGaussTri1=10000 +numLeavesTri2=1000 +numGaussTri2=20000 +numLeavesTri3=6000 +numGaussTri3=75000 +numLeavesMLLT=6000 +numGaussMLLT=75000 +numLeavesSAT=6000 +numGaussSAT=75000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=80000 + +# Lexicon and Language Model parameters +oovSymbol="" +lexiconFlags="--romanized --oov " + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/104-pashto/release-current/conversational/reference_materials/lexicon.txt + + +#keyword search settings +duptime=0.5 +case_insensitive=true diff --git a/egs/babel/s5d/conf/lang/104-pashto-limitedLP.official.conf b/egs/babel/s5d/conf/lang/104-pashto-limitedLP.official.conf new file mode 100644 index 00000000000..41bc3ba85ef --- /dev/null +++ b/egs/babel/s5d/conf/lang/104-pashto-limitedLP.official.conf @@ -0,0 +1,110 @@ +# include common settings for limitedLP systems. +. 
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/104-pashto/release-current/conversational/training +train_data_list=/export/babel/data/splits/Pashto_Babel104/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV2H data files +dev2h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Pashto_Babel104/dev2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Pashto_Babel104/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.kwlist2.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval +evalpart1_data_list=conf/lists/104-pashto/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.annot.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml +) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/104-pashto/release-current/conversational/dev + 
/export/babel/data/104-pashto/release-current/conversational/eval
+  )
+shadow_data_list=(
+    /export/babel/data/splits/Pashto_Babel104/dev.list
+    /export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml
+shadow_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
+
+  )
+shadow_nj=64
+
+unsup_data_dir=(
+    /export/babel/data/104-pashto/release-current/conversational/training/
+  )
+unsup_data_list=(
+    /export/babel/data/splits/Pashto_Babel104/train.LimitedLP.untranscribed.list
+  )
+unsup_nj=64
+
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/104-pashto/release-current-subtrain/conversational/reference_materials/lexicon.sub-train.txt
+
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/105-turkish-fullLP.official.conf b/egs/babel/s5d/conf/lang/105-turkish-fullLP.official.conf
new file mode 100644
index 00000000000..6889cb7eb37
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/105-turkish-fullLP.official.conf
@@ -0,0 +1,111 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training +train_data_list=/export/babel/data/splits/Turkish_Babel105/train.fullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev +dev2h_data_list=/export/babel/data/splits/Turkish_Babel105/dev2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml +) +dev2h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev +dev10h_data_list=/export/babel/data/splits/Turkish_Babel105/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml +) +dev10h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval +eval_data_list=/export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.kwlist2.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval +evalpart1_data_list=conf/lists/105-turkish/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + 
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.annot.kwlist2.xml
+)
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/105-turkish/release-current-b/conversational/dev
+    /export/babel/data/105-turkish/release-current-b/conversational/eval
+  )
+shadow_data_list=(
+    /export/babel/data/splits/Turkish_Babel105/dev.list
+    /export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
+shadow_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
+
+  )
+shadow_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+glmFile=./conf/glm
+lexicon_file=/export/babel/data/105-turkish/release-current-b/conversational/reference_materials/lexicon.txt
+#http://demo.icu-project.org/icu-bin/translit
+icu_opt=(--use-icu true --icu-transform 'İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();' )
+#icu_opt=(--use-icu true --icu-transform "'\\\\\\\\İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();'" )
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/105-turkish-limitedLP.official.conf b/egs/babel/s5d/conf/lang/105-turkish-limitedLP.official.conf
new file mode 100644
index 00000000000..f7ca60c6f25
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/105-turkish-limitedLP.official.conf
@@ -0,0 +1,111 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training +train_data_list=/export/babel/data/splits/Turkish_Babel105/train.LimitedLP.official.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev +dev2h_data_list=/export/babel/data/splits/Turkish_Babel105/dev2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml +) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev +dev10h_data_list=/export/babel/data/splits/Turkish_Babel105/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval +eval_data_list=/export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.kwlist2.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval +evalpart1_data_list=conf/lists/105-turkish/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-evalpart1/IARPA-babel105b-v0.4_conv-evalpart1.annot.kwlist2.xml +) +evalpart1_nj=32 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/105-turkish/release-current-b/conversational/dev + /export/babel/data/105-turkish/release-current-b/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Turkish_Babel105/dev.list + 
/export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
+shadow_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
+
+  )
+shadow_nj=64
+
+unsup_data_dir=(
+    /export/babel/data/105-turkish/release-current-b/conversational/training/
+  )
+unsup_data_list=(
+    /export/babel/data/splits/Turkish_Babel105/train.LimitedLP.untranscribed.list
+  )
+unsup_nj=64
+
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=600
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/105-turkish/release-babel105b-v0.4-rc1/conversational/reference_materials/lexicon.sub-train.txt
+#http://demo.icu-project.org/icu-bin/translit
+icu_opt=(--use-icu true --icu-transform 'İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();' )
+#icu_opt=(--use-icu true --icu-transform "'\\\\\\\\İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();'" )
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/106-tagalog-fullLP.official.conf b/egs/babel/s5d/conf/lang/106-tagalog-fullLP.official.conf
new file mode 100644
index 00000000000..fa1afe4717e
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/106-tagalog-fullLP.official.conf
@@ -0,0 +1,108 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/106-tagalog/release-current/conversational/training/
+train_data_list=/export/babel/data/splits/Tagalog_Babel106/train.FullLP.list
+train_nj=32
+
+#RADICAL DEV data files
+dev2h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
+dev2h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/splits/Tagalog_Babel106/babel106b-v0.2g_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml
+)
+dev2h_subset_ecf=true
+dev2h_nj=23
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml
+)
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval
+eval_data_list=/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.kwlist2.xml
+eval_nj=64
+
+#Official (POST-)EVAL evaluation data portion
+evalpart1_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval/
+evalpart1_data_list=conf/lists/106-tagalog/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+evalpart1_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1.kwlist2.xml
+)
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/106-tagalog/release-current/conversational/dev
+    /export/babel/data/106-tagalog/release-current/conversational/eval
+  )
+shadow_data_list=(
+    /export/babel/data/splits/Tagalog_Babel106/dev.list
+    
/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml
+
+  )
+shadow_nj=64
+
+unsup_data_dir=(
+    /export/babel/data/106-tagalog/release-current/conversational/training/
+  )
+unsup_data_list=(
+    /export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.untranscribed.list
+  )
+unsup_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/106-tagalog/release-current/conversational/reference_materials/lexicon.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/106-tagalog-limitedLP.official.conf b/egs/babel/s5d/conf/lang/106-tagalog-limitedLP.official.conf
new file mode 100644
index 00000000000..86148300e0c
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/106-tagalog-limitedLP.official.conf
@@ -0,0 +1,108 @@
+# include common settings for limitedLP systems.
+. conf/common.limitedLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/106-tagalog/release-current/conversational/training/
+train_data_list=/export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.official.list
+train_nj=16
+
+#RADICAL DEV data files
+dev2h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
+dev2h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/splits/Tagalog_Babel106/babel106b-v0.2g_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml
+)
+dev2h_subset_ecf=true
+dev2h_nj=23
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+  
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml
+)
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval
+eval_data_list=/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.kwlist2.xml
+eval_nj=64
+
+#Official (POST-)EVAL evaluation data portion
+evalpart1_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval/
+evalpart1_data_list=conf/lists/106-tagalog/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1/IARPA-babel106b-v0.2g_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+evalpart1_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-evalpart1.kwlist2.xml
+)
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/106-tagalog/release-current/conversational/dev
+    /export/babel/data/106-tagalog/release-current/conversational/eval
+  )
+shadow_data_list=(
+    /export/babel/data/splits/Tagalog_Babel106/dev.list
+    /export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml
+
+  )
+shadow_nj=64
+
+unsup_data_dir=(
+    /export/babel/data/106-tagalog/release-current/conversational/training/
+  )
+unsup_data_list=(
+    /export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.untranscribed.list
+  )
+unsup_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/106-tagalog/release-babel106b-v0.2g-sub-train/conversational/reference_materials/lexicon.sub-train.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/107-vietnamese-fullLP.official.conf b/egs/babel/s5d/conf/lang/107-vietnamese-fullLP.official.conf
new file mode 100644
index 00000000000..e09ef9df4fd
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/107-vietnamese-fullLP.official.conf
@@ -0,0 +1,107 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Vietnamese_Babel107/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Vietnamese_Babel107/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml +) +dev2h_subset_ecf=true +dev2h_nj=27 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/dev/ +dev10h_data_list=/export/babel/data/splits/Vietnamese_Babel107/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Vietnamese_Babel107/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-eval.kwlist3.xml +eval_nj=81 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/107-vietnamese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.annot.kwlist3.xml +) +evalpart1_nj=64 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/107-vietnamese/release-current/conversational/dev/ + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval + ) +shadow_data_list=( + 
/export/babel/data/splits/Vietnamese_Babel107/dev.list
+    /export/babel/data/splits/Vietnamese_Babel107/eval.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml
+
+  )
+shadow_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/107-vietnamese/release-current/conversational/reference_materials/lexicon.txt
+
+phoneme_mapping="i@U=i @ U;oaI=o a I;oaI:=o a I:;u@I=u @ I;uI@= u I @;1@I=1 @ I;1@U=1 @ U;
+   a:I=a: I; a:U=a: U; aU=a U; @U=@ U; aI=a I; @I=@ I; EU=E U; eU=e U; i@=i @; iU=i U; Oa:=O a: ; Oa=O a;
+   OE=O E; OI=O I; oI=o I; @:I=@: I; u@=u @; 1@=1 @; ue=u e; uI=u I; 1I=1 I; u@:=u @:; 1U=1 U; ui:=u i:"
+#
+
+
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/107-vietnamese-limitedLP.official.conf b/egs/babel/s5d/conf/lang/107-vietnamese-limitedLP.official.conf
new file mode 100644
index 00000000000..a659c44ecc4
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/107-vietnamese-limitedLP.official.conf
@@ -0,0 +1,115 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Vietnamese_Babel107/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/dev +dev2h_data_list=/export/babel/data/splits/Vietnamese_Babel107/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml +) +dev2h_subset_ecf=true +dev2h_nj=27 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/dev/ +dev10h_data_list=/export/babel/data/splits/Vietnamese_Babel107/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev/IARPA-babel107b-v0.7_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml +) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Vietnamese_Babel107/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-eval.kwlist3.xml +eval_nj=64 + +#Official (POST-)EVAL evaluation data portion +evalpart1_data_dir=/export/babel/data/107-vietnamese/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/107-vietnamese/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/splits/Vietnamese_Babel107/keywords.expanded.cmu.v2.xml +evalpart1_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-evalpart1/IARPA-babel107b-v0.7_conv-evalpart1.annot.kwlist3.xml +) +evalpart1_nj=64 + +#Shadow data files +shadow_data_dir=( + /export/babel/data/107-vietnamese/release-current/conversational/dev/ + /export/babel/data/107-vietnamese/release-current/conversational/eval/ + ) +shadow_data_list=( + 
/export/babel/data/splits/Vietnamese_Babel107/dev.list
+    /export/babel/data/splits/Vietnamese_Babel107/eval.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel107b-v0.7_conv-dev.kwlist3.xml
+
+  )
+shadow_nj=64
+
+unsup_data_dir=(
+    /export/babel/data/107-vietnamese/release-current/conversational/training/
+  )
+unsup_data_list=(
+    /export/babel/data/splits/Vietnamese_Babel107/train.LimitedLP.untranscribed.list
+  )
+unsup_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/107-vietnamese/release-current/conversational/reference_materials/lexicon.sub-train.txt
+
+phoneme_mapping="i@U=i @ U;oaI=o a I;oaI:=o a I:;u@I=u @ I;uI@= u I @;1@I=1 @ I;1@U=1 @ U;
+   a:I=a: I; a:U=a: U; aU=a U; @U=@ U; aI=a I; @I=@ I; EU=E U; eU=e U; i@=i @; iU=i U; Oa:=O a: ; Oa=O a;
+   OE=O E; OI=O I; oI=o I; @:I=@: I; u@=u @; 1@=1 @; ue=u e; uI=u I; 1I=1 I; u@:=u @:; 1U=1 U; ui:=u i:"
+#
+
+
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/201-haitian-fullLP.official.conf b/egs/babel/s5d/conf/lang/201-haitian-fullLP.official.conf
new file mode 100644
index 00000000000..b92a52b7bb6
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/201-haitian-fullLP.official.conf
@@ -0,0 +1,80 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/
+train_data_list=/export/babel/data/splits/Haitian_Babel201/train.FullLP.list
+train_nj=32
+
+#RADICAL DEV data files
+dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
+dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml
+  )
+dev2h_subset_ecf=true
+dev2h_nj=20
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml
+  )
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval
+eval_data_list=/export/babel/data/splits/Haitian_Babel201//eval.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml
+eval_nj=32
+
+#Official (POST-)EVAL evaluation data portion
+evalpart1_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval/
+evalpart1_data_list=conf/lists/201-haitian/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist.xml
+evalpart1_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist2.xml
+  [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist3.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist4.xml
+  )
+evalpart1_nj=32
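+# A minimal usage sketch (hypothetical snippet, not part of the recipe): run
+# scripts source a config like this one and then read the variables directly,
+# e.g.
+#   . conf/lang/201-haitian-fullLP.official.conf
+#   echo "train: $train_data_dir ($train_nj jobs); eval: $eval_data_dir ($eval_nj jobs)"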
+ +#Shadow data files +shadow_data_dir=( + /export/babel/data/201-haitian/release-current/conversational/dev + /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval + ) +shadow_data_list=( + /export/babel/data/splits/Haitian_Babel201/dev.list + /export/babel/data/splits/Haitian_Babel201/eval.list + ) +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml +shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +shadow_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + + ) +shadow_nj=32 + + +# Scoring protocols (dummy GLM file to appease the scoring script) +#glmFile=./conf/glm +lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.txt + diff --git a/egs/babel/s5d/conf/lang/201-haitian-limitedLP.official.conf b/egs/babel/s5d/conf/lang/201-haitian-limitedLP.official.conf new file mode 100644 index 00000000000..d1320fd0245 --- /dev/null +++ b/egs/babel/s5d/conf/lang/201-haitian-limitedLP.official.conf @@ -0,0 +1,89 @@ +# include common settings for limitedLP systems. +. conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Haitian_Babel201/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=20 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev/IARPA-babel201b-v0.2b_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval +eval_data_list=/export/babel/data/splits/Haitian_Babel201//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml 
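+# For orientation (describing the usual NIST KWS file roles, not anything
+# specific to this setup): the .ecf.xml lists the audio regions to search,
+# the .rttm is the reference alignment, the .stm the scoring transcript, and
+# each .kwlist*.xml holds keyword queries, shaped roughly like:
+#   <kwlist ecf_filename="..." language="haitian">
+#     <kw kwid="KW201-0001"><kwtext>example keyword</kwtext></kw>
+#   </kwlist>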
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml
+eval_nj=32
+
+#Official (POST-)EVAL evaluation data portion
+evalpart1_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval/
+evalpart1_data_list=conf/lists/201-haitian/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist.xml
+evalpart1_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist2.xml
+  [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist3.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-evalpart1/IARPA-babel201b-v0.2b_conv-evalpart1.annot.kwlist4.xml
+  )
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/201-haitian/release-current/conversational/dev
+    /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval
+  )
+shadow_data_list=(
+    /export/babel/data/splits/Haitian_Babel201/dev.list
+    /export/babel/data/splits/Haitian_Babel201/eval.list
+  )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel201b-v0.2b_conv-eval.kwlist4.xml
+
+  )
+shadow_nj=32
+
+unsup_data_dir=(/export/babel/data/201-haitian/release-current/conversational/training/
+    /export/babel/data/201-haitian/release-current/conversational/untranscribed-training/
+  )
+unsup_data_list=(
+    /export/babel/data/splits/Haitian_Babel201/train.LimitedLP.untranscribed.list
+    /export/babel/data/splits/Haitian_Babel201/train.untranscribed.list
+  )
+unsup_nj=64
+
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.sub-train.txt
+
diff --git a/egs/babel/s5d/conf/lang/202-swahili.FLP.official.conf b/egs/babel/s5d/conf/lang/202-swahili.FLP.official.conf
new file mode 100644
index 00000000000..d24eb1b73a4
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/202-swahili.FLP.official.conf
@@ -0,0 +1,93 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/training +train_data_list=./conf/lists/202-swahili//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev +dev2h_data_list=./conf/lists/202-swahili//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist3.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev +dev10h_data_list=./conf/lists/202-swahili//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist3.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +eval_data_list=./conf/lists/202-swahili//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.ecf.xml +eval_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist3.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +evalpart1_data_list=./conf/lists/202-swahili//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.stm +evalpart1_kwlists=( + 
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist3.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev
+  /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/202-swahili//dev.list
+  ./conf/lists/202-swahili//eval.list
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm
+shadow_kwlists=(
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist3.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/untranscribed-training
+unsup_data_list=./conf/lists/202-swahili//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/202-swahili.LLP.official.conf b/egs/babel/s5d/conf/lang/202-swahili.LLP.official.conf
new file mode 100644
index 00000000000..761e6c6e0ab
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/202-swahili.LLP.official.conf
@@ -0,0 +1,99 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/training +train_data_list=./conf/lists/202-swahili//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev +dev2h_data_list=./conf/lists/202-swahili//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist3.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev +dev10h_data_list=./conf/lists/202-swahili//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.annot.kwlist3.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +eval_data_list=./conf/lists/202-swahili//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.ecf.xml +eval_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist3.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval +evalpart1_data_list=./conf/lists/202-swahili//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.stm +evalpart1_kwlists=( + 
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-evalpart1/IARPA-babel202b-v1.0d_conv-evalpart1.annot.kwlist3.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/dev
+  /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-eval/BABEL_OP2_202/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/202-swahili//dev.list
+  ./conf/lists/202-swahili//eval.list
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-dev/IARPA-babel202b-v1.0d_conv-dev.stm
+shadow_kwlists=(
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel202b-v1.0d_conv-eval.kwlist3.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+  ./conf/lists/202-swahili//untranscribed-training.list
+  ./conf/lists/202-swahili//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+  /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/untranscribed-training
+  /export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/202-swahili/IARPA-babel202b-v1.0d-build/BABEL_OP2_202/conversational/reference_materials/lexicon.sub-train.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/203-lao-fullLP.official.conf b/egs/babel/s5d/conf/lang/203-lao-fullLP.official.conf
new file mode 100644
index 00000000000..052aa6bbc50
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/203-lao-fullLP.official.conf
@@ -0,0 +1,101 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/203-lao/release-current/conversational/training +train_data_list=/export/babel/data/splits/Lao_Babel203/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/203-lao/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Lao_Babel203/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/203-lao/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Lao_Babel203/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval +eval_data_list=/export/babel/data/splits/Lao_Babel203//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/203-lao/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/203-lao/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist.xml +evalpart1_more_kwlists=( + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist2.xml + [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist3.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist4.xml + ) +evalpart1_nj=32 + +#Shadow data files 
+shadow_data_dir=(
+    /export/babel/data/203-lao/release-current/conversational/dev
+    /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval/
+    )
+shadow_data_list=(
+    /export/babel/data/splits/Lao_Babel203/dev.list
+    /export/babel/data/splits/Lao_Babel203/eval.list
+    )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
+  )
+shadow_nj=32
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/203-lao/release-current/conversational/reference_materials/lexicon.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/203-lao-limitedLP.official.conf b/egs/babel/s5d/conf/lang/203-lao-limitedLP.official.conf
new file mode 100644
index 00000000000..1e12a529361
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/203-lao-limitedLP.official.conf
@@ -0,0 +1,110 @@
+# include common settings for limitedLP systems.
+. conf/common.limitedLP || exit 1;
+
+#speech corpora files location
+train_data_dir=/export/babel/data/203-lao/release-current/conversational/training
+train_data_list=/export/babel/data/splits/Lao_Babel203/train.LimitedLP.list
+train_nj=16
+
+#RADICAL DEV data files
+dev2h_data_dir=/export/babel/data/203-lao/release-current/conversational/dev/
+dev2h_data_list=/export/babel/data/splits/Lao_Babel203/dev.2hr.list
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.stm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.ecf.xml
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm
+dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
+dev2h_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
+  )
+dev2h_subset_ecf=true
+dev2h_nj=18
+
+#Official DEV data files
+dev10h_data_dir=/export/babel/data/203-lao/release-current/conversational/dev
+dev10h_data_list=/export/babel/data/splits/Lao_Babel203/dev.list
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.stm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.ecf.xml
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev/IARPA-babel203b-v3.1a_conv-dev.mitllfa3.rttm
+dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
+dev10h_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
+  )
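+# Illustrative sketch (commented out; an assumption about usage, not part of
+# the recipe itself): *_kwlist_file holds the primary kwlist, while the
+# *_more_kwlists variables are bash associative arrays mapping a short name
+# to an extra kwlist. A consumer must "declare -A dev10h_more_kwlists"
+# before sourcing this file for the [name]=value syntax to work, and could
+# then iterate the pairs roughly like this:
+#   for name in "${!dev10h_more_kwlists[@]}"; do
+#     kwlist=${dev10h_more_kwlists[$name]}
+#     echo "kwlist $name -> $kwlist"  # e.g. hand each pair to the KWS setup
+#   done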
+dev10h_nj=32
+
+
+#Official EVAL period evaluation data files
+eval_data_dir=/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval
+eval_data_list=/export/babel/data/splits/Lao_Babel203//eval.list
+eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml
+eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
+eval_nj=32
+
+#Official EVAL period evaluation data files
+evalpart1_data_dir=/export/babel/data/203-lao/release-current/conversational/eval/
+evalpart1_data_list=conf/lists/203-lao/evalpart1.list
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.stm
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.scoring.ecf.xml
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist.xml
+evalpart1_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist2.xml
+  [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist3.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-evalpart1/IARPA-babel203b-v3.1a_conv-evalpart1.annot.kwlist4.xml
+  )
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/203-lao/release-current/conversational/dev
+    /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_203/conversational/eval/
+    )
+shadow_data_list=(
+    /export/babel/data/splits/Lao_Babel203/dev.list
+    /export/babel/data/splits/Lao_Babel203/eval.list
+    )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel203b-v3.1a_conv-eval.kwlist4.xml
+  )
+shadow_nj=32
+
+unsup_data_dir=(/export/babel/data/203-lao/release-current/conversational/training/
+    /export/babel/data/203-lao/release-current/conversational/untranscribed-training/
+    )
+unsup_data_list=(
+    /export/babel/data/splits/Lao_Babel203/train.LimitedLP.untranscribed.list
+    /export/babel/data/splits/Lao_Babel203/train.untranscribed.list
+    )
+unsup_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/203-lao/release-current/conversational/reference_materials/lexicon.sub-train.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/204-tamil-fullLP.official.conf b/egs/babel/s5d/conf/lang/204-tamil-fullLP.official.conf
new file mode 100644
index 00000000000..700ae3d5dfb
--- /dev/null
+++ 
b/egs/babel/s5d/conf/lang/204-tamil-fullLP.official.conf @@ -0,0 +1,112 @@ +# include common settings for fullLP systems. +. conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/204-tamil/release-current/conversational/training +train_data_list=/export/babel/data/splits/Tamil_Babel204/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/204-tamil/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Tamil_Babel204/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +dev2h_more_kwlists=( + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/204-tamil/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Tamil_Babel204/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +dev10h_more_kwlists=( + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Tamil_Babel204/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +eval_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +eval_nj=64 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/204-tamil/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.scoring.ecf.xml 
+evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.mitllfa3.rttm
+evalpart1_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
+evalpart1_more_kwlists=(
+  [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist.xml
+  [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist2.xml
+  [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist3.xml
+  [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist4.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist5.xml
+  )
+evalpart1_nj=64
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/204-tamil/release-current/conversational/dev/
+    /export/babel/data/204-tamil/release-current/conversational/eval/
+    )
+shadow_data_list=(
+    /export/babel/data/splits/Tamil_Babel204/dev.list
+    /export/babel/data/splits/Tamil_Babel204/eval.list
+    )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml
+shadow_kwlist_file=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
+shadow_more_kwlists=(
+  [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml
+  [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml
+  [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml
+  [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml
+  )
+shadow_nj=64
+
+
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/204-tamil/release-current/conversational/reference_materials/lexicon.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/204-tamil-limitedLP.official.conf b/egs/babel/s5d/conf/lang/204-tamil-limitedLP.official.conf
new file mode 100644
index 00000000000..7e16fcd8be5
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/204-tamil-limitedLP.official.conf
@@ -0,0 +1,122 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/204-tamil/release-current/conversational/training +train_data_list=/export/babel/data/splits/Tamil_Babel204/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/204-tamil/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Tamil_Babel204/dev.2hr.list +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm +dev2h_kwlists=( + [dev]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/204-tamil/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Tamil_Babel204/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.mitllfa3.rttm +dev10h_kwlists=( + [dev]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml + [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml + [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml + [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml + [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Tamil_Babel204/eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml +eval_kwlists=( + [eval]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml +) +eval_nj=64 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/204-tamil/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/204-tamil/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.stm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.scoring.ecf.xml +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1/IARPA-babel204b-v1.1b_conv-evalpart1.mitllfa3.rttm +evalpart1_kwlists=( + 
[dev]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
+  [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist.xml
+  [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist2.xml
+  [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist3.xml
+  [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist4.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-evalpart1.kwlist5.xml
+  )
+evalpart1_nj=64
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/204-tamil/release-current/conversational/dev/
+    /export/babel/data/204-tamil/release-current/conversational/eval/
+    )
+shadow_data_list=(
+    /export/babel/data/splits/Tamil_Babel204/dev.list
+    /export/babel/data/splits/Tamil_Babel204/eval.list
+    )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev/IARPA-babel204b-v1.1b_conv-dev.scoring.ecf.xml
+shadow_kwlists=(
+  [dev]=/export/babel/data/splits/Tamil_Babel204/IARPA-babel204b-v1.1b_conv-dev.radical.kwlist.xml
+  [bbn1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist.xml
+  [bbn2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist2.xml
+  [ibm1]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist3.xml
+  [ibm2]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist4.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel204b-v1.1b_conv-dev.kwlist5.xml
+  )
+shadow_nj=64
+
+
+unsup_data_dir=(/export/babel/data/204-tamil/release-current/conversational/training/
+    /export/babel/data/204-tamil/release-current/conversational/untranscribed-training/
+    )
+unsup_data_list=(
+    /export/babel/data/splits/Tamil_Babel204/train.LimitedLP.untranscribed.list
+    /export/babel/data/splits/Tamil_Babel204/train.untranscribed.list
+    )
+unsup_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--romanized --oov <unk>"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/204-tamil/release-current/conversational/reference_materials/lexicon.sub-train.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/205-kurmanji.FLP.official.conf b/egs/babel/s5d/conf/lang/205-kurmanji.FLP.official.conf
new file mode 100644
index 00000000000..74e006e2692
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/205-kurmanji.FLP.official.conf
@@ -0,0 +1,94 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/training +train_data_list=./conf/lists/205-kurmanji//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev +dev2h_data_list=./conf/lists/205-kurmanji//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev +dev10h_data_list=./conf/lists/205-kurmanji//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval +eval_data_list=./conf/lists/205-kurmanji//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval +evalpart1_data_list=./conf/lists/205-kurmanji//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.scoring.ecf.xml 
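+# Illustrative sketch (commented out; an assumption about how the run scripts
+# consume these files, not part of this config): a script selects one dataset
+# prefix (train, dev2h, dev10h, eval, evalpart1, shadow, unsup) and reads the
+# matching scalar variables via indirect expansion, roughly:
+#   dataset=dev10h
+#   eval data_dir=\${${dataset}_data_dir}
+#   eval data_list=\${${dataset}_data_list}
+#   eval nj=\${${dataset}_nj}
+#   echo "decoding $data_dir ($nj jobs)"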
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.stm
+evalpart1_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev
+  /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/205-kurmanji//dev.list
+  ./conf/lists/205-kurmanji//eval.list
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm
+shadow_kwlists=(
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/untranscribed-training
+unsup_data_list=./conf/lists/205-kurmanji//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/205-kurmanji.LLP.official.conf b/egs/babel/s5d/conf/lang/205-kurmanji.LLP.official.conf
new file mode 100644
index 00000000000..fc5fdd4aa52
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/205-kurmanji.LLP.official.conf
@@ -0,0 +1,100 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/training +train_data_list=./conf/lists/205-kurmanji//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev +dev2h_data_list=./conf/lists/205-kurmanji//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev +dev10h_data_list=./conf/lists/205-kurmanji//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval +eval_data_list=./conf/lists/205-kurmanji//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval +evalpart1_data_list=./conf/lists/205-kurmanji//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.scoring.ecf.xml 
+evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.stm
+evalpart1_kwlists=(
+  [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist.xml
+  [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-evalpart1/IARPA-babel205b-v1.0a_conv-evalpart1.annot.kwlist4.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/dev
+  /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-eval/BABEL_OP2_205/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/205-kurmanji//dev.list
+  ./conf/lists/205-kurmanji//eval.list
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-dev/IARPA-babel205b-v1.0a_conv-dev.stm
+shadow_kwlists=(
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel205b-v1.0a_conv-eval.kwlist4.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+  ./conf/lists/205-kurmanji//untranscribed-training.list
+  ./conf/lists/205-kurmanji//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+  /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/untranscribed-training
+  /export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/205-kurmanji/IARPA-babel205b-v1.0a-build/BABEL_OP2_205/conversational/reference_materials/lexicon.sub-train.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/206-zulu-fullLP.official.conf b/egs/babel/s5d/conf/lang/206-zulu-fullLP.official.conf
new file mode 100644
index 00000000000..675dc83780d
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/206-zulu-fullLP.official.conf
@@ -0,0 +1,129 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/206-zulu/release-current/conversational/training +train_data_list=/export/babel/data/splits/Zulu_Babel206/train.FullLP.list +train_nj=32 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list +dev2h_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/db-dev-jhuseg-v7-utt.dat +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.scoring.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm +dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml +dev2h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev2h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.scoring.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm +dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml +dev10h_more_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev10h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml + [dev2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Zulu_Babel206//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/206-zulu/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.stm +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.mitllfa3.rttm 
+evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1.ecf.xml
+evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist.xml
+evalpart1_more_kwlists=(
+  [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist2.xml
+  [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist3.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist4.xml
+  )
+evalpart1_kwlists=(
+  [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist.xml
+  [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist2.xml
+  [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist3.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist4.xml
+  )
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/206-zulu/release-current/conversational/dev
+    /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_206/conversational/eval/
+    )
+shadow_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/206-shadow-v0-cleaned-utt.dat
+shadow_data_list=(
+    /export/babel/data/splits/Zulu_Babel206/dev.list
+    /export/babel/data/splits/Zulu_Babel206/eval.list
+    )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
+  )
+shadow_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
+  )
+shadow_nj=32
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=1000
+numGaussTri2=20000
+numLeavesTri3=6000
+numGaussTri3=75000
+numLeavesMLLT=6000
+numGaussMLLT=75000
+numLeavesSAT=6000
+numGaussSAT=75000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=80000
+
+#Zulu seems to need much larger LM Weights
+lmwt_plp_extra_opts=( --min-lmwt 10 --max-lmwt 17 )
+lmwt_bnf_extra_opts=( --min-lmwt 13 --max-lmwt 18 )
+lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 17 )
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+phoneme_mapping="k_>=g_<; 3=e; R=l; o=O; b_<=b; t_>=th;"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
diff --git a/egs/babel/s5d/conf/lang/206-zulu-limitedLP.official.conf b/egs/babel/s5d/conf/lang/206-zulu-limitedLP.official.conf
new file mode 100644
index 00000000000..caaf8cdc023
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/206-zulu-limitedLP.official.conf
@@ -0,0 +1,126 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + +#speech corpora files location +train_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/ +train_data_list=/export/babel/data/splits/Zulu_Babel206/train.LimitedLP.list +train_nj=16 + +#RADICAL DEV data files +dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/ +dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list +dev2h_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/db-dev-jhuseg-v7-utt.dat +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.stm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.scoring.ecf.xml +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm +dev2h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml +) +dev2h_subset_ecf=true +dev2h_nj=18 + +#Official DEV data files +dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev +dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.stm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.scoring.ecf.xml +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev/IARPA-babel206b-v0.1e_conv-dev.mitllfa3.rttm +dev10h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml + [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml + ) +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval/ +eval_data_list=/export/babel/data/splits/Zulu_Babel206//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml +eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml +eval_nj=32 + +#Official EVAL period evaluation data files +evalpart1_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval/ +evalpart1_data_list=conf/lists/206-zulu/evalpart1.list +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.stm +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1.ecf.xml +evalpart1_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist.xml + [llp1]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist2.xml + [llp2]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist3.xml + 
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-evalpart1/IARPA-babel206b-v0.1e_conv-evalpart1.annot.kwlist4.xml
+)
+evalpart1_nj=32
+
+#Shadow data files
+shadow_data_dir=(
+    /export/babel/data/206-zulu/release-current/conversational/dev
+    /export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_206/conversational/eval/
+    )
+shadow_data_cmudb=/export/babel/data/splits/Zulu_Babel206/uem/206-shadow-v0-cleaned-utt.dat
+shadow_data_list=(
+    /export/babel/data/splits/Zulu_Babel206/dev.list
+    /export/babel/data/splits/Zulu_Babel206/eval.list
+    )
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.ecf.xml
+shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist.xml
+shadow_more_kwlists=(
+  [llp]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-dev.kwlist2.xml
+  [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel206b-v0.1e_conv-eval.kwlist4.xml
+  )
+shadow_nj=32
+
+
+unsup_data_dir=(/export/babel/data/206-zulu/release-current/conversational/training/
+    /export/babel/data/206-zulu/release-current/conversational/untranscribed-training/
+    )
+unsup_data_list=(
+    /export/babel/data/splits/Zulu_Babel206/train.LimitedLP.untranscribed.list
+    /export/babel/data/splits/Zulu_Babel206/train.untranscribed.list
+    )
+unsup_nj=64
+
+# Acoustic model parameters
+numLeavesTri1=1000
+numGaussTri1=10000
+numLeavesTri2=2500
+numGaussTri2=36000
+numLeavesTri3=2500
+numGaussTri3=36000
+numLeavesMLLT=2500
+numGaussMLLT=36000
+numLeavesSAT=2500
+numGaussSAT=36000
+numGaussUBM=750
+numLeavesSGMM=5000
+numGaussSGMM=18000
+
+#Zulu seems to need larger LM Weights
+lmwt_plp_extra_opts=( --min-lmwt 10 --max-lmwt 17 )
+lmwt_bnf_extra_opts=( --min-lmwt 17 --max-lmwt 24 )
+lmwt_dnn_extra_opts=( --min-lmwt 12 --max-lmwt 17 )
+
+# Lexicon and Language Model parameters
+oovSymbol="<unk>"
+lexiconFlags="--oov <unk>"
+phoneme_mapping="k_>=g_<; 3=e; R=l; o=O; b_<=b; t_>=th;"
+
+# Scoring protocols (dummy GLM file to appease the scoring script)
+#glmFile=./conf/glm
+lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt
+
+#keyword search settings
+duptime=0.5
+case_insensitive=true
+
+proxy_phone_beam=-1
+proxy_phone_nbest=-1
+proxy_beam=5
+proxy_nbest=500
+proxy_cutoff=0
+
diff --git a/egs/babel/s5d/conf/lang/207-tokpisin.FLP.official.conf b/egs/babel/s5d/conf/lang/207-tokpisin.FLP.official.conf
new file mode 100644
index 00000000000..0653c16fd8f
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/207-tokpisin.FLP.official.conf
@@ -0,0 +1,93 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/training +train_data_list=./conf/lists/207-tokpisin//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev +dev2h_data_list=./conf/lists/207-tokpisin//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +dev2h_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist3.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist5.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev +dev10h_data_list=./conf/lists/207-tokpisin//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +dev10h_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist3.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist5.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +eval_data_list=./conf/lists/207-tokpisin//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.ecf.xml +eval_kwlists=( + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.kwlist5.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +evalpart1_data_list=./conf/lists/207-tokpisin//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist.xml + 
[kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist2.xml
+  [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist3.xml
+  [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist4.xml
+  [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist5.xml
+) # evalpart1_kwlists
+evalpart1_nj=32
+
+
+#Shadow data files
+shadow_data_dir=(
+  /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev
+  /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval
+) # shadow_data_dir
+shadow_data_list=(
+  ./conf/lists/207-tokpisin//dev.list
+  ./conf/lists/207-tokpisin//eval.list
+) # shadow_data_list
+shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml
+shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm
+shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm
+shadow_kwlists=(
+  [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.kwlist5.xml
+) # shadow_kwlists
+shadow_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/untranscribed-training
+unsup_data_list=./conf/lists/207-tokpisin//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/207-tokpisin.LLP.official.conf b/egs/babel/s5d/conf/lang/207-tokpisin.LLP.official.conf
new file mode 100644
index 00000000000..d48f3196686
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/207-tokpisin.LLP.official.conf
@@ -0,0 +1,99 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/training +train_data_list=./conf/lists/207-tokpisin//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev +dev2h_data_list=./conf/lists/207-tokpisin//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +dev2h_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist3.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist5.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev +dev10h_data_list=./conf/lists/207-tokpisin//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +dev10h_kwlists=( + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist3.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.annot.kwlist5.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +eval_data_list=./conf/lists/207-tokpisin//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.ecf.xml +eval_kwlists=( + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.kwlist5.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +evalpart1_data_list=./conf/lists/207-tokpisin//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist.xml + 
[kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist4.xml + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-evalpart1/IARPA-babel207b-v1.0e_conv-evalpart1.annot.kwlist5.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/dev + /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-eval/BABEL_OP2_207/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/207-tokpisin//dev.list + ./conf/lists/207-tokpisin//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-dev/IARPA-babel207b-v1.0e_conv-dev.stm +shadow_kwlists=( + [kwlist5]=/export/babel/data/scoring/IndusDB/IARPA-babel207b-v1.0e_conv-eval.kwlist5.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/207-tokpisin//untranscribed-training.list + ./conf/lists/207-tokpisin//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/untranscribed-training + /export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/207-tokpisin/IARPA-babel207b-v1.0e-build/BABEL_OP2_207/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/301-cebuano.FLP.official.conf b/egs/babel/s5d/conf/lang/301-cebuano.FLP.official.conf new file mode 100644 index 00000000000..4e552e919f8 --- /dev/null +++ b/egs/babel/s5d/conf/lang/301-cebuano.FLP.official.conf @@ -0,0 +1,100 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/training +train_data_list=./conf/lists/301-cebuano//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev +dev2h_data_list=./conf/lists/301-cebuano//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev +dev10h_data_list=./conf/lists/301-cebuano//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +eval_data_list=./conf/lists/301-cebuano//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files 
+evalpart1_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +evalpart1_data_list=./conf/lists/301-cebuano//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev + /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/301-cebuano//dev.list + ./conf/lists/301-cebuano//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for FullLP condition +unsup_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/untranscribed-training +unsup_data_list=./conf/lists/301-cebuano//untranscribed-training.list +unsup_nj=32 + + +lexicon_file=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/reference_materials/lexicon.txt + + + diff --git a/egs/babel/s5d/conf/lang/301-cebuano.LLP.official.conf b/egs/babel/s5d/conf/lang/301-cebuano.LLP.official.conf new file mode 100644 index 00000000000..6ae02781972 --- /dev/null +++ b/egs/babel/s5d/conf/lang/301-cebuano.LLP.official.conf @@ -0,0 +1,106 @@ +# include common settings for fullLP systems. +. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/training +train_data_list=./conf/lists/301-cebuano//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev +dev2h_data_list=./conf/lists/301-cebuano//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev +dev10h_data_list=./conf/lists/301-cebuano//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +eval_data_list=./conf/lists/301-cebuano//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files 
+evalpart1_data_dir=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +evalpart1_data_list=./conf/lists/301-cebuano//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-evalpart1/IARPA-babel301b-v2.0b_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/dev + /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-eval/BABEL_OP2_301/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/301-cebuano//dev.list + ./conf/lists/301-cebuano//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel301b-v2.0b_conv-dev/IARPA-babel301b-v2.0b_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/301-cebuano//untranscribed-training.list + ./conf/lists/301-cebuano//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/untranscribed-training + /export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/301-cebuano/IARPA-babel301b-v2.0b-build/BABEL_OP2_301/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/302-kazakh.FLP.official.conf b/egs/babel/s5d/conf/lang/302-kazakh.FLP.official.conf new file mode 100644 index 00000000000..d3a02dc1075 --- /dev/null +++ b/egs/babel/s5d/conf/lang/302-kazakh.FLP.official.conf @@ -0,0 +1,101 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/training +train_data_list=./conf/lists/302-kazakh//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev +dev2h_data_list=./conf/lists/302-kazakh//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev +dev10h_data_list=./conf/lists/302-kazakh//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval +eval_data_list=./conf/lists/302-kazakh//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval 
+evalpart1_data_list=./conf/lists/302-kazakh//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev + /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/302-kazakh//dev.list + ./conf/lists/302-kazakh//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for FullLP condition +unsup_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/untranscribed-training +unsup_data_list=./conf/lists/302-kazakh//untranscribed-training.list +unsup_nj=32 + + +lexicon_file=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/reference_materials/lexicon.txt +lexiconFlags="--romanized --oov " + + + diff --git a/egs/babel/s5d/conf/lang/302-kazakh.LLP.official.conf b/egs/babel/s5d/conf/lang/302-kazakh.LLP.official.conf new file mode 100644 index 00000000000..2049c820695 --- /dev/null +++ b/egs/babel/s5d/conf/lang/302-kazakh.LLP.official.conf @@ -0,0 +1,107 @@ +# include common settings for fullLP systems. +. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/training +train_data_list=./conf/lists/302-kazakh//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev +dev2h_data_list=./conf/lists/302-kazakh//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev +dev10h_data_list=./conf/lists/302-kazakh//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval +eval_data_list=./conf/lists/302-kazakh//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval 
+evalpart1_data_list=./conf/lists/302-kazakh//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-evalpart1/IARPA-babel302b-v1.0a_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/dev + /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-eval/BABEL_OP2_302/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/302-kazakh//dev.list + ./conf/lists/302-kazakh//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel302b-v1.0a_conv-dev/IARPA-babel302b-v1.0a_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/302-kazakh//untranscribed-training.list + ./conf/lists/302-kazakh//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/untranscribed-training + /export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/302-kazakh/IARPA-babel302b-v1.0a-build/BABEL_OP2_302/conversational/reference_materials/lexicon.sub-train.txt +lexiconFlags="--romanized --oov " + + + diff --git a/egs/babel/s5d/conf/lang/303-telugu.FLP.official.conf b/egs/babel/s5d/conf/lang/303-telugu.FLP.official.conf new file mode 100644 index 00000000000..5ba3f8a1606 --- /dev/null +++ b/egs/babel/s5d/conf/lang/303-telugu.FLP.official.conf @@ -0,0 +1,100 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/training +train_data_list=./conf/lists/303-telugu//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev +dev2h_data_list=./conf/lists/303-telugu//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev +dev10h_data_list=./conf/lists/303-telugu//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval +eval_data_list=./conf/lists/303-telugu//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval 
+evalpart1_data_list=./conf/lists/303-telugu//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev + /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/303-telugu//dev.list + ./conf/lists/303-telugu//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + +#Unsupervised dataset for FullLP condition +unsup_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/untranscribed-training +unsup_data_list=./conf/lists/303-telugu//untranscribed-training.list +unsup_nj=32 + + +lexicon_file=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/reference_materials/lexicon.txt +lexiconFlags="--romanized --oov " + + + diff --git a/egs/babel/s5d/conf/lang/303-telugu.LLP.official.conf b/egs/babel/s5d/conf/lang/303-telugu.LLP.official.conf new file mode 100644 index 00000000000..b916b5b27e6 --- /dev/null +++ b/egs/babel/s5d/conf/lang/303-telugu.LLP.official.conf @@ -0,0 +1,107 @@ +# include common settings for fullLP systems. +. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/training +train_data_list=./conf/lists/303-telugu//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev +dev2h_data_list=./conf/lists/303-telugu//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev +dev10h_data_list=./conf/lists/303-telugu//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval +eval_data_list=./conf/lists/303-telugu//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files +evalpart1_data_dir=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval 
+evalpart1_data_list=./conf/lists/303-telugu//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-evalpart1/IARPA-babel303b-v1.0a_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/dev + /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-eval/BABEL_OP2_303/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/303-telugu//dev.list + ./conf/lists/303-telugu//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel303b-v1.0a_conv-dev/IARPA-babel303b-v1.0a_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/303-telugu//untranscribed-training.list + ./conf/lists/303-telugu//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/untranscribed-training + /export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/303-telugu/IARPA-babel303b-v1.0a-build/BABEL_OP2_303/conversational/reference_materials/lexicon.sub-train.txt +lexiconFlags="--romanized --oov " + + + diff --git a/egs/babel/s5d/conf/lang/304-lithuanian.FLP.official.conf b/egs/babel/s5d/conf/lang/304-lithuanian.FLP.official.conf new file mode 100644 index 00000000000..8459ca096a0 --- /dev/null +++ b/egs/babel/s5d/conf/lang/304-lithuanian.FLP.official.conf @@ -0,0 +1,100 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/training +train_data_list=./conf/lists/304-lithuanian//training.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev +dev2h_data_list=./conf/lists/304-lithuanian//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev +dev10h_data_list=./conf/lists/304-lithuanian//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval +eval_data_list=./conf/lists/304-lithuanian//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files 
+evalpart1_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval +evalpart1_data_list=./conf/lists/304-lithuanian//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev + /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/304-lithuanian//dev.list + ./conf/lists/304-lithuanian//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for FullLP condition +unsup_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/untranscribed-training +unsup_data_list=./conf/lists/304-lithuanian//untranscribed-training.list +unsup_nj=32 + + +lexicon_file=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/reference_materials/lexicon.txt + + + diff --git a/egs/babel/s5d/conf/lang/304-lithuanian.LLP.official.conf b/egs/babel/s5d/conf/lang/304-lithuanian.LLP.official.conf new file mode 100644 index 00000000000..a571161390e --- /dev/null +++ b/egs/babel/s5d/conf/lang/304-lithuanian.LLP.official.conf @@ -0,0 +1,106 @@ +# include common settings for fullLP systems. +. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/training +train_data_list=./conf/lists/304-lithuanian//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev +dev2h_data_list=./conf/lists/304-lithuanian//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev +dev10h_data_list=./conf/lists/304-lithuanian//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Official EVAL period evaluation data files +eval_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval +eval_data_list=./conf/lists/304-lithuanian//eval.list +eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-eval.ecf.xml +eval_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # eval_kwlists +eval_nj=32 + + +#Official post-EVAL period data files 
+evalpart1_data_dir=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval +evalpart1_data_list=./conf/lists/304-lithuanian//evalpart1.list +evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.mitllfa3.rttm +evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.scoring.ecf.xml +evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.stm +evalpart1_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-evalpart1/IARPA-babel304b-v1.0b_conv-evalpart1.annot.kwlist4.xml +) # evalpart1_kwlists +evalpart1_nj=32 + + +#Shadow data files +shadow_data_dir=( + /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/dev + /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-eval/BABEL_OP2_304/conversational/eval +) # shadow_data_dir +shadow_data_list=( + ./conf/lists/304-lithuanian//dev.list + ./conf/lists/304-lithuanian//eval.lists +) # shadow_data_dir +shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.scoring.ecf.xml +shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.mitllfa3.rttm +shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.stm +shadow_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist.xml + [kwlist2]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist2.xml + [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist3.xml + [kwlist4]=/export/babel/data/scoring/IndusDB/IARPA-babel304b-v1.0b_conv-dev/IARPA-babel304b-v1.0b_conv-dev.annot.kwlist4.xml +) # shadow_kwlists +shadow_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/304-lithuanian//untranscribed-training.list + ./conf/lists/304-lithuanian//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/untranscribed-training + /export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/304-lithuanian/IARPA-babel304b-v1.0b-build/BABEL_OP2_304/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/305-guarani.FLP.official.conf b/egs/babel/s5d/conf/lang/305-guarani.FLP.official.conf new file mode 100644 index 00000000000..233cd81fffb --- /dev/null +++ b/egs/babel/s5d/conf/lang/305-guarani.FLP.official.conf @@ -0,0 +1,45 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/training
+train_data_list=./conf/lists/305-guarani//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev
+dev2h_data_list=./conf/lists/305-guarani//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm
+dev2h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev
+dev10h_data_list=./conf/lists/305-guarani//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm
+dev10h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/untranscribed-training
+unsup_data_list=./conf/lists/305-guarani//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/305-guarani.LLP.official.conf b/egs/babel/s5d/conf/lang/305-guarani.LLP.official.conf
new file mode 100644
index 00000000000..c0d9cc97524
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/305-guarani.LLP.official.conf
@@ -0,0 +1,51 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/training +train_data_list=./conf/lists/305-guarani//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev +dev2h_data_list=./conf/lists/305-guarani//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev +dev10h_data_list=./conf/lists/305-guarani//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/305-guarani//untranscribed-training.list + ./conf/lists/305-guarani//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/untranscribed-training + /export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/306-igbo.FLP.official.conf b/egs/babel/s5d/conf/lang/306-igbo.FLP.official.conf new file mode 100644 index 00000000000..87f82da6b49 --- /dev/null +++ b/egs/babel/s5d/conf/lang/306-igbo.FLP.official.conf @@ -0,0 +1,45 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/training
+train_data_list=./conf/lists/306-igbo//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/dev
+dev2h_data_list=./conf/lists/306-igbo//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm
+dev2h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/dev
+dev10h_data_list=./conf/lists/306-igbo//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm
+dev10h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/untranscribed-training
+unsup_data_list=./conf/lists/306-igbo//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/306-igbo.LLP.official.conf b/egs/babel/s5d/conf/lang/306-igbo.LLP.official.conf
new file mode 100644
index 00000000000..70642537caf
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/306-igbo.LLP.official.conf
@@ -0,0 +1,51 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/training +train_data_list=./conf/lists/306-igbo//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/dev +dev2h_data_list=./conf/lists/306-igbo//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/dev +dev10h_data_list=./conf/lists/306-igbo//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/306-igbo//untranscribed-training.list + ./conf/lists/306-igbo//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/untranscribed-training + /export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/306-igbo/IARPA-babel306b-v2.0c-build/BABEL_OP3_306/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/307-amharic.FLP.official.conf b/egs/babel/s5d/conf/lang/307-amharic.FLP.official.conf new file mode 100644 index 00000000000..9668bd14e6b --- /dev/null +++ b/egs/babel/s5d/conf/lang/307-amharic.FLP.official.conf @@ -0,0 +1,46 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/training
+train_data_list=./conf/lists/307-amharic//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/dev
+dev2h_data_list=./conf/lists/307-amharic//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/dev
+dev10h_data_list=./conf/lists/307-amharic//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/untranscribed-training
+unsup_data_list=./conf/lists/307-amharic//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/reference_materials/lexicon.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/307-amharic.LLP.official.conf b/egs/babel/s5d/conf/lang/307-amharic.LLP.official.conf
new file mode 100644
index 00000000000..3c49d4356ce
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/307-amharic.LLP.official.conf
@@ -0,0 +1,52 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/training
+train_data_list=./conf/lists/307-amharic//sub-train.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/dev
+dev2h_data_list=./conf/lists/307-amharic//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/dev
+dev10h_data_list=./conf/lists/307-amharic//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+    ./conf/lists/307-amharic//untranscribed-training.list
+    ./conf/lists/307-amharic//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+    /export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/untranscribed-training
+    /export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/reference_materials/lexicon.sub-train.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/401-mongolian.FLP.official.conf b/egs/babel/s5d/conf/lang/401-mongolian.FLP.official.conf
new file mode 100644
index 00000000000..902ded164d2
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/401-mongolian.FLP.official.conf
@@ -0,0 +1,46 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/training
+train_data_list=./conf/lists/401-mongolian//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/dev
+dev2h_data_list=./conf/lists/401-mongolian//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
+dev2h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/dev
+dev10h_data_list=./conf/lists/401-mongolian//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
+dev10h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/untranscribed-training
+unsup_data_list=./conf/lists/401-mongolian//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/reference_materials/lexicon.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/401-mongolian.LLP.official.conf b/egs/babel/s5d/conf/lang/401-mongolian.LLP.official.conf
new file mode 100644
index 00000000000..e3bd46c7e68
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/401-mongolian.LLP.official.conf
@@ -0,0 +1,52 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/training
+train_data_list=./conf/lists/401-mongolian//sub-train.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/dev
+dev2h_data_list=./conf/lists/401-mongolian//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
+dev2h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/dev
+dev10h_data_list=./conf/lists/401-mongolian//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
+dev10h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for LimitedLP condition
+unsup_data_list=(
+    ./conf/lists/401-mongolian//untranscribed-training.list
+    ./conf/lists/401-mongolian//sub-train.untranscribed.list
+) # unsup_data_list
+unsup_data_dir=(
+    /export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/untranscribed-training
+    /export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/training
+) # unsup_data_dir
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/401-mongolian/IARPA-babel401b-v2.0b-build/BABEL_OP3_401/conversational/reference_materials/lexicon.sub-train.txt
+lexiconFlags="--romanized --oov <unk>"
+
+
+
diff --git a/egs/babel/s5d/conf/lang/402-javanese.FLP.official.conf b/egs/babel/s5d/conf/lang/402-javanese.FLP.official.conf
new file mode 100644
index 00000000000..0f176dc9396
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/402-javanese.FLP.official.conf
@@ -0,0 +1,47 @@
+# include common settings for fullLP systems.
+. 
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/training
+train_data_list=./conf/lists/402-javanese//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/dev
+dev2h_data_list=./conf/lists/402-javanese//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/dev
+dev10h_data_list=./conf/lists/402-javanese//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
+    [kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist3.xml
+
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/untranscribed-training
+unsup_data_list=./conf/lists/402-javanese//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/402-javanese.LLP.official.conf b/egs/babel/s5d/conf/lang/402-javanese.LLP.official.conf
new file mode 100644
index 00000000000..99438159ae6
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/402-javanese.LLP.official.conf
@@ -0,0 +1,51 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/training +train_data_list=./conf/lists/402-javanese//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/dev +dev2h_data_list=./conf/lists/402-javanese//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/dev +dev10h_data_list=./conf/lists/402-javanese//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/402-javanese//untranscribed-training.list + ./conf/lists/402-javanese//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/untranscribed-training + /export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/402-javanese/IARPA-babel402b-v1.0b-build/BABEL_OP3_402/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lang/403-dholuo.FLP.official.conf b/egs/babel/s5d/conf/lang/403-dholuo.FLP.official.conf new file mode 100644 index 00000000000..6dc95d74304 --- /dev/null +++ b/egs/babel/s5d/conf/lang/403-dholuo.FLP.official.conf @@ -0,0 +1,45 @@ +# include common settings for fullLP systems. +. 
conf/common.fullLP || exit 1;
+
+
+#speech corpora files location
+train_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/training
+train_data_list=./conf/lists/403-dholuo//training.list
+train_nj=32
+
+
+#Radical reduced DEV corpora files location
+dev2h_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/dev
+dev2h_data_list=./conf/lists/403-dholuo//dev.2h.list
+dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.mitllfa3.rttm
+dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml
+dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm
+dev2h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev2h_kwlists
+dev2h_nj=16
+dev2h_subset_ecf=true
+
+
+#Official DEV corpora files location
+dev10h_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/dev
+dev10h_data_list=./conf/lists/403-dholuo//dev.list
+dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.mitllfa3.rttm
+dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml
+dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm
+dev10h_kwlists=(
+    [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
+) # dev10h_kwlists
+dev10h_nj=32
+
+
+#Unsupervised dataset for FullLP condition
+unsup_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/untranscribed-training
+unsup_data_list=./conf/lists/403-dholuo//untranscribed-training.list
+unsup_nj=32
+
+
+lexicon_file=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/reference_materials/lexicon.txt
+
+
+
diff --git a/egs/babel/s5d/conf/lang/403-dholuo.LLP.official.conf b/egs/babel/s5d/conf/lang/403-dholuo.LLP.official.conf
new file mode 100644
index 00000000000..827a1ca5ed0
--- /dev/null
+++ b/egs/babel/s5d/conf/lang/403-dholuo.LLP.official.conf
@@ -0,0 +1,51 @@
+# include common settings for limitedLP systems.
+. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/training +train_data_list=./conf/lists/403-dholuo//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/dev +dev2h_data_list=./conf/lists/403-dholuo//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm +dev2h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/dev +dev10h_data_list=./conf/lists/403-dholuo//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm +dev10h_kwlists=( + [kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/403-dholuo//untranscribed-training.list + ./conf/lists/403-dholuo//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/untranscribed-training + /export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file=/export/babel/data/403-dholuo/IARPA-babel403b-v1.0b-build/BABEL_OP3_403/conversational/reference_materials/lexicon.sub-train.txt + + + diff --git a/egs/babel/s5d/conf/lists/101-cantonese/dev.list b/egs/babel/s5d/conf/lists/101-cantonese/dev.list new file mode 100644 index 00000000000..581862a9701 --- /dev/null +++ b/egs/babel/s5d/conf/lists/101-cantonese/dev.list @@ -0,0 +1,120 @@ +BABEL_BP_101_10470_20111118_172644_inLine +BABEL_BP_101_10470_20111118_172644_outLine +BABEL_BP_101_10713_20111024_220917_inLine +BABEL_BP_101_10713_20111024_220917_outLine +BABEL_BP_101_10733_20111021_141006_inLine +BABEL_BP_101_10733_20111021_141006_outLine +BABEL_BP_101_11982_20111027_140138_inLine +BABEL_BP_101_11982_20111027_140138_outLine +BABEL_BP_101_15916_20111129_174019_inLine +BABEL_BP_101_15916_20111129_174019_outLine +BABEL_BP_101_16346_20111117_212011_inLine +BABEL_BP_101_16346_20111117_212011_outLine +BABEL_BP_101_17983_20111027_140721_inLine +BABEL_BP_101_17983_20111027_140721_outLine +BABEL_BP_101_19656_20111103_235107_inLine +BABEL_BP_101_19656_20111103_235107_outLine +BABEL_BP_101_20471_20111102_141335_inLine +BABEL_BP_101_20471_20111102_141335_outLine +BABEL_BP_101_20741_20111018_195422_inLine +BABEL_BP_101_20741_20111018_195422_outLine 
+BABEL_BP_101_24833_20111031_142944_inLine +BABEL_BP_101_24833_20111031_142944_outLine +BABEL_BP_101_29290_20111031_003657_inLine +BABEL_BP_101_29290_20111031_003657_outLine +BABEL_BP_101_29589_20111126_175320_inLine +BABEL_BP_101_29589_20111126_175320_outLine +BABEL_BP_101_36722_20111104_030316_inLine +BABEL_BP_101_36722_20111104_030316_outLine +BABEL_BP_101_37784_20111208_190128_inLine +BABEL_BP_101_37784_20111208_190128_outLine +BABEL_BP_101_39963_20111026_150832_inLine +BABEL_BP_101_39963_20111026_150832_outLine +BABEL_BP_101_41146_20111026_153646_inLine +BABEL_BP_101_41146_20111026_153646_outLine +BABEL_BP_101_41541_20111206_172913_inLine +BABEL_BP_101_41541_20111206_172913_outLine +BABEL_BP_101_43306_20111103_161140_inLine +BABEL_BP_101_43306_20111103_161140_outLine +BABEL_BP_101_49582_20111027_141449_inLine +BABEL_BP_101_49582_20111027_141449_outLine +BABEL_BP_101_50718_20111020_135643_inLine +BABEL_BP_101_50718_20111020_135643_outLine +BABEL_BP_101_50798_20111026_223324_inLine +BABEL_BP_101_50798_20111026_223324_outLine +BABEL_BP_101_50883_20111102_204642_inLine +BABEL_BP_101_50883_20111102_204642_outLine +BABEL_BP_101_52335_20111203_155425_inLine +BABEL_BP_101_52335_20111203_155425_outLine +BABEL_BP_101_53994_20111202_163841_inLine +BABEL_BP_101_53994_20111202_163841_outLine +BABEL_BP_101_54339_20111124_170134_inLine +BABEL_BP_101_54339_20111124_170134_outLine +BABEL_BP_101_54621_20111125_183536_inLine +BABEL_BP_101_54621_20111125_183536_outLine +BABEL_BP_101_57724_20111027_181810_inLine +BABEL_BP_101_57724_20111027_181810_outLine +BABEL_BP_101_59175_20111027_151958_inLine +BABEL_BP_101_59175_20111027_151958_outLine +BABEL_BP_101_60193_20111102_144921_inLine +BABEL_BP_101_60193_20111102_144921_outLine +BABEL_BP_101_63114_20111123_012206_inLine +BABEL_BP_101_63114_20111123_012206_outLine +BABEL_BP_101_64351_20111124_153905_inLine +BABEL_BP_101_64351_20111124_153905_outLine +BABEL_BP_101_67411_20111030_182522_inLine +BABEL_BP_101_67411_20111030_182522_outLine +BABEL_BP_101_67750_20111025_140818_inLine +BABEL_BP_101_67750_20111025_140818_outLine +BABEL_BP_101_70285_20111026_191056_inLine +BABEL_BP_101_70285_20111026_191056_outLine +BABEL_BP_101_70625_20111129_171555_inLine +BABEL_BP_101_70625_20111129_171555_outLine +BABEL_BP_101_76192_20111102_164411_inLine +BABEL_BP_101_76192_20111102_164411_outLine +BABEL_BP_101_77137_20111125_163632_inLine +BABEL_BP_101_77137_20111125_163632_outLine +BABEL_BP_101_77591_20111114_194820_inLine +BABEL_BP_101_77591_20111114_194820_outLine +BABEL_BP_101_80150_20111117_003728_inLine +BABEL_BP_101_80150_20111117_003728_outLine +BABEL_BP_101_81119_20111118_140013_inLine +BABEL_BP_101_81119_20111118_140013_outLine +BABEL_BP_101_81717_20111118_145402_inLine +BABEL_BP_101_81717_20111118_145402_outLine +BABEL_BP_101_83531_20111104_002551_inLine +BABEL_BP_101_83531_20111104_002551_outLine +BABEL_BP_101_85573_20111019_141646_inLine +BABEL_BP_101_85573_20111019_141646_outLine +BABEL_BP_101_87539_20111201_130219_inLine +BABEL_BP_101_87539_20111201_130219_outLine +BABEL_BP_101_87607_20111125_162304_inLine +BABEL_BP_101_87607_20111125_162304_outLine +BABEL_BP_101_90082_20111127_153333_inLine +BABEL_BP_101_90082_20111127_153333_outLine +BABEL_BP_101_90559_20111203_144741_inLine +BABEL_BP_101_90559_20111203_144741_outLine +BABEL_BP_101_91723_20111104_231255_inLine +BABEL_BP_101_91723_20111104_231255_outLine +BABEL_BP_101_92602_20111029_191642_inLine +BABEL_BP_101_92602_20111029_191642_outLine +BABEL_BP_101_94235_20111119_200950_inLine 
+BABEL_BP_101_94235_20111119_200950_outLine +BABEL_BP_101_95120_20111120_194049_inLine +BABEL_BP_101_95120_20111120_194049_outLine +BABEL_BP_101_95121_20111204_185315_inLine +BABEL_BP_101_95121_20111204_185315_outLine +BABEL_BP_101_95350_20111018_202556_inLine +BABEL_BP_101_95350_20111018_202556_outLine +BABEL_BP_101_95514_20111203_141811_inLine +BABEL_BP_101_95514_20111203_141811_outLine +BABEL_BP_101_95637_20111024_141608_inLine +BABEL_BP_101_95637_20111024_141608_outLine +BABEL_BP_101_95736_20111102_184136_inLine +BABEL_BP_101_95736_20111102_184136_outLine +BABEL_BP_101_97518_20111130_230103_inLine +BABEL_BP_101_97518_20111130_230103_outLine +BABEL_BP_101_98402_20111203_194645_inLine +BABEL_BP_101_98402_20111203_194645_outLine +BABEL_BP_101_98675_20111117_190458_inLine +BABEL_BP_101_98675_20111117_190458_outLine diff --git a/egs/babel/s5d/conf/lists/101-cantonese/eval.list b/egs/babel/s5d/conf/lists/101-cantonese/eval.list new file mode 100644 index 00000000000..d2301ae3d82 --- /dev/null +++ b/egs/babel/s5d/conf/lists/101-cantonese/eval.list @@ -0,0 +1,220 @@ +BABEL_BP_101_11267_20111202_163633_inLine +BABEL_BP_101_11267_20111202_163633_outLine +BABEL_BP_101_11311_20111017_201941_inLine +BABEL_BP_101_11311_20111017_201941_outLine +BABEL_BP_101_12535_20111203_130510_inLine +BABEL_BP_101_12535_20111203_130510_outLine +BABEL_BP_101_13065_20111118_192048_inLine +BABEL_BP_101_13065_20111118_192048_outLine +BABEL_BP_101_13476_20111121_181636_inLine +BABEL_BP_101_13476_20111121_181636_outLine +BABEL_BP_101_14707_20111122_145307_inLine +BABEL_BP_101_14836_20111124_161142_inLine +BABEL_BP_101_14836_20111124_161142_outLine +BABEL_BP_101_14836_20111124_162649_inLine +BABEL_BP_101_14836_20111124_162649_outLine +BABEL_BP_101_15146_20111017_171639_inLine +BABEL_BP_101_15146_20111017_171639_outLine +BABEL_BP_101_15859_20111129_022308_inLine +BABEL_BP_101_15859_20111129_022308_outLine +BABEL_BP_101_16299_20111029_221723_inLine +BABEL_BP_101_16299_20111029_221723_outLine +BABEL_BP_101_16646_20111116_212752_inLine +BABEL_BP_101_16646_20111116_212752_outLine +BABEL_BP_101_17900_20111025_234518_inLine +BABEL_BP_101_17900_20111025_234518_outLine +BABEL_BP_101_19063_20111117_154053_inLine +BABEL_BP_101_19619_20111027_130540_inLine +BABEL_BP_101_19619_20111027_130540_outLine +BABEL_BP_101_20347_20111115_190811_inLine +BABEL_BP_101_20347_20111115_190811_outLine +BABEL_BP_101_21050_20111127_140516_inLine +BABEL_BP_101_21052_20111117_134126_inLine +BABEL_BP_101_21052_20111117_134126_outLine +BABEL_BP_101_22351_20111117_141906_inLine +BABEL_BP_101_22351_20111117_141906_outLine +BABEL_BP_101_22351_20111117_142946_inLine +BABEL_BP_101_22351_20111117_142946_outLine +BABEL_BP_101_24589_20111122_200522_inLine +BABEL_BP_101_25106_20111103_002754_inLine +BABEL_BP_101_25106_20111103_002754_outLine +BABEL_BP_101_26598_20111117_165818_inLine +BABEL_BP_101_26598_20111117_165818_outLine +BABEL_BP_101_27724_20111128_203411_inLine +BABEL_BP_101_27724_20111128_203411_outLine +BABEL_BP_101_28990_20111120_210441_inLine +BABEL_BP_101_28990_20111120_210441_outLine +BABEL_BP_101_30642_20111116_150618_inLine +BABEL_BP_101_30642_20111116_150618_outLine +BABEL_BP_101_32011_20111201_004544_inLine +BABEL_BP_101_32011_20111201_004544_outLine +BABEL_BP_101_32045_20111104_024613_inLine +BABEL_BP_101_32045_20111104_024613_outLine +BABEL_BP_101_32132_20111119_185103_inLine +BABEL_BP_101_32132_20111119_185103_outLine +BABEL_BP_101_33540_20111027_144812_inLine +BABEL_BP_101_33540_20111027_144812_outLine 
+BABEL_BP_101_35074_20111203_144945_inLine +BABEL_BP_101_35074_20111203_144945_outLine +BABEL_BP_101_35612_20111110_210341_inLine +BABEL_BP_101_35612_20111110_210341_outLine +BABEL_BP_101_36143_20111029_193157_inLine +BABEL_BP_101_36143_20111029_193157_outLine +BABEL_BP_101_36155_20111120_144557_inLine +BABEL_BP_101_36155_20111120_144557_outLine +BABEL_BP_101_36155_20111120_150859_inLine +BABEL_BP_101_36155_20111120_150859_outLine +BABEL_BP_101_36868_20111117_210558_inLine +BABEL_BP_101_37348_20111130_223024_inLine +BABEL_BP_101_37348_20111130_223024_outLine +BABEL_BP_101_38635_20111120_180033_inLine +BABEL_BP_101_38635_20111120_180033_outLine +BABEL_BP_101_38640_20111028_200532_inLine +BABEL_BP_101_38640_20111028_200532_outLine +BABEL_BP_101_38640_20111028_202051_inLine +BABEL_BP_101_38640_20111028_202051_outLine +BABEL_BP_101_39114_20111128_134323_inLine +BABEL_BP_101_39114_20111128_134323_outLine +BABEL_BP_101_41797_20111117_181049_inLine +BABEL_BP_101_41797_20111117_181049_outLine +BABEL_BP_101_42768_20111115_173157_inLine +BABEL_BP_101_42768_20111115_173157_outLine +BABEL_BP_101_42853_20111014_121048_inLine +BABEL_BP_101_42853_20111014_121048_outLine +BABEL_BP_101_43317_20111115_183049_inLine +BABEL_BP_101_43317_20111115_183049_outLine +BABEL_BP_101_43991_20111121_191522_inLine +BABEL_BP_101_43991_20111121_191522_outLine +BABEL_BP_101_46409_20111103_190907_inLine +BABEL_BP_101_46409_20111103_190907_outLine +BABEL_BP_101_46464_20111119_154431_inLine +BABEL_BP_101_46464_20111119_154431_outLine +BABEL_BP_101_46521_20111027_144539_inLine +BABEL_BP_101_46521_20111027_144539_outLine +BABEL_BP_101_46950_20111102_231112_inLine +BABEL_BP_101_46950_20111102_231112_outLine +BABEL_BP_101_47185_20111116_191402_inLine +BABEL_BP_101_47185_20111116_191402_outLine +BABEL_BP_101_48536_20111028_200823_inLine +BABEL_BP_101_48536_20111028_200823_outLine +BABEL_BP_101_48645_20111121_182116_inLine +BABEL_BP_101_48645_20111121_182116_outLine +BABEL_BP_101_48645_20111121_183054_inLine +BABEL_BP_101_48645_20111121_183054_outLine +BABEL_BP_101_49042_20111030_233559_inLine +BABEL_BP_101_49042_20111030_233559_outLine +BABEL_BP_101_49173_20111128_203628_inLine +BABEL_BP_101_49173_20111128_203628_outLine +BABEL_BP_101_49173_20111128_204848_inLine +BABEL_BP_101_49173_20111128_204848_outLine +BABEL_BP_101_49239_20111122_153732_inLine +BABEL_BP_101_49239_20111122_153732_outLine +BABEL_BP_101_49552_20111114_230835_inLine +BABEL_BP_101_49552_20111114_230835_outLine +BABEL_BP_101_50555_20111120_155930_inLine +BABEL_BP_101_50555_20111120_155930_outLine +BABEL_BP_101_51042_20111204_200010_inLine +BABEL_BP_101_51042_20111204_200010_outLine +BABEL_BP_101_53278_20111122_170608_inLine +BABEL_BP_101_53463_20111120_193926_inLine +BABEL_BP_101_53463_20111120_193926_outLine +BABEL_BP_101_53982_20111122_191730_inLine +BABEL_BP_101_57422_20111122_180847_inLine +BABEL_BP_101_57551_20111019_214945_inLine +BABEL_BP_101_57551_20111019_214945_outLine +BABEL_BP_101_59169_20111122_141419_inLine +BABEL_BP_101_59671_20111027_145636_inLine +BABEL_BP_101_59671_20111027_145636_outLine +BABEL_BP_101_59891_20111124_143157_inLine +BABEL_BP_101_60064_20111203_191808_inLine +BABEL_BP_101_60064_20111203_191808_outLine +BABEL_BP_101_60277_20111126_194551_inLine +BABEL_BP_101_60277_20111126_194551_outLine +BABEL_BP_101_60277_20111126_200232_inLine +BABEL_BP_101_60277_20111126_200232_outLine +BABEL_BP_101_61203_20111030_130830_inLine +BABEL_BP_101_61203_20111030_130830_outLine +BABEL_BP_101_61906_20111117_202948_inLine 
+BABEL_BP_101_61988_20111028_001218_inLine +BABEL_BP_101_61988_20111028_001219_outLine +BABEL_BP_101_64946_20111201_195421_inLine +BABEL_BP_101_64946_20111201_195421_outLine +BABEL_BP_101_65601_20111103_222906_inLine +BABEL_BP_101_65601_20111103_222906_outLine +BABEL_BP_101_66709_20111119_145638_inLine +BABEL_BP_101_66709_20111119_145638_outLine +BABEL_BP_101_67304_20111129_183928_inLine +BABEL_BP_101_67304_20111129_183928_outLine +BABEL_BP_101_68861_20111030_183357_inLine +BABEL_BP_101_68861_20111030_183357_outLine +BABEL_BP_101_72647_20111128_150245_inLine +BABEL_BP_101_72746_20111121_191752_inLine +BABEL_BP_101_73782_20111126_201918_inLine +BABEL_BP_101_73782_20111126_201918_outLine +BABEL_BP_101_74295_20111121_152402_inLine +BABEL_BP_101_74295_20111121_152402_outLine +BABEL_BP_101_74607_20111022_195251_inLine +BABEL_BP_101_74607_20111022_195251_outLine +BABEL_BP_101_74986_20111116_153007_inLine +BABEL_BP_101_74986_20111116_153007_outLine +BABEL_BP_101_75151_20111203_163659_inLine +BABEL_BP_101_75151_20111203_163659_outLine +BABEL_BP_101_75799_20111122_163729_inLine +BABEL_BP_101_75932_20111111_151802_inLine +BABEL_BP_101_75932_20111111_151802_outLine +BABEL_BP_101_76451_20111026_184920_inLine +BABEL_BP_101_76451_20111026_184920_outLine +BABEL_BP_101_76451_20111026_190345_inLine +BABEL_BP_101_76451_20111026_190345_outLine +BABEL_BP_101_76763_20111017_191052_inLine +BABEL_BP_101_76763_20111017_191052_outLine +BABEL_BP_101_76925_20111103_205340_inLine +BABEL_BP_101_76925_20111103_205340_outLine +BABEL_BP_101_77465_20111120_175215_inLine +BABEL_BP_101_77465_20111120_175215_outLine +BABEL_BP_101_78046_20111125_134944_inLine +BABEL_BP_101_78046_20111125_134944_outLine +BABEL_BP_101_79619_20111119_194350_inLine +BABEL_BP_101_79619_20111119_194350_outLine +BABEL_BP_101_79860_20111102_155320_inLine +BABEL_BP_101_79860_20111102_155320_outLine +BABEL_BP_101_80874_20111125_172008_inLine +BABEL_BP_101_80874_20111125_172008_outLine +BABEL_BP_101_81053_20111114_221753_inLine +BABEL_BP_101_81053_20111114_221753_outLine +BABEL_BP_101_81261_20111104_210152_inLine +BABEL_BP_101_81261_20111104_210152_outLine +BABEL_BP_101_81261_20111104_211429_inLine +BABEL_BP_101_81261_20111104_211429_outLine +BABEL_BP_101_81583_20111022_221726_inLine +BABEL_BP_101_81583_20111022_221726_outLine +BABEL_BP_101_81642_20111124_172127_inLine +BABEL_BP_101_81642_20111124_172127_outLine +BABEL_BP_101_83053_20111118_151047_inLine +BABEL_BP_101_83053_20111118_151047_outLine +BABEL_BP_101_83700_20111121_152308_inLine +BABEL_BP_101_83700_20111121_152308_outLine +BABEL_BP_101_83713_20111104_193756_inLine +BABEL_BP_101_83713_20111104_193756_outLine +BABEL_BP_101_86014_20111120_171648_inLine +BABEL_BP_101_88982_20111126_152512_inLine +BABEL_BP_101_88982_20111126_152512_outLine +BABEL_BP_101_89301_20111128_210850_inLine +BABEL_BP_101_89301_20111128_210850_outLine +BABEL_BP_101_89993_20111125_174226_inLine +BABEL_BP_101_89993_20111125_174226_outLine +BABEL_BP_101_90817_20111118_004749_inLine +BABEL_BP_101_90817_20111118_004749_outLine +BABEL_BP_101_91677_20111122_233646_inLine +BABEL_BP_101_91677_20111122_233646_outLine +BABEL_BP_101_91703_20111116_145954_inLine +BABEL_BP_101_91703_20111116_145954_outLine +BABEL_BP_101_94162_20111118_160545_inLine +BABEL_BP_101_94162_20111118_160545_outLine +BABEL_BP_101_95861_20111028_214238_inLine +BABEL_BP_101_95861_20111028_214238_outLine +BABEL_BP_101_96108_20111122_132644_inLine +BABEL_BP_101_97254_20111117_145052_inLine +BABEL_BP_101_97254_20111117_145052_outLine 
+BABEL_BP_101_97486_20111104_200750_inLine +BABEL_BP_101_97486_20111104_200750_outLine diff --git a/egs/babel/s5d/conf/lists/101-cantonese/evalpart1.list b/egs/babel/s5d/conf/lists/101-cantonese/evalpart1.list new file mode 100644 index 00000000000..1980d99ef3e --- /dev/null +++ b/egs/babel/s5d/conf/lists/101-cantonese/evalpart1.list @@ -0,0 +1,63 @@ +BABEL_BP_101_15859_20111129_022308_inLine +BABEL_BP_101_15859_20111129_022308_outLine +BABEL_BP_101_17900_20111025_234518_inLine +BABEL_BP_101_17900_20111025_234518_outLine +BABEL_BP_101_20347_20111115_190811_inLine +BABEL_BP_101_20347_20111115_190811_outLine +BABEL_BP_101_33540_20111027_144812_inLine +BABEL_BP_101_33540_20111027_144812_outLine +BABEL_BP_101_36143_20111029_193157_inLine +BABEL_BP_101_36143_20111029_193157_outLine +BABEL_BP_101_38635_20111120_180033_inLine +BABEL_BP_101_38635_20111120_180033_outLine +BABEL_BP_101_39114_20111128_134323_outLine +BABEL_BP_101_42768_20111115_173157_inLine +BABEL_BP_101_42768_20111115_173157_outLine +BABEL_BP_101_42853_20111014_121048_inLine +BABEL_BP_101_42853_20111014_121048_outLine +BABEL_BP_101_43317_20111115_183049_inLine +BABEL_BP_101_43317_20111115_183049_outLine +BABEL_BP_101_43991_20111121_191522_inLine +BABEL_BP_101_43991_20111121_191522_outLine +BABEL_BP_101_46464_20111119_154431_inLine +BABEL_BP_101_46464_20111119_154431_outLine +BABEL_BP_101_47185_20111116_191402_inLine +BABEL_BP_101_47185_20111116_191402_outLine +BABEL_BP_101_48536_20111028_200823_inLine +BABEL_BP_101_48536_20111028_200823_outLine +BABEL_BP_101_49552_20111114_230835_inLine +BABEL_BP_101_49552_20111114_230835_outLine +BABEL_BP_101_51042_20111204_200010_inLine +BABEL_BP_101_51042_20111204_200010_outLine +BABEL_BP_101_57551_20111019_214945_inLine +BABEL_BP_101_57551_20111019_214945_outLine +BABEL_BP_101_60064_20111203_191808_inLine +BABEL_BP_101_60064_20111203_191808_outLine +BABEL_BP_101_66709_20111119_145638_inLine +BABEL_BP_101_66709_20111119_145638_outLine +BABEL_BP_101_67304_20111129_183928_inLine +BABEL_BP_101_67304_20111129_183928_outLine +BABEL_BP_101_68861_20111030_183357_inLine +BABEL_BP_101_68861_20111030_183357_outLine +BABEL_BP_101_74295_20111121_152402_inLine +BABEL_BP_101_74295_20111121_152402_outLine +BABEL_BP_101_74607_20111022_195251_inLine +BABEL_BP_101_74607_20111022_195251_outLine +BABEL_BP_101_75151_20111203_163659_inLine +BABEL_BP_101_75151_20111203_163659_outLine +BABEL_BP_101_75932_20111111_151802_inLine +BABEL_BP_101_75932_20111111_151802_outLine +BABEL_BP_101_76451_20111026_184920_inLine +BABEL_BP_101_76451_20111026_184920_outLine +BABEL_BP_101_76451_20111026_190345_inLine +BABEL_BP_101_76451_20111026_190345_outLine +BABEL_BP_101_76763_20111017_191052_inLine +BABEL_BP_101_76763_20111017_191052_outLine +BABEL_BP_101_81642_20111124_172127_inLine +BABEL_BP_101_81642_20111124_172127_outLine +BABEL_BP_101_83053_20111118_151047_inLine +BABEL_BP_101_83053_20111118_151047_outLine +BABEL_BP_101_90817_20111118_004749_inLine +BABEL_BP_101_90817_20111118_004749_outLine +BABEL_BP_101_97486_20111104_200750_inLine +BABEL_BP_101_97486_20111104_200750_outLine diff --git a/egs/babel/s5d/conf/lists/101-cantonese/train.FullLP.list b/egs/babel/s5d/conf/lists/101-cantonese/train.FullLP.list new file mode 100644 index 00000000000..a7db2aa2a23 --- /dev/null +++ b/egs/babel/s5d/conf/lists/101-cantonese/train.FullLP.list @@ -0,0 +1,965 @@ +BABEL_BP_101_10033_20111024_205740_inLine +BABEL_BP_101_10033_20111024_205740_outLine +BABEL_BP_101_10066_20111120_165933_inLine +BABEL_BP_101_10066_20111120_165933_outLine 
+BABEL_BP_101_10160_20111017_201159_inLine +BABEL_BP_101_10160_20111017_201159_outLine +BABEL_BP_101_10211_20111026_234151_inLine +BABEL_BP_101_10211_20111026_234151_outLine +BABEL_BP_101_10900_20111029_155829_inLine +BABEL_BP_101_10900_20111029_155829_outLine +BABEL_BP_101_10925_20111025_152502_inLine +BABEL_BP_101_10925_20111025_152502_outLine +BABEL_BP_101_10945_20111030_173950_inLine +BABEL_BP_101_10945_20111030_173950_outLine +BABEL_BP_101_10973_20111019_183249_inLine +BABEL_BP_101_10973_20111019_183249_outLine +BABEL_BP_101_11031_20111024_203919_inLine +BABEL_BP_101_11031_20111024_203920_outLine +BABEL_BP_101_11036_20111019_192958_inLine +BABEL_BP_101_11036_20111019_192958_outLine +BABEL_BP_101_11371_20111018_183136_inLine +BABEL_BP_101_11371_20111018_183136_outLine +BABEL_BP_101_11422_20111019_145654_inLine +BABEL_BP_101_11422_20111019_145654_outLine +BABEL_BP_101_11479_20111021_205756_inLine +BABEL_BP_101_11479_20111021_205756_outLine +BABEL_BP_101_11690_20111206_171715_inLine +BABEL_BP_101_11690_20111206_171715_outLine +BABEL_BP_101_11694_20111204_205320_inLine +BABEL_BP_101_11694_20111204_205320_outLine +BABEL_BP_101_11827_20111025_190953_inLine +BABEL_BP_101_11827_20111025_190954_outLine +BABEL_BP_101_11868_20111203_180801_inLine +BABEL_BP_101_11868_20111203_180801_outLine +BABEL_BP_101_12003_20111116_132035_inLine +BABEL_BP_101_12003_20111116_132035_outLine +BABEL_BP_101_12552_20111115_153047_inLine +BABEL_BP_101_12552_20111115_153047_outLine +BABEL_BP_101_12631_20111020_140550_inLine +BABEL_BP_101_12631_20111020_140550_outLine +BABEL_BP_101_12807_20111207_142617_inLine +BABEL_BP_101_12807_20111207_142617_outLine +BABEL_BP_101_12897_20111115_165516_inLine +BABEL_BP_101_12897_20111115_165516_outLine +BABEL_BP_101_13229_20111127_140526_inLine +BABEL_BP_101_13229_20111127_140526_outLine +BABEL_BP_101_13272_20111027_193738_inLine +BABEL_BP_101_13272_20111027_193738_outLine +BABEL_BP_101_13530_20111203_184256_inLine +BABEL_BP_101_13530_20111203_184256_outLine +BABEL_BP_101_13781_20111125_145211_inLine +BABEL_BP_101_13781_20111125_145211_outLine +BABEL_BP_101_14054_20111119_163712_inLine +BABEL_BP_101_14054_20111119_163712_outLine +BABEL_BP_101_14294_20111103_134040_inLine +BABEL_BP_101_14294_20111103_134040_outLine +BABEL_BP_101_14500_20111114_202424_inLine +BABEL_BP_101_14500_20111114_202424_outLine +BABEL_BP_101_14666_20111122_125103_inLine +BABEL_BP_101_14666_20111122_125103_outLine +BABEL_BP_101_14729_20111114_200940_inLine +BABEL_BP_101_14729_20111114_200940_outLine +BABEL_BP_101_14769_20111121_155034_inLine +BABEL_BP_101_14769_20111121_155034_outLine +BABEL_BP_101_14891_20111018_130049_inLine +BABEL_BP_101_14891_20111018_130049_outLine +BABEL_BP_101_14915_20111119_165151_inLine +BABEL_BP_101_14915_20111119_165151_outLine +BABEL_BP_101_14936_20111026_202920_inLine +BABEL_BP_101_14936_20111026_202920_outLine +BABEL_BP_101_14997_20111126_152707_inLine +BABEL_BP_101_14997_20111126_152707_outLine +BABEL_BP_101_15142_20111029_163819_inLine +BABEL_BP_101_15142_20111029_163819_outLine +BABEL_BP_101_15460_20111121_223019_inLine +BABEL_BP_101_15460_20111121_223019_outLine +BABEL_BP_101_15473_20111031_131455_inLine +BABEL_BP_101_15473_20111031_131455_outLine +BABEL_BP_101_15696_20111022_193230_inLine +BABEL_BP_101_15696_20111022_193230_outLine +BABEL_BP_101_15873_20111027_121806_inLine +BABEL_BP_101_15873_20111027_121806_outLine +BABEL_BP_101_15881_20111024_141728_inLine +BABEL_BP_101_15881_20111024_141729_outLine +BABEL_BP_101_16066_20111020_145228_inLine 
+BABEL_BP_101_16066_20111020_145228_outLine +BABEL_BP_101_16266_20111027_153525_inLine +BABEL_BP_101_16266_20111027_153525_outLine +BABEL_BP_101_16313_20111022_221750_inLine +BABEL_BP_101_16313_20111022_221750_outLine +BABEL_BP_101_16406_20111103_000453_inLine +BABEL_BP_101_16406_20111103_000453_outLine +BABEL_BP_101_16617_20111030_144124_inLine +BABEL_BP_101_16617_20111030_144124_outLine +BABEL_BP_101_16660_20111020_211620_inLine +BABEL_BP_101_16660_20111020_211620_outLine +BABEL_BP_101_16669_20111019_142510_inLine +BABEL_BP_101_16669_20111019_142510_outLine +BABEL_BP_101_16883_20111122_184255_inLine +BABEL_BP_101_16883_20111122_184255_outLine +BABEL_BP_101_17013_20111117_011741_inLine +BABEL_BP_101_17013_20111117_011741_outLine +BABEL_BP_101_17018_20111020_161922_inLine +BABEL_BP_101_17018_20111020_161922_outLine +BABEL_BP_101_17080_20111020_184025_inLine +BABEL_BP_101_17080_20111020_184025_outLine +BABEL_BP_101_17093_20111124_155145_inLine +BABEL_BP_101_17093_20111124_155145_outLine +BABEL_BP_101_17203_20111026_142831_inLine +BABEL_BP_101_17203_20111026_142831_outLine +BABEL_BP_101_17203_20111026_145429_inLine +BABEL_BP_101_17203_20111026_145429_outLine +BABEL_BP_101_17572_20111116_155402_inLine +BABEL_BP_101_17572_20111116_155402_outLine +BABEL_BP_101_17606_20111130_231145_inLine +BABEL_BP_101_17606_20111130_231145_outLine +BABEL_BP_101_17933_20111120_204846_inLine +BABEL_BP_101_17933_20111120_204846_outLine +BABEL_BP_101_18701_20111121_171853_inLine +BABEL_BP_101_18701_20111121_171853_outLine +BABEL_BP_101_18950_20111127_144125_inLine +BABEL_BP_101_18950_20111127_144125_outLine +BABEL_BP_101_19012_20111122_173413_inLine +BABEL_BP_101_19012_20111122_173413_outLine +BABEL_BP_101_19147_20111021_174406_inLine +BABEL_BP_101_19147_20111021_174406_outLine +BABEL_BP_101_20320_20111027_210504_inLine +BABEL_BP_101_20320_20111027_210504_outLine +BABEL_BP_101_20408_20111101_210200_inLine +BABEL_BP_101_20408_20111101_210200_outLine +BABEL_BP_101_20518_20111119_174458_inLine +BABEL_BP_101_20518_20111119_174458_outLine +BABEL_BP_101_20582_20111023_162723_inLine +BABEL_BP_101_20582_20111023_162723_outLine +BABEL_BP_101_20590_20111017_172008_inLine +BABEL_BP_101_20590_20111017_172008_outLine +BABEL_BP_101_20685_20111019_125028_inLine +BABEL_BP_101_20685_20111019_125028_outLine +BABEL_BP_101_20740_20111125_195727_inLine +BABEL_BP_101_20740_20111125_195727_outLine +BABEL_BP_101_21367_20111126_132150_inLine +BABEL_BP_101_21367_20111126_132150_outLine +BABEL_BP_101_21430_20111027_145918_inLine +BABEL_BP_101_21430_20111027_145918_outLine +BABEL_BP_101_21477_20111031_155928_inLine +BABEL_BP_101_21477_20111031_155928_outLine +BABEL_BP_101_21584_20111030_210806_inLine +BABEL_BP_101_21584_20111030_210807_outLine +BABEL_BP_101_21929_20111025_182511_inLine +BABEL_BP_101_21929_20111025_182511_outLine +BABEL_BP_101_21946_20111122_150655_inLine +BABEL_BP_101_21946_20111122_150655_outLine +BABEL_BP_101_22898_20111022_141857_inLine +BABEL_BP_101_22898_20111022_141857_outLine +BABEL_BP_101_22903_20111116_132430_inLine +BABEL_BP_101_22903_20111116_132430_outLine +BABEL_BP_101_22910_20111028_190802_inLine +BABEL_BP_101_22910_20111028_190802_outLine +BABEL_BP_101_22979_20111129_142742_inLine +BABEL_BP_101_22979_20111129_142742_outLine +BABEL_BP_101_23167_20111026_194856_inLine +BABEL_BP_101_23167_20111026_194856_outLine +BABEL_BP_101_23168_20111120_192134_inLine +BABEL_BP_101_23168_20111120_192134_outLine +BABEL_BP_101_23571_20111128_232031_inLine +BABEL_BP_101_23571_20111128_232031_outLine 
+BABEL_BP_101_23719_20111103_143124_inLine +BABEL_BP_101_23719_20111103_143124_outLine +BABEL_BP_101_23930_20111125_132944_inLine +BABEL_BP_101_23930_20111125_132944_outLine +BABEL_BP_101_24420_20111122_215626_inLine +BABEL_BP_101_24420_20111122_215626_outLine +BABEL_BP_101_24608_20111019_191348_inLine +BABEL_BP_101_24608_20111019_191348_outLine +BABEL_BP_101_24642_20111129_132050_inLine +BABEL_BP_101_24642_20111129_132050_outLine +BABEL_BP_101_24661_20111207_131837_inLine +BABEL_BP_101_24661_20111207_131837_outLine +BABEL_BP_101_25021_20111018_200603_inLine +BABEL_BP_101_25021_20111018_200603_outLine +BABEL_BP_101_25035_20111028_135038_inLine +BABEL_BP_101_25035_20111028_135038_outLine +BABEL_BP_101_25236_20111129_194650_inLine +BABEL_BP_101_25236_20111129_194650_outLine +BABEL_BP_101_25278_20111125_162450_inLine +BABEL_BP_101_25278_20111125_162450_outLine +BABEL_BP_101_25576_20111022_203923_inLine +BABEL_BP_101_25576_20111022_203923_outLine +BABEL_BP_101_25934_20111014_130931_inLine +BABEL_BP_101_25934_20111014_130931_outLine +BABEL_BP_101_26017_20111030_202851_inLine +BABEL_BP_101_26017_20111030_202851_outLine +BABEL_BP_101_26350_20111019_203820_inLine +BABEL_BP_101_26350_20111019_203820_outLine +BABEL_BP_101_26684_20111119_145219_inLine +BABEL_BP_101_26684_20111119_145219_outLine +BABEL_BP_101_27064_20111019_132106_inLine +BABEL_BP_101_27064_20111019_132106_outLine +BABEL_BP_101_27178_20111025_174857_inLine +BABEL_BP_101_27178_20111025_174857_outLine +BABEL_BP_101_27427_20111021_132850_inLine +BABEL_BP_101_27427_20111021_132850_outLine +BABEL_BP_101_27503_20111021_175113_inLine +BABEL_BP_101_27503_20111021_175113_outLine +BABEL_BP_101_27619_20111102_201443_inLine +BABEL_BP_101_27619_20111102_201443_outLine +BABEL_BP_101_28107_20111019_140723_inLine +BABEL_BP_101_28107_20111019_140723_outLine +BABEL_BP_101_28132_20111023_133733_inLine +BABEL_BP_101_28132_20111023_133733_outLine +BABEL_BP_101_28161_20111024_180609_inLine +BABEL_BP_101_28161_20111024_180609_outLine +BABEL_BP_101_28204_20111025_133714_inLine +BABEL_BP_101_28204_20111025_133714_outLine +BABEL_BP_101_28260_20111021_184044_inLine +BABEL_BP_101_28260_20111021_184044_outLine +BABEL_BP_101_28675_20111118_185525_inLine +BABEL_BP_101_28675_20111118_185525_outLine +BABEL_BP_101_28740_20111028_214620_inLine +BABEL_BP_101_28740_20111028_214620_outLine +BABEL_BP_101_29097_20111018_135944_inLine +BABEL_BP_101_29097_20111018_135944_outLine +BABEL_BP_101_29133_20111024_182947_inLine +BABEL_BP_101_29133_20111024_182947_outLine +BABEL_BP_101_29302_20111023_172339_inLine +BABEL_BP_101_29302_20111023_172339_outLine +BABEL_BP_101_29328_20111019_133534_inLine +BABEL_BP_101_29328_20111019_133534_outLine +BABEL_BP_101_29335_20111121_164238_inLine +BABEL_BP_101_29335_20111121_164238_outLine +BABEL_BP_101_29444_20111024_213300_inLine +BABEL_BP_101_29444_20111024_213300_outLine +BABEL_BP_101_29959_20111116_201012_inLine +BABEL_BP_101_29959_20111116_201012_outLine +BABEL_BP_101_30168_20111118_132348_inLine +BABEL_BP_101_30168_20111118_132348_outLine +BABEL_BP_101_30530_20111024_153842_inLine +BABEL_BP_101_30530_20111024_153842_outLine +BABEL_BP_101_30722_20111208_204304_inLine +BABEL_BP_101_30722_20111208_204304_outLine +BABEL_BP_101_31265_20111207_131905_inLine +BABEL_BP_101_31265_20111207_131905_outLine +BABEL_BP_101_31393_20111018_154135_inLine +BABEL_BP_101_31393_20111018_154135_outLine +BABEL_BP_101_31441_20111026_004058_inLine +BABEL_BP_101_31441_20111026_004058_outLine +BABEL_BP_101_31451_20111024_213113_inLine 
+BABEL_BP_101_31451_20111024_213113_outLine +BABEL_BP_101_31460_20111019_144918_inLine +BABEL_BP_101_31460_20111019_144918_outLine +BABEL_BP_101_31917_20111124_151225_inLine +BABEL_BP_101_31917_20111124_151225_outLine +BABEL_BP_101_31980_20111025_130427_inLine +BABEL_BP_101_31980_20111025_130427_outLine +BABEL_BP_101_32274_20111024_160835_inLine +BABEL_BP_101_32274_20111024_160835_outLine +BABEL_BP_101_32295_20111111_144923_inLine +BABEL_BP_101_32295_20111111_144923_outLine +BABEL_BP_101_32452_20111022_135256_inLine +BABEL_BP_101_32452_20111022_135256_outLine +BABEL_BP_101_32710_20111119_133220_inLine +BABEL_BP_101_32710_20111119_133220_outLine +BABEL_BP_101_32890_20111130_220957_inLine +BABEL_BP_101_32890_20111130_220957_outLine +BABEL_BP_101_33023_20111024_133813_inLine +BABEL_BP_101_33023_20111024_133813_outLine +BABEL_BP_101_33671_20111019_130712_inLine +BABEL_BP_101_33671_20111019_130712_outLine +BABEL_BP_101_33742_20111118_231555_inLine +BABEL_BP_101_33742_20111118_231555_outLine +BABEL_BP_101_34194_20111024_173622_inLine +BABEL_BP_101_34194_20111024_173622_outLine +BABEL_BP_101_34446_20111019_005315_inLine +BABEL_BP_101_34446_20111019_005315_outLine +BABEL_BP_101_34930_20111024_143654_inLine +BABEL_BP_101_34930_20111024_143654_outLine +BABEL_BP_101_34961_20111027_175107_inLine +BABEL_BP_101_34961_20111027_175107_outLine +BABEL_BP_101_35006_20111120_181354_inLine +BABEL_BP_101_35006_20111120_181354_outLine +BABEL_BP_101_35016_20111203_203519_inLine +BABEL_BP_101_35016_20111203_203519_outLine +BABEL_BP_101_35179_20111124_131132_inLine +BABEL_BP_101_35179_20111124_131132_outLine +BABEL_BP_101_35357_20111203_170817_inLine +BABEL_BP_101_35357_20111203_170817_outLine +BABEL_BP_101_35391_20111130_144901_inLine +BABEL_BP_101_35391_20111130_144901_outLine +BABEL_BP_101_35576_20111118_131203_inLine +BABEL_BP_101_35576_20111118_131203_outLine +BABEL_BP_101_35932_20111023_151638_inLine +BABEL_BP_101_35932_20111023_151638_outLine +BABEL_BP_101_36268_20111028_174826_inLine +BABEL_BP_101_36268_20111028_174826_outLine +BABEL_BP_101_36383_20111129_181746_inLine +BABEL_BP_101_36383_20111129_181746_outLine +BABEL_BP_101_36424_20111119_145307_inLine +BABEL_BP_101_36424_20111119_145307_outLine +BABEL_BP_101_36502_20111025_145704_inLine +BABEL_BP_101_36502_20111025_145704_outLine +BABEL_BP_101_36711_20111104_142236_inLine +BABEL_BP_101_36711_20111104_142236_outLine +BABEL_BP_101_37094_20111019_184657_inLine +BABEL_BP_101_37094_20111019_184657_outLine +BABEL_BP_101_37110_20111019_203150_inLine +BABEL_BP_101_37110_20111019_203150_outLine +BABEL_BP_101_37203_20111103_180606_inLine +BABEL_BP_101_37203_20111103_180606_outLine +BABEL_BP_101_37210_20111102_172955_inLine +BABEL_BP_101_37210_20111102_172955_outLine +BABEL_BP_101_37258_20111110_203745_inLine +BABEL_BP_101_37258_20111110_203745_outLine +BABEL_BP_101_37285_20111028_003951_inLine +BABEL_BP_101_37285_20111028_003951_outLine +BABEL_BP_101_37461_20111022_210313_inLine +BABEL_BP_101_37461_20111022_210313_outLine +BABEL_BP_101_37766_20111130_012017_inLine +BABEL_BP_101_37766_20111130_012017_outLine +BABEL_BP_101_38108_20111125_153427_inLine +BABEL_BP_101_38108_20111125_153427_outLine +BABEL_BP_101_38698_20111025_183746_inLine +BABEL_BP_101_38698_20111025_183746_outLine +BABEL_BP_101_38879_20111029_193700_inLine +BABEL_BP_101_38879_20111029_193701_outLine +BABEL_BP_101_38912_20111120_214951_inLine +BABEL_BP_101_38912_20111120_214951_outLine +BABEL_BP_101_38956_20111025_175018_inLine +BABEL_BP_101_38956_20111025_175018_outLine 
+BABEL_BP_101_39080_20111124_182207_inLine +BABEL_BP_101_39080_20111124_182207_outLine +BABEL_BP_101_39140_20111026_125824_inLine +BABEL_BP_101_39140_20111026_125824_outLine +BABEL_BP_101_39246_20111119_185410_inLine +BABEL_BP_101_39246_20111119_185410_outLine +BABEL_BP_101_39287_20111119_192815_inLine +BABEL_BP_101_39287_20111119_192815_outLine +BABEL_BP_101_39317_20111020_162113_inLine +BABEL_BP_101_39317_20111020_162113_outLine +BABEL_BP_101_39756_20111207_162851_inLine +BABEL_BP_101_39756_20111207_162851_outLine +BABEL_BP_101_39809_20111025_182053_inLine +BABEL_BP_101_39809_20111025_182053_outLine +BABEL_BP_101_39915_20111101_164819_inLine +BABEL_BP_101_39915_20111101_164819_outLine +BABEL_BP_101_39997_20111124_152508_inLine +BABEL_BP_101_39997_20111124_152508_outLine +BABEL_BP_101_40046_20111018_185918_inLine +BABEL_BP_101_40046_20111018_185918_outLine +BABEL_BP_101_40123_20111129_182232_inLine +BABEL_BP_101_40123_20111129_182232_outLine +BABEL_BP_101_40346_20111018_165337_inLine +BABEL_BP_101_40346_20111018_165337_outLine +BABEL_BP_101_40439_20111203_182814_inLine +BABEL_BP_101_40439_20111203_182814_outLine +BABEL_BP_101_40510_20111126_151543_inLine +BABEL_BP_101_40510_20111126_151543_outLine +BABEL_BP_101_40980_20111119_150324_inLine +BABEL_BP_101_40980_20111119_150324_outLine +BABEL_BP_101_41170_20111018_182942_inLine +BABEL_BP_101_41170_20111018_182942_outLine +BABEL_BP_101_41456_20111117_162327_inLine +BABEL_BP_101_41456_20111117_162327_outLine +BABEL_BP_101_41513_20111121_142105_inLine +BABEL_BP_101_41513_20111121_142105_outLine +BABEL_BP_101_41661_20111102_131955_inLine +BABEL_BP_101_41661_20111102_131955_outLine +BABEL_BP_101_42145_20111117_131023_inLine +BABEL_BP_101_42145_20111117_131023_outLine +BABEL_BP_101_42266_20111031_233515_inLine +BABEL_BP_101_42266_20111031_233515_outLine +BABEL_BP_101_42615_20111018_173023_inLine +BABEL_BP_101_42615_20111018_173023_outLine +BABEL_BP_101_42766_20111124_150047_inLine +BABEL_BP_101_42766_20111124_150047_outLine +BABEL_BP_101_42788_20111120_201122_inLine +BABEL_BP_101_42788_20111120_201122_outLine +BABEL_BP_101_43086_20111025_160708_inLine +BABEL_BP_101_43086_20111025_160708_outLine +BABEL_BP_101_43383_20111019_135432_inLine +BABEL_BP_101_43383_20111019_135432_outLine +BABEL_BP_101_44129_20111118_210653_inLine +BABEL_BP_101_44129_20111118_210653_outLine +BABEL_BP_101_44209_20111120_131002_inLine +BABEL_BP_101_44209_20111120_131002_outLine +BABEL_BP_101_44403_20111023_151830_inLine +BABEL_BP_101_44403_20111023_151830_outLine +BABEL_BP_101_44403_20111023_152732_inLine +BABEL_BP_101_44403_20111023_152732_outLine +BABEL_BP_101_44535_20111021_153223_inLine +BABEL_BP_101_44535_20111021_153223_outLine +BABEL_BP_101_44836_20111119_154154_inLine +BABEL_BP_101_44836_20111119_154154_outLine +BABEL_BP_101_45361_20111124_141850_inLine +BABEL_BP_101_45361_20111124_141850_outLine +BABEL_BP_101_45472_20111020_171106_inLine +BABEL_BP_101_45472_20111020_171106_outLine +BABEL_BP_101_45511_20111025_204720_inLine +BABEL_BP_101_45511_20111025_204720_outLine +BABEL_BP_101_45642_20111027_171601_inLine +BABEL_BP_101_45642_20111027_171601_outLine +BABEL_BP_101_45678_20111020_155310_inLine +BABEL_BP_101_45678_20111020_155310_outLine +BABEL_BP_101_45702_20111130_133011_inLine +BABEL_BP_101_45702_20111130_133011_outLine +BABEL_BP_101_45738_20111129_143901_inLine +BABEL_BP_101_45738_20111129_143901_outLine +BABEL_BP_101_45931_20111021_190814_inLine +BABEL_BP_101_45931_20111021_190815_outLine +BABEL_BP_101_46243_20111020_204505_inLine 
+BABEL_BP_101_46243_20111020_204505_outLine +BABEL_BP_101_46332_20111103_181337_inLine +BABEL_BP_101_46332_20111103_181337_outLine +BABEL_BP_101_46603_20111128_205449_inLine +BABEL_BP_101_46603_20111128_205449_outLine +BABEL_BP_101_47128_20111027_143246_inLine +BABEL_BP_101_47128_20111027_143246_outLine +BABEL_BP_101_47634_20111026_134005_inLine +BABEL_BP_101_47634_20111026_134005_outLine +BABEL_BP_101_47646_20111126_015509_inLine +BABEL_BP_101_47646_20111126_015509_outLine +BABEL_BP_101_47661_20111028_183156_inLine +BABEL_BP_101_47661_20111028_183156_outLine +BABEL_BP_101_47794_20111204_021008_inLine +BABEL_BP_101_47794_20111204_021008_outLine +BABEL_BP_101_47823_20111129_204026_inLine +BABEL_BP_101_47823_20111129_204026_outLine +BABEL_BP_101_47906_20111119_130308_inLine +BABEL_BP_101_47906_20111119_130308_outLine +BABEL_BP_101_48053_20111020_130943_inLine +BABEL_BP_101_48053_20111020_130943_outLine +BABEL_BP_101_48188_20111117_210754_inLine +BABEL_BP_101_48188_20111117_210754_outLine +BABEL_BP_101_48410_20111021_230709_inLine +BABEL_BP_101_48410_20111021_230709_outLine +BABEL_BP_101_48418_20111203_171145_inLine +BABEL_BP_101_48418_20111203_171145_outLine +BABEL_BP_101_48511_20111026_133629_inLine +BABEL_BP_101_48511_20111026_133629_outLine +BABEL_BP_101_48559_20111118_125850_inLine +BABEL_BP_101_48559_20111118_125850_outLine +BABEL_BP_101_48733_20111117_140942_inLine +BABEL_BP_101_48733_20111117_140942_outLine +BABEL_BP_101_49306_20111130_170120_inLine +BABEL_BP_101_49306_20111130_170120_outLine +BABEL_BP_101_49452_20111027_171653_inLine +BABEL_BP_101_49452_20111027_171653_outLine +BABEL_BP_101_49541_20111104_192333_inLine +BABEL_BP_101_49541_20111104_192333_outLine +BABEL_BP_101_49624_20111120_201437_inLine +BABEL_BP_101_49624_20111120_201437_outLine +BABEL_BP_101_49689_20111125_174904_inLine +BABEL_BP_101_49689_20111125_174904_outLine +BABEL_BP_101_49773_20111021_195218_inLine +BABEL_BP_101_49773_20111021_195218_outLine +BABEL_BP_101_49864_20111023_192125_inLine +BABEL_BP_101_49864_20111023_192125_outLine +BABEL_BP_101_50101_20111019_173327_inLine +BABEL_BP_101_50101_20111019_173327_outLine +BABEL_BP_101_50201_20111026_154228_inLine +BABEL_BP_101_50201_20111026_154228_outLine +BABEL_BP_101_50409_20111204_161529_inLine +BABEL_BP_101_50409_20111204_161529_outLine +BABEL_BP_101_50416_20111129_170514_inLine +BABEL_BP_101_50416_20111129_170514_outLine +BABEL_BP_101_50476_20111130_010429_inLine +BABEL_BP_101_50476_20111130_010429_outLine +BABEL_BP_101_50589_20111025_190441_inLine +BABEL_BP_101_50589_20111025_190441_outLine +BABEL_BP_101_50842_20111030_171650_inLine +BABEL_BP_101_50842_20111030_171650_outLine +BABEL_BP_101_51052_20111121_175102_inLine +BABEL_BP_101_51052_20111121_175102_outLine +BABEL_BP_101_51117_20111025_175138_inLine +BABEL_BP_101_51117_20111025_175138_outLine +BABEL_BP_101_51374_20111020_152431_inLine +BABEL_BP_101_51374_20111020_152431_outLine +BABEL_BP_101_51446_20111127_145511_inLine +BABEL_BP_101_51446_20111127_145511_outLine +BABEL_BP_101_51569_20111019_201532_inLine +BABEL_BP_101_51569_20111019_201532_outLine +BABEL_BP_101_51727_20111117_003536_inLine +BABEL_BP_101_51727_20111117_003536_outLine +BABEL_BP_101_52366_20111018_140013_inLine +BABEL_BP_101_52366_20111018_140013_outLine +BABEL_BP_101_52642_20111129_221057_inLine +BABEL_BP_101_52642_20111129_221057_outLine +BABEL_BP_101_53181_20111025_171245_inLine +BABEL_BP_101_53181_20111025_171246_outLine +BABEL_BP_101_53464_20111020_132212_inLine +BABEL_BP_101_53464_20111020_132212_outLine 
+BABEL_BP_101_53544_20111205_190859_inLine +BABEL_BP_101_53544_20111205_190859_outLine +BABEL_BP_101_53703_20111026_123307_inLine +BABEL_BP_101_53703_20111026_123307_outLine +BABEL_BP_101_53824_20111115_174804_inLine +BABEL_BP_101_53824_20111115_174804_outLine +BABEL_BP_101_53985_20111027_134232_inLine +BABEL_BP_101_53985_20111027_134232_outLine +BABEL_BP_101_54315_20111018_150809_inLine +BABEL_BP_101_54315_20111018_150809_outLine +BABEL_BP_101_54787_20111027_003335_inLine +BABEL_BP_101_54787_20111027_003335_outLine +BABEL_BP_101_55369_20111022_150532_inLine +BABEL_BP_101_55369_20111022_150532_outLine +BABEL_BP_101_55786_20111023_175604_inLine +BABEL_BP_101_55786_20111023_175604_outLine +BABEL_BP_101_55786_20111023_181021_inLine +BABEL_BP_101_55786_20111023_181022_outLine +BABEL_BP_101_55944_20111124_180312_inLine +BABEL_BP_101_55944_20111124_180312_outLine +BABEL_BP_101_56070_20111030_192056_inLine +BABEL_BP_101_56070_20111030_192056_outLine +BABEL_BP_101_56117_20111120_230517_inLine +BABEL_BP_101_56117_20111120_230517_outLine +BABEL_BP_101_56648_20111126_183128_inLine +BABEL_BP_101_56648_20111126_183128_outLine +BABEL_BP_101_57457_20111104_004433_inLine +BABEL_BP_101_57457_20111104_004433_outLine +BABEL_BP_101_57629_20111018_150159_inLine +BABEL_BP_101_57629_20111018_150200_outLine +BABEL_BP_101_58137_20111121_200320_inLine +BABEL_BP_101_58137_20111121_200320_outLine +BABEL_BP_101_58190_20111124_203150_inLine +BABEL_BP_101_58190_20111124_203150_outLine +BABEL_BP_101_58357_20111122_155154_inLine +BABEL_BP_101_58357_20111122_155154_outLine +BABEL_BP_101_58536_20111103_202702_inLine +BABEL_BP_101_58536_20111103_202702_outLine +BABEL_BP_101_58715_20111025_173420_inLine +BABEL_BP_101_58715_20111025_173420_outLine +BABEL_BP_101_58863_20111029_204335_inLine +BABEL_BP_101_58863_20111029_204335_outLine +BABEL_BP_101_58923_20111021_133326_inLine +BABEL_BP_101_58923_20111021_133326_outLine +BABEL_BP_101_59028_20111130_201120_inLine +BABEL_BP_101_59028_20111130_201120_outLine +BABEL_BP_101_59032_20111130_125508_inLine +BABEL_BP_101_59032_20111130_125508_outLine +BABEL_BP_101_59454_20111117_203722_inLine +BABEL_BP_101_59454_20111117_203722_outLine +BABEL_BP_101_59544_20111027_165941_inLine +BABEL_BP_101_59544_20111027_165941_outLine +BABEL_BP_101_59868_20111021_213412_inLine +BABEL_BP_101_59868_20111021_213412_outLine +BABEL_BP_101_59925_20111203_131501_inLine +BABEL_BP_101_59925_20111203_131501_outLine +BABEL_BP_101_59961_20111031_203903_inLine +BABEL_BP_101_59961_20111031_203903_outLine +BABEL_BP_101_60106_20111024_194048_inLine +BABEL_BP_101_60106_20111024_194048_outLine +BABEL_BP_101_60110_20111102_211956_inLine +BABEL_BP_101_60110_20111102_211956_outLine +BABEL_BP_101_60183_20111129_192036_inLine +BABEL_BP_101_60183_20111129_192036_outLine +BABEL_BP_101_60605_20111124_131048_inLine +BABEL_BP_101_60605_20111124_131048_outLine +BABEL_BP_101_60826_20111120_164851_inLine +BABEL_BP_101_60826_20111120_164851_outLine +BABEL_BP_101_61073_20111102_190426_inLine +BABEL_BP_101_61073_20111102_190426_outLine +BABEL_BP_101_61119_20111125_210556_inLine +BABEL_BP_101_61119_20111125_210556_outLine +BABEL_BP_101_61408_20111204_193348_inLine +BABEL_BP_101_61408_20111204_193348_outLine +BABEL_BP_101_61446_20111019_151903_inLine +BABEL_BP_101_61446_20111019_151903_outLine +BABEL_BP_101_61449_20111117_151606_inLine +BABEL_BP_101_61449_20111117_151606_outLine +BABEL_BP_101_61762_20111028_180944_inLine +BABEL_BP_101_61762_20111028_180944_outLine +BABEL_BP_101_61822_20111022_202742_inLine 
+BABEL_BP_101_61822_20111022_202742_outLine +BABEL_BP_101_62177_20111019_143057_inLine +BABEL_BP_101_62177_20111019_143057_outLine +BABEL_BP_101_63339_20111019_193743_inLine +BABEL_BP_101_63339_20111019_193743_outLine +BABEL_BP_101_63459_20111120_165000_inLine +BABEL_BP_101_63459_20111120_165000_outLine +BABEL_BP_101_63711_20111025_215436_inLine +BABEL_BP_101_63711_20111025_215436_outLine +BABEL_BP_101_64205_20111203_135507_inLine +BABEL_BP_101_64205_20111203_135507_outLine +BABEL_BP_101_64404_20111018_165302_inLine +BABEL_BP_101_64404_20111018_165302_outLine +BABEL_BP_101_64889_20111124_220757_inLine +BABEL_BP_101_64889_20111124_220757_outLine +BABEL_BP_101_65606_20111116_140731_inLine +BABEL_BP_101_65606_20111116_140731_outLine +BABEL_BP_101_65743_20111019_161830_inLine +BABEL_BP_101_65743_20111019_161830_outLine +BABEL_BP_101_66188_20111206_204246_inLine +BABEL_BP_101_66188_20111206_204246_outLine +BABEL_BP_101_66798_20111026_145101_inLine +BABEL_BP_101_66798_20111026_145101_outLine +BABEL_BP_101_66839_20111120_192904_inLine +BABEL_BP_101_66839_20111120_192904_outLine +BABEL_BP_101_66866_20111128_183933_inLine +BABEL_BP_101_66866_20111128_183933_outLine +BABEL_BP_101_66903_20111021_130004_inLine +BABEL_BP_101_66903_20111021_130004_outLine +BABEL_BP_101_66964_20111117_173710_inLine +BABEL_BP_101_66964_20111117_173710_outLine +BABEL_BP_101_67555_20111023_140926_inLine +BABEL_BP_101_67555_20111023_140926_outLine +BABEL_BP_101_67798_20111104_013951_inLine +BABEL_BP_101_67798_20111104_013951_outLine +BABEL_BP_101_68129_20111120_133854_inLine +BABEL_BP_101_68129_20111120_133854_outLine +BABEL_BP_101_68295_20111124_181015_inLine +BABEL_BP_101_68295_20111124_181015_outLine +BABEL_BP_101_68545_20111121_132438_inLine +BABEL_BP_101_68545_20111121_132438_outLine +BABEL_BP_101_68706_20111025_141920_inLine +BABEL_BP_101_68706_20111025_141920_outLine +BABEL_BP_101_68767_20111029_174711_inLine +BABEL_BP_101_68767_20111029_174711_outLine +BABEL_BP_101_69049_20111102_140355_inLine +BABEL_BP_101_69049_20111102_140355_outLine +BABEL_BP_101_69137_20111121_162510_inLine +BABEL_BP_101_69137_20111121_162510_outLine +BABEL_BP_101_69236_20111029_183129_inLine +BABEL_BP_101_69236_20111029_183130_outLine +BABEL_BP_101_69295_20111130_003858_inLine +BABEL_BP_101_69295_20111130_003858_outLine +BABEL_BP_101_69368_20111020_193935_inLine +BABEL_BP_101_69368_20111020_193935_outLine +BABEL_BP_101_69548_20111024_183457_inLine +BABEL_BP_101_69548_20111024_192648_inLine +BABEL_BP_101_69548_20111024_192648_outLine +BABEL_BP_101_69650_20111025_220513_inLine +BABEL_BP_101_69650_20111025_220513_outLine +BABEL_BP_101_69764_20111026_211954_inLine +BABEL_BP_101_69764_20111026_211954_outLine +BABEL_BP_101_70511_20111119_201802_inLine +BABEL_BP_101_70511_20111119_201802_outLine +BABEL_BP_101_70548_20111127_144545_inLine +BABEL_BP_101_70548_20111127_144545_outLine +BABEL_BP_101_70615_20111019_192646_inLine +BABEL_BP_101_70615_20111019_192646_outLine +BABEL_BP_101_70680_20111018_151854_inLine +BABEL_BP_101_70680_20111018_151854_outLine +BABEL_BP_101_71741_20111026_164112_inLine +BABEL_BP_101_71741_20111026_164112_outLine +BABEL_BP_101_71778_20111121_153418_inLine +BABEL_BP_101_71778_20111121_153418_outLine +BABEL_BP_101_72330_20111021_174758_inLine +BABEL_BP_101_72330_20111021_174758_outLine +BABEL_BP_101_72718_20111129_164931_inLine +BABEL_BP_101_72718_20111129_164931_outLine +BABEL_BP_101_72879_20111018_234432_inLine +BABEL_BP_101_72879_20111018_234432_outLine +BABEL_BP_101_73170_20111023_213358_inLine 
+BABEL_BP_101_73170_20111023_213358_outLine +BABEL_BP_101_73542_20111019_212519_inLine +BABEL_BP_101_73542_20111019_212520_outLine +BABEL_BP_101_73761_20111115_130043_inLine +BABEL_BP_101_73761_20111115_130043_outLine +BABEL_BP_101_73786_20111019_133350_inLine +BABEL_BP_101_73786_20111019_133350_outLine +BABEL_BP_101_73911_20111026_220612_inLine +BABEL_BP_101_73911_20111026_220612_outLine +BABEL_BP_101_73923_20111017_171925_inLine +BABEL_BP_101_73923_20111017_171925_outLine +BABEL_BP_101_74234_20111102_161626_inLine +BABEL_BP_101_74234_20111102_161626_outLine +BABEL_BP_101_74317_20111115_154736_inLine +BABEL_BP_101_74317_20111115_154736_outLine +BABEL_BP_101_74395_20111117_135831_inLine +BABEL_BP_101_74395_20111117_135831_outLine +BABEL_BP_101_74404_20111020_190145_inLine +BABEL_BP_101_74404_20111020_190145_outLine +BABEL_BP_101_74451_20111117_164153_inLine +BABEL_BP_101_74451_20111117_164153_outLine +BABEL_BP_101_74823_20111024_162421_inLine +BABEL_BP_101_74823_20111024_162421_outLine +BABEL_BP_101_74884_20111030_143437_inLine +BABEL_BP_101_74884_20111030_143437_outLine +BABEL_BP_101_75020_20111020_153842_inLine +BABEL_BP_101_75020_20111020_153842_outLine +BABEL_BP_101_75243_20111204_182510_inLine +BABEL_BP_101_75243_20111204_182510_outLine +BABEL_BP_101_75815_20111029_172800_inLine +BABEL_BP_101_75815_20111029_172800_outLine +BABEL_BP_101_76341_20111027_132615_inLine +BABEL_BP_101_76341_20111027_132615_outLine +BABEL_BP_101_76585_20111025_150729_inLine +BABEL_BP_101_76585_20111025_150729_outLine +BABEL_BP_101_76661_20111024_190704_inLine +BABEL_BP_101_76661_20111024_190704_outLine +BABEL_BP_101_76661_20111024_194723_inLine +BABEL_BP_101_76661_20111024_194723_outLine +BABEL_BP_101_76733_20111128_133322_inLine +BABEL_BP_101_76733_20111128_133322_outLine +BABEL_BP_101_76919_20111120_202312_inLine +BABEL_BP_101_76919_20111120_202312_outLine +BABEL_BP_101_76944_20111114_210715_inLine +BABEL_BP_101_76944_20111114_210715_outLine +BABEL_BP_101_77238_20111024_161359_inLine +BABEL_BP_101_77238_20111024_161359_outLine +BABEL_BP_101_77244_20111127_021035_inLine +BABEL_BP_101_77244_20111127_021035_outLine +BABEL_BP_101_77348_20111018_171727_inLine +BABEL_BP_101_77348_20111018_171727_outLine +BABEL_BP_101_77802_20111125_191137_inLine +BABEL_BP_101_77802_20111125_191137_outLine +BABEL_BP_101_77886_20111026_163310_inLine +BABEL_BP_101_77886_20111026_163311_outLine +BABEL_BP_101_77965_20111019_133612_inLine +BABEL_BP_101_77965_20111019_133612_outLine +BABEL_BP_101_77965_20111019_134901_inLine +BABEL_BP_101_77965_20111019_134901_outLine +BABEL_BP_101_78094_20111026_132018_inLine +BABEL_BP_101_78094_20111026_132018_outLine +BABEL_BP_101_78514_20111026_204851_inLine +BABEL_BP_101_78514_20111026_204851_outLine +BABEL_BP_101_78879_20111102_133430_inLine +BABEL_BP_101_78879_20111102_133430_outLine +BABEL_BP_101_79412_20111026_010314_inLine +BABEL_BP_101_79412_20111026_010314_outLine +BABEL_BP_101_79495_20111017_200437_inLine +BABEL_BP_101_79495_20111017_200438_outLine +BABEL_BP_101_80008_20111031_180815_inLine +BABEL_BP_101_80008_20111031_180815_outLine +BABEL_BP_101_80156_20111024_145349_inLine +BABEL_BP_101_80156_20111024_145349_outLine +BABEL_BP_101_80535_20111206_193024_inLine +BABEL_BP_101_80535_20111206_193024_outLine +BABEL_BP_101_80786_20111030_205240_inLine +BABEL_BP_101_80786_20111030_205240_outLine +BABEL_BP_101_80817_20111125_182621_inLine +BABEL_BP_101_80817_20111125_182621_outLine +BABEL_BP_101_80953_20111122_205857_inLine +BABEL_BP_101_80953_20111122_205857_outLine 
+BABEL_BP_101_81056_20111130_220634_inLine +BABEL_BP_101_81056_20111130_220634_outLine +BABEL_BP_101_81308_20111021_143922_inLine +BABEL_BP_101_81308_20111021_143922_outLine +BABEL_BP_101_81321_20111028_124244_inLine +BABEL_BP_101_81321_20111028_124244_outLine +BABEL_BP_101_81486_20111027_163851_inLine +BABEL_BP_101_81486_20111027_163851_outLine +BABEL_BP_101_82023_20111024_151938_inLine +BABEL_BP_101_82023_20111024_151938_outLine +BABEL_BP_101_82025_20111024_170514_inLine +BABEL_BP_101_82025_20111024_170514_outLine +BABEL_BP_101_82217_20111115_191956_inLine +BABEL_BP_101_82217_20111115_191956_outLine +BABEL_BP_101_82484_20111103_172542_inLine +BABEL_BP_101_82484_20111103_172542_outLine +BABEL_BP_101_82591_20111030_152731_inLine +BABEL_BP_101_82591_20111030_152731_outLine +BABEL_BP_101_82766_20111026_195127_inLine +BABEL_BP_101_82766_20111026_195127_outLine +BABEL_BP_101_82881_20111025_194316_inLine +BABEL_BP_101_82881_20111025_194316_outLine +BABEL_BP_101_83362_20111018_185746_inLine +BABEL_BP_101_83362_20111018_185746_outLine +BABEL_BP_101_83791_20111017_205314_inLine +BABEL_BP_101_83791_20111017_205314_outLine +BABEL_BP_101_84042_20111129_190132_inLine +BABEL_BP_101_84042_20111129_190132_outLine +BABEL_BP_101_84088_20111020_184621_inLine +BABEL_BP_101_84088_20111020_184621_outLine +BABEL_BP_101_84335_20111116_205244_inLine +BABEL_BP_101_84335_20111116_205244_outLine +BABEL_BP_101_84540_20111102_204218_inLine +BABEL_BP_101_84540_20111102_204218_outLine +BABEL_BP_101_84543_20111124_200551_inLine +BABEL_BP_101_84543_20111124_200551_outLine +BABEL_BP_101_84943_20111020_144955_inLine +BABEL_BP_101_84943_20111020_144955_outLine +BABEL_BP_101_85083_20111123_195138_inLine +BABEL_BP_101_85083_20111123_195138_outLine +BABEL_BP_101_85533_20111029_135232_inLine +BABEL_BP_101_85533_20111029_135232_outLine +BABEL_BP_101_85617_20111126_195610_inLine +BABEL_BP_101_85617_20111126_195610_outLine +BABEL_BP_101_85883_20111126_183750_inLine +BABEL_BP_101_85883_20111126_183750_outLine +BABEL_BP_101_85948_20111020_171625_inLine +BABEL_BP_101_85948_20111020_171626_outLine +BABEL_BP_101_86016_20111118_140325_inLine +BABEL_BP_101_86016_20111118_140326_outLine +BABEL_BP_101_86029_20111027_190831_inLine +BABEL_BP_101_86029_20111027_190831_outLine +BABEL_BP_101_86227_20111020_213628_inLine +BABEL_BP_101_86227_20111020_213628_outLine +BABEL_BP_101_86258_20111128_161415_inLine +BABEL_BP_101_86258_20111128_161415_outLine +BABEL_BP_101_86419_20111019_211829_inLine +BABEL_BP_101_86419_20111019_211829_outLine +BABEL_BP_101_86752_20111206_182753_inLine +BABEL_BP_101_86752_20111206_182753_outLine +BABEL_BP_101_86900_20111029_140540_inLine +BABEL_BP_101_86900_20111029_140540_outLine +BABEL_BP_101_87107_20111028_193807_inLine +BABEL_BP_101_87107_20111028_193807_outLine +BABEL_BP_101_87351_20111021_224242_inLine +BABEL_BP_101_87351_20111021_224242_outLine +BABEL_BP_101_87481_20111129_131455_inLine +BABEL_BP_101_87481_20111129_131455_outLine +BABEL_BP_101_87564_20111130_175930_inLine +BABEL_BP_101_87564_20111130_175930_outLine +BABEL_BP_101_87634_20111019_151449_inLine +BABEL_BP_101_87634_20111019_151449_outLine +BABEL_BP_101_87634_20111019_152457_inLine +BABEL_BP_101_87634_20111019_152457_outLine +BABEL_BP_101_88243_20111024_193201_inLine +BABEL_BP_101_88243_20111024_193201_outLine +BABEL_BP_101_88294_20111026_023525_inLine +BABEL_BP_101_88294_20111026_023525_outLine +BABEL_BP_101_88464_20111119_194433_inLine +BABEL_BP_101_88464_20111119_194433_outLine +BABEL_BP_101_88506_20111115_203514_inLine 
+BABEL_BP_101_88506_20111115_203514_outLine +BABEL_BP_101_88929_20111118_201818_inLine +BABEL_BP_101_88929_20111118_201818_outLine +BABEL_BP_101_89345_20111021_155741_inLine +BABEL_BP_101_89345_20111021_155741_outLine +BABEL_BP_101_89573_20111025_201747_inLine +BABEL_BP_101_89573_20111025_201747_outLine +BABEL_BP_101_89619_20111029_215743_inLine +BABEL_BP_101_89619_20111029_215743_outLine +BABEL_BP_101_89630_20111125_193140_inLine +BABEL_BP_101_89630_20111125_193140_outLine +BABEL_BP_101_89674_20111025_190234_inLine +BABEL_BP_101_89674_20111025_190234_outLine +BABEL_BP_101_89818_20111019_181821_inLine +BABEL_BP_101_89818_20111019_181821_outLine +BABEL_BP_101_89965_20111129_175314_inLine +BABEL_BP_101_89965_20111129_175314_outLine +BABEL_BP_101_90313_20111019_155232_inLine +BABEL_BP_101_90313_20111019_155232_outLine +BABEL_BP_101_90393_20111103_165919_inLine +BABEL_BP_101_90393_20111103_165919_outLine +BABEL_BP_101_90490_20111017_192604_inLine +BABEL_BP_101_90490_20111017_192604_outLine +BABEL_BP_101_90506_20111026_182007_inLine +BABEL_BP_101_90506_20111026_182007_outLine +BABEL_BP_101_90511_20111024_132449_inLine +BABEL_BP_101_90511_20111024_132449_outLine +BABEL_BP_101_90577_20111014_144604_inLine +BABEL_BP_101_90577_20111014_144605_outLine +BABEL_BP_101_90730_20111025_154632_inLine +BABEL_BP_101_90730_20111025_154632_outLine +BABEL_BP_101_90819_20111126_173557_inLine +BABEL_BP_101_90819_20111126_173557_outLine +BABEL_BP_101_90890_20111018_143525_inLine +BABEL_BP_101_90890_20111018_143526_outLine +BABEL_BP_101_90960_20111024_132656_outLine +BABEL_BP_101_91007_20111203_160119_inLine +BABEL_BP_101_91007_20111203_160119_outLine +BABEL_BP_101_91358_20111207_171552_inLine +BABEL_BP_101_91358_20111207_171552_outLine +BABEL_BP_101_91401_20111028_174554_inLine +BABEL_BP_101_91401_20111028_174554_outLine +BABEL_BP_101_91406_20111114_221433_inLine +BABEL_BP_101_91406_20111114_221433_outLine +BABEL_BP_101_91409_20111023_181828_inLine +BABEL_BP_101_91409_20111023_181828_outLine +BABEL_BP_101_91481_20111124_213929_inLine +BABEL_BP_101_91481_20111124_213929_outLine +BABEL_BP_101_91491_20111021_160657_outLine +BABEL_BP_101_91668_20111127_133044_inLine +BABEL_BP_101_91668_20111127_133044_outLine +BABEL_BP_101_91777_20111025_195108_outLine +BABEL_BP_101_91873_20111129_214832_inLine +BABEL_BP_101_91873_20111129_214832_outLine +BABEL_BP_101_91905_20111120_211325_inLine +BABEL_BP_101_91905_20111120_211325_outLine +BABEL_BP_101_91979_20111019_195336_inLine +BABEL_BP_101_91979_20111019_195336_outLine +BABEL_BP_101_92321_20111125_152246_inLine +BABEL_BP_101_92321_20111125_152246_outLine +BABEL_BP_101_92436_20111024_214516_inLine +BABEL_BP_101_92436_20111024_214516_outLine +BABEL_BP_101_92560_20111025_142040_outLine +BABEL_BP_101_92628_20111102_131604_inLine +BABEL_BP_101_92628_20111102_131604_outLine +BABEL_BP_101_92642_20111025_170509_inLine +BABEL_BP_101_92642_20111025_170509_outLine +BABEL_BP_101_92735_20111024_171657_inLine +BABEL_BP_101_92735_20111024_171658_outLine +BABEL_BP_101_92800_20111030_222032_inLine +BABEL_BP_101_92800_20111030_222032_outLine +BABEL_BP_101_93004_20111121_190213_inLine +BABEL_BP_101_93004_20111121_190213_outLine +BABEL_BP_101_93091_20111022_191333_inLine +BABEL_BP_101_93091_20111022_191333_outLine +BABEL_BP_101_93151_20111023_184643_inLine +BABEL_BP_101_93151_20111023_184644_outLine +BABEL_BP_101_93192_20111020_130226_inLine +BABEL_BP_101_93192_20111020_130226_outLine +BABEL_BP_101_93454_20111027_014223_inLine +BABEL_BP_101_93454_20111027_014223_outLine 
+BABEL_BP_101_93597_20111020_195543_outLine +BABEL_BP_101_93643_20111021_154435_inLine +BABEL_BP_101_93643_20111021_154435_outLine +BABEL_BP_101_94149_20111027_125107_inLine +BABEL_BP_101_94149_20111027_125107_outLine +BABEL_BP_101_94222_20111021_144043_outLine +BABEL_BP_101_94223_20111026_220859_inLine +BABEL_BP_101_94223_20111026_220859_outLine +BABEL_BP_101_94226_20111125_140433_inLine +BABEL_BP_101_94226_20111125_140433_outLine +BABEL_BP_101_94514_20111127_130706_inLine +BABEL_BP_101_94514_20111127_130706_outLine +BABEL_BP_101_94696_20111203_191827_inLine +BABEL_BP_101_94696_20111203_191827_outLine +BABEL_BP_101_94989_20111028_152522_inLine +BABEL_BP_101_94989_20111028_152522_outLine +BABEL_BP_101_95034_20111126_193931_inLine +BABEL_BP_101_95034_20111126_193931_outLine +BABEL_BP_101_95423_20111116_164510_inLine +BABEL_BP_101_95423_20111116_164510_outLine +BABEL_BP_101_95533_20111129_183735_inLine +BABEL_BP_101_95533_20111129_183735_outLine +BABEL_BP_101_95542_20111026_190957_inLine +BABEL_BP_101_95542_20111026_190957_outLine +BABEL_BP_101_95589_20111118_214545_inLine +BABEL_BP_101_95589_20111118_214545_outLine +BABEL_BP_101_95650_20111019_144529_inLine +BABEL_BP_101_95650_20111019_144529_outLine +BABEL_BP_101_95815_20111024_155626_inLine +BABEL_BP_101_95815_20111024_155626_outLine +BABEL_BP_101_96216_20111021_181529_inLine +BABEL_BP_101_96216_20111021_181529_outLine +BABEL_BP_101_96283_20111115_154603_inLine +BABEL_BP_101_96283_20111115_154603_outLine +BABEL_BP_101_96322_20111031_190734_inLine +BABEL_BP_101_96322_20111031_190734_outLine +BABEL_BP_101_96347_20111024_201758_inLine +BABEL_BP_101_96347_20111024_201758_outLine +BABEL_BP_101_96438_20111125_195114_inLine +BABEL_BP_101_96438_20111125_195114_outLine +BABEL_BP_101_96630_20111104_005203_inLine +BABEL_BP_101_96630_20111104_005203_outLine +BABEL_BP_101_97274_20111023_151720_inLine +BABEL_BP_101_97274_20111023_151720_outLine +BABEL_BP_101_97405_20111019_151334_inLine +BABEL_BP_101_97405_20111019_151334_outLine +BABEL_BP_101_97629_20111130_000852_inLine +BABEL_BP_101_97629_20111130_000852_outLine +BABEL_BP_101_97650_20111126_144341_inLine +BABEL_BP_101_97650_20111126_144341_outLine +BABEL_BP_101_98086_20111129_161539_inLine +BABEL_BP_101_98086_20111129_161539_outLine +BABEL_BP_101_98099_20111120_130108_inLine +BABEL_BP_101_98099_20111120_130108_outLine +BABEL_BP_101_98219_20111125_155849_inLine +BABEL_BP_101_98219_20111125_155849_outLine +BABEL_BP_101_98279_20111122_195453_inLine +BABEL_BP_101_98279_20111122_195453_outLine +BABEL_BP_101_98345_20111020_205712_outLine +BABEL_BP_101_98467_20111020_152253_inLine +BABEL_BP_101_98467_20111020_152253_outLine +BABEL_BP_101_98476_20111114_220758_inLine +BABEL_BP_101_98476_20111114_220758_outLine +BABEL_BP_101_99061_20111020_183348_outLine +BABEL_BP_101_99562_20111205_235804_inLine +BABEL_BP_101_99562_20111205_235804_outLine +BABEL_BP_101_99571_20111024_164204_inLine +BABEL_BP_101_99571_20111024_164204_outLine +BABEL_BP_101_99856_20111125_184505_inLine +BABEL_BP_101_99856_20111125_184505_outLine diff --git a/egs/babel/s5d/conf/lists/101-cantonese/train.LimitedLP.list b/egs/babel/s5d/conf/lists/101-cantonese/train.LimitedLP.list new file mode 100644 index 00000000000..84f6e984f4b --- /dev/null +++ b/egs/babel/s5d/conf/lists/101-cantonese/train.LimitedLP.list @@ -0,0 +1,120 @@ +BABEL_BP_101_11694_20111204_205320_inLine +BABEL_BP_101_11694_20111204_205320_outLine +BABEL_BP_101_14054_20111119_163712_inLine +BABEL_BP_101_14054_20111119_163712_outLine +BABEL_BP_101_14729_20111114_200940_inLine 
+BABEL_BP_101_14729_20111114_200940_outLine +BABEL_BP_101_15873_20111027_121806_inLine +BABEL_BP_101_15873_20111027_121806_outLine +BABEL_BP_101_16617_20111030_144124_inLine +BABEL_BP_101_16617_20111030_144124_outLine +BABEL_BP_101_16883_20111122_184255_inLine +BABEL_BP_101_16883_20111122_184255_outLine +BABEL_BP_101_17933_20111120_204846_inLine +BABEL_BP_101_17933_20111120_204846_outLine +BABEL_BP_101_19012_20111122_173413_inLine +BABEL_BP_101_19012_20111122_173413_outLine +BABEL_BP_101_20408_20111101_210200_inLine +BABEL_BP_101_20408_20111101_210200_outLine +BABEL_BP_101_21367_20111126_132150_inLine +BABEL_BP_101_21367_20111126_132150_outLine +BABEL_BP_101_21946_20111122_150655_inLine +BABEL_BP_101_21946_20111122_150655_outLine +BABEL_BP_101_22979_20111129_142742_inLine +BABEL_BP_101_22979_20111129_142742_outLine +BABEL_BP_101_23168_20111120_192134_inLine +BABEL_BP_101_23168_20111120_192134_outLine +BABEL_BP_101_23571_20111128_232031_inLine +BABEL_BP_101_23571_20111128_232031_outLine +BABEL_BP_101_28204_20111025_133714_inLine +BABEL_BP_101_28204_20111025_133714_outLine +BABEL_BP_101_36424_20111119_145307_inLine +BABEL_BP_101_36424_20111119_145307_outLine +BABEL_BP_101_37285_20111028_003951_inLine +BABEL_BP_101_37285_20111028_003951_outLine +BABEL_BP_101_38108_20111125_153427_inLine +BABEL_BP_101_38108_20111125_153427_outLine +BABEL_BP_101_38879_20111029_193700_inLine +BABEL_BP_101_38879_20111029_193701_outLine +BABEL_BP_101_40123_20111129_182232_inLine +BABEL_BP_101_40123_20111129_182232_outLine +BABEL_BP_101_40439_20111203_182814_inLine +BABEL_BP_101_40439_20111203_182814_outLine +BABEL_BP_101_42145_20111117_131023_inLine +BABEL_BP_101_42145_20111117_131023_outLine +BABEL_BP_101_44836_20111119_154154_inLine +BABEL_BP_101_44836_20111119_154154_outLine +BABEL_BP_101_46332_20111103_181337_inLine +BABEL_BP_101_46332_20111103_181337_outLine +BABEL_BP_101_50409_20111204_161529_inLine +BABEL_BP_101_50409_20111204_161529_outLine +BABEL_BP_101_50476_20111130_010429_inLine +BABEL_BP_101_50476_20111130_010429_outLine +BABEL_BP_101_53985_20111027_134232_inLine +BABEL_BP_101_53985_20111027_134232_outLine +BABEL_BP_101_54787_20111027_003335_inLine +BABEL_BP_101_54787_20111027_003335_outLine +BABEL_BP_101_56648_20111126_183128_inLine +BABEL_BP_101_56648_20111126_183128_outLine +BABEL_BP_101_58190_20111124_203150_inLine +BABEL_BP_101_58190_20111124_203150_outLine +BABEL_BP_101_58357_20111122_155154_inLine +BABEL_BP_101_58357_20111122_155154_outLine +BABEL_BP_101_59028_20111130_201120_inLine +BABEL_BP_101_59028_20111130_201120_outLine +BABEL_BP_101_59925_20111203_131501_inLine +BABEL_BP_101_59925_20111203_131501_outLine +BABEL_BP_101_63459_20111120_165000_inLine +BABEL_BP_101_63459_20111120_165000_outLine +BABEL_BP_101_66839_20111120_192904_inLine +BABEL_BP_101_66839_20111120_192904_outLine +BABEL_BP_101_66964_20111117_173710_inLine +BABEL_BP_101_66964_20111117_173710_outLine +BABEL_BP_101_67798_20111104_013951_inLine +BABEL_BP_101_67798_20111104_013951_outLine +BABEL_BP_101_68129_20111120_133854_inLine +BABEL_BP_101_68129_20111120_133854_outLine +BABEL_BP_101_69049_20111102_140355_inLine +BABEL_BP_101_69049_20111102_140355_outLine +BABEL_BP_101_74395_20111117_135831_inLine +BABEL_BP_101_74395_20111117_135831_outLine +BABEL_BP_101_76944_20111114_210715_inLine +BABEL_BP_101_76944_20111114_210715_outLine +BABEL_BP_101_77244_20111127_021035_inLine +BABEL_BP_101_77244_20111127_021035_outLine +BABEL_BP_101_78879_20111102_133430_inLine +BABEL_BP_101_78879_20111102_133430_outLine 
+BABEL_BP_101_80008_20111031_180815_inLine +BABEL_BP_101_80008_20111031_180815_outLine +BABEL_BP_101_80535_20111206_193024_inLine +BABEL_BP_101_80535_20111206_193024_outLine +BABEL_BP_101_81486_20111027_163851_inLine +BABEL_BP_101_81486_20111027_163851_outLine +BABEL_BP_101_82217_20111115_191956_inLine +BABEL_BP_101_82217_20111115_191956_outLine +BABEL_BP_101_86016_20111118_140325_inLine +BABEL_BP_101_86016_20111118_140326_outLine +BABEL_BP_101_88464_20111119_194433_inLine +BABEL_BP_101_88464_20111119_194433_outLine +BABEL_BP_101_91358_20111207_171552_inLine +BABEL_BP_101_91358_20111207_171552_outLine +BABEL_BP_101_91406_20111114_221433_inLine +BABEL_BP_101_91406_20111114_221433_outLine +BABEL_BP_101_92321_20111125_152246_inLine +BABEL_BP_101_92321_20111125_152246_outLine +BABEL_BP_101_92628_20111102_131604_inLine +BABEL_BP_101_92628_20111102_131604_outLine +BABEL_BP_101_94696_20111203_191827_inLine +BABEL_BP_101_94696_20111203_191827_outLine +BABEL_BP_101_94989_20111028_152522_inLine +BABEL_BP_101_94989_20111028_152522_outLine +BABEL_BP_101_95542_20111026_190957_inLine +BABEL_BP_101_95542_20111026_190957_outLine +BABEL_BP_101_96438_20111125_195114_inLine +BABEL_BP_101_96438_20111125_195114_outLine +BABEL_BP_101_96630_20111104_005203_inLine +BABEL_BP_101_96630_20111104_005203_outLine +BABEL_BP_101_98086_20111129_161539_inLine +BABEL_BP_101_98086_20111129_161539_outLine +BABEL_BP_101_98219_20111125_155849_inLine +BABEL_BP_101_98219_20111125_155849_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/dev.list b/egs/babel/s5d/conf/lists/102-assamese/dev.list new file mode 100644 index 00000000000..044d46cc85a --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/dev.list @@ -0,0 +1,126 @@ +BABEL_OP1_102_10408_20121105_223454_inLine +BABEL_OP1_102_10408_20121105_223454_outLine +BABEL_OP1_102_10925_20120329_192327_inLine +BABEL_OP1_102_10925_20120329_192327_outLine +BABEL_OP1_102_13450_20120421_200138_inLine +BABEL_OP1_102_13450_20120421_200138_outLine +BABEL_OP1_102_13879_20121112_220931_inLine +BABEL_OP1_102_13879_20121112_220931_outLine +BABEL_OP1_102_17900_20120331_195842_inLine +BABEL_OP1_102_17900_20120331_195842_outLine +BABEL_OP1_102_18672_20120614_212320_inLine +BABEL_OP1_102_18672_20120614_212320_outLine +BABEL_OP1_102_20518_20120618_155945_inLine +BABEL_OP1_102_20518_20120618_155945_outLine +BABEL_OP1_102_21370_20120410_231048_inLine +BABEL_OP1_102_21370_20120410_231048_outLine +BABEL_OP1_102_25502_20120404_190523_inLine +BABEL_OP1_102_25502_20120404_190523_outLine +BABEL_OP1_102_27178_20120409_211226_inLine +BABEL_OP1_102_27178_20120409_211226_outLine +BABEL_OP1_102_27698_20120328_165641_inLine +BABEL_OP1_102_27698_20120328_165641_outLine +BABEL_OP1_102_29988_20120805_160211_inLine +BABEL_OP1_102_29988_20120805_160211_outLine +BABEL_OP1_102_31345_20121010_194432_inLine +BABEL_OP1_102_31345_20121010_194432_outLine +BABEL_OP1_102_31345_20121010_195905_inLine +BABEL_OP1_102_31345_20121010_195905_outLine +BABEL_OP1_102_32962_20120427_215011_inLine +BABEL_OP1_102_32962_20120427_215011_outLine +BABEL_OP1_102_33704_20130204_172729_inLine +BABEL_OP1_102_33704_20130204_172729_outLine +BABEL_OP1_102_33969_20130123_165132_inLine +BABEL_OP1_102_33969_20130123_165132_outLine +BABEL_OP1_102_34446_20120426_194557_inLine +BABEL_OP1_102_34446_20120426_194557_outLine +BABEL_OP1_102_34446_20120426_195519_inLine +BABEL_OP1_102_34446_20120426_195519_outLine +BABEL_OP1_102_35470_20130122_212719_inLine +BABEL_OP1_102_35470_20130122_212719_outLine +BABEL_OP1_102_36391_20130127_213459_inLine 
+BABEL_OP1_102_36391_20130127_213459_outLine +BABEL_OP1_102_38879_20120410_224941_inLine +BABEL_OP1_102_38879_20120410_224941_outLine +BABEL_OP1_102_40385_20121224_164959_inLine +BABEL_OP1_102_40385_20121224_164959_outLine +BABEL_OP1_102_41989_20120410_220614_inLine +BABEL_OP1_102_41989_20120410_220614_outLine +BABEL_OP1_102_43587_20120607_204145_inLine +BABEL_OP1_102_43587_20120607_204145_outLine +BABEL_OP1_102_45106_20120318_191747_inLine +BABEL_OP1_102_45106_20120318_191747_outLine +BABEL_OP1_102_45678_20120328_224850_inLine +BABEL_OP1_102_45678_20120328_224850_outLine +BABEL_OP1_102_45786_20121016_025157_inLine +BABEL_OP1_102_45786_20121016_025157_outLine +BABEL_OP1_102_46593_20121010_023019_inLine +BABEL_OP1_102_46593_20121010_023019_outLine +BABEL_OP1_102_47429_20130121_172000_inLine +BABEL_OP1_102_47429_20130121_172000_outLine +BABEL_OP1_102_47469_20120411_181423_inLine +BABEL_OP1_102_47469_20120411_181423_outLine +BABEL_OP1_102_48812_20120420_004425_inLine +BABEL_OP1_102_48812_20120420_004425_outLine +BABEL_OP1_102_49351_20121214_224227_inLine +BABEL_OP1_102_49351_20121214_224227_outLine +BABEL_OP1_102_50589_20120401_163239_inLine +BABEL_OP1_102_50589_20120401_163239_outLine +BABEL_OP1_102_53179_20121009_225324_inLine +BABEL_OP1_102_53179_20121009_225324_outLine +BABEL_OP1_102_54358_20120908_182858_inLine +BABEL_OP1_102_54358_20120908_182858_outLine +BABEL_OP1_102_54785_20120928_184426_inLine +BABEL_OP1_102_54785_20120928_184426_outLine +BABEL_OP1_102_55355_20121007_010642_inLine +BABEL_OP1_102_55355_20121007_010642_outLine +BABEL_OP1_102_56868_20120410_224604_inLine +BABEL_OP1_102_56868_20120410_224604_outLine +BABEL_OP1_102_59544_20120401_222134_inLine +BABEL_OP1_102_59544_20120401_222134_outLine +BABEL_OP1_102_59746_20120820_014637_inLine +BABEL_OP1_102_59746_20120820_014637_outLine +BABEL_OP1_102_62160_20120422_220826_inLine +BABEL_OP1_102_62160_20120422_220826_outLine +BABEL_OP1_102_64661_20120422_194219_inLine +BABEL_OP1_102_64661_20120422_194219_outLine +BABEL_OP1_102_64880_20121107_190955_inLine +BABEL_OP1_102_64880_20121107_190955_outLine +BABEL_OP1_102_66103_20121006_184826_inLine +BABEL_OP1_102_66103_20121006_184826_outLine +BABEL_OP1_102_66668_20120409_185702_inLine +BABEL_OP1_102_66668_20120409_185702_outLine +BABEL_OP1_102_68706_20120412_221059_inLine +BABEL_OP1_102_68706_20120412_221100_outLine +BABEL_OP1_102_69052_20120506_162432_inLine +BABEL_OP1_102_69052_20120506_162432_outLine +BABEL_OP1_102_70643_20121108_030513_inLine +BABEL_OP1_102_70643_20121108_030513_outLine +BABEL_OP1_102_73122_20120427_225442_inLine +BABEL_OP1_102_73122_20120427_225442_outLine +BABEL_OP1_102_73122_20120427_230538_inLine +BABEL_OP1_102_73122_20120427_230538_outLine +BABEL_OP1_102_77886_20120407_215452_inLine +BABEL_OP1_102_77886_20120407_215452_outLine +BABEL_OP1_102_79519_20121008_214502_inLine +BABEL_OP1_102_79519_20121008_214502_outLine +BABEL_OP1_102_80856_20120423_184225_inLine +BABEL_OP1_102_80856_20120423_184225_outLine +BABEL_OP1_102_84042_20120806_194540_inLine +BABEL_OP1_102_84042_20120806_194540_outLine +BABEL_OP1_102_84532_20121222_152400_inLine +BABEL_OP1_102_84532_20121222_152400_outLine +BABEL_OP1_102_84700_20130104_162152_inLine +BABEL_OP1_102_84700_20130104_162152_outLine +BABEL_OP1_102_86305_20120408_170901_inLine +BABEL_OP1_102_86305_20120408_170901_outLine +BABEL_OP1_102_87671_20120401_172420_inLine +BABEL_OP1_102_87671_20120401_172420_outLine +BABEL_OP1_102_87885_20121113_193407_inLine +BABEL_OP1_102_87885_20121113_193407_outLine 
+BABEL_OP1_102_88245_20121010_173153_inLine +BABEL_OP1_102_88245_20121010_173153_outLine +BABEL_OP1_102_88464_20120612_191239_inLine +BABEL_OP1_102_88464_20120612_191239_outLine +BABEL_OP1_102_90313_20120407_173340_inLine +BABEL_OP1_102_90313_20120407_173340_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/eval.list b/egs/babel/s5d/conf/lists/102-assamese/eval.list new file mode 100644 index 00000000000..f9c825384ea --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/eval.list @@ -0,0 +1,189 @@ +BABEL_OP1_102_10033_20120330_194952_inLine +BABEL_OP1_102_10033_20120330_194952_outLine +BABEL_OP1_102_11824_20120425_195521_inLine +BABEL_OP1_102_11824_20120425_195521_outLine +BABEL_OP1_102_13635_20121106_201907_inLine +BABEL_OP1_102_13635_20121106_201907_outLine +BABEL_OP1_102_14075_20120729_184929_inLine +BABEL_OP1_102_14075_20120729_184929_outLine +BABEL_OP1_102_16875_20121224_191839_inLine +BABEL_OP1_102_16875_20121224_191839_outLine +BABEL_OP1_102_16984_20120817_222744_inLine +BABEL_OP1_102_16984_20120817_222744_outLine +BABEL_OP1_102_18648_20121220_162525_inLine +BABEL_OP1_102_18648_20121220_162525_outLine +BABEL_OP1_102_18858_20120328_182337_inLine +BABEL_OP1_102_18858_20120328_182337_outLine +BABEL_OP1_102_19479_20130126_224027_inLine +BABEL_OP1_102_19479_20130126_224027_outLine +BABEL_OP1_102_20483_20120427_223135_inLine +BABEL_OP1_102_20483_20120427_223135_outLine +BABEL_OP1_102_20685_20120327_193652_inLine +BABEL_OP1_102_20685_20120327_193652_outLine +BABEL_OP1_102_22566_20121106_194723_inLine +BABEL_OP1_102_22566_20121106_194723_outLine +BABEL_OP1_102_24379_20120928_162955_inLine +BABEL_OP1_102_24379_20120928_162955_outLine +BABEL_OP1_102_27363_20121106_193315_inLine +BABEL_OP1_102_27363_20121106_193315_outLine +BABEL_OP1_102_27645_20121001_010501_inLine +BABEL_OP1_102_27645_20121001_010501_outLine +BABEL_OP1_102_28754_20130128_193759_inLine +BABEL_OP1_102_28754_20130128_193759_outLine +BABEL_OP1_102_28754_20130128_194940_inLine +BABEL_OP1_102_28754_20130128_194940_outLine +BABEL_OP1_102_28768_20121219_231954_inLine +BABEL_OP1_102_28768_20121219_231954_outLine +BABEL_OP1_102_29268_20120410_182212_inLine +BABEL_OP1_102_29268_20120410_182212_outLine +BABEL_OP1_102_29290_20120408_172044_inLine +BABEL_OP1_102_29290_20120408_172044_outLine +BABEL_OP1_102_30210_20121104_182918_outLine +BABEL_OP1_102_32452_20120427_181559_inLine +BABEL_OP1_102_32452_20120427_181559_outLine +BABEL_OP1_102_32452_20120427_183038_inLine +BABEL_OP1_102_32452_20120427_183038_outLine +BABEL_OP1_102_32890_20121114_200236_inLine +BABEL_OP1_102_32890_20121114_200236_outLine +BABEL_OP1_102_34439_20121106_033220_inLine +BABEL_OP1_102_34439_20121106_033220_outLine +BABEL_OP1_102_39915_20130126_231519_inLine +BABEL_OP1_102_39915_20130126_231519_outLine +BABEL_OP1_102_41590_20121114_173839_inLine +BABEL_OP1_102_41590_20121114_173839_outLine +BABEL_OP1_102_42212_20121108_215733_inLine +BABEL_OP1_102_42212_20121108_215733_outLine +BABEL_OP1_102_42768_20120719_001335_inLine +BABEL_OP1_102_42768_20120719_001335_outLine +BABEL_OP1_102_44369_20121104_184516_inLine +BABEL_OP1_102_44369_20121104_184516_outLine +BABEL_OP1_102_44827_20130127_025842_inLine +BABEL_OP1_102_44827_20130127_025842_outLine +BABEL_OP1_102_45472_20120328_164753_inLine +BABEL_OP1_102_45472_20120328_164753_outLine +BABEL_OP1_102_45570_20120716_014312_inLine +BABEL_OP1_102_45570_20120716_014312_outLine +BABEL_OP1_102_46409_20130127_020220_inLine +BABEL_OP1_102_46409_20130127_020220_outLine +BABEL_OP1_102_46427_20120623_181054_inLine 
+BABEL_OP1_102_46427_20120623_181054_outLine +BABEL_OP1_102_46813_20120722_222747_inLine +BABEL_OP1_102_46813_20120722_222747_outLine +BABEL_OP1_102_46950_20130128_024910_inLine +BABEL_OP1_102_46950_20130128_024910_outLine +BABEL_OP1_102_47249_20121110_184344_inLine +BABEL_OP1_102_47249_20121110_184344_outLine +BABEL_OP1_102_48072_20120405_174716_inLine +BABEL_OP1_102_48072_20120405_174716_outLine +BABEL_OP1_102_48188_20121114_175337_inLine +BABEL_OP1_102_48188_20121114_175337_outLine +BABEL_OP1_102_48191_20121222_233713_inLine +BABEL_OP1_102_48191_20121222_233713_outLine +BABEL_OP1_102_48404_20121223_171643_inLine +BABEL_OP1_102_48404_20121223_171643_outLine +BABEL_OP1_102_49020_20121114_165007_inLine +BABEL_OP1_102_49020_20121114_165007_outLine +BABEL_OP1_102_49306_20120807_210522_inLine +BABEL_OP1_102_49306_20120807_210522_outLine +BABEL_OP1_102_49476_20120623_191532_inLine +BABEL_OP1_102_49476_20120623_191532_outLine +BABEL_OP1_102_50915_20130127_185334_inLine +BABEL_OP1_102_50915_20130127_185334_outLine +BABEL_OP1_102_51374_20120328_232452_inLine +BABEL_OP1_102_51374_20120328_232452_outLine +BABEL_OP1_102_51791_20120729_183323_inLine +BABEL_OP1_102_51791_20120729_183323_outLine +BABEL_OP1_102_53866_20120401_203758_inLine +BABEL_OP1_102_53866_20120401_203758_outLine +BABEL_OP1_102_54315_20120420_202214_inLine +BABEL_OP1_102_54315_20120420_202214_outLine +BABEL_OP1_102_55144_20120418_220307_inLine +BABEL_OP1_102_55144_20120418_220307_outLine +BABEL_OP1_102_55369_20120331_183350_inLine +BABEL_OP1_102_55369_20120331_183350_outLine +BABEL_OP1_102_55369_20120331_184706_inLine +BABEL_OP1_102_55369_20120331_184706_outLine +BABEL_OP1_102_55678_20120411_170804_inLine +BABEL_OP1_102_55678_20120411_170804_outLine +BABEL_OP1_102_57071_20120806_181947_inLine +BABEL_OP1_102_57071_20120806_181947_outLine +BABEL_OP1_102_57551_20120423_192651_inLine +BABEL_OP1_102_57551_20120423_192651_outLine +BABEL_OP1_102_57609_20121003_192352_inLine +BABEL_OP1_102_57609_20121003_192352_outLine +BABEL_OP1_102_57625_20121002_011432_inLine +BABEL_OP1_102_57625_20121002_011432_outLine +BABEL_OP1_102_57637_20130127_030012_inLine +BABEL_OP1_102_57637_20130127_030012_outLine +BABEL_OP1_102_59147_20120329_204323_inLine +BABEL_OP1_102_59147_20120329_204323_outLine +BABEL_OP1_102_65783_20130127_014613_inLine +BABEL_OP1_102_65783_20130127_014613_outLine +BABEL_OP1_102_66798_20120401_215538_inLine +BABEL_OP1_102_66798_20120401_215538_outLine +BABEL_OP1_102_67555_20120401_162516_inLine +BABEL_OP1_102_67555_20120401_162516_outLine +BABEL_OP1_102_68028_20121014_031021_inLine +BABEL_OP1_102_68028_20121014_031021_outLine +BABEL_OP1_102_68136_20130127_022217_inLine +BABEL_OP1_102_68136_20130127_022217_outLine +BABEL_OP1_102_69473_20121104_215944_inLine +BABEL_OP1_102_69473_20121104_215944_outLine +BABEL_OP1_102_70906_20121104_210914_inLine +BABEL_OP1_102_70906_20121104_210914_outLine +BABEL_OP1_102_70975_20130126_220855_inLine +BABEL_OP1_102_70975_20130126_220855_outLine +BABEL_OP1_102_73205_20120409_210950_inLine +BABEL_OP1_102_73205_20120409_210950_outLine +BABEL_OP1_102_74062_20121225_190622_inLine +BABEL_OP1_102_74062_20121225_190622_outLine +BABEL_OP1_102_74607_20120425_221930_inLine +BABEL_OP1_102_74607_20120425_221930_outLine +BABEL_OP1_102_75020_20120328_234502_inLine +BABEL_OP1_102_75020_20120328_234502_outLine +BABEL_OP1_102_76333_20130127_032712_inLine +BABEL_OP1_102_76333_20130127_032712_outLine +BABEL_OP1_102_76372_20121112_041800_inLine +BABEL_OP1_102_76372_20121112_041800_outLine 
+BABEL_OP1_102_76763_20120330_231328_inLine +BABEL_OP1_102_76763_20120330_231328_outLine +BABEL_OP1_102_76878_20121112_041639_inLine +BABEL_OP1_102_76878_20121112_041639_outLine +BABEL_OP1_102_76925_20130127_021046_inLine +BABEL_OP1_102_76925_20130127_021046_outLine +BABEL_OP1_102_77584_20121114_173809_inLine +BABEL_OP1_102_77584_20121114_173809_outLine +BABEL_OP1_102_77965_20120327_195119_inLine +BABEL_OP1_102_77965_20120327_195119_outLine +BABEL_OP1_102_78245_20120421_181224_inLine +BABEL_OP1_102_78245_20120421_181224_outLine +BABEL_OP1_102_78728_20120430_194848_inLine +BABEL_OP1_102_78728_20120430_194848_outLine +BABEL_OP1_102_81944_20121112_011411_inLine +BABEL_OP1_102_81944_20121112_011411_outLine +BABEL_OP1_102_83053_20130209_201738_inLine +BABEL_OP1_102_83053_20130209_201738_outLine +BABEL_OP1_102_83053_20130209_224536_inLine +BABEL_OP1_102_83053_20130209_224536_outLine +BABEL_OP1_102_83362_20120419_230220_inLine +BABEL_OP1_102_83362_20120419_230220_outLine +BABEL_OP1_102_83585_20120428_191954_inLine +BABEL_OP1_102_83585_20120428_191954_outLine +BABEL_OP1_102_86014_20120607_010221_inLine +BABEL_OP1_102_86014_20120607_010221_outLine +BABEL_OP1_102_88385_20121226_173154_inLine +BABEL_OP1_102_88385_20121226_173154_outLine +BABEL_OP1_102_88932_20120428_164025_inLine +BABEL_OP1_102_88932_20120428_164025_outLine +BABEL_OP1_102_89301_20120927_001102_inLine +BABEL_OP1_102_89301_20120927_001102_outLine +BABEL_OP1_102_91660_20130123_181342_inLine +BABEL_OP1_102_91660_20130123_181342_outLine +BABEL_OP1_102_93000_20120426_203959_inLine +BABEL_OP1_102_93000_20120426_203959_outLine +BABEL_OP1_102_93454_20120331_220854_inLine +BABEL_OP1_102_93454_20120331_220854_outLine +BABEL_OP1_102_95572_20130128_023142_inLine +BABEL_OP1_102_95572_20130128_023142_outLine +BABEL_OP1_102_95952_20121111_182203_inLine +BABEL_OP1_102_95952_20121111_182203_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/evalpart1.list b/egs/babel/s5d/conf/lists/102-assamese/evalpart1.list new file mode 100644 index 00000000000..b6a7ec78017 --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/evalpart1.list @@ -0,0 +1,65 @@ +BABEL_OP1_102_11824_20120425_195521_inLine +BABEL_OP1_102_11824_20120425_195521_outLine +BABEL_OP1_102_16984_20120817_222744_inLine +BABEL_OP1_102_16984_20120817_222744_outLine +BABEL_OP1_102_18858_20120328_182337_inLine +BABEL_OP1_102_18858_20120328_182337_outLine +BABEL_OP1_102_20685_20120327_193652_inLine +BABEL_OP1_102_20685_20120327_193652_outLine +BABEL_OP1_102_22566_20121106_194723_inLine +BABEL_OP1_102_22566_20121106_194723_outLine +BABEL_OP1_102_24379_20120928_162955_inLine +BABEL_OP1_102_24379_20120928_162955_outLine +BABEL_OP1_102_27645_20121001_010501_inLine +BABEL_OP1_102_27645_20121001_010501_outLine +BABEL_OP1_102_28754_20130128_193759_inLine +BABEL_OP1_102_28754_20130128_193759_outLine +BABEL_OP1_102_28754_20130128_194940_inLine +BABEL_OP1_102_28754_20130128_194940_outLine +BABEL_OP1_102_28768_20121219_231954_inLine +BABEL_OP1_102_28768_20121219_231954_outLine +BABEL_OP1_102_29268_20120410_182212_inLine +BABEL_OP1_102_29268_20120410_182212_outLine +BABEL_OP1_102_30210_20121104_182918_outLine +BABEL_OP1_102_42768_20120719_001335_inLine +BABEL_OP1_102_42768_20120719_001335_outLine +BABEL_OP1_102_45570_20120716_014312_inLine +BABEL_OP1_102_45570_20120716_014312_outLine +BABEL_OP1_102_46427_20120623_181054_inLine +BABEL_OP1_102_46427_20120623_181054_outLine +BABEL_OP1_102_46813_20120722_222747_inLine +BABEL_OP1_102_46813_20120722_222747_outLine 
+BABEL_OP1_102_47249_20121110_184344_inLine +BABEL_OP1_102_47249_20121110_184344_outLine +BABEL_OP1_102_49476_20120623_191532_inLine +BABEL_OP1_102_49476_20120623_191532_outLine +BABEL_OP1_102_51791_20120729_183323_inLine +BABEL_OP1_102_51791_20120729_183323_outLine +BABEL_OP1_102_57551_20120423_192651_inLine +BABEL_OP1_102_57551_20120423_192651_outLine +BABEL_OP1_102_57625_20121002_011432_inLine +BABEL_OP1_102_57625_20121002_011432_outLine +BABEL_OP1_102_66798_20120401_215538_inLine +BABEL_OP1_102_66798_20120401_215538_outLine +BABEL_OP1_102_70906_20121104_210914_inLine +BABEL_OP1_102_70906_20121104_210914_outLine +BABEL_OP1_102_73205_20120409_210950_inLine +BABEL_OP1_102_73205_20120409_210950_outLine +BABEL_OP1_102_74062_20121225_190622_inLine +BABEL_OP1_102_74062_20121225_190622_outLine +BABEL_OP1_102_78245_20120421_181224_inLine +BABEL_OP1_102_78245_20120421_181224_outLine +BABEL_OP1_102_81944_20121112_011411_inLine +BABEL_OP1_102_81944_20121112_011411_outLine +BABEL_OP1_102_83053_20130209_201738_inLine +BABEL_OP1_102_83053_20130209_201738_outLine +BABEL_OP1_102_83053_20130209_224536_inLine +BABEL_OP1_102_83053_20130209_224536_outLine +BABEL_OP1_102_83362_20120419_230220_inLine +BABEL_OP1_102_83362_20120419_230220_outLine +BABEL_OP1_102_83585_20120428_191954_inLine +BABEL_OP1_102_83585_20120428_191954_outLine +BABEL_OP1_102_93000_20120426_203959_inLine +BABEL_OP1_102_93000_20120426_203959_outLine +BABEL_OP1_102_93454_20120331_220854_inLine +BABEL_OP1_102_93454_20120331_220854_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/train.FullLP.list b/egs/babel/s5d/conf/lists/102-assamese/train.FullLP.list new file mode 100644 index 00000000000..4e388dab16c --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/train.FullLP.list @@ -0,0 +1,790 @@ +BABEL_OP1_102_10187_20120405_173448_inLine +BABEL_OP1_102_10187_20120405_173448_outLine +BABEL_OP1_102_10271_20120729_173749_inLine +BABEL_OP1_102_10271_20120729_173749_outLine +BABEL_OP1_102_10713_20120401_204236_inLine +BABEL_OP1_102_10713_20120401_204236_outLine +BABEL_OP1_102_11004_20120420_213442_inLine +BABEL_OP1_102_11004_20120420_213442_outLine +BABEL_OP1_102_11031_20120926_231829_inLine +BABEL_OP1_102_11031_20120926_231829_outLine +BABEL_OP1_102_11036_20120406_202335_inLine +BABEL_OP1_102_11036_20120406_202335_outLine +BABEL_OP1_102_11158_20121008_011850_inLine +BABEL_OP1_102_11158_20121008_011850_outLine +BABEL_OP1_102_11371_20120327_175933_inLine +BABEL_OP1_102_11371_20120327_175933_outLine +BABEL_OP1_102_11521_20121005_005530_inLine +BABEL_OP1_102_11521_20121005_005530_outLine +BABEL_OP1_102_11694_20121108_184639_inLine +BABEL_OP1_102_11694_20121108_184639_outLine +BABEL_OP1_102_12120_20121105_205527_inLine +BABEL_OP1_102_12120_20121105_205527_outLine +BABEL_OP1_102_12486_20121009_231421_inLine +BABEL_OP1_102_12486_20121009_231421_outLine +BABEL_OP1_102_12535_20121009_024245_inLine +BABEL_OP1_102_12535_20121009_024245_outLine +BABEL_OP1_102_12552_20120727_023454_inLine +BABEL_OP1_102_12552_20120727_023454_outLine +BABEL_OP1_102_12643_20121108_184648_inLine +BABEL_OP1_102_12643_20121108_184648_outLine +BABEL_OP1_102_12655_20120318_171708_inLine +BABEL_OP1_102_12655_20120318_171708_outLine +BABEL_OP1_102_12844_20120411_193813_inLine +BABEL_OP1_102_12844_20120411_193813_outLine +BABEL_OP1_102_13229_20130127_023814_inLine +BABEL_OP1_102_13229_20130127_023814_outLine +BABEL_OP1_102_13389_20120406_184440_inLine +BABEL_OP1_102_13389_20120406_184440_outLine +BABEL_OP1_102_13702_20130121_185149_inLine 
+BABEL_OP1_102_13702_20130121_185149_outLine +BABEL_OP1_102_13913_20120807_001423_inLine +BABEL_OP1_102_13913_20120807_001423_outLine +BABEL_OP1_102_14769_20120926_165746_inLine +BABEL_OP1_102_14769_20120926_165746_outLine +BABEL_OP1_102_14874_20120417_153112_inLine +BABEL_OP1_102_14874_20120417_153112_outLine +BABEL_OP1_102_14891_20121009_003232_inLine +BABEL_OP1_102_14891_20121009_003232_outLine +BABEL_OP1_102_15146_20120318_184752_inLine +BABEL_OP1_102_15146_20120318_184752_outLine +BABEL_OP1_102_15234_20121108_022333_inLine +BABEL_OP1_102_15234_20121108_022333_outLine +BABEL_OP1_102_15493_20130127_203044_inLine +BABEL_OP1_102_15493_20130127_203044_outLine +BABEL_OP1_102_15502_20120419_233859_inLine +BABEL_OP1_102_15502_20120419_233859_outLine +BABEL_OP1_102_15502_20120420_000213_inLine +BABEL_OP1_102_15502_20120420_000213_outLine +BABEL_OP1_102_15881_20120331_215830_inLine +BABEL_OP1_102_15881_20120331_215830_outLine +BABEL_OP1_102_15916_20120428_221806_inLine +BABEL_OP1_102_15916_20120428_221806_outLine +BABEL_OP1_102_16167_20130122_175936_inLine +BABEL_OP1_102_16167_20130122_175936_outLine +BABEL_OP1_102_16185_20121105_042129_inLine +BABEL_OP1_102_16185_20121105_042129_outLine +BABEL_OP1_102_16313_20120331_215132_inLine +BABEL_OP1_102_16313_20120331_215132_outLine +BABEL_OP1_102_16669_20120327_202211_inLine +BABEL_OP1_102_16669_20120327_202211_outLine +BABEL_OP1_102_17013_20121105_230820_inLine +BABEL_OP1_102_17013_20121105_230820_outLine +BABEL_OP1_102_17203_20121221_161532_inLine +BABEL_OP1_102_17203_20121221_161532_outLine +BABEL_OP1_102_17207_20120729_230128_inLine +BABEL_OP1_102_17207_20120729_230128_outLine +BABEL_OP1_102_17572_20120806_235812_inLine +BABEL_OP1_102_17572_20120806_235812_outLine +BABEL_OP1_102_17933_20120607_184111_inLine +BABEL_OP1_102_17933_20120607_184111_outLine +BABEL_OP1_102_18344_20121109_192858_inLine +BABEL_OP1_102_18344_20121109_192858_outLine +BABEL_OP1_102_18534_20121105_185859_inLine +BABEL_OP1_102_18534_20121105_185859_outLine +BABEL_OP1_102_18730_20130122_171244_inLine +BABEL_OP1_102_18730_20130122_171244_outLine +BABEL_OP1_102_18802_20121104_232940_inLine +BABEL_OP1_102_18802_20121104_232940_outLine +BABEL_OP1_102_19063_20130209_231415_inLine +BABEL_OP1_102_19063_20130209_231415_outLine +BABEL_OP1_102_19147_20120329_190609_inLine +BABEL_OP1_102_19147_20120329_190609_outLine +BABEL_OP1_102_19456_20121110_201037_inLine +BABEL_OP1_102_19456_20121110_201037_outLine +BABEL_OP1_102_19731_20130123_200845_inLine +BABEL_OP1_102_19731_20130123_200845_outLine +BABEL_OP1_102_19758_20120417_174950_inLine +BABEL_OP1_102_19758_20120417_174950_outLine +BABEL_OP1_102_19867_20130127_211111_inLine +BABEL_OP1_102_19867_20130127_211111_outLine +BABEL_OP1_102_20271_20120410_205746_inLine +BABEL_OP1_102_20271_20120410_205746_outLine +BABEL_OP1_102_20320_20120409_212129_inLine +BABEL_OP1_102_20320_20120409_212129_outLine +BABEL_OP1_102_20320_20120409_214042_inLine +BABEL_OP1_102_20320_20120409_214042_outLine +BABEL_OP1_102_20454_20121010_020017_inLine +BABEL_OP1_102_20454_20121010_020017_outLine +BABEL_OP1_102_20591_20120806_210212_inLine +BABEL_OP1_102_20591_20120806_210212_outLine +BABEL_OP1_102_21050_20120619_010126_inLine +BABEL_OP1_102_21050_20120619_010126_outLine +BABEL_OP1_102_21477_20120417_212152_inLine +BABEL_OP1_102_21477_20120417_212152_outLine +BABEL_OP1_102_21518_20120805_195607_inLine +BABEL_OP1_102_21518_20120805_195607_outLine +BABEL_OP1_102_21758_20120823_164553_inLine +BABEL_OP1_102_21758_20120823_164553_outLine 
+BABEL_OP1_102_21782_20120422_184156_inLine +BABEL_OP1_102_21782_20120422_184156_outLine +BABEL_OP1_102_22401_20121017_023338_inLine +BABEL_OP1_102_22401_20121017_023338_outLine +BABEL_OP1_102_22408_20120426_225012_inLine +BABEL_OP1_102_22408_20120426_225012_outLine +BABEL_OP1_102_23167_20120329_204718_inLine +BABEL_OP1_102_23167_20120329_204718_outLine +BABEL_OP1_102_24420_20120624_013709_inLine +BABEL_OP1_102_24420_20120624_013709_outLine +BABEL_OP1_102_24661_20121104_224032_inLine +BABEL_OP1_102_24661_20121104_224032_outLine +BABEL_OP1_102_24833_20120410_172706_inLine +BABEL_OP1_102_24833_20120410_172706_outLine +BABEL_OP1_102_25236_20120804_180700_inLine +BABEL_OP1_102_25236_20120804_180700_outLine +BABEL_OP1_102_25576_20120422_180912_inLine +BABEL_OP1_102_25576_20120422_180912_outLine +BABEL_OP1_102_25904_20120611_203203_inLine +BABEL_OP1_102_25904_20120611_203203_outLine +BABEL_OP1_102_25934_20120329_005438_inLine +BABEL_OP1_102_25934_20120329_005438_outLine +BABEL_OP1_102_26348_20121109_170513_inLine +BABEL_OP1_102_26348_20121109_170513_outLine +BABEL_OP1_102_27007_20120611_223823_inLine +BABEL_OP1_102_27007_20120611_223823_outLine +BABEL_OP1_102_27349_20120422_192337_inLine +BABEL_OP1_102_27349_20120422_192337_outLine +BABEL_OP1_102_27427_20120412_182452_inLine +BABEL_OP1_102_27427_20120412_182452_outLine +BABEL_OP1_102_27824_20120427_201104_inLine +BABEL_OP1_102_27824_20120427_201104_outLine +BABEL_OP1_102_27890_20121002_030324_inLine +BABEL_OP1_102_27890_20121002_030324_outLine +BABEL_OP1_102_28016_20120430_193141_inLine +BABEL_OP1_102_28016_20120430_193141_outLine +BABEL_OP1_102_28016_20120430_194530_inLine +BABEL_OP1_102_28016_20120430_194530_outLine +BABEL_OP1_102_28107_20120327_204144_inLine +BABEL_OP1_102_28107_20120327_204144_outLine +BABEL_OP1_102_28204_20120401_204624_inLine +BABEL_OP1_102_28204_20120401_204624_outLine +BABEL_OP1_102_28260_20120329_210829_inLine +BABEL_OP1_102_28260_20120329_210829_outLine +BABEL_OP1_102_28648_20120608_192702_inLine +BABEL_OP1_102_28648_20120608_192702_outLine +BABEL_OP1_102_29168_20120411_174248_inLine +BABEL_OP1_102_29168_20120411_174248_outLine +BABEL_OP1_102_29259_20120612_211621_inLine +BABEL_OP1_102_29259_20120612_211621_outLine +BABEL_OP1_102_29335_20120609_182335_inLine +BABEL_OP1_102_29335_20120609_182335_outLine +BABEL_OP1_102_29335_20120609_183151_inLine +BABEL_OP1_102_29335_20120609_183151_outLine +BABEL_OP1_102_29444_20120331_231513_inLine +BABEL_OP1_102_29444_20120331_231513_outLine +BABEL_OP1_102_29444_20120331_233317_inLine +BABEL_OP1_102_29444_20120331_233317_outLine +BABEL_OP1_102_29512_20120805_170123_inLine +BABEL_OP1_102_29512_20120805_170123_outLine +BABEL_OP1_102_29512_20120805_172610_inLine +BABEL_OP1_102_29512_20120805_172610_outLine +BABEL_OP1_102_29545_20121105_220136_inLine +BABEL_OP1_102_29545_20121105_220136_outLine +BABEL_OP1_102_29959_20130128_195931_inLine +BABEL_OP1_102_29959_20130128_195931_outLine +BABEL_OP1_102_29959_20130128_223813_inLine +BABEL_OP1_102_29959_20130128_223813_outLine +BABEL_OP1_102_30266_20120331_212330_inLine +BABEL_OP1_102_30266_20120331_212330_outLine +BABEL_OP1_102_30530_20120330_173152_inLine +BABEL_OP1_102_30530_20120330_173152_outLine +BABEL_OP1_102_30722_20121011_013755_inLine +BABEL_OP1_102_30722_20121011_013755_outLine +BABEL_OP1_102_31031_20120611_193208_inLine +BABEL_OP1_102_31031_20120611_193208_outLine +BABEL_OP1_102_31902_20120425_211816_inLine +BABEL_OP1_102_31902_20120425_211816_outLine +BABEL_OP1_102_31917_20120611_195339_inLine 
+BABEL_OP1_102_31917_20120611_195339_outLine +BABEL_OP1_102_32011_20121014_024351_inLine +BABEL_OP1_102_32011_20121014_024351_outLine +BABEL_OP1_102_32562_20121010_014014_inLine +BABEL_OP1_102_32562_20121010_014014_outLine +BABEL_OP1_102_32642_20121104_220528_inLine +BABEL_OP1_102_32642_20121104_220528_outLine +BABEL_OP1_102_33023_20120329_224858_inLine +BABEL_OP1_102_33023_20120329_224858_outLine +BABEL_OP1_102_33540_20120401_212225_inLine +BABEL_OP1_102_33540_20120401_212225_outLine +BABEL_OP1_102_33671_20120422_231219_inLine +BABEL_OP1_102_33671_20120422_231219_outLine +BABEL_OP1_102_34169_20120331_183840_inLine +BABEL_OP1_102_34169_20120331_183840_outLine +BABEL_OP1_102_34194_20120330_182542_inLine +BABEL_OP1_102_34194_20120330_182542_outLine +BABEL_OP1_102_34235_20120405_190745_inLine +BABEL_OP1_102_34235_20120405_190745_outLine +BABEL_OP1_102_34480_20121012_193452_inLine +BABEL_OP1_102_34480_20121012_193452_outLine +BABEL_OP1_102_34590_20120417_151435_inLine +BABEL_OP1_102_34590_20120417_151435_outLine +BABEL_OP1_102_34590_20120417_155556_inLine +BABEL_OP1_102_34590_20120417_155556_outLine +BABEL_OP1_102_34930_20120411_200043_inLine +BABEL_OP1_102_34930_20120411_200043_outLine +BABEL_OP1_102_35011_20120420_020024_inLine +BABEL_OP1_102_35011_20120420_020024_outLine +BABEL_OP1_102_35229_20121106_204019_inLine +BABEL_OP1_102_35229_20121106_204019_outLine +BABEL_OP1_102_35324_20120426_180016_inLine +BABEL_OP1_102_35324_20120426_180016_outLine +BABEL_OP1_102_35324_20120426_203214_inLine +BABEL_OP1_102_35324_20120426_203214_outLine +BABEL_OP1_102_35455_20121112_000231_inLine +BABEL_OP1_102_35455_20121112_000231_outLine +BABEL_OP1_102_36868_20130209_201544_inLine +BABEL_OP1_102_36868_20130209_201544_outLine +BABEL_OP1_102_37260_20120808_012733_inLine +BABEL_OP1_102_37260_20120808_012733_outLine +BABEL_OP1_102_37260_20120808_014150_inLine +BABEL_OP1_102_37260_20120808_014150_outLine +BABEL_OP1_102_37268_20121226_203217_inLine +BABEL_OP1_102_37268_20121226_203217_outLine +BABEL_OP1_102_37285_20120405_223443_inLine +BABEL_OP1_102_37285_20120405_223443_outLine +BABEL_OP1_102_37444_20130128_032426_inLine +BABEL_OP1_102_37444_20130128_032426_outLine +BABEL_OP1_102_37461_20120409_191629_inLine +BABEL_OP1_102_37461_20120409_191629_outLine +BABEL_OP1_102_37461_20120409_194138_inLine +BABEL_OP1_102_37461_20120409_194138_outLine +BABEL_OP1_102_37461_20120409_195519_inLine +BABEL_OP1_102_37461_20120409_195519_outLine +BABEL_OP1_102_37524_20120329_182549_inLine +BABEL_OP1_102_37524_20120329_182549_outLine +BABEL_OP1_102_38264_20121105_050622_inLine +BABEL_OP1_102_38264_20121105_050622_outLine +BABEL_OP1_102_38464_20121012_023702_inLine +BABEL_OP1_102_38464_20121012_023702_outLine +BABEL_OP1_102_38592_20121225_215825_inLine +BABEL_OP1_102_38592_20121225_215825_outLine +BABEL_OP1_102_38635_20120607_010931_inLine +BABEL_OP1_102_38635_20120607_010931_outLine +BABEL_OP1_102_38698_20120401_215032_inLine +BABEL_OP1_102_38698_20120401_215032_outLine +BABEL_OP1_102_38863_20121011_183009_inLine +BABEL_OP1_102_38863_20121011_183009_outLine +BABEL_OP1_102_38985_20120806_174824_inLine +BABEL_OP1_102_38985_20120806_174824_outLine +BABEL_OP1_102_38985_20120806_181000_inLine +BABEL_OP1_102_38985_20120806_181000_outLine +BABEL_OP1_102_39098_20120405_203729_inLine +BABEL_OP1_102_39098_20120405_203729_outLine +BABEL_OP1_102_39114_20120930_180045_inLine +BABEL_OP1_102_39114_20120930_180045_outLine +BABEL_OP1_102_39364_20121105_220855_inLine +BABEL_OP1_102_39364_20121105_220855_outLine 
+BABEL_OP1_102_39430_20120411_182026_inLine +BABEL_OP1_102_39430_20120411_182026_outLine +BABEL_OP1_102_39430_20120411_184729_inLine +BABEL_OP1_102_39430_20120411_184729_outLine +BABEL_OP1_102_40133_20121112_214034_inLine +BABEL_OP1_102_40133_20121112_214034_outLine +BABEL_OP1_102_40168_20120428_173400_inLine +BABEL_OP1_102_40168_20120428_173400_outLine +BABEL_OP1_102_40882_20130209_204142_inLine +BABEL_OP1_102_40882_20130209_204142_outLine +BABEL_OP1_102_41561_20121111_220752_inLine +BABEL_OP1_102_41561_20121111_220752_outLine +BABEL_OP1_102_41949_20120426_222144_inLine +BABEL_OP1_102_41949_20120426_222144_outLine +BABEL_OP1_102_42615_20120327_180819_inLine +BABEL_OP1_102_42615_20120327_180819_outLine +BABEL_OP1_102_42651_20120409_221530_inLine +BABEL_OP1_102_42651_20120409_221530_outLine +BABEL_OP1_102_42749_20121114_005458_inLine +BABEL_OP1_102_42749_20121114_005458_outLine +BABEL_OP1_102_42749_20121114_010754_inLine +BABEL_OP1_102_42749_20121114_010754_outLine +BABEL_OP1_102_43383_20120406_193121_inLine +BABEL_OP1_102_43383_20120406_193121_outLine +BABEL_OP1_102_43423_20120919_201131_inLine +BABEL_OP1_102_43423_20120919_201131_outLine +BABEL_OP1_102_43426_20120501_170331_inLine +BABEL_OP1_102_43426_20120501_170331_outLine +BABEL_OP1_102_43553_20120408_174809_inLine +BABEL_OP1_102_43553_20120408_174809_outLine +BABEL_OP1_102_43652_20120428_191659_inLine +BABEL_OP1_102_43652_20120428_191659_outLine +BABEL_OP1_102_44649_20120611_185930_inLine +BABEL_OP1_102_44649_20120611_185930_outLine +BABEL_OP1_102_44829_20120907_011054_inLine +BABEL_OP1_102_44829_20120907_011054_outLine +BABEL_OP1_102_44829_20120907_013730_inLine +BABEL_OP1_102_44829_20120907_013730_outLine +BABEL_OP1_102_45227_20120329_003400_inLine +BABEL_OP1_102_45227_20120329_003400_outLine +BABEL_OP1_102_45361_20120611_222502_inLine +BABEL_OP1_102_45361_20120611_222502_outLine +BABEL_OP1_102_45677_20130123_192645_inLine +BABEL_OP1_102_45677_20130123_192645_outLine +BABEL_OP1_102_45681_20120623_173741_inLine +BABEL_OP1_102_45681_20120623_173741_outLine +BABEL_OP1_102_45738_20120806_202458_inLine +BABEL_OP1_102_45738_20120806_202458_outLine +BABEL_OP1_102_45892_20120408_220557_inLine +BABEL_OP1_102_45892_20120408_220557_outLine +BABEL_OP1_102_45931_20120421_233726_inLine +BABEL_OP1_102_45931_20120421_233726_outLine +BABEL_OP1_102_46002_20121009_215715_inLine +BABEL_OP1_102_46002_20121009_215715_outLine +BABEL_OP1_102_46269_20121110_215228_inLine +BABEL_OP1_102_46269_20121110_215228_outLine +BABEL_OP1_102_46521_20120411_193429_inLine +BABEL_OP1_102_46521_20120411_193429_outLine +BABEL_OP1_102_47634_20120408_214325_inLine +BABEL_OP1_102_47634_20120408_214325_outLine +BABEL_OP1_102_47823_20120804_180038_inLine +BABEL_OP1_102_47823_20120804_180038_outLine +BABEL_OP1_102_48281_20120411_214725_inLine +BABEL_OP1_102_48281_20120411_214725_outLine +BABEL_OP1_102_48410_20120407_204734_inLine +BABEL_OP1_102_48410_20120407_204734_outLine +BABEL_OP1_102_48976_20120410_161651_inLine +BABEL_OP1_102_48976_20120410_161651_outLine +BABEL_OP1_102_49042_20120408_165038_inLine +BABEL_OP1_102_49042_20120408_165038_outLine +BABEL_OP1_102_49628_20120817_204731_inLine +BABEL_OP1_102_49628_20120817_204731_outLine +BABEL_OP1_102_49864_20120421_155657_inLine +BABEL_OP1_102_49864_20120421_155657_outLine +BABEL_OP1_102_50416_20120803_215223_inLine +BABEL_OP1_102_50416_20120803_215223_outLine +BABEL_OP1_102_50555_20120606_224819_inLine +BABEL_OP1_102_50555_20120606_224819_outLine +BABEL_OP1_102_50597_20120623_193352_inLine 
+BABEL_OP1_102_50597_20120623_193352_outLine +BABEL_OP1_102_50718_20120421_191449_inLine +BABEL_OP1_102_50718_20120421_191449_outLine +BABEL_OP1_102_50752_20121227_204235_inLine +BABEL_OP1_102_50752_20121227_204235_outLine +BABEL_OP1_102_50763_20120405_203621_inLine +BABEL_OP1_102_50763_20120405_203621_outLine +BABEL_OP1_102_50798_20120426_190454_inLine +BABEL_OP1_102_50798_20120426_190454_outLine +BABEL_OP1_102_51149_20121227_201136_inLine +BABEL_OP1_102_51149_20121227_201136_outLine +BABEL_OP1_102_52335_20130123_183229_inLine +BABEL_OP1_102_52335_20130123_183229_outLine +BABEL_OP1_102_52606_20121009_222016_inLine +BABEL_OP1_102_52606_20121009_222016_outLine +BABEL_OP1_102_52642_20120803_212045_inLine +BABEL_OP1_102_52642_20120803_212045_outLine +BABEL_OP1_102_52691_20120407_210408_inLine +BABEL_OP1_102_52691_20120407_210408_outLine +BABEL_OP1_102_52691_20120407_211728_inLine +BABEL_OP1_102_52691_20120407_211728_outLine +BABEL_OP1_102_52691_20120407_213757_inLine +BABEL_OP1_102_52691_20120407_213757_outLine +BABEL_OP1_102_52902_20120607_175045_inLine +BABEL_OP1_102_52902_20120607_175045_outLine +BABEL_OP1_102_52902_20120607_180239_inLine +BABEL_OP1_102_52902_20120607_180239_outLine +BABEL_OP1_102_53429_20121224_202431_inLine +BABEL_OP1_102_53429_20121224_202431_outLine +BABEL_OP1_102_53500_20120428_175953_inLine +BABEL_OP1_102_53500_20120428_175953_outLine +BABEL_OP1_102_53703_20120409_180047_inLine +BABEL_OP1_102_53703_20120409_180047_outLine +BABEL_OP1_102_53982_20120607_220642_inLine +BABEL_OP1_102_53982_20120607_220642_outLine +BABEL_OP1_102_54241_20120911_024357_inLine +BABEL_OP1_102_54241_20120911_024357_outLine +BABEL_OP1_102_54241_20120911_025705_inLine +BABEL_OP1_102_54241_20120911_025705_outLine +BABEL_OP1_102_55182_20120330_201037_inLine +BABEL_OP1_102_55182_20120330_201037_outLine +BABEL_OP1_102_55399_20120409_211258_inLine +BABEL_OP1_102_55399_20120409_211258_outLine +BABEL_OP1_102_55450_20121013_171507_inLine +BABEL_OP1_102_55450_20121013_171507_outLine +BABEL_OP1_102_55470_20120429_194956_inLine +BABEL_OP1_102_55470_20120429_194956_outLine +BABEL_OP1_102_55823_20121010_005200_inLine +BABEL_OP1_102_55823_20121010_005200_outLine +BABEL_OP1_102_55874_20121108_215431_inLine +BABEL_OP1_102_55874_20121108_215431_outLine +BABEL_OP1_102_56070_20120410_224512_inLine +BABEL_OP1_102_56070_20120410_224512_outLine +BABEL_OP1_102_56648_20120615_181652_inLine +BABEL_OP1_102_56648_20120615_181652_outLine +BABEL_OP1_102_56812_20121010_203710_inLine +BABEL_OP1_102_56812_20121010_203710_outLine +BABEL_OP1_102_56943_20121221_203039_inLine +BABEL_OP1_102_56943_20121221_203039_outLine +BABEL_OP1_102_57039_20121107_201157_inLine +BABEL_OP1_102_57039_20121107_201157_outLine +BABEL_OP1_102_57422_20120607_213941_inLine +BABEL_OP1_102_57422_20120607_213941_outLine +BABEL_OP1_102_57629_20121010_011015_inLine +BABEL_OP1_102_57629_20121010_011015_outLine +BABEL_OP1_102_57907_20121013_035627_inLine +BABEL_OP1_102_57907_20121013_035627_outLine +BABEL_OP1_102_58715_20120425_190758_inLine +BABEL_OP1_102_58715_20120425_190758_outLine +BABEL_OP1_102_58863_20120404_195038_inLine +BABEL_OP1_102_58863_20120404_195038_outLine +BABEL_OP1_102_58947_20121106_203812_inLine +BABEL_OP1_102_58947_20121106_203812_outLine +BABEL_OP1_102_58947_20121106_205338_inLine +BABEL_OP1_102_58947_20121106_205338_outLine +BABEL_OP1_102_59169_20120611_172953_inLine +BABEL_OP1_102_59169_20120611_172953_outLine +BABEL_OP1_102_59383_20121220_151350_inLine +BABEL_OP1_102_59383_20121220_151350_outLine 
+BABEL_OP1_102_59628_20121106_031543_inLine +BABEL_OP1_102_59628_20121106_031543_outLine +BABEL_OP1_102_59891_20120611_212238_inLine +BABEL_OP1_102_59891_20120611_212238_outLine +BABEL_OP1_102_59925_20121111_214225_inLine +BABEL_OP1_102_59925_20121111_214225_outLine +BABEL_OP1_102_60193_20120419_201756_inLine +BABEL_OP1_102_60193_20120419_201756_outLine +BABEL_OP1_102_60277_20120615_195600_inLine +BABEL_OP1_102_60277_20120615_195600_outLine +BABEL_OP1_102_60826_20120606_231535_inLine +BABEL_OP1_102_60826_20120606_231535_outLine +BABEL_OP1_102_60848_20121110_170724_inLine +BABEL_OP1_102_60848_20121110_170724_outLine +BABEL_OP1_102_60881_20120401_212818_inLine +BABEL_OP1_102_60881_20120401_212818_outLine +BABEL_OP1_102_60995_20121107_203546_inLine +BABEL_OP1_102_60995_20121107_203546_outLine +BABEL_OP1_102_61263_20121112_213923_inLine +BABEL_OP1_102_61263_20121112_213923_outLine +BABEL_OP1_102_61446_20120420_184155_inLine +BABEL_OP1_102_61446_20120420_184155_outLine +BABEL_OP1_102_61936_20121224_175007_inLine +BABEL_OP1_102_61936_20121224_175007_outLine +BABEL_OP1_102_62132_20120614_214158_inLine +BABEL_OP1_102_62132_20120614_214158_outLine +BABEL_OP1_102_62923_20130122_190544_inLine +BABEL_OP1_102_62923_20130122_190544_outLine +BABEL_OP1_102_63076_20121224_225415_inLine +BABEL_OP1_102_63076_20121224_225415_outLine +BABEL_OP1_102_64185_20120722_220159_inLine +BABEL_OP1_102_64185_20120722_220159_outLine +BABEL_OP1_102_64351_20120608_202610_inLine +BABEL_OP1_102_64351_20120608_202610_outLine +BABEL_OP1_102_65248_20120317_180718_inLine +BABEL_OP1_102_65248_20120317_180718_outLine +BABEL_OP1_102_65273_20121226_233200_inLine +BABEL_OP1_102_65273_20121226_233200_outLine +BABEL_OP1_102_65371_20121228_213615_inLine +BABEL_OP1_102_65371_20121228_213615_outLine +BABEL_OP1_102_65415_20120410_193034_inLine +BABEL_OP1_102_65415_20120410_193034_outLine +BABEL_OP1_102_65580_20120320_234602_inLine +BABEL_OP1_102_65580_20120320_234602_outLine +BABEL_OP1_102_65601_20120427_193019_inLine +BABEL_OP1_102_65601_20120427_193019_outLine +BABEL_OP1_102_65837_20121106_201713_inLine +BABEL_OP1_102_65837_20121106_201713_outLine +BABEL_OP1_102_66330_20120429_164154_inLine +BABEL_OP1_102_66330_20120429_164154_outLine +BABEL_OP1_102_66330_20120429_164900_inLine +BABEL_OP1_102_66330_20120429_164900_outLine +BABEL_OP1_102_66416_20120817_204557_inLine +BABEL_OP1_102_66416_20120817_204557_outLine +BABEL_OP1_102_66441_20120411_170112_inLine +BABEL_OP1_102_66441_20120411_170112_outLine +BABEL_OP1_102_66559_20121227_172234_inLine +BABEL_OP1_102_66559_20121227_172234_outLine +BABEL_OP1_102_67150_20121106_232551_inLine +BABEL_OP1_102_67150_20121106_232551_outLine +BABEL_OP1_102_67733_20120409_192100_inLine +BABEL_OP1_102_67733_20120409_192100_outLine +BABEL_OP1_102_67750_20120330_210301_inLine +BABEL_OP1_102_67750_20120330_210301_outLine +BABEL_OP1_102_67798_20120408_211247_inLine +BABEL_OP1_102_67798_20120408_211247_outLine +BABEL_OP1_102_67916_20121224_185018_inLine +BABEL_OP1_102_67916_20121224_185018_outLine +BABEL_OP1_102_69049_20120422_174706_inLine +BABEL_OP1_102_69049_20120422_174706_outLine +BABEL_OP1_102_69145_20121006_214000_inLine +BABEL_OP1_102_69145_20121006_214000_outLine +BABEL_OP1_102_69275_20121009_000322_inLine +BABEL_OP1_102_69275_20121009_000322_outLine +BABEL_OP1_102_69368_20120328_214605_inLine +BABEL_OP1_102_69368_20120328_214605_outLine +BABEL_OP1_102_69446_20130130_183941_inLine +BABEL_OP1_102_69446_20130130_183941_outLine +BABEL_OP1_102_70077_20121222_173141_inLine 
+BABEL_OP1_102_70077_20121222_173141_outLine +BABEL_OP1_102_70555_20120421_203231_inLine +BABEL_OP1_102_70555_20120421_203231_outLine +BABEL_OP1_102_71778_20120608_222028_inLine +BABEL_OP1_102_71778_20120608_222028_outLine +BABEL_OP1_102_71844_20120331_200325_inLine +BABEL_OP1_102_71844_20120331_200325_outLine +BABEL_OP1_102_72032_20120329_225115_inLine +BABEL_OP1_102_72032_20120329_225115_outLine +BABEL_OP1_102_72718_20121010_030640_inLine +BABEL_OP1_102_72718_20121010_030640_outLine +BABEL_OP1_102_72799_20120428_225215_inLine +BABEL_OP1_102_72799_20120428_225215_outLine +BABEL_OP1_102_73050_20120929_012255_inLine +BABEL_OP1_102_73050_20120929_012255_outLine +BABEL_OP1_102_73059_20121225_162645_inLine +BABEL_OP1_102_73059_20121225_162645_outLine +BABEL_OP1_102_73059_20121225_163932_inLine +BABEL_OP1_102_73059_20121225_163932_outLine +BABEL_OP1_102_73438_20121103_170431_inLine +BABEL_OP1_102_73438_20121103_170431_outLine +BABEL_OP1_102_73440_20120428_195653_inLine +BABEL_OP1_102_73440_20120428_195653_outLine +BABEL_OP1_102_73452_20121003_021245_inLine +BABEL_OP1_102_73452_20121003_021245_outLine +BABEL_OP1_102_73786_20120420_171039_inLine +BABEL_OP1_102_73786_20120420_171039_outLine +BABEL_OP1_102_74043_20120422_170724_inLine +BABEL_OP1_102_74043_20120422_170724_outLine +BABEL_OP1_102_74368_20121008_041653_inLine +BABEL_OP1_102_74368_20121008_041653_outLine +BABEL_OP1_102_74709_20120806_191528_inLine +BABEL_OP1_102_74709_20120806_191528_outLine +BABEL_OP1_102_74823_20120330_181459_inLine +BABEL_OP1_102_74823_20120330_181459_outLine +BABEL_OP1_102_75140_20120330_171509_inLine +BABEL_OP1_102_75140_20120330_171509_outLine +BABEL_OP1_102_75354_20121105_033257_inLine +BABEL_OP1_102_75354_20121105_033257_outLine +BABEL_OP1_102_75498_20120806_180214_inLine +BABEL_OP1_102_75498_20120806_180214_outLine +BABEL_OP1_102_75680_20121110_180407_inLine +BABEL_OP1_102_75680_20121110_180407_outLine +BABEL_OP1_102_75990_20120426_182351_inLine +BABEL_OP1_102_75990_20120426_182351_outLine +BABEL_OP1_102_76331_20120806_185250_inLine +BABEL_OP1_102_76331_20120806_185250_outLine +BABEL_OP1_102_76451_20120329_193459_inLine +BABEL_OP1_102_76451_20120329_193459_outLine +BABEL_OP1_102_77207_20120804_174005_inLine +BABEL_OP1_102_77207_20120804_174005_outLine +BABEL_OP1_102_77244_20121001_003159_inLine +BABEL_OP1_102_77244_20121001_003159_outLine +BABEL_OP1_102_77465_20120607_001521_inLine +BABEL_OP1_102_77465_20120607_001521_outLine +BABEL_OP1_102_77771_20121227_191404_inLine +BABEL_OP1_102_77771_20121227_191404_outLine +BABEL_OP1_102_77811_20130123_215211_inLine +BABEL_OP1_102_77811_20130123_215211_outLine +BABEL_OP1_102_78514_20120409_182010_inLine +BABEL_OP1_102_78514_20120409_182010_outLine +BABEL_OP1_102_79495_20120320_011136_inLine +BABEL_OP1_102_79495_20120320_011136_outLine +BABEL_OP1_102_79618_20120401_204258_inLine +BABEL_OP1_102_79618_20120401_204258_outLine +BABEL_OP1_102_79698_20121106_212429_inLine +BABEL_OP1_102_79698_20121106_212429_outLine +BABEL_OP1_102_80174_20130211_031725_inLine +BABEL_OP1_102_80174_20130211_031725_outLine +BABEL_OP1_102_80868_20121028_015553_inLine +BABEL_OP1_102_80868_20121028_015553_outLine +BABEL_OP1_102_81084_20120406_191910_inLine +BABEL_OP1_102_81084_20120406_191910_outLine +BABEL_OP1_102_81587_20121225_213038_inLine +BABEL_OP1_102_81587_20121225_213038_outLine +BABEL_OP1_102_81611_20121110_221005_inLine +BABEL_OP1_102_81611_20121110_221005_outLine +BABEL_OP1_102_81717_20130209_201202_inLine +BABEL_OP1_102_81717_20130209_201202_outLine 
+BABEL_OP1_102_81878_20120331_181439_inLine +BABEL_OP1_102_81878_20120331_181439_outLine +BABEL_OP1_102_81878_20120331_182958_inLine +BABEL_OP1_102_81878_20120331_182958_outLine +BABEL_OP1_102_82009_20121104_013002_inLine +BABEL_OP1_102_82009_20121104_013002_outLine +BABEL_OP1_102_82023_20120330_175253_inLine +BABEL_OP1_102_82023_20120330_175253_outLine +BABEL_OP1_102_82192_20120429_180649_inLine +BABEL_OP1_102_82192_20120429_180649_outLine +BABEL_OP1_102_82408_20120402_190241_inLine +BABEL_OP1_102_82408_20120402_190241_outLine +BABEL_OP1_102_82880_20121108_173528_inLine +BABEL_OP1_102_82880_20121108_173528_outLine +BABEL_OP1_102_83256_20120330_210950_inLine +BABEL_OP1_102_83256_20120330_210950_outLine +BABEL_OP1_102_83493_20120429_172305_inLine +BABEL_OP1_102_83493_20120429_172305_outLine +BABEL_OP1_102_83493_20120429_175508_inLine +BABEL_OP1_102_83493_20120429_175508_outLine +BABEL_OP1_102_83531_20120408_201200_inLine +BABEL_OP1_102_83531_20120408_201200_outLine +BABEL_OP1_102_83531_20120408_203827_inLine +BABEL_OP1_102_83531_20120408_203827_outLine +BABEL_OP1_102_83634_20130123_212154_inLine +BABEL_OP1_102_83634_20130123_212154_outLine +BABEL_OP1_102_83791_20120420_215616_inLine +BABEL_OP1_102_83791_20120420_215616_outLine +BABEL_OP1_102_84088_20120328_180739_inLine +BABEL_OP1_102_84088_20120328_180739_outLine +BABEL_OP1_102_84284_20121225_175332_inLine +BABEL_OP1_102_84284_20121225_175332_outLine +BABEL_OP1_102_84397_20121110_230552_inLine +BABEL_OP1_102_84397_20121110_230552_outLine +BABEL_OP1_102_84439_20120427_184114_inLine +BABEL_OP1_102_84439_20120427_184114_outLine +BABEL_OP1_102_84608_20120609_194053_inLine +BABEL_OP1_102_84608_20120609_194053_outLine +BABEL_OP1_102_84943_20120401_170153_inLine +BABEL_OP1_102_84943_20120401_170153_outLine +BABEL_OP1_102_85204_20120329_192035_inLine +BABEL_OP1_102_85204_20120329_192035_outLine +BABEL_OP1_102_85716_20120401_165708_inLine +BABEL_OP1_102_85716_20120401_165708_outLine +BABEL_OP1_102_86004_20120418_230109_inLine +BABEL_OP1_102_86004_20120418_230109_outLine +BABEL_OP1_102_86227_20120401_195417_inLine +BABEL_OP1_102_86227_20120401_195417_outLine +BABEL_OP1_102_86886_20121112_201306_inLine +BABEL_OP1_102_86886_20121112_201306_outLine +BABEL_OP1_102_86956_20120401_173752_inLine +BABEL_OP1_102_86956_20120401_173752_outLine +BABEL_OP1_102_87234_20121224_212540_inLine +BABEL_OP1_102_87234_20121224_212540_outLine +BABEL_OP1_102_87481_20121027_153449_inLine +BABEL_OP1_102_87481_20121027_153449_outLine +BABEL_OP1_102_87486_20120406_200642_inLine +BABEL_OP1_102_87486_20120406_200642_outLine +BABEL_OP1_102_87806_20120409_183938_inLine +BABEL_OP1_102_87806_20120409_183938_outLine +BABEL_OP1_102_87857_20120405_202526_inLine +BABEL_OP1_102_87857_20120405_202526_outLine +BABEL_OP1_102_87961_20120423_155726_inLine +BABEL_OP1_102_87961_20120423_155726_outLine +BABEL_OP1_102_88163_20121112_003006_inLine +BABEL_OP1_102_88163_20121112_003006_outLine +BABEL_OP1_102_89583_20121011_013631_inLine +BABEL_OP1_102_89583_20121011_013631_outLine +BABEL_OP1_102_89583_20121012_014745_inLine +BABEL_OP1_102_89583_20121012_014745_outLine +BABEL_OP1_102_89838_20120409_214411_inLine +BABEL_OP1_102_89838_20120409_214411_outLine +BABEL_OP1_102_89993_20120607_175900_inLine +BABEL_OP1_102_89993_20120607_175900_outLine +BABEL_OP1_102_90055_20120405_192435_inLine +BABEL_OP1_102_90055_20120405_192435_outLine +BABEL_OP1_102_90389_20121012_050118_inLine +BABEL_OP1_102_90389_20121012_050118_outLine +BABEL_OP1_102_90393_20120419_214927_inLine 
+BABEL_OP1_102_90393_20120419_214927_outLine +BABEL_OP1_102_90511_20120329_224306_inLine +BABEL_OP1_102_90511_20120329_224306_outLine +BABEL_OP1_102_90609_20120410_184424_inLine +BABEL_OP1_102_90609_20120410_184424_outLine +BABEL_OP1_102_90810_20120404_221650_inLine +BABEL_OP1_102_90810_20120404_221650_outLine +BABEL_OP1_102_90819_20120614_222542_inLine +BABEL_OP1_102_90819_20120614_222542_outLine +BABEL_OP1_102_90890_20120320_235811_inLine +BABEL_OP1_102_90890_20120320_235811_outLine +BABEL_OP1_102_90975_20130127_194034_inLine +BABEL_OP1_102_90975_20130127_194034_outLine +BABEL_OP1_102_90975_20130127_195301_inLine +BABEL_OP1_102_90975_20130127_195301_outLine +BABEL_OP1_102_91171_20121222_000026_inLine +BABEL_OP1_102_91171_20121222_000026_outLine +BABEL_OP1_102_91358_20121103_191541_inLine +BABEL_OP1_102_91358_20121103_191541_outLine +BABEL_OP1_102_91386_20121226_175240_inLine +BABEL_OP1_102_91386_20121226_175240_outLine +BABEL_OP1_102_91401_20120409_195325_inLine +BABEL_OP1_102_91401_20120409_195325_outLine +BABEL_OP1_102_91481_20120806_232222_inLine +BABEL_OP1_102_91481_20120806_232222_outLine +BABEL_OP1_102_91865_20130127_193426_inLine +BABEL_OP1_102_91865_20130127_193426_outLine +BABEL_OP1_102_92002_20120821_172434_inLine +BABEL_OP1_102_92002_20120821_172434_outLine +BABEL_OP1_102_92252_20120805_193105_inLine +BABEL_OP1_102_92252_20120805_193105_outLine +BABEL_OP1_102_92252_20120805_202508_inLine +BABEL_OP1_102_92252_20120805_202508_outLine +BABEL_OP1_102_92321_20120729_204129_inLine +BABEL_OP1_102_92321_20120729_204129_outLine +BABEL_OP1_102_92386_20120401_175909_inLine +BABEL_OP1_102_92386_20120401_175909_outLine +BABEL_OP1_102_92407_20120330_180101_inLine +BABEL_OP1_102_92407_20120330_180101_outLine +BABEL_OP1_102_92591_20120818_164613_inLine +BABEL_OP1_102_92591_20120818_164613_outLine +BABEL_OP1_102_92591_20120818_170346_inLine +BABEL_OP1_102_92591_20120818_170346_outLine +BABEL_OP1_102_92591_20120818_171559_inLine +BABEL_OP1_102_92591_20120818_171559_outLine +BABEL_OP1_102_92628_20120404_212106_inLine +BABEL_OP1_102_92628_20120404_212106_outLine +BABEL_OP1_102_92800_20120408_165253_inLine +BABEL_OP1_102_92800_20120408_165253_outLine +BABEL_OP1_102_93091_20120425_204602_inLine +BABEL_OP1_102_93091_20120425_204602_outLine +BABEL_OP1_102_93091_20120425_205745_inLine +BABEL_OP1_102_93091_20120425_205745_outLine +BABEL_OP1_102_93151_20120410_200907_inLine +BABEL_OP1_102_93151_20120410_200907_outLine +BABEL_OP1_102_93277_20121028_025007_inLine +BABEL_OP1_102_93277_20121028_025007_outLine +BABEL_OP1_102_93277_20121106_011048_inLine +BABEL_OP1_102_93277_20121106_011048_outLine +BABEL_OP1_102_93509_20120422_230046_inLine +BABEL_OP1_102_93509_20120422_230046_outLine +BABEL_OP1_102_93607_20120806_194627_inLine +BABEL_OP1_102_93607_20120806_194627_outLine +BABEL_OP1_102_94162_20130209_213329_inLine +BABEL_OP1_102_94162_20130209_213329_outLine +BABEL_OP1_102_94542_20130122_222709_inLine +BABEL_OP1_102_94542_20130122_222709_outLine +BABEL_OP1_102_94694_20120611_183126_inLine +BABEL_OP1_102_94694_20120611_183126_outLine +BABEL_OP1_102_94696_20130127_183814_inLine +BABEL_OP1_102_94696_20130127_183814_outLine +BABEL_OP1_102_95350_20120420_225049_inLine +BABEL_OP1_102_95350_20120420_225049_outLine +BABEL_OP1_102_95566_20121222_024129_inLine +BABEL_OP1_102_95566_20121222_024129_outLine +BABEL_OP1_102_95637_20120329_225942_inLine +BABEL_OP1_102_95637_20120329_225942_outLine +BABEL_OP1_102_95650_20120327_230850_inLine +BABEL_OP1_102_95650_20120327_230850_outLine 
+BABEL_OP1_102_95815_20120401_233401_inLine +BABEL_OP1_102_95815_20120401_233401_outLine +BABEL_OP1_102_95849_20121106_222829_inLine +BABEL_OP1_102_95849_20121106_222829_outLine +BABEL_OP1_102_95996_20120427_174020_inLine +BABEL_OP1_102_95996_20120427_174020_outLine +BABEL_OP1_102_96216_20120412_193323_inLine +BABEL_OP1_102_96216_20120412_193323_outLine +BABEL_OP1_102_96283_20120720_021526_inLine +BABEL_OP1_102_96283_20120720_021526_outLine +BABEL_OP1_102_96347_20120330_201932_inLine +BABEL_OP1_102_96347_20120330_201932_outLine +BABEL_OP1_102_96788_20120411_183347_inLine +BABEL_OP1_102_96788_20120411_183347_outLine +BABEL_OP1_102_97004_20121107_210600_inLine +BABEL_OP1_102_97004_20121107_210600_outLine +BABEL_OP1_102_97260_20120409_175649_inLine +BABEL_OP1_102_97260_20120409_175649_outLine +BABEL_OP1_102_97590_20121110_214746_inLine +BABEL_OP1_102_97590_20121110_214746_outLine +BABEL_OP1_102_97590_20121110_215543_inLine +BABEL_OP1_102_97590_20121110_215543_outLine +BABEL_OP1_102_97760_20121010_154720_inLine +BABEL_OP1_102_97760_20121010_154720_outLine +BABEL_OP1_102_98402_20121112_014920_inLine +BABEL_OP1_102_98402_20121112_014920_outLine +BABEL_OP1_102_98640_20120930_211930_inLine +BABEL_OP1_102_98640_20120930_211930_outLine +BABEL_OP1_102_98675_20130209_215547_inLine +BABEL_OP1_102_98675_20130209_215547_outLine +BABEL_OP1_102_99514_20120406_182505_inLine +BABEL_OP1_102_99514_20120406_182505_outLine +BABEL_OP1_102_99709_20120429_201437_inLine +BABEL_OP1_102_99709_20120429_201437_outLine +BABEL_OP1_102_99709_20120429_202748_inLine +BABEL_OP1_102_99709_20120429_202748_outLine +BABEL_OP1_102_99731_20121220_214209_inLine +BABEL_OP1_102_99731_20121220_214209_outLine +BABEL_OP1_102_99823_20120429_181728_inLine +BABEL_OP1_102_99823_20120429_181728_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.list b/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.list new file mode 100644 index 00000000000..138e2c7651f --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.list @@ -0,0 +1,138 @@ +BABEL_OP1_102_10713_20120401_204236_inLine +BABEL_OP1_102_10713_20120401_204236_outLine +BABEL_OP1_102_11031_20120926_231829_inLine +BABEL_OP1_102_11031_20120926_231829_outLine +BABEL_OP1_102_12655_20120318_171708_inLine +BABEL_OP1_102_12655_20120318_171708_outLine +BABEL_OP1_102_14874_20120417_153112_inLine +BABEL_OP1_102_14874_20120417_153112_outLine +BABEL_OP1_102_15493_20130127_203044_inLine +BABEL_OP1_102_15493_20130127_203044_outLine +BABEL_OP1_102_16185_20121105_042129_inLine +BABEL_OP1_102_16185_20121105_042129_outLine +BABEL_OP1_102_17207_20120729_230128_inLine +BABEL_OP1_102_17207_20120729_230128_outLine +BABEL_OP1_102_18344_20121109_192858_inLine +BABEL_OP1_102_18344_20121109_192858_outLine +BABEL_OP1_102_19063_20130209_231415_inLine +BABEL_OP1_102_19063_20130209_231415_outLine +BABEL_OP1_102_19758_20120417_174950_inLine +BABEL_OP1_102_19758_20120417_174950_outLine +BABEL_OP1_102_29512_20120805_170123_inLine +BABEL_OP1_102_29512_20120805_170123_outLine +BABEL_OP1_102_29512_20120805_172610_inLine +BABEL_OP1_102_29512_20120805_172610_outLine +BABEL_OP1_102_30530_20120330_173152_inLine +BABEL_OP1_102_30530_20120330_173152_outLine +BABEL_OP1_102_32011_20121014_024351_inLine +BABEL_OP1_102_32011_20121014_024351_outLine +BABEL_OP1_102_34194_20120330_182542_inLine +BABEL_OP1_102_34194_20120330_182542_outLine +BABEL_OP1_102_37524_20120329_182549_inLine +BABEL_OP1_102_37524_20120329_182549_outLine +BABEL_OP1_102_38464_20121012_023702_inLine 
+BABEL_OP1_102_38464_20121012_023702_outLine +BABEL_OP1_102_38635_20120607_010931_inLine +BABEL_OP1_102_38635_20120607_010931_outLine +BABEL_OP1_102_38985_20120806_174824_inLine +BABEL_OP1_102_38985_20120806_174824_outLine +BABEL_OP1_102_38985_20120806_181000_inLine +BABEL_OP1_102_38985_20120806_181000_outLine +BABEL_OP1_102_39098_20120405_203729_inLine +BABEL_OP1_102_39098_20120405_203729_outLine +BABEL_OP1_102_45227_20120329_003400_inLine +BABEL_OP1_102_45227_20120329_003400_outLine +BABEL_OP1_102_46521_20120411_193429_inLine +BABEL_OP1_102_46521_20120411_193429_outLine +BABEL_OP1_102_48281_20120411_214725_inLine +BABEL_OP1_102_48281_20120411_214725_outLine +BABEL_OP1_102_50416_20120803_215223_inLine +BABEL_OP1_102_50416_20120803_215223_outLine +BABEL_OP1_102_51149_20121227_201136_inLine +BABEL_OP1_102_51149_20121227_201136_outLine +BABEL_OP1_102_53429_20121224_202431_inLine +BABEL_OP1_102_53429_20121224_202431_outLine +BABEL_OP1_102_55399_20120409_211258_inLine +BABEL_OP1_102_55399_20120409_211258_outLine +BABEL_OP1_102_59628_20121106_031543_inLine +BABEL_OP1_102_59628_20121106_031543_outLine +BABEL_OP1_102_61936_20121224_175007_inLine +BABEL_OP1_102_61936_20121224_175007_outLine +BABEL_OP1_102_65601_20120427_193019_inLine +BABEL_OP1_102_65601_20120427_193019_outLine +BABEL_OP1_102_66330_20120429_164154_inLine +BABEL_OP1_102_66330_20120429_164154_outLine +BABEL_OP1_102_66330_20120429_164900_inLine +BABEL_OP1_102_66330_20120429_164900_outLine +BABEL_OP1_102_69446_20130130_183941_inLine +BABEL_OP1_102_69446_20130130_183941_outLine +BABEL_OP1_102_70077_20121222_173141_inLine +BABEL_OP1_102_70077_20121222_173141_outLine +BABEL_OP1_102_71844_20120331_200325_inLine +BABEL_OP1_102_71844_20120331_200325_outLine +BABEL_OP1_102_73059_20121225_162645_inLine +BABEL_OP1_102_73059_20121225_162645_outLine +BABEL_OP1_102_73059_20121225_163932_inLine +BABEL_OP1_102_73059_20121225_163932_outLine +BABEL_OP1_102_77207_20120804_174005_inLine +BABEL_OP1_102_77207_20120804_174005_outLine +BABEL_OP1_102_79618_20120401_204258_inLine +BABEL_OP1_102_79618_20120401_204258_outLine +BABEL_OP1_102_79698_20121106_212429_inLine +BABEL_OP1_102_79698_20121106_212429_outLine +BABEL_OP1_102_80174_20130211_031725_inLine +BABEL_OP1_102_80174_20130211_031725_outLine +BABEL_OP1_102_81611_20121110_221005_inLine +BABEL_OP1_102_81611_20121110_221005_outLine +BABEL_OP1_102_82880_20121108_173528_inLine +BABEL_OP1_102_82880_20121108_173528_outLine +BABEL_OP1_102_85204_20120329_192035_inLine +BABEL_OP1_102_85204_20120329_192035_outLine +BABEL_OP1_102_86227_20120401_195417_inLine +BABEL_OP1_102_86227_20120401_195417_outLine +BABEL_OP1_102_86956_20120401_173752_inLine +BABEL_OP1_102_86956_20120401_173752_outLine +BABEL_OP1_102_87481_20121027_153449_inLine +BABEL_OP1_102_87481_20121027_153449_outLine +BABEL_OP1_102_87486_20120406_200642_inLine +BABEL_OP1_102_87486_20120406_200642_outLine +BABEL_OP1_102_87806_20120409_183938_inLine +BABEL_OP1_102_87806_20120409_183938_outLine +BABEL_OP1_102_89583_20121011_013631_inLine +BABEL_OP1_102_89583_20121011_013631_outLine +BABEL_OP1_102_89583_20121012_014745_inLine +BABEL_OP1_102_89583_20121012_014745_outLine +BABEL_OP1_102_89838_20120409_214411_inLine +BABEL_OP1_102_89838_20120409_214411_outLine +BABEL_OP1_102_90055_20120405_192435_inLine +BABEL_OP1_102_90055_20120405_192435_outLine +BABEL_OP1_102_90389_20121012_050118_inLine +BABEL_OP1_102_90389_20121012_050118_outLine +BABEL_OP1_102_90609_20120410_184424_inLine +BABEL_OP1_102_90609_20120410_184424_outLine 
+BABEL_OP1_102_92591_20120818_164613_inLine +BABEL_OP1_102_92591_20120818_164613_outLine +BABEL_OP1_102_92591_20120818_170346_inLine +BABEL_OP1_102_92591_20120818_170346_outLine +BABEL_OP1_102_92591_20120818_171559_inLine +BABEL_OP1_102_92591_20120818_171559_outLine +BABEL_OP1_102_93151_20120410_200907_inLine +BABEL_OP1_102_93151_20120410_200907_outLine +BABEL_OP1_102_93277_20121028_025007_inLine +BABEL_OP1_102_93277_20121028_025007_outLine +BABEL_OP1_102_93277_20121106_011048_inLine +BABEL_OP1_102_93277_20121106_011048_outLine +BABEL_OP1_102_95996_20120427_174020_inLine +BABEL_OP1_102_95996_20120427_174020_outLine +BABEL_OP1_102_96216_20120412_193323_inLine +BABEL_OP1_102_96216_20120412_193323_outLine +BABEL_OP1_102_97004_20121107_210600_inLine +BABEL_OP1_102_97004_20121107_210600_outLine +BABEL_OP1_102_97760_20121010_154720_inLine +BABEL_OP1_102_97760_20121010_154720_outLine +BABEL_OP1_102_98640_20120930_211930_inLine +BABEL_OP1_102_98640_20120930_211930_outLine +BABEL_OP1_102_99709_20120429_201437_inLine +BABEL_OP1_102_99709_20120429_201437_outLine +BABEL_OP1_102_99709_20120429_202748_inLine +BABEL_OP1_102_99709_20120429_202748_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..f363d1b4216 --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/train.LimitedLP.untranscribed.list @@ -0,0 +1,652 @@ +BABEL_OP1_102_10187_20120405_173448_inLine +BABEL_OP1_102_10187_20120405_173448_outLine +BABEL_OP1_102_10271_20120729_173749_inLine +BABEL_OP1_102_10271_20120729_173749_outLine +BABEL_OP1_102_11004_20120420_213442_inLine +BABEL_OP1_102_11004_20120420_213442_outLine +BABEL_OP1_102_11036_20120406_202335_inLine +BABEL_OP1_102_11036_20120406_202335_outLine +BABEL_OP1_102_11158_20121008_011850_inLine +BABEL_OP1_102_11158_20121008_011850_outLine +BABEL_OP1_102_11371_20120327_175933_inLine +BABEL_OP1_102_11371_20120327_175933_outLine +BABEL_OP1_102_11521_20121005_005530_inLine +BABEL_OP1_102_11521_20121005_005530_outLine +BABEL_OP1_102_11694_20121108_184639_inLine +BABEL_OP1_102_11694_20121108_184639_outLine +BABEL_OP1_102_12120_20121105_205527_inLine +BABEL_OP1_102_12120_20121105_205527_outLine +BABEL_OP1_102_12486_20121009_231421_inLine +BABEL_OP1_102_12486_20121009_231421_outLine +BABEL_OP1_102_12535_20121009_024245_inLine +BABEL_OP1_102_12535_20121009_024245_outLine +BABEL_OP1_102_12552_20120727_023454_inLine +BABEL_OP1_102_12552_20120727_023454_outLine +BABEL_OP1_102_12643_20121108_184648_inLine +BABEL_OP1_102_12643_20121108_184648_outLine +BABEL_OP1_102_12844_20120411_193813_inLine +BABEL_OP1_102_12844_20120411_193813_outLine +BABEL_OP1_102_13229_20130127_023814_inLine +BABEL_OP1_102_13229_20130127_023814_outLine +BABEL_OP1_102_13389_20120406_184440_inLine +BABEL_OP1_102_13389_20120406_184440_outLine +BABEL_OP1_102_13702_20130121_185149_inLine +BABEL_OP1_102_13702_20130121_185149_outLine +BABEL_OP1_102_13913_20120807_001423_inLine +BABEL_OP1_102_13913_20120807_001423_outLine +BABEL_OP1_102_14769_20120926_165746_inLine +BABEL_OP1_102_14769_20120926_165746_outLine +BABEL_OP1_102_14891_20121009_003232_inLine +BABEL_OP1_102_14891_20121009_003232_outLine +BABEL_OP1_102_15146_20120318_184752_inLine +BABEL_OP1_102_15146_20120318_184752_outLine +BABEL_OP1_102_15234_20121108_022333_inLine +BABEL_OP1_102_15234_20121108_022333_outLine +BABEL_OP1_102_15502_20120419_233859_inLine +BABEL_OP1_102_15502_20120419_233859_outLine 
+BABEL_OP1_102_15502_20120420_000213_inLine +BABEL_OP1_102_15502_20120420_000213_outLine +BABEL_OP1_102_15881_20120331_215830_inLine +BABEL_OP1_102_15881_20120331_215830_outLine +BABEL_OP1_102_15916_20120428_221806_inLine +BABEL_OP1_102_15916_20120428_221806_outLine +BABEL_OP1_102_16167_20130122_175936_inLine +BABEL_OP1_102_16167_20130122_175936_outLine +BABEL_OP1_102_16313_20120331_215132_inLine +BABEL_OP1_102_16313_20120331_215132_outLine +BABEL_OP1_102_16669_20120327_202211_inLine +BABEL_OP1_102_16669_20120327_202211_outLine +BABEL_OP1_102_17013_20121105_230820_inLine +BABEL_OP1_102_17013_20121105_230820_outLine +BABEL_OP1_102_17203_20121221_161532_inLine +BABEL_OP1_102_17203_20121221_161532_outLine +BABEL_OP1_102_17572_20120806_235812_inLine +BABEL_OP1_102_17572_20120806_235812_outLine +BABEL_OP1_102_17933_20120607_184111_inLine +BABEL_OP1_102_17933_20120607_184111_outLine +BABEL_OP1_102_18534_20121105_185859_inLine +BABEL_OP1_102_18534_20121105_185859_outLine +BABEL_OP1_102_18730_20130122_171244_inLine +BABEL_OP1_102_18730_20130122_171244_outLine +BABEL_OP1_102_18802_20121104_232940_inLine +BABEL_OP1_102_18802_20121104_232940_outLine +BABEL_OP1_102_19147_20120329_190609_inLine +BABEL_OP1_102_19147_20120329_190609_outLine +BABEL_OP1_102_19456_20121110_201037_inLine +BABEL_OP1_102_19456_20121110_201037_outLine +BABEL_OP1_102_19731_20130123_200845_inLine +BABEL_OP1_102_19731_20130123_200845_outLine +BABEL_OP1_102_19867_20130127_211111_inLine +BABEL_OP1_102_19867_20130127_211111_outLine +BABEL_OP1_102_20271_20120410_205746_inLine +BABEL_OP1_102_20271_20120410_205746_outLine +BABEL_OP1_102_20320_20120409_212129_inLine +BABEL_OP1_102_20320_20120409_212129_outLine +BABEL_OP1_102_20320_20120409_214042_inLine +BABEL_OP1_102_20320_20120409_214042_outLine +BABEL_OP1_102_20454_20121010_020017_inLine +BABEL_OP1_102_20454_20121010_020017_outLine +BABEL_OP1_102_20591_20120806_210212_inLine +BABEL_OP1_102_20591_20120806_210212_outLine +BABEL_OP1_102_21050_20120619_010126_inLine +BABEL_OP1_102_21050_20120619_010126_outLine +BABEL_OP1_102_21477_20120417_212152_inLine +BABEL_OP1_102_21477_20120417_212152_outLine +BABEL_OP1_102_21518_20120805_195607_inLine +BABEL_OP1_102_21518_20120805_195607_outLine +BABEL_OP1_102_21758_20120823_164553_inLine +BABEL_OP1_102_21758_20120823_164553_outLine +BABEL_OP1_102_21782_20120422_184156_inLine +BABEL_OP1_102_21782_20120422_184156_outLine +BABEL_OP1_102_22401_20121017_023338_inLine +BABEL_OP1_102_22401_20121017_023338_outLine +BABEL_OP1_102_22408_20120426_225012_inLine +BABEL_OP1_102_22408_20120426_225012_outLine +BABEL_OP1_102_23167_20120329_204718_inLine +BABEL_OP1_102_23167_20120329_204718_outLine +BABEL_OP1_102_24420_20120624_013709_inLine +BABEL_OP1_102_24420_20120624_013709_outLine +BABEL_OP1_102_24661_20121104_224032_inLine +BABEL_OP1_102_24661_20121104_224032_outLine +BABEL_OP1_102_24833_20120410_172706_inLine +BABEL_OP1_102_24833_20120410_172706_outLine +BABEL_OP1_102_25236_20120804_180700_inLine +BABEL_OP1_102_25236_20120804_180700_outLine +BABEL_OP1_102_25576_20120422_180912_inLine +BABEL_OP1_102_25576_20120422_180912_outLine +BABEL_OP1_102_25904_20120611_203203_inLine +BABEL_OP1_102_25904_20120611_203203_outLine +BABEL_OP1_102_25934_20120329_005438_inLine +BABEL_OP1_102_25934_20120329_005438_outLine +BABEL_OP1_102_26348_20121109_170513_inLine +BABEL_OP1_102_26348_20121109_170513_outLine +BABEL_OP1_102_27007_20120611_223823_inLine +BABEL_OP1_102_27007_20120611_223823_outLine +BABEL_OP1_102_27349_20120422_192337_inLine 
+BABEL_OP1_102_27349_20120422_192337_outLine +BABEL_OP1_102_27427_20120412_182452_inLine +BABEL_OP1_102_27427_20120412_182452_outLine +BABEL_OP1_102_27824_20120427_201104_inLine +BABEL_OP1_102_27824_20120427_201104_outLine +BABEL_OP1_102_27890_20121002_030324_inLine +BABEL_OP1_102_27890_20121002_030324_outLine +BABEL_OP1_102_28016_20120430_193141_inLine +BABEL_OP1_102_28016_20120430_193141_outLine +BABEL_OP1_102_28016_20120430_194530_inLine +BABEL_OP1_102_28016_20120430_194530_outLine +BABEL_OP1_102_28107_20120327_204144_inLine +BABEL_OP1_102_28107_20120327_204144_outLine +BABEL_OP1_102_28204_20120401_204624_inLine +BABEL_OP1_102_28204_20120401_204624_outLine +BABEL_OP1_102_28260_20120329_210829_inLine +BABEL_OP1_102_28260_20120329_210829_outLine +BABEL_OP1_102_28648_20120608_192702_inLine +BABEL_OP1_102_28648_20120608_192702_outLine +BABEL_OP1_102_29168_20120411_174248_inLine +BABEL_OP1_102_29168_20120411_174248_outLine +BABEL_OP1_102_29259_20120612_211621_inLine +BABEL_OP1_102_29259_20120612_211621_outLine +BABEL_OP1_102_29335_20120609_182335_inLine +BABEL_OP1_102_29335_20120609_182335_outLine +BABEL_OP1_102_29335_20120609_183151_inLine +BABEL_OP1_102_29335_20120609_183151_outLine +BABEL_OP1_102_29444_20120331_231513_inLine +BABEL_OP1_102_29444_20120331_231513_outLine +BABEL_OP1_102_29444_20120331_233317_inLine +BABEL_OP1_102_29444_20120331_233317_outLine +BABEL_OP1_102_29545_20121105_220136_inLine +BABEL_OP1_102_29545_20121105_220136_outLine +BABEL_OP1_102_29959_20130128_195931_inLine +BABEL_OP1_102_29959_20130128_195931_outLine +BABEL_OP1_102_29959_20130128_223813_inLine +BABEL_OP1_102_29959_20130128_223813_outLine +BABEL_OP1_102_30266_20120331_212330_inLine +BABEL_OP1_102_30266_20120331_212330_outLine +BABEL_OP1_102_30722_20121011_013755_inLine +BABEL_OP1_102_30722_20121011_013755_outLine +BABEL_OP1_102_31031_20120611_193208_inLine +BABEL_OP1_102_31031_20120611_193208_outLine +BABEL_OP1_102_31902_20120425_211816_inLine +BABEL_OP1_102_31902_20120425_211816_outLine +BABEL_OP1_102_31917_20120611_195339_inLine +BABEL_OP1_102_31917_20120611_195339_outLine +BABEL_OP1_102_32562_20121010_014014_inLine +BABEL_OP1_102_32562_20121010_014014_outLine +BABEL_OP1_102_32642_20121104_220528_inLine +BABEL_OP1_102_32642_20121104_220528_outLine +BABEL_OP1_102_33023_20120329_224858_inLine +BABEL_OP1_102_33023_20120329_224858_outLine +BABEL_OP1_102_33540_20120401_212225_inLine +BABEL_OP1_102_33540_20120401_212225_outLine +BABEL_OP1_102_33671_20120422_231219_inLine +BABEL_OP1_102_33671_20120422_231219_outLine +BABEL_OP1_102_34169_20120331_183840_inLine +BABEL_OP1_102_34169_20120331_183840_outLine +BABEL_OP1_102_34235_20120405_190745_inLine +BABEL_OP1_102_34235_20120405_190745_outLine +BABEL_OP1_102_34480_20121012_193452_inLine +BABEL_OP1_102_34480_20121012_193452_outLine +BABEL_OP1_102_34590_20120417_151435_inLine +BABEL_OP1_102_34590_20120417_151435_outLine +BABEL_OP1_102_34590_20120417_155556_inLine +BABEL_OP1_102_34590_20120417_155556_outLine +BABEL_OP1_102_34930_20120411_200043_inLine +BABEL_OP1_102_34930_20120411_200043_outLine +BABEL_OP1_102_35011_20120420_020024_inLine +BABEL_OP1_102_35011_20120420_020024_outLine +BABEL_OP1_102_35229_20121106_204019_inLine +BABEL_OP1_102_35229_20121106_204019_outLine +BABEL_OP1_102_35324_20120426_180016_inLine +BABEL_OP1_102_35324_20120426_180016_outLine +BABEL_OP1_102_35324_20120426_203214_inLine +BABEL_OP1_102_35324_20120426_203214_outLine +BABEL_OP1_102_35455_20121112_000231_inLine +BABEL_OP1_102_35455_20121112_000231_outLine 
+BABEL_OP1_102_36868_20130209_201544_inLine +BABEL_OP1_102_36868_20130209_201544_outLine +BABEL_OP1_102_37260_20120808_012733_inLine +BABEL_OP1_102_37260_20120808_012733_outLine +BABEL_OP1_102_37260_20120808_014150_inLine +BABEL_OP1_102_37260_20120808_014150_outLine +BABEL_OP1_102_37268_20121226_203217_inLine +BABEL_OP1_102_37268_20121226_203217_outLine +BABEL_OP1_102_37285_20120405_223443_inLine +BABEL_OP1_102_37285_20120405_223443_outLine +BABEL_OP1_102_37444_20130128_032426_inLine +BABEL_OP1_102_37444_20130128_032426_outLine +BABEL_OP1_102_37461_20120409_191629_inLine +BABEL_OP1_102_37461_20120409_191629_outLine +BABEL_OP1_102_37461_20120409_194138_inLine +BABEL_OP1_102_37461_20120409_194138_outLine +BABEL_OP1_102_37461_20120409_195519_inLine +BABEL_OP1_102_37461_20120409_195519_outLine +BABEL_OP1_102_38264_20121105_050622_inLine +BABEL_OP1_102_38264_20121105_050622_outLine +BABEL_OP1_102_38592_20121225_215825_inLine +BABEL_OP1_102_38592_20121225_215825_outLine +BABEL_OP1_102_38698_20120401_215032_inLine +BABEL_OP1_102_38698_20120401_215032_outLine +BABEL_OP1_102_38863_20121011_183009_inLine +BABEL_OP1_102_38863_20121011_183009_outLine +BABEL_OP1_102_39114_20120930_180045_inLine +BABEL_OP1_102_39114_20120930_180045_outLine +BABEL_OP1_102_39364_20121105_220855_inLine +BABEL_OP1_102_39364_20121105_220855_outLine +BABEL_OP1_102_39430_20120411_182026_inLine +BABEL_OP1_102_39430_20120411_182026_outLine +BABEL_OP1_102_39430_20120411_184729_inLine +BABEL_OP1_102_39430_20120411_184729_outLine +BABEL_OP1_102_40133_20121112_214034_inLine +BABEL_OP1_102_40133_20121112_214034_outLine +BABEL_OP1_102_40168_20120428_173400_inLine +BABEL_OP1_102_40168_20120428_173400_outLine +BABEL_OP1_102_40882_20130209_204142_inLine +BABEL_OP1_102_40882_20130209_204142_outLine +BABEL_OP1_102_41561_20121111_220752_inLine +BABEL_OP1_102_41561_20121111_220752_outLine +BABEL_OP1_102_41949_20120426_222144_inLine +BABEL_OP1_102_41949_20120426_222144_outLine +BABEL_OP1_102_42615_20120327_180819_inLine +BABEL_OP1_102_42615_20120327_180819_outLine +BABEL_OP1_102_42651_20120409_221530_inLine +BABEL_OP1_102_42651_20120409_221530_outLine +BABEL_OP1_102_42749_20121114_005458_inLine +BABEL_OP1_102_42749_20121114_005458_outLine +BABEL_OP1_102_42749_20121114_010754_inLine +BABEL_OP1_102_42749_20121114_010754_outLine +BABEL_OP1_102_43383_20120406_193121_inLine +BABEL_OP1_102_43383_20120406_193121_outLine +BABEL_OP1_102_43423_20120919_201131_inLine +BABEL_OP1_102_43423_20120919_201131_outLine +BABEL_OP1_102_43426_20120501_170331_inLine +BABEL_OP1_102_43426_20120501_170331_outLine +BABEL_OP1_102_43553_20120408_174809_inLine +BABEL_OP1_102_43553_20120408_174809_outLine +BABEL_OP1_102_43652_20120428_191659_inLine +BABEL_OP1_102_43652_20120428_191659_outLine +BABEL_OP1_102_44649_20120611_185930_inLine +BABEL_OP1_102_44649_20120611_185930_outLine +BABEL_OP1_102_44829_20120907_011054_inLine +BABEL_OP1_102_44829_20120907_011054_outLine +BABEL_OP1_102_44829_20120907_013730_inLine +BABEL_OP1_102_44829_20120907_013730_outLine +BABEL_OP1_102_45361_20120611_222502_inLine +BABEL_OP1_102_45361_20120611_222502_outLine +BABEL_OP1_102_45677_20130123_192645_inLine +BABEL_OP1_102_45677_20130123_192645_outLine +BABEL_OP1_102_45681_20120623_173741_inLine +BABEL_OP1_102_45681_20120623_173741_outLine +BABEL_OP1_102_45738_20120806_202458_inLine +BABEL_OP1_102_45738_20120806_202458_outLine +BABEL_OP1_102_45892_20120408_220557_inLine +BABEL_OP1_102_45892_20120408_220557_outLine +BABEL_OP1_102_45931_20120421_233726_inLine 
+BABEL_OP1_102_45931_20120421_233726_outLine +BABEL_OP1_102_46002_20121009_215715_inLine +BABEL_OP1_102_46002_20121009_215715_outLine +BABEL_OP1_102_46269_20121110_215228_inLine +BABEL_OP1_102_46269_20121110_215228_outLine +BABEL_OP1_102_47634_20120408_214325_inLine +BABEL_OP1_102_47634_20120408_214325_outLine +BABEL_OP1_102_47823_20120804_180038_inLine +BABEL_OP1_102_47823_20120804_180038_outLine +BABEL_OP1_102_48410_20120407_204734_inLine +BABEL_OP1_102_48410_20120407_204734_outLine +BABEL_OP1_102_48976_20120410_161651_inLine +BABEL_OP1_102_48976_20120410_161651_outLine +BABEL_OP1_102_49042_20120408_165038_inLine +BABEL_OP1_102_49042_20120408_165038_outLine +BABEL_OP1_102_49628_20120817_204731_inLine +BABEL_OP1_102_49628_20120817_204731_outLine +BABEL_OP1_102_49864_20120421_155657_inLine +BABEL_OP1_102_49864_20120421_155657_outLine +BABEL_OP1_102_50555_20120606_224819_inLine +BABEL_OP1_102_50555_20120606_224819_outLine +BABEL_OP1_102_50597_20120623_193352_inLine +BABEL_OP1_102_50597_20120623_193352_outLine +BABEL_OP1_102_50718_20120421_191449_inLine +BABEL_OP1_102_50718_20120421_191449_outLine +BABEL_OP1_102_50752_20121227_204235_inLine +BABEL_OP1_102_50752_20121227_204235_outLine +BABEL_OP1_102_50763_20120405_203621_inLine +BABEL_OP1_102_50763_20120405_203621_outLine +BABEL_OP1_102_50798_20120426_190454_inLine +BABEL_OP1_102_50798_20120426_190454_outLine +BABEL_OP1_102_52335_20130123_183229_inLine +BABEL_OP1_102_52335_20130123_183229_outLine +BABEL_OP1_102_52606_20121009_222016_inLine +BABEL_OP1_102_52606_20121009_222016_outLine +BABEL_OP1_102_52642_20120803_212045_inLine +BABEL_OP1_102_52642_20120803_212045_outLine +BABEL_OP1_102_52691_20120407_210408_inLine +BABEL_OP1_102_52691_20120407_210408_outLine +BABEL_OP1_102_52691_20120407_211728_inLine +BABEL_OP1_102_52691_20120407_211728_outLine +BABEL_OP1_102_52691_20120407_213757_inLine +BABEL_OP1_102_52691_20120407_213757_outLine +BABEL_OP1_102_52902_20120607_175045_inLine +BABEL_OP1_102_52902_20120607_175045_outLine +BABEL_OP1_102_52902_20120607_180239_inLine +BABEL_OP1_102_52902_20120607_180239_outLine +BABEL_OP1_102_53500_20120428_175953_inLine +BABEL_OP1_102_53500_20120428_175953_outLine +BABEL_OP1_102_53703_20120409_180047_inLine +BABEL_OP1_102_53703_20120409_180047_outLine +BABEL_OP1_102_53982_20120607_220642_inLine +BABEL_OP1_102_53982_20120607_220642_outLine +BABEL_OP1_102_54241_20120911_024357_inLine +BABEL_OP1_102_54241_20120911_024357_outLine +BABEL_OP1_102_54241_20120911_025705_inLine +BABEL_OP1_102_54241_20120911_025705_outLine +BABEL_OP1_102_55182_20120330_201037_inLine +BABEL_OP1_102_55182_20120330_201037_outLine +BABEL_OP1_102_55450_20121013_171507_inLine +BABEL_OP1_102_55450_20121013_171507_outLine +BABEL_OP1_102_55470_20120429_194956_inLine +BABEL_OP1_102_55470_20120429_194956_outLine +BABEL_OP1_102_55823_20121010_005200_inLine +BABEL_OP1_102_55823_20121010_005200_outLine +BABEL_OP1_102_55874_20121108_215431_inLine +BABEL_OP1_102_55874_20121108_215431_outLine +BABEL_OP1_102_56070_20120410_224512_inLine +BABEL_OP1_102_56070_20120410_224512_outLine +BABEL_OP1_102_56648_20120615_181652_inLine +BABEL_OP1_102_56648_20120615_181652_outLine +BABEL_OP1_102_56812_20121010_203710_inLine +BABEL_OP1_102_56812_20121010_203710_outLine +BABEL_OP1_102_56943_20121221_203039_inLine +BABEL_OP1_102_56943_20121221_203039_outLine +BABEL_OP1_102_57039_20121107_201157_inLine +BABEL_OP1_102_57039_20121107_201157_outLine +BABEL_OP1_102_57422_20120607_213941_inLine +BABEL_OP1_102_57422_20120607_213941_outLine 
+BABEL_OP1_102_57629_20121010_011015_inLine +BABEL_OP1_102_57629_20121010_011015_outLine +BABEL_OP1_102_57907_20121013_035627_inLine +BABEL_OP1_102_57907_20121013_035627_outLine +BABEL_OP1_102_58715_20120425_190758_inLine +BABEL_OP1_102_58715_20120425_190758_outLine +BABEL_OP1_102_58863_20120404_195038_inLine +BABEL_OP1_102_58863_20120404_195038_outLine +BABEL_OP1_102_58947_20121106_203812_inLine +BABEL_OP1_102_58947_20121106_203812_outLine +BABEL_OP1_102_58947_20121106_205338_inLine +BABEL_OP1_102_58947_20121106_205338_outLine +BABEL_OP1_102_59169_20120611_172953_inLine +BABEL_OP1_102_59169_20120611_172953_outLine +BABEL_OP1_102_59383_20121220_151350_inLine +BABEL_OP1_102_59383_20121220_151350_outLine +BABEL_OP1_102_59891_20120611_212238_inLine +BABEL_OP1_102_59891_20120611_212238_outLine +BABEL_OP1_102_59925_20121111_214225_inLine +BABEL_OP1_102_59925_20121111_214225_outLine +BABEL_OP1_102_60193_20120419_201756_inLine +BABEL_OP1_102_60193_20120419_201756_outLine +BABEL_OP1_102_60277_20120615_195600_inLine +BABEL_OP1_102_60277_20120615_195600_outLine +BABEL_OP1_102_60826_20120606_231535_inLine +BABEL_OP1_102_60826_20120606_231535_outLine +BABEL_OP1_102_60848_20121110_170724_inLine +BABEL_OP1_102_60848_20121110_170724_outLine +BABEL_OP1_102_60881_20120401_212818_inLine +BABEL_OP1_102_60881_20120401_212818_outLine +BABEL_OP1_102_60995_20121107_203546_inLine +BABEL_OP1_102_60995_20121107_203546_outLine +BABEL_OP1_102_61263_20121112_213923_inLine +BABEL_OP1_102_61263_20121112_213923_outLine +BABEL_OP1_102_61446_20120420_184155_inLine +BABEL_OP1_102_61446_20120420_184155_outLine +BABEL_OP1_102_62132_20120614_214158_inLine +BABEL_OP1_102_62132_20120614_214158_outLine +BABEL_OP1_102_62923_20130122_190544_inLine +BABEL_OP1_102_62923_20130122_190544_outLine +BABEL_OP1_102_63076_20121224_225415_inLine +BABEL_OP1_102_63076_20121224_225415_outLine +BABEL_OP1_102_64185_20120722_220159_inLine +BABEL_OP1_102_64185_20120722_220159_outLine +BABEL_OP1_102_64351_20120608_202610_inLine +BABEL_OP1_102_64351_20120608_202610_outLine +BABEL_OP1_102_65248_20120317_180718_inLine +BABEL_OP1_102_65248_20120317_180718_outLine +BABEL_OP1_102_65273_20121226_233200_inLine +BABEL_OP1_102_65273_20121226_233200_outLine +BABEL_OP1_102_65371_20121228_213615_inLine +BABEL_OP1_102_65371_20121228_213615_outLine +BABEL_OP1_102_65415_20120410_193034_inLine +BABEL_OP1_102_65415_20120410_193034_outLine +BABEL_OP1_102_65580_20120320_234602_inLine +BABEL_OP1_102_65580_20120320_234602_outLine +BABEL_OP1_102_65837_20121106_201713_inLine +BABEL_OP1_102_65837_20121106_201713_outLine +BABEL_OP1_102_66416_20120817_204557_inLine +BABEL_OP1_102_66416_20120817_204557_outLine +BABEL_OP1_102_66441_20120411_170112_inLine +BABEL_OP1_102_66441_20120411_170112_outLine +BABEL_OP1_102_66559_20121227_172234_inLine +BABEL_OP1_102_66559_20121227_172234_outLine +BABEL_OP1_102_67150_20121106_232551_inLine +BABEL_OP1_102_67150_20121106_232551_outLine +BABEL_OP1_102_67733_20120409_192100_inLine +BABEL_OP1_102_67733_20120409_192100_outLine +BABEL_OP1_102_67750_20120330_210301_inLine +BABEL_OP1_102_67750_20120330_210301_outLine +BABEL_OP1_102_67798_20120408_211247_inLine +BABEL_OP1_102_67798_20120408_211247_outLine +BABEL_OP1_102_67916_20121224_185018_inLine +BABEL_OP1_102_67916_20121224_185018_outLine +BABEL_OP1_102_69049_20120422_174706_inLine +BABEL_OP1_102_69049_20120422_174706_outLine +BABEL_OP1_102_69145_20121006_214000_inLine +BABEL_OP1_102_69145_20121006_214000_outLine +BABEL_OP1_102_69275_20121009_000322_inLine 
+BABEL_OP1_102_69275_20121009_000322_outLine +BABEL_OP1_102_69368_20120328_214605_inLine +BABEL_OP1_102_69368_20120328_214605_outLine +BABEL_OP1_102_70555_20120421_203231_inLine +BABEL_OP1_102_70555_20120421_203231_outLine +BABEL_OP1_102_71778_20120608_222028_inLine +BABEL_OP1_102_71778_20120608_222028_outLine +BABEL_OP1_102_72032_20120329_225115_inLine +BABEL_OP1_102_72032_20120329_225115_outLine +BABEL_OP1_102_72718_20121010_030640_inLine +BABEL_OP1_102_72718_20121010_030640_outLine +BABEL_OP1_102_72799_20120428_225215_inLine +BABEL_OP1_102_72799_20120428_225215_outLine +BABEL_OP1_102_73050_20120929_012255_inLine +BABEL_OP1_102_73050_20120929_012255_outLine +BABEL_OP1_102_73438_20121103_170431_inLine +BABEL_OP1_102_73438_20121103_170431_outLine +BABEL_OP1_102_73440_20120428_195653_inLine +BABEL_OP1_102_73440_20120428_195653_outLine +BABEL_OP1_102_73452_20121003_021245_inLine +BABEL_OP1_102_73452_20121003_021245_outLine +BABEL_OP1_102_73786_20120420_171039_inLine +BABEL_OP1_102_73786_20120420_171039_outLine +BABEL_OP1_102_74043_20120422_170724_inLine +BABEL_OP1_102_74043_20120422_170724_outLine +BABEL_OP1_102_74368_20121008_041653_inLine +BABEL_OP1_102_74368_20121008_041653_outLine +BABEL_OP1_102_74709_20120806_191528_inLine +BABEL_OP1_102_74709_20120806_191528_outLine +BABEL_OP1_102_74823_20120330_181459_inLine +BABEL_OP1_102_74823_20120330_181459_outLine +BABEL_OP1_102_75140_20120330_171509_inLine +BABEL_OP1_102_75140_20120330_171509_outLine +BABEL_OP1_102_75354_20121105_033257_inLine +BABEL_OP1_102_75354_20121105_033257_outLine +BABEL_OP1_102_75498_20120806_180214_inLine +BABEL_OP1_102_75498_20120806_180214_outLine +BABEL_OP1_102_75680_20121110_180407_inLine +BABEL_OP1_102_75680_20121110_180407_outLine +BABEL_OP1_102_75990_20120426_182351_inLine +BABEL_OP1_102_75990_20120426_182351_outLine +BABEL_OP1_102_76331_20120806_185250_inLine +BABEL_OP1_102_76331_20120806_185250_outLine +BABEL_OP1_102_76451_20120329_193459_inLine +BABEL_OP1_102_76451_20120329_193459_outLine +BABEL_OP1_102_77244_20121001_003159_inLine +BABEL_OP1_102_77244_20121001_003159_outLine +BABEL_OP1_102_77465_20120607_001521_inLine +BABEL_OP1_102_77465_20120607_001521_outLine +BABEL_OP1_102_77771_20121227_191404_inLine +BABEL_OP1_102_77771_20121227_191404_outLine +BABEL_OP1_102_77811_20130123_215211_inLine +BABEL_OP1_102_77811_20130123_215211_outLine +BABEL_OP1_102_78514_20120409_182010_inLine +BABEL_OP1_102_78514_20120409_182010_outLine +BABEL_OP1_102_79495_20120320_011136_inLine +BABEL_OP1_102_79495_20120320_011136_outLine +BABEL_OP1_102_80868_20121028_015553_inLine +BABEL_OP1_102_80868_20121028_015553_outLine +BABEL_OP1_102_81084_20120406_191910_inLine +BABEL_OP1_102_81084_20120406_191910_outLine +BABEL_OP1_102_81587_20121225_213038_inLine +BABEL_OP1_102_81587_20121225_213038_outLine +BABEL_OP1_102_81717_20130209_201202_inLine +BABEL_OP1_102_81717_20130209_201202_outLine +BABEL_OP1_102_81878_20120331_181439_inLine +BABEL_OP1_102_81878_20120331_181439_outLine +BABEL_OP1_102_81878_20120331_182958_inLine +BABEL_OP1_102_81878_20120331_182958_outLine +BABEL_OP1_102_82009_20121104_013002_inLine +BABEL_OP1_102_82009_20121104_013002_outLine +BABEL_OP1_102_82023_20120330_175253_inLine +BABEL_OP1_102_82023_20120330_175253_outLine +BABEL_OP1_102_82192_20120429_180649_inLine +BABEL_OP1_102_82192_20120429_180649_outLine +BABEL_OP1_102_82408_20120402_190241_inLine +BABEL_OP1_102_82408_20120402_190241_outLine +BABEL_OP1_102_83256_20120330_210950_inLine +BABEL_OP1_102_83256_20120330_210950_outLine 
+BABEL_OP1_102_83493_20120429_172305_inLine +BABEL_OP1_102_83493_20120429_172305_outLine +BABEL_OP1_102_83493_20120429_175508_inLine +BABEL_OP1_102_83493_20120429_175508_outLine +BABEL_OP1_102_83531_20120408_201200_inLine +BABEL_OP1_102_83531_20120408_201200_outLine +BABEL_OP1_102_83531_20120408_203827_inLine +BABEL_OP1_102_83531_20120408_203827_outLine +BABEL_OP1_102_83634_20130123_212154_inLine +BABEL_OP1_102_83634_20130123_212154_outLine +BABEL_OP1_102_83791_20120420_215616_inLine +BABEL_OP1_102_83791_20120420_215616_outLine +BABEL_OP1_102_84088_20120328_180739_inLine +BABEL_OP1_102_84088_20120328_180739_outLine +BABEL_OP1_102_84284_20121225_175332_inLine +BABEL_OP1_102_84284_20121225_175332_outLine +BABEL_OP1_102_84397_20121110_230552_inLine +BABEL_OP1_102_84397_20121110_230552_outLine +BABEL_OP1_102_84439_20120427_184114_inLine +BABEL_OP1_102_84439_20120427_184114_outLine +BABEL_OP1_102_84608_20120609_194053_inLine +BABEL_OP1_102_84608_20120609_194053_outLine +BABEL_OP1_102_84943_20120401_170153_inLine +BABEL_OP1_102_84943_20120401_170153_outLine +BABEL_OP1_102_85716_20120401_165708_inLine +BABEL_OP1_102_85716_20120401_165708_outLine +BABEL_OP1_102_86004_20120418_230109_inLine +BABEL_OP1_102_86004_20120418_230109_outLine +BABEL_OP1_102_86886_20121112_201306_inLine +BABEL_OP1_102_86886_20121112_201306_outLine +BABEL_OP1_102_87234_20121224_212540_inLine +BABEL_OP1_102_87234_20121224_212540_outLine +BABEL_OP1_102_87857_20120405_202526_inLine +BABEL_OP1_102_87857_20120405_202526_outLine +BABEL_OP1_102_87961_20120423_155726_inLine +BABEL_OP1_102_87961_20120423_155726_outLine +BABEL_OP1_102_88163_20121112_003006_inLine +BABEL_OP1_102_88163_20121112_003006_outLine +BABEL_OP1_102_89993_20120607_175900_inLine +BABEL_OP1_102_89993_20120607_175900_outLine +BABEL_OP1_102_90393_20120419_214927_inLine +BABEL_OP1_102_90393_20120419_214927_outLine +BABEL_OP1_102_90511_20120329_224306_inLine +BABEL_OP1_102_90511_20120329_224306_outLine +BABEL_OP1_102_90810_20120404_221650_inLine +BABEL_OP1_102_90810_20120404_221650_outLine +BABEL_OP1_102_90819_20120614_222542_inLine +BABEL_OP1_102_90819_20120614_222542_outLine +BABEL_OP1_102_90890_20120320_235811_inLine +BABEL_OP1_102_90890_20120320_235811_outLine +BABEL_OP1_102_90975_20130127_194034_inLine +BABEL_OP1_102_90975_20130127_194034_outLine +BABEL_OP1_102_90975_20130127_195301_inLine +BABEL_OP1_102_90975_20130127_195301_outLine +BABEL_OP1_102_91171_20121222_000026_inLine +BABEL_OP1_102_91171_20121222_000026_outLine +BABEL_OP1_102_91358_20121103_191541_inLine +BABEL_OP1_102_91358_20121103_191541_outLine +BABEL_OP1_102_91386_20121226_175240_inLine +BABEL_OP1_102_91386_20121226_175240_outLine +BABEL_OP1_102_91401_20120409_195325_inLine +BABEL_OP1_102_91401_20120409_195325_outLine +BABEL_OP1_102_91481_20120806_232222_inLine +BABEL_OP1_102_91481_20120806_232222_outLine +BABEL_OP1_102_91865_20130127_193426_inLine +BABEL_OP1_102_91865_20130127_193426_outLine +BABEL_OP1_102_92002_20120821_172434_inLine +BABEL_OP1_102_92002_20120821_172434_outLine +BABEL_OP1_102_92252_20120805_193105_inLine +BABEL_OP1_102_92252_20120805_193105_outLine +BABEL_OP1_102_92252_20120805_202508_inLine +BABEL_OP1_102_92252_20120805_202508_outLine +BABEL_OP1_102_92321_20120729_204129_inLine +BABEL_OP1_102_92321_20120729_204129_outLine +BABEL_OP1_102_92386_20120401_175909_inLine +BABEL_OP1_102_92386_20120401_175909_outLine +BABEL_OP1_102_92407_20120330_180101_inLine +BABEL_OP1_102_92407_20120330_180101_outLine +BABEL_OP1_102_92628_20120404_212106_inLine 
+BABEL_OP1_102_92628_20120404_212106_outLine +BABEL_OP1_102_92800_20120408_165253_inLine +BABEL_OP1_102_92800_20120408_165253_outLine +BABEL_OP1_102_93091_20120425_204602_inLine +BABEL_OP1_102_93091_20120425_204602_outLine +BABEL_OP1_102_93091_20120425_205745_inLine +BABEL_OP1_102_93091_20120425_205745_outLine +BABEL_OP1_102_93509_20120422_230046_inLine +BABEL_OP1_102_93509_20120422_230046_outLine +BABEL_OP1_102_93607_20120806_194627_inLine +BABEL_OP1_102_93607_20120806_194627_outLine +BABEL_OP1_102_94162_20130209_213329_inLine +BABEL_OP1_102_94162_20130209_213329_outLine +BABEL_OP1_102_94542_20130122_222709_inLine +BABEL_OP1_102_94542_20130122_222709_outLine +BABEL_OP1_102_94694_20120611_183126_inLine +BABEL_OP1_102_94694_20120611_183126_outLine +BABEL_OP1_102_94696_20130127_183814_inLine +BABEL_OP1_102_94696_20130127_183814_outLine +BABEL_OP1_102_95350_20120420_225049_inLine +BABEL_OP1_102_95350_20120420_225049_outLine +BABEL_OP1_102_95566_20121222_024129_inLine +BABEL_OP1_102_95566_20121222_024129_outLine +BABEL_OP1_102_95637_20120329_225942_inLine +BABEL_OP1_102_95637_20120329_225942_outLine +BABEL_OP1_102_95650_20120327_230850_inLine +BABEL_OP1_102_95650_20120327_230850_outLine +BABEL_OP1_102_95815_20120401_233401_inLine +BABEL_OP1_102_95815_20120401_233401_outLine +BABEL_OP1_102_95849_20121106_222829_inLine +BABEL_OP1_102_95849_20121106_222829_outLine +BABEL_OP1_102_96283_20120720_021526_inLine +BABEL_OP1_102_96283_20120720_021526_outLine +BABEL_OP1_102_96347_20120330_201932_inLine +BABEL_OP1_102_96347_20120330_201932_outLine +BABEL_OP1_102_96788_20120411_183347_inLine +BABEL_OP1_102_96788_20120411_183347_outLine +BABEL_OP1_102_97260_20120409_175649_inLine +BABEL_OP1_102_97260_20120409_175649_outLine +BABEL_OP1_102_97590_20121110_214746_inLine +BABEL_OP1_102_97590_20121110_214746_outLine +BABEL_OP1_102_97590_20121110_215543_inLine +BABEL_OP1_102_97590_20121110_215543_outLine +BABEL_OP1_102_98402_20121112_014920_inLine +BABEL_OP1_102_98402_20121112_014920_outLine +BABEL_OP1_102_98675_20130209_215547_inLine +BABEL_OP1_102_98675_20130209_215547_outLine +BABEL_OP1_102_99514_20120406_182505_inLine +BABEL_OP1_102_99514_20120406_182505_outLine +BABEL_OP1_102_99731_20121220_214209_inLine +BABEL_OP1_102_99731_20121220_214209_outLine +BABEL_OP1_102_99823_20120429_181728_inLine +BABEL_OP1_102_99823_20120429_181728_outLine diff --git a/egs/babel/s5d/conf/lists/102-assamese/train.untranscribed.list b/egs/babel/s5d/conf/lists/102-assamese/train.untranscribed.list new file mode 100644 index 00000000000..f93c4c32be7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/102-assamese/train.untranscribed.list @@ -0,0 +1,259 @@ +BABEL_OP1_102_11267_20120807_194639_inLine +BABEL_OP1_102_11267_20120807_194639_outLine +BABEL_OP1_102_11311_20120420_205813_inLine +BABEL_OP1_102_11311_20120420_205813_outLine +BABEL_OP1_102_14610_20120405_182316_inLine +BABEL_OP1_102_14610_20120405_182316_outLine +BABEL_OP1_102_14936_20120408_200722_inLine +BABEL_OP1_102_14936_20120408_200722_outLine +BABEL_OP1_102_16855_20121112_222619_inLine +BABEL_OP1_102_16855_20121112_222619_outLine +BABEL_OP1_102_17080_20120328_184723_inLine +BABEL_OP1_102_17080_20120328_184723_outLine +BABEL_OP1_102_19656_20120426_205905_inLine +BABEL_OP1_102_19656_20120426_205905_outLine +BABEL_OP1_102_22973_20121228_181929_inLine +BABEL_OP1_102_22973_20121228_181929_outLine +BABEL_OP1_102_24642_20121027_144752_inLine +BABEL_OP1_102_24642_20121027_144752_outLine +BABEL_OP1_102_24799_20120425_195004_inLine +BABEL_OP1_102_24799_20120425_195004_outLine 
+BABEL_OP1_102_25106_20120408_181647_inLine +BABEL_OP1_102_25106_20120408_181647_outLine +BABEL_OP1_102_25992_20120611_184443_inLine +BABEL_OP1_102_25992_20120611_184443_outLine +BABEL_OP1_102_26164_20121224_194642_inLine +BABEL_OP1_102_26164_20121224_194642_outLine +BABEL_OP1_102_27605_20120420_193239_inLine +BABEL_OP1_102_27605_20120420_193239_outLine +BABEL_OP1_102_27825_20120612_214044_inLine +BABEL_OP1_102_27825_20120612_214044_outLine +BABEL_OP1_102_27825_20120612_215834_inLine +BABEL_OP1_102_27825_20120612_215834_outLine +BABEL_OP1_102_27916_20121011_020742_inLine +BABEL_OP1_102_27916_20121011_020742_outLine +BABEL_OP1_102_29302_20120411_221747_inLine +BABEL_OP1_102_29302_20120411_221747_outLine +BABEL_OP1_102_29812_20120408_222336_inLine +BABEL_OP1_102_29812_20120408_222336_outLine +BABEL_OP1_102_30227_20121105_031526_inLine +BABEL_OP1_102_30227_20121105_031526_outLine +BABEL_OP1_102_31393_20120409_185950_inLine +BABEL_OP1_102_31393_20120409_185950_outLine +BABEL_OP1_102_31538_20120411_163952_inLine +BABEL_OP1_102_31538_20120411_163952_outLine +BABEL_OP1_102_31975_20120805_174531_inLine +BABEL_OP1_102_31975_20120805_174531_outLine +BABEL_OP1_102_32045_20120408_214902_inLine +BABEL_OP1_102_32045_20120408_214902_outLine +BABEL_OP1_102_32236_20130121_194429_inLine +BABEL_OP1_102_32236_20130121_194429_outLine +BABEL_OP1_102_32263_20120805_213534_inLine +BABEL_OP1_102_32263_20120805_213534_outLine +BABEL_OP1_102_32274_20120407_220211_inLine +BABEL_OP1_102_32274_20120407_220211_outLine +BABEL_OP1_102_34558_20120401_172719_inLine +BABEL_OP1_102_34558_20120401_172719_outLine +BABEL_OP1_102_35932_20120409_181050_inLine +BABEL_OP1_102_35932_20120409_181050_outLine +BABEL_OP1_102_35972_20120804_222857_inLine +BABEL_OP1_102_35972_20120804_222857_outLine +BABEL_OP1_102_36561_20120615_182603_inLine +BABEL_OP1_102_36561_20120615_182603_outLine +BABEL_OP1_102_37094_20120327_212647_inLine +BABEL_OP1_102_37094_20120327_212647_outLine +BABEL_OP1_102_37374_20120807_002505_inLine +BABEL_OP1_102_37374_20120807_002505_outLine +BABEL_OP1_102_37374_20120807_004102_inLine +BABEL_OP1_102_37374_20120807_004102_outLine +BABEL_OP1_102_39141_20121220_172812_inLine +BABEL_OP1_102_39141_20121220_172812_outLine +BABEL_OP1_102_39497_20120428_183546_inLine +BABEL_OP1_102_39497_20120428_183546_outLine +BABEL_OP1_102_39774_20121224_203424_inLine +BABEL_OP1_102_39774_20121224_203424_outLine +BABEL_OP1_102_40040_20120611_202254_inLine +BABEL_OP1_102_40040_20120611_202254_outLine +BABEL_OP1_102_41512_20121224_195155_inLine +BABEL_OP1_102_41512_20121224_195155_outLine +BABEL_OP1_102_41686_20120404_200841_inLine +BABEL_OP1_102_41686_20120404_200841_outLine +BABEL_OP1_102_42420_20121225_200910_inLine +BABEL_OP1_102_42420_20121225_200910_outLine +BABEL_OP1_102_43317_20120804_190955_inLine +BABEL_OP1_102_43317_20120804_190955_outLine +BABEL_OP1_102_44038_20121110_191648_inLine +BABEL_OP1_102_44038_20121110_191648_outLine +BABEL_OP1_102_44117_20121105_205012_inLine +BABEL_OP1_102_44117_20121105_205012_outLine +BABEL_OP1_102_44500_20120609_205327_inLine +BABEL_OP1_102_44500_20120609_205327_outLine +BABEL_OP1_102_44744_20120330_204705_inLine +BABEL_OP1_102_44744_20120330_204705_outLine +BABEL_OP1_102_45145_20120417_144517_inLine +BABEL_OP1_102_45145_20120417_144517_outLine +BABEL_OP1_102_45512_20120408_174807_inLine +BABEL_OP1_102_45512_20120408_174807_outLine +BABEL_OP1_102_45655_20120405_201151_inLine +BABEL_OP1_102_45655_20120405_201151_outLine +BABEL_OP1_102_45655_20120405_205759_inLine 
+BABEL_OP1_102_45655_20120405_205759_outLine +BABEL_OP1_102_47037_20120805_212557_inLine +BABEL_OP1_102_47037_20120805_212557_outLine +BABEL_OP1_102_47433_20120329_001114_inLine +BABEL_OP1_102_47433_20120329_001114_outLine +BABEL_OP1_102_47733_20120607_225347_inLine +BABEL_OP1_102_47733_20120607_225347_outLine +BABEL_OP1_102_49173_20121028_022705_inLine +BABEL_OP1_102_49173_20121028_022705_outLine +BABEL_OP1_102_51448_20121111_155248_inLine +BABEL_OP1_102_51448_20121111_155248_outLine +BABEL_OP1_102_52325_20120430_191407_inLine +BABEL_OP1_102_52325_20120430_191407_outLine +BABEL_OP1_102_52515_20120910_021046_inLine +BABEL_OP1_102_52515_20120910_021046_outLine +BABEL_OP1_102_52900_20120426_225238_inLine +BABEL_OP1_102_52900_20120426_225238_outLine +BABEL_OP1_102_52900_20120426_230606_inLine +BABEL_OP1_102_52900_20120426_230606_outLine +BABEL_OP1_102_52913_20121224_231026_inLine +BABEL_OP1_102_52913_20121224_231026_outLine +BABEL_OP1_102_53278_20120607_205252_inLine +BABEL_OP1_102_53278_20120607_205252_outLine +BABEL_OP1_102_53649_20121008_013457_inLine +BABEL_OP1_102_53649_20121008_013457_outLine +BABEL_OP1_102_54818_20120407_212156_inLine +BABEL_OP1_102_54818_20120407_212156_outLine +BABEL_OP1_102_55786_20120401_224618_inLine +BABEL_OP1_102_55786_20120401_224618_outLine +BABEL_OP1_102_57277_20121227_213448_inLine +BABEL_OP1_102_57277_20121227_213448_outLine +BABEL_OP1_102_57454_20120615_183718_inLine +BABEL_OP1_102_57454_20120615_183718_outLine +BABEL_OP1_102_58536_20120426_204822_inLine +BABEL_OP1_102_60064_20121006_215918_inLine +BABEL_OP1_102_60064_20121006_215918_outLine +BABEL_OP1_102_61351_20121220_161410_inLine +BABEL_OP1_102_61351_20121220_161410_outLine +BABEL_OP1_102_62163_20121011_012642_inLine +BABEL_OP1_102_62163_20121011_012642_outLine +BABEL_OP1_102_62277_20120722_203834_inLine +BABEL_OP1_102_62277_20120722_203834_outLine +BABEL_OP1_102_63233_20120405_184701_inLine +BABEL_OP1_102_63233_20120405_184701_outLine +BABEL_OP1_102_63339_20120328_190947_inLine +BABEL_OP1_102_63339_20120328_190947_outLine +BABEL_OP1_102_63353_20120409_193206_inLine +BABEL_OP1_102_63353_20120409_193206_outLine +BABEL_OP1_102_63353_20120409_194011_inLine +BABEL_OP1_102_63353_20120409_194011_outLine +BABEL_OP1_102_64372_20120406_183945_inLine +BABEL_OP1_102_64372_20120406_183945_outLine +BABEL_OP1_102_65989_20120607_000921_inLine +BABEL_OP1_102_65989_20120607_000921_outLine +BABEL_OP1_102_66275_20120719_004257_inLine +BABEL_OP1_102_66275_20120719_004257_outLine +BABEL_OP1_102_66883_20120428_204106_inLine +BABEL_OP1_102_66883_20120428_204106_outLine +BABEL_OP1_102_67304_20120806_203538_inLine +BABEL_OP1_102_67304_20120806_203538_outLine +BABEL_OP1_102_68191_20120606_224106_inLine +BABEL_OP1_102_68191_20120606_224106_outLine +BABEL_OP1_102_68337_20120420_004336_inLine +BABEL_OP1_102_68337_20120420_004336_outLine +BABEL_OP1_102_68671_20121014_155929_inLine +BABEL_OP1_102_68671_20121014_155929_outLine +BABEL_OP1_102_69548_20120330_180855_inLine +BABEL_OP1_102_69548_20120330_180855_outLine +BABEL_OP1_102_72907_20121219_204634_inLine +BABEL_OP1_102_72907_20121219_204634_outLine +BABEL_OP1_102_74295_20120618_234350_inLine +BABEL_OP1_102_74295_20120618_234350_outLine +BABEL_OP1_102_74625_20121010_165038_inLine +BABEL_OP1_102_74625_20121010_165038_outLine +BABEL_OP1_102_75151_20121017_164432_inLine +BABEL_OP1_102_75151_20121017_164432_outLine +BABEL_OP1_102_75151_20121017_164918_inLine +BABEL_OP1_102_75151_20121017_164918_outLine +BABEL_OP1_102_75248_20121106_201226_inLine 
+BABEL_OP1_102_75248_20121106_201226_outLine +BABEL_OP1_102_75333_20130121_191749_inLine +BABEL_OP1_102_75333_20130121_191749_outLine +BABEL_OP1_102_75871_20120910_013715_inLine +BABEL_OP1_102_75871_20120910_013715_outLine +BABEL_OP1_102_77238_20120331_175602_inLine +BABEL_OP1_102_77238_20120331_175602_outLine +BABEL_OP1_102_77238_20120331_181840_inLine +BABEL_OP1_102_77238_20120331_181840_outLine +BABEL_OP1_102_77697_20130128_202557_inLine +BABEL_OP1_102_77697_20130128_202557_outLine +BABEL_OP1_102_77697_20130128_203734_inLine +BABEL_OP1_102_77697_20130128_203734_outLine +BABEL_OP1_102_78290_20121010_135127_inLine +BABEL_OP1_102_78290_20121010_135127_outLine +BABEL_OP1_102_78681_20121112_013035_inLine +BABEL_OP1_102_78681_20121112_013035_outLine +BABEL_OP1_102_79293_20120404_182947_inLine +BABEL_OP1_102_79293_20120404_182947_outLine +BABEL_OP1_102_80075_20120617_182928_inLine +BABEL_OP1_102_80075_20120617_182928_outLine +BABEL_OP1_102_80247_20130121_182518_inLine +BABEL_OP1_102_80247_20130121_182518_outLine +BABEL_OP1_102_81053_20130127_205227_inLine +BABEL_OP1_102_81053_20130127_205227_outLine +BABEL_OP1_102_81119_20130209_215021_inLine +BABEL_OP1_102_81119_20130209_215021_outLine +BABEL_OP1_102_81642_20120608_184707_inLine +BABEL_OP1_102_81642_20120608_184707_outLine +BABEL_OP1_102_81647_20121010_143838_inLine +BABEL_OP1_102_81647_20121010_143838_outLine +BABEL_OP1_102_81820_20130121_175432_inLine +BABEL_OP1_102_81820_20130121_175432_outLine +BABEL_OP1_102_82881_20120330_215822_inLine +BABEL_OP1_102_82881_20120330_215822_outLine +BABEL_OP1_102_83186_20120817_222832_inLine +BABEL_OP1_102_83186_20120817_222832_outLine +BABEL_OP1_102_83219_20121112_012249_inLine +BABEL_OP1_102_83219_20121112_012249_outLine +BABEL_OP1_102_83702_20120419_173053_inLine +BABEL_OP1_102_83702_20120419_173053_outLine +BABEL_OP1_102_84491_20121026_003510_inLine +BABEL_OP1_102_84491_20121026_003510_outLine +BABEL_OP1_102_86998_20121110_171744_inLine +BABEL_OP1_102_86998_20121110_171744_outLine +BABEL_OP1_102_87077_20120429_190133_inLine +BABEL_OP1_102_87077_20120429_190133_outLine +BABEL_OP1_102_87634_20120327_210105_inLine +BABEL_OP1_102_87634_20120327_210105_outLine +BABEL_OP1_102_88294_20120331_223132_inLine +BABEL_OP1_102_88383_20120409_194253_inLine +BABEL_OP1_102_88383_20120409_194253_outLine +BABEL_OP1_102_88532_20120805_223539_inLine +BABEL_OP1_102_88532_20120805_223539_outLine +BABEL_OP1_102_88982_20120607_221313_inLine +BABEL_OP1_102_88982_20120607_221313_outLine +BABEL_OP1_102_89345_20120331_184511_inLine +BABEL_OP1_102_89345_20120331_184511_outLine +BABEL_OP1_102_89345_20120331_190311_inLine +BABEL_OP1_102_89345_20120331_190311_outLine +BABEL_OP1_102_90024_20121106_025738_inLine +BABEL_OP1_102_90024_20121106_025738_outLine +BABEL_OP1_102_90490_20120318_194705_inLine +BABEL_OP1_102_90490_20120318_194705_outLine +BABEL_OP1_102_90730_20120420_175543_inLine +BABEL_OP1_102_90951_20120929_024352_inLine +BABEL_OP1_102_90951_20120929_024352_outLine +BABEL_OP1_102_91409_20120425_213805_inLine +BABEL_OP1_102_91409_20120425_213805_outLine +BABEL_OP1_102_92642_20120329_225854_inLine +BABEL_OP1_102_92642_20120329_225854_outLine +BABEL_OP1_102_92735_20120425_232435_inLine +BABEL_OP1_102_92735_20120425_232435_outLine +BABEL_OP1_102_92820_20121104_193517_inLine +BABEL_OP1_102_92820_20121104_193517_outLine +BABEL_OP1_102_94218_20121112_171836_inLine +BABEL_OP1_102_94218_20121112_171836_outLine +BABEL_OP1_102_97052_20121013_023448_inLine +BABEL_OP1_102_97052_20121013_023448_outLine 
+BABEL_OP1_102_99694_20120401_230049_inLine +BABEL_OP1_102_99694_20120401_230049_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/dev.list b/egs/babel/s5d/conf/lists/103-bengali/dev.list new file mode 100644 index 00000000000..4dd26d694d3 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/dev.list @@ -0,0 +1,125 @@ +BABEL_OP1_103_10569_20111221_201913_inLine +BABEL_OP1_103_10569_20111221_201913_outLine +BABEL_OP1_103_10576_20111221_214850_inLine +BABEL_OP1_103_10576_20111221_214850_outLine +BABEL_OP1_103_11153_20120204_001459_inLine +BABEL_OP1_103_11153_20120204_001459_outLine +BABEL_OP1_103_12600_20120127_235915_inLine +BABEL_OP1_103_12600_20120127_235915_outLine +BABEL_OP1_103_13990_20120121_225453_inLine +BABEL_OP1_103_13990_20120121_225453_outLine +BABEL_OP1_103_14002_20120116_220151_inLine +BABEL_OP1_103_14002_20120116_220151_outLine +BABEL_OP1_103_14852_20120203_024637_inLine +BABEL_OP1_103_14852_20120203_024637_outLine +BABEL_OP1_103_17081_20120608_004038_inLine +BABEL_OP1_103_17081_20120608_004038_outLine +BABEL_OP1_103_21203_20120523_225358_inLine +BABEL_OP1_103_21203_20120523_225358_outLine +BABEL_OP1_103_22340_20120513_220417_inLine +BABEL_OP1_103_22340_20120513_220417_outLine +BABEL_OP1_103_24503_20120127_182430_inLine +BABEL_OP1_103_24503_20120127_182430_outLine +BABEL_OP1_103_24810_20120114_225518_inLine +BABEL_OP1_103_24810_20120114_225518_outLine +BABEL_OP1_103_25067_20120129_230104_inLine +BABEL_OP1_103_25067_20120129_230104_outLine +BABEL_OP1_103_27912_20120123_185402_inLine +BABEL_OP1_103_27912_20120123_185402_outLine +BABEL_OP1_103_31084_20120729_201226_inLine +BABEL_OP1_103_31084_20120729_201226_outLine +BABEL_OP1_103_37083_20120125_224559_inLine +BABEL_OP1_103_38382_20120110_013824_inLine +BABEL_OP1_103_38382_20120110_013824_outLine +BABEL_OP1_103_40114_20120122_183602_inLine +BABEL_OP1_103_40114_20120122_183602_outLine +BABEL_OP1_103_41417_20120122_224848_inLine +BABEL_OP1_103_41417_20120122_224848_outLine +BABEL_OP1_103_42929_20120118_211148_inLine +BABEL_OP1_103_42929_20120118_211148_outLine +BABEL_OP1_103_42929_20120118_212321_inLine +BABEL_OP1_103_42929_20120118_212321_outLine +BABEL_OP1_103_43051_20120524_163506_inLine +BABEL_OP1_103_43051_20120524_163506_outLine +BABEL_OP1_103_44799_20120131_222925_inLine +BABEL_OP1_103_44799_20120131_222925_outLine +BABEL_OP1_103_48834_20111227_010514_inLine +BABEL_OP1_103_48834_20111227_010514_outLine +BABEL_OP1_103_49329_20120131_223617_inLine +BABEL_OP1_103_49329_20120131_223617_outLine +BABEL_OP1_103_50583_20120114_233345_inLine +BABEL_OP1_103_50583_20120114_233345_outLine +BABEL_OP1_103_50614_20120130_225030_inLine +BABEL_OP1_103_50614_20120130_225030_outLine +BABEL_OP1_103_50910_20120122_001708_inLine +BABEL_OP1_103_50910_20120122_001708_outLine +BABEL_OP1_103_52067_20120127_020600_inLine +BABEL_OP1_103_52845_20120126_200807_inLine +BABEL_OP1_103_52845_20120126_200807_outLine +BABEL_OP1_103_53805_20120126_211949_inLine +BABEL_OP1_103_53805_20120126_211949_outLine +BABEL_OP1_103_57087_20120204_181410_inLine +BABEL_OP1_103_57087_20120204_181410_outLine +BABEL_OP1_103_57721_20120531_194610_inLine +BABEL_OP1_103_57721_20120531_194610_outLine +BABEL_OP1_103_57742_20120125_200619_inLine +BABEL_OP1_103_57742_20120125_200619_outLine +BABEL_OP1_103_60462_20120521_181224_inLine +BABEL_OP1_103_60462_20120521_181224_outLine +BABEL_OP1_103_62038_20111230_004215_inLine +BABEL_OP1_103_62038_20111230_004215_outLine +BABEL_OP1_103_62169_20120304_153842_inLine +BABEL_OP1_103_62169_20120304_153842_outLine 
+BABEL_OP1_103_62584_20120305_152943_inLine +BABEL_OP1_103_62584_20120305_152943_outLine +BABEL_OP1_103_62837_20120307_223844_inLine +BABEL_OP1_103_62837_20120307_223844_outLine +BABEL_OP1_103_62837_20120307_225550_inLine +BABEL_OP1_103_62837_20120307_225550_outLine +BABEL_OP1_103_63220_20120514_232049_inLine +BABEL_OP1_103_63220_20120514_232049_outLine +BABEL_OP1_103_63444_20120316_030633_inLine +BABEL_OP1_103_64297_20120514_162741_inLine +BABEL_OP1_103_64297_20120514_162741_outLine +BABEL_OP1_103_64853_20120405_163727_inLine +BABEL_OP1_103_64853_20120405_163727_outLine +BABEL_OP1_103_65597_20120530_213140_inLine +BABEL_OP1_103_65597_20120530_213140_outLine +BABEL_OP1_103_65895_20120229_202918_inLine +BABEL_OP1_103_65895_20120229_202918_outLine +BABEL_OP1_103_66313_20120229_230907_inLine +BABEL_OP1_103_66313_20120229_230907_outLine +BABEL_OP1_103_66351_20120111_041605_inLine +BABEL_OP1_103_66351_20120111_041605_outLine +BABEL_OP1_103_66757_20120131_215301_inLine +BABEL_OP1_103_66757_20120131_215301_outLine +BABEL_OP1_103_67421_20120310_230757_inLine +BABEL_OP1_103_67421_20120310_230757_outLine +BABEL_OP1_103_69894_20120307_152955_inLine +BABEL_OP1_103_69894_20120307_152955_outLine +BABEL_OP1_103_76654_20120519_203100_inLine +BABEL_OP1_103_76654_20120519_203100_outLine +BABEL_OP1_103_77082_20120203_232638_inLine +BABEL_OP1_103_77082_20120203_232638_outLine +BABEL_OP1_103_80105_20120530_211541_inLine +BABEL_OP1_103_80105_20120530_211541_outLine +BABEL_OP1_103_80875_20120522_224314_inLine +BABEL_OP1_103_80875_20120522_224314_outLine +BABEL_OP1_103_81318_20120104_020938_inLine +BABEL_OP1_103_81318_20120104_020938_outLine +BABEL_OP1_103_81773_20120101_024120_inLine +BABEL_OP1_103_81773_20120101_024120_outLine +BABEL_OP1_103_82526_20120118_185334_inLine +BABEL_OP1_103_82526_20120118_185334_outLine +BABEL_OP1_103_86207_20120127_145936_inLine +BABEL_OP1_103_86207_20120127_145936_outLine +BABEL_OP1_103_88690_20120201_005057_inLine +BABEL_OP1_103_88690_20120201_005057_outLine +BABEL_OP1_103_91202_20111229_185342_inLine +BABEL_OP1_103_91202_20111229_185342_outLine +BABEL_OP1_103_91275_20120529_195749_inLine +BABEL_OP1_103_91275_20120529_195749_outLine +BABEL_OP1_103_93273_20120123_022109_inLine +BABEL_OP1_103_93273_20120123_022109_outLine +BABEL_OP1_103_95826_20120201_001701_inLine +BABEL_OP1_103_95826_20120201_001701_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/eval.list b/egs/babel/s5d/conf/lists/103-bengali/eval.list new file mode 100644 index 00000000000..03220030e17 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/eval.list @@ -0,0 +1,193 @@ +BABEL_OP1_103_10490_20111220_235407_inLine +BABEL_OP1_103_10490_20111220_235407_outLine +BABEL_OP1_103_11146_20120528_182053_inLine +BABEL_OP1_103_11146_20120528_182053_outLine +BABEL_OP1_103_11168_20111228_213615_inLine +BABEL_OP1_103_11168_20111228_213615_outLine +BABEL_OP1_103_11388_20120520_161554_inLine +BABEL_OP1_103_11511_20120526_232041_inLine +BABEL_OP1_103_11511_20120526_232041_outLine +BABEL_OP1_103_12959_20120127_201055_inLine +BABEL_OP1_103_12959_20120127_201055_outLine +BABEL_OP1_103_14503_20120117_213020_inLine +BABEL_OP1_103_14503_20120117_213020_outLine +BABEL_OP1_103_14713_20120123_195706_inLine +BABEL_OP1_103_14713_20120123_195706_outLine +BABEL_OP1_103_16352_20120201_160631_inLine +BABEL_OP1_103_16352_20120201_160631_outLine +BABEL_OP1_103_17749_20120115_221220_inLine +BABEL_OP1_103_17749_20120115_221220_outLine +BABEL_OP1_103_23322_20120519_165208_inLine +BABEL_OP1_103_23322_20120519_165208_outLine 
+BABEL_OP1_103_24427_20120513_210712_inLine +BABEL_OP1_103_24427_20120513_210712_outLine +BABEL_OP1_103_25147_20120201_164613_inLine +BABEL_OP1_103_25147_20120201_164614_outLine +BABEL_OP1_103_25525_20120114_010656_inLine +BABEL_OP1_103_25525_20120114_010656_outLine +BABEL_OP1_103_27267_20120101_213815_inLine +BABEL_OP1_103_27267_20120101_213815_outLine +BABEL_OP1_103_28046_20120407_154949_inLine +BABEL_OP1_103_28046_20120407_154949_outLine +BABEL_OP1_103_28516_20120421_231427_inLine +BABEL_OP1_103_28516_20120421_231428_outLine +BABEL_OP1_103_28533_20120421_224406_inLine +BABEL_OP1_103_28533_20120421_224406_outLine +BABEL_OP1_103_28534_20120421_222000_inLine +BABEL_OP1_103_28561_20120421_215523_inLine +BABEL_OP1_103_28561_20120421_215523_outLine +BABEL_OP1_103_28607_20120421_213119_inLine +BABEL_OP1_103_28607_20120421_213119_outLine +BABEL_OP1_103_28834_20120421_205128_inLine +BABEL_OP1_103_28834_20120421_205941_inLine +BABEL_OP1_103_28922_20120421_202038_inLine +BABEL_OP1_103_28922_20120421_202038_outLine +BABEL_OP1_103_29061_20120421_195632_inLine +BABEL_OP1_103_29061_20120421_195632_outLine +BABEL_OP1_103_29397_20120421_192844_inLine +BABEL_OP1_103_29397_20120421_192844_outLine +BABEL_OP1_103_29411_20120421_190505_inLine +BABEL_OP1_103_29411_20120421_190505_outLine +BABEL_OP1_103_29471_20120421_183732_inLine +BABEL_OP1_103_29471_20120421_183732_outLine +BABEL_OP1_103_29489_20120421_181719_inLine +BABEL_OP1_103_29489_20120421_181719_outLine +BABEL_OP1_103_29513_20120421_170000_inLine +BABEL_OP1_103_29513_20120421_170000_outLine +BABEL_OP1_103_30747_20120111_231823_inLine +BABEL_OP1_103_30747_20120111_231823_outLine +BABEL_OP1_103_30848_20120102_001515_inLine +BABEL_OP1_103_30848_20120102_001515_outLine +BABEL_OP1_103_32703_20120128_203538_inLine +BABEL_OP1_103_32703_20120128_203538_outLine +BABEL_OP1_103_33590_20120122_165207_inLine +BABEL_OP1_103_33590_20120122_165207_outLine +BABEL_OP1_103_33590_20120122_170610_inLine +BABEL_OP1_103_33590_20120122_170610_outLine +BABEL_OP1_103_33809_20120122_184348_inLine +BABEL_OP1_103_33809_20120122_184349_outLine +BABEL_OP1_103_34102_20120528_233758_inLine +BABEL_OP1_103_34102_20120528_233758_outLine +BABEL_OP1_103_35052_20120118_164925_inLine +BABEL_OP1_103_35052_20120118_164925_outLine +BABEL_OP1_103_35052_20120118_171428_inLine +BABEL_OP1_103_35052_20120118_171428_outLine +BABEL_OP1_103_36842_20120120_013653_inLine +BABEL_OP1_103_36842_20120120_013653_outLine +BABEL_OP1_103_37798_20120121_014828_inLine +BABEL_OP1_103_37798_20120121_014828_outLine +BABEL_OP1_103_40701_20120523_230827_inLine +BABEL_OP1_103_40701_20120523_230827_outLine +BABEL_OP1_103_40701_20120523_232042_inLine +BABEL_OP1_103_40701_20120523_232042_outLine +BABEL_OP1_103_41871_20120127_015943_inLine +BABEL_OP1_103_41871_20120127_015943_outLine +BABEL_OP1_103_43725_20120518_195136_inLine +BABEL_OP1_103_43725_20120518_195136_outLine +BABEL_OP1_103_44141_20120520_005301_inLine +BABEL_OP1_103_44141_20120520_005301_outLine +BABEL_OP1_103_44515_20120104_001740_inLine +BABEL_OP1_103_44515_20120104_001740_outLine +BABEL_OP1_103_44515_20120104_002748_inLine +BABEL_OP1_103_44515_20120104_002749_outLine +BABEL_OP1_103_46776_20120520_000315_inLine +BABEL_OP1_103_46776_20120520_000315_outLine +BABEL_OP1_103_47313_20120110_161032_inLine +BABEL_OP1_103_47313_20120110_161032_outLine +BABEL_OP1_103_50697_20120523_192842_inLine +BABEL_OP1_103_50697_20120523_192842_outLine +BABEL_OP1_103_51047_20120129_041648_inLine +BABEL_OP1_103_51047_20120129_041648_outLine 
+BABEL_OP1_103_51079_20120125_205839_inLine +BABEL_OP1_103_51079_20120125_205839_outLine +BABEL_OP1_103_51791_20120207_192918_inLine +BABEL_OP1_103_51791_20120207_192918_outLine +BABEL_OP1_103_52306_20120204_205158_inLine +BABEL_OP1_103_52306_20120204_205158_outLine +BABEL_OP1_103_52570_20120202_202812_inLine +BABEL_OP1_103_52570_20120202_202812_outLine +BABEL_OP1_103_53334_20120115_004411_inLine +BABEL_OP1_103_53334_20120115_004411_outLine +BABEL_OP1_103_54178_20120205_163228_inLine +BABEL_OP1_103_54178_20120205_163228_outLine +BABEL_OP1_103_54673_20120203_032314_inLine +BABEL_OP1_103_54673_20120203_032314_outLine +BABEL_OP1_103_56452_20120131_183725_inLine +BABEL_OP1_103_56452_20120131_183725_outLine +BABEL_OP1_103_56452_20120131_185001_inLine +BABEL_OP1_103_56452_20120131_185001_outLine +BABEL_OP1_103_56945_20120125_234057_inLine +BABEL_OP1_103_56945_20120125_234057_outLine +BABEL_OP1_103_57320_20120519_014148_inLine +BABEL_OP1_103_57320_20120519_014148_outLine +BABEL_OP1_103_57618_20120206_004508_inLine +BABEL_OP1_103_57618_20120206_004508_outLine +BABEL_OP1_103_58807_20120106_230153_inLine +BABEL_OP1_103_58807_20120106_230153_outLine +BABEL_OP1_103_59399_20120123_013608_inLine +BABEL_OP1_103_59399_20120123_013608_outLine +BABEL_OP1_103_61606_20120524_001028_inLine +BABEL_OP1_103_61735_20120102_220532_inLine +BABEL_OP1_103_61735_20120102_220532_outLine +BABEL_OP1_103_62671_20120521_174222_inLine +BABEL_OP1_103_62671_20120521_174222_outLine +BABEL_OP1_103_62941_20120311_004945_inLine +BABEL_OP1_103_62941_20120311_004945_outLine +BABEL_OP1_103_63204_20120312_013958_inLine +BABEL_OP1_103_63204_20120312_013958_outLine +BABEL_OP1_103_63327_20120312_024230_inLine +BABEL_OP1_103_63327_20120312_024230_outLine +BABEL_OP1_103_63439_20120315_041347_inLine +BABEL_OP1_103_63439_20120315_041347_outLine +BABEL_OP1_103_63548_20120319_031651_inLine +BABEL_OP1_103_63548_20120319_031651_outLine +BABEL_OP1_103_63575_20120319_044400_inLine +BABEL_OP1_103_63575_20120319_044400_outLine +BABEL_OP1_103_65788_20120524_153801_inLine +BABEL_OP1_103_65788_20120524_153801_outLine +BABEL_OP1_103_66784_20120111_032559_inLine +BABEL_OP1_103_66784_20120111_032559_outLine +BABEL_OP1_103_66825_20120305_214401_inLine +BABEL_OP1_103_66825_20120305_214401_outLine +BABEL_OP1_103_67716_20120106_145810_inLine +BABEL_OP1_103_67716_20120106_145810_outLine +BABEL_OP1_103_67721_20111229_210017_inLine +BABEL_OP1_103_67721_20111229_210017_outLine +BABEL_OP1_103_68063_20120601_155054_inLine +BABEL_OP1_103_68063_20120601_155054_outLine +BABEL_OP1_103_70108_20120516_193813_inLine +BABEL_OP1_103_70108_20120516_193813_outLine +BABEL_OP1_103_70466_20120526_205046_inLine +BABEL_OP1_103_70466_20120526_205046_outLine +BABEL_OP1_103_72693_20120522_233148_inLine +BABEL_OP1_103_72693_20120522_233148_outLine +BABEL_OP1_103_73171_20120511_003731_inLine +BABEL_OP1_103_73171_20120511_003731_outLine +BABEL_OP1_103_78737_20120107_144050_inLine +BABEL_OP1_103_78737_20120107_144050_outLine +BABEL_OP1_103_80424_20120523_223457_inLine +BABEL_OP1_103_80424_20120523_223457_outLine +BABEL_OP1_103_83137_20120101_220939_inLine +BABEL_OP1_103_83137_20120101_220939_outLine +BABEL_OP1_103_83733_20120114_230510_inLine +BABEL_OP1_103_83733_20120114_230510_outLine +BABEL_OP1_103_88434_20120616_183901_inLine +BABEL_OP1_103_88434_20120616_183901_outLine +BABEL_OP1_103_90432_20111231_212535_inLine +BABEL_OP1_103_90432_20111231_212535_outLine +BABEL_OP1_103_91407_20120204_221709_inLine +BABEL_OP1_103_91407_20120204_221709_outLine 
+BABEL_OP1_103_92880_20120522_232802_inLine +BABEL_OP1_103_92880_20120522_232802_outLine +BABEL_OP1_103_93227_20120116_190634_inLine +BABEL_OP1_103_93227_20120116_190634_outLine +BABEL_OP1_103_93748_20120114_210648_inLine +BABEL_OP1_103_93748_20120114_210648_outLine +BABEL_OP1_103_96956_20120519_002918_inLine +BABEL_OP1_103_96956_20120519_002918_outLine +BABEL_OP1_103_97738_20120521_183220_inLine +BABEL_OP1_103_97738_20120521_183220_outLine +BABEL_OP1_103_99354_20120206_194646_inLine +BABEL_OP1_103_99354_20120206_194646_outLine +BABEL_OP1_103_99354_20120206_195707_inLine +BABEL_OP1_103_99354_20120206_195707_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/evalpart1.list b/egs/babel/s5d/conf/lists/103-bengali/evalpart1.list new file mode 100644 index 00000000000..1c606caf3b3 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/evalpart1.list @@ -0,0 +1,66 @@ +BABEL_OP1_103_11146_20120528_182053_inLine +BABEL_OP1_103_11146_20120528_182053_outLine +BABEL_OP1_103_11168_20111228_213615_inLine +BABEL_OP1_103_11168_20111228_213615_outLine +BABEL_OP1_103_16352_20120201_160631_inLine +BABEL_OP1_103_16352_20120201_160631_outLine +BABEL_OP1_103_17749_20120115_221220_inLine +BABEL_OP1_103_17749_20120115_221220_outLine +BABEL_OP1_103_24427_20120513_210712_inLine +BABEL_OP1_103_24427_20120513_210712_outLine +BABEL_OP1_103_25147_20120201_164613_inLine +BABEL_OP1_103_25147_20120201_164614_outLine +BABEL_OP1_103_28046_20120407_154949_inLine +BABEL_OP1_103_28046_20120407_154949_outLine +BABEL_OP1_103_30747_20120111_231823_inLine +BABEL_OP1_103_30747_20120111_231823_outLine +BABEL_OP1_103_33809_20120122_184348_inLine +BABEL_OP1_103_33809_20120122_184349_outLine +BABEL_OP1_103_35052_20120118_164925_inLine +BABEL_OP1_103_35052_20120118_164925_outLine +BABEL_OP1_103_35052_20120118_171428_inLine +BABEL_OP1_103_35052_20120118_171428_outLine +BABEL_OP1_103_37798_20120121_014828_inLine +BABEL_OP1_103_37798_20120121_014828_outLine +BABEL_OP1_103_41871_20120127_015943_inLine +BABEL_OP1_103_41871_20120127_015943_outLine +BABEL_OP1_103_51079_20120125_205839_inLine +BABEL_OP1_103_51079_20120125_205839_outLine +BABEL_OP1_103_51791_20120207_192918_inLine +BABEL_OP1_103_51791_20120207_192918_outLine +BABEL_OP1_103_52306_20120204_205158_inLine +BABEL_OP1_103_52306_20120204_205158_outLine +BABEL_OP1_103_56452_20120131_183725_inLine +BABEL_OP1_103_56452_20120131_183725_outLine +BABEL_OP1_103_56452_20120131_185001_inLine +BABEL_OP1_103_56452_20120131_185001_outLine +BABEL_OP1_103_58807_20120106_230153_inLine +BABEL_OP1_103_58807_20120106_230153_outLine +BABEL_OP1_103_63204_20120312_013958_inLine +BABEL_OP1_103_63204_20120312_013958_outLine +BABEL_OP1_103_63327_20120312_024230_inLine +BABEL_OP1_103_63327_20120312_024230_outLine +BABEL_OP1_103_63439_20120315_041347_inLine +BABEL_OP1_103_63439_20120315_041347_outLine +BABEL_OP1_103_63548_20120319_031651_inLine +BABEL_OP1_103_63548_20120319_031651_outLine +BABEL_OP1_103_66784_20120111_032559_inLine +BABEL_OP1_103_66784_20120111_032559_outLine +BABEL_OP1_103_68063_20120601_155054_inLine +BABEL_OP1_103_68063_20120601_155054_outLine +BABEL_OP1_103_70466_20120526_205046_inLine +BABEL_OP1_103_70466_20120526_205046_outLine +BABEL_OP1_103_73171_20120511_003731_inLine +BABEL_OP1_103_73171_20120511_003731_outLine +BABEL_OP1_103_83137_20120101_220939_inLine +BABEL_OP1_103_83137_20120101_220939_outLine +BABEL_OP1_103_83733_20120114_230510_inLine +BABEL_OP1_103_83733_20120114_230510_outLine +BABEL_OP1_103_90432_20111231_212535_inLine +BABEL_OP1_103_90432_20111231_212535_outLine 
+BABEL_OP1_103_92880_20120522_232802_inLine +BABEL_OP1_103_92880_20120522_232802_outLine +BABEL_OP1_103_93748_20120114_210648_inLine +BABEL_OP1_103_93748_20120114_210648_outLine +BABEL_OP1_103_97738_20120521_183220_inLine +BABEL_OP1_103_97738_20120521_183220_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/train.FullLP.list b/egs/babel/s5d/conf/lists/103-bengali/train.FullLP.list new file mode 100644 index 00000000000..203b313ade2 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/train.FullLP.list @@ -0,0 +1,751 @@ +BABEL_OP1_103_10193_20111229_035249_inLine +BABEL_OP1_103_10193_20111229_035249_outLine +BABEL_OP1_103_10301_20111220_225237_inLine +BABEL_OP1_103_10301_20111220_225237_outLine +BABEL_OP1_103_10305_20111220_231100_inLine +BABEL_OP1_103_10305_20111220_231100_outLine +BABEL_OP1_103_10348_20120113_213854_inLine +BABEL_OP1_103_10348_20120113_213854_outLine +BABEL_OP1_103_10531_20120118_042000_inLine +BABEL_OP1_103_10531_20120118_042000_outLine +BABEL_OP1_103_10556_20111221_000031_inLine +BABEL_OP1_103_10556_20111221_000031_outLine +BABEL_OP1_103_10612_20111222_210911_inLine +BABEL_OP1_103_10612_20111222_210911_outLine +BABEL_OP1_103_10806_20111226_181132_inLine +BABEL_OP1_103_10806_20111226_181132_outLine +BABEL_OP1_103_11128_20120124_200626_inLine +BABEL_OP1_103_11128_20120124_200626_outLine +BABEL_OP1_103_11155_20111230_211626_inLine +BABEL_OP1_103_11155_20111230_211626_outLine +BABEL_OP1_103_11442_20120125_025606_inLine +BABEL_OP1_103_11442_20120125_025606_outLine +BABEL_OP1_103_12518_20111227_181020_inLine +BABEL_OP1_103_12518_20111227_181021_outLine +BABEL_OP1_103_12639_20111229_015021_inLine +BABEL_OP1_103_12639_20111229_015021_outLine +BABEL_OP1_103_12682_20120125_201902_inLine +BABEL_OP1_103_12682_20120125_201902_outLine +BABEL_OP1_103_12682_20120125_210238_inLine +BABEL_OP1_103_12682_20120125_210238_outLine +BABEL_OP1_103_12719_20120203_035027_inLine +BABEL_OP1_103_12719_20120203_035027_outLine +BABEL_OP1_103_12786_20111230_012748_inLine +BABEL_OP1_103_12786_20111230_012749_outLine +BABEL_OP1_103_12809_20111229_175926_inLine +BABEL_OP1_103_12809_20111229_175926_outLine +BABEL_OP1_103_12843_20120117_224043_inLine +BABEL_OP1_103_12843_20120117_224043_outLine +BABEL_OP1_103_13024_20111229_010356_inLine +BABEL_OP1_103_13024_20111229_010357_outLine +BABEL_OP1_103_13295_20120522_232550_inLine +BABEL_OP1_103_13295_20120522_232550_outLine +BABEL_OP1_103_13615_20120113_174612_inLine +BABEL_OP1_103_13615_20120113_174612_outLine +BABEL_OP1_103_13708_20120102_032700_inLine +BABEL_OP1_103_13708_20120102_032700_outLine +BABEL_OP1_103_13752_20120530_221929_inLine +BABEL_OP1_103_13752_20120530_221929_outLine +BABEL_OP1_103_14086_20120113_200751_inLine +BABEL_OP1_103_14086_20120113_200751_outLine +BABEL_OP1_103_14147_20120531_160226_inLine +BABEL_OP1_103_14147_20120531_160226_outLine +BABEL_OP1_103_14147_20120531_170020_inLine +BABEL_OP1_103_14147_20120531_170020_outLine +BABEL_OP1_103_14422_20120514_181741_inLine +BABEL_OP1_103_14422_20120514_181741_outLine +BABEL_OP1_103_14554_20120120_230548_inLine +BABEL_OP1_103_14554_20120120_230548_outLine +BABEL_OP1_103_14583_20120515_192730_inLine +BABEL_OP1_103_14583_20120515_192730_outLine +BABEL_OP1_103_14942_20120101_203529_inLine +BABEL_OP1_103_14942_20120101_203529_outLine +BABEL_OP1_103_15304_20120106_035227_inLine +BABEL_OP1_103_15304_20120106_035227_outLine +BABEL_OP1_103_15600_20111231_181856_inLine +BABEL_OP1_103_15600_20111231_181856_outLine +BABEL_OP1_103_15665_20120517_162750_inLine 
+BABEL_OP1_103_15665_20120517_162750_outLine +BABEL_OP1_103_15749_20111230_015120_inLine +BABEL_OP1_103_15749_20111230_015120_outLine +BABEL_OP1_103_15803_20120528_164556_inLine +BABEL_OP1_103_15803_20120528_164556_outLine +BABEL_OP1_103_16210_20120118_201234_inLine +BABEL_OP1_103_16210_20120118_201234_outLine +BABEL_OP1_103_16393_20111230_012139_inLine +BABEL_OP1_103_16393_20111230_012139_outLine +BABEL_OP1_103_16416_20120205_011943_inLine +BABEL_OP1_103_16416_20120205_011943_outLine +BABEL_OP1_103_16633_20120105_164800_inLine +BABEL_OP1_103_16633_20120105_164800_outLine +BABEL_OP1_103_16754_20120101_015558_inLine +BABEL_OP1_103_16754_20120101_015558_outLine +BABEL_OP1_103_17063_20120202_201950_inLine +BABEL_OP1_103_17063_20120202_201950_outLine +BABEL_OP1_103_17063_20120202_204211_inLine +BABEL_OP1_103_17063_20120202_204211_outLine +BABEL_OP1_103_17139_20120110_182115_inLine +BABEL_OP1_103_17139_20120110_182115_outLine +BABEL_OP1_103_17180_20120126_233802_inLine +BABEL_OP1_103_17180_20120126_233802_outLine +BABEL_OP1_103_17612_20120531_232906_inLine +BABEL_OP1_103_17876_20120203_220933_inLine +BABEL_OP1_103_17876_20120203_220933_outLine +BABEL_OP1_103_18244_20120514_000930_inLine +BABEL_OP1_103_18244_20120514_000931_outLine +BABEL_OP1_103_18556_20111231_233139_inLine +BABEL_OP1_103_18556_20111231_233139_outLine +BABEL_OP1_103_18755_20120103_012800_inLine +BABEL_OP1_103_18755_20120103_012800_outLine +BABEL_OP1_103_18861_20120612_231154_inLine +BABEL_OP1_103_18861_20120612_231154_outLine +BABEL_OP1_103_18938_20120515_163044_inLine +BABEL_OP1_103_18938_20120515_163044_outLine +BABEL_OP1_103_19118_20120206_194310_inLine +BABEL_OP1_103_19118_20120206_194310_outLine +BABEL_OP1_103_19280_20120515_173629_inLine +BABEL_OP1_103_19280_20120515_173630_outLine +BABEL_OP1_103_19324_20120114_223457_inLine +BABEL_OP1_103_19324_20120114_223457_outLine +BABEL_OP1_103_19564_20120112_151539_inLine +BABEL_OP1_103_19564_20120112_151539_outLine +BABEL_OP1_103_19697_20120107_043218_inLine +BABEL_OP1_103_19697_20120107_043218_outLine +BABEL_OP1_103_19885_20120517_200533_inLine +BABEL_OP1_103_19885_20120517_200533_outLine +BABEL_OP1_103_20481_20120118_004556_inLine +BABEL_OP1_103_20481_20120118_004556_outLine +BABEL_OP1_103_21020_20120517_182615_inLine +BABEL_OP1_103_21020_20120517_182615_outLine +BABEL_OP1_103_21076_20111231_203216_inLine +BABEL_OP1_103_21076_20111231_203216_outLine +BABEL_OP1_103_21208_20120113_192303_inLine +BABEL_OP1_103_21208_20120113_192303_outLine +BABEL_OP1_103_21417_20120115_235720_inLine +BABEL_OP1_103_21417_20120115_235720_outLine +BABEL_OP1_103_21648_20111229_024025_inLine +BABEL_OP1_103_21648_20111229_024025_outLine +BABEL_OP1_103_21928_20120205_210433_inLine +BABEL_OP1_103_21928_20120205_221157_inLine +BABEL_OP1_103_22134_20120204_185956_inLine +BABEL_OP1_103_22134_20120204_185956_outLine +BABEL_OP1_103_22134_20120204_191024_inLine +BABEL_OP1_103_22134_20120204_191024_outLine +BABEL_OP1_103_22338_20120203_214144_inLine +BABEL_OP1_103_22338_20120203_214144_outLine +BABEL_OP1_103_22528_20120204_221751_inLine +BABEL_OP1_103_22528_20120204_221751_outLine +BABEL_OP1_103_22697_20120123_213617_inLine +BABEL_OP1_103_22697_20120123_213617_outLine +BABEL_OP1_103_23237_20120201_204534_inLine +BABEL_OP1_103_23237_20120201_204534_outLine +BABEL_OP1_103_24235_20120205_171351_inLine +BABEL_OP1_103_24235_20120205_171351_outLine +BABEL_OP1_103_24340_20120526_221640_inLine +BABEL_OP1_103_24340_20120526_221640_outLine +BABEL_OP1_103_25040_20120129_215646_inLine 
+BABEL_OP1_103_25040_20120129_215647_outLine +BABEL_OP1_103_25489_20120107_015122_inLine +BABEL_OP1_103_25489_20120107_015122_outLine +BABEL_OP1_103_26464_20120115_153724_inLine +BABEL_OP1_103_26464_20120115_153725_outLine +BABEL_OP1_103_26603_20120519_190743_inLine +BABEL_OP1_103_26603_20120519_190743_outLine +BABEL_OP1_103_26980_20120114_151400_inLine +BABEL_OP1_103_26980_20120114_151400_outLine +BABEL_OP1_103_27159_20120109_175434_inLine +BABEL_OP1_103_27159_20120109_175434_outLine +BABEL_OP1_103_27298_20120519_164745_inLine +BABEL_OP1_103_27298_20120519_164745_outLine +BABEL_OP1_103_27374_20120608_213343_inLine +BABEL_OP1_103_27374_20120608_213344_outLine +BABEL_OP1_103_27889_20120405_184406_inLine +BABEL_OP1_103_27889_20120405_184406_outLine +BABEL_OP1_103_27895_20120407_201822_inLine +BABEL_OP1_103_27895_20120407_201822_outLine +BABEL_OP1_103_27997_20120406_024629_inLine +BABEL_OP1_103_27997_20120406_024629_outLine +BABEL_OP1_103_28150_20120421_030716_inLine +BABEL_OP1_103_28150_20120421_030716_outLine +BABEL_OP1_103_28281_20120204_215552_inLine +BABEL_OP1_103_28281_20120204_215552_outLine +BABEL_OP1_103_28325_20120421_034840_inLine +BABEL_OP1_103_28325_20120421_034840_outLine +BABEL_OP1_103_28349_20120422_230936_inLine +BABEL_OP1_103_28349_20120422_230936_outLine +BABEL_OP1_103_28452_20120423_002721_inLine +BABEL_OP1_103_28452_20120423_002721_outLine +BABEL_OP1_103_28820_20111231_235604_inLine +BABEL_OP1_103_28820_20111231_235604_outLine +BABEL_OP1_103_29083_20120524_203900_inLine +BABEL_OP1_103_29083_20120524_203900_outLine +BABEL_OP1_103_29368_20120109_152242_inLine +BABEL_OP1_103_29368_20120109_152242_outLine +BABEL_OP1_103_29757_20120607_155549_inLine +BABEL_OP1_103_29757_20120607_155549_outLine +BABEL_OP1_103_30012_20120523_210111_inLine +BABEL_OP1_103_30012_20120523_210111_outLine +BABEL_OP1_103_30031_20111231_051935_inLine +BABEL_OP1_103_30031_20111231_051935_outLine +BABEL_OP1_103_30040_20120114_164613_inLine +BABEL_OP1_103_30040_20120114_164613_outLine +BABEL_OP1_103_30243_20120115_005252_inLine +BABEL_OP1_103_30243_20120115_005252_outLine +BABEL_OP1_103_30620_20111231_181228_inLine +BABEL_OP1_103_30620_20111231_181228_outLine +BABEL_OP1_103_30711_20120612_211646_inLine +BABEL_OP1_103_30711_20120612_211646_outLine +BABEL_OP1_103_30810_20111227_000227_inLine +BABEL_OP1_103_30810_20111227_000227_outLine +BABEL_OP1_103_30847_20120108_235955_inLine +BABEL_OP1_103_30847_20120108_235955_outLine +BABEL_OP1_103_30904_20120522_013413_inLine +BABEL_OP1_103_30904_20120522_013413_outLine +BABEL_OP1_103_31201_20120523_211540_inLine +BABEL_OP1_103_31201_20120523_211540_outLine +BABEL_OP1_103_31871_20120115_205857_inLine +BABEL_OP1_103_31871_20120115_205857_outLine +BABEL_OP1_103_32040_20120122_181109_inLine +BABEL_OP1_103_32040_20120122_181109_outLine +BABEL_OP1_103_32453_20120116_174338_inLine +BABEL_OP1_103_32453_20120116_174338_outLine +BABEL_OP1_103_32722_20120115_005258_inLine +BABEL_OP1_103_32722_20120115_005258_outLine +BABEL_OP1_103_33223_20120108_225050_inLine +BABEL_OP1_103_33223_20120108_225050_outLine +BABEL_OP1_103_33316_20120528_173250_inLine +BABEL_OP1_103_33316_20120528_173250_outLine +BABEL_OP1_103_33534_20120122_020502_inLine +BABEL_OP1_103_33534_20120122_020502_outLine +BABEL_OP1_103_33551_20120122_194434_inLine +BABEL_OP1_103_33551_20120122_194434_outLine +BABEL_OP1_103_33699_20120122_173500_inLine +BABEL_OP1_103_33699_20120122_173500_outLine +BABEL_OP1_103_33807_20120122_190057_inLine +BABEL_OP1_103_33807_20120122_190057_outLine 
+BABEL_OP1_103_33885_20120125_172938_inLine +BABEL_OP1_103_33885_20120125_172938_outLine +BABEL_OP1_103_33991_20120117_202117_inLine +BABEL_OP1_103_33991_20120117_202118_outLine +BABEL_OP1_103_34137_20120529_224220_inLine +BABEL_OP1_103_34137_20120529_224220_outLine +BABEL_OP1_103_34332_20120204_191733_inLine +BABEL_OP1_103_34332_20120204_191733_outLine +BABEL_OP1_103_34545_20120118_173942_inLine +BABEL_OP1_103_34545_20120118_173942_outLine +BABEL_OP1_103_34564_20120530_211027_inLine +BABEL_OP1_103_34564_20120530_211027_outLine +BABEL_OP1_103_34925_20120112_154829_inLine +BABEL_OP1_103_34925_20120112_154829_outLine +BABEL_OP1_103_34994_20120115_213251_inLine +BABEL_OP1_103_34994_20120115_213251_outLine +BABEL_OP1_103_35144_20120123_230913_inLine +BABEL_OP1_103_35144_20120123_230913_outLine +BABEL_OP1_103_35152_20111230_220705_inLine +BABEL_OP1_103_35152_20111230_220705_outLine +BABEL_OP1_103_35157_20120124_010640_inLine +BABEL_OP1_103_35157_20120124_010640_outLine +BABEL_OP1_103_35444_20120612_203930_inLine +BABEL_OP1_103_35444_20120612_203930_outLine +BABEL_OP1_103_35660_20120122_013401_inLine +BABEL_OP1_103_35660_20120122_013402_outLine +BABEL_OP1_103_35750_20111230_025221_inLine +BABEL_OP1_103_35750_20111230_025221_outLine +BABEL_OP1_103_35892_20120120_205811_inLine +BABEL_OP1_103_35892_20120120_205811_outLine +BABEL_OP1_103_36584_20120201_230611_inLine +BABEL_OP1_103_36584_20120201_230611_outLine +BABEL_OP1_103_36748_20120121_230812_inLine +BABEL_OP1_103_36748_20120121_230812_outLine +BABEL_OP1_103_36962_20120810_005828_inLine +BABEL_OP1_103_36962_20120810_005828_outLine +BABEL_OP1_103_37131_20120522_165130_inLine +BABEL_OP1_103_37131_20120522_165130_outLine +BABEL_OP1_103_37551_20111229_232422_inLine +BABEL_OP1_103_37551_20111229_232422_outLine +BABEL_OP1_103_37604_20120122_203335_inLine +BABEL_OP1_103_37604_20120122_203335_outLine +BABEL_OP1_103_37687_20120124_220825_inLine +BABEL_OP1_103_37687_20120124_220826_outLine +BABEL_OP1_103_38163_20120202_001843_inLine +BABEL_OP1_103_38163_20120202_001843_outLine +BABEL_OP1_103_38573_20120120_234500_inLine +BABEL_OP1_103_38573_20120120_234500_outLine +BABEL_OP1_103_38573_20120121_000745_inLine +BABEL_OP1_103_38573_20120121_000745_outLine +BABEL_OP1_103_38588_20120522_215415_inLine +BABEL_OP1_103_38588_20120522_215415_outLine +BABEL_OP1_103_39119_20120608_004832_inLine +BABEL_OP1_103_39119_20120608_004832_outLine +BABEL_OP1_103_39320_20120207_022344_inLine +BABEL_OP1_103_39320_20120207_022344_outLine +BABEL_OP1_103_39769_20120127_213455_inLine +BABEL_OP1_103_39769_20120127_213455_outLine +BABEL_OP1_103_40410_20120124_204758_inLine +BABEL_OP1_103_40410_20120124_204758_outLine +BABEL_OP1_103_40442_20120202_174431_inLine +BABEL_OP1_103_40442_20120202_174431_outLine +BABEL_OP1_103_40889_20120206_221100_inLine +BABEL_OP1_103_40889_20120206_221100_outLine +BABEL_OP1_103_41144_20120118_222314_inLine +BABEL_OP1_103_41144_20120118_222314_outLine +BABEL_OP1_103_41172_20120114_134829_inLine +BABEL_OP1_103_41172_20120114_134829_outLine +BABEL_OP1_103_41197_20120805_155112_inLine +BABEL_OP1_103_41197_20120805_155112_outLine +BABEL_OP1_103_41498_20120118_023411_inLine +BABEL_OP1_103_41498_20120118_023411_outLine +BABEL_OP1_103_42332_20120126_191134_inLine +BABEL_OP1_103_42332_20120126_191134_outLine +BABEL_OP1_103_42332_20120126_192035_inLine +BABEL_OP1_103_42332_20120126_192035_outLine +BABEL_OP1_103_42651_20120122_000902_inLine +BABEL_OP1_103_42651_20120122_000902_outLine +BABEL_OP1_103_42698_20120123_230900_inLine 
+BABEL_OP1_103_42698_20120123_230900_outLine +BABEL_OP1_103_42742_20120123_232130_inLine +BABEL_OP1_103_42742_20120123_232130_outLine +BABEL_OP1_103_42790_20120129_205024_inLine +BABEL_OP1_103_42790_20120129_205025_outLine +BABEL_OP1_103_42986_20120125_204035_inLine +BABEL_OP1_103_42986_20120125_204035_outLine +BABEL_OP1_103_43442_20120120_033602_inLine +BABEL_OP1_103_43442_20120120_033602_outLine +BABEL_OP1_103_43571_20111226_210759_inLine +BABEL_OP1_103_43571_20111226_210759_outLine +BABEL_OP1_103_43812_20120124_005515_inLine +BABEL_OP1_103_43812_20120124_005515_outLine +BABEL_OP1_103_43959_20120125_223215_inLine +BABEL_OP1_103_43959_20120125_223215_outLine +BABEL_OP1_103_43974_20120110_164058_inLine +BABEL_OP1_103_43974_20120110_164058_outLine +BABEL_OP1_103_44192_20120523_184414_inLine +BABEL_OP1_103_44192_20120523_184414_outLine +BABEL_OP1_103_44838_20111229_014707_inLine +BABEL_OP1_103_44838_20111229_014707_outLine +BABEL_OP1_103_44948_20120203_011011_inLine +BABEL_OP1_103_44948_20120203_011011_outLine +BABEL_OP1_103_44967_20120207_025756_inLine +BABEL_OP1_103_44967_20120207_025756_outLine +BABEL_OP1_103_45020_20120522_170055_inLine +BABEL_OP1_103_45020_20120522_170055_outLine +BABEL_OP1_103_45029_20120608_010540_inLine +BABEL_OP1_103_45029_20120608_010540_outLine +BABEL_OP1_103_45565_20120125_220956_inLine +BABEL_OP1_103_45565_20120125_220956_outLine +BABEL_OP1_103_45601_20120201_181124_inLine +BABEL_OP1_103_45601_20120201_181124_outLine +BABEL_OP1_103_45763_20120116_175349_inLine +BABEL_OP1_103_45763_20120116_175349_outLine +BABEL_OP1_103_46197_20120524_220246_inLine +BABEL_OP1_103_46197_20120524_220246_outLine +BABEL_OP1_103_46460_20120530_183725_inLine +BABEL_OP1_103_46460_20120530_183725_outLine +BABEL_OP1_103_46460_20120530_185105_inLine +BABEL_OP1_103_46460_20120530_185106_outLine +BABEL_OP1_103_46548_20120517_192114_inLine +BABEL_OP1_103_46548_20120517_192114_outLine +BABEL_OP1_103_46862_20120124_195804_inLine +BABEL_OP1_103_46862_20120124_195804_outLine +BABEL_OP1_103_46862_20120204_203651_inLine +BABEL_OP1_103_46862_20120204_203651_outLine +BABEL_OP1_103_46887_20120202_214319_inLine +BABEL_OP1_103_46887_20120202_214320_outLine +BABEL_OP1_103_46900_20120204_225820_inLine +BABEL_OP1_103_46900_20120204_225820_outLine +BABEL_OP1_103_47151_20111229_233253_inLine +BABEL_OP1_103_47151_20111229_233253_outLine +BABEL_OP1_103_47177_20120127_201638_inLine +BABEL_OP1_103_47177_20120127_201638_outLine +BABEL_OP1_103_47416_20120729_181025_inLine +BABEL_OP1_103_47416_20120729_181025_outLine +BABEL_OP1_103_47424_20111231_203241_inLine +BABEL_OP1_103_47424_20111231_203241_outLine +BABEL_OP1_103_47574_20120207_034724_inLine +BABEL_OP1_103_47574_20120207_034724_outLine +BABEL_OP1_103_48176_20120206_023101_inLine +BABEL_OP1_103_48176_20120206_023101_outLine +BABEL_OP1_103_48259_20120116_022438_inLine +BABEL_OP1_103_48259_20120116_022438_outLine +BABEL_OP1_103_48518_20120121_195050_inLine +BABEL_OP1_103_48518_20120121_195050_outLine +BABEL_OP1_103_49175_20120206_214803_inLine +BABEL_OP1_103_49175_20120206_214803_outLine +BABEL_OP1_103_49520_20120523_172707_inLine +BABEL_OP1_103_49520_20120523_172707_outLine +BABEL_OP1_103_49629_20120104_040004_inLine +BABEL_OP1_103_49629_20120104_040004_outLine +BABEL_OP1_103_49755_20120110_010410_inLine +BABEL_OP1_103_49755_20120110_010410_outLine +BABEL_OP1_103_49819_20120127_012212_inLine +BABEL_OP1_103_49819_20120127_012212_outLine +BABEL_OP1_103_50492_20120123_211938_inLine +BABEL_OP1_103_50492_20120123_211938_outLine 
+BABEL_OP1_103_50523_20120607_185125_inLine +BABEL_OP1_103_50523_20120607_185126_outLine +BABEL_OP1_103_50798_20120131_022954_inLine +BABEL_OP1_103_50798_20120131_022954_outLine +BABEL_OP1_103_51243_20120201_200604_inLine +BABEL_OP1_103_51243_20120201_200604_outLine +BABEL_OP1_103_52122_20120207_025756_inLine +BABEL_OP1_103_52122_20120207_025756_outLine +BABEL_OP1_103_52604_20120131_233302_inLine +BABEL_OP1_103_52604_20120131_233302_outLine +BABEL_OP1_103_52753_20120521_000301_inLine +BABEL_OP1_103_52753_20120521_001422_inLine +BABEL_OP1_103_53067_20120127_225851_inLine +BABEL_OP1_103_53067_20120127_225851_outLine +BABEL_OP1_103_53262_20120204_194912_inLine +BABEL_OP1_103_53262_20120204_194912_outLine +BABEL_OP1_103_53346_20120128_214441_inLine +BABEL_OP1_103_53346_20120128_214441_outLine +BABEL_OP1_103_53636_20120127_000358_inLine +BABEL_OP1_103_53636_20120127_000358_outLine +BABEL_OP1_103_54030_20111230_220440_inLine +BABEL_OP1_103_54030_20111230_220440_outLine +BABEL_OP1_103_54263_20120206_225348_inLine +BABEL_OP1_103_54263_20120206_225349_outLine +BABEL_OP1_103_54417_20120522_172155_inLine +BABEL_OP1_103_54417_20120522_172155_outLine +BABEL_OP1_103_54606_20120205_175853_inLine +BABEL_OP1_103_54606_20120205_175853_outLine +BABEL_OP1_103_54975_20120207_015749_inLine +BABEL_OP1_103_54975_20120207_015749_outLine +BABEL_OP1_103_54991_20120206_003607_inLine +BABEL_OP1_103_54991_20120206_003607_outLine +BABEL_OP1_103_55166_20120119_180058_inLine +BABEL_OP1_103_55166_20120119_180058_outLine +BABEL_OP1_103_55194_20120529_215243_inLine +BABEL_OP1_103_55194_20120529_215243_outLine +BABEL_OP1_103_55316_20111226_180557_inLine +BABEL_OP1_103_55316_20111226_180557_outLine +BABEL_OP1_103_56704_20120606_171759_inLine +BABEL_OP1_103_56704_20120606_171759_outLine +BABEL_OP1_103_57092_20111227_044400_inLine +BABEL_OP1_103_57092_20111227_044400_outLine +BABEL_OP1_103_57232_20120126_020104_inLine +BABEL_OP1_103_57232_20120126_020104_outLine +BABEL_OP1_103_57351_20120612_182248_inLine +BABEL_OP1_103_57351_20120612_182248_outLine +BABEL_OP1_103_58283_20111231_230840_inLine +BABEL_OP1_103_58283_20111231_230840_outLine +BABEL_OP1_103_58925_20120113_212456_inLine +BABEL_OP1_103_58925_20120113_212456_outLine +BABEL_OP1_103_58925_20120113_214350_inLine +BABEL_OP1_103_58925_20120113_214350_outLine +BABEL_OP1_103_59482_20120612_190437_inLine +BABEL_OP1_103_59482_20120612_190437_outLine +BABEL_OP1_103_59558_20120121_234224_inLine +BABEL_OP1_103_59558_20120121_234224_outLine +BABEL_OP1_103_60524_20120109_213755_inLine +BABEL_OP1_103_60524_20120109_213755_outLine +BABEL_OP1_103_60571_20111228_183342_inLine +BABEL_OP1_103_60571_20111228_183342_outLine +BABEL_OP1_103_60806_20120117_233630_inLine +BABEL_OP1_103_60806_20120117_233630_outLine +BABEL_OP1_103_61229_20120616_151341_inLine +BABEL_OP1_103_61229_20120616_151341_outLine +BABEL_OP1_103_61558_20120106_205412_inLine +BABEL_OP1_103_61558_20120106_205412_outLine +BABEL_OP1_103_61592_20120125_225752_inLine +BABEL_OP1_103_61592_20120125_225752_outLine +BABEL_OP1_103_61629_20120127_192849_inLine +BABEL_OP1_103_61629_20120127_192849_outLine +BABEL_OP1_103_61733_20120201_183457_inLine +BABEL_OP1_103_61733_20120201_183457_outLine +BABEL_OP1_103_62097_20120307_164325_inLine +BABEL_OP1_103_62097_20120307_164325_outLine +BABEL_OP1_103_62182_20111231_003944_inLine +BABEL_OP1_103_62182_20111231_003944_outLine +BABEL_OP1_103_62222_20120122_201756_inLine +BABEL_OP1_103_62222_20120122_201756_outLine +BABEL_OP1_103_62479_20120306_025702_inLine 
+BABEL_OP1_103_62479_20120306_025702_outLine +BABEL_OP1_103_62558_20120124_220850_inLine +BABEL_OP1_103_62558_20120124_220850_outLine +BABEL_OP1_103_62652_20120306_015948_inLine +BABEL_OP1_103_62652_20120306_015948_outLine +BABEL_OP1_103_62720_20120308_164432_inLine +BABEL_OP1_103_62720_20120308_164432_outLine +BABEL_OP1_103_62720_20120308_165706_inLine +BABEL_OP1_103_62720_20120308_165706_outLine +BABEL_OP1_103_62843_20120310_235523_inLine +BABEL_OP1_103_62843_20120310_235523_outLine +BABEL_OP1_103_63127_20120311_184714_inLine +BABEL_OP1_103_63127_20120311_184714_outLine +BABEL_OP1_103_63129_20120311_193438_inLine +BABEL_OP1_103_63129_20120311_193438_outLine +BABEL_OP1_103_63194_20120312_010359_inLine +BABEL_OP1_103_63194_20120312_010359_outLine +BABEL_OP1_103_63215_20120513_191621_inLine +BABEL_OP1_103_63215_20120513_191621_outLine +BABEL_OP1_103_63240_20120312_021342_inLine +BABEL_OP1_103_63240_20120312_021342_outLine +BABEL_OP1_103_63373_20120315_025205_inLine +BABEL_OP1_103_63373_20120315_025205_outLine +BABEL_OP1_103_63384_20120315_031012_inLine +BABEL_OP1_103_63384_20120315_031012_outLine +BABEL_OP1_103_63422_20120315_034640_inLine +BABEL_OP1_103_63422_20120315_034640_outLine +BABEL_OP1_103_63510_20120318_221426_inLine +BABEL_OP1_103_63510_20120318_221426_outLine +BABEL_OP1_103_63680_20120319_214759_inLine +BABEL_OP1_103_63680_20120319_214759_outLine +BABEL_OP1_103_63687_20120320_181655_inLine +BABEL_OP1_103_63687_20120320_181655_outLine +BABEL_OP1_103_63923_20120320_172933_inLine +BABEL_OP1_103_63923_20120320_172933_outLine +BABEL_OP1_103_63929_20120123_192325_inLine +BABEL_OP1_103_63929_20120123_192325_outLine +BABEL_OP1_103_63950_20120320_184409_inLine +BABEL_OP1_103_63950_20120320_184409_outLine +BABEL_OP1_103_64039_20120320_215418_inLine +BABEL_OP1_103_64039_20120320_215418_outLine +BABEL_OP1_103_64145_20120404_204905_inLine +BABEL_OP1_103_64145_20120404_204905_outLine +BABEL_OP1_103_64153_20120403_180645_inLine +BABEL_OP1_103_64153_20120403_180645_outLine +BABEL_OP1_103_64177_20120404_212051_inLine +BABEL_OP1_103_64177_20120404_212051_outLine +BABEL_OP1_103_64231_20120310_224637_inLine +BABEL_OP1_103_64231_20120310_224637_outLine +BABEL_OP1_103_64610_20120125_223001_inLine +BABEL_OP1_103_64610_20120125_223001_outLine +BABEL_OP1_103_65512_20111229_045507_inLine +BABEL_OP1_103_65512_20111229_045507_outLine +BABEL_OP1_103_65818_20120127_011907_inLine +BABEL_OP1_103_65818_20120127_011907_outLine +BABEL_OP1_103_65954_20120205_190321_inLine +BABEL_OP1_103_65954_20120205_190321_outLine +BABEL_OP1_103_65991_20120229_215906_inLine +BABEL_OP1_103_65991_20120229_215906_outLine +BABEL_OP1_103_66005_20120229_221845_inLine +BABEL_OP1_103_66005_20120229_221845_outLine +BABEL_OP1_103_66048_20120229_225251_inLine +BABEL_OP1_103_66048_20120229_225251_outLine +BABEL_OP1_103_66287_20120108_191621_inLine +BABEL_OP1_103_66287_20120108_191621_outLine +BABEL_OP1_103_66309_20120229_232503_inLine +BABEL_OP1_103_66309_20120229_232503_outLine +BABEL_OP1_103_66659_20120229_235042_inLine +BABEL_OP1_103_66659_20120229_235042_outLine +BABEL_OP1_103_66719_20120116_002436_inLine +BABEL_OP1_103_66719_20120116_002436_outLine +BABEL_OP1_103_66813_20120127_151237_inLine +BABEL_OP1_103_66813_20120127_151237_outLine +BABEL_OP1_103_67001_20120305_223711_inLine +BABEL_OP1_103_67001_20120305_223711_outLine +BABEL_OP1_103_67288_20120305_233501_inLine +BABEL_OP1_103_67288_20120305_233501_outLine +BABEL_OP1_103_67358_20120128_224934_inLine +BABEL_OP1_103_67358_20120128_224934_outLine 
+BABEL_OP1_103_67484_20120306_212801_inLine +BABEL_OP1_103_67484_20120306_212801_outLine +BABEL_OP1_103_67604_20120306_201231_inLine +BABEL_OP1_103_67604_20120306_201231_outLine +BABEL_OP1_103_67685_20120118_163939_inLine +BABEL_OP1_103_67685_20120118_163939_outLine +BABEL_OP1_103_67814_20120522_200114_inLine +BABEL_OP1_103_67814_20120522_200114_outLine +BABEL_OP1_103_67824_20120116_000148_inLine +BABEL_OP1_103_67824_20120116_000148_outLine +BABEL_OP1_103_68144_20120201_183136_inLine +BABEL_OP1_103_68144_20120201_183136_outLine +BABEL_OP1_103_68602_20120729_174819_inLine +BABEL_OP1_103_68602_20120729_174819_outLine +BABEL_OP1_103_68811_20120531_155031_inLine +BABEL_OP1_103_68811_20120531_155031_outLine +BABEL_OP1_103_69771_20120118_183315_inLine +BABEL_OP1_103_69771_20120118_183315_outLine +BABEL_OP1_103_69969_20120309_020612_inLine +BABEL_OP1_103_69969_20120309_020612_outLine +BABEL_OP1_103_69990_20120305_153850_inLine +BABEL_OP1_103_69990_20120305_153850_outLine +BABEL_OP1_103_70200_20120311_000406_inLine +BABEL_OP1_103_70200_20120311_000406_outLine +BABEL_OP1_103_70442_20111231_223721_inLine +BABEL_OP1_103_70442_20111231_223721_outLine +BABEL_OP1_103_70476_20120117_202957_inLine +BABEL_OP1_103_70476_20120117_202957_outLine +BABEL_OP1_103_70476_20120117_204242_inLine +BABEL_OP1_103_70476_20120117_204242_outLine +BABEL_OP1_103_70484_20120524_210819_inLine +BABEL_OP1_103_70484_20120524_210819_outLine +BABEL_OP1_103_70651_20120131_034337_inLine +BABEL_OP1_103_70651_20120131_034337_outLine +BABEL_OP1_103_70762_20111230_015835_inLine +BABEL_OP1_103_70762_20111230_015835_outLine +BABEL_OP1_103_70858_20120201_191031_inLine +BABEL_OP1_103_70858_20120201_191031_outLine +BABEL_OP1_103_70897_20120118_020506_inLine +BABEL_OP1_103_70897_20120118_020506_outLine +BABEL_OP1_103_70919_20120202_170934_inLine +BABEL_OP1_103_70919_20120202_170934_outLine +BABEL_OP1_103_71215_20120207_001204_inLine +BABEL_OP1_103_71215_20120207_001204_outLine +BABEL_OP1_103_71293_20120101_212224_inLine +BABEL_OP1_103_71293_20120101_212224_outLine +BABEL_OP1_103_71450_20120514_181620_inLine +BABEL_OP1_103_71450_20120514_181621_outLine +BABEL_OP1_103_71666_20120514_223534_inLine +BABEL_OP1_103_71666_20120514_223534_outLine +BABEL_OP1_103_71691_20120109_034006_inLine +BABEL_OP1_103_71691_20120109_034007_outLine +BABEL_OP1_103_72176_20111226_224243_inLine +BABEL_OP1_103_72176_20111226_224243_outLine +BABEL_OP1_103_72179_20120511_023300_inLine +BABEL_OP1_103_72179_20120511_023300_outLine +BABEL_OP1_103_72709_20120204_231928_inLine +BABEL_OP1_103_72709_20120204_231928_outLine +BABEL_OP1_103_72714_20120126_001354_inLine +BABEL_OP1_103_72714_20120126_001354_outLine +BABEL_OP1_103_73264_20111228_184038_inLine +BABEL_OP1_103_73264_20111228_184038_outLine +BABEL_OP1_103_73881_20120120_041629_inLine +BABEL_OP1_103_73881_20120120_041629_outLine +BABEL_OP1_103_74188_20120522_172823_inLine +BABEL_OP1_103_74188_20120522_172823_outLine +BABEL_OP1_103_74334_20120102_033902_inLine +BABEL_OP1_103_74334_20120102_033902_outLine +BABEL_OP1_103_75402_20120120_190246_inLine +BABEL_OP1_103_75402_20120120_190246_outLine +BABEL_OP1_103_75797_20120125_192735_inLine +BABEL_OP1_103_75797_20120125_192735_outLine +BABEL_OP1_103_76069_20120608_031447_inLine +BABEL_OP1_103_76069_20120608_031447_outLine +BABEL_OP1_103_76276_20120114_191208_inLine +BABEL_OP1_103_76276_20120114_191208_outLine +BABEL_OP1_103_76347_20120601_011206_inLine +BABEL_OP1_103_76347_20120601_011206_outLine +BABEL_OP1_103_77097_20120109_024625_inLine 
+BABEL_OP1_103_77097_20120109_024625_outLine +BABEL_OP1_103_77737_20111230_143637_inLine +BABEL_OP1_103_77737_20111230_143637_outLine +BABEL_OP1_103_78722_20120126_234318_inLine +BABEL_OP1_103_78722_20120126_234318_outLine +BABEL_OP1_103_79127_20120205_215208_inLine +BABEL_OP1_103_79127_20120205_215208_outLine +BABEL_OP1_103_79788_20120201_222512_inLine +BABEL_OP1_103_79788_20120201_222512_outLine +BABEL_OP1_103_79803_20120730_020433_inLine +BABEL_OP1_103_79803_20120730_020433_outLine +BABEL_OP1_103_79857_20120111_205043_inLine +BABEL_OP1_103_79857_20120111_205043_outLine +BABEL_OP1_103_79901_20120202_193650_inLine +BABEL_OP1_103_79901_20120202_194746_inLine +BABEL_OP1_103_80118_20120126_010553_inLine +BABEL_OP1_103_80118_20120126_010553_outLine +BABEL_OP1_103_80183_20120513_182754_inLine +BABEL_OP1_103_80183_20120513_182754_outLine +BABEL_OP1_103_80313_20120106_200706_inLine +BABEL_OP1_103_80313_20120106_200706_outLine +BABEL_OP1_103_80319_20120120_231835_inLine +BABEL_OP1_103_80319_20120120_231835_outLine +BABEL_OP1_103_80943_20120125_185437_inLine +BABEL_OP1_103_80943_20120125_185437_outLine +BABEL_OP1_103_81800_20120531_180959_inLine +BABEL_OP1_103_81800_20120531_180959_outLine +BABEL_OP1_103_81800_20120531_182855_inLine +BABEL_OP1_103_81800_20120531_182855_outLine +BABEL_OP1_103_82094_20120522_225233_inLine +BABEL_OP1_103_82094_20120522_225233_outLine +BABEL_OP1_103_82135_20120117_213149_inLine +BABEL_OP1_103_82135_20120117_213149_outLine +BABEL_OP1_103_83819_20120125_193543_inLine +BABEL_OP1_103_83819_20120125_193543_outLine +BABEL_OP1_103_83835_20111231_193822_inLine +BABEL_OP1_103_83835_20111231_193822_outLine +BABEL_OP1_103_84654_20120515_201204_inLine +BABEL_OP1_103_84654_20120515_201204_outLine +BABEL_OP1_103_84754_20120523_180347_inLine +BABEL_OP1_103_84754_20120523_180347_outLine +BABEL_OP1_103_84854_20120205_001920_inLine +BABEL_OP1_103_84854_20120205_001920_outLine +BABEL_OP1_103_84985_20120105_205509_inLine +BABEL_OP1_103_84985_20120105_205509_outLine +BABEL_OP1_103_85457_20120521_204532_inLine +BABEL_OP1_103_85457_20120521_204532_outLine +BABEL_OP1_103_85577_20120729_215558_inLine +BABEL_OP1_103_85577_20120729_215558_outLine +BABEL_OP1_103_85730_20120116_233350_inLine +BABEL_OP1_103_85730_20120116_233350_outLine +BABEL_OP1_103_85764_20120129_192217_inLine +BABEL_OP1_103_85764_20120129_192217_outLine +BABEL_OP1_103_85897_20120120_171153_inLine +BABEL_OP1_103_85897_20120120_171153_outLine +BABEL_OP1_103_86537_20120511_195620_inLine +BABEL_OP1_103_86537_20120511_195620_outLine +BABEL_OP1_103_86614_20120521_220136_inLine +BABEL_OP1_103_86614_20120521_220136_outLine +BABEL_OP1_103_86680_20120105_191615_inLine +BABEL_OP1_103_86680_20120105_191615_outLine +BABEL_OP1_103_87453_20120515_170718_inLine +BABEL_OP1_103_87453_20120515_170718_outLine +BABEL_OP1_103_87677_20120121_224149_inLine +BABEL_OP1_103_87677_20120121_224149_outLine +BABEL_OP1_103_87723_20120518_211143_inLine +BABEL_OP1_103_87723_20120518_211143_outLine +BABEL_OP1_103_88604_20120206_014323_inLine +BABEL_OP1_103_88604_20120206_014323_outLine +BABEL_OP1_103_88604_20120206_015628_inLine +BABEL_OP1_103_88604_20120206_015628_outLine +BABEL_OP1_103_88677_20120112_032502_inLine +BABEL_OP1_103_88677_20120112_032502_outLine +BABEL_OP1_103_89464_20120205_204528_inLine +BABEL_OP1_103_89464_20120205_204528_outLine +BABEL_OP1_103_89702_20120109_021228_inLine +BABEL_OP1_103_89702_20120109_021228_outLine +BABEL_OP1_103_90041_20120201_190104_inLine +BABEL_OP1_103_90041_20120201_190104_outLine 
+BABEL_OP1_103_90129_20120126_221744_inLine +BABEL_OP1_103_90129_20120126_221744_outLine +BABEL_OP1_103_90641_20120102_212610_inLine +BABEL_OP1_103_90641_20120102_212610_outLine +BABEL_OP1_103_90882_20120530_230837_inLine +BABEL_OP1_103_90882_20120530_230837_outLine +BABEL_OP1_103_91161_20111229_202627_inLine +BABEL_OP1_103_91161_20111229_202627_outLine +BABEL_OP1_103_91372_20120115_023342_inLine +BABEL_OP1_103_91372_20120115_023342_outLine +BABEL_OP1_103_92722_20120512_132612_inLine +BABEL_OP1_103_92722_20120512_132612_outLine +BABEL_OP1_103_92793_20111229_200332_inLine +BABEL_OP1_103_92793_20111229_200332_outLine +BABEL_OP1_103_92910_20120205_195736_inLine +BABEL_OP1_103_92910_20120205_195736_outLine +BABEL_OP1_103_93026_20111228_235326_inLine +BABEL_OP1_103_93026_20111228_235326_outLine +BABEL_OP1_103_93358_20120107_025421_inLine +BABEL_OP1_103_93358_20120107_025421_outLine +BABEL_OP1_103_93742_20120529_184600_inLine +BABEL_OP1_103_93742_20120529_184600_outLine +BABEL_OP1_103_93907_20111228_051458_inLine +BABEL_OP1_103_93907_20111228_051458_outLine +BABEL_OP1_103_94572_20120131_224123_inLine +BABEL_OP1_103_94572_20120131_224123_outLine +BABEL_OP1_103_94793_20120102_034406_inLine +BABEL_OP1_103_94793_20120102_034406_outLine +BABEL_OP1_103_95349_20111229_201011_inLine +BABEL_OP1_103_95349_20111229_201011_outLine +BABEL_OP1_103_95349_20111229_225436_inLine +BABEL_OP1_103_95349_20111229_225436_outLine +BABEL_OP1_103_95360_20120206_204731_inLine +BABEL_OP1_103_95360_20120206_204731_outLine +BABEL_OP1_103_96186_20120128_212837_inLine +BABEL_OP1_103_96186_20120128_212837_outLine +BABEL_OP1_103_96537_20120729_165831_inLine +BABEL_OP1_103_96537_20120729_165831_outLine +BABEL_OP1_103_96690_20120131_213344_inLine +BABEL_OP1_103_96690_20120131_213344_outLine +BABEL_OP1_103_97679_20111229_191138_inLine +BABEL_OP1_103_97679_20111229_191138_outLine +BABEL_OP1_103_97971_20120111_020458_inLine +BABEL_OP1_103_97971_20120111_020459_outLine +BABEL_OP1_103_98331_20120131_213958_inLine +BABEL_OP1_103_98331_20120131_213958_outLine +BABEL_OP1_103_98446_20120101_215857_inLine +BABEL_OP1_103_98446_20120101_215857_outLine +BABEL_OP1_103_99093_20120514_161939_inLine +BABEL_OP1_103_99093_20120514_161939_outLine +BABEL_OP1_103_99510_20120515_175659_inLine +BABEL_OP1_103_99510_20120515_175659_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.list b/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.list new file mode 100644 index 00000000000..4d5c081b1c2 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.list @@ -0,0 +1,124 @@ +BABEL_OP1_103_10193_20111229_035249_inLine +BABEL_OP1_103_10193_20111229_035249_outLine +BABEL_OP1_103_10612_20111222_210911_inLine +BABEL_OP1_103_10612_20111222_210911_outLine +BABEL_OP1_103_11128_20120124_200626_inLine +BABEL_OP1_103_11128_20120124_200626_outLine +BABEL_OP1_103_12639_20111229_015021_inLine +BABEL_OP1_103_12639_20111229_015021_outLine +BABEL_OP1_103_12786_20111230_012748_inLine +BABEL_OP1_103_12786_20111230_012749_outLine +BABEL_OP1_103_14554_20120120_230548_inLine +BABEL_OP1_103_14554_20120120_230548_outLine +BABEL_OP1_103_16416_20120205_011943_inLine +BABEL_OP1_103_16416_20120205_011943_outLine +BABEL_OP1_103_19280_20120515_173629_inLine +BABEL_OP1_103_19280_20120515_173630_outLine +BABEL_OP1_103_22134_20120204_185956_inLine +BABEL_OP1_103_22134_20120204_185956_outLine +BABEL_OP1_103_22134_20120204_191024_inLine +BABEL_OP1_103_22134_20120204_191024_outLine +BABEL_OP1_103_22697_20120123_213617_inLine 
+BABEL_OP1_103_22697_20120123_213617_outLine +BABEL_OP1_103_30620_20111231_181228_inLine +BABEL_OP1_103_30620_20111231_181228_outLine +BABEL_OP1_103_30810_20111227_000227_inLine +BABEL_OP1_103_30810_20111227_000227_outLine +BABEL_OP1_103_32040_20120122_181109_inLine +BABEL_OP1_103_32040_20120122_181109_outLine +BABEL_OP1_103_36584_20120201_230611_inLine +BABEL_OP1_103_36584_20120201_230611_outLine +BABEL_OP1_103_38163_20120202_001843_inLine +BABEL_OP1_103_38163_20120202_001843_outLine +BABEL_OP1_103_39119_20120608_004832_inLine +BABEL_OP1_103_39119_20120608_004832_outLine +BABEL_OP1_103_41144_20120118_222314_inLine +BABEL_OP1_103_41144_20120118_222314_outLine +BABEL_OP1_103_41197_20120805_155112_inLine +BABEL_OP1_103_41197_20120805_155112_outLine +BABEL_OP1_103_41498_20120118_023411_inLine +BABEL_OP1_103_41498_20120118_023411_outLine +BABEL_OP1_103_42742_20120123_232130_inLine +BABEL_OP1_103_42742_20120123_232130_outLine +BABEL_OP1_103_43974_20120110_164058_inLine +BABEL_OP1_103_43974_20120110_164058_outLine +BABEL_OP1_103_44192_20120523_184414_inLine +BABEL_OP1_103_44192_20120523_184414_outLine +BABEL_OP1_103_45601_20120201_181124_inLine +BABEL_OP1_103_45601_20120201_181124_outLine +BABEL_OP1_103_45763_20120116_175349_inLine +BABEL_OP1_103_45763_20120116_175349_outLine +BABEL_OP1_103_46548_20120517_192114_inLine +BABEL_OP1_103_46548_20120517_192114_outLine +BABEL_OP1_103_46887_20120202_214319_inLine +BABEL_OP1_103_46887_20120202_214320_outLine +BABEL_OP1_103_46900_20120204_225820_inLine +BABEL_OP1_103_46900_20120204_225820_outLine +BABEL_OP1_103_48518_20120121_195050_inLine +BABEL_OP1_103_48518_20120121_195050_outLine +BABEL_OP1_103_52604_20120131_233302_inLine +BABEL_OP1_103_52604_20120131_233302_outLine +BABEL_OP1_103_54606_20120205_175853_inLine +BABEL_OP1_103_54606_20120205_175853_outLine +BABEL_OP1_103_55316_20111226_180557_inLine +BABEL_OP1_103_55316_20111226_180557_outLine +BABEL_OP1_103_57232_20120126_020104_inLine +BABEL_OP1_103_57232_20120126_020104_outLine +BABEL_OP1_103_59558_20120121_234224_inLine +BABEL_OP1_103_59558_20120121_234224_outLine +BABEL_OP1_103_60571_20111228_183342_inLine +BABEL_OP1_103_60571_20111228_183342_outLine +BABEL_OP1_103_63422_20120315_034640_inLine +BABEL_OP1_103_63422_20120315_034640_outLine +BABEL_OP1_103_63950_20120320_184409_inLine +BABEL_OP1_103_63950_20120320_184409_outLine +BABEL_OP1_103_64153_20120403_180645_inLine +BABEL_OP1_103_64153_20120403_180645_outLine +BABEL_OP1_103_66659_20120229_235042_inLine +BABEL_OP1_103_66659_20120229_235042_outLine +BABEL_OP1_103_67604_20120306_201231_inLine +BABEL_OP1_103_67604_20120306_201231_outLine +BABEL_OP1_103_68144_20120201_183136_inLine +BABEL_OP1_103_68144_20120201_183136_outLine +BABEL_OP1_103_69771_20120118_183315_inLine +BABEL_OP1_103_69771_20120118_183315_outLine +BABEL_OP1_103_70442_20111231_223721_inLine +BABEL_OP1_103_70442_20111231_223721_outLine +BABEL_OP1_103_70484_20120524_210819_inLine +BABEL_OP1_103_70484_20120524_210819_outLine +BABEL_OP1_103_72176_20111226_224243_inLine +BABEL_OP1_103_72176_20111226_224243_outLine +BABEL_OP1_103_75402_20120120_190246_inLine +BABEL_OP1_103_75402_20120120_190246_outLine +BABEL_OP1_103_76069_20120608_031447_inLine +BABEL_OP1_103_76069_20120608_031447_outLine +BABEL_OP1_103_76347_20120601_011206_inLine +BABEL_OP1_103_76347_20120601_011206_outLine +BABEL_OP1_103_77737_20111230_143637_inLine +BABEL_OP1_103_77737_20111230_143637_outLine +BABEL_OP1_103_80319_20120120_231835_inLine +BABEL_OP1_103_80319_20120120_231835_outLine 
+BABEL_OP1_103_84754_20120523_180347_inLine +BABEL_OP1_103_84754_20120523_180347_outLine +BABEL_OP1_103_84985_20120105_205509_inLine +BABEL_OP1_103_84985_20120105_205509_outLine +BABEL_OP1_103_85897_20120120_171153_inLine +BABEL_OP1_103_85897_20120120_171153_outLine +BABEL_OP1_103_87723_20120518_211143_inLine +BABEL_OP1_103_87723_20120518_211143_outLine +BABEL_OP1_103_88604_20120206_014323_inLine +BABEL_OP1_103_88604_20120206_014323_outLine +BABEL_OP1_103_88604_20120206_015628_inLine +BABEL_OP1_103_88604_20120206_015628_outLine +BABEL_OP1_103_90041_20120201_190104_inLine +BABEL_OP1_103_90041_20120201_190104_outLine +BABEL_OP1_103_90129_20120126_221744_inLine +BABEL_OP1_103_90129_20120126_221744_outLine +BABEL_OP1_103_93742_20120529_184600_inLine +BABEL_OP1_103_93742_20120529_184600_outLine +BABEL_OP1_103_94572_20120131_224123_inLine +BABEL_OP1_103_94572_20120131_224123_outLine +BABEL_OP1_103_95360_20120206_204731_inLine +BABEL_OP1_103_95360_20120206_204731_outLine +BABEL_OP1_103_96186_20120128_212837_inLine +BABEL_OP1_103_96186_20120128_212837_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..0b2264097e0 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/train.LimitedLP.untranscribed.list @@ -0,0 +1,627 @@ +BABEL_OP1_103_10301_20111220_225237_inLine +BABEL_OP1_103_10301_20111220_225237_outLine +BABEL_OP1_103_10305_20111220_231100_inLine +BABEL_OP1_103_10305_20111220_231100_outLine +BABEL_OP1_103_10348_20120113_213854_inLine +BABEL_OP1_103_10348_20120113_213854_outLine +BABEL_OP1_103_10531_20120118_042000_inLine +BABEL_OP1_103_10531_20120118_042000_outLine +BABEL_OP1_103_10556_20111221_000031_inLine +BABEL_OP1_103_10556_20111221_000031_outLine +BABEL_OP1_103_10806_20111226_181132_inLine +BABEL_OP1_103_10806_20111226_181132_outLine +BABEL_OP1_103_11155_20111230_211626_inLine +BABEL_OP1_103_11155_20111230_211626_outLine +BABEL_OP1_103_11442_20120125_025606_inLine +BABEL_OP1_103_11442_20120125_025606_outLine +BABEL_OP1_103_12518_20111227_181020_inLine +BABEL_OP1_103_12518_20111227_181021_outLine +BABEL_OP1_103_12682_20120125_201902_inLine +BABEL_OP1_103_12682_20120125_201902_outLine +BABEL_OP1_103_12682_20120125_210238_inLine +BABEL_OP1_103_12682_20120125_210238_outLine +BABEL_OP1_103_12719_20120203_035027_inLine +BABEL_OP1_103_12719_20120203_035027_outLine +BABEL_OP1_103_12809_20111229_175926_inLine +BABEL_OP1_103_12809_20111229_175926_outLine +BABEL_OP1_103_12843_20120117_224043_inLine +BABEL_OP1_103_12843_20120117_224043_outLine +BABEL_OP1_103_13024_20111229_010356_inLine +BABEL_OP1_103_13024_20111229_010357_outLine +BABEL_OP1_103_13295_20120522_232550_inLine +BABEL_OP1_103_13295_20120522_232550_outLine +BABEL_OP1_103_13615_20120113_174612_inLine +BABEL_OP1_103_13615_20120113_174612_outLine +BABEL_OP1_103_13708_20120102_032700_inLine +BABEL_OP1_103_13708_20120102_032700_outLine +BABEL_OP1_103_13752_20120530_221929_inLine +BABEL_OP1_103_13752_20120530_221929_outLine +BABEL_OP1_103_14086_20120113_200751_inLine +BABEL_OP1_103_14086_20120113_200751_outLine +BABEL_OP1_103_14147_20120531_160226_inLine +BABEL_OP1_103_14147_20120531_160226_outLine +BABEL_OP1_103_14147_20120531_170020_inLine +BABEL_OP1_103_14147_20120531_170020_outLine +BABEL_OP1_103_14422_20120514_181741_inLine +BABEL_OP1_103_14422_20120514_181741_outLine +BABEL_OP1_103_14583_20120515_192730_inLine +BABEL_OP1_103_14583_20120515_192730_outLine 
+BABEL_OP1_103_14942_20120101_203529_inLine +BABEL_OP1_103_14942_20120101_203529_outLine +BABEL_OP1_103_15304_20120106_035227_inLine +BABEL_OP1_103_15304_20120106_035227_outLine +BABEL_OP1_103_15600_20111231_181856_inLine +BABEL_OP1_103_15600_20111231_181856_outLine +BABEL_OP1_103_15665_20120517_162750_inLine +BABEL_OP1_103_15665_20120517_162750_outLine +BABEL_OP1_103_15749_20111230_015120_inLine +BABEL_OP1_103_15749_20111230_015120_outLine +BABEL_OP1_103_15803_20120528_164556_inLine +BABEL_OP1_103_15803_20120528_164556_outLine +BABEL_OP1_103_16210_20120118_201234_inLine +BABEL_OP1_103_16210_20120118_201234_outLine +BABEL_OP1_103_16393_20111230_012139_inLine +BABEL_OP1_103_16393_20111230_012139_outLine +BABEL_OP1_103_16633_20120105_164800_inLine +BABEL_OP1_103_16633_20120105_164800_outLine +BABEL_OP1_103_16754_20120101_015558_inLine +BABEL_OP1_103_16754_20120101_015558_outLine +BABEL_OP1_103_17063_20120202_201950_inLine +BABEL_OP1_103_17063_20120202_201950_outLine +BABEL_OP1_103_17063_20120202_204211_inLine +BABEL_OP1_103_17063_20120202_204211_outLine +BABEL_OP1_103_17139_20120110_182115_inLine +BABEL_OP1_103_17139_20120110_182115_outLine +BABEL_OP1_103_17180_20120126_233802_inLine +BABEL_OP1_103_17180_20120126_233802_outLine +BABEL_OP1_103_17612_20120531_232906_inLine +BABEL_OP1_103_17876_20120203_220933_inLine +BABEL_OP1_103_17876_20120203_220933_outLine +BABEL_OP1_103_18244_20120514_000930_inLine +BABEL_OP1_103_18244_20120514_000931_outLine +BABEL_OP1_103_18556_20111231_233139_inLine +BABEL_OP1_103_18556_20111231_233139_outLine +BABEL_OP1_103_18755_20120103_012800_inLine +BABEL_OP1_103_18755_20120103_012800_outLine +BABEL_OP1_103_18861_20120612_231154_inLine +BABEL_OP1_103_18861_20120612_231154_outLine +BABEL_OP1_103_18938_20120515_163044_inLine +BABEL_OP1_103_18938_20120515_163044_outLine +BABEL_OP1_103_19118_20120206_194310_inLine +BABEL_OP1_103_19118_20120206_194310_outLine +BABEL_OP1_103_19324_20120114_223457_inLine +BABEL_OP1_103_19324_20120114_223457_outLine +BABEL_OP1_103_19564_20120112_151539_inLine +BABEL_OP1_103_19564_20120112_151539_outLine +BABEL_OP1_103_19697_20120107_043218_inLine +BABEL_OP1_103_19697_20120107_043218_outLine +BABEL_OP1_103_19885_20120517_200533_inLine +BABEL_OP1_103_19885_20120517_200533_outLine +BABEL_OP1_103_20481_20120118_004556_inLine +BABEL_OP1_103_20481_20120118_004556_outLine +BABEL_OP1_103_21020_20120517_182615_inLine +BABEL_OP1_103_21020_20120517_182615_outLine +BABEL_OP1_103_21076_20111231_203216_inLine +BABEL_OP1_103_21076_20111231_203216_outLine +BABEL_OP1_103_21208_20120113_192303_inLine +BABEL_OP1_103_21208_20120113_192303_outLine +BABEL_OP1_103_21417_20120115_235720_inLine +BABEL_OP1_103_21417_20120115_235720_outLine +BABEL_OP1_103_21648_20111229_024025_inLine +BABEL_OP1_103_21648_20111229_024025_outLine +BABEL_OP1_103_21928_20120205_210433_inLine +BABEL_OP1_103_21928_20120205_221157_inLine +BABEL_OP1_103_22338_20120203_214144_inLine +BABEL_OP1_103_22338_20120203_214144_outLine +BABEL_OP1_103_22528_20120204_221751_inLine +BABEL_OP1_103_22528_20120204_221751_outLine +BABEL_OP1_103_23237_20120201_204534_inLine +BABEL_OP1_103_23237_20120201_204534_outLine +BABEL_OP1_103_24235_20120205_171351_inLine +BABEL_OP1_103_24235_20120205_171351_outLine +BABEL_OP1_103_24340_20120526_221640_inLine +BABEL_OP1_103_24340_20120526_221640_outLine +BABEL_OP1_103_25040_20120129_215646_inLine +BABEL_OP1_103_25040_20120129_215647_outLine +BABEL_OP1_103_25489_20120107_015122_inLine +BABEL_OP1_103_25489_20120107_015122_outLine 
+BABEL_OP1_103_26464_20120115_153724_inLine +BABEL_OP1_103_26464_20120115_153725_outLine +BABEL_OP1_103_26603_20120519_190743_inLine +BABEL_OP1_103_26603_20120519_190743_outLine +BABEL_OP1_103_26980_20120114_151400_inLine +BABEL_OP1_103_26980_20120114_151400_outLine +BABEL_OP1_103_27159_20120109_175434_inLine +BABEL_OP1_103_27159_20120109_175434_outLine +BABEL_OP1_103_27298_20120519_164745_inLine +BABEL_OP1_103_27298_20120519_164745_outLine +BABEL_OP1_103_27374_20120608_213343_inLine +BABEL_OP1_103_27374_20120608_213344_outLine +BABEL_OP1_103_27889_20120405_184406_inLine +BABEL_OP1_103_27889_20120405_184406_outLine +BABEL_OP1_103_27895_20120407_201822_inLine +BABEL_OP1_103_27895_20120407_201822_outLine +BABEL_OP1_103_27997_20120406_024629_inLine +BABEL_OP1_103_27997_20120406_024629_outLine +BABEL_OP1_103_28150_20120421_030716_inLine +BABEL_OP1_103_28150_20120421_030716_outLine +BABEL_OP1_103_28281_20120204_215552_inLine +BABEL_OP1_103_28281_20120204_215552_outLine +BABEL_OP1_103_28325_20120421_034840_inLine +BABEL_OP1_103_28325_20120421_034840_outLine +BABEL_OP1_103_28349_20120422_230936_inLine +BABEL_OP1_103_28349_20120422_230936_outLine +BABEL_OP1_103_28452_20120423_002721_inLine +BABEL_OP1_103_28452_20120423_002721_outLine +BABEL_OP1_103_28820_20111231_235604_inLine +BABEL_OP1_103_28820_20111231_235604_outLine +BABEL_OP1_103_29083_20120524_203900_inLine +BABEL_OP1_103_29083_20120524_203900_outLine +BABEL_OP1_103_29368_20120109_152242_inLine +BABEL_OP1_103_29368_20120109_152242_outLine +BABEL_OP1_103_29757_20120607_155549_inLine +BABEL_OP1_103_29757_20120607_155549_outLine +BABEL_OP1_103_30012_20120523_210111_inLine +BABEL_OP1_103_30012_20120523_210111_outLine +BABEL_OP1_103_30031_20111231_051935_inLine +BABEL_OP1_103_30031_20111231_051935_outLine +BABEL_OP1_103_30040_20120114_164613_inLine +BABEL_OP1_103_30040_20120114_164613_outLine +BABEL_OP1_103_30243_20120115_005252_inLine +BABEL_OP1_103_30243_20120115_005252_outLine +BABEL_OP1_103_30711_20120612_211646_inLine +BABEL_OP1_103_30711_20120612_211646_outLine +BABEL_OP1_103_30847_20120108_235955_inLine +BABEL_OP1_103_30847_20120108_235955_outLine +BABEL_OP1_103_30904_20120522_013413_inLine +BABEL_OP1_103_30904_20120522_013413_outLine +BABEL_OP1_103_31201_20120523_211540_inLine +BABEL_OP1_103_31201_20120523_211540_outLine +BABEL_OP1_103_31871_20120115_205857_inLine +BABEL_OP1_103_31871_20120115_205857_outLine +BABEL_OP1_103_32453_20120116_174338_inLine +BABEL_OP1_103_32453_20120116_174338_outLine +BABEL_OP1_103_32722_20120115_005258_inLine +BABEL_OP1_103_32722_20120115_005258_outLine +BABEL_OP1_103_33223_20120108_225050_inLine +BABEL_OP1_103_33223_20120108_225050_outLine +BABEL_OP1_103_33316_20120528_173250_inLine +BABEL_OP1_103_33316_20120528_173250_outLine +BABEL_OP1_103_33534_20120122_020502_inLine +BABEL_OP1_103_33534_20120122_020502_outLine +BABEL_OP1_103_33551_20120122_194434_inLine +BABEL_OP1_103_33551_20120122_194434_outLine +BABEL_OP1_103_33699_20120122_173500_inLine +BABEL_OP1_103_33699_20120122_173500_outLine +BABEL_OP1_103_33807_20120122_190057_inLine +BABEL_OP1_103_33807_20120122_190057_outLine +BABEL_OP1_103_33885_20120125_172938_inLine +BABEL_OP1_103_33885_20120125_172938_outLine +BABEL_OP1_103_33991_20120117_202117_inLine +BABEL_OP1_103_33991_20120117_202118_outLine +BABEL_OP1_103_34137_20120529_224220_inLine +BABEL_OP1_103_34137_20120529_224220_outLine +BABEL_OP1_103_34332_20120204_191733_inLine +BABEL_OP1_103_34332_20120204_191733_outLine +BABEL_OP1_103_34545_20120118_173942_inLine 
+BABEL_OP1_103_34545_20120118_173942_outLine +BABEL_OP1_103_34564_20120530_211027_inLine +BABEL_OP1_103_34564_20120530_211027_outLine +BABEL_OP1_103_34925_20120112_154829_inLine +BABEL_OP1_103_34925_20120112_154829_outLine +BABEL_OP1_103_34994_20120115_213251_inLine +BABEL_OP1_103_34994_20120115_213251_outLine +BABEL_OP1_103_35144_20120123_230913_inLine +BABEL_OP1_103_35144_20120123_230913_outLine +BABEL_OP1_103_35152_20111230_220705_inLine +BABEL_OP1_103_35152_20111230_220705_outLine +BABEL_OP1_103_35157_20120124_010640_inLine +BABEL_OP1_103_35157_20120124_010640_outLine +BABEL_OP1_103_35444_20120612_203930_inLine +BABEL_OP1_103_35444_20120612_203930_outLine +BABEL_OP1_103_35660_20120122_013401_inLine +BABEL_OP1_103_35660_20120122_013402_outLine +BABEL_OP1_103_35750_20111230_025221_inLine +BABEL_OP1_103_35750_20111230_025221_outLine +BABEL_OP1_103_35892_20120120_205811_inLine +BABEL_OP1_103_35892_20120120_205811_outLine +BABEL_OP1_103_36748_20120121_230812_inLine +BABEL_OP1_103_36748_20120121_230812_outLine +BABEL_OP1_103_36962_20120810_005828_inLine +BABEL_OP1_103_36962_20120810_005828_outLine +BABEL_OP1_103_37131_20120522_165130_inLine +BABEL_OP1_103_37131_20120522_165130_outLine +BABEL_OP1_103_37551_20111229_232422_inLine +BABEL_OP1_103_37551_20111229_232422_outLine +BABEL_OP1_103_37604_20120122_203335_inLine +BABEL_OP1_103_37604_20120122_203335_outLine +BABEL_OP1_103_37687_20120124_220825_inLine +BABEL_OP1_103_37687_20120124_220826_outLine +BABEL_OP1_103_38573_20120120_234500_inLine +BABEL_OP1_103_38573_20120120_234500_outLine +BABEL_OP1_103_38573_20120121_000745_inLine +BABEL_OP1_103_38573_20120121_000745_outLine +BABEL_OP1_103_38588_20120522_215415_inLine +BABEL_OP1_103_38588_20120522_215415_outLine +BABEL_OP1_103_39320_20120207_022344_inLine +BABEL_OP1_103_39320_20120207_022344_outLine +BABEL_OP1_103_39769_20120127_213455_inLine +BABEL_OP1_103_39769_20120127_213455_outLine +BABEL_OP1_103_40410_20120124_204758_inLine +BABEL_OP1_103_40410_20120124_204758_outLine +BABEL_OP1_103_40442_20120202_174431_inLine +BABEL_OP1_103_40442_20120202_174431_outLine +BABEL_OP1_103_40889_20120206_221100_inLine +BABEL_OP1_103_40889_20120206_221100_outLine +BABEL_OP1_103_41172_20120114_134829_inLine +BABEL_OP1_103_41172_20120114_134829_outLine +BABEL_OP1_103_42332_20120126_191134_inLine +BABEL_OP1_103_42332_20120126_191134_outLine +BABEL_OP1_103_42332_20120126_192035_inLine +BABEL_OP1_103_42332_20120126_192035_outLine +BABEL_OP1_103_42651_20120122_000902_inLine +BABEL_OP1_103_42651_20120122_000902_outLine +BABEL_OP1_103_42698_20120123_230900_inLine +BABEL_OP1_103_42698_20120123_230900_outLine +BABEL_OP1_103_42790_20120129_205024_inLine +BABEL_OP1_103_42790_20120129_205025_outLine +BABEL_OP1_103_42986_20120125_204035_inLine +BABEL_OP1_103_42986_20120125_204035_outLine +BABEL_OP1_103_43442_20120120_033602_inLine +BABEL_OP1_103_43442_20120120_033602_outLine +BABEL_OP1_103_43571_20111226_210759_inLine +BABEL_OP1_103_43571_20111226_210759_outLine +BABEL_OP1_103_43812_20120124_005515_inLine +BABEL_OP1_103_43812_20120124_005515_outLine +BABEL_OP1_103_43959_20120125_223215_inLine +BABEL_OP1_103_43959_20120125_223215_outLine +BABEL_OP1_103_44838_20111229_014707_inLine +BABEL_OP1_103_44838_20111229_014707_outLine +BABEL_OP1_103_44948_20120203_011011_inLine +BABEL_OP1_103_44948_20120203_011011_outLine +BABEL_OP1_103_44967_20120207_025756_inLine +BABEL_OP1_103_44967_20120207_025756_outLine +BABEL_OP1_103_45020_20120522_170055_inLine +BABEL_OP1_103_45020_20120522_170055_outLine 
+BABEL_OP1_103_45029_20120608_010540_inLine +BABEL_OP1_103_45029_20120608_010540_outLine +BABEL_OP1_103_45565_20120125_220956_inLine +BABEL_OP1_103_45565_20120125_220956_outLine +BABEL_OP1_103_46197_20120524_220246_inLine +BABEL_OP1_103_46197_20120524_220246_outLine +BABEL_OP1_103_46460_20120530_183725_inLine +BABEL_OP1_103_46460_20120530_183725_outLine +BABEL_OP1_103_46460_20120530_185105_inLine +BABEL_OP1_103_46460_20120530_185106_outLine +BABEL_OP1_103_46862_20120124_195804_inLine +BABEL_OP1_103_46862_20120124_195804_outLine +BABEL_OP1_103_46862_20120204_203651_inLine +BABEL_OP1_103_46862_20120204_203651_outLine +BABEL_OP1_103_47151_20111229_233253_inLine +BABEL_OP1_103_47151_20111229_233253_outLine +BABEL_OP1_103_47177_20120127_201638_inLine +BABEL_OP1_103_47177_20120127_201638_outLine +BABEL_OP1_103_47416_20120729_181025_inLine +BABEL_OP1_103_47416_20120729_181025_outLine +BABEL_OP1_103_47424_20111231_203241_inLine +BABEL_OP1_103_47424_20111231_203241_outLine +BABEL_OP1_103_47574_20120207_034724_inLine +BABEL_OP1_103_47574_20120207_034724_outLine +BABEL_OP1_103_48176_20120206_023101_inLine +BABEL_OP1_103_48176_20120206_023101_outLine +BABEL_OP1_103_48259_20120116_022438_inLine +BABEL_OP1_103_48259_20120116_022438_outLine +BABEL_OP1_103_49175_20120206_214803_inLine +BABEL_OP1_103_49175_20120206_214803_outLine +BABEL_OP1_103_49520_20120523_172707_inLine +BABEL_OP1_103_49520_20120523_172707_outLine +BABEL_OP1_103_49629_20120104_040004_inLine +BABEL_OP1_103_49629_20120104_040004_outLine +BABEL_OP1_103_49755_20120110_010410_inLine +BABEL_OP1_103_49755_20120110_010410_outLine +BABEL_OP1_103_49819_20120127_012212_inLine +BABEL_OP1_103_49819_20120127_012212_outLine +BABEL_OP1_103_50492_20120123_211938_inLine +BABEL_OP1_103_50492_20120123_211938_outLine +BABEL_OP1_103_50523_20120607_185125_inLine +BABEL_OP1_103_50523_20120607_185126_outLine +BABEL_OP1_103_50798_20120131_022954_inLine +BABEL_OP1_103_50798_20120131_022954_outLine +BABEL_OP1_103_51243_20120201_200604_inLine +BABEL_OP1_103_51243_20120201_200604_outLine +BABEL_OP1_103_52122_20120207_025756_inLine +BABEL_OP1_103_52122_20120207_025756_outLine +BABEL_OP1_103_52753_20120521_000301_inLine +BABEL_OP1_103_52753_20120521_001422_inLine +BABEL_OP1_103_53067_20120127_225851_inLine +BABEL_OP1_103_53067_20120127_225851_outLine +BABEL_OP1_103_53262_20120204_194912_inLine +BABEL_OP1_103_53262_20120204_194912_outLine +BABEL_OP1_103_53346_20120128_214441_inLine +BABEL_OP1_103_53346_20120128_214441_outLine +BABEL_OP1_103_53636_20120127_000358_inLine +BABEL_OP1_103_53636_20120127_000358_outLine +BABEL_OP1_103_54030_20111230_220440_inLine +BABEL_OP1_103_54030_20111230_220440_outLine +BABEL_OP1_103_54263_20120206_225348_inLine +BABEL_OP1_103_54263_20120206_225349_outLine +BABEL_OP1_103_54417_20120522_172155_inLine +BABEL_OP1_103_54417_20120522_172155_outLine +BABEL_OP1_103_54975_20120207_015749_inLine +BABEL_OP1_103_54975_20120207_015749_outLine +BABEL_OP1_103_54991_20120206_003607_inLine +BABEL_OP1_103_54991_20120206_003607_outLine +BABEL_OP1_103_55166_20120119_180058_inLine +BABEL_OP1_103_55166_20120119_180058_outLine +BABEL_OP1_103_55194_20120529_215243_inLine +BABEL_OP1_103_55194_20120529_215243_outLine +BABEL_OP1_103_56704_20120606_171759_inLine +BABEL_OP1_103_56704_20120606_171759_outLine +BABEL_OP1_103_57092_20111227_044400_inLine +BABEL_OP1_103_57092_20111227_044400_outLine +BABEL_OP1_103_57351_20120612_182248_inLine +BABEL_OP1_103_57351_20120612_182248_outLine +BABEL_OP1_103_58283_20111231_230840_inLine 
+BABEL_OP1_103_58283_20111231_230840_outLine +BABEL_OP1_103_58925_20120113_212456_inLine +BABEL_OP1_103_58925_20120113_212456_outLine +BABEL_OP1_103_58925_20120113_214350_inLine +BABEL_OP1_103_58925_20120113_214350_outLine +BABEL_OP1_103_59482_20120612_190437_inLine +BABEL_OP1_103_59482_20120612_190437_outLine +BABEL_OP1_103_60524_20120109_213755_inLine +BABEL_OP1_103_60524_20120109_213755_outLine +BABEL_OP1_103_60806_20120117_233630_inLine +BABEL_OP1_103_60806_20120117_233630_outLine +BABEL_OP1_103_61229_20120616_151341_inLine +BABEL_OP1_103_61229_20120616_151341_outLine +BABEL_OP1_103_61558_20120106_205412_inLine +BABEL_OP1_103_61558_20120106_205412_outLine +BABEL_OP1_103_61592_20120125_225752_inLine +BABEL_OP1_103_61592_20120125_225752_outLine +BABEL_OP1_103_61629_20120127_192849_inLine +BABEL_OP1_103_61629_20120127_192849_outLine +BABEL_OP1_103_61733_20120201_183457_inLine +BABEL_OP1_103_61733_20120201_183457_outLine +BABEL_OP1_103_62097_20120307_164325_inLine +BABEL_OP1_103_62097_20120307_164325_outLine +BABEL_OP1_103_62182_20111231_003944_inLine +BABEL_OP1_103_62182_20111231_003944_outLine +BABEL_OP1_103_62222_20120122_201756_inLine +BABEL_OP1_103_62222_20120122_201756_outLine +BABEL_OP1_103_62479_20120306_025702_inLine +BABEL_OP1_103_62479_20120306_025702_outLine +BABEL_OP1_103_62558_20120124_220850_inLine +BABEL_OP1_103_62558_20120124_220850_outLine +BABEL_OP1_103_62652_20120306_015948_inLine +BABEL_OP1_103_62652_20120306_015948_outLine +BABEL_OP1_103_62720_20120308_164432_inLine +BABEL_OP1_103_62720_20120308_164432_outLine +BABEL_OP1_103_62720_20120308_165706_inLine +BABEL_OP1_103_62720_20120308_165706_outLine +BABEL_OP1_103_62843_20120310_235523_inLine +BABEL_OP1_103_62843_20120310_235523_outLine +BABEL_OP1_103_63127_20120311_184714_inLine +BABEL_OP1_103_63127_20120311_184714_outLine +BABEL_OP1_103_63129_20120311_193438_inLine +BABEL_OP1_103_63129_20120311_193438_outLine +BABEL_OP1_103_63194_20120312_010359_inLine +BABEL_OP1_103_63194_20120312_010359_outLine +BABEL_OP1_103_63215_20120513_191621_inLine +BABEL_OP1_103_63215_20120513_191621_outLine +BABEL_OP1_103_63240_20120312_021342_inLine +BABEL_OP1_103_63240_20120312_021342_outLine +BABEL_OP1_103_63373_20120315_025205_inLine +BABEL_OP1_103_63373_20120315_025205_outLine +BABEL_OP1_103_63384_20120315_031012_inLine +BABEL_OP1_103_63384_20120315_031012_outLine +BABEL_OP1_103_63510_20120318_221426_inLine +BABEL_OP1_103_63510_20120318_221426_outLine +BABEL_OP1_103_63680_20120319_214759_inLine +BABEL_OP1_103_63680_20120319_214759_outLine +BABEL_OP1_103_63687_20120320_181655_inLine +BABEL_OP1_103_63687_20120320_181655_outLine +BABEL_OP1_103_63923_20120320_172933_inLine +BABEL_OP1_103_63923_20120320_172933_outLine +BABEL_OP1_103_63929_20120123_192325_inLine +BABEL_OP1_103_63929_20120123_192325_outLine +BABEL_OP1_103_64039_20120320_215418_inLine +BABEL_OP1_103_64039_20120320_215418_outLine +BABEL_OP1_103_64145_20120404_204905_inLine +BABEL_OP1_103_64145_20120404_204905_outLine +BABEL_OP1_103_64177_20120404_212051_inLine +BABEL_OP1_103_64177_20120404_212051_outLine +BABEL_OP1_103_64231_20120310_224637_inLine +BABEL_OP1_103_64231_20120310_224637_outLine +BABEL_OP1_103_64610_20120125_223001_inLine +BABEL_OP1_103_64610_20120125_223001_outLine +BABEL_OP1_103_65512_20111229_045507_inLine +BABEL_OP1_103_65512_20111229_045507_outLine +BABEL_OP1_103_65818_20120127_011907_inLine +BABEL_OP1_103_65818_20120127_011907_outLine +BABEL_OP1_103_65954_20120205_190321_inLine +BABEL_OP1_103_65954_20120205_190321_outLine 
+BABEL_OP1_103_65991_20120229_215906_inLine +BABEL_OP1_103_65991_20120229_215906_outLine +BABEL_OP1_103_66005_20120229_221845_inLine +BABEL_OP1_103_66005_20120229_221845_outLine +BABEL_OP1_103_66048_20120229_225251_inLine +BABEL_OP1_103_66048_20120229_225251_outLine +BABEL_OP1_103_66287_20120108_191621_inLine +BABEL_OP1_103_66287_20120108_191621_outLine +BABEL_OP1_103_66309_20120229_232503_inLine +BABEL_OP1_103_66309_20120229_232503_outLine +BABEL_OP1_103_66719_20120116_002436_inLine +BABEL_OP1_103_66719_20120116_002436_outLine +BABEL_OP1_103_66813_20120127_151237_inLine +BABEL_OP1_103_66813_20120127_151237_outLine +BABEL_OP1_103_67001_20120305_223711_inLine +BABEL_OP1_103_67001_20120305_223711_outLine +BABEL_OP1_103_67288_20120305_233501_inLine +BABEL_OP1_103_67288_20120305_233501_outLine +BABEL_OP1_103_67358_20120128_224934_inLine +BABEL_OP1_103_67358_20120128_224934_outLine +BABEL_OP1_103_67484_20120306_212801_inLine +BABEL_OP1_103_67484_20120306_212801_outLine +BABEL_OP1_103_67685_20120118_163939_inLine +BABEL_OP1_103_67685_20120118_163939_outLine +BABEL_OP1_103_67814_20120522_200114_inLine +BABEL_OP1_103_67814_20120522_200114_outLine +BABEL_OP1_103_67824_20120116_000148_inLine +BABEL_OP1_103_67824_20120116_000148_outLine +BABEL_OP1_103_68602_20120729_174819_inLine +BABEL_OP1_103_68602_20120729_174819_outLine +BABEL_OP1_103_68811_20120531_155031_inLine +BABEL_OP1_103_68811_20120531_155031_outLine +BABEL_OP1_103_69969_20120309_020612_inLine +BABEL_OP1_103_69969_20120309_020612_outLine +BABEL_OP1_103_69990_20120305_153850_inLine +BABEL_OP1_103_69990_20120305_153850_outLine +BABEL_OP1_103_70200_20120311_000406_inLine +BABEL_OP1_103_70200_20120311_000406_outLine +BABEL_OP1_103_70476_20120117_202957_inLine +BABEL_OP1_103_70476_20120117_202957_outLine +BABEL_OP1_103_70476_20120117_204242_inLine +BABEL_OP1_103_70476_20120117_204242_outLine +BABEL_OP1_103_70651_20120131_034337_inLine +BABEL_OP1_103_70651_20120131_034337_outLine +BABEL_OP1_103_70762_20111230_015835_inLine +BABEL_OP1_103_70762_20111230_015835_outLine +BABEL_OP1_103_70858_20120201_191031_inLine +BABEL_OP1_103_70858_20120201_191031_outLine +BABEL_OP1_103_70897_20120118_020506_inLine +BABEL_OP1_103_70897_20120118_020506_outLine +BABEL_OP1_103_70919_20120202_170934_inLine +BABEL_OP1_103_70919_20120202_170934_outLine +BABEL_OP1_103_71215_20120207_001204_inLine +BABEL_OP1_103_71215_20120207_001204_outLine +BABEL_OP1_103_71293_20120101_212224_inLine +BABEL_OP1_103_71293_20120101_212224_outLine +BABEL_OP1_103_71450_20120514_181620_inLine +BABEL_OP1_103_71450_20120514_181621_outLine +BABEL_OP1_103_71666_20120514_223534_inLine +BABEL_OP1_103_71666_20120514_223534_outLine +BABEL_OP1_103_71691_20120109_034006_inLine +BABEL_OP1_103_71691_20120109_034007_outLine +BABEL_OP1_103_72179_20120511_023300_inLine +BABEL_OP1_103_72179_20120511_023300_outLine +BABEL_OP1_103_72709_20120204_231928_inLine +BABEL_OP1_103_72709_20120204_231928_outLine +BABEL_OP1_103_72714_20120126_001354_inLine +BABEL_OP1_103_72714_20120126_001354_outLine +BABEL_OP1_103_73264_20111228_184038_inLine +BABEL_OP1_103_73264_20111228_184038_outLine +BABEL_OP1_103_73881_20120120_041629_inLine +BABEL_OP1_103_73881_20120120_041629_outLine +BABEL_OP1_103_74188_20120522_172823_inLine +BABEL_OP1_103_74188_20120522_172823_outLine +BABEL_OP1_103_74334_20120102_033902_inLine +BABEL_OP1_103_74334_20120102_033902_outLine +BABEL_OP1_103_75797_20120125_192735_inLine +BABEL_OP1_103_75797_20120125_192735_outLine +BABEL_OP1_103_76276_20120114_191208_inLine 
+BABEL_OP1_103_76276_20120114_191208_outLine +BABEL_OP1_103_77097_20120109_024625_inLine +BABEL_OP1_103_77097_20120109_024625_outLine +BABEL_OP1_103_78722_20120126_234318_inLine +BABEL_OP1_103_78722_20120126_234318_outLine +BABEL_OP1_103_79127_20120205_215208_inLine +BABEL_OP1_103_79127_20120205_215208_outLine +BABEL_OP1_103_79788_20120201_222512_inLine +BABEL_OP1_103_79788_20120201_222512_outLine +BABEL_OP1_103_79803_20120730_020433_inLine +BABEL_OP1_103_79803_20120730_020433_outLine +BABEL_OP1_103_79857_20120111_205043_inLine +BABEL_OP1_103_79857_20120111_205043_outLine +BABEL_OP1_103_79901_20120202_193650_inLine +BABEL_OP1_103_79901_20120202_194746_inLine +BABEL_OP1_103_80118_20120126_010553_inLine +BABEL_OP1_103_80118_20120126_010553_outLine +BABEL_OP1_103_80183_20120513_182754_inLine +BABEL_OP1_103_80183_20120513_182754_outLine +BABEL_OP1_103_80313_20120106_200706_inLine +BABEL_OP1_103_80313_20120106_200706_outLine +BABEL_OP1_103_80943_20120125_185437_inLine +BABEL_OP1_103_80943_20120125_185437_outLine +BABEL_OP1_103_81800_20120531_180959_inLine +BABEL_OP1_103_81800_20120531_180959_outLine +BABEL_OP1_103_81800_20120531_182855_inLine +BABEL_OP1_103_81800_20120531_182855_outLine +BABEL_OP1_103_82094_20120522_225233_inLine +BABEL_OP1_103_82094_20120522_225233_outLine +BABEL_OP1_103_82135_20120117_213149_inLine +BABEL_OP1_103_82135_20120117_213149_outLine +BABEL_OP1_103_83819_20120125_193543_inLine +BABEL_OP1_103_83819_20120125_193543_outLine +BABEL_OP1_103_83835_20111231_193822_inLine +BABEL_OP1_103_83835_20111231_193822_outLine +BABEL_OP1_103_84654_20120515_201204_inLine +BABEL_OP1_103_84654_20120515_201204_outLine +BABEL_OP1_103_84854_20120205_001920_inLine +BABEL_OP1_103_84854_20120205_001920_outLine +BABEL_OP1_103_85457_20120521_204532_inLine +BABEL_OP1_103_85457_20120521_204532_outLine +BABEL_OP1_103_85577_20120729_215558_inLine +BABEL_OP1_103_85577_20120729_215558_outLine +BABEL_OP1_103_85730_20120116_233350_inLine +BABEL_OP1_103_85730_20120116_233350_outLine +BABEL_OP1_103_85764_20120129_192217_inLine +BABEL_OP1_103_85764_20120129_192217_outLine +BABEL_OP1_103_86537_20120511_195620_inLine +BABEL_OP1_103_86537_20120511_195620_outLine +BABEL_OP1_103_86614_20120521_220136_inLine +BABEL_OP1_103_86614_20120521_220136_outLine +BABEL_OP1_103_86680_20120105_191615_inLine +BABEL_OP1_103_86680_20120105_191615_outLine +BABEL_OP1_103_87453_20120515_170718_inLine +BABEL_OP1_103_87453_20120515_170718_outLine +BABEL_OP1_103_87677_20120121_224149_inLine +BABEL_OP1_103_87677_20120121_224149_outLine +BABEL_OP1_103_88677_20120112_032502_inLine +BABEL_OP1_103_88677_20120112_032502_outLine +BABEL_OP1_103_89464_20120205_204528_inLine +BABEL_OP1_103_89464_20120205_204528_outLine +BABEL_OP1_103_89702_20120109_021228_inLine +BABEL_OP1_103_89702_20120109_021228_outLine +BABEL_OP1_103_90641_20120102_212610_inLine +BABEL_OP1_103_90641_20120102_212610_outLine +BABEL_OP1_103_90882_20120530_230837_inLine +BABEL_OP1_103_90882_20120530_230837_outLine +BABEL_OP1_103_91161_20111229_202627_inLine +BABEL_OP1_103_91161_20111229_202627_outLine +BABEL_OP1_103_91372_20120115_023342_inLine +BABEL_OP1_103_91372_20120115_023342_outLine +BABEL_OP1_103_92722_20120512_132612_inLine +BABEL_OP1_103_92722_20120512_132612_outLine +BABEL_OP1_103_92793_20111229_200332_inLine +BABEL_OP1_103_92793_20111229_200332_outLine +BABEL_OP1_103_92910_20120205_195736_inLine +BABEL_OP1_103_92910_20120205_195736_outLine +BABEL_OP1_103_93026_20111228_235326_inLine +BABEL_OP1_103_93026_20111228_235326_outLine 
+BABEL_OP1_103_93358_20120107_025421_inLine +BABEL_OP1_103_93358_20120107_025421_outLine +BABEL_OP1_103_93907_20111228_051458_inLine +BABEL_OP1_103_93907_20111228_051458_outLine +BABEL_OP1_103_94793_20120102_034406_inLine +BABEL_OP1_103_94793_20120102_034406_outLine +BABEL_OP1_103_95349_20111229_201011_inLine +BABEL_OP1_103_95349_20111229_201011_outLine +BABEL_OP1_103_95349_20111229_225436_inLine +BABEL_OP1_103_95349_20111229_225436_outLine +BABEL_OP1_103_96537_20120729_165831_inLine +BABEL_OP1_103_96537_20120729_165831_outLine +BABEL_OP1_103_96690_20120131_213344_inLine +BABEL_OP1_103_96690_20120131_213344_outLine +BABEL_OP1_103_97679_20111229_191138_inLine +BABEL_OP1_103_97679_20111229_191138_outLine +BABEL_OP1_103_97971_20120111_020458_inLine +BABEL_OP1_103_97971_20120111_020459_outLine +BABEL_OP1_103_98331_20120131_213958_inLine +BABEL_OP1_103_98331_20120131_213958_outLine +BABEL_OP1_103_98446_20120101_215857_inLine +BABEL_OP1_103_98446_20120101_215857_outLine +BABEL_OP1_103_99093_20120514_161939_inLine +BABEL_OP1_103_99093_20120514_161939_outLine +BABEL_OP1_103_99510_20120515_175659_inLine +BABEL_OP1_103_99510_20120515_175659_outLine diff --git a/egs/babel/s5d/conf/lists/103-bengali/train.untranscribed.list b/egs/babel/s5d/conf/lists/103-bengali/train.untranscribed.list new file mode 100644 index 00000000000..5a1273fe091 --- /dev/null +++ b/egs/babel/s5d/conf/lists/103-bengali/train.untranscribed.list @@ -0,0 +1,255 @@ +BABEL_OP1_103_10911_20120521_172505_inLine +BABEL_OP1_103_10911_20120521_172505_outLine +BABEL_OP1_103_10974_20121003_140938_inLine +BABEL_OP1_103_10974_20121003_140938_outLine +BABEL_OP1_103_11386_20121003_121747_inLine +BABEL_OP1_103_11386_20121003_121747_outLine +BABEL_OP1_103_12092_20121214_014753_inLine +BABEL_OP1_103_12092_20121214_014753_outLine +BABEL_OP1_103_13064_20120812_170202_inLine +BABEL_OP1_103_13064_20120812_170202_outLine +BABEL_OP1_103_13834_20121003_194231_inLine +BABEL_OP1_103_13834_20121003_194231_outLine +BABEL_OP1_103_14631_20120927_192723_inLine +BABEL_OP1_103_14631_20120927_192723_outLine +BABEL_OP1_103_15440_20120516_145148_inLine +BABEL_OP1_103_15440_20120516_145148_outLine +BABEL_OP1_103_17813_20120120_005856_inLine +BABEL_OP1_103_17813_20120120_005856_outLine +BABEL_OP1_103_17813_20120124_014523_inLine +BABEL_OP1_103_17813_20120124_014523_outLine +BABEL_OP1_103_18084_20120530_155334_inLine +BABEL_OP1_103_18331_20121007_213032_inLine +BABEL_OP1_103_18331_20121007_213032_outLine +BABEL_OP1_103_21083_20120514_145620_inLine +BABEL_OP1_103_21083_20120514_145620_outLine +BABEL_OP1_103_21352_20120930_143535_inLine +BABEL_OP1_103_21352_20120930_143535_outLine +BABEL_OP1_103_23378_20120531_010537_inLine +BABEL_OP1_103_23378_20120531_010537_outLine +BABEL_OP1_103_24303_20120809_003638_inLine +BABEL_OP1_103_24303_20120809_003638_outLine +BABEL_OP1_103_26536_20120523_220246_inLine +BABEL_OP1_103_26536_20120523_220246_outLine +BABEL_OP1_103_27187_20120929_030115_inLine +BABEL_OP1_103_27187_20120929_030115_outLine +BABEL_OP1_103_27356_20120618_212359_inLine +BABEL_OP1_103_27356_20120618_212359_outLine +BABEL_OP1_103_27378_20121005_211922_inLine +BABEL_OP1_103_27378_20121005_211922_outLine +BABEL_OP1_103_27679_20120528_215730_inLine +BABEL_OP1_103_27679_20120528_215730_outLine +BABEL_OP1_103_27891_20121006_234744_inLine +BABEL_OP1_103_27891_20121006_234744_outLine +BABEL_OP1_103_29103_20120516_171814_inLine +BABEL_OP1_103_29103_20120516_171814_outLine +BABEL_OP1_103_29690_20120930_135813_inLine +BABEL_OP1_103_29690_20120930_135813_outLine 
+BABEL_OP1_103_29911_20120607_231532_inLine +BABEL_OP1_103_30638_20120928_141651_inLine +BABEL_OP1_103_30638_20120928_141651_outLine +BABEL_OP1_103_30817_20120806_163759_inLine +BABEL_OP1_103_30817_20120806_163759_outLine +BABEL_OP1_103_31485_20120609_184729_inLine +BABEL_OP1_103_31485_20120609_184729_outLine +BABEL_OP1_103_33279_20120928_011938_inLine +BABEL_OP1_103_33279_20120928_011939_outLine +BABEL_OP1_103_37731_20120526_213340_inLine +BABEL_OP1_103_37731_20120526_213340_outLine +BABEL_OP1_103_39215_20121002_013230_inLine +BABEL_OP1_103_39215_20121002_013230_outLine +BABEL_OP1_103_39783_20121002_221911_inLine +BABEL_OP1_103_39783_20121002_221911_outLine +BABEL_OP1_103_42098_20121007_204200_inLine +BABEL_OP1_103_42098_20121007_204200_outLine +BABEL_OP1_103_44267_20121005_232936_inLine +BABEL_OP1_103_44267_20121005_232936_outLine +BABEL_OP1_103_44419_20121225_001833_inLine +BABEL_OP1_103_44419_20121225_001833_outLine +BABEL_OP1_103_44747_20121002_164108_inLine +BABEL_OP1_103_44747_20121002_164108_outLine +BABEL_OP1_103_46947_20120522_173213_inLine +BABEL_OP1_103_46947_20120522_173213_outLine +BABEL_OP1_103_47049_20120522_182020_inLine +BABEL_OP1_103_47049_20120522_182020_outLine +BABEL_OP1_103_47251_20120522_194654_inLine +BABEL_OP1_103_47251_20120522_194654_outLine +BABEL_OP1_103_48313_20120522_202903_inLine +BABEL_OP1_103_48313_20120522_202903_outLine +BABEL_OP1_103_48416_20120606_165040_inLine +BABEL_OP1_103_48416_20120606_165040_outLine +BABEL_OP1_103_48795_20120612_193506_inLine +BABEL_OP1_103_48795_20120612_193506_outLine +BABEL_OP1_103_49201_20120930_161546_inLine +BABEL_OP1_103_49201_20120930_161546_outLine +BABEL_OP1_103_49208_20120522_233157_inLine +BABEL_OP1_103_49208_20120522_233157_outLine +BABEL_OP1_103_49443_20120928_004643_inLine +BABEL_OP1_103_49443_20120928_004643_outLine +BABEL_OP1_103_49545_20120530_163034_inLine +BABEL_OP1_103_49545_20120530_163034_outLine +BABEL_OP1_103_49548_20120523_162625_inLine +BABEL_OP1_103_49548_20120523_162625_outLine +BABEL_OP1_103_49885_20121002_000523_inLine +BABEL_OP1_103_49885_20121002_000523_outLine +BABEL_OP1_103_51973_20120102_032210_inLine +BABEL_OP1_103_51973_20120102_032210_outLine +BABEL_OP1_103_51973_20120102_033759_inLine +BABEL_OP1_103_51973_20120102_033759_outLine +BABEL_OP1_103_53659_20120802_001534_inLine +BABEL_OP1_103_53659_20120802_001534_outLine +BABEL_OP1_103_54393_20120928_235549_inLine +BABEL_OP1_103_54393_20120928_235549_outLine +BABEL_OP1_103_55382_20120629_230445_inLine +BABEL_OP1_103_55382_20120629_230445_outLine +BABEL_OP1_103_56283_20121007_180739_inLine +BABEL_OP1_103_56283_20121007_180739_outLine +BABEL_OP1_103_57584_20120725_224449_inLine +BABEL_OP1_103_57584_20120725_224449_outLine +BABEL_OP1_103_58298_20120528_222416_inLine +BABEL_OP1_103_58298_20120528_222416_outLine +BABEL_OP1_103_59488_20120929_145848_inLine +BABEL_OP1_103_59488_20120929_145848_outLine +BABEL_OP1_103_59799_20121229_171348_inLine +BABEL_OP1_103_59799_20121229_171348_outLine +BABEL_OP1_103_60055_20120819_171855_inLine +BABEL_OP1_103_60055_20120819_171855_outLine +BABEL_OP1_103_60572_20120530_175437_inLine +BABEL_OP1_103_60572_20120530_175437_outLine +BABEL_OP1_103_60730_20120514_223932_inLine +BABEL_OP1_103_60730_20120514_223932_outLine +BABEL_OP1_103_61635_20120928_234720_inLine +BABEL_OP1_103_61635_20120928_234720_outLine +BABEL_OP1_103_61655_20120809_233557_inLine +BABEL_OP1_103_61655_20120809_233557_outLine +BABEL_OP1_103_62109_20120512_223919_inLine +BABEL_OP1_103_62109_20120512_223919_outLine 
+BABEL_OP1_103_63043_20121007_231348_inLine +BABEL_OP1_103_63043_20121007_231348_outLine +BABEL_OP1_103_63043_20121007_232702_inLine +BABEL_OP1_103_63043_20121007_232702_outLine +BABEL_OP1_103_63390_20120513_174652_inLine +BABEL_OP1_103_63390_20120513_174652_outLine +BABEL_OP1_103_63603_20121011_004426_inLine +BABEL_OP1_103_63603_20121011_004426_outLine +BABEL_OP1_103_63842_20121005_162812_inLine +BABEL_OP1_103_63842_20121005_162812_outLine +BABEL_OP1_103_63996_20120516_162255_inLine +BABEL_OP1_103_63996_20120516_162255_outLine +BABEL_OP1_103_64695_20120731_171306_inLine +BABEL_OP1_103_64695_20120731_171306_outLine +BABEL_OP1_103_66842_20120516_153359_inLine +BABEL_OP1_103_66842_20120516_153400_outLine +BABEL_OP1_103_66879_20120524_201608_inLine +BABEL_OP1_103_66879_20120524_201608_outLine +BABEL_OP1_103_68102_20120601_163256_inLine +BABEL_OP1_103_68102_20120601_163256_outLine +BABEL_OP1_103_68189_20120524_212606_inLine +BABEL_OP1_103_68189_20120524_212606_outLine +BABEL_OP1_103_68538_20120608_172925_inLine +BABEL_OP1_103_68538_20120608_172925_outLine +BABEL_OP1_103_68538_20120608_174508_inLine +BABEL_OP1_103_68538_20120608_174508_outLine +BABEL_OP1_103_71224_20121005_221009_inLine +BABEL_OP1_103_71224_20121005_221009_outLine +BABEL_OP1_103_71996_20120522_225024_inLine +BABEL_OP1_103_71996_20120522_225024_outLine +BABEL_OP1_103_72088_20121003_002504_inLine +BABEL_OP1_103_72088_20121003_002504_outLine +BABEL_OP1_103_75345_20121001_203932_inLine +BABEL_OP1_103_75345_20121001_203932_outLine +BABEL_OP1_103_76149_20121004_032258_inLine +BABEL_OP1_103_76149_20121004_032258_outLine +BABEL_OP1_103_76372_20120514_235628_inLine +BABEL_OP1_103_76372_20120514_235628_outLine +BABEL_OP1_103_76832_20120528_201751_inLine +BABEL_OP1_103_76832_20120528_201751_outLine +BABEL_OP1_103_77294_20120616_144707_inLine +BABEL_OP1_103_77294_20120616_144707_outLine +BABEL_OP1_103_78792_20120522_191207_inLine +BABEL_OP1_103_78792_20120522_191207_outLine +BABEL_OP1_103_78938_20120512_201016_inLine +BABEL_OP1_103_78938_20120512_201016_outLine +BABEL_OP1_103_79006_20120521_012957_outLine +BABEL_OP1_103_79387_20120522_211025_inLine +BABEL_OP1_103_79387_20120522_211025_outLine +BABEL_OP1_103_79989_20120928_013138_inLine +BABEL_OP1_103_79989_20120928_013138_outLine +BABEL_OP1_103_80679_20120930_163521_inLine +BABEL_OP1_103_80679_20120930_163521_outLine +BABEL_OP1_103_81492_20120206_014433_inLine +BABEL_OP1_103_81492_20120206_014433_outLine +BABEL_OP1_103_81492_20120206_020249_inLine +BABEL_OP1_103_81492_20120206_020249_outLine +BABEL_OP1_103_82181_20120929_042042_inLine +BABEL_OP1_103_82181_20120929_042042_outLine +BABEL_OP1_103_84111_20120930_144529_inLine +BABEL_OP1_103_84111_20120930_144529_outLine +BABEL_OP1_103_84946_20120619_234231_inLine +BABEL_OP1_103_84946_20120619_234231_outLine +BABEL_OP1_103_85272_20120531_172145_inLine +BABEL_OP1_103_85272_20120531_172145_outLine +BABEL_OP1_103_85388_20120512_131608_inLine +BABEL_OP1_103_85388_20120512_131608_outLine +BABEL_OP1_103_85443_20120512_163256_inLine +BABEL_OP1_103_85443_20120512_163256_outLine +BABEL_OP1_103_85443_20120512_164633_inLine +BABEL_OP1_103_85443_20120512_164633_outLine +BABEL_OP1_103_86067_20121008_031300_inLine +BABEL_OP1_103_86067_20121008_031300_outLine +BABEL_OP1_103_86121_20121011_040043_inLine +BABEL_OP1_103_86121_20121011_040043_outLine +BABEL_OP1_103_87741_20121231_225715_inLine +BABEL_OP1_103_87741_20121231_225715_outLine +BABEL_OP1_103_89091_20130104_015514_inLine +BABEL_OP1_103_89091_20130104_015514_outLine 
+BABEL_OP1_103_89091_20130104_032531_inLine +BABEL_OP1_103_89091_20130104_032531_outLine +BABEL_OP1_103_89190_20130106_003028_inLine +BABEL_OP1_103_89190_20130106_003028_outLine +BABEL_OP1_103_90326_20120522_175819_inLine +BABEL_OP1_103_90326_20120522_175819_outLine +BABEL_OP1_103_90672_20121225_182001_inLine +BABEL_OP1_103_90672_20121225_182001_outLine +BABEL_OP1_103_91105_20120516_141445_inLine +BABEL_OP1_103_91105_20120516_141445_outLine +BABEL_OP1_103_91670_20121225_195825_inLine +BABEL_OP1_103_91670_20121225_195825_outLine +BABEL_OP1_103_91723_20121226_011745_inLine +BABEL_OP1_103_91723_20121226_011745_outLine +BABEL_OP1_103_91733_20121227_001726_inLine +BABEL_OP1_103_91733_20121227_001726_outLine +BABEL_OP1_103_91744_20121227_005513_inLine +BABEL_OP1_103_91744_20121227_005513_outLine +BABEL_OP1_103_91815_20121230_215316_inLine +BABEL_OP1_103_91815_20121230_215316_outLine +BABEL_OP1_103_91838_20121230_210441_inLine +BABEL_OP1_103_91838_20121230_210441_outLine +BABEL_OP1_103_91957_20130103_192518_inLine +BABEL_OP1_103_91957_20130103_192518_outLine +BABEL_OP1_103_92027_20130104_160934_inLine +BABEL_OP1_103_92027_20130104_160934_outLine +BABEL_OP1_103_92083_20130105_170057_inLine +BABEL_OP1_103_92083_20130105_170057_outLine +BABEL_OP1_103_92192_20130105_180415_inLine +BABEL_OP1_103_92192_20130105_180415_outLine +BABEL_OP1_103_92277_20130105_173147_inLine +BABEL_OP1_103_92277_20130105_173147_outLine +BABEL_OP1_103_93600_20120812_151245_inLine +BABEL_OP1_103_93600_20120812_151245_outLine +BABEL_OP1_103_94057_20130101_231512_inLine +BABEL_OP1_103_94057_20130101_231513_outLine +BABEL_OP1_103_94065_20130105_015217_inLine +BABEL_OP1_103_94065_20130105_015217_outLine +BABEL_OP1_103_94069_20130101_234436_inLine +BABEL_OP1_103_94069_20130101_234436_outLine +BABEL_OP1_103_96844_20121224_193654_inLine +BABEL_OP1_103_96844_20121224_193654_outLine +BABEL_OP1_103_96868_20120528_161710_inLine +BABEL_OP1_103_96868_20120528_161710_outLine +BABEL_OP1_103_97289_20120806_174807_inLine +BABEL_OP1_103_97289_20120806_174807_outLine +BABEL_OP1_103_98325_20120805_170336_inLine +BABEL_OP1_103_98325_20120805_170336_outLine +BABEL_OP1_103_99446_20120523_164823_inLine +BABEL_OP1_103_99446_20120523_164823_outLine diff --git a/egs/babel/s5d/conf/lists/104-pashto/dev.list b/egs/babel/s5d/conf/lists/104-pashto/dev.list new file mode 100644 index 00000000000..7624d5decb2 --- /dev/null +++ b/egs/babel/s5d/conf/lists/104-pashto/dev.list @@ -0,0 +1,143 @@ +BABEL_BP_104_04221_20120310_194031_inLine +BABEL_BP_104_04221_20120310_194031_outLine +BABEL_BP_104_08861_20120226_050237_inLine +BABEL_BP_104_08861_20120226_050237_outLine +BABEL_BP_104_10712_20120205_004135_inLine +BABEL_BP_104_10712_20120205_004135_outLine +BABEL_BP_104_10712_20120205_005332_inLine +BABEL_BP_104_10712_20120205_005332_outLine +BABEL_BP_104_13196_20120130_151929_inLine +BABEL_BP_104_13196_20120130_151929_outLine +BABEL_BP_104_14002_20120218_235147_inLine +BABEL_BP_104_14002_20120218_235147_outLine +BABEL_BP_104_15268_20120110_154803_inLine +BABEL_BP_104_15268_20120110_154803_outLine +BABEL_BP_104_15268_20120110_154803_outLine +BABEL_BP_104_16210_20111223_035614_inLine +BABEL_BP_104_16210_20111223_041103_inLine +BABEL_BP_104_17749_20120314_233247_inLine +BABEL_BP_104_17749_20120314_233247_outLine +BABEL_BP_104_21113_20120319_010218_inLine +BABEL_BP_104_22338_20120128_204829_inLine +BABEL_BP_104_22713_20120205_170953_inLine +BABEL_BP_104_23560_20120124_200340_inLine +BABEL_BP_104_28102_20120326_164501_inLine +BABEL_BP_104_28102_20120326_164501_outLine 
+BABEL_BP_104_28102_20120326_171523_inLine
+BABEL_BP_104_28102_20120326_171523_outLine
+BABEL_BP_104_29368_20120321_233801_inLine
+BABEL_BP_104_29368_20120321_233802_outLine
+BABEL_BP_104_29368_20120321_235133_inLine
+BABEL_BP_104_29368_20120321_235133_outLine
+BABEL_BP_104_33955_20120218_033644_inLine
+BABEL_BP_104_33955_20120218_033644_outLine
+BABEL_BP_104_34541_20120321_005610_inLine
+BABEL_BP_104_34541_20120321_005610_outLine
+BABEL_BP_104_35756_20120311_223543_inLine
+BABEL_BP_104_35756_20120311_223543_outLine
+BABEL_BP_104_36867_20120208_233318_inLine
+BABEL_BP_104_36867_20120208_233318_outLine
+BABEL_BP_104_37314_20120208_184924_inLine
+BABEL_BP_104_37314_20120208_184924_outLine
+BABEL_BP_104_39030_20120119_225755_outLine
+BABEL_BP_104_39279_20120227_144602_inLine
+BABEL_BP_104_39279_20120227_144602_outLine
+BABEL_BP_104_40410_20120320_225202_inLine
+BABEL_BP_104_40410_20120320_225202_inLine
+BABEL_BP_104_40410_20120320_225202_outLine
+BABEL_BP_104_40475_20120205_221544_inLine
+BABEL_BP_104_40956_20120310_235812_inLine
+BABEL_BP_104_40956_20120310_235812_outLine
+BABEL_BP_104_43170_20120205_035143_inLine
+BABEL_BP_104_44838_20120324_232540_inLine
+BABEL_BP_104_44838_20120324_232540_outLine
+BABEL_BP_104_53864_20120203_213736_outLine
+BABEL_BP_104_54222_20120309_160035_inLine
+BABEL_BP_104_54222_20120309_160035_outLine
+BABEL_BP_104_56005_20120113_205235_outLine
+BABEL_BP_104_56226_20120205_235429_outLine
+BABEL_BP_104_60524_20120319_160420_inLine
+BABEL_BP_104_60524_20120319_160420_outLine
+BABEL_BP_104_60524_20120319_161719_inLine
+BABEL_BP_104_60524_20120319_161719_outLine
+BABEL_BP_104_61592_20120126_181735_inLine
+BABEL_BP_104_61592_20120126_181735_outLine
+BABEL_BP_104_61616_20120108_214701_inLine
+BABEL_BP_104_61616_20120108_214701_outLine
+BABEL_BP_104_62984_20120219_053758_inLine
+BABEL_BP_104_62984_20120219_053758_outLine
+BABEL_BP_104_64610_20120302_153346_inLine
+BABEL_BP_104_64610_20120302_153346_outLine
+BABEL_BP_104_66017_20120215_233406_inLine
+BABEL_BP_104_66017_20120215_233406_outLine
+BABEL_BP_104_70476_20120309_130456_inLine
+BABEL_BP_104_70476_20120309_130456_outLine
+BABEL_BP_104_72176_20120213_194841_inLine
+BABEL_BP_104_72176_20120213_194841_outLine
+BABEL_BP_104_73728_20111222_192324_inLine
+BABEL_BP_104_73728_20111222_192324_outLine
+BABEL_BP_104_74678_20120314_021415_inLine
+BABEL_BP_104_74678_20120314_021415_outLine
+BABEL_BP_104_74824_20120218_204154_inLine
+BABEL_BP_104_74824_20120218_204154_outLine
+BABEL_BP_104_75839_20120208_035003_inLine
+BABEL_BP_104_75839_20120208_035003_outLine
+BABEL_BP_104_76654_20111220_202441_inLine
+BABEL_BP_104_76812_20120320_180439_inLine
+BABEL_BP_104_76812_20120320_180439_outLine
+BABEL_BP_104_76812_20120320_181229_inLine
+BABEL_BP_104_76812_20120320_181229_outLine
+BABEL_BP_104_78141_20120317_034317_inLine
+BABEL_BP_104_78141_20120317_034317_outLine
+BABEL_BP_104_81274_20120207_202722_inLine
+BABEL_BP_104_81510_20120217_194417_inLine
+BABEL_BP_104_81510_20120217_194417_inLine
+BABEL_BP_104_82160_20120126_022907_inLine
+BABEL_BP_104_82160_20120126_022907_inLine
+BABEL_BP_104_82160_20120126_022907_outLine
+BABEL_BP_104_83980_20120205_184505_inLine
+BABEL_BP_104_83980_20120205_184505_inLine
+BABEL_BP_104_83992_20120219_185819_inLine
+BABEL_BP_104_83992_20120219_185819_outLine
+BABEL_BP_104_84041_20111222_044010_inLine
+BABEL_BP_104_84041_20111222_044010_outLine
+BABEL_BP_104_84274_20120216_161121_inLine
+BABEL_BP_104_84274_20120216_161121_outLine
+BABEL_BP_104_85078_20120320_212106_inLine
+BABEL_BP_104_85078_20120320_212106_outLine
+BABEL_BP_104_85424_20120216_025024_inLine
+BABEL_BP_104_85424_20120216_025024_outLine
+BABEL_BP_104_85455_20120310_210107_inLine
+BABEL_BP_104_85455_20120310_210107_outLine
+BABEL_BP_104_85730_20120128_041419_inLine
+BABEL_BP_104_85730_20120128_041419_outLine
+BABEL_BP_104_85730_20120128_041419_outLine
+BABEL_BP_104_86614_20111222_040726_inLine
+BABEL_BP_104_86614_20111222_040726_outLine
+BABEL_BP_104_86680_20120309_180429_inLine
+BABEL_BP_104_86680_20120309_180429_outLine
+BABEL_BP_104_86680_20120309_181746_inLine
+BABEL_BP_104_86680_20120309_181746_outLine
+BABEL_BP_104_86680_20120309_181746_outLine
+BABEL_BP_104_87723_20120206_183706_inLine
+BABEL_BP_104_87723_20120206_183706_outLine
+BABEL_BP_104_88598_20120216_014512_inLine
+BABEL_BP_104_88598_20120216_014512_outLine
+BABEL_BP_104_88598_20120216_022402_inLine
+BABEL_BP_104_88598_20120216_022402_outLine
+BABEL_BP_104_89308_20120131_214111_inLine
+BABEL_BP_104_89308_20120131_214111_outLine
+BABEL_BP_104_89382_20120207_192751_inLine
+BABEL_BP_104_89382_20120207_192751_outLine
+BABEL_BP_104_90003_20120127_173210_inLine
+BABEL_BP_104_91275_20120219_055247_outLine
+BABEL_BP_104_91372_20120309_201355_inLine
+BABEL_BP_104_91372_20120309_201355_outLine
+BABEL_BP_104_93026_20120121_010508_inLine
+BABEL_BP_104_93026_20120121_010508_outLine
+BABEL_BP_104_94682_20120126_173632_outLine
+BABEL_BP_104_96606_20120308_154908_inLine
+BABEL_BP_104_96606_20120308_154908_outLine
+BABEL_BP_104_97950_20120129_035347_inLine
+BABEL_BP_104_99407_20120217_190330_inLine
+BABEL_BP_104_99407_20120217_190330_outLine
diff --git a/egs/babel/s5d/conf/lists/104-pashto/eval.list b/egs/babel/s5d/conf/lists/104-pashto/eval.list
new file mode 100644
index 00000000000..f3b4a90b6e6
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/104-pashto/eval.list
@@ -0,0 +1,198 @@
+BABEL_BP_104_01275_20120316_230646_inLine
+BABEL_BP_104_01275_20120316_230646_outLine
+BABEL_BP_104_01275_20120316_231711_inLine
+BABEL_BP_104_01275_20120316_231711_outLine
+BABEL_BP_104_03053_20120129_025619_inLine
+BABEL_BP_104_03053_20120129_025619_outLine
+BABEL_BP_104_03053_20120129_030931_inLine
+BABEL_BP_104_03053_20120129_030931_outLine
+BABEL_BP_104_10348_20120313_005811_inLine
+BABEL_BP_104_10348_20120313_005811_outLine
+BABEL_BP_104_10494_20120219_173118_inLine
+BABEL_BP_104_10494_20120219_173118_outLine
+BABEL_BP_104_11386_20120501_221559_inLine
+BABEL_BP_104_11386_20120501_221559_outLine
+BABEL_BP_104_11894_20120120_154648_inLine
+BABEL_BP_104_11894_20120120_154648_outLine
+BABEL_BP_104_12719_20120309_214313_inLine
+BABEL_BP_104_12722_20120409_210032_inLine
+BABEL_BP_104_12722_20120409_210033_outLine
+BABEL_BP_104_15269_20120508_173455_inLine
+BABEL_BP_104_15997_20120212_170900_inLine
+BABEL_BP_104_15997_20120212_170900_outLine
+BABEL_BP_104_16352_20120206_004350_inLine
+BABEL_BP_104_16352_20120206_004350_outLine
+BABEL_BP_104_16629_20120501_184857_inLine
+BABEL_BP_104_17218_20120206_041300_inLine
+BABEL_BP_104_17218_20120206_041301_outLine
+BABEL_BP_104_18358_20120525_224141_inLine
+BABEL_BP_104_19112_20120316_145312_inLine
+BABEL_BP_104_19112_20120316_145312_outLine
+BABEL_BP_104_19686_20120518_175511_inLine
+BABEL_BP_104_19760_20120501_194354_inLine
+BABEL_BP_104_20157_20120122_171556_inLine
+BABEL_BP_104_20157_20120122_171556_outLine
+BABEL_BP_104_21180_20120216_024537_inLine
+BABEL_BP_104_21180_20120216_024537_outLine
+BABEL_BP_104_22422_20120213_143323_inLine
+BABEL_BP_104_22422_20120213_143323_outLine
+BABEL_BP_104_24378_20120322_201121_inLine
+BABEL_BP_104_24378_20120322_201121_outLine
+BABEL_BP_104_25365_20120525_172149_inLine
+BABEL_BP_104_27891_20120331_012612_inLine
+BABEL_BP_104_27891_20120331_012612_outLine
+BABEL_BP_104_28573_20120322_135901_inLine
+BABEL_BP_104_28573_20120322_135901_outLine
+BABEL_BP_104_29083_20120128_011719_inLine
+BABEL_BP_104_29083_20120128_011719_outLine
+BABEL_BP_104_30978_20120309_013805_inLine
+BABEL_BP_104_30978_20120309_013805_outLine
+BABEL_BP_104_32245_20120115_200120_outLine
+BABEL_BP_104_32669_20120223_164026_inLine
+BABEL_BP_104_32669_20120223_164026_outLine
+BABEL_BP_104_32708_20120518_190441_inLine
+BABEL_BP_104_33429_20120303_015431_inLine
+BABEL_BP_104_33429_20120303_015431_outLine
+BABEL_BP_104_34137_20120219_183642_inLine
+BABEL_BP_104_34137_20120219_183642_outLine
+BABEL_BP_104_35317_20120208_173659_inLine
+BABEL_BP_104_35317_20120208_173659_outLine
+BABEL_BP_104_35764_20120518_193509_inLine
+BABEL_BP_104_36227_20120211_165128_inLine
+BABEL_BP_104_36227_20120211_165128_outLine
+BABEL_BP_104_36227_20120211_181406_inLine
+BABEL_BP_104_36227_20120211_181406_outLine
+BABEL_BP_104_38954_20120316_173708_inLine
+BABEL_BP_104_38954_20120316_173708_outLine
+BABEL_BP_104_39577_20120321_011346_inLine
+BABEL_BP_104_39577_20120321_011346_outLine
+BABEL_BP_104_39696_20120218_034224_inLine
+BABEL_BP_104_39696_20120218_034224_outLine
+BABEL_BP_104_40445_20120314_225446_inLine
+BABEL_BP_104_40445_20120314_225446_outLine
+BABEL_BP_104_41545_20120317_151247_inLine
+BABEL_BP_104_41545_20120317_151247_outLine
+BABEL_BP_104_42397_20120219_050708_inLine
+BABEL_BP_104_42397_20120219_050708_outLine
+BABEL_BP_104_42427_20120229_145052_inLine
+BABEL_BP_104_42427_20120229_145052_outLine
+BABEL_BP_104_42728_20120204_220817_inLine
+BABEL_BP_104_42728_20120204_220817_outLine
+BABEL_BP_104_42730_20120229_010941_inLine
+BABEL_BP_104_42730_20120229_010941_outLine
+BABEL_BP_104_44792_20120210_023955_inLine
+BABEL_BP_104_46216_20120207_194728_inLine
+BABEL_BP_104_46216_20120207_194728_outLine
+BABEL_BP_104_46862_20120316_155735_inLine
+BABEL_BP_104_46862_20120316_155735_outLine
+BABEL_BP_104_47771_20120519_163449_inLine
+BABEL_BP_104_48518_20120219_154144_inLine
+BABEL_BP_104_48518_20120219_154144_outLine
+BABEL_BP_104_49662_20120518_205502_inLine
+BABEL_BP_104_52533_20120310_204257_inLine
+BABEL_BP_104_52533_20120310_204257_outLine
+BABEL_BP_104_54646_20120119_215025_inLine
+BABEL_BP_104_54646_20120119_215025_outLine
+BABEL_BP_104_55043_20120316_021531_inLine
+BABEL_BP_104_55043_20120316_021531_outLine
+BABEL_BP_104_56605_20120220_012855_inLine
+BABEL_BP_104_56605_20120220_012855_outLine
+BABEL_BP_104_58283_20111227_182227_inLine
+BABEL_BP_104_58283_20111227_182227_outLine
+BABEL_BP_104_59121_20120120_170101_inLine
+BABEL_BP_104_59121_20120120_170101_outLine
+BABEL_BP_104_60055_20120120_151813_inLine
+BABEL_BP_104_60055_20120120_151813_outLine
+BABEL_BP_104_60523_20120303_012610_inLine
+BABEL_BP_104_60523_20120303_012610_outLine
+BABEL_BP_104_61400_20120518_194526_inLine
+BABEL_BP_104_61755_20120518_180255_inLine
+BABEL_BP_104_61786_20120216_204511_inLine
+BABEL_BP_104_61786_20120216_204511_outLine
+BABEL_BP_104_64198_20120219_231453_inLine
+BABEL_BP_104_64198_20120219_231453_outLine
+BABEL_BP_104_65668_20120203_175644_inLine
+BABEL_BP_104_65668_20120203_175644_outLine
+BABEL_BP_104_66153_20120212_161723_inLine
+BABEL_BP_104_66153_20120212_161724_outLine
+BABEL_BP_104_66842_20120126_174251_inLine
+BABEL_BP_104_66842_20120126_174251_outLine
+BABEL_BP_104_66847_20120308_230422_inLine
+BABEL_BP_104_66847_20120308_230422_outLine
+BABEL_BP_104_68538_20120314_231228_inLine
+BABEL_BP_104_68538_20120314_231228_outLine
+BABEL_BP_104_69336_20120201_211015_inLine
+BABEL_BP_104_69336_20120201_211015_outLine
+BABEL_BP_104_69336_20120201_213613_inLine
+BABEL_BP_104_69336_20120201_213613_outLine
+BABEL_BP_104_69728_20120129_180746_inLine
+BABEL_BP_104_69728_20120129_180746_outLine
+BABEL_BP_104_71284_20111228_210355_inLine
+BABEL_BP_104_71284_20111228_210355_outLine
+BABEL_BP_104_71284_20111228_215349_inLine
+BABEL_BP_104_71284_20111228_215349_outLine
+BABEL_BP_104_71925_20120309_151315_inLine
+BABEL_BP_104_71925_20120309_151315_outLine
+BABEL_BP_104_75869_20111220_204852_inLine
+BABEL_BP_104_75869_20111220_204852_outLine
+BABEL_BP_104_77082_20120109_183551_inLine
+BABEL_BP_104_77082_20120109_183551_outLine
+BABEL_BP_104_77290_20120403_023516_inLine
+BABEL_BP_104_77290_20120403_023516_outLine
+BABEL_BP_104_77621_20120517_225556_inLine
+BABEL_BP_104_77737_20120320_204452_inLine
+BABEL_BP_104_77737_20120320_204452_outLine
+BABEL_BP_104_78298_20120308_204105_inLine
+BABEL_BP_104_78298_20120308_204105_outLine
+BABEL_BP_104_80644_20120222_222458_inLine
+BABEL_BP_104_80644_20120222_222458_outLine
+BABEL_BP_104_83327_20120217_233846_inLine
+BABEL_BP_104_83327_20120217_233846_outLine
+BABEL_BP_104_83782_20120519_153147_inLine
+BABEL_BP_104_84398_20120219_052212_inLine
+BABEL_BP_104_84398_20120219_052212_outLine
+BABEL_BP_104_85897_20120221_033320_inLine
+BABEL_BP_104_85897_20120221_033320_outLine
+BABEL_BP_104_86231_20120224_065736_inLine
+BABEL_BP_104_86231_20120224_065736_outLine
+BABEL_BP_104_86793_20120309_185403_inLine
+BABEL_BP_104_86793_20120309_185403_outLine
+BABEL_BP_104_86873_20120519_160538_inLine
+BABEL_BP_104_87124_20120315_000929_inLine
+BABEL_BP_104_87124_20120315_000929_outLine
+BABEL_BP_104_87734_20120117_154033_inLine
+BABEL_BP_104_87734_20120117_154033_outLine
+BABEL_BP_104_89463_20111225_195251_inLine
+BABEL_BP_104_89463_20111225_195251_outLine
+BABEL_BP_104_89702_20120318_005220_inLine
+BABEL_BP_104_89702_20120318_005220_outLine
+BABEL_BP_104_89851_20120322_183302_inLine
+BABEL_BP_104_89851_20120322_183302_outLine
+BABEL_BP_104_89851_20120322_194407_inLine
+BABEL_BP_104_89851_20120322_194407_outLine
+BABEL_BP_104_90758_20120315_015433_inLine
+BABEL_BP_104_90758_20120315_015433_outLine
+BABEL_BP_104_91105_20120501_195037_inLine
+BABEL_BP_104_92247_20120220_023207_inLine
+BABEL_BP_104_92247_20120220_023207_outLine
+BABEL_BP_104_92721_20120401_235515_inLine
+BABEL_BP_104_92721_20120401_235515_outLine
+BABEL_BP_104_92721_20120402_000651_inLine
+BABEL_BP_104_92721_20120402_000651_outLine
+BABEL_BP_104_93180_20111223_033642_inLine
+BABEL_BP_104_93180_20111223_033642_outLine
+BABEL_BP_104_93742_20120308_233140_inLine
+BABEL_BP_104_93742_20120308_233140_outLine
+BABEL_BP_104_93748_20120316_223342_inLine
+BABEL_BP_104_93748_20120316_223342_outLine
+BABEL_BP_104_94934_20120525_175309_inLine
+BABEL_BP_104_96186_20120320_210010_inLine
+BABEL_BP_104_96186_20120320_210010_outLine
+BABEL_BP_104_96868_20120326_145653_inLine
+BABEL_BP_104_96868_20120326_145653_outLine
+BABEL_BP_104_97574_20120228_161829_inLine
+BABEL_BP_104_98271_20120110_010959_inLine
+BABEL_BP_104_98271_20120110_010959_outLine
+BABEL_BP_104_98420_20120507_174842_inLine
+BABEL_BP_104_99428_20120211_174655_inLine
+BABEL_BP_104_99428_20120211_174655_outLine
diff --git a/egs/babel/s5d/conf/lists/104-pashto/evalpart1.list b/egs/babel/s5d/conf/lists/104-pashto/evalpart1.list
new file mode 100644
index 00000000000..2cf59b81f00
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/104-pashto/evalpart1.list
@@ -0,0 +1,70 @@
+BABEL_BP_104_11894_20120120_154648_inLine
+BABEL_BP_104_11894_20120120_154648_outLine
+BABEL_BP_104_12722_20120409_210032_inLine
+BABEL_BP_104_12722_20120409_210033_outLine
+BABEL_BP_104_16352_20120206_004350_inLine
+BABEL_BP_104_16352_20120206_004350_outLine
+BABEL_BP_104_20157_20120122_171556_inLine
+BABEL_BP_104_20157_20120122_171556_outLine
+BABEL_BP_104_21180_20120216_024537_inLine
+BABEL_BP_104_21180_20120216_024537_outLine
+BABEL_BP_104_24378_20120322_201121_inLine
+BABEL_BP_104_24378_20120322_201121_outLine
+BABEL_BP_104_27891_20120331_012612_inLine
+BABEL_BP_104_27891_20120331_012612_outLine
+BABEL_BP_104_28573_20120322_135901_inLine
+BABEL_BP_104_28573_20120322_135901_outLine
+BABEL_BP_104_32669_20120223_164026_inLine
+BABEL_BP_104_32669_20120223_164026_outLine
+BABEL_BP_104_34137_20120219_183642_inLine
+BABEL_BP_104_34137_20120219_183642_outLine
+BABEL_BP_104_35317_20120208_173659_inLine
+BABEL_BP_104_35317_20120208_173659_outLine
+BABEL_BP_104_36227_20120211_165128_inLine
+BABEL_BP_104_36227_20120211_165128_outLine
+BABEL_BP_104_36227_20120211_181406_inLine
+BABEL_BP_104_36227_20120211_181406_outLine
+BABEL_BP_104_39577_20120321_011346_inLine
+BABEL_BP_104_39577_20120321_011346_outLine
+BABEL_BP_104_39696_20120218_034224_inLine
+BABEL_BP_104_39696_20120218_034224_outLine
+BABEL_BP_104_42427_20120229_145052_inLine
+BABEL_BP_104_42427_20120229_145052_outLine
+BABEL_BP_104_48518_20120219_154144_inLine
+BABEL_BP_104_48518_20120219_154144_outLine
+BABEL_BP_104_52533_20120310_204257_inLine
+BABEL_BP_104_52533_20120310_204257_outLine
+BABEL_BP_104_54646_20120119_215025_inLine
+BABEL_BP_104_54646_20120119_215025_outLine
+BABEL_BP_104_66153_20120212_161723_inLine
+BABEL_BP_104_66153_20120212_161724_outLine
+BABEL_BP_104_69336_20120201_211015_inLine
+BABEL_BP_104_69336_20120201_211015_outLine
+BABEL_BP_104_69336_20120201_213613_inLine
+BABEL_BP_104_69336_20120201_213613_outLine
+BABEL_BP_104_75869_20111220_204852_inLine
+BABEL_BP_104_75869_20111220_204852_outLine
+BABEL_BP_104_77082_20120109_183551_inLine
+BABEL_BP_104_77082_20120109_183551_outLine
+BABEL_BP_104_78298_20120308_204105_inLine
+BABEL_BP_104_78298_20120308_204105_outLine
+BABEL_BP_104_85897_20120221_033320_inLine
+BABEL_BP_104_85897_20120221_033320_outLine
+BABEL_BP_104_86793_20120309_185403_inLine
+BABEL_BP_104_86793_20120309_185403_outLine
+BABEL_BP_104_87124_20120315_000929_inLine
+BABEL_BP_104_87124_20120315_000929_outLine
+BABEL_BP_104_89851_20120322_183302_inLine
+BABEL_BP_104_89851_20120322_183302_outLine
+BABEL_BP_104_89851_20120322_194407_inLine
+BABEL_BP_104_89851_20120322_194407_outLine
+BABEL_BP_104_92721_20120401_235515_inLine
+BABEL_BP_104_92721_20120401_235515_outLine
+BABEL_BP_104_92721_20120402_000651_inLine
+BABEL_BP_104_92721_20120402_000651_outLine
+BABEL_BP_104_93748_20120316_223342_inLine
+BABEL_BP_104_93748_20120316_223342_outLine
+BABEL_BP_104_96868_20120326_145653_inLine
+BABEL_BP_104_96868_20120326_145653_outLine
+BABEL_BP_104_99428_20120211_174655_inLine
+BABEL_BP_104_99428_20120211_174655_outLine
diff --git a/egs/babel/s5d/conf/lists/104-pashto/train.40HrFLP.list b/egs/babel/s5d/conf/lists/104-pashto/train.40HrFLP.list
new file mode 100644
index 00000000000..9aefcaef2bb
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/104-pashto/train.40HrFLP.list
@@ -0,0 +1,512 @@
+BABEL_BP_104_03770_20120109_014606_inLine
+BABEL_BP_104_03770_20120109_014606_outLine
+BABEL_BP_104_08036_20111220_013826_inLine
+BABEL_BP_104_08139_20120126_021604_inLine
+BABEL_BP_104_10193_20120213_031930_inLine
+BABEL_BP_104_10193_20120213_031930_outLine
+BABEL_BP_104_10289_20120128_035330_inLine
+BABEL_BP_104_10289_20120128_035330_outLine
+BABEL_BP_104_10642_20120321_210945_outLine
+BABEL_BP_104_10911_20111222_025120_inLine
+BABEL_BP_104_10911_20111222_025120_outLine
+BABEL_BP_104_11146_20120224_000248_inLine
+BABEL_BP_104_11146_20120224_000248_outLine
+BABEL_BP_104_11153_20120108_191820_inLine
+BABEL_BP_104_11153_20120108_191820_outLine
+BABEL_BP_104_11202_20120213_235334_inLine
+BABEL_BP_104_11202_20120213_235334_outLine
+BABEL_BP_104_11442_20120218_234445_inLine
+BABEL_BP_104_11442_20120218_234445_outLine
+BABEL_BP_104_11647_20120315_022645_inLine
+BABEL_BP_104_11647_20120315_022645_outLine
+BABEL_BP_104_12562_20120307_152654_inLine
+BABEL_BP_104_12682_20120223_031401_inLine
+BABEL_BP_104_13064_20120220_040256_inLine
+BABEL_BP_104_13064_20120220_040256_outLine
+BABEL_BP_104_13189_20120112_020041_inLine
+BABEL_BP_104_13189_20120112_020041_outLine
+BABEL_BP_104_13456_20120111_024843_outLine
+BABEL_BP_104_13694_20120321_001123_outLine
+BABEL_BP_104_13798_20120105_221125_inLine
+BABEL_BP_104_13798_20120105_221125_outLine
+BABEL_BP_104_13952_20120126_185217_inLine
+BABEL_BP_104_13952_20120126_185217_outLine
+BABEL_BP_104_14147_20120320_003436_inLine
+BABEL_BP_104_14147_20120320_003436_outLine
+BABEL_BP_104_14225_20120331_015956_inLine
+BABEL_BP_104_14225_20120331_020908_inLine
+BABEL_BP_104_14527_20120207_235446_inLine
+BABEL_BP_104_14527_20120207_235446_outLine
+BABEL_BP_104_14927_20111224_041309_inLine
+BABEL_BP_104_14927_20111224_041309_outLine
+BABEL_BP_104_15324_20120126_222036_inLine
+BABEL_BP_104_15324_20120126_222036_outLine
+BABEL_BP_104_15324_20120127_023323_inLine
+BABEL_BP_104_15324_20120127_023323_outLine
+BABEL_BP_104_15377_20120322_045329_inLine
+BABEL_BP_104_15415_20120219_181352_inLine
+BABEL_BP_104_15861_20120401_024411_inLine
+BABEL_BP_104_15949_20120229_140434_inLine
+BABEL_BP_104_15949_20120229_140434_outLine
+BABEL_BP_104_16074_20120129_041107_inLine
+BABEL_BP_104_16074_20120129_041107_outLine
+BABEL_BP_104_16290_20120220_200234_inLine
+BABEL_BP_104_16290_20120220_200234_outLine
+BABEL_BP_104_16339_20120131_184255_inLine
+BABEL_BP_104_16350_20120315_043233_outLine
+BABEL_BP_104_16416_20120317_205531_inLine
+BABEL_BP_104_16416_20120317_205531_outLine
+BABEL_BP_104_16416_20120317_211129_inLine
+BABEL_BP_104_16416_20120317_211129_outLine
+BABEL_BP_104_16633_20120311_053635_inLine
+BABEL_BP_104_16633_20120311_053635_outLine
+BABEL_BP_104_17081_20120128_030343_inLine
+BABEL_BP_104_17081_20120128_030343_outLine
+BABEL_BP_104_17180_20120321_215255_inLine
+BABEL_BP_104_17180_20120321_215255_outLine
+BABEL_BP_104_17216_20120128_015245_inLine
+BABEL_BP_104_17216_20120128_020324_inLine
+BABEL_BP_104_17319_20111225_210159_inLine
+BABEL_BP_104_17319_20111225_210159_outLine
+BABEL_BP_104_17429_20120209_024521_inLine
+BABEL_BP_104_17429_20120209_024521_outLine
+BABEL_BP_104_17904_20120320_014817_inLine
+BABEL_BP_104_17904_20120320_014817_outLine
+BABEL_BP_104_18084_20111230_210850_outLine
+BABEL_BP_104_18537_20120130_181101_inLine
+BABEL_BP_104_18537_20120130_181101_outLine
+BABEL_BP_104_18667_20120208_175014_inLine
+BABEL_BP_104_18667_20120208_175014_outLine
+BABEL_BP_104_19044_20120218_182247_outLine
+BABEL_BP_104_19044_20120218_183017_outLine
+BABEL_BP_104_19044_20120218_183849_outLine
+BABEL_BP_104_19324_20120310_192849_inLine
+BABEL_BP_104_19324_20120310_192849_outLine
+BABEL_BP_104_19481_20120207_235626_inLine
+BABEL_BP_104_19481_20120207_235626_outLine
+BABEL_BP_104_20016_20120206_215156_inLine
+BABEL_BP_104_20016_20120206_215156_outLine
+BABEL_BP_104_21256_20120217_202248_inLine
+BABEL_BP_104_21256_20120217_202248_outLine
+BABEL_BP_104_21928_20120204_212612_inLine
+BABEL_BP_104_21928_20120204_212612_outLine
+BABEL_BP_104_21968_20120131_180237_inLine
+BABEL_BP_104_21968_20120131_180237_outLine
+BABEL_BP_104_22548_20120125_211519_inLine
+BABEL_BP_104_22590_20120209_224232_inLine
+BABEL_BP_104_22590_20120209_224232_outLine
+BABEL_BP_104_23381_20120216_161115_outLine
+BABEL_BP_104_24235_20120209_030431_outLine
+BABEL_BP_104_24585_20120117_225722_inLine
+BABEL_BP_104_24585_20120117_225722_outLine
+BABEL_BP_104_24735_20120316_221529_inLine
+BABEL_BP_104_24735_20120316_221529_outLine
+BABEL_BP_104_24750_20120130_183131_inLine
+BABEL_BP_104_24750_20120130_183131_outLine
+BABEL_BP_104_24810_20120319_165838_outLine
+BABEL_BP_104_25015_20120216_005135_inLine
+BABEL_BP_104_25015_20120216_005135_outLine
+BABEL_BP_104_25525_20120316_140847_outLine
+BABEL_BP_104_25911_20111222_051549_inLine
+BABEL_BP_104_25911_20111222_051549_outLine
+BABEL_BP_104_26946_20120130_034221_outLine
+BABEL_BP_104_27298_20111225_192028_inLine
+BABEL_BP_104_27298_20111225_192028_outLine
+BABEL_BP_104_28289_20120310_202856_inLine
+BABEL_BP_104_28289_20120310_202856_outLine
+BABEL_BP_104_28330_20120306_194033_inLine
+BABEL_BP_104_28330_20120306_195756_inLine
+BABEL_BP_104_28734_20120126_205422_inLine
+BABEL_BP_104_28734_20120126_212950_inLine
+BABEL_BP_104_29009_20120319_164025_outLine
+BABEL_BP_104_29967_20120208_201355_inLine
+BABEL_BP_104_29967_20120208_201355_outLine
+BABEL_BP_104_30143_20111227_132440_inLine
+BABEL_BP_104_30271_20120205_163755_inLine
+BABEL_BP_104_30271_20120205_165111_inLine
+BABEL_BP_104_30628_20120219_182744_inLine
+BABEL_BP_104_30628_20120219_182744_outLine
+BABEL_BP_104_30848_20120204_154057_inLine
+BABEL_BP_104_30848_20120204_154058_outLine
+BABEL_BP_104_31663_20120210_140419_inLine
+BABEL_BP_104_31663_20120210_140419_outLine
+BABEL_BP_104_31919_20120405_023221_inLine
+BABEL_BP_104_31926_20120319_040036_outLine
+BABEL_BP_104_32956_20120221_133851_inLine
+BABEL_BP_104_32956_20120221_133851_outLine
+BABEL_BP_104_33337_20120220_005047_inLine
+BABEL_BP_104_33337_20120220_005047_outLine
+BABEL_BP_104_33846_20120123_194027_inLine
+BABEL_BP_104_34164_20120221_141502_inLine
+BABEL_BP_104_34164_20120221_141502_outLine
+BABEL_BP_104_34188_20120219_000455_inLine
+BABEL_BP_104_34188_20120219_000455_outLine
+BABEL_BP_104_34335_20111225_224055_outLine
+BABEL_BP_104_34833_20120215_025837_inLine
+BABEL_BP_104_34833_20120215_025837_outLine
+BABEL_BP_104_34994_20120314_001810_outLine
+BABEL_BP_104_34994_20120314_003701_outLine
+BABEL_BP_104_35073_20120208_223917_outLine
+BABEL_BP_104_35444_20120310_190608_inLine
+BABEL_BP_104_35444_20120310_190608_outLine
+BABEL_BP_104_35646_20120202_222418_inLine
+BABEL_BP_104_35646_20120202_222418_outLine
+BABEL_BP_104_35874_20120403_213324_inLine
+BABEL_BP_104_35916_20120204_030147_inLine
+BABEL_BP_104_35916_20120204_030147_outLine
+BABEL_BP_104_35923_20120216_021137_inLine
+BABEL_BP_104_35923_20120216_021137_outLine
+BABEL_BP_104_36138_20120206_210519_inLine
+BABEL_BP_104_36138_20120206_210519_outLine
+BABEL_BP_104_36413_20120310_185758_inLine
+BABEL_BP_104_36413_20120310_185758_outLine
+BABEL_BP_104_36487_20120209_211827_inLine
+BABEL_BP_104_36487_20120209_211827_outLine
+BABEL_BP_104_37131_20120318_210220_inLine
+BABEL_BP_104_37131_20120318_210220_outLine
+BABEL_BP_104_37135_20120219_044437_inLine
+BABEL_BP_104_37135_20120219_044437_outLine
+BABEL_BP_104_37593_20120130_203434_inLine
+BABEL_BP_104_37593_20120130_203434_outLine
+BABEL_BP_104_38479_20120213_011154_inLine
+BABEL_BP_104_38479_20120213_011154_outLine
+BABEL_BP_104_38563_20120127_181357_outLine
+BABEL_BP_104_39178_20120109_195710_inLine
+BABEL_BP_104_39320_20120110_190913_inLine
+BABEL_BP_104_39320_20120110_190913_outLine
+BABEL_BP_104_39390_20120322_042714_outLine
+BABEL_BP_104_39525_20120217_200400_inLine
+BABEL_BP_104_39525_20120217_200400_outLine
+BABEL_BP_104_39999_20120326_194721_inLine
+BABEL_BP_104_39999_20120326_194721_outLine
+BABEL_BP_104_40136_20120222_030818_inLine
+BABEL_BP_104_40136_20120222_030823_outLine
+BABEL_BP_104_40607_20120324_163524_inLine
+BABEL_BP_104_40612_20120106_024347_inLine
+BABEL_BP_104_40612_20120106_024347_outLine
+BABEL_BP_104_40640_20120131_044455_outLine
+BABEL_BP_104_41306_20120223_191213_inLine
+BABEL_BP_104_41306_20120223_191213_outLine
+BABEL_BP_104_41531_20120331_010320_inLine
+BABEL_BP_104_41531_20120331_010320_outLine
+BABEL_BP_104_42145_20120127_042217_inLine
+BABEL_BP_104_42145_20120127_042217_outLine
+BABEL_BP_104_42571_20120229_014427_inLine
+BABEL_BP_104_42571_20120229_014427_outLine
+BABEL_BP_104_42571_20120229_020000_inLine
+BABEL_BP_104_42571_20120229_020000_outLine
+BABEL_BP_104_42929_20120307_150902_inLine
+BABEL_BP_104_42929_20120307_150902_outLine
+BABEL_BP_104_43322_20120126_040725_inLine
+BABEL_BP_104_43462_20120216_210005_inLine
+BABEL_BP_104_43462_20120216_210005_outLine
+BABEL_BP_104_43480_20120326_155717_inLine
+BABEL_BP_104_43501_20120331_220724_outLine
+BABEL_BP_104_43501_20120331_222326_outLine
+BABEL_BP_104_43684_20120128_182736_outLine
+BABEL_BP_104_43724_20120219_213737_inLine
+BABEL_BP_104_43724_20120219_213737_outLine
+BABEL_BP_104_43725_20120205_002936_inLine
+BABEL_BP_104_43725_20120205_002936_outLine
+BABEL_BP_104_43833_20120331_193735_outLine
+BABEL_BP_104_44468_20120222_125222_inLine
+BABEL_BP_104_44468_20120222_125222_outLine
+BABEL_BP_104_44515_20120326_144709_inLine
+BABEL_BP_104_44515_20120326_150551_inLine
+BABEL_BP_104_44799_20120119_040419_inLine
+BABEL_BP_104_44799_20120119_040419_outLine
+BABEL_BP_104_45356_20120324_234702_outLine
+BABEL_BP_104_45403_20111222_014909_outLine
+BABEL_BP_104_45562_20120131_200753_inLine
+BABEL_BP_104_45926_20120127_162212_inLine
+BABEL_BP_104_45926_20120127_162212_outLine
+BABEL_BP_104_45947_20120313_214251_inLine
+BABEL_BP_104_46168_20120217_200729_inLine
+BABEL_BP_104_46168_20120217_200729_outLine
+BABEL_BP_104_46734_20120219_025954_outLine
+BABEL_BP_104_46979_20120223_173811_inLine
+BABEL_BP_104_46979_20120223_173811_outLine
+BABEL_BP_104_47015_20120222_053105_inLine
+BABEL_BP_104_47015_20120222_053105_outLine
+BABEL_BP_104_47917_20120319_003035_inLine
+BABEL_BP_104_47917_20120319_003035_outLine
+BABEL_BP_104_48000_20120323_171146_inLine
+BABEL_BP_104_48000_20120323_171146_outLine
+BABEL_BP_104_48001_20120204_231603_inLine
+BABEL_BP_104_48001_20120204_231603_outLine
+BABEL_BP_104_48259_20120217_200412_inLine
+BABEL_BP_104_48259_20120217_200412_outLine
+BABEL_BP_104_48944_20120218_011825_inLine
+BABEL_BP_104_48944_20120218_011825_outLine
+BABEL_BP_104_48946_20120320_192250_inLine
+BABEL_BP_104_48946_20120320_192250_outLine
+BABEL_BP_104_49141_20120330_015342_inLine
+BABEL_BP_104_49629_20120312_155816_outLine
+BABEL_BP_104_50407_20120318_232348_inLine
+BABEL_BP_104_50407_20120318_232348_outLine
+BABEL_BP_104_50682_20120116_205741_inLine
+BABEL_BP_104_50682_20120116_205741_outLine
+BABEL_BP_104_50820_20120213_140300_inLine
+BABEL_BP_104_50820_20120213_140300_outLine
+BABEL_BP_104_51024_20120131_172745_inLine
+BABEL_BP_104_51047_20120319_042347_outLine
+BABEL_BP_104_51329_20120222_203129_inLine
+BABEL_BP_104_51329_20120222_203129_outLine
+BABEL_BP_104_51329_20120222_205332_inLine
+BABEL_BP_104_51329_20120222_205332_outLine
+BABEL_BP_104_51519_20120220_052247_inLine
+BABEL_BP_104_51519_20120220_052247_outLine
+BABEL_BP_104_51570_20120118_225333_inLine
+BABEL_BP_104_51570_20120118_225333_outLine
+BABEL_BP_104_51716_20120221_005215_inLine
+BABEL_BP_104_51716_20120221_005215_outLine
+BABEL_BP_104_52300_20120203_210256_inLine
+BABEL_BP_104_52300_20120203_210256_outLine
+BABEL_BP_104_52753_20120209_225916_inLine
+BABEL_BP_104_52753_20120209_225916_outLine
+BABEL_BP_104_52753_20120213_014050_inLine
+BABEL_BP_104_52753_20120213_014050_outLine
+BABEL_BP_104_52954_20120313_170902_inLine
+BABEL_BP_104_52954_20120313_170902_outLine
+BABEL_BP_104_53159_20120402_035901_inLine
+BABEL_BP_104_53159_20120402_035901_outLine
+BABEL_BP_104_53334_20120309_184805_inLine
+BABEL_BP_104_53334_20120309_184805_outLine
+BABEL_BP_104_53659_20120218_205643_inLine
+BABEL_BP_104_53659_20120218_205643_outLine
+BABEL_BP_104_53718_20120202_220720_outLine
+BABEL_BP_104_54909_20120130_194003_inLine
+BABEL_BP_104_54909_20120130_194003_outLine
+BABEL_BP_104_55213_20120331_185824_outLine
+BABEL_BP_104_55668_20120212_011829_inLine
+BABEL_BP_104_55668_20120212_011829_outLine
+BABEL_BP_104_56201_20120126_180227_outLine
+BABEL_BP_104_56308_20120402_024809_outLine
+BABEL_BP_104_56704_20120120_155806_inLine
+BABEL_BP_104_56704_20120120_155806_outLine
+BABEL_BP_104_56753_20120322_204356_outLine
+BABEL_BP_104_56805_20120320_045112_inLine
+BABEL_BP_104_56805_20120320_045112_outLine
+BABEL_BP_104_57005_20120321_034143_inLine
+BABEL_BP_104_57082_20120110_024829_inLine
+BABEL_BP_104_57116_20120110_180036_inLine
+BABEL_BP_104_57167_20111230_213737_outLine
+BABEL_BP_104_57210_20120321_020212_inLine
+BABEL_BP_104_57210_20120321_020212_outLine
+BABEL_BP_104_57263_20120302_211404_inLine
+BABEL_BP_104_57320_20120204_230109_inLine
+BABEL_BP_104_57320_20120204_230109_outLine
+BABEL_BP_104_57531_20120203_165801_inLine
+BABEL_BP_104_57531_20120203_165801_outLine
+BABEL_BP_104_57672_20120204_030206_outLine
+BABEL_BP_104_58149_20120218_161613_outLine
+BABEL_BP_104_58298_20120208_214852_inLine
+BABEL_BP_104_58298_20120208_214852_outLine
+BABEL_BP_104_58939_20120212_184855_inLine
+BABEL_BP_104_58939_20120212_184855_outLine
+BABEL_BP_104_58963_20120331_015840_inLine
+BABEL_BP_104_58963_20120331_015840_outLine
+BABEL_BP_104_59219_20120131_225115_outLine
+BABEL_BP_104_59399_20120318_144751_inLine
+BABEL_BP_104_59399_20120318_144752_outLine
+BABEL_BP_104_59482_20120309_190927_inLine
+BABEL_BP_104_59482_20120309_190927_outLine
+BABEL_BP_104_59681_20120123_213306_inLine
+BABEL_BP_104_59681_20120123_213306_outLine
+BABEL_BP_104_60462_20120201_181707_inLine
+BABEL_BP_104_60462_20120201_181707_outLine
+BABEL_BP_104_60806_20120213_161652_outLine
+BABEL_BP_104_61029_20120201_224200_outLine
+BABEL_BP_104_61523_20120212_035522_inLine
+BABEL_BP_104_61655_20120208_203143_inLine
+BABEL_BP_104_61655_20120208_203143_outLine
+BABEL_BP_104_61733_20120205_220251_outLine
+BABEL_BP_104_61735_20120314_012744_inLine
+BABEL_BP_104_61909_20120320_190739_inLine
+BABEL_BP_104_61909_20120320_190739_outLine
+BABEL_BP_104_62815_20120318_025812_outLine
+BABEL_BP_104_62816_20120312_153937_outLine
+BABEL_BP_104_63111_20120204_232445_outLine
+BABEL_BP_104_63215_20120213_040737_inLine
+BABEL_BP_104_63215_20120213_040737_outLine
+BABEL_BP_104_63220_20120131_155658_inLine
+BABEL_BP_104_63220_20120131_155658_outLine
+BABEL_BP_104_63390_20120123_212718_outLine
+BABEL_BP_104_63397_20120217_194928_inLine
+BABEL_BP_104_63397_20120217_194928_outLine
+BABEL_BP_104_63784_20120216_015608_inLine
+BABEL_BP_104_63784_20120216_015608_outLine
+BABEL_BP_104_63934_20120318_201706_inLine
+BABEL_BP_104_63934_20120318_201706_outLine
+BABEL_BP_104_64990_20120119_173958_inLine
+BABEL_BP_104_64990_20120119_173958_outLine
+BABEL_BP_104_65341_20120220_222356_inLine
+BABEL_BP_104_65341_20120220_222356_outLine
+BABEL_BP_104_65590_20120109_001414_inLine
+BABEL_BP_104_65590_20120109_001414_outLine
+BABEL_BP_104_65954_20120128_163139_inLine
+BABEL_BP_104_65954_20120128_163139_outLine
+BABEL_BP_104_65974_20120316_195524_inLine
+BABEL_BP_104_65974_20120316_195524_outLine
+BABEL_BP_104_66784_20111225_190506_outLine
+BABEL_BP_104_66879_20120213_004555_inLine
+BABEL_BP_104_66879_20120213_004555_outLine
+BABEL_BP_104_67106_20120208_201829_inLine
+BABEL_BP_104_67106_20120208_201829_outLine
+BABEL_BP_104_67423_20120205_220658_outLine
+BABEL_BP_104_67685_20120217_235729_inLine
+BABEL_BP_104_67685_20120217_235729_outLine
+BABEL_BP_104_67718_20120131_164436_inLine
+BABEL_BP_104_67718_20120131_164436_outLine
+BABEL_BP_104_68077_20120219_155535_outLine
+BABEL_BP_104_68111_20120321_185146_outLine
+BABEL_BP_104_68144_20120210_223106_outLine
+BABEL_BP_104_68189_20120128_005011_inLine
+BABEL_BP_104_68189_20120128_005011_outLine
+BABEL_BP_104_68209_20120219_045221_inLine
+BABEL_BP_104_68997_20120126_010839_inLine
+BABEL_BP_104_70333_20120210_033437_outLine
+BABEL_BP_104_70528_20120128_013553_inLine
+BABEL_BP_104_70528_20120128_013553_outLine
+BABEL_BP_104_70762_20120213_175054_outLine
+BABEL_BP_104_70897_20120315_000410_inLine
+BABEL_BP_104_70897_20120315_000410_outLine
+BABEL_BP_104_70897_20120315_013535_inLine
+BABEL_BP_104_70897_20120315_013535_outLine
+BABEL_BP_104_71948_20120210_012347_inLine
+BABEL_BP_104_71970_20120310_195048_inLine
+BABEL_BP_104_72874_20120213_191257_inLine
+BABEL_BP_104_72874_20120213_191257_outLine
+BABEL_BP_104_72910_20120310_185203_outLine
+BABEL_BP_104_73450_20120206_024342_inLine
+BABEL_BP_104_73450_20120206_024342_outLine
+BABEL_BP_104_73925_20120123_233630_inLine
+BABEL_BP_104_73925_20120123_233630_outLine
+BABEL_BP_104_74261_20120331_191708_outLine
+BABEL_BP_104_74334_20111230_035012_inLine
+BABEL_BP_104_74940_20120228_225523_inLine
+BABEL_BP_104_74940_20120228_225523_outLine
+BABEL_BP_104_75390_20120218_133736_inLine
+BABEL_BP_104_75390_20120218_133736_outLine
+BABEL_BP_104_75402_20120319_160944_inLine
+BABEL_BP_104_76714_20120313_220017_inLine
+BABEL_BP_104_76714_20120313_220017_outLine
+BABEL_BP_104_76738_20120210_010510_inLine
+BABEL_BP_104_77097_20120214_235954_inLine
+BABEL_BP_104_77097_20120214_235954_outLine
+BABEL_BP_104_77256_20120309_064948_inLine
+BABEL_BP_104_77537_20120206_034628_outLine
+BABEL_BP_104_77711_20120229_163050_inLine
+BABEL_BP_104_77711_20120229_163050_outLine
+BABEL_BP_104_77711_20120229_164115_inLine
+BABEL_BP_104_77711_20120229_164115_outLine
+BABEL_BP_104_78225_20120126_170942_outLine
+BABEL_BP_104_78443_20120128_211331_inLine
+BABEL_BP_104_78443_20120128_211331_outLine
+BABEL_BP_104_79120_20120127_021912_inLine
+BABEL_BP_104_79120_20120127_021912_outLine
+BABEL_BP_104_79120_20120127_030132_inLine
+BABEL_BP_104_79120_20120127_030132_outLine
+BABEL_BP_104_79156_20120126_191440_outLine
+BABEL_BP_104_79753_20120203_173233_inLine
+BABEL_BP_104_79753_20120203_173233_outLine
+BABEL_BP_104_80134_20120313_215613_inLine
+BABEL_BP_104_80134_20120313_215613_outLine
+BABEL_BP_104_80284_20120109_235306_inLine
+BABEL_BP_104_80284_20120109_235306_outLine
+BABEL_BP_104_80559_20120319_152020_outLine
+BABEL_BP_104_80616_20120223_193040_inLine
+BABEL_BP_104_80616_20120223_193040_outLine
+BABEL_BP_104_80867_20120309_034536_inLine
+BABEL_BP_104_80867_20120309_034536_outLine
+BABEL_BP_104_80929_20120310_194854_inLine
+BABEL_BP_104_80929_20120310_194854_outLine
+BABEL_BP_104_81726_20120229_154500_inLine
+BABEL_BP_104_81726_20120229_154500_outLine
+BABEL_BP_104_81996_20120128_185859_outLine
+BABEL_BP_104_82499_20120215_024134_inLine
+BABEL_BP_104_82499_20120215_024134_outLine
+BABEL_BP_104_82595_20120324_154901_outLine
+BABEL_BP_104_82964_20120218_181351_outLine
+BABEL_BP_104_83072_20120213_170201_inLine
+BABEL_BP_104_83072_20120213_170201_outLine
+BABEL_BP_104_83112_20120204_161112_inLine
+BABEL_BP_104_83112_20120204_161112_outLine
+BABEL_BP_104_83747_20120120_153904_outLine
+BABEL_BP_104_83866_20120206_040504_inLine
+BABEL_BP_104_83866_20120206_040505_outLine
+BABEL_BP_104_84854_20120129_233819_inLine
+BABEL_BP_104_84854_20120129_233819_outLine
+BABEL_BP_104_84885_20120217_215436_inLine
+BABEL_BP_104_84885_20120217_215436_outLine
+BABEL_BP_104_84950_20120130_131546_inLine
+BABEL_BP_104_84950_20120130_131546_outLine
+BABEL_BP_104_85558_20120413_044033_inLine
+BABEL_BP_104_86528_20120128_211228_inLine
+BABEL_BP_104_86537_20120128_022125_inLine
+BABEL_BP_104_86537_20120128_023523_inLine
+BABEL_BP_104_87067_20120324_182930_inLine
+BABEL_BP_104_87067_20120324_182930_outLine
+BABEL_BP_104_87517_20120207_200619_inLine
+BABEL_BP_104_87517_20120207_200619_outLine
+BABEL_BP_104_88070_20120318_164350_outLine
+BABEL_BP_104_88434_20120319_170128_inLine
+BABEL_BP_104_88434_20120319_170128_outLine
+BABEL_BP_104_88921_20120205_215225_inLine
+BABEL_BP_104_88921_20120205_215225_outLine
+BABEL_BP_104_89036_20120327_211455_inLine
+BABEL_BP_104_89925_20120202_000208_inLine
+BABEL_BP_104_89925_20120202_000208_outLine
+BABEL_BP_104_89952_20120131_212850_inLine
+BABEL_BP_104_89952_20120131_212850_outLine
+BABEL_BP_104_90263_20120205_044035_inLine
+BABEL_BP_104_90263_20120205_044035_outLine
+BABEL_BP_104_90310_20120129_024342_outLine
+BABEL_BP_104_91161_20120311_032449_inLine
+BABEL_BP_104_91161_20120311_032449_outLine
+BABEL_BP_104_92342_20120320_041334_inLine
+BABEL_BP_104_92342_20120320_041334_outLine
+BABEL_BP_104_92722_20120209_235113_outLine
+BABEL_BP_104_92793_20120118_235358_inLine
+BABEL_BP_104_93300_20120221_135558_inLine
+BABEL_BP_104_93300_20120221_135558_outLine
+BABEL_BP_104_93713_20120121_004435_inLine
+BABEL_BP_104_93730_20120220_052912_outLine
+BABEL_BP_104_93730_20120220_053327_outLine
+BABEL_BP_104_93730_20120220_054726_outLine
+BABEL_BP_104_93844_20120316_014157_inLine
+BABEL_BP_104_93844_20120327_194612_inLine
+BABEL_BP_104_94572_20120321_022026_inLine
+BABEL_BP_104_94683_20120126_024342_inLine
+BABEL_BP_104_94775_20120321_230436_inLine
+BABEL_BP_104_94775_20120321_230436_outLine
+BABEL_BP_104_94793_20120204_043218_inLine
+BABEL_BP_104_94793_20120204_043218_outLine
+BABEL_BP_104_95349_20111229_162101_inLine
+BABEL_BP_104_95360_20120205_133312_inLine
+BABEL_BP_104_95360_20120205_133312_outLine
+BABEL_BP_104_95465_20120223_040653_inLine
+BABEL_BP_104_95465_20120223_040653_outLine
+BABEL_BP_104_95904_20120218_183758_inLine
+BABEL_BP_104_95904_20120218_183758_outLine
+BABEL_BP_104_96343_20120130_143444_outLine
+BABEL_BP_104_96690_20120321_005155_inLine
+BABEL_BP_104_96811_20120217_021933_inLine
+BABEL_BP_104_96811_20120217_021933_outLine
+BABEL_BP_104_96956_20120209_025537_inLine
+BABEL_BP_104_96956_20120209_025537_outLine
+BABEL_BP_104_97050_20120314_144713_outLine
+BABEL_BP_104_97803_20120116_184019_inLine
+BABEL_BP_104_97803_20120116_184019_outLine
+BABEL_BP_104_97971_20120317_004835_inLine
+BABEL_BP_104_97971_20120317_004835_outLine
+BABEL_BP_104_98067_20120221_131601_inLine
+BABEL_BP_104_98067_20120221_131601_outLine
+BABEL_BP_104_98110_20120218_193615_outLine
+BABEL_BP_104_98503_20120402_230340_inLine
+BABEL_BP_104_98503_20120403_025554_inLine
+BABEL_BP_104_98588_20120119_011655_inLine
+BABEL_BP_104_98588_20120119_011655_outLine
+BABEL_BP_104_98942_20120205_224026_outLine
+BABEL_BP_104_99354_20120203_152733_inLine
+BABEL_BP_104_99354_20120203_152733_outLine
diff --git a/egs/babel/s5d/conf/lists/104-pashto/train.LimitedLP.list b/egs/babel/s5d/conf/lists/104-pashto/train.LimitedLP.list
new file mode 100644
index 00000000000..293419a111d
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/104-pashto/train.LimitedLP.list
@@ -0,0 +1,131 @@
+BABEL_BP_104_08036_20111220_013826_inLine
+BABEL_BP_104_08139_20120126_021604_inLine
+BABEL_BP_104_11647_20120315_022645_inLine
+BABEL_BP_104_11647_20120315_022645_outLine
+BABEL_BP_104_13952_20120126_185217_inLine
+BABEL_BP_104_13952_20120126_185217_outLine
+BABEL_BP_104_14147_20120320_003436_inLine
+BABEL_BP_104_14147_20120320_003436_outLine
+BABEL_BP_104_14527_20120207_235446_inLine
+BABEL_BP_104_14527_20120207_235446_outLine
+BABEL_BP_104_15324_20120126_222036_inLine
+BABEL_BP_104_15324_20120126_222036_outLine
+BABEL_BP_104_15324_20120127_023323_inLine
+BABEL_BP_104_15324_20120127_023323_outLine
+BABEL_BP_104_15415_20120219_181352_inLine
+BABEL_BP_104_15949_20120229_140434_inLine
+BABEL_BP_104_15949_20120229_140434_outLine
+BABEL_BP_104_16074_20120129_041107_inLine
+BABEL_BP_104_16074_20120129_041107_outLine
+BABEL_BP_104_16290_20120220_200234_inLine
+BABEL_BP_104_16290_20120220_200234_outLine
+BABEL_BP_104_16339_20120131_184255_inLine
+BABEL_BP_104_17904_20120320_014817_inLine
+BABEL_BP_104_17904_20120320_014817_outLine
+BABEL_BP_104_18084_20111230_210850_outLine
+BABEL_BP_104_19324_20120310_192849_inLine
+BABEL_BP_104_19324_20120310_192849_outLine
+BABEL_BP_104_21256_20120217_202248_inLine
+BABEL_BP_104_21256_20120217_202248_outLine
+BABEL_BP_104_23381_20120216_161115_outLine
+BABEL_BP_104_24235_20120209_030431_outLine
+BABEL_BP_104_24735_20120316_221529_inLine
+BABEL_BP_104_24735_20120316_221529_outLine
+BABEL_BP_104_26946_20120130_034221_outLine
+BABEL_BP_104_28289_20120310_202856_inLine
+BABEL_BP_104_28289_20120310_202856_outLine
+BABEL_BP_104_28734_20120126_205422_inLine
+BABEL_BP_104_28734_20120126_212950_inLine
+BABEL_BP_104_30271_20120205_163755_inLine
+BABEL_BP_104_30271_20120205_165111_inLine
+BABEL_BP_104_30628_20120219_182744_inLine
+BABEL_BP_104_30628_20120219_182744_outLine
+BABEL_BP_104_34188_20120219_000455_inLine
+BABEL_BP_104_34188_20120219_000455_outLine
+BABEL_BP_104_35073_20120208_223917_outLine
+BABEL_BP_104_35444_20120310_190608_inLine
+BABEL_BP_104_35444_20120310_190608_outLine
+BABEL_BP_104_36413_20120310_185758_inLine
+BABEL_BP_104_36413_20120310_185758_outLine
+BABEL_BP_104_38479_20120213_011154_inLine
+BABEL_BP_104_38479_20120213_011154_outLine
+BABEL_BP_104_39999_20120326_194721_inLine
+BABEL_BP_104_39999_20120326_194721_outLine
+BABEL_BP_104_41306_20120223_191213_inLine
+BABEL_BP_104_41306_20120223_191213_outLine
+BABEL_BP_104_42571_20120229_014427_inLine
+BABEL_BP_104_42571_20120229_014427_outLine
+BABEL_BP_104_42571_20120229_020000_inLine
+BABEL_BP_104_42571_20120229_020000_outLine
+BABEL_BP_104_43322_20120126_040725_inLine
+BABEL_BP_104_43724_20120219_213737_inLine
+BABEL_BP_104_43724_20120219_213737_outLine
+BABEL_BP_104_45926_20120127_162212_inLine
+BABEL_BP_104_45926_20120127_162212_outLine
+BABEL_BP_104_46734_20120219_025954_outLine
+BABEL_BP_104_48000_20120323_171146_inLine
+BABEL_BP_104_48000_20120323_171146_outLine
+BABEL_BP_104_48259_20120217_200412_inLine
+BABEL_BP_104_48259_20120217_200412_outLine
+BABEL_BP_104_48944_20120218_011825_inLine
+BABEL_BP_104_48944_20120218_011825_outLine
+BABEL_BP_104_48946_20120320_192250_inLine
+BABEL_BP_104_48946_20120320_192250_outLine
+BABEL_BP_104_50407_20120318_232348_inLine
+BABEL_BP_104_50407_20120318_232348_outLine
+BABEL_BP_104_51519_20120220_052247_inLine
+BABEL_BP_104_51519_20120220_052247_outLine
+BABEL_BP_104_51716_20120221_005215_inLine
+BABEL_BP_104_51716_20120221_005215_outLine
+BABEL_BP_104_52753_20120209_225916_inLine
+BABEL_BP_104_52753_20120209_225916_outLine
+BABEL_BP_104_52753_20120213_014050_inLine
+BABEL_BP_104_52753_20120213_014050_outLine
+BABEL_BP_104_56805_20120320_045112_inLine
+BABEL_BP_104_56805_20120320_045112_outLine
+BABEL_BP_104_57210_20120321_020212_inLine
+BABEL_BP_104_57210_20120321_020212_outLine
+BABEL_BP_104_57672_20120204_030206_outLine
+BABEL_BP_104_59219_20120131_225115_outLine
+BABEL_BP_104_60806_20120213_161652_outLine
+BABEL_BP_104_63397_20120217_194928_inLine
+BABEL_BP_104_63397_20120217_194928_outLine
+BABEL_BP_104_63934_20120318_201706_inLine
+BABEL_BP_104_63934_20120318_201706_outLine
+BABEL_BP_104_65590_20120109_001414_inLine
+BABEL_BP_104_65590_20120109_001414_outLine
+BABEL_BP_104_66784_20111225_190506_outLine
+BABEL_BP_104_67685_20120217_235729_inLine
+BABEL_BP_104_67685_20120217_235729_outLine
+BABEL_BP_104_68189_20120128_005011_inLine
+BABEL_BP_104_68189_20120128_005011_outLine
+BABEL_BP_104_68209_20120219_045221_inLine
+BABEL_BP_104_68997_20120126_010839_inLine
+BABEL_BP_104_70762_20120213_175054_outLine
+BABEL_BP_104_70897_20120315_000410_inLine
+BABEL_BP_104_70897_20120315_000410_outLine
+BABEL_BP_104_70897_20120315_013535_inLine
+BABEL_BP_104_70897_20120315_013535_outLine
+BABEL_BP_104_71948_20120210_012347_inLine
+BABEL_BP_104_73925_20120123_233630_inLine
+BABEL_BP_104_73925_20120123_233630_outLine
+BABEL_BP_104_76738_20120210_010510_inLine
+BABEL_BP_104_77097_20120214_235954_inLine
+BABEL_BP_104_77097_20120214_235954_outLine
+BABEL_BP_104_80929_20120310_194854_inLine
+BABEL_BP_104_80929_20120310_194854_outLine
+BABEL_BP_104_81996_20120128_185859_outLine
+BABEL_BP_104_87067_20120324_182930_inLine
+BABEL_BP_104_87067_20120324_182930_outLine
+BABEL_BP_104_92342_20120320_041334_inLine
+BABEL_BP_104_92342_20120320_041334_outLine
+BABEL_BP_104_92793_20120118_235358_inLine
+BABEL_BP_104_94683_20120126_024342_inLine
+BABEL_BP_104_94775_20120321_230436_inLine
+BABEL_BP_104_94775_20120321_230436_outLine
+BABEL_BP_104_95349_20111229_162101_inLine
+BABEL_BP_104_95360_20120205_133312_inLine
+BABEL_BP_104_95360_20120205_133312_outLine
+BABEL_BP_104_95904_20120218_183758_inLine
+BABEL_BP_104_95904_20120218_183758_outLine
+BABEL_BP_104_96343_20120130_143444_outLine
diff --git a/egs/babel/s5d/conf/lists/104-pashto/training.list b/egs/babel/s5d/conf/lists/104-pashto/training.list
new file mode 100644
index 00000000000..deb9bc55dfe
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/104-pashto/training.list
@@ -0,0 +1,1026 @@
+BABEL_BP_104_01820_20120313_212614_inLine
+BABEL_BP_104_01820_20120313_212614_outLine
+BABEL_BP_104_02362_20120229_213454_inLine
+BABEL_BP_104_03770_20120109_014606_inLine
+BABEL_BP_104_03770_20120109_014606_outLine
+BABEL_BP_104_04074_20120318_203458_outLine
+BABEL_BP_104_05545_20120126_034408_inLine
+BABEL_BP_104_05545_20120126_034408_outLine
+BABEL_BP_104_08036_20111220_013826_inLine
+BABEL_BP_104_08139_20120126_021604_inLine
+BABEL_BP_104_10193_20120213_031930_inLine
+BABEL_BP_104_10193_20120213_031930_outLine
+BABEL_BP_104_10289_20120128_035330_inLine
+BABEL_BP_104_10289_20120128_035330_outLine
+BABEL_BP_104_10642_20120321_210945_outLine
+BABEL_BP_104_10668_20120311_014815_inLine
+BABEL_BP_104_10668_20120311_014815_outLine
+BABEL_BP_104_10911_20111222_025120_inLine
+BABEL_BP_104_10911_20111222_025120_outLine
+BABEL_BP_104_11145_20120321_154029_inLine
+BABEL_BP_104_11145_20120321_154029_outLine
+BABEL_BP_104_11146_20120224_000248_inLine
+BABEL_BP_104_11146_20120224_000248_outLine
+BABEL_BP_104_11153_20120108_191820_inLine
+BABEL_BP_104_11153_20120108_191820_outLine
+BABEL_BP_104_11202_20120213_235334_inLine
+BABEL_BP_104_11202_20120213_235334_outLine
+BABEL_BP_104_11388_20120202_224148_inLine
+BABEL_BP_104_11388_20120202_224148_outLine
+BABEL_BP_104_11442_20120218_234445_inLine
+BABEL_BP_104_11442_20120218_234445_outLine
+BABEL_BP_104_11626_20120316_193802_inLine
+BABEL_BP_104_11626_20120316_193802_outLine
+BABEL_BP_104_11647_20120315_022645_inLine
+BABEL_BP_104_11647_20120315_022645_outLine
+BABEL_BP_104_12171_20120212_154823_inLine
+BABEL_BP_104_12171_20120212_154823_outLine
+BABEL_BP_104_12474_20120309_193318_inLine
+BABEL_BP_104_12474_20120309_193318_outLine
+BABEL_BP_104_12494_20120213_180757_inLine
+BABEL_BP_104_12494_20120213_180757_outLine
+BABEL_BP_104_12562_20120307_152654_inLine
+BABEL_BP_104_12682_20120223_031401_inLine
+BABEL_BP_104_12843_20120202_221656_inLine
+BABEL_BP_104_12843_20120202_221656_outLine
+BABEL_BP_104_12946_20120224_013645_inLine
+BABEL_BP_104_12946_20120224_013645_outLine
+BABEL_BP_104_13064_20120220_040256_inLine
+BABEL_BP_104_13064_20120220_040256_outLine
+BABEL_BP_104_13157_20120207_204725_inLine
+BABEL_BP_104_13157_20120207_204725_outLine
+BABEL_BP_104_13189_20120112_020041_inLine
+BABEL_BP_104_13189_20120112_020041_outLine
+BABEL_BP_104_13354_20120121_164912_inLine
+BABEL_BP_104_13354_20120121_164912_outLine
+BABEL_BP_104_13456_20120111_024843_outLine
+BABEL_BP_104_13546_20120327_004548_outLine
+BABEL_BP_104_13580_20120222_195120_inLine
+BABEL_BP_104_13580_20120222_195120_outLine
+BABEL_BP_104_13615_20120314_233732_inLine
+BABEL_BP_104_13615_20120314_233732_outLine
+BABEL_BP_104_13694_20120321_001123_outLine
+BABEL_BP_104_13771_20120316_004856_inLine
+BABEL_BP_104_13771_20120316_004856_outLine
+BABEL_BP_104_13798_20120105_221125_inLine
+BABEL_BP_104_13798_20120105_221125_outLine
+BABEL_BP_104_13952_20120126_185217_inLine
+BABEL_BP_104_13952_20120126_185217_outLine
+BABEL_BP_104_14147_20120320_003436_inLine
+BABEL_BP_104_14147_20120320_003436_outLine
+BABEL_BP_104_14225_20120331_015956_inLine
+BABEL_BP_104_14225_20120331_020908_inLine
+BABEL_BP_104_14527_20120207_235446_inLine
+BABEL_BP_104_14527_20120207_235446_outLine
+BABEL_BP_104_14927_20111224_041309_inLine
+BABEL_BP_104_14927_20111224_041309_outLine
+BABEL_BP_104_14984_20120205_195333_inLine
+BABEL_BP_104_14984_20120205_195333_outLine
+BABEL_BP_104_15176_20120316_181716_outLine
+BABEL_BP_104_15324_20120126_222036_inLine
+BABEL_BP_104_15324_20120126_222036_outLine
+BABEL_BP_104_15324_20120127_023323_inLine
+BABEL_BP_104_15324_20120127_023323_outLine
+BABEL_BP_104_15377_20120322_045329_inLine
+BABEL_BP_104_15415_20120219_181352_inLine
+BABEL_BP_104_15552_20120304_160459_inLine
+BABEL_BP_104_15600_20111230_233908_inLine
+BABEL_BP_104_15600_20111230_233908_outLine
+BABEL_BP_104_15600_20111230_234837_inLine
+BABEL_BP_104_15600_20111230_234837_outLine
+BABEL_BP_104_15630_20120402_033748_inLine
+BABEL_BP_104_15749_20120131_005221_inLine
+BABEL_BP_104_15749_20120131_005221_outLine
+BABEL_BP_104_15803_20120117_155821_inLine
+BABEL_BP_104_15803_20120117_155821_outLine
+BABEL_BP_104_15861_20120401_024411_inLine
+BABEL_BP_104_15949_20120229_140434_inLine
+BABEL_BP_104_15949_20120229_140434_outLine
+BABEL_BP_104_16074_20120129_041107_inLine
+BABEL_BP_104_16074_20120129_041107_outLine
+BABEL_BP_104_16290_20120220_200234_inLine
+BABEL_BP_104_16290_20120220_200234_outLine
+BABEL_BP_104_16339_20120131_184255_inLine
+BABEL_BP_104_16350_20120315_043233_outLine
+BABEL_BP_104_16416_20120317_205531_inLine
+BABEL_BP_104_16416_20120317_205531_outLine
+BABEL_BP_104_16416_20120317_211129_inLine
+BABEL_BP_104_16416_20120317_211129_outLine
+BABEL_BP_104_16633_20120311_053635_inLine
+BABEL_BP_104_16633_20120311_053635_outLine
+BABEL_BP_104_17081_20120128_030343_inLine
+BABEL_BP_104_17081_20120128_030343_outLine
+BABEL_BP_104_17180_20120321_215255_inLine
+BABEL_BP_104_17180_20120321_215255_outLine
+BABEL_BP_104_17216_20120128_015245_inLine
+BABEL_BP_104_17216_20120128_020324_inLine
+BABEL_BP_104_17319_20111225_210159_inLine
+BABEL_BP_104_17319_20111225_210159_outLine
+BABEL_BP_104_17410_20120129_211432_inLine
+BABEL_BP_104_17410_20120129_211432_outLine
+BABEL_BP_104_17429_20120209_024521_inLine
+BABEL_BP_104_17429_20120209_024521_outLine
+BABEL_BP_104_17450_20120331_021646_inLine
+BABEL_BP_104_17612_20120205_043931_outLine
+BABEL_BP_104_17783_20120205_045923_inLine
+BABEL_BP_104_17783_20120205_045923_outLine
+BABEL_BP_104_17904_20120320_014817_inLine
+BABEL_BP_104_17904_20120320_014817_outLine
+BABEL_BP_104_17930_20120321_161410_outLine
+BABEL_BP_104_18084_20111230_210850_outLine
+BABEL_BP_104_18537_20120130_181101_inLine
+BABEL_BP_104_18537_20120130_181101_outLine
+BABEL_BP_104_18616_20120126_040622_inLine
+BABEL_BP_104_18616_20120126_040622_outLine
+BABEL_BP_104_18667_20120208_175014_inLine
+BABEL_BP_104_18667_20120208_175014_outLine
+BABEL_BP_104_18861_20120218_221303_inLine
+BABEL_BP_104_19044_20120218_182247_outLine
+BABEL_BP_104_19044_20120218_183017_outLine
+BABEL_BP_104_19044_20120218_183849_outLine
+BABEL_BP_104_19137_20120119_001516_inLine
+BABEL_BP_104_19207_20111224_044525_inLine
+BABEL_BP_104_19207_20111224_044525_outLine
+BABEL_BP_104_19324_20120310_192849_inLine
+BABEL_BP_104_19324_20120310_192849_outLine
+BABEL_BP_104_19346_20120205_182121_outLine
+BABEL_BP_104_19481_20120207_235626_inLine
+BABEL_BP_104_19481_20120207_235626_outLine
+BABEL_BP_104_19494_20120219_214920_inLine
+BABEL_BP_104_19494_20120219_214920_outLine
+BABEL_BP_104_19759_20111223_184346_outLine
+BABEL_BP_104_20016_20120206_215156_inLine
+BABEL_BP_104_20016_20120206_215156_outLine
+BABEL_BP_104_20114_20120324_213414_inLine
+BABEL_BP_104_20171_20120318_012849_inLine
+BABEL_BP_104_20171_20120318_012849_outLine
+BABEL_BP_104_20171_20120318_014226_inLine
+BABEL_BP_104_20171_20120318_014226_outLine
+BABEL_BP_104_20219_20120221_223942_inLine
+BABEL_BP_104_20219_20120221_223942_outLine
+BABEL_BP_104_20386_20120226_160551_inLine
+BABEL_BP_104_20386_20120226_160551_outLine
+BABEL_BP_104_20557_20120402_215807_inLine
+BABEL_BP_104_20738_20120129_182528_inLine
+BABEL_BP_104_20738_20120129_182528_outLine
+BABEL_BP_104_21041_20120309_143920_inLine
+BABEL_BP_104_21041_20120309_143920_outLine
+BABEL_BP_104_21061_20120205_192140_inLine
+BABEL_BP_104_21061_20120205_192140_outLine
+BABEL_BP_104_21224_20120322_040006_inLine
+BABEL_BP_104_21224_20120322_040006_outLine
+BABEL_BP_104_21256_20120217_202248_inLine
+BABEL_BP_104_21256_20120217_202248_outLine
+BABEL_BP_104_21489_20120213_163025_inLine
+BABEL_BP_104_21489_20120213_163025_outLine
+BABEL_BP_104_21928_20120204_212612_inLine
+BABEL_BP_104_21928_20120204_212612_outLine
+BABEL_BP_104_21968_20120131_180237_inLine
+BABEL_BP_104_21968_20120131_180237_outLine
+BABEL_BP_104_22548_20120125_211519_inLine
+BABEL_BP_104_22590_20120209_224232_inLine
+BABEL_BP_104_22590_20120209_224232_outLine
+BABEL_BP_104_23322_20120204_173810_inLine
+BABEL_BP_104_23322_20120204_173810_outLine
+BABEL_BP_104_23370_20120216_013240_inLine
+BABEL_BP_104_23370_20120216_013240_outLine
+BABEL_BP_104_23381_20120216_161115_outLine
+BABEL_BP_104_23705_20120219_011051_inLine
+BABEL_BP_104_23705_20120219_011051_outLine
+BABEL_BP_104_24235_20120209_030431_outLine
+BABEL_BP_104_24585_20120117_225722_inLine
+BABEL_BP_104_24585_20120117_225722_outLine
+BABEL_BP_104_24735_20120316_221529_inLine
+BABEL_BP_104_24735_20120316_221529_outLine
+BABEL_BP_104_24750_20120130_183131_inLine
+BABEL_BP_104_24750_20120130_183131_outLine
+BABEL_BP_104_24810_20120319_165838_outLine
+BABEL_BP_104_25015_20120216_005135_inLine
+BABEL_BP_104_25015_20120216_005135_outLine
+BABEL_BP_104_25525_20120316_140847_outLine
+BABEL_BP_104_25911_20111222_051549_inLine
+BABEL_BP_104_25911_20111222_051549_outLine
+BABEL_BP_104_26475_20120309_222554_inLine
+BABEL_BP_104_26654_20120130_035807_inLine
+BABEL_BP_104_26654_20120130_035807_outLine
+BABEL_BP_104_26801_20120401_022159_inLine
+BABEL_BP_104_26946_20120130_034221_outLine
+BABEL_BP_104_27184_20120505_152626_outLine
+BABEL_BP_104_27267_20120325_002713_outLine
+BABEL_BP_104_27298_20111225_192028_inLine
+BABEL_BP_104_27298_20111225_192028_outLine
+BABEL_BP_104_27374_20120318_190552_outLine
+BABEL_BP_104_28066_20120318_173932_inLine
+BABEL_BP_104_28066_20120318_173932_outLine
+BABEL_BP_104_28281_20120111_045749_inLine
+BABEL_BP_104_28281_20120111_045749_outLine
+BABEL_BP_104_28289_20120310_202856_inLine
+BABEL_BP_104_28289_20120310_202856_outLine
+BABEL_BP_104_28330_20120306_194033_inLine
+BABEL_BP_104_28330_20120306_195756_inLine
+BABEL_BP_104_28734_20120126_205422_inLine
+BABEL_BP_104_28734_20120126_212950_inLine
+BABEL_BP_104_29009_20120319_164025_outLine
+BABEL_BP_104_29103_20120127_183035_inLine
+BABEL_BP_104_29103_20120127_183035_outLine
+BABEL_BP_104_29757_20120208_191006_inLine
+BABEL_BP_104_29757_20120208_191006_outLine
+BABEL_BP_104_29809_20120209_044252_inLine
+BABEL_BP_104_29809_20120209_044252_outLine
+BABEL_BP_104_29967_20120208_201355_inLine
+BABEL_BP_104_29967_20120208_201355_outLine
+BABEL_BP_104_30031_20120319_035012_inLine
+BABEL_BP_104_30031_20120319_035012_outLine
+BABEL_BP_104_30040_20120313_211534_inLine
+BABEL_BP_104_30040_20120313_211534_outLine
+BABEL_BP_104_30040_20120313_212609_inLine
+BABEL_BP_104_30040_20120313_212609_outLine
+BABEL_BP_104_30143_20111227_132440_inLine
+BABEL_BP_104_30197_20120213_160025_inLine
+BABEL_BP_104_30197_20120213_160025_outLine
+BABEL_BP_104_30271_20120205_163755_inLine
+BABEL_BP_104_30271_20120205_165111_inLine
+BABEL_BP_104_30620_20120321_151904_inLine
+BABEL_BP_104_30620_20120321_151904_outLine
+BABEL_BP_104_30628_20120219_182744_inLine
+BABEL_BP_104_30628_20120219_182744_outLine
+BABEL_BP_104_30711_20120219_175435_outLine
+BABEL_BP_104_30847_20120128_045058_inLine
+BABEL_BP_104_30847_20120128_050033_inLine
+BABEL_BP_104_30848_20120204_154057_inLine
+BABEL_BP_104_30848_20120204_154058_outLine
+BABEL_BP_104_31046_20120203_161436_inLine
+BABEL_BP_104_31095_20120210_132537_inLine
+BABEL_BP_104_31215_20120228_003446_inLine
+BABEL_BP_104_31215_20120228_003446_outLine
+BABEL_BP_104_31215_20120228_032743_inLine
+BABEL_BP_104_31215_20120228_032743_outLine
+BABEL_BP_104_31663_20120210_140419_inLine
+BABEL_BP_104_31663_20120210_140419_outLine
+BABEL_BP_104_31919_20120405_023221_inLine
+BABEL_BP_104_31926_20120319_040036_outLine
+BABEL_BP_104_32703_20120110_212645_outLine
+BABEL_BP_104_32703_20120110_212646_inLine
+BABEL_BP_104_32956_20120221_133851_inLine
+BABEL_BP_104_32956_20120221_133851_outLine
+BABEL_BP_104_33223_20120319_050332_inLine
+BABEL_BP_104_33272_20120318_233319_outLine
+BABEL_BP_104_33337_20120220_005047_inLine
+BABEL_BP_104_33337_20120220_005047_outLine
+BABEL_BP_104_33510_20120324_185136_outLine
+BABEL_BP_104_33518_20120218_204645_inLine
+BABEL_BP_104_33518_20120218_204645_outLine
+BABEL_BP_104_33846_20120123_194027_inLine
+BABEL_BP_104_34037_20120318_232512_inLine
+BABEL_BP_104_34037_20120318_235541_inLine
+BABEL_BP_104_34164_20120221_141502_inLine
+BABEL_BP_104_34164_20120221_141502_outLine
+BABEL_BP_104_34188_20120219_000455_inLine
+BABEL_BP_104_34188_20120219_000455_outLine
+BABEL_BP_104_34335_20111225_224055_outLine
+BABEL_BP_104_34540_20120314_153124_inLine
+BABEL_BP_104_34540_20120314_153124_outLine
+BABEL_BP_104_34708_20120125_173011_inLine
+BABEL_BP_104_34708_20120125_173011_outLine
+BABEL_BP_104_34714_20120313_142435_inLine
+BABEL_BP_104_34714_20120313_142435_outLine
+BABEL_BP_104_34833_20120215_025837_inLine
+BABEL_BP_104_34833_20120215_025837_outLine
+BABEL_BP_104_34994_20120314_001810_outLine
+BABEL_BP_104_34994_20120314_003701_outLine
+BABEL_BP_104_35073_20120208_223917_outLine
+BABEL_BP_104_35152_20111229_025446_inLine
+BABEL_BP_104_35152_20111229_025446_outLine
+BABEL_BP_104_35241_20120314_052346_inLine
+BABEL_BP_104_35241_20120314_052346_outLine
+BABEL_BP_104_35444_20120310_190608_inLine
+BABEL_BP_104_35444_20120310_190608_outLine
+BABEL_BP_104_35544_20120131_174538_inLine
+BABEL_BP_104_35544_20120131_174538_outLine
+BABEL_BP_104_35646_20120202_222418_inLine
+BABEL_BP_104_35646_20120202_222418_outLine
+BABEL_BP_104_35874_20120403_213324_inLine
+BABEL_BP_104_35916_20120204_030147_inLine
+BABEL_BP_104_35916_20120204_030147_outLine
+BABEL_BP_104_35923_20120216_021137_inLine
+BABEL_BP_104_35923_20120216_021137_outLine
+BABEL_BP_104_35983_20120324_152856_outLine
+BABEL_BP_104_36017_20120123_222703_outLine
+BABEL_BP_104_36138_20120206_210519_inLine
+BABEL_BP_104_36138_20120206_210519_outLine
+BABEL_BP_104_36413_20120310_185758_inLine
+BABEL_BP_104_36413_20120310_185758_outLine
+BABEL_BP_104_36487_20120209_211827_inLine
+BABEL_BP_104_36487_20120209_211827_outLine
+BABEL_BP_104_37131_20120318_210220_inLine
+BABEL_BP_104_37131_20120318_210220_outLine
+BABEL_BP_104_37135_20120219_044437_inLine
+BABEL_BP_104_37135_20120219_044437_outLine
+BABEL_BP_104_37357_20120321_212732_inLine
+BABEL_BP_104_37357_20120321_212732_outLine
+BABEL_BP_104_37551_20120201_032910_inLine
+BABEL_BP_104_37551_20120201_032910_outLine
+BABEL_BP_104_37593_20120130_203434_inLine
+BABEL_BP_104_37593_20120130_203434_outLine
+BABEL_BP_104_37687_20120316_175600_outLine
+BABEL_BP_104_37731_20120213_034923_inLine
+BABEL_BP_104_37731_20120213_034923_outLine
+BABEL_BP_104_37733_20120207_040916_inLine
+BABEL_BP_104_37733_20120207_040936_outLine
+BABEL_BP_104_38163_20120109_022356_inLine
+BABEL_BP_104_38163_20120109_022356_outLine
+BABEL_BP_104_38223_20120129_195918_inLine
+BABEL_BP_104_38223_20120129_195918_outLine
+BABEL_BP_104_38255_20120312_231219_inLine
+BABEL_BP_104_38255_20120312_231819_inLine
+BABEL_BP_104_38255_20120322_142237_inLine
+BABEL_BP_104_38479_20120213_011154_inLine
+BABEL_BP_104_38479_20120213_011154_outLine
+BABEL_BP_104_38563_20120127_181357_outLine
+BABEL_BP_104_38685_20120205_205815_inLine
+BABEL_BP_104_38685_20120205_205815_outLine
+BABEL_BP_104_38902_20120331_152704_inLine
+BABEL_BP_104_39119_20120203_183149_inLine
+BABEL_BP_104_39119_20120203_183149_outLine
+BABEL_BP_104_39178_20120109_195710_inLine
+BABEL_BP_104_39320_20120110_190913_inLine
+BABEL_BP_104_39320_20120110_190913_outLine
+BABEL_BP_104_39390_20120322_042714_outLine
+BABEL_BP_104_39525_20120217_200400_inLine
+BABEL_BP_104_39525_20120217_200400_outLine
+BABEL_BP_104_39999_20120326_194721_inLine
+BABEL_BP_104_39999_20120326_194721_outLine
+BABEL_BP_104_40136_20120222_030818_inLine
+BABEL_BP_104_40136_20120222_030823_outLine
+BABEL_BP_104_40585_20120309_200652_outLine
+BABEL_BP_104_40607_20120324_163524_inLine
+BABEL_BP_104_40612_20120106_024347_inLine
+BABEL_BP_104_40612_20120106_024347_outLine
+BABEL_BP_104_40640_20120131_044455_outLine
+BABEL_BP_104_40701_20120209_031300_inLine
+BABEL_BP_104_40701_20120209_031301_outLine
+BABEL_BP_104_40866_20120119_030533_inLine
+BABEL_BP_104_40866_20120119_030533_outLine
+BABEL_BP_104_40889_20120227_180714_inLine
+BABEL_BP_104_40889_20120227_180714_outLine
+BABEL_BP_104_41306_20120223_191213_inLine
+BABEL_BP_104_41306_20120223_191213_outLine
+BABEL_BP_104_41498_20120309_223111_outLine
+BABEL_BP_104_41531_20120331_010320_inLine
+BABEL_BP_104_41531_20120331_010320_outLine
+BABEL_BP_104_41610_20111225_214331_inLine
+BABEL_BP_104_41610_20111225_214331_outLine
+BABEL_BP_104_41871_20120310_200016_inLine
+BABEL_BP_104_42006_20120304_162643_inLine
+BABEL_BP_104_42006_20120304_162643_outLine
+BABEL_BP_104_42145_20120127_042217_inLine
+BABEL_BP_104_42145_20120127_042217_outLine
+BABEL_BP_104_42571_20120229_014427_inLine
+BABEL_BP_104_42571_20120229_014427_outLine
+BABEL_BP_104_42571_20120229_020000_inLine
+BABEL_BP_104_42571_20120229_020000_outLine
+BABEL_BP_104_42836_20120331_181552_outLine
+BABEL_BP_104_42929_20120307_150902_inLine
+BABEL_BP_104_42929_20120307_150902_outLine
+BABEL_BP_104_42952_20120318_233729_inLine
+BABEL_BP_104_43036_20120128_024308_inLine
+BABEL_BP_104_43036_20120128_025047_inLine
+BABEL_BP_104_43036_20120128_025047_outLine
+BABEL_BP_104_43036_20120128_030158_inLine
+BABEL_BP_104_43036_20120128_030158_outLine
+BABEL_BP_104_43051_20120207_002833_inLine
+BABEL_BP_104_43267_20120229_211432_inLine
+BABEL_BP_104_43267_20120229_211432_outLine
+BABEL_BP_104_43322_20120126_040725_inLine
+BABEL_BP_104_43462_20120216_210005_inLine
+BABEL_BP_104_43462_20120216_210005_outLine
+BABEL_BP_104_43480_20120326_155717_inLine
+BABEL_BP_104_43501_20120331_220724_outLine
+BABEL_BP_104_43501_20120331_222326_outLine
+BABEL_BP_104_43571_20120203_040537_inLine
+BABEL_BP_104_43571_20120203_040537_outLine
+BABEL_BP_104_43684_20120128_182736_outLine
+BABEL_BP_104_43714_20120219_132220_inLine
+BABEL_BP_104_43714_20120219_132220_outLine
+BABEL_BP_104_43724_20120219_213737_inLine
+BABEL_BP_104_43724_20120219_213737_outLine
+BABEL_BP_104_43725_20120205_002936_inLine
+BABEL_BP_104_43725_20120205_002936_outLine
+BABEL_BP_104_43833_20120331_193735_outLine
+BABEL_BP_104_43974_20120320_213041_inLine
+BABEL_BP_104_43974_20120320_215224_inLine
+BABEL_BP_104_44103_20120218_005711_inLine
+BABEL_BP_104_44103_20120218_005711_outLine
+BABEL_BP_104_44141_20120209_171547_inLine
+BABEL_BP_104_44141_20120209_171547_outLine
+BABEL_BP_104_44267_20120404_010500_inLine
+BABEL_BP_104_44278_20120318_152209_inLine
+BABEL_BP_104_44278_20120318_152209_outLine
+BABEL_BP_104_44468_20120222_125222_inLine
+BABEL_BP_104_44468_20120222_125222_outLine
+BABEL_BP_104_44515_20120326_144709_inLine
+BABEL_BP_104_44515_20120326_150551_inLine
+BABEL_BP_104_44799_20120119_040419_inLine
+BABEL_BP_104_44799_20120119_040419_outLine
+BABEL_BP_104_44976_20120128_211450_inLine
+BABEL_BP_104_44976_20120128_211450_outLine
+BABEL_BP_104_45064_20120218_205233_inLine
+BABEL_BP_104_45064_20120218_205233_outLine
+BABEL_BP_104_45188_20120121_023218_outLine
+BABEL_BP_104_45356_20120324_234702_outLine
+BABEL_BP_104_45403_20111222_014909_outLine
+BABEL_BP_104_45562_20120131_200753_inLine
+BABEL_BP_104_45823_20120103_201816_inLine
+BABEL_BP_104_45842_20120210_164857_inLine
+BABEL_BP_104_45842_20120210_164857_outLine
+BABEL_BP_104_45926_20120127_162212_inLine
+BABEL_BP_104_45926_20120127_162212_outLine
+BABEL_BP_104_45947_20120313_214251_inLine
+BABEL_BP_104_46004_20120223_160156_inLine
+BABEL_BP_104_46004_20120223_160156_outLine
+BABEL_BP_104_46168_20120217_200729_inLine
+BABEL_BP_104_46168_20120217_200729_outLine
+BABEL_BP_104_46319_20120210_225923_outLine
+BABEL_BP_104_46361_20120126_004615_outLine
+BABEL_BP_104_46455_20120218_222247_inLine
+BABEL_BP_104_46455_20120218_222247_outLine
+BABEL_BP_104_46734_20120219_025954_outLine
+BABEL_BP_104_46827_20120210_134310_inLine
+BABEL_BP_104_46827_20120210_134310_outLine
+BABEL_BP_104_46900_20120110_181315_inLine
+BABEL_BP_104_46900_20120110_181315_outLine
+BABEL_BP_104_46979_20120223_173811_inLine
+BABEL_BP_104_46979_20120223_173811_outLine
+BABEL_BP_104_47015_20120222_053105_inLine
+BABEL_BP_104_47015_20120222_053105_outLine
+BABEL_BP_104_47177_20120127_223720_outLine
+BABEL_BP_104_47424_20120120_233633_inLine
+BABEL_BP_104_47424_20120120_233633_outLine
+BABEL_BP_104_47836_20120331_183954_outLine
+BABEL_BP_104_47916_20120401_014343_inLine
+BABEL_BP_104_47917_20120319_003035_inLine
+BABEL_BP_104_47917_20120319_003035_outLine
+BABEL_BP_104_48000_20120323_171146_inLine
+BABEL_BP_104_48000_20120323_171146_outLine
+BABEL_BP_104_48001_20120204_231603_inLine
+BABEL_BP_104_48001_20120204_231603_outLine
+BABEL_BP_104_48259_20120217_200412_inLine
+BABEL_BP_104_48259_20120217_200412_outLine
+BABEL_BP_104_48365_20120212_043935_inLine
+BABEL_BP_104_48365_20120212_043935_outLine
+BABEL_BP_104_48416_20120218_203541_inLine
+BABEL_BP_104_48416_20120218_203542_outLine
+BABEL_BP_104_48834_20111221_032658_inLine
+BABEL_BP_104_48834_20111221_032658_outLine
+BABEL_BP_104_48944_20120218_011825_inLine
+BABEL_BP_104_48944_20120218_011825_outLine
+BABEL_BP_104_48946_20120320_192250_inLine
+BABEL_BP_104_48946_20120320_192250_outLine
+BABEL_BP_104_49141_20120330_015342_inLine
+BABEL_BP_104_49629_20120312_155816_outLine
+BABEL_BP_104_50030_20120404_005406_inLine
+BABEL_BP_104_50407_20120318_232348_inLine
+BABEL_BP_104_50407_20120318_232348_outLine
+BABEL_BP_104_50523_20120314_033747_inLine
+BABEL_BP_104_50523_20120314_033747_outLine
+BABEL_BP_104_50523_20120314_231004_inLine
+BABEL_BP_104_50523_20120314_231004_outLine
+BABEL_BP_104_50583_20120404_000547_inLine
+BABEL_BP_104_50682_20120116_205741_inLine
+BABEL_BP_104_50682_20120116_205741_outLine
+BABEL_BP_104_50820_20120213_140300_inLine
+BABEL_BP_104_50820_20120213_140300_outLine
+BABEL_BP_104_50940_20120309_160847_inLine
+BABEL_BP_104_50940_20120322_132036_inLine
+BABEL_BP_104_51024_20120131_172745_inLine
+BABEL_BP_104_51047_20120319_042347_outLine
+BABEL_BP_104_51079_20120316_150756_outLine
+BABEL_BP_104_51329_20120222_203129_inLine
+BABEL_BP_104_51329_20120222_203129_outLine
+BABEL_BP_104_51329_20120222_205332_inLine
+BABEL_BP_104_51329_20120222_205332_outLine
+BABEL_BP_104_51388_20120221_175113_inLine
+BABEL_BP_104_51519_20120220_052247_inLine
+BABEL_BP_104_51519_20120220_052247_outLine
+BABEL_BP_104_51570_20120118_225333_inLine
+BABEL_BP_104_51570_20120118_225333_outLine
+BABEL_BP_104_51716_20120221_005215_inLine
+BABEL_BP_104_51716_20120221_005215_outLine
+BABEL_BP_104_52067_20120313_210602_inLine
+BABEL_BP_104_52067_20120313_210602_outLine
+BABEL_BP_104_52116_20120316_225019_inLine
+BABEL_BP_104_52116_20120316_225020_outLine
+BABEL_BP_104_52300_20120203_210256_inLine
+BABEL_BP_104_52300_20120203_210256_outLine
+BABEL_BP_104_52359_20120328_212912_inLine
+BABEL_BP_104_52753_20120209_225916_inLine
+BABEL_BP_104_52753_20120209_225916_outLine
+BABEL_BP_104_52753_20120213_014050_inLine
+BABEL_BP_104_52753_20120213_014050_outLine
+BABEL_BP_104_52954_20120313_170902_inLine
+BABEL_BP_104_52954_20120313_170902_outLine
+BABEL_BP_104_53159_20120402_035901_inLine
+BABEL_BP_104_53159_20120402_035901_outLine
+BABEL_BP_104_53262_20120311_192937_inLine
+BABEL_BP_104_53334_20120309_184805_inLine
+BABEL_BP_104_53334_20120309_184805_outLine
+BABEL_BP_104_53346_20120205_222257_inLine
+BABEL_BP_104_53659_20120218_205643_inLine
+BABEL_BP_104_53659_20120218_205643_outLine
+BABEL_BP_104_53718_20120202_220720_outLine
+BABEL_BP_104_53820_20120327_182222_inLine
+BABEL_BP_104_53820_20120327_182222_outLine
+BABEL_BP_104_54263_20120114_032041_inLine
+BABEL_BP_104_54417_20120119_045736_inLine
+BABEL_BP_104_54417_20120119_045736_outLine
+BABEL_BP_104_54780_20120403_231516_inLine
+BABEL_BP_104_54780_20120403_232436_inLine
+BABEL_BP_104_54909_20120130_194003_inLine
+BABEL_BP_104_54909_20120130_194003_outLine
+BABEL_BP_104_54975_20120111_002324_inLine
+BABEL_BP_104_54975_20120111_002324_outLine
+BABEL_BP_104_55131_20111225_220753_outLine
+BABEL_BP_104_55213_20120331_185824_outLine
+BABEL_BP_104_55316_20111221_024834_inLine
+BABEL_BP_104_55382_20120318_154619_inLine
+BABEL_BP_104_55544_20120108_200418_inLine
+BABEL_BP_104_55544_20120108_200418_outLine
+BABEL_BP_104_55668_20120212_011829_inLine
+BABEL_BP_104_55668_20120212_011829_outLine
+BABEL_BP_104_55855_20111220_211829_outLine
+BABEL_BP_104_56119_20120216_183711_inLine
+BABEL_BP_104_56119_20120216_183711_outLine
+BABEL_BP_104_56201_20120126_180227_outLine
+BABEL_BP_104_56308_20120402_024809_outLine
+BABEL_BP_104_56704_20120120_155806_inLine
+BABEL_BP_104_56704_20120120_155806_outLine
+BABEL_BP_104_56753_20120322_204356_outLine
+BABEL_BP_104_56805_20120320_045112_inLine
+BABEL_BP_104_56805_20120320_045112_outLine
+BABEL_BP_104_57005_20120321_034143_inLine
+BABEL_BP_104_57082_20120110_024829_inLine
+BABEL_BP_104_57116_20120110_180036_inLine
+BABEL_BP_104_57167_20111230_213737_outLine
+BABEL_BP_104_57210_20120321_020212_inLine
+BABEL_BP_104_57210_20120321_020212_outLine
+BABEL_BP_104_57263_20120302_211404_inLine
+BABEL_BP_104_57320_20120204_230109_inLine
+BABEL_BP_104_57320_20120204_230109_outLine
+BABEL_BP_104_57492_20120316_185552_inLine
+BABEL_BP_104_57492_20120316_185552_outLine
+BABEL_BP_104_57531_20120203_165801_inLine
+BABEL_BP_104_57531_20120203_165801_outLine
+BABEL_BP_104_57618_20120203_144717_inLine
+BABEL_BP_104_57618_20120203_144717_outLine
+BABEL_BP_104_57672_20120204_030206_outLine
+BABEL_BP_104_58041_20120129_165617_inLine
+BABEL_BP_104_58041_20120129_165617_outLine
+BABEL_BP_104_58089_20120111_210636_inLine
+BABEL_BP_104_58089_20120111_210636_outLine
+BABEL_BP_104_58094_20120211_202938_outLine
+BABEL_BP_104_58149_20120218_161613_outLine
+BABEL_BP_104_58188_20120124_150608_inLine
+BABEL_BP_104_58188_20120124_150608_outLine
+BABEL_BP_104_58298_20120208_214852_inLine
+BABEL_BP_104_58298_20120208_214852_outLine
+BABEL_BP_104_58807_20120327_175726_outLine
+BABEL_BP_104_58939_20120212_184855_inLine
+BABEL_BP_104_58939_20120212_184855_outLine
+BABEL_BP_104_58963_20120331_015840_inLine
+BABEL_BP_104_58963_20120331_015840_outLine
+BABEL_BP_104_59158_20120212_005248_inLine
+BABEL_BP_104_59158_20120212_005248_outLine
+BABEL_BP_104_59183_20120312_190106_inLine
+BABEL_BP_104_59219_20120131_225115_outLine
+BABEL_BP_104_59399_20120318_144751_inLine
+BABEL_BP_104_59399_20120318_144752_outLine
+BABEL_BP_104_59482_20120309_190927_inLine
+BABEL_BP_104_59482_20120309_190927_outLine
+BABEL_BP_104_59681_20120123_213306_inLine
+BABEL_BP_104_59681_20120123_213306_outLine
+BABEL_BP_104_59835_20120212_162802_inLine
+BABEL_BP_104_59835_20120212_162802_outLine
+BABEL_BP_104_60462_20120201_181707_inLine
+BABEL_BP_104_60462_20120201_181707_outLine
+BABEL_BP_104_60737_20120208_204130_inLine
+BABEL_BP_104_60737_20120208_204130_outLine
+BABEL_BP_104_60806_20120213_161652_outLine
+BABEL_BP_104_60921_20120220_050615_inLine
+BABEL_BP_104_60921_20120220_050615_outLine
+BABEL_BP_104_61029_20120201_224200_outLine
+BABEL_BP_104_61166_20120220_033838_inLine
+BABEL_BP_104_61166_20120220_034717_inLine
+BABEL_BP_104_61327_20120326_140350_inLine
+BABEL_BP_104_61327_20120326_140350_outLine
+BABEL_BP_104_61523_20120212_035522_inLine
+BABEL_BP_104_61606_20120131_174533_inLine
+BABEL_BP_104_61655_20120208_203143_inLine
+BABEL_BP_104_61655_20120208_203143_outLine
+BABEL_BP_104_61733_20120205_220251_outLine
+BABEL_BP_104_61735_20120314_012744_inLine
+BABEL_BP_104_61909_20120320_190739_inLine
+BABEL_BP_104_61909_20120320_190739_outLine
+BABEL_BP_104_62182_20111226_205547_inLine
+BABEL_BP_104_62182_20111226_205547_outLine
+BABEL_BP_104_62388_20120204_031740_inLine
+BABEL_BP_104_62388_20120204_031740_outLine
+BABEL_BP_104_62815_20120318_025812_outLine
+BABEL_BP_104_62816_20120312_153937_outLine
+BABEL_BP_104_62978_20120318_211036_inLine
+BABEL_BP_104_62978_20120318_211036_outLine
+BABEL_BP_104_63111_20120204_232445_outLine
+BABEL_BP_104_63215_20120213_040737_inLine
+BABEL_BP_104_63215_20120213_040737_outLine
+BABEL_BP_104_63220_20120131_155658_inLine
+BABEL_BP_104_63220_20120131_155658_outLine
+BABEL_BP_104_63390_20120123_212718_outLine
+BABEL_BP_104_63397_20120217_194928_inLine
+BABEL_BP_104_63397_20120217_194928_outLine
+BABEL_BP_104_63603_20120128_213000_outLine
+BABEL_BP_104_63784_20120216_015608_inLine
+BABEL_BP_104_63784_20120216_015608_outLine
+BABEL_BP_104_63929_20120319_155419_inLine
+BABEL_BP_104_63929_20120319_155419_outLine
+BABEL_BP_104_63934_20120318_201706_inLine
+BABEL_BP_104_63934_20120318_201706_outLine
+BABEL_BP_104_64055_20120111_034236_outLine
+BABEL_BP_104_64297_20120205_031234_inLine
+BABEL_BP_104_64297_20120205_031234_outLine
+BABEL_BP_104_64646_20120319_163845_outLine
+BABEL_BP_104_64695_20120128_014256_inLine
+BABEL_BP_104_64695_20120128_014256_outLine
+BABEL_BP_104_64820_20120111_032311_inLine
+BABEL_BP_104_64820_20120111_032311_outLine
+BABEL_BP_104_64905_20120206_221140_inLine
+BABEL_BP_104_64905_20120206_221140_outLine
+BABEL_BP_104_64990_20120119_173958_inLine
+BABEL_BP_104_64990_20120119_173958_outLine
+BABEL_BP_104_65211_20120119_015405_inLine
+BABEL_BP_104_65211_20120119_015405_outLine
+BABEL_BP_104_65341_20120220_222356_inLine
+BABEL_BP_104_65341_20120220_222356_outLine
+BABEL_BP_104_65357_20120309_190849_inLine
+BABEL_BP_104_65357_20120309_190849_outLine
+BABEL_BP_104_65590_20120109_001414_inLine
+BABEL_BP_104_65590_20120109_001414_outLine
+BABEL_BP_104_65741_20120218_010022_inLine
+BABEL_BP_104_65788_20120131_172922_outLine
+BABEL_BP_104_65954_20120128_163139_inLine
+BABEL_BP_104_65954_20120128_163139_outLine
+BABEL_BP_104_65974_20120316_195524_inLine
+BABEL_BP_104_65974_20120316_195524_outLine
+BABEL_BP_104_66351_20120317_181035_inLine
+BABEL_BP_104_66351_20120317_181035_outLine
+BABEL_BP_104_66643_20120316_004947_inLine
+BABEL_BP_104_66643_20120316_004947_outLine
+BABEL_BP_104_66784_20111225_190506_outLine
+BABEL_BP_104_66879_20120213_004555_inLine
+BABEL_BP_104_66879_20120213_004555_outLine
+BABEL_BP_104_67106_20120208_201829_inLine
+BABEL_BP_104_67106_20120208_201829_outLine
+BABEL_BP_104_67374_20120210_034059_inLine
+BABEL_BP_104_67374_20120210_034100_outLine
+BABEL_BP_104_67423_20120205_220658_outLine
+BABEL_BP_104_67534_20120204_181436_inLine
+BABEL_BP_104_67534_20120204_181436_outLine
+BABEL_BP_104_67655_20120218_035728_outLine
+BABEL_BP_104_67684_20120316_135144_inLine
+BABEL_BP_104_67684_20120316_135144_outLine
+BABEL_BP_104_67685_20120217_235729_inLine
+BABEL_BP_104_67685_20120217_235729_outLine
+BABEL_BP_104_67718_20120131_164436_inLine
+BABEL_BP_104_67718_20120131_164436_outLine
+BABEL_BP_104_67928_20120109_174230_inLine
+BABEL_BP_104_67928_20120109_174230_outLine
+BABEL_BP_104_68077_20120219_155535_outLine
+BABEL_BP_104_68111_20120321_185146_outLine
+BABEL_BP_104_68144_20120210_223106_outLine
+BABEL_BP_104_68189_20120128_005011_inLine
+BABEL_BP_104_68189_20120128_005011_outLine
+BABEL_BP_104_68209_20120219_045221_inLine
+BABEL_BP_104_68926_20120229_145934_inLine
+BABEL_BP_104_68926_20120229_145934_outLine
+BABEL_BP_104_68997_20120126_010839_inLine
+BABEL_BP_104_69127_20120402_221743_outLine
+BABEL_BP_104_69398_20111219_215754_inLine
+BABEL_BP_104_69638_20120205_022624_inLine
+BABEL_BP_104_69638_20120205_022624_outLine
+BABEL_BP_104_69656_20120129_050158_inLine
+BABEL_BP_104_69656_20120129_050158_outLine
+BABEL_BP_104_69656_20120129_051238_inLine
+BABEL_BP_104_69656_20120129_051238_outLine
+BABEL_BP_104_69771_20120220_034015_inLine
+BABEL_BP_104_69771_20120220_034015_outLine
+BABEL_BP_104_70207_20120209_001133_inLine
+BABEL_BP_104_70207_20120209_001133_outLine
+BABEL_BP_104_70333_20120210_033437_outLine
+BABEL_BP_104_70528_20120128_013553_inLine
+BABEL_BP_104_70528_20120128_013553_outLine
+BABEL_BP_104_70762_20120213_175054_outLine
+BABEL_BP_104_70858_20120204_012205_inLine
+BABEL_BP_104_70897_20120315_000410_inLine
+BABEL_BP_104_70897_20120315_000410_outLine
+BABEL_BP_104_70897_20120315_013535_inLine
+BABEL_BP_104_70897_20120315_013535_outLine
+BABEL_BP_104_71204_20120315_040441_inLine
+BABEL_BP_104_71324_20111220_215105_outLine
+BABEL_BP_104_71786_20120219_212052_outLine
+BABEL_BP_104_71948_20120210_012347_inLine
+BABEL_BP_104_71970_20120310_195048_inLine
+BABEL_BP_104_72179_20120129_175206_inLine
+BABEL_BP_104_72179_20120129_175206_outLine
+BABEL_BP_104_72480_20120211_223904_inLine
+BABEL_BP_104_72480_20120211_224426_inLine
+BABEL_BP_104_72693_20120209_005646_inLine
+BABEL_BP_104_72693_20120209_005646_outLine
+BABEL_BP_104_72709_20120209_034548_inLine
+BABEL_BP_104_72709_20120209_034548_outLine
+BABEL_BP_104_72874_20120213_191257_inLine
+BABEL_BP_104_72874_20120213_191257_outLine
+BABEL_BP_104_72910_20120310_185203_outLine
+BABEL_BP_104_73188_20120128_003921_inLine
+BABEL_BP_104_73188_20120128_003921_outLine
+BABEL_BP_104_73199_20120119_195108_outLine
+BABEL_BP_104_73403_20120320_183508_outLine
+BABEL_BP_104_73403_20120320_184757_outLine
+BABEL_BP_104_73450_20120206_024342_inLine
+BABEL_BP_104_73450_20120206_024342_outLine
+BABEL_BP_104_73607_20120203_163328_inLine
+BABEL_BP_104_73607_20120203_163328_outLine
+BABEL_BP_104_73925_20120123_233630_inLine
+BABEL_BP_104_73925_20120123_233630_outLine
+BABEL_BP_104_74261_20120331_191708_outLine
+BABEL_BP_104_74334_20111230_035012_inLine
+BABEL_BP_104_74940_20120228_225523_inLine
+BABEL_BP_104_74940_20120228_225523_outLine
+BABEL_BP_104_75390_20120218_133736_inLine
+BABEL_BP_104_75390_20120218_133736_outLine
+BABEL_BP_104_75402_20120319_160944_inLine
+BABEL_BP_104_75724_20120207_172820_outLine
+BABEL_BP_104_75822_20120205_214035_inLine
+BABEL_BP_104_75895_20120206_024214_inLine
+BABEL_BP_104_75895_20120206_024214_outLine
+BABEL_BP_104_76375_20120226_014726_inLine
+BABEL_BP_104_76375_20120226_014726_outLine
+BABEL_BP_104_76573_20120213_150121_inLine
+BABEL_BP_104_76573_20120213_150121_outLine
+BABEL_BP_104_76714_20120313_220017_inLine
+BABEL_BP_104_76714_20120313_220017_outLine
+BABEL_BP_104_76738_20120210_010510_inLine
+BABEL_BP_104_76742_20111215_203118_outLine
+BABEL_BP_104_76832_20120210_030141_outLine
+BABEL_BP_104_77097_20120214_235954_inLine
+BABEL_BP_104_77097_20120214_235954_outLine
+BABEL_BP_104_77256_20120309_064948_inLine
+BABEL_BP_104_77294_20120318_224422_inLine
+BABEL_BP_104_77294_20120318_224422_outLine
+BABEL_BP_104_77537_20120206_034628_outLine
+BABEL_BP_104_77693_20111228_014255_outLine
+BABEL_BP_104_77711_20120229_163050_inLine
+BABEL_BP_104_77711_20120229_163050_outLine
+BABEL_BP_104_77711_20120229_164115_inLine
+BABEL_BP_104_77711_20120229_164115_outLine
+BABEL_BP_104_78225_20120126_170942_outLine
+BABEL_BP_104_78254_20120209_222912_inLine
+BABEL_BP_104_78254_20120209_222912_outLine
+BABEL_BP_104_78254_20120209_234516_inLine
+BABEL_BP_104_78254_20120209_234516_outLine
+BABEL_BP_104_78367_20120105_012610_inLine
+BABEL_BP_104_78367_20120105_012610_outLine
+BABEL_BP_104_78443_20120128_211331_inLine
+BABEL_BP_104_78443_20120128_211331_outLine
+BABEL_BP_104_78452_20120316_005121_inLine
+BABEL_BP_104_78452_20120316_005121_outLine
+BABEL_BP_104_78452_20120316_005946_inLine
+BABEL_BP_104_78452_20120316_005946_outLine
+BABEL_BP_104_78462_20120112_181459_inLine
+BABEL_BP_104_78737_20120316_173217_inLine
+BABEL_BP_104_78737_20120316_173217_outLine
+BABEL_BP_104_78978_20120322_041159_inLine
+BABEL_BP_104_78978_20120322_042345_inLine
+BABEL_BP_104_79030_20120222_170416_inLine
+BABEL_BP_104_79030_20120222_170416_outLine
+BABEL_BP_104_79030_20120222_211653_inLine
+BABEL_BP_104_79030_20120222_211653_outLine
+BABEL_BP_104_79120_20120127_021912_inLine
+BABEL_BP_104_79120_20120127_021912_outLine
+BABEL_BP_104_79120_20120127_030132_inLine
+BABEL_BP_104_79120_20120127_030132_outLine
+BABEL_BP_104_79127_20120127_171446_outLine
+BABEL_BP_104_79156_20120126_191440_outLine
+BABEL_BP_104_79185_20120126_025253_inLine
+BABEL_BP_104_79185_20120126_025253_outLine
+BABEL_BP_104_79191_20120125_210322_inLine
+BABEL_BP_104_79191_20120125_210322_outLine
+BABEL_BP_104_79244_20111230_180239_inLine
+BABEL_BP_104_79378_20120302_011529_outLine
+BABEL_BP_104_79387_20120104_201110_inLine
+BABEL_BP_104_79387_20120104_201110_outLine
+BABEL_BP_104_79679_20120215_053807_inLine
+BABEL_BP_104_79679_20120215_053807_outLine
+BABEL_BP_104_79753_20120203_173233_inLine
+BABEL_BP_104_79753_20120203_173233_outLine
+BABEL_BP_104_79888_20120318_024215_outLine
+BABEL_BP_104_80105_20120205_233041_inLine
+BABEL_BP_104_80105_20120205_233041_outLine
+BABEL_BP_104_80134_20120313_215613_inLine
+BABEL_BP_104_80134_20120313_215613_outLine
+BABEL_BP_104_80226_20120210_182546_inLine
+BABEL_BP_104_80226_20120210_182546_outLine
+BABEL_BP_104_80284_20120109_235306_inLine
+BABEL_BP_104_80284_20120109_235306_outLine
+BABEL_BP_104_80424_20120207_221904_inLine
+BABEL_BP_104_80424_20120207_221904_outLine
+BABEL_BP_104_80559_20120319_152020_outLine
+BABEL_BP_104_80616_20120223_193040_inLine
+BABEL_BP_104_80616_20120223_193040_outLine
+BABEL_BP_104_80679_20120331_033903_outLine
+BABEL_BP_104_80815_20120322_001246_outLine
+BABEL_BP_104_80867_20120309_034536_inLine
+BABEL_BP_104_80867_20120309_034536_outLine
+BABEL_BP_104_80929_20120310_194854_inLine
+BABEL_BP_104_80929_20120310_194854_outLine
+BABEL_BP_104_81726_20120229_154500_inLine
+BABEL_BP_104_81726_20120229_154500_outLine
+BABEL_BP_104_81773_20120404_000845_outLine
+BABEL_BP_104_81923_20120128_004752_inLine
+BABEL_BP_104_81923_20120128_004752_outLine
+BABEL_BP_104_81996_20120128_185859_outLine
+BABEL_BP_104_82068_20120320_233307_inLine
+BABEL_BP_104_82068_20120320_234626_inLine
+BABEL_BP_104_82149_20120112_163113_inLine
+BABEL_BP_104_82499_20120215_024134_inLine
+BABEL_BP_104_82499_20120215_024134_outLine
+BABEL_BP_104_82526_20120201_124800_inLine
+BABEL_BP_104_82526_20120201_124800_outLine
+BABEL_BP_104_82583_20120211_041829_outLine
+BABEL_BP_104_82595_20120324_154901_outLine
+BABEL_BP_104_82677_20120206_173830_outLine
+BABEL_BP_104_82838_20120313_152742_inLine
+BABEL_BP_104_82838_20120313_152742_outLine
+BABEL_BP_104_82838_20120313_154639_inLine
+BABEL_BP_104_82838_20120313_154639_outLine
+BABEL_BP_104_82849_20120212_185110_inLine
+BABEL_BP_104_82849_20120212_185110_outLine
+BABEL_BP_104_82964_20120218_181351_outLine
+BABEL_BP_104_83050_20120114_231129_inLine
+BABEL_BP_104_83050_20120114_231129_outLine
+BABEL_BP_104_83072_20120213_170201_inLine
+BABEL_BP_104_83072_20120213_170201_outLine
+BABEL_BP_104_83112_20120204_161112_inLine
+BABEL_BP_104_83112_20120204_161112_outLine
+BABEL_BP_104_83747_20120120_153904_outLine
+BABEL_BP_104_83835_20120321_145755_inLine
+BABEL_BP_104_83835_20120321_145755_outLine
+BABEL_BP_104_83866_20120206_040504_inLine
+BABEL_BP_104_83866_20120206_040505_outLine
+BABEL_BP_104_83941_20120119_030904_inLine
+BABEL_BP_104_83941_20120119_030904_outLine
+BABEL_BP_104_84132_20120312_054349_outLine
+BABEL_BP_104_84315_20120318_184410_outLine
+BABEL_BP_104_84360_20111228_033339_inLine
+BABEL_BP_104_84360_20111228_033339_outLine
+BABEL_BP_104_84854_20120129_233819_inLine
+BABEL_BP_104_84854_20120129_233819_outLine
+BABEL_BP_104_84885_20120217_215436_inLine
+BABEL_BP_104_84885_20120217_215436_outLine
+BABEL_BP_104_84950_20120130_131546_inLine
+BABEL_BP_104_84950_20120130_131546_outLine
+BABEL_BP_104_84985_20120319_172452_outLine
+BABEL_BP_104_84985_20120319_173047_outLine
+BABEL_BP_104_85147_20120129_180533_inLine
+BABEL_BP_104_85147_20120129_180533_outLine
+BABEL_BP_104_85272_20120127_032845_inLine
+BABEL_BP_104_85272_20120127_032845_outLine
+BABEL_BP_104_85388_20120128_190259_inLine
+BABEL_BP_104_85388_20120128_190259_outLine
+BABEL_BP_104_85558_20120413_044033_inLine
+BABEL_BP_104_85579_20120205_170917_inLine
+BABEL_BP_104_85579_20120205_170917_outLine
+BABEL_BP_104_85597_20120320_231227_inLine
+BABEL_BP_104_86528_20120128_211228_inLine
+BABEL_BP_104_86537_20120128_022125_inLine
+BABEL_BP_104_86537_20120128_023523_inLine
+BABEL_BP_104_87032_20120111_203623_inLine
+BABEL_BP_104_87032_20120111_203623_outLine
+BABEL_BP_104_87067_20120324_182930_inLine
+BABEL_BP_104_87067_20120324_182930_outLine
+BABEL_BP_104_87422_20120212_021635_outLine
+BABEL_BP_104_87453_20120131_210831_inLine
+BABEL_BP_104_87453_20120131_210831_outLine
+BABEL_BP_104_87517_20120207_200619_inLine
+BABEL_BP_104_87517_20120207_200619_outLine
+BABEL_BP_104_87970_20120221_172638_inLine
+BABEL_BP_104_87970_20120221_172638_outLine
+BABEL_BP_104_88006_20120207_214550_inLine
+BABEL_BP_104_88006_20120207_214550_outLine
+BABEL_BP_104_88070_20120318_164350_outLine
+BABEL_BP_104_88434_20120319_170128_inLine
+BABEL_BP_104_88434_20120319_170128_outLine
+BABEL_BP_104_88604_20120111_001257_inLine
+BABEL_BP_104_88604_20120111_001257_outLine
+BABEL_BP_104_88921_20120205_215225_inLine
+BABEL_BP_104_88921_20120205_215225_outLine
+BABEL_BP_104_89036_20120327_211455_inLine
+BABEL_BP_104_89053_20120129_232038_inLine
+BABEL_BP_104_89053_20120129_232038_outLine
+BABEL_BP_104_89402_20120205_045136_outLine
+BABEL_BP_104_89925_20120202_000208_inLine
+BABEL_BP_104_89925_20120202_000208_outLine
+BABEL_BP_104_89952_20120131_212850_inLine
+BABEL_BP_104_89952_20120131_212850_outLine
+BABEL_BP_104_90022_20120207_051223_inLine
+BABEL_BP_104_90022_20120207_051223_outLine
+BABEL_BP_104_90263_20120205_044035_inLine
+BABEL_BP_104_90263_20120205_044035_outLine
+BABEL_BP_104_90310_20120129_024342_outLine
+BABEL_BP_104_91161_20120311_032449_inLine
+BABEL_BP_104_91161_20120311_032449_outLine
+BABEL_BP_104_91495_20120210_163107_inLine
+BABEL_BP_104_91495_20120210_163107_outLine
+BABEL_BP_104_91875_20120210_004013_inLine
+BABEL_BP_104_91875_20120210_004013_outLine
+BABEL_BP_104_91880_20120226_221957_inLine
+BABEL_BP_104_91880_20120226_221957_outLine
+BABEL_BP_104_92000_20120206_011350_inLine
+BABEL_BP_104_92000_20120206_011350_outLine
+BABEL_BP_104_92310_20120206_033517_inLine
+BABEL_BP_104_92310_20120206_033517_outLine
+BABEL_BP_104_92342_20120320_041334_inLine
+BABEL_BP_104_92342_20120320_041334_outLine
+BABEL_BP_104_92636_20120128_193247_inLine
+BABEL_BP_104_92636_20120128_193247_outLine
+BABEL_BP_104_92679_20111226_171331_outLine
+BABEL_BP_104_92722_20120209_235113_outLine
+BABEL_BP_104_92793_20120118_235358_inLine
+BABEL_BP_104_93129_20120218_130813_inLine
+BABEL_BP_104_93129_20120218_130813_outLine
+BABEL_BP_104_93227_20120216_190245_inLine
+BABEL_BP_104_93227_20120216_190245_outLine
+BABEL_BP_104_93300_20120221_135558_inLine
+BABEL_BP_104_93300_20120221_135558_outLine
+BABEL_BP_104_93358_20120321_002737_inLine
+BABEL_BP_104_93358_20120321_003427_inLine
+BABEL_BP_104_93713_20120121_004435_inLine
+BABEL_BP_104_93730_20120220_052912_outLine
+BABEL_BP_104_93730_20120220_054726_outLine
+BABEL_BP_104_93844_20120316_014157_inLine
+BABEL_BP_104_93844_20120327_194612_inLine
+BABEL_BP_104_93976_20120206_181449_outLine
+BABEL_BP_104_94051_20120309_174814_outLine
+BABEL_BP_104_94533_20120128_020431_inLine
+BABEL_BP_104_94533_20120128_020431_outLine
+BABEL_BP_104_94572_20120321_022026_inLine
+BABEL_BP_104_94683_20120126_024342_inLine
+BABEL_BP_104_94775_20120321_230436_inLine
+BABEL_BP_104_94775_20120321_230436_outLine
+BABEL_BP_104_94793_20120204_043218_inLine
+BABEL_BP_104_94793_20120204_043218_outLine
+BABEL_BP_104_94951_20120110_231948_inLine
+BABEL_BP_104_94951_20120110_231948_outLine
+BABEL_BP_104_95202_20120309_185925_inLine
+BABEL_BP_104_95202_20120309_185925_outLine
+BABEL_BP_104_95349_20111229_162101_inLine
+BABEL_BP_104_95360_20120205_133312_inLine
+BABEL_BP_104_95360_20120205_133312_outLine
+BABEL_BP_104_95465_20120223_040653_inLine
+BABEL_BP_104_95465_20120223_040653_outLine
+BABEL_BP_104_95904_20120218_183758_inLine
+BABEL_BP_104_95904_20120218_183758_outLine
+BABEL_BP_104_96343_20120130_143444_outLine
+BABEL_BP_104_96621_20120127_235745_inLine
+BABEL_BP_104_96621_20120127_235745_outLine
+BABEL_BP_104_96690_20120321_005155_inLine
+BABEL_BP_104_96811_20120217_021933_inLine
+BABEL_BP_104_96811_20120217_021933_outLine
+BABEL_BP_104_96956_20120209_025537_inLine
+BABEL_BP_104_96956_20120209_025537_outLine
+BABEL_BP_104_97050_20120314_144713_outLine
+BABEL_BP_104_97258_20120129_060817_inLine
+BABEL_BP_104_97258_20120129_060817_outLine
+BABEL_BP_104_97335_20120131_013929_inLine
+BABEL_BP_104_97335_20120131_013929_outLine
+BABEL_BP_104_97492_20120117_173450_inLine
+BABEL_BP_104_97492_20120117_173450_outLine
+BABEL_BP_104_97803_20120116_184019_inLine
+BABEL_BP_104_97803_20120116_184019_outLine
+BABEL_BP_104_97971_20120317_004835_inLine
+BABEL_BP_104_97971_20120317_004835_outLine
+BABEL_BP_104_98067_20120221_131601_inLine
+BABEL_BP_104_98067_20120221_131601_outLine
+BABEL_BP_104_98110_20120218_193615_outLine
+BABEL_BP_104_98331_20120223_014233_inLine
+BABEL_BP_104_98446_20120312_135630_inLine
+BABEL_BP_104_98503_20120402_230340_inLine
+BABEL_BP_104_98503_20120403_025554_inLine
+BABEL_BP_104_98588_20120119_011655_inLine
+BABEL_BP_104_98588_20120119_011655_outLine
+BABEL_BP_104_98942_20120205_224026_outLine
+BABEL_BP_104_98987_20120220_184452_inLine
+BABEL_BP_104_98987_20120220_184452_outLine
+BABEL_BP_104_98993_20120516_040504_inLine
+BABEL_BP_104_98993_20120516_040504_outLine
+BABEL_BP_104_99093_20120212_062850_inLine
+BABEL_BP_104_99093_20120212_062850_outLine
+BABEL_BP_104_99354_20120203_152733_inLine
+BABEL_BP_104_99354_20120203_152733_outLine
diff --git a/egs/babel/s5d/conf/lists/105-turkish/dev.list b/egs/babel/s5d/conf/lists/105-turkish/dev.list
new file mode 100644
index 00000000000..405c3a7662b
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/105-turkish/dev.list
@@ -0,0 +1,127 @@
+BABEL_BP_105_11521_20120602_034839_inLine
+BABEL_BP_105_11521_20120602_034839_outLine
+BABEL_BP_105_12844_20120208_220114_inLine
+BABEL_BP_105_12844_20120208_220114_outLine
+BABEL_BP_105_12963_20120122_062911_inLine
+BABEL_BP_105_12963_20120122_062911_outLine
+BABEL_BP_105_13795_20120125_230526_inLine
+BABEL_BP_105_13795_20120125_230526_outLine
+BABEL_BP_105_13795_20120125_232747_inLine
+BABEL_BP_105_13795_20120125_232747_outLine
+BABEL_BP_105_15146_20120106_223718_inLine
+BABEL_BP_105_15146_20120106_223719_outLine
+BABEL_BP_105_15916_20120201_072825_inLine
+BABEL_BP_105_15916_20120201_072825_outLine
+BABEL_BP_105_16185_20120609_224507_inLine
+BABEL_BP_105_16185_20120609_224507_outLine
+BABEL_BP_105_19861_20120530_035456_inLine
+BABEL_BP_105_19861_20120530_035456_outLine
+BABEL_BP_105_20213_20120123_011920_inLine
+BABEL_BP_105_20213_20120123_011920_outLine
+BABEL_BP_105_21541_20120518_012528_inLine
+BABEL_BP_105_22973_20120502_204152_inLine
+BABEL_BP_105_22973_20120502_204152_outLine
+BABEL_BP_105_26275_20120620_014345_inLine
+BABEL_BP_105_26275_20120620_014345_outLine
+BABEL_BP_105_29545_20120621_041202_inLine
+BABEL_BP_105_29545_20120621_041203_outLine
+BABEL_BP_105_31256_20120531_015506_inLine
+BABEL_BP_105_31256_20120531_015506_outLine
+BABEL_BP_105_31345_20120515_214849_inLine
+BABEL_BP_105_31345_20120515_214849_outLine
+BABEL_BP_105_32236_20120516_221311_inLine
+BABEL_BP_105_32236_20120516_221311_outLine
+BABEL_BP_105_35175_20120125_082450_inLine
+BABEL_BP_105_35175_20120125_082450_outLine
+BABEL_BP_105_39774_20120623_021020_inLine
+BABEL_BP_105_39774_20120623_021020_outLine
+BABEL_BP_105_39774_20120623_021946_inLine
+BABEL_BP_105_39774_20120623_021946_outLine
+BABEL_BP_105_39963_20120209_083935_inLine
+BABEL_BP_105_39963_20120209_083935_outLine
+BABEL_BP_105_40477_20120208_010255_inLine
+BABEL_BP_105_40477_20120208_010256_outLine
+BABEL_BP_105_40759_20120316_014011_inLine
+BABEL_BP_105_40759_20120316_014011_outLine
+BABEL_BP_105_42212_20120706_194059_inLine
+BABEL_BP_105_42212_20120706_194059_outLine
+BABEL_BP_105_42229_20120115_063922_inLine
+BABEL_BP_105_42229_20120115_063922_outLine
+BABEL_BP_105_44023_20120530_220359_inLine
+BABEL_BP_105_44023_20120530_220359_outLine
+BABEL_BP_105_44117_20120621_032955_inLine
+BABEL_BP_105_44117_20120621_032956_outLine
+BABEL_BP_105_48536_20120208_212737_inLine
+BABEL_BP_105_48536_20120208_212737_outLine
+BABEL_BP_105_49192_20120206_012605_inLine
+BABEL_BP_105_49192_20120206_012605_outLine
+BABEL_BP_105_54339_20120125_230415_inLine
+BABEL_BP_105_54339_20120125_230415_outLine
+BABEL_BP_105_55786_20120205_051854_inLine
+BABEL_BP_105_55786_20120205_051854_outLine
+BABEL_BP_105_55823_20120512_202135_inLine
+BABEL_BP_105_55823_20120512_202135_outLine
+BABEL_BP_105_56342_20120127_023015_inLine
+BABEL_BP_105_56342_20120127_023015_outLine
+BABEL_BP_105_60064_20120606_000812_inLine
+BABEL_BP_105_60064_20120606_000812_outLine
+BABEL_BP_105_60881_20120207_064233_inLine
+BABEL_BP_105_60881_20120207_064233_outLine
+BABEL_BP_105_66330_20120209_005003_inLine
+BABEL_BP_105_66330_20120209_005003_outLine
+BABEL_BP_105_66441_20120207_050412_inLine
+BABEL_BP_105_66441_20120207_050412_outLine
+BABEL_BP_105_66790_20120128_220452_inLine
+BABEL_BP_105_66790_20120128_220452_outLine
+BABEL_BP_105_66883_20120207_051718_inLine
+BABEL_BP_105_66883_20120207_051718_outLine
+BABEL_BP_105_67555_20120207_212802_inLine
+BABEL_BP_105_67555_20120207_212802_outLine
+BABEL_BP_105_67733_20120207_234950_inLine
+BABEL_BP_105_67733_20120207_234950_outLine
+BABEL_BP_105_69052_20120124_062415_inLine
+BABEL_BP_105_69052_20120124_062415_outLine
+BABEL_BP_105_75151_20120602_061054_inLine
+BABEL_BP_105_75151_20120602_061054_outLine
+BABEL_BP_105_76372_20120709_015738_inLine
+BABEL_BP_105_76372_20120709_015738_outLine
+BABEL_BP_105_76716_20120606_195423_inLine
+BABEL_BP_105_76763_20120107_022524_inLine
+BABEL_BP_105_76763_20120107_022524_outLine
+BABEL_BP_105_78487_20120318_080534_inLine
+BABEL_BP_105_80856_20120205_231607_inLine
+BABEL_BP_105_80856_20120205_231607_outLine
+BABEL_BP_105_84394_20120426_185010_inLine
+BABEL_BP_105_84394_20120426_185010_outLine
+BABEL_BP_105_84608_20120423_050353_inLine
+BABEL_BP_105_84608_20120423_050353_outLine
+BABEL_BP_105_87077_20120516_000252_inLine
+BABEL_BP_105_87077_20120516_000252_outLine
+BABEL_BP_105_87806_20120201_235442_inLine
+BABEL_BP_105_87806_20120201_235442_outLine
+BABEL_BP_105_88385_20120430_004520_inLine
+BABEL_BP_105_88385_20120430_004520_outLine
+BABEL_BP_105_90393_20120125_034434_inLine
+BABEL_BP_105_90393_20120125_034434_outLine
+BABEL_BP_105_91136_20120422_062317_inLine
+BABEL_BP_105_91136_20120422_062317_outLine
+BABEL_BP_105_91330_20120209_002721_inLine
+BABEL_BP_105_91330_20120209_002721_outLine
+BABEL_BP_105_91865_20120528_230057_inLine
+BABEL_BP_105_91865_20120528_230057_outLine
+BABEL_BP_105_92386_20120517_234302_inLine
+BABEL_BP_105_92386_20120517_234302_outLine
+BABEL_BP_105_92591_20120129_012358_inLine
+BABEL_BP_105_92591_20120129_012358_outLine
+BABEL_BP_105_93192_20120128_005138_inLine
+BABEL_BP_105_93192_20120128_005138_outLine
+BABEL_BP_105_93454_20120128_235224_inLine
+BABEL_BP_105_93454_20120128_235224_outLine
+BABEL_BP_105_93509_20120127_075513_inLine
+BABEL_BP_105_93509_20120127_075513_outLine
+BABEL_BP_105_95350_20120127_234045_inLine
+BABEL_BP_105_95350_20120127_234045_outLine
+BABEL_BP_105_95952_20120512_225006_inLine
+BABEL_BP_105_95952_20120512_225006_outLine
+BABEL_BP_105_95952_20120512_230254_inLine
+BABEL_BP_105_95952_20120512_230254_outLine
diff --git a/egs/babel/s5d/conf/lists/105-turkish/eval.list b/egs/babel/s5d/conf/lists/105-turkish/eval.list
new file mode 100644
index 00000000000..47736cf7f28
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/105-turkish/eval.list
@@ -0,0 +1,194 @@
+BABEL_BP_105_11158_20120609_061134_inLine
+BABEL_BP_105_11158_20120609_061134_outLine
+BABEL_BP_105_11478_20120128_081119_inLine
+BABEL_BP_105_11478_20120128_081119_outLine
+BABEL_BP_105_12535_20120528_235510_inLine
+BABEL_BP_105_12535_20120528_235510_outLine
+BABEL_BP_105_12667_20120502_025008_inLine
+BABEL_BP_105_12667_20120502_025008_outLine
+BABEL_BP_105_15859_20120313_033308_inLine
+BABEL_BP_105_15859_20120313_033308_outLine
+BABEL_BP_105_19153_20120125_060542_inLine
+BABEL_BP_105_19153_20120125_060542_outLine
+BABEL_BP_105_20332_20120615_235730_inLine
+BABEL_BP_105_20332_20120615_235730_outLine
+BABEL_BP_105_22229_20120106_234925_inLine
+BABEL_BP_105_22229_20120106_234925_outLine
+BABEL_BP_105_22229_20120107_000755_inLine
+BABEL_BP_105_22229_20120107_000755_outLine
+BABEL_BP_105_22566_20120621_011722_inLine
+BABEL_BP_105_22566_20120621_011722_outLine
+BABEL_BP_105_22696_20120529_224618_inLine
+BABEL_BP_105_22696_20120529_224618_outLine
+BABEL_BP_105_23714_20120531_230422_inLine
+BABEL_BP_105_23714_20120531_230422_outLine
+BABEL_BP_105_24642_20120525_033813_inLine
+BABEL_BP_105_24642_20120525_033814_outLine
+BABEL_BP_105_24661_20120615_203702_inLine
+BABEL_BP_105_24661_20120615_203702_outLine
+BABEL_BP_105_27178_20120816_063733_inLine
+BABEL_BP_105_27178_20120816_063733_outLine
+BABEL_BP_105_27645_20120501_182457_inLine
+BABEL_BP_105_27645_20120501_182457_outLine
+BABEL_BP_105_27825_20120205_013051_inLine
+BABEL_BP_105_27825_20120205_013051_outLine
+BABEL_BP_105_27916_20120530_234813_inLine
+BABEL_BP_105_27916_20120530_234813_outLine
+BABEL_BP_105_28768_20120531_033622_inLine
+BABEL_BP_105_28768_20120531_033622_outLine
+BABEL_BP_105_28768_20120531_035033_inLine
+BABEL_BP_105_28768_20120531_035033_outLine
+BABEL_BP_105_29512_20120129_020437_inLine
+BABEL_BP_105_29512_20120129_020437_outLine
+BABEL_BP_105_30227_20120519_234337_inLine
+BABEL_BP_105_30227_20120519_234337_outLine
+BABEL_BP_105_31393_20120814_054655_inLine
+BABEL_BP_105_31393_20120814_054658_outLine
+BABEL_BP_105_33969_20120430_013648_inLine
+BABEL_BP_105_33969_20120430_013648_outLine
+BABEL_BP_105_34370_20120209_233721_inLine
+BABEL_BP_105_34370_20120209_233721_outLine
+BABEL_BP_105_38464_20120531_202824_inLine
+BABEL_BP_105_38464_20120531_202824_outLine
+BABEL_BP_105_38985_20120123_064936_inLine
+BABEL_BP_105_38985_20120123_064936_outLine
+BABEL_BP_105_40385_20120626_182511_inLine
+BABEL_BP_105_40385_20120626_182511_outLine
+BABEL_BP_105_40439_20120603_221429_inLine
+BABEL_BP_105_40439_20120603_221429_outLine
+BABEL_BP_105_41513_20120127_091800_inLine
+BABEL_BP_105_41513_20120127_091800_outLine
+BABEL_BP_105_41541_20120610_220640_inLine
+BABEL_BP_105_41989_20120828_232255_inLine
+BABEL_BP_105_41989_20120828_232255_outLine
+BABEL_BP_105_42749_20120504_192522_inLine
+BABEL_BP_105_42749_20120504_192522_outLine
+BABEL_BP_105_42768_20120517_203439_inLine
+BABEL_BP_105_42768_20120517_203439_outLine
+BABEL_BP_105_42768_20120517_204350_inLine
+BABEL_BP_105_42768_20120517_204350_outLine
+BABEL_BP_105_44038_20120628_032429_inLine
+BABEL_BP_105_44038_20120628_032429_outLine
+BABEL_BP_105_45106_20120106_231201_inLine
+BABEL_BP_105_45106_20120106_231201_outLine
+BABEL_BP_105_45145_20120207_231842_inLine
+BABEL_BP_105_45145_20120207_231842_outLine
+BABEL_BP_105_45677_20120527_022244_inLine
+BABEL_BP_105_45677_20120527_022244_outLine
+BABEL_BP_105_45786_20120518_034117_inLine
+BABEL_BP_105_45786_20120518_034118_outLine
+BABEL_BP_105_45893_20120131_060048_inLine
+BABEL_BP_105_45893_20120131_060048_outLine
+BABEL_BP_105_46427_20120208_230929_inLine
+BABEL_BP_105_46427_20120208_230929_outLine
+BABEL_BP_105_46813_20120521_040045_inLine
+BABEL_BP_105_46813_20120521_040046_outLine
+BABEL_BP_105_47263_20120603_001729_inLine
+BABEL_BP_105_47263_20120603_001729_outLine
+BABEL_BP_105_48191_20120616_010543_inLine
+BABEL_BP_105_48191_20120616_010543_outLine
+BABEL_BP_105_49714_20120529_004423_inLine
+BABEL_BP_105_49714_20120529_004423_outLine
+BABEL_BP_105_50915_20120606_030647_inLine
+BABEL_BP_105_50915_20120606_030647_outLine
+BABEL_BP_105_51042_20120609_053754_inLine
+BABEL_BP_105_51042_20120609_053754_outLine
+BABEL_BP_105_51374_20120808_021113_inLine
+BABEL_BP_105_51374_20120808_021113_outLine
+BABEL_BP_105_55450_20120201_022826_inLine
+BABEL_BP_105_55450_20120201_022826_outLine
+BABEL_BP_105_55777_20120529_060606_inLine
+BABEL_BP_105_55777_20120529_060606_outLine
+BABEL_BP_105_55777_20120529_065353_inLine
+BABEL_BP_105_55777_20120529_065353_outLine
+BABEL_BP_105_56812_20120601_070152_inLine
+BABEL_BP_105_56812_20120601_070152_outLine
+BABEL_BP_105_60848_20120627_050640_inLine
+BABEL_BP_105_60848_20120627_050643_outLine
+BABEL_BP_105_62160_20120815_073641_inLine
+BABEL_BP_105_62160_20120815_073641_outLine
+BABEL_BP_105_62177_20120206_010509_inLine
+BABEL_BP_105_62177_20120206_010509_outLine
+BABEL_BP_105_63459_20120316_010003_inLine
+BABEL_BP_105_63491_20120131_020702_inLine
+BABEL_BP_105_63491_20120131_020702_outLine
+BABEL_BP_105_65601_20120130_233749_inLine
+BABEL_BP_105_65601_20120130_233749_outLine
+BABEL_BP_105_65732_20120210_054155_inLine
+BABEL_BP_105_65732_20120210_054155_outLine
+BABEL_BP_105_66188_20120611_222651_inLine
+BABEL_BP_105_66188_20120611_222651_outLine
+BABEL_BP_105_68671_20120607_065759_inLine
+BABEL_BP_105_68671_20120607_065759_outLine
+BABEL_BP_105_69145_20120607_070422_inLine
+BABEL_BP_105_69145_20120607_070422_outLine
+BABEL_BP_105_69275_20120607_085559_inLine
+BABEL_BP_105_69275_20120607_085559_outLine
+BABEL_BP_105_70077_20120615_070304_inLine
+BABEL_BP_105_70077_20120615_070304_outLine
+BABEL_BP_105_71654_20120129_031219_inLine
+BABEL_BP_105_71654_20120129_031219_outLine
+BABEL_BP_105_72011_20120708_195954_inLine
+BABEL_BP_105_72011_20120708_195954_outLine
+BABEL_BP_105_72011_20120708_201001_inLine
+BABEL_BP_105_72011_20120708_201001_outLine
+BABEL_BP_105_73562_20120206_084510_inLine
+BABEL_BP_105_73562_20120206_084510_outLine
+BABEL_BP_105_73757_20120206_093159_inLine
+BABEL_BP_105_73757_20120206_093159_outLine
+BABEL_BP_105_74295_20120122_020359_inLine
+BABEL_BP_105_74295_20120122_020359_outLine
+BABEL_BP_105_74607_20120208_041443_inLine
+BABEL_BP_105_74607_20120208_041443_outLine
+BABEL_BP_105_75020_20120808_014405_inLine
+BABEL_BP_105_75020_20120808_014405_outLine
+BABEL_BP_105_77771_20120529_022050_inLine
+BABEL_BP_105_77771_20120529_022050_outLine
+BABEL_BP_105_78245_20120815_044319_inLine
+BABEL_BP_105_78245_20120815_044319_outLine
+BABEL_BP_105_78728_20120210_014021_inLine
+BABEL_BP_105_78728_20120210_014021_outLine
+BABEL_BP_105_80174_20120606_185602_inLine
+BABEL_BP_105_80174_20120606_185602_outLine
+BABEL_BP_105_80535_20120611_065341_inLine
+BABEL_BP_105_80535_20120611_065341_outLine
+BABEL_BP_105_81944_20120531_010546_inLine
+BABEL_BP_105_81944_20120531_010546_outLine
+BABEL_BP_105_81996_20120208_060259_inLine
+BABEL_BP_105_81996_20120208_060259_outLine
+BABEL_BP_105_83012_20120529_010427_inLine
+BABEL_BP_105_83012_20120529_010427_outLine
+BABEL_BP_105_83053_20120121_030631_inLine
+BABEL_BP_105_83053_20120121_030631_outLine
+BABEL_BP_105_84700_20120530_041137_inLine
+BABEL_BP_105_84700_20120530_041137_outLine
+BABEL_BP_105_84865_20120619_034124_inLine
+BABEL_BP_105_84865_20120619_034124_outLine
+BABEL_BP_105_86305_20120201_230055_inLine
+BABEL_BP_105_86305_20120201_230055_outLine
+BABEL_BP_105_86998_20120613_030245_inLine
+BABEL_BP_105_86998_20120613_030245_outLine
+BABEL_BP_105_87885_20120709_012121_inLine
+BABEL_BP_105_87885_20120709_012121_outLine
+BABEL_BP_105_88245_20120430_200721_inLine
+BABEL_BP_105_88245_20120430_200721_outLine
+BABEL_BP_105_88982_20120128_051748_inLine
+BABEL_BP_105_88982_20120128_051748_outLine
+BABEL_BP_105_90180_20120611_232400_inLine
+BABEL_BP_105_90180_20120611_232400_outLine
+BABEL_BP_105_90313_20120128_001531_inLine
+BABEL_BP_105_90313_20120128_001531_outLine
+BABEL_BP_105_92308_20120616_231053_inLine
+BABEL_BP_105_92308_20120616_231053_outLine
+BABEL_BP_105_92328_20120611_062634_inLine
+BABEL_BP_105_92328_20120611_062634_outLine
+BABEL_BP_105_92820_20120521_005626_inLine
+BABEL_BP_105_92820_20120521_005626_outLine
+BABEL_BP_105_92852_20120221_033327_inLine
+BABEL_BP_105_92852_20120221_033327_outLine
+BABEL_BP_105_93151_20120208_021412_inLine
+BABEL_BP_105_93151_20120208_021412_outLine
+BABEL_BP_105_95861_20120202_000341_inLine
+BABEL_BP_105_95861_20120202_000341_outLine
+BABEL_BP_105_99929_20120603_000106_inLine
+BABEL_BP_105_99929_20120603_000106_outLine
diff --git a/egs/babel/s5d/conf/lists/105-turkish/evalpart1.list b/egs/babel/s5d/conf/lists/105-turkish/evalpart1.list
new file mode 100644
index 00000000000..87d6e0f050b
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/105-turkish/evalpart1.list
@@ -0,0 +1,65 @@
+BABEL_BP_105_11478_20120128_081119_inLine
+BABEL_BP_105_11478_20120128_081119_outLine
+BABEL_BP_105_12667_20120502_025008_inLine
+BABEL_BP_105_12667_20120502_025008_outLine
+BABEL_BP_105_15859_20120313_033308_inLine
+BABEL_BP_105_15859_20120313_033308_outLine
+BABEL_BP_105_22566_20120621_011722_inLine
+BABEL_BP_105_22566_20120621_011722_outLine
+BABEL_BP_105_27645_20120501_182457_inLine
+BABEL_BP_105_27645_20120501_182457_outLine
+BABEL_BP_105_33969_20120430_013648_inLine
+BABEL_BP_105_33969_20120430_013648_outLine
+BABEL_BP_105_38985_20120123_064936_inLine
+BABEL_BP_105_38985_20120123_064936_outLine
+BABEL_BP_105_41989_20120828_232255_inLine
+BABEL_BP_105_41989_20120828_232255_outLine
+BABEL_BP_105_42749_20120504_192522_inLine
+BABEL_BP_105_42749_20120504_192522_outLine
+BABEL_BP_105_42768_20120517_203439_inLine
+BABEL_BP_105_42768_20120517_203439_outLine
+BABEL_BP_105_42768_20120517_204350_inLine
+BABEL_BP_105_42768_20120517_204350_outLine
+BABEL_BP_105_45106_20120106_231201_inLine
+BABEL_BP_105_45106_20120106_231201_outLine
+BABEL_BP_105_45677_20120527_022244_inLine
+BABEL_BP_105_45677_20120527_022244_outLine
+BABEL_BP_105_46427_20120208_230929_inLine
+BABEL_BP_105_46427_20120208_230929_outLine
+BABEL_BP_105_46813_20120521_040045_inLine
+BABEL_BP_105_46813_20120521_040046_outLine
+BABEL_BP_105_47263_20120603_001729_inLine
+BABEL_BP_105_47263_20120603_001729_outLine
+BABEL_BP_105_50915_20120606_030647_inLine
+BABEL_BP_105_50915_20120606_030647_outLine
+BABEL_BP_105_51374_20120808_021113_inLine
+BABEL_BP_105_51374_20120808_021113_outLine
+BABEL_BP_105_60848_20120627_050640_inLine
+BABEL_BP_105_60848_20120627_050643_outLine
+BABEL_BP_105_63459_20120316_010003_inLine
+BABEL_BP_105_63491_20120131_020702_inLine
+BABEL_BP_105_63491_20120131_020702_outLine
+BABEL_BP_105_65601_20120130_233749_inLine
+BABEL_BP_105_65601_20120130_233749_outLine
+BABEL_BP_105_65732_20120210_054155_inLine
+BABEL_BP_105_65732_20120210_054155_outLine
+BABEL_BP_105_72011_20120708_195954_inLine
+BABEL_BP_105_72011_20120708_195954_outLine
+BABEL_BP_105_72011_20120708_201001_inLine
+BABEL_BP_105_72011_20120708_201001_outLine
+BABEL_BP_105_74295_20120122_020359_inLine
+BABEL_BP_105_74295_20120122_020359_outLine
+BABEL_BP_105_78245_20120815_044319_inLine
+BABEL_BP_105_78245_20120815_044319_outLine
+BABEL_BP_105_80174_20120606_185602_inLine
+BABEL_BP_105_80174_20120606_185602_outLine
+BABEL_BP_105_81944_20120531_010546_inLine
+BABEL_BP_105_81944_20120531_010546_outLine
+BABEL_BP_105_83053_20120121_030631_inLine
+BABEL_BP_105_83053_20120121_030631_outLine
+BABEL_BP_105_84700_20120530_041137_inLine
+BABEL_BP_105_84700_20120530_041137_outLine
+BABEL_BP_105_87885_20120709_012121_inLine
+BABEL_BP_105_87885_20120709_012121_outLine
+BABEL_BP_105_88982_20120128_051748_inLine
+BABEL_BP_105_88982_20120128_051748_outLine
diff --git a/egs/babel/s5d/conf/lists/105-turkish/train.FullLP.list b/egs/babel/s5d/conf/lists/105-turkish/train.FullLP.list
new file mode 100644
index 00000000000..6d810bffecc
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/105-turkish/train.FullLP.list
@@ -0,0 +1,993 @@
+BABEL_BP_105_10160_20120107_220423_inLine
+BABEL_BP_105_10160_20120107_220423_outLine
+BABEL_BP_105_10211_20120602_185303_inLine
+BABEL_BP_105_10211_20120602_185303_outLine
+BABEL_BP_105_10467_20120520_004721_inLine
+BABEL_BP_105_10467_20120520_004721_outLine
+BABEL_BP_105_10973_20120604_181602_inLine
+BABEL_BP_105_10973_20120604_181602_outLine
+BABEL_BP_105_11022_20120126_221846_inLine
+BABEL_BP_105_11022_20120126_221846_outLine
+BABEL_BP_105_11152_20120608_002410_inLine
+BABEL_BP_105_11152_20120608_002410_outLine
+BABEL_BP_105_11371_20120110_001148_inLine
+BABEL_BP_105_11371_20120110_001148_outLine
+BABEL_BP_105_11422_20120110_233241_inLine
+BABEL_BP_105_11422_20120110_233241_outLine
+BABEL_BP_105_11627_20120209_232308_inLine
+BABEL_BP_105_11868_20120518_025856_inLine
+BABEL_BP_105_11868_20120518_025856_outLine
+BABEL_BP_105_11982_20120520_192511_outLine
+BABEL_BP_105_12003_20120220_085129_inLine
+BABEL_BP_105_12003_20120220_085131_outLine
+BABEL_BP_105_12120_20120621_024039_inLine
+BABEL_BP_105_12120_20120621_024039_outLine
+BABEL_BP_105_12439_20120520_215211_inLine
+BABEL_BP_105_12439_20120520_215211_outLine
+BABEL_BP_105_12643_20120628_010121_inLine
+BABEL_BP_105_13065_20120208_032637_inLine
+BABEL_BP_105_13065_20120208_032637_outLine
+BABEL_BP_105_13118_20120130_042038_outLine
+BABEL_BP_105_13389_20120530_002622_inLine
+BABEL_BP_105_13389_20120530_002622_outLine
+BABEL_BP_105_13530_20120604_015841_inLine
+BABEL_BP_105_13530_20120604_015841_outLine
+BABEL_BP_105_13660_20120314_062650_inLine
+BABEL_BP_105_13660_20120314_062651_outLine
+BABEL_BP_105_13702_20120512_204855_inLine
+BABEL_BP_105_13702_20120512_204855_outLine
+BABEL_BP_105_13913_20120121_005810_inLine
+BABEL_BP_105_13913_20120121_005810_outLine
+BABEL_BP_105_14054_20120205_012603_inLine
+BABEL_BP_105_14054_20120205_012603_outLine
+BABEL_BP_105_14707_20120121_003857_inLine
+BABEL_BP_105_14707_20120121_003857_outLine
+BABEL_BP_105_14891_20120107_224233_inLine
+BABEL_BP_105_14891_20120107_224233_outLine
+BABEL_BP_105_14936_20120528_215659_inLine
+BABEL_BP_105_14936_20120528_215659_outLine
+BABEL_BP_105_14997_20120314_212654_inLine
+BABEL_BP_105_14997_20120314_212654_outLine
+BABEL_BP_105_15022_20120204_043515_inLine
+BABEL_BP_105_15022_20120204_043515_outLine
+BABEL_BP_105_16066_20120205_105046_inLine
+BABEL_BP_105_16066_20120205_105046_outLine
+BABEL_BP_105_16257_20120709_025101_inLine
+BABEL_BP_105_16257_20120709_025101_outLine
+BABEL_BP_105_16346_20120122_031133_inLine
+BABEL_BP_105_16346_20120122_031133_outLine
+BABEL_BP_105_16617_20120315_024321_inLine
+BABEL_BP_105_16617_20120315_024321_outLine
+BABEL_BP_105_16646_20120209_075016_inLine
+BABEL_BP_105_16646_20120209_075016_outLine
+BABEL_BP_105_16855_20120210_062956_inLine
+BABEL_BP_105_16855_20120210_062956_outLine
+BABEL_BP_105_16875_20120626_033717_inLine
+BABEL_BP_105_16875_20120626_033718_outLine
+BABEL_BP_105_16883_20120121_060732_inLine
+BABEL_BP_105_16883_20120121_060732_outLine
+BABEL_BP_105_17013_20120314_031626_inLine
+BABEL_BP_105_17013_20120314_031626_outLine
+BABEL_BP_105_17018_20120421_182457_outLine
+BABEL_BP_105_17511_20120128_212023_inLine
+BABEL_BP_105_17511_20120128_212023_outLine
+BABEL_BP_105_17606_20120530_230042_inLine
+BABEL_BP_105_17606_20120530_230042_outLine
+BABEL_BP_105_17933_20120130_062220_inLine
+BABEL_BP_105_17948_20120120_073631_inLine
+BABEL_BP_105_17948_20120120_073631_outLine
+BABEL_BP_105_18209_20120129_215151_inLine
+BABEL_BP_105_18209_20120129_215151_outLine
+BABEL_BP_105_18234_20120220_051332_inLine
+BABEL_BP_105_18672_20120131_015941_inLine
+BABEL_BP_105_18672_20120131_015941_outLine
+BABEL_BP_105_18701_20120127_035425_inLine
+BABEL_BP_105_18701_20120127_035425_outLine
+BABEL_BP_105_18716_20120218_070145_inLine
+BABEL_BP_105_18802_20120620_222614_inLine
+BABEL_BP_105_18802_20120620_222614_outLine
+BABEL_BP_105_19248_20120504_193537_inLine
+BABEL_BP_105_19248_20120504_193537_outLine
+BABEL_BP_105_19404_20120829_192145_inLine
+BABEL_BP_105_19404_20120829_192145_outLine
+BABEL_BP_105_19479_20120527_195818_inLine
+BABEL_BP_105_19479_20120527_195818_outLine
+BABEL_BP_105_19479_20120527_200936_inLine
+BABEL_BP_105_19479_20120527_200936_outLine
+BABEL_BP_105_19731_20120519_190911_inLine
+BABEL_BP_105_19731_20120519_190911_outLine
+BABEL_BP_105_20320_20120207_211206_inLine
+BABEL_BP_105_20320_20120207_211206_outLine
+BABEL_BP_105_20347_20120504_231529_inLine
+BABEL_BP_105_20347_20120504_232320_inLine
+BABEL_BP_105_20462_20120605_192730_inLine
+BABEL_BP_105_20462_20120605_192730_outLine
+BABEL_BP_105_20471_20120125_013916_inLine
+BABEL_BP_105_20471_20120125_013916_outLine
+BABEL_BP_105_20471_20120125_015348_inLine
+BABEL_BP_105_20471_20120125_015348_outLine
+BABEL_BP_105_20483_20120202_013100_inLine
+BABEL_BP_105_20483_20120202_013100_outLine
+BABEL_BP_105_20518_20120202_070149_inLine
+BABEL_BP_105_20518_20120202_070149_outLine
+BABEL_BP_105_20590_20120106_021113_inLine
+BABEL_BP_105_20590_20120106_021113_outLine
+BABEL_BP_105_20591_20120126_045259_inLine
+BABEL_BP_105_20591_20120126_045259_outLine
+BABEL_BP_105_21258_20120528_002304_inLine
+BABEL_BP_105_21258_20120528_002304_outLine
+BABEL_BP_105_21367_20120120_050000_inLine
+BABEL_BP_105_21367_20120120_050000_outLine
+BABEL_BP_105_21370_20120605_185740_inLine
+BABEL_BP_105_21370_20120605_185740_outLine
+BABEL_BP_105_21430_20120129_024859_outLine
+BABEL_BP_105_21518_20120118_195555_inLine
+BABEL_BP_105_21518_20120118_195555_outLine
+BABEL_BP_105_21714_20120518_223459_inLine
+BABEL_BP_105_21714_20120518_223459_outLine
+BABEL_BP_105_21782_20120130_003418_outLine
+BABEL_BP_105_21946_20120504_035038_inLine
+BABEL_BP_105_21946_20120504_035039_outLine
+BABEL_BP_105_22179_20120206_023628_inLine
+BABEL_BP_105_22179_20120206_023628_outLine
+BABEL_BP_105_22272_20120430_191440_inLine
+BABEL_BP_105_22272_20120430_191440_outLine
+BABEL_BP_105_22408_20120131_202129_inLine
+BABEL_BP_105_22408_20120131_202129_outLine
+BABEL_BP_105_22408_20120131_210558_inLine
+BABEL_BP_105_22408_20120131_210558_outLine
+BABEL_BP_105_22509_20120429_020025_inLine
+BABEL_BP_105_22509_20120429_020025_outLine
+BABEL_BP_105_22898_20120129_040904_inLine
+BABEL_BP_105_22898_20120129_040904_outLine
+BABEL_BP_105_22903_20120204_205250_inLine
+BABEL_BP_105_22903_20120204_205250_outLine
+BABEL_BP_105_22910_20120208_013659_inLine
+BABEL_BP_105_22910_20120208_013659_outLine
+BABEL_BP_105_23167_20120520_193822_inLine
+BABEL_BP_105_23167_20120520_193822_outLine
+BABEL_BP_105_23502_20120129_223353_inLine
+BABEL_BP_105_23502_20120129_223353_outLine
+BABEL_BP_105_23571_20120131_040441_inLine
+BABEL_BP_105_23571_20120131_040441_outLine
+BABEL_BP_105_23629_20120503_212942_inLine
+BABEL_BP_105_23629_20120503_212942_outLine
+BABEL_BP_105_23930_20120127_051732_outLine
+BABEL_BP_105_24094_20120203_230434_inLine
+BABEL_BP_105_24094_20120203_230434_outLine
+BABEL_BP_105_24420_20120122_053229_inLine
+BABEL_BP_105_24420_20120122_053229_outLine
+BABEL_BP_105_24589_20120530_180625_inLine
+BABEL_BP_105_24589_20120530_180625_outLine
+BABEL_BP_105_24608_20120111_023000_inLine
+BABEL_BP_105_24608_20120111_023000_outLine
+BABEL_BP_105_24638_20120120_040215_inLine
+BABEL_BP_105_24638_20120120_040215_outLine
+BABEL_BP_105_25035_20120221_014614_inLine
+BABEL_BP_105_25035_20120221_014614_outLine
+BABEL_BP_105_25106_20120129_003957_inLine
+BABEL_BP_105_25106_20120129_003957_outLine
+BABEL_BP_105_25236_20120209_002129_inLine
+BABEL_BP_105_25236_20120209_002129_outLine
+BABEL_BP_105_25278_20120208_203010_inLine
+BABEL_BP_105_25278_20120208_203010_outLine
+BABEL_BP_105_25315_20120516_232406_inLine
+BABEL_BP_105_25315_20120516_232406_outLine
+BABEL_BP_105_25735_20120520_030401_inLine
+BABEL_BP_105_25735_20120520_030401_outLine
+BABEL_BP_105_25934_20120105_020031_inLine
+BABEL_BP_105_25992_20120120_012613_inLine
+BABEL_BP_105_25992_20120120_012613_outLine
+BABEL_BP_105_26164_20120627_210408_inLine
+BABEL_BP_105_26164_20120627_210408_outLine
+BABEL_BP_105_26350_20120113_221856_inLine
+BABEL_BP_105_26350_20120113_221856_outLine
+BABEL_BP_105_26598_20120124_055700_inLine
+BABEL_BP_105_26598_20120124_055700_outLine
+BABEL_BP_105_26644_20120517_212756_inLine
+BABEL_BP_105_26644_20120517_212756_outLine
+BABEL_BP_105_26684_20120125_030410_inLine
+BABEL_BP_105_26684_20120125_030410_outLine
+BABEL_BP_105_27349_20120129_233743_inLine
+BABEL_BP_105_27349_20120129_233743_outLine
+BABEL_BP_105_27605_20120129_073539_inLine
+BABEL_BP_105_27605_20120129_073539_outLine
+BABEL_BP_105_27724_20120130_023439_inLine
+BABEL_BP_105_27724_20120130_023439_outLine
+BABEL_BP_105_28107_20120221_061758_outLine
+BABEL_BP_105_28204_20120130_031505_inLine
+BABEL_BP_105_28204_20120130_031505_outLine
+BABEL_BP_105_28889_20120204_200150_outLine
+BABEL_BP_105_29133_20120220_042138_inLine
+BABEL_BP_105_29168_20120131_214316_inLine
+BABEL_BP_105_29168_20120131_214316_outLine
+BABEL_BP_105_29259_20120607_190658_inLine
+BABEL_BP_105_29259_20120607_190658_outLine
+BABEL_BP_105_29276_20120209_054912_inLine
+BABEL_BP_105_29276_20120209_054912_outLine
+BABEL_BP_105_29290_20120130_044642_inLine
+BABEL_BP_105_29302_20120128_044018_outLine
+BABEL_BP_105_29335_20120125_090733_inLine
+BABEL_BP_105_29335_20120125_090733_outLine
+BABEL_BP_105_29407_20120531_013323_inLine
+BABEL_BP_105_29407_20120531_013323_outLine
+BABEL_BP_105_29421_20120127_235240_inLine
+BABEL_BP_105_29421_20120127_235240_outLine
+BABEL_BP_105_29444_20120204_050434_inLine
+BABEL_BP_105_29444_20120204_050434_outLine
+BABEL_BP_105_29771_20120430_234735_inLine
+BABEL_BP_105_29771_20120430_234735_outLine
+BABEL_BP_105_29988_20120120_075802_inLine
+BABEL_BP_105_29988_20120120_075802_outLine
+BABEL_BP_105_30168_20120209_192615_inLine
+BABEL_BP_105_30168_20120209_192615_outLine
+BABEL_BP_105_30554_20120126_022601_inLine
+BABEL_BP_105_30554_20120126_022601_outLine
+BABEL_BP_105_31281_20120130_004325_inLine
+BABEL_BP_105_31281_20120130_004325_outLine
+BABEL_BP_105_31460_20120603_224411_inLine
+BABEL_BP_105_31460_20120603_224411_outLine
+BABEL_BP_105_31917_20120202_083328_inLine
+BABEL_BP_105_31917_20120202_083328_outLine
+BABEL_BP_105_32120_20120627_232416_inLine
+BABEL_BP_105_32120_20120627_232416_outLine
+BABEL_BP_105_32263_20120125_003247_inLine
+BABEL_BP_105_32263_20120125_003247_outLine
+BABEL_BP_105_32295_20120201_060053_inLine
+BABEL_BP_105_32334_20120126_064227_inLine
+BABEL_BP_105_32334_20120126_064227_outLine
+BABEL_BP_105_32642_20120518_185259_outLine
+BABEL_BP_105_32663_20120709_040652_inLine
+BABEL_BP_105_32663_20120709_040652_outLine
+BABEL_BP_105_32710_20120320_040408_inLine
+BABEL_BP_105_32710_20120320_040408_outLine
+BABEL_BP_105_32818_20120530_032934_inLine
+BABEL_BP_105_32818_20120530_032935_outLine
+BABEL_BP_105_33671_20120314_060721_inLine
+BABEL_BP_105_33671_20120314_060721_outLine
+BABEL_BP_105_34169_20120209_195657_outLine
+BABEL_BP_105_34194_20120206_104021_inLine
+BABEL_BP_105_34194_20120206_104021_outLine
+BABEL_BP_105_34235_20120206_051248_inLine
+BABEL_BP_105_34248_20120628_013714_inLine
+BABEL_BP_105_34248_20120628_013714_outLine
+BABEL_BP_105_34480_20120605_033447_inLine
+BABEL_BP_105_34480_20120605_033447_outLine
+BABEL_BP_105_34498_20120127_071326_inLine
+BABEL_BP_105_34498_20120127_071326_outLine
+BABEL_BP_105_34590_20120829_000220_inLine
+BABEL_BP_105_34590_20120829_000220_outLine
+BABEL_BP_105_35006_20120118_204903_inLine
+BABEL_BP_105_35006_20120118_204903_outLine
+BABEL_BP_105_35011_20120314_000129_inLine
+BABEL_BP_105_35229_20120621_203612_inLine
+BABEL_BP_105_35229_20120621_203612_outLine
+BABEL_BP_105_35324_20120117_204415_inLine
+BABEL_BP_105_35324_20120117_204415_outLine
+BABEL_BP_105_35329_20120203_051310_inLine
+BABEL_BP_105_35329_20120203_051310_outLine
+BABEL_BP_105_35357_20120530_040330_inLine
+BABEL_BP_105_35357_20120530_040330_outLine
+BABEL_BP_105_35576_20120530_184018_inLine
+BABEL_BP_105_35576_20120530_184018_outLine
+BABEL_BP_105_36276_20120519_000042_inLine
+BABEL_BP_105_36276_20120519_000042_outLine
+BABEL_BP_105_36360_20120121_024157_inLine
+BABEL_BP_105_36360_20120121_024157_outLine
+BABEL_BP_105_36383_20120126_014553_inLine
+BABEL_BP_105_36561_20120125_091214_inLine
+BABEL_BP_105_36561_20120125_091214_outLine
+BABEL_BP_105_36711_20120817_211133_inLine
+BABEL_BP_105_36711_20120817_211133_outLine
+BABEL_BP_105_36722_20120420_012709_inLine
+BABEL_BP_105_36722_20120420_012709_outLine
+BABEL_BP_105_36975_20120119_201922_inLine
+BABEL_BP_105_36975_20120119_201922_outLine
+BABEL_BP_105_37094_20120111_013332_inLine
+BABEL_BP_105_37094_20120111_013332_outLine
+BABEL_BP_105_37110_20120113_201333_inLine
+BABEL_BP_105_37110_20120113_201333_outLine
+BABEL_BP_105_37111_20120504_215437_inLine
+BABEL_BP_105_37111_20120504_215437_outLine
+BABEL_BP_105_37260_20120314_015840_inLine
+BABEL_BP_105_37260_20120314_015840_outLine
+BABEL_BP_105_37444_20120518_221718_inLine
+BABEL_BP_105_37444_20120518_221718_outLine
+BABEL_BP_105_37461_20120530_010739_inLine
+BABEL_BP_105_37461_20120530_010739_outLine
+BABEL_BP_105_38108_20120129_001503_inLine
+BABEL_BP_105_38108_20120129_001503_outLine
+BABEL_BP_105_38640_20120208_010027_inLine
+BABEL_BP_105_38640_20120208_010027_outLine
+BABEL_BP_105_39066_20120206_073804_inLine
+BABEL_BP_105_39066_20120206_073804_outLine
+BABEL_BP_105_39114_20120516_035141_inLine
+BABEL_BP_105_39114_20120516_035141_outLine
+BABEL_BP_105_39384_20120525_200159_outLine
+BABEL_BP_105_39384_20120525_200904_outLine
+BABEL_BP_105_39915_20120527_221155_inLine
+BABEL_BP_105_39915_20120527_221155_outLine
+BABEL_BP_105_39997_20120202_204531_inLine
+BABEL_BP_105_39997_20120202_204531_outLine
+BABEL_BP_105_40002_20120202_061416_inLine
+BABEL_BP_105_40002_20120202_061416_outLine
+BABEL_BP_105_40040_20120125_211630_inLine
+BABEL_BP_105_40040_20120125_211630_outLine
+BABEL_BP_105_40046_20120110_013037_inLine
+BABEL_BP_105_40046_20120110_013037_outLine
+BABEL_BP_105_40084_20120127_075326_inLine
+BABEL_BP_105_40084_20120127_075326_outLine
+BABEL_BP_105_40123_20120527_021542_inLine
+BABEL_BP_105_40123_20120527_021542_outLine
+BABEL_BP_105_40346_20120109_223712_inLine
+BABEL_BP_105_40346_20120109_223712_outLine
+BABEL_BP_105_40510_20120128_063431_inLine
+BABEL_BP_105_40510_20120128_063431_outLine
+BABEL_BP_105_40980_20120208_102244_outLine
+BABEL_BP_105_41170_20120110_004951_inLine
+BABEL_BP_105_41170_20120110_004951_outLine
+BABEL_BP_105_41456_20120316_021539_inLine
+BABEL_BP_105_41456_20120316_021539_outLine
+BABEL_BP_105_41540_20120121_064850_inLine
+BABEL_BP_105_41540_20120121_064850_outLine
+BABEL_BP_105_41561_20120708_205430_inLine
+BABEL_BP_105_41561_20120708_205430_outLine
+BABEL_BP_105_41661_20120206_073351_inLine
+BABEL_BP_105_41661_20120206_073351_outLine
+BABEL_BP_105_41797_20120208_054959_inLine
+BABEL_BP_105_41797_20120208_054959_outLine
+BABEL_BP_105_42145_20120210_004555_inLine
+BABEL_BP_105_42145_20120210_004555_outLine
+BABEL_BP_105_42309_20120530_225817_inLine
+BABEL_BP_105_42309_20120530_225817_outLine
+BABEL_BP_105_42471_20120210_064751_outLine
+BABEL_BP_105_42651_20120208_003002_inLine
+BABEL_BP_105_42651_20120208_003002_outLine
+BABEL_BP_105_42788_20120520_202049_outLine
+BABEL_BP_105_42853_20120105_232804_inLine
+BABEL_BP_105_42853_20120105_232804_outLine
+BABEL_BP_105_43017_20120814_005806_inLine
+BABEL_BP_105_43017_20120814_005806_outLine
+BABEL_BP_105_43277_20120122_050352_inLine
+BABEL_BP_105_43277_20120122_050352_outLine
+BABEL_BP_105_43317_20120516_181202_inLine
+BABEL_BP_105_43317_20120516_181202_outLine
+BABEL_BP_105_43383_20120814_060445_inLine
+BABEL_BP_105_43383_20120814_060445_outLine
+BABEL_BP_105_43425_20120520_223154_inLine
+BABEL_BP_105_43425_20120520_223154_outLine
+BABEL_BP_105_43425_20120520_224822_inLine
+BABEL_BP_105_43425_20120520_224822_outLine
+BABEL_BP_105_43426_20120127_054206_inLine
+BABEL_BP_105_43426_20120127_054206_outLine
+BABEL_BP_105_43991_20120201_043008_inLine
+BABEL_BP_105_43991_20120201_043008_outLine
+BABEL_BP_105_44129_20120203_031411_inLine
+BABEL_BP_105_44209_20120130_072808_inLine
+BABEL_BP_105_44209_20120130_072808_outLine
+BABEL_BP_105_44500_20120531_224758_inLine
+BABEL_BP_105_44500_20120531_224758_outLine
+BABEL_BP_105_44568_20120315_215919_inLine
+BABEL_BP_105_44568_20120315_215919_outLine
+BABEL_BP_105_44756_20120125_222756_inLine
+BABEL_BP_105_44829_20120816_071805_inLine
+BABEL_BP_105_44829_20120816_071805_outLine
+BABEL_BP_105_44836_20120208_085036_inLine
+BABEL_BP_105_44836_20120208_085036_outLine
+BABEL_BP_105_45214_20120209_223827_inLine
+BABEL_BP_105_45214_20120209_223827_outLine
+BABEL_BP_105_45227_20120208_205329_inLine
+BABEL_BP_105_45227_20120208_205329_outLine
+BABEL_BP_105_45511_20120601_001634_inLine
+BABEL_BP_105_45511_20120601_001634_outLine
+BABEL_BP_105_45512_20120208_063419_inLine
+BABEL_BP_105_45512_20120208_063419_outLine
+BABEL_BP_105_45655_20120206_065331_inLine
+BABEL_BP_105_45655_20120206_065331_outLine
+BABEL_BP_105_45681_20120209_002338_inLine
+BABEL_BP_105_45681_20120209_002338_outLine
+BABEL_BP_105_45929_20120208_051244_inLine
+BABEL_BP_105_45929_20120208_051244_outLine
+BABEL_BP_105_45931_20120816_011738_inLine
+BABEL_BP_105_45931_20120816_011738_outLine
+BABEL_BP_105_46603_20120430_193144_inLine
+BABEL_BP_105_46603_20120430_193144_outLine
+BABEL_BP_105_46977_20120210_043052_inLine
+BABEL_BP_105_46977_20120210_043052_outLine
+BABEL_BP_105_47037_20120118_025150_inLine
+BABEL_BP_105_47037_20120118_025150_outLine
+BABEL_BP_105_47128_20120206_014647_inLine
+BABEL_BP_105_47128_20120206_014647_outLine
+BABEL_BP_105_47429_20120512_193242_inLine
+BABEL_BP_105_47429_20120512_193242_outLine
+BABEL_BP_105_47433_20120124_032650_inLine
+BABEL_BP_105_47433_20120124_032650_outLine
+BABEL_BP_105_47566_20120210_004031_outLine
+BABEL_BP_105_47625_20120210_031653_outLine
+BABEL_BP_105_47646_20120130_220546_inLine
+BABEL_BP_105_47646_20120130_220546_outLine
+BABEL_BP_105_47733_20120124_050736_inLine
+BABEL_BP_105_47733_20120124_050736_outLine
+BABEL_BP_105_47794_20120517_013537_inLine
+BABEL_BP_105_47794_20120517_013537_outLine
+BABEL_BP_105_47794_20120517_014505_inLine
+BABEL_BP_105_47794_20120517_014505_outLine
+BABEL_BP_105_47821_20120430_182844_outLine
+BABEL_BP_105_47823_20120209_005455_inLine
+BABEL_BP_105_47823_20120209_005455_outLine
+BABEL_BP_105_47845_20120604_014840_inLine
+BABEL_BP_105_48061_20120201_084109_inLine
+BABEL_BP_105_48061_20120201_084109_outLine
+BABEL_BP_105_48247_20120814_194116_inLine
+BABEL_BP_105_48247_20120814_194116_outLine
+BABEL_BP_105_48281_20120527_205037_inLine
+BABEL_BP_105_48281_20120527_205037_outLine
+BABEL_BP_105_48281_20120527_210249_inLine
+BABEL_BP_105_48281_20120527_210249_outLine
+BABEL_BP_105_48317_20120201_220534_inLine
+BABEL_BP_105_48410_20120816_072736_inLine
+BABEL_BP_105_48410_20120816_072736_outLine
+BABEL_BP_105_48418_20120517_235210_inLine
+BABEL_BP_105_48418_20120517_235210_outLine
+BABEL_BP_105_48491_20120814_025137_inLine
+BABEL_BP_105_48491_20120814_025137_outLine
+BABEL_BP_105_48559_20120120_085039_inLine
+BABEL_BP_105_48559_20120120_085039_outLine
+BABEL_BP_105_48976_20120209_021529_inLine
+BABEL_BP_105_48976_20120209_021529_outLine
+BABEL_BP_105_49186_20120627_224343_inLine
+BABEL_BP_105_49186_20120627_224343_outLine
+BABEL_BP_105_49239_20120121_234750_outLine
+BABEL_BP_105_49541_20120205_233637_inLine
+BABEL_BP_105_49541_20120205_233637_outLine
+BABEL_BP_105_49624_20120129_090754_inLine
+BABEL_BP_105_49624_20120129_090754_outLine
+BABEL_BP_105_49689_20120429_224801_inLine
+BABEL_BP_105_49689_20120429_224801_outLine
+BABEL_BP_105_50028_20120628_020702_inLine
+BABEL_BP_105_50028_20120628_020702_outLine
+BABEL_BP_105_50141_20120516_230234_inLine
+BABEL_BP_105_50141_20120516_230234_outLine
+BABEL_BP_105_50201_20120314_220751_inLine
+BABEL_BP_105_50201_20120314_220751_outLine
+BABEL_BP_105_50416_20120120_030634_inLine
+BABEL_BP_105_50416_20120120_030634_outLine
+BABEL_BP_105_50416_20120120_032209_inLine
+BABEL_BP_105_50416_20120120_032209_outLine
+BABEL_BP_105_50641_20120519_213400_inLine
+BABEL_BP_105_50641_20120519_213400_outLine
+BABEL_BP_105_50752_20120530_202359_inLine
+BABEL_BP_105_50752_20120530_202359_outLine
+BABEL_BP_105_50798_20120814_222755_inLine
+BABEL_BP_105_50798_20120814_222755_outLine
+BABEL_BP_105_50932_20120131_024519_outLine
+BABEL_BP_105_51052_20120125_203253_inLine
+BABEL_BP_105_51052_20120125_203253_outLine
+BABEL_BP_105_51149_20120517_022710_inLine
+BABEL_BP_105_51149_20120517_022710_outLine
+BABEL_BP_105_51448_20120512_221822_inLine
+BABEL_BP_105_51448_20120512_221822_outLine
+BABEL_BP_105_51521_20120528_232651_inLine
+BABEL_BP_105_51521_20120528_232651_outLine
+BABEL_BP_105_51569_20120113_191836_inLine
+BABEL_BP_105_51569_20120113_191836_outLine
+BABEL_BP_105_52219_20120122_061548_inLine
+BABEL_BP_105_52219_20120122_061548_outLine
+BABEL_BP_105_52335_20120602_042319_inLine
+BABEL_BP_105_52335_20120602_042320_outLine
+BABEL_BP_105_52602_20120130_010143_inLine
+BABEL_BP_105_52602_20120130_010143_outLine
+BABEL_BP_105_52642_20120120_062951_inLine
+BABEL_BP_105_52642_20120120_062951_outLine
+BABEL_BP_105_52900_20120207_074729_inLine
+BABEL_BP_105_53179_20120126_014504_inLine
+BABEL_BP_105_53179_20120126_014504_outLine
+BABEL_BP_105_53181_20120209_221434_inLine
+BABEL_BP_105_53181_20120209_221434_outLine
+BABEL_BP_105_53352_20120313_025305_inLine
+BABEL_BP_105_53352_20120313_025305_outLine
+BABEL_BP_105_53653_20120601_203737_inLine
+BABEL_BP_105_53653_20120601_203737_outLine
+BABEL_BP_105_53653_20120601_205017_inLine
+BABEL_BP_105_53653_20120601_205017_outLine
+BABEL_BP_105_53994_20120501_063357_inLine
+BABEL_BP_105_53994_20120501_063357_outLine
+BABEL_BP_105_54285_20120501_170645_inLine
+BABEL_BP_105_54285_20120501_170645_outLine
+BABEL_BP_105_54621_20120604_220824_inLine
+BABEL_BP_105_54621_20120604_220824_outLine
+BABEL_BP_105_55355_20120602_030100_inLine
+BABEL_BP_105_55355_20120602_030100_outLine
+BABEL_BP_105_55399_20120207_220014_inLine
+BABEL_BP_105_55399_20120207_220014_outLine
+BABEL_BP_105_55470_20120515_231335_inLine
+BABEL_BP_105_55470_20120515_231335_outLine
+BABEL_BP_105_55820_20120120_200536_inLine
+BABEL_BP_105_55820_20120120_200536_outLine
+BABEL_BP_105_55838_20120519_183551_outLine
+BABEL_BP_105_55838_20120519_184228_outLine
+BABEL_BP_105_56039_20120207_012118_inLine
+BABEL_BP_105_56039_20120207_012118_outLine
+BABEL_BP_105_57148_20120208_234937_inLine
+BABEL_BP_105_57148_20120208_234937_outLine
+BABEL_BP_105_57454_20120123_082347_inLine
+BABEL_BP_105_57454_20120123_082347_outLine
+BABEL_BP_105_57457_20120203_040430_inLine
+BABEL_BP_105_57457_20120203_040430_outLine
+BABEL_BP_105_57619_20120530_212910_inLine
+BABEL_BP_105_57619_20120530_212910_outLine
+BABEL_BP_105_57629_20120109_193726_inLine
+BABEL_BP_105_57629_20120109_193726_outLine
+BABEL_BP_105_57637_20120207_092849_outLine
+BABEL_BP_105_58108_20120516_200608_inLine
+BABEL_BP_105_58108_20120516_200608_outLine
+BABEL_BP_105_58108_20120516_215546_inLine
+BABEL_BP_105_58108_20120516_215546_outLine
+BABEL_BP_105_58192_20120530_040251_inLine
+BABEL_BP_105_58192_20120530_040252_outLine
+BABEL_BP_105_58232_20120221_044134_inLine
+BABEL_BP_105_58232_20120221_044134_outLine
+BABEL_BP_105_58357_20120602_203200_inLine
+BABEL_BP_105_58357_20120602_203200_outLine
+BABEL_BP_105_58413_20120220_081844_inLine
+BABEL_BP_105_58413_20120220_081902_outLine
+BABEL_BP_105_58536_20120207_101252_inLine
+BABEL_BP_105_58536_20120207_101252_outLine
+BABEL_BP_105_59169_20120126_071441_inLine
+BABEL_BP_105_59169_20120126_071441_outLine
+BABEL_BP_105_59175_20120814_014729_inLine
+BABEL_BP_105_59175_20120814_014729_outLine
+BABEL_BP_105_59454_20120210_050748_inLine
+BABEL_BP_105_59454_20120210_050748_outLine
+BABEL_BP_105_59924_20120520_193636_outLine
+BABEL_BP_105_59925_20120531_025444_inLine
+BABEL_BP_105_59925_20120531_025444_outLine
+BABEL_BP_105_60106_20120206_012558_inLine
+BABEL_BP_105_60106_20120206_012558_outLine
+BABEL_BP_105_60193_20120208_022615_inLine
+BABEL_BP_105_60193_20120208_022615_outLine
+BABEL_BP_105_60605_20120121_024426_inLine
+BABEL_BP_105_60605_20120121_024426_outLine
+BABEL_BP_105_60826_20120127_052753_inLine
+BABEL_BP_105_60826_20120127_052753_outLine
+BABEL_BP_105_60842_20120207_082938_inLine
+BABEL_BP_105_60842_20120207_082938_outLine
+BABEL_BP_105_60995_20120708_212511_inLine
+BABEL_BP_105_60995_20120708_212511_outLine
+BABEL_BP_105_61119_20120120_011733_inLine
+BABEL_BP_105_61119_20120120_011733_outLine
+BABEL_BP_105_61750_20120430_182721_inLine
+BABEL_BP_105_61750_20120430_182721_outLine
+BABEL_BP_105_61762_20120208_192030_inLine
+BABEL_BP_105_61762_20120208_192030_outLine
+BABEL_BP_105_61906_20120125_055530_inLine
+BABEL_BP_105_61906_20120125_055530_outLine
+BABEL_BP_105_61936_20120626_050803_inLine
+BABEL_BP_105_61936_20120626_050804_outLine
+BABEL_BP_105_61988_20120207_042437_inLine
+BABEL_BP_105_61988_20120207_042437_outLine
+BABEL_BP_105_62277_20120504_191914_inLine
+BABEL_BP_105_62277_20120504_191914_outLine
+BABEL_BP_105_62286_20120206_001738_inLine
+BABEL_BP_105_62286_20120206_001739_outLine
+BABEL_BP_105_62589_20120208_070910_inLine
+BABEL_BP_105_62589_20120208_070910_outLine
+BABEL_BP_105_63116_20120210_011436_inLine
+BABEL_BP_105_63116_20120210_011436_outLine
+BABEL_BP_105_63233_20120209_025744_inLine
+BABEL_BP_105_63339_20120113_014223_inLine
+BABEL_BP_105_63339_20120113_014223_outLine
+BABEL_BP_105_63352_20120529_062238_inLine
+BABEL_BP_105_63352_20120529_062238_outLine
+BABEL_BP_105_63369_20120504_000600_inLine
+BABEL_BP_105_63369_20120504_000600_outLine
+BABEL_BP_105_64404_20120109_210230_inLine
+BABEL_BP_105_64404_20120109_210230_outLine
+BABEL_BP_105_64661_20120206_064757_inLine
+BABEL_BP_105_64724_20120502_185902_inLine
+BABEL_BP_105_64724_20120502_185902_outLine
+BABEL_BP_105_64889_20120430_041923_inLine
+BABEL_BP_105_64889_20120430_041923_outLine
+BABEL_BP_105_65069_20120205_053459_inLine
+BABEL_BP_105_65069_20120205_053459_outLine
+BABEL_BP_105_65248_20120106_003446_inLine
+BABEL_BP_105_65248_20120106_003446_outLine
+BABEL_BP_105_65580_20120107_231525_inLine
+BABEL_BP_105_65580_20120107_231525_outLine
+BABEL_BP_105_65629_20120206_013549_inLine
+BABEL_BP_105_65629_20120206_013549_outLine
+BABEL_BP_105_65783_20120206_225414_inLine
+BABEL_BP_105_65783_20120206_225414_outLine
+BABEL_BP_105_65923_20120207_201411_inLine
+BABEL_BP_105_65923_20120207_201411_outLine
+BABEL_BP_105_66419_20120521_015830_inLine
+BABEL_BP_105_66419_20120521_015830_outLine
+BABEL_BP_105_66451_20120208_202426_inLine
+BABEL_BP_105_66451_20120208_202426_outLine
+BABEL_BP_105_66798_20120208_003832_inLine
+BABEL_BP_105_66798_20120208_003832_outLine
+BABEL_BP_105_66839_20120520_203654_inLine
+BABEL_BP_105_66839_20120520_203655_outLine
+BABEL_BP_105_67150_20120619_230543_inLine
+BABEL_BP_105_67150_20120619_230543_outLine
+BABEL_BP_105_67227_20120518_213954_inLine
+BABEL_BP_105_67227_20120518_213954_outLine
+BABEL_BP_105_67304_20120206_233053_inLine
+BABEL_BP_105_67304_20120206_233053_outLine
+BABEL_BP_105_67628_20120122_014514_inLine
+BABEL_BP_105_67628_20120122_014514_outLine
+BABEL_BP_105_67798_20120207_222749_inLine
+BABEL_BP_105_67798_20120207_222749_outLine
+BABEL_BP_105_67916_20120624_041235_inLine
+BABEL_BP_105_67916_20120624_041236_outLine
+BABEL_BP_105_67916_20120624_042035_inLine
+BABEL_BP_105_67916_20120624_042036_outLine
+BABEL_BP_105_68129_20120129_010002_inLine
+BABEL_BP_105_68276_20120530_043559_inLine
+BABEL_BP_105_68276_20120530_043600_outLine
+BABEL_BP_105_68295_20120125_213909_inLine
+BABEL_BP_105_68295_20120125_213909_outLine
+BABEL_BP_105_68362_20120516_233958_inLine
+BABEL_BP_105_68362_20120516_233958_outLine
+BABEL_BP_105_68545_20120130_195611_inLine
+BABEL_BP_105_68545_20120130_195611_outLine
+BABEL_BP_105_68767_20120208_195338_inLine
+BABEL_BP_105_68767_20120208_195338_outLine
+BABEL_BP_105_68861_20120206_042909_inLine
+BABEL_BP_105_68861_20120206_042909_outLine
+BABEL_BP_105_69137_20120205_012455_inLine
+BABEL_BP_105_69137_20120205_012455_outLine
+BABEL_BP_105_69368_20120209_025044_inLine
+BABEL_BP_105_69368_20120209_025044_outLine
+BABEL_BP_105_69548_20120206_002506_inLine
+BABEL_BP_105_69548_20120206_002506_outLine
+BABEL_BP_105_69621_20120208_050816_inLine
+BABEL_BP_105_69621_20120208_050816_outLine
+BABEL_BP_105_69764_20120209_041231_inLine
+BABEL_BP_105_69764_20120209_041231_outLine
+BABEL_BP_105_70243_20120130_002646_inLine
+BABEL_BP_105_70243_20120130_002646_outLine
+BABEL_BP_105_70285_20120520_195703_inLine
+BABEL_BP_105_70285_20120520_195703_outLine
+BABEL_BP_105_70511_20120129_071513_inLine
+BABEL_BP_105_70511_20120129_071513_outLine
+BABEL_BP_105_70548_20120209_030934_inLine
+BABEL_BP_105_70548_20120209_030934_outLine
+BABEL_BP_105_70615_20120112_204508_inLine
+BABEL_BP_105_70615_20120112_204508_outLine
+BABEL_BP_105_70680_20120109_201712_inLine
+BABEL_BP_105_70906_20120521_022727_inLine
+BABEL_BP_105_70906_20120521_022727_outLine
+BABEL_BP_105_70975_20120527_224548_inLine
+BABEL_BP_105_70975_20120527_224548_outLine
+BABEL_BP_105_71178_20120124_044039_inLine
+BABEL_BP_105_71178_20120124_044039_outLine
+BABEL_BP_105_71739_20120422_024509_inLine
+BABEL_BP_105_71739_20120422_024509_outLine
+BABEL_BP_105_71741_20120314_230737_inLine
+BABEL_BP_105_71741_20120314_230737_outLine
+BABEL_BP_105_72119_20120202_041158_inLine
+BABEL_BP_105_72119_20120202_041158_outLine
+BABEL_BP_105_72141_20120125_085836_inLine
+BABEL_BP_105_72141_20120125_085836_outLine
+BABEL_BP_105_72297_20120602_030633_inLine
+BABEL_BP_105_72297_20120602_030633_outLine
+BABEL_BP_105_72330_20120520_201127_outLine
+BABEL_BP_105_72330_20120520_201604_outLine
+BABEL_BP_105_72718_20120525_180835_inLine
+BABEL_BP_105_72718_20120525_180835_outLine
+BABEL_BP_105_72746_20120205_020507_inLine
+BABEL_BP_105_72746_20120205_020507_outLine
+BABEL_BP_105_72879_20120125_032216_inLine
+BABEL_BP_105_72879_20120125_032216_outLine
+BABEL_BP_105_73051_20120817_204309_inLine
+BABEL_BP_105_73051_20120817_204309_outLine
+BABEL_BP_105_73059_20120520_222710_inLine
+BABEL_BP_105_73059_20120520_222710_outLine
+BABEL_BP_105_73072_20120607_013513_inLine
+BABEL_BP_105_73072_20120607_013513_outLine
+BABEL_BP_105_73452_20120527_020050_inLine
+BABEL_BP_105_73452_20120527_020050_outLine
+BABEL_BP_105_73542_20120118_000641_inLine
+BABEL_BP_105_73542_20120118_000641_outLine
+BABEL_BP_105_73752_20120520_212014_inLine
+BABEL_BP_105_73752_20120520_212014_outLine
+BABEL_BP_105_73761_20120119_040339_inLine
+BABEL_BP_105_73761_20120119_040339_outLine
+BABEL_BP_105_73780_20120430_230832_inLine
+BABEL_BP_105_73780_20120430_230832_outLine
+BABEL_BP_105_73944_20120207_022618_inLine
+BABEL_BP_105_73944_20120207_022618_outLine
+BABEL_BP_105_74012_20120207_031751_inLine
+BABEL_BP_105_74012_20120207_031751_outLine
+BABEL_BP_105_74571_20120709_032825_inLine
+BABEL_BP_105_74588_20120208_231518_inLine
+BABEL_BP_105_74588_20120208_231518_outLine
+BABEL_BP_105_74709_20120123_195039_inLine
+BABEL_BP_105_74709_20120123_195039_outLine
+BABEL_BP_105_75248_20120621_004722_inLine
+BABEL_BP_105_75248_20120621_004722_outLine
+BABEL_BP_105_75333_20120517_033420_inLine
+BABEL_BP_105_75333_20120517_033420_outLine
+BABEL_BP_105_75354_20120520_012303_inLine
+BABEL_BP_105_75354_20120520_012303_outLine
+BABEL_BP_105_75498_20120123_090316_inLine
+BABEL_BP_105_75498_20120123_090316_outLine
+BABEL_BP_105_75680_20120627_220907_inLine
+BABEL_BP_105_75680_20120627_220907_outLine
+BABEL_BP_105_75799_20120121_081211_inLine
+BABEL_BP_105_75799_20120121_081211_outLine
+BABEL_BP_105_75845_20120126_093251_inLine
+BABEL_BP_105_75845_20120126_093251_outLine
+BABEL_BP_105_75990_20120210_003258_inLine
+BABEL_BP_105_76252_20120708_232625_inLine
+BABEL_BP_105_76252_20120708_232625_outLine
+BABEL_BP_105_76320_20120520_214841_outLine
+BABEL_BP_105_76451_20120520_012516_inLine
+BABEL_BP_105_76451_20120520_012516_outLine
+BABEL_BP_105_76691_20120501_060535_inLine
+BABEL_BP_105_76691_20120501_060535_outLine
+BABEL_BP_105_76925_20120207_051003_inLine
+BABEL_BP_105_76925_20120207_051003_outLine
+BABEL_BP_105_77137_20120120_003356_inLine
+BABEL_BP_105_77137_20120120_003356_outLine
+BABEL_BP_105_77244_20120530_230026_inLine
+BABEL_BP_105_77244_20120530_230026_outLine
+BABEL_BP_105_77342_20120126_053532_inLine
+BABEL_BP_105_77348_20120109_231904_inLine
+BABEL_BP_105_77348_20120109_231904_outLine
+BABEL_BP_105_77483_20120126_061820_outLine
+BABEL_BP_105_77487_20120131_083433_inLine
+BABEL_BP_105_77487_20120131_083433_outLine
+BABEL_BP_105_77584_20120119_043252_inLine
+BABEL_BP_105_77584_20120119_043252_outLine
+BABEL_BP_105_77802_20120120_034318_inLine
+BABEL_BP_105_77802_20120120_034318_outLine
+BABEL_BP_105_77811_20120619_203214_inLine
+BABEL_BP_105_77811_20120619_203214_outLine
+BABEL_BP_105_77965_20120110_195959_inLine
+BABEL_BP_105_77965_20120110_195959_outLine
+BABEL_BP_105_79284_20120520_000955_inLine
+BABEL_BP_105_79284_20120520_000955_outLine
+BABEL_BP_105_79293_20120313_050558_inLine
+BABEL_BP_105_79412_20120814_020731_inLine
+BABEL_BP_105_79412_20120814_020731_outLine
+BABEL_BP_105_79456_20120315_065631_outLine
+BABEL_BP_105_79495_20120107_211221_inLine
+BABEL_BP_105_79495_20120107_211221_outLine
+BABEL_BP_105_79619_20120204_034427_outLine
+BABEL_BP_105_79899_20120519_004730_inLine
+BABEL_BP_105_79899_20120519_004730_outLine
+BABEL_BP_105_80008_20120206_073118_inLine
+BABEL_BP_105_80008_20120206_073118_outLine
+BABEL_BP_105_80028_20120620_230841_inLine
+BABEL_BP_105_80028_20120620_230841_outLine
+BABEL_BP_105_80247_20120501_021202_inLine
+BABEL_BP_105_80247_20120501_021202_outLine
+BABEL_BP_105_80290_20120502_211538_inLine
+BABEL_BP_105_80290_20120502_211538_outLine
+BABEL_BP_105_80838_20120130_015756_inLine
+BABEL_BP_105_80838_20120130_015756_outLine
+BABEL_BP_105_80874_20120209_070233_inLine
+BABEL_BP_105_80874_20120209_070233_outLine
+BABEL_BP_105_80953_20120126_025448_inLine
+BABEL_BP_105_80953_20120126_025448_outLine
+BABEL_BP_105_81015_20120122_211324_inLine
+BABEL_BP_105_81015_20120122_211324_outLine
+BABEL_BP_105_81056_20120220_045306_inLine
+BABEL_BP_105_81056_20120220_045306_outLine
+BABEL_BP_105_81084_20120125_044727_outLine
+BABEL_BP_105_81096_20120205_000909_inLine
+BABEL_BP_105_81096_20120205_000909_outLine
+BABEL_BP_105_81244_20120120_234254_inLine
+BABEL_BP_105_81244_20120120_234254_outLine
+BABEL_BP_105_81261_20120206_044337_inLine
+BABEL_BP_105_81261_20120206_044337_outLine
+BABEL_BP_105_81321_20120127_073458_inLine
+BABEL_BP_105_81321_20120127_073458_outLine
+BABEL_BP_105_81583_20120206_035506_inLine
+BABEL_BP_105_81583_20120206_035506_outLine
+BABEL_BP_105_81587_20120530_022705_inLine
+BABEL_BP_105_81587_20120530_022705_outLine
+BABEL_BP_105_81717_20120121_075007_inLine
+BABEL_BP_105_81717_20120121_075007_outLine
+BABEL_BP_105_81799_20120122_044223_inLine
+BABEL_BP_105_81799_20120122_044223_outLine
+BABEL_BP_105_82006_20120119_235812_inLine
+BABEL_BP_105_82006_20120119_235812_outLine
+BABEL_BP_105_82103_20120207_074556_outLine
+BABEL_BP_105_82443_20120623_012845_inLine
+BABEL_BP_105_82443_20120623_012845_outLine
+BABEL_BP_105_82591_20120201_222003_outLine
+BABEL_BP_105_82766_20120130_011639_inLine
+BABEL_BP_105_82766_20120130_011639_outLine
+BABEL_BP_105_82880_20120708_225241_inLine
+BABEL_BP_105_82880_20120708_225241_outLine
+BABEL_BP_105_83256_20120424_212011_inLine
+BABEL_BP_105_83256_20120424_212011_outLine
+BABEL_BP_105_83529_20120520_020225_inLine
+BABEL_BP_105_83529_20120520_020225_outLine
+BABEL_BP_105_83531_20120202_033247_inLine
+BABEL_BP_105_83531_20120202_033247_outLine
+BABEL_BP_105_83700_20120205_032346_inLine
+BABEL_BP_105_83700_20120205_032346_outLine
+BABEL_BP_105_83702_20120122_070851_inLine
+BABEL_BP_105_83702_20120122_070851_outLine
+BABEL_BP_105_83713_20120123_051739_inLine
+BABEL_BP_105_83713_20120123_051739_outLine
+BABEL_BP_105_84171_20120520_204934_outLine
+BABEL_BP_105_84916_20120209_213013_outLine
+BABEL_BP_105_84943_20120208_061546_inLine
+BABEL_BP_105_84943_20120208_061546_outLine
+BABEL_BP_105_85031_20120205_234855_inLine
+BABEL_BP_105_85031_20120205_234855_outLine
+BABEL_BP_105_85083_20120502_172834_inLine
+BABEL_BP_105_85083_20120502_172834_outLine
+BABEL_BP_105_85222_20120623_191629_inLine
+BABEL_BP_105_85222_20120623_191629_outLine
+BABEL_BP_105_85883_20120130_035046_inLine
+BABEL_BP_105_85883_20120130_035046_outLine
+BABEL_BP_105_85941_20120122_072454_inLine
+BABEL_BP_105_85941_20120122_072454_outLine
+BABEL_BP_105_85948_20120429_220916_inLine
+BABEL_BP_105_85948_20120429_220916_outLine
+BABEL_BP_105_86004_20120121_235617_inLine
+BABEL_BP_105_86004_20120121_235617_outLine
+BABEL_BP_105_86014_20120130_071042_inLine
+BABEL_BP_105_86014_20120130_071042_outLine
+BABEL_BP_105_86259_20120130_021439_inLine
+BABEL_BP_105_86259_20120130_021439_outLine
+BABEL_BP_105_86801_20120531_045324_outLine
+BABEL_BP_105_87107_20120606_210147_inLine
+BABEL_BP_105_87107_20120606_210147_outLine
+BABEL_BP_105_87850_20120122_034948_inLine
+BABEL_BP_105_87850_20120122_034948_outLine
+BABEL_BP_105_87857_20120602_232747_inLine
+BABEL_BP_105_87857_20120602_232747_outLine
+BABEL_BP_105_87862_20120119_190443_inLine
+BABEL_BP_105_87862_20120119_190443_outLine
+BABEL_BP_105_88243_20120126_081939_inLine
+BABEL_BP_105_88243_20120126_081939_outLine
+BABEL_BP_105_88253_20120521_025324_inLine
+BABEL_BP_105_88253_20120521_025324_outLine
+BABEL_BP_105_88294_20120123_071701_inLine
+BABEL_BP_105_88294_20120123_071701_outLine
+BABEL_BP_105_88383_20120205_064745_inLine
+BABEL_BP_105_88383_20120205_064745_outLine
+BABEL_BP_105_88506_20120315_203433_inLine
+BABEL_BP_105_88506_20120315_203433_outLine
+BABEL_BP_105_88932_20120209_024746_inLine
+BABEL_BP_105_88932_20120209_024746_outLine
+BABEL_BP_105_89345_20120123_012645_inLine
+BABEL_BP_105_89345_20120123_012645_outLine
+BABEL_BP_105_89565_20120208_075727_outLine
+BABEL_BP_105_89583_20120208_041101_inLine
+BABEL_BP_105_89583_20120208_041101_outLine
+BABEL_BP_105_89674_20120207_210507_inLine
+BABEL_BP_105_89674_20120207_210507_outLine
+BABEL_BP_105_89818_20120111_002805_inLine
+BABEL_BP_105_89818_20120111_002805_outLine
+BABEL_BP_105_89838_20120219_225311_inLine
+BABEL_BP_105_89838_20120219_225311_outLine
+BABEL_BP_105_89867_20120124_044128_inLine
+BABEL_BP_105_89867_20120124_044128_outLine
+BABEL_BP_105_89867_20120124_050334_inLine
+BABEL_BP_105_89867_20120124_050334_outLine
+BABEL_BP_105_90046_20120605_010159_inLine
+BABEL_BP_105_90046_20120605_010159_outLine
+BABEL_BP_105_90055_20120205_015425_inLine
+BABEL_BP_105_90055_20120205_015425_outLine
+BABEL_BP_105_90490_20120107_011745_inLine
+BABEL_BP_105_90490_20120107_011745_outLine
+BABEL_BP_105_90559_20120601_213056_inLine
+BABEL_BP_105_90559_20120601_213056_outLine
+BABEL_BP_105_90577_20120106_010938_inLine
+BABEL_BP_105_90577_20120106_010938_outLine
+BABEL_BP_105_90730_20120127_001133_inLine
+BABEL_BP_105_90730_20120127_001133_outLine
+BABEL_BP_105_90819_20120130_023600_inLine
+BABEL_BP_105_90819_20120130_023600_outLine
+BABEL_BP_105_90951_20120127_014240_inLine
+BABEL_BP_105_90951_20120127_014240_outLine
+BABEL_BP_105_91002_20120517_195202_inLine
+BABEL_BP_105_91002_20120517_195202_outLine
+BABEL_BP_105_91358_20120614_031106_inLine
+BABEL_BP_105_91358_20120614_031107_outLine
+BABEL_BP_105_91386_20120625_201849_inLine
+BABEL_BP_105_91386_20120625_201849_outLine
+BABEL_BP_105_91703_20120126_003014_inLine
+BABEL_BP_105_91703_20120126_003014_outLine
+BABEL_BP_105_91975_20120622_002430_inLine
+BABEL_BP_105_91975_20120622_002430_outLine
+BABEL_BP_105_91975_20120622_004757_inLine
+BABEL_BP_105_91975_20120622_004757_outLine
+BABEL_BP_105_92252_20120119_001340_inLine
+BABEL_BP_105_92252_20120119_001340_outLine
+BABEL_BP_105_92407_20120206_090518_inLine
+BABEL_BP_105_92407_20120206_090518_outLine
+BABEL_BP_105_92628_20120202_065713_inLine
+BABEL_BP_105_92628_20120202_065713_outLine
+BABEL_BP_105_92752_20120131_065611_inLine
+BABEL_BP_105_92752_20120131_065611_outLine
+BABEL_BP_105_92789_20120208_092935_inLine
+BABEL_BP_105_92789_20120208_092935_outLine
+BABEL_BP_105_92800_20120204_062855_inLine
+BABEL_BP_105_92800_20120204_062855_outLine
+BABEL_BP_105_93004_20120203_214508_inLine
+BABEL_BP_105_93004_20120203_214508_outLine
+BABEL_BP_105_93044_20120530_205229_inLine
+BABEL_BP_105_93044_20120530_205229_outLine
+BABEL_BP_105_93044_20120530_210446_inLine
+BABEL_BP_105_93044_20120530_210446_outLine
+BABEL_BP_105_93314_20120204_045440_outLine
+BABEL_BP_105_93436_20120605_021136_outLine
+BABEL_BP_105_93541_20120207_220607_inLine
+BABEL_BP_105_93541_20120207_220607_outLine
+BABEL_BP_105_93637_20120208_014420_inLine
+BABEL_BP_105_93637_20120208_014420_outLine
+BABEL_BP_105_94149_20120528_213123_inLine
+BABEL_BP_105_94149_20120528_213123_outLine
+BABEL_BP_105_94162_20120121_020746_inLine
+BABEL_BP_105_94162_20120121_020746_outLine
+BABEL_BP_105_94168_20120127_071423_inLine
+BABEL_BP_105_94168_20120127_071423_outLine
+BABEL_BP_105_94223_20120813_060431_inLine
+BABEL_BP_105_94223_20120813_060431_outLine
+BABEL_BP_105_94226_20120126_200629_inLine
+BABEL_BP_105_94226_20120126_200629_outLine
+BABEL_BP_105_94235_20120131_090132_inLine
+BABEL_BP_105_94235_20120131_090132_outLine
+BABEL_BP_105_94542_20120503_002707_inLine
+BABEL_BP_105_94542_20120503_002707_outLine
+BABEL_BP_105_94694_20120127_060811_inLine
+BABEL_BP_105_94694_20120127_060811_outLine
+BABEL_BP_105_95034_20120130_072201_outLine
+BABEL_BP_105_95533_20120527_024409_inLine
+BABEL_BP_105_95533_20120527_024409_outLine
+BABEL_BP_105_95650_20120110_225916_inLine
+BABEL_BP_105_95650_20120110_225916_outLine
+BABEL_BP_105_95736_20120128_235257_inLine
+BABEL_BP_105_95736_20120128_235257_outLine
+BABEL_BP_105_95815_20120201_065914_inLine
+BABEL_BP_105_95815_20120201_065914_outLine
+BABEL_BP_105_96108_20120201_013051_inLine
+BABEL_BP_105_96108_20120201_013051_outLine
+BABEL_BP_105_96302_20120518_220402_inLine
+BABEL_BP_105_96302_20120518_220402_outLine
+BABEL_BP_105_96438_20120208_042745_inLine
+BABEL_BP_105_96438_20120208_042745_outLine
+BABEL_BP_105_97004_20120628_024047_inLine
+BABEL_BP_105_97260_20120128_060528_inLine
+BABEL_BP_105_97260_20120128_060528_outLine
+BABEL_BP_105_97274_20120202_091803_inLine
+BABEL_BP_105_97274_20120202_091803_outLine
+BABEL_BP_105_97298_20120706_190045_inLine
+BABEL_BP_105_97298_20120706_190045_outLine
+BABEL_BP_105_97318_20120606_000332_inLine
+BABEL_BP_105_97318_20120606_000332_outLine
+BABEL_BP_105_97405_20120128_051654_outLine
+BABEL_BP_105_97629_20120606_230655_inLine
+BABEL_BP_105_97629_20120606_230655_outLine
+BABEL_BP_105_97635_20120519_194730_inLine
+BABEL_BP_105_97635_20120519_194730_outLine
+BABEL_BP_105_97650_20120124_023530_inLine
+BABEL_BP_105_97650_20120124_023530_outLine
+BABEL_BP_105_97699_20120619_014656_inLine
+BABEL_BP_105_97699_20120619_014656_outLine
+BABEL_BP_105_97760_20120503_205622_inLine
+BABEL_BP_105_97760_20120503_205622_outLine
+BABEL_BP_105_97797_20120130_025511_inLine
+BABEL_BP_105_97797_20120130_025511_outLine
+BABEL_BP_105_97941_20120123_224142_inLine
+BABEL_BP_105_97941_20120123_224142_outLine
+BABEL_BP_105_98279_20120121_021104_inLine
+BABEL_BP_105_98279_20120121_021104_outLine
+BABEL_BP_105_98402_20120518_004507_inLine
+BABEL_BP_105_98402_20120518_004507_outLine
+BABEL_BP_105_98476_20120314_082638_outLine
+BABEL_BP_105_99414_20120618_212729_inLine
+BABEL_BP_105_99414_20120618_212729_outLine
+BABEL_BP_105_99514_20120126_232257_inLine
+BABEL_BP_105_99514_20120126_232257_outLine
+BABEL_BP_105_99694_20120202_034424_inLine
+BABEL_BP_105_99694_20120202_034425_outLine
diff --git a/egs/babel/s5d/conf/lists/105-turkish/train.LimitedLP.list b/egs/babel/s5d/conf/lists/105-turkish/train.LimitedLP.list
new file mode 100644
index 00000000000..18efca5b37c
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/105-turkish/train.LimitedLP.list
@@ -0,0 +1,128 @@
+BABEL_BP_105_16257_20120709_025101_inLine
+BABEL_BP_105_16257_20120709_025101_outLine
+BABEL_BP_105_17013_20120314_031626_inLine
+BABEL_BP_105_17013_20120314_031626_outLine
+BABEL_BP_105_18672_20120131_015941_inLine
+BABEL_BP_105_18672_20120131_015941_outLine
+BABEL_BP_105_18716_20120218_070145_inLine
+BABEL_BP_105_20347_20120504_231529_inLine
+BABEL_BP_105_20347_20120504_232320_inLine
+BABEL_BP_105_20471_20120125_013916_inLine
+BABEL_BP_105_20471_20120125_013916_outLine
+BABEL_BP_105_20471_20120125_015348_inLine
+BABEL_BP_105_20471_20120125_015348_outLine
+BABEL_BP_105_21370_20120605_185740_inLine
+BABEL_BP_105_21370_20120605_185740_outLine
+BABEL_BP_105_22272_20120430_191440_inLine
+BABEL_BP_105_22272_20120430_191440_outLine
+BABEL_BP_105_22408_20120131_202129_inLine
+BABEL_BP_105_22408_20120131_202129_outLine
+BABEL_BP_105_22408_20120131_210558_inLine
+BABEL_BP_105_22408_20120131_210558_outLine
+BABEL_BP_105_22898_20120129_040904_inLine
+BABEL_BP_105_22898_20120129_040904_outLine
+BABEL_BP_105_23629_20120503_212942_inLine
+BABEL_BP_105_23629_20120503_212942_outLine
+BABEL_BP_105_24608_20120111_023000_inLine
+BABEL_BP_105_24608_20120111_023000_outLine
+BABEL_BP_105_26164_20120627_210408_inLine
+BABEL_BP_105_26164_20120627_210408_outLine
+BABEL_BP_105_26644_20120517_212756_inLine
+BABEL_BP_105_26644_20120517_212756_outLine
+BABEL_BP_105_27724_20120130_023439_inLine
+BABEL_BP_105_27724_20120130_023439_outLine
+BABEL_BP_105_29421_20120127_235240_inLine
+BABEL_BP_105_29421_20120127_235240_outLine
+BABEL_BP_105_31460_20120603_224411_inLine
+BABEL_BP_105_31460_20120603_224411_outLine
+BABEL_BP_105_32663_20120709_040652_inLine
+BABEL_BP_105_32663_20120709_040652_outLine
+BABEL_BP_105_32818_20120530_032934_inLine
+BABEL_BP_105_32818_20120530_032935_outLine
+BABEL_BP_105_34590_20120829_000220_inLine
+BABEL_BP_105_34590_20120829_000220_outLine
+BABEL_BP_105_35329_20120203_051310_inLine
+BABEL_BP_105_35329_20120203_051310_outLine
+BABEL_BP_105_35576_20120530_184018_inLine
+BABEL_BP_105_35576_20120530_184018_outLine
+BABEL_BP_105_39066_20120206_073804_inLine
+BABEL_BP_105_39066_20120206_073804_outLine
+BABEL_BP_105_39114_20120516_035141_inLine
+BABEL_BP_105_39114_20120516_035141_outLine
+BABEL_BP_105_42145_20120210_004555_inLine
+BABEL_BP_105_42145_20120210_004555_outLine
+BABEL_BP_105_43317_20120516_181202_inLine
+BABEL_BP_105_43317_20120516_181202_outLine
+BABEL_BP_105_44209_20120130_072808_inLine
+BABEL_BP_105_44209_20120130_072808_outLine
+BABEL_BP_105_44500_20120531_224758_inLine
+BABEL_BP_105_44500_20120531_224758_outLine
+BABEL_BP_105_45511_20120601_001634_inLine
+BABEL_BP_105_45511_20120601_001634_outLine
+BABEL_BP_105_45512_20120208_063419_inLine
+BABEL_BP_105_45512_20120208_063419_outLine
+BABEL_BP_105_47429_20120512_193242_inLine
+BABEL_BP_105_47429_20120512_193242_outLine
+BABEL_BP_105_47823_20120209_005455_inLine
+BABEL_BP_105_47823_20120209_005455_outLine
+BABEL_BP_105_49186_20120627_224343_inLine
+BABEL_BP_105_49186_20120627_224343_outLine
+BABEL_BP_105_50416_20120120_030634_inLine
+BABEL_BP_105_50416_20120120_030634_outLine
+BABEL_BP_105_50416_20120120_032209_inLine
+BABEL_BP_105_50416_20120120_032209_outLine
+BABEL_BP_105_51149_20120517_022710_inLine
+BABEL_BP_105_51149_20120517_022710_outLine
+BABEL_BP_105_53352_20120313_025305_inLine
+BABEL_BP_105_53352_20120313_025305_outLine
+BABEL_BP_105_55355_20120602_030100_inLine
+BABEL_BP_105_55355_20120602_030100_outLine
+BABEL_BP_105_56039_20120207_012118_inLine
+BABEL_BP_105_56039_20120207_012118_outLine
+BABEL_BP_105_60995_20120708_212511_inLine
+BABEL_BP_105_60995_20120708_212511_outLine
+BABEL_BP_105_61750_20120430_182721_inLine
+BABEL_BP_105_61750_20120430_182721_outLine
+BABEL_BP_105_62286_20120206_001738_inLine
+BABEL_BP_105_62286_20120206_001739_outLine
+BABEL_BP_105_62589_20120208_070910_inLine
+BABEL_BP_105_62589_20120208_070910_outLine
+BABEL_BP_105_63116_20120210_011436_inLine
+BABEL_BP_105_63116_20120210_011436_outLine
+BABEL_BP_105_65069_20120205_053459_inLine
+BABEL_BP_105_65069_20120205_053459_outLine
+BABEL_BP_105_65783_20120206_225414_inLine
+BABEL_BP_105_65783_20120206_225414_outLine
+BABEL_BP_105_69764_20120209_041231_inLine
+BABEL_BP_105_69764_20120209_041231_outLine
+BABEL_BP_105_71739_20120422_024509_inLine
+BABEL_BP_105_71739_20120422_024509_outLine
+BABEL_BP_105_71741_20120314_230737_inLine
+BABEL_BP_105_71741_20120314_230737_outLine
+BABEL_BP_105_72718_20120525_180835_inLine
+BABEL_BP_105_72718_20120525_180835_outLine
+BABEL_BP_105_73059_20120520_222710_inLine
+BABEL_BP_105_73059_20120520_222710_outLine
+BABEL_BP_105_73452_20120527_020050_inLine
+BABEL_BP_105_73452_20120527_020050_outLine
+BABEL_BP_105_75354_20120520_012303_inLine
+BABEL_BP_105_75354_20120520_012303_outLine
+BABEL_BP_105_80247_20120501_021202_inLine
+BABEL_BP_105_80247_20120501_021202_outLine
+BABEL_BP_105_82591_20120201_222003_outLine
+BABEL_BP_105_83256_20120424_212011_inLine
+BABEL_BP_105_83256_20120424_212011_outLine
+BABEL_BP_105_83702_20120122_070851_inLine
+BABEL_BP_105_83702_20120122_070851_outLine
+BABEL_BP_105_83713_20120123_051739_inLine
+BABEL_BP_105_83713_20120123_051739_outLine
+BABEL_BP_105_90046_20120605_010159_inLine
+BABEL_BP_105_90046_20120605_010159_outLine
+BABEL_BP_105_92800_20120204_062855_inLine
+BABEL_BP_105_92800_20120204_062855_outLine
+BABEL_BP_105_94542_20120503_002707_inLine
+BABEL_BP_105_94542_20120503_002707_outLine
+BABEL_BP_105_96438_20120208_042745_inLine
+BABEL_BP_105_96438_20120208_042745_outLine
+BABEL_BP_105_97760_20120503_205622_inLine
+BABEL_BP_105_97760_20120503_205622_outLine
diff --git a/egs/babel/s5d/conf/lists/106-tagalog/dev.list b/egs/babel/s5d/conf/lists/106-tagalog/dev.list
new file mode 100644
index 00000000000..09f159f6574
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/106-tagalog/dev.list
@@ -0,0 +1,146 @@
+BABEL_BP_106_05343_20120411_001147_inLine
+BABEL_BP_106_05343_20120411_001147_outLine
+BABEL_BP_106_11690_20120315_042036_inLine
+BABEL_BP_106_11690_20120315_042036_outLine
+BABEL_BP_106_11694_20120315_051701_inLine
+BABEL_BP_106_11694_20120315_051701_outLine
+BABEL_BP_106_11915_20120301_192127_outLine
+BABEL_BP_106_11915_20120301_193624_outLine
+BABEL_BP_106_14475_20120317_195829_inLine
+BABEL_BP_106_14475_20120317_195829_outLine
+BABEL_BP_106_16883_20120219_191154_inLine
+BABEL_BP_106_16883_20120219_191154_outLine
+BABEL_BP_106_16883_20120219_191914_inLine
+BABEL_BP_106_16883_20120219_191914_outLine
+BABEL_BP_106_17948_20120305_020044_inLine
+BABEL_BP_106_17948_20120305_020044_outLine
+BABEL_BP_106_19012_20120405_191535_inLine
+BABEL_BP_106_19012_20120405_191535_outLine
+BABEL_BP_106_24379_20120303_015051_inLine
+BABEL_BP_106_24379_20120303_015051_outLine
+BABEL_BP_106_25035_20120213_014750_inLine
+BABEL_BP_106_25035_20120213_014750_outLine
+BABEL_BP_106_28260_20120210_165445_inLine
+BABEL_BP_106_28260_20120210_165445_outLine
+BABEL_BP_106_28740_20120131_002533_inLine
+BABEL_BP_106_28768_20120405_170206_inLine
+BABEL_BP_106_28768_20120405_170206_outLine
+BABEL_BP_106_28768_20120405_172419_inLine
+BABEL_BP_106_28768_20120405_172419_outLine
+BABEL_BP_106_29268_20120501_030651_inLine
+BABEL_BP_106_29268_20120501_032051_inLine
+BABEL_BP_106_29268_20120501_033313_inLine
+BABEL_BP_106_30554_20120301_192050_inLine
+BABEL_BP_106_30554_20120301_192050_outLine
+BABEL_BP_106_30715_20120501_014624_inLine
+BABEL_BP_106_31635_20120428_220813_inLine
+BABEL_BP_106_32642_20120318_154011_inLine
+BABEL_BP_106_32642_20120318_154011_outLine
+BABEL_BP_106_35896_20120302_123550_inLine
+BABEL_BP_106_36490_20120405_193235_inLine
+BABEL_BP_106_36490_20120405_193235_outLine
+BABEL_BP_106_40168_20120208_173832_outLine
+BABEL_BP_106_40168_20120208_175258_outLine
+BABEL_BP_106_42383_20120331_140217_inLine
+BABEL_BP_106_42383_20120331_140217_outLine
+BABEL_BP_106_42766_20120217_003639_inLine
+BABEL_BP_106_42766_20120217_003639_outLine
+BABEL_BP_106_47845_20120405_122139_inLine
+BABEL_BP_106_47845_20120405_122139_outLine
+BABEL_BP_106_47845_20120405_123415_inLine
+BABEL_BP_106_47845_20120405_123415_outLine
+BABEL_BP_106_48477_20120304_224818_inLine
+BABEL_BP_106_48477_20120304_224818_outLine
+BABEL_BP_106_53544_20120314_004506_inLine
+BABEL_BP_106_53544_20120314_004506_outLine
+BABEL_BP_106_53544_20120314_010454_inLine
+BABEL_BP_106_53544_20120314_010454_outLine
+BABEL_BP_106_53982_20120224_233136_inLine
+BABEL_BP_106_53982_20120224_233136_outLine
+BABEL_BP_106_57422_20120227_015422_inLine
+BABEL_BP_106_57422_20120227_015422_outLine
+BABEL_BP_106_58413_20120304_005849_inLine
+BABEL_BP_106_58413_20120304_005849_outLine
+BABEL_BP_106_58737_20120327_234027_inLine
+BABEL_BP_106_58737_20120327_234027_outLine
+BABEL_BP_106_59500_20120327_192807_inLine
+BABEL_BP_106_59500_20120327_192807_outLine
+BABEL_BP_106_61385_20120227_200049_inLine
+BABEL_BP_106_61385_20120227_200049_outLine
+BABEL_BP_106_65580_20120221_205300_inLine
+BABEL_BP_106_65580_20120221_205300_outLine
+BABEL_BP_106_65580_20120221_210222_inLine
+BABEL_BP_106_65580_20120221_210222_outLine
+BABEL_BP_106_66026_20120511_112437_inLine
+BABEL_BP_106_66026_20120511_114127_inLine
+BABEL_BP_106_66668_20120130_000343_inLine
+BABEL_BP_106_66668_20120130_000343_outLine
+BABEL_BP_106_66668_20120130_002819_inLine
+BABEL_BP_106_66668_20120130_002819_outLine
+BABEL_BP_106_68362_20120403_123939_inLine
+BABEL_BP_106_68362_20120403_123939_outLine
+BABEL_BP_106_69050_20120203_173053_inLine
+BABEL_BP_106_69050_20120203_173053_outLine
+BABEL_BP_106_72297_20120405_193507_inLine
+BABEL_BP_106_72297_20120405_193507_outLine
+BABEL_BP_106_72297_20120405_194943_inLine
+BABEL_BP_106_72297_20120405_194943_outLine
+BABEL_BP_106_73782_20120313_012825_inLine
+BABEL_BP_106_73782_20120313_012825_outLine
+BABEL_BP_106_75333_20120329_172440_inLine
+BABEL_BP_106_75333_20120329_172440_outLine
+BABEL_BP_106_75871_20120127_162002_inLine
+BABEL_BP_106_75871_20120127_162002_outLine
+BABEL_BP_106_76341_20120219_170650_inLine
+BABEL_BP_106_76341_20120219_170650_outLine
+BABEL_BP_106_76341_20120219_173824_inLine
+BABEL_BP_106_76341_20120219_173824_outLine
+BABEL_BP_106_78572_20120304_135853_inLine
+BABEL_BP_106_79570_20120302_141553_outLine
+BABEL_BP_106_79632_20120309_173547_inLine
+BABEL_BP_106_79632_20120309_173547_outLine
+BABEL_BP_106_79698_20120315_223952_inLine
+BABEL_BP_106_79698_20120315_230838_inLine
+BABEL_BP_106_79698_20120315_230838_outLine
+BABEL_BP_106_81587_20120309_163209_inLine
+BABEL_BP_106_81587_20120309_163209_outLine
+BABEL_BP_106_83255_20120530_214353_inLine
+BABEL_BP_106_83891_20120327_163405_inLine
+BABEL_BP_106_83891_20120327_163405_outLine
+BABEL_BP_106_85617_20120225_212818_inLine
+BABEL_BP_106_85617_20120225_212818_outLine
+BABEL_BP_106_90180_20120317_002331_inLine
+BABEL_BP_106_90180_20120317_002331_outLine
+BABEL_BP_106_90577_20120111_201742_inLine
+BABEL_BP_106_90577_20120111_201742_outLine
+BABEL_BP_106_90764_20120131_140951_inLine
+BABEL_BP_106_90764_20120131_140951_outLine
+BABEL_BP_106_90890_20120322_020338_inLine
+BABEL_BP_106_90890_20120322_020338_outLine
+BABEL_BP_106_92820_20120318_144230_inLine
+BABEL_BP_106_92820_20120318_144230_outLine
+BABEL_BP_106_93000_20120227_164805_inLine
+BABEL_BP_106_93000_20120227_164805_outLine
+BABEL_BP_106_94149_20120205_211427_inLine
+BABEL_BP_106_94149_20120205_211427_outLine
+BABEL_BP_106_94244_20120405_200522_inLine
+BABEL_BP_106_94244_20120405_200522_outLine
+BABEL_BP_106_94542_20120305_045905_inLine
+BABEL_BP_106_94542_20120305_045905_outLine
+BABEL_BP_106_95589_20120225_030746_inLine
+BABEL_BP_106_95589_20120225_032340_inLine
+BABEL_BP_106_95589_20120225_032340_outLine
+BABEL_BP_106_96347_20120422_163204_inLine
+BABEL_BP_106_96347_20120422_163808_inLine
+BABEL_BP_106_97318_20120405_141943_inLine
+BABEL_BP_106_97318_20120405_141943_outLine
+BABEL_BP_106_97629_20120227_180122_inLine
+BABEL_BP_106_97629_20120227_180122_outLine
+BABEL_BP_106_97797_20120224_210655_inLine
+BABEL_BP_106_97797_20120224_210655_outLine
+BABEL_BP_106_97797_20120224_211935_inLine
+BABEL_BP_106_97797_20120224_211935_outLine
+BABEL_BP_106_98086_20120228_172810_inLine
+BABEL_BP_106_98086_20120228_172810_outLine
+BABEL_BP_106_98640_20120317_040411_inLine
+BABEL_BP_106_98640_20120317_040412_outLine
diff --git a/egs/babel/s5d/conf/lists/106-tagalog/eval.list b/egs/babel/s5d/conf/lists/106-tagalog/eval.list
new file mode 100644
index 00000000000..b2c3042f61a
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/106-tagalog/eval.list
@@ -0,0 +1,241 @@
+BABEL_BP_106_00590_20120401_144745_inLine
+BABEL_BP_106_00590_20120401_144745_outLine
+BABEL_BP_106_05737_20120317_201434_inLine
+BABEL_BP_106_05737_20120317_201434_outLine
+BABEL_BP_106_08336_20120308_213905_inLine
+BABEL_BP_106_08336_20120308_231058_inLine
+BABEL_BP_106_08336_20120308_231812_inLine
+BABEL_BP_106_08336_20120308_232516_inLine
+BABEL_BP_106_08336_20120308_234130_inLine
+BABEL_BP_106_09067_20120304_174532_inLine
+BABEL_BP_106_09067_20120304_174532_outLine
+BABEL_BP_106_10033_20120428_005441_inLine
+BABEL_BP_106_10279_20120525_160616_inLine
+BABEL_BP_106_11868_20120403_204010_inLine
+BABEL_BP_106_11868_20120403_204010_outLine
+BABEL_BP_106_12317_20120324_045054_inLine
+BABEL_BP_106_12317_20120324_045054_outLine
+BABEL_BP_106_12631_20120202_190009_inLine
+BABEL_BP_106_12631_20120202_190009_outLine
+BABEL_BP_106_13635_20120319_005136_inLine
+BABEL_BP_106_13635_20120319_005136_outLine
+BABEL_BP_106_13715_20120530_194000_inLine
+BABEL_BP_106_13878_20120517_133306_inLine
+BABEL_BP_106_14899_20120519_174015_inLine
+BABEL_BP_106_14915_20120525_195519_inLine
+BABEL_BP_106_14915_20120525_201940_inLine
+BABEL_BP_106_14915_20120525_235128_inLine
+BABEL_BP_106_18730_20120322_025159_inLine
+BABEL_BP_106_18730_20120322_025159_outLine
+BABEL_BP_106_18991_20120208_210053_inLine
+BABEL_BP_106_18991_20120208_210053_outLine
+BABEL_BP_106_20213_20120417_130013_inLine
+BABEL_BP_106_20213_20120417_130013_outLine
+BABEL_BP_106_20307_20120409_012136_inLine
+BABEL_BP_106_20307_20120409_012136_outLine
+BABEL_BP_106_20462_20120217_160808_inLine
+BABEL_BP_106_20462_20120217_160808_outLine
+BABEL_BP_106_20462_20120217_164536_inLine
+BABEL_BP_106_20462_20120217_164536_outLine
+BABEL_BP_106_20518_20120525_181959_inLine
+BABEL_BP_106_20518_20120525_182614_inLine
+BABEL_BP_106_20518_20120525_183956_inLine
+BABEL_BP_106_20685_20120323_031815_inLine
+BABEL_BP_106_20685_20120323_031815_outLine
+BABEL_BP_106_21634_20120530_182237_inLine
+BABEL_BP_106_22401_20120321_012046_inLine
+BABEL_BP_106_22401_20120321_012046_outLine
+BABEL_BP_106_22401_20120321_013515_inLine
+BABEL_BP_106_22401_20120321_013515_outLine
+BABEL_BP_106_22566_20120318_130741_inLine
+BABEL_BP_106_22566_20120318_130741_outLine
+BABEL_BP_106_25041_20120318_183127_inLine
+BABEL_BP_106_25041_20120318_183127_outLine
+BABEL_BP_106_25072_20120307_172016_inLine
+BABEL_BP_106_25072_20120307_172016_outLine
+BABEL_BP_106_25072_20120307_173008_inLine
+BABEL_BP_106_25072_20120307_173008_outLine
+BABEL_BP_106_27645_20120309_195238_inLine
+BABEL_BP_106_27645_20120309_195238_outLine
+BABEL_BP_106_27825_20120525_165434_inLine
+BABEL_BP_106_29259_20120525_174551_inLine
+BABEL_BP_106_30168_20120417_211215_inLine
+BABEL_BP_106_30722_20120228_173748_inLine
+BABEL_BP_106_30722_20120228_175207_inLine
+BABEL_BP_106_30722_20120228_175207_outLine
+BABEL_BP_106_30722_20120228_180341_inLine
+BABEL_BP_106_30722_20120228_180341_outLine
+BABEL_BP_106_31350_20120305_132208_inLine
+BABEL_BP_106_31451_20120430_160735_inLine
+BABEL_BP_106_31614_20120315_181514_inLine
+BABEL_BP_106_31614_20120315_181514_outLine
+BABEL_BP_106_32132_20120604_141124_inLine
+BABEL_BP_106_34732_20120504_011240_inLine
+BABEL_BP_106_34732_20120504_011240_outLine
+BABEL_BP_106_36828_20120413_195545_inLine
+BABEL_BP_106_36828_20120413_195545_outLine
+BABEL_BP_106_37940_20120509_134420_inLine
+BABEL_BP_106_38524_20120531_115250_inLine
+BABEL_BP_106_40385_20120316_121848_inLine
+BABEL_BP_106_40385_20120316_123312_inLine
+BABEL_BP_106_41146_20120127_174843_outLine
+BABEL_BP_106_41456_20120417_215741_inLine
+BABEL_BP_106_41456_20120417_215741_outLine
+BABEL_BP_106_41471_20120227_013419_inLine
+BABEL_BP_106_41471_20120227_013419_outLine
+BABEL_BP_106_41471_20120227_015846_inLine
+BABEL_BP_106_41471_20120227_015846_outLine
+BABEL_BP_106_41797_20120418_010121_inLine
+BABEL_BP_106_41797_20120418_010121_outLine
+BABEL_BP_106_44500_20120307_165936_inLine
+BABEL_BP_106_44500_20120307_165936_outLine
+BABEL_BP_106_45570_20120411_165807_inLine
+BABEL_BP_106_45570_20120411_165807_outLine
+BABEL_BP_106_45929_20120524_212453_inLine
+BABEL_BP_106_45929_20120524_220624_inLine
+BABEL_BP_106_46409_20120213_193348_outLine
+BABEL_BP_106_48281_20120208_172243_inLine
+BABEL_BP_106_48281_20120208_172243_outLine
+BABEL_BP_106_48559_20120417_130856_inLine
+BABEL_BP_106_48559_20120417_130856_outLine
+BABEL_BP_106_48559_20120417_140813_inLine
+BABEL_BP_106_48559_20120417_140813_outLine
+BABEL_BP_106_48645_20120304_124310_inLine
+BABEL_BP_106_48645_20120304_124310_outLine
+BABEL_BP_106_48727_20120530_170050_inLine
+BABEL_BP_106_49351_20120315_214910_inLine
+BABEL_BP_106_49351_20120315_214910_outLine
+BABEL_BP_106_50112_20120327_165821_inLine
+BABEL_BP_106_50112_20120327_165821_outLine
+BABEL_BP_106_50757_20120519_142209_inLine
+BABEL_BP_106_53278_20120304_182746_inLine
+BABEL_BP_106_53278_20120304_182746_outLine
+BABEL_BP_106_54285_20120304_170422_inLine
+BABEL_BP_106_54285_20120304_170422_outLine
+BABEL_BP_106_54339_20120220_233532_inLine
+BABEL_BP_106_54339_20120220_233532_outLine
+BABEL_BP_106_54339_20120220_235208_inLine
+BABEL_BP_106_54339_20120220_235208_outLine
+BABEL_BP_106_56648_20120221_204115_inLine
+BABEL_BP_106_56648_20120221_204115_outLine
+BABEL_BP_106_59454_20120302_005653_inLine
+BABEL_BP_106_59454_20120302_013702_inLine
+BABEL_BP_106_59454_20120302_013702_outLine
+BABEL_BP_106_59736_20120517_215232_inLine
+BABEL_BP_106_60064_20120405_122048_inLine
+BABEL_BP_106_60064_20120405_122049_outLine
+BABEL_BP_106_60183_20120227_184542_outLine
+BABEL_BP_106_60183_20120227_185937_outLine
+BABEL_BP_106_61408_20120313_190656_inLine
+BABEL_BP_106_61408_20120313_190656_outLine
+BABEL_BP_106_61408_20120313_191850_inLine
+BABEL_BP_106_61408_20120313_191850_outLine
+BABEL_BP_106_61762_20120210_205954_inLine
+BABEL_BP_106_61762_20120210_205954_outLine
+BABEL_BP_106_62589_20120526_194818_inLine
+BABEL_BP_106_62710_20120226_042014_inLine
+BABEL_BP_106_62710_20120226_042014_outLine
+BABEL_BP_106_62710_20120226_043927_inLine
+BABEL_BP_106_62710_20120226_043927_outLine
+BABEL_BP_106_63116_20120301_233405_inLine
+BABEL_BP_106_63116_20120301_233405_outLine
+BABEL_BP_106_64178_20120512_001535_inLine
+BABEL_BP_106_64178_20120512_001535_outLine
+BABEL_BP_106_64300_20120517_211937_inLine
+BABEL_BP_106_64300_20120517_213314_inLine
+BABEL_BP_106_65837_20120314_013343_inLine
+BABEL_BP_106_65837_20120314_013343_outLine
+BABEL_BP_106_69871_20120308_190521_inLine
+BABEL_BP_106_69871_20120308_191814_inLine
+BABEL_BP_106_69871_20120308_191814_outLine
+BABEL_BP_106_70323_20120315_214239_inLine
+BABEL_BP_106_70323_20120315_214239_outLine
+BABEL_BP_106_70530_20120315_171715_inLine
+BABEL_BP_106_70530_20120315_171715_outLine
+BABEL_BP_106_70773_20120331_201706_inLine
+BABEL_BP_106_70773_20120331_201706_outLine
+BABEL_BP_106_72647_20120314_140705_inLine
+BABEL_BP_106_72647_20120314_140705_outLine
+BABEL_BP_106_72908_20120301_214516_inLine
+BABEL_BP_106_72908_20120301_214516_outLine
+BABEL_BP_106_73050_20120229_190728_inLine
+BABEL_BP_106_73050_20120229_190728_outLine
+BABEL_BP_106_73050_20120229_192106_inLine
+BABEL_BP_106_73050_20120229_192106_outLine
+BABEL_BP_106_73122_20120131_151743_inLine
+BABEL_BP_106_73122_20120131_151743_outLine
+BABEL_BP_106_73205_20120131_011807_inLine
+BABEL_BP_106_73205_20120131_011807_outLine
+BABEL_BP_106_74940_20120324_000134_inLine
+BABEL_BP_106_74940_20120324_000134_outLine
+BABEL_BP_106_78487_20120228_180247_inLine
+BABEL_BP_106_78487_20120228_184448_inLine
+BABEL_BP_106_78487_20120228_185132_inLine
+BABEL_BP_106_78487_20120229_165653_inLine
+BABEL_BP_106_78487_20120229_180156_inLine
+BABEL_BP_106_82007_20120511_234807_inLine
+BABEL_BP_106_82007_20120511_234807_outLine
+BABEL_BP_106_83012_20120227_002142_inLine
+BABEL_BP_106_83012_20120227_002142_outLine
+BABEL_BP_106_83012_20120227_004851_inLine
+BABEL_BP_106_83012_20120227_004851_outLine
+BABEL_BP_106_83053_20120418_185830_inLine
+BABEL_BP_106_85719_20120315_175358_inLine
+BABEL_BP_106_85719_20120315_175358_outLine
+BABEL_BP_106_85883_20120221_204813_inLine
+BABEL_BP_106_85883_20120221_204813_outLine
+BABEL_BP_106_85883_20120221_210017_inLine
+BABEL_BP_106_85883_20120221_210017_outLine
+BABEL_BP_106_86211_20120323_003846_inLine
+BABEL_BP_106_86211_20120323_003846_outLine
+BABEL_BP_106_86339_20120517_211109_inLine
+BABEL_BP_106_86900_20120129_013513_inLine
+BABEL_BP_106_86900_20120129_013513_outLine
+BABEL_BP_106_86998_20120316_235214_inLine
+BABEL_BP_106_86998_20120316_235214_outLine
+BABEL_BP_106_88932_20120210_024536_inLine
+BABEL_BP_106_88932_20120210_024536_outLine
+BABEL_BP_106_88932_20120210_031316_inLine
+BABEL_BP_106_88932_20120210_031316_outLine
+BABEL_BP_106_89619_20120216_201137_inLine
+BABEL_BP_106_89619_20120216_201137_outLine
+BABEL_BP_106_89619_20120216_202208_inLine
+BABEL_BP_106_89619_20120216_202208_outLine
+BABEL_BP_106_89674_20120128_172359_inLine
+BABEL_BP_106_89674_20120128_172359_outLine
+BABEL_BP_106_89674_20120128_175646_inLine
+BABEL_BP_106_89674_20120128_175646_outLine
+BABEL_BP_106_89818_20120323_031837_inLine
+BABEL_BP_106_89818_20120323_033337_inLine
+BABEL_BP_106_89818_20120323_033337_outLine
+BABEL_BP_106_90046_20120316_225047_inLine
+BABEL_BP_106_90046_20120316_225047_outLine
+BABEL_BP_106_90559_20120404_191014_inLine
+BABEL_BP_106_90559_20120404_191014_outLine
+BABEL_BP_106_91007_20120405_174537_inLine
+BABEL_BP_106_91007_20120405_174537_outLine
+BABEL_BP_106_92072_20120315_162353_inLine
+BABEL_BP_106_92072_20120315_162353_outLine
+BABEL_BP_106_92094_20120519_171316_inLine
+BABEL_BP_106_92328_20120318_183827_inLine
+BABEL_BP_106_92328_20120318_183827_outLine
+BABEL_BP_106_93506_20120501_114215_inLine
+BABEL_BP_106_94696_20120405_132036_inLine
+BABEL_BP_106_94696_20120405_132036_outLine
+BABEL_BP_106_94696_20120405_132924_inLine
+BABEL_BP_106_94696_20120405_132924_outLine
+BABEL_BP_106_95225_20120323_234548_inLine
+BABEL_BP_106_95225_20120323_234548_outLine
+BABEL_BP_106_95572_20120501_120940_inLine
+BABEL_BP_106_95637_20120210_215628_inLine
+BABEL_BP_106_95637_20120210_215628_outLine
+BABEL_BP_106_97052_20120315_205207_inLine
+BABEL_BP_106_97052_20120315_205207_outLine
+BABEL_BP_106_97941_20120228_153714_inLine
+BABEL_BP_106_97941_20120228_155826_inLine
+BABEL_BP_106_98099_20120224_234716_inLine
+BABEL_BP_106_98099_20120224_234716_outLine
+BABEL_BP_106_99503_20120328_011545_inLine
+BABEL_BP_106_99503_20120328_011545_outLine
+BABEL_BP_106_99764_20120309_004852_inLine
+BABEL_BP_106_99764_20120309_004852_outLine
diff --git a/egs/babel/s5d/conf/lists/106-tagalog/evalpart1.list b/egs/babel/s5d/conf/lists/106-tagalog/evalpart1.list
new file mode 100644
index 00000000000..690fec715fb
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/106-tagalog/evalpart1.list
@@ -0,0 +1,69 @@
+BABEL_BP_106_11868_20120403_204010_inLine
+BABEL_BP_106_11868_20120403_204010_outLine
+BABEL_BP_106_18730_20120322_025159_inLine
+BABEL_BP_106_18730_20120322_025159_outLine
+BABEL_BP_106_18991_20120208_210053_inLine
+BABEL_BP_106_18991_20120208_210053_outLine
+BABEL_BP_106_20213_20120417_130013_inLine
+BABEL_BP_106_20213_20120417_130013_outLine
+BABEL_BP_106_20307_20120409_012136_inLine
+BABEL_BP_106_20307_20120409_012136_outLine
+BABEL_BP_106_20685_20120323_031815_inLine
+BABEL_BP_106_20685_20120323_031815_outLine
+BABEL_BP_106_22401_20120321_012046_inLine
+BABEL_BP_106_22401_20120321_012046_outLine
+BABEL_BP_106_22401_20120321_013515_inLine
+BABEL_BP_106_22401_20120321_013515_outLine
+BABEL_BP_106_22566_20120318_130741_inLine
+BABEL_BP_106_22566_20120318_130741_outLine
+BABEL_BP_106_27645_20120309_195238_inLine
+BABEL_BP_106_27645_20120309_195238_outLine
+BABEL_BP_106_32132_20120604_141124_inLine
+BABEL_BP_106_34732_20120504_011240_inLine
+BABEL_BP_106_34732_20120504_011240_outLine
+BABEL_BP_106_41471_20120227_013419_inLine
+BABEL_BP_106_41471_20120227_013419_outLine
+BABEL_BP_106_41471_20120227_015846_inLine
+BABEL_BP_106_41471_20120227_015846_outLine
+BABEL_BP_106_48281_20120208_172243_inLine
+BABEL_BP_106_48281_20120208_172243_outLine
+BABEL_BP_106_48645_20120304_124310_inLine
+BABEL_BP_106_48645_20120304_124310_outLine
+BABEL_BP_106_53278_20120304_182746_inLine
+BABEL_BP_106_53278_20120304_182746_outLine
+BABEL_BP_106_54285_20120304_170422_inLine
+BABEL_BP_106_54285_20120304_170422_outLine
+BABEL_BP_106_54339_20120220_233532_inLine
+BABEL_BP_106_54339_20120220_233532_outLine
+BABEL_BP_106_54339_20120220_235208_inLine
+BABEL_BP_106_54339_20120220_235208_outLine
+BABEL_BP_106_63116_20120301_233405_inLine
+BABEL_BP_106_63116_20120301_233405_outLine
+BABEL_BP_106_72647_20120314_140705_inLine
+BABEL_BP_106_72647_20120314_140705_outLine
+BABEL_BP_106_73050_20120229_190728_inLine
+BABEL_BP_106_73050_20120229_190728_outLine
+BABEL_BP_106_73050_20120229_192106_inLine
+BABEL_BP_106_73050_20120229_192106_outLine
+BABEL_BP_106_73122_20120131_151743_inLine
+BABEL_BP_106_73122_20120131_151743_outLine
+BABEL_BP_106_73205_20120131_011807_inLine
+BABEL_BP_106_73205_20120131_011807_outLine
+BABEL_BP_106_74940_20120324_000134_inLine
+BABEL_BP_106_74940_20120324_000134_outLine
+BABEL_BP_106_82007_20120511_234807_inLine
+BABEL_BP_106_82007_20120511_234807_outLine
+BABEL_BP_106_85719_20120315_175358_inLine
+BABEL_BP_106_85719_20120315_175358_outLine
+BABEL_BP_106_86998_20120316_235214_inLine
+BABEL_BP_106_86998_20120316_235214_outLine
+BABEL_BP_106_90046_20120316_225047_inLine
+BABEL_BP_106_90046_20120316_225047_outLine
+BABEL_BP_106_90559_20120404_191014_inLine
+BABEL_BP_106_90559_20120404_191014_outLine
+BABEL_BP_106_95637_20120210_215628_inLine
+BABEL_BP_106_95637_20120210_215628_outLine
+BABEL_BP_106_97052_20120315_205207_inLine
+BABEL_BP_106_97052_20120315_205207_outLine
+BABEL_BP_106_97941_20120228_153714_inLine
+BABEL_BP_106_97941_20120228_155826_inLine
diff --git a/egs/babel/s5d/conf/lists/106-tagalog/train.FullLP.list b/egs/babel/s5d/conf/lists/106-tagalog/train.FullLP.list
new file mode 100644
index 00000000000..daa7243e0f2
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/106-tagalog/train.FullLP.list
@@ -0,0 +1,1138 @@
+BABEL_BP_106_00300_20120415_005214_inLine
+BABEL_BP_106_00315_20120419_231124_inLine
+BABEL_BP_106_03420_20120409_204941_inLine
+BABEL_BP_106_03420_20120409_204941_outLine
+BABEL_BP_106_03420_20120409_211810_inLine
+BABEL_BP_106_03420_20120409_211811_outLine
+BABEL_BP_106_03695_20120401_185127_inLine
+BABEL_BP_106_03695_20120401_190556_inLine
+BABEL_BP_106_04577_20120409_220039_inLine
+BABEL_BP_106_04577_20120409_220039_outLine
+BABEL_BP_106_05510_20120505_014918_inLine
+BABEL_BP_106_07199_20120407_224853_inLine
+BABEL_BP_106_07199_20120407_224853_outLine
+BABEL_BP_106_07924_20120414_191906_inLine
+BABEL_BP_106_09087_20120304_155326_outLine
+BABEL_BP_106_09087_20120304_161115_outLine
+BABEL_BP_106_10160_20120322_024644_inLine
+BABEL_BP_106_10160_20120322_024644_outLine
+BABEL_BP_106_10271_20120307_153101_inLine
+BABEL_BP_106_10271_20120307_153101_outLine
+BABEL_BP_106_10470_20120229_011606_inLine
+BABEL_BP_106_10470_20120229_011606_outLine
+BABEL_BP_106_10545_20120315_185249_inLine
+BABEL_BP_106_10545_20120315_185249_outLine
+BABEL_BP_106_10643_20120407_222930_inLine
+BABEL_BP_106_10643_20120407_222930_outLine
+BABEL_BP_106_10732_20120604_111534_inLine
+BABEL_BP_106_10732_20120604_113159_inLine
+BABEL_BP_106_10985_20120313_013835_inLine
+BABEL_BP_106_10985_20120313_013835_outLine
+BABEL_BP_106_11004_20120603_171542_inLine
+BABEL_BP_106_11152_20120421_140313_inLine
+BABEL_BP_106_11158_20120314_183907_inLine
+BABEL_BP_106_11158_20120314_183907_outLine
+BABEL_BP_106_11158_20120314_193006_inLine
+BABEL_BP_106_11158_20120314_193006_outLine +BABEL_BP_106_11197_20120327_225746_inLine +BABEL_BP_106_11197_20120327_225746_outLine +BABEL_BP_106_11197_20120327_231450_inLine +BABEL_BP_106_11197_20120327_231450_outLine +BABEL_BP_106_11208_20120409_211504_inLine +BABEL_BP_106_11233_20120407_231020_inLine +BABEL_BP_106_11233_20120407_231020_outLine +BABEL_BP_106_11366_20120323_024622_inLine +BABEL_BP_106_11366_20120323_024622_outLine +BABEL_BP_106_11366_20120323_025914_inLine +BABEL_BP_106_11366_20120323_025914_outLine +BABEL_BP_106_11479_20120202_183704_inLine +BABEL_BP_106_11603_20120331_150248_inLine +BABEL_BP_106_11603_20120331_150248_outLine +BABEL_BP_106_11603_20120331_151525_inLine +BABEL_BP_106_11603_20120331_151525_outLine +BABEL_BP_106_11627_20120210_040828_inLine +BABEL_BP_106_11650_20120315_191912_outLine +BABEL_BP_106_11650_20120315_215538_outLine +BABEL_BP_106_11982_20120219_202255_inLine +BABEL_BP_106_11982_20120219_202255_outLine +BABEL_BP_106_12003_20120205_192229_inLine +BABEL_BP_106_12003_20120205_192229_outLine +BABEL_BP_106_12120_20120318_023316_inLine +BABEL_BP_106_12120_20120318_024105_inLine +BABEL_BP_106_12120_20120318_024557_inLine +BABEL_BP_106_12120_20120318_025233_inLine +BABEL_BP_106_12248_20120304_225237_inLine +BABEL_BP_106_12486_20120302_130425_inLine +BABEL_BP_106_12486_20120302_130425_outLine +BABEL_BP_106_12535_20120228_130707_inLine +BABEL_BP_106_12535_20120228_131530_inLine +BABEL_BP_106_12535_20120228_135537_inLine +BABEL_BP_106_12643_20120315_235155_inLine +BABEL_BP_106_12643_20120315_235155_outLine +BABEL_BP_106_12667_20120308_204253_inLine +BABEL_BP_106_12667_20120308_204253_outLine +BABEL_BP_106_12807_20120312_175004_inLine +BABEL_BP_106_12807_20120312_175004_outLine +BABEL_BP_106_12963_20120309_184450_inLine +BABEL_BP_106_12963_20120309_184450_outLine +BABEL_BP_106_12979_20120308_200109_inLine +BABEL_BP_106_13065_20120422_032208_inLine +BABEL_BP_106_13065_20120422_035054_inLine +BABEL_BP_106_13071_20120315_000734_inLine +BABEL_BP_106_13071_20120315_000734_outLine +BABEL_BP_106_13071_20120315_001539_inLine +BABEL_BP_106_13071_20120315_001539_outLine +BABEL_BP_106_13341_20120601_211500_inLine +BABEL_BP_106_13441_20120226_235451_inLine +BABEL_BP_106_13441_20120226_235451_outLine +BABEL_BP_106_13476_20120307_215216_inLine +BABEL_BP_106_13476_20120307_215216_outLine +BABEL_BP_106_13530_20120404_122619_inLine +BABEL_BP_106_13530_20120404_123636_inLine +BABEL_BP_106_13709_20120501_184324_inLine +BABEL_BP_106_13795_20120213_233957_inLine +BABEL_BP_106_13795_20120213_233957_outLine +BABEL_BP_106_14059_20120323_040739_inLine +BABEL_BP_106_14059_20120323_040739_outLine +BABEL_BP_106_14524_20120416_134207_inLine +BABEL_BP_106_14524_20120419_235605_inLine +BABEL_BP_106_14591_20120511_002610_inLine +BABEL_BP_106_14770_20120323_025454_inLine +BABEL_BP_106_14770_20120323_025454_outLine +BABEL_BP_106_14836_20120221_185410_inLine +BABEL_BP_106_14836_20120221_185410_outLine +BABEL_BP_106_14840_20120419_212050_inLine +BABEL_BP_106_14936_20120201_174445_inLine +BABEL_BP_106_14936_20120201_174445_outLine +BABEL_BP_106_15234_20120229_012024_inLine +BABEL_BP_106_15234_20120229_012024_outLine +BABEL_BP_106_15353_20120229_125558_inLine +BABEL_BP_106_15353_20120229_125558_outLine +BABEL_BP_106_15859_20120229_175309_inLine +BABEL_BP_106_15859_20120229_175309_outLine +BABEL_BP_106_15940_20120229_001305_inLine +BABEL_BP_106_15940_20120229_001305_outLine +BABEL_BP_106_15966_20120414_160956_inLine +BABEL_BP_106_16117_20120315_004358_inLine 
+BABEL_BP_106_16117_20120315_004358_outLine +BABEL_BP_106_16185_20120314_174822_outLine +BABEL_BP_106_16307_20120408_002125_inLine +BABEL_BP_106_16307_20120408_002125_outLine +BABEL_BP_106_16385_20120212_202256_inLine +BABEL_BP_106_16385_20120212_202256_outLine +BABEL_BP_106_16406_20120309_161540_inLine +BABEL_BP_106_16783_20120601_214201_inLine +BABEL_BP_106_16984_20120226_022713_inLine +BABEL_BP_106_17013_20120227_184346_inLine +BABEL_BP_106_17013_20120227_184346_outLine +BABEL_BP_106_17093_20120217_180258_outLine +BABEL_BP_106_17203_20120129_171949_inLine +BABEL_BP_106_17353_20120314_160721_inLine +BABEL_BP_106_17353_20120314_163054_inLine +BABEL_BP_106_17452_20120408_203139_inLine +BABEL_BP_106_17452_20120408_204534_inLine +BABEL_BP_106_17452_20120408_205342_inLine +BABEL_BP_106_17511_20120301_194447_inLine +BABEL_BP_106_17511_20120301_194447_outLine +BABEL_BP_106_17606_20120225_235727_inLine +BABEL_BP_106_17606_20120225_235727_outLine +BABEL_BP_106_17850_20120224_223940_inLine +BABEL_BP_106_17850_20120224_223940_outLine +BABEL_BP_106_18209_20120304_004340_inLine +BABEL_BP_106_18209_20120304_004340_outLine +BABEL_BP_106_18701_20120302_161857_inLine +BABEL_BP_106_18701_20120302_161857_outLine +BABEL_BP_106_18802_20120318_014432_inLine +BABEL_BP_106_18802_20120318_014432_outLine +BABEL_BP_106_18903_20120317_121505_inLine +BABEL_BP_106_19063_20120415_183305_inLine +BABEL_BP_106_19248_20120307_192705_inLine +BABEL_BP_106_19248_20120307_192705_outLine +BABEL_BP_106_19290_20120605_180800_inLine +BABEL_BP_106_19479_20120501_154630_inLine +BABEL_BP_106_19479_20120501_155913_inLine +BABEL_BP_106_19479_20120501_165350_inLine +BABEL_BP_106_19619_20120219_023026_inLine +BABEL_BP_106_19619_20120219_023026_outLine +BABEL_BP_106_19656_20120227_201656_inLine +BABEL_BP_106_19656_20120227_201656_outLine +BABEL_BP_106_19861_20120308_181811_inLine +BABEL_BP_106_19861_20120308_181811_outLine +BABEL_BP_106_19867_20120428_022912_inLine +BABEL_BP_106_19915_20120129_043730_inLine +BABEL_BP_106_19915_20120129_043730_outLine +BABEL_BP_106_20320_20120206_212251_inLine +BABEL_BP_106_20320_20120206_212251_outLine +BABEL_BP_106_20591_20120225_172142_inLine +BABEL_BP_106_20680_20120314_195655_inLine +BABEL_BP_106_20680_20120314_233935_inLine +BABEL_BP_106_20680_20120314_233935_outLine +BABEL_BP_106_20740_20120229_234935_inLine +BABEL_BP_106_20741_20120604_131021_inLine +BABEL_BP_106_20775_20120309_184437_inLine +BABEL_BP_106_20775_20120309_184437_outLine +BABEL_BP_106_20985_20120314_184025_inLine +BABEL_BP_106_20985_20120314_184025_outLine +BABEL_BP_106_21050_20120317_181509_outLine +BABEL_BP_106_21258_20120205_012953_outLine +BABEL_BP_106_21259_20120331_174446_inLine +BABEL_BP_106_21259_20120331_174446_outLine +BABEL_BP_106_21259_20120331_184534_inLine +BABEL_BP_106_21259_20120331_184534_outLine +BABEL_BP_106_21259_20120331_225507_inLine +BABEL_BP_106_21259_20120331_225507_outLine +BABEL_BP_106_21306_20120417_233743_inLine +BABEL_BP_106_21367_20120317_185340_inLine +BABEL_BP_106_21367_20120317_185340_outLine +BABEL_BP_106_21430_20120207_184620_inLine +BABEL_BP_106_21430_20120207_184620_outLine +BABEL_BP_106_21518_20120225_224701_inLine +BABEL_BP_106_21518_20120225_224701_outLine +BABEL_BP_106_21556_20120313_021608_inLine +BABEL_BP_106_21556_20120313_021608_outLine +BABEL_BP_106_21714_20120318_174632_inLine +BABEL_BP_106_21714_20120318_174632_outLine +BABEL_BP_106_21845_20120310_002143_inLine +BABEL_BP_106_21845_20120310_002143_outLine +BABEL_BP_106_22034_20120317_021754_inLine 
+BABEL_BP_106_22034_20120317_021754_outLine +BABEL_BP_106_22272_20120318_201647_inLine +BABEL_BP_106_22272_20120318_201647_outLine +BABEL_BP_106_22408_20120213_221623_inLine +BABEL_BP_106_22408_20120213_221623_outLine +BABEL_BP_106_22696_20120308_195105_inLine +BABEL_BP_106_22696_20120308_195105_outLine +BABEL_BP_106_22903_20120224_164344_inLine +BABEL_BP_106_22903_20120224_164344_outLine +BABEL_BP_106_22910_20120129_213616_inLine +BABEL_BP_106_22910_20120129_213616_outLine +BABEL_BP_106_22973_20120311_224022_inLine +BABEL_BP_106_22973_20120311_224022_outLine +BABEL_BP_106_23167_20120128_183627_inLine +BABEL_BP_106_23167_20120128_183627_outLine +BABEL_BP_106_23571_20120229_180344_inLine +BABEL_BP_106_23571_20120229_180344_outLine +BABEL_BP_106_23629_20120304_212835_inLine +BABEL_BP_106_23629_20120304_212835_outLine +BABEL_BP_106_23878_20120209_170350_inLine +BABEL_BP_106_23878_20120209_170350_outLine +BABEL_BP_106_23995_20120225_011657_inLine +BABEL_BP_106_23995_20120225_011657_outLine +BABEL_BP_106_24084_20120318_015502_inLine +BABEL_BP_106_24124_20120415_182317_inLine +BABEL_BP_106_24335_20120408_005503_inLine +BABEL_BP_106_24335_20120408_012607_inLine +BABEL_BP_106_24335_20120408_012607_outLine +BABEL_BP_106_24441_20120417_211954_inLine +BABEL_BP_106_24569_20120307_232752_inLine +BABEL_BP_106_24569_20120307_232752_outLine +BABEL_BP_106_24580_20120604_165125_inLine +BABEL_BP_106_24638_20120419_013630_inLine +BABEL_BP_106_24661_20120322_221220_inLine +BABEL_BP_106_24661_20120322_221220_outLine +BABEL_BP_106_24817_20120301_031015_inLine +BABEL_BP_106_24817_20120301_031015_outLine +BABEL_BP_106_25279_20120401_195557_inLine +BABEL_BP_106_25479_20120315_154117_inLine +BABEL_BP_106_25479_20120315_154117_outLine +BABEL_BP_106_25479_20120315_160418_inLine +BABEL_BP_106_25479_20120315_160418_outLine +BABEL_BP_106_25502_20120129_015831_inLine +BABEL_BP_106_25502_20120129_015831_outLine +BABEL_BP_106_25735_20120314_233234_inLine +BABEL_BP_106_25735_20120314_233234_outLine +BABEL_BP_106_25751_20120227_221828_inLine +BABEL_BP_106_25751_20120227_221828_outLine +BABEL_BP_106_25866_20120304_181012_inLine +BABEL_BP_106_25866_20120304_181012_outLine +BABEL_BP_106_25871_20120228_005211_inLine +BABEL_BP_106_25871_20120228_005957_inLine +BABEL_BP_106_25871_20120228_012444_inLine +BABEL_BP_106_25904_20120213_182237_inLine +BABEL_BP_106_25904_20120213_182237_outLine +BABEL_BP_106_26164_20120401_201225_inLine +BABEL_BP_106_26164_20120401_201225_outLine +BABEL_BP_106_26164_20120401_202221_inLine +BABEL_BP_106_26164_20120401_202221_outLine +BABEL_BP_106_26348_20120314_173141_outLine +BABEL_BP_106_26598_20120415_181527_inLine +BABEL_BP_106_26644_20120411_154709_inLine +BABEL_BP_106_26684_20120211_170412_inLine +BABEL_BP_106_26684_20120211_170412_outLine +BABEL_BP_106_26786_20120306_151101_inLine +BABEL_BP_106_26786_20120306_151101_outLine +BABEL_BP_106_26901_20120212_192301_inLine +BABEL_BP_106_26901_20120212_192301_outLine +BABEL_BP_106_26901_20120212_193813_inLine +BABEL_BP_106_26901_20120212_193813_outLine +BABEL_BP_106_27363_20120315_165356_inLine +BABEL_BP_106_27890_20120302_171119_inLine +BABEL_BP_106_27890_20120302_171119_outLine +BABEL_BP_106_27916_20120403_232720_inLine +BABEL_BP_106_27916_20120403_232720_outLine +BABEL_BP_106_27916_20120403_233612_inLine +BABEL_BP_106_27916_20120403_233612_outLine +BABEL_BP_106_28683_20120331_165731_inLine +BABEL_BP_106_28754_20120205_171932_inLine +BABEL_BP_106_28754_20120205_171932_outLine +BABEL_BP_106_28754_20120205_174934_inLine 
+BABEL_BP_106_28754_20120205_174934_outLine +BABEL_BP_106_29087_20120315_125218_outLine +BABEL_BP_106_29087_20120315_130643_outLine +BABEL_BP_106_29097_20120127_001938_inLine +BABEL_BP_106_29097_20120127_001938_outLine +BABEL_BP_106_29133_20120129_171742_outLine +BABEL_BP_106_29290_20120212_151530_inLine +BABEL_BP_106_29290_20120212_151530_outLine +BABEL_BP_106_29328_20120212_210507_outLine +BABEL_BP_106_29407_20120403_225249_inLine +BABEL_BP_106_29421_20120213_182542_inLine +BABEL_BP_106_29421_20120213_182542_outLine +BABEL_BP_106_29512_20120226_190947_inLine +BABEL_BP_106_29512_20120226_190947_outLine +BABEL_BP_106_29545_20120331_153345_outLine +BABEL_BP_106_29589_20120225_144930_inLine +BABEL_BP_106_29589_20120225_144930_outLine +BABEL_BP_106_29988_20120301_225306_inLine +BABEL_BP_106_29988_20120301_234957_inLine +BABEL_BP_106_30418_20120401_162421_inLine +BABEL_BP_106_30418_20120401_162421_outLine +BABEL_BP_106_30583_20120129_163331_inLine +BABEL_BP_106_30583_20120129_163331_outLine +BABEL_BP_106_30642_20120302_150419_inLine +BABEL_BP_106_30642_20120302_150419_outLine +BABEL_BP_106_30818_20120503_004014_inLine +BABEL_BP_106_31031_20120215_010958_inLine +BABEL_BP_106_31031_20120215_010958_outLine +BABEL_BP_106_31256_20120317_140651_inLine +BABEL_BP_106_31256_20120317_140651_outLine +BABEL_BP_106_31265_20120311_235253_inLine +BABEL_BP_106_31265_20120311_235253_outLine +BABEL_BP_106_31328_20120212_180708_inLine +BABEL_BP_106_31328_20120212_180708_outLine +BABEL_BP_106_31606_20120403_225528_inLine +BABEL_BP_106_31783_20120331_154149_inLine +BABEL_BP_106_31783_20120331_163639_inLine +BABEL_BP_106_31975_20120309_181134_inLine +BABEL_BP_106_31975_20120309_181134_outLine +BABEL_BP_106_32263_20120225_201234_inLine +BABEL_BP_106_32263_20120225_203654_inLine +BABEL_BP_106_32334_20120304_193216_outLine +BABEL_BP_106_32400_20120307_235432_inLine +BABEL_BP_106_32400_20120307_235432_outLine +BABEL_BP_106_32562_20120307_193633_inLine +BABEL_BP_106_32562_20120307_193633_outLine +BABEL_BP_106_32710_20120418_235030_inLine +BABEL_BP_106_32887_20120327_221120_inLine +BABEL_BP_106_32887_20120327_222408_inLine +BABEL_BP_106_32890_20120221_193416_inLine +BABEL_BP_106_32890_20120221_193417_outLine +BABEL_BP_106_33023_20120203_000619_inLine +BABEL_BP_106_33023_20120203_000619_outLine +BABEL_BP_106_33192_20120516_170543_inLine +BABEL_BP_106_33192_20120516_172023_inLine +BABEL_BP_106_33540_20120221_204916_outLine +BABEL_BP_106_33540_20120221_210930_outLine +BABEL_BP_106_33671_20120206_215709_outLine +BABEL_BP_106_33707_20120403_172641_inLine +BABEL_BP_106_33742_20120229_020923_inLine +BABEL_BP_106_33742_20120229_020923_outLine +BABEL_BP_106_33817_20120301_165159_inLine +BABEL_BP_106_33817_20120301_165159_outLine +BABEL_BP_106_33969_20120310_001559_inLine +BABEL_BP_106_33969_20120310_001559_outLine +BABEL_BP_106_34328_20120225_012732_inLine +BABEL_BP_106_34328_20120225_012732_outLine +BABEL_BP_106_34439_20120301_190320_inLine +BABEL_BP_106_34439_20120301_190320_outLine +BABEL_BP_106_34480_20120405_141959_inLine +BABEL_BP_106_34498_20120314_171141_inLine +BABEL_BP_106_34498_20120314_171141_outLine +BABEL_BP_106_34498_20120314_172341_inLine +BABEL_BP_106_34498_20120314_172341_outLine +BABEL_BP_106_34857_20120301_183238_inLine +BABEL_BP_106_34857_20120301_183238_outLine +BABEL_BP_106_34859_20120328_231638_inLine +BABEL_BP_106_34859_20120328_231638_outLine +BABEL_BP_106_34859_20120328_233134_inLine +BABEL_BP_106_34859_20120328_233134_outLine +BABEL_BP_106_34894_20120328_014528_inLine 
+BABEL_BP_106_34894_20120328_014528_outLine +BABEL_BP_106_34961_20120130_011357_inLine +BABEL_BP_106_34961_20120130_011357_outLine +BABEL_BP_106_35016_20120405_195810_outLine +BABEL_BP_106_35153_20120502_162803_inLine +BABEL_BP_106_35153_20120502_170536_inLine +BABEL_BP_106_35179_20120225_063734_inLine +BABEL_BP_106_35179_20120225_063734_outLine +BABEL_BP_106_35188_20120315_154007_inLine +BABEL_BP_106_35188_20120315_154007_outLine +BABEL_BP_106_35305_20120308_195828_inLine +BABEL_BP_106_35305_20120308_195828_outLine +BABEL_BP_106_35318_20120130_203231_inLine +BABEL_BP_106_35318_20120130_203231_outLine +BABEL_BP_106_35329_20120302_140638_inLine +BABEL_BP_106_35329_20120302_140638_outLine +BABEL_BP_106_35441_20120414_194638_inLine +BABEL_BP_106_35470_20120307_190826_inLine +BABEL_BP_106_35470_20120307_190826_outLine +BABEL_BP_106_35576_20120224_211651_inLine +BABEL_BP_106_35576_20120224_211651_outLine +BABEL_BP_106_35612_20120303_000710_inLine +BABEL_BP_106_35612_20120303_000710_outLine +BABEL_BP_106_35706_20120501_011424_inLine +BABEL_BP_106_35951_20120419_001936_inLine +BABEL_BP_106_35972_20120411_154338_inLine +BABEL_BP_106_35972_20120411_155457_inLine +BABEL_BP_106_36143_20120128_230220_outLine +BABEL_BP_106_36268_20120209_180615_inLine +BABEL_BP_106_36268_20120209_180615_outLine +BABEL_BP_106_36276_20120317_130620_inLine +BABEL_BP_106_36276_20120317_134742_inLine +BABEL_BP_106_36276_20120317_134742_outLine +BABEL_BP_106_36383_20120225_021045_inLine +BABEL_BP_106_36383_20120225_021045_outLine +BABEL_BP_106_36391_20120205_201108_inLine +BABEL_BP_106_36391_20120205_201108_outLine +BABEL_BP_106_36868_20120417_204120_inLine +BABEL_BP_106_36868_20120417_210037_inLine +BABEL_BP_106_37064_20120324_130301_inLine +BABEL_BP_106_37064_20120324_130301_outLine +BABEL_BP_106_37258_20120304_002638_inLine +BABEL_BP_106_37258_20120304_002638_outLine +BABEL_BP_106_37260_20120130_191541_inLine +BABEL_BP_106_37260_20120130_191541_outLine +BABEL_BP_106_37766_20120229_163334_inLine +BABEL_BP_106_37766_20120229_163334_outLine +BABEL_BP_106_38175_20120209_214322_inLine +BABEL_BP_106_38175_20120209_214322_outLine +BABEL_BP_106_38248_20120404_214148_inLine +BABEL_BP_106_38248_20120404_222004_inLine +BABEL_BP_106_38248_20120404_222004_outLine +BABEL_BP_106_38248_20120404_223317_inLine +BABEL_BP_106_38248_20120404_223317_outLine +BABEL_BP_106_38396_20120323_023143_inLine +BABEL_BP_106_38396_20120323_023143_outLine +BABEL_BP_106_38464_20120318_215505_inLine +BABEL_BP_106_38464_20120318_215505_outLine +BABEL_BP_106_38464_20120318_220931_inLine +BABEL_BP_106_38464_20120318_220931_outLine +BABEL_BP_106_38635_20120605_171532_inLine +BABEL_BP_106_38640_20120130_174518_inLine +BABEL_BP_106_38640_20120130_174518_outLine +BABEL_BP_106_38656_20120321_230900_inLine +BABEL_BP_106_38656_20120321_232832_inLine +BABEL_BP_106_38879_20120203_203542_inLine +BABEL_BP_106_38912_20120307_023807_outLine +BABEL_BP_106_38956_20120127_010500_inLine +BABEL_BP_106_38956_20120127_010500_outLine +BABEL_BP_106_39080_20120225_180230_inLine +BABEL_BP_106_39080_20120225_180230_outLine +BABEL_BP_106_39114_20120315_131924_inLine +BABEL_BP_106_39114_20120315_135035_inLine +BABEL_BP_106_39114_20120315_142026_inLine +BABEL_BP_106_39179_20120331_134039_outLine +BABEL_BP_106_39179_20120331_134617_outLine +BABEL_BP_106_39264_20120228_015102_inLine +BABEL_BP_106_39264_20120228_015102_outLine +BABEL_BP_106_39264_20120228_022421_inLine +BABEL_BP_106_39264_20120228_022421_outLine +BABEL_BP_106_39563_20120414_162942_inLine 
+BABEL_BP_106_39756_20120312_165815_inLine +BABEL_BP_106_40002_20120301_225806_inLine +BABEL_BP_106_40197_20120308_211406_inLine +BABEL_BP_106_40211_20120329_005438_inLine +BABEL_BP_106_40211_20120329_005439_outLine +BABEL_BP_106_40288_20120516_161057_inLine +BABEL_BP_106_40439_20120405_122042_inLine +BABEL_BP_106_40510_20120221_155613_inLine +BABEL_BP_106_40510_20120221_155613_outLine +BABEL_BP_106_40680_20120511_153305_inLine +BABEL_BP_106_40882_20120418_205714_inLine +BABEL_BP_106_41327_20120128_163042_inLine +BABEL_BP_106_41327_20120128_163042_outLine +BABEL_BP_106_41541_20120315_003903_inLine +BABEL_BP_106_41541_20120315_003903_outLine +BABEL_BP_106_41557_20120324_040736_inLine +BABEL_BP_106_41557_20120324_040736_outLine +BABEL_BP_106_41557_20120324_043210_inLine +BABEL_BP_106_41557_20120324_043210_outLine +BABEL_BP_106_41710_20120410_205005_outLine +BABEL_BP_106_41733_20120307_171130_inLine +BABEL_BP_106_41816_20120415_184339_inLine +BABEL_BP_106_41949_20120213_174300_inLine +BABEL_BP_106_41949_20120213_174300_outLine +BABEL_BP_106_42183_20120323_223118_inLine +BABEL_BP_106_42183_20120327_190153_inLine +BABEL_BP_106_42651_20120131_020401_inLine +BABEL_BP_106_42651_20120131_020401_outLine +BABEL_BP_106_42768_20120411_173257_inLine +BABEL_BP_106_42820_20120415_180402_inLine +BABEL_BP_106_42910_20120128_213020_inLine +BABEL_BP_106_42910_20120128_213020_outLine +BABEL_BP_106_43069_20120409_204043_inLine +BABEL_BP_106_43069_20120409_204043_outLine +BABEL_BP_106_43306_20120210_032400_inLine +BABEL_BP_106_43306_20120210_032400_outLine +BABEL_BP_106_43425_20120317_174519_inLine +BABEL_BP_106_43425_20120317_175422_inLine +BABEL_BP_106_43425_20120317_183658_inLine +BABEL_BP_106_43652_20120208_010946_inLine +BABEL_BP_106_43652_20120208_010946_outLine +BABEL_BP_106_43939_20120317_194330_inLine +BABEL_BP_106_43939_20120317_194330_outLine +BABEL_BP_106_44038_20120317_204039_inLine +BABEL_BP_106_44038_20120317_205302_inLine +BABEL_BP_106_44052_20120327_234511_inLine +BABEL_BP_106_44052_20120327_234511_outLine +BABEL_BP_106_44052_20120330_222904_inLine +BABEL_BP_106_44052_20120330_222904_outLine +BABEL_BP_106_44369_20120318_231951_inLine +BABEL_BP_106_44369_20120319_020556_inLine +BABEL_BP_106_44756_20120301_235107_inLine +BABEL_BP_106_44756_20120301_235107_outLine +BABEL_BP_106_45106_20120118_001529_inLine +BABEL_BP_106_45106_20120118_001529_outLine +BABEL_BP_106_45145_20120219_143857_inLine +BABEL_BP_106_45361_20120228_002747_inLine +BABEL_BP_106_45361_20120228_002747_outLine +BABEL_BP_106_45453_20120404_225631_inLine +BABEL_BP_106_45453_20120404_225631_outLine +BABEL_BP_106_45511_20120129_010308_inLine +BABEL_BP_106_45511_20120129_010308_outLine +BABEL_BP_106_45642_20120203_042123_inLine +BABEL_BP_106_45642_20120203_042123_outLine +BABEL_BP_106_45677_20120315_012905_inLine +BABEL_BP_106_45677_20120315_013919_inLine +BABEL_BP_106_45681_20120306_210519_inLine +BABEL_BP_106_45702_20120226_175928_inLine +BABEL_BP_106_45702_20120226_175928_outLine +BABEL_BP_106_45793_20120127_170707_inLine +BABEL_BP_106_45793_20120127_170707_outLine +BABEL_BP_106_46427_20120303_200620_outLine +BABEL_BP_106_46435_20120317_184057_outLine +BABEL_BP_106_46603_20120227_192836_inLine +BABEL_BP_106_46603_20120227_192836_outLine +BABEL_BP_106_46744_20120324_002344_inLine +BABEL_BP_106_46744_20120324_002344_outLine +BABEL_BP_106_46813_20120416_015932_inLine +BABEL_BP_106_47263_20120305_023242_inLine +BABEL_BP_106_47429_20120329_195737_inLine +BABEL_BP_106_47429_20120329_195737_outLine +BABEL_BP_106_47469_20120210_221258_inLine 
+BABEL_BP_106_47469_20120210_221258_outLine +BABEL_BP_106_47661_20120131_002939_outLine +BABEL_BP_106_47794_20120403_181127_inLine +BABEL_BP_106_47794_20120403_182418_inLine +BABEL_BP_106_47821_20120228_011928_inLine +BABEL_BP_106_47823_20120302_214046_outLine +BABEL_BP_106_47906_20120418_223527_inLine +BABEL_BP_106_47906_20120418_225920_inLine +BABEL_BP_106_48059_20120317_161513_inLine +BABEL_BP_106_48059_20120317_161513_outLine +BABEL_BP_106_48061_20120303_234335_inLine +BABEL_BP_106_48181_20120211_011159_inLine +BABEL_BP_106_48181_20120211_011159_outLine +BABEL_BP_106_48188_20120307_034039_outLine +BABEL_BP_106_48317_20120301_002256_inLine +BABEL_BP_106_48317_20120301_002256_outLine +BABEL_BP_106_48418_20120407_165729_inLine +BABEL_BP_106_48536_20120129_053527_inLine +BABEL_BP_106_48683_20120505_022553_inLine +BABEL_BP_106_49239_20120317_123831_inLine +BABEL_BP_106_49309_20120330_230450_inLine +BABEL_BP_106_49346_20120405_185601_inLine +BABEL_BP_106_49381_20120414_193653_inLine +BABEL_BP_106_49582_20120213_230049_inLine +BABEL_BP_106_49582_20120213_230049_outLine +BABEL_BP_106_49624_20120224_194049_inLine +BABEL_BP_106_49624_20120224_194049_outLine +BABEL_BP_106_49689_20120225_153748_outLine +BABEL_BP_106_49714_20120227_191755_inLine +BABEL_BP_106_49714_20120227_191755_outLine +BABEL_BP_106_50141_20120309_225945_inLine +BABEL_BP_106_50141_20120309_225945_outLine +BABEL_BP_106_50298_20120227_005517_inLine +BABEL_BP_106_50298_20120227_005517_outLine +BABEL_BP_106_50387_20120229_175528_inLine +BABEL_BP_106_50387_20120229_175528_outLine +BABEL_BP_106_50409_20120319_185818_inLine +BABEL_BP_106_50409_20120319_185818_outLine +BABEL_BP_106_50410_20120229_183217_inLine +BABEL_BP_106_50410_20120229_183217_outLine +BABEL_BP_106_50468_20120417_231448_inLine +BABEL_BP_106_50476_20120304_171701_inLine +BABEL_BP_106_50476_20120304_171701_outLine +BABEL_BP_106_50555_20120605_134945_inLine +BABEL_BP_106_50589_20120128_192230_inLine +BABEL_BP_106_50589_20120128_192230_outLine +BABEL_BP_106_50641_20120317_180902_inLine +BABEL_BP_106_50641_20120317_180902_outLine +BABEL_BP_106_50752_20120310_001913_inLine +BABEL_BP_106_51042_20120313_230521_inLine +BABEL_BP_106_51073_20120128_200706_inLine +BABEL_BP_106_51073_20120128_200706_outLine +BABEL_BP_106_51149_20120329_174521_inLine +BABEL_BP_106_51149_20120329_174521_outLine +BABEL_BP_106_51448_20120413_214526_inLine +BABEL_BP_106_51448_20120413_220517_inLine +BABEL_BP_106_51727_20120229_000250_inLine +BABEL_BP_106_51727_20120229_000250_outLine +BABEL_BP_106_52033_20120228_001715_inLine +BABEL_BP_106_52033_20120228_001715_outLine +BABEL_BP_106_52154_20120312_004528_outLine +BABEL_BP_106_52325_20120211_220159_inLine +BABEL_BP_106_52366_20120124_164406_inLine +BABEL_BP_106_52366_20120124_164406_outLine +BABEL_BP_106_52642_20120222_175700_inLine +BABEL_BP_106_52642_20120222_175700_outLine +BABEL_BP_106_52902_20120605_184038_inLine +BABEL_BP_106_53179_20120301_181951_inLine +BABEL_BP_106_53179_20120301_181951_outLine +BABEL_BP_106_53315_20120329_182550_inLine +BABEL_BP_106_53315_20120329_182550_outLine +BABEL_BP_106_53376_20120323_000750_inLine +BABEL_BP_106_53376_20120323_000750_outLine +BABEL_BP_106_53463_20120605_180156_inLine +BABEL_BP_106_53653_20120405_182452_inLine +BABEL_BP_106_53653_20120405_183849_inLine +BABEL_BP_106_53824_20120227_025033_inLine +BABEL_BP_106_53824_20120227_025033_outLine +BABEL_BP_106_54358_20120229_223811_outLine +BABEL_BP_106_54621_20120227_235308_inLine +BABEL_BP_106_54621_20120227_235308_outLine 
+BABEL_BP_106_54785_20120303_011154_outLine +BABEL_BP_106_55182_20120422_185742_inLine +BABEL_BP_106_55204_20120330_230730_outLine +BABEL_BP_106_55288_20120503_010325_inLine +BABEL_BP_106_55355_20120405_180949_inLine +BABEL_BP_106_55450_20120302_125827_inLine +BABEL_BP_106_55450_20120302_125827_outLine +BABEL_BP_106_55823_20120329_210142_inLine +BABEL_BP_106_55823_20120329_210142_outLine +BABEL_BP_106_55838_20120318_160306_outLine +BABEL_BP_106_55922_20120322_021453_inLine +BABEL_BP_106_55922_20120322_021453_outLine +BABEL_BP_106_55922_20120322_022537_inLine +BABEL_BP_106_55922_20120322_022537_outLine +BABEL_BP_106_55944_20120306_172041_inLine +BABEL_BP_106_55944_20120306_172041_outLine +BABEL_BP_106_56117_20120313_001237_outLine +BABEL_BP_106_56342_20120605_162901_inLine +BABEL_BP_106_56634_20120328_235133_inLine +BABEL_BP_106_56634_20120328_235133_outLine +BABEL_BP_106_56868_20120203_012057_inLine +BABEL_BP_106_56868_20120203_012057_outLine +BABEL_BP_106_56943_20120126_224048_outLine +BABEL_BP_106_57020_20120305_121648_outLine +BABEL_BP_106_57039_20120314_003848_inLine +BABEL_BP_106_57039_20120314_005748_inLine +BABEL_BP_106_57609_20120304_174858_inLine +BABEL_BP_106_57609_20120304_174858_outLine +BABEL_BP_106_57609_20120304_180016_inLine +BABEL_BP_106_57609_20120304_180016_outLine +BABEL_BP_106_57638_20120414_164822_inLine +BABEL_BP_106_58108_20120411_180115_inLine +BABEL_BP_106_58192_20120308_182924_inLine +BABEL_BP_106_58192_20120308_182924_outLine +BABEL_BP_106_58232_20120226_031714_inLine +BABEL_BP_106_58232_20120226_031714_outLine +BABEL_BP_106_58447_20120329_013316_inLine +BABEL_BP_106_58447_20120329_013316_outLine +BABEL_BP_106_58536_20120210_221536_inLine +BABEL_BP_106_58536_20120210_221536_outLine +BABEL_BP_106_58572_20120401_203941_inLine +BABEL_BP_106_58572_20120401_203941_outLine +BABEL_BP_106_58587_20120411_003742_inLine +BABEL_BP_106_58587_20120411_003742_outLine +BABEL_BP_106_58746_20120308_211819_inLine +BABEL_BP_106_58746_20120308_211819_outLine +BABEL_BP_106_58956_20120602_130340_inLine +BABEL_BP_106_59071_20120228_033845_inLine +BABEL_BP_106_59071_20120228_033845_outLine +BABEL_BP_106_59175_20120221_181535_inLine +BABEL_BP_106_59175_20120221_181535_outLine +BABEL_BP_106_59383_20120317_170327_inLine +BABEL_BP_106_59383_20120317_175629_inLine +BABEL_BP_106_59544_20120209_182249_inLine +BABEL_BP_106_59544_20120209_182249_outLine +BABEL_BP_106_59565_20120321_231854_inLine +BABEL_BP_106_59565_20120321_233445_inLine +BABEL_BP_106_59565_20120321_234523_inLine +BABEL_BP_106_59628_20120309_181006_inLine +BABEL_BP_106_59628_20120309_181006_outLine +BABEL_BP_106_59746_20120225_061555_outLine +BABEL_BP_106_59764_20120222_204824_inLine +BABEL_BP_106_59764_20120222_204824_outLine +BABEL_BP_106_59846_20120318_164327_inLine +BABEL_BP_106_59878_20120505_021018_inLine +BABEL_BP_106_59925_20120403_160805_inLine +BABEL_BP_106_60106_20120422_155717_inLine +BABEL_BP_106_60238_20120307_161043_inLine +BABEL_BP_106_60238_20120307_165816_inLine +BABEL_BP_106_60238_20120307_165816_outLine +BABEL_BP_106_60250_20120203_013207_inLine +BABEL_BP_106_60250_20120203_013207_outLine +BABEL_BP_106_60598_20120324_022730_inLine +BABEL_BP_106_60598_20120324_022730_outLine +BABEL_BP_106_60677_20120418_162110_inLine +BABEL_BP_106_60677_20120418_162841_inLine +BABEL_BP_106_60693_20120331_185728_inLine +BABEL_BP_106_60693_20120414_201244_inLine +BABEL_BP_106_60753_20120120_175004_inLine +BABEL_BP_106_60753_20120120_175004_outLine +BABEL_BP_106_60826_20120605_170407_inLine 
+BABEL_BP_106_61073_20120428_141557_outLine +BABEL_BP_106_61446_20120603_125637_inLine +BABEL_BP_106_61489_20120417_203944_inLine +BABEL_BP_106_61566_20120323_224814_inLine +BABEL_BP_106_61566_20120324_111941_inLine +BABEL_BP_106_61772_20120502_164250_inLine +BABEL_BP_106_61906_20120418_175007_inLine +BABEL_BP_106_62163_20120313_231604_outLine +BABEL_BP_106_62163_20120313_232930_outLine +BABEL_BP_106_62255_20120314_190940_inLine +BABEL_BP_106_62255_20120314_190940_outLine +BABEL_BP_106_62452_20120420_001258_inLine +BABEL_BP_106_62452_20120420_002416_inLine +BABEL_BP_106_62610_20120511_023409_inLine +BABEL_BP_106_62610_20120511_024325_inLine +BABEL_BP_106_63114_20120226_074838_inLine +BABEL_BP_106_63114_20120226_074838_outLine +BABEL_BP_106_63305_20120324_030221_inLine +BABEL_BP_106_63305_20120324_030221_outLine +BABEL_BP_106_63368_20120604_114321_inLine +BABEL_BP_106_63392_20120405_141717_inLine +BABEL_BP_106_63468_20120409_200746_inLine +BABEL_BP_106_63468_20120409_200746_outLine +BABEL_BP_106_63711_20120129_023323_inLine +BABEL_BP_106_63711_20120129_023323_outLine +BABEL_BP_106_63741_20120328_001923_inLine +BABEL_BP_106_64172_20120331_141241_inLine +BABEL_BP_106_64172_20120331_141241_outLine +BABEL_BP_106_64172_20120331_152028_inLine +BABEL_BP_106_64172_20120331_152028_outLine +BABEL_BP_106_64226_20120404_231458_inLine +BABEL_BP_106_64334_20120528_174746_inLine +BABEL_BP_106_64351_20120217_181140_inLine +BABEL_BP_106_64351_20120217_181140_outLine +BABEL_BP_106_64889_20120307_175001_inLine +BABEL_BP_106_64889_20120307_175001_outLine +BABEL_BP_106_65248_20120321_230954_inLine +BABEL_BP_106_65248_20120321_230954_outLine +BABEL_BP_106_65371_20120308_201622_inLine +BABEL_BP_106_65371_20120308_201622_outLine +BABEL_BP_106_65579_20120317_123135_inLine +BABEL_BP_106_65601_20120211_212006_inLine +BABEL_BP_106_65631_20120216_021352_inLine +BABEL_BP_106_65631_20120216_021352_outLine +BABEL_BP_106_65656_20120309_195913_outLine +BABEL_BP_106_65989_20120605_163026_inLine +BABEL_BP_106_66101_20120227_174035_inLine +BABEL_BP_106_66101_20120227_174035_outLine +BABEL_BP_106_66188_20120316_230006_inLine +BABEL_BP_106_66188_20120316_230006_outLine +BABEL_BP_106_66247_20120331_214412_inLine +BABEL_BP_106_66247_20120331_214412_outLine +BABEL_BP_106_66416_20120225_122454_inLine +BABEL_BP_106_66416_20120225_122454_outLine +BABEL_BP_106_66559_20120505_033828_inLine +BABEL_BP_106_66709_20120302_222833_inLine +BABEL_BP_106_66872_20120302_010751_inLine +BABEL_BP_106_66872_20120302_012055_inLine +BABEL_BP_106_66964_20120416_132128_inLine +BABEL_BP_106_67304_20120222_212038_inLine +BABEL_BP_106_67304_20120222_212038_outLine +BABEL_BP_106_67411_20120210_155625_inLine +BABEL_BP_106_67411_20120210_155625_outLine +BABEL_BP_106_67630_20120324_031205_inLine +BABEL_BP_106_67630_20120324_031205_outLine +BABEL_BP_106_67630_20120324_033243_inLine +BABEL_BP_106_67630_20120324_033243_outLine +BABEL_BP_106_67733_20120219_180702_inLine +BABEL_BP_106_67772_20120130_225552_inLine +BABEL_BP_106_67772_20120130_225552_outLine +BABEL_BP_106_68111_20120419_232912_inLine +BABEL_BP_106_68276_20120308_201728_inLine +BABEL_BP_106_68276_20120308_203526_inLine +BABEL_BP_106_68287_20120408_172649_inLine +BABEL_BP_106_68392_20120331_224408_inLine +BABEL_BP_106_68392_20120331_224408_outLine +BABEL_BP_106_68490_20120227_152714_inLine +BABEL_BP_106_68610_20120505_011125_inLine +BABEL_BP_106_68665_20120409_202242_inLine +BABEL_BP_106_68671_20120407_164400_inLine +BABEL_BP_106_68803_20120121_171931_inLine +BABEL_BP_106_68803_20120121_171931_outLine 
+BABEL_BP_106_69145_20120319_175304_inLine +BABEL_BP_106_69145_20120319_175304_outLine +BABEL_BP_106_69236_20120216_195133_inLine +BABEL_BP_106_69275_20120318_200803_inLine +BABEL_BP_106_69275_20120318_204539_inLine +BABEL_BP_106_69275_20120318_204539_outLine +BABEL_BP_106_69446_20120416_010511_inLine +BABEL_BP_106_69621_20120130_005117_inLine +BABEL_BP_106_69621_20120130_005117_outLine +BABEL_BP_106_70077_20120315_191801_inLine +BABEL_BP_106_70285_20120128_211036_outLine +BABEL_BP_106_70511_20120224_173336_inLine +BABEL_BP_106_70511_20120224_173336_outLine +BABEL_BP_106_70983_20120516_163100_inLine +BABEL_BP_106_71160_20120224_221158_inLine +BABEL_BP_106_71176_20120225_190017_inLine +BABEL_BP_106_71176_20120225_190017_outLine +BABEL_BP_106_71310_20120221_005007_inLine +BABEL_BP_106_71310_20120221_005007_outLine +BABEL_BP_106_71313_20120419_205542_inLine +BABEL_BP_106_71741_20120127_162656_inLine +BABEL_BP_106_72119_20120315_153943_inLine +BABEL_BP_106_72142_20120327_171827_inLine +BABEL_BP_106_72142_20120327_173216_inLine +BABEL_BP_106_72142_20120327_173216_outLine +BABEL_BP_106_72801_20120325_202928_outLine +BABEL_BP_106_72801_20120325_222633_inLine +BABEL_BP_106_72801_20120325_222633_outLine +BABEL_BP_106_72858_20120605_123842_inLine +BABEL_BP_106_73059_20120309_164956_outLine +BABEL_BP_106_73250_20120410_001928_inLine +BABEL_BP_106_73250_20120410_003448_inLine +BABEL_BP_106_73438_20120314_002432_outLine +BABEL_BP_106_73440_20120131_160945_inLine +BABEL_BP_106_73440_20120131_160945_outLine +BABEL_BP_106_73752_20120228_142547_inLine +BABEL_BP_106_73752_20120228_142547_outLine +BABEL_BP_106_73780_20120304_170119_outLine +BABEL_BP_106_73786_20120202_230843_inLine +BABEL_BP_106_73786_20120202_230843_outLine +BABEL_BP_106_73911_20120219_194519_inLine +BABEL_BP_106_73911_20120219_194519_outLine +BABEL_BP_106_74214_20120503_012037_inLine +BABEL_BP_106_74295_20120317_213141_inLine +BABEL_BP_106_74295_20120317_214659_inLine +BABEL_BP_106_74368_20120317_141935_inLine +BABEL_BP_106_74395_20120414_202413_inLine +BABEL_BP_106_74508_20120209_223405_inLine +BABEL_BP_106_74508_20120209_223405_outLine +BABEL_BP_106_74533_20120502_222417_inLine +BABEL_BP_106_74986_20120418_222615_inLine +BABEL_BP_106_75036_20120224_163823_inLine +BABEL_BP_106_75036_20120224_163823_outLine +BABEL_BP_106_75125_20120325_173456_inLine +BABEL_BP_106_75151_20120405_172457_inLine +BABEL_BP_106_75243_20120314_181814_inLine +BABEL_BP_106_75243_20120314_193719_inLine +BABEL_BP_106_75243_20120314_194814_inLine +BABEL_BP_106_75740_20120128_205720_inLine +BABEL_BP_106_75740_20120128_205720_outLine +BABEL_BP_106_75919_20120419_222309_inLine +BABEL_BP_106_75932_20120301_185217_inLine +BABEL_BP_106_75932_20120301_185217_outLine +BABEL_BP_106_76252_20120318_131223_outLine +BABEL_BP_106_76320_20120317_171522_inLine +BABEL_BP_106_76451_20120128_174820_inLine +BABEL_BP_106_76451_20120128_174820_outLine +BABEL_BP_106_76733_20120317_205542_inLine +BABEL_BP_106_76733_20120317_205542_outLine +BABEL_BP_106_76748_20120313_033301_inLine +BABEL_BP_106_76748_20120313_033301_outLine +BABEL_BP_106_76919_20120301_013753_outLine +BABEL_BP_106_76989_20120212_225118_inLine +BABEL_BP_106_76993_20120227_180157_inLine +BABEL_BP_106_76993_20120227_180157_outLine +BABEL_BP_106_77104_20120320_000526_inLine +BABEL_BP_106_77244_20120317_154534_inLine +BABEL_BP_106_77244_20120317_160037_inLine +BABEL_BP_106_77315_20120227_153127_outLine +BABEL_BP_106_77342_20120224_193702_inLine +BABEL_BP_106_77342_20120224_193702_outLine +BABEL_BP_106_77342_20120224_201725_inLine 
+BABEL_BP_106_77342_20120224_201725_outLine +BABEL_BP_106_77487_20120310_023017_outLine +BABEL_BP_106_77584_20120228_185654_inLine +BABEL_BP_106_77584_20120228_185654_outLine +BABEL_BP_106_78094_20120127_144526_inLine +BABEL_BP_106_78406_20120331_145033_inLine +BABEL_BP_106_78406_20120331_145857_inLine +BABEL_BP_106_78516_20120324_012547_inLine +BABEL_BP_106_78516_20120324_012547_outLine +BABEL_BP_106_78617_20120325_220217_inLine +BABEL_BP_106_78617_20120325_220217_outLine +BABEL_BP_106_78617_20120327_225421_inLine +BABEL_BP_106_78617_20120327_225421_outLine +BABEL_BP_106_78753_20120511_020629_inLine +BABEL_BP_106_79284_20120317_154901_inLine +BABEL_BP_106_79284_20120317_190801_inLine +BABEL_BP_106_79519_20120407_175434_inLine +BABEL_BP_106_79526_20120418_000428_inLine +BABEL_BP_106_79593_20120528_180841_inLine +BABEL_BP_106_79650_20120410_220151_outLine +BABEL_BP_106_79650_20120410_221127_outLine +BABEL_BP_106_79970_20120420_164617_inLine +BABEL_BP_106_80068_20120414_213628_inLine +BABEL_BP_106_80075_20120605_113236_inLine +BABEL_BP_106_80150_20120229_200345_inLine +BABEL_BP_106_80150_20120229_200345_outLine +BABEL_BP_106_80174_20120422_023124_inLine +BABEL_BP_106_80290_20120311_231738_inLine +BABEL_BP_106_80290_20120311_231738_outLine +BABEL_BP_106_80290_20120311_234143_inLine +BABEL_BP_106_80290_20120311_234143_outLine +BABEL_BP_106_80535_20120319_003708_inLine +BABEL_BP_106_80598_20120416_002228_inLine +BABEL_BP_106_80638_20120411_224029_inLine +BABEL_BP_106_80701_20120315_153813_inLine +BABEL_BP_106_81065_20120603_120830_inLine +BABEL_BP_106_81096_20120604_122742_inLine +BABEL_BP_106_81119_20120417_201549_inLine +BABEL_BP_106_81601_20120205_223405_inLine +BABEL_BP_106_81601_20120205_223405_outLine +BABEL_BP_106_81642_20120218_232158_inLine +BABEL_BP_106_81642_20120218_232158_outLine +BABEL_BP_106_81647_20120304_131330_inLine +BABEL_BP_106_81647_20120304_131330_outLine +BABEL_BP_106_81769_20120330_213453_outLine +BABEL_BP_106_81799_20120220_212705_inLine +BABEL_BP_106_81799_20120220_212705_outLine +BABEL_BP_106_81820_20120329_190503_inLine +BABEL_BP_106_81844_20120226_002405_inLine +BABEL_BP_106_81844_20120226_002405_outLine +BABEL_BP_106_81944_20120404_120724_inLine +BABEL_BP_106_81944_20120404_120724_outLine +BABEL_BP_106_82409_20120227_002253_inLine +BABEL_BP_106_82409_20120227_002253_outLine +BABEL_BP_106_82443_20120315_182456_outLine +BABEL_BP_106_82484_20120210_215502_outLine +BABEL_BP_106_82766_20120211_165522_inLine +BABEL_BP_106_82766_20120211_165522_outLine +BABEL_BP_106_83186_20120227_203823_inLine +BABEL_BP_106_83186_20120227_203823_outLine +BABEL_BP_106_83531_20120210_162513_inLine +BABEL_BP_106_83531_20120210_162513_outLine +BABEL_BP_106_83634_20120317_172215_outLine +BABEL_BP_106_83702_20120212_030118_inLine +BABEL_BP_106_83702_20120212_030118_outLine +BABEL_BP_106_83921_20120227_141419_inLine +BABEL_BP_106_83921_20120227_141419_outLine +BABEL_BP_106_84025_20120307_171246_inLine +BABEL_BP_106_84025_20120307_171246_outLine +BABEL_BP_106_84171_20120317_135204_inLine +BABEL_BP_106_84171_20120317_135204_outLine +BABEL_BP_106_84284_20120304_161058_inLine +BABEL_BP_106_84284_20120304_161058_outLine +BABEL_BP_106_84394_20120226_172149_inLine +BABEL_BP_106_84394_20120226_172149_outLine +BABEL_BP_106_84488_20120329_015848_inLine +BABEL_BP_106_84488_20120329_015848_outLine +BABEL_BP_106_84608_20120304_145910_inLine +BABEL_BP_106_84608_20120304_145910_outLine +BABEL_BP_106_84700_20120308_185454_inLine +BABEL_BP_106_84700_20120308_185454_outLine 
+BABEL_BP_106_84756_20120324_004957_inLine +BABEL_BP_106_84756_20120324_004957_outLine +BABEL_BP_106_84779_20120313_035600_inLine +BABEL_BP_106_84779_20120313_041105_inLine +BABEL_BP_106_84779_20120313_041106_outLine +BABEL_BP_106_84980_20120227_014019_inLine +BABEL_BP_106_84980_20120227_014019_outLine +BABEL_BP_106_85101_20120401_193440_inLine +BABEL_BP_106_85101_20120401_193440_outLine +BABEL_BP_106_85533_20120130_235957_inLine +BABEL_BP_106_85533_20120130_235957_outLine +BABEL_BP_106_85752_20120301_023900_inLine +BABEL_BP_106_85752_20120301_023900_outLine +BABEL_BP_106_86014_20120605_153510_inLine +BABEL_BP_106_86016_20120211_173225_inLine +BABEL_BP_106_86016_20120211_173225_outLine +BABEL_BP_106_86029_20120130_001526_inLine +BABEL_BP_106_86029_20120130_001526_outLine +BABEL_BP_106_86337_20120411_130915_inLine +BABEL_BP_106_86344_20120323_230601_outLine +BABEL_BP_106_86344_20120323_231804_inLine +BABEL_BP_106_86344_20120323_231804_outLine +BABEL_BP_106_86344_20120323_232835_inLine +BABEL_BP_106_86344_20120323_232835_outLine +BABEL_BP_106_87124_20120411_050315_inLine +BABEL_BP_106_87124_20120411_050315_outLine +BABEL_BP_106_87139_20120227_175141_inLine +BABEL_BP_106_87139_20120227_175141_outLine +BABEL_BP_106_87210_20120212_183156_inLine +BABEL_BP_106_87210_20120212_183156_outLine +BABEL_BP_106_87218_20120501_004341_inLine +BABEL_BP_106_87281_20120318_175101_inLine +BABEL_BP_106_87281_20120318_175101_outLine +BABEL_BP_106_87520_20120428_013320_inLine +BABEL_BP_106_87520_20120428_014139_inLine +BABEL_BP_106_87539_20120228_005220_inLine +BABEL_BP_106_87539_20120228_005220_outLine +BABEL_BP_106_87564_20120225_141938_inLine +BABEL_BP_106_87564_20120225_141938_outLine +BABEL_BP_106_87607_20120221_144252_inLine +BABEL_BP_106_87607_20120221_150220_inLine +BABEL_BP_106_87607_20120221_153642_inLine +BABEL_BP_106_87634_20120203_031511_inLine +BABEL_BP_106_87634_20120203_031511_outLine +BABEL_BP_106_87850_20120212_182620_inLine +BABEL_BP_106_87850_20120212_184930_inLine +BABEL_BP_106_87850_20120212_190826_inLine +BABEL_BP_106_87862_20120224_185514_outLine +BABEL_BP_106_87985_20120328_214048_inLine +BABEL_BP_106_87985_20120328_214048_outLine +BABEL_BP_106_88245_20120309_175128_inLine +BABEL_BP_106_88385_20120307_191827_inLine +BABEL_BP_106_88385_20120307_191827_outLine +BABEL_BP_106_88506_20120411_195636_inLine +BABEL_BP_106_88929_20120421_132840_inLine +BABEL_BP_106_88929_20120421_134445_inLine +BABEL_BP_106_89301_20120229_011855_inLine +BABEL_BP_106_89301_20120229_011855_outLine +BABEL_BP_106_89301_20120229_012853_inLine +BABEL_BP_106_89301_20120229_012853_outLine +BABEL_BP_106_89417_20120131_014042_outLine +BABEL_BP_106_89583_20120304_211628_outLine +BABEL_BP_106_89583_20120304_214338_outLine +BABEL_BP_106_89727_20120404_165020_inLine +BABEL_BP_106_90127_20120228_034539_inLine +BABEL_BP_106_90202_20120311_205142_outLine +BABEL_BP_106_90389_20120318_150647_inLine +BABEL_BP_106_90389_20120318_151932_inLine +BABEL_BP_106_90393_20120211_230839_inLine +BABEL_BP_106_90393_20120211_230839_outLine +BABEL_BP_106_90436_20120228_014236_inLine +BABEL_BP_106_90436_20120228_014236_outLine +BABEL_BP_106_90490_20120322_030219_inLine +BABEL_BP_106_90490_20120322_033415_inLine +BABEL_BP_106_90506_20120128_002109_inLine +BABEL_BP_106_90506_20120128_002109_outLine +BABEL_BP_106_90511_20120210_164822_inLine +BABEL_BP_106_90511_20120210_164822_outLine +BABEL_BP_106_90742_20120501_022105_inLine +BABEL_BP_106_90742_20120501_022837_inLine +BABEL_BP_106_90951_20120302_230530_inLine 
+BABEL_BP_106_90951_20120302_232555_inLine +BABEL_BP_106_91000_20120311_230040_inLine +BABEL_BP_106_91000_20120311_230040_outLine +BABEL_BP_106_91000_20120311_231020_inLine +BABEL_BP_106_91000_20120311_231020_outLine +BABEL_BP_106_91002_20120411_201622_inLine +BABEL_BP_106_91143_20120413_234122_inLine +BABEL_BP_106_91358_20120312_180740_inLine +BABEL_BP_106_91401_20120131_014626_inLine +BABEL_BP_106_91401_20120131_014627_outLine +BABEL_BP_106_91481_20120303_192948_inLine +BABEL_BP_106_91481_20120303_202847_inLine +BABEL_BP_106_91583_20120415_170849_inLine +BABEL_BP_106_91583_20120415_172901_inLine +BABEL_BP_106_91660_20120307_172116_inLine +BABEL_BP_106_91660_20120307_172116_outLine +BABEL_BP_106_91668_20120312_231623_inLine +BABEL_BP_106_91668_20120312_234357_inLine +BABEL_BP_106_91687_20120530_211936_inLine +BABEL_BP_106_91703_20120301_180553_inLine +BABEL_BP_106_91703_20120301_180553_outLine +BABEL_BP_106_91865_20120227_132028_inLine +BABEL_BP_106_91865_20120227_132028_outLine +BABEL_BP_106_91905_20120225_044624_inLine +BABEL_BP_106_91905_20120225_044624_outLine +BABEL_BP_106_91975_20120318_202137_inLine +BABEL_BP_106_92002_20120301_010732_inLine +BABEL_BP_106_92346_20120410_232631_inLine +BABEL_BP_106_92346_20120410_234651_inLine +BABEL_BP_106_92591_20120301_232554_outLine +BABEL_BP_106_92642_20120214_041746_inLine +BABEL_BP_106_92642_20120214_041746_outLine +BABEL_BP_106_93044_20120405_184614_inLine +BABEL_BP_106_93169_20120126_190053_inLine +BABEL_BP_106_93237_20120412_202749_inLine +BABEL_BP_106_93268_20120316_173016_inLine +BABEL_BP_106_93268_20120316_185049_inLine +BABEL_BP_106_93277_20120314_162508_inLine +BABEL_BP_106_93277_20120314_162508_outLine +BABEL_BP_106_93302_20120530_221003_inLine +BABEL_BP_106_93326_20120329_003409_inLine +BABEL_BP_106_93326_20120329_003409_outLine +BABEL_BP_106_93436_20120314_172420_inLine +BABEL_BP_106_93506_20120501_114215_outLine +BABEL_BP_106_93607_20120304_213723_outLine +BABEL_BP_106_93811_20120419_004934_inLine +BABEL_BP_106_94126_20120331_143958_inLine +BABEL_BP_106_94126_20120331_143958_outLine +BABEL_BP_106_94162_20120418_180628_inLine +BABEL_BP_106_94223_20120219_180504_inLine +BABEL_BP_106_94223_20120219_185026_inLine +BABEL_BP_106_94223_20120219_191721_inLine +BABEL_BP_106_94223_20120219_194907_inLine +BABEL_BP_106_94514_20120225_190925_inLine +BABEL_BP_106_94514_20120225_190925_outLine +BABEL_BP_106_94514_20120225_192755_inLine +BABEL_BP_106_94514_20120225_192755_outLine +BABEL_BP_106_94694_20120315_140425_inLine +BABEL_BP_106_94694_20120315_140425_outLine +BABEL_BP_106_94814_20120211_015600_inLine +BABEL_BP_106_95034_20120222_020622_inLine +BABEL_BP_106_95034_20120222_020622_outLine +BABEL_BP_106_95120_20120226_174855_inLine +BABEL_BP_106_95120_20120226_174855_outLine +BABEL_BP_106_95121_20120314_000217_inLine +BABEL_BP_106_95121_20120314_000217_outLine +BABEL_BP_106_95325_20120225_072841_inLine +BABEL_BP_106_95325_20120225_072841_outLine +BABEL_BP_106_95514_20120404_160802_inLine +BABEL_BP_106_95533_20120227_180819_inLine +BABEL_BP_106_95572_20120501_120940_outLine +BABEL_BP_106_95628_20120331_180349_inLine +BABEL_BP_106_95628_20120331_180349_outLine +BABEL_BP_106_95650_20120203_023309_inLine +BABEL_BP_106_95650_20120203_023309_outLine +BABEL_BP_106_95791_20120323_213855_inLine +BABEL_BP_106_95849_20120317_111924_inLine +BABEL_BP_106_95849_20120317_112530_inLine +BABEL_BP_106_95849_20120317_114235_inLine +BABEL_BP_106_95893_20120304_185606_inLine +BABEL_BP_106_95952_20120413_203735_inLine +BABEL_BP_106_95952_20120413_203735_outLine 
+BABEL_BP_106_95952_20120413_204700_inLine +BABEL_BP_106_95952_20120413_204700_outLine +BABEL_BP_106_96108_20120306_154946_inLine +BABEL_BP_106_96108_20120306_154946_outLine +BABEL_BP_106_96302_20120317_192957_inLine +BABEL_BP_106_96302_20120317_193605_inLine +BABEL_BP_106_96302_20120317_195426_inLine +BABEL_BP_106_96302_20120317_195426_outLine +BABEL_BP_106_96425_20120314_181621_inLine +BABEL_BP_106_96425_20120314_183006_inLine +BABEL_BP_106_96463_20120304_141645_inLine +BABEL_BP_106_96630_20120204_003252_inLine +BABEL_BP_106_96630_20120204_003252_outLine +BABEL_BP_106_96636_20120605_122128_inLine +BABEL_BP_106_96717_20120331_232633_outLine +BABEL_BP_106_96922_20120331_201147_outLine +BABEL_BP_106_97486_20120214_032248_inLine +BABEL_BP_106_97486_20120214_032248_outLine +BABEL_BP_106_97486_20120214_035344_inLine +BABEL_BP_106_97486_20120214_035344_outLine +BABEL_BP_106_97635_20120330_174657_inLine +BABEL_BP_106_97649_20120312_212246_inLine +BABEL_BP_106_97649_20120312_213707_inLine +BABEL_BP_106_97699_20120317_154627_outLine +BABEL_BP_106_98279_20120228_221829_inLine +BABEL_BP_106_98279_20120228_222916_inLine +BABEL_BP_106_98279_20120228_224103_inLine +BABEL_BP_106_98402_20120311_211004_inLine +BABEL_BP_106_98402_20120311_211004_outLine +BABEL_BP_106_98465_20120408_005224_inLine +BABEL_BP_106_98465_20120408_005224_outLine +BABEL_BP_106_98476_20120301_235152_inLine +BABEL_BP_106_98476_20120301_235152_outLine +BABEL_BP_106_98675_20120418_012454_inLine +BABEL_BP_106_98807_20120331_181706_inLine +BABEL_BP_106_98807_20120331_181706_outLine +BABEL_BP_106_98807_20120331_182345_inLine +BABEL_BP_106_98807_20120331_182345_outLine +BABEL_BP_106_98807_20120331_183121_inLine +BABEL_BP_106_98807_20120331_183121_outLine +BABEL_BP_106_99012_20120419_224750_inLine +BABEL_BP_106_99697_20120229_185303_inLine +BABEL_BP_106_99856_20120226_184042_inLine +BABEL_BP_106_99856_20120226_184042_outLine +BABEL_BP_106_99856_20120226_191212_inLine +BABEL_BP_106_99856_20120226_191212_outLine diff --git a/egs/babel/s5d/conf/lists/106-tagalog/train.LimitedLP.list b/egs/babel/s5d/conf/lists/106-tagalog/train.LimitedLP.list new file mode 100644 index 00000000000..fee3e3adbaf --- /dev/null +++ b/egs/babel/s5d/conf/lists/106-tagalog/train.LimitedLP.list @@ -0,0 +1,134 @@ +BABEL_BP_106_03420_20120409_204941_inLine +BABEL_BP_106_03420_20120409_204941_outLine +BABEL_BP_106_03420_20120409_211810_inLine +BABEL_BP_106_03420_20120409_211811_outLine +BABEL_BP_106_10985_20120313_013835_inLine +BABEL_BP_106_10985_20120313_013835_outLine +BABEL_BP_106_11158_20120314_183907_inLine +BABEL_BP_106_11158_20120314_183907_outLine +BABEL_BP_106_11158_20120314_193006_inLine +BABEL_BP_106_11158_20120314_193006_outLine +BABEL_BP_106_12248_20120304_225237_inLine +BABEL_BP_106_13071_20120315_000734_inLine +BABEL_BP_106_13071_20120315_000734_outLine +BABEL_BP_106_13071_20120315_001539_inLine +BABEL_BP_106_13071_20120315_001539_outLine +BABEL_BP_106_16406_20120309_161540_inLine +BABEL_BP_106_19867_20120428_022912_inLine +BABEL_BP_106_20320_20120206_212251_inLine +BABEL_BP_106_20320_20120206_212251_outLine +BABEL_BP_106_20740_20120229_234935_inLine +BABEL_BP_106_22910_20120129_213616_inLine +BABEL_BP_106_22910_20120129_213616_outLine +BABEL_BP_106_23571_20120229_180344_inLine +BABEL_BP_106_23571_20120229_180344_outLine +BABEL_BP_106_23878_20120209_170350_inLine +BABEL_BP_106_23878_20120209_170350_outLine +BABEL_BP_106_25751_20120227_221828_inLine +BABEL_BP_106_25751_20120227_221828_outLine +BABEL_BP_106_25866_20120304_181012_inLine 
+BABEL_BP_106_25866_20120304_181012_outLine +BABEL_BP_106_27916_20120403_232720_inLine +BABEL_BP_106_27916_20120403_232720_outLine +BABEL_BP_106_27916_20120403_233612_inLine +BABEL_BP_106_27916_20120403_233612_outLine +BABEL_BP_106_30818_20120503_004014_inLine +BABEL_BP_106_31265_20120311_235253_inLine +BABEL_BP_106_31265_20120311_235253_outLine +BABEL_BP_106_32400_20120307_235432_inLine +BABEL_BP_106_32400_20120307_235432_outLine +BABEL_BP_106_32890_20120221_193416_inLine +BABEL_BP_106_32890_20120221_193417_outLine +BABEL_BP_106_33742_20120229_020923_inLine +BABEL_BP_106_33742_20120229_020923_outLine +BABEL_BP_106_34480_20120405_141959_inLine +BABEL_BP_106_34961_20120130_011357_inLine +BABEL_BP_106_34961_20120130_011357_outLine +BABEL_BP_106_35179_20120225_063734_inLine +BABEL_BP_106_35179_20120225_063734_outLine +BABEL_BP_106_35706_20120501_011424_inLine +BABEL_BP_106_36268_20120209_180615_inLine +BABEL_BP_106_36268_20120209_180615_outLine +BABEL_BP_106_38640_20120130_174518_inLine +BABEL_BP_106_38640_20120130_174518_outLine +BABEL_BP_106_38956_20120127_010500_inLine +BABEL_BP_106_38956_20120127_010500_outLine +BABEL_BP_106_40510_20120221_155613_inLine +BABEL_BP_106_40510_20120221_155613_outLine +BABEL_BP_106_40680_20120511_153305_inLine +BABEL_BP_106_45453_20120404_225631_inLine +BABEL_BP_106_45453_20120404_225631_outLine +BABEL_BP_106_46603_20120227_192836_inLine +BABEL_BP_106_46603_20120227_192836_outLine +BABEL_BP_106_47429_20120329_195737_inLine +BABEL_BP_106_47429_20120329_195737_outLine +BABEL_BP_106_48188_20120307_034039_outLine +BABEL_BP_106_49624_20120224_194049_inLine +BABEL_BP_106_49624_20120224_194049_outLine +BABEL_BP_106_49689_20120225_152557_outLine +BABEL_BP_106_49689_20120225_153748_outLine +BABEL_BP_106_49714_20120227_191755_inLine +BABEL_BP_106_49714_20120227_191755_outLine +BABEL_BP_106_50409_20120319_185818_inLine +BABEL_BP_106_50409_20120319_185818_outLine +BABEL_BP_106_51149_20120329_174521_inLine +BABEL_BP_106_51149_20120329_174521_outLine +BABEL_BP_106_52366_20120124_164406_inLine +BABEL_BP_106_52366_20120124_164406_outLine +BABEL_BP_106_53315_20120329_182550_inLine +BABEL_BP_106_53315_20120329_182550_outLine +BABEL_BP_106_53376_20120323_000750_inLine +BABEL_BP_106_53376_20120323_000750_outLine +BABEL_BP_106_55823_20120329_210142_inLine +BABEL_BP_106_55823_20120329_210142_outLine +BABEL_BP_106_55922_20120322_021453_inLine +BABEL_BP_106_55922_20120322_021453_outLine +BABEL_BP_106_55922_20120322_022537_inLine +BABEL_BP_106_55922_20120322_022537_outLine +BABEL_BP_106_58192_20120308_182924_inLine +BABEL_BP_106_58192_20120308_182924_outLine +BABEL_BP_106_60598_20120324_022730_inLine +BABEL_BP_106_60598_20120324_022730_outLine +BABEL_BP_106_63305_20120324_030221_inLine +BABEL_BP_106_63305_20120324_030221_outLine +BABEL_BP_106_65248_20120321_230954_inLine +BABEL_BP_106_65248_20120321_230954_outLine +BABEL_BP_106_68392_20120331_224408_inLine +BABEL_BP_106_68392_20120331_224408_outLine +BABEL_BP_106_68610_20120505_011125_inLine +BABEL_BP_106_70285_20120128_211036_outLine +BABEL_BP_106_71310_20120221_005007_inLine +BABEL_BP_106_71310_20120221_005007_outLine +BABEL_BP_106_75036_20120224_163823_inLine +BABEL_BP_106_75036_20120224_163823_outLine +BABEL_BP_106_75932_20120301_185217_inLine +BABEL_BP_106_75932_20120301_185217_outLine +BABEL_BP_106_76252_20120318_131223_outLine +BABEL_BP_106_79519_20120407_175434_inLine +BABEL_BP_106_80174_20120422_023124_inLine +BABEL_BP_106_81944_20120404_120724_inLine +BABEL_BP_106_81944_20120404_120724_outLine 
+BABEL_BP_106_82766_20120211_165522_inLine +BABEL_BP_106_82766_20120211_165522_outLine +BABEL_BP_106_86014_20120605_153510_inLine +BABEL_BP_106_87210_20120212_183156_inLine +BABEL_BP_106_87210_20120212_183156_outLine +BABEL_BP_106_89417_20120131_014042_inLine +BABEL_BP_106_89417_20120131_014042_outLine +BABEL_BP_106_89727_20120404_165020_inLine +BABEL_BP_106_91000_20120311_230040_inLine +BABEL_BP_106_91000_20120311_230040_outLine +BABEL_BP_106_91000_20120311_231020_inLine +BABEL_BP_106_91000_20120311_231020_outLine +BABEL_BP_106_91401_20120131_014626_inLine +BABEL_BP_106_91401_20120131_014627_outLine +BABEL_BP_106_91905_20120225_044624_inLine +BABEL_BP_106_91905_20120225_044624_outLine +BABEL_BP_106_93169_20120126_190053_inLine +BABEL_BP_106_94814_20120211_015600_inLine +BABEL_BP_106_95034_20120222_020622_inLine +BABEL_BP_106_95034_20120222_020622_outLine +BABEL_BP_106_96630_20120204_003252_inLine +BABEL_BP_106_96630_20120204_003252_outLine +BABEL_BP_106_98465_20120408_005224_inLine +BABEL_BP_106_98465_20120408_005224_outLine diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/dev.list b/egs/babel/s5d/conf/lists/107-vietnamese/dev.list new file mode 100644 index 00000000000..f44c76db308 --- /dev/null +++ b/egs/babel/s5d/conf/lists/107-vietnamese/dev.list @@ -0,0 +1,132 @@ +BABEL_BP_107_11031_20120617_182613_inLine +BABEL_BP_107_11031_20120617_182613_outLine +BABEL_BP_107_12120_20120704_024505_inLine +BABEL_BP_107_12120_20120704_024505_outLine +BABEL_BP_107_12248_20120614_183345_inLine +BABEL_BP_107_12248_20120614_183345_outLine +BABEL_BP_107_12963_20120509_002346_inLine +BABEL_BP_107_12963_20120509_002346_outLine +BABEL_BP_107_12963_20120509_003852_inLine +BABEL_BP_107_12963_20120509_003852_outLine +BABEL_BP_107_13476_20120428_003452_inLine +BABEL_BP_107_13476_20120428_003452_outLine +BABEL_BP_107_14610_20120218_201908_inLine +BABEL_BP_107_14610_20120218_201908_outLine +BABEL_BP_107_14769_20120420_013147_inLine +BABEL_BP_107_14769_20120420_013147_outLine +BABEL_BP_107_14997_20120406_190013_inLine +BABEL_BP_107_14997_20120406_190013_outLine +BABEL_BP_107_14997_20120406_191102_inLine +BABEL_BP_107_14997_20120406_191102_outLine +BABEL_BP_107_15493_20120617_120952_inLine +BABEL_BP_107_15493_20120617_120952_outLine +BABEL_BP_107_15502_20120627_124423_inLine +BABEL_BP_107_15502_20120627_124423_outLine +BABEL_BP_107_16167_20120215_213113_inLine +BABEL_BP_107_16167_20120215_213113_outLine +BABEL_BP_107_18730_20120222_145916_inLine +BABEL_BP_107_18730_20120222_145916_outLine +BABEL_BP_107_19619_20120215_221131_inLine +BABEL_BP_107_19619_20120215_221131_outLine +BABEL_BP_107_19619_20120215_223011_inLine +BABEL_BP_107_19619_20120215_223011_outLine +BABEL_BP_107_21489_20120608_123945_inLine +BABEL_BP_107_21489_20120608_123945_outLine +BABEL_BP_107_26644_20120509_013405_inLine +BABEL_BP_107_26644_20120509_013405_outLine +BABEL_BP_107_28161_20120322_171027_inLine +BABEL_BP_107_28161_20120322_171027_outLine +BABEL_BP_107_28648_20120506_223200_inLine +BABEL_BP_107_28648_20120506_223200_outLine +BABEL_BP_107_29168_20120321_215013_inLine +BABEL_BP_107_29168_20120321_215013_outLine +BABEL_BP_107_31538_20120320_202748_inLine +BABEL_BP_107_31538_20120320_202748_outLine +BABEL_BP_107_32120_20120704_182238_inLine +BABEL_BP_107_32120_20120704_182238_outLine +BABEL_BP_107_32236_20120505_195420_inLine +BABEL_BP_107_32236_20120505_195420_outLine +BABEL_BP_107_33704_20120416_005402_inLine +BABEL_BP_107_33704_20120416_005402_outLine +BABEL_BP_107_35391_20120416_192241_inLine 
+BABEL_BP_107_35391_20120416_192241_outLine +BABEL_BP_107_35441_20120421_221245_inLine +BABEL_BP_107_35441_20120421_221245_outLine +BABEL_BP_107_39080_20120415_141817_inLine +BABEL_BP_107_39080_20120415_141817_outLine +BABEL_BP_107_39140_20120409_163031_inLine +BABEL_BP_107_39140_20120409_163031_outLine +BABEL_BP_107_39997_20120516_214034_inLine +BABEL_BP_107_39997_20120516_214035_outLine +BABEL_BP_107_41456_20120421_133628_inLine +BABEL_BP_107_41456_20120421_133628_outLine +BABEL_BP_107_41661_20120329_022249_inLine +BABEL_BP_107_41661_20120329_022249_outLine +BABEL_BP_107_41661_20120329_023848_inLine +BABEL_BP_107_41661_20120329_023848_outLine +BABEL_BP_107_43086_20120210_015927_inLine +BABEL_BP_107_43086_20120210_015927_outLine +BABEL_BP_107_45512_20120505_135144_inLine +BABEL_BP_107_45512_20120505_135144_outLine +BABEL_BP_107_45677_20120428_184714_inLine +BABEL_BP_107_45677_20120428_184714_outLine +BABEL_BP_107_47037_20120415_210047_inLine +BABEL_BP_107_47037_20120415_210047_outLine +BABEL_BP_107_54285_20120430_233928_inLine +BABEL_BP_107_54285_20120430_233928_outLine +BABEL_BP_107_56812_20120502_123725_inLine +BABEL_BP_107_56812_20120502_123725_outLine +BABEL_BP_107_57020_20120427_011940_inLine +BABEL_BP_107_57020_20120427_011940_outLine +BABEL_BP_107_57976_20120704_183740_inLine +BABEL_BP_107_57976_20120704_183740_outLine +BABEL_BP_107_59868_20120324_013729_inLine +BABEL_BP_107_59868_20120324_013729_outLine +BABEL_BP_107_59868_20120324_015118_inLine +BABEL_BP_107_59868_20120324_015118_outLine +BABEL_BP_107_59891_20120504_013809_inLine +BABEL_BP_107_59891_20120504_013809_outLine +BABEL_BP_107_63459_20120415_003841_inLine +BABEL_BP_107_63459_20120415_003841_outLine +BABEL_BP_107_65606_20120416_004652_inLine +BABEL_BP_107_65606_20120416_004652_outLine +BABEL_BP_107_70625_20120426_235142_inLine +BABEL_BP_107_70625_20120426_235142_outLine +BABEL_BP_107_71178_20120617_184313_inLine +BABEL_BP_107_71178_20120617_184313_outLine +BABEL_BP_107_73542_20120209_010311_inLine +BABEL_BP_107_73542_20120209_010311_outLine +BABEL_BP_107_75990_20120408_211713_inLine +BABEL_BP_107_75990_20120408_211713_outLine +BABEL_BP_107_76320_20120504_123902_inLine +BABEL_BP_107_76320_20120504_123902_outLine +BABEL_BP_107_77697_20120416_235254_inLine +BABEL_BP_107_77697_20120416_235254_outLine +BABEL_BP_107_77771_20120421_231323_inLine +BABEL_BP_107_77771_20120421_231323_outLine +BABEL_BP_107_79412_20120322_174955_inLine +BABEL_BP_107_79412_20120322_174955_outLine +BABEL_BP_107_79526_20120420_150504_inLine +BABEL_BP_107_79526_20120420_150504_outLine +BABEL_BP_107_83219_20120421_172919_inLine +BABEL_BP_107_83219_20120421_172919_outLine +BABEL_BP_107_85204_20120212_190017_inLine +BABEL_BP_107_85204_20120212_190017_outLine +BABEL_BP_107_86259_20120507_015816_inLine +BABEL_BP_107_86259_20120507_015816_outLine +BABEL_BP_107_87850_20120406_210353_inLine +BABEL_BP_107_87850_20120406_210354_outLine +BABEL_BP_107_88383_20120627_125444_inLine +BABEL_BP_107_88383_20120627_125444_outLine +BABEL_BP_107_88383_20120627_130611_inLine +BABEL_BP_107_88383_20120627_130611_outLine +BABEL_BP_107_89838_20120212_205650_inLine +BABEL_BP_107_89838_20120212_205650_outLine +BABEL_BP_107_90202_20120502_194459_inLine +BABEL_BP_107_90202_20120502_194459_outLine +BABEL_BP_107_92386_20120322_195456_inLine +BABEL_BP_107_92386_20120322_195456_outLine +BABEL_BP_107_96283_20120503_162149_inLine +BABEL_BP_107_96283_20120503_162149_outLine +BABEL_BP_107_97760_20120614_184333_inLine +BABEL_BP_107_97760_20120614_184333_outLine diff --git 
a/egs/babel/s5d/conf/lists/107-vietnamese/eval.list b/egs/babel/s5d/conf/lists/107-vietnamese/eval.list new file mode 100644 index 00000000000..9cc6f7875ed --- /dev/null +++ b/egs/babel/s5d/conf/lists/107-vietnamese/eval.list @@ -0,0 +1,981 @@ +BABEL_BP_107_10170_20120703_230849_inLine +BABEL_BP_107_10170_20120703_230850_outLine +BABEL_BP_107_10170_20120703_231552_inLine +BABEL_BP_107_10170_20120703_231552_outLine +BABEL_BP_107_10187_20120218_202839_inLine +BABEL_BP_107_10187_20120218_202839_outLine +BABEL_BP_107_10408_20120704_021830_inLine +BABEL_BP_107_10408_20120704_021830_outLine +BABEL_BP_107_10470_20120608_135407_inLine +BABEL_BP_107_10470_20120608_135407_outLine +BABEL_BP_107_10925_20120209_233924_inLine +BABEL_BP_107_10925_20120209_233924_outLine +BABEL_BP_107_11004_20120329_040734_inLine +BABEL_BP_107_11004_20120329_040734_outLine +BABEL_BP_107_11152_20120418_221056_inLine +BABEL_BP_107_11152_20120418_221056_outLine +BABEL_BP_107_11203_20120415_212056_inLine +BABEL_BP_107_11203_20120415_212056_outLine +BABEL_BP_107_11824_20120413_213002_inLine +BABEL_BP_107_11824_20120413_213002_outLine +BABEL_BP_107_12535_20120614_190306_inLine +BABEL_BP_107_12535_20120614_190306_outLine +BABEL_BP_107_12667_20120514_195317_inLine +BABEL_BP_107_12667_20120514_195317_outLine +BABEL_BP_107_12700_20120608_010254_inLine +BABEL_BP_107_12700_20120608_010254_outLine +BABEL_BP_107_13118_20120419_193637_inLine +BABEL_BP_107_13118_20120419_193637_outLine +BABEL_BP_107_13441_20120412_212102_inLine +BABEL_BP_107_13441_20120412_212102_outLine +BABEL_BP_107_13709_20120406_164042_inLine +BABEL_BP_107_13709_20120406_164043_outLine +BABEL_BP_107_13913_20120415_144214_inLine +BABEL_BP_107_13913_20120415_144214_outLine +BABEL_BP_107_14389_20120617_164138_inLine +BABEL_BP_107_14389_20120617_164138_outLine +BABEL_BP_107_14874_20120320_190424_inLine +BABEL_BP_107_14874_20120320_190424_outLine +BABEL_BP_107_14874_20120320_192210_inLine +BABEL_BP_107_14874_20120320_192210_outLine +BABEL_BP_107_15022_20120418_133337_inLine +BABEL_BP_107_15022_20120418_133337_outLine +BABEL_BP_107_15234_20120509_180434_inLine +BABEL_BP_107_15234_20120509_180434_outLine +BABEL_BP_107_15859_20120419_133516_inLine +BABEL_BP_107_15859_20120419_133516_outLine +BABEL_BP_107_15916_20120426_132306_inLine +BABEL_BP_107_15916_20120426_132306_outLine +BABEL_BP_107_16266_20120211_215251_inLine +BABEL_BP_107_16266_20120211_215251_outLine +BABEL_BP_107_16299_20120220_135944_inLine +BABEL_BP_107_16299_20120220_135944_outLine +BABEL_BP_107_16346_20120423_192454_inLine +BABEL_BP_107_16346_20120423_192454_outLine +BABEL_BP_107_16984_20120414_193034_inLine +BABEL_BP_107_16984_20120414_193034_outLine +BABEL_BP_107_17080_20120212_150122_inLine +BABEL_BP_107_17080_20120212_150122_outLine +BABEL_BP_107_17207_20120507_023403_inLine +BABEL_BP_107_17207_20120507_023403_outLine +BABEL_BP_107_17511_20120419_232032_inLine +BABEL_BP_107_17511_20120419_232032_outLine +BABEL_BP_107_17572_20120613_040637_inLine +BABEL_BP_107_17572_20120613_040637_outLine +BABEL_BP_107_17850_20120615_234216_inLine +BABEL_BP_107_17850_20120615_234216_outLine +BABEL_BP_107_17900_20120323_015142_inLine +BABEL_BP_107_17900_20120323_015142_outLine +BABEL_BP_107_18672_20120426_150856_inLine +BABEL_BP_107_18672_20120426_150856_outLine +BABEL_BP_107_18980_20120608_125749_inLine +BABEL_BP_107_18980_20120608_125749_outLine +BABEL_BP_107_19147_20120212_161206_inLine +BABEL_BP_107_19147_20120212_161206_outLine +BABEL_BP_107_19456_20120704_165824_inLine +BABEL_BP_107_19456_20120704_165824_outLine 
+BABEL_BP_107_19656_20120325_230731_inLine +BABEL_BP_107_19656_20120325_230731_outLine +BABEL_BP_107_19861_20120511_013731_inLine +BABEL_BP_107_19861_20120511_013731_outLine +BABEL_BP_107_19861_20120511_014743_inLine +BABEL_BP_107_19861_20120511_014744_outLine +BABEL_BP_107_19915_20120218_150645_inLine +BABEL_BP_107_19915_20120218_150645_outLine +BABEL_BP_107_20408_20120323_142004_inLine +BABEL_BP_107_20408_20120323_142004_outLine +BABEL_BP_107_20408_20120323_143722_inLine +BABEL_BP_107_20408_20120323_143722_outLine +BABEL_BP_107_20471_20120328_020935_inLine +BABEL_BP_107_20471_20120328_020935_outLine +BABEL_BP_107_20546_20120323_215948_inLine +BABEL_BP_107_20546_20120323_215948_outLine +BABEL_BP_107_20685_20120222_210447_inLine +BABEL_BP_107_20685_20120222_210447_outLine +BABEL_BP_107_20775_20120502_214146_inLine +BABEL_BP_107_20775_20120502_214146_outLine +BABEL_BP_107_21714_20120608_140615_inLine +BABEL_BP_107_21714_20120608_140615_outLine +BABEL_BP_107_21782_20120321_191431_inLine +BABEL_BP_107_21782_20120321_191431_outLine +BABEL_BP_107_21845_20120613_195420_inLine +BABEL_BP_107_21845_20120613_195420_outLine +BABEL_BP_107_22179_20120220_172322_inLine +BABEL_BP_107_22179_20120220_172322_outLine +BABEL_BP_107_22351_20120413_231618_inLine +BABEL_BP_107_22351_20120413_231618_outLine +BABEL_BP_107_22408_20120416_180244_inLine +BABEL_BP_107_22408_20120416_180244_outLine +BABEL_BP_107_22537_20120322_214458_inLine +BABEL_BP_107_22537_20120322_214458_outLine +BABEL_BP_107_22566_20120704_023628_inLine +BABEL_BP_107_22566_20120704_023628_outLine +BABEL_BP_107_22973_20120503_231406_inLine +BABEL_BP_107_22973_20120503_231406_outLine +BABEL_BP_107_23168_20120618_113427_inLine +BABEL_BP_107_23168_20120618_113427_outLine +BABEL_BP_107_23336_20120429_192926_inLine +BABEL_BP_107_23336_20120429_192926_outLine +BABEL_BP_107_23352_20120425_211848_inLine +BABEL_BP_107_23352_20120425_211848_outLine +BABEL_BP_107_23995_20120418_194620_inLine +BABEL_BP_107_23995_20120418_194620_outLine +BABEL_BP_107_24379_20120422_173418_inLine +BABEL_BP_107_24379_20120422_173418_outLine +BABEL_BP_107_24431_20120215_202205_inLine +BABEL_BP_107_24431_20120215_202205_outLine +BABEL_BP_107_24580_20120420_011554_inLine +BABEL_BP_107_24580_20120420_011554_outLine +BABEL_BP_107_24589_20120508_183427_inLine +BABEL_BP_107_24589_20120508_183427_outLine +BABEL_BP_107_25021_20120131_214134_inLine +BABEL_BP_107_25021_20120131_214134_outLine +BABEL_BP_107_25502_20120217_005526_inLine +BABEL_BP_107_25502_20120217_005526_outLine +BABEL_BP_107_25735_20120608_134208_inLine +BABEL_BP_107_25735_20120608_134208_outLine +BABEL_BP_107_25871_20120422_181122_inLine +BABEL_BP_107_25871_20120422_181122_outLine +BABEL_BP_107_25904_20120509_000636_inLine +BABEL_BP_107_25904_20120509_000636_outLine +BABEL_BP_107_26164_20120705_014122_inLine +BABEL_BP_107_26164_20120705_014122_outLine +BABEL_BP_107_27178_20120324_021235_inLine +BABEL_BP_107_27178_20120324_021235_outLine +BABEL_BP_107_27349_20120321_195149_inLine +BABEL_BP_107_27349_20120321_195149_outLine +BABEL_BP_107_27605_20120329_015050_inLine +BABEL_BP_107_27605_20120329_015050_outLine +BABEL_BP_107_27645_20120501_005559_inLine +BABEL_BP_107_27645_20120501_005559_outLine +BABEL_BP_107_27824_20120418_211841_inLine +BABEL_BP_107_27824_20120418_211841_outLine +BABEL_BP_107_27825_20120418_230344_inLine +BABEL_BP_107_27825_20120418_230344_outLine +BABEL_BP_107_27825_20120418_231611_inLine +BABEL_BP_107_27825_20120418_231611_outLine +BABEL_BP_107_28754_20120417_233136_inLine 
+BABEL_BP_107_28754_20120417_233136_outLine +BABEL_BP_107_28768_20120607_134003_inLine +BABEL_BP_107_28768_20120607_134003_outLine +BABEL_BP_107_28768_20120607_135648_inLine +BABEL_BP_107_28768_20120607_135648_outLine +BABEL_BP_107_28990_20120421_150239_inLine +BABEL_BP_107_28990_20120421_150239_outLine +BABEL_BP_107_29087_20120511_023457_inLine +BABEL_BP_107_29087_20120511_023457_outLine +BABEL_BP_107_29097_20120120_174353_inLine +BABEL_BP_107_29097_20120120_174353_outLine +BABEL_BP_107_29133_20120212_223742_inLine +BABEL_BP_107_29133_20120212_223742_outLine +BABEL_BP_107_29259_20120418_213018_inLine +BABEL_BP_107_29259_20120418_213018_outLine +BABEL_BP_107_29328_20120208_021903_inLine +BABEL_BP_107_29328_20120208_021903_outLine +BABEL_BP_107_29421_20120501_121237_inLine +BABEL_BP_107_29421_20120501_121237_outLine +BABEL_BP_107_29512_20120426_133304_inLine +BABEL_BP_107_29512_20120426_133304_outLine +BABEL_BP_107_29545_20120704_025504_inLine +BABEL_BP_107_29545_20120704_025504_outLine +BABEL_BP_107_30530_20120210_191257_inLine +BABEL_BP_107_30530_20120210_191257_outLine +BABEL_BP_107_30642_20120424_124529_inLine +BABEL_BP_107_30642_20120424_124529_outLine +BABEL_BP_107_31256_20120424_173937_inLine +BABEL_BP_107_31256_20120424_173937_outLine +BABEL_BP_107_31345_20120501_200006_inLine +BABEL_BP_107_31345_20120501_200006_outLine +BABEL_BP_107_31441_20120322_221247_inLine +BABEL_BP_107_31441_20120322_221247_outLine +BABEL_BP_107_31678_20120323_003303_inLine +BABEL_BP_107_31678_20120323_003303_outLine +BABEL_BP_107_31841_20120420_173052_inLine +BABEL_BP_107_31841_20120420_173052_outLine +BABEL_BP_107_31841_20120420_175428_inLine +BABEL_BP_107_31841_20120420_175428_outLine +BABEL_BP_107_31975_20120418_213316_inLine +BABEL_BP_107_31975_20120418_213316_outLine +BABEL_BP_107_32045_20120627_135349_inLine +BABEL_BP_107_32045_20120627_135349_outLine +BABEL_BP_107_32263_20120415_125245_inLine +BABEL_BP_107_32263_20120415_125245_outLine +BABEL_BP_107_32452_20120417_025731_inLine +BABEL_BP_107_32452_20120417_025731_outLine +BABEL_BP_107_32562_20120502_183523_inLine +BABEL_BP_107_32562_20120502_183523_outLine +BABEL_BP_107_32642_20120507_162602_inLine +BABEL_BP_107_32642_20120507_162602_outLine +BABEL_BP_107_32818_20120505_124034_inLine +BABEL_BP_107_32818_20120505_124034_outLine +BABEL_BP_107_32830_20120217_010905_inLine +BABEL_BP_107_32830_20120217_010905_outLine +BABEL_BP_107_32962_20120417_002922_inLine +BABEL_BP_107_32962_20120417_002922_outLine +BABEL_BP_107_33243_20120417_000926_inLine +BABEL_BP_107_33243_20120417_000926_outLine +BABEL_BP_107_33527_20120415_192039_inLine +BABEL_BP_107_33527_20120415_192039_outLine +BABEL_BP_107_34169_20120328_012436_inLine +BABEL_BP_107_34169_20120328_012436_outLine +BABEL_BP_107_34194_20120218_004244_inLine +BABEL_BP_107_34194_20120218_004244_outLine +BABEL_BP_107_34248_20120704_190743_inLine +BABEL_BP_107_34248_20120704_190743_outLine +BABEL_BP_107_34357_20120608_192929_inLine +BABEL_BP_107_34357_20120608_192929_outLine +BABEL_BP_107_34439_20120514_155943_inLine +BABEL_BP_107_34439_20120514_155943_outLine +BABEL_BP_107_35064_20120609_183707_inLine +BABEL_BP_107_35064_20120609_183707_outLine +BABEL_BP_107_35576_20120618_004603_inLine +BABEL_BP_107_35576_20120618_004603_outLine +BABEL_BP_107_35612_20120424_221417_inLine +BABEL_BP_107_35612_20120424_221418_outLine +BABEL_BP_107_35896_20120426_160252_inLine +BABEL_BP_107_35896_20120426_160252_outLine +BABEL_BP_107_35932_20120321_221039_inLine +BABEL_BP_107_35932_20120321_221039_outLine 
+BABEL_BP_107_35951_20120415_161914_inLine +BABEL_BP_107_35951_20120415_161914_outLine +BABEL_BP_107_35972_20120510_232832_inLine +BABEL_BP_107_35972_20120510_232832_outLine +BABEL_BP_107_36143_20120217_012635_inLine +BABEL_BP_107_36143_20120217_012635_outLine +BABEL_BP_107_36143_20120217_175752_inLine +BABEL_BP_107_36143_20120217_175752_outLine +BABEL_BP_107_36155_20120421_014500_inLine +BABEL_BP_107_36155_20120421_014500_outLine +BABEL_BP_107_36868_20120426_234641_inLine +BABEL_BP_107_36868_20120426_234641_outLine +BABEL_BP_107_37094_20120208_155100_inLine +BABEL_BP_107_37094_20120208_155100_outLine +BABEL_BP_107_37185_20120608_122828_inLine +BABEL_BP_107_37185_20120608_122828_outLine +BABEL_BP_107_37203_20120409_183756_inLine +BABEL_BP_107_37203_20120409_183756_outLine +BABEL_BP_107_37260_20120509_024525_inLine +BABEL_BP_107_37260_20120509_024525_outLine +BABEL_BP_107_37348_20120506_234059_inLine +BABEL_BP_107_37348_20120506_234938_inLine +BABEL_BP_107_37348_20120507_000848_inLine +BABEL_BP_107_37348_20120507_000848_outLine +BABEL_BP_107_37766_20120608_155216_inLine +BABEL_BP_107_37766_20120608_155217_outLine +BABEL_BP_107_37784_20120509_195942_inLine +BABEL_BP_107_37784_20120509_195942_outLine +BABEL_BP_107_37842_20120513_023632_inLine +BABEL_BP_107_37842_20120513_023632_outLine +BABEL_BP_107_38635_20120424_231446_inLine +BABEL_BP_107_38635_20120424_231446_outLine +BABEL_BP_107_38863_20120614_173605_inLine +BABEL_BP_107_38863_20120614_173605_outLine +BABEL_BP_107_38863_20120614_174335_inLine +BABEL_BP_107_38863_20120614_174335_outLine +BABEL_BP_107_38863_20120614_175009_inLine +BABEL_BP_107_38863_20120614_175009_outLine +BABEL_BP_107_38985_20120506_223622_inLine +BABEL_BP_107_38985_20120506_223622_outLine +BABEL_BP_107_39098_20120324_231724_inLine +BABEL_BP_107_39098_20120324_231724_outLine +BABEL_BP_107_39098_20120324_232726_inLine +BABEL_BP_107_39098_20120324_232726_outLine +BABEL_BP_107_39114_20120614_184836_inLine +BABEL_BP_107_39114_20120614_184836_outLine +BABEL_BP_107_39287_20120611_013320_inLine +BABEL_BP_107_39287_20120611_013320_outLine +BABEL_BP_107_39809_20120216_013447_inLine +BABEL_BP_107_39809_20120216_013447_outLine +BABEL_BP_107_39889_20120325_135610_inLine +BABEL_BP_107_39889_20120325_135610_outLine +BABEL_BP_107_39963_20120323_223603_inLine +BABEL_BP_107_39963_20120323_223603_outLine +BABEL_BP_107_39968_20120609_221724_inLine +BABEL_BP_107_39968_20120609_221724_outLine +BABEL_BP_107_40040_20120506_220308_outLine +BABEL_BP_107_40168_20120420_180808_inLine +BABEL_BP_107_40168_20120420_180808_outLine +BABEL_BP_107_40197_20120504_174115_inLine +BABEL_BP_107_40197_20120504_174115_outLine +BABEL_BP_107_40809_20120627_194401_inLine +BABEL_BP_107_40809_20120627_194401_outLine +BABEL_BP_107_41075_20120416_005109_inLine +BABEL_BP_107_41075_20120416_005109_outLine +BABEL_BP_107_41512_20120704_113900_inLine +BABEL_BP_107_41512_20120704_113900_outLine +BABEL_BP_107_41561_20120704_233037_inLine +BABEL_BP_107_41561_20120704_233037_outLine +BABEL_BP_107_41686_20120217_004524_inLine +BABEL_BP_107_41686_20120217_004524_outLine +BABEL_BP_107_41733_20120429_210259_inLine +BABEL_BP_107_41733_20120429_210259_outLine +BABEL_BP_107_41949_20120430_155207_inLine +BABEL_BP_107_41949_20120430_155207_outLine +BABEL_BP_107_41989_20120321_185501_inLine +BABEL_BP_107_41989_20120321_185501_outLine +BABEL_BP_107_41989_20120321_190714_inLine +BABEL_BP_107_41989_20120321_190714_outLine +BABEL_BP_107_42212_20120704_203258_inLine +BABEL_BP_107_42212_20120704_203258_outLine 
+BABEL_BP_107_42229_20120216_204712_inLine +BABEL_BP_107_42229_20120216_204712_outLine +BABEL_BP_107_42420_20120705_031347_inLine +BABEL_BP_107_42420_20120705_031347_outLine +BABEL_BP_107_42768_20120503_180000_inLine +BABEL_BP_107_42768_20120503_180000_outLine +BABEL_BP_107_42788_20120421_142943_inLine +BABEL_BP_107_42788_20120421_142943_outLine +BABEL_BP_107_43317_20120510_000906_inLine +BABEL_BP_107_43317_20120510_000906_outLine +BABEL_BP_107_43383_20120404_222305_inLine +BABEL_BP_107_43383_20120404_222305_outLine +BABEL_BP_107_43991_20120429_013420_inLine +BABEL_BP_107_43991_20120429_013420_outLine +BABEL_BP_107_44023_20120430_233729_inLine +BABEL_BP_107_44023_20120430_233730_outLine +BABEL_BP_107_44038_20120704_200232_inLine +BABEL_BP_107_44038_20120704_200232_outLine +BABEL_BP_107_44117_20120704_023955_inLine +BABEL_BP_107_44117_20120704_023955_outLine +BABEL_BP_107_44209_20120418_205150_inLine +BABEL_BP_107_44209_20120418_205150_outLine +BABEL_BP_107_44500_20120421_220207_inLine +BABEL_BP_107_44500_20120421_220207_outLine +BABEL_BP_107_44649_20120429_012920_inLine +BABEL_BP_107_44649_20120429_012920_outLine +BABEL_BP_107_45106_20120118_183909_inLine +BABEL_BP_107_45106_20120118_183909_outLine +BABEL_BP_107_45145_20120215_141231_inLine +BABEL_BP_107_45145_20120215_141231_outLine +BABEL_BP_107_45214_20120418_132013_inLine +BABEL_BP_107_45214_20120418_132013_outLine +BABEL_BP_107_45472_20120210_160318_inLine +BABEL_BP_107_45472_20120210_160318_outLine +BABEL_BP_107_45642_20120211_232703_inLine +BABEL_BP_107_45642_20120211_232703_outLine +BABEL_BP_107_45655_20120218_191119_inLine +BABEL_BP_107_45655_20120218_191119_outLine +BABEL_BP_107_45678_20120210_172837_inLine +BABEL_BP_107_45678_20120210_172837_outLine +BABEL_BP_107_45681_20120517_003820_inLine +BABEL_BP_107_45681_20120517_003820_outLine +BABEL_BP_107_45786_20120502_200051_inLine +BABEL_BP_107_45786_20120502_200051_outLine +BABEL_BP_107_46269_20120616_171713_inLine +BABEL_BP_107_46269_20120616_171713_outLine +BABEL_BP_107_46409_20120429_201101_inLine +BABEL_BP_107_46409_20120429_201101_outLine +BABEL_BP_107_46427_20120516_213127_inLine +BABEL_BP_107_46427_20120516_213127_outLine +BABEL_BP_107_46593_20120429_172814_inLine +BABEL_BP_107_46593_20120429_172814_outLine +BABEL_BP_107_46813_20120503_214109_inLine +BABEL_BP_107_46813_20120503_214109_outLine +BABEL_BP_107_47185_20120417_000125_inLine +BABEL_BP_107_47185_20120417_000125_outLine +BABEL_BP_107_47249_20120704_173500_inLine +BABEL_BP_107_47249_20120704_173500_outLine +BABEL_BP_107_47429_20120614_125021_inLine +BABEL_BP_107_47429_20120614_125021_outLine +BABEL_BP_107_47469_20120409_195752_inLine +BABEL_BP_107_47469_20120409_195752_outLine +BABEL_BP_107_47634_20120405_165429_inLine +BABEL_BP_107_47634_20120405_165429_outLine +BABEL_BP_107_47733_20120508_112151_inLine +BABEL_BP_107_47733_20120508_112151_outLine +BABEL_BP_107_48061_20120420_003849_inLine +BABEL_BP_107_48061_20120420_003849_outLine +BABEL_BP_107_48061_20120420_005250_inLine +BABEL_BP_107_48061_20120420_005250_outLine +BABEL_BP_107_48072_20120218_181934_inLine +BABEL_BP_107_48072_20120218_181934_outLine +BABEL_BP_107_48072_20120218_183449_inLine +BABEL_BP_107_48072_20120218_183449_outLine +BABEL_BP_107_48317_20120423_021629_inLine +BABEL_BP_107_48317_20120423_021629_outLine +BABEL_BP_107_48404_20120704_162020_inLine +BABEL_BP_107_48404_20120704_162020_outLine +BABEL_BP_107_48410_20120329_220200_inLine +BABEL_BP_107_48410_20120329_220200_outLine +BABEL_BP_107_48536_20120214_212101_inLine 
+BABEL_BP_107_48536_20120214_212101_outLine +BABEL_BP_107_48645_20120421_221346_inLine +BABEL_BP_107_48645_20120421_221346_outLine +BABEL_BP_107_49042_20120408_181734_inLine +BABEL_BP_107_49042_20120408_181734_outLine +BABEL_BP_107_49173_20120505_204557_inLine +BABEL_BP_107_49173_20120505_204557_outLine +BABEL_BP_107_49306_20120524_204041_inLine +BABEL_BP_107_49306_20120524_204041_outLine +BABEL_BP_107_49624_20120618_024358_inLine +BABEL_BP_107_49624_20120618_024358_outLine +BABEL_BP_107_50101_20120208_164249_inLine +BABEL_BP_107_50101_20120208_164249_outLine +BABEL_BP_107_50101_20120208_170815_inLine +BABEL_BP_107_50101_20120208_170815_outLine +BABEL_BP_107_50416_20120517_120502_inLine +BABEL_BP_107_50416_20120517_120502_outLine +BABEL_BP_107_50555_20120428_205621_inLine +BABEL_BP_107_50555_20120428_205621_outLine +BABEL_BP_107_50597_20120516_212308_inLine +BABEL_BP_107_50597_20120516_212308_outLine +BABEL_BP_107_50763_20120220_151302_inLine +BABEL_BP_107_50763_20120220_151302_outLine +BABEL_BP_107_50915_20120608_150955_inLine +BABEL_BP_107_50915_20120608_150955_outLine +BABEL_BP_107_51149_20120514_203206_inLine +BABEL_BP_107_51149_20120514_203207_outLine +BABEL_BP_107_51791_20120517_004528_inLine +BABEL_BP_107_51791_20120517_004528_outLine +BABEL_BP_107_52024_20120414_193538_inLine +BABEL_BP_107_52024_20120414_193538_outLine +BABEL_BP_107_52325_20120418_011735_inLine +BABEL_BP_107_52325_20120418_011735_outLine +BABEL_BP_107_52446_20120212_002618_inLine +BABEL_BP_107_52446_20120212_002618_outLine +BABEL_BP_107_52515_20120324_020411_inLine +BABEL_BP_107_52515_20120324_020411_outLine +BABEL_BP_107_52606_20120617_195206_inLine +BABEL_BP_107_52606_20120617_195206_outLine +BABEL_BP_107_52642_20120517_000300_inLine +BABEL_BP_107_52642_20120517_000300_outLine +BABEL_BP_107_52691_20120617_160904_inLine +BABEL_BP_107_52691_20120617_160904_outLine +BABEL_BP_107_52900_20120320_150335_inLine +BABEL_BP_107_52900_20120320_150335_outLine +BABEL_BP_107_52913_20120704_121758_inLine +BABEL_BP_107_52913_20120704_121759_outLine +BABEL_BP_107_53179_20120618_003820_inLine +BABEL_BP_107_53179_20120618_003820_outLine +BABEL_BP_107_53278_20120508_192335_inLine +BABEL_BP_107_53278_20120508_192335_outLine +BABEL_BP_107_53352_20120504_210910_inLine +BABEL_BP_107_53352_20120504_210910_outLine +BABEL_BP_107_53429_20120704_123624_inLine +BABEL_BP_107_53429_20120704_123624_outLine +BABEL_BP_107_53500_20120416_172018_inLine +BABEL_BP_107_53500_20120416_172018_outLine +BABEL_BP_107_53989_20120703_234506_inLine +BABEL_BP_107_53989_20120703_234506_outLine +BABEL_BP_107_53989_20120703_235719_inLine +BABEL_BP_107_53989_20120703_235719_outLine +BABEL_BP_107_54339_20120506_215557_inLine +BABEL_BP_107_54339_20120506_215557_outLine +BABEL_BP_107_55100_20120417_210019_inLine +BABEL_BP_107_55100_20120417_210020_outLine +BABEL_BP_107_55121_20120504_003327_inLine +BABEL_BP_107_55121_20120504_003327_outLine +BABEL_BP_107_55144_20120321_012306_inLine +BABEL_BP_107_55144_20120321_012306_outLine +BABEL_BP_107_55399_20120215_193434_inLine +BABEL_BP_107_55399_20120215_193434_outLine +BABEL_BP_107_55450_20120424_185013_inLine +BABEL_BP_107_55450_20120424_185013_outLine +BABEL_BP_107_55678_20120323_211821_inLine +BABEL_BP_107_55678_20120323_211821_outLine +BABEL_BP_107_55786_20120322_173045_inLine +BABEL_BP_107_55786_20120322_173045_outLine +BABEL_BP_107_55820_20120411_162436_inLine +BABEL_BP_107_55820_20120411_162436_outLine +BABEL_BP_107_55823_20120608_172512_inLine +BABEL_BP_107_55823_20120608_172512_outLine 
+BABEL_BP_107_56342_20120419_132008_inLine +BABEL_BP_107_56342_20120419_132008_outLine +BABEL_BP_107_56591_20120418_002004_inLine +BABEL_BP_107_56591_20120418_002004_outLine +BABEL_BP_107_56868_20120406_013202_inLine +BABEL_BP_107_56868_20120406_013202_outLine +BABEL_BP_107_56943_20120222_201642_inLine +BABEL_BP_107_56943_20120222_201642_outLine +BABEL_BP_107_57071_20120527_184402_inLine +BABEL_BP_107_57071_20120527_184402_outLine +BABEL_BP_107_57277_20120503_200553_inLine +BABEL_BP_107_57277_20120503_200553_outLine +BABEL_BP_107_57551_20120325_225227_inLine +BABEL_BP_107_57551_20120325_225227_outLine +BABEL_BP_107_57609_20120430_223510_inLine +BABEL_BP_107_57609_20120430_223510_outLine +BABEL_BP_107_57625_20120506_021834_inLine +BABEL_BP_107_57625_20120506_021834_outLine +BABEL_BP_107_57724_20120212_213811_inLine +BABEL_BP_107_57724_20120212_213811_outLine +BABEL_BP_107_57907_20120608_160937_inLine +BABEL_BP_107_57907_20120608_160939_outLine +BABEL_BP_107_58157_20120608_181026_inLine +BABEL_BP_107_58157_20120608_181026_outLine +BABEL_BP_107_58413_20120418_134444_inLine +BABEL_BP_107_58413_20120418_134444_outLine +BABEL_BP_107_58923_20120210_190334_inLine +BABEL_BP_107_58923_20120210_190334_outLine +BABEL_BP_107_59028_20120523_205355_inLine +BABEL_BP_107_59028_20120523_205355_outLine +BABEL_BP_107_59147_20120215_152227_inLine +BABEL_BP_107_59147_20120215_152227_outLine +BABEL_BP_107_59544_20120406_170833_inLine +BABEL_BP_107_59544_20120406_170833_outLine +BABEL_BP_107_59671_20120322_225750_inLine +BABEL_BP_107_59671_20120322_225750_outLine +BABEL_BP_107_59746_20120414_161308_inLine +BABEL_BP_107_59746_20120414_161308_outLine +BABEL_BP_107_60250_20120218_193537_inLine +BABEL_BP_107_60250_20120218_193537_outLine +BABEL_BP_107_60848_20120704_171856_inLine +BABEL_BP_107_60848_20120704_171856_outLine +BABEL_BP_107_60995_20120704_234842_inLine +BABEL_BP_107_60995_20120704_234843_outLine +BABEL_BP_107_61203_20120217_182644_inLine +BABEL_BP_107_61203_20120217_182644_outLine +BABEL_BP_107_61762_20120217_131207_inLine +BABEL_BP_107_61762_20120217_131207_outLine +BABEL_BP_107_61822_20120405_153356_inLine +BABEL_BP_107_61822_20120405_153357_outLine +BABEL_BP_107_61936_20120704_141205_inLine +BABEL_BP_107_61936_20120704_141205_outLine +BABEL_BP_107_61988_20120406_134336_inLine +BABEL_BP_107_61988_20120406_134336_outLine +BABEL_BP_107_62286_20120429_193945_inLine +BABEL_BP_107_62286_20120429_193945_outLine +BABEL_BP_107_62589_20120423_001315_inLine +BABEL_BP_107_62589_20120423_001315_outLine +BABEL_BP_107_62589_20120423_002039_inLine +BABEL_BP_107_62589_20120423_002039_outLine +BABEL_BP_107_63320_20120608_012846_inLine +BABEL_BP_107_63320_20120608_012846_outLine +BABEL_BP_107_63491_20120502_145101_inLine +BABEL_BP_107_63491_20120502_145101_outLine +BABEL_BP_107_64185_20120504_161653_inLine +BABEL_BP_107_64185_20120504_161653_outLine +BABEL_BP_107_64404_20120206_185707_inLine +BABEL_BP_107_64404_20120206_185708_outLine +BABEL_BP_107_64661_20120325_212204_inLine +BABEL_BP_107_64661_20120325_212204_outLine +BABEL_BP_107_64946_20120517_001754_inLine +BABEL_BP_107_64946_20120517_001754_outLine +BABEL_BP_107_65069_20120421_135835_inLine +BABEL_BP_107_65069_20120421_135835_outLine +BABEL_BP_107_65371_20120507_195517_inLine +BABEL_BP_107_65371_20120507_195517_outLine +BABEL_BP_107_65415_20120220_153755_inLine +BABEL_BP_107_65415_20120220_153755_outLine +BABEL_BP_107_65443_20120220_152901_inLine +BABEL_BP_107_65443_20120220_152901_outLine +BABEL_BP_107_65601_20120417_001124_inLine 
+BABEL_BP_107_65601_20120417_001124_outLine +BABEL_BP_107_65629_20120322_191551_inLine +BABEL_BP_107_65629_20120322_191551_outLine +BABEL_BP_107_65656_20120503_233657_inLine +BABEL_BP_107_65656_20120503_233658_outLine +BABEL_BP_107_65717_20120414_151906_inLine +BABEL_BP_107_65717_20120414_151906_outLine +BABEL_BP_107_65783_20120429_153408_inLine +BABEL_BP_107_65783_20120429_153408_outLine +BABEL_BP_107_65923_20120420_093839_inLine +BABEL_BP_107_65923_20120420_093839_outLine +BABEL_BP_107_66082_20120608_111438_inLine +BABEL_BP_107_66082_20120608_111438_outLine +BABEL_BP_107_66101_20120426_213014_inLine +BABEL_BP_107_66101_20120426_213014_outLine +BABEL_BP_107_66103_20120505_220240_inLine +BABEL_BP_107_66103_20120505_220240_outLine +BABEL_BP_107_66275_20120503_171050_inLine +BABEL_BP_107_66275_20120503_171050_outLine +BABEL_BP_107_66330_20120502_001133_inLine +BABEL_BP_107_66330_20120502_001133_outLine +BABEL_BP_107_66441_20120324_190814_inLine +BABEL_BP_107_66441_20120324_190814_outLine +BABEL_BP_107_66668_20120212_211947_inLine +BABEL_BP_107_66668_20120212_211947_outLine +BABEL_BP_107_66784_20120616_151422_inLine +BABEL_BP_107_66784_20120616_151422_outLine +BABEL_BP_107_66798_20120404_220411_inLine +BABEL_BP_107_66798_20120404_220411_outLine +BABEL_BP_107_67150_20120618_004347_inLine +BABEL_BP_107_67150_20120618_004347_outLine +BABEL_BP_107_67411_20120322_141025_inLine +BABEL_BP_107_67411_20120322_141025_outLine +BABEL_BP_107_67733_20120215_120553_inLine +BABEL_BP_107_67733_20120215_120553_outLine +BABEL_BP_107_67775_20120502_193035_inLine +BABEL_BP_107_67775_20120502_193035_outLine +BABEL_BP_107_68028_20120502_121140_inLine +BABEL_BP_107_68028_20120502_121140_outLine +BABEL_BP_107_68136_20120416_173551_inLine +BABEL_BP_107_68136_20120416_173551_outLine +BABEL_BP_107_68239_20120608_131431_inLine +BABEL_BP_107_68239_20120608_131431_outLine +BABEL_BP_107_68337_20120404_230000_inLine +BABEL_BP_107_68337_20120404_230000_outLine +BABEL_BP_107_68861_20120323_171053_inLine +BABEL_BP_107_68861_20120323_171053_outLine +BABEL_BP_107_68861_20120323_180450_inLine +BABEL_BP_107_68861_20120323_180450_outLine +BABEL_BP_107_69052_20120417_002628_inLine +BABEL_BP_107_69052_20120417_002628_outLine +BABEL_BP_107_69230_20120703_133459_inLine +BABEL_BP_107_69230_20120703_133459_outLine +BABEL_BP_107_69236_20120214_230344_inLine +BABEL_BP_107_69236_20120214_230344_outLine +BABEL_BP_107_69368_20120211_170226_inLine +BABEL_BP_107_69368_20120211_170226_outLine +BABEL_BP_107_69446_20120416_020122_inLine +BABEL_BP_107_69446_20120416_020122_outLine +BABEL_BP_107_69473_20120605_230319_inLine +BABEL_BP_107_69473_20120605_230319_outLine +BABEL_BP_107_69548_20120213_023955_inLine +BABEL_BP_107_69548_20120213_023955_outLine +BABEL_BP_107_69621_20120213_130748_inLine +BABEL_BP_107_69621_20120213_130748_outLine +BABEL_BP_107_69650_20120323_023553_inLine +BABEL_BP_107_69650_20120323_023553_outLine +BABEL_BP_107_69764_20120324_234039_inLine +BABEL_BP_107_69764_20120324_234039_outLine +BABEL_BP_107_70643_20120427_194211_inLine +BABEL_BP_107_70643_20120427_194211_outLine +BABEL_BP_107_70680_20120201_144426_inLine +BABEL_BP_107_70680_20120201_144426_outLine +BABEL_BP_107_70965_20120506_175829_inLine +BABEL_BP_107_70965_20120506_175829_outLine +BABEL_BP_107_71160_20120616_001355_inLine +BABEL_BP_107_71160_20120616_001355_outLine +BABEL_BP_107_72011_20120704_231031_inLine +BABEL_BP_107_72011_20120704_231031_outLine +BABEL_BP_107_72141_20120322_223344_inLine +BABEL_BP_107_72141_20120322_223344_outLine 
+BABEL_BP_107_72234_20120511_134939_inLine +BABEL_BP_107_72234_20120511_134939_outLine +BABEL_BP_107_72234_20120511_140008_inLine +BABEL_BP_107_72234_20120511_140008_outLine +BABEL_BP_107_72746_20120429_003515_inLine +BABEL_BP_107_72746_20120429_003515_outLine +BABEL_BP_107_72799_20120425_133035_inLine +BABEL_BP_107_72799_20120425_133035_outLine +BABEL_BP_107_72907_20120505_105259_inLine +BABEL_BP_107_72907_20120505_105259_outLine +BABEL_BP_107_73050_20120426_114239_inLine +BABEL_BP_107_73050_20120426_114239_outLine +BABEL_BP_107_73059_20120425_012258_inLine +BABEL_BP_107_73059_20120425_012258_outLine +BABEL_BP_107_73072_20120322_141121_inLine +BABEL_BP_107_73072_20120322_141121_outLine +BABEL_BP_107_73122_20120501_124450_inLine +BABEL_BP_107_73122_20120501_124450_outLine +BABEL_BP_107_73170_20120322_151236_inLine +BABEL_BP_107_73170_20120322_151236_outLine +BABEL_BP_107_73780_20120613_200802_inLine +BABEL_BP_107_73780_20120613_200802_outLine +BABEL_BP_107_73786_20120323_222826_inLine +BABEL_BP_107_73786_20120323_222826_outLine +BABEL_BP_107_73923_20120118_183938_inLine +BABEL_BP_107_73923_20120118_183938_outLine +BABEL_BP_107_74368_20120424_185039_inLine +BABEL_BP_107_74368_20120424_185039_outLine +BABEL_BP_107_74508_20120418_002925_inLine +BABEL_BP_107_74508_20120418_002925_outLine +BABEL_BP_107_74607_20120426_001241_inLine +BABEL_BP_107_74607_20120426_001241_outLine +BABEL_BP_107_74884_20120323_135739_inLine +BABEL_BP_107_74884_20120323_135739_outLine +BABEL_BP_107_75151_20120611_195147_inLine +BABEL_BP_107_75151_20120611_195147_outLine +BABEL_BP_107_75354_20120506_150750_inLine +BABEL_BP_107_75354_20120506_150750_outLine +BABEL_BP_107_75740_20120216_215302_inLine +BABEL_BP_107_75740_20120216_215302_outLine +BABEL_BP_107_75871_20120214_025447_inLine +BABEL_BP_107_75871_20120214_025447_outLine +BABEL_BP_107_75932_20120419_222819_inLine +BABEL_BP_107_75932_20120419_222819_outLine +BABEL_BP_107_76002_20120608_001301_inLine +BABEL_BP_107_76002_20120608_001301_outLine +BABEL_BP_107_76331_20120417_020306_inLine +BABEL_BP_107_76331_20120417_020306_outLine +BABEL_BP_107_76333_20120418_131111_inLine +BABEL_BP_107_76333_20120418_131111_outLine +BABEL_BP_107_76745_20120608_120713_inLine +BABEL_BP_107_76745_20120608_120713_outLine +BABEL_BP_107_77137_20120424_021726_inLine +BABEL_BP_107_77137_20120424_021726_outLine +BABEL_BP_107_77342_20120613_025311_inLine +BABEL_BP_107_77342_20120613_025311_outLine +BABEL_BP_107_77465_20120422_011705_inLine +BABEL_BP_107_77465_20120422_011705_outLine +BABEL_BP_107_77483_20120412_193453_inLine +BABEL_BP_107_77483_20120412_193453_outLine +BABEL_BP_107_77485_20120612_135036_inLine +BABEL_BP_107_77485_20120612_135036_outLine +BABEL_BP_107_77584_20120411_172119_inLine +BABEL_BP_107_77584_20120411_172119_outLine +BABEL_BP_107_77811_20120616_161504_inLine +BABEL_BP_107_77811_20120616_161504_outLine +BABEL_BP_107_77965_20120215_010556_inLine +BABEL_BP_107_77965_20120215_010556_outLine +BABEL_BP_107_78046_20120508_124043_inLine +BABEL_BP_107_78046_20120508_124043_outLine +BABEL_BP_107_78114_20120418_223932_inLine +BABEL_BP_107_78114_20120418_223932_outLine +BABEL_BP_107_78114_20120418_225258_inLine +BABEL_BP_107_78114_20120418_225258_outLine +BABEL_BP_107_78245_20120321_225726_inLine +BABEL_BP_107_78245_20120321_225726_outLine +BABEL_BP_107_78290_20120425_225137_inLine +BABEL_BP_107_78290_20120425_225137_outLine +BABEL_BP_107_78583_20120505_001318_inLine +BABEL_BP_107_78583_20120505_001318_outLine +BABEL_BP_107_78728_20120320_163004_inLine 
+BABEL_BP_107_78728_20120320_163004_outLine +BABEL_BP_107_78879_20120322_210341_inLine +BABEL_BP_107_78879_20120322_210341_outLine +BABEL_BP_107_79618_20120322_195037_inLine +BABEL_BP_107_79618_20120322_195037_outLine +BABEL_BP_107_79698_20120614_142804_inLine +BABEL_BP_107_79698_20120614_142804_outLine +BABEL_BP_107_79899_20120507_153432_inLine +BABEL_BP_107_79899_20120507_153432_outLine +BABEL_BP_107_80068_20120419_172811_inLine +BABEL_BP_107_80068_20120419_172811_outLine +BABEL_BP_107_80075_20120418_223142_inLine +BABEL_BP_107_80075_20120418_223142_outLine +BABEL_BP_107_80156_20120325_205810_inLine +BABEL_BP_107_80156_20120325_205810_outLine +BABEL_BP_107_80195_20120328_024036_inLine +BABEL_BP_107_80195_20120328_024036_outLine +BABEL_BP_107_80247_20120429_181855_inLine +BABEL_BP_107_80247_20120429_181855_outLine +BABEL_BP_107_80856_20120325_214845_inLine +BABEL_BP_107_80856_20120325_214845_outLine +BABEL_BP_107_81015_20120418_212020_inLine +BABEL_BP_107_81015_20120418_212020_outLine +BABEL_BP_107_81070_20120612_140617_inLine +BABEL_BP_107_81070_20120612_140617_outLine +BABEL_BP_107_81084_20120328_220200_inLine +BABEL_BP_107_81084_20120328_220200_outLine +BABEL_BP_107_81119_20120418_221853_inLine +BABEL_BP_107_81119_20120418_221853_outLine +BABEL_BP_107_81261_20120324_015429_inLine +BABEL_BP_107_81261_20120324_015429_outLine +BABEL_BP_107_81587_20120429_185902_inLine +BABEL_BP_107_81587_20120429_185902_outLine +BABEL_BP_107_81642_20120504_013042_inLine +BABEL_BP_107_81642_20120504_013042_outLine +BABEL_BP_107_81647_20120425_231333_inLine +BABEL_BP_107_81647_20120425_231333_outLine +BABEL_BP_107_81799_20120506_220843_inLine +BABEL_BP_107_81799_20120506_220843_outLine +BABEL_BP_107_81820_20120506_004426_inLine +BABEL_BP_107_81820_20120506_004426_outLine +BABEL_BP_107_81944_20120607_131513_inLine +BABEL_BP_107_81944_20120607_131513_outLine +BABEL_BP_107_82009_20120503_174403_inLine +BABEL_BP_107_82009_20120503_174403_outLine +BABEL_BP_107_82023_20120217_190453_inLine +BABEL_BP_107_82023_20120217_190453_outLine +BABEL_BP_107_82408_20120216_020857_inLine +BABEL_BP_107_82408_20120216_020857_outLine +BABEL_BP_107_82409_20120507_104757_inLine +BABEL_BP_107_82409_20120507_104757_outLine +BABEL_BP_107_82443_20120705_035534_inLine +BABEL_BP_107_82443_20120705_035535_outLine +BABEL_BP_107_82484_20120409_191254_inLine +BABEL_BP_107_82484_20120409_191254_outLine +BABEL_BP_107_82881_20120212_142555_inLine +BABEL_BP_107_82881_20120212_142555_outLine +BABEL_BP_107_83186_20120414_181142_inLine +BABEL_BP_107_83186_20120414_181142_outLine +BABEL_BP_107_83493_20120509_144229_inLine +BABEL_BP_107_83493_20120509_144229_outLine +BABEL_BP_107_83585_20120429_194403_inLine +BABEL_BP_107_83585_20120429_194403_outLine +BABEL_BP_107_83791_20120329_034633_inLine +BABEL_BP_107_83791_20120329_034633_outLine +BABEL_BP_107_84394_20120426_000543_inLine +BABEL_BP_107_84394_20120426_000543_outLine +BABEL_BP_107_84394_20120426_001306_inLine +BABEL_BP_107_84394_20120426_001306_outLine +BABEL_BP_107_84439_20120418_011204_inLine +BABEL_BP_107_84439_20120418_011204_outLine +BABEL_BP_107_84491_20120430_203802_inLine +BABEL_BP_107_84491_20120430_203802_outLine +BABEL_BP_107_84608_20120421_181859_inLine +BABEL_BP_107_84608_20120421_181859_outLine +BABEL_BP_107_84700_20120501_125141_inLine +BABEL_BP_107_84700_20120501_125141_outLine +BABEL_BP_107_84865_20120618_002645_inLine +BABEL_BP_107_84865_20120618_002645_outLine +BABEL_BP_107_84916_20120427_012731_inLine +BABEL_BP_107_84916_20120427_012731_outLine 
+BABEL_BP_107_84980_20120419_172354_inLine +BABEL_BP_107_84980_20120419_172354_outLine +BABEL_BP_107_85719_20120423_181434_inLine +BABEL_BP_107_85719_20120423_181434_outLine +BABEL_BP_107_85752_20120607_210520_inLine +BABEL_BP_107_85752_20120607_210520_outLine +BABEL_BP_107_85948_20120212_131910_inLine +BABEL_BP_107_85948_20120212_131910_outLine +BABEL_BP_107_86004_20120324_175639_inLine +BABEL_BP_107_86004_20120324_175639_outLine +BABEL_BP_107_86900_20120216_203256_inLine +BABEL_BP_107_86900_20120216_203256_outLine +BABEL_BP_107_86956_20120322_203435_inLine +BABEL_BP_107_86956_20120322_203435_outLine +BABEL_BP_107_87059_20120704_001703_inLine +BABEL_BP_107_87059_20120704_001703_outLine +BABEL_BP_107_87077_20120421_193746_inLine +BABEL_BP_107_87077_20120421_193746_outLine +BABEL_BP_107_87107_20120321_205615_inLine +BABEL_BP_107_87107_20120321_205615_outLine +BABEL_BP_107_87107_20120321_234308_inLine +BABEL_BP_107_87107_20120321_234308_outLine +BABEL_BP_107_87234_20120704_120118_inLine +BABEL_BP_107_87234_20120704_120118_outLine +BABEL_BP_107_87351_20120330_014139_inLine +BABEL_BP_107_87351_20120330_014139_outLine +BABEL_BP_107_87520_20120414_023319_inLine +BABEL_BP_107_87520_20120414_023319_outLine +BABEL_BP_107_87607_20120516_233058_inLine +BABEL_BP_107_87607_20120516_233058_outLine +BABEL_BP_107_87634_20120208_165319_inLine +BABEL_BP_107_87634_20120208_165319_outLine +BABEL_BP_107_87961_20120324_022603_inLine +BABEL_BP_107_87961_20120324_022603_outLine +BABEL_BP_107_88245_20120511_235523_inLine +BABEL_BP_107_88245_20120511_235523_outLine +BABEL_BP_107_88385_20120502_200409_inLine +BABEL_BP_107_88385_20120502_200409_outLine +BABEL_BP_107_88385_20120502_201320_inLine +BABEL_BP_107_88385_20120502_201320_outLine +BABEL_BP_107_88464_20120503_003553_inLine +BABEL_BP_107_88464_20120503_003553_outLine +BABEL_BP_107_88932_20120417_195406_inLine +BABEL_BP_107_88932_20120417_195406_outLine +BABEL_BP_107_88982_20120506_154243_inLine +BABEL_BP_107_88982_20120506_154243_outLine +BABEL_BP_107_89301_20120429_183901_inLine +BABEL_BP_107_89301_20120429_183901_outLine +BABEL_BP_107_89345_20120322_214445_inLine +BABEL_BP_107_89345_20120322_214445_outLine +BABEL_BP_107_89345_20120322_220001_inLine +BABEL_BP_107_89345_20120322_220001_outLine +BABEL_BP_107_89573_20120422_162720_inLine +BABEL_BP_107_89573_20120422_162720_outLine +BABEL_BP_107_89583_20120425_142134_inLine +BABEL_BP_107_89583_20120425_142134_outLine +BABEL_BP_107_89867_20120324_204851_inLine +BABEL_BP_107_89867_20120324_204851_outLine +BABEL_BP_107_90046_20120613_193455_inLine +BABEL_BP_107_90046_20120613_193455_outLine +BABEL_BP_107_90055_20120220_173056_inLine +BABEL_BP_107_90055_20120220_173056_outLine +BABEL_BP_107_90127_20120429_190926_inLine +BABEL_BP_107_90127_20120429_190926_outLine +BABEL_BP_107_90389_20120510_233725_inLine +BABEL_BP_107_90389_20120510_233725_outLine +BABEL_BP_107_90436_20120507_172546_inLine +BABEL_BP_107_90436_20120507_172546_outLine +BABEL_BP_107_90511_20120212_010634_inLine +BABEL_BP_107_90511_20120212_010634_outLine +BABEL_BP_107_90730_20120627_132153_inLine +BABEL_BP_107_90730_20120627_132153_outLine +BABEL_BP_107_90730_20120627_133239_inLine +BABEL_BP_107_90730_20120627_133239_outLine +BABEL_BP_107_90810_20120217_200922_inLine +BABEL_BP_107_90810_20120217_200922_outLine +BABEL_BP_107_90834_20120212_143912_inLine +BABEL_BP_107_90834_20120212_143912_outLine +BABEL_BP_107_91143_20120422_002758_inLine +BABEL_BP_107_91143_20120422_002758_outLine +BABEL_BP_107_91171_20120414_012621_inLine 
+BABEL_BP_107_91171_20120414_012621_outLine +BABEL_BP_107_91386_20120703_235839_inLine +BABEL_BP_107_91386_20120703_235839_outLine +BABEL_BP_107_91677_20120422_141358_inLine +BABEL_BP_107_91677_20120422_141358_outLine +BABEL_BP_107_91703_20120617_235231_inLine +BABEL_BP_107_92308_20120430_133906_inLine +BABEL_BP_107_92308_20120430_133906_outLine +BABEL_BP_107_92642_20120211_005506_inLine +BABEL_BP_107_92642_20120211_005506_outLine +BABEL_BP_107_92752_20120421_184804_inLine +BABEL_BP_107_92752_20120421_184805_outLine +BABEL_BP_107_92820_20120617_124233_inLine +BABEL_BP_107_92820_20120617_124233_outLine +BABEL_BP_107_92852_20120418_234454_inLine +BABEL_BP_107_92852_20120418_234454_outLine +BABEL_BP_107_93000_20120325_233431_inLine +BABEL_BP_107_93000_20120325_233431_outLine +BABEL_BP_107_93151_20120501_140536_inLine +BABEL_BP_107_93151_20120501_140536_outLine +BABEL_BP_107_93192_20120322_180400_inLine +BABEL_BP_107_93192_20120322_180400_outLine +BABEL_BP_107_93277_20120510_183523_inLine +BABEL_BP_107_93277_20120510_183523_outLine +BABEL_BP_107_93314_20120501_134510_inLine +BABEL_BP_107_93314_20120501_134510_outLine +BABEL_BP_107_93436_20120611_021137_inLine +BABEL_BP_107_93607_20120418_014651_inLine +BABEL_BP_107_93607_20120418_014651_outLine +BABEL_BP_107_93643_20120212_175939_inLine +BABEL_BP_107_93643_20120212_175939_outLine +BABEL_BP_107_93811_20120418_213351_inLine +BABEL_BP_107_93811_20120418_213351_outLine +BABEL_BP_107_94168_20120326_171855_inLine +BABEL_BP_107_94168_20120326_171855_outLine +BABEL_BP_107_94235_20120428_004200_inLine +BABEL_BP_107_94235_20120428_004200_outLine +BABEL_BP_107_94752_20120218_144213_inLine +BABEL_BP_107_94752_20120218_144213_outLine +BABEL_BP_107_95350_20120325_000241_inLine +BABEL_BP_107_95350_20120325_000241_outLine +BABEL_BP_107_95534_20120608_005148_inLine +BABEL_BP_107_95534_20120608_005148_outLine +BABEL_BP_107_95650_20120208_163126_inLine +BABEL_BP_107_95650_20120208_163126_outLine +BABEL_BP_107_95736_20120323_154852_inLine +BABEL_BP_107_95736_20120323_154852_outLine +BABEL_BP_107_95849_20120704_011515_inLine +BABEL_BP_107_95849_20120704_011515_outLine +BABEL_BP_107_95893_20120501_114843_inLine +BABEL_BP_107_95893_20120501_114843_outLine +BABEL_BP_107_95952_20120607_145525_inLine +BABEL_BP_107_95952_20120607_145526_outLine +BABEL_BP_107_96108_20120421_194651_inLine +BABEL_BP_107_96108_20120421_194651_outLine +BABEL_BP_107_96347_20120212_202200_inLine +BABEL_BP_107_96347_20120212_202200_outLine +BABEL_BP_107_96463_20120507_233133_inLine +BABEL_BP_107_96463_20120507_233133_outLine +BABEL_BP_107_96636_20120421_193514_inLine +BABEL_BP_107_96636_20120421_193514_outLine +BABEL_BP_107_96636_20120421_195252_inLine +BABEL_BP_107_96636_20120421_195252_outLine +BABEL_BP_107_96788_20120409_195914_inLine +BABEL_BP_107_96788_20120409_195914_outLine +BABEL_BP_107_97004_20120704_194048_inLine +BABEL_BP_107_97004_20120704_194048_outLine +BABEL_BP_107_97230_20120612_142451_inLine +BABEL_BP_107_97230_20120612_142451_outLine +BABEL_BP_107_97254_20120422_153600_inLine +BABEL_BP_107_97254_20120422_153600_outLine +BABEL_BP_107_97298_20120704_201748_inLine +BABEL_BP_107_97298_20120704_201748_outLine +BABEL_BP_107_97590_20120616_165917_inLine +BABEL_BP_107_97590_20120616_165917_outLine +BABEL_BP_107_97635_20120617_233435_inLine +BABEL_BP_107_97635_20120617_233435_outLine +BABEL_BP_107_97699_20120618_005543_inLine +BABEL_BP_107_97699_20120618_005543_outLine +BABEL_BP_107_97797_20120617_234645_inLine +BABEL_BP_107_97797_20120617_234645_outLine 
+BABEL_BP_107_97941_20120423_201113_inLine +BABEL_BP_107_97941_20120423_201113_outLine +BABEL_BP_107_97941_20120423_201934_inLine +BABEL_BP_107_97941_20120423_201934_outLine +BABEL_BP_107_98279_20120509_172421_inLine +BABEL_BP_107_98279_20120509_172421_outLine +BABEL_BP_107_98762_20120612_160310_inLine +BABEL_BP_107_98762_20120612_160310_outLine +BABEL_BP_107_99514_20120505_142249_inLine +BABEL_BP_107_99514_20120505_142249_outLine +BABEL_BP_107_99697_20120424_211952_inLine +BABEL_BP_107_99697_20120424_211952_outLine +BABEL_BP_107_99709_20120510_011731_inLine +BABEL_BP_107_99709_20120510_011731_outLine diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/evalpart1.list b/egs/babel/s5d/conf/lists/107-vietnamese/evalpart1.list new file mode 100644 index 00000000000..81896827fbf --- /dev/null +++ b/egs/babel/s5d/conf/lists/107-vietnamese/evalpart1.list @@ -0,0 +1,194 @@ +BABEL_BP_107_11203_20120415_212056_inLine +BABEL_BP_107_11203_20120415_212056_outLine +BABEL_BP_107_11824_20120413_213002_inLine +BABEL_BP_107_11824_20120413_213002_outLine +BABEL_BP_107_14389_20120617_164138_inLine +BABEL_BP_107_14389_20120617_164138_outLine +BABEL_BP_107_14874_20120320_190424_inLine +BABEL_BP_107_14874_20120320_190424_outLine +BABEL_BP_107_14874_20120320_192210_inLine +BABEL_BP_107_14874_20120320_192210_outLine +BABEL_BP_107_15859_20120419_133516_inLine +BABEL_BP_107_15859_20120419_133516_outLine +BABEL_BP_107_17900_20120323_015142_inLine +BABEL_BP_107_17900_20120323_015142_outLine +BABEL_BP_107_20685_20120222_210447_inLine +BABEL_BP_107_20685_20120222_210447_outLine +BABEL_BP_107_20775_20120502_214146_inLine +BABEL_BP_107_20775_20120502_214146_outLine +BABEL_BP_107_22566_20120704_023628_inLine +BABEL_BP_107_22566_20120704_023628_outLine +BABEL_BP_107_24379_20120422_173418_inLine +BABEL_BP_107_24379_20120422_173418_outLine +BABEL_BP_107_24431_20120215_202205_inLine +BABEL_BP_107_24431_20120215_202205_outLine +BABEL_BP_107_25502_20120217_005526_inLine +BABEL_BP_107_25502_20120217_005526_outLine +BABEL_BP_107_25871_20120422_181122_inLine +BABEL_BP_107_25871_20120422_181122_outLine +BABEL_BP_107_27605_20120329_015050_inLine +BABEL_BP_107_27605_20120329_015050_outLine +BABEL_BP_107_27645_20120501_005559_inLine +BABEL_BP_107_27645_20120501_005559_outLine +BABEL_BP_107_28754_20120417_233136_inLine +BABEL_BP_107_28754_20120417_233136_outLine +BABEL_BP_107_29133_20120212_223742_inLine +BABEL_BP_107_29133_20120212_223742_outLine +BABEL_BP_107_29512_20120426_133304_inLine +BABEL_BP_107_29512_20120426_133304_outLine +BABEL_BP_107_31256_20120424_173937_inLine +BABEL_BP_107_31256_20120424_173937_outLine +BABEL_BP_107_32452_20120417_025731_inLine +BABEL_BP_107_32452_20120417_025731_outLine +BABEL_BP_107_32830_20120217_010905_inLine +BABEL_BP_107_32830_20120217_010905_outLine +BABEL_BP_107_32962_20120417_002922_inLine +BABEL_BP_107_32962_20120417_002922_outLine +BABEL_BP_107_34357_20120608_192929_inLine +BABEL_BP_107_34357_20120608_192929_outLine +BABEL_BP_107_34439_20120514_155943_inLine +BABEL_BP_107_34439_20120514_155943_outLine +BABEL_BP_107_35896_20120426_160252_inLine +BABEL_BP_107_35896_20120426_160252_outLine +BABEL_BP_107_36143_20120217_012635_inLine +BABEL_BP_107_36143_20120217_012635_outLine +BABEL_BP_107_36143_20120217_175752_inLine +BABEL_BP_107_36143_20120217_175752_outLine +BABEL_BP_107_37185_20120608_122828_inLine +BABEL_BP_107_37185_20120608_122828_outLine +BABEL_BP_107_37842_20120513_023632_inLine +BABEL_BP_107_37842_20120513_023632_outLine +BABEL_BP_107_38635_20120424_231446_inLine 
+BABEL_BP_107_38635_20120424_231446_outLine +BABEL_BP_107_38863_20120614_173605_inLine +BABEL_BP_107_38863_20120614_173605_outLine +BABEL_BP_107_38863_20120614_174335_inLine +BABEL_BP_107_38863_20120614_174335_outLine +BABEL_BP_107_38863_20120614_175009_inLine +BABEL_BP_107_38863_20120614_175009_outLine +BABEL_BP_107_41989_20120321_185501_inLine +BABEL_BP_107_41989_20120321_185501_outLine +BABEL_BP_107_41989_20120321_190714_inLine +BABEL_BP_107_41989_20120321_190714_outLine +BABEL_BP_107_42212_20120704_203258_inLine +BABEL_BP_107_42212_20120704_203258_outLine +BABEL_BP_107_42768_20120503_180000_inLine +BABEL_BP_107_42768_20120503_180000_outLine +BABEL_BP_107_43991_20120429_013420_inLine +BABEL_BP_107_43991_20120429_013420_outLine +BABEL_BP_107_44117_20120704_023955_inLine +BABEL_BP_107_44117_20120704_023955_outLine +BABEL_BP_107_45106_20120118_183909_inLine +BABEL_BP_107_45106_20120118_183909_outLine +BABEL_BP_107_45786_20120502_200051_inLine +BABEL_BP_107_45786_20120502_200051_outLine +BABEL_BP_107_46427_20120516_213127_inLine +BABEL_BP_107_46427_20120516_213127_outLine +BABEL_BP_107_46813_20120503_214109_inLine +BABEL_BP_107_46813_20120503_214109_outLine +BABEL_BP_107_47185_20120417_000125_inLine +BABEL_BP_107_47185_20120417_000125_outLine +BABEL_BP_107_47249_20120704_173500_inLine +BABEL_BP_107_47249_20120704_173500_outLine +BABEL_BP_107_48404_20120704_162020_inLine +BABEL_BP_107_48404_20120704_162020_outLine +BABEL_BP_107_50915_20120608_150955_inLine +BABEL_BP_107_50915_20120608_150955_outLine +BABEL_BP_107_51791_20120517_004528_inLine +BABEL_BP_107_51791_20120517_004528_outLine +BABEL_BP_107_52024_20120414_193538_inLine +BABEL_BP_107_52024_20120414_193538_outLine +BABEL_BP_107_52691_20120617_160904_inLine +BABEL_BP_107_52691_20120617_160904_outLine +BABEL_BP_107_52900_20120320_150335_inLine +BABEL_BP_107_52900_20120320_150335_outLine +BABEL_BP_107_53278_20120508_192335_inLine +BABEL_BP_107_53278_20120508_192335_outLine +BABEL_BP_107_55121_20120504_003327_inLine +BABEL_BP_107_55121_20120504_003327_outLine +BABEL_BP_107_55678_20120323_211821_inLine +BABEL_BP_107_55678_20120323_211821_outLine +BABEL_BP_107_56342_20120419_132008_inLine +BABEL_BP_107_56342_20120419_132008_outLine +BABEL_BP_107_57551_20120325_225227_inLine +BABEL_BP_107_57551_20120325_225227_outLine +BABEL_BP_107_57625_20120506_021834_inLine +BABEL_BP_107_57625_20120506_021834_outLine +BABEL_BP_107_59671_20120322_225750_inLine +BABEL_BP_107_59671_20120322_225750_outLine +BABEL_BP_107_60250_20120218_193537_inLine +BABEL_BP_107_60250_20120218_193537_outLine +BABEL_BP_107_61988_20120406_134336_inLine +BABEL_BP_107_61988_20120406_134336_outLine +BABEL_BP_107_63491_20120502_145101_inLine +BABEL_BP_107_63491_20120502_145101_outLine +BABEL_BP_107_65415_20120220_153755_inLine +BABEL_BP_107_65415_20120220_153755_outLine +BABEL_BP_107_65783_20120429_153408_inLine +BABEL_BP_107_65783_20120429_153408_outLine +BABEL_BP_107_66784_20120616_151422_inLine +BABEL_BP_107_66784_20120616_151422_outLine +BABEL_BP_107_68337_20120404_230000_inLine +BABEL_BP_107_68337_20120404_230000_outLine +BABEL_BP_107_69236_20120214_230344_inLine +BABEL_BP_107_69236_20120214_230344_outLine +BABEL_BP_107_70643_20120427_194211_inLine +BABEL_BP_107_70643_20120427_194211_outLine +BABEL_BP_107_72011_20120704_231031_inLine +BABEL_BP_107_72011_20120704_231031_outLine +BABEL_BP_107_73122_20120501_124450_inLine +BABEL_BP_107_73122_20120501_124450_outLine +BABEL_BP_107_75932_20120419_222819_inLine +BABEL_BP_107_75932_20120419_222819_outLine 
+BABEL_BP_107_76002_20120608_001301_inLine +BABEL_BP_107_76002_20120608_001301_outLine +BABEL_BP_107_76745_20120608_120713_inLine +BABEL_BP_107_76745_20120608_120713_outLine +BABEL_BP_107_78245_20120321_225726_inLine +BABEL_BP_107_78245_20120321_225726_outLine +BABEL_BP_107_79618_20120322_195037_inLine +BABEL_BP_107_79618_20120322_195037_outLine +BABEL_BP_107_79698_20120614_142804_inLine +BABEL_BP_107_79698_20120614_142804_outLine +BABEL_BP_107_80247_20120429_181855_inLine +BABEL_BP_107_80247_20120429_181855_outLine +BABEL_BP_107_81261_20120324_015429_inLine +BABEL_BP_107_81261_20120324_015429_outLine +BABEL_BP_107_81642_20120504_013042_inLine +BABEL_BP_107_81642_20120504_013042_outLine +BABEL_BP_107_81647_20120425_231333_inLine +BABEL_BP_107_81647_20120425_231333_outLine +BABEL_BP_107_81944_20120607_131513_inLine +BABEL_BP_107_81944_20120607_131513_outLine +BABEL_BP_107_83186_20120414_181142_inLine +BABEL_BP_107_83186_20120414_181142_outLine +BABEL_BP_107_84700_20120501_125141_inLine +BABEL_BP_107_84700_20120501_125141_outLine +BABEL_BP_107_84916_20120427_012731_inLine +BABEL_BP_107_84916_20120427_012731_outLine +BABEL_BP_107_85719_20120423_181434_inLine +BABEL_BP_107_85719_20120423_181434_outLine +BABEL_BP_107_87634_20120208_165319_inLine +BABEL_BP_107_87634_20120208_165319_outLine +BABEL_BP_107_88385_20120502_200409_inLine +BABEL_BP_107_88385_20120502_200409_outLine +BABEL_BP_107_88385_20120502_201320_inLine +BABEL_BP_107_88385_20120502_201320_outLine +BABEL_BP_107_92642_20120211_005506_inLine +BABEL_BP_107_92642_20120211_005506_outLine +BABEL_BP_107_92852_20120418_234454_inLine +BABEL_BP_107_92852_20120418_234454_outLine +BABEL_BP_107_93277_20120510_183523_inLine +BABEL_BP_107_93277_20120510_183523_outLine +BABEL_BP_107_95952_20120607_145525_inLine +BABEL_BP_107_95952_20120607_145526_outLine +BABEL_BP_107_97941_20120423_201113_inLine +BABEL_BP_107_97941_20120423_201113_outLine +BABEL_BP_107_97941_20120423_201934_inLine +BABEL_BP_107_97941_20120423_201934_outLine +BABEL_BP_107_98279_20120509_172421_inLine +BABEL_BP_107_98279_20120509_172421_outLine +BABEL_BP_107_98762_20120612_160310_inLine +BABEL_BP_107_98762_20120612_160310_outLine +BABEL_BP_107_99697_20120424_211952_inLine +BABEL_BP_107_99697_20120424_211952_outLine diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/train.FullLP.list b/egs/babel/s5d/conf/lists/107-vietnamese/train.FullLP.list new file mode 100644 index 00000000000..522b95fc080 --- /dev/null +++ b/egs/babel/s5d/conf/lists/107-vietnamese/train.FullLP.list @@ -0,0 +1,1042 @@ +BABEL_BP_107_10033_20120208_180820_outLine +BABEL_BP_107_10066_20120428_121544_inLine +BABEL_BP_107_10066_20120428_121544_outLine +BABEL_BP_107_10190_20120424_023348_inLine +BABEL_BP_107_10190_20120425_012249_inLine +BABEL_BP_107_10211_20120323_013915_inLine +BABEL_BP_107_10211_20120323_013915_outLine +BABEL_BP_107_10545_20120424_184701_inLine +BABEL_BP_107_10697_20120516_194235_inLine +BABEL_BP_107_10732_20120328_172421_inLine +BABEL_BP_107_10732_20120328_172422_outLine +BABEL_BP_107_10900_20120322_022523_inLine +BABEL_BP_107_10900_20120322_022524_outLine +BABEL_BP_107_10945_20120322_222039_inLine +BABEL_BP_107_10945_20120322_222039_outLine +BABEL_BP_107_10973_20120404_233129_inLine +BABEL_BP_107_10973_20120404_233129_outLine +BABEL_BP_107_10985_20120502_123725_inLine +BABEL_BP_107_10985_20120502_123725_outLine +BABEL_BP_107_11022_20120422_013455_inLine +BABEL_BP_107_11022_20120422_013455_outLine +BABEL_BP_107_11422_20120208_160559_inLine +BABEL_BP_107_11422_20120208_160559_outLine 
+BABEL_BP_107_11479_20120212_011029_inLine +BABEL_BP_107_11479_20120212_011029_outLine +BABEL_BP_107_11827_20120322_205100_inLine +BABEL_BP_107_11827_20120322_205100_outLine +BABEL_BP_107_11949_20120704_001817_inLine +BABEL_BP_107_11949_20120704_001817_outLine +BABEL_BP_107_11982_20120217_004340_inLine +BABEL_BP_107_12486_20120424_174759_inLine +BABEL_BP_107_12552_20120503_152109_inLine +BABEL_BP_107_12569_20120609_190056_inLine +BABEL_BP_107_12569_20120609_190056_outLine +BABEL_BP_107_12587_20120322_230456_inLine +BABEL_BP_107_12587_20120322_230457_outLine +BABEL_BP_107_12643_20120704_185225_inLine +BABEL_BP_107_12643_20120704_185225_outLine +BABEL_BP_107_12897_20120413_195042_inLine +BABEL_BP_107_12897_20120413_195042_outLine +BABEL_BP_107_12897_20120413_200727_inLine +BABEL_BP_107_12897_20120413_200727_outLine +BABEL_BP_107_13065_20120425_034939_inLine +BABEL_BP_107_13065_20120425_034939_outLine +BABEL_BP_107_13229_20120417_201028_inLine +BABEL_BP_107_13229_20120417_201028_outLine +BABEL_BP_107_13272_20120320_141107_outLine +BABEL_BP_107_13272_20120320_142506_outLine +BABEL_BP_107_13389_20120406_141036_inLine +BABEL_BP_107_13389_20120406_141036_outLine +BABEL_BP_107_13419_20120218_213925_inLine +BABEL_BP_107_13419_20120218_214753_inLine +BABEL_BP_107_13781_20120516_204849_inLine +BABEL_BP_107_13781_20120516_204849_outLine +BABEL_BP_107_13795_20120418_190613_inLine +BABEL_BP_107_13795_20120418_190613_outLine +BABEL_BP_107_14075_20120507_004435_inLine +BABEL_BP_107_14294_20120328_010858_inLine +BABEL_BP_107_14294_20120328_010858_outLine +BABEL_BP_107_14468_20120321_003916_inLine +BABEL_BP_107_14468_20120321_003916_outLine +BABEL_BP_107_14475_20120704_204813_inLine +BABEL_BP_107_14475_20120704_204813_outLine +BABEL_BP_107_14500_20120429_194225_outLine +BABEL_BP_107_14707_20120429_004741_inLine +BABEL_BP_107_14707_20120429_004741_outLine +BABEL_BP_107_14707_20120429_005954_inLine +BABEL_BP_107_14891_20120118_195012_inLine +BABEL_BP_107_14707_20120429_005954_outLine +BABEL_BP_107_14729_20120429_200418_outLine +BABEL_BP_107_14836_20120507_235040_outLine +BABEL_BP_107_14891_20120118_195012_outLine +BABEL_BP_107_14936_20120405_224830_inLine +BABEL_BP_107_14936_20120405_224830_outLine +BABEL_BP_107_15073_20120417_011547_outLine +BABEL_BP_107_15142_20120322_132735_outLine +BABEL_BP_107_15353_20120504_193952_inLine +BABEL_BP_107_15353_20120504_193952_outLine +BABEL_BP_107_15460_20120426_224823_inLine +BABEL_BP_107_15460_20120426_224823_outLine +BABEL_BP_107_15473_20120217_231342_inLine +BABEL_BP_107_15696_20120328_010156_outLine +BABEL_BP_107_15719_20120612_122632_inLine +BABEL_BP_107_15719_20120612_122632_outLine +BABEL_BP_107_15744_20120608_123258_inLine +BABEL_BP_107_15873_20120405_224524_inLine +BABEL_BP_107_15873_20120405_224524_outLine +BABEL_BP_107_15881_20120322_233839_inLine +BABEL_BP_107_15940_20120424_221327_inLine +BABEL_BP_107_16406_20120324_011714_inLine +BABEL_BP_107_16406_20120324_011714_outLine +BABEL_BP_107_16617_20120228_014302_inLine +BABEL_BP_107_16646_20120418_130946_outLine +BABEL_BP_107_16660_20120210_231224_outLine +BABEL_BP_107_16669_20120208_140603_inLine +BABEL_BP_107_16801_20120418_121951_inLine +BABEL_BP_107_16801_20120418_203644_inLine +BABEL_BP_107_16875_20120704_133550_inLine +BABEL_BP_107_16875_20120704_133550_outLine +BABEL_BP_107_16883_20120501_194424_inLine +BABEL_BP_107_16883_20120501_194424_outLine +BABEL_BP_107_16950_20120704_155322_inLine +BABEL_BP_107_16950_20120704_155322_outLine +BABEL_BP_107_17013_20120501_002142_inLine 
+BABEL_BP_107_17013_20120501_002142_outLine +BABEL_BP_107_17018_20120322_220450_inLine +BABEL_BP_107_17018_20120322_220450_outLine +BABEL_BP_107_17093_20120501_202548_outLine +BABEL_BP_107_17203_20120212_220043_outLine +BABEL_BP_107_17353_20120617_133436_inLine +BABEL_BP_107_17353_20120617_133436_outLine +BABEL_BP_107_17933_20120421_134916_inLine +BABEL_BP_107_17933_20120421_134916_outLine +BABEL_BP_107_18187_20120608_125102_outLine +BABEL_BP_107_18209_20120420_004725_inLine +BABEL_BP_107_18234_20120210_230712_inLine +BABEL_BP_107_18495_20120618_003601_outLine +BABEL_BP_107_18534_20120504_132522_inLine +BABEL_BP_107_18534_20120504_132522_outLine +BABEL_BP_107_18858_20120209_004527_outLine +BABEL_BP_107_19012_20120503_215037_inLine +BABEL_BP_107_19012_20120503_215037_outLine +BABEL_BP_107_19248_20120508_210026_inLine +BABEL_BP_107_19248_20120508_210027_outLine +BABEL_BP_107_19290_20120421_141409_inLine +BABEL_BP_107_19290_20120421_141409_outLine +BABEL_BP_107_19404_20120321_171020_inLine +BABEL_BP_107_19404_20120321_171020_outLine +BABEL_BP_107_19479_20120407_014459_inLine +BABEL_BP_107_19479_20120407_014459_outLine +BABEL_BP_107_19731_20120506_011629_inLine +BABEL_BP_107_19731_20120515_001656_inLine +BABEL_BP_107_19869_20120608_012542_outLine +BABEL_BP_107_20320_20120212_214655_inLine +BABEL_BP_107_20332_20120426_010134_inLine +BABEL_BP_107_20332_20120426_010837_inLine +BABEL_BP_107_20332_20120426_010134_outLine +BABEL_BP_107_20332_20120426_010837_outLine +BABEL_BP_107_20483_20120416_171740_outLine +BABEL_BP_107_20518_20120418_211112_inLine +BABEL_BP_107_20582_20120322_220747_inLine +BABEL_BP_107_20582_20120322_220747_outLine +BABEL_BP_107_20740_20120427_193225_inLine +BABEL_BP_107_20740_20120427_193757_inLine +BABEL_BP_107_20741_20120325_181245_outLine +BABEL_BP_107_20799_20120515_010136_inLine +BABEL_BP_107_20799_20120515_010136_outLine +BABEL_BP_107_21052_20120415_204922_inLine +BABEL_BP_107_21139_20120425_192642_outLine +BABEL_BP_107_21258_20120418_145725_inLine +BABEL_BP_107_21367_20120629_140326_outLine +BABEL_BP_107_21430_20120608_003600_outLine +BABEL_BP_107_21477_20120323_185255_inLine +BABEL_BP_107_21477_20120323_185255_outLine +BABEL_BP_107_21518_20120501_152038_inLine +BABEL_BP_107_21518_20120501_152038_outLine +BABEL_BP_107_21584_20120217_004017_inLine +BABEL_BP_107_21584_20120217_004017_outLine +BABEL_BP_107_21758_20120407_010928_inLine +BABEL_BP_107_21758_20120407_010928_outLine +BABEL_BP_107_21758_20120407_011555_inLine +BABEL_BP_107_21758_20120407_011555_outLine +BABEL_BP_107_21929_20120323_015539_inLine +BABEL_BP_107_21929_20120323_022750_inLine +BABEL_BP_107_21946_20120507_015056_inLine +BABEL_BP_107_21946_20120507_015056_outLine +BABEL_BP_107_22010_20120608_182138_inLine +BABEL_BP_107_22010_20120608_182138_outLine +BABEL_BP_107_22272_20120511_232328_inLine +BABEL_BP_107_22272_20120511_232328_outLine +BABEL_BP_107_22494_20120613_122322_outLine +BABEL_BP_107_22898_20120322_144401_inLine +BABEL_BP_107_22898_20120322_144401_outLine +BABEL_BP_107_22910_20120214_213815_inLine +BABEL_BP_107_22910_20120214_213815_outLine +BABEL_BP_107_22979_20120505_000039_inLine +BABEL_BP_107_22979_20120505_000039_outLine +BABEL_BP_107_23167_20120217_212610_inLine +BABEL_BP_107_23167_20120217_212610_outLine +BABEL_BP_107_23629_20120501_173549_inLine +BABEL_BP_107_23629_20120501_173549_outLine +BABEL_BP_107_23930_20120506_214145_inLine +BABEL_BP_107_24014_20120618_010729_inLine +BABEL_BP_107_24014_20120618_010729_outLine +BABEL_BP_107_24094_20120421_134318_outLine 
+BABEL_BP_107_24569_20120507_123854_outLine
+BABEL_BP_107_24608_20120208_170106_outLine
+BABEL_BP_107_24638_20120504_004348_outLine
+BABEL_BP_107_24642_20120505_201543_inLine
+BABEL_BP_107_24642_20120505_201543_outLine
+BABEL_BP_107_24799_20120508_232153_outLine
+BABEL_BP_107_24817_20120422_203514_inLine
+BABEL_BP_107_24833_20120218_171649_outLine
+BABEL_BP_107_25035_20120214_230841_inLine
+BABEL_BP_107_25072_20120429_144535_inLine
+BABEL_BP_107_25479_20120506_161146_inLine
+BABEL_BP_107_25479_20120506_161146_outLine
+BABEL_BP_107_25576_20120321_222905_outLine
+BABEL_BP_107_25866_20120426_193335_inLine
+BABEL_BP_107_26348_20120508_100651_inLine
+BABEL_BP_107_26348_20120508_102042_inLine
+BABEL_BP_107_26350_20120209_004945_inLine
+BABEL_BP_107_26350_20120209_004945_outLine
+BABEL_BP_107_26350_20120209_012139_inLine
+BABEL_BP_107_26350_20120209_012139_outLine
+BABEL_BP_107_26598_20120425_143602_outLine
+BABEL_BP_107_26684_20120530_155756_inLine
+BABEL_BP_107_26786_20120423_191945_inLine
+BABEL_BP_107_26786_20120423_191945_outLine
+BABEL_BP_107_27064_20120222_210044_inLine
+BABEL_BP_107_27064_20120222_210044_outLine
+BABEL_BP_107_27503_20120212_221915_inLine
+BABEL_BP_107_27619_20120328_023110_outLine
+BABEL_BP_107_27698_20120212_005737_inLine
+BABEL_BP_107_27698_20120212_005737_outLine
+BABEL_BP_107_27724_20120407_130547_inLine
+BABEL_BP_107_27724_20120407_130547_outLine
+BABEL_BP_107_27890_20120428_235422_inLine
+BABEL_BP_107_27890_20120428_235422_outLine
+BABEL_BP_107_27916_20120607_114245_outLine
+BABEL_BP_107_27916_20120607_115650_outLine
+BABEL_BP_107_28016_20120405_222219_inLine
+BABEL_BP_107_28016_20120405_222219_outLine
+BABEL_BP_107_28107_20120208_142843_outLine
+BABEL_BP_107_28107_20120208_144923_outLine
+BABEL_BP_107_28132_20120405_152728_outLine
+BABEL_BP_107_28260_20120212_153106_inLine
+BABEL_BP_107_28557_20120507_001619_outLine
+BABEL_BP_107_28675_20120607_231549_inLine
+BABEL_BP_107_28675_20120607_231549_outLine
+BABEL_BP_107_28675_20120607_233243_inLine
+BABEL_BP_107_28675_20120607_233243_outLine
+BABEL_BP_107_28740_20120212_150039_inLine
+BABEL_BP_107_28740_20120212_150039_outLine
+BABEL_BP_107_29280_20120607_184929_outLine
+BABEL_BP_107_29280_20120607_190345_outLine
+BABEL_BP_107_29290_20120415_102435_inLine
+BABEL_BP_107_29335_20120424_013042_inLine
+BABEL_BP_107_29335_20120424_013042_outLine
+BABEL_BP_107_29407_20120607_132315_inLine
+BABEL_BP_107_29407_20120607_135318_inLine
+BABEL_BP_107_29444_20120322_191236_outLine
+BABEL_BP_107_29771_20120504_010738_outLine
+BABEL_BP_107_29959_20120418_001028_inLine
+BABEL_BP_107_29959_20120418_001028_outLine
+BABEL_BP_107_29988_20120516_233700_inLine
+BABEL_BP_107_30210_20120427_140255_inLine
+BABEL_BP_107_30210_20120502_202749_inLine
+BABEL_BP_107_30554_20120617_231216_outLine
+BABEL_BP_107_30583_20120212_210712_inLine
+BABEL_BP_107_30722_20120505_103655_inLine
+BABEL_BP_107_30722_20120505_103655_outLine
+BABEL_BP_107_31031_20120501_205733_inLine
+BABEL_BP_107_31031_20120501_210746_inLine
+BABEL_BP_107_31298_20120322_125112_outLine
+BABEL_BP_107_31393_20120325_171905_inLine
+BABEL_BP_107_31460_20120325_193921_inLine
+BABEL_BP_107_31606_20120607_131428_inLine
+BABEL_BP_107_31738_20120704_101130_outLine
+BABEL_BP_107_31902_20120417_015618_inLine
+BABEL_BP_107_31902_20120417_015618_outLine
+BABEL_BP_107_31917_20120501_202910_inLine
+BABEL_BP_107_31917_20120501_202910_outLine
+BABEL_BP_107_31980_20120212_174027_inLine
+BABEL_BP_107_31980_20120212_174027_outLine
+BABEL_BP_107_32132_20120418_211743_inLine
+BABEL_BP_107_32274_20120324_011402_inLine
+BABEL_BP_107_32295_20120617_141025_inLine
+BABEL_BP_107_32295_20120617_141025_outLine
+BABEL_BP_107_32334_20120429_005403_inLine
+BABEL_BP_107_32334_20120429_005403_outLine
+BABEL_BP_107_32400_20120426_000137_inLine
+BABEL_BP_107_32400_20120426_000137_outLine
+BABEL_BP_107_32710_20120418_215432_inLine
+BABEL_BP_107_32710_20120418_215432_outLine
+BABEL_BP_107_33012_20120611_155055_inLine
+BABEL_BP_107_33364_20120617_011853_inLine
+BABEL_BP_107_33364_20120617_011853_outLine
+BABEL_BP_107_33577_20120704_152608_outLine
+BABEL_BP_107_33671_20120330_001033_inLine
+BABEL_BP_107_33671_20120330_001033_outLine
+BABEL_BP_107_33742_20120608_143147_inLine
+BABEL_BP_107_33742_20120608_143147_outLine
+BABEL_BP_107_33817_20120423_130850_inLine
+BABEL_BP_107_33817_20120423_130850_outLine
+BABEL_BP_107_33969_20120429_214721_outLine
+BABEL_BP_107_34235_20120218_205136_outLine
+BABEL_BP_107_34480_20120608_151830_inLine
+BABEL_BP_107_34498_20120429_140537_inLine
+BABEL_BP_107_34498_20120429_140537_outLine
+BABEL_BP_107_34590_20120323_134554_inLine
+BABEL_BP_107_34590_20120323_134554_outLine
+BABEL_BP_107_34857_20120419_235853_inLine
+BABEL_BP_107_34961_20120212_223315_inLine
+BABEL_BP_107_34961_20120212_223315_outLine
+BABEL_BP_107_34961_20120212_224207_inLine
+BABEL_BP_107_34961_20120212_224207_outLine
+BABEL_BP_107_35011_20120321_223128_inLine
+BABEL_BP_107_35011_20120321_223128_outLine
+BABEL_BP_107_35016_20120611_185645_outLine
+BABEL_BP_107_35074_20120608_164703_outLine
+BABEL_BP_107_35179_20120414_153233_inLine
+BABEL_BP_107_35179_20120414_153233_outLine
+BABEL_BP_107_35188_20120614_131427_inLine
+BABEL_BP_107_35305_20120422_120043_outLine
+BABEL_BP_107_35357_20120614_212245_inLine
+BABEL_BP_107_35357_20120614_212245_outLine
+BABEL_BP_107_36037_20120616_153023_outLine
+BABEL_BP_107_36196_20120608_110319_inLine
+BABEL_BP_107_36196_20120608_111049_inLine
+BABEL_BP_107_36268_20120406_211711_inLine
+BABEL_BP_107_36268_20120406_211711_outLine
+BABEL_BP_107_36356_20120211_173247_inLine
+BABEL_BP_107_36356_20120211_173247_outLine
+BABEL_BP_107_36383_20120416_225701_outLine
+BABEL_BP_107_36391_20120505_171824_inLine
+BABEL_BP_107_36424_20120421_130549_inLine
+BABEL_BP_107_36424_20120421_130549_outLine
+BABEL_BP_107_36424_20120421_133610_inLine
+BABEL_BP_107_36424_20120421_133610_outLine
+BABEL_BP_107_36502_20120617_145859_inLine
+BABEL_BP_107_36502_20120617_145859_outLine
+BABEL_BP_107_36711_20120325_230112_inLine
+BABEL_BP_107_36711_20120325_230112_outLine
+BABEL_BP_107_36722_20120627_122821_inLine
+BABEL_BP_107_36722_20120627_122821_outLine
+BABEL_BP_107_37110_20120209_002706_inLine
+BABEL_BP_107_37110_20120209_002706_outLine
+BABEL_BP_107_37210_20120322_205536_outLine
+BABEL_BP_107_37285_20120325_000245_inLine
+BABEL_BP_107_37285_20120325_000245_outLine
+BABEL_BP_107_37335_20120616_150016_inLine
+BABEL_BP_107_37335_20120616_150016_outLine
+BABEL_BP_107_37374_20120418_185819_inLine
+BABEL_BP_107_37940_20120424_004619_inLine
+BABEL_BP_107_37940_20120424_004619_outLine
+BABEL_BP_107_38464_20120422_105536_outLine
+BABEL_BP_107_38592_20120704_150926_outLine
+BABEL_BP_107_38640_20120215_030154_inLine
+BABEL_BP_107_38640_20120215_030154_outLine
+BABEL_BP_107_38698_20120322_213531_inLine
+BABEL_BP_107_38698_20120322_213531_outLine
+BABEL_BP_107_38879_20120406_150304_inLine
+BABEL_BP_107_38879_20120406_150304_outLine
+BABEL_BP_107_38912_20120414_160852_inLine
+BABEL_BP_107_38912_20120414_160852_outLine
+BABEL_BP_107_39246_20120613_202128_inLine
+BABEL_BP_107_39246_20120613_202128_outLine
+BABEL_BP_107_39264_20120417_191639_inLine
+BABEL_BP_107_39264_20120417_191639_outLine
+BABEL_BP_107_39296_20120705_025906_inLine
+BABEL_BP_107_39384_20120324_010939_inLine
+BABEL_BP_107_39384_20120324_010939_outLine
+BABEL_BP_107_39384_20120324_011832_inLine
+BABEL_BP_107_39384_20120324_011832_outLine
+BABEL_BP_107_39430_20120325_015935_inLine
+BABEL_BP_107_39430_20120325_015935_outLine
+BABEL_BP_107_40002_20120502_174229_outLine
+BABEL_BP_107_40123_20120505_191426_inLine
+BABEL_BP_107_40123_20120505_191426_outLine
+BABEL_BP_107_40385_20120704_143210_outLine
+BABEL_BP_107_40477_20120323_194919_outLine
+BABEL_BP_107_40510_20120426_153808_inLine
+BABEL_BP_107_40510_20120426_153808_outLine
+BABEL_BP_107_40980_20120416_233130_inLine
+BABEL_BP_107_40980_20120416_233130_outLine
+BABEL_BP_107_40980_20120417_001128_inLine
+BABEL_BP_107_40980_20120417_001128_outLine
+BABEL_BP_107_41146_20120211_162158_inLine
+BABEL_BP_107_41170_20120201_205341_inLine
+BABEL_BP_107_41170_20120201_205341_outLine
+BABEL_BP_107_41590_20120610_162218_outLine
+BABEL_BP_107_41797_20120420_003902_inLine
+BABEL_BP_107_41797_20120420_003902_outLine
+BABEL_BP_107_42145_20120418_131525_inLine
+BABEL_BP_107_42266_20120407_182544_outLine
+BABEL_BP_107_42309_20120608_215912_inLine
+BABEL_BP_107_42309_20120608_215912_outLine
+BABEL_BP_107_42651_20120211_192913_inLine
+BABEL_BP_107_42651_20120211_192913_outLine
+BABEL_BP_107_42910_20120212_154722_inLine
+BABEL_BP_107_42910_20120212_154722_outLine
+BABEL_BP_107_43017_20120322_170152_inLine
+BABEL_BP_107_43017_20120322_170152_outLine
+BABEL_BP_107_43306_20120409_184959_inLine
+BABEL_BP_107_43306_20120409_184959_outLine
+BABEL_BP_107_43423_20120504_001214_inLine
+BABEL_BP_107_43423_20120504_010312_inLine
+BABEL_BP_107_43426_20120426_183951_inLine
+BABEL_BP_107_43426_20120426_183951_outLine
+BABEL_BP_107_43587_20120506_182330_inLine
+BABEL_BP_107_43652_20120416_175011_inLine
+BABEL_BP_107_43652_20120418_093619_inLine
+BABEL_BP_107_44129_20120512_023836_inLine
+BABEL_BP_107_44129_20120512_023836_outLine
+BABEL_BP_107_44369_20120504_024021_inLine
+BABEL_BP_107_44369_20120504_024021_outLine
+BABEL_BP_107_44403_20120322_214144_inLine
+BABEL_BP_107_44403_20120322_214144_outLine
+BABEL_BP_107_44756_20120426_155822_inLine
+BABEL_BP_107_44756_20120426_155822_outLine
+BABEL_BP_107_44829_20120404_224815_outLine
+BABEL_BP_107_44836_20120417_003600_outLine
+BABEL_BP_107_44943_20120506_191737_inLine
+BABEL_BP_107_45227_20120210_223857_inLine
+BABEL_BP_107_45511_20120212_170655_inLine
+BABEL_BP_107_45511_20120212_170655_outLine
+BABEL_BP_107_45570_20120509_151829_inLine
+BABEL_BP_107_45570_20120509_151829_outLine
+BABEL_BP_107_45793_20120211_040134_inLine
+BABEL_BP_107_45793_20120211_040134_outLine
+BABEL_BP_107_45929_20120418_215417_outLine
+BABEL_BP_107_45931_20120322_143234_inLine
+BABEL_BP_107_45931_20120322_143234_outLine
+BABEL_BP_107_46243_20120210_233353_inLine
+BABEL_BP_107_46243_20120210_233353_outLine
+BABEL_BP_107_46332_20120418_002934_inLine
+BABEL_BP_107_46332_20120418_002934_outLine
+BABEL_BP_107_46603_20120421_113906_inLine
+BABEL_BP_107_46756_20120429_195314_outLine
+BABEL_BP_107_46977_20120426_015005_inLine
+BABEL_BP_107_47263_20120422_150216_inLine
+BABEL_BP_107_47433_20120210_185410_outLine
+BABEL_BP_107_47618_20120502_004413_inLine
+BABEL_BP_107_47618_20120502_004413_outLine
+BABEL_BP_107_47661_20120216_224419_inLine
+BABEL_BP_107_47661_20120216_224419_outLine
+BABEL_BP_107_47794_20120514_175438_inLine
+BABEL_BP_107_47794_20120514_175438_outLine
+BABEL_BP_107_47823_20120516_204140_inLine
+BABEL_BP_107_47845_20120613_004732_outLine
+BABEL_BP_107_47906_20120415_224420_inLine
+BABEL_BP_107_47906_20120415_224420_outLine
+BABEL_BP_107_48188_20120422_150955_inLine
+BABEL_BP_107_48188_20120422_150955_outLine
+BABEL_BP_107_48418_20120421_163333_inLine
+BABEL_BP_107_48511_20120322_145729_inLine
+BABEL_BP_107_48511_20120322_145729_outLine
+BABEL_BP_107_48559_20120502_201955_inLine
+BABEL_BP_107_48559_20120502_201955_outLine
+BABEL_BP_107_48607_20120607_215116_outLine
+BABEL_BP_107_48733_20120418_142426_inLine
+BABEL_BP_107_48733_20120418_142426_outLine
+BABEL_BP_107_48753_20120426_134417_inLine
+BABEL_BP_107_48753_20120426_134417_outLine
+BABEL_BP_107_48812_20120323_162517_inLine
+BABEL_BP_107_48812_20120324_182527_inLine
+BABEL_BP_107_48976_20120220_152013_inLine
+BABEL_BP_107_48976_20120220_152013_outLine
+BABEL_BP_107_49186_20120704_180724_inLine
+BABEL_BP_107_49186_20120704_180724_outLine
+BABEL_BP_107_49192_20120421_190503_outLine
+BABEL_BP_107_49239_20120429_144119_inLine
+BABEL_BP_107_49346_20120611_192752_outLine
+BABEL_BP_107_49351_20120614_132223_inLine
+BABEL_BP_107_49351_20120614_132223_outLine
+BABEL_BP_107_49371_20120608_002052_inLine
+BABEL_BP_107_49541_20120325_223621_inLine
+BABEL_BP_107_49541_20120325_223621_outLine
+BABEL_BP_107_49552_20120614_140129_inLine
+BABEL_BP_107_49689_20120415_163537_inLine
+BABEL_BP_107_49689_20120415_163537_outLine
+BABEL_BP_107_49714_20120509_113627_outLine
+BABEL_BP_107_49773_20120211_151308_inLine
+BABEL_BP_107_49773_20120211_151308_outLine
+BABEL_BP_107_50028_20120704_192522_inLine
+BABEL_BP_107_50028_20120704_192522_outLine
+BABEL_BP_107_50141_20120505_233033_inLine
+BABEL_BP_107_50141_20120505_233033_outLine
+BABEL_BP_107_50201_20120216_001139_inLine
+BABEL_BP_107_50201_20120216_001139_outLine
+BABEL_BP_107_50267_20120421_135338_inLine
+BABEL_BP_107_50267_20120421_135338_outLine
+BABEL_BP_107_50298_20120507_152508_outLine
+BABEL_BP_107_50409_20120608_205803_inLine
+BABEL_BP_107_50468_20120420_114108_inLine
+BABEL_BP_107_50468_20120420_114108_outLine
+BABEL_BP_107_50468_20120420_115203_inLine
+BABEL_BP_107_50468_20120420_115203_outLine
+BABEL_BP_107_50476_20120430_225248_inLine
+BABEL_BP_107_50476_20120430_225248_outLine
+BABEL_BP_107_50718_20120321_125943_inLine
+BABEL_BP_107_50752_20120421_202932_inLine
+BABEL_BP_107_50752_20120421_202932_outLine
+BABEL_BP_107_50883_20120328_013430_inLine
+BABEL_BP_107_50883_20120328_013430_outLine
+BABEL_BP_107_51052_20120424_004427_inLine
+BABEL_BP_107_51052_20120424_004427_outLine
+BABEL_BP_107_51073_20120216_010300_outLine
+BABEL_BP_107_51117_20120211_034844_inLine
+BABEL_BP_107_51117_20120211_034844_outLine
+BABEL_BP_107_51136_20120405_142910_inLine
+BABEL_BP_107_51136_20120405_142910_outLine
+BABEL_BP_107_51446_20120417_221307_inLine
+BABEL_BP_107_51446_20120417_221307_outLine
+BABEL_BP_107_51448_20120608_170641_inLine
+BABEL_BP_107_51448_20120608_171219_inLine
+BABEL_BP_107_51663_20120506_160921_inLine
+BABEL_BP_107_51727_20120424_225602_inLine
+BABEL_BP_107_51727_20120424_225602_outLine
+BABEL_BP_107_52154_20120503_203816_inLine
+BABEL_BP_107_52219_20120417_113120_inLine
+BABEL_BP_107_52219_20120417_113120_outLine
+BABEL_BP_107_52807_20120608_171526_inLine
+BABEL_BP_107_52807_20120608_171526_outLine
+BABEL_BP_107_52902_20120421_150627_outLine
+BABEL_BP_107_53181_20120211_163316_inLine
+BABEL_BP_107_53181_20120211_163316_outLine
+BABEL_BP_107_53463_20120421_150635_inLine
+BABEL_BP_107_53463_20120421_150635_outLine
+BABEL_BP_107_53463_20120421_152028_inLine
+BABEL_BP_107_53463_20120421_152028_outLine
+BABEL_BP_107_53649_20120611_193416_outLine
+BABEL_BP_107_53653_20120607_150151_outLine
+BABEL_BP_107_53703_20120502_153540_outLine
+BABEL_BP_107_53824_20120503_223532_inLine
+BABEL_BP_107_53824_20120503_223532_outLine
+BABEL_BP_107_53824_20120503_225007_inLine
+BABEL_BP_107_53824_20120503_225007_outLine
+BABEL_BP_107_53982_20120509_013004_outLine
+BABEL_BP_107_53994_20120501_161638_outLine
+BABEL_BP_107_54199_20120607_200253_inLine
+BABEL_BP_107_54199_20120607_202722_inLine
+BABEL_BP_107_54199_20120607_202722_outLine
+BABEL_BP_107_54241_20120324_013254_inLine
+BABEL_BP_107_54241_20120324_013254_outLine
+BABEL_BP_107_54332_20120608_182424_inLine
+BABEL_BP_107_54332_20120608_183219_inLine
+BABEL_BP_107_54518_20120608_120238_inLine
+BABEL_BP_107_54621_20120421_132410_inLine
+BABEL_BP_107_54621_20120421_132410_outLine
+BABEL_BP_107_54785_20120602_195720_inLine
+BABEL_BP_107_54787_20120405_202915_inLine
+BABEL_BP_107_54787_20120405_202915_outLine
+BABEL_BP_107_55182_20120209_015206_inLine
+BABEL_BP_107_55355_20120608_155709_inLine
+BABEL_BP_107_55355_20120612_142521_inLine
+BABEL_BP_107_55396_20120321_141254_outLine
+BABEL_BP_107_55470_20120421_134215_outLine
+BABEL_BP_107_55777_20120421_234307_inLine
+BABEL_BP_107_55777_20120421_234307_outLine
+BABEL_BP_107_55874_20120504_184342_inLine
+BABEL_BP_107_55874_20120504_184343_outLine
+BABEL_BP_107_56039_20120516_215649_inLine
+BABEL_BP_107_56039_20120516_215649_outLine
+BABEL_BP_107_56070_20120220_174719_inLine
+BABEL_BP_107_57148_20120217_014955_inLine
+BABEL_BP_107_57148_20120217_014955_outLine
+BABEL_BP_107_57148_20120217_024257_inLine
+BABEL_BP_107_57148_20120217_024257_outLine
+BABEL_BP_107_57422_20120508_014547_inLine
+BABEL_BP_107_57422_20120508_014547_outLine
+BABEL_BP_107_57457_20120617_193611_inLine
+BABEL_BP_107_57457_20120617_193611_outLine
+BABEL_BP_107_57619_20120505_151800_inLine
+BABEL_BP_107_58108_20120509_141003_inLine
+BABEL_BP_107_58108_20120509_141003_outLine
+BABEL_BP_107_58137_20120421_185042_inLine
+BABEL_BP_107_58137_20120421_185042_outLine
+BABEL_BP_107_58190_20120506_195510_outLine
+BABEL_BP_107_58232_20120501_122112_inLine
+BABEL_BP_107_58232_20120501_122112_outLine
+BABEL_BP_107_58357_20120507_125021_inLine
+BABEL_BP_107_58357_20120507_125021_outLine
+BABEL_BP_107_58536_20120501_013825_inLine
+BABEL_BP_107_58536_20120501_013825_outLine
+BABEL_BP_107_58746_20120614_181729_inLine
+BABEL_BP_107_58746_20120614_181729_outLine
+BABEL_BP_107_58863_20120218_011117_inLine
+BABEL_BP_107_58863_20120218_011117_outLine
+BABEL_BP_107_58863_20120218_012806_inLine
+BABEL_BP_107_58863_20120218_012806_outLine
+BABEL_BP_107_59071_20120423_184821_inLine
+BABEL_BP_107_59175_20120212_225712_inLine
+BABEL_BP_107_59175_20120212_225712_outLine
+BABEL_BP_107_59383_20120502_205353_inLine
+BABEL_BP_107_59383_20120502_205353_outLine
+BABEL_BP_107_59628_20120428_215033_inLine
+BABEL_BP_107_59764_20120524_205913_inLine
+BABEL_BP_107_59924_20120417_194534_inLine
+BABEL_BP_107_59924_20120417_194534_outLine
+BABEL_BP_107_59961_20120218_211136_inLine
+BABEL_BP_107_60106_20120211_003229_inLine
+BABEL_BP_107_60106_20120211_003229_outLine
+BABEL_BP_107_60183_20120428_164103_inLine
+BABEL_BP_107_60183_20120428_164103_outLine
+BABEL_BP_107_60193_20120328_014042_inLine
+BABEL_BP_107_60238_20120506_132025_outLine
+BABEL_BP_107_60338_20120505_131543_inLine
+BABEL_BP_107_60338_20120505_131543_outLine
+BABEL_BP_107_60605_20120506_215948_inLine
+BABEL_BP_107_60677_20120415_145311_inLine
+BABEL_BP_107_60677_20120415_145311_outLine
+BABEL_BP_107_60677_20120415_150336_inLine
+BABEL_BP_107_60677_20120415_150336_outLine
+BABEL_BP_107_60826_20120424_235431_inLine
+BABEL_BP_107_60826_20120424_235432_outLine
+BABEL_BP_107_60842_20120617_190839_inLine
+BABEL_BP_107_60842_20120617_190839_outLine
+BABEL_BP_107_61073_20120322_193656_inLine
+BABEL_BP_107_61073_20120322_193656_outLine
+BABEL_BP_107_61408_20120628_141349_outLine
+BABEL_BP_107_61449_20120421_232700_inLine
+BABEL_BP_107_61449_20120421_232700_outLine
+BABEL_BP_107_61906_20120414_201744_inLine
+BABEL_BP_107_61906_20120414_201744_outLine
+BABEL_BP_107_62132_20120506_160034_inLine
+BABEL_BP_107_62160_20120323_180702_outLine
+BABEL_BP_107_62163_20120628_180945_inLine
+BABEL_BP_107_62163_20120628_182002_inLine
+BABEL_BP_107_62177_20120323_001326_inLine
+BABEL_BP_107_62255_20120506_204123_inLine
+BABEL_BP_107_62255_20120506_204123_outLine
+BABEL_BP_107_62277_20120504_173047_inLine
+BABEL_BP_107_62696_20120508_135942_outLine
+BABEL_BP_107_62696_20120509_100233_outLine
+BABEL_BP_107_62923_20120322_163015_inLine
+BABEL_BP_107_62923_20120322_163015_outLine
+BABEL_BP_107_62993_20120608_130210_inLine
+BABEL_BP_107_62993_20120608_130210_outLine
+BABEL_BP_107_63076_20120704_011318_inLine
+BABEL_BP_107_63116_20120419_163443_inLine
+BABEL_BP_107_63233_20120323_003312_inLine
+BABEL_BP_107_63352_20120421_222544_inLine
+BABEL_BP_107_63368_20120418_215232_inLine
+BABEL_BP_107_63368_20120418_215232_outLine
+BABEL_BP_107_63368_20120418_220224_inLine
+BABEL_BP_107_63368_20120418_220224_outLine
+BABEL_BP_107_63368_20120418_222134_inLine
+BABEL_BP_107_63368_20120418_222134_outLine
+BABEL_BP_107_63369_20120614_191919_inLine
+BABEL_BP_107_63711_20120212_183127_inLine
+BABEL_BP_107_63711_20120212_183127_outLine
+BABEL_BP_107_64205_20120428_020155_inLine
+BABEL_BP_107_64351_20120513_193703_outLine
+BABEL_BP_107_64724_20120503_155446_inLine
+BABEL_BP_107_64724_20120503_155446_outLine
+BABEL_BP_107_64889_20120503_174229_inLine
+BABEL_BP_107_64889_20120503_174229_outLine
+BABEL_BP_107_65414_20120608_131726_inLine
+BABEL_BP_107_65743_20120404_191932_inLine
+BABEL_BP_107_65743_20120404_191932_outLine
+BABEL_BP_107_65989_20120419_141422_inLine
+BABEL_BP_107_66346_20120703_161130_inLine
+BABEL_BP_107_66346_20120703_161130_outLine
+BABEL_BP_107_66419_20120505_205757_inLine
+BABEL_BP_107_66419_20120505_205757_outLine
+BABEL_BP_107_66451_20120214_215503_inLine
+BABEL_BP_107_66451_20120214_215503_outLine
+BABEL_BP_107_66499_20120610_220818_inLine
+BABEL_BP_107_66559_20120421_185343_inLine
+BABEL_BP_107_66709_20120617_152656_outLine
+BABEL_BP_107_66709_20120617_153822_outLine
+BABEL_BP_107_66790_20120421_182115_inLine
+BABEL_BP_107_66839_20120613_192022_inLine
+BABEL_BP_107_66839_20120613_192022_outLine
+BABEL_BP_107_66866_20120418_001946_inLine
+BABEL_BP_107_66866_20120418_001946_outLine
+BABEL_BP_107_66903_20120210_183320_inLine
+BABEL_BP_107_66903_20120210_183320_outLine
+BABEL_BP_107_66964_20120419_205513_inLine
+BABEL_BP_107_66964_20120419_205513_outLine
+BABEL_BP_107_67304_20120523_201027_inLine
+BABEL_BP_107_67304_20120523_201027_outLine
+BABEL_BP_107_67555_20120323_130439_outLine
+BABEL_BP_107_67628_20120418_215117_inLine
+BABEL_BP_107_67798_20120627_141236_inLine
+BABEL_BP_107_68009_20120608_112155_inLine
+BABEL_BP_107_68129_20120611_013309_outLine
+BABEL_BP_107_68191_20120428_114953_outLine
+BABEL_BP_107_68295_20120506_210459_outLine
+BABEL_BP_107_68362_20120503_194813_outLine
+BABEL_BP_107_68545_20120421_220606_inLine
+BABEL_BP_107_68545_20120421_220606_outLine
+BABEL_BP_107_68671_20120608_205710_inLine
+BABEL_BP_107_68671_20120608_205710_outLine
+BABEL_BP_107_68767_20120214_214534_inLine
+BABEL_BP_107_68767_20120214_214534_outLine
+BABEL_BP_107_69028_20120430_132441_inLine
+BABEL_BP_107_69049_20120322_215956_inLine
+BABEL_BP_107_69137_20120424_183202_inLine
+BABEL_BP_107_69137_20120424_183202_outLine
+BABEL_BP_107_69275_20120608_210354_inLine
+BABEL_BP_107_69295_20120501_154139_inLine
+BABEL_BP_107_70000_20120618_004254_inLine
+BABEL_BP_107_70000_20120618_004254_outLine
+BABEL_BP_107_70077_20120428_170417_inLine
+BABEL_BP_107_70120_20120418_213104_inLine
+BABEL_BP_107_70120_20120418_213104_outLine
+BABEL_BP_107_70285_20120212_214056_inLine
+BABEL_BP_107_70323_20120617_122402_outLine
+BABEL_BP_107_70441_20120704_163546_inLine
+BABEL_BP_107_70441_20120704_163546_outLine
+BABEL_BP_107_70511_20120618_124928_outLine
+BABEL_BP_107_70615_20120208_233912_inLine
+BABEL_BP_107_70615_20120208_233912_outLine
+BABEL_BP_107_70975_20120407_011601_inLine
+BABEL_BP_107_70975_20120407_011601_outLine
+BABEL_BP_107_71176_20120418_195323_inLine
+BABEL_BP_107_71176_20120418_195323_outLine
+BABEL_BP_107_71739_20120430_125259_inLine
+BABEL_BP_107_71741_20120211_231000_inLine
+BABEL_BP_107_71741_20120211_231000_outLine
+BABEL_BP_107_71778_20120427_132527_inLine
+BABEL_BP_107_71778_20120427_132527_outLine
+BABEL_BP_107_71844_20120212_180004_inLine
+BABEL_BP_107_71927_20120516_204724_inLine
+BABEL_BP_107_72269_20120416_010327_inLine
+BABEL_BP_107_72269_20120416_010327_outLine
+BABEL_BP_107_72297_20120608_185443_inLine
+BABEL_BP_107_72297_20120608_185443_outLine
+BABEL_BP_107_72297_20120608_190156_inLine
+BABEL_BP_107_72297_20120608_190156_outLine
+BABEL_BP_107_72647_20120614_125725_inLine
+BABEL_BP_107_72718_20120505_025006_inLine
+BABEL_BP_107_72718_20120505_025006_outLine
+BABEL_BP_107_72879_20120403_141911_inLine
+BABEL_BP_107_72879_20120403_141911_outLine
+BABEL_BP_107_73205_20120211_191427_outLine
+BABEL_BP_107_73438_20120502_201055_inLine
+BABEL_BP_107_73438_20120502_201055_outLine
+BABEL_BP_107_73440_20120416_172035_inLine
+BABEL_BP_107_73452_20120504_170508_inLine
+BABEL_BP_107_73452_20120504_170508_outLine
+BABEL_BP_107_73752_20120610_174558_inLine
+BABEL_BP_107_73761_20120424_154013_inLine
+BABEL_BP_107_73761_20120424_154013_outLine
+BABEL_BP_107_73911_20120215_175351_inLine
+BABEL_BP_107_73911_20120215_175351_outLine
+BABEL_BP_107_73921_20120501_000425_outLine
+BABEL_BP_107_74043_20120323_014301_outLine
+BABEL_BP_107_74234_20120328_020415_inLine
+BABEL_BP_107_74234_20120328_020415_outLine
+BABEL_BP_107_74317_20120502_225211_inLine
+BABEL_BP_107_74317_20120502_225211_outLine
+BABEL_BP_107_74395_20120418_140703_inLine
+BABEL_BP_107_74395_20120418_140703_outLine
+BABEL_BP_107_74404_20120212_134850_outLine
+BABEL_BP_107_74625_20120425_234344_inLine
+BABEL_BP_107_74700_20120610_233419_inLine
+BABEL_BP_107_74823_20120217_022832_inLine
+BABEL_BP_107_74823_20120217_022832_outLine
+BABEL_BP_107_74935_20120616_144642_inLine
+BABEL_BP_107_74974_20120617_143904_inLine
+BABEL_BP_107_74974_20120617_143904_outLine
+BABEL_BP_107_74986_20120416_011008_inLine
+BABEL_BP_107_74986_20120416_011008_outLine
+BABEL_BP_107_74986_20120416_011927_inLine
+BABEL_BP_107_74986_20120416_011927_outLine
+BABEL_BP_107_75036_20120325_233130_inLine
+BABEL_BP_107_75036_20120325_233130_outLine
+BABEL_BP_107_75333_20120505_200116_inLine
+BABEL_BP_107_75333_20120505_200116_outLine
+BABEL_BP_107_75498_20120506_171232_inLine
+BABEL_BP_107_75498_20120506_171232_outLine
+BABEL_BP_107_75680_20120704_175114_inLine
+BABEL_BP_107_75680_20120704_175114_outLine
+BABEL_BP_107_75799_20120429_140233_inLine
+BABEL_BP_107_75799_20120429_140233_outLine
+BABEL_BP_107_75815_20120217_141539_inLine
+BABEL_BP_107_75815_20120217_141539_outLine
+BABEL_BP_107_76252_20120705_003603_outLine
+BABEL_BP_107_76341_20120215_201638_inLine
+BABEL_BP_107_76341_20120215_201638_outLine
+BABEL_BP_107_76661_20120405_132625_inLine
+BABEL_BP_107_76691_20120501_002016_inLine
+BABEL_BP_107_76716_20120418_215649_outLine
+BABEL_BP_107_76733_20120424_181359_inLine
+BABEL_BP_107_76733_20120424_181359_outLine
+BABEL_BP_107_76733_20120424_183605_inLine
+BABEL_BP_107_76733_20120424_183605_outLine
+BABEL_BP_107_76748_20120504_181420_inLine
+BABEL_BP_107_76919_20120507_010805_outLine
+BABEL_BP_107_76925_20120407_015139_inLine
+BABEL_BP_107_76944_20120505_000745_inLine
+BABEL_BP_107_76944_20120505_000745_outLine
+BABEL_BP_107_76993_20120501_125118_inLine
+BABEL_BP_107_76993_20120501_125118_outLine
+BABEL_BP_107_77238_20120322_211133_outLine
+BABEL_BP_107_77244_20120429_164842_inLine
+BABEL_BP_107_77244_20120429_164842_outLine
+BABEL_BP_107_77315_20120527_222821_outLine
+BABEL_BP_107_77338_20120617_171454_inLine
+BABEL_BP_107_77338_20120617_171454_outLine
+BABEL_BP_107_77473_20120610_000112_inLine
+BABEL_BP_107_77886_20120326_191938_inLine
+BABEL_BP_107_77886_20120326_191938_outLine
+BABEL_BP_107_78094_20120212_205141_inLine
+BABEL_BP_107_78094_20120212_205141_outLine
+BABEL_BP_107_78487_20120430_133108_inLine
+BABEL_BP_107_78487_20120430_133108_outLine
+BABEL_BP_107_78514_20120617_131155_outLine
+BABEL_BP_107_79284_20120511_180310_inLine
+BABEL_BP_107_79284_20120511_180310_outLine
+BABEL_BP_107_79495_20120222_195716_inLine
+BABEL_BP_107_79619_20120420_115502_inLine
+BABEL_BP_107_79619_20120420_115502_outLine
+BABEL_BP_107_79632_20120428_182831_inLine
+BABEL_BP_107_79632_20120428_182831_outLine
+BABEL_BP_107_79860_20120328_023545_inLine
+BABEL_BP_107_79944_20120424_213833_inLine
+BABEL_BP_107_79970_20120418_214316_inLine
+BABEL_BP_107_80008_20120218_225347_inLine
+BABEL_BP_107_80008_20120218_225347_outLine
+BABEL_BP_107_80282_20120627_190514_inLine
+BABEL_BP_107_80282_20120627_190935_inLine
+BABEL_BP_107_80290_20120501_134226_inLine
+BABEL_BP_107_80290_20120501_134226_outLine
+BABEL_BP_107_80337_20120608_000801_inLine
+BABEL_BP_107_80337_20120608_000801_outLine
+BABEL_BP_107_80638_20120501_223037_inLine
+BABEL_BP_107_80638_20120501_223037_outLine
+BABEL_BP_107_80786_20120212_204918_inLine
+BABEL_BP_107_80786_20120212_204918_outLine
+BABEL_BP_107_81056_20120502_155358_inLine
+BABEL_BP_107_81056_20120502_155358_outLine
+BABEL_BP_107_81096_20120418_221604_inLine
+BABEL_BP_107_81096_20120418_221604_outLine
+BABEL_BP_107_81321_20120329_030424_outLine
+BABEL_BP_107_81486_20120213_035232_inLine
+BABEL_BP_107_81486_20120213_040319_inLine
+BABEL_BP_107_81535_20120421_151505_inLine
+BABEL_BP_107_81535_20120421_151505_outLine
+BABEL_BP_107_81611_20120616_154507_outLine
+BABEL_BP_107_81717_20120426_185608_inLine
+BABEL_BP_107_81771_20120615_224609_inLine
+BABEL_BP_107_81771_20120615_224609_outLine
+BABEL_BP_107_82006_20120417_133143_outLine
+BABEL_BP_107_82025_20120325_012956_inLine
+BABEL_BP_107_82103_20120326_172335_inLine
+BABEL_BP_107_82103_20120326_172335_outLine
+BABEL_BP_107_82131_20120704_135728_inLine
+BABEL_BP_107_82131_20120704_211005_inLine
+BABEL_BP_107_82591_20120407_185008_outLine
+BABEL_BP_107_82599_20120608_140933_outLine
+BABEL_BP_107_82766_20120627_112435_outLine
+BABEL_BP_107_82880_20120705_001819_inLine
+BABEL_BP_107_82880_20120705_001819_outLine
+BABEL_BP_107_82947_20120426_103950_inLine
+BABEL_BP_107_82947_20120426_103950_outLine
+BABEL_BP_107_82947_20120509_202553_inLine
+BABEL_BP_107_82947_20120509_202553_outLine
+BABEL_BP_107_83017_20120608_125136_inLine
+BABEL_BP_107_83053_20120426_184045_inLine
+BABEL_BP_107_83256_20120212_162557_outLine
+BABEL_BP_107_83360_20120418_000230_inLine
+BABEL_BP_107_83360_20120418_000230_outLine
+BABEL_BP_107_83529_20120608_152238_outLine
+BABEL_BP_107_83700_20120427_121525_inLine
+BABEL_BP_107_83700_20120427_121525_outLine
+BABEL_BP_107_83702_20120418_010601_inLine
+BABEL_BP_107_83702_20120418_010601_outLine
+BABEL_BP_107_83982_20120704_125429_outLine
+BABEL_BP_107_83982_20120704_125430_inLine
+BABEL_BP_107_83982_20120704_131324_inLine
+BABEL_BP_107_83982_20120704_131324_outLine
+BABEL_BP_107_84171_20120504_185725_inLine
+BABEL_BP_107_84335_20120418_002843_inLine
+BABEL_BP_107_84397_20120608_080802_outLine
+BABEL_BP_107_84532_20120703_171302_inLine
+BABEL_BP_107_84540_20120328_205952_outLine
+BABEL_BP_107_84543_20120503_005623_inLine
+BABEL_BP_107_84543_20120503_005623_outLine
+BABEL_BP_107_84943_20120405_134459_inLine
+BABEL_BP_107_85083_20120425_024151_inLine
+BABEL_BP_107_85354_20120704_145327_inLine
+BABEL_BP_107_85354_20120704_145327_outLine
+BABEL_BP_107_85573_20120208_152239_inLine
+BABEL_BP_107_85617_20120415_171620_inLine
+BABEL_BP_107_85617_20120415_171620_outLine
+BABEL_BP_107_85686_20120627_180412_inLine
+BABEL_BP_107_85686_20120627_180413_outLine
+BABEL_BP_107_85716_20120330_201512_outLine
+BABEL_BP_107_85716_20120330_202652_outLine
+BABEL_BP_107_85819_20120705_030943_inLine
+BABEL_BP_107_85819_20120705_030944_outLine
+BABEL_BP_107_86016_20120417_225748_inLine
+BABEL_BP_107_86029_20120212_235447_inLine
+BABEL_BP_107_86419_20120209_010052_inLine
+BABEL_BP_107_86419_20120209_010052_outLine
+BABEL_BP_107_86801_20120429_211031_inLine
+BABEL_BP_107_86801_20120429_211031_outLine
+BABEL_BP_107_86890_20120322_202435_inLine
+BABEL_BP_107_87167_20120211_230800_outLine
+BABEL_BP_107_87481_20120513_191237_inLine
+BABEL_BP_107_87481_20120513_191237_outLine
+BABEL_BP_107_87539_20120418_225114_inLine
+BABEL_BP_107_87539_20120418_225114_outLine
+BABEL_BP_107_87671_20120218_011104_inLine
+BABEL_BP_107_87857_20120325_000202_inLine
+BABEL_BP_107_88243_20120322_210747_inLine
+BABEL_BP_107_88243_20120322_210747_outLine
+BABEL_BP_107_88253_20120511_165340_inLine
+BABEL_BP_107_88253_20120511_165340_outLine
+BABEL_BP_107_88294_20120322_163142_outLine
+BABEL_BP_107_88506_20120503_191321_inLine
+BABEL_BP_107_88506_20120503_191321_outLine
+BABEL_BP_107_88532_20120416_012644_inLine
+BABEL_BP_107_89619_20120217_174102_inLine
+BABEL_BP_107_89619_20120217_174102_outLine
+BABEL_BP_107_89644_20120501_170949_inLine
+BABEL_BP_107_89644_20120501_170949_outLine
+BABEL_BP_107_89657_20120610_213215_inLine
+BABEL_BP_107_89657_20120610_213215_outLine
+BABEL_BP_107_89674_20120212_162158_inLine
+BABEL_BP_107_89674_20120212_162158_outLine
+BABEL_BP_107_89965_20120505_003121_inLine
+BABEL_BP_107_89965_20120505_003121_outLine
+BABEL_BP_107_90313_20120325_200742_inLine
+BABEL_BP_107_90393_20120417_220816_inLine
+BABEL_BP_107_90393_20120417_220817_outLine
+BABEL_BP_107_90559_20120608_184439_inLine
+BABEL_BP_107_90559_20120608_184439_outLine
+BABEL_BP_107_90577_20120118_141830_inLine
+BABEL_BP_107_90577_20120118_141830_outLine
+BABEL_BP_107_90609_20120216_194251_inLine
+BABEL_BP_107_90764_20120418_004231_outLine
+BABEL_BP_107_90975_20120428_231848_inLine
+BABEL_BP_107_90975_20120428_231848_outLine
+BABEL_BP_107_91000_20120529_151028_inLine
+BABEL_BP_107_91002_20120429_192712_inLine
+BABEL_BP_107_91002_20120429_192712_outLine
+BABEL_BP_107_91007_20120612_144506_inLine
+BABEL_BP_107_91040_20120618_152624_outLine
+BABEL_BP_107_91136_20120427_122059_inLine
+BABEL_BP_107_91401_20120213_010307_inLine
+BABEL_BP_107_91401_20120213_010307_outLine
+BABEL_BP_107_91406_20120429_193057_inLine
+BABEL_BP_107_91406_20120429_193057_outLine
+BABEL_BP_107_91409_20120520_225023_outLine
+BABEL_BP_107_91409_20120520_231205_outLine
+BABEL_BP_107_91660_20120510_181954_inLine
+BABEL_BP_107_91660_20120510_181954_outLine
+BABEL_BP_107_91660_20120510_182853_inLine
+BABEL_BP_107_91660_20120510_182853_outLine
+BABEL_BP_107_91660_20120510_184146_inLine
+BABEL_BP_107_91660_20120510_184146_outLine
+BABEL_BP_107_91723_20120323_144335_outLine
+BABEL_BP_107_91865_20120429_214728_inLine
+BABEL_BP_107_91865_20120429_214728_outLine
+BABEL_BP_107_91905_20120504_210602_inLine
+BABEL_BP_107_91905_20120504_210602_outLine
+BABEL_BP_107_91975_20120703_173220_inLine
+BABEL_BP_107_91975_20120703_173220_outLine
+BABEL_BP_107_91979_20120209_000610_inLine
+BABEL_BP_107_92002_20120418_214926_outLine
+BABEL_BP_107_92407_20120210_183713_inLine
+BABEL_BP_107_92407_20120210_183713_outLine
+BABEL_BP_107_92436_20120213_013131_inLine
+BABEL_BP_107_92436_20120213_013131_outLine
+BABEL_BP_107_92591_20120505_140206_outLine
+BABEL_BP_107_92602_20120216_214746_inLine
+BABEL_BP_107_92602_20120216_215738_inLine
+BABEL_BP_107_92603_20120416_011244_inLine
+BABEL_BP_107_92603_20120416_011244_outLine
+BABEL_BP_107_92628_20120323_014512_inLine
+BABEL_BP_107_92628_20120323_014512_outLine
+BABEL_BP_107_92643_20120608_122156_inLine
+BABEL_BP_107_92643_20120608_123106_inLine
+BABEL_BP_107_92735_20120413_181602_inLine
+BABEL_BP_107_92789_20120416_165856_inLine
+BABEL_BP_107_92800_20120412_013211_outLine
+BABEL_BP_107_93044_20120607_140719_inLine
+BABEL_BP_107_93044_20120607_140719_outLine
+BABEL_BP_107_93509_20120321_230219_inLine
+BABEL_BP_107_93509_20120321_230219_outLine
+BABEL_BP_107_93804_20120703_232729_inLine
+BABEL_BP_107_93804_20120703_233401_inLine
+BABEL_BP_107_93974_20120627_184419_inLine
+BABEL_BP_107_93974_20120627_184419_outLine
+BABEL_BP_107_93979_20120422_134735_inLine
+BABEL_BP_107_93979_20120422_134735_outLine
+BABEL_BP_107_94149_20120405_220033_outLine
+BABEL_BP_107_94162_20120425_235433_inLine
+BABEL_BP_107_94223_20120215_204525_inLine
+BABEL_BP_107_94514_20120417_001615_inLine
+BABEL_BP_107_94514_20120417_001615_outLine
+BABEL_BP_107_94514_20120417_003504_inLine
+BABEL_BP_107_94514_20120417_003504_outLine
+BABEL_BP_107_94541_20120705_024032_outLine
+BABEL_BP_107_94542_20120512_223011_inLine
+BABEL_BP_107_94542_20120512_223011_outLine
+BABEL_BP_107_94694_20120508_120203_inLine
+BABEL_BP_107_94694_20120508_120203_outLine
+BABEL_BP_107_94696_20120608_185951_inLine
+BABEL_BP_107_94696_20120608_185951_outLine
+BABEL_BP_107_94814_20120501_130313_inLine
+BABEL_BP_107_94814_20120501_130313_outLine
+BABEL_BP_107_94989_20120627_120236_outLine
+BABEL_BP_107_95121_20120628_123304_inLine
+BABEL_BP_107_95423_20120415_201523_inLine
+BABEL_BP_107_95423_20120415_201523_outLine
+BABEL_BP_107_95533_20120505_005928_inLine
+BABEL_BP_107_95533_20120505_005928_outLine
+BABEL_BP_107_95542_20120502_223446_inLine
+BABEL_BP_107_95542_20120502_223446_outLine
+BABEL_BP_107_95566_20120505_162738_inLine
+BABEL_BP_107_95572_20120406_151856_inLine
+BABEL_BP_107_95572_20120406_151856_outLine
+BABEL_BP_107_95589_20120419_162645_inLine
+BABEL_BP_107_95589_20120419_162645_outLine
+BABEL_BP_107_95815_20120322_160344_inLine
+BABEL_BP_107_95815_20120322_160344_outLine
+BABEL_BP_107_95996_20120324_230119_inLine
+BABEL_BP_107_96302_20120510_023815_inLine
+BABEL_BP_107_96302_20120510_023815_outLine
+BABEL_BP_107_96322_20120218_202407_inLine
+BABEL_BP_107_96322_20120218_202407_outLine
+BABEL_BP_107_96667_20120426_182837_inLine
+BABEL_BP_107_96667_20120426_182837_outLine
+BABEL_BP_107_96959_20120505_014233_inLine
+BABEL_BP_107_96959_20120505_014233_outLine
+BABEL_BP_107_97260_20120324_012659_outLine
+BABEL_BP_107_97318_20120608_183537_inLine
+BABEL_BP_107_97318_20120608_183537_outLine
+BABEL_BP_107_97629_20120420_202833_inLine
+BABEL_BP_107_97629_20120420_202833_outLine
+BABEL_BP_107_97946_20120411_213631_outLine
+BABEL_BP_107_98086_20120609_185014_inLine
+BABEL_BP_107_98086_20120609_185014_outLine
+BABEL_BP_107_98099_20120618_120506_outLine
+BABEL_BP_107_98219_20120512_202308_inLine
+BABEL_BP_107_98219_20120512_202308_outLine
+BABEL_BP_107_98219_20120512_203451_inLine
+BABEL_BP_107_98219_20120512_203451_outLine
+BABEL_BP_107_98402_20120421_162435_inLine
+BABEL_BP_107_98402_20120421_162435_outLine
+BABEL_BP_107_98640_20120425_213908_outLine
+BABEL_BP_107_98675_20120419_225133_inLine
+BABEL_BP_107_98675_20120419_225133_outLine
+BABEL_BP_107_99414_20120430_200633_inLine
+BABEL_BP_107_99414_20120430_200633_outLine
+BABEL_BP_107_99567_20120405_154443_outLine
+BABEL_BP_107_99571_20120322_165034_inLine
+BABEL_BP_107_99571_20120322_165034_outLine
+BABEL_BP_107_99694_20120322_165823_inLine
+BABEL_BP_107_99694_20120322_165823_outLine
+BABEL_BP_107_99731_20120618_005616_outLine
+BABEL_BP_107_99764_20120415_202745_inLine
+BABEL_BP_107_99823_20120511_002213_inLine
+BABEL_BP_107_99823_20120511_002213_outLine
+BABEL_BP_107_99929_20120612_143030_inLine
diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.list b/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.list
new file mode 100644
index 00000000000..a47debb4917
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.list
@@ -0,0 +1,126 @@
+BABEL_BP_107_12643_20120704_185225_inLine
+BABEL_BP_107_12643_20120704_185225_outLine
+BABEL_BP_107_13065_20120425_034939_inLine
+BABEL_BP_107_13065_20120425_034939_outLine
+BABEL_BP_107_13389_20120406_141036_inLine
+BABEL_BP_107_13389_20120406_141036_outLine
+BABEL_BP_107_14468_20120321_003916_inLine
+BABEL_BP_107_14468_20120321_003916_outLine
+BABEL_BP_107_14475_20120704_204813_inLine
+BABEL_BP_107_14475_20120704_204813_outLine
+BABEL_BP_107_14891_20120118_195012_inLine
+BABEL_BP_107_14891_20120118_195012_outLine
+BABEL_BP_107_17933_20120421_134916_inLine
+BABEL_BP_107_17933_20120421_134916_outLine
+BABEL_BP_107_19479_20120407_014459_inLine
+BABEL_BP_107_19479_20120407_014459_outLine
+BABEL_BP_107_21477_20120323_185255_inLine
+BABEL_BP_107_21477_20120323_185255_outLine
+BABEL_BP_107_21518_20120501_152038_inLine
+BABEL_BP_107_21518_20120501_152038_outLine
+BABEL_BP_107_22010_20120608_182138_inLine
+BABEL_BP_107_22010_20120608_182138_outLine
+BABEL_BP_107_22272_20120511_232328_inLine
+BABEL_BP_107_22272_20120511_232328_outLine
+BABEL_BP_107_22979_20120505_000039_inLine
+BABEL_BP_107_22979_20120505_000039_outLine
+BABEL_BP_107_23629_20120501_173549_inLine
+BABEL_BP_107_23629_20120501_173549_outLine
+BABEL_BP_107_27724_20120407_130547_inLine
+BABEL_BP_107_27724_20120407_130547_outLine
+BABEL_BP_107_31980_20120212_174027_inLine
+BABEL_BP_107_31980_20120212_174027_outLine
+BABEL_BP_107_34590_20120323_134554_inLine
+BABEL_BP_107_34590_20120323_134554_outLine
+BABEL_BP_107_36722_20120627_122821_inLine
+BABEL_BP_107_36722_20120627_122821_outLine
+BABEL_BP_107_38912_20120414_160852_inLine
+BABEL_BP_107_38912_20120414_160852_outLine
+BABEL_BP_107_41170_20120201_205341_inLine
+BABEL_BP_107_41170_20120201_205341_outLine
+BABEL_BP_107_42309_20120608_215912_inLine
+BABEL_BP_107_42309_20120608_215912_outLine
+BABEL_BP_107_42651_20120211_192913_inLine
+BABEL_BP_107_42651_20120211_192913_outLine
+BABEL_BP_107_42910_20120212_154722_inLine
+BABEL_BP_107_42910_20120212_154722_outLine
+BABEL_BP_107_43306_20120409_184959_inLine
+BABEL_BP_107_43306_20120409_184959_outLine
+BABEL_BP_107_44369_20120504_024021_inLine
+BABEL_BP_107_44369_20120504_024021_outLine
+BABEL_BP_107_44403_20120322_214144_inLine
+BABEL_BP_107_44403_20120322_214144_outLine
+BABEL_BP_107_44756_20120426_155822_inLine
+BABEL_BP_107_44756_20120426_155822_outLine
+BABEL_BP_107_49186_20120704_180724_inLine
+BABEL_BP_107_49186_20120704_180724_outLine
+BABEL_BP_107_49552_20120614_140129_inLine
+BABEL_BP_107_50267_20120421_135338_inLine
+BABEL_BP_107_50267_20120421_135338_outLine
+BABEL_BP_107_50883_20120328_013430_inLine
+BABEL_BP_107_50883_20120328_013430_outLine
+BABEL_BP_107_52219_20120417_113120_inLine
+BABEL_BP_107_52219_20120417_113120_outLine
+BABEL_BP_107_53181_20120211_163316_inLine
+BABEL_BP_107_53181_20120211_163316_outLine
+BABEL_BP_107_54199_20120607_200253_inLine
+BABEL_BP_107_54199_20120607_202722_inLine
+BABEL_BP_107_54199_20120607_202722_outLine
+BABEL_BP_107_54621_20120421_132410_inLine
+BABEL_BP_107_54621_20120421_132410_outLine
+BABEL_BP_107_55777_20120421_234307_inLine
+BABEL_BP_107_55777_20120421_234307_outLine
+BABEL_BP_107_58357_20120507_125021_inLine
+BABEL_BP_107_58357_20120507_125021_outLine
+BABEL_BP_107_59175_20120212_225712_inLine
+BABEL_BP_107_59175_20120212_225712_outLine
+BABEL_BP_107_60677_20120415_145311_inLine
+BABEL_BP_107_60677_20120415_145311_outLine
+BABEL_BP_107_60677_20120415_150336_inLine
+BABEL_BP_107_60677_20120415_150336_outLine
+BABEL_BP_107_61073_20120322_193656_inLine
+BABEL_BP_107_61073_20120322_193656_outLine
+BABEL_BP_107_62923_20120322_163015_inLine
+BABEL_BP_107_62923_20120322_163015_outLine
+BABEL_BP_107_63711_20120212_183127_inLine
+BABEL_BP_107_63711_20120212_183127_outLine
+BABEL_BP_107_66346_20120703_161130_inLine
+BABEL_BP_107_66346_20120703_161130_outLine
+BABEL_BP_107_66419_20120505_205757_inLine
+BABEL_BP_107_66419_20120505_205757_outLine
+BABEL_BP_107_66903_20120210_183320_inLine
+BABEL_BP_107_66903_20120210_183320_outLine
+BABEL_BP_107_67304_20120523_201027_inLine
+BABEL_BP_107_67304_20120523_201027_outLine
+BABEL_BP_107_71778_20120427_132527_inLine
+BABEL_BP_107_71778_20120427_132527_outLine
+BABEL_BP_107_73452_20120504_170508_inLine
+BABEL_BP_107_73452_20120504_170508_outLine
+BABEL_BP_107_73752_20120610_174558_inLine
+BABEL_BP_107_73911_20120215_175351_inLine
+BABEL_BP_107_73911_20120215_175351_outLine
+BABEL_BP_107_74234_20120328_020415_inLine
+BABEL_BP_107_74234_20120328_020415_outLine
+BABEL_BP_107_75680_20120704_175114_inLine
+BABEL_BP_107_75680_20120704_175114_outLine
+BABEL_BP_107_80786_20120212_204918_inLine
+BABEL_BP_107_80786_20120212_204918_outLine
+BABEL_BP_107_81096_20120418_221604_inLine
+BABEL_BP_107_81096_20120418_221604_outLine
+BABEL_BP_107_81771_20120615_224609_inLine
+BABEL_BP_107_81771_20120615_224609_outLine
+BABEL_BP_107_82947_20120426_103950_inLine
+BABEL_BP_107_82947_20120426_103950_outLine
+BABEL_BP_107_82947_20120509_202553_inLine
+BABEL_BP_107_82947_20120509_202553_outLine
+BABEL_BP_107_84397_20120608_080802_outLine
+BABEL_BP_107_85617_20120415_171620_inLine
+BABEL_BP_107_85617_20120415_171620_outLine
+BABEL_BP_107_86801_20120429_211031_inLine
+BABEL_BP_107_86801_20120429_211031_outLine
+BABEL_BP_107_90559_20120608_184439_inLine
+BABEL_BP_107_90559_20120608_184439_outLine
+BABEL_BP_107_90975_20120428_231848_inLine
+BABEL_BP_107_90975_20120428_231848_outLine
+BABEL_BP_107_96322_20120218_202407_inLine
+BABEL_BP_107_96322_20120218_202407_outLine
diff --git a/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.untranscribed.list
new file mode 100644
index 00000000000..4379937a74f
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/107-vietnamese/train.LimitedLP.untranscribed.list
@@ -0,0 +1,916 @@
+BABEL_BP_107_10033_20120208_180820_outLine
+BABEL_BP_107_10066_20120428_121544_inLine
+BABEL_BP_107_10066_20120428_121544_outLine
+BABEL_BP_107_10190_20120424_023348_inLine
+BABEL_BP_107_10190_20120425_012249_inLine
+BABEL_BP_107_10211_20120323_013915_inLine
+BABEL_BP_107_10211_20120323_013915_outLine
+BABEL_BP_107_10545_20120424_184701_inLine
+BABEL_BP_107_10697_20120516_194235_inLine
+BABEL_BP_107_10732_20120328_172421_inLine
+BABEL_BP_107_10732_20120328_172422_outLine
+BABEL_BP_107_10900_20120322_022523_inLine
+BABEL_BP_107_10900_20120322_022524_outLine
+BABEL_BP_107_10945_20120322_222039_inLine
+BABEL_BP_107_10945_20120322_222039_outLine
+BABEL_BP_107_10973_20120404_233129_inLine
+BABEL_BP_107_10973_20120404_233129_outLine
+BABEL_BP_107_10985_20120502_123725_inLine
+BABEL_BP_107_10985_20120502_123725_outLine
+BABEL_BP_107_11022_20120422_013455_inLine
+BABEL_BP_107_11022_20120422_013455_outLine
+BABEL_BP_107_11422_20120208_160559_inLine
+BABEL_BP_107_11422_20120208_160559_outLine
+BABEL_BP_107_11479_20120212_011029_inLine
+BABEL_BP_107_11479_20120212_011029_outLine
+BABEL_BP_107_11827_20120322_205100_inLine
+BABEL_BP_107_11827_20120322_205100_outLine
+BABEL_BP_107_11949_20120704_001817_inLine
+BABEL_BP_107_11949_20120704_001817_outLine
+BABEL_BP_107_11982_20120217_004340_inLine
+BABEL_BP_107_12486_20120424_174759_inLine
+BABEL_BP_107_12552_20120503_152109_inLine
+BABEL_BP_107_12569_20120609_190056_inLine
+BABEL_BP_107_12569_20120609_190056_outLine
+BABEL_BP_107_12587_20120322_230456_inLine
+BABEL_BP_107_12587_20120322_230457_outLine
+BABEL_BP_107_12897_20120413_195042_inLine
+BABEL_BP_107_12897_20120413_195042_outLine
+BABEL_BP_107_12897_20120413_200727_inLine
+BABEL_BP_107_12897_20120413_200727_outLine
+BABEL_BP_107_13229_20120417_201028_inLine
+BABEL_BP_107_13229_20120417_201028_outLine
+BABEL_BP_107_13272_20120320_141107_outLine
+BABEL_BP_107_13272_20120320_142506_outLine
+BABEL_BP_107_13419_20120218_213925_inLine
+BABEL_BP_107_13419_20120218_214753_inLine
+BABEL_BP_107_13781_20120516_204849_inLine
+BABEL_BP_107_13781_20120516_204849_outLine
+BABEL_BP_107_13795_20120418_190613_inLine
+BABEL_BP_107_13795_20120418_190613_outLine
+BABEL_BP_107_14075_20120507_004435_inLine
+BABEL_BP_107_14294_20120328_010858_inLine
+BABEL_BP_107_14294_20120328_010858_outLine
+BABEL_BP_107_14500_20120429_194225_outLine
+BABEL_BP_107_14707_20120429_004741_inLine
+BABEL_BP_107_14707_20120429_004741_outLine
+BABEL_BP_107_14707_20120429_005954_inLine
+BABEL_BP_107_14707_20120429_005954_outLine
+BABEL_BP_107_14729_20120429_200418_outLine
+BABEL_BP_107_14836_20120507_235040_outLine
+BABEL_BP_107_14936_20120405_224830_inLine
+BABEL_BP_107_14936_20120405_224830_outLine
+BABEL_BP_107_15073_20120417_011547_outLine
+BABEL_BP_107_15142_20120322_132735_outLine
+BABEL_BP_107_15353_20120504_193952_inLine
+BABEL_BP_107_15353_20120504_193952_outLine
+BABEL_BP_107_15460_20120426_224823_inLine
+BABEL_BP_107_15460_20120426_224823_outLine
+BABEL_BP_107_15473_20120217_231342_inLine
+BABEL_BP_107_15696_20120328_010156_outLine
+BABEL_BP_107_15719_20120612_122632_inLine
+BABEL_BP_107_15719_20120612_122632_outLine
+BABEL_BP_107_15744_20120608_123258_inLine
+BABEL_BP_107_15873_20120405_224524_inLine
+BABEL_BP_107_15873_20120405_224524_outLine
+BABEL_BP_107_15881_20120322_233839_inLine
+BABEL_BP_107_15940_20120424_221327_inLine
+BABEL_BP_107_16406_20120324_011714_inLine
+BABEL_BP_107_16406_20120324_011714_outLine
+BABEL_BP_107_16617_20120228_014302_inLine
+BABEL_BP_107_16646_20120418_130946_outLine
+BABEL_BP_107_16660_20120210_231224_outLine
+BABEL_BP_107_16669_20120208_140603_inLine
+BABEL_BP_107_16801_20120418_121951_inLine
+BABEL_BP_107_16801_20120418_203644_inLine
+BABEL_BP_107_16875_20120704_133550_inLine
+BABEL_BP_107_16875_20120704_133550_outLine
+BABEL_BP_107_16883_20120501_194424_inLine
+BABEL_BP_107_16883_20120501_194424_outLine
+BABEL_BP_107_16950_20120704_155322_inLine
+BABEL_BP_107_16950_20120704_155322_outLine
+BABEL_BP_107_17013_20120501_002142_inLine
+BABEL_BP_107_17013_20120501_002142_outLine
+BABEL_BP_107_17018_20120322_220450_inLine
+BABEL_BP_107_17018_20120322_220450_outLine
+BABEL_BP_107_17093_20120501_202548_outLine
+BABEL_BP_107_17203_20120212_220043_outLine
+BABEL_BP_107_17353_20120617_133436_inLine
+BABEL_BP_107_17353_20120617_133436_outLine
+BABEL_BP_107_18187_20120608_125102_outLine
+BABEL_BP_107_18209_20120420_004725_inLine
+BABEL_BP_107_18234_20120210_230712_inLine
+BABEL_BP_107_18495_20120618_003601_outLine
+BABEL_BP_107_18534_20120504_132522_inLine
+BABEL_BP_107_18534_20120504_132522_outLine
+BABEL_BP_107_18858_20120209_004527_outLine
+BABEL_BP_107_19012_20120503_215037_inLine
+BABEL_BP_107_19012_20120503_215037_outLine
+BABEL_BP_107_19248_20120508_210026_inLine
+BABEL_BP_107_19248_20120508_210027_outLine
+BABEL_BP_107_19290_20120421_141409_inLine
+BABEL_BP_107_19290_20120421_141409_outLine
+BABEL_BP_107_19404_20120321_171020_inLine
+BABEL_BP_107_19404_20120321_171020_outLine
+BABEL_BP_107_19731_20120506_011629_inLine
+BABEL_BP_107_19731_20120515_001656_inLine
+BABEL_BP_107_19869_20120608_012542_outLine
+BABEL_BP_107_20320_20120212_214655_inLine
+BABEL_BP_107_20332_20120426_010134_inLine
+BABEL_BP_107_20332_20120426_010837_inLine
+BABEL_BP_107_20332_20120426_010134_outLine
+BABEL_BP_107_20332_20120426_010837_outLine
+BABEL_BP_107_20483_20120416_171740_outLine
+BABEL_BP_107_20518_20120418_211112_inLine
+BABEL_BP_107_20582_20120322_220747_inLine
+BABEL_BP_107_20582_20120322_220747_outLine
+BABEL_BP_107_20740_20120427_193225_inLine
+BABEL_BP_107_20740_20120427_193757_inLine
+BABEL_BP_107_20741_20120325_181245_outLine
+BABEL_BP_107_20799_20120515_010136_inLine
+BABEL_BP_107_20799_20120515_010136_outLine
+BABEL_BP_107_21052_20120415_204922_inLine
+BABEL_BP_107_21139_20120425_192642_outLine
+BABEL_BP_107_21258_20120418_145725_inLine
+BABEL_BP_107_21367_20120629_140326_outLine
+BABEL_BP_107_21430_20120608_003600_outLine
+BABEL_BP_107_21584_20120217_004017_inLine
+BABEL_BP_107_21584_20120217_004017_outLine
+BABEL_BP_107_21758_20120407_010928_inLine
+BABEL_BP_107_21758_20120407_010928_outLine
+BABEL_BP_107_21758_20120407_011555_inLine
+BABEL_BP_107_21758_20120407_011555_outLine
+BABEL_BP_107_21929_20120323_015539_inLine
+BABEL_BP_107_21929_20120323_022750_inLine
+BABEL_BP_107_21946_20120507_015056_inLine
+BABEL_BP_107_21946_20120507_015056_outLine
+BABEL_BP_107_22494_20120613_122322_outLine
+BABEL_BP_107_22898_20120322_144401_inLine
+BABEL_BP_107_22898_20120322_144401_outLine
+BABEL_BP_107_22910_20120214_213815_inLine
+BABEL_BP_107_22910_20120214_213815_outLine
+BABEL_BP_107_23167_20120217_212610_inLine
+BABEL_BP_107_23167_20120217_212610_outLine
+BABEL_BP_107_23930_20120506_214145_inLine
+BABEL_BP_107_24014_20120618_010729_inLine
+BABEL_BP_107_24014_20120618_010729_outLine
+BABEL_BP_107_24094_20120421_134318_outLine
+BABEL_BP_107_24569_20120507_123854_outLine
+BABEL_BP_107_24608_20120208_170106_outLine
+BABEL_BP_107_24638_20120504_004348_outLine
+BABEL_BP_107_24642_20120505_201543_inLine
+BABEL_BP_107_24642_20120505_201543_outLine
+BABEL_BP_107_24799_20120508_232153_outLine
+BABEL_BP_107_24817_20120422_203514_inLine
+BABEL_BP_107_24833_20120218_171649_outLine
+BABEL_BP_107_25035_20120214_230841_inLine
+BABEL_BP_107_25072_20120429_144535_inLine
+BABEL_BP_107_25479_20120506_161146_inLine
+BABEL_BP_107_25479_20120506_161146_outLine
+BABEL_BP_107_25576_20120321_222905_outLine
+BABEL_BP_107_25866_20120426_193335_inLine
+BABEL_BP_107_26348_20120508_100651_inLine
+BABEL_BP_107_26348_20120508_102042_inLine
+BABEL_BP_107_26350_20120209_004945_inLine
+BABEL_BP_107_26350_20120209_004945_outLine
+BABEL_BP_107_26350_20120209_012139_inLine
+BABEL_BP_107_26350_20120209_012139_outLine
+BABEL_BP_107_26598_20120425_143602_outLine
+BABEL_BP_107_26684_20120530_155756_inLine
+BABEL_BP_107_26786_20120423_191945_inLine
+BABEL_BP_107_26786_20120423_191945_outLine
+BABEL_BP_107_27064_20120222_210044_inLine
+BABEL_BP_107_27064_20120222_210044_outLine
+BABEL_BP_107_27503_20120212_221915_inLine
+BABEL_BP_107_27619_20120328_023110_outLine
+BABEL_BP_107_27698_20120212_005737_inLine
+BABEL_BP_107_27698_20120212_005737_outLine
+BABEL_BP_107_27890_20120428_235422_inLine
+BABEL_BP_107_27890_20120428_235422_outLine
+BABEL_BP_107_27916_20120607_114245_outLine
+BABEL_BP_107_27916_20120607_115650_outLine
+BABEL_BP_107_28016_20120405_222219_inLine
+BABEL_BP_107_28016_20120405_222219_outLine
+BABEL_BP_107_28107_20120208_142843_outLine
+BABEL_BP_107_28107_20120208_144923_outLine
+BABEL_BP_107_28132_20120405_152728_outLine
+BABEL_BP_107_28260_20120212_153106_inLine
+BABEL_BP_107_28557_20120507_001619_outLine
+BABEL_BP_107_28675_20120607_231549_inLine
+BABEL_BP_107_28675_20120607_231549_outLine
+BABEL_BP_107_28675_20120607_233243_inLine
+BABEL_BP_107_28675_20120607_233243_outLine
+BABEL_BP_107_28740_20120212_150039_inLine
+BABEL_BP_107_28740_20120212_150039_outLine
+BABEL_BP_107_29280_20120607_184929_outLine
+BABEL_BP_107_29280_20120607_190345_outLine
+BABEL_BP_107_29290_20120415_102435_inLine
+BABEL_BP_107_29335_20120424_013042_inLine
+BABEL_BP_107_29335_20120424_013042_outLine
+BABEL_BP_107_29407_20120607_132315_inLine
+BABEL_BP_107_29407_20120607_135318_inLine
+BABEL_BP_107_29444_20120322_191236_outLine
+BABEL_BP_107_29771_20120504_010738_outLine
+BABEL_BP_107_29959_20120418_001028_inLine
+BABEL_BP_107_29959_20120418_001028_outLine
+BABEL_BP_107_29988_20120516_233700_inLine
+BABEL_BP_107_30210_20120427_140255_inLine
+BABEL_BP_107_30210_20120502_202749_inLine
+BABEL_BP_107_30554_20120617_231216_outLine
+BABEL_BP_107_30583_20120212_210712_inLine
+BABEL_BP_107_30722_20120505_103655_inLine
+BABEL_BP_107_30722_20120505_103655_outLine
+BABEL_BP_107_31031_20120501_205733_inLine
+BABEL_BP_107_31031_20120501_210746_inLine
+BABEL_BP_107_31298_20120322_125112_outLine
+BABEL_BP_107_31393_20120325_171905_inLine
+BABEL_BP_107_31460_20120325_193921_inLine
+BABEL_BP_107_31606_20120607_131428_inLine
+BABEL_BP_107_31738_20120704_101130_outLine
+BABEL_BP_107_31902_20120417_015618_inLine
+BABEL_BP_107_31902_20120417_015618_outLine
+BABEL_BP_107_31917_20120501_202910_inLine
+BABEL_BP_107_31917_20120501_202910_outLine
+BABEL_BP_107_32132_20120418_211743_inLine
+BABEL_BP_107_32274_20120324_011402_inLine
+BABEL_BP_107_32295_20120617_141025_inLine
+BABEL_BP_107_32295_20120617_141025_outLine
+BABEL_BP_107_32334_20120429_005403_inLine
+BABEL_BP_107_32334_20120429_005403_outLine
+BABEL_BP_107_32400_20120426_000137_inLine
+BABEL_BP_107_32400_20120426_000137_outLine
+BABEL_BP_107_32710_20120418_215432_inLine
+BABEL_BP_107_32710_20120418_215432_outLine
+BABEL_BP_107_33012_20120611_155055_inLine
+BABEL_BP_107_33364_20120617_011853_inLine
+BABEL_BP_107_33364_20120617_011853_outLine
+BABEL_BP_107_33577_20120704_152608_outLine
+BABEL_BP_107_33671_20120330_001033_inLine
+BABEL_BP_107_33671_20120330_001033_outLine
+BABEL_BP_107_33742_20120608_143147_inLine
+BABEL_BP_107_33742_20120608_143147_outLine
+BABEL_BP_107_33817_20120423_130850_inLine
+BABEL_BP_107_33817_20120423_130850_outLine
+BABEL_BP_107_33969_20120429_214721_outLine
+BABEL_BP_107_34235_20120218_205136_outLine
+BABEL_BP_107_34480_20120608_151830_inLine
+BABEL_BP_107_34498_20120429_140537_inLine
+BABEL_BP_107_34498_20120429_140537_outLine
+BABEL_BP_107_34857_20120419_235853_inLine
+BABEL_BP_107_34961_20120212_223315_inLine
+BABEL_BP_107_34961_20120212_223315_outLine
+BABEL_BP_107_34961_20120212_224207_inLine
+BABEL_BP_107_34961_20120212_224207_outLine
+BABEL_BP_107_35011_20120321_223128_inLine
+BABEL_BP_107_35011_20120321_223128_outLine
+BABEL_BP_107_35016_20120611_185645_outLine
+BABEL_BP_107_35074_20120608_164703_outLine
+BABEL_BP_107_35179_20120414_153233_inLine
+BABEL_BP_107_35179_20120414_153233_outLine
+BABEL_BP_107_35188_20120614_131427_inLine
+BABEL_BP_107_35305_20120422_120043_outLine
+BABEL_BP_107_35357_20120614_212245_inLine
+BABEL_BP_107_35357_20120614_212245_outLine
+BABEL_BP_107_36037_20120616_153023_outLine
+BABEL_BP_107_36196_20120608_110319_inLine
+BABEL_BP_107_36196_20120608_111049_inLine
+BABEL_BP_107_36268_20120406_211711_inLine
+BABEL_BP_107_36268_20120406_211711_outLine
+BABEL_BP_107_36356_20120211_173247_inLine
+BABEL_BP_107_36356_20120211_173247_outLine
+BABEL_BP_107_36383_20120416_225701_outLine
+BABEL_BP_107_36391_20120505_171824_inLine
+BABEL_BP_107_36424_20120421_130549_inLine
+BABEL_BP_107_36424_20120421_130549_outLine
+BABEL_BP_107_36424_20120421_133610_inLine
+BABEL_BP_107_36424_20120421_133610_outLine
+BABEL_BP_107_36502_20120617_145859_inLine
+BABEL_BP_107_36502_20120617_145859_outLine
+BABEL_BP_107_36711_20120325_230112_inLine
+BABEL_BP_107_36711_20120325_230112_outLine
+BABEL_BP_107_37110_20120209_002706_inLine
+BABEL_BP_107_37110_20120209_002706_outLine
+BABEL_BP_107_37210_20120322_205536_outLine
+BABEL_BP_107_37285_20120325_000245_inLine
+BABEL_BP_107_37285_20120325_000245_outLine
+BABEL_BP_107_37335_20120616_150016_inLine
+BABEL_BP_107_37335_20120616_150016_outLine
+BABEL_BP_107_37374_20120418_185819_inLine
+BABEL_BP_107_37940_20120424_004619_inLine
+BABEL_BP_107_37940_20120424_004619_outLine
+BABEL_BP_107_38464_20120422_105536_outLine
+BABEL_BP_107_38592_20120704_150926_outLine
+BABEL_BP_107_38640_20120215_030154_inLine
+BABEL_BP_107_38640_20120215_030154_outLine
+BABEL_BP_107_38698_20120322_213531_inLine
+BABEL_BP_107_38698_20120322_213531_outLine
+BABEL_BP_107_38879_20120406_150304_inLine
+BABEL_BP_107_38879_20120406_150304_outLine
+BABEL_BP_107_39246_20120613_202128_inLine
+BABEL_BP_107_39246_20120613_202128_outLine
+BABEL_BP_107_39264_20120417_191639_inLine
+BABEL_BP_107_39264_20120417_191639_outLine
+BABEL_BP_107_39296_20120705_025906_inLine
+BABEL_BP_107_39384_20120324_010939_inLine
+BABEL_BP_107_39384_20120324_010939_outLine
+BABEL_BP_107_39384_20120324_011832_inLine
+BABEL_BP_107_39384_20120324_011832_outLine
+BABEL_BP_107_39430_20120325_015935_inLine
+BABEL_BP_107_39430_20120325_015935_outLine
+BABEL_BP_107_40002_20120502_174229_outLine
+BABEL_BP_107_40123_20120505_191426_inLine
+BABEL_BP_107_40123_20120505_191426_outLine
+BABEL_BP_107_40385_20120704_143210_outLine
+BABEL_BP_107_40477_20120323_194919_outLine
+BABEL_BP_107_40510_20120426_153808_inLine
+BABEL_BP_107_40510_20120426_153808_outLine
+BABEL_BP_107_40980_20120416_233130_inLine
+BABEL_BP_107_40980_20120416_233130_outLine
+BABEL_BP_107_40980_20120417_001128_inLine
+BABEL_BP_107_40980_20120417_001128_outLine
+BABEL_BP_107_41146_20120211_162158_inLine
+BABEL_BP_107_41590_20120610_162218_outLine
+BABEL_BP_107_41797_20120420_003902_inLine
+BABEL_BP_107_41797_20120420_003902_outLine
+BABEL_BP_107_42145_20120418_131525_inLine
+BABEL_BP_107_42266_20120407_182544_outLine
+BABEL_BP_107_43017_20120322_170152_inLine
+BABEL_BP_107_43017_20120322_170152_outLine
+BABEL_BP_107_43423_20120504_001214_inLine
+BABEL_BP_107_43423_20120504_010312_inLine
+BABEL_BP_107_43426_20120426_183951_inLine
+BABEL_BP_107_43426_20120426_183951_outLine
+BABEL_BP_107_43587_20120506_182330_inLine
+BABEL_BP_107_43652_20120416_175011_inLine
+BABEL_BP_107_43652_20120418_093619_inLine
+BABEL_BP_107_44129_20120512_023836_inLine
+BABEL_BP_107_44129_20120512_023836_outLine
+BABEL_BP_107_44829_20120404_224815_outLine
+BABEL_BP_107_44836_20120417_003600_outLine
+BABEL_BP_107_44943_20120506_191737_inLine
+BABEL_BP_107_45227_20120210_223857_inLine
+BABEL_BP_107_45511_20120212_170655_inLine
+BABEL_BP_107_45511_20120212_170655_outLine
+BABEL_BP_107_45570_20120509_151829_inLine
+BABEL_BP_107_45570_20120509_151829_outLine
+BABEL_BP_107_45793_20120211_040134_inLine
+BABEL_BP_107_45793_20120211_040134_outLine
+BABEL_BP_107_45929_20120418_215417_outLine
+BABEL_BP_107_45931_20120322_143234_inLine
+BABEL_BP_107_45931_20120322_143234_outLine
+BABEL_BP_107_46243_20120210_233353_inLine
+BABEL_BP_107_46243_20120210_233353_outLine
+BABEL_BP_107_46332_20120418_002934_inLine
+BABEL_BP_107_46332_20120418_002934_outLine
+BABEL_BP_107_46603_20120421_113906_inLine
+BABEL_BP_107_46756_20120429_195314_outLine
+BABEL_BP_107_46977_20120426_015005_inLine
+BABEL_BP_107_47263_20120422_150216_inLine
+BABEL_BP_107_47433_20120210_185410_outLine
+BABEL_BP_107_47618_20120502_004413_inLine
+BABEL_BP_107_47618_20120502_004413_outLine
+BABEL_BP_107_47661_20120216_224419_inLine
+BABEL_BP_107_47661_20120216_224419_outLine
+BABEL_BP_107_47794_20120514_175438_inLine
+BABEL_BP_107_47794_20120514_175438_outLine
+BABEL_BP_107_47823_20120516_204140_inLine
+BABEL_BP_107_47845_20120613_004732_outLine
+BABEL_BP_107_47906_20120415_224420_inLine
+BABEL_BP_107_47906_20120415_224420_outLine
+BABEL_BP_107_48188_20120422_150955_inLine
+BABEL_BP_107_48188_20120422_150955_outLine
+BABEL_BP_107_48418_20120421_163333_inLine +BABEL_BP_107_48511_20120322_145729_inLine +BABEL_BP_107_48511_20120322_145729_outLine +BABEL_BP_107_48559_20120502_201955_inLine +BABEL_BP_107_48559_20120502_201955_outLine +BABEL_BP_107_48607_20120607_215116_outLine +BABEL_BP_107_48733_20120418_142426_inLine +BABEL_BP_107_48733_20120418_142426_outLine +BABEL_BP_107_48753_20120426_134417_inLine +BABEL_BP_107_48753_20120426_134417_outLine +BABEL_BP_107_48812_20120323_162517_inLine +BABEL_BP_107_48812_20120324_182527_inLine +BABEL_BP_107_48976_20120220_152013_inLine +BABEL_BP_107_48976_20120220_152013_outLine +BABEL_BP_107_49192_20120421_190503_outLine +BABEL_BP_107_49239_20120429_144119_inLine +BABEL_BP_107_49346_20120611_192752_outLine +BABEL_BP_107_49351_20120614_132223_inLine +BABEL_BP_107_49351_20120614_132223_outLine +BABEL_BP_107_49371_20120608_002052_inLine +BABEL_BP_107_49541_20120325_223621_inLine +BABEL_BP_107_49541_20120325_223621_outLine +BABEL_BP_107_49689_20120415_163537_inLine +BABEL_BP_107_49689_20120415_163537_outLine +BABEL_BP_107_49714_20120509_113627_outLine +BABEL_BP_107_49773_20120211_151308_inLine +BABEL_BP_107_49773_20120211_151308_outLine +BABEL_BP_107_50028_20120704_192522_inLine +BABEL_BP_107_50028_20120704_192522_outLine +BABEL_BP_107_50141_20120505_233033_inLine +BABEL_BP_107_50141_20120505_233033_outLine +BABEL_BP_107_50201_20120216_001139_inLine +BABEL_BP_107_50201_20120216_001139_outLine +BABEL_BP_107_50298_20120507_152508_outLine +BABEL_BP_107_50409_20120608_205803_inLine +BABEL_BP_107_50468_20120420_114108_inLine +BABEL_BP_107_50468_20120420_114108_outLine +BABEL_BP_107_50468_20120420_115203_inLine +BABEL_BP_107_50468_20120420_115203_outLine +BABEL_BP_107_50476_20120430_225248_inLine +BABEL_BP_107_50476_20120430_225248_outLine +BABEL_BP_107_50718_20120321_125943_inLine +BABEL_BP_107_50752_20120421_202932_inLine +BABEL_BP_107_50752_20120421_202932_outLine +BABEL_BP_107_51052_20120424_004427_inLine +BABEL_BP_107_51052_20120424_004427_outLine +BABEL_BP_107_51073_20120216_010300_outLine +BABEL_BP_107_51117_20120211_034844_inLine +BABEL_BP_107_51117_20120211_034844_outLine +BABEL_BP_107_51136_20120405_142910_inLine +BABEL_BP_107_51136_20120405_142910_outLine +BABEL_BP_107_51446_20120417_221307_inLine +BABEL_BP_107_51446_20120417_221307_outLine +BABEL_BP_107_51448_20120608_170641_inLine +BABEL_BP_107_51448_20120608_171219_inLine +BABEL_BP_107_51663_20120506_160921_inLine +BABEL_BP_107_51727_20120424_225602_inLine +BABEL_BP_107_51727_20120424_225602_outLine +BABEL_BP_107_52154_20120503_203816_inLine +BABEL_BP_107_52807_20120608_171526_inLine +BABEL_BP_107_52807_20120608_171526_outLine +BABEL_BP_107_52902_20120421_150627_outLine +BABEL_BP_107_53463_20120421_150635_inLine +BABEL_BP_107_53463_20120421_150635_outLine +BABEL_BP_107_53463_20120421_152028_inLine +BABEL_BP_107_53463_20120421_152028_outLine +BABEL_BP_107_53649_20120611_193416_outLine +BABEL_BP_107_53653_20120607_150151_outLine +BABEL_BP_107_53703_20120502_153540_outLine +BABEL_BP_107_53824_20120503_223532_inLine +BABEL_BP_107_53824_20120503_223532_outLine +BABEL_BP_107_53824_20120503_225007_inLine +BABEL_BP_107_53824_20120503_225007_outLine +BABEL_BP_107_53982_20120509_013004_outLine +BABEL_BP_107_53994_20120501_161638_outLine +BABEL_BP_107_54241_20120324_013254_inLine +BABEL_BP_107_54241_20120324_013254_outLine +BABEL_BP_107_54332_20120608_182424_inLine +BABEL_BP_107_54332_20120608_183219_inLine +BABEL_BP_107_54518_20120608_120238_inLine +BABEL_BP_107_54785_20120602_195720_inLine 
+BABEL_BP_107_54787_20120405_202915_inLine +BABEL_BP_107_54787_20120405_202915_outLine +BABEL_BP_107_55182_20120209_015206_inLine +BABEL_BP_107_55355_20120608_155709_inLine +BABEL_BP_107_55355_20120612_142521_inLine +BABEL_BP_107_55396_20120321_141254_outLine +BABEL_BP_107_55470_20120421_134215_outLine +BABEL_BP_107_55874_20120504_184342_inLine +BABEL_BP_107_55874_20120504_184343_outLine +BABEL_BP_107_56039_20120516_215649_inLine +BABEL_BP_107_56039_20120516_215649_outLine +BABEL_BP_107_56070_20120220_174719_inLine +BABEL_BP_107_57148_20120217_014955_inLine +BABEL_BP_107_57148_20120217_014955_outLine +BABEL_BP_107_57148_20120217_024257_inLine +BABEL_BP_107_57148_20120217_024257_outLine +BABEL_BP_107_57422_20120508_014547_inLine +BABEL_BP_107_57422_20120508_014547_outLine +BABEL_BP_107_57457_20120617_193611_inLine +BABEL_BP_107_57457_20120617_193611_outLine +BABEL_BP_107_57619_20120505_151800_inLine +BABEL_BP_107_58108_20120509_141003_inLine +BABEL_BP_107_58108_20120509_141003_outLine +BABEL_BP_107_58137_20120421_185042_inLine +BABEL_BP_107_58137_20120421_185042_outLine +BABEL_BP_107_58190_20120506_195510_outLine +BABEL_BP_107_58232_20120501_122112_inLine +BABEL_BP_107_58232_20120501_122112_outLine +BABEL_BP_107_58536_20120501_013825_inLine +BABEL_BP_107_58536_20120501_013825_outLine +BABEL_BP_107_58746_20120614_181729_inLine +BABEL_BP_107_58746_20120614_181729_outLine +BABEL_BP_107_58863_20120218_011117_inLine +BABEL_BP_107_58863_20120218_011117_outLine +BABEL_BP_107_58863_20120218_012806_inLine +BABEL_BP_107_58863_20120218_012806_outLine +BABEL_BP_107_59071_20120423_184821_inLine +BABEL_BP_107_59383_20120502_205353_inLine +BABEL_BP_107_59383_20120502_205353_outLine +BABEL_BP_107_59628_20120428_215033_inLine +BABEL_BP_107_59764_20120524_205913_inLine +BABEL_BP_107_59924_20120417_194534_inLine +BABEL_BP_107_59924_20120417_194534_outLine +BABEL_BP_107_59961_20120218_211136_inLine +BABEL_BP_107_60106_20120211_003229_inLine +BABEL_BP_107_60106_20120211_003229_outLine +BABEL_BP_107_60183_20120428_164103_inLine +BABEL_BP_107_60183_20120428_164103_outLine +BABEL_BP_107_60193_20120328_014042_inLine +BABEL_BP_107_60238_20120506_132025_outLine +BABEL_BP_107_60338_20120505_131543_inLine +BABEL_BP_107_60338_20120505_131543_outLine +BABEL_BP_107_60605_20120506_215948_inLine +BABEL_BP_107_60826_20120424_235431_inLine +BABEL_BP_107_60826_20120424_235432_outLine +BABEL_BP_107_60842_20120617_190839_inLine +BABEL_BP_107_60842_20120617_190839_outLine +BABEL_BP_107_61408_20120628_141349_outLine +BABEL_BP_107_61449_20120421_232700_inLine +BABEL_BP_107_61449_20120421_232700_outLine +BABEL_BP_107_61906_20120414_201744_inLine +BABEL_BP_107_61906_20120414_201744_outLine +BABEL_BP_107_62132_20120506_160034_inLine +BABEL_BP_107_62160_20120323_180702_outLine +BABEL_BP_107_62163_20120628_180945_inLine +BABEL_BP_107_62163_20120628_182002_inLine +BABEL_BP_107_62177_20120323_001326_inLine +BABEL_BP_107_62255_20120506_204123_inLine +BABEL_BP_107_62255_20120506_204123_outLine +BABEL_BP_107_62277_20120504_173047_inLine +BABEL_BP_107_62696_20120508_135942_outLine +BABEL_BP_107_62696_20120509_100233_outLine +BABEL_BP_107_62993_20120608_130210_inLine +BABEL_BP_107_62993_20120608_130210_outLine +BABEL_BP_107_63076_20120704_011318_inLine +BABEL_BP_107_63116_20120419_163443_inLine +BABEL_BP_107_63233_20120323_003312_inLine +BABEL_BP_107_63352_20120421_222544_inLine +BABEL_BP_107_63368_20120418_215232_inLine +BABEL_BP_107_63368_20120418_215232_outLine +BABEL_BP_107_63368_20120418_220224_inLine 
+BABEL_BP_107_63368_20120418_220224_outLine +BABEL_BP_107_63368_20120418_222134_inLine +BABEL_BP_107_63368_20120418_222134_outLine +BABEL_BP_107_63369_20120614_191919_inLine +BABEL_BP_107_64205_20120428_020155_inLine +BABEL_BP_107_64351_20120513_193703_outLine +BABEL_BP_107_64724_20120503_155446_inLine +BABEL_BP_107_64724_20120503_155446_outLine +BABEL_BP_107_64889_20120503_174229_inLine +BABEL_BP_107_64889_20120503_174229_outLine +BABEL_BP_107_65414_20120608_131726_inLine +BABEL_BP_107_65743_20120404_191932_inLine +BABEL_BP_107_65743_20120404_191932_outLine +BABEL_BP_107_65989_20120419_141422_inLine +BABEL_BP_107_66451_20120214_215503_inLine +BABEL_BP_107_66451_20120214_215503_outLine +BABEL_BP_107_66499_20120610_220818_inLine +BABEL_BP_107_66559_20120421_185343_inLine +BABEL_BP_107_66709_20120617_152656_outLine +BABEL_BP_107_66709_20120617_153822_outLine +BABEL_BP_107_66790_20120421_182115_inLine +BABEL_BP_107_66839_20120613_192022_inLine +BABEL_BP_107_66839_20120613_192022_outLine +BABEL_BP_107_66866_20120418_001946_inLine +BABEL_BP_107_66866_20120418_001946_outLine +BABEL_BP_107_66964_20120419_205513_inLine +BABEL_BP_107_66964_20120419_205513_outLine +BABEL_BP_107_67555_20120323_130439_outLine +BABEL_BP_107_67628_20120418_215117_inLine +BABEL_BP_107_67798_20120627_141236_inLine +BABEL_BP_107_68009_20120608_112155_inLine +BABEL_BP_107_68129_20120611_013309_outLine +BABEL_BP_107_68191_20120428_114953_outLine +BABEL_BP_107_68295_20120506_210459_outLine +BABEL_BP_107_68362_20120503_194813_outLine +BABEL_BP_107_68545_20120421_220606_inLine +BABEL_BP_107_68545_20120421_220606_outLine +BABEL_BP_107_68671_20120608_205710_inLine +BABEL_BP_107_68671_20120608_205710_outLine +BABEL_BP_107_68767_20120214_214534_inLine +BABEL_BP_107_68767_20120214_214534_outLine +BABEL_BP_107_69028_20120430_132441_inLine +BABEL_BP_107_69049_20120322_215956_inLine +BABEL_BP_107_69137_20120424_183202_inLine +BABEL_BP_107_69137_20120424_183202_outLine +BABEL_BP_107_69275_20120608_210354_inLine +BABEL_BP_107_69295_20120501_154139_inLine +BABEL_BP_107_70000_20120618_004254_inLine +BABEL_BP_107_70000_20120618_004254_outLine +BABEL_BP_107_70077_20120428_170417_inLine +BABEL_BP_107_70120_20120418_213104_inLine +BABEL_BP_107_70120_20120418_213104_outLine +BABEL_BP_107_70285_20120212_214056_inLine +BABEL_BP_107_70323_20120617_122402_outLine +BABEL_BP_107_70441_20120704_163546_inLine +BABEL_BP_107_70441_20120704_163546_outLine +BABEL_BP_107_70511_20120618_124928_outLine +BABEL_BP_107_70615_20120208_233912_inLine +BABEL_BP_107_70615_20120208_233912_outLine +BABEL_BP_107_70975_20120407_011601_inLine +BABEL_BP_107_70975_20120407_011601_outLine +BABEL_BP_107_71176_20120418_195323_inLine +BABEL_BP_107_71176_20120418_195323_outLine +BABEL_BP_107_71739_20120430_125259_inLine +BABEL_BP_107_71741_20120211_231000_inLine +BABEL_BP_107_71741_20120211_231000_outLine +BABEL_BP_107_71844_20120212_180004_inLine +BABEL_BP_107_71927_20120516_204724_inLine +BABEL_BP_107_72269_20120416_010327_inLine +BABEL_BP_107_72269_20120416_010327_outLine +BABEL_BP_107_72297_20120608_185443_inLine +BABEL_BP_107_72297_20120608_185443_outLine +BABEL_BP_107_72297_20120608_190156_inLine +BABEL_BP_107_72297_20120608_190156_outLine +BABEL_BP_107_72647_20120614_125725_inLine +BABEL_BP_107_72718_20120505_025006_inLine +BABEL_BP_107_72718_20120505_025006_outLine +BABEL_BP_107_72879_20120403_141911_inLine +BABEL_BP_107_72879_20120403_141911_outLine +BABEL_BP_107_73205_20120211_191427_outLine +BABEL_BP_107_73438_20120502_201055_inLine 
+BABEL_BP_107_73438_20120502_201055_outLine +BABEL_BP_107_73440_20120416_172035_inLine +BABEL_BP_107_73761_20120424_154013_inLine +BABEL_BP_107_73761_20120424_154013_outLine +BABEL_BP_107_73921_20120501_000425_outLine +BABEL_BP_107_74043_20120323_014301_outLine +BABEL_BP_107_74317_20120502_225211_inLine +BABEL_BP_107_74317_20120502_225211_outLine +BABEL_BP_107_74395_20120418_140703_inLine +BABEL_BP_107_74395_20120418_140703_outLine +BABEL_BP_107_74404_20120212_134850_outLine +BABEL_BP_107_74625_20120425_234344_inLine +BABEL_BP_107_74700_20120610_233419_inLine +BABEL_BP_107_74823_20120217_022832_inLine +BABEL_BP_107_74823_20120217_022832_outLine +BABEL_BP_107_74935_20120616_144642_inLine +BABEL_BP_107_74974_20120617_143904_inLine +BABEL_BP_107_74974_20120617_143904_outLine +BABEL_BP_107_74986_20120416_011008_inLine +BABEL_BP_107_74986_20120416_011008_outLine +BABEL_BP_107_74986_20120416_011927_inLine +BABEL_BP_107_74986_20120416_011927_outLine +BABEL_BP_107_75036_20120325_233130_inLine +BABEL_BP_107_75036_20120325_233130_outLine +BABEL_BP_107_75333_20120505_200116_inLine +BABEL_BP_107_75333_20120505_200116_outLine +BABEL_BP_107_75498_20120506_171232_inLine +BABEL_BP_107_75498_20120506_171232_outLine +BABEL_BP_107_75799_20120429_140233_inLine +BABEL_BP_107_75799_20120429_140233_outLine +BABEL_BP_107_75815_20120217_141539_inLine +BABEL_BP_107_75815_20120217_141539_outLine +BABEL_BP_107_76252_20120705_003603_outLine +BABEL_BP_107_76341_20120215_201638_inLine +BABEL_BP_107_76341_20120215_201638_outLine +BABEL_BP_107_76661_20120405_132625_inLine +BABEL_BP_107_76691_20120501_002016_inLine +BABEL_BP_107_76716_20120418_215649_outLine +BABEL_BP_107_76733_20120424_181359_inLine +BABEL_BP_107_76733_20120424_181359_outLine +BABEL_BP_107_76733_20120424_183605_inLine +BABEL_BP_107_76733_20120424_183605_outLine +BABEL_BP_107_76748_20120504_181420_inLine +BABEL_BP_107_76919_20120507_010805_outLine +BABEL_BP_107_76925_20120407_015139_inLine +BABEL_BP_107_76944_20120505_000745_inLine +BABEL_BP_107_76944_20120505_000745_outLine +BABEL_BP_107_76993_20120501_125118_inLine +BABEL_BP_107_76993_20120501_125118_outLine +BABEL_BP_107_77238_20120322_211133_outLine +BABEL_BP_107_77244_20120429_164842_inLine +BABEL_BP_107_77244_20120429_164842_outLine +BABEL_BP_107_77315_20120527_222821_outLine +BABEL_BP_107_77338_20120617_171454_inLine +BABEL_BP_107_77338_20120617_171454_outLine +BABEL_BP_107_77473_20120610_000112_inLine +BABEL_BP_107_77886_20120326_191938_inLine +BABEL_BP_107_77886_20120326_191938_outLine +BABEL_BP_107_78094_20120212_205141_inLine +BABEL_BP_107_78094_20120212_205141_outLine +BABEL_BP_107_78487_20120430_133108_inLine +BABEL_BP_107_78487_20120430_133108_outLine +BABEL_BP_107_78514_20120617_131155_outLine +BABEL_BP_107_79284_20120511_180310_inLine +BABEL_BP_107_79284_20120511_180310_outLine +BABEL_BP_107_79495_20120222_195716_inLine +BABEL_BP_107_79619_20120420_115502_inLine +BABEL_BP_107_79619_20120420_115502_outLine +BABEL_BP_107_79632_20120428_182831_inLine +BABEL_BP_107_79632_20120428_182831_outLine +BABEL_BP_107_79860_20120328_023545_inLine +BABEL_BP_107_79944_20120424_213833_inLine +BABEL_BP_107_79970_20120418_214316_inLine +BABEL_BP_107_80008_20120218_225347_inLine +BABEL_BP_107_80008_20120218_225347_outLine +BABEL_BP_107_80282_20120627_190514_inLine +BABEL_BP_107_80282_20120627_190935_inLine +BABEL_BP_107_80290_20120501_134226_inLine +BABEL_BP_107_80290_20120501_134226_outLine +BABEL_BP_107_80337_20120608_000801_inLine +BABEL_BP_107_80337_20120608_000801_outLine 
+BABEL_BP_107_80638_20120501_223037_inLine +BABEL_BP_107_80638_20120501_223037_outLine +BABEL_BP_107_81056_20120502_155358_inLine +BABEL_BP_107_81056_20120502_155358_outLine +BABEL_BP_107_81321_20120329_030424_outLine +BABEL_BP_107_81486_20120213_035232_inLine +BABEL_BP_107_81486_20120213_040319_inLine +BABEL_BP_107_81535_20120421_151505_inLine +BABEL_BP_107_81535_20120421_151505_outLine +BABEL_BP_107_81611_20120616_154507_outLine +BABEL_BP_107_81717_20120426_185608_inLine +BABEL_BP_107_82006_20120417_133143_outLine +BABEL_BP_107_82025_20120325_012956_inLine +BABEL_BP_107_82103_20120326_172335_inLine +BABEL_BP_107_82103_20120326_172335_outLine +BABEL_BP_107_82131_20120704_135728_inLine +BABEL_BP_107_82131_20120704_211005_inLine +BABEL_BP_107_82591_20120407_185008_outLine +BABEL_BP_107_82599_20120608_140933_outLine +BABEL_BP_107_82766_20120627_112435_outLine +BABEL_BP_107_82880_20120705_001819_inLine +BABEL_BP_107_82880_20120705_001819_outLine +BABEL_BP_107_83017_20120608_125136_inLine +BABEL_BP_107_83053_20120426_184045_inLine +BABEL_BP_107_83256_20120212_162557_outLine +BABEL_BP_107_83360_20120418_000230_inLine +BABEL_BP_107_83360_20120418_000230_outLine +BABEL_BP_107_83529_20120608_152238_outLine +BABEL_BP_107_83700_20120427_121525_inLine +BABEL_BP_107_83700_20120427_121525_outLine +BABEL_BP_107_83702_20120418_010601_inLine +BABEL_BP_107_83702_20120418_010601_outLine +BABEL_BP_107_83982_20120704_125429_outLine +BABEL_BP_107_83982_20120704_125430_inLine +BABEL_BP_107_83982_20120704_131324_inLine +BABEL_BP_107_83982_20120704_131324_outLine +BABEL_BP_107_84171_20120504_185725_inLine +BABEL_BP_107_84335_20120418_002843_inLine +BABEL_BP_107_84532_20120703_171302_inLine +BABEL_BP_107_84540_20120328_205952_outLine +BABEL_BP_107_84543_20120503_005623_inLine +BABEL_BP_107_84543_20120503_005623_outLine +BABEL_BP_107_84943_20120405_134459_inLine +BABEL_BP_107_85083_20120425_024151_inLine +BABEL_BP_107_85354_20120704_145327_inLine +BABEL_BP_107_85354_20120704_145327_outLine +BABEL_BP_107_85573_20120208_152239_inLine +BABEL_BP_107_85686_20120627_180412_inLine +BABEL_BP_107_85686_20120627_180413_outLine +BABEL_BP_107_85716_20120330_201512_outLine +BABEL_BP_107_85716_20120330_202652_outLine +BABEL_BP_107_85819_20120705_030943_inLine +BABEL_BP_107_85819_20120705_030944_outLine +BABEL_BP_107_86016_20120417_225748_inLine +BABEL_BP_107_86029_20120212_235447_inLine +BABEL_BP_107_86419_20120209_010052_inLine +BABEL_BP_107_86419_20120209_010052_outLine +BABEL_BP_107_86890_20120322_202435_inLine +BABEL_BP_107_87167_20120211_230800_outLine +BABEL_BP_107_87481_20120513_191237_inLine +BABEL_BP_107_87481_20120513_191237_outLine +BABEL_BP_107_87539_20120418_225114_inLine +BABEL_BP_107_87539_20120418_225114_outLine +BABEL_BP_107_87671_20120218_011104_inLine +BABEL_BP_107_87857_20120325_000202_inLine +BABEL_BP_107_88243_20120322_210747_inLine +BABEL_BP_107_88243_20120322_210747_outLine +BABEL_BP_107_88253_20120511_165340_inLine +BABEL_BP_107_88253_20120511_165340_outLine +BABEL_BP_107_88294_20120322_163142_outLine +BABEL_BP_107_88506_20120503_191321_inLine +BABEL_BP_107_88506_20120503_191321_outLine +BABEL_BP_107_88532_20120416_012644_inLine +BABEL_BP_107_89619_20120217_174102_inLine +BABEL_BP_107_89619_20120217_174102_outLine +BABEL_BP_107_89644_20120501_170949_inLine +BABEL_BP_107_89644_20120501_170949_outLine +BABEL_BP_107_89657_20120610_213215_inLine +BABEL_BP_107_89657_20120610_213215_outLine +BABEL_BP_107_89674_20120212_162158_inLine +BABEL_BP_107_89674_20120212_162158_outLine 
+BABEL_BP_107_89965_20120505_003121_inLine +BABEL_BP_107_89965_20120505_003121_outLine +BABEL_BP_107_90313_20120325_200742_inLine +BABEL_BP_107_90393_20120417_220816_inLine +BABEL_BP_107_90393_20120417_220817_outLine +BABEL_BP_107_90577_20120118_141830_inLine +BABEL_BP_107_90577_20120118_141830_outLine +BABEL_BP_107_90609_20120216_194251_inLine +BABEL_BP_107_90764_20120418_004231_outLine +BABEL_BP_107_91000_20120529_151028_inLine +BABEL_BP_107_91002_20120429_192712_inLine +BABEL_BP_107_91002_20120429_192712_outLine +BABEL_BP_107_91007_20120612_144506_inLine +BABEL_BP_107_91040_20120618_152624_outLine +BABEL_BP_107_91136_20120427_122059_inLine +BABEL_BP_107_91401_20120213_010307_inLine +BABEL_BP_107_91401_20120213_010307_outLine +BABEL_BP_107_91406_20120429_193057_inLine +BABEL_BP_107_91406_20120429_193057_outLine +BABEL_BP_107_91409_20120520_225023_outLine +BABEL_BP_107_91409_20120520_231205_outLine +BABEL_BP_107_91660_20120510_181954_inLine +BABEL_BP_107_91660_20120510_181954_outLine +BABEL_BP_107_91660_20120510_182853_inLine +BABEL_BP_107_91660_20120510_182853_outLine +BABEL_BP_107_91660_20120510_184146_inLine +BABEL_BP_107_91660_20120510_184146_outLine +BABEL_BP_107_91723_20120323_144335_outLine +BABEL_BP_107_91865_20120429_214728_inLine +BABEL_BP_107_91865_20120429_214728_outLine +BABEL_BP_107_91905_20120504_210602_inLine +BABEL_BP_107_91905_20120504_210602_outLine +BABEL_BP_107_91975_20120703_173220_inLine +BABEL_BP_107_91975_20120703_173220_outLine +BABEL_BP_107_91979_20120209_000610_inLine +BABEL_BP_107_92002_20120418_214926_outLine +BABEL_BP_107_92407_20120210_183713_inLine +BABEL_BP_107_92407_20120210_183713_outLine +BABEL_BP_107_92436_20120213_013131_inLine +BABEL_BP_107_92436_20120213_013131_outLine +BABEL_BP_107_92591_20120505_140206_outLine +BABEL_BP_107_92602_20120216_214746_inLine +BABEL_BP_107_92602_20120216_215738_inLine +BABEL_BP_107_92603_20120416_011244_inLine +BABEL_BP_107_92603_20120416_011244_outLine +BABEL_BP_107_92628_20120323_014512_inLine +BABEL_BP_107_92628_20120323_014512_outLine +BABEL_BP_107_92643_20120608_122156_inLine +BABEL_BP_107_92643_20120608_123106_inLine +BABEL_BP_107_92735_20120413_181602_inLine +BABEL_BP_107_92789_20120416_165856_inLine +BABEL_BP_107_92800_20120412_013211_outLine +BABEL_BP_107_93044_20120607_140719_inLine +BABEL_BP_107_93044_20120607_140719_outLine +BABEL_BP_107_93509_20120321_230219_inLine +BABEL_BP_107_93509_20120321_230219_outLine +BABEL_BP_107_93804_20120703_232729_inLine +BABEL_BP_107_93804_20120703_233401_inLine +BABEL_BP_107_93974_20120627_184419_inLine +BABEL_BP_107_93974_20120627_184419_outLine +BABEL_BP_107_93979_20120422_134735_inLine +BABEL_BP_107_93979_20120422_134735_outLine +BABEL_BP_107_94149_20120405_220033_outLine +BABEL_BP_107_94162_20120425_235433_inLine +BABEL_BP_107_94223_20120215_204525_inLine +BABEL_BP_107_94514_20120417_001615_inLine +BABEL_BP_107_94514_20120417_001615_outLine +BABEL_BP_107_94514_20120417_003504_inLine +BABEL_BP_107_94514_20120417_003504_outLine +BABEL_BP_107_94541_20120705_024032_outLine +BABEL_BP_107_94542_20120512_223011_inLine +BABEL_BP_107_94542_20120512_223011_outLine +BABEL_BP_107_94694_20120508_120203_inLine +BABEL_BP_107_94694_20120508_120203_outLine +BABEL_BP_107_94696_20120608_185951_inLine +BABEL_BP_107_94696_20120608_185951_outLine +BABEL_BP_107_94814_20120501_130313_inLine +BABEL_BP_107_94814_20120501_130313_outLine +BABEL_BP_107_94989_20120627_120236_outLine +BABEL_BP_107_95121_20120628_123304_inLine +BABEL_BP_107_95423_20120415_201523_inLine 
+BABEL_BP_107_95423_20120415_201523_outLine +BABEL_BP_107_95533_20120505_005928_inLine +BABEL_BP_107_95533_20120505_005928_outLine +BABEL_BP_107_95542_20120502_223446_inLine +BABEL_BP_107_95542_20120502_223446_outLine +BABEL_BP_107_95566_20120505_162738_inLine +BABEL_BP_107_95572_20120406_151856_inLine +BABEL_BP_107_95572_20120406_151856_outLine +BABEL_BP_107_95589_20120419_162645_inLine +BABEL_BP_107_95589_20120419_162645_outLine +BABEL_BP_107_95815_20120322_160344_inLine +BABEL_BP_107_95815_20120322_160344_outLine +BABEL_BP_107_95996_20120324_230119_inLine +BABEL_BP_107_96302_20120510_023815_inLine +BABEL_BP_107_96302_20120510_023815_outLine +BABEL_BP_107_96667_20120426_182837_inLine +BABEL_BP_107_96667_20120426_182837_outLine +BABEL_BP_107_96959_20120505_014233_inLine +BABEL_BP_107_96959_20120505_014233_outLine +BABEL_BP_107_97260_20120324_012659_outLine +BABEL_BP_107_97318_20120608_183537_inLine +BABEL_BP_107_97318_20120608_183537_outLine +BABEL_BP_107_97629_20120420_202833_inLine +BABEL_BP_107_97629_20120420_202833_outLine +BABEL_BP_107_97946_20120411_213631_outLine +BABEL_BP_107_98086_20120609_185014_inLine +BABEL_BP_107_98086_20120609_185014_outLine +BABEL_BP_107_98099_20120618_120506_outLine +BABEL_BP_107_98219_20120512_202308_inLine +BABEL_BP_107_98219_20120512_202308_outLine +BABEL_BP_107_98219_20120512_203451_inLine +BABEL_BP_107_98219_20120512_203451_outLine +BABEL_BP_107_98402_20120421_162435_inLine +BABEL_BP_107_98402_20120421_162435_outLine +BABEL_BP_107_98640_20120425_213908_outLine +BABEL_BP_107_98675_20120419_225133_inLine +BABEL_BP_107_98675_20120419_225133_outLine +BABEL_BP_107_99414_20120430_200633_inLine +BABEL_BP_107_99414_20120430_200633_outLine +BABEL_BP_107_99567_20120405_154443_outLine +BABEL_BP_107_99571_20120322_165034_inLine +BABEL_BP_107_99571_20120322_165034_outLine +BABEL_BP_107_99694_20120322_165823_inLine +BABEL_BP_107_99694_20120322_165823_outLine +BABEL_BP_107_99731_20120618_005616_outLine +BABEL_BP_107_99764_20120415_202745_inLine +BABEL_BP_107_99823_20120511_002213_inLine +BABEL_BP_107_99823_20120511_002213_outLine +BABEL_BP_107_99929_20120612_143030_inLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/dev.list b/egs/babel/s5d/conf/lists/201-haitian/dev.list
new file mode 100644
index 00000000000..208f92ee9cb
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/dev.list
@@ -0,0 +1,126 @@
+BABEL_OP1_201_10019_20130527_022947_inLine +BABEL_OP1_201_10019_20130527_022947_outLine +BABEL_OP1_201_10319_20130306_021244_inLine +BABEL_OP1_201_10319_20130306_021244_outLine +BABEL_OP1_201_14440_20130302_012105_inLine +BABEL_OP1_201_14440_20130302_012105_outLine +BABEL_OP1_201_15324_20130228_031225_inLine +BABEL_OP1_201_15324_20130228_031225_outLine +BABEL_OP1_201_15535_20130305_062354_inLine +BABEL_OP1_201_15535_20130305_062354_outLine +BABEL_OP1_201_15638_20130305_060156_inLine +BABEL_OP1_201_15638_20130305_060156_outLine +BABEL_OP1_201_16184_20130305_081912_inLine +BABEL_OP1_201_16184_20130305_081912_outLine +BABEL_OP1_201_21029_20130529_114410_inLine +BABEL_OP1_201_21029_20130529_114410_outLine +BABEL_OP1_201_21029_20130529_115127_inLine +BABEL_OP1_201_21029_20130529_115127_outLine +BABEL_OP1_201_21109_20130414_085917_inLine +BABEL_OP1_201_21109_20130414_085917_outLine +BABEL_OP1_201_21393_20130501_071647_inLine +BABEL_OP1_201_21393_20130501_071647_outLine +BABEL_OP1_201_23151_20130428_054353_inLine +BABEL_OP1_201_23151_20130428_054353_outLine +BABEL_OP1_201_23983_20130503_023139_inLine +BABEL_OP1_201_23983_20130503_023139_outLine
+BABEL_OP1_201_23983_20130503_023729_inLine +BABEL_OP1_201_23983_20130503_023729_outLine +BABEL_OP1_201_26074_20130522_003756_inLine +BABEL_OP1_201_26074_20130522_003756_outLine +BABEL_OP1_201_26206_20130302_073520_inLine +BABEL_OP1_201_26206_20130302_073520_outLine +BABEL_OP1_201_32832_20130430_060411_inLine +BABEL_OP1_201_32832_20130430_060411_outLine +BABEL_OP1_201_32861_20130429_111248_inLine +BABEL_OP1_201_32861_20130429_111248_outLine +BABEL_OP1_201_32998_20130531_000201_inLine +BABEL_OP1_201_32998_20130531_000201_outLine +BABEL_OP1_201_35583_20130429_033957_inLine +BABEL_OP1_201_35583_20130429_033957_outLine +BABEL_OP1_201_41400_20130430_094739_inLine +BABEL_OP1_201_41400_20130430_094739_outLine +BABEL_OP1_201_41609_20130404_034518_inLine +BABEL_OP1_201_41609_20130404_034518_outLine +BABEL_OP1_201_45843_20130227_092425_inLine +BABEL_OP1_201_45843_20130227_092425_outLine +BABEL_OP1_201_45843_20130227_095551_inLine +BABEL_OP1_201_45843_20130227_095551_outLine +BABEL_OP1_201_46315_20130302_045420_inLine +BABEL_OP1_201_46315_20130302_045420_outLine +BABEL_OP1_201_47877_20130429_092603_inLine +BABEL_OP1_201_47877_20130429_092603_outLine +BABEL_OP1_201_49197_20130529_061436_inLine +BABEL_OP1_201_49197_20130529_061436_outLine +BABEL_OP1_201_49287_20130227_083257_inLine +BABEL_OP1_201_49287_20130227_083257_outLine +BABEL_OP1_201_51858_20130224_055705_inLine +BABEL_OP1_201_51858_20130224_055705_outLine +BABEL_OP1_201_52025_20130226_082606_inLine +BABEL_OP1_201_52025_20130226_082606_outLine +BABEL_OP1_201_52694_20130518_050051_inLine +BABEL_OP1_201_52694_20130518_050051_outLine +BABEL_OP1_201_54162_20130508_044116_inLine +BABEL_OP1_201_54162_20130508_044116_outLine +BABEL_OP1_201_59898_20130223_041449_inLine +BABEL_OP1_201_59898_20130223_041449_outLine +BABEL_OP1_201_61011_20130228_062832_inLine +BABEL_OP1_201_61011_20130228_062832_outLine +BABEL_OP1_201_61357_20130602_030259_inLine +BABEL_OP1_201_61357_20130602_030259_outLine +BABEL_OP1_201_62456_20130521_040629_inLine +BABEL_OP1_201_62456_20130521_040629_outLine +BABEL_OP1_201_63757_20130531_014819_inLine +BABEL_OP1_201_63757_20130531_014819_outLine +BABEL_OP1_201_65252_20130503_025634_inLine +BABEL_OP1_201_65252_20130503_025634_outLine +BABEL_OP1_201_65640_20130429_103434_inLine +BABEL_OP1_201_65640_20130429_103434_outLine +BABEL_OP1_201_67085_20130503_043953_inLine +BABEL_OP1_201_67085_20130503_043953_outLine +BABEL_OP1_201_67842_20130528_081111_inLine +BABEL_OP1_201_67842_20130528_081111_outLine +BABEL_OP1_201_70110_20130224_022802_inLine +BABEL_OP1_201_70110_20130224_022802_outLine +BABEL_OP1_201_70716_20130503_015538_inLine +BABEL_OP1_201_70716_20130503_015538_outLine +BABEL_OP1_201_70986_20130307_075426_inLine +BABEL_OP1_201_70986_20130307_075426_outLine +BABEL_OP1_201_71263_20130602_021725_inLine +BABEL_OP1_201_71263_20130602_021725_outLine +BABEL_OP1_201_72654_20130510_063658_inLine +BABEL_OP1_201_72654_20130510_063658_outLine +BABEL_OP1_201_74226_20130303_125222_inLine +BABEL_OP1_201_74226_20130303_125222_outLine +BABEL_OP1_201_75223_20130221_024906_inLine +BABEL_OP1_201_75223_20130221_024906_outLine +BABEL_OP1_201_77112_20130528_050544_inLine +BABEL_OP1_201_77112_20130528_050544_outLine +BABEL_OP1_201_78194_20121206_064008_inLine +BABEL_OP1_201_78194_20121206_064008_outLine +BABEL_OP1_201_78360_20130430_101414_inLine +BABEL_OP1_201_78360_20130430_101414_outLine +BABEL_OP1_201_78454_20130531_032436_inLine +BABEL_OP1_201_78454_20130531_032436_outLine +BABEL_OP1_201_79571_20130302_074959_inLine 
+BABEL_OP1_201_79571_20130302_074959_outLine +BABEL_OP1_201_80881_20130220_022131_inLine +BABEL_OP1_201_80881_20130220_022131_outLine +BABEL_OP1_201_81553_20130430_095301_inLine +BABEL_OP1_201_81553_20130430_095301_outLine +BABEL_OP1_201_82035_20130601_052036_inLine +BABEL_OP1_201_82035_20130601_052036_outLine +BABEL_OP1_201_84125_20130227_022410_inLine +BABEL_OP1_201_84125_20130227_022410_outLine +BABEL_OP1_201_85439_20130503_071053_inLine +BABEL_OP1_201_85439_20130503_071053_outLine +BABEL_OP1_201_88982_20130512_060722_inLine +BABEL_OP1_201_88982_20130512_060722_outLine +BABEL_OP1_201_96584_20130427_001740_inLine +BABEL_OP1_201_96584_20130427_001740_outLine +BABEL_OP1_201_96842_20130503_081834_inLine +BABEL_OP1_201_96842_20130503_081834_outLine +BABEL_OP1_201_96985_20130313_031020_inLine +BABEL_OP1_201_96985_20130313_031020_outLine +BABEL_OP1_201_99813_20130514_080612_inLine +BABEL_OP1_201_99813_20130514_080612_outLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/eval.list b/egs/babel/s5d/conf/lists/201-haitian/eval.list
new file mode 100644
index 00000000000..d9a4445b43d
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/eval.list
@@ -0,0 +1,194 @@
+BABEL_OP1_201_10188_20121207_034031_inLine +BABEL_OP1_201_10188_20121207_034031_outLine +BABEL_OP1_201_11581_20130524_035647_inLine +BABEL_OP1_201_11581_20130524_035647_outLine +BABEL_OP1_201_13427_20130517_044959_inLine +BABEL_OP1_201_13427_20130517_044959_outLine +BABEL_OP1_201_14228_20130312_063112_inLine +BABEL_OP1_201_14228_20130312_063112_outLine +BABEL_OP1_201_14537_20130604_084139_inLine +BABEL_OP1_201_14537_20130604_084139_outLine +BABEL_OP1_201_15926_20130302_065808_inLine +BABEL_OP1_201_15926_20130302_065808_outLine +BABEL_OP1_201_16056_20130328_050018_inLine +BABEL_OP1_201_16056_20130328_050018_outLine +BABEL_OP1_201_17165_20130509_020154_inLine +BABEL_OP1_201_17165_20130509_020154_outLine +BABEL_OP1_201_18242_20130603_023106_inLine +BABEL_OP1_201_18242_20130603_023106_outLine +BABEL_OP1_201_19101_20130521_032103_inLine +BABEL_OP1_201_19101_20130521_032103_outLine +BABEL_OP1_201_19545_20130517_060948_inLine +BABEL_OP1_201_19545_20130517_060948_outLine +BABEL_OP1_201_19621_20130517_031837_inLine +BABEL_OP1_201_19621_20130517_031837_outLine +BABEL_OP1_201_19672_20130301_110157_inLine +BABEL_OP1_201_19672_20130301_110157_outLine +BABEL_OP1_201_22641_20130222_024500_inLine +BABEL_OP1_201_22641_20130222_024500_outLine +BABEL_OP1_201_23260_20130502_085418_inLine +BABEL_OP1_201_23260_20130502_085418_outLine +BABEL_OP1_201_23395_20130521_052906_inLine +BABEL_OP1_201_23395_20130521_052906_outLine +BABEL_OP1_201_23628_20130528_052627_inLine +BABEL_OP1_201_23628_20130528_052627_outLine +BABEL_OP1_201_23731_20130517_014107_inLine +BABEL_OP1_201_23731_20130517_014107_outLine +BABEL_OP1_201_25412_20130531_050830_inLine +BABEL_OP1_201_25412_20130531_050830_outLine +BABEL_OP1_201_31484_20130304_060634_inLine +BABEL_OP1_201_31484_20130304_060634_outLine +BABEL_OP1_201_31583_20130630_090026_inLine +BABEL_OP1_201_31583_20130630_090026_outLine +BABEL_OP1_201_34019_20130224_123823_inLine +BABEL_OP1_201_34019_20130224_123823_outLine +BABEL_OP1_201_34688_20130226_033106_inLine +BABEL_OP1_201_34688_20130226_033106_outLine +BABEL_OP1_201_35202_20130228_143257_inLine +BABEL_OP1_201_35202_20130228_143257_outLine +BABEL_OP1_201_35202_20130228_144257_inLine +BABEL_OP1_201_35202_20130228_144257_outLine +BABEL_OP1_201_37064_20130528_095008_inLine +BABEL_OP1_201_37064_20130528_095008_outLine +BABEL_OP1_201_37290_20130602_070243_inLine
+BABEL_OP1_201_37290_20130602_070243_outLine +BABEL_OP1_201_39159_20130226_043216_inLine +BABEL_OP1_201_39159_20130226_043216_outLine +BABEL_OP1_201_39744_20130226_025333_inLine +BABEL_OP1_201_39744_20130226_025333_outLine +BABEL_OP1_201_41038_20130301_095640_inLine +BABEL_OP1_201_41038_20130301_095640_outLine +BABEL_OP1_201_41745_20130530_021647_inLine +BABEL_OP1_201_41745_20130530_021647_outLine +BABEL_OP1_201_43285_20130303_112216_inLine +BABEL_OP1_201_43285_20130303_112216_outLine +BABEL_OP1_201_44255_20130427_232421_inLine +BABEL_OP1_201_44255_20130427_232421_outLine +BABEL_OP1_201_44255_20130427_233501_inLine +BABEL_OP1_201_44255_20130427_233501_outLine +BABEL_OP1_201_44847_20130228_021744_inLine +BABEL_OP1_201_44847_20130228_021744_outLine +BABEL_OP1_201_44847_20130301_014421_inLine +BABEL_OP1_201_44847_20130301_014421_outLine +BABEL_OP1_201_44868_20130301_094502_inLine +BABEL_OP1_201_44868_20130301_094502_outLine +BABEL_OP1_201_44868_20130301_095004_inLine +BABEL_OP1_201_44868_20130301_095004_outLine +BABEL_OP1_201_45106_20130228_110111_inLine +BABEL_OP1_201_45106_20130228_110111_outLine +BABEL_OP1_201_46202_20130301_041831_inLine +BABEL_OP1_201_46202_20130301_041831_outLine +BABEL_OP1_201_46712_20130527_095034_inLine +BABEL_OP1_201_46712_20130527_095034_outLine +BABEL_OP1_201_46974_20130305_032251_inLine +BABEL_OP1_201_46974_20130305_032251_outLine +BABEL_OP1_201_49775_20130312_061709_inLine +BABEL_OP1_201_49775_20130312_061709_outLine +BABEL_OP1_201_52222_20130221_115458_inLine +BABEL_OP1_201_52222_20130221_115458_outLine +BABEL_OP1_201_52442_20130511_033818_inLine +BABEL_OP1_201_52442_20130511_033818_outLine +BABEL_OP1_201_54405_20130512_043326_inLine +BABEL_OP1_201_54405_20130512_043326_outLine +BABEL_OP1_201_56523_20130530_035306_inLine +BABEL_OP1_201_56523_20130530_035306_outLine +BABEL_OP1_201_56720_20130305_084355_inLine +BABEL_OP1_201_56720_20130305_084355_outLine +BABEL_OP1_201_57609_20130519_003542_inLine +BABEL_OP1_201_57609_20130519_003542_outLine +BABEL_OP1_201_57922_20130601_024619_inLine +BABEL_OP1_201_57922_20130601_024619_outLine +BABEL_OP1_201_57935_20130522_034918_inLine +BABEL_OP1_201_57935_20130522_034918_outLine +BABEL_OP1_201_59645_20130510_022401_inLine +BABEL_OP1_201_59645_20130510_022401_outLine +BABEL_OP1_201_60352_20130301_071549_inLine +BABEL_OP1_201_60352_20130301_071549_outLine +BABEL_OP1_201_60352_20130301_072624_inLine +BABEL_OP1_201_60352_20130301_072624_outLine +BABEL_OP1_201_60508_20130221_023139_inLine +BABEL_OP1_201_60508_20130221_023139_outLine +BABEL_OP1_201_61440_20130602_061805_inLine +BABEL_OP1_201_61440_20130602_061805_outLine +BABEL_OP1_201_61963_20130430_084852_inLine +BABEL_OP1_201_61963_20130430_084852_outLine +BABEL_OP1_201_62155_20130507_055437_inLine +BABEL_OP1_201_62155_20130507_055437_outLine +BABEL_OP1_201_63309_20130214_111801_inLine +BABEL_OP1_201_63309_20130214_111801_outLine +BABEL_OP1_201_63481_20130306_031400_inLine +BABEL_OP1_201_63481_20130306_031400_outLine +BABEL_OP1_201_63511_20130704_101544_inLine +BABEL_OP1_201_63511_20130704_101544_outLine +BABEL_OP1_201_64638_20130228_015923_inLine +BABEL_OP1_201_64638_20130228_015923_outLine +BABEL_OP1_201_64870_20130521_011614_inLine +BABEL_OP1_201_64870_20130521_011614_outLine +BABEL_OP1_201_66967_20130223_042440_inLine +BABEL_OP1_201_66967_20130223_042440_outLine +BABEL_OP1_201_67552_20130302_031450_inLine +BABEL_OP1_201_67552_20130302_031450_outLine +BABEL_OP1_201_67592_20130413_085928_inLine +BABEL_OP1_201_67592_20130413_085928_outLine 
+BABEL_OP1_201_67794_20130528_054900_inLine +BABEL_OP1_201_67794_20130528_054900_outLine +BABEL_OP1_201_67794_20130528_060329_inLine +BABEL_OP1_201_67794_20130528_060329_outLine +BABEL_OP1_201_68059_20130514_015440_inLine +BABEL_OP1_201_68059_20130514_015440_outLine +BABEL_OP1_201_69633_20130302_015041_inLine +BABEL_OP1_201_69633_20130302_015041_outLine +BABEL_OP1_201_73757_20130510_051523_inLine +BABEL_OP1_201_73757_20130510_051523_outLine +BABEL_OP1_201_75359_20130502_024157_inLine +BABEL_OP1_201_75359_20130502_024157_outLine +BABEL_OP1_201_76773_20130529_015651_inLine +BABEL_OP1_201_76773_20130529_015651_outLine +BABEL_OP1_201_77139_20130221_080959_inLine +BABEL_OP1_201_77139_20130221_080959_outLine +BABEL_OP1_201_77391_20130529_083139_inLine +BABEL_OP1_201_77391_20130529_083139_outLine +BABEL_OP1_201_77567_20130305_071815_inLine +BABEL_OP1_201_77567_20130305_071815_outLine +BABEL_OP1_201_78630_20130604_103056_inLine +BABEL_OP1_201_78630_20130604_103056_outLine +BABEL_OP1_201_80897_20130602_013830_inLine +BABEL_OP1_201_80897_20130602_013830_outLine +BABEL_OP1_201_81229_20130529_053302_inLine +BABEL_OP1_201_81229_20130529_053302_outLine +BABEL_OP1_201_81404_20130528_042634_inLine +BABEL_OP1_201_81404_20130528_042634_outLine +BABEL_OP1_201_82030_20130704_095440_inLine +BABEL_OP1_201_82030_20130704_095440_outLine +BABEL_OP1_201_82030_20130704_100506_inLine +BABEL_OP1_201_82030_20130704_100506_outLine +BABEL_OP1_201_83366_20130228_065600_inLine +BABEL_OP1_201_83366_20130228_065600_outLine +BABEL_OP1_201_83783_20130524_015629_inLine +BABEL_OP1_201_83783_20130524_015629_outLine +BABEL_OP1_201_84327_20130305_092405_inLine +BABEL_OP1_201_84327_20130305_092405_outLine +BABEL_OP1_201_84583_20130518_020910_inLine +BABEL_OP1_201_84583_20130518_020910_outLine +BABEL_OP1_201_86748_20130428_024819_inLine +BABEL_OP1_201_86748_20130428_024819_outLine +BABEL_OP1_201_89045_20130307_055651_inLine +BABEL_OP1_201_89045_20130307_055651_outLine +BABEL_OP1_201_91930_20130429_004949_inLine +BABEL_OP1_201_91930_20130429_004949_outLine +BABEL_OP1_201_91930_20130429_005907_inLine +BABEL_OP1_201_91930_20130429_005907_outLine +BABEL_OP1_201_92060_20130502_110221_inLine +BABEL_OP1_201_92060_20130502_110221_outLine +BABEL_OP1_201_92698_20130510_005433_inLine +BABEL_OP1_201_92698_20130510_005433_outLine +BABEL_OP1_201_93861_20130512_005008_inLine +BABEL_OP1_201_93861_20130512_005008_outLine +BABEL_OP1_201_94141_20130430_122007_inLine +BABEL_OP1_201_94141_20130430_122007_outLine +BABEL_OP1_201_94166_20130429_044116_inLine +BABEL_OP1_201_94166_20130429_044116_outLine +BABEL_OP1_201_94587_20130305_100125_inLine +BABEL_OP1_201_94587_20130305_100125_outLine +BABEL_OP1_201_94745_20130301_131752_inLine +BABEL_OP1_201_94745_20130301_131752_outLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/evalpart1.list b/egs/babel/s5d/conf/lists/201-haitian/evalpart1.list
new file mode 100644
index 00000000000..0b771a04457
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/evalpart1.list
@@ -0,0 +1,64 @@
+BABEL_OP1_201_15926_20130302_065808_inLine +BABEL_OP1_201_15926_20130302_065808_outLine +BABEL_OP1_201_19545_20130517_060948_inLine +BABEL_OP1_201_19545_20130517_060948_outLine +BABEL_OP1_201_23395_20130521_052906_inLine +BABEL_OP1_201_23395_20130521_052906_outLine +BABEL_OP1_201_23628_20130528_052627_inLine +BABEL_OP1_201_23628_20130528_052627_outLine +BABEL_OP1_201_23731_20130517_014107_inLine +BABEL_OP1_201_23731_20130517_014107_outLine +BABEL_OP1_201_31583_20130630_090026_inLine +BABEL_OP1_201_31583_20130630_090026_outLine
+BABEL_OP1_201_39159_20130226_043216_inLine +BABEL_OP1_201_39159_20130226_043216_outLine +BABEL_OP1_201_41745_20130530_021647_inLine +BABEL_OP1_201_41745_20130530_021647_outLine +BABEL_OP1_201_44255_20130427_232421_inLine +BABEL_OP1_201_44255_20130427_232421_outLine +BABEL_OP1_201_44255_20130427_233501_inLine +BABEL_OP1_201_44255_20130427_233501_outLine +BABEL_OP1_201_44868_20130301_094502_inLine +BABEL_OP1_201_44868_20130301_094502_outLine +BABEL_OP1_201_44868_20130301_095004_inLine +BABEL_OP1_201_44868_20130301_095004_outLine +BABEL_OP1_201_45106_20130228_110111_inLine +BABEL_OP1_201_45106_20130228_110111_outLine +BABEL_OP1_201_46712_20130527_095034_inLine +BABEL_OP1_201_46712_20130527_095034_outLine +BABEL_OP1_201_49775_20130312_061709_inLine +BABEL_OP1_201_49775_20130312_061709_outLine +BABEL_OP1_201_57922_20130601_024619_inLine +BABEL_OP1_201_57922_20130601_024619_outLine +BABEL_OP1_201_60508_20130221_023139_inLine +BABEL_OP1_201_60508_20130221_023139_outLine +BABEL_OP1_201_62155_20130507_055437_inLine +BABEL_OP1_201_62155_20130507_055437_outLine +BABEL_OP1_201_63481_20130306_031400_inLine +BABEL_OP1_201_63481_20130306_031400_outLine +BABEL_OP1_201_63511_20130704_101544_inLine +BABEL_OP1_201_63511_20130704_101544_outLine +BABEL_OP1_201_64638_20130228_015923_inLine +BABEL_OP1_201_64638_20130228_015923_outLine +BABEL_OP1_201_75359_20130502_024157_inLine +BABEL_OP1_201_75359_20130502_024157_outLine +BABEL_OP1_201_76773_20130529_015651_inLine +BABEL_OP1_201_76773_20130529_015651_outLine +BABEL_OP1_201_77139_20130221_080959_inLine +BABEL_OP1_201_77139_20130221_080959_outLine +BABEL_OP1_201_77567_20130305_071815_inLine +BABEL_OP1_201_77567_20130305_071815_outLine +BABEL_OP1_201_78630_20130604_103056_inLine +BABEL_OP1_201_78630_20130604_103056_outLine +BABEL_OP1_201_80897_20130602_013830_inLine +BABEL_OP1_201_80897_20130602_013830_outLine +BABEL_OP1_201_81229_20130529_053302_inLine +BABEL_OP1_201_81229_20130529_053302_outLine +BABEL_OP1_201_83366_20130228_065600_inLine +BABEL_OP1_201_83366_20130228_065600_outLine +BABEL_OP1_201_83783_20130524_015629_inLine +BABEL_OP1_201_83783_20130524_015629_outLine +BABEL_OP1_201_86748_20130428_024819_inLine +BABEL_OP1_201_86748_20130428_024819_outLine +BABEL_OP1_201_94141_20130430_122007_inLine +BABEL_OP1_201_94141_20130430_122007_outLine
diff --git a/egs/babel/s5d/conf/lists/201-haitian/train.FullLP.list b/egs/babel/s5d/conf/lists/201-haitian/train.FullLP.list
new file mode 100644
index 00000000000..751c6ca4652
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/201-haitian/train.FullLP.list
@@ -0,0 +1,760 @@
+BABEL_OP1_201_10002_20130212_152853_inLine +BABEL_OP1_201_10002_20130212_152853_outLine +BABEL_OP1_201_10036_20130528_005502_inLine +BABEL_OP1_201_10036_20130528_005502_outLine +BABEL_OP1_201_10482_20130305_105317_inLine +BABEL_OP1_201_10482_20130305_105317_outLine +BABEL_OP1_201_10647_20130428_045536_inLine +BABEL_OP1_201_10647_20130428_045536_outLine +BABEL_OP1_201_10901_20130529_031421_inLine +BABEL_OP1_201_10901_20130529_031421_outLine +BABEL_OP1_201_11096_20130603_043221_inLine +BABEL_OP1_201_11096_20130603_043221_outLine +BABEL_OP1_201_11663_20130601_002903_inLine +BABEL_OP1_201_11663_20130601_002903_outLine +BABEL_OP1_201_11673_20130226_015822_inLine +BABEL_OP1_201_11673_20130226_015822_outLine +BABEL_OP1_201_11797_20130328_033102_inLine +BABEL_OP1_201_11797_20130328_033102_outLine +BABEL_OP1_201_12220_20130528_051622_inLine +BABEL_OP1_201_12220_20130528_051622_outLine +BABEL_OP1_201_12242_20130603_033446_inLine
+BABEL_OP1_201_12242_20130603_033446_outLine +BABEL_OP1_201_12606_20130429_120351_inLine +BABEL_OP1_201_12606_20130429_120351_outLine +BABEL_OP1_201_12606_20130429_121040_inLine +BABEL_OP1_201_12606_20130429_121040_outLine +BABEL_OP1_201_12635_20130429_040127_inLine +BABEL_OP1_201_12635_20130429_040127_outLine +BABEL_OP1_201_12767_20130509_005500_inLine +BABEL_OP1_201_12767_20130509_005500_outLine +BABEL_OP1_201_13178_20130301_043649_inLine +BABEL_OP1_201_13178_20130301_043649_outLine +BABEL_OP1_201_13324_20130529_035029_inLine +BABEL_OP1_201_13324_20130529_035029_outLine +BABEL_OP1_201_13483_20130306_062423_inLine +BABEL_OP1_201_13483_20130306_062423_outLine +BABEL_OP1_201_13490_20130508_033252_inLine +BABEL_OP1_201_13490_20130508_033252_outLine +BABEL_OP1_201_13664_20130117_073343_inLine +BABEL_OP1_201_13664_20130117_073343_outLine +BABEL_OP1_201_14179_20130303_111502_inLine +BABEL_OP1_201_14179_20130303_111502_outLine +BABEL_OP1_201_14229_20130528_023254_inLine +BABEL_OP1_201_14229_20130528_023254_outLine +BABEL_OP1_201_14539_20130501_223201_inLine +BABEL_OP1_201_14539_20130501_223201_outLine +BABEL_OP1_201_14560_20130301_065543_inLine +BABEL_OP1_201_14560_20130301_065543_outLine +BABEL_OP1_201_14807_20130522_012156_inLine +BABEL_OP1_201_14807_20130522_012156_outLine +BABEL_OP1_201_14899_20130301_035636_inLine +BABEL_OP1_201_14899_20130301_035636_outLine +BABEL_OP1_201_14972_20130518_025852_inLine +BABEL_OP1_201_14972_20130518_025852_outLine +BABEL_OP1_201_15216_20130503_005405_inLine +BABEL_OP1_201_15216_20130503_005405_outLine +BABEL_OP1_201_15322_20130701_030436_inLine +BABEL_OP1_201_15322_20130701_030436_outLine +BABEL_OP1_201_15382_20130228_050819_inLine +BABEL_OP1_201_15382_20130228_050819_outLine +BABEL_OP1_201_15702_20130301_041117_inLine +BABEL_OP1_201_15702_20130301_041117_outLine +BABEL_OP1_201_15730_20130305_034450_inLine +BABEL_OP1_201_15730_20130305_034450_outLine +BABEL_OP1_201_15848_20130130_070404_inLine +BABEL_OP1_201_15848_20130130_070404_outLine +BABEL_OP1_201_15902_20130323_005824_inLine +BABEL_OP1_201_15902_20130323_005824_outLine +BABEL_OP1_201_16149_20130322_021647_inLine +BABEL_OP1_201_16149_20130322_021647_outLine +BABEL_OP1_201_16467_20130704_025921_inLine +BABEL_OP1_201_16467_20130704_025921_outLine +BABEL_OP1_201_16800_20130702_085158_inLine +BABEL_OP1_201_16800_20130702_085158_outLine +BABEL_OP1_201_16924_20130301_032937_inLine +BABEL_OP1_201_16924_20130301_032937_outLine +BABEL_OP1_201_16938_20130514_072820_inLine +BABEL_OP1_201_16938_20130514_072820_outLine +BABEL_OP1_201_17032_20130306_103506_inLine +BABEL_OP1_201_17032_20130306_103506_outLine +BABEL_OP1_201_17113_20130519_093427_inLine +BABEL_OP1_201_17113_20130519_093427_outLine +BABEL_OP1_201_17472_20130311_075957_inLine +BABEL_OP1_201_17472_20130311_075957_outLine +BABEL_OP1_201_17496_20130301_030157_inLine +BABEL_OP1_201_17496_20130301_030157_outLine +BABEL_OP1_201_17520_20130518_012147_inLine +BABEL_OP1_201_17520_20130518_012147_outLine +BABEL_OP1_201_17567_20130512_065938_inLine +BABEL_OP1_201_17567_20130512_065938_outLine +BABEL_OP1_201_17881_20130429_230318_inLine +BABEL_OP1_201_17881_20130429_230318_outLine +BABEL_OP1_201_17923_20130529_021211_inLine +BABEL_OP1_201_17923_20130529_021211_outLine +BABEL_OP1_201_18118_20130501_084131_inLine +BABEL_OP1_201_18118_20130501_084131_outLine +BABEL_OP1_201_18766_20130502_102418_inLine +BABEL_OP1_201_18766_20130502_102418_outLine +BABEL_OP1_201_19134_20130601_040621_inLine +BABEL_OP1_201_19134_20130601_040621_outLine 
+BABEL_OP1_201_19589_20130502_093932_inLine +BABEL_OP1_201_19589_20130502_093932_outLine +BABEL_OP1_201_19722_20130425_005348_inLine +BABEL_OP1_201_19722_20130425_005348_outLine +BABEL_OP1_201_19749_20130429_090621_inLine +BABEL_OP1_201_19749_20130429_090621_outLine +BABEL_OP1_201_19767_20130502_130900_inLine +BABEL_OP1_201_19767_20130502_130900_outLine +BABEL_OP1_201_19877_20130502_085421_inLine +BABEL_OP1_201_19877_20130502_085421_outLine +BABEL_OP1_201_20330_20130429_035418_inLine +BABEL_OP1_201_20330_20130429_035418_outLine +BABEL_OP1_201_20437_20130216_094002_inLine +BABEL_OP1_201_20437_20130216_094002_outLine +BABEL_OP1_201_20768_20130701_035344_inLine +BABEL_OP1_201_20768_20130701_035344_outLine +BABEL_OP1_201_20800_20130529_035944_inLine +BABEL_OP1_201_20800_20130529_035944_outLine +BABEL_OP1_201_20972_20130603_035417_inLine +BABEL_OP1_201_20972_20130603_035417_outLine +BABEL_OP1_201_21244_20130602_073304_inLine +BABEL_OP1_201_21244_20130602_073304_outLine +BABEL_OP1_201_21807_20130522_042858_inLine +BABEL_OP1_201_21807_20130522_042858_outLine +BABEL_OP1_201_21892_20130430_033520_inLine +BABEL_OP1_201_21892_20130430_033520_outLine +BABEL_OP1_201_22466_20121206_070403_inLine +BABEL_OP1_201_22466_20121206_070403_outLine +BABEL_OP1_201_22494_20130305_052405_inLine +BABEL_OP1_201_22494_20130305_052405_outLine +BABEL_OP1_201_22624_20130305_121723_inLine +BABEL_OP1_201_22624_20130305_121723_outLine +BABEL_OP1_201_23046_20130527_110737_inLine +BABEL_OP1_201_23046_20130527_110737_outLine +BABEL_OP1_201_23119_20130321_054320_inLine +BABEL_OP1_201_23119_20130321_054320_outLine +BABEL_OP1_201_23190_20130603_224243_inLine +BABEL_OP1_201_23190_20130603_224243_outLine +BABEL_OP1_201_23195_20130227_050013_inLine +BABEL_OP1_201_23195_20130227_050013_outLine +BABEL_OP1_201_23239_20130305_093734_inLine +BABEL_OP1_201_23239_20130305_093734_outLine +BABEL_OP1_201_23893_20130430_080021_inLine +BABEL_OP1_201_23893_20130430_080021_outLine +BABEL_OP1_201_24231_20130502_123747_inLine +BABEL_OP1_201_24231_20130502_123747_outLine +BABEL_OP1_201_24239_20130703_230221_inLine +BABEL_OP1_201_24239_20130703_230221_outLine +BABEL_OP1_201_24270_20130530_020630_inLine +BABEL_OP1_201_24270_20130530_020630_outLine +BABEL_OP1_201_24290_20130703_074550_inLine +BABEL_OP1_201_24290_20130703_074550_outLine +BABEL_OP1_201_24470_20130531_024204_inLine +BABEL_OP1_201_24470_20130531_024204_outLine +BABEL_OP1_201_24501_20130429_102945_inLine +BABEL_OP1_201_24501_20130429_102945_outLine +BABEL_OP1_201_24532_20130307_060030_inLine +BABEL_OP1_201_24532_20130307_060030_outLine +BABEL_OP1_201_24586_20130430_025349_inLine +BABEL_OP1_201_24586_20130430_032300_inLine +BABEL_OP1_201_24586_20130430_032300_outLine +BABEL_OP1_201_24586_20130430_033306_inLine +BABEL_OP1_201_24586_20130430_033306_outLine +BABEL_OP1_201_24589_20130529_111014_inLine +BABEL_OP1_201_24589_20130529_111014_outLine +BABEL_OP1_201_24679_20130222_072407_inLine +BABEL_OP1_201_24679_20130222_072407_outLine +BABEL_OP1_201_24982_20130529_044009_inLine +BABEL_OP1_201_24982_20130529_044009_outLine +BABEL_OP1_201_25015_20130501_223825_inLine +BABEL_OP1_201_25015_20130501_223825_outLine +BABEL_OP1_201_25961_20130223_033405_inLine +BABEL_OP1_201_25961_20130223_033405_outLine +BABEL_OP1_201_26072_20130429_011940_inLine +BABEL_OP1_201_26072_20130429_011940_outLine +BABEL_OP1_201_26388_20130528_030259_inLine +BABEL_OP1_201_26388_20130528_030259_outLine +BABEL_OP1_201_26836_20130528_100100_inLine +BABEL_OP1_201_26836_20130528_100100_outLine 
+BABEL_OP1_201_26836_20130528_101331_inLine +BABEL_OP1_201_26836_20130528_101331_outLine +BABEL_OP1_201_26999_20130228_090136_inLine +BABEL_OP1_201_26999_20130228_090136_outLine +BABEL_OP1_201_27042_20130701_075011_inLine +BABEL_OP1_201_27042_20130701_075011_outLine +BABEL_OP1_201_27203_20130602_005950_inLine +BABEL_OP1_201_27203_20130602_005950_outLine +BABEL_OP1_201_27590_20130304_072243_inLine +BABEL_OP1_201_27590_20130304_072243_outLine +BABEL_OP1_201_28419_20130528_035005_inLine +BABEL_OP1_201_28419_20130528_035005_outLine +BABEL_OP1_201_28522_20130303_104614_inLine +BABEL_OP1_201_28522_20130303_104614_outLine +BABEL_OP1_201_28600_20130701_051100_inLine +BABEL_OP1_201_28600_20130701_051100_outLine +BABEL_OP1_201_28606_20130305_101646_inLine +BABEL_OP1_201_28606_20130305_101646_outLine +BABEL_OP1_201_28775_20130529_005204_inLine +BABEL_OP1_201_28775_20130529_005204_outLine +BABEL_OP1_201_28814_20130704_000405_inLine +BABEL_OP1_201_28814_20130704_000405_outLine +BABEL_OP1_201_28871_20121207_015933_inLine +BABEL_OP1_201_28871_20121207_015933_outLine +BABEL_OP1_201_28945_20130528_094913_inLine +BABEL_OP1_201_28945_20130528_094913_outLine +BABEL_OP1_201_29023_20130530_024701_inLine +BABEL_OP1_201_29023_20130530_024701_outLine +BABEL_OP1_201_29072_20130304_052508_inLine +BABEL_OP1_201_29072_20130304_052508_outLine +BABEL_OP1_201_29168_20130222_015942_inLine +BABEL_OP1_201_29168_20130222_015942_outLine +BABEL_OP1_201_30180_20130528_033242_inLine +BABEL_OP1_201_30180_20130528_033242_outLine +BABEL_OP1_201_30395_20130529_034626_inLine +BABEL_OP1_201_30395_20130529_034626_outLine +BABEL_OP1_201_30432_20130227_084229_inLine +BABEL_OP1_201_30432_20130227_084229_outLine +BABEL_OP1_201_30576_20130527_002801_inLine +BABEL_OP1_201_30576_20130527_002801_outLine +BABEL_OP1_201_31109_20130510_030741_inLine +BABEL_OP1_201_31109_20130510_030741_outLine +BABEL_OP1_201_31628_20130301_081256_inLine +BABEL_OP1_201_31628_20130301_081256_outLine +BABEL_OP1_201_32097_20130130_021717_inLine +BABEL_OP1_201_32097_20130130_021717_outLine +BABEL_OP1_201_32122_20130529_070011_inLine +BABEL_OP1_201_32122_20130529_070011_outLine +BABEL_OP1_201_32171_20130220_084632_inLine +BABEL_OP1_201_32171_20130220_084632_outLine +BABEL_OP1_201_32708_20130528_093343_inLine +BABEL_OP1_201_32708_20130528_093343_outLine +BABEL_OP1_201_33229_20130429_025144_inLine +BABEL_OP1_201_33229_20130429_025144_outLine +BABEL_OP1_201_33659_20130214_000335_inLine +BABEL_OP1_201_33659_20130214_000335_outLine +BABEL_OP1_201_33806_20130630_224040_inLine +BABEL_OP1_201_33806_20130630_224040_outLine +BABEL_OP1_201_34106_20130305_032650_inLine +BABEL_OP1_201_34106_20130305_032650_outLine +BABEL_OP1_201_34145_20130301_033324_inLine +BABEL_OP1_201_34145_20130301_033324_outLine +BABEL_OP1_201_34197_20130227_065321_inLine +BABEL_OP1_201_34197_20130227_065321_outLine +BABEL_OP1_201_34336_20130527_071806_inLine +BABEL_OP1_201_34336_20130527_071806_outLine +BABEL_OP1_201_34679_20130529_040931_inLine +BABEL_OP1_201_34679_20130529_040931_outLine +BABEL_OP1_201_34826_20130430_025628_inLine +BABEL_OP1_201_34826_20130430_025628_outLine +BABEL_OP1_201_34903_20130302_052444_inLine +BABEL_OP1_201_34903_20130302_052444_outLine +BABEL_OP1_201_35000_20130702_092721_inLine +BABEL_OP1_201_35000_20130702_092721_outLine +BABEL_OP1_201_35008_20130305_114402_inLine +BABEL_OP1_201_35008_20130305_114402_outLine +BABEL_OP1_201_35467_20130321_032230_inLine +BABEL_OP1_201_35467_20130321_032230_outLine +BABEL_OP1_201_36219_20130528_021139_inLine 
+BABEL_OP1_201_36219_20130528_021139_outLine +BABEL_OP1_201_36341_20130226_074136_inLine +BABEL_OP1_201_36341_20130226_074136_outLine +BABEL_OP1_201_36894_20130221_070614_inLine +BABEL_OP1_201_36894_20130221_070614_outLine +BABEL_OP1_201_37271_20130430_025526_inLine +BABEL_OP1_201_37271_20130430_025526_outLine +BABEL_OP1_201_37598_20130601_032226_inLine +BABEL_OP1_201_37598_20130601_032226_outLine +BABEL_OP1_201_38076_20130302_132339_inLine +BABEL_OP1_201_38076_20130302_132339_outLine +BABEL_OP1_201_38878_20130228_041057_inLine +BABEL_OP1_201_38878_20130228_041057_outLine +BABEL_OP1_201_39426_20130429_085957_inLine +BABEL_OP1_201_39426_20130429_085957_outLine +BABEL_OP1_201_39638_20130126_082343_inLine +BABEL_OP1_201_39638_20130126_082343_outLine +BABEL_OP1_201_40713_20130530_005109_inLine +BABEL_OP1_201_40713_20130530_005109_outLine +BABEL_OP1_201_41097_20130228_063046_inLine +BABEL_OP1_201_41097_20130228_063046_outLine +BABEL_OP1_201_41334_20130630_085009_inLine +BABEL_OP1_201_41334_20130630_085009_outLine +BABEL_OP1_201_41469_20130303_034949_inLine +BABEL_OP1_201_41469_20130303_034949_outLine +BABEL_OP1_201_41542_20130429_084921_inLine +BABEL_OP1_201_41542_20130429_084921_outLine +BABEL_OP1_201_41618_20130518_035113_inLine +BABEL_OP1_201_41618_20130518_035113_outLine +BABEL_OP1_201_41685_20130214_090836_inLine +BABEL_OP1_201_41685_20130214_090836_outLine +BABEL_OP1_201_41720_20130203_053934_inLine +BABEL_OP1_201_41720_20130203_053934_outLine +BABEL_OP1_201_41890_20130430_020800_inLine +BABEL_OP1_201_41890_20130430_020800_outLine +BABEL_OP1_201_42155_20130521_023245_inLine +BABEL_OP1_201_42155_20130521_023245_outLine +BABEL_OP1_201_42243_20130303_022442_inLine +BABEL_OP1_201_42243_20130303_022442_outLine +BABEL_OP1_201_42497_20130529_040557_inLine +BABEL_OP1_201_42497_20130529_040557_outLine +BABEL_OP1_201_42619_20130228_081700_inLine +BABEL_OP1_201_42619_20130228_081700_outLine +BABEL_OP1_201_42771_20130516_235914_inLine +BABEL_OP1_201_42771_20130516_235914_outLine +BABEL_OP1_201_42834_20130227_094847_inLine +BABEL_OP1_201_42834_20130227_094847_outLine +BABEL_OP1_201_42991_20130301_104105_inLine +BABEL_OP1_201_42991_20130301_104105_outLine +BABEL_OP1_201_43286_20130301_085932_inLine +BABEL_OP1_201_43286_20130301_085932_outLine +BABEL_OP1_201_43323_20130211_115349_inLine +BABEL_OP1_201_43323_20130211_120743_inLine +BABEL_OP1_201_43323_20130211_120743_outLine +BABEL_OP1_201_43588_20130430_054932_inLine +BABEL_OP1_201_43588_20130430_054932_outLine +BABEL_OP1_201_43646_20130130_080323_inLine +BABEL_OP1_201_43646_20130130_080323_outLine +BABEL_OP1_201_43784_20130529_104333_inLine +BABEL_OP1_201_43784_20130529_104333_outLine +BABEL_OP1_201_43794_20130603_014105_inLine +BABEL_OP1_201_43794_20130603_014105_outLine +BABEL_OP1_201_44477_20130302_072308_inLine +BABEL_OP1_201_44477_20130302_072308_outLine +BABEL_OP1_201_44477_20130302_073645_inLine +BABEL_OP1_201_44477_20130302_073645_outLine +BABEL_OP1_201_44478_20130502_075027_inLine +BABEL_OP1_201_44478_20130502_075027_outLine +BABEL_OP1_201_44709_20130303_114051_inLine +BABEL_OP1_201_44709_20130303_114051_outLine +BABEL_OP1_201_45559_20130503_033307_inLine +BABEL_OP1_201_45559_20130503_033307_outLine +BABEL_OP1_201_46066_20130429_123746_inLine +BABEL_OP1_201_46066_20130429_123746_outLine +BABEL_OP1_201_46169_20130702_011629_inLine +BABEL_OP1_201_46169_20130702_011629_outLine +BABEL_OP1_201_46310_20130328_024919_inLine +BABEL_OP1_201_46310_20130328_024919_outLine +BABEL_OP1_201_46550_20130528_065103_inLine 
+BABEL_OP1_201_46550_20130528_065103_outLine +BABEL_OP1_201_46558_20130220_030534_inLine +BABEL_OP1_201_46558_20130220_030534_outLine +BABEL_OP1_201_46589_20130302_082301_inLine +BABEL_OP1_201_46589_20130302_082301_outLine +BABEL_OP1_201_46625_20130308_141424_inLine +BABEL_OP1_201_46625_20130308_141424_outLine +BABEL_OP1_201_46681_20130530_033328_inLine +BABEL_OP1_201_46681_20130530_033328_outLine +BABEL_OP1_201_46770_20130429_011947_inLine +BABEL_OP1_201_46770_20130429_011947_outLine +BABEL_OP1_201_46976_20130517_023139_inLine +BABEL_OP1_201_46976_20130517_023139_outLine +BABEL_OP1_201_47270_20130427_010445_inLine +BABEL_OP1_201_47270_20130427_010445_outLine +BABEL_OP1_201_47270_20130427_011917_inLine +BABEL_OP1_201_47270_20130427_011917_outLine +BABEL_OP1_201_47270_20130427_013155_inLine +BABEL_OP1_201_47270_20130427_013155_outLine +BABEL_OP1_201_47802_20130524_044824_inLine +BABEL_OP1_201_47802_20130524_044824_outLine +BABEL_OP1_201_47878_20130522_021958_inLine +BABEL_OP1_201_47878_20130522_021958_outLine +BABEL_OP1_201_48243_20130602_122113_inLine +BABEL_OP1_201_48243_20130602_122113_outLine +BABEL_OP1_201_48299_20130226_120812_inLine +BABEL_OP1_201_48299_20130226_120812_outLine +BABEL_OP1_201_48299_20130226_122743_inLine +BABEL_OP1_201_48299_20130226_122743_outLine +BABEL_OP1_201_48907_20130429_093546_inLine +BABEL_OP1_201_48907_20130429_093546_outLine +BABEL_OP1_201_49027_20130529_101617_inLine +BABEL_OP1_201_49027_20130529_101617_outLine +BABEL_OP1_201_49118_20130429_023211_inLine +BABEL_OP1_201_49118_20130429_023211_outLine +BABEL_OP1_201_49216_20130314_070036_inLine +BABEL_OP1_201_49216_20130314_070036_outLine +BABEL_OP1_201_49502_20130302_064002_inLine +BABEL_OP1_201_49502_20130302_064002_outLine +BABEL_OP1_201_49630_20130306_105833_inLine +BABEL_OP1_201_49630_20130306_105833_outLine +BABEL_OP1_201_49637_20130426_020402_inLine +BABEL_OP1_201_49637_20130426_020402_outLine +BABEL_OP1_201_49768_20130529_082143_inLine +BABEL_OP1_201_49768_20130529_082143_outLine +BABEL_OP1_201_49902_20130527_063448_inLine +BABEL_OP1_201_49902_20130527_063448_outLine +BABEL_OP1_201_49907_20130529_101707_inLine +BABEL_OP1_201_49907_20130529_101707_outLine +BABEL_OP1_201_49945_20130501_080703_inLine +BABEL_OP1_201_49945_20130501_080703_outLine +BABEL_OP1_201_50549_20130428_053142_inLine +BABEL_OP1_201_50549_20130428_053142_outLine +BABEL_OP1_201_50549_20130428_055313_inLine +BABEL_OP1_201_50549_20130428_055313_outLine +BABEL_OP1_201_50601_20130521_045944_inLine +BABEL_OP1_201_50601_20130521_045944_outLine +BABEL_OP1_201_50681_20130228_015155_inLine +BABEL_OP1_201_50681_20130228_015155_outLine +BABEL_OP1_201_50681_20130228_020643_inLine +BABEL_OP1_201_50681_20130228_020643_outLine +BABEL_OP1_201_50726_20130228_033852_inLine +BABEL_OP1_201_50726_20130228_033852_outLine +BABEL_OP1_201_50779_20130522_051719_inLine +BABEL_OP1_201_50779_20130522_051719_outLine +BABEL_OP1_201_50810_20130312_055632_inLine +BABEL_OP1_201_50810_20130312_055632_outLine +BABEL_OP1_201_50940_20130309_041526_inLine +BABEL_OP1_201_50940_20130309_041526_outLine +BABEL_OP1_201_51611_20130530_094039_inLine +BABEL_OP1_201_51611_20130530_094039_outLine +BABEL_OP1_201_52301_20130223_024524_inLine +BABEL_OP1_201_52301_20130223_024524_outLine +BABEL_OP1_201_52404_20130301_233232_inLine +BABEL_OP1_201_52404_20130301_233232_outLine +BABEL_OP1_201_52422_20130428_023051_inLine +BABEL_OP1_201_52422_20130428_023051_outLine +BABEL_OP1_201_52490_20130220_051000_inLine +BABEL_OP1_201_52490_20130220_051000_outLine 
+BABEL_OP1_201_52804_20130529_032046_inLine +BABEL_OP1_201_52804_20130529_032046_outLine +BABEL_OP1_201_52818_20130301_121852_inLine +BABEL_OP1_201_52818_20130301_121852_outLine +BABEL_OP1_201_53917_20130429_091547_inLine +BABEL_OP1_201_53917_20130429_091547_outLine +BABEL_OP1_201_55259_20130526_073400_inLine +BABEL_OP1_201_55259_20130526_073400_outLine +BABEL_OP1_201_55267_20130228_064943_inLine +BABEL_OP1_201_55267_20130228_064943_outLine +BABEL_OP1_201_55968_20130314_043319_inLine +BABEL_OP1_201_55968_20130314_043319_outLine +BABEL_OP1_201_55968_20130314_044612_inLine +BABEL_OP1_201_55968_20130314_044612_outLine +BABEL_OP1_201_56023_20130501_081011_inLine +BABEL_OP1_201_56023_20130501_081011_outLine +BABEL_OP1_201_56307_20130301_024958_inLine +BABEL_OP1_201_56307_20130301_024958_outLine +BABEL_OP1_201_57065_20130302_033227_inLine +BABEL_OP1_201_57065_20130302_033227_outLine +BABEL_OP1_201_57093_20130510_071214_inLine +BABEL_OP1_201_57093_20130510_071214_outLine +BABEL_OP1_201_57233_20130206_090034_inLine +BABEL_OP1_201_57233_20130206_090034_outLine +BABEL_OP1_201_57464_20130428_051858_inLine +BABEL_OP1_201_57464_20130428_051858_outLine +BABEL_OP1_201_57548_20130518_042831_inLine +BABEL_OP1_201_57548_20130518_042831_outLine +BABEL_OP1_201_57678_20130528_022013_inLine +BABEL_OP1_201_57678_20130528_022013_outLine +BABEL_OP1_201_58107_20130518_004334_inLine +BABEL_OP1_201_58107_20130518_004334_outLine +BABEL_OP1_201_58145_20130602_044301_inLine +BABEL_OP1_201_58145_20130602_044301_outLine +BABEL_OP1_201_58313_20130522_055528_inLine +BABEL_OP1_201_58313_20130522_055528_outLine +BABEL_OP1_201_58585_20130429_003422_inLine +BABEL_OP1_201_58585_20130429_003422_outLine +BABEL_OP1_201_58821_20130306_091219_inLine +BABEL_OP1_201_58821_20130306_091219_outLine +BABEL_OP1_201_59039_20130220_090641_inLine +BABEL_OP1_201_59039_20130220_090641_outLine +BABEL_OP1_201_59509_20130227_090836_inLine +BABEL_OP1_201_59509_20130227_090836_outLine +BABEL_OP1_201_59509_20130227_092230_inLine +BABEL_OP1_201_59509_20130227_092230_outLine +BABEL_OP1_201_60115_20130301_114138_inLine +BABEL_OP1_201_60115_20130301_114138_outLine +BABEL_OP1_201_60418_20130301_073212_inLine +BABEL_OP1_201_60418_20130301_073212_outLine +BABEL_OP1_201_60436_20130503_044737_inLine +BABEL_OP1_201_60436_20130503_044737_outLine +BABEL_OP1_201_60474_20130527_081400_inLine +BABEL_OP1_201_60474_20130527_081400_outLine +BABEL_OP1_201_60661_20130529_023958_inLine +BABEL_OP1_201_60661_20130529_023958_outLine +BABEL_OP1_201_61435_20130430_031742_inLine +BABEL_OP1_201_61435_20130430_031742_outLine +BABEL_OP1_201_61873_20130519_030703_inLine +BABEL_OP1_201_61873_20130519_030703_outLine +BABEL_OP1_201_62014_20130228_083820_inLine +BABEL_OP1_201_62014_20130228_083820_outLine +BABEL_OP1_201_63081_20130226_035431_inLine +BABEL_OP1_201_63081_20130226_035431_outLine +BABEL_OP1_201_63084_20130301_114742_inLine +BABEL_OP1_201_63084_20130301_114742_outLine +BABEL_OP1_201_63307_20130521_235343_inLine +BABEL_OP1_201_63307_20130521_235343_outLine +BABEL_OP1_201_63425_20130301_080734_inLine +BABEL_OP1_201_63425_20130301_080734_outLine +BABEL_OP1_201_63604_20130412_021112_inLine +BABEL_OP1_201_63604_20130412_021112_outLine +BABEL_OP1_201_64259_20130202_090605_inLine +BABEL_OP1_201_64259_20130202_090605_outLine +BABEL_OP1_201_64398_20130301_084125_inLine +BABEL_OP1_201_64398_20130301_084125_outLine +BABEL_OP1_201_65064_20130521_061233_inLine +BABEL_OP1_201_65064_20130521_061233_outLine +BABEL_OP1_201_65561_20130305_120931_inLine 
+BABEL_OP1_201_65561_20130305_120931_outLine +BABEL_OP1_201_65723_20130529_004610_inLine +BABEL_OP1_201_65723_20130529_004610_outLine +BABEL_OP1_201_66045_20130509_044408_inLine +BABEL_OP1_201_66045_20130509_044408_outLine +BABEL_OP1_201_66472_20130517_041032_inLine +BABEL_OP1_201_66472_20130517_041032_outLine +BABEL_OP1_201_67213_20130224_044805_inLine +BABEL_OP1_201_67213_20130224_044805_outLine +BABEL_OP1_201_67283_20130223_012433_inLine +BABEL_OP1_201_67283_20130223_012433_outLine +BABEL_OP1_201_67401_20130522_063044_inLine +BABEL_OP1_201_67401_20130522_063044_outLine +BABEL_OP1_201_67622_20130306_012440_inLine +BABEL_OP1_201_67622_20130306_012440_outLine +BABEL_OP1_201_68040_20130517_004413_inLine +BABEL_OP1_201_68040_20130517_004413_outLine +BABEL_OP1_201_68068_20130302_042557_inLine +BABEL_OP1_201_68068_20130302_042557_outLine +BABEL_OP1_201_68244_20130228_052832_inLine +BABEL_OP1_201_68244_20130228_052832_outLine +BABEL_OP1_201_68306_20130301_132523_inLine +BABEL_OP1_201_68306_20130301_132523_outLine +BABEL_OP1_201_68748_20130301_051957_inLine +BABEL_OP1_201_68748_20130301_051957_outLine +BABEL_OP1_201_68924_20130228_031746_inLine +BABEL_OP1_201_68924_20130228_031746_outLine +BABEL_OP1_201_69107_20130518_053632_inLine +BABEL_OP1_201_69107_20130518_053632_outLine +BABEL_OP1_201_69574_20130313_015419_inLine +BABEL_OP1_201_69574_20130313_015419_outLine +BABEL_OP1_201_69578_20130509_033949_inLine +BABEL_OP1_201_69578_20130509_033949_outLine +BABEL_OP1_201_69636_20130302_024254_inLine +BABEL_OP1_201_69636_20130302_024254_outLine +BABEL_OP1_201_70343_20130302_035639_inLine +BABEL_OP1_201_70343_20130302_035639_outLine +BABEL_OP1_201_70343_20130302_040518_inLine +BABEL_OP1_201_70343_20130302_040518_outLine +BABEL_OP1_201_70386_20130528_033752_inLine +BABEL_OP1_201_70386_20130528_033752_outLine +BABEL_OP1_201_70601_20130528_025629_inLine +BABEL_OP1_201_70601_20130528_025629_outLine +BABEL_OP1_201_70794_20130314_065330_inLine +BABEL_OP1_201_70794_20130314_065330_outLine +BABEL_OP1_201_71121_20130215_075206_inLine +BABEL_OP1_201_71121_20130215_075206_outLine +BABEL_OP1_201_72324_20130227_080108_inLine +BABEL_OP1_201_72324_20130227_080108_outLine +BABEL_OP1_201_72349_20130527_005409_inLine +BABEL_OP1_201_72349_20130527_005409_outLine +BABEL_OP1_201_72587_20130227_092146_inLine +BABEL_OP1_201_72587_20130227_092146_outLine +BABEL_OP1_201_72844_20130320_030750_inLine +BABEL_OP1_201_72844_20130320_030750_outLine +BABEL_OP1_201_73430_20130306_070252_inLine +BABEL_OP1_201_73430_20130306_070252_outLine +BABEL_OP1_201_73485_20130704_012751_inLine +BABEL_OP1_201_73485_20130704_012751_outLine +BABEL_OP1_201_73511_20130305_064018_inLine +BABEL_OP1_201_73511_20130305_064018_outLine +BABEL_OP1_201_73518_20130427_020953_inLine +BABEL_OP1_201_73518_20130427_020953_outLine +BABEL_OP1_201_73591_20121205_085430_inLine +BABEL_OP1_201_73591_20121205_085430_outLine +BABEL_OP1_201_73591_20121205_091943_inLine +BABEL_OP1_201_73591_20121205_091943_outLine +BABEL_OP1_201_73964_20130502_060046_inLine +BABEL_OP1_201_73964_20130502_060046_outLine +BABEL_OP1_201_74280_20130307_060529_inLine +BABEL_OP1_201_74280_20130307_060529_outLine +BABEL_OP1_201_74728_20130502_015015_inLine +BABEL_OP1_201_74728_20130502_015015_outLine +BABEL_OP1_201_74799_20130530_004139_inLine +BABEL_OP1_201_74799_20130530_004139_outLine +BABEL_OP1_201_74921_20130302_015536_inLine +BABEL_OP1_201_74921_20130302_015536_outLine +BABEL_OP1_201_74921_20130302_020351_inLine +BABEL_OP1_201_74921_20130302_020351_outLine 
+BABEL_OP1_201_75064_20130528_032631_inLine +BABEL_OP1_201_75064_20130528_032631_outLine +BABEL_OP1_201_75342_20130305_071206_inLine +BABEL_OP1_201_75342_20130305_071206_outLine +BABEL_OP1_201_75764_20130428_041456_inLine +BABEL_OP1_201_75764_20130428_041456_outLine +BABEL_OP1_201_75993_20130529_053731_inLine +BABEL_OP1_201_75993_20130529_053731_outLine +BABEL_OP1_201_76683_20130524_053916_inLine +BABEL_OP1_201_76683_20130524_053916_outLine +BABEL_OP1_201_77126_20121205_072118_inLine +BABEL_OP1_201_77126_20121205_072118_outLine +BABEL_OP1_201_77427_20130528_003638_inLine +BABEL_OP1_201_77427_20130528_003638_outLine +BABEL_OP1_201_78116_20130304_074916_inLine +BABEL_OP1_201_78116_20130304_074916_outLine +BABEL_OP1_201_78398_20130529_023517_inLine +BABEL_OP1_201_78398_20130529_023517_outLine +BABEL_OP1_201_78943_20130528_034620_inLine +BABEL_OP1_201_78943_20130528_034620_outLine +BABEL_OP1_201_79129_20130524_031851_inLine +BABEL_OP1_201_79129_20130524_031851_outLine +BABEL_OP1_201_79167_20130303_071948_inLine +BABEL_OP1_201_79167_20130303_071948_outLine +BABEL_OP1_201_79167_20130303_093604_inLine +BABEL_OP1_201_79167_20130303_093604_outLine +BABEL_OP1_201_79429_20130216_152022_inLine +BABEL_OP1_201_79429_20130216_152022_outLine +BABEL_OP1_201_80306_20130509_071053_inLine +BABEL_OP1_201_80306_20130509_071053_outLine +BABEL_OP1_201_81287_20130305_141750_inLine +BABEL_OP1_201_81287_20130305_141750_outLine +BABEL_OP1_201_81392_20130304_082518_inLine +BABEL_OP1_201_81392_20130304_082518_outLine +BABEL_OP1_201_81424_20130304_080620_inLine +BABEL_OP1_201_81424_20130304_080620_outLine +BABEL_OP1_201_81433_20130514_063900_inLine +BABEL_OP1_201_81433_20130514_063900_outLine +BABEL_OP1_201_81674_20130224_134642_inLine +BABEL_OP1_201_81674_20130224_134642_outLine +BABEL_OP1_201_81810_20130302_043825_inLine +BABEL_OP1_201_81810_20130302_043825_outLine +BABEL_OP1_201_81971_20130227_030618_inLine +BABEL_OP1_201_81971_20130227_030618_outLine +BABEL_OP1_201_82123_20130505_053636_inLine +BABEL_OP1_201_82123_20130505_053636_outLine +BABEL_OP1_201_82138_20130509_063904_inLine +BABEL_OP1_201_82138_20130509_063904_outLine +BABEL_OP1_201_82140_20130510_013208_inLine +BABEL_OP1_201_82140_20130510_013208_outLine +BABEL_OP1_201_82637_20130227_044340_inLine +BABEL_OP1_201_82637_20130227_044340_outLine +BABEL_OP1_201_82904_20130427_005507_inLine +BABEL_OP1_201_82904_20130427_005507_outLine +BABEL_OP1_201_82979_20130529_063602_inLine +BABEL_OP1_201_82979_20130529_063602_outLine +BABEL_OP1_201_83238_20130514_054056_inLine +BABEL_OP1_201_83238_20130514_054056_outLine +BABEL_OP1_201_83430_20130210_094011_inLine +BABEL_OP1_201_83430_20130210_094011_outLine +BABEL_OP1_201_83455_20130511_053045_inLine +BABEL_OP1_201_83455_20130511_053045_outLine +BABEL_OP1_201_83625_20130128_091225_inLine +BABEL_OP1_201_83625_20130128_091225_outLine +BABEL_OP1_201_83651_20130604_075201_inLine +BABEL_OP1_201_83651_20130604_075201_outLine +BABEL_OP1_201_83929_20121205_055436_inLine +BABEL_OP1_201_83929_20121205_055436_outLine +BABEL_OP1_201_83929_20121206_061559_inLine +BABEL_OP1_201_83929_20121206_061559_outLine +BABEL_OP1_201_83935_20130305_104443_inLine +BABEL_OP1_201_83935_20130305_104443_outLine +BABEL_OP1_201_84547_20130227_041326_inLine +BABEL_OP1_201_84547_20130227_041326_outLine +BABEL_OP1_201_84715_20130429_094324_inLine +BABEL_OP1_201_84715_20130429_094324_outLine +BABEL_OP1_201_84936_20130301_073352_inLine +BABEL_OP1_201_84936_20130301_073352_outLine +BABEL_OP1_201_85010_20130206_122216_inLine 
+BABEL_OP1_201_85010_20130206_122216_outLine +BABEL_OP1_201_85047_20130510_055057_inLine +BABEL_OP1_201_85047_20130510_055057_outLine +BABEL_OP1_201_85048_20130522_072215_inLine +BABEL_OP1_201_85048_20130522_072215_outLine +BABEL_OP1_201_85647_20130511_015627_inLine +BABEL_OP1_201_85647_20130511_015627_outLine +BABEL_OP1_201_86191_20130528_045113_inLine +BABEL_OP1_201_86191_20130528_045113_outLine +BABEL_OP1_201_86191_20130528_051540_inLine +BABEL_OP1_201_86191_20130528_051540_outLine +BABEL_OP1_201_86433_20130303_035210_inLine +BABEL_OP1_201_86433_20130303_035210_outLine +BABEL_OP1_201_86467_20130221_031701_inLine +BABEL_OP1_201_86467_20130221_031701_outLine +BABEL_OP1_201_86557_20130306_054158_inLine +BABEL_OP1_201_86557_20130306_054158_outLine +BABEL_OP1_201_86635_20130227_080743_inLine +BABEL_OP1_201_86635_20130227_080743_outLine +BABEL_OP1_201_86676_20130302_034945_inLine +BABEL_OP1_201_86676_20130302_034945_outLine +BABEL_OP1_201_86888_20130301_011747_inLine +BABEL_OP1_201_86888_20130301_011747_outLine +BABEL_OP1_201_87074_20130529_072238_inLine +BABEL_OP1_201_87074_20130529_072238_outLine +BABEL_OP1_201_87179_20130414_223248_inLine +BABEL_OP1_201_87179_20130414_223248_outLine +BABEL_OP1_201_87298_20130530_035908_inLine +BABEL_OP1_201_87298_20130530_035908_outLine +BABEL_OP1_201_87313_20130228_054816_inLine +BABEL_OP1_201_87313_20130228_054816_outLine +BABEL_OP1_201_87545_20130501_052733_inLine +BABEL_OP1_201_87545_20130501_052733_outLine +BABEL_OP1_201_87731_20130216_084329_inLine +BABEL_OP1_201_87731_20130216_084329_outLine +BABEL_OP1_201_87796_20130531_043218_inLine +BABEL_OP1_201_87796_20130531_043218_outLine +BABEL_OP1_201_88445_20130228_100123_inLine +BABEL_OP1_201_88445_20130228_100123_outLine +BABEL_OP1_201_88661_20130305_103247_inLine +BABEL_OP1_201_88661_20130305_103247_outLine +BABEL_OP1_201_89059_20130429_001658_inLine +BABEL_OP1_201_89059_20130429_001658_outLine +BABEL_OP1_201_89877_20130602_052802_inLine +BABEL_OP1_201_89877_20130602_052802_outLine +BABEL_OP1_201_90347_20130601_020619_inLine +BABEL_OP1_201_90347_20130601_020619_outLine +BABEL_OP1_201_90777_20130530_043440_inLine +BABEL_OP1_201_90777_20130530_043440_outLine +BABEL_OP1_201_91125_20130301_044113_inLine +BABEL_OP1_201_91125_20130301_044113_outLine +BABEL_OP1_201_91336_20130511_010308_inLine +BABEL_OP1_201_91336_20130511_010308_outLine +BABEL_OP1_201_91891_20130306_084037_inLine +BABEL_OP1_201_91891_20130306_084037_outLine +BABEL_OP1_201_91944_20130529_030733_inLine +BABEL_OP1_201_91944_20130529_030733_outLine +BABEL_OP1_201_91977_20130228_225341_inLine +BABEL_OP1_201_91977_20130228_225341_outLine +BABEL_OP1_201_92509_20130222_064302_inLine +BABEL_OP1_201_92509_20130222_064302_outLine +BABEL_OP1_201_92557_20130428_115801_inLine +BABEL_OP1_201_92557_20130428_115801_outLine +BABEL_OP1_201_92740_20130301_044629_inLine +BABEL_OP1_201_92740_20130301_044629_outLine +BABEL_OP1_201_92792_20130630_124723_inLine +BABEL_OP1_201_92792_20130630_124723_outLine +BABEL_OP1_201_92942_20130601_011759_inLine +BABEL_OP1_201_92942_20130601_011759_outLine +BABEL_OP1_201_93222_20130127_012443_inLine +BABEL_OP1_201_93222_20130127_012443_outLine +BABEL_OP1_201_93224_20130227_095611_inLine +BABEL_OP1_201_93224_20130227_095611_outLine +BABEL_OP1_201_93604_20130502_071337_inLine +BABEL_OP1_201_93604_20130502_071337_outLine +BABEL_OP1_201_93964_20130511_000644_inLine +BABEL_OP1_201_93964_20130511_000644_outLine +BABEL_OP1_201_94025_20130303_091916_inLine +BABEL_OP1_201_94025_20130303_091916_outLine 
+BABEL_OP1_201_94316_20130503_072805_inLine +BABEL_OP1_201_94316_20130503_072805_outLine +BABEL_OP1_201_94449_20130704_033336_inLine +BABEL_OP1_201_94449_20130704_033336_outLine +BABEL_OP1_201_94487_20130502_053741_inLine +BABEL_OP1_201_94487_20130502_053741_outLine +BABEL_OP1_201_94666_20130512_052019_inLine +BABEL_OP1_201_94666_20130512_052019_outLine +BABEL_OP1_201_94869_20130313_052715_inLine +BABEL_OP1_201_94869_20130313_052715_outLine +BABEL_OP1_201_94923_20130531_054229_inLine +BABEL_OP1_201_94923_20130531_054229_outLine +BABEL_OP1_201_95446_20130430_051750_inLine +BABEL_OP1_201_95446_20130430_051750_outLine +BABEL_OP1_201_96059_20130430_034442_inLine +BABEL_OP1_201_96059_20130430_034442_outLine +BABEL_OP1_201_96376_20130704_011157_inLine +BABEL_OP1_201_96376_20130704_011157_outLine +BABEL_OP1_201_96820_20130514_032741_inLine +BABEL_OP1_201_96820_20130514_032741_outLine +BABEL_OP1_201_97363_20130528_063449_inLine +BABEL_OP1_201_97363_20130528_063449_outLine +BABEL_OP1_201_97557_20130228_004756_inLine +BABEL_OP1_201_97557_20130228_004756_outLine +BABEL_OP1_201_99202_20130521_003552_inLine +BABEL_OP1_201_99202_20130521_003552_outLine +BABEL_OP1_201_99955_20130429_001807_inLine +BABEL_OP1_201_99955_20130429_001807_outLine diff --git a/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.list b/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.list new file mode 100644 index 00000000000..c6271d71566 --- /dev/null +++ b/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.list @@ -0,0 +1,126 @@ +BABEL_OP1_201_13178_20130301_043649_inLine +BABEL_OP1_201_13178_20130301_043649_outLine +BABEL_OP1_201_14229_20130528_023254_inLine +BABEL_OP1_201_14229_20130528_023254_outLine +BABEL_OP1_201_15216_20130503_005405_inLine +BABEL_OP1_201_15216_20130503_005405_outLine +BABEL_OP1_201_15848_20130130_070404_inLine +BABEL_OP1_201_15848_20130130_070404_outLine +BABEL_OP1_201_16938_20130514_072820_inLine +BABEL_OP1_201_16938_20130514_072820_outLine +BABEL_OP1_201_17881_20130429_230318_inLine +BABEL_OP1_201_17881_20130429_230318_outLine +BABEL_OP1_201_17923_20130529_021211_inLine +BABEL_OP1_201_17923_20130529_021211_outLine +BABEL_OP1_201_18118_20130501_084131_inLine +BABEL_OP1_201_18118_20130501_084131_outLine +BABEL_OP1_201_19722_20130425_005348_inLine +BABEL_OP1_201_19722_20130425_005348_outLine +BABEL_OP1_201_19749_20130429_090621_inLine +BABEL_OP1_201_19749_20130429_090621_outLine +BABEL_OP1_201_20768_20130701_035344_inLine +BABEL_OP1_201_20768_20130701_035344_outLine +BABEL_OP1_201_20800_20130529_035944_inLine +BABEL_OP1_201_20800_20130529_035944_outLine +BABEL_OP1_201_21244_20130602_073304_inLine +BABEL_OP1_201_21244_20130602_073304_outLine +BABEL_OP1_201_24290_20130703_074550_inLine +BABEL_OP1_201_24290_20130703_074550_outLine +BABEL_OP1_201_24589_20130529_111014_inLine +BABEL_OP1_201_24589_20130529_111014_outLine +BABEL_OP1_201_26072_20130429_011940_inLine +BABEL_OP1_201_26072_20130429_011940_outLine +BABEL_OP1_201_28606_20130305_101646_inLine +BABEL_OP1_201_28606_20130305_101646_outLine +BABEL_OP1_201_28871_20121207_015933_inLine +BABEL_OP1_201_28871_20121207_015933_outLine +BABEL_OP1_201_31109_20130510_030741_inLine +BABEL_OP1_201_31109_20130510_030741_outLine +BABEL_OP1_201_36219_20130528_021139_inLine +BABEL_OP1_201_36219_20130528_021139_outLine +BABEL_OP1_201_36341_20130226_074136_inLine +BABEL_OP1_201_36341_20130226_074136_outLine +BABEL_OP1_201_37271_20130430_025526_inLine +BABEL_OP1_201_37271_20130430_025526_outLine +BABEL_OP1_201_40713_20130530_005109_inLine 
+BABEL_OP1_201_40713_20130530_005109_outLine +BABEL_OP1_201_41097_20130228_063046_inLine +BABEL_OP1_201_41097_20130228_063046_outLine +BABEL_OP1_201_41618_20130518_035113_inLine +BABEL_OP1_201_41618_20130518_035113_outLine +BABEL_OP1_201_42243_20130303_022442_inLine +BABEL_OP1_201_42243_20130303_022442_outLine +BABEL_OP1_201_42619_20130228_081700_inLine +BABEL_OP1_201_42619_20130228_081700_outLine +BABEL_OP1_201_43646_20130130_080323_inLine +BABEL_OP1_201_43646_20130130_080323_outLine +BABEL_OP1_201_45559_20130503_033307_inLine +BABEL_OP1_201_45559_20130503_033307_outLine +BABEL_OP1_201_46625_20130308_141424_inLine +BABEL_OP1_201_46625_20130308_141424_outLine +BABEL_OP1_201_47270_20130427_010445_inLine +BABEL_OP1_201_47270_20130427_010445_outLine +BABEL_OP1_201_47270_20130427_011917_inLine +BABEL_OP1_201_47270_20130427_011917_outLine +BABEL_OP1_201_47270_20130427_013155_inLine +BABEL_OP1_201_47270_20130427_013155_outLine +BABEL_OP1_201_48907_20130429_093546_inLine +BABEL_OP1_201_48907_20130429_093546_outLine +BABEL_OP1_201_49118_20130429_023211_inLine +BABEL_OP1_201_49118_20130429_023211_outLine +BABEL_OP1_201_49502_20130302_064002_inLine +BABEL_OP1_201_49502_20130302_064002_outLine +BABEL_OP1_201_49902_20130527_063448_inLine +BABEL_OP1_201_49902_20130527_063448_outLine +BABEL_OP1_201_50601_20130521_045944_inLine +BABEL_OP1_201_50601_20130521_045944_outLine +BABEL_OP1_201_50681_20130228_015155_inLine +BABEL_OP1_201_50681_20130228_015155_outLine +BABEL_OP1_201_50681_20130228_020643_inLine +BABEL_OP1_201_50681_20130228_020643_outLine +BABEL_OP1_201_50726_20130228_033852_inLine +BABEL_OP1_201_50726_20130228_033852_outLine +BABEL_OP1_201_52804_20130529_032046_inLine +BABEL_OP1_201_52804_20130529_032046_outLine +BABEL_OP1_201_53917_20130429_091547_inLine +BABEL_OP1_201_53917_20130429_091547_outLine +BABEL_OP1_201_57093_20130510_071214_inLine +BABEL_OP1_201_57093_20130510_071214_outLine +BABEL_OP1_201_60418_20130301_073212_inLine +BABEL_OP1_201_60418_20130301_073212_outLine +BABEL_OP1_201_63425_20130301_080734_inLine +BABEL_OP1_201_63425_20130301_080734_outLine +BABEL_OP1_201_65723_20130529_004610_inLine +BABEL_OP1_201_65723_20130529_004610_outLine +BABEL_OP1_201_68040_20130517_004413_inLine +BABEL_OP1_201_68040_20130517_004413_outLine +BABEL_OP1_201_70601_20130528_025629_inLine +BABEL_OP1_201_70601_20130528_025629_outLine +BABEL_OP1_201_71121_20130215_075206_inLine +BABEL_OP1_201_71121_20130215_075206_outLine +BABEL_OP1_201_72349_20130527_005409_inLine +BABEL_OP1_201_72349_20130527_005409_outLine +BABEL_OP1_201_74799_20130530_004139_inLine +BABEL_OP1_201_74799_20130530_004139_outLine +BABEL_OP1_201_77126_20121205_072118_inLine +BABEL_OP1_201_77126_20121205_072118_outLine +BABEL_OP1_201_81674_20130224_134642_inLine +BABEL_OP1_201_81674_20130224_134642_outLine +BABEL_OP1_201_83935_20130305_104443_inLine +BABEL_OP1_201_83935_20130305_104443_outLine +BABEL_OP1_201_85048_20130522_072215_inLine +BABEL_OP1_201_85048_20130522_072215_outLine +BABEL_OP1_201_87545_20130501_052733_inLine +BABEL_OP1_201_87545_20130501_052733_outLine +BABEL_OP1_201_91336_20130511_010308_inLine +BABEL_OP1_201_91336_20130511_010308_outLine +BABEL_OP1_201_92792_20130630_124723_inLine +BABEL_OP1_201_92792_20130630_124723_outLine +BABEL_OP1_201_92942_20130601_011759_inLine +BABEL_OP1_201_92942_20130601_011759_outLine +BABEL_OP1_201_93224_20130227_095611_inLine +BABEL_OP1_201_93224_20130227_095611_outLine +BABEL_OP1_201_94666_20130512_052019_inLine +BABEL_OP1_201_94666_20130512_052019_outLine 
+BABEL_OP1_201_94923_20130531_054229_inLine +BABEL_OP1_201_94923_20130531_054229_outLine diff --git a/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..701e74d974b --- /dev/null +++ b/egs/babel/s5d/conf/lists/201-haitian/train.LimitedLP.untranscribed.list @@ -0,0 +1,634 @@ +BABEL_OP1_201_10002_20130212_152853_inLine +BABEL_OP1_201_10002_20130212_152853_outLine +BABEL_OP1_201_10036_20130528_005502_inLine +BABEL_OP1_201_10036_20130528_005502_outLine +BABEL_OP1_201_10482_20130305_105317_inLine +BABEL_OP1_201_10482_20130305_105317_outLine +BABEL_OP1_201_10647_20130428_045536_inLine +BABEL_OP1_201_10647_20130428_045536_outLine +BABEL_OP1_201_10901_20130529_031421_inLine +BABEL_OP1_201_10901_20130529_031421_outLine +BABEL_OP1_201_11096_20130603_043221_inLine +BABEL_OP1_201_11096_20130603_043221_outLine +BABEL_OP1_201_11663_20130601_002903_inLine +BABEL_OP1_201_11663_20130601_002903_outLine +BABEL_OP1_201_11673_20130226_015822_inLine +BABEL_OP1_201_11673_20130226_015822_outLine +BABEL_OP1_201_11797_20130328_033102_inLine +BABEL_OP1_201_11797_20130328_033102_outLine +BABEL_OP1_201_12220_20130528_051622_inLine +BABEL_OP1_201_12220_20130528_051622_outLine +BABEL_OP1_201_12242_20130603_033446_inLine +BABEL_OP1_201_12242_20130603_033446_outLine +BABEL_OP1_201_12606_20130429_120351_inLine +BABEL_OP1_201_12606_20130429_120351_outLine +BABEL_OP1_201_12606_20130429_121040_inLine +BABEL_OP1_201_12606_20130429_121040_outLine +BABEL_OP1_201_12635_20130429_040127_inLine +BABEL_OP1_201_12635_20130429_040127_outLine +BABEL_OP1_201_12767_20130509_005500_inLine +BABEL_OP1_201_12767_20130509_005500_outLine +BABEL_OP1_201_13324_20130529_035029_inLine +BABEL_OP1_201_13324_20130529_035029_outLine +BABEL_OP1_201_13483_20130306_062423_inLine +BABEL_OP1_201_13483_20130306_062423_outLine +BABEL_OP1_201_13490_20130508_033252_inLine +BABEL_OP1_201_13490_20130508_033252_outLine +BABEL_OP1_201_13664_20130117_073343_inLine +BABEL_OP1_201_13664_20130117_073343_outLine +BABEL_OP1_201_14179_20130303_111502_inLine +BABEL_OP1_201_14179_20130303_111502_outLine +BABEL_OP1_201_14539_20130501_223201_inLine +BABEL_OP1_201_14539_20130501_223201_outLine +BABEL_OP1_201_14560_20130301_065543_inLine +BABEL_OP1_201_14560_20130301_065543_outLine +BABEL_OP1_201_14807_20130522_012156_inLine +BABEL_OP1_201_14807_20130522_012156_outLine +BABEL_OP1_201_14899_20130301_035636_inLine +BABEL_OP1_201_14899_20130301_035636_outLine +BABEL_OP1_201_14972_20130518_025852_inLine +BABEL_OP1_201_14972_20130518_025852_outLine +BABEL_OP1_201_15322_20130701_030436_inLine +BABEL_OP1_201_15322_20130701_030436_outLine +BABEL_OP1_201_15382_20130228_050819_inLine +BABEL_OP1_201_15382_20130228_050819_outLine +BABEL_OP1_201_15702_20130301_041117_inLine +BABEL_OP1_201_15702_20130301_041117_outLine +BABEL_OP1_201_15730_20130305_034450_inLine +BABEL_OP1_201_15730_20130305_034450_outLine +BABEL_OP1_201_15902_20130323_005824_inLine +BABEL_OP1_201_15902_20130323_005824_outLine +BABEL_OP1_201_16149_20130322_021647_inLine +BABEL_OP1_201_16149_20130322_021647_outLine +BABEL_OP1_201_16467_20130704_025921_inLine +BABEL_OP1_201_16467_20130704_025921_outLine +BABEL_OP1_201_16800_20130702_085158_inLine +BABEL_OP1_201_16800_20130702_085158_outLine +BABEL_OP1_201_16924_20130301_032937_inLine +BABEL_OP1_201_16924_20130301_032937_outLine +BABEL_OP1_201_17032_20130306_103506_inLine +BABEL_OP1_201_17032_20130306_103506_outLine 
+BABEL_OP1_201_17113_20130519_093427_inLine +BABEL_OP1_201_17113_20130519_093427_outLine +BABEL_OP1_201_17472_20130311_075957_inLine +BABEL_OP1_201_17472_20130311_075957_outLine +BABEL_OP1_201_17496_20130301_030157_inLine +BABEL_OP1_201_17496_20130301_030157_outLine +BABEL_OP1_201_17520_20130518_012147_inLine +BABEL_OP1_201_17520_20130518_012147_outLine +BABEL_OP1_201_17567_20130512_065938_inLine +BABEL_OP1_201_17567_20130512_065938_outLine +BABEL_OP1_201_18766_20130502_102418_inLine +BABEL_OP1_201_18766_20130502_102418_outLine +BABEL_OP1_201_19134_20130601_040621_inLine +BABEL_OP1_201_19134_20130601_040621_outLine +BABEL_OP1_201_19589_20130502_093932_inLine +BABEL_OP1_201_19589_20130502_093932_outLine +BABEL_OP1_201_19767_20130502_130900_inLine +BABEL_OP1_201_19767_20130502_130900_outLine +BABEL_OP1_201_19877_20130502_085421_inLine +BABEL_OP1_201_19877_20130502_085421_outLine +BABEL_OP1_201_20330_20130429_035418_inLine +BABEL_OP1_201_20330_20130429_035418_outLine +BABEL_OP1_201_20437_20130216_094002_inLine +BABEL_OP1_201_20437_20130216_094002_outLine +BABEL_OP1_201_20972_20130603_035417_inLine +BABEL_OP1_201_20972_20130603_035417_outLine +BABEL_OP1_201_21807_20130522_042858_inLine +BABEL_OP1_201_21807_20130522_042858_outLine +BABEL_OP1_201_21892_20130430_033520_inLine +BABEL_OP1_201_21892_20130430_033520_outLine +BABEL_OP1_201_22466_20121206_070403_inLine +BABEL_OP1_201_22466_20121206_070403_outLine +BABEL_OP1_201_22494_20130305_052405_inLine +BABEL_OP1_201_22494_20130305_052405_outLine +BABEL_OP1_201_22624_20130305_121723_inLine +BABEL_OP1_201_22624_20130305_121723_outLine +BABEL_OP1_201_23046_20130527_110737_inLine +BABEL_OP1_201_23046_20130527_110737_outLine +BABEL_OP1_201_23119_20130321_054320_inLine +BABEL_OP1_201_23119_20130321_054320_outLine +BABEL_OP1_201_23190_20130603_224243_inLine +BABEL_OP1_201_23190_20130603_224243_outLine +BABEL_OP1_201_23195_20130227_050013_inLine +BABEL_OP1_201_23195_20130227_050013_outLine +BABEL_OP1_201_23239_20130305_093734_inLine +BABEL_OP1_201_23239_20130305_093734_outLine +BABEL_OP1_201_23893_20130430_080021_inLine +BABEL_OP1_201_23893_20130430_080021_outLine +BABEL_OP1_201_24231_20130502_123747_inLine +BABEL_OP1_201_24231_20130502_123747_outLine +BABEL_OP1_201_24239_20130703_230221_inLine +BABEL_OP1_201_24239_20130703_230221_outLine +BABEL_OP1_201_24270_20130530_020630_inLine +BABEL_OP1_201_24270_20130530_020630_outLine +BABEL_OP1_201_24470_20130531_024204_inLine +BABEL_OP1_201_24470_20130531_024204_outLine +BABEL_OP1_201_24501_20130429_102945_inLine +BABEL_OP1_201_24501_20130429_102945_outLine +BABEL_OP1_201_24532_20130307_060030_inLine +BABEL_OP1_201_24532_20130307_060030_outLine +BABEL_OP1_201_24586_20130430_025349_inLine +BABEL_OP1_201_24586_20130430_032300_inLine +BABEL_OP1_201_24586_20130430_032300_outLine +BABEL_OP1_201_24586_20130430_033306_inLine +BABEL_OP1_201_24586_20130430_033306_outLine +BABEL_OP1_201_24679_20130222_072407_inLine +BABEL_OP1_201_24679_20130222_072407_outLine +BABEL_OP1_201_24982_20130529_044009_inLine +BABEL_OP1_201_24982_20130529_044009_outLine +BABEL_OP1_201_25015_20130501_223825_inLine +BABEL_OP1_201_25015_20130501_223825_outLine +BABEL_OP1_201_25961_20130223_033405_inLine +BABEL_OP1_201_25961_20130223_033405_outLine +BABEL_OP1_201_26388_20130528_030259_inLine +BABEL_OP1_201_26388_20130528_030259_outLine +BABEL_OP1_201_26836_20130528_100100_inLine +BABEL_OP1_201_26836_20130528_100100_outLine +BABEL_OP1_201_26836_20130528_101331_inLine +BABEL_OP1_201_26836_20130528_101331_outLine 
+BABEL_OP1_201_26999_20130228_090136_inLine +BABEL_OP1_201_26999_20130228_090136_outLine +BABEL_OP1_201_27042_20130701_075011_inLine +BABEL_OP1_201_27042_20130701_075011_outLine +BABEL_OP1_201_27203_20130602_005950_inLine +BABEL_OP1_201_27203_20130602_005950_outLine +BABEL_OP1_201_27590_20130304_072243_inLine +BABEL_OP1_201_27590_20130304_072243_outLine +BABEL_OP1_201_28419_20130528_035005_inLine +BABEL_OP1_201_28419_20130528_035005_outLine +BABEL_OP1_201_28522_20130303_104614_inLine +BABEL_OP1_201_28522_20130303_104614_outLine +BABEL_OP1_201_28600_20130701_051100_inLine +BABEL_OP1_201_28600_20130701_051100_outLine +BABEL_OP1_201_28775_20130529_005204_inLine +BABEL_OP1_201_28775_20130529_005204_outLine +BABEL_OP1_201_28814_20130704_000405_inLine +BABEL_OP1_201_28814_20130704_000405_outLine +BABEL_OP1_201_28945_20130528_094913_inLine +BABEL_OP1_201_28945_20130528_094913_outLine +BABEL_OP1_201_29023_20130530_024701_inLine +BABEL_OP1_201_29023_20130530_024701_outLine +BABEL_OP1_201_29072_20130304_052508_inLine +BABEL_OP1_201_29072_20130304_052508_outLine +BABEL_OP1_201_29168_20130222_015942_inLine +BABEL_OP1_201_29168_20130222_015942_outLine +BABEL_OP1_201_30180_20130528_033242_inLine +BABEL_OP1_201_30180_20130528_033242_outLine +BABEL_OP1_201_30395_20130529_034626_inLine +BABEL_OP1_201_30395_20130529_034626_outLine +BABEL_OP1_201_30432_20130227_084229_inLine +BABEL_OP1_201_30432_20130227_084229_outLine +BABEL_OP1_201_30576_20130527_002801_inLine +BABEL_OP1_201_30576_20130527_002801_outLine +BABEL_OP1_201_31628_20130301_081256_inLine +BABEL_OP1_201_31628_20130301_081256_outLine +BABEL_OP1_201_32097_20130130_021717_inLine +BABEL_OP1_201_32097_20130130_021717_outLine +BABEL_OP1_201_32122_20130529_070011_inLine +BABEL_OP1_201_32122_20130529_070011_outLine +BABEL_OP1_201_32171_20130220_084632_inLine +BABEL_OP1_201_32171_20130220_084632_outLine +BABEL_OP1_201_32708_20130528_093343_inLine +BABEL_OP1_201_32708_20130528_093343_outLine +BABEL_OP1_201_33229_20130429_025144_inLine +BABEL_OP1_201_33229_20130429_025144_outLine +BABEL_OP1_201_33659_20130214_000335_inLine +BABEL_OP1_201_33659_20130214_000335_outLine +BABEL_OP1_201_33806_20130630_224040_inLine +BABEL_OP1_201_33806_20130630_224040_outLine +BABEL_OP1_201_34106_20130305_032650_inLine +BABEL_OP1_201_34106_20130305_032650_outLine +BABEL_OP1_201_34145_20130301_033324_inLine +BABEL_OP1_201_34145_20130301_033324_outLine +BABEL_OP1_201_34197_20130227_065321_inLine +BABEL_OP1_201_34197_20130227_065321_outLine +BABEL_OP1_201_34336_20130527_071806_inLine +BABEL_OP1_201_34336_20130527_071806_outLine +BABEL_OP1_201_34679_20130529_040931_inLine +BABEL_OP1_201_34679_20130529_040931_outLine +BABEL_OP1_201_34826_20130430_025628_inLine +BABEL_OP1_201_34826_20130430_025628_outLine +BABEL_OP1_201_34903_20130302_052444_inLine +BABEL_OP1_201_34903_20130302_052444_outLine +BABEL_OP1_201_35000_20130702_092721_inLine +BABEL_OP1_201_35000_20130702_092721_outLine +BABEL_OP1_201_35008_20130305_114402_inLine +BABEL_OP1_201_35008_20130305_114402_outLine +BABEL_OP1_201_35467_20130321_032230_inLine +BABEL_OP1_201_35467_20130321_032230_outLine +BABEL_OP1_201_36894_20130221_070614_inLine +BABEL_OP1_201_36894_20130221_070614_outLine +BABEL_OP1_201_37598_20130601_032226_inLine +BABEL_OP1_201_37598_20130601_032226_outLine +BABEL_OP1_201_38076_20130302_132339_inLine +BABEL_OP1_201_38076_20130302_132339_outLine +BABEL_OP1_201_38878_20130228_041057_inLine +BABEL_OP1_201_38878_20130228_041057_outLine +BABEL_OP1_201_39426_20130429_085957_inLine 
+BABEL_OP1_201_39426_20130429_085957_outLine +BABEL_OP1_201_39638_20130126_082343_inLine +BABEL_OP1_201_39638_20130126_082343_outLine +BABEL_OP1_201_41334_20130630_085009_inLine +BABEL_OP1_201_41334_20130630_085009_outLine +BABEL_OP1_201_41469_20130303_034949_inLine +BABEL_OP1_201_41469_20130303_034949_outLine +BABEL_OP1_201_41542_20130429_084921_inLine +BABEL_OP1_201_41542_20130429_084921_outLine +BABEL_OP1_201_41685_20130214_090836_inLine +BABEL_OP1_201_41685_20130214_090836_outLine +BABEL_OP1_201_41720_20130203_053934_inLine +BABEL_OP1_201_41720_20130203_053934_outLine +BABEL_OP1_201_41890_20130430_020800_inLine +BABEL_OP1_201_41890_20130430_020800_outLine +BABEL_OP1_201_42155_20130521_023245_inLine +BABEL_OP1_201_42155_20130521_023245_outLine +BABEL_OP1_201_42497_20130529_040557_inLine +BABEL_OP1_201_42497_20130529_040557_outLine +BABEL_OP1_201_42771_20130516_235914_inLine +BABEL_OP1_201_42771_20130516_235914_outLine +BABEL_OP1_201_42834_20130227_094847_inLine +BABEL_OP1_201_42834_20130227_094847_outLine +BABEL_OP1_201_42991_20130301_104105_inLine +BABEL_OP1_201_42991_20130301_104105_outLine +BABEL_OP1_201_43286_20130301_085932_inLine +BABEL_OP1_201_43286_20130301_085932_outLine +BABEL_OP1_201_43323_20130211_115349_inLine +BABEL_OP1_201_43323_20130211_120743_inLine +BABEL_OP1_201_43323_20130211_120743_outLine +BABEL_OP1_201_43588_20130430_054932_inLine +BABEL_OP1_201_43588_20130430_054932_outLine +BABEL_OP1_201_43784_20130529_104333_inLine +BABEL_OP1_201_43784_20130529_104333_outLine +BABEL_OP1_201_43794_20130603_014105_inLine +BABEL_OP1_201_43794_20130603_014105_outLine +BABEL_OP1_201_44477_20130302_072308_inLine +BABEL_OP1_201_44477_20130302_072308_outLine +BABEL_OP1_201_44477_20130302_073645_inLine +BABEL_OP1_201_44477_20130302_073645_outLine +BABEL_OP1_201_44478_20130502_075027_inLine +BABEL_OP1_201_44478_20130502_075027_outLine +BABEL_OP1_201_44709_20130303_114051_inLine +BABEL_OP1_201_44709_20130303_114051_outLine +BABEL_OP1_201_46066_20130429_123746_inLine +BABEL_OP1_201_46066_20130429_123746_outLine +BABEL_OP1_201_46169_20130702_011629_inLine +BABEL_OP1_201_46169_20130702_011629_outLine +BABEL_OP1_201_46310_20130328_024919_inLine +BABEL_OP1_201_46310_20130328_024919_outLine +BABEL_OP1_201_46550_20130528_065103_inLine +BABEL_OP1_201_46550_20130528_065103_outLine +BABEL_OP1_201_46558_20130220_030534_inLine +BABEL_OP1_201_46558_20130220_030534_outLine +BABEL_OP1_201_46589_20130302_082301_inLine +BABEL_OP1_201_46589_20130302_082301_outLine +BABEL_OP1_201_46681_20130530_033328_inLine +BABEL_OP1_201_46681_20130530_033328_outLine +BABEL_OP1_201_46770_20130429_011947_inLine +BABEL_OP1_201_46770_20130429_011947_outLine +BABEL_OP1_201_46976_20130517_023139_inLine +BABEL_OP1_201_46976_20130517_023139_outLine +BABEL_OP1_201_47802_20130524_044824_inLine +BABEL_OP1_201_47802_20130524_044824_outLine +BABEL_OP1_201_47878_20130522_021958_inLine +BABEL_OP1_201_47878_20130522_021958_outLine +BABEL_OP1_201_48243_20130602_122113_inLine +BABEL_OP1_201_48243_20130602_122113_outLine +BABEL_OP1_201_48299_20130226_120812_inLine +BABEL_OP1_201_48299_20130226_120812_outLine +BABEL_OP1_201_48299_20130226_122743_inLine +BABEL_OP1_201_48299_20130226_122743_outLine +BABEL_OP1_201_49027_20130529_101617_inLine +BABEL_OP1_201_49027_20130529_101617_outLine +BABEL_OP1_201_49216_20130314_070036_inLine +BABEL_OP1_201_49216_20130314_070036_outLine +BABEL_OP1_201_49630_20130306_105833_inLine +BABEL_OP1_201_49630_20130306_105833_outLine +BABEL_OP1_201_49637_20130426_020402_inLine 
+BABEL_OP1_201_49637_20130426_020402_outLine +BABEL_OP1_201_49768_20130529_082143_inLine +BABEL_OP1_201_49768_20130529_082143_outLine +BABEL_OP1_201_49907_20130529_101707_inLine +BABEL_OP1_201_49907_20130529_101707_outLine +BABEL_OP1_201_49945_20130501_080703_inLine +BABEL_OP1_201_49945_20130501_080703_outLine +BABEL_OP1_201_50549_20130428_053142_inLine +BABEL_OP1_201_50549_20130428_053142_outLine +BABEL_OP1_201_50549_20130428_055313_inLine +BABEL_OP1_201_50549_20130428_055313_outLine +BABEL_OP1_201_50779_20130522_051719_inLine +BABEL_OP1_201_50779_20130522_051719_outLine +BABEL_OP1_201_50810_20130312_055632_inLine +BABEL_OP1_201_50810_20130312_055632_outLine +BABEL_OP1_201_50940_20130309_041526_inLine +BABEL_OP1_201_50940_20130309_041526_outLine +BABEL_OP1_201_51611_20130530_094039_inLine +BABEL_OP1_201_51611_20130530_094039_outLine +BABEL_OP1_201_52301_20130223_024524_inLine +BABEL_OP1_201_52301_20130223_024524_outLine +BABEL_OP1_201_52404_20130301_233232_inLine +BABEL_OP1_201_52404_20130301_233232_outLine +BABEL_OP1_201_52422_20130428_023051_inLine +BABEL_OP1_201_52422_20130428_023051_outLine +BABEL_OP1_201_52490_20130220_051000_inLine +BABEL_OP1_201_52490_20130220_051000_outLine +BABEL_OP1_201_52818_20130301_121852_inLine +BABEL_OP1_201_52818_20130301_121852_outLine +BABEL_OP1_201_55259_20130526_073400_inLine +BABEL_OP1_201_55259_20130526_073400_outLine +BABEL_OP1_201_55267_20130228_064943_inLine +BABEL_OP1_201_55267_20130228_064943_outLine +BABEL_OP1_201_55968_20130314_043319_inLine +BABEL_OP1_201_55968_20130314_043319_outLine +BABEL_OP1_201_55968_20130314_044612_inLine +BABEL_OP1_201_55968_20130314_044612_outLine +BABEL_OP1_201_56023_20130501_081011_inLine +BABEL_OP1_201_56023_20130501_081011_outLine +BABEL_OP1_201_56307_20130301_024958_inLine +BABEL_OP1_201_56307_20130301_024958_outLine +BABEL_OP1_201_57065_20130302_033227_inLine +BABEL_OP1_201_57065_20130302_033227_outLine +BABEL_OP1_201_57233_20130206_090034_inLine +BABEL_OP1_201_57233_20130206_090034_outLine +BABEL_OP1_201_57464_20130428_051858_inLine +BABEL_OP1_201_57464_20130428_051858_outLine +BABEL_OP1_201_57548_20130518_042831_inLine +BABEL_OP1_201_57548_20130518_042831_outLine +BABEL_OP1_201_57678_20130528_022013_inLine +BABEL_OP1_201_57678_20130528_022013_outLine +BABEL_OP1_201_58107_20130518_004334_inLine +BABEL_OP1_201_58107_20130518_004334_outLine +BABEL_OP1_201_58145_20130602_044301_inLine +BABEL_OP1_201_58145_20130602_044301_outLine +BABEL_OP1_201_58313_20130522_055528_inLine +BABEL_OP1_201_58313_20130522_055528_outLine +BABEL_OP1_201_58585_20130429_003422_inLine +BABEL_OP1_201_58585_20130429_003422_outLine +BABEL_OP1_201_58821_20130306_091219_inLine +BABEL_OP1_201_58821_20130306_091219_outLine +BABEL_OP1_201_59039_20130220_090641_inLine +BABEL_OP1_201_59039_20130220_090641_outLine +BABEL_OP1_201_59509_20130227_090836_inLine +BABEL_OP1_201_59509_20130227_090836_outLine +BABEL_OP1_201_59509_20130227_092230_inLine +BABEL_OP1_201_59509_20130227_092230_outLine +BABEL_OP1_201_60115_20130301_114138_inLine +BABEL_OP1_201_60115_20130301_114138_outLine +BABEL_OP1_201_60436_20130503_044737_inLine +BABEL_OP1_201_60436_20130503_044737_outLine +BABEL_OP1_201_60474_20130527_081400_inLine +BABEL_OP1_201_60474_20130527_081400_outLine +BABEL_OP1_201_60661_20130529_023958_inLine +BABEL_OP1_201_60661_20130529_023958_outLine +BABEL_OP1_201_61435_20130430_031742_inLine +BABEL_OP1_201_61435_20130430_031742_outLine +BABEL_OP1_201_61873_20130519_030703_inLine +BABEL_OP1_201_61873_20130519_030703_outLine 
+BABEL_OP1_201_62014_20130228_083820_inLine +BABEL_OP1_201_62014_20130228_083820_outLine +BABEL_OP1_201_63081_20130226_035431_inLine +BABEL_OP1_201_63081_20130226_035431_outLine +BABEL_OP1_201_63084_20130301_114742_inLine +BABEL_OP1_201_63084_20130301_114742_outLine +BABEL_OP1_201_63307_20130521_235343_inLine +BABEL_OP1_201_63307_20130521_235343_outLine +BABEL_OP1_201_63604_20130412_021112_inLine +BABEL_OP1_201_63604_20130412_021112_outLine +BABEL_OP1_201_64259_20130202_090605_inLine +BABEL_OP1_201_64259_20130202_090605_outLine +BABEL_OP1_201_64398_20130301_084125_inLine +BABEL_OP1_201_64398_20130301_084125_outLine +BABEL_OP1_201_65064_20130521_061233_inLine +BABEL_OP1_201_65064_20130521_061233_outLine +BABEL_OP1_201_65561_20130305_120931_inLine +BABEL_OP1_201_65561_20130305_120931_outLine +BABEL_OP1_201_66045_20130509_044408_inLine +BABEL_OP1_201_66045_20130509_044408_outLine +BABEL_OP1_201_66472_20130517_041032_inLine +BABEL_OP1_201_66472_20130517_041032_outLine +BABEL_OP1_201_67213_20130224_044805_inLine +BABEL_OP1_201_67213_20130224_044805_outLine +BABEL_OP1_201_67283_20130223_012433_inLine +BABEL_OP1_201_67283_20130223_012433_outLine +BABEL_OP1_201_67401_20130522_063044_inLine +BABEL_OP1_201_67401_20130522_063044_outLine +BABEL_OP1_201_67622_20130306_012440_inLine +BABEL_OP1_201_67622_20130306_012440_outLine +BABEL_OP1_201_68068_20130302_042557_inLine +BABEL_OP1_201_68068_20130302_042557_outLine +BABEL_OP1_201_68244_20130228_052832_inLine +BABEL_OP1_201_68244_20130228_052832_outLine +BABEL_OP1_201_68306_20130301_132523_inLine +BABEL_OP1_201_68306_20130301_132523_outLine +BABEL_OP1_201_68748_20130301_051957_inLine +BABEL_OP1_201_68748_20130301_051957_outLine +BABEL_OP1_201_68924_20130228_031746_inLine +BABEL_OP1_201_68924_20130228_031746_outLine +BABEL_OP1_201_69107_20130518_053632_inLine +BABEL_OP1_201_69107_20130518_053632_outLine +BABEL_OP1_201_69574_20130313_015419_inLine +BABEL_OP1_201_69574_20130313_015419_outLine +BABEL_OP1_201_69578_20130509_033949_inLine +BABEL_OP1_201_69578_20130509_033949_outLine +BABEL_OP1_201_69636_20130302_024254_inLine +BABEL_OP1_201_69636_20130302_024254_outLine +BABEL_OP1_201_70343_20130302_035639_inLine +BABEL_OP1_201_70343_20130302_035639_outLine +BABEL_OP1_201_70343_20130302_040518_inLine +BABEL_OP1_201_70343_20130302_040518_outLine +BABEL_OP1_201_70386_20130528_033752_inLine +BABEL_OP1_201_70386_20130528_033752_outLine +BABEL_OP1_201_70794_20130314_065330_inLine +BABEL_OP1_201_70794_20130314_065330_outLine +BABEL_OP1_201_72324_20130227_080108_inLine +BABEL_OP1_201_72324_20130227_080108_outLine +BABEL_OP1_201_72587_20130227_092146_inLine +BABEL_OP1_201_72587_20130227_092146_outLine +BABEL_OP1_201_72844_20130320_030750_inLine +BABEL_OP1_201_72844_20130320_030750_outLine +BABEL_OP1_201_73430_20130306_070252_inLine +BABEL_OP1_201_73430_20130306_070252_outLine +BABEL_OP1_201_73485_20130704_012751_inLine +BABEL_OP1_201_73485_20130704_012751_outLine +BABEL_OP1_201_73511_20130305_064018_inLine +BABEL_OP1_201_73511_20130305_064018_outLine +BABEL_OP1_201_73518_20130427_020953_inLine +BABEL_OP1_201_73518_20130427_020953_outLine +BABEL_OP1_201_73591_20121205_085430_inLine +BABEL_OP1_201_73591_20121205_085430_outLine +BABEL_OP1_201_73591_20121205_091943_inLine +BABEL_OP1_201_73591_20121205_091943_outLine +BABEL_OP1_201_73964_20130502_060046_inLine +BABEL_OP1_201_73964_20130502_060046_outLine +BABEL_OP1_201_74280_20130307_060529_inLine +BABEL_OP1_201_74280_20130307_060529_outLine +BABEL_OP1_201_74728_20130502_015015_inLine 
+BABEL_OP1_201_74728_20130502_015015_outLine +BABEL_OP1_201_74921_20130302_015536_inLine +BABEL_OP1_201_74921_20130302_015536_outLine +BABEL_OP1_201_74921_20130302_020351_inLine +BABEL_OP1_201_74921_20130302_020351_outLine +BABEL_OP1_201_75064_20130528_032631_inLine +BABEL_OP1_201_75064_20130528_032631_outLine +BABEL_OP1_201_75342_20130305_071206_inLine +BABEL_OP1_201_75342_20130305_071206_outLine +BABEL_OP1_201_75764_20130428_041456_inLine +BABEL_OP1_201_75764_20130428_041456_outLine +BABEL_OP1_201_75993_20130529_053731_inLine +BABEL_OP1_201_75993_20130529_053731_outLine +BABEL_OP1_201_76683_20130524_053916_inLine +BABEL_OP1_201_76683_20130524_053916_outLine +BABEL_OP1_201_77427_20130528_003638_inLine +BABEL_OP1_201_77427_20130528_003638_outLine +BABEL_OP1_201_78116_20130304_074916_inLine +BABEL_OP1_201_78116_20130304_074916_outLine +BABEL_OP1_201_78398_20130529_023517_inLine +BABEL_OP1_201_78398_20130529_023517_outLine +BABEL_OP1_201_78943_20130528_034620_inLine +BABEL_OP1_201_78943_20130528_034620_outLine +BABEL_OP1_201_79129_20130524_031851_inLine +BABEL_OP1_201_79129_20130524_031851_outLine +BABEL_OP1_201_79167_20130303_071948_inLine +BABEL_OP1_201_79167_20130303_071948_outLine +BABEL_OP1_201_79167_20130303_093604_inLine +BABEL_OP1_201_79167_20130303_093604_outLine +BABEL_OP1_201_79429_20130216_152022_inLine +BABEL_OP1_201_79429_20130216_152022_outLine +BABEL_OP1_201_80306_20130509_071053_inLine +BABEL_OP1_201_80306_20130509_071053_outLine +BABEL_OP1_201_81287_20130305_141750_inLine +BABEL_OP1_201_81287_20130305_141750_outLine +BABEL_OP1_201_81392_20130304_082518_inLine +BABEL_OP1_201_81392_20130304_082518_outLine +BABEL_OP1_201_81424_20130304_080620_inLine +BABEL_OP1_201_81424_20130304_080620_outLine +BABEL_OP1_201_81433_20130514_063900_inLine +BABEL_OP1_201_81433_20130514_063900_outLine +BABEL_OP1_201_81810_20130302_043825_inLine +BABEL_OP1_201_81810_20130302_043825_outLine +BABEL_OP1_201_81971_20130227_030618_inLine +BABEL_OP1_201_81971_20130227_030618_outLine +BABEL_OP1_201_82123_20130505_053636_inLine +BABEL_OP1_201_82123_20130505_053636_outLine +BABEL_OP1_201_82138_20130509_063904_inLine +BABEL_OP1_201_82138_20130509_063904_outLine +BABEL_OP1_201_82140_20130510_013208_inLine +BABEL_OP1_201_82140_20130510_013208_outLine +BABEL_OP1_201_82637_20130227_044340_inLine +BABEL_OP1_201_82637_20130227_044340_outLine +BABEL_OP1_201_82904_20130427_005507_inLine +BABEL_OP1_201_82904_20130427_005507_outLine +BABEL_OP1_201_82979_20130529_063602_inLine +BABEL_OP1_201_82979_20130529_063602_outLine +BABEL_OP1_201_83238_20130514_054056_inLine +BABEL_OP1_201_83238_20130514_054056_outLine +BABEL_OP1_201_83430_20130210_094011_inLine +BABEL_OP1_201_83430_20130210_094011_outLine +BABEL_OP1_201_83455_20130511_053045_inLine +BABEL_OP1_201_83455_20130511_053045_outLine +BABEL_OP1_201_83625_20130128_091225_inLine +BABEL_OP1_201_83625_20130128_091225_outLine +BABEL_OP1_201_83651_20130604_075201_inLine +BABEL_OP1_201_83651_20130604_075201_outLine +BABEL_OP1_201_83929_20121205_055436_inLine +BABEL_OP1_201_83929_20121205_055436_outLine +BABEL_OP1_201_83929_20121206_061559_inLine +BABEL_OP1_201_83929_20121206_061559_outLine +BABEL_OP1_201_84547_20130227_041326_inLine +BABEL_OP1_201_84547_20130227_041326_outLine +BABEL_OP1_201_84715_20130429_094324_inLine +BABEL_OP1_201_84715_20130429_094324_outLine +BABEL_OP1_201_84936_20130301_073352_inLine +BABEL_OP1_201_84936_20130301_073352_outLine +BABEL_OP1_201_85010_20130206_122216_inLine +BABEL_OP1_201_85010_20130206_122216_outLine 
+BABEL_OP1_201_85047_20130510_055057_inLine +BABEL_OP1_201_85047_20130510_055057_outLine +BABEL_OP1_201_85647_20130511_015627_inLine +BABEL_OP1_201_85647_20130511_015627_outLine +BABEL_OP1_201_86191_20130528_045113_inLine +BABEL_OP1_201_86191_20130528_045113_outLine +BABEL_OP1_201_86191_20130528_051540_inLine +BABEL_OP1_201_86191_20130528_051540_outLine +BABEL_OP1_201_86433_20130303_035210_inLine +BABEL_OP1_201_86433_20130303_035210_outLine +BABEL_OP1_201_86467_20130221_031701_inLine +BABEL_OP1_201_86467_20130221_031701_outLine +BABEL_OP1_201_86557_20130306_054158_inLine +BABEL_OP1_201_86557_20130306_054158_outLine +BABEL_OP1_201_86635_20130227_080743_inLine +BABEL_OP1_201_86635_20130227_080743_outLine +BABEL_OP1_201_86676_20130302_034945_inLine +BABEL_OP1_201_86676_20130302_034945_outLine +BABEL_OP1_201_86888_20130301_011747_inLine +BABEL_OP1_201_86888_20130301_011747_outLine +BABEL_OP1_201_87074_20130529_072238_inLine +BABEL_OP1_201_87074_20130529_072238_outLine +BABEL_OP1_201_87179_20130414_223248_inLine +BABEL_OP1_201_87179_20130414_223248_outLine +BABEL_OP1_201_87298_20130530_035908_inLine +BABEL_OP1_201_87298_20130530_035908_outLine +BABEL_OP1_201_87313_20130228_054816_inLine +BABEL_OP1_201_87313_20130228_054816_outLine +BABEL_OP1_201_87731_20130216_084329_inLine +BABEL_OP1_201_87731_20130216_084329_outLine +BABEL_OP1_201_87796_20130531_043218_inLine +BABEL_OP1_201_87796_20130531_043218_outLine +BABEL_OP1_201_88445_20130228_100123_inLine +BABEL_OP1_201_88445_20130228_100123_outLine +BABEL_OP1_201_88661_20130305_103247_inLine +BABEL_OP1_201_88661_20130305_103247_outLine +BABEL_OP1_201_89059_20130429_001658_inLine +BABEL_OP1_201_89059_20130429_001658_outLine +BABEL_OP1_201_89877_20130602_052802_inLine +BABEL_OP1_201_89877_20130602_052802_outLine +BABEL_OP1_201_90347_20130601_020619_inLine +BABEL_OP1_201_90347_20130601_020619_outLine +BABEL_OP1_201_90777_20130530_043440_inLine +BABEL_OP1_201_90777_20130530_043440_outLine +BABEL_OP1_201_91125_20130301_044113_inLine +BABEL_OP1_201_91125_20130301_044113_outLine +BABEL_OP1_201_91891_20130306_084037_inLine +BABEL_OP1_201_91891_20130306_084037_outLine +BABEL_OP1_201_91944_20130529_030733_inLine +BABEL_OP1_201_91944_20130529_030733_outLine +BABEL_OP1_201_91977_20130228_225341_inLine +BABEL_OP1_201_91977_20130228_225341_outLine +BABEL_OP1_201_92509_20130222_064302_inLine +BABEL_OP1_201_92509_20130222_064302_outLine +BABEL_OP1_201_92557_20130428_115801_inLine +BABEL_OP1_201_92557_20130428_115801_outLine +BABEL_OP1_201_92740_20130301_044629_inLine +BABEL_OP1_201_92740_20130301_044629_outLine +BABEL_OP1_201_93222_20130127_012443_inLine +BABEL_OP1_201_93222_20130127_012443_outLine +BABEL_OP1_201_93604_20130502_071337_inLine +BABEL_OP1_201_93604_20130502_071337_outLine +BABEL_OP1_201_93964_20130511_000644_inLine +BABEL_OP1_201_93964_20130511_000644_outLine +BABEL_OP1_201_94025_20130303_091916_inLine +BABEL_OP1_201_94025_20130303_091916_outLine +BABEL_OP1_201_94316_20130503_072805_inLine +BABEL_OP1_201_94316_20130503_072805_outLine +BABEL_OP1_201_94449_20130704_033336_inLine +BABEL_OP1_201_94449_20130704_033336_outLine +BABEL_OP1_201_94487_20130502_053741_inLine +BABEL_OP1_201_94487_20130502_053741_outLine +BABEL_OP1_201_94869_20130313_052715_inLine +BABEL_OP1_201_94869_20130313_052715_outLine +BABEL_OP1_201_95446_20130430_051750_inLine +BABEL_OP1_201_95446_20130430_051750_outLine +BABEL_OP1_201_96059_20130430_034442_inLine +BABEL_OP1_201_96059_20130430_034442_outLine +BABEL_OP1_201_96376_20130704_011157_inLine 
+BABEL_OP1_201_96376_20130704_011157_outLine +BABEL_OP1_201_96820_20130514_032741_inLine +BABEL_OP1_201_96820_20130514_032741_outLine +BABEL_OP1_201_97363_20130528_063449_inLine +BABEL_OP1_201_97363_20130528_063449_outLine +BABEL_OP1_201_97557_20130228_004756_inLine +BABEL_OP1_201_97557_20130228_004756_outLine +BABEL_OP1_201_99202_20130521_003552_inLine +BABEL_OP1_201_99202_20130521_003552_outLine +BABEL_OP1_201_99955_20130429_001807_inLine +BABEL_OP1_201_99955_20130429_001807_outLine diff --git a/egs/babel/s5d/conf/lists/201-haitian/train.untranscribed.list b/egs/babel/s5d/conf/lists/201-haitian/train.untranscribed.list new file mode 100644 index 00000000000..33da29dd0f7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/201-haitian/train.untranscribed.list @@ -0,0 +1,270 @@ +BABEL_OP1_201_10974_20130512_073026_inLine +BABEL_OP1_201_10974_20130512_073026_outLine +BABEL_OP1_201_11352_20130501_215210_inLine +BABEL_OP1_201_11352_20130501_215210_outLine +BABEL_OP1_201_13040_20130604_010848_inLine +BABEL_OP1_201_13040_20130604_010848_outLine +BABEL_OP1_201_14158_20130301_041642_inLine +BABEL_OP1_201_14158_20130301_041642_outLine +BABEL_OP1_201_15042_20130502_000845_inLine +BABEL_OP1_201_15042_20130502_000845_outLine +BABEL_OP1_201_17573_20130313_093021_inLine +BABEL_OP1_201_17573_20130313_093021_outLine +BABEL_OP1_201_18078_20130430_095821_inLine +BABEL_OP1_201_18078_20130430_095821_outLine +BABEL_OP1_201_19120_20130405_142951_inLine +BABEL_OP1_201_19120_20130405_142951_outLine +BABEL_OP1_201_21581_20130527_033524_inLine +BABEL_OP1_201_21581_20130527_033524_outLine +BABEL_OP1_201_21581_20130527_034908_inLine +BABEL_OP1_201_21581_20130527_034908_outLine +BABEL_OP1_201_22170_20130403_030729_inLine +BABEL_OP1_201_22170_20130403_030729_outLine +BABEL_OP1_201_27478_20130502_010501_inLine +BABEL_OP1_201_27478_20130502_010501_outLine +BABEL_OP1_201_28012_20130427_041255_inLine +BABEL_OP1_201_28012_20130427_041255_outLine +BABEL_OP1_201_28585_20130426_120901_inLine +BABEL_OP1_201_28585_20130426_120901_outLine +BABEL_OP1_201_29039_20130502_123143_inLine +BABEL_OP1_201_29039_20130502_123143_outLine +BABEL_OP1_201_29404_20130428_094208_inLine +BABEL_OP1_201_29404_20130428_094208_outLine +BABEL_OP1_201_29685_20130603_224641_inLine +BABEL_OP1_201_29685_20130603_224641_outLine +BABEL_OP1_201_29777_20130430_071717_inLine +BABEL_OP1_201_29777_20130430_071717_outLine +BABEL_OP1_201_30653_20130501_222756_inLine +BABEL_OP1_201_30653_20130501_222756_outLine +BABEL_OP1_201_31182_20130415_005506_inLine +BABEL_OP1_201_31182_20130415_005506_outLine +BABEL_OP1_201_32872_20130429_221658_inLine +BABEL_OP1_201_32872_20130429_221658_outLine +BABEL_OP1_201_32959_20130323_033657_inLine +BABEL_OP1_201_32959_20130323_033657_outLine +BABEL_OP1_201_35885_20130630_115617_inLine +BABEL_OP1_201_35885_20130630_115617_outLine +BABEL_OP1_201_36059_20130404_104841_inLine +BABEL_OP1_201_36059_20130404_104841_outLine +BABEL_OP1_201_40740_20130429_011150_inLine +BABEL_OP1_201_40740_20130429_011150_outLine +BABEL_OP1_201_41493_20130312_081558_inLine +BABEL_OP1_201_41493_20130312_081558_outLine +BABEL_OP1_201_41920_20130403_050458_inLine +BABEL_OP1_201_41920_20130403_050458_outLine +BABEL_OP1_201_42231_20130306_074634_inLine +BABEL_OP1_201_42231_20130306_074634_outLine +BABEL_OP1_201_42231_20130306_075939_inLine +BABEL_OP1_201_42231_20130306_075939_outLine +BABEL_OP1_201_42600_20130527_055528_inLine +BABEL_OP1_201_42600_20130527_055528_outLine +BABEL_OP1_201_42600_20130527_060503_inLine +BABEL_OP1_201_42600_20130527_060503_outLine 
+BABEL_OP1_201_42718_20130429_001514_inLine +BABEL_OP1_201_42718_20130429_001514_outLine +BABEL_OP1_201_44420_20130603_050431_inLine +BABEL_OP1_201_44420_20130603_050431_outLine +BABEL_OP1_201_45140_20130429_085359_inLine +BABEL_OP1_201_45140_20130429_085359_outLine +BABEL_OP1_201_45777_20130528_001753_inLine +BABEL_OP1_201_45777_20130528_001753_outLine +BABEL_OP1_201_45908_20130430_062256_inLine +BABEL_OP1_201_45908_20130430_062256_outLine +BABEL_OP1_201_46702_20130308_094852_inLine +BABEL_OP1_201_46702_20130308_094852_outLine +BABEL_OP1_201_48200_20130428_230807_inLine +BABEL_OP1_201_48200_20130428_230807_outLine +BABEL_OP1_201_48399_20130426_031102_inLine +BABEL_OP1_201_48399_20130426_031102_outLine +BABEL_OP1_201_48758_20130415_035720_inLine +BABEL_OP1_201_48758_20130415_035720_outLine +BABEL_OP1_201_49812_20130429_013208_inLine +BABEL_OP1_201_49812_20130429_013208_outLine +BABEL_OP1_201_50745_20130501_232950_inLine +BABEL_OP1_201_50745_20130501_232950_outLine +BABEL_OP1_201_50962_20130529_005739_inLine +BABEL_OP1_201_50962_20130529_005739_outLine +BABEL_OP1_201_50962_20130529_013505_inLine +BABEL_OP1_201_50962_20130529_013505_outLine +BABEL_OP1_201_51417_20130429_013022_inLine +BABEL_OP1_201_51417_20130429_013022_outLine +BABEL_OP1_201_51417_20130429_015210_inLine +BABEL_OP1_201_51417_20130429_015210_outLine +BABEL_OP1_201_52614_20130503_045833_inLine +BABEL_OP1_201_52614_20130503_045833_outLine +BABEL_OP1_201_52614_20130503_051217_inLine +BABEL_OP1_201_52614_20130503_051217_outLine +BABEL_OP1_201_53072_20130430_114228_inLine +BABEL_OP1_201_53072_20130430_114228_outLine +BABEL_OP1_201_53419_20130630_034136_inLine +BABEL_OP1_201_53419_20130630_034136_outLine +BABEL_OP1_201_54040_20130701_030051_inLine +BABEL_OP1_201_54040_20130701_030051_outLine +BABEL_OP1_201_54923_20130512_032825_inLine +BABEL_OP1_201_54923_20130512_032825_outLine +BABEL_OP1_201_55013_20130503_054608_inLine +BABEL_OP1_201_55013_20130503_054608_outLine +BABEL_OP1_201_56198_20130529_062601_inLine +BABEL_OP1_201_56198_20130529_062601_outLine +BABEL_OP1_201_56370_20130406_025411_inLine +BABEL_OP1_201_56370_20130406_025411_outLine +BABEL_OP1_201_56429_20130528_053349_inLine +BABEL_OP1_201_56429_20130528_053349_outLine +BABEL_OP1_201_56684_20130430_033812_inLine +BABEL_OP1_201_56684_20130430_033812_outLine +BABEL_OP1_201_57067_20130428_015420_inLine +BABEL_OP1_201_57067_20130428_015420_outLine +BABEL_OP1_201_57654_20130604_021427_inLine +BABEL_OP1_201_57654_20130604_021427_outLine +BABEL_OP1_201_58815_20130701_072119_inLine +BABEL_OP1_201_58815_20130701_072119_outLine +BABEL_OP1_201_58850_20130529_032635_inLine +BABEL_OP1_201_58850_20130529_032635_outLine +BABEL_OP1_201_59993_20130529_074044_inLine +BABEL_OP1_201_59993_20130529_074044_outLine +BABEL_OP1_201_60836_20130603_224729_inLine +BABEL_OP1_201_60836_20130603_224729_outLine +BABEL_OP1_201_62430_20130428_025620_inLine +BABEL_OP1_201_62430_20130428_025620_outLine +BABEL_OP1_201_62852_20130303_042827_inLine +BABEL_OP1_201_62852_20130303_042827_outLine +BABEL_OP1_201_63220_20130227_082602_inLine +BABEL_OP1_201_63220_20130227_082602_outLine +BABEL_OP1_201_63523_20130501_123402_inLine +BABEL_OP1_201_63523_20130501_123402_outLine +BABEL_OP1_201_64796_20130131_073304_inLine +BABEL_OP1_201_64796_20130131_073304_outLine +BABEL_OP1_201_65298_20130427_075419_inLine +BABEL_OP1_201_65298_20130427_075419_outLine +BABEL_OP1_201_66026_20130414_055206_inLine +BABEL_OP1_201_66026_20130414_055206_outLine +BABEL_OP1_201_66837_20130325_095909_inLine 
+BABEL_OP1_201_66837_20130325_095909_outLine +BABEL_OP1_201_66959_20130326_091943_inLine +BABEL_OP1_201_66959_20130326_091943_outLine +BABEL_OP1_201_67373_20130528_075634_inLine +BABEL_OP1_201_67373_20130528_075634_outLine +BABEL_OP1_201_71038_20130430_020855_inLine +BABEL_OP1_201_71038_20130430_020855_outLine +BABEL_OP1_201_71067_20130228_114156_inLine +BABEL_OP1_201_71067_20130228_114156_outLine +BABEL_OP1_201_71282_20130428_011003_inLine +BABEL_OP1_201_71282_20130428_011003_outLine +BABEL_OP1_201_71333_20130527_094400_inLine +BABEL_OP1_201_71333_20130527_094400_outLine +BABEL_OP1_201_71704_20130604_005411_inLine +BABEL_OP1_201_71704_20130604_005411_outLine +BABEL_OP1_201_71780_20130528_070831_inLine +BABEL_OP1_201_71780_20130528_070831_outLine +BABEL_OP1_201_73119_20130529_084814_inLine +BABEL_OP1_201_73119_20130529_084814_outLine +BABEL_OP1_201_74111_20130415_122650_inLine +BABEL_OP1_201_74111_20130415_122650_outLine +BABEL_OP1_201_74253_20130324_094324_inLine +BABEL_OP1_201_74253_20130324_094324_outLine +BABEL_OP1_201_74455_20130429_223748_inLine +BABEL_OP1_201_74455_20130429_223748_outLine +BABEL_OP1_201_75261_20130428_072427_inLine +BABEL_OP1_201_75261_20130428_072427_outLine +BABEL_OP1_201_76372_20130406_002653_inLine +BABEL_OP1_201_76372_20130406_002653_outLine +BABEL_OP1_201_79107_20130704_020050_inLine +BABEL_OP1_201_79107_20130704_020050_outLine +BABEL_OP1_201_80655_20130429_014151_inLine +BABEL_OP1_201_80655_20130429_014151_outLine +BABEL_OP1_201_80721_20130324_011204_inLine +BABEL_OP1_201_80721_20130324_011204_outLine +BABEL_OP1_201_81213_20130604_060123_inLine +BABEL_OP1_201_81213_20130604_060123_outLine +BABEL_OP1_201_82361_20130429_234744_inLine +BABEL_OP1_201_82361_20130429_234744_outLine +BABEL_OP1_201_82966_20130702_014841_inLine +BABEL_OP1_201_82966_20130702_014841_outLine +BABEL_OP1_201_83062_20130428_080508_inLine +BABEL_OP1_201_83062_20130428_080508_outLine +BABEL_OP1_201_83062_20130428_081244_inLine +BABEL_OP1_201_83062_20130428_081244_outLine +BABEL_OP1_201_83545_20130503_013151_inLine +BABEL_OP1_201_83545_20130503_013151_outLine +BABEL_OP1_201_84061_20130528_013733_inLine +BABEL_OP1_201_84061_20130528_013733_outLine +BABEL_OP1_201_85028_20130413_093438_inLine +BABEL_OP1_201_85028_20130413_093438_outLine +BABEL_OP1_201_85248_20130429_023317_inLine +BABEL_OP1_201_85248_20130429_023317_outLine +BABEL_OP1_201_87693_20130528_083347_inLine +BABEL_OP1_201_87693_20130528_083347_outLine +BABEL_OP1_201_88686_20130306_035740_inLine +BABEL_OP1_201_88686_20130306_035740_outLine +BABEL_OP1_201_88686_20130306_040902_inLine +BABEL_OP1_201_88686_20130306_040902_outLine +BABEL_OP1_201_89330_20130630_075430_inLine +BABEL_OP1_201_89330_20130630_075430_outLine +BABEL_OP1_201_89330_20130630_075936_inLine +BABEL_OP1_201_89330_20130630_075936_outLine +BABEL_OP1_201_89372_20130312_074918_inLine +BABEL_OP1_201_89372_20130312_074918_outLine +BABEL_OP1_201_89560_20130415_124517_inLine +BABEL_OP1_201_89560_20130415_124517_outLine +BABEL_OP1_201_89665_20130603_230819_inLine +BABEL_OP1_201_89665_20130603_230819_outLine +BABEL_OP1_201_89794_20130301_115507_inLine +BABEL_OP1_201_89794_20130301_115507_outLine +BABEL_OP1_201_89794_20130303_105823_inLine +BABEL_OP1_201_89794_20130303_105823_outLine +BABEL_OP1_201_90417_20130520_032334_inLine +BABEL_OP1_201_90417_20130520_032334_outLine +BABEL_OP1_201_90935_20130604_012414_inLine +BABEL_OP1_201_90935_20130604_012414_outLine +BABEL_OP1_201_91372_20130704_010321_inLine +BABEL_OP1_201_91372_20130704_010321_outLine 
+BABEL_OP1_201_91581_20130313_100349_inLine +BABEL_OP1_201_91581_20130313_100349_outLine +BABEL_OP1_201_91825_20130226_051913_inLine +BABEL_OP1_201_91825_20130226_051913_outLine +BABEL_OP1_201_92096_20130406_072054_inLine +BABEL_OP1_201_92096_20130406_072054_outLine +BABEL_OP1_201_92356_20130428_015350_inLine +BABEL_OP1_201_92356_20130428_015350_outLine +BABEL_OP1_201_92757_20130604_084623_inLine +BABEL_OP1_201_92757_20130604_084623_outLine +BABEL_OP1_201_92886_20130528_023229_inLine +BABEL_OP1_201_92886_20130528_023229_outLine +BABEL_OP1_201_92941_20130527_095346_inLine +BABEL_OP1_201_92941_20130527_095346_outLine +BABEL_OP1_201_93320_20130630_082741_inLine +BABEL_OP1_201_93320_20130630_082741_outLine +BABEL_OP1_201_93475_20130530_101306_inLine +BABEL_OP1_201_93475_20130530_101306_outLine +BABEL_OP1_201_93946_20130406_073121_inLine +BABEL_OP1_201_93946_20130406_073121_outLine +BABEL_OP1_201_94044_20130429_080249_inLine +BABEL_OP1_201_94044_20130429_080249_outLine +BABEL_OP1_201_95467_20130630_224512_inLine +BABEL_OP1_201_95467_20130630_224512_outLine +BABEL_OP1_201_96088_20130429_045832_inLine +BABEL_OP1_201_96088_20130429_045832_outLine +BABEL_OP1_201_96446_20130426_023651_inLine +BABEL_OP1_201_96446_20130426_023651_outLine +BABEL_OP1_201_97097_20130502_025744_inLine +BABEL_OP1_201_97097_20130502_025744_outLine +BABEL_OP1_201_97264_20130429_083940_inLine +BABEL_OP1_201_97264_20130429_083940_outLine +BABEL_OP1_201_97988_20130320_082635_inLine +BABEL_OP1_201_97988_20130320_082635_outLine +BABEL_OP1_201_98506_20130430_082503_inLine +BABEL_OP1_201_98506_20130430_082503_outLine +BABEL_OP1_201_98678_20130403_061826_inLine +BABEL_OP1_201_98678_20130403_061826_outLine +BABEL_OP1_201_98909_20130529_002845_inLine +BABEL_OP1_201_98909_20130529_002845_outLine +BABEL_OP1_201_98909_20130529_003625_inLine +BABEL_OP1_201_98909_20130529_003625_outLine +BABEL_OP1_201_98909_20130529_004310_inLine +BABEL_OP1_201_98909_20130529_004310_outLine +BABEL_OP1_201_98909_20130529_004845_inLine +BABEL_OP1_201_98909_20130529_004845_outLine +BABEL_OP1_201_99516_20130319_061728_inLine +BABEL_OP1_201_99516_20130319_061728_outLine +BABEL_OP1_201_99516_20130320_023645_inLine +BABEL_OP1_201_99516_20130320_023645_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/dev.list b/egs/babel/s5d/conf/lists/202-swahili/dev.list new file mode 100644 index 00000000000..21ae20c66d7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/dev.list @@ -0,0 +1,142 @@ +BABEL_OP2_202_10524_20131009_200043_inLine +BABEL_OP2_202_10524_20131009_200043_outLine +BABEL_OP2_202_12635_20131101_212012_inLine +BABEL_OP2_202_12635_20131101_212012_outLine +BABEL_OP2_202_12635_20131101_213218_inLine +BABEL_OP2_202_12635_20131101_213218_outLine +BABEL_OP2_202_14814_20140205_210842_inLine +BABEL_OP2_202_14814_20140205_210842_outLine +BABEL_OP2_202_15420_20140210_010333_inLine +BABEL_OP2_202_15420_20140210_010333_outLine +BABEL_OP2_202_16249_20131202_232723_inLine +BABEL_OP2_202_16249_20131202_232723_outLine +BABEL_OP2_202_17115_20140218_210921_inLine +BABEL_OP2_202_17115_20140218_210921_outLine +BABEL_OP2_202_18766_20140218_222017_inLine +BABEL_OP2_202_18766_20140218_222017_outLine +BABEL_OP2_202_24239_20140206_191516_inLine +BABEL_OP2_202_24239_20140206_191516_outLine +BABEL_OP2_202_24290_20140219_000423_inLine +BABEL_OP2_202_24290_20140219_000423_outLine +BABEL_OP2_202_25085_20140219_185114_inLine +BABEL_OP2_202_25085_20140219_185114_outLine +BABEL_OP2_202_25242_20131203_015232_inLine +BABEL_OP2_202_25242_20131203_015232_outLine 
+BABEL_OP2_202_27478_20140209_224101_inLine +BABEL_OP2_202_27478_20140209_224101_outLine +BABEL_OP2_202_29633_20131009_175514_inLine +BABEL_OP2_202_29633_20131009_175514_outLine +BABEL_OP2_202_29663_20131208_035816_inLine +BABEL_OP2_202_29663_20131208_035816_outLine +BABEL_OP2_202_32287_20131207_203757_inLine +BABEL_OP2_202_32287_20131207_203757_outLine +BABEL_OP2_202_33273_20130219_205419_inLine +BABEL_OP2_202_33273_20130219_205419_outLine +BABEL_OP2_202_33273_20130219_224915_inLine +BABEL_OP2_202_33273_20130219_224915_outLine +BABEL_OP2_202_34197_20121228_201800_inLine +BABEL_OP2_202_34197_20121228_201800_outLine +BABEL_OP2_202_38588_20130228_211322_inLine +BABEL_OP2_202_38588_20130228_211322_outLine +BABEL_OP2_202_39893_20140115_023429_inLine +BABEL_OP2_202_39893_20140115_023429_outLine +BABEL_OP2_202_44309_20140220_184116_inLine +BABEL_OP2_202_44309_20140220_184116_outLine +BABEL_OP2_202_44478_20131011_041636_inLine +BABEL_OP2_202_44478_20131011_041636_outLine +BABEL_OP2_202_45459_20131012_022245_inLine +BABEL_OP2_202_45459_20131012_022245_outLine +BABEL_OP2_202_46169_20131128_183232_inLine +BABEL_OP2_202_46169_20131128_183232_outLine +BABEL_OP2_202_46169_20131128_184600_inLine +BABEL_OP2_202_46169_20131128_184600_outLine +BABEL_OP2_202_46681_20130109_191412_inLine +BABEL_OP2_202_46681_20130109_191412_outLine +BABEL_OP2_202_47405_20131215_233528_inLine +BABEL_OP2_202_47405_20131215_233528_outLine +BABEL_OP2_202_48844_20130108_190416_inLine +BABEL_OP2_202_48844_20130108_190416_outLine +BABEL_OP2_202_52265_20140123_235252_inLine +BABEL_OP2_202_52265_20140123_235252_outLine +BABEL_OP2_202_53957_20131031_012125_inLine +BABEL_OP2_202_53957_20131031_012125_outLine +BABEL_OP2_202_54046_20140121_184347_inLine +BABEL_OP2_202_54046_20140121_184347_outLine +BABEL_OP2_202_55042_20131217_033729_inLine +BABEL_OP2_202_55042_20131217_033729_outLine +BABEL_OP2_202_55106_20131215_030617_inLine +BABEL_OP2_202_55106_20131215_030617_outLine +BABEL_OP2_202_55902_20140121_230205_inLine +BABEL_OP2_202_55902_20140121_230205_outLine +BABEL_OP2_202_59091_20140130_225624_inLine +BABEL_OP2_202_59091_20140130_225624_outLine +BABEL_OP2_202_59549_20131003_203701_inLine +BABEL_OP2_202_59549_20131003_203701_outLine +BABEL_OP2_202_59549_20131003_204655_inLine +BABEL_OP2_202_59549_20131003_204655_outLine +BABEL_OP2_202_60650_20131126_234235_inLine +BABEL_OP2_202_60650_20131126_234235_outLine +BABEL_OP2_202_61440_20140128_015556_inLine +BABEL_OP2_202_61440_20140128_015556_outLine +BABEL_OP2_202_63084_20130801_014407_inLine +BABEL_OP2_202_63084_20130801_014407_outLine +BABEL_OP2_202_63084_20130801_015957_inLine +BABEL_OP2_202_63084_20130801_015957_outLine +BABEL_OP2_202_63336_20140129_004138_inLine +BABEL_OP2_202_63336_20140129_004138_outLine +BABEL_OP2_202_63484_20140128_234153_inLine +BABEL_OP2_202_63484_20140128_234153_outLine +BABEL_OP2_202_63604_20121231_193706_inLine +BABEL_OP2_202_63604_20121231_193706_outLine +BABEL_OP2_202_63787_20130108_202518_inLine +BABEL_OP2_202_63787_20130108_202518_outLine +BABEL_OP2_202_63787_20130108_203416_inLine +BABEL_OP2_202_63787_20130108_203416_outLine +BABEL_OP2_202_66177_20140201_213827_inLine +BABEL_OP2_202_66177_20140201_213827_outLine +BABEL_OP2_202_66822_20130219_222318_inLine +BABEL_OP2_202_66822_20130219_222318_outLine +BABEL_OP2_202_66822_20130219_225918_inLine +BABEL_OP2_202_66822_20130219_225918_outLine +BABEL_OP2_202_68384_20131031_003533_inLine +BABEL_OP2_202_68384_20131031_003533_outLine +BABEL_OP2_202_68924_20130924_231821_inLine 
+BABEL_OP2_202_68924_20130924_231821_outLine +BABEL_OP2_202_69964_20131012_170534_inLine +BABEL_OP2_202_69964_20131012_170534_outLine +BABEL_OP2_202_72040_20131002_213605_inLine +BABEL_OP2_202_72040_20131002_213605_outLine +BABEL_OP2_202_73258_20130215_190454_inLine +BABEL_OP2_202_73258_20130215_190454_outLine +BABEL_OP2_202_73301_20140226_185528_inLine +BABEL_OP2_202_73301_20140226_185528_outLine +BABEL_OP2_202_73819_20130911_163458_inLine +BABEL_OP2_202_73819_20130911_163458_outLine +BABEL_OP2_202_73819_20130927_003321_inLine +BABEL_OP2_202_73819_20130927_003321_outLine +BABEL_OP2_202_75993_20140115_210258_inLine +BABEL_OP2_202_75993_20140115_210258_outLine +BABEL_OP2_202_76756_20130417_204823_inLine +BABEL_OP2_202_76756_20130417_204823_outLine +BABEL_OP2_202_76756_20130417_210400_inLine +BABEL_OP2_202_76756_20130417_210400_outLine +BABEL_OP2_202_77990_20131007_063102_inLine +BABEL_OP2_202_77990_20131007_063102_outLine +BABEL_OP2_202_82637_20121227_193227_inLine +BABEL_OP2_202_82637_20121227_193227_outLine +BABEL_OP2_202_82637_20121227_205425_inLine +BABEL_OP2_202_82637_20121227_205425_outLine +BABEL_OP2_202_84177_20131208_021104_inLine +BABEL_OP2_202_84177_20131208_021104_outLine +BABEL_OP2_202_88260_20130227_194941_inLine +BABEL_OP2_202_88260_20130227_194941_outLine +BABEL_OP2_202_88661_20130801_192922_inLine +BABEL_OP2_202_88661_20130801_192922_outLine +BABEL_OP2_202_90080_20140319_222809_inLine +BABEL_OP2_202_90080_20140319_222809_outLine +BABEL_OP2_202_92740_20130923_235638_inLine +BABEL_OP2_202_92740_20130923_235638_outLine +BABEL_OP2_202_98311_20130109_191639_inLine +BABEL_OP2_202_98311_20130109_191639_outLine +BABEL_OP2_202_98311_20130109_195922_inLine +BABEL_OP2_202_98311_20130109_195922_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/eval.list b/egs/babel/s5d/conf/lists/202-swahili/eval.list new file mode 100644 index 00000000000..8fb4fe490bf --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/eval.list @@ -0,0 +1,963 @@ +BABEL_OP2_202_10019_20130928_235503_inLine +BABEL_OP2_202_10019_20130928_235503_outLine +BABEL_OP2_202_10416_20130215_183832_inLine +BABEL_OP2_202_10416_20130215_183832_outLine +BABEL_OP2_202_11681_20131005_155822_inLine +BABEL_OP2_202_11681_20131005_155822_outLine +BABEL_OP2_202_11723_20131130_201430_inLine +BABEL_OP2_202_11723_20131130_201430_outLine +BABEL_OP2_202_11797_20130104_222532_inLine +BABEL_OP2_202_11797_20130104_222532_outLine +BABEL_OP2_202_12220_20130312_022037_inLine +BABEL_OP2_202_12220_20130312_022037_outLine +BABEL_OP2_202_12321_20140210_015215_inLine +BABEL_OP2_202_12321_20140210_015215_outLine +BABEL_OP2_202_12606_20131010_030508_inLine +BABEL_OP2_202_12606_20131010_030508_outLine +BABEL_OP2_202_13040_20131005_180024_inLine +BABEL_OP2_202_13040_20131005_180024_outLine +BABEL_OP2_202_13909_20140207_075853_inLine +BABEL_OP2_202_13909_20140207_075853_outLine +BABEL_OP2_202_13929_20140205_042603_inLine +BABEL_OP2_202_13929_20140205_042603_outLine +BABEL_OP2_202_14137_20131219_015746_inLine +BABEL_OP2_202_14137_20131219_015746_outLine +BABEL_OP2_202_14141_20131009_061849_inLine +BABEL_OP2_202_14141_20131009_061849_outLine +BABEL_OP2_202_14179_20130926_175610_inLine +BABEL_OP2_202_14179_20130926_175610_outLine +BABEL_OP2_202_14228_20131017_195830_inLine +BABEL_OP2_202_14228_20131017_195830_outLine +BABEL_OP2_202_14229_20140208_071149_inLine +BABEL_OP2_202_14229_20140208_071149_outLine +BABEL_OP2_202_14440_20130503_203601_inLine +BABEL_OP2_202_14440_20130503_203601_outLine +BABEL_OP2_202_14440_20130503_204507_inLine 
+BABEL_OP2_202_14440_20130503_204507_outLine +BABEL_OP2_202_14537_20131016_202630_inLine +BABEL_OP2_202_14537_20131016_202630_outLine +BABEL_OP2_202_14725_20130104_004026_inLine +BABEL_OP2_202_14725_20130104_004026_outLine +BABEL_OP2_202_14807_20140207_040450_inLine +BABEL_OP2_202_14807_20140207_040450_outLine +BABEL_OP2_202_15902_20130108_191503_inLine +BABEL_OP2_202_15902_20130108_191503_outLine +BABEL_OP2_202_16056_20130105_232626_inLine +BABEL_OP2_202_16056_20130105_232626_outLine +BABEL_OP2_202_16056_20130105_235157_inLine +BABEL_OP2_202_16056_20130105_235157_outLine +BABEL_OP2_202_16407_20131203_231519_inLine +BABEL_OP2_202_16407_20131203_231519_outLine +BABEL_OP2_202_16467_20131101_192502_inLine +BABEL_OP2_202_16467_20131101_192502_outLine +BABEL_OP2_202_16475_20130222_200416_inLine +BABEL_OP2_202_16475_20130222_200416_outLine +BABEL_OP2_202_16787_20130220_000429_inLine +BABEL_OP2_202_16787_20130220_000429_outLine +BABEL_OP2_202_17280_20130312_211445_inLine +BABEL_OP2_202_17280_20130312_211445_outLine +BABEL_OP2_202_17440_20131018_012538_inLine +BABEL_OP2_202_17440_20131018_012538_outLine +BABEL_OP2_202_17511_20140205_051449_inLine +BABEL_OP2_202_17511_20140205_051449_outLine +BABEL_OP2_202_17751_20140207_220944_inLine +BABEL_OP2_202_17751_20140207_220944_outLine +BABEL_OP2_202_17881_20131010_011054_inLine +BABEL_OP2_202_17881_20131010_011054_outLine +BABEL_OP2_202_17923_20131004_055753_inLine +BABEL_OP2_202_17923_20131004_055753_outLine +BABEL_OP2_202_18291_20140207_215404_inLine +BABEL_OP2_202_18291_20140207_215404_outLine +BABEL_OP2_202_18380_20130213_000457_inLine +BABEL_OP2_202_18380_20130213_000457_outLine +BABEL_OP2_202_18731_20131128_043434_inLine +BABEL_OP2_202_18731_20131128_043434_outLine +BABEL_OP2_202_19440_20131129_002711_inLine +BABEL_OP2_202_19440_20131129_002711_outLine +BABEL_OP2_202_19444_20131128_200206_inLine +BABEL_OP2_202_19444_20131128_200206_outLine +BABEL_OP2_202_19461_20131129_203023_inLine +BABEL_OP2_202_19461_20131129_203023_outLine +BABEL_OP2_202_19545_20130927_190707_inLine +BABEL_OP2_202_19545_20130927_190707_outLine +BABEL_OP2_202_19621_20130930_034444_inLine +BABEL_OP2_202_19621_20130930_034444_outLine +BABEL_OP2_202_19663_20130220_221050_inLine +BABEL_OP2_202_19663_20130220_221050_outLine +BABEL_OP2_202_19699_20131127_214845_inLine +BABEL_OP2_202_19699_20131127_214845_outLine +BABEL_OP2_202_20738_20131029_183614_inLine +BABEL_OP2_202_20738_20131029_183614_outLine +BABEL_OP2_202_20896_20131220_001523_inLine +BABEL_OP2_202_20896_20131220_001523_outLine +BABEL_OP2_202_20985_20130920_011520_inLine +BABEL_OP2_202_20985_20130920_011520_outLine +BABEL_OP2_202_21029_20131004_003216_inLine +BABEL_OP2_202_21029_20131004_003216_outLine +BABEL_OP2_202_21029_20131004_003949_inLine +BABEL_OP2_202_21029_20131004_003949_outLine +BABEL_OP2_202_21393_20140209_001300_inLine +BABEL_OP2_202_21393_20140209_001300_outLine +BABEL_OP2_202_21435_20131010_024821_inLine +BABEL_OP2_202_21435_20131010_024821_outLine +BABEL_OP2_202_21794_20130219_010105_inLine +BABEL_OP2_202_21794_20130219_010105_outLine +BABEL_OP2_202_22021_20131203_222315_inLine +BABEL_OP2_202_22021_20131203_222315_outLine +BABEL_OP2_202_22021_20131203_223002_inLine +BABEL_OP2_202_22021_20131203_223002_outLine +BABEL_OP2_202_22321_20130104_190713_inLine +BABEL_OP2_202_22321_20130104_190713_outLine +BABEL_OP2_202_22591_20131212_031002_inLine +BABEL_OP2_202_22591_20131212_031002_outLine +BABEL_OP2_202_22641_20131011_032157_inLine +BABEL_OP2_202_22641_20131011_032157_outLine 
+BABEL_OP2_202_23260_20131011_024723_inLine +BABEL_OP2_202_23260_20131011_024723_outLine +BABEL_OP2_202_23355_20131128_005023_inLine +BABEL_OP2_202_23355_20131128_005023_outLine +BABEL_OP2_202_23505_20130107_235621_inLine +BABEL_OP2_202_23505_20130107_235621_outLine +BABEL_OP2_202_23700_20131202_222611_inLine +BABEL_OP2_202_23700_20131202_222611_outLine +BABEL_OP2_202_23731_20130930_020336_inLine +BABEL_OP2_202_23731_20130930_020336_outLine +BABEL_OP2_202_23893_20140207_234800_inLine +BABEL_OP2_202_23893_20140207_234800_outLine +BABEL_OP2_202_23983_20131012_000311_inLine +BABEL_OP2_202_23983_20131012_000311_outLine +BABEL_OP2_202_23995_20131101_185025_inLine +BABEL_OP2_202_23995_20131101_185025_outLine +BABEL_OP2_202_24044_20140108_215521_inLine +BABEL_OP2_202_24044_20140108_215521_outLine +BABEL_OP2_202_24044_20140108_220416_inLine +BABEL_OP2_202_24044_20140108_220416_outLine +BABEL_OP2_202_24221_20131120_203727_inLine +BABEL_OP2_202_24221_20131120_203727_outLine +BABEL_OP2_202_24231_20131012_020132_inLine +BABEL_OP2_202_24231_20131012_020132_outLine +BABEL_OP2_202_24270_20130111_201422_inLine +BABEL_OP2_202_24270_20130111_201422_outLine +BABEL_OP2_202_24323_20130221_203951_inLine +BABEL_OP2_202_24323_20130221_203951_outLine +BABEL_OP2_202_24924_20140207_235730_inLine +BABEL_OP2_202_24924_20140207_235730_outLine +BABEL_OP2_202_25012_20131201_002441_inLine +BABEL_OP2_202_25012_20131201_002441_outLine +BABEL_OP2_202_25961_20130103_204145_inLine +BABEL_OP2_202_25961_20130103_204145_outLine +BABEL_OP2_202_26869_20131216_035718_inLine +BABEL_OP2_202_26869_20131216_035718_outLine +BABEL_OP2_202_28190_20131101_213802_inLine +BABEL_OP2_202_28190_20131101_213802_outLine +BABEL_OP2_202_28422_20130924_010422_inLine +BABEL_OP2_202_28422_20130924_010422_outLine +BABEL_OP2_202_28775_20131004_012212_inLine +BABEL_OP2_202_28775_20131004_012212_outLine +BABEL_OP2_202_28945_20131218_230914_inLine +BABEL_OP2_202_28945_20131218_230914_outLine +BABEL_OP2_202_28945_20131218_232558_inLine +BABEL_OP2_202_28945_20131218_232558_outLine +BABEL_OP2_202_29072_20130930_024744_inLine +BABEL_OP2_202_29072_20130930_024744_outLine +BABEL_OP2_202_29135_20121228_185350_inLine +BABEL_OP2_202_29135_20121228_185350_outLine +BABEL_OP2_202_29208_20130220_203235_inLine +BABEL_OP2_202_29208_20130220_203235_outLine +BABEL_OP2_202_29352_20140209_224923_inLine +BABEL_OP2_202_29352_20140209_224923_outLine +BABEL_OP2_202_29416_20131031_232830_inLine +BABEL_OP2_202_29416_20131031_232830_outLine +BABEL_OP2_202_29643_20131016_010339_inLine +BABEL_OP2_202_29643_20131016_010339_outLine +BABEL_OP2_202_30013_20130930_190028_inLine +BABEL_OP2_202_30013_20130930_190028_outLine +BABEL_OP2_202_30058_20131009_163633_inLine +BABEL_OP2_202_30058_20131009_163633_outLine +BABEL_OP2_202_30180_20130311_225750_inLine +BABEL_OP2_202_30180_20130311_225750_outLine +BABEL_OP2_202_30250_20121228_195004_inLine +BABEL_OP2_202_30250_20121228_195004_outLine +BABEL_OP2_202_30250_20121228_195937_inLine +BABEL_OP2_202_30250_20121228_195937_outLine +BABEL_OP2_202_30497_20131010_013817_inLine +BABEL_OP2_202_30497_20131010_013817_outLine +BABEL_OP2_202_30720_20140204_222643_inLine +BABEL_OP2_202_30720_20140204_222643_outLine +BABEL_OP2_202_31074_20131211_235228_inLine +BABEL_OP2_202_31074_20131211_235228_outLine +BABEL_OP2_202_31484_20130912_200823_inLine +BABEL_OP2_202_31484_20130912_200823_outLine +BABEL_OP2_202_31668_20131128_224714_inLine +BABEL_OP2_202_31668_20131128_224714_outLine +BABEL_OP2_202_31992_20130108_181649_inLine 
+BABEL_OP2_202_31992_20130108_181649_outLine +BABEL_OP2_202_32244_20131012_215830_inLine +BABEL_OP2_202_32244_20131012_215830_outLine +BABEL_OP2_202_32708_20131002_201520_inLine +BABEL_OP2_202_32708_20131002_201520_outLine +BABEL_OP2_202_32727_20131218_221722_inLine +BABEL_OP2_202_32727_20131218_221722_outLine +BABEL_OP2_202_32832_20140108_220009_inLine +BABEL_OP2_202_32832_20140108_220009_outLine +BABEL_OP2_202_32861_20140109_212532_inLine +BABEL_OP2_202_32861_20140109_212532_outLine +BABEL_OP2_202_33111_20131009_060839_inLine +BABEL_OP2_202_33111_20131009_060839_outLine +BABEL_OP2_202_33355_20130107_235044_inLine +BABEL_OP2_202_33355_20130107_235044_outLine +BABEL_OP2_202_33672_20131005_004220_inLine +BABEL_OP2_202_33672_20131005_004220_outLine +BABEL_OP2_202_33933_20131129_215148_inLine +BABEL_OP2_202_33933_20131129_215148_outLine +BABEL_OP2_202_34208_20131203_234204_inLine +BABEL_OP2_202_34208_20131203_234204_outLine +BABEL_OP2_202_34328_20130212_200451_inLine +BABEL_OP2_202_34328_20130212_200451_outLine +BABEL_OP2_202_34336_20131001_231731_inLine +BABEL_OP2_202_34336_20131001_231731_outLine +BABEL_OP2_202_34477_20131001_021709_inLine +BABEL_OP2_202_34477_20131001_021709_outLine +BABEL_OP2_202_34679_20131004_003331_inLine +BABEL_OP2_202_34679_20131004_003331_outLine +BABEL_OP2_202_34899_20140222_024004_inLine +BABEL_OP2_202_34899_20140222_024004_outLine +BABEL_OP2_202_35420_20131129_013142_inLine +BABEL_OP2_202_35420_20131129_013142_outLine +BABEL_OP2_202_35583_20131220_024235_inLine +BABEL_OP2_202_35583_20131220_024235_outLine +BABEL_OP2_202_36039_20131009_024333_inLine +BABEL_OP2_202_36039_20131009_024333_outLine +BABEL_OP2_202_36059_20131009_055135_inLine +BABEL_OP2_202_36059_20131009_055135_outLine +BABEL_OP2_202_36341_20121228_171758_inLine +BABEL_OP2_202_36341_20121228_171758_outLine +BABEL_OP2_202_36505_20131029_215901_inLine +BABEL_OP2_202_36505_20131029_215901_outLine +BABEL_OP2_202_36632_20131216_000901_inLine +BABEL_OP2_202_36632_20131216_000901_outLine +BABEL_OP2_202_36900_20140114_232052_inLine +BABEL_OP2_202_36900_20140114_232052_outLine +BABEL_OP2_202_37007_20140115_001317_inLine +BABEL_OP2_202_37007_20140115_001317_outLine +BABEL_OP2_202_37228_20140114_192926_inLine +BABEL_OP2_202_37228_20140114_192926_outLine +BABEL_OP2_202_37594_20131130_000553_inLine +BABEL_OP2_202_37594_20131130_000553_outLine +BABEL_OP2_202_38125_20131012_003034_inLine +BABEL_OP2_202_38125_20131012_003034_outLine +BABEL_OP2_202_38340_20131002_224306_inLine +BABEL_OP2_202_38340_20131002_224306_outLine +BABEL_OP2_202_38431_20140115_013439_inLine +BABEL_OP2_202_38431_20140115_013439_outLine +BABEL_OP2_202_38664_20130315_010258_inLine +BABEL_OP2_202_38664_20130315_010258_outLine +BABEL_OP2_202_38741_20131003_014930_inLine +BABEL_OP2_202_38741_20131003_014930_outLine +BABEL_OP2_202_38878_20130422_220726_inLine +BABEL_OP2_202_38878_20130422_220726_outLine +BABEL_OP2_202_38979_20131129_213001_inLine +BABEL_OP2_202_38979_20131129_213001_outLine +BABEL_OP2_202_39059_20140114_231114_inLine +BABEL_OP2_202_39059_20140114_231114_outLine +BABEL_OP2_202_39099_20131012_024904_inLine +BABEL_OP2_202_39099_20131012_024904_outLine +BABEL_OP2_202_39277_20131127_183847_inLine +BABEL_OP2_202_39277_20131127_183847_outLine +BABEL_OP2_202_39848_20130218_212103_inLine +BABEL_OP2_202_39848_20130218_212103_outLine +BABEL_OP2_202_39927_20131220_231538_inLine +BABEL_OP2_202_39927_20131220_231538_outLine +BABEL_OP2_202_40092_20131216_000009_inLine +BABEL_OP2_202_40092_20131216_000009_outLine 
+BABEL_OP2_202_40196_20140115_000031_inLine +BABEL_OP2_202_40196_20140115_000031_outLine +BABEL_OP2_202_40713_20131002_202108_inLine +BABEL_OP2_202_40713_20131002_202108_outLine +BABEL_OP2_202_41097_20130425_211559_inLine +BABEL_OP2_202_41097_20130425_211559_outLine +BABEL_OP2_202_41100_20130109_000954_inLine +BABEL_OP2_202_41100_20130109_000954_outLine +BABEL_OP2_202_41109_20140114_212001_inLine +BABEL_OP2_202_41109_20140114_212001_outLine +BABEL_OP2_202_41272_20131011_014240_inLine +BABEL_OP2_202_41272_20131011_014240_outLine +BABEL_OP2_202_41400_20140221_225008_inLine +BABEL_OP2_202_41400_20140221_225008_outLine +BABEL_OP2_202_41682_20131128_174746_inLine +BABEL_OP2_202_41682_20131128_174746_outLine +BABEL_OP2_202_41685_20131128_221337_inLine +BABEL_OP2_202_41685_20131128_221337_outLine +BABEL_OP2_202_41692_20131013_044515_inLine +BABEL_OP2_202_41692_20131013_044515_outLine +BABEL_OP2_202_41741_20130108_182526_inLine +BABEL_OP2_202_41741_20130108_182526_outLine +BABEL_OP2_202_41920_20130103_233550_inLine +BABEL_OP2_202_41920_20130103_233550_outLine +BABEL_OP2_202_42497_20131003_225108_inLine +BABEL_OP2_202_42497_20131003_225108_outLine +BABEL_OP2_202_42848_20131015_223830_inLine +BABEL_OP2_202_42848_20131015_223830_outLine +BABEL_OP2_202_42883_20131016_211736_inLine +BABEL_OP2_202_42883_20131016_211736_outLine +BABEL_OP2_202_43074_20140227_192107_inLine +BABEL_OP2_202_43074_20140227_192107_outLine +BABEL_OP2_202_43368_20130930_024429_inLine +BABEL_OP2_202_43368_20130930_024429_outLine +BABEL_OP2_202_43388_20130215_192049_inLine +BABEL_OP2_202_43388_20130215_192049_outLine +BABEL_OP2_202_43588_20131011_193321_inLine +BABEL_OP2_202_43588_20131011_193321_outLine +BABEL_OP2_202_43789_20130213_194416_inLine +BABEL_OP2_202_43789_20130213_194416_outLine +BABEL_OP2_202_44114_20140221_202130_inLine +BABEL_OP2_202_44114_20140221_202130_outLine +BABEL_OP2_202_44619_20131003_023727_inLine +BABEL_OP2_202_44619_20131003_023727_outLine +BABEL_OP2_202_44678_20131128_010554_inLine +BABEL_OP2_202_44678_20131128_010554_outLine +BABEL_OP2_202_44681_20131218_213752_inLine +BABEL_OP2_202_44681_20131218_213752_outLine +BABEL_OP2_202_44681_20131218_214913_inLine +BABEL_OP2_202_44681_20131218_214913_outLine +BABEL_OP2_202_45121_20131014_040623_inLine +BABEL_OP2_202_45121_20131014_040623_outLine +BABEL_OP2_202_45374_20131217_005647_inLine +BABEL_OP2_202_45374_20131217_005647_outLine +BABEL_OP2_202_45560_20130105_195053_inLine +BABEL_OP2_202_45560_20130105_195053_outLine +BABEL_OP2_202_45642_20130109_010614_inLine +BABEL_OP2_202_45642_20130109_010614_outLine +BABEL_OP2_202_45697_20131029_204657_inLine +BABEL_OP2_202_45697_20131029_204657_outLine +BABEL_OP2_202_45699_20131202_203725_inLine +BABEL_OP2_202_45699_20131202_203725_outLine +BABEL_OP2_202_45770_20130105_212856_inLine +BABEL_OP2_202_45770_20130105_212856_outLine +BABEL_OP2_202_45777_20130930_215344_inLine +BABEL_OP2_202_45777_20130930_215344_outLine +BABEL_OP2_202_45777_20130930_220539_inLine +BABEL_OP2_202_45777_20130930_220539_outLine +BABEL_OP2_202_45851_20131011_210832_inLine +BABEL_OP2_202_45851_20131011_210832_outLine +BABEL_OP2_202_46008_20131017_180431_inLine +BABEL_OP2_202_46008_20131017_180431_outLine +BABEL_OP2_202_46333_20140225_002629_inLine +BABEL_OP2_202_46333_20140225_002629_outLine +BABEL_OP2_202_46535_20131202_194843_inLine +BABEL_OP2_202_46535_20131202_194843_outLine +BABEL_OP2_202_46712_20140130_195615_inLine +BABEL_OP2_202_46712_20140130_195615_outLine +BABEL_OP2_202_46905_20131130_194813_inLine 
+BABEL_OP2_202_46905_20131130_194813_outLine +BABEL_OP2_202_46974_20130729_181547_inLine +BABEL_OP2_202_46974_20130729_181547_outLine +BABEL_OP2_202_47215_20131005_012123_inLine +BABEL_OP2_202_47215_20131005_012123_outLine +BABEL_OP2_202_47283_20131003_211344_inLine +BABEL_OP2_202_47283_20131003_211344_outLine +BABEL_OP2_202_47959_20131004_035713_inLine +BABEL_OP2_202_47959_20131004_035713_outLine +BABEL_OP2_202_48016_20140220_204253_inLine +BABEL_OP2_202_48016_20140220_204253_outLine +BABEL_OP2_202_48024_20131202_232637_inLine +BABEL_OP2_202_48024_20131202_232637_outLine +BABEL_OP2_202_48610_20130107_203818_inLine +BABEL_OP2_202_48610_20130107_203818_outLine +BABEL_OP2_202_48663_20140222_012910_inLine +BABEL_OP2_202_48663_20140222_012910_outLine +BABEL_OP2_202_48758_20131009_051338_inLine +BABEL_OP2_202_48758_20131009_051338_outLine +BABEL_OP2_202_48789_20130212_210506_inLine +BABEL_OP2_202_48789_20130212_210506_outLine +BABEL_OP2_202_49197_20130222_010455_inLine +BABEL_OP2_202_49197_20130222_010455_outLine +BABEL_OP2_202_49637_20130103_203801_inLine +BABEL_OP2_202_49637_20130103_203801_outLine +BABEL_OP2_202_49767_20140221_192835_inLine +BABEL_OP2_202_49767_20140221_192835_outLine +BABEL_OP2_202_50090_20130425_175113_inLine +BABEL_OP2_202_50090_20130425_175113_outLine +BABEL_OP2_202_50175_20131010_020435_inLine +BABEL_OP2_202_50175_20131010_020435_outLine +BABEL_OP2_202_50565_20121228_195128_inLine +BABEL_OP2_202_50565_20121228_195128_outLine +BABEL_OP2_202_50601_20130911_001026_inLine +BABEL_OP2_202_50601_20130911_001026_outLine +BABEL_OP2_202_50630_20130926_021713_inLine +BABEL_OP2_202_50630_20130926_021713_outLine +BABEL_OP2_202_50745_20131010_001443_inLine +BABEL_OP2_202_50745_20131010_001443_outLine +BABEL_OP2_202_50779_20130911_004921_inLine +BABEL_OP2_202_50779_20130911_004921_outLine +BABEL_OP2_202_50958_20130219_215809_inLine +BABEL_OP2_202_50958_20130219_215809_outLine +BABEL_OP2_202_50962_20131002_203346_inLine +BABEL_OP2_202_50962_20131002_203346_outLine +BABEL_OP2_202_51414_20131012_225839_inLine +BABEL_OP2_202_51414_20131012_225839_outLine +BABEL_OP2_202_51530_20131012_011011_inLine +BABEL_OP2_202_51530_20131012_011011_outLine +BABEL_OP2_202_51701_20140123_205529_inLine +BABEL_OP2_202_51701_20140123_205529_outLine +BABEL_OP2_202_52058_20131128_233329_inLine +BABEL_OP2_202_52058_20131128_233329_outLine +BABEL_OP2_202_52070_20140124_231122_inLine +BABEL_OP2_202_52070_20140124_231122_outLine +BABEL_OP2_202_52222_20131126_183055_inLine +BABEL_OP2_202_52222_20131126_183055_outLine +BABEL_OP2_202_52301_20140122_233501_inLine +BABEL_OP2_202_52301_20140122_233501_outLine +BABEL_OP2_202_52447_20131014_001157_inLine +BABEL_OP2_202_52447_20131014_001157_outLine +BABEL_OP2_202_52483_20140123_011106_inLine +BABEL_OP2_202_52483_20140123_011106_outLine +BABEL_OP2_202_52614_20131011_162942_inLine +BABEL_OP2_202_52614_20131011_162942_outLine +BABEL_OP2_202_52717_20130107_190619_inLine +BABEL_OP2_202_52717_20130107_190619_outLine +BABEL_OP2_202_52725_20131009_155625_inLine +BABEL_OP2_202_52725_20131009_155625_outLine +BABEL_OP2_202_52804_20131006_224625_inLine +BABEL_OP2_202_52804_20131006_224625_outLine +BABEL_OP2_202_53068_20131130_195134_inLine +BABEL_OP2_202_53068_20131130_195134_outLine +BABEL_OP2_202_53206_20131129_004718_inLine +BABEL_OP2_202_53206_20131129_004718_outLine +BABEL_OP2_202_53419_20140123_230101_inLine +BABEL_OP2_202_53419_20140123_230101_outLine +BABEL_OP2_202_53441_20131207_225909_inLine +BABEL_OP2_202_53441_20131207_225909_outLine 
+BABEL_OP2_202_53492_20131010_185348_inLine +BABEL_OP2_202_53492_20131010_185348_outLine +BABEL_OP2_202_53665_20131010_234640_inLine +BABEL_OP2_202_53665_20131010_234640_outLine +BABEL_OP2_202_54160_20130102_205447_inLine +BABEL_OP2_202_54160_20130102_205447_outLine +BABEL_OP2_202_54162_20130318_213750_inLine +BABEL_OP2_202_54162_20130318_213750_outLine +BABEL_OP2_202_54594_20131129_200245_inLine +BABEL_OP2_202_54594_20131129_200245_outLine +BABEL_OP2_202_54735_20140125_031824_inLine +BABEL_OP2_202_54735_20140125_031824_outLine +BABEL_OP2_202_54923_20140125_010451_inLine +BABEL_OP2_202_54923_20140125_010451_outLine +BABEL_OP2_202_55013_20131011_155605_inLine +BABEL_OP2_202_55013_20131011_155605_outLine +BABEL_OP2_202_55742_20140115_203307_inLine +BABEL_OP2_202_55742_20140115_203307_outLine +BABEL_OP2_202_55818_20130108_192939_inLine +BABEL_OP2_202_55818_20130108_192939_outLine +BABEL_OP2_202_56198_20131003_013538_inLine +BABEL_OP2_202_56198_20131003_013538_outLine +BABEL_OP2_202_56213_20140122_225210_inLine +BABEL_OP2_202_56213_20140122_225210_outLine +BABEL_OP2_202_56370_20130104_200151_inLine +BABEL_OP2_202_56370_20130104_200151_outLine +BABEL_OP2_202_56523_20130222_213416_inLine +BABEL_OP2_202_56523_20130222_213416_outLine +BABEL_OP2_202_56677_20140121_210326_inLine +BABEL_OP2_202_56677_20140121_210326_outLine +BABEL_OP2_202_56743_20130225_194854_inLine +BABEL_OP2_202_56743_20130225_194854_outLine +BABEL_OP2_202_56826_20131126_175456_inLine +BABEL_OP2_202_56826_20131126_175456_outLine +BABEL_OP2_202_57067_20140125_030302_inLine +BABEL_OP2_202_57067_20140125_030302_outLine +BABEL_OP2_202_57093_20131001_005041_inLine +BABEL_OP2_202_57093_20131001_005041_outLine +BABEL_OP2_202_57548_20130928_000636_inLine +BABEL_OP2_202_57548_20130928_000636_outLine +BABEL_OP2_202_57609_20130110_214448_inLine +BABEL_OP2_202_57609_20130110_214448_outLine +BABEL_OP2_202_57650_20131031_222920_inLine +BABEL_OP2_202_57650_20131031_222920_outLine +BABEL_OP2_202_57650_20131031_224035_inLine +BABEL_OP2_202_57650_20131031_224035_outLine +BABEL_OP2_202_57678_20140130_211104_inLine +BABEL_OP2_202_57678_20140130_211104_outLine +BABEL_OP2_202_57919_20131204_003234_inLine +BABEL_OP2_202_57919_20131204_003234_outLine +BABEL_OP2_202_58061_20131128_202231_inLine +BABEL_OP2_202_58061_20131128_202231_outLine +BABEL_OP2_202_58815_20131029_230825_inLine +BABEL_OP2_202_58815_20131029_230825_outLine +BABEL_OP2_202_58850_20130222_005155_inLine +BABEL_OP2_202_58850_20130222_005155_outLine +BABEL_OP2_202_58926_20131005_000157_inLine +BABEL_OP2_202_58926_20131005_000157_outLine +BABEL_OP2_202_59039_20131130_232650_inLine +BABEL_OP2_202_59039_20131130_232650_outLine +BABEL_OP2_202_59307_20131009_070225_inLine +BABEL_OP2_202_59307_20131009_070225_outLine +BABEL_OP2_202_59898_20130103_222102_inLine +BABEL_OP2_202_59898_20130103_222102_outLine +BABEL_OP2_202_60026_20130105_231529_inLine +BABEL_OP2_202_60026_20130105_231529_outLine +BABEL_OP2_202_60026_20130105_232525_inLine +BABEL_OP2_202_60026_20130105_232525_outLine +BABEL_OP2_202_60115_20130924_002929_inLine +BABEL_OP2_202_60115_20130924_002929_outLine +BABEL_OP2_202_60310_20131030_231919_inLine +BABEL_OP2_202_60310_20131030_231919_outLine +BABEL_OP2_202_60498_20131012_205044_inLine +BABEL_OP2_202_60498_20131012_205044_outLine +BABEL_OP2_202_60538_20130107_185811_inLine +BABEL_OP2_202_60538_20130107_185811_outLine +BABEL_OP2_202_60626_20131003_025140_inLine +BABEL_OP2_202_60626_20131003_025140_outLine +BABEL_OP2_202_60661_20131004_193207_inLine 
+BABEL_OP2_202_60661_20131004_193207_outLine +BABEL_OP2_202_60836_20131006_231246_inLine +BABEL_OP2_202_60836_20131006_231246_outLine +BABEL_OP2_202_61348_20130423_213656_inLine +BABEL_OP2_202_61348_20130423_213656_outLine +BABEL_OP2_202_61831_20140129_223655_inLine +BABEL_OP2_202_61831_20140129_223655_outLine +BABEL_OP2_202_61963_20140130_004249_inLine +BABEL_OP2_202_61963_20140130_004249_outLine +BABEL_OP2_202_62014_20130422_215514_inLine +BABEL_OP2_202_62014_20130422_215514_outLine +BABEL_OP2_202_62155_20131010_030043_inLine +BABEL_OP2_202_62155_20131010_030043_outLine +BABEL_OP2_202_62177_20131101_224431_inLine +BABEL_OP2_202_62177_20131101_224431_outLine +BABEL_OP2_202_62200_20130221_201143_inLine +BABEL_OP2_202_62200_20130221_201143_outLine +BABEL_OP2_202_62289_20131012_021114_inLine +BABEL_OP2_202_62289_20131012_021114_outLine +BABEL_OP2_202_62434_20130104_004333_inLine +BABEL_OP2_202_62434_20130104_004333_outLine +BABEL_OP2_202_62434_20130104_005350_inLine +BABEL_OP2_202_62434_20130104_005350_outLine +BABEL_OP2_202_62545_20131127_195440_inLine +BABEL_OP2_202_62545_20131127_195440_outLine +BABEL_OP2_202_62734_20130930_165147_inLine +BABEL_OP2_202_62734_20130930_165147_outLine +BABEL_OP2_202_62835_20130212_190421_inLine +BABEL_OP2_202_62835_20130212_190421_outLine +BABEL_OP2_202_63481_20121229_212430_inLine +BABEL_OP2_202_63481_20121229_212430_outLine +BABEL_OP2_202_63511_20140202_013550_inLine +BABEL_OP2_202_63511_20140202_013550_outLine +BABEL_OP2_202_63730_20140128_213539_inLine +BABEL_OP2_202_63730_20140128_213539_outLine +BABEL_OP2_202_63906_20140127_191132_inLine +BABEL_OP2_202_63906_20140127_191132_outLine +BABEL_OP2_202_63938_20140129_203743_inLine +BABEL_OP2_202_63938_20140129_203743_outLine +BABEL_OP2_202_64350_20130109_214951_inLine +BABEL_OP2_202_64350_20130109_214951_outLine +BABEL_OP2_202_64350_20130109_234646_inLine +BABEL_OP2_202_64350_20130109_234646_outLine +BABEL_OP2_202_64350_20130110_000149_inLine +BABEL_OP2_202_64350_20130110_000149_outLine +BABEL_OP2_202_64638_20130923_221504_inLine +BABEL_OP2_202_64638_20130923_221504_outLine +BABEL_OP2_202_64768_20130930_231452_inLine +BABEL_OP2_202_64768_20130930_231452_outLine +BABEL_OP2_202_64902_20131010_043148_inLine +BABEL_OP2_202_64902_20131010_043148_outLine +BABEL_OP2_202_65298_20131031_213621_inLine +BABEL_OP2_202_65298_20131031_213621_outLine +BABEL_OP2_202_65477_20130219_211638_inLine +BABEL_OP2_202_65477_20130219_211638_outLine +BABEL_OP2_202_65639_20131128_205641_inLine +BABEL_OP2_202_65639_20131128_205641_outLine +BABEL_OP2_202_65640_20131010_034809_inLine +BABEL_OP2_202_65640_20131010_034809_outLine +BABEL_OP2_202_65882_20131004_204102_inLine +BABEL_OP2_202_65882_20131004_204102_outLine +BABEL_OP2_202_65882_20131004_205447_inLine +BABEL_OP2_202_65882_20131004_205447_outLine +BABEL_OP2_202_66026_20140226_195114_inLine +BABEL_OP2_202_66026_20140226_195114_outLine +BABEL_OP2_202_66472_20130214_204424_inLine +BABEL_OP2_202_66472_20130214_204424_outLine +BABEL_OP2_202_66519_20130930_020855_inLine +BABEL_OP2_202_66519_20130930_020855_outLine +BABEL_OP2_202_66837_20131030_220432_inLine +BABEL_OP2_202_66837_20131030_220432_outLine +BABEL_OP2_202_66959_20131018_194733_inLine +BABEL_OP2_202_66959_20131018_194733_outLine +BABEL_OP2_202_66967_20130103_220521_inLine +BABEL_OP2_202_66967_20130103_220521_outLine +BABEL_OP2_202_67085_20131016_193800_inLine +BABEL_OP2_202_67085_20131016_193800_outLine +BABEL_OP2_202_67152_20131129_224301_inLine +BABEL_OP2_202_67152_20131129_224301_outLine 
+BABEL_OP2_202_67373_20131004_205550_inLine +BABEL_OP2_202_67373_20131004_205550_outLine +BABEL_OP2_202_67389_20140131_211249_inLine +BABEL_OP2_202_67389_20140131_211249_outLine +BABEL_OP2_202_67794_20131003_192439_inLine +BABEL_OP2_202_67794_20131003_192439_outLine +BABEL_OP2_202_67842_20131003_222534_inLine +BABEL_OP2_202_67842_20131003_222534_outLine +BABEL_OP2_202_67999_20140201_200014_inLine +BABEL_OP2_202_67999_20140201_200014_outLine +BABEL_OP2_202_68059_20140125_192613_inLine +BABEL_OP2_202_68059_20140125_192613_outLine +BABEL_OP2_202_68182_20131031_193507_inLine +BABEL_OP2_202_68182_20131031_193507_outLine +BABEL_OP2_202_68306_20130729_224017_inLine +BABEL_OP2_202_68306_20130729_224017_outLine +BABEL_OP2_202_68627_20130219_230718_inLine +BABEL_OP2_202_68627_20130219_230718_outLine +BABEL_OP2_202_68908_20131130_010731_inLine +BABEL_OP2_202_68908_20131130_010731_outLine +BABEL_OP2_202_69090_20131127_230541_inLine +BABEL_OP2_202_69090_20131127_230541_outLine +BABEL_OP2_202_69107_20130927_174817_inLine +BABEL_OP2_202_69107_20130927_174817_outLine +BABEL_OP2_202_69574_20131005_172205_inLine +BABEL_OP2_202_69574_20131005_172205_outLine +BABEL_OP2_202_69633_20130801_191800_inLine +BABEL_OP2_202_69633_20130801_191800_outLine +BABEL_OP2_202_69885_20131011_031936_inLine +BABEL_OP2_202_69885_20131011_031936_outLine +BABEL_OP2_202_69972_20140129_230607_inLine +BABEL_OP2_202_69972_20140129_230607_outLine +BABEL_OP2_202_69992_20130108_193548_inLine +BABEL_OP2_202_69992_20130108_193548_outLine +BABEL_OP2_202_70257_20131130_202722_inLine +BABEL_OP2_202_70257_20131130_202722_outLine +BABEL_OP2_202_70526_20131012_045553_inLine +BABEL_OP2_202_70526_20131012_045553_outLine +BABEL_OP2_202_70716_20131012_032544_inLine +BABEL_OP2_202_70716_20131012_032544_outLine +BABEL_OP2_202_71038_20140306_165543_inLine +BABEL_OP2_202_71038_20140306_165543_outLine +BABEL_OP2_202_71047_20140303_233000_inLine +BABEL_OP2_202_71047_20140303_233000_outLine +BABEL_OP2_202_71189_20131010_061651_inLine +BABEL_OP2_202_71189_20131010_061651_outLine +BABEL_OP2_202_71282_20131030_163454_inLine +BABEL_OP2_202_71282_20131030_163454_outLine +BABEL_OP2_202_71419_20131130_200448_inLine +BABEL_OP2_202_71419_20131130_200448_outLine +BABEL_OP2_202_71460_20131218_192638_inLine +BABEL_OP2_202_71460_20131218_192638_outLine +BABEL_OP2_202_71559_20140311_230424_inLine +BABEL_OP2_202_71559_20140311_230424_outLine +BABEL_OP2_202_71704_20130109_185345_inLine +BABEL_OP2_202_71704_20130109_185345_outLine +BABEL_OP2_202_71780_20131003_034729_inLine +BABEL_OP2_202_71780_20131003_034729_outLine +BABEL_OP2_202_72654_20130929_175728_inLine +BABEL_OP2_202_72654_20130929_175728_outLine +BABEL_OP2_202_72733_20131018_230438_inLine +BABEL_OP2_202_72733_20131018_230438_outLine +BABEL_OP2_202_73042_20130109_205002_inLine +BABEL_OP2_202_73042_20130109_205002_outLine +BABEL_OP2_202_73072_20130105_235040_inLine +BABEL_OP2_202_73072_20130105_235040_outLine +BABEL_OP2_202_73485_20131011_183811_inLine +BABEL_OP2_202_73485_20131011_183811_outLine +BABEL_OP2_202_73485_20131011_184857_inLine +BABEL_OP2_202_73485_20131011_184857_outLine +BABEL_OP2_202_73757_20130319_022121_inLine +BABEL_OP2_202_73757_20130319_022121_outLine +BABEL_OP2_202_73964_20131011_010642_inLine +BABEL_OP2_202_73964_20131011_010642_outLine +BABEL_OP2_202_74111_20131018_223020_inLine +BABEL_OP2_202_74111_20131018_223020_outLine +BABEL_OP2_202_74455_20131201_010424_inLine +BABEL_OP2_202_74455_20131201_010424_outLine +BABEL_OP2_202_74641_20130927_171309_inLine 
+BABEL_OP2_202_74641_20130927_171309_outLine +BABEL_OP2_202_74728_20131011_175203_inLine +BABEL_OP2_202_74728_20131011_175203_outLine +BABEL_OP2_202_74886_20130104_222216_inLine +BABEL_OP2_202_74886_20130104_222216_outLine +BABEL_OP2_202_75365_20131017_020033_inLine +BABEL_OP2_202_75365_20131017_020033_outLine +BABEL_OP2_202_75465_20140227_020909_inLine +BABEL_OP2_202_75465_20140227_020909_outLine +BABEL_OP2_202_75869_20131010_054546_inLine +BABEL_OP2_202_75869_20131010_054546_outLine +BABEL_OP2_202_75981_20131017_182656_inLine +BABEL_OP2_202_75981_20131017_182656_outLine +BABEL_OP2_202_76155_20130214_225045_inLine +BABEL_OP2_202_76155_20130214_225045_outLine +BABEL_OP2_202_76155_20130214_231141_inLine +BABEL_OP2_202_76155_20130214_231141_outLine +BABEL_OP2_202_76155_20130214_233751_inLine +BABEL_OP2_202_76155_20130214_233751_outLine +BABEL_OP2_202_76218_20130215_211824_inLine +BABEL_OP2_202_76218_20130215_211824_outLine +BABEL_OP2_202_76372_20131010_032300_inLine +BABEL_OP2_202_76372_20131010_032300_outLine +BABEL_OP2_202_76773_20131004_211703_inLine +BABEL_OP2_202_76773_20131004_211703_outLine +BABEL_OP2_202_77139_20121228_190704_inLine +BABEL_OP2_202_77139_20121228_190704_outLine +BABEL_OP2_202_77730_20130108_005804_inLine +BABEL_OP2_202_77730_20130108_005804_outLine +BABEL_OP2_202_78116_20130730_032152_inLine +BABEL_OP2_202_78116_20130730_032152_outLine +BABEL_OP2_202_78161_20131128_013256_inLine +BABEL_OP2_202_78161_20131128_013256_outLine +BABEL_OP2_202_78254_20140315_200641_inLine +BABEL_OP2_202_79131_20131011_031533_inLine +BABEL_OP2_202_79131_20131011_031533_outLine +BABEL_OP2_202_79167_20130801_173136_inLine +BABEL_OP2_202_79167_20130801_173136_outLine +BABEL_OP2_202_79505_20140304_011515_inLine +BABEL_OP2_202_79505_20140304_011515_outLine +BABEL_OP2_202_79590_20130214_233631_inLine +BABEL_OP2_202_79590_20130214_233631_outLine +BABEL_OP2_202_79820_20131002_224612_inLine +BABEL_OP2_202_79820_20131002_224612_outLine +BABEL_OP2_202_79858_20131007_202121_inLine +BABEL_OP2_202_79858_20131007_202121_outLine +BABEL_OP2_202_80241_20131208_061751_inLine +BABEL_OP2_202_80241_20131208_061751_outLine +BABEL_OP2_202_80577_20131101_002029_inLine +BABEL_OP2_202_80577_20131101_002029_outLine +BABEL_OP2_202_80721_20131018_215413_inLine +BABEL_OP2_202_80721_20131018_215413_outLine +BABEL_OP2_202_81424_20130731_174939_inLine +BABEL_OP2_202_81424_20130731_174939_outLine +BABEL_OP2_202_81427_20130930_033601_inLine +BABEL_OP2_202_81427_20130930_033601_outLine +BABEL_OP2_202_81427_20130930_034540_inLine +BABEL_OP2_202_81427_20130930_034540_outLine +BABEL_OP2_202_81581_20131130_234413_inLine +BABEL_OP2_202_81581_20131130_234413_outLine +BABEL_OP2_202_81674_20131129_201042_inLine +BABEL_OP2_202_81674_20131129_201042_outLine +BABEL_OP2_202_81810_20130731_202723_inLine +BABEL_OP2_202_81810_20130731_202723_outLine +BABEL_OP2_202_81854_20131016_235937_inLine +BABEL_OP2_202_81854_20131016_235937_outLine +BABEL_OP2_202_82089_20130213_201744_inLine +BABEL_OP2_202_82089_20130213_201744_outLine +BABEL_OP2_202_82140_20130411_203406_inLine +BABEL_OP2_202_82140_20130411_203406_outLine +BABEL_OP2_202_82145_20131009_152735_inLine +BABEL_OP2_202_82145_20131009_152735_outLine +BABEL_OP2_202_82145_20131010_055122_inLine +BABEL_OP2_202_82145_20131010_055122_outLine +BABEL_OP2_202_82863_20130213_003624_inLine +BABEL_OP2_202_82863_20130213_003624_outLine +BABEL_OP2_202_82979_20131002_205506_inLine +BABEL_OP2_202_82979_20131002_205506_outLine +BABEL_OP2_202_83062_20131129_191922_inLine 
+BABEL_OP2_202_83062_20131129_191922_outLine +BABEL_OP2_202_83935_20130801_192224_inLine +BABEL_OP2_202_83935_20130801_192224_outLine +BABEL_OP2_202_83935_20130801_194402_inLine +BABEL_OP2_202_83935_20130801_194402_outLine +BABEL_OP2_202_84061_20130929_235409_inLine +BABEL_OP2_202_84061_20130929_235409_outLine +BABEL_OP2_202_84079_20131208_050702_inLine +BABEL_OP2_202_84079_20131208_050702_outLine +BABEL_OP2_202_84125_20121222_184258_inLine +BABEL_OP2_202_84125_20121222_184258_outLine +BABEL_OP2_202_84125_20121222_185218_inLine +BABEL_OP2_202_84125_20121222_185218_outLine +BABEL_OP2_202_84327_20130730_193322_inLine +BABEL_OP2_202_84327_20130730_193322_outLine +BABEL_OP2_202_84605_20131003_053508_inLine +BABEL_OP2_202_84605_20131003_053508_outLine +BABEL_OP2_202_84737_20131031_211648_inLine +BABEL_OP2_202_84737_20131031_211648_outLine +BABEL_OP2_202_84815_20131018_211832_inLine +BABEL_OP2_202_84815_20131018_211832_outLine +BABEL_OP2_202_84823_20131031_020506_inLine +BABEL_OP2_202_84823_20131031_020506_outLine +BABEL_OP2_202_85048_20130911_014859_inLine +BABEL_OP2_202_85048_20130911_014859_outLine +BABEL_OP2_202_85179_20131101_192951_inLine +BABEL_OP2_202_85179_20131101_192951_outLine +BABEL_OP2_202_85248_20131030_022406_inLine +BABEL_OP2_202_85248_20131030_022406_outLine +BABEL_OP2_202_85322_20130108_190627_inLine +BABEL_OP2_202_85322_20130108_190627_outLine +BABEL_OP2_202_85322_20130108_191905_inLine +BABEL_OP2_202_85322_20130108_191905_outLine +BABEL_OP2_202_85325_20131011_181734_inLine +BABEL_OP2_202_85325_20131011_181734_outLine +BABEL_OP2_202_85439_20131012_024821_inLine +BABEL_OP2_202_85439_20131012_024821_outLine +BABEL_OP2_202_86467_20121231_205911_inLine +BABEL_OP2_202_86467_20121231_205911_outLine +BABEL_OP2_202_86472_20130803_213443_inLine +BABEL_OP2_202_86472_20130803_213443_outLine +BABEL_OP2_202_86826_20131015_204931_inLine +BABEL_OP2_202_86826_20131015_204931_outLine +BABEL_OP2_202_86830_20131031_221935_inLine +BABEL_OP2_202_86830_20131031_221935_outLine +BABEL_OP2_202_87074_20140114_001320_inLine +BABEL_OP2_202_87074_20140114_001320_outLine +BABEL_OP2_202_87470_20130225_202639_inLine +BABEL_OP2_202_87470_20130225_202639_outLine +BABEL_OP2_202_87545_20131012_025318_inLine +BABEL_OP2_202_87545_20131012_025318_outLine +BABEL_OP2_202_87866_20131215_203616_inLine +BABEL_OP2_202_87866_20131215_203616_outLine +BABEL_OP2_202_87871_20131031_222231_inLine +BABEL_OP2_202_87871_20131031_222231_outLine +BABEL_OP2_202_87921_20131017_204018_inLine +BABEL_OP2_202_87921_20131017_204018_outLine +BABEL_OP2_202_88372_20131012_023925_inLine +BABEL_OP2_202_88372_20131012_023925_outLine +BABEL_OP2_202_88550_20131017_004344_inLine +BABEL_OP2_202_88550_20131017_004344_outLine +BABEL_OP2_202_88550_20131017_005456_inLine +BABEL_OP2_202_88550_20131017_005456_outLine +BABEL_OP2_202_88601_20130212_205048_inLine +BABEL_OP2_202_88601_20130212_205048_outLine +BABEL_OP2_202_88873_20131004_003616_inLine +BABEL_OP2_202_88873_20131004_003616_outLine +BABEL_OP2_202_89226_20131203_030320_inLine +BABEL_OP2_202_89226_20131203_030320_outLine +BABEL_OP2_202_89560_20131018_222518_inLine +BABEL_OP2_202_89560_20131018_222518_outLine +BABEL_OP2_202_89650_20131202_204623_inLine +BABEL_OP2_202_89650_20131202_204623_outLine +BABEL_OP2_202_89718_20131203_002623_inLine +BABEL_OP2_202_89718_20131203_002623_outLine +BABEL_OP2_202_89888_20130109_184456_inLine +BABEL_OP2_202_89888_20130109_184456_outLine +BABEL_OP2_202_90935_20130226_232117_inLine +BABEL_OP2_202_90935_20130226_232117_outLine 
+BABEL_OP2_202_91189_20131017_013603_inLine +BABEL_OP2_202_91189_20131017_013603_outLine +BABEL_OP2_202_91336_20130318_212106_inLine +BABEL_OP2_202_91336_20130318_212106_outLine +BABEL_OP2_202_91411_20131130_013112_inLine +BABEL_OP2_202_91411_20131130_013112_outLine +BABEL_OP2_202_91581_20131018_012025_inLine +BABEL_OP2_202_91581_20131018_012025_outLine +BABEL_OP2_202_91808_20131204_000439_inLine +BABEL_OP2_202_91808_20131204_000439_outLine +BABEL_OP2_202_91930_20131009_204054_inLine +BABEL_OP2_202_91930_20131009_204054_outLine +BABEL_OP2_202_91971_20131203_013031_inLine +BABEL_OP2_202_91971_20131203_013031_outLine +BABEL_OP2_202_91977_20130803_020205_inLine +BABEL_OP2_202_91977_20130803_020205_outLine +BABEL_OP2_202_92096_20131010_010207_inLine +BABEL_OP2_202_92096_20131010_010207_outLine +BABEL_OP2_202_92356_20140319_233703_inLine +BABEL_OP2_202_92356_20140319_233703_outLine +BABEL_OP2_202_92459_20131001_210517_inLine +BABEL_OP2_202_92459_20131001_210517_outLine +BABEL_OP2_202_92509_20121228_220632_inLine +BABEL_OP2_202_92509_20121228_220632_outLine +BABEL_OP2_202_92698_20130930_170131_inLine +BABEL_OP2_202_92698_20130930_170131_outLine +BABEL_OP2_202_92698_20130930_171329_inLine +BABEL_OP2_202_92698_20130930_171329_outLine +BABEL_OP2_202_92757_20131012_012455_inLine +BABEL_OP2_202_92757_20131012_012455_outLine +BABEL_OP2_202_92809_20131010_013656_inLine +BABEL_OP2_202_92809_20131010_013656_outLine +BABEL_OP2_202_92941_20131001_030226_inLine +BABEL_OP2_202_92941_20131001_030226_outLine +BABEL_OP2_202_93411_20130411_203410_inLine +BABEL_OP2_202_93411_20130411_203410_outLine +BABEL_OP2_202_93475_20140115_204518_inLine +BABEL_OP2_202_93475_20140115_204518_outLine +BABEL_OP2_202_93515_20131012_015923_inLine +BABEL_OP2_202_93515_20131012_015923_outLine +BABEL_OP2_202_93861_20130417_181331_inLine +BABEL_OP2_202_93861_20130417_181331_outLine +BABEL_OP2_202_93861_20130417_184517_inLine +BABEL_OP2_202_93861_20130417_184517_outLine +BABEL_OP2_202_93946_20131018_213959_inLine +BABEL_OP2_202_93946_20131018_213959_outLine +BABEL_OP2_202_93964_20130411_173113_inLine +BABEL_OP2_202_93964_20130411_173113_outLine +BABEL_OP2_202_93964_20130411_174717_inLine +BABEL_OP2_202_93964_20130411_174717_outLine +BABEL_OP2_202_94044_20131127_234911_inLine +BABEL_OP2_202_94044_20131127_234911_outLine +BABEL_OP2_202_94166_20131101_013342_inLine +BABEL_OP2_202_94166_20131101_013342_outLine +BABEL_OP2_202_94212_20131129_213734_inLine +BABEL_OP2_202_94212_20131129_213734_outLine +BABEL_OP2_202_94442_20131014_165222_inLine +BABEL_OP2_202_94442_20131014_165222_outLine +BABEL_OP2_202_94465_20131018_014837_inLine +BABEL_OP2_202_94465_20131018_014837_outLine +BABEL_OP2_202_94487_20131011_165627_inLine +BABEL_OP2_202_94487_20131011_165627_outLine +BABEL_OP2_202_94713_20131130_020453_inLine +BABEL_OP2_202_94713_20131130_020453_outLine +BABEL_OP2_202_94745_20130807_024052_inLine +BABEL_OP2_202_94745_20130807_024052_outLine +BABEL_OP2_202_95269_20130228_201037_inLine +BABEL_OP2_202_95269_20130228_201037_outLine +BABEL_OP2_202_95583_20130104_184957_inLine +BABEL_OP2_202_95583_20130104_184957_outLine +BABEL_OP2_202_95598_20130207_212051_inLine +BABEL_OP2_202_95598_20130207_212051_outLine +BABEL_OP2_202_95677_20131216_002743_inLine +BABEL_OP2_202_95677_20131216_002743_outLine +BABEL_OP2_202_95942_20131009_231612_inLine +BABEL_OP2_202_95942_20131009_231612_outLine +BABEL_OP2_202_95966_20130216_005201_inLine +BABEL_OP2_202_95966_20130216_005201_outLine +BABEL_OP2_202_95966_20130216_010600_inLine 
+BABEL_OP2_202_95966_20130216_010600_outLine +BABEL_OP2_202_96041_20140317_233707_inLine +BABEL_OP2_202_96041_20140317_233707_outLine +BABEL_OP2_202_96059_20131012_001057_inLine +BABEL_OP2_202_96059_20131012_001057_outLine +BABEL_OP2_202_96077_20131215_014408_inLine +BABEL_OP2_202_96077_20131215_014408_outLine +BABEL_OP2_202_96158_20131127_202846_inLine +BABEL_OP2_202_96158_20131127_202846_outLine +BABEL_OP2_202_96190_20140114_004611_inLine +BABEL_OP2_202_96190_20140114_004611_outLine +BABEL_OP2_202_96205_20130213_183412_inLine +BABEL_OP2_202_96205_20130213_183412_outLine +BABEL_OP2_202_96405_20131002_203007_inLine +BABEL_OP2_202_96405_20131002_203007_outLine +BABEL_OP2_202_96446_20130103_231919_inLine +BABEL_OP2_202_96446_20130103_231919_outLine +BABEL_OP2_202_96446_20130103_232611_inLine +BABEL_OP2_202_96446_20130103_232611_outLine +BABEL_OP2_202_96934_20131001_205011_inLine +BABEL_OP2_202_96934_20131001_205011_outLine +BABEL_OP2_202_97097_20131010_022340_inLine +BABEL_OP2_202_97097_20131010_022340_outLine +BABEL_OP2_202_97448_20131202_225423_inLine +BABEL_OP2_202_97448_20131202_225423_outLine +BABEL_OP2_202_97896_20130222_200148_inLine +BABEL_OP2_202_97896_20130222_200148_outLine +BABEL_OP2_202_97896_20130222_201339_inLine +BABEL_OP2_202_97896_20130222_201339_outLine +BABEL_OP2_202_97988_20131017_202448_inLine +BABEL_OP2_202_97988_20131017_202448_outLine +BABEL_OP2_202_98165_20130928_235834_inLine +BABEL_OP2_202_98165_20130928_235834_outLine +BABEL_OP2_202_98165_20130929_001916_inLine +BABEL_OP2_202_98165_20130929_001916_outLine +BABEL_OP2_202_98255_20131130_002114_inLine +BABEL_OP2_202_98255_20131130_002114_outLine +BABEL_OP2_202_98365_20130912_012649_inLine +BABEL_OP2_202_98365_20130912_012649_outLine +BABEL_OP2_202_98365_20130912_013735_inLine +BABEL_OP2_202_98365_20130912_013735_outLine +BABEL_OP2_202_98489_20140113_195524_inLine +BABEL_OP2_202_98489_20140113_195524_outLine +BABEL_OP2_202_98506_20131009_055751_inLine +BABEL_OP2_202_98506_20131009_055751_outLine +BABEL_OP2_202_98565_20131204_010715_inLine +BABEL_OP2_202_98565_20131204_010715_outLine +BABEL_OP2_202_98888_20130214_225058_inLine +BABEL_OP2_202_98888_20130214_225058_outLine +BABEL_OP2_202_99202_20130111_190008_inLine +BABEL_OP2_202_99202_20130111_190008_outLine +BABEL_OP2_202_99487_20130109_013911_inLine +BABEL_OP2_202_99487_20130109_013911_outLine +BABEL_OP2_202_99920_20130109_211943_inLine +BABEL_OP2_202_99920_20130109_211943_outLine +BABEL_OP2_202_99952_20131016_024323_inLine +BABEL_OP2_202_99952_20131016_024323_outLine +BABEL_OP2_202_99975_20131127_204148_inLine +BABEL_OP2_202_99975_20131127_204148_outLine diff --git a/egs/babel/s5d/conf/lists/202-swahili/evalpart1.list b/egs/babel/s5d/conf/lists/202-swahili/evalpart1.list new file mode 100644 index 00000000000..c01647b6d12 --- /dev/null +++ b/egs/babel/s5d/conf/lists/202-swahili/evalpart1.list @@ -0,0 +1,196 @@ +BABEL_OP2_202_10019_20130928_235503_inLine +BABEL_OP2_202_10019_20130928_235503_outLine +BABEL_OP2_202_10416_20130215_183832_inLine +BABEL_OP2_202_10416_20130215_183832_outLine +BABEL_OP2_202_12321_20140210_015215_inLine +BABEL_OP2_202_12321_20140210_015215_outLine +BABEL_OP2_202_13040_20131005_180024_inLine +BABEL_OP2_202_13040_20131005_180024_outLine +BABEL_OP2_202_13929_20140205_042603_inLine +BABEL_OP2_202_13929_20140205_042603_outLine +BABEL_OP2_202_14537_20131016_202630_inLine +BABEL_OP2_202_14537_20131016_202630_outLine +BABEL_OP2_202_16407_20131203_231519_inLine +BABEL_OP2_202_16407_20131203_231519_outLine +BABEL_OP2_202_16787_20130220_000429_inLine 
+BABEL_OP2_202_16787_20130220_000429_outLine +BABEL_OP2_202_17511_20140205_051449_inLine +BABEL_OP2_202_17511_20140205_051449_outLine +BABEL_OP2_202_19545_20130927_190707_inLine +BABEL_OP2_202_19545_20130927_190707_outLine +BABEL_OP2_202_20738_20131029_183614_inLine +BABEL_OP2_202_20738_20131029_183614_outLine +BABEL_OP2_202_20896_20131220_001523_inLine +BABEL_OP2_202_20896_20131220_001523_outLine +BABEL_OP2_202_21794_20130219_010105_inLine +BABEL_OP2_202_21794_20130219_010105_outLine +BABEL_OP2_202_22641_20131011_032157_inLine +BABEL_OP2_202_22641_20131011_032157_outLine +BABEL_OP2_202_23355_20131128_005023_inLine +BABEL_OP2_202_23355_20131128_005023_outLine +BABEL_OP2_202_23731_20130930_020336_inLine +BABEL_OP2_202_23731_20130930_020336_outLine +BABEL_OP2_202_24924_20140207_235730_inLine +BABEL_OP2_202_24924_20140207_235730_outLine +BABEL_OP2_202_26869_20131216_035718_inLine +BABEL_OP2_202_26869_20131216_035718_outLine +BABEL_OP2_202_28422_20130924_010422_inLine +BABEL_OP2_202_28422_20130924_010422_outLine +BABEL_OP2_202_30250_20121228_195004_inLine +BABEL_OP2_202_30250_20121228_195004_outLine +BABEL_OP2_202_30250_20121228_195937_inLine +BABEL_OP2_202_30250_20121228_195937_outLine +BABEL_OP2_202_30497_20131010_013817_inLine +BABEL_OP2_202_30497_20131010_013817_outLine +BABEL_OP2_202_31484_20130912_200823_inLine +BABEL_OP2_202_31484_20130912_200823_outLine +BABEL_OP2_202_32832_20140108_220009_inLine +BABEL_OP2_202_32832_20140108_220009_outLine +BABEL_OP2_202_36505_20131029_215901_inLine +BABEL_OP2_202_36505_20131029_215901_outLine +BABEL_OP2_202_38664_20130315_010258_inLine +BABEL_OP2_202_38664_20130315_010258_outLine +BABEL_OP2_202_38741_20131003_014930_inLine +BABEL_OP2_202_38741_20131003_014930_outLine +BABEL_OP2_202_39277_20131127_183847_inLine +BABEL_OP2_202_39277_20131127_183847_outLine +BABEL_OP2_202_41109_20140114_212001_inLine +BABEL_OP2_202_41109_20140114_212001_outLine +BABEL_OP2_202_44678_20131128_010554_inLine +BABEL_OP2_202_44678_20131128_010554_outLine +BABEL_OP2_202_44681_20131218_213752_inLine +BABEL_OP2_202_44681_20131218_213752_outLine +BABEL_OP2_202_44681_20131218_214913_inLine +BABEL_OP2_202_44681_20131218_214913_outLine +BABEL_OP2_202_45777_20130930_215344_inLine +BABEL_OP2_202_45777_20130930_215344_outLine +BABEL_OP2_202_45777_20130930_220539_inLine +BABEL_OP2_202_45777_20130930_220539_outLine +BABEL_OP2_202_46333_20140225_002629_inLine +BABEL_OP2_202_46333_20140225_002629_outLine +BABEL_OP2_202_46974_20130729_181547_inLine +BABEL_OP2_202_46974_20130729_181547_outLine +BABEL_OP2_202_47959_20131004_035713_inLine +BABEL_OP2_202_47959_20131004_035713_outLine +BABEL_OP2_202_48016_20140220_204253_inLine +BABEL_OP2_202_48016_20140220_204253_outLine +BABEL_OP2_202_48758_20131009_051338_inLine +BABEL_OP2_202_48758_20131009_051338_outLine +BABEL_OP2_202_49637_20130103_203801_inLine +BABEL_OP2_202_49637_20130103_203801_outLine +BABEL_OP2_202_50630_20130926_021713_inLine +BABEL_OP2_202_50630_20130926_021713_outLine +BABEL_OP2_202_50958_20130219_215809_inLine +BABEL_OP2_202_50958_20130219_215809_outLine +BABEL_OP2_202_50962_20131002_203346_inLine +BABEL_OP2_202_50962_20131002_203346_outLine +BABEL_OP2_202_51414_20131012_225839_inLine +BABEL_OP2_202_51414_20131012_225839_outLine +BABEL_OP2_202_52070_20140124_231122_inLine +BABEL_OP2_202_52070_20140124_231122_outLine +BABEL_OP2_202_52222_20131126_183055_inLine +BABEL_OP2_202_52222_20131126_183055_outLine +BABEL_OP2_202_52447_20131014_001157_inLine +BABEL_OP2_202_52447_20131014_001157_outLine 
+BABEL_OP2_202_52614_20131011_162942_inLine +BABEL_OP2_202_52614_20131011_162942_outLine +BABEL_OP2_202_53206_20131129_004718_inLine +BABEL_OP2_202_53206_20131129_004718_outLine +BABEL_OP2_202_55742_20140115_203307_inLine +BABEL_OP2_202_55742_20140115_203307_outLine +BABEL_OP2_202_56523_20130222_213416_inLine +BABEL_OP2_202_56523_20130222_213416_outLine +BABEL_OP2_202_57650_20131031_222920_inLine +BABEL_OP2_202_57650_20131031_222920_outLine +BABEL_OP2_202_57650_20131031_224035_inLine +BABEL_OP2_202_57650_20131031_224035_outLine +BABEL_OP2_202_60626_20131003_025140_inLine +BABEL_OP2_202_60626_20131003_025140_outLine +BABEL_OP2_202_62155_20131010_030043_inLine +BABEL_OP2_202_62155_20131010_030043_outLine +BABEL_OP2_202_62434_20130104_004333_inLine +BABEL_OP2_202_62434_20130104_004333_outLine +BABEL_OP2_202_62434_20130104_005350_inLine +BABEL_OP2_202_62434_20130104_005350_outLine +BABEL_OP2_202_62835_20130212_190421_inLine +BABEL_OP2_202_62835_20130212_190421_outLine +BABEL_OP2_202_63481_20121229_212430_inLine +BABEL_OP2_202_63481_20121229_212430_outLine +BABEL_OP2_202_63511_20140202_013550_inLine +BABEL_OP2_202_63511_20140202_013550_outLine +BABEL_OP2_202_64638_20130923_221504_inLine +BABEL_OP2_202_64638_20130923_221504_outLine +BABEL_OP2_202_66959_20131018_194733_inLine +BABEL_OP2_202_66959_20131018_194733_outLine +BABEL_OP2_202_66967_20130103_220521_inLine +BABEL_OP2_202_66967_20130103_220521_outLine +BABEL_OP2_202_67373_20131004_205550_inLine +BABEL_OP2_202_67373_20131004_205550_outLine +BABEL_OP2_202_67794_20131003_192439_inLine +BABEL_OP2_202_67794_20131003_192439_outLine +BABEL_OP2_202_69090_20131127_230541_inLine +BABEL_OP2_202_69090_20131127_230541_outLine +BABEL_OP2_202_69972_20140129_230607_inLine +BABEL_OP2_202_69972_20140129_230607_outLine +BABEL_OP2_202_71282_20131030_163454_inLine +BABEL_OP2_202_71282_20131030_163454_outLine +BABEL_OP2_202_71704_20130109_185345_inLine +BABEL_OP2_202_71704_20130109_185345_outLine +BABEL_OP2_202_73072_20130105_235040_inLine +BABEL_OP2_202_73072_20130105_235040_outLine +BABEL_OP2_202_74111_20131018_223020_inLine +BABEL_OP2_202_74111_20131018_223020_outLine +BABEL_OP2_202_74641_20130927_171309_inLine +BABEL_OP2_202_74641_20130927_171309_outLine +BABEL_OP2_202_76773_20131004_211703_inLine +BABEL_OP2_202_76773_20131004_211703_outLine +BABEL_OP2_202_83062_20131129_191922_inLine +BABEL_OP2_202_83062_20131129_191922_outLine +BABEL_OP2_202_84327_20130730_193322_inLine +BABEL_OP2_202_84327_20130730_193322_outLine +BABEL_OP2_202_87545_20131012_025318_inLine +BABEL_OP2_202_87545_20131012_025318_outLine +BABEL_OP2_202_89718_20131203_002623_inLine +BABEL_OP2_202_89718_20131203_002623_outLine +BABEL_OP2_202_90935_20130226_232117_inLine +BABEL_OP2_202_90935_20130226_232117_outLine +BABEL_OP2_202_91930_20131009_204054_inLine +BABEL_OP2_202_91930_20131009_204054_outLine +BABEL_OP2_202_91971_20131203_013031_inLine +BABEL_OP2_202_91971_20131203_013031_outLine +BABEL_OP2_202_92698_20130930_170131_inLine +BABEL_OP2_202_92698_20130930_170131_outLine +BABEL_OP2_202_92698_20130930_171329_inLine +BABEL_OP2_202_92698_20130930_171329_outLine +BABEL_OP2_202_93861_20130417_181331_inLine +BABEL_OP2_202_93861_20130417_181331_outLine +BABEL_OP2_202_93861_20130417_184517_inLine +BABEL_OP2_202_93861_20130417_184517_outLine +BABEL_OP2_202_93946_20131018_213959_inLine +BABEL_OP2_202_93946_20131018_213959_outLine +BABEL_OP2_202_94166_20131101_013342_inLine +BABEL_OP2_202_94166_20131101_013342_outLine +BABEL_OP2_202_94212_20131129_213734_inLine 
+BABEL_OP2_202_94212_20131129_213734_outLine
+BABEL_OP2_202_95966_20130216_005201_inLine
+BABEL_OP2_202_95966_20130216_005201_outLine
+BABEL_OP2_202_95966_20130216_010600_inLine
+BABEL_OP2_202_95966_20130216_010600_outLine
+BABEL_OP2_202_96041_20140317_233707_inLine
+BABEL_OP2_202_96041_20140317_233707_outLine
+BABEL_OP2_202_96059_20131012_001057_inLine
+BABEL_OP2_202_96059_20131012_001057_outLine
+BABEL_OP2_202_96205_20130213_183412_inLine
+BABEL_OP2_202_96205_20130213_183412_outLine
+BABEL_OP2_202_96934_20131001_205011_inLine
+BABEL_OP2_202_96934_20131001_205011_outLine
+BABEL_OP2_202_97097_20131010_022340_inLine
+BABEL_OP2_202_97097_20131010_022340_outLine
+BABEL_OP2_202_97448_20131202_225423_inLine
+BABEL_OP2_202_97448_20131202_225423_outLine
+BABEL_OP2_202_98255_20131130_002114_inLine
+BABEL_OP2_202_98255_20131130_002114_outLine
+BABEL_OP2_202_98888_20130214_225058_inLine
+BABEL_OP2_202_98888_20130214_225058_outLine
+BABEL_OP2_202_99487_20130109_013911_inLine
+BABEL_OP2_202_99487_20130109_013911_outLine
diff --git a/egs/babel/s5d/conf/lists/202-swahili/sub-train.list b/egs/babel/s5d/conf/lists/202-swahili/sub-train.list
new file mode 100644
index 00000000000..ec4d25cd88a
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/202-swahili/sub-train.list
@@ -0,0 +1,128 @@
+BABEL_OP2_202_11859_20140206_193130_inLine
+BABEL_OP2_202_11859_20140206_193130_outLine
+BABEL_OP2_202_14719_20131126_223914_inLine
+BABEL_OP2_202_14719_20131126_223914_outLine
+BABEL_OP2_202_16838_20140204_225359_inLine
+BABEL_OP2_202_16838_20140204_225359_outLine
+BABEL_OP2_202_21206_20140207_213800_inLine
+BABEL_OP2_202_21206_20140207_213800_outLine
+BABEL_OP2_202_24501_20140205_231355_inLine
+BABEL_OP2_202_24501_20140205_231355_outLine
+BABEL_OP2_202_27189_20131216_001758_inLine
+BABEL_OP2_202_27189_20131216_001758_outLine
+BABEL_OP2_202_28522_20130925_000938_inLine
+BABEL_OP2_202_28522_20130925_000938_outLine
+BABEL_OP2_202_28644_20140205_001525_inLine
+BABEL_OP2_202_28644_20140205_001525_outLine
+BABEL_OP2_202_30280_20140220_001618_inLine
+BABEL_OP2_202_30280_20140220_001618_outLine
+BABEL_OP2_202_30432_20130502_210534_inLine
+BABEL_OP2_202_30432_20130502_210534_outLine
+BABEL_OP2_202_30432_20130503_175016_inLine
+BABEL_OP2_202_30432_20130503_175016_outLine
+BABEL_OP2_202_30645_20130108_200114_inLine
+BABEL_OP2_202_30645_20130108_200114_outLine
+BABEL_OP2_202_32837_20131101_203319_inLine
+BABEL_OP2_202_32837_20131101_203319_outLine
+BABEL_OP2_202_35609_20140220_193923_inLine
+BABEL_OP2_202_35609_20140220_193923_outLine
+BABEL_OP2_202_38963_20131215_232437_inLine
+BABEL_OP2_202_38963_20131215_232437_outLine
+BABEL_OP2_202_43395_20140220_223151_inLine
+BABEL_OP2_202_43395_20140220_223151_outLine
+BABEL_OP2_202_46770_20140223_234733_inLine
+BABEL_OP2_202_46770_20140223_234733_outLine
+BABEL_OP2_202_48243_20131009_224543_inLine
+BABEL_OP2_202_48243_20131009_224543_outLine
+BABEL_OP2_202_48422_20140225_220708_inLine
+BABEL_OP2_202_48422_20140225_220708_outLine
+BABEL_OP2_202_51156_20131216_015429_inLine
+BABEL_OP2_202_51156_20131216_015429_outLine
+BABEL_OP2_202_51484_20140123_220444_inLine
+BABEL_OP2_202_51484_20140123_220444_outLine
+BABEL_OP2_202_51611_20130109_194912_inLine
+BABEL_OP2_202_51611_20130109_194912_outLine
+BABEL_OP2_202_53063_20140124_000041_inLine
+BABEL_OP2_202_53063_20140124_000041_outLine
+BABEL_OP2_202_54074_20140123_205035_inLine
+BABEL_OP2_202_54074_20140123_205035_outLine
+BABEL_OP2_202_54841_20140122_195114_inLine
+BABEL_OP2_202_54841_20140122_195114_outLine
+BABEL_OP2_202_54841_20140122_200157_inLine
+BABEL_OP2_202_54841_20140122_200157_outLine
+BABEL_OP2_202_55259_20130930_023554_inLine
+BABEL_OP2_202_55259_20130930_023554_outLine
+BABEL_OP2_202_55349_20131010_002325_inLine
+BABEL_OP2_202_55349_20131010_002325_outLine
+BABEL_OP2_202_56306_20140122_204419_inLine
+BABEL_OP2_202_56306_20140122_204419_outLine
+BABEL_OP2_202_56465_20140122_194039_inLine
+BABEL_OP2_202_56465_20140122_194039_outLine
+BABEL_OP2_202_57782_20140129_231340_inLine
+BABEL_OP2_202_57782_20140129_231340_outLine
+BABEL_OP2_202_59720_20130930_032445_inLine
+BABEL_OP2_202_59720_20130930_032445_outLine
+BABEL_OP2_202_60477_20140201_200420_inLine
+BABEL_OP2_202_60477_20140201_200420_outLine
+BABEL_OP2_202_60778_20131201_233949_inLine
+BABEL_OP2_202_60778_20131201_233949_outLine
+BABEL_OP2_202_61040_20140227_003457_inLine
+BABEL_OP2_202_61040_20140227_003457_outLine
+BABEL_OP2_202_63670_20140130_231139_inLine
+BABEL_OP2_202_63670_20140130_231139_outLine
+BABEL_OP2_202_65466_20131010_013521_inLine
+BABEL_OP2_202_65466_20131010_013521_outLine
+BABEL_OP2_202_66001_20130107_194345_inLine
+BABEL_OP2_202_66001_20130107_194345_outLine
+BABEL_OP2_202_66045_20130410_204151_inLine
+BABEL_OP2_202_66045_20130410_204151_outLine
+BABEL_OP2_202_66045_20130410_211501_inLine
+BABEL_OP2_202_66045_20130410_211501_outLine
+BABEL_OP2_202_67401_20130912_043928_inLine
+BABEL_OP2_202_67401_20130912_043928_outLine
+BABEL_OP2_202_67964_20140125_232737_inLine
+BABEL_OP2_202_67964_20140125_232737_outLine
+BABEL_OP2_202_68748_20130803_201133_inLine
+BABEL_OP2_202_68748_20130803_201133_outLine
+BABEL_OP2_202_71976_20131128_193641_inLine
+BABEL_OP2_202_71976_20131128_193641_outLine
+BABEL_OP2_202_74121_20130220_195721_inLine
+BABEL_OP2_202_74121_20130220_195721_outLine
+BABEL_OP2_202_74121_20130220_201735_inLine
+BABEL_OP2_202_74121_20130220_201735_outLine
+BABEL_OP2_202_75064_20140226_232411_inLine
+BABEL_OP2_202_75064_20140226_232411_outLine
+BABEL_OP2_202_75261_20140311_002541_inLine
+BABEL_OP2_202_75261_20140311_002541_outLine
+BABEL_OP2_202_75812_20131127_193133_inLine
+BABEL_OP2_202_75812_20131127_193133_outLine
+BABEL_OP2_202_76499_20130412_201900_inLine
+BABEL_OP2_202_76499_20130412_201900_outLine
+BABEL_OP2_202_77033_20140312_034901_inLine
+BABEL_OP2_202_77033_20140312_034901_outLine
+BABEL_OP2_202_79045_20140310_212332_inLine
+BABEL_OP2_202_79045_20140310_212332_outLine
+BABEL_OP2_202_80306_20130928_232209_inLine
+BABEL_OP2_202_80306_20130928_232209_outLine
+BABEL_OP2_202_80989_20131016_213255_inLine
+BABEL_OP2_202_80989_20131016_213255_outLine
+BABEL_OP2_202_81622_20130218_232606_inLine
+BABEL_OP2_202_81622_20130218_232606_outLine
+BABEL_OP2_202_83625_20131130_222251_inLine
+BABEL_OP2_202_83625_20131130_222251_outLine
+BABEL_OP2_202_84194_20131130_024921_inLine
+BABEL_OP2_202_84194_20131130_024921_outLine
+BABEL_OP2_202_84408_20130306_184336_inLine
+BABEL_OP2_202_84408_20130306_184336_outLine
+BABEL_OP2_202_84768_20130107_194303_inLine
+BABEL_OP2_202_84768_20130107_194303_outLine
+BABEL_OP2_202_87305_20131016_225546_inLine
+BABEL_OP2_202_87305_20131016_225546_outLine
+BABEL_OP2_202_89695_20130215_224831_inLine
+BABEL_OP2_202_89695_20130215_224831_outLine
+BABEL_OP2_202_90740_20131120_195825_inLine
+BABEL_OP2_202_90740_20131120_195825_outLine
+BABEL_OP2_202_91478_20131127_031740_inLine
+BABEL_OP2_202_91478_20131127_031740_outLine
+BABEL_OP2_202_95231_20131128_211454_inLine
+BABEL_OP2_202_95231_20131128_211454_outLine
diff --git a/egs/babel/s5d/conf/lists/202-swahili/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/202-swahili/sub-train.untranscribed.list
new file mode 100644
index 00000000000..6f18d1b31d9
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/202-swahili/sub-train.untranscribed.list
@@ -0,0 +1,397 @@
+BABEL_OP2_202_10002_20131130_011225_inLine
+BABEL_OP2_202_10002_20131130_011225_outLine
+BABEL_OP2_202_10184_20130214_193710_inLine
+BABEL_OP2_202_10184_20130214_193710_outLine
+BABEL_OP2_202_10464_20131203_215404_inLine
+BABEL_OP2_202_10464_20131203_215404_outLine
+BABEL_OP2_202_10647_20131009_183755_inLine
+BABEL_OP2_202_10647_20131009_183755_outLine
+BABEL_OP2_202_10966_20131219_004736_inLine
+BABEL_OP2_202_10966_20131219_004736_outLine
+BABEL_OP2_202_11310_20131220_011737_inLine
+BABEL_OP2_202_11310_20131220_011737_outLine
+BABEL_OP2_202_11352_20131120_175331_inLine
+BABEL_OP2_202_11352_20131120_175331_outLine
+BABEL_OP2_202_11528_20131126_194053_inLine
+BABEL_OP2_202_11528_20131126_194053_outLine
+BABEL_OP2_202_12846_20140207_070059_inLine
+BABEL_OP2_202_12846_20140207_070059_outLine
+BABEL_OP2_202_12846_20140207_072228_inLine
+BABEL_OP2_202_12846_20140207_072228_outLine
+BABEL_OP2_202_13126_20131010_154341_inLine
+BABEL_OP2_202_13126_20131010_154341_outLine
+BABEL_OP2_202_13189_20131218_191846_inLine
+BABEL_OP2_202_13189_20131218_191846_outLine
+BABEL_OP2_202_13490_20130410_232045_inLine
+BABEL_OP2_202_13490_20130410_232045_outLine
+BABEL_OP2_202_13561_20130927_174413_inLine
+BABEL_OP2_202_13561_20130927_174413_outLine
+BABEL_OP2_202_14929_20130215_230011_inLine
+BABEL_OP2_202_14929_20130215_230011_outLine
+BABEL_OP2_202_15024_20130211_211646_inLine
+BABEL_OP2_202_15024_20130211_211646_outLine
+BABEL_OP2_202_15281_20131017_173858_inLine
+BABEL_OP2_202_15281_20131017_173858_outLine
+BABEL_OP2_202_16149_20130108_192505_inLine
+BABEL_OP2_202_16149_20130108_192505_outLine
+BABEL_OP2_202_16839_20131218_202752_inLine
+BABEL_OP2_202_16839_20131218_202752_outLine
+BABEL_OP2_202_16886_20130219_213720_inLine
+BABEL_OP2_202_16886_20130219_213720_outLine
+BABEL_OP2_202_17472_20131128_215323_inLine
+BABEL_OP2_202_17472_20131128_215323_outLine
+BABEL_OP2_202_18242_20131203_010326_inLine
+BABEL_OP2_202_18242_20131203_010326_outLine
+BABEL_OP2_202_18490_20140109_200346_inLine
+BABEL_OP2_202_18490_20140109_200346_outLine
+BABEL_OP2_202_18566_20140209_233124_inLine
+BABEL_OP2_202_18566_20140209_233124_outLine
+BABEL_OP2_202_19589_20131016_205832_inLine
+BABEL_OP2_202_19589_20131016_205832_outLine
+BABEL_OP2_202_19877_20131011_005357_inLine
+BABEL_OP2_202_19877_20131011_005357_outLine
+BABEL_OP2_202_21624_20131009_200818_inLine
+BABEL_OP2_202_21624_20131009_200818_outLine
+BABEL_OP2_202_21807_20130926_194526_inLine
+BABEL_OP2_202_21807_20130926_194526_outLine
+BABEL_OP2_202_22643_20131126_221057_inLine
+BABEL_OP2_202_22643_20131126_221057_outLine
+BABEL_OP2_202_22918_20131031_201038_inLine
+BABEL_OP2_202_22918_20131031_201038_outLine
+BABEL_OP2_202_23092_20131018_200124_inLine
+BABEL_OP2_202_23092_20131018_200124_outLine
+BABEL_OP2_202_23153_20130220_213017_inLine
+BABEL_OP2_202_23153_20130220_213017_outLine
+BABEL_OP2_202_23190_20130308_215320_inLine
+BABEL_OP2_202_23190_20130308_215320_outLine
+BABEL_OP2_202_23195_20140205_001534_inLine
+BABEL_OP2_202_23195_20140205_001534_outLine
+BABEL_OP2_202_24010_20140204_221739_inLine
+BABEL_OP2_202_24010_20140204_221739_outLine
+BABEL_OP2_202_24241_20140218_231626_inLine
+BABEL_OP2_202_24241_20140218_231626_outLine
+BABEL_OP2_202_24779_20140205_002210_inLine
+BABEL_OP2_202_24779_20140205_002210_outLine
+BABEL_OP2_202_24982_20131219_225432_inLine
+BABEL_OP2_202_24982_20131219_225432_outLine
+BABEL_OP2_202_25698_20140208_030726_inLine
+BABEL_OP2_202_25698_20140208_030726_outLine
+BABEL_OP2_202_25719_20140217_232330_inLine
+BABEL_OP2_202_25719_20140217_232330_outLine
+BABEL_OP2_202_26507_20131030_200210_inLine
+BABEL_OP2_202_26507_20131030_200210_outLine
+BABEL_OP2_202_27042_20140209_012004_inLine
+BABEL_OP2_202_27042_20140209_012004_outLine
+BABEL_OP2_202_27367_20131127_225822_inLine
+BABEL_OP2_202_27367_20131127_225822_outLine
+BABEL_OP2_202_28303_20130930_225539_inLine
+BABEL_OP2_202_28303_20130930_225539_outLine
+BABEL_OP2_202_28595_20140219_174344_inLine
+BABEL_OP2_202_28595_20140219_174344_outLine
+BABEL_OP2_202_29439_20131009_210851_inLine
+BABEL_OP2_202_29439_20131009_210851_outLine
+BABEL_OP2_202_29482_20140204_232809_inLine
+BABEL_OP2_202_29482_20140204_232809_outLine
+BABEL_OP2_202_29482_20140204_234658_inLine
+BABEL_OP2_202_29482_20140204_234658_outLine
+BABEL_OP2_202_30098_20140210_002512_inLine
+BABEL_OP2_202_30098_20140210_002512_outLine
+BABEL_OP2_202_30461_20140219_222004_inLine
+BABEL_OP2_202_30461_20140219_222004_outLine
+BABEL_OP2_202_31184_20130213_182811_inLine
+BABEL_OP2_202_31184_20130213_182811_outLine
+BABEL_OP2_202_31184_20130213_183600_inLine
+BABEL_OP2_202_31184_20130213_183600_outLine
+BABEL_OP2_202_31919_20131010_181805_inLine
+BABEL_OP2_202_31919_20131010_181805_outLine
+BABEL_OP2_202_32998_20131221_004354_inLine
+BABEL_OP2_202_32998_20131221_004354_outLine
+BABEL_OP2_202_33424_20131012_231429_inLine
+BABEL_OP2_202_33424_20131012_231429_outLine
+BABEL_OP2_202_33497_20130429_202650_inLine
+BABEL_OP2_202_33497_20130429_202650_outLine
+BABEL_OP2_202_33497_20130429_204336_inLine
+BABEL_OP2_202_33497_20130429_204336_outLine
+BABEL_OP2_202_33913_20131218_215213_inLine
+BABEL_OP2_202_33913_20131218_215213_outLine
+BABEL_OP2_202_34064_20131220_013348_inLine
+BABEL_OP2_202_34064_20131220_013348_outLine
+BABEL_OP2_202_34410_20131119_191059_inLine
+BABEL_OP2_202_34410_20131119_191059_outLine
+BABEL_OP2_202_34486_20131016_193212_inLine
+BABEL_OP2_202_34486_20131016_193212_outLine
+BABEL_OP2_202_34586_20131219_235945_inLine
+BABEL_OP2_202_34586_20131219_235945_outLine
+BABEL_OP2_202_34826_20131220_013036_inLine
+BABEL_OP2_202_34826_20131220_013036_outLine
+BABEL_OP2_202_34860_20131202_205952_inLine
+BABEL_OP2_202_34860_20131202_205952_outLine
+BABEL_OP2_202_35139_20131003_221114_inLine
+BABEL_OP2_202_35139_20131003_221114_outLine
+BABEL_OP2_202_36642_20140114_203343_inLine
+BABEL_OP2_202_36642_20140114_203343_outLine
+BABEL_OP2_202_36894_20121228_180620_inLine
+BABEL_OP2_202_36894_20121228_180620_outLine
+BABEL_OP2_202_37285_20130730_214031_inLine
+BABEL_OP2_202_37285_20130730_214031_outLine
+BABEL_OP2_202_39006_20140115_012801_inLine
+BABEL_OP2_202_39006_20140115_012801_outLine
+BABEL_OP2_202_40557_20131018_015314_inLine
+BABEL_OP2_202_40557_20131018_015314_outLine
+BABEL_OP2_202_40565_20130725_183219_inLine
+BABEL_OP2_202_40565_20130725_183219_outLine
+BABEL_OP2_202_41542_20131029_200308_inLine
+BABEL_OP2_202_41542_20131029_200308_outLine
+BABEL_OP2_202_41598_20140225_031321_inLine
+BABEL_OP2_202_41598_20140225_031321_outLine
+BABEL_OP2_202_41720_20131129_192607_inLine
+BABEL_OP2_202_41720_20131129_192607_outLine
+BABEL_OP2_202_41720_20131129_194102_inLine
+BABEL_OP2_202_41720_20131129_194102_outLine
+BABEL_OP2_202_42309_20140221_210458_inLine
+BABEL_OP2_202_42309_20140221_210458_outLine
+BABEL_OP2_202_42434_20130930_235132_inLine
+BABEL_OP2_202_42434_20130930_235132_outLine
+BABEL_OP2_202_42434_20131001_001757_inLine
+BABEL_OP2_202_42434_20131001_001757_outLine
+BABEL_OP2_202_42991_20130801_010705_inLine
+BABEL_OP2_202_42991_20130801_010705_outLine
+BABEL_OP2_202_43794_20131015_230636_inLine
+BABEL_OP2_202_43794_20131015_230636_outLine
+BABEL_OP2_202_46041_20131018_224852_inLine
+BABEL_OP2_202_46041_20131018_224852_outLine
+BABEL_OP2_202_46261_20130213_203255_inLine
+BABEL_OP2_202_46261_20130213_203255_outLine
+BABEL_OP2_202_46550_20131003_205134_inLine
+BABEL_OP2_202_46550_20131003_205134_outLine
+BABEL_OP2_202_46688_20130108_003601_inLine
+BABEL_OP2_202_46688_20130108_003601_outLine
+BABEL_OP2_202_46757_20130726_172556_inLine
+BABEL_OP2_202_46757_20130726_172556_outLine
+BABEL_OP2_202_46976_20130214_203921_inLine
+BABEL_OP2_202_46976_20130214_203921_outLine
+BABEL_OP2_202_47186_20131101_211007_inLine
+BABEL_OP2_202_47186_20131101_211007_outLine
+BABEL_OP2_202_47823_20131017_214917_inLine
+BABEL_OP2_202_47823_20131017_214917_outLine
+BABEL_OP2_202_47866_20131010_061153_inLine
+BABEL_OP2_202_47866_20131010_061153_outLine
+BABEL_OP2_202_48299_20131130_233044_inLine
+BABEL_OP2_202_48399_20131005_030007_outLine
+BABEL_OP2_202_49437_20131031_193108_inLine
+BABEL_OP2_202_49437_20131031_193108_outLine
+BABEL_OP2_202_49630_20130731_234235_inLine
+BABEL_OP2_202_49630_20130731_234235_outLine
+BABEL_OP2_202_49739_20131127_171846_inLine
+BABEL_OP2_202_49739_20131127_171846_outLine
+BABEL_OP2_202_49768_20131001_222725_inLine
+BABEL_OP2_202_49768_20131001_222725_outLine
+BABEL_OP2_202_49907_20131003_213256_inLine
+BABEL_OP2_202_49907_20131003_213256_outLine
+BABEL_OP2_202_50186_20131216_004336_inLine
+BABEL_OP2_202_50186_20131216_004336_outLine
+BABEL_OP2_202_52246_20140125_011930_inLine
+BABEL_OP2_202_52246_20140125_011930_outLine
+BABEL_OP2_202_52272_20130103_193203_inLine
+BABEL_OP2_202_52272_20130103_193203_outLine
+BABEL_OP2_202_52422_20140123_223352_inLine
+BABEL_OP2_202_52422_20140123_223352_outLine
+BABEL_OP2_202_53758_20131203_003849_inLine
+BABEL_OP2_202_53758_20131203_003849_outLine
+BABEL_OP2_202_54066_20140121_223255_inLine
+BABEL_OP2_202_54066_20140121_223255_outLine
+BABEL_OP2_202_54530_20140125_000633_inLine
+BABEL_OP2_202_54530_20140125_000633_outLine
+BABEL_OP2_202_54634_20140121_201449_inLine
+BABEL_OP2_202_54634_20140121_201449_outLine
+BABEL_OP2_202_55381_20140123_030341_inLine
+BABEL_OP2_202_55381_20140123_030341_outLine
+BABEL_OP2_202_56023_20140124_213010_inLine
+BABEL_OP2_202_56023_20140124_213010_outLine
+BABEL_OP2_202_56331_20140124_212336_inLine
+BABEL_OP2_202_56331_20140124_212336_outLine
+BABEL_OP2_202_56606_20140123_202633_inLine
+BABEL_OP2_202_56606_20140123_202633_outLine
+BABEL_OP2_202_56951_20131130_192609_inLine
+BABEL_OP2_202_56951_20131130_192609_outLine
+BABEL_OP2_202_57233_20131120_235941_inLine
+BABEL_OP2_202_57233_20131120_235941_outLine
+BABEL_OP2_202_58103_20130930_045229_inLine
+BABEL_OP2_202_58103_20130930_045229_outLine
+BABEL_OP2_202_58107_20130927_165258_inLine
+BABEL_OP2_202_58107_20130927_165258_outLine
+BABEL_OP2_202_58489_20140131_214025_inLine
+BABEL_OP2_202_58489_20140131_214025_outLine
+BABEL_OP2_202_58821_20130730_183731_inLine
+BABEL_OP2_202_58821_20130730_183731_outLine
+BABEL_OP2_202_59028_20140131_212747_inLine
+BABEL_OP2_202_59028_20140131_212747_outLine
+BABEL_OP2_202_59402_20140201_222141_inLine
+BABEL_OP2_202_59402_20140201_222141_outLine
+BABEL_OP2_202_59402_20140201_222847_inLine
+BABEL_OP2_202_59402_20140201_222847_outLine
+BABEL_OP2_202_60474_20140120_222223_inLine
+BABEL_OP2_202_60474_20140120_222223_outLine
+BABEL_OP2_202_61438_20131129_231819_inLine
+BABEL_OP2_202_61438_20131129_231819_outLine
+BABEL_OP2_202_61438_20131129_233030_inLine
+BABEL_OP2_202_61438_20131129_233030_outLine
+BABEL_OP2_202_61873_20130111_181915_inLine
+BABEL_OP2_202_61873_20130111_181915_outLine
+BABEL_OP2_202_62047_20140129_020943_inLine
+BABEL_OP2_202_62047_20140129_020943_outLine
+BABEL_OP2_202_62360_20131014_211636_inLine
+BABEL_OP2_202_62360_20131014_211636_outLine
+BABEL_OP2_202_62714_20131101_225706_inLine
+BABEL_OP2_202_62714_20131101_225706_outLine
+BABEL_OP2_202_63490_20131203_234940_inLine
+BABEL_OP2_202_63490_20131203_234940_outLine
+BABEL_OP2_202_63920_20131215_021712_inLine
+BABEL_OP2_202_63920_20131215_021712_outLine
+BABEL_OP2_202_64688_20140126_004157_inLine
+BABEL_OP2_202_64688_20140126_004157_outLine
+BABEL_OP2_202_65048_20140128_174534_inLine
+BABEL_OP2_202_65048_20140128_174534_outLine
+BABEL_OP2_202_65336_20140131_001312_inLine
+BABEL_OP2_202_65336_20140131_001312_outLine
+BABEL_OP2_202_65913_20140127_181419_inLine
+BABEL_OP2_202_65913_20140127_181419_outLine
+BABEL_OP2_202_66305_20140126_020747_inLine
+BABEL_OP2_202_66305_20140126_020747_outLine
+BABEL_OP2_202_66641_20131127_183344_inLine
+BABEL_OP2_202_66641_20131127_183344_outLine
+BABEL_OP2_202_66916_20121229_203810_inLine
+BABEL_OP2_202_66916_20121229_203810_outLine
+BABEL_OP2_202_66916_20121229_211053_inLine
+BABEL_OP2_202_66916_20121229_211053_outLine
+BABEL_OP2_202_68289_20131128_012756_inLine
+BABEL_OP2_202_68289_20131128_012756_outLine
+BABEL_OP2_202_68854_20131012_000134_inLine
+BABEL_OP2_202_68854_20131012_000134_outLine
+BABEL_OP2_202_69937_20140131_034019_inLine
+BABEL_OP2_202_69937_20140131_034019_outLine
+BABEL_OP2_202_71566_20140311_213752_inLine
+BABEL_OP2_202_71566_20140311_213752_outLine
+BABEL_OP2_202_72324_20130423_161716_inLine
+BABEL_OP2_202_72324_20130423_161716_outLine
+BABEL_OP2_202_73005_20131012_011254_inLine
+BABEL_OP2_202_73005_20131012_011254_outLine
+BABEL_OP2_202_73022_20140226_210050_inLine
+BABEL_OP2_202_73022_20140226_210050_outLine
+BABEL_OP2_202_73518_20140304_001655_inLine
+BABEL_OP2_202_73518_20140304_001655_outLine
+BABEL_OP2_202_74667_20130227_180657_inLine
+BABEL_OP2_202_74667_20130227_180657_outLine
+BABEL_OP2_202_75930_20131202_213433_inLine
+BABEL_OP2_202_75930_20131202_213433_outLine
+BABEL_OP2_202_76126_20131031_183234_inLine
+BABEL_OP2_202_76126_20131031_183234_outLine
+BABEL_OP2_202_76444_20131018_000013_inLine
+BABEL_OP2_202_76444_20131018_000013_outLine
+BABEL_OP2_202_77146_20121229_203404_inLine
+BABEL_OP2_202_77146_20121229_203404_outLine
+BABEL_OP2_202_78482_20140311_014827_inLine
+BABEL_OP2_202_78482_20140311_014827_outLine
+BABEL_OP2_202_79367_20130107_224252_inLine
+BABEL_OP2_202_79367_20130107_224252_outLine
+BABEL_OP2_202_79973_20131130_184708_inLine
+BABEL_OP2_202_79973_20131130_184708_outLine
+BABEL_OP2_202_79995_20140227_030446_inLine
+BABEL_OP2_202_79995_20140227_030446_outLine
+BABEL_OP2_202_80134_20131202_174756_inLine
+BABEL_OP2_202_80134_20131202_174756_outLine
+BABEL_OP2_202_80383_20131207_013517_inLine
+BABEL_OP2_202_80383_20131207_013517_outLine
+BABEL_OP2_202_81149_20131010_010411_inLine
+BABEL_OP2_202_81149_20131010_010411_outLine
+BABEL_OP2_202_82123_20131130_004859_inLine
+BABEL_OP2_202_82123_20131130_004859_outLine
+BABEL_OP2_202_82138_20130415_225929_inLine
+BABEL_OP2_202_82138_20130415_225929_outLine
+BABEL_OP2_202_82425_20130108_181846_inLine
+BABEL_OP2_202_82425_20130108_181846_outLine
+BABEL_OP2_202_82473_20131004_202625_inLine
+BABEL_OP2_202_82473_20131004_202625_outLine
+BABEL_OP2_202_82496_20130105_232830_inLine
+BABEL_OP2_202_82496_20130105_232830_outLine
+BABEL_OP2_202_82622_20131007_171417_inLine
+BABEL_OP2_202_82622_20131007_171417_outLine
+BABEL_OP2_202_83609_20131128_022206_inLine
+BABEL_OP2_202_83609_20131128_022206_outLine
+BABEL_OP2_202_83651_20131003_212624_inLine
+BABEL_OP2_202_83651_20131003_212624_outLine
+BABEL_OP2_202_84077_20131130_195755_inLine
+BABEL_OP2_202_84077_20131130_195755_outLine
+BABEL_OP2_202_84466_20131010_040505_inLine
+BABEL_OP2_202_84466_20131010_040505_outLine
+BABEL_OP2_202_84469_20131018_212735_inLine
+BABEL_OP2_202_84469_20131018_212735_outLine
+BABEL_OP2_202_86156_20131030_001706_inLine
+BABEL_OP2_202_86156_20131030_001706_outLine
+BABEL_OP2_202_87179_20140320_165556_inLine
+BABEL_OP2_202_87179_20140320_165556_outLine
+BABEL_OP2_202_88776_20130107_192204_inLine
+BABEL_OP2_202_88776_20130107_192204_outLine
+BABEL_OP2_202_88776_20130107_195623_inLine
+BABEL_OP2_202_88776_20130107_195623_outLine
+BABEL_OP2_202_88783_20131018_191706_inLine
+BABEL_OP2_202_88783_20131018_191706_outLine
+BABEL_OP2_202_88865_20140319_212413_inLine
+BABEL_OP2_202_88865_20140319_212413_outLine
+BABEL_OP2_202_89665_20140320_004314_inLine
+BABEL_OP2_202_89665_20140320_004314_outLine
+BABEL_OP2_202_90347_20130912_005052_inLine
+BABEL_OP2_202_90347_20130912_005052_outLine
+BABEL_OP2_202_90572_20131009_190400_inLine
+BABEL_OP2_202_90572_20131009_190400_outLine
+BABEL_OP2_202_90737_20130213_201303_inLine
+BABEL_OP2_202_90737_20130213_201303_outLine
+BABEL_OP2_202_90739_20130222_223815_inLine
+BABEL_OP2_202_90739_20130222_223815_outLine
+BABEL_OP2_202_91080_20130429_213558_inLine
+BABEL_OP2_202_91080_20130429_213558_outLine
+BABEL_OP2_202_91891_20130803_000104_inLine
+BABEL_OP2_202_91891_20130803_000104_outLine
+BABEL_OP2_202_92065_20140109_204802_inLine
+BABEL_OP2_202_92065_20140109_204802_outLine
+BABEL_OP2_202_92440_20131203_195407_inLine
+BABEL_OP2_202_92440_20131203_195407_outLine
+BABEL_OP2_202_92440_20131203_200046_inLine
+BABEL_OP2_202_92440_20131203_200046_outLine
+BABEL_OP2_202_92527_20130225_184732_inLine
+BABEL_OP2_202_92527_20130225_184732_outLine
+BABEL_OP2_202_93153_20131003_212947_inLine
+BABEL_OP2_202_93153_20131003_212947_outLine
+BABEL_OP2_202_93153_20131003_213722_inLine
+BABEL_OP2_202_93153_20131003_213722_outLine
+BABEL_OP2_202_93222_20131126_211540_inLine
+BABEL_OP2_202_93222_20131126_211540_outLine
+BABEL_OP2_202_94333_20130105_202651_inLine
+BABEL_OP2_202_94333_20130105_202651_outLine
+BABEL_OP2_202_94449_20131011_205657_inLine
+BABEL_OP2_202_94449_20131011_205657_outLine
+BABEL_OP2_202_94869_20121219_204921_inLine
+BABEL_OP2_202_94869_20121219_204921_outLine
+BABEL_OP2_202_95077_20140320_014923_inLine
+BABEL_OP2_202_95077_20140320_014923_outLine
+BABEL_OP2_202_96376_20131011_024111_inLine
+BABEL_OP2_202_96376_20131011_024111_outLine
+BABEL_OP2_202_96680_20131130_202936_inLine
+BABEL_OP2_202_96680_20131130_202936_outLine
+BABEL_OP2_202_96690_20130220_210217_inLine
+BABEL_OP2_202_96690_20130220_210217_outLine
+BABEL_OP2_202_96808_20131012_212254_inLine
+BABEL_OP2_202_96808_20131012_212254_outLine
+BABEL_OP2_202_97220_20140319_193818_inLine
+BABEL_OP2_202_97220_20140319_193818_outLine
+BABEL_OP2_202_97363_20131002_203133_inLine
+BABEL_OP2_202_97363_20131002_203133_outLine
+BABEL_OP2_202_97363_20131003_002739_inLine
+BABEL_OP2_202_97363_20131003_002739_outLine
+BABEL_OP2_202_97373_20130730_151855_inLine
+BABEL_OP2_202_99401_20130108_001107_inLine
+BABEL_OP2_202_99401_20130108_001107_outLine
+BABEL_OP2_202_99594_20130220_222308_inLine
+BABEL_OP2_202_99594_20130220_222308_outLine
+BABEL_OP2_202_99883_20131120_212150_inLine
+BABEL_OP2_202_99883_20131120_212150_outLine
diff --git a/egs/babel/s5d/conf/lists/202-swahili/training.list b/egs/babel/s5d/conf/lists/202-swahili/training.list
new file mode 100644
index 00000000000..1f0477cdd00
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/202-swahili/training.list
@@ -0,0 +1,525 @@
+BABEL_OP2_202_10002_20131130_011225_inLine
+BABEL_OP2_202_10002_20131130_011225_outLine
+BABEL_OP2_202_10184_20130214_193710_inLine
+BABEL_OP2_202_10184_20130214_193710_outLine
+BABEL_OP2_202_10464_20131203_215404_inLine
+BABEL_OP2_202_10464_20131203_215404_outLine
+BABEL_OP2_202_10647_20131009_183755_inLine
+BABEL_OP2_202_10647_20131009_183755_outLine
+BABEL_OP2_202_10966_20131219_004736_inLine
+BABEL_OP2_202_10966_20131219_004736_outLine
+BABEL_OP2_202_11310_20131220_011737_inLine
+BABEL_OP2_202_11310_20131220_011737_outLine
+BABEL_OP2_202_11352_20131120_175331_inLine
+BABEL_OP2_202_11352_20131120_175331_outLine
+BABEL_OP2_202_11528_20131126_194053_inLine
+BABEL_OP2_202_11528_20131126_194053_outLine
+BABEL_OP2_202_11859_20140206_193130_inLine
+BABEL_OP2_202_11859_20140206_193130_outLine
+BABEL_OP2_202_12846_20140207_070059_inLine
+BABEL_OP2_202_12846_20140207_070059_outLine
+BABEL_OP2_202_12846_20140207_072228_inLine
+BABEL_OP2_202_12846_20140207_072228_outLine
+BABEL_OP2_202_13126_20131010_154341_inLine
+BABEL_OP2_202_13126_20131010_154341_outLine
+BABEL_OP2_202_13189_20131218_191846_inLine
+BABEL_OP2_202_13189_20131218_191846_outLine
+BABEL_OP2_202_13490_20130410_232045_inLine
+BABEL_OP2_202_13490_20130410_232045_outLine
+BABEL_OP2_202_13561_20130927_174413_inLine
+BABEL_OP2_202_13561_20130927_174413_outLine
+BABEL_OP2_202_14719_20131126_223914_inLine
+BABEL_OP2_202_14719_20131126_223914_outLine
+BABEL_OP2_202_14929_20130215_230011_inLine
+BABEL_OP2_202_14929_20130215_230011_outLine
+BABEL_OP2_202_15024_20130211_211646_inLine
+BABEL_OP2_202_15024_20130211_211646_outLine
+BABEL_OP2_202_15281_20131017_173858_inLine
+BABEL_OP2_202_15281_20131017_173858_outLine
+BABEL_OP2_202_16149_20130108_192505_inLine
+BABEL_OP2_202_16149_20130108_192505_outLine
+BABEL_OP2_202_16838_20140204_225359_inLine
+BABEL_OP2_202_16838_20140204_225359_outLine
+BABEL_OP2_202_16839_20131218_202752_inLine
+BABEL_OP2_202_16839_20131218_202752_outLine
+BABEL_OP2_202_16886_20130219_213720_inLine
+BABEL_OP2_202_16886_20130219_213720_outLine
+BABEL_OP2_202_17472_20131128_215323_inLine
+BABEL_OP2_202_17472_20131128_215323_outLine
+BABEL_OP2_202_18242_20131203_010326_inLine
+BABEL_OP2_202_18242_20131203_010326_outLine
+BABEL_OP2_202_18490_20140109_200346_inLine
+BABEL_OP2_202_18490_20140109_200346_outLine
+BABEL_OP2_202_18566_20140209_233124_inLine
+BABEL_OP2_202_18566_20140209_233124_outLine
+BABEL_OP2_202_19589_20131016_205832_inLine
+BABEL_OP2_202_19589_20131016_205832_outLine
+BABEL_OP2_202_19877_20131011_005357_inLine
+BABEL_OP2_202_19877_20131011_005357_outLine
+BABEL_OP2_202_21206_20140207_213800_inLine
+BABEL_OP2_202_21206_20140207_213800_outLine
+BABEL_OP2_202_21624_20131009_200818_inLine
+BABEL_OP2_202_21624_20131009_200818_outLine
+BABEL_OP2_202_21807_20130926_194526_inLine
+BABEL_OP2_202_21807_20130926_194526_outLine
+BABEL_OP2_202_22643_20131126_221057_inLine
+BABEL_OP2_202_22643_20131126_221057_outLine
+BABEL_OP2_202_22918_20131031_201038_inLine
+BABEL_OP2_202_22918_20131031_201038_outLine
+BABEL_OP2_202_23092_20131018_200124_inLine
+BABEL_OP2_202_23092_20131018_200124_outLine
+BABEL_OP2_202_23153_20130220_213017_inLine
+BABEL_OP2_202_23153_20130220_213017_outLine
+BABEL_OP2_202_23190_20130308_215320_inLine
+BABEL_OP2_202_23190_20130308_215320_outLine
+BABEL_OP2_202_23195_20140205_001534_inLine
+BABEL_OP2_202_23195_20140205_001534_outLine
+BABEL_OP2_202_24010_20140204_221739_inLine
+BABEL_OP2_202_24010_20140204_221739_outLine
+BABEL_OP2_202_24241_20140218_231626_inLine
+BABEL_OP2_202_24241_20140218_231626_outLine
+BABEL_OP2_202_24501_20140205_231355_inLine
+BABEL_OP2_202_24501_20140205_231355_outLine
+BABEL_OP2_202_24779_20140205_002210_inLine
+BABEL_OP2_202_24779_20140205_002210_outLine
+BABEL_OP2_202_24982_20131219_225432_inLine
+BABEL_OP2_202_24982_20131219_225432_outLine
+BABEL_OP2_202_25698_20140208_030726_inLine
+BABEL_OP2_202_25698_20140208_030726_outLine
+BABEL_OP2_202_25719_20140217_232330_inLine
+BABEL_OP2_202_25719_20140217_232330_outLine
+BABEL_OP2_202_26507_20131030_200210_inLine
+BABEL_OP2_202_26507_20131030_200210_outLine
+BABEL_OP2_202_27042_20140209_012004_inLine
+BABEL_OP2_202_27042_20140209_012004_outLine
+BABEL_OP2_202_27189_20131216_001758_inLine
+BABEL_OP2_202_27189_20131216_001758_outLine
+BABEL_OP2_202_27367_20131127_225822_inLine
+BABEL_OP2_202_27367_20131127_225822_outLine
+BABEL_OP2_202_28303_20130930_225539_inLine
+BABEL_OP2_202_28303_20130930_225539_outLine
+BABEL_OP2_202_28522_20130925_000938_inLine
+BABEL_OP2_202_28522_20130925_000938_outLine
+BABEL_OP2_202_28595_20140219_174344_inLine
+BABEL_OP2_202_28595_20140219_174344_outLine
+BABEL_OP2_202_28644_20140205_001525_inLine
+BABEL_OP2_202_28644_20140205_001525_outLine
+BABEL_OP2_202_29439_20131009_210851_inLine
+BABEL_OP2_202_29439_20131009_210851_outLine
+BABEL_OP2_202_29482_20140204_232809_inLine
+BABEL_OP2_202_29482_20140204_232809_outLine
+BABEL_OP2_202_29482_20140204_234658_inLine
+BABEL_OP2_202_29482_20140204_234658_outLine
+BABEL_OP2_202_30098_20140210_002512_inLine
+BABEL_OP2_202_30098_20140210_002512_outLine
+BABEL_OP2_202_30280_20140220_001618_inLine
+BABEL_OP2_202_30280_20140220_001618_outLine
+BABEL_OP2_202_30432_20130502_210534_inLine
+BABEL_OP2_202_30432_20130502_210534_outLine
+BABEL_OP2_202_30432_20130503_175016_inLine
+BABEL_OP2_202_30432_20130503_175016_outLine
+BABEL_OP2_202_30461_20140219_222004_inLine
+BABEL_OP2_202_30461_20140219_222004_outLine
+BABEL_OP2_202_30645_20130108_200114_inLine
+BABEL_OP2_202_30645_20130108_200114_outLine
+BABEL_OP2_202_31184_20130213_182811_inLine
+BABEL_OP2_202_31184_20130213_182811_outLine
+BABEL_OP2_202_31184_20130213_183600_inLine
+BABEL_OP2_202_31184_20130213_183600_outLine
+BABEL_OP2_202_31919_20131010_181805_inLine
+BABEL_OP2_202_31919_20131010_181805_outLine
+BABEL_OP2_202_32837_20131101_203319_inLine
+BABEL_OP2_202_32837_20131101_203319_outLine
+BABEL_OP2_202_32998_20131221_004354_inLine
+BABEL_OP2_202_32998_20131221_004354_outLine
+BABEL_OP2_202_33424_20131012_231429_inLine
+BABEL_OP2_202_33424_20131012_231429_outLine
+BABEL_OP2_202_33497_20130429_202650_inLine
+BABEL_OP2_202_33497_20130429_202650_outLine
+BABEL_OP2_202_33497_20130429_204336_inLine
+BABEL_OP2_202_33497_20130429_204336_outLine
+BABEL_OP2_202_33913_20131218_215213_inLine
+BABEL_OP2_202_33913_20131218_215213_outLine
+BABEL_OP2_202_34064_20131220_013348_inLine
+BABEL_OP2_202_34064_20131220_013348_outLine
+BABEL_OP2_202_34410_20131119_191059_inLine
+BABEL_OP2_202_34410_20131119_191059_outLine
+BABEL_OP2_202_34486_20131016_193212_inLine
+BABEL_OP2_202_34486_20131016_193212_outLine
+BABEL_OP2_202_34586_20131219_235945_inLine
+BABEL_OP2_202_34586_20131219_235945_outLine
+BABEL_OP2_202_34826_20131220_013036_inLine
+BABEL_OP2_202_34826_20131220_013036_outLine
+BABEL_OP2_202_34860_20131202_205952_inLine
+BABEL_OP2_202_34860_20131202_205952_outLine
+BABEL_OP2_202_35139_20131003_221114_inLine
+BABEL_OP2_202_35139_20131003_221114_outLine
+BABEL_OP2_202_35609_20140220_193923_inLine
+BABEL_OP2_202_35609_20140220_193923_outLine
+BABEL_OP2_202_36642_20140114_203343_inLine
+BABEL_OP2_202_36642_20140114_203343_outLine
+BABEL_OP2_202_36894_20121228_180620_inLine
+BABEL_OP2_202_36894_20121228_180620_outLine
+BABEL_OP2_202_37285_20130730_214031_inLine
+BABEL_OP2_202_37285_20130730_214031_outLine
+BABEL_OP2_202_38963_20131215_232437_inLine
+BABEL_OP2_202_38963_20131215_232437_outLine
+BABEL_OP2_202_39006_20140115_012801_inLine
+BABEL_OP2_202_39006_20140115_012801_outLine
+BABEL_OP2_202_40557_20131018_015314_inLine
+BABEL_OP2_202_40557_20131018_015314_outLine
+BABEL_OP2_202_40565_20130725_183219_inLine
+BABEL_OP2_202_40565_20130725_183219_outLine
+BABEL_OP2_202_41542_20131029_200308_inLine
+BABEL_OP2_202_41542_20131029_200308_outLine
+BABEL_OP2_202_41598_20140225_031321_inLine
+BABEL_OP2_202_41598_20140225_031321_outLine
+BABEL_OP2_202_41720_20131129_192607_inLine
+BABEL_OP2_202_41720_20131129_192607_outLine
+BABEL_OP2_202_41720_20131129_194102_inLine
+BABEL_OP2_202_41720_20131129_194102_outLine
+BABEL_OP2_202_42309_20140221_210458_inLine
+BABEL_OP2_202_42309_20140221_210458_outLine
+BABEL_OP2_202_42434_20130930_235132_inLine
+BABEL_OP2_202_42434_20130930_235132_outLine
+BABEL_OP2_202_42434_20131001_001757_inLine
+BABEL_OP2_202_42434_20131001_001757_outLine
+BABEL_OP2_202_42991_20130801_010705_inLine
+BABEL_OP2_202_42991_20130801_010705_outLine
+BABEL_OP2_202_43395_20140220_223151_inLine
+BABEL_OP2_202_43395_20140220_223151_outLine
+BABEL_OP2_202_43794_20131015_230636_inLine
+BABEL_OP2_202_43794_20131015_230636_outLine
+BABEL_OP2_202_46041_20131018_224852_inLine
+BABEL_OP2_202_46041_20131018_224852_outLine
+BABEL_OP2_202_46261_20130213_203255_inLine
+BABEL_OP2_202_46261_20130213_203255_outLine
+BABEL_OP2_202_46550_20131003_205134_inLine
+BABEL_OP2_202_46550_20131003_205134_outLine
+BABEL_OP2_202_46688_20130108_003601_inLine
+BABEL_OP2_202_46688_20130108_003601_outLine
+BABEL_OP2_202_46757_20130726_172556_inLine
+BABEL_OP2_202_46757_20130726_172556_outLine
+BABEL_OP2_202_46770_20140223_234733_inLine
+BABEL_OP2_202_46770_20140223_234733_outLine
+BABEL_OP2_202_46976_20130214_203921_inLine
+BABEL_OP2_202_46976_20130214_203921_outLine
+BABEL_OP2_202_47186_20131101_211007_inLine
+BABEL_OP2_202_47186_20131101_211007_outLine
+BABEL_OP2_202_47823_20131017_214917_inLine
+BABEL_OP2_202_47823_20131017_214917_outLine
+BABEL_OP2_202_47866_20131010_061153_inLine
+BABEL_OP2_202_47866_20131010_061153_outLine
+BABEL_OP2_202_48243_20131009_224543_inLine
+BABEL_OP2_202_48243_20131009_224543_outLine
+BABEL_OP2_202_48299_20131130_233044_inLine
+BABEL_OP2_202_48399_20131005_030007_outLine
+BABEL_OP2_202_48422_20140225_220708_inLine
+BABEL_OP2_202_48422_20140225_220708_outLine
+BABEL_OP2_202_49437_20131031_193108_inLine
+BABEL_OP2_202_49437_20131031_193108_outLine
+BABEL_OP2_202_49630_20130731_234235_inLine
+BABEL_OP2_202_49630_20130731_234235_outLine
+BABEL_OP2_202_49739_20131127_171846_inLine
+BABEL_OP2_202_49739_20131127_171846_outLine
+BABEL_OP2_202_49768_20131001_222725_inLine
+BABEL_OP2_202_49768_20131001_222725_outLine
+BABEL_OP2_202_49907_20131003_213256_inLine
+BABEL_OP2_202_49907_20131003_213256_outLine
+BABEL_OP2_202_50186_20131216_004336_inLine
+BABEL_OP2_202_50186_20131216_004336_outLine
+BABEL_OP2_202_51156_20131216_015429_inLine
+BABEL_OP2_202_51156_20131216_015429_outLine
+BABEL_OP2_202_51484_20140123_220444_inLine
+BABEL_OP2_202_51484_20140123_220444_outLine
+BABEL_OP2_202_51611_20130109_194912_inLine
+BABEL_OP2_202_51611_20130109_194912_outLine
+BABEL_OP2_202_52246_20140125_011930_inLine
+BABEL_OP2_202_52246_20140125_011930_outLine
+BABEL_OP2_202_52272_20130103_193203_inLine
+BABEL_OP2_202_52272_20130103_193203_outLine
+BABEL_OP2_202_52422_20140123_223352_inLine
+BABEL_OP2_202_52422_20140123_223352_outLine
+BABEL_OP2_202_53063_20140124_000041_inLine
+BABEL_OP2_202_53063_20140124_000041_outLine
+BABEL_OP2_202_53758_20131203_003849_inLine
+BABEL_OP2_202_53758_20131203_003849_outLine
+BABEL_OP2_202_54066_20140121_223255_inLine
+BABEL_OP2_202_54066_20140121_223255_outLine
+BABEL_OP2_202_54074_20140123_205035_inLine
+BABEL_OP2_202_54074_20140123_205035_outLine
+BABEL_OP2_202_54530_20140125_000633_inLine
+BABEL_OP2_202_54530_20140125_000633_outLine
+BABEL_OP2_202_54634_20140121_201449_inLine
+BABEL_OP2_202_54634_20140121_201449_outLine
+BABEL_OP2_202_54841_20140122_195114_inLine
+BABEL_OP2_202_54841_20140122_195114_outLine
+BABEL_OP2_202_54841_20140122_200157_inLine
+BABEL_OP2_202_54841_20140122_200157_outLine
+BABEL_OP2_202_55259_20130930_023554_inLine
+BABEL_OP2_202_55259_20130930_023554_outLine
+BABEL_OP2_202_55349_20131010_002325_inLine
+BABEL_OP2_202_55349_20131010_002325_outLine
+BABEL_OP2_202_55381_20140123_030341_inLine
+BABEL_OP2_202_55381_20140123_030341_outLine
+BABEL_OP2_202_56023_20140124_213010_inLine
+BABEL_OP2_202_56023_20140124_213010_outLine
+BABEL_OP2_202_56306_20140122_204419_inLine
+BABEL_OP2_202_56306_20140122_204419_outLine
+BABEL_OP2_202_56331_20140124_212336_inLine
+BABEL_OP2_202_56331_20140124_212336_outLine
+BABEL_OP2_202_56465_20140122_194039_inLine
+BABEL_OP2_202_56465_20140122_194039_outLine
+BABEL_OP2_202_56606_20140123_202633_inLine
+BABEL_OP2_202_56606_20140123_202633_outLine
+BABEL_OP2_202_56951_20131130_192609_inLine
+BABEL_OP2_202_56951_20131130_192609_outLine
+BABEL_OP2_202_57233_20131120_235941_inLine
+BABEL_OP2_202_57233_20131120_235941_outLine
+BABEL_OP2_202_57782_20140129_231340_inLine
+BABEL_OP2_202_57782_20140129_231340_outLine
+BABEL_OP2_202_58103_20130930_045229_inLine
+BABEL_OP2_202_58103_20130930_045229_outLine
+BABEL_OP2_202_58107_20130927_165258_inLine
+BABEL_OP2_202_58107_20130927_165258_outLine
+BABEL_OP2_202_58489_20140131_214025_inLine
+BABEL_OP2_202_58489_20140131_214025_outLine
+BABEL_OP2_202_58821_20130730_183731_inLine
+BABEL_OP2_202_58821_20130730_183731_outLine
+BABEL_OP2_202_59028_20140131_212747_inLine
+BABEL_OP2_202_59028_20140131_212747_outLine
+BABEL_OP2_202_59402_20140201_222141_inLine
+BABEL_OP2_202_59402_20140201_222141_outLine
+BABEL_OP2_202_59402_20140201_222847_inLine
+BABEL_OP2_202_59402_20140201_222847_outLine
+BABEL_OP2_202_59720_20130930_032445_inLine
+BABEL_OP2_202_59720_20130930_032445_outLine
+BABEL_OP2_202_60474_20140120_222223_inLine
+BABEL_OP2_202_60474_20140120_222223_outLine
+BABEL_OP2_202_60477_20140201_200420_inLine
+BABEL_OP2_202_60477_20140201_200420_outLine
+BABEL_OP2_202_60778_20131201_233949_inLine
+BABEL_OP2_202_60778_20131201_233949_outLine
+BABEL_OP2_202_61040_20140227_003457_inLine
+BABEL_OP2_202_61040_20140227_003457_outLine
+BABEL_OP2_202_61438_20131129_231819_inLine
+BABEL_OP2_202_61438_20131129_231819_outLine
+BABEL_OP2_202_61438_20131129_233030_inLine
+BABEL_OP2_202_61438_20131129_233030_outLine
+BABEL_OP2_202_61873_20130111_181915_inLine
+BABEL_OP2_202_61873_20130111_181915_outLine
+BABEL_OP2_202_62047_20140129_020943_inLine
+BABEL_OP2_202_62047_20140129_020943_outLine
+BABEL_OP2_202_62360_20131014_211636_inLine
+BABEL_OP2_202_62360_20131014_211636_outLine
+BABEL_OP2_202_62714_20131101_225706_inLine
+BABEL_OP2_202_62714_20131101_225706_outLine
+BABEL_OP2_202_63490_20131203_234940_inLine
+BABEL_OP2_202_63490_20131203_234940_outLine
+BABEL_OP2_202_63670_20140130_231139_inLine
+BABEL_OP2_202_63670_20140130_231139_outLine
+BABEL_OP2_202_63920_20131215_021712_inLine
+BABEL_OP2_202_63920_20131215_021712_outLine
+BABEL_OP2_202_64688_20140126_004157_inLine
+BABEL_OP2_202_64688_20140126_004157_outLine
+BABEL_OP2_202_65048_20140128_174534_inLine
+BABEL_OP2_202_65048_20140128_174534_outLine
+BABEL_OP2_202_65336_20140131_001312_inLine
+BABEL_OP2_202_65336_20140131_001312_outLine
+BABEL_OP2_202_65466_20131010_013521_inLine
+BABEL_OP2_202_65466_20131010_013521_outLine
+BABEL_OP2_202_65913_20140127_181419_inLine
+BABEL_OP2_202_65913_20140127_181419_outLine
+BABEL_OP2_202_66001_20130107_194345_inLine
+BABEL_OP2_202_66001_20130107_194345_outLine
+BABEL_OP2_202_66045_20130410_204151_inLine
+BABEL_OP2_202_66045_20130410_204151_outLine
+BABEL_OP2_202_66045_20130410_211501_inLine
+BABEL_OP2_202_66045_20130410_211501_outLine
+BABEL_OP2_202_66305_20140126_020747_inLine
+BABEL_OP2_202_66305_20140126_020747_outLine
+BABEL_OP2_202_66641_20131127_183344_inLine
+BABEL_OP2_202_66641_20131127_183344_outLine
+BABEL_OP2_202_66916_20121229_203810_inLine
+BABEL_OP2_202_66916_20121229_203810_outLine
+BABEL_OP2_202_66916_20121229_211053_inLine
+BABEL_OP2_202_66916_20121229_211053_outLine
+BABEL_OP2_202_67401_20130912_043928_inLine
+BABEL_OP2_202_67401_20130912_043928_outLine
+BABEL_OP2_202_67964_20140125_232737_inLine
+BABEL_OP2_202_67964_20140125_232737_outLine
+BABEL_OP2_202_68289_20131128_012756_inLine
+BABEL_OP2_202_68289_20131128_012756_outLine
+BABEL_OP2_202_68748_20130803_201133_inLine
+BABEL_OP2_202_68748_20130803_201133_outLine
+BABEL_OP2_202_68854_20131012_000134_inLine
+BABEL_OP2_202_68854_20131012_000134_outLine
+BABEL_OP2_202_69937_20140131_034019_inLine
+BABEL_OP2_202_69937_20140131_034019_outLine
+BABEL_OP2_202_71566_20140311_213752_inLine
+BABEL_OP2_202_71566_20140311_213752_outLine
+BABEL_OP2_202_71976_20131128_193641_inLine
+BABEL_OP2_202_71976_20131128_193641_outLine
+BABEL_OP2_202_72324_20130423_161716_inLine
+BABEL_OP2_202_72324_20130423_161716_outLine
+BABEL_OP2_202_73005_20131012_011254_inLine
+BABEL_OP2_202_73005_20131012_011254_outLine
+BABEL_OP2_202_73022_20140226_210050_inLine
+BABEL_OP2_202_73022_20140226_210050_outLine
+BABEL_OP2_202_73518_20140304_001655_inLine
+BABEL_OP2_202_73518_20140304_001655_outLine
+BABEL_OP2_202_74121_20130220_195721_inLine
+BABEL_OP2_202_74121_20130220_195721_outLine
+BABEL_OP2_202_74121_20130220_201735_inLine
+BABEL_OP2_202_74121_20130220_201735_outLine
+BABEL_OP2_202_74667_20130227_180657_inLine
+BABEL_OP2_202_74667_20130227_180657_outLine
+BABEL_OP2_202_75064_20140226_232411_inLine
+BABEL_OP2_202_75064_20140226_232411_outLine
+BABEL_OP2_202_75261_20140311_002541_inLine
+BABEL_OP2_202_75261_20140311_002541_outLine
+BABEL_OP2_202_75812_20131127_193133_inLine
+BABEL_OP2_202_75812_20131127_193133_outLine
+BABEL_OP2_202_75930_20131202_213433_inLine
+BABEL_OP2_202_75930_20131202_213433_outLine
+BABEL_OP2_202_76126_20131031_183234_inLine
+BABEL_OP2_202_76126_20131031_183234_outLine
+BABEL_OP2_202_76444_20131018_000013_inLine
+BABEL_OP2_202_76444_20131018_000013_outLine
+BABEL_OP2_202_76499_20130412_201900_inLine
+BABEL_OP2_202_76499_20130412_201900_outLine
+BABEL_OP2_202_77033_20140312_034901_inLine
+BABEL_OP2_202_77033_20140312_034901_outLine
+BABEL_OP2_202_77146_20121229_203404_inLine
+BABEL_OP2_202_77146_20121229_203404_outLine
+BABEL_OP2_202_78482_20140311_014827_inLine
+BABEL_OP2_202_78482_20140311_014827_outLine
+BABEL_OP2_202_79045_20140310_212332_inLine
+BABEL_OP2_202_79045_20140310_212332_outLine
+BABEL_OP2_202_79367_20130107_224252_inLine
+BABEL_OP2_202_79367_20130107_224252_outLine
+BABEL_OP2_202_79973_20131130_184708_inLine
+BABEL_OP2_202_79973_20131130_184708_outLine
+BABEL_OP2_202_79995_20140227_030446_inLine
+BABEL_OP2_202_79995_20140227_030446_outLine
+BABEL_OP2_202_80134_20131202_174756_inLine
+BABEL_OP2_202_80134_20131202_174756_outLine
+BABEL_OP2_202_80306_20130928_232209_inLine
+BABEL_OP2_202_80306_20130928_232209_outLine
+BABEL_OP2_202_80383_20131207_013517_inLine
+BABEL_OP2_202_80383_20131207_013517_outLine
+BABEL_OP2_202_80989_20131016_213255_inLine
+BABEL_OP2_202_80989_20131016_213255_outLine
+BABEL_OP2_202_81149_20131010_010411_inLine
+BABEL_OP2_202_81149_20131010_010411_outLine
+BABEL_OP2_202_81622_20130218_232606_inLine
+BABEL_OP2_202_81622_20130218_232606_outLine
+BABEL_OP2_202_82123_20131130_004859_inLine
+BABEL_OP2_202_82123_20131130_004859_outLine
+BABEL_OP2_202_82138_20130415_225929_inLine
+BABEL_OP2_202_82138_20130415_225929_outLine
+BABEL_OP2_202_82425_20130108_181846_inLine
+BABEL_OP2_202_82425_20130108_181846_outLine
+BABEL_OP2_202_82473_20131004_202625_inLine
+BABEL_OP2_202_82473_20131004_202625_outLine
+BABEL_OP2_202_82496_20130105_232830_inLine
+BABEL_OP2_202_82496_20130105_232830_outLine
+BABEL_OP2_202_82622_20131007_171417_inLine
+BABEL_OP2_202_82622_20131007_171417_outLine
+BABEL_OP2_202_83609_20131128_022206_inLine
+BABEL_OP2_202_83609_20131128_022206_outLine
+BABEL_OP2_202_83625_20131130_222251_inLine
+BABEL_OP2_202_83625_20131130_222251_outLine
+BABEL_OP2_202_83651_20131003_212624_inLine
+BABEL_OP2_202_83651_20131003_212624_outLine
+BABEL_OP2_202_84077_20131130_195755_inLine
+BABEL_OP2_202_84077_20131130_195755_outLine
+BABEL_OP2_202_84194_20131130_024921_inLine
+BABEL_OP2_202_84194_20131130_024921_outLine
+BABEL_OP2_202_84408_20130306_184336_inLine
+BABEL_OP2_202_84408_20130306_184336_outLine
+BABEL_OP2_202_84466_20131010_040505_inLine
+BABEL_OP2_202_84466_20131010_040505_outLine
+BABEL_OP2_202_84469_20131018_212735_inLine
+BABEL_OP2_202_84469_20131018_212735_outLine
+BABEL_OP2_202_84768_20130107_194303_inLine
+BABEL_OP2_202_84768_20130107_194303_outLine
+BABEL_OP2_202_86156_20131030_001706_inLine
+BABEL_OP2_202_86156_20131030_001706_outLine
+BABEL_OP2_202_87179_20140320_165556_inLine
+BABEL_OP2_202_87179_20140320_165556_outLine
+BABEL_OP2_202_87305_20131016_225546_inLine
+BABEL_OP2_202_87305_20131016_225546_outLine
+BABEL_OP2_202_88776_20130107_192204_inLine
+BABEL_OP2_202_88776_20130107_192204_outLine
+BABEL_OP2_202_88776_20130107_195623_inLine
+BABEL_OP2_202_88776_20130107_195623_outLine
+BABEL_OP2_202_88783_20131018_191706_inLine
+BABEL_OP2_202_88783_20131018_191706_outLine
+BABEL_OP2_202_88865_20140319_212413_inLine
+BABEL_OP2_202_88865_20140319_212413_outLine
+BABEL_OP2_202_89665_20140320_004314_inLine
+BABEL_OP2_202_89665_20140320_004314_outLine
+BABEL_OP2_202_89695_20130215_224831_inLine
+BABEL_OP2_202_89695_20130215_224831_outLine
+BABEL_OP2_202_90347_20130912_005052_inLine
+BABEL_OP2_202_90347_20130912_005052_outLine
+BABEL_OP2_202_90572_20131009_190400_inLine
+BABEL_OP2_202_90572_20131009_190400_outLine
+BABEL_OP2_202_90737_20130213_201303_inLine
+BABEL_OP2_202_90737_20130213_201303_outLine
+BABEL_OP2_202_90739_20130222_223815_inLine
+BABEL_OP2_202_90739_20130222_223815_outLine
+BABEL_OP2_202_90740_20131120_195825_inLine
+BABEL_OP2_202_90740_20131120_195825_outLine
+BABEL_OP2_202_91080_20130429_213558_inLine
+BABEL_OP2_202_91080_20130429_213558_outLine
+BABEL_OP2_202_91478_20131127_031740_inLine
+BABEL_OP2_202_91478_20131127_031740_outLine
+BABEL_OP2_202_91891_20130803_000104_inLine
+BABEL_OP2_202_91891_20130803_000104_outLine
+BABEL_OP2_202_92065_20140109_204802_inLine
+BABEL_OP2_202_92065_20140109_204802_outLine
+BABEL_OP2_202_92440_20131203_195407_inLine
+BABEL_OP2_202_92440_20131203_195407_outLine
+BABEL_OP2_202_92440_20131203_200046_inLine
+BABEL_OP2_202_92440_20131203_200046_outLine
+BABEL_OP2_202_92527_20130225_184732_inLine
+BABEL_OP2_202_92527_20130225_184732_outLine
+BABEL_OP2_202_93153_20131003_212947_inLine
+BABEL_OP2_202_93153_20131003_212947_outLine
+BABEL_OP2_202_93153_20131003_213722_inLine
+BABEL_OP2_202_93153_20131003_213722_outLine
+BABEL_OP2_202_93222_20131126_211540_inLine
+BABEL_OP2_202_93222_20131126_211540_outLine
+BABEL_OP2_202_94333_20130105_202651_inLine
+BABEL_OP2_202_94333_20130105_202651_outLine
+BABEL_OP2_202_94449_20131011_205657_inLine
+BABEL_OP2_202_94449_20131011_205657_outLine
+BABEL_OP2_202_94869_20121219_204921_inLine
+BABEL_OP2_202_94869_20121219_204921_outLine
+BABEL_OP2_202_95077_20140320_014923_inLine
+BABEL_OP2_202_95077_20140320_014923_outLine
+BABEL_OP2_202_95231_20131128_211454_inLine
+BABEL_OP2_202_95231_20131128_211454_outLine
+BABEL_OP2_202_96376_20131011_024111_inLine
+BABEL_OP2_202_96376_20131011_024111_outLine
+BABEL_OP2_202_96680_20131130_202936_inLine
+BABEL_OP2_202_96680_20131130_202936_outLine
+BABEL_OP2_202_96690_20130220_210217_inLine
+BABEL_OP2_202_96690_20130220_210217_outLine
+BABEL_OP2_202_96808_20131012_212254_inLine
+BABEL_OP2_202_96808_20131012_212254_outLine
+BABEL_OP2_202_97220_20140319_193818_inLine
+BABEL_OP2_202_97220_20140319_193818_outLine
+BABEL_OP2_202_97363_20131002_203133_inLine
+BABEL_OP2_202_97363_20131002_203133_outLine
+BABEL_OP2_202_97363_20131003_002739_inLine
+BABEL_OP2_202_97363_20131003_002739_outLine
+BABEL_OP2_202_97373_20130730_151855_inLine
+BABEL_OP2_202_99401_20130108_001107_inLine
+BABEL_OP2_202_99401_20130108_001107_outLine
+BABEL_OP2_202_99594_20130220_222308_inLine
+BABEL_OP2_202_99594_20130220_222308_outLine
+BABEL_OP2_202_99883_20131120_212150_inLine
+BABEL_OP2_202_99883_20131120_212150_outLine
diff --git a/egs/babel/s5d/conf/lists/202-swahili/untranscribed-training.list b/egs/babel/s5d/conf/lists/202-swahili/untranscribed-training.list
new file mode 100644
index 00000000000..72047620427
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/202-swahili/untranscribed-training.list
@@ -0,0 +1,555 @@
+BABEL_OP2_202_10036_20130313_171555_outLine
+BABEL_OP2_202_10058_20131017_230021_outLine
+BABEL_OP2_202_10313_20140205_002214_inLine
+BABEL_OP2_202_10319_20121229_224454_outLine
+BABEL_OP2_202_10411_20140209_011824_inLine
+BABEL_OP2_202_10411_20140209_011824_outLine
+BABEL_OP2_202_10469_20131130_014924_inLine
+BABEL_OP2_202_10638_20140205_005404_inLine
+BABEL_OP2_202_10638_20140205_005404_outLine
+BABEL_OP2_202_10901_20130913_004135_outLine
+BABEL_OP2_202_10938_20130930_020020_inLine
+BABEL_OP2_202_11096_20131016_000245_outLine
+BABEL_OP2_202_11486_20140206_204420_inLine
+BABEL_OP2_202_11486_20140206_204420_outLine
+BABEL_OP2_202_11486_20140206_205137_inLine
+BABEL_OP2_202_11486_20140206_205137_outLine
+BABEL_OP2_202_12242_20140214_231330_inLine
+BABEL_OP2_202_12609_20140207_172212_inLine
+BABEL_OP2_202_13030_20131218_195618_outLine
+BABEL_OP2_202_13324_20131219_001852_inLine
+BABEL_OP2_202_13324_20131219_001852_outLine
+BABEL_OP2_202_13547_20131127_025355_outLine
+BABEL_OP2_202_13776_20140206_201743_inLine
+BABEL_OP2_202_14560_20140206_011812_inLine
+BABEL_OP2_202_14723_20140205_060355_inLine
+BABEL_OP2_202_14723_20140205_060355_outLine
+BABEL_OP2_202_15042_20131130_221534_inLine
+BABEL_OP2_202_15042_20131130_221534_outLine
+BABEL_OP2_202_15216_20140208_175430_inLine
+BABEL_OP2_202_15322_20140208_191251_inLine
+BABEL_OP2_202_15322_20140208_191251_outLine
+BABEL_OP2_202_15466_20131127_213156_inLine
+BABEL_OP2_202_15466_20131127_214339_inLine
+BABEL_OP2_202_15535_20131001_012120_inLine
+BABEL_OP2_202_15749_20140206_024112_inLine
+BABEL_OP2_202_15749_20140206_024112_outLine
+BABEL_OP2_202_15926_20130925_034742_inLine
+BABEL_OP2_202_15926_20130925_034742_outLine
+BABEL_OP2_202_15926_20130925_035312_inLine
+BABEL_OP2_202_15926_20130925_035312_outLine
+BABEL_OP2_202_16800_20131219_012534_outLine
+BABEL_OP2_202_17127_20140113_203603_inLine
+BABEL_OP2_202_17127_20140113_203603_outLine
+BABEL_OP2_202_17165_20130410_211020_outLine
+BABEL_OP2_202_17320_20140207_162515_outLine
+BABEL_OP2_202_17320_20140207_163148_outLine
+BABEL_OP2_202_17420_20131029_235015_inLine
+BABEL_OP2_202_17496_20130926_185827_inLine
+BABEL_OP2_202_17582_20140208_011506_inLine
+BABEL_OP2_202_17913_20131128_031821_outLine
+BABEL_OP2_202_17914_20131031_221433_inLine
+BABEL_OP2_202_17937_20131220_004727_outLine
+BABEL_OP2_202_18033_20131218_020549_inLine
+BABEL_OP2_202_18033_20131218_020549_outLine
+BABEL_OP2_202_18037_20140205_003923_inLine
+BABEL_OP2_202_18037_20140205_003923_outLine
+BABEL_OP2_202_18280_20140205_025345_inLine
+BABEL_OP2_202_18280_20140205_025345_outLine
+BABEL_OP2_202_18297_20131012_004111_inLine
+BABEL_OP2_202_18297_20131012_004111_outLine
+BABEL_OP2_202_18370_20140205_033926_inLine
+BABEL_OP2_202_18370_20140205_033926_outLine
+BABEL_OP2_202_18863_20131130_020252_inLine
+BABEL_OP2_202_18863_20131130_032443_inLine
+BABEL_OP2_202_19703_20131218_015339_inLine
+BABEL_OP2_202_19773_20131220_220513_inLine
+BABEL_OP2_202_19773_20131220_220513_outLine
+BABEL_OP2_202_19782_20131102_001852_inLine
+BABEL_OP2_202_20330_20140109_172943_outLine
+BABEL_OP2_202_20330_20140109_174004_outLine
+BABEL_OP2_202_20367_20140207_065137_outLine
+BABEL_OP2_202_20454_20131217_005702_outLine
+BABEL_OP2_202_20724_20131218_014801_outLine
+BABEL_OP2_202_20800_20130109_234836_outLine
+BABEL_OP2_202_20922_20140108_231607_outLine
+BABEL_OP2_202_21004_20131017_221746_inLine
+BABEL_OP2_202_21159_20140205_213005_inLine
+BABEL_OP2_202_21244_20131015_194634_outLine
+BABEL_OP2_202_21315_20140206_175302_inLine
+BABEL_OP2_202_21327_20140206_001641_inLine
+BABEL_OP2_202_21426_20140204_235517_inLine
+BABEL_OP2_202_21426_20140204_235517_outLine
+BABEL_OP2_202_22034_20140130_232345_inLine
+BABEL_OP2_202_22034_20140130_234608_inLine
+BABEL_OP2_202_22170_20131009_031606_inLine
+BABEL_OP2_202_22288_20131212_003625_inLine
+BABEL_OP2_202_22612_20131101_182509_inLine
+BABEL_OP2_202_22965_20140214_212302_inLine
+BABEL_OP2_202_22965_20140214_212302_outLine
+BABEL_OP2_202_23046_20140207_214018_inLine
+BABEL_OP2_202_23046_20140207_214018_outLine
+BABEL_OP2_202_23196_20131130_210710_inLine
+BABEL_OP2_202_23196_20131130_210710_outLine
+BABEL_OP2_202_23239_20130923_232142_inLine
+BABEL_OP2_202_23239_20130923_232142_outLine
+BABEL_OP2_202_23681_20131016_231325_outLine
+BABEL_OP2_202_23752_20140204_225435_inLine
+BABEL_OP2_202_23752_20140204_225435_outLine
+BABEL_OP2_202_24017_20131220_000437_outLine
+BABEL_OP2_202_24209_20131203_184510_outLine
+BABEL_OP2_202_24253_20131010_013952_outLine
+BABEL_OP2_202_24587_20131127_230044_inLine
+BABEL_OP2_202_24587_20131127_230044_outLine
+BABEL_OP2_202_24648_20131128_030622_inLine
+BABEL_OP2_202_25015_20140207_234017_inLine
+BABEL_OP2_202_25198_20140219_203259_inLine
+BABEL_OP2_202_25895_20131203_201422_inLine
+BABEL_OP2_202_25895_20131203_202401_inLine
+BABEL_OP2_202_26072_20140114_031432_inLine
+BABEL_OP2_202_26072_20140114_031432_outLine
+BABEL_OP2_202_26388_20131217_235617_outLine
+BABEL_OP2_202_26478_20140207_223256_outLine
+BABEL_OP2_202_26574_20140205_203902_inLine
+BABEL_OP2_202_26574_20140205_203902_outLine
+BABEL_OP2_202_26574_20140205_205040_inLine
+BABEL_OP2_202_26574_20140205_205040_outLine
+BABEL_OP2_202_27014_20140109_225600_outLine
+BABEL_OP2_202_27014_20140109_231225_outLine
+BABEL_OP2_202_27218_20131003_212404_inLine
+BABEL_OP2_202_27841_20140206_002202_inLine
+BABEL_OP2_202_27841_20140206_002202_outLine
+BABEL_OP2_202_28280_20140205_194444_outLine
+BABEL_OP2_202_28419_20140205_193403_inLine
+BABEL_OP2_202_29023_20131219_010409_outLine
+BABEL_OP2_202_29230_20140207_212300_inLine
+BABEL_OP2_202_29323_20131031_234945_inLine
+BABEL_OP2_202_29323_20131101_000454_inLine
+BABEL_OP2_202_29563_20131212_060621_inLine
+BABEL_OP2_202_29563_20131212_060621_outLine
+BABEL_OP2_202_29746_20131217_001441_inLine
+BABEL_OP2_202_29765_20140209_221538_inLine
+BABEL_OP2_202_29765_20140209_221538_outLine
+BABEL_OP2_202_29911_20131127_184715_outLine
+BABEL_OP2_202_30345_20131220_003550_outLine
+BABEL_OP2_202_30576_20131017_012418_inLine
+BABEL_OP2_202_30653_20131015_175341_inLine
+BABEL_OP2_202_30653_20131015_175341_outLine
+BABEL_OP2_202_30974_20140219_013521_inLine
+BABEL_OP2_202_31267_20140227_054848_inLine
+BABEL_OP2_202_31267_20140227_054848_outLine
+BABEL_OP2_202_31346_20131019_000000_outLine
+BABEL_OP2_202_31346_20131101_003311_outLine
+BABEL_OP2_202_31490_20130109_003835_inLine
+BABEL_OP2_202_31490_20130109_005722_inLine
+BABEL_OP2_202_31500_20131217_015833_inLine
+BABEL_OP2_202_31500_20131217_015833_outLine
+BABEL_OP2_202_31583_20131219_213900_outLine
+BABEL_OP2_202_31628_20130921_212411_inLine
+BABEL_OP2_202_31728_20131129_044747_inLine
+BABEL_OP2_202_31728_20131129_044747_outLine
+BABEL_OP2_202_32048_20131219_013244_inLine
+BABEL_OP2_202_32328_20131030_210553_inLine
+BABEL_OP2_202_32380_20131130_215631_outLine
+BABEL_OP2_202_32872_20131016_190028_inLine
+BABEL_OP2_202_32872_20131016_190028_outLine
+BABEL_OP2_202_33149_20140225_001335_inLine
+BABEL_OP2_202_33149_20140225_001335_outLine
+BABEL_OP2_202_33229_20131218_213456_outLine
+BABEL_OP2_202_33659_20140225_233435_outLine
+BABEL_OP2_202_33800_20131126_211758_inLine
+BABEL_OP2_202_33800_20131126_211758_outLine
+BABEL_OP2_202_33806_20140224_202859_inLine
+BABEL_OP2_202_33806_20140224_202859_outLine
+BABEL_OP2_202_33840_20131101_001620_outLine
+BABEL_OP2_202_33951_20130426_182755_outLine
+BABEL_OP2_202_33951_20130426_184040_outLine
+BABEL_OP2_202_33992_20131015_184831_inLine
+BABEL_OP2_202_33992_20131015_184831_outLine
+BABEL_OP2_202_34019_20140224_193636_outLine
+BABEL_OP2_202_34106_20121227_222718_inLine
+BABEL_OP2_202_34629_20131130_211927_outLine
+BABEL_OP2_202_34713_20140225_014158_inLine
+BABEL_OP2_202_34713_20140225_014158_outLine
+BABEL_OP2_202_35008_20130722_185829_inLine
+BABEL_OP2_202_35008_20130722_191623_inLine
+BABEL_OP2_202_35143_20131018_192106_inLine
+BABEL_OP2_202_35202_20130801_172530_inLine
+BABEL_OP2_202_35202_20130801_195816_inLine
+BABEL_OP2_202_35786_20131015_181857_inLine
+BABEL_OP2_202_35786_20131015_181857_outLine
+BABEL_OP2_202_36364_20131130_021940_inLine
+BABEL_OP2_202_36669_20130213_192457_inLine
+BABEL_OP2_202_37064_20131002_185856_inLine
+BABEL_OP2_202_37064_20131002_185856_outLine
+BABEL_OP2_202_37229_20140114_233648_outLine
+BABEL_OP2_202_37271_20140114_192528_inLine
+BABEL_OP2_202_37271_20140114_192528_outLine
+BABEL_OP2_202_37499_20131016_183113_inLine
+BABEL_OP2_202_37499_20131016_183113_outLine
+BABEL_OP2_202_37684_20131203_022005_inLine
+BABEL_OP2_202_37684_20131203_024603_inLine
+BABEL_OP2_202_37776_20140115_213234_outLine
+BABEL_OP2_202_37853_20131101_233956_inLine
+BABEL_OP2_202_37853_20131101_233956_outLine
+BABEL_OP2_202_37853_20131101_235036_inLine
+BABEL_OP2_202_37853_20131101_235036_outLine
+BABEL_OP2_202_38323_20140114_202816_inLine
+BABEL_OP2_202_38750_20131018_005908_outLine
+BABEL_OP2_202_39555_20140114_200302_outLine
+BABEL_OP2_202_39579_20140115_214035_outLine
+BABEL_OP2_202_39638_20131130_231218_inLine
+BABEL_OP2_202_39680_20140115_212202_inLine
+BABEL_OP2_202_39680_20140115_212202_outLine
+BABEL_OP2_202_39920_20131208_045704_inLine
+BABEL_OP2_202_39920_20131208_051609_inLine
+BABEL_OP2_202_40648_20131215_203941_outLine
+BABEL_OP2_202_40648_20131215_205022_outLine
+BABEL_OP2_202_40686_20140114_232425_outLine
+BABEL_OP2_202_40686_20140114_233413_outLine
+BABEL_OP2_202_40740_20140114_212913_outLine
+BABEL_OP2_202_40740_20140114_221533_outLine
+BABEL_OP2_202_40939_20140115_005331_inLine
+BABEL_OP2_202_40939_20140115_005331_outLine
+BABEL_OP2_202_41073_20140114_201346_inLine
+BABEL_OP2_202_41073_20140114_201346_outLine
+BABEL_OP2_202_41174_20130222_214400_outLine
+BABEL_OP2_202_41233_20131127_035936_inLine
+BABEL_OP2_202_41233_20131127_035936_outLine
+BABEL_OP2_202_41592_20130927_203118_inLine
+BABEL_OP2_202_41745_20130222_224352_outLine
+BABEL_OP2_202_41745_20130222_225523_outLine
+BABEL_OP2_202_41745_20130226_220300_outLine
+BABEL_OP2_202_41890_20131011_232931_inLine
+BABEL_OP2_202_41890_20131011_232931_outLine
+BABEL_OP2_202_41890_20131011_235301_inLine
+BABEL_OP2_202_41890_20131011_235301_outLine
+BABEL_OP2_202_41958_20131001_185053_inLine
+BABEL_OP2_202_42146_20131011_232931_inLine
+BABEL_OP2_202_42146_20131011_232931_outLine
+BABEL_OP2_202_42146_20131011_233957_inLine
+BABEL_OP2_202_42146_20131011_233957_outLine
+BABEL_OP2_202_42231_20140224_221548_inLine
+BABEL_OP2_202_42299_20140220_233422_outLine
+BABEL_OP2_202_42526_20140228_035815_outLine
+BABEL_OP2_202_42600_20131001_200025_outLine
+BABEL_OP2_202_43115_20131012_005141_inLine
+BABEL_OP2_202_43115_20131012_005141_outLine
+BABEL_OP2_202_43323_20131129_040506_outLine
+BABEL_OP2_202_43784_20131003_005323_inLine
+BABEL_OP2_202_44029_20140224_224653_outLine
+BABEL_OP2_202_44290_20131013_001608_inLine
+BABEL_OP2_202_44290_20131013_001608_outLine
+BABEL_OP2_202_44446_20131130_014441_inLine
+BABEL_OP2_202_44868_20130806_210559_inLine
+BABEL_OP2_202_45486_20140224_210341_outLine
+BABEL_OP2_202_45559_20131016_215852_inLine
+BABEL_OP2_202_45559_20131016_215852_outLine
+BABEL_OP2_202_46268_20130107_230757_inLine
+BABEL_OP2_202_46389_20131216_020541_inLine
+BABEL_OP2_202_46389_20131216_020541_outLine
+BABEL_OP2_202_46763_20131009_191902_inLine
+BABEL_OP2_202_46763_20131009_191902_outLine
+BABEL_OP2_202_46808_20140224_220014_inLine
+BABEL_OP2_202_46808_20140224_220014_outLine
+BABEL_OP2_202_47270_20140225_015137_inLine
+BABEL_OP2_202_47487_20140224_232210_outLine
+BABEL_OP2_202_47637_20140224_183628_inLine
+BABEL_OP2_202_47637_20140224_183628_outLine
+BABEL_OP2_202_47882_20131128_015709_outLine
+BABEL_OP2_202_47923_20131129_211629_inLine
+BABEL_OP2_202_47923_20131129_211629_outLine
+BABEL_OP2_202_48200_20131128_211840_inLine
+BABEL_OP2_202_49001_20131003_151102_inLine
+BABEL_OP2_202_49001_20131003_151102_outLine
+BABEL_OP2_202_49027_20131012_171107_outLine
+BABEL_OP2_202_49118_20140223_214255_inLine
+BABEL_OP2_202_49118_20140223_214255_outLine
+BABEL_OP2_202_49216_20121227_221242_inLine
+BABEL_OP2_202_49216_20121227_233227_inLine
+BABEL_OP2_202_49330_20131130_015311_inLine
+BABEL_OP2_202_49330_20131130_015311_outLine
+BABEL_OP2_202_49502_20121227_234825_outLine
+BABEL_OP2_202_49812_20140224_225827_inLine
+BABEL_OP2_202_49812_20140224_225827_outLine
+BABEL_OP2_202_49870_20140224_214828_inLine
+BABEL_OP2_202_49870_20140224_214828_outLine
+BABEL_OP2_202_49912_20140224_190150_inLine
+BABEL_OP2_202_49912_20140224_190150_outLine
+BABEL_OP2_202_50549_20140122_212548_inLine
+BABEL_OP2_202_50549_20140122_212548_outLine
+BABEL_OP2_202_50940_20131128_044329_outLine
+BABEL_OP2_202_51015_20130919_230711_outLine
+BABEL_OP2_202_51693_20140122_221655_outLine
+BABEL_OP2_202_51955_20131004_201017_inLine
+BABEL_OP2_202_51955_20131004_201017_outLine
+BABEL_OP2_202_52322_20140122_020749_outLine
+BABEL_OP2_202_52322_20140122_022032_outLine
+BABEL_OP2_202_52438_20131002_193009_inLine
+BABEL_OP2_202_52499_20131216_043452_inLine
+BABEL_OP2_202_52694_20140123_012847_inLine
+BABEL_OP2_202_52694_20140123_012847_outLine
+BABEL_OP2_202_52803_20131129_235234_inLine
+BABEL_OP2_202_52803_20131129_235234_outLine
+BABEL_OP2_202_53010_20131129_193814_inLine
+BABEL_OP2_202_53010_20131129_193814_outLine
+BABEL_OP2_202_53144_20140123_025818_inLine
+BABEL_OP2_202_53144_20140123_025818_outLine
+BABEL_OP2_202_53415_20131101_205155_inLine
+BABEL_OP2_202_54040_20140124_235842_inLine
+BABEL_OP2_202_54040_20140124_235842_outLine
+BABEL_OP2_202_54040_20140125_000629_inLine
+BABEL_OP2_202_54040_20140125_000629_outLine
+BABEL_OP2_202_54104_20130108_184048_inLine
+BABEL_OP2_202_54390_20140123_021824_inLine
+BABEL_OP2_202_54477_20140121_195829_inLine
+BABEL_OP2_202_54477_20140121_195829_outLine
+BABEL_OP2_202_54567_20130215_201456_outLine
+BABEL_OP2_202_54697_20140124_011928_inLine
+BABEL_OP2_202_54827_20131012_020910_inLine
+BABEL_OP2_202_55136_20131120_230735_inLine
+BABEL_OP2_202_55136_20131120_230735_outLine
+BABEL_OP2_202_55136_20131120_231613_inLine
+BABEL_OP2_202_55136_20131120_231613_outLine
+BABEL_OP2_202_55267_20130429_211135_inLine
+BABEL_OP2_202_55968_20121221_210945_outLine
+BABEL_OP2_202_55968_20121222_190905_outLine
+BABEL_OP2_202_56019_20140122_235300_inLine
+BABEL_OP2_202_56019_20140122_235300_outLine
+BABEL_OP2_202_56057_20131217_015911_outLine
+BABEL_OP2_202_56076_20131012_005019_inLine
+BABEL_OP2_202_56076_20131012_005019_outLine
+BABEL_OP2_202_56307_20130925_024659_outLine
+BABEL_OP2_202_56326_20131129_235243_inLine
+BABEL_OP2_202_56427_20140115_215916_inLine
+BABEL_OP2_202_56427_20140115_215916_outLine
+BABEL_OP2_202_56468_20140125_021443_inLine
+BABEL_OP2_202_56468_20140125_021443_outLine
+BABEL_OP2_202_56684_20140122_005322_inLine
+BABEL_OP2_202_56684_20140122_005322_outLine
+BABEL_OP2_202_56925_20131215_232111_outLine
+BABEL_OP2_202_57035_20131218_025223_inLine
+BABEL_OP2_202_57035_20131218_025223_outLine
+BABEL_OP2_202_57116_20121220_184622_outLine
+BABEL_OP2_202_57219_20140128_183934_inLine
+BABEL_OP2_202_57219_20140128_183934_outLine
+BABEL_OP2_202_57464_20131012_002232_inLine
+BABEL_OP2_202_57566_20140130_012514_inLine
+BABEL_OP2_202_57566_20140130_012514_outLine
+BABEL_OP2_202_57654_20140115_204117_inLine
+BABEL_OP2_202_57654_20140115_204117_outLine
+BABEL_OP2_202_57982_20131010_025934_outLine
+BABEL_OP2_202_58026_20140129_211720_inLine
+BABEL_OP2_202_58026_20140129_211720_outLine
+BABEL_OP2_202_58585_20140201_230856_inLine
+BABEL_OP2_202_58915_20140129_222116_outLine
+BABEL_OP2_202_59078_20130911_214801_outLine
+BABEL_OP2_202_59163_20131212_044108_outLine
+BABEL_OP2_202_59291_20140201_004317_inLine
+BABEL_OP2_202_59301_20140125_202811_inLine
+BABEL_OP2_202_59301_20140125_202811_outLine
+BABEL_OP2_202_59864_20140131_204919_inLine
+BABEL_OP2_202_59993_20140114_021749_inLine
+BABEL_OP2_202_59993_20140114_021749_outLine
+BABEL_OP2_202_60299_20140131_075856_inLine
+BABEL_OP2_202_60299_20140131_075856_outLine
+BABEL_OP2_202_60307_20140131_184522_inLine
+BABEL_OP2_202_60307_20140131_184522_outLine
+BABEL_OP2_202_60397_20131202_205856_inLine
+BABEL_OP2_202_60397_20131202_205856_outLine
+BABEL_OP2_202_60458_20140130_223733_inLine
+BABEL_OP2_202_60458_20140130_223733_outLine
+BABEL_OP2_202_60706_20121228_005527_outLine
+BABEL_OP2_202_61190_20131002_225904_outLine
+BABEL_OP2_202_61219_20140120_234802_inLine
+BABEL_OP2_202_61219_20140120_234802_outLine
+BABEL_OP2_202_61684_20131130_211629_inLine
+BABEL_OP2_202_61731_20131003_043735_outLine
+BABEL_OP2_202_61971_20131010_034223_inLine
+BABEL_OP2_202_62286_20130221_203131_outLine
+BABEL_OP2_202_62362_20140201_220857_inLine
+BABEL_OP2_202_62362_20140201_220857_outLine
+BABEL_OP2_202_62471_20131203_193149_inLine
+BABEL_OP2_202_62471_20131203_193149_outLine
+BABEL_OP2_202_62491_20140131_223205_inLine
+BABEL_OP2_202_62491_20140131_223205_outLine
+BABEL_OP2_202_62656_20131203_001914_inLine
+BABEL_OP2_202_62656_20131203_001914_outLine
+BABEL_OP2_202_62724_20131017_221403_inLine
+BABEL_OP2_202_63081_20121228_003935_inLine
+BABEL_OP2_202_63094_20131016_235150_inLine
+BABEL_OP2_202_63094_20131016_235150_outLine
+BABEL_OP2_202_63265_20131216_224818_outLine
+BABEL_OP2_202_63265_20131216_232337_outLine
+BABEL_OP2_202_63334_20131128_190201_outLine
+BABEL_OP2_202_63445_20121229_001208_outLine
+BABEL_OP2_202_63671_20131213_034007_inLine
+BABEL_OP2_202_63671_20131213_034007_outLine
+BABEL_OP2_202_63766_20131217_022038_outLine
+BABEL_OP2_202_64014_20140130_213507_inLine
+BABEL_OP2_202_64014_20140130_213507_outLine
+BABEL_OP2_202_64469_20131216_032049_outLine
+BABEL_OP2_202_64635_20131129_013800_inLine
+BABEL_OP2_202_64635_20131129_013800_outLine
+BABEL_OP2_202_65252_20131016_183445_inLine
+BABEL_OP2_202_65252_20131016_183445_outLine
+BABEL_OP2_202_65268_20131220_020108_inLine
+BABEL_OP2_202_65268_20131220_020108_outLine
+BABEL_OP2_202_65268_20131220_021438_inLine
+BABEL_OP2_202_65268_20131220_021438_outLine
+BABEL_OP2_202_65367_20140129_234024_inLine
+BABEL_OP2_202_65367_20140129_234024_outLine
+BABEL_OP2_202_65370_20140201_195241_inLine
+BABEL_OP2_202_65370_20140201_195241_outLine
+BABEL_OP2_202_67066_20140130_212058_inLine
+BABEL_OP2_202_67066_20140130_212058_outLine
+BABEL_OP2_202_67213_20140130_185616_outLine
+BABEL_OP2_202_67304_20140201_230632_inLine
+BABEL_OP2_202_67304_20140201_230632_outLine
+BABEL_OP2_202_67592_20140129_234213_inLine
+BABEL_OP2_202_67659_20140115_234146_outLine
+BABEL_OP2_202_67773_20140129_215114_inLine
+BABEL_OP2_202_67773_20140129_215114_outLine
+BABEL_OP2_202_67894_20140130_232658_inLine
+BABEL_OP2_202_67894_20140130_232658_outLine
+BABEL_OP2_202_68068_20130802_203147_outLine
+BABEL_OP2_202_68385_20130208_214719_outLine
+BABEL_OP2_202_68668_20140131_221117_inLine
+BABEL_OP2_202_68668_20140131_221117_outLine
+BABEL_OP2_202_68823_20131215_001456_outLine
+BABEL_OP2_202_68910_20140127_211718_inLine
+BABEL_OP2_202_68910_20140127_211718_outLine
+BABEL_OP2_202_69096_20140312_022044_inLine
+BABEL_OP2_202_69096_20140312_022044_outLine
+BABEL_OP2_202_69153_20131128_194250_outLine
+BABEL_OP2_202_69474_20130731_011215_inLine
+BABEL_OP2_202_69474_20130731_012232_inLine
+BABEL_OP2_202_69746_20140125_215609_inLine
+BABEL_OP2_202_69746_20140125_215609_outLine
+BABEL_OP2_202_69982_20140131_212729_inLine
+BABEL_OP2_202_69982_20140131_212729_outLine
+BABEL_OP2_202_69982_20140131_213451_inLine
+BABEL_OP2_202_69982_20140131_213451_outLine
+BABEL_OP2_202_70121_20130308_180634_inLine
+BABEL_OP2_202_70221_20130429_160925_outLine
+BABEL_OP2_202_70293_20131218_043924_inLine
+BABEL_OP2_202_70293_20131218_043924_outLine
+BABEL_OP2_202_70343_20130730_000937_outLine
+BABEL_OP2_202_70452_20140115_230438_inLine
+BABEL_OP2_202_70460_20131101_001026_inLine
+BABEL_OP2_202_70460_20131101_001026_outLine
+BABEL_OP2_202_70726_20131216_011153_inLine
+BABEL_OP2_202_70726_20131216_011153_outLine
+BABEL_OP2_202_71121_20131208_073117_outLine
+BABEL_OP2_202_71404_20131004_002732_inLine
+BABEL_OP2_202_72073_20131213_042304_inLine
+BABEL_OP2_202_72349_20131128_183232_inLine
+BABEL_OP2_202_72349_20131128_185336_inLine
+BABEL_OP2_202_72844_20121222_003955_outLine
+BABEL_OP2_202_73119_20140116_003550_outLine
+BABEL_OP2_202_73119_20140120_205305_outLine
+BABEL_OP2_202_73299_20131017_003841_outLine
+BABEL_OP2_202_73909_20140303_224953_outLine
+BABEL_OP2_202_74799_20130911_223303_outLine
+BABEL_OP2_202_75342_20130731_021621_outLine
+BABEL_OP2_202_75505_20121222_213031_inLine
+BABEL_OP2_202_76437_20121219_005936_outLine
+BABEL_OP2_202_76730_20131217_052627_outLine
+BABEL_OP2_202_77112_20131003_031801_inLine
+BABEL_OP2_202_77391_20140121_013824_inLine
+BABEL_OP2_202_77391_20140121_013824_outLine
+BABEL_OP2_202_77567_20121228_181102_inLine
+BABEL_OP2_202_77803_20121222_202737_inLine
+BABEL_OP2_202_77803_20121222_215157_inLine
+BABEL_OP2_202_77904_20131221_020558_inLine
+BABEL_OP2_202_77904_20131221_020558_outLine
+BABEL_OP2_202_77909_20140126_064115_inLine
+BABEL_OP2_202_77909_20140126_064115_outLine
+BABEL_OP2_202_77921_20131127_232806_inLine
+BABEL_OP2_202_77921_20131127_232806_outLine
+BABEL_OP2_202_77921_20131127_234200_inLine
+BABEL_OP2_202_77921_20131127_234200_outLine
+BABEL_OP2_202_78016_20140226_215807_outLine
+BABEL_OP2_202_78398_20131004_061913_outLine
+BABEL_OP2_202_78511_20140226_231944_inLine
+BABEL_OP2_202_78630_20140113_211239_inLine
+BABEL_OP2_202_78630_20140113_211239_outLine
+BABEL_OP2_202_78630_20140113_212040_inLine
+BABEL_OP2_202_78630_20140113_212040_outLine
+BABEL_OP2_202_78829_20131126_221958_outLine
+BABEL_OP2_202_78833_20131119_205910_inLine
+BABEL_OP2_202_78943_20140121_025623_inLine
+BABEL_OP2_202_78943_20140121_025623_outLine
+BABEL_OP2_202_78958_20131207_004716_inLine
+BABEL_OP2_202_78976_20140115_235057_inLine
+BABEL_OP2_202_78976_20140115_235057_outLine
+BABEL_OP2_202_79139_20130928_173217_inLine
+BABEL_OP2_202_79190_20130927_184727_outLine
+BABEL_OP2_202_79451_20131004_055308_outLine
+BABEL_OP2_202_80439_20131001_190050_outLine
+BABEL_OP2_202_80559_20131007_063834_outLine
+BABEL_OP2_202_80655_20131016_212951_inLine
+BABEL_OP2_202_81213_20131004_205633_inLine
+BABEL_OP2_202_81213_20131004_210252_inLine
+BABEL_OP2_202_81287_20130731_193240_outLine
+BABEL_OP2_202_81287_20130731_195716_outLine
+BABEL_OP2_202_81392_20130728_234236_inLine
+BABEL_OP2_202_81392_20130729_021638_inLine
+BABEL_OP2_202_81404_20130314_220702_inLine
+BABEL_OP2_202_81769_20131127_214614_outLine
+BABEL_OP2_202_82742_20131029_223343_inLine
+BABEL_OP2_202_83545_20131017_183706_inLine
+BABEL_OP2_202_83783_20130911_225559_outLine
+BABEL_OP2_202_84055_20131208_040856_outLine
+BABEL_OP2_202_84430_20131217_023038_inLine
+BABEL_OP2_202_84430_20131217_024752_inLine
+BABEL_OP2_202_84467_20131126_224903_inLine
+BABEL_OP2_202_84467_20131126_224903_outLine
+BABEL_OP2_202_85010_20131201_004538_outLine
+BABEL_OP2_202_85340_20131002_202217_outLine
+BABEL_OP2_202_85647_20130416_223722_inLine
+BABEL_OP2_202_86191_20140121_192414_inLine
+BABEL_OP2_202_86191_20140121_192414_outLine
+BABEL_OP2_202_86321_20131018_003746_inLine
+BABEL_OP2_202_86676_20130802_223159_outLine
+BABEL_OP2_202_86676_20130802_225309_outLine
+BABEL_OP2_202_86722_20131001_193946_inLine
+BABEL_OP2_202_86845_20131126_204553_outLine
+BABEL_OP2_202_86845_20131126_210711_outLine
+BABEL_OP2_202_86878_20131220_215841_inLine
+BABEL_OP2_202_87693_20131004_231549_outLine
+BABEL_OP2_202_87884_20131017_214906_inLine
+BABEL_OP2_202_88982_20130930_042104_outLine
+BABEL_OP2_202_89516_20131208_025053_inLine
+BABEL_OP2_202_89516_20131208_025053_outLine
+BABEL_OP2_202_89943_20131003_153927_outLine
+BABEL_OP2_202_90318_20131215_222302_outLine
+BABEL_OP2_202_91266_20131127_021953_inLine
+BABEL_OP2_202_91266_20131127_021953_outLine
+BABEL_OP2_202_92060_20131011_235309_outLine
+BABEL_OP2_202_92176_20130319_022508_inLine
+BABEL_OP2_202_92886_20131004_210342_inLine
+BABEL_OP2_202_93443_20131129_212311_inLine
+BABEL_OP2_202_93443_20131129_212311_outLine
+BABEL_OP2_202_94025_20130801_210343_outLine
+BABEL_OP2_202_94253_20131004_010116_outLine
+BABEL_OP2_202_94316_20131017_194727_inLine
+BABEL_OP2_202_94891_20131126_173659_inLine
+BABEL_OP2_202_94891_20131126_173659_outLine
+BABEL_OP2_202_95399_20130211_220740_inLine
+BABEL_OP2_202_95399_20130211_230605_inLine
+BABEL_OP2_202_95399_20130211_232555_inLine
+BABEL_OP2_202_95937_20131217_005609_inLine
+BABEL_OP2_202_96504_20140125_035346_inLine
+BABEL_OP2_202_96504_20140125_035346_outLine
+BABEL_OP2_202_96525_20131018_225425_inLine
+BABEL_OP2_202_96525_20131018_225425_outLine
+BABEL_OP2_202_96525_20131018_230802_inLine
+BABEL_OP2_202_96525_20131018_230802_outLine
+BABEL_OP2_202_97063_20131128_231626_outLine
+BABEL_OP2_202_97461_20130928_010334_outLine
+BABEL_OP2_202_97836_20131009_221934_outLine
+BABEL_OP2_202_97925_20131203_210706_outLine
+BABEL_OP2_202_98678_20131010_023001_outLine
+BABEL_OP2_202_99732_20131126_215915_inLine
+BABEL_OP2_202_99732_20131126_215915_outLine
diff --git a/egs/babel/s5d/conf/lists/203-lao/dev.list b/egs/babel/s5d/conf/lists/203-lao/dev.list
new file mode 100644
index 00000000000..3a31f075909
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/203-lao/dev.list
@@ -0,0 +1,131 @@
+BABEL_OP1_203_10188_20130220_225432_inLine
+BABEL_OP1_203_10188_20130220_225432_outLine
+BABEL_OP1_203_10188_20130220_230849_inLine
+BABEL_OP1_203_10188_20130220_230849_outLine
+BABEL_OP1_203_10319_20130314_213741_inLine
+BABEL_OP1_203_10319_20130314_213741_outLine
+BABEL_OP1_203_10319_20130314_214749_inLine
+BABEL_OP1_203_10319_20130314_214749_outLine
+BABEL_OP1_203_14158_20130409_181505_inLine
+BABEL_OP1_203_14158_20130409_181505_outLine
+BABEL_OP1_203_14158_20130409_182411_inLine
+BABEL_OP1_203_14158_20130409_182411_outLine
+BABEL_OP1_203_14158_20130409_183108_inLine
+BABEL_OP1_203_14158_20130409_183108_outLine
+BABEL_OP1_203_14228_20130405_154037_inLine
+BABEL_OP1_203_14228_20130405_154037_outLine
+BABEL_OP1_203_14228_20130405_163836_inLine
+BABEL_OP1_203_14228_20130405_163836_outLine
+BABEL_OP1_203_14440_20130509_205709_inLine
+BABEL_OP1_203_14440_20130509_205709_outLine
+BABEL_OP1_203_15042_20130727_173946_inLine
+BABEL_OP1_203_15042_20130727_173946_outLine
+BABEL_OP1_203_16800_20130421_140442_inLine
+BABEL_OP1_203_16800_20130421_140442_outLine
+BABEL_OP1_203_17127_20130421_131732_inLine
+BABEL_OP1_203_17127_20130421_131732_outLine
+BABEL_OP1_203_17127_20130421_132248_inLine
+BABEL_OP1_203_17127_20130421_132248_outLine
+BABEL_OP1_203_17573_20130331_192906_inLine
+BABEL_OP1_203_17573_20130331_192906_outLine
+BABEL_OP1_203_17890_20130329_160302_inLine
+BABEL_OP1_203_17890_20130329_160302_outLine
+BABEL_OP1_203_19621_20130330_192114_inLine
+BABEL_OP1_203_19621_20130330_192114_outLine
+BABEL_OP1_203_19663_20130322_163118_inLine
+BABEL_OP1_203_19663_20130322_163118_outLine
+BABEL_OP1_203_19672_20130401_204303_inLine
+BABEL_OP1_203_19672_20130401_204303_outLine
+BABEL_OP1_203_21581_20130327_180143_inLine
+BABEL_OP1_203_21581_20130327_180143_outLine
+BABEL_OP1_203_22170_20130424_213413_inLine
+BABEL_OP1_203_22170_20130424_213413_outLine
+BABEL_OP1_203_22216_20130307_190055_inLine
+BABEL_OP1_203_22216_20130307_190055_outLine
+BABEL_OP1_203_22466_20130218_191925_inLine
+BABEL_OP1_203_22466_20130218_191925_outLine
+BABEL_OP1_203_23151_20130408_192838_inLine
+BABEL_OP1_203_23151_20130408_192838_outLine
+BABEL_OP1_203_23260_20130726_170748_inLine
+BABEL_OP1_203_23260_20130726_170748_outLine
+BABEL_OP1_203_23681_20130730_162132_inLine
+BABEL_OP1_203_23681_20130730_162132_outLine
+BABEL_OP1_203_23995_20130731_195202_inLine
+BABEL_OP1_203_23995_20130731_195202_outLine
+BABEL_OP1_203_25012_20130814_141020_inLine
+BABEL_OP1_203_25012_20130814_141020_outLine
+BABEL_OP1_203_26206_20130328_193450_inLine
+BABEL_OP1_203_26206_20130328_193450_outLine
+BABEL_OP1_203_29208_20130320_141202_inLine
+BABEL_OP1_203_29208_20130320_141202_outLine
+BABEL_OP1_203_29765_20130426_185032_inLine
+BABEL_OP1_203_29765_20130426_185032_outLine
+BABEL_OP1_203_31484_20130404_184608_inLine
+BABEL_OP1_203_31484_20130404_184608_outLine
+BABEL_OP1_203_32861_20130424_133938_inLine
+BABEL_OP1_203_32861_20130424_133938_outLine
+BABEL_OP1_203_32959_20130406_145730_inLine
+BABEL_OP1_203_32959_20130406_145730_outLine
+BABEL_OP1_203_37499_20130512_203148_inLine
+BABEL_OP1_203_37499_20130512_203148_outLine
+BABEL_OP1_203_39744_20130307_140614_inLine
+BABEL_OP1_203_39744_20130307_140614_outLine
+BABEL_OP1_203_41400_20130728_194416_inLine
+BABEL_OP1_203_41400_20130728_194416_outLine
+BABEL_OP1_203_41920_20130310_185621_inLine
+BABEL_OP1_203_41920_20130310_185621_outLine
+BABEL_OP1_203_48789_20130324_180810_inLine
+BABEL_OP1_203_48789_20130324_180810_outLine
+BABEL_OP1_203_50565_20130307_164552_inLine
+BABEL_OP1_203_50565_20130307_164552_outLine
+BABEL_OP1_203_52025_20130306_143713_inLine
+BABEL_OP1_203_52025_20130306_143713_outLine
+BABEL_OP1_203_52725_20130410_214000_inLine
+BABEL_OP1_203_52725_20130410_214000_outLine
+BABEL_OP1_203_52932_20130314_203215_inLine
+BABEL_OP1_203_52932_20130314_203215_outLine
+BABEL_OP1_203_56090_20130304_141755_inLine
+BABEL_OP1_203_56090_20130304_141755_outLine
+BABEL_OP1_203_56429_20130313_200952_inLine
+BABEL_OP1_203_56429_20130313_200952_outLine
+BABEL_OP1_203_56743_20130319_152822_inLine
+BABEL_OP1_203_56743_20130319_152822_outLine
+BABEL_OP1_203_57609_20130330_155903_inLine
+BABEL_OP1_203_57609_20130330_155903_outLine
+BABEL_OP1_203_58717_20130505_152817_inLine
+BABEL_OP1_203_58717_20130505_152817_outLine
+BABEL_OP1_203_58734_20130309_204100_inLine
+BABEL_OP1_203_60538_20130311_163456_inLine
+BABEL_OP1_203_60538_20130311_163456_outLine
+BABEL_OP1_203_60836_20130314_211014_inLine
+BABEL_OP1_203_60836_20130314_211014_outLine
+BABEL_OP1_203_61963_20130718_155107_inLine
+BABEL_OP1_203_61963_20130718_155107_outLine
+BABEL_OP1_203_62155_20130426_173905_inLine
+BABEL_OP1_203_62155_20130426_173905_outLine
+BABEL_OP1_203_65252_20130731_170815_inLine
+BABEL_OP1_203_65252_20130731_170815_outLine
+BABEL_OP1_203_66026_20130331_154806_inLine
+BABEL_OP1_203_66026_20130331_154806_outLine
+BABEL_OP1_203_67842_20130313_142229_inLine
+BABEL_OP1_203_67842_20130313_142229_outLine
+BABEL_OP1_203_72654_20130323_163248_inLine
+BABEL_OP1_203_72654_20130323_163248_outLine
+BABEL_OP1_203_72733_20130731_235502_inLine
+BABEL_OP1_203_72733_20130731_235502_outLine
+BABEL_OP1_203_79190_20130714_135011_inLine
+BABEL_OP1_203_79190_20130714_135011_outLine
+BABEL_OP1_203_84370_20130506_190748_inLine
+BABEL_OP1_203_84370_20130506_190748_outLine
+BABEL_OP1_203_88601_20130323_155050_inLine
+BABEL_OP1_203_88601_20130323_155050_outLine
+BABEL_OP1_203_90417_20130507_172057_inLine
+BABEL_OP1_203_90417_20130507_172057_outLine
+BABEL_OP1_203_93475_20130312_144135_inLine
+BABEL_OP1_203_93475_20130312_144135_outLine
+BABEL_OP1_203_95467_20130506_155929_inLine
+BABEL_OP1_203_95467_20130506_155929_outLine
+BABEL_OP1_203_96504_20130319_161923_inLine
+BABEL_OP1_203_96504_20130319_161923_outLine
+BABEL_OP1_203_99732_20130406_175258_inLine
+BABEL_OP1_203_99732_20130406_175258_outLine
diff --git a/egs/babel/s5d/conf/lists/203-lao/eval.list b/egs/babel/s5d/conf/lists/203-lao/eval.list
new file mode 100644
index 00000000000..f231ad9d910
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/203-lao/eval.list
@@ -0,0 +1,192 @@
+BABEL_OP1_203_12321_20130406_165656_inLine
+BABEL_OP1_203_12321_20130406_165656_outLine
+BABEL_OP1_203_12916_20130309_200304_inLine
+BABEL_OP1_203_12916_20130309_200304_outLine
+BABEL_OP1_203_13040_20130312_181212_inLine
+BABEL_OP1_203_13040_20130312_181212_outLine
+BABEL_OP1_203_13427_20130428_153335_inLine
+BABEL_OP1_203_13427_20130428_153335_outLine
+BABEL_OP1_203_14537_20130726_183519_inLine
+BABEL_OP1_203_14537_20130726_183519_outLine
+BABEL_OP1_203_15262_20130311_163214_inLine
+BABEL_OP1_203_15262_20130311_163214_outLine
+BABEL_OP1_203_15848_20130304_193558_inLine
+BABEL_OP1_203_15848_20130304_193558_outLine
+BABEL_OP1_203_16056_20130309_212127_inLine
+BABEL_OP1_203_16056_20130309_212127_outLine
+BABEL_OP1_203_17165_20130323_193349_inLine
+BABEL_OP1_203_17165_20130323_193349_outLine
+BABEL_OP1_203_17420_20130410_223425_inLine
+BABEL_OP1_203_17420_20130410_223425_outLine
+BABEL_OP1_203_18863_20130423_201154_inLine
+BABEL_OP1_203_18863_20130423_201154_outLine
+BABEL_OP1_203_19545_20130328_181847_inLine
+BABEL_OP1_203_19545_20130328_181847_outLine
+BABEL_OP1_203_19767_20130729_162359_inLine
+BABEL_OP1_203_20721_20130805_184106_inLine
+BABEL_OP1_203_20721_20130805_184106_outLine
+BABEL_OP1_203_20738_20130501_144021_inLine
+BABEL_OP1_203_20738_20130501_144021_outLine
+BABEL_OP1_203_20800_20130312_182739_inLine
+BABEL_OP1_203_20800_20130312_182739_outLine
+BABEL_OP1_203_20800_20130312_190729_inLine
+BABEL_OP1_203_20800_20130312_190729_outLine
+BABEL_OP1_203_21159_20130428_145928_inLine
+BABEL_OP1_203_21159_20130428_145928_outLine
+BABEL_OP1_203_21393_20130802_160502_inLine
+BABEL_OP1_203_21393_20130802_160502_outLine
+BABEL_OP1_203_21794_20130323_191728_inLine
+BABEL_OP1_203_21794_20130323_191728_outLine
+BABEL_OP1_203_22641_20130310_194352_inLine
+BABEL_OP1_203_22641_20130310_194352_outLine
+BABEL_OP1_203_23395_20130423_140708_inLine
+BABEL_OP1_203_23395_20130423_140708_outLine
+BABEL_OP1_203_23731_20130331_144735_inLine
+BABEL_OP1_203_23731_20130331_144735_outLine
+BABEL_OP1_203_24924_20130509_190210_inLine
+BABEL_OP1_203_24924_20130509_190210_outLine
+BABEL_OP1_203_27189_20130812_203016_inLine
+BABEL_OP1_203_27189_20130812_203016_outLine
+BABEL_OP1_203_28422_20130401_201546_inLine
+BABEL_OP1_203_28422_20130401_201546_outLine
+BABEL_OP1_203_28538_20130323_211503_inLine
+BABEL_OP1_203_28538_20130323_211503_outLine
+BABEL_OP1_203_28538_20130323_212946_inLine
+BABEL_OP1_203_28538_20130323_212946_outLine
+BABEL_OP1_203_29685_20130319_225955_inLine
+BABEL_OP1_203_29685_20130319_225955_outLine
+BABEL_OP1_203_30250_20130307_153941_inLine
+BABEL_OP1_203_30250_20130307_153941_outLine
+BABEL_OP1_203_32832_20130410_151037_inLine
+BABEL_OP1_203_32832_20130410_151037_outLine
+BABEL_OP1_203_32872_20130715_135603_inLine
+BABEL_OP1_203_32872_20130715_135603_outLine
+BABEL_OP1_203_33216_20130427_175935_inLine
+BABEL_OP1_203_33216_20130427_175935_outLine
+BABEL_OP1_203_33216_20130427_182630_inLine
+BABEL_OP1_203_33216_20130427_182630_outLine
+BABEL_OP1_203_33424_20130728_164533_inLine
+BABEL_OP1_203_33424_20130728_164533_outLine
+BABEL_OP1_203_40624_20130812_181331_inLine
+BABEL_OP1_203_40624_20130812_181331_outLine
+BABEL_OP1_203_41038_20130629_153757_inLine
+BABEL_OP1_203_41038_20130629_153757_outLine
+BABEL_OP1_203_41109_20130410_205358_inLine
+BABEL_OP1_203_41109_20130410_205358_outLine
+BABEL_OP1_203_41109_20130410_210805_inLine
+BABEL_OP1_203_41109_20130410_210805_outLine
+BABEL_OP1_203_41233_20130801_201451_inLine
+BABEL_OP1_203_41233_20130801_201451_outLine
+BABEL_OP1_203_41890_20130731_203018_inLine
+BABEL_OP1_203_41890_20130731_203018_outLine
+BABEL_OP1_203_42231_20130330_183550_inLine
+BABEL_OP1_203_42231_20130330_183550_outLine
+BABEL_OP1_203_43789_20130324_223656_inLine
+BABEL_OP1_203_43789_20130324_223656_outLine
+BABEL_OP1_203_44255_20130410_165447_inLine
+BABEL_OP1_203_44255_20130410_165447_outLine
+BABEL_OP1_203_44420_20130320_170344_inLine
+BABEL_OP1_203_44420_20130320_170344_outLine
+BABEL_OP1_203_45140_20130725_155519_inLine
+BABEL_OP1_203_45140_20130725_155519_outLine
+BABEL_OP1_203_45770_20130309_142629_inLine
+BABEL_OP1_203_45770_20130309_142629_outLine
+BABEL_OP1_203_45777_20130324_154017_inLine
+BABEL_OP1_203_45777_20130324_154017_outLine
+BABEL_OP1_203_45908_20130728_202553_inLine
+BABEL_OP1_203_45908_20130728_202553_outLine
+BABEL_OP1_203_46333_20130309_224915_inLine
+BABEL_OP1_203_46333_20130309_224915_outLine
+BABEL_OP1_203_46905_20130812_144116_inLine
+BABEL_OP1_203_46905_20130812_144116_outLine
+BABEL_OP1_203_47959_20130323_214413_inLine
+BABEL_OP1_203_47959_20130323_214413_outLine
+BABEL_OP1_203_48399_20130309_162921_inLine
+BABEL_OP1_203_48399_20130309_162921_outLine
+BABEL_OP1_203_48399_20130309_164247_inLine
+BABEL_OP1_203_48399_20130309_164247_outLine
+BABEL_OP1_203_49870_20130813_180458_inLine
+BABEL_OP1_203_49870_20130813_180458_outLine
+BABEL_OP1_203_50962_20130326_161422_inLine
+BABEL_OP1_203_50962_20130326_161422_outLine
+BABEL_OP1_203_53072_20130714_171830_inLine
+BABEL_OP1_203_53072_20130714_171830_outLine
+BABEL_OP1_203_56019_20130512_160906_inLine
+BABEL_OP1_203_56019_20130512_160906_outLine
+BABEL_OP1_203_56523_20130319_184906_inLine
+BABEL_OP1_203_56523_20130319_184906_outLine
+BABEL_OP1_203_57650_20130411_204456_inLine
+BABEL_OP1_203_57650_20130411_204456_outLine
+BABEL_OP1_203_57922_20130329_164830_inLine
+BABEL_OP1_203_57922_20130329_164830_outLine
+BABEL_OP1_203_59898_20130309_161351_inLine
+BABEL_OP1_203_59898_20130309_161351_outLine
+BABEL_OP1_203_62434_20130309_161135_inLine
+BABEL_OP1_203_62434_20130309_161135_outLine
+BABEL_OP1_203_65339_20130813_152743_inLine
+BABEL_OP1_203_65339_20130813_152743_outLine
+BABEL_OP1_203_67085_20130803_171200_inLine
+BABEL_OP1_203_67085_20130803_171200_outLine
+BABEL_OP1_203_67373_20130314_214840_inLine
+BABEL_OP1_203_67373_20130314_214840_outLine
+BABEL_OP1_203_70726_20130812_194620_inLine
+BABEL_OP1_203_70726_20130812_194620_outLine
+BABEL_OP1_203_71282_20130425_151939_inLine
+BABEL_OP1_203_71282_20130425_151939_outLine
+BABEL_OP1_203_71333_20130314_164236_inLine
+BABEL_OP1_203_71333_20130314_164236_outLine
+BABEL_OP1_203_72073_20130813_163908_inLine
+BABEL_OP1_203_72073_20130813_163908_outLine
+BABEL_OP1_203_73119_20130318_205141_inLine
+BABEL_OP1_203_73119_20130318_205141_outLine
+BABEL_OP1_203_73119_20130318_210234_inLine
+BABEL_OP1_203_73119_20130318_210234_outLine
+BABEL_OP1_203_73757_20130327_154312_inLine
+BABEL_OP1_203_73757_20130327_154312_outLine
+BABEL_OP1_203_73837_20130320_223755_inLine
+BABEL_OP1_203_73837_20130320_223755_outLine
+BABEL_OP1_203_74111_20130720_165204_inLine
+BABEL_OP1_203_74111_20130720_165204_outLine
+BABEL_OP1_203_74641_20130329_192047_inLine
+BABEL_OP1_203_74641_20130329_192047_outLine
+BABEL_OP1_203_75359_20130719_144824_inLine
+BABEL_OP1_203_75359_20130719_144824_outLine
+BABEL_OP1_203_77225_20130813_222437_inLine
+BABEL_OP1_203_77225_20130813_222437_outLine
+BABEL_OP1_203_82904_20130726_192222_inLine
+BABEL_OP1_203_82904_20130726_192222_outLine
+BABEL_OP1_203_83771_20130729_194808_inLine
+BABEL_OP1_203_88394_20130813_004013_inLine
+BABEL_OP1_203_88394_20130813_004013_outLine
+BABEL_OP1_203_88550_20130714_194639_inLine
+BABEL_OP1_203_88550_20130714_194639_outLine
+BABEL_OP1_203_88686_20130307_221522_inLine
+BABEL_OP1_203_88686_20130307_221522_outLine
+BABEL_OP1_203_89372_20130306_162204_inLine
+BABEL_OP1_203_89372_20130306_162204_outLine
+BABEL_OP1_203_89794_20130714_144126_inLine
+BABEL_OP1_203_89794_20130714_144126_outLine
+BABEL_OP1_203_91930_20130424_162834_inLine
+BABEL_OP1_203_91930_20130424_162834_outLine
+BABEL_OP1_203_93861_20130327_171912_inLine
+BABEL_OP1_203_93861_20130327_171912_outLine
+BABEL_OP1_203_94002_20130324_154206_inLine
+BABEL_OP1_203_94002_20130324_154206_outLine
+BABEL_OP1_203_94237_20130801_180053_inLine
+BABEL_OP1_203_94237_20130801_180053_outLine
+BABEL_OP1_203_96088_20130714_191026_inLine
+BABEL_OP1_203_96088_20130714_191026_outLine
+BABEL_OP1_203_96525_20130713_172412_inLine
+BABEL_OP1_203_96525_20130713_172412_outLine
+BABEL_OP1_203_97097_20130721_180647_inLine
+BABEL_OP1_203_97097_20130721_180647_outLine
+BABEL_OP1_203_97570_20130501_151019_inLine
+BABEL_OP1_203_97570_20130501_151019_outLine
+BABEL_OP1_203_97911_20130427_144233_inLine
+BABEL_OP1_203_97911_20130427_144233_outLine
+BABEL_OP1_203_98489_20130314_215814_inLine
+BABEL_OP1_203_98489_20130314_215814_outLine
+BABEL_OP1_203_98580_20130324_195754_inLine
+BABEL_OP1_203_98580_20130324_195754_outLine
+BABEL_OP1_203_99264_20130726_161527_inLine
+BABEL_OP1_203_99264_20130726_161527_outLine
diff --git a/egs/babel/s5d/conf/lists/203-lao/evalpart1.list b/egs/babel/s5d/conf/lists/203-lao/evalpart1.list
new file mode 100644
index 00000000000..a4ebcdd2d76
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/203-lao/evalpart1.list
@@ -0,0 +1,70 @@
+BABEL_OP1_203_18863_20130423_201154_inLine
+BABEL_OP1_203_18863_20130423_201154_outLine
+BABEL_OP1_203_19545_20130328_181847_inLine
+BABEL_OP1_203_19545_20130328_181847_outLine
+BABEL_OP1_203_20738_20130501_144021_inLine
+BABEL_OP1_203_20738_20130501_144021_outLine
+BABEL_OP1_203_21794_20130323_191728_inLine
+BABEL_OP1_203_21794_20130323_191728_outLine
+BABEL_OP1_203_23395_20130423_140708_inLine
+BABEL_OP1_203_23395_20130423_140708_outLine
+BABEL_OP1_203_28538_20130323_211503_inLine
+BABEL_OP1_203_28538_20130323_211503_outLine
+BABEL_OP1_203_28538_20130323_212946_inLine
+BABEL_OP1_203_28538_20130323_212946_outLine
+BABEL_OP1_203_30250_20130307_153941_inLine
+BABEL_OP1_203_30250_20130307_153941_outLine
+BABEL_OP1_203_32872_20130715_135603_inLine
+BABEL_OP1_203_32872_20130715_135603_outLine
+BABEL_OP1_203_41109_20130410_205358_inLine
+BABEL_OP1_203_41109_20130410_205358_outLine
+BABEL_OP1_203_41109_20130410_210805_inLine
+BABEL_OP1_203_41109_20130410_210805_outLine
+BABEL_OP1_203_44255_20130410_165447_inLine
+BABEL_OP1_203_44255_20130410_165447_outLine
+BABEL_OP1_203_45140_20130725_155519_inLine
+BABEL_OP1_203_45140_20130725_155519_outLine
+BABEL_OP1_203_45777_20130324_154017_inLine
+BABEL_OP1_203_45777_20130324_154017_outLine
+BABEL_OP1_203_47959_20130323_214413_inLine
+BABEL_OP1_203_47959_20130323_214413_outLine
+BABEL_OP1_203_48399_20130309_162921_inLine
+BABEL_OP1_203_48399_20130309_162921_outLine
+BABEL_OP1_203_48399_20130309_164247_inLine
+BABEL_OP1_203_48399_20130309_164247_outLine
+BABEL_OP1_203_56019_20130512_160906_inLine
+BABEL_OP1_203_56019_20130512_160906_outLine
+BABEL_OP1_203_56523_20130319_184906_inLine
+BABEL_OP1_203_56523_20130319_184906_outLine
+BABEL_OP1_203_57650_20130411_204456_inLine
+BABEL_OP1_203_57650_20130411_204456_outLine
+BABEL_OP1_203_57922_20130329_164830_inLine
+BABEL_OP1_203_57922_20130329_164830_outLine
+BABEL_OP1_203_59898_20130309_161351_inLine
+BABEL_OP1_203_59898_20130309_161351_outLine
+BABEL_OP1_203_67085_20130803_171200_inLine
+BABEL_OP1_203_67085_20130803_171200_outLine
+BABEL_OP1_203_71282_20130425_151939_inLine
+BABEL_OP1_203_71282_20130425_151939_outLine
+BABEL_OP1_203_73119_20130318_205141_inLine
+BABEL_OP1_203_73119_20130318_205141_outLine
+BABEL_OP1_203_73119_20130318_210234_inLine
+BABEL_OP1_203_73119_20130318_210234_outLine
+BABEL_OP1_203_73837_20130320_223755_inLine
+BABEL_OP1_203_73837_20130320_223755_outLine
+BABEL_OP1_203_74111_20130720_165204_inLine
+BABEL_OP1_203_74111_20130720_165204_outLine
+BABEL_OP1_203_75359_20130719_144824_inLine
+BABEL_OP1_203_75359_20130719_144824_outLine
+BABEL_OP1_203_89372_20130306_162204_inLine
+BABEL_OP1_203_89372_20130306_162204_outLine
+BABEL_OP1_203_93861_20130327_171912_inLine
+BABEL_OP1_203_93861_20130327_171912_outLine
+BABEL_OP1_203_94002_20130324_154206_inLine
+BABEL_OP1_203_94002_20130324_154206_outLine
+BABEL_OP1_203_97097_20130721_180647_inLine
+BABEL_OP1_203_97097_20130721_180647_outLine
+BABEL_OP1_203_97570_20130501_151019_inLine
+BABEL_OP1_203_97570_20130501_151019_outLine
+BABEL_OP1_203_98580_20130324_195754_inLine
+BABEL_OP1_203_98580_20130324_195754_outLine
diff --git a/egs/babel/s5d/conf/lists/203-lao/train.FullLP.list b/egs/babel/s5d/conf/lists/203-lao/train.FullLP.list
new file mode 100644
index 00000000000..b7fb97d771f
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/203-lao/train.FullLP.list
@@ -0,0 +1,781 @@
+BABEL_OP1_203_10036_20130318_191401_inLine
+BABEL_OP1_203_10036_20130318_191401_outLine
+BABEL_OP1_203_10411_20130511_174439_inLine
+BABEL_OP1_203_10411_20130511_174439_outLine
+BABEL_OP1_203_10482_20130403_160013_inLine
+BABEL_OP1_203_10482_20130403_160013_outLine
+BABEL_OP1_203_10524_20130425_183925_inLine
+BABEL_OP1_203_10524_20130425_183925_outLine
+BABEL_OP1_203_10524_20130425_185048_inLine
+BABEL_OP1_203_10524_20130425_185048_outLine
+BABEL_OP1_203_10901_20130321_180232_inLine
+BABEL_OP1_203_10901_20130321_180232_outLine
+BABEL_OP1_203_10938_20130319_190809_inLine
+BABEL_OP1_203_10938_20130319_190809_outLine
+BABEL_OP1_203_10966_20130319_135742_inLine
+BABEL_OP1_203_10966_20130319_135742_outLine
+BABEL_OP1_203_10974_20130425_162609_inLine
+BABEL_OP1_203_10974_20130425_162609_outLine
+BABEL_OP1_203_11352_20130426_170450_inLine
+BABEL_OP1_203_11352_20130426_170450_outLine
+BABEL_OP1_203_11486_20130428_131348_inLine
+BABEL_OP1_203_11486_20130428_131348_outLine
+BABEL_OP1_203_11663_20130402_202025_inLine
+BABEL_OP1_203_11663_20130402_202025_outLine
+BABEL_OP1_203_11673_20130306_201125_inLine
+BABEL_OP1_203_11673_20130306_201125_outLine
+BABEL_OP1_203_11797_20130309_195420_inLine
+BABEL_OP1_203_11797_20130309_195420_outLine
+BABEL_OP1_203_11859_20130511_201411_inLine
+BABEL_OP1_203_11859_20130511_201411_outLine
+BABEL_OP1_203_12036_20130312_182225_inLine
+BABEL_OP1_203_12036_20130312_182225_outLine
+BABEL_OP1_203_12220_20130321_160841_inLine
+BABEL_OP1_203_12220_20130321_160841_outLine
+BABEL_OP1_203_12606_20130726_174724_inLine
+BABEL_OP1_203_12606_20130726_174724_outLine
+BABEL_OP1_203_12609_20130727_133133_outLine
+BABEL_OP1_203_12767_20130313_214914_inLine
+BABEL_OP1_203_12767_20130313_214914_outLine
+BABEL_OP1_203_12851_20130304_181335_inLine
+BABEL_OP1_203_12851_20130304_181335_outLine
+BABEL_OP1_203_12851_20130304_182835_inLine
+BABEL_OP1_203_12851_20130304_182835_outLine
+BABEL_OP1_203_12851_20130304_185138_inLine
+BABEL_OP1_203_12851_20130304_185138_outLine
+BABEL_OP1_203_13126_20130421_175306_inLine
+BABEL_OP1_203_13126_20130421_175306_outLine
+BABEL_OP1_203_13126_20130421_180154_inLine
+BABEL_OP1_203_13126_20130421_180154_outLine
+BABEL_OP1_203_13324_20130313_185155_inLine
+BABEL_OP1_203_13324_20130313_185155_outLine
+BABEL_OP1_203_13483_20130409_231107_inLine
+BABEL_OP1_203_13483_20130409_231107_outLine
+BABEL_OP1_203_13490_20130322_143131_inLine
+BABEL_OP1_203_13490_20130322_143131_outLine
+BABEL_OP1_203_13664_20130304_155051_inLine
+BABEL_OP1_203_13664_20130304_155051_outLine
+BABEL_OP1_203_13709_20130410_222037_inLine
+BABEL_OP1_203_13709_20130410_222037_outLine
+BABEL_OP1_203_13744_20130307_215445_inLine
+BABEL_OP1_203_13744_20130307_215445_outLine
+BABEL_OP1_203_13792_20130310_142445_inLine
+BABEL_OP1_203_13792_20130310_142445_outLine
+BABEL_OP1_203_14137_20130314_181335_inLine
+BABEL_OP1_203_14137_20130314_181335_outLine
+BABEL_OP1_203_14141_20130410_212719_inLine
+BABEL_OP1_203_14141_20130410_212719_outLine
+BABEL_OP1_203_14179_20130402_211621_inLine
+BABEL_OP1_203_14179_20130402_211621_outLine
+BABEL_OP1_203_14229_20130324_162827_inLine
+BABEL_OP1_203_14229_20130324_162827_outLine
+BABEL_OP1_203_14237_20130313_222650_inLine
+BABEL_OP1_203_14237_20130313_222650_outLine
+BABEL_OP1_203_14560_20130425_140155_inLine
+BABEL_OP1_203_14560_20130425_140155_outLine
+BABEL_OP1_203_14719_20130406_191558_inLine
+BABEL_OP1_203_14719_20130406_191558_outLine
+BABEL_OP1_203_14725_20130309_185639_inLine
+BABEL_OP1_203_14725_20130309_185639_outLine
+BABEL_OP1_203_14729_20130411_214726_inLine
+BABEL_OP1_203_14729_20130411_214726_outLine
+BABEL_OP1_203_14814_20130314_133131_inLine
+BABEL_OP1_203_14814_20130314_133131_outLine
+BABEL_OP1_203_14899_20130311_184638_inLine
+BABEL_OP1_203_14899_20130311_184638_outLine
+BABEL_OP1_203_14929_20130324_184056_inLine
+BABEL_OP1_203_14929_20130324_184056_outLine
+BABEL_OP1_203_15024_20130322_152846_inLine
+BABEL_OP1_203_15024_20130322_152846_outLine
+BABEL_OP1_203_15163_20130319_154026_inLine
+BABEL_OP1_203_15163_20130319_154026_outLine
+BABEL_OP1_203_15227_20130513_222256_inLine
+BABEL_OP1_203_15227_20130513_222256_outLine
+BABEL_OP1_203_15322_20130511_152438_inLine
+BABEL_OP1_203_15322_20130511_152438_outLine
+BABEL_OP1_203_15324_20130512_224242_inLine
+BABEL_OP1_203_15324_20130512_224242_outLine
+BABEL_OP1_203_15324_20130512_225202_inLine
+BABEL_OP1_203_15324_20130512_225202_outLine
+BABEL_OP1_203_15535_20130329_143236_inLine
+BABEL_OP1_203_15535_20130329_143236_outLine
+BABEL_OP1_203_15638_20130409_150143_inLine
+BABEL_OP1_203_15638_20130409_150143_outLine
+BABEL_OP1_203_15730_20130307_201711_inLine
+BABEL_OP1_203_15730_20130307_201711_outLine
+BABEL_OP1_203_15749_20130407_175145_inLine
+BABEL_OP1_203_15749_20130407_175145_outLine
+BABEL_OP1_203_15902_20130309_193940_inLine
+BABEL_OP1_203_15902_20130309_193940_outLine
+BABEL_OP1_203_16149_20130309_171014_inLine
+BABEL_OP1_203_16149_20130309_171014_outLine
+BABEL_OP1_203_16924_20130720_175321_inLine
+BABEL_OP1_203_16924_20130720_175321_outLine
+BABEL_OP1_203_17032_20130402_175428_inLine
+BABEL_OP1_203_17032_20130402_175428_outLine
+BABEL_OP1_203_17097_20130430_173440_inLine
+BABEL_OP1_203_17097_20130430_173440_outLine
+BABEL_OP1_203_17115_20130425_173844_inLine
+BABEL_OP1_203_17115_20130425_173844_outLine
+BABEL_OP1_203_17115_20130425_175816_inLine
+BABEL_OP1_203_17115_20130425_175816_outLine
+BABEL_OP1_203_17472_20130408_215034_inLine
+BABEL_OP1_203_17472_20130408_215034_outLine
+BABEL_OP1_203_17567_20130425_145936_inLine
+BABEL_OP1_203_17567_20130425_145936_outLine
+BABEL_OP1_203_17751_20130512_155328_inLine
+BABEL_OP1_203_17751_20130512_155328_outLine
+BABEL_OP1_203_17914_20130503_215602_inLine
+BABEL_OP1_203_17914_20130503_215602_outLine
+BABEL_OP1_203_17923_20130314_203130_inLine
+BABEL_OP1_203_17923_20130314_203130_outLine
+BABEL_OP1_203_18118_20130730_191442_inLine
+BABEL_OP1_203_18118_20130730_191442_outLine
+BABEL_OP1_203_18380_20130327_214619_inLine
+BABEL_OP1_203_18380_20130327_214619_outLine
+BABEL_OP1_203_18566_20130503_153904_inLine
+BABEL_OP1_203_18566_20130503_153904_outLine
+BABEL_OP1_203_18939_20130311_144740_inLine
+BABEL_OP1_203_18939_20130311_144740_outLine
+BABEL_OP1_203_19101_20130423_142324_inLine
+BABEL_OP1_203_19101_20130423_142324_outLine
+BABEL_OP1_203_19134_20130328_220635_inLine
+BABEL_OP1_203_19134_20130328_220635_outLine
+BABEL_OP1_203_19589_20130727_143145_inLine
+BABEL_OP1_203_19589_20130727_143145_outLine
+BABEL_OP1_203_19703_20130318_160958_inLine
+BABEL_OP1_203_19703_20130318_160958_outLine
+BABEL_OP1_203_19703_20130318_162314_inLine
+BABEL_OP1_203_19703_20130318_162314_outLine
+BABEL_OP1_203_19773_20130407_183531_inLine
+BABEL_OP1_203_19773_20130407_183531_outLine
+BABEL_OP1_203_19782_20130404_170141_inLine
+BABEL_OP1_203_19782_20130404_170141_outLine
+BABEL_OP1_203_20133_20130304_160351_inLine
+BABEL_OP1_203_20133_20130304_160351_outLine
+BABEL_OP1_203_20330_20130410_161539_inLine
+BABEL_OP1_203_20330_20130410_161539_outLine
+BABEL_OP1_203_20682_20130406_194906_inLine
+BABEL_OP1_203_20682_20130406_194906_outLine
+BABEL_OP1_203_20768_20130407_190152_inLine
+BABEL_OP1_203_20768_20130407_190152_outLine
+BABEL_OP1_203_20985_20130330_210730_inLine
+BABEL_OP1_203_20985_20130330_210730_outLine
+BABEL_OP1_203_21004_20130410_181101_inLine
+BABEL_OP1_203_21004_20130410_181101_outLine
+BABEL_OP1_203_21004_20130410_182740_inLine
+BABEL_OP1_203_21004_20130410_182740_outLine
+BABEL_OP1_203_21109_20130406_161601_inLine
+BABEL_OP1_203_21109_20130406_161601_outLine
+BABEL_OP1_203_21206_20130312_164516_inLine
+BABEL_OP1_203_21206_20130312_164516_outLine
+BABEL_OP1_203_21315_20130501_151005_inLine
+BABEL_OP1_203_21315_20130501_151005_outLine
+BABEL_OP1_203_21327_20130405_203336_inLine
+BABEL_OP1_203_21327_20130405_203336_outLine
+BABEL_OP1_203_21435_20130423_181043_inLine
+BABEL_OP1_203_21435_20130423_181043_outLine
+BABEL_OP1_203_22280_20130329_161951_inLine
+BABEL_OP1_203_22280_20130329_161951_outLine
+BABEL_OP1_203_22321_20130309_191222_inLine
+BABEL_OP1_203_22321_20130309_191222_outLine
+BABEL_OP1_203_22446_20130309_134600_inLine
+BABEL_OP1_203_22446_20130309_134600_outLine
+BABEL_OP1_203_22494_20130402_171234_inLine
+BABEL_OP1_203_22494_20130402_171234_outLine
+BABEL_OP1_203_22612_20130406_220338_inLine
+BABEL_OP1_203_22612_20130406_220338_outLine
+BABEL_OP1_203_22624_20130403_190935_inLine
+BABEL_OP1_203_22624_20130403_190935_outLine
+BABEL_OP1_203_22918_20130410_190723_inLine
+BABEL_OP1_203_22918_20130410_190723_outLine
+BABEL_OP1_203_23006_20130319_211412_inLine
+BABEL_OP1_203_23006_20130319_211412_outLine
+BABEL_OP1_203_23046_20130322_165811_inLine
+BABEL_OP1_203_23046_20130322_165811_outLine
+BABEL_OP1_203_23092_20130406_014425_inLine
+BABEL_OP1_203_23092_20130406_014425_outLine
+BABEL_OP1_203_23092_20130406_015338_inLine
+BABEL_OP1_203_23092_20130406_015338_outLine
+BABEL_OP1_203_23153_20130320_194433_inLine
+BABEL_OP1_203_23153_20130320_194433_outLine
+BABEL_OP1_203_23239_20130331_171214_inLine
+BABEL_OP1_203_23239_20130331_171214_outLine
+BABEL_OP1_203_23505_20130309_204825_inLine
+BABEL_OP1_203_23505_20130309_204825_outLine
+BABEL_OP1_203_23980_20130321_193946_inLine
+BABEL_OP1_203_23980_20130321_193946_outLine
+BABEL_OP1_203_24017_20130424_174037_inLine
+BABEL_OP1_203_24017_20130424_174037_outLine
+BABEL_OP1_203_24253_20130423_175626_inLine
+BABEL_OP1_203_24253_20130423_175626_outLine
+BABEL_OP1_203_24270_20130329_153331_inLine
+BABEL_OP1_203_24270_20130329_153331_outLine
+BABEL_OP1_203_24290_20130423_133315_inLine
+BABEL_OP1_203_24290_20130423_133315_outLine
+BABEL_OP1_203_24323_20130320_160949_inLine
+BABEL_OP1_203_24323_20130320_160949_outLine
+BABEL_OP1_203_24470_20130329_205656_inLine
+BABEL_OP1_203_24470_20130329_205656_outLine
+BABEL_OP1_203_24501_20130421_141711_inLine
+BABEL_OP1_203_24501_20130421_141711_outLine
+BABEL_OP1_203_24569_20130405_200644_inLine
+BABEL_OP1_203_24569_20130405_200644_outLine
+BABEL_OP1_203_24586_20130506_203931_inLine
+BABEL_OP1_203_24586_20130506_203931_outLine
+BABEL_OP1_203_24589_20130323_190409_inLine
+BABEL_OP1_203_24589_20130323_190409_outLine
+BABEL_OP1_203_24589_20130323_192722_inLine
+BABEL_OP1_203_24589_20130323_192722_outLine
+BABEL_OP1_203_24590_20130321_221146_inLine
+BABEL_OP1_203_24590_20130321_221146_outLine
+BABEL_OP1_203_24679_20130307_145644_inLine
+BABEL_OP1_203_24679_20130307_145644_outLine
+BABEL_OP1_203_24779_20130426_183526_inLine
+BABEL_OP1_203_24779_20130426_183526_outLine
+BABEL_OP1_203_24982_20130327_153429_inLine
+BABEL_OP1_203_24982_20130327_153429_outLine
+BABEL_OP1_203_25015_20130728_150746_inLine
+BABEL_OP1_203_25015_20130728_150746_outLine
+BABEL_OP1_203_25085_20130508_145922_inLine
+BABEL_OP1_203_25085_20130508_145922_outLine
+BABEL_OP1_203_25220_20130502_183943_inLine
+BABEL_OP1_203_25220_20130502_183943_outLine
+BABEL_OP1_203_25412_20130329_201051_inLine
+BABEL_OP1_203_25412_20130329_201051_outLine
+BABEL_OP1_203_25698_20130509_182226_inLine
+BABEL_OP1_203_25698_20130509_182226_outLine
+BABEL_OP1_203_25719_20130426_202355_inLine
+BABEL_OP1_203_25719_20130426_202355_outLine
+BABEL_OP1_203_25767_20130311_183243_inLine
+BABEL_OP1_203_25767_20130311_183243_outLine
+BABEL_OP1_203_25961_20130311_171235_inLine
+BABEL_OP1_203_25961_20130311_171235_outLine
+BABEL_OP1_203_26388_20130318_200305_inLine
+BABEL_OP1_203_26388_20130318_200305_outLine
+BABEL_OP1_203_26507_20130430_234212_inLine
+BABEL_OP1_203_26507_20130430_234212_outLine
+BABEL_OP1_203_26574_20130411_160556_inLine
+BABEL_OP1_203_26574_20130411_160556_outLine
+BABEL_OP1_203_26602_20130801_171131_inLine
+BABEL_OP1_203_26602_20130801_171131_outLine
+BABEL_OP1_203_26836_20130315_160512_inLine
+BABEL_OP1_203_26836_20130315_160512_outLine
+BABEL_OP1_203_27125_20130308_003724_inLine
+BABEL_OP1_203_27125_20130308_003724_outLine
+BABEL_OP1_203_27218_20130312_194932_inLine
+BABEL_OP1_203_27218_20130312_194932_outLine
+BABEL_OP1_203_27478_20130501_195141_inLine
+BABEL_OP1_203_27478_20130501_195141_outLine
+BABEL_OP1_203_27478_20130501_200641_inLine
+BABEL_OP1_203_27478_20130501_200641_outLine
+BABEL_OP1_203_27590_20130405_200930_inLine
+BABEL_OP1_203_27590_20130405_200930_outLine
+BABEL_OP1_203_27841_20130403_211143_inLine
+BABEL_OP1_203_27841_20130403_211143_outLine
+BABEL_OP1_203_28190_20130730_195836_inLine
+BABEL_OP1_203_28190_20130730_195836_outLine
+BABEL_OP1_203_28280_20130501_220643_inLine
+BABEL_OP1_203_28280_20130501_220643_outLine
+BABEL_OP1_203_28419_20130319_165427_inLine
+BABEL_OP1_203_28419_20130319_165427_outLine
+BABEL_OP1_203_28522_20130328_170837_inLine
+BABEL_OP1_203_28522_20130328_170837_outLine
+BABEL_OP1_203_28775_20130313_213707_inLine
+BABEL_OP1_203_28775_20130313_213707_outLine
+BABEL_OP1_203_28775_20130313_215352_inLine
+BABEL_OP1_203_28775_20130313_215352_outLine
+BABEL_OP1_203_28945_20130315_171902_inLine
+BABEL_OP1_203_28945_20130315_171902_outLine
+BABEL_OP1_203_29023_20130313_194148_inLine
+BABEL_OP1_203_29023_20130313_194148_outLine
+BABEL_OP1_203_29023_20130313_195106_inLine
+BABEL_OP1_203_29023_20130313_195106_outLine
+BABEL_OP1_203_29039_20130402_153541_inLine
+BABEL_OP1_203_29039_20130402_153541_outLine
+BABEL_OP1_203_29168_20130306_213504_inLine
+BABEL_OP1_203_29168_20130306_213504_outLine
+BABEL_OP1_203_29323_20130403_215525_inLine
+BABEL_OP1_203_29323_20130403_215525_outLine
+BABEL_OP1_203_29416_20130421_133101_inLine
+BABEL_OP1_203_29416_20130421_133101_outLine
+BABEL_OP1_203_29439_20130422_150608_inLine
+BABEL_OP1_203_29439_20130422_150608_outLine
+BABEL_OP1_203_30013_20130331_170538_inLine
+BABEL_OP1_203_30013_20130331_170538_outLine
+BABEL_OP1_203_30395_20130318_180120_inLine
+BABEL_OP1_203_30395_20130318_180120_outLine
+BABEL_OP1_203_30645_20130309_151850_inLine
+BABEL_OP1_203_30645_20130309_151850_outLine
+BABEL_OP1_203_31184_20130322_141512_inLine
+BABEL_OP1_203_31184_20130322_141512_outLine
+BABEL_OP1_203_31184_20130322_142743_inLine
+BABEL_OP1_203_31184_20130322_142743_outLine
+BABEL_OP1_203_31490_20130321_210518_inLine
+BABEL_OP1_203_31490_20130321_210518_outLine
+BABEL_OP1_203_31992_20130313_143826_inLine
+BABEL_OP1_203_31992_20130313_143826_outLine
+BABEL_OP1_203_32097_20130304_195431_inLine
+BABEL_OP1_203_32097_20130304_195431_outLine
+BABEL_OP1_203_32122_20130320_174321_inLine
+BABEL_OP1_203_32122_20130320_174321_outLine
+BABEL_OP1_203_32122_20130320_175419_inLine
+BABEL_OP1_203_32122_20130320_175419_outLine
+BABEL_OP1_203_32244_20130728_182847_inLine
+BABEL_OP1_203_32244_20130728_182847_outLine
+BABEL_OP1_203_32914_20130411_174738_inLine
+BABEL_OP1_203_32914_20130411_174738_outLine
+BABEL_OP1_203_32998_20130329_155417_inLine
+BABEL_OP1_203_32998_20130329_155417_outLine
+BABEL_OP1_203_33175_20130307_204134_inLine
+BABEL_OP1_203_33175_20130307_204134_outLine
+BABEL_OP1_203_33476_20130320_140412_inLine
+BABEL_OP1_203_33476_20130320_140412_outLine
+BABEL_OP1_203_33672_20130312_165130_inLine
+BABEL_OP1_203_33672_20130312_165130_outLine
+BABEL_OP1_203_33704_20130405_220001_inLine
+BABEL_OP1_203_33704_20130405_220001_outLine
+BABEL_OP1_203_33840_20130803_192343_inLine
+BABEL_OP1_203_33840_20130803_192343_outLine
+BABEL_OP1_203_34145_20130331_145240_inLine
+BABEL_OP1_203_34145_20130331_145240_outLine
+BABEL_OP1_203_35139_20130313_143646_inLine
+BABEL_OP1_203_35139_20130313_143646_outLine
+BABEL_OP1_203_36505_20130731_191406_inLine
+BABEL_OP1_203_36505_20130731_191406_outLine
+BABEL_OP1_203_36594_20130421_182303_inLine
+BABEL_OP1_203_36594_20130421_182303_outLine
+BABEL_OP1_203_37598_20130330_000102_inLine
+BABEL_OP1_203_37598_20130330_000102_outLine
+BABEL_OP1_203_38979_20130409_173446_inLine
+BABEL_OP1_203_38979_20130409_173446_outLine
+BABEL_OP1_203_38979_20130409_174405_inLine
+BABEL_OP1_203_38979_20130409_174405_outLine
+BABEL_OP1_203_39006_20130506_192659_inLine
+BABEL_OP1_203_39006_20130506_192659_outLine
+BABEL_OP1_203_39555_20130720_183746_inLine
+BABEL_OP1_203_39555_20130720_183746_outLine
+BABEL_OP1_203_39848_20130320_133756_inLine
+BABEL_OP1_203_39848_20130320_133756_outLine
+BABEL_OP1_203_40557_20130404_005522_inLine
+BABEL_OP1_203_40557_20130404_005522_outLine
+BABEL_OP1_203_40565_20130331_171210_inLine
+BABEL_OP1_203_40565_20130331_171210_outLine
+BABEL_OP1_203_40713_20130321_155930_inLine
+BABEL_OP1_203_40713_20130321_155930_outLine
+BABEL_OP1_203_41073_20130721_172038_inLine
+BABEL_OP1_203_41073_20130721_172038_outLine
+BABEL_OP1_203_41097_20130427_224950_inLine
+BABEL_OP1_203_41097_20130427_224950_outLine
+BABEL_OP1_203_41100_20130313_161755_inLine
+BABEL_OP1_203_41100_20130313_161755_outLine
+BABEL_OP1_203_41174_20130318_203041_inLine
+BABEL_OP1_203_41174_20130318_203041_outLine
+BABEL_OP1_203_41334_20130501_232034_inLine
+BABEL_OP1_203_41334_20130501_232034_outLine
+BABEL_OP1_203_41442_20130404_174409_inLine
+BABEL_OP1_203_41442_20130404_174409_outLine
+BABEL_OP1_203_41469_20130313_185923_inLine
+BABEL_OP1_203_41469_20130313_185923_outLine
+BABEL_OP1_203_41609_20130309_175203_inLine
+BABEL_OP1_203_41609_20130309_175203_outLine
+BABEL_OP1_203_41680_20130304_134640_inLine
+BABEL_OP1_203_41680_20130304_134640_outLine
+BABEL_OP1_203_42029_20130403_184623_inLine
+BABEL_OP1_203_42029_20130403_184623_outLine
+BABEL_OP1_203_42126_20130805_213859_inLine
+BABEL_OP1_203_42126_20130805_213859_outLine
+BABEL_OP1_203_42243_20130313_170336_inLine
+BABEL_OP1_203_42243_20130313_170336_outLine
+BABEL_OP1_203_42299_20130508_203220_inLine
+BABEL_OP1_203_42299_20130508_203220_outLine
+BABEL_OP1_203_42299_20130508_204824_inLine
+BABEL_OP1_203_42299_20130508_204824_outLine
+BABEL_OP1_203_42309_20130428_191239_inLine
+BABEL_OP1_203_42309_20130428_191239_outLine
+BABEL_OP1_203_42434_20130323_160637_inLine
+BABEL_OP1_203_42434_20130323_160637_outLine
+BABEL_OP1_203_42834_20130404_194840_inLine
+BABEL_OP1_203_42834_20130404_194840_outLine
+BABEL_OP1_203_42848_20130513_201112_inLine
+BABEL_OP1_203_42848_20130513_201112_outLine
+BABEL_OP1_203_42883_20130729_171646_inLine
+BABEL_OP1_203_42883_20130729_171646_outLine
+BABEL_OP1_203_43368_20130327_215424_inLine
+BABEL_OP1_203_43368_20130327_215424_outLine
+BABEL_OP1_203_43388_20130327_192024_inLine
+BABEL_OP1_203_43388_20130327_192024_outLine
+BABEL_OP1_203_43588_20130714_163553_inLine
+BABEL_OP1_203_43588_20130714_163553_outLine
+BABEL_OP1_203_43784_20130314_171933_inLine
+BABEL_OP1_203_43784_20130314_171933_outLine
+BABEL_OP1_203_43788_20130504_173234_inLine
+BABEL_OP1_203_43788_20130504_173234_outLine
+BABEL_OP1_203_43920_20130405_194800_inLine
+BABEL_OP1_203_43920_20130405_194800_outLine
+BABEL_OP1_203_44477_20130331_190402_inLine
+BABEL_OP1_203_44477_20130331_190402_outLine
+BABEL_OP1_203_44478_20130730_170938_inLine
+BABEL_OP1_203_44478_20130730_170938_outLine
+BABEL_OP1_203_44619_20130313_175437_inLine
+BABEL_OP1_203_44619_20130313_175437_outLine
+BABEL_OP1_203_44709_20130331_183159_inLine
+BABEL_OP1_203_44709_20130331_183159_outLine
+BABEL_OP1_203_44961_20130311_173427_inLine
+BABEL_OP1_203_44961_20130311_173427_outLine
+BABEL_OP1_203_45560_20130309_173444_inLine
+BABEL_OP1_203_45560_20130309_173444_outLine
+BABEL_OP1_203_45642_20130313_202110_inLine
+BABEL_OP1_203_45642_20130313_202110_outLine
+BABEL_OP1_203_45851_20130801_014413_inLine
+BABEL_OP1_203_45851_20130801_014413_outLine
+BABEL_OP1_203_46310_20130309_211431_inLine
+BABEL_OP1_203_46310_20130309_211431_outLine
+BABEL_OP1_203_46550_20130313_153012_inLine
+BABEL_OP1_203_46550_20130313_153012_outLine
+BABEL_OP1_203_46625_20130304_201959_inLine
+BABEL_OP1_203_46625_20130304_201959_outLine
+BABEL_OP1_203_46681_20130313_203139_inLine
+BABEL_OP1_203_46681_20130313_203139_outLine
+BABEL_OP1_203_46688_20130314_212550_inLine
+BABEL_OP1_203_46688_20130314_212550_outLine
+BABEL_OP1_203_46763_20130426_160841_inLine
+BABEL_OP1_203_46763_20130426_160841_outLine
+BABEL_OP1_203_47186_20130405_120609_inLine
+BABEL_OP1_203_47186_20130405_120609_outLine
+BABEL_OP1_203_47270_20130410_160110_inLine
+BABEL_OP1_203_47270_20130410_160110_outLine
+BABEL_OP1_203_47487_20130321_145055_inLine
+BABEL_OP1_203_47487_20130321_145055_outLine
+BABEL_OP1_203_47823_20130406_151016_inLine
+BABEL_OP1_203_47823_20130406_151016_outLine
+BABEL_OP1_203_47866_20130723_152640_inLine
+BABEL_OP1_203_47866_20130723_152640_outLine
+BABEL_OP1_203_48422_20130425_175947_inLine
+BABEL_OP1_203_48422_20130425_175947_outLine
+BABEL_OP1_203_48610_20130309_222037_inLine
+BABEL_OP1_203_48610_20130309_222037_outLine
+BABEL_OP1_203_49001_20130315_160533_inLine
+BABEL_OP1_203_49001_20130315_160533_outLine
+BABEL_OP1_203_49216_20130307_211955_inLine
+BABEL_OP1_203_49216_20130307_211955_outLine
+BABEL_OP1_203_49287_20130331_155341_inLine
+BABEL_OP1_203_49287_20130331_155341_outLine
+BABEL_OP1_203_49437_20130405_194333_inLine
+BABEL_OP1_203_49437_20130405_194333_outLine
+BABEL_OP1_203_49437_20130405_195645_inLine
+BABEL_OP1_203_49437_20130405_195645_outLine
+BABEL_OP1_203_49630_20130408_182919_inLine
+BABEL_OP1_203_49630_20130408_182919_outLine
+BABEL_OP1_203_49637_20130313_134853_inLine
+BABEL_OP1_203_49637_20130313_134853_outLine
+BABEL_OP1_203_49768_20130320_164815_inLine
+BABEL_OP1_203_49768_20130320_164815_outLine
+BABEL_OP1_203_49902_20130323_175920_inLine
+BABEL_OP1_203_49902_20130323_175920_outLine
+BABEL_OP1_203_50090_20130726_145642_inLine
+BABEL_OP1_203_50090_20130726_145642_outLine
+BABEL_OP1_203_50175_20130311_181803_inLine
+BABEL_OP1_203_50175_20130311_181803_outLine
+BABEL_OP1_203_50726_20130307_135236_inLine
+BABEL_OP1_203_50726_20130307_135236_outLine
+BABEL_OP1_203_51414_20130729_152916_inLine
+BABEL_OP1_203_51414_20130729_152916_outLine
+BABEL_OP1_203_51530_20130803_174620_inLine
+BABEL_OP1_203_51530_20130803_174620_outLine
+BABEL_OP1_203_51611_20130312_195333_inLine
+BABEL_OP1_203_51611_20130312_195333_outLine
+BABEL_OP1_203_51701_20130508_232537_inLine
+BABEL_OP1_203_51701_20130508_232537_outLine
+BABEL_OP1_203_51819_20130328_150620_inLine
+BABEL_OP1_203_51819_20130328_150620_outLine
+BABEL_OP1_203_51955_20130314_175859_inLine
+BABEL_OP1_203_51955_20130314_175859_outLine
+BABEL_OP1_203_51955_20130314_180731_inLine
+BABEL_OP1_203_51955_20130314_180731_outLine
+BABEL_OP1_203_52246_20130319_221049_inLine
+BABEL_OP1_203_52246_20130319_221049_outLine
+BABEL_OP1_203_52272_20130313_140038_inLine
+BABEL_OP1_203_52272_20130313_140038_outLine
+BABEL_OP1_203_52404_20130409_005414_inLine
+BABEL_OP1_203_52404_20130409_005414_outLine
+BABEL_OP1_203_52422_20130427_140502_inLine
+BABEL_OP1_203_52422_20130427_140502_outLine
+BABEL_OP1_203_52447_20130513_224209_inLine
+BABEL_OP1_203_52447_20130513_224209_outLine
+BABEL_OP1_203_52490_20130309_141915_inLine
+BABEL_OP1_203_52490_20130309_141915_outLine
+BABEL_OP1_203_52717_20130311_173849_inLine
+BABEL_OP1_203_52717_20130311_173849_outLine
+BABEL_OP1_203_52854_20130221_192229_inLine
+BABEL_OP1_203_52854_20130221_192229_outLine
+BABEL_OP1_203_53063_20130407_210935_inLine
+BABEL_OP1_203_53063_20130407_210935_outLine
+BABEL_OP1_203_53665_20130727_150857_inLine
+BABEL_OP1_203_53665_20130727_150857_outLine
+BABEL_OP1_203_53842_20130322_165451_inLine
+BABEL_OP1_203_53842_20130322_165451_outLine
+BABEL_OP1_203_54046_20130804_193101_inLine
+BABEL_OP1_203_54046_20130804_193101_outLine
+BABEL_OP1_203_54074_20130319_150208_inLine
+BABEL_OP1_203_54074_20130319_150208_outLine
+BABEL_OP1_203_54104_20130309_204103_inLine
+BABEL_OP1_203_54104_20130309_204103_outLine
+BABEL_OP1_203_54390_20130313_161947_inLine
+BABEL_OP1_203_54390_20130313_161947_outLine
+BABEL_OP1_203_54477_20130408_133628_inLine
+BABEL_OP1_203_54477_20130408_133628_outLine
+BABEL_OP1_203_54530_20130424_194302_inLine
+BABEL_OP1_203_54530_20130424_194302_outLine
+BABEL_OP1_203_54697_20130405_153323_inLine
+BABEL_OP1_203_54697_20130405_153323_outLine
+BABEL_OP1_203_54744_20130311_153522_inLine
+BABEL_OP1_203_54744_20130311_153522_outLine
+BABEL_OP1_203_54827_20130803_201026_inLine
+BABEL_OP1_203_54827_20130803_201026_outLine
+BABEL_OP1_203_54953_20130319_135125_inLine
+BABEL_OP1_203_54953_20130319_135125_outLine
+BABEL_OP1_203_55259_20130323_181918_inLine
+BABEL_OP1_203_55259_20130323_181918_outLine
+BABEL_OP1_203_55818_20130309_163433_inLine
+BABEL_OP1_203_55818_20130309_163433_outLine
+BABEL_OP1_203_55950_20130728_141857_inLine
+BABEL_OP1_203_55950_20130728_141857_outLine
+BABEL_OP1_203_56076_20130728_212423_inLine
+BABEL_OP1_203_56076_20130728_212423_outLine
+BABEL_OP1_203_56198_20130314_163346_inLine
+BABEL_OP1_203_56198_20130314_163346_outLine
+BABEL_OP1_203_56198_20130314_164412_inLine
+BABEL_OP1_203_56198_20130314_164412_outLine
+BABEL_OP1_203_56213_20130407_184955_inLine
+BABEL_OP1_203_56213_20130407_184955_outLine
+BABEL_OP1_203_56306_20130408_202539_inLine
+BABEL_OP1_203_56306_20130408_202539_outLine
+BABEL_OP1_203_56307_20130401_212823_inLine
+BABEL_OP1_203_56307_20130401_212823_outLine
+BABEL_OP1_203_56465_20130503_211423_inLine
+BABEL_OP1_203_56465_20130503_211423_outLine
+BABEL_OP1_203_56677_20130407_020513_inLine
+BABEL_OP1_203_56677_20130407_020513_outLine
+BABEL_OP1_203_56826_20130403_155349_inLine
+BABEL_OP1_203_56826_20130403_155349_outLine
+BABEL_OP1_203_57093_20130323_155842_inLine
+BABEL_OP1_203_57093_20130323_155842_outLine
+BABEL_OP1_203_57116_20130306_200913_inLine
+BABEL_OP1_203_57116_20130306_200913_outLine
+BABEL_OP1_203_57529_20130404_225031_inLine
+BABEL_OP1_203_57529_20130404_225031_outLine
+BABEL_OP1_203_57678_20130319_173142_inLine
+BABEL_OP1_203_57678_20130319_173142_outLine
+BABEL_OP1_203_58107_20130331_163124_inLine
+BABEL_OP1_203_58107_20130331_163124_outLine
+BABEL_OP1_203_58107_20130331_164049_inLine
+BABEL_OP1_203_58107_20130331_164049_outLine
+BABEL_OP1_203_58145_20130404_174142_inLine
+BABEL_OP1_203_58145_20130404_174142_outLine
+BABEL_OP1_203_58489_20130406_171644_inLine
+BABEL_OP1_203_58489_20130406_171644_outLine
+BABEL_OP1_203_58821_20130330_171943_inLine
+BABEL_OP1_203_58821_20130330_171943_outLine
+BABEL_OP1_203_58850_20130320_210438_outLine
+BABEL_OP1_203_58853_20130804_133710_inLine
+BABEL_OP1_203_58853_20130804_133710_outLine
+BABEL_OP1_203_58915_20130508_170813_inLine
+BABEL_OP1_203_58915_20130508_170813_outLine
+BABEL_OP1_203_58926_20130314_221922_inLine
+BABEL_OP1_203_58926_20130314_221922_outLine
+BABEL_OP1_203_59078_20130328_222520_inLine
+BABEL_OP1_203_59078_20130328_222520_outLine
+BABEL_OP1_203_59307_20130503_211805_inLine
+BABEL_OP1_203_59307_20130503_211805_outLine
+BABEL_OP1_203_59720_20130323_160840_inLine
+BABEL_OP1_203_59720_20130323_160840_outLine
+BABEL_OP1_203_59747_20130307_185538_inLine
+BABEL_OP1_203_59747_20130307_185538_outLine
+BABEL_OP1_203_59864_20130719_183902_inLine
+BABEL_OP1_203_59864_20130719_183902_outLine
+BABEL_OP1_203_59928_20130314_205249_inLine
+BABEL_OP1_203_59928_20130314_205249_outLine
+BABEL_OP1_203_60026_20130311_192442_inLine
+BABEL_OP1_203_60026_20130311_192442_outLine
+BABEL_OP1_203_60352_20130724_151721_inLine
+BABEL_OP1_203_60352_20130724_151721_outLine
+BABEL_OP1_203_60397_20130814_170113_inLine
+BABEL_OP1_203_60397_20130814_170113_outLine
+BABEL_OP1_203_60436_20130726_213808_inLine
+BABEL_OP1_203_60436_20130726_213808_outLine
+BABEL_OP1_203_60830_20130323_152836_inLine
+BABEL_OP1_203_60830_20130323_152836_outLine
+BABEL_OP1_203_61011_20130307_163948_inLine
+BABEL_OP1_203_61011_20130307_163948_outLine
+BABEL_OP1_203_61225_20130310_001509_inLine
+BABEL_OP1_203_61225_20130310_001509_outLine
+BABEL_OP1_203_61225_20130310_002607_inLine
+BABEL_OP1_203_61225_20130310_002607_outLine
+BABEL_OP1_203_61435_20130421_175121_inLine
+BABEL_OP1_203_61435_20130421_175121_outLine
+BABEL_OP1_203_61440_20130513_143551_inLine
+BABEL_OP1_203_61440_20130513_143551_outLine
+BABEL_OP1_203_61888_20130410_154115_inLine
+BABEL_OP1_203_61888_20130410_154115_outLine
+BABEL_OP1_203_62014_20130503_150317_inLine
+BABEL_OP1_203_62014_20130503_150317_outLine
+BABEL_OP1_203_62200_20130320_155842_inLine
+BABEL_OP1_203_62200_20130320_155842_outLine
+BABEL_OP1_203_62360_20130729_185133_inLine
+BABEL_OP1_203_62360_20130729_185133_outLine
+BABEL_OP1_203_62362_20130513_145108_inLine
+BABEL_OP1_203_62362_20130513_145108_outLine
+BABEL_OP1_203_62714_20130430_183624_inLine
+BABEL_OP1_203_62714_20130430_183624_outLine
+BABEL_OP1_203_62800_20130307_204137_inLine
+BABEL_OP1_203_62800_20130307_204137_outLine
+BABEL_OP1_203_62976_20130512_201748_inLine
+BABEL_OP1_203_62976_20130512_201748_outLine
+BABEL_OP1_203_63094_20130512_165833_inLine
+BABEL_OP1_203_63094_20130512_165833_outLine
+BABEL_OP1_203_63730_20130507_163540_inLine
+BABEL_OP1_203_63730_20130507_163540_outLine
+BABEL_OP1_203_64014_20130411_192910_inLine
+BABEL_OP1_203_64014_20130411_192910_outLine
+BABEL_OP1_203_64065_20130326_201717_inLine
+BABEL_OP1_203_64065_20130326_201717_outLine
+BABEL_OP1_203_64065_20130326_202638_inLine
+BABEL_OP1_203_64065_20130326_202638_outLine
+BABEL_OP1_203_65723_20130313_205922_inLine
+BABEL_OP1_203_65723_20130313_205922_outLine
+BABEL_OP1_203_65913_20130726_205358_inLine
+BABEL_OP1_203_65913_20130726_205358_outLine
+BABEL_OP1_203_66001_20130309_233448_inLine
+BABEL_OP1_203_66001_20130309_233448_outLine
+BABEL_OP1_203_66045_20130323_203735_inLine
+BABEL_OP1_203_66045_20130323_203735_outLine
+BABEL_OP1_203_66822_20130324_142935_inLine
+BABEL_OP1_203_66822_20130324_142935_outLine
+BABEL_OP1_203_66916_20130308_142310_inLine
+BABEL_OP1_203_66916_20130308_142310_outLine
+BABEL_OP1_203_66971_20130725_151439_inLine
+BABEL_OP1_203_66971_20130725_151439_outLine
+BABEL_OP1_203_67066_20130509_215551_inLine
+BABEL_OP1_203_67066_20130509_215551_outLine
+BABEL_OP1_203_68289_20130409_222355_inLine
+BABEL_OP1_203_68289_20130409_222355_outLine
+BABEL_OP1_203_68385_20130221_213027_inLine
+BABEL_OP1_203_68385_20130221_213027_outLine
+BABEL_OP1_203_69096_20130714_153203_inLine
+BABEL_OP1_203_69096_20130714_153203_outLine
+BABEL_OP1_203_69474_20130409_153705_inLine
+BABEL_OP1_203_69474_20130409_153705_outLine
+BABEL_OP1_203_69885_20130729_175242_inLine
+BABEL_OP1_203_69885_20130729_175242_outLine
+BABEL_OP1_203_69964_20130801_183705_inLine
+BABEL_OP1_203_69964_20130801_183705_outLine
+BABEL_OP1_203_70221_20130502_153055_inLine
+BABEL_OP1_203_70221_20130502_153055_outLine
+BABEL_OP1_203_70386_20130315_162835_inLine
+BABEL_OP1_203_70386_20130315_162835_outLine
+BABEL_OP1_203_70639_20130805_192027_inLine
+BABEL_OP1_203_70639_20130805_192027_outLine
+BABEL_OP1_203_70716_20130731_182939_inLine
+BABEL_OP1_203_70716_20130731_182939_outLine
+BABEL_OP1_203_71067_20130503_201919_inLine
+BABEL_OP1_203_71067_20130503_201919_outLine
+BABEL_OP1_203_71566_20130406_212124_inLine
+BABEL_OP1_203_71566_20130406_212124_outLine
+BABEL_OP1_203_72324_20130721_195442_inLine
+BABEL_OP1_203_72324_20130721_195442_outLine
+BABEL_OP1_203_72587_20130331_220349_inLine
+BABEL_OP1_203_72587_20130331_220349_outLine
+BABEL_OP1_203_73042_20130314_184552_inLine
+BABEL_OP1_203_73042_20130314_184552_outLine
+BABEL_OP1_203_73301_20130321_151848_inLine
+BABEL_OP1_203_73301_20130321_151848_outLine
+BABEL_OP1_203_73591_20130222_132516_inLine
+BABEL_OP1_203_73591_20130222_132516_outLine
+BABEL_OP1_203_74667_20130322_155857_inLine
+BABEL_OP1_203_74667_20130322_155857_outLine
+BABEL_OP1_203_74886_20130309_200304_inLine
+BABEL_OP1_203_74886_20130309_200304_outLine
+BABEL_OP1_203_75064_20130322_142556_inLine
+BABEL_OP1_203_75064_20130322_142556_outLine
+BABEL_OP1_203_75342_20130404_193602_inLine
+BABEL_OP1_203_75342_20130404_193602_outLine
+BABEL_OP1_203_75869_20130721_161850_inLine
+BABEL_OP1_203_75869_20130721_161850_outLine
+BABEL_OP1_203_76444_20130406_153810_inLine
+BABEL_OP1_203_76444_20130406_153810_outLine
+BABEL_OP1_203_76482_20130508_220808_inLine
+BABEL_OP1_203_76482_20130508_220808_outLine
+BABEL_OP1_203_77242_20130508_191854_inLine
+BABEL_OP1_203_77242_20130508_191854_outLine
+BABEL_OP1_203_78749_20130426_182140_inLine
+BABEL_OP1_203_78749_20130426_182140_outLine
+BABEL_OP1_203_79131_20130727_202021_inLine
+BABEL_OP1_203_79131_20130727_202021_outLine
+BABEL_OP1_203_79660_20130512_173422_inLine
+BABEL_OP1_203_79660_20130512_173422_outLine
+BABEL_OP1_203_80134_20130814_145021_inLine
+BABEL_OP1_203_80134_20130814_145021_outLine
+BABEL_OP1_203_81287_20130403_225530_inLine
+BABEL_OP1_203_81287_20130403_225530_outLine
+BABEL_OP1_203_82224_20130718_134750_inLine
+BABEL_OP1_203_82224_20130718_134750_outLine
+BABEL_OP1_203_83813_20130812_133548_inLine
+BABEL_OP1_203_83813_20130812_133548_outLine
+BABEL_OP1_203_84339_20130802_181641_inLine
+BABEL_OP1_203_84339_20130802_181641_outLine
+BABEL_OP1_203_84469_20130421_132749_inLine
+BABEL_OP1_203_84469_20130421_132749_outLine
+BABEL_OP1_203_84611_20130312_152852_inLine
+BABEL_OP1_203_84611_20130312_152852_outLine
+BABEL_OP1_203_85325_20130802_212902_inLine
+BABEL_OP1_203_85325_20130802_212902_outLine
+BABEL_OP1_203_86597_20130508_182316_inLine
+BABEL_OP1_203_86597_20130508_182316_outLine
+BABEL_OP1_203_86628_20130512_215243_inLine
+BABEL_OP1_203_86628_20130512_215243_outLine
+BABEL_OP1_203_86830_20130423_194221_inLine
+BABEL_OP1_203_86830_20130423_194221_outLine
+BABEL_OP1_203_86878_20130804_174949_inLine
+BABEL_OP1_203_86878_20130804_174949_outLine
+BABEL_OP1_203_86891_20130427_122020_inLine
+BABEL_OP1_203_86891_20130427_122020_outLine
+BABEL_OP1_203_87305_20130512_150816_inLine
+BABEL_OP1_203_87305_20130512_150816_outLine
+BABEL_OP1_203_89358_20130327_183946_inLine
+BABEL_OP1_203_89358_20130327_183946_outLine
+BABEL_OP1_203_89943_20130319_151705_inLine
+BABEL_OP1_203_89943_20130319_151705_outLine
+BABEL_OP1_203_90709_20130311_171156_inLine
+BABEL_OP1_203_90709_20130311_171156_outLine
+BABEL_OP1_203_91760_20130728_190550_inLine
+BABEL_OP1_203_91760_20130728_190550_outLine
+BABEL_OP1_203_92077_20130725_140650_inLine
+BABEL_OP1_203_92077_20130725_140650_outLine
+BABEL_OP1_203_93411_20130324_150550_inLine
+BABEL_OP1_203_93411_20130324_150550_outLine
+BABEL_OP1_203_93490_20130804_201521_inLine
+BABEL_OP1_203_93490_20130804_201521_outLine
+BABEL_OP1_203_93964_20130327_171307_inLine
+BABEL_OP1_203_93964_20130327_171307_outLine
+BABEL_OP1_203_94442_20130727_182743_inLine
+BABEL_OP1_203_94442_20130727_182743_outLine
+BABEL_OP1_203_94449_20130801_010717_inLine
+BABEL_OP1_203_94449_20130801_010717_outLine
+BABEL_OP1_203_95338_20130727_211019_inLine
+BABEL_OP1_203_95338_20130727_211019_outLine
+BABEL_OP1_203_96059_20130731_211048_inLine
+BABEL_OP1_203_96059_20130731_211048_outLine
+BABEL_OP1_203_96376_20130731_143340_outLine
+BABEL_OP1_203_96690_20130320_183730_inLine
+BABEL_OP1_203_96690_20130320_183730_outLine
+BABEL_OP1_203_96690_20130320_185039_inLine
+BABEL_OP1_203_96690_20130320_185039_outLine
+BABEL_OP1_203_96842_20130726_140248_inLine
+BABEL_OP1_203_96842_20130726_140248_outLine
+BABEL_OP1_203_97220_20130508_165310_inLine
+BABEL_OP1_203_97220_20130508_165310_outLine
+BABEL_OP1_203_97836_20130430_195102_inLine
+BABEL_OP1_203_97836_20130430_195102_outLine
+BABEL_OP1_203_98192_20130511_210223_inLine
+BABEL_OP1_203_98192_20130511_210223_outLine
diff --git a/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.list b/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.list
new file mode 100644
index 00000000000..bc4c7166c32
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.list
@@ -0,0 +1,127 @@
+BABEL_OP1_203_10974_20130425_162609_inLine
+BABEL_OP1_203_10974_20130425_162609_outLine
+BABEL_OP1_203_14141_20130410_212719_inLine
+BABEL_OP1_203_14141_20130410_212719_outLine
+BABEL_OP1_203_14237_20130313_222650_inLine
+BABEL_OP1_203_14237_20130313_222650_outLine
+BABEL_OP1_203_15163_20130319_154026_inLine
+BABEL_OP1_203_15163_20130319_154026_outLine
+BABEL_OP1_203_15324_20130512_224242_inLine
+BABEL_OP1_203_15324_20130512_224242_outLine
+BABEL_OP1_203_15324_20130512_225202_inLine
+BABEL_OP1_203_15324_20130512_225202_outLine
+BABEL_OP1_203_15638_20130409_150143_inLine
+BABEL_OP1_203_15638_20130409_150143_outLine
+BABEL_OP1_203_17115_20130425_173844_inLine
+BABEL_OP1_203_17115_20130425_173844_outLine
+BABEL_OP1_203_17115_20130425_175816_inLine
+BABEL_OP1_203_17115_20130425_175816_outLine
+BABEL_OP1_203_17751_20130512_155328_inLine
+BABEL_OP1_203_17751_20130512_155328_outLine
+BABEL_OP1_203_17914_20130503_215602_inLine
+BABEL_OP1_203_17914_20130503_215602_outLine
+BABEL_OP1_203_17923_20130314_203130_inLine
+BABEL_OP1_203_17923_20130314_203130_outLine
+BABEL_OP1_203_20682_20130406_194906_inLine
+BABEL_OP1_203_20682_20130406_194906_outLine
+BABEL_OP1_203_22624_20130403_190935_inLine
+BABEL_OP1_203_22624_20130403_190935_outLine
+BABEL_OP1_203_24270_20130329_153331_inLine
+BABEL_OP1_203_24270_20130329_153331_outLine
+BABEL_OP1_203_24589_20130323_190409_inLine
+BABEL_OP1_203_24589_20130323_190409_outLine
+BABEL_OP1_203_24589_20130323_192722_inLine
+BABEL_OP1_203_24589_20130323_192722_outLine
+BABEL_OP1_203_25220_20130502_183943_inLine
+BABEL_OP1_203_25220_20130502_183943_outLine
+BABEL_OP1_203_27478_20130501_195141_inLine
+BABEL_OP1_203_27478_20130501_195141_outLine
+BABEL_OP1_203_27478_20130501_200641_inLine
+BABEL_OP1_203_27478_20130501_200641_outLine
+BABEL_OP1_203_28190_20130730_195836_inLine
+BABEL_OP1_203_28190_20130730_195836_outLine
+BABEL_OP1_203_28945_20130315_171902_inLine
+BABEL_OP1_203_28945_20130315_171902_outLine
+BABEL_OP1_203_32914_20130411_174738_inLine
+BABEL_OP1_203_32914_20130411_174738_outLine
+BABEL_OP1_203_33175_20130307_204134_inLine
+BABEL_OP1_203_33175_20130307_204134_outLine
+BABEL_OP1_203_40713_20130321_155930_inLine
+BABEL_OP1_203_40713_20130321_155930_outLine
+BABEL_OP1_203_41097_20130427_224950_inLine
+BABEL_OP1_203_41097_20130427_224950_outLine
+BABEL_OP1_203_41100_20130313_161755_inLine
+BABEL_OP1_203_41100_20130313_161755_outLine
+BABEL_OP1_203_41680_20130304_134640_inLine
+BABEL_OP1_203_41680_20130304_134640_outLine
+BABEL_OP1_203_42126_20130805_213859_inLine
+BABEL_OP1_203_42126_20130805_213859_outLine
+BABEL_OP1_203_42243_20130313_170336_inLine
+BABEL_OP1_203_42243_20130313_170336_outLine
+BABEL_OP1_203_42834_20130404_194840_inLine
+BABEL_OP1_203_42834_20130404_194840_outLine
+BABEL_OP1_203_42883_20130729_171646_inLine
+BABEL_OP1_203_42883_20130729_171646_outLine
+BABEL_OP1_203_44477_20130331_190402_inLine
+BABEL_OP1_203_44477_20130331_190402_outLine
+BABEL_OP1_203_45642_20130313_202110_inLine
+BABEL_OP1_203_45642_20130313_202110_outLine
+BABEL_OP1_203_46625_20130304_201959_inLine
+BABEL_OP1_203_46625_20130304_201959_outLine
+BABEL_OP1_203_46763_20130426_160841_inLine
+BABEL_OP1_203_46763_20130426_160841_outLine
+BABEL_OP1_203_47270_20130410_160110_inLine
+BABEL_OP1_203_47270_20130410_160110_outLine
+BABEL_OP1_203_49637_20130313_134853_inLine
+BABEL_OP1_203_49637_20130313_134853_outLine
+BABEL_OP1_203_49902_20130323_175920_inLine
+BABEL_OP1_203_49902_20130323_175920_outLine
+BABEL_OP1_203_50726_20130307_135236_inLine
+BABEL_OP1_203_50726_20130307_135236_outLine
+BABEL_OP1_203_51414_20130729_152916_inLine
+BABEL_OP1_203_51414_20130729_152916_outLine
+BABEL_OP1_203_52447_20130513_224209_inLine
+BABEL_OP1_203_52447_20130513_224209_outLine
+BABEL_OP1_203_52854_20130221_192229_inLine
+BABEL_OP1_203_52854_20130221_192229_outLine
+BABEL_OP1_203_54046_20130804_193101_inLine
+BABEL_OP1_203_54046_20130804_193101_outLine
+BABEL_OP1_203_54744_20130311_153522_inLine
+BABEL_OP1_203_54744_20130311_153522_outLine
+BABEL_OP1_203_55818_20130309_163433_inLine
+BABEL_OP1_203_55818_20130309_163433_outLine
+BABEL_OP1_203_56213_20130407_184955_inLine
+BABEL_OP1_203_56213_20130407_184955_outLine
+BABEL_OP1_203_56465_20130503_211423_inLine
+BABEL_OP1_203_56465_20130503_211423_outLine
+BABEL_OP1_203_56677_20130407_020513_inLine
+BABEL_OP1_203_56677_20130407_020513_outLine
+BABEL_OP1_203_58850_20130320_210438_outLine
+BABEL_OP1_203_58853_20130804_133710_inLine
+BABEL_OP1_203_58853_20130804_133710_outLine
+BABEL_OP1_203_61011_20130307_163948_inLine
+BABEL_OP1_203_61011_20130307_163948_outLine
+BABEL_OP1_203_62362_20130513_145108_inLine
+BABEL_OP1_203_62362_20130513_145108_outLine
+BABEL_OP1_203_63094_20130512_165833_inLine
+BABEL_OP1_203_63094_20130512_165833_outLine
+BABEL_OP1_203_64014_20130411_192910_inLine
+BABEL_OP1_203_64014_20130411_192910_outLine
+BABEL_OP1_203_65723_20130313_205922_inLine
+BABEL_OP1_203_65723_20130313_205922_outLine
+BABEL_OP1_203_69885_20130729_175242_inLine
+BABEL_OP1_203_69885_20130729_175242_outLine
+BABEL_OP1_203_70639_20130805_192027_inLine
+BABEL_OP1_203_70639_20130805_192027_outLine
+BABEL_OP1_203_73042_20130314_184552_inLine
+BABEL_OP1_203_73042_20130314_184552_outLine
+BABEL_OP1_203_73301_20130321_151848_inLine
+BABEL_OP1_203_73301_20130321_151848_outLine
+BABEL_OP1_203_78749_20130426_182140_inLine
+BABEL_OP1_203_78749_20130426_182140_outLine
+BABEL_OP1_203_83813_20130812_133548_inLine
+BABEL_OP1_203_83813_20130812_133548_outLine
+BABEL_OP1_203_86830_20130423_194221_inLine
+BABEL_OP1_203_86830_20130423_194221_outLine
+BABEL_OP1_203_96842_20130726_140248_inLine
+BABEL_OP1_203_96842_20130726_140248_outLine
diff --git a/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.untranscribed.list
new file mode 100644
index 00000000000..500c68fda58
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/203-lao/train.LimitedLP.untranscribed.list
@@ -0,0 +1,654 @@
+BABEL_OP1_203_10036_20130318_191401_inLine
+BABEL_OP1_203_10036_20130318_191401_outLine
+BABEL_OP1_203_10411_20130511_174439_inLine
+BABEL_OP1_203_10411_20130511_174439_outLine
+BABEL_OP1_203_10482_20130403_160013_inLine
+BABEL_OP1_203_10482_20130403_160013_outLine
+BABEL_OP1_203_10524_20130425_183925_inLine
+BABEL_OP1_203_10524_20130425_183925_outLine
+BABEL_OP1_203_10524_20130425_185048_inLine
+BABEL_OP1_203_10524_20130425_185048_outLine
+BABEL_OP1_203_10901_20130321_180232_inLine
+BABEL_OP1_203_10901_20130321_180232_outLine
+BABEL_OP1_203_10938_20130319_190809_inLine
+BABEL_OP1_203_10938_20130319_190809_outLine
+BABEL_OP1_203_10966_20130319_135742_inLine
+BABEL_OP1_203_10966_20130319_135742_outLine
+BABEL_OP1_203_11352_20130426_170450_inLine
+BABEL_OP1_203_11352_20130426_170450_outLine
+BABEL_OP1_203_11486_20130428_131348_inLine
+BABEL_OP1_203_11486_20130428_131348_outLine
+BABEL_OP1_203_11663_20130402_202025_inLine
+BABEL_OP1_203_11663_20130402_202025_outLine
+BABEL_OP1_203_11673_20130306_201125_inLine
+BABEL_OP1_203_11673_20130306_201125_outLine
+BABEL_OP1_203_11797_20130309_195420_inLine
+BABEL_OP1_203_11797_20130309_195420_outLine
+BABEL_OP1_203_11859_20130511_201411_inLine
+BABEL_OP1_203_11859_20130511_201411_outLine
+BABEL_OP1_203_12036_20130312_182225_inLine
+BABEL_OP1_203_12036_20130312_182225_outLine
+BABEL_OP1_203_12220_20130321_160841_inLine
+BABEL_OP1_203_12220_20130321_160841_outLine
+BABEL_OP1_203_12606_20130726_174724_inLine
+BABEL_OP1_203_12606_20130726_174724_outLine
+BABEL_OP1_203_12609_20130727_133133_outLine
+BABEL_OP1_203_12767_20130313_214914_inLine
+BABEL_OP1_203_12767_20130313_214914_outLine
+BABEL_OP1_203_12851_20130304_181335_inLine
+BABEL_OP1_203_12851_20130304_181335_outLine
+BABEL_OP1_203_12851_20130304_182835_inLine
+BABEL_OP1_203_12851_20130304_182835_outLine
+BABEL_OP1_203_12851_20130304_185138_inLine
+BABEL_OP1_203_12851_20130304_185138_outLine
+BABEL_OP1_203_13126_20130421_175306_inLine
+BABEL_OP1_203_13126_20130421_175306_outLine
+BABEL_OP1_203_13126_20130421_180154_inLine
+BABEL_OP1_203_13126_20130421_180154_outLine
+BABEL_OP1_203_13324_20130313_185155_inLine
+BABEL_OP1_203_13324_20130313_185155_outLine
+BABEL_OP1_203_13483_20130409_231107_inLine
+BABEL_OP1_203_13483_20130409_231107_outLine
+BABEL_OP1_203_13490_20130322_143131_inLine
+BABEL_OP1_203_13490_20130322_143131_outLine
+BABEL_OP1_203_13664_20130304_155051_inLine
+BABEL_OP1_203_13664_20130304_155051_outLine
+BABEL_OP1_203_13709_20130410_222037_inLine
+BABEL_OP1_203_13709_20130410_222037_outLine
+BABEL_OP1_203_13744_20130307_215445_inLine
+BABEL_OP1_203_13744_20130307_215445_outLine
+BABEL_OP1_203_13792_20130310_142445_inLine
+BABEL_OP1_203_13792_20130310_142445_outLine
+BABEL_OP1_203_14137_20130314_181335_inLine
+BABEL_OP1_203_14137_20130314_181335_outLine
+BABEL_OP1_203_14179_20130402_211621_inLine
+BABEL_OP1_203_14179_20130402_211621_outLine
+BABEL_OP1_203_14229_20130324_162827_inLine
+BABEL_OP1_203_14229_20130324_162827_outLine
+BABEL_OP1_203_14560_20130425_140155_inLine
+BABEL_OP1_203_14560_20130425_140155_outLine
+BABEL_OP1_203_14719_20130406_191558_inLine
+BABEL_OP1_203_14719_20130406_191558_outLine
+BABEL_OP1_203_14725_20130309_185639_inLine
+BABEL_OP1_203_14725_20130309_185639_outLine
+BABEL_OP1_203_14729_20130411_214726_inLine
+BABEL_OP1_203_14729_20130411_214726_outLine
+BABEL_OP1_203_14814_20130314_133131_inLine
+BABEL_OP1_203_14814_20130314_133131_outLine
+BABEL_OP1_203_14899_20130311_184638_inLine
+BABEL_OP1_203_14899_20130311_184638_outLine
+BABEL_OP1_203_14929_20130324_184056_inLine
+BABEL_OP1_203_14929_20130324_184056_outLine
+BABEL_OP1_203_15024_20130322_152846_inLine
+BABEL_OP1_203_15024_20130322_152846_outLine
+BABEL_OP1_203_15227_20130513_222256_inLine
+BABEL_OP1_203_15227_20130513_222256_outLine
+BABEL_OP1_203_15322_20130511_152438_inLine
+BABEL_OP1_203_15322_20130511_152438_outLine
+BABEL_OP1_203_15535_20130329_143236_inLine
+BABEL_OP1_203_15535_20130329_143236_outLine
+BABEL_OP1_203_15730_20130307_201711_inLine
+BABEL_OP1_203_15730_20130307_201711_outLine
+BABEL_OP1_203_15749_20130407_175145_inLine
+BABEL_OP1_203_15749_20130407_175145_outLine
+BABEL_OP1_203_15902_20130309_193940_inLine
+BABEL_OP1_203_15902_20130309_193940_outLine
+BABEL_OP1_203_16149_20130309_171014_inLine
+BABEL_OP1_203_16149_20130309_171014_outLine
+BABEL_OP1_203_16924_20130720_175321_inLine
+BABEL_OP1_203_16924_20130720_175321_outLine
+BABEL_OP1_203_17032_20130402_175428_inLine
+BABEL_OP1_203_17032_20130402_175428_outLine
+BABEL_OP1_203_17097_20130430_173440_inLine
+BABEL_OP1_203_17097_20130430_173440_outLine
+BABEL_OP1_203_17472_20130408_215034_inLine
+BABEL_OP1_203_17472_20130408_215034_outLine
+BABEL_OP1_203_17567_20130425_145936_inLine
+BABEL_OP1_203_17567_20130425_145936_outLine
+BABEL_OP1_203_18118_20130730_191442_inLine
+BABEL_OP1_203_18118_20130730_191442_outLine
+BABEL_OP1_203_18380_20130327_214619_inLine
+BABEL_OP1_203_18380_20130327_214619_outLine
+BABEL_OP1_203_18566_20130503_153904_inLine
+BABEL_OP1_203_18566_20130503_153904_outLine
+BABEL_OP1_203_18939_20130311_144740_inLine
+BABEL_OP1_203_18939_20130311_144740_outLine
+BABEL_OP1_203_19101_20130423_142324_inLine
+BABEL_OP1_203_19101_20130423_142324_outLine
+BABEL_OP1_203_19134_20130328_220635_inLine
+BABEL_OP1_203_19134_20130328_220635_outLine
+BABEL_OP1_203_19589_20130727_143145_inLine
+BABEL_OP1_203_19589_20130727_143145_outLine
+BABEL_OP1_203_19703_20130318_160958_inLine
+BABEL_OP1_203_19703_20130318_160958_outLine
+BABEL_OP1_203_19703_20130318_162314_inLine
+BABEL_OP1_203_19703_20130318_162314_outLine
+BABEL_OP1_203_19773_20130407_183531_inLine
+BABEL_OP1_203_19773_20130407_183531_outLine
+BABEL_OP1_203_19782_20130404_170141_inLine
+BABEL_OP1_203_19782_20130404_170141_outLine
+BABEL_OP1_203_20133_20130304_160351_inLine
+BABEL_OP1_203_20133_20130304_160351_outLine
+BABEL_OP1_203_20330_20130410_161539_inLine
+BABEL_OP1_203_20330_20130410_161539_outLine
+BABEL_OP1_203_20768_20130407_190152_inLine
+BABEL_OP1_203_20768_20130407_190152_outLine
+BABEL_OP1_203_20985_20130330_210730_inLine
+BABEL_OP1_203_20985_20130330_210730_outLine
+BABEL_OP1_203_21004_20130410_181101_inLine
+BABEL_OP1_203_21004_20130410_181101_outLine
+BABEL_OP1_203_21004_20130410_182740_inLine
+BABEL_OP1_203_21004_20130410_182740_outLine
+BABEL_OP1_203_21109_20130406_161601_inLine
+BABEL_OP1_203_21109_20130406_161601_outLine
+BABEL_OP1_203_21206_20130312_164516_inLine
+BABEL_OP1_203_21206_20130312_164516_outLine
+BABEL_OP1_203_21315_20130501_151005_inLine
+BABEL_OP1_203_21315_20130501_151005_outLine
+BABEL_OP1_203_21327_20130405_203336_inLine
+BABEL_OP1_203_21327_20130405_203336_outLine
+BABEL_OP1_203_21435_20130423_181043_inLine
+BABEL_OP1_203_21435_20130423_181043_outLine
+BABEL_OP1_203_22280_20130329_161951_inLine
+BABEL_OP1_203_22280_20130329_161951_outLine
+BABEL_OP1_203_22321_20130309_191222_inLine
+BABEL_OP1_203_22321_20130309_191222_outLine
+BABEL_OP1_203_22446_20130309_134600_inLine
+BABEL_OP1_203_22446_20130309_134600_outLine
+BABEL_OP1_203_22494_20130402_171234_inLine
+BABEL_OP1_203_22494_20130402_171234_outLine
+BABEL_OP1_203_22612_20130406_220338_inLine
+BABEL_OP1_203_22612_20130406_220338_outLine
+BABEL_OP1_203_22918_20130410_190723_inLine
+BABEL_OP1_203_22918_20130410_190723_outLine
+BABEL_OP1_203_23006_20130319_211412_inLine
+BABEL_OP1_203_23006_20130319_211412_outLine
+BABEL_OP1_203_23046_20130322_165811_inLine
+BABEL_OP1_203_23046_20130322_165811_outLine
+BABEL_OP1_203_23092_20130406_014425_inLine
+BABEL_OP1_203_23092_20130406_014425_outLine
+BABEL_OP1_203_23092_20130406_015338_inLine
+BABEL_OP1_203_23092_20130406_015338_outLine
+BABEL_OP1_203_23153_20130320_194433_inLine
+BABEL_OP1_203_23153_20130320_194433_outLine
+BABEL_OP1_203_23239_20130331_171214_inLine
+BABEL_OP1_203_23239_20130331_171214_outLine
+BABEL_OP1_203_23505_20130309_204825_inLine
+BABEL_OP1_203_23505_20130309_204825_outLine
+BABEL_OP1_203_23980_20130321_193946_inLine
+BABEL_OP1_203_23980_20130321_193946_outLine
+BABEL_OP1_203_24017_20130424_174037_inLine
+BABEL_OP1_203_24017_20130424_174037_outLine
+BABEL_OP1_203_24253_20130423_175626_inLine
+BABEL_OP1_203_24253_20130423_175626_outLine
+BABEL_OP1_203_24290_20130423_133315_inLine
+BABEL_OP1_203_24290_20130423_133315_outLine
+BABEL_OP1_203_24323_20130320_160949_inLine
+BABEL_OP1_203_24323_20130320_160949_outLine
+BABEL_OP1_203_24470_20130329_205656_inLine
+BABEL_OP1_203_24470_20130329_205656_outLine
+BABEL_OP1_203_24501_20130421_141711_inLine
+BABEL_OP1_203_24501_20130421_141711_outLine
+BABEL_OP1_203_24569_20130405_200644_inLine
+BABEL_OP1_203_24569_20130405_200644_outLine
+BABEL_OP1_203_24586_20130506_203931_inLine
+BABEL_OP1_203_24586_20130506_203931_outLine
+BABEL_OP1_203_24590_20130321_221146_inLine
+BABEL_OP1_203_24590_20130321_221146_outLine
+BABEL_OP1_203_24679_20130307_145644_inLine
+BABEL_OP1_203_24679_20130307_145644_outLine
+BABEL_OP1_203_24779_20130426_183526_inLine
+BABEL_OP1_203_24779_20130426_183526_outLine
+BABEL_OP1_203_24982_20130327_153429_inLine
+BABEL_OP1_203_24982_20130327_153429_outLine
+BABEL_OP1_203_25015_20130728_150746_inLine
+BABEL_OP1_203_25015_20130728_150746_outLine
+BABEL_OP1_203_25085_20130508_145922_inLine
+BABEL_OP1_203_25085_20130508_145922_outLine
+BABEL_OP1_203_25412_20130329_201051_inLine
+BABEL_OP1_203_25412_20130329_201051_outLine
+BABEL_OP1_203_25698_20130509_182226_inLine
+BABEL_OP1_203_25698_20130509_182226_outLine
+BABEL_OP1_203_25719_20130426_202355_inLine
+BABEL_OP1_203_25719_20130426_202355_outLine
+BABEL_OP1_203_25767_20130311_183243_inLine
+BABEL_OP1_203_25767_20130311_183243_outLine
+BABEL_OP1_203_25961_20130311_171235_inLine
+BABEL_OP1_203_25961_20130311_171235_outLine
+BABEL_OP1_203_26388_20130318_200305_inLine
+BABEL_OP1_203_26388_20130318_200305_outLine
+BABEL_OP1_203_26507_20130430_234212_inLine
+BABEL_OP1_203_26507_20130430_234212_outLine
+BABEL_OP1_203_26574_20130411_160556_inLine
+BABEL_OP1_203_26574_20130411_160556_outLine
+BABEL_OP1_203_26602_20130801_171131_inLine
+BABEL_OP1_203_26602_20130801_171131_outLine
+BABEL_OP1_203_26836_20130315_160512_inLine
+BABEL_OP1_203_26836_20130315_160512_outLine
+BABEL_OP1_203_27125_20130308_003724_inLine
+BABEL_OP1_203_27125_20130308_003724_outLine
+BABEL_OP1_203_27218_20130312_194932_inLine
+BABEL_OP1_203_27218_20130312_194932_outLine
+BABEL_OP1_203_27590_20130405_200930_inLine
+BABEL_OP1_203_27590_20130405_200930_outLine
+BABEL_OP1_203_27841_20130403_211143_inLine
+BABEL_OP1_203_27841_20130403_211143_outLine
+BABEL_OP1_203_28280_20130501_220643_inLine
+BABEL_OP1_203_28280_20130501_220643_outLine
+BABEL_OP1_203_28419_20130319_165427_inLine
+BABEL_OP1_203_28419_20130319_165427_outLine
+BABEL_OP1_203_28522_20130328_170837_inLine
+BABEL_OP1_203_28522_20130328_170837_outLine
+BABEL_OP1_203_28775_20130313_213707_inLine
+BABEL_OP1_203_28775_20130313_213707_outLine
+BABEL_OP1_203_28775_20130313_215352_inLine
+BABEL_OP1_203_28775_20130313_215352_outLine
+BABEL_OP1_203_29023_20130313_194148_inLine
+BABEL_OP1_203_29023_20130313_194148_outLine
+BABEL_OP1_203_29023_20130313_195106_inLine
+BABEL_OP1_203_29023_20130313_195106_outLine
+BABEL_OP1_203_29039_20130402_153541_inLine
+BABEL_OP1_203_29039_20130402_153541_outLine
+BABEL_OP1_203_29168_20130306_213504_inLine
+BABEL_OP1_203_29168_20130306_213504_outLine
+BABEL_OP1_203_29323_20130403_215525_inLine
+BABEL_OP1_203_29323_20130403_215525_outLine
+BABEL_OP1_203_29416_20130421_133101_inLine
+BABEL_OP1_203_29416_20130421_133101_outLine
+BABEL_OP1_203_29439_20130422_150608_inLine
+BABEL_OP1_203_29439_20130422_150608_outLine
+BABEL_OP1_203_30013_20130331_170538_inLine
+BABEL_OP1_203_30013_20130331_170538_outLine
+BABEL_OP1_203_30395_20130318_180120_inLine
+BABEL_OP1_203_30395_20130318_180120_outLine
+BABEL_OP1_203_30645_20130309_151850_inLine
+BABEL_OP1_203_30645_20130309_151850_outLine
+BABEL_OP1_203_31184_20130322_141512_inLine
+BABEL_OP1_203_31184_20130322_141512_outLine
+BABEL_OP1_203_31184_20130322_142743_inLine
+BABEL_OP1_203_31184_20130322_142743_outLine
+BABEL_OP1_203_31490_20130321_210518_inLine
+BABEL_OP1_203_31490_20130321_210518_outLine
+BABEL_OP1_203_31992_20130313_143826_inLine
+BABEL_OP1_203_31992_20130313_143826_outLine
+BABEL_OP1_203_32097_20130304_195431_inLine
+BABEL_OP1_203_32097_20130304_195431_outLine
+BABEL_OP1_203_32122_20130320_174321_inLine
+BABEL_OP1_203_32122_20130320_174321_outLine
+BABEL_OP1_203_32122_20130320_175419_inLine
+BABEL_OP1_203_32122_20130320_175419_outLine
+BABEL_OP1_203_32244_20130728_182847_inLine
+BABEL_OP1_203_32244_20130728_182847_outLine
+BABEL_OP1_203_32998_20130329_155417_inLine
+BABEL_OP1_203_32998_20130329_155417_outLine
+BABEL_OP1_203_33476_20130320_140412_inLine
+BABEL_OP1_203_33476_20130320_140412_outLine
+BABEL_OP1_203_33672_20130312_165130_inLine
+BABEL_OP1_203_33672_20130312_165130_outLine
+BABEL_OP1_203_33704_20130405_220001_inLine
+BABEL_OP1_203_33704_20130405_220001_outLine
+BABEL_OP1_203_33840_20130803_192343_inLine
+BABEL_OP1_203_33840_20130803_192343_outLine
+BABEL_OP1_203_34145_20130331_145240_inLine
+BABEL_OP1_203_34145_20130331_145240_outLine
+BABEL_OP1_203_35139_20130313_143646_inLine
+BABEL_OP1_203_35139_20130313_143646_outLine
+BABEL_OP1_203_36505_20130731_191406_inLine
+BABEL_OP1_203_36505_20130731_191406_outLine
+BABEL_OP1_203_36594_20130421_182303_inLine
+BABEL_OP1_203_36594_20130421_182303_outLine
+BABEL_OP1_203_37598_20130330_000102_inLine
+BABEL_OP1_203_37598_20130330_000102_outLine
+BABEL_OP1_203_38979_20130409_173446_inLine
+BABEL_OP1_203_38979_20130409_173446_outLine
+BABEL_OP1_203_38979_20130409_174405_inLine
+BABEL_OP1_203_38979_20130409_174405_outLine
+BABEL_OP1_203_39006_20130506_192659_inLine
+BABEL_OP1_203_39006_20130506_192659_outLine
+BABEL_OP1_203_39555_20130720_183746_inLine
+BABEL_OP1_203_39555_20130720_183746_outLine
+BABEL_OP1_203_39848_20130320_133756_inLine
+BABEL_OP1_203_39848_20130320_133756_outLine
+BABEL_OP1_203_40557_20130404_005522_inLine
+BABEL_OP1_203_40557_20130404_005522_outLine
+BABEL_OP1_203_40565_20130331_171210_inLine
+BABEL_OP1_203_40565_20130331_171210_outLine
+BABEL_OP1_203_41073_20130721_172038_inLine
+BABEL_OP1_203_41073_20130721_172038_outLine
+BABEL_OP1_203_41174_20130318_203041_inLine
+BABEL_OP1_203_41174_20130318_203041_outLine
+BABEL_OP1_203_41334_20130501_232034_inLine
+BABEL_OP1_203_41334_20130501_232034_outLine
+BABEL_OP1_203_41442_20130404_174409_inLine
+BABEL_OP1_203_41442_20130404_174409_outLine
+BABEL_OP1_203_41469_20130313_185923_inLine
+BABEL_OP1_203_41469_20130313_185923_outLine
+BABEL_OP1_203_41609_20130309_175203_inLine
+BABEL_OP1_203_41609_20130309_175203_outLine
+BABEL_OP1_203_42029_20130403_184623_inLine
+BABEL_OP1_203_42029_20130403_184623_outLine
+BABEL_OP1_203_42299_20130508_203220_inLine
+BABEL_OP1_203_42299_20130508_203220_outLine
+BABEL_OP1_203_42299_20130508_204824_inLine
+BABEL_OP1_203_42299_20130508_204824_outLine
+BABEL_OP1_203_42309_20130428_191239_inLine
+BABEL_OP1_203_42309_20130428_191239_outLine
+BABEL_OP1_203_42434_20130323_160637_inLine
+BABEL_OP1_203_42434_20130323_160637_outLine
+BABEL_OP1_203_42848_20130513_201112_inLine
+BABEL_OP1_203_42848_20130513_201112_outLine
+BABEL_OP1_203_43368_20130327_215424_inLine
+BABEL_OP1_203_43368_20130327_215424_outLine
+BABEL_OP1_203_43388_20130327_192024_inLine
+BABEL_OP1_203_43388_20130327_192024_outLine
+BABEL_OP1_203_43588_20130714_163553_inLine
+BABEL_OP1_203_43588_20130714_163553_outLine
+BABEL_OP1_203_43784_20130314_171933_inLine
+BABEL_OP1_203_43784_20130314_171933_outLine
+BABEL_OP1_203_43788_20130504_173234_inLine
+BABEL_OP1_203_43788_20130504_173234_outLine
+BABEL_OP1_203_43920_20130405_194800_inLine
+BABEL_OP1_203_43920_20130405_194800_outLine
+BABEL_OP1_203_44478_20130730_170938_inLine
+BABEL_OP1_203_44478_20130730_170938_outLine
+BABEL_OP1_203_44619_20130313_175437_inLine
+BABEL_OP1_203_44619_20130313_175437_outLine
+BABEL_OP1_203_44709_20130331_183159_inLine
+BABEL_OP1_203_44709_20130331_183159_outLine
+BABEL_OP1_203_44961_20130311_173427_inLine
+BABEL_OP1_203_44961_20130311_173427_outLine
+BABEL_OP1_203_45560_20130309_173444_inLine
+BABEL_OP1_203_45560_20130309_173444_outLine
+BABEL_OP1_203_45851_20130801_014413_inLine
+BABEL_OP1_203_45851_20130801_014413_outLine
+BABEL_OP1_203_46310_20130309_211431_inLine
+BABEL_OP1_203_46310_20130309_211431_outLine
+BABEL_OP1_203_46550_20130313_153012_inLine
+BABEL_OP1_203_46550_20130313_153012_outLine
+BABEL_OP1_203_46681_20130313_203139_inLine
+BABEL_OP1_203_46681_20130313_203139_outLine
+BABEL_OP1_203_46688_20130314_212550_inLine
+BABEL_OP1_203_46688_20130314_212550_outLine
+BABEL_OP1_203_47186_20130405_120609_inLine
+BABEL_OP1_203_47186_20130405_120609_outLine
+BABEL_OP1_203_47487_20130321_145055_inLine
+BABEL_OP1_203_47487_20130321_145055_outLine
+BABEL_OP1_203_47823_20130406_151016_inLine
+BABEL_OP1_203_47823_20130406_151016_outLine
+BABEL_OP1_203_47866_20130723_152640_inLine
+BABEL_OP1_203_47866_20130723_152640_outLine
+BABEL_OP1_203_48422_20130425_175947_inLine
+BABEL_OP1_203_48422_20130425_175947_outLine
+BABEL_OP1_203_48610_20130309_222037_inLine
+BABEL_OP1_203_48610_20130309_222037_outLine
+BABEL_OP1_203_49001_20130315_160533_inLine
+BABEL_OP1_203_49001_20130315_160533_outLine
+BABEL_OP1_203_49216_20130307_211955_inLine
+BABEL_OP1_203_49216_20130307_211955_outLine
+BABEL_OP1_203_49287_20130331_155341_inLine
+BABEL_OP1_203_49287_20130331_155341_outLine
+BABEL_OP1_203_49437_20130405_194333_inLine
+BABEL_OP1_203_49437_20130405_194333_outLine
+BABEL_OP1_203_49437_20130405_195645_inLine
+BABEL_OP1_203_49437_20130405_195645_outLine
+BABEL_OP1_203_49630_20130408_182919_inLine
+BABEL_OP1_203_49630_20130408_182919_outLine
+BABEL_OP1_203_49768_20130320_164815_inLine
+BABEL_OP1_203_49768_20130320_164815_outLine
+BABEL_OP1_203_50090_20130726_145642_inLine
+BABEL_OP1_203_50090_20130726_145642_outLine
+BABEL_OP1_203_50175_20130311_181803_inLine
+BABEL_OP1_203_50175_20130311_181803_outLine
+BABEL_OP1_203_51530_20130803_174620_inLine
+BABEL_OP1_203_51530_20130803_174620_outLine
+BABEL_OP1_203_51611_20130312_195333_inLine
+BABEL_OP1_203_51611_20130312_195333_outLine
+BABEL_OP1_203_51701_20130508_232537_inLine
+BABEL_OP1_203_51701_20130508_232537_outLine
+BABEL_OP1_203_51819_20130328_150620_inLine
+BABEL_OP1_203_51819_20130328_150620_outLine
+BABEL_OP1_203_51955_20130314_175859_inLine
+BABEL_OP1_203_51955_20130314_175859_outLine
+BABEL_OP1_203_51955_20130314_180731_inLine
+BABEL_OP1_203_51955_20130314_180731_outLine
+BABEL_OP1_203_52246_20130319_221049_inLine
+BABEL_OP1_203_52246_20130319_221049_outLine
+BABEL_OP1_203_52272_20130313_140038_inLine
+BABEL_OP1_203_52272_20130313_140038_outLine
+BABEL_OP1_203_52404_20130409_005414_inLine
+BABEL_OP1_203_52404_20130409_005414_outLine
+BABEL_OP1_203_52422_20130427_140502_inLine
+BABEL_OP1_203_52422_20130427_140502_outLine
+BABEL_OP1_203_52490_20130309_141915_inLine
+BABEL_OP1_203_52490_20130309_141915_outLine
+BABEL_OP1_203_52717_20130311_173849_inLine
+BABEL_OP1_203_52717_20130311_173849_outLine
+BABEL_OP1_203_53063_20130407_210935_inLine
+BABEL_OP1_203_53063_20130407_210935_outLine
+BABEL_OP1_203_53665_20130727_150857_inLine
+BABEL_OP1_203_53665_20130727_150857_outLine
+BABEL_OP1_203_53842_20130322_165451_inLine
+BABEL_OP1_203_53842_20130322_165451_outLine
+BABEL_OP1_203_54074_20130319_150208_inLine
+BABEL_OP1_203_54074_20130319_150208_outLine
+BABEL_OP1_203_54104_20130309_204103_inLine
+BABEL_OP1_203_54104_20130309_204103_outLine
+BABEL_OP1_203_54390_20130313_161947_inLine
+BABEL_OP1_203_54390_20130313_161947_outLine
+BABEL_OP1_203_54477_20130408_133628_inLine
+BABEL_OP1_203_54477_20130408_133628_outLine
+BABEL_OP1_203_54530_20130424_194302_inLine
+BABEL_OP1_203_54530_20130424_194302_outLine
+BABEL_OP1_203_54697_20130405_153323_inLine
+BABEL_OP1_203_54697_20130405_153323_outLine
+BABEL_OP1_203_54827_20130803_201026_inLine
+BABEL_OP1_203_54827_20130803_201026_outLine
+BABEL_OP1_203_54953_20130319_135125_inLine
+BABEL_OP1_203_54953_20130319_135125_outLine
+BABEL_OP1_203_55259_20130323_181918_inLine
+BABEL_OP1_203_55259_20130323_181918_outLine
+BABEL_OP1_203_55950_20130728_141857_inLine
+BABEL_OP1_203_55950_20130728_141857_outLine
+BABEL_OP1_203_56076_20130728_212423_inLine
+BABEL_OP1_203_56076_20130728_212423_outLine
+BABEL_OP1_203_56198_20130314_163346_inLine
+BABEL_OP1_203_56198_20130314_163346_outLine
+BABEL_OP1_203_56198_20130314_164412_inLine
+BABEL_OP1_203_56198_20130314_164412_outLine
+BABEL_OP1_203_56306_20130408_202539_inLine
+BABEL_OP1_203_56306_20130408_202539_outLine
+BABEL_OP1_203_56307_20130401_212823_inLine
+BABEL_OP1_203_56307_20130401_212823_outLine
+BABEL_OP1_203_56826_20130403_155349_inLine
+BABEL_OP1_203_56826_20130403_155349_outLine
+BABEL_OP1_203_57093_20130323_155842_inLine
+BABEL_OP1_203_57093_20130323_155842_outLine
+BABEL_OP1_203_57116_20130306_200913_inLine
+BABEL_OP1_203_57116_20130306_200913_outLine
+BABEL_OP1_203_57529_20130404_225031_inLine
+BABEL_OP1_203_57529_20130404_225031_outLine
+BABEL_OP1_203_57678_20130319_173142_inLine
+BABEL_OP1_203_57678_20130319_173142_outLine
+BABEL_OP1_203_58107_20130331_163124_inLine
+BABEL_OP1_203_58107_20130331_163124_outLine
+BABEL_OP1_203_58107_20130331_164049_inLine
+BABEL_OP1_203_58107_20130331_164049_outLine
+BABEL_OP1_203_58145_20130404_174142_inLine
+BABEL_OP1_203_58145_20130404_174142_outLine
+BABEL_OP1_203_58489_20130406_171644_inLine
+BABEL_OP1_203_58489_20130406_171644_outLine
+BABEL_OP1_203_58821_20130330_171943_inLine
+BABEL_OP1_203_58821_20130330_171943_outLine
+BABEL_OP1_203_58915_20130508_170813_inLine
+BABEL_OP1_203_58915_20130508_170813_outLine
+BABEL_OP1_203_58926_20130314_221922_inLine
+BABEL_OP1_203_58926_20130314_221922_outLine
+BABEL_OP1_203_59078_20130328_222520_inLine
+BABEL_OP1_203_59078_20130328_222520_outLine
+BABEL_OP1_203_59307_20130503_211805_inLine
+BABEL_OP1_203_59307_20130503_211805_outLine
+BABEL_OP1_203_59720_20130323_160840_inLine
+BABEL_OP1_203_59720_20130323_160840_outLine
+BABEL_OP1_203_59747_20130307_185538_inLine
+BABEL_OP1_203_59747_20130307_185538_outLine
+BABEL_OP1_203_59864_20130719_183902_inLine
+BABEL_OP1_203_59864_20130719_183902_outLine
+BABEL_OP1_203_59928_20130314_205249_inLine
+BABEL_OP1_203_59928_20130314_205249_outLine
+BABEL_OP1_203_60026_20130311_192442_inLine
+BABEL_OP1_203_60026_20130311_192442_outLine
+BABEL_OP1_203_60352_20130724_151721_inLine
+BABEL_OP1_203_60352_20130724_151721_outLine
+BABEL_OP1_203_60397_20130814_170113_inLine
+BABEL_OP1_203_60397_20130814_170113_outLine
+BABEL_OP1_203_60436_20130726_213808_inLine
+BABEL_OP1_203_60436_20130726_213808_outLine
+BABEL_OP1_203_60830_20130323_152836_inLine
+BABEL_OP1_203_60830_20130323_152836_outLine
+BABEL_OP1_203_61225_20130310_001509_inLine
+BABEL_OP1_203_61225_20130310_001509_outLine
+BABEL_OP1_203_61225_20130310_002607_inLine
+BABEL_OP1_203_61225_20130310_002607_outLine
+BABEL_OP1_203_61435_20130421_175121_inLine
+BABEL_OP1_203_61435_20130421_175121_outLine
+BABEL_OP1_203_61440_20130513_143551_inLine
+BABEL_OP1_203_61440_20130513_143551_outLine
+BABEL_OP1_203_61888_20130410_154115_inLine
+BABEL_OP1_203_61888_20130410_154115_outLine
+BABEL_OP1_203_62014_20130503_150317_inLine
+BABEL_OP1_203_62014_20130503_150317_outLine
+BABEL_OP1_203_62200_20130320_155842_inLine
+BABEL_OP1_203_62200_20130320_155842_outLine
+BABEL_OP1_203_62360_20130729_185133_inLine
+BABEL_OP1_203_62360_20130729_185133_outLine
+BABEL_OP1_203_62714_20130430_183624_inLine
+BABEL_OP1_203_62714_20130430_183624_outLine
+BABEL_OP1_203_62800_20130307_204137_inLine
+BABEL_OP1_203_62800_20130307_204137_outLine
+BABEL_OP1_203_62976_20130512_201748_inLine
+BABEL_OP1_203_62976_20130512_201748_outLine
+BABEL_OP1_203_63730_20130507_163540_inLine
+BABEL_OP1_203_63730_20130507_163540_outLine
+BABEL_OP1_203_64065_20130326_201717_inLine
+BABEL_OP1_203_64065_20130326_201717_outLine
+BABEL_OP1_203_64065_20130326_202638_inLine
+BABEL_OP1_203_64065_20130326_202638_outLine
+BABEL_OP1_203_65913_20130726_205358_inLine
+BABEL_OP1_203_65913_20130726_205358_outLine
+BABEL_OP1_203_66001_20130309_233448_inLine
+BABEL_OP1_203_66001_20130309_233448_outLine
+BABEL_OP1_203_66045_20130323_203735_inLine
+BABEL_OP1_203_66045_20130323_203735_outLine
+BABEL_OP1_203_66822_20130324_142935_inLine
+BABEL_OP1_203_66822_20130324_142935_outLine
+BABEL_OP1_203_66916_20130308_142310_inLine
+BABEL_OP1_203_66916_20130308_142310_outLine
+BABEL_OP1_203_66971_20130725_151439_inLine
+BABEL_OP1_203_66971_20130725_151439_outLine
+BABEL_OP1_203_67066_20130509_215551_inLine
+BABEL_OP1_203_67066_20130509_215551_outLine
+BABEL_OP1_203_68289_20130409_222355_inLine
+BABEL_OP1_203_68289_20130409_222355_outLine
+BABEL_OP1_203_68385_20130221_213027_inLine
+BABEL_OP1_203_68385_20130221_213027_outLine
+BABEL_OP1_203_69096_20130714_153203_inLine
+BABEL_OP1_203_69096_20130714_153203_outLine
+BABEL_OP1_203_69474_20130409_153705_inLine
+BABEL_OP1_203_69474_20130409_153705_outLine
+BABEL_OP1_203_69964_20130801_183705_inLine
+BABEL_OP1_203_69964_20130801_183705_outLine
+BABEL_OP1_203_70221_20130502_153055_inLine
+BABEL_OP1_203_70221_20130502_153055_outLine
+BABEL_OP1_203_70386_20130315_162835_inLine
+BABEL_OP1_203_70386_20130315_162835_outLine
+BABEL_OP1_203_70716_20130731_182939_inLine
+BABEL_OP1_203_70716_20130731_182939_outLine
+BABEL_OP1_203_71067_20130503_201919_inLine
+BABEL_OP1_203_71067_20130503_201919_outLine
+BABEL_OP1_203_71566_20130406_212124_inLine
+BABEL_OP1_203_71566_20130406_212124_outLine
+BABEL_OP1_203_72324_20130721_195442_inLine
+BABEL_OP1_203_72324_20130721_195442_outLine
+BABEL_OP1_203_72587_20130331_220349_inLine
+BABEL_OP1_203_72587_20130331_220349_outLine
+BABEL_OP1_203_73591_20130222_132516_inLine
+BABEL_OP1_203_73591_20130222_132516_outLine
+BABEL_OP1_203_74667_20130322_155857_inLine
+BABEL_OP1_203_74667_20130322_155857_outLine
+BABEL_OP1_203_74886_20130309_200304_inLine
+BABEL_OP1_203_74886_20130309_200304_outLine
+BABEL_OP1_203_75064_20130322_142556_inLine
+BABEL_OP1_203_75064_20130322_142556_outLine
+BABEL_OP1_203_75342_20130404_193602_inLine
+BABEL_OP1_203_75342_20130404_193602_outLine
+BABEL_OP1_203_75869_20130721_161850_inLine
+BABEL_OP1_203_75869_20130721_161850_outLine
+BABEL_OP1_203_76444_20130406_153810_inLine
+BABEL_OP1_203_76444_20130406_153810_outLine
+BABEL_OP1_203_76482_20130508_220808_inLine
+BABEL_OP1_203_76482_20130508_220808_outLine
+BABEL_OP1_203_77242_20130508_191854_inLine
+BABEL_OP1_203_77242_20130508_191854_outLine
+BABEL_OP1_203_79131_20130727_202021_inLine
+BABEL_OP1_203_79131_20130727_202021_outLine
+BABEL_OP1_203_79660_20130512_173422_inLine
+BABEL_OP1_203_79660_20130512_173422_outLine
+BABEL_OP1_203_80134_20130814_145021_inLine
+BABEL_OP1_203_80134_20130814_145021_outLine
+BABEL_OP1_203_81287_20130403_225530_inLine
+BABEL_OP1_203_81287_20130403_225530_outLine
+BABEL_OP1_203_82224_20130718_134750_inLine
+BABEL_OP1_203_82224_20130718_134750_outLine
+BABEL_OP1_203_84339_20130802_181641_inLine
+BABEL_OP1_203_84339_20130802_181641_outLine
+BABEL_OP1_203_84469_20130421_132749_inLine
+BABEL_OP1_203_84469_20130421_132749_outLine
+BABEL_OP1_203_84611_20130312_152852_inLine
+BABEL_OP1_203_84611_20130312_152852_outLine
+BABEL_OP1_203_85325_20130802_212902_inLine
+BABEL_OP1_203_85325_20130802_212902_outLine
+BABEL_OP1_203_86597_20130508_182316_inLine
+BABEL_OP1_203_86597_20130508_182316_outLine
+BABEL_OP1_203_86628_20130512_215243_inLine
+BABEL_OP1_203_86628_20130512_215243_outLine
+BABEL_OP1_203_86878_20130804_174949_inLine
+BABEL_OP1_203_86878_20130804_174949_outLine
+BABEL_OP1_203_86891_20130427_122020_inLine
+BABEL_OP1_203_86891_20130427_122020_outLine
+BABEL_OP1_203_87305_20130512_150816_inLine
+BABEL_OP1_203_87305_20130512_150816_outLine
+BABEL_OP1_203_89358_20130327_183946_inLine
+BABEL_OP1_203_89358_20130327_183946_outLine
+BABEL_OP1_203_89943_20130319_151705_inLine
+BABEL_OP1_203_89943_20130319_151705_outLine
+BABEL_OP1_203_90709_20130311_171156_inLine
+BABEL_OP1_203_90709_20130311_171156_outLine
+BABEL_OP1_203_91760_20130728_190550_inLine
+BABEL_OP1_203_91760_20130728_190550_outLine
+BABEL_OP1_203_92077_20130725_140650_inLine
+BABEL_OP1_203_92077_20130725_140650_outLine
+BABEL_OP1_203_93411_20130324_150550_inLine
+BABEL_OP1_203_93411_20130324_150550_outLine
+BABEL_OP1_203_93490_20130804_201521_inLine
+BABEL_OP1_203_93490_20130804_201521_outLine
+BABEL_OP1_203_93964_20130327_171307_inLine
+BABEL_OP1_203_93964_20130327_171307_outLine
+BABEL_OP1_203_94442_20130727_182743_inLine
+BABEL_OP1_203_94442_20130727_182743_outLine
+BABEL_OP1_203_94449_20130801_010717_inLine
+BABEL_OP1_203_94449_20130801_010717_outLine
+BABEL_OP1_203_95338_20130727_211019_inLine
+BABEL_OP1_203_95338_20130727_211019_outLine
+BABEL_OP1_203_96059_20130731_211048_inLine
+BABEL_OP1_203_96059_20130731_211048_outLine
+BABEL_OP1_203_96376_20130731_143340_outLine
+BABEL_OP1_203_96690_20130320_183730_inLine
+BABEL_OP1_203_96690_20130320_183730_outLine
+BABEL_OP1_203_96690_20130320_185039_inLine
+BABEL_OP1_203_96690_20130320_185039_outLine
+BABEL_OP1_203_97220_20130508_165310_inLine
+BABEL_OP1_203_97220_20130508_165310_outLine
+BABEL_OP1_203_97836_20130430_195102_inLine
+BABEL_OP1_203_97836_20130430_195102_outLine
+BABEL_OP1_203_98192_20130511_210223_inLine
+BABEL_OP1_203_98192_20130511_210223_outLine
diff --git a/egs/babel/s5d/conf/lists/203-lao/train.untranscribed.list b/egs/babel/s5d/conf/lists/203-lao/train.untranscribed.list
new file mode 100644
index 00000000000..38bcbffd9e6
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/203-lao/train.untranscribed.list
@@ -0,0 +1,257 @@
+BABEL_OP1_203_16184_20130309_181723_inLine
+BABEL_OP1_203_29777_20130424_230709_inLine
+BABEL_OP1_203_29777_20130424_230709_outLine
+BABEL_OP1_203_30253_20130406_221820_inLine
+BABEL_OP1_203_30253_20130406_221820_outLine
+BABEL_OP1_203_30497_20130724_152950_inLine
+BABEL_OP1_203_30497_20130724_152950_outLine
+BABEL_OP1_203_30497_20130724_154924_inLine
+BABEL_OP1_203_30497_20130724_154924_outLine
+BABEL_OP1_203_30653_20130422_190728_inLine
+BABEL_OP1_203_30653_20130422_190728_outLine
+BABEL_OP1_203_31182_20130407_165109_inLine
+BABEL_OP1_203_31182_20130407_165109_outLine
+BABEL_OP1_203_33229_20130716_174756_inLine
+BABEL_OP1_203_33229_20130716_174756_outLine
+BABEL_OP1_203_33273_20130321_185940_inLine
+BABEL_OP1_203_33273_20130321_185940_outLine
+BABEL_OP1_203_34688_20130309_150605_inLine
+BABEL_OP1_203_34688_20130309_150605_outLine
+BABEL_OP1_203_35202_20130403_172345_inLine
+BABEL_OP1_203_35202_20130403_172345_outLine
+BABEL_OP1_203_35885_20130423_124518_inLine
+BABEL_OP1_203_35885_20130423_124518_outLine
+BABEL_OP1_203_36017_20130727_194800_inLine
+BABEL_OP1_203_36017_20130727_194800_outLine
+BABEL_OP1_203_36059_20130420_141048_inLine
+BABEL_OP1_203_36059_20130420_141048_outLine
+BABEL_OP1_203_37064_20130315_163413_inLine
+BABEL_OP1_203_37064_20130315_163413_outLine
+BABEL_OP1_203_39159_20130307_161600_inLine
+BABEL_OP1_203_39159_20130307_161600_outLine
+BABEL_OP1_203_40740_20130425_194217_inLine
+BABEL_OP1_203_40740_20130425_194217_outLine
+BABEL_OP1_203_42718_20130719_184452_inLine
+BABEL_OP1_203_42718_20130719_184452_outLine
+BABEL_OP1_203_43285_20130403_143505_inLine
+BABEL_OP1_203_43285_20130403_143505_outLine
+BABEL_OP1_203_44309_20130724_151039_inLine
+BABEL_OP1_203_44309_20130724_151039_outLine
+BABEL_OP1_203_44681_20130808_235229_inLine
+BABEL_OP1_203_44681_20130808_235229_outLine
+BABEL_OP1_203_44847_20130713_184411_inLine
+BABEL_OP1_203_44847_20130713_184411_outLine
+BABEL_OP1_203_45201_20130802_170453_inLine
+BABEL_OP1_203_45201_20130802_170453_outLine
+BABEL_OP1_203_45697_20130410_000422_inLine
+BABEL_OP1_203_45697_20130410_000422_outLine
+BABEL_OP1_203_46702_20130306_164740_inLine
+BABEL_OP1_203_46702_20130306_164740_outLine
+BABEL_OP1_203_46712_20130323_203036_inLine
+BABEL_OP1_203_46712_20130323_203036_outLine
+BABEL_OP1_203_46881_20130307_203600_inLine
+BABEL_OP1_203_46881_20130307_203600_outLine
+BABEL_OP1_203_46974_20130404_232711_inLine
+BABEL_OP1_203_46974_20130404_232711_outLine
+BABEL_OP1_203_49197_20130318_181956_inLine
+BABEL_OP1_203_49197_20130318_181956_outLine
+BABEL_OP1_203_49767_20130430_202016_inLine
+BABEL_OP1_203_49767_20130430_202016_outLine
+BABEL_OP1_203_49812_20130808_171144_inLine
+BABEL_OP1_203_49812_20130808_171144_outLine
+BABEL_OP1_203_52070_20130808_163435_inLine
+BABEL_OP1_203_52070_20130808_163435_outLine
+BABEL_OP1_203_52442_20130506_145255_inLine
+BABEL_OP1_203_52442_20130506_145255_outLine
+BABEL_OP1_203_52614_20130727_194453_inLine
+BABEL_OP1_203_52614_20130727_194453_outLine
+BABEL_OP1_203_53419_20130406_175304_inLine
+BABEL_OP1_203_53419_20130406_175304_outLine
+BABEL_OP1_203_54040_20130406_184426_inLine
+BABEL_OP1_203_54040_20130406_184426_outLine
+BABEL_OP1_203_54405_20130729_003503_inLine
+BABEL_OP1_203_54405_20130729_003503_outLine
+BABEL_OP1_203_55267_20130505_191654_inLine
+BABEL_OP1_203_55267_20130505_191654_outLine
+BABEL_OP1_203_57219_20130502_194506_inLine
+BABEL_OP1_203_57219_20130502_194506_outLine
+BABEL_OP1_203_57464_20130725_171314_inLine
+BABEL_OP1_203_57464_20130725_171314_outLine
+BABEL_OP1_203_60626_20130322_152952_inLine
+BABEL_OP1_203_60626_20130322_152952_outLine
+BABEL_OP1_203_61971_20130725_164007_inLine
+BABEL_OP1_203_61971_20130725_164007_outLine
+BABEL_OP1_203_62047_20130407_151438_inLine
+BABEL_OP1_203_62047_20130407_151438_outLine
+BABEL_OP1_203_62286_20130320_214620_inLine
+BABEL_OP1_203_62286_20130320_214620_outLine
+BABEL_OP1_203_62456_20130328_142035_inLine
+BABEL_OP1_203_62456_20130328_142035_outLine
+BABEL_OP1_203_62835_20130323_203456_inLine
+BABEL_OP1_203_62835_20130323_203456_outLine
+BABEL_OP1_203_62852_20130306_200729_inLine
+BABEL_OP1_203_62852_20130306_200729_outLine
+BABEL_OP1_203_63220_20130331_212757_inLine
+BABEL_OP1_203_63220_20130331_212757_outLine
+BABEL_OP1_203_63445_20130307_151033_inLine
+BABEL_OP1_203_63445_20130307_151033_outLine
+BABEL_OP1_203_63757_20130328_223730_inLine
+BABEL_OP1_203_63757_20130328_223730_outLine
+BABEL_OP1_203_63938_20130410_173153_inLine
+BABEL_OP1_203_63938_20130410_173153_outLine
+BABEL_OP1_203_64494_20130313_131022_inLine
+BABEL_OP1_203_64494_20130313_131022_outLine
+BABEL_OP1_203_64638_20130410_142811_inLine
+BABEL_OP1_203_64638_20130410_142811_outLine
+BABEL_OP1_203_64759_20130309_200819_inLine
+BABEL_OP1_203_64759_20130309_200819_outLine
+BABEL_OP1_203_64796_20130307_184812_inLine
+BABEL_OP1_203_64796_20130307_184812_outLine
+BABEL_OP1_203_65466_20130725_163637_inLine
+BABEL_OP1_203_65466_20130725_163637_outLine
+BABEL_OP1_203_65477_20130320_173710_inLine
+BABEL_OP1_203_65477_20130320_173710_outLine
+BABEL_OP1_203_65477_20130320_180148_inLine
+BABEL_OP1_203_65477_20130320_180148_outLine
+BABEL_OP1_203_65477_20130320_201453_inLine
+BABEL_OP1_203_65477_20130320_201453_outLine
+BABEL_OP1_203_65639_20130806_171139_inLine
+BABEL_OP1_203_65639_20130806_171139_outLine
+BABEL_OP1_203_66837_20130405_182629_inLine
+BABEL_OP1_203_66837_20130405_182629_outLine
+BABEL_OP1_203_66959_20130401_000804_inLine
+BABEL_OP1_203_66959_20130401_000804_outLine
+BABEL_OP1_203_66967_20130309_193012_inLine
+BABEL_OP1_203_66967_20130309_193012_outLine
+BABEL_OP1_203_67726_20130815_142409_inLine
+BABEL_OP1_203_67726_20130815_142409_outLine
+BABEL_OP1_203_68910_20130819_161909_inLine
+BABEL_OP1_203_68910_20130819_161909_outLine
+BABEL_OP1_203_68910_20130819_163243_inLine
+BABEL_OP1_203_68910_20130819_163243_outLine
+BABEL_OP1_203_69633_20130425_200355_inLine
+BABEL_OP1_203_69633_20130425_200355_outLine
+BABEL_OP1_203_69982_20130506_163359_inLine
+BABEL_OP1_203_69982_20130506_163359_outLine
+BABEL_OP1_203_70282_20130329_152316_inLine
+BABEL_OP1_203_70282_20130329_152316_outLine
+BABEL_OP1_203_71704_20130312_213023_inLine
+BABEL_OP1_203_71704_20130312_213023_outLine
+BABEL_OP1_203_72349_20130726_200409_inLine
+BABEL_OP1_203_72349_20130726_200409_outLine
+BABEL_OP1_203_72844_20130307_143012_inLine
+BABEL_OP1_203_72844_20130307_143012_outLine
+BABEL_OP1_203_73622_20130311_175840_inLine
+BABEL_OP1_203_73622_20130311_175840_outLine
+BABEL_OP1_203_74253_20130403_190412_inLine
+BABEL_OP1_203_74253_20130403_190412_outLine
+BABEL_OP1_203_75366_20130430_153011_inLine
+BABEL_OP1_203_75366_20130430_153011_outLine
+BABEL_OP1_203_75465_20130408_174529_inLine
+BABEL_OP1_203_75465_20130408_174529_outLine
+BABEL_OP1_203_76218_20130320_160931_inLine
+BABEL_OP1_203_76218_20130320_160931_outLine
+BABEL_OP1_203_76218_20130320_162301_inLine
+BABEL_OP1_203_76218_20130320_162301_outLine
+BABEL_OP1_203_76773_20130313_174635_inLine
+BABEL_OP1_203_76773_20130313_174635_outLine
+BABEL_OP1_203_76970_20130502_140228_inLine
+BABEL_OP1_203_76970_20130502_140228_outLine
+BABEL_OP1_203_76970_20130502_141316_inLine
+BABEL_OP1_203_76970_20130502_141316_outLine
+BABEL_OP1_203_77391_20130321_134502_inLine
+BABEL_OP1_203_77391_20130321_134502_outLine
+BABEL_OP1_203_77567_20130307_183648_inLine
+BABEL_OP1_203_77567_20130307_183648_outLine
+BABEL_OP1_203_78609_20130411_135436_inLine
+BABEL_OP1_203_78609_20130411_135436_outLine
+BABEL_OP1_203_78958_20130815_152142_inLine
+BABEL_OP1_203_78958_20130815_152142_outLine
+BABEL_OP1_203_78976_20130320_143441_inLine
+BABEL_OP1_203_78976_20130320_143441_outLine
+BABEL_OP1_203_79107_20130501_145558_inLine
+BABEL_OP1_203_79107_20130501_145558_outLine
+BABEL_OP1_203_79571_20130401_193207_inLine
+BABEL_OP1_203_79571_20130401_193207_outLine
+BABEL_OP1_203_79858_20130309_212924_inLine
+BABEL_OP1_203_79858_20130309_212924_outLine
+BABEL_OP1_203_80721_20130402_142121_inLine
+BABEL_OP1_203_80721_20130402_142121_outLine
+BABEL_OP1_203_80897_20130328_174210_inLine
+BABEL_OP1_203_80897_20130328_174210_outLine
+BABEL_OP1_203_81229_20130321_133228_inLine
+BABEL_OP1_203_81229_20130321_133228_outLine
+BABEL_OP1_203_81854_20130730_230009_inLine
+BABEL_OP1_203_81854_20130730_230009_outLine
+BABEL_OP1_203_82966_20130405_153412_inLine
+BABEL_OP1_203_82966_20130405_153412_outLine
+BABEL_OP1_203_83366_20130428_224139_inLine
+BABEL_OP1_203_83366_20130428_224139_outLine
+BABEL_OP1_203_83775_20130319_135705_inLine
+BABEL_OP1_203_83775_20130319_135705_outLine
+BABEL_OP1_203_84029_20130812_185834_inLine
+BABEL_OP1_203_84029_20130812_185834_outLine
+BABEL_OP1_203_84125_20130306_192759_inLine
+BABEL_OP1_203_84125_20130306_192759_outLine
+BABEL_OP1_203_84583_20130409_145116_inLine
+BABEL_OP1_203_84583_20130409_145116_outLine
+BABEL_OP1_203_85248_20130403_172428_inLine
+BABEL_OP1_203_85248_20130403_172428_outLine
+BABEL_OP1_203_85248_20130403_173731_inLine
+BABEL_OP1_203_85248_20130403_173731_outLine
+BABEL_OP1_203_86748_20130424_181510_inLine
+BABEL_OP1_203_86748_20130424_181510_outLine
+BABEL_OP1_203_87871_20130403_233602_inLine
+BABEL_OP1_203_87871_20130403_233602_outLine
+BABEL_OP1_203_88812_20130724_142719_inLine
+BABEL_OP1_203_88812_20130724_142719_outLine
+BABEL_OP1_203_89045_20130306_200546_inLine
+BABEL_OP1_203_89045_20130306_200546_outLine
+BABEL_OP1_203_90935_20130319_215413_inLine
+BABEL_OP1_203_90935_20130319_215413_outLine
+BABEL_OP1_203_91581_20130406_211109_inLine
+BABEL_OP1_203_91581_20130406_211109_outLine
+BABEL_OP1_203_91593_20130511_222420_inLine
+BABEL_OP1_203_91593_20130511_222420_outLine
+BABEL_OP1_203_91825_20130310_211043_inLine
+BABEL_OP1_203_91825_20130310_211043_outLine
+BABEL_OP1_203_91884_20130422_190145_inLine
+BABEL_OP1_203_91884_20130422_190145_outLine
+BABEL_OP1_203_92176_20130322_143345_inLine
+BABEL_OP1_203_92176_20130322_143345_outLine
+BABEL_OP1_203_92356_20130715_210447_inLine
+BABEL_OP1_203_92356_20130715_210447_outLine
+BABEL_OP1_203_92698_20130327_174701_inLine
+BABEL_OP1_203_92698_20130327_174701_outLine
+BABEL_OP1_203_92698_20130327_175923_inLine
+BABEL_OP1_203_92698_20130327_175923_outLine
+BABEL_OP1_203_92886_20130314_211354_inLine
+BABEL_OP1_203_92886_20130314_211354_outLine
+BABEL_OP1_203_93224_20130503_144751_inLine
+BABEL_OP1_203_93224_20130503_144751_outLine
+BABEL_OP1_203_93320_20130502_175919_inLine
+BABEL_OP1_203_93320_20130502_175919_outLine
+BABEL_OP1_203_93946_20130406_004722_inLine
+BABEL_OP1_203_93946_20130406_004722_outLine
+BABEL_OP1_203_94212_20130806_184552_inLine
+BABEL_OP1_203_94212_20130806_184552_outLine
+BABEL_OP1_203_95966_20130320_201310_inLine
+BABEL_OP1_203_95966_20130320_201310_outLine
+BABEL_OP1_203_96205_20130324_175526_inLine
+BABEL_OP1_203_96205_20130324_175526_outLine
+BABEL_OP1_203_96584_20130410_144453_inLine
+BABEL_OP1_203_96584_20130410_144453_outLine
+BABEL_OP1_203_96934_20130319_142928_inLine
+BABEL_OP1_203_96934_20130319_142928_outLine
+BABEL_OP1_203_96985_20130313_141845_inLine
+BABEL_OP1_203_96985_20130313_141845_outLine
+BABEL_OP1_203_97136_20130410_190244_inLine
+BABEL_OP1_203_97136_20130410_190244_outLine
+BABEL_OP1_203_98506_20130423_152625_inLine
+BABEL_OP1_203_98506_20130423_152625_outLine
+BABEL_OP1_203_98678_20130721_152255_inLine
+BABEL_OP1_203_98678_20130721_152255_outLine
+BABEL_OP1_203_99487_20130311_174358_inLine
+BABEL_OP1_203_99487_20130311_174358_outLine
+BABEL_OP1_203_99516_20130309_164733_inLine
+BABEL_OP1_203_99516_20130309_164733_outLine
diff --git a/egs/babel/s5d/conf/lists/204-tamil/dev.list b/egs/babel/s5d/conf/lists/204-tamil/dev.list
new file mode 100644
index 00000000000..f793b6bf7fa
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/204-tamil/dev.list
@@ -0,0 +1,125 @@
+BABEL_OP1_204_13189_20130613_161247_inLine
+BABEL_OP1_204_13189_20130613_161247_outLine
+BABEL_OP1_204_17881_20130219_205442_inLine
+BABEL_OP1_204_17881_20130219_205442_outLine
+BABEL_OP1_204_18924_20130224_150538_inLine
+BABEL_OP1_204_18924_20130224_150538_outLine
+BABEL_OP1_204_20682_20130209_174057_inLine
+BABEL_OP1_204_20682_20130209_174057_outLine
+BABEL_OP1_204_22021_20130818_153135_inLine
+BABEL_OP1_204_22021_20130818_153135_outLine
+BABEL_OP1_204_22288_20130820_021043_inLine
+BABEL_OP1_204_22288_20130820_021043_outLine
+BABEL_OP1_204_22288_20130820_022958_inLine
+BABEL_OP1_204_22288_20130820_022958_outLine
+BABEL_OP1_204_22466_20121213_214935_inLine
+BABEL_OP1_204_22466_20121213_214935_outLine
+BABEL_OP1_204_23700_20130825_003724_inLine
+BABEL_OP1_204_23700_20130825_003724_outLine
+BABEL_OP1_204_23700_20130825_004922_inLine
+BABEL_OP1_204_23700_20130825_004922_outLine
+BABEL_OP1_204_24239_20130227_004742_inLine
+BABEL_OP1_204_24239_20130227_004742_outLine
+BABEL_OP1_204_24290_20130228_200830_inLine
+BABEL_OP1_204_24290_20130228_200830_outLine
+BABEL_OP1_204_24679_20130112_222528_inLine
+BABEL_OP1_204_24679_20130112_222528_outLine
+BABEL_OP1_204_25895_20130830_022140_inLine
+BABEL_OP1_204_25895_20130830_022140_outLine
+BABEL_OP1_204_26602_20130215_003413_inLine
+BABEL_OP1_204_26602_20130215_003413_outLine
+BABEL_OP1_204_27218_20130102_192252_inLine
+BABEL_OP1_204_27218_20130102_192252_outLine
+BABEL_OP1_204_28606_20130126_221856_inLine
+BABEL_OP1_204_28606_20130126_221856_outLine
+BABEL_OP1_204_28945_20130102_221003_inLine
+BABEL_OP1_204_28945_20130102_221003_outLine
+BABEL_OP1_204_29076_20130222_205943_inLine
+BABEL_OP1_204_29076_20130222_205943_outLine
+BABEL_OP1_204_31624_20130107_221428_inLine
+BABEL_OP1_204_31624_20130107_221428_outLine
+BABEL_OP1_204_32287_20130902_231135_inLine
+BABEL_OP1_204_32287_20130902_231135_outLine
+BABEL_OP1_204_33672_20130115_033234_inLine
+BABEL_OP1_204_33672_20130115_033234_outLine
+BABEL_OP1_204_37290_20130707_161547_inLine
+BABEL_OP1_204_37290_20130707_161547_outLine
+BABEL_OP1_204_37594_20130805_155303_inLine
+BABEL_OP1_204_37594_20130805_155303_outLine
+BABEL_OP1_204_38979_20130516_003257_inLine
+BABEL_OP1_204_38979_20130516_003257_outLine
+BABEL_OP1_204_42155_20130122_030534_inLine
+BABEL_OP1_204_42155_20130122_030534_outLine
+BABEL_OP1_204_43239_20130216_055950_inLine
+BABEL_OP1_204_43239_20130216_055950_outLine
+BABEL_OP1_204_44029_20130824_003907_inLine
+BABEL_OP1_204_44029_20130824_003907_outLine
+BABEL_OP1_204_44619_20130104_192431_inLine
+BABEL_OP1_204_44619_20130104_192431_outLine
+BABEL_OP1_204_44961_20130106_015828_inLine
+BABEL_OP1_204_44961_20130106_015828_outLine
+BABEL_OP1_204_46535_20130818_001009_inLine
+BABEL_OP1_204_46535_20130818_001009_outLine
+BABEL_OP1_204_47451_20130210_010011_inLine
+BABEL_OP1_204_47451_20130210_010011_outLine
+BABEL_OP1_204_48024_20130829_223102_inLine
+BABEL_OP1_204_48024_20130829_223102_outLine
+BABEL_OP1_204_50565_20121224_203735_inLine
+BABEL_OP1_204_50565_20121224_203735_outLine
+BABEL_OP1_204_51701_20130312_022556_inLine
+BABEL_OP1_204_51701_20130312_022556_outLine
+BABEL_OP1_204_54160_20121231_225532_inLine
+BABEL_OP1_204_54160_20121231_225532_outLine
+BABEL_OP1_204_55136_20130705_164312_inLine
+BABEL_OP1_204_55136_20130705_164312_outLine
+BABEL_OP1_204_57935_20130126_234131_inLine
+BABEL_OP1_204_57935_20130126_234131_outLine
+BABEL_OP1_204_58047_20130222_222259_inLine
+BABEL_OP1_204_58047_20130222_222259_outLine
+BABEL_OP1_204_59747_20121222_160946_inLine
+BABEL_OP1_204_59747_20121222_160946_outLine
+BABEL_OP1_204_61440_20130627_182754_inLine
+BABEL_OP1_204_61440_20130627_182754_outLine
+BABEL_OP1_204_62545_20130703_202255_inLine
+BABEL_OP1_204_62545_20130703_202255_outLine
+BABEL_OP1_204_63484_20130821_005511_inLine
+BABEL_OP1_204_63484_20130821_005511_outLine
+BABEL_OP1_204_64350_20130102_195330_inLine
+BABEL_OP1_204_64350_20130102_195330_outLine
+BABEL_OP1_204_64902_20130215_191500_inLine
+BABEL_OP1_204_64902_20130215_191500_outLine
+BABEL_OP1_204_68244_20130129_184054_inLine
+BABEL_OP1_204_70639_20130704_165905_inLine
+BABEL_OP1_204_70639_20130704_165905_outLine
+BABEL_OP1_204_71121_20130522_213640_inLine
+BABEL_OP1_204_71121_20130522_213640_outLine
+BABEL_OP1_204_73990_20130521_162632_inLine
+BABEL_OP1_204_73990_20130521_162632_outLine
+BABEL_OP1_204_78161_20130521_152635_inLine
+BABEL_OP1_204_78161_20130521_152635_outLine
+BABEL_OP1_204_83238_20130121_201216_inLine
+BABEL_OP1_204_83238_20130121_201216_outLine
+BABEL_OP1_204_84177_20130901_213641_inLine
+BABEL_OP1_204_84177_20130901_213641_outLine
+BABEL_OP1_204_84815_20130209_040750_inLine
+BABEL_OP1_204_84815_20130209_040750_outLine
+BABEL_OP1_204_86557_20130103_183044_inLine
+BABEL_OP1_204_86557_20130103_183044_outLine
+BABEL_OP1_204_87074_20130107_181209_inLine
+BABEL_OP1_204_87074_20130107_181209_outLine
+BABEL_OP1_204_87298_20130114_172850_inLine
+BABEL_OP1_204_87298_20130114_172850_outLine
+BABEL_OP1_204_90937_20130516_224543_inLine
+BABEL_OP1_204_90937_20130516_224543_outLine
+BABEL_OP1_204_91808_20130603_193623_inLine
+BABEL_OP1_204_91808_20130603_193623_outLine
+BABEL_OP1_204_92509_20130107_011707_inLine
+BABEL_OP1_204_92509_20130107_011707_outLine
+BABEL_OP1_204_94465_20130212_212918_inLine
+BABEL_OP1_204_94465_20130212_212918_outLine
+BABEL_OP1_204_94923_20130608_143347_inLine
+BABEL_OP1_204_94923_20130608_143347_outLine
+BABEL_OP1_204_96059_20130225_212517_inLine
+BABEL_OP1_204_96059_20130225_212517_outLine
+BABEL_OP1_204_97286_20130520_145640_inLine
+BABEL_OP1_204_97286_20130520_145640_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/eval.list b/egs/babel/s5d/conf/lists/204-tamil/eval.list new file mode 100644 index 00000000000..1887ca15694 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/eval.list @@ -0,0 +1,947 @@ +BABEL_OP1_204_10058_20130305_040021_inLine +BABEL_OP1_204_10058_20130305_040021_outLine +BABEL_OP1_204_10313_20130705_155607_inLine +BABEL_OP1_204_10313_20130705_155607_outLine +BABEL_OP1_204_10524_20130219_145437_inLine +BABEL_OP1_204_10524_20130219_145437_outLine +BABEL_OP1_204_10524_20130219_235944_inLine +BABEL_OP1_204_10524_20130219_235944_outLine +BABEL_OP1_204_10524_20130220_000643_inLine +BABEL_OP1_204_10524_20130220_000643_outLine +BABEL_OP1_204_10638_20130510_124441_inLine +BABEL_OP1_204_10638_20130510_124441_outLine +BABEL_OP1_204_11768_20130825_151244_inLine +BABEL_OP1_204_11768_20130825_151244_outLine +BABEL_OP1_204_12321_20130220_211618_inLine +BABEL_OP1_204_12321_20130220_211618_outLine +BABEL_OP1_204_12635_20130601_152113_inLine +BABEL_OP1_204_12635_20130601_152113_outLine +BABEL_OP1_204_12916_20130107_224212_inLine +BABEL_OP1_204_12916_20130107_224212_outLine +BABEL_OP1_204_13561_20130120_185547_inLine +BABEL_OP1_204_13561_20130120_185547_outLine +BABEL_OP1_204_13664_20121218_221847_inLine +BABEL_OP1_204_13664_20121218_221847_outLine +BABEL_OP1_204_13909_20130313_210114_inLine +BABEL_OP1_204_13909_20130313_210114_outLine +BABEL_OP1_204_13929_20130716_200759_inLine +BABEL_OP1_204_13929_20130716_200759_outLine +BABEL_OP1_204_14028_20130820_214748_inLine +BABEL_OP1_204_14028_20130820_214748_outLine +BABEL_OP1_204_14229_20130112_024917_inLine +BABEL_OP1_204_14229_20130112_024917_outLine +BABEL_OP1_204_14350_20130113_023333_inLine +BABEL_OP1_204_14350_20130113_023333_outLine +BABEL_OP1_204_14537_20130303_005043_inLine +BABEL_OP1_204_14537_20130303_005043_outLine +BABEL_OP1_204_14723_20130710_180819_inLine +BABEL_OP1_204_14723_20130710_180819_outLine +BABEL_OP1_204_14875_20130111_192622_inLine +BABEL_OP1_204_14875_20130111_192622_outLine +BABEL_OP1_204_14929_20130131_002309_inLine +BABEL_OP1_204_14929_20130131_002309_outLine +BABEL_OP1_204_15163_20130130_004303_inLine +BABEL_OP1_204_15163_20130130_004303_outLine +BABEL_OP1_204_15227_20130624_180548_inLine +BABEL_OP1_204_15227_20130624_180548_outLine +BABEL_OP1_204_15382_20130126_201407_inLine +BABEL_OP1_204_15382_20130126_201407_outLine +BABEL_OP1_204_15420_20130901_223125_inLine +BABEL_OP1_204_15420_20130901_223125_outLine +BABEL_OP1_204_15466_20130521_205553_inLine +BABEL_OP1_204_15466_20130521_205553_outLine +BABEL_OP1_204_15848_20121218_180011_inLine +BABEL_OP1_204_15848_20121218_180011_outLine +BABEL_OP1_204_16056_20130102_234300_inLine +BABEL_OP1_204_16056_20130102_234300_outLine +BABEL_OP1_204_16184_20121220_210106_inLine +BABEL_OP1_204_16184_20121220_210106_outLine +BABEL_OP1_204_16351_20130705_205024_inLine +BABEL_OP1_204_16351_20130705_205024_outLine +BABEL_OP1_204_16749_20130224_215355_inLine +BABEL_OP1_204_16749_20130224_215355_outLine +BABEL_OP1_204_16787_20130121_045651_inLine +BABEL_OP1_204_16787_20130121_045651_outLine +BABEL_OP1_204_17165_20130130_191341_inLine +BABEL_OP1_204_17165_20130130_191341_outLine +BABEL_OP1_204_17511_20130716_180322_inLine +BABEL_OP1_204_17511_20130716_180322_outLine +BABEL_OP1_204_17520_20130122_040744_inLine +BABEL_OP1_204_17520_20130122_040744_outLine +BABEL_OP1_204_17914_20130311_035117_inLine +BABEL_OP1_204_17914_20130311_035117_outLine +BABEL_OP1_204_17937_20130803_170049_inLine 
+BABEL_OP1_204_17937_20130803_170049_outLine +BABEL_OP1_204_18033_20130906_011555_inLine +BABEL_OP1_204_18033_20130906_011555_outLine +BABEL_OP1_204_18863_20130210_164314_inLine +BABEL_OP1_204_18863_20130210_164314_outLine +BABEL_OP1_204_19545_20130122_164148_inLine +BABEL_OP1_204_19545_20130122_164148_outLine +BABEL_OP1_204_19663_20130126_195459_inLine +BABEL_OP1_204_19663_20130126_195459_outLine +BABEL_OP1_204_19749_20130707_153432_inLine +BABEL_OP1_204_19749_20130707_153432_outLine +BABEL_OP1_204_19773_20130217_220127_inLine +BABEL_OP1_204_19773_20130217_220127_outLine +BABEL_OP1_204_19773_20130217_234204_inLine +BABEL_OP1_204_19773_20130217_234204_outLine +BABEL_OP1_204_19782_20130209_175552_inLine +BABEL_OP1_204_19782_20130209_175552_outLine +BABEL_OP1_204_20133_20121218_172017_inLine +BABEL_OP1_204_20133_20121218_172017_outLine +BABEL_OP1_204_20800_20130102_180915_inLine +BABEL_OP1_204_20800_20130102_180915_outLine +BABEL_OP1_204_20896_20130822_163553_inLine +BABEL_OP1_204_20896_20130822_163553_outLine +BABEL_OP1_204_21004_20130209_230509_inLine +BABEL_OP1_204_21004_20130209_230509_outLine +BABEL_OP1_204_21029_20130107_212248_inLine +BABEL_OP1_204_21029_20130107_212248_outLine +BABEL_OP1_204_21109_20130301_151421_inLine +BABEL_OP1_204_21109_20130301_151421_outLine +BABEL_OP1_204_21159_20130607_143737_inLine +BABEL_OP1_204_21159_20130607_143737_outLine +BABEL_OP1_204_21244_20130627_172541_inLine +BABEL_OP1_204_21244_20130627_172541_outLine +BABEL_OP1_204_21581_20130118_192005_inLine +BABEL_OP1_204_21581_20130118_192005_outLine +BABEL_OP1_204_21794_20130129_233131_inLine +BABEL_OP1_204_21794_20130129_233131_outLine +BABEL_OP1_204_22280_20130222_191834_inLine +BABEL_OP1_204_22280_20130222_191834_outLine +BABEL_OP1_204_22641_20121224_195014_inLine +BABEL_OP1_204_22641_20121224_195014_outLine +BABEL_OP1_204_23196_20130605_144617_inLine +BABEL_OP1_204_23196_20130605_144617_outLine +BABEL_OP1_204_23355_20130812_203203_inLine +BABEL_OP1_204_23355_20130812_203203_outLine +BABEL_OP1_204_23355_20130812_204058_inLine +BABEL_OP1_204_23355_20130812_204058_outLine +BABEL_OP1_204_23395_20130126_013244_inLine +BABEL_OP1_204_23395_20130126_013244_outLine +BABEL_OP1_204_23505_20130108_173244_inLine +BABEL_OP1_204_23505_20130108_173244_outLine +BABEL_OP1_204_23681_20130625_175654_inLine +BABEL_OP1_204_23681_20130625_175654_outLine +BABEL_OP1_204_23983_20130707_144157_inLine +BABEL_OP1_204_23983_20130707_144157_outLine +BABEL_OP1_204_23983_20130707_145156_inLine +BABEL_OP1_204_23983_20130707_145156_outLine +BABEL_OP1_204_23995_20130209_202505_inLine +BABEL_OP1_204_23995_20130209_202505_outLine +BABEL_OP1_204_24017_20130209_202828_inLine +BABEL_OP1_204_24017_20130209_202828_outLine +BABEL_OP1_204_24037_20130708_184129_inLine +BABEL_OP1_204_24037_20130708_184129_outLine +BABEL_OP1_204_24323_20130121_041043_inLine +BABEL_OP1_204_24323_20130121_041043_outLine +BABEL_OP1_204_24589_20130111_223930_inLine +BABEL_OP1_204_24589_20130111_223930_outLine +BABEL_OP1_204_24779_20130607_183107_inLine +BABEL_OP1_204_24779_20130607_183107_outLine +BABEL_OP1_204_25012_20130705_184756_inLine +BABEL_OP1_204_25012_20130705_184756_outLine +BABEL_OP1_204_25015_20130329_012535_inLine +BABEL_OP1_204_25015_20130329_012535_outLine +BABEL_OP1_204_25068_20130901_230020_inLine +BABEL_OP1_204_25068_20130901_230020_outLine +BABEL_OP1_204_25068_20130901_235001_inLine +BABEL_OP1_204_25068_20130901_235001_outLine +BABEL_OP1_204_25085_20130612_170506_inLine +BABEL_OP1_204_25085_20130612_170506_outLine 
+BABEL_OP1_204_25198_20130625_185430_inLine +BABEL_OP1_204_25198_20130625_185430_outLine +BABEL_OP1_204_25220_20130715_192051_inLine +BABEL_OP1_204_25220_20130715_192051_outLine +BABEL_OP1_204_25767_20130107_180931_inLine +BABEL_OP1_204_25767_20130107_180931_outLine +BABEL_OP1_204_26206_20130129_191521_inLine +BABEL_OP1_204_26206_20130129_191521_outLine +BABEL_OP1_204_26574_20130216_002354_inLine +BABEL_OP1_204_26574_20130216_002354_outLine +BABEL_OP1_204_26574_20130218_013612_inLine +BABEL_OP1_204_26574_20130218_013612_outLine +BABEL_OP1_204_26869_20130815_190057_inLine +BABEL_OP1_204_26869_20130815_190057_outLine +BABEL_OP1_204_27014_20130708_191739_inLine +BABEL_OP1_204_27014_20130708_191739_outLine +BABEL_OP1_204_27367_20130708_153816_inLine +BABEL_OP1_204_27367_20130708_153816_outLine +BABEL_OP1_204_28012_20130211_213147_inLine +BABEL_OP1_204_28012_20130211_213147_outLine +BABEL_OP1_204_28303_20130112_003656_inLine +BABEL_OP1_204_28303_20130112_003656_outLine +BABEL_OP1_204_28585_20130208_014141_inLine +BABEL_OP1_204_28585_20130208_014141_outLine +BABEL_OP1_204_28775_20130119_000600_inLine +BABEL_OP1_204_28775_20130119_000600_outLine +BABEL_OP1_204_28814_20130224_202343_inLine +BABEL_OP1_204_28814_20130224_202343_outLine +BABEL_OP1_204_28871_20121219_184300_inLine +BABEL_OP1_204_28871_20121219_184300_outLine +BABEL_OP1_204_29021_20130227_043427_inLine +BABEL_OP1_204_29021_20130227_043427_outLine +BABEL_OP1_204_29072_20130127_023330_inLine +BABEL_OP1_204_29072_20130127_023330_outLine +BABEL_OP1_204_29168_20130112_230634_inLine +BABEL_OP1_204_29168_20130112_230634_outLine +BABEL_OP1_204_29208_20130127_011057_inLine +BABEL_OP1_204_29208_20130127_011057_outLine +BABEL_OP1_204_29352_20130628_145610_inLine +BABEL_OP1_204_29352_20130628_145610_outLine +BABEL_OP1_204_29663_20130829_225524_inLine +BABEL_OP1_204_29663_20130829_225524_outLine +BABEL_OP1_204_29765_20130607_162026_inLine +BABEL_OP1_204_29765_20130607_162026_outLine +BABEL_OP1_204_29777_20130211_193239_inLine +BABEL_OP1_204_29777_20130211_193239_outLine +BABEL_OP1_204_30461_20130628_160212_inLine +BABEL_OP1_204_30461_20130628_160212_outLine +BABEL_OP1_204_30653_20130216_171325_inLine +BABEL_OP1_204_30653_20130216_171325_outLine +BABEL_OP1_204_30720_20130524_153314_inLine +BABEL_OP1_204_30720_20130524_153314_outLine +BABEL_OP1_204_30869_20130211_004250_inLine +BABEL_OP1_204_30869_20130211_004250_outLine +BABEL_OP1_204_30974_20130508_113119_inLine +BABEL_OP1_204_30974_20130508_113119_outLine +BABEL_OP1_204_31109_20130121_195304_inLine +BABEL_OP1_204_31109_20130121_195304_outLine +BABEL_OP1_204_31267_20130311_024343_inLine +BABEL_OP1_204_31267_20130311_024343_outLine +BABEL_OP1_204_31668_20130603_155703_inLine +BABEL_OP1_204_31668_20130603_155703_outLine +BABEL_OP1_204_31919_20130322_030728_inLine +BABEL_OP1_204_31919_20130322_030728_outLine +BABEL_OP1_204_32380_20130812_163206_inLine +BABEL_OP1_204_32380_20130812_163206_outLine +BABEL_OP1_204_32630_20130618_150743_inLine +BABEL_OP1_204_32630_20130618_150743_outLine +BABEL_OP1_204_32708_20130107_000057_inLine +BABEL_OP1_204_32708_20130107_000057_outLine +BABEL_OP1_204_32832_20130208_200126_inLine +BABEL_OP1_204_32832_20130208_200126_outLine +BABEL_OP1_204_32837_20130211_011900_inLine +BABEL_OP1_204_32837_20130211_011900_outLine +BABEL_OP1_204_32914_20130218_021836_inLine +BABEL_OP1_204_32914_20130218_021836_outLine +BABEL_OP1_204_32914_20130218_023337_inLine +BABEL_OP1_204_32914_20130218_023337_outLine +BABEL_OP1_204_32961_20130518_164254_inLine 
+BABEL_OP1_204_32961_20130518_164254_outLine +BABEL_OP1_204_33149_20130901_211119_inLine +BABEL_OP1_204_33149_20130901_211119_outLine +BABEL_OP1_204_33333_20130818_163046_inLine +BABEL_OP1_204_33333_20130818_163046_outLine +BABEL_OP1_204_33635_20130127_024601_inLine +BABEL_OP1_204_33635_20130127_024601_outLine +BABEL_OP1_204_33992_20130625_183028_inLine +BABEL_OP1_204_33992_20130625_183028_outLine +BABEL_OP1_204_34208_20130815_173402_inLine +BABEL_OP1_204_34208_20130815_173402_outLine +BABEL_OP1_204_34336_20130111_190838_inLine +BABEL_OP1_204_34336_20130111_190838_outLine +BABEL_OP1_204_34564_20130217_024252_inLine +BABEL_OP1_204_34564_20130217_024252_outLine +BABEL_OP1_204_34629_20130524_214401_inLine +BABEL_OP1_204_34629_20130524_214401_outLine +BABEL_OP1_204_35069_20130211_183408_inLine +BABEL_OP1_204_35069_20130211_183408_outLine +BABEL_OP1_204_35139_20130114_222544_inLine +BABEL_OP1_204_35139_20130114_222544_outLine +BABEL_OP1_204_35467_20121221_225338_inLine +BABEL_OP1_204_35467_20121221_225338_outLine +BABEL_OP1_204_35583_20130224_214957_inLine +BABEL_OP1_204_35583_20130224_214957_outLine +BABEL_OP1_204_35786_20130625_191629_inLine +BABEL_OP1_204_35786_20130625_191629_outLine +BABEL_OP1_204_35885_20130225_225544_inLine +BABEL_OP1_204_35885_20130225_225544_outLine +BABEL_OP1_204_36147_20130902_003850_inLine +BABEL_OP1_204_36147_20130902_003850_outLine +BABEL_OP1_204_36219_20130116_023001_inLine +BABEL_OP1_204_36219_20130116_023001_outLine +BABEL_OP1_204_36300_20130802_173230_inLine +BABEL_OP1_204_36300_20130802_173230_outLine +BABEL_OP1_204_36364_20130802_160044_inLine +BABEL_OP1_204_36364_20130802_160044_outLine +BABEL_OP1_204_36505_20130209_151150_inLine +BABEL_OP1_204_36505_20130209_151150_outLine +BABEL_OP1_204_36505_20130212_211726_inLine +BABEL_OP1_204_36505_20130212_211726_outLine +BABEL_OP1_204_36632_20130725_160202_inLine +BABEL_OP1_204_36632_20130725_160202_outLine +BABEL_OP1_204_36900_20130210_013355_inLine +BABEL_OP1_204_36900_20130210_013355_outLine +BABEL_OP1_204_37007_20130708_211216_inLine +BABEL_OP1_204_37007_20130708_211216_outLine +BABEL_OP1_204_37068_20130815_173112_inLine +BABEL_OP1_204_37068_20130815_173112_outLine +BABEL_OP1_204_37281_20130131_020847_inLine +BABEL_OP1_204_37281_20130131_020847_outLine +BABEL_OP1_204_37499_20130627_150627_inLine +BABEL_OP1_204_37499_20130627_150627_outLine +BABEL_OP1_204_37598_20130607_165958_inLine +BABEL_OP1_204_37598_20130607_165958_outLine +BABEL_OP1_204_38323_20130311_030447_inLine +BABEL_OP1_204_38323_20130311_030447_outLine +BABEL_OP1_204_38554_20121221_210925_inLine +BABEL_OP1_204_38554_20121221_210925_outLine +BABEL_OP1_204_38741_20130103_233022_inLine +BABEL_OP1_204_38741_20130103_233022_outLine +BABEL_OP1_204_38963_20130830_013927_inLine +BABEL_OP1_204_38963_20130830_013927_outLine +BABEL_OP1_204_39006_20130310_042623_inLine +BABEL_OP1_204_39006_20130310_042623_outLine +BABEL_OP1_204_39277_20130710_203344_inLine +BABEL_OP1_204_39277_20130710_203344_outLine +BABEL_OP1_204_39426_20130218_002812_inLine +BABEL_OP1_204_39426_20130218_002812_outLine +BABEL_OP1_204_39579_20130724_163251_inLine +BABEL_OP1_204_39579_20130724_163251_outLine +BABEL_OP1_204_41073_20130211_210606_inLine +BABEL_OP1_204_41073_20130211_210606_outLine +BABEL_OP1_204_41100_20130108_172156_inLine +BABEL_OP1_204_41100_20130108_172156_outLine +BABEL_OP1_204_41109_20130211_003851_inLine +BABEL_OP1_204_41109_20130211_003851_outLine +BABEL_OP1_204_41174_20130117_215826_inLine +BABEL_OP1_204_41174_20130117_215826_outLine 
+BABEL_OP1_204_41400_20130702_161025_inLine +BABEL_OP1_204_41400_20130702_161025_outLine +BABEL_OP1_204_41493_20121218_185431_inLine +BABEL_OP1_204_41493_20121218_185431_outLine +BABEL_OP1_204_41609_20130102_232356_inLine +BABEL_OP1_204_41609_20130102_232356_outLine +BABEL_OP1_204_41680_20121219_175709_inLine +BABEL_OP1_204_41680_20121219_175709_outLine +BABEL_OP1_204_41692_20130624_195718_inLine +BABEL_OP1_204_41692_20130624_195718_outLine +BABEL_OP1_204_42243_20121222_194916_inLine +BABEL_OP1_204_42243_20121222_194916_outLine +BABEL_OP1_204_42309_20130521_001029_inLine +BABEL_OP1_204_42309_20130521_001029_outLine +BABEL_OP1_204_42434_20130116_230135_inLine +BABEL_OP1_204_42434_20130116_230135_outLine +BABEL_OP1_204_42600_20130111_202254_inLine +BABEL_OP1_204_42600_20130111_202254_outLine +BABEL_OP1_204_42771_20130228_025042_inLine +BABEL_OP1_204_42771_20130228_025042_outLine +BABEL_OP1_204_42848_20130627_222753_inLine +BABEL_OP1_204_42848_20130627_222753_outLine +BABEL_OP1_204_42877_20130815_164740_inLine +BABEL_OP1_204_42877_20130815_164740_outLine +BABEL_OP1_204_42883_20130624_202703_inLine +BABEL_OP1_204_42883_20130624_202703_outLine +BABEL_OP1_204_43074_20130509_115450_inLine +BABEL_OP1_204_43074_20130509_115450_outLine +BABEL_OP1_204_43285_20130130_012851_inLine +BABEL_OP1_204_43285_20130130_012851_outLine +BABEL_OP1_204_43388_20130129_230503_inLine +BABEL_OP1_204_43388_20130129_230503_outLine +BABEL_OP1_204_43395_20130313_164710_inLine +BABEL_OP1_204_43395_20130313_164710_outLine +BABEL_OP1_204_43646_20121218_215728_inLine +BABEL_OP1_204_43646_20121218_215728_outLine +BABEL_OP1_204_43990_20130521_142553_inLine +BABEL_OP1_204_44255_20130225_230219_inLine +BABEL_OP1_204_44255_20130225_230219_outLine +BABEL_OP1_204_44681_20130830_000000_inLine +BABEL_OP1_204_44681_20130830_000000_outLine +BABEL_OP1_204_44847_20130126_212511_inLine +BABEL_OP1_204_44847_20130126_212511_outLine +BABEL_OP1_204_45106_20130325_003034_inLine +BABEL_OP1_204_45106_20130325_003034_outLine +BABEL_OP1_204_45106_20130325_004324_inLine +BABEL_OP1_204_45106_20130325_004324_outLine +BABEL_OP1_204_45201_20130312_021424_inLine +BABEL_OP1_204_45201_20130312_021424_outLine +BABEL_OP1_204_45536_20130217_014642_inLine +BABEL_OP1_204_45536_20130217_014642_outLine +BABEL_OP1_204_45559_20130303_234142_inLine +BABEL_OP1_204_45559_20130303_234142_outLine +BABEL_OP1_204_45560_20130107_224441_inLine +BABEL_OP1_204_45560_20130107_224441_outLine +BABEL_OP1_204_45642_20130106_040244_inLine +BABEL_OP1_204_45642_20130106_040244_outLine +BABEL_OP1_204_45771_20130626_191013_inLine +BABEL_OP1_204_45771_20130626_191013_outLine +BABEL_OP1_204_45908_20130607_213719_inLine +BABEL_OP1_204_45908_20130607_213719_outLine +BABEL_OP1_204_46202_20130524_162004_inLine +BABEL_OP1_204_46202_20130524_162004_outLine +BABEL_OP1_204_46310_20130103_163932_inLine +BABEL_OP1_204_46310_20130103_163932_outLine +BABEL_OP1_204_46315_20130129_014152_inLine +BABEL_OP1_204_46315_20130129_014152_outLine +BABEL_OP1_204_46625_20121219_193926_inLine +BABEL_OP1_204_46625_20121219_193926_outLine +BABEL_OP1_204_46712_20130111_175849_inLine +BABEL_OP1_204_46712_20130111_175849_outLine +BABEL_OP1_204_46763_20130216_235210_inLine +BABEL_OP1_204_46763_20130216_235210_outLine +BABEL_OP1_204_46770_20130224_204253_inLine +BABEL_OP1_204_46770_20130224_204253_outLine +BABEL_OP1_204_46881_20121222_190526_inLine +BABEL_OP1_204_46881_20121222_190526_outLine +BABEL_OP1_204_47309_20130705_182329_inLine +BABEL_OP1_204_47309_20130705_182329_outLine 
+BABEL_OP1_204_47405_20130829_233945_inLine +BABEL_OP1_204_47405_20130829_233945_outLine +BABEL_OP1_204_47799_20130516_193711_inLine +BABEL_OP1_204_47799_20130516_193711_outLine +BABEL_OP1_204_47882_20130705_203354_inLine +BABEL_OP1_204_47882_20130705_203354_outLine +BABEL_OP1_204_48016_20130311_033904_inLine +BABEL_OP1_204_48016_20130311_033904_outLine +BABEL_OP1_204_48200_20130209_211626_inLine +BABEL_OP1_204_48200_20130209_211626_outLine +BABEL_OP1_204_48399_20130112_205650_inLine +BABEL_OP1_204_48399_20130112_205650_outLine +BABEL_OP1_204_48663_20130303_002413_inLine +BABEL_OP1_204_48663_20130303_002413_outLine +BABEL_OP1_204_48663_20130303_023530_inLine +BABEL_OP1_204_48663_20130303_023530_outLine +BABEL_OP1_204_49216_20130112_225803_inLine +BABEL_OP1_204_49216_20130112_225803_outLine +BABEL_OP1_204_49630_20130130_014200_inLine +BABEL_OP1_204_49630_20130130_014200_outLine +BABEL_OP1_204_49637_20130112_201836_inLine +BABEL_OP1_204_49637_20130112_201836_outLine +BABEL_OP1_204_49870_20130824_181019_inLine +BABEL_OP1_204_49870_20130824_181019_outLine +BABEL_OP1_204_49902_20130128_182704_inLine +BABEL_OP1_204_49902_20130128_182704_outLine +BABEL_OP1_204_50090_20130122_180653_inLine +BABEL_OP1_204_50090_20130122_180653_outLine +BABEL_OP1_204_50940_20130522_185117_inLine +BABEL_OP1_204_50940_20130522_185117_outLine +BABEL_OP1_204_50958_20130129_195029_inLine +BABEL_OP1_204_50958_20130129_195029_outLine +BABEL_OP1_204_51414_20130531_173250_inLine +BABEL_OP1_204_51414_20130531_173250_outLine +BABEL_OP1_204_51417_20130212_002429_inLine +BABEL_OP1_204_51417_20130212_002429_outLine +BABEL_OP1_204_51484_20130209_174419_inLine +BABEL_OP1_204_51484_20130209_174419_outLine +BABEL_OP1_204_51540_20130228_021352_inLine +BABEL_OP1_204_51540_20130228_021352_outLine +BABEL_OP1_204_51611_20130608_155952_inLine +BABEL_OP1_204_51611_20130608_155952_outLine +BABEL_OP1_204_52058_20130710_173207_inLine +BABEL_OP1_204_52058_20130710_173207_outLine +BABEL_OP1_204_52070_20130607_210255_inLine +BABEL_OP1_204_52070_20130607_210255_outLine +BABEL_OP1_204_52222_20130524_171039_inLine +BABEL_OP1_204_52222_20130524_171039_outLine +BABEL_OP1_204_52246_20130122_172528_inLine +BABEL_OP1_204_52246_20130122_172528_outLine +BABEL_OP1_204_52265_20130516_202551_inLine +BABEL_OP1_204_52265_20130516_202551_outLine +BABEL_OP1_204_52272_20130112_213528_inLine +BABEL_OP1_204_52272_20130112_213528_outLine +BABEL_OP1_204_52438_20130103_172024_inLine +BABEL_OP1_204_52438_20130103_172024_outLine +BABEL_OP1_204_52447_20130624_184145_inLine +BABEL_OP1_204_52447_20130624_184145_outLine +BABEL_OP1_204_53010_20130825_210105_inLine +BABEL_OP1_204_53010_20130825_210105_outLine +BABEL_OP1_204_53072_20130605_225249_inLine +BABEL_OP1_204_53072_20130605_225249_outLine +BABEL_OP1_204_53206_20130704_211512_inLine +BABEL_OP1_204_53206_20130704_211512_outLine +BABEL_OP1_204_53492_20130322_020510_inLine +BABEL_OP1_204_53492_20130322_020510_outLine +BABEL_OP1_204_53665_20130301_171513_inLine +BABEL_OP1_204_53665_20130301_171513_outLine +BABEL_OP1_204_53957_20130219_004311_inLine +BABEL_OP1_204_53957_20130219_004311_outLine +BABEL_OP1_204_53957_20130219_004930_inLine +BABEL_OP1_204_53957_20130219_004930_outLine +BABEL_OP1_204_54477_20130217_014421_inLine +BABEL_OP1_204_54477_20130217_014421_outLine +BABEL_OP1_204_54477_20130220_020436_inLine +BABEL_OP1_204_54477_20130220_020436_outLine +BABEL_OP1_204_54697_20130209_190625_inLine +BABEL_OP1_204_54697_20130209_190625_outLine +BABEL_OP1_204_54735_20130830_002922_inLine 
+BABEL_OP1_204_54735_20130830_002922_outLine +BABEL_OP1_204_54735_20130830_004018_inLine +BABEL_OP1_204_54735_20130830_004018_outLine +BABEL_OP1_204_55042_20130820_010539_inLine +BABEL_OP1_204_55042_20130820_010539_outLine +BABEL_OP1_204_55818_20130115_173558_inLine +BABEL_OP1_204_55818_20130115_173558_outLine +BABEL_OP1_204_55968_20121219_172146_inLine +BABEL_OP1_204_55968_20121219_172146_outLine +BABEL_OP1_204_56019_20130301_165116_inLine +BABEL_OP1_204_56019_20130301_165116_outLine +BABEL_OP1_204_56076_20130223_224429_inLine +BABEL_OP1_204_56076_20130223_224429_outLine +BABEL_OP1_204_56331_20130318_211453_inLine +BABEL_OP1_204_56331_20130318_211453_outLine +BABEL_OP1_204_56345_20130524_143829_inLine +BABEL_OP1_204_56345_20130524_143829_outLine +BABEL_OP1_204_56465_20130312_022322_inLine +BABEL_OP1_204_56465_20130312_022322_outLine +BABEL_OP1_204_56468_20130606_150512_inLine +BABEL_OP1_204_56468_20130606_150512_outLine +BABEL_OP1_204_56674_20130725_164519_inLine +BABEL_OP1_204_56674_20130725_164519_outLine +BABEL_OP1_204_56826_20130215_030029_inLine +BABEL_OP1_204_56826_20130215_030029_outLine +BABEL_OP1_204_57219_20130311_044204_inLine +BABEL_OP1_204_57219_20130311_044204_outLine +BABEL_OP1_204_57566_20130304_024842_inLine +BABEL_OP1_204_57566_20130304_024842_outLine +BABEL_OP1_204_57609_20130122_194937_inLine +BABEL_OP1_204_57609_20130122_194937_outLine +BABEL_OP1_204_57654_20130114_074621_inLine +BABEL_OP1_204_57654_20130114_074621_outLine +BABEL_OP1_204_57919_20130902_232635_inLine +BABEL_OP1_204_57919_20130902_232635_outLine +BABEL_OP1_204_58061_20130605_182326_inLine +BABEL_OP1_204_58061_20130605_182326_outLine +BABEL_OP1_204_58145_20130123_042048_inLine +BABEL_OP1_204_58145_20130123_042048_outLine +BABEL_OP1_204_58853_20130709_164717_inLine +BABEL_OP1_204_58853_20130709_164717_outLine +BABEL_OP1_204_58915_20130531_170755_inLine +BABEL_OP1_204_58915_20130531_170755_outLine +BABEL_OP1_204_59635_20130211_170439_inLine +BABEL_OP1_204_59635_20130211_170439_outLine +BABEL_OP1_204_59993_20130104_172518_inLine +BABEL_OP1_204_59993_20130104_172518_outLine +BABEL_OP1_204_60458_20130618_144323_inLine +BABEL_OP1_204_60458_20130618_144323_outLine +BABEL_OP1_204_60498_20130624_192541_inLine +BABEL_OP1_204_60498_20130624_192541_outLine +BABEL_OP1_204_60650_20130709_185316_inLine +BABEL_OP1_204_60650_20130709_185316_outLine +BABEL_OP1_204_60836_20130116_024921_inLine +BABEL_OP1_204_60836_20130116_024921_outLine +BABEL_OP1_204_61219_20130114_220900_inLine +BABEL_OP1_204_61219_20130114_220900_outLine +BABEL_OP1_204_61357_20130124_034332_inLine +BABEL_OP1_204_61357_20130124_034332_outLine +BABEL_OP1_204_61678_20121220_171940_inLine +BABEL_OP1_204_61678_20121220_171940_outLine +BABEL_OP1_204_61684_20130523_114244_inLine +BABEL_OP1_204_61684_20130523_114244_outLine +BABEL_OP1_204_62047_20130211_213702_inLine +BABEL_OP1_204_62047_20130211_213702_outLine +BABEL_OP1_204_62155_20130215_213833_inLine +BABEL_OP1_204_62155_20130215_213833_outLine +BABEL_OP1_204_62158_20130508_122027_inLine +BABEL_OP1_204_62158_20130508_122027_outLine +BABEL_OP1_204_62286_20130126_212818_inLine +BABEL_OP1_204_62286_20130126_212818_outLine +BABEL_OP1_204_62323_20130820_221917_inLine +BABEL_OP1_204_62323_20130820_221917_outLine +BABEL_OP1_204_62360_20130228_184057_inLine +BABEL_OP1_204_62360_20130228_184057_outLine +BABEL_OP1_204_62362_20130626_225421_inLine +BABEL_OP1_204_62362_20130626_225421_outLine +BABEL_OP1_204_62434_20130114_192752_inLine +BABEL_OP1_204_62434_20130114_192752_outLine 
+BABEL_OP1_204_62471_20130818_161031_inLine +BABEL_OP1_204_62471_20130818_161031_outLine +BABEL_OP1_204_62714_20130215_213205_inLine +BABEL_OP1_204_62714_20130215_213205_outLine +BABEL_OP1_204_63265_20130821_232031_inLine +BABEL_OP1_204_63265_20130821_232031_outLine +BABEL_OP1_204_63425_20130318_220104_inLine +BABEL_OP1_204_63425_20130318_220104_outLine +BABEL_OP1_204_63481_20121224_021602_inLine +BABEL_OP1_204_63481_20121224_021602_outLine +BABEL_OP1_204_63511_20130515_175657_inLine +BABEL_OP1_204_63511_20130515_175657_outLine +BABEL_OP1_204_63523_20130301_162515_inLine +BABEL_OP1_204_63523_20130301_162515_outLine +BABEL_OP1_204_63757_20130222_193431_inLine +BABEL_OP1_204_63757_20130222_193431_outLine +BABEL_OP1_204_63757_20130222_194438_inLine +BABEL_OP1_204_63757_20130222_194438_outLine +BABEL_OP1_204_63906_20130322_030332_inLine +BABEL_OP1_204_63906_20130322_030332_outLine +BABEL_OP1_204_63938_20130212_150410_inLine +BABEL_OP1_204_63938_20130212_150410_outLine +BABEL_OP1_204_64014_20130707_165607_inLine +BABEL_OP1_204_64014_20130707_165607_outLine +BABEL_OP1_204_64635_20130730_203724_inLine +BABEL_OP1_204_64635_20130730_203724_outLine +BABEL_OP1_204_64638_20130314_012822_inLine +BABEL_OP1_204_64638_20130314_012822_outLine +BABEL_OP1_204_64768_20130112_061048_inLine +BABEL_OP1_204_64768_20130112_061048_outLine +BABEL_OP1_204_65077_20121219_175859_inLine +BABEL_OP1_204_65077_20121219_175859_outLine +BABEL_OP1_204_65339_20130821_194428_inLine +BABEL_OP1_204_65339_20130821_194428_outLine +BABEL_OP1_204_65367_20130224_214222_inLine +BABEL_OP1_204_65367_20130224_214222_outLine +BABEL_OP1_204_65370_20130508_125401_inLine +BABEL_OP1_204_65370_20130508_125401_outLine +BABEL_OP1_204_65639_20130703_191008_inLine +BABEL_OP1_204_65639_20130703_191008_outLine +BABEL_OP1_204_65723_20130118_220849_inLine +BABEL_OP1_204_65723_20130118_220849_outLine +BABEL_OP1_204_66967_20130107_185021_inLine +BABEL_OP1_204_66967_20130107_185021_outLine +BABEL_OP1_204_67304_20130906_005328_inLine +BABEL_OP1_204_67304_20130906_005328_outLine +BABEL_OP1_204_67389_20130523_220733_inLine +BABEL_OP1_204_67389_20130523_220733_outLine +BABEL_OP1_204_67592_20130211_032508_inLine +BABEL_OP1_204_67592_20130211_032508_outLine +BABEL_OP1_204_68908_20130805_144908_inLine +BABEL_OP1_204_68908_20130805_144908_outLine +BABEL_OP1_204_69107_20130217_222355_inLine +BABEL_OP1_204_69107_20130217_222355_outLine +BABEL_OP1_204_69972_20130802_175346_inLine +BABEL_OP1_204_69972_20130802_175346_outLine +BABEL_OP1_204_69982_20130310_050949_inLine +BABEL_OP1_204_69982_20130310_050949_outLine +BABEL_OP1_204_70110_20121219_223303_inLine +BABEL_OP1_204_70110_20121219_223303_outLine +BABEL_OP1_204_70121_20130116_162838_inLine +BABEL_OP1_204_70121_20130116_162838_outLine +BABEL_OP1_204_70182_20130517_002241_inLine +BABEL_OP1_204_70182_20130517_002241_outLine +BABEL_OP1_204_70251_20130117_003349_inLine +BABEL_OP1_204_70251_20130117_003349_outLine +BABEL_OP1_204_70343_20130129_203836_inLine +BABEL_OP1_204_70343_20130129_203836_outLine +BABEL_OP1_204_70526_20130310_204157_inLine +BABEL_OP1_204_70526_20130310_204157_outLine +BABEL_OP1_204_70526_20130311_194113_inLine +BABEL_OP1_204_70526_20130311_194113_outLine +BABEL_OP1_204_70986_20130531_211537_inLine +BABEL_OP1_204_70986_20130531_211537_outLine +BABEL_OP1_204_71038_20130225_191007_inLine +BABEL_OP1_204_71038_20130225_191007_outLine +BABEL_OP1_204_71263_20130130_171712_inLine +BABEL_OP1_204_71263_20130130_171712_outLine +BABEL_OP1_204_71263_20130130_172902_inLine 
+BABEL_OP1_204_71263_20130130_172902_outLine +BABEL_OP1_204_71333_20130111_181914_inLine +BABEL_OP1_204_71333_20130111_181914_outLine +BABEL_OP1_204_71419_20130710_200227_inLine +BABEL_OP1_204_71419_20130710_200227_outLine +BABEL_OP1_204_71460_20130902_003219_inLine +BABEL_OP1_204_71460_20130902_003219_outLine +BABEL_OP1_204_71559_20130217_032759_inLine +BABEL_OP1_204_71559_20130217_032759_outLine +BABEL_OP1_204_71566_20130209_235200_inLine +BABEL_OP1_204_71566_20130209_235200_outLine +BABEL_OP1_204_71704_20130114_182140_inLine +BABEL_OP1_204_71704_20130114_182140_outLine +BABEL_OP1_204_71754_20130822_005036_inLine +BABEL_OP1_204_71754_20130822_005036_outLine +BABEL_OP1_204_71780_20130104_180509_inLine +BABEL_OP1_204_71780_20130104_180509_outLine +BABEL_OP1_204_72844_20121221_181459_inLine +BABEL_OP1_204_72844_20121221_181459_outLine +BABEL_OP1_204_73072_20130107_173326_inLine +BABEL_OP1_204_73072_20130107_173326_outLine +BABEL_OP1_204_73301_20130116_023950_inLine +BABEL_OP1_204_73301_20130116_023950_outLine +BABEL_OP1_204_73518_20130225_195225_inLine +BABEL_OP1_204_73518_20130225_195225_outLine +BABEL_OP1_204_73622_20130108_180939_inLine +BABEL_OP1_204_73622_20130108_180939_outLine +BABEL_OP1_204_73814_20130122_170515_inLine +BABEL_OP1_204_73814_20130122_170515_outLine +BABEL_OP1_204_73837_20130115_213251_inLine +BABEL_OP1_204_73837_20130115_213251_outLine +BABEL_OP1_204_73909_20130209_171219_inLine +BABEL_OP1_204_73909_20130209_171219_outLine +BABEL_OP1_204_74078_20130901_220513_inLine +BABEL_OP1_204_74078_20130901_220513_outLine +BABEL_OP1_204_75223_20130106_180539_inLine +BABEL_OP1_204_75223_20130106_180539_outLine +BABEL_OP1_204_75460_20130515_225130_inLine +BABEL_OP1_204_75460_20130515_225130_outLine +BABEL_OP1_204_75505_20121220_175919_inLine +BABEL_OP1_204_75505_20121220_175919_outLine +BABEL_OP1_204_75869_20130614_143452_inLine +BABEL_OP1_204_75869_20130614_143452_outLine +BABEL_OP1_204_75981_20130304_014705_inLine +BABEL_OP1_204_75981_20130304_014705_outLine +BABEL_OP1_204_76069_20130821_001213_inLine +BABEL_OP1_204_76069_20130821_001213_outLine +BABEL_OP1_204_76155_20130129_210554_inLine +BABEL_OP1_204_76155_20130129_210554_outLine +BABEL_OP1_204_76793_20130812_204256_inLine +BABEL_OP1_204_76793_20130812_204256_outLine +BABEL_OP1_204_76902_20130520_161816_inLine +BABEL_OP1_204_76902_20130520_161816_outLine +BABEL_OP1_204_77139_20130103_214953_inLine +BABEL_OP1_204_77139_20130103_214953_outLine +BABEL_OP1_204_77225_20130825_155026_inLine +BABEL_OP1_204_77225_20130825_155026_outLine +BABEL_OP1_204_77225_20130825_160328_inLine +BABEL_OP1_204_77225_20130825_160328_outLine +BABEL_OP1_204_77242_20130310_031438_inLine +BABEL_OP1_204_77242_20130310_031438_outLine +BABEL_OP1_204_77391_20130114_214011_inLine +BABEL_OP1_204_77391_20130114_214011_outLine +BABEL_OP1_204_77909_20130822_005415_inLine +BABEL_OP1_204_77909_20130822_005415_outLine +BABEL_OP1_204_77974_20130305_023753_inLine +BABEL_OP1_204_77974_20130305_023753_outLine +BABEL_OP1_204_78360_20130227_174048_inLine +BABEL_OP1_204_78360_20130227_174048_outLine +BABEL_OP1_204_78482_20130208_181819_inLine +BABEL_OP1_204_78482_20130208_181819_outLine +BABEL_OP1_204_78749_20130607_175636_inLine +BABEL_OP1_204_78749_20130607_175636_outLine +BABEL_OP1_204_79028_20130818_170543_inLine +BABEL_OP1_204_79028_20130818_170543_outLine +BABEL_OP1_204_79107_20130311_033735_inLine +BABEL_OP1_204_79107_20130311_033735_outLine +BABEL_OP1_204_79429_20130522_180804_inLine +BABEL_OP1_204_79429_20130522_180804_outLine 
+BABEL_OP1_204_79723_20130815_161014_inLine +BABEL_OP1_204_79723_20130815_161014_outLine +BABEL_OP1_204_79858_20130108_175702_inLine +BABEL_OP1_204_79858_20130108_175702_outLine +BABEL_OP1_204_79898_20130607_173143_inLine +BABEL_OP1_204_79898_20130607_173143_outLine +BABEL_OP1_204_80577_20130310_051912_inLine +BABEL_OP1_204_80577_20130310_051912_outLine +BABEL_OP1_204_80622_20130325_141431_inLine +BABEL_OP1_204_80622_20130325_141431_outLine +BABEL_OP1_204_80897_20130130_194208_inLine +BABEL_OP1_204_80897_20130130_194208_outLine +BABEL_OP1_204_81392_20130129_021012_inLine +BABEL_OP1_204_81392_20130129_021012_outLine +BABEL_OP1_204_81427_20130118_211419_inLine +BABEL_OP1_204_81427_20130118_211419_outLine +BABEL_OP1_204_81433_20130217_234814_inLine +BABEL_OP1_204_81433_20130217_234814_outLine +BABEL_OP1_204_81553_20130225_183924_inLine +BABEL_OP1_204_81553_20130225_183924_outLine +BABEL_OP1_204_81581_20130726_141606_inLine +BABEL_OP1_204_81581_20130726_141606_outLine +BABEL_OP1_204_81674_20130522_172505_inLine +BABEL_OP1_204_81674_20130522_172505_outLine +BABEL_OP1_204_81769_20130710_161840_inLine +BABEL_OP1_204_81769_20130710_161840_outLine +BABEL_OP1_204_82224_20130224_184149_inLine +BABEL_OP1_204_82224_20130224_184149_outLine +BABEL_OP1_204_83436_20130111_223716_inLine +BABEL_OP1_204_83436_20130111_223716_outLine +BABEL_OP1_204_83643_20130830_005334_inLine +BABEL_OP1_204_83643_20130830_005334_outLine +BABEL_OP1_204_83813_20130704_172117_inLine +BABEL_OP1_204_83813_20130704_172117_outLine +BABEL_OP1_204_83851_20130114_065704_inLine +BABEL_OP1_204_83851_20130114_065704_outLine +BABEL_OP1_204_83974_20130607_152537_inLine +BABEL_OP1_204_83974_20130607_152537_outLine +BABEL_OP1_204_84079_20130821_203040_inLine +BABEL_OP1_204_84079_20130821_203040_outLine +BABEL_OP1_204_84194_20130716_194041_inLine +BABEL_OP1_204_84194_20130716_194041_outLine +BABEL_OP1_204_84370_20130310_050228_inLine +BABEL_OP1_204_84370_20130310_050228_outLine +BABEL_OP1_204_84469_20130210_003435_inLine +BABEL_OP1_204_84469_20130210_003435_outLine +BABEL_OP1_204_84541_20130820_230752_inLine +BABEL_OP1_204_84541_20130820_230752_outLine +BABEL_OP1_204_84709_20130518_125528_inLine +BABEL_OP1_204_84709_20130518_125528_outLine +BABEL_OP1_204_84768_20130106_033700_inLine +BABEL_OP1_204_84768_20130106_033700_outLine +BABEL_OP1_204_84823_20130218_212443_inLine +BABEL_OP1_204_84823_20130218_212443_outLine +BABEL_OP1_204_85179_20130209_014947_inLine +BABEL_OP1_204_85179_20130209_014947_outLine +BABEL_OP1_204_85246_20130516_211538_inLine +BABEL_OP1_204_85246_20130516_211538_outLine +BABEL_OP1_204_85254_20130312_035109_inLine +BABEL_OP1_204_85254_20130312_035109_outLine +BABEL_OP1_204_85322_20130107_192937_inLine +BABEL_OP1_204_85322_20130107_192937_outLine +BABEL_OP1_204_85340_20130111_212907_inLine +BABEL_OP1_204_85340_20130111_212907_outLine +BABEL_OP1_204_85519_20130301_161437_inLine +BABEL_OP1_204_85519_20130301_161437_outLine +BABEL_OP1_204_85651_20130216_204250_inLine +BABEL_OP1_204_85651_20130216_204250_outLine +BABEL_OP1_204_86597_20130310_031951_inLine +BABEL_OP1_204_86597_20130310_031951_outLine +BABEL_OP1_204_86722_20130114_025704_inLine +BABEL_OP1_204_86722_20130114_025704_outLine +BABEL_OP1_204_86826_20130627_190707_inLine +BABEL_OP1_204_86826_20130627_190707_outLine +BABEL_OP1_204_86830_20130613_181407_inLine +BABEL_OP1_204_86830_20130613_181407_outLine +BABEL_OP1_204_86845_20130705_192829_inLine +BABEL_OP1_204_86845_20130705_192829_outLine +BABEL_OP1_204_86845_20130705_193447_inLine 
+BABEL_OP1_204_86845_20130705_193447_outLine +BABEL_OP1_204_86885_20130825_200228_inLine +BABEL_OP1_204_86885_20130825_200228_outLine +BABEL_OP1_204_86952_20121231_204819_inLine +BABEL_OP1_204_86952_20121231_204819_outLine +BABEL_OP1_204_87280_20130209_180508_inLine +BABEL_OP1_204_87280_20130209_180508_outLine +BABEL_OP1_204_87470_20130122_032958_inLine +BABEL_OP1_204_87470_20130122_032958_outLine +BABEL_OP1_204_87889_20130225_183607_inLine +BABEL_OP1_204_87889_20130225_183607_outLine +BABEL_OP1_204_88394_20130708_175704_inLine +BABEL_OP1_204_88394_20130708_175704_outLine +BABEL_OP1_204_88686_20121222_200228_inLine +BABEL_OP1_204_88686_20121222_200228_outLine +BABEL_OP1_204_88873_20130108_214456_inLine +BABEL_OP1_204_88873_20130108_214456_outLine +BABEL_OP1_204_88982_20130129_004023_inLine +BABEL_OP1_204_88982_20130129_004023_outLine +BABEL_OP1_204_89372_20121219_192043_inLine +BABEL_OP1_204_89372_20121219_192043_outLine +BABEL_OP1_204_89665_20130122_035608_inLine +BABEL_OP1_204_89665_20130122_035608_outLine +BABEL_OP1_204_89718_20130821_214732_inLine +BABEL_OP1_204_89718_20130821_214732_outLine +BABEL_OP1_204_89794_20130321_180037_inLine +BABEL_OP1_204_89794_20130321_180037_outLine +BABEL_OP1_204_89794_20130321_181250_inLine +BABEL_OP1_204_89794_20130321_181250_outLine +BABEL_OP1_204_89888_20130115_181504_inLine +BABEL_OP1_204_89888_20130115_181504_outLine +BABEL_OP1_204_90440_20130509_133501_inLine +BABEL_OP1_204_90440_20130509_133501_outLine +BABEL_OP1_204_90740_20130605_163314_inLine +BABEL_OP1_204_90740_20130605_163314_outLine +BABEL_OP1_204_90832_20130310_045516_inLine +BABEL_OP1_204_90832_20130310_045516_outLine +BABEL_OP1_204_90930_20130901_200839_inLine +BABEL_OP1_204_90930_20130901_200839_outLine +BABEL_OP1_204_90935_20130116_220822_inLine +BABEL_OP1_204_90935_20130116_220822_outLine +BABEL_OP1_204_91125_20130112_215414_inLine +BABEL_OP1_204_91125_20130112_215414_outLine +BABEL_OP1_204_91189_20130516_000538_inLine +BABEL_OP1_204_91189_20130516_000538_outLine +BABEL_OP1_204_91252_20130821_000400_inLine +BABEL_OP1_204_91252_20130821_000400_outLine +BABEL_OP1_204_91411_20130710_193521_inLine +BABEL_OP1_204_91411_20130710_193521_outLine +BABEL_OP1_204_91463_20130120_184700_inLine +BABEL_OP1_204_91463_20130120_184700_outLine +BABEL_OP1_204_91581_20130210_013423_inLine +BABEL_OP1_204_91581_20130210_013423_outLine +BABEL_OP1_204_91825_20121224_185428_inLine +BABEL_OP1_204_91825_20121224_185428_outLine +BABEL_OP1_204_91825_20121224_191424_inLine +BABEL_OP1_204_91825_20121224_191424_outLine +BABEL_OP1_204_91884_20130215_205051_inLine +BABEL_OP1_204_91884_20130215_205051_outLine +BABEL_OP1_204_91971_20130818_152604_inLine +BABEL_OP1_204_91971_20130818_152604_outLine +BABEL_OP1_204_92077_20130614_165026_inLine +BABEL_OP1_204_92077_20130614_165026_outLine +BABEL_OP1_204_92176_20130120_165309_inLine +BABEL_OP1_204_92176_20130120_165309_outLine +BABEL_OP1_204_92252_20130812_220232_inLine +BABEL_OP1_204_92252_20130812_220232_outLine +BABEL_OP1_204_92941_20130120_230410_inLine +BABEL_OP1_204_92941_20130120_230410_outLine +BABEL_OP1_204_93320_20130311_022333_inLine +BABEL_OP1_204_93320_20130311_022333_outLine +BABEL_OP1_204_93320_20130311_023402_inLine +BABEL_OP1_204_93320_20130311_023402_outLine +BABEL_OP1_204_93443_20130803_153015_inLine +BABEL_OP1_204_93443_20130803_153015_outLine +BABEL_OP1_204_93858_20130311_005700_inLine +BABEL_OP1_204_93858_20130311_005700_outLine +BABEL_OP1_204_93937_20130313_172438_inLine +BABEL_OP1_204_93937_20130313_172438_outLine 
+BABEL_OP1_204_93946_20130210_172621_inLine +BABEL_OP1_204_93946_20130210_172621_outLine +BABEL_OP1_204_93946_20130210_175020_inLine +BABEL_OP1_204_93946_20130210_175020_outLine +BABEL_OP1_204_94035_20130704_185858_inLine +BABEL_OP1_204_94035_20130704_185858_outLine +BABEL_OP1_204_94166_20130212_185608_inLine +BABEL_OP1_204_94166_20130212_185608_outLine +BABEL_OP1_204_94212_20130709_195201_inLine +BABEL_OP1_204_94212_20130709_195201_outLine +BABEL_OP1_204_94237_20130227_204940_inLine +BABEL_OP1_204_94237_20130227_204940_outLine +BABEL_OP1_204_94262_20130307_222214_inLine +BABEL_OP1_204_94262_20130307_222214_outLine +BABEL_OP1_204_94409_20130130_012526_inLine +BABEL_OP1_204_94409_20130130_012526_outLine +BABEL_OP1_204_94713_20130710_173705_inLine +BABEL_OP1_204_94713_20130710_173705_outLine +BABEL_OP1_204_94803_20130524_125715_inLine +BABEL_OP1_204_94803_20130524_125715_outLine +BABEL_OP1_204_94891_20130520_200303_inLine +BABEL_OP1_204_94891_20130520_200303_outLine +BABEL_OP1_204_94969_20130516_174057_inLine +BABEL_OP1_204_94969_20130516_174057_outLine +BABEL_OP1_204_95124_20130521_171211_inLine +BABEL_OP1_204_95124_20130521_171211_outLine +BABEL_OP1_204_95269_20130121_040957_inLine +BABEL_OP1_204_95269_20130121_040957_outLine +BABEL_OP1_204_95338_20130617_183230_inLine +BABEL_OP1_204_95338_20130617_183230_outLine +BABEL_OP1_204_95467_20130310_041013_inLine +BABEL_OP1_204_95467_20130310_041013_outLine +BABEL_OP1_204_95571_20130605_173956_inLine +BABEL_OP1_204_95571_20130605_173956_outLine +BABEL_OP1_204_95583_20130107_233706_inLine +BABEL_OP1_204_95583_20130107_233706_outLine +BABEL_OP1_204_95598_20121218_225349_inLine +BABEL_OP1_204_95598_20121218_225349_outLine +BABEL_OP1_204_96504_20130111_012757_inLine +BABEL_OP1_204_96504_20130111_012757_outLine +BABEL_OP1_204_96820_20130120_015641_inLine +BABEL_OP1_204_96820_20130120_015641_outLine +BABEL_OP1_204_96842_20130614_172939_inLine +BABEL_OP1_204_96842_20130614_172939_outLine +BABEL_OP1_204_96934_20130119_033411_inLine +BABEL_OP1_204_96934_20130119_033411_outLine +BABEL_OP1_204_96940_20130520_190004_inLine +BABEL_OP1_204_96940_20130520_190004_outLine +BABEL_OP1_204_97345_20130705_170655_inLine +BABEL_OP1_204_97345_20130705_170655_outLine +BABEL_OP1_204_97448_20130830_013253_inLine +BABEL_OP1_204_97448_20130830_013253_outLine +BABEL_OP1_204_97588_20130106_172133_inLine +BABEL_OP1_204_97588_20130106_172133_outLine +BABEL_OP1_204_97604_20130224_214511_inLine +BABEL_OP1_204_97604_20130224_214511_outLine +BABEL_OP1_204_97911_20130701_170644_inLine +BABEL_OP1_204_97911_20130701_170644_outLine +BABEL_OP1_204_98255_20130716_204027_inLine +BABEL_OP1_204_98255_20130716_204027_outLine +BABEL_OP1_204_98311_20130114_061903_inLine +BABEL_OP1_204_98311_20130114_061903_outLine +BABEL_OP1_204_98390_20130114_195309_inLine +BABEL_OP1_204_98390_20130114_195309_outLine +BABEL_OP1_204_98580_20130130_233406_inLine +BABEL_OP1_204_98580_20130130_233406_outLine +BABEL_OP1_204_98678_20130215_215447_inLine +BABEL_OP1_204_98678_20130215_215447_outLine +BABEL_OP1_204_98888_20130130_200414_inLine +BABEL_OP1_204_98888_20130130_200414_outLine +BABEL_OP1_204_98909_20130118_224024_inLine +BABEL_OP1_204_98909_20130118_224024_outLine +BABEL_OP1_204_99264_20130211_183956_inLine +BABEL_OP1_204_99264_20130211_183956_outLine +BABEL_OP1_204_99344_20130705_180532_inLine +BABEL_OP1_204_99344_20130705_180532_outLine +BABEL_OP1_204_99487_20130111_175232_inLine +BABEL_OP1_204_99487_20130111_175232_outLine +BABEL_OP1_204_99516_20130103_172113_inLine 
+BABEL_OP1_204_99516_20130103_172113_outLine +BABEL_OP1_204_99718_20130114_221147_inLine +BABEL_OP1_204_99718_20130114_221147_outLine +BABEL_OP1_204_99975_20130812_220558_inLine +BABEL_OP1_204_99975_20130812_220558_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/evalpart1.list b/egs/babel/s5d/conf/lists/204-tamil/evalpart1.list new file mode 100644 index 00000000000..c5dbddb1867 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/evalpart1.list @@ -0,0 +1,186 @@ +BABEL_OP1_204_10638_20130510_124441_inLine +BABEL_OP1_204_10638_20130510_124441_outLine +BABEL_OP1_204_12321_20130220_211618_inLine +BABEL_OP1_204_12321_20130220_211618_outLine +BABEL_OP1_204_12635_20130601_152113_inLine +BABEL_OP1_204_12635_20130601_152113_outLine +BABEL_OP1_204_14350_20130113_023333_inLine +BABEL_OP1_204_14350_20130113_023333_outLine +BABEL_OP1_204_14723_20130710_180819_inLine +BABEL_OP1_204_14723_20130710_180819_outLine +BABEL_OP1_204_14875_20130111_192622_inLine +BABEL_OP1_204_14875_20130111_192622_outLine +BABEL_OP1_204_15227_20130624_180548_inLine +BABEL_OP1_204_15227_20130624_180548_outLine +BABEL_OP1_204_15848_20121218_180011_inLine +BABEL_OP1_204_15848_20121218_180011_outLine +BABEL_OP1_204_16351_20130705_205024_inLine +BABEL_OP1_204_16351_20130705_205024_outLine +BABEL_OP1_204_17165_20130130_191341_inLine +BABEL_OP1_204_17165_20130130_191341_outLine +BABEL_OP1_204_18863_20130210_164314_inLine +BABEL_OP1_204_18863_20130210_164314_outLine +BABEL_OP1_204_19545_20130122_164148_inLine +BABEL_OP1_204_19545_20130122_164148_outLine +BABEL_OP1_204_21029_20130107_212248_inLine +BABEL_OP1_204_21029_20130107_212248_outLine +BABEL_OP1_204_21159_20130607_143737_inLine +BABEL_OP1_204_21159_20130607_143737_outLine +BABEL_OP1_204_21794_20130129_233131_inLine +BABEL_OP1_204_21794_20130129_233131_outLine +BABEL_OP1_204_22641_20121224_195014_inLine +BABEL_OP1_204_22641_20121224_195014_outLine +BABEL_OP1_204_23196_20130605_144617_inLine +BABEL_OP1_204_23196_20130605_144617_outLine +BABEL_OP1_204_23395_20130126_013244_inLine +BABEL_OP1_204_23395_20130126_013244_outLine +BABEL_OP1_204_25068_20130901_230020_inLine +BABEL_OP1_204_25068_20130901_230020_outLine +BABEL_OP1_204_25068_20130901_235001_inLine +BABEL_OP1_204_25068_20130901_235001_outLine +BABEL_OP1_204_28585_20130208_014141_inLine +BABEL_OP1_204_28585_20130208_014141_outLine +BABEL_OP1_204_28871_20121219_184300_inLine +BABEL_OP1_204_28871_20121219_184300_outLine +BABEL_OP1_204_29208_20130127_011057_inLine +BABEL_OP1_204_29208_20130127_011057_outLine +BABEL_OP1_204_29352_20130628_145610_inLine +BABEL_OP1_204_29352_20130628_145610_outLine +BABEL_OP1_204_29777_20130211_193239_inLine +BABEL_OP1_204_29777_20130211_193239_outLine +BABEL_OP1_204_32832_20130208_200126_inLine +BABEL_OP1_204_32832_20130208_200126_outLine +BABEL_OP1_204_32961_20130518_164254_inLine +BABEL_OP1_204_32961_20130518_164254_outLine +BABEL_OP1_204_33635_20130127_024601_inLine +BABEL_OP1_204_33635_20130127_024601_outLine +BABEL_OP1_204_37281_20130131_020847_inLine +BABEL_OP1_204_37281_20130131_020847_outLine +BABEL_OP1_204_39579_20130724_163251_inLine +BABEL_OP1_204_39579_20130724_163251_outLine +BABEL_OP1_204_41493_20121218_185431_inLine +BABEL_OP1_204_41493_20121218_185431_outLine +BABEL_OP1_204_44255_20130225_230219_inLine +BABEL_OP1_204_44255_20130225_230219_outLine +BABEL_OP1_204_44681_20130830_000000_inLine +BABEL_OP1_204_44681_20130830_000000_outLine +BABEL_OP1_204_45106_20130325_003034_inLine +BABEL_OP1_204_45106_20130325_003034_outLine +BABEL_OP1_204_45106_20130325_004324_inLine 
+BABEL_OP1_204_45106_20130325_004324_outLine +BABEL_OP1_204_46202_20130524_162004_inLine +BABEL_OP1_204_46202_20130524_162004_outLine +BABEL_OP1_204_46625_20121219_193926_inLine +BABEL_OP1_204_46625_20121219_193926_outLine +BABEL_OP1_204_47882_20130705_203354_inLine +BABEL_OP1_204_47882_20130705_203354_outLine +BABEL_OP1_204_48016_20130311_033904_inLine +BABEL_OP1_204_48016_20130311_033904_outLine +BABEL_OP1_204_48399_20130112_205650_inLine +BABEL_OP1_204_48399_20130112_205650_outLine +BABEL_OP1_204_50958_20130129_195029_inLine +BABEL_OP1_204_50958_20130129_195029_outLine +BABEL_OP1_204_53206_20130704_211512_inLine +BABEL_OP1_204_53206_20130704_211512_outLine +BABEL_OP1_204_56019_20130301_165116_inLine +BABEL_OP1_204_56019_20130301_165116_outLine +BABEL_OP1_204_57219_20130311_044204_inLine +BABEL_OP1_204_57219_20130311_044204_outLine +BABEL_OP1_204_57609_20130122_194937_inLine +BABEL_OP1_204_57609_20130122_194937_outLine +BABEL_OP1_204_57654_20130114_074621_inLine +BABEL_OP1_204_57654_20130114_074621_outLine +BABEL_OP1_204_59993_20130104_172518_inLine +BABEL_OP1_204_59993_20130104_172518_outLine +BABEL_OP1_204_62155_20130215_213833_inLine +BABEL_OP1_204_62155_20130215_213833_outLine +BABEL_OP1_204_63481_20121224_021602_inLine +BABEL_OP1_204_63481_20121224_021602_outLine +BABEL_OP1_204_63523_20130301_162515_inLine +BABEL_OP1_204_63523_20130301_162515_outLine +BABEL_OP1_204_65339_20130821_194428_inLine +BABEL_OP1_204_65339_20130821_194428_outLine +BABEL_OP1_204_67592_20130211_032508_inLine +BABEL_OP1_204_67592_20130211_032508_outLine +BABEL_OP1_204_69972_20130802_175346_inLine +BABEL_OP1_204_69972_20130802_175346_outLine +BABEL_OP1_204_69982_20130310_050949_inLine +BABEL_OP1_204_69982_20130310_050949_outLine +BABEL_OP1_204_70110_20121219_223303_inLine +BABEL_OP1_204_70110_20121219_223303_outLine +BABEL_OP1_204_71038_20130225_191007_inLine +BABEL_OP1_204_71038_20130225_191007_outLine +BABEL_OP1_204_71333_20130111_181914_inLine +BABEL_OP1_204_71333_20130111_181914_outLine +BABEL_OP1_204_71704_20130114_182140_inLine +BABEL_OP1_204_71704_20130114_182140_outLine +BABEL_OP1_204_71754_20130822_005036_inLine +BABEL_OP1_204_71754_20130822_005036_outLine +BABEL_OP1_204_73622_20130108_180939_inLine +BABEL_OP1_204_73622_20130108_180939_outLine +BABEL_OP1_204_73837_20130115_213251_inLine +BABEL_OP1_204_73837_20130115_213251_outLine +BABEL_OP1_204_77909_20130822_005415_inLine +BABEL_OP1_204_77909_20130822_005415_outLine +BABEL_OP1_204_81427_20130118_211419_inLine +BABEL_OP1_204_81427_20130118_211419_outLine +BABEL_OP1_204_84370_20130310_050228_inLine +BABEL_OP1_204_84370_20130310_050228_outLine +BABEL_OP1_204_84709_20130518_125528_inLine +BABEL_OP1_204_84709_20130518_125528_outLine +BABEL_OP1_204_84823_20130218_212443_inLine +BABEL_OP1_204_84823_20130218_212443_outLine +BABEL_OP1_204_86830_20130613_181407_inLine +BABEL_OP1_204_86830_20130613_181407_outLine +BABEL_OP1_204_88394_20130708_175704_inLine +BABEL_OP1_204_88394_20130708_175704_outLine +BABEL_OP1_204_88686_20121222_200228_inLine +BABEL_OP1_204_88686_20121222_200228_outLine +BABEL_OP1_204_88873_20130108_214456_inLine +BABEL_OP1_204_88873_20130108_214456_outLine +BABEL_OP1_204_88982_20130129_004023_inLine +BABEL_OP1_204_88982_20130129_004023_outLine +BABEL_OP1_204_89372_20121219_192043_inLine +BABEL_OP1_204_89372_20121219_192043_outLine +BABEL_OP1_204_89718_20130821_214732_inLine +BABEL_OP1_204_89718_20130821_214732_outLine +BABEL_OP1_204_89794_20130321_180037_inLine +BABEL_OP1_204_89794_20130321_180037_outLine 
+BABEL_OP1_204_89794_20130321_181250_inLine +BABEL_OP1_204_89794_20130321_181250_outLine +BABEL_OP1_204_90930_20130901_200839_inLine +BABEL_OP1_204_90930_20130901_200839_outLine +BABEL_OP1_204_90935_20130116_220822_inLine +BABEL_OP1_204_90935_20130116_220822_outLine +BABEL_OP1_204_91252_20130821_000400_inLine +BABEL_OP1_204_91252_20130821_000400_outLine +BABEL_OP1_204_91884_20130215_205051_inLine +BABEL_OP1_204_91884_20130215_205051_outLine +BABEL_OP1_204_91971_20130818_152604_inLine +BABEL_OP1_204_91971_20130818_152604_outLine +BABEL_OP1_204_92176_20130120_165309_inLine +BABEL_OP1_204_92176_20130120_165309_outLine +BABEL_OP1_204_92941_20130120_230410_inLine +BABEL_OP1_204_92941_20130120_230410_outLine +BABEL_OP1_204_94166_20130212_185608_inLine +BABEL_OP1_204_94166_20130212_185608_outLine +BABEL_OP1_204_94212_20130709_195201_inLine +BABEL_OP1_204_94212_20130709_195201_outLine +BABEL_OP1_204_95598_20121218_225349_inLine +BABEL_OP1_204_95598_20121218_225349_outLine +BABEL_OP1_204_96934_20130119_033411_inLine +BABEL_OP1_204_96934_20130119_033411_outLine +BABEL_OP1_204_97345_20130705_170655_inLine +BABEL_OP1_204_97345_20130705_170655_outLine +BABEL_OP1_204_97448_20130830_013253_inLine +BABEL_OP1_204_97448_20130830_013253_outLine +BABEL_OP1_204_98580_20130130_233406_inLine +BABEL_OP1_204_98580_20130130_233406_outLine +BABEL_OP1_204_98888_20130130_200414_inLine +BABEL_OP1_204_98888_20130130_200414_outLine +BABEL_OP1_204_99264_20130211_183956_inLine +BABEL_OP1_204_99264_20130211_183956_outLine +BABEL_OP1_204_99344_20130705_180532_inLine +BABEL_OP1_204_99344_20130705_180532_outLine +BABEL_OP1_204_99516_20130103_172113_inLine +BABEL_OP1_204_99516_20130103_172113_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/train.FullLP.list b/egs/babel/s5d/conf/lists/204-tamil/train.FullLP.list new file mode 100644 index 00000000000..84a8b1815a2 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/train.FullLP.list @@ -0,0 +1,778 @@ +BABEL_OP1_204_10002_20130523_142107_inLine +BABEL_OP1_204_10002_20130523_142107_outLine +BABEL_OP1_204_10036_20130116_163652_inLine +BABEL_OP1_204_10036_20130116_163652_outLine +BABEL_OP1_204_10184_20130217_232154_inLine +BABEL_OP1_204_10184_20130217_232154_outLine +BABEL_OP1_204_10411_20130313_042405_inLine +BABEL_OP1_204_10411_20130313_042405_outLine +BABEL_OP1_204_10469_20130708_201653_inLine +BABEL_OP1_204_10469_20130708_201653_outLine +BABEL_OP1_204_10647_20130225_175457_inLine +BABEL_OP1_204_10647_20130225_175457_outLine +BABEL_OP1_204_10647_20130225_184106_inLine +BABEL_OP1_204_10647_20130225_184106_outLine +BABEL_OP1_204_10901_20130120_220533_inLine +BABEL_OP1_204_10901_20130120_220533_outLine +BABEL_OP1_204_10938_20130118_213056_inLine +BABEL_OP1_204_10938_20130118_213056_outLine +BABEL_OP1_204_10966_20130114_210156_inLine +BABEL_OP1_204_10966_20130114_210156_outLine +BABEL_OP1_204_11310_20130705_180254_inLine +BABEL_OP1_204_11310_20130705_180254_outLine +BABEL_OP1_204_11352_20130220_023807_inLine +BABEL_OP1_204_11352_20130220_023807_outLine +BABEL_OP1_204_11486_20130607_155406_inLine +BABEL_OP1_204_11486_20130607_155406_outLine +BABEL_OP1_204_11581_20130222_215500_inLine +BABEL_OP1_204_11581_20130222_215500_outLine +BABEL_OP1_204_11581_20130222_220101_inLine +BABEL_OP1_204_11581_20130222_220101_outLine +BABEL_OP1_204_11663_20130319_201815_inLine +BABEL_OP1_204_11663_20130319_201815_outLine +BABEL_OP1_204_11673_20121220_214236_inLine +BABEL_OP1_204_11673_20121220_214236_outLine +BABEL_OP1_204_11723_20130803_144247_inLine 
+BABEL_OP1_204_11723_20130803_144247_outLine +BABEL_OP1_204_11797_20130107_214732_inLine +BABEL_OP1_204_11797_20130107_214732_outLine +BABEL_OP1_204_12036_20130102_170500_inLine +BABEL_OP1_204_12036_20130102_170500_outLine +BABEL_OP1_204_12036_20130102_171149_inLine +BABEL_OP1_204_12036_20130102_171149_outLine +BABEL_OP1_204_12220_20130120_183204_inLine +BABEL_OP1_204_12220_20130120_183204_outLine +BABEL_OP1_204_12242_20130111_014802_inLine +BABEL_OP1_204_12242_20130111_014802_outLine +BABEL_OP1_204_12846_20130515_220132_inLine +BABEL_OP1_204_12846_20130515_220132_outLine +BABEL_OP1_204_12851_20121219_172018_inLine +BABEL_OP1_204_12851_20121219_172018_outLine +BABEL_OP1_204_13030_20130120_210514_inLine +BABEL_OP1_204_13184_20130228_032847_inLine +BABEL_OP1_204_13184_20130228_032847_outLine +BABEL_OP1_204_13324_20130103_211640_inLine +BABEL_OP1_204_13324_20130103_211640_outLine +BABEL_OP1_204_13490_20130314_031843_inLine +BABEL_OP1_204_13490_20130314_031843_outLine +BABEL_OP1_204_13744_20130106_232543_inLine +BABEL_OP1_204_13744_20130106_232543_outLine +BABEL_OP1_204_13776_20130626_215241_inLine +BABEL_OP1_204_13776_20130626_215241_outLine +BABEL_OP1_204_13792_20121231_015544_inLine +BABEL_OP1_204_13792_20121231_015544_outLine +BABEL_OP1_204_14719_20130219_231741_inLine +BABEL_OP1_204_14719_20130219_231741_outLine +BABEL_OP1_204_14719_20130219_232513_inLine +BABEL_OP1_204_14719_20130219_232513_outLine +BABEL_OP1_204_14725_20130111_204740_inLine +BABEL_OP1_204_14725_20130111_204740_outLine +BABEL_OP1_204_14807_20130222_213831_inLine +BABEL_OP1_204_14807_20130222_213831_outLine +BABEL_OP1_204_15730_20130103_154749_inLine +BABEL_OP1_204_15730_20130103_154749_outLine +BABEL_OP1_204_15985_20130627_154935_inLine +BABEL_OP1_204_15985_20130627_154935_outLine +BABEL_OP1_204_16249_20130906_003049_inLine +BABEL_OP1_204_16249_20130906_003049_outLine +BABEL_OP1_204_16726_20130815_164352_inLine +BABEL_OP1_204_16726_20130815_164352_outLine +BABEL_OP1_204_16800_20130307_025108_inLine +BABEL_OP1_204_16800_20130307_025108_outLine +BABEL_OP1_204_16802_20130821_234724_inLine +BABEL_OP1_204_16802_20130821_234724_outLine +BABEL_OP1_204_16838_20130703_183021_inLine +BABEL_OP1_204_16838_20130703_183021_outLine +BABEL_OP1_204_17032_20130129_012026_inLine +BABEL_OP1_204_17032_20130129_012026_outLine +BABEL_OP1_204_17420_20130426_172522_inLine +BABEL_OP1_204_17420_20130426_172522_outLine +BABEL_OP1_204_17420_20130426_174314_inLine +BABEL_OP1_204_17420_20130426_174314_outLine +BABEL_OP1_204_17496_20130325_015543_inLine +BABEL_OP1_204_17496_20130325_015543_outLine +BABEL_OP1_204_18037_20130825_200728_inLine +BABEL_OP1_204_18037_20130825_200728_outLine +BABEL_OP1_204_18280_20130818_172915_inLine +BABEL_OP1_204_18280_20130818_172915_outLine +BABEL_OP1_204_18939_20130110_214704_inLine +BABEL_OP1_204_18939_20130110_214704_outLine +BABEL_OP1_204_18992_20130830_001646_inLine +BABEL_OP1_204_18992_20130830_001646_outLine +BABEL_OP1_204_19134_20130120_191037_inLine +BABEL_OP1_204_19134_20130120_191037_outLine +BABEL_OP1_204_19461_20130704_154920_inLine +BABEL_OP1_204_19461_20130704_154920_outLine +BABEL_OP1_204_19589_20130304_020747_inLine +BABEL_OP1_204_19589_20130304_020747_outLine +BABEL_OP1_204_19688_20130708_194740_inLine +BABEL_OP1_204_19688_20130708_194740_outLine +BABEL_OP1_204_20330_20130217_225055_inLine +BABEL_OP1_204_20330_20130217_225055_outLine +BABEL_OP1_204_20367_20130312_024055_inLine +BABEL_OP1_204_20367_20130312_024055_outLine +BABEL_OP1_204_20437_20130523_235611_inLine 
+BABEL_OP1_204_20437_20130523_235611_outLine +BABEL_OP1_204_20721_20130704_183621_inLine +BABEL_OP1_204_20721_20130704_183621_outLine +BABEL_OP1_204_20916_20121218_174604_inLine +BABEL_OP1_204_20916_20121218_174604_outLine +BABEL_OP1_204_20985_20130129_225135_inLine +BABEL_OP1_204_20985_20130129_225135_outLine +BABEL_OP1_204_21426_20130515_212900_inLine +BABEL_OP1_204_21426_20130515_212900_outLine +BABEL_OP1_204_21435_20130215_200722_inLine +BABEL_OP1_204_21435_20130215_200722_outLine +BABEL_OP1_204_21543_20130901_203127_inLine +BABEL_OP1_204_21543_20130901_203127_outLine +BABEL_OP1_204_21807_20130127_033626_inLine +BABEL_OP1_204_21807_20130127_033626_outLine +BABEL_OP1_204_21807_20130127_041609_inLine +BABEL_OP1_204_21807_20130127_041609_outLine +BABEL_OP1_204_22321_20130107_231204_inLine +BABEL_OP1_204_22321_20130107_231204_outLine +BABEL_OP1_204_22643_20130709_192909_inLine +BABEL_OP1_204_22643_20130709_192909_outLine +BABEL_OP1_204_23006_20130115_200742_inLine +BABEL_OP1_204_23006_20130115_200742_outLine +BABEL_OP1_204_23046_20130114_165057_inLine +BABEL_OP1_204_23046_20130114_165057_outLine +BABEL_OP1_204_23153_20130128_223235_inLine +BABEL_OP1_204_23153_20130128_223235_outLine +BABEL_OP1_204_23190_20130116_191153_inLine +BABEL_OP1_204_23190_20130116_191153_outLine +BABEL_OP1_204_23752_20130517_181521_inLine +BABEL_OP1_204_23752_20130517_181521_outLine +BABEL_OP1_204_23980_20130127_031636_inLine +BABEL_OP1_204_23980_20130127_031636_outLine +BABEL_OP1_204_24010_20130510_160627_inLine +BABEL_OP1_204_24010_20130510_160627_outLine +BABEL_OP1_204_24221_20130803_162307_inLine +BABEL_OP1_204_24221_20130803_162307_outLine +BABEL_OP1_204_24231_20130702_165725_inLine +BABEL_OP1_204_24231_20130702_165725_outLine +BABEL_OP1_204_24253_20130216_173828_inLine +BABEL_OP1_204_24253_20130216_173828_outLine +BABEL_OP1_204_24587_20130812_201846_inLine +BABEL_OP1_204_24587_20130812_201846_outLine +BABEL_OP1_204_24605_20130111_185213_inLine +BABEL_OP1_204_24605_20130111_185213_outLine +BABEL_OP1_204_26381_20130906_003653_inLine +BABEL_OP1_204_26381_20130906_003653_outLine +BABEL_OP1_204_26388_20121222_180059_inLine +BABEL_OP1_204_26388_20121222_180059_outLine +BABEL_OP1_204_26478_20130628_163250_inLine +BABEL_OP1_204_26478_20130628_163250_outLine +BABEL_OP1_204_27042_20130215_015654_inLine +BABEL_OP1_204_27042_20130215_015654_outLine +BABEL_OP1_204_27203_20130123_034459_inLine +BABEL_OP1_204_27203_20130123_034459_outLine +BABEL_OP1_204_27478_20130219_233409_inLine +BABEL_OP1_204_27478_20130219_233409_outLine +BABEL_OP1_204_27841_20130225_192938_inLine +BABEL_OP1_204_27841_20130225_192938_outLine +BABEL_OP1_204_28522_20130130_021159_inLine +BABEL_OP1_204_28522_20130130_021159_outLine +BABEL_OP1_204_28595_20130515_165745_inLine +BABEL_OP1_204_28595_20130515_165745_outLine +BABEL_OP1_204_29135_20121226_012303_inLine +BABEL_OP1_204_29404_20130225_222910_inLine +BABEL_OP1_204_29404_20130225_222910_outLine +BABEL_OP1_204_29633_20130219_205935_inLine +BABEL_OP1_204_29633_20130219_205935_outLine +BABEL_OP1_204_29911_20130704_163449_inLine +BABEL_OP1_204_29911_20130704_163449_outLine +BABEL_OP1_204_30013_20130129_224621_inLine +BABEL_OP1_204_30013_20130129_224621_outLine +BABEL_OP1_204_30098_20130302_223148_inLine +BABEL_OP1_204_30098_20130302_223148_outLine +BABEL_OP1_204_30345_20130211_192641_inLine +BABEL_OP1_204_30345_20130211_192641_outLine +BABEL_OP1_204_30432_20130128_194847_inLine +BABEL_OP1_204_30432_20130128_194847_outLine +BABEL_OP1_204_31039_20130817_183417_inLine 
+BABEL_OP1_204_31039_20130817_183417_outLine +BABEL_OP1_204_31490_20130106_234029_inLine +BABEL_OP1_204_31490_20130106_234029_outLine +BABEL_OP1_204_31728_20130730_183500_inLine +BABEL_OP1_204_31728_20130730_183500_outLine +BABEL_OP1_204_32097_20121218_192753_inLine +BABEL_OP1_204_32097_20121218_192753_outLine +BABEL_OP1_204_32122_20130119_232805_inLine +BABEL_OP1_204_32122_20130119_232805_outLine +BABEL_OP1_204_32169_20130820_205304_inLine +BABEL_OP1_204_32169_20130820_205304_outLine +BABEL_OP1_204_32244_20130617_175424_inLine +BABEL_OP1_204_32244_20130617_175424_outLine +BABEL_OP1_204_32328_20130218_020809_inLine +BABEL_OP1_204_32328_20130218_020809_outLine +BABEL_OP1_204_32727_20130224_174507_inLine +BABEL_OP1_204_32727_20130224_174507_outLine +BABEL_OP1_204_33273_20130126_234135_inLine +BABEL_OP1_204_33273_20130126_234135_outLine +BABEL_OP1_204_33355_20130110_222048_inLine +BABEL_OP1_204_33355_20130110_222048_outLine +BABEL_OP1_204_33424_20130617_192727_inLine +BABEL_OP1_204_33424_20130617_192727_outLine +BABEL_OP1_204_33774_20130601_164240_inLine +BABEL_OP1_204_33774_20130601_164240_outLine +BABEL_OP1_204_33806_20130310_041206_inLine +BABEL_OP1_204_33806_20130310_041206_outLine +BABEL_OP1_204_33913_20130205_155246_inLine +BABEL_OP1_204_34197_20121229_204615_inLine +BABEL_OP1_204_34197_20121229_204615_outLine +BABEL_OP1_204_34486_20130626_205810_inLine +BABEL_OP1_204_34486_20130626_205810_outLine +BABEL_OP1_204_34688_20121231_163152_inLine +BABEL_OP1_204_34713_20130516_164824_inLine +BABEL_OP1_204_34713_20130516_164824_outLine +BABEL_OP1_204_34811_20130130_015529_inLine +BABEL_OP1_204_34811_20130130_015529_outLine +BABEL_OP1_204_34860_20130524_205736_inLine +BABEL_OP1_204_34860_20130524_205736_outLine +BABEL_OP1_204_35000_20130217_021526_inLine +BABEL_OP1_204_35000_20130217_021526_outLine +BABEL_OP1_204_36293_20130107_173251_inLine +BABEL_OP1_204_36293_20130107_173251_outLine +BABEL_OP1_204_37228_20130224_205648_inLine +BABEL_OP1_204_37228_20130224_205648_outLine +BABEL_OP1_204_38588_20130119_231312_inLine +BABEL_OP1_204_38588_20130119_231312_outLine +BABEL_OP1_204_38664_20130116_202337_inLine +BABEL_OP1_204_38664_20130116_202337_outLine +BABEL_OP1_204_38750_20130208_003349_inLine +BABEL_OP1_204_38750_20130208_003349_outLine +BABEL_OP1_204_39099_20130302_210320_inLine +BABEL_OP1_204_39099_20130302_210320_outLine +BABEL_OP1_204_39307_20130104_021512_inLine +BABEL_OP1_204_39307_20130104_021512_outLine +BABEL_OP1_204_39638_20130605_153521_inLine +BABEL_OP1_204_39638_20130605_153521_outLine +BABEL_OP1_204_39848_20130130_204605_inLine +BABEL_OP1_204_39848_20130130_204605_outLine +BABEL_OP1_204_39893_20130313_023055_inLine +BABEL_OP1_204_39893_20130313_023055_outLine +BABEL_OP1_204_40196_20130902_001447_inLine +BABEL_OP1_204_40196_20130902_001447_outLine +BABEL_OP1_204_40565_20130129_202204_inLine +BABEL_OP1_204_40565_20130129_202204_outLine +BABEL_OP1_204_40648_20130710_170435_inLine +BABEL_OP1_204_40648_20130710_170435_outLine +BABEL_OP1_204_40686_20130704_204726_inLine +BABEL_OP1_204_40686_20130704_204726_outLine +BABEL_OP1_204_41233_20130209_215355_inLine +BABEL_OP1_204_41233_20130209_215355_outLine +BABEL_OP1_204_41334_20130311_032651_inLine +BABEL_OP1_204_41334_20130311_032651_outLine +BABEL_OP1_204_41598_20130227_193020_inLine +BABEL_OP1_204_41598_20130227_193020_outLine +BABEL_OP1_204_41720_20130524_184216_inLine +BABEL_OP1_204_41720_20130524_184216_outLine +BABEL_OP1_204_41890_20130227_233410_inLine +BABEL_OP1_204_41890_20130227_233410_outLine 
+BABEL_OP1_204_41920_20130101_031856_inLine +BABEL_OP1_204_41958_20130120_013639_inLine +BABEL_OP1_204_41958_20130120_013639_outLine +BABEL_OP1_204_41958_20130120_014156_inLine +BABEL_OP1_204_41958_20130120_014156_outLine +BABEL_OP1_204_41958_20130120_015222_inLine +BABEL_OP1_204_41958_20130120_015222_outLine +BABEL_OP1_204_42299_20130613_164705_inLine +BABEL_OP1_204_42299_20130613_164705_outLine +BABEL_OP1_204_42526_20130225_185629_inLine +BABEL_OP1_204_42526_20130225_185629_outLine +BABEL_OP1_204_42942_20130127_014343_inLine +BABEL_OP1_204_42942_20130127_014343_outLine +BABEL_OP1_204_43157_20130514_222203_inLine +BABEL_OP1_204_43157_20130514_222203_outLine +BABEL_OP1_204_43286_20130104_031805_inLine +BABEL_OP1_204_43286_20130104_031805_outLine +BABEL_OP1_204_43323_20130523_152627_inLine +BABEL_OP1_204_43323_20130523_152627_outLine +BABEL_OP1_204_43368_20130118_201259_inLine +BABEL_OP1_204_43368_20130118_201259_outLine +BABEL_OP1_204_43794_20130627_212826_inLine +BABEL_OP1_204_43794_20130627_212826_outLine +BABEL_OP1_204_44347_20130220_035919_inLine +BABEL_OP1_204_44347_20130220_035919_outLine +BABEL_OP1_204_44898_20130705_195912_inLine +BABEL_OP1_204_44898_20130705_195912_outLine +BABEL_OP1_204_45121_20130618_153308_inLine +BABEL_OP1_204_45121_20130618_153308_outLine +BABEL_OP1_204_45374_20130906_011341_inLine +BABEL_OP1_204_45374_20130906_011341_outLine +BABEL_OP1_204_45459_20130302_031028_inLine +BABEL_OP1_204_45459_20130302_031028_outLine +BABEL_OP1_204_45699_20130815_000115_inLine +BABEL_OP1_204_45699_20130815_000115_outLine +BABEL_OP1_204_45770_20130116_214623_inLine +BABEL_OP1_204_45770_20130116_214623_outLine +BABEL_OP1_204_46066_20130226_201734_inLine +BABEL_OP1_204_46066_20130226_201734_outLine +BABEL_OP1_204_46169_20130218_214523_inLine +BABEL_OP1_204_46558_20130103_175101_inLine +BABEL_OP1_204_46558_20130103_175101_outLine +BABEL_OP1_204_46905_20130704_183507_inLine +BABEL_OP1_204_46905_20130704_183507_outLine +BABEL_OP1_204_47156_20130310_000732_inLine +BABEL_OP1_204_47156_20130310_000732_outLine +BABEL_OP1_204_47283_20130102_220157_inLine +BABEL_OP1_204_47283_20130102_220157_outLine +BABEL_OP1_204_47802_20130614_155949_inLine +BABEL_OP1_204_47802_20130614_155949_outLine +BABEL_OP1_204_47823_20130209_191710_inLine +BABEL_OP1_204_47823_20130209_191710_outLine +BABEL_OP1_204_47878_20130128_213649_inLine +BABEL_OP1_204_47878_20130128_213649_outLine +BABEL_OP1_204_47878_20130128_214921_inLine +BABEL_OP1_204_47878_20130128_214921_outLine +BABEL_OP1_204_47923_20130812_172435_inLine +BABEL_OP1_204_47923_20130812_172435_outLine +BABEL_OP1_204_48299_20130531_202054_inLine +BABEL_OP1_204_48299_20130531_202054_outLine +BABEL_OP1_204_48610_20130114_165811_inLine +BABEL_OP1_204_48610_20130114_165811_outLine +BABEL_OP1_204_49027_20130606_142005_inLine +BABEL_OP1_204_49027_20130606_142005_outLine +BABEL_OP1_204_49768_20130115_220927_inLine +BABEL_OP1_204_49768_20130115_220927_outLine +BABEL_OP1_204_49775_20121219_214712_inLine +BABEL_OP1_204_49912_20130313_040643_inLine +BABEL_OP1_204_49912_20130313_040643_outLine +BABEL_OP1_204_49945_20130624_173403_inLine +BABEL_OP1_204_49945_20130624_173403_outLine +BABEL_OP1_204_50745_20130216_163145_inLine +BABEL_OP1_204_50745_20130216_163145_outLine +BABEL_OP1_204_50810_20121218_184451_inLine +BABEL_OP1_204_50810_20121218_184451_outLine +BABEL_OP1_204_51156_20130821_223730_inLine +BABEL_OP1_204_51156_20130821_223730_outLine +BABEL_OP1_204_51185_20130517_170655_inLine +BABEL_OP1_204_51185_20130517_170655_outLine 
+BABEL_OP1_204_51407_20130127_042921_inLine +BABEL_OP1_204_51407_20130127_044800_inLine +BABEL_OP1_204_52301_20130113_034941_inLine +BABEL_OP1_204_52301_20130113_034941_outLine +BABEL_OP1_204_52322_20130524_175752_inLine +BABEL_OP1_204_52322_20130524_175752_outLine +BABEL_OP1_204_52717_20130107_043805_inLine +BABEL_OP1_204_52717_20130107_043805_outLine +BABEL_OP1_204_52803_20130802_163814_inLine +BABEL_OP1_204_52803_20130802_163814_outLine +BABEL_OP1_204_52804_20130103_212424_inLine +BABEL_OP1_204_52804_20130103_212424_outLine +BABEL_OP1_204_53068_20130830_003817_inLine +BABEL_OP1_204_53068_20130830_003817_outLine +BABEL_OP1_204_53144_20130217_224136_inLine +BABEL_OP1_204_53144_20130217_224136_outLine +BABEL_OP1_204_53144_20130217_225527_inLine +BABEL_OP1_204_53144_20130217_225527_outLine +BABEL_OP1_204_53441_20130825_001938_inLine +BABEL_OP1_204_53441_20130825_001938_outLine +BABEL_OP1_204_53917_20130217_215053_inLine +BABEL_OP1_204_53917_20130217_215053_outLine +BABEL_OP1_204_54066_20130514_211116_inLine +BABEL_OP1_204_54066_20130514_211116_outLine +BABEL_OP1_204_54074_20130131_005828_inLine +BABEL_OP1_204_54074_20130131_005828_outLine +BABEL_OP1_204_54104_20130107_180959_inLine +BABEL_OP1_204_54104_20130107_180959_outLine +BABEL_OP1_204_54162_20130130_185332_inLine +BABEL_OP1_204_54162_20130130_185332_outLine +BABEL_OP1_204_54390_20130104_174530_inLine +BABEL_OP1_204_54390_20130104_174530_outLine +BABEL_OP1_204_54567_20130222_184721_inLine +BABEL_OP1_204_54567_20130222_184721_outLine +BABEL_OP1_204_54594_20130704_191249_inLine +BABEL_OP1_204_54594_20130704_191249_outLine +BABEL_OP1_204_54634_20130626_181537_inLine +BABEL_OP1_204_54634_20130626_181537_outLine +BABEL_OP1_204_54923_20130313_190841_inLine +BABEL_OP1_204_54923_20130313_190841_outLine +BABEL_OP1_204_54923_20130313_192534_inLine +BABEL_OP1_204_54923_20130313_192534_outLine +BABEL_OP1_204_54923_20130313_194117_inLine +BABEL_OP1_204_54923_20130313_194117_outLine +BABEL_OP1_204_55259_20130119_230219_inLine +BABEL_OP1_204_55259_20130119_230219_outLine +BABEL_OP1_204_55815_20130821_003003_inLine +BABEL_OP1_204_55815_20130821_003003_outLine +BABEL_OP1_204_56023_20130216_222455_inLine +BABEL_OP1_204_56023_20130216_222455_outLine +BABEL_OP1_204_56117_20130815_152303_inLine +BABEL_OP1_204_56117_20130815_152303_outLine +BABEL_OP1_204_56326_20130704_194950_inLine +BABEL_OP1_204_56326_20130704_194950_outLine +BABEL_OP1_204_56606_20130730_211609_inLine +BABEL_OP1_204_56606_20130730_211609_outLine +BABEL_OP1_204_56925_20130901_220934_inLine +BABEL_OP1_204_56925_20130901_220934_outLine +BABEL_OP1_204_57067_20130227_191402_outLine +BABEL_OP1_204_57233_20130524_200041_inLine +BABEL_OP1_204_57233_20130524_200041_outLine +BABEL_OP1_204_57782_20130417_212234_inLine +BABEL_OP1_204_57782_20130417_212234_outLine +BABEL_OP1_204_57887_20130705_183438_inLine +BABEL_OP1_204_57887_20130705_183438_outLine +BABEL_OP1_204_58006_20130325_011740_inLine +BABEL_OP1_204_58006_20130325_011740_outLine +BABEL_OP1_204_58026_20130310_194418_inLine +BABEL_OP1_204_58026_20130310_194418_outLine +BABEL_OP1_204_58103_20130118_221354_inLine +BABEL_OP1_204_58103_20130118_221354_outLine +BABEL_OP1_204_58313_20130127_023416_inLine +BABEL_OP1_204_58313_20130127_023416_outLine +BABEL_OP1_204_58489_20130209_220922_inLine +BABEL_OP1_204_58489_20130209_220922_outLine +BABEL_OP1_204_58489_20130209_221554_inLine +BABEL_OP1_204_58489_20130209_221554_outLine +BABEL_OP1_204_58636_20130812_211303_inLine +BABEL_OP1_204_58636_20130812_211303_outLine 
+BABEL_OP1_204_58734_20130108_172420_inLine +BABEL_OP1_204_58734_20130108_172420_outLine +BABEL_OP1_204_59028_20130507_123451_inLine +BABEL_OP1_204_59028_20130507_123451_outLine +BABEL_OP1_204_59291_20130719_200731_inLine +BABEL_OP1_204_59291_20130719_200731_outLine +BABEL_OP1_204_59307_20130218_000435_inLine +BABEL_OP1_204_59307_20130218_000435_outLine +BABEL_OP1_204_59307_20130218_001152_inLine +BABEL_OP1_204_59307_20130218_001152_outLine +BABEL_OP1_204_59685_20130812_185114_inLine +BABEL_OP1_204_59685_20130812_185114_outLine +BABEL_OP1_204_59864_20130302_195039_inLine +BABEL_OP1_204_59864_20130302_195039_outLine +BABEL_OP1_204_59928_20130103_190414_inLine +BABEL_OP1_204_59928_20130103_190414_outLine +BABEL_OP1_204_60026_20130107_002905_inLine +BABEL_OP1_204_60026_20130107_002905_outLine +BABEL_OP1_204_60282_20130815_161243_inLine +BABEL_OP1_204_60282_20130815_161243_outLine +BABEL_OP1_204_60299_20130313_025357_inLine +BABEL_OP1_204_60299_20130313_025357_outLine +BABEL_OP1_204_60299_20130313_030001_inLine +BABEL_OP1_204_60299_20130313_030001_outLine +BABEL_OP1_204_60397_20130822_013145_inLine +BABEL_OP1_204_60397_20130822_013145_outLine +BABEL_OP1_204_60477_20130521_010650_inLine +BABEL_OP1_204_60477_20130521_010650_outLine +BABEL_OP1_204_61190_20130111_183015_inLine +BABEL_OP1_204_61190_20130111_183015_outLine +BABEL_OP1_204_61435_20130217_214434_inLine +BABEL_OP1_204_61435_20130217_214434_outLine +BABEL_OP1_204_61438_20130719_233853_inLine +BABEL_OP1_204_61438_20130719_233853_outLine +BABEL_OP1_204_61731_20130107_035739_inLine +BABEL_OP1_204_61731_20130107_035739_outLine +BABEL_OP1_204_62177_20130719_152209_inLine +BABEL_OP1_204_62177_20130719_152209_outLine +BABEL_OP1_204_62656_20130902_220800_inLine +BABEL_OP1_204_62656_20130902_220800_outLine +BABEL_OP1_204_62734_20130119_222114_inLine +BABEL_OP1_204_62810_20130106_161333_inLine +BABEL_OP1_204_62810_20130106_161333_outLine +BABEL_OP1_204_62976_20130129_174043_inLine +BABEL_OP1_204_62976_20130129_174043_outLine +BABEL_OP1_204_63334_20130729_183108_inLine +BABEL_OP1_204_63334_20130729_183108_outLine +BABEL_OP1_204_63671_20130817_171243_inLine +BABEL_OP1_204_63671_20130817_171243_outLine +BABEL_OP1_204_63730_20130310_032536_inLine +BABEL_OP1_204_63766_20130824_010950_inLine +BABEL_OP1_204_63766_20130824_010950_outLine +BABEL_OP1_204_63920_20130822_001336_inLine +BABEL_OP1_204_63920_20130822_001336_outLine +BABEL_OP1_204_64065_20130102_231436_inLine +BABEL_OP1_204_64065_20130102_231436_outLine +BABEL_OP1_204_64259_20130610_224356_inLine +BABEL_OP1_204_64259_20130610_224356_outLine +BABEL_OP1_204_64398_20130319_024434_inLine +BABEL_OP1_204_64398_20130319_024434_outLine +BABEL_OP1_204_64469_20130818_174134_inLine +BABEL_OP1_204_64469_20130818_174134_outLine +BABEL_OP1_204_64722_20130215_020559_inLine +BABEL_OP1_204_64722_20130215_020559_outLine +BABEL_OP1_204_65048_20130901_235622_inLine +BABEL_OP1_204_65048_20130901_235622_outLine +BABEL_OP1_204_65268_20130603_220955_inLine +BABEL_OP1_204_65268_20130603_220955_outLine +BABEL_OP1_204_66305_20130218_004015_inLine +BABEL_OP1_204_66305_20130218_004015_outLine +BABEL_OP1_204_66472_20130308_022324_inLine +BABEL_OP1_204_66822_20130121_042919_inLine +BABEL_OP1_204_66822_20130121_042919_outLine +BABEL_OP1_204_66837_20130209_003706_inLine +BABEL_OP1_204_66971_20130617_172242_inLine +BABEL_OP1_204_66971_20130617_172242_outLine +BABEL_OP1_204_67053_20130522_161823_inLine +BABEL_OP1_204_67053_20130522_161823_outLine +BABEL_OP1_204_67283_20130113_013031_inLine 
+BABEL_OP1_204_67283_20130113_013031_outLine +BABEL_OP1_204_67401_20130222_205647_inLine +BABEL_OP1_204_67401_20130222_205647_outLine +BABEL_OP1_204_67659_20130111_193800_inLine +BABEL_OP1_204_67659_20130111_193800_outLine +BABEL_OP1_204_68384_20130719_175720_inLine +BABEL_OP1_204_68384_20130719_175720_outLine +BABEL_OP1_204_68910_20130816_191414_inLine +BABEL_OP1_204_68910_20130816_191414_outLine +BABEL_OP1_204_68924_20130129_165613_inLine +BABEL_OP1_204_68924_20130129_165613_outLine +BABEL_OP1_204_69096_20130303_195234_inLine +BABEL_OP1_204_69096_20130303_195234_outLine +BABEL_OP1_204_69574_20121218_220812_inLine +BABEL_OP1_204_69574_20121218_220812_outLine +BABEL_OP1_204_69937_20130715_192435_inLine +BABEL_OP1_204_69937_20130715_192435_outLine +BABEL_OP1_204_69964_20130704_161248_inLine +BABEL_OP1_204_69964_20130704_161248_outLine +BABEL_OP1_204_69992_20130107_234311_inLine +BABEL_OP1_204_69992_20130107_234311_outLine +BABEL_OP1_204_70216_20130628_200952_inLine +BABEL_OP1_204_70216_20130628_200952_outLine +BABEL_OP1_204_70257_20130716_194637_inLine +BABEL_OP1_204_70257_20130716_194637_outLine +BABEL_OP1_204_70257_20130716_195558_inLine +BABEL_OP1_204_70257_20130716_195558_outLine +BABEL_OP1_204_70293_20130902_214220_inLine +BABEL_OP1_204_70293_20130902_214220_outLine +BABEL_OP1_204_70601_20130122_030105_inLine +BABEL_OP1_204_70601_20130122_030105_outLine +BABEL_OP1_204_70794_20121220_222614_inLine +BABEL_OP1_204_70794_20121220_222614_outLine +BABEL_OP1_204_71067_20130319_205826_inLine +BABEL_OP1_204_71067_20130319_205826_outLine +BABEL_OP1_204_71189_20130215_200359_inLine +BABEL_OP1_204_71189_20130215_200359_outLine +BABEL_OP1_204_71976_20130730_180338_inLine +BABEL_OP1_204_71976_20130730_180338_outLine +BABEL_OP1_204_72073_20130823_001235_inLine +BABEL_OP1_204_72073_20130823_001235_outLine +BABEL_OP1_204_72110_20130208_235019_inLine +BABEL_OP1_204_72110_20130208_235019_outLine +BABEL_OP1_204_73549_20130701_155700_inLine +BABEL_OP1_204_73549_20130701_155700_outLine +BABEL_OP1_204_73696_20130310_022514_inLine +BABEL_OP1_204_73696_20130310_022514_outLine +BABEL_OP1_204_73822_20130515_221842_inLine +BABEL_OP1_204_73822_20130515_221842_outLine +BABEL_OP1_204_74121_20130129_170655_inLine +BABEL_OP1_204_74121_20130129_170655_outLine +BABEL_OP1_204_74280_20121220_170635_inLine +BABEL_OP1_204_74280_20121220_170635_outLine +BABEL_OP1_204_74280_20121220_172100_inLine +BABEL_OP1_204_74280_20121220_172100_outLine +BABEL_OP1_204_74763_20130825_175903_inLine +BABEL_OP1_204_74763_20130825_175903_outLine +BABEL_OP1_204_75064_20130111_180636_inLine +BABEL_OP1_204_75064_20130111_180636_outLine +BABEL_OP1_204_75365_20130516_010147_inLine +BABEL_OP1_204_75365_20130516_010147_outLine +BABEL_OP1_204_75975_20130902_224807_inLine +BABEL_OP1_204_75975_20130902_224807_outLine +BABEL_OP1_204_76126_20130217_205227_outLine +BABEL_OP1_204_76238_20130205_022020_inLine +BABEL_OP1_204_76482_20130310_023337_inLine +BABEL_OP1_204_76482_20130310_023337_outLine +BABEL_OP1_204_76730_20130825_010524_inLine +BABEL_OP1_204_76730_20130825_010524_outLine +BABEL_OP1_204_77427_20130116_173650_inLine +BABEL_OP1_204_77427_20130116_173650_outLine +BABEL_OP1_204_77803_20121219_215121_inLine +BABEL_OP1_204_77803_20121219_215121_outLine +BABEL_OP1_204_78016_20130118_223813_inLine +BABEL_OP1_204_78016_20130118_223813_outLine +BABEL_OP1_204_78016_20130118_224939_inLine +BABEL_OP1_204_78016_20130118_224939_outLine +BABEL_OP1_204_78116_20130130_004511_inLine +BABEL_OP1_204_78116_20130130_004511_outLine 
+BABEL_OP1_204_78254_20130114_224850_inLine +BABEL_OP1_204_78254_20130114_224850_outLine +BABEL_OP1_204_78313_20130223_202010_inLine +BABEL_OP1_204_78313_20130223_202010_outLine +BABEL_OP1_204_78543_20130313_200956_inLine +BABEL_OP1_204_78743_20130210_214804_inLine +BABEL_OP1_204_78743_20130210_214804_outLine +BABEL_OP1_204_78829_20130724_210413_inLine +BABEL_OP1_204_78829_20130724_210413_outLine +BABEL_OP1_204_79045_20130213_233402_inLine +BABEL_OP1_204_79045_20130213_233402_outLine +BABEL_OP1_204_79080_20130224_194409_inLine +BABEL_OP1_204_79080_20130224_194409_outLine +BABEL_OP1_204_79129_20130222_200128_inLine +BABEL_OP1_204_79129_20130222_200128_outLine +BABEL_OP1_204_79367_20130110_223433_inLine +BABEL_OP1_204_79367_20130110_223433_outLine +BABEL_OP1_204_79505_20130223_203535_inLine +BABEL_OP1_204_79505_20130223_203535_outLine +BABEL_OP1_204_80069_20130310_201210_inLine +BABEL_OP1_204_80241_20130825_143825_inLine +BABEL_OP1_204_80241_20130825_143825_outLine +BABEL_OP1_204_80439_20130115_225051_inLine +BABEL_OP1_204_80439_20130115_225051_outLine +BABEL_OP1_204_81213_20130114_221437_inLine +BABEL_OP1_204_81213_20130114_221437_outLine +BABEL_OP1_204_81622_20130130_223905_inLine +BABEL_OP1_204_81622_20130130_223905_outLine +BABEL_OP1_204_81810_20130319_043547_inLine +BABEL_OP1_204_81810_20130319_043547_outLine +BABEL_OP1_204_81854_20130303_025438_inLine +BABEL_OP1_204_81854_20130303_025438_outLine +BABEL_OP1_204_82425_20130108_181556_inLine +BABEL_OP1_204_82425_20130108_181556_outLine +BABEL_OP1_204_82935_20130208_135243_inLine +BABEL_OP1_204_82935_20130208_135243_outLine +BABEL_OP1_204_82979_20130103_191447_inLine +BABEL_OP1_204_82979_20130103_191447_outLine +BABEL_OP1_204_83394_20130313_005013_inLine +BABEL_OP1_204_83394_20130313_005013_outLine +BABEL_OP1_204_83430_20130603_202255_inLine +BABEL_OP1_204_83430_20130603_202255_outLine +BABEL_OP1_204_83455_20130119_213254_inLine +BABEL_OP1_204_83455_20130119_213254_outLine +BABEL_OP1_204_83625_20130531_181104_inLine +BABEL_OP1_204_83625_20130531_181104_outLine +BABEL_OP1_204_83771_20130625_172000_inLine +BABEL_OP1_204_83771_20130625_172000_outLine +BABEL_OP1_204_84055_20130228_202242_inLine +BABEL_OP1_204_84055_20130228_202242_outLine +BABEL_OP1_204_84077_20130812_184211_inLine +BABEL_OP1_204_84077_20130812_184211_outLine +BABEL_OP1_204_84430_20130817_164608_inLine +BABEL_OP1_204_84430_20130817_164608_outLine +BABEL_OP1_204_84430_20130901_201534_inLine +BABEL_OP1_204_84430_20130901_201534_outLine +BABEL_OP1_204_84466_20130220_015953_inLine +BABEL_OP1_204_84466_20130220_015953_outLine +BABEL_OP1_204_84583_20130122_032028_outLine +BABEL_OP1_204_84715_20130225_194321_inLine +BABEL_OP1_204_84715_20130225_194321_outLine +BABEL_OP1_204_85010_20130531_160005_inLine +BABEL_OP1_204_85010_20130531_160005_outLine +BABEL_OP1_204_85028_20130301_204938_inLine +BABEL_OP1_204_85028_20130301_222343_inLine +BABEL_OP1_204_85048_20130423_000346_inLine +BABEL_OP1_204_85048_20130423_000346_outLine +BABEL_OP1_204_85331_20130310_030345_inLine +BABEL_OP1_204_85331_20130310_030345_outLine +BABEL_OP1_204_85331_20130310_033244_inLine +BABEL_OP1_204_85331_20130310_033244_outLine +BABEL_OP1_204_85647_20130120_023041_inLine +BABEL_OP1_204_85647_20130120_023041_outLine +BABEL_OP1_204_86433_20130126_230445_inLine +BABEL_OP1_204_86433_20130126_230445_outLine +BABEL_OP1_204_86715_20130313_002453_inLine +BABEL_OP1_204_86715_20130313_002453_outLine +BABEL_OP1_204_86715_20130313_003416_inLine +BABEL_OP1_204_86715_20130313_003416_outLine 
+BABEL_OP1_204_86891_20130605_215220_inLine +BABEL_OP1_204_86891_20130605_215220_outLine +BABEL_OP1_204_87073_20121220_221057_inLine +BABEL_OP1_204_87073_20121220_221057_outLine +BABEL_OP1_204_87073_20121220_221600_inLine +BABEL_OP1_204_87073_20121220_221600_outLine +BABEL_OP1_204_87073_20121220_222957_inLine +BABEL_OP1_204_87073_20121220_222957_outLine +BABEL_OP1_204_87305_20130515_233922_inLine +BABEL_OP1_204_87305_20130515_233922_outLine +BABEL_OP1_204_87731_20130523_205109_inLine +BABEL_OP1_204_87731_20130523_205109_outLine +BABEL_OP1_204_88445_20130129_191832_inLine +BABEL_OP1_204_88445_20130129_191832_outLine +BABEL_OP1_204_88673_20130705_173732_inLine +BABEL_OP1_204_88673_20130705_173732_outLine +BABEL_OP1_204_88865_20130707_151620_inLine +BABEL_OP1_204_88865_20130707_151620_outLine +BABEL_OP1_204_89516_20130729_214127_inLine +BABEL_OP1_204_89516_20130729_214127_outLine +BABEL_OP1_204_89695_20130130_001218_inLine +BABEL_OP1_204_89695_20130130_001218_outLine +BABEL_OP1_204_89877_20130129_192538_inLine +BABEL_OP1_204_89877_20130129_192538_outLine +BABEL_OP1_204_90347_20130124_030740_inLine +BABEL_OP1_204_90347_20130124_030740_outLine +BABEL_OP1_204_90709_20130107_232337_inLine +BABEL_OP1_204_90709_20130107_232337_outLine +BABEL_OP1_204_91319_20130225_184203_inLine +BABEL_OP1_204_91319_20130225_184203_outLine +BABEL_OP1_204_91383_20130702_173202_inLine +BABEL_OP1_204_91383_20130702_173202_outLine +BABEL_OP1_204_91475_20130701_163859_inLine +BABEL_OP1_204_91475_20130701_163859_outLine +BABEL_OP1_204_91606_20130312_032420_inLine +BABEL_OP1_204_91606_20130312_032420_outLine +BABEL_OP1_204_91760_20130618_160303_inLine +BABEL_OP1_204_91760_20130618_160303_outLine +BABEL_OP1_204_92605_20130518_145958_inLine +BABEL_OP1_204_92605_20130518_145958_outLine +BABEL_OP1_204_92809_20130116_171026_inLine +BABEL_OP1_204_92809_20130116_171026_outLine +BABEL_OP1_204_92942_20130127_233540_inLine +BABEL_OP1_204_92942_20130127_233540_outLine +BABEL_OP1_204_93222_20130604_000913_inLine +BABEL_OP1_204_93222_20130604_000913_outLine +BABEL_OP1_204_93411_20130128_182958_inLine +BABEL_OP1_204_93411_20130128_182958_outLine +BABEL_OP1_204_93469_20130302_033019_inLine +BABEL_OP1_204_93469_20130302_033019_outLine +BABEL_OP1_204_93490_20130209_033837_inLine +BABEL_OP1_204_93490_20130209_033837_outLine +BABEL_OP1_204_93490_20130209_140440_inLine +BABEL_OP1_204_93490_20130209_140440_outLine +BABEL_OP1_204_93681_20130901_204636_inLine +BABEL_OP1_204_93681_20130901_204636_outLine +BABEL_OP1_204_94442_20130617_164306_inLine +BABEL_OP1_204_94442_20130617_164306_outLine +BABEL_OP1_204_95028_20130518_173442_inLine +BABEL_OP1_204_95028_20130518_173442_outLine +BABEL_OP1_204_95399_20130125_184030_outLine +BABEL_OP1_204_95446_20130225_185013_inLine +BABEL_OP1_204_95446_20130225_185013_outLine +BABEL_OP1_204_95663_20121221_214944_inLine +BABEL_OP1_204_95663_20121221_214944_outLine +BABEL_OP1_204_95942_20130215_204023_inLine +BABEL_OP1_204_95942_20130215_204023_outLine +BABEL_OP1_204_96158_20130721_235954_inLine +BABEL_OP1_204_96158_20130721_235954_outLine +BABEL_OP1_204_96190_20130116_041341_inLine +BABEL_OP1_204_96190_20130116_041341_outLine +BABEL_OP1_204_96247_20130319_165606_inLine +BABEL_OP1_204_96247_20130319_165606_outLine +BABEL_OP1_204_96690_20130129_191200_inLine +BABEL_OP1_204_96690_20130129_191200_outLine +BABEL_OP1_204_96730_20130225_193316_inLine +BABEL_OP1_204_96730_20130225_193316_outLine +BABEL_OP1_204_96808_20130617_185713_inLine +BABEL_OP1_204_96808_20130617_185713_outLine 
+BABEL_OP1_204_96910_20130115_215424_inLine +BABEL_OP1_204_96910_20130115_215424_outLine +BABEL_OP1_204_97063_20130227_185803_inLine +BABEL_OP1_204_97063_20130227_185803_outLine +BABEL_OP1_204_97063_20130306_232138_inLine +BABEL_OP1_204_97063_20130306_232138_outLine +BABEL_OP1_204_97220_20130310_023745_inLine +BABEL_OP1_204_97220_20130310_023745_outLine +BABEL_OP1_204_97376_20130128_213930_inLine +BABEL_OP1_204_97376_20130128_213930_outLine +BABEL_OP1_204_97461_20130127_014703_inLine +BABEL_OP1_204_97461_20130127_014703_outLine +BABEL_OP1_204_97461_20130127_015849_inLine +BABEL_OP1_204_97461_20130127_015849_outLine +BABEL_OP1_204_97731_20130210_235215_inLine +BABEL_OP1_204_97731_20130210_235215_outLine +BABEL_OP1_204_97772_20121218_224525_inLine +BABEL_OP1_204_97772_20121218_224525_outLine +BABEL_OP1_204_97836_20130220_015139_inLine +BABEL_OP1_204_97836_20130220_015139_outLine +BABEL_OP1_204_98365_20130224_175209_inLine +BABEL_OP1_204_98365_20130224_175209_outLine +BABEL_OP1_204_98565_20130817_171905_inLine +BABEL_OP1_204_98565_20130817_171905_outLine +BABEL_OP1_204_99289_20130215_210617_inLine +BABEL_OP1_204_99289_20130215_210617_outLine +BABEL_OP1_204_99401_20130108_180622_inLine +BABEL_OP1_204_99401_20130108_180622_outLine +BABEL_OP1_204_99594_20130126_192710_inLine +BABEL_OP1_204_99594_20130126_192710_outLine +BABEL_OP1_204_99887_20130210_212207_inLine +BABEL_OP1_204_99887_20130210_212207_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.list b/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.list new file mode 100644 index 00000000000..4c5afd85381 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.list @@ -0,0 +1,125 @@ +BABEL_OP1_204_10184_20130217_232154_inLine +BABEL_OP1_204_10184_20130217_232154_outLine +BABEL_OP1_204_11723_20130803_144247_inLine +BABEL_OP1_204_11723_20130803_144247_outLine +BABEL_OP1_204_12220_20130120_183204_inLine +BABEL_OP1_204_12220_20130120_183204_outLine +BABEL_OP1_204_13324_20130103_211640_inLine +BABEL_OP1_204_13324_20130103_211640_outLine +BABEL_OP1_204_13490_20130314_031843_inLine +BABEL_OP1_204_13490_20130314_031843_outLine +BABEL_OP1_204_13792_20121231_015544_inLine +BABEL_OP1_204_13792_20121231_015544_outLine +BABEL_OP1_204_14807_20130222_213831_inLine +BABEL_OP1_204_14807_20130222_213831_outLine +BABEL_OP1_204_16249_20130906_003049_inLine +BABEL_OP1_204_16249_20130906_003049_outLine +BABEL_OP1_204_17032_20130129_012026_inLine +BABEL_OP1_204_17032_20130129_012026_outLine +BABEL_OP1_204_20330_20130217_225055_inLine +BABEL_OP1_204_20330_20130217_225055_outLine +BABEL_OP1_204_20367_20130312_024055_inLine +BABEL_OP1_204_20367_20130312_024055_outLine +BABEL_OP1_204_22321_20130107_231204_inLine +BABEL_OP1_204_22321_20130107_231204_outLine +BABEL_OP1_204_23980_20130127_031636_inLine +BABEL_OP1_204_23980_20130127_031636_outLine +BABEL_OP1_204_24605_20130111_185213_inLine +BABEL_OP1_204_24605_20130111_185213_outLine +BABEL_OP1_204_27042_20130215_015654_inLine +BABEL_OP1_204_27042_20130215_015654_outLine +BABEL_OP1_204_27478_20130219_233409_inLine +BABEL_OP1_204_27478_20130219_233409_outLine +BABEL_OP1_204_27841_20130225_192938_inLine +BABEL_OP1_204_27841_20130225_192938_outLine +BABEL_OP1_204_31728_20130730_183500_inLine +BABEL_OP1_204_31728_20130730_183500_outLine +BABEL_OP1_204_32727_20130224_174507_inLine +BABEL_OP1_204_32727_20130224_174507_outLine +BABEL_OP1_204_33355_20130110_222048_inLine +BABEL_OP1_204_33355_20130110_222048_outLine +BABEL_OP1_204_34713_20130516_164824_inLine 
+BABEL_OP1_204_34713_20130516_164824_outLine +BABEL_OP1_204_38750_20130208_003349_inLine +BABEL_OP1_204_38750_20130208_003349_outLine +BABEL_OP1_204_39099_20130302_210320_inLine +BABEL_OP1_204_39099_20130302_210320_outLine +BABEL_OP1_204_40196_20130902_001447_inLine +BABEL_OP1_204_40196_20130902_001447_outLine +BABEL_OP1_204_40686_20130704_204726_inLine +BABEL_OP1_204_40686_20130704_204726_outLine +BABEL_OP1_204_41233_20130209_215355_inLine +BABEL_OP1_204_41233_20130209_215355_outLine +BABEL_OP1_204_42942_20130127_014343_inLine +BABEL_OP1_204_42942_20130127_014343_outLine +BABEL_OP1_204_43157_20130514_222203_inLine +BABEL_OP1_204_43157_20130514_222203_outLine +BABEL_OP1_204_43368_20130118_201259_inLine +BABEL_OP1_204_43368_20130118_201259_outLine +BABEL_OP1_204_45121_20130618_153308_inLine +BABEL_OP1_204_45121_20130618_153308_outLine +BABEL_OP1_204_45374_20130906_011341_inLine +BABEL_OP1_204_45374_20130906_011341_outLine +BABEL_OP1_204_45770_20130116_214623_inLine +BABEL_OP1_204_45770_20130116_214623_outLine +BABEL_OP1_204_49027_20130606_142005_inLine +BABEL_OP1_204_49027_20130606_142005_outLine +BABEL_OP1_204_50745_20130216_163145_inLine +BABEL_OP1_204_50745_20130216_163145_outLine +BABEL_OP1_204_53917_20130217_215053_inLine +BABEL_OP1_204_53917_20130217_215053_outLine +BABEL_OP1_204_58026_20130310_194418_inLine +BABEL_OP1_204_58026_20130310_194418_outLine +BABEL_OP1_204_60282_20130815_161243_inLine +BABEL_OP1_204_60282_20130815_161243_outLine +BABEL_OP1_204_63766_20130824_010950_inLine +BABEL_OP1_204_63766_20130824_010950_outLine +BABEL_OP1_204_68924_20130129_165613_inLine +BABEL_OP1_204_68924_20130129_165613_outLine +BABEL_OP1_204_69574_20121218_220812_inLine +BABEL_OP1_204_69574_20121218_220812_outLine +BABEL_OP1_204_70257_20130716_194637_inLine +BABEL_OP1_204_70257_20130716_194637_outLine +BABEL_OP1_204_70257_20130716_195558_inLine +BABEL_OP1_204_70257_20130716_195558_outLine +BABEL_OP1_204_73822_20130515_221842_inLine +BABEL_OP1_204_73822_20130515_221842_outLine +BABEL_OP1_204_74280_20121220_170635_inLine +BABEL_OP1_204_74280_20121220_170635_outLine +BABEL_OP1_204_74280_20121220_172100_inLine +BABEL_OP1_204_74280_20121220_172100_outLine +BABEL_OP1_204_79045_20130213_233402_inLine +BABEL_OP1_204_79045_20130213_233402_outLine +BABEL_OP1_204_79129_20130222_200128_inLine +BABEL_OP1_204_79129_20130222_200128_outLine +BABEL_OP1_204_80241_20130825_143825_inLine +BABEL_OP1_204_80241_20130825_143825_outLine +BABEL_OP1_204_81854_20130303_025438_inLine +BABEL_OP1_204_81854_20130303_025438_outLine +BABEL_OP1_204_83625_20130531_181104_inLine +BABEL_OP1_204_83625_20130531_181104_outLine +BABEL_OP1_204_85048_20130423_000346_inLine +BABEL_OP1_204_85048_20130423_000346_outLine +BABEL_OP1_204_87731_20130523_205109_inLine +BABEL_OP1_204_87731_20130523_205109_outLine +BABEL_OP1_204_89516_20130729_214127_inLine +BABEL_OP1_204_89516_20130729_214127_outLine +BABEL_OP1_204_91319_20130225_184203_inLine +BABEL_OP1_204_91319_20130225_184203_outLine +BABEL_OP1_204_91383_20130702_173202_inLine +BABEL_OP1_204_91383_20130702_173202_outLine +BABEL_OP1_204_91475_20130701_163859_inLine +BABEL_OP1_204_91475_20130701_163859_outLine +BABEL_OP1_204_91606_20130312_032420_inLine +BABEL_OP1_204_91606_20130312_032420_outLine +BABEL_OP1_204_93411_20130128_182958_inLine +BABEL_OP1_204_93411_20130128_182958_outLine +BABEL_OP1_204_95399_20130125_184030_outLine +BABEL_OP1_204_96910_20130115_215424_inLine +BABEL_OP1_204_96910_20130115_215424_outLine +BABEL_OP1_204_97731_20130210_235215_inLine 
+BABEL_OP1_204_97731_20130210_235215_outLine +BABEL_OP1_204_97836_20130220_015139_inLine +BABEL_OP1_204_97836_20130220_015139_outLine +BABEL_OP1_204_98565_20130817_171905_inLine +BABEL_OP1_204_98565_20130817_171905_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.untranscribed.list new file mode 100644 index 00000000000..09510717b52 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/train.LimitedLP.untranscribed.list @@ -0,0 +1,653 @@ +BABEL_OP1_204_10002_20130523_142107_inLine +BABEL_OP1_204_10002_20130523_142107_outLine +BABEL_OP1_204_10036_20130116_163652_inLine +BABEL_OP1_204_10036_20130116_163652_outLine +BABEL_OP1_204_10411_20130313_042405_inLine +BABEL_OP1_204_10411_20130313_042405_outLine +BABEL_OP1_204_10469_20130708_201653_inLine +BABEL_OP1_204_10469_20130708_201653_outLine +BABEL_OP1_204_10647_20130225_175457_inLine +BABEL_OP1_204_10647_20130225_175457_outLine +BABEL_OP1_204_10647_20130225_184106_inLine +BABEL_OP1_204_10647_20130225_184106_outLine +BABEL_OP1_204_10901_20130120_220533_inLine +BABEL_OP1_204_10901_20130120_220533_outLine +BABEL_OP1_204_10938_20130118_213056_inLine +BABEL_OP1_204_10938_20130118_213056_outLine +BABEL_OP1_204_10966_20130114_210156_inLine +BABEL_OP1_204_10966_20130114_210156_outLine +BABEL_OP1_204_11310_20130705_180254_inLine +BABEL_OP1_204_11310_20130705_180254_outLine +BABEL_OP1_204_11352_20130220_023807_inLine +BABEL_OP1_204_11352_20130220_023807_outLine +BABEL_OP1_204_11486_20130607_155406_inLine +BABEL_OP1_204_11486_20130607_155406_outLine +BABEL_OP1_204_11581_20130222_215500_inLine +BABEL_OP1_204_11581_20130222_215500_outLine +BABEL_OP1_204_11581_20130222_220101_inLine +BABEL_OP1_204_11581_20130222_220101_outLine +BABEL_OP1_204_11663_20130319_201815_inLine +BABEL_OP1_204_11663_20130319_201815_outLine +BABEL_OP1_204_11673_20121220_214236_inLine +BABEL_OP1_204_11673_20121220_214236_outLine +BABEL_OP1_204_11797_20130107_214732_inLine +BABEL_OP1_204_11797_20130107_214732_outLine +BABEL_OP1_204_12036_20130102_170500_inLine +BABEL_OP1_204_12036_20130102_170500_outLine +BABEL_OP1_204_12036_20130102_171149_inLine +BABEL_OP1_204_12036_20130102_171149_outLine +BABEL_OP1_204_12242_20130111_014802_inLine +BABEL_OP1_204_12242_20130111_014802_outLine +BABEL_OP1_204_12846_20130515_220132_inLine +BABEL_OP1_204_12846_20130515_220132_outLine +BABEL_OP1_204_12851_20121219_172018_inLine +BABEL_OP1_204_12851_20121219_172018_outLine +BABEL_OP1_204_13030_20130120_210514_inLine +BABEL_OP1_204_13184_20130228_032847_inLine +BABEL_OP1_204_13184_20130228_032847_outLine +BABEL_OP1_204_13744_20130106_232543_inLine +BABEL_OP1_204_13744_20130106_232543_outLine +BABEL_OP1_204_13776_20130626_215241_inLine +BABEL_OP1_204_13776_20130626_215241_outLine +BABEL_OP1_204_14719_20130219_231741_inLine +BABEL_OP1_204_14719_20130219_231741_outLine +BABEL_OP1_204_14719_20130219_232513_inLine +BABEL_OP1_204_14719_20130219_232513_outLine +BABEL_OP1_204_14725_20130111_204740_inLine +BABEL_OP1_204_14725_20130111_204740_outLine +BABEL_OP1_204_15730_20130103_154749_inLine +BABEL_OP1_204_15730_20130103_154749_outLine +BABEL_OP1_204_15985_20130627_154935_inLine +BABEL_OP1_204_15985_20130627_154935_outLine +BABEL_OP1_204_16726_20130815_164352_inLine +BABEL_OP1_204_16726_20130815_164352_outLine +BABEL_OP1_204_16800_20130307_025108_inLine +BABEL_OP1_204_16800_20130307_025108_outLine +BABEL_OP1_204_16802_20130821_234724_inLine +BABEL_OP1_204_16802_20130821_234724_outLine 
+BABEL_OP1_204_16838_20130703_183021_inLine +BABEL_OP1_204_16838_20130703_183021_outLine +BABEL_OP1_204_17420_20130426_172522_inLine +BABEL_OP1_204_17420_20130426_172522_outLine +BABEL_OP1_204_17420_20130426_174314_inLine +BABEL_OP1_204_17420_20130426_174314_outLine +BABEL_OP1_204_17496_20130325_015543_inLine +BABEL_OP1_204_17496_20130325_015543_outLine +BABEL_OP1_204_18037_20130825_200728_inLine +BABEL_OP1_204_18037_20130825_200728_outLine +BABEL_OP1_204_18280_20130818_172915_inLine +BABEL_OP1_204_18280_20130818_172915_outLine +BABEL_OP1_204_18939_20130110_214704_inLine +BABEL_OP1_204_18939_20130110_214704_outLine +BABEL_OP1_204_18992_20130830_001646_inLine +BABEL_OP1_204_18992_20130830_001646_outLine +BABEL_OP1_204_19134_20130120_191037_inLine +BABEL_OP1_204_19134_20130120_191037_outLine +BABEL_OP1_204_19461_20130704_154920_inLine +BABEL_OP1_204_19461_20130704_154920_outLine +BABEL_OP1_204_19589_20130304_020747_inLine +BABEL_OP1_204_19589_20130304_020747_outLine +BABEL_OP1_204_19688_20130708_194740_inLine +BABEL_OP1_204_19688_20130708_194740_outLine +BABEL_OP1_204_20437_20130523_235611_inLine +BABEL_OP1_204_20437_20130523_235611_outLine +BABEL_OP1_204_20721_20130704_183621_inLine +BABEL_OP1_204_20721_20130704_183621_outLine +BABEL_OP1_204_20916_20121218_174604_inLine +BABEL_OP1_204_20916_20121218_174604_outLine +BABEL_OP1_204_20985_20130129_225135_inLine +BABEL_OP1_204_20985_20130129_225135_outLine +BABEL_OP1_204_21426_20130515_212900_inLine +BABEL_OP1_204_21426_20130515_212900_outLine +BABEL_OP1_204_21435_20130215_200722_inLine +BABEL_OP1_204_21435_20130215_200722_outLine +BABEL_OP1_204_21543_20130901_203127_inLine +BABEL_OP1_204_21543_20130901_203127_outLine +BABEL_OP1_204_21807_20130127_033626_inLine +BABEL_OP1_204_21807_20130127_033626_outLine +BABEL_OP1_204_21807_20130127_041609_inLine +BABEL_OP1_204_21807_20130127_041609_outLine +BABEL_OP1_204_22643_20130709_192909_inLine +BABEL_OP1_204_22643_20130709_192909_outLine +BABEL_OP1_204_23006_20130115_200742_inLine +BABEL_OP1_204_23006_20130115_200742_outLine +BABEL_OP1_204_23046_20130114_165057_inLine +BABEL_OP1_204_23046_20130114_165057_outLine +BABEL_OP1_204_23153_20130128_223235_inLine +BABEL_OP1_204_23153_20130128_223235_outLine +BABEL_OP1_204_23190_20130116_191153_inLine +BABEL_OP1_204_23190_20130116_191153_outLine +BABEL_OP1_204_23752_20130517_181521_inLine +BABEL_OP1_204_23752_20130517_181521_outLine +BABEL_OP1_204_24010_20130510_160627_inLine +BABEL_OP1_204_24010_20130510_160627_outLine +BABEL_OP1_204_24221_20130803_162307_inLine +BABEL_OP1_204_24221_20130803_162307_outLine +BABEL_OP1_204_24231_20130702_165725_inLine +BABEL_OP1_204_24231_20130702_165725_outLine +BABEL_OP1_204_24253_20130216_173828_inLine +BABEL_OP1_204_24253_20130216_173828_outLine +BABEL_OP1_204_24587_20130812_201846_inLine +BABEL_OP1_204_24587_20130812_201846_outLine +BABEL_OP1_204_26381_20130906_003653_inLine +BABEL_OP1_204_26381_20130906_003653_outLine +BABEL_OP1_204_26388_20121222_180059_inLine +BABEL_OP1_204_26388_20121222_180059_outLine +BABEL_OP1_204_26478_20130628_163250_inLine +BABEL_OP1_204_26478_20130628_163250_outLine +BABEL_OP1_204_27203_20130123_034459_inLine +BABEL_OP1_204_27203_20130123_034459_outLine +BABEL_OP1_204_28522_20130130_021159_inLine +BABEL_OP1_204_28522_20130130_021159_outLine +BABEL_OP1_204_28595_20130515_165745_inLine +BABEL_OP1_204_28595_20130515_165745_outLine +BABEL_OP1_204_29135_20121226_012303_inLine +BABEL_OP1_204_29404_20130225_222910_inLine +BABEL_OP1_204_29404_20130225_222910_outLine 
+BABEL_OP1_204_29633_20130219_205935_inLine +BABEL_OP1_204_29633_20130219_205935_outLine +BABEL_OP1_204_29911_20130704_163449_inLine +BABEL_OP1_204_29911_20130704_163449_outLine +BABEL_OP1_204_30013_20130129_224621_inLine +BABEL_OP1_204_30013_20130129_224621_outLine +BABEL_OP1_204_30098_20130302_223148_inLine +BABEL_OP1_204_30098_20130302_223148_outLine +BABEL_OP1_204_30345_20130211_192641_inLine +BABEL_OP1_204_30345_20130211_192641_outLine +BABEL_OP1_204_30432_20130128_194847_inLine +BABEL_OP1_204_30432_20130128_194847_outLine +BABEL_OP1_204_31039_20130817_183417_inLine +BABEL_OP1_204_31039_20130817_183417_outLine +BABEL_OP1_204_31490_20130106_234029_inLine +BABEL_OP1_204_31490_20130106_234029_outLine +BABEL_OP1_204_32097_20121218_192753_inLine +BABEL_OP1_204_32097_20121218_192753_outLine +BABEL_OP1_204_32122_20130119_232805_inLine +BABEL_OP1_204_32122_20130119_232805_outLine +BABEL_OP1_204_32169_20130820_205304_inLine +BABEL_OP1_204_32169_20130820_205304_outLine +BABEL_OP1_204_32244_20130617_175424_inLine +BABEL_OP1_204_32244_20130617_175424_outLine +BABEL_OP1_204_32328_20130218_020809_inLine +BABEL_OP1_204_32328_20130218_020809_outLine +BABEL_OP1_204_33273_20130126_234135_inLine +BABEL_OP1_204_33273_20130126_234135_outLine +BABEL_OP1_204_33424_20130617_192727_inLine +BABEL_OP1_204_33424_20130617_192727_outLine +BABEL_OP1_204_33774_20130601_164240_inLine +BABEL_OP1_204_33774_20130601_164240_outLine +BABEL_OP1_204_33806_20130310_041206_inLine +BABEL_OP1_204_33806_20130310_041206_outLine +BABEL_OP1_204_33913_20130205_155246_inLine +BABEL_OP1_204_34197_20121229_204615_inLine +BABEL_OP1_204_34197_20121229_204615_outLine +BABEL_OP1_204_34486_20130626_205810_inLine +BABEL_OP1_204_34486_20130626_205810_outLine +BABEL_OP1_204_34688_20121231_163152_inLine +BABEL_OP1_204_34811_20130130_015529_inLine +BABEL_OP1_204_34811_20130130_015529_outLine +BABEL_OP1_204_34860_20130524_205736_inLine +BABEL_OP1_204_34860_20130524_205736_outLine +BABEL_OP1_204_35000_20130217_021526_inLine +BABEL_OP1_204_35000_20130217_021526_outLine +BABEL_OP1_204_36293_20130107_173251_inLine +BABEL_OP1_204_36293_20130107_173251_outLine +BABEL_OP1_204_37228_20130224_205648_inLine +BABEL_OP1_204_37228_20130224_205648_outLine +BABEL_OP1_204_38588_20130119_231312_inLine +BABEL_OP1_204_38588_20130119_231312_outLine +BABEL_OP1_204_38664_20130116_202337_inLine +BABEL_OP1_204_38664_20130116_202337_outLine +BABEL_OP1_204_39307_20130104_021512_inLine +BABEL_OP1_204_39307_20130104_021512_outLine +BABEL_OP1_204_39638_20130605_153521_inLine +BABEL_OP1_204_39638_20130605_153521_outLine +BABEL_OP1_204_39848_20130130_204605_inLine +BABEL_OP1_204_39848_20130130_204605_outLine +BABEL_OP1_204_39893_20130313_023055_inLine +BABEL_OP1_204_39893_20130313_023055_outLine +BABEL_OP1_204_40565_20130129_202204_inLine +BABEL_OP1_204_40565_20130129_202204_outLine +BABEL_OP1_204_40648_20130710_170435_inLine +BABEL_OP1_204_40648_20130710_170435_outLine +BABEL_OP1_204_41334_20130311_032651_inLine +BABEL_OP1_204_41334_20130311_032651_outLine +BABEL_OP1_204_41598_20130227_193020_inLine +BABEL_OP1_204_41598_20130227_193020_outLine +BABEL_OP1_204_41720_20130524_184216_inLine +BABEL_OP1_204_41720_20130524_184216_outLine +BABEL_OP1_204_41890_20130227_233410_inLine +BABEL_OP1_204_41890_20130227_233410_outLine +BABEL_OP1_204_41920_20130101_031856_inLine +BABEL_OP1_204_41958_20130120_013639_inLine +BABEL_OP1_204_41958_20130120_013639_outLine +BABEL_OP1_204_41958_20130120_014156_inLine +BABEL_OP1_204_41958_20130120_014156_outLine 
+BABEL_OP1_204_41958_20130120_015222_inLine +BABEL_OP1_204_41958_20130120_015222_outLine +BABEL_OP1_204_42299_20130613_164705_inLine +BABEL_OP1_204_42299_20130613_164705_outLine +BABEL_OP1_204_42526_20130225_185629_inLine +BABEL_OP1_204_42526_20130225_185629_outLine +BABEL_OP1_204_43286_20130104_031805_inLine +BABEL_OP1_204_43286_20130104_031805_outLine +BABEL_OP1_204_43323_20130523_152627_inLine +BABEL_OP1_204_43323_20130523_152627_outLine +BABEL_OP1_204_43794_20130627_212826_inLine +BABEL_OP1_204_43794_20130627_212826_outLine +BABEL_OP1_204_44347_20130220_035919_inLine +BABEL_OP1_204_44347_20130220_035919_outLine +BABEL_OP1_204_44898_20130705_195912_inLine +BABEL_OP1_204_44898_20130705_195912_outLine +BABEL_OP1_204_45459_20130302_031028_inLine +BABEL_OP1_204_45459_20130302_031028_outLine +BABEL_OP1_204_45699_20130815_000115_inLine +BABEL_OP1_204_45699_20130815_000115_outLine +BABEL_OP1_204_46066_20130226_201734_inLine +BABEL_OP1_204_46066_20130226_201734_outLine +BABEL_OP1_204_46169_20130218_214523_inLine +BABEL_OP1_204_46558_20130103_175101_inLine +BABEL_OP1_204_46558_20130103_175101_outLine +BABEL_OP1_204_46905_20130704_183507_inLine +BABEL_OP1_204_46905_20130704_183507_outLine +BABEL_OP1_204_47156_20130310_000732_inLine +BABEL_OP1_204_47156_20130310_000732_outLine +BABEL_OP1_204_47283_20130102_220157_inLine +BABEL_OP1_204_47283_20130102_220157_outLine +BABEL_OP1_204_47802_20130614_155949_inLine +BABEL_OP1_204_47802_20130614_155949_outLine +BABEL_OP1_204_47823_20130209_191710_inLine +BABEL_OP1_204_47823_20130209_191710_outLine +BABEL_OP1_204_47878_20130128_213649_inLine +BABEL_OP1_204_47878_20130128_213649_outLine +BABEL_OP1_204_47878_20130128_214921_inLine +BABEL_OP1_204_47878_20130128_214921_outLine +BABEL_OP1_204_47923_20130812_172435_inLine +BABEL_OP1_204_47923_20130812_172435_outLine +BABEL_OP1_204_48299_20130531_202054_inLine +BABEL_OP1_204_48299_20130531_202054_outLine +BABEL_OP1_204_48610_20130114_165811_inLine +BABEL_OP1_204_48610_20130114_165811_outLine +BABEL_OP1_204_49768_20130115_220927_inLine +BABEL_OP1_204_49768_20130115_220927_outLine +BABEL_OP1_204_49775_20121219_214712_inLine +BABEL_OP1_204_49912_20130313_040643_inLine +BABEL_OP1_204_49912_20130313_040643_outLine +BABEL_OP1_204_49945_20130624_173403_inLine +BABEL_OP1_204_49945_20130624_173403_outLine +BABEL_OP1_204_50810_20121218_184451_inLine +BABEL_OP1_204_50810_20121218_184451_outLine +BABEL_OP1_204_51156_20130821_223730_inLine +BABEL_OP1_204_51156_20130821_223730_outLine +BABEL_OP1_204_51185_20130517_170655_inLine +BABEL_OP1_204_51185_20130517_170655_outLine +BABEL_OP1_204_51407_20130127_042921_inLine +BABEL_OP1_204_51407_20130127_044800_inLine +BABEL_OP1_204_52301_20130113_034941_inLine +BABEL_OP1_204_52301_20130113_034941_outLine +BABEL_OP1_204_52322_20130524_175752_inLine +BABEL_OP1_204_52322_20130524_175752_outLine +BABEL_OP1_204_52717_20130107_043805_inLine +BABEL_OP1_204_52717_20130107_043805_outLine +BABEL_OP1_204_52803_20130802_163814_inLine +BABEL_OP1_204_52803_20130802_163814_outLine +BABEL_OP1_204_52804_20130103_212424_inLine +BABEL_OP1_204_52804_20130103_212424_outLine +BABEL_OP1_204_53068_20130830_003817_inLine +BABEL_OP1_204_53068_20130830_003817_outLine +BABEL_OP1_204_53144_20130217_224136_inLine +BABEL_OP1_204_53144_20130217_224136_outLine +BABEL_OP1_204_53144_20130217_225527_inLine +BABEL_OP1_204_53144_20130217_225527_outLine +BABEL_OP1_204_53441_20130825_001938_inLine +BABEL_OP1_204_53441_20130825_001938_outLine +BABEL_OP1_204_54066_20130514_211116_inLine 
+BABEL_OP1_204_54066_20130514_211116_outLine +BABEL_OP1_204_54074_20130131_005828_inLine +BABEL_OP1_204_54074_20130131_005828_outLine +BABEL_OP1_204_54104_20130107_180959_inLine +BABEL_OP1_204_54104_20130107_180959_outLine +BABEL_OP1_204_54162_20130130_185332_inLine +BABEL_OP1_204_54162_20130130_185332_outLine +BABEL_OP1_204_54390_20130104_174530_inLine +BABEL_OP1_204_54390_20130104_174530_outLine +BABEL_OP1_204_54567_20130222_184721_inLine +BABEL_OP1_204_54567_20130222_184721_outLine +BABEL_OP1_204_54594_20130704_191249_inLine +BABEL_OP1_204_54594_20130704_191249_outLine +BABEL_OP1_204_54634_20130626_181537_inLine +BABEL_OP1_204_54634_20130626_181537_outLine +BABEL_OP1_204_54923_20130313_190841_inLine +BABEL_OP1_204_54923_20130313_190841_outLine +BABEL_OP1_204_54923_20130313_192534_inLine +BABEL_OP1_204_54923_20130313_192534_outLine +BABEL_OP1_204_54923_20130313_194117_inLine +BABEL_OP1_204_54923_20130313_194117_outLine +BABEL_OP1_204_55259_20130119_230219_inLine +BABEL_OP1_204_55259_20130119_230219_outLine +BABEL_OP1_204_55815_20130821_003003_inLine +BABEL_OP1_204_55815_20130821_003003_outLine +BABEL_OP1_204_56023_20130216_222455_inLine +BABEL_OP1_204_56023_20130216_222455_outLine +BABEL_OP1_204_56117_20130815_152303_inLine +BABEL_OP1_204_56117_20130815_152303_outLine +BABEL_OP1_204_56326_20130704_194950_inLine +BABEL_OP1_204_56326_20130704_194950_outLine +BABEL_OP1_204_56606_20130730_211609_inLine +BABEL_OP1_204_56606_20130730_211609_outLine +BABEL_OP1_204_56925_20130901_220934_inLine +BABEL_OP1_204_56925_20130901_220934_outLine +BABEL_OP1_204_57067_20130227_191402_outLine +BABEL_OP1_204_57233_20130524_200041_inLine +BABEL_OP1_204_57233_20130524_200041_outLine +BABEL_OP1_204_57782_20130417_212234_inLine +BABEL_OP1_204_57782_20130417_212234_outLine +BABEL_OP1_204_57887_20130705_183438_inLine +BABEL_OP1_204_57887_20130705_183438_outLine +BABEL_OP1_204_58006_20130325_011740_inLine +BABEL_OP1_204_58006_20130325_011740_outLine +BABEL_OP1_204_58103_20130118_221354_inLine +BABEL_OP1_204_58103_20130118_221354_outLine +BABEL_OP1_204_58313_20130127_023416_inLine +BABEL_OP1_204_58313_20130127_023416_outLine +BABEL_OP1_204_58489_20130209_220922_inLine +BABEL_OP1_204_58489_20130209_220922_outLine +BABEL_OP1_204_58489_20130209_221554_inLine +BABEL_OP1_204_58489_20130209_221554_outLine +BABEL_OP1_204_58636_20130812_211303_inLine +BABEL_OP1_204_58636_20130812_211303_outLine +BABEL_OP1_204_58734_20130108_172420_inLine +BABEL_OP1_204_58734_20130108_172420_outLine +BABEL_OP1_204_59028_20130507_123451_inLine +BABEL_OP1_204_59028_20130507_123451_outLine +BABEL_OP1_204_59291_20130719_200731_inLine +BABEL_OP1_204_59291_20130719_200731_outLine +BABEL_OP1_204_59307_20130218_000435_inLine +BABEL_OP1_204_59307_20130218_000435_outLine +BABEL_OP1_204_59307_20130218_001152_inLine +BABEL_OP1_204_59307_20130218_001152_outLine +BABEL_OP1_204_59685_20130812_185114_inLine +BABEL_OP1_204_59685_20130812_185114_outLine +BABEL_OP1_204_59864_20130302_195039_inLine +BABEL_OP1_204_59864_20130302_195039_outLine +BABEL_OP1_204_59928_20130103_190414_inLine +BABEL_OP1_204_59928_20130103_190414_outLine +BABEL_OP1_204_60026_20130107_002905_inLine +BABEL_OP1_204_60026_20130107_002905_outLine +BABEL_OP1_204_60299_20130313_025357_inLine +BABEL_OP1_204_60299_20130313_025357_outLine +BABEL_OP1_204_60299_20130313_030001_inLine +BABEL_OP1_204_60299_20130313_030001_outLine +BABEL_OP1_204_60397_20130822_013145_inLine +BABEL_OP1_204_60397_20130822_013145_outLine +BABEL_OP1_204_60477_20130521_010650_inLine 
+BABEL_OP1_204_60477_20130521_010650_outLine +BABEL_OP1_204_61190_20130111_183015_inLine +BABEL_OP1_204_61190_20130111_183015_outLine +BABEL_OP1_204_61435_20130217_214434_inLine +BABEL_OP1_204_61435_20130217_214434_outLine +BABEL_OP1_204_61438_20130719_233853_inLine +BABEL_OP1_204_61438_20130719_233853_outLine +BABEL_OP1_204_61731_20130107_035739_inLine +BABEL_OP1_204_61731_20130107_035739_outLine +BABEL_OP1_204_62177_20130719_152209_inLine +BABEL_OP1_204_62177_20130719_152209_outLine +BABEL_OP1_204_62656_20130902_220800_inLine +BABEL_OP1_204_62656_20130902_220800_outLine +BABEL_OP1_204_62734_20130119_222114_inLine +BABEL_OP1_204_62810_20130106_161333_inLine +BABEL_OP1_204_62810_20130106_161333_outLine +BABEL_OP1_204_62976_20130129_174043_inLine +BABEL_OP1_204_62976_20130129_174043_outLine +BABEL_OP1_204_63334_20130729_183108_inLine +BABEL_OP1_204_63334_20130729_183108_outLine +BABEL_OP1_204_63671_20130817_171243_inLine +BABEL_OP1_204_63671_20130817_171243_outLine +BABEL_OP1_204_63730_20130310_032536_inLine +BABEL_OP1_204_63920_20130822_001336_inLine +BABEL_OP1_204_63920_20130822_001336_outLine +BABEL_OP1_204_64065_20130102_231436_inLine +BABEL_OP1_204_64065_20130102_231436_outLine +BABEL_OP1_204_64259_20130610_224356_inLine +BABEL_OP1_204_64259_20130610_224356_outLine +BABEL_OP1_204_64398_20130319_024434_inLine +BABEL_OP1_204_64398_20130319_024434_outLine +BABEL_OP1_204_64469_20130818_174134_inLine +BABEL_OP1_204_64469_20130818_174134_outLine +BABEL_OP1_204_64722_20130215_020559_inLine +BABEL_OP1_204_64722_20130215_020559_outLine +BABEL_OP1_204_65048_20130901_235622_inLine +BABEL_OP1_204_65048_20130901_235622_outLine +BABEL_OP1_204_65268_20130603_220955_inLine +BABEL_OP1_204_65268_20130603_220955_outLine +BABEL_OP1_204_66305_20130218_004015_inLine +BABEL_OP1_204_66305_20130218_004015_outLine +BABEL_OP1_204_66472_20130308_022324_inLine +BABEL_OP1_204_66822_20130121_042919_inLine +BABEL_OP1_204_66822_20130121_042919_outLine +BABEL_OP1_204_66837_20130209_003706_inLine +BABEL_OP1_204_66971_20130617_172242_inLine +BABEL_OP1_204_66971_20130617_172242_outLine +BABEL_OP1_204_67053_20130522_161823_inLine +BABEL_OP1_204_67053_20130522_161823_outLine +BABEL_OP1_204_67283_20130113_013031_inLine +BABEL_OP1_204_67283_20130113_013031_outLine +BABEL_OP1_204_67401_20130222_205647_inLine +BABEL_OP1_204_67401_20130222_205647_outLine +BABEL_OP1_204_67659_20130111_193800_inLine +BABEL_OP1_204_67659_20130111_193800_outLine +BABEL_OP1_204_68384_20130719_175720_inLine +BABEL_OP1_204_68384_20130719_175720_outLine +BABEL_OP1_204_68910_20130816_191414_inLine +BABEL_OP1_204_68910_20130816_191414_outLine +BABEL_OP1_204_69096_20130303_195234_inLine +BABEL_OP1_204_69096_20130303_195234_outLine +BABEL_OP1_204_69937_20130715_192435_inLine +BABEL_OP1_204_69937_20130715_192435_outLine +BABEL_OP1_204_69964_20130704_161248_inLine +BABEL_OP1_204_69964_20130704_161248_outLine +BABEL_OP1_204_69992_20130107_234311_inLine +BABEL_OP1_204_69992_20130107_234311_outLine +BABEL_OP1_204_70216_20130628_200952_inLine +BABEL_OP1_204_70216_20130628_200952_outLine +BABEL_OP1_204_70293_20130902_214220_inLine +BABEL_OP1_204_70293_20130902_214220_outLine +BABEL_OP1_204_70601_20130122_030105_inLine +BABEL_OP1_204_70601_20130122_030105_outLine +BABEL_OP1_204_70794_20121220_222614_inLine +BABEL_OP1_204_70794_20121220_222614_outLine +BABEL_OP1_204_71067_20130319_205826_inLine +BABEL_OP1_204_71067_20130319_205826_outLine +BABEL_OP1_204_71189_20130215_200359_inLine +BABEL_OP1_204_71189_20130215_200359_outLine 
+BABEL_OP1_204_71976_20130730_180338_inLine +BABEL_OP1_204_71976_20130730_180338_outLine +BABEL_OP1_204_72073_20130823_001235_inLine +BABEL_OP1_204_72073_20130823_001235_outLine +BABEL_OP1_204_72110_20130208_235019_inLine +BABEL_OP1_204_72110_20130208_235019_outLine +BABEL_OP1_204_73549_20130701_155700_inLine +BABEL_OP1_204_73549_20130701_155700_outLine +BABEL_OP1_204_73696_20130310_022514_inLine +BABEL_OP1_204_73696_20130310_022514_outLine +BABEL_OP1_204_74121_20130129_170655_inLine +BABEL_OP1_204_74121_20130129_170655_outLine +BABEL_OP1_204_74763_20130825_175903_inLine +BABEL_OP1_204_74763_20130825_175903_outLine +BABEL_OP1_204_75064_20130111_180636_inLine +BABEL_OP1_204_75064_20130111_180636_outLine +BABEL_OP1_204_75365_20130516_010147_inLine +BABEL_OP1_204_75365_20130516_010147_outLine +BABEL_OP1_204_75975_20130902_224807_inLine +BABEL_OP1_204_75975_20130902_224807_outLine +BABEL_OP1_204_76126_20130217_205227_outLine +BABEL_OP1_204_76238_20130205_022020_inLine +BABEL_OP1_204_76482_20130310_023337_inLine +BABEL_OP1_204_76482_20130310_023337_outLine +BABEL_OP1_204_76730_20130825_010524_inLine +BABEL_OP1_204_76730_20130825_010524_outLine +BABEL_OP1_204_77427_20130116_173650_inLine +BABEL_OP1_204_77427_20130116_173650_outLine +BABEL_OP1_204_77803_20121219_215121_inLine +BABEL_OP1_204_77803_20121219_215121_outLine +BABEL_OP1_204_78016_20130118_223813_inLine +BABEL_OP1_204_78016_20130118_223813_outLine +BABEL_OP1_204_78016_20130118_224939_inLine +BABEL_OP1_204_78016_20130118_224939_outLine +BABEL_OP1_204_78116_20130130_004511_inLine +BABEL_OP1_204_78116_20130130_004511_outLine +BABEL_OP1_204_78254_20130114_224850_inLine +BABEL_OP1_204_78254_20130114_224850_outLine +BABEL_OP1_204_78313_20130223_202010_inLine +BABEL_OP1_204_78313_20130223_202010_outLine +BABEL_OP1_204_78543_20130313_200956_inLine +BABEL_OP1_204_78743_20130210_214804_inLine +BABEL_OP1_204_78743_20130210_214804_outLine +BABEL_OP1_204_78829_20130724_210413_inLine +BABEL_OP1_204_78829_20130724_210413_outLine +BABEL_OP1_204_79080_20130224_194409_inLine +BABEL_OP1_204_79080_20130224_194409_outLine +BABEL_OP1_204_79367_20130110_223433_inLine +BABEL_OP1_204_79367_20130110_223433_outLine +BABEL_OP1_204_79505_20130223_203535_inLine +BABEL_OP1_204_79505_20130223_203535_outLine +BABEL_OP1_204_80069_20130310_201210_inLine +BABEL_OP1_204_80439_20130115_225051_inLine +BABEL_OP1_204_80439_20130115_225051_outLine +BABEL_OP1_204_81213_20130114_221437_inLine +BABEL_OP1_204_81213_20130114_221437_outLine +BABEL_OP1_204_81622_20130130_223905_inLine +BABEL_OP1_204_81622_20130130_223905_outLine +BABEL_OP1_204_81810_20130319_043547_inLine +BABEL_OP1_204_81810_20130319_043547_outLine +BABEL_OP1_204_82425_20130108_181556_inLine +BABEL_OP1_204_82425_20130108_181556_outLine +BABEL_OP1_204_82935_20130208_135243_inLine +BABEL_OP1_204_82935_20130208_135243_outLine +BABEL_OP1_204_82979_20130103_191447_inLine +BABEL_OP1_204_82979_20130103_191447_outLine +BABEL_OP1_204_83394_20130313_005013_inLine +BABEL_OP1_204_83394_20130313_005013_outLine +BABEL_OP1_204_83430_20130603_202255_inLine +BABEL_OP1_204_83430_20130603_202255_outLine +BABEL_OP1_204_83455_20130119_213254_inLine +BABEL_OP1_204_83455_20130119_213254_outLine +BABEL_OP1_204_83771_20130625_172000_inLine +BABEL_OP1_204_83771_20130625_172000_outLine +BABEL_OP1_204_84055_20130228_202242_inLine +BABEL_OP1_204_84055_20130228_202242_outLine +BABEL_OP1_204_84077_20130812_184211_inLine +BABEL_OP1_204_84077_20130812_184211_outLine +BABEL_OP1_204_84430_20130817_164608_inLine 
+BABEL_OP1_204_84430_20130817_164608_outLine +BABEL_OP1_204_84430_20130901_201534_inLine +BABEL_OP1_204_84430_20130901_201534_outLine +BABEL_OP1_204_84466_20130220_015953_inLine +BABEL_OP1_204_84466_20130220_015953_outLine +BABEL_OP1_204_84583_20130122_032028_outLine +BABEL_OP1_204_84715_20130225_194321_inLine +BABEL_OP1_204_84715_20130225_194321_outLine +BABEL_OP1_204_85010_20130531_160005_inLine +BABEL_OP1_204_85010_20130531_160005_outLine +BABEL_OP1_204_85028_20130301_204938_inLine +BABEL_OP1_204_85028_20130301_222343_inLine +BABEL_OP1_204_85331_20130310_030345_inLine +BABEL_OP1_204_85331_20130310_030345_outLine +BABEL_OP1_204_85331_20130310_033244_inLine +BABEL_OP1_204_85331_20130310_033244_outLine +BABEL_OP1_204_85647_20130120_023041_inLine +BABEL_OP1_204_85647_20130120_023041_outLine +BABEL_OP1_204_86433_20130126_230445_inLine +BABEL_OP1_204_86433_20130126_230445_outLine +BABEL_OP1_204_86715_20130313_002453_inLine +BABEL_OP1_204_86715_20130313_002453_outLine +BABEL_OP1_204_86715_20130313_003416_inLine +BABEL_OP1_204_86715_20130313_003416_outLine +BABEL_OP1_204_86891_20130605_215220_inLine +BABEL_OP1_204_86891_20130605_215220_outLine +BABEL_OP1_204_87073_20121220_221057_inLine +BABEL_OP1_204_87073_20121220_221057_outLine +BABEL_OP1_204_87073_20121220_221600_inLine +BABEL_OP1_204_87073_20121220_221600_outLine +BABEL_OP1_204_87073_20121220_222957_inLine +BABEL_OP1_204_87073_20121220_222957_outLine +BABEL_OP1_204_87305_20130515_233922_inLine +BABEL_OP1_204_87305_20130515_233922_outLine +BABEL_OP1_204_88445_20130129_191832_inLine +BABEL_OP1_204_88445_20130129_191832_outLine +BABEL_OP1_204_88673_20130705_173732_inLine +BABEL_OP1_204_88673_20130705_173732_outLine +BABEL_OP1_204_88865_20130707_151620_inLine +BABEL_OP1_204_88865_20130707_151620_outLine +BABEL_OP1_204_89695_20130130_001218_inLine +BABEL_OP1_204_89695_20130130_001218_outLine +BABEL_OP1_204_89877_20130129_192538_inLine +BABEL_OP1_204_89877_20130129_192538_outLine +BABEL_OP1_204_90347_20130124_030740_inLine +BABEL_OP1_204_90347_20130124_030740_outLine +BABEL_OP1_204_90709_20130107_232337_inLine +BABEL_OP1_204_90709_20130107_232337_outLine +BABEL_OP1_204_91760_20130618_160303_inLine +BABEL_OP1_204_91760_20130618_160303_outLine +BABEL_OP1_204_92605_20130518_145958_inLine +BABEL_OP1_204_92605_20130518_145958_outLine +BABEL_OP1_204_92809_20130116_171026_inLine +BABEL_OP1_204_92809_20130116_171026_outLine +BABEL_OP1_204_92942_20130127_233540_inLine +BABEL_OP1_204_92942_20130127_233540_outLine +BABEL_OP1_204_93222_20130604_000913_inLine +BABEL_OP1_204_93222_20130604_000913_outLine +BABEL_OP1_204_93469_20130302_033019_inLine +BABEL_OP1_204_93469_20130302_033019_outLine +BABEL_OP1_204_93490_20130209_033837_inLine +BABEL_OP1_204_93490_20130209_033837_outLine +BABEL_OP1_204_93490_20130209_140440_inLine +BABEL_OP1_204_93490_20130209_140440_outLine +BABEL_OP1_204_93681_20130901_204636_inLine +BABEL_OP1_204_93681_20130901_204636_outLine +BABEL_OP1_204_94442_20130617_164306_inLine +BABEL_OP1_204_94442_20130617_164306_outLine +BABEL_OP1_204_95028_20130518_173442_inLine +BABEL_OP1_204_95028_20130518_173442_outLine +BABEL_OP1_204_95446_20130225_185013_inLine +BABEL_OP1_204_95446_20130225_185013_outLine +BABEL_OP1_204_95663_20121221_214944_inLine +BABEL_OP1_204_95663_20121221_214944_outLine +BABEL_OP1_204_95942_20130215_204023_inLine +BABEL_OP1_204_95942_20130215_204023_outLine +BABEL_OP1_204_96158_20130721_235954_inLine +BABEL_OP1_204_96158_20130721_235954_outLine +BABEL_OP1_204_96190_20130116_041341_inLine 
+BABEL_OP1_204_96190_20130116_041341_outLine +BABEL_OP1_204_96247_20130319_165606_inLine +BABEL_OP1_204_96247_20130319_165606_outLine +BABEL_OP1_204_96690_20130129_191200_inLine +BABEL_OP1_204_96690_20130129_191200_outLine +BABEL_OP1_204_96730_20130225_193316_inLine +BABEL_OP1_204_96730_20130225_193316_outLine +BABEL_OP1_204_96808_20130617_185713_inLine +BABEL_OP1_204_96808_20130617_185713_outLine +BABEL_OP1_204_97063_20130227_185803_inLine +BABEL_OP1_204_97063_20130227_185803_outLine +BABEL_OP1_204_97063_20130306_232138_inLine +BABEL_OP1_204_97063_20130306_232138_outLine +BABEL_OP1_204_97220_20130310_023745_inLine +BABEL_OP1_204_97220_20130310_023745_outLine +BABEL_OP1_204_97376_20130128_213930_inLine +BABEL_OP1_204_97376_20130128_213930_outLine +BABEL_OP1_204_97461_20130127_014703_inLine +BABEL_OP1_204_97461_20130127_014703_outLine +BABEL_OP1_204_97461_20130127_015849_inLine +BABEL_OP1_204_97461_20130127_015849_outLine +BABEL_OP1_204_97772_20121218_224525_inLine +BABEL_OP1_204_97772_20121218_224525_outLine +BABEL_OP1_204_98365_20130224_175209_inLine +BABEL_OP1_204_98365_20130224_175209_outLine +BABEL_OP1_204_99289_20130215_210617_inLine +BABEL_OP1_204_99289_20130215_210617_outLine +BABEL_OP1_204_99401_20130108_180622_inLine +BABEL_OP1_204_99401_20130108_180622_outLine +BABEL_OP1_204_99594_20130126_192710_inLine +BABEL_OP1_204_99594_20130126_192710_outLine +BABEL_OP1_204_99887_20130210_212207_inLine +BABEL_OP1_204_99887_20130210_212207_outLine diff --git a/egs/babel/s5d/conf/lists/204-tamil/train.untranscribed.list b/egs/babel/s5d/conf/lists/204-tamil/train.untranscribed.list new file mode 100644 index 00000000000..cacb28a9b83 --- /dev/null +++ b/egs/babel/s5d/conf/lists/204-tamil/train.untranscribed.list @@ -0,0 +1,269 @@ +BABEL_OP1_204_10416_20130129_214039_outLine +BABEL_OP1_204_10464_20130816_191819_inLine +BABEL_OP1_204_10464_20130816_191819_outLine +BABEL_OP1_204_11528_20130611_211620_inLine +BABEL_OP1_204_11528_20130611_211620_outLine +BABEL_OP1_204_11859_20130313_032533_outLine +BABEL_OP1_204_12767_20130116_025609_outLine +BABEL_OP1_204_13126_20130217_010703_inLine +BABEL_OP1_204_13126_20130217_010703_outLine +BABEL_OP1_204_13178_20130325_020355_outLine +BABEL_OP1_204_13547_20130726_181100_inLine +BABEL_OP1_204_14097_20130815_163903_inLine +BABEL_OP1_204_14097_20130815_163903_outLine +BABEL_OP1_204_14137_20130111_210406_inLine +BABEL_OP1_204_14137_20130111_210406_outLine +BABEL_OP1_204_14560_20130325_002021_outLine +BABEL_OP1_204_14814_20130109_222610_outLine +BABEL_OP1_204_15024_20130312_175432_outLine +BABEL_OP1_204_15024_20130312_180805_outLine +BABEL_OP1_204_15216_20130212_014230_outLine +BABEL_OP1_204_15322_20130301_005753_outLine +BABEL_OP1_204_15869_20130818_163437_inLine +BABEL_OP1_204_15869_20130818_163437_outLine +BABEL_OP1_204_16149_20130116_033842_outLine +BABEL_OP1_204_16475_20130119_031738_outLine +BABEL_OP1_204_16839_20130215_160016_outLine +BABEL_OP1_204_16886_20130121_034643_outLine +BABEL_OP1_204_16938_20130121_204111_outLine +BABEL_OP1_204_17573_20130210_015840_outLine +BABEL_OP1_204_17751_20130313_054734_inLine +BABEL_OP1_204_18297_20130302_224344_inLine +BABEL_OP1_204_18297_20130302_224344_outLine +BABEL_OP1_204_18490_20130729_180159_inLine +BABEL_OP1_204_19120_20130216_232255_inLine +BABEL_OP1_204_19444_20130726_202328_inLine +BABEL_OP1_204_19767_20130227_011601_outLine +BABEL_OP1_204_20922_20130207_205901_inLine +BABEL_OP1_204_20972_20130426_122452_outLine +BABEL_OP1_204_21315_20130310_055422_inLine +BABEL_OP1_204_21624_20130219_005819_outLine 
+BABEL_OP1_204_22591_20130817_190345_inLine +BABEL_OP1_204_22591_20130817_190345_outLine +BABEL_OP1_204_22612_20130209_232523_outLine +BABEL_OP1_204_22918_20130228_021314_outLine +BABEL_OP1_204_23893_20130223_170306_inLine +BABEL_OP1_204_24209_20130814_213938_inLine +BABEL_OP1_204_24209_20130814_213938_outLine +BABEL_OP1_204_24501_20130217_012457_inLine +BABEL_OP1_204_24532_20121227_175136_outLine +BABEL_OP1_204_24586_20130217_014206_inLine +BABEL_OP1_204_24924_20130311_043001_outLine +BABEL_OP1_204_24982_20121228_191618_outLine +BABEL_OP1_204_25719_20130209_012505_outLine +BABEL_OP1_204_25961_20130107_180739_outLine +BABEL_OP1_204_26072_20130227_193336_inLine +BABEL_OP1_204_26836_20121228_170007_outLine +BABEL_OP1_204_28190_20130209_194352_inLine +BABEL_OP1_204_28190_20130225_194934_inLine +BABEL_OP1_204_28600_20130209_182228_outLine +BABEL_OP1_204_28644_20130724_180414_inLine +BABEL_OP1_204_29230_20130311_030639_outLine +BABEL_OP1_204_29563_20130724_172019_inLine +BABEL_OP1_204_29563_20130724_172019_outLine +BABEL_OP1_204_30253_20130216_045613_outLine +BABEL_OP1_204_31184_20130124_204831_outLine +BABEL_OP1_204_31346_20130216_053626_outLine +BABEL_OP1_204_32148_20130217_164600_inLine +BABEL_OP1_204_32148_20130217_164600_outLine +BABEL_OP1_204_32301_20130129_184613_outLine +BABEL_OP1_204_32861_20130227_173658_outLine +BABEL_OP1_204_32959_20130209_030319_outLine +BABEL_OP1_204_33704_20130226_220031_outLine +BABEL_OP1_204_33933_20130829_222537_inLine +BABEL_OP1_204_33933_20130829_222537_outLine +BABEL_OP1_204_34410_20130611_194642_inLine +BABEL_OP1_204_34410_20130611_194642_outLine +BABEL_OP1_204_34477_20130120_004221_inLine +BABEL_OP1_204_34477_20130120_010034_inLine +BABEL_OP1_204_34826_20130226_192804_inLine +BABEL_OP1_204_34899_20130311_034756_outLine +BABEL_OP1_204_35838_20130725_195132_inLine +BABEL_OP1_204_36341_20130107_025830_outLine +BABEL_OP1_204_36642_20130617_150620_outLine +BABEL_OP1_204_37064_20121227_220816_outLine +BABEL_OP1_204_37285_20130129_031728_inLine +BABEL_OP1_204_37285_20130129_031728_outLine +BABEL_OP1_204_38431_20130205_201344_outLine +BABEL_OP1_204_38878_20130124_001536_inLine +BABEL_OP1_204_40092_20130813_200028_inLine +BABEL_OP1_204_40092_20130813_200028_outLine +BABEL_OP1_204_40740_20130216_225837_outLine +BABEL_OP1_204_41542_20130225_183730_inLine +BABEL_OP1_204_41745_20130116_005714_outLine +BABEL_OP1_204_43115_20130302_023125_inLine +BABEL_OP1_204_43784_20121227_190820_outLine +BABEL_OP1_204_44446_20130523_123230_inLine +BABEL_OP1_204_44446_20130523_123230_outLine +BABEL_OP1_204_44477_20130121_193451_outLine +BABEL_OP1_204_44709_20130319_032435_inLine +BABEL_OP1_204_44709_20130319_032435_outLine +BABEL_OP1_204_45851_20130227_013648_inLine +BABEL_OP1_204_46389_20130814_160916_inLine +BABEL_OP1_204_46389_20130814_160916_outLine +BABEL_OP1_204_46389_20130814_161827_inLine +BABEL_OP1_204_46389_20130814_161827_outLine +BABEL_OP1_204_46808_20130829_232458_inLine +BABEL_OP1_204_46808_20130829_232458_outLine +BABEL_OP1_204_46974_20130129_181636_outLine +BABEL_OP1_204_47110_20130815_155025_inLine +BABEL_OP1_204_47110_20130815_155025_outLine +BABEL_OP1_204_48907_20130228_232925_outLine +BABEL_OP1_204_49001_20121228_172935_outLine +BABEL_OP1_204_49330_20130805_162032_inLine +BABEL_OP1_204_49330_20130805_162032_outLine +BABEL_OP1_204_49739_20130726_173931_inLine +BABEL_OP1_204_50175_20121222_205817_inLine +BABEL_OP1_204_50175_20121222_205817_outLine +BABEL_OP1_204_51015_20130130_013728_outLine +BABEL_OP1_204_51858_20130521_175757_inLine 
+BABEL_OP1_204_52381_20130224_210437_inLine +BABEL_OP1_204_52381_20130224_210437_outLine +BABEL_OP1_204_52404_20130119_200928_outLine +BABEL_OP1_204_52442_20130120_233503_inLine +BABEL_OP1_204_52442_20130120_233503_outLine +BABEL_OP1_204_52499_20130825_162347_inLine +BABEL_OP1_204_52499_20130825_162347_outLine +BABEL_OP1_204_53842_20130122_230928_outLine +BABEL_OP1_204_54046_20130209_030752_inLine +BABEL_OP1_204_54530_20130217_020357_inLine +BABEL_OP1_204_54953_20130224_201532_inLine +BABEL_OP1_204_55013_20130301_025827_inLine +BABEL_OP1_204_55013_20130301_025827_outLine +BABEL_OP1_204_55902_20130520_170810_inLine +BABEL_OP1_204_56370_20130107_193415_outLine +BABEL_OP1_204_56523_20130120_185614_inLine +BABEL_OP1_204_56523_20130120_185614_outLine +BABEL_OP1_204_56523_20130120_190444_inLine +BABEL_OP1_204_56523_20130120_190444_outLine +BABEL_OP1_204_56523_20130126_235544_inLine +BABEL_OP1_204_56523_20130126_235544_outLine +BABEL_OP1_204_56684_20130208_181923_inLine +BABEL_OP1_204_57141_20130216_214557_inLine +BABEL_OP1_204_57650_20130518_200728_outLine +BABEL_OP1_204_58585_20130225_194900_inLine +BABEL_OP1_204_58585_20130225_194900_outLine +BABEL_OP1_204_59163_20130829_230137_inLine +BABEL_OP1_204_59163_20130829_230137_outLine +BABEL_OP1_204_59549_20130116_224253_outLine +BABEL_OP1_204_59645_20130122_004956_inLine +BABEL_OP1_204_60436_20130303_010341_inLine +BABEL_OP1_204_60436_20130303_010341_outLine +BABEL_OP1_204_60474_20130111_204951_inLine +BABEL_OP1_204_60538_20130107_001630_outLine +BABEL_OP1_204_60626_20130104_165746_outLine +BABEL_OP1_204_61040_20130226_221158_outLine +BABEL_OP1_204_61873_20130128_002947_outLine +BABEL_OP1_204_62200_20130121_031957_inLine +BABEL_OP1_204_62200_20130121_031957_outLine +BABEL_OP1_204_62430_20130221_003525_inLine +BABEL_OP1_204_63604_20121231_023648_inLine +BABEL_OP1_204_63604_20121231_024400_inLine +BABEL_OP1_204_63787_20130115_003402_inLine +BABEL_OP1_204_63787_20130115_003402_outLine +BABEL_OP1_204_64494_20130118_033516_outLine +BABEL_OP1_204_64796_20130101_031431_inLine +BABEL_OP1_204_65477_20130121_031004_inLine +BABEL_OP1_204_66001_20130110_231018_outLine +BABEL_OP1_204_66001_20130110_232622_outLine +BABEL_OP1_204_66959_20130224_165508_inLine +BABEL_OP1_204_67622_20121224_014023_inLine +BABEL_OP1_204_69153_20130304_135528_inLine +BABEL_OP1_204_70452_20130111_164540_inLine +BABEL_OP1_204_70726_20130825_192242_inLine +BABEL_OP1_204_70726_20130825_192242_outLine +BABEL_OP1_204_71047_20130226_192147_inLine +BABEL_OP1_204_71047_20130226_192147_outLine +BABEL_OP1_204_72007_20130130_020438_outLine +BABEL_OP1_204_73258_20130129_221752_inLine +BABEL_OP1_204_73485_20130226_020310_inLine +BABEL_OP1_204_73485_20130226_020310_outLine +BABEL_OP1_204_75764_20130227_174139_inLine +BABEL_OP1_204_75764_20130227_174139_outLine +BABEL_OP1_204_76218_20130131_023737_outLine +BABEL_OP1_204_76444_20130613_172917_inLine +BABEL_OP1_204_76499_20130401_153504_inLine +BABEL_OP1_204_76499_20130401_153504_outLine +BABEL_OP1_204_77112_20130103_183710_outLine +BABEL_OP1_204_77126_20130110_175103_outLine +BABEL_OP1_204_77146_20121224_223748_outLine +BABEL_OP1_204_77567_20130103_223440_outLine +BABEL_OP1_204_78398_20130103_204208_inLine +BABEL_OP1_204_78398_20130103_204208_outLine +BABEL_OP1_204_78544_20130119_181147_outLine +BABEL_OP1_204_78630_20130116_034919_inLine +BABEL_OP1_204_78833_20130726_170037_inLine +BABEL_OP1_204_78943_20130109_215659_outLine +BABEL_OP1_204_79139_20130130_193601_inLine +BABEL_OP1_204_79190_20130127_012553_outLine 
+BABEL_OP1_204_79451_20121227_183417_outLine +BABEL_OP1_204_79820_20130104_184214_inLine +BABEL_OP1_204_79820_20130104_184214_outLine +BABEL_OP1_204_79973_20130729_211226_inLine +BABEL_OP1_204_79973_20130729_211226_outLine +BABEL_OP1_204_80559_20130103_205745_outLine +BABEL_OP1_204_81671_20130217_195401_outLine +BABEL_OP1_204_81971_20121225_005045_outLine +BABEL_OP1_204_82035_20130120_205546_inLine +BABEL_OP1_204_82138_20130328_213639_inLine +BABEL_OP1_204_82140_20130328_220209_inLine +BABEL_OP1_204_82140_20130328_220209_outLine +BABEL_OP1_204_82622_20121230_013735_inLine +BABEL_OP1_204_82622_20121230_013735_outLine +BABEL_OP1_204_82966_20130217_024614_outLine +BABEL_OP1_204_83609_20130716_211644_inLine +BABEL_OP1_204_83609_20130716_211644_outLine +BABEL_OP1_204_84605_20130114_234516_inLine +BABEL_OP1_204_84609_20130726_193719_inLine +BABEL_OP1_204_84737_20130614_151624_inLine +BABEL_OP1_204_85047_20130328_012807_outLine +BABEL_OP1_204_85260_20130822_000133_inLine +BABEL_OP1_204_85260_20130822_000133_outLine +BABEL_OP1_204_86467_20121224_182636_inLine +BABEL_OP1_204_86467_20121224_182636_outLine +BABEL_OP1_204_86628_20130516_235050_inLine +BABEL_OP1_204_87629_20130122_042941_inLine +BABEL_OP1_204_87629_20130122_042941_outLine +BABEL_OP1_204_87629_20130124_021257_inLine +BABEL_OP1_204_87629_20130124_021257_outLine +BABEL_OP1_204_88674_20130729_204202_inLine +BABEL_OP1_204_89059_20130224_201925_inLine +BABEL_OP1_204_89226_20130825_175510_inLine +BABEL_OP1_204_89226_20130825_175510_outLine +BABEL_OP1_204_89560_20130222_171412_outLine +BABEL_OP1_204_89560_20130222_172629_outLine +BABEL_OP1_204_89575_20130227_020958_outLine +BABEL_OP1_204_90318_20130825_173403_inLine +BABEL_OP1_204_90318_20130825_173403_outLine +BABEL_OP1_204_90572_20130221_011543_inLine +BABEL_OP1_204_90572_20130221_011543_outLine +BABEL_OP1_204_91372_20130311_005543_inLine +BABEL_OP1_204_91478_20130531_193258_inLine +BABEL_OP1_204_91478_20130531_193258_outLine +BABEL_OP1_204_92527_20130119_222341_outLine +BABEL_OP1_204_92792_20130225_210332_inLine +BABEL_OP1_204_92792_20130225_210332_outLine +BABEL_OP1_204_93007_20130628_153139_inLine +BABEL_OP1_204_93007_20130628_153139_outLine +BABEL_OP1_204_93153_20130108_171639_outLine +BABEL_OP1_204_93861_20130120_204242_inLine +BABEL_OP1_204_93861_20130120_210020_inLine +BABEL_OP1_204_94253_20130116_032205_inLine +BABEL_OP1_204_94333_20130110_220709_outLine +BABEL_OP1_204_94449_20130226_025646_outLine +BABEL_OP1_204_94666_20130122_132253_inLine +BABEL_OP1_204_95490_20130112_211544_outLine +BABEL_OP1_204_95677_20130818_153821_inLine +BABEL_OP1_204_95677_20130818_153821_outLine +BABEL_OP1_204_95750_20130830_003827_inLine +BABEL_OP1_204_95750_20130830_003827_outLine +BABEL_OP1_204_95966_20130131_013244_outLine +BABEL_OP1_204_96376_20130311_011036_inLine +BABEL_OP1_204_96405_20130104_164913_inLine +BABEL_OP1_204_96405_20130104_164913_outLine +BABEL_OP1_204_96985_20121231_002917_inLine +BABEL_OP1_204_97097_20130322_004237_inLine +BABEL_OP1_204_97264_20130216_205659_outLine +BABEL_OP1_204_97460_20130126_211058_outLine +BABEL_OP1_204_97557_20130123_172926_inLine +BABEL_OP1_204_99920_20130102_191548_outLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/dev.list b/egs/babel/s5d/conf/lists/205-kurmanji/dev.list new file mode 100644 index 00000000000..168081362fa --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/dev.list @@ -0,0 +1,132 @@ +BABEL_OP2_205_10019_20130330_212743_inLine +BABEL_OP2_205_10019_20130330_212743_outLine +BABEL_OP2_205_10319_20130304_201724_inLine 
+BABEL_OP2_205_10319_20130304_201724_outLine +BABEL_OP2_205_11096_20130410_000324_inLine +BABEL_OP2_205_11096_20130410_000324_outLine +BABEL_OP2_205_12036_20130315_061649_inLine +BABEL_OP2_205_12036_20130315_061649_outLine +BABEL_OP2_205_13792_20130307_054343_inLine +BABEL_OP2_205_13792_20130307_054343_outLine +BABEL_OP2_205_14229_20130325_212616_inLine +BABEL_OP2_205_14229_20130325_212616_outLine +BABEL_OP2_205_14440_20130327_213643_inLine +BABEL_OP2_205_14440_20130327_213643_outLine +BABEL_OP2_205_15216_20130406_215019_inLine +BABEL_OP2_205_15216_20130406_215019_outLine +BABEL_OP2_205_15216_20130406_215856_inLine +BABEL_OP2_205_15216_20130406_215856_outLine +BABEL_OP2_205_15638_20130331_200208_inLine +BABEL_OP2_205_15638_20130331_200208_outLine +BABEL_OP2_205_15730_20130303_011735_inLine +BABEL_OP2_205_15730_20130303_011735_outLine +BABEL_OP2_205_15848_20130228_192452_inLine +BABEL_OP2_205_15848_20130228_192452_outLine +BABEL_OP2_205_16056_20130323_010902_inLine +BABEL_OP2_205_16056_20130323_010902_outLine +BABEL_OP2_205_16787_20130323_072114_inLine +BABEL_OP2_205_16787_20130323_072114_outLine +BABEL_OP2_205_17127_20130407_044210_inLine +BABEL_OP2_205_17127_20130407_044210_outLine +BABEL_OP2_205_19663_20130320_062434_inLine +BABEL_OP2_205_19663_20130320_062434_outLine +BABEL_OP2_205_20454_20140125_002855_inLine +BABEL_OP2_205_20454_20140125_002855_outLine +BABEL_OP2_205_21029_20130313_025506_inLine +BABEL_OP2_205_21029_20130313_025506_outLine +BABEL_OP2_205_22288_20131228_021559_inLine +BABEL_OP2_205_22965_20130318_011526_inLine +BABEL_OP2_205_22965_20130318_011526_outLine +BABEL_OP2_205_23151_20130415_001434_inLine +BABEL_OP2_205_23151_20130415_001434_outLine +BABEL_OP2_205_23151_20130415_002727_inLine +BABEL_OP2_205_23151_20130415_002727_outLine +BABEL_OP2_205_23260_20130412_034843_inLine +BABEL_OP2_205_23260_20130412_034843_outLine +BABEL_OP2_205_24589_20130327_211515_inLine +BABEL_OP2_205_24589_20130327_211515_outLine +BABEL_OP2_205_26206_20130507_004626_inLine +BABEL_OP2_205_26206_20130507_004626_outLine +BABEL_OP2_205_26999_20130414_220838_inLine +BABEL_OP2_205_26999_20130414_220838_outLine +BABEL_OP2_205_28190_20130409_034344_inLine +BABEL_OP2_205_28190_20130409_034344_outLine +BABEL_OP2_205_28775_20130314_052506_inLine +BABEL_OP2_205_28775_20130314_052506_outLine +BABEL_OP2_205_28871_20130226_041104_inLine +BABEL_OP2_205_28871_20130226_041104_outLine +BABEL_OP2_205_28945_20130315_053607_inLine +BABEL_OP2_205_28945_20130315_053607_outLine +BABEL_OP2_205_29039_20130401_012825_inLine +BABEL_OP2_205_29039_20130401_012825_outLine +BABEL_OP2_205_29135_20130303_025305_inLine +BABEL_OP2_205_29135_20130303_025305_outLine +BABEL_OP2_205_29633_20130413_192214_inLine +BABEL_OP2_205_29633_20130413_192214_outLine +BABEL_OP2_205_29643_20130408_040750_inLine +BABEL_OP2_205_29643_20130408_040750_outLine +BABEL_OP2_205_29777_20130409_004437_inLine +BABEL_OP2_205_29777_20130409_004437_outLine +BABEL_OP2_205_30653_20130505_220845_inLine +BABEL_OP2_205_30653_20130505_220845_outLine +BABEL_OP2_205_31919_20130413_172911_inLine +BABEL_OP2_205_31919_20130413_172911_outLine +BABEL_OP2_205_33251_20130331_025243_inLine +BABEL_OP2_205_33251_20130331_025243_outLine +BABEL_OP2_205_34336_20130325_005404_inLine +BABEL_OP2_205_34336_20130325_005404_outLine +BABEL_OP2_205_35069_20130407_022433_inLine +BABEL_OP2_205_35069_20130407_022433_outLine +BABEL_OP2_205_35069_20130407_023338_inLine +BABEL_OP2_205_35069_20130407_023338_outLine +BABEL_OP2_205_35583_20130408_183143_inLine 
+BABEL_OP2_205_35583_20130408_183143_outLine +BABEL_OP2_205_35788_20131231_021724_inLine +BABEL_OP2_205_35788_20131231_021724_outLine +BABEL_OP2_205_36219_20130324_013816_inLine +BABEL_OP2_205_36219_20130324_013816_outLine +BABEL_OP2_205_36219_20130324_015535_inLine +BABEL_OP2_205_36219_20130324_015535_outLine +BABEL_OP2_205_36293_20130302_213235_inLine +BABEL_OP2_205_36293_20130302_213235_outLine +BABEL_OP2_205_41097_20130406_012211_inLine +BABEL_OP2_205_41097_20130406_012211_outLine +BABEL_OP2_205_44868_20130330_223802_inLine +BABEL_OP2_205_44868_20130330_223802_outLine +BABEL_OP2_205_45699_20140126_003136_inLine +BABEL_OP2_205_45699_20140126_003136_outLine +BABEL_OP2_205_46535_20140108_201338_inLine +BABEL_OP2_205_46535_20140108_201338_outLine +BABEL_OP2_205_50565_20130304_002644_inLine +BABEL_OP2_205_50565_20130304_002644_outLine +BABEL_OP2_205_51540_20130407_040411_inLine +BABEL_OP2_205_51540_20130407_040411_outLine +BABEL_OP2_205_51540_20130407_042258_inLine +BABEL_OP2_205_51540_20130407_042258_outLine +BABEL_OP2_205_54046_20130409_011916_inLine +BABEL_OP2_205_54046_20130409_011916_outLine +BABEL_OP2_205_54735_20131228_012336_inLine +BABEL_OP2_205_54735_20131228_012336_outLine +BABEL_OP2_205_60830_20131223_005744_inLine +BABEL_OP2_205_72903_20131225_002056_inLine +BABEL_OP2_205_72903_20131225_002056_outLine +BABEL_OP2_205_77225_20140106_235541_inLine +BABEL_OP2_205_77225_20140106_235541_outLine +BABEL_OP2_205_78360_20140123_011434_inLine +BABEL_OP2_205_78360_20140123_011434_outLine +BABEL_OP2_205_79139_20130621_004019_inLine +BABEL_OP2_205_79139_20130621_004019_outLine +BABEL_OP2_205_86830_20130413_224330_inLine +BABEL_OP2_205_86830_20130413_224330_outLine +BABEL_OP2_205_86830_20130413_225657_inLine +BABEL_OP2_205_86830_20130413_225657_outLine +BABEL_OP2_205_92060_20130413_223434_inLine +BABEL_OP2_205_92060_20130413_223434_outLine +BABEL_OP2_205_92643_20130413_053627_inLine +BABEL_OP2_205_92643_20130413_053627_outLine +BABEL_OP2_205_95399_20131222_015121_inLine +BABEL_OP2_205_95399_20131222_015121_outLine +BABEL_OP2_205_96808_20130412_211621_inLine +BABEL_OP2_205_96808_20130412_211621_outLine +BABEL_OP2_205_97136_20130525_003505_inLine +BABEL_OP2_205_97136_20130525_003505_outLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/eval.list b/egs/babel/s5d/conf/lists/205-kurmanji/eval.list new file mode 100644 index 00000000000..e0ceeb8f70d --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/eval.list @@ -0,0 +1,193 @@ +BABEL_OP2_205_10188_20130301_060141_inLine +BABEL_OP2_205_10188_20130301_060141_outLine +BABEL_OP2_205_10416_20130623_000709_inLine +BABEL_OP2_205_10416_20130623_000709_outLine +BABEL_OP2_205_11419_20140124_203146_inLine +BABEL_OP2_205_11419_20140124_203146_outLine +BABEL_OP2_205_13040_20130312_094024_inLine +BABEL_OP2_205_13040_20130312_094024_outLine +BABEL_OP2_205_13427_20130315_071728_inLine +BABEL_OP2_205_13427_20130315_071728_outLine +BABEL_OP2_205_13427_20130315_075858_inLine +BABEL_OP2_205_13427_20130315_075858_outLine +BABEL_OP2_205_14179_20130401_220334_inLine +BABEL_OP2_205_14179_20130401_220334_outLine +BABEL_OP2_205_14537_20130413_045331_inLine +BABEL_OP2_205_14537_20130413_045331_outLine +BABEL_OP2_205_14560_20130408_183055_inLine +BABEL_OP2_205_14560_20130408_183055_outLine +BABEL_OP2_205_15702_20130331_230832_inLine +BABEL_OP2_205_15702_20130331_230832_outLine +BABEL_OP2_205_16184_20130227_050048_inLine +BABEL_OP2_205_16184_20130227_050048_outLine +BABEL_OP2_205_16249_20140124_210751_inLine +BABEL_OP2_205_16249_20140124_210751_outLine 
+BABEL_OP2_205_16407_20140124_214655_inLine +BABEL_OP2_205_16407_20140124_214655_outLine +BABEL_OP2_205_16601_20130415_195023_inLine +BABEL_OP2_205_16601_20130415_195023_outLine +BABEL_OP2_205_17165_20130620_234702_inLine +BABEL_OP2_205_17165_20130620_234702_outLine +BABEL_OP2_205_17573_20130408_175948_inLine +BABEL_OP2_205_17573_20130408_175948_outLine +BABEL_OP2_205_17890_20130507_001713_inLine +BABEL_OP2_205_17890_20130507_001713_outLine +BABEL_OP2_205_18033_20140124_221028_inLine +BABEL_OP2_205_18033_20140124_221028_outLine +BABEL_OP2_205_18370_20140124_223813_inLine +BABEL_OP2_205_18370_20140124_223813_outLine +BABEL_OP2_205_18863_20130412_202349_inLine +BABEL_OP2_205_18863_20130412_202349_outLine +BABEL_OP2_205_19120_20130506_071138_inLine +BABEL_OP2_205_19120_20130506_071138_outLine +BABEL_OP2_205_19832_20130621_222438_inLine +BABEL_OP2_205_19832_20130621_222438_outLine +BABEL_OP2_205_20330_20130413_042945_inLine +BABEL_OP2_205_20330_20130413_042945_outLine +BABEL_OP2_205_22170_20131101_103425_inLine +BABEL_OP2_205_22170_20131101_103425_outLine +BABEL_OP2_205_22466_20130225_225235_inLine +BABEL_OP2_205_22466_20130225_225235_outLine +BABEL_OP2_205_22466_20130225_225943_inLine +BABEL_OP2_205_22466_20130225_225943_outLine +BABEL_OP2_205_22641_20130304_041448_inLine +BABEL_OP2_205_22641_20130304_041448_outLine +BABEL_OP2_205_23395_20130324_223525_inLine +BABEL_OP2_205_23395_20130324_223525_outLine +BABEL_OP2_205_23628_20130326_051335_inLine +BABEL_OP2_205_23628_20130326_051335_outLine +BABEL_OP2_205_24033_20130406_195331_inLine +BABEL_OP2_205_24033_20130406_195331_outLine +BABEL_OP2_205_24209_20140125_012503_inLine +BABEL_OP2_205_24209_20140125_012503_outLine +BABEL_OP2_205_24924_20130612_193640_inLine +BABEL_OP2_205_24924_20130612_193640_outLine +BABEL_OP2_205_25767_20130316_235631_inLine +BABEL_OP2_205_25767_20130316_235631_outLine +BABEL_OP2_205_26869_20140107_231859_inLine +BABEL_OP2_205_26869_20140107_231859_outLine +BABEL_OP2_205_28585_20130406_222735_inLine +BABEL_OP2_205_28585_20130406_222735_outLine +BABEL_OP2_205_29076_20130318_205813_inLine +BABEL_OP2_205_29076_20130318_205813_outLine +BABEL_OP2_205_29482_20140123_203957_inLine +BABEL_OP2_205_29482_20140123_203957_outLine +BABEL_OP2_205_30250_20130303_023602_inLine +BABEL_OP2_205_30250_20130303_023602_outLine +BABEL_OP2_205_30497_20130412_045747_inLine +BABEL_OP2_205_30497_20130412_045747_outLine +BABEL_OP2_205_31484_20130331_231345_inLine +BABEL_OP2_205_31484_20130331_231345_outLine +BABEL_OP2_205_31979_20130319_081826_inLine +BABEL_OP2_205_31979_20130319_081826_outLine +BABEL_OP2_205_32727_20130413_214408_inLine +BABEL_OP2_205_32727_20130413_214408_outLine +BABEL_OP2_205_33800_20140125_192240_inLine +BABEL_OP2_205_33800_20140125_192240_outLine +BABEL_OP2_205_33992_20130519_062650_inLine +BABEL_OP2_205_33992_20130519_062650_outLine +BABEL_OP2_205_34486_20130518_044858_inLine +BABEL_OP2_205_34486_20130518_044858_outLine +BABEL_OP2_205_34899_20130619_000929_inLine +BABEL_OP2_205_34899_20130619_000929_outLine +BABEL_OP2_205_35786_20130522_020532_inLine +BABEL_OP2_205_35786_20130522_020532_outLine +BABEL_OP2_205_36147_20140125_211617_inLine +BABEL_OP2_205_36147_20140125_211617_outLine +BABEL_OP2_205_37064_20130318_004959_inLine +BABEL_OP2_205_37064_20130318_004959_outLine +BABEL_OP2_205_38139_20130622_053934_inLine +BABEL_OP2_205_38139_20130622_053934_outLine +BABEL_OP2_205_38139_20130622_055315_inLine +BABEL_OP2_205_38139_20130622_055315_outLine +BABEL_OP2_205_38750_20130413_172545_inLine 
+BABEL_OP2_205_38750_20130413_172545_outLine +BABEL_OP2_205_38750_20130413_173308_inLine +BABEL_OP2_205_38750_20130413_173308_outLine +BABEL_OP2_205_39744_20130301_230818_inLine +BABEL_OP2_205_39744_20130301_230818_outLine +BABEL_OP2_205_41400_20140122_205716_inLine +BABEL_OP2_205_41400_20140122_205716_outLine +BABEL_OP2_205_41682_20140125_234229_inLine +BABEL_OP2_205_41682_20140125_234229_outLine +BABEL_OP2_205_42231_20130415_192437_inLine +BABEL_OP2_205_42231_20130415_192437_outLine +BABEL_OP2_205_42600_20130324_234058_inLine +BABEL_OP2_205_42600_20130324_234058_outLine +BABEL_OP2_205_43074_20130622_063932_inLine +BABEL_OP2_205_43074_20130622_063932_outLine +BABEL_OP2_205_43646_20130227_205147_inLine +BABEL_OP2_205_43646_20130227_205147_outLine +BABEL_OP2_205_44420_20130328_013519_inLine +BABEL_OP2_205_44420_20130328_013519_outLine +BABEL_OP2_205_45106_20130330_013041_inLine +BABEL_OP2_205_45106_20130330_013041_outLine +BABEL_OP2_205_45771_20130518_054435_inLine +BABEL_OP2_205_45771_20130518_054435_outLine +BABEL_OP2_205_45777_20130325_205405_inLine +BABEL_OP2_205_45777_20130325_205405_outLine +BABEL_OP2_205_45843_20130330_060240_inLine +BABEL_OP2_205_45843_20130330_060240_outLine +BABEL_OP2_205_45843_20130330_061029_inLine +BABEL_OP2_205_45843_20130330_061029_outLine +BABEL_OP2_205_46712_20130326_222120_inLine +BABEL_OP2_205_46712_20130326_222120_outLine +BABEL_OP2_205_46974_20130506_235400_inLine +BABEL_OP2_205_46974_20130506_235400_outLine +BABEL_OP2_205_46974_20130507_000125_inLine +BABEL_OP2_205_46974_20130507_000125_outLine +BABEL_OP2_205_47959_20130322_204503_inLine +BABEL_OP2_205_47959_20130322_204503_outLine +BABEL_OP2_205_50958_20130318_044644_inLine +BABEL_OP2_205_50958_20130318_044644_outLine +BABEL_OP2_205_50962_20130321_021704_inLine +BABEL_OP2_205_50962_20130321_021704_outLine +BABEL_OP2_205_51417_20130407_001304_inLine +BABEL_OP2_205_51417_20130407_001304_outLine +BABEL_OP2_205_56213_20130508_055436_inLine +BABEL_OP2_205_56213_20130508_055436_outLine +BABEL_OP2_205_56213_20130508_060404_inLine +BABEL_OP2_205_56213_20130508_060404_outLine +BABEL_OP2_205_57067_20130407_183303_inLine +BABEL_OP2_205_57067_20130407_183303_outLine +BABEL_OP2_205_57922_20130331_195052_inLine +BABEL_OP2_205_57922_20130331_195052_outLine +BABEL_OP2_205_60115_20130330_212943_inLine +BABEL_OP2_205_60115_20130330_212943_outLine +BABEL_OP2_205_62362_20130517_212752_inLine +BABEL_OP2_205_62362_20130517_212752_outLine +BABEL_OP2_205_63265_20131226_003348_inLine +BABEL_OP2_205_63265_20131226_003348_outLine +BABEL_OP2_205_63511_20131224_213929_inLine +BABEL_OP2_205_63511_20131224_213929_outLine +BABEL_OP2_205_65252_20130413_190417_inLine +BABEL_OP2_205_65252_20130413_190417_outLine +BABEL_OP2_205_65339_20131225_232144_inLine +BABEL_OP2_205_65339_20131225_232144_outLine +BABEL_OP2_205_70726_20140112_003521_inLine +BABEL_OP2_205_70726_20140112_003521_outLine +BABEL_OP2_205_76902_20140123_211702_inLine +BABEL_OP2_205_76902_20140123_211702_outLine +BABEL_OP2_205_78161_20140124_012828_inLine +BABEL_OP2_205_78161_20140124_012828_outLine +BABEL_OP2_205_78958_20140105_000039_inLine +BABEL_OP2_205_78958_20140105_000039_outLine +BABEL_OP2_205_81229_20130316_035102_inLine +BABEL_OP2_205_81229_20130316_035102_outLine +BABEL_OP2_205_85439_20130413_172716_inLine +BABEL_OP2_205_85439_20130413_172716_outLine +BABEL_OP2_205_90440_20140123_225611_inLine +BABEL_OP2_205_91930_20130413_193923_inLine +BABEL_OP2_205_91930_20130413_193923_outLine +BABEL_OP2_205_92698_20130622_032618_inLine 
+BABEL_OP2_205_92698_20130622_032618_outLine +BABEL_OP2_205_94141_20140118_223253_inLine +BABEL_OP2_205_94141_20140118_223253_outLine +BABEL_OP2_205_95966_20131224_023420_inLine +BABEL_OP2_205_96584_20130408_014557_inLine +BABEL_OP2_205_96584_20130408_014557_outLine +BABEL_OP2_205_96940_20140123_220447_inLine +BABEL_OP2_205_96940_20140123_220447_outLine +BABEL_OP2_205_97988_20130414_061145_inLine +BABEL_OP2_205_97988_20130414_061145_outLine +BABEL_OP2_205_98580_20131223_014628_inLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/evalpart1.list b/egs/babel/s5d/conf/lists/205-kurmanji/evalpart1.list new file mode 100644 index 00000000000..ff7234650d1 --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/evalpart1.list @@ -0,0 +1,63 @@ +BABEL_OP2_205_13040_20130312_094024_inLine +BABEL_OP2_205_13040_20130312_094024_outLine +BABEL_OP2_205_13427_20130315_071728_inLine +BABEL_OP2_205_13427_20130315_071728_outLine +BABEL_OP2_205_13427_20130315_075858_inLine +BABEL_OP2_205_13427_20130315_075858_outLine +BABEL_OP2_205_16184_20130227_050048_inLine +BABEL_OP2_205_16184_20130227_050048_outLine +BABEL_OP2_205_17165_20130620_234702_inLine +BABEL_OP2_205_17165_20130620_234702_outLine +BABEL_OP2_205_17573_20130408_175948_inLine +BABEL_OP2_205_17573_20130408_175948_outLine +BABEL_OP2_205_18863_20130412_202349_inLine +BABEL_OP2_205_18863_20130412_202349_outLine +BABEL_OP2_205_19120_20130506_071138_inLine +BABEL_OP2_205_19120_20130506_071138_outLine +BABEL_OP2_205_23628_20130326_051335_inLine +BABEL_OP2_205_23628_20130326_051335_outLine +BABEL_OP2_205_24033_20130406_195331_inLine +BABEL_OP2_205_24033_20130406_195331_outLine +BABEL_OP2_205_24209_20140125_012503_inLine +BABEL_OP2_205_24209_20140125_012503_outLine +BABEL_OP2_205_24924_20130612_193640_inLine +BABEL_OP2_205_24924_20130612_193640_outLine +BABEL_OP2_205_28585_20130406_222735_inLine +BABEL_OP2_205_28585_20130406_222735_outLine +BABEL_OP2_205_30250_20130303_023602_inLine +BABEL_OP2_205_30250_20130303_023602_outLine +BABEL_OP2_205_34899_20130619_000929_inLine +BABEL_OP2_205_34899_20130619_000929_outLine +BABEL_OP2_205_37064_20130318_004959_inLine +BABEL_OP2_205_37064_20130318_004959_outLine +BABEL_OP2_205_38750_20130413_172545_inLine +BABEL_OP2_205_38750_20130413_172545_outLine +BABEL_OP2_205_38750_20130413_173308_inLine +BABEL_OP2_205_38750_20130413_173308_outLine +BABEL_OP2_205_45106_20130330_013041_inLine +BABEL_OP2_205_45106_20130330_013041_outLine +BABEL_OP2_205_45777_20130325_205405_inLine +BABEL_OP2_205_45777_20130325_205405_outLine +BABEL_OP2_205_47959_20130322_204503_inLine +BABEL_OP2_205_47959_20130322_204503_outLine +BABEL_OP2_205_50958_20130318_044644_inLine +BABEL_OP2_205_50958_20130318_044644_outLine +BABEL_OP2_205_57067_20130407_183303_inLine +BABEL_OP2_205_57067_20130407_183303_outLine +BABEL_OP2_205_57922_20130331_195052_inLine +BABEL_OP2_205_57922_20130331_195052_outLine +BABEL_OP2_205_63511_20131224_213929_inLine +BABEL_OP2_205_63511_20131224_213929_outLine +BABEL_OP2_205_65339_20131225_232144_inLine +BABEL_OP2_205_65339_20131225_232144_outLine +BABEL_OP2_205_81229_20130316_035102_inLine +BABEL_OP2_205_81229_20130316_035102_outLine +BABEL_OP2_205_85439_20130413_172716_inLine +BABEL_OP2_205_85439_20130413_172716_outLine +BABEL_OP2_205_91930_20130413_193923_inLine +BABEL_OP2_205_91930_20130413_193923_outLine +BABEL_OP2_205_92698_20130622_032618_inLine +BABEL_OP2_205_92698_20130622_032618_outLine +BABEL_OP2_205_94141_20140118_223253_inLine +BABEL_OP2_205_94141_20140118_223253_outLine +BABEL_OP2_205_98580_20131223_014628_inLine 
diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.list b/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.list new file mode 100644 index 00000000000..022ddf05869 --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.list @@ -0,0 +1,133 @@ +BABEL_OP2_205_10184_20130315_054426_inLine +BABEL_OP2_205_10184_20130315_054426_outLine +BABEL_OP2_205_10647_20130413_190550_inLine +BABEL_OP2_205_10647_20130413_190550_outLine +BABEL_OP2_205_12220_20130323_002310_inLine +BABEL_OP2_205_12220_20130323_002310_outLine +BABEL_OP2_205_14807_20130326_065101_inLine +BABEL_OP2_205_14807_20130326_065101_outLine +BABEL_OP2_205_14807_20130326_070339_inLine +BABEL_OP2_205_14807_20130326_070339_outLine +BABEL_OP2_205_14875_20130319_211742_inLine +BABEL_OP2_205_14875_20130319_211742_outLine +BABEL_OP2_205_14875_20130319_213338_inLine +BABEL_OP2_205_14875_20130319_213338_outLine +BABEL_OP2_205_14929_20131223_022753_inLine +BABEL_OP2_205_15535_20130506_195619_inLine +BABEL_OP2_205_15535_20130506_195619_outLine +BABEL_OP2_205_17881_20130413_190631_inLine +BABEL_OP2_205_17881_20130413_190631_outLine +BABEL_OP2_205_17881_20130413_191638_inLine +BABEL_OP2_205_17881_20130413_191638_outLine +BABEL_OP2_205_17914_20130407_235720_inLine +BABEL_OP2_205_17914_20130407_235720_outLine +BABEL_OP2_205_18766_20130413_033911_inLine +BABEL_OP2_205_18766_20130413_033911_outLine +BABEL_OP2_205_19134_20130331_195936_inLine +BABEL_OP2_205_19134_20130331_195936_outLine +BABEL_OP2_205_19749_20130406_231234_inLine +BABEL_OP2_205_19749_20130406_231234_outLine +BABEL_OP2_205_20800_20130408_015430_inLine +BABEL_OP2_205_20800_20130408_015430_outLine +BABEL_OP2_205_20916_20130228_200116_inLine +BABEL_OP2_205_20916_20130228_200116_outLine +BABEL_OP2_205_21206_20130312_205638_inLine +BABEL_OP2_205_21206_20130312_205638_outLine +BABEL_OP2_205_22321_20130308_042214_inLine +BABEL_OP2_205_22321_20130308_042214_outLine +BABEL_OP2_205_23092_20130413_181637_inLine +BABEL_OP2_205_23092_20130413_181637_outLine +BABEL_OP2_205_23893_20140123_003759_inLine +BABEL_OP2_205_23893_20140123_003759_outLine +BABEL_OP2_205_24239_20130415_171824_inLine +BABEL_OP2_205_24239_20130415_171824_outLine +BABEL_OP2_205_24290_20130414_221432_inLine +BABEL_OP2_205_24290_20130414_221432_outLine +BABEL_OP2_205_24323_20130326_051101_inLine +BABEL_OP2_205_24323_20130326_051101_outLine +BABEL_OP2_205_24605_20130311_012103_inLine +BABEL_OP2_205_24605_20130311_012103_outLine +BABEL_OP2_205_25085_20130612_023620_inLine +BABEL_OP2_205_25085_20130612_023620_outLine +BABEL_OP2_205_26574_20130509_203057_inLine +BABEL_OP2_205_26574_20130509_203057_outLine +BABEL_OP2_205_26602_20130412_235831_inLine +BABEL_OP2_205_26602_20130412_235831_outLine +BABEL_OP2_205_28477_20130412_234819_inLine +BABEL_OP2_205_28477_20130412_234819_outLine +BABEL_OP2_205_28522_20130401_211215_inLine +BABEL_OP2_205_28522_20130401_211215_outLine +BABEL_OP2_205_31039_20140125_023755_inLine +BABEL_OP2_205_31039_20140125_023755_outLine +BABEL_OP2_205_32630_20130412_054815_inLine +BABEL_OP2_205_32630_20130412_054815_outLine +BABEL_OP2_205_33355_20130311_214515_inLine +BABEL_OP2_205_33355_20130311_214515_outLine +BABEL_OP2_205_33840_20130507_012940_inLine +BABEL_OP2_205_33840_20130507_012940_outLine +BABEL_OP2_205_34106_20130301_221919_inLine +BABEL_OP2_205_34106_20130301_221919_outLine +BABEL_OP2_205_34197_20130302_231101_inLine +BABEL_OP2_205_34197_20130302_231101_outLine +BABEL_OP2_205_34647_20140125_205318_inLine +BABEL_OP2_205_34647_20140125_205318_outLine 
+BABEL_OP2_205_37853_20130413_005407_inLine +BABEL_OP2_205_37853_20130413_005407_outLine +BABEL_OP2_205_38554_20130301_085606_inLine +BABEL_OP2_205_38554_20130301_085606_outLine +BABEL_OP2_205_38664_20130325_030156_inLine +BABEL_OP2_205_38664_20130325_030156_outLine +BABEL_OP2_205_38963_20131227_202341_inLine +BABEL_OP2_205_38963_20131227_202341_outLine +BABEL_OP2_205_39059_20130414_033146_inLine +BABEL_OP2_205_39059_20130414_033146_outLine +BABEL_OP2_205_39059_20130414_034411_inLine +BABEL_OP2_205_39059_20130414_034411_outLine +BABEL_OP2_205_40196_20140125_222906_inLine +BABEL_OP2_205_40196_20140125_222906_outLine +BABEL_OP2_205_41618_20130312_214004_inLine +BABEL_OP2_205_41618_20130312_214004_outLine +BABEL_OP2_205_41741_20130326_004056_inLine +BABEL_OP2_205_41741_20130326_004056_outLine +BABEL_OP2_205_42619_20130325_002736_inLine +BABEL_OP2_205_42619_20130325_002736_outLine +BABEL_OP2_205_43368_20130329_211826_inLine +BABEL_OP2_205_43368_20130329_211826_outLine +BABEL_OP2_205_43368_20130329_212612_inLine +BABEL_OP2_205_43368_20130329_212612_outLine +BABEL_OP2_205_45121_20130412_035841_inLine +BABEL_OP2_205_45121_20130412_035841_outLine +BABEL_OP2_205_46315_20130506_231421_inLine +BABEL_OP2_205_46315_20130506_231421_outLine +BABEL_OP2_205_49118_20130412_210858_inLine +BABEL_OP2_205_49118_20130412_210858_outLine +BABEL_OP2_205_49118_20130412_211622_inLine +BABEL_OP2_205_49118_20130412_211622_outLine +BABEL_OP2_205_50745_20130505_195625_inLine +BABEL_OP2_205_50745_20130505_195625_outLine +BABEL_OP2_205_53415_20131216_223652_inLine +BABEL_OP2_205_53415_20131216_223652_outLine +BABEL_OP2_205_58026_20131219_010750_inLine +BABEL_OP2_205_58026_20131219_010750_outLine +BABEL_OP2_205_58821_20130415_190958_inLine +BABEL_OP2_205_58821_20130415_190958_outLine +BABEL_OP2_205_59645_20130619_190548_inLine +BABEL_OP2_205_59645_20130619_190548_outLine +BABEL_OP2_205_62200_20130405_021524_inLine +BABEL_OP2_205_62200_20130405_021524_outLine +BABEL_OP2_205_62289_20140122_214709_inLine +BABEL_OP2_205_62289_20140122_214709_outLine +BABEL_OP2_205_70716_20130413_193114_inLine +BABEL_OP2_205_70716_20130413_193114_outLine +BABEL_OP2_205_77242_20130616_015950_inLine +BABEL_OP2_205_77242_20130616_015950_outLine +BABEL_OP2_205_84605_20130319_203823_inLine +BABEL_OP2_205_84605_20130319_203823_outLine +BABEL_OP2_205_84737_20130407_054058_inLine +BABEL_OP2_205_84737_20130407_054058_outLine +BABEL_OP2_205_84936_20130405_063301_inLine +BABEL_OP2_205_84936_20130405_063301_outLine +BABEL_OP2_205_86826_20130411_224207_inLine +BABEL_OP2_205_86826_20130411_224207_outLine +BABEL_OP2_205_90760_20130612_022556_inLine +BABEL_OP2_205_90760_20130612_022556_outLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.untranscribed.list new file mode 100644 index 00000000000..89ee0b28779 --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/sub-train.untranscribed.list @@ -0,0 +1,399 @@ +BABEL_OP2_205_10036_20130325_212656_inLine +BABEL_OP2_205_10036_20130325_212656_outLine +BABEL_OP2_205_10482_20130330_232812_inLine +BABEL_OP2_205_10482_20130330_232812_outLine +BABEL_OP2_205_10638_20140122_201207_inLine +BABEL_OP2_205_10638_20140122_201207_outLine +BABEL_OP2_205_10938_20130402_021742_inLine +BABEL_OP2_205_10938_20130402_021742_outLine +BABEL_OP2_205_10966_20130324_203837_inLine +BABEL_OP2_205_10966_20130324_203837_outLine +BABEL_OP2_205_11352_20130505_190427_inLine +BABEL_OP2_205_11352_20130505_190427_outLine +BABEL_OP2_205_11581_20130317_071927_inLine 
+BABEL_OP2_205_11581_20130317_071927_outLine +BABEL_OP2_205_11663_20130402_031747_inLine +BABEL_OP2_205_11663_20130402_031747_outLine +BABEL_OP2_205_11797_20130307_233702_inLine +BABEL_OP2_205_11797_20130307_233702_outLine +BABEL_OP2_205_11797_20130307_235053_inLine +BABEL_OP2_205_11797_20130307_235053_outLine +BABEL_OP2_205_12635_20130406_230527_inLine +BABEL_OP2_205_12635_20130406_230527_outLine +BABEL_OP2_205_13030_20130330_234019_inLine +BABEL_OP2_205_13030_20130330_234019_outLine +BABEL_OP2_205_13189_20130413_230649_inLine +BABEL_OP2_205_13189_20130413_230649_outLine +BABEL_OP2_205_13324_20130318_043359_inLine +BABEL_OP2_205_13324_20130318_043359_outLine +BABEL_OP2_205_13744_20130302_055938_inLine +BABEL_OP2_205_13744_20130302_055938_outLine +BABEL_OP2_205_14137_20130326_212737_inLine +BABEL_OP2_205_14137_20130326_212737_outLine +BABEL_OP2_205_14539_20130413_020822_inLine +BABEL_OP2_205_14539_20130413_020822_outLine +BABEL_OP2_205_14729_20130526_024319_inLine +BABEL_OP2_205_14729_20130526_024319_outLine +BABEL_OP2_205_14814_20130326_062123_inLine +BABEL_OP2_205_14814_20130326_062123_outLine +BABEL_OP2_205_14899_20130303_062436_inLine +BABEL_OP2_205_14899_20130303_062436_outLine +BABEL_OP2_205_14972_20130312_213702_inLine +BABEL_OP2_205_14972_20130312_213702_outLine +BABEL_OP2_205_15024_20131222_033424_inLine +BABEL_OP2_205_15024_20131222_033424_outLine +BABEL_OP2_205_15227_20130412_005202_inLine +BABEL_OP2_205_15227_20130412_005202_outLine +BABEL_OP2_205_15382_20130325_075405_inLine +BABEL_OP2_205_15382_20130325_075405_outLine +BABEL_OP2_205_16839_20130407_052530_inLine +BABEL_OP2_205_16839_20130407_052530_outLine +BABEL_OP2_205_16886_20130326_054927_inLine +BABEL_OP2_205_16886_20130326_054927_outLine +BABEL_OP2_205_16924_20130331_232254_inLine +BABEL_OP2_205_16924_20130331_232254_outLine +BABEL_OP2_205_17320_20130413_054847_inLine +BABEL_OP2_205_17320_20130413_054847_outLine +BABEL_OP2_205_17440_20130413_195207_inLine +BABEL_OP2_205_17440_20130413_195207_outLine +BABEL_OP2_205_17472_20130508_013928_inLine +BABEL_OP2_205_17472_20130508_013928_outLine +BABEL_OP2_205_17496_20130414_215325_inLine +BABEL_OP2_205_17496_20130414_215325_outLine +BABEL_OP2_205_17520_20130312_074120_inLine +BABEL_OP2_205_17520_20130312_074120_outLine +BABEL_OP2_205_17615_20130407_234405_inLine +BABEL_OP2_205_17615_20130407_234405_outLine +BABEL_OP2_205_18242_20130408_005657_inLine +BABEL_OP2_205_18242_20130408_005657_outLine +BABEL_OP2_205_18291_20130618_004811_inLine +BABEL_OP2_205_18291_20130618_004811_outLine +BABEL_OP2_205_18566_20130505_173829_inLine +BABEL_OP2_205_18566_20130505_173829_outLine +BABEL_OP2_205_19589_20130413_203154_inLine +BABEL_OP2_205_19589_20130413_203154_outLine +BABEL_OP2_205_19703_20130325_042858_inLine +BABEL_OP2_205_19703_20130325_042858_outLine +BABEL_OP2_205_19722_20130306_045231_inLine +BABEL_OP2_205_19722_20130306_045231_outLine +BABEL_OP2_205_20133_20130228_055409_inLine +BABEL_OP2_205_20133_20130228_055409_outLine +BABEL_OP2_205_20922_20130406_225439_inLine +BABEL_OP2_205_20922_20130406_225439_outLine +BABEL_OP2_205_20985_20130401_025757_inLine +BABEL_OP2_205_20985_20130401_025757_outLine +BABEL_OP2_205_21004_20130408_222653_inLine +BABEL_OP2_205_21004_20130408_222653_outLine +BABEL_OP2_205_21435_20130414_044944_inLine +BABEL_OP2_205_21435_20130414_044944_outLine +BABEL_OP2_205_21543_20140125_004741_inLine +BABEL_OP2_205_21543_20140125_004741_outLine +BABEL_OP2_205_21807_20130324_054526_inLine +BABEL_OP2_205_21892_20130507_023354_inLine 
+BABEL_OP2_205_21892_20130507_023354_outLine +BABEL_OP2_205_22446_20130309_073946_outLine +BABEL_OP2_205_22494_20130331_230611_inLine +BABEL_OP2_205_22494_20130331_230611_outLine +BABEL_OP2_205_22624_20130331_012106_inLine +BABEL_OP2_205_22624_20130331_012106_outLine +BABEL_OP2_205_22629_20131231_223232_inLine +BABEL_OP2_205_22629_20131231_223232_outLine +BABEL_OP2_205_22918_20130413_043023_inLine +BABEL_OP2_205_22918_20130413_043023_outLine +BABEL_OP2_205_22918_20130413_044543_inLine +BABEL_OP2_205_22918_20130413_044543_outLine +BABEL_OP2_205_23006_20130322_202429_inLine +BABEL_OP2_205_23006_20130322_202429_outLine +BABEL_OP2_205_23046_20130327_010653_inLine +BABEL_OP2_205_23046_20130327_010653_outLine +BABEL_OP2_205_23190_20130323_014750_inLine +BABEL_OP2_205_23190_20130323_014750_outLine +BABEL_OP2_205_23239_20130331_034518_inLine +BABEL_OP2_205_23239_20130331_034518_outLine +BABEL_OP2_205_23752_20140123_024924_inLine +BABEL_OP2_205_23752_20140123_024924_outLine +BABEL_OP2_205_24253_20130505_214600_inLine +BABEL_OP2_205_24253_20130505_214600_outLine +BABEL_OP2_205_24270_20130406_070358_inLine +BABEL_OP2_205_24270_20130406_070358_outLine +BABEL_OP2_205_24470_20130406_021646_inLine +BABEL_OP2_205_24470_20130406_021646_outLine +BABEL_OP2_205_24532_20130227_052040_inLine +BABEL_OP2_205_24532_20130227_052040_outLine +BABEL_OP2_205_24569_20130508_235213_inLine +BABEL_OP2_205_24569_20130508_235213_outLine +BABEL_OP2_205_24679_20130303_043753_inLine +BABEL_OP2_205_24679_20130303_043753_outLine +BABEL_OP2_205_25719_20130406_231631_inLine +BABEL_OP2_205_25719_20130406_231631_outLine +BABEL_OP2_205_25719_20130406_232555_inLine +BABEL_OP2_205_25719_20130406_232555_outLine +BABEL_OP2_205_25719_20130406_233313_inLine +BABEL_OP2_205_25719_20130406_233313_outLine +BABEL_OP2_205_25961_20130305_063202_inLine +BABEL_OP2_205_25961_20130305_063202_outLine +BABEL_OP2_205_26381_20140125_015707_inLine +BABEL_OP2_205_26381_20140125_015707_outLine +BABEL_OP2_205_26388_20130330_021001_inLine +BABEL_OP2_205_26388_20130330_021001_outLine +BABEL_OP2_205_26507_20131101_103425_inLine +BABEL_OP2_205_26507_20131101_103425_outLine +BABEL_OP2_205_27125_20130227_061700_inLine +BABEL_OP2_205_27125_20130227_061700_outLine +BABEL_OP2_205_27189_20140104_001032_inLine +BABEL_OP2_205_27189_20140104_001032_outLine +BABEL_OP2_205_27203_20130331_021946_inLine +BABEL_OP2_205_27203_20130331_021946_outLine +BABEL_OP2_205_27590_20130506_201921_inLine +BABEL_OP2_205_27590_20130506_201921_outLine +BABEL_OP2_205_27841_20130414_222155_inLine +BABEL_OP2_205_27841_20130414_222155_outLine +BABEL_OP2_205_28012_20130507_054019_inLine +BABEL_OP2_205_28012_20130507_054019_outLine +BABEL_OP2_205_28419_20130320_202136_inLine +BABEL_OP2_205_28419_20130320_202136_outLine +BABEL_OP2_205_29023_20130314_060343_inLine +BABEL_OP2_205_29023_20130314_060343_outLine +BABEL_OP2_205_29323_20130414_230355_inLine +BABEL_OP2_205_29323_20130414_230355_outLine +BABEL_OP2_205_29404_20130414_214714_inLine +BABEL_OP2_205_29404_20130414_214714_outLine +BABEL_OP2_205_29439_20130413_182356_inLine +BABEL_OP2_205_29439_20130413_182356_outLine +BABEL_OP2_205_30013_20130401_005939_inLine +BABEL_OP2_205_30013_20130401_005939_outLine +BABEL_OP2_205_30180_20130323_005331_inLine +BABEL_OP2_205_30180_20130323_005331_outLine +BABEL_OP2_205_30395_20130316_060814_inLine +BABEL_OP2_205_30395_20130316_060814_outLine +BABEL_OP2_205_30432_20130330_200303_inLine +BABEL_OP2_205_30432_20130330_200303_outLine +BABEL_OP2_205_30869_20130412_202311_inLine 
+BABEL_OP2_205_30869_20130412_202311_outLine +BABEL_OP2_205_31109_20130619_181905_inLine +BABEL_OP2_205_31109_20130619_181905_outLine +BABEL_OP2_205_31346_20130507_204621_inLine +BABEL_OP2_205_31346_20130507_204621_outLine +BABEL_OP2_205_32097_20130301_034527_inLine +BABEL_OP2_205_32097_20130301_034527_outLine +BABEL_OP2_205_32122_20130321_004623_inLine +BABEL_OP2_205_32122_20130321_004623_outLine +BABEL_OP2_205_32122_20130321_010341_inLine +BABEL_OP2_205_32122_20130321_010341_outLine +BABEL_OP2_205_32244_20130412_190534_inLine +BABEL_OP2_205_32244_20130412_190534_outLine +BABEL_OP2_205_32837_20130507_011223_inLine +BABEL_OP2_205_32837_20130507_011223_outLine +BABEL_OP2_205_33229_20130414_213157_inLine +BABEL_OP2_205_33229_20130414_213157_outLine +BABEL_OP2_205_33273_20130320_040141_inLine +BABEL_OP2_205_33273_20130320_040141_outLine +BABEL_OP2_205_33424_20130412_193538_inLine +BABEL_OP2_205_33424_20130412_193538_outLine +BABEL_OP2_205_33476_20130405_051711_inLine +BABEL_OP2_205_33476_20130405_051711_outLine +BABEL_OP2_205_33497_20130619_220728_inLine +BABEL_OP2_205_33497_20130619_220728_outLine +BABEL_OP2_205_33913_20130414_052534_inLine +BABEL_OP2_205_33913_20130414_052534_outLine +BABEL_OP2_205_33951_20130619_212409_inLine +BABEL_OP2_205_33951_20130619_212409_outLine +BABEL_OP2_205_34586_20140125_203417_inLine +BABEL_OP2_205_34586_20140125_203417_outLine +BABEL_OP2_205_34903_20130406_055051_inLine +BABEL_OP2_205_34903_20130406_055051_outLine +BABEL_OP2_205_35139_20130312_070415_inLine +BABEL_OP2_205_35139_20130312_070415_outLine +BABEL_OP2_205_35143_20130414_203900_inLine +BABEL_OP2_205_35143_20130414_203900_outLine +BABEL_OP2_205_35181_20130413_201739_inLine +BABEL_OP2_205_35181_20130413_201739_outLine +BABEL_OP2_205_36642_20130413_013238_inLine +BABEL_OP2_205_36642_20130413_013238_outLine +BABEL_OP2_205_37228_20130407_205807_inLine +BABEL_OP2_205_37228_20130407_205807_outLine +BABEL_OP2_205_37271_20130507_231712_inLine +BABEL_OP2_205_37271_20130507_231712_outLine +BABEL_OP2_205_37285_20130401_061737_inLine +BABEL_OP2_205_37285_20130401_061737_outLine +BABEL_OP2_205_37290_20130405_070403_inLine +BABEL_OP2_205_37290_20130405_070403_outLine +BABEL_OP2_205_37598_20130405_034853_inLine +BABEL_OP2_205_37598_20130405_034853_outLine +BABEL_OP2_205_37682_20130325_022952_inLine +BABEL_OP2_205_37682_20130325_022952_outLine +BABEL_OP2_205_37776_20140125_220835_inLine +BABEL_OP2_205_37776_20140125_220835_outLine +BABEL_OP2_205_38340_20130315_063442_inLine +BABEL_OP2_205_38340_20130315_063442_outLine +BABEL_OP2_205_38689_20130414_233704_inLine +BABEL_OP2_205_38689_20130414_233704_outLine +BABEL_OP2_205_38741_20130315_071146_inLine +BABEL_OP2_205_38741_20130315_071146_outLine +BABEL_OP2_205_38878_20130406_202135_inLine +BABEL_OP2_205_38878_20130406_202135_outLine +BABEL_OP2_205_39555_20130507_025010_inLine +BABEL_OP2_205_39555_20130507_025010_outLine +BABEL_OP2_205_40557_20130413_185709_inLine +BABEL_OP2_205_40557_20130413_185709_outLine +BABEL_OP2_205_40557_20130413_190849_inLine +BABEL_OP2_205_40557_20130413_190849_outLine +BABEL_OP2_205_40565_20130401_015506_inLine +BABEL_OP2_205_40565_20130401_015506_outLine +BABEL_OP2_205_40939_20140125_231452_inLine +BABEL_OP2_205_40939_20140125_231452_outLine +BABEL_OP2_205_41038_20130405_060002_inLine +BABEL_OP2_205_41038_20130405_060002_outLine +BABEL_OP2_205_41174_20130318_033313_inLine +BABEL_OP2_205_41174_20130318_033313_outLine +BABEL_OP2_205_42834_20130414_202256_inLine +BABEL_OP2_205_42834_20130414_202256_outLine 
+BABEL_OP2_205_42991_20130401_024013_inLine +BABEL_OP2_205_42991_20130401_024013_outLine +BABEL_OP2_205_42991_20130401_025044_inLine +BABEL_OP2_205_42991_20130401_025044_outLine +BABEL_OP2_205_43286_20130304_044510_inLine +BABEL_OP2_205_43286_20130304_044510_outLine +BABEL_OP2_205_43788_20130331_024429_inLine +BABEL_OP2_205_43788_20130331_024429_outLine +BABEL_OP2_205_43788_20130331_030508_inLine +BABEL_OP2_205_43788_20130331_030508_outLine +BABEL_OP2_205_44847_20130325_055635_inLine +BABEL_OP2_205_44847_20130325_055635_outLine +BABEL_OP2_205_45235_20130509_011826_inLine +BABEL_OP2_205_45235_20130509_011826_outLine +BABEL_OP2_205_45374_20140126_000904_inLine +BABEL_OP2_205_45374_20140126_000904_outLine +BABEL_OP2_205_46041_20130507_202255_inLine +BABEL_OP2_205_46041_20130507_202255_outLine +BABEL_OP2_205_46589_20130331_014535_inLine +BABEL_OP2_205_46589_20130331_014535_outLine +BABEL_OP2_205_46757_20130401_191649_inLine +BABEL_OP2_205_46757_20130401_191649_outLine +BABEL_OP2_205_46976_20131104_051409_inLine +BABEL_OP2_205_46976_20131104_051409_outLine +BABEL_OP2_205_47110_20140126_005953_inLine +BABEL_OP2_205_47110_20140126_005953_outLine +BABEL_OP2_205_47451_20130408_195325_inLine +BABEL_OP2_205_47451_20130408_195325_outLine +BABEL_OP2_205_47487_20130328_060026_inLine +BABEL_OP2_205_47487_20130328_060026_outLine +BABEL_OP2_205_47823_20130330_204952_inLine +BABEL_OP2_205_47823_20130330_204952_outLine +BABEL_OP2_205_47878_20130319_211057_inLine +BABEL_OP2_205_47878_20130319_211057_outLine +BABEL_OP2_205_48422_20130407_020759_inLine +BABEL_OP2_205_48422_20130407_020759_outLine +BABEL_OP2_205_49287_20130327_053930_inLine +BABEL_OP2_205_49287_20130327_053930_outLine +BABEL_OP2_205_49630_20130401_013908_inLine +BABEL_OP2_205_49630_20130401_013908_outLine +BABEL_OP2_205_49768_20130330_025558_inLine +BABEL_OP2_205_49768_20130330_025558_outLine +BABEL_OP2_205_50186_20140126_012415_inLine +BABEL_OP2_205_50186_20140126_012415_outLine +BABEL_OP2_205_50779_20130320_043549_inLine +BABEL_OP2_205_50779_20130320_043549_outLine +BABEL_OP2_205_50779_20130320_044244_inLine +BABEL_OP2_205_50779_20130320_044244_outLine +BABEL_OP2_205_51015_20130401_202255_inLine +BABEL_OP2_205_51015_20130401_202255_outLine +BABEL_OP2_205_52246_20130323_232916_inLine +BABEL_OP2_205_52246_20130323_232916_outLine +BABEL_OP2_205_52490_20130326_051608_inLine +BABEL_OP2_205_52490_20130326_051608_outLine +BABEL_OP2_205_53063_20130508_051415_inLine +BABEL_OP2_205_53063_20130508_051415_outLine +BABEL_OP2_205_53441_20140126_015538_inLine +BABEL_OP2_205_53441_20140126_015538_outLine +BABEL_OP2_205_53758_20131228_000238_inLine +BABEL_OP2_205_53758_20131228_000238_outLine +BABEL_OP2_205_54104_20130323_222459_inLine +BABEL_OP2_205_54104_20130323_222459_outLine +BABEL_OP2_205_54827_20130414_030516_inLine +BABEL_OP2_205_54827_20130414_030516_outLine +BABEL_OP2_205_54841_20130414_225855_inLine +BABEL_OP2_205_54841_20130414_225855_outLine +BABEL_OP2_205_54953_20130317_013652_inLine +BABEL_OP2_205_54953_20130317_013652_outLine +BABEL_OP2_205_56198_20130321_041358_inLine +BABEL_OP2_205_56198_20130321_041358_outLine +BABEL_OP2_205_56925_20140126_023234_inLine +BABEL_OP2_205_56925_20140126_023234_outLine +BABEL_OP2_205_57065_20130407_232501_inLine +BABEL_OP2_205_57065_20130407_232501_outLine +BABEL_OP2_205_57678_20130323_232415_inLine +BABEL_OP2_205_57678_20130323_232415_outLine +BABEL_OP2_205_57935_20130322_224501_inLine +BABEL_OP2_205_57935_20130322_224501_outLine +BABEL_OP2_205_59078_20130406_075721_inLine 
+BABEL_OP2_205_59078_20130406_075721_outLine +BABEL_OP2_205_59635_20130406_225014_inLine +BABEL_OP2_205_59635_20130406_225014_outLine +BABEL_OP2_205_60282_20140107_024858_inLine +BABEL_OP2_205_60282_20140107_024858_outLine +BABEL_OP2_205_60436_20130413_200129_inLine +BABEL_OP2_205_60436_20130413_200129_outLine +BABEL_OP2_205_61440_20130411_231312_inLine +BABEL_OP2_205_61440_20130411_231312_outLine +BABEL_OP2_205_61971_20130413_052620_inLine +BABEL_OP2_205_61971_20130413_052620_outLine +BABEL_OP2_205_62014_20130329_225214_inLine +BABEL_OP2_205_62014_20130329_225214_outLine +BABEL_OP2_205_62360_20140122_233956_inLine +BABEL_OP2_205_62810_20130304_075632_inLine +BABEL_OP2_205_62810_20130304_075632_outLine +BABEL_OP2_205_63084_20130405_025236_inLine +BABEL_OP2_205_63084_20130405_025236_outLine +BABEL_OP2_205_63787_20130310_001339_inLine +BABEL_OP2_205_63787_20130310_001339_outLine +BABEL_OP2_205_63920_20131226_014831_inLine +BABEL_OP2_205_64688_20131226_232545_inLine +BABEL_OP2_205_64688_20131226_232545_outLine +BABEL_OP2_205_66971_20130413_002731_inLine +BABEL_OP2_205_66971_20130413_002731_outLine +BABEL_OP2_205_67964_20140122_221653_inLine +BABEL_OP2_205_67964_20140122_221653_outLine +BABEL_OP2_205_68289_20130407_225726_inLine +BABEL_OP2_205_68289_20130407_225726_outLine +BABEL_OP2_205_68748_20130330_225712_inLine +BABEL_OP2_205_68748_20130330_225712_outLine +BABEL_OP2_205_70452_20130328_011715_inLine +BABEL_OP2_205_70452_20130328_011715_outLine +BABEL_OP2_205_70713_20131129_235040_inLine +BABEL_OP2_205_74799_20130407_030553_inLine +BABEL_OP2_205_74799_20130407_030553_outLine +BABEL_OP2_205_76683_20130331_201352_inLine +BABEL_OP2_205_76683_20130331_201352_outLine +BABEL_OP2_205_78254_20130323_051609_inLine +BABEL_OP2_205_78254_20130323_051609_outLine +BABEL_OP2_205_80559_20130323_224458_inLine +BABEL_OP2_205_80559_20130323_224458_outLine +BABEL_OP2_205_81149_20130412_061213_inLine +BABEL_OP2_205_81149_20130412_061213_outLine +BABEL_OP2_205_82138_20130622_210458_inLine +BABEL_OP2_205_82138_20130622_210458_outLine +BABEL_OP2_205_86191_20130323_060631_inLine +BABEL_OP2_205_86191_20130323_060631_outLine +BABEL_OP2_205_86433_20130325_084312_inLine +BABEL_OP2_205_86433_20130325_084312_outLine +BABEL_OP2_205_86676_20130331_014116_inLine +BABEL_OP2_205_86676_20130331_014116_outLine +BABEL_OP2_205_86715_20130618_002759_inLine +BABEL_OP2_205_86715_20130618_002759_outLine +BABEL_OP2_205_91336_20130622_230929_inLine +BABEL_OP2_205_91336_20130622_230929_outLine +BABEL_OP2_205_92605_20140123_032518_inLine +BABEL_OP2_205_92605_20140123_032518_outLine +BABEL_OP2_205_93964_20130623_014819_inLine +BABEL_OP2_205_93964_20130623_014819_outLine +BABEL_OP2_205_94891_20140123_222847_inLine +BABEL_OP2_205_94891_20140123_222847_outLine +BABEL_OP2_205_94978_20131126_045451_inLine +BABEL_OP2_205_94978_20131126_045451_outLine +BABEL_OP2_205_96376_20140120_211321_inLine +BABEL_OP2_205_96376_20140120_211321_outLine +BABEL_OP2_205_97772_20130301_071555_inLine +BABEL_OP2_205_97772_20130301_071555_outLine +BABEL_OP2_205_99594_20130320_070531_inLine +BABEL_OP2_205_99594_20130320_070531_outLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/training.list b/egs/babel/s5d/conf/lists/205-kurmanji/training.list new file mode 100644 index 00000000000..6f50b091eff --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/training.list @@ -0,0 +1,532 @@ +BABEL_OP2_205_10036_20130325_212656_inLine +BABEL_OP2_205_10036_20130325_212656_outLine +BABEL_OP2_205_10184_20130315_054426_inLine +BABEL_OP2_205_10184_20130315_054426_outLine 
+BABEL_OP2_205_10482_20130330_232812_inLine +BABEL_OP2_205_10482_20130330_232812_outLine +BABEL_OP2_205_10638_20140122_201207_inLine +BABEL_OP2_205_10638_20140122_201207_outLine +BABEL_OP2_205_10647_20130413_190550_inLine +BABEL_OP2_205_10647_20130413_190550_outLine +BABEL_OP2_205_10938_20130402_021742_inLine +BABEL_OP2_205_10938_20130402_021742_outLine +BABEL_OP2_205_10966_20130324_203837_inLine +BABEL_OP2_205_10966_20130324_203837_outLine +BABEL_OP2_205_11352_20130505_190427_inLine +BABEL_OP2_205_11352_20130505_190427_outLine +BABEL_OP2_205_11581_20130317_071927_inLine +BABEL_OP2_205_11581_20130317_071927_outLine +BABEL_OP2_205_11663_20130402_031747_inLine +BABEL_OP2_205_11663_20130402_031747_outLine +BABEL_OP2_205_11797_20130307_233702_inLine +BABEL_OP2_205_11797_20130307_233702_outLine +BABEL_OP2_205_11797_20130307_235053_inLine +BABEL_OP2_205_11797_20130307_235053_outLine +BABEL_OP2_205_12220_20130323_002310_inLine +BABEL_OP2_205_12220_20130323_002310_outLine +BABEL_OP2_205_12635_20130406_230527_inLine +BABEL_OP2_205_12635_20130406_230527_outLine +BABEL_OP2_205_13030_20130330_234019_inLine +BABEL_OP2_205_13030_20130330_234019_outLine +BABEL_OP2_205_13189_20130413_230649_inLine +BABEL_OP2_205_13189_20130413_230649_outLine +BABEL_OP2_205_13324_20130318_043359_inLine +BABEL_OP2_205_13324_20130318_043359_outLine +BABEL_OP2_205_13744_20130302_055938_inLine +BABEL_OP2_205_13744_20130302_055938_outLine +BABEL_OP2_205_14137_20130326_212737_inLine +BABEL_OP2_205_14137_20130326_212737_outLine +BABEL_OP2_205_14539_20130413_020822_inLine +BABEL_OP2_205_14539_20130413_020822_outLine +BABEL_OP2_205_14729_20130526_024319_inLine +BABEL_OP2_205_14729_20130526_024319_outLine +BABEL_OP2_205_14807_20130326_065101_inLine +BABEL_OP2_205_14807_20130326_065101_outLine +BABEL_OP2_205_14807_20130326_070339_inLine +BABEL_OP2_205_14807_20130326_070339_outLine +BABEL_OP2_205_14814_20130326_062123_inLine +BABEL_OP2_205_14814_20130326_062123_outLine +BABEL_OP2_205_14875_20130319_211742_inLine +BABEL_OP2_205_14875_20130319_211742_outLine +BABEL_OP2_205_14875_20130319_213338_inLine +BABEL_OP2_205_14875_20130319_213338_outLine +BABEL_OP2_205_14899_20130303_062436_inLine +BABEL_OP2_205_14899_20130303_062436_outLine +BABEL_OP2_205_14929_20131223_022753_inLine +BABEL_OP2_205_14972_20130312_213702_inLine +BABEL_OP2_205_14972_20130312_213702_outLine +BABEL_OP2_205_15024_20131222_033424_inLine +BABEL_OP2_205_15024_20131222_033424_outLine +BABEL_OP2_205_15227_20130412_005202_inLine +BABEL_OP2_205_15227_20130412_005202_outLine +BABEL_OP2_205_15382_20130325_075405_inLine +BABEL_OP2_205_15382_20130325_075405_outLine +BABEL_OP2_205_15535_20130506_195619_inLine +BABEL_OP2_205_15535_20130506_195619_outLine +BABEL_OP2_205_16839_20130407_052530_inLine +BABEL_OP2_205_16839_20130407_052530_outLine +BABEL_OP2_205_16886_20130326_054927_inLine +BABEL_OP2_205_16886_20130326_054927_outLine +BABEL_OP2_205_16924_20130331_232254_inLine +BABEL_OP2_205_16924_20130331_232254_outLine +BABEL_OP2_205_17320_20130413_054847_inLine +BABEL_OP2_205_17320_20130413_054847_outLine +BABEL_OP2_205_17440_20130413_195207_inLine +BABEL_OP2_205_17440_20130413_195207_outLine +BABEL_OP2_205_17472_20130508_013928_inLine +BABEL_OP2_205_17472_20130508_013928_outLine +BABEL_OP2_205_17496_20130414_215325_inLine +BABEL_OP2_205_17496_20130414_215325_outLine +BABEL_OP2_205_17520_20130312_074120_inLine +BABEL_OP2_205_17520_20130312_074120_outLine +BABEL_OP2_205_17615_20130407_234405_inLine +BABEL_OP2_205_17615_20130407_234405_outLine 
+BABEL_OP2_205_17881_20130413_190631_inLine +BABEL_OP2_205_17881_20130413_190631_outLine +BABEL_OP2_205_17881_20130413_191638_inLine +BABEL_OP2_205_17881_20130413_191638_outLine +BABEL_OP2_205_17914_20130407_235720_inLine +BABEL_OP2_205_17914_20130407_235720_outLine +BABEL_OP2_205_18242_20130408_005657_inLine +BABEL_OP2_205_18242_20130408_005657_outLine +BABEL_OP2_205_18291_20130618_004811_inLine +BABEL_OP2_205_18291_20130618_004811_outLine +BABEL_OP2_205_18566_20130505_173829_inLine +BABEL_OP2_205_18566_20130505_173829_outLine +BABEL_OP2_205_18766_20130413_033911_inLine +BABEL_OP2_205_18766_20130413_033911_outLine +BABEL_OP2_205_19134_20130331_195936_inLine +BABEL_OP2_205_19134_20130331_195936_outLine +BABEL_OP2_205_19589_20130413_203154_inLine +BABEL_OP2_205_19589_20130413_203154_outLine +BABEL_OP2_205_19703_20130325_042858_inLine +BABEL_OP2_205_19703_20130325_042858_outLine +BABEL_OP2_205_19722_20130306_045231_inLine +BABEL_OP2_205_19722_20130306_045231_outLine +BABEL_OP2_205_19749_20130406_231234_inLine +BABEL_OP2_205_19749_20130406_231234_outLine +BABEL_OP2_205_20133_20130228_055409_inLine +BABEL_OP2_205_20133_20130228_055409_outLine +BABEL_OP2_205_20800_20130408_015430_inLine +BABEL_OP2_205_20800_20130408_015430_outLine +BABEL_OP2_205_20916_20130228_200116_inLine +BABEL_OP2_205_20916_20130228_200116_outLine +BABEL_OP2_205_20922_20130406_225439_inLine +BABEL_OP2_205_20922_20130406_225439_outLine +BABEL_OP2_205_20985_20130401_025757_inLine +BABEL_OP2_205_20985_20130401_025757_outLine +BABEL_OP2_205_21004_20130408_222653_inLine +BABEL_OP2_205_21004_20130408_222653_outLine +BABEL_OP2_205_21206_20130312_205638_inLine +BABEL_OP2_205_21206_20130312_205638_outLine +BABEL_OP2_205_21435_20130414_044944_inLine +BABEL_OP2_205_21435_20130414_044944_outLine +BABEL_OP2_205_21543_20140125_004741_inLine +BABEL_OP2_205_21543_20140125_004741_outLine +BABEL_OP2_205_21807_20130324_054526_inLine +BABEL_OP2_205_21892_20130507_023354_inLine +BABEL_OP2_205_21892_20130507_023354_outLine +BABEL_OP2_205_22321_20130308_042214_inLine +BABEL_OP2_205_22321_20130308_042214_outLine +BABEL_OP2_205_22446_20130309_073946_outLine +BABEL_OP2_205_22494_20130331_230611_inLine +BABEL_OP2_205_22494_20130331_230611_outLine +BABEL_OP2_205_22624_20130331_012106_inLine +BABEL_OP2_205_22624_20130331_012106_outLine +BABEL_OP2_205_22629_20131231_223232_inLine +BABEL_OP2_205_22629_20131231_223232_outLine +BABEL_OP2_205_22918_20130413_043023_inLine +BABEL_OP2_205_22918_20130413_043023_outLine +BABEL_OP2_205_22918_20130413_044543_inLine +BABEL_OP2_205_22918_20130413_044543_outLine +BABEL_OP2_205_23006_20130322_202429_inLine +BABEL_OP2_205_23006_20130322_202429_outLine +BABEL_OP2_205_23046_20130327_010653_inLine +BABEL_OP2_205_23046_20130327_010653_outLine +BABEL_OP2_205_23092_20130413_181637_inLine +BABEL_OP2_205_23092_20130413_181637_outLine +BABEL_OP2_205_23190_20130323_014750_inLine +BABEL_OP2_205_23190_20130323_014750_outLine +BABEL_OP2_205_23239_20130331_034518_inLine +BABEL_OP2_205_23239_20130331_034518_outLine +BABEL_OP2_205_23752_20140123_024924_inLine +BABEL_OP2_205_23752_20140123_024924_outLine +BABEL_OP2_205_23893_20140123_003759_inLine +BABEL_OP2_205_23893_20140123_003759_outLine +BABEL_OP2_205_24239_20130415_171824_inLine +BABEL_OP2_205_24239_20130415_171824_outLine +BABEL_OP2_205_24253_20130505_214600_inLine +BABEL_OP2_205_24253_20130505_214600_outLine +BABEL_OP2_205_24270_20130406_070358_inLine +BABEL_OP2_205_24270_20130406_070358_outLine +BABEL_OP2_205_24290_20130414_221432_inLine 
+BABEL_OP2_205_24290_20130414_221432_outLine +BABEL_OP2_205_24323_20130326_051101_inLine +BABEL_OP2_205_24323_20130326_051101_outLine +BABEL_OP2_205_24470_20130406_021646_inLine +BABEL_OP2_205_24470_20130406_021646_outLine +BABEL_OP2_205_24532_20130227_052040_inLine +BABEL_OP2_205_24532_20130227_052040_outLine +BABEL_OP2_205_24569_20130508_235213_inLine +BABEL_OP2_205_24569_20130508_235213_outLine +BABEL_OP2_205_24605_20130311_012103_inLine +BABEL_OP2_205_24605_20130311_012103_outLine +BABEL_OP2_205_24679_20130303_043753_inLine +BABEL_OP2_205_24679_20130303_043753_outLine +BABEL_OP2_205_25085_20130612_023620_inLine +BABEL_OP2_205_25085_20130612_023620_outLine +BABEL_OP2_205_25719_20130406_231631_inLine +BABEL_OP2_205_25719_20130406_231631_outLine +BABEL_OP2_205_25719_20130406_232555_inLine +BABEL_OP2_205_25719_20130406_232555_outLine +BABEL_OP2_205_25719_20130406_233313_inLine +BABEL_OP2_205_25719_20130406_233313_outLine +BABEL_OP2_205_25961_20130305_063202_inLine +BABEL_OP2_205_25961_20130305_063202_outLine +BABEL_OP2_205_26381_20140125_015707_inLine +BABEL_OP2_205_26381_20140125_015707_outLine +BABEL_OP2_205_26388_20130330_021001_inLine +BABEL_OP2_205_26388_20130330_021001_outLine +BABEL_OP2_205_26507_20131101_103425_inLine +BABEL_OP2_205_26507_20131101_103425_outLine +BABEL_OP2_205_26574_20130509_203057_inLine +BABEL_OP2_205_26574_20130509_203057_outLine +BABEL_OP2_205_26602_20130412_235831_inLine +BABEL_OP2_205_26602_20130412_235831_outLine +BABEL_OP2_205_27125_20130227_061700_inLine +BABEL_OP2_205_27125_20130227_061700_outLine +BABEL_OP2_205_27189_20140104_001032_inLine +BABEL_OP2_205_27189_20140104_001032_outLine +BABEL_OP2_205_27203_20130331_021946_inLine +BABEL_OP2_205_27203_20130331_021946_outLine +BABEL_OP2_205_27590_20130506_201921_inLine +BABEL_OP2_205_27590_20130506_201921_outLine +BABEL_OP2_205_27841_20130414_222155_inLine +BABEL_OP2_205_27841_20130414_222155_outLine +BABEL_OP2_205_28012_20130507_054019_inLine +BABEL_OP2_205_28012_20130507_054019_outLine +BABEL_OP2_205_28419_20130320_202136_inLine +BABEL_OP2_205_28419_20130320_202136_outLine +BABEL_OP2_205_28477_20130412_234819_inLine +BABEL_OP2_205_28477_20130412_234819_outLine +BABEL_OP2_205_28522_20130401_211215_inLine +BABEL_OP2_205_28522_20130401_211215_outLine +BABEL_OP2_205_29023_20130314_060343_inLine +BABEL_OP2_205_29023_20130314_060343_outLine +BABEL_OP2_205_29323_20130414_230355_inLine +BABEL_OP2_205_29323_20130414_230355_outLine +BABEL_OP2_205_29404_20130414_214714_inLine +BABEL_OP2_205_29404_20130414_214714_outLine +BABEL_OP2_205_29439_20130413_182356_inLine +BABEL_OP2_205_29439_20130413_182356_outLine +BABEL_OP2_205_30013_20130401_005939_inLine +BABEL_OP2_205_30013_20130401_005939_outLine +BABEL_OP2_205_30180_20130323_005331_inLine +BABEL_OP2_205_30180_20130323_005331_outLine +BABEL_OP2_205_30395_20130316_060814_inLine +BABEL_OP2_205_30395_20130316_060814_outLine +BABEL_OP2_205_30432_20130330_200303_inLine +BABEL_OP2_205_30432_20130330_200303_outLine +BABEL_OP2_205_30869_20130412_202311_inLine +BABEL_OP2_205_30869_20130412_202311_outLine +BABEL_OP2_205_31039_20140125_023755_inLine +BABEL_OP2_205_31039_20140125_023755_outLine +BABEL_OP2_205_31109_20130619_181905_inLine +BABEL_OP2_205_31109_20130619_181905_outLine +BABEL_OP2_205_31346_20130507_204621_inLine +BABEL_OP2_205_31346_20130507_204621_outLine +BABEL_OP2_205_32097_20130301_034527_inLine +BABEL_OP2_205_32097_20130301_034527_outLine +BABEL_OP2_205_32122_20130321_004623_inLine +BABEL_OP2_205_32122_20130321_004623_outLine 
+BABEL_OP2_205_32122_20130321_010341_inLine +BABEL_OP2_205_32122_20130321_010341_outLine +BABEL_OP2_205_32244_20130412_190534_inLine +BABEL_OP2_205_32244_20130412_190534_outLine +BABEL_OP2_205_32630_20130412_054815_inLine +BABEL_OP2_205_32630_20130412_054815_outLine +BABEL_OP2_205_32837_20130507_011223_inLine +BABEL_OP2_205_32837_20130507_011223_outLine +BABEL_OP2_205_33229_20130414_213157_inLine +BABEL_OP2_205_33229_20130414_213157_outLine +BABEL_OP2_205_33273_20130320_040141_inLine +BABEL_OP2_205_33273_20130320_040141_outLine +BABEL_OP2_205_33355_20130311_214515_inLine +BABEL_OP2_205_33355_20130311_214515_outLine +BABEL_OP2_205_33424_20130412_193538_inLine +BABEL_OP2_205_33424_20130412_193538_outLine +BABEL_OP2_205_33476_20130405_051711_inLine +BABEL_OP2_205_33476_20130405_051711_outLine +BABEL_OP2_205_33497_20130619_220728_inLine +BABEL_OP2_205_33497_20130619_220728_outLine +BABEL_OP2_205_33840_20130507_012940_inLine +BABEL_OP2_205_33840_20130507_012940_outLine +BABEL_OP2_205_33913_20130414_052534_inLine +BABEL_OP2_205_33913_20130414_052534_outLine +BABEL_OP2_205_33951_20130619_212409_inLine +BABEL_OP2_205_33951_20130619_212409_outLine +BABEL_OP2_205_34106_20130301_221919_inLine +BABEL_OP2_205_34106_20130301_221919_outLine +BABEL_OP2_205_34197_20130302_231101_inLine +BABEL_OP2_205_34197_20130302_231101_outLine +BABEL_OP2_205_34586_20140125_203417_inLine +BABEL_OP2_205_34586_20140125_203417_outLine +BABEL_OP2_205_34647_20140125_205318_inLine +BABEL_OP2_205_34647_20140125_205318_outLine +BABEL_OP2_205_34903_20130406_055051_inLine +BABEL_OP2_205_34903_20130406_055051_outLine +BABEL_OP2_205_35139_20130312_070415_inLine +BABEL_OP2_205_35139_20130312_070415_outLine +BABEL_OP2_205_35143_20130414_203900_inLine +BABEL_OP2_205_35143_20130414_203900_outLine +BABEL_OP2_205_35181_20130413_201739_inLine +BABEL_OP2_205_35181_20130413_201739_outLine +BABEL_OP2_205_36642_20130413_013238_inLine +BABEL_OP2_205_36642_20130413_013238_outLine +BABEL_OP2_205_37228_20130407_205807_inLine +BABEL_OP2_205_37228_20130407_205807_outLine +BABEL_OP2_205_37271_20130507_231712_inLine +BABEL_OP2_205_37271_20130507_231712_outLine +BABEL_OP2_205_37285_20130401_061737_inLine +BABEL_OP2_205_37285_20130401_061737_outLine +BABEL_OP2_205_37290_20130405_070403_inLine +BABEL_OP2_205_37290_20130405_070403_outLine +BABEL_OP2_205_37598_20130405_034853_inLine +BABEL_OP2_205_37598_20130405_034853_outLine +BABEL_OP2_205_37682_20130325_022952_inLine +BABEL_OP2_205_37682_20130325_022952_outLine +BABEL_OP2_205_37776_20140125_220835_inLine +BABEL_OP2_205_37776_20140125_220835_outLine +BABEL_OP2_205_37853_20130413_005407_inLine +BABEL_OP2_205_37853_20130413_005407_outLine +BABEL_OP2_205_38340_20130315_063442_inLine +BABEL_OP2_205_38340_20130315_063442_outLine +BABEL_OP2_205_38554_20130301_085606_inLine +BABEL_OP2_205_38554_20130301_085606_outLine +BABEL_OP2_205_38664_20130325_030156_inLine +BABEL_OP2_205_38664_20130325_030156_outLine +BABEL_OP2_205_38689_20130414_233704_inLine +BABEL_OP2_205_38689_20130414_233704_outLine +BABEL_OP2_205_38741_20130315_071146_inLine +BABEL_OP2_205_38741_20130315_071146_outLine +BABEL_OP2_205_38878_20130406_202135_inLine +BABEL_OP2_205_38878_20130406_202135_outLine +BABEL_OP2_205_38963_20131227_202341_inLine +BABEL_OP2_205_38963_20131227_202341_outLine +BABEL_OP2_205_39059_20130414_033146_inLine +BABEL_OP2_205_39059_20130414_033146_outLine +BABEL_OP2_205_39059_20130414_034411_inLine +BABEL_OP2_205_39059_20130414_034411_outLine +BABEL_OP2_205_39555_20130507_025010_inLine 
+BABEL_OP2_205_39555_20130507_025010_outLine +BABEL_OP2_205_40196_20140125_222906_inLine +BABEL_OP2_205_40196_20140125_222906_outLine +BABEL_OP2_205_40557_20130413_185709_inLine +BABEL_OP2_205_40557_20130413_185709_outLine +BABEL_OP2_205_40557_20130413_190849_inLine +BABEL_OP2_205_40557_20130413_190849_outLine +BABEL_OP2_205_40565_20130401_015506_inLine +BABEL_OP2_205_40565_20130401_015506_outLine +BABEL_OP2_205_40939_20140125_231452_inLine +BABEL_OP2_205_40939_20140125_231452_outLine +BABEL_OP2_205_41038_20130405_060002_inLine +BABEL_OP2_205_41038_20130405_060002_outLine +BABEL_OP2_205_41174_20130318_033313_inLine +BABEL_OP2_205_41174_20130318_033313_outLine +BABEL_OP2_205_41618_20130312_214004_inLine +BABEL_OP2_205_41618_20130312_214004_outLine +BABEL_OP2_205_41741_20130326_004056_inLine +BABEL_OP2_205_41741_20130326_004056_outLine +BABEL_OP2_205_42619_20130325_002736_inLine +BABEL_OP2_205_42619_20130325_002736_outLine +BABEL_OP2_205_42834_20130414_202256_inLine +BABEL_OP2_205_42834_20130414_202256_outLine +BABEL_OP2_205_42991_20130401_024013_inLine +BABEL_OP2_205_42991_20130401_024013_outLine +BABEL_OP2_205_42991_20130401_025044_inLine +BABEL_OP2_205_42991_20130401_025044_outLine +BABEL_OP2_205_43286_20130304_044510_inLine +BABEL_OP2_205_43286_20130304_044510_outLine +BABEL_OP2_205_43368_20130329_211826_inLine +BABEL_OP2_205_43368_20130329_211826_outLine +BABEL_OP2_205_43368_20130329_212612_inLine +BABEL_OP2_205_43368_20130329_212612_outLine +BABEL_OP2_205_43788_20130331_024429_inLine +BABEL_OP2_205_43788_20130331_024429_outLine +BABEL_OP2_205_43788_20130331_030508_inLine +BABEL_OP2_205_43788_20130331_030508_outLine +BABEL_OP2_205_44847_20130325_055635_inLine +BABEL_OP2_205_44847_20130325_055635_outLine +BABEL_OP2_205_45121_20130412_035841_inLine +BABEL_OP2_205_45121_20130412_035841_outLine +BABEL_OP2_205_45235_20130509_011826_inLine +BABEL_OP2_205_45235_20130509_011826_outLine +BABEL_OP2_205_45374_20140126_000904_inLine +BABEL_OP2_205_45374_20140126_000904_outLine +BABEL_OP2_205_46041_20130507_202255_inLine +BABEL_OP2_205_46041_20130507_202255_outLine +BABEL_OP2_205_46315_20130506_231421_inLine +BABEL_OP2_205_46315_20130506_231421_outLine +BABEL_OP2_205_46589_20130331_014535_inLine +BABEL_OP2_205_46589_20130331_014535_outLine +BABEL_OP2_205_46757_20130401_191649_inLine +BABEL_OP2_205_46757_20130401_191649_outLine +BABEL_OP2_205_46976_20131104_051409_inLine +BABEL_OP2_205_46976_20131104_051409_outLine +BABEL_OP2_205_47110_20140126_005953_inLine +BABEL_OP2_205_47110_20140126_005953_outLine +BABEL_OP2_205_47451_20130408_195325_inLine +BABEL_OP2_205_47451_20130408_195325_outLine +BABEL_OP2_205_47487_20130328_060026_inLine +BABEL_OP2_205_47487_20130328_060026_outLine +BABEL_OP2_205_47823_20130330_204952_inLine +BABEL_OP2_205_47823_20130330_204952_outLine +BABEL_OP2_205_47878_20130319_211057_inLine +BABEL_OP2_205_47878_20130319_211057_outLine +BABEL_OP2_205_48422_20130407_020759_inLine +BABEL_OP2_205_48422_20130407_020759_outLine +BABEL_OP2_205_49118_20130412_210858_inLine +BABEL_OP2_205_49118_20130412_210858_outLine +BABEL_OP2_205_49118_20130412_211622_inLine +BABEL_OP2_205_49118_20130412_211622_outLine +BABEL_OP2_205_49287_20130327_053930_inLine +BABEL_OP2_205_49287_20130327_053930_outLine +BABEL_OP2_205_49630_20130401_013908_inLine +BABEL_OP2_205_49630_20130401_013908_outLine +BABEL_OP2_205_49768_20130330_025558_inLine +BABEL_OP2_205_49768_20130330_025558_outLine +BABEL_OP2_205_50186_20140126_012415_inLine +BABEL_OP2_205_50186_20140126_012415_outLine 
+BABEL_OP2_205_50745_20130505_195625_inLine +BABEL_OP2_205_50745_20130505_195625_outLine +BABEL_OP2_205_50779_20130320_043549_inLine +BABEL_OP2_205_50779_20130320_043549_outLine +BABEL_OP2_205_50779_20130320_044244_inLine +BABEL_OP2_205_50779_20130320_044244_outLine +BABEL_OP2_205_51015_20130401_202255_inLine +BABEL_OP2_205_51015_20130401_202255_outLine +BABEL_OP2_205_52246_20130323_232916_inLine +BABEL_OP2_205_52246_20130323_232916_outLine +BABEL_OP2_205_52490_20130326_051608_inLine +BABEL_OP2_205_52490_20130326_051608_outLine +BABEL_OP2_205_53063_20130508_051415_inLine +BABEL_OP2_205_53063_20130508_051415_outLine +BABEL_OP2_205_53415_20131216_223652_inLine +BABEL_OP2_205_53415_20131216_223652_outLine +BABEL_OP2_205_53441_20140126_015538_inLine +BABEL_OP2_205_53441_20140126_015538_outLine +BABEL_OP2_205_53758_20131228_000238_inLine +BABEL_OP2_205_53758_20131228_000238_outLine +BABEL_OP2_205_54104_20130323_222459_inLine +BABEL_OP2_205_54104_20130323_222459_outLine +BABEL_OP2_205_54827_20130414_030516_inLine +BABEL_OP2_205_54827_20130414_030516_outLine +BABEL_OP2_205_54841_20130414_225855_inLine +BABEL_OP2_205_54841_20130414_225855_outLine +BABEL_OP2_205_54953_20130317_013652_inLine +BABEL_OP2_205_54953_20130317_013652_outLine +BABEL_OP2_205_56198_20130321_041358_inLine +BABEL_OP2_205_56198_20130321_041358_outLine +BABEL_OP2_205_56925_20140126_023234_inLine +BABEL_OP2_205_56925_20140126_023234_outLine +BABEL_OP2_205_57065_20130407_232501_inLine +BABEL_OP2_205_57065_20130407_232501_outLine +BABEL_OP2_205_57678_20130323_232415_inLine +BABEL_OP2_205_57678_20130323_232415_outLine +BABEL_OP2_205_57935_20130322_224501_inLine +BABEL_OP2_205_57935_20130322_224501_outLine +BABEL_OP2_205_58026_20131219_010750_inLine +BABEL_OP2_205_58026_20131219_010750_outLine +BABEL_OP2_205_58821_20130415_190958_inLine +BABEL_OP2_205_58821_20130415_190958_outLine +BABEL_OP2_205_59078_20130406_075721_inLine +BABEL_OP2_205_59078_20130406_075721_outLine +BABEL_OP2_205_59635_20130406_225014_inLine +BABEL_OP2_205_59635_20130406_225014_outLine +BABEL_OP2_205_59645_20130619_190548_inLine +BABEL_OP2_205_59645_20130619_190548_outLine +BABEL_OP2_205_60282_20140107_024858_inLine +BABEL_OP2_205_60282_20140107_024858_outLine +BABEL_OP2_205_60436_20130413_200129_inLine +BABEL_OP2_205_60436_20130413_200129_outLine +BABEL_OP2_205_61440_20130411_231312_inLine +BABEL_OP2_205_61440_20130411_231312_outLine +BABEL_OP2_205_61971_20130413_052620_inLine +BABEL_OP2_205_61971_20130413_052620_outLine +BABEL_OP2_205_62014_20130329_225214_inLine +BABEL_OP2_205_62014_20130329_225214_outLine +BABEL_OP2_205_62200_20130405_021524_inLine +BABEL_OP2_205_62200_20130405_021524_outLine +BABEL_OP2_205_62289_20140122_214709_inLine +BABEL_OP2_205_62289_20140122_214709_outLine +BABEL_OP2_205_62360_20140122_233956_inLine +BABEL_OP2_205_62810_20130304_075632_inLine +BABEL_OP2_205_62810_20130304_075632_outLine +BABEL_OP2_205_63084_20130405_025236_inLine +BABEL_OP2_205_63084_20130405_025236_outLine +BABEL_OP2_205_63787_20130310_001339_inLine +BABEL_OP2_205_63787_20130310_001339_outLine +BABEL_OP2_205_63920_20131226_014831_inLine +BABEL_OP2_205_64688_20131226_232545_inLine +BABEL_OP2_205_64688_20131226_232545_outLine +BABEL_OP2_205_66971_20130413_002731_inLine +BABEL_OP2_205_66971_20130413_002731_outLine +BABEL_OP2_205_67964_20140122_221653_inLine +BABEL_OP2_205_67964_20140122_221653_outLine +BABEL_OP2_205_68289_20130407_225726_inLine +BABEL_OP2_205_68289_20130407_225726_outLine +BABEL_OP2_205_68748_20130330_225712_inLine 
+BABEL_OP2_205_68748_20130330_225712_outLine +BABEL_OP2_205_70452_20130328_011715_inLine +BABEL_OP2_205_70452_20130328_011715_outLine +BABEL_OP2_205_70713_20131129_235040_inLine +BABEL_OP2_205_70716_20130413_193114_inLine +BABEL_OP2_205_70716_20130413_193114_outLine +BABEL_OP2_205_74799_20130407_030553_inLine +BABEL_OP2_205_74799_20130407_030553_outLine +BABEL_OP2_205_76683_20130331_201352_inLine +BABEL_OP2_205_76683_20130331_201352_outLine +BABEL_OP2_205_77242_20130616_015950_inLine +BABEL_OP2_205_77242_20130616_015950_outLine +BABEL_OP2_205_78254_20130323_051609_inLine +BABEL_OP2_205_78254_20130323_051609_outLine +BABEL_OP2_205_80559_20130323_224458_inLine +BABEL_OP2_205_80559_20130323_224458_outLine +BABEL_OP2_205_81149_20130412_061213_inLine +BABEL_OP2_205_81149_20130412_061213_outLine +BABEL_OP2_205_82138_20130622_210458_inLine +BABEL_OP2_205_82138_20130622_210458_outLine +BABEL_OP2_205_84605_20130319_203823_inLine +BABEL_OP2_205_84605_20130319_203823_outLine +BABEL_OP2_205_84737_20130407_054058_inLine +BABEL_OP2_205_84737_20130407_054058_outLine +BABEL_OP2_205_84936_20130405_063301_inLine +BABEL_OP2_205_84936_20130405_063301_outLine +BABEL_OP2_205_86191_20130323_060631_inLine +BABEL_OP2_205_86191_20130323_060631_outLine +BABEL_OP2_205_86433_20130325_084312_inLine +BABEL_OP2_205_86433_20130325_084312_outLine +BABEL_OP2_205_86676_20130331_014116_inLine +BABEL_OP2_205_86676_20130331_014116_outLine +BABEL_OP2_205_86715_20130618_002759_inLine +BABEL_OP2_205_86715_20130618_002759_outLine +BABEL_OP2_205_86826_20130411_224207_inLine +BABEL_OP2_205_86826_20130411_224207_outLine +BABEL_OP2_205_90760_20130612_022556_inLine +BABEL_OP2_205_90760_20130612_022556_outLine +BABEL_OP2_205_91336_20130622_230929_inLine +BABEL_OP2_205_91336_20130622_230929_outLine +BABEL_OP2_205_92605_20140123_032518_inLine +BABEL_OP2_205_92605_20140123_032518_outLine +BABEL_OP2_205_93964_20130623_014819_inLine +BABEL_OP2_205_93964_20130623_014819_outLine +BABEL_OP2_205_94891_20140123_222847_inLine +BABEL_OP2_205_94891_20140123_222847_outLine +BABEL_OP2_205_94978_20131126_045451_inLine +BABEL_OP2_205_94978_20131126_045451_outLine +BABEL_OP2_205_96376_20140120_211321_inLine +BABEL_OP2_205_96376_20140120_211321_outLine +BABEL_OP2_205_97772_20130301_071555_inLine +BABEL_OP2_205_97772_20130301_071555_outLine +BABEL_OP2_205_99594_20130320_070531_inLine +BABEL_OP2_205_99594_20130320_070531_outLine diff --git a/egs/babel/s5d/conf/lists/205-kurmanji/untranscribed-training.list b/egs/babel/s5d/conf/lists/205-kurmanji/untranscribed-training.list new file mode 100644 index 00000000000..0239610b1a7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/205-kurmanji/untranscribed-training.list @@ -0,0 +1,521 @@ +BABEL_OP2_205_12321_20131101_103424_inLine +BABEL_OP2_205_12321_20131101_103424_outLine +BABEL_OP2_205_14350_20130311_065704_inLine +BABEL_OP2_205_14350_20130311_065704_outLine +BABEL_OP2_205_15262_20130310_220350_inLine +BABEL_OP2_205_15262_20130310_220350_outLine +BABEL_OP2_205_15902_20130309_042954_inLine +BABEL_OP2_205_15902_20130309_042954_outLine +BABEL_OP2_205_16475_20130318_071049_inLine +BABEL_OP2_205_16475_20130318_071049_outLine +BABEL_OP2_205_17582_20130612_045820_inLine +BABEL_OP2_205_17582_20130612_045820_outLine +BABEL_OP2_205_17923_20130310_071855_inLine +BABEL_OP2_205_17923_20130310_071855_outLine +BABEL_OP2_205_18992_20131227_214303_inLine +BABEL_OP2_205_19545_20131103_054936_inLine +BABEL_OP2_205_19545_20131103_054936_outLine +BABEL_OP2_205_20724_20131226_220301_inLine +BABEL_OP2_205_20738_20131219_000457_inLine 
+BABEL_OP2_205_20738_20131219_000457_outLine +BABEL_OP2_205_20768_20131104_020043_inLine +BABEL_OP2_205_20768_20131104_020043_outLine +BABEL_OP2_205_21109_20131110_203639_inLine +BABEL_OP2_205_21109_20131110_203639_outLine +BABEL_OP2_205_21244_20130411_220542_inLine +BABEL_OP2_205_21244_20130411_220542_outLine +BABEL_OP2_205_23681_20130519_052009_outLine +BABEL_OP2_205_23731_20131104_055621_outLine +BABEL_OP2_205_26074_20130318_092803_inLine +BABEL_OP2_205_26074_20130318_092803_outLine +BABEL_OP2_205_26074_20130318_093434_inLine +BABEL_OP2_205_26074_20130318_093434_outLine +BABEL_OP2_205_26398_20130414_210257_outLine +BABEL_OP2_205_28538_20131222_010921_inLine +BABEL_OP2_205_29230_20130619_045232_inLine +BABEL_OP2_205_31182_20131104_010653_inLine +BABEL_OP2_205_31182_20131104_010653_outLine +BABEL_OP2_205_32301_20130401_015605_outLine +BABEL_OP2_205_36039_20130525_013805_inLine +BABEL_OP2_205_36039_20130525_013805_outLine +BABEL_OP2_205_36059_20131108_200854_inLine +BABEL_OP2_205_36059_20131108_200854_outLine +BABEL_OP2_205_36059_20131108_201758_inLine +BABEL_OP2_205_36059_20131108_201758_outLine +BABEL_OP2_205_36059_20131108_202426_inLine +BABEL_OP2_205_36059_20131108_202426_outLine +BABEL_OP2_205_37229_20130612_045130_inLine +BABEL_OP2_205_37229_20130612_045130_outLine +BABEL_OP2_205_37499_20130423_030008_inLine +BABEL_OP2_205_37499_20130423_030008_outLine +BABEL_OP2_205_38979_20131105_052419_inLine +BABEL_OP2_205_38979_20131105_052419_outLine +BABEL_OP2_205_39159_20130303_040403_inLine +BABEL_OP2_205_39159_20130303_040403_outLine +BABEL_OP2_205_40713_20130314_075828_inLine +BABEL_OP2_205_40713_20130314_075828_outLine +BABEL_OP2_205_40740_20130407_055052_inLine +BABEL_OP2_205_40740_20130407_055052_outLine +BABEL_OP2_205_41100_20130311_015856_inLine +BABEL_OP2_205_41100_20130311_015856_outLine +BABEL_OP2_205_41109_20130406_224530_inLine +BABEL_OP2_205_41109_20130406_224530_outLine +BABEL_OP2_205_41493_20130228_081724_inLine +BABEL_OP2_205_41493_20130228_081724_outLine +BABEL_OP2_205_41745_20130319_001127_inLine +BABEL_OP2_205_41745_20130319_001127_outLine +BABEL_OP2_205_41920_20130325_212001_inLine +BABEL_OP2_205_42155_20130312_064841_inLine +BABEL_OP2_205_42155_20130312_064841_outLine +BABEL_OP2_205_42243_20130305_063726_inLine +BABEL_OP2_205_42243_20130305_063726_outLine +BABEL_OP2_205_43239_20131110_055057_inLine +BABEL_OP2_205_43239_20131110_055057_outLine +BABEL_OP2_205_44255_20130406_205651_inLine +BABEL_OP2_205_44255_20130406_205651_outLine +BABEL_OP2_205_44531_20131108_194709_inLine +BABEL_OP2_205_44531_20131108_194709_outLine +BABEL_OP2_205_44619_20130314_074104_inLine +BABEL_OP2_205_44619_20130314_074104_outLine +BABEL_OP2_205_45642_20130311_052042_inLine +BABEL_OP2_205_45642_20130311_052042_outLine +BABEL_OP2_205_46558_20130304_053902_inLine +BABEL_OP2_205_46558_20130304_053902_outLine +BABEL_OP2_205_46702_20130301_025750_inLine +BABEL_OP2_205_46702_20130301_025750_outLine +BABEL_OP2_205_46763_20130505_222913_inLine +BABEL_OP2_205_46763_20130505_222913_outLine +BABEL_OP2_205_47215_20130408_021338_inLine +BABEL_OP2_205_47215_20130408_021338_outLine +BABEL_OP2_205_47270_20130406_211808_inLine +BABEL_OP2_205_47270_20130406_211808_outLine +BABEL_OP2_205_47405_20131231_032458_inLine +BABEL_OP2_205_47405_20131231_032458_outLine +BABEL_OP2_205_47877_20130407_205116_inLine +BABEL_OP2_205_47877_20130407_205116_outLine +BABEL_OP2_205_48399_20130304_002906_inLine +BABEL_OP2_205_48399_20130304_002906_outLine +BABEL_OP2_205_48758_20131107_075636_inLine 
+BABEL_OP2_205_48758_20131107_075636_outLine +BABEL_OP2_205_48758_20131107_080446_inLine +BABEL_OP2_205_48758_20131107_080446_outLine +BABEL_OP2_205_48789_20131103_043932_inLine +BABEL_OP2_205_48789_20131103_043932_outLine +BABEL_OP2_205_49775_20130227_060536_inLine +BABEL_OP2_205_49775_20130227_060536_outLine +BABEL_OP2_205_49812_20130407_020818_inLine +BABEL_OP2_205_49812_20130407_020818_outLine +BABEL_OP2_205_49945_20130412_222951_inLine +BABEL_OP2_205_49945_20130412_222951_outLine +BABEL_OP2_205_50090_20130329_203208_inLine +BABEL_OP2_205_50090_20130329_203208_outLine +BABEL_OP2_205_50681_20130330_050901_inLine +BABEL_OP2_205_50681_20130330_050901_outLine +BABEL_OP2_205_51530_20130414_210733_inLine +BABEL_OP2_205_51530_20130414_210733_outLine +BABEL_OP2_205_51611_20130311_072551_inLine +BABEL_OP2_205_51611_20130311_072551_outLine +BABEL_OP2_205_51819_20130401_010103_outLine +BABEL_OP2_205_51819_20130401_010745_outLine +BABEL_OP2_205_52447_20130412_001856_inLine +BABEL_OP2_205_52447_20130412_001856_outLine +BABEL_OP2_205_52483_20130621_205901_inLine +BABEL_OP2_205_52483_20130621_205901_outLine +BABEL_OP2_205_52804_20130311_002720_inLine +BABEL_OP2_205_52804_20130311_002720_outLine +BABEL_OP2_205_54040_20131104_013114_inLine +BABEL_OP2_205_54040_20131104_013114_outLine +BABEL_OP2_205_54160_20130306_033742_inLine +BABEL_OP2_205_54160_20130306_033742_outLine +BABEL_OP2_205_54405_20130618_202708_inLine +BABEL_OP2_205_54405_20130618_202708_outLine +BABEL_OP2_205_55818_20130309_080103_inLine +BABEL_OP2_205_55818_20130309_080103_outLine +BABEL_OP2_205_56090_20130227_204816_inLine +BABEL_OP2_205_56090_20130227_204816_outLine +BABEL_OP2_205_56306_20130407_012524_inLine +BABEL_OP2_205_56306_20130407_012524_outLine +BABEL_OP2_205_56306_20130407_013746_inLine +BABEL_OP2_205_56306_20130407_013746_outLine +BABEL_OP2_205_56331_20130413_043736_inLine +BABEL_OP2_205_56331_20130413_043736_outLine +BABEL_OP2_205_56370_20130306_061205_inLine +BABEL_OP2_205_56370_20130306_061205_outLine +BABEL_OP2_205_56429_20130311_053708_inLine +BABEL_OP2_205_56429_20130311_053708_outLine +BABEL_OP2_205_56523_20130317_224401_inLine +BABEL_OP2_205_56523_20130317_224401_outLine +BABEL_OP2_205_56720_20130506_182315_inLine +BABEL_OP2_205_56720_20130506_182315_outLine +BABEL_OP2_205_57566_20130407_031257_inLine +BABEL_OP2_205_57566_20130407_031257_outLine +BABEL_OP2_205_58915_20130611_221704_inLine +BABEL_OP2_205_58915_20130611_221704_outLine +BABEL_OP2_205_59928_20130321_012004_inLine +BABEL_OP2_205_59928_20130321_012004_outLine +BABEL_OP2_205_59993_20130321_045802_inLine +BABEL_OP2_205_59993_20130321_045802_outLine +BABEL_OP2_205_60474_20130324_213649_inLine +BABEL_OP2_205_60474_20130324_213649_outLine +BABEL_OP2_205_60508_20130304_205015_inLine +BABEL_OP2_205_60508_20130304_205015_outLine +BABEL_OP2_205_60538_20130310_004703_inLine +BABEL_OP2_205_60538_20130310_004703_outLine +BABEL_OP2_205_60626_20130315_071907_inLine +BABEL_OP2_205_60626_20130315_071907_outLine +BABEL_OP2_205_60706_20130307_053430_inLine +BABEL_OP2_205_60706_20130307_053430_outLine +BABEL_OP2_205_60836_20130330_072606_inLine +BABEL_OP2_205_60836_20130330_072606_outLine +BABEL_OP2_205_61167_20130326_222257_inLine +BABEL_OP2_205_61167_20130326_222257_outLine +BABEL_OP2_205_61190_20130325_004615_inLine +BABEL_OP2_205_61190_20130325_004615_outLine +BABEL_OP2_205_61219_20130325_212553_inLine +BABEL_OP2_205_61219_20130325_212553_outLine +BABEL_OP2_205_61357_20130330_232257_inLine +BABEL_OP2_205_61357_20130330_232257_outLine 
+BABEL_OP2_205_62434_20130305_215011_inLine +BABEL_OP2_205_62434_20130305_215011_outLine +BABEL_OP2_205_62434_20130305_220154_inLine +BABEL_OP2_205_62434_20130305_220154_outLine +BABEL_OP2_205_62734_20130328_050453_inLine +BABEL_OP2_205_62734_20130328_050453_outLine +BABEL_OP2_205_63081_20130309_012237_inLine +BABEL_OP2_205_63081_20130309_012237_outLine +BABEL_OP2_205_63094_20131113_030146_inLine +BABEL_OP2_205_63220_20130326_055356_inLine +BABEL_OP2_205_63220_20130326_055356_outLine +BABEL_OP2_205_63445_20130308_235018_inLine +BABEL_OP2_205_63445_20130308_235018_outLine +BABEL_OP2_205_63481_20130307_082632_inLine +BABEL_OP2_205_63481_20130307_082632_outLine +BABEL_OP2_205_63523_20140121_213251_inLine +BABEL_OP2_205_63757_20130406_055509_inLine +BABEL_OP2_205_63757_20130406_055509_outLine +BABEL_OP2_205_63938_20130413_044053_inLine +BABEL_OP2_205_63938_20130413_044053_outLine +BABEL_OP2_205_64014_20130413_004605_inLine +BABEL_OP2_205_64014_20130413_004605_outLine +BABEL_OP2_205_64065_20130315_044531_inLine +BABEL_OP2_205_64065_20130315_044531_outLine +BABEL_OP2_205_64494_20130313_043717_inLine +BABEL_OP2_205_64494_20130313_043717_outLine +BABEL_OP2_205_64638_20130408_004937_inLine +BABEL_OP2_205_64638_20130408_004937_outLine +BABEL_OP2_205_64722_20131107_210205_inLine +BABEL_OP2_205_64722_20131107_210205_outLine +BABEL_OP2_205_64759_20130307_214024_inLine +BABEL_OP2_205_64759_20130307_214024_outLine +BABEL_OP2_205_64759_20130307_215400_inLine +BABEL_OP2_205_64759_20130307_215400_outLine +BABEL_OP2_205_64768_20130327_213122_inLine +BABEL_OP2_205_64768_20130327_213122_outLine +BABEL_OP2_205_64796_20130307_042443_inLine +BABEL_OP2_205_64796_20130307_042443_outLine +BABEL_OP2_205_64902_20130414_052508_inLine +BABEL_OP2_205_64902_20130414_052508_outLine +BABEL_OP2_205_65882_20130311_013812_inLine +BABEL_OP2_205_65882_20130311_013812_outLine +BABEL_OP2_205_66026_20130508_223030_inLine +BABEL_OP2_205_66026_20130508_223030_outLine +BABEL_OP2_205_66959_20130414_224335_inLine +BABEL_OP2_205_66959_20130414_224335_outLine +BABEL_OP2_205_67085_20130414_180541_inLine +BABEL_OP2_205_67085_20130414_180541_outLine +BABEL_OP2_205_67389_20140111_225039_inLine +BABEL_OP2_205_67389_20140111_225039_outLine +BABEL_OP2_205_67552_20130331_072350_inLine +BABEL_OP2_205_67552_20130331_072350_outLine +BABEL_OP2_205_67552_20130331_073746_inLine +BABEL_OP2_205_67552_20130331_073746_outLine +BABEL_OP2_205_67592_20130509_213421_inLine +BABEL_OP2_205_67592_20130509_213421_outLine +BABEL_OP2_205_67794_20130315_081604_inLine +BABEL_OP2_205_67794_20130315_081604_outLine +BABEL_OP2_205_67999_20130407_223424_inLine +BABEL_OP2_205_67999_20130407_223424_outLine +BABEL_OP2_205_68059_20130619_053732_inLine +BABEL_OP2_205_68059_20130619_053732_outLine +BABEL_OP2_205_68182_20130415_041909_inLine +BABEL_OP2_205_68182_20130415_041909_outLine +BABEL_OP2_205_69633_20130331_021718_inLine +BABEL_OP2_205_69633_20130331_021718_outLine +BABEL_OP2_205_69633_20130331_023306_inLine +BABEL_OP2_205_69633_20130331_023306_outLine +BABEL_OP2_205_69885_20130415_051700_inLine +BABEL_OP2_205_69885_20130415_051700_outLine +BABEL_OP2_205_70110_20130302_074003_inLine +BABEL_OP2_205_70110_20130302_074003_outLine +BABEL_OP2_205_70343_20130401_203305_inLine +BABEL_OP2_205_70343_20130401_203305_outLine +BABEL_OP2_205_70526_20130416_033943_inLine +BABEL_OP2_205_70526_20130416_033943_outLine +BABEL_OP2_205_71047_20131109_013132_inLine +BABEL_OP2_205_71047_20131109_013132_outLine +BABEL_OP2_205_71333_20130326_225136_inLine 
+BABEL_OP2_205_71333_20130326_225136_outLine +BABEL_OP2_205_71614_20130506_175649_inLine +BABEL_OP2_205_71614_20130506_175649_outLine +BABEL_OP2_205_71704_20130317_002057_inLine +BABEL_OP2_205_71704_20130317_002057_outLine +BABEL_OP2_205_71754_20140115_014345_inLine +BABEL_OP2_205_71754_20140115_014345_outLine +BABEL_OP2_205_72040_20130321_022323_inLine +BABEL_OP2_205_72040_20130321_022323_outLine +BABEL_OP2_205_72733_20130415_183417_inLine +BABEL_OP2_205_72733_20130415_183417_outLine +BABEL_OP2_205_73042_20130317_000810_inLine +BABEL_OP2_205_73042_20130317_000810_outLine +BABEL_OP2_205_73072_20130311_213816_inLine +BABEL_OP2_205_73072_20130311_213816_outLine +BABEL_OP2_205_73301_20130330_062717_inLine +BABEL_OP2_205_73301_20130330_062717_outLine +BABEL_OP2_205_73301_20130330_064357_inLine +BABEL_OP2_205_73301_20130330_064357_outLine +BABEL_OP2_205_73408_20130622_062600_inLine +BABEL_OP2_205_73408_20130622_062600_outLine +BABEL_OP2_205_73837_20130330_054105_inLine +BABEL_OP2_205_73837_20130330_054105_outLine +BABEL_OP2_205_74111_20130507_182333_inLine +BABEL_OP2_205_74111_20130507_182333_outLine +BABEL_OP2_205_74280_20130301_022106_inLine +BABEL_OP2_205_74280_20130301_022106_outLine +BABEL_OP2_205_74455_20130414_041223_inLine +BABEL_OP2_205_74455_20130414_041223_outLine +BABEL_OP2_205_74641_20130314_060344_inLine +BABEL_OP2_205_74641_20130314_060344_outLine +BABEL_OP2_205_74921_20130331_061311_inLine +BABEL_OP2_205_74921_20130331_061311_outLine +BABEL_OP2_205_75223_20130306_045441_inLine +BABEL_OP2_205_75223_20130306_045441_outLine +BABEL_OP2_205_75261_20130408_234257_inLine +BABEL_OP2_205_75261_20130408_234257_outLine +BABEL_OP2_205_75342_20130415_192555_inLine +BABEL_OP2_205_75342_20130415_192555_outLine +BABEL_OP2_205_75981_20130413_042503_inLine +BABEL_OP2_205_75981_20130413_042503_outLine +BABEL_OP2_205_76773_20130312_051652_inLine +BABEL_OP2_205_76773_20130312_051652_outLine +BABEL_OP2_205_77139_20130305_045120_inLine +BABEL_OP2_205_77139_20130305_045120_outLine +BABEL_OP2_205_77744_20130328_012940_outLine +BABEL_OP2_205_78544_20130408_000050_inLine +BABEL_OP2_205_78544_20130408_000050_outLine +BABEL_OP2_205_78544_20130408_001043_inLine +BABEL_OP2_205_78544_20130408_001043_outLine +BABEL_OP2_205_78609_20130508_001720_inLine +BABEL_OP2_205_78609_20130508_001720_outLine +BABEL_OP2_205_78630_20130330_200921_inLine +BABEL_OP2_205_78630_20130330_200921_outLine +BABEL_OP2_205_78943_20130326_063742_inLine +BABEL_OP2_205_78943_20130326_063742_outLine +BABEL_OP2_205_79045_20130507_020315_inLine +BABEL_OP2_205_79045_20130507_020315_outLine +BABEL_OP2_205_79107_20130613_004324_inLine +BABEL_OP2_205_79107_20130613_004324_outLine +BABEL_OP2_205_79167_20130331_053551_inLine +BABEL_OP2_205_79167_20130331_053551_outLine +BABEL_OP2_205_79190_20130313_020401_inLine +BABEL_OP2_205_79190_20130313_020401_outLine +BABEL_OP2_205_79590_20131221_025241_inLine +BABEL_OP2_205_79590_20131221_025241_outLine +BABEL_OP2_205_79590_20131221_031508_inLine +BABEL_OP2_205_79590_20131221_031508_outLine +BABEL_OP2_205_79751_20130324_220236_inLine +BABEL_OP2_205_79751_20130324_220236_outLine +BABEL_OP2_205_79858_20130309_210841_inLine +BABEL_OP2_205_79858_20130309_210841_outLine +BABEL_OP2_205_80136_20130406_190838_inLine +BABEL_OP2_205_80136_20130406_190838_outLine +BABEL_OP2_205_80577_20131110_045204_inLine +BABEL_OP2_205_80577_20131110_045204_outLine +BABEL_OP2_205_80881_20130326_002818_inLine +BABEL_OP2_205_80881_20130326_002818_outLine +BABEL_OP2_205_80881_20130326_004157_inLine 
+BABEL_OP2_205_80881_20130326_004157_outLine +BABEL_OP2_205_80881_20130326_005241_inLine +BABEL_OP2_205_80881_20130326_005241_outLine +BABEL_OP2_205_81287_20130414_230143_inLine +BABEL_OP2_205_81287_20130414_230143_outLine +BABEL_OP2_205_81392_20130506_224137_inLine +BABEL_OP2_205_81392_20130506_224137_outLine +BABEL_OP2_205_81404_20130324_072708_inLine +BABEL_OP2_205_81404_20130324_072708_outLine +BABEL_OP2_205_81433_20131110_024152_inLine +BABEL_OP2_205_81433_20131110_024152_outLine +BABEL_OP2_205_81553_20130408_190946_inLine +BABEL_OP2_205_81671_20130407_193047_inLine +BABEL_OP2_205_81671_20130407_193047_outLine +BABEL_OP2_205_81854_20130413_035448_inLine +BABEL_OP2_205_81854_20130413_035448_outLine +BABEL_OP2_205_82030_20130416_024208_inLine +BABEL_OP2_205_82030_20130416_024208_outLine +BABEL_OP2_205_82145_20131101_103425_inLine +BABEL_OP2_205_82145_20131101_103425_outLine +BABEL_OP2_205_82863_20131110_214438_inLine +BABEL_OP2_205_82863_20131110_214438_outLine +BABEL_OP2_205_82863_20131110_220419_inLine +BABEL_OP2_205_82863_20131110_220419_outLine +BABEL_OP2_205_82979_20130321_013427_inLine +BABEL_OP2_205_83062_20130412_213219_inLine +BABEL_OP2_205_83062_20130412_213219_outLine +BABEL_OP2_205_83366_20130406_070242_outLine +BABEL_OP2_205_83436_20130306_064555_inLine +BABEL_OP2_205_83436_20130306_064555_outLine +BABEL_OP2_205_83545_20130413_183305_outLine +BABEL_OP2_205_83775_20130326_223716_inLine +BABEL_OP2_205_83775_20130326_223716_outLine +BABEL_OP2_205_83783_20130316_062751_inLine +BABEL_OP2_205_83783_20130316_062751_outLine +BABEL_OP2_205_84125_20130301_040550_inLine +BABEL_OP2_205_84125_20130301_040550_outLine +BABEL_OP2_205_84370_20130613_203134_inLine +BABEL_OP2_205_84370_20130613_213749_inLine +BABEL_OP2_205_84458_20130508_224724_inLine +BABEL_OP2_205_84458_20130508_224724_outLine +BABEL_OP2_205_84469_20130408_213237_inLine +BABEL_OP2_205_84469_20130408_213237_outLine +BABEL_OP2_205_84583_20130312_211219_inLine +BABEL_OP2_205_84583_20130312_211219_outLine +BABEL_OP2_205_84815_20130413_183704_inLine +BABEL_OP2_205_84815_20130413_183704_outLine +BABEL_OP2_205_84838_20130509_005525_inLine +BABEL_OP2_205_84838_20130509_005525_outLine +BABEL_OP2_205_85179_20130409_002521_inLine +BABEL_OP2_205_85179_20130409_002521_outLine +BABEL_OP2_205_85248_20130414_215500_inLine +BABEL_OP2_205_85248_20130414_215500_outLine +BABEL_OP2_205_85260_20140115_021714_inLine +BABEL_OP2_205_85260_20140115_021714_outLine +BABEL_OP2_205_85322_20130323_013257_inLine +BABEL_OP2_205_85325_20130414_061613_inLine +BABEL_OP2_205_85325_20130414_061613_outLine +BABEL_OP2_205_85331_20130616_014645_inLine +BABEL_OP2_205_85331_20130616_014645_outLine +BABEL_OP2_205_85340_20130321_040745_inLine +BABEL_OP2_205_85340_20130321_040745_outLine +BABEL_OP2_205_86321_20130413_173559_inLine +BABEL_OP2_205_86321_20130413_173559_outLine +BABEL_OP2_205_86467_20130305_074640_inLine +BABEL_OP2_205_86467_20130305_074640_outLine +BABEL_OP2_205_86472_20130408_003043_inLine +BABEL_OP2_205_86472_20130408_003043_outLine +BABEL_OP2_205_86557_20130304_044109_inLine +BABEL_OP2_205_86557_20130304_044109_outLine +BABEL_OP2_205_87298_20130322_201204_inLine +BABEL_OP2_205_87629_20130312_052701_inLine +BABEL_OP2_205_87629_20130312_052701_outLine +BABEL_OP2_205_87884_20130413_194754_inLine +BABEL_OP2_205_87884_20130413_194754_outLine +BABEL_OP2_205_87889_20130408_204610_inLine +BABEL_OP2_205_87889_20130408_204610_outLine +BABEL_OP2_205_88686_20130303_070128_inLine +BABEL_OP2_205_88686_20130303_070128_outLine 
+BABEL_OP2_205_88873_20130326_050532_inLine +BABEL_OP2_205_88873_20130326_050532_outLine +BABEL_OP2_205_89372_20130227_014653_inLine +BABEL_OP2_205_89372_20130227_014653_outLine +BABEL_OP2_205_89457_20131111_005030_inLine +BABEL_OP2_205_89457_20131111_005030_outLine +BABEL_OP2_205_89560_20130507_184514_inLine +BABEL_OP2_205_89560_20130507_184514_outLine +BABEL_OP2_205_89888_20130311_070650_inLine +BABEL_OP2_205_89888_20130311_070650_outLine +BABEL_OP2_205_90347_20130331_202436_outLine +BABEL_OP2_205_90935_20130324_042904_inLine +BABEL_OP2_205_90935_20130324_042904_outLine +BABEL_OP2_205_91189_20131108_222823_inLine +BABEL_OP2_205_91189_20131108_222823_outLine +BABEL_OP2_205_91319_20130408_214039_inLine +BABEL_OP2_205_91319_20130408_214039_outLine +BABEL_OP2_205_91463_20130331_044435_inLine +BABEL_OP2_205_91463_20130331_044435_outLine +BABEL_OP2_205_91581_20130408_184119_inLine +BABEL_OP2_205_91581_20130408_184119_outLine +BABEL_OP2_205_91884_20130413_195918_inLine +BABEL_OP2_205_91884_20130413_195918_outLine +BABEL_OP2_205_91891_20130330_214543_inLine +BABEL_OP2_205_91891_20130330_214543_outLine +BABEL_OP2_205_91971_20140108_225426_inLine +BABEL_OP2_205_91971_20140108_225426_outLine +BABEL_OP2_205_91977_20130331_024658_inLine +BABEL_OP2_205_91977_20130331_024658_outLine +BABEL_OP2_205_91977_20130331_030804_inLine +BABEL_OP2_205_91977_20130331_030804_outLine +BABEL_OP2_205_92077_20130413_012328_inLine +BABEL_OP2_205_92077_20130413_012328_outLine +BABEL_OP2_205_92459_20130330_001356_inLine +BABEL_OP2_205_92459_20130330_001356_outLine +BABEL_OP2_205_92509_20130303_235756_inLine +BABEL_OP2_205_92509_20130303_235756_outLine +BABEL_OP2_205_92740_20130330_215927_inLine +BABEL_OP2_205_92740_20130330_215927_outLine +BABEL_OP2_205_92886_20130318_002931_inLine +BABEL_OP2_205_92886_20130318_002931_outLine +BABEL_OP2_205_92941_20130329_233855_inLine +BABEL_OP2_205_92941_20130329_233855_outLine +BABEL_OP2_205_93224_20130329_231410_inLine +BABEL_OP2_205_93224_20130329_231410_outLine +BABEL_OP2_205_93224_20130329_233120_inLine +BABEL_OP2_205_93224_20130329_233120_outLine +BABEL_OP2_205_93475_20130318_075901_inLine +BABEL_OP2_205_93475_20130318_075901_outLine +BABEL_OP2_205_93861_20130623_023740_inLine +BABEL_OP2_205_93946_20130413_175030_inLine +BABEL_OP2_205_93946_20130413_175030_outLine +BABEL_OP2_205_93946_20130413_180241_inLine +BABEL_OP2_205_93946_20130413_180241_outLine +BABEL_OP2_205_94002_20131110_223144_inLine +BABEL_OP2_205_94002_20131110_223144_outLine +BABEL_OP2_205_94442_20130413_000848_inLine +BABEL_OP2_205_94442_20130413_000848_outLine +BABEL_OP2_205_94465_20130408_073150_inLine +BABEL_OP2_205_94465_20130408_073150_outLine +BABEL_OP2_205_94587_20130330_222137_inLine +BABEL_OP2_205_94587_20130330_222137_outLine +BABEL_OP2_205_95269_20130323_231507_inLine +BABEL_OP2_205_95269_20130323_231507_outLine +BABEL_OP2_205_95294_20130331_225911_inLine +BABEL_OP2_205_95294_20130331_225911_outLine +BABEL_OP2_205_95446_20131109_020511_inLine +BABEL_OP2_205_95446_20131109_020511_outLine +BABEL_OP2_205_95467_20130616_022551_inLine +BABEL_OP2_205_95490_20130303_001325_inLine +BABEL_OP2_205_95490_20130303_001325_outLine +BABEL_OP2_205_95583_20130305_224743_inLine +BABEL_OP2_205_95583_20130305_224743_outLine +BABEL_OP2_205_96324_20130307_062000_inLine +BABEL_OP2_205_96324_20130307_062000_outLine +BABEL_OP2_205_96842_20130413_004424_inLine +BABEL_OP2_205_96842_20130413_004424_outLine +BABEL_OP2_205_96934_20130330_010104_inLine +BABEL_OP2_205_96934_20130330_010104_outLine 
+BABEL_OP2_205_96985_20130305_215036_inLine +BABEL_OP2_205_96985_20130305_215036_outLine +BABEL_OP2_205_97264_20130407_190517_inLine +BABEL_OP2_205_97264_20130407_190517_outLine +BABEL_OP2_205_97363_20130321_030214_inLine +BABEL_OP2_205_97363_20130321_030214_outLine +BABEL_OP2_205_97570_20130406_072121_inLine +BABEL_OP2_205_97570_20130406_072121_outLine +BABEL_OP2_205_97604_20130408_175013_inLine +BABEL_OP2_205_97604_20130408_175013_outLine +BABEL_OP2_205_97731_20130413_011730_inLine +BABEL_OP2_205_97731_20130413_011730_outLine +BABEL_OP2_205_97731_20130413_013459_inLine +BABEL_OP2_205_97731_20130413_013459_outLine +BABEL_OP2_205_98311_20130311_063743_inLine +BABEL_OP2_205_98311_20130311_063743_outLine +BABEL_OP2_205_99264_20130412_221353_inLine +BABEL_OP2_205_99264_20130412_221353_outLine +BABEL_OP2_205_99487_20130310_062912_inLine +BABEL_OP2_205_99487_20130310_062912_outLine +BABEL_OP2_205_99516_20130304_070035_inLine +BABEL_OP2_205_99516_20130304_070035_outLine +BABEL_OP2_205_99718_20130311_081329_inLine +BABEL_OP2_205_99718_20130311_081329_outLine +BABEL_OP2_205_99813_20131110_022455_inLine +BABEL_OP2_205_99813_20131110_022455_outLine +BABEL_OP2_205_99920_20130408_013635_inLine +BABEL_OP2_205_99920_20130408_013635_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/dev.list b/egs/babel/s5d/conf/lists/206-zulu/dev.list new file mode 100644 index 00000000000..52d51a26c88 --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/dev.list @@ -0,0 +1,141 @@ +BABEL_OP1_206_14350_20121123_042710_inLine +BABEL_OP1_206_14350_20121123_042710_outLine +BABEL_OP1_206_15042_20130124_002208_inLine +BABEL_OP1_206_15042_20130124_002208_outLine +BABEL_OP1_206_15042_20130124_003815_inLine +BABEL_OP1_206_15042_20130124_003815_outLine +BABEL_OP1_206_15163_20121129_232215_inLine +BABEL_OP1_206_15163_20121129_232215_outLine +BABEL_OP1_206_19621_20121219_031810_inLine +BABEL_OP1_206_19621_20121219_031810_outLine +BABEL_OP1_206_19663_20121219_173010_inLine +BABEL_OP1_206_19663_20121219_173010_outLine +BABEL_OP1_206_22466_20121130_231814_inLine +BABEL_OP1_206_22466_20121130_231814_outLine +BABEL_OP1_206_23995_20121215_221537_inLine +BABEL_OP1_206_23995_20121215_221537_outLine +BABEL_OP1_206_26999_20121213_022027_inLine +BABEL_OP1_206_26999_20121213_022027_outLine +BABEL_OP1_206_28190_20121213_031401_inLine +BABEL_OP1_206_28190_20121213_031401_outLine +BABEL_OP1_206_28606_20121215_000631_inLine +BABEL_OP1_206_28606_20121215_000631_outLine +BABEL_OP1_206_31182_20121222_050854_inLine +BABEL_OP1_206_31182_20121222_050854_outLine +BABEL_OP1_206_32727_20130601_012544_inLine +BABEL_OP1_206_32727_20130601_012544_outLine +BABEL_OP1_206_34477_20121130_183409_inLine +BABEL_OP1_206_34477_20121130_183409_outLine +BABEL_OP1_206_34477_20121130_184826_inLine +BABEL_OP1_206_34477_20121130_184826_outLine +BABEL_OP1_206_34899_20130602_004027_inLine +BABEL_OP1_206_34899_20130602_004027_outLine +BABEL_OP1_206_35583_20130529_005600_inLine +BABEL_OP1_206_35583_20130529_005600_outLine +BABEL_OP1_206_36219_20121130_184946_inLine +BABEL_OP1_206_36219_20121130_184946_outLine +BABEL_OP1_206_36594_20130601_002535_inLine +BABEL_OP1_206_36594_20130601_002535_outLine +BABEL_OP1_206_36990_20121130_212128_inLine +BABEL_OP1_206_36990_20121130_212128_outLine +BABEL_OP1_206_36990_20121130_213230_inLine +BABEL_OP1_206_36990_20121130_213230_outLine +BABEL_OP1_206_36990_20121130_220005_inLine +BABEL_OP1_206_36990_20121130_220005_outLine +BABEL_OP1_206_40740_20121214_002216_inLine +BABEL_OP1_206_40740_20121214_002216_outLine 
+BABEL_OP1_206_41100_20121129_002525_inLine +BABEL_OP1_206_41100_20121129_002525_outLine +BABEL_OP1_206_41100_20121129_003855_inLine +BABEL_OP1_206_41100_20121129_003855_outLine +BABEL_OP1_206_41493_20121128_222116_inLine +BABEL_OP1_206_41493_20121128_222116_outLine +BABEL_OP1_206_41493_20121128_230231_inLine +BABEL_OP1_206_41493_20121128_230231_outLine +BABEL_OP1_206_41920_20121129_204231_inLine +BABEL_OP1_206_41920_20121129_204231_outLine +BABEL_OP1_206_42600_20121206_212006_inLine +BABEL_OP1_206_42600_20121206_212006_outLine +BABEL_OP1_206_43646_20121206_213819_inLine +BABEL_OP1_206_43646_20121206_213819_outLine +BABEL_OP1_206_47877_20121212_233516_inLine +BABEL_OP1_206_47877_20121212_233516_outLine +BABEL_OP1_206_47877_20121213_000206_inLine +BABEL_OP1_206_47877_20121213_000206_outLine +BABEL_OP1_206_47877_20121213_030248_inLine +BABEL_OP1_206_47877_20121213_030248_outLine +BABEL_OP1_206_49767_20130530_203947_inLine +BABEL_OP1_206_49767_20130530_203947_outLine +BABEL_OP1_206_49902_20121201_230757_inLine +BABEL_OP1_206_49902_20121201_230757_outLine +BABEL_OP1_206_49902_20121202_000107_inLine +BABEL_OP1_206_49902_20121202_000107_outLine +BABEL_OP1_206_54405_20130522_224053_inLine +BABEL_OP1_206_54405_20130522_224053_outLine +BABEL_OP1_206_56198_20121128_190457_inLine +BABEL_OP1_206_56198_20121128_190457_outLine +BABEL_OP1_206_56429_20121220_005243_inLine +BABEL_OP1_206_56429_20121220_005243_outLine +BABEL_OP1_206_56684_20121212_010900_inLine +BABEL_OP1_206_56684_20121212_010900_outLine +BABEL_OP1_206_58815_20121216_231254_inLine +BABEL_OP1_206_58815_20121216_231254_outLine +BABEL_OP1_206_60538_20121205_021137_inLine +BABEL_OP1_206_60538_20121205_021137_outLine +BABEL_OP1_206_60706_20121128_191751_inLine +BABEL_OP1_206_60706_20121128_191751_outLine +BABEL_OP1_206_61011_20121219_024939_inLine +BABEL_OP1_206_61011_20121219_024939_outLine +BABEL_OP1_206_61219_20121204_234808_inLine +BABEL_OP1_206_61219_20121204_234808_outLine +BABEL_OP1_206_62362_20130301_013214_inLine +BABEL_OP1_206_62362_20130301_013214_outLine +BABEL_OP1_206_63220_20130531_002428_inLine +BABEL_OP1_206_63220_20130531_002428_outLine +BABEL_OP1_206_65692_20121212_230954_inLine +BABEL_OP1_206_65692_20121212_230954_outLine +BABEL_OP1_206_66837_20130111_182531_inLine +BABEL_OP1_206_66837_20130111_182531_outLine +BABEL_OP1_206_66959_20121218_192949_inLine +BABEL_OP1_206_66959_20121218_192949_outLine +BABEL_OP1_206_67066_20130604_231822_inLine +BABEL_OP1_206_67066_20130604_231822_outLine +BABEL_OP1_206_71780_20121219_010817_inLine +BABEL_OP1_206_71780_20121219_010817_outLine +BABEL_OP1_206_77225_20130604_013253_inLine +BABEL_OP1_206_77225_20130604_013253_outLine +BABEL_OP1_206_79858_20121126_013705_inLine +BABEL_OP1_206_79858_20121126_013705_outLine +BABEL_OP1_206_81854_20130122_210400_inLine +BABEL_OP1_206_81854_20130122_210400_outLine +BABEL_OP1_206_82224_20130602_234038_inLine +BABEL_OP1_206_82224_20130602_234038_outLine +BABEL_OP1_206_82966_20121213_231116_inLine +BABEL_OP1_206_82966_20121213_231116_outLine +BABEL_OP1_206_84838_20121210_051040_inLine +BABEL_OP1_206_84838_20121210_051040_outLine +BABEL_OP1_206_85048_20121220_202904_inLine +BABEL_OP1_206_85048_20121220_202904_outLine +BABEL_OP1_206_85340_20121129_000834_inLine +BABEL_OP1_206_85340_20121129_000834_outLine +BABEL_OP1_206_85340_20121129_231533_inLine +BABEL_OP1_206_85340_20121129_231533_outLine +BABEL_OP1_206_92252_20130601_235344_inLine +BABEL_OP1_206_92252_20130601_235344_outLine +BABEL_OP1_206_92886_20121128_042622_inLine 
+BABEL_OP1_206_92886_20121128_042622_outLine +BABEL_OP1_206_92886_20121128_045107_inLine +BABEL_OP1_206_92886_20121128_045107_outLine +BABEL_OP1_206_93007_20130528_211314_inLine +BABEL_OP1_206_93007_20130528_211314_outLine +BABEL_OP1_206_95490_20130103_005535_inLine +BABEL_OP1_206_95490_20130103_005535_outLine +BABEL_OP1_206_96584_20130121_011505_inLine +BABEL_OP1_206_96584_20130121_011505_outLine +BABEL_OP1_206_97849_20130123_000229_inLine +BABEL_OP1_206_97849_20130123_000229_outLine +BABEL_OP1_206_97988_20121212_223804_inLine +BABEL_OP1_206_97988_20121212_223804_outLine +BABEL_OP1_206_99594_20121220_022404_outLine +BABEL_OP1_206_99718_20121128_213548_inLine +BABEL_OP1_206_99718_20121128_213548_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/eval.list b/egs/babel/s5d/conf/lists/206-zulu/eval.list new file mode 100644 index 00000000000..b75e559d38b --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/eval.list @@ -0,0 +1,202 @@ +BABEL_OP1_206_10019_20121129_221847_inLine +BABEL_OP1_206_10019_20121129_221847_outLine +BABEL_OP1_206_10184_20130530_225826_inLine +BABEL_OP1_206_10184_20130530_225826_outLine +BABEL_OP1_206_10319_20121201_000052_inLine +BABEL_OP1_206_10319_20121201_000052_outLine +BABEL_OP1_206_10319_20121201_002831_inLine +BABEL_OP1_206_10319_20121201_002831_outLine +BABEL_OP1_206_10416_20121229_182422_inLine +BABEL_OP1_206_10416_20121229_182422_outLine +BABEL_OP1_206_13040_20121206_215505_inLine +BABEL_OP1_206_13040_20121206_215505_outLine +BABEL_OP1_206_13040_20121206_221350_inLine +BABEL_OP1_206_13040_20121206_221350_outLine +BABEL_OP1_206_14229_20121220_002130_inLine +BABEL_OP1_206_14229_20121220_002130_outLine +BABEL_OP1_206_14237_20121130_193638_inLine +BABEL_OP1_206_14237_20121130_193638_outLine +BABEL_OP1_206_15926_20121211_205054_inLine +BABEL_OP1_206_15926_20121211_205054_outLine +BABEL_OP1_206_16787_20121220_025209_inLine +BABEL_OP1_206_16787_20121220_025209_outLine +BABEL_OP1_206_17165_20121128_185603_inLine +BABEL_OP1_206_17165_20121128_185603_outLine +BABEL_OP1_206_17573_20121214_234307_inLine +BABEL_OP1_206_17573_20121214_234307_outLine +BABEL_OP1_206_18863_20121214_201427_inLine +BABEL_OP1_206_18863_20121214_201427_outLine +BABEL_OP1_206_19672_20121218_230453_inLine +BABEL_OP1_206_19672_20121218_230453_outLine +BABEL_OP1_206_21794_20121130_183726_inLine +BABEL_OP1_206_21794_20121130_183726_outLine +BABEL_OP1_206_22641_20130605_195037_inLine +BABEL_OP1_206_22641_20130605_195037_outLine +BABEL_OP1_206_23395_20130110_222315_inLine +BABEL_OP1_206_23395_20130110_222315_outLine +BABEL_OP1_206_23628_20121128_215213_inLine +BABEL_OP1_206_23628_20121128_215213_outLine +BABEL_OP1_206_25220_20130528_232132_inLine +BABEL_OP1_206_25220_20130528_232132_outLine +BABEL_OP1_206_26074_20121221_172845_inLine +BABEL_OP1_206_26074_20121221_172845_outLine +BABEL_OP1_206_26478_20130523_003304_inLine +BABEL_OP1_206_26478_20130523_003304_outLine +BABEL_OP1_206_29208_20121220_212757_inLine +BABEL_OP1_206_29208_20121220_212757_outLine +BABEL_OP1_206_29777_20121220_010458_inLine +BABEL_OP1_206_29777_20121220_010458_outLine +BABEL_OP1_206_29777_20121220_012240_inLine +BABEL_OP1_206_29777_20121220_012240_outLine +BABEL_OP1_206_30250_20121129_205052_inLine +BABEL_OP1_206_30250_20121129_205052_outLine +BABEL_OP1_206_31484_20130530_181941_inLine +BABEL_OP1_206_31484_20130530_181941_outLine +BABEL_OP1_206_31979_20130120_174010_inLine +BABEL_OP1_206_31979_20130120_174010_outLine +BABEL_OP1_206_35000_20121220_022037_inLine +BABEL_OP1_206_35000_20121220_022037_outLine 
+BABEL_OP1_206_35202_20121218_153251_inLine +BABEL_OP1_206_35202_20121218_153251_outLine +BABEL_OP1_206_35706_20130603_175544_inLine +BABEL_OP1_206_35706_20130603_175544_outLine +BABEL_OP1_206_36669_20130528_012812_inLine +BABEL_OP1_206_36669_20130528_012812_outLine +BABEL_OP1_206_37064_20121128_061027_inLine +BABEL_OP1_206_37064_20121128_061027_outLine +BABEL_OP1_206_37064_20121128_224230_inLine +BABEL_OP1_206_37064_20121128_224230_outLine +BABEL_OP1_206_37064_20121128_233033_inLine +BABEL_OP1_206_37064_20121128_233033_outLine +BABEL_OP1_206_40092_20130604_005619_inLine +BABEL_OP1_206_40092_20130604_005619_outLine +BABEL_OP1_206_41741_20121123_161203_inLine +BABEL_OP1_206_41741_20121123_161203_outLine +BABEL_OP1_206_41745_20121206_052354_inLine +BABEL_OP1_206_41745_20121206_052354_outLine +BABEL_OP1_206_42231_20121213_215559_inLine +BABEL_OP1_206_42231_20121213_215559_outLine +BABEL_OP1_206_43920_20130527_173524_inLine +BABEL_OP1_206_43920_20130527_173524_outLine +BABEL_OP1_206_45106_20121207_233620_inLine +BABEL_OP1_206_45106_20121207_233620_outLine +BABEL_OP1_206_45140_20130602_193439_inLine +BABEL_OP1_206_45140_20130602_193439_outLine +BABEL_OP1_206_45777_20121220_211320_inLine +BABEL_OP1_206_45777_20121220_211320_outLine +BABEL_OP1_206_45843_20130103_065538_inLine +BABEL_OP1_206_45843_20130103_065538_outLine +BABEL_OP1_206_46625_20121206_223937_inLine +BABEL_OP1_206_46625_20121206_223937_outLine +BABEL_OP1_206_46712_20121129_221717_inLine +BABEL_OP1_206_46712_20121129_221717_outLine +BABEL_OP1_206_48200_20121218_202643_inLine +BABEL_OP1_206_48200_20121218_202643_outLine +BABEL_OP1_206_48758_20130601_165902_inLine +BABEL_OP1_206_48758_20130601_165902_outLine +BABEL_OP1_206_50962_20121205_031651_inLine +BABEL_OP1_206_50962_20121205_031651_outLine +BABEL_OP1_206_53842_20121203_222845_inLine +BABEL_OP1_206_53842_20121203_222845_outLine +BABEL_OP1_206_54040_20121216_233328_inLine +BABEL_OP1_206_54040_20121216_233328_outLine +BABEL_OP1_206_55742_20121129_210507_inLine +BABEL_OP1_206_55742_20121129_210507_outLine +BABEL_OP1_206_56090_20121130_064154_inLine +BABEL_OP1_206_56090_20121130_064154_outLine +BABEL_OP1_206_56743_20121205_030951_inLine +BABEL_OP1_206_56743_20121205_030951_outLine +BABEL_OP1_206_57650_20130605_164821_inLine +BABEL_OP1_206_57650_20130605_164821_outLine +BABEL_OP1_206_57654_20121201_024813_inLine +BABEL_OP1_206_57654_20121201_024813_outLine +BABEL_OP1_206_59993_20121218_222534_inLine +BABEL_OP1_206_59993_20121218_222534_outLine +BABEL_OP1_206_60282_20130604_201941_inLine +BABEL_OP1_206_60282_20130604_201941_outLine +BABEL_OP1_206_60836_20130523_194516_inLine +BABEL_OP1_206_60836_20130523_194516_outLine +BABEL_OP1_206_62155_20130301_010901_inLine +BABEL_OP1_206_62155_20130301_010901_outLine +BABEL_OP1_206_62835_20121201_223026_inLine +BABEL_OP1_206_62835_20121201_223026_outLine +BABEL_OP1_206_66967_20121128_215012_inLine +BABEL_OP1_206_66967_20121128_215012_outLine +BABEL_OP1_206_67842_20130523_231054_inLine +BABEL_OP1_206_67842_20130523_231054_outLine +BABEL_OP1_206_71282_20121219_154752_inLine +BABEL_OP1_206_71282_20121219_154752_outLine +BABEL_OP1_206_71333_20121219_195507_inLine +BABEL_OP1_206_71333_20121219_195507_outLine +BABEL_OP1_206_71333_20121219_202710_inLine +BABEL_OP1_206_71333_20121219_202710_outLine +BABEL_OP1_206_71333_20121220_020603_inLine +BABEL_OP1_206_71333_20121220_020603_outLine +BABEL_OP1_206_71704_20121203_210805_inLine +BABEL_OP1_206_71704_20121203_210805_outLine +BABEL_OP1_206_73042_20130528_223845_inLine 
+BABEL_OP1_206_73042_20130528_223845_outLine +BABEL_OP1_206_73622_20121203_233522_inLine +BABEL_OP1_206_73622_20121203_233522_outLine +BABEL_OP1_206_73837_20121202_232509_inLine +BABEL_OP1_206_73837_20121202_232509_outLine +BABEL_OP1_206_73837_20121202_234026_inLine +BABEL_OP1_206_73837_20121202_234026_outLine +BABEL_OP1_206_74111_20130527_210704_inLine +BABEL_OP1_206_74111_20130527_210704_outLine +BABEL_OP1_206_74641_20130601_192414_inLine +BABEL_OP1_206_74641_20130601_192414_outLine +BABEL_OP1_206_76773_20121219_022906_inLine +BABEL_OP1_206_76773_20121219_022906_outLine +BABEL_OP1_206_78630_20130420_211941_inLine +BABEL_OP1_206_78630_20130420_211941_outLine +BABEL_OP1_206_78976_20121206_005749_inLine +BABEL_OP1_206_78976_20121206_005749_outLine +BABEL_OP1_206_79820_20121127_235837_inLine +BABEL_OP1_206_79820_20121127_235837_outLine +BABEL_OP1_206_81392_20121219_022235_inLine +BABEL_OP1_206_81392_20121219_022235_outLine +BABEL_OP1_206_81404_20121215_230948_inLine +BABEL_OP1_206_81404_20121215_230948_outLine +BABEL_OP1_206_84125_20121201_213358_inLine +BABEL_OP1_206_84125_20121201_213358_outLine +BABEL_OP1_206_88873_20121129_222922_inLine +BABEL_OP1_206_88873_20121129_222922_outLine +BABEL_OP1_206_89045_20121201_221210_inLine +BABEL_OP1_206_89045_20121201_221210_outLine +BABEL_OP1_206_89045_20121201_222746_inLine +BABEL_OP1_206_89045_20121201_222746_outLine +BABEL_OP1_206_89372_20130103_022242_inLine +BABEL_OP1_206_89372_20130103_022242_outLine +BABEL_OP1_206_90935_20121207_230747_inLine +BABEL_OP1_206_90935_20121207_230747_outLine +BABEL_OP1_206_91593_20130602_212217_inLine +BABEL_OP1_206_91593_20130602_212217_outLine +BABEL_OP1_206_91884_20130531_175329_inLine +BABEL_OP1_206_91884_20130531_175329_outLine +BABEL_OP1_206_92698_20121128_234824_inLine +BABEL_OP1_206_92698_20121128_234824_outLine +BABEL_OP1_206_92698_20121129_000933_inLine +BABEL_OP1_206_92698_20121129_000933_outLine +BABEL_OP1_206_93153_20130524_203739_inLine +BABEL_OP1_206_93153_20130524_203739_outLine +BABEL_OP1_206_93946_20130531_215200_inLine +BABEL_OP1_206_93946_20130531_215200_outLine +BABEL_OP1_206_94002_20121208_002204_inLine +BABEL_OP1_206_94002_20121208_002204_outLine +BABEL_OP1_206_95399_20130528_171818_inLine +BABEL_OP1_206_95399_20130528_171818_outLine +BABEL_OP1_206_96205_20121217_165620_inLine +BABEL_OP1_206_96205_20121217_165620_outLine +BABEL_OP1_206_96205_20121217_171026_inLine +BABEL_OP1_206_96205_20121217_171026_outLine +BABEL_OP1_206_96504_20121207_214704_inLine +BABEL_OP1_206_96504_20121207_214704_outLine +BABEL_OP1_206_98580_20121201_203508_inLine +BABEL_OP1_206_98580_20121201_203508_outLine +BABEL_OP1_206_98888_20130603_202859_inLine +BABEL_OP1_206_98888_20130603_202859_outLine +BABEL_OP1_206_99401_20121123_043326_inLine +BABEL_OP1_206_99401_20121123_043326_outLine +BABEL_OP1_206_99732_20121220_033454_inLine +BABEL_OP1_206_99732_20121220_033454_outLine diff --git a/egs/babel/s5d/conf/lists/206-zulu/evalpart1.list b/egs/babel/s5d/conf/lists/206-zulu/evalpart1.list new file mode 100644 index 00000000000..6b6bf451b3e --- /dev/null +++ b/egs/babel/s5d/conf/lists/206-zulu/evalpart1.list @@ -0,0 +1,72 @@ +BABEL_OP1_206_13040_20121206_215505_inLine +BABEL_OP1_206_13040_20121206_215505_outLine +BABEL_OP1_206_13040_20121206_221350_inLine +BABEL_OP1_206_13040_20121206_221350_outLine +BABEL_OP1_206_18863_20121214_201427_inLine +BABEL_OP1_206_18863_20121214_201427_outLine +BABEL_OP1_206_19672_20121218_230453_inLine +BABEL_OP1_206_19672_20121218_230453_outLine +BABEL_OP1_206_21794_20121130_183726_inLine 
+BABEL_OP1_206_21794_20121130_183726_outLine
+BABEL_OP1_206_23395_20130110_222315_inLine
+BABEL_OP1_206_23395_20130110_222315_outLine
+BABEL_OP1_206_23628_20121128_215213_inLine
+BABEL_OP1_206_23628_20121128_215213_outLine
+BABEL_OP1_206_30250_20121129_205052_inLine
+BABEL_OP1_206_30250_20121129_205052_outLine
+BABEL_OP1_206_31979_20130120_174010_inLine
+BABEL_OP1_206_31979_20130120_174010_outLine
+BABEL_OP1_206_35202_20121218_153251_inLine
+BABEL_OP1_206_35202_20121218_153251_outLine
+BABEL_OP1_206_37064_20121128_061027_inLine
+BABEL_OP1_206_37064_20121128_061027_outLine
+BABEL_OP1_206_37064_20121128_224230_inLine
+BABEL_OP1_206_37064_20121128_224230_outLine
+BABEL_OP1_206_37064_20121128_233033_inLine
+BABEL_OP1_206_37064_20121128_233033_outLine
+BABEL_OP1_206_41745_20121206_052354_inLine
+BABEL_OP1_206_41745_20121206_052354_outLine
+BABEL_OP1_206_45140_20130602_193439_inLine
+BABEL_OP1_206_45140_20130602_193439_outLine
+BABEL_OP1_206_45777_20121220_211320_inLine
+BABEL_OP1_206_45777_20121220_211320_outLine
+BABEL_OP1_206_48758_20130601_165902_inLine
+BABEL_OP1_206_48758_20130601_165902_outLine
+BABEL_OP1_206_55742_20121129_210507_inLine
+BABEL_OP1_206_55742_20121129_210507_outLine
+BABEL_OP1_206_57650_20130605_164821_inLine
+BABEL_OP1_206_57650_20130605_164821_outLine
+BABEL_OP1_206_57654_20121201_024813_inLine
+BABEL_OP1_206_57654_20121201_024813_outLine
+BABEL_OP1_206_62155_20130301_010901_inLine
+BABEL_OP1_206_62155_20130301_010901_outLine
+BABEL_OP1_206_62835_20121201_223026_inLine
+BABEL_OP1_206_62835_20121201_223026_outLine
+BABEL_OP1_206_71333_20121219_195507_inLine
+BABEL_OP1_206_71333_20121219_195507_outLine
+BABEL_OP1_206_71333_20121219_202710_inLine
+BABEL_OP1_206_71333_20121219_202710_outLine
+BABEL_OP1_206_71333_20121220_020603_inLine
+BABEL_OP1_206_71333_20121220_020603_outLine
+BABEL_OP1_206_71704_20121203_210805_inLine
+BABEL_OP1_206_71704_20121203_210805_outLine
+BABEL_OP1_206_73622_20121203_233522_inLine
+BABEL_OP1_206_73622_20121203_233522_outLine
+BABEL_OP1_206_73837_20121202_232509_inLine
+BABEL_OP1_206_73837_20121202_232509_outLine
+BABEL_OP1_206_73837_20121202_234026_inLine
+BABEL_OP1_206_73837_20121202_234026_outLine
+BABEL_OP1_206_78630_20130420_211941_inLine
+BABEL_OP1_206_78630_20130420_211941_outLine
+BABEL_OP1_206_78976_20121206_005749_inLine
+BABEL_OP1_206_78976_20121206_005749_outLine
+BABEL_OP1_206_81392_20121219_022235_inLine
+BABEL_OP1_206_81392_20121219_022235_outLine
+BABEL_OP1_206_88873_20121129_222922_inLine
+BABEL_OP1_206_88873_20121129_222922_outLine
+BABEL_OP1_206_90935_20121207_230747_inLine
+BABEL_OP1_206_90935_20121207_230747_outLine
+BABEL_OP1_206_98580_20121201_203508_inLine
+BABEL_OP1_206_98580_20121201_203508_outLine
+BABEL_OP1_206_98888_20130603_202859_inLine
+BABEL_OP1_206_98888_20130603_202859_outLine
diff --git a/egs/babel/s5d/conf/lists/206-zulu/train.FullLP.list b/egs/babel/s5d/conf/lists/206-zulu/train.FullLP.list
new file mode 100644
index 00000000000..f47e8d654e1
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/206-zulu/train.FullLP.list
@@ -0,0 +1,829 @@
+BABEL_OP1_206_10901_20121128_230024_inLine
+BABEL_OP1_206_10901_20121128_230024_outLine
+BABEL_OP1_206_10901_20121129_003238_inLine
+BABEL_OP1_206_10901_20121129_003238_outLine
+BABEL_OP1_206_10966_20121205_213021_inLine
+BABEL_OP1_206_10966_20121205_213021_outLine
+BABEL_OP1_206_10966_20121205_214750_inLine
+BABEL_OP1_206_10966_20121205_214750_outLine
+BABEL_OP1_206_11581_20121213_020058_inLine
+BABEL_OP1_206_11581_20121213_020058_outLine
+BABEL_OP1_206_11797_20121207_001426_inLine
+BABEL_OP1_206_11797_20121207_001426_outLine
+BABEL_OP1_206_11797_20121207_002917_inLine
+BABEL_OP1_206_11797_20121207_002917_outLine
+BABEL_OP1_206_11859_20130602_013210_inLine
+BABEL_OP1_206_11859_20130602_013210_outLine
+BABEL_OP1_206_12242_20121218_022109_inLine
+BABEL_OP1_206_12242_20121218_022109_outLine
+BABEL_OP1_206_12851_20121215_010712_inLine
+BABEL_OP1_206_12851_20121215_010712_outLine
+BABEL_OP1_206_13030_20121129_225418_inLine
+BABEL_OP1_206_13030_20121129_225418_outLine
+BABEL_OP1_206_13184_20121216_223430_inLine
+BABEL_OP1_206_13184_20121216_223430_outLine
+BABEL_OP1_206_13184_20121216_224722_inLine
+BABEL_OP1_206_13184_20121216_224722_outLine
+BABEL_OP1_206_13483_20121219_205820_inLine
+BABEL_OP1_206_13483_20121219_205820_outLine
+BABEL_OP1_206_13483_20121219_212915_inLine
+BABEL_OP1_206_13483_20121219_212915_outLine
+BABEL_OP1_206_13490_20121221_005743_inLine
+BABEL_OP1_206_13490_20121221_005743_outLine
+BABEL_OP1_206_13744_20121205_205818_inLine
+BABEL_OP1_206_13744_20121205_205818_outLine
+BABEL_OP1_206_14137_20130118_010712_inLine
+BABEL_OP1_206_14137_20130118_010712_outLine
+BABEL_OP1_206_14137_20130122_014528_inLine
+BABEL_OP1_206_14137_20130122_014528_outLine
+BABEL_OP1_206_14179_20121210_224630_inLine
+BABEL_OP1_206_14179_20121210_224630_outLine
+BABEL_OP1_206_14440_20121218_231347_inLine
+BABEL_OP1_206_14440_20121218_231347_outLine
+BABEL_OP1_206_14719_20121213_040757_inLine
+BABEL_OP1_206_14719_20121213_040757_outLine
+BABEL_OP1_206_14729_20130531_183022_inLine
+BABEL_OP1_206_14729_20130531_183022_outLine
+BABEL_OP1_206_14807_20121221_150943_inLine
+BABEL_OP1_206_14807_20121221_150943_outLine
+BABEL_OP1_206_14814_20121129_203954_inLine
+BABEL_OP1_206_14814_20121129_203954_outLine
+BABEL_OP1_206_14899_20121203_021835_inLine
+BABEL_OP1_206_14899_20121203_021835_outLine
+BABEL_OP1_206_14929_20121203_232411_inLine
+BABEL_OP1_206_14929_20121203_232411_outLine
+BABEL_OP1_206_15024_20130527_234410_inLine
+BABEL_OP1_206_15024_20130527_234410_outLine
+BABEL_OP1_206_15324_20121208_010033_inLine
+BABEL_OP1_206_15324_20121208_010033_outLine
+BABEL_OP1_206_15702_20121214_225618_inLine
+BABEL_OP1_206_15702_20121214_225618_outLine
+BABEL_OP1_206_15702_20121214_231152_inLine
+BABEL_OP1_206_15702_20121214_231152_outLine
+BABEL_OP1_206_15702_20121214_232449_inLine
+BABEL_OP1_206_15702_20121214_232449_outLine
+BABEL_OP1_206_16149_20121201_010342_inLine
+BABEL_OP1_206_16149_20121201_010342_outLine
+BABEL_OP1_206_16467_20130531_200137_inLine
+BABEL_OP1_206_16467_20130531_200137_outLine
+BABEL_OP1_206_16475_20130121_210828_inLine
+BABEL_OP1_206_16475_20130121_210828_outLine
+BABEL_OP1_206_16475_20130121_212136_inLine
+BABEL_OP1_206_16475_20130121_212136_outLine
+BABEL_OP1_206_16839_20121217_170534_inLine
+BABEL_OP1_206_16839_20121217_170534_outLine
+BABEL_OP1_206_16886_20130524_232154_inLine
+BABEL_OP1_206_16886_20130524_232154_outLine
+BABEL_OP1_206_17032_20121219_220514_inLine
+BABEL_OP1_206_17032_20121219_220514_outLine
+BABEL_OP1_206_17280_20130527_191437_inLine
+BABEL_OP1_206_17280_20130527_191437_outLine
+BABEL_OP1_206_17440_20121227_213432_inLine
+BABEL_OP1_206_17440_20121227_213432_outLine
+BABEL_OP1_206_17472_20121214_193824_inLine
+BABEL_OP1_206_17472_20121214_193824_outLine
+BABEL_OP1_206_17567_20121209_205317_inLine
+BABEL_OP1_206_17567_20121209_205317_outLine
+BABEL_OP1_206_17567_20121209_211139_inLine
+BABEL_OP1_206_17567_20121209_211139_outLine
+BABEL_OP1_206_17615_20121214_193534_inLine
+BABEL_OP1_206_17615_20121214_193534_outLine
+BABEL_OP1_206_17881_20130121_005313_inLine
+BABEL_OP1_206_17881_20130121_005313_outLine
+BABEL_OP1_206_17923_20121130_214207_inLine
+BABEL_OP1_206_17923_20121130_214207_outLine
+BABEL_OP1_206_18291_20130604_183732_inLine
+BABEL_OP1_206_18291_20130604_183732_outLine
+BABEL_OP1_206_19722_20121130_203924_inLine
+BABEL_OP1_206_19722_20121130_203924_outLine
+BABEL_OP1_206_19773_20130101_015259_inLine
+BABEL_OP1_206_19773_20130101_015259_outLine
+BABEL_OP1_206_19818_20130529_204811_inLine
+BABEL_OP1_206_19818_20130529_204811_outLine
+BABEL_OP1_206_19877_20130123_175339_inLine
+BABEL_OP1_206_19877_20130123_175339_outLine
+BABEL_OP1_206_19877_20130123_181047_inLine
+BABEL_OP1_206_19877_20130123_181047_outLine
+BABEL_OP1_206_20682_20121213_030430_inLine
+BABEL_OP1_206_20682_20121213_030430_outLine
+BABEL_OP1_206_20800_20130523_220352_inLine
+BABEL_OP1_206_20800_20130523_220352_outLine
+BABEL_OP1_206_20916_20121205_203848_inLine
+BABEL_OP1_206_20916_20121205_203848_outLine
+BABEL_OP1_206_20922_20121214_231110_inLine
+BABEL_OP1_206_20922_20121214_231110_outLine
+BABEL_OP1_206_21004_20121210_215455_inLine
+BABEL_OP1_206_21004_20121210_215455_outLine
+BABEL_OP1_206_21004_20121210_223449_inLine
+BABEL_OP1_206_21004_20121210_223449_outLine
+BABEL_OP1_206_21206_20121220_001511_inLine
+BABEL_OP1_206_21206_20121220_001511_outLine
+BABEL_OP1_206_21327_20130111_022748_inLine
+BABEL_OP1_206_21327_20130111_022748_outLine
+BABEL_OP1_206_21892_20121213_235725_inLine
+BABEL_OP1_206_21892_20121213_235725_outLine
+BABEL_OP1_206_22494_20130530_004456_inLine
+BABEL_OP1_206_22494_20130530_004456_outLine
+BABEL_OP1_206_22624_20121219_210041_inLine
+BABEL_OP1_206_22624_20121219_210041_outLine
+BABEL_OP1_206_22826_20130121_231859_inLine
+BABEL_OP1_206_22826_20130121_231859_outLine
+BABEL_OP1_206_22826_20130121_233139_inLine
+BABEL_OP1_206_22826_20130121_233139_outLine
+BABEL_OP1_206_22965_20121128_011001_inLine
+BABEL_OP1_206_22965_20121128_011001_outLine
+BABEL_OP1_206_22965_20121128_012241_inLine
+BABEL_OP1_206_22965_20121128_012241_outLine
+BABEL_OP1_206_23006_20121203_004250_inLine
+BABEL_OP1_206_23006_20121203_004250_outLine
+BABEL_OP1_206_23006_20121203_073608_inLine
+BABEL_OP1_206_23006_20121203_073608_outLine
+BABEL_OP1_206_23092_20121227_211821_inLine
+BABEL_OP1_206_23092_20121227_211821_outLine
+BABEL_OP1_206_23151_20121217_034512_inLine
+BABEL_OP1_206_23151_20121217_034512_outLine
+BABEL_OP1_206_23153_20130102_224836_inLine
+BABEL_OP1_206_23153_20130102_224836_outLine
+BABEL_OP1_206_23190_20121219_204325_inLine
+BABEL_OP1_206_23190_20121219_204325_outLine
+BABEL_OP1_206_23239_20130118_000831_inLine
+BABEL_OP1_206_23239_20130118_000831_outLine
+BABEL_OP1_206_23505_20121203_010039_inLine
+BABEL_OP1_206_23505_20121203_010039_outLine
+BABEL_OP1_206_24253_20130120_235750_inLine
+BABEL_OP1_206_24253_20130120_235750_outLine
+BABEL_OP1_206_24253_20130121_000835_inLine
+BABEL_OP1_206_24253_20130121_000835_outLine
+BABEL_OP1_206_24253_20130121_012503_inLine
+BABEL_OP1_206_24253_20130121_012503_outLine
+BABEL_OP1_206_24323_20121214_212407_inLine
+BABEL_OP1_206_24323_20121214_212407_outLine
+BABEL_OP1_206_24323_20121214_213448_inLine
+BABEL_OP1_206_24323_20121214_213448_outLine
+BABEL_OP1_206_24532_20121201_203102_inLine
+BABEL_OP1_206_24532_20121201_203102_outLine
+BABEL_OP1_206_24569_20121210_211659_inLine
+BABEL_OP1_206_24569_20121210_211659_outLine
+BABEL_OP1_206_24590_20121201_210938_inLine
+BABEL_OP1_206_24590_20121201_210938_outLine
+BABEL_OP1_206_24590_20121201_215618_inLine
+BABEL_OP1_206_24590_20121201_215618_outLine
+BABEL_OP1_206_24605_20121218_201807_inLine
+BABEL_OP1_206_24605_20121218_201807_outLine
+BABEL_OP1_206_24982_20130603_194918_inLine
+BABEL_OP1_206_24982_20130603_194918_outLine
+BABEL_OP1_206_25412_20121210_201120_inLine
+BABEL_OP1_206_25412_20121210_201120_outLine
+BABEL_OP1_206_25412_20121210_203544_inLine
+BABEL_OP1_206_25412_20121210_203544_outLine
+BABEL_OP1_206_25496_20130529_000539_inLine
+BABEL_OP1_206_25496_20130529_000539_outLine
+BABEL_OP1_206_25698_20130603_011444_inLine
+BABEL_OP1_206_25698_20130603_011444_outLine
+BABEL_OP1_206_25719_20121215_000803_inLine
+BABEL_OP1_206_25719_20121215_000803_outLine
+BABEL_OP1_206_25767_20121204_021252_inLine
+BABEL_OP1_206_25767_20121204_021252_outLine
+BABEL_OP1_206_25961_20121202_232650_inLine
+BABEL_OP1_206_25961_20121202_232650_outLine
+BABEL_OP1_206_25961_20121202_234202_inLine
+BABEL_OP1_206_25961_20121202_234202_outLine
+BABEL_OP1_206_26206_20130529_172847_inLine
+BABEL_OP1_206_26206_20130529_172847_outLine
+BABEL_OP1_206_26388_20121202_191806_inLine
+BABEL_OP1_206_26388_20121202_191806_outLine
+BABEL_OP1_206_26836_20121201_210310_inLine
+BABEL_OP1_206_26836_20121201_210310_outLine
+BABEL_OP1_206_27042_20121219_230502_inLine
+BABEL_OP1_206_27042_20121219_230502_outLine
+BABEL_OP1_206_27082_20121220_012037_inLine
+BABEL_OP1_206_27082_20121220_012037_outLine
+BABEL_OP1_206_27125_20121203_012043_inLine
+BABEL_OP1_206_27125_20121203_012043_outLine
+BABEL_OP1_206_27203_20121214_210018_inLine
+BABEL_OP1_206_27203_20121214_210018_outLine
+BABEL_OP1_206_27590_20121216_180900_inLine
+BABEL_OP1_206_27590_20121216_180900_outLine
+BABEL_OP1_206_27841_20121216_014031_inLine
+BABEL_OP1_206_27841_20121216_014031_outLine
+BABEL_OP1_206_28303_20121128_201831_inLine
+BABEL_OP1_206_28303_20121128_201831_outLine
+BABEL_OP1_206_28419_20121207_221153_inLine
+BABEL_OP1_206_28419_20121207_221153_outLine
+BABEL_OP1_206_28775_20121203_022428_inLine
+BABEL_OP1_206_28775_20121203_022428_outLine
+BABEL_OP1_206_28945_20130118_003100_inLine
+BABEL_OP1_206_28945_20130118_003100_outLine
+BABEL_OP1_206_29023_20121201_234219_inLine
+BABEL_OP1_206_29023_20121201_234219_outLine
+BABEL_OP1_206_29039_20121220_013046_inLine
+BABEL_OP1_206_29039_20121220_013046_outLine
+BABEL_OP1_206_29135_20121219_224133_inLine
+BABEL_OP1_206_29135_20121219_224133_outLine
+BABEL_OP1_206_29323_20121219_201726_inLine
+BABEL_OP1_206_29323_20121219_201726_outLine
+BABEL_OP1_206_29323_20121219_203137_inLine
+BABEL_OP1_206_29323_20121219_203137_outLine
+BABEL_OP1_206_30395_20121206_014115_inLine
+BABEL_OP1_206_30395_20121206_014115_outLine
+BABEL_OP1_206_30869_20121227_221910_inLine
+BABEL_OP1_206_30869_20121227_221910_outLine
+BABEL_OP1_206_31109_20121224_061142_inLine
+BABEL_OP1_206_31109_20121224_061142_outLine
+BABEL_OP1_206_31490_20121128_234650_inLine
+BABEL_OP1_206_31490_20121128_234650_outLine
+BABEL_OP1_206_31624_20121123_081518_inLine
+BABEL_OP1_206_31624_20121123_081518_outLine
+BABEL_OP1_206_31628_20130528_194548_inLine
+BABEL_OP1_206_31628_20130528_194548_outLine
+BABEL_OP1_206_32122_20121128_184757_inLine
+BABEL_OP1_206_32122_20121128_184757_outLine
+BABEL_OP1_206_32301_20130530_191142_inLine
+BABEL_OP1_206_32301_20130530_191142_outLine
+BABEL_OP1_206_32328_20121215_181911_inLine
+BABEL_OP1_206_32328_20121215_181911_outLine
+BABEL_OP1_206_32708_20121231_225706_inLine
+BABEL_OP1_206_32708_20121231_225706_outLine
+BABEL_OP1_206_32837_20121213_221825_inLine
+BABEL_OP1_206_32837_20121213_221825_outLine
+BABEL_OP1_206_32837_20121213_223037_inLine
+BABEL_OP1_206_32837_20121213_223037_outLine
+BABEL_OP1_206_33111_20130601_200233_inLine
+BABEL_OP1_206_33111_20130601_200233_outLine
+BABEL_OP1_206_33273_20121129_201318_inLine
+BABEL_OP1_206_33273_20121129_201318_outLine
+BABEL_OP1_206_33355_20121130_055943_inLine
+BABEL_OP1_206_33355_20121130_055943_outLine
+BABEL_OP1_206_33672_20130524_171145_inLine
+BABEL_OP1_206_33672_20130524_171145_outLine
+BABEL_OP1_206_33704_20121213_214430_inLine
+BABEL_OP1_206_33704_20121213_214430_outLine
+BABEL_OP1_206_33840_20121213_230741_inLine
+BABEL_OP1_206_33840_20121213_230741_outLine
+BABEL_OP1_206_34197_20121128_232538_inLine
+BABEL_OP1_206_34197_20121128_232538_outLine
+BABEL_OP1_206_34328_20121202_184915_inLine
+BABEL_OP1_206_34328_20121202_184915_outLine
+BABEL_OP1_206_34564_20121214_020257_inLine
+BABEL_OP1_206_34564_20121214_020257_outLine
+BABEL_OP1_206_34679_20121206_000152_inLine
+BABEL_OP1_206_34679_20121206_000152_outLine
+BABEL_OP1_206_34826_20121215_005505_inLine
+BABEL_OP1_206_34826_20121215_005505_outLine
+BABEL_OP1_206_35008_20121216_210449_inLine
+BABEL_OP1_206_35008_20121216_210449_outLine
+BABEL_OP1_206_36505_20121213_222927_inLine
+BABEL_OP1_206_36505_20121213_222927_outLine
+BABEL_OP1_206_36894_20121128_201825_inLine
+BABEL_OP1_206_36894_20121128_201825_outLine
+BABEL_OP1_206_37598_20130111_224005_inLine
+BABEL_OP1_206_37598_20130111_224005_outLine
+BABEL_OP1_206_38431_20121214_013939_inLine
+BABEL_OP1_206_38431_20121214_013939_outLine
+BABEL_OP1_206_38554_20121123_025415_inLine
+BABEL_OP1_206_38554_20121123_025415_outLine
+BABEL_OP1_206_38689_20121217_013737_inLine
+BABEL_OP1_206_38689_20121217_013737_outLine
+BABEL_OP1_206_38878_20130530_172309_inLine
+BABEL_OP1_206_38878_20130530_172309_outLine
+BABEL_OP1_206_39059_20121215_230057_inLine
+BABEL_OP1_206_39059_20121215_230057_outLine
+BABEL_OP1_206_39059_20121216_000252_inLine
+BABEL_OP1_206_39059_20121216_000252_outLine
+BABEL_OP1_206_39307_20121207_024156_inLine
+BABEL_OP1_206_39307_20121207_024156_outLine
+BABEL_OP1_206_39426_20130120_232407_inLine
+BABEL_OP1_206_39426_20130120_232407_outLine
+BABEL_OP1_206_39426_20130120_233651_inLine
+BABEL_OP1_206_39426_20130120_233651_outLine
+BABEL_OP1_206_40557_20121218_025254_inLine
+BABEL_OP1_206_40557_20121218_025254_outLine
+BABEL_OP1_206_40713_20121129_215041_inLine
+BABEL_OP1_206_40713_20121129_215041_outLine
+BABEL_OP1_206_41097_20121215_173120_inLine
+BABEL_OP1_206_41097_20121215_173120_outLine
+BABEL_OP1_206_41174_20130604_193434_inLine
+BABEL_OP1_206_41174_20130604_193434_outLine
+BABEL_OP1_206_41233_20121215_001846_inLine
+BABEL_OP1_206_41233_20121215_001846_outLine
+BABEL_OP1_206_41598_20130102_233834_inLine
+BABEL_OP1_206_41598_20130102_233834_outLine
+BABEL_OP1_206_42029_20121220_181050_inLine
+BABEL_OP1_206_42029_20121220_181050_outLine
+BABEL_OP1_206_42434_20121202_195754_inLine
+BABEL_OP1_206_42434_20121202_195754_outLine
+BABEL_OP1_206_42434_20121202_202540_inLine
+BABEL_OP1_206_42434_20121202_202540_outLine
+BABEL_OP1_206_42619_20121213_204854_inLine
+BABEL_OP1_206_42619_20121213_204854_outLine
+BABEL_OP1_206_42771_20130601_203101_inLine
+BABEL_OP1_206_42771_20130601_203101_outLine
+BABEL_OP1_206_42834_20121219_015826_inLine
+BABEL_OP1_206_42834_20121219_015826_outLine
+BABEL_OP1_206_43286_20121125_054930_inLine
+BABEL_OP1_206_43286_20121125_054930_outLine
+BABEL_OP1_206_43286_20121125_060858_inLine
+BABEL_OP1_206_43286_20121125_060858_outLine
+BABEL_OP1_206_43286_20121126_003810_inLine
+BABEL_OP1_206_43286_20121126_003810_outLine
+BABEL_OP1_206_43368_20121128_203447_inLine
+BABEL_OP1_206_43368_20121128_203447_outLine
+BABEL_OP1_206_43784_20121230_224515_inLine
+BABEL_OP1_206_43784_20121230_224515_outLine
+BABEL_OP1_206_43788_20121223_235436_inLine
+BABEL_OP1_206_43788_20121223_235436_outLine
+BABEL_OP1_206_44477_20121228_020003_inLine
+BABEL_OP1_206_44477_20121228_020003_outLine
+BABEL_OP1_206_44619_20121129_201028_inLine
+BABEL_OP1_206_44619_20121129_201028_outLine
+BABEL_OP1_206_44619_20121129_203209_inLine
+BABEL_OP1_206_44619_20121129_203209_outLine
+BABEL_OP1_206_45235_20121213_044536_inLine
+BABEL_OP1_206_45235_20121213_044536_outLine
+BABEL_OP1_206_45536_20121212_023751_inLine
+BABEL_OP1_206_45536_20121212_023751_outLine
+BABEL_OP1_206_45560_20121210_054617_inLine
+BABEL_OP1_206_45560_20121210_054617_outLine
+BABEL_OP1_206_45770_20121205_213203_inLine
+BABEL_OP1_206_45770_20121205_213203_outLine
+BABEL_OP1_206_45851_20130123_013016_inLine
+BABEL_OP1_206_45851_20130123_013016_outLine
+BABEL_OP1_206_46066_20121218_015244_outLine
+BABEL_OP1_206_46066_20121218_020520_inLine
+BABEL_OP1_206_46066_20121218_020520_outLine
+BABEL_OP1_206_46261_20130524_180914_inLine
+BABEL_OP1_206_46261_20130524_180914_outLine
+BABEL_OP1_206_46330_20121220_171612_inLine
+BABEL_OP1_206_46330_20121220_171612_outLine
+BABEL_OP1_206_46558_20121125_000809_inLine
+BABEL_OP1_206_46558_20121125_000809_outLine
+BABEL_OP1_206_46688_20121130_222025_inLine
+BABEL_OP1_206_46688_20121130_222025_outLine
+BABEL_OP1_206_46770_20121213_030348_inLine
+BABEL_OP1_206_46770_20121213_030348_outLine
+BABEL_OP1_206_46976_20121222_002626_inLine
+BABEL_OP1_206_46976_20121222_002626_outLine
+BABEL_OP1_206_47186_20121214_212658_inLine
+BABEL_OP1_206_47186_20121214_212658_outLine
+BABEL_OP1_206_47215_20121129_232526_inLine
+BABEL_OP1_206_47215_20121129_232526_outLine
+BABEL_OP1_206_47487_20121127_232736_inLine
+BABEL_OP1_206_47487_20121127_232736_outLine
+BABEL_OP1_206_47802_20121213_220928_inLine
+BABEL_OP1_206_47802_20121213_220928_outLine
+BABEL_OP1_206_47878_20121221_153159_inLine
+BABEL_OP1_206_47878_20121221_153159_outLine
+BABEL_OP1_206_48789_20121202_173639_inLine
+BABEL_OP1_206_48789_20121202_173639_outLine
+BABEL_OP1_206_48844_20121123_030435_inLine
+BABEL_OP1_206_48844_20121123_030435_outLine
+BABEL_OP1_206_48844_20121204_030447_inLine
+BABEL_OP1_206_48844_20121204_030447_outLine
+BABEL_OP1_206_49001_20121128_201907_inLine
+BABEL_OP1_206_49001_20121128_201907_outLine
+BABEL_OP1_206_49287_20121219_204754_inLine
+BABEL_OP1_206_49287_20121219_204754_outLine
+BABEL_OP1_206_49870_20130605_000829_inLine
+BABEL_OP1_206_49870_20130605_000829_outLine
+BABEL_OP1_206_49907_20121128_055731_inLine
+BABEL_OP1_206_49907_20121128_055731_outLine
+BABEL_OP1_206_49912_20130603_002155_inLine
+BABEL_OP1_206_49912_20130603_002155_outLine
+BABEL_OP1_206_50090_20121210_232617_inLine
+BABEL_OP1_206_50090_20121210_232617_outLine
+BABEL_OP1_206_50090_20121210_234419_inLine
+BABEL_OP1_206_50090_20121210_234419_outLine
+BABEL_OP1_206_50175_20130604_165733_inLine
+BABEL_OP1_206_50175_20130604_165733_outLine
+BABEL_OP1_206_50565_20121206_213949_inLine
+BABEL_OP1_206_50565_20121206_213949_outLine
+BABEL_OP1_206_50565_20121206_215103_inLine
+BABEL_OP1_206_50565_20121206_215103_outLine
+BABEL_OP1_206_50565_20121206_221547_inLine
+BABEL_OP1_206_50565_20121206_221547_outLine
+BABEL_OP1_206_50601_20121219_030519_inLine
+BABEL_OP1_206_50601_20121219_030519_outLine
+BABEL_OP1_206_50681_20121222_003908_inLine
+BABEL_OP1_206_50681_20121222_003908_outLine
+BABEL_OP1_206_50726_20130103_015437_inLine
+BABEL_OP1_206_50726_20130103_015437_outLine
+BABEL_OP1_206_51015_20121216_025307_inLine
+BABEL_OP1_206_51015_20121216_025307_outLine
+BABEL_OP1_206_51484_20121213_023814_inLine
+BABEL_OP1_206_51484_20121213_023814_outLine
+BABEL_OP1_206_51540_20121212_225359_inLine
+BABEL_OP1_206_51540_20121212_225359_outLine
+BABEL_OP1_206_51955_20121219_004818_inLine
+BABEL_OP1_206_51955_20121219_004818_outLine
+BABEL_OP1_206_52422_20121220_034724_inLine
+BABEL_OP1_206_52422_20121220_034724_outLine
+BABEL_OP1_206_52694_20130523_175759_inLine
+BABEL_OP1_206_52694_20130523_175759_outLine
+BABEL_OP1_206_52804_20121201_184720_inLine
+BABEL_OP1_206_52804_20121201_184720_outLine
+BABEL_OP1_206_52818_20121228_012038_inLine
+BABEL_OP1_206_52818_20121228_012038_outLine
+BABEL_OP1_206_52854_20121128_034458_inLine
+BABEL_OP1_206_52854_20121128_034458_outLine
+BABEL_OP1_206_52854_20121206_214928_inLine
+BABEL_OP1_206_52854_20121206_214928_outLine
+BABEL_OP1_206_52854_20121206_224251_inLine
+BABEL_OP1_206_52854_20121206_224251_outLine
+BABEL_OP1_206_52932_20121128_045304_inLine
+BABEL_OP1_206_52932_20121128_045304_outLine
+BABEL_OP1_206_52932_20121128_233739_inLine
+BABEL_OP1_206_52932_20121128_233739_outLine
+BABEL_OP1_206_53957_20130522_194644_inLine
+BABEL_OP1_206_53957_20130522_194644_outLine
+BABEL_OP1_206_54104_20130102_215440_inLine
+BABEL_OP1_206_54104_20130102_215440_outLine
+BABEL_OP1_206_54162_20121220_230656_inLine
+BABEL_OP1_206_54162_20121220_230656_outLine
+BABEL_OP1_206_54390_20121130_203012_inLine
+BABEL_OP1_206_54390_20121130_203012_outLine
+BABEL_OP1_206_54477_20121212_013137_inLine
+BABEL_OP1_206_54477_20121212_013137_outLine
+BABEL_OP1_206_54530_20130531_233153_inLine
+BABEL_OP1_206_54530_20130531_233153_outLine
+BABEL_OP1_206_54697_20121228_003256_inLine
+BABEL_OP1_206_54697_20121228_003256_outLine
+BABEL_OP1_206_54744_20130103_035406_inLine
+BABEL_OP1_206_54744_20130103_035406_outLine
+BABEL_OP1_206_54953_20121205_023337_inLine
+BABEL_OP1_206_54953_20121205_023337_outLine
+BABEL_OP1_206_55259_20130118_022049_inLine
+BABEL_OP1_206_55259_20130118_022049_outLine
+BABEL_OP1_206_55259_20130118_023307_inLine
+BABEL_OP1_206_55259_20130118_023307_outLine
+BABEL_OP1_206_55818_20121130_051150_inLine
+BABEL_OP1_206_55818_20121130_051150_outLine
+BABEL_OP1_206_55818_20121130_054331_inLine
+BABEL_OP1_206_55818_20121130_054331_outLine
+BABEL_OP1_206_55968_20121204_204317_inLine
+BABEL_OP1_206_55968_20121204_204317_outLine
+BABEL_OP1_206_55968_20121204_211213_inLine
+BABEL_OP1_206_55968_20121204_211213_outLine
+BABEL_OP1_206_56023_20121227_235521_inLine
+BABEL_OP1_206_56023_20121227_235521_outLine
+BABEL_OP1_206_56677_20130111_174028_inLine
+BABEL_OP1_206_56677_20130111_174028_outLine
+BABEL_OP1_206_57093_20121205_002300_inLine
+BABEL_OP1_206_57093_20121205_002300_outLine
+BABEL_OP1_206_57093_20121205_044909_inLine
+BABEL_OP1_206_57093_20121205_044909_outLine
+BABEL_OP1_206_57141_20121212_211734_inLine
+BABEL_OP1_206_57141_20121212_211734_outLine
+BABEL_OP1_206_57529_20121211_232002_inLine
+BABEL_OP1_206_57529_20121211_232002_outLine
+BABEL_OP1_206_57678_20121201_231032_inLine
+BABEL_OP1_206_57678_20121201_231032_outLine
+BABEL_OP1_206_58047_20121212_222839_inLine
+BABEL_OP1_206_58047_20121212_222839_outLine
+BABEL_OP1_206_58313_20121220_211354_inLine
+BABEL_OP1_206_58313_20121220_211354_outLine
+BABEL_OP1_206_58489_20121221_225602_inLine
+BABEL_OP1_206_58489_20121221_225602_outLine
+BABEL_OP1_206_58734_20121130_203502_inLine
+BABEL_OP1_206_58734_20121130_203502_outLine
+BABEL_OP1_206_58821_20130531_205929_inLine
+BABEL_OP1_206_58821_20130531_205929_outLine
+BABEL_OP1_206_60026_20121205_044105_inLine
+BABEL_OP1_206_60026_20121205_044105_outLine
+BABEL_OP1_206_60299_20130602_222928_inLine
+BABEL_OP1_206_60299_20130602_222928_outLine
+BABEL_OP1_206_60310_20121220_003756_inLine
+BABEL_OP1_206_60310_20121220_003756_outLine
+BABEL_OP1_206_60418_20130530_195743_inLine
+BABEL_OP1_206_60418_20130530_195743_outLine
+BABEL_OP1_206_61167_20121202_012318_inLine
+BABEL_OP1_206_61167_20121202_012318_outLine
+BABEL_OP1_206_61167_20121203_083125_inLine
+BABEL_OP1_206_61167_20121203_083125_outLine
+BABEL_OP1_206_61225_20121128_222308_inLine
+BABEL_OP1_206_61225_20121128_222308_outLine
+BABEL_OP1_206_61348_20121218_225731_inLine
+BABEL_OP1_206_61348_20121218_225731_outLine
+BABEL_OP1_206_61357_20130120_183001_inLine
+BABEL_OP1_206_61357_20130120_183001_outLine
+BABEL_OP1_206_61435_20121217_000451_inLine
+BABEL_OP1_206_61435_20121217_000451_outLine
+BABEL_OP1_206_61678_20121123_013649_inLine
+BABEL_OP1_206_61678_20121123_013649_outLine
+BABEL_OP1_206_61731_20121128_024803_inLine
+BABEL_OP1_206_61731_20121128_024803_outLine
+BABEL_OP1_206_61888_20130605_172611_inLine
+BABEL_OP1_206_61888_20130605_172611_outLine
+BABEL_OP1_206_62200_20130522_212226_inLine
+BABEL_OP1_206_62200_20130522_212226_outLine
+BABEL_OP1_206_62724_20121218_202436_inLine
+BABEL_OP1_206_62724_20121218_202436_outLine
+BABEL_OP1_206_62800_20121201_010750_inLine
+BABEL_OP1_206_62800_20121201_010750_outLine
+BABEL_OP1_206_62800_20121201_015047_inLine
+BABEL_OP1_206_62800_20121201_015047_outLine
+BABEL_OP1_206_62800_20121201_021942_inLine
+BABEL_OP1_206_62800_20121201_021942_outLine
+BABEL_OP1_206_62810_20121122_202600_inLine
+BABEL_OP1_206_62810_20121122_202600_outLine
+BABEL_OP1_206_63081_20121219_012926_inLine
+BABEL_OP1_206_63081_20121219_012926_outLine
+BABEL_OP1_206_63081_20121219_174450_inLine
+BABEL_OP1_206_63081_20121219_174450_outLine
+BABEL_OP1_206_63084_20121210_013516_inLine
+BABEL_OP1_206_63084_20121210_013516_outLine
+BABEL_OP1_206_63425_20121214_182639_inLine
+BABEL_OP1_206_63425_20121214_182639_outLine
+BABEL_OP1_206_63445_20121207_014019_inLine
+BABEL_OP1_206_63445_20121207_014019_outLine
+BABEL_OP1_206_63604_20130527_215715_inLine
+BABEL_OP1_206_63604_20130527_215715_outLine
+BABEL_OP1_206_63670_20121212_212623_inLine
+BABEL_OP1_206_63670_20121212_212623_outLine
+BABEL_OP1_206_63757_20121222_235730_inLine
+BABEL_OP1_206_63757_20121222_235730_outLine
+BABEL_OP1_206_63787_20130530_221300_inLine
+BABEL_OP1_206_63787_20130530_221300_outLine
+BABEL_OP1_206_63906_20130131_014942_inLine
+BABEL_OP1_206_63906_20130131_014942_outLine
+BABEL_OP1_206_64014_20130122_011323_inLine
+BABEL_OP1_206_64014_20130122_011323_outLine
+BABEL_OP1_206_64768_20121207_223917_inLine
+BABEL_OP1_206_64768_20121207_223917_outLine
+BABEL_OP1_206_65064_20121221_000939_inLine
+BABEL_OP1_206_65064_20121221_000939_outLine
+BABEL_OP1_206_65723_20121129_222430_inLine
+BABEL_OP1_206_65723_20121129_222430_outLine
+BABEL_OP1_206_65882_20121201_174526_inLine
+BABEL_OP1_206_65882_20121201_174526_outLine
+BABEL_OP1_206_66001_20130103_012213_inLine
+BABEL_OP1_206_66001_20130103_012213_outLine
+BABEL_OP1_206_66045_20121129_223013_inLine
+BABEL_OP1_206_66045_20121129_223013_outLine
+BABEL_OP1_206_66519_20121202_220401_inLine
+BABEL_OP1_206_66519_20121202_220401_outLine
+BABEL_OP1_206_66916_20130118_005447_inLine
+BABEL_OP1_206_66916_20130118_005447_outLine
+BABEL_OP1_206_66916_20130118_010520_inLine
+BABEL_OP1_206_66916_20130118_010520_outLine
+BABEL_OP1_206_67622_20121206_210526_inLine
+BABEL_OP1_206_67622_20121206_210526_outLine
+BABEL_OP1_206_67659_20121219_201336_inLine
+BABEL_OP1_206_67659_20121219_201336_outLine
+BABEL_OP1_206_68306_20121213_205817_inLine
+BABEL_OP1_206_68306_20121213_205817_outLine
+BABEL_OP1_206_68385_20121123_231120_inLine
+BABEL_OP1_206_68385_20121123_231120_outLine
+BABEL_OP1_206_68627_20130122_023725_inLine
+BABEL_OP1_206_68627_20130122_023725_outLine
+BABEL_OP1_206_68748_20121212_025750_inLine
+BABEL_OP1_206_68748_20121212_025750_outLine
+BABEL_OP1_206_68924_20121228_001758_inLine
+BABEL_OP1_206_68924_20121228_001758_outLine
+BABEL_OP1_206_69578_20121214_002009_inLine
+BABEL_OP1_206_69578_20121214_002009_outLine
+BABEL_OP1_206_69992_20130529_181609_inLine
+BABEL_OP1_206_69992_20130529_181609_outLine
+BABEL_OP1_206_70121_20121219_215051_inLine
+BABEL_OP1_206_70121_20121219_215051_outLine
+BABEL_OP1_206_70121_20121219_220824_inLine
+BABEL_OP1_206_70121_20121219_220824_outLine
+BABEL_OP1_206_70251_20121219_044415_inLine
+BABEL_OP1_206_70251_20121219_044415_outLine
+BABEL_OP1_206_70343_20121221_023826_inLine
+BABEL_OP1_206_70343_20121221_023826_outLine
+BABEL_OP1_206_70386_20121207_232647_inLine
+BABEL_OP1_206_70386_20121207_232647_outLine
+BABEL_OP1_206_71067_20121209_210046_inLine
+BABEL_OP1_206_71067_20121209_210046_outLine
+BABEL_OP1_206_71067_20121209_214030_inLine
+BABEL_OP1_206_71067_20121209_214030_outLine
+BABEL_OP1_206_71566_20130604_214443_inLine
+BABEL_OP1_206_71566_20130604_214443_outLine
+BABEL_OP1_206_72110_20121221_232617_inLine
+BABEL_OP1_206_72110_20121221_232617_outLine
+BABEL_OP1_206_72319_20130123_022502_inLine
+BABEL_OP1_206_72319_20130123_022502_outLine
+BABEL_OP1_206_72324_20130602_184851_inLine
+BABEL_OP1_206_72324_20130602_184851_outLine
+BABEL_OP1_206_72844_20121130_193956_inLine
+BABEL_OP1_206_72844_20121130_193956_outLine
+BABEL_OP1_206_73005_20130122_021229_inLine
+BABEL_OP1_206_73005_20130122_021229_outLine
+BABEL_OP1_206_73072_20121205_231914_inLine
+BABEL_OP1_206_73072_20121205_231914_outLine
+BABEL_OP1_206_73258_20130120_170200_inLine
+BABEL_OP1_206_73258_20130120_170200_outLine
+BABEL_OP1_206_73301_20130529_214428_inLine
+BABEL_OP1_206_73301_20130529_214428_outLine
+BABEL_OP1_206_73485_20130122_235208_inLine
+BABEL_OP1_206_73485_20130122_235208_outLine
+BABEL_OP1_206_73591_20121117_212751_inLine
+BABEL_OP1_206_73591_20121117_212751_outLine
+BABEL_OP1_206_73964_20130317_202534_inLine
+BABEL_OP1_206_73964_20130317_202534_outLine
+BABEL_OP1_206_74886_20121128_205141_inLine
+BABEL_OP1_206_74886_20121128_205141_outLine
+BABEL_OP1_206_75064_20121129_233512_inLine
+BABEL_OP1_206_75064_20121129_233512_outLine
+BABEL_OP1_206_75505_20130522_234600_inLine
+BABEL_OP1_206_75505_20130522_234600_outLine
+BABEL_OP1_206_75993_20121128_223040_inLine
+BABEL_OP1_206_75993_20121128_223040_outLine
+BABEL_OP1_206_76126_20121219_020552_inLine
+BABEL_OP1_206_76126_20121219_020552_outLine
+BABEL_OP1_206_76238_20130111_190815_inLine
+BABEL_OP1_206_76238_20130111_190815_outLine
+BABEL_OP1_206_76372_20130603_190448_inLine
+BABEL_OP1_206_76372_20130603_190448_outLine
+BABEL_OP1_206_76437_20121117_202446_inLine
+BABEL_OP1_206_76437_20121117_202446_outLine
+BABEL_OP1_206_77730_20130107_234021_inLine
+BABEL_OP1_206_77730_20130107_234021_outLine
+BABEL_OP1_206_77803_20121130_005638_inLine
+BABEL_OP1_206_77803_20121130_005638_outLine
+BABEL_OP1_206_78398_20121206_003319_inLine
+BABEL_OP1_206_78398_20121206_003319_outLine
+BABEL_OP1_206_78544_20121220_000743_inLine
+BABEL_OP1_206_78544_20121220_000743_outLine
+BABEL_OP1_206_78943_20121129_231930_inLine
+BABEL_OP1_206_78943_20121129_231930_outLine
+BABEL_OP1_206_79080_20121212_205306_inLine
+BABEL_OP1_206_79080_20121212_205306_outLine
+BABEL_OP1_206_79131_20130123_003404_inLine
+BABEL_OP1_206_79131_20130123_003404_outLine
+BABEL_OP1_206_79167_20130602_202526_inLine
+BABEL_OP1_206_79167_20130602_202526_outLine
+BABEL_OP1_206_79367_20121204_001524_inLine
+BABEL_OP1_206_79367_20121204_001524_outLine
+BABEL_OP1_206_79367_20121204_004137_inLine
+BABEL_OP1_206_79367_20121204_004137_outLine
+BABEL_OP1_206_79898_20130524_002505_inLine
+BABEL_OP1_206_79898_20130524_002505_outLine
+BABEL_OP1_206_80241_20130604_001309_inLine
+BABEL_OP1_206_80241_20130604_001309_outLine
+BABEL_OP1_206_80439_20130527_182722_inLine
+BABEL_OP1_206_80439_20130527_182722_outLine
+BABEL_OP1_206_80559_20121206_232755_inLine
+BABEL_OP1_206_80559_20121206_232755_outLine
+BABEL_OP1_206_80781_20121219_233131_inLine
+BABEL_OP1_206_80781_20121219_233131_outLine
+BABEL_OP1_206_80881_20121204_030141_inLine
+BABEL_OP1_206_80881_20121204_030141_outLine
+BABEL_OP1_206_81435_20121220_204044_inLine
+BABEL_OP1_206_81435_20121220_204044_outLine
+BABEL_OP1_206_82035_20121220_195943_inLine
+BABEL_OP1_206_82035_20121220_195943_outLine
+BABEL_OP1_206_82138_20121129_223223_inLine
+BABEL_OP1_206_82138_20121129_223223_outLine
+BABEL_OP1_206_82303_20130531_191551_inLine
+BABEL_OP1_206_82303_20130531_191551_outLine
+BABEL_OP1_206_82391_20121221_015423_inLine
+BABEL_OP1_206_82391_20121221_015423_outLine
+BABEL_OP1_206_82425_20121129_212519_inLine
+BABEL_OP1_206_82425_20121129_212519_outLine
+BABEL_OP1_206_82473_20121206_004738_inLine
+BABEL_OP1_206_82473_20121206_004738_outLine
+BABEL_OP1_206_82622_20130604_222219_inLine
+BABEL_OP1_206_82622_20130604_222219_outLine
+BABEL_OP1_206_83455_20121205_024244_inLine
+BABEL_OP1_206_83455_20121205_024244_outLine
+BABEL_OP1_206_84547_20121206_225105_inLine
+BABEL_OP1_206_84547_20121206_225105_outLine
+BABEL_OP1_206_84605_20121129_212603_inLine
+BABEL_OP1_206_84605_20121129_212603_outLine
+BABEL_OP1_206_84805_20121214_221155_inLine
+BABEL_OP1_206_84805_20121214_221155_outLine
+BABEL_OP1_206_85028_20121212_014236_inLine
+BABEL_OP1_206_85028_20121212_014236_outLine
+BABEL_OP1_206_85248_20121217_174710_inLine
+BABEL_OP1_206_85248_20121217_174710_outLine
+BABEL_OP1_206_85322_20130530_233851_inLine
+BABEL_OP1_206_85322_20130530_233851_outLine
+BABEL_OP1_206_85647_20121206_022317_inLine
+BABEL_OP1_206_85647_20121206_022317_outLine
+BABEL_OP1_206_85647_20121206_024354_inLine
+BABEL_OP1_206_85647_20121206_024354_outLine
+BABEL_OP1_206_85651_20130420_232505_inLine
+BABEL_OP1_206_85651_20130420_232505_outLine
+BABEL_OP1_206_86191_20121205_001218_inLine
+BABEL_OP1_206_86191_20121205_001218_outLine
+BABEL_OP1_206_86321_20121212_025212_inLine
+BABEL_OP1_206_86321_20121212_025212_outLine
+BABEL_OP1_206_86433_20121220_215310_inLine
+BABEL_OP1_206_86433_20121220_215310_outLine
+BABEL_OP1_206_86433_20121220_225718_inLine
+BABEL_OP1_206_86433_20121220_225718_outLine
+BABEL_OP1_206_86472_20121221_010912_inLine
+BABEL_OP1_206_86472_20121221_010912_outLine
+BABEL_OP1_206_86635_20121218_223238_inLine
+BABEL_OP1_206_86635_20121218_223238_outLine
+BABEL_OP1_206_86635_20121218_230141_inLine
+BABEL_OP1_206_86635_20121218_230141_outLine
+BABEL_OP1_206_86715_20130602_174900_inLine
+BABEL_OP1_206_86715_20130602_174900_outLine
+BABEL_OP1_206_86722_20121204_231838_inLine
+BABEL_OP1_206_86722_20121204_231838_outLine
+BABEL_OP1_206_86860_20130122_004822_inLine
+BABEL_OP1_206_86860_20130122_004822_outLine
+BABEL_OP1_206_86952_20130601_175321_inLine
+BABEL_OP1_206_86952_20130601_175321_outLine
+BABEL_OP1_206_87073_20130102_212334_inLine
+BABEL_OP1_206_87073_20130102_212334_outLine
+BABEL_OP1_206_87074_20121128_194554_inLine
+BABEL_OP1_206_87074_20121128_194554_outLine
+BABEL_OP1_206_87280_20121207_231125_inLine
+BABEL_OP1_206_87280_20121207_231125_outLine
+BABEL_OP1_206_87298_20121129_212519_inLine
+BABEL_OP1_206_87298_20121129_212519_outLine
+BABEL_OP1_206_87298_20121129_213610_inLine
+BABEL_OP1_206_87298_20121129_213610_outLine
+BABEL_OP1_206_87470_20121203_052237_inLine
+BABEL_OP1_206_87470_20121203_052237_outLine
+BABEL_OP1_206_87871_20121220_222250_inLine
+BABEL_OP1_206_87871_20121220_222250_outLine
+BABEL_OP1_206_87921_20121221_003205_inLine
+BABEL_OP1_206_87921_20121221_003205_outLine
+BABEL_OP1_206_88260_20121208_204256_inLine
+BABEL_OP1_206_88260_20121208_204256_outLine
+BABEL_OP1_206_88372_20130120_230911_inLine
+BABEL_OP1_206_88372_20130120_230911_outLine
+BABEL_OP1_206_88925_20130603_230637_inLine
+BABEL_OP1_206_88925_20130603_230637_outLine
+BABEL_OP1_206_89575_20121220_211420_inLine
+BABEL_OP1_206_89575_20121220_211420_outLine
+BABEL_OP1_206_89665_20121208_212046_inLine
+BABEL_OP1_206_89665_20121208_212046_outLine
+BABEL_OP1_206_89943_20121127_034521_inLine
+BABEL_OP1_206_89943_20121127_034521_outLine
+BABEL_OP1_206_89943_20121128_015307_inLine
+BABEL_OP1_206_89943_20121128_015307_outLine
+BABEL_OP1_206_90417_20130605_185956_inLine
+BABEL_OP1_206_90417_20130605_185956_outLine
+BABEL_OP1_206_90572_20130618_045832_inLine
+BABEL_OP1_206_90572_20130618_045832_outLine
+BABEL_OP1_206_90739_20130604_174758_inLine
+BABEL_OP1_206_90739_20130604_174758_outLine
+BABEL_OP1_206_90760_20130525_001351_inLine
+BABEL_OP1_206_90760_20130525_001351_outLine
+BABEL_OP1_206_91080_20121220_024658_inLine
+BABEL_OP1_206_91080_20121220_024658_outLine
+BABEL_OP1_206_91125_20121123_063516_inLine
+BABEL_OP1_206_91125_20121123_063516_outLine
+BABEL_OP1_206_91336_20121205_221404_inLine
+BABEL_OP1_206_91336_20121205_221404_outLine
+BABEL_OP1_206_91581_20121209_193208_inLine
+BABEL_OP1_206_91581_20121209_193208_outLine
+BABEL_OP1_206_92096_20130123_010912_inLine
+BABEL_OP1_206_92096_20130123_010912_outLine
+BABEL_OP1_206_92459_20130529_223322_inLine
+BABEL_OP1_206_92459_20130529_223322_outLine
+BABEL_OP1_206_92527_20121128_232151_inLine
+BABEL_OP1_206_92527_20121128_232151_outLine
+BABEL_OP1_206_92527_20121128_234105_inLine
+BABEL_OP1_206_92527_20121128_234105_outLine
+BABEL_OP1_206_92557_20121213_005100_inLine
+BABEL_OP1_206_92557_20121213_005100_outLine
+BABEL_OP1_206_92740_20121211_184826_inLine
+BABEL_OP1_206_92740_20121211_184826_outLine
+BABEL_OP1_206_93224_20121211_003624_inLine
+BABEL_OP1_206_93224_20121211_003624_outLine
+BABEL_OP1_206_93411_20121220_002408_inLine
+BABEL_OP1_206_93411_20121220_002408_outLine
+BABEL_OP1_206_93632_20121212_021207_inLine
+BABEL_OP1_206_93632_20121212_021207_outLine
+BABEL_OP1_206_93858_20130605_005238_inLine
+BABEL_OP1_206_93858_20130605_005238_outLine
+BABEL_OP1_206_93964_20121205_235339_inLine
+BABEL_OP1_206_93964_20121205_235339_outLine
+BABEL_OP1_206_94025_20121213_025224_inLine
+BABEL_OP1_206_94025_20121213_025224_outLine
+BABEL_OP1_206_94745_20130531_014707_inLine
+BABEL_OP1_206_94745_20130531_014707_outLine
+BABEL_OP1_206_94869_20121205_203951_inLine
+BABEL_OP1_206_94869_20121205_203951_outLine
+BABEL_OP1_206_95028_20130601_222202_inLine
+BABEL_OP1_206_95028_20130601_222202_outLine
+BABEL_OP1_206_95231_20130601_230414_inLine
+BABEL_OP1_206_95231_20130601_230414_outLine
+BABEL_OP1_206_95446_20121220_221335_inLine
+BABEL_OP1_206_95446_20121220_221335_outLine
+BABEL_OP1_206_96730_20121220_213139_inLine
+BABEL_OP1_206_96730_20121220_213139_outLine
+BABEL_OP1_206_96910_20121202_211324_inLine
+BABEL_OP1_206_96910_20121202_211324_outLine
+BABEL_OP1_206_97376_20121220_234456_inLine
+BABEL_OP1_206_97376_20121220_234456_outLine
+BABEL_OP1_206_97772_20121123_064042_inLine
+BABEL_OP1_206_97772_20121123_064042_outLine
+BABEL_OP1_206_98311_20130528_182109_inLine
+BABEL_OP1_206_98311_20130528_182109_outLine
+BABEL_OP1_206_98390_20121123_064010_inLine
+BABEL_OP1_206_98390_20121123_064010_outLine
+BABEL_OP1_206_98489_20121201_220216_inLine
+BABEL_OP1_206_98489_20121201_220216_outLine
+BABEL_OP1_206_99289_20130123_161855_inLine
+BABEL_OP1_206_99289_20130123_161855_outLine
+BABEL_OP1_206_99289_20130123_163456_inLine
+BABEL_OP1_206_99289_20130123_163456_outLine
+BABEL_OP1_206_99955_20121219_002822_inLine
+BABEL_OP1_206_99955_20121219_002822_outLine
diff --git a/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.list b/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.list
new file mode 100644
index 00000000000..37be6f9253e
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.list
@@ -0,0 +1,124 @@
+BABEL_OP1_206_13030_20121129_225418_inLine
+BABEL_OP1_206_13030_20121129_225418_outLine
+BABEL_OP1_206_14440_20121218_231347_inLine
+BABEL_OP1_206_14440_20121218_231347_outLine
+BABEL_OP1_206_15324_20121208_010033_inLine
+BABEL_OP1_206_15324_20121208_010033_outLine
+BABEL_OP1_206_17440_20121227_213432_inLine
+BABEL_OP1_206_17440_20121227_213432_outLine
+BABEL_OP1_206_17923_20121130_214207_inLine
+BABEL_OP1_206_17923_20121130_214207_outLine
+BABEL_OP1_206_18291_20130604_183732_inLine
+BABEL_OP1_206_18291_20130604_183732_outLine
+BABEL_OP1_206_20682_20121213_030430_inLine
+BABEL_OP1_206_20682_20121213_030430_outLine
+BABEL_OP1_206_20800_20130523_220352_inLine
+BABEL_OP1_206_20800_20130523_220352_outLine
+BABEL_OP1_206_23151_20121217_034512_inLine
+BABEL_OP1_206_23151_20121217_034512_outLine
+BABEL_OP1_206_24605_20121218_201807_inLine
+BABEL_OP1_206_24605_20121218_201807_outLine
+BABEL_OP1_206_26206_20130529_172847_inLine
+BABEL_OP1_206_26206_20130529_172847_outLine
+BABEL_OP1_206_27082_20121220_012037_inLine
+BABEL_OP1_206_27082_20121220_012037_outLine
+BABEL_OP1_206_28419_20121207_221153_inLine
+BABEL_OP1_206_28419_20121207_221153_outLine
+BABEL_OP1_206_28775_20121203_022428_inLine
+BABEL_OP1_206_28775_20121203_022428_outLine
+BABEL_OP1_206_31624_20121123_081518_inLine
+BABEL_OP1_206_31624_20121123_081518_outLine
+BABEL_OP1_206_32708_20121231_225706_inLine
+BABEL_OP1_206_32708_20121231_225706_outLine
+BABEL_OP1_206_34564_20121214_020257_inLine
+BABEL_OP1_206_34564_20121214_020257_outLine
+BABEL_OP1_206_36505_20121213_222927_inLine
+BABEL_OP1_206_36505_20121213_222927_outLine
+BABEL_OP1_206_38431_20121214_013939_inLine
+BABEL_OP1_206_38431_20121214_013939_outLine
+BABEL_OP1_206_45560_20121210_054617_inLine
+BABEL_OP1_206_45560_20121210_054617_outLine
+BABEL_OP1_206_45770_20121205_213203_inLine
+BABEL_OP1_206_45770_20121205_213203_outLine
+BABEL_OP1_206_47186_20121214_212658_inLine
+BABEL_OP1_206_47186_20121214_212658_outLine
+BABEL_OP1_206_47215_20121129_232526_inLine
+BABEL_OP1_206_47215_20121129_232526_outLine
+BABEL_OP1_206_48789_20121202_173639_inLine
+BABEL_OP1_206_48789_20121202_173639_outLine
+BABEL_OP1_206_50175_20130604_165733_inLine
+BABEL_OP1_206_50175_20130604_165733_outLine
+BABEL_OP1_206_50601_20121219_030519_inLine
+BABEL_OP1_206_50601_20121219_030519_outLine
+BABEL_OP1_206_50726_20130103_015437_inLine
+BABEL_OP1_206_50726_20130103_015437_outLine
+BABEL_OP1_206_51540_20121212_225359_inLine
+BABEL_OP1_206_51540_20121212_225359_outLine
+BABEL_OP1_206_52694_20130523_175759_inLine
+BABEL_OP1_206_52694_20130523_175759_outLine
+BABEL_OP1_206_53957_20130522_194644_inLine
+BABEL_OP1_206_53957_20130522_194644_outLine
+BABEL_OP1_206_54744_20130103_035406_inLine
+BABEL_OP1_206_54744_20130103_035406_outLine
+BABEL_OP1_206_55818_20121130_051150_inLine
+BABEL_OP1_206_55818_20121130_051150_outLine
+BABEL_OP1_206_55818_20121130_054331_inLine
+BABEL_OP1_206_55818_20121130_054331_outLine
+BABEL_OP1_206_57678_20121201_231032_inLine
+BABEL_OP1_206_57678_20121201_231032_outLine
+BABEL_OP1_206_60418_20130530_195743_inLine
+BABEL_OP1_206_60418_20130530_195743_outLine
+BABEL_OP1_206_61225_20121128_222308_inLine
+BABEL_OP1_206_61225_20121128_222308_outLine
+BABEL_OP1_206_63081_20121219_012926_inLine
+BABEL_OP1_206_63081_20121219_012926_outLine
+BABEL_OP1_206_63081_20121219_174450_inLine
+BABEL_OP1_206_63081_20121219_174450_outLine
+BABEL_OP1_206_63445_20121207_014019_inLine
+BABEL_OP1_206_63445_20121207_014019_outLine
+BABEL_OP1_206_63604_20130527_215715_inLine
+BABEL_OP1_206_63604_20130527_215715_outLine
+BABEL_OP1_206_65723_20121129_222430_inLine
+BABEL_OP1_206_65723_20121129_222430_outLine
+BABEL_OP1_206_65882_20121201_174526_inLine
+BABEL_OP1_206_65882_20121201_174526_outLine
+BABEL_OP1_206_66519_20121202_220401_inLine
+BABEL_OP1_206_66519_20121202_220401_outLine
+BABEL_OP1_206_67659_20121219_201336_inLine
+BABEL_OP1_206_67659_20121219_201336_outLine
+BABEL_OP1_206_73072_20121205_231914_inLine
+BABEL_OP1_206_73072_20121205_231914_outLine
+BABEL_OP1_206_73964_20130317_202534_inLine
+BABEL_OP1_206_73964_20130317_202534_outLine
+BABEL_OP1_206_76372_20130603_190448_inLine
+BABEL_OP1_206_76372_20130603_190448_outLine
+BABEL_OP1_206_77730_20130107_234021_inLine
+BABEL_OP1_206_77730_20130107_234021_outLine
+BABEL_OP1_206_79898_20130524_002505_inLine
+BABEL_OP1_206_79898_20130524_002505_outLine
+BABEL_OP1_206_80241_20130604_001309_inLine
+BABEL_OP1_206_80241_20130604_001309_outLine
+BABEL_OP1_206_80881_20121204_030141_inLine
+BABEL_OP1_206_80881_20121204_030141_outLine
+BABEL_OP1_206_85248_20121217_174710_inLine
+BABEL_OP1_206_85248_20121217_174710_outLine
+BABEL_OP1_206_86860_20130122_004822_inLine
+BABEL_OP1_206_86860_20130122_004822_outLine
+BABEL_OP1_206_86952_20130601_175321_inLine
+BABEL_OP1_206_86952_20130601_175321_outLine
+BABEL_OP1_206_87074_20121128_194554_inLine
+BABEL_OP1_206_87074_20121128_194554_outLine
+BABEL_OP1_206_87280_20121207_231125_inLine
+BABEL_OP1_206_87280_20121207_231125_outLine
+BABEL_OP1_206_90417_20130605_185956_inLine
+BABEL_OP1_206_90417_20130605_185956_outLine
+BABEL_OP1_206_91080_20121220_024658_inLine
+BABEL_OP1_206_91080_20121220_024658_outLine
+BABEL_OP1_206_91581_20121209_193208_inLine
+BABEL_OP1_206_91581_20121209_193208_outLine
+BABEL_OP1_206_92096_20130123_010912_inLine
+BABEL_OP1_206_92096_20130123_010912_outLine
+BABEL_OP1_206_93224_20121211_003624_inLine
+BABEL_OP1_206_93224_20121211_003624_outLine
+BABEL_OP1_206_98489_20121201_220216_inLine
+BABEL_OP1_206_98489_20121201_220216_outLine
diff --git a/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.untranscribed.list b/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.untranscribed.list
new file mode 100644
index 00000000000..dd4d5d3c445
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/206-zulu/train.LimitedLP.untranscribed.list
@@ -0,0 +1,705 @@
+BABEL_OP1_206_10901_20121128_230024_inLine
+BABEL_OP1_206_10901_20121128_230024_outLine
+BABEL_OP1_206_10901_20121129_003238_inLine
+BABEL_OP1_206_10901_20121129_003238_outLine
+BABEL_OP1_206_10966_20121205_213021_inLine
+BABEL_OP1_206_10966_20121205_213021_outLine
+BABEL_OP1_206_10966_20121205_214750_inLine
+BABEL_OP1_206_10966_20121205_214750_outLine
+BABEL_OP1_206_11581_20121213_020058_inLine
+BABEL_OP1_206_11581_20121213_020058_outLine
+BABEL_OP1_206_11797_20121207_001426_inLine
+BABEL_OP1_206_11797_20121207_001426_outLine
+BABEL_OP1_206_11797_20121207_002917_inLine
+BABEL_OP1_206_11797_20121207_002917_outLine
+BABEL_OP1_206_11859_20130602_013210_inLine
+BABEL_OP1_206_11859_20130602_013210_outLine
+BABEL_OP1_206_12242_20121218_022109_inLine
+BABEL_OP1_206_12242_20121218_022109_outLine
+BABEL_OP1_206_12851_20121215_010712_inLine
+BABEL_OP1_206_12851_20121215_010712_outLine
+BABEL_OP1_206_13184_20121216_223430_inLine
+BABEL_OP1_206_13184_20121216_223430_outLine
+BABEL_OP1_206_13184_20121216_224722_inLine
+BABEL_OP1_206_13184_20121216_224722_outLine
+BABEL_OP1_206_13483_20121219_205820_inLine
+BABEL_OP1_206_13483_20121219_205820_outLine
+BABEL_OP1_206_13483_20121219_212915_inLine
+BABEL_OP1_206_13483_20121219_212915_outLine
+BABEL_OP1_206_13490_20121221_005743_inLine
+BABEL_OP1_206_13490_20121221_005743_outLine
+BABEL_OP1_206_13744_20121205_205818_inLine
+BABEL_OP1_206_13744_20121205_205818_outLine
+BABEL_OP1_206_14137_20130118_010712_inLine
+BABEL_OP1_206_14137_20130118_010712_outLine
+BABEL_OP1_206_14137_20130122_014528_inLine
+BABEL_OP1_206_14137_20130122_014528_outLine
+BABEL_OP1_206_14179_20121210_224630_inLine
+BABEL_OP1_206_14179_20121210_224630_outLine
+BABEL_OP1_206_14719_20121213_040757_inLine
+BABEL_OP1_206_14719_20121213_040757_outLine
+BABEL_OP1_206_14729_20130531_183022_inLine
+BABEL_OP1_206_14729_20130531_183022_outLine
+BABEL_OP1_206_14807_20121221_150943_inLine
+BABEL_OP1_206_14807_20121221_150943_outLine
+BABEL_OP1_206_14814_20121129_203954_inLine
+BABEL_OP1_206_14814_20121129_203954_outLine
+BABEL_OP1_206_14899_20121203_021835_inLine
+BABEL_OP1_206_14899_20121203_021835_outLine
+BABEL_OP1_206_14929_20121203_232411_inLine
+BABEL_OP1_206_14929_20121203_232411_outLine
+BABEL_OP1_206_15024_20130527_234410_inLine
+BABEL_OP1_206_15024_20130527_234410_outLine
+BABEL_OP1_206_15702_20121214_225618_inLine
+BABEL_OP1_206_15702_20121214_225618_outLine
+BABEL_OP1_206_15702_20121214_231152_inLine
+BABEL_OP1_206_15702_20121214_231152_outLine
+BABEL_OP1_206_15702_20121214_232449_inLine
+BABEL_OP1_206_15702_20121214_232449_outLine
+BABEL_OP1_206_16149_20121201_010342_inLine
+BABEL_OP1_206_16149_20121201_010342_outLine
+BABEL_OP1_206_16467_20130531_200137_inLine
+BABEL_OP1_206_16467_20130531_200137_outLine
+BABEL_OP1_206_16475_20130121_210828_inLine
+BABEL_OP1_206_16475_20130121_210828_outLine
+BABEL_OP1_206_16475_20130121_212136_inLine
+BABEL_OP1_206_16475_20130121_212136_outLine
+BABEL_OP1_206_16839_20121217_170534_inLine
+BABEL_OP1_206_16839_20121217_170534_outLine
+BABEL_OP1_206_16886_20130524_232154_inLine
+BABEL_OP1_206_16886_20130524_232154_outLine
+BABEL_OP1_206_17032_20121219_220514_inLine
+BABEL_OP1_206_17032_20121219_220514_outLine
+BABEL_OP1_206_17280_20130527_191437_inLine
+BABEL_OP1_206_17280_20130527_191437_outLine
+BABEL_OP1_206_17472_20121214_193824_inLine
+BABEL_OP1_206_17472_20121214_193824_outLine
+BABEL_OP1_206_17567_20121209_205317_inLine
+BABEL_OP1_206_17567_20121209_205317_outLine
+BABEL_OP1_206_17567_20121209_211139_inLine
+BABEL_OP1_206_17567_20121209_211139_outLine
+BABEL_OP1_206_17615_20121214_193534_inLine
+BABEL_OP1_206_17615_20121214_193534_outLine
+BABEL_OP1_206_17881_20130121_005313_inLine
+BABEL_OP1_206_17881_20130121_005313_outLine
+BABEL_OP1_206_19722_20121130_203924_inLine
+BABEL_OP1_206_19722_20121130_203924_outLine
+BABEL_OP1_206_19773_20130101_015259_inLine
+BABEL_OP1_206_19773_20130101_015259_outLine
+BABEL_OP1_206_19818_20130529_204811_inLine
+BABEL_OP1_206_19818_20130529_204811_outLine
+BABEL_OP1_206_19877_20130123_175339_inLine
+BABEL_OP1_206_19877_20130123_175339_outLine
+BABEL_OP1_206_19877_20130123_181047_inLine
+BABEL_OP1_206_19877_20130123_181047_outLine
+BABEL_OP1_206_20916_20121205_203848_inLine
+BABEL_OP1_206_20916_20121205_203848_outLine
+BABEL_OP1_206_20922_20121214_231110_inLine
+BABEL_OP1_206_20922_20121214_231110_outLine
+BABEL_OP1_206_21004_20121210_215455_inLine
+BABEL_OP1_206_21004_20121210_215455_outLine
+BABEL_OP1_206_21004_20121210_223449_inLine
+BABEL_OP1_206_21004_20121210_223449_outLine
+BABEL_OP1_206_21206_20121220_001511_inLine
+BABEL_OP1_206_21206_20121220_001511_outLine
+BABEL_OP1_206_21327_20130111_022748_inLine
+BABEL_OP1_206_21327_20130111_022748_outLine
+BABEL_OP1_206_21892_20121213_235725_inLine
+BABEL_OP1_206_21892_20121213_235725_outLine
+BABEL_OP1_206_22494_20130530_004456_inLine
+BABEL_OP1_206_22494_20130530_004456_outLine
+BABEL_OP1_206_22624_20121219_210041_inLine
+BABEL_OP1_206_22624_20121219_210041_outLine
+BABEL_OP1_206_22826_20130121_231859_inLine
+BABEL_OP1_206_22826_20130121_231859_outLine
+BABEL_OP1_206_22826_20130121_233139_inLine
+BABEL_OP1_206_22826_20130121_233139_outLine
+BABEL_OP1_206_22965_20121128_011001_inLine
+BABEL_OP1_206_22965_20121128_011001_outLine
+BABEL_OP1_206_22965_20121128_012241_inLine
+BABEL_OP1_206_22965_20121128_012241_outLine
+BABEL_OP1_206_23006_20121203_004250_inLine
+BABEL_OP1_206_23006_20121203_004250_outLine
+BABEL_OP1_206_23006_20121203_073608_inLine
+BABEL_OP1_206_23006_20121203_073608_outLine
+BABEL_OP1_206_23092_20121227_211821_inLine
+BABEL_OP1_206_23092_20121227_211821_outLine
+BABEL_OP1_206_23153_20130102_224836_inLine
+BABEL_OP1_206_23153_20130102_224836_outLine
+BABEL_OP1_206_23190_20121219_204325_inLine
+BABEL_OP1_206_23190_20121219_204325_outLine
+BABEL_OP1_206_23239_20130118_000831_inLine
+BABEL_OP1_206_23239_20130118_000831_outLine
+BABEL_OP1_206_23505_20121203_010039_inLine
+BABEL_OP1_206_23505_20121203_010039_outLine
+BABEL_OP1_206_24253_20130120_235750_inLine
+BABEL_OP1_206_24253_20130120_235750_outLine
+BABEL_OP1_206_24253_20130121_000835_inLine
+BABEL_OP1_206_24253_20130121_000835_outLine
+BABEL_OP1_206_24253_20130121_012503_inLine
+BABEL_OP1_206_24253_20130121_012503_outLine
+BABEL_OP1_206_24323_20121214_212407_inLine
+BABEL_OP1_206_24323_20121214_212407_outLine
+BABEL_OP1_206_24323_20121214_213448_inLine
+BABEL_OP1_206_24323_20121214_213448_outLine
+BABEL_OP1_206_24532_20121201_203102_inLine
+BABEL_OP1_206_24532_20121201_203102_outLine
+BABEL_OP1_206_24569_20121210_211659_inLine
+BABEL_OP1_206_24569_20121210_211659_outLine
+BABEL_OP1_206_24590_20121201_210938_inLine
+BABEL_OP1_206_24590_20121201_210938_outLine
+BABEL_OP1_206_24590_20121201_215618_inLine
+BABEL_OP1_206_24590_20121201_215618_outLine
+BABEL_OP1_206_24982_20130603_194918_inLine
+BABEL_OP1_206_24982_20130603_194918_outLine
+BABEL_OP1_206_25412_20121210_201120_inLine
+BABEL_OP1_206_25412_20121210_201120_outLine
+BABEL_OP1_206_25412_20121210_203544_inLine
+BABEL_OP1_206_25412_20121210_203544_outLine
+BABEL_OP1_206_25496_20130529_000539_inLine
+BABEL_OP1_206_25496_20130529_000539_outLine
+BABEL_OP1_206_25698_20130603_011444_inLine
+BABEL_OP1_206_25698_20130603_011444_outLine
+BABEL_OP1_206_25719_20121215_000803_inLine
+BABEL_OP1_206_25719_20121215_000803_outLine
+BABEL_OP1_206_25767_20121204_021252_inLine
+BABEL_OP1_206_25767_20121204_021252_outLine
+BABEL_OP1_206_25961_20121202_232650_inLine
+BABEL_OP1_206_25961_20121202_232650_outLine
+BABEL_OP1_206_25961_20121202_234202_inLine
+BABEL_OP1_206_25961_20121202_234202_outLine
+BABEL_OP1_206_26388_20121202_191806_inLine
+BABEL_OP1_206_26388_20121202_191806_outLine
+BABEL_OP1_206_26836_20121201_210310_inLine
+BABEL_OP1_206_26836_20121201_210310_outLine
+BABEL_OP1_206_27042_20121219_230502_inLine
+BABEL_OP1_206_27042_20121219_230502_outLine
+BABEL_OP1_206_27125_20121203_012043_inLine
+BABEL_OP1_206_27125_20121203_012043_outLine
+BABEL_OP1_206_27203_20121214_210018_inLine
+BABEL_OP1_206_27203_20121214_210018_outLine
+BABEL_OP1_206_27590_20121216_180900_inLine
+BABEL_OP1_206_27590_20121216_180900_outLine
+BABEL_OP1_206_27841_20121216_014031_inLine
+BABEL_OP1_206_27841_20121216_014031_outLine
+BABEL_OP1_206_28303_20121128_201831_inLine
+BABEL_OP1_206_28303_20121128_201831_outLine
+BABEL_OP1_206_28945_20130118_003100_inLine
+BABEL_OP1_206_28945_20130118_003100_outLine
+BABEL_OP1_206_29023_20121201_234219_inLine
+BABEL_OP1_206_29023_20121201_234219_outLine
+BABEL_OP1_206_29039_20121220_013046_inLine
+BABEL_OP1_206_29039_20121220_013046_outLine
+BABEL_OP1_206_29135_20121219_224133_inLine
+BABEL_OP1_206_29135_20121219_224133_outLine
+BABEL_OP1_206_29323_20121219_201726_inLine
+BABEL_OP1_206_29323_20121219_201726_outLine
+BABEL_OP1_206_29323_20121219_203137_inLine
+BABEL_OP1_206_29323_20121219_203137_outLine
+BABEL_OP1_206_30395_20121206_014115_inLine
+BABEL_OP1_206_30395_20121206_014115_outLine
+BABEL_OP1_206_30869_20121227_221910_inLine
+BABEL_OP1_206_30869_20121227_221910_outLine
+BABEL_OP1_206_31109_20121224_061142_inLine
+BABEL_OP1_206_31109_20121224_061142_outLine
+BABEL_OP1_206_31490_20121128_234650_inLine
+BABEL_OP1_206_31490_20121128_234650_outLine
+BABEL_OP1_206_31628_20130528_194548_inLine
+BABEL_OP1_206_31628_20130528_194548_outLine
+BABEL_OP1_206_32122_20121128_184757_inLine
+BABEL_OP1_206_32122_20121128_184757_outLine
+BABEL_OP1_206_32301_20130530_191142_inLine
+BABEL_OP1_206_32301_20130530_191142_outLine
+BABEL_OP1_206_32328_20121215_181911_inLine
+BABEL_OP1_206_32328_20121215_181911_outLine
+BABEL_OP1_206_32837_20121213_221825_inLine
+BABEL_OP1_206_32837_20121213_221825_outLine
+BABEL_OP1_206_32837_20121213_223037_inLine
+BABEL_OP1_206_32837_20121213_223037_outLine
+BABEL_OP1_206_33111_20130601_200233_inLine
+BABEL_OP1_206_33111_20130601_200233_outLine
+BABEL_OP1_206_33273_20121129_201318_inLine
+BABEL_OP1_206_33273_20121129_201318_outLine
+BABEL_OP1_206_33355_20121130_055943_inLine
+BABEL_OP1_206_33355_20121130_055943_outLine
+BABEL_OP1_206_33672_20130524_171145_inLine
+BABEL_OP1_206_33672_20130524_171145_outLine
+BABEL_OP1_206_33704_20121213_214430_inLine
+BABEL_OP1_206_33704_20121213_214430_outLine
+BABEL_OP1_206_33840_20121213_230741_inLine
+BABEL_OP1_206_33840_20121213_230741_outLine
+BABEL_OP1_206_34197_20121128_232538_inLine
+BABEL_OP1_206_34197_20121128_232538_outLine
+BABEL_OP1_206_34328_20121202_184915_inLine
+BABEL_OP1_206_34328_20121202_184915_outLine
+BABEL_OP1_206_34679_20121206_000152_inLine
+BABEL_OP1_206_34679_20121206_000152_outLine
+BABEL_OP1_206_34826_20121215_005505_inLine
+BABEL_OP1_206_34826_20121215_005505_outLine
+BABEL_OP1_206_35008_20121216_210449_inLine
+BABEL_OP1_206_35008_20121216_210449_outLine
+BABEL_OP1_206_36894_20121128_201825_inLine
+BABEL_OP1_206_36894_20121128_201825_outLine
+BABEL_OP1_206_37598_20130111_224005_inLine
+BABEL_OP1_206_37598_20130111_224005_outLine
+BABEL_OP1_206_38554_20121123_025415_inLine
+BABEL_OP1_206_38554_20121123_025415_outLine
+BABEL_OP1_206_38689_20121217_013737_inLine
+BABEL_OP1_206_38689_20121217_013737_outLine
+BABEL_OP1_206_38878_20130530_172309_inLine
+BABEL_OP1_206_38878_20130530_172309_outLine
+BABEL_OP1_206_39059_20121215_230057_inLine
+BABEL_OP1_206_39059_20121215_230057_outLine
+BABEL_OP1_206_39059_20121216_000252_inLine
+BABEL_OP1_206_39059_20121216_000252_outLine
+BABEL_OP1_206_39307_20121207_024156_inLine
+BABEL_OP1_206_39307_20121207_024156_outLine
+BABEL_OP1_206_39426_20130120_232407_inLine
+BABEL_OP1_206_39426_20130120_232407_outLine
+BABEL_OP1_206_39426_20130120_233651_inLine
+BABEL_OP1_206_39426_20130120_233651_outLine
+BABEL_OP1_206_40557_20121218_025254_inLine
+BABEL_OP1_206_40557_20121218_025254_outLine
+BABEL_OP1_206_40713_20121129_215041_inLine
+BABEL_OP1_206_40713_20121129_215041_outLine
+BABEL_OP1_206_41097_20121215_173120_inLine
+BABEL_OP1_206_41097_20121215_173120_outLine
+BABEL_OP1_206_41174_20130604_193434_inLine
+BABEL_OP1_206_41174_20130604_193434_outLine
+BABEL_OP1_206_41233_20121215_001846_inLine
+BABEL_OP1_206_41233_20121215_001846_outLine
+BABEL_OP1_206_41598_20130102_233834_inLine
+BABEL_OP1_206_41598_20130102_233834_outLine
+BABEL_OP1_206_42029_20121220_181050_inLine
+BABEL_OP1_206_42029_20121220_181050_outLine
+BABEL_OP1_206_42434_20121202_195754_inLine
+BABEL_OP1_206_42434_20121202_195754_outLine
+BABEL_OP1_206_42434_20121202_202540_inLine
+BABEL_OP1_206_42434_20121202_202540_outLine
+BABEL_OP1_206_42619_20121213_204854_inLine
+BABEL_OP1_206_42619_20121213_204854_outLine
+BABEL_OP1_206_42771_20130601_203101_inLine
+BABEL_OP1_206_42771_20130601_203101_outLine
+BABEL_OP1_206_42834_20121219_015826_inLine
+BABEL_OP1_206_42834_20121219_015826_outLine
+BABEL_OP1_206_43286_20121125_054930_inLine
+BABEL_OP1_206_43286_20121125_054930_outLine
+BABEL_OP1_206_43286_20121125_060858_inLine
+BABEL_OP1_206_43286_20121125_060858_outLine
+BABEL_OP1_206_43286_20121126_003810_inLine
+BABEL_OP1_206_43286_20121126_003810_outLine
+BABEL_OP1_206_43368_20121128_203447_inLine
+BABEL_OP1_206_43368_20121128_203447_outLine
+BABEL_OP1_206_43784_20121230_224515_inLine
+BABEL_OP1_206_43784_20121230_224515_outLine
+BABEL_OP1_206_43788_20121223_235436_inLine
+BABEL_OP1_206_43788_20121223_235436_outLine
+BABEL_OP1_206_44477_20121228_020003_inLine
+BABEL_OP1_206_44477_20121228_020003_outLine
+BABEL_OP1_206_44619_20121129_201028_inLine
+BABEL_OP1_206_44619_20121129_201028_outLine
+BABEL_OP1_206_44619_20121129_203209_inLine
+BABEL_OP1_206_44619_20121129_203209_outLine
+BABEL_OP1_206_45235_20121213_044536_inLine
+BABEL_OP1_206_45235_20121213_044536_outLine
+BABEL_OP1_206_45536_20121212_023751_inLine
+BABEL_OP1_206_45536_20121212_023751_outLine
+BABEL_OP1_206_45851_20130123_013016_inLine
+BABEL_OP1_206_45851_20130123_013016_outLine
+BABEL_OP1_206_46066_20121218_015244_outLine
+BABEL_OP1_206_46066_20121218_020520_inLine
+BABEL_OP1_206_46066_20121218_020520_outLine
+BABEL_OP1_206_46261_20130524_180914_inLine
+BABEL_OP1_206_46261_20130524_180914_outLine
+BABEL_OP1_206_46330_20121220_171612_inLine
+BABEL_OP1_206_46330_20121220_171612_outLine
+BABEL_OP1_206_46558_20121125_000809_inLine
+BABEL_OP1_206_46558_20121125_000809_outLine
+BABEL_OP1_206_46688_20121130_222025_inLine
+BABEL_OP1_206_46688_20121130_222025_outLine
+BABEL_OP1_206_46770_20121213_030348_inLine
+BABEL_OP1_206_46770_20121213_030348_outLine
+BABEL_OP1_206_46976_20121222_002626_inLine
+BABEL_OP1_206_46976_20121222_002626_outLine
+BABEL_OP1_206_47487_20121127_232736_inLine
+BABEL_OP1_206_47487_20121127_232736_outLine
+BABEL_OP1_206_47802_20121213_220928_inLine
+BABEL_OP1_206_47802_20121213_220928_outLine
+BABEL_OP1_206_47878_20121221_153159_inLine
+BABEL_OP1_206_47878_20121221_153159_outLine
+BABEL_OP1_206_48844_20121123_030435_inLine
+BABEL_OP1_206_48844_20121123_030435_outLine
+BABEL_OP1_206_48844_20121204_030447_inLine
+BABEL_OP1_206_48844_20121204_030447_outLine
+BABEL_OP1_206_49001_20121128_201907_inLine
+BABEL_OP1_206_49001_20121128_201907_outLine
+BABEL_OP1_206_49287_20121219_204754_inLine
+BABEL_OP1_206_49287_20121219_204754_outLine
+BABEL_OP1_206_49870_20130605_000829_inLine
+BABEL_OP1_206_49870_20130605_000829_outLine
+BABEL_OP1_206_49907_20121128_055731_inLine
+BABEL_OP1_206_49907_20121128_055731_outLine
+BABEL_OP1_206_49912_20130603_002155_inLine
+BABEL_OP1_206_49912_20130603_002155_outLine
+BABEL_OP1_206_50090_20121210_232617_inLine
+BABEL_OP1_206_50090_20121210_232617_outLine
+BABEL_OP1_206_50090_20121210_234419_inLine
+BABEL_OP1_206_50090_20121210_234419_outLine
+BABEL_OP1_206_50565_20121206_213949_inLine
+BABEL_OP1_206_50565_20121206_213949_outLine
+BABEL_OP1_206_50565_20121206_215103_inLine
+BABEL_OP1_206_50565_20121206_215103_outLine
+BABEL_OP1_206_50565_20121206_221547_inLine
+BABEL_OP1_206_50565_20121206_221547_outLine
+BABEL_OP1_206_50681_20121222_003908_inLine
+BABEL_OP1_206_50681_20121222_003908_outLine
+BABEL_OP1_206_51015_20121216_025307_inLine
+BABEL_OP1_206_51015_20121216_025307_outLine
+BABEL_OP1_206_51484_20121213_023814_inLine
+BABEL_OP1_206_51484_20121213_023814_outLine
+BABEL_OP1_206_51955_20121219_004818_inLine
+BABEL_OP1_206_51955_20121219_004818_outLine
+BABEL_OP1_206_52422_20121220_034724_inLine
+BABEL_OP1_206_52422_20121220_034724_outLine
+BABEL_OP1_206_52804_20121201_184720_inLine
+BABEL_OP1_206_52804_20121201_184720_outLine
+BABEL_OP1_206_52818_20121228_012038_inLine
+BABEL_OP1_206_52818_20121228_012038_outLine
+BABEL_OP1_206_52854_20121128_034458_inLine
+BABEL_OP1_206_52854_20121128_034458_outLine
+BABEL_OP1_206_52854_20121206_214928_inLine
+BABEL_OP1_206_52854_20121206_214928_outLine
+BABEL_OP1_206_52854_20121206_224251_inLine
+BABEL_OP1_206_52854_20121206_224251_outLine
+BABEL_OP1_206_52932_20121128_045304_inLine
+BABEL_OP1_206_52932_20121128_045304_outLine
+BABEL_OP1_206_52932_20121128_233739_inLine
+BABEL_OP1_206_52932_20121128_233739_outLine
+BABEL_OP1_206_54104_20130102_215440_inLine
+BABEL_OP1_206_54104_20130102_215440_outLine
+BABEL_OP1_206_54162_20121220_230656_inLine
+BABEL_OP1_206_54162_20121220_230656_outLine
+BABEL_OP1_206_54390_20121130_203012_inLine
+BABEL_OP1_206_54390_20121130_203012_outLine
+BABEL_OP1_206_54477_20121212_013137_inLine
+BABEL_OP1_206_54477_20121212_013137_outLine
+BABEL_OP1_206_54530_20130531_233153_inLine
+BABEL_OP1_206_54530_20130531_233153_outLine
+BABEL_OP1_206_54697_20121228_003256_inLine
+BABEL_OP1_206_54697_20121228_003256_outLine
+BABEL_OP1_206_54953_20121205_023337_inLine
+BABEL_OP1_206_54953_20121205_023337_outLine
+BABEL_OP1_206_55259_20130118_022049_inLine
+BABEL_OP1_206_55259_20130118_022049_outLine
+BABEL_OP1_206_55259_20130118_023307_inLine
+BABEL_OP1_206_55259_20130118_023307_outLine
+BABEL_OP1_206_55968_20121204_204317_inLine
+BABEL_OP1_206_55968_20121204_204317_outLine
+BABEL_OP1_206_55968_20121204_211213_inLine
+BABEL_OP1_206_55968_20121204_211213_outLine
+BABEL_OP1_206_56023_20121227_235521_inLine
+BABEL_OP1_206_56023_20121227_235521_outLine
+BABEL_OP1_206_56677_20130111_174028_inLine
+BABEL_OP1_206_56677_20130111_174028_outLine
+BABEL_OP1_206_57093_20121205_002300_inLine
+BABEL_OP1_206_57093_20121205_002300_outLine
+BABEL_OP1_206_57093_20121205_044909_inLine
+BABEL_OP1_206_57093_20121205_044909_outLine
+BABEL_OP1_206_57141_20121212_211734_inLine
+BABEL_OP1_206_57141_20121212_211734_outLine
+BABEL_OP1_206_57529_20121211_232002_inLine
+BABEL_OP1_206_57529_20121211_232002_outLine
+BABEL_OP1_206_58047_20121212_222839_inLine
+BABEL_OP1_206_58047_20121212_222839_outLine
+BABEL_OP1_206_58313_20121220_211354_inLine
+BABEL_OP1_206_58313_20121220_211354_outLine
+BABEL_OP1_206_58489_20121221_225602_inLine
+BABEL_OP1_206_58489_20121221_225602_outLine
+BABEL_OP1_206_58734_20121130_203502_inLine
+BABEL_OP1_206_58734_20121130_203502_outLine
+BABEL_OP1_206_58821_20130531_205929_inLine
+BABEL_OP1_206_58821_20130531_205929_outLine
+BABEL_OP1_206_60026_20121205_044105_inLine
+BABEL_OP1_206_60026_20121205_044105_outLine
+BABEL_OP1_206_60299_20130602_222928_inLine
+BABEL_OP1_206_60299_20130602_222928_outLine
+BABEL_OP1_206_60310_20121220_003756_inLine
+BABEL_OP1_206_60310_20121220_003756_outLine
+BABEL_OP1_206_61167_20121202_012318_inLine
+BABEL_OP1_206_61167_20121202_012318_outLine
+BABEL_OP1_206_61167_20121203_083125_inLine
+BABEL_OP1_206_61167_20121203_083125_outLine
+BABEL_OP1_206_61348_20121218_225731_inLine
+BABEL_OP1_206_61348_20121218_225731_outLine
+BABEL_OP1_206_61357_20130120_183001_inLine
+BABEL_OP1_206_61357_20130120_183001_outLine
+BABEL_OP1_206_61435_20121217_000451_inLine
+BABEL_OP1_206_61435_20121217_000451_outLine
+BABEL_OP1_206_61678_20121123_013649_inLine
+BABEL_OP1_206_61678_20121123_013649_outLine
+BABEL_OP1_206_61731_20121128_024803_inLine
+BABEL_OP1_206_61731_20121128_024803_outLine
+BABEL_OP1_206_61888_20130605_172611_inLine
+BABEL_OP1_206_61888_20130605_172611_outLine
+BABEL_OP1_206_62200_20130522_212226_inLine
+BABEL_OP1_206_62200_20130522_212226_outLine
+BABEL_OP1_206_62724_20121218_202436_inLine
+BABEL_OP1_206_62724_20121218_202436_outLine
+BABEL_OP1_206_62800_20121201_010750_inLine
+BABEL_OP1_206_62800_20121201_010750_outLine
+BABEL_OP1_206_62800_20121201_015047_inLine
+BABEL_OP1_206_62800_20121201_015047_outLine
+BABEL_OP1_206_62800_20121201_021942_inLine
+BABEL_OP1_206_62800_20121201_021942_outLine
+BABEL_OP1_206_62810_20121122_202600_inLine
+BABEL_OP1_206_62810_20121122_202600_outLine
+BABEL_OP1_206_63084_20121210_013516_inLine
+BABEL_OP1_206_63084_20121210_013516_outLine
+BABEL_OP1_206_63425_20121214_182639_inLine
+BABEL_OP1_206_63425_20121214_182639_outLine
+BABEL_OP1_206_63670_20121212_212623_inLine
+BABEL_OP1_206_63670_20121212_212623_outLine
+BABEL_OP1_206_63757_20121222_235730_inLine
+BABEL_OP1_206_63757_20121222_235730_outLine
+BABEL_OP1_206_63787_20130530_221300_inLine
+BABEL_OP1_206_63787_20130530_221300_outLine
+BABEL_OP1_206_63906_20130131_014942_inLine
+BABEL_OP1_206_63906_20130131_014942_outLine
+BABEL_OP1_206_64014_20130122_011323_inLine
+BABEL_OP1_206_64014_20130122_011323_outLine
+BABEL_OP1_206_64768_20121207_223917_inLine
+BABEL_OP1_206_64768_20121207_223917_outLine
+BABEL_OP1_206_65064_20121221_000939_inLine
+BABEL_OP1_206_65064_20121221_000939_outLine
+BABEL_OP1_206_66001_20130103_012213_inLine
+BABEL_OP1_206_66001_20130103_012213_outLine
+BABEL_OP1_206_66045_20121129_223013_inLine
+BABEL_OP1_206_66045_20121129_223013_outLine
+BABEL_OP1_206_66916_20130118_005447_inLine
+BABEL_OP1_206_66916_20130118_005447_outLine
+BABEL_OP1_206_66916_20130118_010520_inLine
+BABEL_OP1_206_66916_20130118_010520_outLine
+BABEL_OP1_206_67622_20121206_210526_inLine
+BABEL_OP1_206_67622_20121206_210526_outLine
+BABEL_OP1_206_68306_20121213_205817_inLine
+BABEL_OP1_206_68306_20121213_205817_outLine
+BABEL_OP1_206_68385_20121123_231120_inLine
+BABEL_OP1_206_68385_20121123_231120_outLine
+BABEL_OP1_206_68627_20130122_023725_inLine
+BABEL_OP1_206_68627_20130122_023725_outLine
+BABEL_OP1_206_68748_20121212_025750_inLine
+BABEL_OP1_206_68748_20121212_025750_outLine
+BABEL_OP1_206_68924_20121228_001758_inLine
+BABEL_OP1_206_68924_20121228_001758_outLine
+BABEL_OP1_206_69578_20121214_002009_inLine
+BABEL_OP1_206_69578_20121214_002009_outLine
+BABEL_OP1_206_69992_20130529_181609_inLine
+BABEL_OP1_206_69992_20130529_181609_outLine
+BABEL_OP1_206_70121_20121219_215051_inLine
+BABEL_OP1_206_70121_20121219_215051_outLine
+BABEL_OP1_206_70121_20121219_220824_inLine
+BABEL_OP1_206_70121_20121219_220824_outLine
+BABEL_OP1_206_70251_20121219_044415_inLine
+BABEL_OP1_206_70251_20121219_044415_outLine
+BABEL_OP1_206_70343_20121221_023826_inLine
+BABEL_OP1_206_70343_20121221_023826_outLine
+BABEL_OP1_206_70386_20121207_232647_inLine
+BABEL_OP1_206_70386_20121207_232647_outLine
+BABEL_OP1_206_71067_20121209_210046_inLine
+BABEL_OP1_206_71067_20121209_210046_outLine
+BABEL_OP1_206_71067_20121209_214030_inLine
+BABEL_OP1_206_71067_20121209_214030_outLine
+BABEL_OP1_206_71566_20130604_214443_inLine
+BABEL_OP1_206_71566_20130604_214443_outLine
+BABEL_OP1_206_72110_20121221_232617_inLine
+BABEL_OP1_206_72110_20121221_232617_outLine
+BABEL_OP1_206_72319_20130123_022502_inLine
+BABEL_OP1_206_72319_20130123_022502_outLine
+BABEL_OP1_206_72324_20130602_184851_inLine
+BABEL_OP1_206_72324_20130602_184851_outLine
+BABEL_OP1_206_72844_20121130_193956_inLine
+BABEL_OP1_206_72844_20121130_193956_outLine
+BABEL_OP1_206_73005_20130122_021229_inLine
+BABEL_OP1_206_73005_20130122_021229_outLine
+BABEL_OP1_206_73258_20130120_170200_inLine
+BABEL_OP1_206_73258_20130120_170200_outLine
+BABEL_OP1_206_73301_20130529_214428_inLine
+BABEL_OP1_206_73301_20130529_214428_outLine
+BABEL_OP1_206_73485_20130122_235208_inLine
+BABEL_OP1_206_73485_20130122_235208_outLine
+BABEL_OP1_206_73591_20121117_212751_inLine
+BABEL_OP1_206_73591_20121117_212751_outLine
+BABEL_OP1_206_74886_20121128_205141_inLine
+BABEL_OP1_206_74886_20121128_205141_outLine
+BABEL_OP1_206_75064_20121129_233512_inLine
+BABEL_OP1_206_75064_20121129_233512_outLine
+BABEL_OP1_206_75505_20130522_234600_inLine
+BABEL_OP1_206_75505_20130522_234600_outLine
+BABEL_OP1_206_75993_20121128_223040_inLine
+BABEL_OP1_206_75993_20121128_223040_outLine
+BABEL_OP1_206_76126_20121219_020552_inLine
+BABEL_OP1_206_76126_20121219_020552_outLine
+BABEL_OP1_206_76238_20130111_190815_inLine
+BABEL_OP1_206_76238_20130111_190815_outLine
+BABEL_OP1_206_76437_20121117_202446_inLine
+BABEL_OP1_206_76437_20121117_202446_outLine
+BABEL_OP1_206_77803_20121130_005638_inLine
+BABEL_OP1_206_77803_20121130_005638_outLine
+BABEL_OP1_206_78398_20121206_003319_inLine
+BABEL_OP1_206_78398_20121206_003319_outLine
+BABEL_OP1_206_78544_20121220_000743_inLine
+BABEL_OP1_206_78544_20121220_000743_outLine
+BABEL_OP1_206_78943_20121129_231930_inLine
+BABEL_OP1_206_78943_20121129_231930_outLine
+BABEL_OP1_206_79080_20121212_205306_inLine
+BABEL_OP1_206_79080_20121212_205306_outLine
+BABEL_OP1_206_79131_20130123_003404_inLine
+BABEL_OP1_206_79131_20130123_003404_outLine
+BABEL_OP1_206_79167_20130602_202526_inLine
+BABEL_OP1_206_79167_20130602_202526_outLine
+BABEL_OP1_206_79367_20121204_001524_inLine
+BABEL_OP1_206_79367_20121204_001524_outLine
+BABEL_OP1_206_79367_20121204_004137_inLine
+BABEL_OP1_206_79367_20121204_004137_outLine
+BABEL_OP1_206_80439_20130527_182722_inLine
+BABEL_OP1_206_80439_20130527_182722_outLine
+BABEL_OP1_206_80559_20121206_232755_inLine
+BABEL_OP1_206_80559_20121206_232755_outLine
+BABEL_OP1_206_80781_20121219_233131_inLine
+BABEL_OP1_206_80781_20121219_233131_outLine
+BABEL_OP1_206_81435_20121220_204044_inLine
+BABEL_OP1_206_81435_20121220_204044_outLine
+BABEL_OP1_206_82035_20121220_195943_inLine
+BABEL_OP1_206_82035_20121220_195943_outLine
+BABEL_OP1_206_82138_20121129_223223_inLine
+BABEL_OP1_206_82138_20121129_223223_outLine
+BABEL_OP1_206_82303_20130531_191551_inLine
+BABEL_OP1_206_82303_20130531_191551_outLine
+BABEL_OP1_206_82391_20121221_015423_inLine
+BABEL_OP1_206_82391_20121221_015423_outLine
+BABEL_OP1_206_82425_20121129_212519_inLine
+BABEL_OP1_206_82425_20121129_212519_outLine
+BABEL_OP1_206_82473_20121206_004738_inLine
+BABEL_OP1_206_82473_20121206_004738_outLine
+BABEL_OP1_206_82622_20130604_222219_inLine
+BABEL_OP1_206_82622_20130604_222219_outLine
+BABEL_OP1_206_83455_20121205_024244_inLine
+BABEL_OP1_206_83455_20121205_024244_outLine
+BABEL_OP1_206_84547_20121206_225105_inLine
+BABEL_OP1_206_84547_20121206_225105_outLine
+BABEL_OP1_206_84605_20121129_212603_inLine
+BABEL_OP1_206_84605_20121129_212603_outLine
+BABEL_OP1_206_84805_20121214_221155_inLine
+BABEL_OP1_206_84805_20121214_221155_outLine
+BABEL_OP1_206_85028_20121212_014236_inLine
+BABEL_OP1_206_85028_20121212_014236_outLine
+BABEL_OP1_206_85322_20130530_233851_inLine
+BABEL_OP1_206_85322_20130530_233851_outLine
+BABEL_OP1_206_85647_20121206_022317_inLine
+BABEL_OP1_206_85647_20121206_022317_outLine
+BABEL_OP1_206_85647_20121206_024354_inLine
+BABEL_OP1_206_85647_20121206_024354_outLine
+BABEL_OP1_206_85651_20130420_232505_inLine
+BABEL_OP1_206_85651_20130420_232505_outLine
+BABEL_OP1_206_86191_20121205_001218_inLine
+BABEL_OP1_206_86191_20121205_001218_outLine
+BABEL_OP1_206_86321_20121212_025212_inLine
+BABEL_OP1_206_86321_20121212_025212_outLine
+BABEL_OP1_206_86433_20121220_215310_inLine
+BABEL_OP1_206_86433_20121220_215310_outLine
+BABEL_OP1_206_86433_20121220_225718_inLine
+BABEL_OP1_206_86433_20121220_225718_outLine
+BABEL_OP1_206_86472_20121221_010912_inLine
+BABEL_OP1_206_86472_20121221_010912_outLine
+BABEL_OP1_206_86635_20121218_223238_inLine
+BABEL_OP1_206_86635_20121218_223238_outLine
+BABEL_OP1_206_86635_20121218_230141_inLine
+BABEL_OP1_206_86635_20121218_230141_outLine
+BABEL_OP1_206_86715_20130602_174900_inLine
+BABEL_OP1_206_86715_20130602_174900_outLine
+BABEL_OP1_206_86722_20121204_231838_inLine
+BABEL_OP1_206_86722_20121204_231838_outLine
+BABEL_OP1_206_87073_20130102_212334_inLine
+BABEL_OP1_206_87073_20130102_212334_outLine
+BABEL_OP1_206_87298_20121129_212519_inLine
+BABEL_OP1_206_87298_20121129_212519_outLine
+BABEL_OP1_206_87298_20121129_213610_inLine
+BABEL_OP1_206_87298_20121129_213610_outLine
+BABEL_OP1_206_87470_20121203_052237_inLine
+BABEL_OP1_206_87470_20121203_052237_outLine
+BABEL_OP1_206_87871_20121220_222250_inLine
+BABEL_OP1_206_87871_20121220_222250_outLine
+BABEL_OP1_206_87921_20121221_003205_inLine
+BABEL_OP1_206_87921_20121221_003205_outLine
+BABEL_OP1_206_88260_20121208_204256_inLine
+BABEL_OP1_206_88260_20121208_204256_outLine
+BABEL_OP1_206_88372_20130120_230911_inLine
+BABEL_OP1_206_88372_20130120_230911_outLine
+BABEL_OP1_206_88925_20130603_230637_inLine
+BABEL_OP1_206_88925_20130603_230637_outLine
+BABEL_OP1_206_89575_20121220_211420_inLine
+BABEL_OP1_206_89575_20121220_211420_outLine
+BABEL_OP1_206_89665_20121208_212046_inLine
+BABEL_OP1_206_89665_20121208_212046_outLine
+BABEL_OP1_206_89943_20121127_034521_inLine
+BABEL_OP1_206_89943_20121127_034521_outLine
+BABEL_OP1_206_89943_20121128_015307_inLine
+BABEL_OP1_206_89943_20121128_015307_outLine
+BABEL_OP1_206_90572_20130618_045832_inLine
+BABEL_OP1_206_90572_20130618_045832_outLine
+BABEL_OP1_206_90739_20130604_174758_inLine
+BABEL_OP1_206_90739_20130604_174758_outLine
+BABEL_OP1_206_90760_20130525_001351_inLine
+BABEL_OP1_206_90760_20130525_001351_outLine
+BABEL_OP1_206_91125_20121123_063516_inLine
+BABEL_OP1_206_91125_20121123_063516_outLine
+BABEL_OP1_206_91336_20121205_221404_inLine
+BABEL_OP1_206_91336_20121205_221404_outLine
+BABEL_OP1_206_92459_20130529_223322_inLine
+BABEL_OP1_206_92459_20130529_223322_outLine
+BABEL_OP1_206_92527_20121128_232151_inLine
+BABEL_OP1_206_92527_20121128_232151_outLine
+BABEL_OP1_206_92527_20121128_234105_inLine
+BABEL_OP1_206_92527_20121128_234105_outLine
+BABEL_OP1_206_92557_20121213_005100_inLine
+BABEL_OP1_206_92557_20121213_005100_outLine
+BABEL_OP1_206_92740_20121211_184826_inLine
+BABEL_OP1_206_92740_20121211_184826_outLine
+BABEL_OP1_206_93411_20121220_002408_inLine
+BABEL_OP1_206_93411_20121220_002408_outLine
+BABEL_OP1_206_93632_20121212_021207_inLine
+BABEL_OP1_206_93632_20121212_021207_outLine
+BABEL_OP1_206_93858_20130605_005238_inLine
+BABEL_OP1_206_93858_20130605_005238_outLine
+BABEL_OP1_206_93964_20121205_235339_inLine
+BABEL_OP1_206_93964_20121205_235339_outLine
+BABEL_OP1_206_94025_20121213_025224_inLine
+BABEL_OP1_206_94025_20121213_025224_outLine
+BABEL_OP1_206_94745_20130531_014707_inLine
+BABEL_OP1_206_94745_20130531_014707_outLine
+BABEL_OP1_206_94869_20121205_203951_inLine
+BABEL_OP1_206_94869_20121205_203951_outLine
+BABEL_OP1_206_95028_20130601_222202_inLine
+BABEL_OP1_206_95028_20130601_222202_outLine
+BABEL_OP1_206_95231_20130601_230414_inLine
+BABEL_OP1_206_95231_20130601_230414_outLine
+BABEL_OP1_206_95446_20121220_221335_inLine
+BABEL_OP1_206_95446_20121220_221335_outLine
+BABEL_OP1_206_96730_20121220_213139_inLine
+BABEL_OP1_206_96730_20121220_213139_outLine
+BABEL_OP1_206_96910_20121202_211324_inLine
+BABEL_OP1_206_96910_20121202_211324_outLine
+BABEL_OP1_206_97376_20121220_234456_inLine
+BABEL_OP1_206_97376_20121220_234456_outLine
+BABEL_OP1_206_97772_20121123_064042_inLine
+BABEL_OP1_206_97772_20121123_064042_outLine
+BABEL_OP1_206_98311_20130528_182109_inLine
+BABEL_OP1_206_98311_20130528_182109_outLine
+BABEL_OP1_206_98390_20121123_064010_inLine
+BABEL_OP1_206_98390_20121123_064010_outLine
+BABEL_OP1_206_99289_20130123_161855_inLine
+BABEL_OP1_206_99289_20130123_161855_outLine
+BABEL_OP1_206_99289_20130123_163456_inLine
+BABEL_OP1_206_99289_20130123_163456_outLine
+BABEL_OP1_206_99955_20121219_002822_inLine
+BABEL_OP1_206_99955_20121219_002822_outLine
diff --git a/egs/babel/s5d/conf/lists/206-zulu/train.untranscribed.list b/egs/babel/s5d/conf/lists/206-zulu/train.untranscribed.list
new file mode 100644
index 00000000000..b9d6a50aad4
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/206-zulu/train.untranscribed.list
@@ -0,0 +1,285 @@
+BABEL_OP1_206_10974_20121228_005413_inLine
+BABEL_OP1_206_10974_20121228_005413_outLine
+BABEL_OP1_206_10974_20121228_024429_inLine
+BABEL_OP1_206_10974_20121228_024429_outLine
+BABEL_OP1_206_14228_20130111_014154_inLine
+BABEL_OP1_206_14228_20130111_014154_outLine
+BABEL_OP1_206_15262_20121229_174321_inLine
+BABEL_OP1_206_15262_20121229_174321_outLine
+BABEL_OP1_206_15262_20121230_013109_inLine
+BABEL_OP1_206_15262_20121230_013109_outLine
+BABEL_OP1_206_15848_20121219_014456_inLine
+BABEL_OP1_206_15848_20121219_014456_outLine
+BABEL_OP1_206_15848_20121219_020128_inLine
+BABEL_OP1_206_15848_20121219_020128_outLine
+BABEL_OP1_206_16056_20130618_231336_inLine
+BABEL_OP1_206_16056_20130618_231336_outLine
+BABEL_OP1_206_16938_20130418_204901_inLine
+BABEL_OP1_206_16938_20130418_204901_outLine
+BABEL_OP1_206_17115_20130704_003152_inLine
+BABEL_OP1_206_17115_20130704_003152_outLine
+BABEL_OP1_206_17127_20130607_184256_inLine
+BABEL_OP1_206_17127_20130607_184256_outLine
+BABEL_OP1_206_17496_20121213_021057_inLine
+BABEL_OP1_206_17496_20121213_021057_outLine
+BABEL_OP1_206_17890_20121218_232607_inLine
+BABEL_OP1_206_17890_20121218_232607_outLine
+BABEL_OP1_206_17890_20121218_234135_inLine
+BABEL_OP1_206_17890_20121218_234135_outLine
+BABEL_OP1_206_19130_20130618_230729_inLine
+BABEL_OP1_206_19130_20130618_230729_outLine
+BABEL_OP1_206_19130_20130618_233209_inLine
+BABEL_OP1_206_19130_20130618_233209_outLine
+BABEL_OP1_206_19782_20121212_231659_inLine
+BABEL_OP1_206_19782_20121212_231659_outLine
+BABEL_OP1_206_19832_20130619_213422_inLine
+BABEL_OP1_206_19832_20130619_213422_outLine
+BABEL_OP1_206_19832_20130621_212156_inLine
+BABEL_OP1_206_19832_20130621_212156_outLine
+BABEL_OP1_206_21159_20130409_220748_inLine
+BABEL_OP1_206_21159_20130409_220748_outLine
+BABEL_OP1_206_22034_20130823_052902_inLine
+BABEL_OP1_206_22034_20130823_052902_outLine
+BABEL_OP1_206_22216_20121206_230217_inLine
+BABEL_OP1_206_22216_20121206_230217_outLine
+BABEL_OP1_206_22612_20130111_030229_inLine
+BABEL_OP1_206_22612_20130111_030229_outLine
+BABEL_OP1_206_23983_20130318_001202_inLine
+BABEL_OP1_206_23983_20130318_001202_outLine
+BABEL_OP1_206_24239_20130123_200948_inLine
+BABEL_OP1_206_24239_20130123_200948_outLine
+BABEL_OP1_206_28871_20130316_231654_inLine
+BABEL_OP1_206_28871_20130316_231654_outLine
+BABEL_OP1_206_29168_20121219_024841_inLine
+BABEL_OP1_206_29168_20121219_024841_outLine
+BABEL_OP1_206_29230_20130607_212302_inLine
+BABEL_OP1_206_29230_20130607_212302_outLine
+BABEL_OP1_206_29685_20121209_215754_inLine
+BABEL_OP1_206_29685_20121209_215754_outLine
+BABEL_OP1_206_29685_20121218_164410_inLine
+BABEL_OP1_206_29685_20121218_164410_outLine
+BABEL_OP1_206_30653_20130609_003734_inLine
+BABEL_OP1_206_30653_20130609_003734_outLine
+BABEL_OP1_206_30653_20130609_010837_inLine
+BABEL_OP1_206_30653_20130609_010837_outLine
+BABEL_OP1_206_30720_20130717_175529_inLine
+BABEL_OP1_206_30720_20130717_175529_outLine
+BABEL_OP1_206_32832_20121210_200734_inLine
+BABEL_OP1_206_32832_20121210_200734_outLine
+BABEL_OP1_206_32872_20130709_004706_inLine
+BABEL_OP1_206_32872_20130709_004706_outLine
+BABEL_OP1_206_32961_20130708_045618_inLine
+BABEL_OP1_206_32961_20130708_045618_outLine
+BABEL_OP1_206_34629_20130719_022535_inLine
+BABEL_OP1_206_34629_20130719_022535_outLine
+BABEL_OP1_206_36017_20130123_211455_inLine
+BABEL_OP1_206_36017_20130123_211455_outLine
+BABEL_OP1_206_38139_20130714_222440_inLine
+BABEL_OP1_206_38139_20130714_222440_outLine
+BABEL_OP1_206_39159_20121219_215221_inLine
+BABEL_OP1_206_39159_20121219_215221_outLine
+BABEL_OP1_206_41272_20130123_012754_inLine
+BABEL_OP1_206_41272_20130123_012754_outLine
+BABEL_OP1_206_43157_20130702_170155_inLine
+BABEL_OP1_206_43157_20130702_170155_outLine
+BABEL_OP1_206_43789_20130704_211632_inLine
+BABEL_OP1_206_43789_20130704_211632_outLine
+BABEL_OP1_206_43789_20130704_214224_inLine
+BABEL_OP1_206_43789_20130704_214224_outLine
+BABEL_OP1_206_43990_20130717_000515_inLine
+BABEL_OP1_206_43990_20130717_000515_outLine
+BABEL_OP1_206_44290_20130122_225754_inLine
+BABEL_OP1_206_44290_20130122_225754_outLine
+BABEL_OP1_206_44290_20130122_230740_inLine
+BABEL_OP1_206_44290_20130122_230740_outLine
+BABEL_OP1_206_44290_20130122_231733_inLine
+BABEL_OP1_206_44290_20130122_231733_outLine
+BABEL_OP1_206_44420_20121207_005913_inLine
+BABEL_OP1_206_44420_20121207_005913_outLine
+BABEL_OP1_206_44847_20121213_214340_inLine
+BABEL_OP1_206_44847_20121213_214340_outLine
+BABEL_OP1_206_45908_20130128_214430_inLine
+BABEL_OP1_206_45908_20130128_214430_outLine
+BABEL_OP1_206_46315_20121215_035427_inLine
+BABEL_OP1_206_46315_20121215_035427_outLine
+BABEL_OP1_206_46881_20121207_203628_inLine
+BABEL_OP1_206_46881_20121207_203628_outLine
+BABEL_OP1_206_46881_20121207_205322_inLine
+BABEL_OP1_206_46881_20121207_205322_outLine
+BABEL_OP1_206_46974_20121217_030549_inLine
+BABEL_OP1_206_46974_20121217_030549_outLine
+BABEL_OP1_206_46974_20121217_175603_inLine
+BABEL_OP1_206_46974_20121217_175603_outLine
+BABEL_OP1_206_47270_20130610_005427_inLine
+BABEL_OP1_206_47270_20130610_005427_outLine
+BABEL_OP1_206_48663_20130618_220742_inLine
+BABEL_OP1_206_48663_20130618_220742_outLine
+BABEL_OP1_206_49197_20130102_213736_inLine
+BABEL_OP1_206_49197_20130102_213736_outLine
+BABEL_OP1_206_49630_20121219_190512_inLine
+BABEL_OP1_206_49630_20121219_190512_outLine
+BABEL_OP1_206_52438_20121205_011303_inLine
+BABEL_OP1_206_52438_20121205_011303_outLine
+BABEL_OP1_206_52442_20130103_034355_inLine
+BABEL_OP1_206_52442_20130103_034355_outLine
+BABEL_OP1_206_52483_20130719_011409_inLine
+BABEL_OP1_206_52483_20130719_011409_outLine
+BABEL_OP1_206_53206_20130717_214929_inLine
+BABEL_OP1_206_53206_20130717_214929_outLine
+BABEL_OP1_206_56213_20121211_204232_inLine
+BABEL_OP1_206_56213_20121211_204232_outLine
+BABEL_OP1_206_56345_20130716_043400_inLine
+BABEL_OP1_206_56345_20130716_043400_outLine
+BABEL_OP1_206_56370_20121207_023036_inLine
+BABEL_OP1_206_56370_20121207_023036_outLine
+BABEL_OP1_206_56523_20121205_023208_inLine
+BABEL_OP1_206_56523_20121205_023208_outLine
+BABEL_OP1_206_57067_20130102_012254_inLine
+BABEL_OP1_206_57067_20130102_012254_outLine
+BABEL_OP1_206_60307_20130719_015514_inLine
+BABEL_OP1_206_60307_20130719_015514_outLine
+BABEL_OP1_206_60307_20130719_020926_inLine
+BABEL_OP1_206_60307_20130719_020926_outLine
+BABEL_OP1_206_60307_20130719_024339_inLine
+BABEL_OP1_206_60307_20130719_024339_outLine
+BABEL_OP1_206_60508_20121207_000229_inLine
+BABEL_OP1_206_60508_20121207_000229_outLine
+BABEL_OP1_206_60661_20121130_205836_inLine
+BABEL_OP1_206_60661_20121130_205836_outLine
+BABEL_OP1_206_62158_20130710_235209_inLine
+BABEL_OP1_206_62158_20130710_235209_outLine
+BABEL_OP1_206_62286_20121128_234346_inLine
+BABEL_OP1_206_62286_20121128_234346_outLine
+BABEL_OP1_206_62286_20121129_203539_inLine
+BABEL_OP1_206_62286_20121129_203539_outLine
+BABEL_OP1_206_62286_20121129_212959_inLine
+BABEL_OP1_206_62286_20121129_212959_outLine
+BABEL_OP1_206_62434_20121219_215717_inLine
+BABEL_OP1_206_62434_20121219_215717_outLine
+BABEL_OP1_206_62456_20121213_021820_inLine
+BABEL_OP1_206_62456_20121213_021820_outLine
+BABEL_OP1_206_64638_20121219_213206_inLine
+BABEL_OP1_206_64638_20121219_213206_outLine
+BABEL_OP1_206_66361_20130716_054608_inLine
+BABEL_OP1_206_66361_20130716_054608_outLine
+BABEL_OP1_206_67389_20130710_003945_inLine
+BABEL_OP1_206_67389_20130710_003945_outLine
+BABEL_OP1_206_67726_20130722_002158_inLine
+BABEL_OP1_206_67726_20130722_002158_outLine
+BABEL_OP1_206_67794_20121205_012401_inLine
+BABEL_OP1_206_67794_20121205_012401_outLine
+BABEL_OP1_206_68823_20130823_044634_inLine
+BABEL_OP1_206_68823_20130823_044634_outLine
+BABEL_OP1_206_69885_20130610_194001_inLine
+BABEL_OP1_206_69885_20130610_194001_outLine
+BABEL_OP1_206_69982_20130706_192449_inLine
+BABEL_OP1_206_69982_20130706_192449_outLine
+BABEL_OP1_206_70282_20121214_191323_inLine
+BABEL_OP1_206_70282_20121214_191323_outLine
+BABEL_OP1_206_70526_20130123_020108_inLine
+BABEL_OP1_206_70526_20130123_020108_outLine
+BABEL_OP1_206_70986_20130710_195224_inLine
+BABEL_OP1_206_70986_20130710_195224_outLine
+BABEL_OP1_206_70986_20130710_200021_inLine
+BABEL_OP1_206_70986_20130710_200021_outLine
+BABEL_OP1_206_71189_20130122_234213_inLine
+BABEL_OP1_206_71189_20130122_234213_outLine
+BABEL_OP1_206_72654_20130418_185023_inLine
+BABEL_OP1_206_72654_20130418_185023_outLine
+BABEL_OP1_206_73408_20130706_195257_inLine
+BABEL_OP1_206_73408_20130706_195257_outLine
+BABEL_OP1_206_74226_20130709_222957_inLine
+BABEL_OP1_206_74226_20130709_222957_outLine
+BABEL_OP1_206_75359_20130128_221313_inLine
+BABEL_OP1_206_75359_20130128_221313_outLine
+BABEL_OP1_206_75366_20130626_043947_inLine
+BABEL_OP1_206_75366_20130626_043947_outLine
+BABEL_OP1_206_77567_20121206_015015_inLine
+BABEL_OP1_206_77567_20121206_015015_outLine
+BABEL_OP1_206_78609_20121210_172907_inLine
+BABEL_OP1_206_78609_20121210_172907_outLine
+BABEL_OP1_206_79139_20121130_021538_inLine
+BABEL_OP1_206_79139_20121130_021538_outLine
+BABEL_OP1_206_79571_20121208_220739_inLine
+BABEL_OP1_206_79571_20121208_220739_outLine
+BABEL_OP1_206_79751_20130709_233219_inLine
+BABEL_OP1_206_79751_20130709_233219_outLine
+BABEL_OP1_206_80897_20121220_195655_inLine
+BABEL_OP1_206_80897_20121220_195655_outLine
+BABEL_OP1_206_81229_20121203_035326_inLine
+BABEL_OP1_206_81229_20121203_035331_outLine
+BABEL_OP1_206_81424_20130114_180200_inLine
+BABEL_OP1_206_81424_20130114_180200_outLine
+BABEL_OP1_206_81553_20130114_222622_inLine
+BABEL_OP1_206_81553_20130114_222622_outLine
+BABEL_OP1_206_82030_20130607_164514_inLine
+BABEL_OP1_206_82030_20130607_164514_outLine
+BABEL_OP1_206_82030_20130607_170355_inLine
+BABEL_OP1_206_82030_20130607_170355_outLine
+BABEL_OP1_206_83366_20130112_225127_inLine
+BABEL_OP1_206_83366_20130112_225127_outLine
+BABEL_OP1_206_83851_20121219_214118_inLine
+BABEL_OP1_206_83851_20121219_214118_outLine
+BABEL_OP1_206_84327_20130605_221231_inLine
+BABEL_OP1_206_84327_20130605_221231_outLine
+BABEL_OP1_206_84339_20130610_180645_inLine
+BABEL_OP1_206_84339_20130610_180645_outLine
+BABEL_OP1_206_84583_20121209_183927_inLine
+BABEL_OP1_206_84583_20121209_183927_outLine
+BABEL_OP1_206_84709_20130718_233020_inLine
+BABEL_OP1_206_84709_20130718_233020_outLine
+BABEL_OP1_206_85246_20130708_202906_inLine
+BABEL_OP1_206_85246_20130708_202906_outLine
+BABEL_OP1_206_85439_20130123_002202_inLine
+BABEL_OP1_206_85439_20130123_002202_outLine
+BABEL_OP1_206_87693_20121205_012117_inLine
+BABEL_OP1_206_87693_20121205_012117_outLine
+BABEL_OP1_206_89718_20130811_194933_inLine
+BABEL_OP1_206_89718_20130811_194933_outLine
+BABEL_OP1_206_89794_20121214_233120_inLine
+BABEL_OP1_206_89794_20121214_233120_outLine
+BABEL_OP1_206_90440_20130718_001037_inLine
+BABEL_OP1_206_90440_20130718_001037_outLine
+BABEL_OP1_206_90440_20130718_002114_inLine
+BABEL_OP1_206_90440_20130718_002114_outLine
+BABEL_OP1_206_91825_20121229_025012_inLine
+BABEL_OP1_206_91825_20121229_025012_outLine
+BABEL_OP1_206_91930_20130609_211010_inLine
+BABEL_OP1_206_91930_20130609_211010_outLine
+BABEL_OP1_206_92176_20121206_000728_inLine
+BABEL_OP1_206_92176_20121206_000728_outLine
+BABEL_OP1_206_92281_20130715_213202_inLine
+BABEL_OP1_206_92281_20130715_213202_outLine
+BABEL_OP1_206_92509_20121207_014928_inLine
+BABEL_OP1_206_92509_20121207_014928_outLine
+BABEL_OP1_206_92757_20121114_211008_inLine
+BABEL_OP1_206_92757_20121114_211009_outLine
+BABEL_OP1_206_93443_20130619_014744_inLine
+BABEL_OP1_206_93443_20130619_014744_outLine
+BABEL_OP1_206_94044_20130717_183259_inLine
+BABEL_OP1_206_94044_20130717_183259_outLine
+BABEL_OP1_206_94044_20130717_184742_inLine
+BABEL_OP1_206_94044_20130717_184742_outLine
+BABEL_OP1_206_94166_20130618_012452_inLine
+BABEL_OP1_206_94166_20130618_012452_outLine
+BABEL_OP1_206_94803_20130702_181918_inLine
+BABEL_OP1_206_94803_20130702_181918_outLine
+BABEL_OP1_206_94969_20130618_233618_inLine
+BABEL_OP1_206_95598_20121218_232233_inLine
+BABEL_OP1_206_95598_20121218_232233_outLine
+BABEL_OP1_206_96088_20130123_015420_inLine
+BABEL_OP1_206_96088_20130123_015420_outLine
+BABEL_OP1_206_96446_20121219_003144_inLine
+BABEL_OP1_206_96446_20121219_003144_outLine
+BABEL_OP1_206_96940_20130723_004026_inLine
+BABEL_OP1_206_96940_20130723_004026_outLine
+BABEL_OP1_206_96985_20121212_205933_inLine
+BABEL_OP1_206_96985_20121212_205933_outLine
+BABEL_OP1_206_97570_20130111_233033_inLine
+BABEL_OP1_206_97570_20130111_233033_outLine
+BABEL_OP1_206_99516_20121123_082052_inLine
+BABEL_OP1_206_99516_20121123_082052_outLine
+BABEL_OP1_206_99920_20130524_010643_inLine
+BABEL_OP1_206_99920_20130524_010643_outLine
+BABEL_OP1_206_99920_20130524_012051_inLine
+BABEL_OP1_206_99920_20130524_012051_outLine
diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/dev.list b/egs/babel/s5d/conf/lists/207-tokpisin/dev.list
new file mode 100644
index 00000000000..a8ed2a6bc2a
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/207-tokpisin/dev.list
@@ -0,0 +1,132 @@
+BABEL_OP2_207_14141_20130927_123928_inLine
+BABEL_OP2_207_14141_20130927_123928_outLine
+BABEL_OP2_207_14229_20130801_102759_inLine
+BABEL_OP2_207_14229_20130801_102759_outLine
+BABEL_OP2_207_14440_20130824_152406_inLine
+BABEL_OP2_207_14440_20130824_152406_outLine
+BABEL_OP2_207_14440_20130824_153139_inLine
+BABEL_OP2_207_14440_20130824_153139_outLine
+BABEL_OP2_207_14440_20130824_153643_inLine
+BABEL_OP2_207_14440_20130824_153643_outLine
+BABEL_OP2_207_14875_20130731_170626_inLine
+BABEL_OP2_207_14875_20130731_170626_outLine
+BABEL_OP2_207_15848_20130623_210617_inLine
+BABEL_OP2_207_15848_20130623_210617_outLine
+BABEL_OP2_207_17127_20130925_073246_inLine
+BABEL_OP2_207_17127_20130925_073246_outLine
+BABEL_OP2_207_17923_20130629_151018_inLine
+BABEL_OP2_207_17923_20130629_151018_outLine
+BABEL_OP2_207_20916_20130623_184646_inLine
+BABEL_OP2_207_20916_20130623_184646_outLine
+BABEL_OP2_207_20916_20130623_190432_inLine
+BABEL_OP2_207_20916_20130623_190432_outLine
+BABEL_OP2_207_21244_20131010_122553_inLine
+BABEL_OP2_207_21244_20131010_122553_outLine
+BABEL_OP2_207_22216_20130801_104847_inLine
+BABEL_OP2_207_22216_20130801_104847_outLine
+BABEL_OP2_207_23505_20130626_153607_inLine
+BABEL_OP2_207_23505_20130626_153607_outLine
+BABEL_OP2_207_23893_20130909_152137_inLine
+BABEL_OP2_207_23893_20130909_152137_outLine
+BABEL_OP2_207_24589_20130722_131056_inLine
+BABEL_OP2_207_24589_20130722_131056_outLine
+BABEL_OP2_207_27218_20130701_174655_inLine
+BABEL_OP2_207_27218_20130701_174655_outLine
+BABEL_OP2_207_29911_20131212_174224_inLine
+BABEL_OP2_207_29911_20131212_174224_outLine
+BABEL_OP2_207_32708_20130730_130556_inLine
+BABEL_OP2_207_32708_20130730_130556_outLine
+BABEL_OP2_207_32832_20130922_122814_inLine
+BABEL_OP2_207_32832_20130922_122814_outLine
+BABEL_OP2_207_33111_20130930_120538_inLine
+BABEL_OP2_207_33111_20130930_120538_outLine
+BABEL_OP2_207_33175_20130621_162225_inLine
+BABEL_OP2_207_33175_20130621_162225_outLine
+BABEL_OP2_207_34477_20130722_140642_inLine
+BABEL_OP2_207_34477_20130722_140642_outLine
+BABEL_OP2_207_38431_20130915_163140_inLine
+BABEL_OP2_207_38431_20130915_163140_outLine
+BABEL_OP2_207_40713_20130711_151622_inLine
+BABEL_OP2_207_40713_20130711_151622_outLine
+BABEL_OP2_207_41100_20130712_160739_inLine
+BABEL_OP2_207_41100_20130712_160739_outLine
+BABEL_OP2_207_43646_20130624_165324_inLine
+BABEL_OP2_207_43646_20130624_165324_outLine
+BABEL_OP2_207_45697_20130925_144605_inLine
+BABEL_OP2_207_45697_20130925_144605_outLine
+BABEL_OP2_207_46535_20131219_223648_inLine
+BABEL_OP2_207_46535_20131219_223648_outLine
+BABEL_OP2_207_46625_20130627_133432_inLine
+BABEL_OP2_207_46625_20130627_133432_outLine
+BABEL_OP2_207_46881_20130626_133140_inLine
+BABEL_OP2_207_46881_20130626_133140_outLine
+BABEL_OP2_207_47270_20130926_142206_inLine
+BABEL_OP2_207_47270_20130926_142206_outLine
+BABEL_OP2_207_54744_20130627_200004_inLine
+BABEL_OP2_207_54744_20130627_200004_outLine
+BABEL_OP2_207_56468_20131102_114004_inLine
+BABEL_OP2_207_56468_20131102_114004_outLine
+BABEL_OP2_207_59898_20130625_211705_inLine
+BABEL_OP2_207_59898_20130625_211705_outLine
+BABEL_OP2_207_59898_20130625_212216_inLine
+BABEL_OP2_207_59898_20130625_212216_outLine
+BABEL_OP2_207_59898_20130625_212948_inLine
+BABEL_OP2_207_59898_20130625_212948_outLine
+BABEL_OP2_207_60706_20130623_230602_inLine
+BABEL_OP2_207_60706_20130623_230602_outLine
+BABEL_OP2_207_61011_20130624_164607_inLine
+BABEL_OP2_207_61011_20130624_164607_outLine
+BABEL_OP2_207_61357_20130822_150714_inLine
+BABEL_OP2_207_61357_20130822_150714_outLine
+BABEL_OP2_207_61963_20130830_141616_inLine
+BABEL_OP2_207_61963_20130830_141616_outLine
+BABEL_OP2_207_65252_20131008_183014_inLine
+BABEL_OP2_207_65252_20131008_183014_outLine
+BABEL_OP2_207_67213_20131218_185924_inLine
+BABEL_OP2_207_67213_20131218_185924_outLine
+BABEL_OP2_207_70110_20130621_125315_inLine
+BABEL_OP2_207_70110_20130621_125315_outLine
+BABEL_OP2_207_70726_20131222_161540_inLine
+BABEL_OP2_207_70726_20131222_161540_outLine
+BABEL_OP2_207_73072_20130730_140848_inLine
+BABEL_OP2_207_73072_20130730_140848_outLine
+BABEL_OP2_207_74226_20130828_115915_inLine
+BABEL_OP2_207_74226_20130828_115915_outLine
+BABEL_OP2_207_76218_20130809_145308_inLine
+BABEL_OP2_207_76218_20130809_145308_outLine
+BABEL_OP2_207_76837_20131207_184347_inLine
+BABEL_OP2_207_76837_20131207_184347_outLine
+BABEL_OP2_207_77730_20130628_215628_inLine
+BABEL_OP2_207_77730_20130628_215628_outLine
+BABEL_OP2_207_79131_20130915_155341_inLine
+BABEL_OP2_207_79131_20130915_155341_outLine
+BABEL_OP2_207_80577_20130930_204532_inLine
+BABEL_OP2_207_80577_20130930_204532_outLine
+BABEL_OP2_207_80881_20130621_220309_inLine
+BABEL_OP2_207_80881_20130621_220309_outLine
+BABEL_OP2_207_82742_20130915_204759_inLine
+BABEL_OP2_207_82742_20130915_204759_outLine
+BABEL_OP2_207_83851_20130731_154045_inLine
+BABEL_OP2_207_83851_20130731_154045_outLine
+BABEL_OP2_207_84815_20130911_144350_inLine
+BABEL_OP2_207_84815_20130911_144350_outLine
+BABEL_OP2_207_85179_20130920_130213_inLine
+BABEL_OP2_207_85179_20130920_130213_outLine
+BABEL_OP2_207_85439_20131009_141636_inLine
+BABEL_OP2_207_85439_20131009_141636_outLine
+BABEL_OP2_207_86557_20130621_160840_inLine
+BABEL_OP2_207_86557_20130621_160840_outLine
+BABEL_OP2_207_86557_20130621_161939_inLine
+BABEL_OP2_207_86557_20130621_161939_outLine
+BABEL_OP2_207_90777_20130725_111134_inLine
+BABEL_OP2_207_90777_20130725_111134_outLine
+BABEL_OP2_207_92886_20130711_144627_inLine
+BABEL_OP2_207_92886_20130711_144627_outLine
+BABEL_OP2_207_96324_20130625_154301_inLine
+BABEL_OP2_207_96324_20130625_154301_outLine
+BABEL_OP2_207_97136_20131003_120422_inLine
+BABEL_OP2_207_97136_20131003_120422_outLine
+BABEL_OP2_207_97849_20131003_125642_inLine
+BABEL_OP2_207_97849_20131003_125642_outLine
+BABEL_OP2_207_99975_20131027_145501_inLine
+BABEL_OP2_207_99975_20131027_145501_outLine
diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/eval.list b/egs/babel/s5d/conf/lists/207-tokpisin/eval.list
new file mode 100644
index 00000000000..57c92f399f4
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/207-tokpisin/eval.list
@@ -0,0 +1,192 @@
+BABEL_OP2_207_10416_20130808_151430_inLine
+BABEL_OP2_207_10416_20130808_151430_outLine
+BABEL_OP2_207_10974_20130821_152545_inLine
+BABEL_OP2_207_10974_20130821_152545_outLine
+BABEL_OP2_207_13040_20130711_172945_inLine
+BABEL_OP2_207_13040_20130711_172945_outLine
+BABEL_OP2_207_13427_20130817_155156_inLine
+BABEL_OP2_207_13427_20130817_155156_outLine
+BABEL_OP2_207_15042_20130915_183113_inLine
+BABEL_OP2_207_15042_20130915_183113_outLine
+BABEL_OP2_207_15163_20130809_152912_inLine
+BABEL_OP2_207_15163_20130809_152912_outLine
+BABEL_OP2_207_15926_20130905_125437_inLine
+BABEL_OP2_207_15926_20130905_125437_outLine
+BABEL_OP2_207_16184_20130625_002017_inLine
+BABEL_OP2_207_16184_20130625_002017_outLine
+BABEL_OP2_207_16467_20130918_155738_inLine
+BABEL_OP2_207_16467_20130918_155738_outLine
+BABEL_OP2_207_16467_20130918_160609_inLine
+BABEL_OP2_207_16467_20130918_160609_outLine
+BABEL_OP2_207_16601_20130906_133242_inLine
+BABEL_OP2_207_16601_20130906_133242_outLine
+BABEL_OP2_207_19545_20130821_135751_inLine
+BABEL_OP2_207_19545_20130821_135751_outLine
+BABEL_OP2_207_19672_20130903_141816_inLine
+BABEL_OP2_207_19672_20130903_141816_outLine
+BABEL_OP2_207_20896_20131224_170209_inLine
+BABEL_OP2_207_20896_20131224_170209_outLine
+BABEL_OP2_207_21029_20130702_120434_inLine
+BABEL_OP2_207_21029_20130702_120434_outLine
+BABEL_OP2_207_21581_20130724_161007_inLine
+BABEL_OP2_207_21581_20130724_161007_outLine
+BABEL_OP2_207_22170_20130828_151813_inLine
+BABEL_OP2_207_22170_20130828_151813_outLine
+BABEL_OP2_207_24010_20131023_153049_inLine
+BABEL_OP2_207_24010_20131023_153049_outLine
+BABEL_OP2_207_24033_20130930_123827_inLine
+BABEL_OP2_207_24033_20130930_123827_outLine
+BABEL_OP2_207_24221_20131028_153502_inLine
+BABEL_OP2_207_24221_20131028_153502_outLine
+BABEL_OP2_207_27082_20130812_162844_inLine
+BABEL_OP2_207_27082_20130812_162844_outLine
+BABEL_OP2_207_28422_20130905_135311_inLine
+BABEL_OP2_207_28422_20130905_135311_outLine
+BABEL_OP2_207_28871_20130621_163843_inLine
+BABEL_OP2_207_28871_20130621_163843_outLine
+BABEL_OP2_207_29230_20131015_133532_inLine
+BABEL_OP2_207_29230_20131015_133532_outLine
+BABEL_OP2_207_30250_20130720_111643_inLine
+BABEL_OP2_207_30250_20130720_111643_outLine
+BABEL_OP2_207_31484_20130906_164627_inLine
+BABEL_OP2_207_31484_20130906_164627_outLine
+BABEL_OP2_207_34019_20131218_205039_inLine
+BABEL_OP2_207_34019_20131218_205039_outLine
+BABEL_OP2_207_36017_20131003_111732_inLine
+BABEL_OP2_207_36017_20131003_111732_outLine
+BABEL_OP2_207_37068_20131211_133052_inLine
+BABEL_OP2_207_37068_20131211_133052_outLine
+BABEL_OP2_207_37499_20131009_162024_inLine
+BABEL_OP2_207_37499_20131009_162024_outLine
+BABEL_OP2_207_41493_20130628_222817_inLine
+BABEL_OP2_207_41493_20130628_222817_outLine
+BABEL_OP2_207_41920_20130730_105920_inLine
+BABEL_OP2_207_41920_20130730_105920_outLine
+BABEL_OP2_207_42600_20130724_152811_inLine
+BABEL_OP2_207_42600_20130724_152811_outLine
+BABEL_OP2_207_42600_20130724_154332_inLine
+BABEL_OP2_207_42600_20130724_154332_outLine
+BABEL_OP2_207_44255_20130925_074247_inLine
+BABEL_OP2_207_44255_20130925_074247_outLine
+BABEL_OP2_207_44678_20131029_142212_inLine
+BABEL_OP2_207_44678_20131029_142212_outLine
+BABEL_OP2_207_45235_20130918_123528_inLine
+BABEL_OP2_207_45235_20130918_123528_outLine
+BABEL_OP2_207_45777_20130731_140413_inLine
+BABEL_OP2_207_45777_20130731_140413_outLine
+BABEL_OP2_207_46041_20130919_111546_inLine
+BABEL_OP2_207_46041_20130919_111546_outLine
+BABEL_OP2_207_46702_20130627_192620_inLine
+BABEL_OP2_207_46702_20130627_192620_outLine
+BABEL_OP2_207_48663_20130828_133856_inLine
+BABEL_OP2_207_48663_20130828_133856_outLine
+BABEL_OP2_207_49775_20130711_130307_inLine
+BABEL_OP2_207_49775_20130711_130307_outLine
+BABEL_OP2_207_50186_20131207_163954_inLine
+BABEL_OP2_207_50186_20131207_163954_outLine
+BABEL_OP2_207_50962_20130712_152844_inLine
+BABEL_OP2_207_50962_20130712_152844_outLine
+BABEL_OP2_207_52070_20131018_160716_inLine
+BABEL_OP2_207_52070_20131018_160716_outLine
+BABEL_OP2_207_52694_20130819_142518_inLine
+BABEL_OP2_207_52694_20130819_142518_outLine
+BABEL_OP2_207_52854_20130701_173625_inLine
+BABEL_OP2_207_52854_20130701_173625_outLine
+BABEL_OP2_207_53419_20130915_212209_inLine
+BABEL_OP2_207_53419_20130915_212209_outLine
+BABEL_OP2_207_55742_20130628_204255_inLine
+BABEL_OP2_207_55742_20130628_204255_outLine
+BABEL_OP2_207_56429_20130729_115308_inLine
+BABEL_OP2_207_56429_20130729_115308_outLine
+BABEL_OP2_207_56743_20130731_145617_inLine
+BABEL_OP2_207_56743_20130731_145617_outLine
+BABEL_OP2_207_57654_20130711_145355_inLine
+BABEL_OP2_207_57654_20130711_145355_outLine
+BABEL_OP2_207_57654_20130711_150856_inLine
+BABEL_OP2_207_57654_20130711_150856_outLine
+BABEL_OP2_207_58815_20130917_153637_inLine
+BABEL_OP2_207_58815_20130917_153637_outLine
+BABEL_OP2_207_59993_20130712_145207_inLine
+BABEL_OP2_207_59993_20130712_145207_outLine
+BABEL_OP2_207_60418_20130829_155821_inLine
+BABEL_OP2_207_60418_20130829_155821_outLine
+BABEL_OP2_207_60508_20130801_182520_inLine
+BABEL_OP2_207_60508_20130801_182520_outLine
+BABEL_OP2_207_62430_20130930_120306_inLine
+BABEL_OP2_207_62430_20130930_120306_outLine
+BABEL_OP2_207_63445_20130730_154254_inLine
+BABEL_OP2_207_63445_20130730_154254_outLine
+BABEL_OP2_207_64796_20130627_095719_inLine
+BABEL_OP2_207_64796_20130627_095719_outLine
+BABEL_OP2_207_64796_20130627_102602_inLine
+BABEL_OP2_207_64796_20130627_102602_outLine
+BABEL_OP2_207_66519_20130724_134257_inLine
+BABEL_OP2_207_66519_20130724_134257_outLine
+BABEL_OP2_207_66519_20130724_135210_inLine
+BABEL_OP2_207_66519_20130724_135210_outLine
+BABEL_OP2_207_67373_20130629_154522_inLine
+BABEL_OP2_207_67373_20130629_154522_outLine
+BABEL_OP2_207_67794_20130629_150014_inLine
+BABEL_OP2_207_67794_20130629_150014_outLine
+BABEL_OP2_207_67794_20130629_152744_inLine
+BABEL_OP2_207_67794_20130629_152744_outLine
+BABEL_OP2_207_67842_20130711_144619_inLine
+BABEL_OP2_207_67842_20130711_144619_outLine
+BABEL_OP2_207_71333_20130711_155031_inLine
+BABEL_OP2_207_71333_20130711_155031_outLine
+BABEL_OP2_207_71704_20130701_154358_inLine
+BABEL_OP2_207_71704_20130701_154358_outLine
+BABEL_OP2_207_74111_20130922_211430_inLine
+BABEL_OP2_207_74111_20130922_211430_outLine
+BABEL_OP2_207_75366_20131018_141443_inLine
+BABEL_OP2_207_75366_20131018_141443_outLine
+BABEL_OP2_207_75465_20130919_133102_inLine
+BABEL_OP2_207_75465_20130919_133102_outLine
+BABEL_OP2_207_76372_20130930_220003_inLine
+BABEL_OP2_207_76372_20130930_220003_outLine
+BABEL_OP2_207_77139_20130624_231111_inLine
+BABEL_OP2_207_77139_20130624_231111_outLine
+BABEL_OP2_207_78630_20130802_140131_inLine
+BABEL_OP2_207_78630_20130802_140131_outLine
+BABEL_OP2_207_78976_20130701_162332_inLine
+BABEL_OP2_207_78976_20130701_162332_outLine
+BABEL_OP2_207_79028_20131211_173303_inLine
+BABEL_OP2_207_79028_20131211_173303_outLine
+BABEL_OP2_207_79660_20131011_163724_inLine
+BABEL_OP2_207_79660_20131011_163724_outLine
+BABEL_OP2_207_80655_20131001_101140_inLine
+BABEL_OP2_207_80655_20131001_101140_outLine
+BABEL_OP2_207_80721_20130910_121013_inLine
+BABEL_OP2_207_80721_20130910_121013_outLine
+BABEL_OP2_207_81392_20130905_165515_inLine
+BABEL_OP2_207_81392_20130905_165515_outLine
+BABEL_OP2_207_83366_20130824_150458_inLine
+BABEL_OP2_207_83366_20130824_150458_outLine
+BABEL_OP2_207_83545_20131009_133016_inLine
+BABEL_OP2_207_83545_20131009_133016_outLine
+BABEL_OP2_207_89888_20130730_133532_inLine
+BABEL_OP2_207_89888_20130730_133532_outLine
+BABEL_OP2_207_90318_20131224_133452_inLine
+BABEL_OP2_207_90318_20131224_133452_outLine
+BABEL_OP2_207_90935_20130725_162432_inLine
+BABEL_OP2_207_90935_20130725_162432_outLine
+BABEL_OP2_207_92941_20130722_163301_inLine
+BABEL_OP2_207_92941_20130722_163301_outLine
+BABEL_OP2_207_95598_20130625_170733_inLine
+BABEL_OP2_207_95598_20130625_170733_outLine
+BABEL_OP2_207_95966_20130811_204100_inLine
+BABEL_OP2_207_95966_20130811_204100_outLine
+BABEL_OP2_207_96934_20130723_143258_inLine
+BABEL_OP2_207_96934_20130723_143258_outLine
+BABEL_OP2_207_96985_20130626_084229_inLine
+BABEL_OP2_207_96985_20130626_084229_outLine
+BABEL_OP2_207_97988_20130909_215057_inLine
+BABEL_OP2_207_97988_20130909_215057_outLine
+BABEL_OP2_207_98165_20130724_141743_inLine
+BABEL_OP2_207_98165_20130724_141743_outLine
+BABEL_OP2_207_98506_20130930_135511_inLine
+BABEL_OP2_207_98506_20130930_135511_outLine
+BABEL_OP2_207_98580_20130809_144219_inLine
+BABEL_OP2_207_98580_20130809_144219_outLine
+BABEL_OP2_207_98678_20131001_204525_inLine
+BABEL_OP2_207_98678_20131001_204525_outLine
diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/evalpart1.list b/egs/babel/s5d/conf/lists/207-tokpisin/evalpart1.list
new file mode 100644
index 00000000000..042fde9446d
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/207-tokpisin/evalpart1.list
@@ -0,0 +1,64 @@
+BABEL_OP2_207_10416_20130808_151430_inLine
+BABEL_OP2_207_10416_20130808_151430_outLine
+BABEL_OP2_207_15926_20130905_125437_inLine
+BABEL_OP2_207_15926_20130905_125437_outLine
+BABEL_OP2_207_19545_20130821_135751_inLine
+BABEL_OP2_207_19545_20130821_135751_outLine
+BABEL_OP2_207_24033_20130930_123827_inLine
+BABEL_OP2_207_24033_20130930_123827_outLine
+BABEL_OP2_207_28422_20130905_135311_inLine
+BABEL_OP2_207_28422_20130905_135311_outLine
+BABEL_OP2_207_30250_20130720_111643_inLine
+BABEL_OP2_207_30250_20130720_111643_outLine
+BABEL_OP2_207_31484_20130906_164627_inLine
+BABEL_OP2_207_31484_20130906_164627_outLine
+BABEL_OP2_207_34019_20131218_205039_inLine
+BABEL_OP2_207_34019_20131218_205039_outLine
+BABEL_OP2_207_42600_20130724_152811_inLine
+BABEL_OP2_207_42600_20130724_152811_outLine
+BABEL_OP2_207_42600_20130724_154332_inLine
+BABEL_OP2_207_42600_20130724_154332_outLine
+BABEL_OP2_207_44255_20130925_074247_inLine
+BABEL_OP2_207_44255_20130925_074247_outLine
+BABEL_OP2_207_44678_20131029_142212_inLine
+BABEL_OP2_207_44678_20131029_142212_outLine
+BABEL_OP2_207_48663_20130828_133856_inLine
+BABEL_OP2_207_48663_20130828_133856_outLine
+BABEL_OP2_207_49775_20130711_130307_inLine
+BABEL_OP2_207_49775_20130711_130307_outLine
+BABEL_OP2_207_50962_20130712_152844_inLine
+BABEL_OP2_207_50962_20130712_152844_outLine
+BABEL_OP2_207_52070_20131018_160716_inLine
+BABEL_OP2_207_52070_20131018_160716_outLine
+BABEL_OP2_207_55742_20130628_204255_inLine
+BABEL_OP2_207_55742_20130628_204255_outLine
+BABEL_OP2_207_57654_20130711_145355_inLine
+BABEL_OP2_207_57654_20130711_145355_outLine
+BABEL_OP2_207_57654_20130711_150856_inLine
+BABEL_OP2_207_57654_20130711_150856_outLine
+BABEL_OP2_207_58815_20130917_153637_inLine
+BABEL_OP2_207_58815_20130917_153637_outLine
+BABEL_OP2_207_59993_20130712_145207_inLine
+BABEL_OP2_207_59993_20130712_145207_outLine
+BABEL_OP2_207_60508_20130801_182520_inLine
+BABEL_OP2_207_60508_20130801_182520_outLine
+BABEL_OP2_207_67373_20130629_154522_inLine
+BABEL_OP2_207_67373_20130629_154522_outLine
+BABEL_OP2_207_71704_20130701_154358_inLine
+BABEL_OP2_207_71704_20130701_154358_outLine
+BABEL_OP2_207_74111_20130922_211430_inLine
+BABEL_OP2_207_74111_20130922_211430_outLine
+BABEL_OP2_207_78976_20130701_162332_inLine
+BABEL_OP2_207_78976_20130701_162332_outLine
+BABEL_OP2_207_80655_20131001_101140_inLine
+BABEL_OP2_207_80655_20131001_101140_outLine
+BABEL_OP2_207_90935_20130725_162432_inLine
+BABEL_OP2_207_90935_20130725_162432_outLine
+BABEL_OP2_207_92941_20130722_163301_inLine
+BABEL_OP2_207_92941_20130722_163301_outLine
+BABEL_OP2_207_95966_20130811_204100_inLine
+BABEL_OP2_207_95966_20130811_204100_outLine
+BABEL_OP2_207_98580_20130809_144219_inLine
+BABEL_OP2_207_98580_20130809_144219_outLine
+BABEL_OP2_207_98678_20131001_204525_inLine
+BABEL_OP2_207_98678_20131001_204525_outLine
diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.list b/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.list
new file mode 100644
index 00000000000..0f3cabb11e7
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.list
@@ -0,0 +1,126 @@
+BABEL_OP2_207_10058_20131001_123723_inLine
+BABEL_OP2_207_10058_20131001_123723_outLine
+BABEL_OP2_207_11681_20130701_131708_inLine
+BABEL_OP2_207_11681_20130701_131708_outLine
+BABEL_OP2_207_11723_20131029_192512_inLine
+BABEL_OP2_207_11723_20131029_192512_outLine
+BABEL_OP2_207_13178_20130828_124504_inLine
+BABEL_OP2_207_13178_20130828_124504_outLine
+BABEL_OP2_207_13324_20130628_205651_inLine
+BABEL_OP2_207_13324_20130628_205651_outLine
+BABEL_OP2_207_13490_20130811_183642_inLine
+BABEL_OP2_207_13490_20130811_183642_outLine
+BABEL_OP2_207_13792_20130725_131748_inLine
+BABEL_OP2_207_13792_20130725_131748_outLine
+BABEL_OP2_207_14137_20130702_122633_inLine
+BABEL_OP2_207_14137_20130702_122633_outLine
+BABEL_OP2_207_16839_20130923_202105_inLine
+BABEL_OP2_207_16839_20130923_202105_outLine
+BABEL_OP2_207_17032_20130906_140931_inLine
+BABEL_OP2_207_17032_20130906_140931_outLine
+BABEL_OP2_207_17420_20130925_143517_inLine
+BABEL_OP2_207_17420_20130925_143517_outLine
+BABEL_OP2_207_17440_20130911_132642_inLine
+BABEL_OP2_207_17440_20130911_132642_outLine
+BABEL_OP2_207_22021_20131220_151707_inLine
+BABEL_OP2_207_22021_20131220_151707_outLine
+BABEL_OP2_207_26999_20130903_135935_inLine
+BABEL_OP2_207_26999_20130903_135935_outLine
+BABEL_OP2_207_28945_20130719_160541_inLine
+BABEL_OP2_207_28945_20130719_160541_outLine
+BABEL_OP2_207_29023_20130702_110704_inLine
+BABEL_OP2_207_29023_20130702_110704_outLine
+BABEL_OP2_207_29168_20130624_215131_inLine
+BABEL_OP2_207_29168_20130624_215131_outLine
+BABEL_OP2_207_30576_20131003_141444_inLine
+BABEL_OP2_207_30576_20131003_141444_outLine
+BABEL_OP2_207_31490_20130626_143343_inLine
+BABEL_OP2_207_31490_20130626_143343_outLine
+BABEL_OP2_207_31624_20130722_163153_inLine
+BABEL_OP2_207_31624_20130722_163153_outLine
+BABEL_OP2_207_32727_20130910_153130_inLine
+BABEL_OP2_207_32727_20130910_153130_outLine
+BABEL_OP2_207_33355_20130626_141603_inLine
+BABEL_OP2_207_33355_20130626_141603_outLine
+BABEL_OP2_207_34197_20130625_162431_inLine
+BABEL_OP2_207_34197_20130625_162431_outLine
+BABEL_OP2_207_42497_20130628_234333_inLine
+BABEL_OP2_207_42497_20130628_234333_outLine
+BABEL_OP2_207_42834_20130828_121531_inLine
+BABEL_OP2_207_42834_20130828_121531_outLine
+BABEL_OP2_207_44029_20131224_183902_inLine
+BABEL_OP2_207_44029_20131224_183902_outLine
+BABEL_OP2_207_44619_20130720_150103_inLine
+BABEL_OP2_207_44619_20130720_150103_outLine
+BABEL_OP2_207_48610_20130627_142410_inLine
+BABEL_OP2_207_48610_20130627_142410_outLine
+BABEL_OP2_207_50175_20130627_131732_inLine
+BABEL_OP2_207_50175_20130627_131732_outLine
+BABEL_OP2_207_50565_20130625_145121_inLine
+BABEL_OP2_207_50565_20130625_145121_outLine
+BABEL_OP2_207_52804_20130729_144756_inLine
+BABEL_OP2_207_52804_20130729_144756_outLine
+BABEL_OP2_207_53917_20130926_150707_inLine
+BABEL_OP2_207_53917_20130926_150707_outLine
+BABEL_OP2_207_54953_20130725_154539_inLine
+BABEL_OP2_207_54953_20130725_154539_outLine
+BABEL_OP2_207_56198_20130702_120906_inLine
+BABEL_OP2_207_56198_20130702_120906_outLine
+BABEL_OP2_207_60661_20130719_154858_inLine
+BABEL_OP2_207_60661_20130719_154858_outLine
+BABEL_OP2_207_60661_20130719_160027_inLine
+BABEL_OP2_207_60661_20130719_160027_outLine
+BABEL_OP2_207_62289_20130828_152328_inLine
+BABEL_OP2_207_62289_20130828_152328_outLine
+BABEL_OP2_207_62800_20130625_222225_inLine
+BABEL_OP2_207_62800_20130625_222225_outLine
+BABEL_OP2_207_64768_20130722_132745_inLine
+BABEL_OP2_207_64768_20130722_132745_outLine
+BABEL_OP2_207_69574_20130624_154052_inLine
+BABEL_OP2_207_69574_20130624_154052_outLine
+BABEL_OP2_207_69574_20130624_162442_inLine
+BABEL_OP2_207_69574_20130624_162442_outLine
+BABEL_OP2_207_70216_20131212_112351_inLine
+BABEL_OP2_207_70216_20131212_112351_outLine
+BABEL_OP2_207_70716_20131005_160013_inLine
+BABEL_OP2_207_70716_20131005_160013_outLine
+BABEL_OP2_207_71038_20130831_112716_inLine
+BABEL_OP2_207_71038_20130831_112716_outLine
+BABEL_OP2_207_71121_20131212_125525_inLine
+BABEL_OP2_207_71121_20131212_125525_outLine
+BABEL_OP2_207_74280_20130623_173429_inLine
+BABEL_OP2_207_74280_20130623_173429_outLine
+BABEL_OP2_207_77744_20130720_130633_inLine
+BABEL_OP2_207_77744_20130720_130633_outLine
+BABEL_OP2_207_78194_20130622_152343_inLine
+BABEL_OP2_207_78194_20130622_152343_outLine
+BABEL_OP2_207_78604_20130629_143534_inLine
+BABEL_OP2_207_78604_20130629_143534_outLine
+BABEL_OP2_207_78943_20130701_150832_inLine
+BABEL_OP2_207_78943_20130701_150832_outLine
+BABEL_OP2_207_86467_20130621_164129_inLine
+BABEL_OP2_207_86467_20130621_164129_outLine
+BABEL_OP2_207_86826_20131010_131452_inLine
+BABEL_OP2_207_86826_20131010_131452_outLine
+BABEL_OP2_207_87074_20130702_114658_inLine
+BABEL_OP2_207_87074_20130702_114658_outLine
+BABEL_OP2_207_87298_20130722_163007_inLine
+BABEL_OP2_207_87298_20130722_163007_outLine
+BABEL_OP2_207_87298_20130722_164947_inLine
+BABEL_OP2_207_87298_20130722_164947_outLine
+BABEL_OP2_207_89650_20131220_191027_inLine
+BABEL_OP2_207_89650_20131220_191027_outLine
+BABEL_OP2_207_95269_20130725_140512_inLine
+BABEL_OP2_207_95269_20130725_140512_outLine
+BABEL_OP2_207_97588_20130720_172415_inLine
+BABEL_OP2_207_97588_20130720_172415_outLine
+BABEL_OP2_207_97731_20130920_141703_inLine
+BABEL_OP2_207_97731_20130920_141703_outLine
+BABEL_OP2_207_97836_20130930_145119_inLine
+BABEL_OP2_207_97836_20130930_145119_outLine
+BABEL_OP2_207_97896_20130807_165056_inLine
+BABEL_OP2_207_97896_20130807_165056_outLine
+BABEL_OP2_207_97911_20131017_134323_inLine
+BABEL_OP2_207_97911_20131017_134323_outLine
+BABEL_OP2_207_98489_20130712_001025_inLine
+BABEL_OP2_207_98489_20130712_001025_outLine
diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.untranscribed.list
new file mode 100644
index 00000000000..7fa52da3207
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/207-tokpisin/sub-train.untranscribed.list
@@ -0,0 +1,380 @@
+BABEL_OP2_207_10036_20130724_130953_inLine
+BABEL_OP2_207_10036_20130724_130953_outLine
+BABEL_OP2_207_10638_20131023_161558_inLine
+BABEL_OP2_207_10638_20131023_161558_outLine
+BABEL_OP2_207_10647_20130930_130411_inLine
+BABEL_OP2_207_10647_20130930_130411_outLine
+BABEL_OP2_207_10938_20130723_154630_inLine
+BABEL_OP2_207_10938_20130723_154630_outLine
+BABEL_OP2_207_12036_20130628_172018_inLine
+BABEL_OP2_207_12036_20130628_172018_outLine
+BABEL_OP2_207_12242_20130720_122145_inLine
+BABEL_OP2_207_12242_20130720_122145_outLine
+BABEL_OP2_207_12851_20130624_231520_inLine
+BABEL_OP2_207_12851_20130624_231520_outLine
+BABEL_OP2_207_13483_20130914_124412_inLine
+BABEL_OP2_207_13483_20130914_124412_outLine
+BABEL_OP2_207_13664_20130624_131414_inLine
+BABEL_OP2_207_13664_20130624_131414_outLine
+BABEL_OP2_207_13709_20130925_114224_inLine
+BABEL_OP2_207_13709_20130925_114224_outLine
+BABEL_OP2_207_13776_20131010_175808_inLine
+BABEL_OP2_207_13776_20131010_175808_outLine
+BABEL_OP2_207_14179_20130905_113236_inLine
+BABEL_OP2_207_14179_20130905_113236_outLine
+BABEL_OP2_207_14972_20130821_111242_inLine
+BABEL_OP2_207_14972_20130821_111242_outLine
+BABEL_OP2_207_15024_20130820_131419_inLine
+BABEL_OP2_207_15024_20130820_131419_outLine
+BABEL_OP2_207_15382_20130827_130728_inLine
+BABEL_OP2_207_15382_20130827_130728_outLine
+BABEL_OP2_207_15730_20130627_154012_inLine
+BABEL_OP2_207_15730_20130627_154012_outLine
+BABEL_OP2_207_16149_20130720_115211_inLine
+BABEL_OP2_207_16149_20130720_115211_outLine
+BABEL_OP2_207_16749_20130830_154859_inLine
+BABEL_OP2_207_16749_20130830_154859_outLine
+BABEL_OP2_207_17472_20130910_165052_inLine
+BABEL_OP2_207_17472_20130910_165052_outLine
+BABEL_OP2_207_17496_20130827_154835_inLine
+BABEL_OP2_207_17496_20130827_154835_outLine
+BABEL_OP2_207_17520_20130820_160316_inLine
+BABEL_OP2_207_17520_20130820_160316_outLine
+BABEL_OP2_207_17615_20130903_123606_inLine
+BABEL_OP2_207_17615_20130903_123606_outLine
+BABEL_OP2_207_18078_20130920_135919_inLine
+BABEL_OP2_207_18078_20130920_135919_outLine
+BABEL_OP2_207_18297_20130828_161347_inLine
+BABEL_OP2_207_18297_20130828_161347_outLine
+BABEL_OP2_207_18370_20131205_182514_inLine
+BABEL_OP2_207_18370_20131205_182514_outLine
+BABEL_OP2_207_19134_20130822_145954_inLine
+BABEL_OP2_207_19134_20130822_145954_outLine
+BABEL_OP2_207_19703_20130720_154219_inLine
+BABEL_OP2_207_19703_20130720_154219_outLine
+BABEL_OP2_207_19818_20130826_134257_inLine
+BABEL_OP2_207_19818_20130826_134257_outLine
+BABEL_OP2_207_19877_20130912_151401_inLine
+BABEL_OP2_207_19877_20130912_151401_outLine
+BABEL_OP2_207_20437_20131030_165858_inLine
+BABEL_OP2_207_20437_20131030_165858_outLine
+BABEL_OP2_207_20985_20130905_145111_inLine
+BABEL_OP2_207_20985_20130905_145111_outLine
+BABEL_OP2_207_21004_20130909_140247_inLine
+BABEL_OP2_207_21004_20130909_140247_outLine
+BABEL_OP2_207_21004_20130909_141426_inLine
+BABEL_OP2_207_21004_20130909_141426_outLine
+BABEL_OP2_207_21206_20130630_201617_inLine
+BABEL_OP2_207_21206_20130630_201617_outLine
+BABEL_OP2_207_21327_20130912_132010_inLine
+BABEL_OP2_207_21327_20130912_132010_outLine
+BABEL_OP2_207_22446_20130725_155758_inLine
+BABEL_OP2_207_22446_20130725_155758_outLine
+BABEL_OP2_207_23006_20130722_133014_inLine
+BABEL_OP2_207_23006_20130722_133014_outLine
+BABEL_OP2_207_23046_20130729_122607_inLine
+BABEL_OP2_207_23046_20130729_122607_outLine
+BABEL_OP2_207_23092_20130911_151410_inLine
+BABEL_OP2_207_23092_20130911_151410_outLine
+BABEL_OP2_207_24532_20130626_162254_inLine
+BABEL_OP2_207_24532_20130626_162254_outLine
+BABEL_OP2_207_24586_20130930_115553_inLine
+BABEL_OP2_207_24586_20130930_115553_outLine
+BABEL_OP2_207_24590_20130807_162732_inLine
+BABEL_OP2_207_24590_20130807_162732_outLine
+BABEL_OP2_207_24679_20130625_144735_inLine
+BABEL_OP2_207_24679_20130625_144735_outLine
+BABEL_OP2_207_24982_20130729_152422_inLine
+BABEL_OP2_207_24982_20130729_152422_outLine
+BABEL_OP2_207_25767_20130628_220921_inLine
+BABEL_OP2_207_25767_20130628_220921_outLine
+BABEL_OP2_207_26388_20130722_152932_inLine
+BABEL_OP2_207_26388_20130722_152932_outLine
+BABEL_OP2_207_27590_20130912_155435_inLine
+BABEL_OP2_207_27590_20130912_155435_outLine
+BABEL_OP2_207_28012_20130920_162354_inLine
+BABEL_OP2_207_28012_20130920_162354_outLine
+BABEL_OP2_207_28303_20130731_132124_inLine
+BABEL_OP2_207_28303_20130731_132124_outLine
+BABEL_OP2_207_28522_20130906_172331_inLine
+BABEL_OP2_207_28522_20130906_172331_outLine
+BABEL_OP2_207_28595_20131022_154118_inLine
+BABEL_OP2_207_28595_20131022_154118_outLine
+BABEL_OP2_207_29404_20130930_154214_inLine
+BABEL_OP2_207_29404_20130930_154214_outLine
+BABEL_OP2_207_29633_20131001_114745_inLine
+BABEL_OP2_207_29633_20131001_114745_outLine
+BABEL_OP2_207_30058_20130927_094530_inLine
+BABEL_OP2_207_30058_20130927_094530_outLine
+BABEL_OP2_207_30180_20130725_150836_inLine
+BABEL_OP2_207_30180_20130725_150836_outLine
+BABEL_OP2_207_30180_20130725_152116_inLine
+BABEL_OP2_207_30180_20130725_152116_outLine
+BABEL_OP2_207_30395_20130701_130920_inLine
+BABEL_OP2_207_30395_20130701_130920_outLine
+BABEL_OP2_207_31039_20131219_232002_inLine
+BABEL_OP2_207_31039_20131219_232002_outLine +BABEL_OP2_207_31074_20131206_183901_inLine +BABEL_OP2_207_31074_20131206_183901_outLine +BABEL_OP2_207_32122_20130725_140342_inLine +BABEL_OP2_207_32122_20130725_140342_outLine +BABEL_OP2_207_33951_20130812_152815_inLine +BABEL_OP2_207_33951_20130812_152815_outLine +BABEL_OP2_207_34486_20131009_154321_inLine +BABEL_OP2_207_34486_20131009_154321_outLine +BABEL_OP2_207_34679_20130722_131020_inLine +BABEL_OP2_207_34679_20130722_131020_outLine +BABEL_OP2_207_34860_20131031_170619_inLine +BABEL_OP2_207_34860_20131031_170619_outLine +BABEL_OP2_207_35008_20130909_114545_inLine +BABEL_OP2_207_35008_20130909_114545_outLine +BABEL_OP2_207_35139_20130701_113506_inLine +BABEL_OP2_207_35139_20130701_113506_outLine +BABEL_OP2_207_35467_20130627_092105_inLine +BABEL_OP2_207_35467_20130627_092105_outLine +BABEL_OP2_207_35467_20130627_093134_inLine +BABEL_OP2_207_35467_20130627_093134_outLine +BABEL_OP2_207_36293_20130722_173251_inLine +BABEL_OP2_207_36293_20130722_173251_outLine +BABEL_OP2_207_36642_20131007_171446_inLine +BABEL_OP2_207_36642_20131007_171446_outLine +BABEL_OP2_207_37285_20130906_152635_inLine +BABEL_OP2_207_37285_20130906_152635_outLine +BABEL_OP2_207_38741_20130702_112110_inLine +BABEL_OP2_207_38741_20130702_112110_outLine +BABEL_OP2_207_39307_20130625_162418_inLine +BABEL_OP2_207_39307_20130625_162418_outLine +BABEL_OP2_207_41542_20130925_125258_inLine +BABEL_OP2_207_41542_20130925_125258_outLine +BABEL_OP2_207_41680_20130621_172501_inLine +BABEL_OP2_207_41680_20130621_172501_outLine +BABEL_OP2_207_41720_20131031_110123_inLine +BABEL_OP2_207_41720_20131031_110123_outLine +BABEL_OP2_207_43794_20131010_152749_inLine +BABEL_OP2_207_43794_20131010_152749_outLine +BABEL_OP2_207_46268_20130626_132448_inLine +BABEL_OP2_207_46268_20130626_132448_outLine +BABEL_OP2_207_46550_20130720_181026_inLine +BABEL_OP2_207_46550_20130720_181026_outLine +BABEL_OP2_207_46558_20130622_140751_inLine +BABEL_OP2_207_46558_20130622_140751_outLine +BABEL_OP2_207_46589_20130904_135639_inLine +BABEL_OP2_207_46589_20130904_135639_outLine +BABEL_OP2_207_46681_20130702_082940_inLine +BABEL_OP2_207_46681_20130702_082940_outLine +BABEL_OP2_207_47283_20130719_175044_inLine +BABEL_OP2_207_47283_20130719_175044_outLine +BABEL_OP2_207_47451_20130909_142242_inLine +BABEL_OP2_207_47451_20130909_142242_outLine +BABEL_OP2_207_47637_20131212_210756_inLine +BABEL_OP2_207_47637_20131212_210756_outLine +BABEL_OP2_207_48844_20130712_140038_inLine +BABEL_OP2_207_48844_20130712_140038_outLine +BABEL_OP2_207_49768_20130722_145407_inLine +BABEL_OP2_207_49768_20130722_145407_outLine +BABEL_OP2_207_50427_20130820_120507_inLine +BABEL_OP2_207_50427_20130820_120507_outLine +BABEL_OP2_207_51185_20131025_171803_inLine +BABEL_OP2_207_51185_20131025_171803_outLine +BABEL_OP2_207_51955_20130702_113003_inLine +BABEL_OP2_207_51955_20130702_113003_outLine +BABEL_OP2_207_51955_20130702_113703_inLine +BABEL_OP2_207_51955_20130702_113703_outLine +BABEL_OP2_207_52272_20130729_145134_inLine +BABEL_OP2_207_52272_20130729_145134_outLine +BABEL_OP2_207_52322_20131022_130920_inLine +BABEL_OP2_207_52322_20131022_130920_outLine +BABEL_OP2_207_52404_20130903_132311_inLine +BABEL_OP2_207_52404_20130903_132311_outLine +BABEL_OP2_207_52490_20130731_141151_inLine +BABEL_OP2_207_52490_20130731_141151_outLine +BABEL_OP2_207_52499_20131224_143602_inLine +BABEL_OP2_207_52499_20131224_143602_outLine +BABEL_OP2_207_52932_20130712_142557_inLine +BABEL_OP2_207_52932_20130712_142557_outLine 
+BABEL_OP2_207_52932_20130712_143902_inLine +BABEL_OP2_207_52932_20130712_143902_outLine +BABEL_OP2_207_53063_20130915_191541_inLine +BABEL_OP2_207_53063_20130915_191541_outLine +BABEL_OP2_207_53957_20130914_133951_inLine +BABEL_OP2_207_53957_20130914_133951_outLine +BABEL_OP2_207_54390_20130720_163619_inLine +BABEL_OP2_207_54390_20130720_163619_outLine +BABEL_OP2_207_54530_20130914_111523_inLine +BABEL_OP2_207_54530_20130914_111523_outLine +BABEL_OP2_207_55902_20131026_192303_inLine +BABEL_OP2_207_55902_20131026_192303_outLine +BABEL_OP2_207_56326_20131105_180513_inLine +BABEL_OP2_207_56326_20131105_180513_outLine +BABEL_OP2_207_58006_20131001_163445_inLine +BABEL_OP2_207_58006_20131001_163445_outLine +BABEL_OP2_207_58926_20130720_155800_inLine +BABEL_OP2_207_58926_20130720_155800_outLine +BABEL_OP2_207_58926_20130720_162011_inLine +BABEL_OP2_207_58926_20130720_162011_outLine +BABEL_OP2_207_59720_20130723_144903_inLine +BABEL_OP2_207_59720_20130723_144903_outLine +BABEL_OP2_207_60115_20130905_120839_inLine +BABEL_OP2_207_60115_20130905_120839_outLine +BABEL_OP2_207_60474_20130724_150210_inLine +BABEL_OP2_207_60474_20130724_150210_outLine +BABEL_OP2_207_62734_20130724_141406_inLine +BABEL_OP2_207_62734_20130724_141406_outLine +BABEL_OP2_207_62810_20130628_195519_inLine +BABEL_OP2_207_62810_20130628_195519_outLine +BABEL_OP2_207_63787_20130628_150319_inLine +BABEL_OP2_207_63787_20130628_150319_outLine +BABEL_OP2_207_64065_20130711_144127_inLine +BABEL_OP2_207_64065_20130711_144127_outLine +BABEL_OP2_207_65723_20130628_225606_inLine +BABEL_OP2_207_65723_20130628_225606_outLine +BABEL_OP2_207_65882_20130711_131739_inLine +BABEL_OP2_207_65882_20130711_131739_outLine +BABEL_OP2_207_66001_20130627_130307_inLine +BABEL_OP2_207_66001_20130627_130307_outLine +BABEL_OP2_207_66916_20130625_141125_inLine +BABEL_OP2_207_66916_20130625_141125_outLine +BABEL_OP2_207_67283_20130626_165836_inLine +BABEL_OP2_207_67283_20130626_165836_outLine +BABEL_OP2_207_67659_20130730_103326_inLine +BABEL_OP2_207_67659_20130730_103326_outLine +BABEL_OP2_207_67659_20130730_104313_inLine +BABEL_OP2_207_67659_20130730_104313_outLine +BABEL_OP2_207_67726_20131212_115926_inLine +BABEL_OP2_207_67726_20131212_115926_outLine +BABEL_OP2_207_68924_20130824_111816_inLine +BABEL_OP2_207_68924_20130824_111816_outLine +BABEL_OP2_207_69636_20130903_113702_inLine +BABEL_OP2_207_69636_20130903_113702_outLine +BABEL_OP2_207_69992_20130628_145720_inLine +BABEL_OP2_207_69992_20130628_145720_outLine +BABEL_OP2_207_69992_20130628_151110_inLine +BABEL_OP2_207_69992_20130628_151110_outLine +BABEL_OP2_207_70452_20130719_143347_inLine +BABEL_OP2_207_70452_20130719_143347_outLine +BABEL_OP2_207_70794_20130622_150717_inLine +BABEL_OP2_207_70794_20130622_150717_outLine +BABEL_OP2_207_71404_20130712_141658_inLine +BABEL_OP2_207_71404_20130712_141658_outLine +BABEL_OP2_207_72587_20130826_152730_inLine +BABEL_OP2_207_72587_20130826_152730_outLine +BABEL_OP2_207_73022_20130924_132328_inLine +BABEL_OP2_207_73022_20130924_132328_outLine +BABEL_OP2_207_73591_20130625_194125_inLine +BABEL_OP2_207_73591_20130625_194125_outLine +BABEL_OP2_207_73814_20130822_124306_inLine +BABEL_OP2_207_73814_20130822_124306_outLine +BABEL_OP2_207_73990_20131029_162659_inLine +BABEL_OP2_207_73990_20131029_162659_outLine +BABEL_OP2_207_74667_20130808_161304_inLine +BABEL_OP2_207_74667_20130808_161304_outLine +BABEL_OP2_207_75064_20130720_134326_inLine +BABEL_OP2_207_75064_20130720_134326_outLine +BABEL_OP2_207_75505_20130627_155926_inLine 
+BABEL_OP2_207_75505_20130627_155926_outLine +BABEL_OP2_207_77146_20130625_205452_inLine +BABEL_OP2_207_77146_20130625_205452_outLine +BABEL_OP2_207_77803_20130626_144156_inLine +BABEL_OP2_207_77803_20130626_144156_outLine +BABEL_OP2_207_77990_20130701_144426_inLine +BABEL_OP2_207_77990_20130701_144426_outLine +BABEL_OP2_207_78482_20130919_144242_inLine +BABEL_OP2_207_78482_20130919_144242_outLine +BABEL_OP2_207_79080_20130922_214849_inLine +BABEL_OP2_207_79080_20130922_214849_outLine +BABEL_OP2_207_79367_20130626_150601_inLine +BABEL_OP2_207_79367_20130626_150601_outLine +BABEL_OP2_207_79451_20130712_135228_inLine +BABEL_OP2_207_79451_20130712_135228_outLine +BABEL_OP2_207_80439_20130722_161436_inLine +BABEL_OP2_207_80439_20130722_161436_outLine +BABEL_OP2_207_80559_20130712_144234_inLine +BABEL_OP2_207_80559_20130712_144234_outLine +BABEL_OP2_207_81971_20130623_113232_inLine +BABEL_OP2_207_81971_20130623_113232_outLine +BABEL_OP2_207_82425_20130626_153351_inLine +BABEL_OP2_207_82425_20130626_153351_outLine +BABEL_OP2_207_84547_20130626_230549_inLine +BABEL_OP2_207_84547_20130626_230549_outLine +BABEL_OP2_207_84611_20130630_210848_inLine +BABEL_OP2_207_84611_20130630_210848_outLine +BABEL_OP2_207_84768_20130627_204526_inLine +BABEL_OP2_207_84768_20130627_204526_outLine +BABEL_OP2_207_84805_20130922_111910_inLine +BABEL_OP2_207_84805_20130922_111910_outLine +BABEL_OP2_207_85010_20131031_114820_inLine +BABEL_OP2_207_85010_20131031_114820_outLine +BABEL_OP2_207_85340_20130731_141136_inLine +BABEL_OP2_207_85340_20130731_141136_outLine +BABEL_OP2_207_86191_20130720_132952_inLine +BABEL_OP2_207_86191_20130720_132952_outLine +BABEL_OP2_207_86628_20131011_145244_inLine +BABEL_OP2_207_86628_20131011_145244_outLine +BABEL_OP2_207_86713_20130924_095726_inLine +BABEL_OP2_207_86713_20130924_095726_outLine +BABEL_OP2_207_86722_20130723_173932_inLine +BABEL_OP2_207_86722_20130723_173932_outLine +BABEL_OP2_207_87489_20130925_122043_inLine +BABEL_OP2_207_87489_20130925_122043_outLine +BABEL_OP2_207_87777_20130827_113252_inLine +BABEL_OP2_207_87777_20130827_113252_outLine +BABEL_OP2_207_87884_20130911_154713_inLine +BABEL_OP2_207_87884_20130911_154713_outLine +BABEL_OP2_207_87921_20130909_222741_inLine +BABEL_OP2_207_87921_20130909_222741_outLine +BABEL_OP2_207_88776_20130628_223035_inLine +BABEL_OP2_207_88776_20130628_223035_outLine +BABEL_OP2_207_89059_20130830_150700_inLine +BABEL_OP2_207_89059_20130830_150700_outLine +BABEL_OP2_207_89877_20130822_133155_inLine +BABEL_OP2_207_89877_20130822_133155_outLine +BABEL_OP2_207_90572_20130927_112514_inLine +BABEL_OP2_207_90572_20130927_112514_outLine +BABEL_OP2_207_91125_20130622_154739_inLine +BABEL_OP2_207_91125_20130622_154739_outLine +BABEL_OP2_207_91383_20131017_164250_inLine +BABEL_OP2_207_91383_20131017_164250_outLine +BABEL_OP2_207_91760_20131008_175549_inLine +BABEL_OP2_207_91760_20131008_175549_outLine +BABEL_OP2_207_91888_20131002_140054_inLine +BABEL_OP2_207_91888_20131002_140054_outLine +BABEL_OP2_207_92736_20130913_142730_inLine +BABEL_OP2_207_92736_20130913_142730_outLine +BABEL_OP2_207_93475_20130712_141154_inLine +BABEL_OP2_207_93475_20130712_141154_outLine +BABEL_OP2_207_94262_20130912_223931_inLine +BABEL_OP2_207_94262_20130912_223931_outLine +BABEL_OP2_207_94869_20130627_162540_inLine +BABEL_OP2_207_94869_20130627_162540_outLine +BABEL_OP2_207_95077_20130910_113448_inLine +BABEL_OP2_207_95077_20130910_113448_outLine +BABEL_OP2_207_95231_20131029_145824_inLine +BABEL_OP2_207_95231_20131029_145824_outLine 
+BABEL_OP2_207_95663_20130626_085943_inLine +BABEL_OP2_207_95663_20130626_085943_outLine +BABEL_OP2_207_96190_20130730_105000_inLine +BABEL_OP2_207_96190_20130730_105000_outLine +BABEL_OP2_207_96525_20130919_151001_inLine +BABEL_OP2_207_96525_20130919_151001_outLine +BABEL_OP2_207_96690_20130808_133431_inLine +BABEL_OP2_207_96690_20130808_133431_outLine +BABEL_OP2_207_96808_20131007_222455_inLine +BABEL_OP2_207_96808_20131007_222455_outLine +BABEL_OP2_207_96820_20130815_171850_inLine +BABEL_OP2_207_96820_20130815_171850_outLine +BABEL_OP2_207_96820_20130815_172511_inLine +BABEL_OP2_207_96820_20130815_172511_outLine +BABEL_OP2_207_96910_20130723_132125_inLine +BABEL_OP2_207_96910_20130723_132125_outLine +BABEL_OP2_207_97220_20131015_210228_inLine +BABEL_OP2_207_97220_20131015_210228_outLine +BABEL_OP2_207_97557_20130824_125158_inLine +BABEL_OP2_207_97557_20130824_125158_outLine +BABEL_OP2_207_98390_20130630_121753_inLine +BABEL_OP2_207_98390_20130630_121753_outLine +BABEL_OP2_207_98565_20131220_143328_inLine +BABEL_OP2_207_98565_20131220_143328_outLine +BABEL_OP2_207_99289_20130930_212352_inLine +BABEL_OP2_207_99289_20130930_212352_outLine +BABEL_OP2_207_99998_20130730_104201_inLine +BABEL_OP2_207_99998_20130730_104201_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/training.list b/egs/babel/s5d/conf/lists/207-tokpisin/training.list new file mode 100644 index 00000000000..265ad40a321 --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/training.list @@ -0,0 +1,506 @@ +BABEL_OP2_207_10036_20130724_130953_inLine +BABEL_OP2_207_10036_20130724_130953_outLine +BABEL_OP2_207_10058_20131001_123723_inLine +BABEL_OP2_207_10058_20131001_123723_outLine +BABEL_OP2_207_10638_20131023_161558_inLine +BABEL_OP2_207_10638_20131023_161558_outLine +BABEL_OP2_207_10647_20130930_130411_inLine +BABEL_OP2_207_10647_20130930_130411_outLine +BABEL_OP2_207_10938_20130723_154630_inLine +BABEL_OP2_207_10938_20130723_154630_outLine +BABEL_OP2_207_11681_20130701_131708_inLine +BABEL_OP2_207_11681_20130701_131708_outLine +BABEL_OP2_207_11723_20131029_192512_inLine +BABEL_OP2_207_11723_20131029_192512_outLine +BABEL_OP2_207_12036_20130628_172018_inLine +BABEL_OP2_207_12036_20130628_172018_outLine +BABEL_OP2_207_12242_20130720_122145_inLine +BABEL_OP2_207_12242_20130720_122145_outLine +BABEL_OP2_207_12851_20130624_231520_inLine +BABEL_OP2_207_12851_20130624_231520_outLine +BABEL_OP2_207_13178_20130828_124504_inLine +BABEL_OP2_207_13178_20130828_124504_outLine +BABEL_OP2_207_13324_20130628_205651_inLine +BABEL_OP2_207_13324_20130628_205651_outLine +BABEL_OP2_207_13483_20130914_124412_inLine +BABEL_OP2_207_13483_20130914_124412_outLine +BABEL_OP2_207_13490_20130811_183642_inLine +BABEL_OP2_207_13490_20130811_183642_outLine +BABEL_OP2_207_13664_20130624_131414_inLine +BABEL_OP2_207_13664_20130624_131414_outLine +BABEL_OP2_207_13709_20130925_114224_inLine +BABEL_OP2_207_13709_20130925_114224_outLine +BABEL_OP2_207_13776_20131010_175808_inLine +BABEL_OP2_207_13776_20131010_175808_outLine +BABEL_OP2_207_13792_20130725_131748_inLine +BABEL_OP2_207_13792_20130725_131748_outLine +BABEL_OP2_207_14137_20130702_122633_inLine +BABEL_OP2_207_14137_20130702_122633_outLine +BABEL_OP2_207_14179_20130905_113236_inLine +BABEL_OP2_207_14179_20130905_113236_outLine +BABEL_OP2_207_14972_20130821_111242_inLine +BABEL_OP2_207_14972_20130821_111242_outLine +BABEL_OP2_207_15024_20130820_131419_inLine +BABEL_OP2_207_15024_20130820_131419_outLine +BABEL_OP2_207_15382_20130827_130728_inLine +BABEL_OP2_207_15382_20130827_130728_outLine 
+BABEL_OP2_207_15730_20130627_154012_inLine +BABEL_OP2_207_15730_20130627_154012_outLine +BABEL_OP2_207_16149_20130720_115211_inLine +BABEL_OP2_207_16149_20130720_115211_outLine +BABEL_OP2_207_16749_20130830_154859_inLine +BABEL_OP2_207_16749_20130830_154859_outLine +BABEL_OP2_207_16839_20130923_202105_inLine +BABEL_OP2_207_16839_20130923_202105_outLine +BABEL_OP2_207_17032_20130906_140931_inLine +BABEL_OP2_207_17032_20130906_140931_outLine +BABEL_OP2_207_17420_20130925_143517_inLine +BABEL_OP2_207_17420_20130925_143517_outLine +BABEL_OP2_207_17440_20130911_132642_inLine +BABEL_OP2_207_17440_20130911_132642_outLine +BABEL_OP2_207_17472_20130910_165052_inLine +BABEL_OP2_207_17472_20130910_165052_outLine +BABEL_OP2_207_17496_20130827_154835_inLine +BABEL_OP2_207_17496_20130827_154835_outLine +BABEL_OP2_207_17520_20130820_160316_inLine +BABEL_OP2_207_17520_20130820_160316_outLine +BABEL_OP2_207_17615_20130903_123606_inLine +BABEL_OP2_207_17615_20130903_123606_outLine +BABEL_OP2_207_18078_20130920_135919_inLine +BABEL_OP2_207_18078_20130920_135919_outLine +BABEL_OP2_207_18297_20130828_161347_inLine +BABEL_OP2_207_18297_20130828_161347_outLine +BABEL_OP2_207_18370_20131205_182514_inLine +BABEL_OP2_207_18370_20131205_182514_outLine +BABEL_OP2_207_19134_20130822_145954_inLine +BABEL_OP2_207_19134_20130822_145954_outLine +BABEL_OP2_207_19703_20130720_154219_inLine +BABEL_OP2_207_19703_20130720_154219_outLine +BABEL_OP2_207_19818_20130826_134257_inLine +BABEL_OP2_207_19818_20130826_134257_outLine +BABEL_OP2_207_19877_20130912_151401_inLine +BABEL_OP2_207_19877_20130912_151401_outLine +BABEL_OP2_207_20437_20131030_165858_inLine +BABEL_OP2_207_20437_20131030_165858_outLine +BABEL_OP2_207_20985_20130905_145111_inLine +BABEL_OP2_207_20985_20130905_145111_outLine +BABEL_OP2_207_21004_20130909_140247_inLine +BABEL_OP2_207_21004_20130909_140247_outLine +BABEL_OP2_207_21004_20130909_141426_inLine +BABEL_OP2_207_21004_20130909_141426_outLine +BABEL_OP2_207_21206_20130630_201617_inLine +BABEL_OP2_207_21206_20130630_201617_outLine +BABEL_OP2_207_21327_20130912_132010_inLine +BABEL_OP2_207_21327_20130912_132010_outLine +BABEL_OP2_207_22021_20131220_151707_inLine +BABEL_OP2_207_22021_20131220_151707_outLine +BABEL_OP2_207_22446_20130725_155758_inLine +BABEL_OP2_207_22446_20130725_155758_outLine +BABEL_OP2_207_23006_20130722_133014_inLine +BABEL_OP2_207_23006_20130722_133014_outLine +BABEL_OP2_207_23046_20130729_122607_inLine +BABEL_OP2_207_23046_20130729_122607_outLine +BABEL_OP2_207_23092_20130911_151410_inLine +BABEL_OP2_207_23092_20130911_151410_outLine +BABEL_OP2_207_24532_20130626_162254_inLine +BABEL_OP2_207_24532_20130626_162254_outLine +BABEL_OP2_207_24586_20130930_115553_inLine +BABEL_OP2_207_24586_20130930_115553_outLine +BABEL_OP2_207_24590_20130807_162732_inLine +BABEL_OP2_207_24590_20130807_162732_outLine +BABEL_OP2_207_24679_20130625_144735_inLine +BABEL_OP2_207_24679_20130625_144735_outLine +BABEL_OP2_207_24982_20130729_152422_inLine +BABEL_OP2_207_24982_20130729_152422_outLine +BABEL_OP2_207_25767_20130628_220921_inLine +BABEL_OP2_207_25767_20130628_220921_outLine +BABEL_OP2_207_26388_20130722_152932_inLine +BABEL_OP2_207_26388_20130722_152932_outLine +BABEL_OP2_207_26999_20130903_135935_inLine +BABEL_OP2_207_26999_20130903_135935_outLine +BABEL_OP2_207_27590_20130912_155435_inLine +BABEL_OP2_207_27590_20130912_155435_outLine +BABEL_OP2_207_28012_20130920_162354_inLine +BABEL_OP2_207_28012_20130920_162354_outLine +BABEL_OP2_207_28303_20130731_132124_inLine 
+BABEL_OP2_207_28303_20130731_132124_outLine +BABEL_OP2_207_28522_20130906_172331_inLine +BABEL_OP2_207_28522_20130906_172331_outLine +BABEL_OP2_207_28595_20131022_154118_inLine +BABEL_OP2_207_28595_20131022_154118_outLine +BABEL_OP2_207_28945_20130719_160541_inLine +BABEL_OP2_207_28945_20130719_160541_outLine +BABEL_OP2_207_29023_20130702_110704_inLine +BABEL_OP2_207_29023_20130702_110704_outLine +BABEL_OP2_207_29168_20130624_215131_inLine +BABEL_OP2_207_29168_20130624_215131_outLine +BABEL_OP2_207_29404_20130930_154214_inLine +BABEL_OP2_207_29404_20130930_154214_outLine +BABEL_OP2_207_29633_20131001_114745_inLine +BABEL_OP2_207_29633_20131001_114745_outLine +BABEL_OP2_207_30058_20130927_094530_inLine +BABEL_OP2_207_30058_20130927_094530_outLine +BABEL_OP2_207_30180_20130725_150836_inLine +BABEL_OP2_207_30180_20130725_150836_outLine +BABEL_OP2_207_30180_20130725_152116_inLine +BABEL_OP2_207_30180_20130725_152116_outLine +BABEL_OP2_207_30395_20130701_130920_inLine +BABEL_OP2_207_30395_20130701_130920_outLine +BABEL_OP2_207_30576_20131003_141444_inLine +BABEL_OP2_207_30576_20131003_141444_outLine +BABEL_OP2_207_31039_20131219_232002_inLine +BABEL_OP2_207_31039_20131219_232002_outLine +BABEL_OP2_207_31074_20131206_183901_inLine +BABEL_OP2_207_31074_20131206_183901_outLine +BABEL_OP2_207_31490_20130626_143343_inLine +BABEL_OP2_207_31490_20130626_143343_outLine +BABEL_OP2_207_31624_20130722_163153_inLine +BABEL_OP2_207_31624_20130722_163153_outLine +BABEL_OP2_207_32122_20130725_140342_inLine +BABEL_OP2_207_32122_20130725_140342_outLine +BABEL_OP2_207_32727_20130910_153130_inLine +BABEL_OP2_207_32727_20130910_153130_outLine +BABEL_OP2_207_33355_20130626_141603_inLine +BABEL_OP2_207_33355_20130626_141603_outLine +BABEL_OP2_207_33951_20130812_152815_inLine +BABEL_OP2_207_33951_20130812_152815_outLine +BABEL_OP2_207_34197_20130625_162431_inLine +BABEL_OP2_207_34197_20130625_162431_outLine +BABEL_OP2_207_34486_20131009_154321_inLine +BABEL_OP2_207_34486_20131009_154321_outLine +BABEL_OP2_207_34679_20130722_131020_inLine +BABEL_OP2_207_34679_20130722_131020_outLine +BABEL_OP2_207_34860_20131031_170619_inLine +BABEL_OP2_207_34860_20131031_170619_outLine +BABEL_OP2_207_35008_20130909_114545_inLine +BABEL_OP2_207_35008_20130909_114545_outLine +BABEL_OP2_207_35139_20130701_113506_inLine +BABEL_OP2_207_35139_20130701_113506_outLine +BABEL_OP2_207_35467_20130627_092105_inLine +BABEL_OP2_207_35467_20130627_092105_outLine +BABEL_OP2_207_35467_20130627_093134_inLine +BABEL_OP2_207_35467_20130627_093134_outLine +BABEL_OP2_207_36293_20130722_173251_inLine +BABEL_OP2_207_36293_20130722_173251_outLine +BABEL_OP2_207_36642_20131007_171446_inLine +BABEL_OP2_207_36642_20131007_171446_outLine +BABEL_OP2_207_37285_20130906_152635_inLine +BABEL_OP2_207_37285_20130906_152635_outLine +BABEL_OP2_207_38741_20130702_112110_inLine +BABEL_OP2_207_38741_20130702_112110_outLine +BABEL_OP2_207_39307_20130625_162418_inLine +BABEL_OP2_207_39307_20130625_162418_outLine +BABEL_OP2_207_41542_20130925_125258_inLine +BABEL_OP2_207_41542_20130925_125258_outLine +BABEL_OP2_207_41680_20130621_172501_inLine +BABEL_OP2_207_41680_20130621_172501_outLine +BABEL_OP2_207_41720_20131031_110123_inLine +BABEL_OP2_207_41720_20131031_110123_outLine +BABEL_OP2_207_42497_20130628_234333_inLine +BABEL_OP2_207_42497_20130628_234333_outLine +BABEL_OP2_207_42834_20130828_121531_inLine +BABEL_OP2_207_42834_20130828_121531_outLine +BABEL_OP2_207_43794_20131010_152749_inLine +BABEL_OP2_207_43794_20131010_152749_outLine 
+BABEL_OP2_207_44029_20131224_183902_inLine +BABEL_OP2_207_44029_20131224_183902_outLine +BABEL_OP2_207_44619_20130720_150103_inLine +BABEL_OP2_207_44619_20130720_150103_outLine +BABEL_OP2_207_46268_20130626_132448_inLine +BABEL_OP2_207_46268_20130626_132448_outLine +BABEL_OP2_207_46550_20130720_181026_inLine +BABEL_OP2_207_46550_20130720_181026_outLine +BABEL_OP2_207_46558_20130622_140751_inLine +BABEL_OP2_207_46558_20130622_140751_outLine +BABEL_OP2_207_46589_20130904_135639_inLine +BABEL_OP2_207_46589_20130904_135639_outLine +BABEL_OP2_207_46681_20130702_082940_inLine +BABEL_OP2_207_46681_20130702_082940_outLine +BABEL_OP2_207_47283_20130719_175044_inLine +BABEL_OP2_207_47283_20130719_175044_outLine +BABEL_OP2_207_47451_20130909_142242_inLine +BABEL_OP2_207_47451_20130909_142242_outLine +BABEL_OP2_207_47637_20131212_210756_inLine +BABEL_OP2_207_47637_20131212_210756_outLine +BABEL_OP2_207_48610_20130627_142410_inLine +BABEL_OP2_207_48610_20130627_142410_outLine +BABEL_OP2_207_48844_20130712_140038_inLine +BABEL_OP2_207_48844_20130712_140038_outLine +BABEL_OP2_207_49768_20130722_145407_inLine +BABEL_OP2_207_49768_20130722_145407_outLine +BABEL_OP2_207_50175_20130627_131732_inLine +BABEL_OP2_207_50175_20130627_131732_outLine +BABEL_OP2_207_50427_20130820_120507_inLine +BABEL_OP2_207_50427_20130820_120507_outLine +BABEL_OP2_207_50565_20130625_145121_inLine +BABEL_OP2_207_50565_20130625_145121_outLine +BABEL_OP2_207_51185_20131025_171803_inLine +BABEL_OP2_207_51185_20131025_171803_outLine +BABEL_OP2_207_51955_20130702_113003_inLine +BABEL_OP2_207_51955_20130702_113003_outLine +BABEL_OP2_207_51955_20130702_113703_inLine +BABEL_OP2_207_51955_20130702_113703_outLine +BABEL_OP2_207_52272_20130729_145134_inLine +BABEL_OP2_207_52272_20130729_145134_outLine +BABEL_OP2_207_52322_20131022_130920_inLine +BABEL_OP2_207_52322_20131022_130920_outLine +BABEL_OP2_207_52404_20130903_132311_inLine +BABEL_OP2_207_52404_20130903_132311_outLine +BABEL_OP2_207_52490_20130731_141151_inLine +BABEL_OP2_207_52490_20130731_141151_outLine +BABEL_OP2_207_52499_20131224_143602_inLine +BABEL_OP2_207_52499_20131224_143602_outLine +BABEL_OP2_207_52804_20130729_144756_inLine +BABEL_OP2_207_52804_20130729_144756_outLine +BABEL_OP2_207_52932_20130712_142557_inLine +BABEL_OP2_207_52932_20130712_142557_outLine +BABEL_OP2_207_52932_20130712_143902_inLine +BABEL_OP2_207_52932_20130712_143902_outLine +BABEL_OP2_207_53063_20130915_191541_inLine +BABEL_OP2_207_53063_20130915_191541_outLine +BABEL_OP2_207_53917_20130926_150707_inLine +BABEL_OP2_207_53917_20130926_150707_outLine +BABEL_OP2_207_53957_20130914_133951_inLine +BABEL_OP2_207_53957_20130914_133951_outLine +BABEL_OP2_207_54390_20130720_163619_inLine +BABEL_OP2_207_54390_20130720_163619_outLine +BABEL_OP2_207_54530_20130914_111523_inLine +BABEL_OP2_207_54530_20130914_111523_outLine +BABEL_OP2_207_54953_20130725_154539_inLine +BABEL_OP2_207_54953_20130725_154539_outLine +BABEL_OP2_207_55902_20131026_192303_inLine +BABEL_OP2_207_55902_20131026_192303_outLine +BABEL_OP2_207_56198_20130702_120906_inLine +BABEL_OP2_207_56198_20130702_120906_outLine +BABEL_OP2_207_56326_20131105_180513_inLine +BABEL_OP2_207_56326_20131105_180513_outLine +BABEL_OP2_207_58006_20131001_163445_inLine +BABEL_OP2_207_58006_20131001_163445_outLine +BABEL_OP2_207_58926_20130720_155800_inLine +BABEL_OP2_207_58926_20130720_155800_outLine +BABEL_OP2_207_58926_20130720_162011_inLine +BABEL_OP2_207_58926_20130720_162011_outLine +BABEL_OP2_207_59720_20130723_144903_inLine 
+BABEL_OP2_207_59720_20130723_144903_outLine +BABEL_OP2_207_60115_20130905_120839_inLine +BABEL_OP2_207_60115_20130905_120839_outLine +BABEL_OP2_207_60474_20130724_150210_inLine +BABEL_OP2_207_60474_20130724_150210_outLine +BABEL_OP2_207_60661_20130719_154858_inLine +BABEL_OP2_207_60661_20130719_154858_outLine +BABEL_OP2_207_60661_20130719_160027_inLine +BABEL_OP2_207_60661_20130719_160027_outLine +BABEL_OP2_207_62289_20130828_152328_inLine +BABEL_OP2_207_62289_20130828_152328_outLine +BABEL_OP2_207_62734_20130724_141406_inLine +BABEL_OP2_207_62734_20130724_141406_outLine +BABEL_OP2_207_62800_20130625_222225_inLine +BABEL_OP2_207_62800_20130625_222225_outLine +BABEL_OP2_207_62810_20130628_195519_inLine +BABEL_OP2_207_62810_20130628_195519_outLine +BABEL_OP2_207_63787_20130628_150319_inLine +BABEL_OP2_207_63787_20130628_150319_outLine +BABEL_OP2_207_64065_20130711_144127_inLine +BABEL_OP2_207_64065_20130711_144127_outLine +BABEL_OP2_207_64768_20130722_132745_inLine +BABEL_OP2_207_64768_20130722_132745_outLine +BABEL_OP2_207_65723_20130628_225606_inLine +BABEL_OP2_207_65723_20130628_225606_outLine +BABEL_OP2_207_65882_20130711_131739_inLine +BABEL_OP2_207_65882_20130711_131739_outLine +BABEL_OP2_207_66001_20130627_130307_inLine +BABEL_OP2_207_66001_20130627_130307_outLine +BABEL_OP2_207_66916_20130625_141125_inLine +BABEL_OP2_207_66916_20130625_141125_outLine +BABEL_OP2_207_67283_20130626_165836_inLine +BABEL_OP2_207_67283_20130626_165836_outLine +BABEL_OP2_207_67659_20130730_103326_inLine +BABEL_OP2_207_67659_20130730_103326_outLine +BABEL_OP2_207_67659_20130730_104313_inLine +BABEL_OP2_207_67659_20130730_104313_outLine +BABEL_OP2_207_67726_20131212_115926_inLine +BABEL_OP2_207_67726_20131212_115926_outLine +BABEL_OP2_207_68924_20130824_111816_inLine +BABEL_OP2_207_68924_20130824_111816_outLine +BABEL_OP2_207_69574_20130624_154052_inLine +BABEL_OP2_207_69574_20130624_154052_outLine +BABEL_OP2_207_69574_20130624_162442_inLine +BABEL_OP2_207_69574_20130624_162442_outLine +BABEL_OP2_207_69636_20130903_113702_inLine +BABEL_OP2_207_69636_20130903_113702_outLine +BABEL_OP2_207_69992_20130628_145720_inLine +BABEL_OP2_207_69992_20130628_145720_outLine +BABEL_OP2_207_69992_20130628_151110_inLine +BABEL_OP2_207_69992_20130628_151110_outLine +BABEL_OP2_207_70216_20131212_112351_inLine +BABEL_OP2_207_70216_20131212_112351_outLine +BABEL_OP2_207_70452_20130719_143347_inLine +BABEL_OP2_207_70452_20130719_143347_outLine +BABEL_OP2_207_70716_20131005_160013_inLine +BABEL_OP2_207_70716_20131005_160013_outLine +BABEL_OP2_207_70794_20130622_150717_inLine +BABEL_OP2_207_70794_20130622_150717_outLine +BABEL_OP2_207_71038_20130831_112716_inLine +BABEL_OP2_207_71038_20130831_112716_outLine +BABEL_OP2_207_71121_20131212_125525_inLine +BABEL_OP2_207_71121_20131212_125525_outLine +BABEL_OP2_207_71404_20130712_141658_inLine +BABEL_OP2_207_71404_20130712_141658_outLine +BABEL_OP2_207_72587_20130826_152730_inLine +BABEL_OP2_207_72587_20130826_152730_outLine +BABEL_OP2_207_73022_20130924_132328_inLine +BABEL_OP2_207_73022_20130924_132328_outLine +BABEL_OP2_207_73591_20130625_194125_inLine +BABEL_OP2_207_73591_20130625_194125_outLine +BABEL_OP2_207_73814_20130822_124306_inLine +BABEL_OP2_207_73814_20130822_124306_outLine +BABEL_OP2_207_73990_20131029_162659_inLine +BABEL_OP2_207_73990_20131029_162659_outLine +BABEL_OP2_207_74280_20130623_173429_inLine +BABEL_OP2_207_74280_20130623_173429_outLine +BABEL_OP2_207_74667_20130808_161304_inLine +BABEL_OP2_207_74667_20130808_161304_outLine 
+BABEL_OP2_207_75064_20130720_134326_inLine +BABEL_OP2_207_75064_20130720_134326_outLine +BABEL_OP2_207_75505_20130627_155926_inLine +BABEL_OP2_207_75505_20130627_155926_outLine +BABEL_OP2_207_77146_20130625_205452_inLine +BABEL_OP2_207_77146_20130625_205452_outLine +BABEL_OP2_207_77744_20130720_130633_inLine +BABEL_OP2_207_77744_20130720_130633_outLine +BABEL_OP2_207_77803_20130626_144156_inLine +BABEL_OP2_207_77803_20130626_144156_outLine +BABEL_OP2_207_77990_20130701_144426_inLine +BABEL_OP2_207_77990_20130701_144426_outLine +BABEL_OP2_207_78194_20130622_152343_inLine +BABEL_OP2_207_78194_20130622_152343_outLine +BABEL_OP2_207_78482_20130919_144242_inLine +BABEL_OP2_207_78482_20130919_144242_outLine +BABEL_OP2_207_78604_20130629_143534_inLine +BABEL_OP2_207_78604_20130629_143534_outLine +BABEL_OP2_207_78943_20130701_150832_inLine +BABEL_OP2_207_78943_20130701_150832_outLine +BABEL_OP2_207_79080_20130922_214849_inLine +BABEL_OP2_207_79080_20130922_214849_outLine +BABEL_OP2_207_79367_20130626_150601_inLine +BABEL_OP2_207_79367_20130626_150601_outLine +BABEL_OP2_207_79451_20130712_135228_inLine +BABEL_OP2_207_79451_20130712_135228_outLine +BABEL_OP2_207_80439_20130722_161436_inLine +BABEL_OP2_207_80439_20130722_161436_outLine +BABEL_OP2_207_80559_20130712_144234_inLine +BABEL_OP2_207_80559_20130712_144234_outLine +BABEL_OP2_207_81971_20130623_113232_inLine +BABEL_OP2_207_81971_20130623_113232_outLine +BABEL_OP2_207_82425_20130626_153351_inLine +BABEL_OP2_207_82425_20130626_153351_outLine +BABEL_OP2_207_84547_20130626_230549_inLine +BABEL_OP2_207_84547_20130626_230549_outLine +BABEL_OP2_207_84611_20130630_210848_inLine +BABEL_OP2_207_84611_20130630_210848_outLine +BABEL_OP2_207_84768_20130627_204526_inLine +BABEL_OP2_207_84768_20130627_204526_outLine +BABEL_OP2_207_84805_20130922_111910_inLine +BABEL_OP2_207_84805_20130922_111910_outLine +BABEL_OP2_207_85010_20131031_114820_inLine +BABEL_OP2_207_85010_20131031_114820_outLine +BABEL_OP2_207_85340_20130731_141136_inLine +BABEL_OP2_207_85340_20130731_141136_outLine +BABEL_OP2_207_86191_20130720_132952_inLine +BABEL_OP2_207_86191_20130720_132952_outLine +BABEL_OP2_207_86467_20130621_164129_inLine +BABEL_OP2_207_86467_20130621_164129_outLine +BABEL_OP2_207_86628_20131011_145244_inLine +BABEL_OP2_207_86628_20131011_145244_outLine +BABEL_OP2_207_86713_20130924_095726_inLine +BABEL_OP2_207_86713_20130924_095726_outLine +BABEL_OP2_207_86722_20130723_173932_inLine +BABEL_OP2_207_86722_20130723_173932_outLine +BABEL_OP2_207_86826_20131010_131452_inLine +BABEL_OP2_207_86826_20131010_131452_outLine +BABEL_OP2_207_87074_20130702_114658_inLine +BABEL_OP2_207_87074_20130702_114658_outLine +BABEL_OP2_207_87298_20130722_163007_inLine +BABEL_OP2_207_87298_20130722_163007_outLine +BABEL_OP2_207_87298_20130722_164947_inLine +BABEL_OP2_207_87298_20130722_164947_outLine +BABEL_OP2_207_87489_20130925_122043_inLine +BABEL_OP2_207_87489_20130925_122043_outLine +BABEL_OP2_207_87777_20130827_113252_inLine +BABEL_OP2_207_87777_20130827_113252_outLine +BABEL_OP2_207_87884_20130911_154713_inLine +BABEL_OP2_207_87884_20130911_154713_outLine +BABEL_OP2_207_87921_20130909_222741_inLine +BABEL_OP2_207_87921_20130909_222741_outLine +BABEL_OP2_207_88776_20130628_223035_inLine +BABEL_OP2_207_88776_20130628_223035_outLine +BABEL_OP2_207_89059_20130830_150700_inLine +BABEL_OP2_207_89059_20130830_150700_outLine +BABEL_OP2_207_89650_20131220_191027_inLine +BABEL_OP2_207_89650_20131220_191027_outLine +BABEL_OP2_207_89877_20130822_133155_inLine 
+BABEL_OP2_207_89877_20130822_133155_outLine +BABEL_OP2_207_90572_20130927_112514_inLine +BABEL_OP2_207_90572_20130927_112514_outLine +BABEL_OP2_207_91125_20130622_154739_inLine +BABEL_OP2_207_91125_20130622_154739_outLine +BABEL_OP2_207_91383_20131017_164250_inLine +BABEL_OP2_207_91383_20131017_164250_outLine +BABEL_OP2_207_91760_20131008_175549_inLine +BABEL_OP2_207_91760_20131008_175549_outLine +BABEL_OP2_207_91888_20131002_140054_inLine +BABEL_OP2_207_91888_20131002_140054_outLine +BABEL_OP2_207_92736_20130913_142730_inLine +BABEL_OP2_207_92736_20130913_142730_outLine +BABEL_OP2_207_93475_20130712_141154_inLine +BABEL_OP2_207_93475_20130712_141154_outLine +BABEL_OP2_207_94262_20130912_223931_inLine +BABEL_OP2_207_94262_20130912_223931_outLine +BABEL_OP2_207_94869_20130627_162540_inLine +BABEL_OP2_207_94869_20130627_162540_outLine +BABEL_OP2_207_95077_20130910_113448_inLine +BABEL_OP2_207_95077_20130910_113448_outLine +BABEL_OP2_207_95231_20131029_145824_inLine +BABEL_OP2_207_95231_20131029_145824_outLine +BABEL_OP2_207_95269_20130725_140512_inLine +BABEL_OP2_207_95269_20130725_140512_outLine +BABEL_OP2_207_95663_20130626_085943_inLine +BABEL_OP2_207_95663_20130626_085943_outLine +BABEL_OP2_207_96190_20130730_105000_inLine +BABEL_OP2_207_96190_20130730_105000_outLine +BABEL_OP2_207_96525_20130919_151001_inLine +BABEL_OP2_207_96525_20130919_151001_outLine +BABEL_OP2_207_96690_20130808_133431_inLine +BABEL_OP2_207_96690_20130808_133431_outLine +BABEL_OP2_207_96808_20131007_222455_inLine +BABEL_OP2_207_96808_20131007_222455_outLine +BABEL_OP2_207_96820_20130815_171850_inLine +BABEL_OP2_207_96820_20130815_171850_outLine +BABEL_OP2_207_96820_20130815_172511_inLine +BABEL_OP2_207_96820_20130815_172511_outLine +BABEL_OP2_207_96910_20130723_132125_inLine +BABEL_OP2_207_96910_20130723_132125_outLine +BABEL_OP2_207_97220_20131015_210228_inLine +BABEL_OP2_207_97220_20131015_210228_outLine +BABEL_OP2_207_97557_20130824_125158_inLine +BABEL_OP2_207_97557_20130824_125158_outLine +BABEL_OP2_207_97588_20130720_172415_inLine +BABEL_OP2_207_97588_20130720_172415_outLine +BABEL_OP2_207_97731_20130920_141703_inLine +BABEL_OP2_207_97731_20130920_141703_outLine +BABEL_OP2_207_97836_20130930_145119_inLine +BABEL_OP2_207_97836_20130930_145119_outLine +BABEL_OP2_207_97896_20130807_165056_inLine +BABEL_OP2_207_97896_20130807_165056_outLine +BABEL_OP2_207_97911_20131017_134323_inLine +BABEL_OP2_207_97911_20131017_134323_outLine +BABEL_OP2_207_98390_20130630_121753_inLine +BABEL_OP2_207_98390_20130630_121753_outLine +BABEL_OP2_207_98489_20130712_001025_inLine +BABEL_OP2_207_98489_20130712_001025_outLine +BABEL_OP2_207_98565_20131220_143328_inLine +BABEL_OP2_207_98565_20131220_143328_outLine +BABEL_OP2_207_99289_20130930_212352_inLine +BABEL_OP2_207_99289_20130930_212352_outLine +BABEL_OP2_207_99998_20130730_104201_inLine +BABEL_OP2_207_99998_20130730_104201_outLine diff --git a/egs/babel/s5d/conf/lists/207-tokpisin/untranscribed-training.list b/egs/babel/s5d/conf/lists/207-tokpisin/untranscribed-training.list new file mode 100644 index 00000000000..bd95fc6c89a --- /dev/null +++ b/egs/babel/s5d/conf/lists/207-tokpisin/untranscribed-training.list @@ -0,0 +1,539 @@ +BABEL_OP2_207_11096_20131010_155716_inLine +BABEL_OP2_207_11096_20131010_155716_outLine +BABEL_OP2_207_12635_20130926_134703_inLine +BABEL_OP2_207_12635_20130926_134703_outLine +BABEL_OP2_207_13189_20130924_113930_inLine +BABEL_OP2_207_13189_20130924_113930_outLine +BABEL_OP2_207_14097_20131211_145352_inLine +BABEL_OP2_207_14097_20131211_145352_outLine 
+BABEL_OP2_207_15324_20130824_114737_inLine +BABEL_OP2_207_15324_20130824_114737_outLine +BABEL_OP2_207_15324_20130824_115222_inLine +BABEL_OP2_207_15324_20130824_115222_outLine +BABEL_OP2_207_15324_20130824_120315_inLine +BABEL_OP2_207_15324_20130824_120315_outLine +BABEL_OP2_207_16787_20130807_141736_inLine +BABEL_OP2_207_16787_20130807_141736_outLine +BABEL_OP2_207_17165_20130811_161522_inLine +BABEL_OP2_207_17165_20130811_161522_outLine +BABEL_OP2_207_17582_20131014_140754_inLine +BABEL_OP2_207_17582_20131014_140754_outLine +BABEL_OP2_207_17881_20130927_103059_inLine +BABEL_OP2_207_17881_20130927_103059_outLine +BABEL_OP2_207_17890_20130905_165333_inLine +BABEL_OP2_207_17890_20130905_165333_outLine +BABEL_OP2_207_17914_20130926_134141_inLine +BABEL_OP2_207_17914_20130926_134141_outLine +BABEL_OP2_207_18118_20130912_105508_inLine +BABEL_OP2_207_18118_20130912_105508_outLine +BABEL_OP2_207_18380_20130811_091120_inLine +BABEL_OP2_207_18380_20130811_091120_outLine +BABEL_OP2_207_18766_20131007_145032_inLine +BABEL_OP2_207_18766_20131007_145032_outLine +BABEL_OP2_207_19120_20131001_112430_inLine +BABEL_OP2_207_19120_20131001_113821_inLine +BABEL_OP2_207_19130_20130915_130323_inLine +BABEL_OP2_207_19130_20130915_130323_outLine +BABEL_OP2_207_19130_20130915_170627_inLine +BABEL_OP2_207_19130_20130915_170627_outLine +BABEL_OP2_207_19444_20131027_115915_inLine +BABEL_OP2_207_19444_20131027_115915_outLine +BABEL_OP2_207_19621_20130820_123522_inLine +BABEL_OP2_207_19621_20130820_123522_outLine +BABEL_OP2_207_19663_20130808_130208_inLine +BABEL_OP2_207_19663_20130808_130208_outLine +BABEL_OP2_207_19832_20131023_131334_inLine +BABEL_OP2_207_19832_20131023_131334_outLine +BABEL_OP2_207_20738_20130925_150141_inLine +BABEL_OP2_207_20738_20130925_150141_outLine +BABEL_OP2_207_20768_20130918_153000_inLine +BABEL_OP2_207_20768_20130918_153000_outLine +BABEL_OP2_207_21393_20131009_171742_inLine +BABEL_OP2_207_21393_20131009_172913_inLine +BABEL_OP2_207_21435_20130930_113048_inLine +BABEL_OP2_207_21435_20130930_113048_outLine +BABEL_OP2_207_23355_20131028_195808_inLine +BABEL_OP2_207_23355_20131028_195808_outLine +BABEL_OP2_207_23395_20130819_172407_inLine +BABEL_OP2_207_23395_20130819_172407_outLine +BABEL_OP2_207_24017_20130920_143300_inLine +BABEL_OP2_207_24017_20130920_143300_outLine +BABEL_OP2_207_24231_20131004_142046_inLine +BABEL_OP2_207_24231_20131004_142046_outLine +BABEL_OP2_207_24241_20131022_163927_inLine +BABEL_OP2_207_24241_20131022_163927_outLine +BABEL_OP2_207_24587_20131028_181902_inLine +BABEL_OP2_207_24587_20131028_181902_outLine +BABEL_OP2_207_25068_20131128_155214_inLine +BABEL_OP2_207_25068_20131128_155214_outLine +BABEL_OP2_207_25198_20131009_144048_inLine +BABEL_OP2_207_25198_20131009_144048_outLine +BABEL_OP2_207_26206_20130905_123052_inLine +BABEL_OP2_207_26206_20130905_123052_outLine +BABEL_OP2_207_26398_20131007_122710_inLine +BABEL_OP2_207_26398_20131007_122710_outLine +BABEL_OP2_207_27042_20130913_145438_inLine +BABEL_OP2_207_27042_20130913_145438_outLine +BABEL_OP2_207_28538_20130809_210336_inLine +BABEL_OP2_207_28538_20130809_210336_outLine +BABEL_OP2_207_28585_20130921_125721_inLine +BABEL_OP2_207_28585_20130921_125721_outLine +BABEL_OP2_207_29021_20131002_153001_inLine +BABEL_OP2_207_29021_20131002_153001_outLine +BABEL_OP2_207_29208_20130808_130335_inLine +BABEL_OP2_207_29208_20130808_130335_outLine +BABEL_OP2_207_29643_20131010_214342_inLine +BABEL_OP2_207_29643_20131010_214342_outLine +BABEL_OP2_207_30497_20131001_130218_inLine 
+BABEL_OP2_207_30497_20131001_130218_outLine +BABEL_OP2_207_30869_20130920_162014_inLine +BABEL_OP2_207_30869_20130920_162014_outLine +BABEL_OP2_207_31182_20130917_210449_inLine +BABEL_OP2_207_31182_20130917_210449_outLine +BABEL_OP2_207_31184_20130809_153124_inLine +BABEL_OP2_207_31184_20130809_153124_outLine +BABEL_OP2_207_31583_20130916_202055_inLine +BABEL_OP2_207_31583_20130916_202055_outLine +BABEL_OP2_207_31628_20130829_135440_inLine +BABEL_OP2_207_31628_20130829_135440_outLine +BABEL_OP2_207_31979_20130808_164711_inLine +BABEL_OP2_207_31979_20130808_164711_outLine +BABEL_OP2_207_31979_20130808_165705_inLine +BABEL_OP2_207_31979_20130808_165705_outLine +BABEL_OP2_207_32301_20130905_171450_inLine +BABEL_OP2_207_32301_20130905_171450_outLine +BABEL_OP2_207_32861_20131001_201155_inLine +BABEL_OP2_207_32861_20131001_201155_outLine +BABEL_OP2_207_32872_20131007_132753_inLine +BABEL_OP2_207_32872_20131007_132753_outLine +BABEL_OP2_207_32914_20130828_143138_inLine +BABEL_OP2_207_32914_20130828_143138_outLine +BABEL_OP2_207_33251_20130826_113656_inLine +BABEL_OP2_207_33251_20130826_113656_outLine +BABEL_OP2_207_33635_20130810_084448_inLine +BABEL_OP2_207_33635_20130810_084448_outLine +BABEL_OP2_207_34336_20130723_154022_inLine +BABEL_OP2_207_34336_20130723_154022_outLine +BABEL_OP2_207_34903_20130826_111451_inLine +BABEL_OP2_207_34903_20130826_111451_outLine +BABEL_OP2_207_34903_20130826_112452_inLine +BABEL_OP2_207_34903_20130826_112452_outLine +BABEL_OP2_207_35202_20130904_140235_inLine +BABEL_OP2_207_35202_20130904_140235_outLine +BABEL_OP2_207_36059_20130828_142450_inLine +BABEL_OP2_207_36059_20130828_142450_outLine +BABEL_OP2_207_36147_20131128_144158_inLine +BABEL_OP2_207_36147_20131128_144158_outLine +BABEL_OP2_207_36219_20130801_142236_inLine +BABEL_OP2_207_36219_20130801_142236_outLine +BABEL_OP2_207_36990_20130813_203843_inLine +BABEL_OP2_207_36990_20130813_203843_outLine +BABEL_OP2_207_36990_20130813_205054_inLine +BABEL_OP2_207_36990_20130813_205054_outLine +BABEL_OP2_207_37064_20130802_163007_inLine +BABEL_OP2_207_37064_20130802_163007_outLine +BABEL_OP2_207_37229_20131014_133555_inLine +BABEL_OP2_207_37229_20131014_133555_outLine +BABEL_OP2_207_37281_20130809_155629_inLine +BABEL_OP2_207_37281_20130809_155629_outLine +BABEL_OP2_207_37598_20130822_115445_inLine +BABEL_OP2_207_37598_20130822_115445_outLine +BABEL_OP2_207_38076_20130828_114052_inLine +BABEL_OP2_207_38076_20130828_114052_outLine +BABEL_OP2_207_38750_20130912_115957_inLine +BABEL_OP2_207_38750_20130912_115957_outLine +BABEL_OP2_207_38979_20130925_142422_inLine +BABEL_OP2_207_38979_20130925_142422_outLine +BABEL_OP2_207_39059_20130924_141830_inLine +BABEL_OP2_207_39059_20130924_141830_outLine +BABEL_OP2_207_39159_20130802_184611_inLine +BABEL_OP2_207_39159_20130802_184611_outLine +BABEL_OP2_207_39680_20130924_150026_inLine +BABEL_OP2_207_39680_20130924_150026_outLine +BABEL_OP2_207_41097_20130826_120511_inLine +BABEL_OP2_207_41097_20130826_120511_outLine +BABEL_OP2_207_41233_20130919_151406_inLine +BABEL_OP2_207_41233_20130919_151406_outLine +BABEL_OP2_207_41692_20131008_115554_inLine +BABEL_OP2_207_41692_20131008_115554_outLine +BABEL_OP2_207_42155_20130820_153344_inLine +BABEL_OP2_207_42155_20130820_153344_outLine +BABEL_OP2_207_42155_20130820_155002_inLine +BABEL_OP2_207_42155_20130820_155002_outLine +BABEL_OP2_207_42243_20130730_171007_inLine +BABEL_OP2_207_42243_20130730_171620_inLine +BABEL_OP2_207_42526_20130903_162134_inLine +BABEL_OP2_207_42526_20130903_162134_outLine 
+BABEL_OP2_207_42526_20130903_163434_inLine +BABEL_OP2_207_42526_20130903_163434_outLine +BABEL_OP2_207_42718_20130828_165932_inLine +BABEL_OP2_207_42718_20130828_165932_outLine +BABEL_OP2_207_42848_20131010_143925_inLine +BABEL_OP2_207_42848_20131010_143925_outLine +BABEL_OP2_207_42883_20131008_131439_inLine +BABEL_OP2_207_42883_20131008_131439_outLine +BABEL_OP2_207_43074_20131213_105423_inLine +BABEL_OP2_207_43074_20131213_105423_outLine +BABEL_OP2_207_43285_20130905_162602_inLine +BABEL_OP2_207_43285_20130905_162602_outLine +BABEL_OP2_207_43388_20130809_194529_inLine +BABEL_OP2_207_43388_20130809_194529_outLine +BABEL_OP2_207_43789_20130809_213917_inLine +BABEL_OP2_207_43789_20130809_213917_outLine +BABEL_OP2_207_43990_20131027_190409_inLine +BABEL_OP2_207_43990_20131027_190409_outLine +BABEL_OP2_207_44290_20131002_160104_inLine +BABEL_OP2_207_44290_20131002_160104_outLine +BABEL_OP2_207_44847_20130827_155200_inLine +BABEL_OP2_207_44847_20130827_155200_outLine +BABEL_OP2_207_44868_20130904_135956_inLine +BABEL_OP2_207_44868_20130904_135956_outLine +BABEL_OP2_207_45106_20130823_154724_inLine +BABEL_OP2_207_45106_20130823_154724_outLine +BABEL_OP2_207_46315_20130905_150622_inLine +BABEL_OP2_207_46315_20130905_150622_outLine +BABEL_OP2_207_47799_20131023_123730_inLine +BABEL_OP2_207_47799_20131023_123730_outLine +BABEL_OP2_207_47877_20130902_143454_inLine +BABEL_OP2_207_47877_20130902_143454_outLine +BABEL_OP2_207_48200_20130921_155444_inLine +BABEL_OP2_207_48200_20130921_155444_outLine +BABEL_OP2_207_48789_20130812_134605_inLine +BABEL_OP2_207_48789_20130812_134605_outLine +BABEL_OP2_207_49118_20130920_121936_inLine +BABEL_OP2_207_49118_20130920_121936_outLine +BABEL_OP2_207_49197_20130807_131817_inLine +BABEL_OP2_207_49197_20130807_131817_outLine +BABEL_OP2_207_49812_20130922_204620_inLine +BABEL_OP2_207_49812_20130922_204620_outLine +BABEL_OP2_207_49902_20130724_154629_inLine +BABEL_OP2_207_49902_20130724_154629_outLine +BABEL_OP2_207_50630_20130905_150725_inLine +BABEL_OP2_207_50630_20130905_150725_outLine +BABEL_OP2_207_50745_20130930_091255_inLine +BABEL_OP2_207_50745_20130930_091255_outLine +BABEL_OP2_207_50810_20130625_080815_outLine +BABEL_OP2_207_50940_20131212_122606_inLine +BABEL_OP2_207_50940_20131212_122606_outLine +BABEL_OP2_207_50958_20130808_153539_inLine +BABEL_OP2_207_50958_20130808_155452_inLine +BABEL_OP2_207_51414_20131008_124320_inLine +BABEL_OP2_207_51414_20131008_124320_outLine +BABEL_OP2_207_51540_20130920_151858_inLine +BABEL_OP2_207_51540_20130920_151858_outLine +BABEL_OP2_207_52222_20131101_145127_inLine +BABEL_OP2_207_52222_20131101_145127_outLine +BABEL_OP2_207_52442_20130814_204040_inLine +BABEL_OP2_207_52442_20130814_204040_outLine +BABEL_OP2_207_52483_20131023_121543_inLine +BABEL_OP2_207_52483_20131023_121543_outLine +BABEL_OP2_207_53072_20131007_135601_inLine +BABEL_OP2_207_53072_20131007_135601_outLine +BABEL_OP2_207_53665_20131001_124434_inLine +BABEL_OP2_207_53665_20131001_124434_outLine +BABEL_OP2_207_54634_20131106_133052_inLine +BABEL_OP2_207_54634_20131106_133052_outLine +BABEL_OP2_207_56023_20130922_114453_inLine +BABEL_OP2_207_56023_20130922_114453_outLine +BABEL_OP2_207_56213_20130911_155753_inLine +BABEL_OP2_207_56213_20130911_155753_outLine +BABEL_OP2_207_56306_20130903_120057_inLine +BABEL_OP2_207_56306_20130903_120057_outLine +BABEL_OP2_207_56345_20131030_214035_inLine +BABEL_OP2_207_56345_20131030_214035_outLine +BABEL_OP2_207_56677_20130911_181638_inLine +BABEL_OP2_207_56677_20130911_181638_outLine 
+BABEL_OP2_207_56720_20130910_114920_inLine +BABEL_OP2_207_56720_20130910_114920_outLine +BABEL_OP2_207_57065_20130903_113823_inLine +BABEL_OP2_207_57065_20130903_113823_outLine +BABEL_OP2_207_57219_20131016_161014_inLine +BABEL_OP2_207_57219_20131016_161014_outLine +BABEL_OP2_207_57464_20131002_141306_inLine +BABEL_OP2_207_57464_20131002_141306_outLine +BABEL_OP2_207_57566_20130921_125810_inLine +BABEL_OP2_207_57566_20130921_125810_outLine +BABEL_OP2_207_57609_20130819_123817_inLine +BABEL_OP2_207_57609_20130819_123817_outLine +BABEL_OP2_207_57678_20130802_172845_inLine +BABEL_OP2_207_57678_20130802_172845_outLine +BABEL_OP2_207_57919_20131130_111652_inLine +BABEL_OP2_207_57919_20131130_111652_outLine +BABEL_OP2_207_58026_20131017_142517_inLine +BABEL_OP2_207_58026_20131017_142517_outLine +BABEL_OP2_207_58717_20130821_131155_inLine +BABEL_OP2_207_58717_20130821_131155_outLine +BABEL_OP2_207_59928_20130701_151227_inLine +BABEL_OP2_207_60310_20130915_115206_inLine +BABEL_OP2_207_60310_20130915_115206_outLine +BABEL_OP2_207_61040_20130915_174923_inLine +BABEL_OP2_207_61040_20130915_174923_outLine +BABEL_OP2_207_61225_20130624_075839_inLine +BABEL_OP2_207_61225_20130624_075839_outLine +BABEL_OP2_207_61225_20130816_102014_inLine +BABEL_OP2_207_61225_20130816_102014_outLine +BABEL_OP2_207_61435_20130920_151632_inLine +BABEL_OP2_207_61435_20130920_151632_outLine +BABEL_OP2_207_61888_20130925_153044_inLine +BABEL_OP2_207_61888_20130925_153044_outLine +BABEL_OP2_207_61971_20131002_122934_inLine +BABEL_OP2_207_61971_20131002_122934_outLine +BABEL_OP2_207_61971_20131002_124937_inLine +BABEL_OP2_207_61971_20131002_124937_outLine +BABEL_OP2_207_62286_20130808_152914_inLine +BABEL_OP2_207_62286_20130808_152914_outLine +BABEL_OP2_207_62456_20131008_120833_inLine +BABEL_OP2_207_62456_20131008_120833_outLine +BABEL_OP2_207_62835_20130813_200412_inLine +BABEL_OP2_207_62835_20130813_200412_outLine +BABEL_OP2_207_63220_20130826_161151_inLine +BABEL_OP2_207_63220_20130826_161151_outLine +BABEL_OP2_207_63309_20131218_175444_inLine +BABEL_OP2_207_63309_20131218_175444_outLine +BABEL_OP2_207_63425_20130829_145909_inLine +BABEL_OP2_207_63425_20130829_145909_outLine +BABEL_OP2_207_63523_20130829_130857_inLine +BABEL_OP2_207_63523_20130829_130857_outLine +BABEL_OP2_207_63523_20130829_131711_inLine +BABEL_OP2_207_63523_20130829_131711_outLine +BABEL_OP2_207_63730_20131015_214600_inLine +BABEL_OP2_207_63730_20131015_214600_outLine +BABEL_OP2_207_63938_20130926_154144_inLine +BABEL_OP2_207_63938_20130926_154144_outLine +BABEL_OP2_207_63938_20130926_155144_inLine +BABEL_OP2_207_63938_20130926_155144_outLine +BABEL_OP2_207_64014_20130926_150824_inLine +BABEL_OP2_207_64014_20130926_150824_outLine +BABEL_OP2_207_64259_20131102_110911_inLine +BABEL_OP2_207_64259_20131102_110911_outLine +BABEL_OP2_207_64638_20130829_133013_inLine +BABEL_OP2_207_64638_20130829_133013_outLine +BABEL_OP2_207_64902_20130930_143110_inLine +BABEL_OP2_207_64902_20130930_143110_outLine +BABEL_OP2_207_65064_20130820_141717_inLine +BABEL_OP2_207_65064_20130820_141717_outLine +BABEL_OP2_207_65477_20130807_163701_inLine +BABEL_OP2_207_65640_20131002_143110_inLine +BABEL_OP2_207_65640_20131002_143110_outLine +BABEL_OP2_207_66026_20130911_163013_inLine +BABEL_OP2_207_66026_20130911_163013_outLine +BABEL_OP2_207_66959_20130910_082006_inLine +BABEL_OP2_207_66959_20130910_082006_outLine +BABEL_OP2_207_66959_20130910_082705_inLine +BABEL_OP2_207_66959_20130910_082705_outLine +BABEL_OP2_207_66959_20130910_083542_inLine 
+BABEL_OP2_207_66959_20130910_083542_outLine +BABEL_OP2_207_66975_20131203_124359_inLine +BABEL_OP2_207_66975_20131203_124359_outLine +BABEL_OP2_207_67085_20131004_122616_inLine +BABEL_OP2_207_67085_20131004_122616_outLine +BABEL_OP2_207_67552_20130904_171052_inLine +BABEL_OP2_207_67552_20130904_171052_outLine +BABEL_OP2_207_67964_20131003_163118_inLine +BABEL_OP2_207_67964_20131003_163118_outLine +BABEL_OP2_207_68306_20130906_161631_inLine +BABEL_OP2_207_68306_20130906_161631_outLine +BABEL_OP2_207_69107_20130821_115813_inLine +BABEL_OP2_207_69107_20130821_115813_outLine +BABEL_OP2_207_69107_20130821_120807_inLine +BABEL_OP2_207_69107_20130821_120807_outLine +BABEL_OP2_207_69153_20130912_183854_inLine +BABEL_OP2_207_69153_20130912_183854_outLine +BABEL_OP2_207_69885_20130907_114201_inLine +BABEL_OP2_207_69885_20130907_114201_outLine +BABEL_OP2_207_69982_20131018_120252_inLine +BABEL_OP2_207_69982_20131018_120252_outLine +BABEL_OP2_207_70182_20131014_163540_inLine +BABEL_OP2_207_70182_20131014_163540_outLine +BABEL_OP2_207_70343_20130907_114751_inLine +BABEL_OP2_207_70343_20130907_114751_outLine +BABEL_OP2_207_70460_20130925_151332_inLine +BABEL_OP2_207_70460_20130925_151332_outLine +BABEL_OP2_207_70460_20130925_152713_inLine +BABEL_OP2_207_70460_20130925_152713_outLine +BABEL_OP2_207_70526_20130908_193512_inLine +BABEL_OP2_207_70526_20130908_193512_outLine +BABEL_OP2_207_70986_20131030_190232_inLine +BABEL_OP2_207_70986_20131030_190232_outLine +BABEL_OP2_207_71189_20130930_121030_inLine +BABEL_OP2_207_71189_20130930_121030_outLine +BABEL_OP2_207_71460_20131128_152217_inLine +BABEL_OP2_207_71460_20131128_152217_outLine +BABEL_OP2_207_72007_20130906_152449_inLine +BABEL_OP2_207_72007_20130906_152449_outLine +BABEL_OP2_207_72349_20131002_145602_inLine +BABEL_OP2_207_72349_20131002_145602_outLine +BABEL_OP2_207_73301_20130801_133004_inLine +BABEL_OP2_207_73301_20130801_133004_outLine +BABEL_OP2_207_73485_20130907_132923_inLine +BABEL_OP2_207_73485_20130907_132923_outLine +BABEL_OP2_207_73757_20130813_005856_inLine +BABEL_OP2_207_73757_20130813_005856_outLine +BABEL_OP2_207_73757_20130813_011142_inLine +BABEL_OP2_207_73757_20130813_011142_outLine +BABEL_OP2_207_75342_20130906_143544_inLine +BABEL_OP2_207_75342_20130906_143544_outLine +BABEL_OP2_207_75460_20131014_160822_inLine +BABEL_OP2_207_75460_20131014_160822_outLine +BABEL_OP2_207_76793_20131028_174027_inLine +BABEL_OP2_207_76793_20131028_174027_outLine +BABEL_OP2_207_76970_20131018_142728_inLine +BABEL_OP2_207_76970_20131018_142728_outLine +BABEL_OP2_207_77242_20131015_210438_inLine +BABEL_OP2_207_77242_20131015_210438_outLine +BABEL_OP2_207_78016_20130725_161812_outLine +BABEL_OP2_207_78116_20130906_165511_inLine +BABEL_OP2_207_78116_20130906_165511_outLine +BABEL_OP2_207_78360_20130926_154542_inLine +BABEL_OP2_207_78360_20130926_154542_outLine +BABEL_OP2_207_78544_20130829_140559_inLine +BABEL_OP2_207_78544_20130829_140559_outLine +BABEL_OP2_207_79139_20130811_111254_inLine +BABEL_OP2_207_79139_20130811_111254_outLine +BABEL_OP2_207_80622_20130824_120649_inLine +BABEL_OP2_207_80622_20130824_120649_outLine +BABEL_OP2_207_80897_20130824_111625_inLine +BABEL_OP2_207_80897_20130824_111625_outLine +BABEL_OP2_207_81229_20130807_135935_inLine +BABEL_OP2_207_81229_20130807_135935_outLine +BABEL_OP2_207_81810_20130831_144019_inLine +BABEL_OP2_207_81810_20130831_144019_outLine +BABEL_OP2_207_81810_20130831_145233_inLine +BABEL_OP2_207_81810_20130831_145233_outLine +BABEL_OP2_207_82089_20130809_131053_inLine 
+BABEL_OP2_207_82089_20130809_131053_outLine +BABEL_OP2_207_82224_20130923_132931_inLine +BABEL_OP2_207_82361_20131001_152932_inLine +BABEL_OP2_207_82361_20131001_152932_outLine +BABEL_OP2_207_82473_20130702_072644_inLine +BABEL_OP2_207_82966_20130918_130322_inLine +BABEL_OP2_207_82966_20130918_130322_outLine +BABEL_OP2_207_83062_20131002_115950_inLine +BABEL_OP2_207_83062_20131002_115950_outLine +BABEL_OP2_207_83929_20130621_172038_outLine +BABEL_OP2_207_83935_20130906_160858_inLine +BABEL_OP2_207_83935_20130906_160858_outLine +BABEL_OP2_207_84055_20130926_130321_inLine +BABEL_OP2_207_84055_20130926_130321_outLine +BABEL_OP2_207_84055_20130926_131535_inLine +BABEL_OP2_207_84055_20130926_131535_outLine +BABEL_OP2_207_84061_20130725_161035_inLine +BABEL_OP2_207_84061_20130725_161035_outLine +BABEL_OP2_207_84327_20130907_112232_inLine +BABEL_OP2_207_84327_20130907_112232_outLine +BABEL_OP2_207_84339_20130908_213808_inLine +BABEL_OP2_207_84339_20130908_213808_outLine +BABEL_OP2_207_84370_20131017_114407_inLine +BABEL_OP2_207_84370_20131017_114407_outLine +BABEL_OP2_207_84458_20130911_150603_inLine +BABEL_OP2_207_84458_20130911_150603_outLine +BABEL_OP2_207_84469_20130911_152956_inLine +BABEL_OP2_207_84469_20130911_152956_outLine +BABEL_OP2_207_84709_20131025_164240_inLine +BABEL_OP2_207_84709_20131025_164240_outLine +BABEL_OP2_207_84737_20130924_104520_inLine +BABEL_OP2_207_84737_20130924_104520_outLine +BABEL_OP2_207_84838_20130918_142125_inLine +BABEL_OP2_207_84838_20130918_142125_outLine +BABEL_OP2_207_85254_20131016_163511_inLine +BABEL_OP2_207_85254_20131016_163511_outLine +BABEL_OP2_207_85325_20130908_204430_inLine +BABEL_OP2_207_85325_20130908_204430_outLine +BABEL_OP2_207_86597_20131015_223953_inLine +BABEL_OP2_207_86597_20131015_223953_outLine +BABEL_OP2_207_86888_20130823_120853_inLine +BABEL_OP2_207_86888_20130823_120853_outLine +BABEL_OP2_207_86888_20130823_122304_inLine +BABEL_OP2_207_86888_20130823_122304_outLine +BABEL_OP2_207_87545_20131004_134332_inLine +BABEL_OP2_207_87545_20131004_134332_outLine +BABEL_OP2_207_87889_20130828_123340_inLine +BABEL_OP2_207_87889_20130828_123340_outLine +BABEL_OP2_207_88372_20130927_123913_inLine +BABEL_OP2_207_88372_20130927_123913_outLine +BABEL_OP2_207_88550_20131002_133933_inLine +BABEL_OP2_207_88550_20131002_133933_outLine +BABEL_OP2_207_88601_20130812_143956_inLine +BABEL_OP2_207_88601_20130812_143956_outLine +BABEL_OP2_207_88669_20130823_134613_inLine +BABEL_OP2_207_88669_20130823_134613_outLine +BABEL_OP2_207_89358_20130820_133904_inLine +BABEL_OP2_207_89358_20130820_133904_outLine +BABEL_OP2_207_89794_20130828_091302_inLine +BABEL_OP2_207_89794_20130828_091302_outLine +BABEL_OP2_207_90080_20131003_143629_inLine +BABEL_OP2_207_90080_20131003_143629_outLine +BABEL_OP2_207_90440_20131027_175417_inLine +BABEL_OP2_207_90440_20131027_175417_outLine +BABEL_OP2_207_90709_20130627_182820_inLine +BABEL_OP2_207_90709_20130627_182820_outLine +BABEL_OP2_207_90739_20130807_151133_inLine +BABEL_OP2_207_90739_20130807_151133_outLine +BABEL_OP2_207_90760_20131016_111829_inLine +BABEL_OP2_207_90760_20131016_111829_outLine +BABEL_OP2_207_91189_20131011_125932_inLine +BABEL_OP2_207_91189_20131011_125932_outLine +BABEL_OP2_207_91372_20130909_134637_inLine +BABEL_OP2_207_91372_20130909_134637_outLine +BABEL_OP2_207_91930_20131001_222834_inLine +BABEL_OP2_207_91930_20131001_222834_outLine +BABEL_OP2_207_91930_20131001_223632_inLine +BABEL_OP2_207_91930_20131001_223632_outLine +BABEL_OP2_207_92077_20131007_163003_inLine 
+BABEL_OP2_207_92077_20131007_163003_outLine +BABEL_OP2_207_92176_20130813_133457_inLine +BABEL_OP2_207_92176_20130813_133457_outLine +BABEL_OP2_207_92557_20130924_134800_inLine +BABEL_OP2_207_92557_20130924_134800_outLine +BABEL_OP2_207_92643_20131007_150231_inLine +BABEL_OP2_207_92643_20131007_150231_outLine +BABEL_OP2_207_92698_20130812_235059_inLine +BABEL_OP2_207_92698_20130812_235059_outLine +BABEL_OP2_207_92757_20130902_145657_inLine +BABEL_OP2_207_92757_20130902_145657_outLine +BABEL_OP2_207_92757_20130902_151025_inLine +BABEL_OP2_207_92757_20130902_151025_outLine +BABEL_OP2_207_92757_20130902_152031_inLine +BABEL_OP2_207_92757_20130902_152031_outLine +BABEL_OP2_207_93469_20131004_145605_inLine +BABEL_OP2_207_93469_20131004_145605_outLine +BABEL_OP2_207_94002_20130813_140301_inLine +BABEL_OP2_207_94002_20130813_140301_outLine +BABEL_OP2_207_94025_20130904_125944_inLine +BABEL_OP2_207_94025_20130904_125944_outLine +BABEL_OP2_207_94025_20130904_130959_inLine +BABEL_OP2_207_94025_20130904_130959_outLine +BABEL_OP2_207_94166_20130925_152248_inLine +BABEL_OP2_207_94166_20130925_152248_outLine +BABEL_OP2_207_94237_20131004_202859_inLine +BABEL_OP2_207_94237_20131004_202859_outLine +BABEL_OP2_207_94409_20130809_220412_inLine +BABEL_OP2_207_94409_20130809_220412_outLine +BABEL_OP2_207_94465_20130909_125729_inLine +BABEL_OP2_207_94465_20130909_125729_outLine +BABEL_OP2_207_94465_20130909_130933_inLine +BABEL_OP2_207_94465_20130909_130933_outLine +BABEL_OP2_207_94745_20130829_131647_inLine +BABEL_OP2_207_94745_20130829_131647_outLine +BABEL_OP2_207_94803_20131101_171456_inLine +BABEL_OP2_207_94803_20131101_171456_outLine +BABEL_OP2_207_95670_20130801_184732_inLine +BABEL_OP2_207_95670_20130801_184732_outLine +BABEL_OP2_207_95670_20130801_185813_inLine +BABEL_OP2_207_95670_20130801_185813_outLine +BABEL_OP2_207_95903_20130927_143755_inLine +BABEL_OP2_207_95903_20130927_143755_outLine +BABEL_OP2_207_96088_20131002_131712_inLine +BABEL_OP2_207_96088_20131002_131712_outLine +BABEL_OP2_207_96205_20130820_122740_inLine +BABEL_OP2_207_96205_20130820_122740_outLine +BABEL_OP2_207_96405_20130802_164853_inLine +BABEL_OP2_207_96405_20130802_164853_outLine +BABEL_OP2_207_96504_20130719_153914_inLine +BABEL_OP2_207_96504_20130719_155023_inLine +BABEL_OP2_207_96504_20130802_132920_inLine +BABEL_OP2_207_96584_20130926_074218_inLine +BABEL_OP2_207_96584_20130926_074218_outLine +BABEL_OP2_207_97097_20131001_132614_inLine +BABEL_OP2_207_97097_20131001_132614_outLine +BABEL_OP2_207_99887_20130924_102355_inLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/dev.list b/egs/babel/s5d/conf/lists/301-cebuano/dev.list new file mode 100644 index 00000000000..ecf3753ee7d --- /dev/null +++ b/egs/babel/s5d/conf/lists/301-cebuano/dev.list @@ -0,0 +1,134 @@ +BABEL_OP2_301_13792_20131111_122219_inLine +BABEL_OP2_301_13792_20131111_122219_outLine +BABEL_OP2_301_14141_20140118_202248_inLine +BABEL_OP2_301_14141_20140118_202248_outLine +BABEL_OP2_301_15262_20131105_213812_inLine +BABEL_OP2_301_15262_20131105_213812_outLine +BABEL_OP2_301_15262_20131105_230709_inLine +BABEL_OP2_301_15262_20131105_230709_outLine +BABEL_OP2_301_15638_20131210_131327_inLine +BABEL_OP2_301_15638_20131210_131327_outLine +BABEL_OP2_301_17127_20140106_175906_inLine +BABEL_OP2_301_17127_20140106_175906_outLine +BABEL_OP2_301_17881_20140122_201653_inLine +BABEL_OP2_301_17881_20140122_201653_outLine +BABEL_OP2_301_18078_20131226_153406_inLine +BABEL_OP2_301_18078_20131226_153406_outLine +BABEL_OP2_301_21109_20140102_180619_inLine 
+BABEL_OP2_301_21109_20140102_180619_outLine +BABEL_OP2_301_22280_20140206_202952_inLine +BABEL_OP2_301_22280_20140206_202952_outLine +BABEL_OP2_301_22466_20131015_174457_inLine +BABEL_OP2_301_22466_20131015_174457_outLine +BABEL_OP2_301_22612_20131217_202720_inLine +BABEL_OP2_301_22612_20131217_202720_outLine +BABEL_OP2_301_23505_20131023_135517_inLine +BABEL_OP2_301_23505_20131023_135517_outLine +BABEL_OP2_301_24241_20140214_170629_inLine +BABEL_OP2_301_24241_20140214_170629_outLine +BABEL_OP2_301_27082_20131209_203149_inLine +BABEL_OP2_301_27082_20131209_203149_outLine +BABEL_OP2_301_29685_20131203_182746_inLine +BABEL_OP2_301_29685_20131203_182746_outLine +BABEL_OP2_301_29685_20131203_184526_inLine +BABEL_OP2_301_29685_20131203_184526_outLine +BABEL_OP2_301_36059_20140118_204512_inLine +BABEL_OP2_301_36059_20140118_204512_outLine +BABEL_OP2_301_37281_20131205_190107_inLine +BABEL_OP2_301_37281_20131205_190107_outLine +BABEL_OP2_301_38340_20131128_145618_inLine +BABEL_OP2_301_38340_20131128_145618_outLine +BABEL_OP2_301_40713_20131126_193850_inLine +BABEL_OP2_301_40713_20131126_193850_outLine +BABEL_OP2_301_41958_20131127_145018_inLine +BABEL_OP2_301_41958_20131127_145018_outLine +BABEL_OP2_301_43239_20140102_190746_inLine +BABEL_OP2_301_43239_20140102_190746_outLine +BABEL_OP2_301_43646_20131019_165638_inLine +BABEL_OP2_301_43646_20131019_165638_outLine +BABEL_OP2_301_46008_20140126_192930_inLine +BABEL_OP2_301_46008_20140126_192930_outLine +BABEL_OP2_301_46333_20131027_181031_inLine +BABEL_OP2_301_46333_20131027_181031_outLine +BABEL_OP2_301_48789_20131209_181711_inLine +BABEL_OP2_301_48789_20131209_181711_outLine +BABEL_OP2_301_49902_20131127_180426_inLine +BABEL_OP2_301_49902_20131127_180426_outLine +BABEL_OP2_301_50565_20131025_202729_inLine +BABEL_OP2_301_50565_20131025_202729_outLine +BABEL_OP2_301_51530_20140125_195307_inLine +BABEL_OP2_301_51530_20140125_195307_outLine +BABEL_OP2_301_51955_20131125_182037_inLine +BABEL_OP2_301_51955_20131125_182037_outLine +BABEL_OP2_301_52301_20131107_133036_inLine +BABEL_OP2_301_52301_20131107_133036_outLine +BABEL_OP2_301_52301_20131107_135543_inLine +BABEL_OP2_301_52301_20131107_135543_outLine +BABEL_OP2_301_54744_20131202_184432_inLine +BABEL_OP2_301_54744_20131202_184432_outLine +BABEL_OP2_301_56370_20131101_175739_inLine +BABEL_OP2_301_56370_20131101_175739_outLine +BABEL_OP2_301_60299_20140202_130806_inLine +BABEL_OP2_301_60299_20140202_130806_outLine +BABEL_OP2_301_62362_20140129_154002_inLine +BABEL_OP2_301_62362_20140129_154002_outLine +BABEL_OP2_301_63425_20131213_184303_inLine +BABEL_OP2_301_63425_20131213_184303_outLine +BABEL_OP2_301_64759_20131103_154236_inLine +BABEL_OP2_301_64759_20131103_154236_outLine +BABEL_OP2_301_64870_20131226_133240_inLine +BABEL_OP2_301_64870_20131226_133240_outLine +BABEL_OP2_301_65252_20140126_190555_inLine +BABEL_OP2_301_65252_20140126_190555_outLine +BABEL_OP2_301_66026_20131216_194850_inLine +BABEL_OP2_301_66026_20131216_194850_outLine +BABEL_OP2_301_67085_20140126_181613_inLine +BABEL_OP2_301_67085_20140126_181613_outLine +BABEL_OP2_301_68306_20131212_171648_inLine +BABEL_OP2_301_68306_20131212_171648_outLine +BABEL_OP2_301_71404_20131112_205323_inLine +BABEL_OP2_301_71404_20131112_205323_outLine +BABEL_OP2_301_71404_20131112_211451_inLine +BABEL_OP2_301_71404_20131112_211451_outLine +BABEL_OP2_301_74226_20131213_195309_inLine +BABEL_OP2_301_74226_20131213_195309_outLine +BABEL_OP2_301_74455_20140115_152935_inLine +BABEL_OP2_301_74455_20140115_152935_outLine 
+BABEL_OP2_301_78194_20131015_181857_inLine +BABEL_OP2_301_78194_20131015_181857_outLine +BABEL_OP2_301_78194_20131015_183910_inLine +BABEL_OP2_301_78194_20131015_183910_outLine +BABEL_OP2_301_78360_20140110_190526_inLine +BABEL_OP2_301_78360_20140110_190526_outLine +BABEL_OP2_301_79660_20140201_160331_inLine +BABEL_OP2_301_79660_20140201_160331_outLine +BABEL_OP2_301_79820_20131127_235459_inLine +BABEL_OP2_301_79820_20131127_235459_outLine +BABEL_OP2_301_80897_20140206_142309_inLine +BABEL_OP2_301_80897_20140206_142309_outLine +BABEL_OP2_301_81427_20131126_151401_inLine +BABEL_OP2_301_81427_20131126_151401_outLine +BABEL_OP2_301_84611_20131125_193454_inLine +BABEL_OP2_301_84611_20131125_193454_outLine +BABEL_OP2_301_84709_20140220_141332_inLine +BABEL_OP2_301_84709_20140220_141332_outLine +BABEL_OP2_301_85179_20131227_172225_inLine +BABEL_OP2_301_85179_20131227_172225_outLine +BABEL_OP2_301_86467_20131112_182159_inLine +BABEL_OP2_301_86467_20131112_182159_outLine +BABEL_OP2_301_86467_20131112_193636_inLine +BABEL_OP2_301_86467_20131112_193636_outLine +BABEL_OP2_301_88550_20140128_150822_inLine +BABEL_OP2_301_88550_20140128_150822_outLine +BABEL_OP2_301_88873_20131202_130910_inLine +BABEL_OP2_301_88873_20131202_130910_outLine +BABEL_OP2_301_92792_20140123_104047_inLine +BABEL_OP2_301_92792_20140123_104047_outLine +BABEL_OP2_301_96985_20131021_164130_inLine +BABEL_OP2_301_96985_20131021_164130_outLine +BABEL_OP2_301_98489_20131123_232017_inLine +BABEL_OP2_301_98489_20131123_232017_outLine +BABEL_OP2_301_98489_20131123_233440_inLine +BABEL_OP2_301_98489_20131123_233440_outLine +BABEL_OP2_301_99516_20131022_111915_inLine +BABEL_OP2_301_99516_20131022_111915_outLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/eval.list b/egs/babel/s5d/conf/lists/301-cebuano/eval.list new file mode 100644 index 00000000000..6958122726d --- /dev/null +++ b/egs/babel/s5d/conf/lists/301-cebuano/eval.list @@ -0,0 +1,190 @@ +BABEL_OP2_301_10019_20131127_165625_inLine +BABEL_OP2_301_10019_20131127_165625_outLine +BABEL_OP2_301_10416_20131203_193332_inLine +BABEL_OP2_301_10416_20131203_193332_outLine +BABEL_OP2_301_12767_20131122_204245_inLine +BABEL_OP2_301_12767_20131122_204245_outLine +BABEL_OP2_301_13427_20131226_153605_inLine +BABEL_OP2_301_13427_20131226_153605_outLine +BABEL_OP2_301_13490_20131209_200441_inLine +BABEL_OP2_301_13490_20131209_200441_outLine +BABEL_OP2_301_14440_20131217_152957_inLine +BABEL_OP2_301_14440_20131217_152957_outLine +BABEL_OP2_301_14537_20140126_192700_inLine +BABEL_OP2_301_14537_20140126_192700_outLine +BABEL_OP2_301_16056_20131112_135620_inLine +BABEL_OP2_301_16056_20131112_135620_outLine +BABEL_OP2_301_16056_20131112_140413_inLine +BABEL_OP2_301_16056_20131112_140413_outLine +BABEL_OP2_301_16184_20131018_004611_inLine +BABEL_OP2_301_16184_20131018_004611_outLine +BABEL_OP2_301_16839_20140106_195749_inLine +BABEL_OP2_301_16839_20140106_195749_outLine +BABEL_OP2_301_17165_20131203_150708_inLine +BABEL_OP2_301_17165_20131203_150708_outLine +BABEL_OP2_301_18766_20140127_140851_inLine +BABEL_OP2_301_18766_20140127_140851_outLine +BABEL_OP2_301_19782_20131220_143639_inLine +BABEL_OP2_301_19782_20131220_143639_outLine +BABEL_OP2_301_19832_20140214_144414_inLine +BABEL_OP2_301_19832_20140214_144414_outLine +BABEL_OP2_301_20800_20131119_233324_inLine +BABEL_OP2_301_20800_20131119_233324_outLine +BABEL_OP2_301_22641_20131112_223928_inLine +BABEL_OP2_301_22641_20131112_223928_outLine +BABEL_OP2_301_23196_20140224_145440_inLine +BABEL_OP2_301_23196_20140224_145440_outLine 
+BABEL_OP2_301_23628_20131121_202709_inLine +BABEL_OP2_301_23628_20131121_202709_outLine +BABEL_OP2_301_26074_20140214_150738_inLine +BABEL_OP2_301_26074_20140214_150738_outLine +BABEL_OP2_301_28585_20140103_174051_inLine +BABEL_OP2_301_28585_20140103_174051_outLine +BABEL_OP2_301_29777_20131227_175745_inLine +BABEL_OP2_301_29777_20131227_175745_outLine +BABEL_OP2_301_32914_20140102_183534_inLine +BABEL_OP2_301_32914_20140102_183534_outLine +BABEL_OP2_301_33992_20140128_153304_inLine +BABEL_OP2_301_33992_20140128_153304_outLine +BABEL_OP2_301_35069_20140104_210141_inLine +BABEL_OP2_301_35069_20140104_210141_outLine +BABEL_OP2_301_36219_20131125_140227_inLine +BABEL_OP2_301_36219_20131125_140227_outLine +BABEL_OP2_301_36219_20131125_141324_inLine +BABEL_OP2_301_36219_20131125_141324_outLine +BABEL_OP2_301_36341_20131024_131700_inLine +BABEL_OP2_301_36341_20131024_131700_outLine +BABEL_OP2_301_36341_20131025_165924_inLine +BABEL_OP2_301_36341_20131025_165924_outLine +BABEL_OP2_301_37499_20140129_153724_inLine +BABEL_OP2_301_37499_20140129_153724_outLine +BABEL_OP2_301_40740_20140106_203616_inLine +BABEL_OP2_301_40740_20140106_203616_outLine +BABEL_OP2_301_41493_20131025_161722_inLine +BABEL_OP2_301_41493_20131025_161722_outLine +BABEL_OP2_301_41920_20131110_141258_inLine +BABEL_OP2_301_41920_20131110_141258_outLine +BABEL_OP2_301_41920_20131110_142621_inLine +BABEL_OP2_301_41920_20131110_142621_outLine +BABEL_OP2_301_42600_20131125_184712_inLine +BABEL_OP2_301_42600_20131125_184712_outLine +BABEL_OP2_301_42600_20131125_185254_inLine +BABEL_OP2_301_42600_20131125_185254_outLine +BABEL_OP2_301_43789_20131205_204932_inLine +BABEL_OP2_301_43789_20131205_204932_outLine +BABEL_OP2_301_45777_20131129_214116_inLine +BABEL_OP2_301_45777_20131129_214116_outLine +BABEL_OP2_301_47877_20140109_182631_inLine +BABEL_OP2_301_47877_20140109_182631_outLine +BABEL_OP2_301_48399_20131115_184608_inLine +BABEL_OP2_301_48399_20131115_184608_outLine +BABEL_OP2_301_48422_20140104_203017_inLine +BABEL_OP2_301_48422_20140104_203017_outLine +BABEL_OP2_301_49287_20140110_233951_inLine +BABEL_OP2_301_49287_20140110_233951_outLine +BABEL_OP2_301_49502_20131025_191447_inLine +BABEL_OP2_301_49502_20131025_191447_outLine +BABEL_OP2_301_49812_20140108_153912_inLine +BABEL_OP2_301_49812_20140108_153912_outLine +BABEL_OP2_301_51417_20140104_191034_inLine +BABEL_OP2_301_51417_20140104_191034_outLine +BABEL_OP2_301_52447_20140128_140241_inLine +BABEL_OP2_301_52447_20140128_140241_outLine +BABEL_OP2_301_58145_20140205_195241_inLine +BABEL_OP2_301_58145_20140205_195241_outLine +BABEL_OP2_301_58815_20131219_183200_inLine +BABEL_OP2_301_58815_20131219_183200_outLine +BABEL_OP2_301_58915_20140204_180046_inLine +BABEL_OP2_301_58915_20140204_180046_outLine +BABEL_OP2_301_60508_20131023_221321_inLine +BABEL_OP2_301_60508_20131023_221321_outLine +BABEL_OP2_301_61348_20131210_184944_inLine +BABEL_OP2_301_61348_20131210_184944_outLine +BABEL_OP2_301_61357_20140205_203135_inLine +BABEL_OP2_301_61357_20140205_203135_outLine +BABEL_OP2_301_61678_20131130_195119_inLine +BABEL_OP2_301_61678_20131130_195119_outLine +BABEL_OP2_301_61684_20140224_141104_inLine +BABEL_OP2_301_61684_20140224_141104_outLine +BABEL_OP2_301_62434_20131027_204412_inLine +BABEL_OP2_301_62434_20131027_204412_outLine +BABEL_OP2_301_62835_20131205_201607_inLine +BABEL_OP2_301_62835_20131205_201607_outLine +BABEL_OP2_301_62852_20131112_145306_inLine +BABEL_OP2_301_62852_20131112_145306_outLine +BABEL_OP2_301_63445_20131017_163305_inLine 
+BABEL_OP2_301_63445_20131017_163305_outLine +BABEL_OP2_301_63481_20131018_205953_inLine +BABEL_OP2_301_63481_20131018_205953_outLine +BABEL_OP2_301_63523_20140127_032850_inLine +BABEL_OP2_301_63523_20140127_032850_outLine +BABEL_OP2_301_65268_20140224_143314_inLine +BABEL_OP2_301_65268_20140224_143314_outLine +BABEL_OP2_301_66967_20131119_230046_inLine +BABEL_OP2_301_66967_20131119_230046_outLine +BABEL_OP2_301_67592_20131223_194021_inLine +BABEL_OP2_301_67592_20131223_194021_outLine +BABEL_OP2_301_69885_20140126_142648_inLine +BABEL_OP2_301_69885_20140126_142648_outLine +BABEL_OP2_301_71282_20140115_180924_inLine +BABEL_OP2_301_71282_20140115_180924_outLine +BABEL_OP2_301_71333_20131126_155505_inLine +BABEL_OP2_301_71333_20131126_155505_outLine +BABEL_OP2_301_73622_20131030_201514_inLine +BABEL_OP2_301_73622_20131030_201514_outLine +BABEL_OP2_301_75359_20140127_022948_inLine +BABEL_OP2_301_75359_20140127_022948_outLine +BABEL_OP2_301_75460_20140130_145829_inLine +BABEL_OP2_301_75460_20140130_145829_outLine +BABEL_OP2_301_76218_20131205_183037_inLine +BABEL_OP2_301_76218_20131205_183037_outLine +BABEL_OP2_301_77139_20131112_164236_inLine +BABEL_OP2_301_77139_20131112_164236_outLine +BABEL_OP2_301_78454_20140206_205852_inLine +BABEL_OP2_301_78454_20140206_205852_outLine +BABEL_OP2_301_78630_20131125_133236_inLine +BABEL_OP2_301_78630_20131125_133236_outLine +BABEL_OP2_301_79590_20131204_214240_inLine +BABEL_OP2_301_79590_20131204_214240_outLine +BABEL_OP2_301_80881_20131106_185321_inLine +BABEL_OP2_301_80881_20131106_185321_outLine +BABEL_OP2_301_83775_20131124_022216_inLine +BABEL_OP2_301_83775_20131124_022216_outLine +BABEL_OP2_301_84370_20140204_202527_inLine +BABEL_OP2_301_84370_20140204_202527_outLine +BABEL_OP2_301_85439_20140126_191119_inLine +BABEL_OP2_301_85439_20140126_191119_outLine +BABEL_OP2_301_86748_20140112_204921_inLine +BABEL_OP2_301_86748_20140112_204921_outLine +BABEL_OP2_301_87693_20131204_010154_inLine +BABEL_OP2_301_87693_20131204_010154_outLine +BABEL_OP2_301_88601_20131208_212307_inLine +BABEL_OP2_301_88601_20131208_212307_outLine +BABEL_OP2_301_88686_20131023_165851_inLine +BABEL_OP2_301_88686_20131023_165851_outLine +BABEL_OP2_301_89457_20131206_124818_inLine +BABEL_OP2_301_89457_20131206_124818_outLine +BABEL_OP2_301_90777_20131126_025413_inLine +BABEL_OP2_301_90777_20131126_025413_outLine +BABEL_OP2_301_92060_20140126_194852_inLine +BABEL_OP2_301_92060_20140126_194852_outLine +BABEL_OP2_301_92281_20140214_190838_inLine +BABEL_OP2_301_92281_20140214_190838_outLine +BABEL_OP2_301_92509_20131019_131304_inLine +BABEL_OP2_301_92509_20131019_131304_outLine +BABEL_OP2_301_92698_20131203_135210_inLine +BABEL_OP2_301_92698_20131203_135210_outLine +BABEL_OP2_301_93604_20140125_212930_inLine +BABEL_OP2_301_93604_20140125_212930_outLine +BABEL_OP2_301_94587_20131213_182558_inLine +BABEL_OP2_301_94587_20131213_182558_outLine +BABEL_OP2_301_95598_20131020_194214_inLine +BABEL_OP2_301_95598_20131020_194214_outLine +BABEL_OP2_301_95966_20131205_151956_inLine +BABEL_OP2_301_95966_20131205_151956_outLine +BABEL_OP2_301_96088_20140128_155726_inLine +BABEL_OP2_301_96088_20140128_155726_outLine +BABEL_OP2_301_96808_20140127_174411_inLine +BABEL_OP2_301_96808_20140127_174411_outLine +BABEL_OP2_301_98580_20131204_210023_inLine +BABEL_OP2_301_98580_20131204_210023_outLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/evalpart1.list b/egs/babel/s5d/conf/lists/301-cebuano/evalpart1.list new file mode 100644 index 00000000000..31455174b8e --- /dev/null +++ 
b/egs/babel/s5d/conf/lists/301-cebuano/evalpart1.list @@ -0,0 +1,62 @@ +BABEL_OP2_301_13427_20131226_153605_inLine +BABEL_OP2_301_13427_20131226_153605_outLine +BABEL_OP2_301_18766_20140127_140851_inLine +BABEL_OP2_301_18766_20140127_140851_outLine +BABEL_OP2_301_19832_20140214_144414_inLine +BABEL_OP2_301_19832_20140214_144414_outLine +BABEL_OP2_301_23628_20131121_202709_inLine +BABEL_OP2_301_23628_20131121_202709_outLine +BABEL_OP2_301_26074_20140214_150738_inLine +BABEL_OP2_301_26074_20140214_150738_outLine +BABEL_OP2_301_28585_20140103_174051_inLine +BABEL_OP2_301_28585_20140103_174051_outLine +BABEL_OP2_301_33992_20140128_153304_inLine +BABEL_OP2_301_33992_20140128_153304_outLine +BABEL_OP2_301_42600_20131125_184712_inLine +BABEL_OP2_301_42600_20131125_184712_outLine +BABEL_OP2_301_42600_20131125_185254_inLine +BABEL_OP2_301_42600_20131125_185254_outLine +BABEL_OP2_301_60508_20131023_221321_inLine +BABEL_OP2_301_60508_20131023_221321_outLine +BABEL_OP2_301_61357_20140205_203135_inLine +BABEL_OP2_301_61357_20140205_203135_outLine +BABEL_OP2_301_62434_20131027_204412_inLine +BABEL_OP2_301_62434_20131027_204412_outLine +BABEL_OP2_301_62835_20131205_201607_inLine +BABEL_OP2_301_62835_20131205_201607_outLine +BABEL_OP2_301_62852_20131112_145306_inLine +BABEL_OP2_301_62852_20131112_145306_outLine +BABEL_OP2_301_63481_20131018_205953_inLine +BABEL_OP2_301_63481_20131018_205953_outLine +BABEL_OP2_301_63523_20140127_032850_inLine +BABEL_OP2_301_63523_20140127_032850_outLine +BABEL_OP2_301_71282_20140115_180924_inLine +BABEL_OP2_301_71282_20140115_180924_outLine +BABEL_OP2_301_71333_20131126_155505_inLine +BABEL_OP2_301_71333_20131126_155505_outLine +BABEL_OP2_301_75359_20140127_022948_inLine +BABEL_OP2_301_75359_20140127_022948_outLine +BABEL_OP2_301_75460_20140130_145829_inLine +BABEL_OP2_301_75460_20140130_145829_outLine +BABEL_OP2_301_78630_20131125_133236_inLine +BABEL_OP2_301_78630_20131125_133236_outLine +BABEL_OP2_301_83775_20131124_022216_inLine +BABEL_OP2_301_83775_20131124_022216_outLine +BABEL_OP2_301_86748_20140112_204921_inLine +BABEL_OP2_301_86748_20140112_204921_outLine +BABEL_OP2_301_88601_20131208_212307_inLine +BABEL_OP2_301_88601_20131208_212307_outLine +BABEL_OP2_301_92060_20140126_194852_inLine +BABEL_OP2_301_92060_20140126_194852_outLine +BABEL_OP2_301_92281_20140214_190838_inLine +BABEL_OP2_301_92281_20140214_190838_outLine +BABEL_OP2_301_93604_20140125_212930_inLine +BABEL_OP2_301_93604_20140125_212930_outLine +BABEL_OP2_301_94587_20131213_182558_inLine +BABEL_OP2_301_94587_20131213_182558_outLine +BABEL_OP2_301_95966_20131205_151956_inLine +BABEL_OP2_301_95966_20131205_151956_outLine +BABEL_OP2_301_96808_20140127_174411_inLine +BABEL_OP2_301_96808_20140127_174411_outLine +BABEL_OP2_301_98580_20131204_210023_inLine +BABEL_OP2_301_98580_20131204_210023_outLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/sub-train.list b/egs/babel/s5d/conf/lists/301-cebuano/sub-train.list new file mode 100644 index 00000000000..8347770b847 --- /dev/null +++ b/egs/babel/s5d/conf/lists/301-cebuano/sub-train.list @@ -0,0 +1,126 @@ +BABEL_OP2_301_10482_20131213_185259_inLine +BABEL_OP2_301_10482_20131213_185259_outLine +BABEL_OP2_301_11681_20131121_134611_inLine +BABEL_OP2_301_11681_20131121_134611_outLine +BABEL_OP2_301_12220_20131205_210711_inLine +BABEL_OP2_301_12220_20131205_210711_outLine +BABEL_OP2_301_14229_20131129_210206_inLine +BABEL_OP2_301_14229_20131129_210206_outLine +BABEL_OP2_301_14807_20140214_134654_inLine +BABEL_OP2_301_14807_20140214_134654_outLine 
+BABEL_OP2_301_15163_20131203_221053_inLine +BABEL_OP2_301_15163_20131203_221053_outLine +BABEL_OP2_301_17113_20140202_140244_inLine +BABEL_OP2_301_17113_20140202_140244_outLine +BABEL_OP2_301_18380_20131208_205543_inLine +BABEL_OP2_301_18380_20131208_205543_outLine +BABEL_OP2_301_20437_20140223_171247_inLine +BABEL_OP2_301_20437_20140223_171247_outLine +BABEL_OP2_301_22216_20131024_101416_inLine +BABEL_OP2_301_22216_20131024_101416_outLine +BABEL_OP2_301_28595_20140214_164503_inLine +BABEL_OP2_301_28595_20140214_164503_outLine +BABEL_OP2_301_28945_20131123_183004_inLine +BABEL_OP2_301_28945_20131123_183004_outLine +BABEL_OP2_301_32708_20131122_134009_inLine +BABEL_OP2_301_32708_20131122_134009_outLine +BABEL_OP2_301_32708_20131122_134900_inLine +BABEL_OP2_301_32708_20131122_134900_outLine +BABEL_OP2_301_33175_20131019_231650_inLine +BABEL_OP2_301_33175_20131019_231650_outLine +BABEL_OP2_301_33216_20140131_183344_inLine +BABEL_OP2_301_33216_20140131_183344_outLine +BABEL_OP2_301_33355_20131021_130538_inLine +BABEL_OP2_301_33355_20131021_130538_outLine +BABEL_OP2_301_34106_20131020_192105_inLine +BABEL_OP2_301_34106_20131020_192105_outLine +BABEL_OP2_301_34811_20131204_195646_inLine +BABEL_OP2_301_34811_20131204_195646_outLine +BABEL_OP2_301_37228_20140109_190716_inLine +BABEL_OP2_301_37228_20140109_190716_outLine +BABEL_OP2_301_38554_20131024_134203_inLine +BABEL_OP2_301_38554_20131024_134203_outLine +BABEL_OP2_301_39680_20140115_193747_inLine +BABEL_OP2_301_39680_20140115_193747_outLine +BABEL_OP2_301_41680_20131016_202751_inLine +BABEL_OP2_301_41680_20131016_202751_outLine +BABEL_OP2_301_43388_20131203_204504_inLine +BABEL_OP2_301_43388_20131203_204504_outLine +BABEL_OP2_301_45559_20140127_145550_inLine +BABEL_OP2_301_45559_20140127_145550_outLine +BABEL_OP2_301_45560_20131104_171401_inLine +BABEL_OP2_301_45560_20131104_171401_outLine +BABEL_OP2_301_46066_20140110_170456_inLine +BABEL_OP2_301_46066_20140110_170456_outLine +BABEL_OP2_301_46268_20131021_142020_inLine +BABEL_OP2_301_46268_20131021_142020_outLine +BABEL_OP2_301_50810_20131025_174542_inLine +BABEL_OP2_301_50810_20131025_174542_outLine +BABEL_OP2_301_52265_20140216_163445_inLine +BABEL_OP2_301_52265_20140216_163445_outLine +BABEL_OP2_301_54162_20131210_170602_inLine +BABEL_OP2_301_54162_20131210_170602_outLine +BABEL_OP2_301_54953_20131127_005926_inLine +BABEL_OP2_301_54953_20131127_005926_outLine +BABEL_OP2_301_55818_20131110_111534_inLine +BABEL_OP2_301_55818_20131110_111534_outLine +BABEL_OP2_301_55818_20131110_121457_inLine +BABEL_OP2_301_55818_20131110_121457_outLine +BABEL_OP2_301_56306_20140108_175350_inLine +BABEL_OP2_301_56306_20140108_175350_outLine +BABEL_OP2_301_64902_20140123_130547_inLine +BABEL_OP2_301_64902_20140123_130547_outLine +BABEL_OP2_301_65298_20140115_174724_inLine +BABEL_OP2_301_65298_20140115_174724_outLine +BABEL_OP2_301_67213_20140220_182122_inLine +BABEL_OP2_301_67213_20140220_182122_outLine +BABEL_OP2_301_67622_20131023_150210_inLine +BABEL_OP2_301_67622_20131023_150210_outLine +BABEL_OP2_301_68924_20131210_145459_inLine +BABEL_OP2_301_68924_20131210_145459_outLine +BABEL_OP2_301_69746_20140108_182845_inLine +BABEL_OP2_301_69746_20140108_182845_outLine +BABEL_OP2_301_71263_20140205_210654_inLine +BABEL_OP2_301_71263_20140205_210654_outLine +BABEL_OP2_301_72733_20140126_155036_inLine +BABEL_OP2_301_72733_20140126_155036_outLine +BABEL_OP2_301_73042_20131114_135827_inLine +BABEL_OP2_301_73042_20131114_135827_outLine +BABEL_OP2_301_73591_20131016_200144_inLine 
+BABEL_OP2_301_73591_20131016_200144_outLine +BABEL_OP2_301_73591_20131016_201810_inLine +BABEL_OP2_301_73591_20131016_201810_outLine +BABEL_OP2_301_75869_20140122_141000_inLine +BABEL_OP2_301_75869_20140122_141000_outLine +BABEL_OP2_301_78482_20131227_163840_inLine +BABEL_OP2_301_78482_20131227_163840_outLine +BABEL_OP2_301_81810_20131214_030628_inLine +BABEL_OP2_301_81810_20131214_030628_outLine +BABEL_OP2_301_81854_20140127_151841_inLine +BABEL_OP2_301_81854_20140127_151841_outLine +BABEL_OP2_301_84547_20131025_143053_inLine +BABEL_OP2_301_84547_20131025_143053_outLine +BABEL_OP2_301_85248_20140115_144605_inLine +BABEL_OP2_301_85248_20140115_144605_outLine +BABEL_OP2_301_87545_20140125_194128_inLine +BABEL_OP2_301_87545_20140125_194128_outLine +BABEL_OP2_301_91372_20140126_145526_inLine +BABEL_OP2_301_91372_20140126_145526_outLine +BABEL_OP2_301_91463_20140206_144651_inLine +BABEL_OP2_301_91463_20140206_144651_outLine +BABEL_OP2_301_91884_20140118_220510_inLine +BABEL_OP2_301_91884_20140118_220510_outLine +BABEL_OP2_301_93475_20131119_183619_inLine +BABEL_OP2_301_93475_20131119_183619_outLine +BABEL_OP2_301_93515_20140125_212344_inLine +BABEL_OP2_301_93515_20140125_212344_outLine +BABEL_OP2_301_94409_20131204_145545_inLine +BABEL_OP2_301_94409_20131204_145545_outLine +BABEL_OP2_301_95399_20131206_150920_inLine +BABEL_OP2_301_95399_20131206_150920_outLine +BABEL_OP2_301_96190_20131122_024403_inLine +BABEL_OP2_301_96190_20131122_024403_outLine +BABEL_OP2_301_99202_20131226_202006_inLine +BABEL_OP2_301_99202_20131226_202006_outLine +BABEL_OP2_301_99955_20140110_162703_inLine +BABEL_OP2_301_99955_20140110_162703_outLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/301-cebuano/sub-train.untranscribed.list new file mode 100644 index 00000000000..690d88bbe06 --- /dev/null +++ b/egs/babel/s5d/conf/lists/301-cebuano/sub-train.untranscribed.list @@ -0,0 +1,376 @@ +BABEL_OP2_301_10647_20140122_182555_inLine +BABEL_OP2_301_10647_20140122_182555_outLine +BABEL_OP2_301_11581_20140214_131627_inLine +BABEL_OP2_301_11581_20140214_131627_outLine +BABEL_OP2_301_11673_20131025_130227_inLine +BABEL_OP2_301_11673_20131025_130227_outLine +BABEL_OP2_301_12846_20140130_151205_inLine +BABEL_OP2_301_12846_20140130_151205_outLine +BABEL_OP2_301_13184_20140106_154520_inLine +BABEL_OP2_301_13184_20140106_154520_outLine +BABEL_OP2_301_13776_20140129_150321_inLine +BABEL_OP2_301_13776_20140129_150321_outLine +BABEL_OP2_301_14137_20131129_173610_inLine +BABEL_OP2_301_14137_20131129_173610_outLine +BABEL_OP2_301_14729_20140120_213036_inLine +BABEL_OP2_301_14729_20140120_213036_outLine +BABEL_OP2_301_14929_20131204_221803_inLine +BABEL_OP2_301_14929_20131204_221803_outLine +BABEL_OP2_301_15466_20140220_163311_inLine +BABEL_OP2_301_15466_20140220_163311_outLine +BABEL_OP2_301_15617_20140216_200848_inLine +BABEL_OP2_301_15617_20140216_200848_outLine +BABEL_OP2_301_15730_20131103_191134_inLine +BABEL_OP2_301_15730_20131103_191134_outLine +BABEL_OP2_301_16149_20131122_182440_inLine +BABEL_OP2_301_16149_20131122_182440_outLine +BABEL_OP2_301_16749_20140109_145017_inLine +BABEL_OP2_301_16749_20140109_145017_outLine +BABEL_OP2_301_18118_20140214_195210_inLine +BABEL_OP2_301_18118_20140214_195210_outLine +BABEL_OP2_301_19589_20140126_182029_inLine +BABEL_OP2_301_19589_20140126_182029_outLine +BABEL_OP2_301_20133_20131017_002355_inLine +BABEL_OP2_301_20133_20131017_002355_outLine +BABEL_OP2_301_20922_20140103_201925_inLine +BABEL_OP2_301_20922_20140103_201925_outLine 
+BABEL_OP2_301_21206_20131113_200040_inLine +BABEL_OP2_301_21206_20131113_200040_outLine +BABEL_OP2_301_21435_20140123_135222_inLine +BABEL_OP2_301_21435_20140123_135222_outLine +BABEL_OP2_301_22321_20131101_141744_inLine +BABEL_OP2_301_22321_20131101_141744_outLine +BABEL_OP2_301_22494_20131210_134219_inLine +BABEL_OP2_301_22494_20131210_134219_outLine +BABEL_OP2_301_23893_20140214_163728_inLine +BABEL_OP2_301_23893_20140214_163728_outLine +BABEL_OP2_301_24239_20140126_152805_inLine +BABEL_OP2_301_24239_20140126_152805_outLine +BABEL_OP2_301_24679_20131023_181905_inLine +BABEL_OP2_301_24679_20131023_181905_outLine +BABEL_OP2_301_24982_20131125_001842_inLine +BABEL_OP2_301_24982_20131125_001842_outLine +BABEL_OP2_301_25719_20140104_183539_inLine +BABEL_OP2_301_25719_20140104_183539_outLine +BABEL_OP2_301_25767_20131112_191047_inLine +BABEL_OP2_301_25767_20131112_191047_outLine +BABEL_OP2_301_26072_20140110_183220_inLine +BABEL_OP2_301_26072_20140110_183220_outLine +BABEL_OP2_301_28012_20140103_194242_inLine +BABEL_OP2_301_28012_20140103_194242_outLine +BABEL_OP2_301_28303_20131125_142043_inLine +BABEL_OP2_301_28303_20131125_142043_outLine +BABEL_OP2_301_28600_20131220_191009_inLine +BABEL_OP2_301_28600_20131220_191009_outLine +BABEL_OP2_301_28814_20140109_152108_inLine +BABEL_OP2_301_28814_20140109_152108_outLine +BABEL_OP2_301_29023_20131123_171357_inLine +BABEL_OP2_301_29023_20131123_171357_outLine +BABEL_OP2_301_29023_20131123_173406_inLine +BABEL_OP2_301_29023_20131123_173406_outLine +BABEL_OP2_301_29168_20131018_141724_inLine +BABEL_OP2_301_29168_20131018_141724_outLine +BABEL_OP2_301_29323_20140112_184600_inLine +BABEL_OP2_301_29323_20140112_184600_outLine +BABEL_OP2_301_29404_20140123_123004_inLine +BABEL_OP2_301_29404_20140123_123004_outLine +BABEL_OP2_301_30645_20131116_183545_inLine +BABEL_OP2_301_30645_20131116_183545_outLine +BABEL_OP2_301_31490_20131130_190602_inLine +BABEL_OP2_301_31490_20131130_190602_outLine +BABEL_OP2_301_32630_20140127_181615_inLine +BABEL_OP2_301_32630_20140127_181615_outLine +BABEL_OP2_301_32998_20140206_212018_inLine +BABEL_OP2_301_32998_20140206_212018_outLine +BABEL_OP2_301_33672_20131112_192407_inLine +BABEL_OP2_301_33672_20131112_192407_outLine +BABEL_OP2_301_33672_20131112_194343_inLine +BABEL_OP2_301_33672_20131112_194343_outLine +BABEL_OP2_301_33806_20140204_204611_inLine +BABEL_OP2_301_33806_20140204_204611_outLine +BABEL_OP2_301_34019_20140220_175645_inLine +BABEL_OP2_301_34019_20140220_175645_outLine +BABEL_OP2_301_34629_20140224_174043_inLine +BABEL_OP2_301_34629_20140224_174043_outLine +BABEL_OP2_301_34860_20140224_170732_inLine +BABEL_OP2_301_34860_20140224_170732_outLine +BABEL_OP2_301_36039_20140121_002746_inLine +BABEL_OP2_301_36039_20140121_002746_outLine +BABEL_OP2_301_36669_20131208_191649_inLine +BABEL_OP2_301_36669_20131208_191649_outLine +BABEL_OP2_301_37598_20140206_190701_inLine +BABEL_OP2_301_37598_20140206_190701_outLine +BABEL_OP2_301_37682_20131128_161814_inLine +BABEL_OP2_301_37682_20131128_161814_outLine +BABEL_OP2_301_38323_20140205_184350_inLine +BABEL_OP2_301_38323_20140205_184350_outLine +BABEL_OP2_301_39006_20140204_195257_inLine +BABEL_OP2_301_39006_20140204_195257_outLine +BABEL_OP2_301_39099_20140127_003852_inLine +BABEL_OP2_301_39099_20140127_003852_outLine +BABEL_OP2_301_39307_20131024_204807_inLine +BABEL_OP2_301_39307_20131024_204807_outLine +BABEL_OP2_301_39638_20140224_155231_inLine +BABEL_OP2_301_39638_20140224_155231_outLine +BABEL_OP2_301_39848_20131204_163640_inLine 
+BABEL_OP2_301_39848_20131204_163640_outLine +BABEL_OP2_301_41469_20131123_114935_inLine +BABEL_OP2_301_41469_20131123_114935_outLine +BABEL_OP2_301_41469_20131123_115625_inLine +BABEL_OP2_301_41469_20131123_115625_outLine +BABEL_OP2_301_41685_20140223_155438_inLine +BABEL_OP2_301_41685_20140223_155438_outLine +BABEL_OP2_301_42029_20140115_163832_inLine +BABEL_OP2_301_42029_20140115_163832_outLine +BABEL_OP2_301_42434_20131127_190632_inLine +BABEL_OP2_301_42434_20131127_190632_outLine +BABEL_OP2_301_43115_20140125_145846_inLine +BABEL_OP2_301_43115_20140125_145846_outLine +BABEL_OP2_301_43323_20140223_191949_inLine +BABEL_OP2_301_43323_20140223_191949_outLine +BABEL_OP2_301_44347_20140102_122651_inLine +BABEL_OP2_301_44347_20140102_122651_outLine +BABEL_OP2_301_44619_20131122_014112_inLine +BABEL_OP2_301_44619_20131122_014112_outLine +BABEL_OP2_301_45121_20140127_190059_inLine +BABEL_OP2_301_45121_20140127_190059_outLine +BABEL_OP2_301_45851_20140127_224015_inLine +BABEL_OP2_301_45851_20140127_224015_outLine +BABEL_OP2_301_46310_20131104_001007_inLine +BABEL_OP2_301_46310_20131104_001007_outLine +BABEL_OP2_301_49001_20131126_004357_inLine +BABEL_OP2_301_49001_20131126_004357_outLine +BABEL_OP2_301_49216_20131020_181355_inLine +BABEL_OP2_301_49216_20131020_181355_outLine +BABEL_OP2_301_49945_20140127_184032_inLine +BABEL_OP2_301_49945_20140127_184032_outLine +BABEL_OP2_301_50175_20131019_212339_inLine +BABEL_OP2_301_50175_20131019_212339_outLine +BABEL_OP2_301_51484_20131220_211835_inLine +BABEL_OP2_301_51484_20131220_211835_outLine +BABEL_OP2_301_51540_20140106_172711_inLine +BABEL_OP2_301_51540_20140106_172711_outLine +BABEL_OP2_301_51701_20140205_193018_inLine +BABEL_OP2_301_51701_20140205_193018_outLine +BABEL_OP2_301_51968_20131204_190129_inLine +BABEL_OP2_301_51968_20131204_190129_outLine +BABEL_OP2_301_52272_20131030_202958_inLine +BABEL_OP2_301_52272_20131030_202958_outLine +BABEL_OP2_301_52381_20140109_155159_inLine +BABEL_OP2_301_52381_20140109_155159_outLine +BABEL_OP2_301_52404_20131211_192143_inLine +BABEL_OP2_301_52404_20131211_192143_outLine +BABEL_OP2_301_52804_20131122_192606_inLine +BABEL_OP2_301_52804_20131122_192606_outLine +BABEL_OP2_301_53842_20131205_212824_inLine +BABEL_OP2_301_53842_20131205_212824_outLine +BABEL_OP2_301_53842_20131205_214030_inLine +BABEL_OP2_301_53842_20131205_214030_outLine +BABEL_OP2_301_54074_20131204_200954_inLine +BABEL_OP2_301_54074_20131204_200954_outLine +BABEL_OP2_301_54530_20131218_184644_inLine +BABEL_OP2_301_54530_20131218_184644_outLine +BABEL_OP2_301_54567_20131205_193927_inLine +BABEL_OP2_301_54567_20131205_193927_outLine +BABEL_OP2_301_54827_20140126_184228_inLine +BABEL_OP2_301_54827_20140126_184228_outLine +BABEL_OP2_301_55106_20140119_161343_inLine +BABEL_OP2_301_55106_20140119_161343_outLine +BABEL_OP2_301_55349_20140121_152059_inLine +BABEL_OP2_301_55349_20140121_152059_outLine +BABEL_OP2_301_55381_20140103_163729_inLine +BABEL_OP2_301_55381_20140103_163729_outLine +BABEL_OP2_301_57116_20131129_012420_inLine +BABEL_OP2_301_57116_20131129_012420_outLine +BABEL_OP2_301_57233_20140224_172256_inLine +BABEL_OP2_301_57233_20140224_172256_outLine +BABEL_OP2_301_57542_20140122_150942_inLine +BABEL_OP2_301_57542_20140122_150942_outLine +BABEL_OP2_301_57566_20140106_150720_inLine +BABEL_OP2_301_57566_20140106_150720_outLine +BABEL_OP2_301_58006_20140122_203731_inLine +BABEL_OP2_301_58006_20140122_203731_outLine +BABEL_OP2_301_58313_20140207_172512_inLine +BABEL_OP2_301_58313_20140207_172512_outLine 
+BABEL_OP2_301_58926_20131124_131005_inLine +BABEL_OP2_301_58926_20131124_131005_outLine +BABEL_OP2_301_59039_20140220_172820_inLine +BABEL_OP2_301_59039_20140220_172820_outLine +BABEL_OP2_301_59078_20140206_221105_inLine +BABEL_OP2_301_59078_20140206_221105_outLine +BABEL_OP2_301_59549_20131115_144344_inLine +BABEL_OP2_301_59549_20131115_144344_outLine +BABEL_OP2_301_59549_20131115_145934_inLine +BABEL_OP2_301_59549_20131115_145934_outLine +BABEL_OP2_301_59928_20131208_181057_inLine +BABEL_OP2_301_59928_20131208_181057_outLine +BABEL_OP2_301_60436_20140126_184303_inLine +BABEL_OP2_301_60436_20140126_184303_outLine +BABEL_OP2_301_60458_20140127_174755_inLine +BABEL_OP2_301_60458_20140127_174755_outLine +BABEL_OP2_301_60474_20131125_202818_inLine +BABEL_OP2_301_60474_20131125_202818_outLine +BABEL_OP2_301_60477_20140131_142240_inLine +BABEL_OP2_301_60477_20140131_142240_outLine +BABEL_OP2_301_60498_20140128_144917_inLine +BABEL_OP2_301_60498_20140128_144917_outLine +BABEL_OP2_301_60626_20131123_194530_inLine +BABEL_OP2_301_60626_20131123_194530_outLine +BABEL_OP2_301_61440_20140129_162338_inLine +BABEL_OP2_301_61440_20140129_162338_outLine +BABEL_OP2_301_62047_20131223_201629_inLine +BABEL_OP2_301_62047_20131223_201629_outLine +BABEL_OP2_301_62734_20131127_125913_inLine +BABEL_OP2_301_62734_20131127_125913_outLine +BABEL_OP2_301_62800_20131023_133254_inLine +BABEL_OP2_301_62800_20131023_133254_outLine +BABEL_OP2_301_63787_20131112_234133_inLine +BABEL_OP2_301_63787_20131112_234133_outLine +BABEL_OP2_301_63906_20140122_195218_inLine +BABEL_OP2_301_63906_20140122_195218_outLine +BABEL_OP2_301_64768_20131129_183309_inLine +BABEL_OP2_301_64768_20131129_183309_outLine +BABEL_OP2_301_65466_20140122_211719_inLine +BABEL_OP2_301_65466_20140122_211719_outLine +BABEL_OP2_301_66045_20131203_142944_inLine +BABEL_OP2_301_66045_20131203_142944_outLine +BABEL_OP2_301_66361_20140223_153258_inLine +BABEL_OP2_301_66361_20140223_153258_outLine +BABEL_OP2_301_66916_20131023_223807_inLine +BABEL_OP2_301_66916_20131023_223807_outLine +BABEL_OP2_301_67152_20140119_212917_inLine +BABEL_OP2_301_67152_20140119_212917_outLine +BABEL_OP2_301_68182_20140115_183030_inLine +BABEL_OP2_301_68182_20140115_183030_outLine +BABEL_OP2_301_69096_20140128_171512_inLine +BABEL_OP2_301_69096_20140128_171512_outLine +BABEL_OP2_301_69937_20140131_181058_inLine +BABEL_OP2_301_69937_20140131_181058_outLine +BABEL_OP2_301_69992_20131110_135349_inLine +BABEL_OP2_301_69992_20131110_135349_outLine +BABEL_OP2_301_70386_20140102_173141_inLine +BABEL_OP2_301_70386_20140102_173141_outLine +BABEL_OP2_301_71121_20140223_161906_inLine +BABEL_OP2_301_71121_20140223_161906_outLine +BABEL_OP2_301_72844_20131023_180119_inLine +BABEL_OP2_301_72844_20131023_180119_outLine +BABEL_OP2_301_73005_20140126_193903_inLine +BABEL_OP2_301_73005_20140126_193903_outLine +BABEL_OP2_301_73258_20131203_200331_inLine +BABEL_OP2_301_73258_20131203_200331_outLine +BABEL_OP2_301_73485_20140128_210522_inLine +BABEL_OP2_301_73485_20140128_210522_outLine +BABEL_OP2_301_73549_20140131_160208_inLine +BABEL_OP2_301_73549_20140131_160208_outLine +BABEL_OP2_301_73964_20140214_161434_inLine +BABEL_OP2_301_73964_20140214_161434_outLine +BABEL_OP2_301_74886_20131102_122938_inLine +BABEL_OP2_301_74886_20131102_122938_outLine +BABEL_OP2_301_75261_20131226_160602_inLine +BABEL_OP2_301_75261_20131226_160602_outLine +BABEL_OP2_301_75981_20140127_143431_inLine +BABEL_OP2_301_75981_20140127_143431_outLine +BABEL_OP2_301_76155_20131203_185301_inLine 
+BABEL_OP2_301_76155_20131203_185301_outLine +BABEL_OP2_301_77146_20131023_185146_inLine +BABEL_OP2_301_77146_20131023_185146_outLine +BABEL_OP2_301_77427_20131124_013134_inLine +BABEL_OP2_301_77427_20131124_013134_outLine +BABEL_OP2_301_77427_20131124_014748_inLine +BABEL_OP2_301_77427_20131124_014748_outLine +BABEL_OP2_301_77744_20131117_154739_inLine +BABEL_OP2_301_77744_20131117_154739_outLine +BABEL_OP2_301_78543_20140131_010053_inLine +BABEL_OP2_301_78543_20140131_010053_outLine +BABEL_OP2_301_78743_20131220_201406_inLine +BABEL_OP2_301_78743_20131220_201406_outLine +BABEL_OP2_301_78943_20131120_175430_inLine +BABEL_OP2_301_78943_20131120_175430_outLine +BABEL_OP2_301_79451_20131125_114859_inLine +BABEL_OP2_301_79451_20131125_114859_outLine +BABEL_OP2_301_81622_20131204_193304_inLine +BABEL_OP2_301_81622_20131204_193304_outLine +BABEL_OP2_301_82089_20131208_202028_inLine +BABEL_OP2_301_82089_20131208_202028_outLine +BABEL_OP2_301_82425_20131113_010203_inLine +BABEL_OP2_301_82425_20131113_010203_outLine +BABEL_OP2_301_82626_20140131_233635_inLine +BABEL_OP2_301_82626_20140131_233635_outLine +BABEL_OP2_301_83436_20131116_194233_inLine +BABEL_OP2_301_83436_20131116_194233_outLine +BABEL_OP2_301_83455_20131129_211537_inLine +BABEL_OP2_301_83455_20131129_211537_outLine +BABEL_OP2_301_83455_20131129_212747_inLine +BABEL_OP2_301_83455_20131129_212747_outLine +BABEL_OP2_301_83625_20140224_161632_inLine +BABEL_OP2_301_83625_20140224_161632_outLine +BABEL_OP2_301_84458_20131216_193109_inLine +BABEL_OP2_301_84458_20131216_193109_outLine +BABEL_OP2_301_85322_20131112_183356_inLine +BABEL_OP2_301_85322_20131112_183356_outLine +BABEL_OP2_301_85519_20140103_170652_inLine +BABEL_OP2_301_85519_20140103_170652_outLine +BABEL_OP2_301_86156_20140122_185516_inLine +BABEL_OP2_301_86156_20140122_185516_outLine +BABEL_OP2_301_87470_20131128_003454_inLine +BABEL_OP2_301_87470_20131128_003454_outLine +BABEL_OP2_301_88812_20140126_203311_inLine +BABEL_OP2_301_88812_20140126_203311_outLine +BABEL_OP2_301_88925_20131220_151054_inLine +BABEL_OP2_301_88925_20131220_151054_outLine +BABEL_OP2_301_88938_20140104_195418_inLine +BABEL_OP2_301_88938_20140104_195418_outLine +BABEL_OP2_301_89059_20140109_141228_inLine +BABEL_OP2_301_89059_20140109_141228_outLine +BABEL_OP2_301_89358_20131209_174055_inLine +BABEL_OP2_301_89358_20131209_174055_outLine +BABEL_OP2_301_89665_20131206_143535_inLine +BABEL_OP2_301_89665_20131206_143535_outLine +BABEL_OP2_301_89695_20131203_225429_inLine +BABEL_OP2_301_89695_20131203_225429_outLine +BABEL_OP2_301_89877_20140205_200816_inLine +BABEL_OP2_301_89877_20140205_200816_outLine +BABEL_OP2_301_90709_20131109_170505_inLine +BABEL_OP2_301_90709_20131109_170505_outLine +BABEL_OP2_301_90737_20131206_160650_inLine +BABEL_OP2_301_90737_20131206_160650_outLine +BABEL_OP2_301_91478_20140224_170543_inLine +BABEL_OP2_301_91478_20140224_170543_outLine +BABEL_OP2_301_91760_20140127_183930_inLine +BABEL_OP2_301_91760_20140127_183930_outLine +BABEL_OP2_301_91891_20131213_192340_inLine +BABEL_OP2_301_91891_20131213_192340_outLine +BABEL_OP2_301_91944_20131114_123915_inLine +BABEL_OP2_301_91944_20131114_123915_outLine +BABEL_OP2_301_92809_20131124_142340_inLine +BABEL_OP2_301_92809_20131124_142340_outLine +BABEL_OP2_301_92809_20131124_143817_inLine +BABEL_OP2_301_92809_20131124_143817_outLine +BABEL_OP2_301_92942_20140206_180304_inLine +BABEL_OP2_301_92942_20140206_180304_outLine +BABEL_OP2_301_93153_20131114_144733_inLine +BABEL_OP2_301_93153_20131114_144733_outLine 
+BABEL_OP2_301_93153_20131114_151704_inLine +BABEL_OP2_301_93153_20131114_151704_outLine +BABEL_OP2_301_93964_20131130_172431_inLine +BABEL_OP2_301_93964_20131130_172431_outLine +BABEL_OP2_301_94978_20140119_185149_inLine +BABEL_OP2_301_94978_20140119_185149_outLine +BABEL_OP2_301_95338_20140127_192317_inLine +BABEL_OP2_301_95338_20140127_192317_outLine +BABEL_OP2_301_95583_20131029_002312_inLine +BABEL_OP2_301_95583_20131029_002312_outLine +BABEL_OP2_301_95663_20131025_134113_inLine +BABEL_OP2_301_95663_20131025_134113_outLine +BABEL_OP2_301_95935_20140103_190515_inLine +BABEL_OP2_301_95935_20140103_190515_outLine +BABEL_OP2_301_96324_20131026_023101_inLine +BABEL_OP2_301_96324_20131026_023101_outLine +BABEL_OP2_301_96376_20140126_140015_inLine +BABEL_OP2_301_96376_20140126_140015_outLine +BABEL_OP2_301_96910_20131124_183403_inLine +BABEL_OP2_301_96910_20131124_183403_outLine +BABEL_OP2_301_97136_20140120_235804_inLine +BABEL_OP2_301_97136_20140120_235804_outLine +BABEL_OP2_301_97588_20131025_185012_inLine +BABEL_OP2_301_97588_20131025_185012_outLine diff --git a/egs/babel/s5d/conf/lists/301-cebuano/training.list b/egs/babel/s5d/conf/lists/301-cebuano/training.list new file mode 100644 index 00000000000..e6ea8dcfeff --- /dev/null +++ b/egs/babel/s5d/conf/lists/301-cebuano/training.list @@ -0,0 +1,502 @@ +BABEL_OP2_301_10482_20131213_185259_inLine +BABEL_OP2_301_10482_20131213_185259_outLine +BABEL_OP2_301_10647_20140122_182555_inLine +BABEL_OP2_301_10647_20140122_182555_outLine +BABEL_OP2_301_11581_20140214_131627_inLine +BABEL_OP2_301_11581_20140214_131627_outLine +BABEL_OP2_301_11673_20131025_130227_inLine +BABEL_OP2_301_11673_20131025_130227_outLine +BABEL_OP2_301_11681_20131121_134611_inLine +BABEL_OP2_301_11681_20131121_134611_outLine +BABEL_OP2_301_12220_20131205_210711_inLine +BABEL_OP2_301_12220_20131205_210711_outLine +BABEL_OP2_301_12846_20140130_151205_inLine +BABEL_OP2_301_12846_20140130_151205_outLine +BABEL_OP2_301_13184_20140106_154520_inLine +BABEL_OP2_301_13184_20140106_154520_outLine +BABEL_OP2_301_13776_20140129_150321_inLine +BABEL_OP2_301_13776_20140129_150321_outLine +BABEL_OP2_301_14137_20131129_173610_inLine +BABEL_OP2_301_14137_20131129_173610_outLine +BABEL_OP2_301_14229_20131129_210206_inLine +BABEL_OP2_301_14229_20131129_210206_outLine +BABEL_OP2_301_14729_20140120_213036_inLine +BABEL_OP2_301_14729_20140120_213036_outLine +BABEL_OP2_301_14807_20140214_134654_inLine +BABEL_OP2_301_14807_20140214_134654_outLine +BABEL_OP2_301_14929_20131204_221803_inLine +BABEL_OP2_301_14929_20131204_221803_outLine +BABEL_OP2_301_15163_20131203_221053_inLine +BABEL_OP2_301_15163_20131203_221053_outLine +BABEL_OP2_301_15466_20140220_163311_inLine +BABEL_OP2_301_15466_20140220_163311_outLine +BABEL_OP2_301_15617_20140216_200848_inLine +BABEL_OP2_301_15617_20140216_200848_outLine +BABEL_OP2_301_15730_20131103_191134_inLine +BABEL_OP2_301_15730_20131103_191134_outLine +BABEL_OP2_301_16149_20131122_182440_inLine +BABEL_OP2_301_16149_20131122_182440_outLine +BABEL_OP2_301_16749_20140109_145017_inLine +BABEL_OP2_301_16749_20140109_145017_outLine +BABEL_OP2_301_17113_20140202_140244_inLine +BABEL_OP2_301_17113_20140202_140244_outLine +BABEL_OP2_301_18118_20140214_195210_inLine +BABEL_OP2_301_18118_20140214_195210_outLine +BABEL_OP2_301_18380_20131208_205543_inLine +BABEL_OP2_301_18380_20131208_205543_outLine +BABEL_OP2_301_19589_20140126_182029_inLine +BABEL_OP2_301_19589_20140126_182029_outLine +BABEL_OP2_301_20133_20131017_002355_inLine +BABEL_OP2_301_20133_20131017_002355_outLine 
+BABEL_OP2_301_20437_20140223_171247_inLine +BABEL_OP2_301_20437_20140223_171247_outLine +BABEL_OP2_301_20922_20140103_201925_inLine +BABEL_OP2_301_20922_20140103_201925_outLine +BABEL_OP2_301_21206_20131113_200040_inLine +BABEL_OP2_301_21206_20131113_200040_outLine +BABEL_OP2_301_21435_20140123_135222_inLine +BABEL_OP2_301_21435_20140123_135222_outLine +BABEL_OP2_301_22216_20131024_101416_inLine +BABEL_OP2_301_22216_20131024_101416_outLine +BABEL_OP2_301_22321_20131101_141744_inLine +BABEL_OP2_301_22321_20131101_141744_outLine +BABEL_OP2_301_22494_20131210_134219_inLine +BABEL_OP2_301_22494_20131210_134219_outLine +BABEL_OP2_301_23893_20140214_163728_inLine +BABEL_OP2_301_23893_20140214_163728_outLine +BABEL_OP2_301_24239_20140126_152805_inLine +BABEL_OP2_301_24239_20140126_152805_outLine +BABEL_OP2_301_24679_20131023_181905_inLine +BABEL_OP2_301_24679_20131023_181905_outLine +BABEL_OP2_301_24982_20131125_001842_inLine +BABEL_OP2_301_24982_20131125_001842_outLine +BABEL_OP2_301_25719_20140104_183539_inLine +BABEL_OP2_301_25719_20140104_183539_outLine +BABEL_OP2_301_25767_20131112_191047_inLine +BABEL_OP2_301_25767_20131112_191047_outLine +BABEL_OP2_301_26072_20140110_183220_inLine +BABEL_OP2_301_26072_20140110_183220_outLine +BABEL_OP2_301_28012_20140103_194242_inLine +BABEL_OP2_301_28012_20140103_194242_outLine +BABEL_OP2_301_28303_20131125_142043_inLine +BABEL_OP2_301_28303_20131125_142043_outLine +BABEL_OP2_301_28595_20140214_164503_inLine +BABEL_OP2_301_28595_20140214_164503_outLine +BABEL_OP2_301_28600_20131220_191009_inLine +BABEL_OP2_301_28600_20131220_191009_outLine +BABEL_OP2_301_28814_20140109_152108_inLine +BABEL_OP2_301_28814_20140109_152108_outLine +BABEL_OP2_301_28945_20131123_183004_inLine +BABEL_OP2_301_28945_20131123_183004_outLine +BABEL_OP2_301_29023_20131123_171357_inLine +BABEL_OP2_301_29023_20131123_171357_outLine +BABEL_OP2_301_29023_20131123_173406_inLine +BABEL_OP2_301_29023_20131123_173406_outLine +BABEL_OP2_301_29168_20131018_141724_inLine +BABEL_OP2_301_29168_20131018_141724_outLine +BABEL_OP2_301_29323_20140112_184600_inLine +BABEL_OP2_301_29323_20140112_184600_outLine +BABEL_OP2_301_29404_20140123_123004_inLine +BABEL_OP2_301_29404_20140123_123004_outLine +BABEL_OP2_301_30645_20131116_183545_inLine +BABEL_OP2_301_30645_20131116_183545_outLine +BABEL_OP2_301_31490_20131130_190602_inLine +BABEL_OP2_301_31490_20131130_190602_outLine +BABEL_OP2_301_32630_20140127_181615_inLine +BABEL_OP2_301_32630_20140127_181615_outLine +BABEL_OP2_301_32708_20131122_134009_inLine +BABEL_OP2_301_32708_20131122_134009_outLine +BABEL_OP2_301_32708_20131122_134900_inLine +BABEL_OP2_301_32708_20131122_134900_outLine +BABEL_OP2_301_32998_20140206_212018_inLine +BABEL_OP2_301_32998_20140206_212018_outLine +BABEL_OP2_301_33175_20131019_231650_inLine +BABEL_OP2_301_33175_20131019_231650_outLine +BABEL_OP2_301_33216_20140131_183344_inLine +BABEL_OP2_301_33216_20140131_183344_outLine +BABEL_OP2_301_33355_20131021_130538_inLine +BABEL_OP2_301_33355_20131021_130538_outLine +BABEL_OP2_301_33672_20131112_192407_inLine +BABEL_OP2_301_33672_20131112_192407_outLine +BABEL_OP2_301_33672_20131112_194343_inLine +BABEL_OP2_301_33672_20131112_194343_outLine +BABEL_OP2_301_33806_20140204_204611_inLine +BABEL_OP2_301_33806_20140204_204611_outLine +BABEL_OP2_301_34019_20140220_175645_inLine +BABEL_OP2_301_34019_20140220_175645_outLine +BABEL_OP2_301_34106_20131020_192105_inLine +BABEL_OP2_301_34106_20131020_192105_outLine +BABEL_OP2_301_34629_20140224_174043_inLine 
+BABEL_OP2_301_34629_20140224_174043_outLine +BABEL_OP2_301_34811_20131204_195646_inLine +BABEL_OP2_301_34811_20131204_195646_outLine +BABEL_OP2_301_34860_20140224_170732_inLine +BABEL_OP2_301_34860_20140224_170732_outLine +BABEL_OP2_301_36039_20140121_002746_inLine +BABEL_OP2_301_36039_20140121_002746_outLine +BABEL_OP2_301_36669_20131208_191649_inLine +BABEL_OP2_301_36669_20131208_191649_outLine +BABEL_OP2_301_37228_20140109_190716_inLine +BABEL_OP2_301_37228_20140109_190716_outLine +BABEL_OP2_301_37598_20140206_190701_inLine +BABEL_OP2_301_37598_20140206_190701_outLine +BABEL_OP2_301_37682_20131128_161814_inLine +BABEL_OP2_301_37682_20131128_161814_outLine +BABEL_OP2_301_38323_20140205_184350_inLine +BABEL_OP2_301_38323_20140205_184350_outLine +BABEL_OP2_301_38554_20131024_134203_inLine +BABEL_OP2_301_38554_20131024_134203_outLine +BABEL_OP2_301_39006_20140204_195257_inLine +BABEL_OP2_301_39006_20140204_195257_outLine +BABEL_OP2_301_39099_20140127_003852_inLine +BABEL_OP2_301_39099_20140127_003852_outLine +BABEL_OP2_301_39307_20131024_204807_inLine +BABEL_OP2_301_39307_20131024_204807_outLine +BABEL_OP2_301_39638_20140224_155231_inLine +BABEL_OP2_301_39638_20140224_155231_outLine +BABEL_OP2_301_39680_20140115_193747_inLine +BABEL_OP2_301_39680_20140115_193747_outLine +BABEL_OP2_301_39848_20131204_163640_inLine +BABEL_OP2_301_39848_20131204_163640_outLine +BABEL_OP2_301_41469_20131123_114935_inLine +BABEL_OP2_301_41469_20131123_114935_outLine +BABEL_OP2_301_41469_20131123_115625_inLine +BABEL_OP2_301_41469_20131123_115625_outLine +BABEL_OP2_301_41680_20131016_202751_inLine +BABEL_OP2_301_41680_20131016_202751_outLine +BABEL_OP2_301_41685_20140223_155438_inLine +BABEL_OP2_301_41685_20140223_155438_outLine +BABEL_OP2_301_42029_20140115_163832_inLine +BABEL_OP2_301_42029_20140115_163832_outLine +BABEL_OP2_301_42434_20131127_190632_inLine +BABEL_OP2_301_42434_20131127_190632_outLine +BABEL_OP2_301_43115_20140125_145846_inLine +BABEL_OP2_301_43115_20140125_145846_outLine +BABEL_OP2_301_43323_20140223_191949_inLine +BABEL_OP2_301_43323_20140223_191949_outLine +BABEL_OP2_301_43388_20131203_204504_inLine +BABEL_OP2_301_43388_20131203_204504_outLine +BABEL_OP2_301_44347_20140102_122651_inLine +BABEL_OP2_301_44347_20140102_122651_outLine +BABEL_OP2_301_44619_20131122_014112_inLine +BABEL_OP2_301_44619_20131122_014112_outLine +BABEL_OP2_301_45121_20140127_190059_inLine +BABEL_OP2_301_45121_20140127_190059_outLine +BABEL_OP2_301_45559_20140127_145550_inLine +BABEL_OP2_301_45559_20140127_145550_outLine +BABEL_OP2_301_45560_20131104_171401_inLine +BABEL_OP2_301_45560_20131104_171401_outLine +BABEL_OP2_301_45851_20140127_224015_inLine +BABEL_OP2_301_45851_20140127_224015_outLine +BABEL_OP2_301_46066_20140110_170456_inLine +BABEL_OP2_301_46066_20140110_170456_outLine +BABEL_OP2_301_46268_20131021_142020_inLine +BABEL_OP2_301_46268_20131021_142020_outLine +BABEL_OP2_301_46310_20131104_001007_inLine +BABEL_OP2_301_46310_20131104_001007_outLine +BABEL_OP2_301_49001_20131126_004357_inLine +BABEL_OP2_301_49001_20131126_004357_outLine +BABEL_OP2_301_49216_20131020_181355_inLine +BABEL_OP2_301_49216_20131020_181355_outLine +BABEL_OP2_301_49945_20140127_184032_inLine +BABEL_OP2_301_49945_20140127_184032_outLine +BABEL_OP2_301_50175_20131019_212339_inLine +BABEL_OP2_301_50175_20131019_212339_outLine +BABEL_OP2_301_50810_20131025_174542_inLine +BABEL_OP2_301_50810_20131025_174542_outLine +BABEL_OP2_301_51484_20131220_211835_inLine +BABEL_OP2_301_51484_20131220_211835_outLine 
+BABEL_OP2_301_51540_20140106_172711_inLine +BABEL_OP2_301_51540_20140106_172711_outLine +BABEL_OP2_301_51701_20140205_193018_inLine +BABEL_OP2_301_51701_20140205_193018_outLine +BABEL_OP2_301_51968_20131204_190129_inLine +BABEL_OP2_301_51968_20131204_190129_outLine +BABEL_OP2_301_52265_20140216_163445_inLine +BABEL_OP2_301_52265_20140216_163445_outLine +BABEL_OP2_301_52272_20131030_202958_inLine +BABEL_OP2_301_52272_20131030_202958_outLine +BABEL_OP2_301_52381_20140109_155159_inLine +BABEL_OP2_301_52381_20140109_155159_outLine +BABEL_OP2_301_52404_20131211_192143_inLine +BABEL_OP2_301_52404_20131211_192143_outLine +BABEL_OP2_301_52804_20131122_192606_inLine +BABEL_OP2_301_52804_20131122_192606_outLine +BABEL_OP2_301_53842_20131205_212824_inLine +BABEL_OP2_301_53842_20131205_212824_outLine +BABEL_OP2_301_53842_20131205_214030_inLine +BABEL_OP2_301_53842_20131205_214030_outLine +BABEL_OP2_301_54074_20131204_200954_inLine +BABEL_OP2_301_54074_20131204_200954_outLine +BABEL_OP2_301_54162_20131210_170602_inLine +BABEL_OP2_301_54162_20131210_170602_outLine +BABEL_OP2_301_54530_20131218_184644_inLine +BABEL_OP2_301_54530_20131218_184644_outLine +BABEL_OP2_301_54567_20131205_193927_inLine +BABEL_OP2_301_54567_20131205_193927_outLine +BABEL_OP2_301_54827_20140126_184228_inLine +BABEL_OP2_301_54827_20140126_184228_outLine +BABEL_OP2_301_54953_20131127_005926_inLine +BABEL_OP2_301_54953_20131127_005926_outLine +BABEL_OP2_301_55106_20140119_161343_inLine +BABEL_OP2_301_55106_20140119_161343_outLine +BABEL_OP2_301_55349_20140121_152059_inLine +BABEL_OP2_301_55349_20140121_152059_outLine +BABEL_OP2_301_55381_20140103_163729_inLine +BABEL_OP2_301_55381_20140103_163729_outLine +BABEL_OP2_301_55818_20131110_111534_inLine +BABEL_OP2_301_55818_20131110_111534_outLine +BABEL_OP2_301_55818_20131110_121457_inLine +BABEL_OP2_301_55818_20131110_121457_outLine +BABEL_OP2_301_56306_20140108_175350_inLine +BABEL_OP2_301_56306_20140108_175350_outLine +BABEL_OP2_301_57116_20131129_012420_inLine +BABEL_OP2_301_57116_20131129_012420_outLine +BABEL_OP2_301_57233_20140224_172256_inLine +BABEL_OP2_301_57233_20140224_172256_outLine +BABEL_OP2_301_57542_20140122_150942_inLine +BABEL_OP2_301_57542_20140122_150942_outLine +BABEL_OP2_301_57566_20140106_150720_inLine +BABEL_OP2_301_57566_20140106_150720_outLine +BABEL_OP2_301_58006_20140122_203731_inLine +BABEL_OP2_301_58006_20140122_203731_outLine +BABEL_OP2_301_58313_20140207_172512_inLine +BABEL_OP2_301_58313_20140207_172512_outLine +BABEL_OP2_301_58926_20131124_131005_inLine +BABEL_OP2_301_58926_20131124_131005_outLine +BABEL_OP2_301_59039_20140220_172820_inLine +BABEL_OP2_301_59039_20140220_172820_outLine +BABEL_OP2_301_59078_20140206_221105_inLine +BABEL_OP2_301_59078_20140206_221105_outLine +BABEL_OP2_301_59549_20131115_144344_inLine +BABEL_OP2_301_59549_20131115_144344_outLine +BABEL_OP2_301_59549_20131115_145934_inLine +BABEL_OP2_301_59549_20131115_145934_outLine +BABEL_OP2_301_59928_20131208_181057_inLine +BABEL_OP2_301_59928_20131208_181057_outLine +BABEL_OP2_301_60436_20140126_184303_inLine +BABEL_OP2_301_60436_20140126_184303_outLine +BABEL_OP2_301_60458_20140127_174755_inLine +BABEL_OP2_301_60458_20140127_174755_outLine +BABEL_OP2_301_60474_20131125_202818_inLine +BABEL_OP2_301_60474_20131125_202818_outLine +BABEL_OP2_301_60477_20140131_142240_inLine +BABEL_OP2_301_60477_20140131_142240_outLine +BABEL_OP2_301_60498_20140128_144917_inLine +BABEL_OP2_301_60498_20140128_144917_outLine +BABEL_OP2_301_60626_20131123_194530_inLine 
+BABEL_OP2_301_60626_20131123_194530_outLine +BABEL_OP2_301_61440_20140129_162338_inLine +BABEL_OP2_301_61440_20140129_162338_outLine +BABEL_OP2_301_62047_20131223_201629_inLine +BABEL_OP2_301_62047_20131223_201629_outLine +BABEL_OP2_301_62734_20131127_125913_inLine +BABEL_OP2_301_62734_20131127_125913_outLine +BABEL_OP2_301_62800_20131023_133254_inLine +BABEL_OP2_301_62800_20131023_133254_outLine +BABEL_OP2_301_63787_20131112_234133_inLine +BABEL_OP2_301_63787_20131112_234133_outLine +BABEL_OP2_301_63906_20140122_195218_inLine +BABEL_OP2_301_63906_20140122_195218_outLine +BABEL_OP2_301_64768_20131129_183309_inLine +BABEL_OP2_301_64768_20131129_183309_outLine +BABEL_OP2_301_64902_20140123_130547_inLine +BABEL_OP2_301_64902_20140123_130547_outLine +BABEL_OP2_301_65298_20140115_174724_inLine +BABEL_OP2_301_65298_20140115_174724_outLine +BABEL_OP2_301_65466_20140122_211719_inLine +BABEL_OP2_301_65466_20140122_211719_outLine +BABEL_OP2_301_66045_20131203_142944_inLine +BABEL_OP2_301_66045_20131203_142944_outLine +BABEL_OP2_301_66361_20140223_153258_inLine +BABEL_OP2_301_66361_20140223_153258_outLine +BABEL_OP2_301_66916_20131023_223807_inLine +BABEL_OP2_301_66916_20131023_223807_outLine +BABEL_OP2_301_67152_20140119_212917_inLine +BABEL_OP2_301_67152_20140119_212917_outLine +BABEL_OP2_301_67213_20140220_182122_inLine +BABEL_OP2_301_67213_20140220_182122_outLine +BABEL_OP2_301_67622_20131023_150210_inLine +BABEL_OP2_301_67622_20131023_150210_outLine +BABEL_OP2_301_68182_20140115_183030_inLine +BABEL_OP2_301_68182_20140115_183030_outLine +BABEL_OP2_301_68924_20131210_145459_inLine +BABEL_OP2_301_68924_20131210_145459_outLine +BABEL_OP2_301_69096_20140128_171512_inLine +BABEL_OP2_301_69096_20140128_171512_outLine +BABEL_OP2_301_69746_20140108_182845_inLine +BABEL_OP2_301_69746_20140108_182845_outLine +BABEL_OP2_301_69937_20140131_181058_inLine +BABEL_OP2_301_69937_20140131_181058_outLine +BABEL_OP2_301_69992_20131110_135349_inLine +BABEL_OP2_301_69992_20131110_135349_outLine +BABEL_OP2_301_70386_20140102_173141_inLine +BABEL_OP2_301_70386_20140102_173141_outLine +BABEL_OP2_301_71121_20140223_161906_inLine +BABEL_OP2_301_71121_20140223_161906_outLine +BABEL_OP2_301_71263_20140205_210654_inLine +BABEL_OP2_301_71263_20140205_210654_outLine +BABEL_OP2_301_72733_20140126_155036_inLine +BABEL_OP2_301_72733_20140126_155036_outLine +BABEL_OP2_301_72844_20131023_180119_inLine +BABEL_OP2_301_72844_20131023_180119_outLine +BABEL_OP2_301_73005_20140126_193903_inLine +BABEL_OP2_301_73005_20140126_193903_outLine +BABEL_OP2_301_73042_20131114_135827_inLine +BABEL_OP2_301_73042_20131114_135827_outLine +BABEL_OP2_301_73258_20131203_200331_inLine +BABEL_OP2_301_73258_20131203_200331_outLine +BABEL_OP2_301_73485_20140128_210522_inLine +BABEL_OP2_301_73485_20140128_210522_outLine +BABEL_OP2_301_73549_20140131_160208_inLine +BABEL_OP2_301_73549_20140131_160208_outLine +BABEL_OP2_301_73591_20131016_200144_inLine +BABEL_OP2_301_73591_20131016_200144_outLine +BABEL_OP2_301_73591_20131016_201810_inLine +BABEL_OP2_301_73591_20131016_201810_outLine +BABEL_OP2_301_73964_20140214_161434_inLine +BABEL_OP2_301_73964_20140214_161434_outLine +BABEL_OP2_301_74886_20131102_122938_inLine +BABEL_OP2_301_74886_20131102_122938_outLine +BABEL_OP2_301_75261_20131226_160602_inLine +BABEL_OP2_301_75261_20131226_160602_outLine +BABEL_OP2_301_75869_20140122_141000_inLine +BABEL_OP2_301_75869_20140122_141000_outLine +BABEL_OP2_301_75981_20140127_143431_inLine +BABEL_OP2_301_75981_20140127_143431_outLine 
+BABEL_OP2_301_76155_20131203_185301_inLine +BABEL_OP2_301_76155_20131203_185301_outLine +BABEL_OP2_301_77146_20131023_185146_inLine +BABEL_OP2_301_77146_20131023_185146_outLine +BABEL_OP2_301_77427_20131124_013134_inLine +BABEL_OP2_301_77427_20131124_013134_outLine +BABEL_OP2_301_77427_20131124_014748_inLine +BABEL_OP2_301_77427_20131124_014748_outLine +BABEL_OP2_301_77744_20131117_154739_inLine +BABEL_OP2_301_77744_20131117_154739_outLine +BABEL_OP2_301_78482_20131227_163840_inLine +BABEL_OP2_301_78482_20131227_163840_outLine +BABEL_OP2_301_78543_20140131_010053_inLine +BABEL_OP2_301_78543_20140131_010053_outLine +BABEL_OP2_301_78743_20131220_201406_inLine +BABEL_OP2_301_78743_20131220_201406_outLine +BABEL_OP2_301_78943_20131120_175430_inLine +BABEL_OP2_301_78943_20131120_175430_outLine +BABEL_OP2_301_79451_20131125_114859_inLine +BABEL_OP2_301_79451_20131125_114859_outLine +BABEL_OP2_301_81622_20131204_193304_inLine +BABEL_OP2_301_81622_20131204_193304_outLine +BABEL_OP2_301_81810_20131214_030628_inLine +BABEL_OP2_301_81810_20131214_030628_outLine +BABEL_OP2_301_81854_20140127_151841_inLine +BABEL_OP2_301_81854_20140127_151841_outLine +BABEL_OP2_301_82089_20131208_202028_inLine +BABEL_OP2_301_82089_20131208_202028_outLine +BABEL_OP2_301_82425_20131113_010203_inLine +BABEL_OP2_301_82425_20131113_010203_outLine +BABEL_OP2_301_82626_20140131_233635_inLine +BABEL_OP2_301_82626_20140131_233635_outLine +BABEL_OP2_301_83436_20131116_194233_inLine +BABEL_OP2_301_83436_20131116_194233_outLine +BABEL_OP2_301_83455_20131129_211537_inLine +BABEL_OP2_301_83455_20131129_211537_outLine +BABEL_OP2_301_83455_20131129_212747_inLine +BABEL_OP2_301_83455_20131129_212747_outLine +BABEL_OP2_301_83625_20140224_161632_inLine +BABEL_OP2_301_83625_20140224_161632_outLine +BABEL_OP2_301_84458_20131216_193109_inLine +BABEL_OP2_301_84458_20131216_193109_outLine +BABEL_OP2_301_84547_20131025_143053_inLine +BABEL_OP2_301_84547_20131025_143053_outLine +BABEL_OP2_301_85248_20140115_144605_inLine +BABEL_OP2_301_85248_20140115_144605_outLine +BABEL_OP2_301_85322_20131112_183356_inLine +BABEL_OP2_301_85322_20131112_183356_outLine +BABEL_OP2_301_85519_20140103_170652_inLine +BABEL_OP2_301_85519_20140103_170652_outLine +BABEL_OP2_301_86156_20140122_185516_inLine +BABEL_OP2_301_86156_20140122_185516_outLine +BABEL_OP2_301_87470_20131128_003454_inLine +BABEL_OP2_301_87470_20131128_003454_outLine +BABEL_OP2_301_87545_20140125_194128_inLine +BABEL_OP2_301_87545_20140125_194128_outLine +BABEL_OP2_301_88812_20140126_203311_inLine +BABEL_OP2_301_88812_20140126_203311_outLine +BABEL_OP2_301_88925_20131220_151054_inLine +BABEL_OP2_301_88925_20131220_151054_outLine +BABEL_OP2_301_88938_20140104_195418_inLine +BABEL_OP2_301_88938_20140104_195418_outLine +BABEL_OP2_301_89059_20140109_141228_inLine +BABEL_OP2_301_89059_20140109_141228_outLine +BABEL_OP2_301_89358_20131209_174055_inLine +BABEL_OP2_301_89358_20131209_174055_outLine +BABEL_OP2_301_89665_20131206_143535_inLine +BABEL_OP2_301_89665_20131206_143535_outLine +BABEL_OP2_301_89695_20131203_225429_inLine +BABEL_OP2_301_89695_20131203_225429_outLine +BABEL_OP2_301_89877_20140205_200816_inLine +BABEL_OP2_301_89877_20140205_200816_outLine +BABEL_OP2_301_90709_20131109_170505_inLine +BABEL_OP2_301_90709_20131109_170505_outLine +BABEL_OP2_301_90737_20131206_160650_inLine +BABEL_OP2_301_90737_20131206_160650_outLine +BABEL_OP2_301_91372_20140126_145526_inLine +BABEL_OP2_301_91372_20140126_145526_outLine +BABEL_OP2_301_91463_20140206_144651_inLine 
+BABEL_OP2_301_91463_20140206_144651_outLine
+BABEL_OP2_301_91478_20140224_170543_inLine
+BABEL_OP2_301_91478_20140224_170543_outLine
+BABEL_OP2_301_91760_20140127_183930_inLine
+BABEL_OP2_301_91760_20140127_183930_outLine
+BABEL_OP2_301_91884_20140118_220510_inLine
+BABEL_OP2_301_91884_20140118_220510_outLine
+BABEL_OP2_301_91891_20131213_192340_inLine
+BABEL_OP2_301_91891_20131213_192340_outLine
+BABEL_OP2_301_91944_20131114_123915_inLine
+BABEL_OP2_301_91944_20131114_123915_outLine
+BABEL_OP2_301_92809_20131124_142340_inLine
+BABEL_OP2_301_92809_20131124_142340_outLine
+BABEL_OP2_301_92809_20131124_143817_inLine
+BABEL_OP2_301_92809_20131124_143817_outLine
+BABEL_OP2_301_92942_20140206_180304_inLine
+BABEL_OP2_301_92942_20140206_180304_outLine
+BABEL_OP2_301_93153_20131114_144733_inLine
+BABEL_OP2_301_93153_20131114_144733_outLine
+BABEL_OP2_301_93153_20131114_151704_inLine
+BABEL_OP2_301_93153_20131114_151704_outLine
+BABEL_OP2_301_93475_20131119_183619_inLine
+BABEL_OP2_301_93475_20131119_183619_outLine
+BABEL_OP2_301_93515_20140125_212344_inLine
+BABEL_OP2_301_93515_20140125_212344_outLine
+BABEL_OP2_301_93964_20131130_172431_inLine
+BABEL_OP2_301_93964_20131130_172431_outLine
+BABEL_OP2_301_94409_20131204_145545_inLine
+BABEL_OP2_301_94409_20131204_145545_outLine
+BABEL_OP2_301_94978_20140119_185149_inLine
+BABEL_OP2_301_94978_20140119_185149_outLine
+BABEL_OP2_301_95338_20140127_192317_inLine
+BABEL_OP2_301_95338_20140127_192317_outLine
+BABEL_OP2_301_95399_20131206_150920_inLine
+BABEL_OP2_301_95399_20131206_150920_outLine
+BABEL_OP2_301_95583_20131029_002312_inLine
+BABEL_OP2_301_95583_20131029_002312_outLine
+BABEL_OP2_301_95663_20131025_134113_inLine
+BABEL_OP2_301_95663_20131025_134113_outLine
+BABEL_OP2_301_95935_20140103_190515_inLine
+BABEL_OP2_301_95935_20140103_190515_outLine
+BABEL_OP2_301_96190_20131122_024403_inLine
+BABEL_OP2_301_96190_20131122_024403_outLine
+BABEL_OP2_301_96324_20131026_023101_inLine
+BABEL_OP2_301_96324_20131026_023101_outLine
+BABEL_OP2_301_96376_20140126_140015_inLine
+BABEL_OP2_301_96376_20140126_140015_outLine
+BABEL_OP2_301_96910_20131124_183403_inLine
+BABEL_OP2_301_96910_20131124_183403_outLine
+BABEL_OP2_301_97136_20140120_235804_inLine
+BABEL_OP2_301_97136_20140120_235804_outLine
+BABEL_OP2_301_97588_20131025_185012_inLine
+BABEL_OP2_301_97588_20131025_185012_outLine
+BABEL_OP2_301_99202_20131226_202006_inLine
+BABEL_OP2_301_99202_20131226_202006_outLine
+BABEL_OP2_301_99955_20140110_162703_inLine
+BABEL_OP2_301_99955_20140110_162703_outLine
diff --git a/egs/babel/s5d/conf/lists/301-cebuano/untranscribed-training.list b/egs/babel/s5d/conf/lists/301-cebuano/untranscribed-training.list
new file mode 100644
index 00000000000..f0033cd47ec
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/301-cebuano/untranscribed-training.list
@@ -0,0 +1,548 @@
+BABEL_OP2_301_10188_20131015_200722_inLine
+BABEL_OP2_301_10188_20131015_200722_outLine
+BABEL_OP2_301_10188_20131015_201921_inLine
+BABEL_OP2_301_10188_20131015_201921_outLine
+BABEL_OP2_301_10974_20131226_183511_inLine
+BABEL_OP2_301_10974_20131226_183511_outLine
+BABEL_OP2_301_11096_20140129_200046_inLine
+BABEL_OP2_301_11096_20140129_200046_outLine
+BABEL_OP2_301_11663_20140206_183134_inLine
+BABEL_OP2_301_11663_20140206_183134_outLine
+BABEL_OP2_301_12851_20131019_233929_inLine
+BABEL_OP2_301_12851_20131019_233929_outLine
+BABEL_OP2_301_12851_20131026_182349_inLine
+BABEL_OP2_301_12851_20131026_182349_outLine
+BABEL_OP2_301_13030_20131128_165148_inLine
+BABEL_OP2_301_13030_20131128_165148_outLine
+BABEL_OP2_301_13040_20131113_202409_outLine
+BABEL_OP2_301_13744_20131020_151336_inLine
+BABEL_OP2_301_13744_20131020_160305_inLine
+BABEL_OP2_301_13909_20140201_154926_inLine
+BABEL_OP2_301_13909_20140201_154926_outLine
+BABEL_OP2_301_14228_20131223_203905_outLine
+BABEL_OP2_301_15024_20131206_182911_inLine
+BABEL_OP2_301_15024_20131206_182911_outLine
+BABEL_OP2_301_15535_20131212_140356_inLine
+BABEL_OP2_301_15535_20131212_140356_outLine
+BABEL_OP2_301_15749_20131226_201016_inLine
+BABEL_OP2_301_15749_20131226_201016_outLine
+BABEL_OP2_301_15902_20131116_134056_inLine
+BABEL_OP2_301_15902_20131116_134056_outLine
+BABEL_OP2_301_17567_20131223_202218_inLine
+BABEL_OP2_301_17567_20131223_202218_outLine
+BABEL_OP2_301_17573_20131221_234136_inLine
+BABEL_OP2_301_17573_20131221_234136_outLine
+BABEL_OP2_301_17751_20140201_163439_inLine
+BABEL_OP2_301_17751_20140201_163439_outLine
+BABEL_OP2_301_17914_20140114_191137_inLine
+BABEL_OP2_301_17914_20140114_191137_outLine
+BABEL_OP2_301_18863_20140103_143408_inLine
+BABEL_OP2_301_18863_20140103_145207_inLine
+BABEL_OP2_301_18939_20131108_215217_outLine
+BABEL_OP2_301_19120_20140129_153621_inLine
+BABEL_OP2_301_19120_20140129_153621_outLine
+BABEL_OP2_301_19545_20131223_225812_inLine
+BABEL_OP2_301_19545_20131223_225812_outLine
+BABEL_OP2_301_19703_20131203_181434_inLine
+BABEL_OP2_301_19703_20131203_181434_outLine
+BABEL_OP2_301_20682_20131220_204542_inLine
+BABEL_OP2_301_20682_20131220_204542_outLine
+BABEL_OP2_301_20738_20140115_135411_inLine
+BABEL_OP2_301_20738_20140115_135411_outLine
+BABEL_OP2_301_21426_20140216_181528_outLine
+BABEL_OP2_301_21426_20140216_182606_outLine
+BABEL_OP2_301_21581_20131128_151038_inLine
+BABEL_OP2_301_21581_20131128_151038_outLine
+BABEL_OP2_301_21794_20131203_210336_inLine
+BABEL_OP2_301_21794_20131203_210336_outLine
+BABEL_OP2_301_21794_20131203_211241_inLine
+BABEL_OP2_301_21794_20131203_211241_outLine
+BABEL_OP2_301_21794_20131203_212201_inLine
+BABEL_OP2_301_21794_20131203_212201_outLine
+BABEL_OP2_301_22170_20140119_235310_inLine
+BABEL_OP2_301_22170_20140119_235310_outLine
+BABEL_OP2_301_23151_20140115_191742_inLine
+BABEL_OP2_301_23151_20140115_191742_outLine
+BABEL_OP2_301_23260_20140123_165218_inLine
+BABEL_OP2_301_23260_20140123_165218_outLine
+BABEL_OP2_301_23681_20140129_150558_inLine
+BABEL_OP2_301_23681_20140129_150558_outLine
+BABEL_OP2_301_23983_20140125_164849_inLine
+BABEL_OP2_301_23983_20140125_164849_outLine
+BABEL_OP2_301_24033_20140108_160013_inLine
+BABEL_OP2_301_24033_20140108_160013_outLine
+BABEL_OP2_301_24470_20140206_191002_inLine
+BABEL_OP2_301_24470_20140206_191002_outLine
+BABEL_OP2_301_25085_20140204_170633_inLine
+BABEL_OP2_301_25085_20140204_170633_outLine
+BABEL_OP2_301_25220_20140202_012113_inLine
+BABEL_OP2_301_25220_20140202_012113_outLine
+BABEL_OP2_301_25698_20140202_155327_inLine
+BABEL_OP2_301_25698_20140202_155327_outLine
+BABEL_OP2_301_26398_20140125_202344_inLine
+BABEL_OP2_301_26398_20140125_202344_outLine
+BABEL_OP2_301_26574_20131226_194917_inLine
+BABEL_OP2_301_26574_20131226_194917_outLine
+BABEL_OP2_301_27203_20140205_212839_inLine
+BABEL_OP2_301_27203_20140205_212839_outLine
+BABEL_OP2_301_27478_20140120_183015_inLine
+BABEL_OP2_301_27478_20140120_183015_outLine
+BABEL_OP2_301_28190_20140103_204548_inLine
+BABEL_OP2_301_28190_20140103_204548_outLine
+BABEL_OP2_301_28190_20140103_211418_inLine
+BABEL_OP2_301_28190_20140103_211418_outLine
+BABEL_OP2_301_28538_20131206_201510_inLine
+BABEL_OP2_301_28538_20131206_201510_outLine
+BABEL_OP2_301_28775_20131117_184047_inLine
+BABEL_OP2_301_28775_20131117_184047_outLine
+BABEL_OP2_301_28775_20131117_184742_inLine
+BABEL_OP2_301_28775_20131117_184742_outLine
+BABEL_OP2_301_28775_20131117_190311_inLine
+BABEL_OP2_301_28775_20131117_190311_outLine
+BABEL_OP2_301_29072_20131212_183347_inLine
+BABEL_OP2_301_29072_20131212_183347_outLine
+BABEL_OP2_301_29076_20140207_194512_inLine
+BABEL_OP2_301_29076_20140207_194512_outLine
+BABEL_OP2_301_29352_20140131_181124_inLine
+BABEL_OP2_301_29352_20140131_181124_outLine
+BABEL_OP2_301_29633_20140121_164509_inLine
+BABEL_OP2_301_29633_20140121_164509_outLine
+BABEL_OP2_301_29643_20140129_200354_outLine
+BABEL_OP2_301_29765_20140131_185401_inLine
+BABEL_OP2_301_29765_20140131_185401_outLine
+BABEL_OP2_301_30253_20131217_200910_inLine
+BABEL_OP2_301_30253_20131217_200910_outLine
+BABEL_OP2_301_30280_20140210_171213_inLine
+BABEL_OP2_301_30280_20140210_171213_outLine
+BABEL_OP2_301_30497_20140123_162323_inLine
+BABEL_OP2_301_30497_20140123_162323_outLine
+BABEL_OP2_301_31109_20131225_234903_inLine
+BABEL_OP2_301_31109_20131225_234903_outLine
+BABEL_OP2_301_31182_20140102_125318_inLine
+BABEL_OP2_301_31182_20140102_125318_outLine
+BABEL_OP2_301_31182_20140102_130533_inLine
+BABEL_OP2_301_31182_20140102_130533_outLine
+BABEL_OP2_301_31184_20131208_184700_inLine
+BABEL_OP2_301_31184_20131208_184700_outLine
+BABEL_OP2_301_31484_20131210_183412_inLine
+BABEL_OP2_301_31484_20131210_183412_outLine
+BABEL_OP2_301_31583_20131220_145426_inLine
+BABEL_OP2_301_31583_20131220_145426_outLine
+BABEL_OP2_301_32048_20140107_184712_inLine
+BABEL_OP2_301_32048_20140107_184712_outLine
+BABEL_OP2_301_32861_20140110_193920_outLine
+BABEL_OP2_301_32872_20140126_181540_inLine
+BABEL_OP2_301_32872_20140126_181540_outLine
+BABEL_OP2_301_32959_20131218_155238_inLine
+BABEL_OP2_301_32959_20131218_155238_outLine
+BABEL_OP2_301_33229_20140112_190633_inLine
+BABEL_OP2_301_33229_20140112_190633_outLine
+BABEL_OP2_301_33251_20140206_154015_inLine
+BABEL_OP2_301_33251_20140206_154015_outLine
+BABEL_OP2_301_33659_20140223_185752_outLine
+BABEL_OP2_301_34336_20131125_162020_inLine
+BABEL_OP2_301_34336_20131125_162020_outLine
+BABEL_OP2_301_34336_20131125_163318_inLine
+BABEL_OP2_301_34336_20131125_163318_outLine
+BABEL_OP2_301_34477_20131129_201317_inLine
+BABEL_OP2_301_34477_20131129_201317_outLine
+BABEL_OP2_301_34688_20131107_151905_inLine
+BABEL_OP2_301_34713_20140216_184756_outLine
+BABEL_OP2_301_34899_20140201_183710_inLine
+BABEL_OP2_301_34899_20140201_183710_outLine
+BABEL_OP2_301_36017_20140123_220745_inLine
+BABEL_OP2_301_36017_20140123_220745_outLine
+BABEL_OP2_301_36894_20131113_201325_inLine
+BABEL_OP2_301_38750_20131218_210138_inLine
+BABEL_OP2_301_38750_20131218_210138_outLine
+BABEL_OP2_301_39059_20140115_160435_inLine
+BABEL_OP2_301_39059_20140115_160435_outLine
+BABEL_OP2_301_39059_20140115_161237_inLine
+BABEL_OP2_301_39059_20140115_161237_outLine
+BABEL_OP2_301_39893_20140201_164926_inLine
+BABEL_OP2_301_39893_20140201_164926_outLine
+BABEL_OP2_301_41097_20131218_194351_inLine
+BABEL_OP2_301_41097_20131218_194351_outLine
+BABEL_OP2_301_41100_20131130_204102_inLine
+BABEL_OP2_301_41100_20131130_204102_outLine
+BABEL_OP2_301_41100_20131130_204814_inLine
+BABEL_OP2_301_41100_20131130_204814_outLine
+BABEL_OP2_301_41109_20140107_200127_inLine
+BABEL_OP2_301_41109_20140107_200127_outLine
+BABEL_OP2_301_41272_20140126_163911_inLine
+BABEL_OP2_301_41272_20140126_163911_outLine
+BABEL_OP2_301_41442_20131220_182530_inLine
+BABEL_OP2_301_41442_20131220_182530_outLine
+BABEL_OP2_301_41442_20131220_183940_inLine
+BABEL_OP2_301_41442_20131220_183940_outLine
+BABEL_OP2_301_42231_20131213_161445_inLine
+BABEL_OP2_301_42231_20131213_161445_outLine
+BABEL_OP2_301_42243_20131124_191210_inLine
+BABEL_OP2_301_42243_20131124_191210_outLine
+BABEL_OP2_301_42718_20140126_222724_inLine
+BABEL_OP2_301_42718_20140126_222724_outLine
+BABEL_OP2_301_43074_20140213_170948_inLine
+BABEL_OP2_301_43074_20140213_170948_outLine
+BABEL_OP2_301_43157_20140214_155422_inLine
+BABEL_OP2_301_43157_20140214_155422_outLine
+BABEL_OP2_301_43588_20140128_173254_inLine
+BABEL_OP2_301_43588_20140128_173254_outLine
+BABEL_OP2_301_43588_20140128_174720_inLine
+BABEL_OP2_301_43588_20140128_174720_outLine
+BABEL_OP2_301_43990_20140220_141338_inLine
+BABEL_OP2_301_43990_20140220_141338_outLine
+BABEL_OP2_301_44255_20140115_001546_inLine
+BABEL_OP2_301_44255_20140115_001546_outLine
+BABEL_OP2_301_44290_20140126_145048_inLine
+BABEL_OP2_301_44290_20140126_145048_outLine
+BABEL_OP2_301_44531_20140118_212803_inLine
+BABEL_OP2_301_44531_20140118_212803_outLine
+BABEL_OP2_301_44847_20131214_204251_inLine
+BABEL_OP2_301_44847_20131214_204251_outLine
+BABEL_OP2_301_44847_20131214_230118_inLine
+BABEL_OP2_301_44847_20131214_230118_outLine
+BABEL_OP2_301_45697_20140214_220139_inLine
+BABEL_OP2_301_45697_20140214_220139_outLine
+BABEL_OP2_301_46169_20131220_162551_inLine
+BABEL_OP2_301_46169_20131220_162551_outLine
+BABEL_OP2_301_46202_20140224_155801_inLine
+BABEL_OP2_301_46202_20140224_155801_outLine
+BABEL_OP2_301_46315_20131211_204949_inLine
+BABEL_OP2_301_46315_20131211_204949_outLine
+BABEL_OP2_301_46625_20131026_225140_outLine
+BABEL_OP2_301_46974_20131211_200449_inLine
+BABEL_OP2_301_46974_20131211_200449_outLine
+BABEL_OP2_301_47637_20140213_164701_inLine
+BABEL_OP2_301_47637_20140213_164701_outLine
+BABEL_OP2_301_47799_20140216_165643_inLine
+BABEL_OP2_301_47799_20140216_165643_outLine
+BABEL_OP2_301_48016_20140205_174755_inLine
+BABEL_OP2_301_48016_20140205_174755_outLine
+BABEL_OP2_301_48299_20140224_163951_inLine
+BABEL_OP2_301_48299_20140224_163951_outLine
+BABEL_OP2_301_48610_20131113_182547_outLine
+BABEL_OP2_301_48663_20140126_210156_inLine
+BABEL_OP2_301_48663_20140126_210156_outLine
+BABEL_OP2_301_48758_20140122_144530_inLine
+BABEL_OP2_301_48758_20140122_144530_outLine
+BABEL_OP2_301_48758_20140122_155747_inLine
+BABEL_OP2_301_48758_20140122_155747_outLine
+BABEL_OP2_301_48907_20140127_134337_inLine
+BABEL_OP2_301_48907_20140127_134337_outLine
+BABEL_OP2_301_49637_20131030_211145_inLine
+BABEL_OP2_301_49767_20140131_135142_inLine
+BABEL_OP2_301_49767_20140131_135142_outLine
+BABEL_OP2_301_50779_20140207_191951_inLine
+BABEL_OP2_301_50779_20140207_191951_outLine
+BABEL_OP2_301_50940_20140220_201041_inLine
+BABEL_OP2_301_50940_20140220_201041_outLine
+BABEL_OP2_301_51858_20140220_170150_inLine
+BABEL_OP2_301_51858_20140220_170150_outLine
+BABEL_OP2_301_52222_20140224_160657_inLine
+BABEL_OP2_301_52222_20140224_160657_outLine
+BABEL_OP2_301_52483_20140214_142008_inLine
+BABEL_OP2_301_52483_20140214_142008_outLine
+BABEL_OP2_301_52854_20131015_224412_inLine
+BABEL_OP2_301_52854_20131015_224412_outLine
+BABEL_OP2_301_52854_20131015_225109_inLine
+BABEL_OP2_301_52854_20131015_225109_outLine
+BABEL_OP2_301_52854_20131015_230437_inLine
+BABEL_OP2_301_52854_20131015_230437_outLine
+BABEL_OP2_301_53072_20140128_162233_inLine
+BABEL_OP2_301_53072_20140128_162233_outLine
+BABEL_OP2_301_53415_20140119_182758_inLine
+BABEL_OP2_301_53415_20140119_182758_outLine
+BABEL_OP2_301_53419_20131222_184412_inLine
+BABEL_OP2_301_53419_20131222_184412_outLine
+BABEL_OP2_301_53492_20140122_223158_inLine
+BABEL_OP2_301_53492_20140122_223158_outLine
+BABEL_OP2_301_53492_20140122_223724_inLine
+BABEL_OP2_301_53492_20140122_223724_outLine
+BABEL_OP2_301_54040_20140102_113546_inLine
+BABEL_OP2_301_54040_20140102_113546_outLine
+BABEL_OP2_301_54066_20140214_153112_inLine
+BABEL_OP2_301_54066_20140214_153112_outLine
+BABEL_OP2_301_54405_20131227_152052_inLine
+BABEL_OP2_301_54405_20131227_152052_outLine
+BABEL_OP2_301_54634_20140225_214816_inLine
+BABEL_OP2_301_54634_20140225_214816_outLine
+BABEL_OP2_301_54923_20140201_161814_inLine
+BABEL_OP2_301_54923_20140201_161814_outLine
+BABEL_OP2_301_55013_20140214_165830_inLine
+BABEL_OP2_301_55013_20140214_165830_outLine
+BABEL_OP2_301_56019_20140117_192119_inLine
+BABEL_OP2_301_56019_20140117_192119_outLine
+BABEL_OP2_301_56090_20131016_191346_inLine
+BABEL_OP2_301_56090_20131016_191346_outLine
+BABEL_OP2_301_56213_20131216_202911_inLine
+BABEL_OP2_301_56213_20131216_202911_outLine
+BABEL_OP2_301_56345_20140223_203712_inLine
+BABEL_OP2_301_56345_20140223_203712_outLine
+BABEL_OP2_301_56429_20131112_172026_inLine
+BABEL_OP2_301_56429_20131112_172026_outLine
+BABEL_OP2_301_56465_20140205_174245_inLine
+BABEL_OP2_301_56465_20140205_174245_outLine
+BABEL_OP2_301_56468_20140225_204233_inLine
+BABEL_OP2_301_56468_20140225_204233_outLine
+BABEL_OP2_301_56677_20131217_201344_inLine
+BABEL_OP2_301_56677_20131217_201344_outLine
+BABEL_OP2_301_56684_20140105_193720_inLine
+BABEL_OP2_301_56684_20140105_193720_outLine
+BABEL_OP2_301_57067_20140107_192621_inLine
+BABEL_OP2_301_57067_20140107_192621_outLine
+BABEL_OP2_301_57219_20140205_155125_outLine
+BABEL_OP2_301_57219_20140205_160417_outLine
+BABEL_OP2_301_57529_20131217_195013_inLine
+BABEL_OP2_301_57529_20131217_195013_outLine
+BABEL_OP2_301_57609_20131224_152505_inLine
+BABEL_OP2_301_57609_20131224_152505_outLine
+BABEL_OP2_301_57650_20140114_203646_inLine
+BABEL_OP2_301_57650_20140114_203646_outLine
+BABEL_OP2_301_57654_20131123_151724_inLine
+BABEL_OP2_301_57654_20131123_151724_outLine
+BABEL_OP2_301_57654_20131123_152356_inLine
+BABEL_OP2_301_57654_20131123_152356_outLine
+BABEL_OP2_301_57654_20131123_154603_inLine
+BABEL_OP2_301_57654_20131123_154603_outLine
+BABEL_OP2_301_58717_20131223_213724_inLine
+BABEL_OP2_301_58717_20131223_213724_outLine
+BABEL_OP2_301_59028_20140201_153656_inLine
+BABEL_OP2_301_59028_20140201_153656_outLine
+BABEL_OP2_301_59645_20131224_162758_inLine
+BABEL_OP2_301_59645_20131224_162758_outLine
+BABEL_OP2_301_60307_20140213_205247_inLine
+BABEL_OP2_301_60307_20140213_205247_outLine
+BABEL_OP2_301_61011_20131020_212453_inLine
+BABEL_OP2_301_61011_20131020_212453_outLine
+BABEL_OP2_301_62155_20140121_235400_inLine
+BABEL_OP2_301_62155_20140121_235400_outLine
+BABEL_OP2_301_62430_20140123_160035_inLine
+BABEL_OP2_301_62430_20140123_160035_outLine
+BABEL_OP2_301_63094_20140129_205122_inLine
+BABEL_OP2_301_63094_20140129_205122_outLine
+BABEL_OP2_301_63220_20131218_184307_inLine
+BABEL_OP2_301_63220_20131218_184307_outLine
+BABEL_OP2_301_63511_20140214_161858_inLine
+BABEL_OP2_301_63511_20140214_161858_outLine
+BABEL_OP2_301_63670_20131216_201258_inLine
+BABEL_OP2_301_63670_20131216_201258_outLine
+BABEL_OP2_301_63730_20140204_182322_inLine
+BABEL_OP2_301_63730_20140204_182322_outLine
+BABEL_OP2_301_63757_20140206_214404_inLine
+BABEL_OP2_301_63757_20140206_214404_outLine
+BABEL_OP2_301_64014_20140114_133546_inLine
+BABEL_OP2_301_64014_20140114_133546_outLine
+BABEL_OP2_301_64259_20140225_211407_inLine
+BABEL_OP2_301_64259_20140225_211407_outLine
+BABEL_OP2_301_64398_20131213_201128_inLine
+BABEL_OP2_301_64398_20131213_201128_outLine
+BABEL_OP2_301_65064_20140207_185319_inLine
+BABEL_OP2_301_65064_20140207_185319_outLine
+BABEL_OP2_301_65370_20140201_200500_inLine
+BABEL_OP2_301_65370_20140201_200500_outLine
+BABEL_OP2_301_65640_20140123_140233_inLine
+BABEL_OP2_301_65640_20140123_140233_outLine
+BABEL_OP2_301_66001_20131115_220236_inLine
+BABEL_OP2_301_66001_20131115_220236_outLine
+BABEL_OP2_301_66519_20131128_144732_inLine
+BABEL_OP2_301_66519_20131128_144732_outLine
+BABEL_OP2_301_66519_20131128_150056_inLine
+BABEL_OP2_301_66519_20131128_150056_outLine
+BABEL_OP2_301_67283_20131023_173705_inLine
+BABEL_OP2_301_67283_20131023_173705_outLine
+BABEL_OP2_301_67389_20140219_142647_inLine
+BABEL_OP2_301_67389_20140219_142647_outLine
+BABEL_OP2_301_67401_20140207_182426_inLine
+BABEL_OP2_301_67401_20140207_182426_outLine
+BABEL_OP2_301_68385_20131016_193158_inLine
+BABEL_OP2_301_69153_20131216_202419_inLine
+BABEL_OP2_301_69153_20131216_202419_outLine
+BABEL_OP2_301_69578_20131201_211250_inLine
+BABEL_OP2_301_69578_20131201_211250_outLine
+BABEL_OP2_301_69578_20131201_212353_inLine
+BABEL_OP2_301_69578_20131201_212353_outLine
+BABEL_OP2_301_70221_20131223_190148_inLine
+BABEL_OP2_301_70221_20131223_190148_outLine
+BABEL_OP2_301_70343_20131212_181613_inLine
+BABEL_OP2_301_70343_20131212_181613_outLine
+BABEL_OP2_301_70526_20140127_161237_inLine
+BABEL_OP2_301_70526_20140127_161237_outLine
+BABEL_OP2_301_70986_20140223_164925_inLine
+BABEL_OP2_301_70986_20140223_164925_outLine
+BABEL_OP2_301_72110_20131218_192930_inLine
+BABEL_OP2_301_72110_20131218_192930_outLine
+BABEL_OP2_301_72110_20131220_163212_inLine
+BABEL_OP2_301_72110_20131220_163212_outLine
+BABEL_OP2_301_73301_20131208_194427_inLine
+BABEL_OP2_301_73301_20131208_194427_outLine
+BABEL_OP2_301_73408_20140213_184704_inLine
+BABEL_OP2_301_73408_20140213_184704_outLine
+BABEL_OP2_301_73822_20140216_175714_inLine
+BABEL_OP2_301_73822_20140216_175714_outLine
+BABEL_OP2_301_73837_20131124_173546_inLine
+BABEL_OP2_301_74728_20140214_203632_outLine
+BABEL_OP2_301_75064_20131129_123930_inLine
+BABEL_OP2_301_75064_20131129_123930_outLine
+BABEL_OP2_301_75064_20131129_124541_inLine
+BABEL_OP2_301_75064_20131129_124541_outLine
+BABEL_OP2_301_75342_20131217_201144_inLine
+BABEL_OP2_301_75342_20131217_201144_outLine
+BABEL_OP2_301_75366_20140131_192045_inLine
+BABEL_OP2_301_75366_20140131_192045_outLine
+BABEL_OP2_301_75465_20131221_182948_inLine
+BABEL_OP2_301_75465_20131221_182948_outLine
+BABEL_OP2_301_77242_20140204_192041_inLine
+BABEL_OP2_301_77242_20140204_192041_outLine
+BABEL_OP2_301_77803_20131024_201026_inLine
+BABEL_OP2_301_77803_20131024_201026_outLine
+BABEL_OP2_301_79107_20140204_212236_inLine
+BABEL_OP2_301_79107_20140204_212236_outLine
+BABEL_OP2_301_79139_20131203_165343_outLine
+BABEL_OP2_301_79429_20140220_203629_inLine
+BABEL_OP2_301_79429_20140220_203629_outLine
+BABEL_OP2_301_79858_20131024_220616_outLine
+BABEL_OP2_301_80306_20131203_161230_inLine
+BABEL_OP2_301_80306_20131203_161230_outLine
+BABEL_OP2_301_80306_20131203_162810_inLine
+BABEL_OP2_301_80306_20131203_162810_outLine
+BABEL_OP2_301_80439_20131202_210809_inLine
+BABEL_OP2_301_80439_20131202_210809_outLine
+BABEL_OP2_301_80655_20140123_205823_inLine
+BABEL_OP2_301_80655_20140123_205823_outLine
+BABEL_OP2_301_80721_20131225_182955_inLine
+BABEL_OP2_301_80721_20131225_182955_outLine
+BABEL_OP2_301_81213_20131114_184213_outLine
+BABEL_OP2_301_81213_20131114_190753_outLine
+BABEL_OP2_301_81404_20131206_140303_outLine
+BABEL_OP2_301_81971_20131029_141333_outLine
+BABEL_OP2_301_82030_20140126_162146_inLine
+BABEL_OP2_301_82030_20140126_162146_outLine
+BABEL_OP2_301_82140_20131201_202210_inLine
+BABEL_OP2_301_82140_20131201_202210_outLine
+BABEL_OP2_301_82224_20140108_145115_inLine
+BABEL_OP2_301_82224_20140108_145115_outLine
+BABEL_OP2_301_82361_20140123_185800_outLine
+BABEL_OP2_301_82966_20131229_171324_inLine
+BABEL_OP2_301_82966_20131229_171324_outLine
+BABEL_OP2_301_83062_20140123_143457_inLine
+BABEL_OP2_301_83062_20140123_143457_outLine
+BABEL_OP2_301_83935_20131216_140532_inLine
+BABEL_OP2_301_83935_20131216_140532_outLine
+BABEL_OP2_301_84327_20131217_123632_inLine
+BABEL_OP2_301_84327_20131217_123632_outLine
+BABEL_OP2_301_84823_20131218_180840_inLine
+BABEL_OP2_301_84823_20131218_180840_outLine
+BABEL_OP2_301_85246_20140216_194331_inLine
+BABEL_OP2_301_85246_20140216_194331_outLine
+BABEL_OP2_301_85254_20140131_161411_inLine
+BABEL_OP2_301_85254_20140131_161411_outLine
+BABEL_OP2_301_85254_20140131_162620_inLine
+BABEL_OP2_301_85254_20140131_162620_outLine
+BABEL_OP2_301_86557_20131019_195730_outLine
+BABEL_OP2_301_86597_20140204_185521_inLine
+BABEL_OP2_301_86597_20140204_185521_outLine
+BABEL_OP2_301_86715_20140201_181648_inLine
+BABEL_OP2_301_86715_20140201_181648_outLine
+BABEL_OP2_301_86826_20140129_155917_inLine
+BABEL_OP2_301_86826_20140129_155917_outLine
+BABEL_OP2_301_87280_20131220_194114_inLine
+BABEL_OP2_301_87280_20131220_194114_outLine
+BABEL_OP2_301_87731_20140220_185807_inLine
+BABEL_OP2_301_87731_20140220_185807_outLine
+BABEL_OP2_301_87777_20140208_173157_inLine
+BABEL_OP2_301_87777_20140208_173157_outLine
+BABEL_OP2_301_89045_20131025_163532_inLine
+BABEL_OP2_301_89045_20131025_163532_outLine
+BABEL_OP2_301_90347_20140206_160505_inLine
+BABEL_OP2_301_90347_20140206_160505_outLine
+BABEL_OP2_301_90417_20140202_164404_inLine
+BABEL_OP2_301_90417_20140202_164404_outLine
+BABEL_OP2_301_90760_20140204_173413_inLine
+BABEL_OP2_301_90760_20140204_173413_outLine
+BABEL_OP2_301_91189_20140130_134130_inLine
+BABEL_OP2_301_91189_20140130_134130_outLine
+BABEL_OP2_301_91581_20131218_193124_inLine
+BABEL_OP2_301_91581_20131218_193124_outLine
+BABEL_OP2_301_91593_20140201_174423_inLine
+BABEL_OP2_301_91593_20140201_174423_outLine
+BABEL_OP2_301_91888_20140128_153319_inLine
+BABEL_OP2_301_91888_20140128_153319_outLine
+BABEL_OP2_301_92077_20140127_141441_inLine
+BABEL_OP2_301_92077_20140127_141441_outLine
+BABEL_OP2_301_92356_20140112_181929_inLine
+BABEL_OP2_301_92356_20140112_181929_outLine
+BABEL_OP2_301_92557_20140115_150859_inLine
+BABEL_OP2_301_92557_20140115_150859_outLine
+BABEL_OP2_301_92643_20140127_134733_inLine
+BABEL_OP2_301_92643_20140127_134733_outLine
+BABEL_OP2_301_93007_20140201_215259_inLine
+BABEL_OP2_301_93007_20140201_215259_outLine
+BABEL_OP2_301_93222_20140224_152228_inLine
+BABEL_OP2_301_93222_20140224_152228_outLine
+BABEL_OP2_301_93681_20131129_223439_inLine
+BABEL_OP2_301_93681_20131129_223439_outLine
+BABEL_OP2_301_93858_20140202_152245_inLine
+BABEL_OP2_301_93858_20140202_152245_outLine
+BABEL_OP2_301_94044_20140225_200641_inLine
+BABEL_OP2_301_94044_20140225_200641_outLine
+BABEL_OP2_301_94141_20140214_174210_inLine
+BABEL_OP2_301_94141_20140214_174210_outLine
+BABEL_OP2_301_94141_20140214_174838_inLine
+BABEL_OP2_301_94141_20140214_174838_outLine
+BABEL_OP2_301_94166_20140114_223757_inLine
+BABEL_OP2_301_94166_20140114_223757_outLine
+BABEL_OP2_301_94237_20140125_154005_inLine
+BABEL_OP2_301_94237_20140125_154005_outLine
+BABEL_OP2_301_94487_20140214_181548_inLine
+BABEL_OP2_301_94487_20140214_181548_outLine
+BABEL_OP2_301_94969_20140216_191950_inLine
+BABEL_OP2_301_94969_20140216_191950_outLine
+BABEL_OP2_301_95467_20140204_202122_inLine
+BABEL_OP2_301_95467_20140204_202122_outLine
+BABEL_OP2_301_95490_20131019_201427_inLine
+BABEL_OP2_301_95571_20140225_185558_inLine
+BABEL_OP2_301_95571_20140225_185558_outLine
+BABEL_OP2_301_95670_20131119_163101_inLine
+BABEL_OP2_301_95670_20131119_163101_outLine
+BABEL_OP2_301_95670_20131119_163931_inLine
+BABEL_OP2_301_95670_20131119_163931_outLine
+BABEL_OP2_301_96205_20131208_194017_inLine
+BABEL_OP2_301_96205_20131208_194017_outLine
+BABEL_OP2_301_96205_20131208_195213_inLine
+BABEL_OP2_301_96205_20131208_195213_outLine
+BABEL_OP2_301_96446_20131030_214504_inLine
+BABEL_OP2_301_96446_20131030_214504_outLine
+BABEL_OP2_301_96584_20140114_144108_inLine
+BABEL_OP2_301_96584_20140114_144108_outLine
+BABEL_OP2_301_96934_20131202_185517_inLine
+BABEL_OP2_301_96934_20131202_185517_outLine
+BABEL_OP2_301_96940_20140223_150250_inLine
+BABEL_OP2_301_96940_20140223_150250_outLine
+BABEL_OP2_301_97097_20140122_232217_inLine
+BABEL_OP2_301_97097_20140122_232217_outLine
+BABEL_OP2_301_97220_20140204_184737_inLine
+BABEL_OP2_301_97220_20140204_184737_outLine
+BABEL_OP2_301_97604_20140112_103548_inLine
+BABEL_OP2_301_97604_20140112_103548_outLine
+BABEL_OP2_301_97849_20140123_174235_inLine
+BABEL_OP2_301_97849_20140123_174235_outLine
+BABEL_OP2_301_97911_20140131_152336_inLine
+BABEL_OP2_301_97911_20140131_152336_outLine
+BABEL_OP2_301_97911_20140131_153328_inLine
+BABEL_OP2_301_97911_20140131_153328_outLine
+BABEL_OP2_301_97988_20131219_190252_inLine
+BABEL_OP2_301_97988_20131219_190252_outLine
+BABEL_OP2_301_97988_20140114_012737_inLine
+BABEL_OP2_301_97988_20140114_012737_outLine
+BABEL_OP2_301_98192_20140205_153043_inLine
+BABEL_OP2_301_98192_20140205_153043_outLine
+BABEL_OP2_301_98506_20140122_174742_inLine
+BABEL_OP2_301_98506_20140122_174742_outLine
+BABEL_OP2_301_98678_20140122_124908_inLine
+BABEL_OP2_301_98678_20140122_124908_outLine
+BABEL_OP2_301_99401_20131024_225414_inLine
+BABEL_OP2_301_99401_20131024_225414_outLine
+BABEL_OP2_301_99732_20131220_214613_inLine
+BABEL_OP2_301_99732_20131220_214613_outLine
+BABEL_OP2_301_99813_20131216_151916_inLine
+BABEL_OP2_301_99813_20131216_151916_outLine
diff --git a/egs/babel/s5d/conf/lists/302-kazakh/dev.list b/egs/babel/s5d/conf/lists/302-kazakh/dev.list
new file mode 100644
index 00000000000..31a554efeef
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/302-kazakh/dev.list
@@ -0,0 +1,140 @@
+BABEL_OP2_302_10002_20140316_215637_inLine
+BABEL_OP2_302_10002_20140316_215637_outLine
+BABEL_OP2_302_10188_20131030_194100_inLine
+BABEL_OP2_302_10188_20131030_194100_outLine
+BABEL_OP2_302_11673_20131104_223908_inLine
+BABEL_OP2_302_11673_20131104_223908_outLine
+BABEL_OP2_302_13324_20131115_220718_inLine
+BABEL_OP2_302_13324_20131115_220718_outLine
+BABEL_OP2_302_17440_20140218_204311_inLine
+BABEL_OP2_302_17440_20140218_204311_outLine
+BABEL_OP2_302_17573_20140312_030325_inLine
+BABEL_OP2_302_17573_20140312_030325_outLine
+BABEL_OP2_302_17914_20140126_234956_inLine
+BABEL_OP2_302_17914_20140126_234956_outLine
+BABEL_OP2_302_17923_20131116_222221_inLine
+BABEL_OP2_302_17923_20131116_222221_outLine
+BABEL_OP2_302_18939_20131111_213325_inLine
+BABEL_OP2_302_18939_20131111_213325_outLine
+BABEL_OP2_302_19663_20131212_235807_inLine
+BABEL_OP2_302_19663_20131212_235807_outLine
+BABEL_OP2_302_19703_20131202_234704_inLine
+BABEL_OP2_302_19703_20131202_234704_outLine
+BABEL_OP2_302_20682_20140114_221052_inLine
+BABEL_OP2_302_20682_20140114_221052_outLine
+BABEL_OP2_302_20768_20140203_185125_inLine
+BABEL_OP2_302_20768_20140203_185125_outLine
+BABEL_OP2_302_20768_20140203_190423_inLine
+BABEL_OP2_302_20768_20140203_190423_outLine
+BABEL_OP2_302_21109_20140111_215428_inLine
+BABEL_OP2_302_21109_20140111_215428_outLine
+BABEL_OP2_302_21581_20131217_222306_inLine
+BABEL_OP2_302_21581_20131217_222306_outLine
+BABEL_OP2_302_22216_20131104_153600_inLine
+BABEL_OP2_302_22216_20131104_153600_outLine
+BABEL_OP2_302_23355_20140317_191841_inLine
+BABEL_OP2_302_23355_20140317_191841_outLine
+BABEL_OP2_302_24589_20131129_215929_inLine
+BABEL_OP2_302_24589_20131129_215929_outLine
+BABEL_OP2_302_26072_20140131_184053_inLine
+BABEL_OP2_302_26072_20140131_184053_outLine
+BABEL_OP2_302_33175_20131105_201906_inLine
+BABEL_OP2_302_33175_20131105_201906_outLine
+BABEL_OP2_302_33355_20131112_211255_inLine
+BABEL_OP2_302_33355_20131112_211255_outLine
+BABEL_OP2_302_33355_20131112_213746_inLine
+BABEL_OP2_302_33355_20131112_213746_outLine
+BABEL_OP2_302_34328_20131219_023407_inLine
+BABEL_OP2_302_34328_20131219_023407_outLine
+BABEL_OP2_302_36341_20131101_170216_inLine
+BABEL_OP2_302_36341_20131101_170216_outLine
+BABEL_OP2_302_36341_20131101_171111_inLine
+BABEL_OP2_302_36341_20131101_171111_outLine
+BABEL_OP2_302_36669_20131206_164229_inLine
+BABEL_OP2_302_36669_20131206_164229_outLine
+BABEL_OP2_302_41174_20131212_200450_inLine
+BABEL_OP2_302_41174_20131212_200450_outLine
+BABEL_OP2_302_41442_20140125_220923_inLine
+BABEL_OP2_302_41442_20140125_220923_outLine
+BABEL_OP2_302_42497_20131116_001033_inLine
+BABEL_OP2_302_42497_20131116_001033_outLine
+BABEL_OP2_302_42497_20131116_002236_inLine
+BABEL_OP2_302_42497_20131116_002236_outLine
+BABEL_OP2_302_43789_20140108_210806_inLine
+BABEL_OP2_302_43789_20140108_210806_outLine
+BABEL_OP2_302_44868_20131217_205108_inLine
+BABEL_OP2_302_44868_20131217_205108_outLine
+BABEL_OP2_302_44868_20131217_205716_inLine
+BABEL_OP2_302_44868_20131217_205716_outLine
+BABEL_OP2_302_44868_20131217_211035_inLine
+BABEL_OP2_302_44868_20131217_211035_outLine
+BABEL_OP2_302_45642_20131114_014119_inLine
+BABEL_OP2_302_45642_20131114_014119_outLine
+BABEL_OP2_302_47156_20140313_011009_inLine
+BABEL_OP2_302_47156_20140313_011009_outLine
+BABEL_OP2_302_49502_20131104_181501_inLine
+BABEL_OP2_302_49502_20131104_181501_outLine
+BABEL_OP2_302_50565_20131103_225947_inLine
+BABEL_OP2_302_50565_20131103_225947_outLine
+BABEL_OP2_302_50726_20131118_025621_inLine
+BABEL_OP2_302_50726_20131118_025621_outLine
+BABEL_OP2_302_50745_20140214_021844_inLine
+BABEL_OP2_302_50745_20140214_021844_outLine
+BABEL_OP2_302_60830_20131205_223823_inLine
+BABEL_OP2_302_60830_20131205_223823_outLine
+BABEL_OP2_302_60830_20131205_225122_inLine
+BABEL_OP2_302_60830_20131205_225122_outLine
+BABEL_OP2_302_61011_20131110_191134_inLine
+BABEL_OP2_302_61011_20131110_191134_outLine
+BABEL_OP2_302_61040_20140123_215906_inLine
+BABEL_OP2_302_61040_20140123_215906_outLine
+BABEL_OP2_302_61963_20140119_184816_inLine
+BABEL_OP2_302_61963_20140119_184816_outLine
+BABEL_OP2_302_66916_20131121_223838_inLine
+BABEL_OP2_302_66916_20131121_223838_outLine
+BABEL_OP2_302_70110_20131109_190313_inLine
+BABEL_OP2_302_70110_20131109_190313_outLine
+BABEL_OP2_302_70182_20140214_185232_inLine
+BABEL_OP2_302_70182_20140214_185232_outLine
+BABEL_OP2_302_72654_20131207_162604_inLine
+BABEL_OP2_302_72654_20131207_162604_outLine
+BABEL_OP2_302_77730_20131114_223327_inLine
+BABEL_OP2_302_77730_20131114_223327_outLine
+BABEL_OP2_302_77730_20131114_230511_inLine
+BABEL_OP2_302_77730_20131114_230511_outLine
+BABEL_OP2_302_77730_20131114_231344_inLine
+BABEL_OP2_302_77730_20131114_231344_outLine
+BABEL_OP2_302_79080_20140203_192545_inLine
+BABEL_OP2_302_79080_20140203_192545_outLine
+BABEL_OP2_302_80577_20140126_190012_inLine
+BABEL_OP2_302_80577_20140126_190012_outLine
+BABEL_OP2_302_81854_20140203_161410_inLine
+BABEL_OP2_302_81854_20140203_161410_outLine
+BABEL_OP2_302_81971_20131101_194252_inLine
+BABEL_OP2_302_81971_20131101_194252_outLine
+BABEL_OP2_302_81971_20131101_195016_inLine
+BABEL_OP2_302_81971_20131101_195016_outLine
+BABEL_OP2_302_84823_20140213_015014_inLine
+BABEL_OP2_302_84823_20140213_015014_outLine
+BABEL_OP2_302_85248_20140123_204317_inLine
+BABEL_OP2_302_85248_20140123_204317_outLine
+BABEL_OP2_302_85322_20131108_161437_inLine
+BABEL_OP2_302_85322_20131108_161437_outLine
+BABEL_OP2_302_86557_20131121_000022_inLine
+BABEL_OP2_302_86557_20131121_000022_outLine
+BABEL_OP2_302_87889_20140119_163150_inLine
+BABEL_OP2_302_87889_20140119_163150_outLine
+BABEL_OP2_302_90080_20140120_230635_inLine
+BABEL_OP2_302_90080_20140120_230635_outLine
+BABEL_OP2_302_91593_20140215_175049_inLine
+BABEL_OP2_302_91593_20140215_175049_outLine
+BABEL_OP2_302_92509_20131114_030809_inLine
+BABEL_OP2_302_92509_20131114_030809_outLine
+BABEL_OP2_302_93320_20140218_173001_inLine
+BABEL_OP2_302_93320_20140218_173001_outLine
+BABEL_OP2_302_93475_20131115_203137_inLine
+BABEL_OP2_302_93475_20131115_203137_outLine
+BABEL_OP2_302_95583_20131112_203137_inLine
+BABEL_OP2_302_95583_20131112_203137_outLine
+BABEL_OP2_302_96842_20140131_154710_inLine
+BABEL_OP2_302_96842_20140131_154710_outLine
diff --git a/egs/babel/s5d/conf/lists/302-kazakh/eval.list b/egs/babel/s5d/conf/lists/302-kazakh/eval.list
new file mode 100644
index 00000000000..cf23788087e
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/302-kazakh/eval.list
@@ -0,0 +1,191 @@
+BABEL_OP2_302_10416_20131210_035651_inLine
+BABEL_OP2_302_10416_20131210_035651_outLine
+BABEL_OP2_302_11096_20140219_220112_inLine
+BABEL_OP2_302_11096_20140219_220112_outLine
+BABEL_OP2_302_12916_20131107_171154_inLine
+BABEL_OP2_302_12916_20131107_171154_outLine
+BABEL_OP2_302_15216_20140219_211720_inLine
+BABEL_OP2_302_15216_20140219_211720_outLine
+BABEL_OP2_302_16787_20131207_203127_inLine
+BABEL_OP2_302_16787_20131207_203127_outLine
+BABEL_OP2_302_17582_20140215_204647_inLine
+BABEL_OP2_302_17582_20140215_204647_outLine
+BABEL_OP2_302_17751_20140216_211124_inLine
+BABEL_OP2_302_17751_20140216_211124_outLine
+BABEL_OP2_302_18291_20140215_182410_inLine
+BABEL_OP2_302_18291_20140215_182410_outLine
+BABEL_OP2_302_18863_20140118_154802_inLine
+BABEL_OP2_302_18863_20140118_154802_outLine
+BABEL_OP2_302_19545_20131213_220625_inLine
+BABEL_OP2_302_19545_20131213_220625_outLine
+BABEL_OP2_302_19672_20131217_215636_inLine
+BABEL_OP2_302_19672_20131217_215636_outLine
+BABEL_OP2_302_19782_20140125_222442_inLine
+BABEL_OP2_302_19782_20140125_222442_outLine
+BABEL_OP2_302_20738_20140126_201239_inLine
+BABEL_OP2_302_20738_20140126_201239_outLine
+BABEL_OP2_302_22624_20140116_163601_inLine
+BABEL_OP2_302_22624_20140116_163601_outLine
+BABEL_OP2_302_22641_20131104_232148_inLine
+BABEL_OP2_302_22641_20131104_232148_outLine
+BABEL_OP2_302_23628_20131206_185035_inLine
+BABEL_OP2_302_23628_20131206_185035_outLine
+BABEL_OP2_302_23731_20131211_000104_inLine
+BABEL_OP2_302_23731_20131211_000104_outLine
+BABEL_OP2_302_23893_20140314_000251_inLine
+BABEL_OP2_302_23893_20140314_000251_outLine
+BABEL_OP2_302_24924_20140219_171405_inLine
+BABEL_OP2_302_24924_20140219_171405_outLine
+BABEL_OP2_302_28422_20131224_204108_inLine
+BABEL_OP2_302_28422_20131224_204108_outLine
+BABEL_OP2_302_28871_20131030_171711_inLine
+BABEL_OP2_302_28871_20131030_171711_outLine
+BABEL_OP2_302_29352_20140304_201752_inLine
+BABEL_OP2_302_29352_20140304_201752_outLine
+BABEL_OP2_302_29777_20140114_172507_inLine
+BABEL_OP2_302_29777_20140114_172507_outLine
+BABEL_OP2_302_31979_20131206_224314_inLine
+BABEL_OP2_302_31979_20131206_224314_outLine
+BABEL_OP2_302_32914_20140106_220002_inLine
+BABEL_OP2_302_32914_20140106_220002_outLine
+BABEL_OP2_302_33635_20131206_225838_inLine
+BABEL_OP2_302_33635_20131206_225838_outLine
+BABEL_OP2_302_33672_20131111_153638_inLine
+BABEL_OP2_302_33672_20131111_153638_outLine
+BABEL_OP2_302_37064_20131207_191407_inLine
+BABEL_OP2_302_37064_20131207_191407_outLine
+BABEL_OP2_302_37499_20140225_222508_inLine
+BABEL_OP2_302_37499_20140225_222508_outLine
+BABEL_OP2_302_38139_20140315_230332_inLine
+BABEL_OP2_302_38139_20140315_230332_outLine
+BABEL_OP2_302_38979_20140126_212312_inLine
+BABEL_OP2_302_38979_20140126_212312_outLine
+BABEL_OP2_302_41493_20131031_190908_inLine
+BABEL_OP2_302_41493_20131031_190908_outLine
+BABEL_OP2_302_42299_20140216_142852_inLine
+BABEL_OP2_302_42299_20140216_142852_outLine
+BABEL_OP2_302_42942_20131207_000752_inLine
+BABEL_OP2_302_42942_20131207_000752_outLine
+BABEL_OP2_302_43388_20131222_214138_inLine
+BABEL_OP2_302_43388_20131222_214138_outLine
+BABEL_OP2_302_45777_20131209_205207_inLine
+BABEL_OP2_302_45777_20131209_205207_outLine
+BABEL_OP2_302_46974_20140108_014337_inLine
+BABEL_OP2_302_46974_20140108_014337_outLine
+BABEL_OP2_302_47877_20140118_204004_inLine
+BABEL_OP2_302_47877_20140118_204004_outLine
+BABEL_OP2_302_48016_20140220_174426_inLine
+BABEL_OP2_302_48016_20140220_174426_outLine
+BABEL_OP2_302_49775_20131103_031204_inLine
+BABEL_OP2_302_49775_20131103_031204_outLine
+BABEL_OP2_302_49902_20131218_203252_inLine
+BABEL_OP2_302_49902_20131218_203252_outLine
+BABEL_OP2_302_52025_20131108_191032_inLine
+BABEL_OP2_302_52025_20131108_191032_outLine
+BABEL_OP2_302_52025_20131108_193401_inLine
+BABEL_OP2_302_52025_20131108_193401_outLine
+BABEL_OP2_302_54744_20131111_235401_inLine
+BABEL_OP2_302_54744_20131111_235401_outLine
+BABEL_OP2_302_55742_20131118_154051_inLine
+BABEL_OP2_302_55742_20131118_154051_outLine
+BABEL_OP2_302_56019_20140226_155123_inLine
+BABEL_OP2_302_56019_20140226_155123_outLine
+BABEL_OP2_302_56370_20131120_230147_inLine
+BABEL_OP2_302_56370_20131120_230147_outLine
+BABEL_OP2_302_56429_20131117_181816_inLine
+BABEL_OP2_302_56429_20131117_181816_outLine
+BABEL_OP2_302_56523_20131215_162313_inLine
+BABEL_OP2_302_56523_20131215_162313_outLine
+BABEL_OP2_302_57219_20140218_190044_inLine
+BABEL_OP2_302_57219_20140218_190044_outLine
+BABEL_OP2_302_57650_20140126_224015_inLine
+BABEL_OP2_302_57650_20140126_224015_outLine
+BABEL_OP2_302_58815_20140125_201759_inLine
+BABEL_OP2_302_58815_20140125_201759_outLine
+BABEL_OP2_302_60836_20131115_015627_inLine
+BABEL_OP2_302_60836_20131115_015627_outLine
+BABEL_OP2_302_61219_20131128_233326_inLine
+BABEL_OP2_302_61219_20131128_233326_outLine
+BABEL_OP2_302_62286_20131214_174209_inLine
+BABEL_OP2_302_62286_20131214_174209_outLine
+BABEL_OP2_302_63481_20131105_213305_inLine
+BABEL_OP2_302_63481_20131105_213305_outLine
+BABEL_OP2_302_64759_20131107_153706_inLine
+BABEL_OP2_302_66967_20131125_200431_inLine
+BABEL_OP2_302_66967_20131125_200431_outLine
+BABEL_OP2_302_66967_20131125_201605_inLine
+BABEL_OP2_302_66967_20131125_201605_outLine
+BABEL_OP2_302_66967_20131125_202216_inLine
+BABEL_OP2_302_66967_20131125_202216_outLine
+BABEL_OP2_302_67066_20140215_220827_inLine
+BABEL_OP2_302_67066_20140215_220827_outLine
+BABEL_OP2_302_71404_20131128_225018_inLine
+BABEL_OP2_302_71404_20131128_225018_outLine
+BABEL_OP2_302_71780_20131121_222518_inLine
+BABEL_OP2_302_71780_20131121_222518_outLine
+BABEL_OP2_302_73042_20131115_165006_inLine
+BABEL_OP2_302_73042_20131115_165006_outLine
+BABEL_OP2_302_73119_20131128_222112_inLine
+BABEL_OP2_302_73119_20131128_222112_outLine
+BABEL_OP2_302_73622_20131117_223750_inLine
+BABEL_OP2_302_73622_20131117_223750_outLine
+BABEL_OP2_302_73622_20131117_230514_inLine
+BABEL_OP2_302_73622_20131117_230514_outLine
+BABEL_OP2_302_76372_20140121_204025_inLine
+BABEL_OP2_302_76372_20140121_204025_outLine
+BABEL_OP2_302_76773_20131117_001202_inLine
+BABEL_OP2_302_76773_20131117_001202_outLine
+BABEL_OP2_302_77112_20131127_221650_inLine
+BABEL_OP2_302_77112_20131127_221650_outLine
+BABEL_OP2_302_78604_20131117_205614_inLine
+BABEL_OP2_302_78604_20131117_205614_outLine
+BABEL_OP2_302_78604_20131117_210914_inLine
+BABEL_OP2_302_78604_20131117_210914_outLine
+BABEL_OP2_302_78749_20140305_221314_inLine
+BABEL_OP2_302_78749_20140305_221314_outLine
+BABEL_OP2_302_79107_20140223_160949_inLine
+BABEL_OP2_302_79107_20140223_160949_outLine
+BABEL_OP2_302_79505_20140221_191940_inLine
+BABEL_OP2_302_79505_20140221_191940_outLine
+BABEL_OP2_302_79571_20131224_210857_inLine
+BABEL_OP2_302_79571_20131224_210857_outLine
+BABEL_OP2_302_80881_20131130_200459_inLine
+BABEL_OP2_302_80881_20131130_200459_outLine
+BABEL_OP2_302_80897_20131226_221806_inLine
+BABEL_OP2_302_80897_20131226_221806_outLine
+BABEL_OP2_302_82966_20140203_200450_inLine
+BABEL_OP2_302_82966_20140203_200450_outLine
+BABEL_OP2_302_85179_20140113_180639_inLine
+BABEL_OP2_302_85179_20140113_180639_outLine
+BABEL_OP2_302_87280_20140123_211738_inLine
+BABEL_OP2_302_87280_20140123_211738_outLine
+BABEL_OP2_302_88686_20140131_165805_inLine
+BABEL_OP2_302_88686_20140131_165805_outLine
+BABEL_OP2_302_89372_20131106_214629_inLine
+BABEL_OP2_302_89372_20131106_214629_outLine
+BABEL_OP2_302_90417_20140215_195110_inLine
+BABEL_OP2_302_90417_20140215_195110_outLine
+BABEL_OP2_302_90935_20131207_172013_inLine
+BABEL_OP2_302_90935_20131207_172013_outLine
+BABEL_OP2_302_92281_20140312_223937_inLine
+BABEL_OP2_302_92281_20140312_223937_outLine
+BABEL_OP2_302_93224_20131219_004305_inLine
+BABEL_OP2_302_93224_20131219_004305_outLine
+BABEL_OP2_302_93861_20131208_195418_inLine
+BABEL_OP2_302_93861_20131208_195418_outLine
+BABEL_OP2_302_95663_20131031_164153_inLine
+BABEL_OP2_302_95663_20131031_164153_outLine
+BABEL_OP2_302_97097_20140121_214508_inLine
+BABEL_OP2_302_97097_20140121_214508_outLine
+BABEL_OP2_302_97220_20140216_214954_inLine
+BABEL_OP2_302_97220_20140216_214954_outLine
+BABEL_OP2_302_97264_20140203_010930_inLine
+BABEL_OP2_302_97264_20140203_010930_outLine
+BABEL_OP2_302_97988_20140226_180453_inLine
+BABEL_OP2_302_97988_20140226_180453_outLine
+BABEL_OP2_302_98888_20140224_195320_inLine
+BABEL_OP2_302_98888_20140224_195320_outLine
+BABEL_OP2_302_99344_20140317_184547_inLine
+BABEL_OP2_302_99344_20140317_184547_outLine
+BABEL_OP2_302_99516_20131109_182628_inLine
+BABEL_OP2_302_99516_20131109_182628_outLine
diff --git a/egs/babel/s5d/conf/lists/302-kazakh/evalpart1.list b/egs/babel/s5d/conf/lists/302-kazakh/evalpart1.list
new file mode 100644
index 00000000000..402c6ca4cb0
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/302-kazakh/evalpart1.list
@@ -0,0 +1,61 @@
+BABEL_OP2_302_10416_20131210_035651_inLine
+BABEL_OP2_302_10416_20131210_035651_outLine
+BABEL_OP2_302_16787_20131207_203127_inLine
+BABEL_OP2_302_16787_20131207_203127_outLine
+BABEL_OP2_302_18863_20140118_154802_inLine
+BABEL_OP2_302_18863_20140118_154802_outLine
+BABEL_OP2_302_19672_20131217_215636_inLine
+BABEL_OP2_302_19672_20131217_215636_outLine
+BABEL_OP2_302_23628_20131206_185035_inLine
+BABEL_OP2_302_23628_20131206_185035_outLine
+BABEL_OP2_302_23731_20131211_000104_inLine
+BABEL_OP2_302_23731_20131211_000104_outLine
+BABEL_OP2_302_33635_20131206_225838_inLine
+BABEL_OP2_302_33635_20131206_225838_outLine
+BABEL_OP2_302_42942_20131207_000752_inLine
+BABEL_OP2_302_42942_20131207_000752_outLine
+BABEL_OP2_302_45777_20131209_205207_inLine
+BABEL_OP2_302_45777_20131209_205207_outLine
+BABEL_OP2_302_46974_20140108_014337_inLine
+BABEL_OP2_302_46974_20140108_014337_outLine
+BABEL_OP2_302_48016_20140220_174426_inLine
+BABEL_OP2_302_48016_20140220_174426_outLine
+BABEL_OP2_302_49775_20131103_031204_inLine
+BABEL_OP2_302_49775_20131103_031204_outLine
+BABEL_OP2_302_54744_20131111_235401_inLine
+BABEL_OP2_302_54744_20131111_235401_outLine
+BABEL_OP2_302_55742_20131118_154051_inLine
+BABEL_OP2_302_55742_20131118_154051_outLine
+BABEL_OP2_302_56019_20140226_155123_inLine
+BABEL_OP2_302_56019_20140226_155123_outLine
+BABEL_OP2_302_56429_20131117_181816_inLine
+BABEL_OP2_302_56429_20131117_181816_outLine
+BABEL_OP2_302_57650_20140126_224015_inLine
+BABEL_OP2_302_57650_20140126_224015_outLine
+BABEL_OP2_302_58815_20140125_201759_inLine
+BABEL_OP2_302_58815_20140125_201759_outLine
+BABEL_OP2_302_63481_20131105_213305_inLine
+BABEL_OP2_302_63481_20131105_213305_outLine
+BABEL_OP2_302_64759_20131107_153706_inLine
+BABEL_OP2_302_71780_20131121_222518_inLine
+BABEL_OP2_302_71780_20131121_222518_outLine
+BABEL_OP2_302_73042_20131115_165006_inLine
+BABEL_OP2_302_73042_20131115_165006_outLine
+BABEL_OP2_302_73119_20131128_222112_inLine
+BABEL_OP2_302_73119_20131128_222112_outLine
+BABEL_OP2_302_76773_20131117_001202_inLine
+BABEL_OP2_302_76773_20131117_001202_outLine
+BABEL_OP2_302_78604_20131117_205614_inLine
+BABEL_OP2_302_78604_20131117_205614_outLine
+BABEL_OP2_302_78604_20131117_210914_inLine
+BABEL_OP2_302_78604_20131117_210914_outLine
+BABEL_OP2_302_80897_20131226_221806_inLine
+BABEL_OP2_302_80897_20131226_221806_outLine
+BABEL_OP2_302_89372_20131106_214629_inLine
+BABEL_OP2_302_89372_20131106_214629_outLine
+BABEL_OP2_302_92281_20140312_223937_inLine
+BABEL_OP2_302_92281_20140312_223937_outLine
+BABEL_OP2_302_97097_20140121_214508_inLine
+BABEL_OP2_302_97097_20140121_214508_outLine
+BABEL_OP2_302_98888_20140224_195320_inLine
+BABEL_OP2_302_98888_20140224_195320_outLine
diff --git a/egs/babel/s5d/conf/lists/302-kazakh/sub-train.list b/egs/babel/s5d/conf/lists/302-kazakh/sub-train.list
new file mode 100644
index 00000000000..ef82fb8fc17
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/302-kazakh/sub-train.list
@@ -0,0 +1,130 @@
+BABEL_OP2_302_13483_20140111_145619_inLine
+BABEL_OP2_302_13483_20140111_145619_outLine
+BABEL_OP2_302_13792_20131105_160713_inLine
+BABEL_OP2_302_13792_20131105_160713_outLine
+BABEL_OP2_302_14137_20131205_201718_inLine
+BABEL_OP2_302_14137_20131205_201718_outLine
+BABEL_OP2_302_15638_20131227_190456_inLine
+BABEL_OP2_302_15638_20131227_190456_outLine
+BABEL_OP2_302_16467_20140125_193127_inLine
+BABEL_OP2_302_16467_20140125_193127_outLine
+BABEL_OP2_302_16886_20131209_211339_inLine
+BABEL_OP2_302_16886_20131209_211339_outLine
+BABEL_OP2_302_17113_20140216_165407_inLine
+BABEL_OP2_302_17113_20140216_165407_outLine
+BABEL_OP2_302_17567_20131227_223417_inLine
+BABEL_OP2_302_17567_20131227_223417_outLine
+BABEL_OP2_302_18118_20140312_010735_inLine
+BABEL_OP2_302_18118_20140312_010735_outLine
+BABEL_OP2_302_19722_20131106_001542_inLine
+BABEL_OP2_302_19722_20131106_001542_outLine
+BABEL_OP2_302_22280_20131214_220249_inLine
+BABEL_OP2_302_22280_20131214_220249_outLine
+BABEL_OP2_302_23505_20131113_214234_inLine
+BABEL_OP2_302_23505_20131113_214234_outLine
+BABEL_OP2_302_23505_20131113_215736_inLine
+BABEL_OP2_302_23505_20131113_215736_outLine
+BABEL_OP2_302_24323_20131207_212641_inLine
+BABEL_OP2_302_24323_20131207_212641_outLine
+BABEL_OP2_302_25085_20140216_161934_inLine
+BABEL_OP2_302_25085_20140216_161934_outLine
+BABEL_OP2_302_29135_20131031_201509_inLine
+BABEL_OP2_302_29135_20131031_201509_outLine
+BABEL_OP2_302_29416_20140125_222019_inLine
+BABEL_OP2_302_29416_20140125_222019_outLine
+BABEL_OP2_302_31490_20131120_230743_inLine
+BABEL_OP2_302_31490_20131120_230743_outLine
+BABEL_OP2_302_32287_20140316_185534_inLine
+BABEL_OP2_302_32287_20140316_185534_outLine
+BABEL_OP2_302_32301_20140108_212650_inLine
+BABEL_OP2_302_32301_20140108_212650_outLine
+BABEL_OP2_302_34197_20131203_173358_inLine
+BABEL_OP2_302_34197_20131203_173358_outLine
+BABEL_OP2_302_34477_20131205_030548_inLine
+BABEL_OP2_302_34477_20131205_030548_outLine
+BABEL_OP2_302_34477_20131205_035623_inLine
+BABEL_OP2_302_34477_20131205_035623_outLine
+BABEL_OP2_302_37598_20131218_200535_inLine
+BABEL_OP2_302_37598_20131218_200535_outLine
+BABEL_OP2_302_38588_20131216_211052_inLine
+BABEL_OP2_302_38588_20131216_211052_outLine
+BABEL_OP2_302_39744_20131031_182731_inLine
+BABEL_OP2_302_39744_20131031_182731_outLine
+BABEL_OP2_302_41233_20140111_195838_inLine
+BABEL_OP2_302_41233_20140111_195838_outLine
+BABEL_OP2_302_43646_20131204_185430_inLine
+BABEL_OP2_302_43646_20131204_185430_outLine
+BABEL_OP2_302_43920_20140312_031242_inLine
+BABEL_OP2_302_43920_20140312_031242_outLine
+BABEL_OP2_302_44619_20131212_234348_inLine
+BABEL_OP2_302_44619_20131212_234348_outLine
+BABEL_OP2_302_46763_20140225_183302_inLine
+BABEL_OP2_302_46763_20140225_183302_outLine
+BABEL_OP2_302_48243_20131128_221311_inLine
+BABEL_OP2_302_48243_20131128_221311_outLine
+BABEL_OP2_302_49912_20140217_201647_inLine
+BABEL_OP2_302_49912_20140217_201647_outLine
+BABEL_OP2_302_50779_20131219_172746_inLine
+BABEL_OP2_302_50779_20131219_172746_outLine
+BABEL_OP2_302_53492_20140124_221354_inLine
+BABEL_OP2_302_53492_20140124_221354_outLine
+BABEL_OP2_302_53492_20140124_231722_inLine
+BABEL_OP2_302_53492_20140124_231722_outLine
+BABEL_OP2_302_56306_20140115_190808_inLine
+BABEL_OP2_302_56306_20140115_190808_outLine
+BABEL_OP2_302_58850_20131209_231304_inLine
+BABEL_OP2_302_58850_20131209_231304_outLine
+BABEL_OP2_302_61888_20140127_161005_inLine
+BABEL_OP2_302_61888_20140127_161005_outLine
+BABEL_OP2_302_70386_20131203_030837_inLine
+BABEL_OP2_302_70386_20131203_030837_outLine
+BABEL_OP2_302_70452_20131219_032729_inLine
+BABEL_OP2_302_70452_20131219_032729_outLine
+BABEL_OP2_302_71038_20140119_172132_inLine
+BABEL_OP2_302_71038_20140119_172132_outLine
+BABEL_OP2_302_71067_20140130_194954_inLine
+BABEL_OP2_302_71067_20140130_194954_outLine
+BABEL_OP2_302_75223_20131130_211714_inLine
+BABEL_OP2_302_75223_20131130_211714_outLine
+BABEL_OP2_302_75223_20131130_212825_inLine
+BABEL_OP2_302_75223_20131130_212825_outLine
+BABEL_OP2_302_77126_20131111_012344_inLine
+BABEL_OP2_302_77126_20131111_012344_outLine
+BABEL_OP2_302_77242_20140217_184823_inLine
+BABEL_OP2_302_77242_20140217_184823_outLine
+BABEL_OP2_302_79898_20140310_200258_inLine
+BABEL_OP2_302_79898_20140310_200258_outLine
+BABEL_OP2_302_80781_20131207_183741_inLine
+BABEL_OP2_302_80781_20131207_183741_outLine
+BABEL_OP2_302_81213_20131118_175514_inLine
+BABEL_OP2_302_81213_20131118_175514_outLine
+BABEL_OP2_302_82138_20131206_045140_inLine
+BABEL_OP2_302_82138_20131206_045140_outLine
+BABEL_OP2_302_82145_20140301_225354_inLine
+BABEL_OP2_302_82145_20140301_225354_outLine
+BABEL_OP2_302_82224_20140203_014024_inLine
+BABEL_OP2_302_82224_20140203_014024_outLine
+BABEL_OP2_302_83436_20131106_170059_inLine
+BABEL_OP2_302_83436_20131106_170059_outLine
+BABEL_OP2_302_84408_20131207_204020_inLine
+BABEL_OP2_302_84408_20131207_204020_outLine
+BABEL_OP2_302_85010_20140316_222754_inLine
+BABEL_OP2_302_85010_20140316_222754_outLine
+BABEL_OP2_302_87298_20140130_191447_inLine
+BABEL_OP2_302_87298_20140130_191447_outLine
+BABEL_OP2_302_87693_20131121_041057_inLine
+BABEL_OP2_302_87693_20131121_041057_outLine
+BABEL_OP2_302_94803_20140313_225823_inLine
+BABEL_OP2_302_94803_20140313_225823_outLine
+BABEL_OP2_302_95598_20131101_172634_inLine
+BABEL_OP2_302_95598_20131101_172634_outLine
+BABEL_OP2_302_95598_20131101_175037_inLine
+BABEL_OP2_302_95598_20131101_175037_outLine
+BABEL_OP2_302_95903_20140303_002203_inLine
+BABEL_OP2_302_95903_20140303_002203_outLine
+BABEL_OP2_302_97731_20140114_201001_inLine
+BABEL_OP2_302_97731_20140114_201001_outLine
+BABEL_OP2_302_97772_20131107_223232_inLine
+BABEL_OP2_302_97772_20131107_223232_outLine
+BABEL_OP2_302_98489_20131204_181216_inLine
+BABEL_OP2_302_98489_20131204_181216_outLine
diff --git a/egs/babel/s5d/conf/lists/302-kazakh/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/302-kazakh/sub-train.untranscribed.list
new file mode 100644
index 00000000000..668576c2888
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/302-kazakh/sub-train.untranscribed.list
@@ -0,0 +1,398 @@
+BABEL_OP2_302_10036_20131223_231808_inLine
+BABEL_OP2_302_10036_20131223_231808_outLine
+BABEL_OP2_302_10313_20140319_000910_inLine
+BABEL_OP2_302_10313_20140319_000910_outLine
+BABEL_OP2_302_10938_20140110_231500_inLine
+BABEL_OP2_302_10938_20140110_231500_outLine
+BABEL_OP2_302_10966_20131201_171745_inLine
+BABEL_OP2_302_10966_20131201_171745_outLine
+BABEL_OP2_302_11486_20140327_014542_inLine
+BABEL_OP2_302_11486_20140327_014542_outLine
+BABEL_OP2_302_11528_20140313_172050_inLine
+BABEL_OP2_302_11528_20140313_172050_outLine
+BABEL_OP2_302_11581_20131224_173459_inLine
+BABEL_OP2_302_11581_20131224_173459_outLine
+BABEL_OP2_302_11797_20131123_210739_inLine
+BABEL_OP2_302_11797_20131123_210739_outLine
+BABEL_OP2_302_12220_20131208_170136_inLine
+BABEL_OP2_302_12220_20131208_170136_outLine
+BABEL_OP2_302_12606_20140203_201955_inLine
+BABEL_OP2_302_12606_20140203_201955_outLine
+BABEL_OP2_302_12609_20140213_010711_inLine
+BABEL_OP2_302_12609_20140213_010711_outLine
+BABEL_OP2_302_12767_20131109_202610_inLine
+BABEL_OP2_302_12767_20131109_202610_outLine
+BABEL_OP2_302_12846_20140216_173021_inLine
+BABEL_OP2_302_12846_20140216_173021_outLine
+BABEL_OP2_302_12851_20131030_220616_inLine
+BABEL_OP2_302_12851_20131030_220616_outLine
+BABEL_OP2_302_13664_20131030_032900_inLine
+BABEL_OP2_302_13664_20131030_032900_outLine
+BABEL_OP2_302_13709_20140126_163818_inLine
+BABEL_OP2_302_13709_20140126_163818_outLine
+BABEL_OP2_302_14725_20131106_204535_inLine
+BABEL_OP2_302_14725_20131106_204535_outLine
+BABEL_OP2_302_14807_20131220_203507_inLine
+BABEL_OP2_302_14807_20131220_203507_outLine
+BABEL_OP2_302_14814_20131206_165156_inLine
+BABEL_OP2_302_14814_20131206_165156_outLine
+BABEL_OP2_302_14899_20131101_223556_inLine
+BABEL_OP2_302_14899_20131101_223556_outLine
+BABEL_OP2_302_14972_20131220_203939_inLine
+BABEL_OP2_302_14972_20131220_203939_outLine
+BABEL_OP2_302_15535_20131227_221937_inLine
+BABEL_OP2_302_15535_20131227_221937_outLine
+BABEL_OP2_302_15617_20140312_215158_inLine
+BABEL_OP2_302_15617_20140312_215158_outLine
+BABEL_OP2_302_15730_20131121_044516_inLine
+BABEL_OP2_302_15730_20131121_044516_outLine
+BABEL_OP2_302_16839_20140203_151410_inLine
+BABEL_OP2_302_16839_20140203_151410_outLine
+BABEL_OP2_302_17032_20140108_211239_inLine
+BABEL_OP2_302_17032_20140108_211239_outLine
+BABEL_OP2_302_17097_20140310_234246_inLine
+BABEL_OP2_302_17097_20140310_234246_outLine
+BABEL_OP2_302_17280_20131214_140641_inLine
+BABEL_OP2_302_17280_20131214_140641_outLine
+BABEL_OP2_302_17320_20140203_165125_inLine
+BABEL_OP2_302_17320_20140203_165125_outLine
+BABEL_OP2_302_18078_20140219_195739_inLine
+BABEL_OP2_302_18078_20140219_195739_outLine
+BABEL_OP2_302_18242_20140219_185647_inLine
+BABEL_OP2_302_18242_20140219_185647_outLine
+BABEL_OP2_302_19773_20140113_201049_inLine
+BABEL_OP2_302_19773_20140113_201049_outLine
+BABEL_OP2_302_20133_20131225_003913_inLine
+BABEL_OP2_302_20133_20131225_003913_outLine
+BABEL_OP2_302_20367_20140220_000514_inLine
+BABEL_OP2_302_20367_20140220_000514_outLine
+BABEL_OP2_302_20437_20140317_015757_inLine
+BABEL_OP2_302_20437_20140317_015757_outLine
+BABEL_OP2_302_20916_20131031_232512_inLine
+BABEL_OP2_302_20916_20131031_232512_outLine
+BABEL_OP2_302_20922_20140115_174224_inLine
+BABEL_OP2_302_20922_20140115_174224_outLine
+BABEL_OP2_302_20972_20140301_200910_inLine
+BABEL_OP2_302_20972_20140301_200910_outLine
+BABEL_OP2_302_20985_20131227_225613_inLine
+BABEL_OP2_302_20985_20131227_225613_outLine
+BABEL_OP2_302_20985_20131227_230755_inLine
+BABEL_OP2_302_20985_20131227_230755_outLine
+BABEL_OP2_302_21206_20131209_212818_inLine
+BABEL_OP2_302_21206_20131209_212818_outLine
+BABEL_OP2_302_21206_20131209_214221_inLine
+BABEL_OP2_302_21206_20131209_214221_outLine
+BABEL_OP2_302_21435_20140201_181751_inLine
+BABEL_OP2_302_21435_20140201_181751_outLine
+BABEL_OP2_302_21624_20140302_191929_inLine
+BABEL_OP2_302_21624_20140302_191929_outLine
+BABEL_OP2_302_21807_20131215_163416_inLine
+BABEL_OP2_302_21807_20131215_163416_outLine
+BABEL_OP2_302_22321_20131204_001445_inLine
+BABEL_OP2_302_22321_20131204_001445_outLine
+BABEL_OP2_302_22321_20131204_002854_inLine
+BABEL_OP2_302_22321_20131204_002854_outLine
+BABEL_OP2_302_22446_20131107_221527_inLine
+BABEL_OP2_302_22446_20131107_221527_outLine
+BABEL_OP2_302_23239_20131227_213345_inLine
+BABEL_OP2_302_23239_20131227_213345_outLine
+BABEL_OP2_302_23239_20131227_214733_inLine
+BABEL_OP2_302_23239_20131227_214733_outLine
+BABEL_OP2_302_23980_20131206_213027_inLine
+BABEL_OP2_302_23980_20131206_213027_outLine
+BABEL_OP2_302_24239_20140314_185042_inLine
+BABEL_OP2_302_24239_20140314_185042_outLine
+BABEL_OP2_302_24241_20140312_211507_inLine
+BABEL_OP2_302_24241_20140312_211507_outLine
+BABEL_OP2_302_24270_20131218_184807_inLine
+BABEL_OP2_302_24270_20131218_184807_outLine
+BABEL_OP2_302_24586_20140301_162559_inLine
+BABEL_OP2_302_24586_20140301_162559_outLine
+BABEL_OP2_302_24605_20131109_160432_inLine
+BABEL_OP2_302_24605_20131109_160432_outLine
+BABEL_OP2_302_24648_20140313_194015_inLine
+BABEL_OP2_302_24648_20140313_194015_outLine
+BABEL_OP2_302_24679_20131101_171953_inLine
+BABEL_OP2_302_24679_20131101_171953_outLine
+BABEL_OP2_302_24679_20131101_173035_inLine
+BABEL_OP2_302_24679_20131101_173035_outLine
+BABEL_OP2_302_24982_20131128_202029_inLine
+BABEL_OP2_302_24982_20131128_202029_outLine
+BABEL_OP2_302_24982_20131128_202537_inLine
+BABEL_OP2_302_24982_20131128_202537_outLine
+BABEL_OP2_302_24982_20131128_203436_inLine
+BABEL_OP2_302_24982_20131128_203436_outLine
+BABEL_OP2_302_25496_20140228_212748_inLine
+BABEL_OP2_302_25496_20140228_212748_outLine
+BABEL_OP2_302_25767_20131108_203252_inLine
+BABEL_OP2_302_25767_20131108_203252_outLine
+BABEL_OP2_302_25767_20131108_205755_inLine
+BABEL_OP2_302_25767_20131108_205755_outLine
+BABEL_OP2_302_25961_20131122_214450_inLine
+BABEL_OP2_302_25961_20131122_214450_outLine
+BABEL_OP2_302_26388_20140203_173156_inLine
+BABEL_OP2_302_26388_20140203_173156_outLine
+BABEL_OP2_302_26836_20131207_194346_inLine
+BABEL_OP2_302_26836_20131207_194346_outLine
+BABEL_OP2_302_27367_20140317_000858_inLine
+BABEL_OP2_302_27367_20140317_000858_outLine
+BABEL_OP2_302_28012_20140115_155940_inLine
+BABEL_OP2_302_28012_20140115_155940_outLine
+BABEL_OP2_302_28477_20140127_173004_inLine
+BABEL_OP2_302_28477_20140127_173004_outLine
+BABEL_OP2_302_28595_20140312_200036_inLine
+BABEL_OP2_302_28595_20140312_200036_outLine
+BABEL_OP2_302_28814_20140115_202820_inLine
+BABEL_OP2_302_28814_20140115_202820_outLine
+BABEL_OP2_302_29072_20131224_215008_inLine
+BABEL_OP2_302_29072_20131224_215008_outLine
+BABEL_OP2_302_29439_20140226_160155_inLine
+BABEL_OP2_302_29439_20140226_160155_outLine
+BABEL_OP2_302_30013_20140111_202103_inLine
+BABEL_OP2_302_30013_20140111_202103_outLine
+BABEL_OP2_302_30345_20140113_154634_inLine
+BABEL_OP2_302_30345_20140113_154634_outLine
+BABEL_OP2_302_30461_20140305_205327_inLine
+BABEL_OP2_302_30461_20140305_205327_outLine
+BABEL_OP2_302_30720_20140312_002038_inLine
+BABEL_OP2_302_30720_20140312_002038_outLine
+BABEL_OP2_302_31267_20140221_194733_inLine
+BABEL_OP2_302_31267_20140221_194733_outLine
+BABEL_OP2_302_32097_20131106_232714_inLine
+BABEL_OP2_302_32097_20131106_232714_outLine
+BABEL_OP2_302_32097_20131106_233937_inLine
+BABEL_OP2_302_32097_20131106_233937_outLine
+BABEL_OP2_302_34106_20131118_201548_inLine
+BABEL_OP2_302_34106_20131118_201548_outLine
+BABEL_OP2_302_34486_20140313_003302_inLine
+BABEL_OP2_302_34486_20140313_003302_outLine
+BABEL_OP2_302_34811_20131210_202739_inLine
+BABEL_OP2_302_34811_20131210_202739_outLine
+BABEL_OP2_302_34826_20140127_205243_inLine
+BABEL_OP2_302_34826_20140127_205243_outLine
+BABEL_OP2_302_35000_20140126_011711_inLine
+BABEL_OP2_302_35000_20140126_011711_outLine
+BABEL_OP2_302_35139_20131117_174341_inLine
+BABEL_OP2_302_35139_20131117_174341_outLine
+BABEL_OP2_302_36894_20131113_172242_inLine
+BABEL_OP2_302_36894_20131113_172242_outLine
+BABEL_OP2_302_37271_20131228_201109_inLine
+BABEL_OP2_302_37271_20131228_201109_outLine
+BABEL_OP2_302_37682_20131218_170241_inLine
+BABEL_OP2_302_37682_20131218_170241_outLine
+BABEL_OP2_302_39006_20140220_200207_inLine
+BABEL_OP2_302_39006_20140220_200207_outLine
+BABEL_OP2_302_39555_20140110_211809_inLine
+BABEL_OP2_302_39555_20140110_211809_outLine
+BABEL_OP2_302_39848_20131210_214951_inLine
+BABEL_OP2_302_39848_20131210_214951_outLine
+BABEL_OP2_302_41680_20131031_034941_inLine
+BABEL_OP2_302_41680_20131031_034941_outLine
+BABEL_OP2_302_42526_20140119_151507_inLine
+BABEL_OP2_302_42526_20140119_151507_outLine
+BABEL_OP2_302_42771_20131210_163330_inLine
+BABEL_OP2_302_42771_20131210_163330_outLine
+BABEL_OP2_302_43286_20131105_180949_inLine
+BABEL_OP2_302_43286_20131105_180949_outLine
+BABEL_OP2_302_43784_20131128_211932_inLine
+BABEL_OP2_302_43784_20131128_211932_outLine
+BABEL_OP2_302_43788_20131225_201206_inLine
+BABEL_OP2_302_43788_20131225_201206_outLine
+BABEL_OP2_302_44961_20131111_223956_inLine
+BABEL_OP2_302_44961_20131111_223956_outLine
+BABEL_OP2_302_45770_20131107_181651_inLine
+BABEL_OP2_302_45770_20131107_181651_outLine
+BABEL_OP2_302_46268_20131113_230054_inLine
+BABEL_OP2_302_46268_20131113_230054_outLine
+BABEL_OP2_302_46558_20131204_011205_inLine
+BABEL_OP2_302_46558_20131204_011205_outLine
+BABEL_OP2_302_46688_20131114_195414_inLine
+BABEL_OP2_302_46688_20131114_195414_outLine
+BABEL_OP2_302_46757_20131227_210756_inLine
+BABEL_OP2_302_46757_20131227_210756_outLine
+BABEL_OP2_302_46976_20131212_235226_inLine
+BABEL_OP2_302_46976_20131212_235226_outLine
+BABEL_OP2_302_47866_20140122_200544_inLine
+BABEL_OP2_302_47866_20140122_200544_outLine
+BABEL_OP2_302_48610_20131112_232839_inLine
+BABEL_OP2_302_48610_20131112_232839_outLine
+BABEL_OP2_302_50549_20140130_172816_inLine
+BABEL_OP2_302_50549_20140130_172816_outLine
+BABEL_OP2_302_50810_20131031_180316_inLine
+BABEL_OP2_302_50810_20131031_180316_outLine
+BABEL_OP2_302_51955_20131117_200909_inLine
+BABEL_OP2_302_51955_20131117_200909_outLine
+BABEL_OP2_302_52272_20131106_175931_inLine
+BABEL_OP2_302_52272_20131106_175931_outLine
+BABEL_OP2_302_52381_20140118_163935_inLine
+BABEL_OP2_302_52381_20140118_163935_outLine
+BABEL_OP2_302_52404_20131226_172656_inLine
+BABEL_OP2_302_52404_20131226_172656_outLine
+BABEL_OP2_302_53063_20140219_175252_inLine
+BABEL_OP2_302_53063_20140219_175252_outLine
+BABEL_OP2_302_54074_20131213_143818_inLine
+BABEL_OP2_302_54074_20131213_143818_outLine
+BABEL_OP2_302_54104_20131108_172927_inLine
+BABEL_OP2_302_54104_20131108_172927_outLine
+BABEL_OP2_302_54697_20140125_210815_inLine
+BABEL_OP2_302_54697_20140125_210815_outLine
+BABEL_OP2_302_54953_20131207_170139_inLine
+BABEL_OP2_302_54953_20131207_170139_outLine
+BABEL_OP2_302_55106_20140226_210229_inLine
+BABEL_OP2_302_55106_20140226_210229_outLine
+BABEL_OP2_302_57065_20140105_155451_inLine
+BABEL_OP2_302_57065_20140105_155451_outLine
+BABEL_OP2_302_57548_20131220_025554_inLine
+BABEL_OP2_302_57548_20131220_025554_outLine
+BABEL_OP2_302_57566_20140129_021108_inLine
+BABEL_OP2_302_57566_20140129_021108_outLine
+BABEL_OP2_302_58926_20131121_050015_inLine
+BABEL_OP2_302_58926_20131121_050015_outLine
+BABEL_OP2_302_59509_20140203_215611_inLine
+BABEL_OP2_302_59509_20140203_215611_outLine
+BABEL_OP2_302_59635_20140124_183551_inLine
+BABEL_OP2_302_59635_20140124_183551_outLine
+BABEL_OP2_302_59720_20131206_145023_inLine
+BABEL_OP2_302_59720_20131206_145023_outLine
+BABEL_OP2_302_59747_20131104_164054_inLine
+BABEL_OP2_302_59747_20131104_164054_outLine
+BABEL_OP2_302_60299_20140220_162147_inLine
+BABEL_OP2_302_60299_20140220_162147_outLine
+BABEL_OP2_302_63307_20131213_180556_inLine
+BABEL_OP2_302_63307_20131213_180556_outLine
+BABEL_OP2_302_63999_20140214_214838_inLine
+BABEL_OP2_302_63999_20140214_214838_outLine
+BABEL_OP2_302_64870_20131226_033837_inLine
+BABEL_OP2_302_64870_20131226_033837_outLine
+BABEL_OP2_302_66001_20131114_212059_inLine
+BABEL_OP2_302_66001_20131114_212059_outLine
+BABEL_OP2_302_66822_20131207_210025_inLine
+BABEL_OP2_302_66822_20131207_210025_outLine
+BABEL_OP2_302_67401_20131220_213115_inLine
+BABEL_OP2_302_67401_20131220_213115_outLine
+BABEL_OP2_302_67622_20131109_180009_inLine
+BABEL_OP2_302_67622_20131109_180009_outLine
+BABEL_OP2_302_67659_20131130_221743_inLine
+BABEL_OP2_302_67659_20131130_221743_outLine
+BABEL_OP2_302_67773_20140318_210730_inLine
+BABEL_OP2_302_67773_20140318_210730_outLine
+BABEL_OP2_302_68244_20140131_200512_inLine
+BABEL_OP2_302_68244_20140131_200512_outLine
+BABEL_OP2_302_68289_20140126_231552_inLine
+BABEL_OP2_302_68289_20140126_231552_outLine
+BABEL_OP2_302_68924_20140130_230433_inLine
+BABEL_OP2_302_68924_20140130_230433_outLine
+BABEL_OP2_302_69578_20131207_134320_inLine
+BABEL_OP2_302_69578_20131207_134320_outLine
+BABEL_OP2_302_69746_20140118_173307_inLine
+BABEL_OP2_302_69746_20140118_173307_outLine
+BABEL_OP2_302_70121_20131209_204053_inLine
+BABEL_OP2_302_70121_20131209_204053_outLine
+BABEL_OP2_302_70251_20131109_191513_inLine
+BABEL_OP2_302_70251_20131109_191513_outLine
+BABEL_OP2_302_70460_20140222_223545_inLine
+BABEL_OP2_302_70460_20140222_223545_outLine
+BABEL_OP2_302_70713_20140126_171356_inLine
+BABEL_OP2_302_70713_20140126_171356_outLine
+BABEL_OP2_302_70794_20131107_185831_inLine
+BABEL_OP2_302_70794_20131107_185831_outLine
+BABEL_OP2_302_72952_20140215_000239_inLine
+BABEL_OP2_302_72952_20140215_000239_outLine
+BABEL_OP2_302_73518_20140123_190347_inLine
+BABEL_OP2_302_73518_20140123_190347_outLine
+BABEL_OP2_302_74921_20131225_190044_inLine
+BABEL_OP2_302_74921_20131225_190044_outLine
+BABEL_OP2_302_75505_20131104_234450_inLine
+BABEL_OP2_302_75505_20131104_234450_outLine
+BABEL_OP2_302_75505_20131104_235752_inLine
+BABEL_OP2_302_75505_20131104_235752_outLine
+BABEL_OP2_302_76756_20131219_183439_inLine
+BABEL_OP2_302_76756_20131219_183439_outLine
+BABEL_OP2_302_77033_20140127_183412_inLine
+BABEL_OP2_302_77033_20140127_183412_outLine
+BABEL_OP2_302_77990_20131117_222127_inLine
+BABEL_OP2_302_77990_20131117_222127_outLine
+BABEL_OP2_302_78398_20131116_213051_inLine
+BABEL_OP2_302_78398_20131116_213051_outLine
+BABEL_OP2_302_78943_20131206_035541_inLine
+BABEL_OP2_302_78943_20131206_035541_outLine
+BABEL_OP2_302_78943_20131206_042746_inLine
+BABEL_OP2_302_78943_20131206_042746_outLine
+BABEL_OP2_302_79995_20140125_225240_inLine
+BABEL_OP2_302_79995_20140125_225240_outLine
+BABEL_OP2_302_80306_20131206_235538_inLine
+BABEL_OP2_302_80306_20131206_235538_outLine
+BABEL_OP2_302_81287_20140115_190105_inLine
+BABEL_OP2_302_81287_20140115_190105_outLine
+BABEL_OP2_302_81671_20140131_220121_inLine
+BABEL_OP2_302_81671_20140131_220121_outLine
+BABEL_OP2_302_82089_20131206_191938_inLine
+BABEL_OP2_302_82089_20131206_191938_outLine
+BABEL_OP2_302_82425_20131115_005742_inLine
+BABEL_OP2_302_82425_20131115_005742_outLine
+BABEL_OP2_302_82935_20140114_204802_inLine
+BABEL_OP2_302_82935_20140114_204802_outLine
+BABEL_OP2_302_83430_20140315_203750_inLine
+BABEL_OP2_302_83430_20140315_203750_outLine
+BABEL_OP2_302_83455_20131208_201956_inLine
+BABEL_OP2_302_83455_20131208_201956_outLine
+BABEL_OP2_302_84469_20140107_205046_inLine
+BABEL_OP2_302_84469_20140107_205046_outLine
+BABEL_OP2_302_84715_20140127_201518_inLine
+BABEL_OP2_302_84715_20140127_201518_outLine
+BABEL_OP2_302_84936_20140108_204108_inLine
+BABEL_OP2_302_84936_20140108_204108_outLine
+BABEL_OP2_302_86628_20140215_171431_inLine
+BABEL_OP2_302_86628_20140215_171431_outLine
+BABEL_OP2_302_86715_20140215_174540_inLine
+BABEL_OP2_302_86715_20140215_174540_outLine
+BABEL_OP2_302_87305_20140214_225515_inLine
+BABEL_OP2_302_87305_20140214_225515_outLine
+BABEL_OP2_302_87777_20140127_145958_inLine
+BABEL_OP2_302_87777_20140127_145958_outLine
+BABEL_OP2_302_88661_20131225_211835_inLine
+BABEL_OP2_302_88661_20131225_211835_outLine
+BABEL_OP2_302_88938_20140202_215623_inLine
+BABEL_OP2_302_88938_20140202_215623_outLine
+BABEL_OP2_302_89059_20140115_214308_inLine
+BABEL_OP2_302_89059_20140115_214308_outLine
+BABEL_OP2_302_90709_20131111_143953_inLine
+BABEL_OP2_302_90709_20131111_143953_outLine
+BABEL_OP2_302_92557_20140125_230505_inLine
+BABEL_OP2_302_92557_20140125_230505_outLine
+BABEL_OP2_302_92736_20140119_170328_inLine
+BABEL_OP2_302_92736_20140119_170328_outLine
+BABEL_OP2_302_92809_20131109_182045_inLine
+BABEL_OP2_302_92809_20131109_182045_outLine
+BABEL_OP2_302_92942_20131219_014744_inLine
+BABEL_OP2_302_92942_20131219_014744_outLine
+BABEL_OP2_302_93632_20140203_154221_inLine
+BABEL_OP2_302_93632_20140203_154221_outLine
+BABEL_OP2_302_93964_20131208_002934_inLine
+BABEL_OP2_302_93964_20131208_002934_outLine
+BABEL_OP2_302_94253_20131114_215945_inLine
+BABEL_OP2_302_94253_20131114_215945_outLine
+BABEL_OP2_302_94449_20140314_185933_inLine
+BABEL_OP2_302_94449_20140314_185933_outLine
+BABEL_OP2_302_95670_20131130_185901_inLine
+BABEL_OP2_302_95670_20131130_185901_outLine
+BABEL_OP2_302_96525_20140110_201817_inLine
+BABEL_OP2_302_96525_20140110_201817_outLine
+BABEL_OP2_302_96690_20131204_221739_inLine
+BABEL_OP2_302_96690_20131204_221739_outLine
+BABEL_OP2_302_96910_20140130_210316_inLine
+BABEL_OP2_302_96910_20140130_210316_outLine
+BABEL_OP2_302_97461_20131211_211339_inLine
+BABEL_OP2_302_97461_20131211_211339_outLine
+BABEL_OP2_302_97557_20131219_192714_inLine
+BABEL_OP2_302_97557_20131219_192714_outLine
+BABEL_OP2_302_97588_20131101_161834_inLine
+BABEL_OP2_302_97588_20131101_161834_outLine
+BABEL_OP2_302_97588_20131101_163947_inLine
+BABEL_OP2_302_97588_20131101_163947_outLine
+BABEL_OP2_302_98909_20131117_153948_inLine
+BABEL_OP2_302_98909_20131117_153948_outLine
+BABEL_OP2_302_99401_20131114_221114_inLine
+BABEL_OP2_302_99401_20131114_221114_outLine
+BABEL_OP2_302_99887_20140129_162421_inLine
+BABEL_OP2_302_99887_20140129_162421_outLine
diff --git a/egs/babel/s5d/conf/lists/302-kazakh/training.list b/egs/babel/s5d/conf/lists/302-kazakh/training.list
new file mode 100644
index 00000000000..c2026850026
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/302-kazakh/training.list
@@ -0,0 +1,528 @@
+BABEL_OP2_302_10036_20131223_231808_inLine
+BABEL_OP2_302_10036_20131223_231808_outLine
+BABEL_OP2_302_10313_20140319_000910_inLine
+BABEL_OP2_302_10313_20140319_000910_outLine
+BABEL_OP2_302_10938_20140110_231500_inLine
+BABEL_OP2_302_10938_20140110_231500_outLine
+BABEL_OP2_302_10966_20131201_171745_inLine
+BABEL_OP2_302_10966_20131201_171745_outLine
+BABEL_OP2_302_11486_20140327_014542_inLine
+BABEL_OP2_302_11486_20140327_014542_outLine
+BABEL_OP2_302_11528_20140313_172050_inLine
+BABEL_OP2_302_11528_20140313_172050_outLine
+BABEL_OP2_302_11581_20131224_173459_inLine
+BABEL_OP2_302_11581_20131224_173459_outLine
+BABEL_OP2_302_11797_20131123_210739_inLine
+BABEL_OP2_302_11797_20131123_210739_outLine
+BABEL_OP2_302_12220_20131208_170136_inLine
+BABEL_OP2_302_12220_20131208_170136_outLine
+BABEL_OP2_302_12606_20140203_201955_inLine
+BABEL_OP2_302_12606_20140203_201955_outLine
+BABEL_OP2_302_12609_20140213_010711_inLine
+BABEL_OP2_302_12609_20140213_010711_outLine
+BABEL_OP2_302_12767_20131109_202610_inLine
+BABEL_OP2_302_12767_20131109_202610_outLine
+BABEL_OP2_302_12846_20140216_173021_inLine
+BABEL_OP2_302_12846_20140216_173021_outLine
+BABEL_OP2_302_12851_20131030_220616_inLine
+BABEL_OP2_302_12851_20131030_220616_outLine
+BABEL_OP2_302_13483_20140111_145619_inLine
+BABEL_OP2_302_13483_20140111_145619_outLine
+BABEL_OP2_302_13664_20131030_032900_inLine
+BABEL_OP2_302_13664_20131030_032900_outLine
+BABEL_OP2_302_13709_20140126_163818_inLine
+BABEL_OP2_302_13709_20140126_163818_outLine
+BABEL_OP2_302_13792_20131105_160713_inLine
+BABEL_OP2_302_13792_20131105_160713_outLine
+BABEL_OP2_302_14137_20131205_201718_inLine
+BABEL_OP2_302_14137_20131205_201718_outLine
+BABEL_OP2_302_14725_20131106_204535_inLine
+BABEL_OP2_302_14725_20131106_204535_outLine
+BABEL_OP2_302_14807_20131220_203507_inLine
+BABEL_OP2_302_14807_20131220_203507_outLine
+BABEL_OP2_302_14814_20131206_165156_inLine
+BABEL_OP2_302_14814_20131206_165156_outLine
+BABEL_OP2_302_14899_20131101_223556_inLine
+BABEL_OP2_302_14899_20131101_223556_outLine
+BABEL_OP2_302_14972_20131220_203939_inLine
+BABEL_OP2_302_14972_20131220_203939_outLine
+BABEL_OP2_302_15535_20131227_221937_inLine
+BABEL_OP2_302_15535_20131227_221937_outLine
+BABEL_OP2_302_15617_20140312_215158_inLine
+BABEL_OP2_302_15617_20140312_215158_outLine
+BABEL_OP2_302_15638_20131227_190456_inLine
+BABEL_OP2_302_15638_20131227_190456_outLine
+BABEL_OP2_302_15730_20131121_044516_inLine
+BABEL_OP2_302_15730_20131121_044516_outLine
+BABEL_OP2_302_16467_20140125_193127_inLine
+BABEL_OP2_302_16467_20140125_193127_outLine
+BABEL_OP2_302_16839_20140203_151410_inLine
+BABEL_OP2_302_16839_20140203_151410_outLine
+BABEL_OP2_302_16886_20131209_211339_inLine
+BABEL_OP2_302_16886_20131209_211339_outLine
+BABEL_OP2_302_17032_20140108_211239_inLine
+BABEL_OP2_302_17032_20140108_211239_outLine
+BABEL_OP2_302_17097_20140310_234246_inLine
+BABEL_OP2_302_17097_20140310_234246_outLine
+BABEL_OP2_302_17113_20140216_165407_inLine
+BABEL_OP2_302_17113_20140216_165407_outLine
+BABEL_OP2_302_17280_20131214_140641_inLine
+BABEL_OP2_302_17280_20131214_140641_outLine
+BABEL_OP2_302_17320_20140203_165125_inLine
+BABEL_OP2_302_17320_20140203_165125_outLine
+BABEL_OP2_302_17567_20131227_223417_inLine
+BABEL_OP2_302_17567_20131227_223417_outLine
+BABEL_OP2_302_18078_20140219_195739_inLine
+BABEL_OP2_302_18078_20140219_195739_outLine
+BABEL_OP2_302_18118_20140312_010735_inLine
+BABEL_OP2_302_18118_20140312_010735_outLine
+BABEL_OP2_302_18242_20140219_185647_inLine
+BABEL_OP2_302_18242_20140219_185647_outLine
+BABEL_OP2_302_19722_20131106_001542_inLine
+BABEL_OP2_302_19722_20131106_001542_outLine
+BABEL_OP2_302_19773_20140113_201049_inLine
+BABEL_OP2_302_19773_20140113_201049_outLine
+BABEL_OP2_302_20133_20131225_003913_inLine
+BABEL_OP2_302_20133_20131225_003913_outLine
+BABEL_OP2_302_20367_20140220_000514_inLine
+BABEL_OP2_302_20367_20140220_000514_outLine
+BABEL_OP2_302_20437_20140317_015757_inLine
+BABEL_OP2_302_20437_20140317_015757_outLine
+BABEL_OP2_302_20916_20131031_232512_inLine
+BABEL_OP2_302_20916_20131031_232512_outLine
+BABEL_OP2_302_20922_20140115_174224_inLine
+BABEL_OP2_302_20922_20140115_174224_outLine
+BABEL_OP2_302_20972_20140301_200910_inLine
+BABEL_OP2_302_20972_20140301_200910_outLine
+BABEL_OP2_302_20985_20131227_225613_inLine
+BABEL_OP2_302_20985_20131227_225613_outLine
+BABEL_OP2_302_20985_20131227_230755_inLine
+BABEL_OP2_302_20985_20131227_230755_outLine
+BABEL_OP2_302_21206_20131209_212818_inLine
+BABEL_OP2_302_21206_20131209_212818_outLine
+BABEL_OP2_302_21206_20131209_214221_inLine
+BABEL_OP2_302_21206_20131209_214221_outLine
+BABEL_OP2_302_21435_20140201_181751_inLine
+BABEL_OP2_302_21435_20140201_181751_outLine
+BABEL_OP2_302_21624_20140302_191929_inLine
+BABEL_OP2_302_21624_20140302_191929_outLine
+BABEL_OP2_302_21807_20131215_163416_inLine
+BABEL_OP2_302_21807_20131215_163416_outLine
+BABEL_OP2_302_22280_20131214_220249_inLine
+BABEL_OP2_302_22280_20131214_220249_outLine
+BABEL_OP2_302_22321_20131204_001445_inLine
+BABEL_OP2_302_22321_20131204_001445_outLine
+BABEL_OP2_302_22321_20131204_002854_inLine
+BABEL_OP2_302_22321_20131204_002854_outLine
+BABEL_OP2_302_22446_20131107_221527_inLine
+BABEL_OP2_302_22446_20131107_221527_outLine
+BABEL_OP2_302_23239_20131227_213345_inLine
+BABEL_OP2_302_23239_20131227_213345_outLine
+BABEL_OP2_302_23239_20131227_214733_inLine
+BABEL_OP2_302_23239_20131227_214733_outLine
+BABEL_OP2_302_23505_20131113_214234_inLine
+BABEL_OP2_302_23505_20131113_214234_outLine
+BABEL_OP2_302_23505_20131113_215736_inLine
+BABEL_OP2_302_23505_20131113_215736_outLine
+BABEL_OP2_302_23980_20131206_213027_inLine
+BABEL_OP2_302_23980_20131206_213027_outLine
+BABEL_OP2_302_24239_20140314_185042_inLine
+BABEL_OP2_302_24239_20140314_185042_outLine
+BABEL_OP2_302_24241_20140312_211507_inLine
+BABEL_OP2_302_24241_20140312_211507_outLine
+BABEL_OP2_302_24270_20131218_184807_inLine
+BABEL_OP2_302_24270_20131218_184807_outLine
+BABEL_OP2_302_24323_20131207_212641_inLine
+BABEL_OP2_302_24323_20131207_212641_outLine
+BABEL_OP2_302_24586_20140301_162559_inLine
+BABEL_OP2_302_24586_20140301_162559_outLine
+BABEL_OP2_302_24605_20131109_160432_inLine
+BABEL_OP2_302_24605_20131109_160432_outLine
+BABEL_OP2_302_24648_20140313_194015_inLine
+BABEL_OP2_302_24648_20140313_194015_outLine
+BABEL_OP2_302_24679_20131101_171953_inLine
+BABEL_OP2_302_24679_20131101_171953_outLine
+BABEL_OP2_302_24679_20131101_173035_inLine
+BABEL_OP2_302_24679_20131101_173035_outLine
+BABEL_OP2_302_24982_20131128_202029_inLine
+BABEL_OP2_302_24982_20131128_202029_outLine
+BABEL_OP2_302_24982_20131128_202537_inLine
+BABEL_OP2_302_24982_20131128_202537_outLine
+BABEL_OP2_302_24982_20131128_203436_inLine
+BABEL_OP2_302_24982_20131128_203436_outLine
+BABEL_OP2_302_25085_20140216_161934_inLine
+BABEL_OP2_302_25085_20140216_161934_outLine
+BABEL_OP2_302_25496_20140228_212748_inLine
+BABEL_OP2_302_25496_20140228_212748_outLine
+BABEL_OP2_302_25767_20131108_203252_inLine
+BABEL_OP2_302_25767_20131108_203252_outLine
+BABEL_OP2_302_25767_20131108_205755_inLine
+BABEL_OP2_302_25767_20131108_205755_outLine
+BABEL_OP2_302_25961_20131122_214450_inLine
+BABEL_OP2_302_25961_20131122_214450_outLine
+BABEL_OP2_302_26388_20140203_173156_inLine
+BABEL_OP2_302_26388_20140203_173156_outLine
+BABEL_OP2_302_26836_20131207_194346_inLine
+BABEL_OP2_302_26836_20131207_194346_outLine
+BABEL_OP2_302_27367_20140317_000858_inLine
+BABEL_OP2_302_27367_20140317_000858_outLine
+BABEL_OP2_302_28012_20140115_155940_inLine
+BABEL_OP2_302_28012_20140115_155940_outLine
+BABEL_OP2_302_28477_20140127_173004_inLine
+BABEL_OP2_302_28477_20140127_173004_outLine
+BABEL_OP2_302_28595_20140312_200036_inLine
+BABEL_OP2_302_28595_20140312_200036_outLine
+BABEL_OP2_302_28814_20140115_202820_inLine
+BABEL_OP2_302_28814_20140115_202820_outLine
+BABEL_OP2_302_29072_20131224_215008_inLine
+BABEL_OP2_302_29072_20131224_215008_outLine
+BABEL_OP2_302_29135_20131031_201509_inLine
+BABEL_OP2_302_29135_20131031_201509_outLine
+BABEL_OP2_302_29416_20140125_222019_inLine
+BABEL_OP2_302_29416_20140125_222019_outLine
+BABEL_OP2_302_29439_20140226_160155_inLine
+BABEL_OP2_302_29439_20140226_160155_outLine
+BABEL_OP2_302_30013_20140111_202103_inLine
+BABEL_OP2_302_30013_20140111_202103_outLine
+BABEL_OP2_302_30345_20140113_154634_inLine
+BABEL_OP2_302_30345_20140113_154634_outLine
+BABEL_OP2_302_30461_20140305_205327_inLine
+BABEL_OP2_302_30461_20140305_205327_outLine
+BABEL_OP2_302_30720_20140312_002038_inLine
+BABEL_OP2_302_30720_20140312_002038_outLine
+BABEL_OP2_302_31267_20140221_194733_inLine
+BABEL_OP2_302_31267_20140221_194733_outLine
+BABEL_OP2_302_31490_20131120_230743_inLine
+BABEL_OP2_302_31490_20131120_230743_outLine
+BABEL_OP2_302_32097_20131106_232714_inLine
+BABEL_OP2_302_32097_20131106_232714_outLine
+BABEL_OP2_302_32097_20131106_233937_inLine
+BABEL_OP2_302_32097_20131106_233937_outLine
+BABEL_OP2_302_32287_20140316_185534_inLine
+BABEL_OP2_302_32287_20140316_185534_outLine
+BABEL_OP2_302_32301_20140108_212650_inLine
+BABEL_OP2_302_32301_20140108_212650_outLine
+BABEL_OP2_302_34106_20131118_201548_inLine
+BABEL_OP2_302_34106_20131118_201548_outLine
+BABEL_OP2_302_34197_20131203_173358_inLine
+BABEL_OP2_302_34197_20131203_173358_outLine
+BABEL_OP2_302_34477_20131205_030548_inLine
+BABEL_OP2_302_34477_20131205_030548_outLine
+BABEL_OP2_302_34477_20131205_035623_inLine
+BABEL_OP2_302_34477_20131205_035623_outLine
+BABEL_OP2_302_34486_20140313_003302_inLine
+BABEL_OP2_302_34486_20140313_003302_outLine
+BABEL_OP2_302_34811_20131210_202739_inLine
+BABEL_OP2_302_34811_20131210_202739_outLine
+BABEL_OP2_302_34826_20140127_205243_inLine
+BABEL_OP2_302_34826_20140127_205243_outLine
+BABEL_OP2_302_35000_20140126_011711_inLine
+BABEL_OP2_302_35000_20140126_011711_outLine
+BABEL_OP2_302_35139_20131117_174341_inLine
+BABEL_OP2_302_35139_20131117_174341_outLine
+BABEL_OP2_302_36894_20131113_172242_inLine
+BABEL_OP2_302_36894_20131113_172242_outLine
+BABEL_OP2_302_37271_20131228_201109_inLine
+BABEL_OP2_302_37271_20131228_201109_outLine
+BABEL_OP2_302_37598_20131218_200535_inLine
+BABEL_OP2_302_37598_20131218_200535_outLine
+BABEL_OP2_302_37682_20131218_170241_inLine
+BABEL_OP2_302_37682_20131218_170241_outLine
+BABEL_OP2_302_38588_20131216_211052_inLine
+BABEL_OP2_302_38588_20131216_211052_outLine
+BABEL_OP2_302_39006_20140220_200207_inLine
+BABEL_OP2_302_39006_20140220_200207_outLine
+BABEL_OP2_302_39555_20140110_211809_inLine
+BABEL_OP2_302_39555_20140110_211809_outLine
+BABEL_OP2_302_39744_20131031_182731_inLine
+BABEL_OP2_302_39744_20131031_182731_outLine
+BABEL_OP2_302_39848_20131210_214951_inLine
+BABEL_OP2_302_39848_20131210_214951_outLine
+BABEL_OP2_302_41233_20140111_195838_inLine
+BABEL_OP2_302_41233_20140111_195838_outLine
+BABEL_OP2_302_41680_20131031_034941_inLine
+BABEL_OP2_302_41680_20131031_034941_outLine
+BABEL_OP2_302_42526_20140119_151507_inLine
+BABEL_OP2_302_42526_20140119_151507_outLine
+BABEL_OP2_302_42771_20131210_163330_inLine
+BABEL_OP2_302_42771_20131210_163330_outLine
+BABEL_OP2_302_43286_20131105_180949_inLine
+BABEL_OP2_302_43286_20131105_180949_outLine
+BABEL_OP2_302_43646_20131204_185430_inLine
+BABEL_OP2_302_43646_20131204_185430_outLine
+BABEL_OP2_302_43784_20131128_211932_inLine
+BABEL_OP2_302_43784_20131128_211932_outLine
+BABEL_OP2_302_43788_20131225_201206_inLine
+BABEL_OP2_302_43788_20131225_201206_outLine
+BABEL_OP2_302_43920_20140312_031242_inLine
+BABEL_OP2_302_43920_20140312_031242_outLine
+BABEL_OP2_302_44619_20131212_234348_inLine
+BABEL_OP2_302_44619_20131212_234348_outLine
+BABEL_OP2_302_44961_20131111_223956_inLine
+BABEL_OP2_302_44961_20131111_223956_outLine
+BABEL_OP2_302_45770_20131107_181651_inLine
+BABEL_OP2_302_45770_20131107_181651_outLine
+BABEL_OP2_302_46268_20131113_230054_inLine
+BABEL_OP2_302_46268_20131113_230054_outLine
+BABEL_OP2_302_46558_20131204_011205_inLine
+BABEL_OP2_302_46558_20131204_011205_outLine
+BABEL_OP2_302_46688_20131114_195414_inLine
+BABEL_OP2_302_46688_20131114_195414_outLine
+BABEL_OP2_302_46757_20131227_210756_inLine
+BABEL_OP2_302_46757_20131227_210756_outLine
+BABEL_OP2_302_46763_20140225_183302_inLine
+BABEL_OP2_302_46763_20140225_183302_outLine
+BABEL_OP2_302_46976_20131212_235226_inLine
+BABEL_OP2_302_46976_20131212_235226_outLine
+BABEL_OP2_302_47866_20140122_200544_inLine
+BABEL_OP2_302_47866_20140122_200544_outLine
+BABEL_OP2_302_48243_20131128_221311_inLine
+BABEL_OP2_302_48243_20131128_221311_outLine
+BABEL_OP2_302_48610_20131112_232839_inLine
+BABEL_OP2_302_48610_20131112_232839_outLine
+BABEL_OP2_302_49912_20140217_201647_inLine
+BABEL_OP2_302_49912_20140217_201647_outLine
+BABEL_OP2_302_50549_20140130_172816_inLine
+BABEL_OP2_302_50549_20140130_172816_outLine
+BABEL_OP2_302_50779_20131219_172746_inLine
+BABEL_OP2_302_50779_20131219_172746_outLine
+BABEL_OP2_302_50810_20131031_180316_inLine
+BABEL_OP2_302_50810_20131031_180316_outLine
+BABEL_OP2_302_51955_20131117_200909_inLine
+BABEL_OP2_302_51955_20131117_200909_outLine
+BABEL_OP2_302_52272_20131106_175931_inLine
+BABEL_OP2_302_52272_20131106_175931_outLine
+BABEL_OP2_302_52381_20140118_163935_inLine
+BABEL_OP2_302_52381_20140118_163935_outLine
+BABEL_OP2_302_52404_20131226_172656_inLine
+BABEL_OP2_302_52404_20131226_172656_outLine
+BABEL_OP2_302_53063_20140219_175252_inLine
+BABEL_OP2_302_53063_20140219_175252_outLine
+BABEL_OP2_302_53492_20140124_221354_inLine
+BABEL_OP2_302_53492_20140124_221354_outLine
+BABEL_OP2_302_53492_20140124_231722_inLine
+BABEL_OP2_302_53492_20140124_231722_outLine
+BABEL_OP2_302_54074_20131213_143818_inLine
+BABEL_OP2_302_54074_20131213_143818_outLine
+BABEL_OP2_302_54104_20131108_172927_inLine
+BABEL_OP2_302_54104_20131108_172927_outLine
+BABEL_OP2_302_54697_20140125_210815_inLine
+BABEL_OP2_302_54697_20140125_210815_outLine
+BABEL_OP2_302_54953_20131207_170139_inLine
+BABEL_OP2_302_54953_20131207_170139_outLine
+BABEL_OP2_302_55106_20140226_210229_inLine
+BABEL_OP2_302_55106_20140226_210229_outLine
+BABEL_OP2_302_56306_20140115_190808_inLine
+BABEL_OP2_302_56306_20140115_190808_outLine
+BABEL_OP2_302_57065_20140105_155451_inLine
+BABEL_OP2_302_57065_20140105_155451_outLine
+BABEL_OP2_302_57548_20131220_025554_inLine
+BABEL_OP2_302_57548_20131220_025554_outLine
+BABEL_OP2_302_57566_20140129_021108_inLine
+BABEL_OP2_302_57566_20140129_021108_outLine
+BABEL_OP2_302_58850_20131209_231304_inLine
+BABEL_OP2_302_58850_20131209_231304_outLine
+BABEL_OP2_302_58926_20131121_050015_inLine
+BABEL_OP2_302_58926_20131121_050015_outLine
+BABEL_OP2_302_59509_20140203_215611_inLine
+BABEL_OP2_302_59509_20140203_215611_outLine
+BABEL_OP2_302_59635_20140124_183551_inLine
+BABEL_OP2_302_59635_20140124_183551_outLine
+BABEL_OP2_302_59720_20131206_145023_inLine
+BABEL_OP2_302_59720_20131206_145023_outLine
+BABEL_OP2_302_59747_20131104_164054_inLine
+BABEL_OP2_302_59747_20131104_164054_outLine
+BABEL_OP2_302_60299_20140220_162147_inLine
+BABEL_OP2_302_60299_20140220_162147_outLine
+BABEL_OP2_302_61888_20140127_161005_inLine
+BABEL_OP2_302_61888_20140127_161005_outLine
+BABEL_OP2_302_63307_20131213_180556_inLine
+BABEL_OP2_302_63307_20131213_180556_outLine
+BABEL_OP2_302_63999_20140214_214838_inLine
+BABEL_OP2_302_63999_20140214_214838_outLine
+BABEL_OP2_302_64870_20131226_033837_inLine
+BABEL_OP2_302_64870_20131226_033837_outLine
+BABEL_OP2_302_66001_20131114_212059_inLine
+BABEL_OP2_302_66001_20131114_212059_outLine
+BABEL_OP2_302_66822_20131207_210025_inLine
+BABEL_OP2_302_66822_20131207_210025_outLine
+BABEL_OP2_302_67401_20131220_213115_inLine
+BABEL_OP2_302_67401_20131220_213115_outLine
+BABEL_OP2_302_67622_20131109_180009_inLine
+BABEL_OP2_302_67622_20131109_180009_outLine
+BABEL_OP2_302_67659_20131130_221743_inLine
+BABEL_OP2_302_67659_20131130_221743_outLine
+BABEL_OP2_302_67773_20140318_210730_inLine
+BABEL_OP2_302_67773_20140318_210730_outLine
+BABEL_OP2_302_68244_20140131_200512_inLine
+BABEL_OP2_302_68244_20140131_200512_outLine
+BABEL_OP2_302_68289_20140126_231552_inLine
+BABEL_OP2_302_68289_20140126_231552_outLine
+BABEL_OP2_302_68924_20140130_230433_inLine
+BABEL_OP2_302_68924_20140130_230433_outLine
+BABEL_OP2_302_69578_20131207_134320_inLine
+BABEL_OP2_302_69578_20131207_134320_outLine
+BABEL_OP2_302_69746_20140118_173307_inLine
+BABEL_OP2_302_69746_20140118_173307_outLine
+BABEL_OP2_302_70121_20131209_204053_inLine
+BABEL_OP2_302_70121_20131209_204053_outLine
+BABEL_OP2_302_70251_20131109_191513_inLine
+BABEL_OP2_302_70251_20131109_191513_outLine
+BABEL_OP2_302_70386_20131203_030837_inLine
+BABEL_OP2_302_70386_20131203_030837_outLine
+BABEL_OP2_302_70452_20131219_032729_inLine
+BABEL_OP2_302_70452_20131219_032729_outLine
+BABEL_OP2_302_70460_20140222_223545_inLine
+BABEL_OP2_302_70460_20140222_223545_outLine
+BABEL_OP2_302_70713_20140126_171356_inLine
+BABEL_OP2_302_70713_20140126_171356_outLine
+BABEL_OP2_302_70794_20131107_185831_inLine
+BABEL_OP2_302_70794_20131107_185831_outLine
+BABEL_OP2_302_71038_20140119_172132_inLine
+BABEL_OP2_302_71038_20140119_172132_outLine
+BABEL_OP2_302_71067_20140130_194954_inLine
+BABEL_OP2_302_71067_20140130_194954_outLine
+BABEL_OP2_302_72952_20140215_000239_inLine
+BABEL_OP2_302_72952_20140215_000239_outLine
+BABEL_OP2_302_73518_20140123_190347_inLine
+BABEL_OP2_302_73518_20140123_190347_outLine
+BABEL_OP2_302_74921_20131225_190044_inLine
+BABEL_OP2_302_74921_20131225_190044_outLine
+BABEL_OP2_302_75223_20131130_211714_inLine
+BABEL_OP2_302_75223_20131130_211714_outLine
+BABEL_OP2_302_75223_20131130_212825_inLine
+BABEL_OP2_302_75223_20131130_212825_outLine
+BABEL_OP2_302_75505_20131104_234450_inLine
+BABEL_OP2_302_75505_20131104_234450_outLine
+BABEL_OP2_302_75505_20131104_235752_inLine
+BABEL_OP2_302_75505_20131104_235752_outLine
+BABEL_OP2_302_76756_20131219_183439_inLine
+BABEL_OP2_302_76756_20131219_183439_outLine
+BABEL_OP2_302_77033_20140127_183412_inLine
+BABEL_OP2_302_77033_20140127_183412_outLine
+BABEL_OP2_302_77126_20131111_012344_inLine
+BABEL_OP2_302_77126_20131111_012344_outLine
+BABEL_OP2_302_77242_20140217_184823_inLine
+BABEL_OP2_302_77242_20140217_184823_outLine
+BABEL_OP2_302_77990_20131117_222127_inLine
+BABEL_OP2_302_77990_20131117_222127_outLine
+BABEL_OP2_302_78398_20131116_213051_inLine
+BABEL_OP2_302_78398_20131116_213051_outLine
+BABEL_OP2_302_78943_20131206_035541_inLine
+BABEL_OP2_302_78943_20131206_035541_outLine
+BABEL_OP2_302_78943_20131206_042746_inLine
+BABEL_OP2_302_78943_20131206_042746_outLine
+BABEL_OP2_302_79898_20140310_200258_inLine
+BABEL_OP2_302_79898_20140310_200258_outLine
+BABEL_OP2_302_79995_20140125_225240_inLine
+BABEL_OP2_302_79995_20140125_225240_outLine
+BABEL_OP2_302_80306_20131206_235538_inLine
+BABEL_OP2_302_80306_20131206_235538_outLine
+BABEL_OP2_302_80781_20131207_183741_inLine
+BABEL_OP2_302_80781_20131207_183741_outLine
+BABEL_OP2_302_81213_20131118_175514_inLine
+BABEL_OP2_302_81213_20131118_175514_outLine
+BABEL_OP2_302_81287_20140115_190105_inLine
+BABEL_OP2_302_81287_20140115_190105_outLine
+BABEL_OP2_302_81671_20140131_220121_inLine
+BABEL_OP2_302_81671_20140131_220121_outLine
+BABEL_OP2_302_82089_20131206_191938_inLine
+BABEL_OP2_302_82089_20131206_191938_outLine
+BABEL_OP2_302_82138_20131206_045140_inLine
+BABEL_OP2_302_82138_20131206_045140_outLine
+BABEL_OP2_302_82145_20140301_225354_inLine
+BABEL_OP2_302_82145_20140301_225354_outLine
+BABEL_OP2_302_82224_20140203_014024_inLine
+BABEL_OP2_302_82224_20140203_014024_outLine
+BABEL_OP2_302_82425_20131115_005742_inLine
+BABEL_OP2_302_82425_20131115_005742_outLine
+BABEL_OP2_302_82935_20140114_204802_inLine
+BABEL_OP2_302_82935_20140114_204802_outLine
+BABEL_OP2_302_83430_20140315_203750_inLine
+BABEL_OP2_302_83430_20140315_203750_outLine
+BABEL_OP2_302_83436_20131106_170059_inLine
+BABEL_OP2_302_83436_20131106_170059_outLine
+BABEL_OP2_302_83455_20131208_201956_inLine
+BABEL_OP2_302_83455_20131208_201956_outLine
+BABEL_OP2_302_84408_20131207_204020_inLine
+BABEL_OP2_302_84408_20131207_204020_outLine
+BABEL_OP2_302_84469_20140107_205046_inLine
+BABEL_OP2_302_84469_20140107_205046_outLine
+BABEL_OP2_302_84715_20140127_201518_inLine
+BABEL_OP2_302_84715_20140127_201518_outLine
+BABEL_OP2_302_84936_20140108_204108_inLine
+BABEL_OP2_302_84936_20140108_204108_outLine
+BABEL_OP2_302_85010_20140316_222754_inLine
+BABEL_OP2_302_85010_20140316_222754_outLine
+BABEL_OP2_302_86628_20140215_171431_inLine
+BABEL_OP2_302_86628_20140215_171431_outLine
+BABEL_OP2_302_86715_20140215_174540_inLine
+BABEL_OP2_302_86715_20140215_174540_outLine
+BABEL_OP2_302_87298_20140130_191447_inLine
+BABEL_OP2_302_87298_20140130_191447_outLine
+BABEL_OP2_302_87305_20140214_225515_inLine
+BABEL_OP2_302_87305_20140214_225515_outLine
+BABEL_OP2_302_87693_20131121_041057_inLine
+BABEL_OP2_302_87693_20131121_041057_outLine
+BABEL_OP2_302_87777_20140127_145958_inLine
+BABEL_OP2_302_87777_20140127_145958_outLine
+BABEL_OP2_302_88661_20131225_211835_inLine
+BABEL_OP2_302_88661_20131225_211835_outLine
+BABEL_OP2_302_88938_20140202_215623_inLine
+BABEL_OP2_302_88938_20140202_215623_outLine
+BABEL_OP2_302_89059_20140115_214308_inLine
+BABEL_OP2_302_89059_20140115_214308_outLine
+BABEL_OP2_302_90709_20131111_143953_inLine
+BABEL_OP2_302_90709_20131111_143953_outLine
+BABEL_OP2_302_92557_20140125_230505_inLine
+BABEL_OP2_302_92557_20140125_230505_outLine
+BABEL_OP2_302_92736_20140119_170328_inLine
+BABEL_OP2_302_92736_20140119_170328_outLine
+BABEL_OP2_302_92809_20131109_182045_inLine
+BABEL_OP2_302_92809_20131109_182045_outLine
+BABEL_OP2_302_92942_20131219_014744_inLine
+BABEL_OP2_302_92942_20131219_014744_outLine
+BABEL_OP2_302_93632_20140203_154221_inLine
+BABEL_OP2_302_93632_20140203_154221_outLine
+BABEL_OP2_302_93964_20131208_002934_inLine
+BABEL_OP2_302_93964_20131208_002934_outLine
+BABEL_OP2_302_94253_20131114_215945_inLine
+BABEL_OP2_302_94253_20131114_215945_outLine
+BABEL_OP2_302_94449_20140314_185933_inLine
+BABEL_OP2_302_94449_20140314_185933_outLine
+BABEL_OP2_302_94803_20140313_225823_inLine
+BABEL_OP2_302_94803_20140313_225823_outLine
+BABEL_OP2_302_95598_20131101_172634_inLine
+BABEL_OP2_302_95598_20131101_172634_outLine
+BABEL_OP2_302_95598_20131101_175037_inLine
+BABEL_OP2_302_95598_20131101_175037_outLine
+BABEL_OP2_302_95670_20131130_185901_inLine
+BABEL_OP2_302_95670_20131130_185901_outLine
+BABEL_OP2_302_95903_20140303_002203_inLine
+BABEL_OP2_302_95903_20140303_002203_outLine
+BABEL_OP2_302_96525_20140110_201817_inLine
+BABEL_OP2_302_96525_20140110_201817_outLine
+BABEL_OP2_302_96690_20131204_221739_inLine
+BABEL_OP2_302_96690_20131204_221739_outLine
+BABEL_OP2_302_96910_20140130_210316_inLine
+BABEL_OP2_302_96910_20140130_210316_outLine
+BABEL_OP2_302_97461_20131211_211339_inLine
+BABEL_OP2_302_97461_20131211_211339_outLine
+BABEL_OP2_302_97557_20131219_192714_inLine
+BABEL_OP2_302_97557_20131219_192714_outLine
+BABEL_OP2_302_97588_20131101_161834_inLine
+BABEL_OP2_302_97588_20131101_161834_outLine
+BABEL_OP2_302_97588_20131101_163947_inLine
+BABEL_OP2_302_97588_20131101_163947_outLine
+BABEL_OP2_302_97731_20140114_201001_inLine
+BABEL_OP2_302_97731_20140114_201001_outLine
+BABEL_OP2_302_97772_20131107_223232_inLine
+BABEL_OP2_302_97772_20131107_223232_outLine
+BABEL_OP2_302_98489_20131204_181216_inLine
+BABEL_OP2_302_98489_20131204_181216_outLine
+BABEL_OP2_302_98909_20131117_153948_inLine
+BABEL_OP2_302_98909_20131117_153948_outLine
+BABEL_OP2_302_99401_20131114_221114_inLine
+BABEL_OP2_302_99401_20131114_221114_outLine
+BABEL_OP2_302_99887_20140129_162421_inLine
+BABEL_OP2_302_99887_20140129_162421_outLine
diff --git a/egs/babel/s5d/conf/lists/302-kazakh/untranscribed-training.list b/egs/babel/s5d/conf/lists/302-kazakh/untranscribed-training.list
new file mode 100644
index 00000000000..52a1f686ddc
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/302-kazakh/untranscribed-training.list
@@ -0,0 +1,569 @@
+BABEL_OP2_302_11723_20140320_021007_inLine
+BABEL_OP2_302_11723_20140320_021007_outLine
+BABEL_OP2_302_11723_20140320_030801_inLine
+BABEL_OP2_302_11723_20140320_030801_outLine
+BABEL_OP2_302_11768_20140319_010224_inLine
+BABEL_OP2_302_11768_20140319_010224_outLine
+BABEL_OP2_302_13776_20140225_201416_inLine
+BABEL_OP2_302_13776_20140225_201416_outLine
+BABEL_OP2_302_13776_20140225_203014_inLine
+BABEL_OP2_302_13776_20140225_203014_outLine
+BABEL_OP2_302_14179_20131218_222613_inLine
+BABEL_OP2_302_14179_20131218_222613_outLine
+BABEL_OP2_302_14179_20131218_223829_inLine
+BABEL_OP2_302_14179_20131218_223829_outLine
+BABEL_OP2_302_14179_20131218_224616_inLine
+BABEL_OP2_302_14179_20131218_224616_outLine
+BABEL_OP2_302_14537_20140204_020213_inLine
+BABEL_OP2_302_14537_20140204_020213_outLine
+BABEL_OP2_302_14575_20140317_000954_inLine
+BABEL_OP2_302_14575_20140317_000954_outLine
+BABEL_OP2_302_15322_20140223_032907_inLine
+BABEL_OP2_302_15322_20140223_032907_outLine
+BABEL_OP2_302_15466_20140311_190939_inLine
+BABEL_OP2_302_15466_20140311_190939_outLine
+BABEL_OP2_302_15749_20140106_224305_inLine
+BABEL_OP2_302_15749_20140106_224305_outLine
+BABEL_OP2_302_15869_20140319_024910_inLine
+BABEL_OP2_302_15869_20140319_024910_outLine
+BABEL_OP2_302_15926_20131223_153308_inLine
+BABEL_OP2_302_15926_20131223_153308_outLine
+BABEL_OP2_302_15926_20131223_154557_inLine
+BABEL_OP2_302_15926_20131223_154557_outLine
+BABEL_OP2_302_15926_20131223_155325_inLine
+BABEL_OP2_302_15926_20131223_155325_outLine
+BABEL_OP2_302_15926_20131223_160015_inLine
+BABEL_OP2_302_15926_20131223_160015_outLine
+BABEL_OP2_302_15926_20131223_160509_inLine
+BABEL_OP2_302_15926_20131223_160509_outLine
+BABEL_OP2_302_15926_20131223_161316_inLine
+BABEL_OP2_302_15926_20131223_161316_outLine
+BABEL_OP2_302_17115_20140326_194730_inLine
+BABEL_OP2_302_17115_20140326_194730_outLine
+BABEL_OP2_302_17420_20140222_233405_inLine
+BABEL_OP2_302_17420_20140222_233405_outLine
+BABEL_OP2_302_17615_20140107_025352_inLine
+BABEL_OP2_302_17615_20140107_025352_outLine
+BABEL_OP2_302_19440_20140328_012334_inLine
+BABEL_OP2_302_19440_20140328_012334_outLine
+BABEL_OP2_302_20800_20131116_165644_inLine
+BABEL_OP2_302_20800_20131116_165644_outLine
+BABEL_OP2_302_20896_20140319_002117_inLine
+BABEL_OP2_302_20896_20140319_002117_outLine
+BABEL_OP2_302_21244_20140221_181738_inLine
+BABEL_OP2_302_21244_20140221_181738_outLine
+BABEL_OP2_302_21244_20140221_185615_inLine
+BABEL_OP2_302_21244_20140221_185615_outLine
+BABEL_OP2_302_21315_20140304_163357_inLine
+BABEL_OP2_302_21315_20140304_163357_outLine
+BABEL_OP2_302_21393_20140303_153741_inLine
+BABEL_OP2_302_21393_20140303_153741_outLine
+BABEL_OP2_302_22170_20140203_213129_inLine
+BABEL_OP2_302_22170_20140203_213129_outLine
+BABEL_OP2_302_22494_20131227_200715_inLine
+BABEL_OP2_302_22918_20140203_004320_inLine
+BABEL_OP2_302_22918_20140203_004320_outLine
+BABEL_OP2_302_23092_20140218_224204_inLine
+BABEL_OP2_302_23092_20140218_224204_outLine
+BABEL_OP2_302_24033_20140131_000701_inLine
+BABEL_OP2_302_24033_20140131_000701_outLine
+BABEL_OP2_302_24209_20140328_231409_inLine
+BABEL_OP2_302_24209_20140328_231409_outLine
+BABEL_OP2_302_24587_20140318_225950_inLine
+BABEL_OP2_302_24587_20140318_225950_outLine
+BABEL_OP2_302_25719_20140129_155329_inLine
+BABEL_OP2_302_25719_20140129_155329_outLine
+BABEL_OP2_302_26507_20140308_203259_inLine
+BABEL_OP2_302_26507_20140308_203259_outLine
+BABEL_OP2_302_26574_20140201_202449_inLine
+BABEL_OP2_302_26574_20140201_202449_outLine
+BABEL_OP2_302_27218_20131115_211108_inLine
+BABEL_OP2_302_27218_20131115_211108_outLine
+BABEL_OP2_302_27841_20140124_002040_inLine
+BABEL_OP2_302_27841_20140124_002040_outLine
+BABEL_OP2_302_27841_20140124_003521_inLine
+BABEL_OP2_302_27841_20140124_003521_outLine
+BABEL_OP2_302_28538_20131218_015743_inLine
+BABEL_OP2_302_28600_20140111_172326_inLine
+BABEL_OP2_302_28600_20140111_172326_outLine
+BABEL_OP2_302_28600_20140111_173213_inLine
+BABEL_OP2_302_28600_20140111_173213_outLine
+BABEL_OP2_302_28606_20140108_222927_inLine
+BABEL_OP2_302_28606_20140108_222927_outLine
+BABEL_OP2_302_28606_20140108_223822_inLine
+BABEL_OP2_302_28606_20140108_223822_outLine
+BABEL_OP2_302_28775_20131116_210510_outLine
+BABEL_OP2_302_29076_20131214_155845_inLine
+BABEL_OP2_302_29076_20131214_155845_outLine
+BABEL_OP2_302_29076_20131214_160555_inLine
+BABEL_OP2_302_29076_20131214_160555_outLine
+BABEL_OP2_302_29323_20140130_190425_inLine
+BABEL_OP2_302_29323_20140130_190425_outLine
+BABEL_OP2_302_29404_20140214_174021_inLine
+BABEL_OP2_302_29404_20140214_174021_outLine
+BABEL_OP2_302_29643_20140213_000617_inLine
+BABEL_OP2_302_29643_20140213_000617_outLine
+BABEL_OP2_302_30974_20140315_192921_inLine
+BABEL_OP2_302_30974_20140315_192921_outLine
+BABEL_OP2_302_31182_20140107_213519_outLine
+BABEL_OP2_302_31346_20131230_220709_inLine
+BABEL_OP2_302_31346_20131230_221548_inLine
+BABEL_OP2_302_31919_20140120_211340_inLine
+BABEL_OP2_302_31919_20140120_211340_outLine
+BABEL_OP2_302_32727_20140304_200331_inLine
+BABEL_OP2_302_32727_20140304_200331_outLine
+BABEL_OP2_302_32832_20140114_180910_inLine
+BABEL_OP2_302_32832_20140114_180910_outLine
+BABEL_OP2_302_32872_20140311_203050_inLine
+BABEL_OP2_302_32872_20140311_203050_outLine
+BABEL_OP2_302_32872_20140311_204413_inLine
+BABEL_OP2_302_32872_20140311_204413_outLine
+BABEL_OP2_302_32959_20140301_192344_inLine
+BABEL_OP2_302_32959_20140301_192344_outLine
+BABEL_OP2_302_33273_20131214_184246_inLine
+BABEL_OP2_302_33273_20131214_184246_outLine
+BABEL_OP2_302_33273_20131214_191106_inLine
+BABEL_OP2_302_33273_20131214_191106_outLine
+BABEL_OP2_302_34410_20140321_182956_inLine
+BABEL_OP2_302_34410_20140321_182956_outLine
+BABEL_OP2_302_34629_20140315_192329_inLine
+BABEL_OP2_302_34629_20140315_192329_outLine
+BABEL_OP2_302_35202_20131221_211514_inLine
+BABEL_OP2_302_35202_20131221_211514_outLine
+BABEL_OP2_302_35202_20131221_220228_inLine
+BABEL_OP2_302_35202_20131221_220228_outLine
+BABEL_OP2_302_35609_20140326_155717_inLine
+BABEL_OP2_302_35609_20140326_155717_outLine
+BABEL_OP2_302_35786_20140201_193528_inLine
+BABEL_OP2_302_35786_20140201_193528_outLine
+BABEL_OP2_302_36147_20140314_230249_inLine
+BABEL_OP2_302_36147_20140314_230249_outLine
+BABEL_OP2_302_36632_20140316_220512_inLine
+BABEL_OP2_302_36632_20140316_220512_outLine
+BABEL_OP2_302_36642_20140203_204149_inLine
+BABEL_OP2_302_36642_20140203_204149_outLine
+BABEL_OP2_302_37281_20131223_193947_inLine
+BABEL_OP2_302_37281_20131223_193947_outLine
+BABEL_OP2_302_38554_20131107_231324_inLine
+BABEL_OP2_302_38554_20131107_231324_outLine
+BABEL_OP2_302_39159_20131101_181154_inLine
+BABEL_OP2_302_39159_20131101_181154_outLine
+BABEL_OP2_302_39159_20131101_182621_inLine
+BABEL_OP2_302_39159_20131101_182621_outLine
+BABEL_OP2_302_39277_20140313_204841_inLine
+BABEL_OP2_302_39277_20140313_204841_outLine
+BABEL_OP2_302_39920_20140308_212443_inLine
+BABEL_OP2_302_39920_20140308_212443_outLine
+BABEL_OP2_302_40196_20140316_214624_inLine
+BABEL_OP2_302_40196_20140316_214624_outLine
+BABEL_OP2_302_40740_20140203_175842_inLine
+BABEL_OP2_302_40740_20140203_175842_outLine
+BABEL_OP2_302_41109_20140129_151014_outLine
+BABEL_OP2_302_41109_20140129_151930_outLine
+BABEL_OP2_302_41682_20140316_184028_inLine
+BABEL_OP2_302_41682_20140316_184028_outLine
+BABEL_OP2_302_41685_20140319_224731_inLine
+BABEL_OP2_302_41685_20140319_224731_outLine
+BABEL_OP2_302_41745_20131216_195331_inLine
+BABEL_OP2_302_41745_20131216_195331_outLine
+BABEL_OP2_302_42126_20140319_211544_inLine
+BABEL_OP2_302_42126_20140319_211544_outLine
+BABEL_OP2_302_42243_20131104_193524_inLine
+BABEL_OP2_302_42243_20131104_193524_outLine
+BABEL_OP2_302_42718_20140303_235926_inLine
+BABEL_OP2_302_42718_20140303_235926_outLine
+BABEL_OP2_302_42883_20140301_215140_inLine
+BABEL_OP2_302_42883_20140301_215140_outLine
+BABEL_OP2_302_42883_20140301_220413_inLine
+BABEL_OP2_302_42883_20140301_220413_outLine
+BABEL_OP2_302_43285_20131218_173818_inLine
+BABEL_OP2_302_43285_20131218_173818_outLine
+BABEL_OP2_302_43285_20131218_175248_inLine
+BABEL_OP2_302_43285_20131218_175248_outLine
+BABEL_OP2_302_43323_20140320_012405_inLine
+BABEL_OP2_302_43323_20140320_012405_outLine
+BABEL_OP2_302_43990_20140319_003408_inLine
+BABEL_OP2_302_44255_20140203_221612_inLine
+BABEL_OP2_302_44255_20140203_221612_outLine
+BABEL_OP2_302_44681_20140316_231417_inLine
+BABEL_OP2_302_44681_20140316_231417_outLine
+BABEL_OP2_302_45106_20140130_195527_inLine
+BABEL_OP2_302_45106_20140130_195527_outLine
+BABEL_OP2_302_45140_20140204_000835_inLine
+BABEL_OP2_302_45140_20140204_000835_outLine
+BABEL_OP2_302_45374_20140316_190302_inLine
+BABEL_OP2_302_45374_20140316_190302_outLine
+BABEL_OP2_302_46066_20140131_180512_inLine
+BABEL_OP2_302_46066_20140131_180512_outLine
+BABEL_OP2_302_46315_20140112_051606_inLine
+BABEL_OP2_302_46315_20140112_051606_outLine
+BABEL_OP2_302_46315_20140112_053032_inLine
+BABEL_OP2_302_46315_20140112_053032_outLine
+BABEL_OP2_302_46333_20131106_193911_inLine
+BABEL_OP2_302_46333_20131106_193911_outLine
+BABEL_OP2_302_46389_20140317_000314_inLine
+BABEL_OP2_302_46389_20140317_000314_outLine
+BABEL_OP2_302_46589_20131218_200246_inLine
+BABEL_OP2_302_47799_20140310_191802_inLine
+BABEL_OP2_302_47799_20140310_191802_outLine
+BABEL_OP2_302_48200_20140111_171610_inLine
+BABEL_OP2_302_48200_20140111_171610_outLine
+BABEL_OP2_302_48758_20140222_204731_inLine
+BABEL_OP2_302_48758_20140222_204731_outLine
+BABEL_OP2_302_49027_20140307_172629_inLine
+BABEL_OP2_302_49027_20140307_172629_outLine
+BABEL_OP2_302_49118_20140114_164903_inLine
+BABEL_OP2_302_49118_20140114_164903_outLine
+BABEL_OP2_302_49437_20140123_224810_inLine
+BABEL_OP2_302_49437_20140123_224810_outLine
+BABEL_OP2_302_49739_20140314_204410_inLine
+BABEL_OP2_302_49739_20140314_204410_outLine
+BABEL_OP2_302_51015_20131225_194000_inLine
+BABEL_OP2_302_51015_20131225_194000_outLine
+BABEL_OP2_302_51407_20131210_182141_inLine
+BABEL_OP2_302_51407_20131210_182141_outLine
+BABEL_OP2_302_51414_20140301_231945_inLine
+BABEL_OP2_302_51414_20140301_231945_outLine
+BABEL_OP2_302_51414_20140301_232951_inLine
+BABEL_OP2_302_51414_20140301_232951_outLine
+BABEL_OP2_302_51530_20140303_173734_inLine
+BABEL_OP2_302_51530_20140303_173734_outLine
+BABEL_OP2_302_51693_20140317_180609_inLine
+BABEL_OP2_302_51693_20140317_180609_outLine
+BABEL_OP2_302_51819_20140108_231905_inLine
+BABEL_OP2_302_51819_20140108_231905_outLine
+BABEL_OP2_302_51819_20140108_232624_inLine
+BABEL_OP2_302_51819_20140108_232624_outLine
+BABEL_OP2_302_51858_20140314_235721_inLine
+BABEL_OP2_302_51858_20140314_235721_outLine
+BABEL_OP2_302_52070_20140320_231722_inLine
+BABEL_OP2_302_52070_20140320_231722_outLine
+BABEL_OP2_302_52222_20140314_185604_inLine
+BABEL_OP2_302_52222_20140314_185604_outLine
+BABEL_OP2_302_52265_20140317_214257_inLine
+BABEL_OP2_302_52265_20140317_214257_outLine
+BABEL_OP2_302_52483_20140318_192930_inLine
+BABEL_OP2_302_52483_20140318_192930_outLine
+BABEL_OP2_302_52490_20131204_173409_inLine
+BABEL_OP2_302_52490_20131204_173409_outLine
+BABEL_OP2_302_52725_20140224_182027_inLine
+BABEL_OP2_302_52725_20140224_182027_outLine
+BABEL_OP2_302_53072_20140307_191159_inLine
+BABEL_OP2_302_53072_20140307_191159_outLine
+BABEL_OP2_302_53415_20140301_180358_inLine
+BABEL_OP2_302_53415_20140301_180358_outLine
+BABEL_OP2_302_53917_20140214_214823_inLine
+BABEL_OP2_302_53917_20140214_214823_outLine
+BABEL_OP2_302_54046_20140111_191512_inLine
+BABEL_OP2_302_54046_20140111_191512_outLine
+BABEL_OP2_302_54160_20131105_233517_inLine
+BABEL_OP2_302_54160_20131105_233517_outLine
+BABEL_OP2_302_54405_20140111_185837_inLine
+BABEL_OP2_302_54405_20140111_185837_outLine
+BABEL_OP2_302_54477_20140108_182424_inLine
+BABEL_OP2_302_54477_20140108_182424_outLine
+BABEL_OP2_302_54923_20140216_224345_inLine
+BABEL_OP2_302_54923_20140216_224345_outLine
+BABEL_OP2_302_55259_20140110_235646_inLine
+BABEL_OP2_302_55259_20140110_235646_outLine
+BABEL_OP2_302_56331_20140116_230347_inLine
+BABEL_OP2_302_56331_20140116_230347_outLine
+BABEL_OP2_302_56345_20140316_214007_inLine
+BABEL_OP2_302_56345_20140316_214007_outLine
+BABEL_OP2_302_56468_20140313_205811_inLine
+BABEL_OP2_302_56468_20140313_205811_outLine
+BABEL_OP2_302_56743_20131216_222343_inLine
+BABEL_OP2_302_56743_20131216_222343_outLine
+BABEL_OP2_302_56925_20140324_224547_inLine
+BABEL_OP2_302_56925_20140324_224547_outLine
+BABEL_OP2_302_57116_20131030_223921_inLine
+BABEL_OP2_302_57116_20131030_223921_outLine
+BABEL_OP2_302_57542_20140122_203736_inLine
+BABEL_OP2_302_57542_20140122_203736_outLine
+BABEL_OP2_302_57654_20131117_191902_inLine
+BABEL_OP2_302_57654_20131117_191902_outLine
+BABEL_OP2_302_57654_20131117_192605_inLine
+BABEL_OP2_302_57654_20131117_192605_outLine
+BABEL_OP2_302_57678_20131219_025602_inLine
+BABEL_OP2_302_57678_20131219_025602_outLine
+BABEL_OP2_302_58047_20131218_204521_inLine
+BABEL_OP2_302_58047_20131218_204521_outLine
+BABEL_OP2_302_58734_20131113_233358_inLine
+BABEL_OP2_302_58734_20131113_233358_outLine
+BABEL_OP2_302_59091_20140128_234107_inLine
+BABEL_OP2_302_59091_20140128_234107_outLine
+BABEL_OP2_302_59301_20140114_221332_inLine
+BABEL_OP2_302_59301_20140114_221332_outLine
+BABEL_OP2_302_60115_20131217_170350_inLine
+BABEL_OP2_302_60115_20131217_170350_outLine
+BABEL_OP2_302_60661_20131116_191211_inLine
+BABEL_OP2_302_60661_20131116_191211_outLine
+BABEL_OP2_302_62155_20140201_185809_inLine
+BABEL_OP2_302_62155_20140201_185809_outLine
+BABEL_OP2_302_62158_20140319_223940_inLine
+BABEL_OP2_302_62158_20140319_223940_outLine
+BABEL_OP2_302_62200_20131209_215237_inLine
+BABEL_OP2_302_62200_20131209_215237_outLine
+BABEL_OP2_302_62362_20140225_004754_inLine
+BABEL_OP2_302_62362_20140225_004754_outLine
+BABEL_OP2_302_62430_20140301_152214_inLine
+BABEL_OP2_302_62430_20140301_152214_outLine
+BABEL_OP2_302_62724_20140304_224111_inLine
+BABEL_OP2_302_62724_20140304_224111_outLine
+BABEL_OP2_302_62835_20131223_201212_inLine
+BABEL_OP2_302_63220_20140127_003053_inLine
+BABEL_OP2_302_63220_20140127_003053_outLine
+BABEL_OP2_302_63425_20140119_000855_inLine
+BABEL_OP2_302_63425_20140119_000855_outLine
+BABEL_OP2_302_63445_20131121_234555_inLine
+BABEL_OP2_302_63511_20140311_232611_inLine
+BABEL_OP2_302_63511_20140311_232611_outLine
+BABEL_OP2_302_63523_20140219_180149_inLine
+BABEL_OP2_302_63523_20140219_180149_outLine
+BABEL_OP2_302_63906_20140120_224621_inLine
+BABEL_OP2_302_63906_20140120_224621_outLine
+BABEL_OP2_302_63938_20140129_205148_inLine
+BABEL_OP2_302_63938_20140129_205148_outLine
+BABEL_OP2_302_64350_20131117_225845_inLine
+BABEL_OP2_302_64350_20131117_225845_outLine
+BABEL_OP2_302_64350_20131117_232849_inLine
+BABEL_OP2_302_64350_20131117_232849_outLine
+BABEL_OP2_302_65639_20140320_150018_inLine
+BABEL_OP2_302_65639_20140320_150018_outLine
+BABEL_OP2_302_66026_20140326_234154_inLine
+BABEL_OP2_302_66026_20140326_234154_outLine
+BABEL_OP2_302_66361_20140319_194108_inLine
+BABEL_OP2_302_66361_20140319_194108_outLine
+BABEL_OP2_302_67213_20140327_183232_inLine
+BABEL_OP2_302_67213_20140327_183232_outLine
+BABEL_OP2_302_67304_20140327_170105_inLine
+BABEL_OP2_302_67304_20140327_170105_outLine
+BABEL_OP2_302_67389_20140316_224805_inLine
+BABEL_OP2_302_67389_20140316_224805_outLine
+BABEL_OP2_302_67389_20140316_230159_inLine
+BABEL_OP2_302_67389_20140316_230159_outLine
+BABEL_OP2_302_67552_20131225_215450_inLine
+BABEL_OP2_302_67552_20131225_215450_outLine
+BABEL_OP2_302_67592_20140113_211110_inLine
+BABEL_OP2_302_67592_20140113_211110_outLine
+BABEL_OP2_302_67726_20140319_013401_outLine
+BABEL_OP2_302_67726_20140319_014304_outLine
+BABEL_OP2_302_67794_20131117_183019_inLine
+BABEL_OP2_302_67794_20131117_183019_outLine
+BABEL_OP2_302_68402_20140327_221916_inLine
+BABEL_OP2_302_68402_20140327_221916_outLine
+BABEL_OP2_302_68627_20131204_225115_inLine
+BABEL_OP2_302_68627_20131204_225115_outLine
+BABEL_OP2_302_68748_20131217_195520_inLine
+BABEL_OP2_302_68748_20131217_195520_outLine
+BABEL_OP2_302_68748_20131217_201343_inLine
+BABEL_OP2_302_68748_20131217_201343_outLine
+BABEL_OP2_302_68908_20140320_022955_inLine
+BABEL_OP2_302_68908_20140320_022955_outLine
+BABEL_OP2_302_69982_20140311_005531_inLine
+BABEL_OP2_302_69982_20140311_005531_outLine
+BABEL_OP2_302_70282_20131220_160010_inLine
+BABEL_OP2_302_70282_20131220_160010_outLine
+BABEL_OP2_302_70639_20140318_221840_inLine
+BABEL_OP2_302_70639_20140318_221840_outLine
+BABEL_OP2_302_70726_20140319_183341_inLine
+BABEL_OP2_302_70726_20140319_183341_outLine
+BABEL_OP2_302_71419_20140314_222627_inLine
+BABEL_OP2_302_71419_20140314_222627_outLine
+BABEL_OP2_302_73408_20140326_185144_inLine
+BABEL_OP2_302_73408_20140326_185144_outLine
+BABEL_OP2_302_73408_20140326_190631_inLine
+BABEL_OP2_302_73408_20140326_190631_outLine
+BABEL_OP2_302_73591_20131029_231600_inLine
+BABEL_OP2_302_73814_20131226_180746_inLine
+BABEL_OP2_302_73814_20131226_180746_outLine
+BABEL_OP2_302_73814_20131226_181941_inLine
+BABEL_OP2_302_73814_20131226_181941_outLine
+BABEL_OP2_302_73964_20140303_232725_inLine
+BABEL_OP2_302_73964_20140303_232725_outLine
+BABEL_OP2_302_74078_20140324_220859_outLine
+BABEL_OP2_302_74121_20131206_165002_inLine
+BABEL_OP2_302_74253_20140203_174833_inLine
+BABEL_OP2_302_74253_20140203_174833_outLine
+BABEL_OP2_302_74728_20140307_151132_inLine
+BABEL_OP2_302_74728_20140307_151132_outLine
+BABEL_OP2_302_75064_20131205_015445_inLine
+BABEL_OP2_302_75261_20140218_213238_inLine
+BABEL_OP2_302_75261_20140218_213238_outLine
+BABEL_OP2_302_75359_20140220_000334_inLine
+BABEL_OP2_302_75359_20140220_000334_outLine
+BABEL_OP2_302_75366_20140310_224545_inLine
+BABEL_OP2_302_75366_20140310_224545_outLine
+BABEL_OP2_302_75465_20140125_194816_inLine
+BABEL_OP2_302_75465_20140125_194816_outLine
+BABEL_OP2_302_75764_20140123_173321_inLine
+BABEL_OP2_302_75764_20140123_173321_outLine
+BABEL_OP2_302_75869_20140118_180045_inLine
+BABEL_OP2_302_75869_20140118_180045_outLine
+BABEL_OP2_302_75993_20131118_164850_inLine
+BABEL_OP2_302_75993_20131118_164850_outLine
+BABEL_OP2_302_76444_20140304_213108_inLine
+BABEL_OP2_302_76444_20140304_213108_outLine
+BABEL_OP2_302_76970_20140327_002045_inLine
+BABEL_OP2_302_76970_20140327_002045_outLine
+BABEL_OP2_302_77904_20140316_204739_inLine
+BABEL_OP2_302_77904_20140316_204739_outLine
+BABEL_OP2_302_78360_20140131_201120_inLine
+BABEL_OP2_302_78360_20140131_201120_outLine
+BABEL_OP2_302_78630_20131115_232537_inLine
+BABEL_OP2_302_78630_20131115_232537_outLine
+BABEL_OP2_302_78976_20131128_230615_inLine
+BABEL_OP2_302_78976_20131128_230615_outLine
+BABEL_OP2_302_79167_20131225_175926_inLine
+BABEL_OP2_302_79167_20131225_175926_outLine
+BABEL_OP2_302_79367_20131112_222137_inLine
+BABEL_OP2_302_79367_20131112_222137_outLine
+BABEL_OP2_302_79858_20131116_000426_inLine
+BABEL_OP2_302_81404_20131213_230929_inLine
+BABEL_OP2_302_81404_20131213_230929_outLine
+BABEL_OP2_302_81427_20131211_221442_inLine
+BABEL_OP2_302_81427_20131211_221442_outLine
+BABEL_OP2_302_81674_20140315_024749_inLine
+BABEL_OP2_302_81674_20140315_024749_outLine
+BABEL_OP2_302_82140_20131206_055551_inLine
+BABEL_OP2_302_82140_20131206_055551_outLine
+BABEL_OP2_302_82361_20140204_014603_inLine
+BABEL_OP2_302_82361_20140204_014603_outLine
+BABEL_OP2_302_82622_20131105_002634_inLine
+BABEL_OP2_302_82622_20131105_002634_outLine
+BABEL_OP2_302_82904_20140203_194011_inLine
+BABEL_OP2_302_82904_20140203_194011_outLine
+BABEL_OP2_302_83366_20131223_172753_inLine
+BABEL_OP2_302_83366_20131223_172753_outLine
+BABEL_OP2_302_83775_20131203_184707_inLine
+BABEL_OP2_302_83775_20131203_184707_outLine
+BABEL_OP2_302_83783_20131218_212844_inLine
+BABEL_OP2_302_83783_20131218_212844_outLine
+BABEL_OP2_302_84327_20140112_031943_inLine
+BABEL_OP2_302_84327_20140112_031943_outLine
+BABEL_OP2_302_84327_20140112_033431_inLine
+BABEL_OP2_302_84327_20140112_033431_outLine
+BABEL_OP2_302_84458_20140226_001547_inLine
+BABEL_OP2_302_84458_20140226_001547_outLine
+BABEL_OP2_302_84583_20131220_210443_inLine
+BABEL_OP2_302_84583_20131220_210443_outLine
+BABEL_OP2_302_84838_20140112_004851_inLine
+BABEL_OP2_302_84838_20140112_004851_outLine
+BABEL_OP2_302_84838_20140112_011030_inLine
+BABEL_OP2_302_84838_20140112_011030_outLine
+BABEL_OP2_302_85028_20140106_232649_inLine
+BABEL_OP2_302_85028_20140106_232649_outLine
+BABEL_OP2_302_85260_20140318_235730_inLine
+BABEL_OP2_302_85260_20140318_235730_outLine
+BABEL_OP2_302_85260_20140319_021618_inLine
+BABEL_OP2_302_85260_20140319_021618_outLine
+BABEL_OP2_302_85519_20140111_210933_inLine
+BABEL_OP2_302_85519_20140111_210933_outLine
+BABEL_OP2_302_85651_20140108_220631_inLine
+BABEL_OP2_302_85651_20140108_220631_outLine
+BABEL_OP2_302_85651_20140108_221652_inLine
+BABEL_OP2_302_85651_20140108_221652_outLine
+BABEL_OP2_302_85651_20140108_222943_inLine
+BABEL_OP2_302_85651_20140108_222943_outLine
+BABEL_OP2_302_86321_20140304_184505_outLine
+BABEL_OP2_302_86321_20140304_190052_outLine
+BABEL_OP2_302_86676_20131221_194024_inLine
+BABEL_OP2_302_86676_20131221_194024_outLine
+BABEL_OP2_302_86826_20140221_213850_inLine
+BABEL_OP2_302_86826_20140221_213850_outLine
+BABEL_OP2_302_86885_20140319_172338_inLine
+BABEL_OP2_302_86885_20140319_172338_outLine
+BABEL_OP2_302_86888_20131221_183239_inLine
+BABEL_OP2_302_86888_20131221_183239_outLine
+BABEL_OP2_302_86952_20131105_224050_inLine
+BABEL_OP2_302_86952_20131105_224050_outLine
+BABEL_OP2_302_87073_20131108_174654_inLine
+BABEL_OP2_302_87073_20131108_174654_outLine
+BABEL_OP2_302_87545_20140303_174324_inLine
+BABEL_OP2_302_87545_20140303_174324_outLine
+BABEL_OP2_302_87629_20131226_030820_inLine
+BABEL_OP2_302_87629_20131226_030820_outLine
+BABEL_OP2_302_90440_20140314_190637_inLine
+BABEL_OP2_302_90440_20140314_190637_outLine
+BABEL_OP2_302_90740_20140312_213002_inLine
+BABEL_OP2_302_90740_20140312_213002_outLine
+BABEL_OP2_302_91606_20140223_185235_inLine
+BABEL_OP2_302_91606_20140223_185235_outLine
+BABEL_OP2_302_91808_20140315_215351_inLine
+BABEL_OP2_302_91808_20140315_215351_outLine
+BABEL_OP2_302_91977_20131217_185642_inLine
+BABEL_OP2_302_91977_20131217_185642_outLine
+BABEL_OP2_302_92096_20140121_222052_inLine
+BABEL_OP2_302_92096_20140121_222052_outLine
+BABEL_OP2_302_92096_20140121_222833_inLine
+BABEL_OP2_302_92096_20140121_222833_outLine
+BABEL_OP2_302_92096_20140121_223620_inLine
+BABEL_OP2_302_92096_20140121_223620_outLine
+BABEL_OP2_302_92252_20140317_205453_outLine
+BABEL_OP2_302_92740_20131217_174305_inLine
+BABEL_OP2_302_92740_20131217_174305_outLine
+BABEL_OP2_302_92886_20131204_201740_inLine
+BABEL_OP2_302_93153_20131115_225858_inLine
+BABEL_OP2_302_93153_20131115_225858_outLine
+BABEL_OP2_302_93443_20140320_025047_inLine
+BABEL_OP2_302_93443_20140320_025047_outLine
+BABEL_OP2_302_93490_20140113_184331_inLine
+BABEL_OP2_302_93490_20140113_184331_outLine
+BABEL_OP2_302_93946_20140225_213901_inLine
+BABEL_OP2_302_93946_20140225_213901_outLine
+BABEL_OP2_302_94044_20140314_194724_inLine
+BABEL_OP2_302_94044_20140314_194724_outLine
+BABEL_OP2_302_94044_20140314_195844_inLine
+BABEL_OP2_302_94044_20140314_195844_outLine
+BABEL_OP2_302_94141_20140220_002237_inLine
+BABEL_OP2_302_94141_20140220_002237_outLine
+BABEL_OP2_302_94212_20140313_231224_inLine
+BABEL_OP2_302_94212_20140313_231224_outLine
+BABEL_OP2_302_94487_20140303_234032_inLine
+BABEL_OP2_302_94487_20140303_234032_outLine
+BABEL_OP2_302_94587_20140115_224719_inLine
+BABEL_OP2_302_94587_20140115_224719_outLine
+BABEL_OP2_302_94587_20140115_225600_inLine
+BABEL_OP2_302_94587_20140115_225600_outLine
+BABEL_OP2_302_94587_20140115_230344_inLine
+BABEL_OP2_302_94587_20140115_230344_outLine
+BABEL_OP2_302_95467_20140217_181554_inLine
+BABEL_OP2_302_95467_20140217_181554_outLine
+BABEL_OP2_302_95490_20131119_220530_inLine
+BABEL_OP2_302_95490_20131119_220530_outLine
+BABEL_OP2_302_95490_20131119_221754_inLine
+BABEL_OP2_302_95490_20131119_221754_outLine
+BABEL_OP2_302_95571_20140315_172644_inLine
+BABEL_OP2_302_95571_20140315_172644_outLine
+BABEL_OP2_302_95966_20131213_023122_inLine
+BABEL_OP2_302_95966_20131213_023122_outLine
+BABEL_OP2_302_96088_20140307_165731_inLine
+BABEL_OP2_302_96088_20140307_165731_outLine
+BABEL_OP2_302_96247_20140120_221340_inLine
+BABEL_OP2_302_96247_20140120_221340_outLine
+BABEL_OP2_302_96247_20140120_224135_inLine
+BABEL_OP2_302_96247_20140120_224135_outLine
+BABEL_OP2_302_96584_20140127_164106_inLine
+BABEL_OP2_302_96584_20140127_164106_outLine
+BABEL_OP2_302_96934_20131203_232255_inLine
+BABEL_OP2_302_97570_20131223_175908_inLine
+BABEL_OP2_302_97570_20131223_175908_outLine
+BABEL_OP2_302_97570_20131223_180949_inLine
+BABEL_OP2_302_97570_20131223_180949_outLine
+BABEL_OP2_302_97849_20140203_203804_inLine
+BABEL_OP2_302_97849_20140203_203804_outLine
+BABEL_OP2_302_97896_20131212_155943_inLine
+BABEL_OP2_302_97896_20131212_155943_outLine
+BABEL_OP2_302_98165_20131218_211431_inLine
+BABEL_OP2_302_98165_20131218_211431_outLine
+BABEL_OP2_302_99202_20131226_015321_inLine
+BABEL_OP2_302_99202_20131226_015321_outLine
+BABEL_OP2_302_99264_20140111_215716_inLine
+BABEL_OP2_302_99264_20140111_215716_outLine
+BABEL_OP2_302_99487_20131109_171503_inLine
+BABEL_OP2_302_99718_20131113_003931_inLine
+BABEL_OP2_302_99718_20131113_003931_outLine
+BABEL_OP2_302_99952_20140203_225818_inLine
+BABEL_OP2_302_99955_20140127_230118_inLine
+BABEL_OP2_302_99975_20140317_202757_inLine
+BABEL_OP2_302_99975_20140317_202757_outLine
diff --git a/egs/babel/s5d/conf/lists/303-telugu/dev.2h.list b/egs/babel/s5d/conf/lists/303-telugu/dev.2h.list
new file mode 100644
index 00000000000..2109ba73287
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/303-telugu/dev.2h.list
@@ -0,0 +1,126 @@
+BABEL_OP2_303_12220_20131108_232918_inLine
+BABEL_OP2_303_12220_20131108_232918_outLine
+BABEL_OP2_303_13040_20131115_232722_inLine
+BABEL_OP2_303_13040_20131115_232722_outLine
+BABEL_OP2_303_14158_20131204_203458_inLine
+BABEL_OP2_303_14158_20131204_203458_outLine
+BABEL_OP2_303_15466_20140204_191250_inLine
+BABEL_OP2_303_15466_20140204_191250_outLine
+BABEL_OP2_303_16056_20131030_201705_inLine
+BABEL_OP2_303_16056_20131030_201705_outLine
+BABEL_OP2_303_16475_20131109_024735_inLine
+BABEL_OP2_303_16475_20131109_024735_outLine
+BABEL_OP2_303_17280_20131105_033157_inLine
+BABEL_OP2_303_17280_20131105_033157_outLine
+BABEL_OP2_303_19703_20131114_213952_inLine
+BABEL_OP2_303_19703_20131114_213952_outLine
+BABEL_OP2_303_21029_20131112_180205_inLine
+BABEL_OP2_303_21029_20131112_180205_outLine
+BABEL_OP2_303_22965_20131114_213605_inLine
+BABEL_OP2_303_22965_20131114_213605_outLine
+BABEL_OP2_303_28585_20131204_042033_inLine
+BABEL_OP2_303_28585_20131204_042033_outLine
+BABEL_OP2_303_28600_20131201_001853_inLine
+BABEL_OP2_303_28600_20131201_001853_outLine
+BABEL_OP2_303_28945_20131111_210924_inLine
+BABEL_OP2_303_28945_20131111_210924_outLine
+BABEL_OP2_303_34197_20131105_003635_inLine
+BABEL_OP2_303_34197_20131105_003635_outLine
+BABEL_OP2_303_34336_20131114_162157_inLine
+BABEL_OP2_303_34336_20131114_162157_outLine
+BABEL_OP2_303_36059_20131218_034050_inLine
+BABEL_OP2_303_36059_20131218_034050_outLine
+BABEL_OP2_303_37499_20140129_194730_inLine
+BABEL_OP2_303_37499_20140129_194730_outLine
+BABEL_OP2_303_37499_20140130_010436_inLine
+BABEL_OP2_303_37499_20140130_010436_outLine
+BABEL_OP2_303_38554_20131024_205502_inLine
+BABEL_OP2_303_38554_20131024_205502_outLine
+BABEL_OP2_303_39848_20131113_195552_inLine
+BABEL_OP2_303_39848_20131113_195552_outLine
+BABEL_OP2_303_40713_20131111_182733_inLine
+BABEL_OP2_303_40713_20131111_182733_outLine
+BABEL_OP2_303_40740_20131205_003945_inLine
+BABEL_OP2_303_40740_20131205_003945_outLine
+BABEL_OP2_303_41272_20140204_204727_inLine
+BABEL_OP2_303_41272_20140204_204727_outLine
+BABEL_OP2_303_41400_20140222_205655_inLine
+BABEL_OP2_303_41400_20140222_205655_outLine
+BABEL_OP2_303_43794_20140131_221611_inLine
+BABEL_OP2_303_43794_20140131_221611_outLine
+BABEL_OP2_303_45560_20131029_184514_inLine
+BABEL_OP2_303_45560_20131029_184514_outLine
+BABEL_OP2_303_46333_20131102_160049_inLine
+BABEL_OP2_303_46333_20131102_160049_outLine
+BABEL_OP2_303_46702_20131023_225137_inLine
+BABEL_OP2_303_46702_20131023_225137_outLine
+BABEL_OP2_303_49287_20131115_193114_inLine
+BABEL_OP2_303_49287_20131115_193114_outLine
+BABEL_OP2_303_49306_20140204_203901_inLine
+BABEL_OP2_303_49306_20140204_203901_outLine
+BABEL_OP2_303_51858_20140219_183931_inLine
+BABEL_OP2_303_51858_20140219_183931_outLine
+BABEL_OP2_303_52854_20131105_013802_inLine
+BABEL_OP2_303_52854_20131105_013802_outLine
+BABEL_OP2_303_55818_20131027_191439_inLine
+BABEL_OP2_303_55818_20131027_191439_outLine
+BABEL_OP2_303_56684_20131205_182944_inLine
+BABEL_OP2_303_56684_20131205_182944_outLine
+BABEL_OP2_303_56720_20131122_215343_inLine
+BABEL_OP2_303_56720_20131122_215343_outLine
+BABEL_OP2_303_58734_20131109_181122_inLine
+BABEL_OP2_303_58734_20131109_181122_outLine
+BABEL_OP2_303_60474_20131113_232723_inLine
+BABEL_OP2_303_60474_20131113_232723_outLine
+BABEL_OP2_303_61167_20131104_210455_inLine
+BABEL_OP2_303_61167_20131104_210455_outLine
+BABEL_OP2_303_62289_20140222_212804_inLine
+BABEL_OP2_303_62289_20140222_212804_outLine
+BABEL_OP2_303_64759_20131104_194712_inLine
+BABEL_OP2_303_64759_20131104_194712_outLine
+BABEL_OP2_303_64759_20131104_195356_inLine
+BABEL_OP2_303_64759_20131104_195356_outLine
+BABEL_OP2_303_65370_20140222_225324_inLine
+BABEL_OP2_303_65370_20140222_225324_outLine
+BABEL_OP2_303_69574_20131027_004044_inLine
+BABEL_OP2_303_69574_20131027_004044_outLine
+BABEL_OP2_303_70110_20131025_151421_inLine
+BABEL_OP2_303_70110_20131025_151421_outLine
+BABEL_OP2_303_73119_20131115_162847_inLine
+BABEL_OP2_303_73119_20131115_162847_outLine
+BABEL_OP2_303_73119_20131115_164236_inLine
+BABEL_OP2_303_73119_20131115_164236_outLine
+BABEL_OP2_303_73446_20140111_183215_inLine
+BABEL_OP2_303_73446_20140111_183215_outLine
+BABEL_OP2_303_74280_20131025_160420_inLine
+BABEL_OP2_303_74280_20131025_160420_outLine
+BABEL_OP2_303_75064_20131114_174949_inLine
+BABEL_OP2_303_75064_20131114_174949_outLine
+BABEL_OP2_303_77112_20131114_020655_inLine
+BABEL_OP2_303_77112_20131114_020655_outLine
+BABEL_OP2_303_82089_20131111_003358_inLine
+BABEL_OP2_303_82089_20131111_003358_outLine
+BABEL_OP2_303_83455_20131115_205335_inLine
+BABEL_OP2_303_83455_20131115_205335_outLine
+BABEL_OP2_303_84709_20140205_175937_inLine
+BABEL_OP2_303_84709_20140205_175937_outLine
+BABEL_OP2_303_86472_20131204_195705_inLine
+BABEL_OP2_303_86472_20131204_195705_outLine
+BABEL_OP2_303_86557_20131025_175510_inLine
+BABEL_OP2_303_86557_20131025_175510_outLine
+BABEL_OP2_303_87073_20131027_001213_inLine
+BABEL_OP2_303_87073_20131027_001213_outLine
+BABEL_OP2_303_87629_20131114_030529_inLine
+BABEL_OP2_303_87629_20131114_030529_outLine
+BABEL_OP2_303_88988_20140218_203032_inLine
+BABEL_OP2_303_88988_20140218_203032_outLine
+BABEL_OP2_303_91825_20131025_170933_inLine
+BABEL_OP2_303_91825_20131025_170933_outLine
+BABEL_OP2_303_91977_20131130_190309_inLine
+BABEL_OP2_303_91977_20131130_190309_outLine
+BABEL_OP2_303_92096_20131226_204359_inLine
+BABEL_OP2_303_92096_20131226_204359_outLine
+BABEL_OP2_303_92509_20131027_003447_inLine
+BABEL_OP2_303_92509_20131027_003447_outLine
+BABEL_OP2_303_99487_20131027_195100_inLine
+BABEL_OP2_303_99487_20131027_195100_outLine
diff --git a/egs/babel/s5d/conf/lists/303-telugu/dev.list b/egs/babel/s5d/conf/lists/303-telugu/dev.list
new file mode 100644
index 00000000000..2109ba73287
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/303-telugu/dev.list
@@ -0,0 +1,126 @@
+BABEL_OP2_303_12220_20131108_232918_inLine
+BABEL_OP2_303_12220_20131108_232918_outLine
+BABEL_OP2_303_13040_20131115_232722_inLine
+BABEL_OP2_303_13040_20131115_232722_outLine
+BABEL_OP2_303_14158_20131204_203458_inLine
+BABEL_OP2_303_14158_20131204_203458_outLine
+BABEL_OP2_303_15466_20140204_191250_inLine
+BABEL_OP2_303_15466_20140204_191250_outLine
+BABEL_OP2_303_16056_20131030_201705_inLine
+BABEL_OP2_303_16056_20131030_201705_outLine
+BABEL_OP2_303_16475_20131109_024735_inLine
+BABEL_OP2_303_16475_20131109_024735_outLine
+BABEL_OP2_303_17280_20131105_033157_inLine
+BABEL_OP2_303_17280_20131105_033157_outLine
+BABEL_OP2_303_19703_20131114_213952_inLine
+BABEL_OP2_303_19703_20131114_213952_outLine
+BABEL_OP2_303_21029_20131112_180205_inLine
+BABEL_OP2_303_21029_20131112_180205_outLine
+BABEL_OP2_303_22965_20131114_213605_inLine
+BABEL_OP2_303_22965_20131114_213605_outLine
+BABEL_OP2_303_28585_20131204_042033_inLine
+BABEL_OP2_303_28585_20131204_042033_outLine
+BABEL_OP2_303_28600_20131201_001853_inLine
+BABEL_OP2_303_28600_20131201_001853_outLine
+BABEL_OP2_303_28945_20131111_210924_inLine
+BABEL_OP2_303_28945_20131111_210924_outLine
+BABEL_OP2_303_34197_20131105_003635_inLine
+BABEL_OP2_303_34197_20131105_003635_outLine
+BABEL_OP2_303_34336_20131114_162157_inLine
+BABEL_OP2_303_34336_20131114_162157_outLine
+BABEL_OP2_303_36059_20131218_034050_inLine
+BABEL_OP2_303_36059_20131218_034050_outLine
+BABEL_OP2_303_37499_20140129_194730_inLine
+BABEL_OP2_303_37499_20140129_194730_outLine
+BABEL_OP2_303_37499_20140130_010436_inLine
+BABEL_OP2_303_37499_20140130_010436_outLine
+BABEL_OP2_303_38554_20131024_205502_inLine
+BABEL_OP2_303_38554_20131024_205502_outLine
+BABEL_OP2_303_39848_20131113_195552_inLine
+BABEL_OP2_303_39848_20131113_195552_outLine
+BABEL_OP2_303_40713_20131111_182733_inLine
+BABEL_OP2_303_40713_20131111_182733_outLine
+BABEL_OP2_303_40740_20131205_003945_inLine
+BABEL_OP2_303_40740_20131205_003945_outLine
+BABEL_OP2_303_41272_20140204_204727_inLine
+BABEL_OP2_303_41272_20140204_204727_outLine
+BABEL_OP2_303_41400_20140222_205655_inLine
+BABEL_OP2_303_41400_20140222_205655_outLine
+BABEL_OP2_303_43794_20140131_221611_inLine
+BABEL_OP2_303_43794_20140131_221611_outLine
+BABEL_OP2_303_45560_20131029_184514_inLine
+BABEL_OP2_303_45560_20131029_184514_outLine
+BABEL_OP2_303_46333_20131102_160049_inLine
+BABEL_OP2_303_46333_20131102_160049_outLine
+BABEL_OP2_303_46702_20131023_225137_inLine
+BABEL_OP2_303_46702_20131023_225137_outLine
+BABEL_OP2_303_49287_20131115_193114_inLine
+BABEL_OP2_303_49287_20131115_193114_outLine
+BABEL_OP2_303_49306_20140204_203901_inLine
+BABEL_OP2_303_49306_20140204_203901_outLine
+BABEL_OP2_303_51858_20140219_183931_inLine
+BABEL_OP2_303_51858_20140219_183931_outLine
+BABEL_OP2_303_52854_20131105_013802_inLine
+BABEL_OP2_303_52854_20131105_013802_outLine
+BABEL_OP2_303_55818_20131027_191439_inLine
+BABEL_OP2_303_55818_20131027_191439_outLine
+BABEL_OP2_303_56684_20131205_182944_inLine
+BABEL_OP2_303_56684_20131205_182944_outLine
+BABEL_OP2_303_56720_20131122_215343_inLine
+BABEL_OP2_303_56720_20131122_215343_outLine
+BABEL_OP2_303_58734_20131109_181122_inLine
+BABEL_OP2_303_58734_20131109_181122_outLine
+BABEL_OP2_303_60474_20131113_232723_inLine
+BABEL_OP2_303_60474_20131113_232723_outLine
+BABEL_OP2_303_61167_20131104_210455_inLine
+BABEL_OP2_303_61167_20131104_210455_outLine
+BABEL_OP2_303_62289_20140222_212804_inLine
+BABEL_OP2_303_62289_20140222_212804_outLine
+BABEL_OP2_303_64759_20131104_194712_inLine
+BABEL_OP2_303_64759_20131104_194712_outLine
+BABEL_OP2_303_64759_20131104_195356_inLine
+BABEL_OP2_303_64759_20131104_195356_outLine
+BABEL_OP2_303_65370_20140222_225324_inLine
+BABEL_OP2_303_65370_20140222_225324_outLine
+BABEL_OP2_303_69574_20131027_004044_inLine
+BABEL_OP2_303_69574_20131027_004044_outLine
+BABEL_OP2_303_70110_20131025_151421_inLine
+BABEL_OP2_303_70110_20131025_151421_outLine
+BABEL_OP2_303_73119_20131115_162847_inLine
+BABEL_OP2_303_73119_20131115_162847_outLine
+BABEL_OP2_303_73119_20131115_164236_inLine
+BABEL_OP2_303_73119_20131115_164236_outLine
+BABEL_OP2_303_73446_20140111_183215_inLine
+BABEL_OP2_303_73446_20140111_183215_outLine
+BABEL_OP2_303_74280_20131025_160420_inLine
+BABEL_OP2_303_74280_20131025_160420_outLine
+BABEL_OP2_303_75064_20131114_174949_inLine
+BABEL_OP2_303_75064_20131114_174949_outLine
+BABEL_OP2_303_77112_20131114_020655_inLine
+BABEL_OP2_303_77112_20131114_020655_outLine
+BABEL_OP2_303_82089_20131111_003358_inLine
+BABEL_OP2_303_82089_20131111_003358_outLine
+BABEL_OP2_303_83455_20131115_205335_inLine
+BABEL_OP2_303_83455_20131115_205335_outLine
+BABEL_OP2_303_84709_20140205_175937_inLine
+BABEL_OP2_303_84709_20140205_175937_outLine
+BABEL_OP2_303_86472_20131204_195705_inLine
+BABEL_OP2_303_86472_20131204_195705_outLine
+BABEL_OP2_303_86557_20131025_175510_inLine
+BABEL_OP2_303_86557_20131025_175510_outLine
+BABEL_OP2_303_87073_20131027_001213_inLine
+BABEL_OP2_303_87073_20131027_001213_outLine
+BABEL_OP2_303_87629_20131114_030529_inLine
+BABEL_OP2_303_87629_20131114_030529_outLine
+BABEL_OP2_303_88988_20140218_203032_inLine
+BABEL_OP2_303_88988_20140218_203032_outLine
+BABEL_OP2_303_91825_20131025_170933_inLine
+BABEL_OP2_303_91825_20131025_170933_outLine
+BABEL_OP2_303_91977_20131130_190309_inLine
+BABEL_OP2_303_91977_20131130_190309_outLine
+BABEL_OP2_303_92096_20131226_204359_inLine
+BABEL_OP2_303_92096_20131226_204359_outLine
+BABEL_OP2_303_92509_20131027_003447_inLine
+BABEL_OP2_303_92509_20131027_003447_outLine
+BABEL_OP2_303_99487_20131027_195100_inLine
+BABEL_OP2_303_99487_20131027_195100_outLine
diff --git a/egs/babel/s5d/conf/lists/303-telugu/eval.list b/egs/babel/s5d/conf/lists/303-telugu/eval.list
new file mode 100644
index 00000000000..e40856e3e9d
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/303-telugu/eval.list
@@ -0,0 +1,192 @@
+BABEL_OP2_303_10416_20131110_200711_inLine
+BABEL_OP2_303_10416_20131110_200711_outLine
+BABEL_OP2_303_10416_20131110_202619_inLine
+BABEL_OP2_303_10416_20131110_202619_outLine
+BABEL_OP2_303_10974_20131115_172420_inLine
+BABEL_OP2_303_10974_20131115_172420_outLine
+BABEL_OP2_303_10974_20131115_193956_inLine
+BABEL_OP2_303_10974_20131115_193956_outLine
+BABEL_OP2_303_11096_20140214_163324_inLine
+BABEL_OP2_303_11096_20140214_163324_outLine
+BABEL_OP2_303_12321_20131129_164832_inLine
+BABEL_OP2_303_12321_20131129_164832_outLine
+BABEL_OP2_303_12635_20131211_184855_inLine
+BABEL_OP2_303_12635_20131211_184855_outLine
+BABEL_OP2_303_12916_20131029_201419_inLine
+BABEL_OP2_303_12916_20131029_201419_outLine
+BABEL_OP2_303_14729_20131215_013912_inLine
+BABEL_OP2_303_14729_20131215_013912_outLine
+BABEL_OP2_303_17115_20140211_231649_inLine
+BABEL_OP2_303_17115_20140211_231649_outLine
+BABEL_OP2_303_17165_20131113_202255_inLine
+BABEL_OP2_303_17165_20131113_202255_outLine
+BABEL_OP2_303_19120_20131224_010850_inLine
+BABEL_OP2_303_19120_20131224_010850_outLine
+BABEL_OP2_303_23151_20131206_220005_inLine
+BABEL_OP2_303_23151_20131206_220005_outLine
+BABEL_OP2_303_23983_20140201_224449_inLine
+BABEL_OP2_303_23983_20140201_224449_outLine
+BABEL_OP2_303_24033_20131205_013346_inLine
+BABEL_OP2_303_24033_20131205_013346_outLine
+BABEL_OP2_303_26206_20131116_212034_inLine
+BABEL_OP2_303_26206_20131116_212034_outLine
+BABEL_OP2_303_27218_20131101_202112_inLine
+BABEL_OP2_303_27218_20131101_202112_outLine
+BABEL_OP2_303_28422_20131130_210214_inLine
+BABEL_OP2_303_28422_20131130_210214_outLine
+BABEL_OP2_303_29168_20131105_002039_inLine
+BABEL_OP2_303_29168_20131105_002039_outLine
+BABEL_OP2_303_31668_20140204_210838_inLine
+BABEL_OP2_303_31668_20140204_210838_outLine
+BABEL_OP2_303_31992_20131107_183659_inLine
+BABEL_OP2_303_31992_20131107_183659_outLine
+BABEL_OP2_303_32872_20140127_213914_inLine
+BABEL_OP2_303_32872_20140127_213914_outLine
+BABEL_OP2_303_32961_20140218_193151_inLine
+BABEL_OP2_303_32961_20140218_193151_outLine
+BABEL_OP2_303_33635_20131109_185729_inLine
+BABEL_OP2_303_33635_20131109_185729_outLine
+BABEL_OP2_303_34019_20140219_191126_inLine
+BABEL_OP2_303_34019_20140219_191126_outLine
+BABEL_OP2_303_34019_20140219_192321_inLine
+BABEL_OP2_303_34019_20140219_192321_outLine
+BABEL_OP2_303_34688_20131031_000954_inLine
+BABEL_OP2_303_34688_20131031_000954_outLine
+BABEL_OP2_303_37853_20131230_224659_inLine
+BABEL_OP2_303_37853_20131230_224659_outLine
+BABEL_OP2_303_39159_20131024_202413_inLine
+BABEL_OP2_303_39159_20131024_202413_outLine
+BABEL_OP2_303_42600_20131114_231539_inLine
+BABEL_OP2_303_42600_20131114_231539_outLine
+BABEL_OP2_303_43990_20140204_202831_inLine
+BABEL_OP2_303_43990_20140204_202831_outLine
+BABEL_OP2_303_44290_20140204_193649_inLine
+BABEL_OP2_303_44290_20140204_193649_outLine
+BABEL_OP2_303_45642_20131114_201049_inLine
+BABEL_OP2_303_45642_20131114_201049_outLine
+BABEL_OP2_303_45642_20131114_203559_inLine
+BABEL_OP2_303_45642_20131114_203559_outLine
+BABEL_OP2_303_45770_20131029_180305_inLine
+BABEL_OP2_303_45770_20131029_180305_outLine
+BABEL_OP2_303_45908_20140211_224100_inLine
+BABEL_OP2_303_45908_20140211_224100_outLine
+BABEL_OP2_303_46974_20131116_205026_inLine
+BABEL_OP2_303_46974_20131116_205026_outLine
+BABEL_OP2_303_47959_20131113_020835_inLine
+BABEL_OP2_303_47959_20131113_020835_outLine
+BABEL_OP2_303_48610_20131031_175448_inLine
+BABEL_OP2_303_48610_20131031_175448_outLine
+BABEL_OP2_303_49775_20131029_201844_inLine
+BABEL_OP2_303_49775_20131029_201844_outLine
+BABEL_OP2_303_49812_20131208_222038_inLine
+BABEL_OP2_303_49812_20131208_222038_outLine
+BABEL_OP2_303_51530_20140203_190540_inLine
+BABEL_OP2_303_51530_20140203_190540_outLine
+BABEL_OP2_303_52025_20131025_023135_inLine
+BABEL_OP2_303_52025_20131025_023135_outLine
+BABEL_OP2_303_52422_20131205_220934_inLine
+BABEL_OP2_303_52422_20131205_220934_outLine
+BABEL_OP2_303_52442_20131115_192454_inLine
+BABEL_OP2_303_52442_20131115_192454_outLine
+BABEL_OP2_303_52614_20140204_214212_inLine
+BABEL_OP2_303_52614_20140204_214212_outLine
+BABEL_OP2_303_53072_20140116_175409_inLine
+BABEL_OP2_303_53072_20140116_175409_outLine
+BABEL_OP2_303_56090_20131108_182022_inLine
+BABEL_OP2_303_56090_20131108_182022_outLine
+BABEL_OP2_303_57678_20131112_230248_inLine
+BABEL_OP2_303_57678_20131112_230248_outLine
+BABEL_OP2_303_58061_20140219_230114_inLine
+BABEL_OP2_303_58061_20140219_230114_outLine
+BABEL_OP2_303_59898_20131101_004202_inLine
+BABEL_OP2_303_59898_20131101_004202_outLine
+BABEL_OP2_303_59928_20131113_223724_inLine
+BABEL_OP2_303_59928_20131113_223724_outLine
+BABEL_OP2_303_59928_20131113_225824_inLine
+BABEL_OP2_303_59928_20131113_225824_outLine
+BABEL_OP2_303_60026_20131107_170611_inLine
+BABEL_OP2_303_60026_20131107_170611_outLine
+BABEL_OP2_303_60626_20131111_190013_inLine
+BABEL_OP2_303_60626_20131111_190013_outLine
+BABEL_OP2_303_62852_20131105_205005_inLine
+BABEL_OP2_303_62852_20131105_205005_outLine
+BABEL_OP2_303_63481_20131028_222923_inLine
+BABEL_OP2_303_63481_20131028_222923_outLine
+BABEL_OP2_303_63523_20140211_213504_inLine
+BABEL_OP2_303_63523_20140211_213504_outLine
+BABEL_OP2_303_64638_20131202_192509_inLine
+BABEL_OP2_303_64638_20131202_192509_outLine
+BABEL_OP2_303_64796_20131114_235122_inLine
+BABEL_OP2_303_64796_20131114_235122_outLine
+BABEL_OP2_303_65640_20140203_210724_inLine
+BABEL_OP2_303_65640_20140203_210724_outLine
+BABEL_OP2_303_66026_20131201_225144_inLine
+BABEL_OP2_303_66026_20131201_225144_outLine
+BABEL_OP2_303_66837_20131116_170219_inLine
+BABEL_OP2_303_66837_20131116_170219_outLine
+BABEL_OP2_303_66959_20131201_000211_inLine
+BABEL_OP2_303_66959_20131201_000211_outLine
+BABEL_OP2_303_66967_20131026_202801_inLine
+BABEL_OP2_303_66967_20131026_202801_outLine
+BABEL_OP2_303_67373_20131115_001228_inLine
+BABEL_OP2_303_67373_20131115_001228_outLine
+BABEL_OP2_303_67389_20140205_200604_inLine
+BABEL_OP2_303_67389_20140205_200604_outLine
+BABEL_OP2_303_67389_20140205_201314_inLine
+BABEL_OP2_303_67389_20140205_201314_outLine
+BABEL_OP2_303_70282_20131115_224940_inLine
+BABEL_OP2_303_70282_20131115_224940_outLine
+BABEL_OP2_303_73301_20131113_213007_inLine
+BABEL_OP2_303_73301_20131113_213007_outLine
+BABEL_OP2_303_74253_20131118_232619_inLine
+BABEL_OP2_303_74253_20131118_232619_outLine
+BABEL_OP2_303_75359_20140222_204832_inLine
+BABEL_OP2_303_75359_20140222_204832_outLine
+BABEL_OP2_303_77567_20131107_170005_inLine
+BABEL_OP2_303_77567_20131107_170005_outLine
+BABEL_OP2_303_79139_20131113_181752_inLine
+BABEL_OP2_303_79139_20131113_181752_outLine
+BABEL_OP2_303_79858_20131109_210103_inLine
+BABEL_OP2_303_79858_20131109_210103_outLine
+BABEL_OP2_303_81229_20131115_205519_inLine
+BABEL_OP2_303_81229_20131115_205519_outLine
+BABEL_OP2_303_81392_20131118_201348_inLine
+BABEL_OP2_303_81392_20131118_201348_outLine
+BABEL_OP2_303_81404_20131105_042501_inLine
+BABEL_OP2_303_81404_20131105_042501_outLine
+BABEL_OP2_303_83436_20131027_190144_inLine
+BABEL_OP2_303_83436_20131027_190144_outLine
+BABEL_OP2_303_84055_20131215_032429_inLine
+BABEL_OP2_303_84055_20131215_032429_outLine
+BABEL_OP2_303_84583_20131114_154624_inLine
+BABEL_OP2_303_84583_20131114_154624_outLine
+BABEL_OP2_303_87545_20140203_185743_inLine
+BABEL_OP2_303_87545_20140203_185743_outLine
+BABEL_OP2_303_87921_20131204_182122_inLine
+BABEL_OP2_303_87921_20131204_182122_outLine
+BABEL_OP2_303_89330_20140219_012432_inLine
+BABEL_OP2_303_89330_20140219_012432_outLine
+BABEL_OP2_303_93224_20131114_192358_inLine
+BABEL_OP2_303_93224_20131114_192358_outLine
+BABEL_OP2_303_94587_20131120_180235_inLine
+BABEL_OP2_303_94587_20131120_180235_outLine
+BABEL_OP2_303_95294_20131204_200315_inLine
+BABEL_OP2_303_95294_20131204_200315_outLine
+BABEL_OP2_303_95571_20140219_211426_inLine
+BABEL_OP2_303_95571_20140219_211426_outLine
+BABEL_OP2_303_96405_20131113_205241_inLine
+BABEL_OP2_303_96405_20131113_205241_outLine
+BABEL_OP2_303_96504_20131113_192045_inLine
+BABEL_OP2_303_96504_20131113_192045_outLine
+BABEL_OP2_303_96934_20131115_014431_inLine
+BABEL_OP2_303_96934_20131115_014431_outLine
+BABEL_OP2_303_96985_20131030_204329_inLine
+BABEL_OP2_303_96985_20131030_204329_outLine
+BABEL_OP2_303_97570_20131115_235518_inLine
+BABEL_OP2_303_97570_20131115_235518_outLine
+BABEL_OP2_303_97849_20140203_203326_inLine
+BABEL_OP2_303_97849_20140203_203326_outLine
+BABEL_OP2_303_99516_20131026_193835_inLine
+BABEL_OP2_303_99516_20131026_193835_outLine
+BABEL_OP2_303_99718_20131031_171828_inLine
+BABEL_OP2_303_99718_20131031_171828_outLine
diff --git a/egs/babel/s5d/conf/lists/303-telugu/evalpart1.list b/egs/babel/s5d/conf/lists/303-telugu/evalpart1.list
new file mode 100644
index 00000000000..528cd0840d9
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/303-telugu/evalpart1.list
@@ -0,0 +1,62 @@
+BABEL_OP2_303_11096_20140214_163324_inLine
+BABEL_OP2_303_11096_20140214_163324_outLine
+BABEL_OP2_303_14729_20131215_013912_inLine
+BABEL_OP2_303_14729_20131215_013912_outLine
+BABEL_OP2_303_17165_20131113_202255_inLine
+BABEL_OP2_303_17165_20131113_202255_outLine
+BABEL_OP2_303_23983_20140201_224449_inLine
+BABEL_OP2_303_23983_20140201_224449_outLine
+BABEL_OP2_303_24033_20131205_013346_inLine
+BABEL_OP2_303_24033_20131205_013346_outLine
+BABEL_OP2_303_29168_20131105_002039_inLine
+BABEL_OP2_303_29168_20131105_002039_outLine
+BABEL_OP2_303_32872_20140127_213914_inLine
+BABEL_OP2_303_32872_20140127_213914_outLine
+BABEL_OP2_303_33635_20131109_185729_inLine
+BABEL_OP2_303_33635_20131109_185729_outLine
+BABEL_OP2_303_34019_20140219_191126_inLine
+BABEL_OP2_303_34019_20140219_191126_outLine
+BABEL_OP2_303_34019_20140219_192321_inLine
+BABEL_OP2_303_34019_20140219_192321_outLine
+BABEL_OP2_303_44290_20140204_193649_inLine
+BABEL_OP2_303_44290_20140204_193649_outLine
+BABEL_OP2_303_47959_20131113_020835_inLine
+BABEL_OP2_303_47959_20131113_020835_outLine
+BABEL_OP2_303_49775_20131029_201844_inLine
+BABEL_OP2_303_49775_20131029_201844_outLine
+BABEL_OP2_303_52442_20131115_192454_inLine
+BABEL_OP2_303_52442_20131115_192454_outLine
+BABEL_OP2_303_56090_20131108_182022_inLine
+BABEL_OP2_303_56090_20131108_182022_outLine
+BABEL_OP2_303_60626_20131111_190013_inLine
+BABEL_OP2_303_60626_20131111_190013_outLine
+BABEL_OP2_303_63481_20131028_222923_inLine
+BABEL_OP2_303_63481_20131028_222923_outLine
+BABEL_OP2_303_63523_20140211_213504_inLine
+BABEL_OP2_303_63523_20140211_213504_outLine
+BABEL_OP2_303_66959_20131201_000211_inLine
+BABEL_OP2_303_66959_20131201_000211_outLine
+BABEL_OP2_303_66967_20131026_202801_inLine
+BABEL_OP2_303_66967_20131026_202801_outLine
+BABEL_OP2_303_74253_20131118_232619_inLine
+BABEL_OP2_303_74253_20131118_232619_outLine
+BABEL_OP2_303_75359_20140222_204832_inLine
+BABEL_OP2_303_75359_20140222_204832_outLine
+BABEL_OP2_303_77567_20131107_170005_inLine
+BABEL_OP2_303_77567_20131107_170005_outLine
+BABEL_OP2_303_79858_20131109_210103_inLine
+BABEL_OP2_303_79858_20131109_210103_outLine
+BABEL_OP2_303_81229_20131115_205519_inLine
+BABEL_OP2_303_81229_20131115_205519_outLine
+BABEL_OP2_303_84583_20131114_154624_inLine
+BABEL_OP2_303_84583_20131114_154624_outLine
+BABEL_OP2_303_89330_20140219_012432_inLine
+BABEL_OP2_303_89330_20140219_012432_outLine
+BABEL_OP2_303_95294_20131204_200315_inLine
+BABEL_OP2_303_95294_20131204_200315_outLine
+BABEL_OP2_303_95571_20140219_211426_inLine
+BABEL_OP2_303_95571_20140219_211426_outLine
+BABEL_OP2_303_96934_20131115_014431_inLine
+BABEL_OP2_303_96934_20131115_014431_outLine
+BABEL_OP2_303_97570_20131115_235518_inLine
+BABEL_OP2_303_97570_20131115_235518_outLine
diff --git a/egs/babel/s5d/conf/lists/303-telugu/sub-train.list b/egs/babel/s5d/conf/lists/303-telugu/sub-train.list
new file mode 100644
index 00000000000..3694701cd97
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/303-telugu/sub-train.list
@@ -0,0 +1,134 @@
+BABEL_OP2_303_10188_20131108_175933_inLine
+BABEL_OP2_303_10188_20131108_175933_outLine
+BABEL_OP2_303_11673_20131026_034803_inLine
+BABEL_OP2_303_11673_20131026_034803_outLine
+BABEL_OP2_303_13030_20131109_023950_inLine
+BABEL_OP2_303_13030_20131109_023950_outLine
+BABEL_OP2_303_14875_20131112_211504_inLine
+BABEL_OP2_303_14875_20131112_211504_outLine
+BABEL_OP2_303_14929_20131112_164303_inLine
+BABEL_OP2_303_14929_20131112_164303_outLine
+BABEL_OP2_303_14929_20131112_165202_inLine
+BABEL_OP2_303_14929_20131112_165202_outLine
+BABEL_OP2_303_14929_20131112_171242_inLine
+BABEL_OP2_303_14929_20131112_171242_outLine
+BABEL_OP2_303_17127_20131224_002728_inLine
+BABEL_OP2_303_17127_20131224_002728_outLine
+BABEL_OP2_303_18380_20131111_015535_inLine
+BABEL_OP2_303_18380_20131111_015535_outLine
+BABEL_OP2_303_18380_20131119_224151_inLine
+BABEL_OP2_303_18380_20131119_224151_outLine
+BABEL_OP2_303_21435_20131226_175809_inLine
+BABEL_OP2_303_21435_20131226_175809_outLine
+BABEL_OP2_303_21435_20131226_181138_inLine
+BABEL_OP2_303_21435_20131226_181138_outLine
+BABEL_OP2_303_23681_20140119_223006_inLine
+BABEL_OP2_303_23681_20140119_223006_outLine
+BABEL_OP2_303_24231_20140201_230638_inLine
+BABEL_OP2_303_24231_20140201_230638_outLine
+BABEL_OP2_303_24589_20131114_182843_inLine
+BABEL_OP2_303_24589_20131114_182843_outLine
+BABEL_OP2_303_25767_20131028_161454_inLine
+BABEL_OP2_303_25767_20131028_161454_outLine
+BABEL_OP2_303_25961_20131030_225755_inLine
+BABEL_OP2_303_25961_20131030_225755_outLine
+BABEL_OP2_303_31490_20131105_010342_inLine
+BABEL_OP2_303_31490_20131105_010342_outLine
+BABEL_OP2_303_31490_20131105_011345_inLine
+BABEL_OP2_303_31490_20131105_011345_outLine
+BABEL_OP2_303_32861_20131216_223500_inLine
+BABEL_OP2_303_32861_20131216_223500_outLine
+BABEL_OP2_303_33704_20131210_195453_inLine
+BABEL_OP2_303_33704_20131210_195453_outLine
+BABEL_OP2_303_35069_20131205_165127_inLine
+BABEL_OP2_303_35069_20131205_165127_outLine
+BABEL_OP2_303_36341_20131024_221132_inLine
+BABEL_OP2_303_36341_20131024_221132_outLine
+BABEL_OP2_303_36669_20131110_155909_inLine
+BABEL_OP2_303_36669_20131110_155909_outLine
+BABEL_OP2_303_37682_20131105_023703_inLine
+BABEL_OP2_303_37682_20131105_023703_outLine
+BABEL_OP2_303_39307_20131027_043600_inLine
+BABEL_OP2_303_39307_20131027_043600_outLine
+BABEL_OP2_303_40565_20131116_182747_inLine
+BABEL_OP2_303_40565_20131116_182747_outLine
+BABEL_OP2_303_41493_20131027_155001_inLine
+BABEL_OP2_303_41493_20131027_155001_outLine
+BABEL_OP2_303_42718_20140118_201247_inLine
+BABEL_OP2_303_42718_20140118_201247_outLine
+BABEL_OP2_303_43115_20140201_195115_inLine
+BABEL_OP2_303_43115_20140201_195115_outLine
+BABEL_OP2_303_43789_20131111_163502_inLine
+BABEL_OP2_303_43789_20131111_163502_outLine
+BABEL_OP2_303_46550_20131111_233520_inLine
+BABEL_OP2_303_46550_20131111_233520_outLine
+BABEL_OP2_303_46558_20131028_190003_inLine
+BABEL_OP2_303_46558_20131028_190003_outLine
+BABEL_OP2_303_47823_20131201_004209_inLine
+BABEL_OP2_303_47823_20131201_004209_outLine
+BABEL_OP2_303_50726_20131028_210641_inLine
+BABEL_OP2_303_50726_20131028_210641_outLine
+BABEL_OP2_303_51540_20131204_041920_inLine
+BABEL_OP2_303_51540_20131204_041920_outLine
+BABEL_OP2_303_60538_20131111_200459_inLine
+BABEL_OP2_303_60538_20131111_200459_outLine
+BABEL_OP2_303_63084_20131115_202655_inLine
+BABEL_OP2_303_63084_20131115_202655_outLine
+BABEL_OP2_303_64768_20131113_203120_inLine
+BABEL_OP2_303_64768_20131113_203120_outLine
+BABEL_OP2_303_65077_20131024_174953_inLine
+BABEL_OP2_303_65077_20131024_174953_outLine
+BABEL_OP2_303_67964_20140222_211658_inLine
+BABEL_OP2_303_67964_20140222_211658_outLine
+BABEL_OP2_303_69107_20131113_222827_inLine
+BABEL_OP2_303_69107_20131113_222827_outLine
+BABEL_OP2_303_69633_20131130_193122_inLine
+BABEL_OP2_303_69633_20131130_193122_outLine
+BABEL_OP2_303_72587_20131115_221128_inLine
+BABEL_OP2_303_72587_20131115_221128_outLine
+BABEL_OP2_303_73990_20140219_201105_inLine
+BABEL_OP2_303_73990_20140219_201105_outLine
+BABEL_OP2_303_73990_20140219_202300_inLine
+BABEL_OP2_303_73990_20140219_202300_outLine
+BABEL_OP2_303_74886_20131101_194728_inLine
+BABEL_OP2_303_74886_20131101_194728_outLine
+BABEL_OP2_303_75365_20140218_173521_inLine
+BABEL_OP2_303_75365_20140218_173521_outLine
+BABEL_OP2_303_76756_20131115_182926_inLine
+BABEL_OP2_303_76756_20131115_182926_outLine
+BABEL_OP2_303_78454_20131114_230026_inLine
+BABEL_OP2_303_78454_20131114_230026_outLine
+BABEL_OP2_303_79820_20131114_181827_inLine
+BABEL_OP2_303_79820_20131114_181827_outLine
+BABEL_OP2_303_80881_20131027_165716_inLine
+BABEL_OP2_303_80881_20131027_165716_outLine
+BABEL_OP2_303_81424_20131120_192659_inLine
+BABEL_OP2_303_81424_20131120_192659_outLine
+BABEL_OP2_303_83935_20131122_222948_inLine
+BABEL_OP2_303_83935_20131122_222948_outLine
+BABEL_OP2_303_84061_20131104_224830_inLine
+BABEL_OP2_303_84061_20131104_224830_outLine
+BABEL_OP2_303_84327_20131122_203936_inLine
+BABEL_OP2_303_84327_20131122_203936_outLine
+BABEL_OP2_303_85248_20131206_184028_inLine
+BABEL_OP2_303_85248_20131206_184028_outLine
+BABEL_OP2_303_86952_20131105_173230_inLine
+BABEL_OP2_303_86952_20131105_173230_outLine
+BABEL_OP2_303_87884_20131206_022424_inLine
+BABEL_OP2_303_87884_20131206_022424_outLine
+BABEL_OP2_303_87889_20131213_215703_inLine
+BABEL_OP2_303_87889_20131213_215703_outLine
+BABEL_OP2_303_88982_20131115_181618_inLine
+BABEL_OP2_303_88982_20131115_181618_outLine
+BABEL_OP2_303_90080_20131228_233334_inLine
+BABEL_OP2_303_90080_20131228_233334_outLine
+BABEL_OP2_303_90740_20140221_220031_inLine
+BABEL_OP2_303_90740_20140221_220031_outLine
+BABEL_OP2_303_92176_20131115_153306_inLine
+BABEL_OP2_303_92176_20131115_153306_outLine
+BABEL_OP2_303_96324_20131107_162546_inLine
+BABEL_OP2_303_96324_20131107_162546_outLine
+BABEL_OP2_303_97988_20131204_195626_inLine
+BABEL_OP2_303_97988_20131204_195626_outLine
+BABEL_OP2_303_97988_20131204_211137_inLine
+BABEL_OP2_303_97988_20131204_211137_outLine
diff --git a/egs/babel/s5d/conf/lists/303-telugu/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/303-telugu/sub-train.untranscribed.list
new file mode 100644
index 00000000000..7d4ce3b8a3d
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/303-telugu/sub-train.untranscribed.list
@@ -0,0 +1,380 @@
+BABEL_OP2_303_10058_20140205_001109_inLine
+BABEL_OP2_303_10058_20140205_001109_outLine
+BABEL_OP2_303_10638_20140218_213711_inLine
+BABEL_OP2_303_10638_20140218_213711_outLine
+BABEL_OP2_303_10938_20131104_204555_inLine
+BABEL_OP2_303_10938_20131104_204555_outLine
+BABEL_OP2_303_11352_20131224_005439_inLine
+BABEL_OP2_303_11352_20131224_005439_outLine
+BABEL_OP2_303_12036_20131101_174653_inLine
+BABEL_OP2_303_12036_20131101_174653_outLine
+BABEL_OP2_303_12242_20131113_222307_inLine
+BABEL_OP2_303_12242_20131113_222307_outLine
+BABEL_OP2_303_13324_20131107_211806_inLine
+BABEL_OP2_303_13324_20131107_211806_outLine
+BABEL_OP2_303_13586_20131115_180921_inLine
+BABEL_OP2_303_13586_20131115_180921_outLine
+BABEL_OP2_303_13664_20131108_184651_inLine
+BABEL_OP2_303_13664_20131108_184651_outLine
+BABEL_OP2_303_13744_20131026_234054_inLine
+BABEL_OP2_303_13744_20131026_234054_outLine
+BABEL_OP2_303_14229_20131114_032214_inLine
+BABEL_OP2_303_14229_20131114_032214_outLine
+BABEL_OP2_303_14350_20131105_195640_inLine
+BABEL_OP2_303_14350_20131105_195640_outLine
+BABEL_OP2_303_14899_20131102_204324_inLine
+BABEL_OP2_303_14899_20131102_204324_outLine
+BABEL_OP2_303_14972_20131114_023627_inLine
+BABEL_OP2_303_14972_20131114_023627_outLine
+BABEL_OP2_303_15702_20131206_225729_inLine
+BABEL_OP2_303_15702_20131206_225729_outLine
+BABEL_OP2_303_15730_20131101_163118_inLine
+BABEL_OP2_303_15730_20131101_163118_outLine
+BABEL_OP2_303_16184_20131204_033225_inLine
+BABEL_OP2_303_16184_20131204_033225_outLine
+BABEL_OP2_303_16839_20131223_215734_inLine
+BABEL_OP2_303_16839_20131223_215734_outLine
+BABEL_OP2_303_16886_20131108_204525_inLine
+BABEL_OP2_303_16886_20131108_204525_outLine
+BABEL_OP2_303_16938_20131112_015544_inLine
+BABEL_OP2_303_16938_20131112_015544_outLine
+BABEL_OP2_303_17520_20131114_164811_inLine
+BABEL_OP2_303_17520_20131114_164811_outLine
+BABEL_OP2_303_18242_20140218_014910_inLine
+BABEL_OP2_303_18242_20140218_014910_outLine
+BABEL_OP2_303_18924_20131112_001935_inLine
+BABEL_OP2_303_18924_20131112_001935_outLine
+BABEL_OP2_303_20437_20140202_232910_inLine
+BABEL_OP2_303_20437_20140202_232910_outLine
+BABEL_OP2_303_20437_20140202_234756_inLine
+BABEL_OP2_303_20437_20140202_234756_outLine
+BABEL_OP2_303_20985_20131122_183435_inLine
+BABEL_OP2_303_20985_20131122_183435_outLine
+BABEL_OP2_303_23006_20131113_150924_inLine
+BABEL_OP2_303_23006_20131113_150924_outLine
+BABEL_OP2_303_23046_20131114_171927_inLine
+BABEL_OP2_303_23046_20131114_171927_outLine
+BABEL_OP2_303_23239_20131206_181414_inLine
+BABEL_OP2_303_23239_20131206_181414_outLine
+BABEL_OP2_303_23260_20140203_194817_inLine
+BABEL_OP2_303_23260_20140203_194817_outLine
+BABEL_OP2_303_23505_20131109_184015_inLine
+BABEL_OP2_303_23505_20131109_184015_outLine
+BABEL_OP2_303_23980_20131114_202648_inLine
+BABEL_OP2_303_23980_20131114_202648_outLine
+BABEL_OP2_303_24010_20140218_224141_inLine
+BABEL_OP2_303_24010_20140218_224141_outLine
+BABEL_OP2_303_24323_20131201_180512_inLine
+BABEL_OP2_303_24323_20131201_180512_outLine
+BABEL_OP2_303_24470_20131204_174323_inLine
+BABEL_OP2_303_24470_20131204_174323_outLine
+BABEL_OP2_303_24982_20131114_012226_inLine
+BABEL_OP2_303_24982_20131114_012226_outLine
+BABEL_OP2_303_25198_20140121_180931_inLine
+BABEL_OP2_303_25198_20140121_180931_outLine
+BABEL_OP2_303_25719_20131205_191053_inLine
+BABEL_OP2_303_25719_20131205_191053_outLine
+BABEL_OP2_303_26072_20131216_221839_inLine
+BABEL_OP2_303_26072_20131216_221839_outLine
+BABEL_OP2_303_26388_20131113_034454_inLine
+BABEL_OP2_303_26388_20131113_034454_outLine
+BABEL_OP2_303_27125_20131024_195716_inLine
+BABEL_OP2_303_27125_20131024_195716_outLine
+BABEL_OP2_303_27590_20131118_222641_inLine
+BABEL_OP2_303_27590_20131118_222641_outLine
+BABEL_OP2_303_28419_20131113_195258_inLine
+BABEL_OP2_303_28419_20131113_195258_outLine
+BABEL_OP2_303_29404_20131224_014921_inLine
+BABEL_OP2_303_29404_20131224_014921_outLine
+BABEL_OP2_303_29482_20140219_221449_inLine
+BABEL_OP2_303_29482_20140219_221449_outLine
+BABEL_OP2_303_29685_20131105_180851_inLine
+BABEL_OP2_303_29685_20131105_180851_outLine
+BABEL_OP2_303_30013_20131116_185844_inLine
+BABEL_OP2_303_30013_20131116_185844_outLine
+BABEL_OP2_303_30345_20131224_005453_inLine
+BABEL_OP2_303_30345_20131224_005453_outLine
+BABEL_OP2_303_30395_20131112_004350_inLine
+BABEL_OP2_303_30395_20131112_004350_outLine
+BABEL_OP2_303_30645_20131029_193530_inLine
+BABEL_OP2_303_30645_20131029_193530_outLine
+BABEL_OP2_303_32048_20131204_223219_inLine
+BABEL_OP2_303_32048_20131204_223219_outLine
+BABEL_OP2_303_32171_20140203_203242_inLine
+BABEL_OP2_303_32171_20140203_203242_outLine
+BABEL_OP2_303_32301_20131120_212820_inLine
+BABEL_OP2_303_32301_20131120_212820_outLine
+BABEL_OP2_303_33229_20131206_220332_inLine
+BABEL_OP2_303_33229_20131206_220332_outLine
+BABEL_OP2_303_33424_20140129_211552_inLine
+BABEL_OP2_303_33424_20140129_211552_outLine
+BABEL_OP2_303_33672_20131029_201146_inLine
+BABEL_OP2_303_33672_20131029_201146_outLine
+BABEL_OP2_303_33913_20131116_003805_inLine
+BABEL_OP2_303_33913_20131116_003805_outLine
+BABEL_OP2_303_34106_20131027_203150_inLine
+BABEL_OP2_303_34106_20131027_203150_outLine
+BABEL_OP2_303_34811_20131115_235931_inLine
+BABEL_OP2_303_34811_20131115_235931_outLine
+BABEL_OP2_303_35000_20131210_184313_inLine
+BABEL_OP2_303_35000_20131210_184313_outLine
+BABEL_OP2_303_35008_20131120_185919_inLine
+BABEL_OP2_303_35008_20131120_185919_outLine
+BABEL_OP2_303_35143_20131206_023320_inLine
+BABEL_OP2_303_35143_20131206_023320_outLine
+BABEL_OP2_303_36594_20131215_014334_inLine
+BABEL_OP2_303_36594_20131215_014334_outLine
+BABEL_OP2_303_36594_20131215_022952_inLine
+BABEL_OP2_303_36594_20131215_022952_outLine
+BABEL_OP2_303_37228_20131216_171725_inLine
+BABEL_OP2_303_37228_20131216_171725_outLine
+BABEL_OP2_303_41469_20131025_210607_inLine
+BABEL_OP2_303_41469_20131025_210607_outLine
+BABEL_OP2_303_41609_20131031_164009_inLine
+BABEL_OP2_303_41609_20131031_164009_outLine
+BABEL_OP2_303_41680_20131108_184050_inLine
+BABEL_OP2_303_41680_20131108_184050_outLine
+BABEL_OP2_303_41692_20140119_000215_inLine
+BABEL_OP2_303_41692_20140119_000215_outLine
+BABEL_OP2_303_41692_20140120_002447_inLine
+BABEL_OP2_303_41692_20140120_002447_outLine
+BABEL_OP2_303_42526_20131216_190003_inLine
+BABEL_OP2_303_42526_20131216_190003_outLine
+BABEL_OP2_303_43784_20131115_013454_inLine
+BABEL_OP2_303_43784_20131115_013454_outLine
+BABEL_OP2_303_43784_20131115_014528_inLine
+BABEL_OP2_303_43784_20131115_014528_outLine
+BABEL_OP2_303_43788_20131202_222520_inLine
+BABEL_OP2_303_43788_20131202_222520_outLine
+BABEL_OP2_303_43920_20131130_143746_inLine
+BABEL_OP2_303_43920_20131130_143746_outLine
+BABEL_OP2_303_45459_20140201_203718_inLine
+BABEL_OP2_303_45459_20140201_203718_outLine
+BABEL_OP2_303_46330_20131210_212701_inLine
+BABEL_OP2_303_46330_20131210_212701_outLine
+BABEL_OP2_303_46688_20131108_184839_inLine
+BABEL_OP2_303_46688_20131108_184839_outLine
+BABEL_OP2_303_46757_20131116_193234_inLine
+BABEL_OP2_303_46757_20131116_193234_outLine
+BABEL_OP2_303_47215_20131108_200333_inLine
+BABEL_OP2_303_47215_20131108_200333_outLine
+BABEL_OP2_303_47487_20131104_200239_inLine
+BABEL_OP2_303_47487_20131104_200239_outLine
+BABEL_OP2_303_47637_20140222_233717_inLine
+BABEL_OP2_303_47637_20140222_233717_outLine
+BABEL_OP2_303_47866_20131230_165319_inLine
+BABEL_OP2_303_47866_20131230_165319_outLine
+BABEL_OP2_303_47878_20131116_184454_inLine
+BABEL_OP2_303_47878_20131116_184454_outLine
+BABEL_OP2_303_48844_20131030_014630_inLine
+BABEL_OP2_303_48844_20131030_014630_outLine
+BABEL_OP2_303_49027_20140127_225946_inLine
+BABEL_OP2_303_49027_20140127_225946_outLine
+BABEL_OP2_303_49197_20131115_221049_inLine
+BABEL_OP2_303_49197_20131115_221049_outLine
+BABEL_OP2_303_49216_20131031_011232_inLine
+BABEL_OP2_303_49216_20131031_011232_outLine
+BABEL_OP2_303_49437_20131211_205647_inLine
+BABEL_OP2_303_49437_20131211_205647_outLine
+BABEL_OP2_303_50565_20131102_213418_inLine
+BABEL_OP2_303_50565_20131102_213418_outLine
+BABEL_OP2_303_50779_20131215_002945_inLine
+BABEL_OP2_303_50779_20131215_002945_outLine
+BABEL_OP2_303_51015_20131121_004617_inLine
+BABEL_OP2_303_51015_20131121_004617_outLine
+BABEL_OP2_303_51968_20131113_214616_inLine
+BABEL_OP2_303_51968_20131113_214616_outLine
+BABEL_OP2_303_51968_20131113_220135_inLine
+BABEL_OP2_303_51968_20131113_220135_outLine
+BABEL_OP2_303_52272_20131027_195752_inLine
+BABEL_OP2_303_52272_20131027_195752_outLine
+BABEL_OP2_303_52381_20131216_174822_inLine
+BABEL_OP2_303_52381_20131216_174822_outLine
+BABEL_OP2_303_52490_20131027_172351_inLine
+BABEL_OP2_303_52490_20131027_172351_outLine
+BABEL_OP2_303_52804_20131105_185205_inLine
+BABEL_OP2_303_52804_20131105_185205_outLine
+BABEL_OP2_303_53144_20131227_024859_inLine
+BABEL_OP2_303_53144_20131227_024859_outLine
+BABEL_OP2_303_53665_20140204_194114_inLine
+BABEL_OP2_303_53665_20140204_194114_outLine
+BABEL_OP2_303_54104_20131030_190134_inLine
+BABEL_OP2_303_54104_20131030_190134_outLine
+BABEL_OP2_303_54162_20131114_015157_inLine
+BABEL_OP2_303_54162_20131114_015157_outLine
+BABEL_OP2_303_54744_20131101_012632_inLine
+BABEL_OP2_303_54744_20131101_012632_outLine
+BABEL_OP2_303_55968_20131027_154130_inLine
+BABEL_OP2_303_55968_20131027_154130_outLine
+BABEL_OP2_303_57141_20131129_191059_inLine
+BABEL_OP2_303_57141_20131129_191059_outLine
+BABEL_OP2_303_57464_20140204_205308_inLine
+BABEL_OP2_303_57464_20140204_205308_outLine
+BABEL_OP2_303_57464_20140204_220733_inLine
+BABEL_OP2_303_57464_20140204_220733_outLine
+BABEL_OP2_303_57566_20131205_002558_inLine
+BABEL_OP2_303_57566_20131205_002558_outLine
+BABEL_OP2_303_57782_20140222_210824_inLine
+BABEL_OP2_303_57782_20140222_210824_outLine
+BABEL_OP2_303_58313_20131114_234055_inLine
+BABEL_OP2_303_58313_20131114_234055_outLine
+BABEL_OP2_303_58821_20131121_205344_inLine
+BABEL_OP2_303_58821_20131121_205344_outLine
+BABEL_OP2_303_59509_20131130_021844_inLine
+BABEL_OP2_303_59509_20131130_021844_outLine
+BABEL_OP2_303_59635_20131205_021406_inLine
+BABEL_OP2_303_59635_20131205_021406_outLine
+BABEL_OP2_303_62014_20131114_203925_inLine
+BABEL_OP2_303_62014_20131114_203925_outLine
+BABEL_OP2_303_62714_20131228_155020_inLine
+BABEL_OP2_303_62714_20131228_155020_outLine
+BABEL_OP2_303_62810_20131028_225346_inLine
+BABEL_OP2_303_62810_20131028_225346_outLine
+BABEL_OP2_303_63604_20131101_000901_inLine
+BABEL_OP2_303_63604_20131101_000901_outLine
+BABEL_OP2_303_63730_20140218_210748_inLine
+BABEL_OP2_303_63730_20140218_210748_outLine
+BABEL_OP2_303_64014_20131229_214739_inLine
+BABEL_OP2_303_64014_20131229_214739_outLine
+BABEL_OP2_303_64065_20131111_230551_inLine
+BABEL_OP2_303_64065_20131111_230551_outLine
+BABEL_OP2_303_65561_20131122_180110_inLine
+BABEL_OP2_303_65561_20131122_180110_outLine
+BABEL_OP2_303_66001_20131031_192905_inLine
+BABEL_OP2_303_66001_20131031_192905_outLine
+BABEL_OP2_303_66361_20140203_182323_inLine
+BABEL_OP2_303_66361_20140203_182323_outLine
+BABEL_OP2_303_67283_20131109_213605_inLine
+BABEL_OP2_303_67283_20131109_213605_outLine
+BABEL_OP2_303_67401_20131114_215749_inLine
+BABEL_OP2_303_67401_20131114_215749_outLine
+BABEL_OP2_303_67401_20131114_221127_inLine
+BABEL_OP2_303_67401_20131114_221127_outLine
+BABEL_OP2_303_68068_20131204_212345_inLine
+BABEL_OP2_303_68068_20131204_212345_outLine
+BABEL_OP2_303_69153_20131204_184008_inLine
+BABEL_OP2_303_69153_20131204_184008_outLine
+BABEL_OP2_303_69992_20131030_011814_inLine
+BABEL_OP2_303_69992_20131030_011814_outLine
+BABEL_OP2_303_70221_20131124_180244_inLine
+BABEL_OP2_303_70221_20131124_180244_outLine
+BABEL_OP2_303_70251_20131027_201724_inLine
+BABEL_OP2_303_70251_20131027_201724_outLine
+BABEL_OP2_303_70452_20131115_202651_inLine
+BABEL_OP2_303_70452_20131115_202651_outLine
+BABEL_OP2_303_71067_20131115_221146_inLine
+BABEL_OP2_303_71067_20131115_221146_outLine
+BABEL_OP2_303_71189_20131225_050235_inLine
+BABEL_OP2_303_71189_20131225_050235_outLine
+BABEL_OP2_303_72040_20131112_173033_inLine
+BABEL_OP2_303_72040_20131112_173033_outLine
+BABEL_OP2_303_72844_20131111_192144_inLine
+BABEL_OP2_303_72844_20131111_192144_outLine
+BABEL_OP2_303_73022_20131216_173848_inLine
+BABEL_OP2_303_73022_20131216_173848_outLine
+BABEL_OP2_303_73299_20140217_173212_inLine
+BABEL_OP2_303_73299_20140217_173212_outLine
+BABEL_OP2_303_73591_20131020_193026_inLine
+BABEL_OP2_303_73591_20131020_193026_outLine
+BABEL_OP2_303_75342_20131122_191140_inLine
+BABEL_OP2_303_75342_20131122_191140_outLine
+BABEL_OP2_303_75505_20131102_220904_inLine
+BABEL_OP2_303_75505_20131102_220904_outLine
+BABEL_OP2_303_76902_20140205_233041_inLine
+BABEL_OP2_303_76902_20140205_233041_outLine
+BABEL_OP2_303_77730_20131107_221840_inLine
+BABEL_OP2_303_77730_20131107_221840_outLine
+BABEL_OP2_303_77744_20131113_232408_inLine
+BABEL_OP2_303_77744_20131113_232408_outLine
+BABEL_OP2_303_78544_20131204_194704_inLine
+BABEL_OP2_303_78544_20131204_194704_outLine
+BABEL_OP2_303_78604_20131101_194153_inLine
+BABEL_OP2_303_78604_20131101_194153_outLine
+BABEL_OP2_303_78943_20131115_213626_inLine
+BABEL_OP2_303_78943_20131115_213626_outLine
+BABEL_OP2_303_79451_20131114_213026_inLine
+BABEL_OP2_303_79451_20131114_213026_outLine
+BABEL_OP2_303_79590_20131113_222157_inLine
+BABEL_OP2_303_79590_20131113_222157_outLine
+BABEL_OP2_303_79751_20131105_025908_inLine
+BABEL_OP2_303_79751_20131105_025908_outLine
+BABEL_OP2_303_80559_20131101_190006_inLine
+BABEL_OP2_303_80559_20131101_190006_outLine
+BABEL_OP2_303_80622_20131130_040503_inLine
+BABEL_OP2_303_80622_20131130_040503_outLine
+BABEL_OP2_303_81149_20140203_201343_inLine
+BABEL_OP2_303_81149_20140203_201343_outLine
+BABEL_OP2_303_81287_20131121_184328_inLine
+BABEL_OP2_303_81287_20131121_184328_outLine
+BABEL_OP2_303_81671_20131205_004357_inLine
+BABEL_OP2_303_81671_20131205_004357_outLine
+BABEL_OP2_303_82622_20131029_212941_inLine
+BABEL_OP2_303_82622_20131029_212941_outLine
+BABEL_OP2_303_82935_20131205_024033_inLine
+BABEL_OP2_303_82935_20131205_024033_outLine
+BABEL_OP2_303_82935_20131205_025919_inLine
+BABEL_OP2_303_82935_20131205_025919_outLine
+BABEL_OP2_303_83771_20140119_181859_inLine
+BABEL_OP2_303_83771_20140119_181859_outLine
+BABEL_OP2_303_84458_20131204_213157_inLine
+BABEL_OP2_303_84458_20131204_213157_outLine
+BABEL_OP2_303_84547_20131026_230544_inLine
+BABEL_OP2_303_84547_20131026_230544_outLine
+BABEL_OP2_303_84605_20131112_192034_inLine
+BABEL_OP2_303_84605_20131112_192034_outLine
+BABEL_OP2_303_84805_20131204_153317_inLine
+BABEL_OP2_303_84805_20131204_153317_outLine
+BABEL_OP2_303_84936_20131115_204004_inLine
+BABEL_OP2_303_84936_20131115_204004_outLine
+BABEL_OP2_303_85340_20131111_215301_inLine
+BABEL_OP2_303_85340_20131111_215301_outLine
+BABEL_OP2_303_86191_20131114_221742_inLine
+BABEL_OP2_303_86191_20131114_221742_outLine
+BABEL_OP2_303_86321_20131204_175915_inLine
+BABEL_OP2_303_86321_20131204_175915_outLine
+BABEL_OP2_303_86467_20131025_013235_inLine
+BABEL_OP2_303_86467_20131025_013235_outLine
+BABEL_OP2_303_86676_20131204_185429_inLine
+BABEL_OP2_303_86676_20131204_185429_outLine
+BABEL_OP2_303_86713_20131206_165123_inLine
+BABEL_OP2_303_86713_20131206_165123_outLine
+BABEL_OP2_303_86891_20140222_195106_inLine
+BABEL_OP2_303_86891_20140222_195106_outLine
+BABEL_OP2_303_87313_20131116_193233_inLine
+BABEL_OP2_303_87313_20131116_193233_outLine
+BABEL_OP2_303_88776_20131031_184652_inLine
+BABEL_OP2_303_88776_20131031_184652_outLine
+BABEL_OP2_303_91125_20131102_191721_inLine
+BABEL_OP2_303_91125_20131102_191721_outLine
+BABEL_OP2_303_91944_20131107_214314_inLine
+BABEL_OP2_303_91944_20131107_214314_outLine
+BABEL_OP2_303_92605_20140205_192703_inLine
+BABEL_OP2_303_92605_20140205_192703_outLine
+BABEL_OP2_303_92757_20140211_221207_inLine
+BABEL_OP2_303_92757_20140211_221207_outLine
+BABEL_OP2_303_92792_20131223_042728_inLine
+BABEL_OP2_303_92792_20131223_042728_outLine
+BABEL_OP2_303_94025_20131211_211933_inLine
+BABEL_OP2_303_94025_20131211_211933_outLine
+BABEL_OP2_303_94333_20131029_193545_inLine
+BABEL_OP2_303_94333_20131029_193545_outLine
+BABEL_OP2_303_94745_20131204_205747_inLine
+BABEL_OP2_303_94745_20131204_205747_outLine
+BABEL_OP2_303_94869_20131101_184934_inLine
+BABEL_OP2_303_94869_20131101_184934_outLine
+BABEL_OP2_303_96690_20131114_194453_inLine
+BABEL_OP2_303_96690_20131114_194453_outLine
+BABEL_OP2_303_97286_20140205_223354_inLine
+BABEL_OP2_303_97286_20140205_223354_outLine
+BABEL_OP2_303_97772_20131024_230426_inLine
+BABEL_OP2_303_97772_20131024_230426_outLine
+BABEL_OP2_303_98311_20131107_224445_inLine
+BABEL_OP2_303_98311_20131107_224445_outLine
+BABEL_OP2_303_98356_20131121_191712_inLine
+BABEL_OP2_303_98356_20131121_191712_outLine
+BABEL_OP2_303_98390_20131029_164425_inLine
+BABEL_OP2_303_98390_20131029_164425_outLine
+BABEL_OP2_303_99955_20131215_222330_inLine
+BABEL_OP2_303_99955_20131215_222330_outLine
+BABEL_OP2_303_99955_20131216_231047_inLine
+BABEL_OP2_303_99955_20131216_231047_outLine
diff --git a/egs/babel/s5d/conf/lists/303-telugu/training.list b/egs/babel/s5d/conf/lists/303-telugu/training.list
new file mode 100644
index 00000000000..fec579c4325
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/303-telugu/training.list
@@ -0,0 +1,514 @@
+BABEL_OP2_303_10058_20140205_001109_inLine
+BABEL_OP2_303_10058_20140205_001109_outLine
+BABEL_OP2_303_10188_20131108_175933_inLine
+BABEL_OP2_303_10188_20131108_175933_outLine
+BABEL_OP2_303_10638_20140218_213711_inLine
+BABEL_OP2_303_10638_20140218_213711_outLine
+BABEL_OP2_303_10938_20131104_204555_inLine
+BABEL_OP2_303_10938_20131104_204555_outLine
+BABEL_OP2_303_11352_20131224_005439_inLine
+BABEL_OP2_303_11352_20131224_005439_outLine
+BABEL_OP2_303_11673_20131026_034803_inLine
+BABEL_OP2_303_11673_20131026_034803_outLine
+BABEL_OP2_303_12036_20131101_174653_inLine
+BABEL_OP2_303_12036_20131101_174653_outLine
+BABEL_OP2_303_12242_20131113_222307_inLine
+BABEL_OP2_303_12242_20131113_222307_outLine
+BABEL_OP2_303_13030_20131109_023950_inLine
+BABEL_OP2_303_13030_20131109_023950_outLine
+BABEL_OP2_303_13324_20131107_211806_inLine
+BABEL_OP2_303_13324_20131107_211806_outLine
+BABEL_OP2_303_13586_20131115_180921_inLine
+BABEL_OP2_303_13586_20131115_180921_outLine
+BABEL_OP2_303_13664_20131108_184651_inLine
+BABEL_OP2_303_13664_20131108_184651_outLine
+BABEL_OP2_303_13744_20131026_234054_inLine
+BABEL_OP2_303_13744_20131026_234054_outLine
+BABEL_OP2_303_14229_20131114_032214_inLine
+BABEL_OP2_303_14229_20131114_032214_outLine
+BABEL_OP2_303_14350_20131105_195640_inLine
+BABEL_OP2_303_14350_20131105_195640_outLine
+BABEL_OP2_303_14875_20131112_211504_inLine
+BABEL_OP2_303_14875_20131112_211504_outLine
+BABEL_OP2_303_14899_20131102_204324_inLine
+BABEL_OP2_303_14899_20131102_204324_outLine
+BABEL_OP2_303_14929_20131112_164303_inLine
+BABEL_OP2_303_14929_20131112_164303_outLine
+BABEL_OP2_303_14929_20131112_165202_inLine
+BABEL_OP2_303_14929_20131112_165202_outLine
+BABEL_OP2_303_14929_20131112_171242_inLine
+BABEL_OP2_303_14929_20131112_171242_outLine
+BABEL_OP2_303_14972_20131114_023627_inLine
+BABEL_OP2_303_14972_20131114_023627_outLine
+BABEL_OP2_303_15702_20131206_225729_inLine
+BABEL_OP2_303_15702_20131206_225729_outLine
+BABEL_OP2_303_15730_20131101_163118_inLine
+BABEL_OP2_303_15730_20131101_163118_outLine
+BABEL_OP2_303_16184_20131204_033225_inLine
+BABEL_OP2_303_16184_20131204_033225_outLine
+BABEL_OP2_303_16839_20131223_215734_inLine
+BABEL_OP2_303_16839_20131223_215734_outLine
+BABEL_OP2_303_16886_20131108_204525_inLine
+BABEL_OP2_303_16886_20131108_204525_outLine
+BABEL_OP2_303_16938_20131112_015544_inLine
+BABEL_OP2_303_16938_20131112_015544_outLine
+BABEL_OP2_303_17127_20131224_002728_inLine
+BABEL_OP2_303_17127_20131224_002728_outLine
+BABEL_OP2_303_17520_20131114_164811_inLine
+BABEL_OP2_303_17520_20131114_164811_outLine
+BABEL_OP2_303_18242_20140218_014910_inLine
+BABEL_OP2_303_18242_20140218_014910_outLine
+BABEL_OP2_303_18380_20131111_015535_inLine
+BABEL_OP2_303_18380_20131111_015535_outLine
+BABEL_OP2_303_18380_20131119_224151_inLine
+BABEL_OP2_303_18380_20131119_224151_outLine
+BABEL_OP2_303_18924_20131112_001935_inLine
+BABEL_OP2_303_18924_20131112_001935_outLine
+BABEL_OP2_303_20437_20140202_232910_inLine
+BABEL_OP2_303_20437_20140202_232910_outLine
+BABEL_OP2_303_20437_20140202_234756_inLine
+BABEL_OP2_303_20437_20140202_234756_outLine
+BABEL_OP2_303_20985_20131122_183435_inLine
+BABEL_OP2_303_20985_20131122_183435_outLine
+BABEL_OP2_303_21435_20131226_175809_inLine
+BABEL_OP2_303_21435_20131226_175809_outLine
+BABEL_OP2_303_21435_20131226_181138_inLine
+BABEL_OP2_303_21435_20131226_181138_outLine
+BABEL_OP2_303_23006_20131113_150924_inLine
+BABEL_OP2_303_23006_20131113_150924_outLine
+BABEL_OP2_303_23046_20131114_171927_inLine
+BABEL_OP2_303_23046_20131114_171927_outLine
+BABEL_OP2_303_23239_20131206_181414_inLine
+BABEL_OP2_303_23239_20131206_181414_outLine
+BABEL_OP2_303_23260_20140203_194817_inLine
+BABEL_OP2_303_23260_20140203_194817_outLine
+BABEL_OP2_303_23505_20131109_184015_inLine
+BABEL_OP2_303_23505_20131109_184015_outLine
+BABEL_OP2_303_23681_20140119_223006_inLine
+BABEL_OP2_303_23681_20140119_223006_outLine
+BABEL_OP2_303_23980_20131114_202648_inLine
+BABEL_OP2_303_23980_20131114_202648_outLine
+BABEL_OP2_303_24010_20140218_224141_inLine
+BABEL_OP2_303_24010_20140218_224141_outLine
+BABEL_OP2_303_24231_20140201_230638_inLine
+BABEL_OP2_303_24231_20140201_230638_outLine
+BABEL_OP2_303_24323_20131201_180512_inLine
+BABEL_OP2_303_24323_20131201_180512_outLine
+BABEL_OP2_303_24470_20131204_174323_inLine
+BABEL_OP2_303_24470_20131204_174323_outLine
+BABEL_OP2_303_24589_20131114_182843_inLine
+BABEL_OP2_303_24589_20131114_182843_outLine
+BABEL_OP2_303_24982_20131114_012226_inLine
+BABEL_OP2_303_24982_20131114_012226_outLine
+BABEL_OP2_303_25198_20140121_180931_inLine
+BABEL_OP2_303_25198_20140121_180931_outLine
+BABEL_OP2_303_25719_20131205_191053_inLine
+BABEL_OP2_303_25719_20131205_191053_outLine
+BABEL_OP2_303_25767_20131028_161454_inLine
+BABEL_OP2_303_25767_20131028_161454_outLine
+BABEL_OP2_303_25961_20131030_225755_inLine
+BABEL_OP2_303_25961_20131030_225755_outLine
+BABEL_OP2_303_26072_20131216_221839_inLine
+BABEL_OP2_303_26072_20131216_221839_outLine
+BABEL_OP2_303_26388_20131113_034454_inLine
+BABEL_OP2_303_26388_20131113_034454_outLine
+BABEL_OP2_303_27125_20131024_195716_inLine
+BABEL_OP2_303_27125_20131024_195716_outLine
+BABEL_OP2_303_27590_20131118_222641_inLine
+BABEL_OP2_303_27590_20131118_222641_outLine
+BABEL_OP2_303_28419_20131113_195258_inLine
+BABEL_OP2_303_28419_20131113_195258_outLine
+BABEL_OP2_303_29404_20131224_014921_inLine
+BABEL_OP2_303_29404_20131224_014921_outLine
+BABEL_OP2_303_29482_20140219_221449_inLine
+BABEL_OP2_303_29482_20140219_221449_outLine
+BABEL_OP2_303_29685_20131105_180851_inLine
+BABEL_OP2_303_29685_20131105_180851_outLine
+BABEL_OP2_303_30013_20131116_185844_inLine
+BABEL_OP2_303_30013_20131116_185844_outLine
+BABEL_OP2_303_30345_20131224_005453_inLine
+BABEL_OP2_303_30345_20131224_005453_outLine
+BABEL_OP2_303_30395_20131112_004350_inLine
+BABEL_OP2_303_30395_20131112_004350_outLine
+BABEL_OP2_303_30645_20131029_193530_inLine
+BABEL_OP2_303_30645_20131029_193530_outLine
+BABEL_OP2_303_31490_20131105_010342_inLine
+BABEL_OP2_303_31490_20131105_010342_outLine
+BABEL_OP2_303_31490_20131105_011345_inLine
+BABEL_OP2_303_31490_20131105_011345_outLine
+BABEL_OP2_303_32048_20131204_223219_inLine
+BABEL_OP2_303_32048_20131204_223219_outLine
+BABEL_OP2_303_32171_20140203_203242_inLine
+BABEL_OP2_303_32171_20140203_203242_outLine
+BABEL_OP2_303_32301_20131120_212820_inLine
+BABEL_OP2_303_32301_20131120_212820_outLine
+BABEL_OP2_303_32861_20131216_223500_inLine
+BABEL_OP2_303_32861_20131216_223500_outLine
+BABEL_OP2_303_33229_20131206_220332_inLine
+BABEL_OP2_303_33229_20131206_220332_outLine
+BABEL_OP2_303_33424_20140129_211552_inLine
+BABEL_OP2_303_33424_20140129_211552_outLine
+BABEL_OP2_303_33672_20131029_201146_inLine
+BABEL_OP2_303_33672_20131029_201146_outLine
+BABEL_OP2_303_33704_20131210_195453_inLine
+BABEL_OP2_303_33704_20131210_195453_outLine
+BABEL_OP2_303_33913_20131116_003805_inLine
+BABEL_OP2_303_33913_20131116_003805_outLine
+BABEL_OP2_303_34106_20131027_203150_inLine
+BABEL_OP2_303_34106_20131027_203150_outLine
+BABEL_OP2_303_34811_20131115_235931_inLine
+BABEL_OP2_303_34811_20131115_235931_outLine
+BABEL_OP2_303_35000_20131210_184313_inLine
+BABEL_OP2_303_35000_20131210_184313_outLine
+BABEL_OP2_303_35008_20131120_185919_inLine
+BABEL_OP2_303_35008_20131120_185919_outLine
+BABEL_OP2_303_35069_20131205_165127_inLine
+BABEL_OP2_303_35069_20131205_165127_outLine
+BABEL_OP2_303_35143_20131206_023320_inLine
+BABEL_OP2_303_35143_20131206_023320_outLine
+BABEL_OP2_303_36341_20131024_221132_inLine
+BABEL_OP2_303_36341_20131024_221132_outLine
+BABEL_OP2_303_36594_20131215_014334_inLine
+BABEL_OP2_303_36594_20131215_014334_outLine
+BABEL_OP2_303_36594_20131215_022952_inLine
+BABEL_OP2_303_36594_20131215_022952_outLine
+BABEL_OP2_303_36669_20131110_155909_inLine
+BABEL_OP2_303_36669_20131110_155909_outLine
+BABEL_OP2_303_37228_20131216_171725_inLine
+BABEL_OP2_303_37228_20131216_171725_outLine
+BABEL_OP2_303_37682_20131105_023703_inLine
+BABEL_OP2_303_37682_20131105_023703_outLine
+BABEL_OP2_303_39307_20131027_043600_inLine
+BABEL_OP2_303_39307_20131027_043600_outLine
+BABEL_OP2_303_40565_20131116_182747_inLine
+BABEL_OP2_303_40565_20131116_182747_outLine
+BABEL_OP2_303_41469_20131025_210607_inLine
+BABEL_OP2_303_41469_20131025_210607_outLine
+BABEL_OP2_303_41493_20131027_155001_inLine
+BABEL_OP2_303_41493_20131027_155001_outLine
+BABEL_OP2_303_41609_20131031_164009_inLine
+BABEL_OP2_303_41609_20131031_164009_outLine
+BABEL_OP2_303_41680_20131108_184050_inLine
+BABEL_OP2_303_41680_20131108_184050_outLine
+BABEL_OP2_303_41692_20140119_000215_inLine
+BABEL_OP2_303_41692_20140119_000215_outLine
+BABEL_OP2_303_41692_20140120_002447_inLine
+BABEL_OP2_303_41692_20140120_002447_outLine
+BABEL_OP2_303_42526_20131216_190003_inLine
+BABEL_OP2_303_42526_20131216_190003_outLine
+BABEL_OP2_303_42718_20140118_201247_inLine
+BABEL_OP2_303_42718_20140118_201247_outLine
+BABEL_OP2_303_43115_20140201_195115_inLine
+BABEL_OP2_303_43115_20140201_195115_outLine
+BABEL_OP2_303_43784_20131115_013454_inLine
+BABEL_OP2_303_43784_20131115_013454_outLine
+BABEL_OP2_303_43784_20131115_014528_inLine
+BABEL_OP2_303_43784_20131115_014528_outLine
+BABEL_OP2_303_43788_20131202_222520_inLine
+BABEL_OP2_303_43788_20131202_222520_outLine
+BABEL_OP2_303_43789_20131111_163502_inLine
+BABEL_OP2_303_43789_20131111_163502_outLine
+BABEL_OP2_303_43920_20131130_143746_inLine
+BABEL_OP2_303_43920_20131130_143746_outLine
+BABEL_OP2_303_45459_20140201_203718_inLine
+BABEL_OP2_303_45459_20140201_203718_outLine
+BABEL_OP2_303_46330_20131210_212701_inLine
+BABEL_OP2_303_46330_20131210_212701_outLine
+BABEL_OP2_303_46550_20131111_233520_inLine
+BABEL_OP2_303_46550_20131111_233520_outLine
+BABEL_OP2_303_46558_20131028_190003_inLine
+BABEL_OP2_303_46558_20131028_190003_outLine
+BABEL_OP2_303_46688_20131108_184839_inLine
+BABEL_OP2_303_46688_20131108_184839_outLine
+BABEL_OP2_303_46757_20131116_193234_inLine
+BABEL_OP2_303_46757_20131116_193234_outLine
+BABEL_OP2_303_47215_20131108_200333_inLine
+BABEL_OP2_303_47215_20131108_200333_outLine
+BABEL_OP2_303_47487_20131104_200239_inLine
+BABEL_OP2_303_47487_20131104_200239_outLine
+BABEL_OP2_303_47637_20140222_233717_inLine
+BABEL_OP2_303_47637_20140222_233717_outLine
+BABEL_OP2_303_47823_20131201_004209_inLine
+BABEL_OP2_303_47823_20131201_004209_outLine
+BABEL_OP2_303_47866_20131230_165319_inLine
+BABEL_OP2_303_47866_20131230_165319_outLine
+BABEL_OP2_303_47878_20131116_184454_inLine
+BABEL_OP2_303_47878_20131116_184454_outLine
+BABEL_OP2_303_48844_20131030_014630_inLine
+BABEL_OP2_303_48844_20131030_014630_outLine
+BABEL_OP2_303_49027_20140127_225946_inLine
+BABEL_OP2_303_49027_20140127_225946_outLine
+BABEL_OP2_303_49197_20131115_221049_inLine
+BABEL_OP2_303_49197_20131115_221049_outLine
+BABEL_OP2_303_49216_20131031_011232_inLine
+BABEL_OP2_303_49216_20131031_011232_outLine
+BABEL_OP2_303_49437_20131211_205647_inLine
+BABEL_OP2_303_49437_20131211_205647_outLine
+BABEL_OP2_303_50565_20131102_213418_inLine
+BABEL_OP2_303_50565_20131102_213418_outLine
+BABEL_OP2_303_50726_20131028_210641_inLine
+BABEL_OP2_303_50726_20131028_210641_outLine
+BABEL_OP2_303_50779_20131215_002945_inLine
+BABEL_OP2_303_50779_20131215_002945_outLine
+BABEL_OP2_303_51015_20131121_004617_inLine
+BABEL_OP2_303_51015_20131121_004617_outLine
+BABEL_OP2_303_51540_20131204_041920_inLine
+BABEL_OP2_303_51540_20131204_041920_outLine
+BABEL_OP2_303_51968_20131113_214616_inLine
+BABEL_OP2_303_51968_20131113_214616_outLine
+BABEL_OP2_303_51968_20131113_220135_inLine
+BABEL_OP2_303_51968_20131113_220135_outLine
+BABEL_OP2_303_52272_20131027_195752_inLine
+BABEL_OP2_303_52272_20131027_195752_outLine
+BABEL_OP2_303_52381_20131216_174822_inLine
+BABEL_OP2_303_52381_20131216_174822_outLine
+BABEL_OP2_303_52490_20131027_172351_inLine
+BABEL_OP2_303_52490_20131027_172351_outLine
+BABEL_OP2_303_52804_20131105_185205_inLine
+BABEL_OP2_303_52804_20131105_185205_outLine
+BABEL_OP2_303_53144_20131227_024859_inLine
+BABEL_OP2_303_53144_20131227_024859_outLine
+BABEL_OP2_303_53665_20140204_194114_inLine
+BABEL_OP2_303_53665_20140204_194114_outLine
+BABEL_OP2_303_54104_20131030_190134_inLine
+BABEL_OP2_303_54104_20131030_190134_outLine
+BABEL_OP2_303_54162_20131114_015157_inLine
+BABEL_OP2_303_54162_20131114_015157_outLine
+BABEL_OP2_303_54744_20131101_012632_inLine
+BABEL_OP2_303_54744_20131101_012632_outLine
+BABEL_OP2_303_55968_20131027_154130_inLine
+BABEL_OP2_303_55968_20131027_154130_outLine
+BABEL_OP2_303_57141_20131129_191059_inLine
+BABEL_OP2_303_57141_20131129_191059_outLine
+BABEL_OP2_303_57464_20140204_205308_inLine
+BABEL_OP2_303_57464_20140204_205308_outLine
+BABEL_OP2_303_57464_20140204_220733_inLine
+BABEL_OP2_303_57464_20140204_220733_outLine
+BABEL_OP2_303_57566_20131205_002558_inLine
+BABEL_OP2_303_57566_20131205_002558_outLine
+BABEL_OP2_303_57782_20140222_210824_inLine
+BABEL_OP2_303_57782_20140222_210824_outLine
+BABEL_OP2_303_58313_20131114_234055_inLine
+BABEL_OP2_303_58313_20131114_234055_outLine
+BABEL_OP2_303_58821_20131121_205344_inLine
+BABEL_OP2_303_58821_20131121_205344_outLine
+BABEL_OP2_303_59509_20131130_021844_inLine
+BABEL_OP2_303_59509_20131130_021844_outLine
+BABEL_OP2_303_59635_20131205_021406_inLine
+BABEL_OP2_303_59635_20131205_021406_outLine
+BABEL_OP2_303_60538_20131111_200459_inLine
+BABEL_OP2_303_60538_20131111_200459_outLine
+BABEL_OP2_303_62014_20131114_203925_inLine
+BABEL_OP2_303_62014_20131114_203925_outLine
+BABEL_OP2_303_62714_20131228_155020_inLine
+BABEL_OP2_303_62714_20131228_155020_outLine
+BABEL_OP2_303_62810_20131028_225346_inLine
+BABEL_OP2_303_62810_20131028_225346_outLine
+BABEL_OP2_303_63084_20131115_202655_inLine
+BABEL_OP2_303_63084_20131115_202655_outLine
+BABEL_OP2_303_63604_20131101_000901_inLine
+BABEL_OP2_303_63604_20131101_000901_outLine
+BABEL_OP2_303_63730_20140218_210748_inLine
+BABEL_OP2_303_63730_20140218_210748_outLine
+BABEL_OP2_303_64014_20131229_214739_inLine
+BABEL_OP2_303_64014_20131229_214739_outLine
+BABEL_OP2_303_64065_20131111_230551_inLine
+BABEL_OP2_303_64065_20131111_230551_outLine
+BABEL_OP2_303_64768_20131113_203120_inLine
+BABEL_OP2_303_64768_20131113_203120_outLine
+BABEL_OP2_303_65077_20131024_174953_inLine
+BABEL_OP2_303_65077_20131024_174953_outLine
+BABEL_OP2_303_65561_20131122_180110_inLine
+BABEL_OP2_303_65561_20131122_180110_outLine
+BABEL_OP2_303_66001_20131031_192905_inLine
+BABEL_OP2_303_66001_20131031_192905_outLine
+BABEL_OP2_303_66361_20140203_182323_inLine
+BABEL_OP2_303_66361_20140203_182323_outLine
+BABEL_OP2_303_67283_20131109_213605_inLine
+BABEL_OP2_303_67283_20131109_213605_outLine
+BABEL_OP2_303_67401_20131114_215749_inLine
+BABEL_OP2_303_67401_20131114_215749_outLine
+BABEL_OP2_303_67401_20131114_221127_inLine
+BABEL_OP2_303_67401_20131114_221127_outLine
+BABEL_OP2_303_67964_20140222_211658_inLine
+BABEL_OP2_303_67964_20140222_211658_outLine
+BABEL_OP2_303_68068_20131204_212345_inLine
+BABEL_OP2_303_68068_20131204_212345_outLine
+BABEL_OP2_303_69107_20131113_222827_inLine
+BABEL_OP2_303_69107_20131113_222827_outLine
+BABEL_OP2_303_69153_20131204_184008_inLine
+BABEL_OP2_303_69153_20131204_184008_outLine
+BABEL_OP2_303_69633_20131130_193122_inLine
+BABEL_OP2_303_69633_20131130_193122_outLine
+BABEL_OP2_303_69992_20131030_011814_inLine
+BABEL_OP2_303_69992_20131030_011814_outLine
+BABEL_OP2_303_70221_20131124_180244_inLine
+BABEL_OP2_303_70221_20131124_180244_outLine
+BABEL_OP2_303_70251_20131027_201724_inLine
+BABEL_OP2_303_70251_20131027_201724_outLine
+BABEL_OP2_303_70452_20131115_202651_inLine
+BABEL_OP2_303_70452_20131115_202651_outLine
+BABEL_OP2_303_71067_20131115_221146_inLine
+BABEL_OP2_303_71067_20131115_221146_outLine
+BABEL_OP2_303_71189_20131225_050235_inLine
+BABEL_OP2_303_71189_20131225_050235_outLine
+BABEL_OP2_303_72040_20131112_173033_inLine
+BABEL_OP2_303_72040_20131112_173033_outLine
+BABEL_OP2_303_72587_20131115_221128_inLine
+BABEL_OP2_303_72587_20131115_221128_outLine
+BABEL_OP2_303_72844_20131111_192144_inLine
+BABEL_OP2_303_72844_20131111_192144_outLine
+BABEL_OP2_303_73022_20131216_173848_inLine
+BABEL_OP2_303_73022_20131216_173848_outLine
+BABEL_OP2_303_73299_20140217_173212_inLine
+BABEL_OP2_303_73299_20140217_173212_outLine
+BABEL_OP2_303_73591_20131020_193026_inLine
+BABEL_OP2_303_73591_20131020_193026_outLine
+BABEL_OP2_303_73990_20140219_201105_inLine
+BABEL_OP2_303_73990_20140219_201105_outLine
+BABEL_OP2_303_73990_20140219_202300_inLine
+BABEL_OP2_303_73990_20140219_202300_outLine
+BABEL_OP2_303_74886_20131101_194728_inLine
+BABEL_OP2_303_74886_20131101_194728_outLine
+BABEL_OP2_303_75342_20131122_191140_inLine
+BABEL_OP2_303_75342_20131122_191140_outLine
+BABEL_OP2_303_75365_20140218_173521_inLine
+BABEL_OP2_303_75365_20140218_173521_outLine
+BABEL_OP2_303_75505_20131102_220904_inLine
+BABEL_OP2_303_75505_20131102_220904_outLine
+BABEL_OP2_303_76756_20131115_182926_inLine
+BABEL_OP2_303_76756_20131115_182926_outLine
+BABEL_OP2_303_76902_20140205_233041_inLine
+BABEL_OP2_303_76902_20140205_233041_outLine
+BABEL_OP2_303_77730_20131107_221840_inLine
+BABEL_OP2_303_77730_20131107_221840_outLine
+BABEL_OP2_303_77744_20131113_232408_inLine
+BABEL_OP2_303_77744_20131113_232408_outLine
+BABEL_OP2_303_78454_20131114_230026_inLine
+BABEL_OP2_303_78454_20131114_230026_outLine
+BABEL_OP2_303_78544_20131204_194704_inLine
+BABEL_OP2_303_78544_20131204_194704_outLine
+BABEL_OP2_303_78604_20131101_194153_inLine
+BABEL_OP2_303_78604_20131101_194153_outLine
+BABEL_OP2_303_78943_20131115_213626_inLine
+BABEL_OP2_303_78943_20131115_213626_outLine
+BABEL_OP2_303_79451_20131114_213026_inLine
+BABEL_OP2_303_79451_20131114_213026_outLine
+BABEL_OP2_303_79590_20131113_222157_inLine
+BABEL_OP2_303_79590_20131113_222157_outLine
+BABEL_OP2_303_79751_20131105_025908_inLine
+BABEL_OP2_303_79751_20131105_025908_outLine
+BABEL_OP2_303_79820_20131114_181827_inLine
+BABEL_OP2_303_79820_20131114_181827_outLine
+BABEL_OP2_303_80559_20131101_190006_inLine
+BABEL_OP2_303_80559_20131101_190006_outLine
+BABEL_OP2_303_80622_20131130_040503_inLine
+BABEL_OP2_303_80622_20131130_040503_outLine
+BABEL_OP2_303_80881_20131027_165716_inLine
+BABEL_OP2_303_80881_20131027_165716_outLine
+BABEL_OP2_303_81149_20140203_201343_inLine
+BABEL_OP2_303_81149_20140203_201343_outLine
+BABEL_OP2_303_81287_20131121_184328_inLine
+BABEL_OP2_303_81287_20131121_184328_outLine
+BABEL_OP2_303_81424_20131120_192659_inLine
+BABEL_OP2_303_81424_20131120_192659_outLine
+BABEL_OP2_303_81671_20131205_004357_inLine
+BABEL_OP2_303_81671_20131205_004357_outLine
+BABEL_OP2_303_82622_20131029_212941_inLine
+BABEL_OP2_303_82622_20131029_212941_outLine
+BABEL_OP2_303_82935_20131205_024033_inLine
+BABEL_OP2_303_82935_20131205_024033_outLine
+BABEL_OP2_303_82935_20131205_025919_inLine
+BABEL_OP2_303_82935_20131205_025919_outLine
+BABEL_OP2_303_83771_20140119_181859_inLine
+BABEL_OP2_303_83771_20140119_181859_outLine
+BABEL_OP2_303_83935_20131122_222948_inLine
+BABEL_OP2_303_83935_20131122_222948_outLine
+BABEL_OP2_303_84061_20131104_224830_inLine
+BABEL_OP2_303_84061_20131104_224830_outLine
+BABEL_OP2_303_84327_20131122_203936_inLine
+BABEL_OP2_303_84327_20131122_203936_outLine
+BABEL_OP2_303_84458_20131204_213157_inLine
+BABEL_OP2_303_84458_20131204_213157_outLine
+BABEL_OP2_303_84547_20131026_230544_inLine
+BABEL_OP2_303_84547_20131026_230544_outLine
+BABEL_OP2_303_84605_20131112_192034_inLine
+BABEL_OP2_303_84605_20131112_192034_outLine
+BABEL_OP2_303_84805_20131204_153317_inLine
+BABEL_OP2_303_84805_20131204_153317_outLine
+BABEL_OP2_303_84936_20131115_204004_inLine
+BABEL_OP2_303_84936_20131115_204004_outLine
+BABEL_OP2_303_85248_20131206_184028_inLine
+BABEL_OP2_303_85248_20131206_184028_outLine
+BABEL_OP2_303_85340_20131111_215301_inLine
+BABEL_OP2_303_85340_20131111_215301_outLine
+BABEL_OP2_303_86191_20131114_221742_inLine
+BABEL_OP2_303_86191_20131114_221742_outLine
+BABEL_OP2_303_86321_20131204_175915_inLine
+BABEL_OP2_303_86321_20131204_175915_outLine
+BABEL_OP2_303_86467_20131025_013235_inLine
+BABEL_OP2_303_86467_20131025_013235_outLine
+BABEL_OP2_303_86676_20131204_185429_inLine
+BABEL_OP2_303_86676_20131204_185429_outLine
+BABEL_OP2_303_86713_20131206_165123_inLine
+BABEL_OP2_303_86713_20131206_165123_outLine
+BABEL_OP2_303_86891_20140222_195106_inLine
+BABEL_OP2_303_86891_20140222_195106_outLine
+BABEL_OP2_303_86952_20131105_173230_inLine
+BABEL_OP2_303_86952_20131105_173230_outLine
+BABEL_OP2_303_87313_20131116_193233_inLine
+BABEL_OP2_303_87313_20131116_193233_outLine
+BABEL_OP2_303_87884_20131206_022424_inLine
+BABEL_OP2_303_87884_20131206_022424_outLine
+BABEL_OP2_303_87889_20131213_215703_inLine
+BABEL_OP2_303_87889_20131213_215703_outLine
+BABEL_OP2_303_88776_20131031_184652_inLine
+BABEL_OP2_303_88776_20131031_184652_outLine
+BABEL_OP2_303_88982_20131115_181618_inLine
+BABEL_OP2_303_88982_20131115_181618_outLine
+BABEL_OP2_303_90080_20131228_233334_inLine
+BABEL_OP2_303_90080_20131228_233334_outLine
+BABEL_OP2_303_90740_20140221_220031_inLine
+BABEL_OP2_303_90740_20140221_220031_outLine
+BABEL_OP2_303_91125_20131102_191721_inLine
+BABEL_OP2_303_91125_20131102_191721_outLine
+BABEL_OP2_303_91944_20131107_214314_inLine
+BABEL_OP2_303_91944_20131107_214314_outLine
+BABEL_OP2_303_92176_20131115_153306_inLine
+BABEL_OP2_303_92176_20131115_153306_outLine
+BABEL_OP2_303_92605_20140205_192703_inLine
+BABEL_OP2_303_92605_20140205_192703_outLine
+BABEL_OP2_303_92757_20140211_221207_inLine
+BABEL_OP2_303_92757_20140211_221207_outLine
+BABEL_OP2_303_92792_20131223_042728_inLine
+BABEL_OP2_303_92792_20131223_042728_outLine
+BABEL_OP2_303_94025_20131211_211933_inLine
+BABEL_OP2_303_94025_20131211_211933_outLine
+BABEL_OP2_303_94333_20131029_193545_inLine
+BABEL_OP2_303_94333_20131029_193545_outLine
+BABEL_OP2_303_94745_20131204_205747_inLine
+BABEL_OP2_303_94745_20131204_205747_outLine
+BABEL_OP2_303_94869_20131101_184934_inLine
+BABEL_OP2_303_94869_20131101_184934_outLine
+BABEL_OP2_303_96324_20131107_162546_inLine
+BABEL_OP2_303_96324_20131107_162546_outLine
+BABEL_OP2_303_96690_20131114_194453_inLine
+BABEL_OP2_303_96690_20131114_194453_outLine
+BABEL_OP2_303_97286_20140205_223354_inLine
+BABEL_OP2_303_97286_20140205_223354_outLine
+BABEL_OP2_303_97772_20131024_230426_inLine
+BABEL_OP2_303_97772_20131024_230426_outLine
+BABEL_OP2_303_97988_20131204_195626_inLine
+BABEL_OP2_303_97988_20131204_195626_outLine
+BABEL_OP2_303_97988_20131204_211137_inLine
+BABEL_OP2_303_97988_20131204_211137_outLine
+BABEL_OP2_303_98311_20131107_224445_inLine
+BABEL_OP2_303_98311_20131107_224445_outLine
+BABEL_OP2_303_98356_20131121_191712_inLine
+BABEL_OP2_303_98356_20131121_191712_outLine
+BABEL_OP2_303_98390_20131029_164425_inLine
+BABEL_OP2_303_98390_20131029_164425_outLine
+BABEL_OP2_303_99955_20131215_222330_inLine
+BABEL_OP2_303_99955_20131215_222330_outLine
+BABEL_OP2_303_99955_20131216_231047_inLine
+BABEL_OP2_303_99955_20131216_231047_outLine
diff --git a/egs/babel/s5d/conf/lists/303-telugu/untranscribed-training.list b/egs/babel/s5d/conf/lists/303-telugu/untranscribed-training.list
new file mode 100644
index 00000000000..29a7e3f80b4
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/303-telugu/untranscribed-training.list
@@ -0,0 +1,501 @@
+BABEL_OP2_303_11310_20140309_202017_inLine
+BABEL_OP2_303_11310_20140309_202017_outLine
+BABEL_OP2_303_11723_20140320_222729_inLine
+BABEL_OP2_303_11723_20140320_222729_outLine
+BABEL_OP2_303_11723_20140320_223508_inLine
+BABEL_OP2_303_11723_20140320_223508_outLine
+BABEL_OP2_303_13184_20131204_011559_inLine
+BABEL_OP2_303_13184_20131204_011559_outLine
+BABEL_OP2_303_13189_20131211_195308_inLine
+BABEL_OP2_303_13189_20131211_195308_outLine
+BABEL_OP2_303_13792_20131029_222536_inLine
+BABEL_OP2_303_13792_20131029_222536_outLine
+BABEL_OP2_303_13929_20140327_182253_inLine
+BABEL_OP2_303_13929_20140327_182253_outLine
+BABEL_OP2_303_14575_20140328_215314_inLine
+BABEL_OP2_303_14575_20140328_215314_outLine
+BABEL_OP2_303_14723_20140327_220200_inLine
+BABEL_OP2_303_14723_20140327_220200_outLine
+BABEL_OP2_303_14884_20140320_193514_inLine
+BABEL_OP2_303_14884_20140320_193514_outLine
+BABEL_OP2_303_14884_20140320_195858_inLine
+BABEL_OP2_303_14884_20140320_195858_outLine
+BABEL_OP2_303_15926_20131130_215154_inLine
+BABEL_OP2_303_15926_20131130_215154_outLine
+BABEL_OP2_303_16351_20140309_193931_inLine
+BABEL_OP2_303_16351_20140309_193931_outLine
+BABEL_OP2_303_16726_20140328_174353_inLine
+BABEL_OP2_303_16726_20140328_174353_outLine
+BABEL_OP2_303_17511_20140327_212725_inLine
+BABEL_OP2_303_17511_20140327_212725_outLine
+BABEL_OP2_303_17751_20140130_221610_inLine
+BABEL_OP2_303_17751_20140130_221610_outLine
+BABEL_OP2_303_17890_20131116_201518_inLine
+BABEL_OP2_303_17890_20131116_201518_outLine
+BABEL_OP2_303_17914_20131229_223237_inLine
+BABEL_OP2_303_17914_20131229_223237_outLine
+BABEL_OP2_303_17937_20140319_174736_inLine
+BABEL_OP2_303_17937_20140319_174736_outLine
+BABEL_OP2_303_18280_20140328_223246_inLine
+BABEL_OP2_303_18280_20140328_223246_outLine
+BABEL_OP2_303_18297_20140125_191248_inLine
+BABEL_OP2_303_18297_20140125_191248_outLine
+BABEL_OP2_303_18566_20131228_173117_inLine
+BABEL_OP2_303_18566_20131228_173117_outLine
+BABEL_OP2_303_19101_20131114_161754_inLine
+BABEL_OP2_303_19101_20131114_161754_outLine
+BABEL_OP2_303_19440_20140325_010253_inLine
+BABEL_OP2_303_19440_20140325_010253_outLine
+BABEL_OP2_303_19444_20140324_030047_inLine
+BABEL_OP2_303_19444_20140324_030047_outLine
+BABEL_OP2_303_19621_20131117_014609_inLine
+BABEL_OP2_303_19621_20131117_014609_outLine
+BABEL_OP2_303_20682_20131128_201847_inLine
+BABEL_OP2_303_20682_20131128_201847_outLine
+BABEL_OP2_303_20738_20131230_225647_inLine
+BABEL_OP2_303_20738_20131230_225647_outLine
+BABEL_OP2_303_20896_20140328_234931_inLine
+BABEL_OP2_303_20896_20140328_234931_outLine
+BABEL_OP2_303_21159_20140318_195039_inLine
+BABEL_OP2_303_21159_20140318_195039_outLine
+BABEL_OP2_303_21244_20140129_215632_inLine
+BABEL_OP2_303_21244_20140129_215632_outLine
+BABEL_OP2_303_21315_20140405_194002_inLine
+BABEL_OP2_303_21315_20140405_194002_outLine
+BABEL_OP2_303_22021_20140413_225936_inLine
+BABEL_OP2_303_22021_20140413_225936_outLine
+BABEL_OP2_303_22591_20140404_023216_inLine
+BABEL_OP2_303_22591_20140404_023216_outLine
+BABEL_OP2_303_22641_20131025_191802_inLine
+BABEL_OP2_303_22641_20131025_191802_outLine
+BABEL_OP2_303_22643_20140319_183843_inLine
+BABEL_OP2_303_22643_20140319_183843_outLine
+BABEL_OP2_303_23355_20140324_163413_inLine
+BABEL_OP2_303_23355_20140324_163413_outLine
+BABEL_OP2_303_23628_20131114_233248_inLine
+BABEL_OP2_303_23628_20131114_233248_outLine
+BABEL_OP2_303_23700_20140330_203130_inLine
+BABEL_OP2_303_23700_20140330_203130_outLine
+BABEL_OP2_303_24587_20140324_011441_inLine
+BABEL_OP2_303_24587_20140324_011441_outLine
+BABEL_OP2_303_24587_20140324_035935_inLine
+BABEL_OP2_303_24587_20140324_035935_outLine
+BABEL_OP2_303_24648_20140324_212818_inLine
+BABEL_OP2_303_24648_20140324_212818_outLine
+BABEL_OP2_303_25012_20140309_203215_inLine
+BABEL_OP2_303_25012_20140309_203215_outLine
+BABEL_OP2_303_25085_20140213_175133_inLine
+BABEL_OP2_303_25085_20140213_175133_outLine
+BABEL_OP2_303_25242_20140308_200459_inLine
+BABEL_OP2_303_25242_20140308_200459_outLine
+BABEL_OP2_303_25496_20140325_025625_inLine
+BABEL_OP2_303_25496_20140325_025625_outLine
+BABEL_OP2_303_26074_20131114_211040_inLine
+BABEL_OP2_303_26074_20131114_211040_outLine
+BABEL_OP2_303_27014_20140309_212535_inLine
+BABEL_OP2_303_27014_20140309_212535_outLine
+BABEL_OP2_303_27478_20131228_145746_inLine
+BABEL_OP2_303_27478_20131228_145746_outLine
+BABEL_OP2_303_28303_20131030_203335_inLine
+BABEL_OP2_303_28303_20131030_203335_outLine
+BABEL_OP2_303_28814_20131216_215127_inLine
+BABEL_OP2_303_28814_20131216_215127_outLine
+BABEL_OP2_303_29072_20131118_191936_outLine
+BABEL_OP2_303_29563_20140327_193023_inLine
+BABEL_OP2_303_29563_20140327_193023_outLine
+BABEL_OP2_303_29643_20140131_234915_inLine
+BABEL_OP2_303_29643_20140131_234915_outLine
+BABEL_OP2_303_29765_20140317_141957_inLine
+BABEL_OP2_303_29765_20140317_141957_outLine
+BABEL_OP2_303_30084_20140212_191819_inLine
+BABEL_OP2_303_30084_20140212_191819_outLine
+BABEL_OP2_303_30250_20131105_004442_inLine
+BABEL_OP2_303_30250_20131105_004442_outLine
+BABEL_OP2_303_32832_20131204_034501_inLine
+BABEL_OP2_303_32832_20131204_034501_outLine
+BABEL_OP2_303_33273_20131106_231154_inLine
+BABEL_OP2_303_33273_20131106_231154_outLine
+BABEL_OP2_303_33774_20140325_031929_inLine
+BABEL_OP2_303_33774_20140325_031929_outLine
+BABEL_OP2_303_34064_20140324_183744_inLine
+BABEL_OP2_303_34064_20140324_183744_outLine
+BABEL_OP2_303_34208_20140404_030609_inLine
+BABEL_OP2_303_34208_20140404_030609_outLine
+BABEL_OP2_303_34477_20131113_195424_inLine
+BABEL_OP2_303_34477_20131113_195424_outLine
+BABEL_OP2_303_35420_20140318_214611_inLine
+BABEL_OP2_303_35420_20140318_214611_outLine
+BABEL_OP2_303_35467_20131114_210333_inLine
+BABEL_OP2_303_35467_20131114_210333_outLine
+BABEL_OP2_303_35885_20131225_181427_inLine
+BABEL_OP2_303_35885_20131225_181427_outLine
+BABEL_OP2_303_36017_20140204_222306_inLine
+BABEL_OP2_303_36017_20140204_222306_outLine
+BABEL_OP2_303_36147_20140402_224231_inLine
+BABEL_OP2_303_36147_20140402_224231_outLine
+BABEL_OP2_303_36900_20131223_225105_inLine
+BABEL_OP2_303_36900_20131223_225105_outLine
+BABEL_OP2_303_36990_20131111_022257_inLine
+BABEL_OP2_303_36990_20131111_022257_outLine
+BABEL_OP2_303_37290_20131114_034451_inLine
+BABEL_OP2_303_37290_20131114_034451_outLine
+BABEL_OP2_303_38340_20131114_184816_inLine
+BABEL_OP2_303_38340_20131114_184816_outLine
+BABEL_OP2_303_39099_20140127_233334_inLine
+BABEL_OP2_303_39099_20140127_233334_outLine
+BABEL_OP2_303_39277_20140324_193505_inLine
+BABEL_OP2_303_39277_20140324_193505_outLine
+BABEL_OP2_303_39579_20140327_191248_inLine
+BABEL_OP2_303_39579_20140327_191248_outLine
+BABEL_OP2_303_39680_20131211_183650_inLine
+BABEL_OP2_303_39680_20131211_183650_outLine
+BABEL_OP2_303_40092_20140329_200501_inLine
+BABEL_OP2_303_40092_20140329_200501_outLine
+BABEL_OP2_303_40092_20140329_201239_inLine
+BABEL_OP2_303_40092_20140329_201239_outLine
+BABEL_OP2_303_40092_20140329_202122_inLine
+BABEL_OP2_303_40092_20140329_202122_outLine
+BABEL_OP2_303_40648_20140319_195523_inLine
+BABEL_OP2_303_40648_20140319_195523_outLine
+BABEL_OP2_303_40939_20140415_195416_inLine
+BABEL_OP2_303_40939_20140415_195416_outLine
+BABEL_OP2_303_41745_20131109_041340_inLine
+BABEL_OP2_303_41745_20131109_041340_outLine
+BABEL_OP2_303_42155_20131114_053239_inLine
+BABEL_OP2_303_42155_20131114_053239_outLine
+BABEL_OP2_303_42243_20131025_222121_inLine
+BABEL_OP2_303_42243_20131025_222121_outLine
+BABEL_OP2_303_42619_20131124_172939_inLine
+BABEL_OP2_303_42619_20131124_172939_outLine
+BABEL_OP2_303_42834_20131115_023812_inLine
+BABEL_OP2_303_42834_20131115_023812_outLine
+BABEL_OP2_303_43395_20140405_161423_inLine
+BABEL_OP2_303_43395_20140405_161423_outLine
+BABEL_OP2_303_44114_20140405_145238_inLine
+BABEL_OP2_303_44114_20140405_145238_outLine
+BABEL_OP2_303_44619_20131109_201926_inLine
+BABEL_OP2_303_44619_20131109_201926_outLine
+BABEL_OP2_303_44678_20140320_185927_inLine
+BABEL_OP2_303_44678_20140320_185927_outLine
+BABEL_OP2_303_44898_20140309_220734_inLine
+BABEL_OP2_303_44898_20140309_220734_outLine
+BABEL_OP2_303_45121_20140207_012357_inLine
+BABEL_OP2_303_45121_20140207_012357_outLine
+BABEL_OP2_303_45140_20140205_001649_inLine
+BABEL_OP2_303_45140_20140205_001649_outLine
+BABEL_OP2_303_45777_20131116_041840_inLine
+BABEL_OP2_303_45777_20131116_041840_outLine
+BABEL_OP2_303_46535_20140404_014728_inLine
+BABEL_OP2_303_46535_20140404_014728_outLine
+BABEL_OP2_303_46712_20131114_191120_inLine
+BABEL_OP2_303_46712_20131114_191120_outLine
+BABEL_OP2_303_47877_20131218_041443_inLine
+BABEL_OP2_303_47877_20131218_041443_outLine
+BABEL_OP2_303_47882_20140309_225723_inLine
+BABEL_OP2_303_47882_20140309_225723_outLine
+BABEL_OP2_303_48024_20140324_154856_inLine
+BABEL_OP2_303_48024_20140324_154856_outLine
+BABEL_OP2_303_49001_20131114_194536_inLine
+BABEL_OP2_303_49001_20131114_194536_outLine
+BABEL_OP2_303_49870_20140330_002407_inLine
+BABEL_OP2_303_49870_20140330_002407_outLine
+BABEL_OP2_303_49870_20140330_003441_inLine
+BABEL_OP2_303_49870_20140330_003441_outLine
+BABEL_OP2_303_49902_20131104_154633_inLine
+BABEL_OP2_303_49902_20131104_154633_outLine
+BABEL_OP2_303_49907_20131114_011516_inLine
+BABEL_OP2_303_49907_20131114_011516_outLine
+BABEL_OP2_303_50427_20131113_234859_inLine
+BABEL_OP2_303_50427_20131113_234859_outLine
+BABEL_OP2_303_50630_20131130_231747_inLine
+BABEL_OP2_303_50630_20131130_231747_outLine
+BABEL_OP2_303_50940_20140203_224023_inLine
+BABEL_OP2_303_50940_20140203_224023_outLine
+BABEL_OP2_303_50958_20131110_200903_inLine
+BABEL_OP2_303_50958_20131110_200903_outLine
+BABEL_OP2_303_51414_20140118_210505_inLine
+BABEL_OP2_303_51414_20140118_210505_outLine
+BABEL_OP2_303_51417_20131205_015949_inLine
+BABEL_OP2_303_51417_20131205_015949_outLine
+BABEL_OP2_303_52058_20140318_223046_inLine
+BABEL_OP2_303_52058_20140318_223046_outLine
+BABEL_OP2_303_52058_20140318_223719_inLine
+BABEL_OP2_303_52058_20140318_223719_outLine
+BABEL_OP2_303_52322_20140319_164229_inLine
+BABEL_OP2_303_52322_20140319_164229_outLine
+BABEL_OP2_303_52818_20131115_053831_inLine
+BABEL_OP2_303_52818_20131115_053831_outLine
+BABEL_OP2_303_53010_20140403_235230_inLine
+BABEL_OP2_303_53010_20140403_235230_outLine
+BABEL_OP2_303_53068_20140321_041556_inLine
+BABEL_OP2_303_53068_20140321_041556_outLine
+BABEL_OP2_303_53206_20140308_201930_inLine
+BABEL_OP2_303_53206_20140308_201930_outLine
+BABEL_OP2_303_54405_20131113_021212_inLine
+BABEL_OP2_303_54405_20131113_021212_outLine
+BABEL_OP2_303_54953_20131109_030545_inLine
+BABEL_OP2_303_54953_20131109_030545_outLine
+BABEL_OP2_303_55013_20140204_205447_inLine
+BABEL_OP2_303_55013_20140204_205447_outLine
+BABEL_OP2_303_55742_20131114_230121_inLine
+BABEL_OP2_303_55742_20131114_230121_outLine
+BABEL_OP2_303_56306_20131206_164521_inLine
+BABEL_OP2_303_56306_20131206_164521_outLine
+BABEL_OP2_303_56326_20140309_213505_inLine
+BABEL_OP2_303_56326_20140309_213505_outLine
+BABEL_OP2_303_56370_20131030_191610_inLine
+BABEL_OP2_303_56370_20131030_191610_outLine
+BABEL_OP2_303_56523_20131109_044230_inLine
+BABEL_OP2_303_56523_20131109_044230_outLine
+BABEL_OP2_303_56743_20131109_043328_inLine
+BABEL_OP2_303_56743_20131109_043328_outLine
+BABEL_OP2_303_57065_20131204_193037_inLine
+BABEL_OP2_303_57065_20131204_193037_outLine
+BABEL_OP2_303_57650_20131230_182126_inLine
+BABEL_OP2_303_57650_20131230_182126_outLine
+BABEL_OP2_303_58717_20131115_231922_inLine
+BABEL_OP2_303_58717_20131115_231922_outLine
+BABEL_OP2_303_59039_20140219_180738_inLine
+BABEL_OP2_303_59039_20140219_180738_outLine
+BABEL_OP2_303_59091_20131206_183149_inLine
+BABEL_OP2_303_59091_20131206_183149_outLine
+BABEL_OP2_303_59163_20140416_164729_inLine
+BABEL_OP2_303_59163_20140416_164729_outLine
+BABEL_OP2_303_59301_20131205_012957_inLine
+BABEL_OP2_303_59301_20131205_012957_outLine
+BABEL_OP2_303_59747_20131114_224542_inLine
+BABEL_OP2_303_59747_20131114_224542_outLine
+BABEL_OP2_303_60352_20131115_205920_inLine
+BABEL_OP2_303_60352_20131115_205920_outLine
+BABEL_OP2_303_60352_20131115_210809_inLine
+BABEL_OP2_303_60352_20131115_210809_outLine
+BABEL_OP2_303_60418_20131115_210956_inLine
+BABEL_OP2_303_60418_20131115_210956_outLine
+BABEL_OP2_303_60508_20131101_185756_inLine
+BABEL_OP2_303_60508_20131101_185756_outLine
+BABEL_OP2_303_60650_20140319_182240_inLine
+BABEL_OP2_303_60650_20140319_182240_outLine
+BABEL_OP2_303_60836_20131112_201953_inLine
+BABEL_OP2_303_60836_20131112_201953_outLine
+BABEL_OP2_303_61219_20131114_181005_inLine
+BABEL_OP2_303_61219_20131114_181005_outLine
+BABEL_OP2_303_61435_20131123_235604_inLine
+BABEL_OP2_303_61435_20131123_235604_outLine
+BABEL_OP2_303_61684_20140220_032432_inLine
+BABEL_OP2_303_61684_20140220_032432_outLine
+BABEL_OP2_303_61873_20131114_011706_inLine
+BABEL_OP2_303_61873_20131114_011706_outLine
+BABEL_OP2_303_61971_20131228_000329_inLine
+BABEL_OP2_303_61971_20131228_000329_outLine
+BABEL_OP2_303_62286_20131129_203236_inLine
+BABEL_OP2_303_62286_20131129_203236_outLine
+BABEL_OP2_303_62362_20140129_183345_inLine
+BABEL_OP2_303_62362_20140129_183345_outLine
+BABEL_OP2_303_62471_20140328_192801_inLine
+BABEL_OP2_303_62471_20140328_192801_outLine
+BABEL_OP2_303_62734_20131108_203310_inLine
+BABEL_OP2_303_62734_20131108_203310_outLine
+BABEL_OP2_303_63445_20131101_180928_inLine
+BABEL_OP2_303_63445_20131101_180928_outLine
+BABEL_OP2_303_63787_20131029_232219_inLine
+BABEL_OP2_303_63787_20131029_232219_outLine
+BABEL_OP2_303_63938_20131225_194045_inLine
+BABEL_OP2_303_63938_20131225_194045_outLine
+BABEL_OP2_303_65298_20140222_213911_inLine
+BABEL_OP2_303_65298_20140222_213911_outLine
+BABEL_OP2_303_65639_20140320_184458_inLine
+BABEL_OP2_303_65639_20140320_184458_outLine
+BABEL_OP2_303_65723_20131106_221517_inLine
+BABEL_OP2_303_65723_20131106_221517_outLine
+BABEL_OP2_303_66305_20131224_012218_inLine
+BABEL_OP2_303_66305_20131224_022308_inLine
+BABEL_OP2_303_67085_20140223_030002_inLine
+BABEL_OP2_303_67085_20140223_030002_outLine
+BABEL_OP2_303_67304_20140319_193543_inLine
+BABEL_OP2_303_67304_20140319_193543_outLine
+BABEL_OP2_303_67794_20131111_173553_inLine
+BABEL_OP2_303_67794_20131111_173553_outLine
+BABEL_OP2_303_68040_20131116_041049_inLine
+BABEL_OP2_303_68040_20131116_041049_outLine
+BABEL_OP2_303_68182_20131206_203404_inLine
+BABEL_OP2_303_68182_20131206_203404_outLine
+BABEL_OP2_303_68402_20140319_235557_inLine
+BABEL_OP2_303_68402_20140319_235557_outLine
+BABEL_OP2_303_68854_20140125_191013_inLine
+BABEL_OP2_303_68854_20140125_191013_outLine
+BABEL_OP2_303_69090_20140322_190538_inLine
+BABEL_OP2_303_69090_20140322_190538_outLine
+BABEL_OP2_303_69964_20140201_215153_inLine
+BABEL_OP2_303_69964_20140201_215153_outLine
+BABEL_OP2_303_69972_20140412_213250_inLine
+BABEL_OP2_303_69972_20140412_213250_outLine
+BABEL_OP2_303_70182_20140131_021121_inLine
+BABEL_OP2_303_70182_20140131_021121_outLine
+BABEL_OP2_303_70216_20140309_212242_inLine
+BABEL_OP2_303_70216_20140309_212242_outLine
+BABEL_OP2_303_70526_20140121_191817_inLine
+BABEL_OP2_303_70526_20140121_191817_outLine
+BABEL_OP2_303_71282_20131206_205821_inLine
+BABEL_OP2_303_71282_20131206_205821_outLine
+BABEL_OP2_303_71333_20131114_201026_inLine
+BABEL_OP2_303_71333_20131114_201026_outLine
+BABEL_OP2_303_71704_20131107_231553_inLine
+BABEL_OP2_303_71704_20131107_231553_outLine
+BABEL_OP2_303_71754_20140327_221321_inLine
+BABEL_OP2_303_71754_20140327_221321_outLine
+BABEL_OP2_303_73258_20131110_190632_inLine
+BABEL_OP2_303_73258_20131110_190632_outLine
+BABEL_OP2_303_73305_20140219_214719_inLine
+BABEL_OP2_303_73305_20140219_214719_outLine
+BABEL_OP2_303_73408_20140222_222505_inLine
+BABEL_OP2_303_73408_20140222_222505_outLine
+BABEL_OP2_303_73837_20131114_035127_inLine
+BABEL_OP2_303_73837_20131114_035127_outLine
+BABEL_OP2_303_74121_20131109_193228_inLine
+BABEL_OP2_303_74121_20131109_193228_outLine
+BABEL_OP2_303_75366_20140222_194703_inLine
+BABEL_OP2_303_75366_20140222_194703_outLine
+BABEL_OP2_303_75460_20140211_182910_inLine
+BABEL_OP2_303_75460_20140211_182910_outLine
+BABEL_OP2_303_77139_20131105_210350_inLine
+BABEL_OP2_303_77139_20131105_210350_outLine
+BABEL_OP2_303_79028_20140416_181014_inLine
+BABEL_OP2_303_79028_20140416_181014_outLine
+BABEL_OP2_303_79080_20131208_203223_inLine
+BABEL_OP2_303_79080_20131208_203223_outLine
+BABEL_OP2_303_79129_20131114_034645_outLine
+BABEL_OP2_303_79723_20140413_221551_inLine
+BABEL_OP2_303_79723_20140413_221551_outLine
+BABEL_OP2_303_79898_20140309_211140_inLine
+BABEL_OP2_303_79898_20140309_211140_outLine
+BABEL_OP2_303_80721_20131201_171555_inLine
+BABEL_OP2_303_80721_20131201_171555_outLine
+BABEL_OP2_303_81427_20131105_001654_inLine
+BABEL_OP2_303_81427_20131105_001654_outLine
+BABEL_OP2_303_81674_20140202_220306_inLine
+BABEL_OP2_303_81674_20140202_220306_outLine
+BABEL_OP2_303_82361_20140204_232359_inLine
+BABEL_OP2_303_82361_20140204_232359_outLine
+BABEL_OP2_303_82626_20140315_024235_inLine
+BABEL_OP2_303_82626_20140315_024235_outLine
+BABEL_OP2_303_82863_20131111_030006_inLine
+BABEL_OP2_303_82863_20131111_030006_outLine
+BABEL_OP2_303_82904_20140204_205103_inLine
+BABEL_OP2_303_83062_20140204_210837_inLine
+BABEL_OP2_303_83062_20140204_210837_outLine
+BABEL_OP2_303_83813_20140320_010221_inLine
+BABEL_OP2_303_83813_20140320_010221_outLine
+BABEL_OP2_303_84125_20131025_195026_inLine
+BABEL_OP2_303_84125_20131025_195026_outLine
+BABEL_OP2_303_84339_20140111_180841_inLine
+BABEL_OP2_303_84339_20140111_180841_outLine
+BABEL_OP2_303_84815_20131204_190755_inLine
+BABEL_OP2_303_84815_20131204_190755_outLine
+BABEL_OP2_303_85048_20131114_222244_inLine
+BABEL_OP2_303_85048_20131114_222244_outLine
+BABEL_OP2_303_85260_20140327_224114_inLine
+BABEL_OP2_303_85260_20140327_224114_outLine
+BABEL_OP2_303_86715_20140312_223757_inLine
+BABEL_OP2_303_86715_20140312_223757_outLine
+BABEL_OP2_303_86748_20131206_231713_inLine
+BABEL_OP2_303_86748_20131206_231713_outLine
+BABEL_OP2_303_86830_20131211_192459_inLine
+BABEL_OP2_303_86830_20131211_192459_outLine
+BABEL_OP2_303_86860_20140204_194637_inLine
+BABEL_OP2_303_86860_20140204_194637_outLine
+BABEL_OP2_303_88686_20131028_192526_inLine
+BABEL_OP2_303_88686_20131028_192526_outLine
+BABEL_OP2_303_88812_20140125_200044_inLine
+BABEL_OP2_303_88812_20140125_200044_outLine
+BABEL_OP2_303_88873_20131112_214623_inLine
+BABEL_OP2_303_88873_20131112_214623_outLine
+BABEL_OP2_303_89045_20131024_213611_inLine
+BABEL_OP2_303_89045_20131024_213611_outLine
+BABEL_OP2_303_89372_20131025_175446_inLine
+BABEL_OP2_303_89372_20131025_175446_outLine
+BABEL_OP2_303_89457_20131113_185151_inLine
+BABEL_OP2_303_89457_20131113_185151_outLine
+BABEL_OP2_303_89575_20131129_162850_inLine
+BABEL_OP2_303_89575_20131129_162850_outLine
+BABEL_OP2_303_89650_20140414_003815_inLine
+BABEL_OP2_303_89650_20140414_003815_outLine
+BABEL_OP2_303_90417_20140206_194432_inLine
+BABEL_OP2_303_90417_20140206_194432_outLine
+BABEL_OP2_303_91189_20140214_204530_inLine
+BABEL_OP2_303_91189_20140214_204530_outLine
+BABEL_OP2_303_91336_20131114_155601_inLine
+BABEL_OP2_303_91336_20131114_155601_outLine
+BABEL_OP2_303_91411_20140318_210954_inLine
+BABEL_OP2_303_91411_20140318_210954_outLine
+BABEL_OP2_303_91463_20131115_000512_inLine
+BABEL_OP2_303_91463_20131115_000512_outLine
+BABEL_OP2_303_91581_20131202_041422_inLine
+BABEL_OP2_303_91581_20131202_041422_outLine
+BABEL_OP2_303_91593_20140209_040916_inLine
+BABEL_OP2_303_91593_20140209_040916_outLine
+BABEL_OP2_303_91606_20140325_030918_inLine
+BABEL_OP2_303_91606_20140325_030918_outLine
+BABEL_OP2_303_91760_20140129_023507_inLine
+BABEL_OP2_303_91760_20140129_023507_outLine
+BABEL_OP2_303_91808_20140324_180442_inLine
+BABEL_OP2_303_91808_20140324_180442_outLine
+BABEL_OP2_303_91884_20131224_170738_inLine
+BABEL_OP2_303_91884_20131224_170738_outLine
+BABEL_OP2_303_91971_20140401_140304_inLine
+BABEL_OP2_303_91971_20140401_140304_outLine
+BABEL_OP2_303_92281_20140225_212826_inLine
+BABEL_OP2_303_92281_20140225_212826_outLine
+BABEL_OP2_303_93007_20140325_033131_inLine
+BABEL_OP2_303_93007_20140325_033131_outLine
+BABEL_OP2_303_93443_20140320_235342_inLine
+BABEL_OP2_303_93443_20140320_235342_outLine
+BABEL_OP2_303_93681_20140322_200153_inLine
+BABEL_OP2_303_93681_20140322_200153_outLine
+BABEL_OP2_303_93861_20131114_005221_inLine
+BABEL_OP2_303_93861_20131114_005221_outLine
+BABEL_OP2_303_93861_20131114_011200_inLine
+BABEL_OP2_303_93861_20131114_011200_outLine
+BABEL_OP2_303_93937_20140312_225604_inLine
+BABEL_OP2_303_93937_20140312_225604_outLine
+BABEL_OP2_303_93946_20131204_180611_inLine
+BABEL_OP2_303_93946_20131204_180611_outLine
+BABEL_OP2_303_94002_20131113_163221_inLine
+BABEL_OP2_303_94002_20131113_163221_outLine
+BABEL_OP2_303_94035_20140320_015111_inLine
+BABEL_OP2_303_94035_20140320_015111_outLine
+BABEL_OP2_303_94044_20140221_020012_inLine
+BABEL_OP2_303_94044_20140221_020012_outLine
+BABEL_OP2_303_94212_20140328_202919_inLine
+BABEL_OP2_303_94212_20140328_202919_outLine
+BABEL_OP2_303_94713_20140319_231311_inLine
+BABEL_OP2_303_94713_20140319_231311_outLine
+BABEL_OP2_303_95028_20140206_001106_inLine
+BABEL_OP2_303_95028_20140206_001106_outLine
+BABEL_OP2_303_95028_20140320_001627_inLine
+BABEL_OP2_303_95028_20140320_001627_outLine
+BABEL_OP2_303_95467_20140218_202005_inLine
+BABEL_OP2_303_95467_20140218_202005_outLine
+BABEL_OP2_303_95490_20131101_164715_inLine
+BABEL_OP2_303_95490_20131101_164715_outLine
+BABEL_OP2_303_95663_20131116_161029_inLine
+BABEL_OP2_303_95663_20131116_161029_outLine
+BABEL_OP2_303_95935_20131204_145738_inLine
+BABEL_OP2_303_95935_20131204_145738_outLine
+BABEL_OP2_303_96088_20140127_224534_inLine
+BABEL_OP2_303_96088_20140127_224534_outLine
+BABEL_OP2_303_97097_20140122_030319_inLine
+BABEL_OP2_303_97097_20140122_030319_outLine
+BABEL_OP2_303_97264_20131205_020902_inLine
+BABEL_OP2_303_97264_20131205_020902_outLine
+BABEL_OP2_303_97588_20131027_011205_inLine
+BABEL_OP2_303_97588_20131027_011205_outLine
+BABEL_OP2_303_98255_20140322_200157_inLine
+BABEL_OP2_303_98255_20140322_200157_outLine
+BABEL_OP2_303_98580_20131112_204407_inLine
+BABEL_OP2_303_98580_20131112_204407_outLine
+BABEL_OP2_303_99813_20131115_032632_inLine
+BABEL_OP2_303_99813_20131115_032632_outLine
+BABEL_OP2_303_99883_20140326_192513_inLine
+BABEL_OP2_303_99883_20140326_192513_outLine
+BABEL_OP2_303_99952_20140212_000327_inLine
+BABEL_OP2_303_99952_20140212_000327_outLine
diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/dev.2h.list b/egs/babel/s5d/conf/lists/304-lithuanian/dev.2h.list
new file mode 100644
index 00000000000..37f27ef3750
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/304-lithuanian/dev.2h.list
@@ -0,0 +1,122 @@
+BABEL_OP2_304_13040_20131130_214521_inLine
+BABEL_OP2_304_13040_20131130_214521_outLine
+BABEL_OP2_304_14158_20140115_023605_inLine
+BABEL_OP2_304_14158_20140115_023605_outLine
+BABEL_OP2_304_14575_20131024_232334_inLine
+BABEL_OP2_304_14575_20131024_232334_outLine
+BABEL_OP2_304_14575_20131024_235230_inLine
+BABEL_OP2_304_14575_20131024_235230_outLine
+BABEL_OP2_304_15163_20140108_001236_inLine
+BABEL_OP2_304_15163_20140108_001236_outLine
+BABEL_OP2_304_15262_20131210_004932_inLine
+BABEL_OP2_304_15262_20131210_004932_outLine
+BABEL_OP2_304_16056_20140123_070422_inLine
+BABEL_OP2_304_16056_20140123_070422_outLine
+BABEL_OP2_304_16787_20131206_025653_inLine
+BABEL_OP2_304_16787_20131206_025653_outLine
+BABEL_OP2_304_17511_20131126_055458_inLine
+BABEL_OP2_304_17511_20131126_055458_outLine
+BABEL_OP2_304_17573_20140203_230300_inLine
+BABEL_OP2_304_17573_20140203_230300_outLine
+BABEL_OP2_304_17914_20140228_184910_inLine
+BABEL_OP2_304_17914_20140228_184910_outLine
+BABEL_OP2_304_21581_20131216_220706_inLine
+BABEL_OP2_304_21581_20131216_220706_outLine
+BABEL_OP2_304_22021_20131023_221926_inLine
+BABEL_OP2_304_22021_20131023_221926_outLine
+BABEL_OP2_304_22288_20131112_035653_inLine
+BABEL_OP2_304_22288_20131112_035653_outLine
+BABEL_OP2_304_26206_20140120_022753_inLine
+BABEL_OP2_304_26206_20140120_022753_outLine
+BABEL_OP2_304_29777_20140217_064220_inLine
+BABEL_OP2_304_29777_20140217_064220_outLine
+BABEL_OP2_304_31500_20131109_033149_inLine
+BABEL_OP2_304_31500_20131109_033149_outLine
+BABEL_OP2_304_31979_20140109_015624_inLine
+BABEL_OP2_304_31979_20140109_015624_outLine
+BABEL_OP2_304_32959_20140210_005641_inLine
+BABEL_OP2_304_32959_20140210_005641_outLine
+BABEL_OP2_304_33800_20131023_012145_inLine
+BABEL_OP2_304_33800_20131023_012145_outLine
+BABEL_OP2_304_34208_20131031_044912_inLine
+BABEL_OP2_304_34208_20131031_044912_outLine
+BABEL_OP2_304_35069_20140304_002856_inLine
+BABEL_OP2_304_35069_20140304_002856_outLine
+BABEL_OP2_304_35202_20140111_000728_inLine
+BABEL_OP2_304_35202_20140111_000728_outLine
+BABEL_OP2_304_37064_20131129_035959_inLine
+BABEL_OP2_304_37064_20131129_035959_outLine
+BABEL_OP2_304_37068_20131023_011604_inLine
+BABEL_OP2_304_37068_20131023_011604_outLine
+BABEL_OP2_304_39927_20131021_221542_inLine
+BABEL_OP2_304_39927_20131021_221542_outLine
+BABEL_OP2_304_40330_20131109_021648_inLine
+BABEL_OP2_304_40330_20131109_021648_outLine
+BABEL_OP2_304_42877_20131022_230033_inLine
+BABEL_OP2_304_42877_20131022_230033_outLine
+BABEL_OP2_304_44420_20131214_233135_inLine
+BABEL_OP2_304_44420_20131214_233135_outLine
+BABEL_OP2_304_46702_20131115_213311_inLine
+BABEL_OP2_304_46702_20131115_213311_outLine
+BABEL_OP2_304_46712_20131209_044650_inLine
+BABEL_OP2_304_46712_20131209_044650_outLine
+BABEL_OP2_304_46974_20140220_023915_inLine
+BABEL_OP2_304_46974_20140220_023915_outLine
+BABEL_OP2_304_54735_20131112_025013_inLine
+BABEL_OP2_304_54735_20131112_025013_outLine
+BABEL_OP2_304_63265_20131108_044545_inLine
+BABEL_OP2_304_63265_20131108_044545_outLine
+BABEL_OP2_304_63307_20140121_215145_inLine
+BABEL_OP2_304_63307_20140121_215145_outLine
+BABEL_OP2_304_63938_20140303_232624_inLine
+BABEL_OP2_304_63938_20140303_232624_outLine
+BABEL_OP2_304_64494_20131212_025147_inLine
+BABEL_OP2_304_64494_20131212_025147_outLine
+BABEL_OP2_304_67671_20131106_030834_inLine
+BABEL_OP2_304_67671_20131106_030834_outLine
+BABEL_OP2_304_70110_20131118_222225_inLine
+BABEL_OP2_304_70110_20131118_222225_outLine
+BABEL_OP2_304_70282_20140114_194359_inLine
+BABEL_OP2_304_70282_20140114_194359_outLine
+BABEL_OP2_304_71704_20131215_005510_inLine
+BABEL_OP2_304_71704_20131215_005510_outLine
+BABEL_OP2_304_73622_20131216_061333_inLine
+BABEL_OP2_304_73622_20131216_061333_outLine
+BABEL_OP2_304_76837_20131020_200525_inLine
+BABEL_OP2_304_76837_20131020_200525_outLine
+BABEL_OP2_304_78877_20131023_202733_inLine
+BABEL_OP2_304_78877_20131023_202733_outLine
+BABEL_OP2_304_84079_20131112_195009_inLine
+BABEL_OP2_304_84079_20131112_195009_outLine
+BABEL_OP2_304_86878_20131129_043842_inLine
+BABEL_OP2_304_86878_20131129_043842_outLine
+BABEL_OP2_304_87629_20140121_223247_inLine
+BABEL_OP2_304_87629_20140121_223247_outLine
+BABEL_OP2_304_87693_20131214_012505_inLine
+BABEL_OP2_304_87693_20131214_012505_outLine
+BABEL_OP2_304_88394_20131030_012001_inLine
+BABEL_OP2_304_88394_20131030_012001_outLine
+BABEL_OP2_304_88873_20131215_052029_inLine
+BABEL_OP2_304_88873_20131215_052029_outLine
+BABEL_OP2_304_89457_20140107_011232_inLine
+BABEL_OP2_304_89457_20140107_011232_outLine
+BABEL_OP2_304_91411_20140214_045051_inLine
+BABEL_OP2_304_91411_20140214_045051_outLine
+BABEL_OP2_304_94002_20140106_061517_inLine
+BABEL_OP2_304_94002_20140106_061517_outLine
+BABEL_OP2_304_94035_20131028_044307_inLine
+BABEL_OP2_304_94035_20131028_044307_outLine
+BABEL_OP2_304_94166_20140222_223654_inLine
+BABEL_OP2_304_94166_20140222_223654_outLine
+BABEL_OP2_304_94587_20140203_223943_inLine
+BABEL_OP2_304_94587_20140203_223943_outLine
+BABEL_OP2_304_95966_20140116_013030_inLine
+BABEL_OP2_304_95966_20140116_013030_outLine
+BABEL_OP2_304_96041_20131110_011619_inLine
+BABEL_OP2_304_96041_20131110_011619_outLine
+BABEL_OP2_304_96934_20131207_231603_inLine
+BABEL_OP2_304_96934_20131207_231603_outLine
+BABEL_OP2_304_97604_20140221_172005_inLine
+BABEL_OP2_304_97604_20140221_172005_outLine
+BABEL_OP2_304_99732_20140213_211724_inLine
+BABEL_OP2_304_99732_20140213_211724_outLine
diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/dev.list b/egs/babel/s5d/conf/lists/304-lithuanian/dev.list
new file mode 100644
index 00000000000..37f27ef3750
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/304-lithuanian/dev.list
@@ -0,0 +1,122 @@
+BABEL_OP2_304_13040_20131130_214521_inLine
+BABEL_OP2_304_13040_20131130_214521_outLine
+BABEL_OP2_304_14158_20140115_023605_inLine
+BABEL_OP2_304_14158_20140115_023605_outLine
+BABEL_OP2_304_14575_20131024_232334_inLine
+BABEL_OP2_304_14575_20131024_232334_outLine
+BABEL_OP2_304_14575_20131024_235230_inLine
+BABEL_OP2_304_14575_20131024_235230_outLine
+BABEL_OP2_304_15163_20140108_001236_inLine
+BABEL_OP2_304_15163_20140108_001236_outLine
+BABEL_OP2_304_15262_20131210_004932_inLine
+BABEL_OP2_304_15262_20131210_004932_outLine
+BABEL_OP2_304_16056_20140123_070422_inLine
+BABEL_OP2_304_16056_20140123_070422_outLine
+BABEL_OP2_304_16787_20131206_025653_inLine
+BABEL_OP2_304_16787_20131206_025653_outLine
+BABEL_OP2_304_17511_20131126_055458_inLine
+BABEL_OP2_304_17511_20131126_055458_outLine
+BABEL_OP2_304_17573_20140203_230300_inLine
+BABEL_OP2_304_17573_20140203_230300_outLine
+BABEL_OP2_304_17914_20140228_184910_inLine
+BABEL_OP2_304_17914_20140228_184910_outLine
+BABEL_OP2_304_21581_20131216_220706_inLine
+BABEL_OP2_304_21581_20131216_220706_outLine
+BABEL_OP2_304_22021_20131023_221926_inLine
+BABEL_OP2_304_22021_20131023_221926_outLine
+BABEL_OP2_304_22288_20131112_035653_inLine
+BABEL_OP2_304_22288_20131112_035653_outLine
+BABEL_OP2_304_26206_20140120_022753_inLine
+BABEL_OP2_304_26206_20140120_022753_outLine
+BABEL_OP2_304_29777_20140217_064220_inLine
+BABEL_OP2_304_29777_20140217_064220_outLine
+BABEL_OP2_304_31500_20131109_033149_inLine
+BABEL_OP2_304_31500_20131109_033149_outLine
+BABEL_OP2_304_31979_20140109_015624_inLine
+BABEL_OP2_304_31979_20140109_015624_outLine
+BABEL_OP2_304_32959_20140210_005641_inLine
+BABEL_OP2_304_32959_20140210_005641_outLine
+BABEL_OP2_304_33800_20131023_012145_inLine
+BABEL_OP2_304_33800_20131023_012145_outLine
+BABEL_OP2_304_34208_20131031_044912_inLine
+BABEL_OP2_304_34208_20131031_044912_outLine
+BABEL_OP2_304_35069_20140304_002856_inLine
+BABEL_OP2_304_35069_20140304_002856_outLine
+BABEL_OP2_304_35202_20140111_000728_inLine
+BABEL_OP2_304_35202_20140111_000728_outLine
+BABEL_OP2_304_37064_20131129_035959_inLine
+BABEL_OP2_304_37064_20131129_035959_outLine
+BABEL_OP2_304_37068_20131023_011604_inLine
+BABEL_OP2_304_37068_20131023_011604_outLine
+BABEL_OP2_304_39927_20131021_221542_inLine
+BABEL_OP2_304_39927_20131021_221542_outLine
+BABEL_OP2_304_40330_20131109_021648_inLine
+BABEL_OP2_304_40330_20131109_021648_outLine
+BABEL_OP2_304_42877_20131022_230033_inLine
+BABEL_OP2_304_42877_20131022_230033_outLine
+BABEL_OP2_304_44420_20131214_233135_inLine
+BABEL_OP2_304_44420_20131214_233135_outLine
+BABEL_OP2_304_46702_20131115_213311_inLine
+BABEL_OP2_304_46702_20131115_213311_outLine
+BABEL_OP2_304_46712_20131209_044650_inLine
+BABEL_OP2_304_46712_20131209_044650_outLine
+BABEL_OP2_304_46974_20140220_023915_inLine
+BABEL_OP2_304_46974_20140220_023915_outLine
+BABEL_OP2_304_54735_20131112_025013_inLine
+BABEL_OP2_304_54735_20131112_025013_outLine
+BABEL_OP2_304_63265_20131108_044545_inLine
+BABEL_OP2_304_63265_20131108_044545_outLine
+BABEL_OP2_304_63307_20140121_215145_inLine
+BABEL_OP2_304_63307_20140121_215145_outLine
+BABEL_OP2_304_63938_20140303_232624_inLine
+BABEL_OP2_304_63938_20140303_232624_outLine
+BABEL_OP2_304_64494_20131212_025147_inLine
+BABEL_OP2_304_64494_20131212_025147_outLine
+BABEL_OP2_304_67671_20131106_030834_inLine
+BABEL_OP2_304_67671_20131106_030834_outLine
+BABEL_OP2_304_70110_20131118_222225_inLine
+BABEL_OP2_304_70110_20131118_222225_outLine
+BABEL_OP2_304_70282_20140114_194359_inLine
+BABEL_OP2_304_70282_20140114_194359_outLine
+BABEL_OP2_304_71704_20131215_005510_inLine
+BABEL_OP2_304_71704_20131215_005510_outLine
+BABEL_OP2_304_73622_20131216_061333_inLine
+BABEL_OP2_304_73622_20131216_061333_outLine
+BABEL_OP2_304_76837_20131020_200525_inLine
+BABEL_OP2_304_76837_20131020_200525_outLine
+BABEL_OP2_304_78877_20131023_202733_inLine
+BABEL_OP2_304_78877_20131023_202733_outLine
+BABEL_OP2_304_84079_20131112_195009_inLine
+BABEL_OP2_304_84079_20131112_195009_outLine
+BABEL_OP2_304_86878_20131129_043842_inLine
+BABEL_OP2_304_86878_20131129_043842_outLine
+BABEL_OP2_304_87629_20140121_223247_inLine
+BABEL_OP2_304_87629_20140121_223247_outLine
+BABEL_OP2_304_87693_20131214_012505_inLine
+BABEL_OP2_304_87693_20131214_012505_outLine
+BABEL_OP2_304_88394_20131030_012001_inLine
+BABEL_OP2_304_88394_20131030_012001_outLine
+BABEL_OP2_304_88873_20131215_052029_inLine
+BABEL_OP2_304_88873_20131215_052029_outLine
+BABEL_OP2_304_89457_20140107_011232_inLine
+BABEL_OP2_304_89457_20140107_011232_outLine
+BABEL_OP2_304_91411_20140214_045051_inLine
+BABEL_OP2_304_91411_20140214_045051_outLine
+BABEL_OP2_304_94002_20140106_061517_inLine
+BABEL_OP2_304_94002_20140106_061517_outLine
+BABEL_OP2_304_94035_20131028_044307_inLine
+BABEL_OP2_304_94035_20131028_044307_outLine
+BABEL_OP2_304_94166_20140222_223654_inLine
+BABEL_OP2_304_94166_20140222_223654_outLine
+BABEL_OP2_304_94587_20140203_223943_inLine
+BABEL_OP2_304_94587_20140203_223943_outLine
+BABEL_OP2_304_95966_20140116_013030_inLine
+BABEL_OP2_304_95966_20140116_013030_outLine
+BABEL_OP2_304_96041_20131110_011619_inLine
+BABEL_OP2_304_96041_20131110_011619_outLine
+BABEL_OP2_304_96934_20131207_231603_inLine
+BABEL_OP2_304_96934_20131207_231603_outLine
+BABEL_OP2_304_97604_20140221_172005_inLine
+BABEL_OP2_304_97604_20140221_172005_outLine
+BABEL_OP2_304_99732_20140213_211724_inLine
+BABEL_OP2_304_99732_20140213_211724_outLine
diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/eval.list b/egs/babel/s5d/conf/lists/304-lithuanian/eval.list
new file mode 100644
index 00000000000..506241eadc5
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/304-lithuanian/eval.list
@@ -0,0 +1,192 @@
+BABEL_OP2_304_10416_20140107_061620_inLine
+BABEL_OP2_304_10416_20140107_061620_outLine
+BABEL_OP2_304_14723_20131125_042706_inLine
+BABEL_OP2_304_14723_20131125_042706_outLine
+BABEL_OP2_304_16351_20131027_201533_inLine
+BABEL_OP2_304_16351_20131027_201533_outLine
+BABEL_OP2_304_16802_20131108_055143_inLine
+BABEL_OP2_304_16802_20131108_055143_outLine
+BABEL_OP2_304_18863_20140222_035802_inLine
+BABEL_OP2_304_18863_20140222_035802_outLine
+BABEL_OP2_304_20724_20131109_014600_inLine
+BABEL_OP2_304_20724_20131109_014600_outLine
+BABEL_OP2_304_22641_20131201_215149_inLine
+BABEL_OP2_304_22641_20131201_215149_outLine
+BABEL_OP2_304_23355_20131126_211038_inLine
+BABEL_OP2_304_23355_20131126_211038_outLine
+BABEL_OP2_304_23395_20140214_042808_inLine
+BABEL_OP2_304_23395_20140214_042808_outLine
+BABEL_OP2_304_23628_20131208_203311_inLine
+BABEL_OP2_304_23628_20131208_203311_outLine
+BABEL_OP2_304_23700_20131025_204511_inLine
+BABEL_OP2_304_23700_20131025_204511_outLine
+BABEL_OP2_304_23731_20140111_003449_inLine
+BABEL_OP2_304_23731_20140111_003449_outLine
+BABEL_OP2_304_24033_20140304_045137_inLine
+BABEL_OP2_304_24033_20140304_045137_outLine
+BABEL_OP2_304_24209_20131022_193019_inLine
+BABEL_OP2_304_24209_20131022_193019_outLine
+BABEL_OP2_304_24209_20131022_193936_inLine
+BABEL_OP2_304_24209_20131022_193936_outLine
+BABEL_OP2_304_25068_20131019_030524_inLine
+BABEL_OP2_304_25068_20131019_030524_outLine
+BABEL_OP2_304_26869_20131031_215636_inLine
+BABEL_OP2_304_26869_20131031_215636_outLine
+BABEL_OP2_304_28422_20140112_043550_inLine
+BABEL_OP2_304_28422_20140112_043550_outLine
+BABEL_OP2_304_28538_20140106_011449_inLine
+BABEL_OP2_304_28538_20140106_011449_outLine
+BABEL_OP2_304_28585_20140225_043733_inLine
+BABEL_OP2_304_28585_20140225_043733_outLine
+BABEL_OP2_304_30250_20140120_020901_inLine
+BABEL_OP2_304_30250_20140120_020901_outLine
+BABEL_OP2_304_36219_20131216_035438_inLine
+BABEL_OP2_304_36219_20131216_035438_outLine
+BABEL_OP2_304_36632_20131024_201211_inLine
+BABEL_OP2_304_36632_20131024_201211_outLine
+BABEL_OP2_304_39159_20131208_045854_inLine
+BABEL_OP2_304_39159_20131208_045854_outLine
+BABEL_OP2_304_39277_20131020_204845_inLine
+BABEL_OP2_304_39277_20131020_204845_outLine
+BABEL_OP2_304_41109_20140220_021208_inLine
+BABEL_OP2_304_41109_20140220_021208_outLine
+BABEL_OP2_304_43285_20140124_012117_inLine
+BABEL_OP2_304_43285_20140124_012117_outLine
+BABEL_OP2_304_44255_20140222_010712_inLine
+BABEL_OP2_304_44255_20140222_010712_outLine
+BABEL_OP2_304_44681_20131023_205447_inLine
+BABEL_OP2_304_44681_20131023_205447_outLine
+BABEL_OP2_304_45106_20140117_233013_inLine
+BABEL_OP2_304_45106_20140117_233013_outLine
+BABEL_OP2_304_45699_20131022_213702_inLine
+BABEL_OP2_304_45699_20131022_213702_outLine
+BABEL_OP2_304_46905_20131025_213636_inLine
+BABEL_OP2_304_46905_20131025_213636_outLine
+BABEL_OP2_304_47882_20131027_194825_inLine
+BABEL_OP2_304_47882_20131027_194825_outLine
+BABEL_OP2_304_48200_20140221_015225_inLine
+BABEL_OP2_304_48200_20140221_015225_outLine
+BABEL_OP2_304_49641_20131112_211903_inLine
+BABEL_OP2_304_49641_20131112_211903_outLine
+BABEL_OP2_304_49775_20131114_210107_inLine
+BABEL_OP2_304_49775_20131114_210107_outLine
+BABEL_OP2_304_50962_20131206_052346_inLine
+BABEL_OP2_304_50962_20131206_052346_outLine
+BABEL_OP2_304_53206_20131021_231814_inLine
+BABEL_OP2_304_53206_20131021_231814_outLine
+BABEL_OP2_304_53441_20131026_001731_inLine
+BABEL_OP2_304_53441_20131026_001731_outLine
+BABEL_OP2_304_53758_20131110_023501_inLine
+BABEL_OP2_304_53758_20131110_023501_outLine
+BABEL_OP2_304_54040_20140207_031046_inLine
+BABEL_OP2_304_54040_20140207_031046_outLine
+BABEL_OP2_304_55742_20131210_035616_inLine
+BABEL_OP2_304_55742_20131210_035616_outLine
+BABEL_OP2_304_57650_20140228_212617_inLine
+BABEL_OP2_304_57650_20140228_212617_outLine
+BABEL_OP2_304_57654_20131129_021919_inLine
+BABEL_OP2_304_57654_20131129_021919_outLine
+BABEL_OP2_304_57922_20140212_234031_inLine
+BABEL_OP2_304_57922_20140212_234031_outLine
+BABEL_OP2_304_60508_20131213_013224_inLine
+BABEL_OP2_304_60508_20131213_013224_outLine
+BABEL_OP2_304_62434_20131204_015115_inLine
+BABEL_OP2_304_62434_20131204_015115_outLine
+BABEL_OP2_304_63481_20131218_054343_inLine
+BABEL_OP2_304_63481_20131218_054343_outLine
+BABEL_OP2_304_63484_20131108_002450_inLine
+BABEL_OP2_304_63484_20131108_002450_outLine
+BABEL_OP2_304_65339_20131108_025612_inLine
+BABEL_OP2_304_65339_20131108_025612_outLine
+BABEL_OP2_304_66967_20131211_212833_inLine
+BABEL_OP2_304_66967_20131211_212833_outLine
+BABEL_OP2_304_67373_20131213_035431_inLine
+BABEL_OP2_304_67373_20131213_035431_outLine
+BABEL_OP2_304_67726_20131021_224218_inLine
+BABEL_OP2_304_67726_20131021_224218_outLine
+BABEL_OP2_304_67794_20131211_225335_inLine
+BABEL_OP2_304_67794_20131211_225335_outLine
+BABEL_OP2_304_68823_20131020_204717_inLine
+BABEL_OP2_304_68823_20131020_204717_outLine
+BABEL_OP2_304_69090_20131028_014204_inLine
+BABEL_OP2_304_69090_20131028_014204_outLine
+BABEL_OP2_304_69574_20131114_192607_inLine
+BABEL_OP2_304_69574_20131114_192607_outLine
+BABEL_OP2_304_70726_20131024_044755_inLine
+BABEL_OP2_304_70726_20131024_044755_outLine
+BABEL_OP2_304_71278_20131021_222320_inLine
+BABEL_OP2_304_71278_20131021_222320_outLine
+BABEL_OP2_304_73837_20131203_050134_inLine
+BABEL_OP2_304_73837_20131203_050134_outLine
+BABEL_OP2_304_74111_20140214_221515_inLine
+BABEL_OP2_304_74111_20140214_221515_outLine
+BABEL_OP2_304_74280_20131114_221312_inLine
+BABEL_OP2_304_74280_20131114_221312_outLine
+BABEL_OP2_304_75465_20140214_020356_inLine
+BABEL_OP2_304_75465_20140214_020356_outLine
+BABEL_OP2_304_76773_20131201_022925_inLine
+BABEL_OP2_304_76773_20131201_022925_outLine
+BABEL_OP2_304_77904_20131023_031446_inLine
+BABEL_OP2_304_77904_20131023_031446_outLine
+BABEL_OP2_304_77990_20131201_021431_inLine
+BABEL_OP2_304_77990_20131201_021431_outLine
+BABEL_OP2_304_78609_20140215_083334_inLine
+BABEL_OP2_304_78609_20140215_083334_outLine
+BABEL_OP2_304_78630_20131216_203357_inLine
+BABEL_OP2_304_78630_20131216_203357_outLine
+BABEL_OP2_304_78958_20131106_193325_inLine
+BABEL_OP2_304_78958_20131106_193325_outLine
+BABEL_OP2_304_78976_20131207_040932_inLine
+BABEL_OP2_304_78976_20131207_040932_outLine
+BABEL_OP2_304_80241_20131031_000650_inLine
+BABEL_OP2_304_80241_20131031_000650_outLine
+BABEL_OP2_304_83366_20140114_021841_inLine
+BABEL_OP2_304_83366_20140114_021841_outLine
+BABEL_OP2_304_83643_20131112_015611_inLine
+BABEL_OP2_304_83643_20131112_015611_outLine
+BABEL_OP2_304_83775_20140106_012027_inLine
+BABEL_OP2_304_83775_20140106_012027_outLine
+BABEL_OP2_304_83783_20140123_015127_inLine
+BABEL_OP2_304_83783_20140123_015127_outLine
+BABEL_OP2_304_84029_20131107_051843_inLine
+BABEL_OP2_304_84029_20131107_051843_outLine
+BABEL_OP2_304_85260_20131024_194755_inLine
+BABEL_OP2_304_85260_20131024_194755_outLine
+BABEL_OP2_304_86885_20131024_233222_inLine
+BABEL_OP2_304_86885_20131024_233222_outLine
+BABEL_OP2_304_89045_20131115_232122_inLine
+BABEL_OP2_304_89045_20131115_232122_outLine
+BABEL_OP2_304_89226_20131024_203728_inLine
+BABEL_OP2_304_89226_20131024_203728_outLine
+BABEL_OP2_304_89372_20131115_002102_inLine
+BABEL_OP2_304_89372_20131115_002102_outLine
+BABEL_OP2_304_90930_20131020_000019_inLine
+BABEL_OP2_304_90930_20131020_000019_outLine
+BABEL_OP2_304_90935_20131204_230914_inLine
+BABEL_OP2_304_90935_20131204_230914_outLine
+BABEL_OP2_304_91971_20131023_230515_inLine
+BABEL_OP2_304_91971_20131023_230515_outLine
+BABEL_OP2_304_92509_20131210_214423_inLine
+BABEL_OP2_304_92509_20131210_214423_outLine
+BABEL_OP2_304_92698_20140118_013836_inLine
+BABEL_OP2_304_92698_20140118_013836_outLine
+BABEL_OP2_304_93946_20140213_192924_inLine
+BABEL_OP2_304_93946_20140213_192924_outLine
+BABEL_OP2_304_94869_20131114_004423_inLine
+BABEL_OP2_304_94869_20131114_004423_outLine
+BABEL_OP2_304_95077_20140213_032447_inLine
+BABEL_OP2_304_95077_20140213_032447_outLine
+BABEL_OP2_304_96504_20131215_211136_inLine
+BABEL_OP2_304_96504_20131215_211136_outLine
+BABEL_OP2_304_96504_20131215_212158_inLine
+BABEL_OP2_304_96504_20131215_212158_outLine
+BABEL_OP2_304_97448_20131109_203008_inLine
+BABEL_OP2_304_97448_20131109_203008_outLine
+BABEL_OP2_304_97570_20140114_012633_inLine
+BABEL_OP2_304_97570_20140114_012633_outLine
+BABEL_OP2_304_97772_20131115_013811_inLine
+BABEL_OP2_304_97772_20131115_013811_outLine
+BABEL_OP2_304_98255_20131126_040940_inLine
+BABEL_OP2_304_98255_20131126_040940_outLine
+BABEL_OP2_304_98888_20140116_000206_inLine
+BABEL_OP2_304_98888_20140116_000206_outLine
diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/evalpart1.list b/egs/babel/s5d/conf/lists/304-lithuanian/evalpart1.list
new file mode 100644
index 00000000000..bf4691f0f34
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/304-lithuanian/evalpart1.list
@@ -0,0 +1,60 @@
+BABEL_OP2_304_10416_20140107_061620_inLine
+BABEL_OP2_304_10416_20140107_061620_outLine
+BABEL_OP2_304_14723_20131125_042706_inLine
+BABEL_OP2_304_14723_20131125_042706_outLine
+BABEL_OP2_304_16351_20131027_201533_inLine
+BABEL_OP2_304_16351_20131027_201533_outLine
+BABEL_OP2_304_18863_20140222_035802_inLine
+BABEL_OP2_304_18863_20140222_035802_outLine
+BABEL_OP2_304_22641_20131201_215149_inLine
+BABEL_OP2_304_22641_20131201_215149_outLine
+BABEL_OP2_304_25068_20131019_030524_inLine
+BABEL_OP2_304_25068_20131019_030524_outLine
+BABEL_OP2_304_28422_20140112_043550_inLine
+BABEL_OP2_304_28422_20140112_043550_outLine
+BABEL_OP2_304_28585_20140225_043733_inLine
+BABEL_OP2_304_28585_20140225_043733_outLine
+BABEL_OP2_304_30250_20140120_020901_inLine
+BABEL_OP2_304_30250_20140120_020901_outLine
+BABEL_OP2_304_36219_20131216_035438_inLine
+BABEL_OP2_304_36219_20131216_035438_outLine
+BABEL_OP2_304_39159_20131208_045854_inLine
+BABEL_OP2_304_39159_20131208_045854_outLine
+BABEL_OP2_304_41109_20140220_021208_inLine
+BABEL_OP2_304_41109_20140220_021208_outLine
+BABEL_OP2_304_43285_20140124_012117_inLine
+BABEL_OP2_304_43285_20140124_012117_outLine
+BABEL_OP2_304_44255_20140222_010712_inLine
+BABEL_OP2_304_44255_20140222_010712_outLine
+BABEL_OP2_304_44681_20131023_205447_inLine
+BABEL_OP2_304_44681_20131023_205447_outLine
+BABEL_OP2_304_45106_20140117_233013_inLine
+BABEL_OP2_304_45106_20140117_233013_outLine
+BABEL_OP2_304_45699_20131022_213702_inLine
+BABEL_OP2_304_45699_20131022_213702_outLine
+BABEL_OP2_304_53206_20131021_231814_inLine
+BABEL_OP2_304_53206_20131021_231814_outLine
+BABEL_OP2_304_57922_20140212_234031_inLine
+BABEL_OP2_304_57922_20140212_234031_outLine
+BABEL_OP2_304_60508_20131213_013224_inLine
+BABEL_OP2_304_60508_20131213_013224_outLine
+BABEL_OP2_304_63481_20131218_054343_inLine
+BABEL_OP2_304_63481_20131218_054343_outLine
+BABEL_OP2_304_65339_20131108_025612_inLine
+BABEL_OP2_304_65339_20131108_025612_outLine
+BABEL_OP2_304_66967_20131211_212833_inLine
+BABEL_OP2_304_66967_20131211_212833_outLine
+BABEL_OP2_304_70726_20131024_044755_inLine
+BABEL_OP2_304_70726_20131024_044755_outLine
+BABEL_OP2_304_78609_20140215_083334_inLine
+BABEL_OP2_304_78609_20140215_083334_outLine
+BABEL_OP2_304_83366_20140114_021841_inLine
+BABEL_OP2_304_83366_20140114_021841_outLine
+BABEL_OP2_304_83775_20140106_012027_inLine
+BABEL_OP2_304_83775_20140106_012027_outLine
+BABEL_OP2_304_85260_20131024_194755_inLine
+BABEL_OP2_304_85260_20131024_194755_outLine
+BABEL_OP2_304_97448_20131109_203008_inLine
+BABEL_OP2_304_97448_20131109_203008_outLine
+BABEL_OP2_304_98888_20140116_000206_inLine
+BABEL_OP2_304_98888_20140116_000206_outLine
diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.list b/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.list
new file mode 100644
index 00000000000..858a278660f
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.list
@@ -0,0 +1,120 @@
+BABEL_OP2_304_10019_20131215_000700_inLine
+BABEL_OP2_304_10019_20131215_000700_outLine
+BABEL_OP2_304_11768_20131025_195124_inLine
+BABEL_OP2_304_11768_20131025_195124_outLine
+BABEL_OP2_304_13929_20131020_015822_inLine
+BABEL_OP2_304_13929_20131020_015822_outLine
+BABEL_OP2_304_15420_20131207_024154_inLine
+BABEL_OP2_304_15420_20131207_024154_outLine
+BABEL_OP2_304_17937_20131127_033509_inLine
+BABEL_OP2_304_17937_20131127_033509_outLine
+BABEL_OP2_304_18037_20131024_213803_inLine
+BABEL_OP2_304_18037_20131024_213803_outLine
+BABEL_OP2_304_18731_20131023_003305_inLine
+BABEL_OP2_304_18731_20131023_003305_outLine
+BABEL_OP2_304_20916_20131114_013626_inLine
+BABEL_OP2_304_20916_20131114_013626_outLine
+BABEL_OP2_304_21029_20131212_035937_inLine
+BABEL_OP2_304_21029_20131212_035937_outLine
+BABEL_OP2_304_22170_20140304_071139_inLine
+BABEL_OP2_304_22170_20140304_071139_outLine
+BABEL_OP2_304_23098_20131107_033644_inLine
+BABEL_OP2_304_23098_20131107_033644_outLine
+BABEL_OP2_304_26074_20140112_023253_inLine
+BABEL_OP2_304_26074_20140112_023253_outLine
+BABEL_OP2_304_34564_20140213_195420_inLine
+BABEL_OP2_304_34564_20140213_195420_outLine
+BABEL_OP2_304_35420_20131029_043734_inLine
+BABEL_OP2_304_35420_20131029_043734_outLine
+BABEL_OP2_304_35838_20131024_211303_inLine
+BABEL_OP2_304_35838_20131024_211303_outLine
+BABEL_OP2_304_36147_20131019_040800_inLine
+BABEL_OP2_304_36147_20131019_040800_outLine
+BABEL_OP2_304_39688_20131109_222248_inLine
+BABEL_OP2_304_39688_20131109_222248_outLine
+BABEL_OP2_304_40092_20131031_014914_inLine
+BABEL_OP2_304_40092_20131031_014914_outLine
+BABEL_OP2_304_41493_20131113_221501_inLine
+BABEL_OP2_304_41493_20131113_221501_outLine
+BABEL_OP2_304_42126_20131024_215636_inLine
+BABEL_OP2_304_42126_20131024_215636_outLine
+BABEL_OP2_304_46333_20131204_195151_inLine
+BABEL_OP2_304_46333_20131204_195151_outLine
+BABEL_OP2_304_47877_20140227_065455_inLine
+BABEL_OP2_304_47877_20140227_065455_outLine
+BABEL_OP2_304_48789_20140108_012933_inLine
+BABEL_OP2_304_48789_20140108_012933_outLine
+BABEL_OP2_304_51417_20140228_011906_inLine
+BABEL_OP2_304_51417_20140228_011906_outLine
+BABEL_OP2_304_52025_20131116_004427_inLine
+BABEL_OP2_304_52025_20131116_004427_outLine
+BABEL_OP2_304_56429_20131129_223408_inLine
+BABEL_OP2_304_56429_20131129_223408_outLine
+BABEL_OP2_304_56684_20140223_001031_inLine
+BABEL_OP2_304_56684_20140223_001031_outLine
+BABEL_OP2_304_56720_20140119_005254_inLine
+BABEL_OP2_304_56720_20140119_005254_outLine
+BABEL_OP2_304_56743_20131218_042118_inLine
+BABEL_OP2_304_56743_20131218_042118_outLine
+BABEL_OP2_304_57609_20140121_202504_inLine
+BABEL_OP2_304_57609_20140121_202504_outLine
+BABEL_OP2_304_58103_20131212_013517_inLine
+BABEL_OP2_304_58103_20131212_013517_outLine
+BABEL_OP2_304_59291_20140207_213735_inLine
+BABEL_OP2_304_59291_20140207_213735_outLine
+BABEL_OP2_304_60418_20140111_062723_inLine
+BABEL_OP2_304_60418_20140111_062723_outLine
+BABEL_OP2_304_61219_20131206_061726_inLine
+BABEL_OP2_304_61219_20131206_061726_outLine
+BABEL_OP2_304_61357_20140113_232629_inLine
+BABEL_OP2_304_61357_20140113_232629_outLine
+BABEL_OP2_304_61963_20140226_192451_inLine
+BABEL_OP2_304_61963_20140226_192451_outLine
+BABEL_OP2_304_62323_20131113_001039_inLine
+BABEL_OP2_304_62323_20131113_001039_outLine
+BABEL_OP2_304_63445_20131127_005349_inLine
+BABEL_OP2_304_63445_20131127_005349_outLine
+BABEL_OP2_304_64759_20140118_203442_inLine
+BABEL_OP2_304_64759_20140118_203442_outLine
+BABEL_OP2_304_64796_20131128_060852_inLine
+BABEL_OP2_304_64796_20131128_060852_outLine
+BABEL_OP2_304_65077_20131115_005739_inLine
+BABEL_OP2_304_65077_20131115_005739_outLine
+BABEL_OP2_304_66026_20140212_224055_inLine
+BABEL_OP2_304_66026_20140212_224055_outLine
+BABEL_OP2_304_68910_20131101_042132_inLine
+BABEL_OP2_304_68910_20131101_042132_outLine
+BABEL_OP2_304_72903_20131113_023457_inLine
+BABEL_OP2_304_72903_20131113_023457_outLine
+BABEL_OP2_304_73042_20131214_052022_inLine
+BABEL_OP2_304_73042_20131214_052022_outLine
+BABEL_OP2_304_74455_20140224_013111_inLine
+BABEL_OP2_304_74455_20140224_013111_outLine
+BABEL_OP2_304_78360_20140301_020449_inLine
+BABEL_OP2_304_78360_20140301_020449_outLine
+BABEL_OP2_304_79723_20131023_023756_inLine
+BABEL_OP2_304_79723_20131023_023756_outLine
+BABEL_OP2_304_79820_20131214_042918_inLine
+BABEL_OP2_304_79820_20131214_042918_outLine
+BABEL_OP2_304_80721_20140213_051749_inLine
+BABEL_OP2_304_80721_20140213_051749_outLine
+BABEL_OP2_304_81427_20131211_012524_inLine
+BABEL_OP2_304_81427_20131211_012524_outLine
+BABEL_OP2_304_83813_20131028_033118_inLine
+BABEL_OP2_304_83813_20131028_033118_outLine
+BABEL_OP2_304_83851_20131203_212613_inLine
+BABEL_OP2_304_83851_20131203_212613_outLine
+BABEL_OP2_304_84125_20131115_235931_inLine +BABEL_OP2_304_84125_20131115_235931_outLine +BABEL_OP2_304_85179_20140214_071121_inLine +BABEL_OP2_304_85179_20140214_071121_outLine +BABEL_OP2_304_92252_20131022_042600_inLine +BABEL_OP2_304_92252_20131022_042600_outLine +BABEL_OP2_304_93443_20131127_032037_inLine +BABEL_OP2_304_93443_20131127_032037_outLine +BABEL_OP2_304_96205_20140107_233946_inLine +BABEL_OP2_304_96205_20140107_233946_outLine +BABEL_OP2_304_98565_20131023_235505_inLine +BABEL_OP2_304_98565_20131023_235505_outLine +BABEL_OP2_304_99920_20140211_023914_inLine +BABEL_OP2_304_99920_20140211_023914_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.untranscribed.list new file mode 100644 index 00000000000..5ddd7320c00 --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/sub-train.untranscribed.list @@ -0,0 +1,364 @@ +BABEL_OP2_304_10313_20131021_235202_inLine +BABEL_OP2_304_10313_20131021_235202_outLine +BABEL_OP2_304_10469_20131103_031709_inLine +BABEL_OP2_304_10469_20131103_031709_outLine +BABEL_OP2_304_11419_20131022_014303_inLine +BABEL_OP2_304_11419_20131022_014303_outLine +BABEL_OP2_304_11681_20131213_001647_inLine +BABEL_OP2_304_11681_20131213_001647_outLine +BABEL_OP2_304_12220_20131217_183010_inLine +BABEL_OP2_304_12220_20131217_183010_outLine +BABEL_OP2_304_13030_20131214_223348_inLine +BABEL_OP2_304_13030_20131214_223348_outLine +BABEL_OP2_304_13483_20140121_014427_inLine +BABEL_OP2_304_13483_20140121_014427_outLine +BABEL_OP2_304_13547_20131025_230206_inLine +BABEL_OP2_304_13547_20131025_230206_outLine +BABEL_OP2_304_14229_20131203_213430_inLine +BABEL_OP2_304_14229_20131203_213430_outLine +BABEL_OP2_304_14440_20140116_035720_inLine +BABEL_OP2_304_14440_20140116_035720_outLine +BABEL_OP2_304_14875_20131215_025538_inLine +BABEL_OP2_304_14875_20131215_025538_outLine +BABEL_OP2_304_15535_20140120_031512_inLine +BABEL_OP2_304_15535_20140120_031512_outLine +BABEL_OP2_304_15869_20131024_035059_inLine +BABEL_OP2_304_15869_20131024_035059_outLine +BABEL_OP2_304_16249_20131019_215021_inLine +BABEL_OP2_304_16249_20131019_215021_outLine +BABEL_OP2_304_16938_20140117_232323_inLine +BABEL_OP2_304_16938_20140117_232323_outLine +BABEL_OP2_304_17032_20140121_010326_inLine +BABEL_OP2_304_17032_20140121_010326_outLine +BABEL_OP2_304_17923_20140112_012407_inLine +BABEL_OP2_304_17923_20140112_012407_outLine +BABEL_OP2_304_18033_20131019_011702_inLine +BABEL_OP2_304_18033_20131019_011702_outLine +BABEL_OP2_304_19440_20131022_001353_inLine +BABEL_OP2_304_19440_20131022_001353_outLine +BABEL_OP2_304_19782_20140214_025658_inLine +BABEL_OP2_304_19782_20140214_025658_outLine +BABEL_OP2_304_20330_20140222_024609_inLine +BABEL_OP2_304_20330_20140222_024609_outLine +BABEL_OP2_304_20454_20131022_030532_inLine +BABEL_OP2_304_20454_20131022_030532_outLine +BABEL_OP2_304_20800_20140109_021508_inLine +BABEL_OP2_304_20800_20140109_021508_outLine +BABEL_OP2_304_21109_20140217_041609_inLine +BABEL_OP2_304_21109_20140217_041609_outLine +BABEL_OP2_304_22629_20131106_052813_inLine +BABEL_OP2_304_22629_20131106_052813_outLine +BABEL_OP2_304_23995_20140221_014044_inLine +BABEL_OP2_304_23995_20140221_014044_outLine +BABEL_OP2_304_24532_20131115_041442_inLine +BABEL_OP2_304_24532_20131115_041442_outLine +BABEL_OP2_304_24589_20131211_034826_inLine +BABEL_OP2_304_24589_20131211_034826_outLine +BABEL_OP2_304_24648_20131023_232628_inLine +BABEL_OP2_304_24648_20131023_232628_outLine 
+BABEL_OP2_304_25895_20131106_022638_inLine +BABEL_OP2_304_25895_20131106_022638_outLine +BABEL_OP2_304_26602_20140220_035529_inLine +BABEL_OP2_304_26602_20140220_035529_outLine +BABEL_OP2_304_27042_20140214_003359_inLine +BABEL_OP2_304_27042_20140214_003359_outLine +BABEL_OP2_304_27125_20131115_034249_inLine +BABEL_OP2_304_27125_20131115_034249_outLine +BABEL_OP2_304_27189_20131107_042859_inLine +BABEL_OP2_304_27189_20131107_042859_outLine +BABEL_OP2_304_27218_20131214_022520_inLine +BABEL_OP2_304_27218_20131214_022520_outLine +BABEL_OP2_304_28945_20131208_071533_inLine +BABEL_OP2_304_28945_20131208_071533_outLine +BABEL_OP2_304_29135_20131211_225441_inLine +BABEL_OP2_304_29135_20131211_225441_outLine +BABEL_OP2_304_29168_20131210_211235_inLine +BABEL_OP2_304_29168_20131210_211235_outLine +BABEL_OP2_304_29208_20140108_013943_inLine +BABEL_OP2_304_29208_20140108_013943_outLine +BABEL_OP2_304_29663_20131101_024202_inLine +BABEL_OP2_304_29663_20131101_024202_outLine +BABEL_OP2_304_29746_20131020_192452_inLine +BABEL_OP2_304_29746_20131020_192452_outLine +BABEL_OP2_304_30253_20140210_055904_inLine +BABEL_OP2_304_30253_20140210_055904_outLine +BABEL_OP2_304_30426_20131108_232120_inLine +BABEL_OP2_304_30426_20131108_232120_outLine +BABEL_OP2_304_31624_20131216_054258_inLine +BABEL_OP2_304_31624_20131216_054258_outLine +BABEL_OP2_304_32169_20131101_054038_inLine +BABEL_OP2_304_32169_20131101_054038_outLine +BABEL_OP2_304_32832_20140223_005017_inLine +BABEL_OP2_304_32832_20140223_005017_outLine +BABEL_OP2_304_32861_20140303_235600_inLine +BABEL_OP2_304_32861_20140303_235600_outLine +BABEL_OP2_304_33111_20140304_043553_inLine +BABEL_OP2_304_33111_20140304_043553_outLine +BABEL_OP2_304_33149_20131127_000224_inLine +BABEL_OP2_304_33149_20131127_000224_outLine +BABEL_OP2_304_34064_20131020_210038_inLine +BABEL_OP2_304_34064_20131020_210038_outLine +BABEL_OP2_304_34064_20131021_223728_inLine +BABEL_OP2_304_34064_20131021_223728_outLine +BABEL_OP2_304_34328_20140106_031822_inLine +BABEL_OP2_304_34328_20140106_031822_outLine +BABEL_OP2_304_34336_20131210_042513_inLine +BABEL_OP2_304_34336_20131210_042513_outLine +BABEL_OP2_304_34647_20131109_231717_inLine +BABEL_OP2_304_34647_20131109_231717_outLine +BABEL_OP2_304_36300_20131030_210103_inLine +BABEL_OP2_304_36300_20131030_210103_outLine +BABEL_OP2_304_36341_20131208_040050_inLine +BABEL_OP2_304_36341_20131208_040050_outLine +BABEL_OP2_304_36990_20140106_050927_inLine +BABEL_OP2_304_36990_20140106_050927_outLine +BABEL_OP2_304_37684_20131019_020843_inLine +BABEL_OP2_304_37684_20131019_020843_outLine +BABEL_OP2_304_38963_20131109_190146_inLine +BABEL_OP2_304_38963_20131109_190146_outLine +BABEL_OP2_304_39680_20140226_002516_inLine +BABEL_OP2_304_39680_20140226_002516_outLine +BABEL_OP2_304_40624_20131107_024514_inLine +BABEL_OP2_304_40624_20131107_024514_outLine +BABEL_OP2_304_40713_20131210_063734_inLine +BABEL_OP2_304_40713_20131210_063734_outLine +BABEL_OP2_304_41233_20140222_034336_inLine +BABEL_OP2_304_41233_20140222_034336_outLine +BABEL_OP2_304_41442_20140214_035912_inLine +BABEL_OP2_304_41442_20140214_035912_outLine +BABEL_OP2_304_41741_20131215_020846_inLine +BABEL_OP2_304_41741_20131215_020846_outLine +BABEL_OP2_304_42243_20131218_052141_inLine +BABEL_OP2_304_42243_20131218_052141_outLine +BABEL_OP2_304_42497_20131130_034031_inLine +BABEL_OP2_304_42497_20131130_034031_outLine +BABEL_OP2_304_44868_20140110_204822_inLine +BABEL_OP2_304_44868_20140110_204822_outLine +BABEL_OP2_304_45374_20131019_200425_inLine 
+BABEL_OP2_304_45374_20131019_200425_outLine +BABEL_OP2_304_45642_20140114_234140_inLine +BABEL_OP2_304_45642_20140114_234140_outLine +BABEL_OP2_304_45843_20140114_205141_inLine +BABEL_OP2_304_45843_20140114_205141_outLine +BABEL_OP2_304_46315_20140122_004043_inLine +BABEL_OP2_304_46315_20140122_004043_outLine +BABEL_OP2_304_46389_20131022_050904_inLine +BABEL_OP2_304_46389_20131022_050904_outLine +BABEL_OP2_304_47110_20131023_015940_inLine +BABEL_OP2_304_47110_20131023_015940_outLine +BABEL_OP2_304_47270_20140222_021820_inLine +BABEL_OP2_304_47270_20140222_021820_outLine +BABEL_OP2_304_47451_20140203_224639_inLine +BABEL_OP2_304_47451_20140203_224639_outLine +BABEL_OP2_304_48024_20131031_215347_inLine +BABEL_OP2_304_48024_20131031_215347_outLine +BABEL_OP2_304_49001_20131214_003327_inLine +BABEL_OP2_304_49001_20131214_003327_outLine +BABEL_OP2_304_49287_20140118_013355_inLine +BABEL_OP2_304_49287_20140118_013355_outLine +BABEL_OP2_304_50175_20131124_033223_inLine +BABEL_OP2_304_50175_20131124_033223_outLine +BABEL_OP2_304_50175_20131124_035833_inLine +BABEL_OP2_304_50175_20131124_035833_outLine +BABEL_OP2_304_50565_20140124_052942_inLine +BABEL_OP2_304_50565_20140124_052942_outLine +BABEL_OP2_304_50726_20131213_031251_inLine +BABEL_OP2_304_50726_20131213_031251_outLine +BABEL_OP2_304_51540_20140304_011452_inLine +BABEL_OP2_304_51540_20140304_011452_outLine +BABEL_OP2_304_52058_20131022_055536_inLine +BABEL_OP2_304_52058_20131022_055536_outLine +BABEL_OP2_304_52438_20131206_043319_inLine +BABEL_OP2_304_52438_20131206_043319_outLine +BABEL_OP2_304_52818_20140112_011936_inLine +BABEL_OP2_304_52818_20140112_011936_outLine +BABEL_OP2_304_53419_20140213_061844_inLine +BABEL_OP2_304_53419_20140213_061844_outLine +BABEL_OP2_304_53842_20140109_012849_inLine +BABEL_OP2_304_53842_20140109_012849_outLine +BABEL_OP2_304_54744_20131205_024818_inLine +BABEL_OP2_304_54744_20131205_024818_outLine +BABEL_OP2_304_55042_20131112_051412_inLine +BABEL_OP2_304_55042_20131112_051412_outLine +BABEL_OP2_304_55381_20140217_005926_inLine +BABEL_OP2_304_55381_20140217_005926_outLine +BABEL_OP2_304_55818_20131218_020051_inLine +BABEL_OP2_304_55818_20131218_020051_outLine +BABEL_OP2_304_56057_20131112_043401_inLine +BABEL_OP2_304_56057_20131112_043401_outLine +BABEL_OP2_304_56117_20131023_035134_inLine +BABEL_OP2_304_56117_20131023_035134_outLine +BABEL_OP2_304_56674_20131024_233415_inLine +BABEL_OP2_304_56674_20131024_233415_outLine +BABEL_OP2_304_57035_20131106_183242_inLine +BABEL_OP2_304_57035_20131106_183242_outLine +BABEL_OP2_304_57566_20140227_000622_inLine +BABEL_OP2_304_57566_20140227_000622_outLine +BABEL_OP2_304_57935_20140211_015542_inLine +BABEL_OP2_304_57935_20140211_015542_outLine +BABEL_OP2_304_58585_20140226_022746_inLine +BABEL_OP2_304_58585_20140226_022746_outLine +BABEL_OP2_304_58717_20140112_000351_inLine +BABEL_OP2_304_58717_20140112_000351_outLine +BABEL_OP2_304_59163_20131022_033947_inLine +BABEL_OP2_304_59163_20131022_033947_outLine +BABEL_OP2_304_59645_20140110_210530_inLine +BABEL_OP2_304_59645_20140110_210530_outLine +BABEL_OP2_304_60282_20131031_040356_inLine +BABEL_OP2_304_60282_20131031_040356_outLine +BABEL_OP2_304_60397_20131024_183527_inLine +BABEL_OP2_304_60397_20131024_183527_outLine +BABEL_OP2_304_60538_20131211_043030_inLine +BABEL_OP2_304_60538_20131211_043030_outLine +BABEL_OP2_304_60830_20140106_224130_inLine +BABEL_OP2_304_60830_20140106_224130_outLine +BABEL_OP2_304_61011_20131206_213833_inLine +BABEL_OP2_304_61011_20131206_213833_outLine 
+BABEL_OP2_304_61225_20131113_052324_inLine +BABEL_OP2_304_61225_20131113_052324_outLine +BABEL_OP2_304_61831_20131201_042817_inLine +BABEL_OP2_304_61831_20131201_042817_outLine +BABEL_OP2_304_61888_20140228_181648_inLine +BABEL_OP2_304_61888_20140228_181648_outLine +BABEL_OP2_304_62177_20140227_184207_inLine +BABEL_OP2_304_62177_20140227_184207_outLine +BABEL_OP2_304_63081_20131206_195135_inLine +BABEL_OP2_304_63081_20131206_195135_outLine +BABEL_OP2_304_63671_20131024_002535_inLine +BABEL_OP2_304_63671_20131024_002535_outLine +BABEL_OP2_304_63920_20131108_182401_inLine +BABEL_OP2_304_63920_20131108_182401_outLine +BABEL_OP2_304_64014_20140228_033939_inLine +BABEL_OP2_304_64014_20140228_033939_outLine +BABEL_OP2_304_64469_20131023_182630_inLine +BABEL_OP2_304_64469_20131023_182630_outLine +BABEL_OP2_304_64688_20131109_040635_inLine +BABEL_OP2_304_64688_20131109_040635_outLine +BABEL_OP2_304_65336_20131109_051329_inLine +BABEL_OP2_304_65336_20131109_051329_outLine +BABEL_OP2_304_65723_20131201_233928_inLine +BABEL_OP2_304_65723_20131201_233928_outLine +BABEL_OP2_304_65882_20131128_220533_inLine +BABEL_OP2_304_65882_20131128_220533_outLine +BABEL_OP2_304_66001_20131208_023839_inLine +BABEL_OP2_304_66001_20131208_023839_outLine +BABEL_OP2_304_66350_20131022_021812_inLine +BABEL_OP2_304_66350_20131022_021812_outLine +BABEL_OP2_304_66837_20140213_053859_inLine +BABEL_OP2_304_66837_20140213_053859_outLine +BABEL_OP2_304_67304_20140216_025015_inLine +BABEL_OP2_304_67304_20140216_025015_outLine +BABEL_OP2_304_67552_20140114_011538_inLine +BABEL_OP2_304_67552_20140114_011538_outLine +BABEL_OP2_304_67894_20131112_060500_inLine +BABEL_OP2_304_67894_20131112_060500_outLine +BABEL_OP2_304_68059_20140111_025607_inLine +BABEL_OP2_304_68059_20140111_025607_outLine +BABEL_OP2_304_68908_20131127_032840_inLine +BABEL_OP2_304_68908_20131127_032840_outLine +BABEL_OP2_304_69107_20140123_192506_inLine +BABEL_OP2_304_69107_20140123_192506_outLine +BABEL_OP2_304_69153_20140212_204658_inLine +BABEL_OP2_304_69153_20140212_204658_outLine +BABEL_OP2_304_69992_20131213_195450_inLine +BABEL_OP2_304_69992_20131213_195450_outLine +BABEL_OP2_304_70216_20131020_173420_inLine +BABEL_OP2_304_70216_20131020_173420_outLine +BABEL_OP2_304_71263_20140113_223556_inLine +BABEL_OP2_304_71263_20140113_223556_outLine +BABEL_OP2_304_71401_20131020_005620_inLine +BABEL_OP2_304_71401_20131020_005620_outLine +BABEL_OP2_304_72844_20131115_202958_inLine +BABEL_OP2_304_72844_20131115_202958_outLine +BABEL_OP2_304_74226_20140217_044122_inLine +BABEL_OP2_304_74226_20140217_044122_outLine +BABEL_OP2_304_75064_20131209_035217_inLine +BABEL_OP2_304_75064_20131209_035217_outLine +BABEL_OP2_304_75223_20131205_012248_inLine +BABEL_OP2_304_75223_20131205_012248_outLine +BABEL_OP2_304_75930_20131020_013042_inLine +BABEL_OP2_304_75930_20131020_013042_outLine +BABEL_OP2_304_75975_20131019_054431_inLine +BABEL_OP2_304_75975_20131019_054431_outLine +BABEL_OP2_304_76069_20131113_042346_inLine +BABEL_OP2_304_76069_20131113_042346_outLine +BABEL_OP2_304_76730_20131025_213853_inLine +BABEL_OP2_304_76730_20131025_213853_outLine +BABEL_OP2_304_76793_20131126_013011_inLine +BABEL_OP2_304_76793_20131126_013011_outLine +BABEL_OP2_304_77033_20140228_043125_inLine +BABEL_OP2_304_77033_20140228_043125_outLine +BABEL_OP2_304_77730_20131217_042107_inLine +BABEL_OP2_304_77730_20131217_042107_outLine +BABEL_OP2_304_78943_20131208_222716_inLine +BABEL_OP2_304_78943_20131208_222716_outLine +BABEL_OP2_304_79028_20131022_221243_inLine 
+BABEL_OP2_304_79028_20131022_221243_outLine +BABEL_OP2_304_79045_20140214_202301_inLine +BABEL_OP2_304_79045_20140214_202301_outLine +BABEL_OP2_304_80209_20131112_232041_inLine +BABEL_OP2_304_80209_20131112_232041_outLine +BABEL_OP2_304_80383_20131107_233543_inLine +BABEL_OP2_304_80383_20131107_233543_outLine +BABEL_OP2_304_80577_20140301_014201_inLine +BABEL_OP2_304_80577_20140301_014201_outLine +BABEL_OP2_304_80881_20131205_175435_inLine +BABEL_OP2_304_80881_20131205_175435_outLine +BABEL_OP2_304_81404_20131213_041501_inLine +BABEL_OP2_304_81404_20131213_041501_outLine +BABEL_OP2_304_81769_20140105_005749_inLine +BABEL_OP2_304_81769_20140105_005749_outLine +BABEL_OP2_304_82863_20140106_054346_inLine +BABEL_OP2_304_82863_20140106_054346_outLine +BABEL_OP2_304_83436_20131211_025218_inLine +BABEL_OP2_304_83436_20131211_025218_outLine +BABEL_OP2_304_83935_20140123_034100_inLine +BABEL_OP2_304_83935_20140123_034100_outLine +BABEL_OP2_304_84194_20131129_040805_inLine +BABEL_OP2_304_84194_20131129_040805_outLine +BABEL_OP2_304_84605_20131215_005949_inLine +BABEL_OP2_304_84605_20131215_005949_outLine +BABEL_OP2_304_85248_20140222_235016_inLine +BABEL_OP2_304_85248_20140222_235016_outLine +BABEL_OP2_304_86100_20131112_221929_inLine +BABEL_OP2_304_86100_20131112_221929_outLine +BABEL_OP2_304_86472_20140116_050058_inLine +BABEL_OP2_304_86472_20140116_050058_outLine +BABEL_OP2_304_86557_20131130_234925_inLine +BABEL_OP2_304_86557_20131130_234925_outLine +BABEL_OP2_304_86829_20131107_180321_inLine +BABEL_OP2_304_86829_20131107_180321_outLine +BABEL_OP2_304_86830_20140228_051058_inLine +BABEL_OP2_304_86830_20140228_051058_outLine +BABEL_OP2_304_87280_20140207_030432_inLine +BABEL_OP2_304_87280_20140207_030432_outLine +BABEL_OP2_304_87866_20131106_002751_inLine +BABEL_OP2_304_87866_20131106_002751_outLine +BABEL_OP2_304_88982_20140111_233039_inLine +BABEL_OP2_304_88982_20140111_233039_outLine +BABEL_OP2_304_89650_20131024_023031_inLine +BABEL_OP2_304_89650_20131024_023031_outLine +BABEL_OP2_304_91581_20140203_231410_inLine +BABEL_OP2_304_91581_20140203_231410_outLine +BABEL_OP2_304_92440_20131109_003559_inLine +BABEL_OP2_304_92440_20131109_003559_outLine +BABEL_OP2_304_92886_20131202_233808_inLine +BABEL_OP2_304_92886_20131202_233808_outLine +BABEL_OP2_304_93224_20140121_191942_inLine +BABEL_OP2_304_93224_20140121_191942_outLine +BABEL_OP2_304_93475_20131213_025105_inLine +BABEL_OP2_304_93475_20131213_025105_outLine +BABEL_OP2_304_93681_20131202_212236_inLine +BABEL_OP2_304_93681_20131202_212236_outLine +BABEL_OP2_304_94465_20140213_013300_inLine +BABEL_OP2_304_94465_20140213_013300_outLine +BABEL_OP2_304_94923_20140212_021923_inLine +BABEL_OP2_304_94923_20140212_021923_outLine +BABEL_OP2_304_95677_20131024_031406_inLine +BABEL_OP2_304_95677_20131024_031406_outLine +BABEL_OP2_304_96405_20131214_205112_inLine +BABEL_OP2_304_96405_20131214_205112_outLine +BABEL_OP2_304_98165_20131218_234422_inLine +BABEL_OP2_304_98165_20131218_234422_outLine +BABEL_OP2_304_99264_20140222_211846_inLine +BABEL_OP2_304_99264_20140222_211846_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/training.list b/egs/babel/s5d/conf/lists/304-lithuanian/training.list new file mode 100644 index 00000000000..72d421bf1a9 --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/training.list @@ -0,0 +1,484 @@ +BABEL_OP2_304_10019_20131215_000700_inLine +BABEL_OP2_304_10019_20131215_000700_outLine +BABEL_OP2_304_10313_20131021_235202_inLine +BABEL_OP2_304_10313_20131021_235202_outLine 
+BABEL_OP2_304_10469_20131103_031709_inLine +BABEL_OP2_304_10469_20131103_031709_outLine +BABEL_OP2_304_11419_20131022_014303_inLine +BABEL_OP2_304_11419_20131022_014303_outLine +BABEL_OP2_304_11681_20131213_001647_inLine +BABEL_OP2_304_11681_20131213_001647_outLine +BABEL_OP2_304_11768_20131025_195124_inLine +BABEL_OP2_304_11768_20131025_195124_outLine +BABEL_OP2_304_12220_20131217_183010_inLine +BABEL_OP2_304_12220_20131217_183010_outLine +BABEL_OP2_304_13030_20131214_223348_inLine +BABEL_OP2_304_13030_20131214_223348_outLine +BABEL_OP2_304_13483_20140121_014427_inLine +BABEL_OP2_304_13483_20140121_014427_outLine +BABEL_OP2_304_13547_20131025_230206_inLine +BABEL_OP2_304_13547_20131025_230206_outLine +BABEL_OP2_304_13929_20131020_015822_inLine +BABEL_OP2_304_13929_20131020_015822_outLine +BABEL_OP2_304_14229_20131203_213430_inLine +BABEL_OP2_304_14229_20131203_213430_outLine +BABEL_OP2_304_14440_20140116_035720_inLine +BABEL_OP2_304_14440_20140116_035720_outLine +BABEL_OP2_304_14875_20131215_025538_inLine +BABEL_OP2_304_14875_20131215_025538_outLine +BABEL_OP2_304_15420_20131207_024154_inLine +BABEL_OP2_304_15420_20131207_024154_outLine +BABEL_OP2_304_15535_20140120_031512_inLine +BABEL_OP2_304_15535_20140120_031512_outLine +BABEL_OP2_304_15869_20131024_035059_inLine +BABEL_OP2_304_15869_20131024_035059_outLine +BABEL_OP2_304_16249_20131019_215021_inLine +BABEL_OP2_304_16249_20131019_215021_outLine +BABEL_OP2_304_16938_20140117_232323_inLine +BABEL_OP2_304_16938_20140117_232323_outLine +BABEL_OP2_304_17032_20140121_010326_inLine +BABEL_OP2_304_17032_20140121_010326_outLine +BABEL_OP2_304_17923_20140112_012407_inLine +BABEL_OP2_304_17923_20140112_012407_outLine +BABEL_OP2_304_17937_20131127_033509_inLine +BABEL_OP2_304_17937_20131127_033509_outLine +BABEL_OP2_304_18033_20131019_011702_inLine +BABEL_OP2_304_18033_20131019_011702_outLine +BABEL_OP2_304_18037_20131024_213803_inLine +BABEL_OP2_304_18037_20131024_213803_outLine +BABEL_OP2_304_18731_20131023_003305_inLine +BABEL_OP2_304_18731_20131023_003305_outLine +BABEL_OP2_304_19440_20131022_001353_inLine +BABEL_OP2_304_19440_20131022_001353_outLine +BABEL_OP2_304_19782_20140214_025658_inLine +BABEL_OP2_304_19782_20140214_025658_outLine +BABEL_OP2_304_20330_20140222_024609_inLine +BABEL_OP2_304_20330_20140222_024609_outLine +BABEL_OP2_304_20454_20131022_030532_inLine +BABEL_OP2_304_20454_20131022_030532_outLine +BABEL_OP2_304_20800_20140109_021508_inLine +BABEL_OP2_304_20800_20140109_021508_outLine +BABEL_OP2_304_20916_20131114_013626_inLine +BABEL_OP2_304_20916_20131114_013626_outLine +BABEL_OP2_304_21029_20131212_035937_inLine +BABEL_OP2_304_21029_20131212_035937_outLine +BABEL_OP2_304_21109_20140217_041609_inLine +BABEL_OP2_304_21109_20140217_041609_outLine +BABEL_OP2_304_22170_20140304_071139_inLine +BABEL_OP2_304_22170_20140304_071139_outLine +BABEL_OP2_304_22629_20131106_052813_inLine +BABEL_OP2_304_22629_20131106_052813_outLine +BABEL_OP2_304_23098_20131107_033644_inLine +BABEL_OP2_304_23098_20131107_033644_outLine +BABEL_OP2_304_23995_20140221_014044_inLine +BABEL_OP2_304_23995_20140221_014044_outLine +BABEL_OP2_304_24532_20131115_041442_inLine +BABEL_OP2_304_24532_20131115_041442_outLine +BABEL_OP2_304_24589_20131211_034826_inLine +BABEL_OP2_304_24589_20131211_034826_outLine +BABEL_OP2_304_24648_20131023_232628_inLine +BABEL_OP2_304_24648_20131023_232628_outLine +BABEL_OP2_304_25895_20131106_022638_inLine +BABEL_OP2_304_25895_20131106_022638_outLine +BABEL_OP2_304_26074_20140112_023253_inLine 
+BABEL_OP2_304_26074_20140112_023253_outLine +BABEL_OP2_304_26602_20140220_035529_inLine +BABEL_OP2_304_26602_20140220_035529_outLine +BABEL_OP2_304_27042_20140214_003359_inLine +BABEL_OP2_304_27042_20140214_003359_outLine +BABEL_OP2_304_27125_20131115_034249_inLine +BABEL_OP2_304_27125_20131115_034249_outLine +BABEL_OP2_304_27189_20131107_042859_inLine +BABEL_OP2_304_27189_20131107_042859_outLine +BABEL_OP2_304_27218_20131214_022520_inLine +BABEL_OP2_304_27218_20131214_022520_outLine +BABEL_OP2_304_28945_20131208_071533_inLine +BABEL_OP2_304_28945_20131208_071533_outLine +BABEL_OP2_304_29135_20131211_225441_inLine +BABEL_OP2_304_29135_20131211_225441_outLine +BABEL_OP2_304_29168_20131210_211235_inLine +BABEL_OP2_304_29168_20131210_211235_outLine +BABEL_OP2_304_29208_20140108_013943_inLine +BABEL_OP2_304_29208_20140108_013943_outLine +BABEL_OP2_304_29663_20131101_024202_inLine +BABEL_OP2_304_29663_20131101_024202_outLine +BABEL_OP2_304_29746_20131020_192452_inLine +BABEL_OP2_304_29746_20131020_192452_outLine +BABEL_OP2_304_30253_20140210_055904_inLine +BABEL_OP2_304_30253_20140210_055904_outLine +BABEL_OP2_304_30426_20131108_232120_inLine +BABEL_OP2_304_30426_20131108_232120_outLine +BABEL_OP2_304_31624_20131216_054258_inLine +BABEL_OP2_304_31624_20131216_054258_outLine +BABEL_OP2_304_32169_20131101_054038_inLine +BABEL_OP2_304_32169_20131101_054038_outLine +BABEL_OP2_304_32832_20140223_005017_inLine +BABEL_OP2_304_32832_20140223_005017_outLine +BABEL_OP2_304_32861_20140303_235600_inLine +BABEL_OP2_304_32861_20140303_235600_outLine +BABEL_OP2_304_33111_20140304_043553_inLine +BABEL_OP2_304_33111_20140304_043553_outLine +BABEL_OP2_304_33149_20131127_000224_inLine +BABEL_OP2_304_33149_20131127_000224_outLine +BABEL_OP2_304_34064_20131020_210038_inLine +BABEL_OP2_304_34064_20131020_210038_outLine +BABEL_OP2_304_34064_20131021_223728_inLine +BABEL_OP2_304_34064_20131021_223728_outLine +BABEL_OP2_304_34328_20140106_031822_inLine +BABEL_OP2_304_34328_20140106_031822_outLine +BABEL_OP2_304_34336_20131210_042513_inLine +BABEL_OP2_304_34336_20131210_042513_outLine +BABEL_OP2_304_34564_20140213_195420_inLine +BABEL_OP2_304_34564_20140213_195420_outLine +BABEL_OP2_304_34647_20131109_231717_inLine +BABEL_OP2_304_34647_20131109_231717_outLine +BABEL_OP2_304_35420_20131029_043734_inLine +BABEL_OP2_304_35420_20131029_043734_outLine +BABEL_OP2_304_35838_20131024_211303_inLine +BABEL_OP2_304_35838_20131024_211303_outLine +BABEL_OP2_304_36147_20131019_040800_inLine +BABEL_OP2_304_36147_20131019_040800_outLine +BABEL_OP2_304_36300_20131030_210103_inLine +BABEL_OP2_304_36300_20131030_210103_outLine +BABEL_OP2_304_36341_20131208_040050_inLine +BABEL_OP2_304_36341_20131208_040050_outLine +BABEL_OP2_304_36990_20140106_050927_inLine +BABEL_OP2_304_36990_20140106_050927_outLine +BABEL_OP2_304_37684_20131019_020843_inLine +BABEL_OP2_304_37684_20131019_020843_outLine +BABEL_OP2_304_38963_20131109_190146_inLine +BABEL_OP2_304_38963_20131109_190146_outLine +BABEL_OP2_304_39680_20140226_002516_inLine +BABEL_OP2_304_39680_20140226_002516_outLine +BABEL_OP2_304_39688_20131109_222248_inLine +BABEL_OP2_304_39688_20131109_222248_outLine +BABEL_OP2_304_40092_20131031_014914_inLine +BABEL_OP2_304_40092_20131031_014914_outLine +BABEL_OP2_304_40624_20131107_024514_inLine +BABEL_OP2_304_40624_20131107_024514_outLine +BABEL_OP2_304_40713_20131210_063734_inLine +BABEL_OP2_304_40713_20131210_063734_outLine +BABEL_OP2_304_41233_20140222_034336_inLine +BABEL_OP2_304_41233_20140222_034336_outLine 
+BABEL_OP2_304_41442_20140214_035912_inLine +BABEL_OP2_304_41442_20140214_035912_outLine +BABEL_OP2_304_41493_20131113_221501_inLine +BABEL_OP2_304_41493_20131113_221501_outLine +BABEL_OP2_304_41741_20131215_020846_inLine +BABEL_OP2_304_41741_20131215_020846_outLine +BABEL_OP2_304_42126_20131024_215636_inLine +BABEL_OP2_304_42126_20131024_215636_outLine +BABEL_OP2_304_42243_20131218_052141_inLine +BABEL_OP2_304_42243_20131218_052141_outLine +BABEL_OP2_304_42497_20131130_034031_inLine +BABEL_OP2_304_42497_20131130_034031_outLine +BABEL_OP2_304_44868_20140110_204822_inLine +BABEL_OP2_304_44868_20140110_204822_outLine +BABEL_OP2_304_45374_20131019_200425_inLine +BABEL_OP2_304_45374_20131019_200425_outLine +BABEL_OP2_304_45642_20140114_234140_inLine +BABEL_OP2_304_45642_20140114_234140_outLine +BABEL_OP2_304_45843_20140114_205141_inLine +BABEL_OP2_304_45843_20140114_205141_outLine +BABEL_OP2_304_46315_20140122_004043_inLine +BABEL_OP2_304_46315_20140122_004043_outLine +BABEL_OP2_304_46333_20131204_195151_inLine +BABEL_OP2_304_46333_20131204_195151_outLine +BABEL_OP2_304_46389_20131022_050904_inLine +BABEL_OP2_304_46389_20131022_050904_outLine +BABEL_OP2_304_47110_20131023_015940_inLine +BABEL_OP2_304_47110_20131023_015940_outLine +BABEL_OP2_304_47270_20140222_021820_inLine +BABEL_OP2_304_47270_20140222_021820_outLine +BABEL_OP2_304_47451_20140203_224639_inLine +BABEL_OP2_304_47451_20140203_224639_outLine +BABEL_OP2_304_47877_20140227_065455_inLine +BABEL_OP2_304_47877_20140227_065455_outLine +BABEL_OP2_304_48024_20131031_215347_inLine +BABEL_OP2_304_48024_20131031_215347_outLine +BABEL_OP2_304_48789_20140108_012933_inLine +BABEL_OP2_304_48789_20140108_012933_outLine +BABEL_OP2_304_49001_20131214_003327_inLine +BABEL_OP2_304_49001_20131214_003327_outLine +BABEL_OP2_304_49287_20140118_013355_inLine +BABEL_OP2_304_49287_20140118_013355_outLine +BABEL_OP2_304_50175_20131124_033223_inLine +BABEL_OP2_304_50175_20131124_033223_outLine +BABEL_OP2_304_50175_20131124_035833_inLine +BABEL_OP2_304_50175_20131124_035833_outLine +BABEL_OP2_304_50565_20140124_052942_inLine +BABEL_OP2_304_50565_20140124_052942_outLine +BABEL_OP2_304_50726_20131213_031251_inLine +BABEL_OP2_304_50726_20131213_031251_outLine +BABEL_OP2_304_51417_20140228_011906_inLine +BABEL_OP2_304_51417_20140228_011906_outLine +BABEL_OP2_304_51540_20140304_011452_inLine +BABEL_OP2_304_51540_20140304_011452_outLine +BABEL_OP2_304_52025_20131116_004427_inLine +BABEL_OP2_304_52025_20131116_004427_outLine +BABEL_OP2_304_52058_20131022_055536_inLine +BABEL_OP2_304_52058_20131022_055536_outLine +BABEL_OP2_304_52438_20131206_043319_inLine +BABEL_OP2_304_52438_20131206_043319_outLine +BABEL_OP2_304_52818_20140112_011936_inLine +BABEL_OP2_304_52818_20140112_011936_outLine +BABEL_OP2_304_53419_20140213_061844_inLine +BABEL_OP2_304_53419_20140213_061844_outLine +BABEL_OP2_304_53842_20140109_012849_inLine +BABEL_OP2_304_53842_20140109_012849_outLine +BABEL_OP2_304_54744_20131205_024818_inLine +BABEL_OP2_304_54744_20131205_024818_outLine +BABEL_OP2_304_55042_20131112_051412_inLine +BABEL_OP2_304_55042_20131112_051412_outLine +BABEL_OP2_304_55381_20140217_005926_inLine +BABEL_OP2_304_55381_20140217_005926_outLine +BABEL_OP2_304_55818_20131218_020051_inLine +BABEL_OP2_304_55818_20131218_020051_outLine +BABEL_OP2_304_56057_20131112_043401_inLine +BABEL_OP2_304_56057_20131112_043401_outLine +BABEL_OP2_304_56117_20131023_035134_inLine +BABEL_OP2_304_56117_20131023_035134_outLine +BABEL_OP2_304_56429_20131129_223408_inLine 
+BABEL_OP2_304_56429_20131129_223408_outLine +BABEL_OP2_304_56674_20131024_233415_inLine +BABEL_OP2_304_56674_20131024_233415_outLine +BABEL_OP2_304_56684_20140223_001031_inLine +BABEL_OP2_304_56684_20140223_001031_outLine +BABEL_OP2_304_56720_20140119_005254_inLine +BABEL_OP2_304_56720_20140119_005254_outLine +BABEL_OP2_304_56743_20131218_042118_inLine +BABEL_OP2_304_56743_20131218_042118_outLine +BABEL_OP2_304_57035_20131106_183242_inLine +BABEL_OP2_304_57035_20131106_183242_outLine +BABEL_OP2_304_57566_20140227_000622_inLine +BABEL_OP2_304_57566_20140227_000622_outLine +BABEL_OP2_304_57609_20140121_202504_inLine +BABEL_OP2_304_57609_20140121_202504_outLine +BABEL_OP2_304_57935_20140211_015542_inLine +BABEL_OP2_304_57935_20140211_015542_outLine +BABEL_OP2_304_58103_20131212_013517_inLine +BABEL_OP2_304_58103_20131212_013517_outLine +BABEL_OP2_304_58585_20140226_022746_inLine +BABEL_OP2_304_58585_20140226_022746_outLine +BABEL_OP2_304_58717_20140112_000351_inLine +BABEL_OP2_304_58717_20140112_000351_outLine +BABEL_OP2_304_59163_20131022_033947_inLine +BABEL_OP2_304_59163_20131022_033947_outLine +BABEL_OP2_304_59291_20140207_213735_inLine +BABEL_OP2_304_59291_20140207_213735_outLine +BABEL_OP2_304_59645_20140110_210530_inLine +BABEL_OP2_304_59645_20140110_210530_outLine +BABEL_OP2_304_60282_20131031_040356_inLine +BABEL_OP2_304_60282_20131031_040356_outLine +BABEL_OP2_304_60397_20131024_183527_inLine +BABEL_OP2_304_60397_20131024_183527_outLine +BABEL_OP2_304_60418_20140111_062723_inLine +BABEL_OP2_304_60418_20140111_062723_outLine +BABEL_OP2_304_60538_20131211_043030_inLine +BABEL_OP2_304_60538_20131211_043030_outLine +BABEL_OP2_304_60830_20140106_224130_inLine +BABEL_OP2_304_60830_20140106_224130_outLine +BABEL_OP2_304_61011_20131206_213833_inLine +BABEL_OP2_304_61011_20131206_213833_outLine +BABEL_OP2_304_61219_20131206_061726_inLine +BABEL_OP2_304_61219_20131206_061726_outLine +BABEL_OP2_304_61225_20131113_052324_inLine +BABEL_OP2_304_61225_20131113_052324_outLine +BABEL_OP2_304_61357_20140113_232629_inLine +BABEL_OP2_304_61357_20140113_232629_outLine +BABEL_OP2_304_61831_20131201_042817_inLine +BABEL_OP2_304_61831_20131201_042817_outLine +BABEL_OP2_304_61888_20140228_181648_inLine +BABEL_OP2_304_61888_20140228_181648_outLine +BABEL_OP2_304_61963_20140226_192451_inLine +BABEL_OP2_304_61963_20140226_192451_outLine +BABEL_OP2_304_62177_20140227_184207_inLine +BABEL_OP2_304_62177_20140227_184207_outLine +BABEL_OP2_304_62323_20131113_001039_inLine +BABEL_OP2_304_62323_20131113_001039_outLine +BABEL_OP2_304_63081_20131206_195135_inLine +BABEL_OP2_304_63081_20131206_195135_outLine +BABEL_OP2_304_63445_20131127_005349_inLine +BABEL_OP2_304_63445_20131127_005349_outLine +BABEL_OP2_304_63671_20131024_002535_inLine +BABEL_OP2_304_63671_20131024_002535_outLine +BABEL_OP2_304_63920_20131108_182401_inLine +BABEL_OP2_304_63920_20131108_182401_outLine +BABEL_OP2_304_64014_20140228_033939_inLine +BABEL_OP2_304_64014_20140228_033939_outLine +BABEL_OP2_304_64469_20131023_182630_inLine +BABEL_OP2_304_64469_20131023_182630_outLine +BABEL_OP2_304_64688_20131109_040635_inLine +BABEL_OP2_304_64688_20131109_040635_outLine +BABEL_OP2_304_64759_20140118_203442_inLine +BABEL_OP2_304_64759_20140118_203442_outLine +BABEL_OP2_304_64796_20131128_060852_inLine +BABEL_OP2_304_64796_20131128_060852_outLine +BABEL_OP2_304_65077_20131115_005739_inLine +BABEL_OP2_304_65077_20131115_005739_outLine +BABEL_OP2_304_65336_20131109_051329_inLine +BABEL_OP2_304_65336_20131109_051329_outLine 
+BABEL_OP2_304_65723_20131201_233928_inLine +BABEL_OP2_304_65723_20131201_233928_outLine +BABEL_OP2_304_65882_20131128_220533_inLine +BABEL_OP2_304_65882_20131128_220533_outLine +BABEL_OP2_304_66001_20131208_023839_inLine +BABEL_OP2_304_66001_20131208_023839_outLine +BABEL_OP2_304_66026_20140212_224055_inLine +BABEL_OP2_304_66026_20140212_224055_outLine +BABEL_OP2_304_66350_20131022_021812_inLine +BABEL_OP2_304_66350_20131022_021812_outLine +BABEL_OP2_304_66837_20140213_053859_inLine +BABEL_OP2_304_66837_20140213_053859_outLine +BABEL_OP2_304_67304_20140216_025015_inLine +BABEL_OP2_304_67304_20140216_025015_outLine +BABEL_OP2_304_67552_20140114_011538_inLine +BABEL_OP2_304_67552_20140114_011538_outLine +BABEL_OP2_304_67894_20131112_060500_inLine +BABEL_OP2_304_67894_20131112_060500_outLine +BABEL_OP2_304_68059_20140111_025607_inLine +BABEL_OP2_304_68059_20140111_025607_outLine +BABEL_OP2_304_68908_20131127_032840_inLine +BABEL_OP2_304_68908_20131127_032840_outLine +BABEL_OP2_304_68910_20131101_042132_inLine +BABEL_OP2_304_68910_20131101_042132_outLine +BABEL_OP2_304_69107_20140123_192506_inLine +BABEL_OP2_304_69107_20140123_192506_outLine +BABEL_OP2_304_69153_20140212_204658_inLine +BABEL_OP2_304_69153_20140212_204658_outLine +BABEL_OP2_304_69992_20131213_195450_inLine +BABEL_OP2_304_69992_20131213_195450_outLine +BABEL_OP2_304_70216_20131020_173420_inLine +BABEL_OP2_304_70216_20131020_173420_outLine +BABEL_OP2_304_71263_20140113_223556_inLine +BABEL_OP2_304_71263_20140113_223556_outLine +BABEL_OP2_304_71401_20131020_005620_inLine +BABEL_OP2_304_71401_20131020_005620_outLine +BABEL_OP2_304_72844_20131115_202958_inLine +BABEL_OP2_304_72844_20131115_202958_outLine +BABEL_OP2_304_72903_20131113_023457_inLine +BABEL_OP2_304_72903_20131113_023457_outLine +BABEL_OP2_304_73042_20131214_052022_inLine +BABEL_OP2_304_73042_20131214_052022_outLine +BABEL_OP2_304_74226_20140217_044122_inLine +BABEL_OP2_304_74226_20140217_044122_outLine +BABEL_OP2_304_74455_20140224_013111_inLine +BABEL_OP2_304_74455_20140224_013111_outLine +BABEL_OP2_304_75064_20131209_035217_inLine +BABEL_OP2_304_75064_20131209_035217_outLine +BABEL_OP2_304_75223_20131205_012248_inLine +BABEL_OP2_304_75223_20131205_012248_outLine +BABEL_OP2_304_75930_20131020_013042_inLine +BABEL_OP2_304_75930_20131020_013042_outLine +BABEL_OP2_304_75975_20131019_054431_inLine +BABEL_OP2_304_75975_20131019_054431_outLine +BABEL_OP2_304_76069_20131113_042346_inLine +BABEL_OP2_304_76069_20131113_042346_outLine +BABEL_OP2_304_76730_20131025_213853_inLine +BABEL_OP2_304_76730_20131025_213853_outLine +BABEL_OP2_304_76793_20131126_013011_inLine +BABEL_OP2_304_76793_20131126_013011_outLine +BABEL_OP2_304_77033_20140228_043125_inLine +BABEL_OP2_304_77033_20140228_043125_outLine +BABEL_OP2_304_77730_20131217_042107_inLine +BABEL_OP2_304_77730_20131217_042107_outLine +BABEL_OP2_304_78360_20140301_020449_inLine +BABEL_OP2_304_78360_20140301_020449_outLine +BABEL_OP2_304_78943_20131208_222716_inLine +BABEL_OP2_304_78943_20131208_222716_outLine +BABEL_OP2_304_79028_20131022_221243_inLine +BABEL_OP2_304_79028_20131022_221243_outLine +BABEL_OP2_304_79045_20140214_202301_inLine +BABEL_OP2_304_79045_20140214_202301_outLine +BABEL_OP2_304_79723_20131023_023756_inLine +BABEL_OP2_304_79723_20131023_023756_outLine +BABEL_OP2_304_79820_20131214_042918_inLine +BABEL_OP2_304_79820_20131214_042918_outLine +BABEL_OP2_304_80209_20131112_232041_inLine +BABEL_OP2_304_80209_20131112_232041_outLine +BABEL_OP2_304_80383_20131107_233543_inLine 
+BABEL_OP2_304_80383_20131107_233543_outLine +BABEL_OP2_304_80577_20140301_014201_inLine +BABEL_OP2_304_80577_20140301_014201_outLine +BABEL_OP2_304_80721_20140213_051749_inLine +BABEL_OP2_304_80721_20140213_051749_outLine +BABEL_OP2_304_80881_20131205_175435_inLine +BABEL_OP2_304_80881_20131205_175435_outLine +BABEL_OP2_304_81404_20131213_041501_inLine +BABEL_OP2_304_81404_20131213_041501_outLine +BABEL_OP2_304_81427_20131211_012524_inLine +BABEL_OP2_304_81427_20131211_012524_outLine +BABEL_OP2_304_81769_20140105_005749_inLine +BABEL_OP2_304_81769_20140105_005749_outLine +BABEL_OP2_304_82863_20140106_054346_inLine +BABEL_OP2_304_82863_20140106_054346_outLine +BABEL_OP2_304_83436_20131211_025218_inLine +BABEL_OP2_304_83436_20131211_025218_outLine +BABEL_OP2_304_83813_20131028_033118_inLine +BABEL_OP2_304_83813_20131028_033118_outLine +BABEL_OP2_304_83851_20131203_212613_inLine +BABEL_OP2_304_83851_20131203_212613_outLine +BABEL_OP2_304_83935_20140123_034100_inLine +BABEL_OP2_304_83935_20140123_034100_outLine +BABEL_OP2_304_84125_20131115_235931_inLine +BABEL_OP2_304_84125_20131115_235931_outLine +BABEL_OP2_304_84194_20131129_040805_inLine +BABEL_OP2_304_84194_20131129_040805_outLine +BABEL_OP2_304_84605_20131215_005949_inLine +BABEL_OP2_304_84605_20131215_005949_outLine +BABEL_OP2_304_85179_20140214_071121_inLine +BABEL_OP2_304_85179_20140214_071121_outLine +BABEL_OP2_304_85248_20140222_235016_inLine +BABEL_OP2_304_85248_20140222_235016_outLine +BABEL_OP2_304_86100_20131112_221929_inLine +BABEL_OP2_304_86100_20131112_221929_outLine +BABEL_OP2_304_86472_20140116_050058_inLine +BABEL_OP2_304_86472_20140116_050058_outLine +BABEL_OP2_304_86557_20131130_234925_inLine +BABEL_OP2_304_86557_20131130_234925_outLine +BABEL_OP2_304_86829_20131107_180321_inLine +BABEL_OP2_304_86829_20131107_180321_outLine +BABEL_OP2_304_86830_20140228_051058_inLine +BABEL_OP2_304_86830_20140228_051058_outLine +BABEL_OP2_304_87280_20140207_030432_inLine +BABEL_OP2_304_87280_20140207_030432_outLine +BABEL_OP2_304_87866_20131106_002751_inLine +BABEL_OP2_304_87866_20131106_002751_outLine +BABEL_OP2_304_88982_20140111_233039_inLine +BABEL_OP2_304_88982_20140111_233039_outLine +BABEL_OP2_304_89650_20131024_023031_inLine +BABEL_OP2_304_89650_20131024_023031_outLine +BABEL_OP2_304_91581_20140203_231410_inLine +BABEL_OP2_304_91581_20140203_231410_outLine +BABEL_OP2_304_92252_20131022_042600_inLine +BABEL_OP2_304_92252_20131022_042600_outLine +BABEL_OP2_304_92440_20131109_003559_inLine +BABEL_OP2_304_92440_20131109_003559_outLine +BABEL_OP2_304_92886_20131202_233808_inLine +BABEL_OP2_304_92886_20131202_233808_outLine +BABEL_OP2_304_93224_20140121_191942_inLine +BABEL_OP2_304_93224_20140121_191942_outLine +BABEL_OP2_304_93443_20131127_032037_inLine +BABEL_OP2_304_93443_20131127_032037_outLine +BABEL_OP2_304_93475_20131213_025105_inLine +BABEL_OP2_304_93475_20131213_025105_outLine +BABEL_OP2_304_93681_20131202_212236_inLine +BABEL_OP2_304_93681_20131202_212236_outLine +BABEL_OP2_304_94465_20140213_013300_inLine +BABEL_OP2_304_94465_20140213_013300_outLine +BABEL_OP2_304_94923_20140212_021923_inLine +BABEL_OP2_304_94923_20140212_021923_outLine +BABEL_OP2_304_95677_20131024_031406_inLine +BABEL_OP2_304_95677_20131024_031406_outLine +BABEL_OP2_304_96205_20140107_233946_inLine +BABEL_OP2_304_96205_20140107_233946_outLine +BABEL_OP2_304_96405_20131214_205112_inLine +BABEL_OP2_304_96405_20131214_205112_outLine +BABEL_OP2_304_98165_20131218_234422_inLine +BABEL_OP2_304_98165_20131218_234422_outLine 
+BABEL_OP2_304_98565_20131023_235505_inLine +BABEL_OP2_304_98565_20131023_235505_outLine +BABEL_OP2_304_99264_20140222_211846_inLine +BABEL_OP2_304_99264_20140222_211846_outLine +BABEL_OP2_304_99920_20140211_023914_inLine +BABEL_OP2_304_99920_20140211_023914_outLine diff --git a/egs/babel/s5d/conf/lists/304-lithuanian/untranscribed-training.list b/egs/babel/s5d/conf/lists/304-lithuanian/untranscribed-training.list new file mode 100644 index 00000000000..fe18640b4ca --- /dev/null +++ b/egs/babel/s5d/conf/lists/304-lithuanian/untranscribed-training.list @@ -0,0 +1,524 @@ +BABEL_OP2_304_10319_20131123_212421_inLine +BABEL_OP2_304_10938_20131210_232654_outLine +BABEL_OP2_304_10974_20140112_002642_inLine +BABEL_OP2_304_10974_20140112_002642_outLine +BABEL_OP2_304_11663_20140228_001249_inLine +BABEL_OP2_304_11663_20140228_001249_outLine +BABEL_OP2_304_12036_20140212_031355_inLine +BABEL_OP2_304_12036_20140212_031355_outLine +BABEL_OP2_304_13184_20140226_230154_inLine +BABEL_OP2_304_13184_20140226_230154_outLine +BABEL_OP2_304_13490_20140115_234942_inLine +BABEL_OP2_304_13490_20140115_234942_outLine +BABEL_OP2_304_13709_20140301_011301_inLine +BABEL_OP2_304_13709_20140301_011301_outLine +BABEL_OP2_304_14179_20140213_202716_inLine +BABEL_OP2_304_14179_20140213_202716_outLine +BABEL_OP2_304_14228_20140214_035430_inLine +BABEL_OP2_304_14228_20140214_035430_outLine +BABEL_OP2_304_14350_20131215_015450_outLine +BABEL_OP2_304_14884_20131019_215509_outLine +BABEL_OP2_304_14929_20140120_043551_inLine +BABEL_OP2_304_15324_20140111_022619_inLine +BABEL_OP2_304_15324_20140111_022619_outLine +BABEL_OP2_304_15702_20140121_182136_outLine +BABEL_OP2_304_15926_20140114_014611_inLine +BABEL_OP2_304_15926_20140114_014611_outLine +BABEL_OP2_304_16149_20131218_020646_inLine +BABEL_OP2_304_16149_20131218_020646_outLine +BABEL_OP2_304_17165_20140115_221908_inLine +BABEL_OP2_304_17165_20140115_221908_outLine +BABEL_OP2_304_17440_20140210_030642_inLine +BABEL_OP2_304_17440_20140210_030642_outLine +BABEL_OP2_304_17496_20140124_200649_inLine +BABEL_OP2_304_17496_20140124_200649_outLine +BABEL_OP2_304_17567_20140112_001028_inLine +BABEL_OP2_304_17567_20140112_001028_outLine +BABEL_OP2_304_18370_20131019_230544_inLine +BABEL_OP2_304_18924_20140122_013011_inLine +BABEL_OP2_304_19134_20140211_192844_inLine +BABEL_OP2_304_19134_20140211_192844_outLine +BABEL_OP2_304_19621_20140111_004245_inLine +BABEL_OP2_304_19621_20140111_004245_outLine +BABEL_OP2_304_19672_20140111_213853_inLine +BABEL_OP2_304_19672_20140111_213853_outLine +BABEL_OP2_304_20682_20140212_225527_inLine +BABEL_OP2_304_20682_20140212_225527_outLine +BABEL_OP2_304_20721_20131126_051609_inLine +BABEL_OP2_304_20896_20131024_162213_outLine +BABEL_OP2_304_21004_20140202_211736_inLine +BABEL_OP2_304_21004_20140202_211736_outLine +BABEL_OP2_304_21794_20140107_223934_inLine +BABEL_OP2_304_21794_20140107_223934_outLine +BABEL_OP2_304_22280_20140112_040339_inLine +BABEL_OP2_304_22280_20140112_040339_outLine +BABEL_OP2_304_22612_20140214_015154_inLine +BABEL_OP2_304_22612_20140214_015154_outLine +BABEL_OP2_304_23092_20140210_000452_inLine +BABEL_OP2_304_23092_20140210_000452_outLine +BABEL_OP2_304_23151_20140226_015000_inLine +BABEL_OP2_304_23151_20140226_015000_outLine +BABEL_OP2_304_23153_20140108_004710_inLine +BABEL_OP2_304_23153_20140108_004710_outLine +BABEL_OP2_304_23239_20140113_023420_inLine +BABEL_OP2_304_23239_20140113_023420_outLine +BABEL_OP2_304_23505_20140207_012218_inLine +BABEL_OP2_304_23505_20140207_012218_outLine 
+BABEL_OP2_304_24270_20140112_035305_inLine +BABEL_OP2_304_24270_20140112_035305_outLine +BABEL_OP2_304_24470_20140213_013507_inLine +BABEL_OP2_304_24470_20140213_013507_outLine +BABEL_OP2_304_26072_20140303_191300_inLine +BABEL_OP2_304_26072_20140303_191300_outLine +BABEL_OP2_304_26836_20131203_035109_inLine +BABEL_OP2_304_27203_20140114_184748_inLine +BABEL_OP2_304_27203_20140114_184748_outLine +BABEL_OP2_304_27590_20140120_000746_inLine +BABEL_OP2_304_27590_20140120_000746_outLine +BABEL_OP2_304_27841_20140224_192246_inLine +BABEL_OP2_304_27841_20140224_192246_outLine +BABEL_OP2_304_28190_20140216_014935_inLine +BABEL_OP2_304_28190_20140216_014935_outLine +BABEL_OP2_304_28303_20131204_205256_inLine +BABEL_OP2_304_28303_20131204_205256_outLine +BABEL_OP2_304_28606_20140118_214444_inLine +BABEL_OP2_304_28606_20140118_214444_outLine +BABEL_OP2_304_28814_20140221_030045_inLine +BABEL_OP2_304_28814_20140221_030045_outLine +BABEL_OP2_304_29076_20140214_031718_inLine +BABEL_OP2_304_29076_20140214_031718_outLine +BABEL_OP2_304_29323_20140224_220848_inLine +BABEL_OP2_304_29323_20140224_220848_outLine +BABEL_OP2_304_29416_20140209_221011_inLine +BABEL_OP2_304_29416_20140209_221011_outLine +BABEL_OP2_304_29685_20131203_214940_inLine +BABEL_OP2_304_29685_20131203_214940_outLine +BABEL_OP2_304_30345_20140220_044057_inLine +BABEL_OP2_304_30345_20140220_044057_outLine +BABEL_OP2_304_30432_20140114_200919_inLine +BABEL_OP2_304_30432_20140114_200919_outLine +BABEL_OP2_304_31109_20140117_222519_outLine +BABEL_OP2_304_31184_20140107_010323_inLine +BABEL_OP2_304_31184_20140107_010323_outLine +BABEL_OP2_304_31346_20140214_012447_inLine +BABEL_OP2_304_31346_20140214_012447_outLine +BABEL_OP2_304_31583_20140217_070401_inLine +BABEL_OP2_304_31583_20140217_070401_outLine +BABEL_OP2_304_31628_20140113_222259_inLine +BABEL_OP2_304_31628_20140113_222259_outLine +BABEL_OP2_304_31728_20131026_182200_outLine +BABEL_OP2_304_32380_20131202_212228_inLine +BABEL_OP2_304_32708_20131214_184617_inLine +BABEL_OP2_304_33175_20131218_055325_inLine +BABEL_OP2_304_33175_20131218_055325_outLine +BABEL_OP2_304_33229_20140224_173431_inLine +BABEL_OP2_304_33229_20140224_173431_outLine +BABEL_OP2_304_33635_20140108_055209_inLine +BABEL_OP2_304_33635_20140108_055209_outLine +BABEL_OP2_304_33913_20140208_001736_outLine +BABEL_OP2_304_33951_20140107_014839_inLine +BABEL_OP2_304_33951_20140107_014839_outLine +BABEL_OP2_304_34197_20140120_003604_inLine +BABEL_OP2_304_34197_20140120_010000_inLine +BABEL_OP2_304_34477_20131207_235614_inLine +BABEL_OP2_304_34477_20131207_235614_outLine +BABEL_OP2_304_34811_20140111_213202_inLine +BABEL_OP2_304_34811_20140111_213202_outLine +BABEL_OP2_304_34903_20140114_055610_outLine +BABEL_OP2_304_35000_20140214_054306_inLine +BABEL_OP2_304_35000_20140214_054306_outLine +BABEL_OP2_304_35143_20140213_194234_inLine +BABEL_OP2_304_35143_20140213_194234_outLine +BABEL_OP2_304_35583_20140221_183221_inLine +BABEL_OP2_304_35583_20140221_183221_outLine +BABEL_OP2_304_36669_20140107_024312_inLine +BABEL_OP2_304_36669_20140107_024312_outLine +BABEL_OP2_304_36894_20131214_210131_inLine +BABEL_OP2_304_36894_20131214_210131_outLine +BABEL_OP2_304_37271_20140214_210456_inLine +BABEL_OP2_304_37271_20140214_210456_outLine +BABEL_OP2_304_37281_20140112_044135_outLine +BABEL_OP2_304_37776_20131029_063539_inLine +BABEL_OP2_304_37853_20140228_011410_outLine +BABEL_OP2_304_38689_20140122_053200_inLine +BABEL_OP2_304_38689_20140122_053200_outLine +BABEL_OP2_304_39059_20140223_225026_inLine 
+BABEL_OP2_304_39059_20140223_225026_outLine +BABEL_OP2_304_39307_20131212_235627_outLine +BABEL_OP2_304_39426_20140222_001315_inLine +BABEL_OP2_304_39426_20140222_001315_outLine +BABEL_OP2_304_39555_20140217_001722_inLine +BABEL_OP2_304_39555_20140217_001722_outLine +BABEL_OP2_304_39744_20131205_204607_inLine +BABEL_OP2_304_39744_20131205_204607_outLine +BABEL_OP2_304_40557_20140214_015226_inLine +BABEL_OP2_304_40557_20140214_015226_outLine +BABEL_OP2_304_40648_20140105_023520_inLine +BABEL_OP2_304_40648_20140105_023520_outLine +BABEL_OP2_304_40740_20140303_034635_inLine +BABEL_OP2_304_40740_20140303_034635_outLine +BABEL_OP2_304_41038_20140120_031808_inLine +BABEL_OP2_304_41038_20140120_031808_outLine +BABEL_OP2_304_41097_20140114_065635_inLine +BABEL_OP2_304_41097_20140114_065635_outLine +BABEL_OP2_304_41100_20131212_035211_outLine +BABEL_OP2_304_41682_20131027_233404_outLine +BABEL_OP2_304_42029_20140225_042923_outLine +BABEL_OP2_304_42600_20131203_001235_inLine +BABEL_OP2_304_42619_20140124_050222_inLine +BABEL_OP2_304_42619_20140124_050222_outLine +BABEL_OP2_304_42942_20140122_011934_inLine +BABEL_OP2_304_42942_20140122_011934_outLine +BABEL_OP2_304_43788_20140113_035220_inLine +BABEL_OP2_304_43788_20140113_035220_outLine +BABEL_OP2_304_43789_20140108_205751_inLine +BABEL_OP2_304_43789_20140108_205751_outLine +BABEL_OP2_304_44477_20140116_062617_outLine +BABEL_OP2_304_44709_20140111_061727_inLine +BABEL_OP2_304_44709_20140111_061727_outLine +BABEL_OP2_304_44847_20140123_043546_inLine +BABEL_OP2_304_44847_20140123_043546_outLine +BABEL_OP2_304_46041_20140216_224708_inLine +BABEL_OP2_304_46041_20140216_224708_outLine +BABEL_OP2_304_46066_20140303_215904_inLine +BABEL_OP2_304_46066_20140303_215904_outLine +BABEL_OP2_304_46757_20140123_053540_outLine +BABEL_OP2_304_47186_20140213_022646_inLine +BABEL_OP2_304_47186_20140213_022646_outLine +BABEL_OP2_304_47283_20131208_055437_inLine +BABEL_OP2_304_47283_20131208_055437_outLine +BABEL_OP2_304_47823_20140213_200425_inLine +BABEL_OP2_304_47823_20140213_200425_outLine +BABEL_OP2_304_47959_20131206_002421_inLine +BABEL_OP2_304_47959_20131206_002421_outLine +BABEL_OP2_304_48243_20131130_191603_inLine +BABEL_OP2_304_48243_20131130_191603_outLine +BABEL_OP2_304_48422_20140226_030714_inLine +BABEL_OP2_304_48422_20140226_030714_outLine +BABEL_OP2_304_49118_20140224_035159_inLine +BABEL_OP2_304_49118_20140224_035159_outLine +BABEL_OP2_304_49502_20131128_050926_inLine +BABEL_OP2_304_49502_20131128_050926_outLine +BABEL_OP2_304_49637_20131204_225149_outLine +BABEL_OP2_304_49739_20131025_221623_outLine +BABEL_OP2_304_49812_20140221_004200_inLine +BABEL_OP2_304_49812_20140221_004200_outLine +BABEL_OP2_304_49870_20131026_011340_outLine +BABEL_OP2_304_49902_20131215_011726_inLine +BABEL_OP2_304_49902_20131215_011726_outLine +BABEL_OP2_304_50427_20140121_041440_inLine +BABEL_OP2_304_50427_20140121_041440_outLine +BABEL_OP2_304_50549_20140225_015442_inLine +BABEL_OP2_304_50549_20140225_015442_outLine +BABEL_OP2_304_50630_20140115_225817_outLine +BABEL_OP2_304_50958_20140107_035922_inLine +BABEL_OP2_304_50958_20140107_035922_outLine +BABEL_OP2_304_51156_20131108_034329_outLine +BABEL_OP2_304_51611_20131215_000933_outLine +BABEL_OP2_304_51611_20131215_001818_outLine +BABEL_OP2_304_51955_20131130_031610_inLine +BABEL_OP2_304_51955_20131130_031610_outLine +BABEL_OP2_304_51968_20140115_014540_outLine +BABEL_OP2_304_52404_20140111_203352_inLine +BABEL_OP2_304_52404_20140111_203352_outLine +BABEL_OP2_304_52422_20140227_022646_inLine 
+BABEL_OP2_304_52422_20140227_022646_outLine +BABEL_OP2_304_52442_20140111_220136_inLine +BABEL_OP2_304_52442_20140111_220136_outLine +BABEL_OP2_304_52717_20131212_005407_inLine +BABEL_OP2_304_52717_20131212_005407_outLine +BABEL_OP2_304_52932_20131203_060700_inLine +BABEL_OP2_304_53957_20140213_050235_inLine +BABEL_OP2_304_53957_20140213_050235_outLine +BABEL_OP2_304_54104_20140122_065051_inLine +BABEL_OP2_304_54162_20140117_004147_outLine +BABEL_OP2_304_54405_20140111_033306_inLine +BABEL_OP2_304_54405_20140111_033306_outLine +BABEL_OP2_304_54477_20140214_014521_outLine +BABEL_OP2_304_56198_20131207_044103_inLine +BABEL_OP2_304_56198_20131207_044103_outLine +BABEL_OP2_304_56198_20131207_044824_inLine +BABEL_OP2_304_56198_20131207_044824_outLine +BABEL_OP2_304_56370_20131210_222152_inLine +BABEL_OP2_304_56370_20131210_222152_outLine +BABEL_OP2_304_56606_20131029_001934_inLine +BABEL_OP2_304_57067_20140226_203014_inLine +BABEL_OP2_304_57067_20140226_203014_outLine +BABEL_OP2_304_57093_20140108_195135_inLine +BABEL_OP2_304_57093_20140108_195135_outLine +BABEL_OP2_304_57529_20140213_194042_inLine +BABEL_OP2_304_57529_20140213_194042_outLine +BABEL_OP2_304_57919_20131019_181730_inLine +BABEL_OP2_304_58489_20140213_201405_inLine +BABEL_OP2_304_58489_20140213_201405_outLine +BABEL_OP2_304_58821_20140122_043719_inLine +BABEL_OP2_304_58821_20140122_043719_outLine +BABEL_OP2_304_59078_20140114_043013_inLine +BABEL_OP2_304_59078_20140114_043013_outLine +BABEL_OP2_304_59301_20140220_055528_inLine +BABEL_OP2_304_59301_20140220_055528_outLine +BABEL_OP2_304_59301_20140220_061405_inLine +BABEL_OP2_304_59301_20140220_061405_outLine +BABEL_OP2_304_59509_20140111_012159_inLine +BABEL_OP2_304_59509_20140111_012159_outLine +BABEL_OP2_304_59993_20131207_052409_inLine +BABEL_OP2_304_59993_20131207_052409_outLine +BABEL_OP2_304_60650_20131104_000431_outLine +BABEL_OP2_304_61040_20140214_024448_inLine +BABEL_OP2_304_61040_20140214_024448_outLine +BABEL_OP2_304_61167_20131218_000849_inLine +BABEL_OP2_304_61167_20131218_000849_outLine +BABEL_OP2_304_61190_20131202_233122_inLine +BABEL_OP2_304_61190_20131202_233122_outLine +BABEL_OP2_304_61435_20140220_043508_inLine +BABEL_OP2_304_61435_20140220_043508_outLine +BABEL_OP2_304_62014_20140110_225736_inLine +BABEL_OP2_304_62014_20140110_225736_outLine +BABEL_OP2_304_62286_20140107_221925_inLine +BABEL_OP2_304_62286_20140107_221925_outLine +BABEL_OP2_304_62471_20131023_045947_inLine +BABEL_OP2_304_62471_20131023_045947_outLine +BABEL_OP2_304_62491_20131024_043538_outLine +BABEL_OP2_304_62734_20131211_050743_inLine +BABEL_OP2_304_62734_20131211_050743_outLine +BABEL_OP2_304_62810_20131205_000409_outLine +BABEL_OP2_304_62810_20131205_001411_outLine +BABEL_OP2_304_62835_20140111_023213_inLine +BABEL_OP2_304_62835_20140111_023213_outLine +BABEL_OP2_304_62852_20131217_062508_outLine +BABEL_OP2_304_63220_20140118_205257_outLine +BABEL_OP2_304_63757_20140112_050031_inLine +BABEL_OP2_304_64065_20131208_042849_inLine +BABEL_OP2_304_64065_20131208_042849_outLine +BABEL_OP2_304_64350_20140112_002927_inLine +BABEL_OP2_304_64350_20140112_002927_outLine +BABEL_OP2_304_64398_20140114_001457_inLine +BABEL_OP2_304_64635_20131031_005941_outLine +BABEL_OP2_304_64768_20131205_215037_inLine +BABEL_OP2_304_64768_20131205_215037_outLine +BABEL_OP2_304_64870_20140121_052458_inLine +BABEL_OP2_304_64870_20140121_052458_outLine +BABEL_OP2_304_65064_20140211_004709_inLine +BABEL_OP2_304_65064_20140211_004709_outLine +BABEL_OP2_304_65298_20140225_223150_outLine 
+BABEL_OP2_304_65367_20140223_221652_inLine +BABEL_OP2_304_65367_20140223_221652_outLine +BABEL_OP2_304_66519_20131217_221237_inLine +BABEL_OP2_304_66519_20131217_221237_outLine +BABEL_OP2_304_66959_20140214_010021_inLine +BABEL_OP2_304_66959_20140214_010021_outLine +BABEL_OP2_304_67659_20131210_070128_inLine +BABEL_OP2_304_67659_20131210_070128_outLine +BABEL_OP2_304_67842_20131209_002442_inLine +BABEL_OP2_304_67842_20131209_002442_outLine +BABEL_OP2_304_68244_20140112_003451_inLine +BABEL_OP2_304_68244_20140112_003451_outLine +BABEL_OP2_304_69578_20140116_015102_inLine +BABEL_OP2_304_69578_20140116_015102_outLine +BABEL_OP2_304_69633_20140112_001408_inLine +BABEL_OP2_304_69633_20140112_001408_outLine +BABEL_OP2_304_69636_20140114_002409_outLine +BABEL_OP2_304_70221_20140114_051222_inLine +BABEL_OP2_304_70221_20140114_051222_outLine +BABEL_OP2_304_70293_20131019_050801_inLine +BABEL_OP2_304_70343_20140123_031245_inLine +BABEL_OP2_304_70343_20140123_031245_outLine +BABEL_OP2_304_70713_20140304_052610_inLine +BABEL_OP2_304_70713_20140304_052610_outLine +BABEL_OP2_304_71038_20140227_011955_inLine +BABEL_OP2_304_71038_20140227_011955_outLine +BABEL_OP2_304_71067_20140111_025531_inLine +BABEL_OP2_304_71067_20140111_025531_outLine +BABEL_OP2_304_71282_20140303_003653_inLine +BABEL_OP2_304_71282_20140303_003653_outLine +BABEL_OP2_304_71559_20140217_031954_inLine +BABEL_OP2_304_71559_20140217_031954_outLine +BABEL_OP2_304_72324_20140111_214356_inLine +BABEL_OP2_304_72324_20140111_214356_outLine +BABEL_OP2_304_73072_20131211_203538_inLine +BABEL_OP2_304_73119_20131207_030241_inLine +BABEL_OP2_304_73119_20131207_030241_outLine +BABEL_OP2_304_73518_20140227_044044_inLine +BABEL_OP2_304_73518_20140227_044044_outLine +BABEL_OP2_304_74253_20140209_020556_outLine +BABEL_OP2_304_75261_20140214_002012_inLine +BABEL_OP2_304_75261_20140214_002012_outLine +BABEL_OP2_304_76218_20140112_030818_inLine +BABEL_OP2_304_76238_20140213_065615_outLine +BABEL_OP2_304_77391_20131206_031416_inLine +BABEL_OP2_304_77391_20131206_031416_outLine +BABEL_OP2_304_77567_20131217_032300_inLine +BABEL_OP2_304_78016_20131211_033559_inLine +BABEL_OP2_304_78016_20131211_033559_outLine +BABEL_OP2_304_78016_20131211_034555_inLine +BABEL_OP2_304_78016_20131211_034555_outLine +BABEL_OP2_304_78016_20131211_035830_inLine +BABEL_OP2_304_78016_20131211_035830_outLine +BABEL_OP2_304_78544_20140118_220548_inLine +BABEL_OP2_304_78544_20140118_220548_outLine +BABEL_OP2_304_78544_20140118_221258_inLine +BABEL_OP2_304_78544_20140118_221258_outLine +BABEL_OP2_304_78544_20140118_222525_inLine +BABEL_OP2_304_78544_20140118_222525_outLine +BABEL_OP2_304_78833_20131024_214927_outLine +BABEL_OP2_304_79129_20140303_004430_inLine +BABEL_OP2_304_79129_20140303_004430_outLine +BABEL_OP2_304_79139_20140117_233824_inLine +BABEL_OP2_304_79139_20140117_233824_outLine +BABEL_OP2_304_79167_20140113_043213_inLine +BABEL_OP2_304_79167_20140113_043213_outLine +BABEL_OP2_304_79571_20140115_210036_inLine +BABEL_OP2_304_79571_20140115_210036_outLine +BABEL_OP2_304_79590_20140115_194001_inLine +BABEL_OP2_304_79590_20140115_194001_outLine +BABEL_OP2_304_80136_20140221_210907_inLine +BABEL_OP2_304_80136_20140221_210907_outLine +BABEL_OP2_304_80306_20140113_211243_inLine +BABEL_OP2_304_80306_20140113_211243_outLine +BABEL_OP2_304_80781_20131207_214652_inLine +BABEL_OP2_304_80781_20131207_214652_outLine +BABEL_OP2_304_81392_20140120_040823_inLine +BABEL_OP2_304_81392_20140120_040823_outLine +BABEL_OP2_304_81435_20140122_044047_inLine 
+BABEL_OP2_304_81435_20140122_044047_outLine +BABEL_OP2_304_81553_20140221_190721_inLine +BABEL_OP2_304_81553_20140221_190721_outLine +BABEL_OP2_304_81622_20140115_191114_inLine +BABEL_OP2_304_81622_20140115_191114_outLine +BABEL_OP2_304_81671_20140303_000114_inLine +BABEL_OP2_304_81671_20140303_000114_outLine +BABEL_OP2_304_82138_20140108_210521_inLine +BABEL_OP2_304_82138_20140108_210521_outLine +BABEL_OP2_304_82140_20140109_010030_inLine +BABEL_OP2_304_82140_20140109_010030_outLine +BABEL_OP2_304_82966_20140212_003555_outLine +BABEL_OP2_304_82979_20131206_030414_inLine +BABEL_OP2_304_82979_20131206_030414_outLine +BABEL_OP2_304_83238_20140121_050333_inLine +BABEL_OP2_304_83238_20140121_050333_outLine +BABEL_OP2_304_83609_20131031_045140_inLine +BABEL_OP2_304_83609_20131031_045140_outLine +BABEL_OP2_304_84055_20140304_014209_inLine +BABEL_OP2_304_84055_20140304_014209_outLine +BABEL_OP2_304_84327_20140119_004436_inLine +BABEL_OP2_304_84327_20140119_004436_outLine +BABEL_OP2_304_84430_20131024_015151_inLine +BABEL_OP2_304_84467_20131030_000051_outLine +BABEL_OP2_304_84541_20131113_030920_inLine +BABEL_OP2_304_84583_20140122_023451_inLine +BABEL_OP2_304_84583_20140122_023451_outLine +BABEL_OP2_304_84715_20140225_204018_inLine +BABEL_OP2_304_84715_20140225_204018_outLine +BABEL_OP2_304_84823_20140213_070220_inLine +BABEL_OP2_304_84823_20140213_070220_outLine +BABEL_OP2_304_84936_20140115_025845_inLine +BABEL_OP2_304_84936_20140115_025845_outLine +BABEL_OP2_304_85028_20140216_043545_inLine +BABEL_OP2_304_85028_20140216_043545_outLine +BABEL_OP2_304_85048_20140213_194500_inLine +BABEL_OP2_304_85048_20140213_194500_outLine +BABEL_OP2_304_85651_20131213_231614_inLine +BABEL_OP2_304_85651_20131213_231614_outLine +BABEL_OP2_304_86191_20131208_035829_inLine +BABEL_OP2_304_86191_20131208_035829_outLine +BABEL_OP2_304_86433_20140122_053030_inLine +BABEL_OP2_304_86433_20140122_053030_outLine +BABEL_OP2_304_86676_20140109_025931_inLine +BABEL_OP2_304_86676_20140109_025931_outLine +BABEL_OP2_304_86748_20140225_213348_inLine +BABEL_OP2_304_86748_20140225_213348_outLine +BABEL_OP2_304_87073_20131114_031449_outLine +BABEL_OP2_304_87074_20131216_043306_inLine +BABEL_OP2_304_87074_20131216_043306_outLine +BABEL_OP2_304_87313_20140115_011909_inLine +BABEL_OP2_304_87313_20140115_011909_outLine +BABEL_OP2_304_87889_20140225_010303_inLine +BABEL_OP2_304_87889_20140225_010303_outLine +BABEL_OP2_304_88445_20140112_004454_inLine +BABEL_OP2_304_88445_20140112_004454_outLine +BABEL_OP2_304_88661_20140109_034129_inLine +BABEL_OP2_304_88661_20140109_034129_outLine +BABEL_OP2_304_88674_20131029_180931_outLine +BABEL_OP2_304_88686_20131124_012926_outLine +BABEL_OP2_304_88938_20140226_001937_inLine +BABEL_OP2_304_89575_20140213_180857_inLine +BABEL_OP2_304_89575_20140213_180857_outLine +BABEL_OP2_304_89695_20140107_232450_inLine +BABEL_OP2_304_89695_20140107_232450_outLine +BABEL_OP2_304_89718_20131112_202612_outLine +BABEL_OP2_304_89877_20140114_202105_inLine +BABEL_OP2_304_89877_20140114_202105_outLine +BABEL_OP2_304_89888_20131215_015024_outLine +BABEL_OP2_304_89943_20131214_030426_inLine +BABEL_OP2_304_89943_20131214_030426_outLine +BABEL_OP2_304_91252_20131113_035252_inLine +BABEL_OP2_304_91319_20140222_040737_inLine +BABEL_OP2_304_91319_20140222_040737_outLine +BABEL_OP2_304_91336_20140109_002119_inLine +BABEL_OP2_304_91336_20140109_002119_outLine +BABEL_OP2_304_91463_20140212_010126_inLine +BABEL_OP2_304_91463_20140212_012624_inLine +BABEL_OP2_304_91825_20131214_190413_inLine 
+BABEL_OP2_304_91825_20131214_190413_outLine +BABEL_OP2_304_91825_20131214_191357_inLine +BABEL_OP2_304_91825_20131214_191357_outLine +BABEL_OP2_304_92065_20140213_212712_inLine +BABEL_OP2_304_92065_20140213_212712_outLine +BABEL_OP2_304_92065_20140213_213512_inLine +BABEL_OP2_304_92065_20140213_213512_outLine +BABEL_OP2_304_92065_20140213_214440_inLine +BABEL_OP2_304_92065_20140213_214440_outLine +BABEL_OP2_304_92176_20140114_011149_inLine +BABEL_OP2_304_92176_20140114_011149_outLine +BABEL_OP2_304_92356_20140225_005836_inLine +BABEL_OP2_304_92356_20140225_005836_outLine +BABEL_OP2_304_92459_20131207_233730_outLine +BABEL_OP2_304_92527_20131217_074850_inLine +BABEL_OP2_304_92527_20131217_074850_outLine +BABEL_OP2_304_92740_20140112_014905_inLine +BABEL_OP2_304_92740_20140112_014905_outLine +BABEL_OP2_304_92809_20131213_011040_inLine +BABEL_OP2_304_93153_20140207_015821_inLine +BABEL_OP2_304_93153_20140207_015821_outLine +BABEL_OP2_304_93490_20140216_035543_inLine +BABEL_OP2_304_93490_20140216_035543_outLine +BABEL_OP2_304_93964_20140108_222311_inLine +BABEL_OP2_304_94212_20131020_203106_inLine +BABEL_OP2_304_94253_20131216_233242_inLine +BABEL_OP2_304_94253_20131216_233242_outLine +BABEL_OP2_304_94713_20131125_035926_outLine +BABEL_OP2_304_95446_20140221_224816_inLine +BABEL_OP2_304_95446_20140221_224816_outLine +BABEL_OP2_304_95937_20131108_201706_inLine +BABEL_OP2_304_96077_20131107_020023_outLine +BABEL_OP2_304_96446_20131204_220739_inLine +BABEL_OP2_304_96446_20131204_220739_outLine +BABEL_OP2_304_96525_20140216_231544_inLine +BABEL_OP2_304_96525_20140216_231544_outLine +BABEL_OP2_304_96584_20140228_045227_inLine +BABEL_OP2_304_96584_20140228_045227_outLine +BABEL_OP2_304_96910_20131203_185444_inLine +BABEL_OP2_304_96910_20131203_185444_outLine +BABEL_OP2_304_97264_20140225_193258_inLine +BABEL_OP2_304_97264_20140225_193258_outLine +BABEL_OP2_304_97376_20140121_204102_inLine +BABEL_OP2_304_97376_20140121_204102_outLine +BABEL_OP2_304_97461_20140111_022155_inLine +BABEL_OP2_304_97461_20140111_022155_outLine +BABEL_OP2_304_97988_20140212_022710_inLine +BABEL_OP2_304_97988_20140212_022710_outLine +BABEL_OP2_304_98390_20140123_203258_inLine +BABEL_OP2_304_98390_20140123_203258_outLine +BABEL_OP2_304_98580_20140115_202801_inLine +BABEL_OP2_304_98580_20140115_202801_outLine +BABEL_OP2_304_99202_20140121_200458_outLine +BABEL_OP2_304_99487_20131211_225837_outLine +BABEL_OP2_304_99516_20131202_003142_inLine +BABEL_OP2_304_99594_20140107_024518_inLine +BABEL_OP2_304_99594_20140107_024518_outLine +BABEL_OP2_304_99813_20140115_223643_inLine +BABEL_OP2_304_99813_20140115_223643_outLine +BABEL_OP2_304_99887_20140220_032712_inLine +BABEL_OP2_304_99887_20140220_032712_outLine +BABEL_OP2_304_99955_20140303_223109_inLine +BABEL_OP2_304_99955_20140303_223109_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/dev.2h.list b/egs/babel/s5d/conf/lists/305-guarani/dev.2h.list new file mode 100644 index 00000000000..4e8210eeac3 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/dev.2h.list @@ -0,0 +1,124 @@ +BABEL_OP3_305_13483_20150218_082518_inLine +BABEL_OP3_305_13483_20150218_082518_outLine +BABEL_OP3_305_18992_20140612_060247_inLine +BABEL_OP3_305_18992_20140612_060247_outLine +BABEL_OP3_305_20721_20150114_090748_inLine +BABEL_OP3_305_20721_20150114_090748_outLine +BABEL_OP3_305_21004_20150217_083755_inLine +BABEL_OP3_305_21004_20150217_083755_outLine +BABEL_OP3_305_21624_20150222_054542_inLine +BABEL_OP3_305_21624_20150222_054542_outLine +BABEL_OP3_305_22034_20141017_000534_inLine 
+BABEL_OP3_305_22034_20141017_000534_outLine +BABEL_OP3_305_22288_20140611_014728_inLine +BABEL_OP3_305_22288_20140611_014728_outLine +BABEL_OP3_305_22446_20140619_021336_inLine +BABEL_OP3_305_22446_20140619_021336_outLine +BABEL_OP3_305_23006_20140807_062702_inLine +BABEL_OP3_305_23006_20140807_062702_outLine +BABEL_OP3_305_23239_20150208_054506_inLine +BABEL_OP3_305_23239_20150208_054506_outLine +BABEL_OP3_305_24253_20150219_085207_inLine +BABEL_OP3_305_24253_20150219_085207_outLine +BABEL_OP3_305_27046_20140614_013755_inLine +BABEL_OP3_305_27046_20140614_013755_outLine +BABEL_OP3_305_30645_20140619_062447_inLine +BABEL_OP3_305_30645_20140619_062447_outLine +BABEL_OP3_305_32097_20140615_023706_inLine +BABEL_OP3_305_32097_20140615_023706_outLine +BABEL_OP3_305_32169_20140612_043749_inLine +BABEL_OP3_305_32169_20140612_043749_outLine +BABEL_OP3_305_34208_20140612_034755_inLine +BABEL_OP3_305_34208_20140612_034755_outLine +BABEL_OP3_305_37064_20140917_032644_inLine +BABEL_OP3_305_37064_20140917_032644_outLine +BABEL_OP3_305_38963_20140611_064935_inLine +BABEL_OP3_305_38963_20140611_064935_outLine +BABEL_OP3_305_39307_20140823_040640_inLine +BABEL_OP3_305_39307_20140823_040640_outLine +BABEL_OP3_305_39555_20141022_235815_inLine +BABEL_OP3_305_39555_20141022_235815_outLine +BABEL_OP3_305_39555_20141023_010258_inLine +BABEL_OP3_305_39555_20141023_010258_outLine +BABEL_OP3_305_41685_20150320_083024_inLine +BABEL_OP3_305_41685_20150320_083024_outLine +BABEL_OP3_305_43395_20150303_092614_inLine +BABEL_OP3_305_43395_20150303_092614_outLine +BABEL_OP3_305_44619_20140621_050143_inLine +BABEL_OP3_305_44619_20140621_050143_outLine +BABEL_OP3_305_45235_20141022_025027_inLine +BABEL_OP3_305_45235_20141022_025027_outLine +BABEL_OP3_305_46169_20150122_044028_inLine +BABEL_OP3_305_46169_20150122_044028_outLine +BABEL_OP3_305_46389_20141017_013950_inLine +BABEL_OP3_305_46389_20141017_013950_outLine +BABEL_OP3_305_46550_20140906_022304_inLine +BABEL_OP3_305_46550_20140906_022304_outLine +BABEL_OP3_305_46550_20140906_023533_inLine +BABEL_OP3_305_46550_20140906_023533_outLine +BABEL_OP3_305_46808_20140613_063242_inLine +BABEL_OP3_305_46808_20140613_063242_outLine +BABEL_OP3_305_47283_20140827_041341_inLine +BABEL_OP3_305_47283_20140827_041341_outLine +BABEL_OP3_305_49870_20140612_044921_inLine +BABEL_OP3_305_49870_20140612_044921_outLine +BABEL_OP3_305_50090_20150206_002321_inLine +BABEL_OP3_305_50090_20150206_002321_outLine +BABEL_OP3_305_50810_20140619_063147_inLine +BABEL_OP3_305_50810_20140619_063147_outLine +BABEL_OP3_305_50962_20140621_015129_inLine +BABEL_OP3_305_50962_20140621_015129_outLine +BABEL_OP3_305_51156_20140613_063549_inLine +BABEL_OP3_305_51156_20140613_063549_outLine +BABEL_OP3_305_52717_20140619_062206_inLine +BABEL_OP3_305_52717_20140619_062206_outLine +BABEL_OP3_305_53441_20140612_055846_inLine +BABEL_OP3_305_53441_20140612_055846_outLine +BABEL_OP3_305_56019_20150221_084856_inLine +BABEL_OP3_305_56019_20150221_084856_outLine +BABEL_OP3_305_58107_20150201_050424_inLine +BABEL_OP3_305_58107_20150201_050424_outLine +BABEL_OP3_305_58717_20150201_022141_inLine +BABEL_OP3_305_58717_20150201_022141_outLine +BABEL_OP3_305_61971_20150328_064233_inLine +BABEL_OP3_305_61971_20150328_064233_outLine +BABEL_OP3_305_66305_20150220_030810_inLine +BABEL_OP3_305_66305_20150220_030810_outLine +BABEL_OP3_305_67659_20140808_040651_inLine +BABEL_OP3_305_67659_20140808_040651_outLine +BABEL_OP3_305_73430_20150218_080038_inLine +BABEL_OP3_305_73430_20150218_080038_outLine 
+BABEL_OP3_305_73511_20150213_081754_inLine +BABEL_OP3_305_73511_20150213_081754_outLine +BABEL_OP3_305_76756_20150206_024436_inLine +BABEL_OP3_305_76756_20150206_024436_outLine +BABEL_OP3_305_78161_20150312_093559_inLine +BABEL_OP3_305_78161_20150312_093559_outLine +BABEL_OP3_305_78609_20141021_002844_inLine +BABEL_OP3_305_78609_20141021_002844_outLine +BABEL_OP3_305_81229_20140904_012832_inLine +BABEL_OP3_305_81229_20140904_012832_outLine +BABEL_OP3_305_81287_20150215_053321_inLine +BABEL_OP3_305_81287_20150215_053321_outLine +BABEL_OP3_305_81424_20150213_073659_inLine +BABEL_OP3_305_81424_20150213_073659_outLine +BABEL_OP3_305_84029_20140613_050741_inLine +BABEL_OP3_305_84029_20140613_050741_outLine +BABEL_OP3_305_84541_20140612_075946_inLine +BABEL_OP3_305_84541_20140612_075946_outLine +BABEL_OP3_305_84768_20140619_061958_inLine +BABEL_OP3_305_84768_20140619_061958_outLine +BABEL_OP3_305_86885_20140612_074001_inLine +BABEL_OP3_305_86885_20140612_074001_outLine +BABEL_OP3_305_88686_20140906_002505_inLine +BABEL_OP3_305_88686_20140906_002505_outLine +BABEL_OP3_305_90737_20141020_235210_inLine +BABEL_OP3_305_90737_20141020_235210_outLine +BABEL_OP3_305_91383_20150307_051712_inLine +BABEL_OP3_305_91383_20150307_051712_outLine +BABEL_OP3_305_96446_20140620_020014_inLine +BABEL_OP3_305_96446_20140620_020014_outLine +BABEL_OP3_305_97588_20140806_063029_inLine +BABEL_OP3_305_97588_20140806_063029_outLine +BABEL_OP3_305_97911_20150304_082443_inLine +BABEL_OP3_305_97911_20150304_082443_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/dev.list b/egs/babel/s5d/conf/lists/305-guarani/dev.list new file mode 100644 index 00000000000..4e8210eeac3 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/dev.list @@ -0,0 +1,124 @@ +BABEL_OP3_305_13483_20150218_082518_inLine +BABEL_OP3_305_13483_20150218_082518_outLine +BABEL_OP3_305_18992_20140612_060247_inLine +BABEL_OP3_305_18992_20140612_060247_outLine +BABEL_OP3_305_20721_20150114_090748_inLine +BABEL_OP3_305_20721_20150114_090748_outLine +BABEL_OP3_305_21004_20150217_083755_inLine +BABEL_OP3_305_21004_20150217_083755_outLine +BABEL_OP3_305_21624_20150222_054542_inLine +BABEL_OP3_305_21624_20150222_054542_outLine +BABEL_OP3_305_22034_20141017_000534_inLine +BABEL_OP3_305_22034_20141017_000534_outLine +BABEL_OP3_305_22288_20140611_014728_inLine +BABEL_OP3_305_22288_20140611_014728_outLine +BABEL_OP3_305_22446_20140619_021336_inLine +BABEL_OP3_305_22446_20140619_021336_outLine +BABEL_OP3_305_23006_20140807_062702_inLine +BABEL_OP3_305_23006_20140807_062702_outLine +BABEL_OP3_305_23239_20150208_054506_inLine +BABEL_OP3_305_23239_20150208_054506_outLine +BABEL_OP3_305_24253_20150219_085207_inLine +BABEL_OP3_305_24253_20150219_085207_outLine +BABEL_OP3_305_27046_20140614_013755_inLine +BABEL_OP3_305_27046_20140614_013755_outLine +BABEL_OP3_305_30645_20140619_062447_inLine +BABEL_OP3_305_30645_20140619_062447_outLine +BABEL_OP3_305_32097_20140615_023706_inLine +BABEL_OP3_305_32097_20140615_023706_outLine +BABEL_OP3_305_32169_20140612_043749_inLine +BABEL_OP3_305_32169_20140612_043749_outLine +BABEL_OP3_305_34208_20140612_034755_inLine +BABEL_OP3_305_34208_20140612_034755_outLine +BABEL_OP3_305_37064_20140917_032644_inLine +BABEL_OP3_305_37064_20140917_032644_outLine +BABEL_OP3_305_38963_20140611_064935_inLine +BABEL_OP3_305_38963_20140611_064935_outLine +BABEL_OP3_305_39307_20140823_040640_inLine +BABEL_OP3_305_39307_20140823_040640_outLine +BABEL_OP3_305_39555_20141022_235815_inLine +BABEL_OP3_305_39555_20141022_235815_outLine 
+BABEL_OP3_305_39555_20141023_010258_inLine +BABEL_OP3_305_39555_20141023_010258_outLine +BABEL_OP3_305_41685_20150320_083024_inLine +BABEL_OP3_305_41685_20150320_083024_outLine +BABEL_OP3_305_43395_20150303_092614_inLine +BABEL_OP3_305_43395_20150303_092614_outLine +BABEL_OP3_305_44619_20140621_050143_inLine +BABEL_OP3_305_44619_20140621_050143_outLine +BABEL_OP3_305_45235_20141022_025027_inLine +BABEL_OP3_305_45235_20141022_025027_outLine +BABEL_OP3_305_46169_20150122_044028_inLine +BABEL_OP3_305_46169_20150122_044028_outLine +BABEL_OP3_305_46389_20141017_013950_inLine +BABEL_OP3_305_46389_20141017_013950_outLine +BABEL_OP3_305_46550_20140906_022304_inLine +BABEL_OP3_305_46550_20140906_022304_outLine +BABEL_OP3_305_46550_20140906_023533_inLine +BABEL_OP3_305_46550_20140906_023533_outLine +BABEL_OP3_305_46808_20140613_063242_inLine +BABEL_OP3_305_46808_20140613_063242_outLine +BABEL_OP3_305_47283_20140827_041341_inLine +BABEL_OP3_305_47283_20140827_041341_outLine +BABEL_OP3_305_49870_20140612_044921_inLine +BABEL_OP3_305_49870_20140612_044921_outLine +BABEL_OP3_305_50090_20150206_002321_inLine +BABEL_OP3_305_50090_20150206_002321_outLine +BABEL_OP3_305_50810_20140619_063147_inLine +BABEL_OP3_305_50810_20140619_063147_outLine +BABEL_OP3_305_50962_20140621_015129_inLine +BABEL_OP3_305_50962_20140621_015129_outLine +BABEL_OP3_305_51156_20140613_063549_inLine +BABEL_OP3_305_51156_20140613_063549_outLine +BABEL_OP3_305_52717_20140619_062206_inLine +BABEL_OP3_305_52717_20140619_062206_outLine +BABEL_OP3_305_53441_20140612_055846_inLine +BABEL_OP3_305_53441_20140612_055846_outLine +BABEL_OP3_305_56019_20150221_084856_inLine +BABEL_OP3_305_56019_20150221_084856_outLine +BABEL_OP3_305_58107_20150201_050424_inLine +BABEL_OP3_305_58107_20150201_050424_outLine +BABEL_OP3_305_58717_20150201_022141_inLine +BABEL_OP3_305_58717_20150201_022141_outLine +BABEL_OP3_305_61971_20150328_064233_inLine +BABEL_OP3_305_61971_20150328_064233_outLine +BABEL_OP3_305_66305_20150220_030810_inLine +BABEL_OP3_305_66305_20150220_030810_outLine +BABEL_OP3_305_67659_20140808_040651_inLine +BABEL_OP3_305_67659_20140808_040651_outLine +BABEL_OP3_305_73430_20150218_080038_inLine +BABEL_OP3_305_73430_20150218_080038_outLine +BABEL_OP3_305_73511_20150213_081754_inLine +BABEL_OP3_305_73511_20150213_081754_outLine +BABEL_OP3_305_76756_20150206_024436_inLine +BABEL_OP3_305_76756_20150206_024436_outLine +BABEL_OP3_305_78161_20150312_093559_inLine +BABEL_OP3_305_78161_20150312_093559_outLine +BABEL_OP3_305_78609_20141021_002844_inLine +BABEL_OP3_305_78609_20141021_002844_outLine +BABEL_OP3_305_81229_20140904_012832_inLine +BABEL_OP3_305_81229_20140904_012832_outLine +BABEL_OP3_305_81287_20150215_053321_inLine +BABEL_OP3_305_81287_20150215_053321_outLine +BABEL_OP3_305_81424_20150213_073659_inLine +BABEL_OP3_305_81424_20150213_073659_outLine +BABEL_OP3_305_84029_20140613_050741_inLine +BABEL_OP3_305_84029_20140613_050741_outLine +BABEL_OP3_305_84541_20140612_075946_inLine +BABEL_OP3_305_84541_20140612_075946_outLine +BABEL_OP3_305_84768_20140619_061958_inLine +BABEL_OP3_305_84768_20140619_061958_outLine +BABEL_OP3_305_86885_20140612_074001_inLine +BABEL_OP3_305_86885_20140612_074001_outLine +BABEL_OP3_305_88686_20140906_002505_inLine +BABEL_OP3_305_88686_20140906_002505_outLine +BABEL_OP3_305_90737_20141020_235210_inLine +BABEL_OP3_305_90737_20141020_235210_outLine +BABEL_OP3_305_91383_20150307_051712_inLine +BABEL_OP3_305_91383_20150307_051712_outLine +BABEL_OP3_305_96446_20140620_020014_inLine 
+BABEL_OP3_305_96446_20140620_020014_outLine +BABEL_OP3_305_97588_20140806_063029_inLine +BABEL_OP3_305_97588_20140806_063029_outLine +BABEL_OP3_305_97911_20150304_082443_inLine +BABEL_OP3_305_97911_20150304_082443_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/eval.list b/egs/babel/s5d/conf/lists/305-guarani/eval.list new file mode 100644 index 00000000000..517ff94f450 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/eval.list @@ -0,0 +1,186 @@ +BABEL_OP3_305_10036_20140807_033554_inLine +BABEL_OP3_305_10036_20140807_033554_outLine +BABEL_OP3_305_10188_20140614_030926_inLine +BABEL_OP3_305_10188_20140614_030926_outLine +BABEL_OP3_305_10482_20150214_093450_inLine +BABEL_OP3_305_10482_20150214_093450_outLine +BABEL_OP3_305_10638_20150318_093445_inLine +BABEL_OP3_305_10638_20150318_093445_outLine +BABEL_OP3_305_11352_20150219_080531_inLine +BABEL_OP3_305_11352_20150219_080531_outLine +BABEL_OP3_305_11528_20150328_055924_inLine +BABEL_OP3_305_11528_20150328_055924_outLine +BABEL_OP3_305_13126_20150222_063432_inLine +BABEL_OP3_305_13126_20150222_063432_outLine +BABEL_OP3_305_13586_20150122_061859_inLine +BABEL_OP3_305_13586_20150122_061859_outLine +BABEL_OP3_305_13744_20140615_004815_inLine +BABEL_OP3_305_13744_20140615_004815_outLine +BABEL_OP3_305_15163_20141021_042732_inLine +BABEL_OP3_305_15163_20141021_042732_outLine +BABEL_OP3_305_16475_20140910_050557_inLine +BABEL_OP3_305_16475_20140910_050557_outLine +BABEL_OP3_305_16726_20140620_054123_inLine +BABEL_OP3_305_16726_20140620_054123_outLine +BABEL_OP3_305_20724_20140612_032059_inLine +BABEL_OP3_305_20724_20140612_032059_outLine +BABEL_OP3_305_22494_20150210_082201_inLine +BABEL_OP3_305_22494_20150210_082201_outLine +BABEL_OP3_305_22643_20150331_062123_inLine +BABEL_OP3_305_22643_20150331_062123_outLine +BABEL_OP3_305_25895_20140611_072609_inLine +BABEL_OP3_305_25895_20140611_072609_outLine +BABEL_OP3_305_26206_20150212_091700_inLine +BABEL_OP3_305_26206_20150212_091700_outLine +BABEL_OP3_305_26388_20140731_014108_inLine +BABEL_OP3_305_26388_20140731_014108_outLine +BABEL_OP3_305_27082_20141016_051726_inLine +BABEL_OP3_305_27082_20141016_051726_outLine +BABEL_OP3_305_28775_20140621_055220_inLine +BABEL_OP3_305_28775_20140621_055220_outLine +BABEL_OP3_305_28945_20140906_010750_inLine +BABEL_OP3_305_28945_20140906_010750_outLine +BABEL_OP3_305_29023_20140821_052317_inLine +BABEL_OP3_305_29023_20140821_052317_outLine +BABEL_OP3_305_29023_20140821_053525_inLine +BABEL_OP3_305_29023_20140821_053525_outLine +BABEL_OP3_305_29765_20150307_085516_inLine +BABEL_OP3_305_29765_20150307_085516_outLine +BABEL_OP3_305_30461_20150310_062851_inLine +BABEL_OP3_305_30461_20150310_062851_outLine +BABEL_OP3_305_30653_20150219_091045_inLine +BABEL_OP3_305_30653_20150219_091045_outLine +BABEL_OP3_305_31484_20150212_074454_inLine +BABEL_OP3_305_31484_20150212_074454_outLine +BABEL_OP3_305_31628_20150208_021858_inLine +BABEL_OP3_305_31628_20150208_021858_outLine +BABEL_OP3_305_32301_20150212_011150_inLine +BABEL_OP3_305_32301_20150212_011150_outLine +BABEL_OP3_305_32328_20150122_041147_inLine +BABEL_OP3_305_32328_20150122_041147_outLine +BABEL_OP3_305_35139_20140822_065230_inLine +BABEL_OP3_305_35139_20140822_065230_outLine +BABEL_OP3_305_35181_20150221_032331_inLine +BABEL_OP3_305_35181_20150221_032331_outLine +BABEL_OP3_305_36505_20141211_012908_inLine +BABEL_OP3_305_36505_20141211_012908_outLine +BABEL_OP3_305_36505_20141211_014026_inLine +BABEL_OP3_305_36505_20141211_014026_outLine +BABEL_OP3_305_37684_20140612_021940_inLine 
+BABEL_OP3_305_37684_20140612_021940_outLine +BABEL_OP3_305_41174_20140904_033334_inLine +BABEL_OP3_305_41174_20140904_033334_outLine +BABEL_OP3_305_41920_20140618_052053_inLine +BABEL_OP3_305_41920_20140618_052053_outLine +BABEL_OP3_305_42600_20140731_005108_inLine +BABEL_OP3_305_42600_20140731_005108_outLine +BABEL_OP3_305_43788_20150208_044657_inLine +BABEL_OP3_305_43788_20150208_044657_outLine +BABEL_OP3_305_46315_20150214_012323_inLine +BABEL_OP3_305_46315_20150214_012323_outLine +BABEL_OP3_305_46625_20140618_065851_inLine +BABEL_OP3_305_46625_20140618_065851_outLine +BABEL_OP3_305_48758_20150220_092254_inLine +BABEL_OP3_305_48758_20150220_092254_outLine +BABEL_OP3_305_49216_20140615_041916_inLine +BABEL_OP3_305_49216_20140615_041916_outLine +BABEL_OP3_305_49637_20140619_051340_inLine +BABEL_OP3_305_49637_20140619_051340_outLine +BABEL_OP3_305_50175_20140620_063847_inLine +BABEL_OP3_305_50175_20140620_063847_outLine +BABEL_OP3_305_50630_20150211_101833_inLine +BABEL_OP3_305_50630_20150211_101833_outLine +BABEL_OP3_305_51417_20141028_072402_inLine +BABEL_OP3_305_51417_20141028_072402_outLine +BABEL_OP3_305_52438_20140621_014747_inLine +BABEL_OP3_305_52438_20140621_014747_outLine +BABEL_OP3_305_52804_20140822_074104_inLine +BABEL_OP3_305_52804_20140822_074104_outLine +BABEL_OP3_305_53758_20140611_060640_inLine +BABEL_OP3_305_53758_20140611_060640_outLine +BABEL_OP3_305_56468_20150327_024417_inLine +BABEL_OP3_305_56468_20150327_024417_outLine +BABEL_OP3_305_56677_20150226_094545_inLine +BABEL_OP3_305_56677_20150226_094545_outLine +BABEL_OP3_305_58821_20150217_093203_inLine +BABEL_OP3_305_58821_20150217_093203_outLine +BABEL_OP3_305_59163_20140614_065953_inLine +BABEL_OP3_305_59163_20140614_065953_outLine +BABEL_OP3_305_60538_20140619_023839_inLine +BABEL_OP3_305_60538_20140619_023839_outLine +BABEL_OP3_305_60661_20140822_055802_inLine +BABEL_OP3_305_60661_20140822_055802_outLine +BABEL_OP3_305_61011_20140911_035151_inLine +BABEL_OP3_305_61011_20140911_035151_outLine +BABEL_OP3_305_63484_20140614_064915_inLine +BABEL_OP3_305_63484_20140614_064915_outLine +BABEL_OP3_305_66916_20141021_002433_inLine +BABEL_OP3_305_66916_20141021_002433_outLine +BABEL_OP3_305_67152_20150228_091753_inLine +BABEL_OP3_305_67152_20150228_091753_outLine +BABEL_OP3_305_67894_20140614_021409_inLine +BABEL_OP3_305_67894_20140614_021409_outLine +BABEL_OP3_305_69633_20150211_042219_inLine +BABEL_OP3_305_69633_20150211_042219_outLine +BABEL_OP3_305_70386_20140823_041550_inLine +BABEL_OP3_305_70386_20140823_041550_outLine +BABEL_OP3_305_71614_20150220_005206_inLine +BABEL_OP3_305_71614_20150220_005206_outLine +BABEL_OP3_305_72007_20150218_073351_inLine +BABEL_OP3_305_72007_20150218_073351_outLine +BABEL_OP3_305_73072_20140620_003027_inLine +BABEL_OP3_305_73072_20140620_003027_outLine +BABEL_OP3_305_73622_20140731_060846_inLine +BABEL_OP3_305_73622_20140731_060846_outLine +BABEL_OP3_305_75930_20140613_043849_inLine +BABEL_OP3_305_75930_20140613_043849_outLine +BABEL_OP3_305_75930_20140613_045058_inLine +BABEL_OP3_305_75930_20140613_045058_outLine +BABEL_OP3_305_76218_20140912_034653_inLine +BABEL_OP3_305_76218_20140912_034653_outLine +BABEL_OP3_305_77112_20141017_061539_inLine +BABEL_OP3_305_77112_20141017_061539_outLine +BABEL_OP3_305_78398_20140807_031509_inLine +BABEL_OP3_305_78398_20140807_031509_outLine +BABEL_OP3_305_78543_20150307_102417_inLine +BABEL_OP3_305_78543_20150307_102417_outLine +BABEL_OP3_305_78604_20140801_052426_inLine +BABEL_OP3_305_78604_20140801_052426_outLine 
+BABEL_OP3_305_79107_20150301_081556_inLine +BABEL_OP3_305_79107_20150301_081556_outLine +BABEL_OP3_305_80383_20140612_015419_inLine +BABEL_OP3_305_80383_20140612_015419_outLine +BABEL_OP3_305_81404_20140821_230151_inLine +BABEL_OP3_305_81404_20140821_230151_outLine +BABEL_OP3_305_83775_20140808_011711_inLine +BABEL_OP3_305_83775_20140808_011711_outLine +BABEL_OP3_305_84370_20150301_074935_inLine +BABEL_OP3_305_84370_20150301_074935_outLine +BABEL_OP3_305_84466_20150220_080109_inLine +BABEL_OP3_305_84466_20150220_080109_outLine +BABEL_OP3_305_86676_20150207_083733_inLine +BABEL_OP3_305_86676_20150207_083733_outLine +BABEL_OP3_305_87074_20140821_062711_inLine +BABEL_OP3_305_87074_20140821_062711_outLine +BABEL_OP3_305_87693_20140913_020630_inLine +BABEL_OP3_305_87693_20140913_020630_outLine +BABEL_OP3_305_88372_20150301_060900_inLine +BABEL_OP3_305_88372_20150301_060900_outLine +BABEL_OP3_305_88661_20150207_095906_inLine +BABEL_OP3_305_88661_20150207_095906_outLine +BABEL_OP3_305_88982_20150122_052417_inLine +BABEL_OP3_305_88982_20150122_052417_outLine +BABEL_OP3_305_88988_20150318_090506_inLine +BABEL_OP3_305_88988_20150318_090506_outLine +BABEL_OP3_305_89059_20141028_073001_inLine +BABEL_OP3_305_89059_20141028_073001_outLine +BABEL_OP3_305_89226_20140614_041314_inLine +BABEL_OP3_305_89226_20140614_041314_outLine +BABEL_OP3_305_90935_20140808_010525_inLine +BABEL_OP3_305_90935_20140808_010525_outLine +BABEL_OP3_305_93007_20150311_015743_inLine +BABEL_OP3_305_93007_20150311_015743_outLine +BABEL_OP3_305_95663_20140806_050031_inLine +BABEL_OP3_305_95663_20140806_050031_outLine +BABEL_OP3_305_96910_20140911_043108_inLine +BABEL_OP3_305_96910_20140911_043108_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/sub-train.list b/egs/babel/s5d/conf/lists/305-guarani/sub-train.list new file mode 100644 index 00000000000..ff3a375b9e6 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/sub-train.list @@ -0,0 +1,134 @@ +BABEL_OP3_305_11419_20140620_004343_inLine +BABEL_OP3_305_11419_20140620_004343_outLine +BABEL_OP3_305_12242_20140808_034042_inLine +BABEL_OP3_305_12242_20140808_034042_outLine +BABEL_OP3_305_12242_20140808_035409_inLine +BABEL_OP3_305_12242_20140808_035409_outLine +BABEL_OP3_305_14814_20140621_005436_inLine +BABEL_OP3_305_14814_20140621_005436_outLine +BABEL_OP3_305_14814_20140621_011333_inLine +BABEL_OP3_305_14814_20140621_011333_outLine +BABEL_OP3_305_15926_20150211_090843_inLine +BABEL_OP3_305_15926_20150211_090843_outLine +BABEL_OP3_305_17032_20150213_094305_inLine +BABEL_OP3_305_17032_20150213_094305_outLine +BABEL_OP3_305_17032_20150213_095552_inLine +BABEL_OP3_305_17032_20150213_095552_outLine +BABEL_OP3_305_20454_20140619_022112_inLine +BABEL_OP3_305_20454_20140619_022112_outLine +BABEL_OP3_305_21543_20141018_050405_inLine +BABEL_OP3_305_21543_20141018_050405_outLine +BABEL_OP3_305_21794_20141021_022208_inLine +BABEL_OP3_305_21794_20141021_022208_outLine +BABEL_OP3_305_30180_20140906_231005_inLine +BABEL_OP3_305_30180_20140906_231005_outLine +BABEL_OP3_305_33111_20150228_023906_inLine +BABEL_OP3_305_33111_20150228_023906_outLine +BABEL_OP3_305_33149_20141021_034616_inLine +BABEL_OP3_305_33149_20141021_034616_outLine +BABEL_OP3_305_33251_20150130_021517_inLine +BABEL_OP3_305_33251_20150130_021517_outLine +BABEL_OP3_305_34629_20150327_010455_inLine +BABEL_OP3_305_34629_20150327_010455_outLine +BABEL_OP3_305_35467_20140806_032442_inLine +BABEL_OP3_305_35467_20140806_032442_outLine +BABEL_OP3_305_35706_20150221_093541_inLine +BABEL_OP3_305_35706_20150221_093541_outLine 
+BABEL_OP3_305_37007_20150331_081658_inLine +BABEL_OP3_305_37007_20150331_081658_outLine +BABEL_OP3_305_38664_20140807_042817_inLine +BABEL_OP3_305_38664_20140807_042817_outLine +BABEL_OP3_305_43368_20140822_071919_inLine +BABEL_OP3_305_43368_20140822_071919_outLine +BABEL_OP3_305_45486_20150331_070439_inLine +BABEL_OP3_305_45486_20150331_070439_outLine +BABEL_OP3_305_46681_20140729_053142_inLine +BABEL_OP3_305_46681_20140729_053142_outLine +BABEL_OP3_305_46688_20140620_060408_inLine +BABEL_OP3_305_46688_20140620_060408_outLine +BABEL_OP3_305_46757_20150211_011836_inLine +BABEL_OP3_305_46757_20150211_011836_outLine +BABEL_OP3_305_46757_20150211_013224_inLine +BABEL_OP3_305_46757_20150211_013224_outLine +BABEL_OP3_305_48844_20140621_034908_inLine +BABEL_OP3_305_48844_20140621_034908_outLine +BABEL_OP3_305_48844_20140621_035628_inLine +BABEL_OP3_305_48844_20140621_035628_outLine +BABEL_OP3_305_49768_20140731_031152_inLine +BABEL_OP3_305_49768_20140731_031152_outLine +BABEL_OP3_305_51015_20150211_235649_inLine +BABEL_OP3_305_51015_20150211_235649_outLine +BABEL_OP3_305_51611_20140619_070031_inLine +BABEL_OP3_305_51611_20140619_070031_outLine +BABEL_OP3_305_51611_20140619_071006_inLine +BABEL_OP3_305_51611_20140619_071006_outLine +BABEL_OP3_305_52725_20150227_111722_inLine +BABEL_OP3_305_52725_20150227_111722_outLine +BABEL_OP3_305_55815_20140612_000452_inLine +BABEL_OP3_305_55815_20140612_000452_outLine +BABEL_OP3_305_55818_20140620_003329_inLine +BABEL_OP3_305_55818_20140620_003329_outLine +BABEL_OP3_305_56198_20140904_224843_inLine +BABEL_OP3_305_56198_20140904_224843_outLine +BABEL_OP3_305_57116_20140618_021028_inLine +BABEL_OP3_305_57116_20140618_021028_outLine +BABEL_OP3_305_57654_20140917_034820_inLine +BABEL_OP3_305_57654_20140917_034820_outLine +BABEL_OP3_305_58061_20150326_103607_inLine +BABEL_OP3_305_58061_20150326_103607_outLine +BABEL_OP3_305_58734_20140620_003259_inLine +BABEL_OP3_305_58734_20140620_003259_outLine +BABEL_OP3_305_62158_20150313_013514_inLine +BABEL_OP3_305_62158_20150313_013514_outLine +BABEL_OP3_305_62734_20140821_221916_inLine +BABEL_OP3_305_62734_20140821_221916_outLine +BABEL_OP3_305_62852_20140618_072924_inLine +BABEL_OP3_305_62852_20140618_072924_outLine +BABEL_OP3_305_65466_20150222_074001_inLine +BABEL_OP3_305_65466_20150222_074001_outLine +BABEL_OP3_305_66967_20140618_044613_inLine +BABEL_OP3_305_66967_20140618_044613_outLine +BABEL_OP3_305_67373_20140822_005349_inLine +BABEL_OP3_305_67373_20140822_005349_outLine +BABEL_OP3_305_67389_20150317_083510_inLine +BABEL_OP3_305_67389_20150317_083510_outLine +BABEL_OP3_305_68068_20150206_100103_inLine +BABEL_OP3_305_68068_20150206_100103_outLine +BABEL_OP3_305_69090_20141018_010121_inLine +BABEL_OP3_305_69090_20141018_010121_outLine +BABEL_OP3_305_70251_20140618_233739_inLine +BABEL_OP3_305_70251_20140618_233739_outLine +BABEL_OP3_305_71333_20140808_025232_inLine +BABEL_OP3_305_71333_20140808_025232_outLine +BABEL_OP3_305_73301_20140808_235747_inLine +BABEL_OP3_305_73301_20140808_235747_outLine +BABEL_OP3_305_77225_20140612_003002_inLine +BABEL_OP3_305_77225_20140612_003002_outLine +BABEL_OP3_305_79028_20140621_005114_inLine +BABEL_OP3_305_79028_20140621_005114_outLine +BABEL_OP3_305_82626_20150307_100633_inLine +BABEL_OP3_305_82626_20150307_100633_outLine +BABEL_OP3_305_83436_20140619_060309_inLine +BABEL_OP3_305_83436_20140619_060309_outLine +BABEL_OP3_305_83935_20150213_091523_inLine +BABEL_OP3_305_83935_20150213_091523_outLine +BABEL_OP3_305_84055_20150221_083133_inLine 
+BABEL_OP3_305_84055_20150221_083133_outLine +BABEL_OP3_305_84079_20140613_053813_inLine +BABEL_OP3_305_84079_20140613_053813_outLine +BABEL_OP3_305_84605_20140903_033325_inLine +BABEL_OP3_305_84605_20140903_033325_outLine +BABEL_OP3_305_84605_20140903_034415_inLine +BABEL_OP3_305_84605_20140903_034415_outLine +BABEL_OP3_305_86433_20150211_094926_inLine +BABEL_OP3_305_86433_20150211_094926_outLine +BABEL_OP3_305_92941_20140911_002247_inLine +BABEL_OP3_305_92941_20140911_002247_outLine +BABEL_OP3_305_95269_20140912_000910_inLine +BABEL_OP3_305_95269_20140912_000910_outLine +BABEL_OP3_305_96041_20140611_065313_inLine +BABEL_OP3_305_96041_20140611_065313_outLine +BABEL_OP3_305_97220_20150303_234352_inLine +BABEL_OP3_305_97220_20150303_234352_outLine +BABEL_OP3_305_98192_20150306_053152_inLine +BABEL_OP3_305_98192_20150306_053152_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/305-guarani/sub-train.untranscribed.list new file mode 100644 index 00000000000..165c7e95f06 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/sub-train.untranscribed.list @@ -0,0 +1,392 @@ +BABEL_OP3_305_10901_20141017_014336_inLine +BABEL_OP3_305_10901_20141017_014336_outLine +BABEL_OP3_305_11723_20141021_053536_inLine +BABEL_OP3_305_11723_20141021_053536_outLine +BABEL_OP3_305_12851_20140618_061651_inLine +BABEL_OP3_305_12851_20140618_061651_outLine +BABEL_OP3_305_13040_20140621_000510_inLine +BABEL_OP3_305_13040_20140621_000510_outLine +BABEL_OP3_305_13189_20141107_012921_inLine +BABEL_OP3_305_13189_20141107_012921_outLine +BABEL_OP3_305_13664_20140615_011412_inLine +BABEL_OP3_305_13664_20140615_011412_outLine +BABEL_OP3_305_14158_20150207_011013_inLine +BABEL_OP3_305_14158_20150207_011013_outLine +BABEL_OP3_305_15617_20150318_012704_inLine +BABEL_OP3_305_15617_20150318_012704_outLine +BABEL_OP3_305_15638_20150212_081118_inLine +BABEL_OP3_305_15638_20150212_081118_outLine +BABEL_OP3_305_16249_20140615_022748_inLine +BABEL_OP3_305_16249_20140615_022748_outLine +BABEL_OP3_305_16886_20141016_232346_inLine +BABEL_OP3_305_16886_20141016_232346_outLine +BABEL_OP3_305_17115_20150310_055940_inLine +BABEL_OP3_305_17115_20150310_055940_outLine +BABEL_OP3_305_17511_20150118_093132_inLine +BABEL_OP3_305_17511_20150118_093132_outLine +BABEL_OP3_305_17511_20150118_094117_inLine +BABEL_OP3_305_17511_20150118_094117_outLine +BABEL_OP3_305_17881_20150220_094906_inLine +BABEL_OP3_305_17881_20150220_094906_outLine +BABEL_OP3_305_17890_20150212_094355_inLine +BABEL_OP3_305_17890_20150212_094355_outLine +BABEL_OP3_305_17923_20140801_003933_inLine +BABEL_OP3_305_17923_20140801_003933_outLine +BABEL_OP3_305_18037_20140611_044623_inLine +BABEL_OP3_305_18037_20140611_044623_outLine +BABEL_OP3_305_18566_20150219_072100_inLine +BABEL_OP3_305_18566_20150219_072100_outLine +BABEL_OP3_305_19101_20150123_042130_inLine +BABEL_OP3_305_19101_20150123_042130_outLine +BABEL_OP3_305_19101_20150123_043206_inLine +BABEL_OP3_305_19101_20150123_043206_outLine +BABEL_OP3_305_19621_20150122_072624_inLine +BABEL_OP3_305_19621_20150122_072624_outLine +BABEL_OP3_305_20922_20141107_000604_inLine +BABEL_OP3_305_20922_20141107_000604_outLine +BABEL_OP3_305_21581_20140822_033738_inLine +BABEL_OP3_305_21581_20140822_033738_outLine +BABEL_OP3_305_22624_20150215_050752_inLine +BABEL_OP3_305_22624_20150215_050752_outLine +BABEL_OP3_305_22624_20150215_051632_inLine +BABEL_OP3_305_22624_20150215_051632_outLine +BABEL_OP3_305_23190_20140907_002648_inLine +BABEL_OP3_305_23190_20140907_002648_outLine 
+BABEL_OP3_305_23195_20150328_071332_inLine +BABEL_OP3_305_23195_20150328_071332_outLine +BABEL_OP3_305_23752_20150319_043326_inLine +BABEL_OP3_305_23752_20150319_043326_outLine +BABEL_OP3_305_24323_20141021_014706_inLine +BABEL_OP3_305_24323_20141021_014706_outLine +BABEL_OP3_305_24586_20150227_100127_inLine +BABEL_OP3_305_24586_20150227_100127_outLine +BABEL_OP3_305_24589_20140822_030512_inLine +BABEL_OP3_305_24589_20140822_030512_outLine +BABEL_OP3_305_24924_20150306_061542_inLine +BABEL_OP3_305_24924_20150306_061542_outLine +BABEL_OP3_305_25220_20150311_004737_inLine +BABEL_OP3_305_25220_20150311_004737_outLine +BABEL_OP3_305_25412_20150123_041255_inLine +BABEL_OP3_305_25412_20150123_041255_outLine +BABEL_OP3_305_27042_20150124_044459_inLine +BABEL_OP3_305_27042_20150124_044459_outLine +BABEL_OP3_305_27125_20140618_065021_inLine +BABEL_OP3_305_27125_20140618_065021_outLine +BABEL_OP3_305_28303_20140806_030759_inLine +BABEL_OP3_305_28303_20140806_030759_outLine +BABEL_OP3_305_28477_20141107_050727_inLine +BABEL_OP3_305_28477_20141107_050727_outLine +BABEL_OP3_305_28606_20150213_101119_inLine +BABEL_OP3_305_28606_20150213_101119_outLine +BABEL_OP3_305_29072_20150212_084053_inLine +BABEL_OP3_305_29072_20150212_084053_outLine +BABEL_OP3_305_30280_20150310_080905_inLine +BABEL_OP3_305_30280_20150310_080905_outLine +BABEL_OP3_305_30869_20141030_043630_inLine +BABEL_OP3_305_30869_20141030_043630_outLine +BABEL_OP3_305_31668_20150313_021804_inLine +BABEL_OP3_305_31668_20150313_021804_outLine +BABEL_OP3_305_32708_20140822_052506_inLine +BABEL_OP3_305_32708_20140822_052506_outLine +BABEL_OP3_305_33355_20140619_231328_inLine +BABEL_OP3_305_33355_20140619_231328_outLine +BABEL_OP3_305_33635_20141021_015047_inLine +BABEL_OP3_305_33635_20141021_015047_outLine +BABEL_OP3_305_34145_20150211_103633_inLine +BABEL_OP3_305_34145_20150211_103633_outLine +BABEL_OP3_305_34410_20150319_085843_inLine +BABEL_OP3_305_34410_20150319_085843_outLine +BABEL_OP3_305_35008_20150214_095953_inLine +BABEL_OP3_305_35008_20150214_095953_outLine +BABEL_OP3_305_35609_20150310_091253_inLine +BABEL_OP3_305_35609_20150310_091253_outLine +BABEL_OP3_305_36147_20140612_063038_inLine +BABEL_OP3_305_36147_20140612_063038_outLine +BABEL_OP3_305_37285_20150213_015416_inLine +BABEL_OP3_305_37285_20150213_015416_outLine +BABEL_OP3_305_38741_20140906_040000_inLine +BABEL_OP3_305_38741_20140906_040000_outLine +BABEL_OP3_305_39638_20150328_073733_inLine +BABEL_OP3_305_39638_20150328_073733_outLine +BABEL_OP3_305_39920_20150301_070243_inLine +BABEL_OP3_305_39920_20150301_070243_outLine +BABEL_OP3_305_40092_20140611_040031_inLine +BABEL_OP3_305_40092_20140611_040031_outLine +BABEL_OP3_305_40565_20150210_092106_inLine +BABEL_OP3_305_40565_20150210_092106_outLine +BABEL_OP3_305_41334_20150305_082911_inLine +BABEL_OP3_305_41334_20150305_082911_outLine +BABEL_OP3_305_42231_20150217_080721_inLine +BABEL_OP3_305_42231_20150217_080721_outLine +BABEL_OP3_305_42434_20140822_053733_inLine +BABEL_OP3_305_42434_20140822_053733_outLine +BABEL_OP3_305_42497_20140823_034443_inLine +BABEL_OP3_305_42497_20140823_034443_outLine +BABEL_OP3_305_43789_20141017_015101_inLine +BABEL_OP3_305_43789_20141017_015101_outLine +BABEL_OP3_305_43990_20150312_102420_inLine +BABEL_OP3_305_43990_20150312_102420_outLine +BABEL_OP3_305_44868_20150206_083108_inLine +BABEL_OP3_305_44868_20150206_083108_outLine +BABEL_OP3_305_44961_20140619_013154_inLine +BABEL_OP3_305_44961_20140619_013154_outLine +BABEL_OP3_305_46558_20140905_012017_inLine 
+BABEL_OP3_305_46558_20140905_012017_outLine +BABEL_OP3_305_46558_20140905_013000_inLine +BABEL_OP3_305_46558_20140905_013000_outLine +BABEL_OP3_305_46589_20150207_091824_inLine +BABEL_OP3_305_46589_20150207_091824_outLine +BABEL_OP3_305_46702_20140619_050719_inLine +BABEL_OP3_305_46702_20140619_050719_outLine +BABEL_OP3_305_47823_20150214_081513_inLine +BABEL_OP3_305_47823_20150214_081513_outLine +BABEL_OP3_305_49641_20140613_041400_inLine +BABEL_OP3_305_49641_20140613_041400_outLine +BABEL_OP3_305_49902_20140809_050813_inLine +BABEL_OP3_305_49902_20140809_050813_outLine +BABEL_OP3_305_50186_20140619_044546_inLine +BABEL_OP3_305_50186_20140619_044546_outLine +BABEL_OP3_305_50186_20140619_045904_inLine +BABEL_OP3_305_50186_20140619_045904_outLine +BABEL_OP3_305_50565_20140612_072129_inLine +BABEL_OP3_305_50565_20140612_072129_outLine +BABEL_OP3_305_50745_20150219_082842_inLine +BABEL_OP3_305_50745_20150219_082842_outLine +BABEL_OP3_305_51819_20150210_085538_inLine +BABEL_OP3_305_51819_20150210_085538_outLine +BABEL_OP3_305_52404_20150208_070706_inLine +BABEL_OP3_305_52404_20150208_070706_outLine +BABEL_OP3_305_52818_20150206_104316_inLine +BABEL_OP3_305_52818_20150206_104316_outLine +BABEL_OP3_305_52854_20140620_010725_inLine +BABEL_OP3_305_52854_20140620_010725_outLine +BABEL_OP3_305_53144_20150220_084533_inLine +BABEL_OP3_305_53144_20150220_084533_outLine +BABEL_OP3_305_54594_20150114_073509_inLine +BABEL_OP3_305_54594_20150114_073509_outLine +BABEL_OP3_305_55042_20140614_022059_inLine +BABEL_OP3_305_55042_20140614_022059_outLine +BABEL_OP3_305_55106_20150221_080452_inLine +BABEL_OP3_305_55106_20150221_080452_outLine +BABEL_OP3_305_57609_20150127_040742_inLine +BABEL_OP3_305_57609_20150127_040742_outLine +BABEL_OP3_305_57935_20150203_072757_inLine +BABEL_OP3_305_57935_20150203_072757_outLine +BABEL_OP3_305_59549_20140620_001253_inLine +BABEL_OP3_305_59549_20140620_001253_outLine +BABEL_OP3_305_59720_20140807_043323_inLine +BABEL_OP3_305_59720_20140807_043323_outLine +BABEL_OP3_305_60115_20150211_025109_inLine +BABEL_OP3_305_60115_20150211_025109_outLine +BABEL_OP3_305_60282_20140612_025229_inLine +BABEL_OP3_305_60282_20140612_025229_outLine +BABEL_OP3_305_60477_20150304_092057_inLine +BABEL_OP3_305_60477_20150304_092057_outLine +BABEL_OP3_305_60626_20141018_012739_inLine +BABEL_OP3_305_60626_20141018_012739_outLine +BABEL_OP3_305_60650_20150331_055502_inLine +BABEL_OP3_305_60650_20150331_055502_outLine +BABEL_OP3_305_60830_20141017_004525_inLine +BABEL_OP3_305_60830_20141017_004525_outLine +BABEL_OP3_305_60830_20141017_053807_inLine +BABEL_OP3_305_60830_20141017_053807_outLine +BABEL_OP3_305_61348_20141017_014818_inLine +BABEL_OP3_305_61348_20141017_014818_outLine +BABEL_OP3_305_61348_20141017_060653_inLine +BABEL_OP3_305_61348_20141017_060653_outLine +BABEL_OP3_305_61873_20150123_024415_inLine +BABEL_OP3_305_61873_20150123_024415_outLine +BABEL_OP3_305_62200_20141017_014602_inLine +BABEL_OP3_305_62200_20141017_014602_outLine +BABEL_OP3_305_62471_20140619_072350_inLine +BABEL_OP3_305_62471_20140619_072350_outLine +BABEL_OP3_305_63084_20150207_074116_inLine +BABEL_OP3_305_63084_20150207_074116_outLine +BABEL_OP3_305_64469_20140620_063122_inLine +BABEL_OP3_305_64469_20140620_063122_outLine +BABEL_OP3_305_64768_20140822_043008_inLine +BABEL_OP3_305_64768_20140822_043008_outLine +BABEL_OP3_305_64902_20150220_102326_inLine +BABEL_OP3_305_64902_20150220_102326_outLine +BABEL_OP3_305_65477_20141016_234600_inLine +BABEL_OP3_305_65477_20141016_234600_outLine 
+BABEL_OP3_305_65477_20141016_235812_inLine +BABEL_OP3_305_65477_20141016_235812_outLine +BABEL_OP3_305_65692_20150127_044937_inLine +BABEL_OP3_305_65692_20150127_044937_outLine +BABEL_OP3_305_66045_20140822_062953_inLine +BABEL_OP3_305_66045_20140822_062953_outLine +BABEL_OP3_305_66177_20150221_091456_inLine +BABEL_OP3_305_66177_20150221_091456_outLine +BABEL_OP3_305_66975_20140615_024703_inLine +BABEL_OP3_305_66975_20140615_024703_outLine +BABEL_OP3_305_67053_20150312_031258_inLine +BABEL_OP3_305_67053_20150312_031258_outLine +BABEL_OP3_305_67283_20140618_075016_inLine +BABEL_OP3_305_67283_20140618_075016_outLine +BABEL_OP3_305_67842_20140906_014501_inLine +BABEL_OP3_305_67842_20140906_014501_outLine +BABEL_OP3_305_68244_20150208_045135_inLine +BABEL_OP3_305_68244_20150208_045135_outLine +BABEL_OP3_305_68668_20140614_053023_inLine +BABEL_OP3_305_68668_20140614_053023_outLine +BABEL_OP3_305_69574_20140618_231512_inLine +BABEL_OP3_305_69574_20140618_231512_outLine +BABEL_OP3_305_70282_20150127_012555_inLine +BABEL_OP3_305_70282_20150127_012555_outLine +BABEL_OP3_305_70794_20140614_073231_inLine +BABEL_OP3_305_70794_20140614_073231_outLine +BABEL_OP3_305_70986_20150320_092518_inLine +BABEL_OP3_305_70986_20150320_092518_outLine +BABEL_OP3_305_71189_20150227_092723_inLine +BABEL_OP3_305_71189_20150227_092723_outLine +BABEL_OP3_305_71278_20140614_040622_inLine +BABEL_OP3_305_71278_20140614_040622_outLine +BABEL_OP3_305_71282_20141028_054244_inLine +BABEL_OP3_305_71282_20141028_054244_outLine +BABEL_OP3_305_71566_20150217_074338_inLine +BABEL_OP3_305_71566_20150217_074338_outLine +BABEL_OP3_305_72110_20150214_074424_inLine +BABEL_OP3_305_72110_20150214_074424_outLine +BABEL_OP3_305_72903_20140612_021516_inLine +BABEL_OP3_305_72903_20140612_021516_outLine +BABEL_OP3_305_74667_20140904_050532_inLine +BABEL_OP3_305_74667_20140904_050532_outLine +BABEL_OP3_305_74763_20140612_011204_inLine +BABEL_OP3_305_74763_20140612_011204_outLine +BABEL_OP3_305_74921_20150208_081422_inLine +BABEL_OP3_305_74921_20150208_081422_outLine +BABEL_OP3_305_75064_20140621_012128_inLine +BABEL_OP3_305_75064_20140621_012128_outLine +BABEL_OP3_305_75223_20140618_232223_inLine +BABEL_OP3_305_75223_20140618_232223_outLine +BABEL_OP3_305_76126_20150122_051345_inLine +BABEL_OP3_305_76126_20150122_051345_outLine +BABEL_OP3_305_76437_20140615_023448_inLine +BABEL_OP3_305_76437_20140615_023448_outLine +BABEL_OP3_305_77744_20140821_052246_inLine +BABEL_OP3_305_77744_20140821_052246_outLine +BABEL_OP3_305_77909_20140613_035103_inLine +BABEL_OP3_305_77909_20140613_035103_outLine +BABEL_OP3_305_78016_20140821_222210_inLine +BABEL_OP3_305_78016_20140821_222210_outLine +BABEL_OP3_305_79167_20150208_063508_inLine +BABEL_OP3_305_79167_20150208_063508_outLine +BABEL_OP3_305_79571_20150211_095226_inLine +BABEL_OP3_305_79571_20150211_095226_outLine +BABEL_OP3_305_79751_20140821_233858_inLine +BABEL_OP3_305_79751_20140821_233858_outLine +BABEL_OP3_305_80241_20140612_015921_inLine +BABEL_OP3_305_80241_20140612_015921_outLine +BABEL_OP3_305_80577_20150221_073930_inLine +BABEL_OP3_305_80577_20150221_073930_outLine +BABEL_OP3_305_81213_20140822_005322_inLine +BABEL_OP3_305_81213_20140822_005322_outLine +BABEL_OP3_305_82622_20140619_013825_inLine +BABEL_OP3_305_82622_20140619_013825_outLine +BABEL_OP3_305_84061_20140807_063818_inLine +BABEL_OP3_305_84061_20140807_063818_outLine +BABEL_OP3_305_84177_20141021_041721_inLine +BABEL_OP3_305_84177_20141021_041721_outLine +BABEL_OP3_305_84177_20141021_043314_inLine 
+BABEL_OP3_305_84177_20141021_043314_outLine +BABEL_OP3_305_84327_20150213_084928_inLine +BABEL_OP3_305_84327_20150213_084928_outLine +BABEL_OP3_305_84823_20150122_033347_inLine +BABEL_OP3_305_84823_20150122_033347_outLine +BABEL_OP3_305_84838_20141022_010706_inLine +BABEL_OP3_305_84838_20141022_010706_outLine +BABEL_OP3_305_85047_20150122_030457_inLine +BABEL_OP3_305_85047_20150122_030457_outLine +BABEL_OP3_305_85254_20150303_094409_inLine +BABEL_OP3_305_85254_20150303_094409_outLine +BABEL_OP3_305_85260_20140611_042042_inLine +BABEL_OP3_305_85260_20140611_042042_outLine +BABEL_OP3_305_85322_20140822_031748_inLine +BABEL_OP3_305_85322_20140822_031748_outLine +BABEL_OP3_305_85340_20140826_232921_inLine +BABEL_OP3_305_85340_20140826_232921_outLine +BABEL_OP3_305_85519_20141022_033125_inLine +BABEL_OP3_305_85519_20141022_033125_outLine +BABEL_OP3_305_86100_20140612_024853_inLine +BABEL_OP3_305_86100_20140612_024853_outLine +BABEL_OP3_305_86321_20150218_084806_inLine +BABEL_OP3_305_86321_20150218_084806_outLine +BABEL_OP3_305_86597_20150305_020834_inLine +BABEL_OP3_305_86597_20150305_020834_outLine +BABEL_OP3_305_86830_20141028_051738_inLine +BABEL_OP3_305_86830_20141028_051738_outLine +BABEL_OP3_305_86878_20150115_115301_inLine +BABEL_OP3_305_86878_20150115_115301_outLine +BABEL_OP3_305_87179_20141021_010357_inLine +BABEL_OP3_305_87179_20141021_010357_outLine +BABEL_OP3_305_88669_20150226_082958_inLine +BABEL_OP3_305_88669_20150226_082958_outLine +BABEL_OP3_305_89330_20150305_075359_inLine +BABEL_OP3_305_89330_20150305_075359_outLine +BABEL_OP3_305_89372_20140806_054633_inLine +BABEL_OP3_305_89372_20140806_054633_outLine +BABEL_OP3_305_89650_20140606_064449_inLine +BABEL_OP3_305_89650_20140606_064449_outLine +BABEL_OP3_305_90572_20150221_011508_inLine +BABEL_OP3_305_90572_20150221_011508_outLine +BABEL_OP3_305_90739_20140910_010202_inLine +BABEL_OP3_305_90739_20140910_010202_outLine +BABEL_OP3_305_90739_20140910_011127_inLine +BABEL_OP3_305_90739_20140910_011127_outLine +BABEL_OP3_305_90777_20140910_031558_inLine +BABEL_OP3_305_90777_20140910_031558_outLine +BABEL_OP3_305_90930_20140612_073132_inLine +BABEL_OP3_305_90930_20140612_073132_outLine +BABEL_OP3_305_91252_20140612_013640_inLine +BABEL_OP3_305_91252_20140612_013640_outLine +BABEL_OP3_305_91463_20150204_001924_inLine +BABEL_OP3_305_91463_20150204_001924_outLine +BABEL_OP3_305_91977_20150210_012536_inLine +BABEL_OP3_305_91977_20150210_012536_outLine +BABEL_OP3_305_92281_20150312_104117_inLine +BABEL_OP3_305_92281_20150312_104117_outLine +BABEL_OP3_305_92509_20140620_020408_inLine +BABEL_OP3_305_92509_20140620_020408_outLine +BABEL_OP3_305_92605_20150312_090817_inLine +BABEL_OP3_305_92605_20150312_090817_outLine +BABEL_OP3_305_92740_20150210_020753_inLine +BABEL_OP3_305_92740_20150210_020753_outLine +BABEL_OP3_305_93320_20150305_072620_inLine +BABEL_OP3_305_93320_20150305_072620_outLine +BABEL_OP3_305_93681_20141018_044334_inLine +BABEL_OP3_305_93681_20141018_044334_outLine +BABEL_OP3_305_93861_20150123_004904_inLine +BABEL_OP3_305_93861_20150123_004904_outLine +BABEL_OP3_305_94253_20140902_015125_inLine +BABEL_OP3_305_94253_20140902_015125_outLine +BABEL_OP3_305_94587_20150214_091538_inLine +BABEL_OP3_305_94587_20150214_091538_outLine +BABEL_OP3_305_94713_20150114_082431_inLine +BABEL_OP3_305_94713_20150114_082431_outLine +BABEL_OP3_305_95598_20140615_025323_inLine +BABEL_OP3_305_95598_20140615_025323_outLine +BABEL_OP3_305_95903_20150222_060503_inLine +BABEL_OP3_305_95903_20150222_060503_outLine 
+BABEL_OP3_305_95942_20150227_105233_inLine +BABEL_OP3_305_95942_20150227_105233_outLine +BABEL_OP3_305_96504_20140808_230422_inLine +BABEL_OP3_305_96504_20140808_230422_outLine +BABEL_OP3_305_96504_20140808_231336_inLine +BABEL_OP3_305_96504_20140808_231336_outLine +BABEL_OP3_305_97772_20140618_074519_inLine +BABEL_OP3_305_97772_20140618_074519_outLine +BABEL_OP3_305_97896_20140904_071346_inLine +BABEL_OP3_305_97896_20140904_071346_outLine +BABEL_OP3_305_98255_20150115_095803_inLine +BABEL_OP3_305_98255_20150115_095803_outLine +BABEL_OP3_305_98255_20150115_101856_inLine +BABEL_OP3_305_98255_20150115_101856_outLine +BABEL_OP3_305_98909_20140730_054930_inLine +BABEL_OP3_305_98909_20140730_054930_outLine +BABEL_OP3_305_98909_20140730_055859_inLine +BABEL_OP3_305_98909_20140730_055859_outLine +BABEL_OP3_305_99289_20150227_102036_inLine +BABEL_OP3_305_99289_20150227_102036_outLine +BABEL_OP3_305_99516_20140620_054149_inLine +BABEL_OP3_305_99516_20140620_054149_outLine diff --git a/egs/babel/s5d/conf/lists/305-guarani/training.list b/egs/babel/s5d/conf/lists/305-guarani/training.list new file mode 100644 index 00000000000..d191e6ac974 --- /dev/null +++ b/egs/babel/s5d/conf/lists/305-guarani/training.list @@ -0,0 +1,526 @@ +BABEL_OP3_305_10901_20141017_014336_inLine +BABEL_OP3_305_10901_20141017_014336_outLine +BABEL_OP3_305_11419_20140620_004343_inLine +BABEL_OP3_305_11419_20140620_004343_outLine +BABEL_OP3_305_11723_20141021_053536_inLine +BABEL_OP3_305_11723_20141021_053536_outLine +BABEL_OP3_305_12242_20140808_034042_inLine +BABEL_OP3_305_12242_20140808_034042_outLine +BABEL_OP3_305_12242_20140808_035409_inLine +BABEL_OP3_305_12242_20140808_035409_outLine +BABEL_OP3_305_12851_20140618_061651_inLine +BABEL_OP3_305_12851_20140618_061651_outLine +BABEL_OP3_305_13040_20140621_000510_inLine +BABEL_OP3_305_13040_20140621_000510_outLine +BABEL_OP3_305_13189_20141107_012921_inLine +BABEL_OP3_305_13189_20141107_012921_outLine +BABEL_OP3_305_13664_20140615_011412_inLine +BABEL_OP3_305_13664_20140615_011412_outLine +BABEL_OP3_305_14158_20150207_011013_inLine +BABEL_OP3_305_14158_20150207_011013_outLine +BABEL_OP3_305_14814_20140621_005436_inLine +BABEL_OP3_305_14814_20140621_005436_outLine +BABEL_OP3_305_14814_20140621_011333_inLine +BABEL_OP3_305_14814_20140621_011333_outLine +BABEL_OP3_305_15617_20150318_012704_inLine +BABEL_OP3_305_15617_20150318_012704_outLine +BABEL_OP3_305_15638_20150212_081118_inLine +BABEL_OP3_305_15638_20150212_081118_outLine +BABEL_OP3_305_15926_20150211_090843_inLine +BABEL_OP3_305_15926_20150211_090843_outLine +BABEL_OP3_305_16249_20140615_022748_inLine +BABEL_OP3_305_16249_20140615_022748_outLine +BABEL_OP3_305_16886_20141016_232346_inLine +BABEL_OP3_305_16886_20141016_232346_outLine +BABEL_OP3_305_17032_20150213_094305_inLine +BABEL_OP3_305_17032_20150213_094305_outLine +BABEL_OP3_305_17032_20150213_095552_inLine +BABEL_OP3_305_17032_20150213_095552_outLine +BABEL_OP3_305_17115_20150310_055940_inLine +BABEL_OP3_305_17115_20150310_055940_outLine +BABEL_OP3_305_17511_20150118_093132_inLine +BABEL_OP3_305_17511_20150118_093132_outLine +BABEL_OP3_305_17511_20150118_094117_inLine +BABEL_OP3_305_17511_20150118_094117_outLine +BABEL_OP3_305_17881_20150220_094906_inLine +BABEL_OP3_305_17881_20150220_094906_outLine +BABEL_OP3_305_17890_20150212_094355_inLine +BABEL_OP3_305_17890_20150212_094355_outLine +BABEL_OP3_305_17923_20140801_003933_inLine +BABEL_OP3_305_17923_20140801_003933_outLine +BABEL_OP3_305_18037_20140611_044623_inLine +BABEL_OP3_305_18037_20140611_044623_outLine 
+BABEL_OP3_305_18566_20150219_072100_inLine +BABEL_OP3_305_18566_20150219_072100_outLine +BABEL_OP3_305_19101_20150123_042130_inLine +BABEL_OP3_305_19101_20150123_042130_outLine +BABEL_OP3_305_19101_20150123_043206_inLine +BABEL_OP3_305_19101_20150123_043206_outLine +BABEL_OP3_305_19621_20150122_072624_inLine +BABEL_OP3_305_19621_20150122_072624_outLine +BABEL_OP3_305_20454_20140619_022112_inLine +BABEL_OP3_305_20454_20140619_022112_outLine +BABEL_OP3_305_20922_20141107_000604_inLine +BABEL_OP3_305_20922_20141107_000604_outLine +BABEL_OP3_305_21543_20141018_050405_inLine +BABEL_OP3_305_21543_20141018_050405_outLine +BABEL_OP3_305_21581_20140822_033738_inLine +BABEL_OP3_305_21581_20140822_033738_outLine +BABEL_OP3_305_21794_20141021_022208_inLine +BABEL_OP3_305_21794_20141021_022208_outLine +BABEL_OP3_305_22624_20150215_050752_inLine +BABEL_OP3_305_22624_20150215_050752_outLine +BABEL_OP3_305_22624_20150215_051632_inLine +BABEL_OP3_305_22624_20150215_051632_outLine +BABEL_OP3_305_23190_20140907_002648_inLine +BABEL_OP3_305_23190_20140907_002648_outLine +BABEL_OP3_305_23195_20150328_071332_inLine +BABEL_OP3_305_23195_20150328_071332_outLine +BABEL_OP3_305_23752_20150319_043326_inLine +BABEL_OP3_305_23752_20150319_043326_outLine +BABEL_OP3_305_24323_20141021_014706_inLine +BABEL_OP3_305_24323_20141021_014706_outLine +BABEL_OP3_305_24586_20150227_100127_inLine +BABEL_OP3_305_24586_20150227_100127_outLine +BABEL_OP3_305_24589_20140822_030512_inLine +BABEL_OP3_305_24589_20140822_030512_outLine +BABEL_OP3_305_24924_20150306_061542_inLine +BABEL_OP3_305_24924_20150306_061542_outLine +BABEL_OP3_305_25220_20150311_004737_inLine +BABEL_OP3_305_25220_20150311_004737_outLine +BABEL_OP3_305_25412_20150123_041255_inLine +BABEL_OP3_305_25412_20150123_041255_outLine +BABEL_OP3_305_27042_20150124_044459_inLine +BABEL_OP3_305_27042_20150124_044459_outLine +BABEL_OP3_305_27125_20140618_065021_inLine +BABEL_OP3_305_27125_20140618_065021_outLine +BABEL_OP3_305_28303_20140806_030759_inLine +BABEL_OP3_305_28303_20140806_030759_outLine +BABEL_OP3_305_28477_20141107_050727_inLine +BABEL_OP3_305_28477_20141107_050727_outLine +BABEL_OP3_305_28606_20150213_101119_inLine +BABEL_OP3_305_28606_20150213_101119_outLine +BABEL_OP3_305_29072_20150212_084053_inLine +BABEL_OP3_305_29072_20150212_084053_outLine +BABEL_OP3_305_30180_20140906_231005_inLine +BABEL_OP3_305_30180_20140906_231005_outLine +BABEL_OP3_305_30280_20150310_080905_inLine +BABEL_OP3_305_30280_20150310_080905_outLine +BABEL_OP3_305_30869_20141030_043630_inLine +BABEL_OP3_305_30869_20141030_043630_outLine +BABEL_OP3_305_31668_20150313_021804_inLine +BABEL_OP3_305_31668_20150313_021804_outLine +BABEL_OP3_305_32708_20140822_052506_inLine +BABEL_OP3_305_32708_20140822_052506_outLine +BABEL_OP3_305_33111_20150228_023906_inLine +BABEL_OP3_305_33111_20150228_023906_outLine +BABEL_OP3_305_33149_20141021_034616_inLine +BABEL_OP3_305_33149_20141021_034616_outLine +BABEL_OP3_305_33251_20150130_021517_inLine +BABEL_OP3_305_33251_20150130_021517_outLine +BABEL_OP3_305_33355_20140619_231328_inLine +BABEL_OP3_305_33355_20140619_231328_outLine +BABEL_OP3_305_33635_20141021_015047_inLine +BABEL_OP3_305_33635_20141021_015047_outLine +BABEL_OP3_305_34145_20150211_103633_inLine +BABEL_OP3_305_34145_20150211_103633_outLine +BABEL_OP3_305_34410_20150319_085843_inLine +BABEL_OP3_305_34410_20150319_085843_outLine +BABEL_OP3_305_34629_20150327_010455_inLine +BABEL_OP3_305_34629_20150327_010455_outLine +BABEL_OP3_305_35008_20150214_095953_inLine 
+BABEL_OP3_305_35008_20150214_095953_outLine +BABEL_OP3_305_35467_20140806_032442_inLine +BABEL_OP3_305_35467_20140806_032442_outLine +BABEL_OP3_305_35609_20150310_091253_inLine +BABEL_OP3_305_35609_20150310_091253_outLine +BABEL_OP3_305_35706_20150221_093541_inLine +BABEL_OP3_305_35706_20150221_093541_outLine +BABEL_OP3_305_36147_20140612_063038_inLine +BABEL_OP3_305_36147_20140612_063038_outLine +BABEL_OP3_305_37007_20150331_081658_inLine +BABEL_OP3_305_37007_20150331_081658_outLine +BABEL_OP3_305_37285_20150213_015416_inLine +BABEL_OP3_305_37285_20150213_015416_outLine +BABEL_OP3_305_38664_20140807_042817_inLine +BABEL_OP3_305_38664_20140807_042817_outLine +BABEL_OP3_305_38741_20140906_040000_inLine +BABEL_OP3_305_38741_20140906_040000_outLine +BABEL_OP3_305_39638_20150328_073733_inLine +BABEL_OP3_305_39638_20150328_073733_outLine +BABEL_OP3_305_39920_20150301_070243_inLine +BABEL_OP3_305_39920_20150301_070243_outLine +BABEL_OP3_305_40092_20140611_040031_inLine +BABEL_OP3_305_40092_20140611_040031_outLine +BABEL_OP3_305_40565_20150210_092106_inLine +BABEL_OP3_305_40565_20150210_092106_outLine +BABEL_OP3_305_41334_20150305_082911_inLine +BABEL_OP3_305_41334_20150305_082911_outLine +BABEL_OP3_305_42231_20150217_080721_inLine +BABEL_OP3_305_42231_20150217_080721_outLine +BABEL_OP3_305_42434_20140822_053733_inLine +BABEL_OP3_305_42434_20140822_053733_outLine +BABEL_OP3_305_42497_20140823_034443_inLine +BABEL_OP3_305_42497_20140823_034443_outLine +BABEL_OP3_305_43368_20140822_071919_inLine +BABEL_OP3_305_43368_20140822_071919_outLine +BABEL_OP3_305_43789_20141017_015101_inLine +BABEL_OP3_305_43789_20141017_015101_outLine +BABEL_OP3_305_43990_20150312_102420_inLine +BABEL_OP3_305_43990_20150312_102420_outLine +BABEL_OP3_305_44868_20150206_083108_inLine +BABEL_OP3_305_44868_20150206_083108_outLine +BABEL_OP3_305_44961_20140619_013154_inLine +BABEL_OP3_305_44961_20140619_013154_outLine +BABEL_OP3_305_45486_20150331_070439_inLine +BABEL_OP3_305_45486_20150331_070439_outLine +BABEL_OP3_305_46558_20140905_012017_inLine +BABEL_OP3_305_46558_20140905_012017_outLine +BABEL_OP3_305_46558_20140905_013000_inLine +BABEL_OP3_305_46558_20140905_013000_outLine +BABEL_OP3_305_46589_20150207_091824_inLine +BABEL_OP3_305_46589_20150207_091824_outLine +BABEL_OP3_305_46681_20140729_053142_inLine +BABEL_OP3_305_46681_20140729_053142_outLine +BABEL_OP3_305_46688_20140620_060408_inLine +BABEL_OP3_305_46688_20140620_060408_outLine +BABEL_OP3_305_46702_20140619_050719_inLine +BABEL_OP3_305_46702_20140619_050719_outLine +BABEL_OP3_305_46757_20150211_011836_inLine +BABEL_OP3_305_46757_20150211_011836_outLine +BABEL_OP3_305_46757_20150211_013224_inLine +BABEL_OP3_305_46757_20150211_013224_outLine +BABEL_OP3_305_47823_20150214_081513_inLine +BABEL_OP3_305_47823_20150214_081513_outLine +BABEL_OP3_305_48844_20140621_034908_inLine +BABEL_OP3_305_48844_20140621_034908_outLine +BABEL_OP3_305_48844_20140621_035628_inLine +BABEL_OP3_305_48844_20140621_035628_outLine +BABEL_OP3_305_49641_20140613_041400_inLine +BABEL_OP3_305_49641_20140613_041400_outLine +BABEL_OP3_305_49768_20140731_031152_inLine +BABEL_OP3_305_49768_20140731_031152_outLine +BABEL_OP3_305_49902_20140809_050813_inLine +BABEL_OP3_305_49902_20140809_050813_outLine +BABEL_OP3_305_50186_20140619_044546_inLine +BABEL_OP3_305_50186_20140619_044546_outLine +BABEL_OP3_305_50186_20140619_045904_inLine +BABEL_OP3_305_50186_20140619_045904_outLine +BABEL_OP3_305_50565_20140612_072129_inLine +BABEL_OP3_305_50565_20140612_072129_outLine 
+BABEL_OP3_305_50745_20150219_082842_inLine +BABEL_OP3_305_50745_20150219_082842_outLine +BABEL_OP3_305_51015_20150211_235649_inLine +BABEL_OP3_305_51015_20150211_235649_outLine +BABEL_OP3_305_51611_20140619_070031_inLine +BABEL_OP3_305_51611_20140619_070031_outLine +BABEL_OP3_305_51611_20140619_071006_inLine +BABEL_OP3_305_51611_20140619_071006_outLine +BABEL_OP3_305_51819_20150210_085538_inLine +BABEL_OP3_305_51819_20150210_085538_outLine +BABEL_OP3_305_52404_20150208_070706_inLine +BABEL_OP3_305_52404_20150208_070706_outLine +BABEL_OP3_305_52725_20150227_111722_inLine +BABEL_OP3_305_52725_20150227_111722_outLine +BABEL_OP3_305_52818_20150206_104316_inLine +BABEL_OP3_305_52818_20150206_104316_outLine +BABEL_OP3_305_52854_20140620_010725_inLine +BABEL_OP3_305_52854_20140620_010725_outLine +BABEL_OP3_305_53144_20150220_084533_inLine +BABEL_OP3_305_53144_20150220_084533_outLine +BABEL_OP3_305_54594_20150114_073509_inLine +BABEL_OP3_305_54594_20150114_073509_outLine +BABEL_OP3_305_55042_20140614_022059_inLine +BABEL_OP3_305_55042_20140614_022059_outLine +BABEL_OP3_305_55106_20150221_080452_inLine +BABEL_OP3_305_55106_20150221_080452_outLine +BABEL_OP3_305_55815_20140612_000452_inLine +BABEL_OP3_305_55815_20140612_000452_outLine +BABEL_OP3_305_55818_20140620_003329_inLine +BABEL_OP3_305_55818_20140620_003329_outLine +BABEL_OP3_305_56198_20140904_224843_inLine +BABEL_OP3_305_56198_20140904_224843_outLine +BABEL_OP3_305_57116_20140618_021028_inLine +BABEL_OP3_305_57116_20140618_021028_outLine +BABEL_OP3_305_57609_20150127_040742_inLine +BABEL_OP3_305_57609_20150127_040742_outLine +BABEL_OP3_305_57654_20140917_034820_inLine +BABEL_OP3_305_57654_20140917_034820_outLine +BABEL_OP3_305_57935_20150203_072757_inLine +BABEL_OP3_305_57935_20150203_072757_outLine +BABEL_OP3_305_58061_20150326_103607_inLine +BABEL_OP3_305_58061_20150326_103607_outLine +BABEL_OP3_305_58734_20140620_003259_inLine +BABEL_OP3_305_58734_20140620_003259_outLine +BABEL_OP3_305_59549_20140620_001253_inLine +BABEL_OP3_305_59549_20140620_001253_outLine +BABEL_OP3_305_59720_20140807_043323_inLine +BABEL_OP3_305_59720_20140807_043323_outLine +BABEL_OP3_305_60115_20150211_025109_inLine +BABEL_OP3_305_60115_20150211_025109_outLine +BABEL_OP3_305_60282_20140612_025229_inLine +BABEL_OP3_305_60282_20140612_025229_outLine +BABEL_OP3_305_60477_20150304_092057_inLine +BABEL_OP3_305_60477_20150304_092057_outLine +BABEL_OP3_305_60626_20141018_012739_inLine +BABEL_OP3_305_60626_20141018_012739_outLine +BABEL_OP3_305_60650_20150331_055502_inLine +BABEL_OP3_305_60650_20150331_055502_outLine +BABEL_OP3_305_60830_20141017_004525_inLine +BABEL_OP3_305_60830_20141017_004525_outLine +BABEL_OP3_305_60830_20141017_053807_inLine +BABEL_OP3_305_60830_20141017_053807_outLine +BABEL_OP3_305_61348_20141017_014818_inLine +BABEL_OP3_305_61348_20141017_014818_outLine +BABEL_OP3_305_61348_20141017_060653_inLine +BABEL_OP3_305_61348_20141017_060653_outLine +BABEL_OP3_305_61873_20150123_024415_inLine +BABEL_OP3_305_61873_20150123_024415_outLine +BABEL_OP3_305_62158_20150313_013514_inLine +BABEL_OP3_305_62158_20150313_013514_outLine +BABEL_OP3_305_62200_20141017_014602_inLine +BABEL_OP3_305_62200_20141017_014602_outLine +BABEL_OP3_305_62471_20140619_072350_inLine +BABEL_OP3_305_62471_20140619_072350_outLine +BABEL_OP3_305_62734_20140821_221916_inLine +BABEL_OP3_305_62734_20140821_221916_outLine +BABEL_OP3_305_62852_20140618_072924_inLine +BABEL_OP3_305_62852_20140618_072924_outLine +BABEL_OP3_305_63084_20150207_074116_inLine 
+BABEL_OP3_305_63084_20150207_074116_outLine +BABEL_OP3_305_64469_20140620_063122_inLine +BABEL_OP3_305_64469_20140620_063122_outLine +BABEL_OP3_305_64768_20140822_043008_inLine +BABEL_OP3_305_64768_20140822_043008_outLine +BABEL_OP3_305_64902_20150220_102326_inLine +BABEL_OP3_305_64902_20150220_102326_outLine +BABEL_OP3_305_65466_20150222_074001_inLine +BABEL_OP3_305_65466_20150222_074001_outLine +BABEL_OP3_305_65477_20141016_234600_inLine +BABEL_OP3_305_65477_20141016_234600_outLine +BABEL_OP3_305_65477_20141016_235812_inLine +BABEL_OP3_305_65477_20141016_235812_outLine +BABEL_OP3_305_65692_20150127_044937_inLine +BABEL_OP3_305_65692_20150127_044937_outLine +BABEL_OP3_305_66045_20140822_062953_inLine +BABEL_OP3_305_66045_20140822_062953_outLine +BABEL_OP3_305_66177_20150221_091456_inLine +BABEL_OP3_305_66177_20150221_091456_outLine +BABEL_OP3_305_66967_20140618_044613_inLine +BABEL_OP3_305_66967_20140618_044613_outLine +BABEL_OP3_305_66975_20140615_024703_inLine +BABEL_OP3_305_66975_20140615_024703_outLine +BABEL_OP3_305_67053_20150312_031258_inLine +BABEL_OP3_305_67053_20150312_031258_outLine +BABEL_OP3_305_67283_20140618_075016_inLine +BABEL_OP3_305_67283_20140618_075016_outLine +BABEL_OP3_305_67373_20140822_005349_inLine +BABEL_OP3_305_67373_20140822_005349_outLine +BABEL_OP3_305_67389_20150317_083510_inLine +BABEL_OP3_305_67389_20150317_083510_outLine +BABEL_OP3_305_67842_20140906_014501_inLine +BABEL_OP3_305_67842_20140906_014501_outLine +BABEL_OP3_305_68068_20150206_100103_inLine +BABEL_OP3_305_68068_20150206_100103_outLine +BABEL_OP3_305_68244_20150208_045135_inLine +BABEL_OP3_305_68244_20150208_045135_outLine +BABEL_OP3_305_68668_20140614_053023_inLine +BABEL_OP3_305_68668_20140614_053023_outLine +BABEL_OP3_305_69090_20141018_010121_inLine +BABEL_OP3_305_69090_20141018_010121_outLine +BABEL_OP3_305_69574_20140618_231512_inLine +BABEL_OP3_305_69574_20140618_231512_outLine +BABEL_OP3_305_70251_20140618_233739_inLine +BABEL_OP3_305_70251_20140618_233739_outLine +BABEL_OP3_305_70282_20150127_012555_inLine +BABEL_OP3_305_70282_20150127_012555_outLine +BABEL_OP3_305_70794_20140614_073231_inLine +BABEL_OP3_305_70794_20140614_073231_outLine +BABEL_OP3_305_70986_20150320_092518_inLine +BABEL_OP3_305_70986_20150320_092518_outLine +BABEL_OP3_305_71189_20150227_092723_inLine +BABEL_OP3_305_71189_20150227_092723_outLine +BABEL_OP3_305_71278_20140614_040622_inLine +BABEL_OP3_305_71278_20140614_040622_outLine +BABEL_OP3_305_71282_20141028_054244_inLine +BABEL_OP3_305_71282_20141028_054244_outLine +BABEL_OP3_305_71333_20140808_025232_inLine +BABEL_OP3_305_71333_20140808_025232_outLine +BABEL_OP3_305_71566_20150217_074338_inLine +BABEL_OP3_305_71566_20150217_074338_outLine +BABEL_OP3_305_72110_20150214_074424_inLine +BABEL_OP3_305_72110_20150214_074424_outLine +BABEL_OP3_305_72903_20140612_021516_inLine +BABEL_OP3_305_72903_20140612_021516_outLine +BABEL_OP3_305_73301_20140808_235747_inLine +BABEL_OP3_305_73301_20140808_235747_outLine +BABEL_OP3_305_74667_20140904_050532_inLine +BABEL_OP3_305_74667_20140904_050532_outLine +BABEL_OP3_305_74763_20140612_011204_inLine +BABEL_OP3_305_74763_20140612_011204_outLine +BABEL_OP3_305_74921_20150208_081422_inLine +BABEL_OP3_305_74921_20150208_081422_outLine +BABEL_OP3_305_75064_20140621_012128_inLine +BABEL_OP3_305_75064_20140621_012128_outLine +BABEL_OP3_305_75223_20140618_232223_inLine +BABEL_OP3_305_75223_20140618_232223_outLine +BABEL_OP3_305_76126_20150122_051345_inLine +BABEL_OP3_305_76126_20150122_051345_outLine 
+BABEL_OP3_305_76437_20140615_023448_inLine +BABEL_OP3_305_76437_20140615_023448_outLine +BABEL_OP3_305_77225_20140612_003002_inLine +BABEL_OP3_305_77225_20140612_003002_outLine +BABEL_OP3_305_77744_20140821_052246_inLine +BABEL_OP3_305_77744_20140821_052246_outLine +BABEL_OP3_305_77909_20140613_035103_inLine +BABEL_OP3_305_77909_20140613_035103_outLine +BABEL_OP3_305_78016_20140821_222210_inLine +BABEL_OP3_305_78016_20140821_222210_outLine +BABEL_OP3_305_79028_20140621_005114_inLine +BABEL_OP3_305_79028_20140621_005114_outLine +BABEL_OP3_305_79167_20150208_063508_inLine +BABEL_OP3_305_79167_20150208_063508_outLine +BABEL_OP3_305_79571_20150211_095226_inLine +BABEL_OP3_305_79571_20150211_095226_outLine +BABEL_OP3_305_79751_20140821_233858_inLine +BABEL_OP3_305_79751_20140821_233858_outLine +BABEL_OP3_305_80241_20140612_015921_inLine +BABEL_OP3_305_80241_20140612_015921_outLine +BABEL_OP3_305_80577_20150221_073930_inLine +BABEL_OP3_305_80577_20150221_073930_outLine +BABEL_OP3_305_81213_20140822_005322_inLine +BABEL_OP3_305_81213_20140822_005322_outLine +BABEL_OP3_305_82622_20140619_013825_inLine +BABEL_OP3_305_82622_20140619_013825_outLine +BABEL_OP3_305_82626_20150307_100633_inLine +BABEL_OP3_305_82626_20150307_100633_outLine +BABEL_OP3_305_83436_20140619_060309_inLine +BABEL_OP3_305_83436_20140619_060309_outLine +BABEL_OP3_305_83935_20150213_091523_inLine +BABEL_OP3_305_83935_20150213_091523_outLine +BABEL_OP3_305_84055_20150221_083133_inLine +BABEL_OP3_305_84055_20150221_083133_outLine +BABEL_OP3_305_84061_20140807_063818_inLine +BABEL_OP3_305_84061_20140807_063818_outLine +BABEL_OP3_305_84079_20140613_053813_inLine +BABEL_OP3_305_84079_20140613_053813_outLine +BABEL_OP3_305_84177_20141021_041721_inLine +BABEL_OP3_305_84177_20141021_041721_outLine +BABEL_OP3_305_84177_20141021_043314_inLine +BABEL_OP3_305_84177_20141021_043314_outLine +BABEL_OP3_305_84327_20150213_084928_inLine +BABEL_OP3_305_84327_20150213_084928_outLine +BABEL_OP3_305_84605_20140903_033325_inLine +BABEL_OP3_305_84605_20140903_033325_outLine +BABEL_OP3_305_84605_20140903_034415_inLine +BABEL_OP3_305_84605_20140903_034415_outLine +BABEL_OP3_305_84823_20150122_033347_inLine +BABEL_OP3_305_84823_20150122_033347_outLine +BABEL_OP3_305_84838_20141022_010706_inLine +BABEL_OP3_305_84838_20141022_010706_outLine +BABEL_OP3_305_85047_20150122_030457_inLine +BABEL_OP3_305_85047_20150122_030457_outLine +BABEL_OP3_305_85254_20150303_094409_inLine +BABEL_OP3_305_85254_20150303_094409_outLine +BABEL_OP3_305_85260_20140611_042042_inLine +BABEL_OP3_305_85260_20140611_042042_outLine +BABEL_OP3_305_85322_20140822_031748_inLine +BABEL_OP3_305_85322_20140822_031748_outLine +BABEL_OP3_305_85340_20140826_232921_inLine +BABEL_OP3_305_85340_20140826_232921_outLine +BABEL_OP3_305_85519_20141022_033125_inLine +BABEL_OP3_305_85519_20141022_033125_outLine +BABEL_OP3_305_86100_20140612_024853_inLine +BABEL_OP3_305_86100_20140612_024853_outLine +BABEL_OP3_305_86321_20150218_084806_inLine +BABEL_OP3_305_86321_20150218_084806_outLine +BABEL_OP3_305_86433_20150211_094926_inLine +BABEL_OP3_305_86433_20150211_094926_outLine +BABEL_OP3_305_86597_20150305_020834_inLine +BABEL_OP3_305_86597_20150305_020834_outLine +BABEL_OP3_305_86830_20141028_051738_inLine +BABEL_OP3_305_86830_20141028_051738_outLine +BABEL_OP3_305_86878_20150115_115301_inLine +BABEL_OP3_305_86878_20150115_115301_outLine +BABEL_OP3_305_87179_20141021_010357_inLine +BABEL_OP3_305_87179_20141021_010357_outLine +BABEL_OP3_305_88669_20150226_082958_inLine 
+BABEL_OP3_305_88669_20150226_082958_outLine +BABEL_OP3_305_89330_20150305_075359_inLine +BABEL_OP3_305_89330_20150305_075359_outLine +BABEL_OP3_305_89372_20140806_054633_inLine +BABEL_OP3_305_89372_20140806_054633_outLine +BABEL_OP3_305_89650_20140606_064449_inLine +BABEL_OP3_305_89650_20140606_064449_outLine +BABEL_OP3_305_90572_20150221_011508_inLine +BABEL_OP3_305_90572_20150221_011508_outLine +BABEL_OP3_305_90739_20140910_010202_inLine +BABEL_OP3_305_90739_20140910_010202_outLine +BABEL_OP3_305_90739_20140910_011127_inLine +BABEL_OP3_305_90739_20140910_011127_outLine +BABEL_OP3_305_90777_20140910_031558_inLine +BABEL_OP3_305_90777_20140910_031558_outLine +BABEL_OP3_305_90930_20140612_073132_inLine +BABEL_OP3_305_90930_20140612_073132_outLine +BABEL_OP3_305_91252_20140612_013640_inLine +BABEL_OP3_305_91252_20140612_013640_outLine +BABEL_OP3_305_91463_20150204_001924_inLine +BABEL_OP3_305_91463_20150204_001924_outLine +BABEL_OP3_305_91977_20150210_012536_inLine +BABEL_OP3_305_91977_20150210_012536_outLine +BABEL_OP3_305_92281_20150312_104117_inLine +BABEL_OP3_305_92281_20150312_104117_outLine +BABEL_OP3_305_92509_20140620_020408_inLine +BABEL_OP3_305_92509_20140620_020408_outLine +BABEL_OP3_305_92605_20150312_090817_inLine +BABEL_OP3_305_92605_20150312_090817_outLine +BABEL_OP3_305_92740_20150210_020753_inLine +BABEL_OP3_305_92740_20150210_020753_outLine +BABEL_OP3_305_92941_20140911_002247_inLine +BABEL_OP3_305_92941_20140911_002247_outLine +BABEL_OP3_305_93320_20150305_072620_inLine +BABEL_OP3_305_93320_20150305_072620_outLine +BABEL_OP3_305_93681_20141018_044334_inLine +BABEL_OP3_305_93681_20141018_044334_outLine +BABEL_OP3_305_93861_20150123_004904_inLine +BABEL_OP3_305_93861_20150123_004904_outLine +BABEL_OP3_305_94253_20140902_015125_inLine +BABEL_OP3_305_94253_20140902_015125_outLine +BABEL_OP3_305_94587_20150214_091538_inLine +BABEL_OP3_305_94587_20150214_091538_outLine +BABEL_OP3_305_94713_20150114_082431_inLine +BABEL_OP3_305_94713_20150114_082431_outLine +BABEL_OP3_305_95269_20140912_000910_inLine +BABEL_OP3_305_95269_20140912_000910_outLine +BABEL_OP3_305_95598_20140615_025323_inLine +BABEL_OP3_305_95598_20140615_025323_outLine +BABEL_OP3_305_95903_20150222_060503_inLine +BABEL_OP3_305_95903_20150222_060503_outLine +BABEL_OP3_305_95942_20150227_105233_inLine +BABEL_OP3_305_95942_20150227_105233_outLine +BABEL_OP3_305_96041_20140611_065313_inLine +BABEL_OP3_305_96041_20140611_065313_outLine +BABEL_OP3_305_96504_20140808_230422_inLine +BABEL_OP3_305_96504_20140808_230422_outLine +BABEL_OP3_305_96504_20140808_231336_inLine +BABEL_OP3_305_96504_20140808_231336_outLine +BABEL_OP3_305_97220_20150303_234352_inLine +BABEL_OP3_305_97220_20150303_234352_outLine +BABEL_OP3_305_97772_20140618_074519_inLine +BABEL_OP3_305_97772_20140618_074519_outLine +BABEL_OP3_305_97896_20140904_071346_inLine +BABEL_OP3_305_97896_20140904_071346_outLine +BABEL_OP3_305_98192_20150306_053152_inLine +BABEL_OP3_305_98192_20150306_053152_outLine +BABEL_OP3_305_98255_20150115_095803_inLine +BABEL_OP3_305_98255_20150115_095803_outLine +BABEL_OP3_305_98255_20150115_101856_inLine +BABEL_OP3_305_98255_20150115_101856_outLine +BABEL_OP3_305_98909_20140730_054930_inLine +BABEL_OP3_305_98909_20140730_054930_outLine +BABEL_OP3_305_98909_20140730_055859_inLine +BABEL_OP3_305_98909_20140730_055859_outLine +BABEL_OP3_305_99289_20150227_102036_inLine +BABEL_OP3_305_99289_20150227_102036_outLine +BABEL_OP3_305_99516_20140620_054149_inLine +BABEL_OP3_305_99516_20140620_054149_outLine
diff --git a/egs/babel/s5d/conf/lists/305-guarani/untranscribed-training.list b/egs/babel/s5d/conf/lists/305-guarani/untranscribed-training.list
new file mode 100644
index 00000000000..3b4e995995f
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/305-guarani/untranscribed-training.list
@@ -0,0 +1,525 @@
+BABEL_OP3_305_10002_20150327_045715_inLine +BABEL_OP3_305_10002_20150327_045715_outLine +BABEL_OP3_305_12846_20150711_092831_inLine +BABEL_OP3_305_12846_20150711_092831_outLine +BABEL_OP3_305_13561_20150122_000259_inLine +BABEL_OP3_305_13561_20150122_000259_outLine +BABEL_OP3_305_13792_20140619_010014_inLine +BABEL_OP3_305_13792_20140619_010014_outLine +BABEL_OP3_305_13909_20150709_071634_inLine +BABEL_OP3_305_13909_20150709_071634_outLine +BABEL_OP3_305_13929_20150429_060818_inLine +BABEL_OP3_305_13929_20150429_060818_outLine +BABEL_OP3_305_14179_20150211_104346_inLine +BABEL_OP3_305_14537_20150507_004514_inLine +BABEL_OP3_305_14537_20150507_004514_outLine +BABEL_OP3_305_14560_20150208_054722_inLine +BABEL_OP3_305_14575_20150501_043914_inLine +BABEL_OP3_305_14575_20150501_043914_outLine +BABEL_OP3_305_14807_20150124_062928_inLine +BABEL_OP3_305_14807_20150124_062928_outLine +BABEL_OP3_305_14875_20140808_063210_inLine +BABEL_OP3_305_14972_20150123_045130_inLine +BABEL_OP3_305_14972_20150123_045130_outLine +BABEL_OP3_305_15324_20150226_034700_inLine +BABEL_OP3_305_15324_20150226_034700_outLine +BABEL_OP3_305_15382_20150211_004401_inLine +BABEL_OP3_305_15382_20150211_004401_outLine +BABEL_OP3_305_15466_20150319_020617_inLine +BABEL_OP3_305_15466_20150319_020617_outLine +BABEL_OP3_305_15702_20150207_022910_inLine +BABEL_OP3_305_15869_20140613_063410_inLine +BABEL_OP3_305_15869_20140613_063410_outLine +BABEL_OP3_305_15985_20150712_053914_inLine +BABEL_OP3_305_15985_20150712_053914_outLine +BABEL_OP3_305_16056_20140618_060252_inLine +BABEL_OP3_305_16056_20140618_060252_outLine +BABEL_OP3_305_16802_20140613_064802_outLine +BABEL_OP3_305_16838_20150428_014210_inLine +BABEL_OP3_305_16838_20150428_014210_outLine +BABEL_OP3_305_16938_20150127_074437_inLine +BABEL_OP3_305_16938_20150127_074437_outLine +BABEL_OP3_305_17472_20150226_001559_inLine +BABEL_OP3_305_17472_20150226_001559_outLine +BABEL_OP3_305_17520_20150123_072609_inLine +BABEL_OP3_305_17520_20150123_072609_outLine +BABEL_OP3_305_17573_20150225_054303_inLine +BABEL_OP3_305_17573_20150225_054303_outLine +BABEL_OP3_305_17751_20150709_064430_inLine +BABEL_OP3_305_17751_20150709_064430_outLine +BABEL_OP3_305_19545_20150224_095516_inLine +BABEL_OP3_305_19545_20150224_095516_outLine +BABEL_OP3_305_19589_20150605_040559_inLine +BABEL_OP3_305_19589_20150605_040559_outLine +BABEL_OP3_305_19722_20140620_011143_inLine +BABEL_OP3_305_19722_20140620_011143_outLine +BABEL_OP3_305_19722_20140620_012427_inLine +BABEL_OP3_305_19722_20140620_012427_outLine +BABEL_OP3_305_20738_20150303_004715_inLine +BABEL_OP3_305_20738_20150303_004715_outLine +BABEL_OP3_305_21029_20140823_005012_outLine +BABEL_OP3_305_21426_20150317_013855_inLine +BABEL_OP3_305_21426_20150317_013855_outLine +BABEL_OP3_305_22170_20150219_024431_inLine +BABEL_OP3_305_22170_20150219_024431_outLine +BABEL_OP3_305_23395_20150124_020906_inLine +BABEL_OP3_305_23395_20150124_020906_outLine +BABEL_OP3_305_24037_20150408_020032_inLine +BABEL_OP3_305_24037_20150408_020032_outLine +BABEL_OP3_305_24270_20150127_065231_inLine +BABEL_OP3_305_24270_20150127_065231_outLine +BABEL_OP3_305_24648_20150720_024919_inLine +BABEL_OP3_305_24648_20150720_024919_outLine +BABEL_OP3_305_25698_20150713_041848_inLine
+BABEL_OP3_305_25698_20150713_041848_outLine +BABEL_OP3_305_26074_20150123_054227_inLine +BABEL_OP3_305_26074_20150123_054227_outLine +BABEL_OP3_305_26507_20150228_085010_inLine +BABEL_OP3_305_26507_20150228_085010_outLine +BABEL_OP3_305_26869_20140611_062738_inLine +BABEL_OP3_305_26869_20140611_062738_outLine +BABEL_OP3_305_26999_20150211_213027_outLine +BABEL_OP3_305_27203_20150203_021148_inLine +BABEL_OP3_305_27203_20150203_021148_outLine +BABEL_OP3_305_28522_20150210_024545_inLine +BABEL_OP3_305_28522_20150210_024545_outLine +BABEL_OP3_305_28595_20150311_092304_inLine +BABEL_OP3_305_28595_20150311_092304_outLine +BABEL_OP3_305_28644_20150501_021643_inLine +BABEL_OP3_305_28644_20150501_021643_outLine +BABEL_OP3_305_28814_20141028_061920_inLine +BABEL_OP3_305_28814_20141028_061920_outLine +BABEL_OP3_305_29039_20150225_033135_inLine +BABEL_OP3_305_29039_20150225_033135_outLine +BABEL_OP3_305_29135_20140620_020910_inLine +BABEL_OP3_305_29135_20140620_020910_outLine +BABEL_OP3_305_29643_20150712_020443_inLine +BABEL_OP3_305_29643_20150712_020443_outLine +BABEL_OP3_305_29911_20150425_022101_inLine +BABEL_OP3_305_29911_20150425_022101_outLine +BABEL_OP3_305_30084_20150711_110851_inLine +BABEL_OP3_305_30084_20150711_110851_outLine +BABEL_OP3_305_30253_20150226_074731_inLine +BABEL_OP3_305_30345_20150801_030841_inLine +BABEL_OP3_305_30345_20150801_030841_outLine +BABEL_OP3_305_30395_20140913_031713_inLine +BABEL_OP3_305_30395_20140913_031713_outLine +BABEL_OP3_305_30395_20140913_033401_inLine +BABEL_OP3_305_30395_20140913_033401_outLine +BABEL_OP3_305_31109_20150201_061030_inLine +BABEL_OP3_305_31131_20150318_083818_inLine +BABEL_OP3_305_31184_20141016_042343_inLine +BABEL_OP3_305_31184_20141016_042343_outLine +BABEL_OP3_305_31490_20140618_043106_inLine +BABEL_OP3_305_31490_20140618_043106_outLine +BABEL_OP3_305_32171_20150313_090240_inLine +BABEL_OP3_305_32171_20150313_090240_outLine +BABEL_OP3_305_32244_20150508_010834_inLine +BABEL_OP3_305_32244_20150508_010834_outLine +BABEL_OP3_305_32630_20150508_025319_inLine +BABEL_OP3_305_32630_20150508_025319_outLine +BABEL_OP3_305_32959_20150218_010038_inLine +BABEL_OP3_305_32959_20150218_010038_outLine +BABEL_OP3_305_32961_20150312_083747_inLine +BABEL_OP3_305_32961_20150312_083747_outLine +BABEL_OP3_305_33216_20150305_093049_outLine +BABEL_OP3_305_34482_20140612_002439_inLine +BABEL_OP3_305_34482_20140612_002439_outLine +BABEL_OP3_305_34688_20140620_051303_inLine +BABEL_OP3_305_34688_20140620_051303_outLine +BABEL_OP3_305_34899_20150708_044950_inLine +BABEL_OP3_305_34899_20150708_044950_outLine +BABEL_OP3_305_34903_20150513_000213_inLine +BABEL_OP3_305_34903_20150513_000213_outLine +BABEL_OP3_305_35838_20150505_025409_inLine +BABEL_OP3_305_35838_20150505_025409_outLine +BABEL_OP3_305_36642_20150529_004314_inLine +BABEL_OP3_305_36642_20150529_004314_outLine +BABEL_OP3_305_37229_20150711_062628_inLine +BABEL_OP3_305_37229_20150711_062628_outLine +BABEL_OP3_305_37776_20141021_051359_inLine +BABEL_OP3_305_37776_20141021_051359_outLine +BABEL_OP3_305_38554_20140618_050525_inLine +BABEL_OP3_305_38554_20140618_050525_outLine +BABEL_OP3_305_38689_20150215_061537_inLine +BABEL_OP3_305_38689_20150215_061537_outLine +BABEL_OP3_305_38750_20150512_033350_outLine +BABEL_OP3_305_38878_20150226_001924_inLine +BABEL_OP3_305_38878_20150226_001924_outLine +BABEL_OP3_305_38979_20150222_070549_inLine +BABEL_OP3_305_38979_20150222_071202_inLine +BABEL_OP3_305_39006_20150305_001413_inLine +BABEL_OP3_305_39006_20150305_001413_outLine 
+BABEL_OP3_305_40330_20140613_044545_inLine +BABEL_OP3_305_40330_20140613_044545_outLine +BABEL_OP3_305_40648_20150425_034647_inLine +BABEL_OP3_305_40648_20150425_034647_outLine +BABEL_OP3_305_41720_20150327_013143_inLine +BABEL_OP3_305_42029_20141107_005557_inLine +BABEL_OP3_305_42029_20141107_005557_outLine +BABEL_OP3_305_42126_20150428_014342_inLine +BABEL_OP3_305_42126_20150428_014342_outLine +BABEL_OP3_305_42126_20150428_021652_inLine +BABEL_OP3_305_42126_20150428_021652_outLine +BABEL_OP3_305_42619_20150211_044149_inLine +BABEL_OP3_305_42619_20150211_044149_outLine +BABEL_OP3_305_42834_20150212_100155_inLine +BABEL_OP3_305_42834_20150212_100155_outLine +BABEL_OP3_305_42848_20150711_053624_inLine +BABEL_OP3_305_42848_20150711_053624_outLine +BABEL_OP3_305_43157_20150313_015446_inLine +BABEL_OP3_305_43157_20150313_015446_outLine +BABEL_OP3_305_43285_20150210_022647_inLine +BABEL_OP3_305_43323_20150719_100142_inLine +BABEL_OP3_305_43323_20150719_100142_outLine +BABEL_OP3_305_43794_20150712_055921_inLine +BABEL_OP3_305_43794_20150712_055921_outLine +BABEL_OP3_305_44309_20150221_054810_inLine +BABEL_OP3_305_44309_20150221_054810_outLine +BABEL_OP3_305_44681_20150506_011354_inLine +BABEL_OP3_305_44681_20150506_011354_outLine +BABEL_OP3_305_45699_20140621_010650_inLine +BABEL_OP3_305_45699_20140621_010650_outLine +BABEL_OP3_305_45771_20150509_034615_inLine +BABEL_OP3_305_45771_20150509_034615_outLine +BABEL_OP3_305_46974_20150214_020116_inLine +BABEL_OP3_305_46974_20150214_020116_outLine +BABEL_OP3_305_47309_20150409_072623_inLine +BABEL_OP3_305_47309_20150409_072623_outLine +BABEL_OP3_305_47405_20140612_010358_inLine +BABEL_OP3_305_47405_20140612_010358_outLine +BABEL_OP3_305_47451_20150226_004537_inLine +BABEL_OP3_305_47451_20150226_004537_outLine +BABEL_OP3_305_47866_20150221_013305_inLine +BABEL_OP3_305_47866_20150221_013305_outLine +BABEL_OP3_305_47866_20150221_014014_inLine +BABEL_OP3_305_47866_20150221_014014_outLine +BABEL_OP3_305_48016_20150306_064336_inLine +BABEL_OP3_305_48016_20150306_064336_outLine +BABEL_OP3_305_48299_20150325_094035_inLine +BABEL_OP3_305_48299_20150325_094035_outLine +BABEL_OP3_305_49775_20140618_071800_inLine +BABEL_OP3_305_49775_20140618_071800_outLine +BABEL_OP3_305_49912_20150713_052104_inLine +BABEL_OP3_305_49912_20150713_052104_outLine +BABEL_OP3_305_49945_20150507_042152_inLine +BABEL_OP3_305_49945_20150507_042152_outLine +BABEL_OP3_305_50726_20140620_231413_inLine +BABEL_OP3_305_50726_20140620_231413_outLine +BABEL_OP3_305_50779_20150124_073920_inLine +BABEL_OP3_305_50779_20150124_073920_outLine +BABEL_OP3_305_51414_20150508_035339_inLine +BABEL_OP3_305_51414_20150508_035339_outLine +BABEL_OP3_305_52058_20150425_021345_inLine +BABEL_OP3_305_52058_20150425_021345_outLine +BABEL_OP3_305_52070_20150708_053057_inLine +BABEL_OP3_305_52070_20150708_053057_outLine +BABEL_OP3_305_53063_20150227_005949_inLine +BABEL_OP3_305_53063_20150227_005949_outLine +BABEL_OP3_305_54066_20150314_023944_inLine +BABEL_OP3_305_55136_20150720_024100_inLine +BABEL_OP3_305_55136_20150720_024100_outLine +BABEL_OP3_305_56057_20140614_044506_inLine +BABEL_OP3_305_56057_20140614_044506_outLine +BABEL_OP3_305_56326_20150422_010519_inLine +BABEL_OP3_305_56326_20150422_010519_outLine +BABEL_OP3_305_56345_20150327_043440_inLine +BABEL_OP3_305_56465_20150306_050918_inLine +BABEL_OP3_305_56465_20150306_050918_outLine +BABEL_OP3_305_56674_20150501_040501_inLine +BABEL_OP3_305_56674_20150501_040501_outLine +BABEL_OP3_305_56684_20150801_003245_inLine 
+BABEL_OP3_305_56684_20150801_003245_outLine +BABEL_OP3_305_56951_20150501_022425_inLine +BABEL_OP3_305_56951_20150501_022425_outLine +BABEL_OP3_305_57093_20150122_014446_inLine +BABEL_OP3_305_57093_20150122_021223_inLine +BABEL_OP3_305_57782_20150310_044823_inLine +BABEL_OP3_305_57782_20150310_044823_outLine +BABEL_OP3_305_58047_20150124_055910_inLine +BABEL_OP3_305_58047_20150124_055910_outLine +BABEL_OP3_305_58313_20150124_015438_inLine +BABEL_OP3_305_58313_20150124_015438_outLine +BABEL_OP3_305_58489_20150217_090604_inLine +BABEL_OP3_305_58850_20141017_005516_inLine +BABEL_OP3_305_58850_20141017_010823_inLine +BABEL_OP3_305_59028_20150709_062445_inLine +BABEL_OP3_305_59028_20150709_062445_outLine +BABEL_OP3_305_59028_20150712_043120_inLine +BABEL_OP3_305_59028_20150712_043120_outLine +BABEL_OP3_305_59078_20150127_073310_inLine +BABEL_OP3_305_59078_20150127_073310_outLine +BABEL_OP3_305_59509_20150206_012130_inLine +BABEL_OP3_305_59509_20150206_013211_inLine +BABEL_OP3_305_59747_20140620_004831_inLine +BABEL_OP3_305_59747_20140620_004831_outLine +BABEL_OP3_305_60307_20150310_094538_inLine +BABEL_OP3_305_60307_20150310_094538_outLine +BABEL_OP3_305_60436_20150529_012621_inLine +BABEL_OP3_305_60436_20150529_012621_outLine +BABEL_OP3_305_60458_20150508_023847_inLine +BABEL_OP3_305_60458_20150508_023847_outLine +BABEL_OP3_305_60498_20150508_031033_inLine +BABEL_OP3_305_60498_20150508_031033_outLine +BABEL_OP3_305_60508_20140822_014453_inLine +BABEL_OP3_305_60778_20150425_005047_inLine +BABEL_OP3_305_60778_20150425_005047_outLine +BABEL_OP3_305_60836_20140809_005847_inLine +BABEL_OP3_305_61219_20140730_063954_inLine +BABEL_OP3_305_61219_20140730_063954_outLine +BABEL_OP3_305_61225_20140620_001221_inLine +BABEL_OP3_305_61225_20140620_001221_outLine +BABEL_OP3_305_61438_20150423_020808_inLine +BABEL_OP3_305_61438_20150423_020808_outLine +BABEL_OP3_305_61731_20140621_035703_inLine +BABEL_OP3_305_61731_20140621_035703_outLine +BABEL_OP3_305_61731_20140621_041145_inLine +BABEL_OP3_305_61731_20140621_041145_outLine +BABEL_OP3_305_62323_20140612_010032_inLine +BABEL_OP3_305_62323_20140612_010032_outLine +BABEL_OP3_305_62362_20150712_082552_inLine +BABEL_OP3_305_62362_20150712_082552_outLine +BABEL_OP3_305_62430_20150219_045422_inLine +BABEL_OP3_305_62430_20150219_045422_outLine +BABEL_OP3_305_62545_20150424_004115_inLine +BABEL_OP3_305_62545_20150424_004115_outLine +BABEL_OP3_305_63094_20150712_100827_inLine +BABEL_OP3_305_63094_20150712_100827_outLine +BABEL_OP3_305_63265_20140611_234727_inLine +BABEL_OP3_305_63265_20140611_234727_outLine +BABEL_OP3_305_63265_20140611_235803_inLine +BABEL_OP3_305_63265_20140611_235803_outLine +BABEL_OP3_305_63307_20150122_063418_inLine +BABEL_OP3_305_63307_20150122_063418_outLine +BABEL_OP3_305_63307_20150122_065933_inLine +BABEL_OP3_305_63307_20150122_065933_outLine +BABEL_OP3_305_63309_20150319_003832_inLine +BABEL_OP3_305_63309_20150319_003832_outLine +BABEL_OP3_305_63336_20140614_051945_inLine +BABEL_OP3_305_63336_20140614_051945_outLine +BABEL_OP3_305_63490_20150408_025018_inLine +BABEL_OP3_305_63490_20150408_025018_outLine +BABEL_OP3_305_63490_20150408_025711_inLine +BABEL_OP3_305_63490_20150408_025711_outLine +BABEL_OP3_305_63730_20150305_010517_inLine +BABEL_OP3_305_63730_20150305_010517_outLine +BABEL_OP3_305_63906_20150221_045610_inLine +BABEL_OP3_305_64259_20150719_092713_inLine +BABEL_OP3_305_64259_20150719_092713_outLine +BABEL_OP3_305_65077_20140801_012944_inLine +BABEL_OP3_305_65077_20140801_012944_outLine +BABEL_OP3_305_65561_20150214_033031_inLine 
+BABEL_OP3_305_65561_20150214_033031_outLine +BABEL_OP3_305_65639_20150428_024614_inLine +BABEL_OP3_305_65639_20150428_024614_outLine +BABEL_OP3_305_66001_20140620_042612_inLine +BABEL_OP3_305_66001_20140620_042612_outLine +BABEL_OP3_305_66361_20150320_085921_inLine +BABEL_OP3_305_66361_20150320_085921_outLine +BABEL_OP3_305_66959_20150225_060511_inLine +BABEL_OP3_305_66959_20150225_060511_outLine +BABEL_OP3_305_66971_20150507_025406_inLine +BABEL_OP3_305_66971_20150507_025406_outLine +BABEL_OP3_305_67085_20150522_035734_inLine +BABEL_OP3_305_67085_20150522_035734_outLine +BABEL_OP3_305_68924_20150203_052345_inLine +BABEL_OP3_305_68924_20150203_052345_outLine +BABEL_OP3_305_69107_20150123_023939_inLine +BABEL_OP3_305_69107_20150123_023939_outLine +BABEL_OP3_305_69474_20150217_095752_inLine +BABEL_OP3_305_69474_20150217_095752_outLine +BABEL_OP3_305_70216_20150418_044143_inLine +BABEL_OP3_305_70216_20150418_044143_outLine +BABEL_OP3_305_70216_20150418_045222_inLine +BABEL_OP3_305_70216_20150418_045222_outLine +BABEL_OP3_305_70343_20150213_010739_inLine +BABEL_OP3_305_70343_20150213_010739_outLine +BABEL_OP3_305_71067_20150206_022645_inLine +BABEL_OP3_305_71067_20150206_022645_outLine +BABEL_OP3_305_71704_20140730_042541_inLine +BABEL_OP3_305_71704_20140730_042541_outLine +BABEL_OP3_305_72040_20140905_002224_inLine +BABEL_OP3_305_72040_20140905_002224_outLine +BABEL_OP3_305_72952_20150712_063306_inLine +BABEL_OP3_305_72952_20150712_063306_outLine +BABEL_OP3_305_73299_20150712_044814_inLine +BABEL_OP3_305_73299_20150712_044814_outLine +BABEL_OP3_305_73305_20150328_030752_inLine +BABEL_OP3_305_73305_20150328_030752_outLine +BABEL_OP3_305_73814_20150207_014107_inLine +BABEL_OP3_305_74226_20150211_232229_inLine +BABEL_OP3_305_74226_20150211_232229_outLine +BABEL_OP3_305_74886_20140620_052822_inLine +BABEL_OP3_305_74886_20140620_052822_outLine +BABEL_OP3_305_75342_20150513_235657_inLine +BABEL_OP3_305_75342_20150513_235657_outLine +BABEL_OP3_305_75366_20150310_042904_inLine +BABEL_OP3_305_75366_20150310_042904_outLine +BABEL_OP3_305_75460_20150711_021713_inLine +BABEL_OP3_305_75460_20150711_021713_outLine +BABEL_OP3_305_76730_20140729_052201_inLine +BABEL_OP3_305_76773_20140823_031314_inLine +BABEL_OP3_305_76773_20140823_031314_outLine +BABEL_OP3_305_76902_20150320_043734_inLine +BABEL_OP3_305_76902_20150320_043734_outLine +BABEL_OP3_305_77730_20140730_051628_inLine +BABEL_OP3_305_77730_20140730_051628_outLine +BABEL_OP3_305_77832_20150317_003741_inLine +BABEL_OP3_305_77832_20150317_003741_outLine +BABEL_OP3_305_78116_20150213_013547_inLine +BABEL_OP3_305_78116_20150213_013547_outLine +BABEL_OP3_305_78194_20140618_010449_inLine +BABEL_OP3_305_78194_20140618_010449_outLine +BABEL_OP3_305_78254_20140801_003005_inLine +BABEL_OP3_305_78254_20140801_003005_outLine +BABEL_OP3_305_78454_20150127_025616_inLine +BABEL_OP3_305_78454_20150127_025616_outLine +BABEL_OP3_305_78511_20150225_034550_inLine +BABEL_OP3_305_78511_20150225_034550_outLine +BABEL_OP3_305_78877_20150428_004749_inLine +BABEL_OP3_305_78877_20150428_004749_outLine +BABEL_OP3_305_79429_20150319_013246_inLine +BABEL_OP3_305_79429_20150319_013246_outLine +BABEL_OP3_305_79660_20150712_042549_inLine +BABEL_OP3_305_79660_20150712_042549_outLine +BABEL_OP3_305_79898_20150307_091426_inLine +BABEL_OP3_305_79898_20150307_091426_outLine +BABEL_OP3_305_79898_20150307_093317_inLine +BABEL_OP3_305_79898_20150307_093317_outLine +BABEL_OP3_305_80559_20140731_042258_inLine +BABEL_OP3_305_80559_20140731_042258_outLine 
+BABEL_OP3_305_80989_20150712_091615_inLine +BABEL_OP3_305_80989_20150712_091615_outLine +BABEL_OP3_305_81433_20150127_070550_inLine +BABEL_OP3_305_81433_20150127_070550_outLine +BABEL_OP3_305_81810_20150208_075542_inLine +BABEL_OP3_305_81810_20150208_075542_outLine +BABEL_OP3_305_82145_20150301_063108_inLine +BABEL_OP3_305_82145_20150301_063108_outLine +BABEL_OP3_305_82145_20150301_064502_inLine +BABEL_OP3_305_82145_20150301_064502_outLine +BABEL_OP3_305_82425_20140620_053637_inLine +BABEL_OP3_305_82425_20140620_053637_outLine +BABEL_OP3_305_82742_20150124_054325_inLine +BABEL_OP3_305_82863_20141021_023356_inLine +BABEL_OP3_305_82863_20141021_023356_outLine +BABEL_OP3_305_83545_20150605_042852_inLine +BABEL_OP3_305_83545_20150605_042852_outLine +BABEL_OP3_305_83771_20150509_012937_inLine +BABEL_OP3_305_83771_20150509_012937_outLine +BABEL_OP3_305_83771_20150509_013635_inLine +BABEL_OP3_305_83771_20150509_013635_outLine +BABEL_OP3_305_83813_20150429_053518_inLine +BABEL_OP3_305_83813_20150429_053518_outLine +BABEL_OP3_305_84125_20140614_072153_inLine +BABEL_OP3_305_84125_20140614_072153_outLine +BABEL_OP3_305_84583_20150123_062012_inLine +BABEL_OP3_305_84583_20150123_062012_outLine +BABEL_OP3_305_85010_20150327_022501_inLine +BABEL_OP3_305_85010_20150327_022501_outLine +BABEL_OP3_305_85048_20150124_074706_inLine +BABEL_OP3_305_85048_20150124_074706_outLine +BABEL_OP3_305_85246_20150317_090655_inLine +BABEL_OP3_305_85246_20150317_090655_outLine +BABEL_OP3_305_85246_20150317_091545_inLine +BABEL_OP3_305_85246_20150317_091545_outLine +BABEL_OP3_305_85647_20150124_020413_inLine +BABEL_OP3_305_85647_20150124_020413_outLine +BABEL_OP3_305_85647_20150124_021612_inLine +BABEL_OP3_305_85647_20150124_021612_outLine +BABEL_OP3_305_86628_20150709_074216_inLine +BABEL_OP3_305_86628_20150709_074216_outLine +BABEL_OP3_305_86826_20150711_081659_inLine +BABEL_OP3_305_86826_20150711_081659_outLine +BABEL_OP3_305_87629_20150123_042545_inLine +BABEL_OP3_305_87629_20150123_042545_outLine +BABEL_OP3_305_87731_20150720_063702_inLine +BABEL_OP3_305_87731_20150720_063702_outLine +BABEL_OP3_305_87884_20150218_232216_inLine +BABEL_OP3_305_87884_20150218_232216_outLine +BABEL_OP3_305_88445_20150208_035054_inLine +BABEL_OP3_305_88673_20150719_085433_inLine +BABEL_OP3_305_88673_20150719_085433_outLine +BABEL_OP3_305_89203_20150802_011814_inLine +BABEL_OP3_305_89203_20150802_011814_outLine +BABEL_OP3_305_89888_20140730_034633_inLine +BABEL_OP3_305_89888_20140730_034633_outLine +BABEL_OP3_305_90417_20150721_002233_inLine +BABEL_OP3_305_90417_20150721_002233_outLine +BABEL_OP3_305_90740_20150326_035521_inLine +BABEL_OP3_305_90740_20150326_035521_outLine +BABEL_OP3_305_91189_20150708_000127_inLine +BABEL_OP3_305_91189_20150708_000127_outLine +BABEL_OP3_305_91336_20150122_061156_inLine +BABEL_OP3_305_91336_20150122_061156_outLine +BABEL_OP3_305_91411_20150425_040553_inLine +BABEL_OP3_305_91411_20150425_040553_outLine +BABEL_OP3_305_91760_20150508_021256_inLine +BABEL_OP3_305_91760_20150508_021256_outLine +BABEL_OP3_305_91891_20150214_084516_inLine +BABEL_OP3_305_91891_20150214_084516_outLine +BABEL_OP3_305_91930_20150219_015305_inLine +BABEL_OP3_305_91930_20150219_015305_outLine +BABEL_OP3_305_91930_20150219_015722_inLine +BABEL_OP3_305_91930_20150219_015722_outLine +BABEL_OP3_305_92065_20150227_020241_inLine +BABEL_OP3_305_92077_20150528_054240_inLine +BABEL_OP3_305_92077_20150528_054240_outLine +BABEL_OP3_305_92096_20150224_003248_inLine +BABEL_OP3_305_92096_20150224_003248_outLine 
+BABEL_OP3_305_92252_20140619_005416_inLine +BABEL_OP3_305_92252_20140619_005416_outLine +BABEL_OP3_305_92792_20150220_033116_inLine +BABEL_OP3_305_93222_20150325_030317_inLine +BABEL_OP3_305_93222_20150325_030317_outLine +BABEL_OP3_305_93515_20150528_011902_inLine +BABEL_OP3_305_93515_20150528_011902_outLine +BABEL_OP3_305_93604_20150522_043957_inLine +BABEL_OP3_305_93604_20150522_043957_outLine +BABEL_OP3_305_93632_20150730_105129_inLine +BABEL_OP3_305_93632_20150730_105129_outLine +BABEL_OP3_305_93964_20150122_021516_inLine +BABEL_OP3_305_93964_20150122_021516_outLine +BABEL_OP3_305_93964_20150122_024514_inLine +BABEL_OP3_305_93964_20150122_024514_outLine +BABEL_OP3_305_93964_20150122_025759_inLine +BABEL_OP3_305_93964_20150122_025759_outLine +BABEL_OP3_305_94035_20150429_013519_inLine +BABEL_OP3_305_94035_20150429_013519_outLine +BABEL_OP3_305_94212_20150425_011456_inLine +BABEL_OP3_305_94212_20150425_011456_outLine +BABEL_OP3_305_94442_20150507_034412_inLine +BABEL_OP3_305_94442_20150507_034412_outLine +BABEL_OP3_305_94803_20150317_093455_inLine +BABEL_OP3_305_94803_20150317_093455_outLine +BABEL_OP3_305_94869_20140619_054259_inLine +BABEL_OP3_305_94891_20150720_080832_inLine +BABEL_OP3_305_94891_20150720_080832_outLine +BABEL_OP3_305_94923_20150123_064032_inLine +BABEL_OP3_305_94923_20150123_064032_outLine +BABEL_OP3_305_95028_20150320_013045_inLine +BABEL_OP3_305_95028_20150320_013944_inLine +BABEL_OP3_305_95294_20150207_015416_inLine +BABEL_OP3_305_95294_20150207_020517_inLine +BABEL_OP3_305_95571_20150326_084852_inLine +BABEL_OP3_305_95571_20150326_084852_outLine +BABEL_OP3_305_96405_20140621_013139_inLine +BABEL_OP3_305_96405_20140621_013139_outLine +BABEL_OP3_305_96405_20140621_015225_inLine +BABEL_OP3_305_96405_20140621_015225_outLine +BABEL_OP3_305_96584_20141107_045031_inLine +BABEL_OP3_305_96584_20141107_045031_outLine +BABEL_OP3_305_96808_20150507_011006_inLine +BABEL_OP3_305_96808_20150507_011006_outLine +BABEL_OP3_305_96940_20150320_051125_inLine +BABEL_OP3_305_96940_20150320_051125_outLine +BABEL_OP3_305_97136_20150224_085912_inLine +BABEL_OP3_305_97136_20150224_085912_outLine +BABEL_OP3_305_97731_20150731_083617_inLine +BABEL_OP3_305_97731_20150731_083617_outLine +BABEL_OP3_305_98390_20140619_004932_inLine +BABEL_OP3_305_98390_20140619_004932_outLine +BABEL_OP3_305_99813_20150127_075030_inLine +BABEL_OP3_305_99813_20150127_075030_outLine +BABEL_OP3_305_99887_20141028_055805_inLine +BABEL_OP3_305_99887_20141028_055805_outLine
diff --git a/egs/babel/s5d/conf/lists/306-igbo/dev.2h.list b/egs/babel/s5d/conf/lists/306-igbo/dev.2h.list
new file mode 100644
index 00000000000..cf0824db01d
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/306-igbo/dev.2h.list
@@ -0,0 +1,136 @@
+BABEL_OP3_306_10036_20140729_233849_inLine +BABEL_OP3_306_10036_20140729_233849_outLine +BABEL_OP3_306_10036_20140729_234612_inLine +BABEL_OP3_306_10036_20140729_234612_outLine +BABEL_OP3_306_11681_20140620_015031_inLine +BABEL_OP3_306_11681_20140620_015031_outLine +BABEL_OP3_306_11681_20140620_020405_inLine +BABEL_OP3_306_11681_20140620_020405_outLine +BABEL_OP3_306_13427_20140810_232413_inLine +BABEL_OP3_306_13427_20140810_232413_outLine +BABEL_OP3_306_13744_20150303_033441_inLine +BABEL_OP3_306_13744_20150303_033441_outLine +BABEL_OP3_306_19722_20150304_045710_inLine +BABEL_OP3_306_19782_20141026_011352_inLine +BABEL_OP3_306_19782_20141026_011352_outLine +BABEL_OP3_306_19818_20140801_211130_inLine +BABEL_OP3_306_19818_20140801_211130_outLine +BABEL_OP3_306_21807_20150310_215245_inLine
+BABEL_OP3_306_21807_20150310_215245_outLine +BABEL_OP3_306_23098_20150410_035508_inLine +BABEL_OP3_306_23098_20150410_035508_outLine +BABEL_OP3_306_25961_20140607_021757_inLine +BABEL_OP3_306_25961_20140607_021757_outLine +BABEL_OP3_306_28419_20140606_201307_inLine +BABEL_OP3_306_28419_20140606_201307_outLine +BABEL_OP3_306_29023_20140614_002447_inLine +BABEL_OP3_306_29023_20140614_002447_outLine +BABEL_OP3_306_33497_20140730_031414_inLine +BABEL_OP3_306_33497_20140730_031414_outLine +BABEL_OP3_306_33497_20140803_034655_inLine +BABEL_OP3_306_33497_20140803_034655_outLine +BABEL_OP3_306_34197_20140520_215059_inLine +BABEL_OP3_306_34197_20140520_215059_outLine +BABEL_OP3_306_35420_20140527_001314_inLine +BABEL_OP3_306_35420_20140527_001314_outLine +BABEL_OP3_306_36990_20140803_235016_inLine +BABEL_OP3_306_36990_20140803_235016_outLine +BABEL_OP3_306_36990_20140804_000605_inLine +BABEL_OP3_306_36990_20140804_000605_outLine +BABEL_OP3_306_39744_20140514_001627_inLine +BABEL_OP3_306_39744_20140514_001627_outLine +BABEL_OP3_306_40740_20141030_012619_inLine +BABEL_OP3_306_40740_20141030_012619_outLine +BABEL_OP3_306_44347_20141028_001614_inLine +BABEL_OP3_306_44347_20141028_001614_outLine +BABEL_OP3_306_47882_20140524_204056_inLine +BABEL_OP3_306_47882_20140524_204056_outLine +BABEL_OP3_306_50427_20140805_190819_inLine +BABEL_OP3_306_50427_20140805_190819_outLine +BABEL_OP3_306_50726_20140521_235356_inLine +BABEL_OP3_306_50726_20140521_235356_outLine +BABEL_OP3_306_51417_20141103_210924_inLine +BABEL_OP3_306_51417_20141103_210924_outLine +BABEL_OP3_306_52301_20140607_003158_inLine +BABEL_OP3_306_52301_20140607_003158_outLine +BABEL_OP3_306_53842_20140905_005627_inLine +BABEL_OP3_306_53842_20140905_005627_outLine +BABEL_OP3_306_54530_20141006_030910_inLine +BABEL_OP3_306_54530_20141006_030910_outLine +BABEL_OP3_306_56677_20141007_020945_inLine +BABEL_OP3_306_56677_20141007_020945_outLine +BABEL_OP3_306_57141_20141026_224125_inLine +BABEL_OP3_306_57141_20141026_224125_outLine +BABEL_OP3_306_58107_20140805_204322_inLine +BABEL_OP3_306_58107_20140805_204322_outLine +BABEL_OP3_306_58585_20141028_233305_inLine +BABEL_OP3_306_58585_20141028_233305_outLine +BABEL_OP3_306_59635_20141031_194036_inLine +BABEL_OP3_306_59635_20141031_194036_outLine +BABEL_OP3_306_60508_20140521_055301_inLine +BABEL_OP3_306_60508_20140521_055301_outLine +BABEL_OP3_306_60778_20140527_195205_inLine +BABEL_OP3_306_60778_20140527_195205_outLine +BABEL_OP3_306_63334_20150216_005033_inLine +BABEL_OP3_306_63334_20150216_005033_outLine +BABEL_OP3_306_63490_20140524_215813_inLine +BABEL_OP3_306_63490_20140524_215813_outLine +BABEL_OP3_306_64722_20141223_013811_inLine +BABEL_OP3_306_64722_20141223_013811_outLine +BABEL_OP3_306_66959_20141031_215547_inLine +BABEL_OP3_306_66959_20141031_215547_outLine +BABEL_OP3_306_68289_20141113_024309_inLine +BABEL_OP3_306_68289_20141113_024309_outLine +BABEL_OP3_306_69636_20140804_020846_inLine +BABEL_OP3_306_69636_20140804_020846_outLine +BABEL_OP3_306_71047_20141028_021029_inLine +BABEL_OP3_306_71047_20141028_021029_outLine +BABEL_OP3_306_71460_20150215_025120_inLine +BABEL_OP3_306_71460_20150215_025120_outLine +BABEL_OP3_306_76756_20140803_011009_inLine +BABEL_OP3_306_76756_20140803_011009_outLine +BABEL_OP3_306_76756_20140803_011841_inLine +BABEL_OP3_306_76756_20140803_011841_outLine +BABEL_OP3_306_76756_20140803_012244_inLine +BABEL_OP3_306_76756_20140803_012244_outLine +BABEL_OP3_306_77112_20140609_224704_inLine +BABEL_OP3_306_77112_20140609_224704_outLine 
+BABEL_OP3_306_77803_20140517_202422_inLine +BABEL_OP3_306_77803_20140517_202422_outLine +BABEL_OP3_306_79451_20140608_012042_inLine +BABEL_OP3_306_79451_20140608_012042_outLine +BABEL_OP3_306_79723_20150331_184104_inLine +BABEL_OP3_306_79723_20150331_184104_outLine +BABEL_OP3_306_79995_20141025_230126_inLine +BABEL_OP3_306_79995_20141025_230126_outLine +BABEL_OP3_306_82145_20141223_031926_inLine +BABEL_OP3_306_82145_20141223_031926_outLine +BABEL_OP3_306_83455_20140804_235008_inLine +BABEL_OP3_306_83455_20140804_235008_outLine +BABEL_OP3_306_83643_20150404_031037_inLine +BABEL_OP3_306_83643_20150404_031037_outLine +BABEL_OP3_306_84079_20150402_221122_inLine +BABEL_OP3_306_84079_20150402_221122_outLine +BABEL_OP3_306_87280_20141026_002639_inLine +BABEL_OP3_306_87280_20141026_002639_outLine +BABEL_OP3_306_87298_20140609_033909_inLine +BABEL_OP3_306_87298_20140609_033909_outLine +BABEL_OP3_306_87313_20140802_000850_inLine +BABEL_OP3_306_87313_20140802_000850_outLine +BABEL_OP3_306_87313_20140802_001509_inLine +BABEL_OP3_306_87313_20140802_001509_outLine +BABEL_OP3_306_87313_20140802_002411_inLine +BABEL_OP3_306_87313_20140802_002411_outLine +BABEL_OP3_306_88925_20141025_235636_inLine +BABEL_OP3_306_88925_20141025_235636_outLine +BABEL_OP3_306_92176_20140803_000102_inLine +BABEL_OP3_306_92176_20140803_000102_outLine +BABEL_OP3_306_94035_20140528_224527_inLine +BABEL_OP3_306_94035_20140528_224527_outLine +BABEL_OP3_306_94212_20140525_012758_inLine +BABEL_OP3_306_94212_20140525_012758_outLine +BABEL_OP3_306_95077_20141031_230550_inLine +BABEL_OP3_306_95077_20141031_230550_outLine +BABEL_OP3_306_95294_20140808_012803_inLine +BABEL_OP3_306_95663_20140513_213124_inLine +BABEL_OP3_306_95663_20140513_213124_outLine
diff --git a/egs/babel/s5d/conf/lists/306-igbo/dev.list b/egs/babel/s5d/conf/lists/306-igbo/dev.list
new file mode 100644
index 00000000000..cf0824db01d
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/306-igbo/dev.list
@@ -0,0 +1,136 @@
+BABEL_OP3_306_10036_20140729_233849_inLine +BABEL_OP3_306_10036_20140729_233849_outLine +BABEL_OP3_306_10036_20140729_234612_inLine +BABEL_OP3_306_10036_20140729_234612_outLine +BABEL_OP3_306_11681_20140620_015031_inLine +BABEL_OP3_306_11681_20140620_015031_outLine +BABEL_OP3_306_11681_20140620_020405_inLine +BABEL_OP3_306_11681_20140620_020405_outLine +BABEL_OP3_306_13427_20140810_232413_inLine +BABEL_OP3_306_13427_20140810_232413_outLine +BABEL_OP3_306_13744_20150303_033441_inLine +BABEL_OP3_306_13744_20150303_033441_outLine +BABEL_OP3_306_19722_20150304_045710_inLine +BABEL_OP3_306_19782_20141026_011352_inLine +BABEL_OP3_306_19782_20141026_011352_outLine +BABEL_OP3_306_19818_20140801_211130_inLine +BABEL_OP3_306_19818_20140801_211130_outLine +BABEL_OP3_306_21807_20150310_215245_inLine +BABEL_OP3_306_21807_20150310_215245_outLine +BABEL_OP3_306_23098_20150410_035508_inLine +BABEL_OP3_306_23098_20150410_035508_outLine +BABEL_OP3_306_25961_20140607_021757_inLine +BABEL_OP3_306_25961_20140607_021757_outLine +BABEL_OP3_306_28419_20140606_201307_inLine +BABEL_OP3_306_28419_20140606_201307_outLine +BABEL_OP3_306_29023_20140614_002447_inLine +BABEL_OP3_306_29023_20140614_002447_outLine +BABEL_OP3_306_33497_20140730_031414_inLine +BABEL_OP3_306_33497_20140730_031414_outLine +BABEL_OP3_306_33497_20140803_034655_inLine +BABEL_OP3_306_33497_20140803_034655_outLine +BABEL_OP3_306_34197_20140520_215059_inLine +BABEL_OP3_306_34197_20140520_215059_outLine +BABEL_OP3_306_35420_20140527_001314_inLine +BABEL_OP3_306_35420_20140527_001314_outLine
+BABEL_OP3_306_36990_20140803_235016_inLine +BABEL_OP3_306_36990_20140803_235016_outLine +BABEL_OP3_306_36990_20140804_000605_inLine +BABEL_OP3_306_36990_20140804_000605_outLine +BABEL_OP3_306_39744_20140514_001627_inLine +BABEL_OP3_306_39744_20140514_001627_outLine +BABEL_OP3_306_40740_20141030_012619_inLine +BABEL_OP3_306_40740_20141030_012619_outLine +BABEL_OP3_306_44347_20141028_001614_inLine +BABEL_OP3_306_44347_20141028_001614_outLine +BABEL_OP3_306_47882_20140524_204056_inLine +BABEL_OP3_306_47882_20140524_204056_outLine +BABEL_OP3_306_50427_20140805_190819_inLine +BABEL_OP3_306_50427_20140805_190819_outLine +BABEL_OP3_306_50726_20140521_235356_inLine +BABEL_OP3_306_50726_20140521_235356_outLine +BABEL_OP3_306_51417_20141103_210924_inLine +BABEL_OP3_306_51417_20141103_210924_outLine +BABEL_OP3_306_52301_20140607_003158_inLine +BABEL_OP3_306_52301_20140607_003158_outLine +BABEL_OP3_306_53842_20140905_005627_inLine +BABEL_OP3_306_53842_20140905_005627_outLine +BABEL_OP3_306_54530_20141006_030910_inLine +BABEL_OP3_306_54530_20141006_030910_outLine +BABEL_OP3_306_56677_20141007_020945_inLine +BABEL_OP3_306_56677_20141007_020945_outLine +BABEL_OP3_306_57141_20141026_224125_inLine +BABEL_OP3_306_57141_20141026_224125_outLine +BABEL_OP3_306_58107_20140805_204322_inLine +BABEL_OP3_306_58107_20140805_204322_outLine +BABEL_OP3_306_58585_20141028_233305_inLine +BABEL_OP3_306_58585_20141028_233305_outLine +BABEL_OP3_306_59635_20141031_194036_inLine +BABEL_OP3_306_59635_20141031_194036_outLine +BABEL_OP3_306_60508_20140521_055301_inLine +BABEL_OP3_306_60508_20140521_055301_outLine +BABEL_OP3_306_60778_20140527_195205_inLine +BABEL_OP3_306_60778_20140527_195205_outLine +BABEL_OP3_306_63334_20150216_005033_inLine +BABEL_OP3_306_63334_20150216_005033_outLine +BABEL_OP3_306_63490_20140524_215813_inLine +BABEL_OP3_306_63490_20140524_215813_outLine +BABEL_OP3_306_64722_20141223_013811_inLine +BABEL_OP3_306_64722_20141223_013811_outLine +BABEL_OP3_306_66959_20141031_215547_inLine +BABEL_OP3_306_66959_20141031_215547_outLine +BABEL_OP3_306_68289_20141113_024309_inLine +BABEL_OP3_306_68289_20141113_024309_outLine +BABEL_OP3_306_69636_20140804_020846_inLine +BABEL_OP3_306_69636_20140804_020846_outLine +BABEL_OP3_306_71047_20141028_021029_inLine +BABEL_OP3_306_71047_20141028_021029_outLine +BABEL_OP3_306_71460_20150215_025120_inLine +BABEL_OP3_306_71460_20150215_025120_outLine +BABEL_OP3_306_76756_20140803_011009_inLine +BABEL_OP3_306_76756_20140803_011009_outLine +BABEL_OP3_306_76756_20140803_011841_inLine +BABEL_OP3_306_76756_20140803_011841_outLine +BABEL_OP3_306_76756_20140803_012244_inLine +BABEL_OP3_306_76756_20140803_012244_outLine +BABEL_OP3_306_77112_20140609_224704_inLine +BABEL_OP3_306_77112_20140609_224704_outLine +BABEL_OP3_306_77803_20140517_202422_inLine +BABEL_OP3_306_77803_20140517_202422_outLine +BABEL_OP3_306_79451_20140608_012042_inLine +BABEL_OP3_306_79451_20140608_012042_outLine +BABEL_OP3_306_79723_20150331_184104_inLine +BABEL_OP3_306_79723_20150331_184104_outLine +BABEL_OP3_306_79995_20141025_230126_inLine +BABEL_OP3_306_79995_20141025_230126_outLine +BABEL_OP3_306_82145_20141223_031926_inLine +BABEL_OP3_306_82145_20141223_031926_outLine +BABEL_OP3_306_83455_20140804_235008_inLine +BABEL_OP3_306_83455_20140804_235008_outLine +BABEL_OP3_306_83643_20150404_031037_inLine +BABEL_OP3_306_83643_20150404_031037_outLine +BABEL_OP3_306_84079_20150402_221122_inLine +BABEL_OP3_306_84079_20150402_221122_outLine +BABEL_OP3_306_87280_20141026_002639_inLine 
+BABEL_OP3_306_87280_20141026_002639_outLine +BABEL_OP3_306_87298_20140609_033909_inLine +BABEL_OP3_306_87298_20140609_033909_outLine +BABEL_OP3_306_87313_20140802_000850_inLine +BABEL_OP3_306_87313_20140802_000850_outLine +BABEL_OP3_306_87313_20140802_001509_inLine +BABEL_OP3_306_87313_20140802_001509_outLine +BABEL_OP3_306_87313_20140802_002411_inLine +BABEL_OP3_306_87313_20140802_002411_outLine +BABEL_OP3_306_88925_20141025_235636_inLine +BABEL_OP3_306_88925_20141025_235636_outLine +BABEL_OP3_306_92176_20140803_000102_inLine +BABEL_OP3_306_92176_20140803_000102_outLine +BABEL_OP3_306_94035_20140528_224527_inLine +BABEL_OP3_306_94035_20140528_224527_outLine +BABEL_OP3_306_94212_20140525_012758_inLine +BABEL_OP3_306_94212_20140525_012758_outLine +BABEL_OP3_306_95077_20141031_230550_inLine +BABEL_OP3_306_95077_20141031_230550_outLine +BABEL_OP3_306_95294_20140808_012803_inLine +BABEL_OP3_306_95663_20140513_213124_inLine +BABEL_OP3_306_95663_20140513_213124_outLine
diff --git a/egs/babel/s5d/conf/lists/306-igbo/eval.list b/egs/babel/s5d/conf/lists/306-igbo/eval.list
new file mode 100644
index 00000000000..c9db48fb2e1
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/306-igbo/eval.list
@@ -0,0 +1,194 @@
+BABEL_OP3_306_11673_20140513_040551_inLine +BABEL_OP3_306_11673_20140513_040551_outLine +BABEL_OP3_306_12321_20141027_232351_inLine +BABEL_OP3_306_12321_20141027_232351_outLine +BABEL_OP3_306_12635_20141101_005451_inLine +BABEL_OP3_306_12635_20141101_005451_outLine +BABEL_OP3_306_13490_20140802_230433_inLine +BABEL_OP3_306_13490_20140802_230433_outLine +BABEL_OP3_306_13490_20140802_232130_inLine +BABEL_OP3_306_13490_20140802_232130_outLine +BABEL_OP3_306_13586_20140802_035824_inLine +BABEL_OP3_306_13586_20140802_035824_outLine +BABEL_OP3_306_13792_20140531_014010_inLine +BABEL_OP3_306_13792_20140531_014010_outLine +BABEL_OP3_306_14537_20150311_192951_inLine +BABEL_OP3_306_14537_20150311_192951_outLine +BABEL_OP3_306_15730_20140521_222017_inLine +BABEL_OP3_306_15730_20140521_222017_outLine +BABEL_OP3_306_15848_20140510_004027_inLine +BABEL_OP3_306_15848_20140510_004027_outLine +BABEL_OP3_306_18924_20140814_021546_inLine +BABEL_OP3_306_18924_20140814_021546_outLine +BABEL_OP3_306_20916_20140520_205947_inLine +BABEL_OP3_306_20916_20140520_205947_outLine +BABEL_OP3_306_21206_20140621_194701_inLine +BABEL_OP3_306_21206_20140621_194701_outLine +BABEL_OP3_306_22641_20150312_020316_inLine +BABEL_OP3_306_22641_20150312_020316_outLine +BABEL_OP3_306_23628_20140603_213715_inLine +BABEL_OP3_306_23628_20140603_213715_outLine +BABEL_OP3_306_26999_20140729_223316_inLine +BABEL_OP3_306_26999_20140729_223316_outLine +BABEL_OP3_306_28775_20140620_234019_inLine +BABEL_OP3_306_28775_20140620_234019_outLine +BABEL_OP3_306_29135_20140509_234939_inLine +BABEL_OP3_306_29135_20140509_234939_outLine +BABEL_OP3_306_29352_20150316_234927_inLine +BABEL_OP3_306_29352_20150316_234927_outLine +BABEL_OP3_306_30058_20141221_102805_inLine +BABEL_OP3_306_30058_20141221_102805_outLine +BABEL_OP3_306_30345_20141029_013617_inLine +BABEL_OP3_306_30345_20141029_013617_outLine +BABEL_OP3_306_31490_20150416_203824_inLine +BABEL_OP3_306_31490_20150416_203824_outLine +BABEL_OP3_306_32301_20140924_003519_inLine +BABEL_OP3_306_32301_20140924_003519_outLine +BABEL_OP3_306_32328_20141029_221831_inLine +BABEL_OP3_306_32328_20141029_221831_outLine +BABEL_OP3_306_33273_20141016_012203_inLine +BABEL_OP3_306_33273_20141016_012203_outLine +BABEL_OP3_306_34903_20140812_000146_inLine +BABEL_OP3_306_34903_20140812_000146_outLine
+BABEL_OP3_306_35788_20150410_005320_inLine +BABEL_OP3_306_35788_20150410_005320_outLine +BABEL_OP3_306_36341_20140509_022205_inLine +BABEL_OP3_306_36341_20140509_022205_outLine +BABEL_OP3_306_36341_20140509_022936_inLine +BABEL_OP3_306_36341_20140509_022936_outLine +BABEL_OP3_306_37064_20140606_222758_inLine +BABEL_OP3_306_37064_20140606_222758_outLine +BABEL_OP3_306_38689_20141009_214007_inLine +BABEL_OP3_306_38689_20141009_214007_outLine +BABEL_OP3_306_39159_20140509_230506_inLine +BABEL_OP3_306_39159_20140509_230506_outLine +BABEL_OP3_306_39927_20150216_011520_inLine +BABEL_OP3_306_39927_20150216_011520_outLine +BABEL_OP3_306_41174_20140730_214115_inLine +BABEL_OP3_306_41174_20140730_214115_outLine +BABEL_OP3_306_41542_20141031_044512_inLine +BABEL_OP3_306_41542_20141031_044512_outLine +BABEL_OP3_306_42834_20140813_025421_inLine +BABEL_OP3_306_42834_20140813_025421_outLine +BABEL_OP3_306_42942_20141010_020223_inLine +BABEL_OP3_306_42942_20141010_020223_outLine +BABEL_OP3_306_43646_20140510_012702_inLine +BABEL_OP3_306_43646_20140510_012702_outLine +BABEL_OP3_306_46333_20150412_023828_inLine +BABEL_OP3_306_46333_20150412_023828_outLine +BABEL_OP3_306_47215_20140714_024322_inLine +BABEL_OP3_306_47215_20140714_024322_outLine +BABEL_OP3_306_48399_20140531_222338_inLine +BABEL_OP3_306_48399_20140531_222338_outLine +BABEL_OP3_306_49216_20140512_234713_inLine +BABEL_OP3_306_49216_20140512_234713_outLine +BABEL_OP3_306_51407_20140808_210301_inLine +BABEL_OP3_306_51407_20140808_210301_outLine +BABEL_OP3_306_51407_20140808_211334_inLine +BABEL_OP3_306_51407_20140808_211334_outLine +BABEL_OP3_306_51955_20140604_224650_inLine +BABEL_OP3_306_51955_20140604_224650_outLine +BABEL_OP3_306_52694_20140811_233144_inLine +BABEL_OP3_306_52694_20140811_233144_outLine +BABEL_OP3_306_53917_20141031_222826_inLine +BABEL_OP3_306_53917_20141031_222826_outLine +BABEL_OP3_306_56429_20140622_011257_inLine +BABEL_OP3_306_56429_20140622_011257_outLine +BABEL_OP3_306_56606_20150403_212810_inLine +BABEL_OP3_306_56606_20150403_212810_outLine +BABEL_OP3_306_56743_20140802_030717_inLine +BABEL_OP3_306_56743_20140802_030717_outLine +BABEL_OP3_306_57035_20150410_033837_inLine +BABEL_OP3_306_57035_20150410_033837_outLine +BABEL_OP3_306_57093_20140728_215709_inLine +BABEL_OP3_306_57093_20140728_215709_outLine +BABEL_OP3_306_57093_20140728_221243_inLine +BABEL_OP3_306_57093_20140728_221243_outLine +BABEL_OP3_306_57093_20140729_003342_inLine +BABEL_OP3_306_57093_20140729_003342_outLine +BABEL_OP3_306_57116_20140517_222852_inLine +BABEL_OP3_306_57116_20140517_222852_outLine +BABEL_OP3_306_59928_20140610_024019_inLine +BABEL_OP3_306_59928_20140610_024019_outLine +BABEL_OP3_306_60706_20140531_003048_inLine +BABEL_OP3_306_60706_20140531_003048_outLine +BABEL_OP3_306_61684_20150420_023032_inLine +BABEL_OP3_306_61684_20150420_023032_outLine +BABEL_OP3_306_62545_20140527_204602_inLine +BABEL_OP3_306_62545_20140527_204602_outLine +BABEL_OP3_306_62835_20140905_002934_inLine +BABEL_OP3_306_62835_20140905_002934_outLine +BABEL_OP3_306_63081_20140509_000544_inLine +BABEL_OP3_306_63081_20140509_000544_outLine +BABEL_OP3_306_63445_20140521_030723_inLine +BABEL_OP3_306_63445_20140521_030723_outLine +BABEL_OP3_306_63481_20140522_195610_inLine +BABEL_OP3_306_63481_20140522_195610_outLine +BABEL_OP3_306_64494_20140605_043852_inLine +BABEL_OP3_306_64494_20140605_043852_outLine +BABEL_OP3_306_66026_20141101_233612_inLine +BABEL_OP3_306_66026_20141101_233612_outLine +BABEL_OP3_306_67283_20140606_231809_inLine 
+BABEL_OP3_306_67283_20140606_231809_outLine +BABEL_OP3_306_69992_20150421_045903_inLine +BABEL_OP3_306_69992_20150421_045903_outLine +BABEL_OP3_306_70452_20140531_022425_inLine +BABEL_OP3_306_70452_20140531_022425_outLine +BABEL_OP3_306_72073_20150220_210400_inLine +BABEL_OP3_306_72073_20150220_210400_outLine +BABEL_OP3_306_73518_20141028_214326_inLine +BABEL_OP3_306_73518_20141028_214326_outLine +BABEL_OP3_306_73591_20140510_022335_inLine +BABEL_OP3_306_73591_20140510_022335_outLine +BABEL_OP3_306_73814_20140724_034710_inLine +BABEL_OP3_306_73814_20140724_034710_outLine +BABEL_OP3_306_75342_20141006_210132_inLine +BABEL_OP3_306_75342_20141006_210132_outLine +BABEL_OP3_306_75342_20141006_211900_inLine +BABEL_OP3_306_75342_20141006_211900_outLine +BABEL_OP3_306_76499_20140729_230952_inLine +BABEL_OP3_306_76499_20140729_230952_outLine +BABEL_OP3_306_78877_20140527_221925_inLine +BABEL_OP3_306_78877_20140527_221925_outLine +BABEL_OP3_306_79107_20150418_021409_inLine +BABEL_OP3_306_79107_20150418_021409_outLine +BABEL_OP3_306_84029_20150415_035216_inLine +BABEL_OP3_306_84029_20150415_035216_outLine +BABEL_OP3_306_84125_20140519_232101_inLine +BABEL_OP3_306_84125_20140519_232101_outLine +BABEL_OP3_306_84547_20140514_224528_inLine +BABEL_OP3_306_84547_20140514_224528_outLine +BABEL_OP3_306_87693_20140620_002643_inLine +BABEL_OP3_306_87693_20140620_002643_outLine +BABEL_OP3_306_88686_20150402_213711_inLine +BABEL_OP3_306_88686_20150402_213711_outLine +BABEL_OP3_306_88988_20150317_002311_inLine +BABEL_OP3_306_88988_20150317_002311_outLine +BABEL_OP3_306_90935_20140725_035705_inLine +BABEL_OP3_306_90935_20140725_035705_outLine +BABEL_OP3_306_92942_20140723_005927_inLine +BABEL_OP3_306_92942_20140723_005927_outLine +BABEL_OP3_306_93937_20150317_060204_inLine +BABEL_OP3_306_93937_20150317_060204_outLine +BABEL_OP3_306_94713_20140529_005611_inLine +BABEL_OP3_306_94713_20140529_005611_outLine +BABEL_OP3_306_95490_20140521_225751_inLine +BABEL_OP3_306_95490_20140521_225751_outLine +BABEL_OP3_306_95935_20141028_222645_inLine +BABEL_OP3_306_95935_20141028_222645_outLine +BABEL_OP3_306_96324_20140531_010613_inLine +BABEL_OP3_306_96324_20140531_010613_outLine +BABEL_OP3_306_96405_20140606_005741_inLine +BABEL_OP3_306_96405_20140606_005741_outLine +BABEL_OP3_306_96680_20140528_005805_inLine +BABEL_OP3_306_96680_20140528_005805_outLine +BABEL_OP3_306_96910_20140605_201948_inLine +BABEL_OP3_306_96910_20140605_201948_outLine +BABEL_OP3_306_96934_20140604_223915_inLine +BABEL_OP3_306_96934_20140604_223915_outLine +BABEL_OP3_306_98489_20140612_194947_inLine +BABEL_OP3_306_98489_20140612_194947_outLine +BABEL_OP3_306_98489_20140612_195637_inLine +BABEL_OP3_306_98489_20140612_195637_outLine +BABEL_OP3_306_99401_20140714_020007_inLine +BABEL_OP3_306_99401_20140714_020007_outLine
diff --git a/egs/babel/s5d/conf/lists/306-igbo/sub-train.list b/egs/babel/s5d/conf/lists/306-igbo/sub-train.list
new file mode 100644
index 00000000000..f72794f4c94
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/306-igbo/sub-train.list
@@ -0,0 +1,132 @@
+BABEL_OP3_306_10524_20150307_210859_inLine +BABEL_OP3_306_10524_20150307_210859_outLine +BABEL_OP3_306_14575_20140530_194144_inLine +BABEL_OP3_306_14575_20140530_194144_outLine +BABEL_OP3_306_15926_20140815_011013_inLine +BABEL_OP3_306_18490_20150402_010442_inLine +BABEL_OP3_306_18490_20150402_010442_outLine +BABEL_OP3_306_19444_20150214_045709_inLine +BABEL_OP3_306_19444_20150214_045709_outLine +BABEL_OP3_306_20721_20140529_000851_inLine +BABEL_OP3_306_20721_20140529_000851_outLine
+BABEL_OP3_306_21581_20140724_022000_inLine +BABEL_OP3_306_21581_20140724_022000_outLine +BABEL_OP3_306_24037_20140524_232238_inLine +BABEL_OP3_306_24037_20140524_232238_outLine +BABEL_OP3_306_26074_20140815_015119_inLine +BABEL_OP3_306_26074_20140815_015119_outLine +BABEL_OP3_306_26478_20150317_053650_inLine +BABEL_OP3_306_26478_20150317_053650_outLine +BABEL_OP3_306_27218_20140625_013736_inLine +BABEL_OP3_306_28538_20140919_192901_inLine +BABEL_OP3_306_28538_20140919_192901_outLine +BABEL_OP3_306_28945_20140610_222125_inLine +BABEL_OP3_306_28945_20140610_222125_outLine +BABEL_OP3_306_31182_20141028_015316_inLine +BABEL_OP3_306_31182_20141028_015316_outLine +BABEL_OP3_306_31346_20141029_183248_inLine +BABEL_OP3_306_31346_20141029_183248_outLine +BABEL_OP3_306_33840_20141031_013533_inLine +BABEL_OP3_306_33840_20141031_013533_outLine +BABEL_OP3_306_33840_20141031_014151_inLine +BABEL_OP3_306_33840_20141031_014151_outLine +BABEL_OP3_306_36293_20140521_011821_inLine +BABEL_OP3_306_36293_20140521_011821_outLine +BABEL_OP3_306_40686_20140523_014206_inLine +BABEL_OP3_306_40686_20140523_014206_outLine +BABEL_OP3_306_44709_20140728_212605_inLine +BABEL_OP3_306_44709_20140728_212605_outLine +BABEL_OP3_306_48610_20140604_003825_inLine +BABEL_OP3_306_48610_20140604_003825_outLine +BABEL_OP3_306_49437_20141029_030600_inLine +BABEL_OP3_306_49437_20141029_030600_outLine +BABEL_OP3_306_50175_20150402_210041_inLine +BABEL_OP3_306_50175_20150402_210041_outLine +BABEL_OP3_306_50962_20140605_232213_inLine +BABEL_OP3_306_50962_20140605_232213_outLine +BABEL_OP3_306_55818_20140603_031605_inLine +BABEL_OP3_306_55818_20140603_031605_outLine +BABEL_OP3_306_55902_20150313_043244_inLine +BABEL_OP3_306_55902_20150313_043244_outLine +BABEL_OP3_306_55968_20140515_005800_inLine +BABEL_OP3_306_55968_20140515_005800_outLine +BABEL_OP3_306_56925_20150214_231609_inLine +BABEL_OP3_306_56925_20150214_231609_outLine +BABEL_OP3_306_59898_20150411_024935_inLine +BABEL_OP3_306_59898_20150411_024935_outLine +BABEL_OP3_306_62491_20140528_021234_inLine +BABEL_OP3_306_62491_20140528_021234_outLine +BABEL_OP3_306_62724_20141031_231843_inLine +BABEL_OP3_306_62724_20141031_231843_outLine +BABEL_OP3_306_63265_20150115_213217_inLine +BABEL_OP3_306_63265_20150115_213217_outLine +BABEL_OP3_306_63671_20150420_041005_inLine +BABEL_OP3_306_63671_20150420_041005_outLine +BABEL_OP3_306_66641_20150422_025109_inLine +BABEL_OP3_306_66641_20150422_025109_outLine +BABEL_OP3_306_67622_20140521_015356_inLine +BABEL_OP3_306_67622_20140521_015356_outLine +BABEL_OP3_306_70110_20140514_211101_inLine +BABEL_OP3_306_70110_20140514_211101_outLine +BABEL_OP3_306_70110_20140514_221144_inLine +BABEL_OP3_306_70110_20140514_221144_outLine +BABEL_OP3_306_72324_20140724_022916_inLine +BABEL_OP3_306_72324_20140724_022916_outLine +BABEL_OP3_306_72324_20140724_024048_inLine +BABEL_OP3_306_72324_20140724_024048_outLine +BABEL_OP3_306_73119_20140603_013443_inLine +BABEL_OP3_306_73119_20140603_013443_outLine +BABEL_OP3_306_74121_20140920_001224_inLine +BABEL_OP3_306_74121_20140920_001224_outLine +BABEL_OP3_306_74280_20140515_234933_inLine +BABEL_OP3_306_74280_20140515_234933_outLine +BABEL_OP3_306_78398_20140604_220522_inLine +BABEL_OP3_306_78398_20140604_220522_outLine +BABEL_OP3_306_78511_20141030_232402_inLine +BABEL_OP3_306_78511_20141030_232402_outLine +BABEL_OP3_306_80306_20140729_235651_inLine +BABEL_OP3_306_80306_20140729_235651_outLine +BABEL_OP3_306_81287_20141009_184932_inLine +BABEL_OP3_306_81287_20141009_184932_outLine 
+BABEL_OP3_306_82035_20140812_211933_inLine +BABEL_OP3_306_82035_20140812_211933_outLine +BABEL_OP3_306_82935_20141027_220108_inLine +BABEL_OP3_306_82935_20141027_220108_outLine +BABEL_OP3_306_82935_20141027_221034_inLine +BABEL_OP3_306_82935_20141027_221034_outLine +BABEL_OP3_306_83651_20140606_023153_inLine +BABEL_OP3_306_83651_20140606_023153_outLine +BABEL_OP3_306_84768_20150416_212057_inLine +BABEL_OP3_306_84768_20150416_212057_outLine +BABEL_OP3_306_85028_20141029_200629_inLine +BABEL_OP3_306_85028_20141029_200629_outLine +BABEL_OP3_306_85647_20140805_005301_inLine +BABEL_OP3_306_85647_20140805_005301_outLine +BABEL_OP3_306_86888_20140801_232454_inLine +BABEL_OP3_306_86888_20140801_232454_outLine +BABEL_OP3_306_89358_20141003_194649_inLine +BABEL_OP3_306_89358_20141003_194649_outLine +BABEL_OP3_306_90737_20140903_235501_inLine +BABEL_OP3_306_90737_20140903_235501_outLine +BABEL_OP3_306_91266_20150215_015545_inLine +BABEL_OP3_306_91266_20150215_015545_outLine +BABEL_OP3_306_91266_20150215_022001_inLine +BABEL_OP3_306_91266_20150215_022001_outLine +BABEL_OP3_306_92941_20140607_001711_inLine +BABEL_OP3_306_92941_20140607_001711_outLine +BABEL_OP3_306_92941_20140607_003034_inLine +BABEL_OP3_306_92941_20140607_003034_outLine +BABEL_OP3_306_93632_20141103_184555_inLine +BABEL_OP3_306_93632_20141103_184555_outLine +BABEL_OP3_306_93946_20141101_211743_inLine +BABEL_OP3_306_93946_20141101_211743_outLine +BABEL_OP3_306_93964_20140730_022556_inLine +BABEL_OP3_306_93964_20140730_022556_outLine +BABEL_OP3_306_94409_20141006_205245_inLine +BABEL_OP3_306_94409_20141006_205245_outLine +BABEL_OP3_306_95399_20140905_005504_inLine +BABEL_OP3_306_95399_20140905_005504_outLine +BABEL_OP3_306_97588_20140521_051503_inLine +BABEL_OP3_306_97588_20140521_051503_outLine +BABEL_OP3_306_99344_20140801_002154_inLine +BABEL_OP3_306_99344_20140801_002154_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/306-igbo/sub-train.untranscribed.list new file mode 100644 index 00000000000..7ca400d26e5 --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/sub-train.untranscribed.list @@ -0,0 +1,380 @@ +BABEL_OP3_306_10188_20140511_001332_inLine +BABEL_OP3_306_10188_20140511_001332_outLine +BABEL_OP3_306_10313_20140523_024428_inLine +BABEL_OP3_306_10313_20140523_024428_outLine +BABEL_OP3_306_10319_20140522_015112_inLine +BABEL_OP3_306_10319_20140522_015112_outLine +BABEL_OP3_306_10416_20140802_195508_inLine +BABEL_OP3_306_10416_20140802_195508_outLine +BABEL_OP3_306_10974_20140805_011808_inLine +BABEL_OP3_306_10974_20140805_011808_outLine +BABEL_OP3_306_12036_20140604_193658_inLine +BABEL_OP3_306_12036_20140604_193658_outLine +BABEL_OP3_306_12242_20140601_233200_inLine +BABEL_OP3_306_12242_20140601_233200_outLine +BABEL_OP3_306_13324_20140625_222242_inLine +BABEL_OP3_306_13324_20140625_223418_inLine +BABEL_OP3_306_13561_20140802_043219_inLine +BABEL_OP3_306_13561_20140802_043219_outLine +BABEL_OP3_306_14141_20141223_040734_inLine +BABEL_OP3_306_14141_20141223_040734_outLine +BABEL_OP3_306_14229_20150304_204617_inLine +BABEL_OP3_306_14237_20140531_215051_inLine +BABEL_OP3_306_14237_20140531_215051_outLine +BABEL_OP3_306_14814_20140602_011013_inLine +BABEL_OP3_306_14814_20140602_011013_outLine +BABEL_OP3_306_15024_20140904_235714_inLine +BABEL_OP3_306_15024_20140904_235714_outLine +BABEL_OP3_306_15163_20141006_024649_inLine +BABEL_OP3_306_15163_20141006_024649_outLine +BABEL_OP3_306_15382_20140730_010226_inLine +BABEL_OP3_306_15382_20140730_010226_outLine 
+BABEL_OP3_306_16184_20140519_222131_inLine +BABEL_OP3_306_16184_20140519_222131_outLine +BABEL_OP3_306_16351_20140524_195830_inLine +BABEL_OP3_306_16351_20140524_195830_outLine +BABEL_OP3_306_16787_20140802_223754_inLine +BABEL_OP3_306_16787_20140802_223754_outLine +BABEL_OP3_306_16839_20141030_003721_inLine +BABEL_OP3_306_16839_20141030_003721_outLine +BABEL_OP3_306_16938_20140809_233743_inLine +BABEL_OP3_306_17472_20150318_193931_inLine +BABEL_OP3_306_17472_20150318_193931_outLine +BABEL_OP3_306_17511_20150116_221327_inLine +BABEL_OP3_306_17511_20150116_221327_outLine +BABEL_OP3_306_17881_20150304_004415_inLine +BABEL_OP3_306_17881_20150304_004415_outLine +BABEL_OP3_306_18280_20150223_175908_inLine +BABEL_OP3_306_18280_20150223_175908_outLine +BABEL_OP3_306_18370_20150223_190452_inLine +BABEL_OP3_306_18370_20150223_190452_outLine +BABEL_OP3_306_18863_20141103_232200_inLine +BABEL_OP3_306_18863_20141103_232200_outLine +BABEL_OP3_306_19767_20150317_173511_inLine +BABEL_OP3_306_19767_20150317_173511_outLine +BABEL_OP3_306_21244_20150303_021843_inLine +BABEL_OP3_306_21244_20150303_021843_outLine +BABEL_OP3_306_21892_20141031_004104_inLine +BABEL_OP3_306_21892_20141031_004104_outLine +BABEL_OP3_306_22021_20150421_200500_inLine +BABEL_OP3_306_22021_20150421_200500_outLine +BABEL_OP3_306_22494_20141004_000311_inLine +BABEL_OP3_306_22494_20141004_000311_outLine +BABEL_OP3_306_22643_20140526_192640_inLine +BABEL_OP3_306_22643_20140526_192640_outLine +BABEL_OP3_306_23355_20150306_040413_inLine +BABEL_OP3_306_23355_20150306_040413_outLine +BABEL_OP3_306_23395_20140815_012335_inLine +BABEL_OP3_306_23395_20140815_012335_outLine +BABEL_OP3_306_24270_20141009_010150_inLine +BABEL_OP3_306_24270_20141009_010150_outLine +BABEL_OP3_306_24679_20140521_043344_inLine +BABEL_OP3_306_24679_20140521_043344_outLine +BABEL_OP3_306_25767_20140603_022935_inLine +BABEL_OP3_306_25767_20140603_022935_outLine +BABEL_OP3_306_26388_20140605_212825_inLine +BABEL_OP3_306_26388_20140605_212825_outLine +BABEL_OP3_306_26574_20141028_193409_inLine +BABEL_OP3_306_26574_20141028_193409_outLine +BABEL_OP3_306_26836_20140606_012758_inLine +BABEL_OP3_306_26836_20140606_012758_outLine +BABEL_OP3_306_26869_20150311_010234_inLine +BABEL_OP3_306_26869_20150311_010234_outLine +BABEL_OP3_306_27014_20140525_005218_inLine +BABEL_OP3_306_27014_20140525_005218_outLine +BABEL_OP3_306_27367_20140524_212214_inLine +BABEL_OP3_306_27367_20140524_212214_outLine +BABEL_OP3_306_30250_20140520_201955_inLine +BABEL_OP3_306_30250_20140520_201955_outLine +BABEL_OP3_306_30395_20140620_010240_inLine +BABEL_OP3_306_30395_20140620_010240_outLine +BABEL_OP3_306_30395_20140620_011044_inLine +BABEL_OP3_306_30395_20140620_011044_outLine +BABEL_OP3_306_31074_20150120_001644_inLine +BABEL_OP3_306_31074_20150120_001644_outLine +BABEL_OP3_306_31184_20141006_222942_inLine +BABEL_OP3_306_31184_20141006_222942_outLine +BABEL_OP3_306_31992_20140714_213448_inLine +BABEL_OP3_306_31992_20140714_213448_outLine +BABEL_OP3_306_32169_20150311_001538_inLine +BABEL_OP3_306_32169_20150311_001538_outLine +BABEL_OP3_306_32832_20141027_221739_inLine +BABEL_OP3_306_32832_20141027_221739_outLine +BABEL_OP3_306_33251_20140725_025307_inLine +BABEL_OP3_306_33251_20140725_025307_outLine +BABEL_OP3_306_33476_20140730_232844_inLine +BABEL_OP3_306_33476_20140730_232844_outLine +BABEL_OP3_306_33951_20140725_061646_inLine +BABEL_OP3_306_33951_20140725_061646_outLine +BABEL_OP3_306_34564_20141026_225715_inLine +BABEL_OP3_306_34564_20141026_225715_outLine 
+BABEL_OP3_306_34564_20141026_230434_inLine +BABEL_OP3_306_34564_20141026_230434_outLine +BABEL_OP3_306_36059_20141223_034056_inLine +BABEL_OP3_306_36059_20141223_034056_outLine +BABEL_OP3_306_36147_20150215_051814_inLine +BABEL_OP3_306_36147_20150215_051814_outLine +BABEL_OP3_306_36364_20150123_012425_inLine +BABEL_OP3_306_36364_20150123_012425_outLine +BABEL_OP3_306_36505_20141027_211503_inLine +BABEL_OP3_306_36505_20141027_211503_outLine +BABEL_OP3_306_37007_20140527_013428_inLine +BABEL_OP3_306_37007_20140527_013428_outLine +BABEL_OP3_306_38323_20150418_203354_inLine +BABEL_OP3_306_38323_20150418_203354_outLine +BABEL_OP3_306_38554_20140517_054801_inLine +BABEL_OP3_306_38554_20140517_054801_outLine +BABEL_OP3_306_38554_20140517_055631_inLine +BABEL_OP3_306_38554_20140517_055631_outLine +BABEL_OP3_306_39555_20141030_012732_inLine +BABEL_OP3_306_39555_20141030_012732_outLine +BABEL_OP3_306_40330_20150418_213611_inLine +BABEL_OP3_306_40330_20150418_213611_outLine +BABEL_OP3_306_40713_20140605_205025_inLine +BABEL_OP3_306_40713_20140605_205025_outLine +BABEL_OP3_306_41233_20141029_235039_inLine +BABEL_OP3_306_41233_20141029_235039_outLine +BABEL_OP3_306_41233_20141030_004714_inLine +BABEL_OP3_306_41233_20141030_004714_outLine +BABEL_OP3_306_41469_20150405_025457_inLine +BABEL_OP3_306_41469_20150405_025457_outLine +BABEL_OP3_306_41592_20140731_180118_inLine +BABEL_OP3_306_41592_20140731_180118_outLine +BABEL_OP3_306_41920_20140531_032613_inLine +BABEL_OP3_306_41920_20140531_032613_outLine +BABEL_OP3_306_42126_20140528_024621_inLine +BABEL_OP3_306_42126_20140528_024621_outLine +BABEL_OP3_306_42231_20141009_191123_inLine +BABEL_OP3_306_42231_20141009_191123_outLine +BABEL_OP3_306_43286_20140522_203724_inLine +BABEL_OP3_306_43286_20140522_203724_outLine +BABEL_OP3_306_43388_20140802_221518_inLine +BABEL_OP3_306_43388_20140802_221518_outLine +BABEL_OP3_306_43388_20140802_222040_inLine +BABEL_OP3_306_43388_20140802_222040_outLine +BABEL_OP3_306_43388_20140802_222715_inLine +BABEL_OP3_306_43388_20140802_222715_outLine +BABEL_OP3_306_43784_20140608_022047_inLine +BABEL_OP3_306_43784_20140608_022047_outLine +BABEL_OP3_306_46066_20141027_233339_inLine +BABEL_OP3_306_46066_20141027_233339_outLine +BABEL_OP3_306_46310_20140602_230134_inLine +BABEL_OP3_306_46310_20140602_230134_outLine +BABEL_OP3_306_46550_20140605_222807_inLine +BABEL_OP3_306_46550_20140605_222807_outLine +BABEL_OP3_306_46625_20140606_202920_inLine +BABEL_OP3_306_46625_20140606_202920_outLine +BABEL_OP3_306_46757_20140920_030716_inLine +BABEL_OP3_306_46757_20140920_030716_outLine +BABEL_OP3_306_46905_20140528_215718_inLine +BABEL_OP3_306_46905_20140528_215718_outLine +BABEL_OP3_306_47923_20150131_000157_inLine +BABEL_OP3_306_47923_20150131_000157_outLine +BABEL_OP3_306_49502_20150403_222234_inLine +BABEL_OP3_306_49502_20150403_222234_outLine +BABEL_OP3_306_50565_20140521_040110_inLine +BABEL_OP3_306_50565_20140521_040110_outLine +BABEL_OP3_306_51156_20150116_191446_inLine +BABEL_OP3_306_51156_20150116_191446_outLine +BABEL_OP3_306_52058_20140526_231450_inLine +BABEL_OP3_306_52058_20140526_231450_outLine +BABEL_OP3_306_52265_20150320_030911_inLine +BABEL_OP3_306_52265_20150320_030911_outLine +BABEL_OP3_306_52932_20140608_003800_inLine +BABEL_OP3_306_52932_20140608_003800_outLine +BABEL_OP3_306_53206_20140523_191711_inLine +BABEL_OP3_306_53206_20140523_191711_outLine +BABEL_OP3_306_53758_20150227_224132_inLine +BABEL_OP3_306_53758_20150227_224132_outLine +BABEL_OP3_306_54160_20140602_201949_inLine 
+BABEL_OP3_306_54160_20140602_201949_outLine +BABEL_OP3_306_54594_20140528_232952_inLine +BABEL_OP3_306_54594_20140528_232952_outLine +BABEL_OP3_306_54697_20141027_014534_inLine +BABEL_OP3_306_54697_20141027_014534_outLine +BABEL_OP3_306_54697_20141027_015651_inLine +BABEL_OP3_306_54697_20141027_015651_outLine +BABEL_OP3_306_56306_20141111_210052_inLine +BABEL_OP3_306_56306_20141111_210052_outLine +BABEL_OP3_306_56523_20140729_211409_inLine +BABEL_OP3_306_56523_20140729_211409_outLine +BABEL_OP3_306_56826_20141005_005430_inLine +BABEL_OP3_306_56826_20141005_005430_outLine +BABEL_OP3_306_57065_20140813_021110_inLine +BABEL_OP3_306_57065_20140813_021110_outLine +BABEL_OP3_306_57654_20140622_013309_inLine +BABEL_OP3_306_57654_20140622_013309_outLine +BABEL_OP3_306_58145_20140724_045437_inLine +BABEL_OP3_306_58145_20140724_045437_outLine +BABEL_OP3_306_58489_20141026_005336_inLine +BABEL_OP3_306_58489_20141026_005336_outLine +BABEL_OP3_306_59078_20141009_004020_inLine +BABEL_OP3_306_59078_20141009_004020_outLine +BABEL_OP3_306_59509_20140805_224009_inLine +BABEL_OP3_306_59509_20140805_224009_outLine +BABEL_OP3_306_59509_20140805_224625_inLine +BABEL_OP3_306_59509_20140805_224625_outLine +BABEL_OP3_306_60310_20141004_230555_inLine +BABEL_OP3_306_60310_20141004_230555_outLine +BABEL_OP3_306_60352_20140806_021626_inLine +BABEL_OP3_306_60352_20140806_021626_outLine +BABEL_OP3_306_61219_20140603_003614_inLine +BABEL_OP3_306_61219_20140603_003614_outLine +BABEL_OP3_306_61225_20140515_013438_inLine +BABEL_OP3_306_61225_20140515_013438_outLine +BABEL_OP3_306_61435_20141029_014344_inLine +BABEL_OP3_306_61435_20141029_014344_outLine +BABEL_OP3_306_61438_20140527_213221_inLine +BABEL_OP3_306_61438_20140527_213221_outLine +BABEL_OP3_306_61888_20141104_013244_inLine +BABEL_OP3_306_61888_20141104_013244_outLine +BABEL_OP3_306_63220_20140811_231222_inLine +BABEL_OP3_306_63220_20140811_231222_outLine +BABEL_OP3_306_63766_20150226_043203_inLine +BABEL_OP3_306_63766_20150226_043203_outLine +BABEL_OP3_306_64350_20140621_190438_inLine +BABEL_OP3_306_64350_20140621_190438_outLine +BABEL_OP3_306_64398_20140731_012313_inLine +BABEL_OP3_306_64398_20140731_012313_outLine +BABEL_OP3_306_64796_20150407_014947_inLine +BABEL_OP3_306_64796_20150407_014947_outLine +BABEL_OP3_306_65723_20140604_235722_inLine +BABEL_OP3_306_65723_20140604_235722_outLine +BABEL_OP3_306_66350_20150225_020558_inLine +BABEL_OP3_306_66350_20150225_020558_outLine +BABEL_OP3_306_66916_20140522_002931_inLine +BABEL_OP3_306_66916_20140522_002931_outLine +BABEL_OP3_306_66967_20140606_235110_inLine +BABEL_OP3_306_66967_20140606_235110_outLine +BABEL_OP3_306_67373_20140624_224214_inLine +BABEL_OP3_306_67373_20140624_224214_outLine +BABEL_OP3_306_67373_20140624_225314_inLine +BABEL_OP3_306_67373_20140624_225314_outLine +BABEL_OP3_306_67401_20140815_032242_inLine +BABEL_OP3_306_67401_20140815_032242_outLine +BABEL_OP3_306_67592_20141028_032006_inLine +BABEL_OP3_306_67592_20141028_032006_outLine +BABEL_OP3_306_67726_20140523_010156_inLine +BABEL_OP3_306_67726_20140523_010156_outLine +BABEL_OP3_306_67999_20141031_215535_inLine +BABEL_OP3_306_67999_20141031_215535_outLine +BABEL_OP3_306_68059_20140802_023600_inLine +BABEL_OP3_306_68059_20140802_023600_outLine +BABEL_OP3_306_68068_20141004_180553_inLine +BABEL_OP3_306_68068_20141004_180553_outLine +BABEL_OP3_306_69578_20140729_234354_inLine +BABEL_OP3_306_69578_20140729_234354_outLine +BABEL_OP3_306_70221_20140801_213304_inLine +BABEL_OP3_306_70221_20140801_213304_outLine 
+BABEL_OP3_306_70293_20150216_013540_inLine +BABEL_OP3_306_70293_20150216_013540_outLine +BABEL_OP3_306_70639_20140528_035113_inLine +BABEL_OP3_306_70639_20140528_035113_outLine +BABEL_OP3_306_70794_20140520_000549_inLine +BABEL_OP3_306_70794_20140520_000549_outLine +BABEL_OP3_306_71401_20150221_015039_inLine +BABEL_OP3_306_71401_20150221_015039_outLine +BABEL_OP3_306_71976_20140529_033557_inLine +BABEL_OP3_306_71976_20140529_033557_outLine +BABEL_OP3_306_72587_20140811_040036_inLine +BABEL_OP3_306_72587_20140811_040036_outLine +BABEL_OP3_306_72844_20140518_010610_inLine +BABEL_OP3_306_72844_20140518_010610_outLine +BABEL_OP3_306_73511_20141005_215627_inLine +BABEL_OP3_306_73511_20141005_215627_outLine +BABEL_OP3_306_75505_20140512_201003_inLine +BABEL_OP3_306_75505_20140512_201003_outLine +BABEL_OP3_306_77146_20140521_002843_inLine +BABEL_OP3_306_77146_20140521_002843_outLine +BABEL_OP3_306_77225_20150327_022842_inLine +BABEL_OP3_306_77225_20150327_022842_outLine +BABEL_OP3_306_78194_20140611_232911_inLine +BABEL_OP3_306_78194_20140611_232911_outLine +BABEL_OP3_306_78254_20140602_034556_inLine +BABEL_OP3_306_78254_20140602_034556_outLine +BABEL_OP3_306_78604_20140625_020654_inLine +BABEL_OP3_306_78604_20140625_020654_outLine +BABEL_OP3_306_79139_20140725_061931_inLine +BABEL_OP3_306_79139_20140725_061931_outLine +BABEL_OP3_306_80383_20150325_015939_inLine +BABEL_OP3_306_80383_20150325_015939_outLine +BABEL_OP3_306_80781_20140729_014618_inLine +BABEL_OP3_306_80781_20140729_014618_outLine +BABEL_OP3_306_81427_20140723_232926_inLine +BABEL_OP3_306_81427_20140723_232926_outLine +BABEL_OP3_306_81581_20140529_022004_inLine +BABEL_OP3_306_81581_20140529_022004_outLine +BABEL_OP3_306_82303_20150313_044844_inLine +BABEL_OP3_306_82303_20150313_044844_outLine +BABEL_OP3_306_82637_20140514_233142_inLine +BABEL_OP3_306_82637_20140514_233142_outLine +BABEL_OP3_306_84430_20150331_191720_inLine +BABEL_OP3_306_84430_20150331_191720_outLine +BABEL_OP3_306_84609_20150401_222657_inLine +BABEL_OP3_306_84609_20150401_222657_outLine +BABEL_OP3_306_84611_20140605_003243_inLine +BABEL_OP3_306_84611_20140605_003243_outLine +BABEL_OP3_306_84815_20141101_002538_inLine +BABEL_OP3_306_84815_20141101_002538_outLine +BABEL_OP3_306_86191_20140603_042134_inLine +BABEL_OP3_306_86191_20140603_042134_outLine +BABEL_OP3_306_86433_20140816_072513_inLine +BABEL_OP3_306_86433_20140816_072513_outLine +BABEL_OP3_306_86472_20140730_223950_inLine +BABEL_OP3_306_86472_20140730_223950_outLine +BABEL_OP3_306_86845_20140524_192542_inLine +BABEL_OP3_306_86845_20140524_192542_outLine +BABEL_OP3_306_86952_20140531_040557_inLine +BABEL_OP3_306_86952_20140531_040557_outLine +BABEL_OP3_306_87179_20141029_021040_inLine +BABEL_OP3_306_87179_20141029_021040_outLine +BABEL_OP3_306_87353_20150327_191436_inLine +BABEL_OP3_306_87353_20150327_191436_outLine +BABEL_OP3_306_87884_20141101_223809_inLine +BABEL_OP3_306_87884_20141101_223809_outLine +BABEL_OP3_306_88601_20141003_171755_inLine +BABEL_OP3_306_88601_20141003_171755_outLine +BABEL_OP3_306_88661_20141005_225341_inLine +BABEL_OP3_306_88661_20141005_225341_outLine +BABEL_OP3_306_89045_20140517_213454_inLine +BABEL_OP3_306_89045_20140517_213454_outLine +BABEL_OP3_306_89059_20141104_210223_inLine +BABEL_OP3_306_89059_20141104_210223_outLine +BABEL_OP3_306_89059_20141104_211433_inLine +BABEL_OP3_306_89059_20141104_211433_outLine +BABEL_OP3_306_89457_20140730_002520_inLine +BABEL_OP3_306_89457_20140730_002520_outLine +BABEL_OP3_306_90440_20150312_002806_inLine 
+BABEL_OP3_306_90440_20150312_002806_outLine +BABEL_OP3_306_91463_20140729_001809_inLine +BABEL_OP3_306_91463_20140729_001809_outLine +BABEL_OP3_306_91825_20150408_204309_inLine +BABEL_OP3_306_91825_20150408_204309_outLine +BABEL_OP3_306_91891_20141009_203853_inLine +BABEL_OP3_306_91891_20141009_203853_outLine +BABEL_OP3_306_92440_20150326_232645_inLine +BABEL_OP3_306_92440_20150326_232645_outLine +BABEL_OP3_306_92509_20140521_023136_inLine +BABEL_OP3_306_92509_20140521_023136_outLine +BABEL_OP3_306_92809_20150419_011906_inLine +BABEL_OP3_306_92809_20150419_011906_outLine +BABEL_OP3_306_94253_20140606_032103_inLine +BABEL_OP3_306_94253_20140606_032103_outLine +BABEL_OP3_306_94869_20140515_230712_inLine +BABEL_OP3_306_94869_20140515_230712_outLine +BABEL_OP3_306_94978_20141115_234420_inLine +BABEL_OP3_306_94978_20141115_234420_outLine +BABEL_OP3_306_95124_20150416_012109_inLine +BABEL_OP3_306_95124_20150416_012109_outLine +BABEL_OP3_306_95598_20140509_043406_inLine +BABEL_OP3_306_95598_20140509_043406_outLine +BABEL_OP3_306_96730_20141028_230035_inLine +BABEL_OP3_306_96730_20141028_230035_outLine +BABEL_OP3_306_96820_20140802_051525_inLine +BABEL_OP3_306_96820_20140802_051525_outLine +BABEL_OP3_306_97570_20140801_224422_inLine +BABEL_OP3_306_97570_20140801_224422_outLine +BABEL_OP3_306_98311_20140604_201838_inLine +BABEL_OP3_306_98311_20140604_201838_outLine +BABEL_OP3_306_99920_20140604_212052_inLine +BABEL_OP3_306_99920_20140604_212052_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/training.list b/egs/babel/s5d/conf/lists/306-igbo/training.list new file mode 100644 index 00000000000..0504de58fb1 --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/training.list @@ -0,0 +1,512 @@ +BABEL_OP3_306_10188_20140511_001332_inLine +BABEL_OP3_306_10188_20140511_001332_outLine +BABEL_OP3_306_10313_20140523_024428_inLine +BABEL_OP3_306_10313_20140523_024428_outLine +BABEL_OP3_306_10319_20140522_015112_inLine +BABEL_OP3_306_10319_20140522_015112_outLine +BABEL_OP3_306_10416_20140802_195508_inLine +BABEL_OP3_306_10416_20140802_195508_outLine +BABEL_OP3_306_10524_20150307_210859_inLine +BABEL_OP3_306_10524_20150307_210859_outLine +BABEL_OP3_306_10974_20140805_011808_inLine +BABEL_OP3_306_10974_20140805_011808_outLine +BABEL_OP3_306_12036_20140604_193658_inLine +BABEL_OP3_306_12036_20140604_193658_outLine +BABEL_OP3_306_12242_20140601_233200_inLine +BABEL_OP3_306_12242_20140601_233200_outLine +BABEL_OP3_306_13324_20140625_222242_inLine +BABEL_OP3_306_13324_20140625_223418_inLine +BABEL_OP3_306_13561_20140802_043219_inLine +BABEL_OP3_306_13561_20140802_043219_outLine +BABEL_OP3_306_14141_20141223_040734_inLine +BABEL_OP3_306_14141_20141223_040734_outLine +BABEL_OP3_306_14229_20150304_204617_inLine +BABEL_OP3_306_14237_20140531_215051_inLine +BABEL_OP3_306_14237_20140531_215051_outLine +BABEL_OP3_306_14575_20140530_194144_inLine +BABEL_OP3_306_14575_20140530_194144_outLine +BABEL_OP3_306_14814_20140602_011013_inLine +BABEL_OP3_306_14814_20140602_011013_outLine +BABEL_OP3_306_15024_20140904_235714_inLine +BABEL_OP3_306_15024_20140904_235714_outLine +BABEL_OP3_306_15163_20141006_024649_inLine +BABEL_OP3_306_15163_20141006_024649_outLine +BABEL_OP3_306_15382_20140730_010226_inLine +BABEL_OP3_306_15382_20140730_010226_outLine +BABEL_OP3_306_15926_20140815_011013_inLine +BABEL_OP3_306_16184_20140519_222131_inLine +BABEL_OP3_306_16184_20140519_222131_outLine +BABEL_OP3_306_16351_20140524_195830_inLine +BABEL_OP3_306_16351_20140524_195830_outLine +BABEL_OP3_306_16787_20140802_223754_inLine 
+BABEL_OP3_306_16787_20140802_223754_outLine +BABEL_OP3_306_16839_20141030_003721_inLine +BABEL_OP3_306_16839_20141030_003721_outLine +BABEL_OP3_306_16938_20140809_233743_inLine +BABEL_OP3_306_17472_20150318_193931_inLine +BABEL_OP3_306_17472_20150318_193931_outLine +BABEL_OP3_306_17511_20150116_221327_inLine +BABEL_OP3_306_17511_20150116_221327_outLine +BABEL_OP3_306_17881_20150304_004415_inLine +BABEL_OP3_306_17881_20150304_004415_outLine +BABEL_OP3_306_18280_20150223_175908_inLine +BABEL_OP3_306_18280_20150223_175908_outLine +BABEL_OP3_306_18370_20150223_190452_inLine +BABEL_OP3_306_18370_20150223_190452_outLine +BABEL_OP3_306_18490_20150402_010442_inLine +BABEL_OP3_306_18490_20150402_010442_outLine +BABEL_OP3_306_18863_20141103_232200_inLine +BABEL_OP3_306_18863_20141103_232200_outLine +BABEL_OP3_306_19444_20150214_045709_inLine +BABEL_OP3_306_19444_20150214_045709_outLine +BABEL_OP3_306_19767_20150317_173511_inLine +BABEL_OP3_306_19767_20150317_173511_outLine +BABEL_OP3_306_20721_20140529_000851_inLine +BABEL_OP3_306_20721_20140529_000851_outLine +BABEL_OP3_306_21244_20150303_021843_inLine +BABEL_OP3_306_21244_20150303_021843_outLine +BABEL_OP3_306_21581_20140724_022000_inLine +BABEL_OP3_306_21581_20140724_022000_outLine +BABEL_OP3_306_21892_20141031_004104_inLine +BABEL_OP3_306_21892_20141031_004104_outLine +BABEL_OP3_306_22021_20150421_200500_inLine +BABEL_OP3_306_22021_20150421_200500_outLine +BABEL_OP3_306_22494_20141004_000311_inLine +BABEL_OP3_306_22494_20141004_000311_outLine +BABEL_OP3_306_22643_20140526_192640_inLine +BABEL_OP3_306_22643_20140526_192640_outLine +BABEL_OP3_306_23355_20150306_040413_inLine +BABEL_OP3_306_23355_20150306_040413_outLine +BABEL_OP3_306_23395_20140815_012335_inLine +BABEL_OP3_306_23395_20140815_012335_outLine +BABEL_OP3_306_24037_20140524_232238_inLine +BABEL_OP3_306_24037_20140524_232238_outLine +BABEL_OP3_306_24270_20141009_010150_inLine +BABEL_OP3_306_24270_20141009_010150_outLine +BABEL_OP3_306_24679_20140521_043344_inLine +BABEL_OP3_306_24679_20140521_043344_outLine +BABEL_OP3_306_25767_20140603_022935_inLine +BABEL_OP3_306_25767_20140603_022935_outLine +BABEL_OP3_306_26074_20140815_015119_inLine +BABEL_OP3_306_26074_20140815_015119_outLine +BABEL_OP3_306_26388_20140605_212825_inLine +BABEL_OP3_306_26388_20140605_212825_outLine +BABEL_OP3_306_26478_20150317_053650_inLine +BABEL_OP3_306_26478_20150317_053650_outLine +BABEL_OP3_306_26574_20141028_193409_inLine +BABEL_OP3_306_26574_20141028_193409_outLine +BABEL_OP3_306_26836_20140606_012758_inLine +BABEL_OP3_306_26836_20140606_012758_outLine +BABEL_OP3_306_26869_20150311_010234_inLine +BABEL_OP3_306_26869_20150311_010234_outLine +BABEL_OP3_306_27014_20140525_005218_inLine +BABEL_OP3_306_27014_20140525_005218_outLine +BABEL_OP3_306_27218_20140625_013736_inLine +BABEL_OP3_306_27367_20140524_212214_inLine +BABEL_OP3_306_27367_20140524_212214_outLine +BABEL_OP3_306_28538_20140919_192901_inLine +BABEL_OP3_306_28538_20140919_192901_outLine +BABEL_OP3_306_28945_20140610_222125_inLine +BABEL_OP3_306_28945_20140610_222125_outLine +BABEL_OP3_306_30250_20140520_201955_inLine +BABEL_OP3_306_30250_20140520_201955_outLine +BABEL_OP3_306_30395_20140620_010240_inLine +BABEL_OP3_306_30395_20140620_010240_outLine +BABEL_OP3_306_30395_20140620_011044_inLine +BABEL_OP3_306_30395_20140620_011044_outLine +BABEL_OP3_306_31074_20150120_001644_inLine +BABEL_OP3_306_31074_20150120_001644_outLine +BABEL_OP3_306_31182_20141028_015316_inLine +BABEL_OP3_306_31182_20141028_015316_outLine 
+BABEL_OP3_306_31184_20141006_222942_inLine +BABEL_OP3_306_31184_20141006_222942_outLine +BABEL_OP3_306_31346_20141029_183248_inLine +BABEL_OP3_306_31346_20141029_183248_outLine +BABEL_OP3_306_31992_20140714_213448_inLine +BABEL_OP3_306_31992_20140714_213448_outLine +BABEL_OP3_306_32169_20150311_001538_inLine +BABEL_OP3_306_32169_20150311_001538_outLine +BABEL_OP3_306_32832_20141027_221739_inLine +BABEL_OP3_306_32832_20141027_221739_outLine +BABEL_OP3_306_33251_20140725_025307_inLine +BABEL_OP3_306_33251_20140725_025307_outLine +BABEL_OP3_306_33476_20140730_232844_inLine +BABEL_OP3_306_33476_20140730_232844_outLine +BABEL_OP3_306_33840_20141031_013533_inLine +BABEL_OP3_306_33840_20141031_013533_outLine +BABEL_OP3_306_33840_20141031_014151_inLine +BABEL_OP3_306_33840_20141031_014151_outLine +BABEL_OP3_306_33951_20140725_061646_inLine +BABEL_OP3_306_33951_20140725_061646_outLine +BABEL_OP3_306_34564_20141026_225715_inLine +BABEL_OP3_306_34564_20141026_225715_outLine +BABEL_OP3_306_34564_20141026_230434_inLine +BABEL_OP3_306_34564_20141026_230434_outLine +BABEL_OP3_306_36059_20141223_034056_inLine +BABEL_OP3_306_36059_20141223_034056_outLine +BABEL_OP3_306_36147_20150215_051814_inLine +BABEL_OP3_306_36147_20150215_051814_outLine +BABEL_OP3_306_36293_20140521_011821_inLine +BABEL_OP3_306_36293_20140521_011821_outLine +BABEL_OP3_306_36364_20150123_012425_inLine +BABEL_OP3_306_36364_20150123_012425_outLine +BABEL_OP3_306_36505_20141027_211503_inLine +BABEL_OP3_306_36505_20141027_211503_outLine +BABEL_OP3_306_37007_20140527_013428_inLine +BABEL_OP3_306_37007_20140527_013428_outLine +BABEL_OP3_306_38323_20150418_203354_inLine +BABEL_OP3_306_38323_20150418_203354_outLine +BABEL_OP3_306_38554_20140517_054801_inLine +BABEL_OP3_306_38554_20140517_054801_outLine +BABEL_OP3_306_38554_20140517_055631_inLine +BABEL_OP3_306_38554_20140517_055631_outLine +BABEL_OP3_306_39555_20141030_012732_inLine +BABEL_OP3_306_39555_20141030_012732_outLine +BABEL_OP3_306_40330_20150418_213611_inLine +BABEL_OP3_306_40330_20150418_213611_outLine +BABEL_OP3_306_40686_20140523_014206_inLine +BABEL_OP3_306_40686_20140523_014206_outLine +BABEL_OP3_306_40713_20140605_205025_inLine +BABEL_OP3_306_40713_20140605_205025_outLine +BABEL_OP3_306_41233_20141029_235039_inLine +BABEL_OP3_306_41233_20141029_235039_outLine +BABEL_OP3_306_41233_20141030_004714_inLine +BABEL_OP3_306_41233_20141030_004714_outLine +BABEL_OP3_306_41469_20150405_025457_inLine +BABEL_OP3_306_41469_20150405_025457_outLine +BABEL_OP3_306_41592_20140731_180118_inLine +BABEL_OP3_306_41592_20140731_180118_outLine +BABEL_OP3_306_41920_20140531_032613_inLine +BABEL_OP3_306_41920_20140531_032613_outLine +BABEL_OP3_306_42126_20140528_024621_inLine +BABEL_OP3_306_42126_20140528_024621_outLine +BABEL_OP3_306_42231_20141009_191123_inLine +BABEL_OP3_306_42231_20141009_191123_outLine +BABEL_OP3_306_43286_20140522_203724_inLine +BABEL_OP3_306_43286_20140522_203724_outLine +BABEL_OP3_306_43388_20140802_221518_inLine +BABEL_OP3_306_43388_20140802_221518_outLine +BABEL_OP3_306_43388_20140802_222040_inLine +BABEL_OP3_306_43388_20140802_222040_outLine +BABEL_OP3_306_43388_20140802_222715_inLine +BABEL_OP3_306_43388_20140802_222715_outLine +BABEL_OP3_306_43784_20140608_022047_inLine +BABEL_OP3_306_43784_20140608_022047_outLine +BABEL_OP3_306_44709_20140728_212605_inLine +BABEL_OP3_306_44709_20140728_212605_outLine +BABEL_OP3_306_46066_20141027_233339_inLine +BABEL_OP3_306_46066_20141027_233339_outLine +BABEL_OP3_306_46310_20140602_230134_inLine 
+BABEL_OP3_306_46310_20140602_230134_outLine +BABEL_OP3_306_46550_20140605_222807_inLine +BABEL_OP3_306_46550_20140605_222807_outLine +BABEL_OP3_306_46625_20140606_202920_inLine +BABEL_OP3_306_46625_20140606_202920_outLine +BABEL_OP3_306_46757_20140920_030716_inLine +BABEL_OP3_306_46757_20140920_030716_outLine +BABEL_OP3_306_46905_20140528_215718_inLine +BABEL_OP3_306_46905_20140528_215718_outLine +BABEL_OP3_306_47923_20150131_000157_inLine +BABEL_OP3_306_47923_20150131_000157_outLine +BABEL_OP3_306_48610_20140604_003825_inLine +BABEL_OP3_306_48610_20140604_003825_outLine +BABEL_OP3_306_49437_20141029_030600_inLine +BABEL_OP3_306_49437_20141029_030600_outLine +BABEL_OP3_306_49502_20150403_222234_inLine +BABEL_OP3_306_49502_20150403_222234_outLine +BABEL_OP3_306_50175_20150402_210041_inLine +BABEL_OP3_306_50175_20150402_210041_outLine +BABEL_OP3_306_50565_20140521_040110_inLine +BABEL_OP3_306_50565_20140521_040110_outLine +BABEL_OP3_306_50962_20140605_232213_inLine +BABEL_OP3_306_50962_20140605_232213_outLine +BABEL_OP3_306_51156_20150116_191446_inLine +BABEL_OP3_306_51156_20150116_191446_outLine +BABEL_OP3_306_52058_20140526_231450_inLine +BABEL_OP3_306_52058_20140526_231450_outLine +BABEL_OP3_306_52265_20150320_030911_inLine +BABEL_OP3_306_52265_20150320_030911_outLine +BABEL_OP3_306_52932_20140608_003800_inLine +BABEL_OP3_306_52932_20140608_003800_outLine +BABEL_OP3_306_53206_20140523_191711_inLine +BABEL_OP3_306_53206_20140523_191711_outLine +BABEL_OP3_306_53758_20150227_224132_inLine +BABEL_OP3_306_53758_20150227_224132_outLine +BABEL_OP3_306_54160_20140602_201949_inLine +BABEL_OP3_306_54160_20140602_201949_outLine +BABEL_OP3_306_54594_20140528_232952_inLine +BABEL_OP3_306_54594_20140528_232952_outLine +BABEL_OP3_306_54697_20141027_014534_inLine +BABEL_OP3_306_54697_20141027_014534_outLine +BABEL_OP3_306_54697_20141027_015651_inLine +BABEL_OP3_306_54697_20141027_015651_outLine +BABEL_OP3_306_55818_20140603_031605_inLine +BABEL_OP3_306_55818_20140603_031605_outLine +BABEL_OP3_306_55902_20150313_043244_inLine +BABEL_OP3_306_55902_20150313_043244_outLine +BABEL_OP3_306_55968_20140515_005800_inLine +BABEL_OP3_306_55968_20140515_005800_outLine +BABEL_OP3_306_56306_20141111_210052_inLine +BABEL_OP3_306_56306_20141111_210052_outLine +BABEL_OP3_306_56523_20140729_211409_inLine +BABEL_OP3_306_56523_20140729_211409_outLine +BABEL_OP3_306_56826_20141005_005430_inLine +BABEL_OP3_306_56826_20141005_005430_outLine +BABEL_OP3_306_56925_20150214_231609_inLine +BABEL_OP3_306_56925_20150214_231609_outLine +BABEL_OP3_306_57065_20140813_021110_inLine +BABEL_OP3_306_57065_20140813_021110_outLine +BABEL_OP3_306_57654_20140622_013309_inLine +BABEL_OP3_306_57654_20140622_013309_outLine +BABEL_OP3_306_58145_20140724_045437_inLine +BABEL_OP3_306_58145_20140724_045437_outLine +BABEL_OP3_306_58489_20141026_005336_inLine +BABEL_OP3_306_58489_20141026_005336_outLine +BABEL_OP3_306_59078_20141009_004020_inLine +BABEL_OP3_306_59078_20141009_004020_outLine +BABEL_OP3_306_59509_20140805_224009_inLine +BABEL_OP3_306_59509_20140805_224009_outLine +BABEL_OP3_306_59509_20140805_224625_inLine +BABEL_OP3_306_59509_20140805_224625_outLine +BABEL_OP3_306_59898_20150411_024935_inLine +BABEL_OP3_306_59898_20150411_024935_outLine +BABEL_OP3_306_60310_20141004_230555_inLine +BABEL_OP3_306_60310_20141004_230555_outLine +BABEL_OP3_306_60352_20140806_021626_inLine +BABEL_OP3_306_60352_20140806_021626_outLine +BABEL_OP3_306_61219_20140603_003614_inLine +BABEL_OP3_306_61219_20140603_003614_outLine 
+BABEL_OP3_306_61225_20140515_013438_inLine +BABEL_OP3_306_61225_20140515_013438_outLine +BABEL_OP3_306_61435_20141029_014344_inLine +BABEL_OP3_306_61435_20141029_014344_outLine +BABEL_OP3_306_61438_20140527_213221_inLine +BABEL_OP3_306_61438_20140527_213221_outLine +BABEL_OP3_306_61888_20141104_013244_inLine +BABEL_OP3_306_61888_20141104_013244_outLine +BABEL_OP3_306_62491_20140528_021234_inLine +BABEL_OP3_306_62491_20140528_021234_outLine +BABEL_OP3_306_62724_20141031_231843_inLine +BABEL_OP3_306_62724_20141031_231843_outLine +BABEL_OP3_306_63220_20140811_231222_inLine +BABEL_OP3_306_63220_20140811_231222_outLine +BABEL_OP3_306_63265_20150115_213217_inLine +BABEL_OP3_306_63265_20150115_213217_outLine +BABEL_OP3_306_63671_20150420_041005_inLine +BABEL_OP3_306_63671_20150420_041005_outLine +BABEL_OP3_306_63766_20150226_043203_inLine +BABEL_OP3_306_63766_20150226_043203_outLine +BABEL_OP3_306_64350_20140621_190438_inLine +BABEL_OP3_306_64350_20140621_190438_outLine +BABEL_OP3_306_64398_20140731_012313_inLine +BABEL_OP3_306_64398_20140731_012313_outLine +BABEL_OP3_306_64796_20150407_014947_inLine +BABEL_OP3_306_64796_20150407_014947_outLine +BABEL_OP3_306_65723_20140604_235722_inLine +BABEL_OP3_306_65723_20140604_235722_outLine +BABEL_OP3_306_66350_20150225_020558_inLine +BABEL_OP3_306_66350_20150225_020558_outLine +BABEL_OP3_306_66641_20150422_025109_inLine +BABEL_OP3_306_66641_20150422_025109_outLine +BABEL_OP3_306_66916_20140522_002931_inLine +BABEL_OP3_306_66916_20140522_002931_outLine +BABEL_OP3_306_66967_20140606_235110_inLine +BABEL_OP3_306_66967_20140606_235110_outLine +BABEL_OP3_306_67373_20140624_224214_inLine +BABEL_OP3_306_67373_20140624_224214_outLine +BABEL_OP3_306_67373_20140624_225314_inLine +BABEL_OP3_306_67373_20140624_225314_outLine +BABEL_OP3_306_67401_20140815_032242_inLine +BABEL_OP3_306_67401_20140815_032242_outLine +BABEL_OP3_306_67592_20141028_032006_inLine +BABEL_OP3_306_67592_20141028_032006_outLine +BABEL_OP3_306_67622_20140521_015356_inLine +BABEL_OP3_306_67622_20140521_015356_outLine +BABEL_OP3_306_67726_20140523_010156_inLine +BABEL_OP3_306_67726_20140523_010156_outLine +BABEL_OP3_306_67999_20141031_215535_inLine +BABEL_OP3_306_67999_20141031_215535_outLine +BABEL_OP3_306_68059_20140802_023600_inLine +BABEL_OP3_306_68059_20140802_023600_outLine +BABEL_OP3_306_68068_20141004_180553_inLine +BABEL_OP3_306_68068_20141004_180553_outLine +BABEL_OP3_306_69578_20140729_234354_inLine +BABEL_OP3_306_69578_20140729_234354_outLine +BABEL_OP3_306_70110_20140514_211101_inLine +BABEL_OP3_306_70110_20140514_211101_outLine +BABEL_OP3_306_70110_20140514_221144_inLine +BABEL_OP3_306_70110_20140514_221144_outLine +BABEL_OP3_306_70221_20140801_213304_inLine +BABEL_OP3_306_70221_20140801_213304_outLine +BABEL_OP3_306_70293_20150216_013540_inLine +BABEL_OP3_306_70293_20150216_013540_outLine +BABEL_OP3_306_70639_20140528_035113_inLine +BABEL_OP3_306_70639_20140528_035113_outLine +BABEL_OP3_306_70794_20140520_000549_inLine +BABEL_OP3_306_70794_20140520_000549_outLine +BABEL_OP3_306_71401_20150221_015039_inLine +BABEL_OP3_306_71401_20150221_015039_outLine +BABEL_OP3_306_71976_20140529_033557_inLine +BABEL_OP3_306_71976_20140529_033557_outLine +BABEL_OP3_306_72324_20140724_022916_inLine +BABEL_OP3_306_72324_20140724_022916_outLine +BABEL_OP3_306_72324_20140724_024048_inLine +BABEL_OP3_306_72324_20140724_024048_outLine +BABEL_OP3_306_72587_20140811_040036_inLine +BABEL_OP3_306_72587_20140811_040036_outLine +BABEL_OP3_306_72844_20140518_010610_inLine 
+BABEL_OP3_306_72844_20140518_010610_outLine +BABEL_OP3_306_73119_20140603_013443_inLine +BABEL_OP3_306_73119_20140603_013443_outLine +BABEL_OP3_306_73511_20141005_215627_inLine +BABEL_OP3_306_73511_20141005_215627_outLine +BABEL_OP3_306_74121_20140920_001224_inLine +BABEL_OP3_306_74121_20140920_001224_outLine +BABEL_OP3_306_74280_20140515_234933_inLine +BABEL_OP3_306_74280_20140515_234933_outLine +BABEL_OP3_306_75505_20140512_201003_inLine +BABEL_OP3_306_75505_20140512_201003_outLine +BABEL_OP3_306_77146_20140521_002843_inLine +BABEL_OP3_306_77146_20140521_002843_outLine +BABEL_OP3_306_77225_20150327_022842_inLine +BABEL_OP3_306_77225_20150327_022842_outLine +BABEL_OP3_306_78194_20140611_232911_inLine +BABEL_OP3_306_78194_20140611_232911_outLine +BABEL_OP3_306_78254_20140602_034556_inLine +BABEL_OP3_306_78254_20140602_034556_outLine +BABEL_OP3_306_78398_20140604_220522_inLine +BABEL_OP3_306_78398_20140604_220522_outLine +BABEL_OP3_306_78511_20141030_232402_inLine +BABEL_OP3_306_78511_20141030_232402_outLine +BABEL_OP3_306_78604_20140625_020654_inLine +BABEL_OP3_306_78604_20140625_020654_outLine +BABEL_OP3_306_79139_20140725_061931_inLine +BABEL_OP3_306_79139_20140725_061931_outLine +BABEL_OP3_306_80306_20140729_235651_inLine +BABEL_OP3_306_80306_20140729_235651_outLine +BABEL_OP3_306_80383_20150325_015939_inLine +BABEL_OP3_306_80383_20150325_015939_outLine +BABEL_OP3_306_80781_20140729_014618_inLine +BABEL_OP3_306_80781_20140729_014618_outLine +BABEL_OP3_306_81287_20141009_184932_inLine +BABEL_OP3_306_81287_20141009_184932_outLine +BABEL_OP3_306_81427_20140723_232926_inLine +BABEL_OP3_306_81427_20140723_232926_outLine +BABEL_OP3_306_81581_20140529_022004_inLine +BABEL_OP3_306_81581_20140529_022004_outLine +BABEL_OP3_306_82035_20140812_211933_inLine +BABEL_OP3_306_82035_20140812_211933_outLine +BABEL_OP3_306_82303_20150313_044844_inLine +BABEL_OP3_306_82303_20150313_044844_outLine +BABEL_OP3_306_82637_20140514_233142_inLine +BABEL_OP3_306_82637_20140514_233142_outLine +BABEL_OP3_306_82935_20141027_220108_inLine +BABEL_OP3_306_82935_20141027_220108_outLine +BABEL_OP3_306_82935_20141027_221034_inLine +BABEL_OP3_306_82935_20141027_221034_outLine +BABEL_OP3_306_83651_20140606_023153_inLine +BABEL_OP3_306_83651_20140606_023153_outLine +BABEL_OP3_306_84430_20150331_191720_inLine +BABEL_OP3_306_84430_20150331_191720_outLine +BABEL_OP3_306_84609_20150401_222657_inLine +BABEL_OP3_306_84609_20150401_222657_outLine +BABEL_OP3_306_84611_20140605_003243_inLine +BABEL_OP3_306_84611_20140605_003243_outLine +BABEL_OP3_306_84768_20150416_212057_inLine +BABEL_OP3_306_84768_20150416_212057_outLine +BABEL_OP3_306_84815_20141101_002538_inLine +BABEL_OP3_306_84815_20141101_002538_outLine +BABEL_OP3_306_85028_20141029_200629_inLine +BABEL_OP3_306_85028_20141029_200629_outLine +BABEL_OP3_306_85647_20140805_005301_inLine +BABEL_OP3_306_85647_20140805_005301_outLine +BABEL_OP3_306_86191_20140603_042134_inLine +BABEL_OP3_306_86191_20140603_042134_outLine +BABEL_OP3_306_86433_20140816_072513_inLine +BABEL_OP3_306_86433_20140816_072513_outLine +BABEL_OP3_306_86472_20140730_223950_inLine +BABEL_OP3_306_86472_20140730_223950_outLine +BABEL_OP3_306_86845_20140524_192542_inLine +BABEL_OP3_306_86845_20140524_192542_outLine +BABEL_OP3_306_86888_20140801_232454_inLine +BABEL_OP3_306_86888_20140801_232454_outLine +BABEL_OP3_306_86952_20140531_040557_inLine +BABEL_OP3_306_86952_20140531_040557_outLine +BABEL_OP3_306_87179_20141029_021040_inLine +BABEL_OP3_306_87179_20141029_021040_outLine 
+BABEL_OP3_306_87353_20150327_191436_inLine +BABEL_OP3_306_87353_20150327_191436_outLine +BABEL_OP3_306_87884_20141101_223809_inLine +BABEL_OP3_306_87884_20141101_223809_outLine +BABEL_OP3_306_88601_20141003_171755_inLine +BABEL_OP3_306_88601_20141003_171755_outLine +BABEL_OP3_306_88661_20141005_225341_inLine +BABEL_OP3_306_88661_20141005_225341_outLine +BABEL_OP3_306_89045_20140517_213454_inLine +BABEL_OP3_306_89045_20140517_213454_outLine +BABEL_OP3_306_89059_20141104_210223_inLine +BABEL_OP3_306_89059_20141104_210223_outLine +BABEL_OP3_306_89059_20141104_211433_inLine +BABEL_OP3_306_89059_20141104_211433_outLine +BABEL_OP3_306_89358_20141003_194649_inLine +BABEL_OP3_306_89358_20141003_194649_outLine +BABEL_OP3_306_89457_20140730_002520_inLine +BABEL_OP3_306_89457_20140730_002520_outLine +BABEL_OP3_306_90440_20150312_002806_inLine +BABEL_OP3_306_90440_20150312_002806_outLine +BABEL_OP3_306_90737_20140903_235501_inLine +BABEL_OP3_306_90737_20140903_235501_outLine +BABEL_OP3_306_91266_20150215_015545_inLine +BABEL_OP3_306_91266_20150215_015545_outLine +BABEL_OP3_306_91266_20150215_022001_inLine +BABEL_OP3_306_91266_20150215_022001_outLine +BABEL_OP3_306_91463_20140729_001809_inLine +BABEL_OP3_306_91463_20140729_001809_outLine +BABEL_OP3_306_91825_20150408_204309_inLine +BABEL_OP3_306_91825_20150408_204309_outLine +BABEL_OP3_306_91891_20141009_203853_inLine +BABEL_OP3_306_91891_20141009_203853_outLine +BABEL_OP3_306_92440_20150326_232645_inLine +BABEL_OP3_306_92440_20150326_232645_outLine +BABEL_OP3_306_92509_20140521_023136_inLine +BABEL_OP3_306_92509_20140521_023136_outLine +BABEL_OP3_306_92809_20150419_011906_inLine +BABEL_OP3_306_92809_20150419_011906_outLine +BABEL_OP3_306_92941_20140607_001711_inLine +BABEL_OP3_306_92941_20140607_001711_outLine +BABEL_OP3_306_92941_20140607_003034_inLine +BABEL_OP3_306_92941_20140607_003034_outLine +BABEL_OP3_306_93632_20141103_184555_inLine +BABEL_OP3_306_93632_20141103_184555_outLine +BABEL_OP3_306_93946_20141101_211743_inLine +BABEL_OP3_306_93946_20141101_211743_outLine +BABEL_OP3_306_93964_20140730_022556_inLine +BABEL_OP3_306_93964_20140730_022556_outLine +BABEL_OP3_306_94253_20140606_032103_inLine +BABEL_OP3_306_94253_20140606_032103_outLine +BABEL_OP3_306_94409_20141006_205245_inLine +BABEL_OP3_306_94409_20141006_205245_outLine +BABEL_OP3_306_94869_20140515_230712_inLine +BABEL_OP3_306_94869_20140515_230712_outLine +BABEL_OP3_306_94978_20141115_234420_inLine +BABEL_OP3_306_94978_20141115_234420_outLine +BABEL_OP3_306_95124_20150416_012109_inLine +BABEL_OP3_306_95124_20150416_012109_outLine +BABEL_OP3_306_95399_20140905_005504_inLine +BABEL_OP3_306_95399_20140905_005504_outLine +BABEL_OP3_306_95598_20140509_043406_inLine +BABEL_OP3_306_95598_20140509_043406_outLine +BABEL_OP3_306_96730_20141028_230035_inLine +BABEL_OP3_306_96730_20141028_230035_outLine +BABEL_OP3_306_96820_20140802_051525_inLine +BABEL_OP3_306_96820_20140802_051525_outLine +BABEL_OP3_306_97570_20140801_224422_inLine +BABEL_OP3_306_97570_20140801_224422_outLine +BABEL_OP3_306_97588_20140521_051503_inLine +BABEL_OP3_306_97588_20140521_051503_outLine +BABEL_OP3_306_98311_20140604_201838_inLine +BABEL_OP3_306_98311_20140604_201838_outLine +BABEL_OP3_306_99344_20140801_002154_inLine +BABEL_OP3_306_99344_20140801_002154_outLine +BABEL_OP3_306_99920_20140604_212052_inLine +BABEL_OP3_306_99920_20140604_212052_outLine diff --git a/egs/babel/s5d/conf/lists/306-igbo/untranscribed-training.list b/egs/babel/s5d/conf/lists/306-igbo/untranscribed-training.list new file mode 100644 index 
00000000000..0369662c6a8 --- /dev/null +++ b/egs/babel/s5d/conf/lists/306-igbo/untranscribed-training.list @@ -0,0 +1,537 @@ +BABEL_OP3_306_10647_20150310_235644_inLine +BABEL_OP3_306_10647_20150310_235644_outLine +BABEL_OP3_306_11310_20140523_212606_inLine +BABEL_OP3_306_11859_20150210_222000_inLine +BABEL_OP3_306_11859_20150210_222000_outLine +BABEL_OP3_306_12220_20140729_004621_inLine +BABEL_OP3_306_12220_20140729_004621_outLine +BABEL_OP3_306_13040_20140621_040421_inLine +BABEL_OP3_306_13040_20140621_040421_outLine +BABEL_OP3_306_13126_20141222_235100_inLine +BABEL_OP3_306_13126_20141222_235100_outLine +BABEL_OP3_306_13189_20141031_050830_inLine +BABEL_OP3_306_13189_20141031_050830_outLine +BABEL_OP3_306_13483_20141007_044436_inLine +BABEL_OP3_306_13483_20141007_044436_outLine +BABEL_OP3_306_14137_20140602_222616_inLine +BABEL_OP3_306_14137_20140602_222616_outLine +BABEL_OP3_306_14179_20140814_232925_inLine +BABEL_OP3_306_14179_20140814_232925_outLine +BABEL_OP3_306_14179_20140814_233948_inLine +BABEL_OP3_306_14179_20140814_233948_outLine +BABEL_OP3_306_15869_20150421_011752_inLine +BABEL_OP3_306_15869_20150421_011752_outLine +BABEL_OP3_306_16249_20150113_220219_inLine +BABEL_OP3_306_16249_20150113_220219_outLine +BABEL_OP3_306_16407_20150227_040000_inLine +BABEL_OP3_306_16407_20150227_040000_outLine +BABEL_OP3_306_16467_20141025_211050_inLine +BABEL_OP3_306_16467_20141025_211050_outLine +BABEL_OP3_306_16802_20150115_213527_inLine +BABEL_OP3_306_16802_20150115_213527_outLine +BABEL_OP3_306_16886_20140731_022140_inLine +BABEL_OP3_306_16886_20140731_022140_outLine +BABEL_OP3_306_17032_20150311_185659_inLine +BABEL_OP3_306_17032_20150311_185659_outLine +BABEL_OP3_306_17496_20140803_035835_inLine +BABEL_OP3_306_17496_20140803_035835_outLine +BABEL_OP3_306_17567_20140803_225538_inLine +BABEL_OP3_306_17567_20140803_225538_outLine +BABEL_OP3_306_17890_20140919_221004_inLine +BABEL_OP3_306_17890_20140919_221004_outLine +BABEL_OP3_306_17923_20140625_232123_inLine +BABEL_OP3_306_17923_20140625_232123_outLine +BABEL_OP3_306_18992_20150303_000227_inLine +BABEL_OP3_306_18992_20150303_000227_outLine +BABEL_OP3_306_19461_20140527_041640_inLine +BABEL_OP3_306_19621_20140803_005045_inLine +BABEL_OP3_306_19621_20140803_005045_outLine +BABEL_OP3_306_19672_20141005_040103_inLine +BABEL_OP3_306_19672_20141005_040103_outLine +BABEL_OP3_306_19672_20141005_040626_inLine +BABEL_OP3_306_19672_20141005_040626_outLine +BABEL_OP3_306_19688_20140524_223141_inLine +BABEL_OP3_306_19703_20140602_002345_inLine +BABEL_OP3_306_19703_20140602_002345_outLine +BABEL_OP3_306_20133_20140514_195807_inLine +BABEL_OP3_306_20133_20140514_195807_outLine +BABEL_OP3_306_20133_20140514_202548_inLine +BABEL_OP3_306_20133_20140514_202548_outLine +BABEL_OP3_306_20682_20141027_012441_inLine +BABEL_OP3_306_20768_20150306_035010_inLine +BABEL_OP3_306_20768_20150306_035010_outLine +BABEL_OP3_306_20800_20140625_005044_inLine +BABEL_OP3_306_20800_20140625_005044_outLine +BABEL_OP3_306_20800_20140625_005605_inLine +BABEL_OP3_306_20800_20140625_005605_outLine +BABEL_OP3_306_21004_20141026_004641_inLine +BABEL_OP3_306_21004_20141026_004641_outLine +BABEL_OP3_306_21029_20140614_210102_outLine +BABEL_OP3_306_21159_20150210_210334_inLine +BABEL_OP3_306_21159_20150210_210334_outLine +BABEL_OP3_306_22216_20140530_234149_inLine +BABEL_OP3_306_22280_20141009_011742_inLine +BABEL_OP3_306_22280_20141009_011742_outLine +BABEL_OP3_306_22321_20140602_210645_inLine +BABEL_OP3_306_22321_20140602_212529_inLine +BABEL_OP3_306_22446_20140531_021922_inLine 
+BABEL_OP3_306_22466_20140510_200019_inLine +BABEL_OP3_306_22466_20140510_200019_outLine +BABEL_OP3_306_22918_20141101_233512_inLine +BABEL_OP3_306_22918_20141101_233512_outLine +BABEL_OP3_306_22965_20140612_215959_outLine +BABEL_OP3_306_22965_20140612_220852_outLine +BABEL_OP3_306_23151_20150211_025354_inLine +BABEL_OP3_306_23151_20150211_025354_outLine +BABEL_OP3_306_23190_20140729_204900_inLine +BABEL_OP3_306_23190_20140729_204900_outLine +BABEL_OP3_306_23731_20140804_002220_inLine +BABEL_OP3_306_23731_20140804_002220_outLine +BABEL_OP3_306_24470_20141008_233522_inLine +BABEL_OP3_306_24470_20141008_233522_outLine +BABEL_OP3_306_24569_20141101_214133_inLine +BABEL_OP3_306_24569_20141101_214133_outLine +BABEL_OP3_306_24982_20140606_004556_inLine +BABEL_OP3_306_24982_20140606_004556_outLine +BABEL_OP3_306_25012_20140523_200250_inLine +BABEL_OP3_306_25012_20140523_200250_outLine +BABEL_OP3_306_25068_20150203_020803_inLine +BABEL_OP3_306_25068_20150203_020803_outLine +BABEL_OP3_306_25412_20140815_220223_inLine +BABEL_OP3_306_25412_20140815_220223_outLine +BABEL_OP3_306_25895_20150311_050131_outLine +BABEL_OP3_306_26602_20141029_024837_inLine +BABEL_OP3_306_26602_20141029_024837_outLine +BABEL_OP3_306_27203_20140725_002808_inLine +BABEL_OP3_306_27203_20140725_002808_outLine +BABEL_OP3_306_28190_20141031_000818_inLine +BABEL_OP3_306_28190_20141031_000818_outLine +BABEL_OP3_306_28280_20150316_040438_inLine +BABEL_OP3_306_28280_20150316_040438_outLine +BABEL_OP3_306_28522_20140814_224406_inLine +BABEL_OP3_306_28522_20140814_224406_outLine +BABEL_OP3_306_28585_20141028_213521_inLine +BABEL_OP3_306_28585_20141028_213521_outLine +BABEL_OP3_306_28600_20141025_233550_inLine +BABEL_OP3_306_28600_20141025_233550_outLine +BABEL_OP3_306_29076_20140815_025044_inLine +BABEL_OP3_306_29076_20140815_025044_outLine +BABEL_OP3_306_29076_20140815_030534_inLine +BABEL_OP3_306_29076_20140815_030534_outLine +BABEL_OP3_306_29168_20140520_233011_inLine +BABEL_OP3_306_29168_20140520_233011_outLine +BABEL_OP3_306_29168_20140520_234151_inLine +BABEL_OP3_306_29168_20140520_234151_outLine +BABEL_OP3_306_29168_20140520_235529_inLine +BABEL_OP3_306_29168_20140520_235529_outLine +BABEL_OP3_306_29416_20141026_232903_inLine +BABEL_OP3_306_29416_20141026_232903_outLine +BABEL_OP3_306_29482_20150314_051634_inLine +BABEL_OP3_306_29482_20150314_051634_outLine +BABEL_OP3_306_29663_20150311_033510_outLine +BABEL_OP3_306_29911_20140527_033850_inLine +BABEL_OP3_306_29911_20140527_033850_outLine +BABEL_OP3_306_30180_20140728_235122_inLine +BABEL_OP3_306_30180_20140728_235122_outLine +BABEL_OP3_306_30645_20150420_223611_inLine +BABEL_OP3_306_30645_20150420_223611_outLine +BABEL_OP3_306_31624_20140605_214151_inLine +BABEL_OP3_306_31624_20140605_215209_inLine +BABEL_OP3_306_31728_20150401_203654_inLine +BABEL_OP3_306_31728_20150401_203654_outLine +BABEL_OP3_306_32048_20141028_234758_inLine +BABEL_OP3_306_32048_20141028_234758_outLine +BABEL_OP3_306_32380_20150131_012448_inLine +BABEL_OP3_306_32380_20150131_012448_outLine +BABEL_OP3_306_32837_20141031_024422_inLine +BABEL_OP3_306_32837_20141031_024422_outLine +BABEL_OP3_306_33216_20150314_024409_inLine +BABEL_OP3_306_33229_20141029_230937_inLine +BABEL_OP3_306_33229_20141029_230937_outLine +BABEL_OP3_306_34482_20150223_031106_inLine +BABEL_OP3_306_34482_20150223_031106_outLine +BABEL_OP3_306_34811_20140803_015207_inLine +BABEL_OP3_306_34811_20140803_015207_outLine +BABEL_OP3_306_34826_20141028_005224_inLine +BABEL_OP3_306_34826_20141028_005224_outLine 
+BABEL_OP3_306_35143_20141031_235658_inLine +BABEL_OP3_306_35143_20141031_235658_outLine +BABEL_OP3_306_36039_20141116_001002_outLine +BABEL_OP3_306_37682_20140725_003616_inLine +BABEL_OP3_306_37682_20140725_003616_outLine +BABEL_OP3_306_37853_20141101_023348_inLine +BABEL_OP3_306_37853_20141101_023348_outLine +BABEL_OP3_306_38340_20140611_234929_inLine +BABEL_OP3_306_38340_20140611_234929_outLine +BABEL_OP3_306_38340_20140611_235849_inLine +BABEL_OP3_306_38340_20140611_235849_outLine +BABEL_OP3_306_38588_20140728_230958_inLine +BABEL_OP3_306_38588_20140728_230958_outLine +BABEL_OP3_306_38664_20140730_025027_inLine +BABEL_OP3_306_38664_20140730_025027_outLine +BABEL_OP3_306_38750_20141101_221241_inLine +BABEL_OP3_306_38750_20141101_221241_outLine +BABEL_OP3_306_39307_20140522_010101_inLine +BABEL_OP3_306_39426_20141102_040515_inLine +BABEL_OP3_306_39426_20141102_040515_outLine +BABEL_OP3_306_39579_20150123_014947_inLine +BABEL_OP3_306_39579_20150123_014947_outLine +BABEL_OP3_306_39638_20150418_005151_inLine +BABEL_OP3_306_39638_20150418_005151_outLine +BABEL_OP3_306_39848_20141006_034744_inLine +BABEL_OP3_306_39848_20141006_034744_outLine +BABEL_OP3_306_40557_20141101_025253_inLine +BABEL_OP3_306_40557_20141101_025253_outLine +BABEL_OP3_306_41038_20140812_205140_inLine +BABEL_OP3_306_41038_20140812_205140_outLine +BABEL_OP3_306_41100_20140718_034152_inLine +BABEL_OP3_306_41100_20140718_034152_outLine +BABEL_OP3_306_41100_20140718_040923_inLine +BABEL_OP3_306_41100_20140718_040923_outLine +BABEL_OP3_306_41442_20141026_003328_inLine +BABEL_OP3_306_41442_20141026_003328_outLine +BABEL_OP3_306_41493_20140515_044422_inLine +BABEL_OP3_306_41493_20140515_044422_outLine +BABEL_OP3_306_41609_20150418_225730_inLine +BABEL_OP3_306_41609_20150418_225730_outLine +BABEL_OP3_306_42243_20150408_003626_inLine +BABEL_OP3_306_42243_20150408_003626_outLine +BABEL_OP3_306_42434_20140724_015333_inLine +BABEL_OP3_306_42434_20140724_015333_outLine +BABEL_OP3_306_42497_20140622_023839_inLine +BABEL_OP3_306_42497_20140622_023839_outLine +BABEL_OP3_306_42991_20140808_231227_inLine +BABEL_OP3_306_42991_20140808_231227_outLine +BABEL_OP3_306_42991_20140809_015233_inLine +BABEL_OP3_306_42991_20140809_015233_outLine +BABEL_OP3_306_43285_20140814_222223_inLine +BABEL_OP3_306_43920_20141031_035638_inLine +BABEL_OP3_306_43920_20141031_035638_outLine +BABEL_OP3_306_44477_20140804_041338_inLine +BABEL_OP3_306_44477_20140804_041338_outLine +BABEL_OP3_306_44681_20140528_001629_inLine +BABEL_OP3_306_44681_20140528_001629_outLine +BABEL_OP3_306_44898_20140524_184833_inLine +BABEL_OP3_306_44898_20140524_184833_outLine +BABEL_OP3_306_45374_20150120_011233_inLine +BABEL_OP3_306_45374_20150120_011233_outLine +BABEL_OP3_306_45697_20141031_035336_inLine +BABEL_OP3_306_45697_20141031_035336_outLine +BABEL_OP3_306_46041_20141029_203936_inLine +BABEL_OP3_306_46041_20141029_203936_outLine +BABEL_OP3_306_46041_20141029_210843_inLine +BABEL_OP3_306_46041_20141029_210843_outLine +BABEL_OP3_306_46261_20141009_055048_inLine +BABEL_OP3_306_46261_20141009_055048_outLine +BABEL_OP3_306_46268_20150417_042038_inLine +BABEL_OP3_306_46268_20150417_042038_outLine +BABEL_OP3_306_46589_20141005_224403_inLine +BABEL_OP3_306_46589_20141005_224403_outLine +BABEL_OP3_306_46702_20140517_231741_inLine +BABEL_OP3_306_46702_20140517_231741_outLine +BABEL_OP3_306_46712_20140606_212859_inLine +BABEL_OP3_306_46712_20140606_212859_outLine +BABEL_OP3_306_46712_20140606_214018_inLine +BABEL_OP3_306_46712_20140606_214018_outLine 
+BABEL_OP3_306_46881_20150403_054836_inLine +BABEL_OP3_306_46881_20150403_054836_outLine +BABEL_OP3_306_48200_20141028_004545_inLine +BABEL_OP3_306_48200_20141028_004545_outLine +BABEL_OP3_306_48422_20141102_004117_inLine +BABEL_OP3_306_48422_20141102_004117_outLine +BABEL_OP3_306_48789_20141015_223422_inLine +BABEL_OP3_306_48789_20141015_223422_outLine +BABEL_OP3_306_48844_20150421_064019_inLine +BABEL_OP3_306_48844_20150421_064019_outLine +BABEL_OP3_306_49118_20141028_000839_inLine +BABEL_OP3_306_49118_20141028_000839_outLine +BABEL_OP3_306_49767_20150312_012314_inLine +BABEL_OP3_306_49767_20150312_012314_outLine +BABEL_OP3_306_49812_20141105_222036_inLine +BABEL_OP3_306_49812_20141105_222036_outLine +BABEL_OP3_306_49907_20140606_231957_inLine +BABEL_OP3_306_49907_20140606_231957_outLine +BABEL_OP3_306_50090_20140804_031708_inLine +BABEL_OP3_306_50090_20140804_031708_outLine +BABEL_OP3_306_50810_20140514_184240_inLine +BABEL_OP3_306_50958_20141016_004240_inLine +BABEL_OP3_306_50958_20141016_004240_outLine +BABEL_OP3_306_50958_20141016_005618_inLine +BABEL_OP3_306_50958_20141016_005618_outLine +BABEL_OP3_306_51484_20141026_005632_inLine +BABEL_OP3_306_51819_20140923_230818_inLine +BABEL_OP3_306_51819_20140923_230818_outLine +BABEL_OP3_306_51858_20150416_031524_inLine +BABEL_OP3_306_51858_20150416_031524_outLine +BABEL_OP3_306_52246_20140730_012314_inLine +BABEL_OP3_306_52246_20140730_012314_outLine +BABEL_OP3_306_52717_20140717_164851_inLine +BABEL_OP3_306_52717_20140717_164851_outLine +BABEL_OP3_306_52818_20140812_202317_inLine +BABEL_OP3_306_52818_20140812_202317_outLine +BABEL_OP3_306_53063_20141102_002734_inLine +BABEL_OP3_306_53063_20141102_002734_outLine +BABEL_OP3_306_54046_20141030_225348_inLine +BABEL_OP3_306_54046_20141030_225348_outLine +BABEL_OP3_306_54074_20140919_194620_inLine +BABEL_OP3_306_54074_20140919_194620_outLine +BABEL_OP3_306_54074_20140919_195619_inLine +BABEL_OP3_306_54074_20140919_195619_outLine +BABEL_OP3_306_54104_20150420_015927_inLine +BABEL_OP3_306_54104_20150420_015927_outLine +BABEL_OP3_306_54953_20140730_001818_inLine +BABEL_OP3_306_54953_20140730_001818_outLine +BABEL_OP3_306_56023_20141029_001317_inLine +BABEL_OP3_306_56023_20141029_001317_outLine +BABEL_OP3_306_56023_20141029_002053_inLine +BABEL_OP3_306_56023_20141029_002053_outLine +BABEL_OP3_306_56023_20141029_003640_inLine +BABEL_OP3_306_56023_20141029_003640_outLine +BABEL_OP3_306_56090_20140511_020343_inLine +BABEL_OP3_306_56090_20140511_020343_outLine +BABEL_OP3_306_56198_20140612_204109_inLine +BABEL_OP3_306_56198_20140612_204109_outLine +BABEL_OP3_306_56326_20140523_001458_inLine +BABEL_OP3_306_56326_20140523_001458_outLine +BABEL_OP3_306_56720_20141006_203142_inLine +BABEL_OP3_306_56720_20141006_203142_outLine +BABEL_OP3_306_57609_20140808_220254_inLine +BABEL_OP3_306_57609_20140808_220254_outLine +BABEL_OP3_306_58850_20140731_002418_inLine +BABEL_OP3_306_58850_20140731_002418_outLine +BABEL_OP3_306_58926_20140605_035534_inLine +BABEL_OP3_306_59549_20140621_221900_inLine +BABEL_OP3_306_59549_20140621_221900_outLine +BABEL_OP3_306_59549_20140621_223133_inLine +BABEL_OP3_306_59549_20140621_223133_outLine +BABEL_OP3_306_59747_20140530_225826_inLine +BABEL_OP3_306_59747_20140530_225826_outLine +BABEL_OP3_306_59747_20140530_231320_inLine +BABEL_OP3_306_59747_20140530_231320_outLine +BABEL_OP3_306_59993_20140606_000233_inLine +BABEL_OP3_306_60626_20140614_202445_inLine +BABEL_OP3_306_60626_20140614_202445_outLine +BABEL_OP3_306_60830_20141006_215349_inLine 
+BABEL_OP3_306_60830_20141006_215349_outLine +BABEL_OP3_306_61011_20140515_030617_inLine +BABEL_OP3_306_61963_20141028_202812_inLine +BABEL_OP3_306_61963_20141028_202812_outLine +BABEL_OP3_306_62014_20140804_205329_inLine +BABEL_OP3_306_62014_20140804_205329_outLine +BABEL_OP3_306_62047_20141028_035724_inLine +BABEL_OP3_306_62047_20141028_035724_outLine +BABEL_OP3_306_62434_20150414_000517_outLine +BABEL_OP3_306_62810_20150409_183507_inLine +BABEL_OP3_306_62810_20150409_183507_outLine +BABEL_OP3_306_62976_20140811_223219_inLine +BABEL_OP3_306_63084_20140809_013406_inLine +BABEL_OP3_306_63084_20140809_013406_outLine +BABEL_OP3_306_63309_20150417_061125_inLine +BABEL_OP3_306_63309_20150417_061125_outLine +BABEL_OP3_306_63336_20150221_022703_inLine +BABEL_OP3_306_63336_20150221_022703_outLine +BABEL_OP3_306_64065_20140610_210016_inLine +BABEL_OP3_306_64065_20140610_210016_outLine +BABEL_OP3_306_64768_20140604_000427_inLine +BABEL_OP3_306_64768_20140604_000427_outLine +BABEL_OP3_306_65077_20140516_204250_inLine +BABEL_OP3_306_65077_20140516_204250_outLine +BABEL_OP3_306_65692_20140802_044543_inLine +BABEL_OP3_306_65692_20140802_044543_outLine +BABEL_OP3_306_66177_20141104_024434_inLine +BABEL_OP3_306_66177_20141104_024434_outLine +BABEL_OP3_306_67659_20140602_021238_inLine +BABEL_OP3_306_68040_20140802_182145_inLine +BABEL_OP3_306_68385_20140511_024349_inLine +BABEL_OP3_306_68385_20140511_024349_outLine +BABEL_OP3_306_68385_20140511_025326_inLine +BABEL_OP3_306_68385_20140511_025326_outLine +BABEL_OP3_306_68823_20150123_213140_inLine +BABEL_OP3_306_68823_20150123_213140_outLine +BABEL_OP3_306_68910_20150311_040225_inLine +BABEL_OP3_306_68910_20150311_040225_outLine +BABEL_OP3_306_69574_20140517_001243_inLine +BABEL_OP3_306_69574_20140517_001243_outLine +BABEL_OP3_306_70601_20140725_010325_inLine +BABEL_OP3_306_70601_20140725_011335_inLine +BABEL_OP3_306_70726_20150220_234954_inLine +BABEL_OP3_306_70726_20150220_234954_outLine +BABEL_OP3_306_71419_20140526_222116_inLine +BABEL_OP3_306_71419_20140526_222116_outLine +BABEL_OP3_306_71566_20141026_022020_inLine +BABEL_OP3_306_71566_20141026_022020_outLine +BABEL_OP3_306_73022_20141102_005954_inLine +BABEL_OP3_306_73022_20141102_005954_outLine +BABEL_OP3_306_73022_20141102_010949_inLine +BABEL_OP3_306_73022_20141102_010949_outLine +BABEL_OP3_306_73072_20140603_222119_inLine +BABEL_OP3_306_73549_20150312_223219_inLine +BABEL_OP3_306_73549_20150312_223219_outLine +BABEL_OP3_306_74455_20141030_231535_inLine +BABEL_OP3_306_74455_20141030_231535_outLine +BABEL_OP3_306_74667_20140730_220428_inLine +BABEL_OP3_306_74667_20140730_220428_outLine +BABEL_OP3_306_74763_20150422_000233_inLine +BABEL_OP3_306_74763_20150422_000233_outLine +BABEL_OP3_306_74799_20141016_010127_inLine +BABEL_OP3_306_74799_20141016_010127_outLine +BABEL_OP3_306_74921_20140804_005230_inLine +BABEL_OP3_306_74921_20140804_005230_outLine +BABEL_OP3_306_75465_20141025_231951_inLine +BABEL_OP3_306_75465_20141025_231951_outLine +BABEL_OP3_306_76069_20150223_021350_inLine +BABEL_OP3_306_76069_20150223_021350_outLine +BABEL_OP3_306_76238_20141007_011009_inLine +BABEL_OP3_306_76238_20141007_011009_outLine +BABEL_OP3_306_76683_20140813_015005_inLine +BABEL_OP3_306_76683_20140813_015005_outLine +BABEL_OP3_306_76773_20140621_234123_inLine +BABEL_OP3_306_76773_20140621_234123_outLine +BABEL_OP3_306_77033_20141102_032017_inLine +BABEL_OP3_306_77033_20141102_032017_outLine +BABEL_OP3_306_77427_20140803_024549_inLine +BABEL_OP3_306_77427_20140803_024549_outLine 
+BABEL_OP3_306_78609_20141029_012144_inLine +BABEL_OP3_306_78609_20141029_012144_outLine +BABEL_OP3_306_79571_20140814_212942_inLine +BABEL_OP3_306_79571_20140814_212942_outLine +BABEL_OP3_306_79590_20141006_195244_inLine +BABEL_OP3_306_79590_20141006_195244_outLine +BABEL_OP3_306_79590_20141006_200315_inLine +BABEL_OP3_306_79590_20141006_200315_outLine +BABEL_OP3_306_80439_20140605_000944_inLine +BABEL_OP3_306_80439_20140605_000944_outLine +BABEL_OP3_306_80559_20140625_032329_inLine +BABEL_OP3_306_80559_20140625_032329_outLine +BABEL_OP3_306_80897_20140725_020057_inLine +BABEL_OP3_306_80897_20140725_020057_outLine +BABEL_OP3_306_81229_20140730_223530_inLine +BABEL_OP3_306_81229_20140730_223530_outLine +BABEL_OP3_306_81404_20140725_025731_inLine +BABEL_OP3_306_81810_20140728_223725_inLine +BABEL_OP3_306_81971_20140509_013738_inLine +BABEL_OP3_306_82138_20140730_174109_inLine +BABEL_OP3_306_82138_20140730_174109_outLine +BABEL_OP3_306_82425_20140714_035045_inLine +BABEL_OP3_306_82425_20140714_035045_outLine +BABEL_OP3_306_82496_20150418_234759_inLine +BABEL_OP3_306_82496_20150418_234759_outLine +BABEL_OP3_306_82622_20150411_050327_inLine +BABEL_OP3_306_82622_20150411_050327_outLine +BABEL_OP3_306_82979_20140612_012812_outLine +BABEL_OP3_306_83238_20140809_203535_inLine +BABEL_OP3_306_83238_20140809_203535_outLine +BABEL_OP3_306_83238_20140809_205023_inLine +BABEL_OP3_306_83238_20140809_205023_outLine +BABEL_OP3_306_83775_20140724_231716_inLine +BABEL_OP3_306_83813_20140528_211112_inLine +BABEL_OP3_306_83813_20140528_211112_outLine +BABEL_OP3_306_84061_20140730_005053_inLine +BABEL_OP3_306_84061_20140730_005053_outLine +BABEL_OP3_306_84177_20150214_011945_inLine +BABEL_OP3_306_84177_20150214_011945_outLine +BABEL_OP3_306_84327_20141006_211803_inLine +BABEL_OP3_306_84327_20141006_211803_outLine +BABEL_OP3_306_84408_20140729_231948_inLine +BABEL_OP3_306_84408_20140729_231948_outLine +BABEL_OP3_306_84737_20141031_010833_inLine +BABEL_OP3_306_84737_20141031_010833_outLine +BABEL_OP3_306_84823_20141006_034008_inLine +BABEL_OP3_306_84823_20141006_034008_outLine +BABEL_OP3_306_84838_20141029_023621_inLine +BABEL_OP3_306_84838_20141029_023621_outLine +BABEL_OP3_306_85179_20141101_012428_inLine +BABEL_OP3_306_85179_20141101_012428_outLine +BABEL_OP3_306_85322_20150420_034604_inLine +BABEL_OP3_306_85322_20150420_034604_outLine +BABEL_OP3_306_86100_20150328_002625_inLine +BABEL_OP3_306_86100_20150328_002625_outLine +BABEL_OP3_306_86830_20141031_030135_inLine +BABEL_OP3_306_86830_20141031_030135_outLine +BABEL_OP3_306_87073_20140516_232026_inLine +BABEL_OP3_306_87073_20140516_232026_outLine +BABEL_OP3_306_87470_20140729_214135_inLine +BABEL_OP3_306_87470_20140729_214135_outLine +BABEL_OP3_306_87796_20140816_000301_inLine +BABEL_OP3_306_87796_20140816_000301_outLine +BABEL_OP3_306_88260_20140725_033250_inLine +BABEL_OP3_306_88260_20140725_033250_outLine +BABEL_OP3_306_88394_20140525_002127_inLine +BABEL_OP3_306_88394_20140525_002127_outLine +BABEL_OP3_306_88669_20140802_011238_inLine +BABEL_OP3_306_88669_20140802_011238_outLine +BABEL_OP3_306_88669_20140802_011732_inLine +BABEL_OP3_306_88669_20140802_011732_outLine +BABEL_OP3_306_88669_20140802_012458_inLine +BABEL_OP3_306_88669_20140802_012458_outLine +BABEL_OP3_306_88673_20140731_231306_inLine +BABEL_OP3_306_88673_20140731_231306_outLine +BABEL_OP3_306_88783_20141031_212634_inLine +BABEL_OP3_306_88783_20141031_212634_outLine +BABEL_OP3_306_88938_20141102_003357_inLine +BABEL_OP3_306_88938_20141102_003357_outLine 
+BABEL_OP3_306_89372_20140516_004539_inLine +BABEL_OP3_306_89372_20140516_004539_outLine +BABEL_OP3_306_89560_20141029_203632_inLine +BABEL_OP3_306_89560_20141029_203632_outLine +BABEL_OP3_306_89650_20150331_011100_inLine +BABEL_OP3_306_89650_20150331_011100_outLine +BABEL_OP3_306_89665_20140725_015846_inLine +BABEL_OP3_306_89665_20140725_015846_outLine +BABEL_OP3_306_89695_20141006_020223_inLine +BABEL_OP3_306_89695_20141006_020223_outLine +BABEL_OP3_306_89794_20140813_221738_inLine +BABEL_OP3_306_89794_20140813_221738_outLine +BABEL_OP3_306_89943_20140607_005926_inLine +BABEL_OP3_306_89943_20140607_005926_outLine +BABEL_OP3_306_90347_20140814_172652_inLine +BABEL_OP3_306_90347_20140814_172652_outLine +BABEL_OP3_306_91125_20140522_213937_inLine +BABEL_OP3_306_91125_20140522_213937_outLine +BABEL_OP3_306_91125_20140522_214703_inLine +BABEL_OP3_306_91125_20140522_214703_outLine +BABEL_OP3_306_91319_20141028_013449_inLine +BABEL_OP3_306_91319_20141028_013449_outLine +BABEL_OP3_306_91971_20150331_203936_inLine +BABEL_OP3_306_91971_20150331_203936_outLine +BABEL_OP3_306_91977_20141004_202232_inLine +BABEL_OP3_306_91977_20141004_202232_outLine +BABEL_OP3_306_92252_20150327_024334_inLine +BABEL_OP3_306_92557_20141031_213221_inLine +BABEL_OP3_306_92557_20141031_213221_outLine +BABEL_OP3_306_92740_20141004_182215_inLine +BABEL_OP3_306_92740_20141004_182215_outLine +BABEL_OP3_306_92886_20140611_015551_inLine +BABEL_OP3_306_92886_20140611_015551_outLine +BABEL_OP3_306_93007_20150314_033427_outLine +BABEL_OP3_306_93475_20140625_235211_inLine +BABEL_OP3_306_93475_20140625_235211_outLine +BABEL_OP3_306_94166_20141102_014755_inLine +BABEL_OP3_306_94166_20141102_014755_outLine +BABEL_OP3_306_94333_20150418_031427_inLine +BABEL_OP3_306_94333_20150418_031427_outLine +BABEL_OP3_306_95446_20141028_001455_inLine +BABEL_OP3_306_95446_20141028_001455_outLine +BABEL_OP3_306_96077_20150327_033005_inLine +BABEL_OP3_306_96077_20150327_033005_outLine +BABEL_OP3_306_96190_20140614_223920_inLine +BABEL_OP3_306_96190_20140614_223920_outLine +BABEL_OP3_306_96584_20141104_034807_inLine +BABEL_OP3_306_97264_20141028_220710_inLine +BABEL_OP3_306_97264_20141028_220710_outLine +BABEL_OP3_306_97363_20140612_224303_inLine +BABEL_OP3_306_97363_20140612_224303_outLine +BABEL_OP3_306_97557_20140802_234323_inLine +BABEL_OP3_306_97557_20140802_234323_outLine +BABEL_OP3_306_97557_20140802_235634_inLine +BABEL_OP3_306_97557_20140802_235634_outLine +BABEL_OP3_306_97896_20140731_015336_inLine +BABEL_OP3_306_97896_20140731_015336_outLine +BABEL_OP3_306_97988_20141101_013315_inLine +BABEL_OP3_306_97988_20141101_013315_outLine +BABEL_OP3_306_98356_20140929_235521_inLine +BABEL_OP3_306_98356_20140929_235521_outLine +BABEL_OP3_306_98565_20150327_040438_inLine +BABEL_OP3_306_98565_20150327_040438_outLine +BABEL_OP3_306_98888_20141006_032811_inLine +BABEL_OP3_306_98888_20141006_032811_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/dev.2h.list b/egs/babel/s5d/conf/lists/307-amharic/dev.2h.list new file mode 100644 index 00000000000..933a75246bc --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/dev.2h.list @@ -0,0 +1,123 @@ +BABEL_OP3_307_11096_20140823_004817_inLine +BABEL_OP3_307_11096_20140823_004817_outLine +BABEL_OP3_307_13030_20140510_014335_inLine +BABEL_OP3_307_13030_20140510_014335_outLine +BABEL_OP3_307_14440_20140601_192635_inLine +BABEL_OP3_307_14440_20140601_192635_outLine +BABEL_OP3_307_15324_20140531_195640_inLine +BABEL_OP3_307_15324_20140531_195640_outLine +BABEL_OP3_307_15848_20140414_191259_inLine 
+BABEL_OP3_307_15848_20140414_191259_outLine +BABEL_OP3_307_16601_20140616_191918_inLine +BABEL_OP3_307_16601_20140616_191918_outLine +BABEL_OP3_307_17280_20140509_005048_inLine +BABEL_OP3_307_17280_20140509_005048_outLine +BABEL_OP3_307_17881_20140721_204147_inLine +BABEL_OP3_307_17881_20140721_204147_outLine +BABEL_OP3_307_18766_20140725_193025_inLine +BABEL_OP3_307_18766_20140725_193025_outLine +BABEL_OP3_307_19621_20140517_232031_inLine +BABEL_OP3_307_19621_20140517_232031_outLine +BABEL_OP3_307_19782_20140702_230513_inLine +BABEL_OP3_307_19782_20140702_230513_outLine +BABEL_OP3_307_21029_20140430_192710_inLine +BABEL_OP3_307_21029_20140430_192710_outLine +BABEL_OP3_307_28871_20140414_214155_inLine +BABEL_OP3_307_28871_20140414_214155_outLine +BABEL_OP3_307_29168_20140415_202128_inLine +BABEL_OP3_307_29168_20140415_202128_outLine +BABEL_OP3_307_29765_20140823_220912_inLine +BABEL_OP3_307_29765_20140823_220912_outLine +BABEL_OP3_307_30280_20140909_000751_outLine +BABEL_OP3_307_32048_20140705_013312_inLine +BABEL_OP3_307_32048_20140705_013312_outLine +BABEL_OP3_307_32708_20140429_224318_inLine +BABEL_OP3_307_32708_20140429_224318_outLine +BABEL_OP3_307_36219_20140405_235707_inLine +BABEL_OP3_307_36219_20140405_235707_outLine +BABEL_OP3_307_37285_20140618_224046_inLine +BABEL_OP3_307_37285_20140618_224046_outLine +BABEL_OP3_307_41741_20140422_000845_inLine +BABEL_OP3_307_41741_20140422_000845_outLine +BABEL_OP3_307_42848_20140822_203249_inLine +BABEL_OP3_307_42848_20140822_203249_outLine +BABEL_OP3_307_42883_20140823_230118_inLine +BABEL_OP3_307_42883_20140823_230118_outLine +BABEL_OP3_307_44619_20140405_193041_inLine +BABEL_OP3_307_44619_20140405_193041_outLine +BABEL_OP3_307_44961_20140421_215913_inLine +BABEL_OP3_307_44961_20140421_215913_outLine +BABEL_OP3_307_46625_20140414_224528_inLine +BABEL_OP3_307_46625_20140414_224528_outLine +BABEL_OP3_307_47799_20140902_200301_inLine +BABEL_OP3_307_47799_20140902_200301_outLine +BABEL_OP3_307_49902_20140510_004310_inLine +BABEL_OP3_307_49902_20140510_004310_outLine +BABEL_OP3_307_50090_20140531_225332_inLine +BABEL_OP3_307_50090_20140531_225332_outLine +BABEL_OP3_307_52438_20140429_232836_inLine +BABEL_OP3_307_52438_20140429_232836_outLine +BABEL_OP3_307_54160_20140402_232820_inLine +BABEL_OP3_307_54160_20140402_232820_outLine +BABEL_OP3_307_58717_20140518_204047_inLine +BABEL_OP3_307_58717_20140518_204047_outLine +BABEL_OP3_307_60498_20140823_192847_inLine +BABEL_OP3_307_60498_20140823_192847_outLine +BABEL_OP3_307_61011_20140415_180846_inLine +BABEL_OP3_307_61011_20140415_180846_outLine +BABEL_OP3_307_61011_20140415_181727_inLine +BABEL_OP3_307_61011_20140415_181727_outLine +BABEL_OP3_307_61357_20140602_184817_inLine +BABEL_OP3_307_61357_20140602_184817_outLine +BABEL_OP3_307_62200_20140505_000149_inLine +BABEL_OP3_307_62200_20140505_000149_outLine +BABEL_OP3_307_62286_20140503_220651_inLine +BABEL_OP3_307_62286_20140503_220651_outLine +BABEL_OP3_307_64870_20140518_011602_inLine +BABEL_OP3_307_64870_20140518_011602_outLine +BABEL_OP3_307_65692_20140517_182352_inLine +BABEL_OP3_307_65692_20140517_182352_outLine +BABEL_OP3_307_66519_20140510_212511_inLine +BABEL_OP3_307_66519_20140510_212511_outLine +BABEL_OP3_307_69153_20140624_193324_inLine +BABEL_OP3_307_69153_20140624_193324_outLine +BABEL_OP3_307_69633_20140607_233440_inLine +BABEL_OP3_307_69633_20140607_233440_outLine +BABEL_OP3_307_71038_20140712_000601_inLine +BABEL_OP3_307_71038_20140712_000601_outLine +BABEL_OP3_307_73757_20140512_231155_inLine 
+BABEL_OP3_307_73757_20140512_231155_outLine +BABEL_OP3_307_76372_20140721_231708_inLine +BABEL_OP3_307_76372_20140721_231708_outLine +BABEL_OP3_307_81553_20140707_003952_inLine +BABEL_OP3_307_81553_20140707_003952_outLine +BABEL_OP3_307_85439_20140814_215435_inLine +BABEL_OP3_307_85439_20140814_215435_outLine +BABEL_OP3_307_88550_20140809_212521_inLine +BABEL_OP3_307_88550_20140809_212521_outLine +BABEL_OP3_307_88601_20140512_171733_inLine +BABEL_OP3_307_88601_20140512_171733_outLine +BABEL_OP3_307_89888_20140520_191659_inLine +BABEL_OP3_307_89888_20140520_191659_outLine +BABEL_OP3_307_90777_20140507_231811_inLine +BABEL_OP3_307_90777_20140507_231811_outLine +BABEL_OP3_307_92176_20140515_231853_inLine +BABEL_OP3_307_92176_20140515_231853_outLine +BABEL_OP3_307_92643_20140806_220922_inLine +BABEL_OP3_307_92643_20140806_220922_outLine +BABEL_OP3_307_92886_20140430_194417_inLine +BABEL_OP3_307_92886_20140430_194417_outLine +BABEL_OP3_307_93320_20140823_214255_inLine +BABEL_OP3_307_93320_20140823_214255_outLine +BABEL_OP3_307_94002_20140511_172143_inLine +BABEL_OP3_307_94002_20140511_172143_outLine +BABEL_OP3_307_94237_20140814_181922_inLine +BABEL_OP3_307_94237_20140814_181922_outLine +BABEL_OP3_307_95124_20140828_224047_inLine +BABEL_OP3_307_95124_20140828_224047_outLine +BABEL_OP3_307_96940_20140901_181148_inLine +BABEL_OP3_307_96940_20140901_181148_outLine +BABEL_OP3_307_96985_20140503_190037_inLine +BABEL_OP3_307_96985_20140503_190037_outLine +BABEL_OP3_307_98506_20140807_170934_inLine +BABEL_OP3_307_98506_20140807_170934_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/dev.list b/egs/babel/s5d/conf/lists/307-amharic/dev.list new file mode 100644 index 00000000000..933a75246bc --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/dev.list @@ -0,0 +1,123 @@ +BABEL_OP3_307_11096_20140823_004817_inLine +BABEL_OP3_307_11096_20140823_004817_outLine +BABEL_OP3_307_13030_20140510_014335_inLine +BABEL_OP3_307_13030_20140510_014335_outLine +BABEL_OP3_307_14440_20140601_192635_inLine +BABEL_OP3_307_14440_20140601_192635_outLine +BABEL_OP3_307_15324_20140531_195640_inLine +BABEL_OP3_307_15324_20140531_195640_outLine +BABEL_OP3_307_15848_20140414_191259_inLine +BABEL_OP3_307_15848_20140414_191259_outLine +BABEL_OP3_307_16601_20140616_191918_inLine +BABEL_OP3_307_16601_20140616_191918_outLine +BABEL_OP3_307_17280_20140509_005048_inLine +BABEL_OP3_307_17280_20140509_005048_outLine +BABEL_OP3_307_17881_20140721_204147_inLine +BABEL_OP3_307_17881_20140721_204147_outLine +BABEL_OP3_307_18766_20140725_193025_inLine +BABEL_OP3_307_18766_20140725_193025_outLine +BABEL_OP3_307_19621_20140517_232031_inLine +BABEL_OP3_307_19621_20140517_232031_outLine +BABEL_OP3_307_19782_20140702_230513_inLine +BABEL_OP3_307_19782_20140702_230513_outLine +BABEL_OP3_307_21029_20140430_192710_inLine +BABEL_OP3_307_21029_20140430_192710_outLine +BABEL_OP3_307_28871_20140414_214155_inLine +BABEL_OP3_307_28871_20140414_214155_outLine +BABEL_OP3_307_29168_20140415_202128_inLine +BABEL_OP3_307_29168_20140415_202128_outLine +BABEL_OP3_307_29765_20140823_220912_inLine +BABEL_OP3_307_29765_20140823_220912_outLine +BABEL_OP3_307_30280_20140909_000751_outLine +BABEL_OP3_307_32048_20140705_013312_inLine +BABEL_OP3_307_32048_20140705_013312_outLine +BABEL_OP3_307_32708_20140429_224318_inLine +BABEL_OP3_307_32708_20140429_224318_outLine +BABEL_OP3_307_36219_20140405_235707_inLine +BABEL_OP3_307_36219_20140405_235707_outLine +BABEL_OP3_307_37285_20140618_224046_inLine +BABEL_OP3_307_37285_20140618_224046_outLine 
+BABEL_OP3_307_41741_20140422_000845_inLine +BABEL_OP3_307_41741_20140422_000845_outLine +BABEL_OP3_307_42848_20140822_203249_inLine +BABEL_OP3_307_42848_20140822_203249_outLine +BABEL_OP3_307_42883_20140823_230118_inLine +BABEL_OP3_307_42883_20140823_230118_outLine +BABEL_OP3_307_44619_20140405_193041_inLine +BABEL_OP3_307_44619_20140405_193041_outLine +BABEL_OP3_307_44961_20140421_215913_inLine +BABEL_OP3_307_44961_20140421_215913_outLine +BABEL_OP3_307_46625_20140414_224528_inLine +BABEL_OP3_307_46625_20140414_224528_outLine +BABEL_OP3_307_47799_20140902_200301_inLine +BABEL_OP3_307_47799_20140902_200301_outLine +BABEL_OP3_307_49902_20140510_004310_inLine +BABEL_OP3_307_49902_20140510_004310_outLine +BABEL_OP3_307_50090_20140531_225332_inLine +BABEL_OP3_307_50090_20140531_225332_outLine +BABEL_OP3_307_52438_20140429_232836_inLine +BABEL_OP3_307_52438_20140429_232836_outLine +BABEL_OP3_307_54160_20140402_232820_inLine +BABEL_OP3_307_54160_20140402_232820_outLine +BABEL_OP3_307_58717_20140518_204047_inLine +BABEL_OP3_307_58717_20140518_204047_outLine +BABEL_OP3_307_60498_20140823_192847_inLine +BABEL_OP3_307_60498_20140823_192847_outLine +BABEL_OP3_307_61011_20140415_180846_inLine +BABEL_OP3_307_61011_20140415_180846_outLine +BABEL_OP3_307_61011_20140415_181727_inLine +BABEL_OP3_307_61011_20140415_181727_outLine +BABEL_OP3_307_61357_20140602_184817_inLine +BABEL_OP3_307_61357_20140602_184817_outLine +BABEL_OP3_307_62200_20140505_000149_inLine +BABEL_OP3_307_62200_20140505_000149_outLine +BABEL_OP3_307_62286_20140503_220651_inLine +BABEL_OP3_307_62286_20140503_220651_outLine +BABEL_OP3_307_64870_20140518_011602_inLine +BABEL_OP3_307_64870_20140518_011602_outLine +BABEL_OP3_307_65692_20140517_182352_inLine +BABEL_OP3_307_65692_20140517_182352_outLine +BABEL_OP3_307_66519_20140510_212511_inLine +BABEL_OP3_307_66519_20140510_212511_outLine +BABEL_OP3_307_69153_20140624_193324_inLine +BABEL_OP3_307_69153_20140624_193324_outLine +BABEL_OP3_307_69633_20140607_233440_inLine +BABEL_OP3_307_69633_20140607_233440_outLine +BABEL_OP3_307_71038_20140712_000601_inLine +BABEL_OP3_307_71038_20140712_000601_outLine +BABEL_OP3_307_73757_20140512_231155_inLine +BABEL_OP3_307_73757_20140512_231155_outLine +BABEL_OP3_307_76372_20140721_231708_inLine +BABEL_OP3_307_76372_20140721_231708_outLine +BABEL_OP3_307_81553_20140707_003952_inLine +BABEL_OP3_307_81553_20140707_003952_outLine +BABEL_OP3_307_85439_20140814_215435_inLine +BABEL_OP3_307_85439_20140814_215435_outLine +BABEL_OP3_307_88550_20140809_212521_inLine +BABEL_OP3_307_88550_20140809_212521_outLine +BABEL_OP3_307_88601_20140512_171733_inLine +BABEL_OP3_307_88601_20140512_171733_outLine +BABEL_OP3_307_89888_20140520_191659_inLine +BABEL_OP3_307_89888_20140520_191659_outLine +BABEL_OP3_307_90777_20140507_231811_inLine +BABEL_OP3_307_90777_20140507_231811_outLine +BABEL_OP3_307_92176_20140515_231853_inLine +BABEL_OP3_307_92176_20140515_231853_outLine +BABEL_OP3_307_92643_20140806_220922_inLine +BABEL_OP3_307_92643_20140806_220922_outLine +BABEL_OP3_307_92886_20140430_194417_inLine +BABEL_OP3_307_92886_20140430_194417_outLine +BABEL_OP3_307_93320_20140823_214255_inLine +BABEL_OP3_307_93320_20140823_214255_outLine +BABEL_OP3_307_94002_20140511_172143_inLine +BABEL_OP3_307_94002_20140511_172143_outLine +BABEL_OP3_307_94237_20140814_181922_inLine +BABEL_OP3_307_94237_20140814_181922_outLine +BABEL_OP3_307_95124_20140828_224047_inLine +BABEL_OP3_307_95124_20140828_224047_outLine +BABEL_OP3_307_96940_20140901_181148_inLine 
+BABEL_OP3_307_96940_20140901_181148_outLine +BABEL_OP3_307_96985_20140503_190037_inLine +BABEL_OP3_307_96985_20140503_190037_outLine +BABEL_OP3_307_98506_20140807_170934_inLine +BABEL_OP3_307_98506_20140807_170934_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/eval.list b/egs/babel/s5d/conf/lists/307-amharic/eval.list new file mode 100644 index 00000000000..9687fe69738 --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/eval.list @@ -0,0 +1,186 @@ +BABEL_OP3_307_10319_20140417_201202_inLine +BABEL_OP3_307_10319_20140417_201202_outLine +BABEL_OP3_307_12846_20140820_004747_inLine +BABEL_OP3_307_12846_20140820_004747_outLine +BABEL_OP3_307_13040_20140519_010732_inLine +BABEL_OP3_307_13040_20140519_010732_outLine +BABEL_OP3_307_13427_20140517_185634_inLine +BABEL_OP3_307_13427_20140517_185634_outLine +BABEL_OP3_307_15617_20140902_211446_inLine +BABEL_OP3_307_15617_20140902_211446_outLine +BABEL_OP3_307_16056_20140403_224737_inLine +BABEL_OP3_307_16056_20140403_224737_outLine +BABEL_OP3_307_16787_20140504_192345_inLine +BABEL_OP3_307_16787_20140504_192345_outLine +BABEL_OP3_307_16787_20140504_193044_inLine +BABEL_OP3_307_16787_20140504_193044_outLine +BABEL_OP3_307_18242_20140822_194420_inLine +BABEL_OP3_307_18242_20140822_194420_outLine +BABEL_OP3_307_19672_20140610_182836_inLine +BABEL_OP3_307_19672_20140610_182836_outLine +BABEL_OP3_307_20738_20140714_223501_inLine +BABEL_OP3_307_20738_20140714_223501_outLine +BABEL_OP3_307_20800_20140501_001836_inLine +BABEL_OP3_307_20800_20140501_001836_outLine +BABEL_OP3_307_21581_20140510_172450_inLine +BABEL_OP3_307_21581_20140510_172450_outLine +BABEL_OP3_307_22641_20140417_190251_inLine +BABEL_OP3_307_22641_20140417_190251_outLine +BABEL_OP3_307_23260_20140809_221233_inLine +BABEL_OP3_307_23260_20140809_221233_outLine +BABEL_OP3_307_23983_20140814_233159_inLine +BABEL_OP3_307_23983_20140814_233159_outLine +BABEL_OP3_307_24033_20140705_202406_inLine +BABEL_OP3_307_24033_20140705_202406_outLine +BABEL_OP3_307_26072_20140707_234609_inLine +BABEL_OP3_307_26072_20140707_234609_outLine +BABEL_OP3_307_28585_20140703_170913_inLine +BABEL_OP3_307_28585_20140703_170913_outLine +BABEL_OP3_307_28606_20140617_001826_inLine +BABEL_OP3_307_28606_20140617_001826_outLine +BABEL_OP3_307_31668_20140827_172922_inLine +BABEL_OP3_307_31668_20140827_172922_outLine +BABEL_OP3_307_33635_20140508_230911_inLine +BABEL_OP3_307_33635_20140508_230911_outLine +BABEL_OP3_307_33659_20140824_234408_inLine +BABEL_OP3_307_33659_20140824_234408_outLine +BABEL_OP3_307_34486_20140824_163426_inLine +BABEL_OP3_307_34486_20140824_163426_outLine +BABEL_OP3_307_34564_20140703_183530_inLine +BABEL_OP3_307_34564_20140703_183530_outLine +BABEL_OP3_307_34713_20140903_004337_inLine +BABEL_OP3_307_34713_20140903_004337_outLine +BABEL_OP3_307_35202_20140609_172217_inLine +BABEL_OP3_307_35202_20140609_172217_outLine +BABEL_OP3_307_35609_20140907_195928_inLine +BABEL_OP3_307_35609_20140907_195928_outLine +BABEL_OP3_307_36017_20140811_180507_inLine +BABEL_OP3_307_36017_20140811_180507_outLine +BABEL_OP3_307_37064_20140405_195726_inLine +BABEL_OP3_307_37064_20140405_195726_outLine +BABEL_OP3_307_41745_20140508_193418_inLine +BABEL_OP3_307_41745_20140508_193418_outLine +BABEL_OP3_307_42231_20140616_222234_inLine +BABEL_OP3_307_42231_20140616_222234_outLine +BABEL_OP3_307_43285_20140607_212542_inLine +BABEL_OP3_307_43285_20140607_212542_outLine +BABEL_OP3_307_44420_20140503_221325_inLine +BABEL_OP3_307_44420_20140503_221325_outLine +BABEL_OP3_307_44847_20140527_221753_inLine 
+BABEL_OP3_307_44847_20140527_221753_outLine +BABEL_OP3_307_45106_20140530_183351_inLine +BABEL_OP3_307_45106_20140530_183351_outLine +BABEL_OP3_307_45777_20140506_181506_inLine +BABEL_OP3_307_45777_20140506_181506_outLine +BABEL_OP3_307_47877_20140705_224331_inLine +BABEL_OP3_307_47877_20140705_224331_outLine +BABEL_OP3_307_47959_20140505_185302_inLine +BABEL_OP3_307_47959_20140505_185302_outLine +BABEL_OP3_307_48399_20140403_003150_inLine +BABEL_OP3_307_48399_20140403_003150_outLine +BABEL_OP3_307_49637_20140417_211134_inLine +BABEL_OP3_307_49637_20140417_211134_outLine +BABEL_OP3_307_50175_20140415_222418_inLine +BABEL_OP3_307_50175_20140415_222418_outLine +BABEL_OP3_307_50630_20140609_215223_inLine +BABEL_OP3_307_50630_20140609_215223_outLine +BABEL_OP3_307_51858_20140829_174031_inLine +BABEL_OP3_307_51858_20140829_174031_outLine +BABEL_OP3_307_52694_20140519_182152_inLine +BABEL_OP3_307_52694_20140519_182152_outLine +BABEL_OP3_307_53072_20140810_001530_inLine +BABEL_OP3_307_53072_20140810_001530_outLine +BABEL_OP3_307_54405_20140517_202903_inLine +BABEL_OP3_307_54405_20140517_202903_outLine +BABEL_OP3_307_57609_20140519_194402_inLine +BABEL_OP3_307_57609_20140519_194402_outLine +BABEL_OP3_307_60307_20140907_225330_inLine +BABEL_OP3_307_60307_20140907_225330_outLine +BABEL_OP3_307_60538_20140423_174547_inLine +BABEL_OP3_307_60538_20140423_174547_outLine +BABEL_OP3_307_62362_20140824_175404_inLine +BABEL_OP3_307_62362_20140824_175404_outLine +BABEL_OP3_307_62852_20140416_014025_inLine +BABEL_OP3_307_62852_20140416_014025_outLine +BABEL_OP3_307_63309_20140828_003208_inLine +BABEL_OP3_307_63309_20140828_003208_outLine +BABEL_OP3_307_63445_20140401_225339_inLine +BABEL_OP3_307_63445_20140401_225339_outLine +BABEL_OP3_307_64494_20140430_224138_inLine +BABEL_OP3_307_64494_20140430_224138_outLine +BABEL_OP3_307_64638_20140609_213059_inLine +BABEL_OP3_307_64638_20140609_213059_outLine +BABEL_OP3_307_65252_20140813_202634_inLine +BABEL_OP3_307_65252_20140813_202634_outLine +BABEL_OP3_307_65370_20140907_174141_inLine +BABEL_OP3_307_65370_20140907_174141_outLine +BABEL_OP3_307_67794_20140430_211624_inLine +BABEL_OP3_307_67794_20140430_211624_outLine +BABEL_OP3_307_67794_20140430_212806_inLine +BABEL_OP3_307_67794_20140430_212806_outLine +BABEL_OP3_307_70110_20140414_223000_inLine +BABEL_OP3_307_70110_20140414_223000_outLine +BABEL_OP3_307_73042_20140403_013739_inLine +BABEL_OP3_307_73042_20140403_013739_outLine +BABEL_OP3_307_75460_20140821_232032_inLine +BABEL_OP3_307_75460_20140821_232032_outLine +BABEL_OP3_307_76773_20140403_224239_inLine +BABEL_OP3_307_76773_20140403_224239_outLine +BABEL_OP3_307_77112_20140405_232547_inLine +BABEL_OP3_307_77112_20140405_232547_outLine +BABEL_OP3_307_77391_20140404_205514_inLine +BABEL_OP3_307_77391_20140404_205514_outLine +BABEL_OP3_307_79820_20140404_235700_inLine +BABEL_OP3_307_79820_20140404_235700_outLine +BABEL_OP3_307_80897_20140605_185417_inLine +BABEL_OP3_307_80897_20140605_185417_outLine +BABEL_OP3_307_82361_20140811_190547_inLine +BABEL_OP3_307_82361_20140811_190547_outLine +BABEL_OP3_307_82966_20140704_224020_inLine +BABEL_OP3_307_82966_20140704_224020_outLine +BABEL_OP3_307_83062_20140730_214025_inLine +BABEL_OP3_307_83062_20140730_214025_outLine +BABEL_OP3_307_83366_20140529_193250_inLine +BABEL_OP3_307_83366_20140529_193250_outLine +BABEL_OP3_307_83545_20140813_230842_inLine +BABEL_OP3_307_83545_20140813_230842_outLine +BABEL_OP3_307_83775_20140510_215248_inLine +BABEL_OP3_307_83775_20140510_215248_outLine 
+BABEL_OP3_307_83775_20140510_220305_inLine +BABEL_OP3_307_83775_20140510_220305_outLine +BABEL_OP3_307_83851_20140404_202207_inLine +BABEL_OP3_307_83851_20140404_202207_outLine +BABEL_OP3_307_86748_20140707_202225_inLine +BABEL_OP3_307_86748_20140707_202225_outLine +BABEL_OP3_307_87073_20140327_221923_inLine +BABEL_OP3_307_87073_20140327_221923_outLine +BABEL_OP3_307_87693_20140503_194632_inLine +BABEL_OP3_307_87693_20140503_194632_outLine +BABEL_OP3_307_89045_20140519_191547_inLine +BABEL_OP3_307_89045_20140519_191547_outLine +BABEL_OP3_307_89330_20140821_234229_inLine +BABEL_OP3_307_89330_20140821_234229_outLine +BABEL_OP3_307_89794_20140531_224759_inLine +BABEL_OP3_307_89794_20140531_224759_outLine +BABEL_OP3_307_90440_20140829_001435_inLine +BABEL_OP3_307_90440_20140829_001435_outLine +BABEL_OP3_307_90935_20140508_183907_inLine +BABEL_OP3_307_90935_20140508_183907_outLine +BABEL_OP3_307_91463_20140603_203737_inLine +BABEL_OP3_307_91463_20140603_203737_outLine +BABEL_OP3_307_92060_20140814_230458_inLine +BABEL_OP3_307_92060_20140814_230458_outLine +BABEL_OP3_307_92698_20140510_215147_inLine +BABEL_OP3_307_92698_20140510_215147_outLine +BABEL_OP3_307_94587_20140614_000734_inLine +BABEL_OP3_307_94587_20140614_000734_outLine +BABEL_OP3_307_96205_20140512_195746_inLine +BABEL_OP3_307_96205_20140512_195746_outLine +BABEL_OP3_307_97264_20140705_170053_inLine +BABEL_OP3_307_97264_20140705_170053_outLine +BABEL_OP3_307_98580_20140504_195655_inLine +BABEL_OP3_307_98580_20140504_195655_outLine +BABEL_OP3_307_99487_20140518_212249_inLine +BABEL_OP3_307_99487_20140518_212249_outLine +BABEL_OP3_307_99952_20140822_185201_inLine +BABEL_OP3_307_99952_20140822_185201_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/sub-train.list b/egs/babel/s5d/conf/lists/307-amharic/sub-train.list new file mode 100644 index 00000000000..a21532c03d7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/sub-train.list @@ -0,0 +1,122 @@ +BABEL_OP3_307_14229_20140503_233516_inLine +BABEL_OP3_307_14229_20140503_233516_outLine +BABEL_OP3_307_14725_20140421_212856_inLine +BABEL_OP3_307_14725_20140421_212856_outLine +BABEL_OP3_307_15216_20140628_231525_inLine +BABEL_OP3_307_15216_20140628_231525_outLine +BABEL_OP3_307_15902_20140422_235151_inLine +BABEL_OP3_307_15902_20140422_235151_outLine +BABEL_OP3_307_16475_20140511_014949_inLine +BABEL_OP3_307_16475_20140511_014949_outLine +BABEL_OP3_307_17496_20140530_181532_inLine +BABEL_OP3_307_17496_20140530_181532_outLine +BABEL_OP3_307_22321_20140417_205436_inLine +BABEL_OP3_307_22321_20140417_205436_outLine +BABEL_OP3_307_22612_20140624_171814_inLine +BABEL_OP3_307_22612_20140624_171814_outLine +BABEL_OP3_307_23006_20140506_191811_inLine +BABEL_OP3_307_23006_20140506_191811_outLine +BABEL_OP3_307_25767_20140403_234644_inLine +BABEL_OP3_307_25767_20140403_234644_outLine +BABEL_OP3_307_26602_20140702_235542_inLine +BABEL_OP3_307_26602_20140702_235542_outLine +BABEL_OP3_307_27125_20140414_222204_inLine +BABEL_OP3_307_27125_20140414_222204_outLine +BABEL_OP3_307_28190_20140703_190209_inLine +BABEL_OP3_307_28190_20140703_190209_outLine +BABEL_OP3_307_29076_20140605_214715_inLine +BABEL_OP3_307_29076_20140605_214715_outLine +BABEL_OP3_307_33251_20140603_185012_inLine +BABEL_OP3_307_33251_20140603_185012_outLine +BABEL_OP3_307_34197_20140401_235309_inLine +BABEL_OP3_307_34197_20140401_235309_outLine +BABEL_OP3_307_34336_20140405_010509_inLine +BABEL_OP3_307_34336_20140405_010509_outLine +BABEL_OP3_307_35583_20140706_224724_inLine +BABEL_OP3_307_35583_20140706_224724_outLine 
+BABEL_OP3_307_38076_20140531_001406_inLine +BABEL_OP3_307_38076_20140531_001406_outLine +BABEL_OP3_307_39059_20140717_183250_inLine +BABEL_OP3_307_39059_20140717_183250_outLine +BABEL_OP3_307_41097_20140531_181736_inLine +BABEL_OP3_307_41097_20140531_181736_outLine +BABEL_OP3_307_41685_20140825_205956_inLine +BABEL_OP3_307_41685_20140825_205956_outLine +BABEL_OP3_307_44446_20140827_003250_inLine +BABEL_OP3_307_44446_20140827_003250_outLine +BABEL_OP3_307_49502_20140415_220754_inLine +BABEL_OP3_307_49502_20140415_220754_outLine +BABEL_OP3_307_51611_20140423_232011_inLine +BABEL_OP3_307_51611_20140423_232011_outLine +BABEL_OP3_307_53842_20140513_184522_inLine +BABEL_OP3_307_53842_20140513_184522_outLine +BABEL_OP3_307_56198_20140501_005036_inLine +BABEL_OP3_307_56198_20140501_005036_outLine +BABEL_OP3_307_57678_20140405_000739_inLine +BABEL_OP3_307_57678_20140405_000739_outLine +BABEL_OP3_307_61971_20140811_182130_inLine +BABEL_OP3_307_61971_20140811_182130_outLine +BABEL_OP3_307_64350_20140403_011744_inLine +BABEL_OP3_307_64350_20140403_011744_outLine +BABEL_OP3_307_64768_20140404_233306_inLine +BABEL_OP3_307_64768_20140404_233306_outLine +BABEL_OP3_307_67552_20140611_194432_inLine +BABEL_OP3_307_67552_20140611_194432_outLine +BABEL_OP3_307_70986_20140825_003434_inLine +BABEL_OP3_307_70986_20140825_003434_outLine +BABEL_OP3_307_71263_20140602_180728_inLine +BABEL_OP3_307_71263_20140602_180728_outLine +BABEL_OP3_307_73446_20140809_165436_inLine +BABEL_OP3_307_73446_20140809_165436_outLine +BABEL_OP3_307_74799_20140602_191429_inLine +BABEL_OP3_307_74799_20140602_191429_outLine +BABEL_OP3_307_77139_20140416_004159_inLine +BABEL_OP3_307_77139_20140416_004159_outLine +BABEL_OP3_307_77803_20140402_001929_inLine +BABEL_OP3_307_77803_20140402_001929_outLine +BABEL_OP3_307_78161_20140828_164656_inLine +BABEL_OP3_307_78161_20140828_164656_outLine +BABEL_OP3_307_78194_20140411_164649_inLine +BABEL_OP3_307_78194_20140411_164649_outLine +BABEL_OP3_307_79167_20140606_224734_inLine +BABEL_OP3_307_79167_20140606_224734_outLine +BABEL_OP3_307_79429_20140826_212728_inLine +BABEL_OP3_307_79429_20140826_212728_outLine +BABEL_OP3_307_80069_20140821_213402_inLine +BABEL_OP3_307_80069_20140821_213402_outLine +BABEL_OP3_307_82140_20140513_191321_inLine +BABEL_OP3_307_82140_20140513_191321_outLine +BABEL_OP3_307_82863_20140511_183302_inLine +BABEL_OP3_307_82863_20140511_183302_outLine +BABEL_OP3_307_82904_20140730_002106_inLine +BABEL_OP3_307_82904_20140730_002106_outLine +BABEL_OP3_307_86472_20140609_222936_inLine +BABEL_OP3_307_86472_20140609_222936_outLine +BABEL_OP3_307_86888_20140530_190736_inLine +BABEL_OP3_307_86888_20140530_190736_outLine +BABEL_OP3_307_87074_20140429_185857_inLine +BABEL_OP3_307_87074_20140429_185857_outLine +BABEL_OP3_307_90417_20140822_223028_inLine +BABEL_OP3_307_90417_20140822_223028_outLine +BABEL_OP3_307_90417_20140822_224049_inLine +BABEL_OP3_307_90417_20140822_224049_outLine +BABEL_OP3_307_90709_20140421_235753_inLine +BABEL_OP3_307_90709_20140421_235753_outLine +BABEL_OP3_307_91189_20140821_210308_inLine +BABEL_OP3_307_91189_20140821_210308_outLine +BABEL_OP3_307_91581_20140623_234855_inLine +BABEL_OP3_307_91581_20140623_234855_outLine +BABEL_OP3_307_91884_20140723_193506_inLine +BABEL_OP3_307_91884_20140723_193506_outLine +BABEL_OP3_307_91888_20140813_180920_inLine +BABEL_OP3_307_91888_20140813_180920_outLine +BABEL_OP3_307_92757_20140809_200327_inLine +BABEL_OP3_307_92757_20140809_200327_outLine +BABEL_OP3_307_93469_20140813_214657_inLine 
+BABEL_OP3_307_93469_20140813_214657_outLine +BABEL_OP3_307_94465_20140622_180637_inLine +BABEL_OP3_307_94465_20140622_180637_outLine +BABEL_OP3_307_94891_20140830_193021_inLine +BABEL_OP3_307_94891_20140830_193021_outLine +BABEL_OP3_307_97588_20140415_223241_inLine +BABEL_OP3_307_97588_20140415_223241_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/307-amharic/sub-train.untranscribed.list new file mode 100644 index 00000000000..fce3045a1ed --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/sub-train.untranscribed.list @@ -0,0 +1,364 @@ +BABEL_OP3_307_10638_20140902_000559_inLine +BABEL_OP3_307_10638_20140902_000559_outLine +BABEL_OP3_307_10647_20140721_185220_inLine +BABEL_OP3_307_10647_20140721_185220_outLine +BABEL_OP3_307_10938_20140511_203436_inLine +BABEL_OP3_307_10938_20140511_203436_outLine +BABEL_OP3_307_11673_20140403_181549_inLine +BABEL_OP3_307_11673_20140403_181549_outLine +BABEL_OP3_307_11797_20140403_212832_inLine +BABEL_OP3_307_11797_20140403_212832_outLine +BABEL_OP3_307_12767_20140403_010841_inLine +BABEL_OP3_307_12767_20140403_010841_outLine +BABEL_OP3_307_13490_20140511_183719_inLine +BABEL_OP3_307_13490_20140511_183719_outLine +BABEL_OP3_307_13664_20140414_233828_inLine +BABEL_OP3_307_13664_20140414_233828_outLine +BABEL_OP3_307_13709_20140712_220945_inLine +BABEL_OP3_307_13709_20140712_220945_outLine +BABEL_OP3_307_13776_20140824_184628_inLine +BABEL_OP3_307_13776_20140824_184628_outLine +BABEL_OP3_307_14237_20140417_200235_inLine +BABEL_OP3_307_14237_20140417_200235_outLine +BABEL_OP3_307_14814_20140505_232452_inLine +BABEL_OP3_307_14814_20140505_232452_outLine +BABEL_OP3_307_15227_20140821_214005_inLine +BABEL_OP3_307_15227_20140821_214005_outLine +BABEL_OP3_307_15227_20140822_215614_inLine +BABEL_OP3_307_15227_20140822_215614_outLine +BABEL_OP3_307_15535_20140614_181940_inLine +BABEL_OP3_307_15535_20140614_181940_outLine +BABEL_OP3_307_15730_20140520_180833_inLine +BABEL_OP3_307_15730_20140520_180833_outLine +BABEL_OP3_307_16149_20140403_005747_inLine +BABEL_OP3_307_16149_20140403_005747_outLine +BABEL_OP3_307_17520_20140518_010259_inLine +BABEL_OP3_307_17520_20140518_010259_outLine +BABEL_OP3_307_18566_20140730_203138_inLine +BABEL_OP3_307_18566_20140730_203138_outLine +BABEL_OP3_307_18939_20140417_155733_inLine +BABEL_OP3_307_18939_20140417_155733_outLine +BABEL_OP3_307_18939_20140417_160632_inLine +BABEL_OP3_307_18939_20140417_160632_outLine +BABEL_OP3_307_19818_20140529_184253_inLine +BABEL_OP3_307_19818_20140529_184253_outLine +BABEL_OP3_307_20437_20140825_181004_inLine +BABEL_OP3_307_20437_20140825_181004_outLine +BABEL_OP3_307_20916_20140415_014115_inLine +BABEL_OP3_307_20916_20140415_014115_outLine +BABEL_OP3_307_20972_20140821_181210_inLine +BABEL_OP3_307_20972_20140821_181210_outLine +BABEL_OP3_307_21327_20140624_183416_inLine +BABEL_OP3_307_21327_20140624_183416_outLine +BABEL_OP3_307_21435_20140715_021926_inLine +BABEL_OP3_307_21435_20140715_021926_outLine +BABEL_OP3_307_23980_20140508_223043_inLine +BABEL_OP3_307_23980_20140508_223043_outLine +BABEL_OP3_307_24010_20140903_194143_inLine +BABEL_OP3_307_24010_20140903_194143_outLine +BABEL_OP3_307_24017_20140630_191336_inLine +BABEL_OP3_307_24017_20140630_191336_outLine +BABEL_OP3_307_24270_20140602_192257_inLine +BABEL_OP3_307_24270_20140602_192257_outLine +BABEL_OP3_307_24470_20140604_230747_inLine +BABEL_OP3_307_24470_20140604_230747_outLine +BABEL_OP3_307_26388_20140504_193621_inLine +BABEL_OP3_307_26388_20140504_193621_outLine 
+BABEL_OP3_307_29021_20140725_002551_inLine +BABEL_OP3_307_29021_20140725_002551_outLine +BABEL_OP3_307_29072_20140613_182323_inLine +BABEL_OP3_307_29072_20140613_182323_outLine +BABEL_OP3_307_29633_20140722_010644_inLine +BABEL_OP3_307_29633_20140722_010644_outLine +BABEL_OP3_307_30098_20140725_200446_inLine +BABEL_OP3_307_30098_20140725_200446_outLine +BABEL_OP3_307_31346_20140704_220402_inLine +BABEL_OP3_307_31346_20140704_220402_outLine +BABEL_OP3_307_32122_20140510_002050_inLine +BABEL_OP3_307_32122_20140510_002050_outLine +BABEL_OP3_307_32171_20140827_233808_inLine +BABEL_OP3_307_32171_20140827_233808_outLine +BABEL_OP3_307_32301_20140618_175857_inLine +BABEL_OP3_307_32301_20140618_175857_outLine +BABEL_OP3_307_32328_20140701_173938_inLine +BABEL_OP3_307_32328_20140701_173938_outLine +BABEL_OP3_307_32837_20140628_224152_inLine +BABEL_OP3_307_32837_20140628_224152_outLine +BABEL_OP3_307_33175_20140416_211640_inLine +BABEL_OP3_307_33175_20140416_211640_outLine +BABEL_OP3_307_33229_20140717_222336_inLine +BABEL_OP3_307_33229_20140717_222336_outLine +BABEL_OP3_307_33273_20140504_190501_inLine +BABEL_OP3_307_33273_20140504_190501_outLine +BABEL_OP3_307_34679_20140405_000658_inLine +BABEL_OP3_307_34679_20140405_000658_outLine +BABEL_OP3_307_34811_20140517_012722_inLine +BABEL_OP3_307_34811_20140517_012722_outLine +BABEL_OP3_307_35139_20140403_212641_inLine +BABEL_OP3_307_35139_20140403_212641_outLine +BABEL_OP3_307_35181_20140719_185816_inLine +BABEL_OP3_307_35181_20140719_185816_outLine +BABEL_OP3_307_36669_20140512_181519_inLine +BABEL_OP3_307_36669_20140512_181519_outLine +BABEL_OP3_307_37228_20140706_173354_inLine +BABEL_OP3_307_37228_20140706_173354_outLine +BABEL_OP3_307_38588_20140505_183744_inLine +BABEL_OP3_307_38588_20140505_183744_outLine +BABEL_OP3_307_38664_20140508_003821_inLine +BABEL_OP3_307_38664_20140508_003821_outLine +BABEL_OP3_307_41720_20140824_215221_inLine +BABEL_OP3_307_41720_20140824_215221_outLine +BABEL_OP3_307_43286_20140519_004615_inLine +BABEL_OP3_307_43286_20140519_004615_outLine +BABEL_OP3_307_43323_20140824_230200_inLine +BABEL_OP3_307_43323_20140824_230200_outLine +BABEL_OP3_307_43784_20140430_225016_inLine +BABEL_OP3_307_43784_20140430_225016_outLine +BABEL_OP3_307_43794_20140902_183511_inLine +BABEL_OP3_307_43794_20140902_183511_outLine +BABEL_OP3_307_43920_20140622_222232_inLine +BABEL_OP3_307_43920_20140622_222232_outLine +BABEL_OP3_307_44477_20140611_180941_inLine +BABEL_OP3_307_44477_20140611_180941_outLine +BABEL_OP3_307_45771_20140824_012354_inLine +BABEL_OP3_307_45771_20140824_012354_outLine +BABEL_OP3_307_46041_20140705_175737_inLine +BABEL_OP3_307_46041_20140705_175737_outLine +BABEL_OP3_307_46310_20140417_192000_inLine +BABEL_OP3_307_46310_20140417_192000_outLine +BABEL_OP3_307_46589_20140606_191357_inLine +BABEL_OP3_307_46589_20140606_191357_outLine +BABEL_OP3_307_46681_20140403_002233_inLine +BABEL_OP3_307_46681_20140403_002233_outLine +BABEL_OP3_307_46770_20140706_002306_inLine +BABEL_OP3_307_46770_20140706_002306_outLine +BABEL_OP3_307_46976_20140516_234604_inLine +BABEL_OP3_307_46976_20140516_234604_outLine +BABEL_OP3_307_47451_20140624_234108_inLine +BABEL_OP3_307_47451_20140624_234108_outLine +BABEL_OP3_307_48243_20140423_214726_inLine +BABEL_OP3_307_48243_20140423_214726_outLine +BABEL_OP3_307_49027_20140811_191512_inLine +BABEL_OP3_307_49027_20140811_191512_outLine +BABEL_OP3_307_49287_20140527_215142_inLine +BABEL_OP3_307_49287_20140527_215142_outLine +BABEL_OP3_307_49768_20140505_000629_inLine 
+BABEL_OP3_307_49768_20140505_000629_outLine +BABEL_OP3_307_49907_20140429_214231_inLine +BABEL_OP3_307_49907_20140429_214231_outLine +BABEL_OP3_307_50427_20140519_180652_inLine +BABEL_OP3_307_50427_20140519_180652_outLine +BABEL_OP3_307_50940_20140902_173543_inLine +BABEL_OP3_307_50940_20140902_173543_outLine +BABEL_OP3_307_51185_20140901_232033_inLine +BABEL_OP3_307_51185_20140901_232033_outLine +BABEL_OP3_307_51484_20140703_181343_inLine +BABEL_OP3_307_51484_20140703_181343_outLine +BABEL_OP3_307_51968_20140503_185322_inLine +BABEL_OP3_307_51968_20140503_185322_outLine +BABEL_OP3_307_51968_20140503_185916_inLine +BABEL_OP3_307_51968_20140503_185916_outLine +BABEL_OP3_307_52301_20140423_210352_inLine +BABEL_OP3_307_52301_20140423_210352_outLine +BABEL_OP3_307_52381_20140705_233901_inLine +BABEL_OP3_307_52381_20140705_233901_outLine +BABEL_OP3_307_52404_20140607_181619_inLine +BABEL_OP3_307_52404_20140607_181619_outLine +BABEL_OP3_307_52422_20140707_220639_inLine +BABEL_OP3_307_52422_20140707_220639_outLine +BABEL_OP3_307_54104_20140503_183514_inLine +BABEL_OP3_307_54104_20140503_183514_outLine +BABEL_OP3_307_54477_20140705_174757_inLine +BABEL_OP3_307_54477_20140705_174757_outLine +BABEL_OP3_307_54827_20140814_180107_inLine +BABEL_OP3_307_54827_20140814_180107_outLine +BABEL_OP3_307_54841_20140713_170956_inLine +BABEL_OP3_307_54841_20140713_170956_outLine +BABEL_OP3_307_55902_20140829_192235_inLine +BABEL_OP3_307_55902_20140829_192235_outLine +BABEL_OP3_307_56023_20140704_191158_inLine +BABEL_OP3_307_56023_20140704_191158_outLine +BABEL_OP3_307_57464_20140728_215432_inLine +BABEL_OP3_307_57464_20140728_215432_outLine +BABEL_OP3_307_58103_20140511_191956_inLine +BABEL_OP3_307_58103_20140511_191956_outLine +BABEL_OP3_307_58145_20140605_175238_inLine +BABEL_OP3_307_58145_20140605_175238_outLine +BABEL_OP3_307_58313_20140605_235938_inLine +BABEL_OP3_307_58313_20140605_235938_outLine +BABEL_OP3_307_58585_20140717_221803_inLine +BABEL_OP3_307_58585_20140717_221803_outLine +BABEL_OP3_307_58734_20140422_182501_inLine +BABEL_OP3_307_58734_20140422_182501_outLine +BABEL_OP3_307_59028_20140820_184151_inLine +BABEL_OP3_307_59028_20140820_184151_outLine +BABEL_OP3_307_59091_20140706_233018_inLine +BABEL_OP3_307_59091_20140706_233018_outLine +BABEL_OP3_307_59307_20140730_225719_inLine +BABEL_OP3_307_59307_20140730_225719_outLine +BABEL_OP3_307_59635_20140705_193327_inLine +BABEL_OP3_307_59635_20140705_193327_outLine +BABEL_OP3_307_60026_20140416_210913_inLine +BABEL_OP3_307_60026_20140416_210913_outLine +BABEL_OP3_307_60474_20140503_215918_inLine +BABEL_OP3_307_60474_20140503_215918_outLine +BABEL_OP3_307_61167_20140511_204037_inLine +BABEL_OP3_307_61167_20140511_204037_outLine +BABEL_OP3_307_61731_20140407_191634_inLine +BABEL_OP3_307_61731_20140407_191634_outLine +BABEL_OP3_307_62158_20140907_190726_inLine +BABEL_OP3_307_62158_20140907_190726_outLine +BABEL_OP3_307_64065_20140502_190738_inLine +BABEL_OP3_307_64065_20140502_190738_outLine +BABEL_OP3_307_65064_20140604_223702_inLine +BABEL_OP3_307_65064_20140604_223702_outLine +BABEL_OP3_307_65367_20140706_182846_inLine +BABEL_OP3_307_65367_20140706_182846_outLine +BABEL_OP3_307_66001_20140518_232707_inLine +BABEL_OP3_307_66001_20140518_232707_outLine +BABEL_OP3_307_66305_20140807_184053_inLine +BABEL_OP3_307_66305_20140807_184053_outLine +BABEL_OP3_307_66822_20140504_164117_inLine +BABEL_OP3_307_66822_20140504_164117_outLine +BABEL_OP3_307_67283_20140421_213932_inLine +BABEL_OP3_307_67283_20140421_213932_outLine 
+BABEL_OP3_307_67659_20140503_214825_inLine +BABEL_OP3_307_67659_20140503_214825_outLine +BABEL_OP3_307_68748_20140609_212915_inLine +BABEL_OP3_307_68748_20140609_212915_outLine +BABEL_OP3_307_69096_20140813_192001_inLine +BABEL_OP3_307_69096_20140813_192001_outLine +BABEL_OP3_307_69992_20140502_183707_inLine +BABEL_OP3_307_69992_20140502_183707_outLine +BABEL_OP3_307_70452_20140504_180340_inLine +BABEL_OP3_307_70452_20140504_180340_outLine +BABEL_OP3_307_71189_20140715_012540_inLine +BABEL_OP3_307_71189_20140715_012540_outLine +BABEL_OP3_307_71404_20140423_203052_inLine +BABEL_OP3_307_71404_20140423_203052_outLine +BABEL_OP3_307_72587_20140529_225152_inLine +BABEL_OP3_307_72587_20140529_225152_outLine +BABEL_OP3_307_72952_20140819_214300_inLine +BABEL_OP3_307_72952_20140819_214300_outLine +BABEL_OP3_307_73005_20140815_000302_inLine +BABEL_OP3_307_73005_20140815_000302_outLine +BABEL_OP3_307_73258_20140508_180508_inLine +BABEL_OP3_307_73258_20140508_180508_outLine +BABEL_OP3_307_73299_20140822_002656_inLine +BABEL_OP3_307_73299_20140822_002656_outLine +BABEL_OP3_307_73511_20140614_171020_inLine +BABEL_OP3_307_73511_20140614_171020_outLine +BABEL_OP3_307_74667_20140508_225904_inLine +BABEL_OP3_307_74667_20140508_225904_outLine +BABEL_OP3_307_75365_20140821_220730_inLine +BABEL_OP3_307_75365_20140821_220730_outLine +BABEL_OP3_307_75993_20140404_202655_inLine +BABEL_OP3_307_75993_20140404_202655_outLine +BABEL_OP3_307_76238_20140623_222754_inLine +BABEL_OP3_307_76238_20140623_222754_outLine +BABEL_OP3_307_76499_20140512_232123_inLine +BABEL_OP3_307_76499_20140512_232123_outLine +BABEL_OP3_307_76902_20140829_203049_inLine +BABEL_OP3_307_76902_20140829_203049_outLine +BABEL_OP3_307_77427_20140508_024629_inLine +BABEL_OP3_307_77427_20140508_024629_outLine +BABEL_OP3_307_77832_20140903_183557_inLine +BABEL_OP3_307_77832_20140903_183557_outLine +BABEL_OP3_307_78943_20140505_000428_inLine +BABEL_OP3_307_78943_20140505_000428_outLine +BABEL_OP3_307_79451_20140417_185927_inLine +BABEL_OP3_307_79451_20140417_185927_outLine +BABEL_OP3_307_79660_20140820_174118_inLine +BABEL_OP3_307_79660_20140820_174118_outLine +BABEL_OP3_307_80136_20140706_191530_inLine +BABEL_OP3_307_80136_20140706_191530_outLine +BABEL_OP3_307_80306_20140510_220902_inLine +BABEL_OP3_307_80306_20140510_220902_outLine +BABEL_OP3_307_81213_20140501_002133_inLine +BABEL_OP3_307_81213_20140501_002133_outLine +BABEL_OP3_307_81287_20140616_182444_inLine +BABEL_OP3_307_81287_20140616_182444_outLine +BABEL_OP3_307_81424_20140614_215540_inLine +BABEL_OP3_307_81424_20140614_215540_outLine +BABEL_OP3_307_81435_20140529_235732_inLine +BABEL_OP3_307_81435_20140529_235732_outLine +BABEL_OP3_307_81671_20140704_213446_inLine +BABEL_OP3_307_81671_20140704_213446_outLine +BABEL_OP3_307_82496_20140429_221502_inLine +BABEL_OP3_307_82496_20140429_221502_outLine +BABEL_OP3_307_82626_20140825_181202_inLine +BABEL_OP3_307_82626_20140825_181202_outLine +BABEL_OP3_307_82935_20140702_173347_inLine +BABEL_OP3_307_82935_20140702_173347_outLine +BABEL_OP3_307_86191_20140505_200151_inLine +BABEL_OP3_307_86191_20140505_200151_outLine +BABEL_OP3_307_86433_20140601_173214_inLine +BABEL_OP3_307_86433_20140601_173214_outLine +BABEL_OP3_307_86713_20140704_201850_inLine +BABEL_OP3_307_86713_20140704_201850_outLine +BABEL_OP3_307_86715_20140820_191201_inLine +BABEL_OP3_307_86715_20140820_191201_outLine +BABEL_OP3_307_86722_20140404_001449_inLine +BABEL_OP3_307_86722_20140404_001449_outLine +BABEL_OP3_307_88756_20140908_011014_inLine 
+BABEL_OP3_307_88756_20140908_011014_outLine +BABEL_OP3_307_88776_20140417_180154_inLine +BABEL_OP3_307_88776_20140417_180154_outLine +BABEL_OP3_307_88783_20140623_173406_inLine +BABEL_OP3_307_88783_20140623_173406_outLine +BABEL_OP3_307_89203_20140705_004511_inLine +BABEL_OP3_307_89203_20140705_004511_outLine +BABEL_OP3_307_89358_20140513_014405_inLine +BABEL_OP3_307_89358_20140513_014405_outLine +BABEL_OP3_307_89575_20140705_220326_inLine +BABEL_OP3_307_89575_20140705_220326_outLine +BABEL_OP3_307_89877_20140602_225948_inLine +BABEL_OP3_307_89877_20140602_225948_outLine +BABEL_OP3_307_90572_20140723_230358_inLine +BABEL_OP3_307_90572_20140723_230358_outLine +BABEL_OP3_307_90739_20140503_223700_inLine +BABEL_OP3_307_90739_20140503_223700_outLine +BABEL_OP3_307_91944_20140430_182005_inLine +BABEL_OP3_307_91944_20140430_182005_outLine +BABEL_OP3_307_92605_20140902_013736_inLine +BABEL_OP3_307_92605_20140902_013736_outLine +BABEL_OP3_307_92942_20140603_223928_inLine +BABEL_OP3_307_92942_20140603_223928_outLine +BABEL_OP3_307_93490_20140704_173442_inLine +BABEL_OP3_307_93490_20140704_173442_outLine +BABEL_OP3_307_93604_20140814_210305_inLine +BABEL_OP3_307_93604_20140814_210305_outLine +BABEL_OP3_307_93858_20140822_215929_inLine +BABEL_OP3_307_93858_20140822_215929_outLine +BABEL_OP3_307_94025_20140606_214625_inLine +BABEL_OP3_307_94025_20140606_214625_outLine +BABEL_OP3_307_94253_20140423_183534_inLine +BABEL_OP3_307_94253_20140423_183534_outLine +BABEL_OP3_307_94316_20140814_001643_inLine +BABEL_OP3_307_94316_20140814_001643_outLine +BABEL_OP3_307_94333_20140417_212859_inLine +BABEL_OP3_307_94333_20140417_212859_outLine +BABEL_OP3_307_94409_20140506_174815_inLine +BABEL_OP3_307_94409_20140506_174815_outLine +BABEL_OP3_307_94442_20140725_195152_inLine +BABEL_OP3_307_94442_20140725_195152_outLine +BABEL_OP3_307_94969_20140903_171944_inLine +BABEL_OP3_307_94969_20140903_171944_outLine +BABEL_OP3_307_95077_20140622_221523_inLine +BABEL_OP3_307_95077_20140622_221523_outLine +BABEL_OP3_307_95670_20140417_201744_inLine +BABEL_OP3_307_95670_20140417_201744_outLine +BABEL_OP3_307_96690_20140507_212636_inLine +BABEL_OP3_307_96690_20140507_212636_outLine +BABEL_OP3_307_96820_20140517_194553_inLine +BABEL_OP3_307_96820_20140517_194553_outLine +BABEL_OP3_307_96910_20140504_223516_inLine +BABEL_OP3_307_96910_20140504_223516_outLine +BABEL_OP3_307_98192_20140823_224529_inLine +BABEL_OP3_307_98192_20140823_224529_outLine +BABEL_OP3_307_98365_20140606_004323_inLine +BABEL_OP3_307_98365_20140606_004323_outLine +BABEL_OP3_307_99202_20140519_213506_inLine +BABEL_OP3_307_99202_20140519_213506_outLine +BABEL_OP3_307_99594_20140508_192558_inLine +BABEL_OP3_307_99594_20140508_192558_outLine diff --git a/egs/babel/s5d/conf/lists/307-amharic/training.list b/egs/babel/s5d/conf/lists/307-amharic/training.list new file mode 100644 index 00000000000..e58883c0fc7 --- /dev/null +++ b/egs/babel/s5d/conf/lists/307-amharic/training.list @@ -0,0 +1,486 @@ +BABEL_OP3_307_10638_20140902_000559_inLine +BABEL_OP3_307_10638_20140902_000559_outLine +BABEL_OP3_307_10647_20140721_185220_inLine +BABEL_OP3_307_10647_20140721_185220_outLine +BABEL_OP3_307_10938_20140511_203436_inLine +BABEL_OP3_307_10938_20140511_203436_outLine +BABEL_OP3_307_11673_20140403_181549_inLine +BABEL_OP3_307_11673_20140403_181549_outLine +BABEL_OP3_307_11797_20140403_212832_inLine +BABEL_OP3_307_11797_20140403_212832_outLine +BABEL_OP3_307_12767_20140403_010841_inLine +BABEL_OP3_307_12767_20140403_010841_outLine +BABEL_OP3_307_13490_20140511_183719_inLine 
+BABEL_OP3_307_13490_20140511_183719_outLine +BABEL_OP3_307_13664_20140414_233828_inLine +BABEL_OP3_307_13664_20140414_233828_outLine +BABEL_OP3_307_13709_20140712_220945_inLine +BABEL_OP3_307_13709_20140712_220945_outLine +BABEL_OP3_307_13776_20140824_184628_inLine +BABEL_OP3_307_13776_20140824_184628_outLine +BABEL_OP3_307_14229_20140503_233516_inLine +BABEL_OP3_307_14229_20140503_233516_outLine +BABEL_OP3_307_14237_20140417_200235_inLine +BABEL_OP3_307_14237_20140417_200235_outLine +BABEL_OP3_307_14725_20140421_212856_inLine +BABEL_OP3_307_14725_20140421_212856_outLine +BABEL_OP3_307_14814_20140505_232452_inLine +BABEL_OP3_307_14814_20140505_232452_outLine +BABEL_OP3_307_15216_20140628_231525_inLine +BABEL_OP3_307_15216_20140628_231525_outLine +BABEL_OP3_307_15227_20140821_214005_inLine +BABEL_OP3_307_15227_20140821_214005_outLine +BABEL_OP3_307_15227_20140822_215614_inLine +BABEL_OP3_307_15227_20140822_215614_outLine +BABEL_OP3_307_15535_20140614_181940_inLine +BABEL_OP3_307_15535_20140614_181940_outLine +BABEL_OP3_307_15730_20140520_180833_inLine +BABEL_OP3_307_15730_20140520_180833_outLine +BABEL_OP3_307_15902_20140422_235151_inLine +BABEL_OP3_307_15902_20140422_235151_outLine +BABEL_OP3_307_16149_20140403_005747_inLine +BABEL_OP3_307_16149_20140403_005747_outLine +BABEL_OP3_307_16475_20140511_014949_inLine +BABEL_OP3_307_16475_20140511_014949_outLine +BABEL_OP3_307_17496_20140530_181532_inLine +BABEL_OP3_307_17496_20140530_181532_outLine +BABEL_OP3_307_17520_20140518_010259_inLine +BABEL_OP3_307_17520_20140518_010259_outLine +BABEL_OP3_307_18566_20140730_203138_inLine +BABEL_OP3_307_18566_20140730_203138_outLine +BABEL_OP3_307_18939_20140417_155733_inLine +BABEL_OP3_307_18939_20140417_155733_outLine +BABEL_OP3_307_18939_20140417_160632_inLine +BABEL_OP3_307_18939_20140417_160632_outLine +BABEL_OP3_307_19818_20140529_184253_inLine +BABEL_OP3_307_19818_20140529_184253_outLine +BABEL_OP3_307_20437_20140825_181004_inLine +BABEL_OP3_307_20437_20140825_181004_outLine +BABEL_OP3_307_20916_20140415_014115_inLine +BABEL_OP3_307_20916_20140415_014115_outLine +BABEL_OP3_307_20972_20140821_181210_inLine +BABEL_OP3_307_20972_20140821_181210_outLine +BABEL_OP3_307_21327_20140624_183416_inLine +BABEL_OP3_307_21327_20140624_183416_outLine +BABEL_OP3_307_21435_20140715_021926_inLine +BABEL_OP3_307_21435_20140715_021926_outLine +BABEL_OP3_307_22321_20140417_205436_inLine +BABEL_OP3_307_22321_20140417_205436_outLine +BABEL_OP3_307_22612_20140624_171814_inLine +BABEL_OP3_307_22612_20140624_171814_outLine +BABEL_OP3_307_23006_20140506_191811_inLine +BABEL_OP3_307_23006_20140506_191811_outLine +BABEL_OP3_307_23980_20140508_223043_inLine +BABEL_OP3_307_23980_20140508_223043_outLine +BABEL_OP3_307_24010_20140903_194143_inLine +BABEL_OP3_307_24010_20140903_194143_outLine +BABEL_OP3_307_24017_20140630_191336_inLine +BABEL_OP3_307_24017_20140630_191336_outLine +BABEL_OP3_307_24270_20140602_192257_inLine +BABEL_OP3_307_24270_20140602_192257_outLine +BABEL_OP3_307_24470_20140604_230747_inLine +BABEL_OP3_307_24470_20140604_230747_outLine +BABEL_OP3_307_25767_20140403_234644_inLine +BABEL_OP3_307_25767_20140403_234644_outLine +BABEL_OP3_307_26388_20140504_193621_inLine +BABEL_OP3_307_26388_20140504_193621_outLine +BABEL_OP3_307_26602_20140702_235542_inLine +BABEL_OP3_307_26602_20140702_235542_outLine +BABEL_OP3_307_27125_20140414_222204_inLine +BABEL_OP3_307_27125_20140414_222204_outLine +BABEL_OP3_307_28190_20140703_190209_inLine +BABEL_OP3_307_28190_20140703_190209_outLine 
+BABEL_OP3_307_29021_20140725_002551_inLine +BABEL_OP3_307_29021_20140725_002551_outLine +BABEL_OP3_307_29072_20140613_182323_inLine +BABEL_OP3_307_29072_20140613_182323_outLine +BABEL_OP3_307_29076_20140605_214715_inLine +BABEL_OP3_307_29076_20140605_214715_outLine +BABEL_OP3_307_29633_20140722_010644_inLine +BABEL_OP3_307_29633_20140722_010644_outLine +BABEL_OP3_307_30098_20140725_200446_inLine +BABEL_OP3_307_30098_20140725_200446_outLine +BABEL_OP3_307_31346_20140704_220402_inLine +BABEL_OP3_307_31346_20140704_220402_outLine +BABEL_OP3_307_32122_20140510_002050_inLine +BABEL_OP3_307_32122_20140510_002050_outLine +BABEL_OP3_307_32171_20140827_233808_inLine +BABEL_OP3_307_32171_20140827_233808_outLine +BABEL_OP3_307_32301_20140618_175857_inLine +BABEL_OP3_307_32301_20140618_175857_outLine +BABEL_OP3_307_32328_20140701_173938_inLine +BABEL_OP3_307_32328_20140701_173938_outLine +BABEL_OP3_307_32837_20140628_224152_inLine +BABEL_OP3_307_32837_20140628_224152_outLine +BABEL_OP3_307_33175_20140416_211640_inLine +BABEL_OP3_307_33175_20140416_211640_outLine +BABEL_OP3_307_33229_20140717_222336_inLine +BABEL_OP3_307_33229_20140717_222336_outLine +BABEL_OP3_307_33251_20140603_185012_inLine +BABEL_OP3_307_33251_20140603_185012_outLine +BABEL_OP3_307_33273_20140504_190501_inLine +BABEL_OP3_307_33273_20140504_190501_outLine +BABEL_OP3_307_34197_20140401_235309_inLine +BABEL_OP3_307_34197_20140401_235309_outLine +BABEL_OP3_307_34336_20140405_010509_inLine +BABEL_OP3_307_34336_20140405_010509_outLine +BABEL_OP3_307_34679_20140405_000658_inLine +BABEL_OP3_307_34679_20140405_000658_outLine +BABEL_OP3_307_34811_20140517_012722_inLine +BABEL_OP3_307_34811_20140517_012722_outLine +BABEL_OP3_307_35139_20140403_212641_inLine +BABEL_OP3_307_35139_20140403_212641_outLine +BABEL_OP3_307_35181_20140719_185816_inLine +BABEL_OP3_307_35181_20140719_185816_outLine +BABEL_OP3_307_35583_20140706_224724_inLine +BABEL_OP3_307_35583_20140706_224724_outLine +BABEL_OP3_307_36669_20140512_181519_inLine +BABEL_OP3_307_36669_20140512_181519_outLine +BABEL_OP3_307_37228_20140706_173354_inLine +BABEL_OP3_307_37228_20140706_173354_outLine +BABEL_OP3_307_38076_20140531_001406_inLine +BABEL_OP3_307_38076_20140531_001406_outLine +BABEL_OP3_307_38588_20140505_183744_inLine +BABEL_OP3_307_38588_20140505_183744_outLine +BABEL_OP3_307_38664_20140508_003821_inLine +BABEL_OP3_307_38664_20140508_003821_outLine +BABEL_OP3_307_39059_20140717_183250_inLine +BABEL_OP3_307_39059_20140717_183250_outLine +BABEL_OP3_307_41097_20140531_181736_inLine +BABEL_OP3_307_41097_20140531_181736_outLine +BABEL_OP3_307_41685_20140825_205956_inLine +BABEL_OP3_307_41685_20140825_205956_outLine +BABEL_OP3_307_41720_20140824_215221_inLine +BABEL_OP3_307_41720_20140824_215221_outLine +BABEL_OP3_307_43286_20140519_004615_inLine +BABEL_OP3_307_43286_20140519_004615_outLine +BABEL_OP3_307_43323_20140824_230200_inLine +BABEL_OP3_307_43323_20140824_230200_outLine +BABEL_OP3_307_43784_20140430_225016_inLine +BABEL_OP3_307_43784_20140430_225016_outLine +BABEL_OP3_307_43794_20140902_183511_inLine +BABEL_OP3_307_43794_20140902_183511_outLine +BABEL_OP3_307_43920_20140622_222232_inLine +BABEL_OP3_307_43920_20140622_222232_outLine +BABEL_OP3_307_44446_20140827_003250_inLine +BABEL_OP3_307_44446_20140827_003250_outLine +BABEL_OP3_307_44477_20140611_180941_inLine +BABEL_OP3_307_44477_20140611_180941_outLine +BABEL_OP3_307_45771_20140824_012354_inLine +BABEL_OP3_307_45771_20140824_012354_outLine +BABEL_OP3_307_46041_20140705_175737_inLine 
+BABEL_OP3_307_46041_20140705_175737_outLine +BABEL_OP3_307_46310_20140417_192000_inLine +BABEL_OP3_307_46310_20140417_192000_outLine +BABEL_OP3_307_46589_20140606_191357_inLine +BABEL_OP3_307_46589_20140606_191357_outLine +BABEL_OP3_307_46681_20140403_002233_inLine +BABEL_OP3_307_46681_20140403_002233_outLine +BABEL_OP3_307_46770_20140706_002306_inLine +BABEL_OP3_307_46770_20140706_002306_outLine +BABEL_OP3_307_46976_20140516_234604_inLine +BABEL_OP3_307_46976_20140516_234604_outLine +BABEL_OP3_307_47451_20140624_234108_inLine +BABEL_OP3_307_47451_20140624_234108_outLine +BABEL_OP3_307_48243_20140423_214726_inLine +BABEL_OP3_307_48243_20140423_214726_outLine +BABEL_OP3_307_49027_20140811_191512_inLine +BABEL_OP3_307_49027_20140811_191512_outLine +BABEL_OP3_307_49287_20140527_215142_inLine +BABEL_OP3_307_49287_20140527_215142_outLine +BABEL_OP3_307_49502_20140415_220754_inLine +BABEL_OP3_307_49502_20140415_220754_outLine +BABEL_OP3_307_49768_20140505_000629_inLine +BABEL_OP3_307_49768_20140505_000629_outLine +BABEL_OP3_307_49907_20140429_214231_inLine +BABEL_OP3_307_49907_20140429_214231_outLine +BABEL_OP3_307_50427_20140519_180652_inLine +BABEL_OP3_307_50427_20140519_180652_outLine +BABEL_OP3_307_50940_20140902_173543_inLine +BABEL_OP3_307_50940_20140902_173543_outLine +BABEL_OP3_307_51185_20140901_232033_inLine +BABEL_OP3_307_51185_20140901_232033_outLine +BABEL_OP3_307_51484_20140703_181343_inLine +BABEL_OP3_307_51484_20140703_181343_outLine +BABEL_OP3_307_51611_20140423_232011_inLine +BABEL_OP3_307_51611_20140423_232011_outLine +BABEL_OP3_307_51968_20140503_185322_inLine +BABEL_OP3_307_51968_20140503_185322_outLine +BABEL_OP3_307_51968_20140503_185916_inLine +BABEL_OP3_307_51968_20140503_185916_outLine +BABEL_OP3_307_52301_20140423_210352_inLine +BABEL_OP3_307_52301_20140423_210352_outLine +BABEL_OP3_307_52381_20140705_233901_inLine +BABEL_OP3_307_52381_20140705_233901_outLine +BABEL_OP3_307_52404_20140607_181619_inLine +BABEL_OP3_307_52404_20140607_181619_outLine +BABEL_OP3_307_52422_20140707_220639_inLine +BABEL_OP3_307_52422_20140707_220639_outLine +BABEL_OP3_307_53842_20140513_184522_inLine +BABEL_OP3_307_53842_20140513_184522_outLine +BABEL_OP3_307_54104_20140503_183514_inLine +BABEL_OP3_307_54104_20140503_183514_outLine +BABEL_OP3_307_54477_20140705_174757_inLine +BABEL_OP3_307_54477_20140705_174757_outLine +BABEL_OP3_307_54827_20140814_180107_inLine +BABEL_OP3_307_54827_20140814_180107_outLine +BABEL_OP3_307_54841_20140713_170956_inLine +BABEL_OP3_307_54841_20140713_170956_outLine +BABEL_OP3_307_55902_20140829_192235_inLine +BABEL_OP3_307_55902_20140829_192235_outLine +BABEL_OP3_307_56023_20140704_191158_inLine +BABEL_OP3_307_56023_20140704_191158_outLine +BABEL_OP3_307_56198_20140501_005036_inLine +BABEL_OP3_307_56198_20140501_005036_outLine +BABEL_OP3_307_57464_20140728_215432_inLine +BABEL_OP3_307_57464_20140728_215432_outLine +BABEL_OP3_307_57678_20140405_000739_inLine +BABEL_OP3_307_57678_20140405_000739_outLine +BABEL_OP3_307_58103_20140511_191956_inLine +BABEL_OP3_307_58103_20140511_191956_outLine +BABEL_OP3_307_58145_20140605_175238_inLine +BABEL_OP3_307_58145_20140605_175238_outLine +BABEL_OP3_307_58313_20140605_235938_inLine +BABEL_OP3_307_58313_20140605_235938_outLine +BABEL_OP3_307_58585_20140717_221803_inLine +BABEL_OP3_307_58585_20140717_221803_outLine +BABEL_OP3_307_58734_20140422_182501_inLine +BABEL_OP3_307_58734_20140422_182501_outLine +BABEL_OP3_307_59028_20140820_184151_inLine +BABEL_OP3_307_59028_20140820_184151_outLine 
+BABEL_OP3_307_59091_20140706_233018_inLine +BABEL_OP3_307_59091_20140706_233018_outLine +BABEL_OP3_307_59307_20140730_225719_inLine +BABEL_OP3_307_59307_20140730_225719_outLine +BABEL_OP3_307_59635_20140705_193327_inLine +BABEL_OP3_307_59635_20140705_193327_outLine +BABEL_OP3_307_60026_20140416_210913_inLine +BABEL_OP3_307_60026_20140416_210913_outLine +BABEL_OP3_307_60474_20140503_215918_inLine +BABEL_OP3_307_60474_20140503_215918_outLine +BABEL_OP3_307_61167_20140511_204037_inLine +BABEL_OP3_307_61167_20140511_204037_outLine +BABEL_OP3_307_61731_20140407_191634_inLine +BABEL_OP3_307_61731_20140407_191634_outLine +BABEL_OP3_307_61971_20140811_182130_inLine +BABEL_OP3_307_61971_20140811_182130_outLine +BABEL_OP3_307_62158_20140907_190726_inLine +BABEL_OP3_307_62158_20140907_190726_outLine +BABEL_OP3_307_64065_20140502_190738_inLine +BABEL_OP3_307_64065_20140502_190738_outLine +BABEL_OP3_307_64350_20140403_011744_inLine +BABEL_OP3_307_64350_20140403_011744_outLine +BABEL_OP3_307_64768_20140404_233306_inLine +BABEL_OP3_307_64768_20140404_233306_outLine +BABEL_OP3_307_65064_20140604_223702_inLine +BABEL_OP3_307_65064_20140604_223702_outLine +BABEL_OP3_307_65367_20140706_182846_inLine +BABEL_OP3_307_65367_20140706_182846_outLine +BABEL_OP3_307_66001_20140518_232707_inLine +BABEL_OP3_307_66001_20140518_232707_outLine +BABEL_OP3_307_66305_20140807_184053_inLine +BABEL_OP3_307_66305_20140807_184053_outLine +BABEL_OP3_307_66822_20140504_164117_inLine +BABEL_OP3_307_66822_20140504_164117_outLine +BABEL_OP3_307_67283_20140421_213932_inLine +BABEL_OP3_307_67283_20140421_213932_outLine +BABEL_OP3_307_67552_20140611_194432_inLine +BABEL_OP3_307_67552_20140611_194432_outLine +BABEL_OP3_307_67659_20140503_214825_inLine +BABEL_OP3_307_67659_20140503_214825_outLine +BABEL_OP3_307_68748_20140609_212915_inLine +BABEL_OP3_307_68748_20140609_212915_outLine +BABEL_OP3_307_69096_20140813_192001_inLine +BABEL_OP3_307_69096_20140813_192001_outLine +BABEL_OP3_307_69992_20140502_183707_inLine +BABEL_OP3_307_69992_20140502_183707_outLine +BABEL_OP3_307_70452_20140504_180340_inLine +BABEL_OP3_307_70452_20140504_180340_outLine +BABEL_OP3_307_70986_20140825_003434_inLine +BABEL_OP3_307_70986_20140825_003434_outLine +BABEL_OP3_307_71189_20140715_012540_inLine +BABEL_OP3_307_71189_20140715_012540_outLine +BABEL_OP3_307_71263_20140602_180728_inLine +BABEL_OP3_307_71263_20140602_180728_outLine +BABEL_OP3_307_71404_20140423_203052_inLine +BABEL_OP3_307_71404_20140423_203052_outLine +BABEL_OP3_307_72587_20140529_225152_inLine +BABEL_OP3_307_72587_20140529_225152_outLine +BABEL_OP3_307_72952_20140819_214300_inLine +BABEL_OP3_307_72952_20140819_214300_outLine +BABEL_OP3_307_73005_20140815_000302_inLine +BABEL_OP3_307_73005_20140815_000302_outLine +BABEL_OP3_307_73258_20140508_180508_inLine +BABEL_OP3_307_73258_20140508_180508_outLine +BABEL_OP3_307_73299_20140822_002656_inLine +BABEL_OP3_307_73299_20140822_002656_outLine +BABEL_OP3_307_73446_20140809_165436_inLine +BABEL_OP3_307_73446_20140809_165436_outLine +BABEL_OP3_307_73511_20140614_171020_inLine +BABEL_OP3_307_73511_20140614_171020_outLine +BABEL_OP3_307_74667_20140508_225904_inLine +BABEL_OP3_307_74667_20140508_225904_outLine +BABEL_OP3_307_74799_20140602_191429_inLine +BABEL_OP3_307_74799_20140602_191429_outLine +BABEL_OP3_307_75365_20140821_220730_inLine +BABEL_OP3_307_75365_20140821_220730_outLine +BABEL_OP3_307_75993_20140404_202655_inLine +BABEL_OP3_307_75993_20140404_202655_outLine +BABEL_OP3_307_76238_20140623_222754_inLine 
+BABEL_OP3_307_76238_20140623_222754_outLine +BABEL_OP3_307_76499_20140512_232123_inLine +BABEL_OP3_307_76499_20140512_232123_outLine +BABEL_OP3_307_76902_20140829_203049_inLine +BABEL_OP3_307_76902_20140829_203049_outLine +BABEL_OP3_307_77139_20140416_004159_inLine +BABEL_OP3_307_77139_20140416_004159_outLine +BABEL_OP3_307_77427_20140508_024629_inLine +BABEL_OP3_307_77427_20140508_024629_outLine +BABEL_OP3_307_77803_20140402_001929_inLine +BABEL_OP3_307_77803_20140402_001929_outLine +BABEL_OP3_307_77832_20140903_183557_inLine +BABEL_OP3_307_77832_20140903_183557_outLine +BABEL_OP3_307_78161_20140828_164656_inLine +BABEL_OP3_307_78161_20140828_164656_outLine +BABEL_OP3_307_78194_20140411_164649_inLine +BABEL_OP3_307_78194_20140411_164649_outLine +BABEL_OP3_307_78943_20140505_000428_inLine +BABEL_OP3_307_78943_20140505_000428_outLine +BABEL_OP3_307_79167_20140606_224734_inLine +BABEL_OP3_307_79167_20140606_224734_outLine +BABEL_OP3_307_79429_20140826_212728_inLine +BABEL_OP3_307_79429_20140826_212728_outLine +BABEL_OP3_307_79451_20140417_185927_inLine +BABEL_OP3_307_79451_20140417_185927_outLine +BABEL_OP3_307_79660_20140820_174118_inLine +BABEL_OP3_307_79660_20140820_174118_outLine +BABEL_OP3_307_80069_20140821_213402_inLine +BABEL_OP3_307_80069_20140821_213402_outLine +BABEL_OP3_307_80136_20140706_191530_inLine +BABEL_OP3_307_80136_20140706_191530_outLine +BABEL_OP3_307_80306_20140510_220902_inLine +BABEL_OP3_307_80306_20140510_220902_outLine +BABEL_OP3_307_81213_20140501_002133_inLine +BABEL_OP3_307_81213_20140501_002133_outLine +BABEL_OP3_307_81287_20140616_182444_inLine +BABEL_OP3_307_81287_20140616_182444_outLine +BABEL_OP3_307_81424_20140614_215540_inLine +BABEL_OP3_307_81424_20140614_215540_outLine +BABEL_OP3_307_81435_20140529_235732_inLine +BABEL_OP3_307_81435_20140529_235732_outLine +BABEL_OP3_307_81671_20140704_213446_inLine +BABEL_OP3_307_81671_20140704_213446_outLine +BABEL_OP3_307_82140_20140513_191321_inLine +BABEL_OP3_307_82140_20140513_191321_outLine +BABEL_OP3_307_82496_20140429_221502_inLine +BABEL_OP3_307_82496_20140429_221502_outLine +BABEL_OP3_307_82626_20140825_181202_inLine +BABEL_OP3_307_82626_20140825_181202_outLine +BABEL_OP3_307_82863_20140511_183302_inLine +BABEL_OP3_307_82863_20140511_183302_outLine +BABEL_OP3_307_82904_20140730_002106_inLine +BABEL_OP3_307_82904_20140730_002106_outLine +BABEL_OP3_307_82935_20140702_173347_inLine +BABEL_OP3_307_82935_20140702_173347_outLine +BABEL_OP3_307_86191_20140505_200151_inLine +BABEL_OP3_307_86191_20140505_200151_outLine +BABEL_OP3_307_86433_20140601_173214_inLine +BABEL_OP3_307_86433_20140601_173214_outLine +BABEL_OP3_307_86472_20140609_222936_inLine +BABEL_OP3_307_86472_20140609_222936_outLine +BABEL_OP3_307_86713_20140704_201850_inLine +BABEL_OP3_307_86713_20140704_201850_outLine +BABEL_OP3_307_86715_20140820_191201_inLine +BABEL_OP3_307_86715_20140820_191201_outLine +BABEL_OP3_307_86722_20140404_001449_inLine +BABEL_OP3_307_86722_20140404_001449_outLine +BABEL_OP3_307_86888_20140530_190736_inLine +BABEL_OP3_307_86888_20140530_190736_outLine +BABEL_OP3_307_87074_20140429_185857_inLine +BABEL_OP3_307_87074_20140429_185857_outLine +BABEL_OP3_307_88756_20140908_011014_inLine +BABEL_OP3_307_88756_20140908_011014_outLine +BABEL_OP3_307_88776_20140417_180154_inLine +BABEL_OP3_307_88776_20140417_180154_outLine +BABEL_OP3_307_88783_20140623_173406_inLine +BABEL_OP3_307_88783_20140623_173406_outLine +BABEL_OP3_307_89203_20140705_004511_inLine +BABEL_OP3_307_89203_20140705_004511_outLine 
+BABEL_OP3_307_89358_20140513_014405_inLine +BABEL_OP3_307_89358_20140513_014405_outLine +BABEL_OP3_307_89575_20140705_220326_inLine +BABEL_OP3_307_89575_20140705_220326_outLine +BABEL_OP3_307_89877_20140602_225948_inLine +BABEL_OP3_307_89877_20140602_225948_outLine +BABEL_OP3_307_90417_20140822_223028_inLine +BABEL_OP3_307_90417_20140822_223028_outLine +BABEL_OP3_307_90417_20140822_224049_inLine +BABEL_OP3_307_90417_20140822_224049_outLine +BABEL_OP3_307_90572_20140723_230358_inLine +BABEL_OP3_307_90572_20140723_230358_outLine +BABEL_OP3_307_90709_20140421_235753_inLine +BABEL_OP3_307_90709_20140421_235753_outLine +BABEL_OP3_307_90739_20140503_223700_inLine +BABEL_OP3_307_90739_20140503_223700_outLine +BABEL_OP3_307_91189_20140821_210308_inLine +BABEL_OP3_307_91189_20140821_210308_outLine +BABEL_OP3_307_91581_20140623_234855_inLine +BABEL_OP3_307_91581_20140623_234855_outLine +BABEL_OP3_307_91884_20140723_193506_inLine +BABEL_OP3_307_91884_20140723_193506_outLine +BABEL_OP3_307_91888_20140813_180920_inLine +BABEL_OP3_307_91888_20140813_180920_outLine +BABEL_OP3_307_91944_20140430_182005_inLine +BABEL_OP3_307_91944_20140430_182005_outLine +BABEL_OP3_307_92605_20140902_013736_inLine +BABEL_OP3_307_92605_20140902_013736_outLine +BABEL_OP3_307_92757_20140809_200327_inLine +BABEL_OP3_307_92757_20140809_200327_outLine +BABEL_OP3_307_92942_20140603_223928_inLine +BABEL_OP3_307_92942_20140603_223928_outLine +BABEL_OP3_307_93469_20140813_214657_inLine +BABEL_OP3_307_93469_20140813_214657_outLine +BABEL_OP3_307_93490_20140704_173442_inLine +BABEL_OP3_307_93490_20140704_173442_outLine +BABEL_OP3_307_93604_20140814_210305_inLine +BABEL_OP3_307_93604_20140814_210305_outLine +BABEL_OP3_307_93858_20140822_215929_inLine +BABEL_OP3_307_93858_20140822_215929_outLine +BABEL_OP3_307_94025_20140606_214625_inLine +BABEL_OP3_307_94025_20140606_214625_outLine +BABEL_OP3_307_94253_20140423_183534_inLine +BABEL_OP3_307_94253_20140423_183534_outLine +BABEL_OP3_307_94316_20140814_001643_inLine +BABEL_OP3_307_94316_20140814_001643_outLine +BABEL_OP3_307_94333_20140417_212859_inLine +BABEL_OP3_307_94333_20140417_212859_outLine +BABEL_OP3_307_94409_20140506_174815_inLine +BABEL_OP3_307_94409_20140506_174815_outLine +BABEL_OP3_307_94442_20140725_195152_inLine +BABEL_OP3_307_94442_20140725_195152_outLine +BABEL_OP3_307_94465_20140622_180637_inLine +BABEL_OP3_307_94465_20140622_180637_outLine +BABEL_OP3_307_94891_20140830_193021_inLine +BABEL_OP3_307_94891_20140830_193021_outLine +BABEL_OP3_307_94969_20140903_171944_inLine +BABEL_OP3_307_94969_20140903_171944_outLine +BABEL_OP3_307_95077_20140622_221523_inLine +BABEL_OP3_307_95077_20140622_221523_outLine +BABEL_OP3_307_95670_20140417_201744_inLine +BABEL_OP3_307_95670_20140417_201744_outLine +BABEL_OP3_307_96690_20140507_212636_inLine +BABEL_OP3_307_96690_20140507_212636_outLine +BABEL_OP3_307_96820_20140517_194553_inLine +BABEL_OP3_307_96820_20140517_194553_outLine +BABEL_OP3_307_96910_20140504_223516_inLine +BABEL_OP3_307_96910_20140504_223516_outLine +BABEL_OP3_307_97588_20140415_223241_inLine +BABEL_OP3_307_97588_20140415_223241_outLine +BABEL_OP3_307_98192_20140823_224529_inLine +BABEL_OP3_307_98192_20140823_224529_outLine +BABEL_OP3_307_98365_20140606_004323_inLine +BABEL_OP3_307_98365_20140606_004323_outLine +BABEL_OP3_307_99202_20140519_213506_inLine +BABEL_OP3_307_99202_20140519_213506_outLine +BABEL_OP3_307_99594_20140508_192558_inLine +BABEL_OP3_307_99594_20140508_192558_outLine
diff --git a/egs/babel/s5d/conf/lists/307-amharic/untranscribed-training.list b/egs/babel/s5d/conf/lists/307-amharic/untranscribed-training.list
new file mode 100644
index 00000000000..2015539e910
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/307-amharic/untranscribed-training.list
@@ -0,0 +1,568 @@
+BABEL_OP3_307_10019_20140510_215248_inLine +BABEL_OP3_307_10019_20140510_215248_outLine +BABEL_OP3_307_10019_20140510_220549_inLine +BABEL_OP3_307_10019_20140510_220549_outLine +BABEL_OP3_307_10188_20140414_190900_inLine +BABEL_OP3_307_10188_20140414_190900_outLine +BABEL_OP3_307_10974_20140518_232844_inLine +BABEL_OP3_307_10974_20140518_232844_outLine +BABEL_OP3_307_13586_20140517_192301_inLine +BABEL_OP3_307_13586_20140517_192301_outLine +BABEL_OP3_307_14137_20140504_224411_inLine +BABEL_OP3_307_14137_20140504_224411_outLine +BABEL_OP3_307_14141_20140729_225447_inLine +BABEL_OP3_307_14141_20140729_225447_outLine +BABEL_OP3_307_14158_20140609_183923_inLine +BABEL_OP3_307_14158_20140609_183923_outLine +BABEL_OP3_307_14719_20140630_214352_inLine +BABEL_OP3_307_14719_20140630_214352_outLine +BABEL_OP3_307_14719_20140630_215754_inLine +BABEL_OP3_307_14719_20140630_215754_outLine +BABEL_OP3_307_14807_20140603_161507_inLine +BABEL_OP3_307_14807_20140603_161507_outLine +BABEL_OP3_307_14807_20140603_163538_inLine +BABEL_OP3_307_14807_20140603_163538_outLine +BABEL_OP3_307_15163_20140505_213531_inLine +BABEL_OP3_307_15163_20140505_213531_outLine +BABEL_OP3_307_15466_20140829_012731_inLine +BABEL_OP3_307_15466_20140829_012731_outLine +BABEL_OP3_307_15638_20140613_232945_inLine +BABEL_OP3_307_15638_20140613_232945_outLine +BABEL_OP3_307_16886_20140507_225852_inLine +BABEL_OP3_307_16886_20140507_225852_outLine +BABEL_OP3_307_16938_20140518_195229_inLine +BABEL_OP3_307_16938_20140518_195229_outLine +BABEL_OP3_307_17113_20140822_183518_inLine +BABEL_OP3_307_17113_20140822_183518_outLine +BABEL_OP3_307_17127_20140710_180949_inLine +BABEL_OP3_307_17127_20140710_180949_outLine +BABEL_OP3_307_17165_20140510_233034_inLine +BABEL_OP3_307_17165_20140510_233034_outLine +BABEL_OP3_307_17165_20140520_203751_inLine +BABEL_OP3_307_17165_20140520_203751_outLine +BABEL_OP3_307_17567_20140518_203832_inLine +BABEL_OP3_307_17567_20140518_203832_outLine +BABEL_OP3_307_17582_20140822_232433_inLine +BABEL_OP3_307_17582_20140822_232433_outLine +BABEL_OP3_307_17890_20140617_183508_inLine +BABEL_OP3_307_17890_20140617_183508_outLine +BABEL_OP3_307_18863_20140629_183439_inLine +BABEL_OP3_307_18863_20140629_183439_outLine +BABEL_OP3_307_19120_20140730_221602_inLine +BABEL_OP3_307_19120_20140730_221602_outLine +BABEL_OP3_307_19703_20140504_214945_inLine +BABEL_OP3_307_19703_20140504_214945_outLine +BABEL_OP3_307_19749_20140718_000521_inLine +BABEL_OP3_307_19749_20140718_000521_outLine +BABEL_OP3_307_19767_20140811_181547_inLine +BABEL_OP3_307_19767_20140811_181547_outLine +BABEL_OP3_307_20330_20140716_175203_inLine +BABEL_OP3_307_20330_20140716_175203_outLine +BABEL_OP3_307_21109_20140703_171502_inLine +BABEL_OP3_307_21109_20140703_171502_outLine +BABEL_OP3_307_21109_20140703_180309_inLine +BABEL_OP3_307_21109_20140703_180309_outLine +BABEL_OP3_307_21159_20140824_225236_inLine +BABEL_OP3_307_21159_20140824_225236_outLine +BABEL_OP3_307_21159_20140901_165658_inLine +BABEL_OP3_307_21159_20140901_165658_outLine +BABEL_OP3_307_21393_20140814_190327_inLine +BABEL_OP3_307_21393_20140814_190327_outLine +BABEL_OP3_307_23395_20140605_170532_inLine +BABEL_OP3_307_23395_20140605_170532_outLine +BABEL_OP3_307_23395_20140605_171255_inLine +BABEL_OP3_307_23395_20140605_171255_outLine
+BABEL_OP3_307_23681_20140823_184904_inLine +BABEL_OP3_307_23681_20140823_184904_outLine +BABEL_OP3_307_23681_20140823_190005_inLine +BABEL_OP3_307_23681_20140823_190005_outLine +BABEL_OP3_307_24290_20140717_001151_inLine +BABEL_OP3_307_24290_20140717_001151_outLine +BABEL_OP3_307_24323_20140508_012148_inLine +BABEL_OP3_307_24323_20140508_012148_outLine +BABEL_OP3_307_24323_20140508_020931_inLine +BABEL_OP3_307_24323_20140508_020931_outLine +BABEL_OP3_307_24323_20140508_022325_inLine +BABEL_OP3_307_24323_20140508_022325_outLine +BABEL_OP3_307_24605_20140403_000212_inLine +BABEL_OP3_307_24605_20140403_000212_outLine +BABEL_OP3_307_24779_20140905_004858_inLine +BABEL_OP3_307_24779_20140905_004858_outLine +BABEL_OP3_307_25085_20140822_212709_inLine +BABEL_OP3_307_25085_20140822_212709_outLine +BABEL_OP3_307_25220_20140905_000706_inLine +BABEL_OP3_307_25220_20140905_000706_outLine +BABEL_OP3_307_25412_20140604_234923_inLine +BABEL_OP3_307_25412_20140604_234923_outLine +BABEL_OP3_307_25412_20140605_000418_inLine +BABEL_OP3_307_25412_20140605_000418_outLine +BABEL_OP3_307_25961_20140403_011918_inLine +BABEL_OP3_307_25961_20140403_011918_outLine +BABEL_OP3_307_26074_20140604_165342_inLine +BABEL_OP3_307_26074_20140604_165342_outLine +BABEL_OP3_307_26478_20140824_185710_inLine +BABEL_OP3_307_26478_20140824_185710_outLine +BABEL_OP3_307_26999_20140530_190830_inLine +BABEL_OP3_307_26999_20140530_190830_outLine +BABEL_OP3_307_27042_20140630_180037_inLine +BABEL_OP3_307_27042_20140630_180037_outLine +BABEL_OP3_307_27218_20140518_235212_inLine +BABEL_OP3_307_27218_20140518_235212_outLine +BABEL_OP3_307_27478_20140808_231848_inLine +BABEL_OP3_307_27478_20140808_231848_outLine +BABEL_OP3_307_27590_20140618_185748_inLine +BABEL_OP3_307_27590_20140618_185748_outLine +BABEL_OP3_307_27590_20140618_190731_inLine +BABEL_OP3_307_27590_20140618_190731_outLine +BABEL_OP3_307_28303_20140503_225229_inLine +BABEL_OP3_307_28303_20140503_225229_outLine +BABEL_OP3_307_28422_20140607_215944_inLine +BABEL_OP3_307_28422_20140607_215944_outLine +BABEL_OP3_307_28538_20140513_205210_inLine +BABEL_OP3_307_28538_20140513_205210_outLine +BABEL_OP3_307_29352_20140824_172023_inLine +BABEL_OP3_307_29352_20140824_172023_outLine +BABEL_OP3_307_29416_20140705_233008_inLine +BABEL_OP3_307_29416_20140705_233008_outLine +BABEL_OP3_307_29777_20140705_173903_inLine +BABEL_OP3_307_29777_20140705_173903_outLine +BABEL_OP3_307_30253_20140624_003258_inLine +BABEL_OP3_307_30253_20140624_003258_outLine +BABEL_OP3_307_30497_20140809_181809_inLine +BABEL_OP3_307_30497_20140809_181809_outLine +BABEL_OP3_307_30869_20140630_183404_inLine +BABEL_OP3_307_30869_20140630_183404_outLine +BABEL_OP3_307_30869_20140630_184229_inLine +BABEL_OP3_307_30869_20140630_184229_outLine +BABEL_OP3_307_31109_20140518_201149_inLine +BABEL_OP3_307_31109_20140518_201149_outLine +BABEL_OP3_307_31182_20140701_221449_inLine +BABEL_OP3_307_31182_20140701_221449_outLine +BABEL_OP3_307_31182_20140702_223108_inLine +BABEL_OP3_307_31182_20140702_223108_outLine +BABEL_OP3_307_31628_20140610_230053_inLine +BABEL_OP3_307_31628_20140610_230053_outLine +BABEL_OP3_307_31979_20140512_015136_inLine +BABEL_OP3_307_31979_20140512_015136_outLine +BABEL_OP3_307_32630_20140821_180259_inLine +BABEL_OP3_307_32630_20140821_180259_outLine +BABEL_OP3_307_32630_20140821_181033_inLine +BABEL_OP3_307_32630_20140821_181033_outLine +BABEL_OP3_307_32630_20140821_182004_inLine +BABEL_OP3_307_32630_20140821_182004_outLine +BABEL_OP3_307_32959_20140621_191212_inLine 
+BABEL_OP3_307_32959_20140621_191212_outLine +BABEL_OP3_307_33216_20140904_190946_inLine +BABEL_OP3_307_33216_20140904_190946_outLine +BABEL_OP3_307_33704_20140705_224629_inLine +BABEL_OP3_307_33704_20140705_224629_outLine +BABEL_OP3_307_33840_20140628_232051_inLine +BABEL_OP3_307_33840_20140628_232051_outLine +BABEL_OP3_307_34328_20140511_224229_inLine +BABEL_OP3_307_34328_20140511_224229_outLine +BABEL_OP3_307_34899_20140824_175928_inLine +BABEL_OP3_307_34899_20140824_175928_outLine +BABEL_OP3_307_35885_20140717_212149_inLine +BABEL_OP3_307_35885_20140717_212149_outLine +BABEL_OP3_307_36059_20140725_170553_inLine +BABEL_OP3_307_36059_20140725_170553_outLine +BABEL_OP3_307_36059_20140725_171011_inLine +BABEL_OP3_307_36059_20140725_171011_outLine +BABEL_OP3_307_36341_20140414_233501_inLine +BABEL_OP3_307_36341_20140414_233501_outLine +BABEL_OP3_307_36341_20140415_224118_inLine +BABEL_OP3_307_36341_20140415_224118_outLine +BABEL_OP3_307_37229_20140820_214115_inLine +BABEL_OP3_307_37229_20140820_214115_outLine +BABEL_OP3_307_37229_20140820_215332_inLine +BABEL_OP3_307_37229_20140820_215332_outLine +BABEL_OP3_307_37499_20140823_233015_inLine +BABEL_OP3_307_37499_20140823_233015_outLine +BABEL_OP3_307_37598_20140602_183825_inLine +BABEL_OP3_307_37598_20140602_183825_outLine +BABEL_OP3_307_38554_20140414_233433_inLine +BABEL_OP3_307_38554_20140414_233433_outLine +BABEL_OP3_307_38979_20140711_231114_inLine +BABEL_OP3_307_38979_20140711_231114_outLine +BABEL_OP3_307_38979_20140711_232222_inLine +BABEL_OP3_307_38979_20140711_232222_outLine +BABEL_OP3_307_39099_20140814_210148_inLine +BABEL_OP3_307_39099_20140814_210148_outLine +BABEL_OP3_307_39307_20140422_002337_inLine +BABEL_OP3_307_39307_20140422_002337_outLine +BABEL_OP3_307_39680_20140710_174506_inLine +BABEL_OP3_307_39680_20140710_174506_outLine +BABEL_OP3_307_41038_20140611_180425_inLine +BABEL_OP3_307_41038_20140611_180425_outLine +BABEL_OP3_307_41233_20140629_192647_inLine +BABEL_OP3_307_41233_20140629_192647_outLine +BABEL_OP3_307_41400_20140811_174054_inLine +BABEL_OP3_307_41400_20140811_174054_outLine +BABEL_OP3_307_41692_20140824_185620_inLine +BABEL_OP3_307_41692_20140824_185620_outLine +BABEL_OP3_307_42600_20140504_183919_inLine +BABEL_OP3_307_42600_20140504_183919_outLine +BABEL_OP3_307_42619_20140527_230410_inLine +BABEL_OP3_307_42619_20140527_230410_outLine +BABEL_OP3_307_42718_20140809_190046_inLine +BABEL_OP3_307_42718_20140809_190046_outLine +BABEL_OP3_307_42942_20140508_233559_inLine +BABEL_OP3_307_42942_20140508_233559_outLine +BABEL_OP3_307_42991_20140611_192649_inLine +BABEL_OP3_307_42991_20140611_192649_outLine +BABEL_OP3_307_42991_20140611_193641_inLine +BABEL_OP3_307_42991_20140611_193641_outLine +BABEL_OP3_307_43239_20140703_231849_inLine +BABEL_OP3_307_43239_20140703_231849_outLine +BABEL_OP3_307_43368_20140510_180418_inLine +BABEL_OP3_307_43368_20140510_180418_outLine +BABEL_OP3_307_43368_20140510_183109_inLine +BABEL_OP3_307_43368_20140510_183109_outLine +BABEL_OP3_307_43395_20140825_200133_inLine +BABEL_OP3_307_43395_20140825_200133_outLine +BABEL_OP3_307_43646_20140414_232607_inLine +BABEL_OP3_307_43646_20140414_232607_outLine +BABEL_OP3_307_44255_20140716_232402_inLine +BABEL_OP3_307_44255_20140716_232402_outLine +BABEL_OP3_307_44255_20140716_233533_inLine +BABEL_OP3_307_44255_20140716_233533_outLine +BABEL_OP3_307_44347_20140707_214025_inLine +BABEL_OP3_307_44347_20140707_214025_outLine +BABEL_OP3_307_44531_20140724_233405_inLine +BABEL_OP3_307_44531_20140724_233405_outLine 
+BABEL_OP3_307_44868_20140609_224633_inLine +BABEL_OP3_307_44868_20140609_224633_outLine +BABEL_OP3_307_45201_20140811_170742_inLine +BABEL_OP3_307_45201_20140811_170742_outLine +BABEL_OP3_307_45235_20140705_174005_inLine +BABEL_OP3_307_45235_20140705_174005_outLine +BABEL_OP3_307_45559_20140725_180337_inLine +BABEL_OP3_307_45559_20140725_180337_outLine +BABEL_OP3_307_45560_20140403_200140_inLine +BABEL_OP3_307_45560_20140403_200140_outLine +BABEL_OP3_307_45642_20140417_165148_inLine +BABEL_OP3_307_45642_20140417_165148_outLine +BABEL_OP3_307_45642_20140417_171209_inLine +BABEL_OP3_307_45642_20140417_171209_outLine +BABEL_OP3_307_45697_20140723_174904_inLine +BABEL_OP3_307_45697_20140723_174904_outLine +BABEL_OP3_307_45770_20140417_234713_inLine +BABEL_OP3_307_45770_20140417_234713_outLine +BABEL_OP3_307_45770_20140418_001050_inLine +BABEL_OP3_307_45770_20140418_001050_outLine +BABEL_OP3_307_45908_20140811_183550_inLine +BABEL_OP3_307_45908_20140811_183550_outLine +BABEL_OP3_307_46008_20140811_173939_inLine +BABEL_OP3_307_46008_20140811_173939_outLine +BABEL_OP3_307_46169_20140628_213057_inLine +BABEL_OP3_307_46169_20140628_213057_outLine +BABEL_OP3_307_46315_20140613_173444_inLine +BABEL_OP3_307_46315_20140613_173444_outLine +BABEL_OP3_307_46688_20140422_234341_inLine +BABEL_OP3_307_46688_20140422_234341_outLine +BABEL_OP3_307_46881_20140416_183617_inLine +BABEL_OP3_307_46881_20140416_183617_outLine +BABEL_OP3_307_46881_20140416_184809_inLine +BABEL_OP3_307_46881_20140416_184809_outLine +BABEL_OP3_307_47215_20140421_184514_inLine +BABEL_OP3_307_47215_20140421_184514_outLine +BABEL_OP3_307_48016_20140820_165510_inLine +BABEL_OP3_307_48016_20140820_165510_outLine +BABEL_OP3_307_48663_20140811_165120_inLine +BABEL_OP3_307_48663_20140811_165120_outLine +BABEL_OP3_307_48844_20140430_185608_inLine +BABEL_OP3_307_48844_20140430_185608_outLine +BABEL_OP3_307_49197_20140503_171214_inLine +BABEL_OP3_307_49197_20140503_171214_outLine +BABEL_OP3_307_49767_20140904_203629_inLine +BABEL_OP3_307_49767_20140904_203629_outLine +BABEL_OP3_307_49775_20140329_000415_inLine +BABEL_OP3_307_49775_20140329_000415_outLine +BABEL_OP3_307_49775_20140329_002350_inLine +BABEL_OP3_307_49775_20140329_002350_outLine +BABEL_OP3_307_50726_20140404_005620_inLine +BABEL_OP3_307_50726_20140404_005620_outLine +BABEL_OP3_307_50958_20140508_000931_inLine +BABEL_OP3_307_50958_20140508_000931_outLine +BABEL_OP3_307_50962_20140430_182307_inLine +BABEL_OP3_307_50962_20140430_182307_outLine +BABEL_OP3_307_51015_20140619_234356_inLine +BABEL_OP3_307_51015_20140619_234356_outLine +BABEL_OP3_307_51414_20140824_173748_inLine +BABEL_OP3_307_51414_20140824_173748_outLine +BABEL_OP3_307_51414_20140824_175004_inLine +BABEL_OP3_307_51414_20140824_175004_outLine +BABEL_OP3_307_51530_20140813_230739_inLine +BABEL_OP3_307_51530_20140813_230739_outLine +BABEL_OP3_307_52025_20140520_191824_inLine +BABEL_OP3_307_52025_20140520_191824_outLine +BABEL_OP3_307_52070_20140904_203955_inLine +BABEL_OP3_307_52070_20140904_203955_outLine +BABEL_OP3_307_52442_20140516_214027_inLine +BABEL_OP3_307_52442_20140516_214027_outLine +BABEL_OP3_307_52447_20140822_171509_inLine +BABEL_OP3_307_52447_20140822_171509_outLine +BABEL_OP3_307_52447_20140822_172455_inLine +BABEL_OP3_307_52447_20140822_172455_outLine +BABEL_OP3_307_52725_20140725_190010_inLine +BABEL_OP3_307_52725_20140725_190010_outLine +BABEL_OP3_307_52725_20140725_190854_inLine +BABEL_OP3_307_52725_20140725_190854_outLine +BABEL_OP3_307_52804_20140502_184324_inLine 
+BABEL_OP3_307_52804_20140502_184324_outLine +BABEL_OP3_307_53144_20140807_225121_inLine +BABEL_OP3_307_53144_20140807_225121_outLine +BABEL_OP3_307_53492_20140730_174335_inLine +BABEL_OP3_307_53492_20140730_174335_outLine +BABEL_OP3_307_53665_20140809_225603_inLine +BABEL_OP3_307_53665_20140809_225603_outLine +BABEL_OP3_307_55818_20140403_203355_inLine +BABEL_OP3_307_55818_20140403_203355_outLine +BABEL_OP3_307_56019_20140716_230530_inLine +BABEL_OP3_307_56019_20140716_230530_outLine +BABEL_OP3_307_56076_20140810_005108_inLine +BABEL_OP3_307_56076_20140810_005108_outLine +BABEL_OP3_307_56213_20140621_172222_inLine +BABEL_OP3_307_56213_20140621_172222_outLine +BABEL_OP3_307_56306_20140705_225134_inLine +BABEL_OP3_307_56306_20140705_225134_outLine +BABEL_OP3_307_56684_20140630_231811_inLine +BABEL_OP3_307_56684_20140630_231811_outLine +BABEL_OP3_307_56720_20140616_224418_inLine +BABEL_OP3_307_56720_20140616_224418_outLine +BABEL_OP3_307_57542_20140720_222540_inLine +BABEL_OP3_307_57542_20140720_222540_outLine +BABEL_OP3_307_57650_20140712_172810_inLine +BABEL_OP3_307_57650_20140712_172810_outLine +BABEL_OP3_307_57922_20140603_234523_inLine +BABEL_OP3_307_57922_20140603_234523_outLine +BABEL_OP3_307_60310_20140628_231715_inLine +BABEL_OP3_307_60310_20140628_231715_outLine +BABEL_OP3_307_60436_20140730_191522_inLine +BABEL_OP3_307_60436_20140730_191522_outLine +BABEL_OP3_307_60706_20140401_190403_inLine +BABEL_OP3_307_60706_20140401_190403_outLine +BABEL_OP3_307_60836_20140405_213236_inLine +BABEL_OP3_307_60836_20140405_213236_outLine +BABEL_OP3_307_60836_20140406_002450_inLine +BABEL_OP3_307_60836_20140406_002450_outLine +BABEL_OP3_307_61219_20140404_200459_inLine +BABEL_OP3_307_61219_20140404_200459_outLine +BABEL_OP3_307_61225_20140414_220024_inLine +BABEL_OP3_307_61225_20140414_220024_outLine +BABEL_OP3_307_61963_20140710_174351_inLine +BABEL_OP3_307_61963_20140710_174351_outLine +BABEL_OP3_307_62155_20140721_233109_inLine +BABEL_OP3_307_62155_20140721_233109_outLine +BABEL_OP3_307_62289_20140811_205629_inLine +BABEL_OP3_307_62289_20140811_205629_outLine +BABEL_OP3_307_63906_20140807_235743_inLine +BABEL_OP3_307_63906_20140807_235743_outLine +BABEL_OP3_307_63938_20140715_225113_inLine +BABEL_OP3_307_63938_20140715_225113_outLine +BABEL_OP3_307_64014_20140717_232855_inLine +BABEL_OP3_307_64014_20140717_232855_outLine +BABEL_OP3_307_65298_20140718_004934_inLine +BABEL_OP3_307_65298_20140718_004934_outLine +BABEL_OP3_307_65477_20140507_213725_inLine +BABEL_OP3_307_65477_20140507_213725_outLine +BABEL_OP3_307_65477_20140507_214428_inLine +BABEL_OP3_307_65477_20140507_214428_outLine +BABEL_OP3_307_65477_20140507_215852_inLine +BABEL_OP3_307_65477_20140507_215852_outLine +BABEL_OP3_307_65913_20140811_185916_inLine +BABEL_OP3_307_65913_20140811_185916_outLine +BABEL_OP3_307_66026_20140622_001323_inLine +BABEL_OP3_307_66026_20140622_001323_outLine +BABEL_OP3_307_66026_20140622_003222_inLine +BABEL_OP3_307_66026_20140622_003222_outLine +BABEL_OP3_307_66837_20140622_193057_inLine +BABEL_OP3_307_66837_20140622_193057_outLine +BABEL_OP3_307_68182_20140712_230018_inLine +BABEL_OP3_307_68182_20140712_230018_outLine +BABEL_OP3_307_68306_20140619_234111_inLine +BABEL_OP3_307_68306_20140619_234111_outLine +BABEL_OP3_307_69746_20140708_002605_inLine +BABEL_OP3_307_69746_20140708_002605_outLine +BABEL_OP3_307_69885_20140809_214354_inLine +BABEL_OP3_307_69885_20140809_214354_outLine +BABEL_OP3_307_69885_20140809_221241_inLine +BABEL_OP3_307_69885_20140809_221241_outLine 
+BABEL_OP3_307_70221_20140531_232511_inLine +BABEL_OP3_307_70221_20140531_232511_outLine +BABEL_OP3_307_71121_20140827_212105_inLine +BABEL_OP3_307_71121_20140827_212105_outLine +BABEL_OP3_307_71282_20140712_184618_inLine +BABEL_OP3_307_71282_20140712_184618_outLine +BABEL_OP3_307_72349_20140811_213219_inLine +BABEL_OP3_307_72349_20140811_213219_outLine +BABEL_OP3_307_72844_20140414_222309_inLine +BABEL_OP3_307_72844_20140414_222309_outLine +BABEL_OP3_307_72844_20140414_223414_inLine +BABEL_OP3_307_72844_20140414_223414_outLine +BABEL_OP3_307_73549_20140905_002803_inLine +BABEL_OP3_307_73549_20140905_002803_outLine +BABEL_OP3_307_73964_20140809_233453_inLine +BABEL_OP3_307_73964_20140809_233453_outLine +BABEL_OP3_307_73964_20140809_234749_inLine +BABEL_OP3_307_73964_20140809_234749_outLine +BABEL_OP3_307_74111_20140630_190239_inLine +BABEL_OP3_307_74111_20140630_190239_outLine +BABEL_OP3_307_74253_20140621_235240_inLine +BABEL_OP3_307_74253_20140621_235240_outLine +BABEL_OP3_307_74280_20140414_183758_inLine +BABEL_OP3_307_74280_20140414_183758_outLine +BABEL_OP3_307_74455_20140715_191928_inLine +BABEL_OP3_307_74455_20140715_191928_outLine +BABEL_OP3_307_75223_20140401_234318_inLine +BABEL_OP3_307_75223_20140401_234318_outLine +BABEL_OP3_307_75223_20140401_235025_inLine +BABEL_OP3_307_75223_20140401_235025_outLine +BABEL_OP3_307_75261_20140630_231504_inLine +BABEL_OP3_307_75261_20140630_231504_outLine +BABEL_OP3_307_75342_20140617_225740_inLine +BABEL_OP3_307_75342_20140617_225740_outLine +BABEL_OP3_307_75342_20140617_231149_inLine +BABEL_OP3_307_75342_20140617_231149_outLine +BABEL_OP3_307_75359_20140812_195810_inLine +BABEL_OP3_307_75359_20140812_195810_outLine +BABEL_OP3_307_75366_20140905_004427_inLine +BABEL_OP3_307_75366_20140905_004427_outLine +BABEL_OP3_307_75465_20140629_190739_inLine +BABEL_OP3_307_75465_20140629_190739_outLine +BABEL_OP3_307_75869_20140722_003619_inLine +BABEL_OP3_307_75869_20140722_003619_outLine +BABEL_OP3_307_75981_20140730_202631_inLine +BABEL_OP3_307_75981_20140730_202631_outLine +BABEL_OP3_307_78544_20140610_183736_inLine +BABEL_OP3_307_78544_20140610_183736_outLine +BABEL_OP3_307_78609_20140702_235349_inLine +BABEL_OP3_307_78609_20140702_235349_outLine +BABEL_OP3_307_78749_20140904_210224_inLine +BABEL_OP3_307_78749_20140904_210224_outLine +BABEL_OP3_307_79139_20140510_225328_inLine +BABEL_OP3_307_79139_20140510_225328_outLine +BABEL_OP3_307_79898_20140904_214416_inLine +BABEL_OP3_307_79898_20140904_214416_outLine +BABEL_OP3_307_80577_20140715_181331_inLine +BABEL_OP3_307_80577_20140715_181331_outLine +BABEL_OP3_307_80655_20140812_230923_inLine +BABEL_OP3_307_80655_20140812_230923_outLine +BABEL_OP3_307_80655_20140812_233001_inLine +BABEL_OP3_307_80655_20140812_233001_outLine +BABEL_OP3_307_80721_20140621_190505_inLine +BABEL_OP3_307_80721_20140621_190505_outLine +BABEL_OP3_307_80881_20140422_202404_inLine +BABEL_OP3_307_80881_20140422_202404_outLine +BABEL_OP3_307_81674_20140826_223550_inLine +BABEL_OP3_307_81674_20140826_223550_outLine +BABEL_OP3_307_83783_20140605_230854_inLine +BABEL_OP3_307_83783_20140605_230854_outLine +BABEL_OP3_307_83783_20140605_231912_inLine +BABEL_OP3_307_83783_20140605_231912_outLine +BABEL_OP3_307_83935_20140614_224802_inLine +BABEL_OP3_307_83935_20140614_224802_outLine +BABEL_OP3_307_84061_20140511_233610_inLine +BABEL_OP3_307_84061_20140511_233610_outLine +BABEL_OP3_307_84125_20140331_234215_inLine +BABEL_OP3_307_84125_20140331_234215_outLine +BABEL_OP3_307_84370_20140820_212437_inLine 
+BABEL_OP3_307_84370_20140820_212437_outLine +BABEL_OP3_307_84408_20140503_212710_inLine +BABEL_OP3_307_84408_20140503_212710_outLine +BABEL_OP3_307_84737_20140708_221232_inLine +BABEL_OP3_307_84737_20140708_221232_outLine +BABEL_OP3_307_84823_20140630_223225_inLine +BABEL_OP3_307_84823_20140630_223225_outLine +BABEL_OP3_307_84936_20140531_001856_inLine +BABEL_OP3_307_84936_20140531_001856_outLine +BABEL_OP3_307_84936_20140531_002943_inLine +BABEL_OP3_307_84936_20140531_002943_outLine +BABEL_OP3_307_85048_20140605_171622_inLine +BABEL_OP3_307_85048_20140605_171622_outLine +BABEL_OP3_307_85254_20140827_191205_inLine +BABEL_OP3_307_85254_20140827_191205_outLine +BABEL_OP3_307_85340_20140430_212442_inLine +BABEL_OP3_307_85340_20140430_212442_outLine +BABEL_OP3_307_87298_20140404_234437_inLine +BABEL_OP3_307_87298_20140404_234437_outLine +BABEL_OP3_307_88982_20140517_225450_inLine +BABEL_OP3_307_88982_20140517_225450_outLine +BABEL_OP3_307_89560_20140708_181828_inLine +BABEL_OP3_307_89560_20140708_181828_outLine +BABEL_OP3_307_90080_20140730_192002_inLine +BABEL_OP3_307_90080_20140730_192002_outLine +BABEL_OP3_307_90760_20140822_233431_inLine +BABEL_OP3_307_90760_20140822_233431_outLine +BABEL_OP3_307_91125_20140417_193326_inLine +BABEL_OP3_307_91125_20140417_193326_outLine +BABEL_OP3_307_91930_20140723_214657_inLine +BABEL_OP3_307_91930_20140723_214657_outLine +BABEL_OP3_307_91977_20140609_172756_inLine +BABEL_OP3_307_91977_20140609_172756_outLine +BABEL_OP3_307_92077_20140725_182941_inLine +BABEL_OP3_307_92077_20140725_182941_outLine +BABEL_OP3_307_92096_20140720_214645_inLine +BABEL_OP3_307_92096_20140720_214645_outLine +BABEL_OP3_307_92356_20140710_165331_inLine +BABEL_OP3_307_92356_20140710_165331_outLine +BABEL_OP3_307_92736_20140628_222129_inLine +BABEL_OP3_307_92736_20140628_222129_outLine +BABEL_OP3_307_92792_20140806_183000_inLine +BABEL_OP3_307_92792_20140806_183000_outLine +BABEL_OP3_307_92792_20140806_184601_inLine +BABEL_OP3_307_92792_20140806_184601_outLine +BABEL_OP3_307_93411_20140511_171810_inLine +BABEL_OP3_307_93411_20140511_171810_outLine +BABEL_OP3_307_93411_20140511_172906_inLine +BABEL_OP3_307_93411_20140511_172906_outLine +BABEL_OP3_307_93861_20140513_195727_inLine +BABEL_OP3_307_94141_20140813_184047_inLine +BABEL_OP3_307_94141_20140813_184047_outLine +BABEL_OP3_307_94166_20140717_182459_inLine +BABEL_OP3_307_94666_20140517_180258_inLine +BABEL_OP3_307_94666_20140517_180258_outLine +BABEL_OP3_307_95399_20140514_005142_inLine +BABEL_OP3_307_95399_20140514_005142_outLine +BABEL_OP3_307_95467_20140822_201531_inLine +BABEL_OP3_307_95467_20140822_201531_outLine +BABEL_OP3_307_95598_20140415_012206_inLine +BABEL_OP3_307_95598_20140415_012206_outLine +BABEL_OP3_307_95935_20140702_232733_inLine +BABEL_OP3_307_95935_20140702_232733_outLine +BABEL_OP3_307_95966_20140504_202018_inLine +BABEL_OP3_307_95966_20140504_202018_outLine +BABEL_OP3_307_96247_20140721_235224_inLine +BABEL_OP3_307_96247_20140721_235224_outLine +BABEL_OP3_307_96584_20140717_173523_inLine +BABEL_OP3_307_96584_20140717_173523_outLine +BABEL_OP3_307_96842_20140725_185113_inLine +BABEL_OP3_307_96842_20140725_185113_outLine +BABEL_OP3_307_96934_20140407_232228_inLine +BABEL_OP3_307_96934_20140407_232228_outLine +BABEL_OP3_307_97136_20140731_173922_inLine +BABEL_OP3_307_97136_20140731_173922_outLine +BABEL_OP3_307_97570_20140529_233742_inLine +BABEL_OP3_307_97570_20140529_233742_outLine +BABEL_OP3_307_97836_20140730_225750_inLine +BABEL_OP3_307_97836_20140730_225750_outLine 
+BABEL_OP3_307_97849_20140813_181409_inLine +BABEL_OP3_307_97849_20140813_181409_outLine +BABEL_OP3_307_97911_20140904_224017_inLine +BABEL_OP3_307_97911_20140904_224017_outLine +BABEL_OP3_307_97988_20140620_223625_inLine +BABEL_OP3_307_97988_20140620_223625_outLine +BABEL_OP3_307_98489_20140404_222049_inLine +BABEL_OP3_307_98489_20140404_222049_outLine +BABEL_OP3_307_98678_20140721_224047_inLine +BABEL_OP3_307_98678_20140721_224047_outLine +BABEL_OP3_307_99401_20140422_215803_inLine +BABEL_OP3_307_99401_20140422_215803_outLine +BABEL_OP3_307_99718_20140417_190158_inLine +BABEL_OP3_307_99718_20140417_190158_outLine +BABEL_OP3_307_99732_20140630_175525_inLine +BABEL_OP3_307_99732_20140630_175525_outLine +BABEL_OP3_307_99813_20140516_235439_inLine +BABEL_OP3_307_99813_20140516_235439_outLine +BABEL_OP3_307_99920_20140404_002016_inLine +BABEL_OP3_307_99920_20140404_002016_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/dev.2h.list b/egs/babel/s5d/conf/lists/401-mongolian/dev.2h.list new file mode 100644 index 00000000000..47596e1204d --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/dev.2h.list @@ -0,0 +1,124 @@ +BABEL_OP3_401_10319_20140923_150904_inLine +BABEL_OP3_401_10319_20140923_150904_outLine +BABEL_OP3_401_12916_20140930_182205_inLine +BABEL_OP3_401_12916_20140930_182205_outLine +BABEL_OP3_401_14229_20141015_145028_inLine +BABEL_OP3_401_14229_20141015_145028_outLine +BABEL_OP3_401_14350_20141002_192854_inLine +BABEL_OP3_401_14350_20141002_192854_outLine +BABEL_OP3_401_14875_20141013_220929_inLine +BABEL_OP3_401_14875_20141013_220929_outLine +BABEL_OP3_401_14875_20141013_222027_inLine +BABEL_OP3_401_14875_20141013_222027_outLine +BABEL_OP3_401_15163_20141020_201846_inLine +BABEL_OP3_401_15163_20141020_201846_outLine +BABEL_OP3_401_15216_20141104_171637_inLine +BABEL_OP3_401_15216_20141104_171637_outLine +BABEL_OP3_401_15324_20141031_194259_inLine +BABEL_OP3_401_15324_20141031_194259_outLine +BABEL_OP3_401_15848_20140916_174516_inLine +BABEL_OP3_401_15848_20140916_174516_outLine +BABEL_OP3_401_16184_20140924_115115_inLine +BABEL_OP3_401_16184_20140924_115115_outLine +BABEL_OP3_401_17440_20141014_172206_inLine +BABEL_OP3_401_17440_20141014_172206_outLine +BABEL_OP3_401_19621_20141027_174015_inLine +BABEL_OP3_401_19621_20141027_174015_outLine +BABEL_OP3_401_21109_20141102_133420_inLine +BABEL_OP3_401_21109_20141102_133420_outLine +BABEL_OP3_401_23505_20140930_172516_inLine +BABEL_OP3_401_23505_20140930_172516_outLine +BABEL_OP3_401_26074_20141031_001437_inLine +BABEL_OP3_401_26074_20141031_001437_outLine +BABEL_OP3_401_27125_20140916_141748_inLine +BABEL_OP3_401_27125_20140916_141748_outLine +BABEL_OP3_401_27478_20141119_222255_inLine +BABEL_OP3_401_27478_20141119_222255_outLine +BABEL_OP3_401_28606_20140930_180938_inLine +BABEL_OP3_401_28606_20140930_180938_outLine +BABEL_OP3_401_29023_20141021_134200_inLine +BABEL_OP3_401_29023_20141021_134200_outLine +BABEL_OP3_401_29135_20140919_181952_inLine +BABEL_OP3_401_29135_20140919_181952_outLine +BABEL_OP3_401_29208_20141018_152040_inLine +BABEL_OP3_401_29208_20141018_152040_outLine +BABEL_OP3_401_29777_20141105_172935_inLine +BABEL_OP3_401_29777_20141105_172935_outLine +BABEL_OP3_401_31490_20141001_195242_inLine +BABEL_OP3_401_31490_20141001_195242_outLine +BABEL_OP3_401_32301_20140927_150237_inLine +BABEL_OP3_401_32301_20140927_150237_outLine +BABEL_OP3_401_32727_20141014_193244_inLine +BABEL_OP3_401_32727_20141014_193244_outLine +BABEL_OP3_401_32861_20141112_183418_inLine +BABEL_OP3_401_32861_20141112_183418_outLine 
+BABEL_OP3_401_32914_20141101_192546_inLine +BABEL_OP3_401_32914_20141101_192546_outLine +BABEL_OP3_401_36219_20141014_150115_inLine +BABEL_OP3_401_36219_20141014_150115_outLine +BABEL_OP3_401_36505_20141104_134657_inLine +BABEL_OP3_401_36505_20141104_134657_outLine +BABEL_OP3_401_38554_20140917_124843_inLine +BABEL_OP3_401_38554_20140917_124843_outLine +BABEL_OP3_401_41100_20141001_131139_inLine +BABEL_OP3_401_41100_20141001_131139_outLine +BABEL_OP3_401_41741_20141002_230232_inLine +BABEL_OP3_401_41741_20141002_230232_outLine +BABEL_OP3_401_42243_20140924_154551_inLine +BABEL_OP3_401_42243_20140924_154551_outLine +BABEL_OP3_401_43368_20141016_160322_inLine +BABEL_OP3_401_43368_20141016_160322_outLine +BABEL_OP3_401_43388_20141019_234056_inLine +BABEL_OP3_401_43388_20141019_234056_outLine +BABEL_OP3_401_43789_20141020_153059_inLine +BABEL_OP3_401_43789_20141020_153059_outLine +BABEL_OP3_401_44347_20141103_201828_inLine +BABEL_OP3_401_44347_20141103_201828_outLine +BABEL_OP3_401_44420_20141014_143409_inLine +BABEL_OP3_401_44420_20141014_143409_outLine +BABEL_OP3_401_44531_20141122_231122_inLine +BABEL_OP3_401_44531_20141122_231122_outLine +BABEL_OP3_401_44619_20141003_141028_inLine +BABEL_OP3_401_44619_20141003_141028_outLine +BABEL_OP3_401_44868_20140925_153133_inLine +BABEL_OP3_401_44868_20140925_153133_outLine +BABEL_OP3_401_46625_20140919_144521_inLine +BABEL_OP3_401_46625_20140919_144521_outLine +BABEL_OP3_401_47215_20141001_143242_inLine +BABEL_OP3_401_47215_20141001_143242_outLine +BABEL_OP3_401_48200_20141104_174608_inLine +BABEL_OP3_401_48200_20141104_174608_outLine +BABEL_OP3_401_52025_20140917_170707_inLine +BABEL_OP3_401_52025_20140917_170707_outLine +BABEL_OP3_401_54046_20141105_192438_inLine +BABEL_OP3_401_54046_20141105_192438_outLine +BABEL_OP3_401_54744_20141001_143512_inLine +BABEL_OP3_401_54744_20141001_143512_outLine +BABEL_OP3_401_56090_20140917_155639_inLine +BABEL_OP3_401_56090_20140917_155639_outLine +BABEL_OP3_401_59898_20140930_142511_inLine +BABEL_OP3_401_59898_20140930_142511_outLine +BABEL_OP3_401_61011_20140919_134829_inLine +BABEL_OP3_401_61011_20140919_134829_outLine +BABEL_OP3_401_61011_20140919_141527_inLine +BABEL_OP3_401_61011_20140919_141527_outLine +BABEL_OP3_401_61678_20140919_183209_inLine +BABEL_OP3_401_61678_20140919_183209_outLine +BABEL_OP3_401_62724_20141016_200105_inLine +BABEL_OP3_401_62724_20141016_200105_outLine +BABEL_OP3_401_63081_20140919_142151_inLine +BABEL_OP3_401_63081_20140919_142151_outLine +BABEL_OP3_401_72007_20140930_173109_inLine +BABEL_OP3_401_72007_20140930_173109_outLine +BABEL_OP3_401_78544_20140924_155131_inLine +BABEL_OP3_401_78544_20140924_155131_outLine +BABEL_OP3_401_81424_20140927_134153_inLine +BABEL_OP3_401_81424_20140927_134153_outLine +BABEL_OP3_401_81553_20141112_153426_inLine +BABEL_OP3_401_81553_20141112_153426_outLine +BABEL_OP3_401_84815_20141014_163920_inLine +BABEL_OP3_401_84815_20141014_163920_outLine +BABEL_OP3_401_87884_20141014_190149_inLine +BABEL_OP3_401_87884_20141014_190149_outLine +BABEL_OP3_401_98506_20141124_133100_inLine +BABEL_OP3_401_98506_20141124_133100_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/dev.list b/egs/babel/s5d/conf/lists/401-mongolian/dev.list new file mode 100644 index 00000000000..47596e1204d --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/dev.list @@ -0,0 +1,124 @@ +BABEL_OP3_401_10319_20140923_150904_inLine +BABEL_OP3_401_10319_20140923_150904_outLine +BABEL_OP3_401_12916_20140930_182205_inLine +BABEL_OP3_401_12916_20140930_182205_outLine 
+BABEL_OP3_401_14229_20141015_145028_inLine +BABEL_OP3_401_14229_20141015_145028_outLine +BABEL_OP3_401_14350_20141002_192854_inLine +BABEL_OP3_401_14350_20141002_192854_outLine +BABEL_OP3_401_14875_20141013_220929_inLine +BABEL_OP3_401_14875_20141013_220929_outLine +BABEL_OP3_401_14875_20141013_222027_inLine +BABEL_OP3_401_14875_20141013_222027_outLine +BABEL_OP3_401_15163_20141020_201846_inLine +BABEL_OP3_401_15163_20141020_201846_outLine +BABEL_OP3_401_15216_20141104_171637_inLine +BABEL_OP3_401_15216_20141104_171637_outLine +BABEL_OP3_401_15324_20141031_194259_inLine +BABEL_OP3_401_15324_20141031_194259_outLine +BABEL_OP3_401_15848_20140916_174516_inLine +BABEL_OP3_401_15848_20140916_174516_outLine +BABEL_OP3_401_16184_20140924_115115_inLine +BABEL_OP3_401_16184_20140924_115115_outLine +BABEL_OP3_401_17440_20141014_172206_inLine +BABEL_OP3_401_17440_20141014_172206_outLine +BABEL_OP3_401_19621_20141027_174015_inLine +BABEL_OP3_401_19621_20141027_174015_outLine +BABEL_OP3_401_21109_20141102_133420_inLine +BABEL_OP3_401_21109_20141102_133420_outLine +BABEL_OP3_401_23505_20140930_172516_inLine +BABEL_OP3_401_23505_20140930_172516_outLine +BABEL_OP3_401_26074_20141031_001437_inLine +BABEL_OP3_401_26074_20141031_001437_outLine +BABEL_OP3_401_27125_20140916_141748_inLine +BABEL_OP3_401_27125_20140916_141748_outLine +BABEL_OP3_401_27478_20141119_222255_inLine +BABEL_OP3_401_27478_20141119_222255_outLine +BABEL_OP3_401_28606_20140930_180938_inLine +BABEL_OP3_401_28606_20140930_180938_outLine +BABEL_OP3_401_29023_20141021_134200_inLine +BABEL_OP3_401_29023_20141021_134200_outLine +BABEL_OP3_401_29135_20140919_181952_inLine +BABEL_OP3_401_29135_20140919_181952_outLine +BABEL_OP3_401_29208_20141018_152040_inLine +BABEL_OP3_401_29208_20141018_152040_outLine +BABEL_OP3_401_29777_20141105_172935_inLine +BABEL_OP3_401_29777_20141105_172935_outLine +BABEL_OP3_401_31490_20141001_195242_inLine +BABEL_OP3_401_31490_20141001_195242_outLine +BABEL_OP3_401_32301_20140927_150237_inLine +BABEL_OP3_401_32301_20140927_150237_outLine +BABEL_OP3_401_32727_20141014_193244_inLine +BABEL_OP3_401_32727_20141014_193244_outLine +BABEL_OP3_401_32861_20141112_183418_inLine +BABEL_OP3_401_32861_20141112_183418_outLine +BABEL_OP3_401_32914_20141101_192546_inLine +BABEL_OP3_401_32914_20141101_192546_outLine +BABEL_OP3_401_36219_20141014_150115_inLine +BABEL_OP3_401_36219_20141014_150115_outLine +BABEL_OP3_401_36505_20141104_134657_inLine +BABEL_OP3_401_36505_20141104_134657_outLine +BABEL_OP3_401_38554_20140917_124843_inLine +BABEL_OP3_401_38554_20140917_124843_outLine +BABEL_OP3_401_41100_20141001_131139_inLine +BABEL_OP3_401_41100_20141001_131139_outLine +BABEL_OP3_401_41741_20141002_230232_inLine +BABEL_OP3_401_41741_20141002_230232_outLine +BABEL_OP3_401_42243_20140924_154551_inLine +BABEL_OP3_401_42243_20140924_154551_outLine +BABEL_OP3_401_43368_20141016_160322_inLine +BABEL_OP3_401_43368_20141016_160322_outLine +BABEL_OP3_401_43388_20141019_234056_inLine +BABEL_OP3_401_43388_20141019_234056_outLine +BABEL_OP3_401_43789_20141020_153059_inLine +BABEL_OP3_401_43789_20141020_153059_outLine +BABEL_OP3_401_44347_20141103_201828_inLine +BABEL_OP3_401_44347_20141103_201828_outLine +BABEL_OP3_401_44420_20141014_143409_inLine +BABEL_OP3_401_44420_20141014_143409_outLine +BABEL_OP3_401_44531_20141122_231122_inLine +BABEL_OP3_401_44531_20141122_231122_outLine +BABEL_OP3_401_44619_20141003_141028_inLine +BABEL_OP3_401_44619_20141003_141028_outLine +BABEL_OP3_401_44868_20140925_153133_inLine 
+BABEL_OP3_401_44868_20140925_153133_outLine +BABEL_OP3_401_46625_20140919_144521_inLine +BABEL_OP3_401_46625_20140919_144521_outLine +BABEL_OP3_401_47215_20141001_143242_inLine +BABEL_OP3_401_47215_20141001_143242_outLine +BABEL_OP3_401_48200_20141104_174608_inLine +BABEL_OP3_401_48200_20141104_174608_outLine +BABEL_OP3_401_52025_20140917_170707_inLine +BABEL_OP3_401_52025_20140917_170707_outLine +BABEL_OP3_401_54046_20141105_192438_inLine +BABEL_OP3_401_54046_20141105_192438_outLine +BABEL_OP3_401_54744_20141001_143512_inLine +BABEL_OP3_401_54744_20141001_143512_outLine +BABEL_OP3_401_56090_20140917_155639_inLine +BABEL_OP3_401_56090_20140917_155639_outLine +BABEL_OP3_401_59898_20140930_142511_inLine +BABEL_OP3_401_59898_20140930_142511_outLine +BABEL_OP3_401_61011_20140919_134829_inLine +BABEL_OP3_401_61011_20140919_134829_outLine +BABEL_OP3_401_61011_20140919_141527_inLine +BABEL_OP3_401_61011_20140919_141527_outLine +BABEL_OP3_401_61678_20140919_183209_inLine +BABEL_OP3_401_61678_20140919_183209_outLine +BABEL_OP3_401_62724_20141016_200105_inLine +BABEL_OP3_401_62724_20141016_200105_outLine +BABEL_OP3_401_63081_20140919_142151_inLine +BABEL_OP3_401_63081_20140919_142151_outLine +BABEL_OP3_401_72007_20140930_173109_inLine +BABEL_OP3_401_72007_20140930_173109_outLine +BABEL_OP3_401_78544_20140924_155131_inLine +BABEL_OP3_401_78544_20140924_155131_outLine +BABEL_OP3_401_81424_20140927_134153_inLine +BABEL_OP3_401_81424_20140927_134153_outLine +BABEL_OP3_401_81553_20141112_153426_inLine +BABEL_OP3_401_81553_20141112_153426_outLine +BABEL_OP3_401_84815_20141014_163920_inLine +BABEL_OP3_401_84815_20141014_163920_outLine +BABEL_OP3_401_87884_20141014_190149_inLine +BABEL_OP3_401_87884_20141014_190149_outLine +BABEL_OP3_401_98506_20141124_133100_inLine +BABEL_OP3_401_98506_20141124_133100_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/eval.list b/egs/babel/s5d/conf/lists/401-mongolian/eval.list new file mode 100644 index 00000000000..d6756127490 --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/eval.list @@ -0,0 +1,186 @@ +BABEL_OP3_401_10416_20141019_182621_inLine +BABEL_OP3_401_10416_20141019_182621_outLine +BABEL_OP3_401_10974_20141027_160927_inLine +BABEL_OP3_401_10974_20141027_160927_outLine +BABEL_OP3_401_13040_20141003_135314_inLine +BABEL_OP3_401_13040_20141003_135314_outLine +BABEL_OP3_401_14158_20140923_184703_inLine +BABEL_OP3_401_14158_20140923_184703_outLine +BABEL_OP3_401_15262_20141001_153004_inLine +BABEL_OP3_401_15262_20141001_153004_outLine +BABEL_OP3_401_16056_20140930_142245_inLine +BABEL_OP3_401_16056_20140930_142245_outLine +BABEL_OP3_401_16601_20140930_160550_inLine +BABEL_OP3_401_16601_20140930_160550_outLine +BABEL_OP3_401_16787_20141017_144614_inLine +BABEL_OP3_401_16787_20141017_144614_outLine +BABEL_OP3_401_17573_20141001_155309_inLine +BABEL_OP3_401_17573_20141001_155309_outLine +BABEL_OP3_401_18863_20141104_193022_inLine +BABEL_OP3_401_18863_20141104_193022_outLine +BABEL_OP3_401_19120_20141119_194530_inLine +BABEL_OP3_401_19120_20141119_194530_outLine +BABEL_OP3_401_21029_20141003_135457_inLine +BABEL_OP3_401_21029_20141003_135457_outLine +BABEL_OP3_401_21581_20141019_224155_inLine +BABEL_OP3_401_21581_20141019_224155_outLine +BABEL_OP3_401_21794_20141020_182533_inLine +BABEL_OP3_401_21794_20141020_182533_outLine +BABEL_OP3_401_22216_20140922_180034_inLine +BABEL_OP3_401_22216_20140922_180034_outLine +BABEL_OP3_401_22612_20141020_164557_inLine +BABEL_OP3_401_22612_20141020_164557_outLine +BABEL_OP3_401_22641_20140930_120916_inLine 
+BABEL_OP3_401_22641_20140930_120916_outLine +BABEL_OP3_401_23395_20141029_191310_inLine +BABEL_OP3_401_23395_20141029_191310_outLine +BABEL_OP3_401_23731_20141027_182446_inLine +BABEL_OP3_401_23731_20141027_182446_outLine +BABEL_OP3_401_23983_20141204_001919_inLine +BABEL_OP3_401_23983_20141204_001919_outLine +BABEL_OP3_401_24589_20141014_140038_inLine +BABEL_OP3_401_24589_20141014_140038_outLine +BABEL_OP3_401_27082_20141110_145114_inLine +BABEL_OP3_401_27082_20141110_145114_outLine +BABEL_OP3_401_27218_20141002_130035_inLine +BABEL_OP3_401_27218_20141002_130035_outLine +BABEL_OP3_401_28538_20141020_154852_inLine +BABEL_OP3_401_28538_20141020_154852_outLine +BABEL_OP3_401_28585_20141205_234451_inLine +BABEL_OP3_401_28585_20141205_234451_outLine +BABEL_OP3_401_28945_20141013_144506_inLine +BABEL_OP3_401_28945_20141013_144506_outLine +BABEL_OP3_401_30250_20140919_185656_inLine +BABEL_OP3_401_30250_20140919_185656_outLine +BABEL_OP3_401_30497_20141124_222137_inLine +BABEL_OP3_401_30497_20141124_222137_outLine +BABEL_OP3_401_31979_20141018_172147_inLine +BABEL_OP3_401_31979_20141018_172147_outLine +BABEL_OP3_401_32959_20141010_150730_inLine +BABEL_OP3_401_32959_20141010_150730_outLine +BABEL_OP3_401_34688_20140930_171613_inLine +BABEL_OP3_401_34688_20140930_171613_outLine +BABEL_OP3_401_35069_20141111_153956_inLine +BABEL_OP3_401_35069_20141111_153956_outLine +BABEL_OP3_401_36341_20140919_151216_inLine +BABEL_OP3_401_36341_20140919_151216_outLine +BABEL_OP3_401_37281_20141021_145639_inLine +BABEL_OP3_401_37281_20141021_145639_outLine +BABEL_OP3_401_38431_20141017_210308_inLine +BABEL_OP3_401_38431_20141017_210308_outLine +BABEL_OP3_401_39059_20141113_134730_inLine +BABEL_OP3_401_39059_20141113_134730_outLine +BABEL_OP3_401_39159_20140919_165446_inLine +BABEL_OP3_401_39159_20140919_165446_outLine +BABEL_OP3_401_39680_20141114_221332_inLine +BABEL_OP3_401_39680_20141114_221332_outLine +BABEL_OP3_401_41400_20141201_173539_inLine +BABEL_OP3_401_41400_20141201_173539_outLine +BABEL_OP3_401_41542_20141117_192728_inLine +BABEL_OP3_401_41542_20141117_192728_outLine +BABEL_OP3_401_41920_20141001_131923_inLine +BABEL_OP3_401_41920_20141001_131923_outLine +BABEL_OP3_401_42600_20141015_152342_inLine +BABEL_OP3_401_42600_20141015_152342_outLine +BABEL_OP3_401_42991_20140922_191649_inLine +BABEL_OP3_401_42991_20140922_191649_outLine +BABEL_OP3_401_43920_20141001_185918_inLine +BABEL_OP3_401_43920_20141001_185918_outLine +BABEL_OP3_401_44847_20141101_215443_inLine +BABEL_OP3_401_44847_20141101_215443_outLine +BABEL_OP3_401_45106_20141103_141740_inLine +BABEL_OP3_401_45106_20141103_141740_outLine +BABEL_OP3_401_45106_20141103_142537_inLine +BABEL_OP3_401_45106_20141103_142537_outLine +BABEL_OP3_401_45642_20141001_205602_inLine +BABEL_OP3_401_45642_20141001_205602_outLine +BABEL_OP3_401_45777_20141015_141952_inLine +BABEL_OP3_401_45777_20141015_141952_outLine +BABEL_OP3_401_46333_20140929_163346_inLine +BABEL_OP3_401_46333_20140929_163346_outLine +BABEL_OP3_401_46702_20140917_183418_inLine +BABEL_OP3_401_46702_20140917_183418_outLine +BABEL_OP3_401_47877_20141111_151410_inLine +BABEL_OP3_401_47877_20141111_151410_outLine +BABEL_OP3_401_48789_20141020_160327_inLine +BABEL_OP3_401_48789_20141020_160327_outLine +BABEL_OP3_401_50630_20140926_164312_inLine +BABEL_OP3_401_50630_20140926_164312_outLine +BABEL_OP3_401_50726_20140922_142113_inLine +BABEL_OP3_401_50726_20140922_142113_outLine +BABEL_OP3_401_50962_20141009_174305_inLine +BABEL_OP3_401_50962_20141009_174305_outLine 
+BABEL_OP3_401_51540_20141110_152608_inLine +BABEL_OP3_401_51540_20141110_152608_outLine +BABEL_OP3_401_52438_20141014_155319_inLine +BABEL_OP3_401_52438_20141014_155319_outLine +BABEL_OP3_401_52442_20141023_165129_inLine +BABEL_OP3_401_52442_20141023_165129_outLine +BABEL_OP3_401_53063_20141016_144707_inLine +BABEL_OP3_401_53063_20141016_144707_outLine +BABEL_OP3_401_53419_20141018_182244_inLine +BABEL_OP3_401_53419_20141018_182244_outLine +BABEL_OP3_401_56213_20141016_153651_inLine +BABEL_OP3_401_56213_20141016_153651_outLine +BABEL_OP3_401_57922_20141030_180727_inLine +BABEL_OP3_401_57922_20141030_180727_outLine +BABEL_OP3_401_58047_20141030_203452_inLine +BABEL_OP3_401_58047_20141030_203452_outLine +BABEL_OP3_401_58489_20141001_175646_inLine +BABEL_OP3_401_58489_20141001_175646_outLine +BABEL_OP3_401_59993_20141006_214918_inLine +BABEL_OP3_401_59993_20141006_214918_outLine +BABEL_OP3_401_62155_20141124_185836_inLine +BABEL_OP3_401_62155_20141124_185836_outLine +BABEL_OP3_401_62852_20140922_125106_inLine +BABEL_OP3_401_62852_20140922_125106_outLine +BABEL_OP3_401_63670_20141016_165949_inLine +BABEL_OP3_401_63670_20141016_165949_outLine +BABEL_OP3_401_64494_20141007_112731_inLine +BABEL_OP3_401_64494_20141007_112731_outLine +BABEL_OP3_401_66026_20141016_173200_inLine +BABEL_OP3_401_66026_20141016_173200_outLine +BABEL_OP3_401_67842_20141006_131608_inLine +BABEL_OP3_401_67842_20141006_131608_outLine +BABEL_OP3_401_68306_20140929_200051_inLine +BABEL_OP3_401_68306_20140929_200051_outLine +BABEL_OP3_401_69153_20141016_161457_inLine +BABEL_OP3_401_69153_20141016_161457_outLine +BABEL_OP3_401_70282_20141030_201700_inLine +BABEL_OP3_401_70282_20141030_201700_outLine +BABEL_OP3_401_77567_20140921_154030_inLine +BABEL_OP3_401_77567_20140921_154030_outLine +BABEL_OP3_401_78511_20141001_195118_inLine +BABEL_OP3_401_78511_20141001_195118_outLine +BABEL_OP3_401_79139_20141110_182604_inLine +BABEL_OP3_401_79139_20141110_182604_outLine +BABEL_OP3_401_80897_20141030_171507_inLine +BABEL_OP3_401_80897_20141030_171507_outLine +BABEL_OP3_401_81392_20140929_161849_inLine +BABEL_OP3_401_81392_20140929_161849_outLine +BABEL_OP3_401_81404_20141016_154459_inLine +BABEL_OP3_401_81404_20141016_154459_outLine +BABEL_OP3_401_81404_20141016_155649_inLine +BABEL_OP3_401_81404_20141016_155649_outLine +BABEL_OP3_401_83935_20140930_153105_inLine +BABEL_OP3_401_83935_20140930_153105_outLine +BABEL_OP3_401_84327_20140930_203221_inLine +BABEL_OP3_401_84327_20140930_203221_outLine +BABEL_OP3_401_84823_20141018_193727_inLine +BABEL_OP3_401_84823_20141018_193727_outLine +BABEL_OP3_401_87280_20141021_202831_inLine +BABEL_OP3_401_87280_20141021_202831_outLine +BABEL_OP3_401_88550_20141127_184443_inLine +BABEL_OP3_401_88550_20141127_184443_outLine +BABEL_OP3_401_89372_20140921_132733_inLine +BABEL_OP3_401_89372_20140921_132733_outLine +BABEL_OP3_401_91581_20141001_163329_inLine +BABEL_OP3_401_91581_20141001_163329_outLine +BABEL_OP3_401_93946_20141016_192913_inLine +BABEL_OP3_401_93946_20141016_192913_outLine +BABEL_OP3_401_94002_20141020_150022_inLine +BABEL_OP3_401_94002_20141020_150022_outLine +BABEL_OP3_401_96504_20141014_144817_inLine +BABEL_OP3_401_96504_20141014_144817_outLine +BABEL_OP3_401_99732_20141018_150700_inLine +BABEL_OP3_401_99732_20141018_150700_outLine
diff --git a/egs/babel/s5d/conf/lists/401-mongolian/sub-train.list b/egs/babel/s5d/conf/lists/401-mongolian/sub-train.list
new file mode 100644
index 00000000000..f4b87dcaef8
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/401-mongolian/sub-train.list
@@ -0,0 +1,126 @@
+BABEL_OP3_401_13030_20141015_163112_inLine +BABEL_OP3_401_13030_20141015_163112_outLine +BABEL_OP3_401_13324_20141002_165637_inLine +BABEL_OP3_401_13324_20141002_165637_outLine +BABEL_OP3_401_13586_20141023_193242_inLine +BABEL_OP3_401_13586_20141023_193242_outLine +BABEL_OP3_401_14560_20140922_140509_inLine +BABEL_OP3_401_14560_20140922_140509_outLine +BABEL_OP3_401_15902_20140930_144526_inLine +BABEL_OP3_401_15902_20140930_144526_outLine +BABEL_OP3_401_16475_20141016_143941_inLine +BABEL_OP3_401_16475_20141016_143941_outLine +BABEL_OP3_401_17567_20141023_213629_inLine +BABEL_OP3_401_17567_20141023_213629_outLine +BABEL_OP3_401_17923_20141002_172711_inLine +BABEL_OP3_401_17923_20141002_172711_outLine +BABEL_OP3_401_19101_20141029_183652_inLine +BABEL_OP3_401_19101_20141029_183652_outLine +BABEL_OP3_401_19722_20140930_200553_inLine +BABEL_OP3_401_19722_20140930_200553_outLine +BABEL_OP3_401_20916_20140922_174215_inLine +BABEL_OP3_401_20916_20140922_174215_outLine +BABEL_OP3_401_22321_20140929_180456_inLine +BABEL_OP3_401_22321_20140929_180456_outLine +BABEL_OP3_401_23893_20141125_213344_inLine +BABEL_OP3_401_23893_20141125_213344_outLine +BABEL_OP3_401_24290_20141124_184351_inLine +BABEL_OP3_401_24290_20141124_184351_outLine +BABEL_OP3_401_24323_20141017_151036_inLine +BABEL_OP3_401_24323_20141017_151036_outLine +BABEL_OP3_401_24470_20141029_145653_inLine +BABEL_OP3_401_24470_20141029_145653_outLine +BABEL_OP3_401_24605_20141001_142727_inLine +BABEL_OP3_401_24605_20141001_142727_outLine +BABEL_OP3_401_25961_20140929_183632_inLine +BABEL_OP3_401_25961_20140929_183632_outLine +BABEL_OP3_401_26072_20141112_173131_inLine +BABEL_OP3_401_26072_20141112_173131_outLine +BABEL_OP3_401_26398_20141204_001557_inLine +BABEL_OP3_401_26398_20141204_001557_outLine +BABEL_OP3_401_26574_20141103_163656_inLine +BABEL_OP3_401_26574_20141103_163656_outLine +BABEL_OP3_401_26999_20141101_213851_inLine +BABEL_OP3_401_26999_20141101_213851_outLine +BABEL_OP3_401_27042_20141017_184608_inLine +BABEL_OP3_401_27042_20141017_184608_outLine +BABEL_OP3_401_27841_20141113_200006_inLine +BABEL_OP3_401_27841_20141113_200006_outLine +BABEL_OP3_401_28775_20141003_162126_inLine +BABEL_OP3_401_28775_20141003_162126_outLine +BABEL_OP3_401_29076_20141031_003943_inLine +BABEL_OP3_401_29076_20141031_003943_outLine +BABEL_OP3_401_29404_20141121_153054_inLine +BABEL_OP3_401_29404_20141121_153054_outLine +BABEL_OP3_401_29685_20141019_210404_inLine +BABEL_OP3_401_29685_20141019_210404_outLine +BABEL_OP3_401_29685_20141019_210959_inLine +BABEL_OP3_401_29685_20141019_210959_outLine +BABEL_OP3_401_31624_20141003_192655_inLine +BABEL_OP3_401_31624_20141003_192655_outLine +BABEL_OP3_401_31628_20140923_145349_inLine +BABEL_OP3_401_31628_20140923_145349_outLine +BABEL_OP3_401_32708_20141003_200927_inLine +BABEL_OP3_401_32708_20141003_200927_outLine +BABEL_OP3_401_33111_20141122_223105_inLine +BABEL_OP3_401_33111_20141122_223105_outLine +BABEL_OP3_401_33672_20140930_132456_inLine +BABEL_OP3_401_33672_20140930_132456_outLine +BABEL_OP3_401_33672_20140930_133426_inLine +BABEL_OP3_401_33672_20140930_133426_outLine +BABEL_OP3_401_35143_20141010_163440_inLine +BABEL_OP3_401_35143_20141010_163440_outLine +BABEL_OP3_401_38878_20141031_201014_inLine +BABEL_OP3_401_38878_20141031_201014_outLine +BABEL_OP3_401_40713_20141003_155735_inLine +BABEL_OP3_401_40713_20141003_155735_outLine +BABEL_OP3_401_41618_20141028_201644_inLine +BABEL_OP3_401_41618_20141028_201644_outLine +BABEL_OP3_401_42619_20141104_204106_inLine
+BABEL_OP3_401_42619_20141104_204106_outLine +BABEL_OP3_401_42834_20141103_204826_inLine +BABEL_OP3_401_42834_20141103_204826_outLine +BABEL_OP3_401_43646_20140917_164218_inLine +BABEL_OP3_401_43646_20140917_164218_outLine +BABEL_OP3_401_45560_20140930_190100_inLine +BABEL_OP3_401_45560_20140930_190100_outLine +BABEL_OP3_401_46881_20140922_175212_inLine +BABEL_OP3_401_46881_20140922_175212_outLine +BABEL_OP3_401_47283_20141006_193958_inLine +BABEL_OP3_401_47283_20141006_193958_outLine +BABEL_OP3_401_51407_20141027_182114_inLine +BABEL_OP3_401_51407_20141027_182114_outLine +BABEL_OP3_401_52725_20141123_224942_inLine +BABEL_OP3_401_52725_20141123_224942_outLine +BABEL_OP3_401_52818_20140922_184227_inLine +BABEL_OP3_401_52818_20140922_184227_outLine +BABEL_OP3_401_54162_20141107_221207_inLine +BABEL_OP3_401_54162_20141107_221207_outLine +BABEL_OP3_401_56677_20141020_160804_inLine +BABEL_OP3_401_56677_20141020_160804_outLine +BABEL_OP3_401_57065_20140924_135508_inLine +BABEL_OP3_401_57065_20140924_135508_outLine +BABEL_OP3_401_60310_20141017_165419_inLine +BABEL_OP3_401_60310_20141017_165419_outLine +BABEL_OP3_401_63906_20141124_212323_inLine +BABEL_OP3_401_63906_20141124_212323_outLine +BABEL_OP3_401_64398_20140922_165727_inLine +BABEL_OP3_401_64398_20140922_165727_outLine +BABEL_OP3_401_73022_20141111_173204_inLine +BABEL_OP3_401_73022_20141111_173204_outLine +BABEL_OP3_401_74921_20140924_165937_inLine +BABEL_OP3_401_74921_20140924_165937_outLine +BABEL_OP3_401_77744_20141014_125609_inLine +BABEL_OP3_401_77744_20141014_125609_outLine +BABEL_OP3_401_77744_20141014_140124_inLine +BABEL_OP3_401_77744_20141014_140124_outLine +BABEL_OP3_401_79167_20140925_132420_inLine +BABEL_OP3_401_79167_20140925_132420_outLine +BABEL_OP3_401_81287_20141001_145404_inLine +BABEL_OP3_401_81287_20141001_145404_outLine +BABEL_OP3_401_82224_20141111_175445_inLine +BABEL_OP3_401_82224_20141111_175445_outLine +BABEL_OP3_401_87073_20140917_201716_inLine +BABEL_OP3_401_87073_20140917_201716_outLine +BABEL_OP3_401_91977_20140925_184203_inLine +BABEL_OP3_401_91977_20140925_184203_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/401-mongolian/sub-train.untranscribed.list new file mode 100644 index 00000000000..550224d6e16 --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/sub-train.untranscribed.list @@ -0,0 +1,392 @@ +BABEL_OP3_401_10524_20141119_213218_inLine +BABEL_OP3_401_10524_20141119_213218_outLine +BABEL_OP3_401_10647_20141119_154922_inLine +BABEL_OP3_401_10647_20141119_154922_outLine +BABEL_OP3_401_10901_20141021_124158_inLine +BABEL_OP3_401_10901_20141021_124158_outLine +BABEL_OP3_401_10966_20141019_192715_inLine +BABEL_OP3_401_10966_20141019_192715_outLine +BABEL_OP3_401_11581_20141030_214939_inLine +BABEL_OP3_401_11581_20141030_214939_outLine +BABEL_OP3_401_11663_20141105_143103_inLine +BABEL_OP3_401_11663_20141105_143103_outLine +BABEL_OP3_401_11673_20140917_163413_inLine +BABEL_OP3_401_11673_20140917_163413_outLine +BABEL_OP3_401_11797_20140929_205622_inLine +BABEL_OP3_401_11797_20140929_205622_outLine +BABEL_OP3_401_12036_20141002_134817_inLine +BABEL_OP3_401_12036_20141002_134817_outLine +BABEL_OP3_401_12242_20141014_143019_inLine +BABEL_OP3_401_12242_20141014_143019_outLine +BABEL_OP3_401_12635_20141117_185400_inLine +BABEL_OP3_401_12635_20141117_185400_outLine +BABEL_OP3_401_12767_20141001_130658_inLine +BABEL_OP3_401_12767_20141001_130658_outLine +BABEL_OP3_401_12851_20140919_135242_inLine 
+BABEL_OP3_401_12851_20140919_135242_outLine +BABEL_OP3_401_13184_20141110_163330_inLine +BABEL_OP3_401_13184_20141110_163330_outLine +BABEL_OP3_401_13184_20141110_163902_inLine +BABEL_OP3_401_13184_20141110_163902_outLine +BABEL_OP3_401_13490_20141110_152643_inLine +BABEL_OP3_401_13490_20141110_152643_outLine +BABEL_OP3_401_13561_20141027_154606_inLine +BABEL_OP3_401_13561_20141027_154606_outLine +BABEL_OP3_401_13664_20140922_131741_inLine +BABEL_OP3_401_13664_20140922_131741_outLine +BABEL_OP3_401_13709_20141118_170840_inLine +BABEL_OP3_401_13709_20141118_170840_outLine +BABEL_OP3_401_13744_20140919_122844_inLine +BABEL_OP3_401_13744_20140919_122844_outLine +BABEL_OP3_401_14719_20141017_215720_inLine +BABEL_OP3_401_14719_20141017_215720_outLine +BABEL_OP3_401_14725_20140929_155627_inLine +BABEL_OP3_401_14725_20140929_155627_outLine +BABEL_OP3_401_14807_20141030_232134_inLine +BABEL_OP3_401_14807_20141030_232134_outLine +BABEL_OP3_401_14814_20141014_184415_inLine +BABEL_OP3_401_14814_20141014_184415_outLine +BABEL_OP3_401_14972_20141028_200051_inLine +BABEL_OP3_401_14972_20141028_200051_outLine +BABEL_OP3_401_15702_20140923_180447_inLine +BABEL_OP3_401_15702_20140923_180447_outLine +BABEL_OP3_401_15730_20140924_135900_inLine +BABEL_OP3_401_15730_20140924_135900_outLine +BABEL_OP3_401_16749_20141112_193028_inLine +BABEL_OP3_401_16749_20141112_193028_outLine +BABEL_OP3_401_16839_20141110_174923_inLine +BABEL_OP3_401_16839_20141110_174923_outLine +BABEL_OP3_401_16886_20141017_152623_inLine +BABEL_OP3_401_16886_20141017_152623_outLine +BABEL_OP3_401_16924_20140923_164321_inLine +BABEL_OP3_401_16924_20140923_164321_outLine +BABEL_OP3_401_17320_20141125_170435_inLine +BABEL_OP3_401_17320_20141125_170435_outLine +BABEL_OP3_401_17420_20141118_190621_inLine +BABEL_OP3_401_17420_20141118_190621_outLine +BABEL_OP3_401_17615_20140924_144400_inLine +BABEL_OP3_401_17615_20140924_144400_outLine +BABEL_OP3_401_18078_20141113_162556_inLine +BABEL_OP3_401_18078_20141113_162556_outLine +BABEL_OP3_401_18380_20141023_154240_inLine +BABEL_OP3_401_18380_20141023_154240_outLine +BABEL_OP3_401_18566_20141120_004140_inLine +BABEL_OP3_401_18566_20141120_004140_outLine +BABEL_OP3_401_18924_20141030_205111_inLine +BABEL_OP3_401_18924_20141030_205111_outLine +BABEL_OP3_401_18939_20141001_200418_inLine +BABEL_OP3_401_18939_20141001_200418_outLine +BABEL_OP3_401_19134_20141030_191814_inLine +BABEL_OP3_401_19134_20141030_191814_outLine +BABEL_OP3_401_19134_20141030_192931_inLine +BABEL_OP3_401_19134_20141030_192931_outLine +BABEL_OP3_401_19773_20141101_211403_inLine +BABEL_OP3_401_19773_20141101_211403_outLine +BABEL_OP3_401_19818_20141103_184746_inLine +BABEL_OP3_401_19818_20141103_184746_outLine +BABEL_OP3_401_19818_20141103_185728_inLine +BABEL_OP3_401_19818_20141103_185728_outLine +BABEL_OP3_401_20133_20140919_173858_inLine +BABEL_OP3_401_20133_20140919_173858_outLine +BABEL_OP3_401_20922_20141110_190444_inLine +BABEL_OP3_401_20922_20141110_190444_outLine +BABEL_OP3_401_21206_20141003_120941_inLine +BABEL_OP3_401_21206_20141003_120941_outLine +BABEL_OP3_401_21206_20141003_122457_inLine +BABEL_OP3_401_21206_20141003_122457_outLine +BABEL_OP3_401_21327_20141020_204038_inLine +BABEL_OP3_401_21327_20141020_204038_outLine +BABEL_OP3_401_21807_20141029_214508_inLine +BABEL_OP3_401_21807_20141029_214508_outLine +BABEL_OP3_401_22446_20140929_133647_inLine +BABEL_OP3_401_22446_20140929_133647_outLine +BABEL_OP3_401_22624_20141001_141008_inLine +BABEL_OP3_401_22624_20141001_141008_outLine 
+BABEL_OP3_401_22918_20141114_145920_inLine +BABEL_OP3_401_22918_20141114_145920_outLine +BABEL_OP3_401_23006_20141014_190149_inLine +BABEL_OP3_401_23006_20141014_190149_outLine +BABEL_OP3_401_23046_20141014_150823_inLine +BABEL_OP3_401_23046_20141014_150823_outLine +BABEL_OP3_401_23092_20141010_141138_inLine +BABEL_OP3_401_23092_20141010_141138_outLine +BABEL_OP3_401_23153_20141018_201630_inLine +BABEL_OP3_401_23153_20141018_201630_outLine +BABEL_OP3_401_23980_20141018_192714_inLine +BABEL_OP3_401_23980_20141018_192714_outLine +BABEL_OP3_401_24270_20141030_195323_inLine +BABEL_OP3_401_24270_20141030_195323_outLine +BABEL_OP3_401_24569_20141016_182323_inLine +BABEL_OP3_401_24569_20141016_182323_outLine +BABEL_OP3_401_24586_20141117_160948_inLine +BABEL_OP3_401_24586_20141117_160948_outLine +BABEL_OP3_401_24590_20141017_175757_inLine +BABEL_OP3_401_24590_20141017_175757_outLine +BABEL_OP3_401_24679_20140919_185323_inLine +BABEL_OP3_401_24679_20140919_185323_outLine +BABEL_OP3_401_24982_20141008_150245_inLine +BABEL_OP3_401_24982_20141008_150245_outLine +BABEL_OP3_401_25412_20141031_171749_inLine +BABEL_OP3_401_25412_20141031_171749_outLine +BABEL_OP3_401_25719_20141110_191042_inLine +BABEL_OP3_401_25719_20141110_191042_outLine +BABEL_OP3_401_26507_20141118_210109_inLine +BABEL_OP3_401_26507_20141118_210109_outLine +BABEL_OP3_401_27203_20141030_164916_inLine +BABEL_OP3_401_27203_20141030_164916_outLine +BABEL_OP3_401_28522_20140927_172947_inLine +BABEL_OP3_401_28522_20140927_172947_outLine +BABEL_OP3_401_28600_20141021_194818_inLine +BABEL_OP3_401_28600_20141021_194818_outLine +BABEL_OP3_401_28814_20141112_190902_inLine +BABEL_OP3_401_28814_20141112_190902_outLine +BABEL_OP3_401_29021_20141118_205619_inLine +BABEL_OP3_401_29021_20141118_205619_outLine +BABEL_OP3_401_29323_20141113_190829_inLine +BABEL_OP3_401_29323_20141113_190829_outLine +BABEL_OP3_401_30013_20140927_141830_inLine +BABEL_OP3_401_30013_20140927_141830_outLine +BABEL_OP3_401_30058_20141118_221622_inLine +BABEL_OP3_401_30058_20141118_221622_outLine +BABEL_OP3_401_31346_20141103_145401_inLine +BABEL_OP3_401_31346_20141103_145401_outLine +BABEL_OP3_401_31992_20141001_135942_inLine +BABEL_OP3_401_31992_20141001_135942_outLine +BABEL_OP3_401_32122_20141016_212210_inLine +BABEL_OP3_401_32122_20141016_212210_outLine +BABEL_OP3_401_32328_20141018_200856_inLine +BABEL_OP3_401_32328_20141018_200856_outLine +BABEL_OP3_401_33273_20141021_153659_inLine +BABEL_OP3_401_33273_20141021_153659_outLine +BABEL_OP3_401_33355_20141001_174510_inLine +BABEL_OP3_401_33355_20141001_174510_outLine +BABEL_OP3_401_33497_20141106_201923_inLine +BABEL_OP3_401_33497_20141106_201923_outLine +BABEL_OP3_401_33913_20141020_135517_inLine +BABEL_OP3_401_33913_20141020_135517_outLine +BABEL_OP3_401_34197_20140919_193654_inLine +BABEL_OP3_401_34197_20140919_193654_outLine +BABEL_OP3_401_34328_20141020_142248_inLine +BABEL_OP3_401_34328_20141020_142248_outLine +BABEL_OP3_401_34679_20141006_155637_inLine +BABEL_OP3_401_34679_20141006_155637_outLine +BABEL_OP3_401_35139_20141002_182038_inLine +BABEL_OP3_401_35139_20141002_182038_outLine +BABEL_OP3_401_35467_20140919_155737_inLine +BABEL_OP3_401_35467_20140919_155737_outLine +BABEL_OP3_401_35467_20140919_162819_inLine +BABEL_OP3_401_35467_20140919_162819_outLine +BABEL_OP3_401_36894_20140921_162105_inLine +BABEL_OP3_401_36894_20140921_162105_outLine +BABEL_OP3_401_37285_20140929_192149_inLine +BABEL_OP3_401_37285_20140929_192149_outLine +BABEL_OP3_401_37290_20141031_174340_inLine 
+BABEL_OP3_401_37290_20141031_174340_outLine +BABEL_OP3_401_37598_20141031_155805_inLine +BABEL_OP3_401_37598_20141031_155805_outLine +BABEL_OP3_401_38340_20141003_182953_inLine +BABEL_OP3_401_38340_20141003_182953_outLine +BABEL_OP3_401_39307_20140922_113434_inLine +BABEL_OP3_401_39307_20140922_113434_outLine +BABEL_OP3_401_39426_20141114_165136_inLine +BABEL_OP3_401_39426_20141114_165136_outLine +BABEL_OP3_401_39920_20141118_215327_inLine +BABEL_OP3_401_39920_20141118_215327_outLine +BABEL_OP3_401_40557_20141014_182351_inLine +BABEL_OP3_401_40557_20141014_182351_outLine +BABEL_OP3_401_41592_20141020_140853_inLine +BABEL_OP3_401_41592_20141020_140853_outLine +BABEL_OP3_401_41598_20141113_151053_inLine +BABEL_OP3_401_41598_20141113_151053_outLine +BABEL_OP3_401_42029_20141113_160852_inLine +BABEL_OP3_401_42029_20141113_160852_outLine +BABEL_OP3_401_42155_20141028_185638_inLine +BABEL_OP3_401_42155_20141028_185638_outLine +BABEL_OP3_401_42434_20141019_233012_inLine +BABEL_OP3_401_42434_20141019_233012_outLine +BABEL_OP3_401_42497_20141002_144745_inLine +BABEL_OP3_401_42497_20141002_144745_outLine +BABEL_OP3_401_42771_20141028_135131_inLine +BABEL_OP3_401_42771_20141028_135131_outLine +BABEL_OP3_401_42942_20141018_160034_inLine +BABEL_OP3_401_42942_20141018_160034_outLine +BABEL_OP3_401_43286_20140923_144213_inLine +BABEL_OP3_401_43286_20140923_144213_outLine +BABEL_OP3_401_43784_20141008_215339_inLine +BABEL_OP3_401_43784_20141008_215339_outLine +BABEL_OP3_401_43788_20140925_172756_inLine +BABEL_OP3_401_43788_20140925_172756_outLine +BABEL_OP3_401_45201_20141127_132656_inLine +BABEL_OP3_401_45201_20141127_132656_outLine +BABEL_OP3_401_46261_20141021_185026_inLine +BABEL_OP3_401_46261_20141021_185026_outLine +BABEL_OP3_401_46310_20140930_153138_inLine +BABEL_OP3_401_46310_20140930_153138_outLine +BABEL_OP3_401_46550_20141006_181152_inLine +BABEL_OP3_401_46550_20141006_181152_outLine +BABEL_OP3_401_46558_20140924_164642_inLine +BABEL_OP3_401_46558_20140924_164642_outLine +BABEL_OP3_401_46589_20140924_191634_inLine +BABEL_OP3_401_46589_20140924_191634_outLine +BABEL_OP3_401_46681_20141002_163836_inLine +BABEL_OP3_401_46681_20141002_163836_outLine +BABEL_OP3_401_46688_20141001_201358_inLine +BABEL_OP3_401_46688_20141001_201358_outLine +BABEL_OP3_401_46770_20141111_221929_inLine +BABEL_OP3_401_46770_20141111_221929_outLine +BABEL_OP3_401_47487_20141016_162401_inLine +BABEL_OP3_401_47487_20141016_162401_outLine +BABEL_OP3_401_47866_20141124_164427_inLine +BABEL_OP3_401_47866_20141124_164427_outLine +BABEL_OP3_401_47878_20141030_173221_inLine +BABEL_OP3_401_47878_20141030_173221_outLine +BABEL_OP3_401_48243_20141006_175215_inLine +BABEL_OP3_401_48243_20141006_175215_outLine +BABEL_OP3_401_48610_20141001_225254_inLine +BABEL_OP3_401_48610_20141001_225254_outLine +BABEL_OP3_401_49001_20141014_165716_inLine +BABEL_OP3_401_49001_20141014_165716_outLine +BABEL_OP3_401_49306_20141124_193818_inLine +BABEL_OP3_401_49306_20141124_193818_outLine +BABEL_OP3_401_50427_20141028_152244_inLine +BABEL_OP3_401_50427_20141028_152244_outLine +BABEL_OP3_401_51968_20141019_151724_inLine +BABEL_OP3_401_51968_20141019_151724_outLine +BABEL_OP3_401_52404_20140924_182534_inLine +BABEL_OP3_401_52404_20140924_182534_outLine +BABEL_OP3_401_53957_20141020_142913_inLine +BABEL_OP3_401_53957_20141020_142913_outLine +BABEL_OP3_401_54074_20141021_142528_inLine +BABEL_OP3_401_54074_20141021_142528_outLine +BABEL_OP3_401_56331_20141124_184702_inLine +BABEL_OP3_401_56331_20141124_184702_outLine 
+BABEL_OP3_401_57529_20141017_181551_inLine +BABEL_OP3_401_57529_20141017_181551_outLine +BABEL_OP3_401_57542_20141122_182629_inLine +BABEL_OP3_401_57542_20141122_182629_outLine +BABEL_OP3_401_58006_20141124_153854_inLine +BABEL_OP3_401_58006_20141124_153854_outLine +BABEL_OP3_401_58006_20141124_155107_inLine +BABEL_OP3_401_58006_20141124_155107_outLine +BABEL_OP3_401_58734_20140930_173126_inLine +BABEL_OP3_401_58734_20140930_173126_outLine +BABEL_OP3_401_58821_20140930_211254_inLine +BABEL_OP3_401_58821_20140930_211254_outLine +BABEL_OP3_401_59078_20141030_203852_inLine +BABEL_OP3_401_59078_20141030_203852_outLine +BABEL_OP3_401_59078_20141030_205139_inLine +BABEL_OP3_401_59078_20141030_205139_outLine +BABEL_OP3_401_60026_20141002_115024_inLine +BABEL_OP3_401_60026_20141002_115024_outLine +BABEL_OP3_401_60474_20141015_154855_inLine +BABEL_OP3_401_60474_20141015_154855_outLine +BABEL_OP3_401_65077_20140917_151315_inLine +BABEL_OP3_401_65077_20140917_151315_outLine +BABEL_OP3_401_65367_20141111_163221_inLine +BABEL_OP3_401_65367_20141111_163221_outLine +BABEL_OP3_401_66472_20141027_173935_inLine +BABEL_OP3_401_66472_20141027_173935_outLine +BABEL_OP3_401_68068_20140925_140055_inLine +BABEL_OP3_401_68068_20140925_140055_outLine +BABEL_OP3_401_68384_20141020_225435_inLine +BABEL_OP3_401_68384_20141020_225435_outLine +BABEL_OP3_401_68385_20140919_175351_inLine +BABEL_OP3_401_68385_20140919_175351_outLine +BABEL_OP3_401_68748_20140925_160756_inLine +BABEL_OP3_401_68748_20140925_160756_outLine +BABEL_OP3_401_69474_20140930_190551_inLine +BABEL_OP3_401_69474_20140930_190551_outLine +BABEL_OP3_401_69636_20140924_174446_inLine +BABEL_OP3_401_69636_20140924_174446_outLine +BABEL_OP3_401_71566_20141001_171842_inLine +BABEL_OP3_401_71566_20141001_171842_outLine +BABEL_OP3_401_72040_20141009_171306_inLine +BABEL_OP3_401_72040_20141009_171306_outLine +BABEL_OP3_401_72110_20141001_122146_inLine +BABEL_OP3_401_72110_20141001_122146_outLine +BABEL_OP3_401_72844_20140919_154733_inLine +BABEL_OP3_401_72844_20140919_154733_outLine +BABEL_OP3_401_72844_20140919_162600_inLine +BABEL_OP3_401_72844_20140919_162600_outLine +BABEL_OP3_401_73430_20140930_142250_inLine +BABEL_OP3_401_73430_20140930_142250_outLine +BABEL_OP3_401_73591_20140904_190044_inLine +BABEL_OP3_401_73591_20140904_190044_outLine +BABEL_OP3_401_74667_20141017_173017_inLine +BABEL_OP3_401_74667_20141017_173017_outLine +BABEL_OP3_401_74799_20141030_203910_inLine +BABEL_OP3_401_74799_20141030_203910_outLine +BABEL_OP3_401_75505_20140917_155231_inLine +BABEL_OP3_401_75505_20140917_155231_outLine +BABEL_OP3_401_76126_20141018_171804_inLine +BABEL_OP3_401_76126_20141018_171804_outLine +BABEL_OP3_401_76437_20140904_161741_inLine +BABEL_OP3_401_76437_20140904_161741_outLine +BABEL_OP3_401_76444_20141014_203500_inLine +BABEL_OP3_401_76444_20141014_203500_outLine +BABEL_OP3_401_76499_20141022_151625_inLine +BABEL_OP3_401_76499_20141022_151625_outLine +BABEL_OP3_401_78482_20141104_155857_inLine +BABEL_OP3_401_78482_20141104_155857_outLine +BABEL_OP3_401_79080_20141112_120644_inLine +BABEL_OP3_401_79080_20141112_120644_outLine +BABEL_OP3_401_79131_20141125_193444_inLine +BABEL_OP3_401_79131_20141125_193444_outLine +BABEL_OP3_401_79995_20141020_232746_inLine +BABEL_OP3_401_79995_20141020_232746_outLine +BABEL_OP3_401_80136_20141112_134414_inLine +BABEL_OP3_401_80136_20141112_134414_outLine +BABEL_OP3_401_80306_20141110_184642_inLine +BABEL_OP3_401_80306_20141110_184642_outLine +BABEL_OP3_401_80439_20141015_141847_inLine 
+BABEL_OP3_401_80439_20141015_141847_outLine +BABEL_OP3_401_80559_20141003_131820_inLine +BABEL_OP3_401_80559_20141003_131820_outLine +BABEL_OP3_401_81433_20141027_184533_inLine +BABEL_OP3_401_81433_20141027_184533_outLine +BABEL_OP3_401_81622_20141021_162012_inLine +BABEL_OP3_401_81622_20141021_162012_outLine +BABEL_OP3_401_82035_20141030_173356_inLine +BABEL_OP3_401_82035_20141030_173356_outLine +BABEL_OP3_401_82035_20141030_174442_inLine +BABEL_OP3_401_82035_20141030_174442_outLine +BABEL_OP3_401_84547_20140917_192745_inLine +BABEL_OP3_401_84547_20140917_192745_outLine +BABEL_OP3_401_84547_20140917_194346_inLine +BABEL_OP3_401_84547_20140917_194346_outLine +BABEL_OP3_401_86676_20140924_200749_inLine +BABEL_OP3_401_86676_20140924_200749_outLine +BABEL_OP3_401_87871_20141018_185934_inLine +BABEL_OP3_401_87871_20141018_185934_outLine +BABEL_OP3_401_87921_20141010_173551_inLine +BABEL_OP3_401_87921_20141010_173551_outLine +BABEL_OP3_401_88783_20141022_171250_inLine +BABEL_OP3_401_88783_20141022_171250_outLine +BABEL_OP3_401_90737_20141020_180826_inLine +BABEL_OP3_401_90737_20141020_180826_outLine +BABEL_OP3_401_91891_20141001_130023_inLine +BABEL_OP3_401_91891_20141001_130023_outLine +BABEL_OP3_401_92065_20141017_191557_inLine +BABEL_OP3_401_92065_20141017_191557_outLine +BABEL_OP3_401_92736_20141017_194915_inLine +BABEL_OP3_401_92736_20141017_194915_outLine +BABEL_OP3_401_92740_20140926_150615_inLine +BABEL_OP3_401_92740_20140926_150615_outLine +BABEL_OP3_401_93490_20141106_171428_inLine +BABEL_OP3_401_93490_20141106_171428_outLine +BABEL_OP3_401_94745_20140923_154933_inLine +BABEL_OP3_401_94745_20140923_154933_outLine +BABEL_OP3_401_95077_20141010_153959_inLine +BABEL_OP3_401_95077_20141010_153959_outLine +BABEL_OP3_401_95294_20140923_173007_inLine +BABEL_OP3_401_95294_20140923_173007_outLine +BABEL_OP3_401_95446_20141112_154248_inLine +BABEL_OP3_401_95446_20141112_154248_outLine +BABEL_OP3_401_95663_20140917_182410_inLine +BABEL_OP3_401_95663_20140917_182410_outLine +BABEL_OP3_401_96324_20140921_170922_inLine +BABEL_OP3_401_96324_20140921_170922_outLine +BABEL_OP3_401_97376_20140929_154000_inLine +BABEL_OP3_401_97376_20140929_154000_outLine +BABEL_OP3_401_97772_20140917_144539_inLine +BABEL_OP3_401_97772_20140917_144539_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/training.list b/egs/babel/s5d/conf/lists/401-mongolian/training.list new file mode 100644 index 00000000000..ebad291922b --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/training.list @@ -0,0 +1,518 @@ +BABEL_OP3_401_10524_20141119_213218_inLine +BABEL_OP3_401_10524_20141119_213218_outLine +BABEL_OP3_401_10647_20141119_154922_inLine +BABEL_OP3_401_10647_20141119_154922_outLine +BABEL_OP3_401_10901_20141021_124158_inLine +BABEL_OP3_401_10901_20141021_124158_outLine +BABEL_OP3_401_10966_20141019_192715_inLine +BABEL_OP3_401_10966_20141019_192715_outLine +BABEL_OP3_401_11581_20141030_214939_inLine +BABEL_OP3_401_11581_20141030_214939_outLine +BABEL_OP3_401_11663_20141105_143103_inLine +BABEL_OP3_401_11663_20141105_143103_outLine +BABEL_OP3_401_11673_20140917_163413_inLine +BABEL_OP3_401_11673_20140917_163413_outLine +BABEL_OP3_401_11797_20140929_205622_inLine +BABEL_OP3_401_11797_20140929_205622_outLine +BABEL_OP3_401_12036_20141002_134817_inLine +BABEL_OP3_401_12036_20141002_134817_outLine +BABEL_OP3_401_12242_20141014_143019_inLine +BABEL_OP3_401_12242_20141014_143019_outLine +BABEL_OP3_401_12635_20141117_185400_inLine +BABEL_OP3_401_12635_20141117_185400_outLine 
+BABEL_OP3_401_12767_20141001_130658_inLine +BABEL_OP3_401_12767_20141001_130658_outLine +BABEL_OP3_401_12851_20140919_135242_inLine +BABEL_OP3_401_12851_20140919_135242_outLine +BABEL_OP3_401_13030_20141015_163112_inLine +BABEL_OP3_401_13030_20141015_163112_outLine +BABEL_OP3_401_13184_20141110_163330_inLine +BABEL_OP3_401_13184_20141110_163330_outLine +BABEL_OP3_401_13184_20141110_163902_inLine +BABEL_OP3_401_13184_20141110_163902_outLine +BABEL_OP3_401_13324_20141002_165637_inLine +BABEL_OP3_401_13324_20141002_165637_outLine +BABEL_OP3_401_13490_20141110_152643_inLine +BABEL_OP3_401_13490_20141110_152643_outLine +BABEL_OP3_401_13561_20141027_154606_inLine +BABEL_OP3_401_13561_20141027_154606_outLine +BABEL_OP3_401_13586_20141023_193242_inLine +BABEL_OP3_401_13586_20141023_193242_outLine +BABEL_OP3_401_13664_20140922_131741_inLine +BABEL_OP3_401_13664_20140922_131741_outLine +BABEL_OP3_401_13709_20141118_170840_inLine +BABEL_OP3_401_13709_20141118_170840_outLine +BABEL_OP3_401_13744_20140919_122844_inLine +BABEL_OP3_401_13744_20140919_122844_outLine +BABEL_OP3_401_14560_20140922_140509_inLine +BABEL_OP3_401_14560_20140922_140509_outLine +BABEL_OP3_401_14719_20141017_215720_inLine +BABEL_OP3_401_14719_20141017_215720_outLine +BABEL_OP3_401_14725_20140929_155627_inLine +BABEL_OP3_401_14725_20140929_155627_outLine +BABEL_OP3_401_14807_20141030_232134_inLine +BABEL_OP3_401_14807_20141030_232134_outLine +BABEL_OP3_401_14814_20141014_184415_inLine +BABEL_OP3_401_14814_20141014_184415_outLine +BABEL_OP3_401_14972_20141028_200051_inLine +BABEL_OP3_401_14972_20141028_200051_outLine +BABEL_OP3_401_15702_20140923_180447_inLine +BABEL_OP3_401_15702_20140923_180447_outLine +BABEL_OP3_401_15730_20140924_135900_inLine +BABEL_OP3_401_15730_20140924_135900_outLine +BABEL_OP3_401_15902_20140930_144526_inLine +BABEL_OP3_401_15902_20140930_144526_outLine +BABEL_OP3_401_16475_20141016_143941_inLine +BABEL_OP3_401_16475_20141016_143941_outLine +BABEL_OP3_401_16749_20141112_193028_inLine +BABEL_OP3_401_16749_20141112_193028_outLine +BABEL_OP3_401_16839_20141110_174923_inLine +BABEL_OP3_401_16839_20141110_174923_outLine +BABEL_OP3_401_16886_20141017_152623_inLine +BABEL_OP3_401_16886_20141017_152623_outLine +BABEL_OP3_401_16924_20140923_164321_inLine +BABEL_OP3_401_16924_20140923_164321_outLine +BABEL_OP3_401_17320_20141125_170435_inLine +BABEL_OP3_401_17320_20141125_170435_outLine +BABEL_OP3_401_17420_20141118_190621_inLine +BABEL_OP3_401_17420_20141118_190621_outLine +BABEL_OP3_401_17567_20141023_213629_inLine +BABEL_OP3_401_17567_20141023_213629_outLine +BABEL_OP3_401_17615_20140924_144400_inLine +BABEL_OP3_401_17615_20140924_144400_outLine +BABEL_OP3_401_17923_20141002_172711_inLine +BABEL_OP3_401_17923_20141002_172711_outLine +BABEL_OP3_401_18078_20141113_162556_inLine +BABEL_OP3_401_18078_20141113_162556_outLine +BABEL_OP3_401_18380_20141023_154240_inLine +BABEL_OP3_401_18380_20141023_154240_outLine +BABEL_OP3_401_18566_20141120_004140_inLine +BABEL_OP3_401_18566_20141120_004140_outLine +BABEL_OP3_401_18924_20141030_205111_inLine +BABEL_OP3_401_18924_20141030_205111_outLine +BABEL_OP3_401_18939_20141001_200418_inLine +BABEL_OP3_401_18939_20141001_200418_outLine +BABEL_OP3_401_19101_20141029_183652_inLine +BABEL_OP3_401_19101_20141029_183652_outLine +BABEL_OP3_401_19134_20141030_191814_inLine +BABEL_OP3_401_19134_20141030_191814_outLine +BABEL_OP3_401_19134_20141030_192931_inLine +BABEL_OP3_401_19134_20141030_192931_outLine +BABEL_OP3_401_19722_20140930_200553_inLine 
+BABEL_OP3_401_19722_20140930_200553_outLine +BABEL_OP3_401_19773_20141101_211403_inLine +BABEL_OP3_401_19773_20141101_211403_outLine +BABEL_OP3_401_19818_20141103_184746_inLine +BABEL_OP3_401_19818_20141103_184746_outLine +BABEL_OP3_401_19818_20141103_185728_inLine +BABEL_OP3_401_19818_20141103_185728_outLine +BABEL_OP3_401_20133_20140919_173858_inLine +BABEL_OP3_401_20133_20140919_173858_outLine +BABEL_OP3_401_20916_20140922_174215_inLine +BABEL_OP3_401_20916_20140922_174215_outLine +BABEL_OP3_401_20922_20141110_190444_inLine +BABEL_OP3_401_20922_20141110_190444_outLine +BABEL_OP3_401_21206_20141003_120941_inLine +BABEL_OP3_401_21206_20141003_120941_outLine +BABEL_OP3_401_21206_20141003_122457_inLine +BABEL_OP3_401_21206_20141003_122457_outLine +BABEL_OP3_401_21327_20141020_204038_inLine +BABEL_OP3_401_21327_20141020_204038_outLine +BABEL_OP3_401_21807_20141029_214508_inLine +BABEL_OP3_401_21807_20141029_214508_outLine +BABEL_OP3_401_22321_20140929_180456_inLine +BABEL_OP3_401_22321_20140929_180456_outLine +BABEL_OP3_401_22446_20140929_133647_inLine +BABEL_OP3_401_22446_20140929_133647_outLine +BABEL_OP3_401_22624_20141001_141008_inLine +BABEL_OP3_401_22624_20141001_141008_outLine +BABEL_OP3_401_22918_20141114_145920_inLine +BABEL_OP3_401_22918_20141114_145920_outLine +BABEL_OP3_401_23006_20141014_190149_inLine +BABEL_OP3_401_23006_20141014_190149_outLine +BABEL_OP3_401_23046_20141014_150823_inLine +BABEL_OP3_401_23046_20141014_150823_outLine +BABEL_OP3_401_23092_20141010_141138_inLine +BABEL_OP3_401_23092_20141010_141138_outLine +BABEL_OP3_401_23153_20141018_201630_inLine +BABEL_OP3_401_23153_20141018_201630_outLine +BABEL_OP3_401_23893_20141125_213344_inLine +BABEL_OP3_401_23893_20141125_213344_outLine +BABEL_OP3_401_23980_20141018_192714_inLine +BABEL_OP3_401_23980_20141018_192714_outLine +BABEL_OP3_401_24270_20141030_195323_inLine +BABEL_OP3_401_24270_20141030_195323_outLine +BABEL_OP3_401_24290_20141124_184351_inLine +BABEL_OP3_401_24290_20141124_184351_outLine +BABEL_OP3_401_24323_20141017_151036_inLine +BABEL_OP3_401_24323_20141017_151036_outLine +BABEL_OP3_401_24470_20141029_145653_inLine +BABEL_OP3_401_24470_20141029_145653_outLine +BABEL_OP3_401_24569_20141016_182323_inLine +BABEL_OP3_401_24569_20141016_182323_outLine +BABEL_OP3_401_24586_20141117_160948_inLine +BABEL_OP3_401_24586_20141117_160948_outLine +BABEL_OP3_401_24590_20141017_175757_inLine +BABEL_OP3_401_24590_20141017_175757_outLine +BABEL_OP3_401_24605_20141001_142727_inLine +BABEL_OP3_401_24605_20141001_142727_outLine +BABEL_OP3_401_24679_20140919_185323_inLine +BABEL_OP3_401_24679_20140919_185323_outLine +BABEL_OP3_401_24982_20141008_150245_inLine +BABEL_OP3_401_24982_20141008_150245_outLine +BABEL_OP3_401_25412_20141031_171749_inLine +BABEL_OP3_401_25412_20141031_171749_outLine +BABEL_OP3_401_25719_20141110_191042_inLine +BABEL_OP3_401_25719_20141110_191042_outLine +BABEL_OP3_401_25961_20140929_183632_inLine +BABEL_OP3_401_25961_20140929_183632_outLine +BABEL_OP3_401_26072_20141112_173131_inLine +BABEL_OP3_401_26072_20141112_173131_outLine +BABEL_OP3_401_26398_20141204_001557_inLine +BABEL_OP3_401_26398_20141204_001557_outLine +BABEL_OP3_401_26507_20141118_210109_inLine +BABEL_OP3_401_26507_20141118_210109_outLine +BABEL_OP3_401_26574_20141103_163656_inLine +BABEL_OP3_401_26574_20141103_163656_outLine +BABEL_OP3_401_26999_20141101_213851_inLine +BABEL_OP3_401_26999_20141101_213851_outLine +BABEL_OP3_401_27042_20141017_184608_inLine +BABEL_OP3_401_27042_20141017_184608_outLine 
+BABEL_OP3_401_27203_20141030_164916_inLine +BABEL_OP3_401_27203_20141030_164916_outLine +BABEL_OP3_401_27841_20141113_200006_inLine +BABEL_OP3_401_27841_20141113_200006_outLine +BABEL_OP3_401_28522_20140927_172947_inLine +BABEL_OP3_401_28522_20140927_172947_outLine +BABEL_OP3_401_28600_20141021_194818_inLine +BABEL_OP3_401_28600_20141021_194818_outLine +BABEL_OP3_401_28775_20141003_162126_inLine +BABEL_OP3_401_28775_20141003_162126_outLine +BABEL_OP3_401_28814_20141112_190902_inLine +BABEL_OP3_401_28814_20141112_190902_outLine +BABEL_OP3_401_29021_20141118_205619_inLine +BABEL_OP3_401_29021_20141118_205619_outLine +BABEL_OP3_401_29076_20141031_003943_inLine +BABEL_OP3_401_29076_20141031_003943_outLine +BABEL_OP3_401_29323_20141113_190829_inLine +BABEL_OP3_401_29323_20141113_190829_outLine +BABEL_OP3_401_29404_20141121_153054_inLine +BABEL_OP3_401_29404_20141121_153054_outLine +BABEL_OP3_401_29685_20141019_210404_inLine +BABEL_OP3_401_29685_20141019_210404_outLine +BABEL_OP3_401_29685_20141019_210959_inLine +BABEL_OP3_401_29685_20141019_210959_outLine +BABEL_OP3_401_30013_20140927_141830_inLine +BABEL_OP3_401_30013_20140927_141830_outLine +BABEL_OP3_401_30058_20141118_221622_inLine +BABEL_OP3_401_30058_20141118_221622_outLine +BABEL_OP3_401_31346_20141103_145401_inLine +BABEL_OP3_401_31346_20141103_145401_outLine +BABEL_OP3_401_31624_20141003_192655_inLine +BABEL_OP3_401_31624_20141003_192655_outLine +BABEL_OP3_401_31628_20140923_145349_inLine +BABEL_OP3_401_31628_20140923_145349_outLine +BABEL_OP3_401_31992_20141001_135942_inLine +BABEL_OP3_401_31992_20141001_135942_outLine +BABEL_OP3_401_32122_20141016_212210_inLine +BABEL_OP3_401_32122_20141016_212210_outLine +BABEL_OP3_401_32328_20141018_200856_inLine +BABEL_OP3_401_32328_20141018_200856_outLine +BABEL_OP3_401_32708_20141003_200927_inLine +BABEL_OP3_401_32708_20141003_200927_outLine +BABEL_OP3_401_33111_20141122_223105_inLine +BABEL_OP3_401_33111_20141122_223105_outLine +BABEL_OP3_401_33273_20141021_153659_inLine +BABEL_OP3_401_33273_20141021_153659_outLine +BABEL_OP3_401_33355_20141001_174510_inLine +BABEL_OP3_401_33355_20141001_174510_outLine +BABEL_OP3_401_33497_20141106_201923_inLine +BABEL_OP3_401_33497_20141106_201923_outLine +BABEL_OP3_401_33672_20140930_132456_inLine +BABEL_OP3_401_33672_20140930_132456_outLine +BABEL_OP3_401_33672_20140930_133426_inLine +BABEL_OP3_401_33672_20140930_133426_outLine +BABEL_OP3_401_33913_20141020_135517_inLine +BABEL_OP3_401_33913_20141020_135517_outLine +BABEL_OP3_401_34197_20140919_193654_inLine +BABEL_OP3_401_34197_20140919_193654_outLine +BABEL_OP3_401_34328_20141020_142248_inLine +BABEL_OP3_401_34328_20141020_142248_outLine +BABEL_OP3_401_34679_20141006_155637_inLine +BABEL_OP3_401_34679_20141006_155637_outLine +BABEL_OP3_401_35139_20141002_182038_inLine +BABEL_OP3_401_35139_20141002_182038_outLine +BABEL_OP3_401_35143_20141010_163440_inLine +BABEL_OP3_401_35143_20141010_163440_outLine +BABEL_OP3_401_35467_20140919_155737_inLine +BABEL_OP3_401_35467_20140919_155737_outLine +BABEL_OP3_401_35467_20140919_162819_inLine +BABEL_OP3_401_35467_20140919_162819_outLine +BABEL_OP3_401_36894_20140921_162105_inLine +BABEL_OP3_401_36894_20140921_162105_outLine +BABEL_OP3_401_37285_20140929_192149_inLine +BABEL_OP3_401_37285_20140929_192149_outLine +BABEL_OP3_401_37290_20141031_174340_inLine +BABEL_OP3_401_37290_20141031_174340_outLine +BABEL_OP3_401_37598_20141031_155805_inLine +BABEL_OP3_401_37598_20141031_155805_outLine +BABEL_OP3_401_38340_20141003_182953_inLine 
+BABEL_OP3_401_38340_20141003_182953_outLine +BABEL_OP3_401_38878_20141031_201014_inLine +BABEL_OP3_401_38878_20141031_201014_outLine +BABEL_OP3_401_39307_20140922_113434_inLine +BABEL_OP3_401_39307_20140922_113434_outLine +BABEL_OP3_401_39426_20141114_165136_inLine +BABEL_OP3_401_39426_20141114_165136_outLine +BABEL_OP3_401_39920_20141118_215327_inLine +BABEL_OP3_401_39920_20141118_215327_outLine +BABEL_OP3_401_40557_20141014_182351_inLine +BABEL_OP3_401_40557_20141014_182351_outLine +BABEL_OP3_401_40713_20141003_155735_inLine +BABEL_OP3_401_40713_20141003_155735_outLine +BABEL_OP3_401_41592_20141020_140853_inLine +BABEL_OP3_401_41592_20141020_140853_outLine +BABEL_OP3_401_41598_20141113_151053_inLine +BABEL_OP3_401_41598_20141113_151053_outLine +BABEL_OP3_401_41618_20141028_201644_inLine +BABEL_OP3_401_41618_20141028_201644_outLine +BABEL_OP3_401_42029_20141113_160852_inLine +BABEL_OP3_401_42029_20141113_160852_outLine +BABEL_OP3_401_42155_20141028_185638_inLine +BABEL_OP3_401_42155_20141028_185638_outLine +BABEL_OP3_401_42434_20141019_233012_inLine +BABEL_OP3_401_42434_20141019_233012_outLine +BABEL_OP3_401_42497_20141002_144745_inLine +BABEL_OP3_401_42497_20141002_144745_outLine +BABEL_OP3_401_42619_20141104_204106_inLine +BABEL_OP3_401_42619_20141104_204106_outLine +BABEL_OP3_401_42771_20141028_135131_inLine +BABEL_OP3_401_42771_20141028_135131_outLine +BABEL_OP3_401_42834_20141103_204826_inLine +BABEL_OP3_401_42834_20141103_204826_outLine +BABEL_OP3_401_42942_20141018_160034_inLine +BABEL_OP3_401_42942_20141018_160034_outLine +BABEL_OP3_401_43286_20140923_144213_inLine +BABEL_OP3_401_43286_20140923_144213_outLine +BABEL_OP3_401_43646_20140917_164218_inLine +BABEL_OP3_401_43646_20140917_164218_outLine +BABEL_OP3_401_43784_20141008_215339_inLine +BABEL_OP3_401_43784_20141008_215339_outLine +BABEL_OP3_401_43788_20140925_172756_inLine +BABEL_OP3_401_43788_20140925_172756_outLine +BABEL_OP3_401_45201_20141127_132656_inLine +BABEL_OP3_401_45201_20141127_132656_outLine +BABEL_OP3_401_45560_20140930_190100_inLine +BABEL_OP3_401_45560_20140930_190100_outLine +BABEL_OP3_401_46261_20141021_185026_inLine +BABEL_OP3_401_46261_20141021_185026_outLine +BABEL_OP3_401_46310_20140930_153138_inLine +BABEL_OP3_401_46310_20140930_153138_outLine +BABEL_OP3_401_46550_20141006_181152_inLine +BABEL_OP3_401_46550_20141006_181152_outLine +BABEL_OP3_401_46558_20140924_164642_inLine +BABEL_OP3_401_46558_20140924_164642_outLine +BABEL_OP3_401_46589_20140924_191634_inLine +BABEL_OP3_401_46589_20140924_191634_outLine +BABEL_OP3_401_46681_20141002_163836_inLine +BABEL_OP3_401_46681_20141002_163836_outLine +BABEL_OP3_401_46688_20141001_201358_inLine +BABEL_OP3_401_46688_20141001_201358_outLine +BABEL_OP3_401_46770_20141111_221929_inLine +BABEL_OP3_401_46770_20141111_221929_outLine +BABEL_OP3_401_46881_20140922_175212_inLine +BABEL_OP3_401_46881_20140922_175212_outLine +BABEL_OP3_401_47283_20141006_193958_inLine +BABEL_OP3_401_47283_20141006_193958_outLine +BABEL_OP3_401_47487_20141016_162401_inLine +BABEL_OP3_401_47487_20141016_162401_outLine +BABEL_OP3_401_47866_20141124_164427_inLine +BABEL_OP3_401_47866_20141124_164427_outLine +BABEL_OP3_401_47878_20141030_173221_inLine +BABEL_OP3_401_47878_20141030_173221_outLine +BABEL_OP3_401_48243_20141006_175215_inLine +BABEL_OP3_401_48243_20141006_175215_outLine +BABEL_OP3_401_48610_20141001_225254_inLine +BABEL_OP3_401_48610_20141001_225254_outLine +BABEL_OP3_401_49001_20141014_165716_inLine +BABEL_OP3_401_49001_20141014_165716_outLine 
+BABEL_OP3_401_49306_20141124_193818_inLine +BABEL_OP3_401_49306_20141124_193818_outLine +BABEL_OP3_401_50427_20141028_152244_inLine +BABEL_OP3_401_50427_20141028_152244_outLine +BABEL_OP3_401_51407_20141027_182114_inLine +BABEL_OP3_401_51407_20141027_182114_outLine +BABEL_OP3_401_51968_20141019_151724_inLine +BABEL_OP3_401_51968_20141019_151724_outLine +BABEL_OP3_401_52404_20140924_182534_inLine +BABEL_OP3_401_52404_20140924_182534_outLine +BABEL_OP3_401_52725_20141123_224942_inLine +BABEL_OP3_401_52725_20141123_224942_outLine +BABEL_OP3_401_52818_20140922_184227_inLine +BABEL_OP3_401_52818_20140922_184227_outLine +BABEL_OP3_401_53957_20141020_142913_inLine +BABEL_OP3_401_53957_20141020_142913_outLine +BABEL_OP3_401_54074_20141021_142528_inLine +BABEL_OP3_401_54074_20141021_142528_outLine +BABEL_OP3_401_54162_20141107_221207_inLine +BABEL_OP3_401_54162_20141107_221207_outLine +BABEL_OP3_401_56331_20141124_184702_inLine +BABEL_OP3_401_56331_20141124_184702_outLine +BABEL_OP3_401_56677_20141020_160804_inLine +BABEL_OP3_401_56677_20141020_160804_outLine +BABEL_OP3_401_57065_20140924_135508_inLine +BABEL_OP3_401_57065_20140924_135508_outLine +BABEL_OP3_401_57529_20141017_181551_inLine +BABEL_OP3_401_57529_20141017_181551_outLine +BABEL_OP3_401_57542_20141122_182629_inLine +BABEL_OP3_401_57542_20141122_182629_outLine +BABEL_OP3_401_58006_20141124_153854_inLine +BABEL_OP3_401_58006_20141124_153854_outLine +BABEL_OP3_401_58006_20141124_155107_inLine +BABEL_OP3_401_58006_20141124_155107_outLine +BABEL_OP3_401_58734_20140930_173126_inLine +BABEL_OP3_401_58734_20140930_173126_outLine +BABEL_OP3_401_58821_20140930_211254_inLine +BABEL_OP3_401_58821_20140930_211254_outLine +BABEL_OP3_401_59078_20141030_203852_inLine +BABEL_OP3_401_59078_20141030_203852_outLine +BABEL_OP3_401_59078_20141030_205139_inLine +BABEL_OP3_401_59078_20141030_205139_outLine +BABEL_OP3_401_60026_20141002_115024_inLine +BABEL_OP3_401_60026_20141002_115024_outLine +BABEL_OP3_401_60310_20141017_165419_inLine +BABEL_OP3_401_60310_20141017_165419_outLine +BABEL_OP3_401_60474_20141015_154855_inLine +BABEL_OP3_401_60474_20141015_154855_outLine +BABEL_OP3_401_63906_20141124_212323_inLine +BABEL_OP3_401_63906_20141124_212323_outLine +BABEL_OP3_401_64398_20140922_165727_inLine +BABEL_OP3_401_64398_20140922_165727_outLine +BABEL_OP3_401_65077_20140917_151315_inLine +BABEL_OP3_401_65077_20140917_151315_outLine +BABEL_OP3_401_65367_20141111_163221_inLine +BABEL_OP3_401_65367_20141111_163221_outLine +BABEL_OP3_401_66472_20141027_173935_inLine +BABEL_OP3_401_66472_20141027_173935_outLine +BABEL_OP3_401_68068_20140925_140055_inLine +BABEL_OP3_401_68068_20140925_140055_outLine +BABEL_OP3_401_68384_20141020_225435_inLine +BABEL_OP3_401_68384_20141020_225435_outLine +BABEL_OP3_401_68385_20140919_175351_inLine +BABEL_OP3_401_68385_20140919_175351_outLine +BABEL_OP3_401_68748_20140925_160756_inLine +BABEL_OP3_401_68748_20140925_160756_outLine +BABEL_OP3_401_69474_20140930_190551_inLine +BABEL_OP3_401_69474_20140930_190551_outLine +BABEL_OP3_401_69636_20140924_174446_inLine +BABEL_OP3_401_69636_20140924_174446_outLine +BABEL_OP3_401_71566_20141001_171842_inLine +BABEL_OP3_401_71566_20141001_171842_outLine +BABEL_OP3_401_72040_20141009_171306_inLine +BABEL_OP3_401_72040_20141009_171306_outLine +BABEL_OP3_401_72110_20141001_122146_inLine +BABEL_OP3_401_72110_20141001_122146_outLine +BABEL_OP3_401_72844_20140919_154733_inLine +BABEL_OP3_401_72844_20140919_154733_outLine +BABEL_OP3_401_72844_20140919_162600_inLine 
+BABEL_OP3_401_72844_20140919_162600_outLine +BABEL_OP3_401_73022_20141111_173204_inLine +BABEL_OP3_401_73022_20141111_173204_outLine +BABEL_OP3_401_73430_20140930_142250_inLine +BABEL_OP3_401_73430_20140930_142250_outLine +BABEL_OP3_401_73591_20140904_190044_inLine +BABEL_OP3_401_73591_20140904_190044_outLine +BABEL_OP3_401_74667_20141017_173017_inLine +BABEL_OP3_401_74667_20141017_173017_outLine +BABEL_OP3_401_74799_20141030_203910_inLine +BABEL_OP3_401_74799_20141030_203910_outLine +BABEL_OP3_401_74921_20140924_165937_inLine +BABEL_OP3_401_74921_20140924_165937_outLine +BABEL_OP3_401_75505_20140917_155231_inLine +BABEL_OP3_401_75505_20140917_155231_outLine +BABEL_OP3_401_76126_20141018_171804_inLine +BABEL_OP3_401_76126_20141018_171804_outLine +BABEL_OP3_401_76437_20140904_161741_inLine +BABEL_OP3_401_76437_20140904_161741_outLine +BABEL_OP3_401_76444_20141014_203500_inLine +BABEL_OP3_401_76444_20141014_203500_outLine +BABEL_OP3_401_76499_20141022_151625_inLine +BABEL_OP3_401_76499_20141022_151625_outLine +BABEL_OP3_401_77744_20141014_125609_inLine +BABEL_OP3_401_77744_20141014_125609_outLine +BABEL_OP3_401_77744_20141014_140124_inLine +BABEL_OP3_401_77744_20141014_140124_outLine +BABEL_OP3_401_78482_20141104_155857_inLine +BABEL_OP3_401_78482_20141104_155857_outLine +BABEL_OP3_401_79080_20141112_120644_inLine +BABEL_OP3_401_79080_20141112_120644_outLine +BABEL_OP3_401_79131_20141125_193444_inLine +BABEL_OP3_401_79131_20141125_193444_outLine +BABEL_OP3_401_79167_20140925_132420_inLine +BABEL_OP3_401_79167_20140925_132420_outLine +BABEL_OP3_401_79995_20141020_232746_inLine +BABEL_OP3_401_79995_20141020_232746_outLine +BABEL_OP3_401_80136_20141112_134414_inLine +BABEL_OP3_401_80136_20141112_134414_outLine +BABEL_OP3_401_80306_20141110_184642_inLine +BABEL_OP3_401_80306_20141110_184642_outLine +BABEL_OP3_401_80439_20141015_141847_inLine +BABEL_OP3_401_80439_20141015_141847_outLine +BABEL_OP3_401_80559_20141003_131820_inLine +BABEL_OP3_401_80559_20141003_131820_outLine +BABEL_OP3_401_81287_20141001_145404_inLine +BABEL_OP3_401_81287_20141001_145404_outLine +BABEL_OP3_401_81433_20141027_184533_inLine +BABEL_OP3_401_81433_20141027_184533_outLine +BABEL_OP3_401_81622_20141021_162012_inLine +BABEL_OP3_401_81622_20141021_162012_outLine +BABEL_OP3_401_82035_20141030_173356_inLine +BABEL_OP3_401_82035_20141030_173356_outLine +BABEL_OP3_401_82035_20141030_174442_inLine +BABEL_OP3_401_82035_20141030_174442_outLine +BABEL_OP3_401_82224_20141111_175445_inLine +BABEL_OP3_401_82224_20141111_175445_outLine +BABEL_OP3_401_84547_20140917_192745_inLine +BABEL_OP3_401_84547_20140917_192745_outLine +BABEL_OP3_401_84547_20140917_194346_inLine +BABEL_OP3_401_84547_20140917_194346_outLine +BABEL_OP3_401_86676_20140924_200749_inLine +BABEL_OP3_401_86676_20140924_200749_outLine +BABEL_OP3_401_87073_20140917_201716_inLine +BABEL_OP3_401_87073_20140917_201716_outLine +BABEL_OP3_401_87871_20141018_185934_inLine +BABEL_OP3_401_87871_20141018_185934_outLine +BABEL_OP3_401_87921_20141010_173551_inLine +BABEL_OP3_401_87921_20141010_173551_outLine +BABEL_OP3_401_88783_20141022_171250_inLine +BABEL_OP3_401_88783_20141022_171250_outLine +BABEL_OP3_401_90737_20141020_180826_inLine +BABEL_OP3_401_90737_20141020_180826_outLine +BABEL_OP3_401_91891_20141001_130023_inLine +BABEL_OP3_401_91891_20141001_130023_outLine +BABEL_OP3_401_91977_20140925_184203_inLine +BABEL_OP3_401_91977_20140925_184203_outLine +BABEL_OP3_401_92065_20141017_191557_inLine +BABEL_OP3_401_92065_20141017_191557_outLine 
+BABEL_OP3_401_92736_20141017_194915_inLine +BABEL_OP3_401_92736_20141017_194915_outLine +BABEL_OP3_401_92740_20140926_150615_inLine +BABEL_OP3_401_92740_20140926_150615_outLine +BABEL_OP3_401_93490_20141106_171428_inLine +BABEL_OP3_401_93490_20141106_171428_outLine +BABEL_OP3_401_94745_20140923_154933_inLine +BABEL_OP3_401_94745_20140923_154933_outLine +BABEL_OP3_401_95077_20141010_153959_inLine +BABEL_OP3_401_95077_20141010_153959_outLine +BABEL_OP3_401_95294_20140923_173007_inLine +BABEL_OP3_401_95294_20140923_173007_outLine +BABEL_OP3_401_95446_20141112_154248_inLine +BABEL_OP3_401_95446_20141112_154248_outLine +BABEL_OP3_401_95663_20140917_182410_inLine +BABEL_OP3_401_95663_20140917_182410_outLine +BABEL_OP3_401_96324_20140921_170922_inLine +BABEL_OP3_401_96324_20140921_170922_outLine +BABEL_OP3_401_97376_20140929_154000_inLine +BABEL_OP3_401_97376_20140929_154000_outLine +BABEL_OP3_401_97772_20140917_144539_inLine +BABEL_OP3_401_97772_20140917_144539_outLine diff --git a/egs/babel/s5d/conf/lists/401-mongolian/untranscribed-training.list b/egs/babel/s5d/conf/lists/401-mongolian/untranscribed-training.list new file mode 100644 index 00000000000..45d13cc017e --- /dev/null +++ b/egs/babel/s5d/conf/lists/401-mongolian/untranscribed-training.list @@ -0,0 +1,530 @@ +BABEL_OP3_401_10184_20141027_150129_inLine +BABEL_OP3_401_10184_20141027_150129_outLine +BABEL_OP3_401_12321_20141101_210546_inLine +BABEL_OP3_401_12321_20141101_210546_outLine +BABEL_OP3_401_13178_20141101_181249_inLine +BABEL_OP3_401_13178_20141101_181249_outLine +BABEL_OP3_401_13189_20141114_170101_inLine +BABEL_OP3_401_13189_20141114_170101_outLine +BABEL_OP3_401_13189_20141114_174825_inLine +BABEL_OP3_401_13189_20141114_174825_outLine +BABEL_OP3_401_13427_20141027_145236_inLine +BABEL_OP3_401_13427_20141027_145236_outLine +BABEL_OP3_401_13792_20141001_135314_inLine +BABEL_OP3_401_13792_20141001_135314_outLine +BABEL_OP3_401_14440_20141101_191122_inLine +BABEL_OP3_401_14440_20141101_191122_outLine +BABEL_OP3_401_15042_20141125_181147_inLine +BABEL_OP3_401_15042_20141125_181147_outLine +BABEL_OP3_401_17280_20141016_160258_inLine +BABEL_OP3_401_17280_20141016_160258_outLine +BABEL_OP3_401_17496_20141103_155636_inLine +BABEL_OP3_401_17496_20141103_155636_outLine +BABEL_OP3_401_17496_20141103_160636_inLine +BABEL_OP3_401_17496_20141103_160636_outLine +BABEL_OP3_401_18118_20141125_212628_inLine +BABEL_OP3_401_18118_20141125_212628_outLine +BABEL_OP3_401_19130_20141125_202758_inLine +BABEL_OP3_401_19130_20141125_202758_outLine +BABEL_OP3_401_20738_20141118_152747_inLine +BABEL_OP3_401_20738_20141118_152747_outLine +BABEL_OP3_401_20800_20141002_124944_inLine +BABEL_OP3_401_20800_20141002_124944_outLine +BABEL_OP3_401_21435_20141121_182922_inLine +BABEL_OP3_401_21435_20141121_182922_outLine +BABEL_OP3_401_21624_20141124_180637_inLine +BABEL_OP3_401_21624_20141124_180637_outLine +BABEL_OP3_401_22170_20141118_234144_inLine +BABEL_OP3_401_22170_20141118_234144_outLine +BABEL_OP3_401_22280_20141029_152053_inLine +BABEL_OP3_401_22280_20141029_152053_outLine +BABEL_OP3_401_23151_20141113_164930_inLine +BABEL_OP3_401_23151_20141113_164930_outLine +BABEL_OP3_401_23151_20141113_165631_inLine +BABEL_OP3_401_23151_20141113_165631_outLine +BABEL_OP3_401_24017_20141104_165843_inLine +BABEL_OP3_401_24017_20141104_165843_outLine +BABEL_OP3_401_24017_20141104_170453_inLine +BABEL_OP3_401_24017_20141104_170453_outLine +BABEL_OP3_401_24033_20141111_211829_inLine +BABEL_OP3_401_24033_20141111_211829_outLine +BABEL_OP3_401_24239_20141207_015922_inLine 
+BABEL_OP3_401_24239_20141207_015922_outLine +BABEL_OP3_401_24501_20141117_164716_inLine +BABEL_OP3_401_24501_20141117_164716_outLine +BABEL_OP3_401_25015_20141125_191714_inLine +BABEL_OP3_401_25015_20141125_191714_outLine +BABEL_OP3_401_30653_20141119_225659_inLine +BABEL_OP3_401_30653_20141119_225659_outLine +BABEL_OP3_401_30869_20141105_163908_inLine +BABEL_OP3_401_30869_20141105_163908_outLine +BABEL_OP3_401_30869_20141105_165054_inLine +BABEL_OP3_401_30869_20141105_165054_outLine +BABEL_OP3_401_31182_20141103_193721_inLine +BABEL_OP3_401_31182_20141103_193721_outLine +BABEL_OP3_401_31583_20141017_202348_inLine +BABEL_OP3_401_31583_20141017_202348_outLine +BABEL_OP3_401_32832_20141106_155802_inLine +BABEL_OP3_401_32832_20141106_155802_outLine +BABEL_OP3_401_33635_20141018_183504_inLine +BABEL_OP3_401_33635_20141018_183504_outLine +BABEL_OP3_401_33635_20141018_204625_inLine +BABEL_OP3_401_33635_20141018_204625_outLine +BABEL_OP3_401_33840_20141105_190509_inLine +BABEL_OP3_401_33840_20141105_190509_outLine +BABEL_OP3_401_34336_20141015_173115_inLine +BABEL_OP3_401_34336_20141015_173115_outLine +BABEL_OP3_401_35706_20141123_232430_inLine +BABEL_OP3_401_35706_20141123_232430_outLine +BABEL_OP3_401_36059_20141120_202614_inLine +BABEL_OP3_401_36059_20141120_202614_outLine +BABEL_OP3_401_36594_20141118_225937_inLine +BABEL_OP3_401_36594_20141118_225937_outLine +BABEL_OP3_401_36669_20141028_143332_inLine +BABEL_OP3_401_36669_20141028_143332_outLine +BABEL_OP3_401_36900_20141105_173543_inLine +BABEL_OP3_401_36900_20141105_173543_outLine +BABEL_OP3_401_38076_20141101_181606_inLine +BABEL_OP3_401_38076_20141101_181606_outLine +BABEL_OP3_401_38125_20141207_005829_inLine +BABEL_OP3_401_38125_20141207_005829_outLine +BABEL_OP3_401_38125_20141207_010858_inLine +BABEL_OP3_401_38125_20141207_010858_outLine +BABEL_OP3_401_38588_20141016_200521_inLine +BABEL_OP3_401_38588_20141016_200521_outLine +BABEL_OP3_401_39099_20141204_002759_inLine +BABEL_OP3_401_39099_20141204_002759_outLine +BABEL_OP3_401_40740_20141110_180540_inLine +BABEL_OP3_401_40740_20141110_180540_outLine +BABEL_OP3_401_41109_20141111_174909_inLine +BABEL_OP3_401_41109_20141111_174909_outLine +BABEL_OP3_401_41233_20141104_180556_inLine +BABEL_OP3_401_41233_20141104_180556_outLine +BABEL_OP3_401_41609_20140930_160252_inLine +BABEL_OP3_401_41609_20140930_160252_outLine +BABEL_OP3_401_42718_20141203_164339_inLine +BABEL_OP3_401_42718_20141203_164339_outLine +BABEL_OP3_401_42718_20141203_165811_inLine +BABEL_OP3_401_42718_20141203_165811_outLine +BABEL_OP3_401_44255_20141114_190226_inLine +BABEL_OP3_401_44255_20141114_190226_outLine +BABEL_OP3_401_44290_20141125_182137_inLine +BABEL_OP3_401_44290_20141125_182137_outLine +BABEL_OP3_401_45770_20140930_173734_inLine +BABEL_OP3_401_45770_20140930_173734_outLine +BABEL_OP3_401_46008_20141127_210910_inLine +BABEL_OP3_401_46008_20141127_210910_outLine +BABEL_OP3_401_46763_20141119_173306_inLine +BABEL_OP3_401_46763_20141119_173306_outLine +BABEL_OP3_401_48907_20141125_203242_inLine +BABEL_OP3_401_48907_20141125_203242_outLine +BABEL_OP3_401_49287_20141102_150144_inLine +BABEL_OP3_401_49287_20141102_150144_outLine +BABEL_OP3_401_49502_20140924_135047_inLine +BABEL_OP3_401_49502_20140924_135047_outLine +BABEL_OP3_401_49637_20140929_203313_inLine +BABEL_OP3_401_49637_20140929_203313_outLine +BABEL_OP3_401_49775_20140917_162425_inLine +BABEL_OP3_401_49775_20140917_162425_outLine +BABEL_OP3_401_49812_20141111_182212_inLine +BABEL_OP3_401_49812_20141111_182212_outLine 
+BABEL_OP3_401_49902_20141015_154547_inLine +BABEL_OP3_401_49902_20141015_154547_outLine +BABEL_OP3_401_50090_20141031_162652_inLine +BABEL_OP3_401_50090_20141031_162652_outLine +BABEL_OP3_401_50175_20140923_130231_inLine +BABEL_OP3_401_50175_20140923_130231_outLine +BABEL_OP3_401_50958_20141018_223514_inLine +BABEL_OP3_401_50958_20141018_223514_outLine +BABEL_OP3_401_51417_20141110_191727_inLine +BABEL_OP3_401_51417_20141110_191727_outLine +BABEL_OP3_401_51530_20141204_001348_inLine +BABEL_OP3_401_51530_20141204_001348_outLine +BABEL_OP3_401_53072_20141127_201357_inLine +BABEL_OP3_401_53072_20141127_201357_outLine +BABEL_OP3_401_53415_20141118_232010_inLine +BABEL_OP3_401_53415_20141118_232010_outLine +BABEL_OP3_401_53492_20141124_201111_inLine +BABEL_OP3_401_53492_20141124_201111_outLine +BABEL_OP3_401_53665_20141125_180322_inLine +BABEL_OP3_401_53665_20141125_180322_outLine +BABEL_OP3_401_54160_20140930_215406_inLine +BABEL_OP3_401_54160_20140930_215406_outLine +BABEL_OP3_401_54405_20141027_133437_inLine +BABEL_OP3_401_54405_20141027_133437_outLine +BABEL_OP3_401_55742_20141003_153216_inLine +BABEL_OP3_401_55742_20141003_153216_outLine +BABEL_OP3_401_55818_20140930_191724_inLine +BABEL_OP3_401_55818_20140930_191724_outLine +BABEL_OP3_401_55950_20141125_195752_inLine +BABEL_OP3_401_55950_20141125_195752_outLine +BABEL_OP3_401_56019_20141118_211141_inLine +BABEL_OP3_401_56019_20141118_211141_outLine +BABEL_OP3_401_56523_20141017_152325_inLine +BABEL_OP3_401_56523_20141017_152325_outLine +BABEL_OP3_401_56743_20141016_193127_inLine +BABEL_OP3_401_56743_20141016_193127_outLine +BABEL_OP3_401_57067_20141110_211445_inLine +BABEL_OP3_401_57067_20141110_211445_outLine +BABEL_OP3_401_57609_20141028_162956_inLine +BABEL_OP3_401_57609_20141028_162956_outLine +BABEL_OP3_401_57650_20141117_142921_inLine +BABEL_OP3_401_57650_20141117_142921_outLine +BABEL_OP3_401_57654_20141002_120228_inLine +BABEL_OP3_401_57654_20141002_120228_outLine +BABEL_OP3_401_57678_20141015_161604_inLine +BABEL_OP3_401_57678_20141015_161604_outLine +BABEL_OP3_401_58585_20141112_192259_inLine +BABEL_OP3_401_58585_20141112_192259_outLine +BABEL_OP3_401_58850_20141017_141308_inLine +BABEL_OP3_401_58850_20141017_141308_outLine +BABEL_OP3_401_58926_20141003_143419_inLine +BABEL_OP3_401_58926_20141003_143419_outLine +BABEL_OP3_401_59291_20141017_162350_inLine +BABEL_OP3_401_59291_20141017_162350_outLine +BABEL_OP3_401_59864_20141206_195010_inLine +BABEL_OP3_401_59864_20141206_195010_outLine +BABEL_OP3_401_60626_20141003_151111_inLine +BABEL_OP3_401_60626_20141003_151111_outLine +BABEL_OP3_401_60661_20141002_182507_inLine +BABEL_OP3_401_60661_20141002_182507_outLine +BABEL_OP3_401_60836_20141013_164932_inLine +BABEL_OP3_401_60836_20141013_164932_outLine +BABEL_OP3_401_61219_20141015_175439_inLine +BABEL_OP3_401_61219_20141015_175439_outLine +BABEL_OP3_401_61357_20141113_164017_inLine +BABEL_OP3_401_61357_20141113_164017_outLine +BABEL_OP3_401_61435_20141104_205806_inLine +BABEL_OP3_401_61435_20141104_205806_outLine +BABEL_OP3_401_62177_20141114_161832_inLine +BABEL_OP3_401_62177_20141114_161832_outLine +BABEL_OP3_401_62289_20141204_011459_inLine +BABEL_OP3_401_62289_20141204_011459_outLine +BABEL_OP3_401_62289_20141204_012356_inLine +BABEL_OP3_401_62289_20141204_012356_outLine +BABEL_OP3_401_62430_20141117_174830_inLine +BABEL_OP3_401_62430_20141117_174830_outLine +BABEL_OP3_401_62835_20141020_153234_inLine +BABEL_OP3_401_62835_20141020_153234_outLine +BABEL_OP3_401_63220_20141101_205612_inLine 
+BABEL_OP3_401_63220_20141101_205612_outLine +BABEL_OP3_401_63523_20141204_010313_inLine +BABEL_OP3_401_63523_20141204_010313_outLine +BABEL_OP3_401_63757_20141029_150937_inLine +BABEL_OP3_401_63757_20141029_150937_outLine +BABEL_OP3_401_63938_20141114_163623_inLine +BABEL_OP3_401_63938_20141114_163623_outLine +BABEL_OP3_401_64350_20141002_131743_inLine +BABEL_OP3_401_64350_20141002_131743_outLine +BABEL_OP3_401_64638_20140923_193255_inLine +BABEL_OP3_401_64638_20140923_193255_outLine +BABEL_OP3_401_64759_20140930_133630_inLine +BABEL_OP3_401_64759_20140930_133630_outLine +BABEL_OP3_401_64768_20141015_185430_inLine +BABEL_OP3_401_64768_20141015_185430_outLine +BABEL_OP3_401_64796_20140922_122936_inLine +BABEL_OP3_401_64796_20140922_122936_outLine +BABEL_OP3_401_65298_20141113_154021_inLine +BABEL_OP3_401_65298_20141113_154021_outLine +BABEL_OP3_401_65477_20141017_155857_inLine +BABEL_OP3_401_65477_20141017_155857_outLine +BABEL_OP3_401_65882_20141003_133913_inLine +BABEL_OP3_401_65882_20141003_133913_outLine +BABEL_OP3_401_66045_20141023_123024_inLine +BABEL_OP3_401_66045_20141023_123024_outLine +BABEL_OP3_401_66177_20141118_200110_inLine +BABEL_OP3_401_66177_20141118_200110_outLine +BABEL_OP3_401_66967_20140929_190454_inLine +BABEL_OP3_401_66967_20140929_190454_outLine +BABEL_OP3_401_67373_20141003_140545_inLine +BABEL_OP3_401_67373_20141003_140545_outLine +BABEL_OP3_401_67592_20141102_134846_outLine +BABEL_OP3_401_67794_20141003_133705_inLine +BABEL_OP3_401_67794_20141003_133705_outLine +BABEL_OP3_401_67964_20141201_174143_inLine +BABEL_OP3_401_67964_20141201_174143_outLine +BABEL_OP3_401_67999_20141111_153758_inLine +BABEL_OP3_401_67999_20141111_153758_outLine +BABEL_OP3_401_68182_20141119_114536_inLine +BABEL_OP3_401_68182_20141119_114536_outLine +BABEL_OP3_401_68182_20141119_115542_inLine +BABEL_OP3_401_68182_20141119_115542_outLine +BABEL_OP3_401_69992_20140930_195445_inLine +BABEL_OP3_401_69992_20140930_195445_outLine +BABEL_OP3_401_70110_20140917_141249_inLine +BABEL_OP3_401_70110_20140917_141249_outLine +BABEL_OP3_401_70386_20141015_182629_inLine +BABEL_OP3_401_70386_20141015_182629_outLine +BABEL_OP3_401_70601_20141016_160902_inLine +BABEL_OP3_401_70601_20141016_160902_outLine +BABEL_OP3_401_70713_20141118_164200_inLine +BABEL_OP3_401_70713_20141118_164200_outLine +BABEL_OP3_401_71038_20141112_182205_inLine +BABEL_OP3_401_71038_20141112_182205_outLine +BABEL_OP3_401_71038_20141112_183801_inLine +BABEL_OP3_401_71038_20141112_183801_outLine +BABEL_OP3_401_71038_20141112_184910_inLine +BABEL_OP3_401_71038_20141112_184910_outLine +BABEL_OP3_401_71282_20141113_172102_inLine +BABEL_OP3_401_71282_20141113_172102_outLine +BABEL_OP3_401_71333_20141014_190834_inLine +BABEL_OP3_401_71333_20141014_190834_outLine +BABEL_OP3_401_71704_20141002_173424_inLine +BABEL_OP3_401_71704_20141002_173424_outLine +BABEL_OP3_401_71780_20141006_202842_inLine +BABEL_OP3_401_71780_20141006_202842_outLine +BABEL_OP3_401_72349_20141125_020034_inLine +BABEL_OP3_401_72349_20141125_020034_outLine +BABEL_OP3_401_72587_20141107_174322_inLine +BABEL_OP3_401_72587_20141107_174322_outLine +BABEL_OP3_401_72733_20141126_185701_inLine +BABEL_OP3_401_72733_20141126_185701_outLine +BABEL_OP3_401_73072_20141001_214124_inLine +BABEL_OP3_401_73072_20141001_214124_outLine +BABEL_OP3_401_73119_20141016_201748_inLine +BABEL_OP3_401_73119_20141016_201748_outLine +BABEL_OP3_401_73301_20141014_154044_inLine +BABEL_OP3_401_73301_20141014_154044_outLine +BABEL_OP3_401_73622_20141001_214706_inLine 
+BABEL_OP3_401_73622_20141001_214706_outLine +BABEL_OP3_401_73757_20141022_145713_inLine +BABEL_OP3_401_73757_20141022_145713_outLine +BABEL_OP3_401_73837_20141014_174244_inLine +BABEL_OP3_401_73837_20141014_174244_outLine +BABEL_OP3_401_74111_20141102_152314_inLine +BABEL_OP3_401_74280_20140917_171519_inLine +BABEL_OP3_401_74280_20140917_171519_outLine +BABEL_OP3_401_74455_20141113_142847_inLine +BABEL_OP3_401_74455_20141113_142847_outLine +BABEL_OP3_401_74641_20141029_170835_inLine +BABEL_OP3_401_74641_20141029_170835_outLine +BABEL_OP3_401_74728_20141125_185810_inLine +BABEL_OP3_401_74728_20141125_185810_outLine +BABEL_OP3_401_75223_20140929_144010_inLine +BABEL_OP3_401_75223_20140929_144010_outLine +BABEL_OP3_401_75869_20141122_162915_inLine +BABEL_OP3_401_75869_20141122_162915_outLine +BABEL_OP3_401_75869_20141122_163817_inLine +BABEL_OP3_401_75869_20141122_163817_outLine +BABEL_OP3_401_75993_20141003_155108_inLine +BABEL_OP3_401_75993_20141003_155108_outLine +BABEL_OP3_401_76155_20141018_235119_inLine +BABEL_OP3_401_76155_20141018_235119_outLine +BABEL_OP3_401_76372_20141122_205123_inLine +BABEL_OP3_401_76372_20141122_205123_outLine +BABEL_OP3_401_76756_20141031_190329_inLine +BABEL_OP3_401_76756_20141031_190329_outLine +BABEL_OP3_401_76773_20141002_161621_inLine +BABEL_OP3_401_76773_20141002_161621_outLine +BABEL_OP3_401_77112_20141008_135410_inLine +BABEL_OP3_401_77112_20141008_135410_outLine +BABEL_OP3_401_77391_20141014_202916_inLine +BABEL_OP3_401_77391_20141014_202916_outLine +BABEL_OP3_401_77391_20141014_204156_inLine +BABEL_OP3_401_77391_20141014_204156_outLine +BABEL_OP3_401_77427_20141019_151638_inLine +BABEL_OP3_401_77427_20141019_151638_outLine +BABEL_OP3_401_78360_20141112_174704_inLine +BABEL_OP3_401_78360_20141112_174704_outLine +BABEL_OP3_401_78454_20141030_190417_inLine +BABEL_OP3_401_78454_20141030_190417_outLine +BABEL_OP3_401_78609_20141101_190650_inLine +BABEL_OP3_401_78609_20141101_190650_outLine +BABEL_OP3_401_78609_20141101_191730_inLine +BABEL_OP3_401_78609_20141101_191730_outLine +BABEL_OP3_401_78943_20141015_141252_inLine +BABEL_OP3_401_78943_20141015_141252_outLine +BABEL_OP3_401_78976_20141016_202006_inLine +BABEL_OP3_401_78976_20141016_202006_outLine +BABEL_OP3_401_79505_20141125_151308_inLine +BABEL_OP3_401_79505_20141125_151308_outLine +BABEL_OP3_401_79590_20141019_151813_inLine +BABEL_OP3_401_79590_20141019_151813_outLine +BABEL_OP3_401_79820_20141015_191402_inLine +BABEL_OP3_401_79820_20141015_191402_outLine +BABEL_OP3_401_79858_20140930_180452_inLine +BABEL_OP3_401_79858_20140930_180452_outLine +BABEL_OP3_401_80577_20141124_151617_inLine +BABEL_OP3_401_80577_20141124_151617_outLine +BABEL_OP3_401_80622_20141031_193633_inLine +BABEL_OP3_401_80622_20141031_193633_outLine +BABEL_OP3_401_81229_20141017_145439_inLine +BABEL_OP3_401_81229_20141017_145439_outLine +BABEL_OP3_401_82030_20141126_190214_inLine +BABEL_OP3_401_82030_20141126_190214_outLine +BABEL_OP3_401_82637_20140922_152004_inLine +BABEL_OP3_401_82637_20140922_152004_outLine +BABEL_OP3_401_82863_20141020_125644_inLine +BABEL_OP3_401_82863_20141020_125644_outLine +BABEL_OP3_401_82979_20141016_150329_inLine +BABEL_OP3_401_82979_20141016_150329_outLine +BABEL_OP3_401_83062_20141124_210713_inLine +BABEL_OP3_401_83062_20141124_210713_outLine +BABEL_OP3_401_83366_20141107_185153_inLine +BABEL_OP3_401_83366_20141107_185153_outLine +BABEL_OP3_401_83775_20141016_165202_inLine +BABEL_OP3_401_83775_20141016_165202_outLine +BABEL_OP3_401_83783_20141029_142056_inLine 
+BABEL_OP3_401_83783_20141029_142056_outLine
+BABEL_OP3_401_84055_20141118_213900_inLine
+BABEL_OP3_401_84055_20141118_213900_outLine
+BABEL_OP3_401_84061_20141019_160653_inLine
+BABEL_OP3_401_84061_20141019_160653_outLine
+BABEL_OP3_401_84125_20140919_142411_inLine
+BABEL_OP3_401_84125_20140919_142411_outLine
+BABEL_OP3_401_84583_20141028_135606_inLine
+BABEL_OP3_401_84583_20141028_135606_outLine
+BABEL_OP3_401_84605_20141013_223927_inLine
+BABEL_OP3_401_84605_20141013_223927_outLine
+BABEL_OP3_401_84737_20141114_223714_inLine
+BABEL_OP3_401_84737_20141114_223714_outLine
+BABEL_OP3_401_84768_20141001_160652_inLine
+BABEL_OP3_401_84768_20141001_160652_outLine
+BABEL_OP3_401_85048_20141030_163324_inLine
+BABEL_OP3_401_85048_20141030_163324_outLine
+BABEL_OP3_401_85179_20141105_155540_inLine
+BABEL_OP3_401_85179_20141105_155540_outLine
+BABEL_OP3_401_85248_20141114_150825_inLine
+BABEL_OP3_401_85248_20141114_150825_outLine
+BABEL_OP3_401_85248_20141114_152742_inLine
+BABEL_OP3_401_85248_20141114_152742_outLine
+BABEL_OP3_401_85325_20141127_141209_inLine
+BABEL_OP3_401_85325_20141127_141209_outLine
+BABEL_OP3_401_85340_20141006_165058_inLine
+BABEL_OP3_401_85340_20141006_165058_outLine
+BABEL_OP3_401_86472_20140924_120802_inLine
+BABEL_OP3_401_86472_20140924_120802_outLine
+BABEL_OP3_401_86748_20141117_205420_inLine
+BABEL_OP3_401_86748_20141117_205420_outLine
+BABEL_OP3_401_86860_20141204_001000_inLine
+BABEL_OP3_401_86860_20141204_001000_outLine
+BABEL_OP3_401_86888_20141101_175833_inLine
+BABEL_OP3_401_86888_20141101_175833_outLine
+BABEL_OP3_401_86952_20141003_103859_inLine
+BABEL_OP3_401_86952_20141003_103859_outLine
+BABEL_OP3_401_87074_20141006_143605_inLine
+BABEL_OP3_401_87074_20141006_143605_outLine
+BABEL_OP3_401_87489_20141118_173238_inLine
+BABEL_OP3_401_87489_20141118_173238_outLine
+BABEL_OP3_401_87545_20141204_001833_inLine
+BABEL_OP3_401_87545_20141204_001833_outLine
+BABEL_OP3_401_87629_20141028_191608_inLine
+BABEL_OP3_401_87629_20141028_191608_outLine
+BABEL_OP3_401_87693_20141003_190102_inLine
+BABEL_OP3_401_87693_20141003_190102_outLine
+BABEL_OP3_401_88372_20141125_142302_inLine
+BABEL_OP3_401_88372_20141125_142302_outLine
+BABEL_OP3_401_88601_20141023_164043_inLine
+BABEL_OP3_401_88601_20141023_164043_outLine
+BABEL_OP3_401_88669_20141031_182135_inLine
+BABEL_OP3_401_88669_20141031_182135_outLine
+BABEL_OP3_401_88812_20141203_173638_inLine
+BABEL_OP3_401_88812_20141203_173638_outLine
+BABEL_OP3_401_88812_20141203_180453_inLine
+BABEL_OP3_401_88812_20141203_180453_outLine
+BABEL_OP3_401_89045_20140917_131337_inLine
+BABEL_OP3_401_89045_20140917_131337_outLine
+BABEL_OP3_401_89059_20141111_185303_inLine
+BABEL_OP3_401_89059_20141111_185303_outLine
+BABEL_OP3_401_89457_20141020_143004_inLine
+BABEL_OP3_401_89457_20141020_143004_outLine
+BABEL_OP3_401_89560_20141102_161259_inLine
+BABEL_OP3_401_89560_20141102_161259_outLine
+BABEL_OP3_401_89888_20141002_173642_inLine
+BABEL_OP3_401_89888_20141002_173642_outLine
+BABEL_OP3_401_89888_20141002_175247_inLine
+BABEL_OP3_401_89888_20141002_175247_outLine
+BABEL_OP3_401_89943_20141014_163254_inLine
+BABEL_OP3_401_89943_20141014_163254_outLine
+BABEL_OP3_401_89943_20141014_165144_inLine
+BABEL_OP3_401_89943_20141014_165144_outLine
+BABEL_OP3_401_90080_20141124_210928_inLine
+BABEL_OP3_401_90080_20141124_210928_outLine
+BABEL_OP3_401_91080_20141107_184614_inLine
+BABEL_OP3_401_91080_20141107_184614_outLine
+BABEL_OP3_401_91336_20141022_164858_inLine
+BABEL_OP3_401_91336_20141022_164858_outLine
+BABEL_OP3_401_91372_20141126_174359_inLine
+BABEL_OP3_401_91372_20141126_174359_outLine
+BABEL_OP3_401_91825_20140930_140910_inLine
+BABEL_OP3_401_91825_20140930_140910_outLine
+BABEL_OP3_401_91825_20140930_142615_inLine
+BABEL_OP3_401_91825_20140930_142615_outLine
+BABEL_OP3_401_91930_20141117_203237_inLine
+BABEL_OP3_401_91930_20141117_203237_outLine
+BABEL_OP3_401_91944_20141002_002457_inLine
+BABEL_OP3_401_91944_20141002_002457_outLine
+BABEL_OP3_401_92096_20141122_181058_inLine
+BABEL_OP3_401_92096_20141122_181058_outLine
+BABEL_OP3_401_92176_20141022_194334_inLine
+BABEL_OP3_401_92176_20141022_194334_outLine
+BABEL_OP3_401_92356_20141113_184902_inLine
+BABEL_OP3_401_92356_20141113_184902_outLine
+BABEL_OP3_401_92509_20140919_170134_inLine
+BABEL_OP3_401_92509_20140919_170134_outLine
+BABEL_OP3_401_92557_20141113_141949_inLine
+BABEL_OP3_401_92557_20141113_141949_outLine
+BABEL_OP3_401_92886_20141008_194243_inLine
+BABEL_OP3_401_92886_20141008_194243_outLine
+BABEL_OP3_401_92942_20141031_154005_inLine
+BABEL_OP3_401_92942_20141031_154005_outLine
+BABEL_OP3_401_93469_20141204_000050_inLine
+BABEL_OP3_401_93469_20141204_000050_outLine
+BABEL_OP3_401_93515_20141207_011722_inLine
+BABEL_OP3_401_93515_20141207_011722_outLine
+BABEL_OP3_401_93604_20141206_154822_inLine
+BABEL_OP3_401_93604_20141206_154822_outLine
+BABEL_OP3_401_93861_20141022_174829_inLine
+BABEL_OP3_401_93861_20141022_174829_outLine
+BABEL_OP3_401_94141_20141125_195408_inLine
+BABEL_OP3_401_94141_20141125_195408_outLine
+BABEL_OP3_401_94409_20141019_155250_inLine
+BABEL_OP3_401_94409_20141019_155250_outLine
+BABEL_OP3_401_95269_20141016_175058_inLine
+BABEL_OP3_401_95269_20141016_175058_outLine
+BABEL_OP3_401_95269_20141016_175950_inLine
+BABEL_OP3_401_95269_20141016_175950_outLine
+BABEL_OP3_401_95399_20141021_140337_inLine
+BABEL_OP3_401_95399_20141021_140337_outLine
+BABEL_OP3_401_96059_20141201_200308_inLine
+BABEL_OP3_401_96059_20141201_200308_outLine
+BABEL_OP3_401_96190_20141013_142533_inLine
+BABEL_OP3_401_96190_20141013_142533_outLine
+BABEL_OP3_401_96405_20141013_185112_inLine
+BABEL_OP3_401_96405_20141013_185112_outLine
+BABEL_OP3_401_96405_20141013_195512_inLine
+BABEL_OP3_401_96405_20141013_195512_outLine
+BABEL_OP3_401_96584_20141114_205949_inLine
+BABEL_OP3_401_96584_20141114_205949_outLine
+BABEL_OP3_401_96934_20141015_153021_inLine
+BABEL_OP3_401_96934_20141015_153021_outLine
+BABEL_OP3_401_97097_20141122_194201_inLine
+BABEL_OP3_401_97097_20141122_194201_outLine
+BABEL_OP3_401_97731_20141105_135405_inLine
+BABEL_OP3_401_97731_20141105_135405_outLine
+BABEL_OP3_401_97896_20141021_124204_inLine
+BABEL_OP3_401_97896_20141021_124204_outLine
+BABEL_OP3_401_98365_20141029_133629_inLine
+BABEL_OP3_401_98365_20141029_133629_outLine
+BABEL_OP3_401_98580_20141021_140835_inLine
+BABEL_OP3_401_98580_20141021_140835_outLine
+BABEL_OP3_401_98888_20141019_153225_inLine
+BABEL_OP3_401_98888_20141019_153225_outLine
+BABEL_OP3_401_98888_20141019_160421_inLine
+BABEL_OP3_401_98888_20141019_160421_outLine
+BABEL_OP3_401_99264_20141104_195940_inLine
+BABEL_OP3_401_99264_20141104_195940_outLine
+BABEL_OP3_401_99264_20141104_200707_inLine
+BABEL_OP3_401_99264_20141104_200707_outLine
+BABEL_OP3_401_99289_20141122_150548_inLine
+BABEL_OP3_401_99289_20141122_150548_outLine
+BABEL_OP3_401_99487_20141001_154915_inLine
+BABEL_OP3_401_99487_20141001_154915_outLine
+BABEL_OP3_401_99487_20141001_155922_inLine
+BABEL_OP3_401_99487_20141001_155922_outLine
+BABEL_OP3_401_99516_20140924_152057_inLine
+BABEL_OP3_401_99516_20140924_152057_outLine
+BABEL_OP3_401_99718_20141003_130643_inLine
+BABEL_OP3_401_99718_20141003_130643_outLine
+BABEL_OP3_401_99813_20141027_183714_inLine
+BABEL_OP3_401_99813_20141027_183714_outLine
diff --git a/egs/babel/s5d/conf/lists/402-javanese/dev.2h.list b/egs/babel/s5d/conf/lists/402-javanese/dev.2h.list
new file mode 100644
index 00000000000..46233026964
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/402-javanese/dev.2h.list
@@ -0,0 +1,122 @@
+BABEL_OP3_402_10184_20141119_194233_inLine
+BABEL_OP3_402_10184_20141119_194233_outLine
+BABEL_OP3_402_11581_20141124_181058_inLine
+BABEL_OP3_402_11581_20141124_181058_outLine
+BABEL_OP3_402_15535_20150104_232347_inLine
+BABEL_OP3_402_15535_20150104_232347_outLine
+BABEL_OP3_402_20133_20140911_170812_inLine
+BABEL_OP3_402_20133_20140911_170812_outLine
+BABEL_OP3_402_21393_20150304_163256_inLine
+BABEL_OP3_402_21393_20150304_163256_outLine
+BABEL_OP3_402_21581_20141107_151147_inLine
+BABEL_OP3_402_21581_20141107_151147_outLine
+BABEL_OP3_402_21807_20141125_194924_inLine
+BABEL_OP3_402_21807_20141125_194924_outLine
+BABEL_OP3_402_23046_20141103_212247_inLine
+BABEL_OP3_402_23046_20141103_212247_outLine
+BABEL_OP3_402_23505_20141029_003347_inLine
+BABEL_OP3_402_23505_20141029_003347_outLine
+BABEL_OP3_402_24982_20141027_223126_inLine
+BABEL_OP3_402_24982_20141027_223126_outLine
+BABEL_OP3_402_27590_20141227_191710_inLine
+BABEL_OP3_402_27590_20141227_191710_outLine
+BABEL_OP3_402_27841_20150112_180404_inLine
+BABEL_OP3_402_27841_20150112_180404_outLine
+BABEL_OP3_402_28012_20150105_215005_inLine
+BABEL_OP3_402_28012_20150105_215005_outLine
+BABEL_OP3_402_36293_20141001_145552_inLine
+BABEL_OP3_402_36293_20141001_145552_outLine
+BABEL_OP3_402_36505_20150106_201700_inLine
+BABEL_OP3_402_36505_20150106_201700_outLine
+BABEL_OP3_402_36894_20140919_222930_inLine
+BABEL_OP3_402_36894_20140919_222930_outLine
+BABEL_OP3_402_41592_20141118_011026_inLine
+BABEL_OP3_402_41592_20141118_011026_outLine
+BABEL_OP3_402_41598_20150201_142509_inLine
+BABEL_OP3_402_41598_20150201_142509_outLine
+BABEL_OP3_402_41745_20141108_162338_inLine
+BABEL_OP3_402_41745_20141108_162338_outLine
+BABEL_OP3_402_46261_20141112_161528_inLine
+BABEL_OP3_402_46261_20141112_161528_outLine
+BABEL_OP3_402_49118_20150201_023112_inLine
+BABEL_OP3_402_49118_20150201_023112_outLine
+BABEL_OP3_402_49437_20150112_204645_inLine
+BABEL_OP3_402_49437_20150112_204645_outLine
+BABEL_OP3_402_50427_20141119_174123_inLine
+BABEL_OP3_402_50427_20141119_174123_outLine
+BABEL_OP3_402_50549_20150113_123204_inLine
+BABEL_OP3_402_50549_20150113_123204_outLine
+BABEL_OP3_402_52490_20140916_192446_inLine
+BABEL_OP3_402_52490_20140916_192446_outLine
+BABEL_OP3_402_52717_20140923_130849_inLine
+BABEL_OP3_402_52717_20140923_130849_outLine
+BABEL_OP3_402_54162_20141116_183833_inLine
+BABEL_OP3_402_54162_20141116_183833_outLine
+BABEL_OP3_402_55968_20140912_204820_inLine
+BABEL_OP3_402_55968_20140912_204820_outLine
+BABEL_OP3_402_56306_20150103_203751_inLine
+BABEL_OP3_402_56306_20150103_203751_outLine
+BABEL_OP3_402_61731_20141008_152133_inLine
+BABEL_OP3_402_61731_20141008_152133_outLine
+BABEL_OP3_402_64494_20141012_193548_inLine
+BABEL_OP3_402_64494_20141012_193548_outLine
+BABEL_OP3_402_65882_20141102_005627_inLine
+BABEL_OP3_402_65882_20141102_005627_outLine
+BABEL_OP3_402_66519_20141107_200757_inLine
+BABEL_OP3_402_66519_20141107_200757_outLine
+BABEL_OP3_402_68068_20150119_135822_inLine
+BABEL_OP3_402_68068_20150119_135822_outLine
+BABEL_OP3_402_68182_20150111_002528_inLine
+BABEL_OP3_402_68182_20150111_002528_outLine
+BABEL_OP3_402_68289_20150216_010725_inLine
+BABEL_OP3_402_68289_20150216_010725_outLine
+BABEL_OP3_402_68385_20140911_143047_inLine
+BABEL_OP3_402_68385_20140911_143047_outLine
+BABEL_OP3_402_69746_20150110_165836_inLine
+BABEL_OP3_402_69746_20150110_165836_outLine
+BABEL_OP3_402_70343_20150212_004248_inLine
+BABEL_OP3_402_70343_20150212_004248_outLine
+BABEL_OP3_402_70386_20141116_170547_inLine
+BABEL_OP3_402_70386_20141116_170547_outLine
+BABEL_OP3_402_72324_20141201_191618_inLine
+BABEL_OP3_402_72324_20141201_191618_outLine
+BABEL_OP3_402_73511_20141226_133330_inLine
+BABEL_OP3_402_73511_20141226_133330_outLine
+BABEL_OP3_402_73837_20141101_183259_inLine
+BABEL_OP3_402_73837_20141101_183259_outLine
+BABEL_OP3_402_78398_20141107_225319_inLine
+BABEL_OP3_402_78398_20141107_225319_outLine
+BABEL_OP3_402_78454_20141128_203259_inLine
+BABEL_OP3_402_78454_20141128_203259_outLine
+BABEL_OP3_402_78604_20141031_181612_inLine
+BABEL_OP3_402_78604_20141031_181612_outLine
+BABEL_OP3_402_81433_20141121_014829_inLine
+BABEL_OP3_402_81433_20141121_014829_outLine
+BABEL_OP3_402_81553_20150124_004852_inLine
+BABEL_OP3_402_81553_20150124_004852_outLine
+BABEL_OP3_402_82935_20150104_005835_inLine
+BABEL_OP3_402_82935_20150104_005835_outLine
+BABEL_OP3_402_86467_20140920_125939_inLine
+BABEL_OP3_402_86467_20140920_125939_outLine
+BABEL_OP3_402_86748_20150131_001317_inLine
+BABEL_OP3_402_86748_20150131_001317_outLine
+BABEL_OP3_402_87921_20141225_203350_inLine
+BABEL_OP3_402_87921_20141225_203350_outLine
+BABEL_OP3_402_88445_20141205_204305_inLine
+BABEL_OP3_402_88445_20141205_204305_outLine
+BABEL_OP3_402_89203_20150131_215344_inLine
+BABEL_OP3_402_89203_20150131_215344_outLine
+BABEL_OP3_402_89457_20141117_212710_inLine
+BABEL_OP3_402_89457_20141117_212710_outLine
+BABEL_OP3_402_92176_20141216_022926_inLine
+BABEL_OP3_402_92176_20141216_022926_outLine
+BABEL_OP3_402_92176_20141222_021733_inLine
+BABEL_OP3_402_92176_20141222_021733_outLine
+BABEL_OP3_402_93632_20150119_150118_inLine
+BABEL_OP3_402_93632_20150119_150118_outLine
+BABEL_OP3_402_95399_20141111_162356_inLine
+BABEL_OP3_402_95399_20141111_162356_outLine
+BABEL_OP3_402_96584_20150107_184515_inLine
+BABEL_OP3_402_96584_20150107_184515_outLine
+BABEL_OP3_402_99401_20141024_202205_inLine
+BABEL_OP3_402_99401_20141024_202205_outLine
diff --git a/egs/babel/s5d/conf/lists/402-javanese/dev.list b/egs/babel/s5d/conf/lists/402-javanese/dev.list
new file mode 100644
index 00000000000..46233026964
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/402-javanese/dev.list
@@ -0,0 +1,122 @@
+BABEL_OP3_402_10184_20141119_194233_inLine
+BABEL_OP3_402_10184_20141119_194233_outLine
+BABEL_OP3_402_11581_20141124_181058_inLine
+BABEL_OP3_402_11581_20141124_181058_outLine
+BABEL_OP3_402_15535_20150104_232347_inLine
+BABEL_OP3_402_15535_20150104_232347_outLine
+BABEL_OP3_402_20133_20140911_170812_inLine
+BABEL_OP3_402_20133_20140911_170812_outLine
+BABEL_OP3_402_21393_20150304_163256_inLine
+BABEL_OP3_402_21393_20150304_163256_outLine
+BABEL_OP3_402_21581_20141107_151147_inLine
+BABEL_OP3_402_21581_20141107_151147_outLine
+BABEL_OP3_402_21807_20141125_194924_inLine
+BABEL_OP3_402_21807_20141125_194924_outLine
+BABEL_OP3_402_23046_20141103_212247_inLine
+BABEL_OP3_402_23046_20141103_212247_outLine
+BABEL_OP3_402_23505_20141029_003347_inLine
+BABEL_OP3_402_23505_20141029_003347_outLine
+BABEL_OP3_402_24982_20141027_223126_inLine
+BABEL_OP3_402_24982_20141027_223126_outLine
+BABEL_OP3_402_27590_20141227_191710_inLine
+BABEL_OP3_402_27590_20141227_191710_outLine
+BABEL_OP3_402_27841_20150112_180404_inLine
+BABEL_OP3_402_27841_20150112_180404_outLine
+BABEL_OP3_402_28012_20150105_215005_inLine
+BABEL_OP3_402_28012_20150105_215005_outLine
+BABEL_OP3_402_36293_20141001_145552_inLine
+BABEL_OP3_402_36293_20141001_145552_outLine
+BABEL_OP3_402_36505_20150106_201700_inLine
+BABEL_OP3_402_36505_20150106_201700_outLine
+BABEL_OP3_402_36894_20140919_222930_inLine
+BABEL_OP3_402_36894_20140919_222930_outLine
+BABEL_OP3_402_41592_20141118_011026_inLine
+BABEL_OP3_402_41592_20141118_011026_outLine
+BABEL_OP3_402_41598_20150201_142509_inLine
+BABEL_OP3_402_41598_20150201_142509_outLine
+BABEL_OP3_402_41745_20141108_162338_inLine
+BABEL_OP3_402_41745_20141108_162338_outLine
+BABEL_OP3_402_46261_20141112_161528_inLine
+BABEL_OP3_402_46261_20141112_161528_outLine
+BABEL_OP3_402_49118_20150201_023112_inLine
+BABEL_OP3_402_49118_20150201_023112_outLine
+BABEL_OP3_402_49437_20150112_204645_inLine
+BABEL_OP3_402_49437_20150112_204645_outLine
+BABEL_OP3_402_50427_20141119_174123_inLine
+BABEL_OP3_402_50427_20141119_174123_outLine
+BABEL_OP3_402_50549_20150113_123204_inLine
+BABEL_OP3_402_50549_20150113_123204_outLine
+BABEL_OP3_402_52490_20140916_192446_inLine
+BABEL_OP3_402_52490_20140916_192446_outLine
+BABEL_OP3_402_52717_20140923_130849_inLine
+BABEL_OP3_402_52717_20140923_130849_outLine
+BABEL_OP3_402_54162_20141116_183833_inLine
+BABEL_OP3_402_54162_20141116_183833_outLine
+BABEL_OP3_402_55968_20140912_204820_inLine
+BABEL_OP3_402_55968_20140912_204820_outLine
+BABEL_OP3_402_56306_20150103_203751_inLine
+BABEL_OP3_402_56306_20150103_203751_outLine
+BABEL_OP3_402_61731_20141008_152133_inLine
+BABEL_OP3_402_61731_20141008_152133_outLine
+BABEL_OP3_402_64494_20141012_193548_inLine
+BABEL_OP3_402_64494_20141012_193548_outLine
+BABEL_OP3_402_65882_20141102_005627_inLine
+BABEL_OP3_402_65882_20141102_005627_outLine
+BABEL_OP3_402_66519_20141107_200757_inLine
+BABEL_OP3_402_66519_20141107_200757_outLine
+BABEL_OP3_402_68068_20150119_135822_inLine
+BABEL_OP3_402_68068_20150119_135822_outLine
+BABEL_OP3_402_68182_20150111_002528_inLine
+BABEL_OP3_402_68182_20150111_002528_outLine
+BABEL_OP3_402_68289_20150216_010725_inLine
+BABEL_OP3_402_68289_20150216_010725_outLine
+BABEL_OP3_402_68385_20140911_143047_inLine
+BABEL_OP3_402_68385_20140911_143047_outLine
+BABEL_OP3_402_69746_20150110_165836_inLine
+BABEL_OP3_402_69746_20150110_165836_outLine
+BABEL_OP3_402_70343_20150212_004248_inLine
+BABEL_OP3_402_70343_20150212_004248_outLine
+BABEL_OP3_402_70386_20141116_170547_inLine
+BABEL_OP3_402_70386_20141116_170547_outLine
+BABEL_OP3_402_72324_20141201_191618_inLine
+BABEL_OP3_402_72324_20141201_191618_outLine
+BABEL_OP3_402_73511_20141226_133330_inLine
+BABEL_OP3_402_73511_20141226_133330_outLine
+BABEL_OP3_402_73837_20141101_183259_inLine
+BABEL_OP3_402_73837_20141101_183259_outLine
+BABEL_OP3_402_78398_20141107_225319_inLine
+BABEL_OP3_402_78398_20141107_225319_outLine
+BABEL_OP3_402_78454_20141128_203259_inLine
+BABEL_OP3_402_78454_20141128_203259_outLine
+BABEL_OP3_402_78604_20141031_181612_inLine
+BABEL_OP3_402_78604_20141031_181612_outLine
+BABEL_OP3_402_81433_20141121_014829_inLine
+BABEL_OP3_402_81433_20141121_014829_outLine
+BABEL_OP3_402_81553_20150124_004852_inLine
+BABEL_OP3_402_81553_20150124_004852_outLine
+BABEL_OP3_402_82935_20150104_005835_inLine
+BABEL_OP3_402_82935_20150104_005835_outLine
+BABEL_OP3_402_86467_20140920_125939_inLine
+BABEL_OP3_402_86467_20140920_125939_outLine
+BABEL_OP3_402_86748_20150131_001317_inLine
+BABEL_OP3_402_86748_20150131_001317_outLine
+BABEL_OP3_402_87921_20141225_203350_inLine
+BABEL_OP3_402_87921_20141225_203350_outLine
+BABEL_OP3_402_88445_20141205_204305_inLine
+BABEL_OP3_402_88445_20141205_204305_outLine
+BABEL_OP3_402_89203_20150131_215344_inLine
+BABEL_OP3_402_89203_20150131_215344_outLine
+BABEL_OP3_402_89457_20141117_212710_inLine
+BABEL_OP3_402_89457_20141117_212710_outLine
+BABEL_OP3_402_92176_20141216_022926_inLine
+BABEL_OP3_402_92176_20141216_022926_outLine
+BABEL_OP3_402_92176_20141222_021733_inLine
+BABEL_OP3_402_92176_20141222_021733_outLine
+BABEL_OP3_402_93632_20150119_150118_inLine
+BABEL_OP3_402_93632_20150119_150118_outLine
+BABEL_OP3_402_95399_20141111_162356_inLine
+BABEL_OP3_402_95399_20141111_162356_outLine
+BABEL_OP3_402_96584_20150107_184515_inLine
+BABEL_OP3_402_96584_20150107_184515_outLine
+BABEL_OP3_402_99401_20141024_202205_inLine
+BABEL_OP3_402_99401_20141024_202205_outLine
diff --git a/egs/babel/s5d/conf/lists/402-javanese/eval.list b/egs/babel/s5d/conf/lists/402-javanese/eval.list
new file mode 100644
index 00000000000..e0b81487a54
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/402-javanese/eval.list
@@ -0,0 +1,188 @@
+BABEL_OP3_402_10036_20141124_025321_inLine
+BABEL_OP3_402_10036_20141124_025321_outLine
+BABEL_OP3_402_10974_20141130_234329_inLine
+BABEL_OP3_402_10974_20141130_234329_outLine
+BABEL_OP3_402_12036_20141009_181351_inLine
+BABEL_OP3_402_12036_20141009_181351_outLine
+BABEL_OP3_402_12321_20150111_184045_inLine
+BABEL_OP3_402_12321_20150111_184045_outLine
+BABEL_OP3_402_12321_20150111_185205_inLine
+BABEL_OP3_402_12321_20150111_185205_outLine
+BABEL_OP3_402_13040_20141030_172740_inLine
+BABEL_OP3_402_13040_20141030_172740_outLine
+BABEL_OP3_402_13490_20141201_021241_inLine
+BABEL_OP3_402_13490_20141201_021241_outLine
+BABEL_OP3_402_13490_20141204_021416_inLine
+BABEL_OP3_402_13490_20141204_021416_outLine
+BABEL_OP3_402_14719_20150114_153747_inLine
+BABEL_OP3_402_14719_20150114_153747_outLine
+BABEL_OP3_402_15024_20141112_173834_inLine
+BABEL_OP3_402_15024_20141112_173834_outLine
+BABEL_OP3_402_15730_20141001_154550_inLine
+BABEL_OP3_402_15730_20141001_154550_outLine
+BABEL_OP3_402_16938_20141118_194456_inLine
+BABEL_OP3_402_16938_20141118_194456_outLine
+BABEL_OP3_402_17165_20141115_171729_inLine
+BABEL_OP3_402_17165_20141115_171729_outLine
+BABEL_OP3_402_19749_20150130_162450_inLine
+BABEL_OP3_402_19749_20150130_162450_outLine
+BABEL_OP3_402_19818_20141213_194147_inLine
+BABEL_OP3_402_19818_20141213_194147_outLine
+BABEL_OP3_402_21206_20141019_210210_inLine
+BABEL_OP3_402_21206_20141019_210210_outLine
+BABEL_OP3_402_23395_20141120_192928_inLine
+BABEL_OP3_402_23395_20141120_192928_outLine
+BABEL_OP3_402_23628_20141123_183457_inLine
+BABEL_OP3_402_23628_20141123_183457_outLine
+BABEL_OP3_402_26388_20141031_184504_inLine
+BABEL_OP3_402_26388_20141031_184504_outLine
+BABEL_OP3_402_28419_20141107_004309_inLine
+BABEL_OP3_402_28419_20141107_004309_outLine
+BABEL_OP3_402_28814_20150108_133247_inLine
+BABEL_OP3_402_28814_20150108_133247_outLine
+BABEL_OP3_402_29023_20141016_155119_inLine
+BABEL_OP3_402_29023_20141016_155119_outLine
+BABEL_OP3_402_30395_20141126_165430_inLine
+BABEL_OP3_402_30395_20141126_165430_outLine
+BABEL_OP3_402_30653_20150301_200332_inLine
+BABEL_OP3_402_30653_20150301_200332_outLine
+BABEL_OP3_402_33497_20141228_021512_inLine
+BABEL_OP3_402_33497_20141228_021512_outLine
+BABEL_OP3_402_33497_20141228_022418_inLine
+BABEL_OP3_402_33497_20141228_022418_outLine
+BABEL_OP3_402_34197_20140926_180505_inLine
+BABEL_OP3_402_34197_20140926_180505_outLine
+BABEL_OP3_402_35202_20150201_211802_inLine
+BABEL_OP3_402_35202_20150201_211802_outLine
+BABEL_OP3_402_36669_20141112_195148_inLine
+BABEL_OP3_402_36669_20141112_195148_outLine
+BABEL_OP3_402_36990_20141114_221330_inLine
+BABEL_OP3_402_36990_20141114_221330_outLine
+BABEL_OP3_402_38664_20141123_163506_inLine
+BABEL_OP3_402_38664_20141123_163506_outLine
+BABEL_OP3_402_38741_20141020_160936_inLine
+BABEL_OP3_402_38741_20141020_160936_outLine
+BABEL_OP3_402_40713_20141019_145210_inLine
+BABEL_OP3_402_40713_20141019_145210_outLine
+BABEL_OP3_402_44347_20150111_142153_inLine
+BABEL_OP3_402_44347_20150111_142153_outLine
+BABEL_OP3_402_44420_20141031_175058_inLine
+BABEL_OP3_402_44420_20141031_175058_outLine
+BABEL_OP3_402_44531_20150302_195023_inLine
+BABEL_OP3_402_44531_20150302_195023_outLine
+BABEL_OP3_402_45642_20140923_154729_inLine
+BABEL_OP3_402_45642_20140923_154729_outLine
+BABEL_OP3_402_46681_20141013_161421_inLine
+BABEL_OP3_402_46681_20141013_161421_outLine
+BABEL_OP3_402_46976_20141119_183300_inLine
+BABEL_OP3_402_46976_20141119_183300_outLine
+BABEL_OP3_402_49775_20140915_151515_inLine
+BABEL_OP3_402_49775_20140915_151515_outLine
+BABEL_OP3_402_51407_20141228_213554_inLine
+BABEL_OP3_402_51407_20141228_213554_outLine
+BABEL_OP3_402_51955_20141103_200423_inLine
+BABEL_OP3_402_51955_20141103_200423_outLine
+BABEL_OP3_402_52694_20141123_140609_inLine
+BABEL_OP3_402_52694_20141123_140609_outLine
+BABEL_OP3_402_53419_20141226_140523_inLine
+BABEL_OP3_402_53419_20141226_140523_outLine
+BABEL_OP3_402_53917_20150201_201004_inLine
+BABEL_OP3_402_53917_20150201_201004_outLine
+BABEL_OP3_402_54841_20150108_004608_inLine
+BABEL_OP3_402_54841_20150108_004608_outLine
+BABEL_OP3_402_56743_20141108_140926_inLine
+BABEL_OP3_402_56743_20141108_140926_outLine
+BABEL_OP3_402_56826_20141224_134149_inLine
+BABEL_OP3_402_56826_20141224_134149_outLine
+BABEL_OP3_402_58103_20141104_192009_inLine
+BABEL_OP3_402_58103_20141104_192009_outLine
+BABEL_OP3_402_58926_20141014_174318_inLine
+BABEL_OP3_402_58926_20141014_174318_outLine
+BABEL_OP3_402_59091_20150104_000026_inLine
+BABEL_OP3_402_59091_20150104_000026_outLine
+BABEL_OP3_402_59928_20140929_174836_inLine
+BABEL_OP3_402_59928_20140929_174836_outLine
+BABEL_OP3_402_59993_20141103_183340_inLine
+BABEL_OP3_402_59993_20141103_183340_outLine
+BABEL_OP3_402_60626_20141019_135020_inLine
+BABEL_OP3_402_60626_20141019_135020_outLine
+BABEL_OP3_402_61011_20141003_131410_inLine
+BABEL_OP3_402_61011_20141003_131410_outLine
+BABEL_OP3_402_61190_20141102_132003_inLine
+BABEL_OP3_402_61190_20141102_132003_outLine
+BABEL_OP3_402_61225_20140912_171906_inLine
+BABEL_OP3_402_61225_20140912_171906_outLine
+BABEL_OP3_402_63604_20141101_235656_inLine
+BABEL_OP3_402_63604_20141101_235656_outLine
+BABEL_OP3_402_64638_20141214_234141_inLine
+BABEL_OP3_402_64638_20141214_234141_outLine
+BABEL_OP3_402_66967_20140917_153139_inLine
+BABEL_OP3_402_66967_20140917_153139_outLine
+BABEL_OP3_402_69474_20150111_235831_inLine
+BABEL_OP3_402_69474_20150111_235831_outLine
+BABEL_OP3_402_71047_20150107_194822_inLine
+BABEL_OP3_402_71047_20150107_194822_outLine
+BABEL_OP3_402_72007_20141219_183621_inLine
+BABEL_OP3_402_72007_20141219_183621_outLine
+BABEL_OP3_402_73042_20141013_175542_inLine
+BABEL_OP3_402_73042_20141013_175542_outLine
+BABEL_OP3_402_73072_20140923_135906_inLine
+BABEL_OP3_402_73072_20140923_135906_outLine
+BABEL_OP3_402_74226_20141220_000133_inLine
+BABEL_OP3_402_74226_20141220_000133_outLine
+BABEL_OP3_402_74280_20140915_174124_inLine
+BABEL_OP3_402_74280_20140915_174124_outLine
+BABEL_OP3_402_76126_20141224_141342_inLine
+BABEL_OP3_402_76126_20141224_141342_outLine
+BABEL_OP3_402_77033_20150108_180731_inLine
+BABEL_OP3_402_77033_20150108_180731_outLine
+BABEL_OP3_402_77112_20140929_201352_inLine
+BABEL_OP3_402_77112_20140929_201352_outLine
+BABEL_OP3_402_77391_20141102_204007_inLine
+BABEL_OP3_402_77391_20141102_204007_outLine
+BABEL_OP3_402_77567_20140920_134449_inLine
+BABEL_OP3_402_77567_20140920_134449_outLine
+BABEL_OP3_402_77730_20141021_174646_inLine
+BABEL_OP3_402_77730_20141021_174646_outLine
+BABEL_OP3_402_78544_20141215_000405_inLine
+BABEL_OP3_402_78544_20141215_000405_outLine
+BABEL_OP3_402_79505_20150227_172147_inLine
+BABEL_OP3_402_79505_20150227_172147_outLine
+BABEL_OP3_402_81622_20141115_215444_inLine
+BABEL_OP3_402_81622_20141115_215444_outLine
+BABEL_OP3_402_82145_20150108_195326_inLine
+BABEL_OP3_402_82145_20150108_195326_outLine
+BABEL_OP3_402_82863_20141114_212757_inLine
+BABEL_OP3_402_82863_20141114_212757_outLine
+BABEL_OP3_402_84583_20141123_201337_inLine
+BABEL_OP3_402_84583_20141123_201337_outLine
+BABEL_OP3_402_87074_20141030_183257_inLine
+BABEL_OP3_402_87074_20141030_183257_outLine
+BABEL_OP3_402_87298_20141103_203537_inLine
+BABEL_OP3_402_87298_20141103_203537_outLine
+BABEL_OP3_402_88372_20150201_000904_inLine
+BABEL_OP3_402_88372_20150201_000904_outLine
+BABEL_OP3_402_88982_20141130_182335_inLine
+BABEL_OP3_402_88982_20141130_182335_outLine
+BABEL_OP3_402_91336_20141122_023555_inLine
+BABEL_OP3_402_91336_20141122_023555_outLine
+BABEL_OP3_402_92792_20150227_162129_inLine
+BABEL_OP3_402_92792_20150227_162129_outLine
+BABEL_OP3_402_93411_20141120_155834_inLine
+BABEL_OP3_402_93411_20141120_155834_outLine
+BABEL_OP3_402_94978_20150107_204930_inLine
+BABEL_OP3_402_94978_20150107_204930_outLine
+BABEL_OP3_402_95663_20141103_142815_inLine
+BABEL_OP3_402_95663_20141103_142815_outLine
+BABEL_OP3_402_96405_20141006_202624_inLine
+BABEL_OP3_402_96405_20141006_202624_outLine
+BABEL_OP3_402_96730_20150110_161027_inLine
+BABEL_OP3_402_96730_20150110_161027_outLine
+BABEL_OP3_402_96934_20141101_192258_inLine
+BABEL_OP3_402_96934_20141101_192258_outLine
+BABEL_OP3_402_97376_20141221_191608_inLine
+BABEL_OP3_402_97376_20141221_191608_outLine
+BABEL_OP3_402_97604_20150121_010739_inLine
+BABEL_OP3_402_97604_20150121_010739_outLine
+BABEL_OP3_402_98489_20141028_122528_inLine
+BABEL_OP3_402_98489_20141028_122528_outLine
diff --git a/egs/babel/s5d/conf/lists/402-javanese/sub-train.list b/egs/babel/s5d/conf/lists/402-javanese/sub-train.list
new file mode 100644
index 00000000000..58306104f42
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/402-javanese/sub-train.list
@@ -0,0 +1,122 @@
+BABEL_OP3_402_16184_20141120_143943_inLine
+BABEL_OP3_402_16184_20141120_143943_outLine
+BABEL_OP3_402_16749_20150110_182247_inLine
+BABEL_OP3_402_16749_20150110_182247_outLine
+BABEL_OP3_402_17914_20150107_192833_inLine
+BABEL_OP3_402_17914_20150107_192833_outLine
+BABEL_OP3_402_20738_20150201_004014_inLine
+BABEL_OP3_402_20738_20150201_004014_outLine
+BABEL_OP3_402_20768_20150110_125415_inLine
+BABEL_OP3_402_20768_20150110_125415_outLine
+BABEL_OP3_402_20985_20141209_223858_inLine
+BABEL_OP3_402_20985_20141209_223858_outLine
+BABEL_OP3_402_21794_20141110_000434_inLine
+BABEL_OP3_402_21794_20141110_000434_outLine
+BABEL_OP3_402_22494_20150127_212514_inLine
+BABEL_OP3_402_22494_20150127_212514_outLine
+BABEL_OP3_402_24270_20141127_181536_inLine
+BABEL_OP3_402_24270_20141127_181536_outLine
+BABEL_OP3_402_31346_20150106_163812_inLine
+BABEL_OP3_402_31346_20150106_163812_outLine
+BABEL_OP3_402_31346_20150107_000948_inLine
+BABEL_OP3_402_31346_20150107_000948_outLine
+BABEL_OP3_402_31992_20141104_154739_inLine
+BABEL_OP3_402_31992_20141104_154739_outLine
+BABEL_OP3_402_34336_20141101_214014_inLine
+BABEL_OP3_402_34336_20141101_214014_outLine
+BABEL_OP3_402_34477_20141103_012729_inLine
+BABEL_OP3_402_34477_20141103_012729_outLine
+BABEL_OP3_402_34564_20150110_174105_inLine
+BABEL_OP3_402_34564_20150110_174105_outLine
+BABEL_OP3_402_38431_20150104_193523_inLine
+BABEL_OP3_402_38431_20150104_193523_outLine
+BABEL_OP3_402_39059_20150201_151819_inLine
+BABEL_OP3_402_39059_20150201_151819_outLine
+BABEL_OP3_402_41680_20140911_133458_inLine
+BABEL_OP3_402_41680_20140911_133458_outLine
+BABEL_OP3_402_43784_20141027_205748_inLine
+BABEL_OP3_402_43784_20141027_205748_outLine
+BABEL_OP3_402_45536_20150131_234119_inLine
+BABEL_OP3_402_45536_20150131_234119_outLine
+BABEL_OP3_402_46688_20140927_210143_inLine
+BABEL_OP3_402_46688_20140927_210143_outLine
+BABEL_OP3_402_48243_20141031_160102_inLine
+BABEL_OP3_402_48243_20141031_160102_outLine
+BABEL_OP3_402_49197_20141123_183541_inLine
+BABEL_OP3_402_49197_20141123_183541_outLine
+BABEL_OP3_402_49502_20150201_200343_inLine
+BABEL_OP3_402_49502_20150201_200343_outLine
+BABEL_OP3_402_50779_20141124_211935_inLine
+BABEL_OP3_402_50779_20141124_211935_outLine
+BABEL_OP3_402_50962_20141004_143222_inLine
+BABEL_OP3_402_50962_20141004_143222_outLine
+BABEL_OP3_402_51015_20141209_214156_inLine
+BABEL_OP3_402_51015_20141209_214156_outLine
+BABEL_OP3_402_52246_20141115_174547_inLine
+BABEL_OP3_402_52246_20141115_174547_outLine
+BABEL_OP3_402_54074_20141110_001507_inLine
+BABEL_OP3_402_54074_20141110_001507_outLine
+BABEL_OP3_402_56198_20141103_152946_inLine
+BABEL_OP3_402_56198_20141103_152946_outLine
+BABEL_OP3_402_57065_20141213_175712_inLine
+BABEL_OP3_402_57065_20141213_175712_outLine
+BABEL_OP3_402_58313_20141121_191107_inLine
+BABEL_OP3_402_58313_20141121_191107_outLine
+BABEL_OP3_402_58489_20150110_155118_inLine
+BABEL_OP3_402_58489_20150110_155118_outLine
+BABEL_OP3_402_59078_20141127_201549_inLine
+BABEL_OP3_402_59078_20141127_201549_outLine
+BABEL_OP3_402_64768_20141116_180927_inLine
+BABEL_OP3_402_64768_20141116_180927_outLine
+BABEL_OP3_402_64796_20141122_163640_inLine
+BABEL_OP3_402_64796_20141122_163640_outLine
+BABEL_OP3_402_65367_20150103_224736_inLine
+BABEL_OP3_402_65367_20150103_224736_outLine
+BABEL_OP3_402_65692_20141228_202914_inLine
+BABEL_OP3_402_65692_20141228_202914_outLine
+BABEL_OP3_402_66177_20150131_201057_inLine
+BABEL_OP3_402_66177_20150131_201057_outLine
+BABEL_OP3_402_70221_20141222_002645_inLine
+BABEL_OP3_402_70221_20141222_002645_outLine
+BABEL_OP3_402_73119_20141031_182314_inLine
+BABEL_OP3_402_73119_20141031_182314_outLine
+BABEL_OP3_402_73301_20141117_004450_inLine
+BABEL_OP3_402_73301_20141117_004450_outLine
+BABEL_OP3_402_76444_20141227_143452_inLine
+BABEL_OP3_402_76444_20141227_143452_outLine
+BABEL_OP3_402_76683_20141128_201732_inLine
+BABEL_OP3_402_76683_20141128_201732_outLine
+BABEL_OP3_402_78116_20141229_210212_inLine
+BABEL_OP3_402_78116_20141229_210212_outLine
+BABEL_OP3_402_78254_20141101_235022_inLine
+BABEL_OP3_402_78254_20141101_235022_outLine
+BABEL_OP3_402_79139_20141115_153558_inLine
+BABEL_OP3_402_79139_20141115_153558_outLine
+BABEL_OP3_402_81229_20141116_224932_inLine
+BABEL_OP3_402_81229_20141116_224932_outLine
+BABEL_OP3_402_81427_20141110_165047_inLine
+BABEL_OP3_402_81427_20141110_165047_outLine
+BABEL_OP3_402_82089_20141113_162038_inLine
+BABEL_OP3_402_82089_20141113_162038_outLine
+BABEL_OP3_402_83651_20141009_145412_inLine
+BABEL_OP3_402_83651_20141009_145412_outLine
+BABEL_OP3_402_85048_20141204_194855_inLine
+BABEL_OP3_402_85048_20141204_194855_outLine
+BABEL_OP3_402_85340_20141021_182050_inLine
+BABEL_OP3_402_85340_20141021_182050_outLine
+BABEL_OP3_402_86713_20150101_014831_inLine
+BABEL_OP3_402_86713_20150101_014831_outLine
+BABEL_OP3_402_87073_20140915_154336_inLine
+BABEL_OP3_402_87073_20140915_154336_outLine
+BABEL_OP3_402_87871_20141224_130949_inLine
+BABEL_OP3_402_87871_20141224_130949_outLine
+BABEL_OP3_402_88601_20141209_160621_inLine
+BABEL_OP3_402_88601_20141209_160621_outLine
+BABEL_OP3_402_93604_20150304_152208_inLine
+BABEL_OP3_402_93604_20150304_152208_outLine
+BABEL_OP3_402_93964_20141216_021155_inLine
+BABEL_OP3_402_93964_20141216_021155_outLine
+BABEL_OP3_402_94869_20140912_195117_inLine
+BABEL_OP3_402_94869_20140912_195117_outLine
+BABEL_OP3_402_95446_20150110_150658_inLine
+BABEL_OP3_402_95446_20150110_150658_outLine
diff --git a/egs/babel/s5d/conf/lists/402-javanese/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/402-javanese/sub-train.untranscribed.list
new file mode 100644
index 00000000000..4f81d9daca4
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/402-javanese/sub-train.untranscribed.list
@@ -0,0 +1,370 @@
+BABEL_OP3_402_10416_20141126_133029_inLine
+BABEL_OP3_402_10416_20141126_133029_outLine
+BABEL_OP3_402_10901_20141116_141701_inLine
+BABEL_OP3_402_10901_20141116_141701_outLine
+BABEL_OP3_402_12220_20141106_021950_inLine
+BABEL_OP3_402_12220_20141106_021950_outLine
+BABEL_OP3_402_12767_20140924_184905_inLine
+BABEL_OP3_402_12767_20140924_184905_outLine
+BABEL_OP3_402_13030_20141107_173701_inLine
+BABEL_OP3_402_13030_20141107_173701_outLine
+BABEL_OP3_402_13664_20140911_160207_inLine
+BABEL_OP3_402_13664_20140911_160207_outLine
+BABEL_OP3_402_13709_20150131_161040_inLine
+BABEL_OP3_402_13709_20150131_161040_outLine
+BABEL_OP3_402_14141_20150215_162503_inLine
+BABEL_OP3_402_14141_20150215_162503_outLine
+BABEL_OP3_402_14229_20141108_200257_inLine
+BABEL_OP3_402_14229_20141108_200257_outLine
+BABEL_OP3_402_14350_20141104_165111_inLine
+BABEL_OP3_402_14350_20141104_165111_outLine
+BABEL_OP3_402_14807_20141126_174048_inLine
+BABEL_OP3_402_14807_20141126_174048_outLine
+BABEL_OP3_402_14875_20140929_193054_inLine
+BABEL_OP3_402_14875_20140929_193054_outLine
+BABEL_OP3_402_14899_20140925_165651_inLine
+BABEL_OP3_402_14899_20140925_165651_outLine
+BABEL_OP3_402_14929_20141110_005633_inLine
+BABEL_OP3_402_14929_20141110_005633_outLine
+BABEL_OP3_402_14972_20141123_182012_inLine
+BABEL_OP3_402_14972_20141123_182012_outLine
+BABEL_OP3_402_15163_20141123_152731_inLine
+BABEL_OP3_402_15163_20141123_152731_outLine
+BABEL_OP3_402_15262_20140922_152302_inLine
+BABEL_OP3_402_15262_20140922_152302_outLine
+BABEL_OP3_402_15749_20150105_125933_inLine
+BABEL_OP3_402_15749_20150105_125933_outLine
+BABEL_OP3_402_16787_20141107_025835_inLine
+BABEL_OP3_402_16787_20141107_025835_outLine
+BABEL_OP3_402_17520_20141123_170854_inLine
+BABEL_OP3_402_17520_20141123_170854_outLine
+BABEL_OP3_402_17890_20150108_163627_inLine
+BABEL_OP3_402_17890_20150108_163627_outLine
+BABEL_OP3_402_18380_20141113_173424_inLine
+BABEL_OP3_402_18380_20141113_173424_outLine
+BABEL_OP3_402_19134_20141130_163804_inLine
+BABEL_OP3_402_19134_20141130_163804_outLine
+BABEL_OP3_402_19621_20141123_181810_inLine
+BABEL_OP3_402_19621_20141123_181810_outLine
+BABEL_OP3_402_19672_20141208_162907_inLine
+BABEL_OP3_402_19672_20141208_162907_outLine
+BABEL_OP3_402_19703_20141102_190851_inLine
+BABEL_OP3_402_19703_20141102_190851_outLine
+BABEL_OP3_402_20330_20150131_162055_inLine
+BABEL_OP3_402_20330_20150131_162055_outLine
+BABEL_OP3_402_20800_20141013_185736_inLine
+BABEL_OP3_402_20800_20141013_185736_outLine
+BABEL_OP3_402_20922_20150131_235414_inLine
+BABEL_OP3_402_20922_20150131_235414_outLine
+BABEL_OP3_402_21004_20150108_210410_inLine
+BABEL_OP3_402_21004_20150108_210410_outLine
+BABEL_OP3_402_22170_20150108_185847_inLine
+BABEL_OP3_402_22170_20150108_185847_outLine
+BABEL_OP3_402_23151_20150110_202409_inLine
+BABEL_OP3_402_23151_20150110_202409_outLine
+BABEL_OP3_402_23731_20141120_162409_inLine
+BABEL_OP3_402_23731_20141120_162409_outLine
+BABEL_OP3_402_23731_20141120_163618_inLine
+BABEL_OP3_402_23731_20141120_163618_outLine
+BABEL_OP3_402_24323_20141111_182649_inLine
+BABEL_OP3_402_24323_20141111_182649_outLine
+BABEL_OP3_402_24470_20141205_154028_inLine
+BABEL_OP3_402_24470_20141205_154028_outLine
+BABEL_OP3_402_24589_20141106_144156_inLine
+BABEL_OP3_402_24589_20141106_144156_outLine
+BABEL_OP3_402_25412_20141128_212603_inLine
+BABEL_OP3_402_25412_20141128_212603_outLine
+BABEL_OP3_402_26072_20150131_110154_inLine
+BABEL_OP3_402_26072_20150131_110154_outLine
+BABEL_OP3_402_26398_20150304_162600_inLine
+BABEL_OP3_402_26398_20150304_162600_outLine
+BABEL_OP3_402_28303_20141122_153440_inLine
+BABEL_OP3_402_28303_20141122_153440_outLine
+BABEL_OP3_402_29021_20150131_010036_inLine
+BABEL_OP3_402_29021_20150131_010036_outLine
+BABEL_OP3_402_29076_20141121_164742_inLine
+BABEL_OP3_402_29076_20141121_164742_outLine
+BABEL_OP3_402_29168_20140926_164602_inLine
+BABEL_OP3_402_29168_20140926_164602_outLine
+BABEL_OP3_402_29323_20150108_000937_inLine
+BABEL_OP3_402_29323_20150108_000937_outLine
+BABEL_OP3_402_30250_20140929_162020_inLine
+BABEL_OP3_402_30250_20140929_162020_outLine
+BABEL_OP3_402_31184_20141112_204308_inLine
+BABEL_OP3_402_31184_20141112_204308_outLine
+BABEL_OP3_402_31624_20141017_204521_inLine
+BABEL_OP3_402_31624_20141017_204521_outLine
+BABEL_OP3_402_32708_20141127_210435_inLine
+BABEL_OP3_402_32708_20141127_210435_outLine
+BABEL_OP3_402_32832_20150214_160609_inLine
+BABEL_OP3_402_32832_20150214_160609_outLine
+BABEL_OP3_402_32837_20150114_173357_inLine
+BABEL_OP3_402_32837_20150114_173357_outLine
+BABEL_OP3_402_33175_20141011_151643_inLine
+BABEL_OP3_402_33175_20141011_151643_outLine
+BABEL_OP3_402_33355_20141222_030242_inLine
+BABEL_OP3_402_33355_20141222_030242_outLine
+BABEL_OP3_402_33704_20150108_121853_inLine
+BABEL_OP3_402_33704_20150108_121853_outLine
+BABEL_OP3_402_33951_20141115_015656_inLine
+BABEL_OP3_402_33951_20141115_015656_outLine
+BABEL_OP3_402_34679_20141012_230850_inLine
+BABEL_OP3_402_34679_20141012_230850_outLine
+BABEL_OP3_402_34688_20141027_170150_inLine
+BABEL_OP3_402_34688_20141027_170150_outLine
+BABEL_OP3_402_35069_20150216_023523_inLine
+BABEL_OP3_402_35069_20150216_023523_outLine
+BABEL_OP3_402_35583_20150121_013548_inLine
+BABEL_OP3_402_35583_20150121_013548_outLine
+BABEL_OP3_402_37228_20150120_211131_inLine
+BABEL_OP3_402_37228_20150120_211131_outLine
+BABEL_OP3_402_37281_20141110_214558_inLine
+BABEL_OP3_402_37281_20141110_214558_outLine
+BABEL_OP3_402_37682_20141103_210556_inLine
+BABEL_OP3_402_37682_20141103_210556_outLine
+BABEL_OP3_402_37853_20150107_154609_inLine
+BABEL_OP3_402_37853_20150107_154609_outLine
+BABEL_OP3_402_38340_20141020_170141_inLine
+BABEL_OP3_402_38340_20141020_170141_outLine
+BABEL_OP3_402_39159_20140930_201318_inLine
+BABEL_OP3_402_39159_20140930_201318_outLine
+BABEL_OP3_402_39426_20150202_103633_inLine
+BABEL_OP3_402_39426_20150202_103633_outLine
+BABEL_OP3_402_39680_20150131_151358_inLine
+BABEL_OP3_402_39680_20150131_151358_outLine
+BABEL_OP3_402_39920_20150216_014707_inLine
+BABEL_OP3_402_39920_20150216_014707_outLine
+BABEL_OP3_402_41109_20150101_021923_inLine
+BABEL_OP3_402_41109_20150101_021923_outLine
+BABEL_OP3_402_43239_20150205_011521_inLine
+BABEL_OP3_402_43239_20150205_011521_outLine
+BABEL_OP3_402_43368_20141107_210043_inLine
+BABEL_OP3_402_43368_20141107_210043_outLine
+BABEL_OP3_402_43920_20141228_001637_inLine
+BABEL_OP3_402_43920_20141228_001637_outLine
+BABEL_OP3_402_44255_20150131_183155_inLine
+BABEL_OP3_402_44255_20150131_183155_outLine
+BABEL_OP3_402_44961_20140921_154533_inLine
+BABEL_OP3_402_44961_20140921_154533_outLine
+BABEL_OP3_402_46702_20140929_141902_inLine
+BABEL_OP3_402_46702_20140929_141902_outLine
+BABEL_OP3_402_46770_20150124_001351_inLine
+BABEL_OP3_402_46770_20150124_001351_outLine
+BABEL_OP3_402_46881_20141028_192343_inLine
+BABEL_OP3_402_46881_20141028_192343_outLine
+BABEL_OP3_402_47270_20150128_163211_inLine
+BABEL_OP3_402_47270_20150128_163211_outLine
+BABEL_OP3_402_48422_20150101_193320_inLine
+BABEL_OP3_402_48422_20150101_193320_outLine
+BABEL_OP3_402_48422_20150101_194803_inLine
+BABEL_OP3_402_48422_20150101_194803_outLine
+BABEL_OP3_402_48610_20140920_172026_inLine
+BABEL_OP3_402_48610_20140920_172026_outLine
+BABEL_OP3_402_48789_20141113_181720_inLine
+BABEL_OP3_402_48789_20141113_181720_outLine
+BABEL_OP3_402_49001_20141010_142908_inLine
+BABEL_OP3_402_49001_20141010_142908_outLine
+BABEL_OP3_402_49001_20141010_152312_inLine
+BABEL_OP3_402_49001_20141010_152312_outLine
+BABEL_OP3_402_49907_20141006_162735_inLine
+BABEL_OP3_402_49907_20141006_162735_outLine
+BABEL_OP3_402_50601_20141121_182643_inLine
+BABEL_OP3_402_50601_20141121_182643_outLine
+BABEL_OP3_402_50810_20140912_181008_inLine
+BABEL_OP3_402_50810_20140912_181008_outLine
+BABEL_OP3_402_51540_20150131_203108_inLine
+BABEL_OP3_402_51540_20150131_203108_outLine
+BABEL_OP3_402_51611_20141010_163542_inLine
+BABEL_OP3_402_51611_20141010_163542_outLine
+BABEL_OP3_402_51968_20141109_154701_inLine
+BABEL_OP3_402_51968_20141109_154701_outLine
+BABEL_OP3_402_52422_20150128_142229_inLine
+BABEL_OP3_402_52422_20150128_142229_outLine
+BABEL_OP3_402_52854_20140910_200850_inLine
+BABEL_OP3_402_52854_20140910_200850_outLine
+BABEL_OP3_402_52932_20141007_182635_inLine
+BABEL_OP3_402_52932_20141007_182635_outLine
+BABEL_OP3_402_54104_20141104_173741_inLine
+BABEL_OP3_402_54104_20141104_173741_outLine
+BABEL_OP3_402_54405_20141123_173044_inLine
+BABEL_OP3_402_54405_20141123_173044_outLine
+BABEL_OP3_402_55267_20141221_184118_inLine
+BABEL_OP3_402_55267_20141221_184118_outLine
+BABEL_OP3_402_56720_20141228_190653_inLine
+BABEL_OP3_402_56720_20141228_190653_outLine
+BABEL_OP3_402_57650_20150107_171335_inLine
+BABEL_OP3_402_57650_20150107_171335_outLine
+BABEL_OP3_402_57654_20141031_172711_inLine
+BABEL_OP3_402_57654_20141031_172711_outLine
+BABEL_OP3_402_57922_20141130_172609_inLine
+BABEL_OP3_402_57922_20141130_172609_outLine
+BABEL_OP3_402_58850_20141115_223848_inLine
+BABEL_OP3_402_58850_20141115_223848_outLine
+BABEL_OP3_402_59402_20150103_181612_inLine
+BABEL_OP3_402_59402_20150103_181612_outLine
+BABEL_OP3_402_60418_20141219_231820_inLine
+BABEL_OP3_402_60418_20141219_231820_outLine
+BABEL_OP3_402_60474_20141101_195523_inLine
+BABEL_OP3_402_60474_20141101_195523_outLine
+BABEL_OP3_402_61167_20141106_195710_inLine
+BABEL_OP3_402_61167_20141106_195710_outLine
+BABEL_OP3_402_61219_20141101_192955_inLine
+BABEL_OP3_402_61219_20141101_192955_outLine
+BABEL_OP3_402_61888_20150108_210230_inLine
+BABEL_OP3_402_61888_20150108_210230_outLine
+BABEL_OP3_402_62456_20141203_005134_inLine
+BABEL_OP3_402_62456_20141203_005134_outLine
+BABEL_OP3_402_62800_20141028_170241_inLine
+BABEL_OP3_402_62800_20141028_170241_outLine
+BABEL_OP3_402_62810_20140917_184635_inLine
+BABEL_OP3_402_62810_20140917_184635_outLine
+BABEL_OP3_402_63081_20141003_151638_inLine
+BABEL_OP3_402_63081_20141003_151638_outLine
+BABEL_OP3_402_64014_20150108_162849_inLine
+BABEL_OP3_402_64014_20150108_162849_outLine
+BABEL_OP3_402_64065_20141020_152452_inLine
+BABEL_OP3_402_64065_20141020_152452_outLine
+BABEL_OP3_402_64870_20141228_184201_inLine
+BABEL_OP3_402_64870_20141228_184201_outLine
+BABEL_OP3_402_65064_20141125_162638_inLine
+BABEL_OP3_402_65064_20141125_162638_outLine
+BABEL_OP3_402_65298_20150130_232120_inLine
+BABEL_OP3_402_65298_20150130_232120_outLine
+BABEL_OP3_402_65723_20141022_231832_inLine
+BABEL_OP3_402_65723_20141022_231832_outLine
+BABEL_OP3_402_66001_20140921_123931_inLine
+BABEL_OP3_402_66001_20140921_123931_outLine
+BABEL_OP3_402_66045_20141115_162944_inLine
+BABEL_OP3_402_66045_20141115_162944_outLine
+BABEL_OP3_402_67152_20150107_163104_inLine
+BABEL_OP3_402_67152_20150107_163104_outLine
+BABEL_OP3_402_67373_20141014_152719_inLine
+BABEL_OP3_402_67373_20141014_152719_outLine
+BABEL_OP3_402_68627_20141107_033600_inLine
+BABEL_OP3_402_68627_20141107_033600_outLine
+BABEL_OP3_402_69107_20141123_145802_inLine
+BABEL_OP3_402_69107_20141123_145802_outLine
+BABEL_OP3_402_69574_20140915_170204_inLine
+BABEL_OP3_402_69574_20140915_170204_outLine
+BABEL_OP3_402_70282_20141128_162640_inLine
+BABEL_OP3_402_70282_20141128_162640_outLine
+BABEL_OP3_402_70601_20141104_190522_inLine
+BABEL_OP3_402_70601_20141104_190522_outLine
+BABEL_OP3_402_70794_20141122_201302_inLine
+BABEL_OP3_402_70794_20141122_201302_outLine
+BABEL_OP3_402_71566_20150109_002519_inLine
+BABEL_OP3_402_71566_20150109_002519_outLine
+BABEL_OP3_402_71704_20141030_192615_inLine
+BABEL_OP3_402_71704_20141030_192615_outLine
+BABEL_OP3_402_72844_20150216_194719_inLine
+BABEL_OP3_402_72844_20150216_194719_outLine
+BABEL_OP3_402_73022_20150103_135209_inLine
+BABEL_OP3_402_73022_20150103_135209_outLine
+BABEL_OP3_402_73757_20141115_190524_inLine
+BABEL_OP3_402_73757_20141115_190524_outLine
+BABEL_OP3_402_74111_20150102_112305_inLine
+BABEL_OP3_402_74111_20150102_112305_outLine
+BABEL_OP3_402_74455_20150201_180158_inLine
+BABEL_OP3_402_74455_20150201_180158_outLine
+BABEL_OP3_402_74799_20141129_202734_inLine
+BABEL_OP3_402_74799_20141129_202734_outLine
+BABEL_OP3_402_75764_20150202_000719_inLine
+BABEL_OP3_402_75764_20150202_000719_outLine
+BABEL_OP3_402_75993_20141021_183118_inLine
+BABEL_OP3_402_75993_20141021_183118_outLine
+BABEL_OP3_402_78360_20150131_163647_inLine
+BABEL_OP3_402_78360_20150131_163647_outLine
+BABEL_OP3_402_78630_20140930_135924_inLine
+BABEL_OP3_402_78630_20140930_135924_outLine
+BABEL_OP3_402_79751_20141104_200346_inLine
+BABEL_OP3_402_79751_20141104_200346_outLine
+BABEL_OP3_402_79751_20141104_201600_inLine
+BABEL_OP3_402_79751_20141104_201600_outLine
+BABEL_OP3_402_80439_20141104_195124_inLine
+BABEL_OP3_402_80439_20141104_195124_outLine
+BABEL_OP3_402_82224_20150101_162311_inLine
+BABEL_OP3_402_82224_20150101_162311_outLine
+BABEL_OP3_402_82637_20141006_173314_inLine
+BABEL_OP3_402_82637_20141006_173314_outLine
+BABEL_OP3_402_83238_20141122_140740_inLine
+BABEL_OP3_402_83238_20141122_140740_outLine
+BABEL_OP3_402_83436_20141017_162042_inLine
+BABEL_OP3_402_83436_20141017_162042_outLine
+BABEL_OP3_402_84061_20141107_162356_inLine
+BABEL_OP3_402_84061_20141107_162356_outLine
+BABEL_OP3_402_84611_20141023_205020_inLine
+BABEL_OP3_402_84611_20141023_205020_outLine
+BABEL_OP3_402_84737_20150129_233418_inLine
+BABEL_OP3_402_84737_20150129_233418_outLine
+BABEL_OP3_402_84815_20141225_185456_inLine
+BABEL_OP3_402_84815_20141225_185456_outLine
+BABEL_OP3_402_85248_20150109_001722_inLine
+BABEL_OP3_402_85248_20150109_001722_outLine
+BABEL_OP3_402_86191_20141105_130254_inLine
+BABEL_OP3_402_86191_20141105_130254_outLine
+BABEL_OP3_402_86722_20141101_204411_inLine
+BABEL_OP3_402_86722_20141101_204411_outLine
+BABEL_OP3_402_86952_20141105_144737_inLine
+BABEL_OP3_402_86952_20141105_144737_outLine
+BABEL_OP3_402_87179_20150203_020351_inLine
+BABEL_OP3_402_87179_20150203_020351_outLine
+BABEL_OP3_402_88776_20140921_133554_inLine
+BABEL_OP3_402_88776_20140921_133554_outLine
+BABEL_OP3_402_88873_20140930_131622_inLine
+BABEL_OP3_402_88873_20140930_131622_outLine
+BABEL_OP3_402_89794_20141213_211839_inLine
+BABEL_OP3_402_89794_20141213_211839_outLine
+BABEL_OP3_402_89877_20150107_013739_inLine
+BABEL_OP3_402_89877_20150107_013739_outLine
+BABEL_OP3_402_89877_20150107_014426_inLine
+BABEL_OP3_402_89877_20150107_014426_outLine
+BABEL_OP3_402_90777_20141106_234557_inLine
+BABEL_OP3_402_90777_20141106_234557_outLine
+BABEL_OP3_402_91884_20150302_183207_inLine
+BABEL_OP3_402_91884_20150302_183207_outLine
+BABEL_OP3_402_91891_20150108_203636_inLine
+BABEL_OP3_402_91891_20150108_203636_outLine
+BABEL_OP3_402_91977_20141225_143539_inLine
+BABEL_OP3_402_91977_20141225_143539_outLine
+BABEL_OP3_402_92356_20150109_005846_inLine
+BABEL_OP3_402_92356_20150109_005846_outLine
+BABEL_OP3_402_92459_20141102_124516_inLine
+BABEL_OP3_402_92459_20141102_124516_outLine
+BABEL_OP3_402_92557_20150201_205110_inLine
+BABEL_OP3_402_92557_20150201_205110_outLine
+BABEL_OP3_402_92698_20141115_182138_inLine
+BABEL_OP3_402_92698_20141115_182138_outLine
+BABEL_OP3_402_93475_20141119_140615_inLine
+BABEL_OP3_402_93475_20141119_140615_outLine
+BABEL_OP3_402_93490_20150106_174211_inLine
+BABEL_OP3_402_93490_20150106_174211_outLine
+BABEL_OP3_402_94002_20141216_015659_inLine
+BABEL_OP3_402_94002_20141216_015659_outLine
+BABEL_OP3_402_94166_20150128_151103_inLine
+BABEL_OP3_402_94166_20150128_151103_outLine
+BABEL_OP3_402_94409_20141214_185032_inLine
+BABEL_OP3_402_94409_20141214_185032_outLine
+BABEL_OP3_402_94923_20141201_154601_inLine
+BABEL_OP3_402_94923_20141201_154601_outLine
+BABEL_OP3_402_96190_20141103_161533_inLine
+BABEL_OP3_402_96190_20141103_161533_outLine
+BABEL_OP3_402_96205_20141126_152921_inLine
+BABEL_OP3_402_96205_20141126_152921_outLine
+BABEL_OP3_402_97264_20150131_205411_inLine
+BABEL_OP3_402_97264_20150131_205411_outLine
+BABEL_OP3_402_97772_20140915_200919_inLine
+BABEL_OP3_402_97772_20140915_200919_outLine
+BABEL_OP3_402_97896_20141122_161128_inLine
+BABEL_OP3_402_97896_20141122_161128_outLine
+BABEL_OP3_402_98165_20141106_191239_inLine
+BABEL_OP3_402_98165_20141106_191239_outLine
+BABEL_OP3_402_98888_20141108_211953_inLine
+BABEL_OP3_402_98888_20141108_211953_outLine
+BABEL_OP3_402_99202_20141123_162817_inLine
+BABEL_OP3_402_99202_20141123_162817_outLine
+BABEL_OP3_402_99516_20140917_174712_inLine
+BABEL_OP3_402_99516_20140917_174712_outLine
+BABEL_OP3_402_99594_20141111_170413_inLine
+BABEL_OP3_402_99594_20141111_170413_outLine
+BABEL_OP3_402_99887_20150104_230431_inLine
+BABEL_OP3_402_99887_20150104_230431_outLine
diff --git a/egs/babel/s5d/conf/lists/402-javanese/training.list b/egs/babel/s5d/conf/lists/402-javanese/training.list
new file mode 100644
index 00000000000..ce7313fceeb
--- /dev/null
+++ b/egs/babel/s5d/conf/lists/402-javanese/training.list
@@ -0,0 +1,492 @@
+BABEL_OP3_402_10416_20141126_133029_inLine
+BABEL_OP3_402_10416_20141126_133029_outLine
+BABEL_OP3_402_10901_20141116_141701_inLine
+BABEL_OP3_402_10901_20141116_141701_outLine
+BABEL_OP3_402_12220_20141106_021950_inLine
+BABEL_OP3_402_12220_20141106_021950_outLine
+BABEL_OP3_402_12767_20140924_184905_inLine
+BABEL_OP3_402_12767_20140924_184905_outLine
+BABEL_OP3_402_13030_20141107_173701_inLine
+BABEL_OP3_402_13030_20141107_173701_outLine
+BABEL_OP3_402_13664_20140911_160207_inLine
+BABEL_OP3_402_13664_20140911_160207_outLine
+BABEL_OP3_402_13709_20150131_161040_inLine
+BABEL_OP3_402_13709_20150131_161040_outLine
+BABEL_OP3_402_14141_20150215_162503_inLine
+BABEL_OP3_402_14141_20150215_162503_outLine
+BABEL_OP3_402_14229_20141108_200257_inLine
+BABEL_OP3_402_14229_20141108_200257_outLine
+BABEL_OP3_402_14350_20141104_165111_inLine
+BABEL_OP3_402_14350_20141104_165111_outLine
+BABEL_OP3_402_14807_20141126_174048_inLine
+BABEL_OP3_402_14807_20141126_174048_outLine
+BABEL_OP3_402_14875_20140929_193054_inLine
+BABEL_OP3_402_14875_20140929_193054_outLine
+BABEL_OP3_402_14899_20140925_165651_inLine
+BABEL_OP3_402_14899_20140925_165651_outLine
+BABEL_OP3_402_14929_20141110_005633_inLine
+BABEL_OP3_402_14929_20141110_005633_outLine
+BABEL_OP3_402_14972_20141123_182012_inLine
+BABEL_OP3_402_14972_20141123_182012_outLine
+BABEL_OP3_402_15163_20141123_152731_inLine
+BABEL_OP3_402_15163_20141123_152731_outLine
+BABEL_OP3_402_15262_20140922_152302_inLine
+BABEL_OP3_402_15262_20140922_152302_outLine
+BABEL_OP3_402_15749_20150105_125933_inLine
+BABEL_OP3_402_15749_20150105_125933_outLine
+BABEL_OP3_402_16184_20141120_143943_inLine
+BABEL_OP3_402_16184_20141120_143943_outLine
+BABEL_OP3_402_16749_20150110_182247_inLine
+BABEL_OP3_402_16749_20150110_182247_outLine
+BABEL_OP3_402_16787_20141107_025835_inLine
+BABEL_OP3_402_16787_20141107_025835_outLine
+BABEL_OP3_402_17520_20141123_170854_inLine
+BABEL_OP3_402_17520_20141123_170854_outLine
+BABEL_OP3_402_17890_20150108_163627_inLine
+BABEL_OP3_402_17890_20150108_163627_outLine
+BABEL_OP3_402_17914_20150107_192833_inLine
+BABEL_OP3_402_17914_20150107_192833_outLine
+BABEL_OP3_402_18380_20141113_173424_inLine
+BABEL_OP3_402_18380_20141113_173424_outLine
+BABEL_OP3_402_19134_20141130_163804_inLine
+BABEL_OP3_402_19134_20141130_163804_outLine
+BABEL_OP3_402_19621_20141123_181810_inLine
+BABEL_OP3_402_19621_20141123_181810_outLine
+BABEL_OP3_402_19672_20141208_162907_inLine
+BABEL_OP3_402_19672_20141208_162907_outLine
+BABEL_OP3_402_19703_20141102_190851_inLine
+BABEL_OP3_402_19703_20141102_190851_outLine
+BABEL_OP3_402_20330_20150131_162055_inLine
+BABEL_OP3_402_20330_20150131_162055_outLine
+BABEL_OP3_402_20738_20150201_004014_inLine
+BABEL_OP3_402_20738_20150201_004014_outLine
+BABEL_OP3_402_20768_20150110_125415_inLine
+BABEL_OP3_402_20768_20150110_125415_outLine
+BABEL_OP3_402_20800_20141013_185736_inLine
+BABEL_OP3_402_20800_20141013_185736_outLine
+BABEL_OP3_402_20922_20150131_235414_inLine
+BABEL_OP3_402_20922_20150131_235414_outLine
+BABEL_OP3_402_20985_20141209_223858_inLine
+BABEL_OP3_402_20985_20141209_223858_outLine
+BABEL_OP3_402_21004_20150108_210410_inLine
+BABEL_OP3_402_21004_20150108_210410_outLine
+BABEL_OP3_402_21794_20141110_000434_inLine
+BABEL_OP3_402_21794_20141110_000434_outLine
+BABEL_OP3_402_22170_20150108_185847_inLine
+BABEL_OP3_402_22170_20150108_185847_outLine
+BABEL_OP3_402_22494_20150127_212514_inLine
+BABEL_OP3_402_22494_20150127_212514_outLine
+BABEL_OP3_402_23151_20150110_202409_inLine
+BABEL_OP3_402_23151_20150110_202409_outLine
+BABEL_OP3_402_23731_20141120_162409_inLine
+BABEL_OP3_402_23731_20141120_162409_outLine
+BABEL_OP3_402_23731_20141120_163618_inLine
+BABEL_OP3_402_23731_20141120_163618_outLine
+BABEL_OP3_402_24270_20141127_181536_inLine
+BABEL_OP3_402_24270_20141127_181536_outLine
+BABEL_OP3_402_24323_20141111_182649_inLine
+BABEL_OP3_402_24323_20141111_182649_outLine
+BABEL_OP3_402_24470_20141205_154028_inLine
+BABEL_OP3_402_24470_20141205_154028_outLine
+BABEL_OP3_402_24589_20141106_144156_inLine
+BABEL_OP3_402_24589_20141106_144156_outLine
+BABEL_OP3_402_25412_20141128_212603_inLine
+BABEL_OP3_402_25412_20141128_212603_outLine
+BABEL_OP3_402_26072_20150131_110154_inLine
+BABEL_OP3_402_26072_20150131_110154_outLine
+BABEL_OP3_402_26398_20150304_162600_inLine
+BABEL_OP3_402_26398_20150304_162600_outLine
+BABEL_OP3_402_28303_20141122_153440_inLine
+BABEL_OP3_402_28303_20141122_153440_outLine
+BABEL_OP3_402_29021_20150131_010036_inLine
+BABEL_OP3_402_29021_20150131_010036_outLine
+BABEL_OP3_402_29076_20141121_164742_inLine
+BABEL_OP3_402_29076_20141121_164742_outLine
+BABEL_OP3_402_29168_20140926_164602_inLine
+BABEL_OP3_402_29168_20140926_164602_outLine
+BABEL_OP3_402_29323_20150108_000937_inLine
+BABEL_OP3_402_29323_20150108_000937_outLine
+BABEL_OP3_402_30250_20140929_162020_inLine
+BABEL_OP3_402_30250_20140929_162020_outLine
+BABEL_OP3_402_31184_20141112_204308_inLine
+BABEL_OP3_402_31184_20141112_204308_outLine
+BABEL_OP3_402_31346_20150106_163812_inLine
+BABEL_OP3_402_31346_20150106_163812_outLine
+BABEL_OP3_402_31346_20150107_000948_inLine
+BABEL_OP3_402_31346_20150107_000948_outLine
+BABEL_OP3_402_31624_20141017_204521_inLine
+BABEL_OP3_402_31624_20141017_204521_outLine
+BABEL_OP3_402_31992_20141104_154739_inLine
+BABEL_OP3_402_31992_20141104_154739_outLine
+BABEL_OP3_402_32708_20141127_210435_inLine
+BABEL_OP3_402_32708_20141127_210435_outLine
+BABEL_OP3_402_32832_20150214_160609_inLine
+BABEL_OP3_402_32832_20150214_160609_outLine
+BABEL_OP3_402_32837_20150114_173357_inLine
+BABEL_OP3_402_32837_20150114_173357_outLine
+BABEL_OP3_402_33175_20141011_151643_inLine
+BABEL_OP3_402_33175_20141011_151643_outLine
+BABEL_OP3_402_33355_20141222_030242_inLine
+BABEL_OP3_402_33355_20141222_030242_outLine
+BABEL_OP3_402_33704_20150108_121853_inLine
+BABEL_OP3_402_33704_20150108_121853_outLine
+BABEL_OP3_402_33951_20141115_015656_inLine
+BABEL_OP3_402_33951_20141115_015656_outLine
+BABEL_OP3_402_34336_20141101_214014_inLine
+BABEL_OP3_402_34336_20141101_214014_outLine
+BABEL_OP3_402_34477_20141103_012729_inLine
+BABEL_OP3_402_34477_20141103_012729_outLine
+BABEL_OP3_402_34564_20150110_174105_inLine
+BABEL_OP3_402_34564_20150110_174105_outLine
+BABEL_OP3_402_34679_20141012_230850_inLine
+BABEL_OP3_402_34679_20141012_230850_outLine
+BABEL_OP3_402_34688_20141027_170150_inLine
+BABEL_OP3_402_34688_20141027_170150_outLine
+BABEL_OP3_402_35069_20150216_023523_inLine
+BABEL_OP3_402_35069_20150216_023523_outLine
+BABEL_OP3_402_35583_20150121_013548_inLine
+BABEL_OP3_402_35583_20150121_013548_outLine
+BABEL_OP3_402_37228_20150120_211131_inLine
+BABEL_OP3_402_37228_20150120_211131_outLine
+BABEL_OP3_402_37281_20141110_214558_inLine
+BABEL_OP3_402_37281_20141110_214558_outLine
+BABEL_OP3_402_37682_20141103_210556_inLine
+BABEL_OP3_402_37682_20141103_210556_outLine
+BABEL_OP3_402_37853_20150107_154609_inLine
+BABEL_OP3_402_37853_20150107_154609_outLine
+BABEL_OP3_402_38340_20141020_170141_inLine
+BABEL_OP3_402_38340_20141020_170141_outLine
+BABEL_OP3_402_38431_20150104_193523_inLine
+BABEL_OP3_402_38431_20150104_193523_outLine
+BABEL_OP3_402_39059_20150201_151819_inLine
+BABEL_OP3_402_39059_20150201_151819_outLine
+BABEL_OP3_402_39159_20140930_201318_inLine
+BABEL_OP3_402_39159_20140930_201318_outLine
+BABEL_OP3_402_39426_20150202_103633_inLine
+BABEL_OP3_402_39426_20150202_103633_outLine
+BABEL_OP3_402_39680_20150131_151358_inLine
+BABEL_OP3_402_39680_20150131_151358_outLine
+BABEL_OP3_402_39920_20150216_014707_inLine
+BABEL_OP3_402_39920_20150216_014707_outLine
+BABEL_OP3_402_41109_20150101_021923_inLine
+BABEL_OP3_402_41109_20150101_021923_outLine
+BABEL_OP3_402_41680_20140911_133458_inLine
+BABEL_OP3_402_41680_20140911_133458_outLine
+BABEL_OP3_402_43239_20150205_011521_inLine
+BABEL_OP3_402_43239_20150205_011521_outLine
+BABEL_OP3_402_43368_20141107_210043_inLine
+BABEL_OP3_402_43368_20141107_210043_outLine
+BABEL_OP3_402_43784_20141027_205748_inLine
+BABEL_OP3_402_43784_20141027_205748_outLine
+BABEL_OP3_402_43920_20141228_001637_inLine
+BABEL_OP3_402_43920_20141228_001637_outLine
+BABEL_OP3_402_44255_20150131_183155_inLine
+BABEL_OP3_402_44255_20150131_183155_outLine
+BABEL_OP3_402_44961_20140921_154533_inLine
+BABEL_OP3_402_44961_20140921_154533_outLine
+BABEL_OP3_402_45536_20150131_234119_inLine
+BABEL_OP3_402_45536_20150131_234119_outLine
+BABEL_OP3_402_46688_20140927_210143_inLine
+BABEL_OP3_402_46688_20140927_210143_outLine
+BABEL_OP3_402_46702_20140929_141902_inLine
+BABEL_OP3_402_46702_20140929_141902_outLine
+BABEL_OP3_402_46770_20150124_001351_inLine
+BABEL_OP3_402_46770_20150124_001351_outLine
+BABEL_OP3_402_46881_20141028_192343_inLine
+BABEL_OP3_402_46881_20141028_192343_outLine
+BABEL_OP3_402_47270_20150128_163211_inLine
+BABEL_OP3_402_47270_20150128_163211_outLine
+BABEL_OP3_402_48243_20141031_160102_inLine
+BABEL_OP3_402_48243_20141031_160102_outLine
+BABEL_OP3_402_48422_20150101_193320_inLine
+BABEL_OP3_402_48422_20150101_193320_outLine
+BABEL_OP3_402_48422_20150101_194803_inLine
+BABEL_OP3_402_48422_20150101_194803_outLine
+BABEL_OP3_402_48610_20140920_172026_inLine
+BABEL_OP3_402_48610_20140920_172026_outLine
+BABEL_OP3_402_48789_20141113_181720_inLine
+BABEL_OP3_402_48789_20141113_181720_outLine
+BABEL_OP3_402_49001_20141010_142908_inLine
+BABEL_OP3_402_49001_20141010_142908_outLine
+BABEL_OP3_402_49001_20141010_152312_inLine
+BABEL_OP3_402_49001_20141010_152312_outLine
+BABEL_OP3_402_49197_20141123_183541_inLine
+BABEL_OP3_402_49197_20141123_183541_outLine
+BABEL_OP3_402_49502_20150201_200343_inLine
+BABEL_OP3_402_49502_20150201_200343_outLine
+BABEL_OP3_402_49907_20141006_162735_inLine
+BABEL_OP3_402_49907_20141006_162735_outLine
+BABEL_OP3_402_50601_20141121_182643_inLine
+BABEL_OP3_402_50601_20141121_182643_outLine
+BABEL_OP3_402_50779_20141124_211935_inLine
+BABEL_OP3_402_50779_20141124_211935_outLine
+BABEL_OP3_402_50810_20140912_181008_inLine
+BABEL_OP3_402_50810_20140912_181008_outLine
+BABEL_OP3_402_50962_20141004_143222_inLine +BABEL_OP3_402_50962_20141004_143222_outLine +BABEL_OP3_402_51015_20141209_214156_inLine +BABEL_OP3_402_51015_20141209_214156_outLine +BABEL_OP3_402_51540_20150131_203108_inLine +BABEL_OP3_402_51540_20150131_203108_outLine +BABEL_OP3_402_51611_20141010_163542_inLine +BABEL_OP3_402_51611_20141010_163542_outLine +BABEL_OP3_402_51968_20141109_154701_inLine +BABEL_OP3_402_51968_20141109_154701_outLine +BABEL_OP3_402_52246_20141115_174547_inLine +BABEL_OP3_402_52246_20141115_174547_outLine +BABEL_OP3_402_52422_20150128_142229_inLine +BABEL_OP3_402_52422_20150128_142229_outLine +BABEL_OP3_402_52854_20140910_200850_inLine +BABEL_OP3_402_52854_20140910_200850_outLine +BABEL_OP3_402_52932_20141007_182635_inLine +BABEL_OP3_402_52932_20141007_182635_outLine +BABEL_OP3_402_54074_20141110_001507_inLine +BABEL_OP3_402_54074_20141110_001507_outLine +BABEL_OP3_402_54104_20141104_173741_inLine +BABEL_OP3_402_54104_20141104_173741_outLine +BABEL_OP3_402_54405_20141123_173044_inLine +BABEL_OP3_402_54405_20141123_173044_outLine +BABEL_OP3_402_55267_20141221_184118_inLine +BABEL_OP3_402_55267_20141221_184118_outLine +BABEL_OP3_402_56198_20141103_152946_inLine +BABEL_OP3_402_56198_20141103_152946_outLine +BABEL_OP3_402_56720_20141228_190653_inLine +BABEL_OP3_402_56720_20141228_190653_outLine +BABEL_OP3_402_57065_20141213_175712_inLine +BABEL_OP3_402_57065_20141213_175712_outLine +BABEL_OP3_402_57650_20150107_171335_inLine +BABEL_OP3_402_57650_20150107_171335_outLine +BABEL_OP3_402_57654_20141031_172711_inLine +BABEL_OP3_402_57654_20141031_172711_outLine +BABEL_OP3_402_57922_20141130_172609_inLine +BABEL_OP3_402_57922_20141130_172609_outLine +BABEL_OP3_402_58313_20141121_191107_inLine +BABEL_OP3_402_58313_20141121_191107_outLine +BABEL_OP3_402_58489_20150110_155118_inLine +BABEL_OP3_402_58489_20150110_155118_outLine +BABEL_OP3_402_58850_20141115_223848_inLine +BABEL_OP3_402_58850_20141115_223848_outLine +BABEL_OP3_402_59078_20141127_201549_inLine +BABEL_OP3_402_59078_20141127_201549_outLine +BABEL_OP3_402_59402_20150103_181612_inLine +BABEL_OP3_402_59402_20150103_181612_outLine +BABEL_OP3_402_60418_20141219_231820_inLine +BABEL_OP3_402_60418_20141219_231820_outLine +BABEL_OP3_402_60474_20141101_195523_inLine +BABEL_OP3_402_60474_20141101_195523_outLine +BABEL_OP3_402_61167_20141106_195710_inLine +BABEL_OP3_402_61167_20141106_195710_outLine +BABEL_OP3_402_61219_20141101_192955_inLine +BABEL_OP3_402_61219_20141101_192955_outLine +BABEL_OP3_402_61888_20150108_210230_inLine +BABEL_OP3_402_61888_20150108_210230_outLine +BABEL_OP3_402_62456_20141203_005134_inLine +BABEL_OP3_402_62456_20141203_005134_outLine +BABEL_OP3_402_62800_20141028_170241_inLine +BABEL_OP3_402_62800_20141028_170241_outLine +BABEL_OP3_402_62810_20140917_184635_inLine +BABEL_OP3_402_62810_20140917_184635_outLine +BABEL_OP3_402_63081_20141003_151638_inLine +BABEL_OP3_402_63081_20141003_151638_outLine +BABEL_OP3_402_64014_20150108_162849_inLine +BABEL_OP3_402_64014_20150108_162849_outLine +BABEL_OP3_402_64065_20141020_152452_inLine +BABEL_OP3_402_64065_20141020_152452_outLine +BABEL_OP3_402_64768_20141116_180927_inLine +BABEL_OP3_402_64768_20141116_180927_outLine +BABEL_OP3_402_64796_20141122_163640_inLine +BABEL_OP3_402_64796_20141122_163640_outLine +BABEL_OP3_402_64870_20141228_184201_inLine +BABEL_OP3_402_64870_20141228_184201_outLine +BABEL_OP3_402_65064_20141125_162638_inLine +BABEL_OP3_402_65064_20141125_162638_outLine +BABEL_OP3_402_65298_20150130_232120_inLine 
+BABEL_OP3_402_65298_20150130_232120_outLine +BABEL_OP3_402_65367_20150103_224736_inLine +BABEL_OP3_402_65367_20150103_224736_outLine +BABEL_OP3_402_65692_20141228_202914_inLine +BABEL_OP3_402_65692_20141228_202914_outLine +BABEL_OP3_402_65723_20141022_231832_inLine +BABEL_OP3_402_65723_20141022_231832_outLine +BABEL_OP3_402_66001_20140921_123931_inLine +BABEL_OP3_402_66001_20140921_123931_outLine +BABEL_OP3_402_66045_20141115_162944_inLine +BABEL_OP3_402_66045_20141115_162944_outLine +BABEL_OP3_402_66177_20150131_201057_inLine +BABEL_OP3_402_66177_20150131_201057_outLine +BABEL_OP3_402_67152_20150107_163104_inLine +BABEL_OP3_402_67152_20150107_163104_outLine +BABEL_OP3_402_67373_20141014_152719_inLine +BABEL_OP3_402_67373_20141014_152719_outLine +BABEL_OP3_402_68627_20141107_033600_inLine +BABEL_OP3_402_68627_20141107_033600_outLine +BABEL_OP3_402_69107_20141123_145802_inLine +BABEL_OP3_402_69107_20141123_145802_outLine +BABEL_OP3_402_69574_20140915_170204_inLine +BABEL_OP3_402_69574_20140915_170204_outLine +BABEL_OP3_402_70221_20141222_002645_inLine +BABEL_OP3_402_70221_20141222_002645_outLine +BABEL_OP3_402_70282_20141128_162640_inLine +BABEL_OP3_402_70282_20141128_162640_outLine +BABEL_OP3_402_70601_20141104_190522_inLine +BABEL_OP3_402_70601_20141104_190522_outLine +BABEL_OP3_402_70794_20141122_201302_inLine +BABEL_OP3_402_70794_20141122_201302_outLine +BABEL_OP3_402_71566_20150109_002519_inLine +BABEL_OP3_402_71566_20150109_002519_outLine +BABEL_OP3_402_71704_20141030_192615_inLine +BABEL_OP3_402_71704_20141030_192615_outLine +BABEL_OP3_402_72844_20150216_194719_inLine +BABEL_OP3_402_72844_20150216_194719_outLine +BABEL_OP3_402_73022_20150103_135209_inLine +BABEL_OP3_402_73022_20150103_135209_outLine +BABEL_OP3_402_73119_20141031_182314_inLine +BABEL_OP3_402_73119_20141031_182314_outLine +BABEL_OP3_402_73301_20141117_004450_inLine +BABEL_OP3_402_73301_20141117_004450_outLine +BABEL_OP3_402_73757_20141115_190524_inLine +BABEL_OP3_402_73757_20141115_190524_outLine +BABEL_OP3_402_74111_20150102_112305_inLine +BABEL_OP3_402_74111_20150102_112305_outLine +BABEL_OP3_402_74455_20150201_180158_inLine +BABEL_OP3_402_74455_20150201_180158_outLine +BABEL_OP3_402_74799_20141129_202734_inLine +BABEL_OP3_402_74799_20141129_202734_outLine +BABEL_OP3_402_75764_20150202_000719_inLine +BABEL_OP3_402_75764_20150202_000719_outLine +BABEL_OP3_402_75993_20141021_183118_inLine +BABEL_OP3_402_75993_20141021_183118_outLine +BABEL_OP3_402_76444_20141227_143452_inLine +BABEL_OP3_402_76444_20141227_143452_outLine +BABEL_OP3_402_76683_20141128_201732_inLine +BABEL_OP3_402_76683_20141128_201732_outLine +BABEL_OP3_402_78116_20141229_210212_inLine +BABEL_OP3_402_78116_20141229_210212_outLine +BABEL_OP3_402_78254_20141101_235022_inLine +BABEL_OP3_402_78254_20141101_235022_outLine +BABEL_OP3_402_78360_20150131_163647_inLine +BABEL_OP3_402_78360_20150131_163647_outLine +BABEL_OP3_402_78630_20140930_135924_inLine +BABEL_OP3_402_78630_20140930_135924_outLine +BABEL_OP3_402_79139_20141115_153558_inLine +BABEL_OP3_402_79139_20141115_153558_outLine +BABEL_OP3_402_79751_20141104_200346_inLine +BABEL_OP3_402_79751_20141104_200346_outLine +BABEL_OP3_402_79751_20141104_201600_inLine +BABEL_OP3_402_79751_20141104_201600_outLine +BABEL_OP3_402_80439_20141104_195124_inLine +BABEL_OP3_402_80439_20141104_195124_outLine +BABEL_OP3_402_81229_20141116_224932_inLine +BABEL_OP3_402_81229_20141116_224932_outLine +BABEL_OP3_402_81427_20141110_165047_inLine +BABEL_OP3_402_81427_20141110_165047_outLine 
+BABEL_OP3_402_82089_20141113_162038_inLine +BABEL_OP3_402_82089_20141113_162038_outLine +BABEL_OP3_402_82224_20150101_162311_inLine +BABEL_OP3_402_82224_20150101_162311_outLine +BABEL_OP3_402_82637_20141006_173314_inLine +BABEL_OP3_402_82637_20141006_173314_outLine +BABEL_OP3_402_83238_20141122_140740_inLine +BABEL_OP3_402_83238_20141122_140740_outLine +BABEL_OP3_402_83436_20141017_162042_inLine +BABEL_OP3_402_83436_20141017_162042_outLine +BABEL_OP3_402_83651_20141009_145412_inLine +BABEL_OP3_402_83651_20141009_145412_outLine +BABEL_OP3_402_84061_20141107_162356_inLine +BABEL_OP3_402_84061_20141107_162356_outLine +BABEL_OP3_402_84611_20141023_205020_inLine +BABEL_OP3_402_84611_20141023_205020_outLine +BABEL_OP3_402_84737_20150129_233418_inLine +BABEL_OP3_402_84737_20150129_233418_outLine +BABEL_OP3_402_84815_20141225_185456_inLine +BABEL_OP3_402_84815_20141225_185456_outLine +BABEL_OP3_402_85048_20141204_194855_inLine +BABEL_OP3_402_85048_20141204_194855_outLine +BABEL_OP3_402_85248_20150109_001722_inLine +BABEL_OP3_402_85248_20150109_001722_outLine +BABEL_OP3_402_85340_20141021_182050_inLine +BABEL_OP3_402_85340_20141021_182050_outLine +BABEL_OP3_402_86191_20141105_130254_inLine +BABEL_OP3_402_86191_20141105_130254_outLine +BABEL_OP3_402_86713_20150101_014831_inLine +BABEL_OP3_402_86713_20150101_014831_outLine +BABEL_OP3_402_86722_20141101_204411_inLine +BABEL_OP3_402_86722_20141101_204411_outLine +BABEL_OP3_402_86952_20141105_144737_inLine +BABEL_OP3_402_86952_20141105_144737_outLine +BABEL_OP3_402_87073_20140915_154336_inLine +BABEL_OP3_402_87073_20140915_154336_outLine +BABEL_OP3_402_87179_20150203_020351_inLine +BABEL_OP3_402_87179_20150203_020351_outLine +BABEL_OP3_402_87871_20141224_130949_inLine +BABEL_OP3_402_87871_20141224_130949_outLine +BABEL_OP3_402_88601_20141209_160621_inLine +BABEL_OP3_402_88601_20141209_160621_outLine +BABEL_OP3_402_88776_20140921_133554_inLine +BABEL_OP3_402_88776_20140921_133554_outLine +BABEL_OP3_402_88873_20140930_131622_inLine +BABEL_OP3_402_88873_20140930_131622_outLine +BABEL_OP3_402_89794_20141213_211839_inLine +BABEL_OP3_402_89794_20141213_211839_outLine +BABEL_OP3_402_89877_20150107_013739_inLine +BABEL_OP3_402_89877_20150107_013739_outLine +BABEL_OP3_402_89877_20150107_014426_inLine +BABEL_OP3_402_89877_20150107_014426_outLine +BABEL_OP3_402_90777_20141106_234557_inLine +BABEL_OP3_402_90777_20141106_234557_outLine +BABEL_OP3_402_91884_20150302_183207_inLine +BABEL_OP3_402_91884_20150302_183207_outLine +BABEL_OP3_402_91891_20150108_203636_inLine +BABEL_OP3_402_91891_20150108_203636_outLine +BABEL_OP3_402_91977_20141225_143539_inLine +BABEL_OP3_402_91977_20141225_143539_outLine +BABEL_OP3_402_92356_20150109_005846_inLine +BABEL_OP3_402_92356_20150109_005846_outLine +BABEL_OP3_402_92459_20141102_124516_inLine +BABEL_OP3_402_92459_20141102_124516_outLine +BABEL_OP3_402_92557_20150201_205110_inLine +BABEL_OP3_402_92557_20150201_205110_outLine +BABEL_OP3_402_92698_20141115_182138_inLine +BABEL_OP3_402_92698_20141115_182138_outLine +BABEL_OP3_402_93475_20141119_140615_inLine +BABEL_OP3_402_93475_20141119_140615_outLine +BABEL_OP3_402_93490_20150106_174211_inLine +BABEL_OP3_402_93490_20150106_174211_outLine +BABEL_OP3_402_93604_20150304_152208_inLine +BABEL_OP3_402_93604_20150304_152208_outLine +BABEL_OP3_402_93964_20141216_021155_inLine +BABEL_OP3_402_93964_20141216_021155_outLine +BABEL_OP3_402_94002_20141216_015659_inLine +BABEL_OP3_402_94002_20141216_015659_outLine +BABEL_OP3_402_94166_20150128_151103_inLine 
+BABEL_OP3_402_94166_20150128_151103_outLine +BABEL_OP3_402_94409_20141214_185032_inLine +BABEL_OP3_402_94409_20141214_185032_outLine +BABEL_OP3_402_94869_20140912_195117_inLine +BABEL_OP3_402_94869_20140912_195117_outLine +BABEL_OP3_402_94923_20141201_154601_inLine +BABEL_OP3_402_94923_20141201_154601_outLine +BABEL_OP3_402_95446_20150110_150658_inLine +BABEL_OP3_402_95446_20150110_150658_outLine +BABEL_OP3_402_96190_20141103_161533_inLine +BABEL_OP3_402_96190_20141103_161533_outLine +BABEL_OP3_402_96205_20141126_152921_inLine +BABEL_OP3_402_96205_20141126_152921_outLine +BABEL_OP3_402_97264_20150131_205411_inLine +BABEL_OP3_402_97264_20150131_205411_outLine +BABEL_OP3_402_97772_20140915_200919_inLine +BABEL_OP3_402_97772_20140915_200919_outLine +BABEL_OP3_402_97896_20141122_161128_inLine +BABEL_OP3_402_97896_20141122_161128_outLine +BABEL_OP3_402_98165_20141106_191239_inLine +BABEL_OP3_402_98165_20141106_191239_outLine +BABEL_OP3_402_98888_20141108_211953_inLine +BABEL_OP3_402_98888_20141108_211953_outLine +BABEL_OP3_402_99202_20141123_162817_inLine +BABEL_OP3_402_99202_20141123_162817_outLine +BABEL_OP3_402_99516_20140917_174712_inLine +BABEL_OP3_402_99516_20140917_174712_outLine +BABEL_OP3_402_99594_20141111_170413_inLine +BABEL_OP3_402_99594_20141111_170413_outLine +BABEL_OP3_402_99887_20150104_230431_inLine +BABEL_OP3_402_99887_20150104_230431_outLine diff --git a/egs/babel/s5d/conf/lists/402-javanese/untranscribed-training.list b/egs/babel/s5d/conf/lists/402-javanese/untranscribed-training.list new file mode 100644 index 00000000000..f37a27dda8a --- /dev/null +++ b/egs/babel/s5d/conf/lists/402-javanese/untranscribed-training.list @@ -0,0 +1,519 @@ +BABEL_OP3_402_10188_20140910_192244_inLine +BABEL_OP3_402_10411_20150414_130427_inLine +BABEL_OP3_402_10411_20150414_130427_outLine +BABEL_OP3_402_11352_20150313_163143_inLine +BABEL_OP3_402_11352_20150313_163143_outLine +BABEL_OP3_402_11797_20141023_002654_inLine +BABEL_OP3_402_11859_20150414_112255_inLine +BABEL_OP3_402_11859_20150414_112255_outLine +BABEL_OP3_402_12846_20150402_131845_inLine +BABEL_OP3_402_12846_20150402_131845_outLine +BABEL_OP3_402_13189_20150131_234926_inLine +BABEL_OP3_402_13189_20150131_234926_outLine +BABEL_OP3_402_13427_20141119_154114_outLine +BABEL_OP3_402_13483_20150212_013903_inLine +BABEL_OP3_402_13561_20141117_221410_inLine +BABEL_OP3_402_13561_20141117_221410_outLine +BABEL_OP3_402_13776_20150415_193538_inLine +BABEL_OP3_402_13776_20150415_193538_outLine +BABEL_OP3_402_13909_20150330_234000_inLine +BABEL_OP3_402_13909_20150330_234000_outLine +BABEL_OP3_402_14537_20150403_143039_inLine +BABEL_OP3_402_14537_20150403_143039_outLine +BABEL_OP3_402_14539_20150313_172051_inLine +BABEL_OP3_402_14539_20150313_172051_outLine +BABEL_OP3_402_15042_20150315_155219_inLine +BABEL_OP3_402_15042_20150315_155219_outLine +BABEL_OP3_402_15926_20141206_172139_inLine +BABEL_OP3_402_15926_20141206_172139_outLine +BABEL_OP3_402_17032_20141227_164236_inLine +BABEL_OP3_402_17032_20141227_164236_outLine +BABEL_OP3_402_17280_20141104_204141_inLine +BABEL_OP3_402_17440_20141226_140345_inLine +BABEL_OP3_402_17440_20141226_140345_outLine +BABEL_OP3_402_17615_20141214_223314_inLine +BABEL_OP3_402_17615_20141214_223314_outLine +BABEL_OP3_402_17615_20141214_231451_inLine +BABEL_OP3_402_17615_20141214_231451_outLine +BABEL_OP3_402_17923_20141107_150744_inLine +BABEL_OP3_402_18291_20150414_140952_inLine +BABEL_OP3_402_18291_20150414_140952_outLine +BABEL_OP3_402_18759_20150310_160837_inLine +BABEL_OP3_402_18759_20150310_160837_outLine 
+BABEL_OP3_402_19120_20150310_162359_inLine +BABEL_OP3_402_19120_20150310_162359_outLine +BABEL_OP3_402_19589_20150305_151023_inLine +BABEL_OP3_402_19589_20150305_151023_outLine +BABEL_OP3_402_19722_20141017_143042_inLine +BABEL_OP3_402_19722_20141017_143042_outLine +BABEL_OP3_402_19767_20150308_132354_inLine +BABEL_OP3_402_19767_20150308_132354_outLine +BABEL_OP3_402_19767_20150308_133241_inLine +BABEL_OP3_402_19767_20150308_133241_outLine +BABEL_OP3_402_19877_20150316_132035_inLine +BABEL_OP3_402_19877_20150316_132035_outLine +BABEL_OP3_402_19877_20150317_111220_inLine +BABEL_OP3_402_19877_20150317_111220_outLine +BABEL_OP3_402_21029_20141015_150255_inLine +BABEL_OP3_402_21029_20141015_150255_outLine +BABEL_OP3_402_21244_20150415_115154_inLine +BABEL_OP3_402_21244_20150415_115154_outLine +BABEL_OP3_402_22216_20141027_200224_inLine +BABEL_OP3_402_22612_20150106_171601_inLine +BABEL_OP3_402_22612_20150106_171601_outLine +BABEL_OP3_402_22641_20140919_145256_inLine +BABEL_OP3_402_22641_20140919_145256_outLine +BABEL_OP3_402_22826_20150316_130545_inLine +BABEL_OP3_402_22826_20150316_130545_outLine +BABEL_OP3_402_22965_20141011_161936_inLine +BABEL_OP3_402_23006_20141031_183901_inLine +BABEL_OP3_402_23006_20141031_185939_inLine +BABEL_OP3_402_23190_20141124_020320_inLine +BABEL_OP3_402_23980_20141115_204429_inLine +BABEL_OP3_402_23980_20141115_204429_outLine +BABEL_OP3_402_24231_20150302_210042_inLine +BABEL_OP3_402_24231_20150302_210042_outLine +BABEL_OP3_402_24569_20141226_184242_inLine +BABEL_OP3_402_24569_20141226_184242_outLine +BABEL_OP3_402_24586_20150315_000448_inLine +BABEL_OP3_402_24586_20150315_000448_outLine +BABEL_OP3_402_25961_20140916_191139_inLine +BABEL_OP3_402_25961_20140916_191139_outLine +BABEL_OP3_402_26574_20150105_134517_inLine +BABEL_OP3_402_26836_20141007_171841_inLine +BABEL_OP3_402_26836_20141007_171841_outLine +BABEL_OP3_402_27082_20141116_214244_inLine +BABEL_OP3_402_27082_20141116_214244_outLine +BABEL_OP3_402_27203_20150131_212241_inLine +BABEL_OP3_402_27478_20150310_153450_inLine +BABEL_OP3_402_27478_20150310_153450_outLine +BABEL_OP3_402_27478_20150310_154447_inLine +BABEL_OP3_402_27478_20150310_154447_outLine +BABEL_OP3_402_27490_20150312_182925_inLine +BABEL_OP3_402_27490_20150312_182925_outLine +BABEL_OP3_402_28422_20141206_152718_inLine +BABEL_OP3_402_28422_20141206_152718_outLine +BABEL_OP3_402_28477_20150108_171338_inLine +BABEL_OP3_402_28477_20150108_171338_outLine +BABEL_OP3_402_28522_20141208_162113_inLine +BABEL_OP3_402_28522_20141208_162113_outLine +BABEL_OP3_402_28585_20150103_145903_inLine +BABEL_OP3_402_28585_20150103_145903_outLine +BABEL_OP3_402_28775_20141028_172233_inLine +BABEL_OP3_402_28775_20141028_172233_outLine +BABEL_OP3_402_29039_20141226_192341_inLine +BABEL_OP3_402_29039_20141226_192341_outLine +BABEL_OP3_402_29352_20150416_231130_inLine +BABEL_OP3_402_29352_20150416_231130_outLine +BABEL_OP3_402_29416_20150111_173045_inLine +BABEL_OP3_402_29416_20150111_173045_outLine +BABEL_OP3_402_29439_20150309_224912_inLine +BABEL_OP3_402_29439_20150309_224912_outLine +BABEL_OP3_402_30058_20150214_030757_inLine +BABEL_OP3_402_30058_20150214_030757_outLine +BABEL_OP3_402_30084_20150330_183021_inLine +BABEL_OP3_402_30084_20150330_183021_outLine +BABEL_OP3_402_30180_20141122_150213_inLine +BABEL_OP3_402_30497_20150315_020936_inLine +BABEL_OP3_402_30497_20150315_020936_outLine +BABEL_OP3_402_31484_20150108_183810_inLine +BABEL_OP3_402_31484_20150108_183810_outLine +BABEL_OP3_402_32122_20141113_151807_inLine 
+BABEL_OP3_402_32122_20141113_151807_outLine +BABEL_OP3_402_32861_20150131_115104_inLine +BABEL_OP3_402_32861_20150131_115104_outLine +BABEL_OP3_402_33216_20150417_123910_inLine +BABEL_OP3_402_33216_20150417_123910_outLine +BABEL_OP3_402_33424_20150330_235558_inLine +BABEL_OP3_402_33424_20150330_235558_outLine +BABEL_OP3_402_33424_20150331_001041_inLine +BABEL_OP3_402_33424_20150331_001041_outLine +BABEL_OP3_402_33476_20141115_210108_inLine +BABEL_OP3_402_33476_20141115_210108_outLine +BABEL_OP3_402_34486_20150412_192213_inLine +BABEL_OP3_402_34486_20150412_192213_outLine +BABEL_OP3_402_34826_20150112_001042_inLine +BABEL_OP3_402_34826_20150112_001042_outLine +BABEL_OP3_402_35467_20141028_184845_inLine +BABEL_OP3_402_35467_20141028_184845_outLine +BABEL_OP3_402_35885_20150323_112854_inLine +BABEL_OP3_402_35885_20150323_112854_outLine +BABEL_OP3_402_35885_20150323_114745_inLine +BABEL_OP3_402_35885_20150323_114745_outLine +BABEL_OP3_402_36017_20150314_194053_inLine +BABEL_OP3_402_36017_20150314_194053_outLine +BABEL_OP3_402_36039_20150216_122319_inLine +BABEL_OP3_402_36039_20150216_122319_outLine +BABEL_OP3_402_36059_20150314_212955_inLine +BABEL_OP3_402_36059_20150314_212955_outLine +BABEL_OP3_402_36594_20150215_003738_inLine +BABEL_OP3_402_36594_20150215_003738_outLine +BABEL_OP3_402_36642_20150319_114619_inLine +BABEL_OP3_402_36642_20150319_114619_outLine +BABEL_OP3_402_38689_20141217_212559_inLine +BABEL_OP3_402_38689_20141217_212559_outLine +BABEL_OP3_402_38979_20150215_215359_inLine +BABEL_OP3_402_38979_20150215_215359_outLine +BABEL_OP3_402_41174_20141115_214537_inLine +BABEL_OP3_402_41174_20141115_214537_outLine +BABEL_OP3_402_41272_20150314_043848_inLine +BABEL_OP3_402_41272_20150314_043848_outLine +BABEL_OP3_402_41469_20140919_165056_inLine +BABEL_OP3_402_41469_20140919_165056_outLine +BABEL_OP3_402_41542_20150131_173858_inLine +BABEL_OP3_402_41890_20150304_001538_inLine +BABEL_OP3_402_41890_20150304_001538_outLine +BABEL_OP3_402_42155_20141204_014246_inLine +BABEL_OP3_402_42155_20141204_014246_outLine +BABEL_OP3_402_42231_20150108_013906_inLine +BABEL_OP3_402_42231_20150108_013906_outLine +BABEL_OP3_402_42299_20150415_172418_inLine +BABEL_OP3_402_42299_20150415_172418_outLine +BABEL_OP3_402_42497_20141011_142043_inLine +BABEL_OP3_402_42771_20141125_154524_inLine +BABEL_OP3_402_42771_20141125_154524_outLine +BABEL_OP3_402_43285_20141208_172008_inLine +BABEL_OP3_402_43285_20141208_172008_outLine +BABEL_OP3_402_43388_20141124_193332_inLine +BABEL_OP3_402_43388_20141124_193332_outLine +BABEL_OP3_402_44114_20150416_181819_inLine +BABEL_OP3_402_44114_20150416_181819_outLine +BABEL_OP3_402_44114_20150416_183630_inLine +BABEL_OP3_402_44114_20150416_183630_outLine +BABEL_OP3_402_45140_20150314_190952_inLine +BABEL_OP3_402_45140_20150314_190952_outLine +BABEL_OP3_402_45235_20150201_004752_inLine +BABEL_OP3_402_45235_20150201_004752_outLine +BABEL_OP3_402_45771_20150412_195546_inLine +BABEL_OP3_402_45771_20150412_195546_outLine +BABEL_OP3_402_45777_20141106_211401_inLine +BABEL_OP3_402_45777_20141106_211401_outLine +BABEL_OP3_402_45851_20150315_161428_inLine +BABEL_OP3_402_45851_20150315_161428_outLine +BABEL_OP3_402_45851_20150315_162642_inLine +BABEL_OP3_402_45851_20150315_162642_outLine +BABEL_OP3_402_46066_20150103_140632_inLine +BABEL_OP3_402_46066_20150103_140632_outLine +BABEL_OP3_402_46169_20141229_163719_inLine +BABEL_OP3_402_46169_20141229_163719_outLine +BABEL_OP3_402_46315_20141229_191221_inLine +BABEL_OP3_402_46315_20141229_191221_outLine 
+BABEL_OP3_402_46330_20150112_002124_inLine +BABEL_OP3_402_46330_20150112_002124_outLine +BABEL_OP3_402_46589_20141217_181108_inLine +BABEL_OP3_402_46589_20141217_181108_outLine +BABEL_OP3_402_47215_20141013_000842_inLine +BABEL_OP3_402_47487_20141110_190705_inLine +BABEL_OP3_402_47487_20141110_190705_outLine +BABEL_OP3_402_47802_20141201_001110_inLine +BABEL_OP3_402_47802_20141201_001110_outLine +BABEL_OP3_402_47878_20141124_200607_inLine +BABEL_OP3_402_47878_20141124_200607_outLine +BABEL_OP3_402_48016_20150417_192509_inLine +BABEL_OP3_402_48016_20150417_192509_outLine +BABEL_OP3_402_48758_20150313_180048_inLine +BABEL_OP3_402_48758_20150313_180048_outLine +BABEL_OP3_402_48907_20150308_125109_inLine +BABEL_OP3_402_48907_20150308_125109_outLine +BABEL_OP3_402_49216_20141102_152914_inLine +BABEL_OP3_402_49216_20141102_152914_outLine +BABEL_OP3_402_49767_20150416_181833_inLine +BABEL_OP3_402_49767_20150416_181833_outLine +BABEL_OP3_402_49945_20150326_155920_inLine +BABEL_OP3_402_49945_20150326_155920_outLine +BABEL_OP3_402_50745_20150314_184529_inLine +BABEL_OP3_402_50745_20150314_184529_outLine +BABEL_OP3_402_51417_20150106_233844_outLine +BABEL_OP3_402_51819_20150108_181637_inLine +BABEL_OP3_402_51819_20150108_181637_outLine +BABEL_OP3_402_52381_20150117_151106_inLine +BABEL_OP3_402_52447_20150331_155658_inLine +BABEL_OP3_402_52447_20150331_155658_outLine +BABEL_OP3_402_52614_20150305_202702_inLine +BABEL_OP3_402_52614_20150305_202702_outLine +BABEL_OP3_402_53665_20150305_203655_inLine +BABEL_OP3_402_53665_20150305_203655_outLine +BABEL_OP3_402_54390_20141022_220633_inLine +BABEL_OP3_402_54477_20141224_161244_inLine +BABEL_OP3_402_54567_20141116_185111_inLine +BABEL_OP3_402_54567_20141116_185111_outLine +BABEL_OP3_402_54827_20150316_003319_inLine +BABEL_OP3_402_54827_20150316_003319_outLine +BABEL_OP3_402_54827_20150316_134423_inLine +BABEL_OP3_402_54827_20150316_134423_outLine +BABEL_OP3_402_55013_20150305_194735_inLine +BABEL_OP3_402_55013_20150305_194735_outLine +BABEL_OP3_402_55106_20150203_162853_inLine +BABEL_OP3_402_55106_20150203_162853_outLine +BABEL_OP3_402_55259_20141105_180934_inLine +BABEL_OP3_402_55259_20141105_180934_outLine +BABEL_OP3_402_55349_20150310_153012_inLine +BABEL_OP3_402_55349_20150310_153012_outLine +BABEL_OP3_402_55381_20150204_201519_inLine +BABEL_OP3_402_55381_20150204_203110_inLine +BABEL_OP3_402_56076_20150306_190854_inLine +BABEL_OP3_402_56076_20150306_190854_outLine +BABEL_OP3_402_56307_20141206_163118_inLine +BABEL_OP3_402_56307_20141206_163118_outLine +BABEL_OP3_402_56523_20141119_203619_inLine +BABEL_OP3_402_56523_20141119_203619_outLine +BABEL_OP3_402_57067_20150103_190024_inLine +BABEL_OP3_402_57067_20150103_190024_outLine +BABEL_OP3_402_57464_20150314_153140_inLine +BABEL_OP3_402_57464_20150314_153140_outLine +BABEL_OP3_402_57548_20141121_144924_inLine +BABEL_OP3_402_57548_20141121_144924_outLine +BABEL_OP3_402_57566_20150215_212628_inLine +BABEL_OP3_402_57566_20150215_212628_outLine +BABEL_OP3_402_58047_20141120_201112_inLine +BABEL_OP3_402_58047_20141120_201112_outLine +BABEL_OP3_402_58107_20141228_011533_inLine +BABEL_OP3_402_58107_20141228_014953_inLine +BABEL_OP3_402_58145_20141207_150852_inLine +BABEL_OP3_402_58145_20141207_150852_outLine +BABEL_OP3_402_58585_20150131_223219_inLine +BABEL_OP3_402_58585_20150131_223219_outLine +BABEL_OP3_402_58717_20150204_003429_inLine +BABEL_OP3_402_58717_20150204_003429_outLine +BABEL_OP3_402_58815_20150106_203552_inLine +BABEL_OP3_402_58815_20150106_203552_outLine 
+BABEL_OP3_402_58821_20150112_000647_inLine +BABEL_OP3_402_58821_20150112_000647_outLine +BABEL_OP3_402_59028_20150331_004006_inLine +BABEL_OP3_402_59028_20150331_004006_outLine +BABEL_OP3_402_59291_20150114_175706_inLine +BABEL_OP3_402_59291_20150114_175706_outLine +BABEL_OP3_402_59635_20150101_154832_inLine +BABEL_OP3_402_59635_20150101_154832_outLine +BABEL_OP3_402_59898_20141103_202730_outLine +BABEL_OP3_402_60115_20141206_190510_inLine +BABEL_OP3_402_60115_20141206_190510_outLine +BABEL_OP3_402_60299_20150413_191144_inLine +BABEL_OP3_402_60299_20150413_191144_outLine +BABEL_OP3_402_60310_20141224_122329_inLine +BABEL_OP3_402_60310_20141224_122329_outLine +BABEL_OP3_402_60436_20150305_163917_inLine +BABEL_OP3_402_60436_20150305_163917_outLine +BABEL_OP3_402_60498_20150402_122035_inLine +BABEL_OP3_402_60498_20150402_122035_outLine +BABEL_OP3_402_61348_20141112_165406_outLine +BABEL_OP3_402_61348_20141116_174305_outLine +BABEL_OP3_402_61348_20141116_175022_outLine +BABEL_OP3_402_61678_20141126_211128_inLine +BABEL_OP3_402_61963_20150201_203302_inLine +BABEL_OP3_402_61963_20150201_203302_outLine +BABEL_OP3_402_61971_20150311_025217_inLine +BABEL_OP3_402_61971_20150311_025217_outLine +BABEL_OP3_402_61971_20150311_032439_inLine +BABEL_OP3_402_61971_20150311_032439_outLine +BABEL_OP3_402_62362_20150415_200843_inLine +BABEL_OP3_402_62362_20150415_200843_outLine +BABEL_OP3_402_62724_20141225_182011_inLine +BABEL_OP3_402_62724_20141225_182011_outLine +BABEL_OP3_402_62734_20141110_172820_inLine +BABEL_OP3_402_62734_20141110_172820_outLine +BABEL_OP3_402_63307_20141122_114633_outLine +BABEL_OP3_402_63445_20140925_160334_inLine +BABEL_OP3_402_63445_20140925_160334_outLine +BABEL_OP3_402_63523_20150409_150241_inLine +BABEL_OP3_402_63523_20150409_150241_outLine +BABEL_OP3_402_63648_20150317_142220_inLine +BABEL_OP3_402_63648_20150317_142220_outLine +BABEL_OP3_402_63648_20150317_143418_inLine +BABEL_OP3_402_63648_20150317_143418_outLine +BABEL_OP3_402_63757_20141127_190053_inLine +BABEL_OP3_402_63938_20150216_054808_inLine +BABEL_OP3_402_63938_20150216_054808_outLine +BABEL_OP3_402_63999_20150329_140522_inLine +BABEL_OP3_402_63999_20150329_140522_outLine +BABEL_OP3_402_63999_20150329_144023_inLine +BABEL_OP3_402_63999_20150329_144023_outLine +BABEL_OP3_402_64722_20150409_172232_inLine +BABEL_OP3_402_64722_20150409_172232_outLine +BABEL_OP3_402_65077_20140915_211109_inLine +BABEL_OP3_402_65561_20150108_014921_inLine +BABEL_OP3_402_65561_20150108_014921_outLine +BABEL_OP3_402_65640_20150313_234015_inLine +BABEL_OP3_402_65640_20150313_234015_outLine +BABEL_OP3_402_66305_20150314_195357_inLine +BABEL_OP3_402_66305_20150314_195357_outLine +BABEL_OP3_402_66971_20150327_134302_inLine +BABEL_OP3_402_66971_20150327_134302_outLine +BABEL_OP3_402_67085_20150307_155234_inLine +BABEL_OP3_402_67085_20150307_155234_outLine +BABEL_OP3_402_67622_20141001_173720_inLine +BABEL_OP3_402_67622_20141001_173720_outLine +BABEL_OP3_402_67659_20141102_162850_inLine +BABEL_OP3_402_67659_20141102_162850_outLine +BABEL_OP3_402_67964_20150313_144207_inLine +BABEL_OP3_402_67964_20150313_144207_outLine +BABEL_OP3_402_67999_20150103_202040_outLine +BABEL_OP3_402_68748_20150131_213425_inLine +BABEL_OP3_402_68748_20150131_213425_outLine +BABEL_OP3_402_68924_20141213_175705_inLine +BABEL_OP3_402_68924_20141213_175705_outLine +BABEL_OP3_402_69633_20150103_195020_inLine +BABEL_OP3_402_69633_20150103_195020_outLine +BABEL_OP3_402_70110_20140927_010427_inLine +BABEL_OP3_402_70110_20140927_010427_outLine 
+BABEL_OP3_402_71038_20150203_000908_inLine +BABEL_OP3_402_71038_20150203_000908_outLine +BABEL_OP3_402_71189_20150411_001925_inLine +BABEL_OP3_402_71189_20150411_001925_outLine +BABEL_OP3_402_71333_20141103_014203_inLine +BABEL_OP3_402_71333_20141103_014203_outLine +BABEL_OP3_402_71780_20141019_173617_inLine +BABEL_OP3_402_71780_20141019_173617_outLine +BABEL_OP3_402_71850_20150415_111357_inLine +BABEL_OP3_402_71850_20150415_111357_outLine +BABEL_OP3_402_73005_20150307_164753_inLine +BABEL_OP3_402_73005_20150307_164753_outLine +BABEL_OP3_402_73430_20150208_181645_inLine +BABEL_OP3_402_73430_20150208_181645_outLine +BABEL_OP3_402_73446_20150412_180706_inLine +BABEL_OP3_402_73446_20150412_180706_outLine +BABEL_OP3_402_73518_20150103_211617_inLine +BABEL_OP3_402_73518_20150103_211617_outLine +BABEL_OP3_402_74641_20141124_185314_inLine +BABEL_OP3_402_74641_20141124_185314_outLine +BABEL_OP3_402_75981_20150327_130110_inLine +BABEL_OP3_402_75981_20150327_130110_outLine +BABEL_OP3_402_76218_20141110_195047_inLine +BABEL_OP3_402_76218_20141110_195047_outLine +BABEL_OP3_402_77744_20141105_195905_inLine +BABEL_OP3_402_77744_20141105_195905_outLine +BABEL_OP3_402_77974_20150305_190614_inLine +BABEL_OP3_402_78016_20141104_182140_outLine +BABEL_OP3_402_78016_20141104_194136_outLine +BABEL_OP3_402_78511_20141228_004040_inLine +BABEL_OP3_402_78511_20141228_010153_inLine +BABEL_OP3_402_78976_20141030_185556_inLine +BABEL_OP3_402_78976_20141030_185556_outLine +BABEL_OP3_402_79045_20150114_155912_inLine +BABEL_OP3_402_79045_20150114_155912_outLine +BABEL_OP3_402_79080_20150110_190015_inLine +BABEL_OP3_402_79080_20150110_190015_outLine +BABEL_OP3_402_79129_20141130_180012_outLine +BABEL_OP3_402_79660_20150407_192210_inLine +BABEL_OP3_402_79660_20150407_192210_outLine +BABEL_OP3_402_80655_20150314_181243_inLine +BABEL_OP3_402_80655_20150314_181243_outLine +BABEL_OP3_402_81149_20150314_060925_inLine +BABEL_OP3_402_81149_20150314_060925_outLine +BABEL_OP3_402_81287_20141217_002122_inLine +BABEL_OP3_402_81287_20141217_002122_outLine +BABEL_OP3_402_81671_20141230_233802_inLine +BABEL_OP3_402_81671_20141230_233802_outLine +BABEL_OP3_402_82035_20141221_140850_inLine +BABEL_OP3_402_82035_20141221_140850_outLine +BABEL_OP3_402_82391_20150105_161651_inLine +BABEL_OP3_402_82391_20150105_161651_outLine +BABEL_OP3_402_82473_20141013_175410_inLine +BABEL_OP3_402_82473_20141013_175410_outLine +BABEL_OP3_402_82622_20141104_150303_inLine +BABEL_OP3_402_82742_20141224_153706_inLine +BABEL_OP3_402_82742_20141224_153706_outLine +BABEL_OP3_402_82904_20150314_144007_inLine +BABEL_OP3_402_82904_20150314_144007_outLine +BABEL_OP3_402_82979_20141007_015257_inLine +BABEL_OP3_402_82979_20141007_015257_outLine +BABEL_OP3_402_83394_20150413_180513_inLine +BABEL_OP3_402_83394_20150413_180513_outLine +BABEL_OP3_402_83455_20141222_014307_inLine +BABEL_OP3_402_83455_20141222_014307_outLine +BABEL_OP3_402_83545_20150306_214611_inLine +BABEL_OP3_402_83545_20150306_214611_outLine +BABEL_OP3_402_83851_20141101_203855_outLine +BABEL_OP3_402_84339_20150309_193354_inLine +BABEL_OP3_402_84339_20150309_193354_outLine +BABEL_OP3_402_84466_20150311_162506_inLine +BABEL_OP3_402_84466_20150311_162506_outLine +BABEL_OP3_402_84466_20150311_164841_inLine +BABEL_OP3_402_84466_20150311_164841_outLine +BABEL_OP3_402_84547_20141031_011002_inLine +BABEL_OP3_402_84547_20141031_011002_outLine +BABEL_OP3_402_85028_20150203_222949_inLine +BABEL_OP3_402_85028_20150203_222949_outLine +BABEL_OP3_402_85519_20150205_150346_inLine 
+BABEL_OP3_402_85519_20150205_150346_outLine +BABEL_OP3_402_85647_20141202_010613_inLine +BABEL_OP3_402_85647_20141202_010613_outLine +BABEL_OP3_402_85651_20150111_195212_inLine +BABEL_OP3_402_85651_20150111_195212_outLine +BABEL_OP3_402_86557_20140919_134613_inLine +BABEL_OP3_402_86557_20140919_134613_outLine +BABEL_OP3_402_86597_20150415_233059_inLine +BABEL_OP3_402_86597_20150415_233059_outLine +BABEL_OP3_402_86628_20150406_142110_inLine +BABEL_OP3_402_86628_20150406_142110_outLine +BABEL_OP3_402_86676_20141208_175123_inLine +BABEL_OP3_402_86676_20141208_175123_outLine +BABEL_OP3_402_86826_20150414_193901_inLine +BABEL_OP3_402_86826_20150414_193901_outLine +BABEL_OP3_402_86830_20150131_191140_inLine +BABEL_OP3_402_87305_20150415_152636_outLine +BABEL_OP3_402_87545_20150308_173713_inLine +BABEL_OP3_402_87545_20150308_173713_outLine +BABEL_OP3_402_87693_20141105_154104_inLine +BABEL_OP3_402_87693_20141105_154104_outLine +BABEL_OP3_402_88661_20141206_194640_inLine +BABEL_OP3_402_88661_20141206_194640_outLine +BABEL_OP3_402_88661_20141206_195854_inLine +BABEL_OP3_402_88661_20141206_195854_outLine +BABEL_OP3_402_88661_20141206_200827_inLine +BABEL_OP3_402_88661_20141206_200827_outLine +BABEL_OP3_402_88783_20141228_150438_inLine +BABEL_OP3_402_88783_20141228_150438_outLine +BABEL_OP3_402_88865_20150316_142749_inLine +BABEL_OP3_402_88865_20150316_142749_outLine +BABEL_OP3_402_88938_20150104_171742_inLine +BABEL_OP3_402_88938_20150104_171742_outLine +BABEL_OP3_402_89560_20150106_231355_inLine +BABEL_OP3_402_89560_20150106_231355_outLine +BABEL_OP3_402_89695_20141115_012527_outLine +BABEL_OP3_402_91372_20150306_193038_inLine +BABEL_OP3_402_91372_20150306_193038_outLine +BABEL_OP3_402_92077_20150313_145153_inLine +BABEL_OP3_402_92077_20150313_145153_outLine +BABEL_OP3_402_92736_20150106_183108_inLine +BABEL_OP3_402_92736_20150106_183108_outLine +BABEL_OP3_402_92809_20140924_164438_inLine +BABEL_OP3_402_93469_20150308_223956_inLine +BABEL_OP3_402_93469_20150308_223956_outLine +BABEL_OP3_402_93515_20150318_184223_inLine +BABEL_OP3_402_93515_20150318_184223_outLine +BABEL_OP3_402_93861_20141126_021459_outLine +BABEL_OP3_402_93861_20141202_013129_outLine +BABEL_OP3_402_94141_20150312_184456_inLine +BABEL_OP3_402_94141_20150312_184456_outLine +BABEL_OP3_402_94237_20150319_141146_inLine +BABEL_OP3_402_94237_20150319_141146_outLine +BABEL_OP3_402_94262_20150308_140603_inLine +BABEL_OP3_402_94262_20150308_140603_outLine +BABEL_OP3_402_94442_20150326_164734_inLine +BABEL_OP3_402_94442_20150326_164734_outLine +BABEL_OP3_402_94449_20150315_122812_inLine +BABEL_OP3_402_94449_20150315_122812_outLine +BABEL_OP3_402_94465_20141227_155756_inLine +BABEL_OP3_402_94465_20141227_155756_outLine +BABEL_OP3_402_94487_20150312_163837_inLine +BABEL_OP3_402_94487_20150312_163837_outLine +BABEL_OP3_402_94587_20150128_234118_inLine +BABEL_OP3_402_94587_20150128_234118_outLine +BABEL_OP3_402_94745_20141214_225333_inLine +BABEL_OP3_402_94745_20141214_225333_outLine +BABEL_OP3_402_95935_20150106_123341_inLine +BABEL_OP3_402_95935_20150106_123341_outLine +BABEL_OP3_402_95966_20141110_203915_inLine +BABEL_OP3_402_95966_20141110_203915_outLine +BABEL_OP3_402_96446_20141106_013329_inLine +BABEL_OP3_402_96446_20141106_013329_outLine +BABEL_OP3_402_96525_20150102_120919_inLine +BABEL_OP3_402_96842_20150327_193159_inLine +BABEL_OP3_402_96842_20150327_193159_outLine +BABEL_OP3_402_97363_20140929_125711_outLine +BABEL_OP3_402_97731_20150102_215016_outLine +BABEL_OP3_402_98365_20141120_164222_inLine 
+BABEL_OP3_402_98506_20150314_191311_inLine +BABEL_OP3_402_98506_20150314_191311_outLine +BABEL_OP3_402_99732_20141224_145056_outLine diff --git a/egs/babel/s5d/conf/lists/403-dholuo/dev.2h.list b/egs/babel/s5d/conf/lists/403-dholuo/dev.2h.list new file mode 100644 index 00000000000..195f3e16bf3 --- /dev/null +++ b/egs/babel/s5d/conf/lists/403-dholuo/dev.2h.list @@ -0,0 +1,122 @@ +BABEL_OP3_403_10019_20141027_010545_inLine +BABEL_OP3_403_10019_20141027_010545_outLine +BABEL_OP3_403_12220_20141026_204025_inLine +BABEL_OP3_403_12220_20141026_204025_outLine +BABEL_OP3_403_13178_20141128_223039_inLine +BABEL_OP3_403_13178_20141128_223039_outLine +BABEL_OP3_403_14440_20141129_004855_inLine +BABEL_OP3_403_14440_20141129_004855_outLine +BABEL_OP3_403_15042_20150313_165638_inLine +BABEL_OP3_403_15042_20150313_165638_outLine +BABEL_OP3_403_17440_20141210_204026_inLine +BABEL_OP3_403_17440_20141210_204026_outLine +BABEL_OP3_403_17440_20141210_204535_inLine +BABEL_OP3_403_17440_20141210_204535_outLine +BABEL_OP3_403_19663_20141029_190739_inLine +BABEL_OP3_403_19663_20141029_190739_outLine +BABEL_OP3_403_19782_20141216_211916_inLine +BABEL_OP3_403_19782_20141216_211916_outLine +BABEL_OP3_403_22216_20141014_202442_inLine +BABEL_OP3_403_22216_20141014_202442_outLine +BABEL_OP3_403_23151_20150108_032700_inLine +BABEL_OP3_403_23151_20150108_032700_outLine +BABEL_OP3_403_25012_20150201_000040_inLine +BABEL_OP3_403_25012_20150201_000040_outLine +BABEL_OP3_403_28606_20141205_184257_inLine +BABEL_OP3_403_28606_20141205_184257_outLine +BABEL_OP3_403_32727_20141210_200505_inLine +BABEL_OP3_403_32727_20141210_200505_outLine +BABEL_OP3_403_33175_20141014_202944_inLine +BABEL_OP3_403_33175_20141014_202944_outLine +BABEL_OP3_403_33251_20141118_224420_inLine +BABEL_OP3_403_33251_20141118_224420_outLine +BABEL_OP3_403_34564_20141212_001647_inLine +BABEL_OP3_403_34564_20141212_001647_outLine +BABEL_OP3_403_36341_20141013_224204_inLine +BABEL_OP3_403_36341_20141013_224204_outLine +BABEL_OP3_403_41100_20141006_230147_inLine +BABEL_OP3_403_41100_20141006_230147_outLine +BABEL_OP3_403_42243_20141016_231219_inLine +BABEL_OP3_403_42243_20141016_231219_outLine +BABEL_OP3_403_42497_20141004_235231_inLine +BABEL_OP3_403_42497_20141004_235231_outLine +BABEL_OP3_403_43388_20141028_212938_inLine +BABEL_OP3_403_43388_20141028_212938_outLine +BABEL_OP3_403_44847_20141127_190752_inLine +BABEL_OP3_403_44847_20141127_190752_outLine +BABEL_OP3_403_45560_20141012_204242_inLine +BABEL_OP3_403_45560_20141012_204242_outLine +BABEL_OP3_403_45697_20150211_181356_inLine +BABEL_OP3_403_45697_20150211_181356_outLine +BABEL_OP3_403_46881_20141014_210231_inLine +BABEL_OP3_403_46881_20141014_210231_outLine +BABEL_OP3_403_47877_20150105_200005_inLine +BABEL_OP3_403_47877_20150105_200005_outLine +BABEL_OP3_403_47882_20150131_215134_inLine +BABEL_OP3_403_47882_20150131_215134_outLine +BABEL_OP3_403_48789_20141031_205407_inLine +BABEL_OP3_403_48789_20141031_205407_outLine +BABEL_OP3_403_49502_20141013_230428_inLine +BABEL_OP3_403_49502_20141013_230428_outLine +BABEL_OP3_403_49902_20141025_214609_inLine +BABEL_OP3_403_49902_20141025_214609_outLine +BABEL_OP3_403_50726_20141015_222945_inLine +BABEL_OP3_403_50726_20141015_222945_outLine +BABEL_OP3_403_52438_20141005_211825_inLine +BABEL_OP3_403_52438_20141005_211825_outLine +BABEL_OP3_403_54160_20141012_225050_inLine +BABEL_OP3_403_54160_20141012_225050_outLine +BABEL_OP3_403_56090_20141001_220534_inLine +BABEL_OP3_403_56090_20141001_220534_outLine +BABEL_OP3_403_58850_20141030_190407_inLine 
+BABEL_OP3_403_58850_20141030_190407_outLine +BABEL_OP3_403_60538_20141007_015704_inLine +BABEL_OP3_403_60538_20141007_015704_outLine +BABEL_OP3_403_60706_20141014_225721_inLine +BABEL_OP3_403_60706_20141014_225721_outLine +BABEL_OP3_403_61225_20141014_225524_inLine +BABEL_OP3_403_61225_20141014_225524_outLine +BABEL_OP3_403_62456_20141107_224816_inLine +BABEL_OP3_403_62456_20141107_224816_outLine +BABEL_OP3_403_62545_20150203_205015_inLine +BABEL_OP3_403_62545_20150203_205015_outLine +BABEL_OP3_403_63081_20141013_184721_inLine +BABEL_OP3_403_63081_20141013_184721_outLine +BABEL_OP3_403_63938_20150304_184136_inLine +BABEL_OP3_403_63938_20150304_184136_outLine +BABEL_OP3_403_65723_20141004_231950_inLine +BABEL_OP3_403_65723_20141004_231950_outLine +BABEL_OP3_403_65882_20141005_214649_inLine +BABEL_OP3_403_65882_20141005_214649_outLine +BABEL_OP3_403_66026_20141207_212517_inLine +BABEL_OP3_403_66026_20141207_212517_outLine +BABEL_OP3_403_68306_20141206_183801_inLine +BABEL_OP3_403_68306_20141206_183801_outLine +BABEL_OP3_403_70110_20141016_195210_inLine +BABEL_OP3_403_70110_20141016_195210_outLine +BABEL_OP3_403_71780_20141006_005652_inLine +BABEL_OP3_403_71780_20141006_005652_outLine +BABEL_OP3_403_72349_20150313_194307_inLine +BABEL_OP3_403_72349_20150313_194307_outLine +BABEL_OP3_403_78877_20150203_012549_inLine +BABEL_OP3_403_78877_20150203_012549_outLine +BABEL_OP3_403_79820_20141005_212016_inLine +BABEL_OP3_403_79820_20141005_212016_outLine +BABEL_OP3_403_87280_20141217_230121_inLine +BABEL_OP3_403_87280_20141217_230121_outLine +BABEL_OP3_403_88938_20141219_211017_inLine +BABEL_OP3_403_88938_20141219_211017_outLine +BABEL_OP3_403_90777_20141028_012959_inLine +BABEL_OP3_403_90777_20141028_012959_outLine +BABEL_OP3_403_92356_20150305_033040_inLine +BABEL_OP3_403_92356_20150305_033040_outLine +BABEL_OP3_403_94035_20150201_183321_inLine +BABEL_OP3_403_94035_20150201_183321_outLine +BABEL_OP3_403_96446_20141013_215249_inLine +BABEL_OP3_403_96446_20141013_215249_outLine +BABEL_OP3_403_97264_20141220_220653_inLine +BABEL_OP3_403_97264_20141220_220653_outLine +BABEL_OP3_403_97849_20150313_175528_inLine +BABEL_OP3_403_97849_20150313_175528_outLine +BABEL_OP3_403_99813_20141106_211637_inLine +BABEL_OP3_403_99813_20141106_211637_outLine diff --git a/egs/babel/s5d/conf/lists/403-dholuo/dev.list b/egs/babel/s5d/conf/lists/403-dholuo/dev.list new file mode 100644 index 00000000000..195f3e16bf3 --- /dev/null +++ b/egs/babel/s5d/conf/lists/403-dholuo/dev.list @@ -0,0 +1,122 @@ +BABEL_OP3_403_10019_20141027_010545_inLine +BABEL_OP3_403_10019_20141027_010545_outLine +BABEL_OP3_403_12220_20141026_204025_inLine +BABEL_OP3_403_12220_20141026_204025_outLine +BABEL_OP3_403_13178_20141128_223039_inLine +BABEL_OP3_403_13178_20141128_223039_outLine +BABEL_OP3_403_14440_20141129_004855_inLine +BABEL_OP3_403_14440_20141129_004855_outLine +BABEL_OP3_403_15042_20150313_165638_inLine +BABEL_OP3_403_15042_20150313_165638_outLine +BABEL_OP3_403_17440_20141210_204026_inLine +BABEL_OP3_403_17440_20141210_204026_outLine +BABEL_OP3_403_17440_20141210_204535_inLine +BABEL_OP3_403_17440_20141210_204535_outLine +BABEL_OP3_403_19663_20141029_190739_inLine +BABEL_OP3_403_19663_20141029_190739_outLine +BABEL_OP3_403_19782_20141216_211916_inLine +BABEL_OP3_403_19782_20141216_211916_outLine +BABEL_OP3_403_22216_20141014_202442_inLine +BABEL_OP3_403_22216_20141014_202442_outLine +BABEL_OP3_403_23151_20150108_032700_inLine +BABEL_OP3_403_23151_20150108_032700_outLine +BABEL_OP3_403_25012_20150201_000040_inLine 
+BABEL_OP3_403_25012_20150201_000040_outLine +BABEL_OP3_403_28606_20141205_184257_inLine +BABEL_OP3_403_28606_20141205_184257_outLine +BABEL_OP3_403_32727_20141210_200505_inLine +BABEL_OP3_403_32727_20141210_200505_outLine +BABEL_OP3_403_33175_20141014_202944_inLine +BABEL_OP3_403_33175_20141014_202944_outLine +BABEL_OP3_403_33251_20141118_224420_inLine +BABEL_OP3_403_33251_20141118_224420_outLine +BABEL_OP3_403_34564_20141212_001647_inLine +BABEL_OP3_403_34564_20141212_001647_outLine +BABEL_OP3_403_36341_20141013_224204_inLine +BABEL_OP3_403_36341_20141013_224204_outLine +BABEL_OP3_403_41100_20141006_230147_inLine +BABEL_OP3_403_41100_20141006_230147_outLine +BABEL_OP3_403_42243_20141016_231219_inLine +BABEL_OP3_403_42243_20141016_231219_outLine +BABEL_OP3_403_42497_20141004_235231_inLine +BABEL_OP3_403_42497_20141004_235231_outLine +BABEL_OP3_403_43388_20141028_212938_inLine +BABEL_OP3_403_43388_20141028_212938_outLine +BABEL_OP3_403_44847_20141127_190752_inLine +BABEL_OP3_403_44847_20141127_190752_outLine +BABEL_OP3_403_45560_20141012_204242_inLine +BABEL_OP3_403_45560_20141012_204242_outLine +BABEL_OP3_403_45697_20150211_181356_inLine +BABEL_OP3_403_45697_20150211_181356_outLine +BABEL_OP3_403_46881_20141014_210231_inLine +BABEL_OP3_403_46881_20141014_210231_outLine +BABEL_OP3_403_47877_20150105_200005_inLine +BABEL_OP3_403_47877_20150105_200005_outLine +BABEL_OP3_403_47882_20150131_215134_inLine +BABEL_OP3_403_47882_20150131_215134_outLine +BABEL_OP3_403_48789_20141031_205407_inLine +BABEL_OP3_403_48789_20141031_205407_outLine +BABEL_OP3_403_49502_20141013_230428_inLine +BABEL_OP3_403_49502_20141013_230428_outLine +BABEL_OP3_403_49902_20141025_214609_inLine +BABEL_OP3_403_49902_20141025_214609_outLine +BABEL_OP3_403_50726_20141015_222945_inLine +BABEL_OP3_403_50726_20141015_222945_outLine +BABEL_OP3_403_52438_20141005_211825_inLine +BABEL_OP3_403_52438_20141005_211825_outLine +BABEL_OP3_403_54160_20141012_225050_inLine +BABEL_OP3_403_54160_20141012_225050_outLine +BABEL_OP3_403_56090_20141001_220534_inLine +BABEL_OP3_403_56090_20141001_220534_outLine +BABEL_OP3_403_58850_20141030_190407_inLine +BABEL_OP3_403_58850_20141030_190407_outLine +BABEL_OP3_403_60538_20141007_015704_inLine +BABEL_OP3_403_60538_20141007_015704_outLine +BABEL_OP3_403_60706_20141014_225721_inLine +BABEL_OP3_403_60706_20141014_225721_outLine +BABEL_OP3_403_61225_20141014_225524_inLine +BABEL_OP3_403_61225_20141014_225524_outLine +BABEL_OP3_403_62456_20141107_224816_inLine +BABEL_OP3_403_62456_20141107_224816_outLine +BABEL_OP3_403_62545_20150203_205015_inLine +BABEL_OP3_403_62545_20150203_205015_outLine +BABEL_OP3_403_63081_20141013_184721_inLine +BABEL_OP3_403_63081_20141013_184721_outLine +BABEL_OP3_403_63938_20150304_184136_inLine +BABEL_OP3_403_63938_20150304_184136_outLine +BABEL_OP3_403_65723_20141004_231950_inLine +BABEL_OP3_403_65723_20141004_231950_outLine +BABEL_OP3_403_65882_20141005_214649_inLine +BABEL_OP3_403_65882_20141005_214649_outLine +BABEL_OP3_403_66026_20141207_212517_inLine +BABEL_OP3_403_66026_20141207_212517_outLine +BABEL_OP3_403_68306_20141206_183801_inLine +BABEL_OP3_403_68306_20141206_183801_outLine +BABEL_OP3_403_70110_20141016_195210_inLine +BABEL_OP3_403_70110_20141016_195210_outLine +BABEL_OP3_403_71780_20141006_005652_inLine +BABEL_OP3_403_71780_20141006_005652_outLine +BABEL_OP3_403_72349_20150313_194307_inLine +BABEL_OP3_403_72349_20150313_194307_outLine +BABEL_OP3_403_78877_20150203_012549_inLine +BABEL_OP3_403_78877_20150203_012549_outLine 
+BABEL_OP3_403_79820_20141005_212016_inLine +BABEL_OP3_403_79820_20141005_212016_outLine +BABEL_OP3_403_87280_20141217_230121_inLine +BABEL_OP3_403_87280_20141217_230121_outLine +BABEL_OP3_403_88938_20141219_211017_inLine +BABEL_OP3_403_88938_20141219_211017_outLine +BABEL_OP3_403_90777_20141028_012959_inLine +BABEL_OP3_403_90777_20141028_012959_outLine +BABEL_OP3_403_92356_20150305_033040_inLine +BABEL_OP3_403_92356_20150305_033040_outLine +BABEL_OP3_403_94035_20150201_183321_inLine +BABEL_OP3_403_94035_20150201_183321_outLine +BABEL_OP3_403_96446_20141013_215249_inLine +BABEL_OP3_403_96446_20141013_215249_outLine +BABEL_OP3_403_97264_20141220_220653_inLine +BABEL_OP3_403_97264_20141220_220653_outLine +BABEL_OP3_403_97849_20150313_175528_inLine +BABEL_OP3_403_97849_20150313_175528_outLine +BABEL_OP3_403_99813_20141106_211637_inLine +BABEL_OP3_403_99813_20141106_211637_outLine diff --git a/egs/babel/s5d/conf/lists/403-dholuo/eval.list b/egs/babel/s5d/conf/lists/403-dholuo/eval.list new file mode 100644 index 00000000000..4fc564e5b78 --- /dev/null +++ b/egs/babel/s5d/conf/lists/403-dholuo/eval.list @@ -0,0 +1,182 @@ +BABEL_OP3_403_13040_20141004_235933_inLine +BABEL_OP3_403_13040_20141004_235933_outLine +BABEL_OP3_403_13929_20150204_022153_inLine +BABEL_OP3_403_13929_20150204_022153_outLine +BABEL_OP3_403_14350_20141007_001036_inLine +BABEL_OP3_403_14350_20141007_001036_outLine +BABEL_OP3_403_14575_20150205_194428_inLine +BABEL_OP3_403_14575_20150205_194428_outLine +BABEL_OP3_403_15262_20141008_011520_inLine +BABEL_OP3_403_15262_20141008_011520_outLine +BABEL_OP3_403_15848_20141001_223454_inLine +BABEL_OP3_403_15848_20141001_223454_outLine +BABEL_OP3_403_15902_20141006_235206_inLine +BABEL_OP3_403_15902_20141006_235206_outLine +BABEL_OP3_403_16056_20141007_015057_inLine +BABEL_OP3_403_16056_20141007_015057_outLine +BABEL_OP3_403_16184_20141003_220544_inLine +BABEL_OP3_403_16184_20141003_220544_outLine +BABEL_OP3_403_17165_20141103_175355_inLine +BABEL_OP3_403_17165_20141103_175355_outLine +BABEL_OP3_403_19120_20150320_014910_inLine +BABEL_OP3_403_19120_20150320_014910_outLine +BABEL_OP3_403_19545_20141107_193534_inLine +BABEL_OP3_403_19545_20141107_193534_outLine +BABEL_OP3_403_21029_20141010_220724_inLine +BABEL_OP3_403_21029_20141010_220724_outLine +BABEL_OP3_403_21581_20141026_010129_inLine +BABEL_OP3_403_21581_20141026_010129_outLine +BABEL_OP3_403_23260_20150313_211958_inLine +BABEL_OP3_403_23260_20150313_211958_outLine +BABEL_OP3_403_29777_20141218_221709_inLine +BABEL_OP3_403_29777_20141218_221709_outLine +BABEL_OP3_403_30497_20150314_160011_inLine +BABEL_OP3_403_30497_20150314_160011_outLine +BABEL_OP3_403_31583_20141216_190359_inLine +BABEL_OP3_403_31583_20141216_190359_outLine +BABEL_OP3_403_32048_20141219_213429_inLine +BABEL_OP3_403_32048_20141219_213429_outLine +BABEL_OP3_403_32959_20141208_210738_inLine +BABEL_OP3_403_32959_20141208_210738_outLine +BABEL_OP3_403_33635_20141029_220701_inLine +BABEL_OP3_403_33635_20141029_220701_outLine +BABEL_OP3_403_35069_20141219_230111_inLine +BABEL_OP3_403_35069_20141219_230111_outLine +BABEL_OP3_403_35885_20150319_180147_inLine +BABEL_OP3_403_35885_20150319_180147_outLine +BABEL_OP3_403_36219_20141024_182040_inLine +BABEL_OP3_403_36219_20141024_182040_outLine +BABEL_OP3_403_37281_20141028_212708_inLine +BABEL_OP3_403_37281_20141028_212708_outLine +BABEL_OP3_403_39277_20150204_013404_inLine +BABEL_OP3_403_39277_20150204_013404_outLine +BABEL_OP3_403_44290_20150313_161518_inLine +BABEL_OP3_403_44290_20150313_161518_outLine 
+BABEL_OP3_403_44681_20150202_013205_inLine +BABEL_OP3_403_44681_20150202_013205_outLine +BABEL_OP3_403_45140_20150314_202244_inLine +BABEL_OP3_403_45140_20150314_202244_outLine +BABEL_OP3_403_47270_20150305_004557_inLine +BABEL_OP3_403_47270_20150305_004557_outLine +BABEL_OP3_403_47309_20150131_232140_inLine +BABEL_OP3_403_47309_20150131_232140_outLine +BABEL_OP3_403_50090_20141128_005549_inLine +BABEL_OP3_403_50090_20141128_005549_outLine +BABEL_OP3_403_52025_20141016_194738_inLine +BABEL_OP3_403_52025_20141016_194738_outLine +BABEL_OP3_403_52381_20150106_000156_inLine +BABEL_OP3_403_52381_20150106_000156_outLine +BABEL_OP3_403_53419_20141216_202007_inLine +BABEL_OP3_403_53419_20141216_202007_outLine +BABEL_OP3_403_54046_20141221_013345_inLine +BABEL_OP3_403_54046_20141221_013345_outLine +BABEL_OP3_403_54405_20141105_215311_inLine +BABEL_OP3_403_54405_20141105_215311_outLine +BABEL_OP3_403_56429_20141004_212928_inLine +BABEL_OP3_403_56429_20141004_212928_outLine +BABEL_OP3_403_56523_20141027_234249_inLine +BABEL_OP3_403_56523_20141027_234249_outLine +BABEL_OP3_403_56720_20141204_213606_inLine +BABEL_OP3_403_56720_20141204_213606_outLine +BABEL_OP3_403_56743_20141027_224527_inLine +BABEL_OP3_403_56743_20141027_224527_outLine +BABEL_OP3_403_57654_20141004_222740_inLine +BABEL_OP3_403_57654_20141004_222740_outLine +BABEL_OP3_403_57922_20141119_003457_inLine +BABEL_OP3_403_57922_20141119_003457_outLine +BABEL_OP3_403_60508_20141015_194223_inLine +BABEL_OP3_403_60508_20141015_194223_outLine +BABEL_OP3_403_60626_20141007_020141_inLine +BABEL_OP3_403_60626_20141007_020141_outLine +BABEL_OP3_403_61219_20141025_212855_inLine +BABEL_OP3_403_61219_20141025_212855_outLine +BABEL_OP3_403_62286_20141029_183256_inLine +BABEL_OP3_403_62286_20141029_183256_outLine +BABEL_OP3_403_62852_20141016_194911_inLine +BABEL_OP3_403_62852_20141016_194911_outLine +BABEL_OP3_403_63445_20141016_201418_inLine +BABEL_OP3_403_63445_20141016_201418_outLine +BABEL_OP3_403_63481_20141014_201444_inLine +BABEL_OP3_403_63481_20141014_201444_outLine +BABEL_OP3_403_64494_20141005_003938_inLine +BABEL_OP3_403_64494_20141005_003938_outLine +BABEL_OP3_403_64796_20141014_213212_inLine +BABEL_OP3_403_64796_20141014_213212_outLine +BABEL_OP3_403_64902_20150319_231944_inLine +BABEL_OP3_403_64902_20150319_231944_outLine +BABEL_OP3_403_65477_20141029_190115_inLine +BABEL_OP3_403_65477_20141029_190115_outLine +BABEL_OP3_403_66519_20141026_200412_inLine +BABEL_OP3_403_66519_20141026_200412_outLine +BABEL_OP3_403_67552_20141204_235240_inLine +BABEL_OP3_403_67552_20141204_235240_outLine +BABEL_OP3_403_67842_20141005_213633_inLine +BABEL_OP3_403_67842_20141005_213633_outLine +BABEL_OP3_403_70639_20150201_224933_inLine +BABEL_OP3_403_70639_20150201_224933_outLine +BABEL_OP3_403_71282_20150304_001933_inLine +BABEL_OP3_403_71282_20150304_001933_outLine +BABEL_OP3_403_71566_20141210_221853_inLine +BABEL_OP3_403_71566_20141210_221853_outLine +BABEL_OP3_403_71704_20141005_194010_inLine +BABEL_OP3_403_71704_20141005_194010_outLine +BABEL_OP3_403_73042_20141004_213024_inLine +BABEL_OP3_403_73042_20141004_213024_outLine +BABEL_OP3_403_73119_20141024_013927_inLine +BABEL_OP3_403_73119_20141024_013927_outLine +BABEL_OP3_403_74641_20141104_204017_inLine +BABEL_OP3_403_74641_20141104_204017_outLine +BABEL_OP3_403_75359_20150306_233416_inLine +BABEL_OP3_403_75359_20150306_233416_outLine +BABEL_OP3_403_77567_20141016_212214_inLine +BABEL_OP3_403_77567_20141016_212214_outLine +BABEL_OP3_403_80655_20150313_202935_inLine 
+BABEL_OP3_403_80655_20150313_202935_outLine +BABEL_OP3_403_81229_20141028_221835_inLine +BABEL_OP3_403_81229_20141028_221835_outLine +BABEL_OP3_403_81404_20141027_225835_inLine +BABEL_OP3_403_81404_20141027_225835_outLine +BABEL_OP3_403_81427_20141025_192229_inLine +BABEL_OP3_403_81427_20141025_192229_outLine +BABEL_OP3_403_81581_20150205_214253_inLine +BABEL_OP3_403_81581_20150205_214253_outLine +BABEL_OP3_403_82966_20141215_232026_inLine +BABEL_OP3_403_82966_20141215_232026_outLine +BABEL_OP3_403_83062_20150314_182244_inLine +BABEL_OP3_403_83062_20150314_182244_outLine +BABEL_OP3_403_84715_20150106_201437_inLine +BABEL_OP3_403_84715_20150106_201437_outLine +BABEL_OP3_403_86748_20150305_041204_inLine +BABEL_OP3_403_86748_20150305_041204_outLine +BABEL_OP3_403_87629_20141107_235904_inLine +BABEL_OP3_403_87629_20141107_235904_outLine +BABEL_OP3_403_88686_20141014_185730_inLine +BABEL_OP3_403_88686_20141014_185730_outLine +BABEL_OP3_403_88873_20141005_183048_inLine +BABEL_OP3_403_88873_20141005_183048_outLine +BABEL_OP3_403_90080_20150305_215921_inLine +BABEL_OP3_403_90080_20150305_215921_outLine +BABEL_OP3_403_91825_20141016_185730_inLine +BABEL_OP3_403_91825_20141016_185730_outLine +BABEL_OP3_403_94166_20150304_233340_inLine +BABEL_OP3_403_94166_20150304_233340_outLine +BABEL_OP3_403_94212_20150203_035128_inLine +BABEL_OP3_403_94212_20150203_035128_outLine +BABEL_OP3_403_94587_20141206_200001_inLine +BABEL_OP3_403_94587_20141206_200001_outLine +BABEL_OP3_403_95077_20141211_172737_inLine +BABEL_OP3_403_95077_20141211_172737_outLine +BABEL_OP3_403_95490_20141015_192814_inLine +BABEL_OP3_403_95490_20141015_192814_outLine +BABEL_OP3_403_96088_20150307_205122_inLine +BABEL_OP3_403_96088_20150307_205122_outLine +BABEL_OP3_403_96934_20141025_215407_inLine +BABEL_OP3_403_96934_20141025_215407_outLine +BABEL_OP3_403_98255_20150204_194911_inLine +BABEL_OP3_403_98255_20150204_194911_outLine +BABEL_OP3_403_98580_20141029_181611_inLine +BABEL_OP3_403_98580_20141029_181611_outLine +BABEL_OP3_403_98888_20141028_214127_inLine +BABEL_OP3_403_98888_20141028_214127_outLine +BABEL_OP3_403_99264_20141216_011902_inLine +BABEL_OP3_403_99264_20141216_011902_outLine diff --git a/egs/babel/s5d/conf/lists/403-dholuo/sub-train.list b/egs/babel/s5d/conf/lists/403-dholuo/sub-train.list new file mode 100644 index 00000000000..138a27efd31 --- /dev/null +++ b/egs/babel/s5d/conf/lists/403-dholuo/sub-train.list @@ -0,0 +1,122 @@ +BABEL_OP3_403_11681_20141010_203514_inLine +BABEL_OP3_403_11681_20141010_203514_outLine +BABEL_OP3_403_13324_20141004_204835_inLine +BABEL_OP3_403_13324_20141004_204835_outLine +BABEL_OP3_403_13490_20141103_182258_inLine +BABEL_OP3_403_13490_20141103_182258_outLine +BABEL_OP3_403_16475_20141027_225406_inLine +BABEL_OP3_403_16475_20141027_225406_outLine +BABEL_OP3_403_16938_20141117_002438_inLine +BABEL_OP3_403_16938_20141117_002438_outLine +BABEL_OP3_403_17280_20141027_223651_inLine +BABEL_OP3_403_17280_20141027_223651_outLine +BABEL_OP3_403_19722_20141013_214859_inLine +BABEL_OP3_403_19722_20141013_214859_outLine +BABEL_OP3_403_19749_20150211_210545_inLine +BABEL_OP3_403_19749_20150211_210545_outLine +BABEL_OP3_403_22321_20141012_234503_inLine +BABEL_OP3_403_22321_20141012_234503_outLine +BABEL_OP3_403_23893_20150311_211416_inLine +BABEL_OP3_403_23893_20150311_211416_outLine +BABEL_OP3_403_24589_20141023_173937_inLine +BABEL_OP3_403_24589_20141023_173937_outLine +BABEL_OP3_403_25961_20141016_201537_inLine +BABEL_OP3_403_25961_20141016_201537_outLine +BABEL_OP3_403_28190_20141218_232404_inLine 
+BABEL_OP3_403_28190_20141218_232404_outLine +BABEL_OP3_403_28775_20141005_002735_inLine +BABEL_OP3_403_28775_20141005_002735_outLine +BABEL_OP3_403_28945_20141006_202723_inLine +BABEL_OP3_403_28945_20141006_202723_outLine +BABEL_OP3_403_29168_20141013_195745_inLine +BABEL_OP3_403_29168_20141013_195745_outLine +BABEL_OP3_403_29323_20150303_223419_inLine +BABEL_OP3_403_29323_20150303_223419_outLine +BABEL_OP3_403_31109_20141107_213704_inLine +BABEL_OP3_403_31109_20141107_213704_outLine +BABEL_OP3_403_31490_20141006_210241_inLine +BABEL_OP3_403_31490_20141006_210241_outLine +BABEL_OP3_403_31624_20141014_211203_inLine +BABEL_OP3_403_31624_20141014_211203_outLine +BABEL_OP3_403_32861_20150107_160647_inLine +BABEL_OP3_403_32861_20150107_160647_outLine +BABEL_OP3_403_33840_20141219_200146_inLine +BABEL_OP3_403_33840_20141219_200146_outLine +BABEL_OP3_403_36632_20150206_041325_inLine +BABEL_OP3_403_36632_20150206_041325_outLine +BABEL_OP3_403_36990_20141030_231441_inLine +BABEL_OP3_403_36990_20141030_231441_outLine +BABEL_OP3_403_38878_20141121_184540_inLine +BABEL_OP3_403_38878_20141121_184540_outLine +BABEL_OP3_403_42718_20150306_202240_inLine +BABEL_OP3_403_42718_20150306_202240_outLine +BABEL_OP3_403_43368_20141026_000458_inLine +BABEL_OP3_403_43368_20141026_000458_outLine +BABEL_OP3_403_44868_20141129_220211_inLine +BABEL_OP3_403_44868_20141129_220211_outLine +BABEL_OP3_403_44961_20141006_233622_inLine +BABEL_OP3_403_44961_20141006_233622_outLine +BABEL_OP3_403_44961_20141006_235203_inLine +BABEL_OP3_403_44961_20141006_235203_outLine +BABEL_OP3_403_47215_20141007_230222_inLine +BABEL_OP3_403_47215_20141007_230222_outLine +BABEL_OP3_403_48663_20150306_181741_inLine +BABEL_OP3_403_48663_20150306_181741_outLine +BABEL_OP3_403_51955_20141004_212210_inLine +BABEL_OP3_403_51955_20141004_212210_outLine +BABEL_OP3_403_53842_20141031_193507_inLine +BABEL_OP3_403_53842_20141031_193507_outLine +BABEL_OP3_403_55950_20150312_174125_inLine +BABEL_OP3_403_55950_20150312_174125_outLine +BABEL_OP3_403_56198_20141005_222956_inLine +BABEL_OP3_403_56198_20141005_222956_outLine +BABEL_OP3_403_58047_20141118_184454_inLine +BABEL_OP3_403_58047_20141118_184454_outLine +BABEL_OP3_403_58585_20150106_172737_inLine +BABEL_OP3_403_58585_20150106_172737_outLine +BABEL_OP3_403_60310_20141217_205059_inLine +BABEL_OP3_403_60310_20141217_205059_outLine +BABEL_OP3_403_60418_20141129_235907_inLine +BABEL_OP3_403_60418_20141129_235907_outLine +BABEL_OP3_403_61348_20141103_230857_inLine +BABEL_OP3_403_61348_20141103_230857_outLine +BABEL_OP3_403_65640_20150314_163101_inLine +BABEL_OP3_403_65640_20150314_163101_outLine +BABEL_OP3_403_69107_20141106_000151_inLine +BABEL_OP3_403_69107_20141106_000151_outLine +BABEL_OP3_403_69746_20141220_191513_inLine +BABEL_OP3_403_69746_20141220_191513_outLine +BABEL_OP3_403_72007_20141205_002010_inLine +BABEL_OP3_403_72007_20141205_002010_outLine +BABEL_OP3_403_72110_20141210_212045_inLine +BABEL_OP3_403_72110_20141210_212045_outLine +BABEL_OP3_403_72844_20141004_005248_inLine +BABEL_OP3_403_72844_20141004_005248_outLine +BABEL_OP3_403_75223_20141016_194054_inLine +BABEL_OP3_403_75223_20141016_194054_outLine +BABEL_OP3_403_77974_20150312_200046_inLine +BABEL_OP3_403_77974_20150312_200046_outLine +BABEL_OP3_403_78360_20150107_231519_inLine +BABEL_OP3_403_78360_20150107_231519_outLine +BABEL_OP3_403_78544_20141201_192016_inLine +BABEL_OP3_403_78544_20141201_192016_outLine +BABEL_OP3_403_82391_20141206_001207_inLine +BABEL_OP3_403_82391_20141206_001207_outLine 
+BABEL_OP3_403_83436_20141012_221126_inLine +BABEL_OP3_403_83436_20141012_221126_outLine +BABEL_OP3_403_84469_20141211_002526_inLine +BABEL_OP3_403_84469_20141211_002526_outLine +BABEL_OP3_403_84605_20141005_214529_inLine +BABEL_OP3_403_84605_20141005_214529_outLine +BABEL_OP3_403_87921_20141210_233414_inLine +BABEL_OP3_403_87921_20141210_233414_outLine +BABEL_OP3_403_92509_20141014_232528_inLine +BABEL_OP3_403_92509_20141014_232528_outLine +BABEL_OP3_403_95269_20141026_235206_inLine +BABEL_OP3_403_95269_20141026_235206_outLine +BABEL_OP3_403_96324_20141014_194024_inLine +BABEL_OP3_403_96324_20141014_194024_outLine +BABEL_OP3_403_97588_20141015_193851_inLine +BABEL_OP3_403_97588_20141015_193851_outLine +BABEL_OP3_403_98506_20150319_151741_inLine +BABEL_OP3_403_98506_20150319_151741_outLine diff --git a/egs/babel/s5d/conf/lists/403-dholuo/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/403-dholuo/sub-train.untranscribed.list new file mode 100644 index 00000000000..b22e404cf6c --- /dev/null +++ b/egs/babel/s5d/conf/lists/403-dholuo/sub-train.untranscribed.list @@ -0,0 +1,380 @@ +BABEL_OP3_403_10313_20150130_193605_inLine +BABEL_OP3_403_10313_20150130_193605_outLine +BABEL_OP3_403_10469_20150130_211522_inLine +BABEL_OP3_403_10469_20150130_211522_outLine +BABEL_OP3_403_10966_20141025_191612_inLine +BABEL_OP3_403_10966_20141025_191612_outLine +BABEL_OP3_403_11663_20141205_201130_inLine +BABEL_OP3_403_11663_20141205_201130_outLine +BABEL_OP3_403_11663_20141205_204332_inLine +BABEL_OP3_403_11663_20141205_204332_outLine +BABEL_OP3_403_11797_20141013_012556_inLine +BABEL_OP3_403_11797_20141013_012556_outLine +BABEL_OP3_403_12606_20150314_170830_inLine +BABEL_OP3_403_12606_20150314_170830_outLine +BABEL_OP3_403_12609_20150313_183914_inLine +BABEL_OP3_403_12609_20150313_183914_outLine +BABEL_OP3_403_13483_20141205_201059_inLine +BABEL_OP3_403_13483_20141205_201059_outLine +BABEL_OP3_403_14807_20141117_201842_inLine +BABEL_OP3_403_14807_20141117_201842_outLine +BABEL_OP3_403_14814_20141023_004823_inLine +BABEL_OP3_403_14814_20141023_004823_outLine +BABEL_OP3_403_14929_20141029_222741_inLine +BABEL_OP3_403_14929_20141029_222741_outLine +BABEL_OP3_403_15281_20150307_201454_inLine +BABEL_OP3_403_15281_20150307_201454_outLine +BABEL_OP3_403_15322_20150319_184826_inLine +BABEL_OP3_403_15322_20150319_184826_outLine +BABEL_OP3_403_15702_20141202_010728_inLine +BABEL_OP3_403_15702_20141202_010728_outLine +BABEL_OP3_403_16749_20141221_021003_inLine +BABEL_OP3_403_16749_20141221_021003_outLine +BABEL_OP3_403_16800_20141216_181241_inLine +BABEL_OP3_403_16800_20141216_181241_outLine +BABEL_OP3_403_16839_20141219_222837_inLine +BABEL_OP3_403_16839_20141219_222837_outLine +BABEL_OP3_403_16924_20141202_004432_inLine +BABEL_OP3_403_16924_20141202_004432_outLine +BABEL_OP3_403_17496_20141202_004936_inLine +BABEL_OP3_403_17496_20141202_004936_outLine +BABEL_OP3_403_18924_20141117_193058_inLine +BABEL_OP3_403_18924_20141117_193058_outLine +BABEL_OP3_403_18939_20141007_221546_inLine +BABEL_OP3_403_18939_20141007_221546_outLine +BABEL_OP3_403_19688_20150205_181600_inLine +BABEL_OP3_403_19688_20150205_181600_outLine +BABEL_OP3_403_19703_20141024_194336_inLine +BABEL_OP3_403_19703_20141024_194336_outLine +BABEL_OP3_403_20133_20141001_221247_inLine +BABEL_OP3_403_20133_20141001_221247_outLine +BABEL_OP3_403_20916_20141003_230543_inLine +BABEL_OP3_403_20916_20141003_230543_outLine +BABEL_OP3_403_21004_20141210_214422_inLine +BABEL_OP3_403_21004_20141210_214422_outLine +BABEL_OP3_403_21206_20141004_231905_inLine 
+BABEL_OP3_403_21206_20141004_231905_outLine +BABEL_OP3_403_21327_20141207_194922_inLine +BABEL_OP3_403_21327_20141207_194922_outLine +BABEL_OP3_403_21892_20141216_205644_inLine +BABEL_OP3_403_21892_20141216_205644_outLine +BABEL_OP3_403_22643_20150131_005325_inLine +BABEL_OP3_403_22643_20150131_005325_outLine +BABEL_OP3_403_22918_20150305_185811_inLine +BABEL_OP3_403_22918_20150305_185811_outLine +BABEL_OP3_403_22965_20141005_232023_inLine +BABEL_OP3_403_22965_20141005_232023_outLine +BABEL_OP3_403_23006_20141024_182721_inLine +BABEL_OP3_403_23006_20141024_182721_outLine +BABEL_OP3_403_23046_20141023_002436_inLine +BABEL_OP3_403_23046_20141023_002436_outLine +BABEL_OP3_403_24017_20141218_235010_inLine +BABEL_OP3_403_24017_20141218_235010_outLine +BABEL_OP3_403_24290_20150319_170027_inLine +BABEL_OP3_403_24290_20150319_170027_outLine +BABEL_OP3_403_25015_20150312_185754_inLine +BABEL_OP3_403_25015_20150312_185754_outLine +BABEL_OP3_403_25242_20150129_211027_inLine +BABEL_OP3_403_25242_20150129_211027_outLine +BABEL_OP3_403_25767_20141007_010749_inLine +BABEL_OP3_403_25767_20141007_010749_outLine +BABEL_OP3_403_26072_20150107_183553_inLine +BABEL_OP3_403_26072_20150107_183553_outLine +BABEL_OP3_403_27125_20141005_201825_inLine +BABEL_OP3_403_27125_20141005_201825_outLine +BABEL_OP3_403_27367_20150201_011720_inLine +BABEL_OP3_403_27367_20150201_011720_outLine +BABEL_OP3_403_28522_20141129_232715_inLine +BABEL_OP3_403_28522_20141129_232715_outLine +BABEL_OP3_403_28814_20141220_183346_inLine +BABEL_OP3_403_28814_20141220_183346_outLine +BABEL_OP3_403_31346_20141217_195511_inLine +BABEL_OP3_403_31346_20141217_195511_outLine +BABEL_OP3_403_31919_20150306_024725_inLine +BABEL_OP3_403_31919_20150306_024725_outLine +BABEL_OP3_403_32301_20141202_224639_inLine +BABEL_OP3_403_32301_20141202_224639_outLine +BABEL_OP3_403_32328_20141218_000510_inLine +BABEL_OP3_403_32328_20141218_000510_outLine +BABEL_OP3_403_33913_20141211_183339_inLine +BABEL_OP3_403_33913_20141211_183339_outLine +BABEL_OP3_403_34197_20141117_004710_inLine +BABEL_OP3_403_34197_20141117_004710_outLine +BABEL_OP3_403_34477_20141023_231734_inLine +BABEL_OP3_403_34477_20141023_231734_outLine +BABEL_OP3_403_35008_20141204_201241_inLine +BABEL_OP3_403_35008_20141204_201241_outLine +BABEL_OP3_403_35139_20141004_205621_inLine +BABEL_OP3_403_35139_20141004_205621_outLine +BABEL_OP3_403_35143_20141210_230830_inLine +BABEL_OP3_403_35143_20141210_230830_outLine +BABEL_OP3_403_35467_20141004_175833_inLine +BABEL_OP3_403_35467_20141004_175833_outLine +BABEL_OP3_403_35583_20150108_024439_inLine +BABEL_OP3_403_35583_20150108_024439_outLine +BABEL_OP3_403_36293_20141015_192540_inLine +BABEL_OP3_403_36293_20141015_192540_outLine +BABEL_OP3_403_37228_20150108_000217_inLine +BABEL_OP3_403_37228_20150108_000217_outLine +BABEL_OP3_403_37682_20141028_002656_inLine +BABEL_OP3_403_37682_20141028_002656_outLine +BABEL_OP3_403_37853_20150303_020840_inLine +BABEL_OP3_403_37853_20150303_020840_outLine +BABEL_OP3_403_38588_20141026_220103_inLine +BABEL_OP3_403_38588_20141026_220103_outLine +BABEL_OP3_403_38664_20141028_005703_inLine +BABEL_OP3_403_38664_20141028_005703_outLine +BABEL_OP3_403_38689_20141204_215950_inLine +BABEL_OP3_403_38689_20141204_215950_outLine +BABEL_OP3_403_38741_20141005_205401_inLine +BABEL_OP3_403_38741_20141005_205401_outLine +BABEL_OP3_403_39099_20150306_193032_inLine +BABEL_OP3_403_39099_20150306_193032_outLine +BABEL_OP3_403_39307_20141014_234344_inLine +BABEL_OP3_403_39307_20141014_234344_outLine 
+BABEL_OP3_403_39555_20141217_205213_inLine +BABEL_OP3_403_39555_20141217_205213_outLine +BABEL_OP3_403_41442_20141216_224519_inLine +BABEL_OP3_403_41442_20141216_224519_outLine +BABEL_OP3_403_41958_20141026_202739_inLine +BABEL_OP3_403_41958_20141026_202739_outLine +BABEL_OP3_403_42434_20141026_001223_inLine +BABEL_OP3_403_42434_20141026_001223_outLine +BABEL_OP3_403_42771_20141104_215437_inLine +BABEL_OP3_403_42771_20141104_215437_outLine +BABEL_OP3_403_43784_20141005_193431_inLine +BABEL_OP3_403_43784_20141005_193431_outLine +BABEL_OP3_403_43788_20141202_235051_inLine +BABEL_OP3_403_43788_20141202_235051_outLine +BABEL_OP3_403_44309_20150305_200025_inLine +BABEL_OP3_403_44309_20150305_200025_outLine +BABEL_OP3_403_44478_20150307_223313_inLine +BABEL_OP3_403_44478_20150307_223313_outLine +BABEL_OP3_403_45486_20150130_235157_inLine +BABEL_OP3_403_45486_20150130_235157_outLine +BABEL_OP3_403_45536_20141219_005329_inLine +BABEL_OP3_403_45536_20141219_005329_outLine +BABEL_OP3_403_46008_20150307_190844_inLine +BABEL_OP3_403_46008_20150307_190844_outLine +BABEL_OP3_403_46041_20141217_222544_inLine +BABEL_OP3_403_46041_20141217_222544_outLine +BABEL_OP3_403_46310_20141012_204940_inLine +BABEL_OP3_403_46310_20141012_204940_outLine +BABEL_OP3_403_46757_20141202_212733_inLine +BABEL_OP3_403_46757_20141202_212733_outLine +BABEL_OP3_403_47283_20141005_204650_inLine +BABEL_OP3_403_47283_20141005_204650_outLine +BABEL_OP3_403_47487_20141025_235747_inLine +BABEL_OP3_403_47487_20141025_235747_outLine +BABEL_OP3_403_47866_20150317_213617_inLine +BABEL_OP3_403_47866_20150317_213617_outLine +BABEL_OP3_403_47878_20141118_193135_inLine +BABEL_OP3_403_47878_20141118_193135_outLine +BABEL_OP3_403_48243_20141004_221542_inLine +BABEL_OP3_403_48243_20141004_221542_outLine +BABEL_OP3_403_48610_20141007_225901_inLine +BABEL_OP3_403_48610_20141007_225901_outLine +BABEL_OP3_403_48844_20141007_004947_inLine +BABEL_OP3_403_48844_20141007_004947_outLine +BABEL_OP3_403_48844_20141007_011027_inLine +BABEL_OP3_403_48844_20141007_011027_outLine +BABEL_OP3_403_49027_20150307_230828_inLine +BABEL_OP3_403_49027_20150307_230828_outLine +BABEL_OP3_403_49630_20141205_233804_inLine +BABEL_OP3_403_49630_20141205_233804_outLine +BABEL_OP3_403_49768_20141026_000059_inLine +BABEL_OP3_403_49768_20141026_000059_outLine +BABEL_OP3_403_49907_20141005_215057_inLine +BABEL_OP3_403_49907_20141005_215057_outLine +BABEL_OP3_403_50427_20141116_233807_inLine +BABEL_OP3_403_50427_20141116_233807_outLine +BABEL_OP3_403_50549_20150304_014353_inLine +BABEL_OP3_403_50549_20150304_014353_outLine +BABEL_OP3_403_50779_20141118_221929_inLine +BABEL_OP3_403_50779_20141118_221929_outLine +BABEL_OP3_403_50779_20141118_230132_inLine +BABEL_OP3_403_50779_20141118_230132_outLine +BABEL_OP3_403_52490_20141016_230923_inLine +BABEL_OP3_403_52490_20141016_230923_outLine +BABEL_OP3_403_52717_20141008_003843_inLine +BABEL_OP3_403_52717_20141008_003843_outLine +BABEL_OP3_403_53063_20141207_192558_inLine +BABEL_OP3_403_53063_20141207_192558_outLine +BABEL_OP3_403_53063_20141207_194007_inLine +BABEL_OP3_403_53063_20141207_194007_outLine +BABEL_OP3_403_54104_20141006_230139_inLine +BABEL_OP3_403_54104_20141006_230139_outLine +BABEL_OP3_403_54104_20141006_230643_inLine +BABEL_OP3_403_54104_20141006_230643_outLine +BABEL_OP3_403_54162_20141103_190601_inLine +BABEL_OP3_403_54162_20141103_190601_outLine +BABEL_OP3_403_54477_20141216_200349_inLine +BABEL_OP3_403_54477_20141216_200349_outLine +BABEL_OP3_403_54477_20141216_213534_inLine 
+BABEL_OP3_403_54477_20141216_213534_outLine +BABEL_OP3_403_54530_20141217_220934_inLine +BABEL_OP3_403_54530_20141217_220934_outLine +BABEL_OP3_403_54594_20150204_003149_inLine +BABEL_OP3_403_54594_20150204_003149_outLine +BABEL_OP3_403_55259_20141025_175845_inLine +BABEL_OP3_403_55259_20141025_175845_outLine +BABEL_OP3_403_55968_20141004_005950_inLine +BABEL_OP3_403_55968_20141004_005950_outLine +BABEL_OP3_403_56326_20150129_020103_inLine +BABEL_OP3_403_56326_20150129_020103_outLine +BABEL_OP3_403_57093_20141103_221842_inLine +BABEL_OP3_403_57093_20141103_221842_outLine +BABEL_OP3_403_57141_20141215_224302_inLine +BABEL_OP3_403_57141_20141215_224302_outLine +BABEL_OP3_403_57529_20141207_002135_inLine +BABEL_OP3_403_57529_20141207_002135_outLine +BABEL_OP3_403_59262_20141216_193024_inLine +BABEL_OP3_403_59262_20141216_193024_outLine +BABEL_OP3_403_60115_20141129_235248_inLine +BABEL_OP3_403_60115_20141129_235248_outLine +BABEL_OP3_403_60650_20150131_013236_inLine +BABEL_OP3_403_60650_20150131_013236_outLine +BABEL_OP3_403_61678_20141003_231023_inLine +BABEL_OP3_403_61678_20141003_231023_outLine +BABEL_OP3_403_61731_20141005_201612_inLine +BABEL_OP3_403_61731_20141005_201612_outLine +BABEL_OP3_403_61971_20150307_004145_inLine +BABEL_OP3_403_61971_20150307_004145_outLine +BABEL_OP3_403_62014_20141127_180004_inLine +BABEL_OP3_403_62014_20141127_180004_outLine +BABEL_OP3_403_62734_20141025_192117_inLine +BABEL_OP3_403_62734_20141025_192117_outLine +BABEL_OP3_403_62810_20141016_191619_inLine +BABEL_OP3_403_62810_20141016_191619_outLine +BABEL_OP3_403_63670_20141215_221926_inLine +BABEL_OP3_403_63670_20141215_221926_outLine +BABEL_OP3_403_63787_20141006_214400_inLine +BABEL_OP3_403_63787_20141006_214400_outLine +BABEL_OP3_403_63906_20150305_205105_inLine +BABEL_OP3_403_63906_20150305_205105_outLine +BABEL_OP3_403_65367_20150108_004325_inLine +BABEL_OP3_403_65367_20150108_004325_outLine +BABEL_OP3_403_66001_20141007_230508_inLine +BABEL_OP3_403_66001_20141007_230508_outLine +BABEL_OP3_403_66822_20141029_224921_inLine +BABEL_OP3_403_66822_20141029_224921_outLine +BABEL_OP3_403_66916_20141015_215414_inLine +BABEL_OP3_403_66916_20141015_215414_outLine +BABEL_OP3_403_67622_20141014_193846_inLine +BABEL_OP3_403_67622_20141014_193846_outLine +BABEL_OP3_403_67659_20141023_013756_inLine +BABEL_OP3_403_67659_20141023_013756_outLine +BABEL_OP3_403_68384_20141216_000507_inLine +BABEL_OP3_403_68384_20141216_000507_outLine +BABEL_OP3_403_68748_20141130_014650_inLine +BABEL_OP3_403_68748_20141130_014650_outLine +BABEL_OP3_403_68854_20150306_195508_inLine +BABEL_OP3_403_68854_20150306_195508_outLine +BABEL_OP3_403_69096_20150309_190140_inLine +BABEL_OP3_403_69096_20150309_190140_outLine +BABEL_OP3_403_70121_20141026_225432_inLine +BABEL_OP3_403_70121_20141026_225432_outLine +BABEL_OP3_403_70216_20150128_234110_inLine +BABEL_OP3_403_70216_20150128_234110_outLine +BABEL_OP3_403_70257_20150204_032020_inLine +BABEL_OP3_403_70257_20150204_032020_outLine +BABEL_OP3_403_70343_20141205_225856_inLine +BABEL_OP3_403_70343_20141205_225856_outLine +BABEL_OP3_403_71047_20150106_190413_inLine +BABEL_OP3_403_71047_20150106_190413_outLine +BABEL_OP3_403_72040_20141006_004959_inLine +BABEL_OP3_403_72040_20141006_004959_outLine +BABEL_OP3_403_73430_20141205_233006_inLine +BABEL_OP3_403_73430_20141205_233006_outLine +BABEL_OP3_403_73591_20140930_234521_inLine +BABEL_OP3_403_73591_20140930_234521_outLine +BABEL_OP3_403_74728_20150312_182026_inLine +BABEL_OP3_403_74728_20150312_182026_outLine 
+BABEL_OP3_403_75064_20141022_225629_inLine +BABEL_OP3_403_75064_20141022_225629_outLine +BABEL_OP3_403_76499_20141103_232220_inLine +BABEL_OP3_403_76499_20141103_232220_outLine +BABEL_OP3_403_77427_20141027_223134_inLine +BABEL_OP3_403_77427_20141027_223134_outLine +BABEL_OP3_403_77990_20141004_201020_inLine +BABEL_OP3_403_77990_20141004_201020_outLine +BABEL_OP3_403_78116_20141208_213333_inLine +BABEL_OP3_403_78116_20141208_213333_outLine +BABEL_OP3_403_78116_20141208_214155_inLine +BABEL_OP3_403_78116_20141208_214155_outLine +BABEL_OP3_403_78254_20141024_234037_inLine +BABEL_OP3_403_78254_20141024_234037_outLine +BABEL_OP3_403_78604_20141006_193457_inLine +BABEL_OP3_403_78604_20141006_193457_outLine +BABEL_OP3_403_78833_20150205_204459_inLine +BABEL_OP3_403_78833_20150205_204459_outLine +BABEL_OP3_403_80439_20141023_195331_inLine +BABEL_OP3_403_80439_20141023_195331_outLine +BABEL_OP3_403_80781_20141026_214157_inLine +BABEL_OP3_403_80781_20141026_214157_outLine +BABEL_OP3_403_81149_20150313_000213_inLine +BABEL_OP3_403_81149_20150313_000213_outLine +BABEL_OP3_403_81213_20141004_213211_inLine +BABEL_OP3_403_81213_20141004_213211_outLine +BABEL_OP3_403_82425_20141007_231028_inLine +BABEL_OP3_403_82425_20141007_231028_outLine +BABEL_OP3_403_83238_20141107_233257_inLine +BABEL_OP3_403_83238_20141107_233257_outLine +BABEL_OP3_403_83455_20141103_225146_inLine +BABEL_OP3_403_83455_20141103_225146_outLine +BABEL_OP3_403_83651_20141005_194737_inLine +BABEL_OP3_403_83651_20141005_194737_outLine +BABEL_OP3_403_84194_20150204_213858_inLine +BABEL_OP3_403_84194_20150204_213858_outLine +BABEL_OP3_403_84458_20141208_005012_inLine +BABEL_OP3_403_84458_20141208_005012_outLine +BABEL_OP3_403_84547_20141013_223556_inLine +BABEL_OP3_403_84547_20141013_223556_outLine +BABEL_OP3_403_84737_20150303_195506_inLine +BABEL_OP3_403_84737_20150303_195506_outLine +BABEL_OP3_403_85647_20141103_192225_inLine +BABEL_OP3_403_85647_20141103_192225_outLine +BABEL_OP3_403_86845_20150201_015753_inLine +BABEL_OP3_403_86845_20150201_015753_outLine +BABEL_OP3_403_87889_20150107_001827_inLine +BABEL_OP3_403_87889_20150107_001827_outLine +BABEL_OP3_403_88260_20141029_205951_inLine +BABEL_OP3_403_88260_20141029_205951_outLine +BABEL_OP3_403_88812_20150307_181013_inLine +BABEL_OP3_403_88812_20150307_181013_outLine +BABEL_OP3_403_89059_20141220_191342_inLine +BABEL_OP3_403_89059_20141220_191342_outLine +BABEL_OP3_403_89358_20141030_231758_inLine +BABEL_OP3_403_89358_20141030_231758_outLine +BABEL_OP3_403_90709_20141007_234900_inLine +BABEL_OP3_403_90709_20141007_234900_outLine +BABEL_OP3_403_90739_20141028_224009_inLine +BABEL_OP3_403_90739_20141028_224009_outLine +BABEL_OP3_403_92527_20141026_192704_inLine +BABEL_OP3_403_92527_20141026_192704_outLine +BABEL_OP3_403_92740_20141130_011740_inLine +BABEL_OP3_403_92740_20141130_011740_outLine +BABEL_OP3_403_94409_20141028_214356_inLine +BABEL_OP3_403_94409_20141028_214356_outLine +BABEL_OP3_403_94449_20150309_193606_inLine +BABEL_OP3_403_94449_20150309_193606_outLine +BABEL_OP3_403_94487_20150312_014136_inLine +BABEL_OP3_403_94487_20150312_014136_outLine +BABEL_OP3_403_95583_20141013_184937_inLine +BABEL_OP3_403_95583_20141013_184937_outLine +BABEL_OP3_403_95670_20141016_214958_inLine +BABEL_OP3_403_95670_20141016_214958_outLine +BABEL_OP3_403_96525_20141217_223842_inLine +BABEL_OP3_403_96525_20141217_223842_outLine +BABEL_OP3_403_96910_20141024_195822_inLine +BABEL_OP3_403_96910_20141024_195822_outLine +BABEL_OP3_403_97376_20141206_215930_inLine 
+BABEL_OP3_403_97376_20141206_215930_outLine +BABEL_OP3_403_97772_20141003_213919_inLine +BABEL_OP3_403_97772_20141003_213919_outLine +BABEL_OP3_403_98311_20141005_195843_inLine +BABEL_OP3_403_98311_20141005_195843_outLine +BABEL_OP3_403_99202_20141108_002737_inLine +BABEL_OP3_403_99202_20141108_002737_outLine +BABEL_OP3_403_99955_20150107_213836_inLine +BABEL_OP3_403_99955_20150107_213836_outLine diff --git a/egs/babel/s5d/conf/lists/403-dholuo/training.list b/egs/babel/s5d/conf/lists/403-dholuo/training.list new file mode 100644 index 00000000000..3b32ed92b92 --- /dev/null +++ b/egs/babel/s5d/conf/lists/403-dholuo/training.list @@ -0,0 +1,502 @@ +BABEL_OP3_403_10313_20150130_193605_inLine +BABEL_OP3_403_10313_20150130_193605_outLine +BABEL_OP3_403_10469_20150130_211522_inLine +BABEL_OP3_403_10469_20150130_211522_outLine +BABEL_OP3_403_10966_20141025_191612_inLine +BABEL_OP3_403_10966_20141025_191612_outLine +BABEL_OP3_403_11663_20141205_201130_inLine +BABEL_OP3_403_11663_20141205_201130_outLine +BABEL_OP3_403_11663_20141205_204332_inLine +BABEL_OP3_403_11663_20141205_204332_outLine +BABEL_OP3_403_11681_20141010_203514_inLine +BABEL_OP3_403_11681_20141010_203514_outLine +BABEL_OP3_403_11797_20141013_012556_inLine +BABEL_OP3_403_11797_20141013_012556_outLine +BABEL_OP3_403_12606_20150314_170830_inLine +BABEL_OP3_403_12606_20150314_170830_outLine +BABEL_OP3_403_12609_20150313_183914_inLine +BABEL_OP3_403_12609_20150313_183914_outLine +BABEL_OP3_403_13324_20141004_204835_inLine +BABEL_OP3_403_13324_20141004_204835_outLine +BABEL_OP3_403_13483_20141205_201059_inLine +BABEL_OP3_403_13483_20141205_201059_outLine +BABEL_OP3_403_13490_20141103_182258_inLine +BABEL_OP3_403_13490_20141103_182258_outLine +BABEL_OP3_403_14807_20141117_201842_inLine +BABEL_OP3_403_14807_20141117_201842_outLine +BABEL_OP3_403_14814_20141023_004823_inLine +BABEL_OP3_403_14814_20141023_004823_outLine +BABEL_OP3_403_14929_20141029_222741_inLine +BABEL_OP3_403_14929_20141029_222741_outLine +BABEL_OP3_403_15281_20150307_201454_inLine +BABEL_OP3_403_15281_20150307_201454_outLine +BABEL_OP3_403_15322_20150319_184826_inLine +BABEL_OP3_403_15322_20150319_184826_outLine +BABEL_OP3_403_15702_20141202_010728_inLine +BABEL_OP3_403_15702_20141202_010728_outLine +BABEL_OP3_403_16475_20141027_225406_inLine +BABEL_OP3_403_16475_20141027_225406_outLine +BABEL_OP3_403_16749_20141221_021003_inLine +BABEL_OP3_403_16749_20141221_021003_outLine +BABEL_OP3_403_16800_20141216_181241_inLine +BABEL_OP3_403_16800_20141216_181241_outLine +BABEL_OP3_403_16839_20141219_222837_inLine +BABEL_OP3_403_16839_20141219_222837_outLine +BABEL_OP3_403_16924_20141202_004432_inLine +BABEL_OP3_403_16924_20141202_004432_outLine +BABEL_OP3_403_16938_20141117_002438_inLine +BABEL_OP3_403_16938_20141117_002438_outLine +BABEL_OP3_403_17280_20141027_223651_inLine +BABEL_OP3_403_17280_20141027_223651_outLine +BABEL_OP3_403_17496_20141202_004936_inLine +BABEL_OP3_403_17496_20141202_004936_outLine +BABEL_OP3_403_18924_20141117_193058_inLine +BABEL_OP3_403_18924_20141117_193058_outLine +BABEL_OP3_403_18939_20141007_221546_inLine +BABEL_OP3_403_18939_20141007_221546_outLine +BABEL_OP3_403_19688_20150205_181600_inLine +BABEL_OP3_403_19688_20150205_181600_outLine +BABEL_OP3_403_19703_20141024_194336_inLine +BABEL_OP3_403_19703_20141024_194336_outLine +BABEL_OP3_403_19722_20141013_214859_inLine +BABEL_OP3_403_19722_20141013_214859_outLine +BABEL_OP3_403_19749_20150211_210545_inLine +BABEL_OP3_403_19749_20150211_210545_outLine +BABEL_OP3_403_20133_20141001_221247_inLine 
+BABEL_OP3_403_20133_20141001_221247_outLine +BABEL_OP3_403_20916_20141003_230543_inLine +BABEL_OP3_403_20916_20141003_230543_outLine +BABEL_OP3_403_21004_20141210_214422_inLine +BABEL_OP3_403_21004_20141210_214422_outLine +BABEL_OP3_403_21206_20141004_231905_inLine +BABEL_OP3_403_21206_20141004_231905_outLine +BABEL_OP3_403_21327_20141207_194922_inLine +BABEL_OP3_403_21327_20141207_194922_outLine +BABEL_OP3_403_21892_20141216_205644_inLine +BABEL_OP3_403_21892_20141216_205644_outLine +BABEL_OP3_403_22321_20141012_234503_inLine +BABEL_OP3_403_22321_20141012_234503_outLine +BABEL_OP3_403_22643_20150131_005325_inLine +BABEL_OP3_403_22643_20150131_005325_outLine +BABEL_OP3_403_22918_20150305_185811_inLine +BABEL_OP3_403_22918_20150305_185811_outLine +BABEL_OP3_403_22965_20141005_232023_inLine +BABEL_OP3_403_22965_20141005_232023_outLine +BABEL_OP3_403_23006_20141024_182721_inLine +BABEL_OP3_403_23006_20141024_182721_outLine +BABEL_OP3_403_23046_20141023_002436_inLine +BABEL_OP3_403_23046_20141023_002436_outLine +BABEL_OP3_403_23893_20150311_211416_inLine +BABEL_OP3_403_23893_20150311_211416_outLine +BABEL_OP3_403_24017_20141218_235010_inLine +BABEL_OP3_403_24017_20141218_235010_outLine +BABEL_OP3_403_24290_20150319_170027_inLine +BABEL_OP3_403_24290_20150319_170027_outLine +BABEL_OP3_403_24589_20141023_173937_inLine +BABEL_OP3_403_24589_20141023_173937_outLine +BABEL_OP3_403_25015_20150312_185754_inLine +BABEL_OP3_403_25015_20150312_185754_outLine +BABEL_OP3_403_25242_20150129_211027_inLine +BABEL_OP3_403_25242_20150129_211027_outLine +BABEL_OP3_403_25767_20141007_010749_inLine +BABEL_OP3_403_25767_20141007_010749_outLine +BABEL_OP3_403_25961_20141016_201537_inLine +BABEL_OP3_403_25961_20141016_201537_outLine +BABEL_OP3_403_26072_20150107_183553_inLine +BABEL_OP3_403_26072_20150107_183553_outLine +BABEL_OP3_403_27125_20141005_201825_inLine +BABEL_OP3_403_27125_20141005_201825_outLine +BABEL_OP3_403_27367_20150201_011720_inLine +BABEL_OP3_403_27367_20150201_011720_outLine +BABEL_OP3_403_28190_20141218_232404_inLine +BABEL_OP3_403_28190_20141218_232404_outLine +BABEL_OP3_403_28522_20141129_232715_inLine +BABEL_OP3_403_28522_20141129_232715_outLine +BABEL_OP3_403_28775_20141005_002735_inLine +BABEL_OP3_403_28775_20141005_002735_outLine +BABEL_OP3_403_28814_20141220_183346_inLine +BABEL_OP3_403_28814_20141220_183346_outLine +BABEL_OP3_403_28945_20141006_202723_inLine +BABEL_OP3_403_28945_20141006_202723_outLine +BABEL_OP3_403_29168_20141013_195745_inLine +BABEL_OP3_403_29168_20141013_195745_outLine +BABEL_OP3_403_29323_20150303_223419_inLine +BABEL_OP3_403_29323_20150303_223419_outLine +BABEL_OP3_403_31109_20141107_213704_inLine +BABEL_OP3_403_31109_20141107_213704_outLine +BABEL_OP3_403_31346_20141217_195511_inLine +BABEL_OP3_403_31346_20141217_195511_outLine +BABEL_OP3_403_31490_20141006_210241_inLine +BABEL_OP3_403_31490_20141006_210241_outLine +BABEL_OP3_403_31624_20141014_211203_inLine +BABEL_OP3_403_31624_20141014_211203_outLine +BABEL_OP3_403_31919_20150306_024725_inLine +BABEL_OP3_403_31919_20150306_024725_outLine +BABEL_OP3_403_32301_20141202_224639_inLine +BABEL_OP3_403_32301_20141202_224639_outLine +BABEL_OP3_403_32328_20141218_000510_inLine +BABEL_OP3_403_32328_20141218_000510_outLine +BABEL_OP3_403_32861_20150107_160647_inLine +BABEL_OP3_403_32861_20150107_160647_outLine +BABEL_OP3_403_33840_20141219_200146_inLine +BABEL_OP3_403_33840_20141219_200146_outLine +BABEL_OP3_403_33913_20141211_183339_inLine +BABEL_OP3_403_33913_20141211_183339_outLine 
+BABEL_OP3_403_34197_20141117_004710_inLine +BABEL_OP3_403_34197_20141117_004710_outLine +BABEL_OP3_403_34477_20141023_231734_inLine +BABEL_OP3_403_34477_20141023_231734_outLine +BABEL_OP3_403_35008_20141204_201241_inLine +BABEL_OP3_403_35008_20141204_201241_outLine +BABEL_OP3_403_35139_20141004_205621_inLine +BABEL_OP3_403_35139_20141004_205621_outLine +BABEL_OP3_403_35143_20141210_230830_inLine +BABEL_OP3_403_35143_20141210_230830_outLine +BABEL_OP3_403_35467_20141004_175833_inLine +BABEL_OP3_403_35467_20141004_175833_outLine +BABEL_OP3_403_35583_20150108_024439_inLine +BABEL_OP3_403_35583_20150108_024439_outLine +BABEL_OP3_403_36293_20141015_192540_inLine +BABEL_OP3_403_36293_20141015_192540_outLine +BABEL_OP3_403_36632_20150206_041325_inLine +BABEL_OP3_403_36632_20150206_041325_outLine +BABEL_OP3_403_36990_20141030_231441_inLine +BABEL_OP3_403_36990_20141030_231441_outLine +BABEL_OP3_403_37228_20150108_000217_inLine +BABEL_OP3_403_37228_20150108_000217_outLine +BABEL_OP3_403_37682_20141028_002656_inLine +BABEL_OP3_403_37682_20141028_002656_outLine +BABEL_OP3_403_37853_20150303_020840_inLine +BABEL_OP3_403_37853_20150303_020840_outLine +BABEL_OP3_403_38588_20141026_220103_inLine +BABEL_OP3_403_38588_20141026_220103_outLine +BABEL_OP3_403_38664_20141028_005703_inLine +BABEL_OP3_403_38664_20141028_005703_outLine +BABEL_OP3_403_38689_20141204_215950_inLine +BABEL_OP3_403_38689_20141204_215950_outLine +BABEL_OP3_403_38741_20141005_205401_inLine +BABEL_OP3_403_38741_20141005_205401_outLine +BABEL_OP3_403_38878_20141121_184540_inLine +BABEL_OP3_403_38878_20141121_184540_outLine +BABEL_OP3_403_39099_20150306_193032_inLine +BABEL_OP3_403_39099_20150306_193032_outLine +BABEL_OP3_403_39307_20141014_234344_inLine +BABEL_OP3_403_39307_20141014_234344_outLine +BABEL_OP3_403_39555_20141217_205213_inLine +BABEL_OP3_403_39555_20141217_205213_outLine +BABEL_OP3_403_41442_20141216_224519_inLine +BABEL_OP3_403_41442_20141216_224519_outLine +BABEL_OP3_403_41958_20141026_202739_inLine +BABEL_OP3_403_41958_20141026_202739_outLine +BABEL_OP3_403_42434_20141026_001223_inLine +BABEL_OP3_403_42434_20141026_001223_outLine +BABEL_OP3_403_42718_20150306_202240_inLine +BABEL_OP3_403_42718_20150306_202240_outLine +BABEL_OP3_403_42771_20141104_215437_inLine +BABEL_OP3_403_42771_20141104_215437_outLine +BABEL_OP3_403_43368_20141026_000458_inLine +BABEL_OP3_403_43368_20141026_000458_outLine +BABEL_OP3_403_43784_20141005_193431_inLine +BABEL_OP3_403_43784_20141005_193431_outLine +BABEL_OP3_403_43788_20141202_235051_inLine +BABEL_OP3_403_43788_20141202_235051_outLine +BABEL_OP3_403_44309_20150305_200025_inLine +BABEL_OP3_403_44309_20150305_200025_outLine +BABEL_OP3_403_44478_20150307_223313_inLine +BABEL_OP3_403_44478_20150307_223313_outLine +BABEL_OP3_403_44868_20141129_220211_inLine +BABEL_OP3_403_44868_20141129_220211_outLine +BABEL_OP3_403_44961_20141006_233622_inLine +BABEL_OP3_403_44961_20141006_233622_outLine +BABEL_OP3_403_44961_20141006_235203_inLine +BABEL_OP3_403_44961_20141006_235203_outLine +BABEL_OP3_403_45486_20150130_235157_inLine +BABEL_OP3_403_45486_20150130_235157_outLine +BABEL_OP3_403_45536_20141219_005329_inLine +BABEL_OP3_403_45536_20141219_005329_outLine +BABEL_OP3_403_46008_20150307_190844_inLine +BABEL_OP3_403_46008_20150307_190844_outLine +BABEL_OP3_403_46041_20141217_222544_inLine +BABEL_OP3_403_46041_20141217_222544_outLine +BABEL_OP3_403_46310_20141012_204940_inLine +BABEL_OP3_403_46310_20141012_204940_outLine +BABEL_OP3_403_46757_20141202_212733_inLine 
+BABEL_OP3_403_46757_20141202_212733_outLine +BABEL_OP3_403_47215_20141007_230222_inLine +BABEL_OP3_403_47215_20141007_230222_outLine +BABEL_OP3_403_47283_20141005_204650_inLine +BABEL_OP3_403_47283_20141005_204650_outLine +BABEL_OP3_403_47487_20141025_235747_inLine +BABEL_OP3_403_47487_20141025_235747_outLine +BABEL_OP3_403_47866_20150317_213617_inLine +BABEL_OP3_403_47866_20150317_213617_outLine +BABEL_OP3_403_47878_20141118_193135_inLine +BABEL_OP3_403_47878_20141118_193135_outLine +BABEL_OP3_403_48243_20141004_221542_inLine +BABEL_OP3_403_48243_20141004_221542_outLine +BABEL_OP3_403_48610_20141007_225901_inLine +BABEL_OP3_403_48610_20141007_225901_outLine +BABEL_OP3_403_48663_20150306_181741_inLine +BABEL_OP3_403_48663_20150306_181741_outLine +BABEL_OP3_403_48844_20141007_004947_inLine +BABEL_OP3_403_48844_20141007_004947_outLine +BABEL_OP3_403_48844_20141007_011027_inLine +BABEL_OP3_403_48844_20141007_011027_outLine +BABEL_OP3_403_49027_20150307_230828_inLine +BABEL_OP3_403_49027_20150307_230828_outLine +BABEL_OP3_403_49630_20141205_233804_inLine +BABEL_OP3_403_49630_20141205_233804_outLine +BABEL_OP3_403_49768_20141026_000059_inLine +BABEL_OP3_403_49768_20141026_000059_outLine +BABEL_OP3_403_49907_20141005_215057_inLine +BABEL_OP3_403_49907_20141005_215057_outLine +BABEL_OP3_403_50427_20141116_233807_inLine +BABEL_OP3_403_50427_20141116_233807_outLine +BABEL_OP3_403_50549_20150304_014353_inLine +BABEL_OP3_403_50549_20150304_014353_outLine +BABEL_OP3_403_50779_20141118_221929_inLine +BABEL_OP3_403_50779_20141118_221929_outLine +BABEL_OP3_403_50779_20141118_230132_inLine +BABEL_OP3_403_50779_20141118_230132_outLine +BABEL_OP3_403_51955_20141004_212210_inLine +BABEL_OP3_403_51955_20141004_212210_outLine +BABEL_OP3_403_52490_20141016_230923_inLine +BABEL_OP3_403_52490_20141016_230923_outLine +BABEL_OP3_403_52717_20141008_003843_inLine +BABEL_OP3_403_52717_20141008_003843_outLine +BABEL_OP3_403_53063_20141207_192558_inLine +BABEL_OP3_403_53063_20141207_192558_outLine +BABEL_OP3_403_53063_20141207_194007_inLine +BABEL_OP3_403_53063_20141207_194007_outLine +BABEL_OP3_403_53842_20141031_193507_inLine +BABEL_OP3_403_53842_20141031_193507_outLine +BABEL_OP3_403_54104_20141006_230139_inLine +BABEL_OP3_403_54104_20141006_230139_outLine +BABEL_OP3_403_54104_20141006_230643_inLine +BABEL_OP3_403_54104_20141006_230643_outLine +BABEL_OP3_403_54162_20141103_190601_inLine +BABEL_OP3_403_54162_20141103_190601_outLine +BABEL_OP3_403_54477_20141216_200349_inLine +BABEL_OP3_403_54477_20141216_200349_outLine +BABEL_OP3_403_54477_20141216_213534_inLine +BABEL_OP3_403_54477_20141216_213534_outLine +BABEL_OP3_403_54530_20141217_220934_inLine +BABEL_OP3_403_54530_20141217_220934_outLine +BABEL_OP3_403_54594_20150204_003149_inLine +BABEL_OP3_403_54594_20150204_003149_outLine +BABEL_OP3_403_55259_20141025_175845_inLine +BABEL_OP3_403_55259_20141025_175845_outLine +BABEL_OP3_403_55950_20150312_174125_inLine +BABEL_OP3_403_55950_20150312_174125_outLine +BABEL_OP3_403_55968_20141004_005950_inLine +BABEL_OP3_403_55968_20141004_005950_outLine +BABEL_OP3_403_56198_20141005_222956_inLine +BABEL_OP3_403_56198_20141005_222956_outLine +BABEL_OP3_403_56326_20150129_020103_inLine +BABEL_OP3_403_56326_20150129_020103_outLine +BABEL_OP3_403_57093_20141103_221842_inLine +BABEL_OP3_403_57093_20141103_221842_outLine +BABEL_OP3_403_57141_20141215_224302_inLine +BABEL_OP3_403_57141_20141215_224302_outLine +BABEL_OP3_403_57529_20141207_002135_inLine +BABEL_OP3_403_57529_20141207_002135_outLine 
+BABEL_OP3_403_58047_20141118_184454_inLine +BABEL_OP3_403_58047_20141118_184454_outLine +BABEL_OP3_403_58585_20150106_172737_inLine +BABEL_OP3_403_58585_20150106_172737_outLine +BABEL_OP3_403_59262_20141216_193024_inLine +BABEL_OP3_403_59262_20141216_193024_outLine +BABEL_OP3_403_60115_20141129_235248_inLine +BABEL_OP3_403_60115_20141129_235248_outLine +BABEL_OP3_403_60310_20141217_205059_inLine +BABEL_OP3_403_60310_20141217_205059_outLine +BABEL_OP3_403_60418_20141129_235907_inLine +BABEL_OP3_403_60418_20141129_235907_outLine +BABEL_OP3_403_60650_20150131_013236_inLine +BABEL_OP3_403_60650_20150131_013236_outLine +BABEL_OP3_403_61348_20141103_230857_inLine +BABEL_OP3_403_61348_20141103_230857_outLine +BABEL_OP3_403_61678_20141003_231023_inLine +BABEL_OP3_403_61678_20141003_231023_outLine +BABEL_OP3_403_61731_20141005_201612_inLine +BABEL_OP3_403_61731_20141005_201612_outLine +BABEL_OP3_403_61971_20150307_004145_inLine +BABEL_OP3_403_61971_20150307_004145_outLine +BABEL_OP3_403_62014_20141127_180004_inLine +BABEL_OP3_403_62014_20141127_180004_outLine +BABEL_OP3_403_62734_20141025_192117_inLine +BABEL_OP3_403_62734_20141025_192117_outLine +BABEL_OP3_403_62810_20141016_191619_inLine +BABEL_OP3_403_62810_20141016_191619_outLine +BABEL_OP3_403_63670_20141215_221926_inLine +BABEL_OP3_403_63670_20141215_221926_outLine +BABEL_OP3_403_63787_20141006_214400_inLine +BABEL_OP3_403_63787_20141006_214400_outLine +BABEL_OP3_403_63906_20150305_205105_inLine +BABEL_OP3_403_63906_20150305_205105_outLine +BABEL_OP3_403_65367_20150108_004325_inLine +BABEL_OP3_403_65367_20150108_004325_outLine +BABEL_OP3_403_65640_20150314_163101_inLine +BABEL_OP3_403_65640_20150314_163101_outLine +BABEL_OP3_403_66001_20141007_230508_inLine +BABEL_OP3_403_66001_20141007_230508_outLine +BABEL_OP3_403_66822_20141029_224921_inLine +BABEL_OP3_403_66822_20141029_224921_outLine +BABEL_OP3_403_66916_20141015_215414_inLine +BABEL_OP3_403_66916_20141015_215414_outLine +BABEL_OP3_403_67622_20141014_193846_inLine +BABEL_OP3_403_67622_20141014_193846_outLine +BABEL_OP3_403_67659_20141023_013756_inLine +BABEL_OP3_403_67659_20141023_013756_outLine +BABEL_OP3_403_68384_20141216_000507_inLine +BABEL_OP3_403_68384_20141216_000507_outLine +BABEL_OP3_403_68748_20141130_014650_inLine +BABEL_OP3_403_68748_20141130_014650_outLine +BABEL_OP3_403_68854_20150306_195508_inLine +BABEL_OP3_403_68854_20150306_195508_outLine +BABEL_OP3_403_69096_20150309_190140_inLine +BABEL_OP3_403_69096_20150309_190140_outLine +BABEL_OP3_403_69107_20141106_000151_inLine +BABEL_OP3_403_69107_20141106_000151_outLine +BABEL_OP3_403_69746_20141220_191513_inLine +BABEL_OP3_403_69746_20141220_191513_outLine +BABEL_OP3_403_70121_20141026_225432_inLine +BABEL_OP3_403_70121_20141026_225432_outLine +BABEL_OP3_403_70216_20150128_234110_inLine +BABEL_OP3_403_70216_20150128_234110_outLine +BABEL_OP3_403_70257_20150204_032020_inLine +BABEL_OP3_403_70257_20150204_032020_outLine +BABEL_OP3_403_70343_20141205_225856_inLine +BABEL_OP3_403_70343_20141205_225856_outLine +BABEL_OP3_403_71047_20150106_190413_inLine +BABEL_OP3_403_71047_20150106_190413_outLine +BABEL_OP3_403_72007_20141205_002010_inLine +BABEL_OP3_403_72007_20141205_002010_outLine +BABEL_OP3_403_72040_20141006_004959_inLine +BABEL_OP3_403_72040_20141006_004959_outLine +BABEL_OP3_403_72110_20141210_212045_inLine +BABEL_OP3_403_72110_20141210_212045_outLine +BABEL_OP3_403_72844_20141004_005248_inLine +BABEL_OP3_403_72844_20141004_005248_outLine +BABEL_OP3_403_73430_20141205_233006_inLine 
+BABEL_OP3_403_73430_20141205_233006_outLine +BABEL_OP3_403_73591_20140930_234521_inLine +BABEL_OP3_403_73591_20140930_234521_outLine +BABEL_OP3_403_74728_20150312_182026_inLine +BABEL_OP3_403_74728_20150312_182026_outLine +BABEL_OP3_403_75064_20141022_225629_inLine +BABEL_OP3_403_75064_20141022_225629_outLine +BABEL_OP3_403_75223_20141016_194054_inLine +BABEL_OP3_403_75223_20141016_194054_outLine +BABEL_OP3_403_76499_20141103_232220_inLine +BABEL_OP3_403_76499_20141103_232220_outLine +BABEL_OP3_403_77427_20141027_223134_inLine +BABEL_OP3_403_77427_20141027_223134_outLine +BABEL_OP3_403_77974_20150312_200046_inLine +BABEL_OP3_403_77974_20150312_200046_outLine +BABEL_OP3_403_77990_20141004_201020_inLine +BABEL_OP3_403_77990_20141004_201020_outLine +BABEL_OP3_403_78116_20141208_213333_inLine +BABEL_OP3_403_78116_20141208_213333_outLine +BABEL_OP3_403_78116_20141208_214155_inLine +BABEL_OP3_403_78116_20141208_214155_outLine +BABEL_OP3_403_78254_20141024_234037_inLine +BABEL_OP3_403_78254_20141024_234037_outLine +BABEL_OP3_403_78360_20150107_231519_inLine +BABEL_OP3_403_78360_20150107_231519_outLine +BABEL_OP3_403_78544_20141201_192016_inLine +BABEL_OP3_403_78544_20141201_192016_outLine +BABEL_OP3_403_78604_20141006_193457_inLine +BABEL_OP3_403_78604_20141006_193457_outLine +BABEL_OP3_403_78833_20150205_204459_inLine +BABEL_OP3_403_78833_20150205_204459_outLine +BABEL_OP3_403_80439_20141023_195331_inLine +BABEL_OP3_403_80439_20141023_195331_outLine +BABEL_OP3_403_80781_20141026_214157_inLine +BABEL_OP3_403_80781_20141026_214157_outLine +BABEL_OP3_403_81149_20150313_000213_inLine +BABEL_OP3_403_81149_20150313_000213_outLine +BABEL_OP3_403_81213_20141004_213211_inLine +BABEL_OP3_403_81213_20141004_213211_outLine +BABEL_OP3_403_82391_20141206_001207_inLine +BABEL_OP3_403_82391_20141206_001207_outLine +BABEL_OP3_403_82425_20141007_231028_inLine +BABEL_OP3_403_82425_20141007_231028_outLine +BABEL_OP3_403_83238_20141107_233257_inLine +BABEL_OP3_403_83238_20141107_233257_outLine +BABEL_OP3_403_83436_20141012_221126_inLine +BABEL_OP3_403_83436_20141012_221126_outLine +BABEL_OP3_403_83455_20141103_225146_inLine +BABEL_OP3_403_83455_20141103_225146_outLine +BABEL_OP3_403_83651_20141005_194737_inLine +BABEL_OP3_403_83651_20141005_194737_outLine +BABEL_OP3_403_84194_20150204_213858_inLine +BABEL_OP3_403_84194_20150204_213858_outLine +BABEL_OP3_403_84458_20141208_005012_inLine +BABEL_OP3_403_84458_20141208_005012_outLine +BABEL_OP3_403_84469_20141211_002526_inLine +BABEL_OP3_403_84469_20141211_002526_outLine +BABEL_OP3_403_84547_20141013_223556_inLine +BABEL_OP3_403_84547_20141013_223556_outLine +BABEL_OP3_403_84605_20141005_214529_inLine +BABEL_OP3_403_84605_20141005_214529_outLine +BABEL_OP3_403_84737_20150303_195506_inLine +BABEL_OP3_403_84737_20150303_195506_outLine +BABEL_OP3_403_85647_20141103_192225_inLine +BABEL_OP3_403_85647_20141103_192225_outLine +BABEL_OP3_403_86845_20150201_015753_inLine +BABEL_OP3_403_86845_20150201_015753_outLine +BABEL_OP3_403_87889_20150107_001827_inLine +BABEL_OP3_403_87889_20150107_001827_outLine +BABEL_OP3_403_87921_20141210_233414_inLine +BABEL_OP3_403_87921_20141210_233414_outLine +BABEL_OP3_403_88260_20141029_205951_inLine +BABEL_OP3_403_88260_20141029_205951_outLine +BABEL_OP3_403_88812_20150307_181013_inLine +BABEL_OP3_403_88812_20150307_181013_outLine +BABEL_OP3_403_89059_20141220_191342_inLine +BABEL_OP3_403_89059_20141220_191342_outLine +BABEL_OP3_403_89358_20141030_231758_inLine +BABEL_OP3_403_89358_20141030_231758_outLine 
+BABEL_OP3_403_90709_20141007_234900_inLine +BABEL_OP3_403_90709_20141007_234900_outLine +BABEL_OP3_403_90739_20141028_224009_inLine +BABEL_OP3_403_90739_20141028_224009_outLine +BABEL_OP3_403_92509_20141014_232528_inLine +BABEL_OP3_403_92509_20141014_232528_outLine +BABEL_OP3_403_92527_20141026_192704_inLine +BABEL_OP3_403_92527_20141026_192704_outLine +BABEL_OP3_403_92740_20141130_011740_inLine +BABEL_OP3_403_92740_20141130_011740_outLine +BABEL_OP3_403_94409_20141028_214356_inLine +BABEL_OP3_403_94409_20141028_214356_outLine +BABEL_OP3_403_94449_20150309_193606_inLine +BABEL_OP3_403_94449_20150309_193606_outLine +BABEL_OP3_403_94487_20150312_014136_inLine +BABEL_OP3_403_94487_20150312_014136_outLine +BABEL_OP3_403_95269_20141026_235206_inLine +BABEL_OP3_403_95269_20141026_235206_outLine +BABEL_OP3_403_95583_20141013_184937_inLine +BABEL_OP3_403_95583_20141013_184937_outLine +BABEL_OP3_403_95670_20141016_214958_inLine +BABEL_OP3_403_95670_20141016_214958_outLine +BABEL_OP3_403_96324_20141014_194024_inLine +BABEL_OP3_403_96324_20141014_194024_outLine +BABEL_OP3_403_96525_20141217_223842_inLine +BABEL_OP3_403_96525_20141217_223842_outLine +BABEL_OP3_403_96910_20141024_195822_inLine +BABEL_OP3_403_96910_20141024_195822_outLine +BABEL_OP3_403_97376_20141206_215930_inLine +BABEL_OP3_403_97376_20141206_215930_outLine +BABEL_OP3_403_97588_20141015_193851_inLine +BABEL_OP3_403_97588_20141015_193851_outLine +BABEL_OP3_403_97772_20141003_213919_inLine +BABEL_OP3_403_97772_20141003_213919_outLine +BABEL_OP3_403_98311_20141005_195843_inLine +BABEL_OP3_403_98311_20141005_195843_outLine +BABEL_OP3_403_98506_20150319_151741_inLine +BABEL_OP3_403_98506_20150319_151741_outLine +BABEL_OP3_403_99202_20141108_002737_inLine +BABEL_OP3_403_99202_20141108_002737_outLine +BABEL_OP3_403_99955_20150107_213836_inLine +BABEL_OP3_403_99955_20150107_213836_outLine diff --git a/egs/babel/s5d/conf/lists/403-dholuo/untranscribed-training.list b/egs/babel/s5d/conf/lists/403-dholuo/untranscribed-training.list new file mode 100644 index 00000000000..21ec3e2d9b4 --- /dev/null +++ b/egs/babel/s5d/conf/lists/403-dholuo/untranscribed-training.list @@ -0,0 +1,533 @@ +BABEL_OP3_403_10319_20141014_223750_inLine +BABEL_OP3_403_10319_20141014_223750_outLine +BABEL_OP3_403_10901_20141103_193402_inLine +BABEL_OP3_403_10901_20141103_193402_outLine +BABEL_OP3_403_10974_20141107_215600_inLine +BABEL_OP3_403_10974_20141107_215600_outLine +BABEL_OP3_403_11673_20141013_203235_inLine +BABEL_OP3_403_11673_20141013_203235_outLine +BABEL_OP3_403_12767_20141006_233130_inLine +BABEL_OP3_403_12767_20141006_233130_outLine +BABEL_OP3_403_13561_20141106_192514_inLine +BABEL_OP3_403_13561_20141106_192514_outLine +BABEL_OP3_403_13664_20141002_224345_inLine +BABEL_OP3_403_13664_20141002_224345_outLine +BABEL_OP3_403_14028_20150407_193852_inLine +BABEL_OP3_403_14028_20150407_193852_outLine +BABEL_OP3_403_14158_20141201_232657_inLine +BABEL_OP3_403_14179_20141129_222039_inLine +BABEL_OP3_403_14179_20141129_222039_outLine +BABEL_OP3_403_14719_20141215_230523_inLine +BABEL_OP3_403_14719_20141215_230523_outLine +BABEL_OP3_403_14723_20150205_004549_inLine +BABEL_OP3_403_14723_20150205_004549_outLine +BABEL_OP3_403_14875_20141005_213932_inLine +BABEL_OP3_403_14875_20141005_213932_outLine +BABEL_OP3_403_14972_20141116_221959_inLine +BABEL_OP3_403_14972_20141116_221959_outLine +BABEL_OP3_403_15216_20141219_215848_inLine +BABEL_OP3_403_15216_20141219_215848_outLine +BABEL_OP3_403_15382_20141127_202406_inLine +BABEL_OP3_403_15382_20141127_202406_outLine 
+BABEL_OP3_403_15749_20141218_230749_inLine +BABEL_OP3_403_15749_20141218_230749_outLine +BABEL_OP3_403_16787_20141029_182118_inLine +BABEL_OP3_403_16787_20141029_182118_outLine +BABEL_OP3_403_17472_20141210_175708_inLine +BABEL_OP3_403_17472_20141210_175708_outLine +BABEL_OP3_403_17472_20141210_180354_inLine +BABEL_OP3_403_17472_20141210_180354_outLine +BABEL_OP3_403_17615_20141201_004945_inLine +BABEL_OP3_403_17615_20141201_004945_outLine +BABEL_OP3_403_17890_20141203_003513_inLine +BABEL_OP3_403_17890_20141203_003513_outLine +BABEL_OP3_403_18037_20150418_210458_inLine +BABEL_OP3_403_18037_20150418_210458_outLine +BABEL_OP3_403_18380_20141031_010759_inLine +BABEL_OP3_403_18380_20141031_010759_outLine +BABEL_OP3_403_19101_20141106_010120_inLine +BABEL_OP3_403_19101_20141106_010120_outLine +BABEL_OP3_403_19134_20141120_182846_inLine +BABEL_OP3_403_19134_20141120_182846_outLine +BABEL_OP3_403_19672_20141130_215944_outLine +BABEL_OP3_403_20330_20150304_212548_inLine +BABEL_OP3_403_20330_20150304_212548_outLine +BABEL_OP3_403_20724_20150414_221749_inLine +BABEL_OP3_403_20724_20150414_221749_outLine +BABEL_OP3_403_20768_20141215_213303_inLine +BABEL_OP3_403_20768_20141215_213303_outLine +BABEL_OP3_403_22641_20141016_232215_inLine +BABEL_OP3_403_22641_20141016_232215_outLine +BABEL_OP3_403_23098_20150413_232746_inLine +BABEL_OP3_403_23098_20150413_232746_outLine +BABEL_OP3_403_23395_20141121_205256_outLine +BABEL_OP3_403_23505_20141008_003349_inLine +BABEL_OP3_403_23505_20141008_003349_outLine +BABEL_OP3_403_23628_20141023_001612_outLine +BABEL_OP3_403_23980_20141029_203114_inLine +BABEL_OP3_403_23980_20141029_203114_outLine +BABEL_OP3_403_24239_20150310_212947_inLine +BABEL_OP3_403_24239_20150310_212947_outLine +BABEL_OP3_403_24239_20150310_213506_inLine +BABEL_OP3_403_24239_20150310_213506_outLine +BABEL_OP3_403_24239_20150310_214027_inLine +BABEL_OP3_403_24239_20150310_214027_outLine +BABEL_OP3_403_24270_20141118_203034_inLine +BABEL_OP3_403_24270_20141118_203034_outLine +BABEL_OP3_403_25719_20141220_201504_inLine +BABEL_OP3_403_25719_20141220_201504_outLine +BABEL_OP3_403_25895_20150413_214536_inLine +BABEL_OP3_403_25895_20150413_214536_outLine +BABEL_OP3_403_26836_20141005_221645_inLine +BABEL_OP3_403_26836_20141005_221645_outLine +BABEL_OP3_403_26869_20150416_200345_inLine +BABEL_OP3_403_26869_20150416_201016_inLine +BABEL_OP3_403_27046_20150406_230902_inLine +BABEL_OP3_403_27046_20150406_230902_outLine +BABEL_OP3_403_27082_20141103_211126_inLine +BABEL_OP3_403_27082_20141103_211126_outLine +BABEL_OP3_403_28422_20141130_174755_inLine +BABEL_OP3_403_28422_20141130_174755_outLine +BABEL_OP3_403_28538_20141031_223359_inLine +BABEL_OP3_403_28538_20141031_223359_outLine +BABEL_OP3_403_29072_20141202_234748_inLine +BABEL_OP3_403_29072_20141202_234748_outLine +BABEL_OP3_403_29663_20150415_221129_inLine +BABEL_OP3_403_29663_20150415_221129_outLine +BABEL_OP3_403_29663_20150415_221719_inLine +BABEL_OP3_403_29663_20150415_221719_outLine +BABEL_OP3_403_29685_20141026_200131_inLine +BABEL_OP3_403_29685_20141026_200131_outLine +BABEL_OP3_403_30013_20141203_194552_inLine +BABEL_OP3_403_30013_20141203_194552_outLine +BABEL_OP3_403_30426_20150412_233250_inLine +BABEL_OP3_403_30426_20150412_233250_outLine +BABEL_OP3_403_30645_20141006_230955_inLine +BABEL_OP3_403_30645_20141006_230955_outLine +BABEL_OP3_403_30653_20150318_184710_inLine +BABEL_OP3_403_30653_20150318_184710_outLine +BABEL_OP3_403_30869_20141220_202310_inLine +BABEL_OP3_403_30869_20141220_202310_outLine 
+BABEL_OP3_403_31182_20141218_211632_outLine +BABEL_OP3_403_31628_20141201_204314_inLine +BABEL_OP3_403_31628_20141201_204314_outLine +BABEL_OP3_403_32169_20150415_234749_inLine +BABEL_OP3_403_32169_20150415_234749_outLine +BABEL_OP3_403_32914_20141219_204204_inLine +BABEL_OP3_403_32914_20141219_204204_outLine +BABEL_OP3_403_33704_20141216_231752_inLine +BABEL_OP3_403_33704_20141216_231752_outLine +BABEL_OP3_403_33933_20150426_001110_inLine +BABEL_OP3_403_33933_20150426_001110_outLine +BABEL_OP3_403_34145_20141129_000102_inLine +BABEL_OP3_403_34145_20141129_000102_outLine +BABEL_OP3_403_34328_20141031_214721_inLine +BABEL_OP3_403_34328_20141031_214721_outLine +BABEL_OP3_403_34903_20141127_193345_inLine +BABEL_OP3_403_34903_20141127_193345_outLine +BABEL_OP3_403_35202_20141202_231605_inLine +BABEL_OP3_403_35202_20141202_231605_outLine +BABEL_OP3_403_36894_20141014_211920_inLine +BABEL_OP3_403_36894_20141014_211920_outLine +BABEL_OP3_403_37064_20141005_205701_inLine +BABEL_OP3_403_37064_20141005_205701_outLine +BABEL_OP3_403_37271_20141217_183739_inLine +BABEL_OP3_403_37271_20141217_183739_outLine +BABEL_OP3_403_38076_20141128_204027_inLine +BABEL_OP3_403_38076_20141128_204027_outLine +BABEL_OP3_403_38431_20141215_205449_inLine +BABEL_OP3_403_38431_20141215_205449_outLine +BABEL_OP3_403_38554_20141003_222444_outLine +BABEL_OP3_403_39426_20150304_222409_inLine +BABEL_OP3_403_39426_20150304_222409_outLine +BABEL_OP3_403_39426_20150304_223342_inLine +BABEL_OP3_403_39426_20150304_223342_outLine +BABEL_OP3_403_39744_20141013_213656_inLine +BABEL_OP3_403_39744_20141013_213656_outLine +BABEL_OP3_403_40565_20141203_003932_inLine +BABEL_OP3_403_40565_20141203_003932_outLine +BABEL_OP3_403_40740_20141220_223558_inLine +BABEL_OP3_403_40740_20141220_223558_outLine +BABEL_OP3_403_41038_20141201_222108_inLine +BABEL_OP3_403_41038_20141201_222108_outLine +BABEL_OP3_403_41174_20141028_231225_inLine +BABEL_OP3_403_41174_20141028_231225_outLine +BABEL_OP3_403_41272_20150312_024107_inLine +BABEL_OP3_403_41272_20150312_024107_outLine +BABEL_OP3_403_41493_20141006_010651_inLine +BABEL_OP3_403_41493_20141006_010651_outLine +BABEL_OP3_403_41745_20141027_234835_inLine +BABEL_OP3_403_41745_20141027_234835_outLine +BABEL_OP3_403_42155_20141116_204154_inLine +BABEL_OP3_403_42155_20141116_204154_outLine +BABEL_OP3_403_42526_20150106_224056_inLine +BABEL_OP3_403_42526_20150106_224056_outLine +BABEL_OP3_403_42834_20141128_220047_inLine +BABEL_OP3_403_42834_20141128_220047_outLine +BABEL_OP3_403_42942_20141030_235147_inLine +BABEL_OP3_403_42942_20141030_235147_outLine +BABEL_OP3_403_44255_20150305_013502_inLine +BABEL_OP3_403_44347_20141219_190407_inLine +BABEL_OP3_403_44347_20141219_190407_outLine +BABEL_OP3_403_44420_20141023_214836_inLine +BABEL_OP3_403_44420_20141023_214836_outLine +BABEL_OP3_403_44477_20141201_010216_inLine +BABEL_OP3_403_44477_20141201_010216_outLine +BABEL_OP3_403_44709_20141201_213014_inLine +BABEL_OP3_403_44709_20141201_213014_outLine +BABEL_OP3_403_45106_20141120_182301_inLine +BABEL_OP3_403_45106_20141120_182301_outLine +BABEL_OP3_403_46169_20141217_192351_inLine +BABEL_OP3_403_46169_20141217_192351_outLine +BABEL_OP3_403_46333_20141014_002918_inLine +BABEL_OP3_403_46333_20141014_002918_outLine +BABEL_OP3_403_46702_20141003_234833_inLine +BABEL_OP3_403_46702_20141003_234833_outLine +BABEL_OP3_403_46712_20141023_221319_inLine +BABEL_OP3_403_46712_20141023_221319_outLine +BABEL_OP3_403_46763_20150318_203035_inLine +BABEL_OP3_403_46763_20150318_203035_outLine 
+BABEL_OP3_403_47959_20141024_223125_inLine +BABEL_OP3_403_47959_20141024_223125_outLine +BABEL_OP3_403_49001_20141006_010425_inLine +BABEL_OP3_403_49001_20141006_010425_outLine +BABEL_OP3_403_49641_20150402_214738_outLine +BABEL_OP3_403_49775_20141004_211924_inLine +BABEL_OP3_403_49775_20141004_211924_outLine +BABEL_OP3_403_50962_20141005_192714_outLine +BABEL_OP3_403_51156_20150416_171911_inLine +BABEL_OP3_403_51156_20150416_171911_outLine +BABEL_OP3_403_51417_20141220_002122_inLine +BABEL_OP3_403_51417_20141220_002122_outLine +BABEL_OP3_403_51540_20141219_215956_inLine +BABEL_OP3_403_51540_20141219_215956_outLine +BABEL_OP3_403_51968_20141028_223645_inLine +BABEL_OP3_403_51968_20141028_223645_outLine +BABEL_OP3_403_52442_20141105_011029_outLine +BABEL_OP3_403_52818_20141203_184905_inLine +BABEL_OP3_403_52818_20141203_184905_outLine +BABEL_OP3_403_53010_20150418_185722_inLine +BABEL_OP3_403_53010_20150418_185722_outLine +BABEL_OP3_403_53068_20150426_230124_inLine +BABEL_OP3_403_53068_20150426_230124_outLine +BABEL_OP3_403_53144_20150319_193813_inLine +BABEL_OP3_403_53144_20150319_193813_outLine +BABEL_OP3_403_54040_20141219_003109_inLine +BABEL_OP3_403_54040_20141219_003109_outLine +BABEL_OP3_403_54390_20141006_214754_inLine +BABEL_OP3_403_54390_20141006_214754_outLine +BABEL_OP3_403_54697_20141215_211116_inLine +BABEL_OP3_403_54697_20141215_211116_outLine +BABEL_OP3_403_54953_20141027_223433_inLine +BABEL_OP3_403_54953_20141027_223433_outLine +BABEL_OP3_403_55042_20150331_225750_inLine +BABEL_OP3_403_55042_20150331_225750_outLine +BABEL_OP3_403_55381_20141218_191630_inLine +BABEL_OP3_403_55381_20141218_191630_outLine +BABEL_OP3_403_55742_20141004_201921_inLine +BABEL_OP3_403_55742_20141004_204835_inLine +BABEL_OP3_403_55818_20141006_220912_inLine +BABEL_OP3_403_55818_20141006_220912_outLine +BABEL_OP3_403_56370_20141014_185314_inLine +BABEL_OP3_403_56370_20141014_185314_outLine +BABEL_OP3_403_56677_20141208_200823_inLine +BABEL_OP3_403_56677_20141208_200823_outLine +BABEL_OP3_403_56826_20141217_233213_inLine +BABEL_OP3_403_56826_20141217_233213_outLine +BABEL_OP3_403_57919_20150418_200246_inLine +BABEL_OP3_403_57919_20150418_200246_outLine +BABEL_OP3_403_57919_20150418_201847_inLine +BABEL_OP3_403_57919_20150418_201847_outLine +BABEL_OP3_403_58107_20141106_011114_inLine +BABEL_OP3_403_58107_20141106_011114_outLine +BABEL_OP3_403_58145_20141120_190441_inLine +BABEL_OP3_403_58145_20141120_190441_outLine +BABEL_OP3_403_58636_20150426_202602_inLine +BABEL_OP3_403_58636_20150426_202602_outLine +BABEL_OP3_403_58717_20141104_223801_inLine +BABEL_OP3_403_58717_20141104_223801_outLine +BABEL_OP3_403_59301_20141220_230943_inLine +BABEL_OP3_403_59301_20141220_230943_outLine +BABEL_OP3_403_59549_20141004_234422_inLine +BABEL_OP3_403_59549_20141004_234422_outLine +BABEL_OP3_403_59747_20141014_191829_inLine +BABEL_OP3_403_59747_20141014_191829_outLine +BABEL_OP3_403_59864_20150306_210405_inLine +BABEL_OP3_403_59864_20150306_210405_outLine +BABEL_OP3_403_59993_20141005_224301_inLine +BABEL_OP3_403_59993_20141005_224301_outLine +BABEL_OP3_403_59993_20141005_225220_inLine +BABEL_OP3_403_59993_20141005_225220_outLine +BABEL_OP3_403_59993_20141005_230254_inLine +BABEL_OP3_403_59993_20141005_230254_outLine +BABEL_OP3_403_60026_20141006_002312_inLine +BABEL_OP3_403_60026_20141006_002312_outLine +BABEL_OP3_403_61011_20141013_233414_inLine +BABEL_OP3_403_61011_20141013_233414_outLine +BABEL_OP3_403_61040_20141217_000352_inLine +BABEL_OP3_403_61040_20141217_000352_outLine 
+BABEL_OP3_403_61167_20141026_233020_inLine +BABEL_OP3_403_61167_20141026_233020_outLine +BABEL_OP3_403_61435_20141216_223049_inLine +BABEL_OP3_403_61435_20141216_223049_outLine +BABEL_OP3_403_61963_20150106_232605_outLine +BABEL_OP3_403_62155_20150318_181046_inLine +BABEL_OP3_403_62155_20150318_181046_outLine +BABEL_OP3_403_62177_20150303_192318_inLine +BABEL_OP3_403_62177_20150303_192318_outLine +BABEL_OP3_403_62177_20150303_192933_inLine +BABEL_OP3_403_62177_20150303_192933_outLine +BABEL_OP3_403_62835_20141031_215252_inLine +BABEL_OP3_403_62976_20141120_211316_inLine +BABEL_OP3_403_62976_20141120_211316_outLine +BABEL_OP3_403_63220_20141128_003242_inLine +BABEL_OP3_403_63220_20141128_003242_outLine +BABEL_OP3_403_63265_20150416_213544_inLine +BABEL_OP3_403_63265_20150416_214859_inLine +BABEL_OP3_403_63307_20141116_205038_inLine +BABEL_OP3_403_63307_20141116_205038_outLine +BABEL_OP3_403_63484_20150413_210246_inLine +BABEL_OP3_403_63484_20150413_210246_outLine +BABEL_OP3_403_63757_20141118_001039_inLine +BABEL_OP3_403_63757_20141118_001039_outLine +BABEL_OP3_403_63920_20150413_175014_inLine +BABEL_OP3_403_63920_20150413_175014_outLine +BABEL_OP3_403_64635_20150418_171656_inLine +BABEL_OP3_403_64635_20150418_171656_outLine +BABEL_OP3_403_64638_20141130_205142_inLine +BABEL_OP3_403_64638_20141130_205142_outLine +BABEL_OP3_403_64688_20150327_215407_inLine +BABEL_OP3_403_64688_20150327_215407_outLine +BABEL_OP3_403_64759_20141012_211953_inLine +BABEL_OP3_403_64759_20141012_211953_outLine +BABEL_OP3_403_65064_20141120_180442_inLine +BABEL_OP3_403_65064_20141120_180442_outLine +BABEL_OP3_403_65561_20141206_000223_inLine +BABEL_OP3_403_65561_20141206_000223_outLine +BABEL_OP3_403_66959_20141211_191140_inLine +BABEL_OP3_403_66959_20141211_191140_outLine +BABEL_OP3_403_66967_20141016_233136_inLine +BABEL_OP3_403_66967_20141016_233136_outLine +BABEL_OP3_403_67373_20141010_191456_inLine +BABEL_OP3_403_67373_20141010_191456_outLine +BABEL_OP3_403_67401_20141118_192332_inLine +BABEL_OP3_403_67401_20141118_192332_outLine +BABEL_OP3_403_68823_20150416_201411_inLine +BABEL_OP3_403_68823_20150416_201411_outLine +BABEL_OP3_403_69153_20141207_201546_inLine +BABEL_OP3_403_69153_20141207_201546_outLine +BABEL_OP3_403_69153_20141207_202942_inLine +BABEL_OP3_403_69153_20141207_202942_outLine +BABEL_OP3_403_69474_20141204_202057_inLine +BABEL_OP3_403_69474_20141204_202057_outLine +BABEL_OP3_403_69992_20141006_215605_inLine +BABEL_OP3_403_69992_20141006_215605_outLine +BABEL_OP3_403_70526_20150317_192457_inLine +BABEL_OP3_403_70526_20150317_192457_outLine +BABEL_OP3_403_71038_20150106_205857_inLine +BABEL_OP3_403_71038_20150106_205857_outLine +BABEL_OP3_403_71067_20141120_212952_inLine +BABEL_OP3_403_71067_20141120_212952_outLine +BABEL_OP3_403_71067_20141120_214426_inLine +BABEL_OP3_403_71067_20141120_214426_outLine +BABEL_OP3_403_71189_20150318_162559_inLine +BABEL_OP3_403_71189_20150318_162559_outLine +BABEL_OP3_403_71263_20141120_195808_inLine +BABEL_OP3_403_71263_20141120_195808_outLine +BABEL_OP3_403_71263_20141120_200524_inLine +BABEL_OP3_403_71263_20141120_200524_outLine +BABEL_OP3_403_71263_20141120_201201_inLine +BABEL_OP3_403_71263_20141120_201201_outLine +BABEL_OP3_403_71419_20150130_163036_inLine +BABEL_OP3_403_71419_20150130_163036_outLine +BABEL_OP3_403_71419_20150130_170259_inLine +BABEL_OP3_403_71419_20150130_170259_outLine +BABEL_OP3_403_71850_20150317_201433_inLine +BABEL_OP3_403_71850_20150317_201433_outLine +BABEL_OP3_403_71850_20150317_204336_inLine 
+BABEL_OP3_403_71850_20150317_204336_outLine +BABEL_OP3_403_72587_20141127_221927_inLine +BABEL_OP3_403_72587_20141127_221927_outLine +BABEL_OP3_403_72587_20141127_222705_inLine +BABEL_OP3_403_72587_20141127_222705_outLine +BABEL_OP3_403_73446_20150317_233038_inLine +BABEL_OP3_403_73446_20150317_233038_outLine +BABEL_OP3_403_73757_20141103_184243_inLine +BABEL_OP3_403_74121_20141029_192619_inLine +BABEL_OP3_403_74121_20141029_192619_outLine +BABEL_OP3_403_74455_20150304_010648_inLine +BABEL_OP3_403_74455_20150304_010648_outLine +BABEL_OP3_403_74763_20150412_222934_inLine +BABEL_OP3_403_74763_20150412_222934_outLine +BABEL_OP3_403_75465_20141216_203010_inLine +BABEL_OP3_403_75764_20150106_010413_inLine +BABEL_OP3_403_75764_20150106_010413_outLine +BABEL_OP3_403_76238_20141207_205931_inLine +BABEL_OP3_403_76238_20141207_205931_outLine +BABEL_OP3_403_76238_20141207_211123_inLine +BABEL_OP3_403_76238_20141207_211123_outLine +BABEL_OP3_403_76756_20141121_192227_inLine +BABEL_OP3_403_76756_20141121_192227_outLine +BABEL_OP3_403_77146_20141013_203551_inLine +BABEL_OP3_403_77146_20141013_203551_outLine +BABEL_OP3_403_77391_20141025_014416_inLine +BABEL_OP3_403_77391_20141025_014416_outLine +BABEL_OP3_403_77803_20141013_223521_inLine +BABEL_OP3_403_77803_20141013_223521_outLine +BABEL_OP3_403_77904_20150426_181110_inLine +BABEL_OP3_403_77904_20150426_181110_outLine +BABEL_OP3_403_77909_20150330_191417_inLine +BABEL_OP3_403_77909_20150330_191417_outLine +BABEL_OP3_403_78609_20141217_215450_inLine +BABEL_OP3_403_78609_20141217_215450_outLine +BABEL_OP3_403_78743_20141216_183731_inLine +BABEL_OP3_403_78743_20141216_183731_outLine +BABEL_OP3_403_78976_20141025_002547_inLine +BABEL_OP3_403_78976_20141025_002547_outLine +BABEL_OP3_403_79045_20141219_213058_inLine +BABEL_OP3_403_79045_20141219_213058_outLine +BABEL_OP3_403_79129_20141117_210821_inLine +BABEL_OP3_403_79129_20141117_210821_outLine +BABEL_OP3_403_79139_20141103_204223_inLine +BABEL_OP3_403_79139_20141103_204223_outLine +BABEL_OP3_403_80881_20141016_231419_inLine +BABEL_OP3_403_80881_20141016_231419_outLine +BABEL_OP3_403_80897_20141118_205921_inLine +BABEL_OP3_403_81392_20141202_223505_inLine +BABEL_OP3_403_81392_20141202_223505_outLine +BABEL_OP3_403_81553_20150108_011830_inLine +BABEL_OP3_403_81553_20150108_011830_outLine +BABEL_OP3_403_81971_20141013_202229_inLine +BABEL_OP3_403_81971_20141013_202229_outLine +BABEL_OP3_403_82089_20141103_180402_inLine +BABEL_OP3_403_82089_20141103_180402_outLine +BABEL_OP3_403_82138_20141103_203306_inLine +BABEL_OP3_403_82138_20141103_203306_outLine +BABEL_OP3_403_82140_20141103_203606_inLine +BABEL_OP3_403_82140_20141103_203606_outLine +BABEL_OP3_403_82224_20141221_020512_inLine +BABEL_OP3_403_82224_20141221_020512_outLine +BABEL_OP3_403_82361_20150313_215812_inLine +BABEL_OP3_403_82361_20150313_215812_outLine +BABEL_OP3_403_82637_20141013_202558_inLine +BABEL_OP3_403_82637_20141013_202558_outLine +BABEL_OP3_403_82742_20141217_192623_inLine +BABEL_OP3_403_82742_20141217_192623_outLine +BABEL_OP3_403_82742_20141217_193955_inLine +BABEL_OP3_403_82742_20141217_193955_outLine +BABEL_OP3_403_82935_20141220_194756_inLine +BABEL_OP3_403_82935_20141220_194756_outLine +BABEL_OP3_403_83783_20141117_201033_inLine +BABEL_OP3_403_83783_20141117_201033_outLine +BABEL_OP3_403_83813_20150201_234438_inLine +BABEL_OP3_403_83813_20150201_234438_outLine +BABEL_OP3_403_83929_20140926_001811_inLine +BABEL_OP3_403_83929_20140926_001811_outLine +BABEL_OP3_403_83935_20141205_002539_inLine 
+BABEL_OP3_403_83935_20141205_002539_outLine +BABEL_OP3_403_83935_20141205_223342_inLine +BABEL_OP3_403_83935_20141205_223342_outLine +BABEL_OP3_403_84061_20141027_225533_inLine +BABEL_OP3_403_84061_20141027_225533_outLine +BABEL_OP3_403_84125_20141005_234430_inLine +BABEL_OP3_403_84125_20141005_234430_outLine +BABEL_OP3_403_84408_20141026_210154_inLine +BABEL_OP3_403_84408_20141026_210154_outLine +BABEL_OP3_403_84936_20141127_181420_inLine +BABEL_OP3_403_84936_20141127_181420_outLine +BABEL_OP3_403_85047_20141031_202048_inLine +BABEL_OP3_403_85047_20141031_202048_outLine +BABEL_OP3_403_85322_20141006_225220_inLine +BABEL_OP3_403_85322_20141006_225220_outLine +BABEL_OP3_403_85340_20141005_204959_inLine +BABEL_OP3_403_85340_20141005_204959_outLine +BABEL_OP3_403_86321_20141208_193101_inLine +BABEL_OP3_403_86321_20141208_193101_outLine +BABEL_OP3_403_86557_20141016_213938_inLine +BABEL_OP3_403_86557_20141016_213938_outLine +BABEL_OP3_403_86829_20150413_201100_inLine +BABEL_OP3_403_86829_20150413_201100_outLine +BABEL_OP3_403_87298_20141024_181414_inLine +BABEL_OP3_403_87298_20141024_181414_outLine +BABEL_OP3_403_87796_20141116_204525_inLine +BABEL_OP3_403_87796_20141116_204525_outLine +BABEL_OP3_403_87871_20141217_212127_inLine +BABEL_OP3_403_87871_20141217_212127_outLine +BABEL_OP3_403_88550_20150307_215430_inLine +BABEL_OP3_403_88550_20150307_215430_outLine +BABEL_OP3_403_88550_20150307_221516_inLine +BABEL_OP3_403_88550_20150307_221516_outLine +BABEL_OP3_403_88661_20141201_185938_inLine +BABEL_OP3_403_88661_20141201_185938_outLine +BABEL_OP3_403_88661_20141201_192152_inLine +BABEL_OP3_403_88661_20141201_192152_outLine +BABEL_OP3_403_88674_20150418_221617_inLine +BABEL_OP3_403_88674_20150418_221617_outLine +BABEL_OP3_403_89045_20141003_224541_outLine +BABEL_OP3_403_89372_20141003_233243_inLine +BABEL_OP3_403_89372_20141004_235806_inLine +BABEL_OP3_403_89560_20141217_191117_inLine +BABEL_OP3_403_89560_20141217_191117_outLine +BABEL_OP3_403_89794_20141129_193030_inLine +BABEL_OP3_403_89794_20141129_193030_outLine +BABEL_OP3_403_89877_20141120_182454_inLine +BABEL_OP3_403_89877_20141120_182454_outLine +BABEL_OP3_403_90347_20141207_221437_inLine +BABEL_OP3_403_90347_20141207_221437_outLine +BABEL_OP3_403_90935_20141026_200818_inLine +BABEL_OP3_403_91336_20141103_203505_inLine +BABEL_OP3_403_91336_20141103_203505_outLine +BABEL_OP3_403_91411_20150130_181331_inLine +BABEL_OP3_403_91411_20150130_181331_outLine +BABEL_OP3_403_91411_20150130_185140_inLine +BABEL_OP3_403_91411_20150130_185140_outLine +BABEL_OP3_403_91891_20141205_223437_inLine +BABEL_OP3_403_91891_20141205_223437_outLine +BABEL_OP3_403_91891_20141205_224513_inLine +BABEL_OP3_403_91891_20141205_224513_outLine +BABEL_OP3_403_92440_20150413_001701_inLine +BABEL_OP3_403_92440_20150413_001701_outLine +BABEL_OP3_403_92698_20141104_003927_inLine +BABEL_OP3_403_92698_20141104_003927_outLine +BABEL_OP3_403_92757_20150307_000144_inLine +BABEL_OP3_403_92757_20150307_000144_outLine +BABEL_OP3_403_92757_20150307_001520_inLine +BABEL_OP3_403_92757_20150307_001520_outLine +BABEL_OP3_403_92792_20150319_214450_inLine +BABEL_OP3_403_92792_20150319_214450_outLine +BABEL_OP3_403_93861_20141031_233412_inLine +BABEL_OP3_403_93946_20141208_202019_inLine +BABEL_OP3_403_93946_20141208_202019_outLine +BABEL_OP3_403_94002_20141120_194833_inLine +BABEL_OP3_403_94002_20141120_194833_outLine +BABEL_OP3_403_94141_20150311_224536_inLine +BABEL_OP3_403_94141_20150311_224536_outLine +BABEL_OP3_403_94666_20141106_230027_inLine 
+BABEL_OP3_403_94666_20141106_230027_outLine +BABEL_OP3_403_94745_20141202_235317_inLine +BABEL_OP3_403_94745_20141202_235317_outLine +BABEL_OP3_403_95294_20141202_001855_inLine +BABEL_OP3_403_95294_20141202_001855_outLine +BABEL_OP3_403_95598_20141004_012914_outLine +BABEL_OP3_403_95663_20141013_194657_inLine +BABEL_OP3_403_95663_20141013_194657_outLine +BABEL_OP3_403_95966_20141028_211011_inLine +BABEL_OP3_403_95966_20141028_211011_outLine +BABEL_OP3_403_96820_20141105_001821_inLine +BABEL_OP3_403_96820_20141105_001821_outLine +BABEL_OP3_403_97448_20150330_211249_inLine +BABEL_OP3_403_97448_20150330_211249_outLine +BABEL_OP3_403_97896_20141031_234221_inLine +BABEL_OP3_403_97896_20141031_234221_outLine +BABEL_OP3_403_97988_20141211_193604_inLine +BABEL_OP3_403_97988_20141211_193604_outLine +BABEL_OP3_403_98165_20141026_210536_inLine +BABEL_OP3_403_98165_20141026_210536_outLine +BABEL_OP3_403_98365_20141117_210300_inLine +BABEL_OP3_403_98365_20141117_210300_outLine +BABEL_OP3_403_98489_20141007_213814_inLine +BABEL_OP3_403_98489_20141007_213814_outLine +BABEL_OP3_403_99516_20141016_194316_inLine +BABEL_OP3_403_99516_20141016_194316_outLine +BABEL_OP3_403_99732_20141217_232949_inLine +BABEL_OP3_403_99732_20141217_232949_outLine diff --git a/egs/babel/s5d/conf/mfcc.conf b/egs/babel/s5d/conf/mfcc.conf new file mode 100644 index 00000000000..45280a4e3a0 --- /dev/null +++ b/egs/babel/s5d/conf/mfcc.conf @@ -0,0 +1,2 @@ +--use-energy=false # only non-default option. +--sample-frequency=8000 # Switchboard is sampled at 8kHz diff --git a/egs/babel/s5d/conf/mfcc_hires.conf b/egs/babel/s5d/conf/mfcc_hires.conf new file mode 100644 index 00000000000..d870ab04c38 --- /dev/null +++ b/egs/babel/s5d/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training. +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--sample-frequency=8000 # Switchboard is sampled at 8kHz +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. 
+--low-freq=40 # low cutoff frequency for mel bins
+--high-freq=-200 # high cutoff frequency, relative to Nyquist of 4000 (=3800)
diff --git a/egs/babel/s5d/conf/online_cmvn.conf b/egs/babel/s5d/conf/online_cmvn.conf
new file mode 100644
index 00000000000..7748a4a4dd3
--- /dev/null
+++ b/egs/babel/s5d/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/babel/s5d/conf/pitch.conf b/egs/babel/s5d/conf/pitch.conf
new file mode 100644
index 00000000000..926bcfca92a
--- /dev/null
+++ b/egs/babel/s5d/conf/pitch.conf
@@ -0,0 +1 @@
+--sample-frequency=8000
diff --git a/egs/babel/s5d/conf/plp.conf b/egs/babel/s5d/conf/plp.conf
new file mode 100644
index 00000000000..926bcfca92a
--- /dev/null
+++ b/egs/babel/s5d/conf/plp.conf
@@ -0,0 +1 @@
+--sample-frequency=8000
diff --git a/egs/babel/s5d/conf/slurm.bluecrab.conf b/egs/babel/s5d/conf/slurm.bluecrab.conf
new file mode 100644
index 00000000000..d0c5fd1f904
--- /dev/null
+++ b/egs/babel/s5d/conf/slurm.bluecrab.conf
@@ -0,0 +1,11 @@
+command sbatch --export=PATH --ntasks-per-node=1 --exclude=compute[0001-0014,0017,0021,0022,0038]
+option time=* --time=$0
+option mem=* --mem-per-cpu=$0
+option mem=0 # Do not add anything to qsub_opts
+option num_threads=* --cpus-per-task=$0 --ntasks-per-node=1
+option num_threads=1 --cpus-per-task=1 --ntasks-per-node=1 # Do not add anything to qsub_opts
+option max_jobs_run=* # Do nothing
+default gpu=0
+# option gpu=0 -p scavenger --qos=scavenger
+option gpu=0 -p shared
+option gpu=* -p gpu --gres=gpu:$0 --cpus-per-task=6 --exclude=gpu[019,026] --time=4:0:0 # in reality, we probably should have --cpus-per-task=$((6*$0))
diff --git a/egs/babel/s5d/local/ali_to_rttm.sh b/egs/babel/s5d/local/ali_to_rttm.sh
new file mode 100755
index 00000000000..60d0598f007
--- /dev/null
+++ b/egs/babel/s5d/local/ali_to_rttm.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# Copyright 2012-2013 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
+# Apache 2.0.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+#This script will take the ali directory and create the corresponding rttm file
+#Example
+#steps/align_sgmm2.sh --nj 20 --cmd "$decode_cmd" \
+#  --transform-dir exp/tri5/decode_dev2h.uem \
+#  data/dev2h.uem data/lang exp/sgmm5 exp/sgmm5/align_dev2h.uem
+#local/ali_to_rttm.sh data/dev2h data/lang exp/sgmm5/align_dev2h/
+
+cmd=run.pl
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+beam=10
+retry_beam=40
+boost_silence=1.0
+
+if [ -f path.sh ]; then . path.sh; fi
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+  echo "This script takes an ali directory and creates the corresponding RTTM file"
+  echo ""
+  echo "Usage: align_text.sh <data-dir> <lang-dir> <ali-dir>"
+  echo " e.g.: align_text.sh data/heldout data/lang exp/heldout_ali"
+  echo "main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) "
+
+  exit 1;
+fi
+
+set -e
+set -o pipefail
+set -u
+
+data=$1
+lang=$2
+dir=$3
+
+oov=`cat $lang/oov.txt`
+mkdir -p $dir/log
+
+echo "$0: writing alignments."
+wbegin=`grep "#1" $lang/phones.txt | head -1 | awk '{print $2}'`
+wend=`grep "#2" $lang/phones.txt | head -1 | awk '{print $2}'`
+
+if [ ! -f $lang/L_align.fst ]; then
+  echo "$0: generating $lang/L_align.fst"
+  local/make_L_align.sh data/local/tmp.lang/ $lang $lang 2>&1 | tee $dir/log/L_align.log
+fi
+
+$cmd $dir/log/align_to_words.log \
+  ali-to-phones $dir/final.mdl "ark:gunzip -c $dir/ali.*.gz|" ark,t:- \| \
+  phones-to-prons $lang/L_align.fst $wbegin $wend ark:- "ark,s:utils/sym2int.pl -f 2- --map-oov '$oov' $lang/words.txt <$data/text|" ark,t:- \| \
+  prons-to-wordali ark:- "ark:ali-to-phones --write-lengths=true $dir/final.mdl 'ark:gunzip -c $dir/ali.*.gz|' ark,t:- |" ark,t:$dir/align.txt
+
+echo "$0: done writing alignments."
+
+echo "$0: writing rttm."
+[ ! -x local/txt_to_rttm.pl ] && \
+  echo "Not creating rttm because local/txt_to_rttm.pl does not exist or is not executable." && exit 1;
+
+local/txt_to_rttm.pl --symtab=$lang/words.txt --segment=$data/segments $dir/align.txt $dir/rttm 2>$dir/log/rttm.log
+local/txt_to_rttm.pl --symtab=$lang/words.txt $dir/align.txt $dir/rttm.per-utt 2>$dir/log/rttm.per-utt.log
+echo "$0: done writing rttm."
+
+exit 0;
diff --git a/egs/babel/s5d/local/annotated_kwlist_to_KWs.pl b/egs/babel/s5d/local/annotated_kwlist_to_KWs.pl
new file mode 100755
index 00000000000..a4c80cef345
--- /dev/null
+++ b/egs/babel/s5d/local/annotated_kwlist_to_KWs.pl
@@ -0,0 +1,124 @@
+#!/usr/bin/env perl
+
+# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: annotated_kwlist_to_KWs.pl [options] <kwlist-annot-xml> <KWs-list> [category]
+ e.g.: annotated_kwlist_to_KWs.pl kwlist.annot.list keywords.list "NGram Order:2,3,4"
+
+This script reads an annotated kwlist xml file and writes a list of keywords, according
+to the given categories. The "category" is a "key:value" pair in the annotated kwlist xml
+file. For example
+1. "NGram Order:2,3,4"
+2. "NGram Order:2"
+3. "NGram Order:-"
+where "NGram Order" is the category name. The first line means print keywords that are
+bigram, trigram and 4gram; the second line means print keywords only for bigram; the last
+line means print all possible ngram keywords.
+If no "category" is specified, the script will print out the possible categories.
+
+Allowed options:
+EOU
+
+GetOptions();
+
+@ARGV >= 2 || die $Usage;
+
+# Work out the input/output source
+my $kwlist_filename = shift @ARGV;
+my $kws_filename = shift @ARGV;
+
+my $source = "STDIN";
+if ($kwlist_filename ne "-") {
+  open(KWLIST, "<$kwlist_filename") || die "Fail to open kwlist file: $kwlist_filename\n";
+  $source = "KWLIST";
+}
+
+# Process kwlist.annot.xml
+my %attr;
+my %attr_kws;
+my $kwid="";
+my $name="";
+my $value="";
+while (<$source>) {
+  chomp;
+  if (m/<kw kwid=/) {($kwid) = /kwid="(\S+)"/; next;}
+  if (m/<name>/) {($name) = /<name>(.*)<\/name>/; next;}
+  if (m/<value>/) {
+    ($value) = /<value>(.*)<\/value>/;
+    if (defined($attr{$name})) {
+      $attr{"$name"}->{"$value"} = 1;
+    } else {
+      $attr{"$name"} = {"$value", 1};
+    }
+    if (defined($attr_kws{"${name}_$value"})) {
+      $attr_kws{"${name}_$value"}->{"$kwid"} = 1;
+    } else {
+      $attr_kws{"${name}_$value"} = {"$kwid", 1};
+    }
+  }
+}
+
+my $output = "";
+if (@ARGV == 0) {
+  # If no category provided, print out the possible categories
+  $output .= "Possible categories are:\n\n";
+  foreach my $name (keys %attr) {
+    $output .= "$name:";
+    my $count = 0;
+    foreach my $value (keys %{$attr{$name}}) {
+      if ($value eq "") {$value = "\"\"";}
+      if ($count == 0) {
+        $output .= "$value";
+        $count ++; next;
+      }
+      if ($count == 6) {
+        $output .= ", ...";
+        last;
+      }
+      $output .= ",$value"; $count ++;
+    }
+    $output .= "\n";
+  }
+  print STDERR $output;
+  $output = "";
+} else {
+  my %keywords;
+  while (@ARGV > 0) {
+    my $category = shift @ARGV;
+    my @col = split(/:/, $category);
+    @col == 2 || die "Bad category \"$category\"\n";
+    $name = $col[0];
+    if ($col[1] eq "-") {
+      foreach my $value (keys %{$attr{$name}}) {
+        foreach my $kw (keys %{$attr_kws{"${name}_$value"}}) {
+          $keywords{$kw} = 1;
+        }
+      }
+    } else {
+      my @col1 = split(/,/, $col[1]);
+      foreach my $value (@col1) {
+        foreach my $kw (keys %{$attr_kws{"${name}_$value"}}) {
+          $keywords{$kw} = 1;
+        }
+      }
+    }
+  }
+  foreach my $kw (keys %keywords) {
+    $output .= "$kw\n";
+  }
+}
+
+if ($kwlist_filename ne "-") {close(KWLIST);}
+if ($kws_filename eq "-") { print $output;}
+else {
+  open(O, ">$kws_filename") || die "Fail to open file $kws_filename\n";
+  print O $output;
+  close(O);
+}
diff --git a/egs/babel/s5d/local/apply_g2p.sh b/egs/babel/s5d/local/apply_g2p.sh
new file mode 100755
index 00000000000..385b1f3536e
--- /dev/null
+++ b/egs/babel/s5d/local/apply_g2p.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+# Copyright 2014 Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+
+# Begin configuration section.
+iters=5
+stage=0
+encoding='utf-8'
+remove_tags=true
+only_words=true
+icu_transform="Any-Lower"
+var_counts=3 #Generate up to N variants
+var_mass=0.9 #Generate enough variants to produce 90% of the prob mass
+cmd=run.pl
+nj=10 #Split the task into several parallel jobs, to speed things up
+model=
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+set -u
+set -e
+
+if [ $# != 3 ]; then
+  echo "Usage: $0 [options] <word-list> <g2p-model-dir> <output-dir>"
+  echo "... where <word-list> is a list of words whose pronunciation is to be generated"
+  echo "    <g2p-model-dir> is a directory used as a target during training of G2P"
+  echo "    <output-dir> is the directory where the output lexicon should be stored"
+  echo "e.g.: $0 oov_words exp/g2p exp/g2p/oov_lex"
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --nj <nj>                                        # How many tasks should be spawned (to speed things up)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
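+  # the two G2P tuning knobs from the configuration section can also be
+  # given on the command line (handled by parse_options.sh):
+  echo "  --var-counts <N>                                 # generate up to <N> pronunciation variants per word (default: $var_counts)"
+  echo "  --var-mass <F>                                   # generate enough variants to cover <F> of the prob mass (default: $var_mass)"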
+  exit 1;
+fi
+
+wordlist=$1
+modeldir=$2
+output=$3
+
+
+mkdir -p $output/log
+
+model=$modeldir/g2p.model.final
+[ ! -f ${model:-} ] && echo "File $model not found in the directory $modeldir." && exit 1
+#[ ! -x $wordlist ] && echo "File $wordlist not found!" && exit 1
+
+cp $wordlist $output/wordlist.orig.txt
+
+if [ ! -z $icu_transform ] ; then
+  #we have to keep a correspondence map A -> transform(A)
+  paste \
+    <(cat $output/wordlist.orig.txt | uconv -f $encoding -t $encoding -x $icu_transform) \
+    $output/wordlist.orig.txt \
+    > $output/transform_map.txt
+  cut -f 1 $output/transform_map.txt | sort -u > $output/wordlist.txt
+else
+  cp $output/wordlist.orig.txt $output/wordlist.txt
+fi
+
+if ! g2p=`which g2p.py` ; then
+  echo "The Sequitur tool was not found!"
+  echo "Go to $KALDI_ROOT/tools and execute extras/install_sequitur.sh"
+  exit 1
+fi
+
+
+echo "Applying the G2P model to wordlist $wordlist"
+
+if [ $stage -le 0 ]; then
+  $cmd JOBS=1:$nj $output/log/apply.JOBS.log \
+    split -n l/JOBS/$nj $output/wordlist.txt \| \
+    g2p.py -V $var_mass --variants-number $var_counts --encoding $encoding \
+      --model $modeldir/g2p.model.final --apply - \
+    \> $output/output.JOBS
fi
+cat $output/output.* > $output/output
+
+#Remap the words from the output file back to the original casing.
+#Conversion of some of them might have failed, so we have to be careful
+#and use the transform_map file we generated beforehand.
+#Also, because the sequitur output is not readily usable as a lexicon (it adds
+#one more column with the ordering of the pron. variants), convert it into the proper lexicon form
+output_lex=$output/lexicon.lex
+if [ ! -z $icu_transform ] ; then
+  #also, the transform is generally N -> 1, i.e. we have to take
+  #extra care of words that might have been mapped into the same one
+  perl -e 'open(WORDS, $ARGV[0]) or die "Could not open file $ARGV[0]";
+    while(<WORDS>) { chomp; @F=split;
+      if ($MAP{$F[0]} ) { push @{$MAP{$F[0]}}, $F[1]; }
+      else { $MAP{$F[0]} = [$F[1]]; }
+    }
+    close(WORDS);
+    open(LEX, $ARGV[1]) or die "Could not open file $ARGV[1]";
+    while(<LEX>) {chomp; @F=split /\t/;
+      if ( $#F != 3 ) {
+        print STDERR "WARNING: Non-acceptable entry \"" . join(" ", @F) . "\" ($#F splits)\n";
+        next;
+      }
+      foreach $word (@{$MAP{$F[0]}} ) {
+        print "$word\t$F[2]\t$F[3]\n";
+      }
+    }
+    close(LEX);
+    ' \
+    $output/transform_map.txt $output/output | sort -u > $output_lex
+else
+  #Just convert it to a proper lexicon format
+  cut -f 1,3,4 $output/output > $output_lex
+fi
+
+#Some words might have been removed or skipped during the process,
+#let's check it and warn the user if so...
+nlex=`cut -f 1 $output_lex | sort -u | wc -l`
+nwlist=`cut -f 1 $output/wordlist.orig.txt | sort -u | wc -l`
+if [ $nlex -ne $nwlist ] ; then
+  echo "WARNING: Unable to generate pronunciation for all words. ";
+  echo "WARNING: Wordlist: $nwlist words"
+  echo "WARNING: Lexicon : $nlex words"
+  echo "WARNING: Diff example: "
+  diff <(cut -f 1 $output_lex | sort -u ) \
+       <(cut -f 1 $output/wordlist.orig.txt | sort -u ) || true
+fi
+exit 0
diff --git a/egs/babel/s5d/local/apply_map_tab_preserving.pl b/egs/babel/s5d/local/apply_map_tab_preserving.pl
new file mode 100755
index 00000000000..b57262f1930
--- /dev/null
+++ b/egs/babel/s5d/local/apply_map_tab_preserving.pl
@@ -0,0 +1,94 @@
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
+
+# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0.
+
+
+# This program is a bit like ./sym2int.pl in that it applies a map
+# to things in a file, but it's a bit more general in that it doesn't
+# assume the things being mapped to are single tokens; they could
+# be sequences of tokens. See the usage message.
+# This version preserves tabs.
+
+if (@ARGV > 0 && $ARGV[0] eq "-f") {
+  shift @ARGV;
+  $field_spec = shift @ARGV;
+  if ($field_spec =~ m/^\d+$/) {
+    $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
+  }
+  if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
+    if ($1 ne "") {
+      $field_begin = $1 - 1;  # Change to zero-based indexing.
+    }
+    if ($2 ne "") {
+      $field_end = $2 - 1;  # Change to zero-based indexing.
+    }
+  }
+  if (!defined $field_begin && !defined $field_end) {
+    die "Bad argument to -f option: $field_spec";
+  }
+}
+
+# Mapping is obligatory
+$permissive = 0;
+if (@ARGV > 0 && $ARGV[0] eq '--permissive') {
+  shift @ARGV;
+  # Mapping is optional (missing key is printed to output)
+  $permissive = 1;
+}
+
+if(@ARGV != 1) {
+  print STDERR "Usage: apply_map_tab_preserving.pl [options] map <input >output\n" .
+    "options: [-f <field-range>]\n" .
+    "Applies the map 'map' to all input text, where each line of the map\n" .
+    "is interpreted as a map from the first field to the list of the other fields\n" .
+    "Note: <field-range> can look like 4-5, or 4-, or 5-, or 1, it means the field\n" .
+    "range in the input to apply the map to.\n" .
+    "e.g.: echo A B | apply_map.pl a.txt\n" .
+    "where a.txt is:\n" .
+    "A a1 a2\n" .
+    "B b\n" .
+    "will produce:\n" .
+    "a1 a2 b\n";
+  exit(1);
+}
+
+($map) = @ARGV;
+open(M, "<$map") || die "Error opening map file $map: $!";
+
+while (<M>) {
+  @A = split(" ", $_);
+  @A >= 1 || die "apply_map.pl: empty line.";
+  $i = shift @A;
+  $o = join(" ", @A);
+  $map{$i} = $o;
+}
+
+while(<STDIN>) {
+  @A = split("\t", $_);
+  $field_offset = 0;
+  for ($n = 0; $n < @A; $n++) {
+    @B = split(" ", $A[$n]);
+
+    for ($x = 0; $x < @B; $x++) {
+      $y = $x + $field_offset;
+      if ( (!defined $field_begin || $y >= $field_begin)
+           && (!defined $field_end || $y <= $field_end)) {
+        $b = $B[$x];
+        if (!defined $map{$b}) {
+          if (!$permissive) {
+            die "apply_map.pl: undefined key $b\n";
+          } else {
+            print STDERR "apply_map.pl: warning! missing key $b\n";
+          }
+        } else {
+          $B[$x] = $map{$b};
+        }
+      }
+    }
+    $field_offset += @B;
+    $A[$n] = join(" ", @B);
+  }
+  print join("\t", @A) . "\n";
+}
diff --git a/egs/babel/s5d/local/arpa2G.sh b/egs/babel/s5d/local/arpa2G.sh
new file mode 100755
index 00000000000..40c269fbb22
--- /dev/null
+++ b/egs/babel/s5d/local/arpa2G.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# Copyright 2013-2014 Johns Hopkins University (authors: Yenda Trmal, Daniel Povey)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+#Simple utility script to convert the gzipped ARPA lm into a G.fst file
+
+
+oov_prob_file=
+unk_fraction=
+cleanup=true
+#end configuration section.
+
+
+
+echo $0 $@
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <arpa-lm-gz> <lang-dir> <dest-dir>"
+  echo "Options: --oov-prob-file <oov2prob-file>   # e.g. data/local/oov2prob"
+  echo "     # with this option it will replace <unk> with OOVs in G.fst."
+  exit 1;
+fi
+
+set -e          #Exit on non-zero return code from any command
+set -o pipefail #Exit if any of the commands in the pipeline returns
+                #a non-zero return code
+
+lmfile=$1
+langdir=$2
+destdir=$3
+
+mkdir $destdir 2>/dev/null || true
+
+
+if [ ! -z "$oov_prob_file" ]; then
+  if [ ! -s "$oov_prob_file" ]; then
+    echo "$0: oov-prob file $oov_prob_file does not exist (or is empty)"
+    exit 1;
+  fi
+  if [ -z "$unk_fraction" ]; then
+    echo "--oov-prob option requires the --unk-fraction option";
+    exit 1;
+  fi
+
+  min_prob=$(gunzip -c $lmfile | perl -e ' $minlogprob = 0.0;
+     while(<STDIN>) { if (m/\\(\d)-grams:/) { $order = $1; }
+       if ($order == 1) { @A = split;
+         if ($A[0] < $minlogprob && $A[0] != -99) { $minlogprob = $A[0]; }}} print $minlogprob')
+  echo "Minimum prob in LM file is $min_prob"
+
+  echo "$0: creating LM file with unk words, using $oov_prob_file, in $destdir/lm_tmp.gz"
+  gunzip -c $lmfile | \
+    perl -e ' ($oov_prob_file,$min_prob,$unk_fraction) = @ARGV; $ceilinged=0;
+     $min_prob < 0.0 || die "Bad min_prob"; # this is a log-prob
+     $unk_fraction > 0.0 || die "Bad unk_fraction"; # this is a prob
+     open(F, "<$oov_prob_file") || die "opening oov file";
+     while (<F>) { push @OOVS, $_; }
+     $num_oovs = @OOVS;
+     while(<STDIN>) {
+       if (m/^ngram 1=(\d+)/) { $n = $1 + $num_oovs; print "ngram 1=$n\n"; }
+       else { print; } # print all lines unchanged except the one that says ngram 1=X.
+       if (m/^\\1-grams:$/) {
+         foreach $l (@OOVS) {
+           @A = split(" ", $l);
+           @A == 2 || die "bad line in oov2prob: $_;";
+           ($word, $prob) = @A;
+           $log10prob = (log($prob * $unk_fraction) / log(10.0));
+           if ($log10prob > $min_prob) { $log10prob = $min_prob; $ceilinged++;}
+           print "$log10prob $word\n";
+         }
+       }} print STDERR "Ceilinged $ceilinged unk-probs\n";' \
+       $oov_prob_file $min_prob $unk_fraction | gzip -c > $destdir/lm_tmp.gz
+  lmfile=$destdir/lm_tmp.gz
+fi
+
+if [[ $lmfile == *.bz2 ]] ; then
+  decompress="bunzip2 -c $lmfile"
+elif [[ $lmfile == *.gz ]] ; then
+  decompress="gunzip -c $lmfile"
+else
+  decompress="cat $lmfile"
+fi
+
+$decompress | \
+  grep -v '<s> <s>' | grep -v '</s> <s>' | grep -v '</s> </s>' | \
+  arpa2fst - | \
+  fstprint | \
+  utils/eps2disambig.pl | \
+  utils/s2eps.pl | \
+  fstcompile --isymbols=$langdir/words.txt \
+    --osymbols=$langdir/words.txt --keep_isymbols=false --keep_osymbols=false | \
+  fstrmepsilon | fstarcsort --sort_type=ilabel > $destdir/G.fst || exit 1
+fstisstochastic $destdir/G.fst || true;
+
+if $cleanup; then
+  rm $destdir/lm_tmp.gz 2>/dev/null || true;
+fi
+
+exit 0
diff --git a/egs/babel/s5d/local/augment_original_stm.pl b/egs/babel/s5d/local/augment_original_stm.pl
new file mode 100755
index 00000000000..c5ad87fd286
--- /dev/null
+++ b/egs/babel/s5d/local/augment_original_stm.pl
@@ -0,0 +1,110 @@
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
+# Copyright 2012 Johns Hopkins University (Author: Jan Trmal)
+# Apache 2.0.
+
+#This script takes the original BABEL STM file (part of the IndusDB)
+#and replaces the "Aggregated" field with a correct speaker ID.
+#As a result, the scoring will be done on a per-speaker basis as well.
+#As the segments from the segment mapping generally do not correspond to
+#the segmentation of the original STM file, it combines the files
+#segments and utt2spk to work out the correct speaker ID for
+#the reference segment.
+#In case of overlap, it will either use the previous speaker or
+#print out an error message.
+
+use strict;
+use warnings;
+
+use Data::Dumper;
+
+@ARGV == 2 || die "$0 <stm-file> <data-dir>\n";
+
+my $warn_count = 0;
+my $warn_max = 10;
+my $stm_file = shift @ARGV;
+my $data_dir = shift @ARGV;
+my %utt2spk;
+my %segments;
+
+open(F_u, "<$data_dir/utt2spk") || die "Could not open the file $data_dir/utt2spk\n";
+while(<F_u>) {
+  chop;
+  (my $utt, my $spk) = split;
+  $utt2spk{$utt} = $spk;
+}
+close(F_u);
+
+open(F_s, "<$data_dir/segments") || die "Could not open the file $data_dir/segments\n";
+while(<F_s>) {
+  chop;
+  (my $utt, my $file, my $seg_start, my $seg_end) = split;
+  push @{$segments{$file}}, [ $seg_start, $seg_end, $utt2spk{$utt}];
+}
+close(F_s);
+
+open(STM, "<$stm_file") || die "Could not open the STM file $stm_file";
+open(STMOUT, ">$data_dir/stm") || die "Could not open the output STM file $data_dir/stm";
+open(RECO, ">$data_dir/reco2file_and_channel") or die "Could not create the output file $data_dir/reco2file_and_channel";
+
+my $prev_filename = "";
+my @timestamps;
+my $i = 0;
+while(<STM>) {
+  chop;
+  (my $filename, my $line, my $aggregated, my $seg_start, my $seg_end, my $text) = split(/\s+/, $_, 6);
+  #print "$filename, $seg_start, $seg_end, $text\n";
+  $line="1";
+  if (( $prev_filename ne $filename ) && ( ";;$prev_filename" ne $filename)){
+    my $_filename = $filename;
+    $_filename =~ s/^;;//g;
+    next if not exists $segments{$_filename};
+    #print $filename, "\n";
+    $prev_filename = $_filename;
+    @timestamps = @{$segments{$_filename}};
+    #print Dumper(\@timestamps);
+    $i=0;
+    print RECO "$_filename $_filename $line\n";
+  }
+
+  my $max_i=@timestamps;
+  while ( ($i < $max_i ) && ($seg_start > @{$timestamps[$i]}[0] ) ) {
+    $i+= 1;
+  }
+
+  if (($i >= $max_i ) && ($timestamps[$i-1][1]) <= $seg_start ){
+    #We are over the start of the last segment -> we assign the last speaker ID
+    if ($warn_count < $warn_max) {
+      print STDERR "Warning: $prev_filename: the segment from the STM file starts after the last segment from the segments file ends\n";
+      print STDERR "Warning: Additional info: STM: ($seg_start, $seg_end), segments file: ($timestamps[$i-1][0] $timestamps[$i-1][1])\n";
+      $warn_count += 1;
+
+      if ($warn_count >= $warn_max) {
+        print STDERR "Warning: Maximum number of warnings reached, not warning anymore...\n"
+      }
+    }
+    #print "$i, $filename, $timestamps[$max_i - 1][2]\n";
+    print STMOUT "$filename $line $timestamps[$max_i - 1][2] $seg_start $seg_end $text\n";
+  } elsif ( $i == 0 ) {
+    if ($warn_count < $warn_max) {
+      print STDERR "Warning: $prev_filename: The segment from the STM file starts before the first segment from the segments file\n";
+      print STDERR "Warning: Additional info: STM: ($seg_start, $seg_end), segments file: ($timestamps[$i][0] $timestamps[$i][1])\n";
+      $warn_count += 1;
+
+      if ($warn_count >= $warn_max) {
+        print STDERR "Warning: Maximum number of warnings reached, not warning anymore...\n"
+      }
+    }
+    #Even the first segment's start time was higher than the stm segment start time.
+    #That means we do not really know which speaker the stm segment belongs to.
+    print STMOUT "$filename $line $timestamps[$i][2] $seg_start $seg_end $text\n";
+    #print "$i, $filename, $timestamps[$i][2]\n";
+  } else {
STMOUT "$filename $line $timestamps[$i-1][2] $seg_start $seg_end $text\n"; + #print "$i, $filename, $timestamps[$i-1][2]\n"; + } +} + +close(STMOUT); +close(STM); +close(RECO); diff --git a/egs/babel/s5d/local/best_path_weights.sh b/egs/babel/s5d/local/best_path_weights.sh new file mode 100755 index 00000000000..52782ee3655 --- /dev/null +++ b/egs/babel/s5d/local/best_path_weights.sh @@ -0,0 +1,142 @@ +#!/bin/bash + +# Copyright 2014 Vimal Manohar + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + + +# This script combines frame-level posteriors from different decode +# directories. The first decode directory is assumed to be the primary +# and is used to get the best path. The posteriors from other decode +# directories are interpolated with the posteriors of the best path. +# The output is a new directory with final.mdl, tree from the primary +# decode-dir and the best path alignments and weights in a decode-directory +# with the same basename as the primary directory. +# This is typically used to get better posteriors for semisupervised training +# of DNN +# e.g. local/combine_posteriors.sh exp/tri6_nnet/decode_train_unt.seg +# exp/sgmm_mmi_b0.1/decode_fmllr_train_unt.seg_it4 exp/combine_dnn_sgmm +# Here the final.mdl and tree are copied from exp/tri6_nnet to +# exp/combine_dnn_sgmm. best_path_ali.*.gz obtained from the primary dir and +# the interpolated posteriors in weights.*.gz are placed in +# exp/combine_dnn_sgmm/decode_train_unt.seg + +set -e + +# begin configuration section. +cmd=run.pl +stage=-10 +#end configuration section. + +help_message="Usage: "$(basename $0)" [options] [:weight] [:weight] [[:weight] ... ] + E.g. "$(basename $0)" data/train_unt.seg data/lang exp/tri1/decode:0.5 exp/tri2/decode:0.25 exp/tri3/decode:0.25 exp/combine +Options: + --cmd (run.pl|queue.pl...) # specify how to run the sub-processes. +"; + +[ -f ./path.sh ] && . ./path.sh +. 
parse_options.sh || exit 1; + + +if [ $# -lt 4 ]; then + printf "$help_message\n"; + exit 1; +fi + +data=$1 +lang=$2 +dir=${@: -1} # last argument to the script +shift 2; +decode_dirs=( $@ ) # read the remaining arguments into an array +unset decode_dirs[${#decode_dirs[@]}-1] # 'pop' the last argument which is odir +num_sys=${#decode_dirs[@]} # number of systems to combine + +mkdir -p $dir +mkdir -p $dir/log + +decode_dir=`echo ${decode_dirs[0]} | cut -d: -f1` +nj=`cat $decode_dir/num_jobs` + +out_decode=$dir/`basename $decode_dir` +mkdir -p $out_decode + +if [ $stage -lt -1 ]; then + mkdir -p $out_decode/log + $cmd JOB=1:$nj $out_decode/log/best_path.JOB.log \ + lattice-best-path --acoustic-scale=0.1 \ + "ark,s,cs:gunzip -c $decode_dir/lat.JOB.gz |" \ + ark:/dev/null "ark:| gzip -c > $out_decode/best_path_ali.JOB.gz" || exit 1 +fi + +weights_sum=0.0 + +for i in `seq 0 $[num_sys-1]`; do + decode_dir=${decode_dirs[$i]} + + weight=`echo $decode_dir | cut -d: -s -f2` + [ -z "$weight" ] && weight=1.0 + + if [ $i -eq 0 ]; then + file_list="\"ark,s,cs:gunzip -c $out_decode/weights.$i.JOB.gz | vector-scale --scale=$weight ark:- ark:- |\"" + else + file_list="$file_list \"ark,s,cs:gunzip -c $out_decode/weights.$i.JOB.gz | vector-scale --scale=$weight ark:- ark:- |\"" + fi + + weights_sum=`perl -e "print STDOUT $weights_sum + $weight"` +done + +inv_weights_sum=`perl -e "print STDOUT 1.0/$weights_sum"` + +for i in `seq 0 $[num_sys-1]`; do + if [ $stage -lt $i ]; then + decode_dir=`echo ${decode_dirs[$i]} | cut -d: -f1` + + model=`dirname $decode_dir`/final.mdl # model one level up from decode dir + tree=`dirname $decode_dir`/tree # tree one level up from decode dir + + for f in $model $decode_dir/lat.1.gz $tree; do + [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; + done + if [ $i -eq 0 ]; then + nj=`cat $decode_dir/num_jobs` || exit 1; + cp $model $dir || exit 1 + cp $tree $dir || exit 1 + echo $nj > $out_decode/num_jobs + else + if [ $nj != `cat $decode_dir/num_jobs` ]; then + echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`" + exit 1; + fi + fi + + $cmd JOB=1:$nj $dir/log/get_post.$i.JOB.log \ + lattice-to-post --acoustic-scale=0.1 \ + "ark,s,cs:gunzip -c $decode_dir/lat.JOB.gz|" ark:- \| \ + post-to-pdf-post $model ark,s,cs:- ark:- \| \ + get-post-on-ali ark,s,cs:- "ark,s,cs:gunzip -c $out_decode/best_path_ali.JOB.gz | convert-ali $dir/final.mdl $model $tree ark,s,cs:- ark:- | ali-to-pdf $model ark,s,cs:- ark:- |" "ark:| gzip -c > $out_decode/weights.$i.JOB.gz" || exit 1 + fi +done + +if [ $stage -lt $num_sys ]; then + if [ "$num_sys" -eq 1 ]; then + $cmd JOB=1:$nj $dir/log/move_post.JOB.log \ + mv $out_decode/weights.0.JOB.gz $out_decode/weights.JOB.gz || exit 1 + else + $cmd JOB=1:$nj $dir/log/interpolate_post.JOB.log \ + vector-sum $file_list \ + "ark:| vector-scale --scale=$inv_weights_sum ark:- ark:- | gzip -c > $out_decode/weights.JOB.gz" || exit 1 + fi +fi + +exit 0 diff --git a/egs/babel/s5d/local/best_scores.sh b/egs/babel/s5d/local/best_scores.sh new file mode 100755 index 00000000000..a3b2af187e1 --- /dev/null +++ b/egs/babel/s5d/local/best_scores.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +# End configuration section +set -o nounset # Treat unset variables as an error + + +if [ ! -x results ] ; then + data=$(readlink -f ./local) + data=$(dirname $data) + mkdir -p $data/results + ln -s $data/results results +fi + +if [ ! 
-e ./RESULTS ] ; then + p=$(basename `readlink -f lang.conf`) + p=${p##.*} + filename=results.${p}.${USER}.$(date --iso-8601=seconds) + echo "#Created on $(date --iso-8601=seconds) by $0" >> results/$filename + ln -sf results/$filename RESULTS +fi + + +set -f +export mydirs=( `find exp/ exp_bnf/ exp_psx/ -name "decode*dev10h.pem*" -type d | sed 's/it[0-9]/*/g;s/epoch[0-9]/*/g' | sort -u` ) +set +f +( + echo -e "#\n# STT Task performance (WER), evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + for f in "${mydirs[@]}"; do + find $f -name "*.sys" -not -name "*char*" | xargs grep Avg | utils/best_wer.sh + done | column -t +) >> RESULTS + +( + ls exp/tri5/decode*dev10h*/score_*/*char*sys >/dev/null 2>&1 || exit 0 + echo -e "#\n# STT Task performance (CER), evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + for f in "${mydirs[@]}"; do + find $f -name "*.sys" -name "*char*" | xargs grep Avg | utils/best_wer.sh + done | column -t +) >> RESULTS + diff --git a/egs/babel/s5d/local/best_scores_kws.sh b/egs/babel/s5d/local/best_scores_kws.sh new file mode 100755 index 00000000000..dcf4508d5e1 --- /dev/null +++ b/egs/babel/s5d/local/best_scores_kws.sh @@ -0,0 +1,179 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +# End configuration section +set -o nounset # Treat unset variables as an error + + +if [ ! -x results ] ; then + data=$(readlink -f ./local) + data=$(dirname $data) + mkdir -p $data/results + ln -s $data/results results +fi + +if [ ! -e ./RESULTS.kws ] ; then + p=$(basename `readlink -f lang.conf`) + p=${p##.*} + filename=kws_results.${p}.${USER}.$(date --iso-8601=seconds) + echo "#Created on $(date --iso-8601=seconds) by $0" >> results/$filename + ln -sf results/$filename RESULTS.kws +fi + + +set -f +export mydirs=( `find exp/ exp_bnf/ exp_psx/ -name "decode*dev10h.pem*" -type d | sed 's/it[0-9]/*/g;s/epoch[0-9]/*/g' | sort -u` ) +set +f +export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) +( + #### Word search (converted lattices) + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + done + + #### Syllab search (converted word lattices) + export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" 
-ipath "*syllabs*" | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), syllabic search for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -ipath "*syllabs*" | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + done + + + #### Phone search (converted word lattices) + export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" -ipath "*phones*" | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), phonetic search for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -ipath "*phones*" | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + + done + + + set -f + export mydirs=( `find exp/ exp_bnf/ exp_psx/ -name "decode*dev10h.syll.pem*" -type d | sed 's/it[0-9]/*/g;s/epoch[0-9]/*/g' | sort -u` ) + set +f + if [ ! 
-z ${mydirs+x} ] ; then + export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) + #declare -p kwsets + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), syllabic decode+search for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + + done + fi + + set -f + export mydirs=( `find exp/ exp_bnf/ exp_psx/ -name "decode*dev10h.phn.pem*" -type d | sed 's/it[0-9]/*/g;s/epoch[0-9]/*/g' | sort -u` ) + set +f + if [ ! -z ${mydirs+x} ] ; then + export kwsets=( `find ${mydirs[@]} -type d -name "kwset*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | sed 's:.*kwset_::g' | sed 's/_[0-9][0-9]*$//g' | sort -u ` ) + #declare -p kwsets + for kwset in "${kwsets[@]}"; do + echo -e "#\n# KWS Task performance (TWV), phonetic decode+search for the set ["$kwset"] evaluated on $(date --iso-8601=seconds) by user `whoami` on `hostname -f`" + ( + for f in "${mydirs[@]}"; do + find $f -name "metrics.txt" -ipath "*kwset*" -ipath "*_${kwset}_*" -not \( -ipath "*syllabs*" -or -path "*phones*" \) | xargs grep ATWV | sort -k3,3g | tail -n 1 + done | \ + while IFS='' read -r line || [[ -n "$line" ]]; do + file=$(echo $line | sed 's/:.*//g' ) + cat $file | sed 's/ *, */\n/g' | sed 's/ //g' | grep -E 'TWV|THR' | paste -s | paste - <(echo $file) + done + ) | column -t | sort -k3,3g | \ + ( + while IFS='' read -r line || [[ -n "$line" ]]; do + echo $line + f=$(echo $line | rev | awk '{print $1}'| rev) + d=$(dirname $f) + echo -ne "\tOOV=0\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=0" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + echo -ne "\tOOV=1\t" + local/subset_atwv.pl <(cat data/dev10h.pem/kwset_${kwset}/categories | local/search/filter_by_category.pl data/dev10h.pem/kwset_${kwset}/categories "OOV=1" | cut -f 1 -d ' ' | sort ) $d/bsum.txt + + done + ) + + done + fi +) | tee RESULTS.kws diff --git a/egs/babel/s5d/local/build_edit_distance_fst.pl b/egs/babel/s5d/local/build_edit_distance_fst.pl new file mode 100755 index 00000000000..51c46667727 --- /dev/null +++ b/egs/babel/s5d/local/build_edit_distance_fst.pl @@ -0,0 +1,127 @@ +#!/usr/bin/env perl + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0. 
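+# A sketch of the expected confusion-matrix format (hypothetical costs; the
+# script only requires three whitespace-separated columns per line):
+#   <from-phone> <to-phone> <cost>
+# where <eps> in the first column denotes an insertion of <to-phone> and
+# <eps> in the second column denotes a deletion of <from-phone>, e.g.:
+#   a  e     0.3
+#   a  <eps> 0.8
+#   <eps>  a 0.9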
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: build_edit_distance_fst.pl [options] <phone-list> <fst-out>
+ Build an edit distance FST at the phone level.
+
+Allowed options:
+  --confusion-matrix  : Matrix for insertion, deletion and substitution. (string, default="")
+  --ins-cost          : Insertion cost (double, default=1 )
+  --del-cost          : Deletion cost (double, default=1 )
+  --subs-cost         : Substitution cost (double, default=1 )
+  --boundary-ins-cost : Cost for insertions at word boundary (double, default=0.1)
+  --boundary-off      : No insertions at word boundary (boolean, default=true)
+EOU

+my $confusion_matrix = "";
+my $insertion_cost = 1;
+my $deletion_cost = 1;
+my $substitution_cost = 1;
+my $boundary_ins_cost = 0.1;
+my $boundary_off="true";
+GetOptions('confusion-matrix=s' => \$confusion_matrix,
+           'ins-cost=f' => \$insertion_cost,
+           'del-cost=f' => \$deletion_cost,
+           'subs-cost=f' => \$substitution_cost,
+           'boundary-ins-cost=f' => \$boundary_ins_cost,
+           'boundary-off=s' => \$boundary_off);
+
+@ARGV == 2 || die $Usage;
+
+$boundary_off eq "true" || $boundary_off eq "false" || die "$0: Bad value for option --boundary-off\n";
+
+# Work out the input and output parameters
+my $phone_in = shift @ARGV;
+my $fst_out = shift @ARGV;
+
+open(I, "<$phone_in") || die "$0: Fail to open phone list $phone_in\n";
+open(O, ">$fst_out") || die "$0: Fail to write FST $fst_out\n";
+
+# Read confusion matrix
+my %confusion;
+if ($confusion_matrix ne "") {
+  open(M, "<$confusion_matrix") || die "$0: Fail to open confusion matrix $confusion_matrix\n";
+  while (<M>) {
+    chomp;
+    my @col = split();
+    @col == 3 || die "$0: Bad line in confusion matrix \"$_\"\n";
+    $confusion{"$col[0]_$col[1]"} = $col[2];
+  }
+  close(M);
+}
+
+# Start processing
+my @phones;
+while (<I>) {
+  chomp;
+  my @col = split();
+  @col == 1 || die "$0: Bad number of columns in phone list \"$_\"\n";
+  if ($col[0] eq "<eps>") {next;}
+  push(@phones, $col[0]);
+}
+
+# Add insertions, deletions
+my $fst = "";
+foreach my $p (@phones) {
+  if ($confusion_matrix eq "") {
+    $fst .= "1 1 $p <eps> $deletion_cost\n";   # Deletions
+    $fst .= "1 1 <eps> $p $insertion_cost\n";  # Insertions
+    if ($boundary_off eq "false") {
+      $fst .= "0 0 <eps> $p $boundary_ins_cost\n";
+      $fst .= "0 1 <eps> $p $boundary_ins_cost\n";
+      $fst .= "2 2 <eps> $p $boundary_ins_cost\n";
+      $fst .= "1 2 <eps> $p $boundary_ins_cost\n";
+    }
+  } else {
+    my $key = "${p}_<eps>";
+    if (defined($confusion{$key})) {
+      $fst .= "1 1 $p <eps> $confusion{$key}\n";
+    }
+    $key = "<eps>_${p}";
+    if (defined($confusion{$key})) {
+      $fst .= "1 1 <eps> $p $confusion{$key}\n";
+      if ($boundary_off eq "false") {
+        $fst .= "0 0 <eps> $p $confusion{$key}\n";
+        $fst .= "0 1 <eps> $p $confusion{$key}\n";
+        $fst .= "2 2 <eps> $p $confusion{$key}\n";
+        $fst .= "1 2 <eps> $p $confusion{$key}\n";
+      }
+    }
+  }
+}
+foreach my $p1 (@phones) {
+  foreach my $p2 (@phones) {
+    if ($p1 eq $p2) {
+      $fst .= "1 1 $p1 $p2 0\n";
+    } else {
+      if ($confusion_matrix eq "") {
+        $fst .= "1 1 $p1 $p2 $substitution_cost\n";
+      } else {
+        my $key = "${p1}_${p2}";
+        if (defined($confusion{$key})) {
+          $fst .= "1 1 $p1 $p2 $confusion{$key}\n";
+        }
+      }
+    }
+  }
+}
+if ($boundary_off eq "false") {
+  $fst .= "0 1 <eps> <eps> 0\n";
+  $fst .= "1 2 <eps> <eps> 0\n";
+  $fst .= "2\n";
+} else {
+  $fst .= "1\n";
+}
+
+print O $fst;
+
+close(I);
+close(O);
diff --git a/egs/babel/s5d/local/chain/run_blstm.sh b/egs/babel/s5d/local/chain/run_blstm.sh
new file mode 100755
index 00000000000..6d13c55fc7d
--- /dev/null
+++ b/egs/babel/s5d/local/chain/run_blstm.sh
@@ -0,0 +1,180 @@
+#!/bin/bash
+
+
+# by default, with cleanup:
+# local/chain/run_blstm.sh
+# %WER 46.8 | 19252 60586 | 57.6 28.5 13.8 4.5 46.8 31.7 | 
-0.643 | exp/chain_cleaned/blstm_sp_bi/decode_dev10h.pem/score_8/penalty_0.25/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=-2 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 128 \ + --non-recurrent-projection-dim 128 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
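+  # Brief, informal notes on the chain-specific options used below; see
+  # steps/nnet3/chain/train.py --help for the authoritative documentation:
+  #   --chain.xent-regularize 0.1        # weight of the auxiliary cross-entropy
+  #                                      # output that regularizes training.
+  #   --chain.leaky-hmm-coefficient 0.1  # lets a small amount of probability
+  #                                      # "leak" between HMM states, which
+  #                                      # stabilizes the denominator computation.
+  #   --egs.chunk-width 150              # each training chunk covers 150 frames.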
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab1.sh b/egs/babel/s5d/local/chain/run_blstm_bab1.sh new file mode 100755 index 00000000000..ba8da0e14bc --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab1.sh @@ -0,0 +1,180 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh +# %WER 45.5 | 19252 60586 | 58.9 27.5 13.5 4.5 45.5 31.4 | -0.660 | exp/chain_cleaned/blstmbab1_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab1 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 128 \ + --non-recurrent-projection-dim 128 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab2.sh b/egs/babel/s5d/local/chain/run_blstm_bab2.sh new file mode 100755 index 00000000000..f5d698e262c --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab2.sh @@ -0,0 +1,180 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh +# %WER 46.7 | 19252 60586 | 57.1 26.1 16.8 3.8 46.7 31.9 | -0.692 | exp/chain_cleaned/blstmbab2_sp_bi/decode_dev10h.pem/score_10/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. 
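+# (Relative to the base local/chain/run_blstm.sh, this "bab2" variant mainly
+# changes the trainer options below: 6 epochs instead of 4 and at most 6
+# parallel jobs instead of 12, while reusing the egs dumped by the base run
+# via common_egs_dir.)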
+train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab2 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 128 \ + --non-recurrent-projection-dim 128 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
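+  # Note on the learning-rate options passed to train.py below: the
+  # "effective" rates are multiplied by the current number of jobs to get
+  # the actual per-job learning rate, so roughly (illustrative arithmetic,
+  # given the job ramp from 2 to 6 used here):
+  #   initial actual lrate ~= 0.001  * 2 = 0.002
+  #   final   actual lrate ~= 0.0001 * 6 = 0.0006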
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 6 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab3.sh b/egs/babel/s5d/local/chain/run_blstm_bab3.sh new file mode 100755 index 00000000000..7ad51204c6f --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab3.sh @@ -0,0 +1,180 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh +# %WER 45.9 | 19252 60586 | 58.7 28.0 13.3 4.6 45.9 31.6 | -0.668 | exp/chain_cleaned/blstmbab3_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab3 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 256 \ + --non-recurrent-projection-dim 256 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab4.sh b/egs/babel/s5d/local/chain/run_blstm_bab4.sh new file mode 100755 index 00000000000..72aaeb8778f --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab4.sh @@ -0,0 +1,179 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab4 #affix for TDNN directory, e.g. 
"a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 128 \ + --non-recurrent-projection-dim 128 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.25 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_blstm_bab5.sh b/egs/babel/s5d/local/chain/run_blstm_bab5.sh new file mode 100755 index 00000000000..1bae225022e --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_bab5.sh @@ -0,0 +1,179 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab5 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + steps/nnet3/lstm/make_configs.py \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --splice-indexes="-2,-1,0,1,2 0 0" \ + --lstm-delay=" [-3,3] [-3,3] [-3,3] " \ + --xent-regularize 0.1 \ + --include-log-softmax false \ + --num-lstm-layers 3 \ + --cell-dim 512 \ + --hidden-dim 512 \ + --recurrent-projection-dim 256 \ + --non-recurrent-projection-dim 256 \ + --label-delay 0 \ + --self-repair-scale-nonlinearity 0.00001 \ + --self-repair-scale-clipgradient 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
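+  # For reference, create_split_dir.pl spreads the (large) egs over several
+  # filesystems by populating $dir/egs/storage with numbered symlinks,
+  # roughly like this (illustrative layout, not verified output):
+  #   $dir/egs/storage/1 -> /export/b05/$USER/kaldi-data/egs/.../storage/1
+  #   $dir/egs/storage/2 -> /export/b06/$USER/kaldi-data/egs/.../storage/2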
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.25 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_ivector_common.sh b/egs/babel/s5d/local/chain/run_ivector_common.sh new file mode 100755 index 00000000000..7354d59465b --- /dev/null +++ b/egs/babel/s5d/local/chain/run_ivector_common.sh @@ -0,0 +1,240 @@ +#!/bin/bash + +set -e -o pipefail + + +# This script is called from local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh (and may eventually +# be called by more scripts). It contains the common feature preparation and iVector-related parts +# of the script. See those scripts for examples of usage. + + +stage=0 +nj=30 +min_seg_len=1.55 # min length in seconds... we do this because chain training + # will discard segments shorter than 1.5 seconds. Must remain in sync + # with the same option given to prepare_lores_feats_and_alignments.sh +train_set=train_cleaned # you might set this to e.g. train. +gmm=tri5_cleaned # This specifies a GMM-dir from the features of the type you're training the system on; + # it should contain alignments for 'train_set'. +langdir=data/langp/tri5_ali + +num_threads_ubm=12 +nnet3_affix=_cleaned # affix for exp/nnet3 directory to put iVector stuff in, so it + # becomes exp/nnet3_cleaned or whatever. +add_pitch=false + +. ./cmd.sh +. ./path.sh + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh + + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp_comb + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + + + +if [ $stage -le 2 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then + echo "$0: data/${train_set}_sp_hires/feats.scp already exists." + echo " ... Please either remove it, or rerun this script with stage > 2." 
+ exit 1 +fi + + +if [ $stage -le 1 ]; then + echo "$0: preparing directory for speed-perturbed data" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: creating high-resolution MFCC features" + + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations. You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + utils/copy_data_dir.sh data/${train_set}_sp data/${train_set}_sp_hires + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires + + for datadir in ${train_set}_sp ; do + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires + steps/compute_cmvn_stats.sh data/${datadir}_hires + utils/fix_data_dir.sh data/${datadir}_hires + done +fi + +if [ $stage -le 3 ]; then + echo "$0: combining short segments of speed-perturbed high-resolution MFCC training data" + # we have to combine short segments or we won't be able to train chain models + # on those segments. + utils/data/combine_short_segments.sh \ + data/${train_set}_sp_hires $min_seg_len data/${train_set}_sp_hires_comb + + # just copy over the CMVN to avoid having to recompute it. + cp data/${train_set}_sp_hires/cmvn.scp data/${train_set}_sp_hires_comb/ + utils/fix_data_dir.sh data/${train_set}_sp_hires_comb/ +fi + +if [ $stage -le 4 ]; then + echo "$0: selecting segments of hires training data that were also present in the" + echo " ... original training data." + + # note, these data-dirs are temporary; we put them in a sub-directory + # of the place where we'll make the alignments. + temp_data_root=exp/nnet3${nnet3_affix}/tri5 + mkdir -p $temp_data_root + + utils/data/subset_data_dir.sh --utt-list data/${train_set}/feats.scp \ + data/${train_set}_sp_hires $temp_data_root/${train_set}_hires + + # note: essentially all the original segments should be in the hires data. + n1=$(wc -l 4 option." + exit 1; + fi + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 7 --mllt-iters "2 4 6" \ + --splice-opts "--left-context=3 --right-context=3" \ + --boost-silence $boost_sil \ + $numLeavesMLLT $numGaussMLLT $temp_data_root/${train_set}_hires $langdir \ + $gmm_dir exp/nnet3${nnet3_affix}/tri5 +fi + + +if [ $stage -le 5 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + + mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + # train a diagonal UBM using a subset of about a quarter of the data + # we don't use the _comb data for this as there is no need for compatibility with + # the alignments, and using the non-combined data is more efficient for I/O + # (no messing about with piped commands). + num_utts_total=$(wc -l 11 option." 
+ exit 1 + fi + echo "$0: aligning with the perturbed, short-segment-combined low-resolution data" + steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set}_sp_comb $langdir $gmm_dir $ali_dir +fi + + +exit 0; diff --git a/egs/babel/s5d/local/chain/run_tdnn.sh b/egs/babel/s5d/local/chain/run_tdnn.sh new file mode 100755 index 00000000000..3ce53fa9292 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn.sh @@ -0,0 +1,177 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh +# %WER 47.0 | 19252 60586 | 58.0 28.0 14.0 5.0 47.0 31.6 | -0.540 | exp/chain_cleaned/tdnn_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
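+  # Rough guide to the length of training (schematic formula, ignoring
+  # rounding and archive details): with E = --trainer.num-epochs, F = total
+  # frames of training data and J = the average number of jobs,
+  #   num_iters ~= E * F / (frames_per_iter * J)
+  # i.e. each job sees about --trainer.frames-per-iter frames per iteration.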
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab1.sh b/egs/babel/s5d/local/chain/run_tdnn_bab1.sh new file mode 100755 index 00000000000..db82c0f358a --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_bab1.sh @@ -0,0 +1,177 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh +# %WER 48.2 | 19252 60586 | 56.9 28.9 14.2 5.1 48.2 32.1 | -0.662 | exp/chain_cleaned/tdnnbab1_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=bab1 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab2.sh b/egs/babel/s5d/local/chain/run_tdnn_bab2.sh new file mode 100755 index 00000000000..51387901683 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_bab2.sh @@ -0,0 +1,177 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh +# %WER 47.7 | 19252 60586 | 56.5 27.2 16.3 4.3 47.7 31.8 | -0.468 | exp/chain_cleaned/tdnnbab2_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. 
"a" or "b", in case we change the configuration. +tdnn_affix=bab2 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab3.sh b/egs/babel/s5d/local/chain/run_tdnn_bab3.sh new file mode 100755 index 00000000000..098c3de0482 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_bab3.sh @@ -0,0 +1,178 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh + +# %WER 46.7 | 19252 60586 | 57.4 26.4 16.2 4.0 46.7 31.6 | -0.469 | exp/chain_cleaned/tdnnbab3_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=bab3 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
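+  # This variant raises --chain.xent-regularize from the 0.1 used in the
+  # baseline run_tdnn.sh to 0.25, i.e. the auxiliary cross-entropy output
+  # contributes more strongly to the objective, schematically (per frame):
+  #   objective ~= LF-MMI + 0.25 * log p_xent(correct pdf)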
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.25 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab4.sh b/egs/babel/s5d/local/chain/run_tdnn_bab4.sh new file mode 100755 index 00000000000..5831cfc28f0 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_bab4.sh @@ -0,0 +1,177 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_tdnn.sh +# %WER 47.3 | 19252 60586 | 57.5 29.1 13.4 4.8 47.3 31.7 | -0.595 | exp/chain_cleaned/tdnnbab4_sp_bi/decode_dev10h.pem/score_8/penalty_0.25/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=bab4 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 400 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.25 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/check_models.sh b/egs/babel/s5d/local/check_models.sh new file mode 100755 index 00000000000..88b3dacc94b --- /dev/null +++ b/egs/babel/s5d/local/check_models.sh @@ -0,0 +1,34 @@ +#!/bin/bash + + +check_model () { + model=$1 + if [ -s $model ]; then echo $model + else + dir=`dirname $model` + latest_model=`ls -lt $dir/{?,??}.mdl 2>/dev/null | head -1 | awk '{print $9}'` + echo "*$model is not there, latest is: $latest_model" + fi +} + +for model in exp/mono/final.mdl exp/tri{1,2,3}/final.mdl; do + check_model $model +done + +if [ ! -f exp/tri4/final.mdl ]; then + echo "*exp/tri4/final.mdl is not there*" + exit 1 +fi + +if [ -f exp/tri4/trans.1 ]; then # This is LimitedLP. 
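+  # (trans.1 contains fMLLR transforms; in the LimitedLP configuration SAT
+  # training already happens at the tri4 stage, so its presence is used to
+  # tell the two setups apart.)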
+ models="exp/tri4/final.alimdl exp/sgmm5/final.alimdl exp/sgmm5_mmi_b0.1/final.mdl exp/tri5_nnet/final.mdl" +else + models="exp/tri4/final.mdl exp/tri5/final.alimdl exp/sgmm5/final.alimdl exp/sgmm5_mmi_b0.1/final.mdl exp/tri6_nnet/final.mdl" +fi +models="$models exp_BNF/tri5/final.mdl exp_BNF/tri6/final.alimdl exp_BNF/sgmm7/final.alimdl" + +for model in $models; do + check_model $model +done + + diff --git a/egs/babel/s5d/local/check_tools.sh b/egs/babel/s5d/local/check_tools.sh new file mode 100755 index 00000000000..ca8800def41 --- /dev/null +++ b/egs/babel/s5d/local/check_tools.sh @@ -0,0 +1,40 @@ +#!/bin/bash -u + +# Copyright 2015 (c) Johns Hopkins University (Jan Trmal ) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +[ -f ./path.sh ] && . ./path.sh + +sph2pipe=`command -v sph2pipe 2>/dev/null` \ + || { echo >&2 "sph2pipe not found on PATH. Did you run make in the $KALDI_ROOT/tools directory?"; return 1; } + +srilm=`command -v ngram 2>/dev/null` \ + || { echo >&2 "srilm not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_srilm.sh"; return 1; } + +sox=`command -v sox 2>/dev/null` \ + || { echo >&2 "sox not found on PATH. Please install it manually (you will need version 14.4.0 and higher)."; return 1; } + +# If sox is found on path, check if the version is correct +if [ ! -z "$sox" ]; then + sox_version=`$sox --version 2>&1| head -1 | sed -e 's?.*: ??' -e 's?.* ??'` + if [[ ! $sox_version =~ v14.4.* ]]; then + echo "Unsupported sox version $sox_version found on path. You will need version v14.4.0 and higher." + return 1 + fi +fi + +exit 0 + + diff --git a/egs/babel/s5d/local/check_wers.sh b/egs/babel/s5d/local/check_wers.sh new file mode 100755 index 00000000000..10e1a89ee3a --- /dev/null +++ b/egs/babel/s5d/local/check_wers.sh @@ -0,0 +1,50 @@ +#!/bin/bash + + + +check_wer () { + dir=$1 + if [ -d $dir ]; then + seen_dir=false + for ddir in $dir/decode*; do + if [ -d $ddir ]; then + seen_dir=true + printf " % -40s " $ddir + line=`grep Sum $ddir/score_*/*.sys 2>/dev/null | $char_command | utils/best_wer.sh` + if [ -z "$line" ]; then echo "------" + else echo $line | cut -c 1-65; fi + fi + done + ! 
$seen_dir && echo "$dir ********** no decode dirs" + fi + +} + +final=false +char_command="grep -v char" + +for n in `seq 10`; do + if [ "$1" == "--final" ]; then + final=true + shift + fi + if [ "$1" == "--char" ]; then + char_command="grep char" + shift + fi +done + +if [ $# != 0 ]; then + echo "Usage: local/check_wers.sh [--final] [--char]" + exit 1; +fi + +if $final; then + for dir in exp/sgmm5_mmi_b0.1 exp/tri5_nnet exp/tri6_nnet exp_BNF/sgmm7 exp_BNF/sgmm7_mmi_b0.1 exp/combine*; do + check_wer $dir + done +else + for dir in exp/tri{2,3,4,5} exp/sgmm5 exp/sgmm5_mmi_b0.1 exp/tri5_nnet exp/tri6_nnet exp_BNF/* exp/combine_*; do + check_wer $dir + done +fi diff --git a/egs/babel/s5d/local/cmu_uem2kaldi_dir.sh b/egs/babel/s5d/local/cmu_uem2kaldi_dir.sh new file mode 100755 index 00000000000..f320cfa19cd --- /dev/null +++ b/egs/babel/s5d/local/cmu_uem2kaldi_dir.sh @@ -0,0 +1,124 @@ +#!/bin/bash -e + +# Creating a UEM decoding setup with CMU segmentation from Florian (Feb 15, 2013). +dummy_text=true +text= +filelist= +#end of configuration + +[ -f ./path.sh ] && . ./path.sh +[ -f ./cmd.sh ] && . ./cmd.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ] ; then + echo "$0: Converts the CMU segmentation database file into a kaldi data directory for UEM decoding" + echo "" + echo "cmu_ume2kaldi_dir.sh " + echo "example: cmu_ume2kaldi_dir.sh db-tag-eval-utt.dat /export/babel/data/106-tagalog/audio data/eval.uem" + echo "Was called with: $*" + exit 1; +fi + +database=$1 +audiopath=$2 +datadir=$3 + +echo $0 $@ +mkdir -p $datadir +# 1. Create the segments file: +[ ! -f $database ] && echo "Database file $1 does not exist!" && exit 1; + +echo "Converting `basename $database` to kaldi directory $datadir " +cat $database | perl -pe 's:.+(BABEL):BABEL:; s:\}\s+\{FROM\s+: :; s:\}\s+\{TO\s+: :; s:\}.+::;' | \ + perl -ne '@K = split; + $utteranceID = @K[0]; + $utteranceID =~ s:[^_]+_[^_]+_[^_]+_::; + $utteranceID =~ s:([^_]+)_(.+)_(inLine|scripted):${1}_A_${2}:; + $utteranceID =~ s:([^_]+)_(.+)_outLine:${1}_B_${2}:; + $utteranceID .= sprintf ("_%06i", (100*@K[2])); + printf("%s %s %.2f %.2f\n", $utteranceID, @K[0], @K[1], @K[2]);' | sort > $datadir/segments + +if [ ! -z $filelist ] ; then + mv $datadir/segments $datadir/segments.full + grep -F -f $filelist $datadir/segments.full > $datadir/segments + + l=`grep -v -F -f $filelist $datadir/segments.full | cut -f 2 -d ' ' | sort -u | wc -l` + echo "Because of using filelist, $l files omitted" +fi + + + # 2. Create the utt2spk file: + +echo "Creating the $datadir/utt2spk file" +cut -f1 -d' ' $datadir/segments | \ + perl -ne 'chomp; m:([^_]+_[AB]).*:; print "$_ $1\n";' | \ + sort > $datadir/utt2spk + + # 3. Create the spk2utt file: + +echo "Creating the $datadir/spk2utt file" +perl -ne '{chomp; @K=split; $utt{@K[1]}.=" @K[0]";} + END{foreach $spk (sort keys %utt) { + printf("%s%s\n", $spk, $utt{$spk}); + } + }' < $datadir/utt2spk | sort > $datadir/spk2utt + +# 4. Create the wav.scp file: +sph2pipe=`which sph2pipe || which $KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe` +if [ $? -ne 0 ] ; then + echo "Could not find sph2pipe binary. Add it to PATH" + exit 1; +fi +sox=`which sox` +if [ $? -ne 0 ] ; then + echo "Could not find sox binary. 
Add it to PATH" + exit 1; +fi + +echo "Creating the $datadir/wav.scp file" +( + set -o pipefail + for file in `cut -f 2 -d ' ' $datadir/segments` ; do + if [ -f $audiopath/audio/$file.sph ] ; then + echo "$file $sph2pipe -f wav -p -c 1 $audiopath/audio/$file.sph |" + elif [ -f $audiopath/audio/$file.wav ] ; then + echo "$file $sox $audiopath/audio/$file.wav -r 8000 -c 1 -b 16 -t wav - downsample |" + else + echo "Audio file $audiopath/audio/$file.sph does not exist!" >&2 + exit 1 + fi + done | sort -u > $datadir/wav.scp + if [ $? -ne 0 ] ; then + echo "Error producing the wav.scp file" + exit 1 + fi +) || exit 1 + +l1=`wc -l $datadir/wav.scp | cut -f 1 -d ' ' ` +echo "wav.scp contains $l1 files" +if [ ! -z $filelist ] ; then + l2=`wc -l $filelist | cut -f 1 -d ' '` + echo "filelist `basename $filelist` contains $l2 files" + + if [ "$l1" -ne "$l2" ] ; then + echo "WARNING: Not all files from the specified fileset made their way into wav.scp" + fi +fi + +# 5. Create the text file: +echo "Creating the $datadir/text file" +if [ ! -z $text ] ; then + cp $text $datadir/text || echo "Could not copy the source text file \"$text\" " && exit 1 +elif $dummy_text ; then + cut -f1 -d' ' $datadir/segments | \ + sed -e 's/$/ IGNORE_TIME_SEGMENT_IN_SCORING/' | \ + sort > $datadir/text +fi + +# 6. reco2file_and_channel +echo "Creating the $datadir/reco2file_and_channel file" +(for f in $( cut -f 1 -d ' ' $datadir/wav.scp ) ; do echo $f $f "1"; done) > $datadir/reco2file_and_channel +echo "Everything done" + + + diff --git a/egs/babel/s5d/local/count_to_logprob.pl b/egs/babel/s5d/local/count_to_logprob.pl new file mode 100755 index 00000000000..7d779321810 --- /dev/null +++ b/egs/babel/s5d/local/count_to_logprob.pl @@ -0,0 +1,94 @@ +#!/usr/bin/env perl + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0. +# + +use strict; +use warnings; +use Getopt::Long; + +my $Usage = < + This script takes in the confusion phone pair counts and converts + the counts into negated log probabilities. 
The counts should be in + the following format: + p1 p2 count1 // For substitution + p3 count2 // For deletion + p4 count3 // For insertion + +Allowed options: + --cutoff : Minimal count to be considered (int , default=1) +EOU + +my $cutoff = 1; +GetOptions('cutoff=i' => \$cutoff); + +@ARGV == 2 || die $Usage; + +# Workout the input and output parameters +my $cm_in = shift @ARGV; +my $cm_out = shift @ARGV; + +open(I, "<$cm_in") || die "$0: Fail to open keywords file $cm_in\n"; +open(O, ">$cm_out") || die "$0: Fail to write confusion matrix $cm_out\n"; + +# Collect counts +my %ins; +my %del; +my %subs; +my %phone_count; +my $ins_count = 0; +my $del_count = 0; +while () { + chomp; + my @col = split(); + @col == 3 || die "$0: Bad line in confusion matrix file: $_\n"; + my ($p1, $p2, $count) = ($col[0], $col[1], $col[2]); + $count >= $cutoff || next; + if ($p1 eq "" && $p2 ne "") { + $ins{$p2} = $count; + $ins_count += $count; + } elsif ($p1 ne "" && $p2 eq "") { + $del{$p1} = $count; + $del_count += $count; + } elsif ($p1 ne "" && $p2 ne "") { + $p1 ne $p2 || next; # Skip same phone convert + $subs{"${p1}_$p2"} = $count; + if (defined($phone_count{$p1})) { + $phone_count{$p1} += $count; + } else { + $phone_count{$p1} = $count; + } + } +} + +# Compute negated log probability +foreach my $key (keys %ins) { + $ins{$key} = -log($ins{$key}/$ins_count); +} +foreach my $key (keys %del) { + $del{$key} = -log($del{$key}/$del_count); +} +foreach my $key (keys %subs) { + my @col = split(/_/, $key); + $subs{$key} = -log($subs{$key}/$phone_count{$col[0]}); +} + +# print results +my $output = ""; +foreach my $key (keys %ins) { + $output .= " $key $ins{$key}\n"; +} +foreach my $key (keys %del) { + $output .= "$key $del{$key}\n"; +} +foreach my $key (keys %subs) { + my @col = split(/_/, $key); + $output .= "$col[0] $col[1] $subs{$key}\n"; +} + +print O $output; + +close(I); +close(O); diff --git a/egs/babel/s5d/local/create_shadow_dataset.sh b/egs/babel/s5d/local/create_shadow_dataset.sh new file mode 100755 index 00000000000..49467ed28c1 --- /dev/null +++ b/egs/babel/s5d/local/create_shadow_dataset.sh @@ -0,0 +1,176 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University +# Apache 2.0. + +stage=0 + +[ -f ./path.sh ] && . ./path.sh +[ -f ./cmd.sh ] && . ./cmd.sh +[ -f /export/babel/data/software/env.sh ] && . /export/babel/data/software/env.sh + +. utils/parse_options.sh + +if [ $# -ne 3 ]; then + echo "Usage: create_shadow_dataset.sh " + exit 1 +fi + +dest=$1 +src1=$2 +src2=$3 + +mkdir -p $dest/kws + +if [ $stage -le 0 ] ; then + utils/combine_data.sh $dest $src1 $src2 || exit 1 +fi + +if [ $stage -le 1 ] ; then + #zkombinovat ecf + echo "Combining ECF files..." 
+ perl -e ' + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; + + use XML::Simple; + use Data::Dumper; + + use strict; + use warnings; + + + my $src1 = XMLin($ARGV[0]); + my $src2 = XMLin($ARGV[1]); + my $tgt={}; + my %filename_hash; + + my $expected_duration=0.0; + my $duration=0.0; + + if ( $src1->{language} ne $src2->{language} ) { + die "ECF languages differ in the source ecf.xml files" + } + $expected_duration=$src1->{source_signal_duration} + $src2->{source_signal_duration}; + + $tgt->{source_signal_duration} = $expected_duration; + $tgt->{language}=$src1->{language}; + $tgt->{version}="Generated automatically by the shadow_set.sh script"; + $tgt->{excerpt}= []; + + #print Dumper(\$src1); + foreach my $excerpt ( @{$src1->{excerpt}} ) { + push @{$tgt->{excerpt}}, $excerpt; + if ( exists $filename_hash{$excerpt->{audio_filename}} ) { + print STDERR "[WARN]: Duplicate filename $excerpt->{audio_filename} \n" + } else { + $duration += $excerpt->{dur} ; + $filename_hash{$excerpt->{audio_filename}} = $excerpt; + } + } + foreach my $excerpt ( @{$src2->{excerpt}} ) { + push @{$tgt->{excerpt}}, $excerpt; + if ( exists $filename_hash{$excerpt->{audio_filename}} ) { + print STDERR "[WARN]: Duplicate filename $excerpt->{audio_filename} \n" + } else { + $duration += $excerpt->{dur} ; + $filename_hash{$excerpt->{audio_filename}} = $excerpt; + } + } + $tgt->{source_signal_duration} = $duration; + + my $tgtxml = XMLout($tgt, RootName=>"ecf"); + print $tgtxml; + ' $src1/kws/ecf.xml $src2/kws/ecf.xml > $dest/kws/ecf.xml +fi + +if [ $stage -le 2 ] ; then + #zkombinovat kwlist + echo "Combining the KWLIST files" + perl -e ' + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; + + use XML::Simple; + use Data::Dumper; + + use strict; + use warnings; + + my $src1 = XMLin($ARGV[0], ForceArray => 1); + my $src2 = XMLin($ARGV[1], ForceArray => 1); + my $tgt={}; + my %kwid_hash; + + if ( $src1->{compareNormalize} ne $src2->{compareNormalize} ) { + die "KWLIST compareNormalize attributes differ in the source kwlist.xml files"; + } + if ( $src1->{language} ne $src2->{language} ) { + die "KWLIST languages differ in the source kwlist.xml files"; + } + + $tgt->{ecf_filename} = ""; + $tgt->{language}=$src1->{language}; + $tgt->{compareNormalize}=$src1->{compareNormalize}; + $tgt->{encoding}=$src1->{encoding}; + $tgt->{version}="1"; + $tgt->{kw}= []; + + + foreach my $kw ( @{$src1->{kw}} ) { + $kw->{kwid} = $kw->{kwid} . "-A"; + if ( exists $kwid_hash{$kw->{kwid}} ) { + print STDERR "[WARN]: Duplicate kwid $kw->{kwid}\n"; + } else { + $kwid_hash{$kw->{kwid}} = $kw; + } + push @{$tgt->{kw}}, $kw; + } + foreach my $kw ( @{$src2->{kw}} ) { + $kw->{kwid} = $kw->{kwid} . 
"-B"; + if ( exists $kwid_hash{$kw->{kwid}} ) { + print STDERR "[WARN]: Duplicate kwid $kw->{kwid}\n"; + } else { + $kwid_hash{$kw->{kwid}} = $kw; + } + push @{$tgt->{kw}}, $kw; + } + + my $tgtxml = XMLout($tgt, RootName=>"kwlist", KeyAttr=>""); + print $tgtxml; + ' $src1/kws/kwlist.xml $src2/kws/kwlist.xml > $dest/kws/kwlist.xml || exit 1 +fi + +if [ $stage -le 3 ] ; then + echo "Making KWLIST maps" + perl -e ' + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; + + use XML::Simple; + use Data::Dumper; + + use strict; + use warnings; + + my $src1 = XMLin($ARGV[0], ForceArray => 1); + open TGT_DEV, ">", $ARGV[1] or die $!; + open TGT_TST, ">", $ARGV[2] or die $!; + + foreach my $kw ( @{$src1->{kw}} ) { + if ( $kw->{kwid} =~ "KW.+-A\$" ) { + my $new_kw = $kw->{kwid}; + my $old_kw = substr $new_kw, 0, -2; + print TGT_DEV "$old_kw\t$new_kw\n"; + } elsif ( $kw->{kwid} =~ "KW.+-B\$" ) { + my $new_kw = $kw->{kwid}; + my $old_kw = substr $new_kw, 0, -2; + print TGT_TST "$old_kw\t$new_kw\n"; + } else { + die "Unsupported or unknown KW ID: $kw->{kwid}\n"; + } + } + ' $dest/kws/kwlist.xml $dest/kws/kws_map.dev.txt $dest/kws/kws_map.test.txt || exit 1 +fi + +exit 0 + diff --git a/egs/babel/s5d/local/cstr_ndx2flist.pl b/egs/babel/s5d/local/cstr_ndx2flist.pl new file mode 100755 index 00000000000..79daa1a99db --- /dev/null +++ b/egs/babel/s5d/local/cstr_ndx2flist.pl @@ -0,0 +1,54 @@ +#!/usr/bin/env perl + +# Copyright 2010-2011 Microsoft Corporation + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +# This is modified from the script in standard Kaldi recipe to account +# for the way the WSJ data is structured on the Edinburgh systems. +# - Arnab Ghoshal, 12/1/12 + +# This program takes as its standard input an .ndx file from the WSJ corpus that looks +# like this: +#;; File: tr_s_wv1.ndx, updated 04/26/94 +#;; +#;; Index for WSJ0 SI-short Sennheiser training data +#;; Data is read WSJ sentences, Sennheiser mic. +#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts +#;; per speaker TI) = 7236 utts +#;; +#11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 +#11_1_1:wsj0/si_tr_s/01i/01ic0202.wv1 +#11_1_1:wsj0/si_tr_s/01i/01ic0203.wv1 + +# and as command-line argument it takes the names of the WSJ disk locations, e.g.: +# /group/corpora/public/wsjcam0/data on DICE machines. +# It outputs a list of absolute pathnames. + +$wsj_dir = $ARGV[0]; + +while(){ + if(m/^;/){ next; } # Comment. Ignore it. + else { + m/^([0-9_]+):\s*(\S+)$/ || die "Could not parse line $_"; + $filename = $2; # as a subdirectory of the distributed disk. 
+    if ($filename !~ m/\.wv1$/) { $filename .= ".wv1"; }
+    $filename = "$wsj_dir/$filename";
+    if (-e $filename) {
+      print "$filename\n";
+    } else {
+      print STDERR "File $filename found in the index but not on disk\n";
+    }
+  }
+}
diff --git a/egs/babel/s5d/local/ctm2segments.pl b/egs/babel/s5d/local/ctm2segments.pl
new file mode 100755
index 00000000000..55a8bd84fc8
--- /dev/null
+++ b/egs/babel/s5d/local/ctm2segments.pl
@@ -0,0 +1,159 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $cf_needed = 0.9;
+my $cf_needed_upper = 1;
+my $extend_segments = 0.0 ;
+
+my $Usage = <<EOU;
+Usage: ctm2segments.pl [options] <ctm-file> <output-data-dir>
+
+Allowed options:
+  --min-cf          : Minimum CF to include the word (float, default = 0.9)
+  --max-cf          : Maximum CF to include the word (float, default = 1.0)
+  --extend-segments : Add this delta to the boundaries of the segments (float, default = 0.0)
+EOU
+
+GetOptions('min-cf=f' => \$cf_needed,
+  'max-cf=f' => \$cf_needed_upper,
+  'extend-segments=f' => \$extend_segments,
+  );
+
+
+# Get parameters
+my $filein = shift @ARGV;
+my $dirout = shift @ARGV;
+
+
+my @segments;
+my @utterances;
+my @text;
+
+my $words = "";
+my $seg_end = -1;
+my $seg_start = -1;
+my $filename;
+
+my $total_seconds=0;
+my $extracted_seconds=0;
+open(FILEIN, $filein) or die "Cannot open the input ctm file $filein";
+while (my $line = <FILEIN>) {
+  chop $line;
+  my @entries = split(/ /, $line);
+  die "Cannot parse line \"$line\"" if scalar @entries != 6;
+
+  ($filename, my $chann_id, my $beg, my $end, my $word, my $conf) = @entries;
+
+  $total_seconds += $end * 1.0;
+
+  if ($conf >= $cf_needed ) {
+    if ( $words ne "" ) {
+      #print "Extend segment\n";
+      $words .= " $word";
+      $seg_end = $beg * 1.0 + $end*1.0;
+    } else {
+      #start a new segment
+      #print "Start segment\n";
+      $seg_start = $beg;
+      $seg_end = $beg * 1.0 + $end*1.0;
+      $words = $word;
+    }
+  } else {
+    #flush the segment
+    if ( $words ) {
+      my @filename_parts = split(/_/, $filename);
+      my $channel="C";
+      if ($filename_parts[6] eq "inLine" ) {
+        $channel="A";
+      } elsif ($filename_parts[6] eq "outLine" ) {
+        $channel="B";
+      }
+
+      $extracted_seconds+= ($seg_end - $seg_start);
+      $seg_start -= $extend_segments;
+      $seg_end += $extend_segments;
+
+      my $spk_id=$filename_parts[3] . "_" . $channel;
+      my $utt_id = $spk_id . "_" . join("_", @filename_parts[4..5]);
+      my $last_part = sprintf("%06d", $seg_start * 100);
+      $utt_id .= "_" . $last_part;
+      #print $utt_id . " $beg \n";
+
+      #14350_A_20121123_042710_001337
+
+      #10901_A_20121128_230024_000227 BABEL_OP1_206_10901_20121128_230024_inLine 2.275 3.265
+      my $segment = "$utt_id $filename $seg_start $seg_end";
+      #14350_A_20121123_042710_001337 14350_A
+      my $utt2spk = "$utt_id $spk_id";
+      #10901_A_20121128_230024_000227 hayi Lovemore
+      my $text = "$utt_id $words";
+      push @segments, $segment;
+      push @utterances, $utt2spk;
+      push @text, $text;
+      $words = "";
+    }
+
+  }
+}
+if ( $words ) {
+  #print "Flush.\n";
+  my @filename_parts = split(/_/, $filename);
+  my $channel="C";
+  if ($filename_parts[6] eq "inLine" ) {
+    $channel="A";
+  } elsif ($filename_parts[6] eq "outLine" ) {
+    $channel="B";
+  }
+
+  $extracted_seconds+= ($seg_end - $seg_start);
+  $seg_start -= $extend_segments;
+  $seg_end += $extend_segments;
+
+  my $spk_id=$filename_parts[3] . "_" . $channel;
+  my $utt_id = $spk_id . "_" . join("_", @filename_parts[4..5]);
+  my $last_part = sprintf("%06d", $seg_start * 100);
+  $utt_id .= "_" . $last_part;
+  #print $utt_id . " $beg \n";
" $beg \n"; + + #14350_A_20121123_042710_001337 + + #10901_A_20121128_230024_000227 BABEL_OP1_206_10901_20121128_230024_inLine 2.275 3.265 + my $segment = "$utt_id $filename $seg_start $seg_end"; + #14350_A_20121123_042710_001337 14350_A + my $utt2spk = "$utt_id $spk_id"; + #10901_A_20121128_230024_000227 hayi Lovemore + my $text = "$utt_id $words"; + push @segments, $segment; + push @utterances, $utt2spk; + push @text, $text; + $words = ""; +} + +open(SEGMENTS, "> $dirout/segments"); +foreach my $line (@segments) { + print SEGMENTS "$line\n"; +} +close(SEGMENTS); + +open(TEXT, "> $dirout/text"); +foreach my $line (@text) { + print TEXT "$line\n"; +} +close(TEXT); + +open(UTT, "> $dirout/utt2spk"); +foreach my $line (@utterances) { + print UTT "$line\n"; +} +close(UTT); + +my $total_hours=sprintf("%.2f", $total_seconds/3600); +my $extracted_hours=sprintf("%.2f", $extracted_seconds/3600); +my $s_ex_secs=sprintf("%d", $extracted_seconds); + +print "Fragments extracted: $s_ex_secs seconds ($extracted_hours hours) out of $total_hours hours\n"; + diff --git a/egs/babel/s5d/local/datasets/basic_kws.sh b/egs/babel/s5d/local/datasets/basic_kws.sh new file mode 100644 index 00000000000..cff34eba69c --- /dev/null +++ b/egs/babel/s5d/local/datasets/basic_kws.sh @@ -0,0 +1,28 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. + +if [ "${dataset_kind}" == "supervised" ] ; then + mandatory_variables="my_ecf_file my_kwlists my_rttm_file" + optional_variables="my_subset_ecf" +else + mandatory_variables="my_ecf_file my_kwlists" + optional_variables="my_subset_ecf" +fi + +check_variables_are_set + +if [ ! -f ${dataset_dir}/kws/.done ] ; then + kws_flags=( --use-icu true ) + if [ "${dataset_kind}" == "supervised" ] || [ !-z "$my_rttm_file" ] ; then + kws_flags+=(--rttm-file $my_rttm_file ) + fi + if $my_subset_ecf ; then + kws_flags+=(--subset-ecf $my_data_list) + fi + local/kws_setup.sh --case_insensitive $case_insensitive \ + "${kws_flags[@]}" "${icu_opt[@]}" \ + $my_ecf_file $my_kwlist_file $lang ${dataset_dir} || exit 1 + touch ${dataset_dir}/kws/.done +fi diff --git a/egs/babel/s5d/local/datasets/extra_kws.sh b/egs/babel/s5d/local/datasets/extra_kws.sh new file mode 100644 index 00000000000..d00eab1b06f --- /dev/null +++ b/egs/babel/s5d/local/datasets/extra_kws.sh @@ -0,0 +1,137 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. 
+ +if [ "${dataset_kind}" == "supervised" ] ; then + mandatory_variables="my_ecf_file my_kwlists my_rttm_file" + optional_variables="my_subset_ecf" +else + mandatory_variables="my_ecf_file my_kwlists" + optional_variables="my_subset_ecf" +fi + +check_variables_are_set + +function register_extraid { + local dataset_dir=$1 + local extraid=$2 + echo "Registering $extraid" + echo $extraid >> $dataset_dir/extra_kws_tasks; + sort -u $dataset_dir/extra_kws_tasks -o $dataset_dir/extra_kws_tasks +} + +function setup_oov_search { + local phone_cutoff=0 + + local g2p_nbest=10 + local g2p_mass=0.95 + + + local data_dir=$1 + local source_dir=$2 + local extraid=$3 + + local kwsdatadir=$data_dir/${extraid}_kws + + mkdir -p $kwsdatadir + + for file in $source_dir/rttm ; do + [ -f $file ] && cp -f $file $kwsdatadir + done + + for file in $source_dir/utter_* $source_dir/kwlist*.xml $source_dir/ecf.xml ; do + cp -f $file $kwsdatadir + done + + kwlist=$source_dir/kwlist_outvocab.xml + #Get the KW list + paste \ + <(cat $kwlist | grep -o -P "(?<=kwid=\").*(?=\")") \ + <(cat $kwlist | grep -o -P "(?<=).*(?=)" | uconv -f utf-8 -t utf-8 -x Any-Lower) \ + >$kwsdatadir/keywords.txt + cut -f 2 $kwsdatadir/keywords.txt | \ + sed 's/\s\s*/\n/g' | sort -u > $kwsdatadir/oov.txt + + + #Generate the confusion matrix + #NB, this has to be done only once, as it is training corpora dependent, + #instead of search collection dependent + if [ ! -f exp/conf_matrix/.done ] ; then + local/generate_confusion_matrix.sh --cmd "$decode_cmd" --nj $my_nj \ + exp/sgmm5_denlats/dengraph exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix || return 1 + touch exp/conf_matrix/.done + fi + confusion=exp/conf_matrix/confusions.txt + + if [ ! -f exp/g2p/.done ] ; then + if [ -f data/.extlex ]; then + local/train_g2p.sh data/local/lexicon_orig.txt exp/g2p || return 1; + else + local/train_g2p.sh data/local/lexicon.txt exp/g2p || return 1; + fi + touch exp/g2p/.done + fi + local/apply_g2p.sh --nj $my_nj --cmd "$decode_cmd" \ + --var-counts $g2p_nbest --var-mass $g2p_mass \ + $kwsdatadir/oov.txt exp/g2p $kwsdatadir/g2p || return 1 + L2_lex=$kwsdatadir/g2p/lexicon.lex + + if [ -z "$L1_lex" ] ; then + L1_lex=data/local/lexiconp.txt + fi + + local/kws_data_prep_proxy.sh \ + --cmd "$decode_cmd" --nj $my_nj \ + --case-insensitive true \ + --confusion-matrix $confusion \ + --phone-cutoff $phone_cutoff \ + --pron-probs true --beam $proxy_beam --nbest $proxy_nbest \ + --phone-beam $proxy_phone_beam --phone-nbest $proxy_phone_nbest \ + $lang $data_dir $L1_lex $L2_lex $kwsdatadir + +} + + +kws_flags=( --use-icu true ) +if [ "${dataset_kind}" == "supervised" ] || [ ! -z "$my_rttm_file" ]; then + #The presence of the file had been already verified, so just + #add the correct switches + kws_flags+=(--rttm-file $my_rttm_file ) +fi +if $my_subset_ecf ; then + kws_flags+=(--subset-ecf $my_data_list) +fi + +if [ ${#my_kwlists[@]} -ne 0 ] ; then + + touch $dataset_dir/extra_kws_tasks + + for extraid in "${!my_kwlists[@]}" ; do + #The next line will help us in running only one. We don't really + #know in which directory the KWS setup will reside in, so we will + #place the .done file directly into the data directory + [ -f $dataset_dir/.done.kws.$extraid ] && continue; + kwlist=${my_kwlists[$extraid]} + + local/kws_setup.sh --extraid $extraid --case_insensitive $case_insensitive \ + "${kws_flags[@]}" "${icu_opt[@]}" \ + $my_ecf_file $kwlist $lang ${dataset_dir} || exit 1 + + #Register the dataset for default running... 
+ #We can do it without any problem here -- the kws_stt_tasks will not + #run it, unless called with --run-extra-tasks true switch + register_extraid $dataset_dir $extraid + touch $dataset_dir/.done.kws.$extraid + done + for extraid in "${!my_kwlists[@]}" ; do + #The next line will help us in running only one. We don't really + #know in which directory the KWS setup will reside in, so we will + #place the .done file directly into the data directory + [ -f $dataset_dir/.done.kws.${extraid}_oov ] && continue; + setup_oov_search $dataset_dir $dataset_dir/${extraid}_kws ${extraid}_oov || exit 1 + register_extraid $dataset_dir ${extraid}_oov + touch $dataset_dir/.done.kws.${extraid}_oov + done +fi + diff --git a/egs/babel/s5d/local/datasets/supervised_pem.sh b/egs/babel/s5d/local/datasets/supervised_pem.sh new file mode 100644 index 00000000000..e131fae40fa --- /dev/null +++ b/egs/babel/s5d/local/datasets/supervised_pem.sh @@ -0,0 +1,35 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. +if [ "${dataset_type}" != "supervised" ] ; then + mandatory_variables="my_data_dir my_data_list my_nj " + optional_variables="" +else + mandatory_variables="my_data_dir my_data_list my_nj " + optional_variables="my_stm_file " +fi + +check_variables_are_set + + +if [[ ! -f ${dataset_dir}/wav.scp || ${dataset_dir}/wav.scp -ot "$my_data_dir" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_type} data lists in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + mkdir -p ${dataset_dir} + local/prepare_acoustic_training_data.pl --fragmentMarkers \-\*\~ \ + $my_data_dir ${dataset_dir} > ${dataset_dir}/skipped_utts.log || exit 1 +fi + +if [ "$dataset_kind" == "supervised" ]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_type} stm files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + if [ ! -z $my_stm_file ] ; then + local/augment_original_stm.pl $my_stm_file ${dataset_dir} + else + local/prepare_stm.pl --fragmentMarkers \-\*\~ ${dataset_dir} + fi +fi + diff --git a/egs/babel/s5d/local/datasets/supervised_seg.sh b/egs/babel/s5d/local/datasets/supervised_seg.sh new file mode 100644 index 00000000000..45cc7f28593 --- /dev/null +++ b/egs/babel/s5d/local/datasets/supervised_seg.sh @@ -0,0 +1,90 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. +if [ ${dataset_type} != "supervised" ] ; then + mandatory_variables="my_data_dir my_data_list my_nj" + optional_variables="" +else + mandatory_variables="my_data_dir my_data_list my_nj" + optional_variables="my_stm_file" +fi + +check_variables_are_set + +segmentation_opts="--isolated-resegmentation \ + --min-inter-utt-silence-length 1.0 \ + --silence-proportion 0.05 " + +workdir=exp/make_seg/${dataset_id} +unseg_dir=$workdir +mkdir -p $unseg_dir +# 4. Create the wav.scp file: +sph2pipe=`which sph2pipe || which $KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe` +if [ $? -ne 0 ] ; then + echo "Could not find sph2pipe binary. Add it to PATH" + exit 1; +fi +sox=`which sox` +if [ $? 
-ne 0 ] ; then + echo "Could not find sox binary. Add it to PATH" + exit 1; +fi + +echo "Creating the $unseg_dir/wav.scp file" +audiodir=$my_data_dir/audio +for file in `cat $my_data_list | sort -u` ; do + if [ -f $audiodir/$file.sph ] ; then + echo "$file $sph2pipe -f wav -p -c 1 $audiodir/$file.sph |" + elif [ -f $audiodir/$file.wav ] ; then + echo "$file $sox $audiodir/$file.wav -r 8000 -c 1 -b 16 -t wav - downsample |" + else + echo "Audio file $audiodir/$file.(sph|wav) does not exist!" >&2 + exit 1 + fi +done | sort -u > $unseg_dir/wav.scp + +l1=`cat $unseg_dir/wav.scp | wc -l ` +l2=`cat $my_data_list | wc -l ` +if [ "$l1" -ne "$l2" ] ; then + echo "wav.scp number of files: $l1" + echo "filelist number of files: $l2" + echo "Not all files from the list $my_data_list found their way into wav.scp" + exit 1 +fi + +echo "Creating the $unseg_dir/reco2file_and_channel file" +cat $unseg_dir/wav.scp | awk '{print $1, $1, "A";}' > $unseg_dir/reco2file_and_channel +cat $unseg_dir/wav.scp | awk '{print $1, $1;}' > $unseg_dir/utt2spk +utils/utt2spk_to_spk2utt.pl $unseg_dir/utt2spk > $unseg_dir/spk2utt + +make_plp $unseg_dir $workdir/make_plp $workdir/plp || exit 1 + +local/resegment/generate_segments.sh --nj $my_nj --cmd "$decode_cmd" \ + --noise_oov false --segmentation_opts "$segmentation_opts" \ + $unseg_dir data/lang exp/tri4b_seg \ + $workdir $dataset_dir || exit 1 + +num_hours=`cat ${dataset_dir}/segments | \ + awk '{secs+= $4-$3;} END{print(secs/3600);}'` + +echo "Number of hours of the newly segmented data: $num_hours" + +if [ "$dataset_kind" == "supervised" ]; then + echo --------------------------------------------------------------------- + echo "preparing ${dataset_id} stm files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + if [ ! -z $my_stm_file ] ; then + local/augment_original_stm.pl $my_stm_file ${dataset_dir} + else + local/prepare_stm.pl --fragmentmarkers \-\*\~ ${dataset_dir} + fi +else + echo --------------------------------------------------------------------- + echo "preparing ${dataset_id} stm files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + if [ ! -z $my_stm_file ] ; then + local/augment_original_stm.pl $my_stm_file ${dataset_dir} + fi +fi + diff --git a/egs/babel/s5d/local/datasets/supervised_uem.sh b/egs/babel/s5d/local/datasets/supervised_uem.sh new file mode 100644 index 00000000000..5ac1e003d5d --- /dev/null +++ b/egs/babel/s5d/local/datasets/supervised_uem.sh @@ -0,0 +1,36 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. + +eval my_data_cmudb=\$${dataset_type}_data_cmudb + +if [ "${dataset_kind}" != "supervised" ] ; then + mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" + optional_variables="" +else + mandatory_variables="my_data_dir my_data_list my_nj my_data_cmudb" + optional_variables="my_stm_file" +fi + +check_variables_are_set + +if [[ ! 
-f ${dataset_dir}/wav.scp || ${dataset_dir}/wav.scp -ot "$my_data_cmudb" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_type} data lists in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + mkdir -p ${dataset_dir} + local/cmu_uem2kaldi_dir.sh --filelist $my_data_list \ + $my_data_cmudb $my_data_dir ${dataset_dir} +fi + +if [ "$dataset_kind" == "supervised" ]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_type} stm files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + if [ ! -z $my_stm_file ] ; then + local/augment_original_stm.pl $my_stm_file ${dataset_dir} + else + local/prepare_stm.pl --fragmentMarkers \-\*\~ ${dataset_dir} + fi +fi diff --git a/egs/babel/s5d/local/datasets/unsupervised_seg.sh b/egs/babel/s5d/local/datasets/unsupervised_seg.sh new file mode 120000 index 00000000000..9e2e12b5bad --- /dev/null +++ b/egs/babel/s5d/local/datasets/unsupervised_seg.sh @@ -0,0 +1 @@ +supervised_seg.sh \ No newline at end of file diff --git a/egs/babel/s5d/local/datasets/unsupervised_uem.sh b/egs/babel/s5d/local/datasets/unsupervised_uem.sh new file mode 120000 index 00000000000..81440969d5c --- /dev/null +++ b/egs/babel/s5d/local/datasets/unsupervised_uem.sh @@ -0,0 +1 @@ +supervised_uem.sh \ No newline at end of file diff --git a/egs/babel/s5d/local/datasets/vocab_kws.sh b/egs/babel/s5d/local/datasets/vocab_kws.sh new file mode 100644 index 00000000000..d161fc77b67 --- /dev/null +++ b/egs/babel/s5d/local/datasets/vocab_kws.sh @@ -0,0 +1,51 @@ +#This script is not really supposed to be run directly +#Instead, it should be sourced from the decoding script +#It makes many assumption on existence of certain environmental +#variables as well as certain directory structure. + +if [ "${dataset_kind}" == "supervised" ] ; then + mandatory_variables="my_ecf_file my_kwlist_file my_rttm_file" + optional_variables="my_subset_ecf" +else + mandatory_variables="my_ecf_file my_kwlist_file" + optional_variables="my_subset_ecf" +fi + +check_variables_are_set + +if [ "$dataset_kind" == "shadow" ]; then + true #we do not support multiple kw lists for shadow set system + +elif [ ! -f $dataset_dir/.done.kws.fullvocab ] ; then + #a This will work for both supervised and unsupervised dataset kinds + kws_flags=() + if [ "$dataset_kind" == "supervised" ] || [ ! -z "$my_rttm_file" ] ; then + kws_flags+=(--rttm-file $my_rttm_file ) + fi + if $my_subset_ecf ; then + kws_flags+=(--subset-ecf $my_data_list) + fi + + #We just could come with some bogus naming scheme, + #but as long as the audio files can tell the iarpa lang id, we will use that + langid=`ls -1 $my_data_dir/audio/ | head -n 1| cut -d '_' -f 3` + + #NB: we assume the default KWS search is already done and will "borrow" + #the rttm and ecf files. + #We could easily generate the ecf file, but the RTTM assumes the decoding + #had been already done. That could be done + #Ideally, these files should be generated here! 
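+  # Sketch of what the process substitution below feeds to kws_setup.sh:
+  # a (hypothetical) words.txt line "hello 42", with langid 206, becomes
+  #   KWID206-FULLVOCAB-00042 hello
+  # The --kwlist-wordlist true flag appears to make kws_setup.sh accept this
+  # plain two-column word-list format rather than a kwlist.xml file.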
+ + local/kws_setup.sh --kwlist-wordlist true "${kws_flags[@]}" \ + --extraid fullvocab $my_ecf_file \ + <(cat $lang/words.txt | \ + grep -v "^<" | grep -v "^#" | \ + awk "{printf \"KWID$langid-FULLVOCAB-%05d %s\\n\", \$2, \$1 }" ) \ + $lang ${dataset_dir} || exit 1 + + echo fullvocab >> $dataset_dir/extra_kws_tasks; + sort -u $dataset_dir/extra_kws_tasks -o $dataset_dir/extra_kws_tasks + touch $dataset_dir/.done.kws.fullvocab +fi + + diff --git a/egs/babel/s5d/local/decode_helper.sh b/egs/babel/s5d/local/decode_helper.sh new file mode 100755 index 00000000000..d2bed774c68 --- /dev/null +++ b/egs/babel/s5d/local/decode_helper.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +. ./cmd.sh + +TYPE=$1 +LANGDIR=$2 +MODELDIR=$3 +DEVDIR=$4 +TRANSFORMDIR=$5 + +echo "$@" + +if [ "$1" == "SI" ]; then + utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 + steps/decode.sh --nj 20 --cmd "$decode_cmd" \ + $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1 +elif [ "$1" == "FMLLR" ]; then + utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 + steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \ + $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1 +elif [ "$1" == "SGMM" ]; then + utils/mkgraph.sh $LANGDIR $MODELDIR $MODELDIR/graph || exit 1 + + steps/decode_sgmm.sh --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR \ + $MODELDIR/graph $DEVDIR $MODELDIR/decode || exit 1; + + steps/decode_sgmm.sh --use-fmllr true --nj 20 --cmd "$decode_cmd" --transform-dir $TRANSFORMDIR\ + $MODELDIR/graph $DEVDIR $MODELDIR/decode_fmllr || exit 1; + +fi + + diff --git a/egs/babel/s5d/local/eval_kw_subsets.sh b/egs/babel/s5d/local/eval_kw_subsets.sh new file mode 100755 index 00000000000..8a67225da52 --- /dev/null +++ b/egs/babel/s5d/local/eval_kw_subsets.sh @@ -0,0 +1,4 @@ +KWSEval -e ecf.xml -r rttm -t keyword_outvocab.xml -s kwslist.xml -c -o -b -d -f ./kws/outvocab +KWSEval -e ecf.xml -r rttm -t keyword_invocab.xml -s kwslist.xml -c -o -b -d -f ./kws/invocab +KWSEval -e ecf.xml -r rttm -t kws.xml -s kwslist.xml -c -o -b -d -f ./kws/fullvocab + diff --git a/egs/babel/s5d/local/extend_lexicon.sh b/egs/babel/s5d/local/extend_lexicon.sh new file mode 100755 index 00000000000..c930b1729e0 --- /dev/null +++ b/egs/babel/s5d/local/extend_lexicon.sh @@ -0,0 +1,572 @@ +#!/bin/bash + +# Copyright 2014 Johns Hopkins University (authors: Daniel Povey, Yenda Trmal) +# 2014 Guoguo Chen +# 2015 MIT Lincoln Labs (author: Fred Richardson) +# Apache 2.0. + +# This script takes an input lexicon (e.g. lexicon.txt) and generates likely +# out of vocabulary words from it, with their associated spellings. It outputs +# two files: lexiconp.txt (this is the lexicon format that has pronunciation +# probabilities; the words in the original lexicon have probability one), and +# oov2prob, which says how the OOV mass is distributed among the new OOV words +# in the lexicon. + +# It assumes that the syllables in pronunciations in the input lexicon.txt are +# separated by tabs, as is normal for the BABEL setup; the syllable boundaries +# are necessary for the method that this script uses. + +# We use SRILM to train an lm (lm.gz) by treating the sequence of syllables in a +# pronunciation like the sequence of words in a sentence; we use a 3-gram +# Kneser-Ney smoothed model, as this seemed to work best. We then generate +# "sentences" (really, pronunciations) from this LM using the "ngram" command +# from SRILM with the "-gen" option. 
We do this in parallel, and also use SRILM +# to compute the probabilities of these "sentences". Then the "--num-prons" +# most likely generated pronunciations are selected (by default: one million). + +# Next, we use the g2p tool from "Sequitur" to learn a mapping from +# pronuciations of words to their spellings. This is the opposite of the normal +# direction of prediction, so we refer to the models as "p2g". To do this, we +# give g2p a reversed version of the input lexicon, so while the input lexicon +# might have entries like +# Hi h ay +# the reversed lexicon would have entries like +# hay H i +# We were concerned that depending on the way the phones are represented as +# letters, there might be a lot of ambiguity introduced when we get rid of the +# spaces (e.g. does "hay" come from h+ay, or h+a+y?), and that this might hurt +# the accuracy of the g2p prediction. We did not want to introduce a separator +# because we felt that this would make the mapping harder for g2p to learn. +# Instead we mapped the phones to unique letters; this is what the "phone_map" +# file is about. Furthermore, in BABEL we have the concept of tags on the +# phones, e.g. in a tonal language, ay_3 might be the phone "ay" with tone 3. +# As far as Kaldi is concerned, ay_3 is a single phone. To avoid the number of +# letters blowing up too much, we make these tags separate letters when generating +# phone_map, so ay_3 might be mapped to kX with ay mapping to k and 3 mapping to +# X. To avoid ambiguity being introduced, we ensure that the alphabets for the +# phones and the tags are distinct (and in general, we allow multiple tags, with +# the tags in different positions having distinct alphabets). + +# Once we have our g2p models trained (and the g2p training is the most time +# consuming aspect of this script), we apply g2p to all of our generated +# pronunciations to give us likely spelling variants. The number of +# alternatives is controlled by the options --var-mass (default: 0.8, meaning we +# generate 0.8 of the entire probability mass), and --var-counts (default: 3, +# meaning we generate at most 3 alternative spellings per pronunciation). We +# take the probabilities of the OOVs (as assigned by the syllable-level LM) and +# multiply them by the spelling probabilities assigned by g2p, to give us the +# probability of the (pronunciation, word) pair. From these pairs we strip out +# those with words (spellings) that were in the original lexicon, and those with +# pronunciations shorter than a specified minimum --min-phones (default: 3). We +# then limit the total number of pairs to --num-prons (default: one million) and +# scale us the probabilities of the pairs pairs so that they sum to one overall. + +# We format this information as two pieces: a lexicon with probabilities +# (lexiconp.txt) and a file that gives us the probability of each OOV word +# (oov2prob). The probabilities in lexiconp.txt are normalized so that the most +# probable pronunciation of each word is 1; the probabilities in oov2prob are +# normalized such that if we multiply by the pronunciation probability in +# lexiconp.txt, we would get the probability we assigned to that (pronunciation, +# word) pair. + +# These outputs are used as follows: lexiconp.txt will be used by +# utils/prepare_lang.sh to generate L.fst and L_disambig.fst in the lang/ +# directory, so the lexicon FSTs and words.txt will include the generated OOVs. +# oov2prob will be used when generating the grammar transducer G.fst by +# local/arpa2G.sh. 
For example, if you call arpa2G.sh with the options +# --oov-prob-file some/dir/oov2prob --unk-fraction 0.33, it will put all the OOVs +# listed in some/dir/oov2prob as if they were unigrams in G.fst, with probability +# equal to 0.33 times the probability listed in oov2prob. However, that script +# will not allow the unigram probability of any OOV word to be more probable than +# the least probable word which was originally in the ARPA file (not counting , +# which generally has probability -99); this is applied as a ceiling on the +# unknown-word probabilities. Note: the --unk-fraction should probably be +# similar to the OOV rate in that language. Calculating the OOV rate on some +# dev data is one reasonable way to set this; see the commands at the very +# bottom of this file for an example of how we can compute the OOV rate. +# (Arguably, one should give an even higher fraction than this, because given the +# unigram state, the probability of seeing an unknown word is higher). +# It might seem appropriate to use as "unk-fraction" the probability of +# the unknown word ( or ) in the LM itself. However, this depends +# how the LM was estimated; I think in the BABEL setup, appears as +# an actual word in the transcripts, and the probability that the LM assigns +# to it seems to be lower than appropriate. + +stage=-5 +g2p_iters=5 +num_prons=1000000 # number of prons to generate. +num_sent_gen=12000000 # number of sents to generate. this should + # exceed num_prons by a factor of at least + # several. +nj=40 # number of jobs to use for generation. +encoding='utf-8' # option for g2p; leave this as it is. +# the following two options are used in g2p generation. +var_counts=3 #Generate up to N variants in g2p +var_mass=0.8 #Generate enough variants to produce 80 % of the prob mass +min_phones=3 # minimum number of phones we allow in generated words + # (very short generated words could contribute to graph blowup, + # and might hurt the decoding accuracy also). +skip_done=false # if true, allows us to skip over done g2p stages. +cmd=run.pl +cleanup=true + +echo "$0 $@" # Print the command line for logging + +. utils/parse_options.sh +. path.sh + +if [ $# -ne 2 ] && [ $# -ne 3 ]; then + echo "$0: usage: extend_lexicon.sh [options] [dev_text]" + echo " e.g.: $0 data/local/lexicon_orig.txt data/local/extend/" + echo "Will create in the files lexiconp.txt and oov2prob" + echo "where lexiconp.txt is an extended lexicon with pronunciation" + echo "probabilities, and oov2prob has lines which divide" + echo "the OOV probability mass among the introduced OOV words." + echo "Important options:" + echo " --cmd # how to run jobs, default run.pl" + echo " --num-prons # how many prons to generate, default 1000000" + exit 1; +fi + + +input_lexicon=$1 +toplevel_dir=$2 # e.g. data/local/extend +dev_text= +if [ $# -eq 3 ]; then + dev_text=$3 +fi + +dir=$2/tmp # most of our work happens in this "tmp" directory. + +mkdir -p $dir + +if [ ! -s $input_lexicon ]; then + echo "$0: expected input lexicon $input_lexicon to exist"; +fi + +cp $input_lexicon $toplevel_dir/input_lexicon.txt # just to have a record of what we started with. + +loc=`which ngram-count`; +if [ -z $loc ]; then + if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... 
+ sdir=`pwd`/../../../tools/srilm/bin/i686-m64 + else + sdir=`pwd`/../../../tools/srilm/bin/i686 + fi + if [ -f $sdir/ngram-count ]; then + echo Using SRILM tools from $sdir + export PATH=$PATH:$sdir + else + echo You appear to not have SRILM tools installed, either on your path, + echo or installed in $sdir. See tools/install_srilm.sh for installation + echo instructions. + exit 1 + fi +fi + + +if ! which g2p.py >&/dev/null; then + if [ ! -d $KALDI_ROOT/tools/sequitur ]; then + echo "Sequitur was not found !" + echo "Go to $KALDI/tools and execute extras/install_sequitur.sh" + else + echo "Problems running sequitur. Check that your path.sh is putting it on the path." + echo "e.g. that it is sourcing KALDI_ROOT/tools/env.sh and that that env.sh file exists" + fi + exit 1; +fi + +if ! which g2p.py >/dev/null ; then + exit 1 +fi + + +if [ $stage -le -5 ]; then + # Map the phones to a more unambiguous representation so that when we + # concatenate the letters of them, we won't lose information. This will + # also make g2p's life easier because each phone goes to a single letter, + # which g2p will treat as a single symbol (remember, g2p is designed + # to produce graphemes, so the tokens it produces are letters). + + cat $toplevel_dir/input_lexicon.txt | \ + awk '{for(n=2;n<=NF;n++) seen[$n]=1;} END{for (key in seen) print key;}' >$dir/phonelist + + cat $dir/phonelist | perl -e ' @ids = ("a".."z", "A".."Z", "0".."9", ":", "=", "?", "@", "[", "]", "^", "+", "\$", "%", "&", "#", "*", "!", "(", ")", "{", "}" ); + @map = (); while(<>) { + chomp; $output = "$_ "; + @col = split("_"); + # Loop over different positions. + for ($p = 0; $p < @col; $p++) { + # New position that has not been assigned a hash. + if (@map <= $p) { push(@map, {}); } + # Assign map for each position. + if (!defined($map[$p]->{$col[$p]})) { + if (@ids == 0) { # We have used all the ids... die here. + die "Used up all the un-mapped ids, cannot continue\n"; + } + $map[$p]->{$col[$p]} = shift @ids; + } + $output .= "$map[$p]->{$col[$p]}"; + } + print "$output\n"; }' > $dir/phone_map + cat $dir/phone_map | awk '{print $2, $1}' > $dir/phone_map.reverse + + cat $toplevel_dir/input_lexicon.txt | \ + local/apply_map_tab_preserving.pl -f 2- $dir/phone_map > $dir/lexicon_in.txt +fi + + +if [ $stage -le -4 ]; then + cat $dir/lexicon_in.txt | perl -ane 'if (! 
m/^\<\S+\>\s/) { print; } ' > $dir/lexicon_in_nosil.txt + + cat $dir/lexicon_in.txt | perl -ane 's/^(\S+\s+)/${1}1.0\t/;print;' > $dir/lexiconp_in.txt +fi + + + + +if [ $stage -le -3 ]; then + # Each syllable will be given a "word" representation; we join the phones using comma "," + perl -e 'while() { s/^\S+\s*//; s/ /,/g; print }' <$dir/lexicon_in_nosil.txt >$dir/syllable_text.txt + + echo "$0: using SRILM to train syllable LM" + + ngram-count -lm $dir/3gram.kn022.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $dir/syllable_text.txt -sort + + rm $dir/lm.gz 2>/dev/null + ln -s 3gram.kn022.gz $dir/lm.gz +fi + + +ngram=$(which ngram) + +if [ $stage -le -2 ]; then + mkdir -p $dir/log + echo "$0: generating words from the syllable LM" + + per_job_num_sent_gen=$[$num_sent_gen/$nj] + + $cmd JOB=1:$nj $dir/log/gen.JOB.log \ + $ngram -lm $dir/lm.gz -gen $per_job_num_sent_gen -seed JOB \| \ + sort -u \> $dir/sents.JOB || exit 1; +fi + +if [ $stage -le -1 ]; then + echo "$0: computing probs for the generated sentences" + rm $dir/probs.* 2>/dev/null + + echo '#!/usr/bin/perl +while(1) { + $sent = <>; $line=<>; if ($line !~ m/sentences/) { $sent =~ m/^file/ || die "Bad sent $sent"; exit(0); } + $line = <>; if ($line !~ m/logprob= (\S+)/) { die "Bad line $line"; } print "$1 $sent"; + $line = <>; $line eq "\n" || die "expected blank line"; }' >$dir/temp.pl + chmod +x $dir/temp.pl + + $cmd JOB=1:$nj $dir/log/compute_prob.JOB.log \ + $ngram -debug 1 -lm $dir/lm.gz -ppl $dir/sents.JOB \| $dir/temp.pl \| sort -gr \> $dir/probs.JOB || exit 1; + + if $cleanup; then + rm $dir/sents.*; + fi + sort -m -gr $dir/probs.* > $dir/probs.all + uniq $dir/probs.all | head -n $num_prons > $dir/probs || true + if $cleanup; then + rm $dir/probs.*; + fi + + mass=$(cat $dir/probs | awk '{x += exp($1 * log(10));} END{print x}') + + echo "$0: total probability mass in generated words is $mass" + echo " this should ideally be close to 1 (although we lose a little due to the" + echo " empty sentence). You can get closer by increasing --num-sent-gen and/or" + echo " --nj" + + nl=$(cat $dir/probs | wc -l) + if [ $nl -lt $num_prons ]; then + echo "$0: Number of generated lines $nl is less than number of requested words $num_prons:" + echo " please run with larger --nj, currently $nj " + exit 1; + fi +fi + + +# Next we train a reverse g2p, which is really p2g. Suppose a line in the lexicon is +# sugar s uh g ax r +# The basic idea is that we'd transform it to the following in reverse_lex.sh +# suhgaxr s u g a r +# We may lose a little information by doing this, though, because the segmentation +# into phonemes may be ambiguous. So we create a mapping from the original phonemes +# and tags to letters of the alphabet. Note: tags are things like s_3 for a phone: here +# s is the phone and _3 is the tag. + + +if [ $stage -le 0 ]; then + cat $dir/lexicon_in_nosil.txt | perl -ane ' + use Encode qw(decode encode); + @A = split; $w = shift @A; + $w = Encode::decode("'$encoding'", $w); + $w = join(" ", split("", $w)); + $w = Encode::encode("'$encoding'", $w); + print join("", @A) . "\t" . $w . "\n";' > $dir/lexicon_reverse.txt + + echo "$0: Training the G2P model (iter 0)" + if ! $skip_done || [ ! 
-f $dir/p2g.model.0 ]; then + $cmd $dir/log/g2p.0.log \ + g2p.py -S --encoding $encoding --train $dir/lexicon_reverse.txt --devel 5% --write-model $dir/p2g.model.0 || exit 1; + else + echo "$0: $dir/p2g.model.0 already exists: skipping it since --skip-done is true" + fi +fi + +for i in `seq 0 $(($g2p_iters-2))`; do + if [ $stage -le $[i+1] ]; then + if ! $skip_done || [ ! -f $dir/p2g.model.$[$i+1] ]; then + echo "$0: Training the G2P model (iter $[$i + 1] )" + $cmd $dir/log/g2p.$[$i+1].log \ + g2p.py -S --encoding $encoding --model $dir/p2g.model.$i --ramp-up \ + --train $dir/lexicon_reverse.txt --devel 5% \ + --write-model $dir/p2g.model.$(($i+1)) + else + ii=$[$i+1]; + echo "$0: $dir/p2g.model.$ii already exists: skipping it since --skip-done is true" + fi + fi + rm -f $dir/p2g.model.final + ln -s p2g.model.$(($i+1)) $dir/p2g.model.final +done + + + +if [ $stage -le $g2p_iters ]; then + # get the word-list to apply g2p to; each one is just a sequence + # of phones, formed by appending the syllables in the "generated sentences" + # (really generated syllable-sequences) in $dir/probs, and removing the + # separator. + + cat $dir/probs | head -n $num_prons | awk '{$1=""; print $0}' | \ + sed "s/,//g;s/ //g;" | sort | uniq > $dir/fake_word_list.txt + + echo "$0: Applying the G2P model to wordlist $wordlist" + + $cmd JOB=1:$nj $dir/log/apply_p2g.JOB.log \ + split -n l/JOB/$nj $dir/fake_word_list.txt \| \ + g2p.py -V $var_mass --variants-number $var_counts --encoding $encoding \ + --model $dir/p2g.model.final --apply - \ + \> $dir/p2g_output.JOB || exit 1; + perl -wlne 'use strict; + our %P; + my ($prn,$num,$prb,$spl)=m/^(\S+)\s+(\S+)\s+(\S+)\s+(.*)$/; + my $tok=$prn."=".$spl; + $P{$tok} = [ $num, $prb ] unless (defined($P{$tok}) && $P{$tok}[1] < $prb); + END { + map{ my ($prn,$spl)=m/^(.*)=(.*)$/; + my ($num, $prb) = @{$P{$tok}}; + print join("\t",$prn,$num,$prb,$spl) + } sort keys %P + }' $dir/p2g_output.* > $dir/p2g_output + rm $dir/p2g_output.* +fi + +if [ $stage -le $[$g2p_iters+1] ]; then + + # the NF >= 4 is about pruning out any empty spellings, that would + # produce an empty word. + # pron2spelling contains lines like ak>a 0.957937 aka + cat $dir/p2g_output | \ + awk '{if (NF >= 4) {printf("%s %s ", $1, $3); for (n=4;n<=NF;n++) {printf("%s", $n);} printf("\n"); }}' | \ + sort | uniq > $dir/pron2spelling + + # Now remove from pron2spelling, any words that appear in $dir/lexiconp_in.txt + # (this also contains the excluded words like ). + cat $dir/pron2spelling | \ + perl -e 'open(F, $ARGV[0]) || die "opening $ARGV[0]"; while() { @A=split; $seen_word{$A[0]}=1; } + while() { @A=split; if (! $seen_word{$A[2]}) { print; }} ' $dir/lexiconp_in.txt > $dir/pron2spelling.excluded + # $dir/pron2spelling.excluded contains lines like + #ab syllable1 syllable2 ... + # e.g. 
+ # Kuku 0.000002642 k>&u k>&u + + cat $dir/probs | \ + perl -e ' while(){ @A = split; $prob = shift @A; $pron=join("", @A); + $pron =~ tr/,//d; print "$pron $_"; } '> $dir/probs.with_pron + # $dir/probs.with_pron contains lines like the following: + # ak>a -2.43244 a &k>&a + # This is so we can get the pronunciation in the same form that we put it in, for + # the p2g training, for easier comparison with the lines in $dir/pron2spelling.excluded + + perl -e ' ($p2s, $probs_with_pron) = @ARGV; + open(P2S, "<$p2s" || die); open(PROBS, "<$probs_with_pron")||die; + while () { + @A = split; + ($pron,$pronprob,$spelling) = @A; + if (!defined $prons{$pron}) { $prons{$pron} = [ ]; } # new anonymous array + $ref = $prons{$pron}; + push @$ref, "$pronprob $spelling"; + } + $log10 = log(10.0); + while () { + @A = split; + $pron = shift @A; # pron in same format as used by p2g model. + $logprob = shift @A; + $syllable_pron = join(" ", @A); # pron separated by syllable + $p = exp($logprob * $log10); + $ref = $prons{$pron}; + if (defined $ref) { + foreach $str (@$ref) { + @B = split(" ", $str); + ($pronprob,$spelling) = @B; + $pair_prob = $p * $pronprob; + print "$spelling $pair_prob $syllable_pron\n"; + } + } + } ' $dir/pron2spelling.excluded $dir/probs.with_pron > $dir/lexicon.oov.raw + + # $dir/lexicon.oov.raw contains lines like: + # ukuzi 0.000342399163717093 u &k>&u &z&i + + mass=$(cat $dir/lexicon.oov.raw | awk '{x+=$2;} END{print x}') + echo "$0: Total probability mass of unseen words (before removing prons" + echo " shorter than $min_phones phones) is $mass" + + # the next stage does 3 things: (1) it converts the pronunciations to be + # tab-separated lists of syllables and removes the seprator ","; (2) it limits us + # to prons containing at least $min_phones phones; and (3) it limits to the + # most likely $num_prons pairs of (spelling, pron) + perl -e ' while () { + @A = split; + $spelling = shift @A; + $prob = shift @A; + for ($n = 0; $n < @A; $n++) { # replace separator in syllable with space. + $A[$n] =~ tr/,/ /d; # replace the separator with space. + } + $final_pron = join("\t", @A); + print "$spelling\t$prob\t$final_pron\n"; + } ' <$dir/lexicon.oov.raw | sort -k2,2 -gr | \ + awk -v min=$min_phones '{if(NF>=min+2){print;}}' | head -n $num_prons >$dir/lexicon.oov + + + mass=$(cat $dir/lexicon.oov | awk '{x+=$2;} END{print x}') + echo "$0: Total probability mass of unseen words (after removing prons" + echo " shorter than $min_phones phones) is $mass." + + + # $dir/lexicon.oov contains lines like the following: + # ngisa 0.00340513074018366 N g i s a + # where the multiple-spaces are actually tabs. + + # Now renormalize the probability to sum to one, decompose $dir/lexicon.oov + # into two pieces: a lexicon $dir/lexiconp_oov.txt, which contains the + # probabilities of different spellings of words (with the most likely one at + # 1.0), and $dir/oov2prob which contains the probabilities of the words + # (we'll use it later to adjust the LM). + + # the uniq here shouldn't be needed, actually. [relates to a bug in a previous + # step that is now fixed. This script relies on the fact that lexicon.oov + # is sorted in reverse order of probability. 
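+  # Worked example with made-up numbers: if lexicon.oov contained
+  #   foo  0.006  f u
+  #   foo  0.003  f o
+  # and the total mass were 0.01, the renormalized pair probs would be 0.6
+  # and 0.3; oov2prob would then get "foo 0.6" (the word's best renormalized
+  # prob) and lexiconp_oov.txt the pronprobs 1.0 and 0.5 (each prob divided
+  # by the word's max), so multiplying an oov2prob entry by a pronprob
+  # recovers the corresponding pair probability.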
+  cat $dir/lexicon.oov | awk -v mass=$mass 'BEGIN{OFS=FS="\t";} {$2 = $2/mass; print;}' | uniq | \
+    perl -e ' ($lexiconp,$words_probs) = @ARGV;
+     open(L, "|sort -u >$lexiconp") || die "opening lexicon $lexiconp";
+     open(W, "|sort -u >$words_probs") || die "opening probs file $words_probs";
+     while (<STDIN>) {
+       @A = split("\t", $_);
+       $word = shift @A; $prob = shift @A; $pron = join("\t", @A);
+       if (!defined $maxprob{$word}) { # max prob is always the first.
+         $maxprob{$word} = $prob;
+         print W "$word $prob\n";
+       }
+       $pronprob = $prob / $maxprob{$word};
+       $pronprob <= 1 || die "bad pronprob $pronprob\n";
+       print L "$word\t$pronprob\t$pron";
+     } close(L); close(W); # wait for sort to finish. ' \
+     $dir/lexiconp_oov.txt $dir/oov2prob
+
+  # lexiconp_oov.txt contains lines like:
+  #leyanga 0.96471840417664 l 3 j_" a_" N a
+  #leyanga 1 l 3 j_" a_" N g a
+
+  # oov2prob looks like this:
+  #-Uni 8.77716315938887e-07
+  #Adlule 9.62418179264897e-08
+  #Afuna 2.23048402109824e-06
+fi
+
+if [ $stage -le $[$g2p_iters+2] ]; then
+  # copy the outputs to the output directory, e.g. data/local/extend
+  cat $dir/lexiconp_in.txt $dir/lexiconp_oov.txt | \
+    local/apply_map_tab_preserving.pl -f 3- $dir/phone_map.reverse | sort -u > $toplevel_dir/lexiconp.txt
+  cp $dir/oov2prob $toplevel_dir/oov2prob
+fi
+
+# Finally, if $dev_text is not empty, print out OOV rate. We assume $dev_text is
+# in the following format:
+# 14350_A_20121123_042710_001717 yebo yini
+# where "14350_A_20121123_042710_001717" is the utterance id and "yebo yini" is
+# the actual words.
+if [ ! -z $dev_text ]; then
+  # Original token OOV rate
+  cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' |\
+    perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+    while(<STDIN>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+    $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+    printf("Seen $invoc out of $tot tokens; token OOV rate is %.2f\n", $oov_rate);' \
+    $toplevel_dir/input_lexicon.txt > $toplevel_dir/original_oov_rates
+
+  # New token OOV rate
+  cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' |\
+    perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+    while(<STDIN>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+    $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+    printf("Seen $invoc out of $tot tokens; token OOV rate is %.2f\n", $oov_rate);' \
+    $toplevel_dir/lexiconp.txt > $toplevel_dir/new_oov_rates
+
+  # Original type OOV rate
+  cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | sort -u |\
+    perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+    while(<STDIN>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+    $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+    printf("Seen $invoc out of $tot types; type OOV rate is %.2f\n", $oov_rate);' \
+    $toplevel_dir/input_lexicon.txt >> $toplevel_dir/original_oov_rates
+
+  # New type OOV rate
+  cat $dev_text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | sort -u |\
+    perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+    while(<STDIN>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+    $oov_rate = 100.0 * (1.0 - ($invoc / $tot));
+    printf("Seen $invoc out of $tot types; type OOV rate is %.2f\n", $oov_rate);' \
+    $toplevel_dir/lexiconp.txt >> $toplevel_dir/new_oov_rates
+fi
+
+exit 0;
+
+###BELOW HERE IS JUST COMMENTS ###########
+
+#cat /export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt | \
+for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do
+cat /export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt | \
+  perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+  while(<STDIN>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+  $oov_rate = 100.0 * (1.0 - ($invoc / $tot)); printf("Seen $invoc out of $tot tokens; OOV rate is %.2f\n", $oov_rate); ' $x
+done
+# OOV rate measured on the words in the FullLP lexicon.
+#Seen 13675 out of 60613 tokens; OOV rate is 77.44
+#Seen 26936 out of 60613 tokens; OOV rate is 55.56
+
+for x in data/local/filtered_lexicon.txt data/local/lexiconp.txt; do
+cat data/dev10h/text | awk '{for(n=2;n<=NF;n++) { print $n; }}' | \
+  perl -e '$lex = shift @ARGV; open(L, "<$lex")||die; while(<L>){ @A=split; $seen{$A[0]}=1;}
+  while(<STDIN>) { @A=split; $word=$A[0]; $tot++; if(defined $seen{$word}) { $invoc++; }}
+  $oov_rate = 100.0 * (1.0 - ($invoc / $tot)); printf("Seen $invoc out of $tot tokens; OOV rate is %.2f\n", $oov_rate); ' $x
+done
+# zulu limitedlp, dev10h:
+# With the million-word lexicon we more than halve the per-token OOV rate of dev10h.
+#Seen 44680 out of 66891 tokens; OOV rate is 33.20
+#Seen 57095 out of 66891 tokens; OOV rate is 14.64
diff --git a/egs/babel/s5d/local/extract_oov_words.pl b/egs/babel/s5d/local/extract_oov_words.pl
new file mode 100755
index 00000000000..08f8f5d1436
--- /dev/null
+++ b/egs/babel/s5d/local/extract_oov_words.pl
@@ -0,0 +1,70 @@
+#!/usr/bin/env perl
+# Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0.
+
+use Data::Dumper;
+$Data::Dumper::Indent = 1;
+
+binmode STDOUT, ":utf8";
+binmode STDIN, ":utf8";
+
+$ignore_oov = 0;
+$ignore_first_field = 0;
+for($x = 0; $x < 2; $x++) {
+  if ($ARGV[0] eq "-f") {
+    shift @ARGV;
+    $field_spec = shift @ARGV;
+    if ($field_spec =~ m/^\d+$/) {
+      $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
+    }
+    if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
+      if ($1 ne "") {
+        $field_begin = $1 - 1;  # Change to zero-based indexing.
+      }
+      if ($2 ne "") {
+        $field_end = $2 - 1;    # Change to zero-based indexing.
+      }
+    }
+    if (!defined $field_begin && !defined $field_end) {
+      die "Bad argument to -f option: $field_spec";
+    }
+  }
+}
+
+$symtab = shift @ARGV;
+if (!defined $symtab) {
+  print STDERR "Usage: extract_oov_words.pl [options] <symtab> [input transcriptions] > oov words\n" .
+    "options: [--map-oov <oov-symbol>] [-f <field-range>]\n" .
+    "note: <field-range> can look like 4-5, or 4-, or 5-, or 1.\n";
+}
+
+
+open(F, "<:encoding(UTF-8)", $symtab) || die "Error opening symbol table file $symtab";
+while(<F>) {
+  @A = split(" ", $_);
+  @A == 2 || die "bad line in symbol table file: $_";
+
+  if ( not defined( $sym2int{$A[0]} ) ) {
+    $sym2int{$A[0]} = [];
+  }
+  push @{ $sym2int{$A[0]} }, $A[1] + 0;
+}
+
+
+$lines=0;
+while (<>) {
+  @A = split(" ", $_);
+  @B = ();
+  for ($n = 0; $n < @A; $n++) {
+    if ( (!defined $field_begin || $n >= $field_begin)
+         && (!defined $field_end || $n <= $field_end)) {
+      $a = $A[$n];
+      $i = $sym2int{$a};
+      if (!defined ($i)) {
+        print $a . 
"\n"; + } + } + } +} + + diff --git a/egs/babel/s5d/local/filter_keywords.pl b/egs/babel/s5d/local/filter_keywords.pl new file mode 100755 index 00000000000..a724ad77f1a --- /dev/null +++ b/egs/babel/s5d/local/filter_keywords.pl @@ -0,0 +1,68 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Encode; + +my $Usage = < + +EOU + +if(@ARGV != 3) { + die $Usage; +} + +# Get parameters +my $dictin = shift @ARGV; +my $filein = shift @ARGV; +my $fileout = shift @ARGV; + +# Open dictionary +if (!open(D, "<$dictin")) {print "Fail to open dictionary: $dictin\n"; exit 1;} + +# Get input source +my $source = ""; +if ($filein eq "-") { + $source = "STDIN"; +} else { + if (!open(I, "<$filein")) {print "Fail to open input file: $filein\n"; exit 1;} + $source = "I"; +} + +# Open output fst list +my $sourceout = ""; +if ($fileout ne "-") { + if (!open(O, ">$fileout")) {print "Fail to open output file: $fileout\n"; exit 1;} + $sourceout = "O"; +} + +# Read in the dictionary +my %dict = (); +while () { + chomp; + my @col = split(" ", $_); + my $word = shift @col; + my $original_w = $word; + $word =~ tr/a-z/A-Z/; + $dict{$word} = $original_w; +} + +# Process the queries +my $word; +while (<$source>) { + chomp; + my @col = split(" ", $_); + foreach $word (@col) { + if (defined($dict{$word})) { + eval "print $sourceout \"$dict{$word} \""; + } else { + eval "print $sourceout \"$word \""; + } + } + eval "print $sourceout \"\n\""; +} + +close(D); +if ($filein ne "-") {close(I);} +if ($fileout ne "-") {close(O);} diff --git a/egs/babel/s5d/local/filter_kwslist.pl b/egs/babel/s5d/local/filter_kwslist.pl new file mode 100755 index 00000000000..7c57b62517a --- /dev/null +++ b/egs/babel/s5d/local/filter_kwslist.pl @@ -0,0 +1,55 @@ +#!/usr/bin/env perl + +# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0. +# +use strict; +use warnings; +use Getopt::Long; +use XML::Simple; + +my $data = XMLin(\*STDIN); +my $duptime= $ARGV[0]; + +#print Dumper($data); + +# Filters duplicate keywords that have the same keyword and about the same time. +# Relies on the fact that its input is sorted from largest to smallest score. + +foreach my $kwentry (@{$data->{detected_kwlist}}) { + #print "$kwentry->{kwid}\n"; + my $prev_time; + my $prev_file; + + if(ref($kwentry->{kw}) eq 'ARRAY'){ + my @arr = @{$kwentry->{kw}}; + my @newarray = (); + + push @newarray, $arr[0]; + #$arr[0]->{tbeg} . "\n"; + for (my $i = 1; $i < scalar(@arr); $i +=1) { + + my $found = 0; + foreach my $kw (@newarray) { + if (( abs($arr[$i]->{tbeg} - $kw->{tbeg}) < $duptime ) && + ( $arr[$i]->{channel} == $kw->{channel}) && + ( $arr[$i]->{file} eq $kw->{file}) ) { + + $found = 1; + + #print $arr[$i]->{tbeg} . "\n"; + } + } + if ( $found == 0 ) { + push @newarray, $arr[$i]; + } + } + + $kwentry->{kw} = \@newarray; + }else{ + #print $kwentry->{kw}->{tbeg} . "\n"; + } +# print "$kwentry->{kwid}\t$kwentry->{kwtext}\n"; +} +my $xml = XMLout($data, RootName => "kwslist", NoSort=>1); +print $xml; diff --git a/egs/babel/s5d/local/fix_kwslist.pl b/egs/babel/s5d/local/fix_kwslist.pl new file mode 100755 index 00000000000..33c6dc30e82 --- /dev/null +++ b/egs/babel/s5d/local/fix_kwslist.pl @@ -0,0 +1,89 @@ +#!/usr/bin/env perl + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Jan Trmal) +# Apache 2.0. 
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+use XML::Simple;
+use Data::Dumper;
+use File::Basename;
+
+sub mysort {
+  if ($a->{kwid} =~ m/[0-9]+$/ and $b->{kwid} =~ m/[0-9]+$/) {
+    ($a->{kwid} =~ /([0-9]*)$/)[0] <=> ($b->{kwid} =~ /([0-9]*)$/)[0]
+  } else {
+    $a->{kwid} cmp $b->{kwid};
+  }
+}
+
+my $Usage = <<EOU;
+Usage: fix_kwslist.pl [options] <kwlist.xml> <kwslist.xml> <fixed_kwslist.xml|->
+ e.g.: fix_kwslist.pl --kwlist-filename=kwlist.xml kwlist.xml kwslist.xml fixed_kwslist.xml
+
+Allowed options:
+  --kwlist-filename   : Kwlist filename with version info  (string, default = "")
+
+EOU
+
+my $kwlist_filename="";
+GetOptions('kwlist-filename=s' => \$kwlist_filename);
+
+if (@ARGV != 3) {
+  die $Usage;
+}
+
+# Work out the input/output source
+my $kwlist_in = shift @ARGV;
+my $kwslist_in = shift @ARGV;
+my $fixed_kwslist_out = shift @ARGV;
+
+my $KW = XMLin($kwlist_in);
+my $KWS = XMLin($kwslist_in);
+
+# Extract keywords from kwlist.xml
+my %kwlist;
+my $language = $KW->{language};
+foreach my $kwentry (@{$KW->{kw}}) {
+  $kwlist{$kwentry->{kwid}} = 1;
+}
+
+# Now work on the kwslist
+$KWS->{language} = $language;
+if ($kwlist_filename ne "") {
+  $KWS->{kwlist_filename} = basename($kwlist_filename);
+} elsif ($KWS->{kwlist_filename} eq "") {
+  $KWS->{kwlist_filename} = basename($kwlist_in);
+}
+foreach my $kwentry (@{$KWS->{detected_kwlist}}) {
+  if (defined($kwlist{$kwentry->{kwid}})) {
+    delete $kwlist{$kwentry->{kwid}};
+  }
+}
+
+# Add empty entries for keywords that had no detections at all...
+foreach my $kw (keys %kwlist) {
+  my %empty;
+  $empty{search_time} = 1;
+  $empty{kwid} = $kw;
+  $empty{oov_count} = 0;
+  push(@{$KWS->{detected_kwlist}}, \%empty);
+}
+
+my @sorted = sort mysort @{$KWS->{detected_kwlist}};
+$KWS->{detected_kwlist} = \@sorted;
+
+my $xml = XMLout($KWS, RootName => "kwslist", NoSort=>0);
+if ($fixed_kwslist_out eq "-") {
+  print $xml;
+} else {
+  if (!open(O, ">$fixed_kwslist_out")) {
+    print "Failed to open output file: $fixed_kwslist_out\n";
+    exit 1;
+  }
+  print O $xml;
+  close(O);
+}
diff --git a/egs/babel/s5d/local/generate_confusion_matrix.sh b/egs/babel/s5d/local/generate_confusion_matrix.sh
new file mode 100755
index 00000000000..48263e729de
--- /dev/null
+++ b/egs/babel/s5d/local/generate_confusion_matrix.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+# Copyright 2014  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+acwt=0.1
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 5 ]; then
+  echo "Usage $0 [options] <data-dir> <model-dir> <ali-dir> <lat-dir> <work-dir>"
+  echo " e.g.: local/generate_confusion_matrix.sh --nj 32 exp/sgmm5/graph exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix"
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --nj <nj>          # number of parallel jobs"
+  echo "  --cmd <cmd>        # Command to run in parallel with"
+  echo "  --acwt <acwt>      # Acoustic model weight. Value will be used for 1-best path decoding of the lattices"
+  echo ""
+  echo "Please note that the output confusion matrix will be phoneme-based"
+  echo "and all the phone contexts (singleton, intra, begin, end) or phoneme"
+  echo "tags (such as tone or stress) will be collapsed into a single monophone"
+  echo ""
+  echo "The output format is line oriented."
+  echo "Each line can have one of these four formats (A, B being different phones, <eps> a special symbol):"
+  echo "  A A <count>        #Number of hits, i.e. correctly determined phones"
+  echo "  A B <count>        #Number of substitutions of A with B"
+  echo "  A <eps> <count>    #Number of deletions"
+  echo "  <eps> A <count>    #Number of insertions"
+  exit 1;
+fi
+
+set -u
+set -e
+set -o pipefail
+
+data=$1; shift
+modeldir=$1; shift
+alidir=$1; shift
+latdir=$1; shift
+wdir=$1; shift
+
+model=$modeldir/final.mdl
+[ ! -f $model ] && echo "File $model does not exist!" && exit 1
+phones=$data/phones.txt
+[ ! -f $phones ] && echo "File $phones does not exist!" && exit 1
+
+! ali_nj=`cat $alidir/num_jobs` && echo "Could not open the file $alidir/num_jobs" && exit 1
+! lat_nj=`cat $latdir/num_jobs` && echo "Could not open the file $latdir/num_jobs" && exit 1
+if [ $ali_nj -ne $lat_nj ] ; then
+  echo "Alignments num_jobs and lattices num_jobs mismatch!"
+  exit 1
+fi
+[ ! $nj -le $ali_nj ] && echo "Number of jobs is too high (max is $ali_nj)." && nj=$ali_nj
+
+mkdir -p $wdir/log
+
+cat $data/phones.txt | sed 's/_[B|E|I|S]//g' |\
+  sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g' > $wdir/phones.txt
+
+echo "Converting alignments to phone sequences..."
+$cmd JOB=1:$nj $wdir/log/ali_to_phones.JOB.log \
+  align-text\
+   ark:\<\( \
+     ali-to-phones $model ark:"gunzip -c $alidir/ali.JOB.gz|" ark,t:- \|\
+     int2sym.pl -f 2- $wdir/phones.txt - \) \
+   ark:\<\( \
+     lattice-to-phone-lattice $model ark:"gunzip -c $latdir/lat.JOB.gz|" ark:- \| \
+     lattice-best-path --acoustic-scale=$acwt ark:- ark,t:- ark:/dev/null \| \
+     int2sym.pl -f 2- $wdir/phones.txt - \) \
+   ark:$wdir/confusions.JOB.txt || exit 1
+
+confusion_files=""
+for i in `seq 1 $nj` ; do
+  confusion_files="$confusion_files $wdir/confusions.$i.txt"
+done
+
+echo "Converting statistics..."
+cat $confusion_files | cut -f 2- -d ' ' | sed 's/ *; */\n/g'| sort | uniq -c | \
+  grep -v -E '<eps>|<unk>|<oov>|SIL' | \
+  perl -ane '
+    die unless scalar @F == 3;
+    print "$F[1] $F[2] $F[0]\n";
+  ' > $wdir/confusions.txt
+
+exit 0
+#-echo "Converting alignments to phone sequences..."
+#-$cmd JOB=1:$nj $wdir/log/ali_to_phones.JOB.log \
+#-  ali-to-phones $model ark:"gunzip -c $alidir/ali.JOB.gz|" ark,t:- \|\
+#-  int2sym.pl -f 2- $wdir/phones.txt - \> $wdir/ali.JOB.txt
+#-
+#-echo "Converting lattices to phone sequences..."
+#-$cmd JOB=1:$nj $wdir/log/lat_to_phones.JOB.log \
#-  lattice-to-phone-lattice $model ark:"gunzip -c $latdir/lat.JOB.gz|" ark:- \| \
+#-  lattice-best-path --acoustic-scale=$acwt ark:- ark,t:- ark:/dev/null \| \
+#-  int2sym.pl -f 2- $wdir/phones.txt - \> $wdir/lat.JOB.txt
+
diff --git a/egs/babel/s5d/local/generate_example_kws.sh b/egs/babel/s5d/local/generate_example_kws.sh
new file mode 100755
index 00000000000..e90752926b3
--- /dev/null
+++ b/egs/babel/s5d/local/generate_example_kws.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+
+
+if [ $# -ne 2 ]; then
+  echo "Usage: local/generate_example_kws.sh <data-dir> <kws-data-dir>"
+  echo " e.g.: local/generate_example_kws.sh data/test_eval92/ <kws-data-dir>"
+  exit 1;
+fi
+
+datadir=$1;
+kwsdatadir=$2;
+text=$datadir/text;
+
+mkdir -p $kwsdatadir;
+
+# Generate keywords; we generate 20 unigram keywords with at least 20 counts,
+# 20 bigram keywords with at least 4 counts and 10 trigram keywords with at
+# least 3 counts.
+cat $text | perl -e '
+  %unigram = ();
+  %bigram = ();
+  %trigram = ();
+  while(<>) {
+    chomp;
+    @col=split(" ", $_);
+    shift @col;
+    for($i = 0; $i < @col; $i++) {
+      # unigram case
+      if (!defined($unigram{$col[$i]})) {
+        $unigram{$col[$i]} = 0;
+      }
+      $unigram{$col[$i]}++;
+
+      # bigram case
+      if ($i < @col-1) {
+        $word = $col[$i] . " " . $col[$i+1];
+        if (!defined($bigram{$word})) {
+          $bigram{$word} = 0;
+        }
+        $bigram{$word}++;
+      }
+
+      # trigram case
+      if ($i < @col-2) {
+        $word = $col[$i] . " " . $col[$i+1] . " " . $col[$i+2];
+        if (!defined($trigram{$word})) {
+          $trigram{$word} = 0;
+        }
+        $trigram{$word}++;
+      }
+    }
+  }
+
+  $max_count = 100;
+  $total = 20;
+  $current = 0;
+  $min_count = 20;
+  while ($current < $total && $min_count <= $max_count) {
+    foreach $x (keys %unigram) {
+      if ($unigram{$x} == $min_count) {
+        print "$x\n";
+        $unigram{$x} = 0;
+        $current++;
+      }
+      if ($current == $total) {
+        last;
+      }
+    }
+    $min_count++;
+  }
+
+  $total = 20;
+  $current = 0;
+  $min_count = 4;
+  while ($current < $total && $min_count <= $max_count) {
+    foreach $x (keys %bigram) {
+      if ($bigram{$x} == $min_count) {
+        print "$x\n";
+        $bigram{$x} = 0;
+        $current++;
+      }
+      if ($current == $total) {
+        last;
+      }
+    }
+    $min_count++;
+  }
+
+  $total = 10;
+  $current = 0;
+  $min_count = 3;
+  while ($current < $total && $min_count <= $max_count) {
+    foreach $x (keys %trigram) {
+      if ($trigram{$x} == $min_count) {
+        print "$x\n";
+        $trigram{$x} = 0;
+        $current++;
+      }
+      if ($current == $total) {
+        last;
+      }
+    }
+    $min_count++;
+  }
+  ' > $kwsdatadir/raw_keywords.txt
+
+echo "Keywords generation succeeded"
diff --git a/egs/babel/s5d/local/generate_phoneme_transcription.sh b/egs/babel/s5d/local/generate_phoneme_transcription.sh
new file mode 100755
index 00000000000..4ef0e556277
--- /dev/null
+++ b/egs/babel/s5d/local/generate_phoneme_transcription.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# Copyright 2014  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+
+# Begin configuration section.
+nj=4
+cmd=run.pl
+acwt=0.1
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. utils/parse_options.sh || exit 1;
+
+if [ $# != 4 ]; then
+  echo "Usage $0 [options] <data-dir> <model-dir> <lat-dir> <work-dir>"
+  echo " e.g.: local/generate_phoneme_transcription.sh --nj 32 data/lang exp/sgmm5 exp/sgmm5_denlats exp/phone_trans"
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --nj <nj>          # number of parallel jobs"
+  echo "  --cmd <cmd>        # Command to run in parallel with"
+  echo "  --acwt <acwt>      # Acoustic model weight. Value will be used for 1-best path decoding of the lattices"
+  echo ""
+  echo "Please note that the output transcriptions will be phoneme-based"
+  echo "and all the phone contexts (singleton, intra, begin, end) or phoneme"
+  echo "tags (such as tone or stress) will be collapsed into a single monophone"
+  exit 1;
+fi
+
+set -u
+set -e
+set -o pipefail
+
+data=$1; shift
+modeldir=$1; shift
+latdir=$1; shift
+wdir=$1; shift
+
+model=$modeldir/final.mdl
+[ ! -f $model ] && echo "File $model does not exist!" && exit 1
+phones=$data/phones.txt
+[ ! -f $phones ] && echo "File $phones does not exist!" && exit 1
+
+! lat_nj=`cat $latdir/num_jobs` && echo "Could not open the file $latdir/num_jobs" && exit 1
+[ ! $nj -le $lat_nj ] && echo "Number of jobs is too high (max is $lat_nj)." \
&& nj=$lat_nj + +mkdir -p $wdir/log + +cat $data/phones.txt | sed 's/_[B|E|I|S]//g' |\ + sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g' > $wdir/phone_map + +echo "Converting alignments to phone sequences..." +$cmd JOB=1:$nj $wdir/log/phones.JOB.log \ + lattice-to-phone-lattice $model ark:"gunzip -c $latdir/lat.JOB.gz|" ark:- \|\ + lattice-best-path --acoustic-scale=$acwt ark:- ark,t:- ark:/dev/null \|\ + int2sym.pl -f 2- $wdir/phone_map - \> $wdir/phones.JOB.txt || exit 1 + +confusion_files="" +for i in `seq 1 $nj` ; do + confusion_files="$confusion_files $wdir/phones.$i.txt" +done + +echo "Converting statistics..." +cat $confusion_files | sort > $wdir/phones.txt + +exit 0 +#-echo "Converting alignments to phone sequences..." +#-$cmd JOB=1:$nj $wdir/log/ali_to_phones.JOB.log \ +#- ali-to-phones $model ark:"gunzip -c $alidir/ali.JOB.gz|" ark,t:- \|\ +#- int2sym.pl -f 2- $wdir/phones.txt - \> $wdir/ali.JOB.txt +#- +#-echo "Converting lattices to phone sequences..." +#-$cmd JOB=1:$nj $wdir/log/lat_to_phones.JOB.log \ +#- lattice-to-phone-lattice $model ark:"gunzip -c $latdir/lat.JOB.gz|" ark:- \| \ +#- lattice-best-path --acoustic-scale=$acwt ark:- ark,t:- ark:/dev/null \| \ +#- int2sym.pl -f 2- $wdir/phones.txt - \> $wdir/lat.JOB.txt + diff --git a/egs/babel/s5d/local/generate_proxy_keywords.sh b/egs/babel/s5d/local/generate_proxy_keywords.sh new file mode 100755 index 00000000000..584f7d7902e --- /dev/null +++ b/egs/babel/s5d/local/generate_proxy_keywords.sh @@ -0,0 +1,176 @@ +#!/bin/bash + +# Copyright 2012-2014 Guoguo Chen +# Apache 2.0. + +# Begin configuration section. +nj=8 +cmd=run.pl +beam=-1 # Beam for proxy FST, -1 means no prune +phone_beam=-1 # Beam for KxL2xE FST, -1 means no prune +nbest=-1 # Use top n best proxy keywords in proxy FST, -1 means all + # proxies +phone_nbest=50 # Use top n best phone sequences in KxL2xE, -1 means all + # phone sequences +confusion_matrix= # If supplied, using corresponding E transducer +count_cutoff=1 # Minimal count to be considered in the confusion matrix; + # will ignore phone pairs that have count less than this. +pron_probs=false # If true, then lexicon looks like: + # Word Prob Phone1 Phone2... +# End configuration section. + +[ -f ./path.sh ] && . ./path.sh; # source the path. +echo "$0 " "$@" +. parse_options.sh || exit 1; + +if [ $# -ne 1 ]; then + echo "Generate proxy keywords for IV/OOV keywords. Phone confusions will be" + echo "used when generating the proxies if the confusion matrix is supplied." + echo "If you are going to use the confusion matrix, please use the following" + echo "format for the file \$confusion_matrix:" + echo " p1 p2 count1 // For substitution" + echo " p3 count2 // For deletion" + echo " p4 count3 // For insertion" + echo "" + echo "Proxies keywords are generated using:" + echo "K x L2 x E x L1'" + echo "where K is a keyword FST, L2 is a lexicon that contains pronunciations" + echo "of keywords in K, E is an edit distance FST that contains the phone" + echo "confusions and L1 is the original lexicon." + echo "" + echo "The script assumes that L1.lex, L2.lex, words.txt and keywords.txt have" + echo "been prepared and stored in the directory ." + echo "" + echo "Usage: local/generate_example_kws.sh " + echo " e.g.: local/generate_example_kws.sh data/dev10h/kws_proxy/" + exit 1; +fi + +set -e +set -o pipefail + +kwsdatadir=$1 + +# Checks some files. +for f in $kwsdatadir/L1.lex $kwsdatadir/L2.lex \ + $kwsdatadir/words.txt $kwsdatadir/keywords.txt; do + [ ! 
-f $f ] && echo "$0: no such file $f" && exit 1
+done
+
+# Gets phone symbols
+phone_start=2
+if $pron_probs; then
+  phone_start=3
+fi
+
+pron_probs_param="";
+if $pron_probs; then
+  pron_probs_param="--pron-probs";
+fi
+
+cat $kwsdatadir/L1.lex | \
+  perl -e '
+  while ( $line = <STDIN> ) {
+    chomp $line;
+    ($word, $pron) = split " ", $line, 2;
+    $pron = join(" ", split(" ", $pron));
+    push @{$LEX{$pron}}, $word;
+  }
+
+  open(L1, "| sort -u > $ARGV[0]") or die "Cannot open $ARGV[0]\n";
+  open(MAP, "| sort -u > $ARGV[1]") or die "Cannot open $ARGV[1]\n";
+  foreach $pron (keys %LEX) {
+    $head = $LEX{$pron}->[0];
+    print L1 "$head $pron\n";
+    foreach $alt (@{$LEX{$pron}}) {
+      print MAP "0 0 $alt $head\n";
+    }
+  }
+  print MAP "0\n";
+  close(L1);
+  close(MAP);
+' $kwsdatadir/L1_dedup.lex $kwsdatadir/L1.revdup.fst.txt
+
+fstcompile --isymbols=$kwsdatadir/words.txt --osymbols=$kwsdatadir/words.txt $kwsdatadir/L1.revdup.fst.txt | \
+  fstarcsort --sort_type=olabel - $kwsdatadir/L1.revdup.fst
+
+ndisambig=`utils/add_lex_disambig.pl \
+  $pron_probs_param $kwsdatadir/L1_dedup.lex $kwsdatadir/L1_disambig.lex`
+ndisambig=$[$ndisambig+1]; # add one disambig symbol for silence in lexicon FST.
+( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $kwsdatadir/disambig.txt
+
+cat $kwsdatadir/L2.lex $kwsdatadir/L1.lex |\
+  awk '{for(i='$phone_start'; i <= NF; i++) {print $i;}}' |\
+  sort -u | sed '1i\<eps>' |\
+  cat - $kwsdatadir/disambig.txt | awk 'BEGIN{x=0} {print $0"\t"x; x++;}' \
+  > $kwsdatadir/phones.txt
+
+# Compiles lexicon into FST
+cat $kwsdatadir/L2.lex |\
+  utils/make_lexicon_fst.pl $pron_probs_param - |\
+  fstcompile --isymbols=$kwsdatadir/phones.txt \
+  --osymbols=$kwsdatadir/words.txt - |\
+  fstinvert | fstarcsort --sort_type=olabel > $kwsdatadir/L2.fst
+
+phone_disambig_symbol=`grep \#0 $kwsdatadir/phones.txt | awk '{print $2}'`
+word_disambig_symbol=`grep \#0 $kwsdatadir/words.txt | awk '{print $2}'`
+phone_disambig_symbols=`grep "^#" $kwsdatadir/phones.txt |\
+  awk '{print $2}' | tr "\n" " "`
+word_disambig_symbols=`grep "^#" $kwsdatadir/words.txt |\
+  awk '{print $2}' | tr "\n" " "`
+cat $kwsdatadir/L1_disambig.lex |\
+  utils/make_lexicon_fst.pl $pron_probs_param - |\
+  fstcompile --isymbols=$kwsdatadir/phones.txt \
+  --osymbols=$kwsdatadir/words.txt - |\
+  fstaddselfloops "echo $phone_disambig_symbol |" \
+  "echo $word_disambig_symbol |" |\
+  fstdeterminize | fstrmsymbols "echo $phone_disambig_symbols|" |\
+  fstrmsymbols --remove-from-output=true "echo $word_disambig_symbols|" |\
+  fstarcsort --sort_type=ilabel > $kwsdatadir/L1.fst
+
+# Compiles E.fst
+confusion_matrix_param=""
+if [ ! \
-z $confusion_matrix ]; then + echo "$0: Using confusion matrix, normalizing" + local/count_to_logprob.pl --cutoff $count_cutoff \ + $confusion_matrix $kwsdatadir/confusion.txt + confusion_matrix_param="--confusion-matrix $kwsdatadir/confusion.txt" +fi +cat $kwsdatadir/phones.txt |\ + grep -v -E "<.*>" | grep -v "SIL" | awk '{print $1;}' |\ + local/build_edit_distance_fst.pl --boundary-off=true \ + $confusion_matrix_param - - |\ + fstcompile --isymbols=$kwsdatadir/phones.txt \ + --osymbols=$kwsdatadir/phones.txt - $kwsdatadir/E.fst + +# Pre-composes L2 and E, for the sake of efficiency +fstcompose $kwsdatadir/L2.fst $kwsdatadir/E.fst |\ + fstarcsort --sort_type=ilabel > $kwsdatadir/L2xE.fst + +keywords=$kwsdatadir/keywords.int +# Prepares for parallelization +cat $kwsdatadir/keywords.txt |\ + utils/sym2int.pl -f 2- $kwsdatadir/words.txt | sort -R > $keywords + +nof_keywords=`cat $keywords|wc -l` +if [ $nj -gt $nof_keywords ]; then + nj=$nof_keywords + echo "$0: Too many number of jobs, using $nj instead" +fi + +# Generates the proxy keywords +mkdir -p $kwsdatadir/split/log +$cmd JOB=1:$nj $kwsdatadir/split/log/proxy.JOB.log \ + split -n r/JOB/$nj $keywords \| \ + generate-proxy-keywords --verbose=1 \ + --proxy-beam=$beam --proxy-nbest=$nbest \ + --phone-beam=$phone_beam --phone-nbest=$phone_nbest \ + $kwsdatadir/L2xE.fst $kwsdatadir/L1.fst ark:- ark,t:$kwsdatadir/split/proxy.JOB.fsts + +proxy_fsts="" +for j in `seq 1 $nj`; do + proxy_fsts="$proxy_fsts $kwsdatadir/split/proxy.$j.fsts" +done +cat $proxy_fsts | fsttablecompose $kwsdatadir/L1.revdup.fst ark:- ark:- | \ + fsts-project ark:- ark:$kwsdatadir/keywords.fsts diff --git a/egs/babel/s5d/local/kaldi_dir2uem.py b/egs/babel/s5d/local/kaldi_dir2uem.py new file mode 100755 index 00000000000..26b4ec1aaba --- /dev/null +++ b/egs/babel/s5d/local/kaldi_dir2uem.py @@ -0,0 +1,101 @@ +#! /usr/bin/env python + +import argparse, sys +from argparse import ArgumentParser +import re + +def main(): + parser = ArgumentParser(description='Convert kaldi data directory to uem dat files', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--verbose', type=int, \ + dest='verbose', default=0, \ + help='Give higher verbose for more logging') + parser.add_argument('--get-text', action='store_true', \ + help='Get text in dat file') + parser.add_argument('--prefix', type=str, \ + help='Add db file name as db--{utt/spk}.dat') + parser.add_argument('kaldi_dir', \ + help='Kaldi data directory') + parser.add_argument('output_dir', \ + help='Directory to store uem dat files') + parser.usage=':'.join(parser.format_usage().split(':')[1:]) \ + + 'e.g. 
: %(prog)s --prefix 203-lao-v0 data/dev10h.seg CMU_db' + options = parser.parse_args() + + if options.get_text: + try: + text_file = open(options.kaldi_dir+'/text', 'r') + except IOError as e: + repr(e) + sys.stderr.write("%s: No such file %s\n" % (sys.argv[0], options.kaldi_dir+'/text')) + sys.exit(1) + + try: + segments_file = open(options.kaldi_dir+'/segments', 'r') + except IOError as e: + repr(e) + sys.stderr.write("%s: No such file %s\n" % (sys.argv[0], options.kaldi_dir+'/segments')) + sys.exit(1) + + try: + scp_file = open(options.kaldi_dir+'/wav.scp', 'r') + except IOError as e: + repr(e) + sys.stderr.write("%s: No such file %s\n" % (sys.argv[0], options.kaldi_dir+'/wav.scp')) + sys.exit(1) + + reco2file_map = {} + for line in scp_file.readlines(): + splits = line.strip().split() + m = re.search(r".*/(?P[0-9A-Za-z_]*\.(sph|wav)).*", line) + if not m: + sys.stderr.write("%s does not contain a valid speech file (.wav or .sph)\n" % line.strip()) + sys.exit(1) + reco2file_map[splits[0]] = m.group('file_name') + # End for + + spk2utt_map = {} + + if options.prefix == None: + prefix = options.kaldi_dir.split('/')[-1].split('.')[0] + else: + prefix = options.prefix + + try: + utt_dat = open(options.output_dir+'/db-'+prefix+'-utt.dat', 'w') + spk_dat = open(options.output_dir+'/db-'+prefix+'-spk.dat', 'w') + except IOError as e: + repr(e) + sys.stderr.write("%s: Could not write dat files in %s\n" % (sys.argv[0], options.output_dir)) + sys.exit(1) + + for line in segments_file.readlines(): + utt_id, file_id, start, end = line.strip().split() + + if (options.get_text): + splits = text_file.readline().split() + while splits[0] < utt_id: + splits = text_file.readline().split() + text = ' '.join(splits[1:]) + else: + text = "" + + utt_dat.write("{UTTID %s} {UTT %s} {SPK %s} {FROM %s} {TO %s} {TEXT %s}\n" % (utt_id, utt_id, file_id, start, end, text)) + spk2utt_map.setdefault(file_id, []) + spk2utt_map[file_id].append(utt_id) + + for spk, utts in spk2utt_map.items(): + try: + spk_dat.write("{SEGS %s} {ADC %s} {CONV %s.wav} {CHANNEL 1} {DUR }\n" % (' '.join(utts), reco2file_map[spk], spk)) + except KeyError as e: + repr(e) + sys.stderr.write("%s: Error in getting file for %s\n" % (sys.argv[0], spk)) + sys.exit(1) + # End for + + segments_file.close() + utt_dat.close() + spk_dat.close() + +if __name__ == '__main__': + main() diff --git a/egs/babel/s5d/local/kwords2indices.pl b/egs/babel/s5d/local/kwords2indices.pl new file mode 100755 index 00000000000..5d5f0a3ad45 --- /dev/null +++ b/egs/babel/s5d/local/kwords2indices.pl @@ -0,0 +1,123 @@ +#!/usr/bin/env perl +# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0. + +use Data::Dumper; +$Data::Dumper::Indent = 1; + +binmode STDOUT, ":utf8"; +binmode STDIN, ":utf8"; + +sub permute { + + my $last = pop @_; + + unless(@_) { + return map([$_], @$last); + } + + return map { + my $left = $_; + map([@$left, $_], @$last) + } + permute(@_); +} + +$oov_count=0; + +$ignore_oov = 0; +$ignore_first_field = 0; +for($x = 0; $x < 2; $x++) { + if ($ARGV[0] eq "--map-oov") { + shift @ARGV; $map_oov = shift @ARGV; + } + if ($ARGV[0] eq "-f") { + shift @ARGV; + $field_spec = shift @ARGV; + if ($field_spec =~ m/^\d+$/) { + $field_begin = $field_spec - 1; $field_end = $field_spec - 1; + } + if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) + if ($1 ne "") { + $field_begin = $1 - 1; # Change to zero-based indexing. + } + if ($2 ne "") { + $field_end = $2 - 1; # Change to zero-based indexing. 
+      }
+    }
+    if (!defined $field_begin && !defined $field_end) {
+      die "Bad argument to -f option: $field_spec";
+    }
+  }
+}
+
+$symtab = shift @ARGV;
+if (!defined $symtab) {
+  print STDERR "Usage: kwords2indices.pl [options] symtab [input transcriptions] > output transcriptions\n" .
+    "options: [--map-oov <oov-symbol>] [-f <field-range>]\n" .
+    "note: <field-range> can look like 4-5, or 4-, or 5-, or 1.\n";
+}
+open(F, "<:encoding(UTF-8)", $symtab) || die "Error opening symbol table file $symtab";
+while(<F>) {
+  @A = split(" ", $_);
+  @A == 2 || die "bad line in symbol table file: $_";
+
+  if ( not defined( $sym2int{$A[0]} ) ) {
+    $sym2int{$A[0]} = [];
+  }
+  push @{ $sym2int{$A[0]} }, $A[1] + 0;
+}
+#print Dumper(\%sym2int);
+
+if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up
+  if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; }
+  $map_oov = $sym2int{$map_oov}->[0];  # the table maps to lists of ids; take the first one
+}
+
+$max_warning = 20;  # maximum number of OOV-mapping warnings to print
+$lines=0;
+while (<>) {
+  @A = split(" ", $_);
+  @B = ();
+  $lines = $lines + 1;
+  $undefined_words = 0;
+  for ($n = 1; $n < @A; $n++) {
+    $a = $A[$n];
+    $i = $sym2int{$a};
+    if (!defined ($i)) {
+      if (defined $map_oov) {
+        if ($num_warning++ < $max_warning) {
+          print STDERR "kwords2indices.pl: replacing $a with $map_oov\n";
+          if ($num_warning == $max_warning) {
+            print STDERR "kwords2indices.pl: not warning for OOVs any more times\n";
+          }
+        }
+        $i = [ $map_oov ];
+      } else {
+        $pos = $n+1;
+        die "kwords2indices.pl: undefined symbol $a (in position $pos)\n";
+      }
+      $undefined_words = $undefined_words + 1;
+    }
+    $a = $i;
+    push @B, $a;
+  }
+  #if ( defined $sym2int{$A[$n]} ) {
+  #  push @B, $sym2int{$A[$n]};
+  #} else {
+  #  push @B, [0];
+  #}
+  if ($undefined_words > 0) {
+    $oov_count = $oov_count + 1;
+  }
+  @C = permute @B;
+  #print Dumper(\@B);
+  #print Dumper(\@C);
+  foreach $phrase ( @C ) {
+    print "$A[0] ";
+    print join(" ", @{$phrase});
+    print "\n";
+  }
+}
+
+print STDERR "Remapped/ignored $oov_count phrases...\n";
+
diff --git a/egs/babel/s5d/local/kws_combine.sh b/egs/babel/s5d/local/kws_combine.sh
new file mode 100755
index 00000000000..8934faf7d30
--- /dev/null
+++ b/egs/babel/s5d/local/kws_combine.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+
+# Copyright 2013-2014  Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Script for system combination of the KWS posting lists
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+# Ntrue-scale
+ntrue_scale=1.1
+min_lmw=8
+max_lmw=12
+extraid=
+skip_scoring=false
+optimize_weights=false
+#end of configuration section
+
+help_message="Usage: $(basename $0) [options] <data-dir> <lang-dir> <decode-dir1>[:lmwt-bias] <decode-dir2>[:lmwt-bias] [<decode-dir3>[:lmwt-bias] ... ] <out-dir>
+E.g.: $(basename $0) data/dev10h.pem data/lang exp/tri6_nnet/decode_dev10h.pem/kws_10/ exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/ exp/combine/dev10hx.pem
+"
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
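+
+# (Weighting overview, summarizing the code below: each decode dir may carry
+# an ":N" suffix that acts as an integer weight, defaulting to 1; the weights
+# are normalized to sum to one, and the weighted posting lists are combined
+# under a power rule whose exponent 0.PWR is swept from 0.1 to 0.9.)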
+if [ $# -lt 5 ]; then
+  printf "$help_message\n";
+  exit 1;
+fi
+
+datadir=$1
+lang=$2
+odir=${@: -1}  # last argument to the script
+shift 2;
+decode_dirs=( $@ )  # read the remaining arguments into an array
+unset decode_dirs[${#decode_dirs[@]}-1]  # 'pop' the last argument which is odir
+num_sys=${#decode_dirs[@]}  # number of systems to combine
+
+
+if [ -z "$extraid" ] ; then
+  kwsdatadir=$datadir/kws
+  kwsoutputdir="$odir/kws"
+else
+  kwsdatadir=$datadir/${extraid}_kws
+  kwsoutputdir="$odir/${extraid}_kws"
+fi
+
+for f in $kwsdatadir/ecf.xml $kwsdatadir/kwlist.xml ; do
+  [ ! -f $f ] && echo "$0: file $f does not exist" && exit 1;
+done
+ecf=$kwsdatadir/ecf.xml
+kwlist=$kwsdatadir/kwlist.xml
+
+# Duration
+duration=`head -1 $ecf |\
+  grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\
+  perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2;}'`
+
+mkdir -p $kwsoutputdir/log
+
+total_sum=0
+for i in `seq 0 $[num_sys-1]`; do
+  decode_dir=${decode_dirs[$i]}
+  offset=`echo $decode_dir | cut -d: -s -f2` # add this to the lm-weight.
+  [ -z "$offset" ] && offset=1
+  total_sum=$(($total_sum+$offset))
+done
+
+systems=""
+for i in `seq 0 $[num_sys-1]`; do
+  decode_dir=${decode_dirs[$i]}
+  offset=`echo $decode_dir | cut -d: -s -f2` # add this to the lm-weight.
+  decode_dir=`echo $decode_dir | cut -d: -f1`
+  [ -z "$offset" ] && offset=1
+
+  weight=$(perl -e "print ($offset/$total_sum);")
+  if [ -f $decode_dir ] ; then
+    systems+="$weight $decode_dir "
+  else
+    kwsfile=$decode_dir/kwslist.unnormalized.xml
+    [ ! -f ${kwsfile} ] && echo "The file ${kwsfile} does not exist!" && exit 1
+    systems+="$weight ${kwsfile} "
+  fi
+done
+
+echo $systems
+
+# Combination of the weighted sum and power rule
+$cmd PWR=1:9 $kwsoutputdir/log/combine_kws.PWR.log \
+  mkdir -p ${kwsoutputdir}_PWR '&&' \
+  local/naive_comb.pl --method=2 --power=0.PWR \
+  $systems ${kwsoutputdir}_PWR/kwslist.unnormalized.xml || exit 1
+
+$cmd PWR=1:9 $kwsoutputdir/log/postprocess_kws.PWR.log \
+  utils/kwslist_post_process.pl --duration=${duration} --digits=3 \
+  --normalize=true --Ntrue-scale=${ntrue_scale} \
+  ${kwsoutputdir}_PWR/kwslist.unnormalized.xml \
+  ${kwsoutputdir}_PWR/kwslist.xml || exit 1
+
+echo "Scoring..."
+if ! $skip_scoring ; then
+$cmd PWR=1:9 $kwsoutputdir/log/score_kws.PWR.log \
+  local/kws_score.sh --extraid "${extraid}" $datadir ${kwsoutputdir}_PWR || exit 1
+fi
+
+
diff --git a/egs/babel/s5d/local/kws_data_prep.sh b/egs/babel/s5d/local/kws_data_prep.sh
new file mode 100755
index 00000000000..3882c99ce6d
--- /dev/null
+++ b/egs/babel/s5d/local/kws_data_prep.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+
+# Begin configuration section.
+case_insensitive=true
+use_icu=true
+icu_transform="Any-Lower"
+silence_word=  # Optional silence word to insert (once) between words of the transcript.
+# End configuration section.
+
+echo $0 "$@"
+
+help_message="
+   Usage: local/kws_data_prep.sh <lang-dir> <data-dir> <kws-data-dir>
+   e.g.: local/kws_data_prep.sh data/lang/ data/eval/ data/kws/
+   Input is in <kws-data-dir>: kwlist.xml, ecf.xml (rttm file not needed).
+   Output is in <kws-data-dir>: keywords.txt, keywords_all.int, kwlist_invocab.xml,
+      kwlist_outvocab.xml, keywords.fsts
+   Note: most important output is keywords.fsts
+   allowed switches:
+      --case-sensitive             # Shall we be case-sensitive or not?
+                                   # Please note that case sensitivity depends
+                                   # on the shell locale!
+ --use-uconv # Use the ICU uconv binary to normalize casing + --icu-transform # When using ICU, use this transliteration + +" + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + + +if [ $# -ne 3 ]; then + printf "FATAL: invalid number of arguments.\n\n" + printf "$help_message\n" + exit 1; +fi + +set -u +set -e +set -o pipefail + +langdir=$1; +datadir=$2; +kwsdatadir=$3; +keywords=$kwsdatadir/kwlist.xml + + +mkdir -p $kwsdatadir; + +cat $keywords | perl -e ' + #binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; + + use XML::Simple; + use Data::Dumper; + + my $data = XMLin(\*STDIN); + + #print Dumper($data->{kw}); + foreach $kwentry (@{$data->{kw}}) { + #print Dumper($kwentry); + print "$kwentry->{kwid}\t$kwentry->{kwtext}\n"; + } +' > $kwsdatadir/keywords.txt + + +# Map the keywords to integers; note that we remove the keywords that +# are not in our $langdir/words.txt, as we won't find them anyway... +#cat $kwsdatadir/keywords.txt | babel/filter_keywords.pl $langdir/words.txt - - | \ +# sym2int.pl --map-oov 0 -f 2- $langdir/words.txt | \ +if $case_insensitive && ! $use_icu ; then + echo "$0: Running case insensitive processing" + cat $langdir/words.txt | tr '[:lower:]' '[:upper:]' > $kwsdatadir/words.txt + [ `cut -f 1 -d ' ' $kwsdatadir/words.txt | sort -u | wc -l` -ne `cat $kwsdatadir/words.txt | wc -l` ] && \ + echo "$0: Warning, multiple words in dictionary differ only in case: " + + + cat $kwsdatadir/keywords.txt | tr '[:lower:]' '[:upper:]' | \ + sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt > $kwsdatadir/keywords_all.int +elif $case_insensitive && $use_icu ; then + echo "$0: Running case insensitive processing (using ICU with transform \"$icu_transform\")" + cat $langdir/words.txt | uconv -f utf8 -t utf8 -x "${icu_transform}" > $kwsdatadir/words.txt + [ `cut -f 1 -d ' ' $kwsdatadir/words.txt | sort -u | wc -l` -ne `cat $kwsdatadir/words.txt | wc -l` ] && \ + echo "$0: Warning, multiple words in dictionary differ only in case: " + + paste <(cut -f 1 $kwsdatadir/keywords.txt ) \ + <(cut -f 2 $kwsdatadir/keywords.txt | uconv -f utf8 -t utf8 -x "${icu_transform}" ) |\ + local/kwords2indices.pl --map-oov 0 $kwsdatadir/words.txt > $kwsdatadir/keywords_all.int +else + cp $langdir/words.txt $kwsdatadir/words.txt + cat $kwsdatadir/keywords.txt | \ + sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt > $kwsdatadir/keywords_all.int +fi + +(cat $kwsdatadir/keywords_all.int | \ + grep -v " 0 " | grep -v " 0$" > $kwsdatadir/keywords.int ) || true + +(cut -f 1 -d ' ' $kwsdatadir/keywords.int | \ + local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_invocab.xml) || true + +(cat $kwsdatadir/keywords_all.int | \ + egrep " 0 | 0$" | cut -f 1 -d ' ' | \ + local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_outvocab.xml) || true + + +# Compile keywords into FSTs +if [ -s $kwsdatadir/keywords.int ]; then + if [ -z $silence_word ]; then + transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:$kwsdatadir/keywords.fsts + else + silence_int=`grep -w $silence_word $langdir/words.txt | awk '{print $2}'` + [ -z $silence_int ] && \ + echo "$0: Error: could not find integer representation of silence word $silence_word" && exit 1; + transcripts-to-fsts ark:$kwsdatadir/keywords.int ark,t:- | \ + awk -v 'OFS=\t' -v silint=$silence_int '{if (NF == 4 && $1 != 0) { print $1, $1, silint, silint; } print; }' \ + > $kwsdatadir/keywords.fsts + fi +else + echo "WARNING: $kwsdatadir/keywords.int is zero-size. That means no keyword" + echo "WARNING: was found in the dictionary. 
That might be OK -- or not."
+  touch $kwsdatadir/keywords.fsts
+fi
+
+# Create utterance id for each utterance
+cat $datadir/segments | \
+  awk '{print $1}' | \
+  sort | uniq | perl -e '
+  $idx=1;
+  while(<>) {
+    chomp;
+    print "$_ $idx\n";
+    $idx++;
+  }' > $kwsdatadir/utter_id
+
+# Map utterance to the names that will appear in the rttm file. You have
+# to modify the commands below according to your rttm file
+cat $datadir/segments | awk '{print $1" "$2}' | sort | uniq > $kwsdatadir/utter_map;
+
+echo "$0: Kws data preparation succeeded"
diff --git a/egs/babel/s5d/local/kws_data_prep_proxy.sh b/egs/babel/s5d/local/kws_data_prep_proxy.sh
new file mode 100755
index 00000000000..04cc59b6499
--- /dev/null
+++ b/egs/babel/s5d/local/kws_data_prep_proxy.sh
@@ -0,0 +1,270 @@
+#!/bin/bash
+
+# Copyright 2014  Guoguo Chen
+# Apache 2.0.
+
+# Begin configuration section.
+nj=8
+cmd=run.pl
+beam=-1             # Beam for proxy FST, -1 means no prune
+phone_beam=-1       # Beam for KxL2xE FST, -1 means no prune
+nbest=-1            # Use top n best proxy keywords in proxy FST, -1 means all
+                    # proxies
+phone_nbest=50      # Use top n best phone sequences in KxL2xE, -1 means all
+                    # phone sequences
+phone_cutoff=5      # We don't generate proxy keywords for OOV keywords that
+                    # have fewer phones than the specified cutoff as they may
+                    # introduce a lot of false alarms
+max_phone_cutoff=9990  # We don't generate proxy keywords for OOV keywords that
+                    # have more than this many phonemes. This can be used when
+                    # we need to use different parameters for keywords of
+                    # different lengths.
+confusion_matrix=   # If supplied, using corresponding E transducer
+count_cutoff=1      # Minimal count to be considered in the confusion matrix;
+                    # will ignore phone pairs that have count less than this.
+pron_probs=false    # If true, then lexicon looks like:
+                    # Word Prob Phone1 Phone2...
+case_insensitive=true
+icu_transform="Any-Lower"
+proxy_set=          # List of keywords to generate proxies for, one KWID per
+                    # line. If empty, then by default generate proxies for all
+                    # OOV keywords.
+# End configuration section.
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+echo $0 "$@"
+. parse_options.sh || exit 1;
+
+if [ $# -ne 5 ]; then
+  echo "Usage: local/kws_data_prep_proxy.sh \\"
+  echo "  <lang-dir> <data-dir> <L1-lexicon> <L2-lexicon> <kws-data-dir>"
+  echo " e.g.: local/kws_data_prep_proxy.sh data/lang/ data/dev10h/ \\"
+  echo "  data/local/tmp.lang/lexiconp.txt oov_lexicon.txt data/dev10h/kws/"
+  echo "allowed options:"
+  echo "  --case-sensitive         # Being case-sensitive or not"
+  echo "  --icu-transform          # Transliteration for upper/lower case"
+  echo "                           # mapping"
+  echo "  --proxy-set              # Keyword set for generating proxies"
+  exit 1
+fi
+
+set -e
+set -o pipefail
+
+langdir=$1
+datadir=$2
+l1_lexicon=$3
+l2_lexicon=$4
+kwsdatadir=$5
+
+# Checks some files.
+for f in $langdir/words.txt $kwsdatadir/kwlist.xml $l1_lexicon $l2_lexicon; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1
+done
+
+keywords=$kwsdatadir/kwlist.xml
+mkdir -p $kwsdatadir/tmp/
+
+cat $keywords | perl -e '
+  #binmode STDIN, ":utf8";
+  binmode STDOUT, ":utf8";
+
+  use XML::Simple;
+  use Data::Dumper;
+
+  my $data = XMLin(\*STDIN);
+
+  #print Dumper($data->{kw});
+  foreach $kwentry (@{$data->{kw}}) {
+    #print Dumper($kwentry);
+    print "$kwentry->{kwid}\t$kwentry->{kwtext}\n";
+  }' > $kwsdatadir/raw_keywords_all.txt
+
+# Takes care of upper/lower case.
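+# (For example, with the default icu_transform "Any-Lower", "Hello" and
+# "HELLO" both normalize to "hello", so keywords and lexicon entries match
+# regardless of their original casing.)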
+cp $langdir/words.txt $kwsdatadir/words.txt +cat $l1_lexicon | sed 's/\s/ /g' > $kwsdatadir/tmp/L1.tmp.lex +if $case_insensitive; then + echo "$0: Running case insensitive processing" + echo "$0: Using ICU with transofrm \"$icu_transform\"" + + # Processing words.txt + cat $kwsdatadir/words.txt |\ + uconv -f utf8 -t utf8 -x "${icu_transform}" > $kwsdatadir/words.norm.txt + + # Processing lexicon + cat $l2_lexicon | sed 's/\s/ /g' | cut -d ' ' -f 1 |\ + uconv -f utf8 -t utf8 -x "${icu_transform}" |\ + paste -d ' ' - <(cat $l2_lexicon | sed 's/\s/ /g' | cut -d ' ' -f 2-) \ + > $kwsdatadir/tmp/L2.tmp.lex + + paste <(cut -f 1 $kwsdatadir/raw_keywords_all.txt) \ + <(cut -f 2 $kwsdatadir/raw_keywords_all.txt |\ + uconv -f utf8 -t utf8 -x "${icu_transform}") \ + > $kwsdatadir/keywords_all.txt + cat $kwsdatadir/keywords_all.txt |\ + local/kwords2indices.pl --map-oov 0 $kwsdatadir/words.norm.txt \ + > $kwsdatadir/keywords_all.int +else + cat $l2_lexicon | sed 's/\s/ /g' > $kwsdatadir/tmp/L2.tmp.lex + cp $kwsdatadir/raw_keywords_all.txt $kwsdatadir/keywords_all.txt + + cat $kwsdatadir/keywords_all.txt | \ + sym2int.pl --map-oov 0 -f 2- $kwsdatadir/words.txt \ + > $kwsdatadir/keywords_all.int +fi + +# Writes some scoring related files. +cat $kwsdatadir/keywords_all.int |\ + (grep -E -v " 0 | 0$" || true) | cut -f 1 -d ' ' |\ + local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_invocab.xml + +cat $kwsdatadir/keywords_all.int |\ + (grep -E " 0 | 0$" || true) | cut -f 1 -d ' ' |\ + local/subset_kwslist.pl $keywords > $kwsdatadir/kwlist_outvocab.xml + +# Selects a set to generate proxies for. By default, generate proxies for OOV +# keywords. +if [ -z $proxy_set ]; then + cat $kwsdatadir/keywords_all.int |\ + (grep -E " 0 | 0$" || true) | awk '{print $1;}' | sort -u \ + > $kwsdatadir/keywords_proxy.list +else + cp $proxy_set $kwsdatadir/keywords_proxy.list +fi +cat $kwsdatadir/keywords_all.txt |\ + grep -f $kwsdatadir/keywords_proxy.list > $kwsdatadir/keywords_proxy.txt +cat $kwsdatadir/keywords_proxy.txt |\ + cut -f 2- | awk '{for(x=1;x<=NF;x++) {print $x;}}' |\ + sort -u > $kwsdatadir/keywords_proxy_words.list + +# Maps original phone set to a "reduced" phone set. We limit L2 to only cover +# the words that are actually used in keywords_proxy.txt for efficiency purpose. +# Besides, if L1 and L2 contains the same words, we use the pronunciation from +# L1 since it is the lexicon used for the LVCSR training. 
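+# (Illustration of the tag stripping done by the sed expressions below:
+# position- and tone-tagged variants such as "a_B", "a_I", "a_E", "a_S" or
+# "a_3" all collapse onto the bare phone "a".)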
+cat $kwsdatadir/tmp/L1.tmp.lex | cut -d ' ' -f 1 |\
+  paste -d ' ' - <(cat $kwsdatadir/tmp/L1.tmp.lex | cut -d ' ' -f 2-|\
+                   sed 's/_[BEIS]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\
+  awk '{if(NF>=2) {print $0}}' > $kwsdatadir/tmp/L1.lex
+cat $kwsdatadir/tmp/L2.tmp.lex | cut -d ' ' -f 1 |\
+  paste -d ' ' - <(cat $kwsdatadir/tmp/L2.tmp.lex | cut -d ' ' -f 2-|\
+                   sed 's/_[BEIS]//g' | sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g') |\
+  awk '{if(NF>=2) {print $0}}' | perl -e '
+  ($lex1, $words) = @ARGV;
+  open(L, "<$lex1") || die "Failed to open $lex1.\n";
+  open(W, "<$words") || die "Failed to open $words.\n";
+  while (<L>) {
+    chomp;
+    @col = split;
+    @col >= 2 || die "Too few columns in \"$_\".\n";
+    $w = $col[0];
+    $w_p = $_;
+    if (defined($lex1{$w})) {
+      push(@{$lex1{$w}}, $w_p);
+    } else {
+      $lex1{$w} = [$w_p];
+    }
+  }
+  close(L);
+  while (<STDIN>) {
+    chomp;
+    @col = split;
+    @col >= 2 || die "Too few columns in \"$_\".\n";
+    $w = $col[0];
+    $w_p = $_;
+    if (defined($lex1{$w})) {
+      next;
+    }
+    if (defined($lex2{$w})) {
+      push(@{$lex2{$w}}, $w_p);
+    } else {
+      $lex2{$w} = [$w_p];
+    }
+  }
+  %lex = (%lex1, %lex2);
+  while (<W>) {
+    chomp;
+    if (defined($lex{$_})) {
+      foreach $x (@{$lex{$_}}) {
+        print "$x\n";
+      }
+    }
+  }
+  close(W);
+  ' $kwsdatadir/tmp/L1.lex $kwsdatadir/keywords_proxy_words.list \
+  > $kwsdatadir/tmp/L2.lex
+rm -f $kwsdatadir/tmp/L1.tmp.lex $kwsdatadir/tmp/L2.tmp.lex
+
+# Creates words.txt that covers all the words in L1.lex and L2.lex. We append
+# new words to the original word symbol table.
+max_id=`cat $kwsdatadir/words.txt | awk '{print $2}' | sort -n | tail -1`;
+cat $kwsdatadir/keywords_proxy.txt |\
+  awk '{for(i=2; i <= NF; i++) {print $i;}}' |\
+  cat - <(cat $kwsdatadir/tmp/L2.lex | awk '{print $1;}') |\
+  cat - <(cat $kwsdatadir/tmp/L1.lex | awk '{print $1;}') |\
+  sort -u | \
+  (grep -F -v -x -f <(cat $kwsdatadir/words.txt | awk '{print $1;}') || true)|\
+  awk 'BEGIN{x='$max_id'+1}{print $0"\t"x; x++;}' |\
+  cat $kwsdatadir/words.txt - > $kwsdatadir/tmp/words.txt
+
+# Creates keyword list that we need to generate proxies for.
+cat $kwsdatadir/keywords_proxy.txt | perl -e '
+  open(W, "<'$kwsdatadir/tmp/L2.lex'") ||
+    die "Failed to open L2 lexicon: '$kwsdatadir/tmp/L2.lex'\n";
+  my %lexicon;
+  while (<W>) {
+    chomp;
+    my @col = split();
+    @col >= 2 || die "'$0': Bad line in lexicon: $_\n";
+    if ('$pron_probs' eq "false") {
+      $lexicon{$col[0]} = scalar(@col)-1;
+    } else {
+      $lexicon{$col[0]} = scalar(@col)-2;
+    }
+  }
+  while (<>) {
+    chomp;
+    my $line = $_;
+    my @col = split();
+    @col >= 2 || die "Bad line in keywords file: $_\n";
+    my $len = 0;
+    for (my $i = 1; $i < scalar(@col); $i ++) {
+      if (defined($lexicon{$col[$i]})) {
+        $len += $lexicon{$col[$i]};
+      } else {
+        print STDERR "'$0': No pronunciation found for word: $col[$i]\n";
+      }
+    }
+    if (($len >= '$phone_cutoff') && ($len <= '$max_phone_cutoff')){
+      print "$line\n";
+    } elsif ($len > '$max_phone_cutoff'){
+      print STDERR "'$0': Keyword $col[0] is too long, not generating proxy\n";
+    } else {
+      print STDERR "'$0': Keyword $col[0] is too short, not generating proxy\n";
+    }
+  }' > $kwsdatadir/tmp/keywords.txt
+
+# Creates proxy keywords.
+local/generate_proxy_keywords.sh \
+  --cmd "$cmd" --nj "$nj" --beam "$beam" --nbest "$nbest" \
+  --phone-beam $phone_beam --phone-nbest $phone_nbest \
+  --confusion-matrix "$confusion_matrix" --count-cutoff "$count_cutoff" \
+  --pron-probs "$pron_probs" $kwsdatadir/tmp/
+cp $kwsdatadir/tmp/keywords.fsts $kwsdatadir
+
+# Creates utterance id for each utterance.
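+# (For example, hypothetical segments lines "uttA fileX 0.0 1.5" and
+# "uttB fileX 1.5 3.1" produce "uttA 1" and "uttB 2" in utter_id; numbering
+# starts at 1 and follows the sorted order of the utterance ids.)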
+cat $datadir/segments | \ + awk '{print $1}' | \ + sort | uniq | perl -e ' + $idx=1; + while(<>) { + chomp; + print "$_ $idx\n"; + $idx++; + }' > $kwsdatadir/utter_id + +# Map utterance to the names that will appear in the rttm file. You have +# to modify the commands below accoring to your rttm file +cat $datadir/segments | awk '{print $1" "$2}' |\ + sort | uniq > $kwsdatadir/utter_map; + +echo "$0: Kws data preparation succeeded" diff --git a/egs/babel/s5d/local/kws_gen_oracle_lattices.sh b/egs/babel/s5d/local/kws_gen_oracle_lattices.sh new file mode 100755 index 00000000000..b73112b191d --- /dev/null +++ b/egs/babel/s5d/local/kws_gen_oracle_lattices.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) +# Apache 2.0. + +# Begin configuration section. +cmd=run.pl +duptime=0.5 +model=final.mdl +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage $0 [options] " + echo "" + echo "Main options (for others, see top of script file)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo "" + exit 1; +fi + +lang=$1; +data=$2; +decodedir=$3; + + +kwsdatadir=$data/kws +oracledir=$decodedir/kws_oracle +mkdir -p $oracledir/log + +for filename in $lang/words.txt $decodedir/num_jobs \ + $data/text $decodedir/lat.1.gz \ + $decodedir/../$model ; do + if [[ ! -f $filename ]] ; then + echo "FATAL: File $filename does not exist!" + exit 1; + fi +done + +nj=`cat $decodedir/num_jobs` + +(cd $decodedir; ln -s ../$model final.mdl ) +(cd $oracledir; echo "$nj" > num_jobs ) + +$cmd LAT=1:$nj $oracledir/log/lat.LAT.log \ + cat $data/text \| \ + sed 's/- / /g' \| \ + sym2int.pl --map-oov '""' -f 2- $lang/words.txt \| \ + lattice-oracle --word-symbol-table=$lang/words.txt \ + --write-lattices="ark:|gzip -c > $oracledir/lat.LAT.gz" \ + "ark:gzip -cdf $decodedir/lat.LAT.gz|" ark:- ark,t:$oracledir/lat.LAT.tra; + diff --git a/egs/babel/s5d/local/kws_oracle.sh b/egs/babel/s5d/local/kws_oracle.sh new file mode 100755 index 00000000000..c7aa661664f --- /dev/null +++ b/egs/babel/s5d/local/kws_oracle.sh @@ -0,0 +1,136 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Jan Trmal) +# 2013 Johns Hopkins University +# Apache 2.0. + +. ./path.sh +. ./cmd.sh + +# Begin configuration section. +cmd=run.pl +acwt=0.09091 #Acoustic weight -- should not be necessary for oracle lattices +duptime=0.6 #Max time difference in which the occurences of the same KW will be seen as duplicates +text= # an alternative reference text to use. when not specified, the /text will be used +model= # acoustic model to use +extraid= # kws setup extra ID (kws task was setup using kws_setup.sh --extraid +stage=0 # to resume the computation from different stage +# End configuration section. + +set -e +set -o pipefail + +echo "$0 $@" # Print the command line for logging + + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [ $# != 3 ]; then + echo "Usage $0 [options] " + echo "" + echo "Main options (for others, see top of script file)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 
+ echo " --text #The alternative text file in the format SEGMENT W1 W2 W3..., " + echo " #The default text file is taken from /text" + echo "" + exit 1; +fi + +lang=$1; +data=$2; +decodedir=$3; + +if [ -z $text ] ; then + text=$data/text +fi + +if [ -z "$model" ]; then # if --model was not specified on the command line... + srcdir=`dirname $decodedir`; # The model directory is one level up from decoding directory. + model=$srcdir/final.mdl; +fi + +if [ -z $extraid ] ; then # the same logic as with kws_setup.sh + kwsdatadir=$data/kws +else + kwsdatadir=$data/${extraid}_kws +fi + +nj=`cat $decodedir/num_jobs`; + +oracledir=$decodedir/kws_oracle +mkdir -p $oracledir +mkdir -p $oracledir/log + +if [ $stage -le 0 ] ; then + echo "$nj" > $oracledir/num_jobs + $cmd LAT=1:$nj $oracledir/log/oracle_lat.LAT.log \ + cat $text \| \ + sed 's/- / /g' \| \ + sym2int.pl --map-oov '""' -f 2- $lang/words.txt \| \ + lattice-oracle --word-symbol-table=$lang/words.txt \ + --write-lattices="ark:|gzip -c > $oracledir/lat.LAT.gz" \ + "ark:gzip -cdf $decodedir/lat.LAT.gz|" ark:- ark,t:$oracledir/lat.LAT.tra; +fi + +if [ $stage -le 1 ] ; then + steps/make_index.sh --cmd "$cmd" --acwt $acwt --model $model \ + $kwsdatadir $lang $oracledir $oracledir +fi + +if [ $stage -le 2 ] ; then + steps/search_index.sh --cmd "$cmd" $kwsdatadir $oracledir +fi + +if [ $stage -le 3 ]; then + + #TODO: this stage should be probably moved in a single script file + # and used accross all the kw search scripts + duration=`head -1 $kwsdatadir/ecf.xml |\ + grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\ + grep -o -E "[0-9]*[\.]*[0-9]*" |\ + perl -e 'while(<>) {print $_/2;}'` + + + cat $oracledir/result.* | \ + utils/write_kwslist.pl --flen=0.01 --duration=$duration \ + --segments=$data/segments --normalize=true --duptime=$duptime\ + --map-utter=$kwsdatadir/utter_map --remove-dup=true \ + - $oracledir/kwslist_orig.xml + + #This does not do much -- just adds empty entries for keywords for which + #not even one occurence has not been found + local/fix_kwslist.pl $kwsdatadir/kwlist.xml $oracledir/kwslist_orig.xml $oracledir/kwslist.xml +fi + + +if [ $stage -le 4 ]; then + #As there is a missing functionality in the F4DE for scoring + #subsets of the original set, lets keep this commented out. 
+  #Alternatively:TODO: write a filter_kwslist.pl script
+  #That will produce a kwslist on the basis of a given kwlist.xml subset
+
+  local/kws_score_f4de.sh `dirname $kwsdatadir` $oracledir
+  #-local/kws_score_f4de.sh --kwlist $kwsdatadir/kwlist_outvocab.xml \
+  #-  --f4de-prefix outvocab `dirname $kwsdatadir` $oracledir || exit 1
+  #-local/kws_score_f4de.sh --kwlist $kwsdatadir/kwlist_invocab.xml \
+  #-  --f4de-prefix invocab `dirname $kwsdatadir` $oracledir || exit 1
+
+  echo "======================================================="
+  (
+  echo -n "ATWV-full "
+  grep Occurrence $oracledir/sum.txt | cut -d '|' -f 13
+  )
+
+  #-(
+  #-echo -n "ATWV-invocab "
+  #-grep Occurrence $oracledir/invocab.sum.txt | cut -d '|' -f 13
+  #-) || echo "Error occurred getting the invocab results"
+
+  #-(
+  #-echo -n "ATWV-outvocab "
+  #-grep Occurrence $oracledir/outvocab.sum.txt | cut -d '|' -f 13
+  #-) || echo "Error occurred getting the outvocab results"
+
+  echo "======================================================="
+fi
diff --git a/egs/babel/s5d/local/kws_oracle_threshold.pl b/egs/babel/s5d/local/kws_oracle_threshold.pl
new file mode 100755
index 00000000000..e1dc153767e
--- /dev/null
+++ b/egs/babel/s5d/local/kws_oracle_threshold.pl
@@ -0,0 +1,200 @@
+#!/usr/bin/env perl

+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+sub KeywordSort {
+  if ($a->[0] ne $b->[0]) {
+    $b->[0] <=> $a->[0];
+  } else {
+    $b->[1] <=> $a->[1];
+  }
+}
+
+my $Usage = <<EOU;
+Usage: kws_oracle_threshold.pl [options] <alignment.csv>
+ e.g.: kws_oracle_threshold.pl alignment.csv
+
+Allowed options:
+  --beta      : Beta value when computing ATWV              (float, default = 999.9)
+  --duration  : Duration of all audio, you must set this    (float, default = 999.9)
+
+EOU
+
+my $beta = 999.9;
+my $duration = 999.9;
+GetOptions(
+  'beta=f' => \$beta,
+  'duration=f' => \$duration);
+
+@ARGV == 1 || die $Usage;
+
+# Works out the input/output source.
+my $alignment_in = shift @ARGV;
+
+# Hash the alignment file. For each instance we store a 3-dimensional vector:
+#   [score, ref, res]
+# where "score" is the confidence of that instance, "ref" equals 0 means there's
+# no reference at that place and 1 means there's a corresponding reference, "res"
+# 0 means the instance is not considered when scoring, 1 means it's a false
+# alarm and 2 means it's a true hit.
+open(A, "<$alignment_in") || die "$0: Failed to open alignment file: $alignment_in\n";
+my %Ntrue;
+my %keywords;
+my %alignment;
+my $lattice_miss = 0;
+my $lattice_ref = 0;
+my %keywords_lattice_miss;
+my %keywords_lattice_ref;
+while (<A>) {
+  chomp;
+  my @col = split(',');
+  @col == 12 || die "$0: Bad number of columns in $alignment_in: $_\n";
+
+  # First line of the csv file.
+  if ($col[11] eq "alignment") {next;}
+
+  # Instances that do not have corresponding references.
+  if ($col[11] eq "CORR!DET" || $col[11] eq "FA") {
+    if (!defined($alignment{$col[3]})) {
+      $alignment{$col[3]} = [];
+    }
+    my $ref = 0;
+    my $res = 0;
+    if ($col[11] eq "FA") {
+      $res = 1;
+    }
+    push(@{$alignment{$col[3]}}, [$col[9], $ref, $res]);
+    next;
+  }
+
+  # Instances that have corresponding references.
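+  # ("CORR" is a true hit; "MISS" with a non-empty system field ($col[10])
+  # is a scored miss that was at least present in the lattices, while "MISS"
+  # with an empty system field never made it into the lattices at all --
+  # those are what the lattice-recall / STWV statistics below count.)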
+ if ($col[11] eq "CORR" || $col[11] eq "MISS") { + if (!defined($alignment{$col[3]})) { + $alignment{$col[3]} = []; + $Ntrue{$col[3]} = 0; + $keywords_lattice_miss{$col[3]} = 0; + $keywords_lattice_ref{$col[3]} = 0; + } + my $ref = 1; + my $res = 0; + if ($col[10] ne "") { + if ($col[11] eq "CORR") { + $res = 2; + } + push(@{$alignment{$col[3]}}, [$col[9], $ref, $res]); + } + $Ntrue{$col[3]} += 1; + $keywords{$col[3]} = 1; + + # The following is for lattice recall and STWV. + $lattice_ref ++; + $keywords_lattice_ref{$col[3]} ++; + if ($col[11] eq "MISS" && $col[10] eq "") { + $lattice_miss ++; + $keywords_lattice_miss{$col[3]} ++; + } + next; + } +} +close(A); + +# Works out the oracle ATWV by sweeping the threshold. +my $atwv = 0.0; +my $otwv = 0.0; +my %mtwv_sweep; +foreach my $kwid (keys %keywords) { + # Sort the instances by confidence score. + my @instances = sort KeywordSort @{$alignment{$kwid}}; + my $local_otwv = 0.0; + my $max_local_otwv = 0.0; + my $local_atwv = 0.0; + my $active_otwv_threshold = ""; + foreach my $instance (@instances) { + my @ins = @{$instance}; + my $gain = 1.0 / $Ntrue{$kwid}; + my $cost = $beta / ($duration - $Ntrue{$kwid}); + # OTWV. + if ($local_otwv > $max_local_otwv && + $active_otwv_threshold ne "" && $active_otwv_threshold != $ins[0]) { + $max_local_otwv = $local_otwv; + } + if ($ins[1] == 1) { + $local_otwv += $gain; + } else { + $local_otwv -= $cost; + } + $active_otwv_threshold = $ins[0]; + if ($active_otwv_threshold == 1.0) { + # If score = 1.0, we always accept the instance as YES. + $max_local_otwv = $local_otwv; + } + + # ATWV. + if ($ins[2] == 1) { + $local_atwv -= $cost; + } elsif ($ins[2] == 2) { + $local_atwv += $gain; + } + + # MTWV. + for (my $threshold = 0.000; $threshold <= $ins[0]; $threshold += 0.001) { + if ($ins[1] == 1) { + $mtwv_sweep{$threshold} += $gain; + } else { + $mtwv_sweep{$threshold} -= $cost; + } + } + } + if ($local_otwv > $max_local_otwv) { + $max_local_otwv = $local_otwv; + } + $atwv += $local_atwv; + $otwv += $max_local_otwv; +} + +# Works out the MTWV. +my $mtwv = 0.0; +my $mtwv_threshold = 0.0; +for my $threshold (keys %mtwv_sweep) { + if ($mtwv_sweep{$threshold} > $mtwv) { + $mtwv = $mtwv_sweep{$threshold}; + $mtwv_threshold = $threshold; + } +} + +# Works out the STWV. +my $stwv = 0.0; +for my $kw (keys %keywords_lattice_miss) { + $stwv += $keywords_lattice_miss{$kw} / $keywords_lattice_ref{$kw}; +} +$stwv = 1 - $stwv / scalar(keys %keywords); + +$atwv /= scalar(keys %keywords); +$atwv = sprintf("%.4f", $atwv); +$otwv /= scalar(keys %keywords); +$otwv = sprintf("%.4f", $otwv); +$mtwv /= scalar(keys %keywords); +$mtwv = sprintf("%.4f", $mtwv); +my $lattice_recall = 1 - $lattice_miss / $lattice_ref; +$lattice_recall = sprintf("%.4f", $lattice_recall); +$stwv = sprintf("%.4f", $stwv); +print "ATWV = $atwv\n"; +print "OTWV = $otwv\n"; +print "STWV = $stwv\n"; +print "MTWV = $mtwv, THRESHOLD = $mtwv_threshold\n"; +print "Lattice Recall = $lattice_recall\n"; diff --git a/egs/babel/s5d/local/kws_score.sh b/egs/babel/s5d/local/kws_score.sh new file mode 120000 index 00000000000..9b896c530a7 --- /dev/null +++ b/egs/babel/s5d/local/kws_score.sh @@ -0,0 +1 @@ +kws_score_f4de.sh \ No newline at end of file diff --git a/egs/babel/s5d/local/kws_score_f4de.sh b/egs/babel/s5d/local/kws_score_f4de.sh new file mode 100755 index 00000000000..4f79e1925a9 --- /dev/null +++ b/egs/babel/s5d/local/kws_score_f4de.sh @@ -0,0 +1,96 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal) +# Apache 2.0. 
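+#
+# (A note on the metrics file written at the end of this script by
+# local/kws_oracle_threshold.pl: every keyword kw contributes a gain of
+# 1/Ntrue(kw) per true hit and a cost of beta/(T - Ntrue(kw)) per false
+# alarm, T being the scored duration in seconds; ATWV is the average of the
+# resulting per-keyword sums over all keywords.)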
+
+# Begin configuration section.
+# case_insensitive=true
+extraid=
+kwlist=
+ecf=
+rttm=
+f4de_prefix=
+# End configuration section.
+
+help_message="$0: score the kwslist using the F4DE scorer from NIST
+  Example:
+    $0 <data-dir> <kws-results-dir> [additional-parameters]
+  where the most important additional parameters can be:
+    --extraid <extraid>  # use when a non-default kws task was set up
+       (using kws_setup.sh --extraid) for a single kaldi data-dir
+    --kwlist <kwlist.xml>  # allows for an alternative kwlist -- if not set, the default
+       kwlist is taken from the kws data dir
+    --f4de-prefix <prefix>  # allows for scoring the same results using
+       different kwlists and storing them in the same dir "
+
+echo $0 $@
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+
+if [ $# -ne 2 ]; then
+  printf "FATAL: incorrect number of variables given to the script\n\n"
+  printf "$help_message\n"
+  exit 1;
+fi
+
+if [ -z $extraid ] ; then
+  kwsdatadir=$1/kws
+else
+  kwsdatadir=$1/${extraid}_kws
+fi
+kwsoutputdir="$2/"
+
+if [ -z $kwlist ] ; then
+  kwlist=$kwsdatadir/kwlist.xml
+fi
+
+if [ -z $rttm ] ; then
+  rttm=$kwsdatadir/rttm
+fi
+
+if [ -z $ecf ] ; then
+  ecf=$kwsdatadir/ecf.xml
+fi
+
+if [ ! -z ${f4de_prefix} ] ; then
+  f4de_prefix="/${f4de_prefix}"
+fi
+
+if [[ ! -d "$kwsdatadir" ]] ; then
+  echo "FATAL: the KWS input data directory does not exist!"
+  exit 1;
+fi
+
+for file in $ecf $rttm $kwlist ; do
+  if [[ ! -f "$file" ]] ; then
+    echo "FATAL: file $file does not exist!"
+    exit 1;
+  fi
+done
+
+echo KWSEval -e $ecf -r $rttm -t $kwlist \
+  -s $kwsoutputdir/kwslist.xml -c -o -b -d -f $kwsoutputdir
+
+if [ -f $kwsdatadir/categories ]; then
+  if ! grep -q "NGramOrder" "$kwlist"; then
+    cat $kwlist | local/search/annotate_kwlist.pl $kwsdatadir/categories > $kwsoutputdir/kwlist.xml
+    kwlist=$kwsoutputdir/kwlist.xml
+  elif ! grep -q "Characters" "$kwlist"; then
+    cat $kwlist | local/search/annotate_kwlist.pl $kwsdatadir/categories > $kwsoutputdir/kwlist.xml
+    kwlist=$kwsoutputdir/kwlist.xml
+  fi
+fi
+
+KWSEval -e $ecf -r $rttm -t $kwlist -a --zGlobalMeasures MAP \
+  --zGlobalMeasures MAPpct --zGlobalMeasures Optimum --zGlobalMeasures Supremum \
+  -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \
+  -s $kwsoutputdir/kwslist.xml -c -o -b -d -f ${kwsoutputdir}${f4de_prefix} || exit 1;
+
+duration=`cat ${kwsoutputdir}${f4de_prefix}/sum.txt | grep TotDur | cut -f 3 -d '|' | sed "s/\s*//g"`
+
+local/kws_oracle_threshold.pl --duration $duration ${kwsoutputdir}${f4de_prefix}/alignment.csv > ${kwsoutputdir}${f4de_prefix}/metrics.txt
+
+exit 0;
+
+
diff --git a/egs/babel/s5d/local/kws_search.sh b/egs/babel/s5d/local/kws_search.sh
new file mode 100755
index 00000000000..39177e8a4c5
--- /dev/null
+++ b/egs/babel/s5d/local/kws_search.sh
@@ -0,0 +1,230 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
+# Apache 2.0.
+
+
+help_message="$(basename $0): do keyword indexing and search.  data-dir is assumed to have
+  kws/ subdirectory that specifies the terms to search for.  Output is in
+  decode-dir/kws/
+  Usage:
+    $(basename $0) <lang-dir> <data-dir> <decode-dir>"
+
+# Begin configuration section.
+#acwt=0.0909091
+min_lmwt=7
+max_lmwt=17
+duptime=0.6
+cmd=run.pl
+model=
+skip_scoring=false
+skip_optimization=false # true can speed it up if #keywords is small.
+max_states=150000
+indices_dir=
+kwsout_dir=
+stage=0
+word_ins_penalty=0
+extraid=
+silence_word=  # specify this if you did so in kws_setup.sh, it's more accurate.
+ntrue_scale=1.0
+nbest=900
+max_silence_frames=50
+# End configuration section.
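+
+# (The indexing and search below are repeated for every LM weight in
+# [min_lmwt, max_lmwt]; the acoustic scale used for indexing is
+# acwt = 1/lmwt, e.g. lmwt=10 gives acwt=0.1, and each weight gets its own
+# ${kwsoutdir}_<lmwt> output directory.)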
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+set -u
+set -e
+set -o pipefail
+
+
+if [[ "$#" -ne "3" ]] ; then
+    echo -e "$0: FATAL: wrong number of script parameters!\n\n"
+    printf "$help_message\n\n"
+    exit 1;
+fi
+
+silence_opt=
+
+langdir=$1
+datadir=$2
+decodedir=$3
+
+if [ -z $extraid ] ; then
+  kwsdatadir=$datadir/kws
+else
+  kwsdatadir=$datadir/${extraid}_kws
+fi
+
+if [ -z $kwsout_dir ] ; then
+  if [ -z $extraid ] ; then
+    kwsoutdir=$decodedir/kws
+  else
+    kwsoutdir=$decodedir/${extraid}_kws
+  fi
+else
+  kwsoutdir=$kwsout_dir
+fi
+mkdir -p $kwsoutdir
+
+if [ -z $indices_dir ]; then
+  indices_dir=$kwsoutdir
+fi
+
+for d in "$datadir" "$kwsdatadir" "$langdir" "$decodedir"; do
+  if [ ! -d "$d" ]; then
+    echo "$0: FATAL: expected directory $d to exist"
+    exit 1;
+  fi
+done
+if [[ ! -f "$kwsdatadir/ecf.xml" ]] ; then
+  echo "$0: FATAL: the $kwsdatadir does not contain the ecf.xml file"
+  exit 1;
+fi
+
+echo $kwsdatadir
+duration=`head -1 $kwsdatadir/ecf.xml |\
+    grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\
+    perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2;}'`
+
+#duration=`head -1 $kwsdatadir/ecf.xml |\
+#    grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\
+#    grep -o -E "[0-9]*[\.]*[0-9]*" |\
+#    perl -e 'while(<>) {print $_/2;}'`
+
+echo "Duration: $duration"
+
+if [ ! -z "$model" ]; then
+    model_flags="--model $model"
+else
+    model_flags=
+fi
+
+frame_subsampling_factor=1
+if [ -f $decodedir/../frame_subsampling_factor ] ; then
+  frame_subsampling_factor=$(cat $decodedir/../frame_subsampling_factor)
+  echo "Frame subsampling factor autodetected: $frame_subsampling_factor"
+fi
+
+if [ $stage -le 0 ] ; then
+  if [ ! -f $indices_dir/.done.index ] ; then
+    [ ! -d $indices_dir ] && mkdir $indices_dir
+    for lmwt in `seq $min_lmwt $max_lmwt` ; do
+      indices=${indices_dir}_$lmwt
+      mkdir -p $indices
+
+      acwt=`perl -e "print (1.0/$lmwt);"`
+      [ ! -z $silence_word ] && silence_opt="--silence-word $silence_word"
+      steps/make_index.sh $silence_opt --cmd "$cmd" --acwt $acwt $model_flags\
+        --skip-optimization $skip_optimization --max-states $max_states \
+        --word-ins-penalty $word_ins_penalty --max-silence-frames $max_silence_frames\
+        --frame-subsampling-factor ${frame_subsampling_factor} \
+        $kwsdatadir $langdir $decodedir $indices  || exit 1
+    done
+    touch $indices_dir/.done.index
+  else
+    echo "Assuming indexing has been already done. If you really need to re-run "
+    echo "the indexing again, delete the file $indices_dir/.done.index"
+  fi
+fi
+
+
+if [ $stage -le 1 ]; then
+  for lmwt in `seq $min_lmwt $max_lmwt` ; do
+    kwsoutput=${kwsoutdir}_$lmwt
+    indices=${indices_dir}_$lmwt
+    mkdir -p $kwsoutdir
+    local/search_index.sh --cmd "$cmd" --indices-dir $indices \
+      --strict false --frame-subsampling-factor ${frame_subsampling_factor}\
+      $kwsdatadir $kwsoutput || exit 1
+
+    nj=`cat $indices/num_jobs`
+    #this will truncate the file
+    rm -f $kwsoutput/results; touch $kwsoutput/results
+
+    # This is a memory-efficient way to do the filtration.
+    # We do it this way because the result.* files can be fairly big
+    # and we do not want to run into trouble with memory.
+    #% files=""
+    #% for job in `seq 1 $nj`; do
+    #%   if [ -f $kwsoutput/results.${job}.gz ] ; then
+    #%     files="$files <(gunzip -c $kwsoutput/results.${job}.gz)"
+    #%   elif [ -f $kwsoutput/results.${job} ] ; then
+    #%     files="$files $kwsoutput/results.${job}"
+    #%   else
+    #%     echo >&2 "The file $kwsoutput/results.${job}[.gz] does not exist"
+    #%     return 1
+    #%   fi
+    #% done
+    #% sort -m -u $files | local/search/filter_kws_results.pl --nbest $nbest |\
+    #%   sort -u > $kwsoutput/results
+
+    # This is similar to the previous code and should produce the same
+    # results (albeit more slowly, as it relies on temporary files), but
+    # the memory requirements are extremely limited.
+    # I decided to go for this because the previous code relies on the
+    # assumption that the partial result files are sorted, and that is
+    # not true for the older generation of the pipeline.
+    for job in `seq 1 $nj`; do
+      {
+        if [ -f $kwsoutput/result.${job}.gz ]; then
+          gunzip -c $kwsoutput/result.${job}.gz
+        else
+          cat $kwsoutput/result.${job}
+        fi
+      } | cat - $kwsoutput/results | \
+        local/search/filter_kws_results.pl --nbest $nbest | \
+        sort -u > $kwsoutput/results.${job}
+      mv $kwsoutput/results.${job} $kwsoutput/results
+    done
+
+  done
+
+
+fi
+
+if [ $stage -le 2 ]; then
+  echo "Writing unnormalized results"
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutdir/write_unnormalized.LMWT.log \
+    set -e ';' set -o pipefail ';'\
+    cat ${kwsoutdir}_LMWT/results \| sort -u \| \
+      utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$duration \
+        --segments=$datadir/segments --normalize=false --duptime=$duptime --remove-dup=true\
+        --map-utter=$kwsdatadir/utter_map\
+        - ${kwsoutdir}_LMWT/kwslist.unnormalized.xml || exit 1;
+fi
+
+if [ $stage -le 3 ]; then
+  echo "Writing normalized results"
+  $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutdir/write_normalized.LMWT.log \
+    set -e ';' set -o pipefail ';'\
+    cat ${kwsoutdir}_LMWT/results \| \
+      utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$duration \
+        --segments=$datadir/segments --normalize=true --duptime=$duptime --remove-dup=true\
+        --map-utter=$kwsdatadir/utter_map --digits=3\
+        - ${kwsoutdir}_LMWT/kwslist.xml || exit 1
+fi
+
+
+if [ -z $extraid ] ; then
+  extraid_flags=
+else
+  extraid_flags=" --extraid ""$extraid"" "
+fi
+
+if [ $stage -le 4 ]; then
+  if [[ (! -x local/kws_score.sh ) ]] ; then
+    echo "Not scoring, because the file local/kws_score.sh is not present"
+  elif [[ $skip_scoring == true ]] ; then
+    echo "Not scoring, because --skip-scoring true was issued"
+  else
+    echo "Scoring KWS results"
+    $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutdir/scoring.LMWT.log \
+      local/kws_score.sh $extraid_flags $datadir ${kwsoutdir}_LMWT || exit 1;
+  fi
+fi
+
+exit 0
diff --git a/egs/babel/s5d/local/kws_setup.sh b/egs/babel/s5d/local/kws_setup.sh
new file mode 100755
index 00000000000..93513a56d94
--- /dev/null
+++ b/egs/babel/s5d/local/kws_setup.sh
@@ -0,0 +1,158 @@
+#!/bin/bash
+
+# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
+# Apache 2.0.
+
+# Begin configuration section.
+cmd=run.pl
+case_insensitive=true
+subset_ecf=
+rttm_file=
+extraid=
+use_icu=true
+icu_transform="Any-Lower"
+kwlist_wordlist=false
+langid=107
+annotate=true
+silence_word=  # Optional silence word to insert (once) between words of the transcript.
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+set -e
+set -u
+set -o pipefail
+
+help_message="$0: Initialize and setup the KWS task directory
+Usage:
+  $0 <ecf-file> <kwlist-file> [rttm-file] <lang-dir> <data-dir>
+allowed switches:
+  --subset-ecf /path/to/filelist  # The script will subset the ecf file
+                                  # to contain only the files from the filelist
+  --rttm-file /path/to/rttm       # the preferred way to specify the rttm;
+                                  # the older way (as an in-line parameter) is
+                                  # obsolete and will be removed in the near future
+  --case-insensitive              # Shall we be case-sensitive or not?
+                                  # Please note the case-sensitivity depends
+                                  # on the shell locale!
+  --annotate
+  --use-icu                       # Use the ICU uconv binary to normalize casing
+  --icu-transform                 # When using ICU, use this transliteration
+  --kwlist-wordlist               # The file with the list of words is not an xml
+  "
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. utils/parse_options.sh || exit 1;
+
+if [ "$#" -ne "5" ] && [ "$#" -ne "4" ] ; then
+  printf "FATAL: invalid number of arguments.\n\n"
+  printf "$help_message\n"
+  exit 1
+fi
+
+ecf_file=$1
+kwlist_file=$2
+if [ "$#" -eq "5" ] ; then
+  rttm_file=$3
+  langdir=$4
+  datadir=$5
+else
+  langdir=$3
+  datadir=$4
+fi
+
+# don't quote rttm_file as it's valid for it to be empty.
+for filename in "$ecf_file" "$kwlist_file" $rttm_file; do
+  echo $filename
+  if [ ! -e $filename ] ; then
+    printf "FATAL: filename \'$filename\' does not refer to a valid file\n"
+    printf "$help_message\n"
+    exit 1;
+  fi
+done
+for dirname in "$langdir" "$datadir" ; do
+  if [ ! -d $dirname ] ; then
+    printf "FATAL: dirname \'$dirname\' does not refer to a valid directory\n"
+    printf "$help_message\n"
+    exit 1;
+  fi
+done
+
+if [ ! -z $extraid ]; then
+  kwsdatadir=$datadir/${extraid}_kws
+else
+  kwsdatadir=$datadir/kws
+fi
+
+mkdir -p $kwsdatadir
+
+if [ -z $subset_ecf ] ; then
+  test -f $kwsdatadir/ecf.xml && rm -f $kwsdatadir/ecf.xml
+  cp "$ecf_file" $kwsdatadir/ecf.xml || exit 1
+else
+  local/make_ecf_subset.sh $subset_ecf $ecf_file > $kwsdatadir/ecf.xml
+fi
+
+if $kwlist_wordlist ; then
+(
+  echo '<kwlist ecf_filename="kwlist.xml" language="" encoding="UTF-8" compareNormalize="" version="">'
+  awk '{ printf("  <kw kwid=\"%s\">\n", $1);
+         printf("    <kwtext>"); for (n=2;n<=NF;n++){ printf("%s", $n); if(n<NF){printf(" ");} } printf("</kwtext>\n");
+         printf("  </kw>\n"); }' < ${kwlist_file}
+  # while read line; do
+  #   id_str=`echo $line | cut -f 1 -d ' '`
+  #   kw_str=`echo $line | cut -f 2- -d ' '`
+  #   echo "  <kw kwid=\"$id_str\">"
+  #   echo "    <kwtext>$kw_str</kwtext>"
+  #   echo "  </kw>"
+  # done < ${kwlist_file}
+  echo '</kwlist>'
+) > $kwsdatadir/kwlist.xml || exit 1
+else
+  test -f $kwsdatadir/kwlist.xml && rm -f $kwsdatadir/kwlist.xml
+  cp "$kwlist_file" $kwsdatadir/kwlist.xml || exit 1
+fi
+
+if [ ! -z $rttm_file ] ; then
+  test -f $kwsdatadir/rttm && rm -f $kwsdatadir/rttm
+  cp "$rttm_file" $kwsdatadir/rttm || exit 1
+fi
+
+sil_opt=
+[ ! -z $silence_word ] && sil_opt="--silence-word $silence_word"
+local/kws_data_prep.sh --case-insensitive ${case_insensitive} \
+  $sil_opt --use_icu ${use_icu} --icu-transform "${icu_transform}" \
+  $langdir $datadir $kwsdatadir || exit 1
+
+if $annotate ; then
+  set -x
+  rm -f $kwsdatadir/kwlist.xml
+  cat $kwsdatadir/keywords.txt | local/search/create_categories.pl | local/search/normalize_categories.pl > $kwsdatadir/categories
+  cat "$kwlist_file" | local/search/annotate_kwlist.pl $kwsdatadir/categories > $kwsdatadir/kwlist.xml || exit 1
+fi
+#~ (
+#~ echo '<kwlist ecf_filename="kwlist.xml" language="" encoding="UTF-8" compareNormalize="" version="">'
+#~ while read line; do
+#~   id_str=`echo $line | cut -f 1 -d ' '`
+#~   kw_str=`echo $line | cut -f 2- -d ' '`
+#~   echo "  <kw kwid=\"$id_str\">"
+#~   echo "    <kwtext>$kw_str</kwtext>"
+#~   echo "  </kw>"
+#~ done < ${kwlist_file}
+#~ echo '</kwlist>'
+#~ ) > $kwsdatadir/kwlist.xml || exit 1
+#~
+#-(
+#-echo '<kwlist ecf_filename="kwlist.xml" language="" encoding="UTF-8" compareNormalize="" version="">'
+#-id=1
+#-while read line; do
+#-  id_str=$( printf "KWS$langid-%04d\n" $id )
+#-  echo "  <kw kwid=\"$id_str\">"
+#-  echo "    <kwtext>$line</kwtext>"
#-  echo "  </kw>"
+#-  id=$(( $id + 1 ))
+#-done < ${kwlist_file}
+#-echo '</kwlist>'
+#-) > $kwsdatadir/kwlist.xml || exit 1
+#-
diff --git a/egs/babel/s5d/local/lattice_to_ctm.sh b/egs/babel/s5d/local/lattice_to_ctm.sh
new file mode 100755
index 00000000000..5fbde42d237
--- /dev/null
+++ b/egs/babel/s5d/local/lattice_to_ctm.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# Copyright Johns Hopkins University (Author: Daniel Povey) 2012.  Apache 2.0.
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+decode_mbr=true
+beam=5
+word_ins_penalty=0.5
+min_lmwt=7
+max_lmwt=17
+model=
+
+#end configuration section.
+
+#debugging stuff
+echo $0 $@
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ]  && . ./cmd.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir|graph-dir> <decode-dir>" && exit;
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
+  echo "    --stage (0|1)                   # (createCTM | filterCTM )."
+  exit 1;
+fi
+
+data=$1
+lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
+dir=$3
+
+if [ -z "$model" ] ; then
+  model=`dirname $dir`/final.mdl # Relative path does not work in some cases
+  #model=$dir/../final.mdl # assume model one level up from decoding dir.
+  #[ ! -f $model ] && model=`(set +P; cd $dir/../; pwd)`/final.mdl
+fi
+
+
+for f in $lang/words.txt $model $data/segments $data/reco2file_and_channel $dir/lat.1.gz; do
+  [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
+done
+
+name=`basename $data`; # e.g. eval2000
+
+mkdir -p $dir/scoring/log
+
+if [ $stage -le 0 ]; then
+  if [ ! -f $lang/phones/word_boundary.int ] ; then
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \
+      set -e -o pipefail \; \
+      mkdir -p $dir/score_LMWT/ '&&' \
+      lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+      lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \
+      lattice-prune --beam=$beam ark:- ark:- \| \
+      lattice-align-words-lexicon $lang/phones/align_lexicon.int $model ark:- ark:- \| \
+      lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \
+      utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \
+      utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \
+      '>' $dir/score_LMWT/$name.ctm || exit 1;
+  else
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \
+      set -e -o pipefail \; \
+      mkdir -p $dir/score_LMWT/ '&&' \
+      lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
+      lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \
+      lattice-prune --beam=$beam ark:- ark:- \| \
+      lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \
+      lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \
+      utils/int2sym.pl -f 5 $lang/words.txt \| tee $dir/score_LMWT/$name.utt.ctm \| \
+      utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \
+      '>' $dir/score_LMWT/$name.ctm || exit 1;
+  fi
+fi
+
+if [ $stage -le 1 ]; then
+  # Remove some stuff we don't want to score, from the ctm.
+  for x in $dir/score_*/$name.ctm; do
+    cp $x $x.bkup1;
+    cat $x.bkup1 | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \
+      grep -v -E '<UNK>|%HESITATION|\(\(\)\)' | \
+      grep -v -E '<eps>' | \
+      grep -v -E '<noise>' | \
+      grep -v -E '<silence>' | \
+      grep -v -E '<unk>' | \
+      grep -v -E '<v-noise>' | \
+      grep -v -E '<hes>' | \
+      perl -e '@list = (); %list = ();
+      while(<>) {
+        chomp;
+        @col = split(" ", $_);
+        push(@list, $_);
+        $key = "$col[0]" . " $col[1]";
+        $list{$key} = 1;
+      }
+      foreach(sort keys %list)  {
+        $key = $_;
+        foreach(grep(/$key/, @list)) {
+          print "$_\n";
+        }
+      }' > $x;
+  done
+fi
+
+
+echo "Lattice2CTM finished on " `date`
+exit 0
diff --git a/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py b/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py
new file mode 100755
index 00000000000..b6d4b9ab944
--- /dev/null
+++ b/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py
@@ -0,0 +1,469 @@
+#!/usr/bin/env python
+
+# Copyright 2016 Johns Hopkins University (Author: Matthew Wiesner)
+# Apache 2.0
+
+# ============ Make unicode-based graphemic lexicon =============
+#
+# This script takes a list of either words or words and corresponding
+# morphemes and returns a kaldi format lexicon.
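+#
+# A hypothetical example, for illustration: given a word-list line
+# "3 niño" (count, then word), the emitted entry would look roughly like
+#     niño<TAB>n i n_combiningtilde o
+# where a rare grapheme like ñ is backed off to "n" with its diacritic
+# kept as a kaldi-style tag (the exact form depends on grapheme frequencies).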
+# ===============================================================
+
+# Import Statements
+
+from __future__ import print_function
+import codecs
+import argparse
+import unicodedata
+import os
+import re
+import sys
+import numpy as np
+
+
+def main():
+    args = parse_input()
+    baseforms = get_word_list(args.lex_in, args.fmt)
+    unicode_transcription = baseform2unicode(baseforms)
+    encoded_transcription, table = encode(unicode_transcription,
+                                          args.tag_percentage,
+                                          log=args.verbose)
+    write_table(table, args.lex_out)
+
+    # Extract dictionary of nonspeech pronunciations
+    try:
+        nonspeech = {}
+        with codecs.open(args.nonspeech, "r", "utf-8") as f:
+            for line in f:
+                line_vals = line.strip().split()
+                nonspeech[line_vals[0]] = line_vals[1]
+    except (IOError, TypeError):
+        pass
+
+    # Extract dictionary of extraspeech pronunciations (normally <hes>)
+    try:
+        extraspeech = {}
+        with codecs.open(args.extraspeech, "r", "utf-8") as f:
+            for line in f:
+                line_vals = line.strip().split()
+                extraspeech[line_vals[0]] = line_vals[1]
+    except (IOError, TypeError):
+        pass
+
+    write_lexicon(baseforms, encoded_transcription, args.lex_out,
+                  nonspeech=nonspeech, extraspeech=extraspeech)
+
+
+def parse_input():
+    '''
+        Parse commandline input.
+    '''
+    if len(sys.argv[1:]) == 0:
+        print("Usage: ./make_unicode_lexicon.py [opts] lex_in lex_out")
+        sys.exit(1)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("lex_in", help="Path of input word list optionally "
+                        "paired with a baseform. 1 word per line with the "
+                        "baseform separated by a tab")
+    parser.add_argument("lex_out", help="Path of the output "
+                        "graphemic lexicon")
+    parser.add_argument("-F", "--fmt", help="Format of input word list",
+                        action="store", default="word_list")
+    parser.add_argument("-T", "--tag_percentage", help="Percentage of least"
+                        " frequently occurring graphemes to be tagged",
+                        type=float, action="store", default=0.1)
+    parser.add_argument("--nonspeech", help="File with map of nonspeech words"
+                        " and pronunciations", action="store", default=None)
+    parser.add_argument("--extraspeech", help="File with map of extra speech"
+                        " words", action="store", default=None)
+    parser.add_argument("-V", "--verbose", help="Include useful print outs",
+                        action="store_true")
+    args = parser.parse_args()
+    return args
+
+
+def _read_word_list_line(line):
+    try:
+        count, word = line.strip().split(None, 1)
+        float(count)
+        return word
+    except ValueError:
+        return line.strip()
+
+
+def get_word_list(input_file, fmt):
+    '''
+        Read from input file the words and potential baseforms.
+
+        Arguments: input_file -- path to the input word list
+                   fmt -- format of input word list ["word_list", "morfessor"]
+        Output:
+            words -- list of tuples (word, baseform)
+    '''
+    with codecs.open(input_file, "r", "utf-8") as f:
+        if fmt == "word_list" or fmt is None:
+            words = []
+            for line in f:
+                w = _read_word_list_line(line)
+                words.append((w, w))
+                assert "." not in w, "FORMAT ERROR. Use --fmt [-F] morfessor"
+        elif fmt == "morfessor":
+            words = []
+            for line in f:
+                w, bf = line.strip().split(None, 1)
+                words.append((w, bf))
+        else:
+            sys.exit("Error: Bad input format name")
+
+    return words
+
+
+def baseform2unicode(baseforms):
+    '''
+        Convert each baseform in the list, baseforms, to a parsed unicode
+        description stored as a list of lists of dictionaries.
+
+        unicode_transcription = [
+                    [{'NAME':'word1_grapheme1','FIELD1':'FIELD1_VAL',...},
+                     {'NAME':'word1_grapheme2','FIELD1':'FIELD1_VAL',...},...],
+                    [{'NAME':'word2_grapheme1,'FIELD1:'FIELD1_VAL',...},
+                     {},...]
+                    ,...,[]]
+
+        Arguments:
+            baseforms -- List of tuples (word, baseform)
+                         e.g. baseforms = get_word_list()
+
+        Output:
+            unicode_transcription -- See above description
+    '''
+
+    # Regular expression for parsing unicode descriptions
+    pattern = re.compile(
+        r"(?P<SCRIPT>[^\s]+)\s"
+        r"(?P<CASE>SMALL\s|CAPITAL\s)?(?P<CHAR_TYPE>"
+        "(?:SUBJOINED )?LETTER |(?:INDEPENDENT VOWEL )"
+        r"|(?:VOWEL SIGN )|VOWEL |SIGN "
+        r"|CHARACTER |JONGSEONG |CHOSEONG |SYMBOL |MARK |DIGIT "
+        r"|SEMIVOWEL |TONE |SYLLABLE |LIGATURE |KATAKANA )"
+        r"(?P<NAME>((?!WITH).)+)"
+        r"(?P<TAGS>WITH .+)?"
+    )
+
+    # For each graphemic baseform generate a parsed unicode description
+    unicode_transcription = []
+    for w, bf in baseforms:
+        # Initialize empty list of words
+        baseform_transcription = []
+        # For each grapheme parse the unicode description
+        for graph in bf:
+            unicode_desc = unicodedata.name(graph)
+            # Use the canonical unicode decomposition
+            tags = unicodedata.normalize('NFD', graph)
+            match_obj = pattern.match(unicode_desc)
+
+            # Grapheme's unicode description is non-standard
+            if(not match_obj):
+                # Underscore, dash, hashtag have special meaning
+                if(graph in ("_", "-", "#")):
+                    graph_dict = {
+                        'CHAR_TYPE': 'LINK',
+                        'SYMBOL': graph,
+                        'NAME': graph
+                    }
+                # The grapheme is whitespace
+                elif(unicode_desc in ("ZERO WIDTH SPACE",
+                                      "ZERO WIDTH NON-JOINER",
+                                      "ZERO WIDTH JOINER",
+                                      "SPACE")):
+                    # Ignore whitespace
+                    continue
+                else:
+                    graph_dict = {'SYMBOL': graph, 'NAME': 'NOT_FOUND'}
+
+            # Grapheme's unicode description is standard
+            else:
+                graph_dict = match_obj.groupdict()
+                graph_dict["SYMBOL"] = graph
+                # Add tags to dictionary (The first element of tags is actually
+                # the base grapheme, so we only check all tags after the first.
+                if(len(tags) > 1):
+                    for i, t in enumerate(tags[1:]):
+                        graph_dict["TAG" + str(i)] = unicodedata.name(t)
+
+            # Add grapheme unicode description dictionary to baseform list
+            baseform_transcription.append(graph_dict)
+        # Add baseform transcription to unicode transcription list
+        unicode_transcription.append(baseform_transcription)
+    return unicode_transcription
+
+
+def encode(unicode_transcription, tag_percentage, log=False):
+    '''
+        Arguments:
+            unicode_transcription -- a list of words whose graphemes are
+                                     represented as a list of dictionaries
+                                     whose fields contain information about
+                                     parsed unicode descriptions.
+
+            tag_percentage -- percent of least frequent graphemes to tag
+            log -- optional printing
+
+        Outputs:
+            Lexicon -- Encoded baseforms
+    '''
+    # Constants
+    VOWELS = "AEIOU"
+    SKIP = "/()"
+
+    graphemes = []
+    table = []
+    encoded_transcription = []
+    # Accumulate grapheme statistics over corpus at some point. For now just
+    # use the lexicon word list. For estimating grapheme frequency this is
+    # probably sufficient since we have many words each with many
+    # graphemes. We do unfortunately have to assume that case does not matter.
+    # We do not count dashes, underscores, parentheses, etc.; just letters.
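+    # A small worked example of the frequency logic below (assumed data, for
+    # illustration): with graph_list = ['a', 'b', 'a', 'a'], graph_counts
+    # becomes {'a': 0.75, 'b': 0.25}; with tag_percentage = 0.25 the count
+    # threshold is 0.25, so only 'b' would have its diacritics peeled off
+    # into tags by _backoff_diacritics().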
+    graph_list = []
+    for w in unicode_transcription:
+        for graph in w:
+            if graph["SYMBOL"] not in "()\/,-_#.":
+                graph_list.append(graph["SYMBOL"].lower())
+
+    graph2int = {v: k for k, v in enumerate(set(graph_list))}
+    int2graph = {v: k for k, v in graph2int.items()}
+    graph_list_int = [graph2int[g] for g in graph_list]
+    bin_edges = range(0, len(int2graph.keys()) + 1)
+    graph_counts = np.histogram(graph_list_int, bins=bin_edges)[0] / float(len(graph_list_int))
+    # Set count threshold to the frequency that tags the bottom
+    # tag_percentage of graphemes
+    bottom_idx = int(np.floor(tag_percentage * len(graph_counts)))
+    count_thresh = sorted(graph_counts)[bottom_idx]
+    graph_counts_dict = {}
+    for i, count in enumerate(graph_counts):
+        graph_counts_dict[int2graph[i]] = count
+
+    graph_counts = graph_counts_dict
+
+    # Print grapheme counts to histogram
+    if log:
+        graph_counts_sorted = sorted(graph_counts, reverse=True,
+                                     key=graph_counts.get)
+        if not os.path.exists("lex_log"):
+            os.makedirs("lex_log")
+        with codecs.open("lex_log/grapheme_histogram.txt", "w", "utf-8") as fp:
+            fp.write("Graphemes (Count Threshold = %.6f)\n" % count_thresh)
+            for g in graph_counts_sorted:
+                weight = ("-" * int(np.ceil(500.0 * graph_counts[g])) +
+                          " %.6f\n" % graph_counts[g])
+                fp.write("%s -" % (g) + weight)
+
+    # Find a new baseform for each word
+    for w in unicode_transcription:
+        word_transcription = ""
+
+        # Find a "pronunciation" for each grapheme in the word
+        for graph in w:
+            # Case 1: Check that the grapheme has a unicode description type
+            # ---------------------------------------------------------------
+            if("CHAR_TYPE" not in [k.strip() for k in graph.keys()]):
+                if(graph["SYMBOL"] == "."):
+                    graph["MAP0"] = "\t"
+                    if word_transcription[-1] == " ":
+                        word_transcription = word_transcription[:-1] + "\t"
+
+                elif(graph["SYMBOL"] not in SKIP):
+                    graph["MAP0"] = graph["SYMBOL"].lower()
+                    word_transcription += graph["MAP0"] + " "
+
+            # Case 2: Standard Grapheme
+            # ---------------------------------------------------------------
+            elif(graph["CHAR_TYPE"].strip() in
+                    ("LETTER", "VOWEL", "VOWEL SIGN", "SIGN")):
+                # Backoff diacritics
+                base_grapheme = graph["NAME"].strip().replace(" ", "-").lower()
+                graph["MAP0"] = _backoff_diacritics(graph["SYMBOL"].lower(),
+                                                    base_grapheme,
+                                                    graph_counts,
+                                                    count_thresh)
+                # Add final space
+                word_transcription += graph["MAP0"] + " "
+
+            # Case 3: Syllable (Assume consonant vowel pattern)
+            # This is basically just here for Amharic
+            # ----------------------------------------------------------------
+            elif(graph["CHAR_TYPE"].strip() == "SYLLABLE"):
+                # Multi-word description
+                if(len(graph["NAME"].strip().split(' ')) > 1):
+                    g_name = graph["NAME"].strip().replace(" ", "-").lower()
+                    graph["MAP0"] = g_name
+                    word_transcription += graph["MAP0"] + "\t"
+
+                # Consonant Vowel Pattern
+                else:
+                    cv_pattern = (r"(?P<CONSONANT>[^%s]*)(?P<VOWEL>[%s]+)" %
+                                  (VOWELS, VOWELS))
+                    parsed_graph = re.match(cv_pattern, graph["NAME"])
+                    if(not parsed_graph):
+                        sys.exit("Syllable did not obey "
+                                 "consonant-vowel pattern.")
+
+                    graph_dict = parsed_graph.groupdict()
+
+                    # Get consonant if it exists
+                    if("CONSONANT" in graph_dict.keys() and
+                            graph_dict["CONSONANT"]):
+                        graph["MAP0"] = graph_dict["CONSONANT"].lower()
+                        word_transcription += graph["MAP0"] + " "
+
+                    # Get vowel if it exists
+                    if("VOWEL" in graph_dict.keys() and graph_dict["VOWEL"]):
+                        graph["MAP1"] = graph_dict["VOWEL"].lower()
+                        word_transcription += graph["MAP1"] + "\t"
+
+            # Case 4: Commonly occurring symbols
+            # ----------------------------------------------------------------
+            elif(graph["CHAR_TYPE"].strip() == "LINK"):
+                # Add tab for underscores (kaldi lexicon format)
+                if(graph["SYMBOL"] in ("_", "#")):
+                    graph["MAP0"] = "\t"
+                    if(len(word_transcription) >= 3 and
+                            word_transcription[-2] == "\t"):
+                        word_transcription = word_transcription[:-3] + "\t"
+                    elif(len(word_transcription) >= 1):
+                        word_transcription += "\t"
+                    else:
+                        sys.exit("Unknown rule for initial underscore")
+                elif(graph["SYMBOL"] == "-"):
+                    graph["MAP0"] = ""
+                    continue
+                else:
+                    sys.exit("Unknown linking symbol found.")
+
+            # Update table of observed graphemes
+            if(graph["SYMBOL"] not in graphemes):
+                table.append(graph)
+                graphemes.append(graph["SYMBOL"])
+
+        # Append the newly transcribed word
+        encoded_transcription.append(word_transcription.strip())
+    return encoded_transcription, table
+
+
+def _backoff_diacritics(grapheme, base_grapheme, graph_counts, count_thresh):
+    '''
+        Add diacritics as tags if the grapheme with diacritics occurs
+        infrequently. The grapheme is built by successively peeling away
+        diacritics until a frequent grapheme in the lexicon is discovered.
+        This grapheme is then considered a distinct unit and all peeled off
+        diacritics are added as kaldi style tags
+
+        Arguments:
+            grapheme -- the raw grapheme to be processed
+            base_grapheme -- the grapheme with no combining marks
+                             (see unicode normalization NFD for more details)
+            graph_counts -- A dictionary of all seen graphemes as keys with
+                            counts as values
+            count_thresh -- The frequency threshold below which diacritics
+                            should be peeled away
+    '''
+    # Initialize variables before loop
+    new_grapheme = grapheme
+    removed = []
+    parts = unicodedata.normalize("NFD", new_grapheme)
+    # Find a backed-off (in terms of number of diacritics) grapheme with count
+    # above the frequency threshold (count_thresh)
+    while(len(parts) > 1 and
+            (graph_counts[new_grapheme] <= count_thresh)):
+        new_grapheme = unicodedata.normalize("NFC", parts[0:-1])
+        tag = unicodedata.name(parts[-1]).strip().replace(" ", "").lower()
+        removed.append(tag)
+        parts = unicodedata.normalize("NFD", new_grapheme)
+
+    # Collect all diacritics that will not be added as tags
+    split_tags = []
+    for p in parts[1:]:
+        split_tag = unicodedata.name(p).strip().replace(" ", "").lower()
+        split_tags.append(split_tag)
+
+    # Append non-tag diacritics to the base grapheme
+    base_grapheme = "".join([base_grapheme] + split_tags)
+    # Return the tagged grapheme
+    return "_".join([base_grapheme] + removed)
+
+
+def write_table(table, outfile):
+    '''
+        Creates table of graphemes and fields of each grapheme's corresponding
+        unicode description.
+
+        Arguments:
+            table -- table to write
+            outfile -- name of the output lexicon file
+    '''
+
+    # Create output table name
+    outfile = os.path.splitext(outfile)[0] + "_table.txt"
+    # Sort keys for convenience
+    table_sorted = sorted(table, key=lambda k: k["NAME"])
+    # Start writing to output
+    with codecs.open(outfile, "w", "utf-8") as fo:
+        # Get header names
+        header_names = sorted(set().union(*[d.keys() for d in table]))
+        # Write headers
+        for h in header_names[:-1]:
+            fo.write("%s\t" % h)
+
+        fo.write("%s\n" % header_names[-1])
+
+        # Write values if present
+        for t in table_sorted:
+            for h in header_names[:-1]:
+                if(h in t.keys() and t[h]):
+                    fo.write("%s\t" % t[h])
+                else:
+                    fo.write("''\t")
+            if(header_names[-1] in t.keys() and t[header_names[-1]]):
+                fo.write("%s\n" % t[header_names[-1]])
+            else:
+                fo.write("''\n")
+
+
+def write_lexicon(baseforms, encoded_transcription, outfile, nonspeech=None,
+                  extraspeech=None):
+    '''
+        Write out the encoded transcription of words
+
+        Arguments:
+            baseforms -- list of (word, baseform) tuples from the word list
+            encoded_transcription -- input encoded lexicon
+            outfile -- output lexicon
+    '''
+    # Write Lexicon File
+    with codecs.open(outfile, "w", "utf-8") as f:
+        # First write the non-speech words
+        try:
+            for w in nonspeech.iterkeys():
+                f.write("%s\t%s\n" % (w, nonspeech[w]))
+        except AttributeError:
+            pass
+
+        # Then write extra-speech words
+        try:
+            for w in extraspeech.iterkeys():
+                f.write("%s\t%s\n" % (w, extraspeech[w]))
+        except AttributeError:
+            pass
+
+        # Then write the rest of the words
+        for idx, w in enumerate(baseforms):
+            # This is really just for BABEL in case <hes> is written as a word
+            if(w[0].lower() == "<hes>"):
+                f.write("%s\t<hes>\n" % (unicode(w[0])))
+            else:
+                f.write("%s\t%s\n" % (unicode(w[0]),
+                                      encoded_transcription[idx]))
+
+if __name__ == "__main__":
+    main()
diff --git a/egs/babel/s5d/local/lexicon/make_word_list.py b/egs/babel/s5d/local/lexicon/make_word_list.py
new file mode 100755
index 00000000000..a1ff385a035
--- /dev/null
+++ b/egs/babel/s5d/local/lexicon/make_word_list.py
@@ -0,0 +1,93 @@
+#!/usr/bin/python
+
+from __future__ import print_function
+import sys
+import os
+import codecs
+import argparse
+import unicodedata
+import pdb
+
+
+def process_transcripts(transcripts_dir, transcripts_list):
+    '''
+    This looks through each transcript file, and collects the words.
+ Arguments: transcripts -- file with list of babel training transcripts + ''' + transcripts = os.path.join(transcripts_dir, transcripts_list) + with open(transcripts, "r") as f: + transcript_files = [] + for l in f: + l_path = os.path.join(transcripts_dir, l.strip() + ".txt") + transcript_files.append(l_path) + + word_list = {} + misprons = {} + for i_f, f in enumerate(transcript_files): + print("\rFile ", i_f + 1, "of ", len(transcript_files), end="") + with codecs.open(f, "r", "utf-8") as fp: + for line in fp: + # Don't use the lines with time markers + if not line.startswith("["): + words = line.strip().split(" ") + for w in words: + if (not w.startswith("<") and not + w.startswith("(") and not + w.endswith("-") and not w.startswith("-")): + # Get rid of mispronunciation markings + if (not w.startswith("*") and not + w.endswith("*") and + w != "~"): + try: + word_list[w] += 1 + except KeyError: + word_list[w] = 1 + else: + w = w.replace("*", "") + if(w != "~"): + try: + misprons[w] += 1 + except KeyError: + misprons[w] = 1 + + word_list = sorted(word_list.items(), key=lambda x: x[0]) + misprons = sorted(misprons.items(), key=lambda x: x[0]) + print("") + + return word_list, misprons + + +def main(): + if len(sys.argv[1:]) == 0: + print("Usage: ./make_word_list.py" + " ") + sys.exit(1) + + parser = argparse.ArgumentParser() + parser.add_argument("transcripts_list", help="Path to list of training " + "transcripts") + parser.add_argument("transcripts_dir", help="Path to the training " + "transcripts directory") + parser.add_argument("word_list", help="Path to the generated word list" + " of training words") + parser.add_argument("--misprons", help="Path to the generated word list" + " of mispronounced words", + action="store", default=None) + args = parser.parse_args() + + # Collect words + words, misprons = process_transcripts(args.transcripts_dir, + args.transcripts_list) + + # Print the word list + with codecs.open(args.word_list, "w", "utf-8") as f: + for word, count in words: + f.write("%d %s\n" % (count, unicode(word))) + + if args.misprons is not None: + with codecs.open(args.misprons, "w", "utf-8") as f: + for word, count in misprons: + f.write("%d %s\n" % (count, word)) + +if __name__ == "__main__": + main() diff --git a/egs/babel/s5d/local/lonestar.py b/egs/babel/s5d/local/lonestar.py new file mode 100755 index 00000000000..e1594e55ada --- /dev/null +++ b/egs/babel/s5d/local/lonestar.py @@ -0,0 +1,333 @@ +#!/usr/bin/env python +from pylauncher import * +import pylauncher +import sys + +import os +import errno + +def make_path(path): + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + elif not os.path.isdir(path): + raise + +def tail(n, filename): + import subprocess + p=subprocess.Popen(['tail','-n',str(n),filename], stdout=subprocess.PIPE) + soutput,sinput=p.communicate() + soutput=soutput.split("\n") + return soutput + +def KaldiLauncher(lo, **kwargs): + import time; + jobid = JobId() + debug = kwargs.pop("debug","") + qdir= os.path.join(lo.qdir, lo.taskname); + cores = lo.nof_threads; + + ce=SSHExecutor(workdir=qdir, debug=debug, force_workdir=True, catch_output=True) + ce.outstring="out." + ce.execstring=lo.taskname + "." 
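+    # Note on the bookkeeping used below: the executor writes one
+    # out.<taskid> file per job into qdir, and the FileCompletion objects
+    # poll for the matching done.<taskid> stamp files to decide when a
+    # task has finished; both sets of files are checked and cleaned up
+    # after job.run() returns.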
+ + hostpool=HostPool(hostlist=HostListByName(), commandexecutor=ce ) + + completion=lambda x:FileCompletion( taskid=x, stamproot="done.", stampdir=qdir) + + logfiles = list() + commands = list() + for q in xrange(lo.jobstart, lo.jobend+1): + s = "bash " + lo.queue_scriptfile + " " + str(q) + commands.append(s) + + logfile = lo.logfile.replace("${PY_LAUNCHER_ID}", str(q)) + logfiles.append(logfile) + + generator=ListCommandlineGenerator(list=commands, cores=cores) + tasks = TaskGenerator(generator, completion=completion, debug=debug ) + + job = LauncherJob( hostpool=hostpool, taskgenerator=tasks, debug=debug,**kwargs) + + job.run() + #At this point all the .done files should exist and everything should be finalized. + num_failed=0; + time.sleep(1); #Lets wait for a while to give the shared fs time to sync + error_pending=True + for logfile in logfiles: + import time + sched_rate=[0, 0.5, 1, 2, 4, 8, 15, 32 ]; + for delay in sched_rate: + time.sleep(delay); + if os.path.isfile(logfile): + break; + if not os.path.isfile(logfile): + sys.stderr.write("ERROR: " + "The following file is missing:\n") + sys.stderr.write("ERROR: " + "\t" + logfile + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n"); + sys.exit(-1); + + error_pending=True; + for delay in sched_rate: + time.sleep(delay); + + lines=tail(10, logfile) + with_status=filter(lambda x:re.search(r'with status (\d+)', x), lines) + + if len(with_status) == 0: + sys.stderr.write("The last line(s) of the log-file " + logfile + " does not seem" + " to indicate return status as expected\n"); + elif len(with_status) > 1: + sys.stderr.write("The last line(s) of the log-file " + logfile + " does seem" + " to indicate multiple return statuses \n"); + else: + status_re=re.search(r'with status (\d+)', with_status[0]); + status=status_re.group(1); + if status == '0': + error_pending=False; + break; + sys.stderr.write("INFO: Waiting for status in files, sleeping %d seconds\n" % (delay,)) + if error_pending: + num_failed+=1; + + if num_failed != 0: + sys.stderr.write(sys.argv[0] + ": " + str(num_failed) + "/" + str(len(logfiles)) + " failed \n"); + sys.stderr.write(sys.argv[0] + ": See " + lo.logfile.replace("${PY_LAUNCHER_ID}", "*" ) + " for details\n"); + sys.exit(-1); + + #Remove service files. Be careful not to remove something that might be needed in problem diagnostics + for i in xrange(len(commands)): + out_file=os.path.join(qdir, ce.outstring+str(i)) + + #First, let's wait on files missing (it might be that those are missing + #just because of slow shared filesystem synchronization + if not os.path.isfile(out_file): + import time + sched_rate=[0.5, 1, 2, 4, 8 ]; + for delay in sched_rate: + time.sleep(delay); + if os.path.isfile(out_file): + break; + if not os.path.isfile(out_file): + sys.stderr.write("ERROR: " + "The following file is missing:\n") + sys.stderr.write("ERROR: " + "\t" + out_file + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n"); + sys.exit(-1); + + if os.stat(out_file).st_size != 0: + sys.stderr.write("ERROR: " + "The following file has non-zero size:\n") + sys.stderr.write("ERROR: " + "\t" + out_file + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. 
Try to figure out what and fix it\n"); + sys.exit(-1); + else: + exec_file=os.path.join(qdir, ce.execstring+str(i)) + done_file=os.path.join(qdir, "done."+str(i)) + if (not os.path.isfile(exec_file) ) or (not os.path.isfile(done_file)): + sys.stderr.write("ERROR: " + "One of the following files is missing:\n") + sys.stderr.write("ERROR: " + "\t" + exec_file + "\n") + sys.stderr.write("ERROR: " + "\t" + done_file + "\n") + sys.stderr.write("ERROR: " + "\t" + out_file + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n"); + sys.exit(-1); + elif os.stat(done_file).st_size != 0: + sys.stderr.write("ERROR: " + "The following file has non-zero size:\n") + sys.stderr.write("ERROR: " + "\t" + done_file + "\n") + sys.stderr.write("ERROR: " + "That means something went wrong, but we don't know what. Try to figure out what and fix it\n"); + sys.exit(-1); + else: + os.remove(exec_file) + os.remove(done_file) + os.remove(out_file) + try: + os.rmdir(qdir) + except OSError: + sys.stderr.write("ERROR: " + "Failed to remove the pylauncher task dir " + qdir + "\n"); + sys.stderr.write("ERROR: " + "Find out what is wrong and fix it\n") + sys.exit(-1); + + #print job.final_report() + +class LauncherOpts: + def __init__(self): + self.sync=0 + self.nof_threads = 1 + self.qsub_opts = None + + self.jobname=None + self.jobstart=None + self.jobend=None + pass + +def CmdLineParser(argv): + import re; + sync=0 + qsub_opts='' + nof_threads=1 + + while len(argv) >= 2 and argv[0].startswith('-'): + switch = argv.pop(0); + + if switch == '-V': + qsub_opts += switch + ' '; + else: + option = argv.pop(0) + + if switch == "-sync" and (option in ['Y', 'y']): + sync=1; + qsub_opts += switch + ' ' + option + ' '; + if switch == "-pe": + option2 = argv.pop(0); + qsub_opts += option2 + ' '; + nof_threads = int(option2); + + #Now we have to parse the JOB specifier + jobname = "" + jobstart = 0 + jobend = 0 + if (re.match( r"^[A-Za-z_]\w*=\d+:\d+$", argv[0])): + m=re.match( r"^([A-Za-z_]\w*)=(\d+):(\d+)$", argv[0]) + jobname=m.group(1) + jobstart=int(m.group(2)) + jobend=int(m.group(3)) + argv.pop(0) + elif(re.match( r"^[A-Za-z_]\w*=\d+$", argv[0])): + m=re.match( r"^([A-Za-z_]\w*)=(\d+)$", argv[0]) + jobname=m.group(1) + jobstart=int(m.group(2)) + jobend=int(m.group(2)) + argv.pop(0) + elif re.match("^.+=.*:.*$", argv[0]): + print >> sys.stderr, "warning: suspicious JOB argument " + argv[0]; + + if jobstart > jobend: + sys.stderr.write("lonestar.py: JOBSTART("+ str(jobstart) + ") must be lower than JOBEND(" + str(jobend) + ")\n") + sys.exit(1) + + logfile=argv.pop(0) + + opts=LauncherOpts() + opts.sync = sync + opts.nof_threads=nof_threads; + opts.qsub_opts=qsub_opts + opts.varname=jobname + opts.jobstart=jobstart + opts.jobend=jobend + opts.logfile=logfile + + opts.cmd = escape_cmd(argv); + + return (opts, argv) + +def escape_cmd(argv): + cmd ="" + for x in argv: + #print x + " -> ", + if re.search("^\S+$", x): + #print " A -> ", + cmd += x + " " + elif '"' in x: + cmd += "'''" + x + "''' " + else: + cmd += "\"" + x + "\" " + #print cmd + return cmd + +def setup_paths_and_vars(opts): + cwd = os.getcwd() + + if opts.varname and (opts.varname not in opts.logfile ) and (opts.jobstart != opts.jobend): + print >>sys.stderr, "lonestar.py: you are trying to run a parallel job" \ + "but you are putting the output into just one log file (" + opts.logfile + ")"; + sys.exit(1) + + if not os.path.isabs(opts.logfile): + opts.logfile = os.path.join(cwd, 
opts.logfile); + logfile=opts.logfile + + dir = os.path.dirname(logfile) + base = os.path.basename(logfile) + qdir = os.path.join(dir, "q"); + + if re.search("log/*q", qdir, flags=re.IGNORECASE): + qdir = re.sub("log/*q", "/q", qdir, flags=re.IGNORECASE) + + + queue_logfile= os.path.join(qdir, base) + if opts.varname: + queue_logfile = re.sub("\.?"+opts.varname, "", queue_logfile) + + taskname=os.path.basename(queue_logfile) + taskname = taskname.replace(".log", ""); + if taskname == "": + print >> sys.stderr, "lonestar.py: you specified the log file name in such form " \ + "that leads to an empty task name ("+logfile + ")"; + sys.exit(1) + + if not os.path.isabs(queue_logfile): + queue_logfile= os.path.join(cwd, queue_logfile) + + if opts.varname: + opts.logfile = opts.logfile.replace(opts.varname, "${PY_LAUNCHER_ID}") + opts.cmd = opts.cmd.replace(opts.varname, "${PY_LAUNCHER_ID}"); + + queue_scriptfile=queue_logfile; + if re.search("\.[a-zA-Z]{1,5}$", queue_scriptfile): + queue_scriptfile = re.sub("\.[a-zA-Z]{1,5}$", ".sh", queue_scriptfile); + if not os.path.isabs(queue_scriptfile): + queue_scriptfile= os.path.join(cwd, queue_scriptfile) + + + make_path(qdir) + make_path(dir) + + opts.qdir = qdir + opts.log_dir = dir + opts.queue_scriptfile = queue_scriptfile + opts.queue_logfile = queue_logfile + opts.taskname = taskname + + return opts + + + +def create_scriptfile(scriptname, opts): + import os + logfile = opts.logfile + cmd = opts.cmd + nof_threads=opts.nof_threads; + cwd = os.getcwd() + #print scriptname + f = open(scriptname, "wb") + f.write("#!/bin/bash\n") + f.write("export PY_LAUNCHER_ID=$1; shift;\n") + f.write("cd " + cwd + "\n") + f.write(". ./path.sh\n") + f.write("( echo '#' Running on `hostname`\n") + f.write(" echo '#' Started at `date`\n") + f.write(" echo -n '# '; cat < " +logfile + "\n") + f.write("time1=`date +\"%s\"`\n") + f.write("( " + cmd + ") 2>>" + logfile + " >>" + logfile + " \n") + f.write("ret=$?\n") + f.write("time2=`date +\"%s\"`\n") + f.write("echo '#' Accounting time=$(($time2 - $time1)) threads=" + str(nof_threads) + " >> " + logfile + "\n") + + f.write("echo '#' Finished at `date` with status $ret >>" + logfile + "\n") + f.write("exit $ret \n") + f.close() + + + +if __name__ == "__main__": + (opts, cmd) = CmdLineParser(sys.argv[1:]); + setup_paths_and_vars(opts) + create_scriptfile(opts.queue_scriptfile, opts); + + #pylauncher.ClassicLauncher(["true && sleep 10s", "false || sleep 1s" ], debug="job+host+task+exec+ssh") + KaldiLauncher(opts, debug="") + + diff --git a/egs/babel/s5d/local/make_L_align.sh b/egs/babel/s5d/local/make_L_align.sh new file mode 100755 index 00000000000..50e46a00493 --- /dev/null +++ b/egs/babel/s5d/local/make_L_align.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright 2013 Johns Hopkins University (authors: Guoguo Chen, Yenda Trmal) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. 
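+
+# For illustration, a typical call (mirroring the e.g. below) is:
+#   local/make_L_align.sh data/local/lang data/lang data/lang
+# which writes data/lang/L_align.fst -- a lexicon FST whose pronunciations
+# are wrapped in the #1/#2 alignment markers, so that word boundaries
+# can be recovered from forced alignments.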
+
+set -o pipefail
+set -e
+
+if [ $# -ne 3 ]; then
+  echo "This is a simple script that will generate the L_align.fst"
+  echo "The FST L_align.fst is used for getting the force-aligned "
+  echo "utterances"
+  echo "The script automatically recognizes whether the probabilistic lexicon"
+  echo "is used and will use the correct file"
+  echo ""
+  echo "usage: local/make_L_align.sh <tmp-lang-dir> <lang-dir> <out-dir>"
+  echo "e.g.: local/make_L_align.sh data/local/lang data/lang data/lang"
+  exit 1;
+fi
+
+tmpdir=$1
+dir=$2
+outdir=$3
+
+silphone=`cat $dir/phones/optional_silence.txt` || exit 1;
+
+# Create lexicon with alignment info
+if [ -f $tmpdir/lexicon.txt ] ; then
+  cat $tmpdir/lexicon.txt | \
+    awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
+elif [ -f $tmpdir/lexiconp.txt ] ; then
+  cat $tmpdir/lexiconp.txt | \
+    awk '{printf("%s #1 ", $1); for (n=3; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
+else
+  echo "Neither $tmpdir/lexicon.txt nor $tmpdir/lexiconp.txt exists"
+  exit 1
+fi | utils/make_lexicon_fst.pl - 0.5 $silphone | \
+fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \
+   --keep_isymbols=false --keep_osymbols=false | \
+fstarcsort --sort_type=olabel > $outdir/L_align.fst
+
+exit 0;
diff --git a/egs/babel/s5d/local/make_corpus_subset.sh b/egs/babel/s5d/local/make_corpus_subset.sh
new file mode 100755
index 00000000000..12925830268
--- /dev/null
+++ b/egs/babel/s5d/local/make_corpus_subset.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0.
+
+#Begin configuration
+ignore_missing_txt=false  #If the reference transcript txt is missing, \
+                          #shall we ignore it or treat it as a fatal error?
+#End configuration
+echo "$0 $@"  # Print the command line for logging
+
+help_message="$0: create subset of the input directory (specified as the first directory).
+                 The subset is specified by the second parameter.
+                 The directory in which the subset should be created is the third parameter
+             Example:
+                 $0 <input-data-dir1> <input-data-list1> [<input-data-dir2> <input-data-list2> ...] <output-data-dir>"
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [[ "$#" -lt "3" ]] ; then
+  echo -e "FATAL: wrong number of script parameters!\n\n"
+  printf "$help_message\n\n"
+  exit 1;
+fi
+
+output_data_dir=${@: -1}  # last argument to the script
+sources=( $@ )
+unset sources[${#sources[@]}-1]  # 'pop' the last argument which is odir
+num_src=${#sources[@]}  # number of systems to combine
+
+if [ $(( $num_src % 2 )) -ne 0 ]; then
+  echo -e "FATAL: wrong number of script parameters!"
+  echo -e "     : The input directories are not in pairs!\n\n"
+  printf "$help_message\n\n"
+  exit 1;
+fi
+
+mkdir -p $output_data_dir/transcription
+mkdir -p $output_data_dir/audio
+
+num_warns_def=3;
+
+rm -f $output_data_dir/filelist.list
+
+for i in `seq 0 $(( $num_src / 2 - 1))` ; do
+  num_warns=$num_warns_def;
+  input_data_dir=${sources[ $[2 * $i] ]}
+  input_data_list=${sources[ $((2 * $i + 1)) ]}
+
+  abs_src_dir=`readlink -f $input_data_dir`
+  abs_tgt_dir=`readlink -f $output_data_dir`
+
+  if [[ ! -d "$input_data_dir" ]] ; then
+    echo "FATAL: input data directory does not exist";
+    exit 1;
+  fi
+  if [[ ! -f "$input_data_list" ]] ; then
-f "$input_data_list" ]] ; then + echo "FATAL: input data list file does not exist!"; + exit 1; + fi + + idl=`basename $input_data_list` + echo "Making subsets from $input_data_dir according to $idl" + + for file_basename in `cat $input_data_list`; do + if [[ -e $abs_src_dir/audio/$file_basename.sph ]] ; then + ln -sf $abs_src_dir/audio/$file_basename.sph $abs_tgt_dir/audio || exit 1 + else + if [[ -e $abs_src_dir/audio/$file_basename.wav ]] ; then + ln -sf $abs_src_dir/audio/$file_basename.wav $abs_tgt_dir/audio || exit 1 + else + echo "File $abs_src_dir/audio/$file_basename.sph|wav does not exist!" >&2 + exit 1 + fi + fi + + if [[ -e $abs_src_dir/transcription/$file_basename.txt ]] ; then + ln -sf $abs_src_dir/transcription/$file_basename.txt $abs_tgt_dir/transcription || exit 1 + else + if ! $ignore_missing_txt ; then + echo "File $abs_src_dir/transcription/$file_basename.txt does not exist!" + exit 1; + elif [ $num_warns -gt 0 ]; then + echo "WARNING: File $file_basename.txt does not exist!" + num_warns=$(($num_warns - 1)) + elif [ $num_warns -eq 0 ]; then + echo "Not warning anymore" + num_warns=$(($num_warns - 1)) + fi + fi + done + cat $input_data_list >> $output_data_dir/filelist.list +done + + diff --git a/egs/babel/s5d/local/make_ecf_subset.sh b/egs/babel/s5d/local/make_ecf_subset.sh new file mode 100755 index 00000000000..9bdd95c3e27 --- /dev/null +++ b/egs/babel/s5d/local/make_ecf_subset.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0. + +echo "$0 $@" 1>&2 # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +help_message="$0: generates an subset ecf file for spoken term detection evaluation. + The first parameter specifies the descriptor of the subset, + the second parameter specifies the original ecf file. + The file will be generated in the kws subdirectory of the directory + given as a third parameter and will be named ecf.xml + Output goes to stdout. + Usage: + $0 " + + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [[ "$#" -ne "2" ]] ; then + echo -e "FATAL: wrong number of script parameters!\n\n" 1>&2 + printf "$help_message\n\n" 1>&2 + exit 1; +fi + +list_file=$1 +src_ecf_file=$2 + +if [[ ! -f "$list_file" ]]; then + echo -e "FATAL: The list file does not exist! \n\n" 1>&2 + printf "$help_message\n" 1>&2 + exit 1; +fi +if [[ ! -f "$src_ecf_file" ]]; then + echo -e "FATAL: The source ecf file does not exist! \n\n" 1>&2 + printf "$help_message\n" 1>&2 + exit -1 +fi + + +duration=`grep -F -f $list_file $src_ecf_file | sed "s/.*dur=\"\([0-9.][0-9.]*\).*/\1 /g" | awk '{x += $1;} END{print x;}'` + +# Output is produced here: +( + grep "" +) diff --git a/egs/babel/s5d/local/make_lexicon_fst_special.pl b/egs/babel/s5d/local/make_lexicon_fst_special.pl new file mode 100755 index 00000000000..3df6e7a9527 --- /dev/null +++ b/egs/babel/s5d/local/make_lexicon_fst_special.pl @@ -0,0 +1,53 @@ +#!/usr/bin/env perl +use warnings; #sed replacement for -w perl parameter +# Copyright 2012 Johns Hopkins University (author: Daniel Povey) + +# makes lexicon FST -- special version only for use in keyword search +# for allowing optional silences between words. This version has +# no pron-probs involved, and +# does support an optional silence, but this silence is only allowed +# between words (where it may occur an arbitrary number of times), +# not at the beginning or end of the file. 
+
+if(@ARGV != 2) {
+  die "Usage: make_lexicon_fst_special.pl lexicon.txt silphone >lexiconfst.txt"
+}
+
+$lexfn = shift @ARGV;
+$silphone = shift @ARGV;
+
+open(L, "<$lexfn") || die "Error opening lexicon $lexfn";
+
+
+$startstate = 0;
+$silstate = 1;
+$endstate = 2;
+$nextstate = 3;
+
+sub create_wseq {
+  my $init_state = shift @_;
+  my $end_state = shift @_;
+  my $word_or_eps = shift @_;
+  my @phones = @_;
+  if (@phones == 0) { push @phones, "<eps>"; }
+  my $x;
+  my $curstate = $init_state;
+  for ($x = 0; $x + 1 < @phones; $x++) {
+    print "$curstate\t$nextstate\t$phones[$x]\t$word_or_eps\n";
+    $word_or_eps = "<eps>";
+    $curstate = $nextstate;
+    $nextstate++;
+  }
+  print "$curstate\t$end_state\t$phones[$x]\t$word_or_eps\n";
+}
+
+
+while(<L>) {
+  @A = split(" ", $_);
+  $w = shift @A;
+  create_wseq($startstate, $endstate, $w, @A);
+  create_wseq($endstate, $endstate, $w, @A);
+  create_wseq($silstate, $endstate, $w, @A);
+}
+print "$endstate\t$silstate\t$silphone\t<eps>\n";
+print "$endstate\t0\n"; # final-cost.
diff --git a/egs/babel/s5d/local/make_lexicon_subset.sh b/egs/babel/s5d/local/make_lexicon_subset.sh
new file mode 100755
index 00000000000..1e77fcaa2b9
--- /dev/null
+++ b/egs/babel/s5d/local/make_lexicon_subset.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+transcriptions=$1
+input_lexicon_file=$2
+output_lexicon_file=$3
+
+(
+  #find $dev_data_dir/transcription/ -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g'
+  find $transcriptions -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g'
+) | sort -u | awk '
+  BEGIN {
+    while(( getline line< ARGV[2] ) > 0 ) {
+      split(line, e, "\t")
+      LEXICON[ e[1] ]=line
+    }
+    FILENAME="-"
+    i=0
+
+    while(( getline word< ARGV[1] ) > 0 ) {
+      if (word in LEXICON)
+        print LEXICON[word]
+    }
+  }
+' - $input_lexicon_file | sort -u > $output_lexicon_file
+
diff --git a/egs/babel/s5d/local/make_wordlist.sh b/egs/babel/s5d/local/make_wordlist.sh
new file mode 100644
index 00000000000..ef589b917cb
--- /dev/null
+++ b/egs/babel/s5d/local/make_wordlist.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+transcriptions=$1
+wordlist=$2
+
+(
+  find $transcriptions -name "*.txt" | xargs egrep -vx '\[[0-9.]+\]' |cut -f 2- -d ':' | sed 's/ /\n/g'
+) | sort -u | grep -v -E '.*\*.*|<.*>|\(\(\)\)|^-.*|.*-$' > $wordlist
+
diff --git a/egs/babel/s5d/local/map_lang.sh b/egs/babel/s5d/local/map_lang.sh
new file mode 100755
index 00000000000..998a11d0cd0
--- /dev/null
+++ b/egs/babel/s5d/local/map_lang.sh
@@ -0,0 +1,81 @@
+#! /usr/bin/bash
+
+VARIABLES=`diff <(compgen -A variable) <(. ./lang.conf.orig; compgen -A variable) | grep '^>'| sed 's/^> *//g'`
+
+. ./conf/common_vars.sh
+. ./lang.conf.orig
+
+for variable in $VARIABLES ; do
+
+  eval VAL=\$${variable}
+  if [[ $VAL =~ /export/babel/data/ ]] ; then
+    eval $variable=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/}
+    #declare -x $variable
+    declare -p $variable
+  fi
+done
+
+for kwlist in $( (compgen -A variable) | grep _data_list ) ; do
+  declare -p $kwlist
+  eval KEYS="\${!${kwlist}[@]}"
+  #declare -p my_more_kwlist_keys
+  for key in $KEYS  # make sure you include the quotes there
+  do
+    #echo $key
+    eval VAL="\${${kwlist}[$key]}"
+    #echo $my_more_kwlist_val
+    if [[ $VAL =~ /export/babel/data/ ]] ; then
+      eval $kwlist["$key"]=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/}
+    fi
+  done
+  declare -p $kwlist
+done
+unset VAL
+unset KEYS
+
+for kwlist in $( (compgen -A variable) | grep _data_dir ) ; do
+  declare -p $kwlist
+  eval KEYS="\${!${kwlist}[@]}"
+  #declare -p my_more_kwlist_keys
+  for key in $KEYS  # make sure you include the quotes there
+  do
+    #echo $key
+    eval VAL="\${${kwlist}[$key]}"
+    #echo $my_more_kwlist_val
+    if [[ $VAL =~ /export/babel/data/ ]] ; then
+      eval $kwlist["$key"]=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/}
+    fi
+  done
+  declare -p $kwlist
+done
+unset VAL
+unset KEYS
+
+for kwlist in $( (compgen -A variable) | grep _more_kwlists ) ; do
+  declare -p $kwlist
+  eval KEYS="\${!${kwlist}[@]}"
+  #declare -p my_more_kwlist_keys
+  for key in $KEYS  # make sure you include the quotes there
+  do
+    #echo $key
+    eval VAL="\${${kwlist}[$key]}"
+    #echo $my_more_kwlist_val
+    if [[ $VAL =~ /export/babel/data/ ]] ; then
+      eval $kwlist["$key"]=${VAL/${BASH_REMATCH[0]}/"/work/02359/jtrmal/"/}
+    fi
+  done
+  declare -p $kwlist
+done
+unset VAL
+unset KEYS
+
+if [ "$babel_type" == "limited" ] ; then
+  train_nj=32
+else
+  train_nj=64
+fi
+dev10h_nj=60
+unsup_nj=120
+shadow_nj=60
+shadow2_nj=120
+eval_nj=120
diff --git a/egs/babel/s5d/local/naive_comb.pl b/egs/babel/s5d/local/naive_comb.pl
new file mode 100755
index 00000000000..74ad20d84e3
--- /dev/null
+++ b/egs/babel/s5d/local/naive_comb.pl
@@ -0,0 +1,234 @@
+#!/usr/bin/env perl
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+use XML::Simple;
+use Data::Dumper;
+use File::Basename;
+
+my $tolerance = 0.5;
+
+sub ReadKwslist {
+  my $kwslist_in = shift @_;
+
+  my $source = "STDIN";
+  if ($kwslist_in ne "-") {
+    open(I, "<$kwslist_in") || die "Fail to open kwslist $kwslist_in.\n";
+    $source = "I";
+  }
+
+  # Read in the kwslist and parse it. Note that this is a naive parse -- I simply
+  # assume that the kwslist is "properly" generated
+  my @KWS;
+  my (@info, $kwid, $tbeg, $dur, $file, $score, $channel);
+  my ($kwlist_filename, $language, $system_id) = ("", "", "");
+  while (<$source>) {
+    chomp;
+
+    if (/<kwslist/) {
+      /kwlist_filename="(\S+)"/ && ($kwlist_filename = $1);
+      /language="(\S+)"/ && ($language = $1);
+      /system_id="(\S+)"/ && ($system_id = $1);
+      @info = ($kwlist_filename, $language, $system_id);
+      next;
+    }
+
+    if (/<detected_kwlist/) {
+      ($kwid) = /kwid="(\S+)"/;
+      next;
+    }
+
+    if (/<kw /) {
+      ($file) = /file="(\S+)"/;
+      ($channel) = /channel="(\S+)"/;
+      ($tbeg) = /tbeg="(\S+)"/;
+      ($dur) = /dur="(\S+)"/;
+      ($score) = /score="(\S+)"/;
+      my ($decision) = /decision="(\S+)"/;
+      push(@KWS, [$kwid, $file, $channel, $tbeg, $dur, $score, $decision]);
+      next;
+    }
+  }
+
+  $kwslist_in eq "-" || close(I);
+
+  return [\@info, \@KWS];
+}
+
+sub PrintKwslist {
+  my ($info, $KWS) = @_;
+
+  my $kwslist = "";
+
+  # Starting printing
+  $kwslist .= "<kwslist kwlist_filename=\"$info->[0]\" language=\"$info->[1]\" system_id=\"$info->[2]\">\n";
+  my $prev_kw = "";
+  foreach my $kwentry (@{$KWS}) {
+    if ($prev_kw ne $kwentry->[0]) {
+      if ($prev_kw ne "") {$kwslist .= "  </detected_kwlist>\n";}
+      $kwslist .= "  <detected_kwlist kwid=\"$kwentry->[0]\" search_time=\"1\" oov_count=\"0\">\n";
+      $prev_kw = $kwentry->[0];
+    }
+    $kwslist .= "    <kw file=\"$kwentry->[1]\" channel=\"$kwentry->[2]\" tbeg=\"$kwentry->[3]\" dur=\"$kwentry->[4]\" score=\"$kwentry->[5]\" decision=\"$kwentry->[6]\"";
+    if (defined($kwentry->[7])) {$kwslist .= " threshold=\"$kwentry->[7]\"";}
+    if (defined($kwentry->[8])) {$kwslist .= " raw_score=\"$kwentry->[8]\"";}
+    $kwslist .= "/>\n";
+  }
+  $kwslist .= "  </detected_kwlist>\n";
+  $kwslist .= "</kwslist>\n";
+
+  return $kwslist;
+}
+
+sub KwslistTimeCompare {
+  my ($a, $b) = @_;
+
+  if ($a->[0] eq $b->[0]) {
+    if ($a->[1] eq $b->[1]) {
+      if (abs($a->[3]-$b->[3]) <= $tolerance) {
+        if (abs($a->[3]+$a->[4]-$b->[3]-$b->[4]) <= $tolerance) {
+          return 0;
+        } else {
+          return ($a->[3]+$a->[4]) <=> ($b->[3]+$b->[4]);
+        }
+      } else {
+        return $a->[3] <=> $b->[3];
+      }
+    } else {
+      return $a->[1] cmp $b->[1];
+    }
+  } else {
+    $a->[0] cmp $b->[0];
+  }
+}
+
+sub KwslistTimeSort {
+  return KwslistTimeCompare($a, $b);
+}
+
+my $Usage = <<EOU;
+Usage: naive_comb.pl [options] <w1> <kwslist1> <w2> <kwslist2> ... <kwslist_out>
+  e.g.: naive_comb.pl 0.5 kwslist1.xml 0.5 kwslist2.xml ... kwslist_comb.xml
+
+Allowed options:
+  --method      : Use different combination method (int,   default = 1)
+                  1 -- Weighted sum
+                  2 -- Weighted "powered"
+  --power       : The power of method 2  (float, default = 0.5)
+  --tolerance   : Tolerance for being the same hits (float, default = 0.5)
+
+EOU
+
+my $method = 1;
+my $power = 0.5;
+GetOptions('tolerance=f' => \$tolerance,
+  'method=i' => \$method,
+  'power=f' => \$power,
+  'inv-power=f' => sub { (my $opt, my $val) = @_; $power = 1.0/$val;});
+
+@ARGV >= 3 || die $Usage;
+
+# Workout the input/output source
+@ARGV % 2 == 1 || die "Bad number of (weight, kwslist) pair.\n";
+my @kwslist_file = ();
+my @weight = ();
+while (@ARGV != 1) {
+  my $w = shift @ARGV;
+  $w =~ m/^[0-9.]*$/ || die "Bad weight: $w.\n";
+  push(@weight, $w);
+  push(@kwslist_file, shift @ARGV);
+}
+my $output = shift @ARGV;
+
+# Open the first kwslist
+my ($info, $KWS) = @{ReadKwslist($kwslist_file[0])};
+
+# Open the rest kwslists
+my @kwslist = ();
+for (my $i = 1; $i < @kwslist_file; $i ++) {
+  push(@kwslist, @{ReadKwslist($kwslist_file[$i])}[1]);
+}
+
+# Process the first kwslist
+my @KWS = sort KwslistTimeSort @{$KWS};
+my $w = shift @weight;
+foreach my $kwentry (@$KWS) {
+  if ($method == 1) {
+    $kwentry->[5] = $kwentry->[5] * $w;
+  } elsif ($method == 2) {
+    $kwentry->[5] = ($kwentry->[5]**$power) * $w;
+  } else {
+    die "Method not defined.\n";
+  }
+}
+
+# Start merging the rest kwslists
+while (@kwslist > 0) {
+  my $w = shift @weight;
+  my @kws = sort KwslistTimeSort @{shift @kwslist};
+
+  # We'll take time information from the first system
+  my ($i, $j) = (0, 0);
+  my @from_kws;
+  while ($i < @KWS and $j < @kws) {
+    my $cmp = KwslistTimeCompare($KWS[$i], $kws[$j]);
+    if ($cmp == 0) {
+      if ($method == 1) {
+        $KWS[$i]->[5] += $kws[$j]->[5] * $w;
+      } elsif ($method == 2) {
+        $KWS[$i]->[5] += ($kws[$j]->[5]**$power) * $w;
+      } else {
+        die "Method not defined.\n";
+      }
+      $i ++;
+      $j ++;
+    } elsif ($cmp == -1) {
+      $i ++;
+    } else {
+      if ($method == 1) {
+        $kws[$j]->[5] = $kws[$j]->[5] * $w;
+      } elsif ($method == 2) {
+my $method = 1;
+my $power = 0.5;
+GetOptions('tolerance=f' => \$tolerance,
+           'method=i' => \$method,
+           'power=f' => \$power,
+           'inv-power=f' => sub { (my $opt, my $val) = @_; $power = 1.0/$val;});
+
+@ARGV >= 3 || die $Usage;
+
+# Work out the input/output sources
+@ARGV % 2 == 1 || die "Bad number of (weight, kwslist) pairs.\n";
+my @kwslist_file = ();
+my @weight = ();
+while (@ARGV != 1) {
+  my $w = shift @ARGV;
+  $w =~ m/^[0-9.]*$/ || die "Bad weight: $w.\n";
+  push(@weight, $w);
+  push(@kwslist_file, shift @ARGV);
+}
+my $output = shift @ARGV;
+
+# Open the first kwslist
+my ($info, $KWS) = @{ReadKwslist($kwslist_file[0])};
+
+# Open the remaining kwslists
+my @kwslist = ();
+for (my $i = 1; $i < @kwslist_file; $i ++) {
+  push(@kwslist, @{ReadKwslist($kwslist_file[$i])}[1]);
+}
+
+# Process the first kwslist
+my @KWS = sort KwslistTimeSort @{$KWS};
+my $w = shift @weight;
+foreach my $kwentry (@$KWS) {
+  if ($method == 1) {
+    $kwentry->[5] = $kwentry->[5] * $w;
+  } elsif ($method == 2) {
+    $kwentry->[5] = ($kwentry->[5]**$power) * $w;
+  } else {
+    die "Method not defined.\n";
+  }
+}
+
+# Start merging the remaining kwslists
+while (@kwslist > 0) {
+  my $w = shift @weight;
+  my @kws = sort KwslistTimeSort @{shift @kwslist};
+
+  # We'll take the time information from the first system
+  my ($i, $j) = (0, 0);
+  my @from_kws;
+  while ($i < @KWS and $j < @kws) {
+    my $cmp = KwslistTimeCompare($KWS[$i], $kws[$j]);
+    if ($cmp == 0) {
+      if ($method == 1) {
+        $KWS[$i]->[5] += $kws[$j]->[5] * $w;
+      } elsif ($method == 2) {
+        $KWS[$i]->[5] += ($kws[$j]->[5]**$power) * $w;
+      } else {
+        die "Method not defined.\n";
+      }
+      $i ++;
+      $j ++;
+    } elsif ($cmp == -1) {
+      $i ++;
+    } else {
+      if ($method == 1) {
+        $kws[$j]->[5] = $kws[$j]->[5] * $w;
+      } elsif ($method == 2) {
+        $kws[$j]->[5] = ($kws[$j]->[5]**$power) * $w;
+      } else {
+        die "Method not defined.\n";
+      }
+      push(@from_kws, $kws[$j]);
+      $j ++;
+    }
+  }
+  while ($j < @kws) {
+    if ($method == 1) {
+      $kws[$j]->[5] = $kws[$j]->[5] * $w;
+    } elsif ($method == 2) {
+      $kws[$j]->[5] = ($kws[$j]->[5]**$power) * $w;
+    } else {
+      die "Method not defined.\n";
+    }
+    push(@from_kws, $kws[$j]);
+    $j ++;
+  }
+
+  # Sort again
+  @from_kws = (@KWS, @from_kws);
+  @KWS = sort KwslistTimeSort @from_kws;
+}
+
+if ($method == 2) {
+  foreach my $kwentry (@KWS) {
+    $kwentry->[5] = $kwentry->[5]**(1.0/$power);
+  }
+}
+
+# Sorting and printing
+my $kwslist = PrintKwslist(\@{$info}, \@KWS);
+
+if ($output eq "-") {
+  print $kwslist;
+} else {
+  open(O, ">$output") || die "Failed to open output file: $output\n";
+  print O $kwslist;
+  close(O);
+}
diff --git a/egs/babel/s5d/local/nist_eval/create_compound_set.sh b/egs/babel/s5d/local/nist_eval/create_compound_set.sh
new file mode 100755
index 00000000000..737f7a0fcd9
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/create_compound_set.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+
+#Simple script to create the compound set info that allows for more automated
+#work with the shadow set.
+#
+#The notion of a shadow data set comes from the need to be able to verify
+#the output of the recognizer while decoding the evaluation data.
+#The idea is simple -- instead of decoding just the eval data, we decode both
+#the eval data and the dev data (or at least some portion of it), interleaved
+#randomly.
+#After decoding, we can isolate (split) the output of the decoding (and kws),
+#so that we can score the dev data subset; if the score is identical to
+#the score obtained by decoding the dev set previously, we can be a little
+#more confident that the eval set results are correct.
+
+. ./path.sh
+
+flen=0.01
+
+[ ! -f lang.conf ] && echo "File lang.conf must exist (and contain a valid config)" && exit 1
+. ./lang.conf
+
+devset=dev10h.pem
+evlset=eval.seg
+tgtset=shadow.seg
+tgtdir=
+
+. 
utils/parse_options.sh +[ -z $tgtdir ] && tgtdir=data/$tgtset + +devset_basename=${devset%%.*} +devset_segments=${devset#*.} + +evlset_basename=${evlset%%.*} +evlset_segments=${evlset#*.} + +eval devset_flist=\$${devset_basename}_data_list +eval devset_ecf=\$${devset_basename}_ecf_file +eval devset_rttm=\$${devset_basename}_rttm_file +eval devset_stm=\$${devset_basename}_stm_file + +eval evlset_flist=\$${evlset_basename}_data_list +eval evlset_ecf=\$${evlset_basename}_ecf_file +eval evlset_rttm=\$${evlset_basename}_rttm_file +eval evlset_stm=\$${evlset_basename}_stm_file + +rm -rf $tgtdir/compounds +mkdir -p $tgtdir/compounds +mkdir -p $tgtdir/compounds/$devset +mkdir -p $tgtdir/compounds/$evlset + +echo "Creating compound $tgtdir/compounds/$devset" +( + echo "DEVSET file list: $devset_flist" + cat `readlink -f $devset_flist` > $tgtdir/compounds/$devset/files.list + echo "DEVSET ECF file : $devset_ecf" + cat `readlink -f $devset_ecf` > $tgtdir/compounds/$devset/ecf.xml + echo "DEVSET RTTM file: $devset_rttm" + cat `readlink -f $devset_rttm` > $tgtdir/compounds/$devset/rttm + echo "DEVSET STM file : $devset_stm" + cat `readlink -f $devset_stm` | sed 's/ 1 / A /g' > $tgtdir/compounds/$devset/stm + + cat $tgtdir/segments | grep -w -F -f $tgtdir/compounds/$devset/files.list > $tgtdir/compounds/$devset/segments + awk '{print $1}' $tgtdir/compounds/$devset/segments > $tgtdir/compounds/$devset/utterances + + for kwset_path in $tgtdir/kwset_*; do + kwset=`basename $kwset_path` + output=$tgtdir/compounds/$devset/$kwset + + mkdir -p $output/tmp + cp $tgtdir/$kwset/kwlist.xml $output/ + cp $tgtdir/$kwset/utt.map $output/ + cp $tgtdir/compounds/$devset/ecf.xml $output/ + cp $tgtdir/compounds/$devset/rttm $output/ + local/search/rttm_to_hitlists.sh --segments $tgtdir/segments \ + --utt-table $tgtdir/$kwset/utt.map $tgtdir/compounds/$devset/rttm \ + $tgtdir/$kwset/kwlist.xml $tgtdir/compounds/$devset/ecf.xml \ + $output/tmp $output/hitlist 2> $output/hitlist.fails + + n1=`cat $output/hitlist.fails | wc -l` + n2=`awk '{print $13}' $output/hitlist.fails | sort |uniq -c | wc -l` + + echo "INFO: For kwlist $kwset, $n2 KW types won't be found ($n1 tokens in total)" + + duration=$(cat $devset_ecf | perl -ne 'BEGIN{$dur=0;}{next unless $_ =~ /dur\=/; s/.*dur="([^"]*)".*/$1/; $dur+=$_;}END{print $dur/2}') + + echo $duration > $output/trials + echo $flen > $output/frame_length + + echo "Number of trials: `cat $output/trials`" + echo "Frame lengths: `cat $output/frame_length`" + { + cat $tgtdir/$kwset/f4de_attribs | grep kwlist_name + language=$(grep kwlist $tgtdir/$kwset/kwlist.xml | head -n 1 | sed -E 's/.*language="([^"]*)".*/\1/g') + echo "language=$language" + echo "flen=$flen" + } > $output/f4de_attribs + + cp $tgtdir/$kwset/categories $output/ + done +) + +echo "Creating compound $tgtdir/compounds/$evlset" +( + echo "EVLSET file list: $evlset_flist" + cat `readlink -f $evlset_flist` > $tgtdir/compounds/$evlset/files.list + echo "EVLSET ECF file : $evlset_ecf" + cat `readlink -f $evlset_ecf` > $tgtdir/compounds/$evlset/ecf.xml + if [ ! -z "$evlset_rttm" ]; then + echo "EVLSET RTTM file: $evlset_rttm" + cat `readlink -f $evlset_rttm` > $tgtdir/compounds/$evlset/rttm + fi + if [ ! 
-z "$evlset_stm" ]; then + echo "EVLSET STM file : $evlset_stm" + cat `readlink -f $evlset_stm` | sed 's/ 1 / A /g' > $tgtdir/compounds/$evlset/stm + fi + + cat $tgtdir/segments | \ + grep -w -F -f $tgtdir/compounds/$evlset/files.list > $tgtdir/compounds/$evlset/segments + awk '{print $1}' $tgtdir/compounds/$evlset/segments > $tgtdir/compounds/$evlset/utterances + + for kwset_path in $tgtdir/kwset_*; do + kwset=`basename $kwset_path` + output=$tgtdir/compounds/$evlset/$kwset + + mkdir -p $output/tmp + cp $tgtdir/$kwset/kwlist.xml $output/ + cp $tgtdir/$kwset/utt.map $output/ + cp $tgtdir/compounds/$evlset/ecf.xml $output/ + + if [ -f "$tgtdir/compounds/$evlset/rttm" ]; then + cp $tgtdir/compounds/$evlset/rttm $output/ + local/search/rttm_to_hitlists.sh --segments $tgtdir/segments \ + --utt-table $tgtdir/$kwset/utt.map $tgtdir/compounds/$evlset/rttm \ + $tgtdir/$kwset/kwlist.xml $tgtdir/compounds/$evlset/ecf.xml \ + $output/tmp $output/hitlist 2> $output/hitlist.fails + + n1=`cat $output/hitlist.fails | wc -l` + n2=`awk '{print $13}' $output/hitlist.fails | sort |uniq -c | wc -l` + + echo "INFO: For kwlist $kwset, $n2 KW types won't be found ($n1 tokens in total)" + fi + + duration=$(cat $evlset_ecf | perl -ne 'BEGIN{$dur=0;}{next unless $_ =~ /dur\=/; s/.*dur="([^"]*)".*/$1/; $dur+=$_;}END{print $dur/2}') + + echo $duration > $output/trials + echo $flen > $output/frame_length + + echo "Number of trials: `cat $output/trials`" + echo "Frame lengths: `cat $output/frame_length`" + { + cat $tgtdir/$kwset/f4de_attribs | grep kwlist_name + language=$(grep kwlist $tgtdir/$kwset/kwlist.xml | head -n 1 | sed -E 's/.*language="([^"]*)".*/\1/g') + echo "language=$language" + echo "flen=$flen" + } > $output/f4de_attribs + + cp $tgtdir/$kwset/categories $output/ + done +) + +echo "Compound creation OK." + + diff --git a/egs/babel/s5d/local/nist_eval/create_new_language_configs.FLP.sh b/egs/babel/s5d/local/nist_eval/create_new_language_configs.FLP.sh new file mode 100755 index 00000000000..2af8dc9e410 --- /dev/null +++ b/egs/babel/s5d/local/nist_eval/create_new_language_configs.FLP.sh @@ -0,0 +1,236 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +language="201-haitian" +corpus=/export/babel/data/ +indus=/export/babel/data/scoring/IndusDB +# End configuration section +. ./utils/parse_options.sh + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +corpus=$corpus/$language +lists=./conf/lists/$language/ + +corpusdir=$(find $corpus -maxdepth 1 \( -name "release-current" -o -name "release-current-b" \) -type d) || exit 1 +[ -z "$corpusdir" ] && corpusdir=$(find $corpus -maxdepth 1 -name "*-build" -type d) +[ -z "$corpusdir" ] && echo >&2 "Corpus directory for $language not found!" && exit 1 + +train_dir=$(find $corpusdir -ipath "*/conversational/*" -name "training" -type d) || exit 1 +[ -z "$train_dir" ] && echo >&2 "Corpus directory $corpusdir/*/training/ not found!" && exit 1 + +train_rom_dir=$(find $train_dir -name "transcript_roman" -type d) || exit 1 +echo "# include common settings for fullLP systems." +echo ". 
conf/common.fullLP || exit 1;" +echo -e "\n" + +echo "#speech corpora files location" +echo "train_data_dir=$train_dir" +if [ -f "$lists/training.list" ] ; then + echo "train_data_list=$lists/training.list" +elif [ -f "$lists/train.FullLP.list" ] ; then + echo "train_data_list=$lists/train.FullLP.list" +else + echo >&2 "Training list $lists/training.list not found" +fi + +echo "train_nj=32" +echo -e "\n" + + +indusid=$(find $corpus -name "IARPA*-build" -type d) +[ -z $indusid ] && indusid=$(find $corpus \( -name "release-current" -o -name "release-current-b" \) -type d) +[ -z $indusid ] && echo >&2 "Didn't find anything that could be used as IndusDB id" && exit 1 + +indusid=$(basename ${indusid}) +indusid=${indusid%%-build} +dataset=dev10h +dev10h_dir=$(find $corpusdir -ipath "*/conversational/*" -name "dev" -type d) || exit 1 +indusdev10=$(find $indus/ -maxdepth 1 -name "$indusid*dev" -type d) +if [ -z "$indusdev10" ] ; then + echo >&2 "IndusDB entry \"$indusid*dev\" not found -- removing the version and retrying" + indusid=${indusid%%-v*} + indusdev10=$(find $indus/ -maxdepth 1 -name "$indusid*dev" -type d) + if [ -z "$indusdev10" ] ; then + echo >&2 "IndusDB entry \"$indusid*dev\" not found -- keeping only the language code and retrying" + indusid=${language%%-*} + indusdev10=$(find $indus/ -maxdepth 1 -name "*${indusid}*dev" -type d) + if [ -z "$indusdev10" ] ; then + echo >&2 "IndusDB configuration for the language code $indusid not found" + exit 1 + fi + fi +fi + +if [ -z "$indusdev10" ] ; then + echo "" +else + dev10h_rttm=$(find $indusdev10/ -name "*mitllfa3.rttm" ) + dev10h_ecf=$(find $indusdev10/ -name "*ecf.xml" ) + dev10h_stm=$(find $indusdev10/ -name "*stm" -not -name "*cond-speaker*" ) + kwlists1=$(find $indusdev10/ -name "*.kwlist.xml" | sort -V ) + kwlists2=$(find $indusdev10/ -name "*.kwlist?*.xml" | sort -V ) + kwlists="$kwlists1 $kwlists2" + dev10h_kwlists="$kwlists" +fi + +echo "#Radical reduced DEV corpora files location" +echo "dev2h_data_dir=$dev10h_dir" +echo "dev2h_data_list=$lists/dev.2h.list" +[ ! -z ${dev10h_rttm:-} ] && echo "dev2h_rttm_file=$dev10h_rttm" +[ ! -z ${dev10h_ecf:-} ] && echo "dev2h_ecf_file=$dev10h_ecf" +[ ! -z ${dev10h_stm:-} ] && echo "dev2h_stm_file=$dev10h_stm" +if [ ! -z "${kwlists:-}" ] ; then + echo "dev2h_kwlists=(" + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # dev2h_kwlists" +fi +echo "dev2h_nj=16" +echo "dev2h_subset_ecf=true" +echo -e "\n" + +echo "#Official DEV corpora files location" +echo "dev10h_data_dir=$dev10h_dir" +echo "dev10h_data_list=$lists/dev.list" +[ ! -z ${dev10h_rttm:-} ] && echo "dev10h_rttm_file=$dev10h_rttm" +[ ! -z ${dev10h_ecf:-} ] && echo "dev10h_ecf_file=$dev10h_ecf" +[ ! -z ${dev10h_stm:-} ] && echo "dev10h_stm_file=$dev10h_stm" +if [ ! -z "${kwlists:-}" ] ; then + echo "dev10h_kwlists=(" + for list in $kwlists; do + id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/'); + echo " [$id]=$list" + done + echo ") # dev10h_kwlists" +fi +echo "dev10h_nj=32" +echo -e "\n" + +dataset="eval" +eval_dir=$(find $corpus -ipath "*-eval/*/conversational/*" -name "$dataset" -type d) || exit 1 +[ -z "$eval_dir" ] && { eval_dir=$(find $corpusdir -ipath "*/conversational/*" -name "eval" -type d) || exit 1; } +if [ ! 
-z "$eval_dir" ] ; then
+  indus_set=$(find $indus/ -maxdepth 1 -name "$indusid*$dataset" -type d)
+  if [ -z "$indus_set" ] ; then
+    eval_ecf=$(find $indus/ -maxdepth 1 -type f -name "*$indusid*${dataset}.ecf.xml" )
+    eval_kwlists1=$(find $indus -name "*$indusid*${dataset}.kwlist*.xml" | sort -V)
+    eval_kwlists2=$(find $indus -name "*$indusid*${dataset}.kwlist?*.xml" | sort -V)
+    eval_kwlists="$eval_kwlists1 $eval_kwlists2"
+  else
+    eval_rttm=$(find $indus_set/ -name "*mitllfa3.rttm" )
+    eval_ecf=$(find $indus_set/ -name "*ecf.xml" )
+    eval_stm=$(find $indus_set/ -name "*stm" -not -name "*cond-speaker*" )
+    eval_kwlists1=$(find $indus -name "*.kwlist.xml" | sort -V)
+    eval_kwlists2=$(find $indus -name "*.kwlist?*.xml" | sort -V)
+    eval_kwlists="$eval_kwlists1 $eval_kwlists2"
+  fi
+  echo "#Official EVAL period evaluation data files"
+  echo "eval_data_dir=$eval_dir"
+  echo "eval_data_list=$lists/eval.list"
+  echo "${dataset}_ecf_file=$eval_ecf"
+  echo "${dataset}_kwlists=("
+  for list in $eval_kwlists; do
+    id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/');
+    echo "  [$id]=$list"
+  done
+  echo ") # ${dataset}_kwlists"
+  echo "eval_nj=32"
+  echo -e "\n"
+
+  dataset=evalpart1
+  indus_set=$(find $indus/ -maxdepth 1 -name "$indusid*$dataset" -type d)
+  if [ -z "$indus_set" ] ; then
+    echo >&2 "IndusDB entry \"$indusid*$dataset\" not found -- keeping only the language code and retrying"
+    indusid=${language%%-*}
+    indus_set=$(find $indus/ -maxdepth 1 -name "*${indusid}*$dataset" -type d)
+    if [ -z "$indus_set" ] ; then
+      echo >&2 "IndusDB configuration for the language code $indusid not found"
+    fi
+  fi
+  if [ ! -z "$indus_set" ] ; then
+    evalpart1_rttm=$(find $indus_set/ -name "*mitllfa3.rttm" )
+    evalpart1_ecf=$(find $indus_set/ -name "*ecf.xml" )
+    evalpart1_stm=$(find $indus_set/ -name "*stm" -not -name "*cond-speaker*" )
+    kwlists1=$(find $indus_set/ -name "*.kwlist.xml" | sort -V)
+    kwlists2=$(find $indus_set/ -name "*.kwlist?*.xml" | sort -V)
+    kwlists="$kwlists1 $kwlists2"
+
+    kwlists="$dev10h_kwlists $eval_kwlists $kwlists"
+    echo "#Official post-EVAL period data files"
+    echo "${dataset}_data_dir=$eval_dir"
+    echo "${dataset}_data_list=$lists/${dataset}.list"
+    echo "${dataset}_rttm_file=$evalpart1_rttm"
+    echo "${dataset}_ecf_file=$evalpart1_ecf"
+    echo "${dataset}_stm_file=$evalpart1_stm"
+    echo "${dataset}_kwlists=("
+    declare -A tmp_kwlists;
+    for list in $kwlists; do
+      id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/');
+      tmp_kwlists[$id]="$list"
+    done
+
+    indices=$(
+      for id in "${!tmp_kwlists[@]}"; do
+        echo $id
+      done | sort -V | paste -s
+    )
+    for id in $indices; do
+      echo "  [$id]=${tmp_kwlists[$id]}"
+    done
+    echo ") # ${dataset}_kwlists"
+    echo "${dataset}_nj=32"
+    echo -e "\n"
+  fi
+
+  dataset=shadow
+  echo "#Shadow data files"
+  echo "shadow_data_dir=("
+  echo "  $dev10h_dir"
+  echo "  $eval_dir"
+  echo ") # shadow_data_dir"
+  echo "shadow_data_list=("
+  echo "  $lists/dev.list"
+  echo "  $lists/eval.list"
+  echo ") # shadow_data_list"
+  echo "shadow_ecf_file=$dev10h_ecf"
+  echo "shadow_rttm_file=$dev10h_rttm"
+  echo "shadow_stm_file=$dev10h_stm"
+  echo "shadow_kwlists=("
+  for list in $eval_kwlists; do
+    id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/');
+    echo "  [$id]=$list"
+  done
+  echo ") # shadow_kwlists"
+  echo "shadow_nj=32"
+  echo -e "\n"
+fi
+
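+# For orientation, the stdout of this script is meant to be saved as the
+# language's config; a kwlists block it emits would look like this
+# (hypothetical IndusDB paths):
+#   dev10h_kwlists=(
+#     [kwlist]=/export/babel/data/scoring/IndusDB/.../IARPA-babel201b.dev.kwlist.xml
+#     [kwlist2]=/export/babel/data/scoring/IndusDB/.../IARPA-babel201b.dev.kwlist2.xml
+#   ) # dev10h_kwlists
+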
+dataset=untranscribed-training
+unsup_dir=$(find $corpusdir -ipath "*/conversational/*" -name "$dataset" -type d) || exit 1
+unsup_list=$lists/untranscribed-training.list
+[ ! -f $unsup_list ] && echo >&2 "Unsupervised training set not found $unsup_list"
+if [ -f $unsup_list ] ; then
+  echo "#Unsupervised dataset for FullLP condition"
+  echo "unsup_data_dir=$unsup_dir"
+  echo "unsup_data_list=$unsup_list"
+  echo "unsup_nj=32"
+  echo -e "\n"
+else
+  echo "#Unsupervised training set file ($unsup_list) not found."
+fi
+
+lexicon=$(find $corpusdir -ipath "*/conversational/*" -name "lexicon.txt" -type f) || exit 1
+echo "lexicon_file=$lexicon"
+
+if [ ! -z "$train_rom_dir" ] ; then
+  echo "lexiconFlags=\"--romanized --oov <unk>\""
+fi
+echo -e "\n\n"
+
+
diff --git a/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh b/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh
new file mode 100755
index 00000000000..2ffb73810e3
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh
@@ -0,0 +1,204 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal <jtrmal@gmail.com> )
+# License: Apache 2.0
+
+# Begin configuration section.
+language="201-haitian"
+# End configuration section
+. ./utils/parse_options.sh
+
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+corpus=/export/babel/data/$language
+lists=./conf/lists/$language/
+indus=/export/babel/data/scoring/IndusDB
+
+corpusdir=$(find $corpus -maxdepth 1 -name "*-build" -type d) || exit 1
+[ -z "$corpusdir" ] && echo >&2 "Corpus directory for $language not found!" && exit 1
+
+train_dir=$(find $corpusdir -ipath "*/conversational/*" -name "training" -type d) || exit 1
+[ -z "$train_dir" ] && echo >&2 "Corpus directory $corpusdir/*/training/ not found!" && exit 1
+
+train_rom_dir=$(find $train_dir -name "transcript_roman" -type d) || exit 1
+echo "# include common settings for limitedLP systems."
+echo ". conf/common.limitedLP || exit 1;"
+echo -e "\n"
+
+echo "#speech corpora files location"
+echo "train_data_dir=$train_dir"
+echo "train_data_list=$lists/sub-train.list"
+echo "train_nj=32"
+echo -e "\n"
+
+
+indusid=$(find $corpus -name "IARPA*-build" -type d)
+indusid=$(basename ${indusid})
+indusid=${indusid%%-build}
+dataset=dev10h
+dev10h_dir=$(find $corpusdir -ipath "*/conversational/*" -name "dev" -type d) || exit 1
+indusdev10=$(find $indus/ -maxdepth 1 -name "$indusid*dev" -type d)
+if [ -z "$indusdev10" ] ; then
+  echo >&2 "IndusDB entry \"$indusid*dev\" not found -- removing the version and retrying"
+  indusid=${indusid%%-v*}
+  indusdev10=$(find $indus/ -maxdepth 1 -name "$indusid*dev" -type d)
+fi
+
+if [ -z "$indusdev10" ] ; then
+  echo ""
+else
+  dev10h_rttm=$(find $indusdev10/ -name "*mitllfa3.rttm" )
+  dev10h_ecf=$(find $indusdev10/ -name "*ecf.xml" )
+  dev10h_stm=$(find $indusdev10/ -name "*stm" -not -name "*cond-speaker*" )
+  kwlists1=$(find $indusdev10/ -name "*.kwlist.xml" | sort -V )
+  kwlists2=$(find $indusdev10/ -name "*.kwlist?*.xml" | sort -V )
+  kwlists="$kwlists1 $kwlists2"
+fi
+
+echo "#Radical reduced DEV corpora files location"
+echo "dev2h_data_dir=$dev10h_dir"
+echo "dev2h_data_list=$lists/dev.2h.list"
+[ ! -z ${dev10h_rttm:-} ] && echo "dev2h_rttm_file=$dev10h_rttm"
+[ ! -z ${dev10h_ecf:-} ] && echo "dev2h_ecf_file=$dev10h_ecf"
+[ ! -z ${dev10h_stm:-} ] && echo "dev2h_stm_file=$dev10h_stm"
+if [ ! -z "${kwlists:-}" ] ; then
+  echo "dev2h_kwlists=("
+  for list in $kwlists; do
+    id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/');
+    echo "  [$id]=$list"
+  done
+  echo ") # dev2h_kwlists"
+fi
+echo "dev2h_nj=16"
+echo "dev2h_subset_ecf=true"
+echo -e "\n"
+
+echo "#Official DEV corpora files location"
+echo "dev10h_data_dir=$dev10h_dir"
+echo "dev10h_data_list=$lists/dev.list"
+[ ! -z ${dev10h_rttm:-} ] && echo "dev10h_rttm_file=$dev10h_rttm"
+[ ! -z ${dev10h_ecf:-} ] && echo "dev10h_ecf_file=$dev10h_ecf"
+[ ! -z ${dev10h_stm:-} ] && echo "dev10h_stm_file=$dev10h_stm"
+if [ ! -z "${kwlists:-}" ] ; then
+  echo "dev10h_kwlists=("
+  for list in $kwlists; do
+    id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/');
+    echo "  [$id]=$list"
+  done
+  echo ") # dev10h_kwlists"
+fi
+echo "dev10h_nj=32"
+echo -e "\n"
+
+dataset="eval"
+eval_dir=$(find $corpus -ipath "*-eval/*/conversational/*" -name "$dataset" -type d) || exit 1
+if [ ! -z "$eval_dir" ] ; then
+  indus_set=$(find $indus/ -maxdepth 1 -name "$indusid*$dataset" -type d)
+  if [ -z "$indus_set" ] ; then
+    eval_ecf=$(find $indus/ -maxdepth 1 -type f -name "*$indusid*${dataset}.ecf.xml" )
+    eval_kwlists1=$(find $indus -name "*$indusid*${dataset}.kwlist*.xml" | sort -V)
+    eval_kwlists2=$(find $indus -name "*$indusid*${dataset}.kwlist?*.xml" | sort -V)
+    eval_kwlists="$eval_kwlists1 $eval_kwlists2"
+  else
+    eval_rttm=$(find $indus_set/ -name "*mitllfa3.rttm" )
+    eval_ecf=$(find $indus_set/ -name "*ecf.xml" )
+    eval_stm=$(find $indus_set/ -name "*stm" -not -name "*cond-speaker*" )
+    eval_kwlists1=$(find $indus -name "*.kwlist.xml" | sort -V)
+    eval_kwlists2=$(find $indus -name "*.kwlist?*.xml" | sort -V)
+    eval_kwlists="$eval_kwlists1 $eval_kwlists2"
+  fi
+  echo "#Official EVAL period evaluation data files"
+  echo "eval_data_dir=$eval_dir"
+  echo "eval_data_list=$lists/eval.list"
+  echo "${dataset}_ecf_file=$eval_ecf"
+  echo "${dataset}_kwlists=("
+  for list in $eval_kwlists; do
+    id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/');
+    echo "  [$id]=$list"
+  done
+  echo ") # ${dataset}_kwlists"
+  echo "eval_nj=32"
+  echo -e "\n"
+
+  dataset=evalpart1
+  indus_set=$(find $indus/ -maxdepth 1 -name "$indusid*$dataset" -type d)
+  if [ -z "$indus_set" ] ; then
+    echo ""
+  else
+    evalpart1_rttm=$(find $indus_set/ -name "*mitllfa3.rttm" )
+    evalpart1_ecf=$(find $indus_set/ -name "*ecf.xml" )
+    evalpart1_stm=$(find $indus_set/ -name "*stm" -not -name "*cond-speaker*" )
+    kwlists1=$(find $indus_set/ -name "*.kwlist.xml" | sort -V)
+    kwlists2=$(find $indus_set/ -name "*.kwlist?*.xml" | sort -V)
+    kwlists="$kwlists1 $kwlists2"
+  fi
+  echo "#Official post-EVAL period data files"
+  echo "${dataset}_data_dir=$eval_dir"
+  echo "${dataset}_data_list=$lists/${dataset}.list"
+  echo "${dataset}_rttm_file=$evalpart1_rttm"
+  echo "${dataset}_ecf_file=$evalpart1_ecf"
+  echo "${dataset}_stm_file=$evalpart1_stm"
+  echo "${dataset}_kwlists=("
+  for list in $kwlists; do
+    id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/');
+    echo "  [$id]=$list"
+  done
+  echo ") # ${dataset}_kwlists"
+  echo "${dataset}_nj=32"
+  echo -e "\n"
+
+
+  dataset=shadow
+  echo "#Shadow data files"
+  echo "shadow_data_dir=("
+  echo "  $dev10h_dir"
+  echo "  $eval_dir"
+  echo ") # shadow_data_dir"
+  echo "shadow_data_list=("
+  echo "  $lists/dev.list"
+  echo "  $lists/eval.list"
+  echo ") # shadow_data_list"
+  echo "shadow_ecf_file=$dev10h_ecf"
+  echo "shadow_rttm_file=$dev10h_rttm"
+  echo "shadow_stm_file=$dev10h_stm"
+  echo "shadow_kwlists=("
+  for list in $eval_kwlists; do
+    id=$(echo $list | sed 's/.*\(kwlist[0-9]*\)\.xml/\1/');
+    echo "  [$id]=$list"
+  done
+  echo ") # shadow_kwlists"
+  echo "shadow_nj=32"
+  echo -e "\n"
+fi
+
+dataset=untranscribed-training
+unsup_dir=$(find $corpusdir -ipath "*/conversational/*" -name "$dataset" -type d) || exit 1
+unsup_lists=( $lists/untranscribed-training.list $lists/sub-train.untranscribed.list)
+unsup_dirs=( $unsup_dir $train_dir )
+echo "#Unsupervised dataset for LimitedLP condition"
+echo "unsup_data_list=("
+for list in ${unsup_lists[*]}; do
+  [ ! -f $list ] && echo "Unsupervised training set not found $list"
+  echo "  $list";
+done
+echo ") # unsup_data_list"
+
+echo "unsup_data_dir=("
+for dir in ${unsup_dirs[*]}; do
+  [ ! -d $dir ] && echo "Unsupervised training data dir not found $dir"
+  echo "  $dir";
+done
+echo ") # unsup_data_dir"
+
+echo "unsup_nj=32"
+echo -e "\n"
+
+lexicon=$(find $corpusdir -ipath "*/conversational/*" -name "lexicon.sub-train.txt" -type f) || exit 1
+echo "lexicon_file=$lexicon"
+
+if [ ! -z "$train_rom_dir" ] ; then
+  echo "lexiconFlags=\"--romanized --oov <unk>\""
+fi
+echo -e "\n\n"
+
+
diff --git a/egs/babel/s5d/local/nist_eval/export_systems.sh b/egs/babel/s5d/local/nist_eval/export_systems.sh
new file mode 100755
index 00000000000..d0af608416c
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/export_systems.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+set -e
+set -o pipefail
+
+. ./cmd.sh; . ./path.sh;
+
+
+#(
+#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it*
+#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it*
+#) &
+#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem eval.uem exp/tri6*_nnet*/decode_shadow.uem*
+#wait
+
+(
+bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.uem_it*
+#bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/sgmm5_mmi_b0.1/decode_*shadow.uem_it*
+) &
+bash filter_data.sh --cmd "$decode_cmd" data/shadow.uem dev10h.uem exp/tri6*_nnet*/decode_shadow.uem
+wait
+
+wait
+exit
+
+bash make_release.sh --dryrun false --dir exp/sgmm5_mmi_b0.1 --data data/shadow.uem --master dev10h.uem lang.conf ./release
+bash make_release.sh --dryrun false --dir exp/tri6b_nnet --data data/shadow.uem --master dev10h.uem lang.conf ./release
+bash make_release.sh --dryrun false --dir exp_bnf/sgmm7_mmi_b0.1 --data data/shadow.uem --master dev10h.uem lang.conf ./release
+
+bash make_release.sh --dryrun false --dir exp/sgmm5_mmi_b0.1 --extrasys "NEWJHU" --data data/dev10h.uem --master dev10h.uem lang.conf ./release
+bash make_release.sh --dryrun false --dir exp/tri6b_nnet --extrasys "NEWJHU" --data data/dev10h.uem --master dev10h.uem lang.conf ./release
+bash make_release.sh --dryrun false --dir exp_bnf/sgmm7_mmi_b0.1 --extrasys "NEWJHU" --data data/dev10h.uem --master dev10h.uem lang.conf ./release
+
+
diff --git a/egs/babel/s5d/local/nist_eval/filter_data.sh b/egs/babel/s5d/local/nist_eval/filter_data.sh
new file mode 100755
index 00000000000..143102032c2
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/filter_data.sh
@@ -0,0 +1,152 @@
+. ./path.sh
+
+min_lmwt=5
+max_lmwt=25
+cer=0
+nbest=-1
+cmd=run.pl
+ntrue_from=
+. ./utils/parse_options.sh
+
+min_lmwt_start=$min_lmwt
+max_lmwt_start=$max_lmwt
+
+datadir=$1; shift
+name=$1; shift
+. ./lang.conf
+
+set -e
+set -o pipefail
+
+[ ! -d $datadir/compounds/$name ] && echo "Compound called $name does not exist in $datadir/compounds/" && exit 1
+ecf=$datadir/compounds/$name/ecf.xml
+cat $ecf | grep -P -o '(?<=audio_filename\=")[^"]*' > $datadir/compounds/$name/files.list
+filelist=$datadir/compounds/$name/files.list
+[ -f $datadir/compounds/$name/rttm ] && rttm=$datadir/compounds/$name/rttm
+[ -f $datadir/compounds/$name/stm ] && stm=$datadir/compounds/$name/stm
+
+if [ -f $ecf ] ; then
+  duration=`head -1 $ecf |\
+    grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\
+    perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2.0;}'`
+  echo "INFO: Using duration $duration seconds (from ECF)."
+else
+  echo "WARNING: Using default duration. ECF wasn't specified?"
+  duration=9999
+fi
+
+inputname=`basename $datadir`
+outputname=$name
+
+while (( "$#" )); do
+  resultdir=$1;shift
+  echo "Processing data directory $resultdir"
+
+  [ ! -d $resultdir ] && echo "Decode dir $resultdir does not exist!" && exit 1;
+
+  targetdir=$resultdir/$outputname
+
+
+  min_existing=
+  max_existing=
+  for lmw in `seq $min_lmwt_start $max_lmwt_start`; do
+    [ -d $resultdir/score_$lmw ] && [ -z $min_existing ] && min_existing=$lmw
+    [ -d $resultdir/score_$lmw ] && [ ! -z $min_existing ] && max_existing=$lmw
+  done
+  if [ -z $min_existing ] || [ -z $max_existing ] ; then
+    for lmw in `seq $min_lmwt_start $max_lmwt_start`; do
+      [ -d $resultdir/kwset_kwlist_$lmw ] && [ -z $min_existing ] && min_existing=$lmw
+      [ -d $resultdir/kwset_kwlist_$lmw ] && [ ! -z $min_existing ] && max_existing=$lmw
+    done
+  fi
+  [ -z $min_existing ] && echo "Data directories to be scored could not be found!" && exit 1
+  [ -z $max_existing ] && echo "Data directories to be scored could not be found!" && exit 1
+  min_lmwt=$min_existing
+  max_lmwt=$max_existing
+  echo "Found data directories for range LMWT=$min_lmwt:$max_lmwt"
+
+  if [ -d $resultdir/score_${min_lmwt} ] ; then
+    $cmd LMWT=$min_lmwt:$max_lmwt $targetdir/scoring/filter.LMWT.log \
+      set -e';' set -o pipefail';' \
+      mkdir -p $targetdir/score_LMWT/';'\
+      test -f $resultdir/score_LMWT/$inputname.ctm '&&' \
+      utils/filter_scp.pl $filelist $resultdir/score_LMWT/$inputname.ctm '>' \
+      $targetdir/score_LMWT/$outputname.ctm || exit 1
+
+    if [ ! -z $stm ] && [ -f $stm ] ; then
+      echo "For scoring CTMs, this STM is used: $stm"
+      local/score_stm.sh --min-lmwt $min_lmwt --max-lmwt $max_lmwt --cer $cer --cmd "$cmd" $datadir/compounds/$name data/lang $targetdir
+    else
+      echo "Not running scoring, $datadir/compounds/$name/stm does not exist"
+    fi
+  fi
+
+
+  kws_tasks=""
+
+  for kws in $datadir/kwset_*; do
+    kws=`basename $kws`
+    echo $kws
+    kws_tasks+=" $kws"
+  done
+
+  for kws in $kws_tasks ; do
+    echo "Processing KWS task: $kws"
+    mkdir -p $targetdir/$kws
+
+    echo -e "\tFiltering... $kws LMWT=$min_lmwt:$max_lmwt"
+
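+    # In outline, the filtering below: 1) merges the per-job result files
+    # (result.N[.gz]) of one LMWT, 2) maps utterance ids to utterance names
+    # (int2sym.pl + utt.map), 3) keeps only the utterances belonging to this
+    # compound (filter_scp.pl against the utterances list), 4) maps the names
+    # back to ids (sym2int.pl), and 5) prunes the hits with
+    # local/search/filter_kws_results.pl --nbest (the default -1 presumably
+    # keeps everything).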
+    indices_dir=$resultdir/kws_indices
+    for lmwt in $(seq $min_lmwt $max_lmwt) ; do
+      kwsoutput=${targetdir}/${kws}_${lmwt}
+      indices=${indices_dir}_$lmwt
+      nj=$(cat $indices/num_jobs)
+
+      # This is a memory-efficient way to do the filtering.
+      # We do it this way because the result.* files can be fairly big
+      # and we do not want to run into memory problems.
+      files=""
+      for job in $(seq 1 $nj); do
+        if [ -f $resultdir/${kws}_${lmwt}/result.${job}.gz ] ; then
+          files="$files <(gunzip -c $resultdir/${kws}_${lmwt}/result.${job}.gz)"
+        elif [ -f $resultdir/${kws}_${lmwt}/result.${job} ] ; then
+          files="$files $resultdir/${kws}_${lmwt}/result.${job} "
+        else
+          echo >&2 "The file $resultdir/${kws}_${lmwt}/result.${job}[.gz] does not exist"
+          exit 1
+        fi
+      done
+      # We have to call this using eval, as we need bash to interpret
+      # the (possible) command substitutions in the case of gz files.
+      # bash -c would probably work as well, but would spawn another
+      # shell instance.
+      echo $kwsoutput
+      echo $datadir/compounds/$name/utterances
+      mkdir -p $kwsoutput
+      eval "sort -m -u $files" |\
+        int2sym.pl -f 2 $datadir/$kws/utt.map | \
+        utils/filter_scp.pl -f 2 $datadir/compounds/$name/utterances |\
+        sym2int.pl -f 2 $datadir/$kws/utt.map |\
+        local/search/filter_kws_results.pl --likes --nbest $nbest > $kwsoutput/results || exit 1
+    done
+
+    ntrue_from_args=""
+    if [ ! -z "$ntrue_from" ]; then
+      echo "Using $resultdir/$ntrue_from/$kws for NTRUE"
+      ntrue_from_args=" --ntrue-from $resultdir/$ntrue_from/$kws"
+    fi
+    if [ ! -z $rttm ] ; then
+      local/search/score.sh --cmd "$cmd" --extraid ${kws##kwset_}\
+        --min-lmwt $min_lmwt --max-lmwt $max_lmwt $ntrue_from_args \
+        data/lang $datadir/compounds/$name ${targetdir}/${kws} || exit 1;
+    elif [ ! -z $ntrue_from ] ; then
+      local/search/normalize.sh --cmd "$cmd" --extraid ${kws##kwset_}\
+        --min-lmwt $min_lmwt --max-lmwt $max_lmwt $ntrue_from_args \
+        data/lang $datadir/compounds/$name ${targetdir}/${kws} || exit 1;
+    else
+      echo >&2 "Cannot score and don't know which compound set to use to inherit the config"
+      exit 1
+    fi
+  done
+
+done
diff --git a/egs/babel/s5d/local/nist_eval/get_training_times.sh b/egs/babel/s5d/local/nist_eval/get_training_times.sh
new file mode 100755
index 00000000000..f5b0012c2f2
--- /dev/null
+++ b/egs/babel/s5d/local/nist_eval/get_training_times.sh
@@ -0,0 +1,229 @@
+if [ -z $1 ] ; then
+  dir=`pwd`
+else
+  dir=$1
+fi
+echo $dir
+
+
+convertsecs() {
+  h=$(($1/3600))
+  m=$((($1/60)%60))
+  s=$(($1%60))
+  printf "%02d:%02d:%02d\n" $h $m $s
+}
+
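+# process() reads the summary printed by local/summarize_logs.pl on stdin and
+# eval's its "group=all" line; that line is assumed to look roughly like
+#   group=all total_threads=16 total_cpu_time=3600 clock_time=1200
+# (only total_threads, total_cpu_time and clock_time are actually used below).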
+function process {
+  count=1
+  if [ ! -z $1 ]; then
+    count=$1
+  fi
+
+  replace=""
+  for a in `seq 1 $count` ; do
+    replace+="\t"
+  done
+
+  (
+  eval `grep "group=all"`
+  echo -n "threads=$total_threads"
+  echo -n " cpu_time=$total_cpu_time wall_time=$clock_time"
+  echo -n " human_cpu_time="`convertsecs $total_cpu_time`
+  echo -n " human_wall_time="`convertsecs $clock_time`
+  echo ""
+  ) | sed 's/^/'$replace'/g'
+}
+
+function legend {
+  echo -ne '"'"$@"'" '
+}
+
+legend Parameterization dev/train
+local/summarize_logs.pl $dir/exp/make_*/*train*/ | process
+
+if [ -d $dir/data/local/extend ] ; then
+  legend "Extending the lexicon"
+  local/summarize_logs.pl $dir/data/local/extend/tmp/log | process
+fi
+
+legend "Training up to stage tri5"
+local/summarize_logs.pl $dir/exp/mono*/log $dir/exp/tri{1..5}/log $dir/exp/tri{1..4}_ali*/log | process
+
+legend "SGMM2 stage training"
+local/summarize_logs.pl $dir/exp/ubm5/log $dir/exp/sgmm5/log $dir/exp/tri5_ali/log | process
+
+legend "SGMM2+bMMI stage training"
+local/summarize_logs.pl $dir/exp/sgmm5_*/log $dir/exp/ubm5/log $dir/exp/sgmm5_denlats/log/* | process
+
+nnet=tri6_nnet
+[ ! -d $dir/exp/$nnet ] && nnet=tri6b_nnet
+
+legend "DNN stage training GPU"
+local/summarize_logs.pl $dir/exp/$nnet/log | process
+
+legend "BNF stage training"
+local/summarize_logs.pl $dir/exp_bnf/tri6_bnf/log | process
+
+legend "BNF stage training GPU"
+local/summarize_logs.pl $dir/exp_bnf/tri{5,6}/log $dir/exp_bnf/sgmm7*/log \
+  $dir/exp_bnf/sgmm7_denlats/log/* $dir/exp_bnf/ubm7 | process
+
+legend "SEGMENTATION TRAINING: "
+local/summarize_logs.pl $dir/exp/tri4_train_seg_ali/log \
+  $dir/exp/make_plp_pitch/train_seg/ \
+  $dir/exp/tri4b_seg/log | process
+
+semisup=exp_bnf_semisup2
+if [ -d $dir/param_bnf_semisup ] || [ -d $dir/param_bnf_semisup2 ] ; then
+  [ ! -d $dir/$semisup ] && semisup=exp_bnf_semisup
+
+  decode=unsup.seg
+  legend "BNF_SEMISUP training, segmentation "
+  local/summarize_logs.pl $dir/exp/make_seg/$decode/log \
+    $dir/exp/make_seg/$decode/make_plp/ \
+    $dir/exp/tri4b_seg/decode_${decode}/log \
+    $dir/exp/make_plp/$decode | process
+
+  legend "BNF_SEMISUP training, decode unsup.seg TRI5 "
+  local/summarize_logs.pl $dir/exp/tri5/decode_*${decode}*/log | process
+  legend "BNF_SEMISUP training, decode unsup.seg PLP "
+  local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process
+  legend "BNF_SEMISUP training, decode unsup.seg DNN "
+  local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/log | process
+  legend "BNF_SEMISUP training, data preparation for BNF_SEMISUP "
+  local/summarize_logs.pl $dir/exp/combine2_post/unsup.seg/log \
+    $dir/exp/combine2_post/unsup.seg/decode_unsup.seg/log\
+    $dir/exp/tri6_nnet_ali/log | process
+
+  legend "BNF_SEMISUP training, TRAIN BNF_SEMISUP BNF GPU "
+  local/summarize_logs.pl $dir/$semisup/tri6_bnf/log | process
+  legend "BNF_SEMISUP training, TRAIN BNF_SEMISUP BNF "
+  local/summarize_logs.pl $dir/$semisup/tri{5,6}/log $dir/exp_bnf/sgmm7*/log \
+    $dir/exp_bnf/sgmm7_denlats/log/* $dir/exp_bnf/ubm7 | process
+fi
+
+if [ -d $dir/exp/tri6_nnet_mpe ] ; then
+  legend "DNN_MPE stage CPU training"
+  local/summarize_logs.pl $dir/exp/tri6_nnet_ali/log/ \
+    $dir/exp/tri6_nnet_denlats/log/* | process
+
+  legend "DNN_MPE stage GPU training"
+  local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/log/ | process
+fi
+
+#~decode=dev10h.seg
+#~legend "DEV10H.SEG decoding"
+#~legend "Segmentation: "
+#~local/summarize_logs.pl $dir/exp/make_seg/$decode/log \
+#~  $dir/exp/make_seg/$decode/make_plp/ \
+#~  $dir/exp/tri4b_seg/decode_${decode}/log \
+#~ 
$dir/exp/make_plp/$decode | process +#~legend "Decode $decode TRI5: " +#~local/summarize_logs.pl $dir/exp/tri5/decode_*${decode}*/log | process +#~legend "Decode $decode PLP: " +#~local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process +#~legend "Decode $decode DNN: " +#~local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/log | process +#~legend "Decode $decode PLP: " +#~local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process + +legend "G2P and confusion matrix: " +local/summarize_logs.pl $dir/exp/conf_matrix/log $dir/exp/g2p/log | process +if [ -d $dir/data/shadow2.uem ]; then + decode=shadow2.uem +else + decode=shadow.uem +fi + +legend "Segmentation $decode: provided..." +echo +#--legend "Segmentation: " +#--local/summarize_logs.pl $dir/exp/make_seg/$decode/log \ +#-- $dir/exp/make_seg/$decode/make_plp/ \ +#-- $dir/exp/tri4b_seg/decode_${decode}/log \ +#-- $dir/exp/make_plp/$decode | process +legend "Parametrization: " +local/summarize_logs.pl $dir/exp/make_plp/$decode | process +legend "Decode $decode TRI5: " +local/summarize_logs.pl $dir/exp/tri5/decode_*${decode}*/log | process +legend "Decode $decode PLP: " +local/summarize_logs.pl $dir/exp/{sgmm5,sgmm5_mmi_b0.1}/decode_*${decode}*/log | process +legend "Decode $decode DNN: " +local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/log | process +legend "Decode $decode BNF: " +local/summarize_logs.pl $dir/exp_bnf/{tri6,sgmm7,sgmm7_mmi_b0.1}/decode_*${decode}*/log | process +if [ -d $dir/$semisup ] ; then + legend "Decode $decode BNF_SEMISUP: " + local/summarize_logs.pl $dir/$semisup/{tri6,sgmm7,sgmm7_mmi_b0.1}/decode_*${decode}*/log | process +fi +if [ -d $dir/exp/tri6_nnet_mpe ] ; then + legend "Decode $decode DNN_MPE: " + local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/log | process +fi + +legend "Indexing $decode PLP: " +local/summarize_logs.pl $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/kws_indices*/log | process +legend "Indexing $decode DNN: " +local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/kws_indices*/log | process +legend "Indexing $decode BNF: " +local/summarize_logs.pl $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/kws_indices*/log | process +if [ -d $dir/$semisup ] ; then + legend "Indexing $decode BNF_SEMISUP: " + local/summarize_logs.pl $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/kws_indices*/log | process +fi +if [ -d $dir/exp/tri6_nnet_mpe ] ; then + legend "Indexing $decode DNN_MPE: " + local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/kws_indices*/log | process +fi + +legend "Search $decode PLP: " +local/summarize_logs.pl $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_kws \ + $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_kws_*/log | process +legend "Search $decode DNN: " +local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/evalKW_kws \ + $dir/exp/$nnet/decode_*${decode}*/evalKW_kws_*/log | process +legend "Search $decode BNF: " +local/summarize_logs.pl $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws \ + $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws_*/log | process +if [ -d $dir/$semisup ] ; then + legend "Search $decode BNF_SEMISUP: " + local/summarize_logs.pl $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws/ \ + $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_kws*/log | process +fi +if [ -d $dir/exp/tri6_nnet_mpe ] ; then + legend "Search $decode DNN_MPE: " + local/summarize_logs.pl 
$dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_kws \ + $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_kws*/log | process +fi + +legend "Proxies generation: " +local/summarize_logs.pl $dir/data/$decode/evalKW_oov_kws/g2p/log \ + $dir/data/$decode/evalKW_oov_kws/tmp/split/log | process +legend "Search $decode PLP: " +local/summarize_logs.pl $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws \ + $dir/exp/sgmm5_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws_*/log | process +legend "Search $decode DNN: " +local/summarize_logs.pl $dir/exp/$nnet/decode_*${decode}*/evalKW_oov_kws \ + $dir/exp/$nnet/decode_*${decode}*/evalKW_oov_kws_*/log | process +legend "Search $decode BNF: " +local/summarize_logs.pl $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws \ + $dir/exp_bnf/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws_*/log | process + +if [ -d $dir/$semisup ] ; then + legend "Search $decode BNF_SEMISUP: " + local/summarize_logs.pl $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws/ \ + $dir/$semisup/sgmm7_mmi_b0.1/decode_*${decode}*/evalKW_oov_kws*/log | process +fi + + +if [ -d $dir/exp/tri6_nnet_mpe ] ; then + legend "Search $decode DNN_MPE: " + local/summarize_logs.pl $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_oov_kws \ + $dir/exp/tri6_nnet_mpe/decode_${decode}_epoch*/evalKW_oov_kws*/log | process +fi + + + + + + diff --git a/egs/babel/s5d/local/nist_eval/make_release.sh b/egs/babel/s5d/local/nist_eval/make_release.sh new file mode 100755 index 00000000000..aff89f92846 --- /dev/null +++ b/egs/babel/s5d/local/nist_eval/make_release.sh @@ -0,0 +1,356 @@ +#!/bin/bash + +team=RADICAL +corpusid= +partition= +scase=BaEval #BaDev|BaEval +master= +version=1 +sysid= +prim=c +cer=0 +dryrun=true +dir="exp/sgmm5_mmi_b0.1/" +data=data/dev10h.seg +master=dev10h +extrasys= +final=false + +#end of configuration + + +echo $0 " " "$@" + +[ -f ./cmd.sh ] && . ./cmd.sh +[ -f ./path.sh ] && . ./path.sh +. ./utils/parse_options.sh + +. $1 +outputdir=$2 + +set -e +set -o pipefail + +function submit_to_google { + SYSPATH=$1 + #curl 'https://docs.google.com/forms/d/1MV4gf-iVOX79ZEAekEiLIo7L_UVrJnoPjdtICK5F-nc/formResponse' \ + # --data 'entry.1721972547='$MTWV'&entry.485509816='$ATWV'&entry.694031153='$RESPATH'&entry.1851048707='$(whoami)'&submit=Submit' \ + # --compressed + curl -sS 'https://docs.google.com/forms/d/1MV4gf-iVOX79ZEAekEiLIo7L_UVrJnoPjdtICK5F-nc/formResponse' \ + --data 'entry.1721972547='$MTWV'&entry.485509816='$ATWV'&entry.694031153='$SYSPATH'&entry.1851048707='$(whoami)'&entry.880350279='$STWV'&entry.60995624='$OTWV'&entry.1338769660='$LatticeRecall'&entry.1333349334='$THRESHOLD'&entry.1423358838='$(pwd)'&submit=Submit' --compressed |\ + grep --color "Your response has been recorded." || return 1 + return 0 +} + +function export_file { + #set -x + source_file=$1 + target_file=$2 + if [ ! -f $source_file ] ; then + echo "The file $source_file does not exist!" + exit 1 + else + if [ ! -f $target_file ] ; then + if ! $dryrun ; then + ln -s `readlink -f $source_file` $target_file || exit 1 + ls -al $target_file + else + echo "$source_file -> $target_file" + fi + + else + echo "The file is already there, not doing anything. 
Either change the version (using --version), or delete that file manually)"
+      exit 1
+    fi
+  fi
+  #set +x
+  return 0
+}
+
+function export_kws_file {
+  source_xml=$1
+  fixed_xml=$2
+  kwlist=$3
+  export_xml=$4
+
+  echo "Exporting KWS $source_xml as `basename $export_xml`"
+  if [ -f $source_xml ] ; then
+    cp $source_xml $fixed_xml.bak
+    fdate=`stat --printf='%y' $source_xml`
+    echo "The source file $source_xml has a timestamp of $fdate"
+    echo "Authorizing empty terms from `basename $kwlist`..."
+    if ! $dryrun ; then
+      local/fix_kwslist.pl $kwlist $source_xml $fixed_xml || exit 1
+    else
+      fixed_xml=$source_xml
+    fi
+    echo "Exporting... export_file $fixed_xml $export_xml "
+    export_file $fixed_xml $export_xml || exit 1
+  else
+    echo "The file $source_xml does not exist. Exiting..."
+    exit 1
+  fi
+  echo "Export done successfully..."
+  return 0
+}
+
+function find_best_kws_result {
+  local dir=$1
+  local mask=$2
+  local record=`(find $dir -name "sum.txt" -path "$mask" -not -ipath "*rescored*" | xargs grep "^| *Occ") | cut -f 1,13,17 -d '|' | sed 's/|//g' | column -t | sort -r -n -k 3 | head -n 1`
+  echo $record >&2
+  local file=`echo $record | awk -F ":" '{print $1}'`
+  #echo $file >&2
+  local path=`dirname $file`
+  #echo $path >&2
+  echo $path
+}
+
+function find_best_stt_result {
+  local dir=$1
+  local mask=$2
+  local record=`(find $dir -name "*.ctm.sys" -path "$mask" -not -ipath "*rescore*" | xargs grep Avg) | sed 's/|//g' | column -t | sort -n -k 9 | head -n 1`
+
+  echo $record >&2
+  local file=`echo $record | awk -F ":" '{print $1}'`
+  #echo $file >&2
+  local path=`dirname $file`
+  #echo $path >&2
+  echo $path
+}
+
+function create_sysid {
+  local best_one=$1
+  local sysid=
+  local taskid=`basename $best_one`
+  local system_path=`dirname $best_one`
+  if [[ $system_path =~ .*sgmm5.* ]] ; then
+    sysid=PLP
+  elif [[ $system_path =~ .*nnet.* ]] ; then
+    sysid=DNN
+  elif [[ $system_path =~ .*sgmm7.* ]] ; then
+    sysid=BNF
+  elif [[ $system_path =~ .*4way.* ]] ; then
+    sysid=4way-comb
+  else
+    echo "Unknown system path ($system_path), cannot deduce the system ID" >&2
+    exit 1
+  fi
+  if [[ $taskid == *kws_* ]] ; then
+    local kwsid=${taskid//kws_*/}
+    kwsid=${kwsid//_/}
+    if [ -z $kwsid ]; then
+      echo ${sysid}
+    else
+      echo ${sysid}-$kwsid
+    fi
+  else
+    echo ${sysid}
+  fi
+}
+
+
+function get_ecf_name {
+  local best_one=$1
+  local taskid=`basename $best_one`
+  local kwstask=${taskid//kws_*/kws}
+  local kwlist=
+  #echo $kwstask
+  if [ -z $kwstask ] ; then
+    #echo $data/kws/kwlist.xml
+    kwlist=`readlink -f $data/kws/kwlist.xml`
+  else
+    #echo $data/$kwstask/kwlist.xml
+    kwlist=`readlink -f $data/$kwstask/kwlist.xml`
+  fi
+  ecf=`head -n 1 $kwlist | grep -Po "(?<=ecf_filename=\")[^\"]*"`
+  echo -e "\tFound ECF: $ecf" >&2
+  echo $ecf
+  return 0
+}
+
+function compose_expid {
+  local task=$1
+  local best_one=$2
+  local extraid=$3
+  echo "TASK: $task" >&2
+  echo "BEST ONE: $best_one" >&2
+  echo "EXTRA ID: $extraid" >&2
+
+  [ ! -z $extraid ] && extraid="-$extraid"
+  local sysid=`create_sysid $best_one`
+  echo "SYS ID: $sysid" >&2
+  if [ "$task" == "KWS" ]; then
+    ext="kwslist.xml"
+  elif [ "$task" == "STT" ]; then
+    ext="ctm"
+  else
+    echo "Incorrect task ID ($task) given to compose_expid function!" 
>&2 + exit 1 + fi + echo "${corpusid}" >&2 + echo "${partition}" >&2 + echo "${scase}" >&2 + echo "KWS14_${team}_${corpusid}_${partition}_${scase}_${task}_${prim}-${sysid}${extraid}_$version.$ext" + return 0 +} + +function figure_out_scase { + local ecf=`basename $1` + if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then + local basnam=${ecf%%.ecf.xml} + local scase=`echo $basnam | awk -F _ '{print $2}'` + + if [[ $scase =~ conv-dev(\..*)? ]]; then + echo "BaDev" + elif [[ $scase =~ conv-eval(\..*)? ]]; then + echo "BaEval" + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + echo "WARNING: Does not contain conv-dev|conv-eval ($scase)" >&2 + echo "BaDev" + return 1 + fi + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + echo "WARNING: Does not match the mask IARPA-babel.*.ecf.xml" >&2 + echo "BaDev" + return 1 + fi + return 0 +} + +function figure_out_partition { + local ecf=`basename $1` + if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then + local basnam=${ecf%%.ecf.xml} + local scase=`echo $basnam | awk -F _ '{print $2}'` + + if [[ $scase =~ conv-dev(\..*)? ]]; then + echo "conv-dev" + elif [[ $scase =~ conv-eval(\..*)? ]]; then + echo "conv-eval" + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + echo "conv-dev" + return 1 + fi + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + echo "conv-dev" + return 1 + fi + return 0 +} + +function figure_out_corpusid { + local ecf=`basename $1` + if [[ $ecf =~ IARPA-babel.*.ecf.xml ]] ; then + local basnam=${ecf%%.ecf.xml} + local corpusid=`echo $basnam | awk -F _ '{print $1}'` + else + echo "WARNING: The ECF file $ecf is probably not an official file" >&2 + local corpusid=${ecf%%.*} + fi + echo $corpusid +} + +mkdir -p $outputdir +extrasys_unnorm="unnorm" +if [ ! -z $extrasys ] ; then + extrasys_unnorm="${extrasys}-unnorm" +fi + +#data=data/shadow.uem +dirid=`basename $data` +kws_tasks="kws " +[ -f $data/extra_kws_tasks ] && kws_tasks+=`cat $data/extra_kws_tasks | awk '{print $1"_kws"}'` +[ -d $data/compounds ] && compounds=`ls $data/compounds` + +if [ -z "$compounds" ] ; then + for kws in $kws_tasks ; do + echo $kws + best_one=`find_best_kws_result "$dir/decode_*${dirid}*/${kws}_*" "*"` + sysid=`create_sysid $best_one` + ecf=`get_ecf_name $best_one` + scase=`figure_out_scase $ecf` || break + partition=`figure_out_partition $ecf` || break + corpusid=`figure_out_corpusid $ecf` + + expid=`compose_expid KWS $best_one "$extrasys"` + echo -e "\tEXPORT NORMALIZED as: $expid" + expid_unnormalized=`compose_expid KWS $best_one "$extrasys_unnorm"` + echo -e "\tEXPORT UNNORMALIZED as: $expid_unnormalized" + + export_kws_file $best_one/kwslist.xml $best_one/kwslist.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid + export_kws_file $best_one/kwslist.unnormalized.xml $best_one/kwslist.unnormalized.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid_unnormalized + done +else + [ -z $master ] && echo "You must choose the master compound (--master ) for compound data set" && exit 1 + for kws in $kws_tasks ; do + echo $kws + best_one=`find_best_kws_result "$dir/decode_*${dirid}*/$master/${kws}_*" "*"` + ( + eval "`cat $best_one/metrics.txt | sed 's/ *= */=/g' | sed 's/,/;/g' | sed 's/Lattice Recall/LatticeRecall/g' `" + submit_to_google $best_one $ATWV $MTWV + ) || echo "Submission failed!" 
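+
+    # For orientation: compose_expid above builds NIST-style export names of the
+    # form KWS14_<team>_<corpusid>_<partition>_<scase>_<task>_<prim>-<sysid><extraid>_<version>.<ext>,
+    # so an exported kwslist would be named, e.g. (hypothetical values):
+    #   KWS14_RADICAL_IARPA-babel201b-v0.2b_conv-dev_BaDev_KWS_c-DNN_1.kwslist.xml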
+ + + for compound in $compounds ; do + compound_best_one=`echo $best_one | sed "s:$master/${kws}_:$compound/${kws}_:g"` + echo "From ($kws) $best_one going to $compound_best_one" + echo -e "\tPREPARE EXPORT: $compound_best_one" + sysid=`create_sysid $compound_best_one` + #ecf=`get_ecf_name $best_one` + ecf=`readlink -f $data/compounds/$compound/ecf.xml` + scase=`figure_out_scase $ecf` + partition=`figure_out_partition $ecf` + corpusid=`figure_out_corpusid $ecf` + expid=`compose_expid KWS $compound_best_one "$extrasys"` + echo -e "\tEXPORT NORMALIZED as: $expid" + expid_unnormalized=`compose_expid KWS $compound_best_one "$extrasys_unnorm"` + echo -e "\tEXPORT UNNORMALIZED as: $expid_unnormalized" + + export_kws_file $compound_best_one/kwslist.xml $compound_best_one/kwslist.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid + export_kws_file $compound_best_one/kwslist.unnormalized.xml $compound_best_one/kwslist.unnormalized.fixed.xml $data/$kws/kwlist.xml $outputdir/$expid_unnormalized + done + done +fi + +##EXporting STT -- more straightforward, because there is only one task +if [ -z "$compounds" ] ; then + #best_one=`find_best_stt_result "$dir/decode_*${dirid}*/score_*" "*"` + best_one=`find_best_stt_result "$dir/*${dirid}*/score_*" "*"` + echo -e "\tERROR: I don't know how to do this, yet" + ecf=`get_ecf_name kws` + sysid=`create_sysid $best_one` + scase=`figure_out_scase $ecf` || break + partition=`figure_out_partition $ecf` + corpusid=`figure_out_corpusid $ecf` + expid=`compose_expid STT $best_one "$extrasys"` + echo -e "\tEXPORT NORMALIZED as: $expid" + export_file $best_one/${dirid}.ctm $outputdir/$expid +else + [ -z $master ] && echo "You must choose the master compound (--master ) for compound data set" && exit 1 + #best_one=`find_best_stt_result "$dir/decode_*${dirid}*/$master/score_*" "*"` + best_one=`find_best_stt_result "$dir/*${dirid}*/$master/score_*" "*"` + + for compound in $compounds ; do + compound_best_one=`echo $best_one | sed "s:$master/score_:$compound/score_:g"` + echo -e "\tPREPARE EXPORT: $compound_best_one" + sysid=`create_sysid $compound_best_one` + #ecf=`get_ecf_name $best_one` + ecf=`readlink -f $data/compounds/$compound/ecf.xml` + scase=`figure_out_scase $ecf` + partition=`figure_out_partition $ecf` + corpusid=`figure_out_corpusid $ecf` + expid=`compose_expid STT $compound_best_one $extrasys` + echo -e "\tEXPORT NORMALIZED as: $expid" + + export_file $compound_best_one/${compound}.ctm $outputdir/$expid + done +fi + +echo "Everything looks fine, good luck!" +exit 0 + diff --git a/egs/babel/s5d/local/nist_eval/split_compound_set.sh b/egs/babel/s5d/local/nist_eval/split_compound_set.sh new file mode 100755 index 00000000000..59ea4c162d7 --- /dev/null +++ b/egs/babel/s5d/local/nist_eval/split_compound_set.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +# End configuration section +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +. ./cmd.sh; + +devset=dev10h.pem +evalset=eval.seg +cmd="$decode_cmd" + + +rootdir=exp/nnet3/lstm_bidirectional_sp/decode_shadow.seg +combinedir=exp/combine/lstm_bidirectional_sp/shadow.seg + +[ ! -d data/shadow.seg/compounds/$devset ] && \ + echo >&2 "data/shadow.seg/compounds/$devset does not exist!" && exit 1 +[ ! -d data/shadow.seg/compounds/$evalset ] && \ + echo >&2 "data/shadow.seg/compounds/$evalset does not exist!" && exit 1 + +for decode in $rootdir/{,phones,syllabs}; do + [ ! 
-d $decode ] && \
+    echo >&2 "$decode does not exist!" && exit 1
+  local/nist_eval/filter_data.sh \
+    data/shadow.seg ${devset} $decode
+  local/nist_eval/filter_data.sh --ntrue-from ${devset} \
+    data/shadow.seg ${evalset} $decode
+done
+
+
+
+for kwset in data/shadow.seg/compounds/$devset/kwset_* ; do
+  kwsetdir=$(basename $kwset)
+  kwsetid=${kwsetdir#*_}
+
+  echo "Processing kwset id=$kwsetid"
+  local/search/combine.sh --extraid "$kwsetid" --cmd "$cmd" \
+    data/shadow.seg/compounds/${devset}/ data/langp_test \
+    $rootdir/{,syllabs/,phones/}${devset}/${kwsetdir} $combinedir/${devset}
+
+  local/search/combine_special.sh --extraid "$kwsetid" --cmd "$cmd" \
+    data/shadow.seg/compounds/${evalset}/ data/langp_test \
+    $combinedir/${devset}/${kwsetdir}/ \
+    $rootdir/{,syllabs/,phones/}${evalset}/${kwsetdir} $combinedir/${evalset}
+done
+
+
+
+
diff --git a/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh b/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh
new file mode 100755
index 00000000000..3b12222e13a
--- /dev/null
+++ b/egs/babel/s5d/local/nnet2/get_egs_semi_supervised.sh
@@ -0,0 +1,374 @@
+#!/bin/bash
+
+# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# Copyright 2014 Vimal Manohar
+# Apache 2.0.
+
+# This script, which will generally be called from other neural-net training
+# scripts, extracts the training examples used to train the neural net (and also
+# the validation examples used for diagnostics), and puts them in separate archives.
+# This is similar to the script steps/nnet2/get_egs.sh, but this one also extracts
+# frames from unsupervised data.  The decode directory for the unsupervised data
+# must have the best path computed, along with posteriors (this can be done using
+# local/combine_posteriors.sh).
+
+set -o pipefail
+
+# Begin configuration section.
+cmd=run.pl
+feat_type=
+num_utts_subset=300 # number of utterances in validation and training
+                    # subsets used for shrinkage and diagnostics
+num_valid_frames_combine=0 # #valid frames for combination weights at the very end.
+num_train_frames_combine=10000 # # train frames for the above.
+num_frames_diagnostic=4000 # number of frames for "compute_prob" jobs
+samples_per_iter=400000 # each iteration of training, see this many samples
+                        # per job.  This is just a guideline; it will pick a number
+                        # that divides the number of samples in the entire data.
+transform_dir_sup=   # If supplied, overrides alidir
+transform_dir_unsup=
+num_jobs_nnet=16   # Number of neural net jobs to run in parallel
+stage=-10
+io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time.
+splice_width=4 # meaning +- 4 frames on each side for second LDA
+spk_vecs_dir_sup=
+spk_vecs_dir_unsup=
+random_copy=false
+weight_threshold=0.7 # Threshold on the confidence factor of an unsupervised-data
+                     # frame for it to not be ignored
+supervised_copies=3  # Make this many copies of the supervised data.
+use_frame_selection=true
+use_frame_weights=false # TODO: Not coded
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+
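+# In brief: frames of the unsupervised data are selected according to the
+# per-frame confidence weights found in the decode directory (the weights.*.gz
+# files checked below); a frame whose weight falls below --weight-threshold is
+# ignored, and the supervised data is duplicated --supervised-copies times,
+# presumably so that it is not swamped by the unsupervised portion.
+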
+if [ $# != 6 ]; then
+  echo "Usage: steps/nnet2/get_egs_semi_supervised.sh [opts] <data-supervised> <data-unsupervised> <lang> <ali-dir> <decode-dir> <exp-dir>"
+  echo " e.g.: steps/nnet2/get_egs_semi_supervised.sh data/train data/train_unt data/lang exp/tri3_ali exp/tri3/dev_unt exp/tri4_nnet"
+  echo ""
+  echo "Main options (for others, see top of script file)"
+  echo "  --config <config-file>                           # config file containing options"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --num-jobs-nnet <num-jobs|16>                    # Number of parallel jobs to use for main neural net"
+  echo "                                                   # training (will affect results as well as speed; try 8, 16)"
+  echo "                                                   # Note: if you increase this, you may want to also increase"
+  echo "                                                   # the learning rate."
+  echo "  --samples-per-iter <#samples|400000>             # Number of samples of data to process per iteration, per"
+  echo "                                                   # process."
+  echo "  --splice-width <width|4>                         # Number of frames on each side to append for feature input"
+  echo "                                                   # (note: we splice processed, typically 40-dimensional frames"
+  echo "  --num-frames-diagnostic <#frames|4000>           # Number of frames used in computing (train,valid) diagnostics"
+  echo "  --num-valid-frames-combine <#frames|10000>       # Number of frames used in getting combination weights at the"
+  echo "                                                   # very end."
+  echo "  --stage <stage|-10>                              # Used to run a partially-completed training process from somewhere in"
+  echo "                                                   # the middle."
+  echo "  --weight-threshold <threshold|0.7>               # Threshold on the confidence factor below which a"
+  echo "                                                   # frame of unsupervised training data is ignored."
+  echo "  --supervised-copies <#copies|3>                  # Make this many copies of the supervised data"
+  echo "  --transform-dir-sup <transform-dir>              # Directory with transforms for the supervised training data"
+  echo "  --transform-dir-unsup <transform-dir>            # Directory with transforms for the unsupervised training data"
+
+  exit 1;
+fi
+
+data_sup=$1
+data_unsup=$2
+lang=$3
+alidir=$4
+latdir=$5
+dir=$6
+
+# Check some files.
+for f in $data_sup/feats.scp $data_unsup/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $alidir/tree $latdir/best_path_ali.1.gz $latdir/weights.1.gz $latdir/../final.mdl $latdir/../tree; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+
+# Set some variables.
+oov=`cat $lang/oov.int`
+num_leaves=`tree-info $alidir/tree 2>/dev/null | awk '{print $2}'` || exit 1;
+silphonelist=`cat $lang/phones/silence.csl` || exit 1;
+[ -z $num_leaves ] && echo "\$num_leaves is unset" && exit 1
+[ "$num_leaves" -eq "0" ] && echo "\$num_leaves is 0" && exit 1
+
+nj_sup=`cat $alidir/num_jobs` || exit 1;  # number of jobs in alignment dir...
+nj_unsup=`cat $latdir/num_jobs` || exit 1;  # number of jobs in decode dir
+# in this dir we'll have just one job.
+sdata_sup=$data_sup/split$nj_sup
+sdata_unsup=$data_unsup/split$nj_unsup
+utils/split_data.sh $data_sup $nj_sup
+utils/split_data.sh $data_unsup $nj_unsup
+
+mkdir -p $dir/log
+echo $nj_sup > $dir/num_jobs_sup
+echo $nj_unsup > $dir/num_jobs_unsup
+
+cp $alidir/tree $dir
+
+awk '{print $1}' $data_sup/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset > $dir/valid_uttlist
+
+# TODO (Vimal 22-Jan-14): Might need to deal with the unsupervised data separately
+if [ -f $data_sup/utt2uniq ]; then
+  echo "File $data_sup/utt2uniq exists, so augmenting valid_uttlist to"
+  echo "include all perturbed versions of the same 'real' utterances."
+  mv $dir/valid_uttlist $dir/valid_uttlist.tmp
+  utils/utt2spk_to_spk2utt.pl $data_sup/utt2uniq > $dir/uniq2utt
+  cat $dir/valid_uttlist.tmp | utils/apply_map.pl $data_sup/utt2uniq | \
+    sort | uniq | utils/apply_map.pl $dir/uniq2utt | \
+    awk '{for(n=1;n<=NF;n++) print $n;}' | sort > $dir/valid_uttlist
+  rm $dir/uniq2utt $dir/valid_uttlist.tmp
+fi
+
+# TODO (Vimal 22-Jan-14): Might need to deal with unsupervised data separately
+awk '{print $1}' $data_sup/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \
+  head -$num_utts_subset > $dir/train_subset_uttlist
+
+[ -z "$transform_dir_sup" ] && transform_dir_sup=$alidir
+[ -z "$transform_dir_unsup" ] && transform_dir_unsup=$latdir
+norm_vars=`cat $alidir/norm_vars 2>/dev/null` || norm_vars=false # cmn/cmvn option, default false.
+norm_vars_unsup=`cat $latdir/norm_vars 2>/dev/null` || norm_vars_unsup=false
+
+if [ "$norm_vars" != "$norm_vars_unsup" ]; then
+  echo "ERROR: Features mismatch for supervised and unsupervised data!"
+  echo "Variance normalization $norm_vars for supervised data vs $norm_vars_unsup for unsupervised data"
+  exit 1
+fi
+cp $alidir/norm_vars $dir 2>/dev/null
+
+## Set up features.
+if [ -z $feat_type ]; then
+  if [ -f $alidir/final.mat ] && [ ! -f $transform_dir_sup/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi
+fi
+
+echo "$0: feature type is $feat_type"
+
+case $feat_type in
+  raw) feats_sup="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata_sup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_sup/JOB/utt2spk scp:$sdata_sup/JOB/cmvn.scp scp:- ark:- |"
+    feats_unsup="ark,s,cs:cat $sdata_unsup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_unsup/JOB/utt2spk scp:$sdata_unsup/JOB/cmvn.scp scp:- ark:- |"
+    valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- |"
+    train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- |"
+    ;;
+  lda)
+    splice_opts=`cat $alidir/splice_opts 2>/dev/null`
+    #splice_opts_unsup=`cat $latdir/../splice_opts 2>/dev/null`
+    #if [ "$splice_opts" -ne "$splice_opts_unsup" ]; then
+    #  echo "ERROR: Features mismatch for supervised and unsupervised data!"
+    #  echo "Splice options $splice_opts for supervised data vs $splice_opts_unsup for unsupervised data"
+    #  exit 1
+    #fi
+    cp $alidir/splice_opts $dir/splice_opts 2>/dev/null
+
+    #if [ "`diff $alidir/final.mat $latdir/../final.mat &> /dev/null; echo $?`" -ne "0" ]; then
+    #  echo "ERROR: Features mismatch for supervised and unsupervised data!"
+    #  echo "LDA matrices $alidir/final.mat for supervised data and $latdir/../final.mat for unsupervised data don't match"
+    #  exit 1
+    #fi
+
+    cp $alidir/final.mat $dir
+    feats_sup="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata_sup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_sup/JOB/utt2spk scp:$sdata_sup/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
+    feats_unsup="ark,s,cs:cat $sdata_unsup/JOB/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata_unsup/JOB/utt2spk scp:$sdata_unsup/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
+    valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
+    train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data_sup/feats.scp | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data_sup/utt2spk scp:$data_sup/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
+    ;;
+  *) echo "$0: invalid feature type $feat_type" && exit 1;
+esac
+
+if [ -f $transform_dir_sup/trans.1 ] && [ $feat_type != "raw" ]; then
+  echo "$0: using transforms from $transform_dir_sup for supervised data"
+  feats_sup="$feats_sup transform-feats --utt2spk=ark:$sdata_sup/JOB/utt2spk ark:$transform_dir_sup/trans.JOB ark:- ark:- |"
+  valid_feats="$valid_feats transform-feats --utt2spk=ark:$data_sup/utt2spk 'ark:cat $transform_dir_sup/trans.*|' ark:- ark:- |"
+  train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data_sup/utt2spk 'ark:cat $transform_dir_sup/trans.*|' ark:- ark:- |"
+fi
+if [ -f $transform_dir_sup/raw_trans.1 ] && [ $feat_type == "raw" ]; then
+  echo "$0: using raw-fMLLR transforms from $transform_dir_sup"
+  feats_sup="$feats_sup transform-feats --utt2spk=ark:$sdata_sup/JOB/utt2spk ark:$transform_dir_sup/raw_trans.JOB ark:- ark:- |"
+  valid_feats="$valid_feats transform-feats --utt2spk=ark:$data_sup/utt2spk 'ark:cat $transform_dir_sup/raw_trans.*|' ark:- ark:- |"
+  train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data_sup/utt2spk 'ark:cat $transform_dir_sup/raw_trans.*|' ark:- ark:- |"
+fi
+
+if [ -f $transform_dir_unsup/trans.1 ] && [ $feat_type != "raw" ]; then
+  echo "$0: using transforms from $transform_dir_unsup for unsupervised data"
+  feats_unsup="$feats_unsup transform-feats --utt2spk=ark:$sdata_unsup/JOB/utt2spk ark:$transform_dir_unsup/trans.JOB ark:- ark:- |"
+fi
+if [ -f $transform_dir_unsup/raw_trans.1 ] && [ $feat_type == "raw" ]; then
+  echo "$0: using raw-fMLLR transforms from $transform_dir_unsup"
+  feats_unsup="$feats_unsup transform-feats --utt2spk=ark:$sdata_unsup/JOB/utt2spk ark:$transform_dir_unsup/raw_trans.JOB ark:- ark:- |"
+fi
+
+if [ $stage -le 0 ]; then
+  echo "$0: working out number of frames of training data"
+  num_sup_frames=`feat-to-len scp:$data_sup/feats.scp ark,t:- | awk '{x += $2;} END{print x;}'` || exit 1;
+  num_unsup_frames=`feat-to-len scp:$data_unsup/feats.scp ark,t:- | awk '{x += $2;} END{print x;}'` || exit 1;
+  num_frames=$(perl -e "print STDOUT ($num_sup_frames * $supervised_copies + $num_unsup_frames)")
+  echo $num_frames > $dir/num_frames
+else
+  num_frames=`cat $dir/num_frames` || exit 1;
+fi
+
+# Working out number of 
iterations per epoch. +iters_per_epoch=`perl -e "print int($num_frames/($samples_per_iter * $num_jobs_nnet) + 0.5);"` || exit 1; +[ $iters_per_epoch -eq 0 ] && iters_per_epoch=1 +samples_per_iter_real=$[$num_frames/($num_jobs_nnet*$iters_per_epoch)] +echo "$0: Every epoch, splitting the data up into $iters_per_epoch iterations," +echo "$0: giving samples-per-iteration of $samples_per_iter_real (you requested $samples_per_iter)." + +# Making soft links to storage directories. +for x in `seq 1 $num_jobs_nnet`; do + for y in `seq 0 $[$iters_per_epoch-1]`; do + utils/create_data_link.pl $dir/egs/egs.$x.$y.ark + utils/create_data_link.pl $dir/egs/egs_tmp.$x.$y.ark + done + for y in `seq 1 $nj_sup`; do + utils/create_data_link.pl $dir/egs/egs_orig.$x.$y.ark + done +done + +nnet_context_opts="--left-context=$splice_width --right-context=$splice_width" +mkdir -p $dir/egs + +if [ ! -z $spk_vecs_dir_sup ]; then + [ ! -f $spk_vecs_dir_sup/vecs.1 ] && echo "No such file $spk_vecs_dir_sup/vecs.1" && exit 1; + spk_vecs_opt_sup=("--spk-vecs=ark:cat $spk_vecs_dir_sup/vecs.*|" "--utt2spk=ark:$data_sup/utt2spk") +else + spk_vecs_opt_sup=() +fi + +if [ ! -z $spk_vecs_dir_unsup ]; then + [ ! -f $spk_vecs_dir_unsup/vecs.1 ] && echo "No such file $spk_vecs_dir_unsup/vecs.1" && exit 1; + spk_vecs_opt_unsup=("--spk-vecs=ark:cat $spk_vecs_dir_unsup/vecs.*|" "--utt2spk=ark:$data_unsup/utt2spk") +else + spk_vecs_opt_unsup=() +fi + +if [ $stage -le 2 ]; then + echo "Getting validation and training subset examples." + rm $dir/.error 2>/dev/null + $cmd $dir/log/create_valid_subset.log \ + nnet-get-egs $nnet_context_opts "${spk_vecs_opt_sup[@]}" "$valid_feats" \ + "ark,cs:gunzip -c $alidir/ali.*.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \ + "ark:$dir/egs/valid_all.egs" || touch $dir/.error & + $cmd $dir/log/create_train_subset.log \ + nnet-get-egs $nnet_context_opts "${spk_vecs_opt_sup[@]}" "$train_subset_feats" \ + "ark,cs:gunzip -c $alidir/ali.*.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \ + "ark:$dir/egs/train_subset_all.egs" || touch $dir/.error & + wait; + [ -f $dir/.error ] && exit 1; + echo "Getting subsets of validation examples for diagnostics and combination." + $cmd $dir/log/create_valid_subset_combine.log \ + nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/egs/valid_all.egs \ + ark:$dir/egs/valid_combine.egs || touch $dir/.error & + $cmd $dir/log/create_valid_subset_diagnostic.log \ + nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/egs/valid_all.egs \ + ark:$dir/egs/valid_diagnostic.egs || touch $dir/.error & + + $cmd $dir/log/create_train_subset_combine.log \ + nnet-subset-egs --n=$num_train_frames_combine ark:$dir/egs/train_subset_all.egs \ + ark:$dir/egs/train_combine.egs || touch $dir/.error & + $cmd $dir/log/create_train_subset_diagnostic.log \ + nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/egs/train_subset_all.egs \ + ark:$dir/egs/train_diagnostic.egs || touch $dir/.error & + wait + cat $dir/egs/valid_combine.egs $dir/egs/train_combine.egs > $dir/egs/combine.egs + + for f in $dir/egs/{combine,train_diagnostic,valid_diagnostic}.egs; do + [ ! 
-s $f ] && echo "No examples in file $f" && exit 1;
+  done
+  rm $dir/egs/valid_all.egs $dir/egs/train_subset_all.egs $dir/egs/{train,valid}_combine.egs
+fi
+
+if [ $stage -le 3 ]; then
+  mkdir -p $dir/temp
+
+  # Other scripts might need to know the following info:
+  echo $num_jobs_nnet >$dir/egs/num_jobs_nnet
+  echo $iters_per_epoch >$dir/egs/iters_per_epoch
+  echo $samples_per_iter_real >$dir/egs/samples_per_iter
+
+  echo "Creating training examples";
+  # in $dir/egs, create $num_jobs_nnet separate files with training examples.
+  # The order is not randomized at this point.
+
+  echo "Generating training examples on disk"
+  # The examples will go round-robin to egs_list.
+
+  egs_list=
+  for n in `seq 1 $num_jobs_nnet`; do
+    egs_list="$egs_list ark:$dir/egs/egs_orig.$n.JOB.ark"
+  done
+
+  $cmd $io_opts JOB=1:$nj_unsup $dir/log/get_weighted_egs.JOB.log \
+    nnet-get-weighted-egs $nnet_context_opts "${spk_vecs_opt_unsup[@]}" \
+    --weight-threshold=$weight_threshold --use-frame-weights=$use_frame_weights \
+    --use-frame-selection=$use_frame_selection "$feats_unsup" \
+    "ark,s,cs:gunzip -c $latdir/best_path_ali.JOB.gz | convert-ali $latdir/../final.mdl $alidir/final.mdl $dir/tree ark:- ark:- | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
+    "ark,s,cs:gunzip -c $latdir/weights.JOB.gz |" ark:- \| \
+    nnet-copy-egs ark:- $egs_list || exit 1;
+
+  for (( i=0; i<$supervised_copies; i++ )); do
+    $cmd $io_opts JOB=1:$nj_sup $dir/log/get_egs.$i.JOB.log \
+      nnet-get-egs $nnet_context_opts "${spk_vecs_opt_sup[@]}" "$feats_sup" \
+      "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \
+      nnet-copy-egs ark:- $egs_list || exit 1;
+  done
+fi
+
+if [ $stage -le 4 ]; then
+  echo "Rearranging the examples into $iters_per_epoch parts per training job"
+  if [ $iters_per_epoch -eq 1 ]; then # no need to split up the examples.
+    echo "Since iters-per-epoch == 1, just concatenating the data."
+    for n in `seq 1 $num_jobs_nnet`; do
+      cat $dir/egs/egs_orig.$n.*.ark > $dir/egs/egs_tmp.$n.0.ark || exit 1;
+      rm $dir/egs/egs_orig.$n.*.ark  # don't "|| exit 1", due to NFS bugs...
+    done
+  else # We'll have to split it up using nnet-copy-egs.
+    egs_list=
+    for n in `seq 0 $[$iters_per_epoch-1]`; do
+      egs_list="$egs_list ark:$dir/egs/egs_tmp.JOB.$n.ark"
+    done
+    # note, the "|| true" below is a workaround for NFS bugs
+    # we encountered running this script with Debian-7, NFS-v4.
+    $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/split_egs.JOB.log \
+      nnet-copy-egs --random=$random_copy --srand=JOB \
+      "ark:cat $dir/egs/egs_orig.JOB.*.ark|" $egs_list '&&' \
+      '(' rm $dir/egs/egs_orig.JOB.*.ark '||' true ')' || exit 1;
+  fi
+fi
+
+if [ $stage -le 5 ]; then
+  # Next, shuffle the order of the examples in each of those files.
+  # Each one should not be too large, so we can do this in memory.
+  echo "Shuffling the order of training examples"
+  echo "(in order to avoid stressing the disk, these won't all run at once)."
+
+
+  # note, the "|| true" below is a workaround for NFS bugs
+  # we encountered running this script with Debian-7, NFS-v4.
+  for n in `seq 0 $[$iters_per_epoch-1]`; do
+    $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$n.JOB.log \
+      nnet-shuffle-egs "--srand=\$[JOB+($num_jobs_nnet*$n)]" \
+      ark:$dir/egs/egs_tmp.JOB.$n.ark ark:$dir/egs/egs.JOB.$n.ark '&&' \
+      '(' rm $dir/egs/egs_tmp.JOB.$n.ark '||' true ')' || exit 1;
+  done
+fi
+
+echo "$0: Finished preparing training examples"
diff --git a/egs/babel/s5d/local/nnet3/run_blstm.sh b/egs/babel/s5d/local/nnet3/run_blstm.sh
new file mode 100755
index 00000000000..6833baa0d72
--- /dev/null
+++ b/egs/babel/s5d/local/nnet3/run_blstm.sh
@@ -0,0 +1,29 @@
+
+stage=0
+train_stage=-10
+cell_dim=512
+rp_dim=128
+nrp_dim=128
+affix=bidirectional
+multicondition=true
+common_egs_dir=
+num_epochs=8
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. 
parse_options.sh || exit 1; + +local/nnet3/run_lstm.sh --affix $affix \ + --stage $stage \ + --train-stage $train_stage \ + --num-epochs $num_epochs \ + --lstm-delay " [-1,1] [-2,2] [-3,3] " \ + --label-delay 0 \ + --cell-dim $cell_dim \ + --recurrent-projection-dim $rp_dim \ + --non-recurrent-projection-dim $nrp_dim \ + --common-egs-dir "$common_egs_dir" \ + --multicondition $multicondition \ + --chunk-left-context 40 \ + --chunk-right-context 40 diff --git a/egs/babel/s5d/local/nnet3/run_blstm_realigned.sh b/egs/babel/s5d/local/nnet3/run_blstm_realigned.sh new file mode 100755 index 00000000000..05c9a057512 --- /dev/null +++ b/egs/babel/s5d/local/nnet3/run_blstm_realigned.sh @@ -0,0 +1,32 @@ +stage=0 +train_stage=-10 +cell_dim=512 +rp_dim=128 +nrp_dim=128 +affix=bidirectional +multicondition=false +common_egs_dir= +num_epochs=8 +align_model_dir=exp/nnet3/tdnn_sp +extra_align_opts= + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +local/nnet3/run_lstm_realigned.sh --affix $affix \ + --stage $stage \ + --train-stage $train_stage \ + --num-epochs $num_epochs \ + --lstm-delay " [-1,1] [-2,2] [-3,3] " \ + --label-delay 0 \ + --cell-dim $cell_dim \ + --recurrent-projection-dim $rp_dim \ + --non-recurrent-projection-dim $nrp_dim \ + --common-egs-dir "$common_egs_dir" \ + --multicondition $multicondition \ + --chunk-left-context 40 \ + --chunk-right-context 40 \ + --extra-align-opts "$extra_align_opts" \ + --align-model-dir "$align_model_dir" diff --git a/egs/babel/s5d/local/nnet3/run_ivector_common.sh b/egs/babel/s5d/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..bfe66d13f76 --- /dev/null +++ b/egs/babel/s5d/local/nnet3/run_ivector_common.sh @@ -0,0 +1,137 @@ +#!/bin/bash + +. ./cmd.sh +set -e +stage=1 +generate_alignments=true # false if doing ctc training +speed_perturb=true + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. 
./utils/parse_options.sh
+
+# perturbed data preparation
+train_set=train
+if [ "$speed_perturb" == "true" ]; then
+  if [ $stage -le 1 ]; then
+    # Although the nnet will be trained on high-resolution data, we still have
+    # to perturb the normal data to get the alignments.
+    # _sp stands for speed-perturbed
+    for datadir in train; do
+      utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1
+      utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2
+      utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
+      utils/validate_data_dir.sh --no-feats data/${datadir}_tmp
+      rm -r data/temp1 data/temp2
+
+      featdir=plp_perturbed
+      if $use_pitch; then
+        steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/${datadir}_tmp exp/make_plp_pitch/${datadir}_tmp $featdir
+      else
+        steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/${datadir}_tmp exp/make_plp/${datadir}_tmp $featdir
+      fi
+
+      steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_plp/${datadir}_tmp $featdir || exit 1;
+      utils/fix_data_dir.sh data/${datadir}_tmp
+
+      utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0
+      utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0
+      utils/fix_data_dir.sh data/${datadir}_sp
+      rm -r data/temp0 data/${datadir}_tmp
+    done
+  fi
+
+  train_set=train_sp
+  if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ]; then
+    # obtain the alignment of the perturbed data
+    steps/align_fmllr.sh \
+      --nj 70 --cmd "$train_cmd" \
+      --boost-silence $boost_sil \
+      data/$train_set data/langp/tri5_ali exp/tri5 exp/tri5_ali_sp || exit 1
+    touch exp/tri5_ali_sp/.done
+  fi
+fi
+
+if [ $stage -le 3 ]; then
+  mfccdir=mfcc_hires
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+    date=$(date +'%m_%d_%H_%M')
+    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/swbd-$date/s5b/$mfccdir/storage $mfccdir/storage
+  fi
+
+  # the 100k_nodup directory is copied separately, as
+  # we want to use exp/tri2_ali_100k_nodup for lda_mllt training
+  # the main train directory might be speed_perturbed
+  for dataset in $train_set ; do
+    utils/copy_data_dir.sh data/$dataset data/${dataset}_hires
+
+    # scale the waveforms, this is useful as we don't use CMVN
+    data_dir=data/${dataset}_hires
+    cat $data_dir/wav.scp | python -c "
+import sys, os, subprocess, re, random
+scale_low = 1.0/8
+scale_high = 2.0
+for line in sys.stdin.readlines():
+  if len(line.strip()) == 0:
+    continue
+  print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high))
+"| sort -k1,1 -u > $data_dir/wav.scp_scaled || exit 1;
+    mv $data_dir/wav.scp_scaled $data_dir/wav.scp
+
+    steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
+      --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
+    steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/${dataset} $mfccdir;
+
+    # Remove the small number of utterances that couldn't be extracted for some
+    # reason (e.g. too short; no such file).
+    utils/fix_data_dir.sh data/${dataset}_hires;
+  done
+
+fi
+
+# ivector extractor training
+if [ $stage -le 5 ]; then
+  # We need to build a small system just because we need the LDA+MLLT transform
+  # to train the diag-UBM on top of. We use --num-iters 13 because after we get
+  # the transform (12th iter is the last), any further training is pointless.
+ # this decision is based on fisher_english + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ + --splice-opts "--left-context=3 --right-context=3" \ + --boost-silence $boost_sil \ + $numLeavesMLLT $numGaussMLLT data/${train_set}_hires \ + data/langp/tri5_ali/ exp/tri5_ali_sp exp/nnet3/tri3b +fi + +if [ $stage -le 6 ]; then + # To train a diagonal UBM we don't need very much data, so use the smallest subset. + steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-threads 12 --num-frames 200000 \ + data/${train_set}_hires 512 exp/nnet3/tri3b exp/nnet3/diag_ubm +fi + +if [ $stage -le 7 ]; then + # iVector extractors can be sensitive to the amount of data, but this one has a + # fairly small dim (defaults to 100) so we don't use all of it, we use just the + # 100k subset (just under half the data). + steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ + data/${train_set}_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1; +fi + +if [ $stage -le 8 ]; then + # We extract iVectors on all the train_nodup data, which will be what we + # train the system on. + + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). + steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ + data/${train_set}_max2_hires exp/nnet3/extractor exp/nnet3/ivectors_$train_set || exit 1; + +fi + +exit 0; diff --git a/egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh b/egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh new file mode 100755 index 00000000000..8d3973e65bc --- /dev/null +++ b/egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh @@ -0,0 +1,208 @@ +#!/bin/bash + +. ./cmd.sh +set -e +stage=1 +train_stage=-10 +generate_alignments=true # false if doing ctc training +speed_perturb=true +snrs="20:15:10" +num_data_reps=3 +ali_dir=exp/ +db_string="'air' 'rwcp' 'rvb2014'" # RIR dbs to be used in the experiment + # only dbs used for ASpIRE submission system have been used here +RIR_home=db/RIR_databases/ # parent directory of the RIR databases files +download_rirs=true # download the RIR databases from the urls or assume they are present in the RIR_home directory + + + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. 
./utils/parse_options.sh
+
+# perturbed data preparation
+train_set=train
+if [ "$speed_perturb" == "true" ]; then
+  if [ $stage -le 1 ]; then
+    # Although the nnet will be trained on high-resolution data, we still have
+    # to perturb the normal data to get the alignments.
+    # _sp stands for speed-perturbed
+    for datadir in train; do
+      utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1
+      utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2
+      utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
+      utils/validate_data_dir.sh --no-feats data/${datadir}_tmp
+      rm -r data/temp1 data/temp2
+
+      featdir=plp_perturbed
+      if $use_pitch; then
+        steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/${datadir}_tmp exp/make_plp_pitch/${datadir}_tmp $featdir
+      else
+        steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/${datadir}_tmp exp/make_plp/${datadir}_tmp $featdir
+      fi
+
+      steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_plp/${datadir}_tmp $featdir || exit 1;
+      utils/fix_data_dir.sh data/${datadir}_tmp
+
+      utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0
+      utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0
+      utils/fix_data_dir.sh data/${datadir}_sp
+      rm -r data/temp0 data/${datadir}_tmp
+    done
+  fi
+
+  train_set=train_sp
+  if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ]; then
+    # obtain the alignment of the perturbed data
+    steps/align_fmllr.sh \
+      --nj 70 --cmd "$train_cmd" \
+      --boost-silence $boost_sil \
+      data/$train_set data/langp/tri5_ali exp/tri5 exp/tri5_ali_sp || exit 1
+    touch exp/tri5_ali_sp/.done
+  fi
+fi
+
+if [ $stage -le 3 ]; then
+  mfccdir=mfcc_hires
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
+    date=$(date +'%m_%d_%H_%M')
+    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/swbd-$date/s5b/$mfccdir/storage $mfccdir/storage
+  fi
+
+  # the 100k_nodup directory is copied separately, as
+  # we want to use exp/tri2_ali_100k_nodup for lda_mllt training
+  # the main train directory might be speed_perturbed
+  for dataset in $train_set ; do
+    utils/copy_data_dir.sh data/$dataset data/${dataset}_hires
+
+    # scale the waveforms, this is useful as we don't use CMVN
+    data_dir=data/${dataset}_hires
+    cat $data_dir/wav.scp | python -c "
+import sys, os, subprocess, re, random
+scale_low = 1.0/8
+scale_high = 2.0
+for line in sys.stdin.readlines():
+  if len(line.strip()) == 0:
+    continue
+  print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high))
+"| sort -k1,1 -u > $data_dir/wav.scp_scaled || exit 1;
+    mv $data_dir/wav.scp_scaled $data_dir/wav.scp
+
+    steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
+      --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir;
+    steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/${dataset} $mfccdir;
+
+    # Remove the small number of utterances that couldn't be extracted for some
+    # reason (e.g. too short; no such file).
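+    # (utils/fix_data_dir.sh drops the utterances whose features are missing
+    # and keeps files like utt2spk, spk2utt and feats.scp sorted and consistent)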
+ utils/fix_data_dir.sh data/${dataset}_hires; + done + +fi + +# check if the required tools are present +$KALDI_ROOT/egs/aspire/s5/local/multi_condition/check_version.sh || exit 1; +mkdir -p exp/nnet3_multicondition +if [ $stage -le 4 ]; then + # prepare the impulse responses + local/multi_condition/prepare_impulses_noises.sh --log-dir exp/make_reverb/log \ + --db-string "$db_string" \ + --download-rirs $download_rirs \ + --RIR-home $RIR_home \ + data/impulses_noises || exit 1; +fi + +if [ $stage -le 5 ]; then + # corrupt the training data to generate multi-condition data + for data_dir in train_sp; do + num_reps=$num_data_reps + reverb_data_dirs= + for i in `seq 1 $num_reps`; do + cur_dest_dir=" data/temp_${data_dir}_${i}" + $KALDI_ROOT/egs/aspire/s5/local/multi_condition/reverberate_data_dir.sh --random-seed $i \ + --snrs "$snrs" --log-dir exp/make_corrupted_wav \ + data/${data_dir} data/impulses_noises $cur_dest_dir + reverb_data_dirs+=" $cur_dest_dir" + done + utils/combine_data.sh --extra-files utt2uniq data/${data_dir}_mc data/${data_dir} $reverb_data_dirs + rm -rf $reverb_data_dirs + done +fi + +if [ $stage -le 6 ]; then + # copy the alignments for the newly created utterance ids + ali_dirs= + for i in `seq 1 $num_data_reps`; do + local/multi_condition/copy_ali_dir.sh --utt-prefix "rev${i}_" exp/tri5_ali_sp exp/tri5_ali_sp_temp_$i || exit 1; + ali_dirs+=" exp/tri5_ali_sp_temp_$i" + done + local/multi_condition/copy_ali_dir.sh exp/tri5_ali_sp exp/tri5_ali_sp_copy || exit 1; + ali_dirs+=" exp/tri5_ali_sp_copy" + utils/combine_ali_dirs.sh --num-jobs 32 \ + data/train_sp_mc exp/tri5_ali_sp_mc $ali_dirs || exit 1; + rm -rf $ali_dirs +fi + +train_set=train_sp_mc +if [ $stage -le 7 ]; then + mfccdir=mfcc_reverb + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + date=$(date +'%m_%d_%H_%M') + utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/babel_reverb-$date/s5/$mfccdir/storage $mfccdir/storage + fi + for data_dir in $train_set; do + utils/copy_data_dir.sh data/$data_dir data/${data_dir}_hires + steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${data_dir}_hires \ + exp/make_reverb_hires/${data_dir} $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/${data_dir}_hires exp/make_reverb_hires/${data_dir} $mfccdir || exit 1; + utils/fix_data_dir.sh data/${data_dir}_hires + utils/validate_data_dir.sh data/${data_dir}_hires + done +fi + +# ivector extractor training +if [ $stage -le 8 ]; then + # We need to build a small system just because we need the LDA+MLLT transform + # to train the diag-UBM on top of. We use --num-iters 13 because after we get + # the transform (12th iter is the last), any further training is pointless. + # this decision is based on fisher_english + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ + --splice-opts "--left-context=3 --right-context=3" \ + --boost-silence $boost_sil \ + $numLeavesMLLT $numGaussMLLT data/${train_set}_hires \ + data/langp/tri5_ali exp/tri5_ali_sp_mc exp/nnet3_multicondition/tri3b +fi + +if [ $stage -le 9 ]; then + # To train a diagonal UBM we don't need very much data, so use the smallest subset. 
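+  # The 512-Gaussian diagonal UBM trained here is only used to initialize the
+  # iVector extractor in the next stage.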
+ steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \ + data/${train_set}_hires 512 exp/nnet3_multicondition/tri3b exp/nnet3_multicondition/diag_ubm +fi + +if [ $stage -le 10 ]; then + # iVector extractors can be sensitive to the amount of data, but this one has a + # fairly small dim (defaults to 100) so we don't use all of it, we use just the + # 100k subset (just under half the data). + steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \ + data/${train_set}_hires exp/nnet3_multicondition/diag_ubm exp/nnet3_multicondition/extractor || exit 1; +fi + +if [ $stage -le 11 ]; then + # We extract iVectors on all the train_nodup data, which will be what we + # train the system on. + + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). + steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ + data/${train_set}_max2_hires exp/nnet3_multicondition/extractor exp/nnet3_multicondition/ivectors_$train_set || exit 1; + +fi + +exit 0; diff --git a/egs/babel/s5d/local/nnet3/run_lstm.sh b/egs/babel/s5d/local/nnet3/run_lstm.sh new file mode 100755 index 00000000000..8105cfda387 --- /dev/null +++ b/egs/babel/s5d/local/nnet3/run_lstm.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +# Copyright 2015 Johns Hopkins University (Author: Daniel Povey). +# 2015 Vijayaditya Peddinti +# 2015 Xingyu Na +# 2015 Pegah Ghahrmani +# 2016 Xiaohui Zhang +# Apache 2.0. + + +# this is a basic lstm script +# LSTM script runs for more epochs than the TDNN script +# and each epoch takes twice the time + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call lstm/train.sh with --gpu false + +stage=0 +train_stage=-10 +has_fisher=true +affix= +speed_perturb=true +multicondition=true +common_egs_dir= +reporting_email= + +# LSTM options +splice_indexes="-2,-1,0,1,2 0 0" +lstm_delay=" -1 -2 -3 " +label_delay=5 +num_lstm_layers=3 +cell_dim=1024 +hidden_dim=1024 +recurrent_projection_dim=256 +non_recurrent_projection_dim=256 +chunk_width=20 +chunk_left_context=40 +chunk_right_context=0 + + +# training options +num_epochs=8 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=2 +num_jobs_final=6 +momentum=0.5 +num_chunk_per_minibatch=100 +samples_per_iter=20000 +remove_egs=true + +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh +. ./cmd.sh + +if ! cuda-compiled; then + cat < transcript2"; +$noise_word = shift @ARGV; + +while() { + $_ =~ m:^(\S+) (.+): || die "bad line $_"; + $utt = $1; + $trans = $2; + print "$utt"; + foreach $w (split (" ",$trans)) { + $w =~ tr:a-z:A-Z:; # Upcase everything to match the CMU dictionary. . + $w =~ s:\\::g; # Remove backslashes. We don't need the quoting. + $w =~ s:^\%PERCENT$:PERCENT:; # Normalization for Nov'93 test transcripts. + $w =~ s:^\.POINT$:POINT:; # Normalization for Nov'93 test transcripts. + if($w =~ m:^\[\<\w+\]$: || # E.g. 
[<door_slam], this means a door slammed in the preceding word. Delete.
+       $w =~ m:^\[\w+\>\]$: || # E.g. [door_slam>], this means a door slammed in the next word. Delete.
+       $w =~ m:\[\w+/\]$: || # E.g. [phone_ring/], which indicates the start of this phenomenon.
+       $w =~ m:\[\/\w+]$: || # E.g. [/phone_ring], which indicates the end of this phenomenon.
+       $w eq "~" || # This is used to indicate truncation of an utterance. Not a word.
+       $w eq ".") { # "." is used to indicate a pause. Silence is optional anyway so not much
+                    # point including this in the transcript.
+      next; # we won't print this word.
+    } elsif($w =~ m:\[\w+\]:) { # Other noises, e.g. [loud_breath].
+      print " $noise_word";
+    } elsif($w =~ m:^\<([\w\']+)\>$:) {
+      # e.g. replace <and> with and. (the <> means verbal deletion of a word).. but it's pronounced.
+      print " $1";
+    } elsif($w eq "--DASH") {
+      print " -DASH"; # This is a common issue; the CMU dictionary has it as -DASH.
+#    } elsif($w =~ m:(.+)\-DASH$:) { # E.g. INCORPORATED-DASH... seems the DASH gets combined with previous word
+#      print " $1 -DASH";
+    } else {
+      print " $w";
+    }
+  }
+  print "\n";
+}
diff --git a/egs/babel/s5d/local/optimize/OptimizeParams.pm b/egs/babel/s5d/local/optimize/OptimizeParams.pm
new file mode 100644
index 00000000000..d9fb3647ddd
--- /dev/null
+++ b/egs/babel/s5d/local/optimize/OptimizeParams.pm
@@ -0,0 +1,631 @@
+# Author: Jason Eisner, Univ. of Pennsylvania
+#
+# $Revision: 3.11 $ of $Date: 2006/04/12 08:53:23 $
+
+# !!! should add root-finding methods with derivative (newton-raphson:
+# use rtsafe, section 9.4) and in multiple dimensions (sections 9.5, 9.6).
+
+package OptimizeParams;
+use strict;
+
+BEGIN {
+  use Exporter ();
+  use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
+  $VERSION = do { my @r = (q$Revision: 3.11 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # must be all one line, for MakeMaker
+
+  @ISA = qw(Exporter);
+  @EXPORT_OK = qw(&powell &easybrent &easydbrent &easyzbrent
+                  &mnbrak &brent &dbrent &zbrent
+                  $machine_epsilon $inf &basisvectors);
+}
+
+# A sample program with simple examples on a one-dimensional function.
+#
+# #!/usr/local/bin/perl5 -w
+#
+# use OptimizeParams qw(&powell &easybrent &easydbrent &zbrent);
+# use strict 'vars';
+#
+# sub f { sin(($_[0]-12.34567)/8)**2-0.5 }   # function
+# sub df { sin(2*($_[0]-12.34567)/8)/8 }     # derivative
+# sub fdf { my($temp)=($_[0]-12.34567)/8;    # (function, derivative) computed at one go
+#           (sin($temp)**2-0.5, sin(2*$temp)/8) }
+#
+# # Three ways to find (x,f(x)) at minimum of function, namely (12.34567,-0.5)
+# print join(" ",easybrent(0,1,\&f)), "\n";
+# print join(" ",easydbrent(0,1,\&f,\&df)), "\n";
+# print join(" ",easydbrent(0,1,\&fdf)), "\n";
+#
+# # A fourth way, using a multidimensional optimizer even though f happens
+# # to be 1-dimensional. The vector [0] is our starting guess.
+# my($xvec,$fx) = powell(\&f,[0]);
+# print join(" ",@$xvec,$fx), "\n";
+#
+# # Find zero of function, namely 6.06
+# my($x)=zbrent(\&f,0,13); print $x," ",&f($x),"\n";
+
+# ----------------------------------------------------------------------
+
+use vars @EXPORT_OK;
+$inf=exp(1e307); # could just use the bareword inf, which seems to work but generates warnings with -w
+$machine_epsilon = 1; $machine_epsilon /= 2 while 1 + $machine_epsilon/2 > 1;
+
+sub FMAX { # (maximum)
+  $_[0] > $_[1] ? $_[0] : $_[1];
+}
+
+sub SIGN {
+  $_[1] >= 0 ? abs($_[0]) : -abs($_[0]);
+}
+
+
+# Direction Set (Powell's) Methods in Multidimensions
+# From Numerical Recipes in C, Section 10.5, p. 417ff. Ported to Perl.
+#
+# Minimization of a function of n variables [for which the gradient is
+# not known]. 
Required arguments are (a reference to) the function +# and (a reference to) a length-n vector holding the coordinates of +# the starting point. Optional arguments are a fractional tolerance in +# the output value (used as a stopping criterion), a fractional +# tolerance in the input value (used as a stopping criterion on +# one-dimensional searches), and (a reference to) a list of n +# (references to) such vectors, holding an initial set of directions. +# Return values are a reference to a vector holding the coordinates at +# the minimum; the value of the function at that minimum; the number +# of iterations taken; and the final set of directions. +# +# This Perl version has a few different representational conventions. +# It's now the ROWS of $xi (not the columns) that hold the direction vectors. +# And the coordinates are 0-indexed, not 1-indexed. +# The $itol argument is new. + +sub powell { + my($funcref,$p,$ftol,$iftol,$xi) = @_; + my($n) = scalar @$p; # Number of dimensions. + my($ITMAX)=200; # Maximum allowed iterations. + + # Defaults for optional arguments + $ftol = $machine_epsilon unless defined $ftol; + $iftol = 2.0e-4 unless defined $iftol; # in the C version, this is TOL (defined at linmin) + $xi = &basisvectors($n) unless (defined $xi); + + my($fret) = &$funcref(@$p); + my(@pt) = @$p; # Save the initial point. + my($iter); + for($iter=1;;++$iter) { + my($fp) = $fret; + my($ibig) = 0; + my($del) = 0; # Will be the biggest function decrease. + my($i); + for ($i=0;$i<$n;$i++) { # In each iteration, loop over all directions in the set. + my($xit) = \@{$xi->[$i]}; # Copy the direction, + my($fptt) = $fret; + $fret = &linmin($p,$xit,$funcref,$iftol); # minimize along it, + if (abs($fptt-$fret) > $del) { # and record it if it is the largest decrease so far. + $del=abs($fptt-$fret); + $ibig=$i; + } + } + if (2*abs($fp-$fret) <= $ftol*(abs($fp)+abs($fret))) { # Termination criterion. + return($p,$fret,$iter,$xi); + } + die "$0: powell exceeding maximum of $ITMAX iterations" if ($iter==$ITMAX); + + { + my($xit); + my(@ptt); + my($j); + for ($j=0;$j<$n;$j++) { # Construct the extrapolated point and the average direction moved. Save the old starting point. + $ptt[$j] = 2*$p->[$j] - $pt[$j]; + $xit->[$j] = $p->[$j] - $pt[$j]; + $pt[$j] = $p->[$j]; + } + my($fptt) = &$funcref(@ptt); + if ($fptt < $fp) { + my($t) = 2 * ($fp-2*$fret+$fptt) * ($fp-$fret-$del)**2 - $del*($fp-$fptt)**2; + if ($t < 0) { + $fret = &linmin($p,$xit,$funcref); + $xi->[$ibig] = $xi->[$n-1]; + $xi->[$n-1] = $xit; + } + } + } + } # Back for another iteration + + die "$0: internal error in powell: should never have reached this line"; +} + +sub basisvectors { # returns the basis vectors in the given dimension (a reference to a list of references to lists) + my($n) = @_; + my($vects); + my($i,$j); + for ($i=0;$i<$n;$i++) { + for ($j=0;$j<$n;$j++) { + $vects->[$i][$j] = ($i==$j ? 1 : 0); + } + } + return $vects; +} + + + +{ + my($ncom); # "Global" variables for linmin to communicate with f1dim. + my(@pcom, @xicom, $nrfuncref); + + # Routine called by powell. + # From Numerical Recipes in C, Section 10.5, p. 419. Ported to Perl. + # + # Given an n-dimensional point $p and an n-dimensional direction + # vector $xi (both references to lists), moves and resets $p to + # where the function $funcref takes on a minimum along the direction + # $xi from $p, and replaces $xi by the actual vector displacement that + # $p was moved. Returns the value of $funcref at $p. 
This is actually + # all accomplished by calling the routines mnbrak and brent. + # $iftol is a tolerance on the input value, passed to brent. + + sub linmin { + my($p,$xi,$funcref,$iftol) = @_; + + print STDERR "$0: linmin: searching from (",join(", ",@$p),") in direction (",join(", ",@$xi),")\n"; + + $ncom = @$p; # Define the global variables. + $nrfuncref = $funcref; + @pcom = @$p; + @xicom = @$xi; + + my($ax) = 0; # Initial guess for brackets. + my($xx) = 1; + my($bx); + ($ax,$xx,$bx) = &mnbrak($ax,$xx,\&f1dim); + my($xmin,$fret) = &brent($ax,$xx,$bx,\&f1dim,$iftol); + my($j); + for ($j=0;$j<$ncom;$j++) { + $p->[$j] += ($xi->[$j] *= $xmin); + } + return $fret; + } + + # Function minimized by linmin. + + sub f1dim { + my($x) = @_; + my(@xt); + my($j); + for($j=0; $j<$ncom;$j++) { + $xt[$j] = $pcom[$j] + $x * $xicom[$j]; + } + return &$nrfuncref(@xt); + } +} + + + +# Easy way to call mnbrak and brent together in order to minimize +# a function. +# +# ax and bx are any distinct points; we'll look for a minimum in the +# downhill direction on the line through (ax,f(ax)) and (bx,f(bx)). +# +# Return value is the same as brent, namely (x,f(x)). But we might +# fail to find a minimum! If the function never increases again so +# far as we can tell -- it plateaus, or decreases toward infinity, or +# increases in a range that mnbrak doesn't sample -- then we'll return +# (+/-inf, minimum value we found). Here the +/- is according to +# which direction we searched in, and the minimum value is f(x) for +# the last finite x we considered; this value may or may not be +# finite, but should indicate the asymptotic behavior of the function. +# +# Just as in brent, the tolerance $tol can be omitted. + +sub easybrent { + my($ax,$bx,$funcref,$tol) = @_; + my($newa,$newb,$newc,$fa,$fb,$fc) = &mnbrak($ax,$bx,$funcref); + return ($newc,$fb) if ($newc==$inf || $newc==-$inf); + &brent($newa,$newb,$newc,$funcref,$tol); +} + +# Easy way to call mnbrak and dbrent together in order to minimize +# a function whose derivative is known. +# ax and bx are any distinct points; we'll look for a minimum in the +# downhill direction on the line through (ax,f(ax)) and (bx,f(bx)). +# +# See easybrent for return value convention when we fail. +# +# Just as in dbrent, the tolerance $tol can be omitted. So can +# $dfuncref, if $funcref returns a pair of values -- both the function +# and its derivative. + +sub easydbrent { + my($ax,$bx,$funcref,$dfuncref,$tol) = @_; + my($newa,$newb,$newc,$fa,$fb,$fc) = &mnbrak($ax,$bx,$funcref); + return ($newc,$fb) if ($newc==$inf || $newc==-$inf); + &dbrent($newa,$newb,$newc,$funcref,$dfuncref,$tol); + # If we want to check output against brent: + # my(@ans1)=&dbrent($newa,$newb,$newc,$funcref,$dfuncref); + # my(@ans2)=&brent($newa,$newb,$newc,$funcref); + # die "dbrent $ans1[0], brent $ans2[0]\n" unless &main::near($ans1[0]+1e6,$ans2[0]+1e6); + # @ans1; +} + +# Easy way to TRY to bracket a root and then call zbrent to find the +# root. The calling convention is similar to easybrent: we are given +# two starting points. If they have different signs, we just call +# zbrent. If they have the same sign and are both positive, we search +# in the downhill direction for a negative value (using mnbrak +# together with a modified golden-section minimizer (section 10.1) +# that stops as soon as it crosses zero). Similarly, if they have the +# same sign and are both positive, we search uphill for a positive +# value. 
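+#
+# Until it is implemented, bracket the root yourself and call zbrent directly,
+# e.g. my($x) = zbrent(\&f, $x1, $x2), which requires f($x1) and f($x2) to
+# have opposite signs.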
+ +sub easyzbrent { + my($ax,$bx,$funcref) = @_; + die "Not implemented yet; must call zbrent directly" +} + + +# Parabolic Interpolation and Brent's Method in one dimension +# From Numerical Recipes in C, Section 10.2, p. 404. Ported to Perl. +# +# Given a continuous function of one variable referenced by $funcref, +# and given a bracketing triplet of abcissas $ax, $bx, $cx as returned +# by mnbrak, this routine isolates the minimum to a fractional +# precision of about $tol using Brent's method. Returns (x, f(x)) at +# the minimum. $tol is set to a good default if omitted. +# +# See easybrent for an easier way to call this. + +sub brent { + my($ax, $bx, $cx, $funcref, $tol) = @_; + $tol = sqrt($machine_epsilon) unless defined $tol; + my($e) = 0.0; # This will be the distance moved on the step before last. + my($ITMAX) = 100; # The maximum allowed number of iterations. + my($CGOLD) = 0.3819660; # The golden ratio. [Actually, 1-golden ratio.] + my($ZEPS) = 1.0e-10; + + my($a) =($ax < $cx ? $ax : $cx); # a and b must be in ascending order, but input abscissas need not be. + my($b) =($ax > $cx ? $ax : $cx); + my($x,$w,$v); $x=$w=$v=$bx; # Initializations ... + die "brent: inputs out of order\n" unless $a < $x && $x < $b; # probably should also check f(x) < f(a),f(b) + my($fw,$fv,$fx); ($fw)=($fv)=($fx)=&$funcref($x); + my($d,$u,$fu); + + my($iter); + for ($iter=1; $iter<=$ITMAX; $iter++) { # Main program loop. + my($xm) = 0.5*($a+$b); + my($tol1)=$tol*abs($x)+$ZEPS; + my($tol2)=2.0*$tol1; + return ($x,$fx) if (abs($x-$xm) <= ($tol2-0.5*($b-$a))); # Test for done here. + if (abs($e) > $tol1) { # Construct a trial parabolic fit. + my($r) = ($x-$w)*($fx-$fv); + my($q) = ($x-$v)*($fx-$fw); + my($p) = ($x-$v)*$q - ($x-$w)*$r; + $q=2.0*($q-$r); + $p = -$p if $q > 0; + $q = abs($q); + my($etemp)=$e; + $e=$d; + if (abs($p) >= abs(0.5*$q*$etemp) || $p <= $q*($a-$x) || $p >= $q*($b-$x)) { + $d = $CGOLD*($e = ($x >= $xm ? $a-$x : $b-$x)); + } + # The above conditions determine the acceptability of the parabolic + # fit. Here we take the golden section step into the larger of the two + # segments. + else { + $d=$p/$q; # Take the parabolic step. + $u=$x+$d; + $d = &SIGN($tol1,$xm-$x) if ($u-$a < $tol2 || $b-$u < $tol2); + } + } else { + $d=$CGOLD*($e=($x >= $xm ? $a-$x : $b-$x)); + } + $u = (abs($d) >= $tol1 ? $x+$d : $x+&SIGN($tol1,$d)); + ($fu) = &$funcref($u); # This is the one function evaluation per iteration. + if ($fu <= $fx) { # Now decide what to do with our function evaluation. + ($u >= $x ? $a : $b) = $x; + ($v, $w, $x) = ($w, $x, $u); # Housekeeping follows: + ($fv, $fw, $fx) = ($fw, $fx, $fu); + } else { + ($u < $x ? $a : $b) = $u; + if ($fu <= $fw || $w == $x) { + $v=$w; + $w=$u; + $fv=$fw; + $fw=$fu; + } elsif ($fu <= $fv || $v == $x || $v == $w) { + $v = $u; + $fv = $fu; + } + } # Done with housekeeping. Back for another iteration. + } + die "$0: brent: Maximum number of iterations ($ITMAX) exceeded"; +} + +# One-Dimensional Search with First Derivatives +# From Numerical Recipes in C, Section 10.3, p. 405. Ported to Perl. +# +# Given a continuous function of one variable referenced by $funcref, +# and its derivative referenced by $dfuncref, and given a bracketing +# triplet of abcissas $ax, $bx, $cx as returned by mnbrak, this +# routine isolates the minimum to a fractional precision of about $tol +# using a modification of Brent's method that uses derivatives. +# Returns (x, f(x)) at the minimum. $tol is set to a good default if +# omitted. 
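+#
+# For example, with f(x) = (x-1.2)**2 and the bracketing triplet (0, 1, 2.6),
+#   dbrent(0, 1, 2.6, sub { ($_[0]-1.2)**2 }, sub { 2*($_[0]-1.2) })
+# returns approximately (1.2, 0).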
+# +# See easydbrent for an easier way to call this. + +sub dbrent { + my($ax, $bx, $cx, $funcref, $dfuncref, $tol) = @_; + $tol = sqrt($machine_epsilon) unless defined $tol; + + my($e) = 0.0; # This will be the distance moved on the step before last. + my($ITMAX) = 100; # The maximum allowed number of iterations. + my($ZEPS) = 1.0e-10; + + my($a) =($ax < $cx ? $ax : $cx); # a and b must be in ascending order, but input abscissas need not be. + my($b) =($ax > $cx ? $ax : $cx); + my($w,$v,$x,$u); $w=$v=$x=$bx; # Initializations ... + die "dbrent: inputs out of order\n" unless $a < $x && $x < $b; # probably should also check f(x) < f(a),f(b) + my($fx,$dx)=&$funcref($x); + $dx=&$dfuncref($x) unless defined $dx; # if $funcref only returned one value in previous line + my($fw,$fv,$fu); $fw=$fv=$fx; + my($dw,$dv,$du); $dw=$dv=$dx; # All our housekeeping chores are doubled by the necessity of moving derivative values around as well as function values. + my($d); + + my($iter); + for ($iter=1; $iter<=$ITMAX; $iter++) { # Main program loop. + my($xm) = 0.5*($a+$b); + my($tol1)=$tol*abs($x)+$ZEPS; + my($tol2)=2.0*$tol1; + # print "a $a b $b x $x xm $xm\n"; + return ($x,$fx) if (abs($x-$xm) <= ($tol2-0.5*($b-$a))); # Test for done here. + if (abs($e) > $tol1) { # Construct a trial parabolic fit. + my($d1)=2.0*($b-$a); # Initialize these d's to an out-of-bracket value + my($d2)=$d1; + $d1 = ($w-$x)*$dx/($dx-$dw) if ($dw != $dx); # Secant method with one point. + $d2 = ($v-$x)*$dx/($dx-$dv) if ($dv != $dx); # And the other. + # Which of these two estimates of d shall we take? + # We will insist that they be within the bracket, and on + # the side pointed to by the derivative at x: + my($u1)=$x+$d1; + my($u2)=$x+$d2; + my($ok1) = ($a-$u1)*($u1-$b) > 0 && $dx*$d1 <= 0; + my($ok2) = ($a-$u2)*($u2-$b) > 0 && $dx*$d2 <= 0; + my($olde) = $e; # Movement on the step before last. + $e = $d; + if ($ok1 || $ok2) { # Take only an acceptable d, and if both are acceptable, then take the smallest one. + if ($ok1 && $ok2) { + $d=(abs($d1) < abs($d2) ? $d1 : $d2); + } elsif ($ok1) { + $d=$d1; + } else { + $d=$d2; + } + if (abs($d) <= abs(0.5*$olde)) { + $u=$x+$d; + $d=&SIGN($tol1,$xm-$x) if ($u-$a < $tol2 || $b-$u < $tol2); + } else { # Bisect, not golden section. + $d=0.5*($e=($dx >= 0 ? $a-$x : $b-$x)); # Decide which segment by the sign of the derivative. + } + } else { + $d=0.5*($e=($dx >= 0 ? $a-$x : $b-$x)); + } + } else { + $d=0.5*($e=($dx >= 0 ? $a-$x : $b-$x)); + } + if (abs($d) >= $tol1) { + $u=$x+$d; + ($fu,$du)=&$funcref($u); + } else { + $u=$x+&SIGN($tol1,$d); + ($fu,$du)=&$funcref($u); + return ($x,$fx) if ($fu > $fx); # If the minimum step in the downhill direction takes us uphill, then we are done. + } + # Now all the housekeeping, sigh. + $du=&$dfuncref($u) unless defined $du; # if $funcref only returned one value just above + if ($fu <= $fx) { + ($u >= $x ? $a : $b) = $x; + ($v,$fv,$dv)=($w,$fw,$dw); + ($w,$fw,$dw)=($x,$fx,$dx); + ($x,$fx,$dx)=($u,$fu,$du); + } else { + ($u < $x ? $a : $b) = $u; + if ($fu <= $fw || $w==$x) { + ($v,$fv,$dv)=($w,$fw,$dw); + ($w,$fw,$dw)=($u,$fu,$du); + } elsif ($fu < $fv || $v == $x || $v == $w) { + ($v,$fv,$dv)=($u,$fu,$du); + } + } + } + die "$0: dbrent: Maximum number of iterations ($ITMAX) exceeded\n"; + # Alternative: + # warn "$0: dbrent: Maximum number of iterations ($ITMAX) exceeded. Trying brent ...\n"; + # &brent($ax,$bx,$cx,$funcref,$tol); +} + + +# Routine for Initially Bracketing a Minimum. +# From Numerical Recipes in C, Section 10.1, p. 400. 
Ported to Perl. +# +# Given a continuous function referenced by $funcref, and distinct +# initial points $ax and $bx, this routine searches in the downhill +# direction (defined by the function as evaluated at the initial +# points) and returns new points $ax, $bx, $cx that bracket a minimum +# of the function [in the sense that b is between a and c, and f(b) is +# less than both f(a) and f(c)]. Also returned are the function values +# at the three points, $fa, $fb, and $fc. +# +# JME: If $cx is +inf (resp. -inf), this means that we searched in the +# positive (resp. negative) direction and the function just decreased +# forever (either to a plateau or without bound - look at $fb to see +# the last finite value). At least, it decreased at all the points +# where we sampled it - we might have skipped right over a spike. So +# either there is no minimum in the direction we searched, or we +# missed it; in either case our return values won't bracket any minimum +# and the caller should either give up or try something else! +# +# JME: Note that it's also possible that $cx remains finite, but that +# the minimum $fb that we bracket is -$inf (and typically $fc will be +# -$inf too). +# +# JME: f(b) is now required to be STRICTLY less than f(a) and f(c). +# This avoids counting an "extended" point of inflection as a minimum. +# I imagine the minimization routines would nonetheless be willing to +# find such if it's in the interval (should check...), but requiring +# us to search past it here is important for the previous paragraph: +# if the function value is eventually -inf forever due to overflow, we +# still keep searching forever until the abcissa is also +/- inf, +# rather than saying we've hit a plateau and that's enough to stop. +# +# It's ok if &$funcref returns multiple values; we'll evaluate it in +# list context and use only the first value. This is useful because +# of the calling convention for dbrent; e.g., easydbrent relies on it. + +sub mnbrak { + my($ax, $bx, $funcref) = @_; + my($GOLD) = 1.618034; + my($GLIMIT) = 100.0; + my($TINY) = 1.0e-20; + + die "mnbrak: $ax and $bx must be different\n" if $ax==$bx; # JME: added + my($fa) = &$funcref($ax); + my($fb) = &$funcref($bx); + if ($fb > $fa) { + # Switch roles of a and b so that we can go downhill in the direction + # from a to b. + ($ax, $bx) = ($bx, $ax); + ($fa, $fb) = ($fb, $fa); + } + + my($cx) = $bx + $GOLD*($bx-$ax); # First guess for c. + my($fc) = &$funcref($cx); + + # Keep looping here until we bracket. + while ($fb >= $fc && $cx != $inf && $cx != -$inf) { # JME: added the inf tests, and changed >= to > to make sure we keep searching all the way to inf if necessary in order to get $ax $bx $cx strictly in order + # print("ax $ax bx $bx cx $cx // fa $fa fb $fb fc $fc\n"), + + # Compute u by parabolic extrapolation from a, b, c. + # $TINY is used to prevent any possible division by zero. + my($r) = ($bx-$ax)*($fb-$fc); + my($q) = ($bx-$cx)*($fb-$fa); + my($u) = $bx -(($bx-$cx)*$q - ($bx-$ax)*$r)/(2.0*&SIGN(&FMAX(abs($q-$r),$TINY),$q-$r)); + my($ulim) = $bx + $GLIMIT*($cx-$bx); + my($fu); + # We won't go farther than this. Test various possibilities: + if (($bx - $u)*($u - $cx) > 0) { # Parabolic u is (strictly) between b and c: try it. + ($fu) = &$funcref($u); + if ($fu < $fc) { # Got a minimum between b and c. 
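+        # f(u) < f(c), so take (b, u, c) as the new candidate bracket; the
+        # return statement below verifies that it is a valid one.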
+ ($ax,$bx) = ($bx,$u); + ($fa,$fb) = ($fb,$fu); + return($ax, $bx, $cx, $fa, $fb, $fc) if ($ax-$bx)*($bx-$cx)>0 && $fb < $fa && $fb < $fc; + die "mnbrak: oops, trying to return $ax $bx $cx out of order, or else middle value of $fa $fb $fc is not smallest\n"; + } elsif ($fu > $fb) { # Got a minimum between a and u. + $cx = $u; + $fc = $fu; + return($ax, $bx, $cx, $fa, $fb, $fc) if ($ax-$bx)*($bx-$cx)>0 && $fb < $fa && $fb < $fc; + die "mnbrak: oops, trying to return $ax $bx $cx out of order, or else middle value of $fa $fb $fc is not smallest\n"; + } + $u = $cx + $GOLD*($cx-$bx); # Parabolic fit was no use. Use default magnification. + ($fu) = &$funcref($u); + } elsif (($cx-$u)*($u-$ulim) > 0) { # Parabolic fit is between c and its allowed limit + ($fu) = &$funcref($u); + if ($fu < $fc) { + ($bx, $cx, $u) = ($cx, $u, $u+$GOLD*($u-$cx)); # JME: formerly $cx+$GOLD*($cx-$bx), but that seems to have been a bug since the new u might not be beyond the new cx. + ($fb, $fc, $fu) = ($fc, $fu, &$funcref($u)); + } + } elsif (($u-$ulim)*($ulim-$cx) > 0) { # Limit parabolic u to maximum allowed value. JME: Changed >= to > so that we are guaranteed $u > $cx strictly. See comment at top of loop. + $u=$ulim; + ($fu) = &$funcref($u); + } else { # Reject parabolic u, use default magnification. + $u=$cx+$GOLD*($cx-$bx); + ($fu)=&$funcref($u); + } + ($ax,$bx,$cx) = ($bx,$cx,$u); # Eliminate oldest point and continue. + ($fa,$fb,$fc) = ($fb,$fc,$fu); + } + return($ax, $bx, $cx, $fa, $fb, $fc) if ($ax-$bx)*($bx-$cx)>0 && $fb <= $fa && ($fb <= $fc || $cx==$inf || $cx==-$inf); + die "mnbrak: oops, trying to return $ax $bx $cx out of order, or else middle value of $fa $fb $fc is not smallest but we didn't run into infinity with cx=$fc\n"; +} + + +# Using the Van Wijngaarden-Dekker-Brent method, find the root of a +# function f (referenced by $funcref) between x1 and x2, where f(x1) +# and f(x2) must have different signs. The root will be refined until +# its accuracy is $tol (which defaults to the machine epsilon if +# omitted). +# +# See easyzbrent for a sometimes easier way to call this. + +sub zbrent { + my($funcref, $x1, $x2, $tol) = @_; + $tol = $machine_epsilon unless defined $tol; + + my($ITMAX) = 100; # The maximum allowed number of iterations. + my($EPS) = $machine_epsilon; # Machine floating-point precision. (Defined as 3.0e-8 in C version.) + + my($a,$b,$c)=($x1,$x2,$x2); + my($d,$e,$min1,$min2); + my($fa,$fb) = (&$funcref($a), &$funcref($b)); + my($p,$q,$r,$s,$tol1,$xm); + + die "zbrent: root must be bracketed between x1=$x1 and x2=$x2, but f(x1)=$fa, f(x2)=$fb" if $fb*$fa > 0; + + my($fc)=$fb; + my($iter); + for ($iter=1;$iter<=$ITMAX;$iter++) { + if ($fb*$fc > 0) { + $c=$a; # Rename a, b, c and adjust bounding interval d. + $fc=$fa; + $e=$d=$b-$a; + } + if (abs($fc) < abs($fb)) { + $a=$b; + $b=$c; + $c=$a; + $fa=$fb; + $fb=$fc; + $fc=$fa; + } + $tol1=2*$EPS*abs($b)+0.5*$tol; # Convergence check. + $xm=0.5*($c-$b); + return $b if (abs($xm) <= $tol1 || $fb == 0); + if (abs($e) >= $tol1 && abs($fa) > abs($fb)) { + $s=$fb/$fa; # Attempt inverse quadratic interpolation. + if ($a == $c) { + $p=2*$xm*$s; + $q=1-$s; + } else { + $q=$fa/$fc; + $r=$fb/$fc; + $p=$s*(2*$xm*$q*($q-$r)-($b-$a)*($r-1)); + $q=($q-1)*($r-1)*($s-1); + } + $q = -$q if ($p > 0); # Check whether in bounds. + $p=abs($p); + $min1=3*$xm*$q-abs($tol1*$q); + $min2=abs($e*$q); + if (2*$p < ($min1 < $min2 ? $min1 : $min2)) { + $e=$d; # Accept interpolation. + $d=$p/$q; + } else { + $d=$xm; # Interpolation failed, use bisection. 
+        $e=$d;
+      }
+    } else { # Bounds decreasing too slowly, use bisection.
+      $d=$xm;
+      $e=$d;
+    }
+    $a=$b; # Move last best guess to $a.
+    $fa=$fb;
+    if (abs($d) > $tol1) { # Evaluate new trial root.
+      $b += $d;
+    } else {
+      $b += ($xm > 0 ? abs($tol1) : -abs($tol1));
+    }
+    $fb=&$funcref($b);
+  }
+  die "$0: zbrent: Maximum number of iterations ($ITMAX) exceeded";
+}
+
+1;
diff --git a/egs/babel/s5d/local/optimize2.pl b/egs/babel/s5d/local/optimize2.pl
new file mode 100755
index 00000000000..ead70129ec8
--- /dev/null
+++ b/egs/babel/s5d/local/optimize2.pl
@@ -0,0 +1,152 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+
+use FindBin;
+use lib "$FindBin::RealBin/optimize/";
+use OptimizeParams qw(&powell &easybrent &easydbrent &zbrent);
+use Data::Dumper;
+use Scalar::Util qw(looks_like_number);
+
+use 5.010;
+
+my @cmd_array = ();
+my %opts = ();
+my $output_dir = "";
+my $result_regexp = "(.*)";
+my $cmd;
+my $ftol = 3e-2;
+my $iftol = 1e-1;
+
+while (@ARGV) {
+  my $parm = shift @ARGV;
+  if ($parm eq "--var") {
+    my $var = shift;
+    die "$0: The variable $var does not contain a starting value" unless $var =~ /.*=.*/;
+    my @F = split "=", $var;
+    die "$0: The variable $var has more than one assignment" unless @F == 2;
+    die "$0: Multiple definitions of the variable $F[0]" if defined $opts{$F[0]};
+    $opts{$F[0]} = $F[1];
+  } elsif ($parm eq "--output-dir") {
+    $output_dir = shift;
+  } elsif ($parm eq "--ftol") {
+    $ftol = shift;
+    die "$0: The ftol parameter has to be a floating-point number" unless looks_like_number($ftol);
+  } elsif ($parm eq "--iftol") {
+    $iftol = shift;
+    die "$0: The iftol parameter has to be a floating-point number" unless looks_like_number($iftol);
+  } elsif ($parm eq "--result-regexp") {
+    $result_regexp = shift;
+  } else {
+    push @cmd_array, $parm;
+    while (@ARGV) {
+      push @cmd_array, shift @ARGV;
+    }
+  }
+
+}
+
+
+sub substitute {
+  my $cmd_proto = $_[0];
+  my %valhash = %{$_[1]};
+
+
+  my $cmd_out = $cmd_proto;
+
+  foreach my $key (keys %valhash) {
+    #print $elem . "($key, " . $valhash{$key}. ")->";
+    my $prev_cmd_out = $cmd_out;
+    $cmd_out =~ s/\b$key\b/$valhash{$key}/g;
+    die "$0: The variable $key is not used in the command." if $prev_cmd_out eq $cmd_out;
+    #print $elem . "\n";
+  }
+
+  return $cmd_out;
+}
+
+sub f {
+  state $iter = 0;
+  my @params = @_;
+  my $i = 0;
+
+  my %curr_opts;
+  foreach my $v (sort keys %opts) {
+    $curr_opts{$v} = abs($params[$i]);
+    $i += 1;
+  }
+
+  my $result;
+  my $k = join(" ", substitute( $cmd, \%curr_opts));
+  print "$0: Debug: $k\n";
+  open(my $fh, '-|', "(set -e -o pipefail; $k) 2>&1") or die $!;
+  while (my $line=<$fh>) {
+    print $line;
+    chomp $line;
+    if ($line =~ /$result_regexp/) {
+      print "$0: Line $line matches the regexp \"$result_regexp\"\n";
+      $result = $line;
+      $result =~ s/$result_regexp/$1/g;
+    }
+  }
+  close($fh) or die "$0: The command didn't finish successfully: $!\n";
+
+  my $exit = $? >> 8;
+  if ( $exit != 0) {
+    die "$0: The command return status indicates failure: $exit\n";
+  }
+
+  if (not defined $result) {
+    die "$0: Matching the regexp against the command output didn't yield any results";
+  }
+  print "$0: Iteration $iter: " . join(" ", "[", @params, "] =>", $result) . "\n";
+
+  $iter += 1;
+  return -1.0 * $result+0.0;
+}
+
+
+print "$0: Optimizing with " . join(" ", %opts) . "\n";
+#print Dumper(\@cmd_array);
+
+$cmd = join(" ", @cmd_array);
+
+die "$0: Empty command \"$cmd\"" unless $cmd;
+die "$0: Empty command \"$cmd\"" if $cmd =~ /^\s*$/;
+
+my @params;
+foreach my $key (sort keys %opts) {
+  push @params, $opts{$key};
+}
+
+#my($xvec,$fx) = (\@params, 1);
+my($xvec,$fx) = powell(\&f,\@params, $ftol, $iftol);
+print "$0: Optimization finished with: " . join(" ",@$xvec, -$fx), "\n";
+
+
+@params=@{$xvec};
+foreach my $v (sort keys %opts) {
+  $opts{$v} = abs(shift @params);
+}
+$cmd=substitute($cmd, \%opts);
+
+{
+  open(my $param_file, "> $output_dir/params") || die "Cannot open file $output_dir/params: $!";
+  print $param_file "$_=$opts{$_}\n" for (sort keys %opts);
+  print $param_file "criterion=", -$fx;
+  close($param_file);
+}
+
+{
+  open(my $param_file, "> $output_dir/command.sh") || die "Cannot open file $output_dir/command.sh: $!";
+  print $param_file "$cmd\n";
+  close($param_file);
+}
+
+{
+  open(my $param_file, "> $output_dir/params.sh") || die "Cannot open file $output_dir/params.sh: $!";
+  print $param_file "declare -A params;\n";
+  print $param_file "params[$_]=$opts{$_}\n" for (sort keys %opts);
+  close($param_file);
+}
+
diff --git a/egs/babel/s5d/local/prepare_acoustic_training_data.pl b/egs/babel/s5d/local/prepare_acoustic_training_data.pl
new file mode 100755
index 00000000000..bc7c2812831
--- /dev/null
+++ b/egs/babel/s5d/local/prepare_acoustic_training_data.pl
@@ -0,0 +1,484 @@
+#!/usr/bin/env perl
+use Getopt::Long;
+
+########################################################################
+#
+# Script to prepare the Babel acoustic training data for Kaldi.
+#
+# - Place transcripts in a file named "text"
+#       Each line contains: utteranceID word1 word2 ...
+#
+# - Place the utterance-to-speaker map in a file named "utt2spk"
+#       Each line contains: utteranceID speakerID
+#       speakerID MUST BE a prefix of the utteranceID
+#       Kaldi code does not require it, but some training scripts do.
+#
+# - Place the utterance-to-segment map in a file named "segments"
+#       Each line contains: utteranceID recordingID startTime endTime
+#
+# - Place the recordingID-to-waveformFile map in "wav.scp"
+#       Each line contains: recordingID Input_pipe_for_reading_waveform|
+#
+# - Place the speaker-utterance map in a file named "spk2utt"
+#       Each line contains: speakerID utteranceID_1 utteranceID_2 ...
+#       This is the inverse of the utt2spk mapping
+#
+# Note 1: the utteranceIDs in the first 3 files must match exactly, and
+#         the recordingIDs in the last 2 files must match exactly.
+#
+# Note 2: Babel data formats and file-naming conventions are assumed.
+#
+# - The transcriptions and waveforms are in subdirectories named
+#       audio/<recordingID>.sph
+#       transcription/<recordingID>.txt
+#   There is 1 pair of files per recording, with extensions as above
+#
+# - The audio is in NIST sphere format, so sph2pipe may be used, e.g.
+#       BABEL_BP_101_11694_20111204_205320_inLine \
+#       /export/babel/sanjeev/kaldi-trunk/tools/sph2pipe_v2.5/sph2pipe \
+#           -f wav -p -c 1 \
+#           BABEL_BP_101_11694_20111204_205320_inLine.sph|
+#
+# - The filename contains speaker information, e.g.
+#       BABEL_BP_101_37210_20111102_170037_O1_scripted.sph -> 37210_A
+#       BABEL_BP_101_37210_20111102_172955_inLine.sph      -> 37210_A
+#       BABEL_BP_101_37210_20111102_172955_outLine.sph     -> 37210_B
+#   Specifically, the inLine speaker is the same as scripted
+#
+# - The transcription file has time marks in square brackets, e.g.
+# [0.0] +# +# [7.05] +# 啊 听 听唔听到 啊 你 而家 仲未 上课 系 嘛 +# [14.07] +# +# - If a vocabulary is provided, map all OOV tokens to an OOV symbol, +# and write out an OOV list with counts to a file named "oovCounts" +# +# If one or more word-fragment markers are provided, this script +# checks if an OOV token can be made in-vocabulary by stripping off +# the markers one by one from either end of the token. +# +# The default settings are +# + $vocabFile = ""; # No vocab file; nothing is mapped to OOV + $OOV_symbol = ""; # Default OOV symbol + $fragMarkers = ""; # No characters are word-fragment markers +# +# - Babel transcriptions contain 4 kinds of untranscribed words +# +# (()) designates unintelligible words +# designates a word in another language +# designates a sequence of pre-recorded words +# designates two simultaneous foreground speakers +# +# This script maps them to OOV. They are not included in oovCounts +# +# - Babel transcriptions also contain a few non-linguistics tokens +# +# map to a vocal noise symbol +# map to a vocal noise symbol +# map to a vocal noise symbol +# map to a vocal noise symbol +# +# map to a nonvocal noise symbol +# map to a nonvocal noise symbol +# map to a nonvocal noise symbol +# map to a nonvocal noise symbol +# +# designates silence > 1 sec. +# + $vocalNoise = ""; + $nVoclNoise = ""; + $silence = ""; + $icu_transform=""; + $get_whole_transcripts = "false"; +# +######################################################################## + +print STDERR "$0 " . join(" ", @ARGV) . "\n"; +GetOptions("fragmentMarkers=s" => \$fragMarkers, + "oov=s" => \$OOV_symbol, + "vocab=s" => \$vocabFile, + "icu-transform=s" => \$icu_transform, + "get-whole-transcripts=s" => \$get_whole_transcripts + ); + +if ($#ARGV == 1) { + $inDir = $ARGV[0]; + $outDir = $ARGV[1]; + print STDERR ("$0: $inDir $outDir\n"); + if($vocabFile) { + print STDERR ("\tLimiting transcriptions to words in $vocabFile\n"); + print STDERR ("\tMapping OOV tokens to \"$OOV_symbol\"\n"); + print STDERR ("\tif they remain OOV even after removing [$fragMarkers] from either end\n") if ($fragMarkers); + } + print STDERR ("$0 ADVICE: Use full path for the Input Directory\n") unless ($inDir=~m:^/:); +} else { + print STDERR ("Usage: $0 [--options] InputDir OutputDir\n"); + print STDERR ("\t--vocab File containing the permitted vocabulary\n"); + print STDERR ("\t--oov Use this symbol for OOV words (default )\n"); + print STDERR ("\t--fragmentMarkers Remove these from ends of words to minimize OOVs (default none)\n"); + print STDERR ("\t--get-whole-transcripts (true|false) Do not remove utterances containing no speech\n"); + exit(1); +} + +######################################################################## +# Read and save the vocabulary and map anything not in the vocab +######################################################################## + +if ($vocabFile) { + open (VOCAB, $vocabFile) + || die "Unable to open vocabulary file $vocabFile"; + $numWords = 0; + while () { + next unless (m:^([^\s]+):); + $numWords++ unless (exists $inVocab{$1}); # Don't count word repetitions + $inVocab{$1} = 1; # commonly found in lexicons + } + close(VOCAB); + print STDERR ("Read $numWords unique words from $vocabFile\n"); +} + +######################################################################## +# First read segmentation information from all the transcription files +######################################################################## + +$TranscriptionDir = "$inDir/transcription"; +if (-d $TranscriptionDir) { + 
@TranscriptionFiles = `ls ${TranscriptionDir}/*.txt`; + if ($#TranscriptionFiles >= 0) { + printf STDERR ("$0: Found %d .txt files in $TranscriptionDir\n", ($#TranscriptionFiles +1)); + $numFiles = $numUtterances = $numWords = $numOOV = $numSilence = 0; + while ($filename = shift @TranscriptionFiles) { + $fileID = $filename; # To capture the base file name + $fileID =~ s:.+/::; # remove path prefix + $fileID =~ s:\.txt\s*$::; # remove file extension + # For each transcription file, extract and save segmentation data + $numUtterancesThisFile = 0; + $prevTimeMark = -1.0; + $text = ""; + if ( $icu_transform ) { + $inputspec="uconv -f utf8 -t utf8 -x \"$icu_transform\" $filename |"; + } else { + $inputspec=$filename; + } + open (TRANSCRIPT, $inputspec) || die "Unable to open $filename"; + while ($line=) { + chomp $line; + if ($line =~ m:^\s*\[([0-9]+\.*[0-9]*)\]\s*$:) { + $thisTimeMark = $1; + if ($thisTimeMark < $prevTimeMark) { + print STDERR ("$0 ERROR: Found segment with negative duration in $filename\n"); + print STDERR ("\tStart time = $prevTimeMark, End time = $thisTimeMark\n"); + print STDERR ("\tThis could be a sign of something seriously wrong!\n"); + print STDERR ("\tFix the file by hand or remove it from the directory, and retry.\n"); + exit(1); + } + if ($prevTimeMark<0) { + # Record the first timemark and continue + $prevTimeMark = $thisTimeMark; + next; + } + ################################################## + # Create an utteranceID using fileID & start time + # - Assume Babel file naming conventions + # - Remove prefix: program_phase_language + # - inLine = scripted = spkr A, outLine = B + # - Move A/B so that utteranceIDs sort by spkr + # - Assume utterance start time < 10000 sec. + ################################################## + $utteranceID = $fileID; + $utteranceID =~ s:[^_]+_[^_]+_[^_]+_::; + $utteranceID =~ s:([^_]+)_(.+)_(inLine|scripted):${1}_A_${2}:; + $utteranceID =~ s:([^_]+)_(.+)_outLine:${1}_B_${2}:; + $utteranceID .= sprintf ("_%06i", (100*$prevTimeMark)); + ################################################## + # Then save segmentation, transcription, spkeaerID + ################################################## + if (exists $transcription{$utteranceID}) { + # utteranceIDs should be unique, but this one is not! 
+              # Either time marks in the transcription file are bad,
+              # or something went wrong in generating the utteranceID
+              print STDERR ("$0 WARNING: Skipping duplicate utterance $utteranceID\n");
+            }
+            elsif ($text eq "") {
+              # Could be due to text filtering done below
+              # Output information to STDOUT to enable > /dev/null
+              print STDOUT ("$0: Skipping empty transcription $utteranceID\n");
+            } else {
+              $transcription{$utteranceID} = $text;
+              $startTime{$utteranceID} = $prevTimeMark;
+              $endTime{$utteranceID} = $thisTimeMark;
+              if ($utteranceID =~ m:([^_]+_[AB]).*:) {
+                $speakerID{$utteranceID} = $1;
+              } else {
+                # default: one speaker per audio file
+                $speakerID{$utteranceID} = $fileID;
+              }
+              $baseFileID{$utteranceID} = $fileID;
+              $numUtterancesThisFile++;
+              $numUtterances++;
+              $text = "";
+            }
+            $prevTimeMark = $thisTimeMark;
+          } else {
+            @tokens = split(/\s+/, $line);
+            $text = "";
+            while ($w = shift(@tokens)) {
+              # First, some Babel-specific transcription filtering
+              if (($w eq "<sta>")||($w eq "<male-to-female>")||($w eq "<female-to-male>")||($w eq "~")) {
+                next;
+              } elsif (($w eq "<breath>")||($w eq "<cough>")||($w eq "<laugh>")||($w eq "<lipsmack>")) {
+                $text .= " $vocalNoise";
+                $numWords++;
+              } elsif (($w eq "<click>")||($w eq "<dtmf>")||($w eq "<int>")||($w eq "<ring>")){
+                $text .= " $nVoclNoise";
+                $numWords++;
+              } elsif (($w eq "(())")||($w eq "<foreign>")||($w eq "<overlap>")||($w eq "<prompt>")) {
+                $text .= " $OOV_symbol";
+                $oovCount{$w}++;
+                $numOOV++;
+                $numWords++;
+              } elsif ($w eq "<no-speech>") {
+                $text .= " $silence";
+                $numSilence++;
+              } else {
+                # This is just a regular spoken word
+                if ($vocabFile && (! $inVocab{$w}) && $fragMarkers) {
+                  print "Not in vocab: $w\n";
+                  # $w is a potential OOV token
+                  # Remove fragMarkers to see if $w becomes in-vocabulary
+                  while ($w =~ m:^(\S+[$fragMarkers]|[$fragMarkers]\S+)$:) {
+                    if ($w =~ m:^(\S+)[$fragMarkers]$:) {
+                      $w = $1;
+                      last if ($inVocab{$w});
+                    } elsif ($w =~m:^[$fragMarkers](\S+)$:) {
+                      $w = $1;
+                      last if ($inVocab{$w});
+                    } else {
+                      die "Logically, the program should never reach here!";
+                    }
+                  }
+                }
+                # If still an OOV, replace $w by $OOV_symbol
+                if ($vocabFile && (! 
$inVocab{$w})) {
+                  # $w is definitely an OOV token
+                  if (exists $oovCount{$w}) {
+                    $oovCount{$w}++;
+                  } else {
+                    $oovCount{$w} = 1;
+                  }
+                  $w = $OOV_symbol;
+                  $numOOV++;
+                }
+                $text .= " $w";
+                $numWords++;
+              }
+            }
+            $text =~ s:^\s+::; # Remove leading white space, if any
+            # Transcriptions must contain real words to be useful in training
+            if ($get_whole_transcripts ne "true") {
+              $text =~ s:^(($OOV_symbol|$vocalNoise|$nVoclNoise|$silence)[ ]{0,1})+$::;
+            }
+          }
+        }
+        close(TRANSCRIPT);
+        if ($numUtterancesThisFile>0) {
+          $lastTimeMarkInFile{$fileID} = $prevTimeMark;
+          $numUtterancesInFile{$fileID} = $numUtterancesThisFile;
+          $numUtterancesThisFile = 0;
+        }
+        $numFiles++;
+      }
+      print STDERR ("$0: Recorded $numUtterances non-empty utterances from $numFiles files\n");
+    } else {
+      print STDERR ("$0 ERROR: No .txt files found in $TranscriptionDir\n");
+      exit(1);
+    }
+} else {
+    print STDERR ("$0 ERROR: No directory named $TranscriptionDir\n");
+    exit(1);
+}
+
+########################################################################
+# Then verify existence of corresponding audio files and their durations
+########################################################################
+
+$AudioDir = "$inDir/audio";
+if (-d $AudioDir) {
+  @AudioFiles = `ls ${AudioDir}/*.sph`;
+  if ($#AudioFiles >= 0) {
+    printf STDERR ("$0: Found %d .sph files in $AudioDir\n", ($#AudioFiles +1));
+    $numFiles = 0;
+    while ($filename = shift @AudioFiles) {
+      $fileID = $filename;
+      $fileID =~ s:.+/::;      # remove path prefix
+      $fileID =~ s:\.sph\s*::; # remove file extension
+      if (exists $numUtterancesInFile{$fileID}) {
+        # Some portion of this file has training transcriptions
+        @Info = `head $filename`;
+        $SampleCount = -1;
+        $SampleRate = 8000; #default
+        while ($#Info>=0) {
+          $line = shift @Info;
+          $SampleCount = $1 if ($line =~ m:sample_count -i (\d+):);
+          $SampleRate = $1 if ($line =~ m:sample_rate -i (\d+):);
+        }
+        if ($SampleCount<0) {
+          # Unable to extract a valid duration from the sphere header
+          print STDERR ("Unable to extract duration: skipping file $filename");
+        } else {
+          $waveformName{$fileID} = $filename; chomp $waveformName{$fileID};
+          $duration{$fileID} = $SampleCount/$SampleRate;
+          $numFiles++;
+        }
+      } else {
+        # Could be due to text filtering resulting in an empty transcription
+        # Output information to STDOUT to enable > /dev/null
+        print STDOUT ("$0: No transcriptions for audio file ${fileID}.sph\n");
+      }
+    }
+    print STDERR ("$0: Recorded durations from headers of $numFiles .sph files\n");
+  } else {
+    print STDERR ("$0 NOTICE: No .sph files in $AudioDir\n");
+  }
+
+  @AudioFiles = `ls ${AudioDir}/*.wav`;
+  if ($#AudioFiles >= 0) {
+    $soxi=`which soxi` or die "$0: Could not find soxi binary -- do you have sox installed?\n";
+    chomp $soxi;
+    printf STDERR ("$0: Found %d .wav files in $AudioDir\n", ($#AudioFiles +1));
+    $numFiles = 0;
+    while ($filename = shift @AudioFiles) {
+      $fileID = $filename;
+      $fileID =~ s:.+/::;      # remove path prefix
+      $fileID =~ s:\.wav\s*::; # remove file extension
+      if (exists $numUtterancesInFile{$fileID}) {
+        # Some portion of this file has training transcriptions
+        $duration = `$soxi -D $filename`;
+        if ($duration <=0) {
+          # Unable to extract a valid duration from the wav header
+          print STDERR ("Unable to extract duration: skipping file $filename");
+        } else {
+          if (exists $waveformName{$fileID} ) {
+            print STDERR ("$0 ERROR: duplicate fileID \"$fileID\" for files \"$filename\" and \"" . 
$waveformName{$fileID} ."\"\n");
+            exit(1);
+          }
+          $waveformName{$fileID} = $filename; chomp $waveformName{$fileID};
+          $duration{$fileID} = $duration;
+          $numFiles++;
+        }
+      } else {
+        # Could be due to text filtering resulting in an empty transcription
+        # Output information to STDOUT to enable > /dev/null
+        print STDOUT ("$0: No transcriptions for audio file ${fileID}.wav\n");
+      }
+    }
+    print STDERR ("$0: Recorded durations of $numFiles .wav files\n");
+  } else {
+    print STDERR ("$0 NOTICE: No .wav files in $AudioDir\n");
+  }
+
+  if (scalar(keys %waveformName) == 0) {
+    print STDERR ("$0 ERROR: No audio files found!\n");
+  }
+} else {
+  print STDERR ("$0 ERROR: No directory named $AudioDir\n");
+  exit(1);
+}
+
+########################################################################
+# Now all the needed information is available. Write out the output files.
+########################################################################
+
+unless (-d $outDir) {
+  print STDERR ("$0: Creating output directory $outDir\n");
+  die "Failed to create output directory $outDir" if system("mkdir -p $outDir"); # i.e. if the exit status is not zero.
+}
+print STDERR ("$0: Writing 5 output files to $outDir\n");
+
+$textFileName = "$outDir/text";
+open (TEXT, "> $textFileName") || die "$0 ERROR: Unable to write text file $textFileName\n";
+
+$utt2spkFileName = "$outDir/utt2spk";
+open (UTT2SPK, "> $utt2spkFileName") || die "$0 ERROR: Unable to write utt2spk file $utt2spkFileName\n";
+
+$segmentsFileName = "$outDir/segments";
+open (SEGMENTS, "> $segmentsFileName") || die "$0 ERROR: Unable to write segments file $segmentsFileName\n";
+
+$scpFileName = "$outDir/wav.scp";
+open (SCP, "| sort -u > $scpFileName") || die "$0 ERROR: Unable to write wav.scp file $scpFileName\n";
+my $binary=`which sph2pipe` or die "Could not find the sph2pipe command"; chomp $binary;
+$SPH2PIPE ="$binary -f wav -p -c 1";
+my $SOXBINARY =`which sox` or die "Could not find the sox command"; chomp $SOXBINARY;
+$SOXFLAGS ="-r 8000 -c 1 -b 16 -t wav - downsample";
+
+$spk2uttFileName = "$outDir/spk2utt";
+open (SPK2UTT, "> $spk2uttFileName") || die "$0 ERROR: Unable to write spk2utt file $spk2uttFileName\n";
+
+$oovFileName = "$outDir/oovCounts";
+open (OOV, "| sort -nrk2 > $oovFileName") || die "$0 ERROR: Unable to write oov file $oovFileName\n";
+
+$numUtterances = $numSpeakers = $numWaveforms = 0;
+$totalSpeech = $totalSpeechSq = 0.0;
+foreach $utteranceID (sort keys %transcription) {
+  $fileID = $baseFileID{$utteranceID};
+  if (exists $waveformName{$fileID}) {
+    # There are matching transcriptions and audio
+    $numUtterances++;
+    $totalSpeech += ($endTime{$utteranceID} - $startTime{$utteranceID});
+    $totalSpeechSq += (($endTime{$utteranceID} - $startTime{$utteranceID})
+                       *($endTime{$utteranceID} - $startTime{$utteranceID}));
+    print TEXT ("$utteranceID $transcription{$utteranceID}\n");
+    print UTT2SPK ("$utteranceID $speakerID{$utteranceID}\n");
+    print SEGMENTS ("$utteranceID $fileID $startTime{$utteranceID} $endTime{$utteranceID}\n");
+    if (exists $uttList{$speakerID{$utteranceID}}) {
+      $uttList{$speakerID{$utteranceID}} .= " $utteranceID";
+    } else {
+      $numSpeakers++;
+      $uttList{$speakerID{$utteranceID}} = "$utteranceID";
+    }
+    next if (exists $scpEntry{$fileID});
+    $numWaveforms++;
+    if ($waveformName{$fileID} =~ /.*\.sph/ ) {
+      $scpEntry{$fileID} = "$SPH2PIPE $waveformName{$fileID} |";
+    } else {
+      $scpEntry{$fileID} = "$SOXBINARY $waveformName{$fileID} $SOXFLAGS |";
+    }
+  } else {
+    print STDERR ("$0 WARNING: No audio file for transcription 
$utteranceID\n"); + } +} +foreach $fileID (sort keys %scpEntry) { + print SCP ("$fileID $scpEntry{$fileID}\n"); +} +foreach $speakerID (sort keys %uttList) { + print SPK2UTT ("$speakerID $uttList{$speakerID}\n"); +} +foreach $w (sort keys %oovCount) { + print OOV ("$w\t$oovCount{$w}\n"); +} +exit(1) unless (close(TEXT) && close(UTT2SPK) && close(SEGMENTS) && close(SCP) && close(SPK2UTT) && close(OOV)); + +print STDERR ("$0: Summary\n"); +print STDERR ("\tWrote $numUtterances lines each to text, utt2spk and segments\n"); +print STDERR ("\tWrote $numWaveforms lines to wav.scp\n"); +print STDERR ("\tWrote $numSpeakers lines to spk2utt\n"); +print STDERR ("\tHmmm ... $numSpeakers distinct speakers in this corpus? Unusual!\n") + if (($numSpeakers<($numUtterances/500.0)) || ($numSpeakers>($numUtterances/2.0))); +print STDERR ("\tTotal # words = $numWords (including $numOOV OOVs) + $numSilence $silence\n") + if ($vocabFile); +printf STDERR ("\tAmount of speech = %.2f hours (including some due to $silence)\n", $totalSpeech/3600.0); +if ($numUtterances>0) { + printf STDERR ("\tAverage utterance length = %.2f sec +/- %.2f sec, and %.2f words\n", + $totalSpeech /= $numUtterances, + sqrt(($totalSpeechSq/$numUtterances)-($totalSpeech*$totalSpeech)), + $numWords/$numUtterances); +} + +exit(0); + +######################################################################## +# Done! +######################################################################## diff --git a/egs/babel/s5d/local/prepare_extended_lexicon.sh b/egs/babel/s5d/local/prepare_extended_lexicon.sh new file mode 100644 index 00000000000..3cc5ca6c21f --- /dev/null +++ b/egs/babel/s5d/local/prepare_extended_lexicon.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +unk_fraction_boost=1.0 +num_sent_gen=12000000 +num_prons=1000000 +# End configuration section +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +# Extend the original lexicon. +# Will creates the files data/local/extend/{lexiconp.txt,oov2prob}. +local/extend_lexicon.sh --cmd "$train_cmd" --cleanup false \ + --num-sent-gen $num_sent_gen --num-prons $num_prons \ + data/local/lexicon.txt data/local/lang_ext data/dev2h/text + + +extend_lexicon_param=() +[ -f data/local/extend/original_oov_rates ] || exit 1; +unk_fraction=`cat data/local/extend/original_oov_rates |\ + grep "token" | awk -v x=$unk_fraction_boost '{print $NF/100.0*x}'` +extend_lexicon_param=(--cleanup false --unk-fraction $unk_fraction \ + --oov-prob-file data/local/lang_ext/oov2prob) + +cp -r data/lang data/lang_ext +local/arpa2G.sh ${extend_lexicon_param[@]} \ + data/srilm/lm.gz data/lang_ext data/lang_ext + diff --git a/egs/babel/s5d/local/prepare_lexicon.pl b/egs/babel/s5d/local/prepare_lexicon.pl new file mode 100755 index 00000000000..ff128f07637 --- /dev/null +++ b/egs/babel/s5d/local/prepare_lexicon.pl @@ -0,0 +1,404 @@ +#!/usr/bin/env perl +use Getopt::Long; +use Data::Dumper; + +############################################################################### +# +# Convert a Babel-formatted dictionary to work with Kaldi, and optionally +# add non-speech "words" that appear in the transcription. e.g. +# +# Convert dictionary from entries of the form +# +# WORD Romanization pronunciation1 pronunciation2 ... +# +# where each pronunciation has syllable boundaries [.#] and tags _X, " or % +# +# Phone1 Phone2 _TAG . Phone1 Phone2 Phone3 _TAG +# +# and so on, e.g. 
+# +# 㓤 gat1 g 6 t _1 h O: t _3 k i: t _1 +# 兄妹 hing1mui2 h i: N _1 . m u:j _2 h i: N _1 . m u:j _6 +# +# to entries of the form +# +# 㓤 g_1 6_1 t_1 +# 㓤 h_3 O:_3 t_3 +# 㓤 k_1 i:_1 t_1 +# 兄妹 h_1 i:_1 N_1 m_2 u:j_2 +# 兄妹 h_1 i:_1 N_1 m_6 u:j_6 +# +# +# Write only one pronunciation per line +# Transfer any tags, prefixed by underscores, to phones in the syllable +# Remove the syllable boundary markers, given by periods or pound signs +# +# NOTE: The Romainzation is present only for some languages. See -r option. +# +# This script will create 5 new files +# +# - lexicon.txt: words from the original lexicon + some non-speech "words" +# + $OOV_symbol = ""; # Default OOV symbol: pronunciation + $vocalNoise = ""; # Vocal noise symvol: pronunciation + $nVoclNoise = ""; # Nonvocal noise: pronunciation + $silence = ""; # Silence > 1 second: pronunciation $sil + $icu_transform = ""; + $phonemap=""; +# +# - nonsilence_phones.txt: tagged phones from the new lexicon +# +# - optional_silence.txt: phones used to model silence in acoustic training +# + $sil = "SIL"; # Also the pronunciation of the word token $silence +# +# - silence_phones.txt: $sil and special phones for non-speech "words" +# +# - extra_questions.txt: sets of phones of the form *_TAG, one set per line +# +# The last file provides sets of phones that share a tag, so that questions can +# effectively be asked about the tag of a neighboring phone during clustering. +# +############################################################################### + +GetOptions("add=s" => \$nsWordsFile, + "oov=s" => \$OOV_symbol, + "romanized!" => \$romanized, + "sil=s" => \$sil, + "icu-transform=s" => \$icu_transform, + "phonemap=s" => \$phonemap + ); + +if ($#ARGV == 1) { + $inDict = $ARGV[0]; + $outDir = $ARGV[1]; + print STDERR ("$0: $inDict $outDir\n"); + print STDERR ("\tNon-speech words will be added from $nsWordsFile\n") if ($nsWordsFile); + print STDERR ("\tUnknown words will be represented by \"$OOV_symbol\"\n") unless ($OOV_symbol eq ""); + print STDERR ("\tRomanized forms of words expected in the dictionary\n") if ($romanized); + print STDERR ("\tThe optional silence phone will be \"$OOV_symbol\"\n") unless ($sil eq "SIL"); + print STDERR ("\tThe ICU transform for case-conversion will be: \"$icu_transform\"\n") if ($icu_transform); +} else { + print STDERR ("Usage: $0 [--options] BabelDictionary OutputDir\n"); + print STDERR ("\t--add Add these nonspeech words to lexicon\n"); + print STDERR ("\t--oov Use this symbol for OOV words (default )\n"); + print STDERR ("\t--romanized Dictionary contains (omissible) romanized word-forms\n"); + print STDERR ("\t--phonemap During reading the dictionary, perform the specified \n"); + print STDERR ("\t phoneme mapping. The format is: p1=p1' p2' p3';p2=p4'\n"); + print STDERR ("\t where p1 and p2 are existing phonemes and p1'..p4' are\n"); + print STDERR ("\t either new or existing phonemes\n"); + print STDERR ("\t--icu-transform ICU transform to be used during the ICU transliteration\n"); + exit(1); +} + +unless (-d $outDir) { + print STDERR ("$0: Creating output directory $outDir\n"); + die "Unable to create output directory $outDir" + if system("mkdir -p $outDir"); # mkdir returned with status != 0 +} +$outLex = "$outDir/lexicon.txt"; +$nspFile = "$outDir/nonsilence_phones.txt"; +$spFile = "$outDir/silence_phones.txt"; +$osFile = "$outDir/optional_silence.txt"; +$exqFile = "$outDir/extra_questions.txt"; + + +#The phonemap is in the form of "ph1=a b c;ph2=a f g;...." 
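+# For example (an illustrative mapping, not taken from any real Babel
+# language): the option
+#   --phonemap "ax=a x;aw=a w"
+# is parsed by the loop below into
+#   %phonemap_hash = ( "ax" => [ "a", "x" ], "aw" => [ "a", "w" ] );
+# so that every occurrence of the phoneme "ax" in a pronunciation is replaced
+# by the two-phone sequence "a x" while the dictionary is being read.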
+%phonemap_hash; +if ($phonemap) { + $phonemap=join(" ", split(/\s+/, $phonemap)); + print $phonemap . "\n"; + @phone_map_instances=split(/;/, $phonemap); + foreach $instance (@phone_map_instances) { + ($phoneme, $tgt) = split(/=/, $instance); + $phoneme =~ s/^\s+|\s+$//g; + $tgt =~ s/^\s+|\s+$//g; + #print "$phoneme=>$tgt\n"; + @tgtseq=split(/\s+/,$tgt); + $phonemap_hash{$phoneme} = []; + push @{$phonemap_hash{$phoneme}}, @tgtseq; + } +} + +#print Dumper(\%phonemap_hash); + +############################################################################### +# Read input lexicon, write output lexicon, and save the set of phones & tags. +############################################################################### + + +open (INLEX, $inDict) + || die "Unable to open input dictionary $inDict"; + +open (OUTLEX, "| sort -u > $outLex") + || die "Unable to open output dictionary $outLex"; + +$numWords = $numProns = 0; +while ($line=) { + chomp; + ############################################### + # Romainzed forms necessitate \t\S+ below, else + # if ($line =~ m:^([^\t]+)(\t[^\t]+)+$:) { + ############################################### + if ( ($romanized && ($line =~ m:^([^\t]+)\t\S+((\t[^\t]+)+)$:)) || + ((!$romanized) && ($line =~ m:^([^\t]+)((\t[^\t]+)+)$:)) ) { + $word = $1; + + if ( $icu_transform ) { + $xform_word=`echo \"$word\" | uconv -f utf8 -t utf8 -x \"$icu_transform\"`; + chop $xform_word; + #print $xform_word; + #$xform_word="[$word]$xform_word"; + } else { + $xform_word=$word; + } + $prons = $2; + $prons =~ s:^\s+::; # Remove leading white-space + $prons =~ s:\s+$::; # Remove trailing white-space + @pron = split("\t", $prons); + for ($p=0; $p<=$#pron; ++$p) { + $new_pron = ""; + while ($pron[$p] =~ s:^([^\.\#]+)[\.\#]{0,1}::) { push (@syllables, $1); } + while ($syllable = shift @syllables) { + $syllable =~ s:^\s+::; + $syllable =~ s:\s+$::; + $syllable =~ s:\s+: :g; + @original_phones = split(" ", $syllable); + @substituted_original_phones=(); + + foreach $phone (@original_phones) { + if (defined $phonemap_hash{$phone} ) { + #print "Sub: $phone => " . join (' ', @{$phonemap_hash{$phone}}) . "\n"; + push @substituted_original_phones, @{$phonemap_hash{$phone}}; + } else { + push @substituted_original_phones, $phone; + } + } + #print join(' ', @original_phones) . "=>" . join(' ',@substituted_original_phones) . 
"\n"; + @original_phones = @substituted_original_phones; + + $sylTag = ""; + $new_phones = ""; + while ($phone = shift @original_phones) { + if ($phone =~ m:^\_\S+:) { + # It is a tag; save it for later + $is_original_tag{$phone} = 1; + $sylTag .= $phone; + } elsif ($phone =~ m:^[\"\%]$:) { + # It is a stress marker; save it like a tag + $phone = "_$phone"; + $is_original_tag{$phone} = 1; + $sylTag .= $phone; + } elsif ( $phone =~ m:_:) { + # It is a phone containing "_" (underscore) + $new_phone=$phone; + $new_phone=~ s/\_//g; + if (( $is_original_phone{$phone} ) and not defined( $substituted_phones{phone}) ) { + die "ERROR, the $new_phone and $phone are both existing phones, so we cannot do automatic map!"; + } else { + print STDERR "WARNING, phone $phone was replaced with $new_phone\n" unless $substituted_phones{$phone}; + } + $is_original_phone{$new_phone} = "$new_phone"; + $substituted_phones{$phone} = $new_phone; + $new_phones .= " $new_phone"; + } else { + # It is a phone + if ( $substituted_phones{phone} ) { + die "ERROR, the $new_phone and $phone are both existing phones, so we cannot do automatic map!"; + } + $is_original_phone{$phone} = "$phone"; + $new_phones .= " $phone"; + } + } + $new_phones =~ s:(\S+):$1${sylTag}:g; + $new_pron .= $new_phones . "\t"; # the tab added by Dan, to keep track of + # syllable boundaries. + $is_compound_tag{$sylTag} = 1; + while ($new_phones =~ s:^\s*(\S+)::) { $is_new_phone{$1} = 1; } + } + $new_pron =~ s:^\s+::; + print OUTLEX ("$xform_word\t$new_pron\n"); + $numProns++; + } + @pron = (); + $numWords++; + } else { + print STDERR ("$0 WARNING: Skipping unparsable line $. in $inDict\n"); + } +} +close(INLEX) + && print STDERR ("$0: Read $numWords entries from $inDict\n"); + +############################################################################### +# Read a list of non-speech words if given, and write their "pronunciations" +# - Such lexicon entries are typically created for , etc. +# - If provided explicitly, they each get their own private phone models +# - Otherwise, they are mapped to an OOV symbol with a shared phone +# - All such phones are grouped with the $sil phone for clustering purposes, +# which means that they remain context-independent and form a question set. 
+############################################################################### + +if ($nsWordsFile) { + open (NSW, $nsWordsFile) + || die "Unable to open non-speech words file $nsWordsFile"; + $numNSWords = 0; + while ($line=) { + next unless ($line =~ m:^\s*([^\s]+)\s*:); # Take the first word if present + print OUTLEX ("$1\t$1\n"); # The word itself is its pronunciation + $is_silence_phone{$1} = 1; # Add it to the list of silence phones + $numProns++; + $numNSWords++; + } + close(NSW) + && print STDERR ("$0: Adding $numNSWords non-speech words from $nsWordsFile to $outLex\n"); +} + +# Add the OOV symbol to the lexicon +print OUTLEX ("$OOV_symbol\t\n"); # The symbol is assumed not to be +$is_silence_phone{""} = 1; # a phone in the original lexicon :-) +$numProns++; + +# Add the vocal noise symbol to the lexicon +print OUTLEX ("$vocalNoise\t\n"); # The symbol is assumed not to be +$is_silence_phone{""} = 1; # a phone in the original lexicon :-) +$numProns++; + +# Add the nonvocal noise symbol to the lexicon +print OUTLEX ("$nVoclNoise\t\n"); # The symbol is assumed not to be +$is_silence_phone{""} = 1; # a phone in the original lexicon :-) +$numProns++; + +# Finally, add the silence symbol to the lexicon +print OUTLEX ("$silence\t$sil\n"); +$is_silence_phone{$sil} = 1; +$numProns++; + +close(OUTLEX) + && print STDERR ("$0: Wrote $numProns pronunciations to $outLex\n"); + +############################################################################### +# - nonsilence_phones.txt: tagged phones from the new lexicon, 1 phone/line +############################################################################### + +foreach $phone (sort keys %is_new_phone) { + $tagLess_phone = $phone; + $tagLess_phone =~ s:^([^_]+).*:$1:; # underscore marks tag beginnings + if ($is_original_phone{$tagLess_phone}) { + # save $phone for writing later to the NSP file + $is_original_phone{$tagLess_phone} .= " $phone"; + } else { + print STDERR ("$0 WARNING: Skipping unexpected tagged phone $phone.\n"); + print STDERR ("\tCheck if original lexicon has phones containing \"\_\"\n"); + die "Cannot continue"; + } +} + +open (NSP, "| sort > $nspFile") + || die "Unable to write nonsilence phones to $nspFile"; +$p = 0; +foreach $phone (sort keys %is_original_phone) { + $tagged_phones = $is_original_phone{$phone}; + $tagged_phones =~ s:^\S+\s*::; # Remove the original (untagged) phone + unless ($phone eq "") { + print NSP ("$tagged_phones\n"); # Write out the remaining (tagged) phones + $p++; + } +} + +close(NSP) + && print STDERR ("$0: Wrote $p (sets of) nonsilence phones to $nspFile\n"); + +if ( $p > (0.5*$numWords) ) { + print STDERR ("$0 WARNING: Original dictionary had $numWords words, and\n"); + print STDERR ("\t\t$p nonspeech phones were found! 
This is highly unusual.\n"); + print STDERR ("\t\tCheck if the dictionary contains other tab-separated values\n"); + print STDERR ("\t\tthat are being mistaken for pronunciations by this script.\n"); + print STDERR ("$0 ADVICE: Use --romanized for omitting romanized word forms\n") unless ($romanized); +} + +############################################################################### +# - silence_phones.txt: $sil and special phones for non-speech "words" +############################################################################### + +open (SPF, "| sort > $spFile") + || die "Unable to write silence phones to $spFile"; +$p = 0; +foreach $phone (keys %is_silence_phone) { + print SPF ("$phone\n"); + $p++; +} +close(SPF) + && print STDERR ("$0: Wrote $p silence phones to $spFile\n"); + +############################################################################### +# - optional_silence.txt: the reserved (?) phone +############################################################################### + +$is_optional_silence{$sil} = 1; +open (OSF, "| sort > $osFile") + || die "Unable to write optional silence phones to $osFile"; +$p = 0; +foreach $phone (keys %is_optional_silence) { + print OSF ("$phone\n"); + $p++; +} +close(OSF) + && print STDERR ("$0: Wrote $p optional silence phones to $osFile\n"); + +############################################################################### +# - extra_questions.txt: sets of phones of the form *_TAG, one set per line +############################################################################### + +open (EXQ, "| sort > $exqFile") + || die "Unable to write the extra questions file $exqFile"; + +# First make sets of all tagged phones that share the (single) original tags + +$numExtraQs = 0; +foreach $tag (sort keys %is_original_tag) { + $question = ""; + foreach $phone (sort keys %is_new_phone) { + $question .= " $phone" if ($phone =~ m:$tag:); + } + $question =~ s:^\s+::; + print EXQ ("$question\n") unless ($question eq ""); + $numExtraQs++; +} +print STDERR ("$0: Found $numExtraQs unique individual tags in $inDict\n"); + +# It is possible to go overboard by creating questions with all 2^K possible +# subsets of the original tags. E.g. ($phone=~m:$tag1:)||($phone=~m:$tag2:) +# Do this by hand if it is linguistically meaningful for some language +# It is not worth doing this generically for all languages and tag sets. + +# If each syllable has only one tag, then questions with conjunctions of tags +# such as ($phone=~m:$tag1:)&&($phone=~m:$tag2:) will yield empty questions +# However, if syllables carry multiple tags, e.g. tone and stress, then one +# could similarly go overboard with conjunctions of overlapping tags. +# This too is not worth doing generically for all languages and tag sets. 
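+# (As a hypothetical illustration: with four tone tags _1.._4 and a stress
+# tag _%, the 2^5 = 32 possible tag subsets would generate dozens of
+# conjunction questions, most of which would match no phone at all.)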
+ +# Instead, just make sets of all tagged phones with the same new (compound) tag + +foreach $tag (sort keys %is_compound_tag) { + next if ($is_original_tag{$tag}); + $question = ""; + foreach $phone (sort keys %is_new_phone) { + $question .= " $phone" if ($phone =~ m:$tag:); + } + $question =~ s:^\s+::; + print EXQ ("$question\n") unless ($question eq ""); + $numExtraQs++; +} + +# Finally, add the silence phones as a set for use as a clustering question + +$question = ""; +foreach $phone (sort keys %is_silence_phone) { + $question .= " $phone"; +} +$question =~ s:^\s+::; +print EXQ ("$question\n") unless ($question eq ""); +$numExtraQs++; + +close(EXQ) + && print STDERR ("$0: Wrote $numExtraQs extra questions (incl compound tags and sil) to $exqFile\n"); diff --git a/egs/babel/s5d/local/prepare_stm.pl b/egs/babel/s5d/local/prepare_stm.pl new file mode 100755 index 00000000000..b4daec585e3 --- /dev/null +++ b/egs/babel/s5d/local/prepare_stm.pl @@ -0,0 +1,345 @@ +#!/usr/bin/env perl +use Getopt::Long; +use Encode; + +################################################################################ +# +# Script to prepare a NIST .stm file for scoring ASR output. Based on the files +# that are naturally created for Kaldi acoustic training: +# +# - data/segments: contains segmentID, recordingID, start-time & end-time +# +# - data/wav.scp: contains recordingID & waveform-name (or sph2pipe command) +# +# - data/utt2spk: contains segmentID % speakerID +# +# - data/text: contains segment ID and transcription +# +# The .stm file has lines of the form +# +# waveform-name channel speakerID start-time end-time [] transcription +# +# Clearly, most of the information needed for creating the STM file is present +# in the four Kaldi files mentioned above, except channel --- its value will be +# obtained from the sph2pipe command if present, or will default to "1" --- and +# from a separate demographics.tsv file. (A feature to add later?) +# +# Note: Some text filtering is done by this script, such as removing non-speech +# tokens from the transcription, e.g. , , etc. + + $fragMarkers = ""; # If given by the user, they are stripped from words + +# But two types of tokens are retained as is, if present. +# + $Hesitation = ""; # which captures hesitations, filled pauses, etc. + $OOV_symbol = ""; # which our system outputs occasionally. +# +# Note: The .stm file must be sorted by filename and channel in ASCII order and +# by the start=time in numerical order. NIST recommends the unix command +# "sort +0 -1 +1 -2 +3nb -4" +# +# This script will also produce an auxilliary file named reco2file_and_channel +# which is used by Kaldi scripts to produce output in .ctm format for scoring. +# So any channel ID assigned here will be consistent between ref and output. +# +# If the training text is Viterbi-aligned to the speech to obtain time marks, +# it should be straightforward to modify this script to produce a .ctm file: +# +# waveform-file channel start-time duration word +# +# which lists the transcriptions with word-level time marks. 
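+#
+# For example (hypothetical values), a .ctm file contains one line per word:
+#
+#   BABEL_BP_101_37210_20111102_172955 1 7.05 0.31 word1
+#   BABEL_BP_101_37210_20111102_172955 1 7.36 0.42 word2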
+# +# Note: A .ctm file must be sorted via "sort +0 -1 +1 -2 +2nb -3" +# +################################################################################ +GetOptions("fragmentMarkers=s" => \$fragMarkers, "hesitationToken=s" => \$Hesitation,"oovToken=s" => \$OOV_symbol); + +if ($#ARGV == 0) { + $inDir = $ARGV[0]; + print STDERR ("$0: Making stm file from information in $inDir\n"); + print STDERR ("\tRemoving [$fragMarkers]+ from ends of tokens\n") if ($fragMarkers); + print STDERR ("\tPreserving hesitation tokens $Hesitation\n") unless ($Hesitation eq ""); + print STDERR ("\tUsing $OOV_symbol as the OOV symbol\n") unless ($OOV_symbol eq ""); +} else { + print STDERR ("Usage: $0 [--options] DataDir\n"); + print STDERR ("\t--fragmentMarkers Strip these from ends of each token (default: none)\n"); + print STDERR ("\t--hesitationToken Preserve when deleting non-speech tokens (default: )\n"); + print STDERR ("\t--oovToken Use to replace hard-coded OOVs (default: )\n"); + exit(1); +} + +$segmentsFile = "$inDir/segments"; +$scpFile = "$inDir/wav.scp"; +$utt2spkFile = "$inDir/utt2spk"; +$textFile = "$inDir/text"; +$stmFile = "$inDir/stm"; +$charStmFile = "$inDir/char.stm"; +$reco2ctmFile = "$inDir/reco2file_and_channel"; + +################################################################################ +# Read the segmentIDs, file-IDs, start- and end-times from the segments file +################################################################################ + +my $num_failed_parses=0; +my $num_failed_parses_max=10; + +die "Current version of script requires a segments file" unless (-e $segmentsFile); + +open(SEGMENTS, $segmentsFile) + || die "Unable to read segments file $segmentsFile"; +$numSegments = 0; +while ($line=) { + @tokens = split(/\s+/, $line); + unless ($#tokens == 3) { + $num_failed_parses+=1; + print STDERR "$0: Couldn't parse line $. in $segmentsFile\n" + if ($num_failed_parses == 1); + print STDERR ("\tLine: $line") + if ($num_failed_parses le $num_failed_parses_max); + print STDERR "$0: Maximal threshold for failed line parses reached. Not warning anymore\n" + if ($num_failed_parses eq $num_failed_parses_max); + next; + } + $segmentID = shift @tokens; + if (exists $fileID{$segmentID}) { + print STDERR ("$0: Skipping duplicate segment ID $segmentID in $segmentsFile\n"); + next; + } + $fileID{$segmentID} = shift @tokens; + $startTime{$segmentID} = shift @tokens; + $endTime{$segmentID} = shift @tokens; + ++$numSegments; +} +close(SEGMENTS); +print STDERR ("$0: Read info about $numSegments segment IDs from $segmentsFile\n"); +print STDERR ("$0: In total $num_failed_parses lines failed to be parsed.\n"); + +################################################################################ +# Read the waveform filenames from the wav.scp file. (Parse sph2pipe command.) +################################################################################ + +open(SCP, $scpFile) + || die "Unable to open scp file $scpFile\n"; +$numRecordings = 0; +$num_failed_parses=0; +while ($line=) { + chomp; + if ($line =~ m:^\s*(\S+)\s+(.+)$:) { + $recordingID = $1; + $waveformFile = $2; + } else { + $num_failed_parses+=1; + print STDERR ("$0: Couldn't parse line $. in $scpFile\n") + if ($num_failed_parses == 1); + print STDERR ("\tLine: $line") + if ($num_failed_parses le $num_failed_parses_max); + print STDERR "$0: Maximal threshold for failed line parses reached. 
Not warning anymore\n" + if ($num_failed_parses eq $num_failed_parses_max); + next; + } + if (exists $waveform{$recordingID}) { + print STDERR ("$0: Skipping duplicate recording ID $recordingID in $scpFile\n"); + # BUG ALERT: This check may need to be turned off for multi-channel recordings, + # since the same recording may appear with with different channels? + next; + } + if ($waveformFile =~ m:^\S+$:) { + # This is a single filename, no shp2pipe or gunzip for reading waveforms + $waveform{$recordingID} = $waveformFile; + } elsif (($waveformFile =~ m:(sph2pipe|gunzip|gzip|cat|zcat)\s+:) && + ($waveformFile =~ m:\s+(\S+)\s*\|$:)) { + # HACK ALERT: the filename is *assumed* to be at the END of the command + $waveform{$recordingID} = $1; + $channel{$recordingID} = $1 if ($waveformFile =~ m:sph2pipe\s+.*\-c\s+(\S+)\s+.+:); + } elsif (($waveformFile =~ m:(sox)\s+:) && + ($waveformFile =~ m:\s+(\S+)\s*\|$:)) { + # HACK ALERT: the first element that does ends with '.wav' is assumed to + # be the original filename + @elems=split(/\s+/, $waveformFile); + foreach $elem (@elems) { + if ($elem =~ m/.*\.wav/) { + $filename=$elem; + last; + } + } + die ("$0: Couldn't parse waveform filename on line $. in $scpFile\n\t$line\n") if not defined $filename; + die ("$0: Filename $filename does not exist: in $scpFile\n\t$line\n") unless (-e $filename); + + $waveform{$recordingID} = $filename; + #$channel{$recordingID} = $filename; + } else { + print STDERR ("$0: Couldn't parse waveform filename on line $. in $scpFile\n\t$line\n"); + next; + } + $waveform{$recordingID} =~ s:.+/::; # remove path prefix + $waveform{$recordingID} =~ s:\.(sph|wav)\s*$::; # remove file extension + $channel{$recordingID} = 1 # Default + unless (exists $channel{$recordingID}); + ++$numRecordings; +} +close(SCP); +print STDERR ("$0: Read filenames for $numRecordings recording IDs from $scpFile\n"); +print STDERR ("$0: In total $num_failed_parses lines failed to be parsed.\n"); + +################################################################################ +# Read speaker information from the utt2spk file +################################################################################ + +open(UTT2SPK, $utt2spkFile) + || die "Unable to read utt2spk file $utt2spkFile"; +$numSegments = 0; +$num_failed_parses = 0; +while ($line=) { + @tokens = split(/\s+/, $line); + if (! ($#tokens == 1)) { + $num_failed_parses+=1; + print STDERR ("$0: Couldn't parse line $. in $utt2spkFile\n") + if ($num_failed_parses == 1); + print STDERR ("\tLine: $line") + if ($num_failed_parses le $num_failed_parses_max); + print STDERR "$0: Maximal threshold for failed line parses reached. 
Not warning anymore\n" + if ($num_failed_parses eq $num_failed_parses_max); + next; + } + $segmentID = shift @tokens; + if (exists $speakerID{$segmentID}) { + print STDERR ("$0: Skipping duplicate segment ID $segmentID in $utt2spkFile\n"); + next; + } + $speakerID{$segmentID} = shift @tokens; + ++$numSegments; +} +close(UTT2SPK); +print STDERR ("$0: Read speaker IDs for $numSegments segments from $utt2spkFile\n"); +print STDERR ("$0: In total $num_failed_parses lines failed to be parsed.\n"); + +################################################################################ +# Read the transcriptions from the text file +################################################################################ + +open(TEXT, $textFile) + || die "Unable to read text file $textFile"; +$numSegments = $numWords = 0; +$num_failed_parses = 0; +while ($line=) { + chomp; + if ($line =~ m:^(\S+)\s+(.+)$:) { + $segmentID = $1; + $text = $2; + } else { + $num_failed_parses+=1; + print STDERR ("$0: Couldn't parse line $. in $textFile\n") + if ($num_failed_parses == 1); + print STDERR ("\tLine: $line") + if ($num_failed_parses <= $num_failed_parses_max); + print STDERR "$0: Maximal threshold for failed line parses reached ($num_failed_parses/$num_failed_parses_max). Not warning anymore\n" + if ($num_failed_parses == $num_failed_parses_max); + next; + } + if (exists $transcription{$segmentID}) { + print STDERR ("$0: Skipping duplicate segment ID $segmentID in $segmentsFile\n"); + next; + } + $transcription{$segmentID} = ""; + @tokens = split(/\s+/, $text); + # This is where one could filter the transcription as necessary. + # E.g. remove noise tokens, mark non-scoring segments, etc. + # HACK ALERT: Current version does this is an ad hoc manner! + while ($w = shift(@tokens)) { + # Substitute OOV tokens specific to the Babel data + $w = $OOV_symbol if ($w eq "(())"); + # Remove fragMarkers, if provided, from either end of the word + $w =~ s:(^[$fragMarkers]|[$fragMarkers]$)::g if ($fragMarkers); + # Omit non-speech symbols such as , , etc. 
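+      # The substitution below deletes any token consisting entirely of an
+      # angle-bracketed symbol (such as a noise or silence marker); only the
+      # OOV symbol and the hesitation token are exempt, since those two are
+      # kept for scoring.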
+ $w =~ s:^<[^>]+>$:: unless (($w eq $OOV_symbol) || ($w eq $Hesitation)); + next if ($w eq ""); + $transcription{$segmentID} .= " $w"; + $numWords++; + } + $transcription{$segmentID} =~ s:^\s+::; # Remove leading white space + $transcription{$segmentID} =~ s:\s+$::; # Remove training white space + $transcription{$segmentID} =~ s:\s+: :g; # Normalize remaining white space + # Transcriptions containing no words, or only OOVs and hesitations are not scored + $transcription{$segmentID} = "IGNORE_TIME_SEGMENT_IN_SCORING" + if (($transcription{$segmentID} eq "") || + ($transcription{$segmentID} =~ m:^(($OOV_symbol|$Hesitation)\s*)+$:)); + ++$numSegments; +} +close(TEXT); +print STDERR ("$0: Read transcriptions for $numSegments segments ($numWords words) from $textFile\n"); +print STDERR ("$0: In total $num_failed_parses lines failed to be parsed.\n"); + +################################################################################ +# Write the transcriptions in stm format to a file named stm +################################################################################ + +print STDERR ("$0: Overwriting existing stm file $stmFile\n") + if (-s $stmFile); +open(STM, "| sort +0 -1 +1 -2 +3nb -4 > $stmFile") + || die "Unable to write to stm file $stmFile"; +$numSegments = 0; +foreach $segmentID (sort keys %fileID) { + if (exists $waveform{$fileID{$segmentID}}) { + printf STM ("%s %s %s %.2f %.2f", + $waveform{$fileID{$segmentID}}, + $channel{$fileID{$segmentID}}, + $speakerID{$segmentID}, + $startTime{$segmentID}, + $endTime{$segmentID}); + printf STM (" <%s>", $attributes{$segmentID}) if (exists $attributes{$segmentID}); + printf STM (" %s\n", $transcription{$segmentID}); + ++$numSegments; + } else { + print STDERR ("$0: No waveform found for segment $segmentID, file $fileID{$segmentID}\n"); + } +} +close(STM); +print STDERR ("$0: Wrote reference transcriptions for $numSegments segments to $stmFile\n"); + +################################################################################ +# Write a character-separated stm file as well, for CER computation +################################################################################ + +print STDERR ("$0: Overwriting existing stm file $charStmFile\n") + if (-s $charStmFile); +open(STM, "$stmFile") + || die "Unable to read back stm file $stmFile"; +binmode STM,":encoding(utf8)"; +open(CHARSTM, "> $charStmFile") + || die "Unable to write to char.stm file $charStmFile"; +binmode CHARSTM,":encoding(utf8)"; +while ($line=) { + @tokens = split(/\s+/, $line); + # The first 5 tokens are filename, channel, speaker, start- and end-time + for ($n=0; $n<5; $n++) { + $w = shift @tokens; + print CHARSTM ("$w "); + } + # CER is used only for some scripts, e.g. CJK. So only non-ASCII characters + # in the remaining tokens should be split into individual tokens. 
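+  # For example, a transcription ending in "你好 hello" becomes
+  # "你 好 hello": the lookahead below inserts a space between every pair of
+  # adjacent non-ASCII characters while leaving ASCII words untouched.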
+ $w = join (" ", @tokens); + $w =~ s:([^\x00-\x7F])(?=[^\x00-\x7F]):$1 :g; # split adjacent non-ASCII chars + print CHARSTM ("$w\n"); +} +close(CHARSTM); +close(STM); +print STDERR ("$0: Wrote char.stm file $charStmFile\n"); + +################################################################################ +# Write the reco2file_and_channel file for use by Kaldi scripts +################################################################################ + +print STDERR ("$0: Overwriting existing reco2file_and_channel file $reco2ctmFile\n") + if (-s $reco2ctmFile); +open(RECO2CTM, "| sort > $reco2ctmFile") + || die "Unable to write to reco2file_and_channel file $reco2ctmFile"; +$numRecordings = 0; +foreach $recordingID (sort keys %waveform) { + printf RECO2CTM ("%s %s %s\n", $recordingID, $waveform{$recordingID}, $channel{$recordingID}); + ++$numRecordings; +} +close(RECO2CTM); +print STDERR ("$0: Wrote file_and_channel info for $numRecordings recordings to $reco2ctmFile\n"); + +print STDERR ("$0: Done!\n"); +exit(0); diff --git a/egs/babel/s5d/local/prepare_unicode_lexicon.py b/egs/babel/s5d/local/prepare_unicode_lexicon.py new file mode 100755 index 00000000000..ec2d9e64c37 --- /dev/null +++ b/egs/babel/s5d/local/prepare_unicode_lexicon.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python + +# Copyright 2016 Johns Hopkins University (Author: Matthew Wiesner) +# Apache 2.0 + +# ======= Prepare data/local directory for babel data with unicode tags ====== +# This script creates all files in the data/local directory for babel formats, +# except for the filtered_lexicon.txt file which is created by the +# make_lexicon_subset.sh script. +# +# This script basically takes the place of the prepare_lexicon.pl script. It +# creates the following files. +# +# 1. lexicon.txt (via local/lexicon/make_unicode_lexicon.py which happens prior +# to running this script. +# 2. nonsilence_phones.txt +# 3. silence_phones.txt +# 4. optional_silence.txt +# 5. extra_questions.txt +# ============================================================================ + +from __future__ import print_function +import codecs +import sys +import os +import argparse + +SKIP = ("", "''", "<", ">", "#") + + +# Extract a sorted set of distinct unicode graphemes from the lexicon +def extract_graphemes(table): + ''' + Extract a sorted set of distinct unicode graphemes from the lexicon. + + Usage: extract_graphemes(PATH_TO_LEXICON_TABLE) + + Arguments: + table -- path to the lexicon table output by make_unicode_lexicon.py + + Output: + unicode_graphemes -- the sorted set of distinct unicode graphemes + that occurred in the lexicon. + ''' + with codecs.open(table, "r", "utf-8") as fp: + + # Get relevant header columns for extracting graphemes used in lexicon + # -------------------------------------------------------------------- + header = fp.readline() + idx = [] + for i, j in enumerate(header.strip().split('\t')): + if j.startswith("MAP"): + idx.append(i) + + # -------------------------------------------------------------------- + # Extract all unique graphemes. Place into formats ... + # 1. unicode_graphemes = [g1, g2, g3, ... , gN] + # + # 2. Grapheme dict as keys for each base (without tags) grapheme along + # with all distinct graphmes starting with the base grapheme. + # phones_dict = {p1: p1_with_tags_1, p1_with_tags_2, ... , p2: ... 
} + # -------------------------------------------------------------------- + unicode_graphemes = [] + graphemes_dict = {} + for line in fp: + for i in idx: + grapheme = line.strip().split('\t')[i] + if grapheme not in SKIP: + unicode_graphemes.append(grapheme) + + # Create the sorted set of distinct unicode graphemes in the lexicon + unicode_graphemes = sorted(set(unicode_graphemes)) + for g in unicode_graphemes: + base_graph = g.split("_")[0] + if(base_graph not in graphemes_dict.keys()): + graphemes_dict[base_graph] = [] + + graphemes_dict[base_graph].append(g) + + return unicode_graphemes, graphemes_dict + + +def write_nonsilence_phones(graphemes_dict, nonsilence_phones, + extraspeech=None): + with codecs.open(nonsilence_phones, "w", "utf-8") as fp: + try: + with codecs.open(extraspeech, "r", "utf-8") as f: + for line in f: + line_vals = line.strip().split() + fp.write("%s\n" % line_vals[1]) + except (IOError, TypeError): + pass + + # Write each base grapheme with all tags on the same line + for base_grapheme in sorted(graphemes_dict.keys()): + line = "" + for grapheme in graphemes_dict[base_grapheme]: + line += grapheme + " " + fp.write("%s\n" % line.strip()) + + +def write_extra_questions(unicode_graphemes, graphemes_dict, tags, + extra_questions, nonspeech=None, extraspeech=None): + with codecs.open(extra_questions, "w", "utf-8") as fp: + # Write all unique "phones" but graphemes in this case, plus to a + # single line. + + # Write the extraspeech + try: + with codecs.open(extraspeech, "r", "utf-8") as f: + for line in f: + line_vals = line.strip().split() + fp.write("%s " % line_vals[1]) + except (IOError, TypeError): + pass + + for g in unicode_graphemes: + fp.write("%s " % g) + fp.write("\n") + + # Write the nonspeech + try: + with codecs.open(nonspeech, "r", "utf-8") as f: + for line in f: + line_vals = line.strip().split() + fp.write("%s " % line_vals[1]) + fp.write("\n") + except (IOError, TypeError): + pass + + # Write all possible phone_tag combinations that occur in the lexicon + for tag in tags: + for g in graphemes_dict.keys(): + tagged_grapheme = "_".join([g, tag]) + if(tagged_grapheme in graphemes_dict[g]): + fp.write("%s " % tagged_grapheme) + fp.write("\n") + + +def main(): + # --------------- Extract unicode_graphemes from the table -------------- + if(len(sys.argv[1:]) == 0): + print("Usage: local/prepare_unicode_lexicon.txt " + " " ) + sys.exit(1) + + parser = argparse.ArgumentParser() + parser.add_argument("table", help="Table containing all information about" + " how to map unicode graphemes to unicode descriptors") + parser.add_argument("lex_dir", help="Directory to which all files" + " should be written") + parser.add_argument("--nonspeech", help="File with map of nonspeech words", + action="store", default=None) + parser.add_argument("--extraspeech", help="File with map of extraspeech" + " words", action="store", default=None) + args = parser.parse_args() + unicode_graphemes, graphemes_dict = extract_graphemes(args.table) + + # ---------------- Prepare the directory data/local and a few files ------ + # Create the data/local directory if it does not yet exist + if not os.path.exists(args.lex_dir): + os.makedirs(args.lex_dir) + + # Write the slience_phones.txt file + with open(os.path.join(args.lex_dir, "silence_phones.txt"), "w") as fo: + with open(args.nonspeech, "r") as fi: + for line in fi: + line_vals = line.strip().split() + fo.write("%s\n" % line_vals[1]) + + # Write the optional_silence.txt file + with open(os.path.join(args.lex_dir, 
"optional_silence.txt"), "w") as fp: + fp.write("SIL\n") + + # --------------- Write the nonsilence_phones.txt file ------------------- + write_nonsilence_phones(graphemes_dict, + os.path.join(args.lex_dir, "nonsilence_phones.txt"), + extraspeech=args.extraspeech) + + # ------------------------- Extract tags --------------------------------- + tags = [] + for g in unicode_graphemes: + # Only consider graphemes with tags + g_tags = g.split("_") + if(len(g_tags) > 1): + tag = "_".join(g_tags[1:]) + if(tag not in tags): + tags.append(tag) + + # --------------- Write the extra questions file ------------------------- + write_extra_questions(unicode_graphemes, graphemes_dict, tags, + os.path.join(args.lex_dir, "extra_questions.txt"), + nonspeech=args.nonspeech, + extraspeech=args.extraspeech) + + +if __name__ == "__main__": + main() diff --git a/egs/babel/s5d/local/reestimate_langp.sh b/egs/babel/s5d/local/reestimate_langp.sh new file mode 100755 index 00000000000..059fba52043 --- /dev/null +++ b/egs/babel/s5d/local/reestimate_langp.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +cmd=run.pl +unk="" +# End configuration section +. ./utils/parse_options.sh + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +datadir=$1 +langdir=$2 +idict=$3 +amdir=$4 +odict=$5 +olocallang=$6 +olang=$7 + + +mkdir -p $odict +mkdir -p $olang +mkdir -p $olocallang +steps/get_prons.sh --cmd "$train_cmd" $datadir $langdir $amdir +utils/dict_dir_add_pronprobs.sh --max-normalize true $idict \ + $amdir/pron_counts_nowb.txt $amdir/sil_counts_nowb.txt \ + $amdir/pron_bigram_counts_nowb.txt $odict + +utils/prepare_lang.sh --phone-symbol-table $langdir/phones.txt \ + $odict "$unk" $olocallang $olang + diff --git a/egs/babel/s5d/local/resegment/evaluate_segmentation.pl b/egs/babel/s5d/local/resegment/evaluate_segmentation.pl new file mode 100755 index 00000000000..9d865cca8c9 --- /dev/null +++ b/egs/babel/s5d/local/resegment/evaluate_segmentation.pl @@ -0,0 +1,198 @@ +#!/usr/bin/env perl + +# Copyright 2014 Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar +# Apache 2.0 + +################################################################################ +# +# This script was written to check the goodness of automatic segmentation tools +# It assumes input in the form of two Kaldi segments files, i.e. a file each of +# whose lines contain four space-separated values: +# +# UtteranceID FileID StartTime EndTime +# +# It computes # missed frames, # false positives and # overlapping frames. +# +################################################################################ + +if ($#ARGV == 1) { + $ReferenceSegmentation = $ARGV[0]; + $HypothesizedSegmentation = $ARGV[1]; + printf STDERR ("Comparing reference segmentation\n\t%s\nwith proposed segmentation\n\t%s\n", + $ReferenceSegmentation, + $HypothesizedSegmentation); +} else { + printf STDERR "This program compares the reference segmenation with the proposted segmentation\n"; + printf STDERR "Usage: $0 reference_segments_filename proposed_segments_filename\n"; + printf STDERR "e.g. $0 data/dev10h/segments data/dev10h.seg/segments\n"; + exit (0); +} + +################################################################################ +# First read the reference segmentation, and +# store the start- and end-times of all segments in each file. 
+open (SEGMENTS, "cat $ReferenceSegmentation | sort -k2,2 -k3n,3 -k4n,4 |")
+  || die "Unable to open $ReferenceSegmentation";
+$numLines = 0;
+while ($line=<SEGMENTS>) {
+  chomp $line;
+  @field = split("[ \t]+", $line);
+  unless ($#field == 3) {
+    printf STDERR "Unparseable line in file $ReferenceSegmentation\n\t$line\n";
+    exit (1);
+  }
+  $fileID = $field[1];
+  unless (exists $firstSeg{$fileID}) {
+    $firstSeg{$fileID} = $numLines;
+    $actualSpeech{$fileID} = 0.0;
+    $hypothesizedSpeech{$fileID} = 0.0;
+    $foundSpeech{$fileID} = 0.0;
+    $falseAlarm{$fileID} = 0.0;
+    $minStartTime{$fileID} = 0.0;
+    $maxEndTime{$fileID} = 0.0;
+  }
+  $refSegName[$numLines] = $field[0];
+  $refSegStart[$numLines] = $field[2];
+  $refSegEnd[$numLines] = $field[3];
+  $actualSpeech{$fileID} += ($field[3]-$field[2]);
+  $minStartTime{$fileID} = $field[2] if ($minStartTime{$fileID}>$field[2]);
+  $maxEndTime{$fileID} = $field[3] if ($maxEndTime{$fileID}<$field[3]);
+  $lastSeg{$fileID} = $numLines;
+  ++$numLines;
+}
+close(SEGMENTS);
+print STDERR "Read $numLines segments from $ReferenceSegmentation\n";
+
+################################################################################
+# Process hypothesized segments sequentially, and gather speech/nonspeech stats
+################################################################################
+
+open (SEGMENTS, "cat $HypothesizedSegmentation | sort -k2,2 -k1,1 |")
+  # Kaldi segments files are sorted by UtteranceID, but we re-sort them here
+  # so that all segments of a file are read together, sorted by start-time.
+  || die "Unable to open $HypothesizedSegmentation";
+$numLines = 0;
+$totalHypSpeech = 0.0;
+$totalFoundSpeech = 0.0;
+$totalFalseAlarm = 0.0;
+$numShortSegs = 0;
+$numLongSegs = 0;
+while ($line=<SEGMENTS>) {
+  chomp $line;
+  @field = split("[ \t]+", $line);
+  unless ($#field == 3) {
+    printf STDERR "Unparseable line in file $HypothesizedSegmentation\n\t$line\n";
+    exit (1);
+  }
+  $fileID = $field[1];
+  $segStart = $field[2];
+  $segEnd = $field[3];
+  if (exists $firstSeg{$fileID}) {
+    # This FileID exists in the reference segmentation
+    # So gather statistics for this UtteranceID
+    $hypothesizedSpeech{$fileID} += ($segEnd-$segStart);
+    $totalHypSpeech += ($segEnd-$segStart);
+    if (($segStart>=$maxEndTime{$fileID}) || ($segEnd<=$minStartTime{$fileID})) {
+      # This entire segment is a false alarm
+      $falseAlarm{$fileID} += ($segEnd-$segStart);
+      $totalFalseAlarm += ($segEnd-$segStart);
+    } else {
+      # This segment may overlap one or more reference segments
+      $p = $firstSeg{$fileID};
+      while ($refSegEnd[$p]<=$segStart) {
+        ++$p;
+      }
+      # The overlap, if any, begins at the reference segment p
+      $q = $lastSeg{$fileID};
+      while ($refSegStart[$q]>=$segEnd) {
+        --$q;
+      }
+      # The overlap, if any, ends at the reference segment q
+      if ($q<$p) {
+        # This segment sits entirely in the nonspeech region
+        # between the two reference speech segments q and p
+        $falseAlarm{$fileID} += ($segEnd-$segStart);
+        $totalFalseAlarm += ($segEnd-$segStart);
+      } else {
+        if (($segEnd-$segStart)<0.20) {
+          # For diagnosing Pascal's VAD segmentation
+          print STDOUT "Found short speech region $line\n";
+          ++$numShortSegs;
+        } elsif (($segEnd-$segStart)>60.0) {
+          ++$numLongSegs;
+          # For diagnosing Pascal's VAD segmentation
+          print STDOUT "Found long speech region $line\n";
+        }
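+        # The accounting below walks through reference segments p..q: any gap
+        # before a reference segment is charged to false alarm, the overlap
+        # with it is credited to found speech, and $segStart is advanced past
+        # each reference segment, so whatever remains after q is counted as a
+        # trailing false alarm.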
+        # There is some overlap with segments p through q
+        for ($s=$p; $s<=$q; ++$s) {
+          if ($segStart<$refSegStart[$s]) {
+            # There is a leading false alarm portion before s
+            $falseAlarm{$fileID} += ($refSegStart[$s]-$segStart);
+            $totalFalseAlarm += ($refSegStart[$s]-$segStart);
+            $segStart=$refSegStart[$s];
+          }
+          $speechPortion = ($refSegEnd[$s]<$segEnd) ?
+            ($refSegEnd[$s]-$segStart) : ($segEnd-$segStart);
+          $foundSpeech{$fileID} += $speechPortion;
+          $totalFoundSpeech += $speechPortion;
+          $segStart=$refSegEnd[$s];
+        }
+        if ($segEnd>$segStart) {
+          # There is a trailing false alarm portion after q
+          $falseAlarm{$fileID} += ($segEnd-$segStart);
+          $totalFalseAlarm += ($segEnd-$segStart);
+        }
+      }
+    }
+  } else {
+    # This FileID does not exist in the reference segmentation, which
+    # indicates mismatched inputs, so treat it as a fatal error.
+    printf STDERR ("Unexpected fileID in hypothesized segments: %s\n", $fileID);
+    exit (1);
+  }
+  ++$numLines;
+}
+close(SEGMENTS);
+print STDERR "Read $numLines segments from $HypothesizedSegmentation\n";
+
+################################################################################
+# Now that all hypothesized segments have been processed, compute needed stats
+################################################################################
+
+$totalActualSpeech = 0.0;
+$totalNonSpeechEst = 0.0;  # This is just a crude estimate of total nonspeech.
+foreach $fileID (sort keys %actualSpeech) {
+  $totalActualSpeech += $actualSpeech{$fileID};
+  $totalNonSpeechEst += $maxEndTime{$fileID} - $actualSpeech{$fileID};
+  ########################################################################
+  # Print file-wise statistics to STDOUT; can pipe to /dev/null if not needed
+  ########################################################################
+  printf STDOUT ("%s: %.2f min actual speech, %.2f min hypothesized: %.2f min overlap (%d%%), %.2f min false alarm (~%d%%)\n",
+                 $fileID,
+                 ($actualSpeech{$fileID}/60.0),
+                 ($hypothesizedSpeech{$fileID}/60.0),
+                 ($foundSpeech{$fileID}/60.0),
+                 ($foundSpeech{$fileID}*100/($actualSpeech{$fileID}+0.01)),
+                 ($falseAlarm{$fileID}/60.0),
+                 ($falseAlarm{$fileID}*100/($maxEndTime{$fileID}-$actualSpeech{$fileID}+0.01)));
+}
+
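+# Note: the +0.01 and +0.000001 terms in the percentage computations above and
+# below guard against division by zero for files with no reference speech or
+# no reference non-speech.
+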
+################################################################################
+# Finally, we have everything needed to report the segmentation statistics.
+################################################################################
+
+printf STDERR ("------------------------------------------------------------------------\n");
+printf STDERR ("TOTAL: %.2f hrs actual speech, %.2f hrs hypothesized: %.2f hrs overlap (%d%%), %.2f hrs false alarm (~%d%%)\n",
+               ($totalActualSpeech/3600.0),
+               ($totalHypSpeech/3600.0),
+               ($totalFoundSpeech/3600.0),
+               ($totalFoundSpeech*100/($totalActualSpeech+0.000001)),
+               ($totalFalseAlarm/3600.0),
+               ($totalFalseAlarm*100/($totalNonSpeechEst+0.000001)));
+printf STDERR ("\t$numShortSegs segments < 0.2 sec and $numLongSegs segments > 60.0 sec\n");
+printf STDERR ("------------------------------------------------------------------------\n");
diff --git a/egs/babel/s5d/local/resegment/generate_segments.sh b/egs/babel/s5d/local/resegment/generate_segments.sh
new file mode 100755
index 00000000000..95e88deb87d
--- /dev/null
+++ b/egs/babel/s5d/local/resegment/generate_segments.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+
+# Copyright 2014  Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
+# Apache 2.0
+
+set -o pipefail
+set -e
+
+nj=8
+cmd=run.pl
+stage=0
+segmentation_opts="--isolated-resegmentation --min-inter-utt-silence-length 1.0 --silence-proportion 0.05"
+decode_extra_opts=""
+reference_rttm=
+get_text=false   # Get text corresponding to new segments in ${output_dir}
+                 # Assuming text is in $data/$type directory.
+                 # Does not work very well because the data does not get
+                 # aligned to many training transcriptions.
+noise_oov=false  # Treat <oov> words as noise instead of speech
+beam=7.0
+max_active=1000
+
+# debugging stuff
+echo $0 $@
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+set -u
+
+if [ $# -ne 5 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir> <model-dir> <temp-dir> <output-dir>"
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)   # specify how to run the sub-processes."
+  echo "    --nj <nj>                    # Number of parallel jobs."
+  echo "                                 # For the standard data directories of dev10h, dev2h and eval"
+  echo "                                 # this is taken from the lang.conf file"
+  echo "    --segmentation-opts '--opt1 opt1val --opt2 opt2val'  # options for segmentation.py"
+  echo "    --reference-rttm <rttm-file> # Reference RTTM file that will be used for analysis of the segmentation"
+  echo "    --get-text (true|false)      # Convert text from base data directory to correspond to the new segments"
+  echo
+  echo "e.g.:"
+  echo "$0 data/dev10h data/lang exp/tri4b_seg exp/tri4b_resegment_dev10h data/dev10h.seg"
+  exit 1
+fi
+
+datadir=$1    # The base data directory that contains at least the files wav.scp and reco2file_and_channel
+lang=$2
+model_dir=$3  # Segmentation model directory created using local/resegment/run_segmentation_train.sh
+temp_dir=$4   # Temporary directory to store some intermediate files during segmentation
+output_dir=$5 # The target directory
+
+###############################################################################
+#
+# Phone Decoder
+#
+###############################################################################
+
+mkdir -p $temp_dir
+dirid=`basename $datadir`
+total_time=0
+t1=$(date +%s)
+
+if [ $stage -le 0 ] ; then
+  steps/decode_nolats.sh $decode_extra_opts --write-words false --write-alignments true \
+    --cmd "$cmd" --nj $nj --beam $beam --max-active $max_active \
+    $model_dir/phone_graph $datadir $model_dir/decode_${dirid} || exit 1
+fi
+
+if [ $stage -le 1 ]; then
+  [ ! -f $model_dir/decode_${dirid}/ali.1.gz ] && echo "File $model_dir/decode_${dirid}/ali.1.gz does not exist!" 
&& exit 1 + $cmd JOB=1:$nj $model_dir/decode_${dirid}/log/predict.JOB.log \ + gunzip -c $model_dir/decode_${dirid}/ali.JOB.gz \| \ + ali-to-phones --per-frame=true $model_dir/final.mdl ark:- ark,t:- \| \ + utils/int2sym.pl -f 2- $lang/phones.txt \| \ + gzip -c '>' $temp_dir/pred.JOB.gz || exit 1 + + mkdir -p $temp_dir/pred + gunzip -c $temp_dir/pred.*.gz | \ + perl -ne '($file, $phones)=split / /, $_, 2; + open($fh, ">'$temp_dir/pred/'$file.pred" ) or die $!; + print {$fh} "$file $phones"; + close($fh);' || exit 1 + +fi +t2=$(date +%s) +total_time=$((total_time + t2 - t1)) +echo "SI decoding done in $((t2-t1)) seconds" + + +############################################################################### +# +# Resegmenter +# +############################################################################### + +if ! [ `cat $lang/phones/optional_silence.txt | wc -w` -eq 1 ]; then + echo "Error: this script only works if $lang/phones/optional_silence.txt contains exactly one entry."; + echo "You'd have to modify the script to handle other cases." + exit 1; +fi + +silphone=`cat $lang/phones/optional_silence.txt` +# silphone will typically be "sil" or "SIL". + +# 3 sets of phones: 0 is silence, 1 is noise, 2 is speech., +( +echo "$silphone 0" +if ! $noise_oov; then + grep -v -w $silphone $lang/phones/silence.txt \ + | awk '{print $1, 1;}' \ + | sed 's/SIL\(.*\)1/SIL\10/' \ + | sed 's/\(.*\)1/\12/' +else + grep -v -w $silphone $lang/phones/silence.txt \ + | awk '{print $1, 1;}' \ + | sed 's/SIL\(.*\)1/SIL\10/' +fi +cat $lang/phones/nonsilence.txt | awk '{print $1, 2;}' | sed 's/\(<.*>.*\)2/\11/' | sed 's/\(.*\)1/\12/' +) > $temp_dir/phone_map.txt + +mkdir -p $output_dir +mkdir -p $temp_dir/log + +local/resegment/segmentation.py --verbose 2 $segmentation_opts \ + $temp_dir/pred $temp_dir/phone_map.txt 2>$temp_dir/log/resegment.log | \ + sort > $output_dir/segments || exit 1 + +if [ ! -s $output_dir/segments ] ; then + echo "Zero segments created during segmentation process." + echo "That means something failed. Try the cause and re-run!" + exit 1 +fi + +t2=$(date +%s) +total_time=$((total_time + t2 - t1)) +echo "Resegment data done in $((t2-t1)) seconds" + +for file in reco2file_and_channel wav.scp ; do + [ ! -f $datadir/$file ] && echo "Expected file $datadir/$file to exist" && exit 1 + cp $datadir/$file $output_dir/$file +done + +# We'll make the speaker-ids be the same as the recording-ids (e.g. conversation +# sides). This will normally be OK for telephone data. +cat $output_dir/segments | awk '{print $1, $2}' > $output_dir/utt2spk || exit 1 +utils/utt2spk_to_spk2utt.pl ${output_dir}/utt2spk > $output_dir/spk2utt || exit 1 + + +dur_hours=`cat ${output_dir}/segments | awk '{num_secs += $4 - $3;} END{print (num_secs/3600);}'` +echo "Extracted segments of total length of $dur_hours hours audio" + +echo --------------------------------------------------------------------- +echo "Resegment data Finished successfully on" `date` +echo --------------------------------------------------------------------- + +exit 0 diff --git a/egs/babel/s5d/local/resegment/segmentation.py b/egs/babel/s5d/local/resegment/segmentation.py new file mode 100755 index 00000000000..7c5c8665a16 --- /dev/null +++ b/egs/babel/s5d/local/resegment/segmentation.py @@ -0,0 +1,1508 @@ +#! 
/usr/bin/env python + +# Copyright 2014 Vimal Manohar +# Apache 2.0 + +import os, glob, argparse, sys, re, time +from argparse import ArgumentParser + +use_numpy = True +try: + import numpy as np +except ImportError: + use_numpy = False + +# Global stats for analysis taking RTTM file as reference +global_analysis_get_initial_segments = None +global_analysis_set_nonspeech_proportion = None +global_analysis_final = None + +def mean(l): + if len(l) > 0: + return float(sum(l)) / len(l) + return 0 + +# Analysis class +# Stores statistics like the confusion matrix, length of the segments etc. +class Analysis: + def __init__(self, file_id, frame_shift, prefix): + self.confusion_matrix = [0] * 9 + self.type_counts = [ [[] for j in range(0,9)] for i in range(0,3) ] + self.state_count = [ [] for i in range(0,9) ] + self.markers = [ [] for i in range(0,9) ] + self.phones = [ [] for i in range(0,9) ] + self.min_length = [0] * 9 + self.max_length = [0] * 9 + self.mean_length = [0] * 9 + self.percentile25 = [0] * 9 + self.percentile50 = [0] * 9 + self.percentile75 = [0] * 9 + self.file_id = file_id + self.frame_shift = frame_shift + self.prefix = prefix + + # Add the statistics of this object to another object a + # Typically used in a global object to accumulate stats + # from local objects + def add(self, a): + for i in range(0,9): + self.confusion_matrix[i] += a.confusion_matrix[i] + self.state_count[i] += a.state_count[i] + + # Print the confusion matrix + # The interpretation of 'speech', 'noise' and 'silence' are bound to change + # through the different post-processing stages. e.g at the end, speech and silence + # correspond respectively to 'in segment' and 'out of segment' + def write_confusion_matrix(self, write_hours = False, file_handle = sys.stderr): + sys.stderr.write("Total counts: \n") + + name = ['Silence as silence', \ + 'Silence as noise', \ + 'Silence as speech', \ + 'Noise as silence', \ + 'Noise as noise', \ + 'Noise as speech', \ + 'Speech as silence', \ + 'Speech as noise', \ + 'Speech as speech'] + + for j in range(0,9): + if self.frame_shift != None: + # The conventional usage is for frame_shift to have a value. + # But this function can handle other counts like the number of frames. + # This function is called to print in counts instead of seconds in + # functions like merge_segments + if write_hours: + # Write stats in hours instead of seconds + sys.stderr.write("File %s: %s : %s : %8.3f hrs\n" % + (self.file_id, self.prefix, name[j], + self.confusion_matrix[j] * self.frame_shift / 3600.0)) + else: + sys.stderr.write("File %s: %s : %s : %8.3f seconds\n" % + (self.file_id, self.prefix, name[j], + self.confusion_matrix[j] * self.frame_shift)) + # End if write_hours + else: + sys.stderr.write("File %s: %s : Confusion: Type %d : %8.3f counts\n" % + (self.file_id, self.prefix, j, self.confusion_matrix[j])) + # End if + # End for loop over 9 cells of confusion matrix + + # Print the total stats that are just row and column sums of + # 3x3 confusion matrix + def write_total_stats(self, write_hours = True, file_handle = sys.stderr): + sys.stderr.write("Total Stats: \n") + + name = ['Actual Silence', \ + 'Actual Noise', \ + 'Actual Speech'] + + for j in [0,1,2]: + if self.frame_shift != None: + # The conventional usage is for frame_shift to have a value. + # But this function can handle other counts like the number of frames. 
+ # This function is called to print in counts instead of seconds in + # functions like merge_segments + if write_hours: + # Write stats in hours instead of seconds + sys.stderr.write("File %s: %s : %s : %8.3f hrs\n" % + (self.file_id, self.prefix, name[j], + sum(self.confusion_matrix[3*j:3*j+3]) * self.frame_shift / 3600.0)) + else: + sys.stderr.write("File %s: %s : %s : %8.3f seconds\n" % + (self.file_id, self.prefix, name[j], + sum(self.confusion_matrix[3*j:3*j+3]) * self.frame_shift)) + # End if write_hours + else: + sys.stderr.write("File %s: %s : %s : %8.3f counts\n" % + (self.file_id, self.prefix, name[j], + sum(self.confusion_matrix[3*j:3*j+3]))) + # End if + # End for loop over 3 rows of confusion matrix + + name = ['Predicted Silence', \ + 'Predicted Noise', \ + 'Predicted Speech'] + + for j in [0,1,2]: + if self.frame_shift != None: + # The conventional usage is for frame_shift to have a value. + # But this function can handle other counts like the number of frames. + # This function is called to print in counts instead of seconds in + # functions like merge_segments + if write_hours: + # Write stats in hours instead of seconds + sys.stderr.write("File %s: %s : %s : %8.3f hrs\n" % + (self.file_id, self.prefix, name[j], + sum(self.confusion_matrix[j:7+j:3]) * self.frame_shift / 3600.0)) + else: + sys.stderr.write("File %s: %s : %s : %8.3f seconds\n" % + (self.file_id, self.prefix, name[j], + sum(self.confusion_matrix[j:7+j:3]) * self.frame_shift)) + # End if write_hours + else: + sys.stderr.write("File %s: %s : %s : %8.3f counts\n" % + (self.file_id, self.prefix, name[j], + sum(self.confusion_matrix[j:7+j:3]))) + # End if + # End for loop over 3 columns of confusion matrix + + # Print detailed stats of lengths of each of the 3 types of frames + # in 8 kinds of segments + def write_type_stats(self, file_handle = sys.stderr): + for j in range(0,3): + # 3 types of frames. Silence, noise, speech. + # Typically, we store the number of frames of each type here. + for i in range(0,9): + # 2^3 = 8 kinds of segments like 'segment contains only silence', + # 'segment contains only noise', 'segment contains noise and speech'. + # For compatibility with the rest of the analysis code, + # the for loop is over 9 kinds. + max_length = max([0]+self.type_counts[j][i]) + min_length = min([10000]+self.type_counts[j][i]) + mean_length = mean(self.type_counts[j][i]) + if use_numpy: + try: + percentile25 = np.percentile(self.type_counts[j][i], 25) + except ValueError: + percentile25 = 0 + try: + percentile50 = np.percentile(self.type_counts[j][i], 50) + except ValueError: + percentile50 = 0 + try: + percentile75 = np.percentile(self.type_counts[j][i], 75) + except ValueError: + percentile75 = 0 + + file_handle.write("File %s: %s : TypeStats: Type %d %d: Min: %4d Max: %4d Mean: %4d percentile25: %4d percentile50: %4d percentile75: %4d\n" % (self.file_id, self.prefix, j, i, min_length, max_length, mean_length, percentile25, percentile50, percentile75)) + # End for loop over 9 different kinds of segments + # End for loop over 3 types of frames + + # Print detailed stats of each cell of the confusion matrix. 
+ # The stats include different statistical measures like mean, max, min + # and median of the length of continuous regions of frames in + # each of the 9 cells of the confusion matrix + def write_length_stats(self, file_handle = sys.stderr): + for i in range(0,9): + self.max_length[i] = max([0]+self.state_count[i]) + self.min_length[i] = min([10000]+self.state_count[i]) + self.mean_length[i] = mean(self.state_count[i]) + if use_numpy: + try: + self.percentile25[i] = np.percentile(self.state_count[i], 25) + except ValueError: + self.percentile25[i] = 0 + try: + self.percentile50[i] = np.percentile(self.state_count[i], 50) + except ValueError: + self.percentile50[i] = 0 + try: + self.percentile75[i] = np.percentile(self.state_count[i], 75) + except ValueError: + self.percentile75[i] = 0 + + file_handle.write("File %s: %s : Length: Type %d: Min: %4d Max: %4d Mean: %4d percentile25: %4d percentile50: %4d percentile75: %4d\n" % (self.file_id, self.prefix, i, self.min_length[i], self.max_length[i], self.mean_length[i], self.percentile25[i], self.percentile50[i], self.percentile75[i])) + # End for loop over 9 cells + + # Print detailed stats of each cell of the confusion matrix. + # Similar structure to the above function. But this also prints additional + # details. Format is like this - + # Markers: Type : () () + # The hypothesized_phones can be looked at to see what phones are + # present in the hypothesis from start_frame for num_of_frames frames. + def write_markers(self, file_handle = sys.stderr): + file_handle.write("Start frames of different segments:\n") + for j in range(0,9): + if self.phones[j] == []: + file_handle.write("File %s: %s : Markers: Type %d: %s\n" % (self.file_id, self.prefix, j, str(sorted([str(self.markers[j][i])+' ('+ str(self.state_count[j][i])+ ')' for i in range(0, len(self.state_count[j]))],key=lambda x:int(x.split()[0]))))) + else: + file_handle.write("File %s: %s : Markers: Type %d: %s\n" % (self.file_id, self.prefix, j, str(sorted([str(self.markers[j][i])+' ('+ str(self.state_count[j][i])+') ( ' + str(self.phones[j][i]) + ')' for i in range(0, len(self.state_count[j]))],key=lambda x:int(x.split()[0]))))) + # End for loop over 9 cells + +# Function to read a standard IARPA Babel RTTM file +# as structure in Jan 16, 2014 +def read_rttm_file(rttm_file, temp_dir, frame_shift): + file_id = None + this_file = [] + ref_file_handle = None + reference = {} + for line in open(rttm_file).readlines(): + splits = line.strip().split() + type1 = splits[0] + if type1 == "SPEAKER": + continue + if splits[1] != file_id: + # A different file_id. Need to open a different file to write + if this_file != []: + # If this_file is empty, no reference RTTM corresponding to the file_id + # is read. This will happen at the start of the file_id. Otherwise it means a + # contiguous segment of previous file_id is processed. So write it to the file. + # corresponding to the previous file_id + try: + ref_file_handle.write(' '.join(this_file)) + # Close the previous file if any + ref_file_handle.close() + this_file = [] + except AttributeError: + # Ignore AttributeError. It is expected. + 1==1 + # End if + + file_id = splits[1] + if (file_id not in reference): + # First time seeing this file_id. Open a new file for writing. 
+ reference[file_id] = 1 + try: + ref_file_handle = open(temp_dir+"/"+file_id+".ref", 'w') + except IOError: + sys.stderr.write("Unable to open " + temp_dir+"/"+file_id+".ref for writing\n") + sys.exit(1) + ref_file_handle.write(file_id + "\t") + else: + # This file has been seen before but not in the previous iteration. + # The file has already been closed. So open it for append. + try: + this_file = open(temp_dir+"/"+file_id+".ref").readline().strip().split()[1:] + ref_file_handle = open(temp_dir+"/"+file_id+".ref", 'a') + except IOError: + sys.stderr.write("Unable to open " + temp_dir+"/"+file_id+".ref for appending\n") + sys.exit(1) + # End if + # End if + + i = len(this_file) + category = splits[6] + word = splits[5] + start_time = int(float(splits[3])/frame_shift + 0.5) + duration = int(float(splits[4])/frame_shift + 0.5) + if i < start_time: + this_file.extend(["0"]*(start_time - i)) + if type1 == "NON-LEX": + if category == "other": + # is taken as Silence + this_file.extend(["0"]*duration) + else: + this_file.extend(["1"]*duration) + if type1 == "LEXEME": + this_file.extend(["2"]*duration) + if type1 == "NON-SPEECH": + this_file.extend(["1"]*duration) + + ref_file_handle.write(' '.join(this_file)) + ref_file_handle.close() + +# Stats class to store some basic stats about the number of +# times the post-processor goes through particular loops or blocks +# of code in the algorithm. This is just for debugging. +class Stats: + def __init__(self): + self.inter_utt_nonspeech = 0 + self.merge_nonspeech_segment = 0 + self.merge_segments = 0 + self.split_segments = 0 + self.silence_only = 0 + self.noise_only = 0 + + def print_stats(self): + sys.stderr.write("Inter-utt nonspeech: %d\n" % self.inter_utt_nonspeech) + sys.stderr.write("Merge nonspeech segment: %d\n" % self.merge_nonspeech_segment) + sys.stderr.write("Merge segment: %d\n" % self.merge_segments) + sys.stderr.write("Split segments: %d\n" % self.split_segments) + sys.stderr.write("Noise only: %d\n" % self.noise_only) + sys.stderr.write("Silence only: %d\n" % self.silence_only) + + def reset(self): + self.inter_utt_nonspeech = 0 + self.merge_nonspeech_segment = 0 + self.merge_segments = 0 + self.split_segments = 0 + self.silence_only = 0 + self.noise_only = 0 + +# Timer class to time functions +class Timer: + def __enter__(self): + self.start = time.clock() + return self + def __exit__(self, *args): + self.end = time.clock() + self.interval = self.end - self.start + +# The main class for post-processing a file. 
+# This does the segmentation either looking at the file isolated +# or by looking at both classes simultaneously +class JointResegmenter: + def __init__(self, P, A, f, options, phone_map, stats = None, reference = None): + + # Pointers to prediction arrays and Initialization + self.P = P # Predicted phones + self.B = [ i for i in A ] # Original predicted classes + self.A = A # Predicted classes + self.file_id = f # File name + self.N = len(A) # Length of the prediction (= Num of frames in the audio file) + self.S = [False] * self.N # Array of Start boundary markers + self.E = [False] * (self.N+1) # Array of End boundary markers + + self.phone_map = phone_map + self.options = options + + # Configuration + + self.frame_shift = options.frame_shift + # Convert length in seconds to frames + self.max_frames = int(options.max_segment_length / options.frame_shift) + self.hard_max_frames = int(options.hard_max_segment_length / options.frame_shift) + self.min_inter_utt_nonspeech_length = int(options.min_inter_utt_silence_length / options.frame_shift) + if ( options.remove_noise_only_segments == "false" ): + self.remove_noise_segments = False + elif ( options.remove_noise_only_segments == "true" ): + self.remove_noise_segments = True + + # End of Configuration + + # Define Frame Type Constants + self.THIS_SILENCE = ("0","1","2") + self.THIS_NOISE = ("3","4","5") + self.THIS_SPEECH = ("6", "7", "8") + self.THIS_SPEECH_THAT_SIL = ("6",) + self.THIS_SPEECH_THAT_NOISE = ("7",) + self.THIS_SIL_CONVERT_THAT_SIL = ("9",) + self.THIS_SIL_CONVERT_THAT_NOISE = ("10",) + self.THIS_SIL_CONVERT = ("9","10","11") + self.THIS_SILENCE_CONVERT = ("9","10","11") + self.THIS_NOISE_CONVERT_THAT_SIL = ("12",) + self.THIS_NOISE_CONVERT_THAT_NOISE = ("13",) + self.THIS_NOISE_CONVERT = ("12","13","14") + self.THIS_NOISE_OR_SILENCE = self.THIS_NOISE + self.THIS_SILENCE + self.THIS_SILENCE_OR_NOISE = self.THIS_NOISE + self.THIS_SILENCE + self.THIS_CONVERT = self.THIS_SILENCE_CONVERT + self.THIS_NOISE_CONVERT + self.THIS_SILENCE_PLUS = self.THIS_SILENCE + self.THIS_SILENCE_CONVERT + self.THIS_NOISE_PLUS = self.THIS_NOISE + self.THIS_NOISE_CONVERT + self.THIS_SPEECH_PLUS = self.THIS_SPEECH + self.THIS_CONVERT + + if stats != None: + self.stats = stats + + self.reference = None + if reference != None: + if len(reference) < self.N: + self.reference = reference + ["0"] * (self.N - len(reference)) + assert (len(self.reference) == self.N) + else: + self.reference = reference + + # This function restricts the output to length N + def restrict(self, N): + self.B = self.B[0:N] + self.A = self.A[0:N] + self.S = self.S[0:N] + self.E = self.E[0:N+1] + if sum(self.S) == sum(self.E) + 1: + self.E[N] = True + self.N = N + + # Main resegment function that calls other functions + def resegment(self): + with Timer() as t: + self.get_initial_segments() + if self.options.verbose > 1: + sys.stderr.write("For %s: get_initial_segments took %f sec\n" % (self.file_id, t.interval)) + with Timer() as t: + self.set_nonspeech_proportion() + if self.options.verbose > 1: + sys.stderr.write("For %s: set_nonspeech_proportion took %f sec\n" % (self.file_id, t.interval)) + with Timer() as t: + self.merge_segments() + if self.options.verbose > 1: + sys.stderr.write("For %s: merge took %f sec\n" % (self.file_id, t.interval)) + with Timer() as t: + self.split_long_segments() + if self.options.verbose > 1: + sys.stderr.write("For %s: split took %f sec\n" % (self.file_id, t.interval)) + if self.remove_noise_segments: + with Timer() as t: + 
self.remove_noise_only_segments() + if self.options.verbose > 1: + sys.stderr.write("For %s: remove took %f sec\n" % (self.file_id, t.interval)) + elif self.min_inter_utt_nonspeech_length > 0.0: + # This is the typical one with augmented training setup + self.remove_silence_only_segments() + + if self.options.verbose > 1: + sys.stderr.write("For file %s\n" % self.file_id) + self.stats.print_stats() + sys.stderr.write("\n") + self.stats.reset() + + def get_initial_segments(self): + for i in range(0, self.N): + if (i > 0) and self.A[i-1] != self.A[i]: + # This frame is different from the previous frame. + if self.A[i] in self.THIS_SPEECH: + # This frame is speech. + if self.A[i-1] in self.THIS_SPEECH: + # Both this and the previous frames are speech + # But they are different. e.g. "8 7" + # So this is the end of the previous region and + # the beginning of the next region + self.S[i] = True + self.E[i] = True + else: + # The previous frame is non-speech, but not this one. + # So this frame is the beginning of a new segment + self.S[i] = True + else: + # This frame is non-speech + if self.A[i-1] in self.THIS_SPEECH: + # Previous frame is speech, but this one is not. + # So this frame is the end of the previous segment + self.E[i] = True + elif i == 0 and self.A[i] in self.THIS_SPEECH: + # The frame is speech. So this is the start of a new segment. + self.S[i] = True + if self.A[self.N-1] in self.THIS_SPEECH: + # Handle the special case where the last frame of file is not nonspeech + self.E[self.N] = True + assert(sum(self.S) == sum(self.E)) + + ########################################################################### + # Analysis section + self.C = ["0"] * self.N + C = self.C + a = Analysis(self.file_id, self.frame_shift,"Analysis after get_initial_segments") + + if self.reference != None: + count = 0 + for i in range(0,self.N): + if self.reference[i] == "0" and self.A[i] in self.THIS_SILENCE: + C[i] = "0" + elif self.reference[i] == "0" and self.A[i] in self.THIS_NOISE: + C[i] = "1" + elif self.reference[i] == "0" and self.A[i] in self.THIS_SPEECH: + C[i] = "2" + elif self.reference[i] == "1" and self.A[i] in self.THIS_SILENCE: + C[i] = "3" + elif self.reference[i] == "1" and self.A[i] in self.THIS_NOISE: + C[i] = "4" + elif self.reference[i] == "1" and self.A[i] in self.THIS_SPEECH: + C[i] = "5" + elif self.reference[i] == "2" and self.A[i] in self.THIS_SILENCE: + C[i] = "6" + elif self.reference[i] == "2" and self.A[i] in self.THIS_NOISE: + C[i] = "7" + elif self.reference[i] == "2" and self.A[i] in self.THIS_SPEECH: + C[i] = "8" + if i > 0 and C[i-1] != C[i]: + a.state_count[int(C[i-1])].append(count) + a.markers[int(C[i-1])].append(i - count) + a.phones[int(C[i-1])].append(' '.join(set(self.P[i-count:i]))) + count = 1 + else: + count += 1 + + for j in range(0,9): + a.confusion_matrix[j] = sum([C[i] == str(j) for i in range(0,self.N)]) + + global_analysis_get_initial_segments.add(a) + + if self.reference != None and self.options.verbose > 0: + a.write_confusion_matrix() + a.write_length_stats() + if self.reference != None and self.options.verbose > 1: + a.write_markers() + ########################################################################### + + def set_nonspeech_proportion(self): + num_speech_frames = 0 + in_segment = False + + # Active frames are the frames that are either segment starts + # or segment ends + active_frames = [] + for n in range(0, self.N + 1): + if self.E[n]: + assert(in_segment) + in_segment = False + active_frames.append(n) + if n < self.N and self.S[n]: + 
assert(not in_segment) + in_segment = True + active_frames.append(n) + if n < self.N: + if in_segment: + # Count the number of speech frames + num_speech_frames += 1 + assert (not in_segment) + if num_speech_frames == 0: + sys.stderr.write("%s: Warning: no speech found for recording %s\n" % (sys.argv[0], self.file_id)) + + # Set the number of non-speech frames to be added depending on the + # silence proportion. The target number of frames in the segments + # is computed as below: + target_segment_frames = int(num_speech_frames / (1.0 - self.options.silence_proportion)) + + # The number of frames currently in the segments + num_segment_frames = num_speech_frames + + count = 0 + while num_segment_frames < target_segment_frames: + count += 1 + changed = False + for i in range(0, len(active_frames)): + # At each active frame, try include a nonspeech frame into + # segment. Thus padding the speech segments with some + # non-speech frames. These converted non-speech frames are + # labelled 9...14 depending on whether they were originally + # 0...5 respectively + n = active_frames[i] + if self.E[n] and n < self.N and not self.S[n]: + # This must be the beginning of a non-speech region. + # Include some of this non-speech in the segments + assert (self.A[n] not in self.THIS_SPEECH) + + # Convert the non-speech frame to be included in segment + self.A[n] = str(int(self.B[n]) + 9) + if self.B[n-1] != self.B[n]: + # In this frame there is a transition from + # one type of non-speech (0, 1 ... 5) to another + # So its the start of a segment. Also add it to the + # end of the active frames list + self.S[n] = True + active_frames.append(n+1) + else: + # We need to extend the segment end since we have + # included a non-speeech frame. Remove the current segment end mark + # and one to the next frame + self.E[n] = False + active_frames[i] = n + 1 + self.E[n+1] = True + # Increment the number of frames in the segments + num_segment_frames += 1 + changed = True + if n < self.N and self.S[n] and n > 0 and not self.E[n]: + # This must be the beginning of a speech region. + # Include some non-speech before it into the segments + assert (self.A[n-1] not in self.THIS_SPEECH) + self.A[n-1] = str(int(self.B[n-1]) + 9) + if self.B[n-1] != self.B[n]: + self.E[n] = True + active_frames.append(n-1) + else: + self.S[n] = False + active_frames[i] = n - 1 + self.S[n-1] = True + num_segment_frames += 1 + changed = True + if num_segment_frames >= target_segment_frames: + break + if not changed: # avoid an infinite loop. if no changes, then break. 
+ break + if num_segment_frames < target_segment_frames: + proportion = float(num_segment_frames - num_speech_frames) / num_segment_frames + sys.stderr.write("%s: Warning: for recording %s, only got a proportion %f of non-speech frames, versus target %f\n" % (sys.argv[0], self.file_id, proportion, self.options.silence_proportion)) + + ########################################################################### + # Analysis section + self.C = ["0"] * self.N + C = self.C + a = Analysis(self.file_id, self.frame_shift,"Analysis after set_nonspeech_proportion") + + if self.reference != None: + count = 0 + for i in range(0,self.N): + if self.reference[i] == "0" and self.A[i] in (self.THIS_SILENCE + self.THIS_NOISE): + C[i] = "0" + elif self.reference[i] == "0" and self.A[i] in self.THIS_CONVERT: + C[i] = "1" + elif self.reference[i] == "0" and self.A[i] in self.THIS_SPEECH: + C[i] = "2" + elif self.reference[i] == "1" and self.A[i] in (self.THIS_SILENCE + self.THIS_NOISE): + C[i] = "3" + elif self.reference[i] == "1" and self.A[i] in self.THIS_CONVERT: + C[i] = "4" + elif self.reference[i] == "1" and self.A[i] in self.THIS_SPEECH: + C[i] = "5" + elif self.reference[i] == "2" and self.A[i] in (self.THIS_SILENCE + self.THIS_NOISE): + C[i] = "6" + elif self.reference[i] == "2" and self.A[i] in self.THIS_CONVERT: + C[i] = "7" + elif self.reference[i] == "2" and self.A[i] in self.THIS_SPEECH: + C[i] = "8" + if i > 0 and C[i-1] != C[i]: + a.state_count[int(C[i-1])].append(count) + a.markers[int(C[i-1])].append(i - count) + a.phones[int(C[i-1])].append(' '.join(set(self.P[i-count:i]))) + count = 1 + else: + count += 1 + + for j in range(0,9): + a.confusion_matrix[j] = sum([C[i] == str(j) for i in range(0,self.N)]) + + global_analysis_set_nonspeech_proportion.add(a) + + if self.reference != None and self.options.verbose > 0: + a.write_confusion_matrix() + a.write_length_stats() + if self.reference != None and self.options.verbose > 1: + a.write_markers() + ########################################################################### + + def merge_segments(self): + # Get list of frames which have segment start and segment end + # markers into separate lists + segment_starts = [i for i, val in enumerate(self.S) if val] + segment_ends = [i for i, val in enumerate(self.E) if val] + assert (sum(self.S) == sum(self.E)) + + if self.options.verbose > 3: + sys.stderr.write("Length of segment starts before non-speech adding: %d\n" % len(segment_starts)) + + if self.min_inter_utt_nonspeech_length > 0.0: + segment_starts = list(set([0] + segment_starts + segment_ends + [self.N])) + segment_starts.sort() + segment_starts.pop() + segment_ends= list(set([0] + segment_starts + segment_ends + [self.N])) + segment_ends.sort() + segment_ends.pop(0) + if self.options.verbose > 3: + sys.stderr.write("Length of segment starts after non-speech adding: %d\n" % len(segment_starts)) + for i in segment_starts: + self.S[i] = True + for i in segment_ends: + self.E[i] = True + + # Just a check. 
There must always be equal number of segment starts + # and segment ends + assert (len(segment_starts) == len(segment_ends)) + + # A boundary is a frame which is both a segment start and a segment end + # The list of boundaries is obtained in the following step along with + # a few statistics like the type of segment on either side of the boundary + # and the length of the segment on either side of it + boundaries = [] + i = 0 + j = 0 + while i < len(segment_starts) and j < len(segment_ends): + if segment_ends[j] < segment_starts[i]: + # The segment end marker is before the segment start marker. + # This means that this segment end marker corresponds to a segment + # that is before the one indicated by the segment start marker. + # So advance the segment end pointer to the next segment end to + # check if that is a 'boundary' + j += 1 + elif segment_ends[j] > segment_starts[i]: + # The segment end marker is after the segment start marker. + # This means that this segment end marker would corresponds + # to segment indicated by the segment start marker. + # So advance the segment start pointer to the next segment start to + # check if that is a 'boundary' + i += 1 + else: + assert(i < len(segment_starts) and j < len(segment_ends)) + # A boundary: + # Find the segment score as the min of lengths of the segments + # to the left and to the right. + # This segment score will be used to prioritize merging of + # the segment with its neighbor + assert ((j + 1) < len(segment_ends)) + segment_score = min(segment_starts[i] - segment_starts[i-1], \ + segment_ends[j+1] - segment_ends[j]) + # Also find the type of tranisition of the segments at the boundary. + # This is also used to prioritize the merging of the segment + boundaries.append((segment_ends[j], segment_score, \ + self.transition_type(segment_ends[j]))) + + # Sort the boundaries based on segment score + boundaries.sort(key = lambda x: x[1]) + # Then sort based on the type of transition by keeping it still + # sorted within each transition type based on segment score + boundaries.sort(key = lambda x: x[2]) + i += 1 + j += 1 + # End if + # End while loop + + # Begin merging of segments by removing the start and end mark + # at the boundary to be merged + count = 0 + for b in boundaries: + count += 1 + segment_length = 0 + + if self.min_inter_utt_nonspeech_length > 0.0 and not self.E[b[0]]: + # This will happen only if the boundary is at the end of + # a non-speech region that has already been merged or removed + # b[0] will then not be an end mark. + continue + + # Count the number of frames in the segment to the + # left of the boundary + p = b[0] - 1 + while p >= 0: + if self.S[p]: + break + p -= 1 + # End if + # End while loop + p_left = p + segment_length += b[0] - p + + # Count the number of frames in the segment to the + # right of the boundary + p = b[0] + 1 + while p <= self.N: + if self.E[p]: + break + p += 1 + assert (self.min_inter_utt_nonspeech_length == 0 or p == self.N or self.S[p] or self.A[p] in self.THIS_SILENCE_OR_NOISE) + + if self.min_inter_utt_nonspeech_length > 0 and self.A[b[0]] in self.THIS_SILENCE_OR_NOISE: + assert(b[2] == 6 or b[2] == 7) + if (p - b[0]) > self.min_inter_utt_nonspeech_length: + # This is a non-speech segment that is longer than the minimum + # inter-utterance non-speech length. 
+ # Therefore treat this non-speech as inter-utterance non-speech and + # remove it from the segments + self.S[b[0]] = False + self.E[p] = False + + # Count the number of times inter utt non-speech + # length is greater than the set threshold + # This is the number of times the silence is + # not merged with adjacent speech + self.stats.inter_utt_nonspeech += 1 + + # This is boundary is no longer valid. + # So we can continue to the next boundary + continue + # End if + + # This non-speech segment is less than the minimum inter-utterance + # non-speech length. It is possible to merge this segment + # with the adjacent ones as long as the length of the + # segment after merging to see if its within limits. + p_temp = p + p += 1 + while p <= self.N: + if self.E[p]: + break + p += 1 + # End while loop + segment_length += p - b[0] + if segment_length < self.max_frames: + # Merge the non-speech segment with the segments + # on either sides + + # Count the number of times segment merge happens + self.stats.merge_nonspeech_segment += 1 + + if p_temp < self.N: + self.S[p_temp] = False + self.E[p_temp] = False + self.S[b[0]] = False + self.E[b[0]] = False + continue + else: + # The merged segment length is longer than max_frames. + # Therefore treat this non-speech as inter-utterance non-speech and + # remove it from the segments + self.S[b[0]] = False + self.E[p_temp] = False + continue + # End if + elif self.min_inter_utt_nonspeech_length > 0 and (b[2] == 8 or b[2] == 9): + assert(p_left == 0) + if b[0] - p_left > self.min_inter_utt_nonspeech_length: + self.S[p_left] = False + self.E[b[0]] = False + continue + # End if + # End if + segment_length += p - b[0] + + if segment_length < self.max_frames: + self.stats.merge_segments += 1 + self.S[b[0]] = False + self.E[b[0]] = False + # End if + # End for loop over boundaries + + assert (sum(self.S) == sum(self.E)) + + ########################################################################### + # Analysis section + + if self.reference != None and self.options.verbose > 3: + a = self.segmentation_analysis("Analysis after merge_segments") + a.write_confusion_matrix() + + if self.reference != None and self.options.verbose > 4: + a.write_type_stats() + # End if + + if self.reference != None and self.options.verbose > 4: + a.write_markers() + # End if + # End if + ########################################################################### + # End function merge_segments + + def split_long_segments(self): + assert (sum(self.S) == sum(self.E)) + for n in range(0, self.N): + if self.S[n]: + p = n + 1 + while p <= self.N: + if self.E[p]: + break + p += 1 + segment_length = p - n + if segment_length > self.hard_max_frames: + # Count the number of times long segments are split + self.stats.split_segments += 1 + + num_pieces = int((float(segment_length) / self.hard_max_frames) + 0.99999) + sys.stderr.write("%s: Warning: for recording %s, " \ + % (sys.argv[0], self.file_id) \ + + "splitting segment of length %f seconds into %d pieces " \ + % (segment_length * self.frame_shift, num_pieces) \ + + "(--hard-max-segment-length %f)\n" \ + % self.options.hard_max_segment_length) + frames_per_piece = int(segment_length / num_pieces) + for i in range(1,num_pieces): + q = n + i * frames_per_piece + self.S[q] = True + self.E[q] = True + if p - 1 > n: + n = p - 1 + assert (sum(self.S) == sum(self.E)) + # End function split_long_segments + + def remove_silence_only_segments(self): + for n in range(0, self.N): + # Run through to find a segment start + if self.S[n]: + p = n + 
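+                # Scan from this segment start to its end mark, checking
+                # whether any frame in between is something other than
+                # silence; all-silence segments are dropped below.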
saw_nonsilence = False + # From the segment start, go till the segment end to see + # if there is speech in it + while p <= self.N: + if self.E[p] and p != n: + break + if p < self.N and self.A[p] not in self.THIS_SILENCE: + saw_nonsilence = True + p += 1 + # End of while loop through the segment + assert (p > self.N or self.E[p]) + if not saw_nonsilence: + # Count the number of silence only segments + self.stats.silence_only += 1 + + self.S[n] = False + self.E[p] = False + # End if + if p - 1 > n: + # Go to the end of the segment since that segment is + # already processed + n = p - 1 + # End if + if self.reference != None and self.options.verbose > 3: + a = self.segmentation_analysis("Analysis after remove_silence_only_segments") + a.write_confusion_matrix() + + if self.reference != None and self.options.verbose > 4: + a.write_type_stats() + # End if + + if self.reference != None and self.options.verbose > 4: + a.write_markers() + # End if + # End if + # End function remove_silence_only_segments + + def remove_noise_only_segments(self): + for n in range(0, self.N): + if self.S[n]: + p = n + saw_speech = False + while p <= self.N: + if self.E[p] and p != n: + break + if self.A[p] in self.THIS_SPEECH: + saw_speech = True + p += 1 + assert (self.E[p]) + if not saw_speech: + # Count the number of segments with no speech + self.stats.noise_only += 1 + self.S[n] = False + self.E[p] = False + # End if + if p - 1 > n: + n = p - 1 + # End if + # End if + # End for loop over frames + + ########################################################################### + # Analysis section + + if self.reference != None and self.options.verbose > 3: + a = self.segmentation_analysis("Analysis after remove_noise_only_segments") + a.write_confusion_matrix() + + if self.reference != None and self.options.verbose > 4: + a.write_type_stats() + # End if + + if self.reference != None and self.options.verbose > 4: + a.write_markers() + # End if + # End if + ########################################################################### + # End function remove_noise_only_segments + + # Return the transition type from frame j-1 to frame j + def transition_type(self, j): + assert (j > 0) + assert (self.A[j-1] != self.A[j] or self.A[j] in self.THIS_CONVERT) + if self.A[j-1] in (self.THIS_SPEECH_THAT_NOISE + self.THIS_SPEECH_THAT_SIL) and self.A[j] in (self.THIS_SPEECH_THAT_NOISE + self.THIS_SPEECH_THAT_SIL): + return 0 + if self.A[j-1] in self.THIS_SPEECH and self.A[j] in self.THIS_SPEECH: + return 1 + if self.A[j-1] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT_THAT_SIL + self.THIS_NOISE_CONVERT_THAT_NOISE) and self.A[j] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT_THAT_SIL + self.THIS_NOISE_CONVERT_THAT_NOISE): + return 2 + if self.A[j-1] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT) and self.A[j] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT): + return 3 + if self.A[j-1] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT + self.THIS_SIL_CONVERT_THAT_SIL + self.THIS_SIL_CONVERT_THAT_NOISE) and self.A[j] in (self.THIS_SPEECH + self.THIS_NOISE_CONVERT + self.THIS_SIL_CONVERT_THAT_SIL + self.THIS_SIL_CONVERT_THAT_NOISE): + return 4 + if self.A[j-1] in (self.THIS_SPEECH + self.THIS_CONVERT) and self.A[j] in (self.THIS_SPEECH + self.THIS_CONVERT): + return 5 + if self.A[j-1] in self.THIS_SPEECH_PLUS and self.A[j] in (self.THIS_SPEECH_PLUS + self.THIS_NOISE): + return 6 + if self.A[j-1] in self.THIS_SPEECH_PLUS and self.A[j] in (self.THIS_SPEECH_PLUS + self.THIS_SILENCE): + return 7 + if self.A[j-1] in (self.THIS_SPEECH_PLUS 
+ self.THIS_NOISE) and self.A[j] in self.THIS_SPEECH_PLUS: + return 8 + if self.A[j-1] in (self.THIS_SPEECH_PLUS + self.THIS_SILENCE) and self.A[j] in self.THIS_SPEECH_PLUS: + return 9 + assert (False) + + # Output the final segments + def print_segments(self, out_file_handle = sys.stdout): + # We also do some sanity checking here. + segments = [] + + assert (self.N == len(self.S)) + assert (self.N + 1 == len(self.E)) + + max_end_time = 0 + n = 0 + while n < self.N: + if self.E[n] and not self.S[n]: + sys.stderr.write("%s: Error: Ending segment before starting it: n=%d\n" % (sys.argv[0], n)) + if self.S[n]: + p = n + 1 + while p < self.N and not self.E[p]: + assert (not self.S[p]) + p += 1 + assert (p == self.N or self.E[p]) + segments.append((n,p)) + max_end_time = p + if p < self.N and self.S[p]: + n = p - 1 + else: + n = p + n += 1 + + if len(segments) == 0: + sys.stderr.write("%s: Warning: no segments for recording %s\n" % (sys.argv[0], self.file_id)) + sys.exit(1) + + ############################################################################ + # Analysis section + + self.C = ["0"] * self.N + C = self.C + a = Analysis(self.file_id, self.frame_shift,"Analysis final") + + if self.reference != None: + count = 0 + in_seg = False + for i in range(0,self.N): + if in_seg and self.E[i]: + in_seg = False + if i == 0 and self.S[i]: + in_seg = True + if not in_seg and self.S[i]: + in_seg = True + if self.reference[i] == "0" and not in_seg: + C[i] = "0" + elif self.reference[i] == "0" and in_seg: + C[i] = "2" + elif self.reference[i] == "1" and not in_seg: + C[i] = "3" + elif self.reference[i] == "1" and in_seg: + C[i] = "5" + elif self.reference[i] == "2" and not in_seg: + C[i] = "6" + elif self.reference[i] == "2" and in_seg: + C[i] = "8" + if i > 0 and C[i-1] != C[i]: + a.state_count[int(C[i-1])].append(count) + a.markers[int(C[i-1])].append(i - count) + a.phones[int(C[i-1])].append(' '.join(set(self.P[i-count:i]))) + count = 1 + else: + count += 1 + + for j in range(0,9): + a.confusion_matrix[j] = sum([C[i] == str(j) for i in range(0,self.N)]) + + if self.options.verbose > 0: + a.write_confusion_matrix() + a.write_length_stats() + if self.options.verbose > 1: + a.write_markers() + + global_analysis_final.add(a) + ############################################################################ + + # we'll be printing the times out in hundredths of a second (regardless of the + # value of $frame_shift), and first need to know how many digits we need (we'll be + # printing with "%05d" or similar, for zero-padding. + max_end_time_hundredths_second = int(100.0 * self.frame_shift * max_end_time) + num_digits = 1 + i = 1 + while i < max_end_time_hundredths_second: + i *= 10 + num_digits += 1 + format_str = r"%0" + "%d" % num_digits + "d" # e.g. 
"%05d" + + for start, end in segments: + assert (end > start) + start_seconds = "%.2f" % (self.frame_shift * start) + end_seconds = "%.2f" % (self.frame_shift * end) + start_str = format_str % (start * self.frame_shift * 100.0) + end_str = format_str % (end * self.frame_shift * 100.0) + utterance_id = "%s%s%s%s%s" % (self.file_id, self.options.first_separator, start_str, self.options.second_separator, end_str) + # Output: + out_file_handle.write("%s %s %s %s\n" % (utterance_id, self.file_id, start_seconds, end_seconds)) + + # Some intermediate stage analysis of the segmentation + def segmentation_analysis(self, title = "Analysis"): + # In this analysis, we are trying to find in each segment, + # the number of frames that are speech, noise and silence + # in the reference RTTM + + # First get the segment start and segment ends + # Note that they are in sync by construction + segment_starts = [i for i in range(0,self.N) if self.S[i]] + segment_ends = [i for i in range(0,self.N+1) if self.E[i]] + + D = {} + for i,st in enumerate(segment_starts): + en = segment_ends[i] + types = {} + for val in self.reference[st:en]: + # The segment is defined by the indices st:en + # Count the number of frames in the segment that + # are silence, speech and noise in the reference. + types[val] = types.get(val,0) + 1 + # End for loop over a particular segment + # Make a tuple out of the counts of the types of frames + D[st] = (en, types.get("0",0), types.get("1", 0), types.get("2", 0)) + # End for loop over all segments + + a = Analysis(self.file_id, None, title) + for st, info in D.items(): + en = info[0] + + if info[1] > 0 and info[2] == 0 and info[3] == 0: + # All frames silence + a.confusion_matrix[0] += 1 + a.state_count[0].append((en-st,)+info[1:]) + a.type_counts[0][0].append(info[1]) + a.type_counts[1][0].append(info[2]) + a.type_counts[2][0].append(info[3]) + a.markers[0].append(st) + elif info[1] == 0 and info[2] > 0 and info[3] == 0: + # All frames noise + a.confusion_matrix[1] += 1 + a.state_count[1].append((en-st,)+info[1:]) + a.type_counts[0][1].append(info[1]) + a.type_counts[1][1].append(info[2]) + a.type_counts[2][1].append(info[3]) + a.markers[1].append(st) + elif info[1] == 0 and info[2] == 0 and info[3] > 0: + # All frames speech + a.confusion_matrix[2] += 1 + a.state_count[2].append((en-st,)+info[1:]) + a.type_counts[0][2].append(info[1]) + a.type_counts[1][2].append(info[2]) + a.type_counts[2][2].append(info[3]) + a.markers[2].append(st) + elif info[1] > 0 and info[2] > 0 and info[3] == 0: + # Segment contains both silence and noise + a.confusion_matrix[3] += 1 + a.state_count[3].append((en-st,)+info[1:]) + a.type_counts[0][3].append(info[1]) + a.type_counts[1][3].append(info[2]) + a.type_counts[2][3].append(info[3]) + a.markers[3].append(st) + elif info[1] > 0 and info[2] == 0 and info[3] > 0: + # Segment contains both silence and speech + a.confusion_matrix[4] += 1 + a.type_counts[0][4].append(info[1]) + a.type_counts[1][4].append(info[2]) + a.type_counts[2][4].append(info[3]) + a.state_count[4].append((en-st,)+info[1:]) + a.markers[4].append(st) + elif info[1] == 0 and info[2] > 0 and info[3] > 0: + # Segment contains both noise and speech + a.confusion_matrix[5] += 1 + a.state_count[5].append((en-st,)+info[1:]) + a.type_counts[0][5].append(info[1]) + a.type_counts[1][5].append(info[2]) + a.type_counts[2][5].append(info[3]) + a.markers[5].append(st) + elif info[1] > 0 and info[2] > 0 and info[3] > 0: + # Segment contains silence, noise and speech + a.confusion_matrix[6] += 1 + 
a.state_count[6].append((en-st,)+info[1:]) + a.type_counts[0][6].append(info[1]) + a.type_counts[1][6].append(info[2]) + a.type_counts[2][6].append(info[3]) + a.markers[6].append(st) + else: + # Should never be here + assert (False) + # End if + # End for loop over all stats + return a + # End function segmentation_analysis + +def map_prediction(A1, A2, phone_map, speech_cap = None, f = None): + if A2 == None: + B = [] + # Isolated segmentation + prev_x = None + len_x = 0 + i = 0 + for x in A1: + if prev_x == None or x == prev_x: + len_x += 1 + else: + assert (len_x > 0) + #sys.stderr.write("PHONE_LENGTH %s %d %s %d\n" % (prev_x, len_x, f, i - len_x)) + if phone_map[prev_x] == "0": + B.extend(["0"] * len_x) + elif (speech_cap != None and len_x > speech_cap) or phone_map[prev_x] == "1": + B.extend(["4"] * len_x) + elif phone_map[prev_x] == "2": + B.extend(["8"] * len_x) + # End if + len_x = 1 + # End if + prev_x = x + i += 1 + # End for + try: + assert (len_x > 0) + except AssertionError as e: + repr(e) + sys.stderr.write("In file %s\n" % f) + sys.exit(1) + + if phone_map[prev_x] == "0": + B.extend(["0"] * len_x) + elif (speech_cap != None and len_x > speech_cap) or phone_map[prev_x] == "1": + B.extend(["4"] * len_x) + elif phone_map[prev_x] == "2": + B.extend(["8"] * len_x) + # End if + return B + # End if (isolated segmentation) + + # Assuming len(A1) > len(A2) + # Otherwise A1 and A2 must be interchanged before + # passing to this function + B1 = [] + B2 = [] + for i in range(0, len(A2)): + if phone_map[A1[i]] == "0" and phone_map[A2[i]] == "0": + B1.append("0") + B2.append("0") + if phone_map[A1[i]] == "0" and phone_map[A2[i]] == "1": + B1.append("1") + B2.append("3") + if phone_map[A1[i]] == "0" and phone_map[A2[i]] == "2": + B1.append("2") + B2.append("6") + if phone_map[A1[i]] == "1" and phone_map[A2[i]] == "0": + B1.append("3") + B2.append("1") + if phone_map[A1[i]] == "1" and phone_map[A2[i]] == "1": + B1.append("4") + B2.append("4") + if phone_map[A1[i]] == "1" and phone_map[A2[i]] == "2": + B1.append("5") + B2.append("7") + if phone_map[A1[i]] == "2" and phone_map[A2[i]] == "0": + B1.append("6") + B2.append("2") + if phone_map[A1[i]] == "2" and phone_map[A2[i]] == "1": + B1.append("7") + B2.append("5") + if phone_map[A1[i]] == "2" and phone_map[A2[i]] == "2": + B1.append("8") + B2.append("8") + for i in range(len(A2), len(A1)): + if phone_map[A1[i]] == "0": + B1.append("0") + B2.append("0") + if phone_map[A1[i]] == "1": + B1.append("3") + B2.append("1") + if phone_map[A1[i]] == "2": + B1.append("6") + B2.append("2") + return (B1, B2) + +def main(): + parser = ArgumentParser(description='Get segmentation arguments') + parser.add_argument('--verbose', type=int, \ + dest='verbose', default=0, \ + help='Give higher verbose for more logging (default: %(default)s)') + parser.add_argument('--silence-proportion', type=float, \ + dest='silence_proportion', default=0.05, \ + help="The amount of silence at the sides of segments is " \ + + "tuned to give this proportion of silence. 
(default: %(default)s)") + parser.add_argument('--frame-shift', type=float, \ + dest='frame_shift', default=0.01, \ + help="Time difference between adjacent frame (default: %(default)s)s") + parser.add_argument('--max-segment-length', type=float, \ + dest='max_segment_length', default=10.0, \ + help="Maximum segment length while we are marging segments (default: %(default)s)") + parser.add_argument('--hard-max-segment-length', type=float, \ + dest='hard_max_segment_length', default=15.0, \ + help="Hard maximum on the segment length above which the segment " \ + + "will be broken even if in the middle of speech (default: %(default)s)") + parser.add_argument('--first-separator', type=str, \ + dest='first_separator', default="-", \ + help="Separator between recording-id and start-time (default: %(default)s)") + parser.add_argument('--second-separator', type=str, \ + dest='second_separator', default="-", \ + help="Separator between start-time and end-time (default: %(default)s)") + parser.add_argument('--remove-noise-only-segments', type=str, \ + dest='remove_noise_only_segments', default="true", choices=("true", "false"), \ + help="Remove segments that have only noise. (default: %(default)s)") + parser.add_argument('--min-inter-utt-silence-length', type=float, \ + dest='min_inter_utt_silence_length', default=1.0, \ + help="Minimum silence that must exist between two separate utterances (default: %(default)s)"); + parser.add_argument('--channel1-file', type=str, \ + dest='channel1_file', default="inLine", \ + help="String that matches with the channel 1 file (default: %(default)s)") + parser.add_argument('--channel2-file', type=str, \ + dest='channel2_file', default="outLine", \ + help="String that matches with the channel 2 file (default: %(default)s)") + parser.add_argument('--isolated-resegmentation', \ + dest='isolated_resegmentation', \ + action='store_true', help="Do not do joint segmentation (default: %(default)s)") + parser.add_argument('--max-length-diff', type=float, \ + dest='max_length_diff', default=1.0, \ + help="Maximum difference in the lengths of the two channels for joint " \ + + "segmentation to be done (default: %(default)s)") + parser.add_argument('--reference-rttm', dest='reference_rttm', \ + help="RTTM file to compare and get statistics (default: %(default)s)") + parser.add_argument('--speech-cap-length', type=float, default=None, \ + help="Maximum length in seconds of a particular speech phone prediction." \ + + "\nAny length above this will be considered as noise") + parser.add_argument('prediction_dir', \ + help='Directory where the predicted phones (.pred files) are found') + parser.add_argument('phone_map', \ + help='Phone Map file that maps from phones to classes') + parser.add_argument('output_segments', nargs='?', default="-", \ + help='Output segments file') + parser.usage=':'.join(parser.format_usage().split(':')[1:]) \ + + 'e.g. : %(prog)s exp/tri4b_whole_resegment_dev10h/pred exp/tri4b_whole_resegment_dev10h/phone_map.txt data/dev10h.seg/segments' + options = parser.parse_args() + + sys.stderr.write(' '.join(sys.argv) + "\n") + if not ( options.silence_proportion \ + > 0.01 and options.silence_proportion < 0.99 ): + sys.stderr.write("%s: Error: Invalid silence-proportion value %f\n" \ + % options.silence_proportion) + sys.exit(1) + + if not ( options.remove_noise_only_segments == "false" or options.remove_noise_only_segments == "true" ): + sys.stderr.write("%s: Error: Invalid value for remove-noise-only segments %s. 
+    if options.output_segments == '-':
+        out_file = sys.stdout
+    else:
+        try:
+            out_file = open(options.output_segments, 'w')
+        except IOError as e:
+            sys.stderr.write("%s: %s: Unable to open file %s\n" % (sys.argv[0], e, options.output_segments))
+            sys.exit(1)
+    # End if
+
+    phone_map = {}
+    try:
+        for line in open(options.phone_map).readlines():
+            phone, cls = line.strip().split()
+            phone_map[phone] = cls
+    except IOError as e:
+        sys.stderr.write(repr(e) + "\n")
+        sys.exit(1)
+
+    prediction_dir = options.prediction_dir
+    channel1_file = options.channel1_file
+    channel2_file = options.channel2_file
+
+    temp_dir = prediction_dir + "/../rttm_classes"
+    os.system("mkdir -p %s" % temp_dir)
+    if options.reference_rttm is not None:
+        read_rttm_file(options.reference_rttm, temp_dir, options.frame_shift)
+    else:
+        temp_dir = None
+
+    stats = Stats()
+
+    pred_files = dict([ (f.split('/')[-1][0:-5], False) \
+        for f in glob.glob(os.path.join(prediction_dir, "*.pred")) ])
+
+    global global_analysis_get_initial_segments
+    global_analysis_get_initial_segments = Analysis("TOTAL_Get_Initial_Segments", options.frame_shift, "Global Analysis after get_initial_segments")
+
+    global global_analysis_set_nonspeech_proportion
+    global_analysis_set_nonspeech_proportion = Analysis("TOTAL_set_nonspeech_proportion", options.frame_shift, "Global Analysis after set_nonspeech_proportion")
+
+    global global_analysis_final
+    global_analysis_final = Analysis("TOTAL_Final", options.frame_shift, "Global Analysis Final")
+
+    speech_cap = None
+    if options.speech_cap_length is not None:
+        speech_cap = int(options.speech_cap_length / options.frame_shift)
+    # End if
+
+    for f in pred_files:
+        if pred_files[f]:
+            continue
+        if re.match(".*_" + channel1_file, f) is None:
+            if re.match(".*_" + channel2_file, f) is None:
+                sys.stderr.write("%s does not match pattern .*_%s or .*_%s\n" \
+                    % (f, channel1_file, channel2_file))
+                sys.exit(1)
+            else:
+                f1 = f
+                f2 = f
+                f1 = re.sub("(.*_)" + channel2_file, r"\1" + channel1_file, f1)
+        else:
+            f1 = f
+            f2 = f
+            f2 = re.sub("(.*_)" + channel1_file, r"\1" + channel2_file, f2)
+
+        if options.isolated_resegmentation or f2 not in pred_files or f1 not in pred_files:
+            pred_files[f] = True
+            try:
+                A = open(os.path.join(prediction_dir, f + ".pred")).readline().strip().split()[1:]
+            except IndexError:
+                sys.stderr.write("Incorrect format of file %s/%s.pred\n" % (prediction_dir, f))
+                sys.exit(1)
+
+            B = map_prediction(A, None, phone_map, speech_cap, f)
+
+            if temp_dir is not None:
+                try:
+                    reference = open(os.path.join(temp_dir, f + ".ref")).readline().strip().split()[1:]
+                except IOError:
+                    reference = None
+            else:
+                reference = None
+            r = JointResegmenter(A, B, f, options, phone_map, stats, reference)
+            r.resegment()
+            r.print_segments(out_file)
+        else:
+            if pred_files[f1] and pred_files[f2]:
+                continue
+            pred_files[f1] = True
+            pred_files[f2] = True
+            try:
+                A1 = open(os.path.join(prediction_dir, f1 + ".pred")).readline().strip().split()[1:]
+            except IndexError:
+                sys.stderr.write("Incorrect format of file %s/%s.pred\n" % (prediction_dir, f1))
+                sys.exit(1)
+            try:
+                A2 = open(os.path.join(prediction_dir, f2 + ".pred")).readline().strip().split()[1:]
+            except IndexError:
+                sys.stderr.write("Incorrect format of file %s/%s.pred\n" % (prediction_dir, f2))
+                sys.exit(1)
+
+            if len(A1) < len(A2):
+                A3 = A1
+                A1 = A2
+                A2 = A3
+
+                f3 = f1
+                f1 = f2
+                f2 = f3
+            # End if
+
+            if (len(A1) - len(A2)) > options.max_length_diff / options.frame_shift:
+                sys.stderr.write( \
+                    "%s: Warning: Lengths of %s and %s differ by more than %f. " \
+                    % (sys.argv[0], f1, f2, options.max_length_diff) \
+                    + "So using isolated resegmentation\n")
+                B1 = map_prediction(A1, None, phone_map, speech_cap)
+                B2 = map_prediction(A2, None, phone_map, speech_cap)
+            else:
+                B1, B2 = map_prediction(A1, A2, phone_map, speech_cap)
+            # End if
+
+            if temp_dir is not None:
+                try:
+                    reference1 = open(os.path.join(temp_dir, f1 + ".ref")).readline().strip().split()[1:]
+                except IOError:
+                    reference1 = None
+            else:
+                reference1 = None
+            r1 = JointResegmenter(A1, B1, f1, options, phone_map, stats, reference1)
+            r1.resegment()
+            r1.print_segments(out_file)
+
+            if temp_dir is not None:
+                try:
+                    reference2 = open(os.path.join(temp_dir, f2 + ".ref")).readline().strip().split()[1:]
+                except IOError:
+                    reference2 = None
+            else:
+                reference2 = None
+            r2 = JointResegmenter(A1, B2, f2, options, phone_map, stats, reference2)
+            r2.resegment()
+            r2.restrict(len(A2))
+            r2.print_segments(out_file)
+        # End if
+    # End for loop over files
+
+    if options.reference_rttm is not None:
+        global_analysis_get_initial_segments.write_confusion_matrix(True)
+        global_analysis_get_initial_segments.write_total_stats(True)
+        global_analysis_get_initial_segments.write_length_stats()
+        global_analysis_set_nonspeech_proportion.write_confusion_matrix(True)
+        global_analysis_set_nonspeech_proportion.write_total_stats(True)
+        global_analysis_set_nonspeech_proportion.write_length_stats()
+        global_analysis_final.write_confusion_matrix(True)
+        global_analysis_final.write_total_stats(True)
+        global_analysis_final.write_length_stats()
+
+if __name__ == '__main__':
+    with Timer() as t:
+        main()
+    sys.stderr.write("\nSegmentation done!\nTook %f sec\n" % t.interval)
+
diff --git a/egs/babel/s5d/local/resegment/train_segmentation.sh b/egs/babel/s5d/local/resegment/train_segmentation.sh
new file mode 100755
index 00000000000..511c451993e
--- /dev/null
+++ b/egs/babel/s5d/local/resegment/train_segmentation.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# Copyright 2014  Vimal Manohar, Johns Hopkins University (Author: Jan Trmal)
+# Apache 2.0
+
+nj=16        # nj for training subset of whole
+cmd=run.pl   # How to run the parallel tasks
+boost_sil=1.0
+ext_alidir=  # Use this alignment directory instead for getting new one
+
+# End of configuration
+
+. utils/parse_options.sh
+
+set -o pipefail
+set -e
+set -u
+if [ $# -ne 4 ]; then
+  echo "Usage: $0 [options] <in-model-dir> <train-data-dir> <lang-dir> <out-model-dir>"
+  echo " e.g.:"
+  echo "$0 exp/tri4 data/train data/lang exp/tri4b_seg"
+  echo " Options (selection. For full options, see the script itself):"
+  echo "    --nj <nj>         # Number of parallel jobs"
+  echo "    --cmd <cmd>       # How to run the parallel tasks"
+  exit 1
+fi
+
+in_model_dir=$1   # Model used for alignment
+train_data_dir=$2
+lang=$3
+out_model_dir=$4
+
+if [ ! -d $train_data_dir ] ; then
+  echo "$0: Unable to find directory $train_data_dir."
+  echo "$0: Run run-0-fillers.sh or run-1-main.sh first to prepare the data directory"
+  exit 1
+fi
+
+# Align train_whole_sub3 using tri4 models and train an LDA + MLLT model
+# on it.
+alidir=${in_model_dir}_train_seg_ali
+
+if [ ! -z "$ext_alidir" ] && [ -s $ext_alidir/ali.1.gz ]; then
+  alidir=$ext_alidir
+elif [ ! -f $alidir/.done ]; then
+  steps/align_fmllr.sh --nj $nj --cmd "$cmd" --boost-silence $boost_sil \
+    $train_data_dir $lang $in_model_dir $alidir || exit 1;
+  touch $alidir/.done
+fi
+
+if [ ! -f $out_model_dir/.done ]; then
+  steps/train_lda_mllt.sh --cmd "$cmd" --realign-iters "" --boost-silence $boost_sil \
+    1000 10000 $train_data_dir $lang $alidir $out_model_dir || exit 1;
+  touch $out_model_dir/.done
+fi
+
+if [ ! -f $out_model_dir/graph.done ]; then
+  # Make the phone decoding-graph.
+  steps/make_phone_graph.sh $lang $alidir $out_model_dir || exit 1;
+  utils/mkgraph.sh $lang $out_model_dir $out_model_dir/graph | \
+    tee $out_model_dir/mkgraph.log || exit 1
+  touch $out_model_dir/graph.done
+fi
diff --git a/egs/babel/s5d/local/rttm_to_text.pl b/egs/babel/s5d/local/rttm_to_text.pl
new file mode 100755
index 00000000000..d33c71e2f17
--- /dev/null
+++ b/egs/babel/s5d/local/rttm_to_text.pl
@@ -0,0 +1,151 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+use utf8;
+
+use Data::Dumper;
+
+sub float_gt {
+  my ($A, $B) = @_;
+  #print Dumper(\@_);
+
+  if ( ($A - $B) < 1e-12 ) {
+    return 0;
+  } elsif ($A > $B) {
+    return 1;
+  } else {
+    return 0;
+  }
+}
+
+binmode(STDOUT, ":utf8");
+binmode(STDERR, ":utf8");
+
+my $datadir=$ARGV[0];
+my $rttm_filename=$ARGV[1];
+
+
+my $filename="";
+my %rttm;
+my @times;
+
+
+open(rttm_f, "<:utf8", $rttm_filename) or die "Cannot open the RTTM file";
+while ( <rttm_f> ) {
+  chop;
+  my @elems = split;
+  my $_filename= $elems[1];
+  my $_time=$elems[3];
+  my $_dur=$elems[4];
+  my $_text=$elems[5];
+
+  #We could simply pull out the vector of times
+  #from the hash, but in case the RTTM is not sorted
+  #there might be some other problem somewhere
+  #(as the RTTMs are normally sorted). So instead of being
+  #"smart", let's make the user notice!
+  if ( exists($rttm{$_filename}) ) {
+    die "The RTTM file is not sorted!";
+  }
+
+  if ( $filename ne $_filename ) {
+    if ( $filename ne "" ) {
+      #print $filename . "\n";
+      my @tmp = @times;
+      $rttm{$filename} = \@tmp;
+      #if ($filename eq "BABEL_BP_101_10470_20111118_172644_inLine" ) {
+      #  print "$filename\n";
+      #  print Dumper($rttm{$filename});
+      #}
+      #print Dumper($rttm{"BABEL_BP_101_10470_20111118_172644_inLine"});
+    }
+
+    @times = ();
+    $filename = $_filename;
+  }
+
+  #I don't really know what the distinction between all
+  #of these is. Let's throw away the SPEAKER entries, as they do not
+  #really contain information that is to be found in the transcript,
+  #and keep the others.
+  if ( $elems[0] eq "LEXEME") {
+    push @times, [$_time, $_time + $_dur, $_text];
+  } elsif ( $elems[0] eq "NON-SPEECH" ) {
+    push @times, [$_time, $_time + $_dur, $_text];
+  } elsif ( $elems[0] eq "NON-LEX" ) {
+    push @times, [$_time, $_time + $_dur, $_text];
+  } elsif ( $elems[0] eq "SPEAKER") {
+    ;
+  } else {
+    #This is just a safety precaution in case a new flag/type appears.
+    die "Unknown first element $elems[0] of line '" . join(" ", @elems) . "'\n";
+  }
+
+  #We compare the two last entries of the @times vector to check whether
+  #they are ordered properly. Again, this is just a safety precaution.
+  #In a well-formed RTTM, this is normal.
+  if ( (@times > 1) && float_gt($times[-2][1], $times[-1][0]) ) {
+    #print Dumper(\@times);
+    my $A = $times[-2][0];
+    my $B = $times[-1][0];
+    my $Aend = $times[-2][1];
+    my $Bend = $times[-1][1];
+
+    #print "WARNING: Elements in the RTTM file are not sorted for FILENAME $filename!\n";
+    #print $times[-2][0] . " " . $times[-2][1] - $times[-2][0]. " " . $times[-2][2] . "\n";
+    #print $times[-1][0] . " " . $times[-1][1] - $times[-1][0]. " " . $times[-1][2] . "\n";
"\n"; + #print "\n"; + + my @sorted = sort {$a <=> $b} ($A, $B, $Aend, $Bend); + #print Dumper(\@sorted); + $times[-1][0] = $sorted[0]; + $times[-1][1] = $sorted[2]; #We omit the gap between these two words + $times[-2][0] = $sorted[2]; + $times[-2][1] = $sorted[3]; + + } +} +if ( $filename ne "" ) { + #print $filename . "\n"; + $rttm{$filename} = \@times; +} +close(rttm_f); + +open(segments_f, "<:utf8", "$datadir/segments") or die "Cannot open file $datadir/segments"; +while ( ) { + chop; + my ($segmentname, $filename, $start, $end) = split; + + if (! exists $rttm{$filename} ) { + print "Filename $filename does not exists in the RTTM file\n"; + die; + } + my @times = @{$rttm{$filename}}; + my $i; + my $j; + + + #if ($segmentname ne "10470_A_20111118_172644_000000" ) { + # next; + #} + + #print $filename . "\n"; + + #print Dumper(\@times); + $i = 0; + #print $start . " " . $times[$i][0] . " " . $times[$i][1] . "\n"; + while (($i < @times) && ( $times[$i][1] < $start ) ) { $i += 1; }; + $j = $i; + while (($j < @times) && ( $times[$j][0] < $end ) ) { $j += 1; }; + + print $segmentname . " "; + while ( $i < $j ) { + #print Dumper($times[$i]); + print $times[$i][2] . " "; + $i += 1; + } + print "\n"; + #die +} +close(segments_f); diff --git a/egs/babel/s5d/local/run_cleanup_segmentation.sh b/egs/babel/s5d/local/run_cleanup_segmentation.sh new file mode 100755 index 00000000000..324d796b1b1 --- /dev/null +++ b/egs/babel/s5d/local/run_cleanup_segmentation.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Copyright 2016 Vimal Manohar +# 2016 Johns Hopkins University (author: Daniel Povey) +# Apache 2.0 + +# This script demonstrates how to re-segment training data selecting only the +# "good" audio that matches the transcripts. +# The basic idea is to decode with an existing in-domain acoustic model, and a +# biased language model built from the reference, and then work out the +# segmentation from a ctm like file. + +# For nnet3 and chain results after cleanup, see the scripts in +# local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh + +# GMM Results for speaker-independent (SI) and speaker adaptive training (SAT) systems on dev and test sets +# [will add these later]. + +set -e +set -o pipefail +set -u + +stage=0 +cleanup_stage=0 +data=data/train +cleanup_affix=cleaned +srcdir=exp/tri5 +langdir=data/langp/tri5 +nj=100 +decode_nj=16 +decode_num_threads=4 + +. ./path.sh +. ./cmd.sh +. utils/parse_options.sh + +cleaned_data=${data}_${cleanup_affix} + +dir=${srcdir}_${cleanup_affix}_work +cleaned_dir=${srcdir}_${cleanup_affix} + +if [ $stage -le 1 ]; then + # This does the actual data cleanup. + steps/cleanup/clean_and_segment_data.sh --stage $cleanup_stage --nj $nj --cmd "$train_cmd" \ + $data $langdir $srcdir $dir $cleaned_data +fi + +if [ $stage -le 2 ]; then + steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ + $cleaned_data $langdir $srcdir ${srcdir}_ali_${cleanup_affix} +fi + +if [ $stage -le 3 ]; then + steps/train_sat.sh --cmd "$train_cmd" \ + 5000 100000 $cleaned_data $langdir ${srcdir}_ali_${cleanup_affix} ${cleaned_dir} +fi diff --git a/egs/babel/s5d/local/run_kws_stt_task.sh b/egs/babel/s5d/local/run_kws_stt_task.sh new file mode 100755 index 00000000000..71981a5641b --- /dev/null +++ b/egs/babel/s5d/local/run_kws_stt_task.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+set -e
+set -o pipefail
+set -u
+
+stage=0
+cleanup_stage=0
+data=data/train
+cleanup_affix=cleaned
+srcdir=exp/tri5
+langdir=data/langp/tri5
+nj=100
+decode_nj=16
+decode_num_threads=4
+
+. ./path.sh
+. ./cmd.sh
+. utils/parse_options.sh
+
+cleaned_data=${data}_${cleanup_affix}
+
+dir=${srcdir}_${cleanup_affix}_work
+cleaned_dir=${srcdir}_${cleanup_affix}
+
+if [ $stage -le 1 ]; then
+  # This does the actual data cleanup.
+  steps/cleanup/clean_and_segment_data.sh --stage $cleanup_stage --nj $nj --cmd "$train_cmd" \
+    $data $langdir $srcdir $dir $cleaned_data
+fi
+
+if [ $stage -le 2 ]; then
+  steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
+    $cleaned_data $langdir $srcdir ${srcdir}_ali_${cleanup_affix}
+fi
+
+if [ $stage -le 3 ]; then
+  steps/train_sat.sh --cmd "$train_cmd" \
+    5000 100000 $cleaned_data $langdir ${srcdir}_ali_${cleanup_affix} ${cleaned_dir}
+fi
diff --git a/egs/babel/s5d/local/run_kws_stt_task.sh b/egs/babel/s5d/local/run_kws_stt_task.sh
new file mode 100755
index 00000000000..71981a5641b
--- /dev/null
+++ b/egs/babel/s5d/local/run_kws_stt_task.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# Copyright 2013  Johns Hopkins University (authors: Yenda Trmal)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# Simple BABEL-only script to be run on generated lattices (to produce the
+# files for scoring and for NIST submission).
+
+set -e
+set -o pipefail
+set -u
+
+#Begin options
+min_lmwt=8
+max_lmwt=12
+cer=0
+skip_kws=false
+skip_stt=false
+skip_scoring=false
+extra_kws=false
+cmd=run.pl
+max_states=150000
+wip=0.5    #Word insertion penalty
+#End of options
+
+if [ $(basename $0) == score.sh ]; then
+  skip_kws=true
+fi
+
+echo $0 "$@"
+. utils/parse_options.sh
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir> <decode-dir>"
+  echo " e.g.: $0 data/dev10h data/lang exp/tri6/decode_dev10h"
+  exit 1;
+fi
+
+data_dir=$1;
+lang_dir=$2;
+decode_dir=$3;
+
+##NB: The first ".done" files are used for backward compatibility only
+##NB: they should be removed in the near future...
+if ! $skip_stt ; then
+  if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then
+    local/lattice_to_ctm.sh --cmd "$cmd" --word-ins-penalty $wip \
+      --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+      $data_dir $lang_dir $decode_dir
+
+    if ! $skip_scoring ; then
+      local/score_stm.sh --cmd "$cmd" --cer $cer \
+        --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt}\
+        $data_dir $lang_dir $decode_dir
+    fi
+    touch $decode_dir/.done.score
+  fi
+fi
+
+if ! $skip_kws ; then
+  [ ! -f $data_dir/extra_kws_tasks ] && exit 0
+
+  for extraid in `cat $data_dir/extra_kws_tasks` ; do
+    if [ ! -f $decode_dir/.done.kws.$extraid ] ; then
+      local/kws_search.sh --cmd "$cmd" --extraid $extraid \
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --skip-scoring true\
+        --max-lmwt ${max_lmwt} --indices-dir $decode_dir/kws_indices \
+        $lang_dir $data_dir $decode_dir
+      touch $decode_dir/.done.kws.$extraid
+    fi
+    if [[ ! $extraid =~ .*oov.* ]] && [ ! -f $decode_dir/.done.kwset.$extraid ] ; then
+      local/search/search.sh --cmd "$decode_cmd" --extraid ${extraid} \
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+        --indices-dir $decode_dir/kws_indices --skip-scoring $skip_scoring \
+        $lang_dir $data_dir $decode_dir
+      touch $decode_dir/.done.kwset.$extraid
+    fi
+
+    if ! $skip_scoring ; then
+      [ -f $decode_dir/.done.kws.${extraid}.scored ] && continue;
+      local/kws_search.sh --cmd "$cmd" --extraid $extraid --stage 4 \
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --skip-scoring false\
+        --max-lmwt ${max_lmwt} --indices-dir $decode_dir/kws_indices \
+        $lang_dir $data_dir $decode_dir
+      touch $decode_dir/.done.kws.${extraid}.scored
+    fi
+  done
+fi
diff --git a/egs/babel/s5d/local/run_kws_stt_task2.sh b/egs/babel/s5d/local/run_kws_stt_task2.sh
new file mode 100755
index 00000000000..6007baa1756
--- /dev/null
+++ b/egs/babel/s5d/local/run_kws_stt_task2.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+# Copyright 2013  Johns Hopkins University (authors: Yenda Trmal)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# Simple BABEL-only script to be run on generated lattices (to produce the
+# files for scoring and for NIST submission).
+
+set -e
+set -o pipefail
+set -u
+
+#Begin options
+min_lmwt=8
+max_lmwt=12
+cer=0
+skip_kws=false
+skip_stt=false
+skip_scoring=false
+extra_kws=false
+cmd=run.pl
+max_states=150000
+wip=0.5    #Word insertion penalty
+#End of options
+
+if [ $(basename $0) == score.sh ]; then
+  skip_kws=true
+fi
+
+echo $0 "$@"
+. utils/parse_options.sh
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir> <decode-dir>"
+  echo " e.g.: $0 data/dev10h data/lang exp/tri6/decode_dev10h"
+  exit 1;
+fi
+
+data_dir=$1;
+lang_dir=$(echo "$2" | perl -pe 's/\/$//g')
+decode_dir=$3;
+
+##NB: The first ".done" files are used for backward compatibility only
+##NB: they should be removed in the near future...
+if ! $skip_stt ; then
+  if [ ! -f $decode_dir/.score.done ] && [ ! -f $decode_dir/.done.score ]; then
+    local/lattice_to_ctm.sh --cmd "$cmd" --word-ins-penalty $wip \
+      --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+      $data_dir $lang_dir $decode_dir
+
+    if ! $skip_scoring ; then
+      local/score_stm.sh --cmd "$cmd" --cer $cer \
+        --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt}\
+        $data_dir $lang_dir $decode_dir
+    fi
+    touch $decode_dir/.done.score
+  fi
+fi
+
+if ! $skip_kws ; then
+  [ ! -f $data_dir/extra_kws_tasks ] && exit 0
+
+  syll_data_dir=$(echo $data_dir | perl -pe 's/\.(pem|seg)$/.syll.$1/g' )
+  if [ -d ${syll_data_dir} ] && [ ! -f ${decode_dir}/syllabs/.done ] ; then
+    local/syllab/lattice_word2syll.sh --cmd "$cmd --mem 8G" \
+      $data_dir $lang_dir ${lang_dir}.syll $decode_dir ${decode_dir}/syllabs
+    touch ${decode_dir}/syllabs/.done
+  fi
+
+  phn_data_dir=$(echo $data_dir | perl -pe 's/\.(pem|seg)$/.phn.$1/g' )
+  if [ -d ${phn_data_dir} ] && [ ! -f ${decode_dir}/phones/.done ] ; then
+    local/syllab/lattice_word2syll.sh --cmd "$cmd --mem 8G" \
+      $data_dir $lang_dir ${lang_dir}.phn $decode_dir ${decode_dir}/phones
+    touch ${decode_dir}/phones/.done
+  fi
+
+
+
+  for extraid in `cat $data_dir/extra_kws_tasks | grep -v oov` ; do
+    if [ ! -f $decode_dir/.done.kwset.$extraid ] ; then
+      local/search/search.sh --cmd "$decode_cmd" --extraid ${extraid} \
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+        --indices-dir $decode_dir/kws_indices --skip-scoring $skip_scoring \
+        $lang_dir $data_dir $decode_dir
+      touch $decode_dir/.done.kwset.$extraid
+    fi
+
+    if [ -f ${decode_dir}/syllabs/kwset_${extraid}_${min_lmwt}/f4de/metrics.txt ]; then
+      touch $decode_dir/syllabs/.done.kwset.$extraid
+    fi
+
+    if [ -f ${decode_dir}/phones/kwset_${extraid}_${min_lmwt}/f4de/metrics.txt ]; then
+      touch $decode_dir/phones/.done.kwset.$extraid
+    fi
+
+    if [ -f ${decode_dir}/syllabs/.done ] && [ ! -f $decode_dir/syllabs/.done.kwset.$extraid ] ; then
+      local/search/search.sh --cmd "$cmd" --extraid ${extraid} --model $decode_dir/../final.mdl\
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+        --indices-dir $decode_dir/syllabs/kws_indices --skip-scoring $skip_scoring \
+        ${lang_dir}.syll $syll_data_dir $decode_dir/syllabs
+      touch $decode_dir/syllabs/.done.kwset.$extraid
+    fi
+
+
+    if [ -f ${decode_dir}/phones/.done ] && [ ! -f $decode_dir/phones/.done.kwset.$extraid ] ; then
+      local/search/search.sh --cmd "$cmd" --extraid ${extraid} --model $decode_dir/../final.mdl\
+        --max-states ${max_states} --min-lmwt ${min_lmwt} --max-lmwt ${max_lmwt} \
+        --indices-dir $decode_dir/phones/kws_indices --skip-scoring $skip_scoring \
+        ${lang_dir}.phn $phn_data_dir $decode_dir/phones
+      touch $decode_dir/phones/.done.kwset.$extraid
+    fi
+  done
+fi
diff --git a/egs/babel/s5d/local/score.sh b/egs/babel/s5d/local/score.sh
new file mode 120000
index 00000000000..7a34ba5b0d7
--- /dev/null
+++ b/egs/babel/s5d/local/score.sh
@@ -0,0 +1 @@
+run_kws_stt_task.sh
\ No newline at end of file
diff --git a/egs/babel/s5d/local/score_combine.sh b/egs/babel/s5d/local/score_combine.sh
new file mode 100755
index 00000000000..7e8af85b2d8
--- /dev/null
+++ b/egs/babel/s5d/local/score_combine.sh
@@ -0,0 +1,181 @@
+#!/bin/bash
+
+# Copyright 2012-2013  Arnab Ghoshal
+#                      Johns Hopkins University (authors: Daniel Povey, Sanjeev Khudanpur)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Script for system combination using minimum Bayes risk decoding.
+# This calls lattice-combine to create a union of lattices that have been
+# normalized by removing the total forward cost from them. The resulting lattice
+# is used as input to lattice-mbr-decode. This should not be put in steps/ or
+# utils/ since the scores on the combined lattice must not be scaled.
+
+# begin configuration section.
+cmd=run.pl
+beam=4  # prune the lattices prior to MBR decoding, for speed.
+stage=0
+cer=0
+decode_mbr=true
+lat_weights=
+word_ins_penalty=0.0
+min_lmwt=7
+max_lmwt=17
+parallel_opts="-pe smp 3"
+skip_scoring=false
+ctm_name=
+#end configuration section.
+
+help_message="Usage: "$(basename $0)" [options] <data-dir> <lang-dir|graph-dir> <decode-dir1>[:lmwt-bias] <decode-dir2>[:lmwt-bias] [<decode-dir3>[:lmwt-bias] ... ] <out-dir>
+     E.g. "$(basename $0)" data/test data/lang exp/tri1/decode exp/tri2/decode exp/tri3/decode exp/combine
+     or:  "$(basename $0)" data/test data/lang exp/tri1/decode exp/tri2/decode:18 exp/tri3/decode:13 exp/combine
+Options:
+  --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes.
+  --min-lmwt INT                  # minimum LM-weight for lattice rescoring
+  --max-lmwt INT                  # maximum LM-weight for lattice rescoring
+  --lat-weights STR               # colon-separated string of lattice weights
+  --stage (0|1|2)                 # (createCTM | filterCTM | runSclite).
+  --parallel-opts <opts>          # extra options to command for combination stage,
+                                  # default '-pe smp 3'
+  --cer (0|1)                     # compute CER in addition to WER
+";
+
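+# Note on the <decode-dir>:lmwt-bias syntax above: the optional bias is added
+# to the LM-weight when that system's lattices are scaled (see the
+# lattice-scale call below), so e.g. exp/tri3/decode:13 is rescored with
+# inverse acoustic scale LMWT+13 while the other systems use LMWT.
+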
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+
+if [ $# -lt 5 ]; then
+  printf "$help_message\n";
+  exit 1;
+fi
+
+data=$1
+lang=$2
+dir=${@: -1}  # last argument to the script
+shift 2;
+decode_dirs=( $@ )  # read the remaining arguments into an array
+unset decode_dirs[${#decode_dirs[@]}-1]  # 'pop' the last argument, which is odir
+num_sys=${#decode_dirs[@]}  # number of systems to combine
+
+#Let the user set the CTM file name;
+#use the data-dir name in case the user doesn't care
+if [ -z ${ctm_name} ] ; then
+  ctm_name=`basename $data`
+fi
+
+
+for f in $lang/words.txt $lang/phones/word_boundary.int ; do
+  [ ! -f $f ] && echo "$0: file $f does not exist" && exit 1;
+done
+if ! $skip_scoring ; then
+  for f in $data/stm; do
+    [ ! -f $f ] && echo "$0: file $f does not exist" && exit 1;
+  done
+fi
+
+
+mkdir -p $dir/log
+
+for i in `seq 0 $[num_sys-1]`; do
+  decode_dir=${decode_dirs[$i]}
+  offset=`echo $decode_dir | cut -d: -s -f2`  # add this to the lm-weight.
+  decode_dir=`echo $decode_dir | cut -d: -f1`
+  [ -z "$offset" ] && offset=0
+
+  model=`dirname $decode_dir`/final.mdl  # model one level up from decode dir
+  for f in $model $decode_dir/lat.1.gz ; do
+    [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
+  done
+  if [ $i -eq 0 ]; then
+    nj=`cat $decode_dir/num_jobs` || exit 1;
+  else
+    if [ $nj != `cat $decode_dir/num_jobs` ]; then
+      echo "$0: number of decoding jobs mismatches, $nj versus `cat $decode_dir/num_jobs`"
+      exit 1;
+    fi
+  fi
+  file_list=""
+  # I want to get the files in the correct order so we can use ",s,cs" to avoid
+  # memory blowup. I first tried a pattern like file.{1,2,3,4}.gz, but if the
+  # system default shell is not bash (e.g. dash, in debian) this will not work,
+  # so we enumerate all the input files. This tends to make the command lines
+  # very long.
+  for j in `seq $nj`; do file_list="$file_list $decode_dir/lat.$j.gz"; done
+
+  lats[$i]="ark,s,cs:lattice-scale --inv-acoustic-scale=\$[$offset+LMWT] 'ark:gunzip -c $file_list|' ark:- | \
+   lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- | \
+   lattice-prune --beam=$beam ark:- ark:- | \
+   lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- |"
+done
+
+mkdir -p $dir/scoring/log
+
+if [ -z "$lat_weights" ]; then
+  lat_weights=1.0
+  for i in `seq $[$num_sys-1]`; do lat_weights="$lat_weights:1.0"; done
+fi
+
+if [ $stage -le 0 ]; then
+  $cmd $parallel_opts LMWT=$min_lmwt:$max_lmwt $dir/log/combine_lats.LMWT.log \
+    mkdir -p $dir/score_LMWT/ '&&' \
+    lattice-combine --lat-weights=$lat_weights "${lats[@]}" ark:- \| \
+    lattice-to-ctm-conf --decode-mbr=true ark:- - \| \
+    utils/int2sym.pl -f 5 $lang/words.txt \| \
+    utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \
+    '>' $dir/score_LMWT/${ctm_name}.ctm || exit 1;
+fi
+
+
+if [ $stage -le 1 ]; then
+  # Remove some stuff we don't want to score, from the ctm.
+  for lmwt in `seq $min_lmwt $max_lmwt`; do
+    x=$dir/score_${lmwt}/${ctm_name}.ctm
+    [ ! -f $x ] && echo "File $x does not exist! Exiting..." && exit 1
" && exit 1 + cp $x $x.bkup1; + cat $x.bkup1 | grep -v -E '\[NOISE|LAUGHTER|VOCALIZED-NOISE\]' | \ + grep -v -E '|%HESITATION|\(\(\)\)' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + grep -v -E '' | \ + perl -e '@list = (); %list = (); + while(<>) { + chomp; + @col = split(" ", $_); + push(@list, $_); + $key = "$col[0]" . " $col[1]"; + $list{$key} = 1; + } + foreach(sort keys %list) { + $key = $_; + foreach(grep(/$key/, @list)) { + print "$_\n"; + } + }' > $x; + cp $x $x.bkup2; + done +fi + +if ! $skip_scoring ; then + if [ $stage -le 2 ]; then + local/score_stm.sh --min-lmwt $min_lmwt --max-lmwt $max_lmwt $data $lang $dir || exit 1 + fi +fi + + +exit 0 diff --git a/egs/babel/s5d/local/score_map.sh b/egs/babel/s5d/local/score_map.sh new file mode 100755 index 00000000000..ecc528ec909 --- /dev/null +++ b/egs/babel/s5d/local/score_map.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0 + +[ -f ./path.sh ] && . ./path.sh + +# begin configuration section. +cmd=run.pl +stage=0 +decode_mbr=true +reverse=false +min_lmwt=9 +max_lmwt=20 +#end configuration section. + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." + echo " --min_lmwt # minumum LM-weight for lattice rescoring " + echo " --max_lmwt # maximum LM-weight for lattice rescoring " + echo " --reverse (true/false) # score with time reversed features " + exit 1; +fi + +data=$1 +lang_or_graph=$2 +dir=$3 + +symtab=$lang_or_graph/words.txt + +for f in $symtab $dir/lat.1.gz $data/text; do + [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; +done + +mkdir -p $dir/scoring/log + +cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt + +$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ + lattice-best-path --lm-scale=LMWT --word-symbol-table=$symtab \ + "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/LMWT.tra || exit 1; + +if $reverse; then + for lmwt in `seq $min_lmwt $max_lmwt`; do + mv $dir/scoring/$lmwt.tra $dir/scoring/$lmwt.tra.orig + awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \ + <$dir/scoring/$lmwt.tra.orig >$dir/scoring/$lmwt.tra + done +fi + +# Note: the double level of quoting for the sed command +$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ + cat $dir/scoring/LMWT.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; + +exit 0; diff --git a/egs/babel/s5d/local/score_mbr.sh b/egs/babel/s5d/local/score_mbr.sh new file mode 100755 index 00000000000..b2fcaf5cdf9 --- /dev/null +++ b/egs/babel/s5d/local/score_mbr.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# Script for minimum bayes risk decoding. + +[ -f ./path.sh ] && . ./path.sh; + +# begin configuration section. +cmd=run.pl +min_lmwt=9 +max_lmwt=20 +#end configuration section. + +[ -f ./path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# -ne 3 ]; then + echo "Usage: local/score_sclite_conf.sh [--cmd (run.pl|queue.pl...)] " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) 
+  echo "    --stage (0|1|2)                 # start scoring script from part-way through."
+  echo "    --decode_mbr (true/false)       # maximum Bayes risk decoding (confusion network)."
+  echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
+  echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
+  echo "    --reverse (true/false)          # score with time-reversed features "
+  exit 1;
+fi
+
+data=$1
+lang_or_graph=$2
+dir=$3
+
+symtab=$lang_or_graph/words.txt
+
+for f in $symtab $dir/lat.1.gz $data/text; do
+  [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
+done
+
+mkdir -p $dir/scoring/log
+
+cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
+
+$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \
+  lattice-best-path --lm-scale=LMWT --word-symbol-table=$symtab \
+    "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/LMWT.tra || exit 1;
+
+if $reverse; then
+  for lmwt in `seq $min_lmwt $max_lmwt`; do
+    mv $dir/scoring/$lmwt.tra $dir/scoring/$lmwt.tra.orig
+    awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \
+      <$dir/scoring/$lmwt.tra.orig >$dir/scoring/$lmwt.tra
+  done
+fi
+
+# Note: the double level of quoting for the sed command
+$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
+  cat $dir/scoring/LMWT.tra \| \
+  utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
+  compute-wer --text --mode=present \
+    ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1;
+
+exit 0;
diff --git a/egs/babel/s5d/local/score_mbr.sh b/egs/babel/s5d/local/score_mbr.sh
new file mode 100755
index 00000000000..b2fcaf5cdf9
--- /dev/null
+++ b/egs/babel/s5d/local/score_mbr.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Script for minimum Bayes risk decoding.
+
+[ -f ./path.sh ] && . ./path.sh;
+
+# begin configuration section.
+cmd=run.pl
+min_lmwt=9
+max_lmwt=20
+#end configuration section.
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: local/score_sclite_conf.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
+  echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
+  echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
+  exit 1;
+fi
+
+data=$1
+lang_or_graph=$2
+dir=$3
+
+symtab=$lang_or_graph/words.txt
+
+for f in $symtab $dir/lat.1.gz $data/text; do
+  [ ! -f $f ] && echo "score_mbr.sh: no such file $f" && exit 1;
+done
+
+mkdir -p $dir/scoring/log
+
+cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
+
+# We submit the jobs separately, not as an array, because it's hard
+# to get the inverse of the LM scales.
+rm $dir/.error 2>/dev/null
+for inv_acwt in `seq $min_lmwt $max_lmwt`; do
+  acwt=`perl -e "print (1.0/$inv_acwt);"`
+  $cmd $dir/scoring/rescore_mbr.${inv_acwt}.log \
+    lattice-mbr-decode --acoustic-scale=$acwt --word-symbol-table=$symtab \
+      "ark:gunzip -c $dir/lat.*.gz|" ark,t:$dir/scoring/${inv_acwt}.tra \
+    || touch $dir/.error &
done
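+    # For CER scoring, the perl snippet below splits each multi-byte (UTF-8)
+    # word in the CTM into single characters, dividing the word's duration
+    # evenly among them; e.g. a three-character word of duration 0.30s becomes
+    # three 0.10s single-character entries with consecutive start times.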
+    y=${x%.ctm};
+    cat $x.bkup2 | \
+      perl -e '
+      use Encode;
+      while(<>) {
+        chomp;
+        @col = split(" ", $_);
+        @col == 6 || die "Bad number of columns!";
+        if ($col[4] =~ m/[\x80-\xff]{2}/) {
+          $word = decode("UTF8", $col[4]);
+          @char = split(//, $word);
+          $start = $col[2];
+          $dur = $col[3]/@char;
+          $start -= $dur;
+          foreach (@char) {
+            $char = encode("UTF8", $_);
+            $start += $dur;
+            # printf "$col[0] $col[1] $start $dur $char\n";
+            printf "%s %s %.2f %.2f %s %s\n", $col[0], $col[1], $start, $dur, $char, $col[5];
+          }
+        }
+      }' > $y.char.ctm
+    cp $y.char.ctm $y.char.ctm.bkup1
+  done
+fi
+
+if [ $stage -le 2 ]; then
+  $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
+    cp $data/stm $dir/score_LMWT/ '&&' cp $data/glm $dir/score_LMWT/ '&&'\
+    $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm -o all -o dtl;
+
+  if [ $cer -eq 1 ]; then
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.char.log \
+      cp $data/char.stm $dir/score_LMWT/'&&'\
+      $ScoringProgram -s -r $dir/score_LMWT/char.stm stm -h $dir/score_LMWT/${name}.char.ctm ctm -o all -o dtl;
+  fi
+
+#  for x in $dir/score_*/*.ctm; do
+#    mv $x.filt $x;
+#    rm -f $x.filt*;
+#  done
+
+#  for x in $dir/score_*/*stm; do
+#    mv $x.filt $x;
+#    rm -f $x.filt*;
+#  done
+fi
+
+echo "Finished scoring on" `date`
+exit 0
diff --git a/egs/babel/s5d/local/score_stm.sh b/egs/babel/s5d/local/score_stm.sh
new file mode 100755
index 00000000000..56835109722
--- /dev/null
+++ b/egs/babel/s5d/local/score_stm.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# Copyright 2013  Johns Hopkins University (authors: Yenda Trmal)
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+# This is a scoring script for the CTMs in <decode-dir>/score_<lmwt>/${name}.ctm;
+# it tries to mimic the NIST scoring setup as much as possible (and usually does a good job).
+
+# begin configuration section.
+cmd=run.pl
+cer=0
+min_lmwt=7
+max_lmwt=17
+model=
+stage=0
+ctm_name=
+case_insensitive=true
+use_icu=true
+icu_transform='Any-Lower'
+#end configuration section.
+
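+# Note: in the scoring stage below, the text fields of both the STM and the
+# CTM are passed through uconv -x "$icu_transform" (an ICU transliteration;
+# the default 'Any-Lower' simply lowercases), which makes the comparison
+# effectively case-insensitive.
+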
+echo $0 $@
+
+[ -f ./path.sh ] && . ./path.sh
+[ -f ./cmd.sh ] && . ./cmd.sh
+. parse_options.sh || exit 1;
+
+if [ $# -ne 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <lang-dir> <decode-dir>" 
+  echo " Options:"
+  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
+  echo "    --cer (0|1)                     # compute CER in addition to WER"
+  exit 1;
+fi
+
+data=$1
+lang=$2  # This parameter is not used -- kept only for backwards compatibility
+dir=$3
+
+set -e
+set -o pipefail
+set -u
+
+ScoringProgram=`which sclite` || ScoringProgram=$KALDI_ROOT/tools/sctk/bin/sclite
+[ ! -x $ScoringProgram ] && echo "Cannot find scoring program at $ScoringProgram" && exit 1;
+SortingProgram=`which hubscr.pl` || SortingProgram=$KALDI_ROOT/tools/sctk/bin/hubscr.pl
+[ ! -x $SortingProgram ] && echo "Cannot find sorting program at $SortingProgram" && exit 1;
+
+
+for f in $data/stm ; do
+  [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
+done
+
+
+if [ -z $ctm_name ] ; then
+  name=`basename $data`;  # e.g. eval2000
+else
+  name=$ctm_name
+fi
+
+mkdir -p $dir/scoring/log
+if [ $stage -le 0 ] ; then
+  $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
+    set -e';' set -o pipefail';' \
+    cp -f $data/stm $dir/score_LMWT/stm.unsorted '&&' \
+    cp -f $dir/score_LMWT/${name}.ctm $dir/score_LMWT/${name}.ctm.unsorted '&&'\
+    $SortingProgram sortSTM \<$dir/score_LMWT/stm.unsorted \>$dir/score_LMWT/stm.sorted '&&' \
+    $SortingProgram sortCTM \<$dir/score_LMWT/${name}.ctm.unsorted \>$dir/score_LMWT/${name}.ctm.sorted '&&' \
+    paste -d ' ' \<\(cut -f 1-5 -d ' ' $dir/score_LMWT/stm.sorted \) \
+      \<\(cut -f 6- -d ' ' $dir/score_LMWT/stm.sorted \| uconv -f utf8 -t utf8 -x "$icu_transform" \) \
+      \> $dir/score_LMWT/stm '&&' \
+    paste -d ' ' \<\(cut -f 1-4 -d ' ' $dir/score_LMWT/${name}.ctm.sorted \) \
+      \<\(cut -f 5- -d ' ' $dir/score_LMWT/${name}.ctm.sorted \| uconv -f utf8 -t utf8 -x "$icu_transform" \) \
+      \> $dir/score_LMWT/${name}.ctm.sorted2 '&&' \
+    utils/fix_ctm.sh $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm.sorted2 '&&' \
+    $SortingProgram sortCTM \<$dir/score_LMWT/${name}.ctm.sorted2 \>$dir/score_LMWT/${name}.ctm '&&' \
+    $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm \
+      -n "$name.ctm" -f 0 -D -F -o sum rsum prf dtl sgml -e utf-8 || exit 1
+fi
+
+if [ $stage -le 1 ]; then
+  if [ $cer -eq 1 ]; then
+    $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.char.log \
+      $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm \
+        -n "$name.char.ctm" -o sum rsum prf dtl sgml -f 0 -D -F -c NOASCII DH -e utf-8 || exit 1
+  fi
+fi
+
+
+echo "Finished scoring on" `date`
+exit 0
+
diff --git a/egs/babel/s5d/local/search/analyze_stats.pl b/egs/babel/s5d/local/search/analyze_stats.pl
new file mode 100755
index 00000000000..fd09f9c92a7
--- /dev/null
+++ b/egs/babel/s5d/local/search/analyze_stats.pl
@@ -0,0 +1,219 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+
+my $Usage = <<EOU;
+Usage: gunzip -c <stats-files> | $0 [options] <data-dir> <alignment-csv> <keywords-out>
+ e.g.: gunzip -c exp/tri5/decode_dev10h.pem/kws/stats.*.gz | \\
+       $0 --trials 36000 data/dev10h.pem alignment.csv keywords_stats
+
+Allowed options:
+  --trials          : number of trials (length of the search collection) for ATWV computation
+EOU
+
+use strict;
+use warnings;
+use utf8;
+use Data::Dumper;
+use Getopt::Long;
+
+my $T = 36212.6725;
+
+GetOptions ("trials=i" => \$T) or do
+  {
+    print STDERR "Cannot parse the command-line parameters.\n";
+    print STDERR "$Usage\n";
+    die "Cannot continue\n";
+  };
+
+if (@ARGV != 3) {
+  print STDERR "Incorrect number of command-line parameters\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n";
+}
+
+my $data = $ARGV[0];
+my $align = $ARGV[1];
+my $keywords = $ARGV[2];
+
+my %SEGMENTS;
+open(my $seg_file, "$data/segments") or
+  die "Cannot open the segments file in $data/segments";
+
+while (my $line = <$seg_file>) {
+  (my $seg_id, my $file_id, my $tstart, my $tend) = split(" ", $line);
+  $SEGMENTS{$seg_id} = [$file_id, $tstart, $tend];
+}
+
+
+my %ALIGNMENT;
+my %TWVSTATS;
+open(my $align_file, $align) or
+  die "Cannot open the alignment file in $align";
+
+print "Reading alignment...\n";
+my $dummy=<$align_file>;
+while (my $line = <$align_file>) {
+  chomp $line;
+  my @entries = split(/\s*,\s*/, $line);
+  my $kw_id = $entries[3];
+  my $file_id = $entries[1];
+  my $kw_time = $entries[7];
+  my $op_id = join(",", @entries[10 .. 11]);  # 'YES,CORR' | 'YES,FA' | 'NO,MISS' | 'NO,CORR!DET' | ',MISS'
+
+  $TWVSTATS{$kw_id}{$op_id} += 1;
+  next if $op_id eq ",MISS";
+
+  my $key = sprintf "%s,%s", $kw_id, $file_id;
+
+  if ( grep { abs($_ - $kw_time) <= 0.5 } @{$ALIGNMENT{$key}} ) {
+    die "The key $key is not unique\n";
+  }
+  push @{$ALIGNMENT{$key}}, \@entries;
+}
+
+#print Dumper(\%TWVSTATS);
+print "Done reading alignment...\n";
+
+
+my %HITCACHE;
+
+print "Reading stats\n";
+while (my $line = <STDIN>) {
+  my @entries = split(" ", $line);
+
+  my $wav = $SEGMENTS{$entries[1]}[0];
+  my $seg_start = $SEGMENTS{$entries[1]}[1];
+  my $seg_end = $SEGMENTS{$entries[1]}[2];
+
+  my $kw = $entries[0];
+  my $kw_start = $seg_start + $entries[2]/100.00000;
+  my $kw_stop = $seg_start + $entries[3]/100.00000;
+  my $kw_center = ($kw_start + $kw_stop) / 2.0;
+  #print Dumper($kw_start, $kw_stop, $kw_center);
+  my $kw_wav = $wav;
+
+  my $key = sprintf "%s,%s", $kw, $kw_wav;
+
+  if ( not grep { abs( (@{$_}[7] + @{$_}[8])/2.0 - $kw_center) <= 0.1 } @{$ALIGNMENT{$key}} ) {
+    ##print "The key $key, $kw_center does not exist in the alignment\n";
+    ##print join(" ", @entries) . "\n";
+    #print Dumper($ALIGNMENT{$key});
+    #die;
+  } else {
+    my @tmp = @{$ALIGNMENT{$key}};
+    my ($index) = grep { abs( (@{$tmp[$_]}[7] + @{$tmp[$_]}[8]) / 2.0 - $kw_center) <= 0.1 } (0 .. @{$ALIGNMENT{$key}}-1);
+    die unless defined $index;
+    my @ali = @{@{$ALIGNMENT{$key}}[$index]};
+    my $diff = abs($ali[7] - $kw_start);
+
+    #die "Weird hit " . Dumper(\@entries) if $entries[5] != 0;
+
+    my $hit_id = join(" ", @entries[5 .. @entries-1]);
+    $hit_id =~ s/\b0\b//g;
+    $hit_id =~ s/^\s+//g;
+    $hit_id =~ s/\s+/ /g;
+    $hit_id =~ s/\s+$//g;
+    #print $hit_id . "\n";
+    #print Dumper(\@ali, $kw_wav, $diff) if $diff > 0.1;
+    #print Dumper(\@entries);
+
+    my $op_id = join(",", @ali[10 .. 11]);  # 'YES,CORR' | 'YES,FA' | 'NO,MISS' | 'NO,CORR!DET'
+    $HITCACHE{$kw}{$hit_id}{$op_id} += 1;
+    #push @{$HITCACHE{$hit_id}{join(",", @ali[10 .. 11])}}, $entries[4];
+  }
+  #print Dumper(\@entries, $kw_start, $kw_wav);
+  #exit
+}
+#print Dumper(\%HITCACHE);
+print "Done reading stats\n";
+
+open(my $KW, "> $keywords");
+
+print "Analyzing\n";
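+# The per-keyword term-weighted value computed in the loop below is
+#   TWV = 1 - P_miss - beta * P_fa,  with beta = 999.9 (the NIST KWS setting),
+# where P_miss = 1 - N_corr/N_true and P_fa = N_FA/(T - N_true), T being the
+# number of trials given by --trials.  A worked example: with N_true = 10,
+# N_corr = 8, N_FA = 2 and T = 36000, we get P_miss = 0.2 and
+# P_fa = 2/35990 ~ 5.6e-5, so TWV ~ 1 - 0.2 - 0.056 = 0.744.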
+my $TWV = 0;
+my $NEW_TWV = 0;
+my $N_KW = 0;
+foreach my $kwid (sort keys %HITCACHE) {
+  my %old_stats = %{$TWVSTATS{$kwid}};
+  #print Dumper($kwid, \%old_stats);
+  #
+  $old_stats{"YES,CORR"} = 0 unless defined $old_stats{"YES,CORR"};
+  $old_stats{",MISS"} = 0 unless defined $old_stats{",MISS"};
+  $old_stats{"NO,MISS"} = 0 unless defined $old_stats{"NO,MISS"};
+  $old_stats{"YES,FA"} = 0 unless defined $old_stats{"YES,FA"};
+
+  my $n_kw = $old_stats{"YES,CORR"} +
+             $old_stats{",MISS"} +
+             $old_stats{"NO,MISS"};
+
+  my $n_trials = $T - $n_kw;
+
+  next if $n_kw == 0;
+
+  my $p_miss = 0;
+  $p_miss = 1 - $old_stats{"YES,CORR"} / $n_kw unless $n_kw == 0;
+  my $p_fa = $old_stats{"YES,FA"} / $n_trials;
+
+  my $twv = 1 - $p_miss - 999.9 * $p_fa;
+  print "$kwid $n_kw $p_miss $p_fa $twv\n";
+
+  foreach my $kwpath (sort keys %{$HITCACHE{$kwid}}) {
+    my $weight = 0;
+
+    my %new_stats = %{$HITCACHE{$kwid}{$kwpath}};
+    $new_stats{"YES,CORR"} = 0 unless defined $new_stats{"YES,CORR"};
+    $new_stats{"YES,FA"} = 0 unless defined $new_stats{"YES,FA"};
+
+    my $new_p_miss = 1 - ($old_stats{"YES,CORR"} - $new_stats{"YES,CORR"})/ $n_kw;
+    my $new_p_fa = ($old_stats{"YES,FA"} - $new_stats{"YES,FA"}) / $n_trials;
+    my $new_twv = 1 - $new_p_miss - 999.9 * $new_p_fa;
+    if ($new_twv > $twv) {
+      #print "keep: $kwid $kwpath $twv - $new_twv\n";
+      if ((defined $HITCACHE{$kwid}{$kwpath}->{"YES,FA"}) ||
+          (defined $HITCACHE{$kwid}{$kwpath}->{"NO,MISS"}) ||
+          (defined $HITCACHE{$kwid}{$kwpath}->{"YES,CORR"})) {
+        print Dumper($kwid, $kwpath, $HITCACHE{$kwid}{$kwpath});
+      }
+      $old_stats{"YES,CORR"} -= $new_stats{"YES,CORR"};
+      $old_stats{"YES,FA"} -= $new_stats{"YES,FA"};
+    } else {
+      print $KW "$kwid $kwpath\n";
+      #print "remove: $kwid $kwpath $twv - $new_twv\n";
+
+    }
+    # print $W "$kwid $weight\n";
+
+  }
+
+
+  my $new_p_miss = 1 - $old_stats{"YES,CORR"} / $n_kw;
+  my $new_p_fa = $old_stats{"YES,FA"} / $n_trials;
+
+  my $new_twv = 1 - $new_p_miss - 999.9 * $new_p_fa;
+
+  $NEW_TWV = $N_KW/($N_KW+1) * $NEW_TWV + $new_twv / ($N_KW+1);
+  $TWV = $N_KW/($N_KW+1) * $TWV + $twv / ($N_KW+1);
+  $N_KW += 1;
+}
+close($KW);
+#print "ATWV: $TWV $NEW_TWV\n";
diff --git a/egs/babel/s5d/local/search/annotate_kwlist.pl b/egs/babel/s5d/local/search/annotate_kwlist.pl
new file mode 100755
index 00000000000..fbbdc0c119e
--- /dev/null
+++ b/egs/babel/s5d/local/search/annotate_kwlist.pl
@@ -0,0 +1,166 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2016  (Author: Yenda Trmal)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+my $Usage = <<EOU;
+Usage: cat <kwlist.xml> | $0 <categories> > output.kwlist.xml
+ e.g.: cat kwlist.xml | $0 data/dev10h.pem/kwset_kwlist/categories > output.kwlist.xml
+
+EOU
+use strict;
+use warnings "FATAL";
+use utf8;
+use XML::Parser;
+use Data::Dumper;
+
+binmode STDERR, ":utf8";
+binmode STDOUT, ":utf8";
+
+my $IN_KWTEXT=0;
+my $KWTEXT='';
+my $KWID='';
+my %CATEGORIES;
+
+sub kwlist {
+  my @entries = @_;
+  shift @entries;
+  shift @entries;
+
+  my $header="";
+  while (@entries) {
+    my $k = shift @entries;
+    my $w = shift @entries;
+
+    $header .= " $k=\"$w\" ";
+  }
+  print "<kwlist$header>\n";
+}
+
+sub kwlist_ {
+  print "</kwlist>\n";
+}
+
+sub kw {
+  my @entries = @_;
+  shift @entries;
+  shift @entries;
+  #print Dumper(@entries);
+  my %params = @entries;
+  $KWID = $params{kwid};
+}
+
+sub kwtext {
+  my @entries = @_;
+  shift @entries;
+  $IN_KWTEXT=1;
+  #print Dumper(@entries);
+}
+sub char {
+  my @entries = @_;
+  shift @entries;
+  $KWTEXT=$entries[0] if $IN_KWTEXT eq 1;
+}
+
+sub kwtext_ {
+  my @entries = @_;
+  shift @entries;
+  $IN_KWTEXT=0;
+  if ($KWTEXT) {
+    if (exists $CATEGORIES{$KWID}) {
+      print "  <kw kwid=\"$KWID\">\n";
+      print "    <kwtext>$KWTEXT</kwtext>\n";
+      print "    <kwinfo>\n";
+      print "      <attr>\n";
+      print "        <name>ALL</name>\n";
+      print "        <value>1</value>\n";
+      print "      </attr>\n";
+      foreach my $cat (sort keys %{$CATEGORIES{$KWID}} ) {
+        my @entries = split("=", $cat);
+        my $name;
+        my $value;
+
+        if (scalar @entries == 2) {
+          $name = $entries[0];
+          $value = $entries[1];
+        } else {
+          $name = $cat;
+          $value = 1;
+        }
+        print "      <attr>\n";
+        print "        <name>$name</name>\n";
+        print "        <value>$value</value>\n";
+        print "      </attr>\n";
+      }
+      print "    </kwinfo>\n";
+      print "  </kw>\n";
+    } else {
+      my $n = scalar split " ", $KWTEXT;
+      my $l = length join("", split(" ", $KWTEXT));
+
+      $n = sprintf "%02d", $n;
+      $l = sprintf "%02d", $l;
+
+      print "  <kw kwid=\"$KWID\">\n";
+      print "    <kwtext>$KWTEXT</kwtext>\n";
+      print "    <kwinfo>\n";
+      print "      <attr>\n";
+      print "        <name>Characters</name>\n";
+      print "        <value>$l</value>\n";
+      print "      </attr>\n";
+      print "      <attr>\n";
+      print "        <name>NGramOrder</name>\n";
+      print "        <value>$n</value>\n";
+      print "      </attr>\n";
+      print "      <attr>\n";
+      print "        <name>NGram Order</name>\n";
+      print "        <value>$n</value>\n";
+      print "      </attr>\n";
+      print "    </kwinfo>\n";
+      print "  </kw>\n";
+    }
+  }
+}
+
+if (@ARGV != 1) {
+  print STDERR "Incorrect number of command-line parameters\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n";
+}
+
+
+#Read the categories table
+open(G, $ARGV[0]) or die "Cannot open the categories table $ARGV[0]";
+while (my $line = <G>) {
+  my @entries = split(" ", $line);
+  my $kwid = shift @entries;
+
+  foreach my $group (@entries) {
+    $CATEGORIES{$kwid}->{$group} = 1;
+  }
+}
+close(G);
+
+my $p1 = new XML::Parser(Style => 'Subs');
+$p1->setHandlers(Char => \&char);
+$p1->parse(*STDIN);
+
diff --git a/egs/babel/s5d/local/search/combine.sh b/egs/babel/s5d/local/search/combine.sh
new file mode 100755
index 00000000000..4f77c0f0f7c
--- /dev/null
+++ b/egs/babel/s5d/local/search/combine.sh
@@ -0,0 +1,258 @@
+#!/bin/bash
+# Copyright 2013-2014  Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
+# Copyright (c) 2016, Johns Hopkins University (Yenda Trmal)
+# License: Apache 2.0
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+nbest_final=900
+nbest_small=20
+extraid=
+skip_scoring=false
+optimize=true
+duptime=52
+power=1.1
+ntrue_scale=
+#end of configuration section
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+set -e -o pipefail
+set -o nounset  # Treat unset variables as an error
+
+help_message="Usage: $0 [options] <data-dir> <lang-dir> <decode-dir1> <decode-dir2> [<decode-dir3> ... ] <output-dir>
+E.g.: $0 data/dev10h.pem data/lang exp/tri6_nnet/decode_dev10h.pem/kws_10/ exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/ exp/combine/dev10hx.pem
+"
+if [ $# -lt 5 ]; then
+  printf "$help_message\n";
+  exit 1;
+fi
+
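+# Each <decode-dir> argument may be a results file or a decode directory, and
+# may optionally carry an explicit weight after a colon, e.g.
+#   exp/tri6_nnet/decode_dev10h.pem/kws_10:0.7
+# Without an explicit weight, the system's MTWV from its details/score.txt is
+# used (falling back to the default of 0.5) -- see the loop over decode_dirs
+# below.
+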
+data=$1; shift;
+lang=$1; shift;
+output=${@: -1}  # last argument to the script
+decode_dirs=( $@ )  # read the remaining arguments into an array
+unset decode_dirs[${#decode_dirs[@]}-1]  # 'pop' the last argument, which is odir
+num_sys=${#decode_dirs[@]}  # number of systems to combine
+
+if [ -z "$extraid" ] ; then
+  data="$data/kws"
+  output="$output/kws"
+else
+  data="$data/kwset_${extraid}"
+  output="$output/kwset_${extraid}"
+fi
+
+if [ -z "$ntrue_scale" ] ; then
+  ntrue_scale=$num_sys
+fi
+
+declare -A params=([PWR]=$power [NTRUE]=$ntrue_scale)
+declare -A files
+declare -A files_reduced
+
+mkdir -p $output
+mkdir -p $output/log
+
+echo "$0: Combination config (id, weight, results) -- initial"
+
+i=1
+nsystems=0
+for elem in ${decode_dirs[@]} ; do
+  params[W$i]="0.5"
+  if [ -f $elem ] ; then
+    f=$(echo $elem | cut -d: -f1)
+    w=$(echo $elem | cut -d: -s -f2)
+
+    [ ! -z "$w" ] && params[W$i]="$w"
+    files[W$i]=$f
+    files_reduced[W$i]=$output/results.reduced.$i
+
+  elif [ -d $elem ] && [ -d $elem/details ] ; then
+    mtwv=$(cat $elem/details/score.txt | grep "MTWV *=" | cut -f 2 -d '=' | sed 's/ //g')
+    params[W$i]="$mtwv"
+    files[W$i]=$elem/details/results
+    files_reduced[W$i]=$output/results.reduced.$i
+  elif [ -d $elem ] ; then
+    best_dir=$(find ${elem}_* -name "score.txt" \
+                    -path "*$extraid*" \
+                    -path "*/details/*" |\
+               xargs grep "MTWV *=" | \
+               sort -k2,2g -t '=' |
+               tail -n 1 | \
+               cut -f 1 -d ':' | \
+               xargs dirname \
+              )
+    mtwv=$(cat $best_dir/score.txt | grep "MTWV *=" | cut -f 2 -d '=' | sed 's/ //g')
+    params[W$i]="$mtwv"
+    files[W$i]=$best_dir/results
+    files_reduced[W$i]=$output/results.reduced.$i
+  else
+    echo >&2 "$0: The parameter \"$elem\" is neither a file nor a directory"
+  fi
+
+  echo "  $i W$i=${params[W$i]} ${files[W$i]}"
+  echo "${files[W$i]}" > $output/results_W$i
+
+  cat ${files[W$i]} | \
+    local/search/filter_kws_results.pl --probs --nbest $nbest_small > ${files_reduced[W$i]}
+
+  nsystems=$i
+  i=$(($i+1))
+
+done
+
+if [ $nsystems -le 0 ] ; then
+  echo >&2 "No acoustic system found"
+  exit 1
+fi
+
+trials=$(cat $data/trials)
+
+if $optimize ; then
+  cmdline=
+
+
+  declare -A params
+  opt_vars=""
+  opt_task_params=""
+  for w in "${!params[@]}" ; do
+    opt_vars="$opt_vars --var $w=${params[$w]}"
+
+    if [ ${files_reduced[$w]+isset} ] ; then
+      opt_task_params="$opt_task_params $w ${files_reduced[$w]}"
+    fi
+  done
+
+  echo "$0: Optimization -- first stage (reduced-size results)"
+  mkdir -p $output/opt
+  local/optimize2.pl --result-regexp '.*ATWV *= *(.*)' --ftol 0.01 --iftol 0.01\
+    --output-dir $output/opt $opt_vars \
+    local/search/combine_results.pl --probs --power PWR $opt_task_params - \| \
+    local/search/normalize_results_kst.pl --duration $trials --ntrue-scale NTRUE\| \
+    local/search/filter_kws_results.pl --nbest 100 \| \
+    compute-atwv $trials ark:$data/hitlist ark:- | \
+    tee $output/log/optimize.log | grep -i "Iter" || {
+      echo >&2 "$0: Optimization failed (see $output/log/optimize.log for errors)"; exit 1
+
+  # override the default parameters
+  if [ -f $output/opt/params.sh ] ; then
+    . $output/opt/params.sh
+  else
+    echo >&2 "$0: Optimization output in $output/opt/params.sh not found";
+    exit 1;
+  fi
+
+  # Second round of optimization -- this time, only the NTRUE
+  comb_task_params=""
+  for w in "${!params[@]}" ; do
+    if [ ${files[$w]+isset} ] ; then
+      comb_task_params="$comb_task_params ${params[$w]} ${files[$w]}"
+    fi
+  done
+
+  echo "$0: Optimization -- second stage (full size results)"
+  mkdir -p $output/opt_ntrue
+  local/optimize2.pl --result-regexp '.*ATWV *= *(.*)' \
+    --output-dir $output/opt_ntrue --var NTRUE=${params[NTRUE]} \
+    local/search/combine_results.pl --probs --tolerance $duptime --power ${params[PWR]} $comb_task_params - \| \
+    local/search/normalize_results_kst.pl --duration $trials --ntrue-scale NTRUE\| \
+    local/search/filter_kws_results.pl --probs --duptime $duptime \| \
+    compute-atwv $trials ark:$data/hitlist ark:- | \
+    tee $output/log/optimize_ntrue.log | grep -i "Iteration" || {
+      echo >&2 "$0: Optimization failed (see $output/log/optimize_ntrue.log for errors)"; exit 1
+  }
+  # override the default parameters
+  if [ -f $output/opt_ntrue/params.sh ] ; then
+    . $output/opt_ntrue/params.sh
+  else
+    echo >&2 "$0: Optimization output in $output/opt_ntrue/params.sh not found";
+    exit 1;
+  fi
+fi
+
+echo "$0: Combination config (final)"
+echo -n "$0: params=["
+comb_task_params=""
+for w in "${!params[@]}" ; do
+  echo -n " $w=${params[$w]}"
+  if [ ${files[$w]+isset} ] ; then
+    comb_task_params="$comb_task_params ${params[$w]} ${files[$w]}"
+  fi
+done
+echo "]"
+
+mkdir -p $output/details
+
+
+echo "$0: Doing final combination"
+local/search/combine_results.pl \
+  --probs --tolerance $duptime --power ${params[PWR]} $comb_task_params - | \
+  local/search/normalize_results_kst.pl \
+  --duration $trials --ntrue-scale ${params[NTRUE]} |\
+  local/search/filter_kws_results.pl --probs --duptime $duptime > $output/details/results
+
+#Write the parameters
+echo "declare -A params" > $output/details/params.sh
+for w in "${!params[@]}" ; do
+  echo "params[$w]=${params[$w]}"
+done >> $output/details/params.sh
+echo "${params[NTRUE]}" > $output/details/ntrue
+echo "${params[PWR]}" > $output/details/power
+
+if ! $skip_scoring ; then
+  echo "$0: Scoring..."
+  cat $output/details/results |\
+    compute-atwv $trials ark,t:$data/hitlist ark:- \
+      ${output}/details/alignment.csv \
+      > ${output}/details/score.txt \
+      2> ${output}/log/score.log
+
+  cat ${output}/details/alignment.csv |\
+    perl local/search/per_category_stats.pl \
+      --sweep-step 0.005 $trials $data/categories \
+      > ${output}/details/per-category-score.txt \
+      2> ${output}/log/per-category-score.log
+
+  cp $output/details/score.txt $output/score.txt
+
+fi
+
+if [ $stage -le 2 ]; then
+  if [ -f $data/f4de_attribs ] ; then
+    language=""
+    flen=0.01
+    kwlist_name=""
+    . $data/f4de_attribs #override the previous variables
+
+    ecf=$data/ecf.xml
+    rttm=$data/rttm
+    kwlist=$data/kwlist.xml
+
+    mkdir -p ${output}/f4de/
+
+    cat $kwlist | local/search/annotate_kwlist.pl $data/categories > ${output}/f4de/kwlist.xml
+    kwlist=${output}/f4de/kwlist.xml
+
+    cat ${output}/details/results | \
+      utils/int2sym.pl -f 2 $data/utt.map | \
+      local/search/utt_to_files.pl --flen "$flen" $data/../segments |\
+      local/search/write_kwslist.pl --flen "$flen" --language "$language" \
+        --kwlist-id "$kwlist_name" > ${output}/f4de/kwslist.xml
+
+    KWSEval -e $ecf -r $rttm -t $kwlist -a \
+      --zGlobalMeasures Optimum --zGlobalMeasures Supremum \
+      -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \
+      -O -B -q 'OOV:regex=.*' -q 'BaseOOV:regex=.*' \
+      -s ${output}/f4de/kwslist.xml -c -o -b -d -f ${output}/f4de/
+
+    local/kws_oracle_threshold.pl --duration $trials \
+      ${output}/f4de/alignment.csv > ${output}/f4de/metrics.txt
+  fi
+fi
+
+echo "$0: All OK"
diff --git a/egs/babel/s5d/local/search/combine_results.pl b/egs/babel/s5d/local/search/combine_results.pl
new file mode 100755
index 00000000000..694ee47c2cd
--- /dev/null
+++ b/egs/babel/s5d/local/search/combine_results.pl
@@ -0,0 +1,422 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2016  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+my $Usage = <<EOU;
+Usage: $0 [options] <w1> <results1> <w2> <results2> ... <output>
+ e.g.: $0 0.5 kwslist1.xml 0.5 kwslist2.xml ... kwslist_comb.xml
+
+Allowed options:
+  --probs       : The input scores are probabilities, not negative log-likelihoods
+  --method      : Use different combination method (int, default = 1)
+                  0 -- CombSUM
+                  1 -- CombMNZ
+  --input-norm  : how the input data should be normalized (int, default = 0)
+                  0 -- Saturate
+                  1 -- NormSTO
+                  2 -- source-wise NormSTO
+  --output-norm : how the output data should be normalized (int, default = 0)
+                  0 -- Saturate
+                  1 -- NormSTO
+  --power       : The weighted power mean p-coefficient (float, default = 0.5)
+  --gamma       : The gamma coefficient for CombMNZ (float, default = 0.0)
+  --tolerance   : Tolerance (in frames) for being the same hits (float, default = 50)
+
+EOU
+
+use strict;
+use warnings "FATAL";
+use utf8;
+use POSIX;
+use Data::Dumper;
+use Getopt::Long;
+use File::Basename;
+use Scalar::Util qw(looks_like_number);
+
+$Data::Dumper::Indent = 2;
+
+my $TOL = 50;
+my $LIKES = 0;
+
+sub OpenResults {
+  my $list = shift @_;
+
+  my $source = \*STDIN;  # default to reading the standard input
+  if ($list ne "-") {
+    open(my $i, "<$list") || die "Failed to open file $list.\n";
+    return $i;
+  }
+  return $source;
+}
+
+sub PrintResults {
+  my $KWS = shift @_;
+
+  # Start printing
+  my $result = "";
+  foreach my $kwentry (@{$KWS}) {
+    my ($kwid, $file, $tbeg, $tend, $score, $dummy) = @{$kwentry};
+    if ($score > 0) {
+      $score = -log($score);
+    } elsif ($score == 0) {
+      $score = 9999;
+    } else {
+      die "Cannot take logarithm of a negative number\n" . 
join(" ", @{$kwentry}) . "\n"; + } + $result .= "$kwid $file $tbeg $tend $score\n"; + } + + return $result; +} + +sub KwslistTimeCompare { + my ($a, $b) = @_; + + if ($a->[0] eq $b->[0]) { # KWID + if ($a->[1] eq $b->[1]) { # FILEID + if (abs($a->[2] - $b->[2]) <= $TOL) { # KW START + if (abs($a->[3] - $b->[3]) <= $TOL) { #KW END + return 0; + } else { + return ($a->[3] <=> $b->[3] ); + } + } else { + return $a->[2] <=> $b->[2]; + } + } else { + return $a->[1] cmp $b->[1]; + } + } else { + $a->[0] cmp $b->[0]; + } +} + +sub KwslistTimeSort { + my $a = shift; + my $b = shift; + return KwslistTimeCompare($a, $b); +} + +sub ReadLines { + my $kwid = shift @_; + my %files = %{shift @_}; + my @lines = (); + + foreach my $id (sort keys %files) { + my $l = readline $files{$id}; + next unless $l; + chomp $l; + my @entries = split " ", $l; + while ($kwid eq $entries[0]) { + push @entries, $id; + push @lines, [@entries]; + + $l = readline $files{$id}; + last unless $l; + chomp $l; + @entries = split " ", $l; + } + next unless defined $l; + push @entries, $id; + push @lines, [@entries]; + } + return @lines; +} + +sub ReadFirstLines { + my %files = %{shift @_}; + my @lines = (); + + foreach my $id (sort keys %files) { + my $l = readline $files{$id}; + next unless $l; + chomp $l; + + my @entries = split " ", $l; + push @entries, $id; + push @lines, [@entries]; + } + return @lines; +} + +sub MergeCombPwrSum { + my @results = @{shift @_}; + my %weights = %{shift @_}; + my $pwr = shift @_; + my @output = (); + + return @output if not @results; + + while (@results) { + my @mergelist = (); + push @mergelist, shift @results; + while ((@results) && (KwslistTimeCompare($mergelist[0], $results[0]) == 0)) { + push @mergelist, shift @results; + } + + my $best_score = -9999; + my $tend; + my $tbegin; + my $out_score = 0; + foreach my $elem (@mergelist) { + my $score = $elem->[4]; + my $id = $elem->[5]; + if ($score > $best_score) { + $best_score = $score; + $tend = $elem->[3]; + $tbegin = $elem->[2]; + } + #print "$out_score += $weights{$id} * $score\n"; + $out_score += $weights{$id} * ($score ** $pwr); + } + $out_score = $out_score**(1.0/$pwr); + #print "$out_score \n\n\n"; + my $KWID = $mergelist[0]->[0]; + my $UTT = $mergelist[0]->[1]; + push @output, [$KWID, $UTT, $tbegin, $tend, $out_score, ""]; + } + + return \@output; +} + +## More generic version of the combMNZ method +sub MergeCombPwrMNZ { + my @results = @{shift @_}; + my %weights = %{shift @_}; + my $pwr = shift @_; + my $gamma = shift @_; + my @output = (); + + $gamma = 0 unless defined $gamma; + return @output if not @results; + + while (@results) { + my @mergelist = (); + push @mergelist, shift @results; + while ((@results) && (KwslistTimeCompare($mergelist[0], $results[0]) == 0)) { + push @mergelist, shift @results; + } + + my $best_score = -9999; + my $tend; + my $tbegin; + my $out_score = 0; + foreach my $elem (@mergelist) { + my $score = $elem->[4]; + my $id = $elem->[5]; + if ($score > $best_score) { + $best_score = $score; + $tend = $elem->[3]; + $tbegin = $elem->[2]; + } + #print "$out_score += $weights{$id} * $score\n"; + $out_score += $weights{$id} * ($score ** $pwr); + } + $out_score = (@mergelist ** $gamma) * $out_score**(1.0/$pwr); + #print "$out_score \n\n\n"; + my $KWID = $mergelist[0]->[0]; + my $UTT = $mergelist[0]->[1]; + push @output, [$KWID, $UTT, $tbegin, $tend, $out_score, "out"]; + } + + return \@output; +} + +### Sum-to-one normalization +sub NormalizeSTO { + my @results = @{shift @_}; + my @output = (); + my $sum = 0; + foreach 
my $elem(@results) {
+    $sum += $elem->[4];
+  }
+  foreach my $elem(@results) {
+    $elem->[4] = $elem->[4]/$sum;
+    push @output, $elem;
+  }
+  return \@output;
+}
+
+### This will STO normalize all entries in the @results according
+### to the id, so that entries with the same id will sum to one
+sub NormalizeSTOMulti {
+  my @results = @{shift @_};
+  my @output = ();
+  my $sum = 0;
+  my %sums = ();
+  foreach my $elem(@results) {
+    $sums{$elem->[5]} += $elem->[4];
+  }
+  foreach my $elem(@results) {
+    $elem->[4] = $elem->[4]/$sums{$elem->[5]};
+    push @output, $elem;
+  }
+  return \@output;
+}
+
+### Simple normalization of probabilities/scores
+### Everything larger than 1 will be set to 1
+sub NormalizeSaturate {
+  my @results = @{shift @_};
+  my @output = ();
+  my $sum = 0;
+  foreach my $elem(@results) {
+    $elem->[4] = $elem->[4] > 1.0 ? 1.0 : $elem->[4];
+    push @output, $elem;
+  }
+  return \@output;
+}
+
+my $method = 1;
+my $input_norm = 0;
+my $output_norm = 0;
+my $gamma = 0;
+my $power = 0.5;
+GetOptions('tolerance=f' => \$TOL,
+           'method=i' => sub { shift; $method = shift;
+                               if (($method < 0) || ($method > 1)) {
+                                 die "Unknown method $method\n\n$Usage\n";
+                               }
+                             },
+           'input-norm=i' => sub { shift; my $n = shift;
+                                   $input_norm = $n;
+                                   if (($n < 0) || ($n > 2)) {
+                                     die "Unknown input-norm $n\n\n$Usage\n";
+                                   }
+                                 },
+           'output-norm=i' => sub { shift; my $n = shift;
+                                    $output_norm = $n;
+                                    if (($n < 0) || ($n > 1)) {
+                                      die "Unknown output-norm $n\n\n$Usage\n";
+                                    }
+                                  },
+           'power=f' => \$power,
+           'gamma=f' => \$gamma,
+           'inv-power=f' => sub {
+             shift; my $val = shift;
+             $power = 1.0/$val;
+           },
+           'probs' => sub {
+             $LIKES = 0;
+           }
+          ) || do {
+  print STDERR "Cannot parse the command-line parameters.\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+};
+
+if (@ARGV % 2 != 1) {
+  print STDERR "Bad number of (weight, results_list) pairs.\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+}
+
+# Work out the input/output sources
+my %results_files = ();
+my %results_w = ();
+
+my $i = 0;
+while (@ARGV != 1) {
+  my $w = shift @ARGV;
+  looks_like_number($w) || die "$0: Bad weight: $w.\n";
+  $results_w{$i} = $w;
+  $results_files{$i} = OpenResults(shift @ARGV);
+  $i += 1;
+}
+
+my $sumw=0;
+foreach my $val (values %results_w ) {
+  $sumw += $val;
+}
+#foreach my $val (keys %results_w ) {
+#  $results_w{$val} = $results_w{$val}/$sumw;
+#}
+
+my $output = shift @ARGV;
+
+my $deb = 0;
+my @lines = ();
+@lines = ReadFirstLines(\%results_files);
+@lines = sort { KwslistTimeSort($a, $b) } @lines;
+push @lines, ReadLines($lines[0]->[0], \%results_files);
+@lines = sort { KwslistTimeSort($a, $b) } @lines;
+
+while (@lines) {
+  my @res = ();
+
+  push @res, shift @lines;
+  while ((@lines) && ($lines[0]->[0] eq $res[0]->[0])) {
+    push @res, shift @lines;
+  }
+  #print PrintResults(\@res);
+  #print PrintResults(NormalizeSTO(MergeCombMNZ(\@res, \%results_w)));
+  #print PrintResults(NormalizeCutoff(MergeCombPwrSum(\@res, \%results_w, $power)));
+  #print PrintResults(NormalizeSaturate(MergeCombPwrMNZ(\@res, \%results_w, $power, $gamma)));
+  #print PrintResults(NormalizeSTO(MergeCombPwrMNZ(NormalizeSTO(\@res), \%results_w, $power, $gamma)));
+
+  my $data = undef;
+  if ($input_norm == 1) {
+    $data = NormalizeSTO(\@res);
+  } elsif ($input_norm == 2) {
+    $data = NormalizeSTOMulti(\@res);
+  } else {
+    $data = NormalizeSaturate(\@res);
+  }
+
+  if ($method == 0) {
+    $data = MergeCombPwrSum($data, \%results_w, $power);
+  } else {
+    $data = MergeCombPwrMNZ($data, \%results_w, $power, $gamma);
+  }
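+
+  # A worked example of the power-mean merge above (made-up numbers): two
+  # overlapping hits with scores 0.64 and 0.36, system weights 0.5 each and
+  # --power 0.5 combine to (0.5*0.64**0.5 + 0.5*0.36**0.5)**(1/0.5)
+  # = (0.4 + 0.3)**2 = 0.49.  With --power 1 this reduces to a plain
+  # weighted sum; smaller powers behave more like a weighted geometric
+  # mean.  For CombMNZ there is an additional factor (number of systems
+  # that found the hit)**gamma, which is 1 with the default --gamma 0.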
+
+  if ($output_norm == 1) {
+    $data = NormalizeSTO($data);
+  } else {
+    $data = NormalizeSaturate($data);
+  }
+
+  print PrintResults($data);
+
+  #exit if $deb > 3;
+  #$deb += 1 if $deb;
+  #if ($res[0]->[0] eq "KW305-02318") {
+  #  $deb = 1;
+  #  print Dumper("START", \@res, \@lines) if $deb;
+  #}
+
+  my @tmp = ();
+  if (@lines) {
+    @tmp = ReadLines($lines[0]->[0], \%results_files);
+  } else {
+    # this is probably not necessary -- the ReadLines() call
+    # will always read one line _past_ the current KW,
+    # so we should always have an extra KW in @lines
+    @tmp = ReadFirstLines(\%results_files);
+  }
+
+  #print Dumper("TMP", \@tmp) if $deb;
+  if (@tmp > 0) {
+    #print Dumper("XXX", \@res, \@lines) if $deb;
+    push @lines, @tmp;
+    @lines = sort { KwslistTimeSort($a, $b) } @lines;
+  }
+
+  #print Dumper(\@res, \@lines) if $deb;
+
+}
diff --git a/egs/babel/s5d/local/search/combine_special.sh b/egs/babel/s5d/local/search/combine_special.sh
new file mode 100755
index 00000000000..5802f49be06
--- /dev/null
+++ b/egs/babel/s5d/local/search/combine_special.sh
@@ -0,0 +1,200 @@
+#!/bin/bash
+# Copyright 2013-2014  Johns Hopkins University (authors: Jan Trmal, Guoguo Chen, Dan Povey)
+# Copyright (c) 2016, Johns Hopkins University (Yenda Trmal <jtrmal@gmail.com>)
+# License: Apache 2.0
+
+# begin configuration section.
+cmd=run.pl
+stage=0
+nbest_final=900
+nbest_small=20
+extraid=
+skip_scoring=false
+optimize=true
+duptime=52
+power=1.1
+ntrue_scale=
+#end of configuration section
+
+[ -f ./path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+set -e -o pipefail
+set -o nounset # Treat unset variables as an error
+
+help_message="Usage: $0 [options] <data-dir> <lang-dir> <template-dir> <decode-dir1> [<decode-dir2> ... ] <output-dir>
+E.g.: $0 data/dev10h.pem data/lang exp/tri6_nnet/decode_dev10h.pem/kws_10/ exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/ exp/combine/dev10hx.pem
+"
+if [ $# -lt 5 ]; then
+  printf "$help_message\n";
+  exit 1;
+fi
+
+
+data=$1; shift;
+lang=$1; shift;
+template=$1; shift;
+output=${@: -1} # last argument to the script
+decode_dirs=( $@ ) # read the remaining arguments into an array
+unset decode_dirs[${#decode_dirs[@]}-1] # 'pop' the last argument which is odir
+num_sys=${#decode_dirs[@]} # number of systems to combine
+
+if [ -z "$extraid" ] ; then
+  data="$data/kws"
+  output="$output/kws"
+else
+  data="$data/kwset_${extraid}"
+  output="$output/kwset_${extraid}"
+fi
+
+if [ -z "$ntrue_scale" ] ; then
+  ntrue_scale=$num_sys
+fi
+
+declare -A params=([PWR]=$power [NTRUE]=$ntrue_scale)
+declare -A files
+declare -A files_reduced
+
+mkdir -p $output
+mkdir -p $output/log
+
+if [ -f $template/details/params.sh ] ; then
+  . $template/details/params.sh
+else
+  echo >&2 "$0: Optimization output in $template/details/params.sh not found";
+  exit 1;
+fi
+
+
+echo "$0: Combination config (id, weight, results) -- initial"
+
+i=1
+for elem in ${decode_dirs[@]} ; do
+  if [ -f $elem ] ; then
+    files[W$i]=$elem  # the element is directly a results file
+  elif [ -d $elem ] && [ -d $elem/details ] ; then
+    files[W$i]=$elem/details/results
+  elif [ -d $elem ] ; then
+    tmpl=`cat $template/results_W${i}`
+    echo $tmpl
+    #exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist4_10/details/results
+    if [[ "$tmpl" == */details/results ]] ; then
+      base=`echo $tmpl | sed 's:/details/results::g'`
+      base=`basename $base`
+      lmwt=${base##*_}
+      tmpl_kwset=${base%_*}
+      tmpl_kwset=${tmpl_kwset##*_}
+    else
+      echo >&2 "The template results file does not follow the naming pattern"
+      exit 1
+    fi
+    f=${elem}_${lmwt}/details/results
+    if [ ! 
-f $f ]; then + echo >&2 "The file $f does not exist (check template or $template/results_W${i})" + exit 1 + fi + kwset=${elem##*_} + if [ "$kwset" != "$tmpl_kwset" ] ; then + echo >&2 "WARNING: The the kwset and the tmpl kwset do not match! ($kwset vs $tmpl_kwset) " + fi + + files[W$i]=$f + else + echo >&2 "$0: The parameter\"$elem\" is not file nor directory" + fi + echo " $i W$i=${params[W$i]} ${files[W$i]}" + + i=$(($i+1)) + +done + + + +trials=$(cat $data/trials) + + +echo "$0: Combination config (final)" +echo -n "$0: params=[" +comb_task_params="" +for w in "${!params[@]}" ; do + echo -n " $w=${params[$w]}" + if [ ${files[$w]+isset} ] ; then + comb_task_params="$comb_task_params ${params[$w]} ${files[$w]}" + fi +done +echo "]" + +mkdir -p $output/details + + +echo "$0: Doing final combination" +local/search/combine_results.pl \ + --probs --tolerance $duptime --power ${params[PWR]} $comb_task_params - | \ + local/search/normalize_results_kst.pl \ + --duration $trials --ntrue-scale ${params[NTRUE]} |\ + local/search/filter_kws_results.pl --probs --duptime $duptime > $output/details/results + +#Write the parapeters +echo "declare -A params" > $output/details/params.sh +for w in "${!params[@]}" ; do + echo "params[$w]=${params[$w]}" +done >> $output/details/params.sh +echo "${params[NTRUE]}" > $output/details/ntrue +echo "${params[PWR]}" > $output/details/power + + +echo "DATA: $data" +if ! $skip_scoring && [ -f $data/hitlist ] ; then + echo "$0: Scoring..." + cat $output/details/results |\ + compute-atwv $trials ark,t:$data/hitlist ark:- \ + ${output}/details/alignment.csv \ + > ${output}/details/score.txt \ + 2> ${output}/log/score.log + + cat ${output}/details/alignment.csv |\ + perl local/search/per_category_stats.pl \ + --sweep-step 0.005 $trials $data/categories \ + > ${output}/details/per-category-score.txt \ + 2> ${output}/log/per-category-score.log + + cp $output/details/score.txt $output/score.txt + +fi + +if [ $stage -le 2 ]; then + if [ -f $data/f4de_attribs ] ; then + language="" + flen=0.01 + kwlist_name="" + . $data/f4de_attribs #override the previous variables + + ecf=$data/ecf.xml + rttm=$data/rttm + kwlist=$data/kwlist.xml + + mkdir -p ${output}/f4de/ + + cat ${output}/details/results | \ + utils/int2sym.pl -f 2 $data/utt.map | \ + local/search/utt_to_files.pl --flen "$flen" $data/../segments |\ + local/search/write_kwslist.pl --flen "$flen" --language "$language" \ + --kwlist-id "$kwlist_name" > ${output}/f4de/kwslist.xml + + if [ -f $rttm ] ; then + cat $kwlist | local/search/annotate_kwlist.pl $data/categories > ${output}/f4de/kwlist.xml + kwlist=${output}/f4de/kwlist.xml + + KWSEval -e $ecf -r $rttm -t $kwlist -a \ + --zGlobalMeasures Optimum --zGlobalMeasures Supremum \ + -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \ + -O -B -q 'OOV:regex=.*' -q 'BaseOOV:regex=.*' \ + -s ${output}/f4de/kwslist.xml -c -o -b -d -f ${output}/f4de/ + + local/kws_oracle_threshold.pl --duration $trials \ + ${output}/f4de/alignment.csv > ${output}/f4de/metrics.txt + fi + fi +fi + +echo "$0: All OK" diff --git a/egs/babel/s5d/local/search/compile_keywords.sh b/egs/babel/s5d/local/search/compile_keywords.sh new file mode 100755 index 00000000000..92dc4220a8e --- /dev/null +++ b/egs/babel/s5d/local/search/compile_keywords.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +silence_word= +filter='OOV=0' +# End configuration section +echo $0 "$@" +. 
parse_options.sh || exit 1; + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + + +data=$1 +lang=$2 +workdir=$3 + +mkdir -p $workdir +cat $data/categories | \ + local/search/filter_by_category.pl $data/categories "$filter" > $workdir/categories + +if [ ! -s $workdir/categories ]; then + echo "$0: WARNING: $workdir/categories is zero-size. That means no keyword" + echo "$0: WARNING: was found that fits the filter \"$filter\". That might be expected." + touch $workdir/keywords.int + touch $workdir/keywords.fsts + exit 0 +fi + +grep -w -F -f <(awk '{print $1}' $workdir/categories) \ + $data/keywords.int > $workdir/keywords.int + +if [ -s $workdir/keywords.int ]; then + if [ -z $silence_word ]; then + transcripts-to-fsts ark:$workdir/keywords.int \ + ark,scp,t:$workdir/keywords.fsts,- | sort -o $workdir/keywords.scp + else + silence_int=`grep -w $silence_word $lang/words.txt | awk '{print $2}'` + [ -z $silence_int ] && \ + echo "$0: Error: could not find integer representation of silence word $silence_word" && exit 1; + transcripts-to-fsts ark:$data/keywords.int ark,t:- | \ + awk -v 'OFS=\t' -v silint=$silence_int '{ + if (NF == 4 && $1 != 0) { print $1, $1, silint, silint; } print; + }' | fstcopy ark:- ark,scp,t:$workdir/keywords.fsts,- | \ + sort -o $workdir/keywords.scp + fi +else + echo "$0: WARNING: $workdir/keywords.int is zero-size. That means no keyword" + echo "$0: WARNING: was found in the dictionary. That might be expected -- or not." + touch $workdir/keywords.fsts +fi + diff --git a/egs/babel/s5d/local/search/compile_proxy_keywords.sh b/egs/babel/s5d/local/search/compile_proxy_keywords.sh new file mode 100755 index 00000000000..a28105123f3 --- /dev/null +++ b/egs/babel/s5d/local/search/compile_proxy_keywords.sh @@ -0,0 +1,271 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) +# 2012-2014 Guoguo Chen +# License: Apache 2.0 + +# Begin configuration section. +nj=8 +cmd=run.pl +beam=-1 # Beam for proxy FST, -1 means no prune +phone_beam=-1 # Beam for KxL2xE FST, -1 means no prune +nbest=-1 # Use top n best proxy keywords in proxy FST, -1 means all + # proxies +phone_nbest=-1 # Use top n best phone sequences in KxL2xE, -1 means all + # phone sequences +confusion_matrix= # If supplied, using corresponding E transducer +count_cutoff=1 # Minimal count to be considered in the confusion matrix; + # will ignore phone pairs that have count less than this. +pron_probs=true # If true, then lexicon looks like: + # Word Prob Phone1 Phone2... +g_beam=10 +g_alpha= +g_inv_alpha= +g2p_nbest=10 +g2p_mass=0.95 +case_insensitive=true +icu_transform="Any-Lower" +filter="OOV=1" + +# End configuration section + +echo "$0 " "$@" +. ./utils/parse_options.sh || exit 1; + +# Gets phone symbols +phone_start=2 +if $pron_probs; then + phone_start=3 +fi + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +data=$1 +lang=$2 +l1lex=$3 +g2p=$4 +workdir=$5 + +if [ ! -z "$g_inv_alpha" ] && [ $g_inv_alpha -ne 0 ] ; then + g_alpha=$(echo print 1.0/$g_inv_alpha | perl ) +fi + +# Checks some files. +for f in $l1lex $data/categories $data/keywords.txt ; do + [ ! 
-f $f ] && echo "$0: no such file $f" && exit 1
+done
+
+mkdir -p $workdir
+cat $data/categories | \
+  local/search/filter_by_category.pl $data/categories "$filter" > $workdir/categories
+
+grep -w -F -f <(awk '{print $1}' $workdir/categories) $data/keywords.txt |\
+  sort -R > $workdir/keywords.filtered
+
+paste <(cut -f 1 $workdir/keywords.filtered) \
+      <(cut -f 2- $workdir/keywords.filtered | uconv -f utf-8 -t utf-8 -x "$icu_transform") > $workdir/keywords.txt
+
+cat $l1lex | perl -e '
+  while (<>) {
+    ($word, $prob, $pron) = split " ", $_, 3;
+    $pron =~ s/_[^\s]+//g;
+    $pron =~ s/\s+/ /g;
+    $pron =~ s/^\s+//g;
+    $pron =~ s/\s+$//g;
+    print "$word $prob $pron\n"
+  }
+' | sort -u > $workdir/L1.lex
+
+mkdir -p $workdir/lexicon
+
+cat $workdir/keywords.txt | perl -e '
+  open(f, shift @ARGV);
+  while(<f>) {
+    @F = split;
+    $lex{$F[0]} = 1;
+  }
+  close(f);
+
+  while(<STDIN>) {
+    @F = split;
+    foreach $w (@F[1..$#F]) {
+      print "$w\n" unless defined $lex{$w};
+    }
+  }
+' $workdir/L1.lex | sort -u > $workdir/lexicon/oov.txt
+
+local/apply_g2p.sh --nj $nj --cmd "$cmd" --icu-transform "$icu_transform" \
+  --var-counts $g2p_nbest --var-mass $g2p_mass \
+  $workdir/lexicon/oov.txt $g2p $workdir/lexicon || exit 1
+
+cat $workdir/L1.lex | \
+  perl -e '
+  while ( $line = <STDIN> ) {
+    chomp $line;
+    ($word, $pron) = split " ", $line, 2;
+    $pron = join(" ", split(" ", $pron));
+    push @{$LEX{$pron}}, $word;
+  }
+
+  open(L1, "| sort -u > $ARGV[0]") or die "Cannot open $ARGV[0]\n";
+  open(MAP, "| sort -u > $ARGV[1]") or die "Cannot open $ARGV[1]\n";
+  foreach $pron (keys %LEX) {
+    $head = $LEX{$pron}->[0];
+    print L1 "$head $pron\n";
+    foreach $alt (@{$LEX{$pron}}) {
+      print MAP "0 0 $alt $head\n";
+    }
+  }
+  print MAP "0\n";
+  close(L1);
+  close(MAP);
+' $workdir/L1.dedup.lex $workdir/L1.revdup.fst.txt
+
+pron_probs_param=""
+$pron_probs && pron_probs_param="--pron-probs"
+
+# Creates words.txt that covers all the words in L1.lex and L2.lex. We append
+# new words to the original word symbol table.
+cat $workdir/L1.lex $workdir/lexicon/lexicon.lex | \
+  perl -e '
+  binmode STDIN, ":utf8";
+  binmode STDOUT, ":utf8";
+  binmode STDERR, ":utf8";
+  $max_id=0;
+  %WORDS=();
+  open(F, "<:utf8" , $ARGV[0]) or die "Cannot open $ARGV[0]";
+  while(<F>) {
+    ($word, $id) = split(" ", $_);
+    $WORDS{$word} = $id;
+    $max_id = $id > $max_id ? $id : $max_id;
+  }
+  close(F);
+  while (<STDIN>) {
+    @F = split(" ", $_);
+    if (not exists $WORDS{$F[0]}) {
+      $WORDS{$F[0]} = $max_id + 1;
+      $max_id += 1;
+    }
+  }
+  foreach $kw (keys %WORDS) {
+    print "$kw $WORDS{$kw}\n";
+  }
+  ' $lang/words.txt | sort -k2,2n > $workdir/words.txt
+
+cat $workdir/words.txt | \
+  uconv -f utf-8 -t utf-8 -x "$icu_transform" > $workdir/words.normalized.txt
+
+#--ndisambig=`utils/add_lex_disambig.pl \
+#--  $pron_probs_param $workdir/L1.dedup.lex $workdir/L1.disambig.lex`
+#--ndisambig=$[$ndisambig+1]; # add one disambig symbol for silence in lexicon FST.
+#--( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $workdir/disambig.txt
+
+#remove all position-dependent info and other tags
+awk '{print $1;}' $lang/phones.txt | sed 's/_[BEIS]//g' | sed 's/_.*//g' | \
+  grep -v '^#' | uniq |\
+  perl -ne 'BEGIN{$i=0;}; chomp; print $_ . " " . $i . 
"\n"; $i+=1;' > $workdir/phones.txt + +#--cat $workdir/L2.lex $workdir/L1.lex |\ +#-- awk '{for(i='$phone_start'; i <= NF; i++) {print $i;}}' |\ +#-- sort -u | sed '1i\' |\ +#-- cat - $workdir/disambig.txt | awk 'BEGIN{x=0} {print $0"\t"x; x++;}' \ +#-- > $workdir/phones.txt + +cat $workdir/keywords.txt |\ + local/kwords2indices.pl --map-oov 0 $workdir/words.normalized.txt > $workdir/keywords.int + + +cat $workdir/L1.lex $workdir/lexicon/lexicon.lex | sed 's/\t/ /g' | \ + perl -ne 'chomp; + ($word, $pron) = split / /, $_, 2; + $pron =~ s/_[^ ]*//g; + print "$word $pron\n";' | \ + sort -u > $workdir/L2.lex + +cat $workdir/L1.revdup.fst.txt |\ + fstcompile --isymbols=$workdir/words.txt --osymbols=$workdir/words.txt - |\ + fstarcsort --sort_type=olabel - $workdir/L1.revdup.fst + +echo "" + +#--phone_disambig_symbol=`grep \#0 $workdir/phones.txt | awk '{print $2}'` +#--word_disambig_symbol=`grep \#0 $workdir/words.txt | awk '{print $2}'` +#--phone_disambig_symbols=`grep "^#" $workdir/phones.txt |\ +#-- awk '{print $2}' | tr "\n" " "` +#--word_disambig_symbols=`grep "^#" $workdir/words.txt |\ +#-- awk '{print $2}' | tr "\n" " "` +#-- +#--cat $workdir/L1.disambig.lex |\ +#-- utils/make_lexicon_fst.pl $pron_probs_param - |\ +#-- fstcompile --isymbols=$workdir/phones.txt \ +#-- --osymbols=$workdir/words.txt - |\ +#-- fstaddselfloops "echo $phone_disambig_symbol |" \ +#-- "echo $word_disambig_symbol |" |\ +#-- fstdeterminize | fstrmsymbols "echo $phone_disambig_symbols|" |\ +#-- fstrmsymbols --remove-from-output=true "echo $word_disambig_symbols|" |\ +#-- fstarcsort --sort_type=ilabel > $workdir/L1.fst + +cat $workdir/L1.dedup.lex |\ + utils/make_lexicon_fst.pl $pron_probs_param - |\ + fstcompile --isymbols=$workdir/phones.txt --osymbols=$workdir/words.txt - |\ + fstarcsort --sort_type=ilabel > $workdir/L1.fst + +echo "" +cat $workdir/L2.lex |\ + utils/make_lexicon_fst.pl $pron_probs_param - |\ + fstcompile --isymbols=$workdir/phones.txt --osymbols=$workdir/words.txt - |\ + fstinvert | fstarcsort --sort_type=olabel > $workdir/L2.fst + +# Compiles E.fst +conf_mat_param="" +if [ ! 
-z $confusion_matrix ]; then + echo "$0: Using confusion matrix, normalizing" + local/count_to_logprob.pl --cutoff $count_cutoff \ + $confusion_matrix $workdir/confusion.txt + conf_mat_param="--confusion-matrix $workdir/confusion.txt" +fi + +cat $workdir/phones.txt | \ + grep -v -F -f $lang/phones/silence.txt | awk '{print $1;}' |\ + local/build_edit_distance_fst.pl --boundary-off=true $conf_mat_param - - |\ + fstcompile --isymbols=$workdir/phones.txt \ + --osymbols=$workdir/phones.txt - $workdir/E.fst + +# Pre-composes L2 and E, for the sake of efficiency +fstcompose $workdir/L2.fst $workdir/E.fst |\ + fstarcsort --sort_type=ilabel > $workdir/L2xE.fst + +nof_keywords=`cat $workdir/keywords.txt |wc -l` +if [ $nj -gt $nof_keywords ]; then + nj=$nof_keywords + echo "$0: Too many number of jobs, using $nj instead" +fi + +# Generates the proxy keywords +mkdir -p $workdir/split/log +if [ -z "$g_alpha" ] || [ $g_inv_alpha -eq 0 ] ; then + echo "$0: Generating proxies without G.fst" + $cmd JOB=1:$nj $workdir/split/log/proxy.JOB.log \ + split -n r/JOB/$nj $workdir/keywords.int \| \ + generate-proxy-keywords --verbose=1 \ + --proxy-beam=$beam --proxy-nbest=$nbest \ + --phone-beam=$phone_beam --phone-nbest=$phone_nbest \ + $workdir/L2xE.fst $workdir/L1.fst ark:- ark,t:$workdir/split/proxy.JOB.fsts +else + echo "$0: Generating proxies with G.fst" + $cmd JOB=1:$nj $workdir/split/log/proxy.JOB.log \ + split -n r/JOB/$nj $workdir/keywords.int \| \ + generate-proxy-keywords-ex --verbose=1 --g-beam=$g_beam --g-alpha=$g_alpha\ + --proxy-beam=$beam --proxy-nbest=$nbest \ + --phone-beam=$phone_beam --phone-nbest=$phone_nbest \ + $workdir/L2xE.fst $workdir/L1.fst $lang/G.fst ark:- ark,t:$workdir/split/proxy.JOB.fsts +fi + + +proxy_fsts="" +for j in `seq 1 $nj`; do + proxy_fsts="$proxy_fsts $workdir/split/proxy.$j.fsts" +done +cat $proxy_fsts | fsttablecompose $workdir/L1.revdup.fst ark:- ark:- |\ + fsts-project ark:- ark,scp:$workdir/keywords.fsts,-|\ + sort -o $workdir/keywords.scp diff --git a/egs/babel/s5d/local/search/create_categories.pl b/egs/babel/s5d/local/search/create_categories.pl new file mode 100755 index 00000000000..27703af20ca --- /dev/null +++ b/egs/babel/s5d/local/search/create_categories.pl @@ -0,0 +1,112 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2015 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== +my $Usage = < + e.g.: $0 keywords.txt + or $0 --results results + +Allowed options: + --results : instead of keyword specification format, keyword search + results format is assumed. + +NOTE: + If you need both information, you can call the script twice (with different + parameters) and call local/search/normalize_categories.pl to merge (and normalize) + these two tables together. 
+EOU
+
+use strict;
+use warnings;
+use utf8;
+use POSIX;
+use Data::Dumper;
+use Getopt::Long;
+use open qw(:std :utf8);
+
+binmode STDIN, ":utf8";
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+
+my $result_format;
+GetOptions("results", \$result_format) or do {
+  print STDERR "Cannot parse the command-line parameters.\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+};
+
+if ( @ARGV > 1 ) {
+  print STDERR "Incorrect number of command-line parameters\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+}
+
+sub QuantizeCount {
+  my $count = shift @_;
+
+  if ($count <= 0) {
+    return "0";
+  } elsif ($count == 1) {
+    return "000-001";
+  } elsif ($count <= 5) {
+    return "002-005";
+  } elsif ($count <=10) {
+    return "006-010";
+  } elsif ($count <=20) {
+    return "011-020";
+  } elsif ($count <=100) {
+    return "021-100";
+  } else {
+    return "101-inf";
+  }
+}
+
+if (not $result_format ) {
+  my $kwlist_name=$ARGV[0];
+  while (my $line = <>) {
+    chomp $line;
+    my ($kwid, $text) = split " ", $line, 2;
+
+    my @words = split " ", $text;
+    printf "$kwid NGramOrder=%03d\n", scalar @words;
+    printf "$kwid Characters=%03d\n", length(join("", @words));
+    print "$kwid $kwid\n";
+  }
+} else {
+  my $prev_kwid = "";
+  my $count = 0;
+
+  while (my $line = <>) {
+    chomp $line;
+    my @entries = split " ", $line;
+    next unless @entries;
+
+    if ($prev_kwid ne $entries[0]) {
+      if ($prev_kwid) {
+        print "$prev_kwid ResCount=$count\n";
+        print "$prev_kwid ResCountQuant=" . QuantizeCount($count) . "\n";
+      }
+      $count = 0;
+      $prev_kwid = $entries[0];
+    }
+    $count += 1;
+  }
+}
+
+
diff --git a/egs/babel/s5d/local/search/filter_by_category.pl b/egs/babel/s5d/local/search/filter_by_category.pl
new file mode 100755
index 00000000000..baef4f6ac2b
--- /dev/null
+++ b/egs/babel/s5d/local/search/filter_by_category.pl
@@ -0,0 +1,360 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2016  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+my $Usage = <<EOU;
+Usage: $0 [options] <categories-table> <filter-expression>
+ e.g.: cat data/dev10h.pem/kws/keywords.int | \
+       $0 data/dev10h.pem/kws/categories "Characters>10&&NGramOrder=2"
+
+Allowed options:
+  -f <k>  : assume the KWID (for which the filter expression is
+            evaluated) on the k-th column (int, default 0)
+
+NOTE:
+  When the expression is empty (or missing), it's evaluated as always true,
+  i.e. no entry will be removed from the input
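+
+  For reference, a line of the categories table has this form (a made-up
+  entry; the first field is the KWID, the rest are its categories):
+    KW304-0008 Characters=012 KW304-0008 NGramOrder=002 OOV=1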
+
+CAVEATS:
+  The operator '=' is equivalent to '=='.
+
+  Do not use the '-' character in the categories file if you want to use that
+  category in the filter expression. For example, the default setup adds
+  the KWID itself as a category. If you use Babel-style KWIDs, e.g.
+  KW304-0008, you won't be able to use the KWID in the expression itself
+  (but you can still filter according to other categories): for example,
+  KW306-0008&&OOV=1 might be a valid expression, but it most probably won't
+  do what you want, as it will be parsed as KW306 - (8 && (OOV == 1)),
+  which is most probably not what you intended.
+  Currently, there is no way to make this work -- unless you rename the
+  categories (for example, substitute '-' by '_'). While this could probably
+  be solved by taking the categories into account during parsing, it's
+  probably not that important.
+
+EOU
+
+use strict;
+use warnings 'FATAL';
+use utf8;
+use Switch;
+use Data::Dumper;
+use Scalar::Util qw(looks_like_number);
+use Getopt::Long;
+use POSIX;
+
+my $debug = '';
+my $field = 0;
+
+GetOptions("debug" => \$debug,
+           "f=i" => \$field) || do {
+  print STDERR "Cannot parse the command line parameters.\n\n";
+  print $Usage . "\n";
+  die "Cannot continue";
+};
+
+if ((@ARGV < 1) || (@ARGV > 2)) {
+  print STDERR "Incorrect number of parameters.\n\n";
+  print $Usage . "\n";
+  die "Cannot continue";
+}
+
+my $group_file = $ARGV[0];
+my $str_expr="";
+$str_expr=$ARGV[1] if defined($ARGV[1]);
+
+# Split the expression into tokens (might need some more attention
+# to make it really correct)
+sub tokenize_string {
+  my $s = shift;
+  $s =~ s/^\s+|\s+$//g;
+  my @tokens = split(/ *(\&\&|\|\||\>\=|\<\=|==|!=|[\+\-\=\(\)\<\>\*\/^!]) */, $s);
+  #print STDERR join(", ", @tokens) . "\n";
+  return @tokens;
+}
+
+
+
+# the precedence table should reflect the precedence of the operators in C
+my %precedence = (
+  #unary operators
+  'u+' => 11,
+  'u-' => 11,
+  'u!' => 11,
+
+  '^' => 10,
+  #'(' => 10,
+  #')' => 10,
+
+
+  #arithmetic operators
+  '*' => 8,
+  '/' => 8,
+  '%' => 8,
+
+  '+' => 7,
+  '-' => 7,
+
+  # logical operators
+  '<' => 5,
+  '>' => 5,
+  '>=' => 5,
+  '<=' => 5,
+  '=' => 4,
+  '==' => 4,
+  '!=' => 4,
+  '&&' => 3,
+  '||' => 2,
+);
+
+my %right=(
+  #unary operators
+  'u+' => 1,
+  'u-' => 1,
+  'u!' => 1,
+
+  # this contradicts Matlab, but it is the usual mathematical
+  # interpretation: 2^3^4 = 2^(3^4), instead of Matlab's
+  # left associativity 2^3^4 = (2^3)^4
+  # as always -- if the order is important, use parentheses
+  '^' => 1,
+);
+
+sub assoc {
+  my $op = $_[0];
+  return (exists $right{$op}) ? $right{$op} : -1;
+}
+
+sub looks_like_variable {
+  return $_[0] =~ /^[A-Za-z_][A-Za-z_0-9]*$/;
+}
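+
+# A worked example of the two stages below (made-up filter): the expression
+#   Characters>10&&NGramOrder=2
+# is tokenized to [Characters, >, 10, &&, NGramOrder, =, 2], and to_postfix()
+# turns it, via the shunting-yard algorithm, into the postfix form
+#   [Characters, 10, >, NGramOrder, 2, =, &&]
+# because '>' and '=' bind tighter than '&&' in the %precedence table above.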
+
+sub unary_op {
+  my $token = shift;
+  my $op = shift;
+  my $res;
+
+  switch( $token ) {
+    case 'u+' {$res = $op}
+    case 'u-' {$res = -$op}
+    case 'u!' {$res = !$op}
+    else      {die "Unknown operator $token"}
+  }
+
+  return $res;
+}
+
+sub binary_op {
+  my $token = shift;
+  my $op2 = shift;
+  my $op1 = shift;
+  my $res;
+
+  $op2 += 0.0;
+  $op1 += 0.0;
+  switch( $token ) {
+    case '^'  {$res = $op1 ** $op2}
+    case '*'  {$res = $op1 * $op2}
+    case '/'  {$res = $op1 / $op2}
+    case '%'  {$res = $op1 % $op2}
+    case '+'  {$res = $op1 + $op2}
+    case '-'  {$res = $op1 - $op2}
+    case '<'  {$res = $op1 < $op2}
+    case '>'  {$res = $op1 > $op2}
+    case '>=' {$res = $op1 >= $op2}
+    case '<=' {$res = $op1 <= $op2}
+    case '='  {$res = $op1 == $op2}
+    case '==' {$res = $op1 == $op2}
+    case '!=' {$res = $op1 != $op2}
+    case '&&' {$res = $op1 && $op2}
+    case '||' {$res = $op1 || $op2}
+    else      {die "Unknown operator $token"}
+  }
+
+  return $res;
+}
+
+# refer to https://en.wikipedia.org/wiki/Shunting-yard_algorithm
+# plus perl implementation in http://en.literateprograms.org/Shunting_yard_algorithm_(Perl)
+sub to_postfix {
+  my @stack;
+  my @output = ();
+  my $last = "";
+
+  my @tokens=tokenize_string(shift);
+
+  foreach my $token (@tokens) {
+    next unless $token ne '';
+
+    # detection of unary operators
+    # not sure if this heuristic is complete
+    if (($token =~ /^[-+!]$/) &&
+        (defined($precedence{$last}) || ($last eq '') || ($last eq ')'))) {
+      #print "Unary op: $token\n";
+      $token="u$token";
+    }
+
+    if (looks_like_number($token)) {
+      if (looks_like_number($last) || looks_like_variable($last)) {
+        die "Value tokens must be separated by an operator";
+      }
+      push @output, $token;
+    } elsif (looks_like_variable($token)) {
+      if (looks_like_number($last) || looks_like_variable($last)) {
+        die "Value tokens must be separated by an operator";
+      }
+      push @output, $token;
+    } elsif (defined $precedence{$token}) {
+      my $p = $precedence{$token};
+
+      while (@stack) {
+        my $old_p = $precedence{$stack[-1]};
+        last if $p > $old_p;
+        last if $p == $old_p and (assoc($token) >= 0);
+        push @output, pop @stack;
+      }
+      push @stack, $token;
+    } elsif ($token eq '(') {
+      push @stack, $token;
+    } elsif ($token eq ')') {
+      my $t;
+      do {
+        $t=pop @stack;
+        push @output, $t unless $t eq '('
+      } while ($t && ($t ne '('));
+      die "No matching (" unless $t eq '(';
+      #print "stack=[" . join(", ", @stack) . "] output=[" . join(", ", @output) . "]\n" ;
+    } else {
+      print "stack=[" . join(", ", @stack) . "] output=[" . join(", ", @output) . "]\n" ;
+      die "Unknown token \"$token\" during parsing the expression";
+    }
+    $last=$token;
+  }
+
+  # dump the rest of the operators
+  while (@stack) {
+    my $t = pop @stack;
+    die "No matching )" if $t eq '(';
+    push @output, $t;
+  }
+
+  # final postfix expression
+  return @output;
+}
+
+# this follows the standard RPN (postfix) expression evaluation;
+# the only possibly slightly confusing part is that when we encounter
+# a variable, we look up its value in %vars. By default (i.e. if the variable
+# is not present in the dict), the variable evaluates to 0 (false)
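+#
+# Continuing the worked example above (made-up values): evaluating
+#   [Characters, 10, >, NGramOrder, 2, =, &&]
+# against %vars = (Characters => 12, NGramOrder => 2) computes 12 > 10 -> 1,
+# then 2 == 2 -> 1, and finally 1 && 1 -> 1, i.e. the keyword passes.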
+sub evaluate_postfix {
+  my @expression = @{$_[0]};
+  my %vars= %{$_[1]};
+
+  my @stack = ();
+  foreach my $token (@expression) {
+    if (looks_like_number($token)) {
+      push @stack, $token;
+    } elsif (looks_like_variable($token)) {
+      my $val = 0;
+      if (defined $vars{$token}) {
+        $val = $vars{$token};
+      }
+      push @stack, $val;
+    } elsif (defined $precedence{$token}) {
+      my $res;
+      if ( $token =~ /^u.*$/) {
+        my $op = pop @stack;
+        $res = unary_op($token, $op);
+      } else {
+        my $op1 = pop @stack;
+        my $op2 = pop @stack;
+        $res = binary_op($token, $op1, $op2);
+      }
+      push @stack, $res;
+    } else {
+      die "Unknown token: $token, expression=[" . join(" ", @expression) . "]\n";
+    }
+    #print STDERR "token = $token; stack = [" . join(' ', @stack) . "]\n";
+
+  }
+  if (@stack != 1) {
+    my $expr = join(" ", @expression);
+    print STDERR "expression = [$expr]; stack = [" . join(' ', @stack) . "]\n";
+    die "The operators did not reduce the stack completely!" if @stack != 1;
+  }
+  return pop @stack;
+}
+
+
+#--print "infix = [" . join(' ', @tokens) . "]\n";
+#--my @exp = to_postfix(@tokens);
+#--my %vals = (A=>50, C => -3);
+#--print "output = [" . join(' ', @exp) . "]\n";
+#--
+#--print evaluate_postfix(\@exp, \%vals);
+
+
+my @expression = to_postfix($str_expr);
+
+my %GROUPS;
+#Read the groups table
+open(G, $ARGV[0]) or die "Cannot open the group table $ARGV[0]";
+while (my $line = <G>) {
+  my @entries = split(" ", $line);
+  my $kwid = shift @entries;
+
+  foreach my $group (@entries) {
+    my @entries = split "=", $group;
+    if (@entries == 2) {
+      $GROUPS{$kwid}->{$entries[0]} = $entries[1];
+    } elsif (@entries ==1 ) {
+      $GROUPS{$kwid}->{$group} = 1;
+    } else {
+      die "Unknown format of the category $group";
+    }
+  }
+}
+close(G);
+
+my $let_all_pass=0;
+if (not @expression) {
+  $let_all_pass=1;
+}
+
+while (my $line = <STDIN>) {
+  # shortcut when no filter expression was given -- everything passes
+  if ($let_all_pass == 1) {
+    print $line;
+    next;
+  }
+
+  my @entries = split(" ", $line);
+  my $kwid = $entries[$field];
+
+  my $res = evaluate_postfix(\@expression, $GROUPS{$kwid});
+  if ($res) {
+    print $line;
+  } else {
+    print STDERR "Not keeping: $line" if $debug;
+  }
+
+}
+
+
diff --git a/egs/babel/s5d/local/search/filter_kws_results.pl b/egs/babel/s5d/local/search/filter_kws_results.pl
new file mode 100755
index 00000000000..f4e6589c50a
--- /dev/null
+++ b/egs/babel/s5d/local/search/filter_kws_results.pl
@@ -0,0 +1,189 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+my $Usage = <<EOU;
+Usage: $0 [options] < results > output
+ e.g.: gunzip -c exp/tri5/kws/result.*.gz | $0 > exp/tri5/kws/results
+
+Allowed options:
+  --nbest   : how many best results (for each KWID) should be printed
+              (int, default -1, i.e. no limit)
+  --duptime : duplicates detection, tolerance (in frames) for being
+              the same hits (int, default = 50)
+  --likes
+  --probs
+
+CAVEATS:
+  The script tries to be memory-efficient. The impact of this is that we
+  assume the results are sorted by KWID (i.e. all entries with the same KWID
+  are in a continuous block). The user is responsible for sorting it.
+EOU
+
+use strict;
+use warnings;
+use utf8;
+use POSIX;
+use Data::Dumper;
+use Getopt::Long;
+
+# if parameter nbest > 0, then filters the result list so that there are no
+# more than nbest hits in the output for each KWID
+#
+
+my $nbest = -1;
+my $duptime = 50;
+my $likes = 0;
+
+#print STDERR join(" ", $0, @ARGV) . "\n";
+GetOptions ("nbest=f" => \$nbest,
+            "likes" => \$likes,
+            "probs" => sub{ $likes = 0},
+            "duptime=i" => \$duptime) || do {
+  print STDERR "Cannot parse the command-line parameters.\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+};
+
+if (@ARGV != 0) {
+  print STDERR "Incorrect number of command-line parameters\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue\n"
+}
+
+# Function for sorting
+sub KwslistOutputSort {
+  if ($a->[0] ne $b->[0]) {
+    if ($a->[0] =~ m/[0-9]+$/ && $b->[0] =~ m/[0-9]+$/) {
+      ($a->[0] =~ /([0-9]*)$/)[0] <=> ($b->[0] =~ /([0-9]*)$/)[0]
+    } else {
+      $a->[0] cmp $b->[0];
+    }
+  } elsif ($a->[5] ne $b->[5]) {
+    $b->[5] <=> $a->[5];
+  } else {
+    $a->[1] cmp $b->[1];
+  }
+}
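+
+# Deduplication in a nutshell: hits for the same keyword in the same file
+# whose start times differ by less than --duptime frames count as duplicates;
+# KwslistDupSort() below groups such hits next to each other (best-scoring
+# first), and the main loop then keeps only the first hit of every group.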
+
+sub KwslistDupSort {
+  my ($a, $b, $duptime) = @_;
+  if ($a->[1] ne $b->[1]) {
+    #file
+    $a->[1] cmp $b->[1];
+  } elsif (abs($a->[2]-$b->[2]) >= $duptime){
+    #start
+    $a->[2] <=> $b->[2];
+  } elsif ($a->[4] ne $b->[4]) {
+    #score
+    $b->[4] <=> $a->[4];
+  } else {
+    #end time
+    $b->[3] <=> $a->[3];
+  }
+}
+
+my @RESULTS;
+my %SEEN_KWS;
+my $kw = "";
+
+while ( my $line = <STDIN> ) {
+  chomp $line;
+  my @F = split " ", $line;
+  @F == 5 || die "$0: Bad number of columns in raw results \"$line\"\n";
+
+  $F[4] = -$F[4] if $likes;
+
+  if ($F[0] eq $kw) {
+    push @RESULTS, \@F;
+  } elsif ($kw eq "" ) {
+    @RESULTS = ();
+    push @RESULTS, \@F;
+    $kw = $F[0];
+  } else {
+
+    my @results;
+    my @tmp = sort { KwslistDupSort($a, $b, $duptime) } @RESULTS;
+
+    @results = ();
+    if (@tmp >= 1) {push(@results, $tmp[0])};
+    for (my $i = 1; $i < scalar(@tmp); $i ++) {
+      my $prev = $results[-1];
+      my $curr = $tmp[$i];
+      if ((abs($prev->[2]-$curr->[2]) < $duptime ) &&
+          ($prev->[1] eq $curr->[1])) {
+        next;
+      } else {
+        push(@results, $curr);
+      }
+    }
+
+    # this is probably needed only when nbest > 0
+    @results = sort { ($b->[4] + 0.0) <=> ($a->[4] + 0.0) } @results;
+
+    my $len;
+    if( $nbest > 0) {
+      $len = scalar @results < $nbest ? 
scalar @results : $nbest; + } else { + $len = scalar @results; + } + for (my $i=0; $i < $len; $i++) { + $results[$i]->[4] = -$results[$i]->[4] if $likes; + print join(" ", @{$results[$i]}) . "\n"; + } +} + + diff --git a/egs/babel/s5d/local/search/normalize.sh b/egs/babel/s5d/local/search/normalize.sh new file mode 100755 index 00000000000..38054f75879 --- /dev/null +++ b/egs/babel/s5d/local/search/normalize.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal) +# Apache 2.0. + +# Begin configuration section. +# case_insensitive=true +extraid= +min_lmwt=8 +max_lmwt=12 +cmd=run.pl +stage=0 +ntrue_from= +# End configuration section. + +help_message="$0: score the kwslist using the F4DE scorer from NIST + Example: + $0 [additional-parameters] + where the most important additional parameters can be: + --extraid #for using, when a non-default kws tasks are setup + (using the kws_setup.sh --extraid) for a kaldi-single data-dir" + +echo $0 $@ +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + + +if [ $# -ne 3 ]; then + printf "FATAL: incorrect number of variables given to the script\n\n" + printf "$help_message\n" + exit 1; +fi + +set -e -o pipefail + +langdir=$1 +if [ -z $extraid ] ; then + kwsdatadir=$2/kws +else + kwsdatadir=$2/kwset_${extraid} +fi +kwsoutputdir="$3" + +trials=$(cat $kwsdatadir/trials) +mkdir -p $kwsoutputdir/log/ + +if [ $stage -le 0 ] ; then + for LMWT in $(seq $min_lmwt $max_lmwt) ; do + mkdir -p ${kwsoutputdir}_$LMWT/details/ + + cp ${ntrue_from}_$LMWT/details/ntrue ${kwsoutputdir}_$LMWT/details/ntrue + cp ${ntrue_from}_$LMWT/details/ntrue_raw ${kwsoutputdir}_$LMWT/details/ntrue_raw + echo "$ntrue_from" > ${kwsoutputdir}_$LMWT/details/ntrue_from + done +fi + +if [ $stage -le 1 ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/normalize.LMWT.log \ + cat ${kwsoutputdir}_LMWT/results \|\ + local/search/normalize_results_kst.pl --trials $trials --ntrue-scale \$\(cat ${kwsoutputdir}_LMWT/details/ntrue\)\ + \> ${kwsoutputdir}_LMWT/details/results + +fi + +if [ $stage -le 2 ]; then +if [ -f $kwsdatadir/f4de_attribs ] ; then + language="" + flen=0.01 + kwlist_name="" + . $kwsdatadir/f4de_attribs #override the previous variables + + ecf=$kwsdatadir/ecf.xml + kwlist=$kwsdatadir/kwlist.xml + + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_write_kwslist.LMWT.log \ + mkdir -p ${kwsoutputdir}_LMWT/f4de/\; \ + cat ${kwsoutputdir}_LMWT/details/results \| \ + utils/int2sym.pl -f 2 $kwsdatadir/utt.map \| \ + local/search/utt_to_files.pl --flen $flen $kwsdatadir/../segments \|\ + local/search/write_kwslist.pl --flen $flen --language $language \ + --kwlist-id $kwlist_name \> ${kwsoutputdir}_LMWT/f4de/kwslist.xml + +fi +fi + +echo "$0: Done" +exit 0; + + diff --git a/egs/babel/s5d/local/search/normalize_categories.pl b/egs/babel/s5d/local/search/normalize_categories.pl new file mode 100755 index 00000000000..f3354e8c4d4 --- /dev/null +++ b/egs/babel/s5d/local/search/normalize_categories.pl @@ -0,0 +1,89 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2015 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +my $Usage=< > categories + e.g.: cat partial_categories.* | $0 > categories + +Allowed options: + --one-per-line : by default, there will be only one line for each KWID + THis option changes the output format so that for + each pair "KWID CATEGORY" will be on a single line. + +Note: + Reads the stream of categories information in the format + + keyword-ID1 category category2 + keyword-ID2 category2 + keyword-ID1 category category2 + + The duplicities are allowed (and will be removed). + Multiple categories per line are allowed (and will be merged) + + The purpose of the script is to be able to merge the information from different + scripts. Each script can generate it's own information about categories + and this script can be then used to merge these partial tables into one global +EOU + +use strict; +use warnings; +use utf8; +use Getopt::Long; +use Data::Dumper; +use POSIX; + +my $one_per_line; + +GetOptions("one-per-line", \$one_per_line) or + do { + print STDERR "Cannot parse the command-line parameters.\n"; + print STDERR "$Usage\n"; + die "Cannot continue\n" +}; + +if (@ARGV != 0) { + print STDERR "Incorrect number of command-line parameters\n"; + print STDERR "$Usage\n"; + die "Cannot continue\n" +} + +my %GROUPS; + +while (my $line=) { + chomp $line; + my @entries = split " ", $line; + + die "The line \"$line\" does not have correct format" if @entries < 2; + + my $kwid=shift @entries; + for my $category (@entries) { + $GROUPS{$kwid}->{$category} = 1; + } +} + +for my $kwid (sort keys %GROUPS) { + if ($one_per_line) { + foreach my $category (sort keys %{$GROUPS{$kwid}} ) { + print $kwid . " " . $category . "\n"; + } + } else { + print $kwid . " " . join(" ", sort keys %{$GROUPS{$kwid}}) . "\n"; + } +} diff --git a/egs/babel/s5d/local/search/normalize_results_kst.pl b/egs/babel/s5d/local/search/normalize_results_kst.pl new file mode 100755 index 00000000000..e57b947f278 --- /dev/null +++ b/egs/babel/s5d/local/search/normalize_results_kst.pl @@ -0,0 +1,203 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2015 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. 
+#===============================================================================
+my $Usage = <<EOU;
+Usage: cat results | $0 [options] > results.normalized
+
+Allowed options:
+  --probs             : the input is probabilities instead of neg-log-likelihoods
+
+  --duration|--trials : size of the searched collection in seconds (float)
+  --beta              : the FA vs MISS rate (float, default 999.9)
+  --ntrue-scale       : scale for the expected count of true hits (float, default 1.0)
+  --thr|--threshold   : the decision threshold (float, default 0.5)
+EOU
+
+use strict;
+use warnings;
+use utf8;
+use POSIX;
+use Data::Dumper;
+use Getopt::Long;
+
+my $ntrue_scale = 1.0;
+my $global_thr = 0.5;
+my $beta = 999.9;
+my $duration = 35785.578;
+my $ntrue_table_filename;
+my $probs=0;
+my $bsum_filename;
+
+GetOptions("duration|trials=f" => \$duration,
+           "ntrue-scale=f" => \$ntrue_scale,
+           "beta=f" => \$beta,
+           "probs" => \$probs,
+           "thr|threshold=f" => \$global_thr,
+           "ntrue-table=s" => \$ntrue_table_filename,
+           "bsum-table=s" => \$bsum_filename) or do {
+  print STDERR "$0: Cannot parse the command-line parameters.\n";
+  print STDERR "$Usage\n";
+  die "$0: Cannot continue\n"
+};
+
+if (@ARGV != 0) {
+  print STDERR "$0: Incorrect number of command-line parameters\n";
+  print STDERR "$Usage\n";
+  die "$0: Cannot continue\n"
+}
+
+sub ComputeKST {
+  my @instances = @{shift @_};
+  my $ntrue_scale = shift @_;
+  my %ntrue_table = %{shift @_};
+
+
+  my $ntrue = 0;
+  foreach my $elem(@instances) {
+    $ntrue += $elem->[4];
+  }
+  #$ntrue = $ntrue / @instances;
+  if (defined ($ntrue_table{$instances[0]->[0]})) {
+    #print STDERR "For KW " . $instances[0]->[0] . " using the value " . $ntrue_table{$instances[0]->[0]} . "\n";
+    $ntrue = $ntrue * $ntrue_table{$instances[0]->[0]};
+  } else {
+    #print STDERR "Using the default value $ntrue_scale\n";
+    $ntrue = $ntrue * $ntrue_scale;
+  }
+
+  my $thr = $beta * $ntrue / ( $duration + $ntrue * ($beta - 1));
+  return $thr;
+}
+
+sub ComputeKSTWithExpected {
+  my @instances = @{shift @_};
+  my %expected_table = %{shift @_};
+  my $ntrue_scale = shift @_;
+  my %ntrue_table = %{shift @_};
+
+
+  my $ntrue = $expected_table{$instances[0]->[0]};
+  #$ntrue = $ntrue / @instances;
+  if (defined ($ntrue_table{$instances[0]->[0]})) {
+    #print STDERR "For KW " . $instances[0]->[0] . " using the value " . $ntrue_table{$instances[0]->[0]} . "\n";
+    $ntrue = $ntrue * $ntrue_table{$instances[0]->[0]};
+  } else {
+    #print STDERR "Using the default value $ntrue_scale\n";
+    $ntrue = $ntrue * $ntrue_scale;
+  }
+
+  my $thr = $beta * $ntrue / ( $duration + $ntrue * ($beta - 1));
+  return $thr;
+}
+
+sub NormalizeScores {
+  my @instances = @{shift @_};
+  my $thr = shift @_;
+  my $global_thr = shift @_;
+
+
+  if ($thr == 0) {
+    $thr = 0.001;
+  }
+  my $q = log($global_thr)/log($thr);
+
+  foreach my $elem(@instances) {
+    $elem->[4] = pow($elem->[4], $q);
+  }
+}
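+
+# A worked example of the keyword-specific thresholding (KST) above, with
+# made-up numbers: beta=999.9, duration=36000s and Ntrue=2 give
+#   thr = 999.9*2 / (36000 + 2*998.9) ~= 0.0526,
+# and the exponent q = log(0.5)/log(0.0526) ~= 0.235, so a raw score of 0.3
+# is mapped to 0.3**0.235 ~= 0.75; i.e. the scores are warped so that the
+# per-keyword threshold lands exactly on the global threshold 0.5.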
"\n" if $elem->[-1] > 1.0; + } + +} + +my $KWID; +my @putative_hits; +my %NTRUE_TABLE = (); + +my %BSUM=(); +if (defined $bsum_filename) { + open(BSUMF, $bsum_filename) or die "$0: Cannot open $bsum_filename"; + while (my $line = ) { + chomp $line; + next unless (($line =~ m/^\s*KW/) || ($line =~ m/^Keyword\s*KW/)); + $line =~ s/^Keyword//g; + $line =~ s/^\s+|\s+$//g; + my @entries = split /\s*\|\s*/, $line; + $BSUM{$entries[0]} = $entries[12]; + } + close(BSUMF); +} + +if ( defined $ntrue_table_filename) { + open (F, $ntrue_table_filename) or die "$0: Cannot open the Ntrue-table file\n"; + while (my $line = ) { + my @entries=split(" ", $line); + + die "$0: The Ntrue-table does not have expected format\n" if @entries != 2; + $NTRUE_TABLE{$entries[0]} = $entries[1] + 0.0; + } + close (F); +} + +while (my $line = ) { + chomp $line; + (my $kwid, my $file, my $start, my $end, my $score) = split " ", $line; + + if ($KWID && ($kwid ne $KWID)) { + + my $thr = ComputeKST(\@putative_hits, $ntrue_scale, \%NTRUE_TABLE ); + if ((defined $BSUM{$KWID}) && (scalar @putative_hits > 100)) { + print STDERR "$0: $KWID $thr $BSUM{$KWID} " . log($thr)/log($global_thr) . "\n"; + my $old_thr = $thr; + $thr = pow($BSUM{$KWID}, log($thr)/log($global_thr)); + } + if ($thr < 0.9999 ) { + NormalizeScores(\@putative_hits, $thr, $global_thr); + WriteResults(\@putative_hits); + } + + $KWID = $kwid; + @putative_hits = (); + } elsif ( not $KWID ) { + $KWID = $kwid; + } + + unless ($probs) { + $score = exp(-$score); + } + push @putative_hits, [$kwid, $file, $start, $end, $score]; +} + +if ($KWID) { + my $thr = ComputeKST(\@putative_hits, $ntrue_scale, \%NTRUE_TABLE ); + if ((defined $BSUM{$KWID}) && (scalar @putative_hits > 100)) { + $thr = pow($BSUM{$KWID}, log($thr)/log($global_thr)); + } + if ($thr < 0.9999 ) { + NormalizeScores(\@putative_hits, $thr, $global_thr); + WriteResults(\@putative_hits); + } +} + diff --git a/egs/babel/s5d/local/search/per_category_stats.pl b/egs/babel/s5d/local/search/per_category_stats.pl new file mode 100755 index 00000000000..d14636dcc0f --- /dev/null +++ b/egs/babel/s5d/local/search/per_category_stats.pl @@ -0,0 +1,326 @@ +#!/usr/bin/env perl +#=============================================================================== +# Copyright 2015 (Author: Yenda Trmal ) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== + +# Takes the alignment.csv and the category tables and computes the per-category +# statistics including the oracle measures (OTWV, MTWV, STWV) +# Is not particulary effective (for example, it computes the oracle measures +# for each keyword several times (once for each category the keyword is in); +# To achieve at least partial speed-up, we cache some of the partial statistics +# The caching gave us speed improvement approx. 
+
diff --git a/egs/babel/s5d/local/search/per_category_stats.pl b/egs/babel/s5d/local/search/per_category_stats.pl
new file mode 100755
index 00000000000..d14636dcc0f
--- /dev/null
+++ b/egs/babel/s5d/local/search/per_category_stats.pl
@@ -0,0 +1,326 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+# Takes the alignment.csv and the category tables and computes the per-category
+# statistics, including the oracle measures (OTWV, MTWV, STWV).
+# It is not particularly efficient (for example, it computes the oracle measures
+# for each keyword several times -- once for each category the keyword is in).
+# To achieve at least a partial speed-up, we cache some of the partial
+# statistics; the caching brought the runtime down from approx. 22s to 14s.
+#
+# The lines in the output starting with '#' are intended as comments only -- you
+# can filter them out using grep -v '^#'
+# The first comment line contains the header,
+# the second comment line contains column numbers (to make using cut -f easier)
+#    -- you don't have to count the fields, just use the printed
+#       number of the field
+#
+# Compatibility:
+# We tried to make the numbers comparable with the F4DE output. If there is a
+# large difference, something is probably wrong and you should report it.
+# The column names should be compatible (to a large extent) with the F4DE output
+# files (sum.txt, bsum.txt, cond.bsum.txt). Our intention was, however,
+# to make this file easily grepable/machine-processable, so we didn't honor
+# the original F4DE file format
+#
+# Usage:
+# It reads the alignment.csv from the STDIN.
+# Moreover, it expects exactly two arguments: the number of trials and
+# the category table
+# I.e.
+#  local/search/per_category_stats.pl <number-of-trials> <category-table>
+#
+# Example:
+# cat alignment.csv | perl local/search/per_category_stats.pl `cat data/dev10h.pem/extra_kws/trials` data/dev10h.pem/extra_kws/categories
+#
+# Additional parameters
+# --beta        # beta value (weight of FAs), default 999.9
+# --sweep-step  # sweep step for the oracle measures
+#
+# TODO
+# Document what each field means (might be slightly tricky, as even F4DE
+# does not document the exact meaning of some of the fields).
+#
+# ATWV - Actual Term-Weighted Value (TWV for the threshold 0.5)
+# MTWV - Maximum Term-Weighted Value (TWV for the threshold that maximizes
+#        the given category's TWV)
+# OTWV - Optimum Term-Weighted Value (TWV assuming the decision threshold
+#        for each Term/KW is determined optimally)
+# STWV - Supreme TWV - essentially Lattice Recall
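+#
+# For reference: the per-keyword TWV at a decision threshold theta is
+#   TWV(theta) = 1 - Pmiss(theta) - beta * Pfa(theta)
+# with Pmiss = 1 - #corr/Ntrue and Pfa = #FA/(T - Ntrue); the per-category
+# numbers below are averages of this quantity over the category's keywords.
+# Illustrative example (made-up counts): with Ntrue=4, #corr=3, #FA=2,
+# T=36000 trials and beta=999.9 we get Pmiss=0.25, Pfa ~= 5.6e-5 and
+# TWV ~= 1 - 0.25 - 0.056 = 0.694.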
+
+use strict;
+use warnings FATAL => 'all';
+use utf8;
+use List::Util;
+use Data::Dumper;
+use Getopt::Long;
+use Scalar::Util qw(looks_like_number);
+
+binmode STDIN, ":utf8";
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+
+my %CATEGORIES;
+my %STATS;
+my %K;
+
+my $beta=999.9;
+my $step_size=0.005;
+my $threshold = 0.5;
+my $enable_caching = 1;
+
+my $cat_maxlen = 9; #Must accommodate the string "#CATEGORY" in the header
+my $field_size = 9;
+
+my $L = int(1.0/$step_size) + 1;
+
+GetOptions("beta=f" => \$beta,
+           "sweep-step=f" => \$step_size,
+           "disable-caching" => sub{ $enable_caching=''; }
+          ) or die "Cannot process the input options (possibly an unknown switch)";
+
+die "Unsupported number of arguments." if @ARGV != 2;
+if ( not looks_like_number($ARGV[0])) {
+  die "The first parameter must be a float number (number of trials) -- got $ARGV[0]";
+}
+
+my $T= 0.0 + $ARGV[0];
+
+
+open(CAT, $ARGV[1]) or die("Cannot open the categories file $ARGV[1]");
+while(my $line = <CAT>) {
+  my @entries =split(" ", $line);
+
+  die "Unknown format of category line: \"$line\"" if scalar @entries < 2;
+  my $kw = shift @entries;
+
+
+  if (not defined $STATS{$kw}->{fa_sweep}) {
+    $STATS{$kw}->{fa} = 0;
+    $STATS{$kw}->{corr} = 0;
+    $STATS{$kw}->{miss} = 0;
+    $STATS{$kw}->{lattice_miss} = 0;
+    $STATS{$kw}->{ntrue} = 0;
+    $STATS{$kw}->{count} = 0;
+    $STATS{$kw}->{corrndet} = 0;
+
+    my @tmp1 = (0) x ($L+1);
+    $STATS{$kw}->{fa_sweep} = \@tmp1;
+    my @tmp2 = (0) x ($L+1);
+    $STATS{$kw}->{corr_sweep} = \@tmp2;
+  }
+
+  push @entries, "ALL";
+  foreach my $cat (@entries) {
+    $cat_maxlen = length($cat) if length($cat) > $cat_maxlen;
+    push @{$CATEGORIES{$cat}}, $kw;
+    $K{$cat} += 1;
+  }
+}
+close(CAT);
+#print Dumper(\%CATEGORIES);
+
+
+#print STDERR "Reading the whole CSV\n";
+my $i = 0;
+my $dummy=<STDIN>;  # skip the CSV header line
+while (my $line=<STDIN>) {
+  chomp $line;
+  my @entries = split(",", $line);
+
+  die "Unknown format of CSV line: \"$line\"" if scalar @entries != 12;
+
+
+  my $termid = $entries[3];
+  my $ref_time = $entries[5];
+  my $score = $entries[9];
+  my $decision=$entries[10];
+  my $ref = $entries[11];
+
+  if (not defined($STATS{$termid}->{ntrue})) {
+    print STDERR "Term $termid not present in the category table, skipping\n";
+    next
+  }
+  #print "$termid, ref_time=$ref_time, score=$score, start=" . int($score/$step_size + 0.5) . ", L=$L\n" if $termid eq "KW303-00025";
+  if ($score) {
+    $score = 1.0 if $score > 1.0;
+    my $q = int($score/$step_size) + 1;
+    for (my $i = 0; $i < $q ; $i += 1) {
+      if ($ref_time) {
+        $STATS{$termid}->{corr_sweep}->[$i] += 1;
+      } else {
+        $STATS{$termid}->{fa_sweep}->[$i] += 1;
+      }
+    }
+  }
+
+  #print STDERR "$line ";
+  $STATS{$termid}->{count} += 1 if $score;
+
+  #print Dumper($ref_time, $score, $STATS{$termid}) if ($ref_time);
+  if (($decision eq "YES") && ($ref eq "FA")) {
+    $STATS{$termid}->{fa} += 1;
+  } elsif (($decision eq "YES") && ($ref eq "CORR")) {
+    $STATS{$termid}->{corr} += 1;
+    $STATS{$termid}->{ntrue} += 1;
+  } elsif ($ref eq "MISS") {
+    $STATS{$termid}->{lattice_miss} += 1 unless $decision;
+    $STATS{$termid}->{miss} += 1;
+    $STATS{$termid}->{ntrue} += 1;
+  } elsif ($ref eq "CORR!DET") {
+    $STATS{$termid}->{corrndet} += 1;
+  }
+  #print STDERR "Done\n";
+
+}
+
+#print STDERR "Read the whole CSV\n";
+
+# Create the header
+my $H=sprintf "%*s", $cat_maxlen-1, "CATEGORY";
+my @int_vals = map{ sprintf("%*s", $field_size, $_) } (split " ", "#KW #Targ #NTarg #Sys #CorrDet #CorrNDet #FA #MISS");
+my @float_vals = map{ sprintf("%*s", $field_size, $_) } (split " ", "ATWV MTWV OTWV STWV PFA MPFA OPFA PMISS MPMISS OPMISS THR MTHR OTHR");
+print "#" . join(" ", $H, @int_vals, @float_vals) . "\n";
+# Create a secondary header with column numbers (to make cut'ing easier)
+my @col_nrs = map { sprintf "%*d", $field_size, $_ } (2.. 1+@int_vals + @float_vals);
+print "#" . join(" ", sprintf("%*d", $cat_maxlen-1, 1), @col_nrs) . 
"\n"; +# End of the header + +my %CACHE = (); + +foreach my $cat (sort keys %CATEGORIES) { + my $K = 0; + my $ATWV = 0; + my $STWV = 0; + my $PMISS = 0; + my $PFA = 0; + + my $OTWV = 0; + my $OPMISS = 0; + my $OPFA = 0; + my $OTHR = 0; + + my $NTRUE = 0; + my $CORR = 0; + my $FA = 0; + my $MISS = 0; + my $COUNT = 0; + my $CORRNDET = 0; + + my @MTWV_SWEEP = (0) x ($L+1); + my @MPMISS_SWEEP = (0) x ($L+1); + my @MPFA_SWEEP = (0) x ($L+1); + #print Dumper($cat, $CATEGORIES{$cat}); + foreach my $kw (sort @{$CATEGORIES{$cat}}) { + #print Dumper($kw, $STATS{$kw}); + next unless defined $STATS{$kw}->{ntrue}; + next if $STATS{$kw}->{ntrue} == 0; + my $pmiss = 1 - $STATS{$kw}->{corr}/$STATS{$kw}->{ntrue}; + my $pfa = $STATS{$kw}->{fa}/($T - $STATS{$kw}->{ntrue}); + my $twv = 1 - $pmiss - $beta * $pfa; + my $stwv = 1 - $STATS{$kw}->{lattice_miss}/$STATS{$kw}->{ntrue}; + + $NTRUE += $STATS{$kw}->{ntrue}; + $CORR += $STATS{$kw}->{corr}; + $CORRNDET += $STATS{$kw}->{corrndet}; + $FA += $STATS{$kw}->{fa}; + $MISS += $STATS{$kw}->{miss}; + $COUNT += $STATS{$kw}->{count} if $STATS{$kw}->{ntrue} > 0; + + $ATWV = ($K * $ATWV + $twv) / ($K + 1); + $PMISS = ($K * $PMISS + $pmiss) / ($K + 1); + $PFA = ($K * $PFA + $pfa) / ($K + 1); + + $STWV = ($K * $STWV + $stwv ) / ($K + 1); + + $pmiss = 0; + $pfa = 0; + $twv = -99999; + my $othr = -0.1; + #print Dumper($kw, $STATS{$kw}); + if (($enable_caching) && (defined $CACHE{$kw})) { + ($pfa, $pmiss, $twv, $OTHR, my $twv_sweep_cache, my $pfa_sweep_cache, my $pmiss_sweep_cache) = @{$CACHE{$kw}}; + @MTWV_SWEEP = map {($K * $MTWV_SWEEP[$_] + $twv_sweep_cache->[$_]) / ($K + 1)} (0..$L); + @MPFA_SWEEP = map {($K * $MPFA_SWEEP[$_] + $pfa_sweep_cache->[$_]) / ($K + 1)} (0..$L); + @MPMISS_SWEEP = map{($K * $MPMISS_SWEEP[$_] + $pmiss_sweep_cache->[$_]) / ($K + 1)} (0..$L); + } else { + my @twv_sweep_cache = (0) x ($L+1); + my @pmiss_sweep_cache = (0) x ($L+1); + my @pfa_sweep_cache = (0) x ($L+1); + + for (my $i = 0; $i <= $L; $i += 1) { + my $sweep_pmiss = 1 - $STATS{$kw}->{corr_sweep}->[$i]/$STATS{$kw}->{ntrue}; + my $sweep_pfa = $STATS{$kw}->{fa_sweep}->[$i]/($T - $STATS{$kw}->{ntrue}); + my $sweep_twv = 1 - $sweep_pmiss - $beta * $sweep_pfa; + if ($twv < $sweep_twv) { + $pfa = $sweep_pfa; + $pmiss = $sweep_pmiss; + $twv = $sweep_twv; + $OTHR = ($i - 1) * $step_size; + } + $pmiss_sweep_cache[$i] = $sweep_pmiss; + $pfa_sweep_cache[$i] = $sweep_pfa; + $twv_sweep_cache[$i] = $sweep_twv; + + #print "$i $sweep_pmiss $sweep_pfa $sweep_twv\n"; + $MTWV_SWEEP[$i] = ($K * $MTWV_SWEEP[$i] + $sweep_twv) / ($K + 1); + $MPFA_SWEEP[$i] = ($K * $MPFA_SWEEP[$i] + $sweep_pfa) / ($K + 1); + $MPMISS_SWEEP[$i] = ($K * $MPMISS_SWEEP[$i] + $sweep_pmiss) / ($K + 1); + } + $CACHE{$kw} = [$pfa, $pmiss, $twv, $OTHR, \@twv_sweep_cache, \@pfa_sweep_cache, \@pmiss_sweep_cache]; + } + + $OTWV = ($K * $OTWV + $twv) / ($K + 1); + $OPMISS = ($K * $OPMISS + $pmiss) / ($K + 1); + $OPFA = ($K * $OPFA + $pfa) / ($K + 1); + $K += 1; + } + + my $max_idx = 0; + my $MTWV = $MTWV_SWEEP[0]; + my $MPMISS = $MPMISS_SWEEP[0]; + my $MPFA = $MPFA_SWEEP[0]; + my $MTHR = 0; + for(my $i = 1; $i <= $L; $i += 1) { + if ($MTWV_SWEEP[$i] > $MTWV) { + $max_idx = $i; + $MTWV = $MTWV_SWEEP[$i]; + $MPMISS = $MPMISS_SWEEP[$i]; + $MPFA = $MPFA_SWEEP[$i]; + $MTHR = ($i - 1) * $step_size; + } + } + + if ($K > 1) { + $OTHR = "NA"; + } + + my $ntarg = $CORRNDET + $FA; + + my @abs_nrs = ($K, $NTRUE, $ntarg, $COUNT, $CORR, $CORRNDET, $FA, $MISS); + @abs_nrs = map { sprintf "%*d", $field_size, $_ } @abs_nrs; + my @flt_nrs = map { $_ eq "NA" ? 
sprintf "%6s", $_ : sprintf "% 6.3g", $_ } ($ATWV, $MTWV, $OTWV, $STWV, $PFA, $MPFA, $OPFA, $PMISS, $MPMISS, $OPMISS, 0.5, $MTHR, $OTHR); + @flt_nrs = map {sprintf "%*s", $field_size, $_} @flt_nrs; + + my $nrs = join(" ", @abs_nrs, @flt_nrs); + + $cat = sprintf("%*s", $cat_maxlen, $cat); + print "$cat $nrs \n"; +} + + diff --git a/egs/babel/s5d/local/search/rttm_to_hitlists.sh b/egs/babel/s5d/local/search/rttm_to_hitlists.sh new file mode 100755 index 00000000000..6d4af6fb916 --- /dev/null +++ b/egs/babel/s5d/local/search/rttm_to_hitlists.sh @@ -0,0 +1,107 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +flen=0.01 +segments= +utt_table= +# End configuration section +echo $0 "$@" +. ./utils/parse_options.sh || exit 1; + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +if [ $# -ne 5 ] ; then + echo "Usage: " + exit 1 +fi + +rttm=$1 +kwlist=$2 +ecf=$3 +workdir=$4 +output=$5 + +for f in $rttm $kwlist $ecf ; do + [ ! -f $f ] && echo "File \"$f\" does not exist." && exit 1 +done + +mkdir -p $workdir + +{ + echo '' + echo '' +} > $workdir/kwslist.xml + +kwseval=`which KWSEval` +if [ -z "$kwseval" ] ; then + echo >&2 "KWSEval from F4DE tools not found" + exit 1 +fi + +bash -x $kwseval -c -r $rttm -e $ecf -t $kwlist -s $workdir/kwslist.xml -f $workdir/ +grep -E ",,MISS" $workdir/alignment.csv | \ + perl -e ' + binmode STDIN, ":utf8"; + binmode STDOUT, ":utf8"; + binmode STDERR, ":utf8"; + + use Data::Dumper; + $flen='$flen'; + %SEGMENTS=(); + if ((defined $ARGV[0]) && ( $ARGV[0] ne "" )) { + open(F, $ARGV[0]) or die "Cannot open \"$ARGV[0]\""; + while() { + @entries = split(" ", $_); + $entries[2] = int($entries[2]/$flen+0.5); + $entries[3] = int($entries[3]/$flen+0.5); + push @{$SEGMENTS{$entries[1]}}, [@entries]; + } + close(F); + } + + while() { + chomp; + @entries_tmp = split(",", $_); + @entries = ($entries_tmp[3], + $entries_tmp[1], + int($entries_tmp[5]/$flen + 0.5), + int($entries_tmp[6]/$flen + 0.5), + 1.0 + ); + + $fid = $entries[1]; + $start = $entries[2]; + $end = $entries[3]; + + if ((defined $ARGV[0]) && ( $ARGV[0] ne "" )) { + $found = 0; + foreach $entry ( @{$SEGMENTS{$fid}} ) { + if (($start >= $entry->[2]) && ($end <= $entry->[3])) { + $relstart = $start - $entry->[2]; + $relend = $end - $entry->[2]; + print join(" ", $entries[0], $entry->[0], $relstart, $relend, 1.0) . "\n"; + if ($found eq 1) { + print STDERR "WARNING: Segments file generates duplicate hits for the entry"; + print STDERR join(" ", @entries_tmp) . "\n"; + } + $found = 1; + } + } + if ($found eq 0) { + print STDERR "WARNING: Segments file does not allow for finding entry "; + print STDERR join(" ", @entries_tmp) . "\n"; + } + } else { + print join(" ", @entries) . "\n"; + } + } + ' "$segments" | sort | { + if [ -z "$utt_table" ]; then + cat - + else + utils/sym2int.pl -f 2 $utt_table + fi +} > $output diff --git a/egs/babel/s5d/local/search/run_phn_search.sh b/egs/babel/s5d/local/search/run_phn_search.sh new file mode 100755 index 00000000000..44587699a38 --- /dev/null +++ b/egs/babel/s5d/local/search/run_phn_search.sh @@ -0,0 +1,135 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +stage=2 +dir=dev10h.pem +# End configuration section +. ./conf/common_vars.sh +. ./utils/parse_options.sh +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +. 
./lang.conf + +#Example script how to run keyword search using the Kaldi-native pipeline + + +if [ $stage -le 0 ]; then + local/generate_confusion_matrix.sh --nj 64 --cmd "$decode_cmd" \ + exp/sgmm5_denlats/dengraph/ exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix +fi + +if [ $stage -le 1 ] ; then + local/train_g2p.sh --cmd "$decode_cmd" data/local/lexicon.txt exp/g2p +fi + +dataset=${dir%%.*} +datatype=${dir#*.} + +lang=data/lang.phn +data=data/${dataset}.phn.${datatype} + +set +o nounset +eval kwsets=${!dataset_kwlists[@]} +eval my_ecf_file=\$${dataset}_ecf_file +eval my_rttm_file=\$${dataset}_rttm_file +set -o nounset + +my_array_name=${dataset}_kwlists + +eval kwsets=\( \${!$my_array_name[@]} \) +declare -p kwsets +for set in ${kwsets[@]} ; do + eval my_kwlist=\${$my_array_name[$set]} + declare -p my_kwlist +done +declare -p my_ecf_file +declare -p my_rttm_file + +if [ $stage -le 2 ] ; then + + for set in ${kwsets[@]} ; do + + eval my_kwlist=\${$my_array_name[$set]} + + #This will set up the basic files and converts the F4DE files into Kaldi-native format + local/search/setup.sh $my_ecf_file $my_rttm_file "${my_kwlist}" \ + $data $lang $data/kwset_${set} + + # we will search for the IV words normally (i.e. will look for the specificsequence + # of the words + local/search/compile_keywords.sh --filter "OOV=0&&Characters>2"\ + $data/kwset_${set} $lang $data/kwset_${set}/tmp.2 + + # in addition to the direct search of the IV words, we will set up the proxy + # search as well -- we will use lower nbest, compared to OOV=1 + #-- local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --category "OOV=0" \ + #-- --beam 5 --nbest 10 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \ + #-- ${data}/kwset_${set} ${lang} ${data}/${set}_oov_kws/tmp/L1.lex \ + #-- ${data}/${set}_oov_kws/tmp/L1.lex ${data}/kwset_${set}/tmp.3 + + local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --filter "OOV=1&&Characters>4"\ + --beam 5 --nbest 100 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \ + ${data}/kwset_${set} ${lang} data/local/dict.phn/lexiconp.txt exp/g2p \ + ${data}/kwset_${set}/tmp.4 + + # and finally, replace the categories by the word-level categories + cp data/$dir/kwset_${set}/categories $data/kwset_${set}/categories + done +fi + +if [ $stage -le 3 ] ; then + for set in ${kwsets[@]} ; do + fsts-union scp:<(sort $data/kwset_${set}/tmp*/keywords.scp) \ + ark,t:"|gzip -c >$data/kwset_${set}/keywords.fsts.gz" + done +fi + + +echo "Directories are set up -- running run-4-phn-anydecode.sh will take care of the rest" +exit 0 + +if [ $stage -le 4 ] ; then + for set in $kwsets ; do + for it in $(seq 1 4); do + system=exp/sgmm5_mmi_b0.1/decode_fmllr_$(basename $data)_it$it + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 9 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices ${lang} ${data} $system + done + done +fi + +if [ $stage -le 5 ] ; then + for set in $kwsets ; do + system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + +if [ $stage -le 6 ] ; then + for set in $kwsets ; do + system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem_17_8.5 + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + +if [ $stage -le 7 ] ; then + for set in $kwsets ; do + 
system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.bg + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + +if [ $stage -le 8 ] ; then + for set in $kwsets ; do + system=exp/tri6_nnet/decode_dev10h.phn.pem + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + diff --git a/egs/babel/s5d/local/search/run_search.sh b/egs/babel/s5d/local/search/run_search.sh new file mode 100755 index 00000000000..2cb40cabb59 --- /dev/null +++ b/egs/babel/s5d/local/search/run_search.sh @@ -0,0 +1,136 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +stage=2 +dir=dev10h.pem +# End configuration section +. ./utils/parse_options.sh +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +. ./conf/common_vars.sh +. ./lang.conf + +#Example script how to run keyword search using the Kaldi-native pipeline + + +if [ $stage -le 0 ]; then + local/generate_confusion_matrix.sh --nj 64 --cmd "$decode_cmd" \ + exp/sgmm5_denlats/dengraph/ exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix +fi + +if [ $stage -le 1 ] ; then + local/train_g2p.sh --cmd "$decode_cmd" data/local/lexicon.txt exp/g2p +fi + +dataset=${dir%%.*} + +set +o nounset +eval kwsets=${!dataset_kwlists[@]} +eval my_ecf_file=\$${dataset}_ecf_file +eval my_rttm_file=\$${dataset}_rttm_file +set -o nounset + +my_array_name=${dataset}_kwlists + +eval kwsets=\( \${!$my_array_name[@]} \) +declare -p kwsets +for set in ${kwsets[@]} ; do + eval my_kwlist=\${$my_array_name[$set]} + declare -p my_kwlist +done +declare -p my_ecf_file +declare -p my_rttm_file + +if [ $stage -le 2 ] ; then + + for set in ${kwsets[@]} ; do + + eval my_kwlist=\${$my_array_name[$set]} + + #This will set up the basic files and converts the F4DE files into Kaldi-native format + local/search/setup.sh $my_ecf_file $my_rttm_file "${my_kwlist}" \ + data/$dir/ data/lang/ data/$dir/kwset_${set} + + # we will search for the IV words normally (i.e. will look for the specificsequence + # of the words + local/search/compile_keywords.sh --filter "OOV=0&&Characters>2"\ + data/$dir/kwset_${set} data/lang data/$dir/kwset_${set}/tmp.2 + + # in addition to the direct search of the IV words, we will set up the proxy + # search as well -- we will use lower nbest, compared to OOV=1 + #-- local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --category "OOV=0" \ + #-- --beam 5 --nbest 10 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \ + #-- data/dev10h.pem/kwset_${set} data/lang data/dev10h.pem/${set}_oov_kws/tmp/L1.lex \ + #-- data/dev10h.pem/${set}_oov_kws/tmp/L1.lex data/dev10h.pem/kwset_${set}/tmp.3 + if [ -d data/local/extend ]; then + echo "Detected extended lexicon system..." 
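+      # (note: the extended-lexicon branch below differs from the plain branch
+      # only in resources -- the phone-level search space is larger, so it is
+      # capped explicitly via --phone-beam/--phone-nbest and the jobs request
+      # more memory)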
+ local/search/compile_proxy_keywords.sh --cmd "$decode_cmd --mem 12G" --filter "OOV=1&&Characters>2"\ + --beam 5 --nbest 50 --nj 64 --phone-beam 5 --phone-nbest 300 --confusion-matrix exp/conf_matrix/confusions.txt \ + data/$dir/kwset_${set} data/lang data/local/lexiconp.txt exp/g2p \ + data/$dir/kwset_${set}/tmp.4 + else + local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --filter "OOV=1&&Characters>2"\ + --beam 5 --nbest 50 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \ + data/$dir/kwset_${set} data/lang data/local/lexiconp.txt exp/g2p \ + data/$dir/kwset_${set}/tmp.4 + fi + + cut -f 1 data/local/filtered_lexicon.txt | uconv -f utf8 -t utf8 -x Any-Lower | sort -u | \ + nl | awk '{print $2, $1;}' > data/$dir/kwset_${set}/base_words.txt + paste <(cut -f 1 data/$dir/kwset_${set}/keywords.txt ) \ + <(cut -f 2 data/$dir/kwset_${set}/keywords.txt | \ + uconv -f utf8 -t utf8 -x Any-Lower ) | \ + local/kwords2indices.pl --map-oov 0 data/$dir/kwset_${set}/base_words.txt |\ + perl -ane ' + if (grep (/^0$/, @F[1..$#F])) {print "$F[0] BaseOOV=1\n";} + else { print "$F[0] BaseOOV=0\n";}' |\ + cat - data/$dir/kwset_${set}/categories | sort -u |\ + local/search/normalize_categories.pl > data/$dir/kwset_${set}/categories.2 + mv data/$dir/kwset_${set}/categories data/$dir/kwset_${set}/categories.bak + mv data/$dir/kwset_${set}/categories.2 data/$dir/kwset_${set}/categories + + echo >&2 "Kwset $set processed successfully..." + done +fi + +if [ $stage -le 3 ] ; then + for set in ${kwsets[@]} ; do + fsts-union scp:<(sort data/$dir/kwset_${set}/tmp*/keywords.scp) \ + ark,t:"|gzip -c >data/$dir/kwset_${set}/keywords.fsts.gz" + done +fi + + +exit + +if [ $stage -le 4 ] ; then + for set in $kwsets ; do + for it in $(seq 1 4); do + system=exp/sgmm5_mmi_b0.1/decode_fmllr_$dir_it$it + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 9 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices \ + data/lang data/$dir $system + done + done +fi + +if [ $stage -le 5 ] ; then + for set in $kwsets ; do + system=exp/nnet3/lstm_bidirectional_sp/decode_$dir + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 9 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices \ + data/lang data/$dir $system + done +fi + +if [ $stage -le 6 ] ; then + for set in $kwsets ; do + system=exp/nnet3/lstm_sp/decode_$dir + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices \ + data/lang data/$dir $system + done +fi diff --git a/egs/babel/s5d/local/search/run_syll_search.sh b/egs/babel/s5d/local/search/run_syll_search.sh new file mode 100755 index 00000000000..eb48d836e77 --- /dev/null +++ b/egs/babel/s5d/local/search/run_syll_search.sh @@ -0,0 +1,135 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +stage=2 +dir=dev10h.pem +# End configuration section +. ./conf/common_vars.sh +. ./utils/parse_options.sh +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +. 
./lang.conf + +#Example script how to run keyword search using the Kaldi-native pipeline + + +if [ $stage -le 0 ]; then + local/generate_confusion_matrix.sh --nj 64 --cmd "$decode_cmd" \ + exp/sgmm5_denlats/dengraph/ exp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats exp/conf_matrix +fi + +if [ $stage -le 1 ] ; then + local/train_g2p.sh --cmd "$decode_cmd" data/local/lexicon.txt exp/g2p +fi + +dataset=${dir%%.*} +datatype=${dir#*.} + +lang=data/lang.syll +data=data/${dataset}.syll.${datatype} + +set +o nounset +eval kwsets=${!dataset_kwlists[@]} +eval my_ecf_file=\$${dataset}_ecf_file +eval my_rttm_file=\$${dataset}_rttm_file +set -o nounset + +my_array_name=${dataset}_kwlists + +eval kwsets=\( \${!$my_array_name[@]} \) +declare -p kwsets +for set in ${kwsets[@]} ; do + eval my_kwlist=\${$my_array_name[$set]} + declare -p my_kwlist +done +declare -p my_ecf_file +declare -p my_rttm_file + +if [ $stage -le 2 ] ; then + + for set in ${kwsets[@]} ; do + + eval my_kwlist=\${$my_array_name[$set]} + + #This will set up the basic files and converts the F4DE files into Kaldi-native format + local/search/setup.sh $my_ecf_file $my_rttm_file "${my_kwlist}" \ + $data $lang $data/kwset_${set} + + # we will search for the IV words normally (i.e. will look for the specificsequence + # of the words + local/search/compile_keywords.sh --filter "OOV=0&&Characters>2"\ + $data/kwset_${set} $lang $data/kwset_${set}/tmp.2 + + # in addition to the direct search of the IV words, we will set up the proxy + # search as well -- we will use lower nbest, compared to OOV=1 + #-- local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --category "OOV=0" \ + #-- --beam 5 --nbest 10 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \ + #-- ${data}/kwset_${set} ${lang} ${data}/${set}_oov_kws/tmp/L1.lex \ + #-- ${data}/${set}_oov_kws/tmp/L1.lex ${data}/kwset_${set}/tmp.3 + + local/search/compile_proxy_keywords.sh --cmd "$decode_cmd" --filter "OOV=1&&Characters>4"\ + --beam 5 --nbest 100 --nj 64 --confusion-matrix exp/conf_matrix/confusions.txt \ + ${data}/kwset_${set} ${lang} data/local/dict.syll/lexiconp.txt exp/g2p \ + ${data}/kwset_${set}/tmp.4 + + # and finally, replace the categories by the word-level categories + cp data/${dir}/kwset_${set}/categories $data/kwset_${set}/categories + done +fi + +if [ $stage -le 3 ] ; then + for set in ${kwsets[@]} ; do + fsts-union scp:<(sort $data/kwset_${set}/tmp*/keywords.scp) \ + ark,t:"|gzip -c >$data/kwset_${set}/keywords.fsts.gz" + done +fi + + +echo "Directories are set up -- running run-4-syll-anydecode.sh will take care of the rest" +exit 0 + +if [ $stage -le 4 ] ; then + for set in $kwsets ; do + for it in $(seq 1 4); do + system=exp/sgmm5_mmi_b0.1/decode_fmllr_$(basename $data)_it$it + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 9 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices ${lang} ${data} $system + done + done +fi + +if [ $stage -le 5 ] ; then + for set in $kwsets ; do + system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + +if [ $stage -le 6 ] ; then + for set in $kwsets ; do + system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem_17_8.5 + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + +if [ $stage -le 7 ] ; then + for set in $kwsets ; do + 
system=exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.bg + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + +if [ $stage -le 8 ] ; then + for set in $kwsets ; do + system=exp/tri6_nnet/decode_dev10h.syll.pem + local/search/search.sh --cmd "$decode_cmd" --min-lmwt 10 --max-lmwt 12 \ + --extraid ${set} --indices-dir $system/kws_indices $lang $data $system + done +fi + diff --git a/egs/babel/s5d/local/search/score.sh b/egs/babel/s5d/local/search/score.sh new file mode 100755 index 00000000000..e429b1da030 --- /dev/null +++ b/egs/babel/s5d/local/search/score.sh @@ -0,0 +1,143 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal) +# Apache 2.0. + +# Begin configuration section. +# case_insensitive=true +extraid= +min_lmwt=8 +max_lmwt=12 +cmd=run.pl +stage=0 +ntrue_from= +# End configuration section. + +help_message="$0: score the kwslist using the F4DE scorer from NIST + Example: + $0 [additional-parameters] + where the most important additional parameters can be: + --extraid #for using, when a non-default kws tasks are setup + (using the kws_setup.sh --extraid) for a kaldi-single data-dir" + +echo $0 $@ +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + + +if [ $# -ne 3 ]; then + printf "FATAL: incorrect number of variables given to the script\n\n" + printf "$help_message\n" + exit 1; +fi + +set -e -o pipefail + +langdir=$1 +if [ -z $extraid ] ; then + kwsdatadir=$2/kws +else + kwsdatadir=$2/kwset_${extraid} +fi +kwsoutputdir="$3" + +trials=$(cat $kwsdatadir/trials) +mkdir -p $kwsoutputdir/log/ + +if [ $stage -le 0 ] ; then + if [ -z "$ntrue_from" ]; then + for LMWT in $(seq $min_lmwt $max_lmwt) ; do + mkdir -p ${kwsoutputdir}_$LMWT/details/ + mkdir -p ${kwsoutputdir}_$LMWT/scoring/ + + # as we need to sweep through different ntrue-scales we will + # we will do it in one parallel command -- it will be more effective + # than sweeping in a loop and for all lmwts in parallel (as usuallyu + # there will be just a couple of different lmwts, but the ntrue-scale + # has a larger dynamic range + $cmd NTRUE=1:21 $kwsoutputdir/log/score.${LMWT}.NTRUE.log \ + ntrue=\$\(perl -e 'print 1+(NTRUE-1)/5.0' \) '&&' \ + cat ${kwsoutputdir}_$LMWT/results \|\ + local/search/normalize_results_kst.pl --trials $trials --ntrue-scale \$ntrue \|\ + local/search/filter_kws_results.pl --probs --nbest 200 \|\ + compute-atwv $trials ark,t:$kwsdatadir/hitlist ark:- \ + \> ${kwsoutputdir}_$LMWT/scoring/score.NTRUE.txt + + ntrue=$(grep ATWV ${kwsoutputdir}_$LMWT/scoring/score.*.txt | \ + sort -k2,2nr -t '=' | head -n 1 | \ + sed 's/.*score\.\([0-9][0-9]*\)\.txt.*/\1/g') + #The calculation of ntrue must be the same as in the command above + echo "$ntrue" > ${kwsoutputdir}_$LMWT/details/ntrue_raw + ntrue=$(perl -e "print 1+($ntrue-1)/5.0") + echo "$ntrue" > ${kwsoutputdir}_$LMWT/details/ntrue + done + else + for LMWT in $(seq $min_lmwt $max_lmwt) ; do + mkdir -p ${kwsoutputdir}_$LMWT/details/ + mkdir -p ${kwsoutputdir}_$LMWT/scoring/ + + cp ${ntrue_from}_${LMWT}/details/ntrue ${kwsoutputdir}_${LMWT}/details/ntrue + [ -f ${ntrue_from}_${LMWT}/details/ntrue_raw ] && \ + cp ${ntrue_from}_${LMWT}/details/ntrue_raw ${kwsoutputdir}_${LMWT}/details/ntrue_raw + echo "$ntrue_from" > ${kwsoutputdir}_${LMWT}/details/ntrue_from + done + fi +fi + +if [ $stage -le 1 ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/normalize.LMWT.log \ + cat 
${kwsoutputdir}_LMWT/results \|\ + local/search/normalize_results_kst.pl --trials $trials --ntrue-scale \$\(cat ${kwsoutputdir}_LMWT/details/ntrue\)\ + \> ${kwsoutputdir}_LMWT/details/results + + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/score.final.LMWT.log \ + cat ${kwsoutputdir}_LMWT/details/results \|\ + compute-atwv $trials ark,t:$kwsdatadir/hitlist ark:- \ + ${kwsoutputdir}_LMWT/details/alignment.csv \> ${kwsoutputdir}_LMWT/details/score.txt '&&' \ + cp ${kwsoutputdir}_LMWT/details/score.txt ${kwsoutputdir}_LMWT/score.txt + + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/per-category-stats.LMWT.log \ + cat ${kwsoutputdir}_LMWT/details/alignment.csv \|\ + perl local/search/per_category_stats.pl --sweep-step 0.005 $trials \ + $kwsdatadir/categories \> ${kwsoutputdir}_LMWT/details/per-category-score.txt +fi + +if [ $stage -le 2 ]; then +if [ -f $kwsdatadir/f4de_attribs ] ; then + language="" + flen=0.01 + kwlist_name="" + . $kwsdatadir/f4de_attribs #override the previous variables + + ecf=$kwsdatadir/ecf.xml + rttm=$kwsdatadir/rttm + kwlist=$kwsdatadir/kwlist.xml + + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_prepare.LMWT.log \ + mkdir -p ${kwsoutputdir}_LMWT/f4de/ '&&' cat $kwlist \| \ + local/search/annotate_kwlist.pl $kwsdatadir/categories \> ${kwsoutputdir}_LMWT/f4de/kwlist.xml + + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_write_kwslist.LMWT.log \ + cat ${kwsoutputdir}_LMWT/details/results \| \ + utils/int2sym.pl -f 2 $kwsdatadir/utt.map \| \ + local/search/utt_to_files.pl --flen $flen $kwsdatadir/../segments \|\ + local/search/write_kwslist.pl --flen $flen --language $language \ + --kwlist-id $kwlist_name \> ${kwsoutputdir}_LMWT/f4de/kwslist.xml + + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_score.LMWT.log \ + KWSEval -e $ecf -r $rttm -t ${kwsoutputdir}_LMWT/f4de/kwlist.xml -a \ + --zGlobalMeasures Optimum --zGlobalMeasures Supremum \ + -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \ + -O -B -q 'OOV:regex=.*' -q 'BaseOOV:regex=.*' \ + -s ${kwsoutputdir}_LMWT/f4de/kwslist.xml -c -o -b -d -f ${kwsoutputdir}_LMWT/f4de/ + + $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_report.LMWT.log \ + local/kws_oracle_threshold.pl --duration $trials \ + ${kwsoutputdir}_LMWT/f4de/alignment.csv \> ${kwsoutputdir}_LMWT/f4de/metrics.txt +fi +fi + +echo "$0: Done" +exit 0; + + diff --git a/egs/babel/s5d/local/search/search.sh b/egs/babel/s5d/local/search/search.sh new file mode 100755 index 00000000000..200a49d8e86 --- /dev/null +++ b/egs/babel/s5d/local/search/search.sh @@ -0,0 +1,206 @@ +#!/bin/bash +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal) +# License: Apache 2.0 + + +help_message="$(basename $0): do keyword indexing and search. data-dir is assumed to have + kws/ subdirectory that specifies the terms to search for. Output is in + decode-dir/kws/ + Usage: + $(basename $0) " + +# Begin configuration section. +min_lmwt=8 +max_lmwt=12 +cmd=run.pl +model= +skip_scoring=false +skip_optimization=false # true can speed it up if #keywords is small. +max_states=350000 +indices_dir= +kwsout_dir= +stage=0 +word_ins_penalty=0 +extraid= +silence_word= # specify this if you did to in kws_setup.sh, it's more accurate. +strict=false +duptime=0.6 +ntrue_scale=1.0 +nbest=-1 +max_silence_frames=50 +# End configuration section. + +echo "$0 $@" # Print the command line for logging + +[ -f ./path.sh ] && . ./path.sh; # source the path. +. 
parse_options.sh || exit 1; + +set -u +set -e +set -o pipefail + + +if [[ "$#" -ne "3" ]] ; then + echo -e "$0: FATAL: wrong number of script parameters!\n\n" + printf "$help_message\n\n" + exit 1; +fi + +silence_opt= + +langdir=$1 +datadir=$2 +decodedir=$3 + +if [ -z $extraid ] ; then + kwsdatadir=$datadir/kws +else + kwsdatadir=$datadir/kwset_${extraid} +fi + +if [ -z $extraid ] ; then + kwsoutdir=$decodedir/kws +else + kwsoutdir=$decodedir/kwset_${extraid} +fi + + +if [ -z $indices_dir ]; then + indices_dir=$kwsoutdir +fi + +if [ ! -z "$model" ]; then + model_flags="--model $model" +else + model_flags= +fi + +mkdir -p $kwsoutdir +for d in "$datadir" "$kwsdatadir" "$langdir" "$decodedir"; do + if [ ! -d "$d" ]; then + echo "$0: FATAL: expected directory $d to exist" + exit 1; + fi +done + +echo "$0: Searching: $kwsdatadir" +duration=$(cat $kwsdatadir/trials) +echo "$0: Duration: $duration" + + +frame_subsampling_factor=1 +if [ -f $decodedir/../frame_subsampling_factor ] ; then + frame_subsampling_factor=$(cat $decodedir/../frame_subsampling_factor) + echo "$0: Frame subsampling factor autodetected: $frame_subsampling_factor" +elif [ -f $decodedir/../../frame_subsampling_factor ] ; then + frame_subsampling_factor=$(cat $decodedir/../../frame_subsampling_factor) + echo "$0: Frame subsampling factor autodetected: $frame_subsampling_factor" +fi + +if [ $stage -le 0 ] ; then + if [ ! -f $indices_dir/.done.index ] ; then + [ ! -d $indices_dir ] && mkdir $indices_dir + for lmwt in $(seq $min_lmwt $max_lmwt) ; do + indices=${indices_dir}_$lmwt + mkdir -p $indices + + acwt=$(perl -e "print 1.0/$lmwt") + [ ! -z $silence_word ] && silence_opt="--silence-word $silence_word" + steps/make_index.sh $silence_opt --cmd "$cmd" --acwt $acwt $model_flags\ + --skip-optimization $skip_optimization --max-states $max_states \ + --word-ins-penalty $word_ins_penalty --max-silence-frames $max_silence_frames\ + --frame-subsampling-factor ${frame_subsampling_factor} \ + $kwsdatadir $langdir $decodedir $indices || exit 1 + done + touch $indices_dir/.done.index + else + echo "$0: Assuming indexing has been aready done. If you really need to re-run " + echo "$0: the indexing again, delete the file $indices_dir/.done.index" + fi +fi + +keywords=$kwsdatadir/keywords.fsts +if [ -f $keywords ] ; then + echo "$0: Using ${keywords} for search" + keywords="ark:$keywords" +elif [ -f ${keywords}.gz ] ; then + echo "$0: Using ${keywords}.gz for search" + keywords="ark:gunzip -c ${keywords}.gz |" +else + echo "$0: The keyword file ${keywords}[.gz] does not exist" +fi + + +if [ $stage -le 1 ]; then + for lmwt in $(seq $min_lmwt $max_lmwt) ; do + kwsoutput=${kwsoutdir}_$lmwt + indices=${indices_dir}_$lmwt + nj=$(cat $indices/num_jobs) + + + for f in $indices/index.1.gz ; do + [ ! 
-f $f ] && echo "$0: no such file $f" && exit 1; + done + + mkdir -p $kwsoutput/log + $cmd JOB=1:$nj $kwsoutput/log/search.JOB.log \ + set -e -o pipefail '&&' \ + kws-search --strict=$strict --negative-tolerance=-1 \ + --frame-subsampling-factor=${frame_subsampling_factor} \ + "ark:gzip -cdf $indices/index.JOB.gz|" "$keywords" \ + "ark,t:| sort -u | gzip -c > $kwsoutput/result.JOB.gz" \ + "ark,t:| sort -u | gzip -c > $kwsoutput/stats.JOB.gz" || exit 1; + done +fi + +if [ $stage -le 2 ]; then + for lmwt in $(seq $min_lmwt $max_lmwt) ; do + kwsoutput=${kwsoutdir}_$lmwt + indices=${indices_dir}_$lmwt + nj=$(cat $indices/num_jobs) + + # This is a memory-efficient way how to do the filtration + # we do this in this way because the result.* files can be fairly big + # and we do not want to run into troubles with memory + files="" + for job in $(seq 1 $nj); do + if [ -f $kwsoutput/result.${job}.gz ] ; then + files="$files <(gunzip -c $kwsoutput/result.${job}.gz)" + elif [ -f $kwsoutput/result.${job} ] ; then + files="$files $kwsoutput/result.${job}" + else + echo >&2 "The file $kwsoutput/result.${job}[.gz] does not exist" + exit 1 + fi + done + # we have to call it using eval as we need the bash to interpret + # the (possible) command substitution in case of gz files + # bash -c would probably work as well, but would spawn another + # shell instance + eval "sort -m -u $files" |\ + local/search/filter_kws_results.pl --likes --nbest $nbest > $kwsoutput/results || exit 1 + done +fi + +if [ -z $extraid ] ; then + extraid_flags= +else + extraid_flags=" --extraid ""$extraid"" " +fi + +if [ $stage -le 4 ]; then + if $skip_scoring ; then + echo "$0: Not scoring, because --skip-scoring true was issued" + elif [ ! -x local/kws_score.sh ] ; then + echo "$0: Not scoring, because the file local/kws_score.sh is not present" + else + echo "$0: Scoring KWS results" + local/search/score.sh --cmd "$decode_cmd" \ + --min-lmwt $min_lmwt --max-lmwt $max_lmwt $extraid_flags \ + $langdir $datadir ${kwsoutdir} || exit 1; + fi +fi + +echo "$0: Done" +exit 0 + diff --git a/egs/babel/s5d/local/search/setup.sh b/egs/babel/s5d/local/search/setup.sh new file mode 100755 index 00000000000..d4e2013a443 --- /dev/null +++ b/egs/babel/s5d/local/search/setup.sh @@ -0,0 +1,118 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +flen=0.01 +icu_transform="Any-Lower" +# End configuration section +set -e -o pipefail +set -o nounset # Treat unset variables as an error + + +if [ $# -eq 6 ]; then + ecf=$1 + rttm=$2 + kwlist=$3 + data=$4 + lang=$5 + output=$6 +elif [ $# -eq 5 ]; then + ecf=$1 + rttm="" + kwlist=$2 + data=$3 + lang=$4 + output=$5 +else + echo >&2 "Incorrect number of script parameters!" +fi + +mkdir -p $output +for f in $ecf $kwlist; do + [ ! -f $f ] && echo "Mandatory file \"$f\" does not exist." +done + + +# The first way how to compute the duration produced numbers significantly +# dufferent from the numbers reported by F4DE. 
I'm leaving it here to document
+# the fact that the signal_duration field is not the same number as the sum
+# of the individual durations (the dur field in each <excerpt>)
+#duration=`head -n 1 $ecf | sed 's/.*signal_duration=\"\([0-9.][0-9.]*\)\".*/\1/g'`
+#duration=`echo print $duration/2.0 | perl`
+
+duration=$(cat $ecf | perl -ne 'BEGIN{$dur=0;}{next unless $_ =~ /dur\=/; s/.*dur="([^"]*)".*/$1/; $dur+=$_;}END{print $dur/2}')
+
+echo $duration > $output/trials
+echo $flen > $output/frame_length
+
+echo "Number of trials: `cat $output/trials`"
+echo "Frame lengths: `cat $output/frame_length`"
+
+echo "Generating map files"
+cat $data/segments | awk 'BEGIN{i=1}; {print $1, i; i+=1;}' > $output/utt.map
+cat $data/wav.scp | awk 'BEGIN{i=1}; {print $1, i; i+=1;}' > $output/wav.map
+
+#This does not work: cp --no-preserve=all $ecf $output/ecf.xml
+cat $ecf > $output/ecf.xml
+cat $kwlist > $output/kwlist.xml
+[ ! -z "$rttm" ] && cat $rttm > $output/rttm
+
+{
+  echo "kwlist_name=`basename $kwlist`"
+  language=$(grep kwlist $kwlist | head -n 1 | sed -E 's/.*language="([^"]*)".*/\1/g')
+  echo "language=$language"
+  echo "flen=$flen"
+} > $output/f4de_attribs
+
+cat ${kwlist} | \
+  perl -ne '{
+    chomp;
+    next unless (m/<kwtext>/ || m/kwid/);
+    if ($_ =~ m/<kwtext>/) {
+      s/.*<kwtext>(.*)<\/kwtext>.*/$1/g;
+      die "Undefined format of the kwlist file!" unless defined $kwid;
+      print $kwid . "\t" . $_ . "\n"; }
+    else {
+      s/.*kwid="(.*)".*/$1/g; $kwid=$_;};
+  }' > $output/keywords.txt
+
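+# (for reference, the parser above assumes kwlist.xml entries of roughly this
+#  shape, with the kwid attribute line preceding the kwtext line:
+#    <kw kwid="KW204-00001">
+#      <kwtext>hello world</kwtext>
+#    </kw>
+#  which yields the keywords.txt line "KW204-00001<TAB>hello world")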
+
+command -v uconv >/dev/null 2>&1 || {
+  echo >&2 "I require uconv but it's not installed. Use $KALDI_ROOT/tools/extras/install_icu.sh to install it (or use the system packager)";
+  exit 1;
+}
+
+if [ -z "$icu_transform" ]; then
+  cp $lang/words.txt $output/words.txt
+else
+  uconv -f utf8 -t utf8 -x "${icu_transform}" -o $output/words.txt $lang/words.txt
+fi
+
+if [ -z "$icu_transform" ]; then
+  cat $output/keywords.txt
+else
+  paste <(cut -f 1 $output/keywords.txt ) \
+        <(cut -f 2 $output/keywords.txt | \
+          uconv -f utf8 -t utf8 -x "${icu_transform}" )
+fi | local/kwords2indices.pl --map-oov 0 $output/words.txt |\
+  sort -u > $output/keywords.int
+
+
+echo "Generating categories"
+{
+  local/search/create_categories.pl $output/keywords.txt
+  cat $output/keywords.int | perl -ane '
+    if (grep (/^0$/, @F[1..$#F])) {print "$F[0] OOV=1\n";}
+    else { print "$F[0] OOV=0\n";}'
+} | local/search/normalize_categories.pl > $output/categories
+
+if [ ! -z "$rttm" ] && [ -f $rttm ] ; then
+  local/search/rttm_to_hitlists.sh --segments $data/segments --utt-table $output/utt.map\
+    $rttm $kwlist $ecf $output/tmp $output/hitlist
+else
+  echo "Not generating the hitlist, scoring won't be possible"
+fi
+echo "Done"
+
+
diff --git a/egs/babel/s5d/local/search/utt_to_files.pl b/egs/babel/s5d/local/search/utt_to_files.pl
new file mode 100755
index 00000000000..ad5da8a50bf
--- /dev/null
+++ b/egs/babel/s5d/local/search/utt_to_files.pl
@@ -0,0 +1,62 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+# Converts the kws results with utterance-relative times into whole-file offsets
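+# Example (hypothetical IDs, frame length flen=0.01): given the segments line
+#   utt_0003 file_A 12.40 19.90
+# the utterance starts at frame 1240, so the result line
+#   KW204-01 utt_0003 10 34 0.87
+# is rewritten as
+#   KW204-01 file_A 1250 1274 0.87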
+use strict;
+use warnings;
+use utf8;
+use Data::Dumper;
+use Getopt::Long;
+
+my $flen = 0.01;
+
+GetOptions ("flen=f" => \$flen) or die "$0: Cannot parse the command-line options\n";
+
+my $segments=$ARGV[0];
+my %SEGMENTS;
+
+open(SEG, $segments) or die "Cannot open the segments file $segments";
+while(my $line = <SEG>) {
+  chomp $line;
+  my @entries = split(" ", $line);
+  die "The format of the line \"$line\" does not conform to the segments file format" if @entries != 4;
+
+  $SEGMENTS{$entries[0]} = \@entries;
+}
+
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my @entries = split(" ", $line);
+  die "The format of the line \"$line\" does not conform to the result.* file format" if @entries != 5;
+
+  my $kw = $entries[0];
+  my $utt = $entries[1];
+  my $start = $entries[2];
+  my $end = $entries[3];
+  my $score = $entries[4];
+
+  die "The utterance $utt is not in the segments file" unless exists $SEGMENTS{$utt};
+  my $file = $SEGMENTS{$utt}->[1];
+  my $utt_start = int( 0.5 + $SEGMENTS{$utt}->[2] / $flen);
+  my $utt_end = int(0.5 + $SEGMENTS{$utt}->[3] / $flen);
+
+  $start += $utt_start;
+  $end += $utt_start;
+  print "$kw $file $start $end $score\n";
+}
diff --git a/egs/babel/s5d/local/search/write_kwslist.pl b/egs/babel/s5d/local/search/write_kwslist.pl
new file mode 100755
index 00000000000..ade87212829
--- /dev/null
+++ b/egs/babel/s5d/local/search/write_kwslist.pl
@@ -0,0 +1,134 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal <jtrmal@gmail.com>)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABILITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+my $Usage = <<EOU;
+Usage: $0 [options] < results > kwslist.xml
+
+Allowed options:
+  --flen       : duration (in seconds) of one audio/feature frame
+  --language   : language (string, default "")
+  --kwlist-id  : kwlist.xml name (string, default "")
+  --system-id  : name of the system (string, default "")
+  --digits     : how many digits should the scores be rounded to?
+                 (int, default 2). Sometimes F4DE gets extremely slow
+                 when the scores have too many digits (perhaps some sweeping
+                 issue). This switch can be used to prevent that.
+EOU
+
+use strict;
+use warnings;
+use utf8;
+
+use POSIX;
+use Data::Dumper;
+use Getopt::Long;
+
+my $flen = 0.01;
+my $language="";
+my $kwlist_filename="";
+my $system_id="";
+my $digits = 2;
+
+GetOptions("flen=f" => \$flen,
+           "language=s" => \$language,
+           "kwlist-id=s" => \$kwlist_filename,
+           "system-id=s" => \$system_id,
+           "digits=i" => \$digits) or do {
+  print STDERR "Cannot parse the command-line options.\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue.\n";
+};
+
+if (@ARGV != 0) {
+  print STDERR "Incorrect number of command-line arguments\n";
+  print STDERR "$Usage\n";
+  die "Cannot continue.\n";
+}
+
+sub KwsOutputSort {
+  my $a = shift @_;
+  my $b = shift @_;
+
+  if ($a->[4] != $b->[4]) {
+    #score
+    return $b->[4] <=> $a->[4];
+  } elsif ($a->[1] ne $b->[1]) {
+    return $a->[1] cmp $b->[1];
+  } else {
+    return $a->[2] <=> $b->[2];
+  }
+}
+
+sub PrettyPrint {
+  my @instances = sort {KwsOutputSort($a, $b)} @{shift @_};
+
+  return if @instances <= 0;
+  my $kwid=$instances[0]->[0];
+
+  print "  <detected_kwlist kwid=\"$kwid\" search_time=\"1\" oov_count=\"0\">\n";
+  foreach my $elem(@instances) {
+    (my $kwidx, my $file, my $start, my $end, my $score) = @{$elem};
+    my $filename="file=\"$file\"";
+
+    # this is because the decision has to be done on the already
+    # rounded number (otherwise it can confuse F4DE).
+    # It's because we do the decision based on the non-rounded score
+    # but F4DE will see only the rounded score, so the decision
+    # wouldn't be correctly aligned with the score (especially, for
+    # some numbers with score 0.5 the decision would be "YES" and for
+    # others with the same score, the decision would be "NO")
+    $score = sprintf "%.${digits}f", $score;
+    my $decision=$score >= 0.5 ? "decision=\"YES\"" : "decision=\"NO\"";
+    my $tbeg = $start * $flen;
+    my $dur = $end * $flen - $tbeg;
+
+    $tbeg=sprintf "tbeg=\"%.${digits}f\"", $tbeg;
+    $dur=sprintf "dur=\"%.${digits}f\"", $dur;
+    $score=sprintf "score=\"%.${digits}f\"", $score;
+    my $channel="channel=\"1\"";
+
+    print "    <kw $filename $channel $tbeg $dur $score $decision/>\n";
+  }
+  print "  </detected_kwlist>\n";
+}
+
+my $KWID="";
+my @putative_hits;
+
+print "<kwslist kwlist_filename=\"$kwlist_filename\" language=\"$language\" system_id=\"$system_id\">\n";
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  (my $kwid, my $file, my $start, my $end, my $score) = split " ", $line;
+
+  if ($kwid ne $KWID) {
+    PrettyPrint(\@putative_hits) if $KWID;
+    $KWID=$kwid;
+    @putative_hits = ();
+  }
+
+  push @putative_hits, [$kwid, $file, $start, $end, $score];
+
+}
+PrettyPrint(\@putative_hits) if $KWID;
+
+print "</kwslist>\n";
diff --git a/egs/babel/s5d/local/search_index.sh b/egs/babel/s5d/local/search_index.sh
new file mode 100755
index 00000000000..9e7cdb77f3d
--- /dev/null
+++ b/egs/babel/s5d/local/search_index.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0
+
+# Begin configuration section.
+cmd=run.pl
+nbest=-1
+strict=true
+indices_dir=
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 2 ]; then
+  echo "Usage: steps/search_index.sh [options] <kws-data-dir> <kws-dir>"
+  echo " e.g.: steps/search_index.sh data/kws exp/sgmm2_5a_mmi/decode/kws/"
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --nbest <int>                          # return n best results. 
(-1 means all)" + echo " --indices-dir # where the indices should be stored, by default it will be in " + exit 1; +fi + + +kwsdatadir=$1; +kwsdir=$2; + +if [ -z $indices_dir ] ; then + indices_dir=$kwsdir +fi + +mkdir -p $kwsdir/log; +nj=`cat $indices_dir/num_jobs` || exit 1; +keywords=$kwsdatadir/keywords.fsts; + +for f in $indices_dir/index.1.gz $keywords; do + [ ! -f $f ] && echo "make_index.sh: no such file $f" && exit 1; +done + +$cmd JOB=1:$nj $kwsdir/log/search.JOB.log \ + kws-search --strict=$strict --negative-tolerance=-1 \ + "ark:gzip -cdf $indices_dir/index.JOB.gz|" ark:$keywords \ + "ark,t:|int2sym.pl -f 2 $kwsdatadir/utter_id | sort -u | gzip > $kwsdir/result.JOB.gz" \ + "ark,t:|int2sym.pl -f 2 $kwsdatadir/utter_id | sort -u | gzip > $kwsdir/stats.JOB.gz" || exit 1; + +exit 0; diff --git a/egs/babel/s5d/local/setup_categories.sh b/egs/babel/s5d/local/setup_categories.sh new file mode 100644 index 00000000000..ffc65173786 --- /dev/null +++ b/egs/babel/s5d/local/setup_categories.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +# End configuration section +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +set=kwlist +output=data/dev10h.pem/kwset_${set}/ + +{ + local/search/create_categories.pl $output/keywords.txt + cat $output/keywords.int | perl -ane ' + if (grep (/^0$/, @F[1..$#F])) {print "$F[0] OOV=1\n";} + else { print "$F[0] OOV=0\n";}' +} | local/search/normalize_categories.pl > $output/categories +cut -f 1 data/local/filtered_lexicon.txt | uconv -f utf8 -t utf8 -x Any-Lower | sort -u | \ + nl | awk '{print $2, $1;}' > data/dev10h.pem/kwset_${set}/base_words.txt + paste <(cut -f 1 data/dev10h.pem/kwset_${set}/keywords.txt ) \ + <(cut -f 2 data/dev10h.pem/kwset_${set}/keywords.txt | \ + uconv -f utf8 -t utf8 -x Any-Lower ) | \ + local/kwords2indices.pl --map-oov 0 data/dev10h.pem/kwset_${set}/base_words.txt |\ + perl -ane ' + if (grep (/^0$/, @F[1..$#F])) {print "$F[0] BaseOOV=1\n";} + else { print "$F[0] BaseOOV=0\n";}' |\ + cat - data/dev10h.pem/kwset_${set}/categories | sort -u |\ + local/search/normalize_categories.pl > data/dev10h.pem/kwset_${set}/categories.2 + mv data/dev10h.pem/kwset_${set}/categories data/dev10h.pem/kwset_${set}/categories.bak + mv data/dev10h.pem/kwset_${set}/categories.2 data/dev10h.pem/kwset_${set}/categories + +cp data/dev10h.pem/kwset_kwlist/categories data/dev10h.phn.pem/kwset_kwlist/categories +cp data/dev10h.pem/kwset_kwlist/categories data/dev10h.syll.pem/kwset_kwlist/categories +find exp/ -name ".done.kwset.kwlist" | xargs rm + diff --git a/egs/babel/s5d/local/shadow_set_kws_search.sh b/egs/babel/s5d/local/shadow_set_kws_search.sh new file mode 100755 index 00000000000..a67a3a57f6a --- /dev/null +++ b/egs/babel/s5d/local/shadow_set_kws_search.sh @@ -0,0 +1,265 @@ +#!/bin/bash + +# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal) +# Apache 2.0. + +#Fail at any unhandled non-zero error code +set -e +set -o pipefail + +help_message="$0: create subset of the input directory (specified as the first directory). + The subset is specified by the second parameter. + The directory in which the subset should be created is the third parameter + Example: + $0 [data-dir2 [data-dir3 [ ...] ]" + +# Begin configuration section. 
+#acwt=0.0909091 +min_lmwt=7 +max_lmwt=17 +duptime=0.6 +cmd=run.pl +model= +skip_scoring=false +stage=0 +strict=true +skip_optimization=false +max_states=150000 +word_ins_penalty=0 +index_only=false +ntrue_scale=0.1 +# End configuration section. + +echo "$0 $@" # Print the command line for logging +[ -f ./path.sh ] && . ./path.sh; # source the path. +. parse_options.sh || exit 1; + +if [[ "$#" -le "2" ]] ; then + echo -e "FATAL: wrong number of script parameters!\n\n" + printf "$help_message\n\n" + exit 1; +fi + + +datadir=$1 +langdir=$2 +decodedir=$3 +shift; shift; shift; +datasetA=$1 +datasetB=$2 + + +if [[ ! -d "$langdir" ]] ; then + echo "FATAL: the lang directory does not exist" + exit 1; +fi +if [[ ! -d "$decodedir" ]] ; then + echo "FATAL: the directory with decoded files does not exist" + exit 1; +fi + +for splitdatadir in $@ ; do + kwsdatadir=$splitdatadir/kws + if [ ! -d "$splitdatadir" ] ; then + echo "FATAL: the data directory $splitdatadir does not exist" + exit 1; + fi + if [ ! -d "$kwsdatadir" ] ; then + echo "FATAL: the data directory $kwsdatadir does not exist" + exit 1; + fi + if [ ! -f "$kwsdatadir/ecf.xml" ] ; then + echo "FATAL: the $kwsdatadir does not contain the ecf.xml file" + exit 1; + fi +done + +kwsdatadir=$datadir/kws + +! durationA=`head -1 $datasetA/kws/ecf.xml |\ + grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\ + perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2;}'` && + echo "Error getting duration from $datasetA/kws/ecf.xml" && exit 1; + + +! durationB=`head -1 $datasetB/kws/ecf.xml |\ + grep -o -E "duration=\"[0-9]*[ \.]*[0-9]*\"" |\ + perl -e 'while($m=<>) {$m=~s/.*\"([0-9.]+)\".*/\1/; print $m/2;}'` && + echo "Error getting duration from $datasetB/kws/ecf.xml" && exit 1; + +[ -z $durationA ] && echo "Error getting duration from $datasetA/kws/ecf.xml" && exit 1; +[ -z $durationB ] && echo "Error getting duration from $datasetB/kws/ecf.xml" && exit 1; + +if [ ! -z "$model" ]; then + model_flags="--model $model" +fi + +mkdir -p $decodedir/kws/ +if [ $stage -le 0 ] ; then + echo "Making KWS indices..." + if [ ! -f $decodedir/kws/.done.index ] ; then + for lmwt in `seq $min_lmwt $max_lmwt` ; do + kwsoutdir=$decodedir/kws_$lmwt + mkdir -p $kwsoutdir + + acwt=`perl -e "print (1.0/$lmwt);"` + steps/make_index.sh --strict $strict --cmd "$cmd" --max-states $max_states\ + --acwt $acwt $model_flags --skip-optimization $skip_optimization \ + --word_ins_penalty $word_ins_penalty \ + $kwsdatadir $langdir $decodedir $kwsoutdir || exit 1 + done + touch $decodedir/kws/.done.index + else + echo "Assuming indexing has been aready done. If you really need to re-run " + echo "the indexing again, delete the file $decodedir/kws/.done.index" + fi +fi + +if $index_only ; then + echo "Indexing only was requested, existing now..." + exit 0 +fi + +if [ $stage -le 1 ] ; then + echo "Searching KWS indices..." + for lmwt in `seq $min_lmwt $max_lmwt` ; do + kwsoutdir=$decodedir/kws_$lmwt + dirA=$decodedir/`basename $datasetA`/kws_$lmwt + dirB=$decodedir/`basename $datasetB`/kws_$lmwt + mkdir -p $dirA + mkdir -p $dirB + + steps/search_index.sh --cmd "$cmd" $kwsdatadir $kwsoutdir || exit 1 + + [ ! -f $datasetA/kws/utter_id ] && echo "File $datasetA/kws/utter_id must exist!" && exit 1; + cat $kwsoutdir/result.* | \ + grep -F -f <(cut -f 1 -d ' ' $datasetA/kws/utter_id ) |\ + grep "^KW[-a-zA-Z0-9]*-A " | \ + sed 's/^\(KW.*\)-A /\1 /g' > $dirA/results + + [ ! -f $datasetB/kws/utter_id ] && echo "File $datasetB/kws/utter_id must exist!" 
&& exit 1; + cat $kwsoutdir/result.* | \ + grep -F -f <(cut -f 1 -d ' ' $datasetB/kws/utter_id ) |\ + grep "^KW[-a-zA-Z0-9]*-B " | \ + sed 's/^\(KW.*\)-B /\1 /g' > $dirB/results + + + dirA=$decodedir/`basename $datasetA`_`basename $datasetB`/kws_$lmwt + dirB=$decodedir/`basename $datasetB`_`basename $datasetA`/kws_$lmwt + mkdir -p $dirA + mkdir -p $dirB + [ ! -f $datasetA/kws/utter_id ] && echo "File $datasetA/kws/utter_id must exist!" && exit 1; + cat $kwsoutdir/result.* | \ + grep -F -f <(cut -f 1 -d ' ' $datasetA/kws/utter_id ) |\ + grep "^KW[-a-zA-Z0-9]*-B " | \ + sed 's/^\(KW.*\)-B /\1 /g' > $dirA/results + + [ ! -f $datasetB/kws/utter_id ] && echo "File $datasetB/kws/utter_id must exist!" && exit 1; + cat $kwsoutdir/result.* | \ + grep -F -f <(cut -f 1 -d ' ' $datasetB/kws/utter_id ) |\ + grep "^KW[-a-zA-Z0-9]*-A " | \ + sed 's/^\(KW.*\)-A /\1 /g' > $dirB/results + done +fi + +rootdirA=$decodedir/`basename $datasetA` +rootdirB=$decodedir/`basename $datasetB` +rootdirAB=$decodedir/`basename $datasetA`_`basename $datasetB` +rootdirBA=$decodedir/`basename $datasetB`_`basename $datasetA` + + +echo "Processing $datasetA" +if [ $stage -le 2 ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirA/kws/kws_write_normalized.LMWT.log \ + set -e';' set -o pipefail';' \ + cat $rootdirA/kws_LMWT/results \| \ + utils/write_kwslist.pl --flen=0.01 --duration=$durationA \ + --segments=$datadir/segments --normalize=true --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map --digits=3 - $rootdirA/kws_LMWT/kwslist.xml || exit 1 + + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_write_normalized.LMWT.log \ + set -e';' set -o pipefail';' \ + cat $rootdirAB/kws_LMWT/results \| \ + utils/write_kwslist.pl --flen=0.01 --duration=$durationA \ + --segments=$datadir/segments --normalize=true --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map --digits=3 - $rootdirAB/kws_LMWT/kwslist.xml || exit 1 +fi + +if [ $stage -le 3 ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirA/kws/kws_write_unnormalized.LMWT.log \ + set -e';' set -o pipefail';' \ + cat $rootdirA/kws_LMWT/results \| \ + utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationA \ + --segments=$datadir/segments --normalize=false --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map - $rootdirA/kws_LMWT/kwslist.unnormalized.xml || exit 1 + + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_write_unnormalized.LMWT.log \ + set -e';' set -o pipefail';' \ + cat $rootdirAB/kws_LMWT/results \| \ + utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationA \ + --segments=$datadir/segments --normalize=false --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map - $rootdirAB/kws_LMWT/kwslist.unnormalized.xml || exit 1 +fi + +echo "Scoring $datasetA" +if [ $stage -le 4 ] ; then + if [[ (! -x local/kws_score.sh ) || ($skip_scoring == true) ]] ; then + echo "Not scoring, because the file local/kws_score.sh is not present" + exit 1 + elif [ ! 
-f $datasetA/kws/rttm ] ; then + echo "Not scoring, because the file $datasetA/kws/rttm is not present" + else + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirA/kws/kws_scoring.LMWT.log \ + local/kws_score.sh $datasetA $rootdirA/kws_LMWT + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirAB/kws/kws_scoring.LMWT.log \ + local/kws_score.sh --kwlist $datasetB/kws/kwlist.xml $datasetA $rootdirAB/kws_LMWT + fi +fi + +echo "Processing $datasetB" +if [ $stage -le 5 ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirB/kws/kws_write_normalized.LMWT.log \ + set -e';' set -o pipefail';' \ + cat $rootdirB/kws_LMWT/results \| \ + utils/write_kwslist.pl --flen=0.01 --duration=$durationB \ + --segments=$datadir/segments --normalize=true --digits=3 --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map - $rootdirB/kws_LMWT/kwslist.xml || exit 1 + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirBA/kws/kws_write_normalized.LMWT.log \ + set -e';' set -o pipefail';' \ + cat $rootdirBA/kws_LMWT/results \| \ + utils/write_kwslist.pl --flen=0.01 --duration=$durationB \ + --segments=$datadir/segments --normalize=true --digits=3 --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map - $rootdirBA/kws_LMWT/kwslist.xml || exit 1 +fi + +if [ $stage -le 6 ] ; then + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirB/kws/kws_write_unnormalized.LMWT.log \ + set -e';' set -o pipefail';' \ + cat $rootdirB/kws_LMWT/results \| \ + utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationB \ + --segments=$datadir/segments --normalize=false --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map - $rootdirB/kws_LMWT/kwslist.unnormalized.xml || exit 1 + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirBA/kws/kws_write_unnormalized.LMWT.log \ + set -e';' set -o pipefail';' \ + cat $rootdirBA/kws_LMWT/results \| \ + utils/write_kwslist.pl --Ntrue-scale=$ntrue_scale --flen=0.01 --duration=$durationB \ + --segments=$datadir/segments --normalize=false --remove-dup=true\ + --map-utter=$kwsdatadir/utter_map - $rootdirBA/kws_LMWT/kwslist.unnormalized.xml || exit 1 +fi + +echo "Scoring $datasetB" +if [ $stage -le 7 ] ; then + if [[ (! -x local/kws_score.sh ) || ($skip_scoring == true) ]] ; then + echo "Not scoring, because the file local/kws_score.sh is not present" + elif [ ! -f $datasetB/kws/rttm ] ; then + echo "Not scoring, because the file $datasetB/kws/rttm is not present" + else + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirB/kws/kws_scoring.LMWT.log \ + local/kws_score.sh $datasetB $rootdirB/kws_LMWT || exit 1 + $cmd LMWT=$min_lmwt:$max_lmwt $rootdirBA/kws/kws_scoring.LMWT.log \ + local/kws_score.sh --kwlist $datasetA/kws/kwlist.xml $datasetB $rootdirBA/kws_LMWT || exit 1 + fi +fi + +echo "Done, everything seems fine" +exit 0 diff --git a/egs/babel/s5d/local/show_lattice.sh b/egs/babel/s5d/local/show_lattice.sh new file mode 100755 index 00000000000..f18132234ee --- /dev/null +++ b/egs/babel/s5d/local/show_lattice.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +. path.sh + +format=pdf # pdf svg +output= + +. utils/parse_options.sh + +if [ $# != 3 ]; then + echo "usage: $0 [--format pdf|svg] [--output ] " + echo "e.g.: $0 utt-0001 \"test/lat.*.gz\" tri1/graph/words.txt" + exit 1; +fi + +uttid=$1 +lat=$2 +words=$3 + +tmpdir=$(mktemp -d /tmp/kaldi.XXXX); trap "rm -r $tmpdir" EXIT # cleanup + +gunzip -c $lat | lattice-to-fst ark:- ark,scp:$tmpdir/fst.ark,$tmpdir/fst.scp || exit 1 +! 
grep "^$uttid " $tmpdir/fst.scp && echo "ERROR : Missing utterance '$uttid' from gzipped lattice ark '$lat'" && exit 1 +fstcopy "scp:grep '^$uttid ' $tmpdir/fst.scp |" "scp:echo $uttid $tmpdir/$uttid.fst |" || exit 1 +fstdraw --portrait=true --osymbols=$words $tmpdir/$uttid.fst | dot -T${format} > $tmpdir/$uttid.${format} + +if [ ! -z $output ]; then + cp $tmpdir/$uttid.${format} $output +fi + +[ $format == "pdf" ] && evince $tmpdir/$uttid.pdf +[ $format == "svg" ] && eog $tmpdir/$uttid.svg + +exit 0 diff --git a/egs/babel/s5d/local/split_ctms.sh b/egs/babel/s5d/local/split_ctms.sh new file mode 100755 index 00000000000..b24a1380111 --- /dev/null +++ b/egs/babel/s5d/local/split_ctms.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal) + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +# begin configuration section. +min_lmwt=7 +max_lmwt=17 +stage=0 +cer=0 +ctm_name= +cmd=run.pl +#end configuration section. + +echo "$0 $@" + +[ -f ./path.sh ] && . ./path.sh +[ -f ./cmd.sh ] && . ./cmd.sh +. parse_options.sh || exit 1; + +set -e +set -o pipefail + +data=$1; +q=$2; +shift; shift; + +if [ -z $ctm_name ] ; then + ctm_name=`basename $data`; +fi + +name=$ctm_name + +for i in $@ ; do + p=$q/`basename $i` + [ ! -f $i/reco2file_and_channel ] && "The file reco2file_and_channel not present in the $i directory!" && exit 1 + for lmw in $q/score_* ; do + test -d $lmw || exit 1; #this is to protect us before creating directory "score_*" in cases no real score_[something] directory exists + d=$p/`basename $lmw` + mkdir -p $d + + [ ! -f $lmw/$name.ctm ] && echo "File $lmw/$name.ctm does not exist!" && exit 1 + utils/filter_scp.pl <(cut -f 1 -d ' ' $i/reco2file_and_channel) $lmw/$name.ctm > $d/`basename $i`.ctm + done + + if [ -f $i/stm ] ; then + local/score_stm.sh --min-lmwt $min_lmwt --max-lmwt $max_lmwt --cer $cer --cmd "$cmd" $i data/lang $p + else + echo "Not running scoring, file $i/stm does not exist" + fi + +done +exit 0 + diff --git a/egs/babel/s5d/local/stm2text.pl b/egs/babel/s5d/local/stm2text.pl new file mode 100755 index 00000000000..3b069c63554 --- /dev/null +++ b/egs/babel/s5d/local/stm2text.pl @@ -0,0 +1,43 @@ +#!/usr/bin/env perl + +# Copyright 2012 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0. 
+
+#This script takes the source STM file and generates the *.txt files which
+#are usually part of the BABEL delivery
+#The *.txt files are not part of the delivery for the evalpart1 subset
+#The program works as a filter and the only parameter it expects is
+#the path to the output directory
+#The filenames are figured out from the STM file
+#example of usage:
+# cat data/evalpart1/stm | local/stm2text.pl data/raw_evalpart1_data/transcriptions
+
+use strict;
+use warnings;
+
+use utf8;
+use Data::Dumper;
+
+binmode(STDIN, ":encoding(utf8)");
+binmode(STDOUT, ":encoding(utf8)");
+
+my $output_dir = $ARGV[0];
+my $prev_filename = "";
+my $OUTPUT;
+while ( <STDIN> ) {
+  chop;
+  my ($filename, $channel, $speaker, $start, $end, $text) = split(" ", $_, 6);
+  next if ( $filename =~ /;;.*/ );
+  #$filename =~ s/;;(.*)/$1/ if ( $filename =~ /;;.*/ );
+  $text = "" if not $text;
+
+  if ( $prev_filename ne $filename ) {
+    #close($OUTPUT) if ( tell(FH) != -1 );
+    print "$output_dir/$filename.txt\n";
+    open($OUTPUT, ">:encoding(UTF-8)", "$output_dir/$filename.txt") or die $!;
+    $prev_filename = $filename;
+  }
+
+  print $OUTPUT "[$start]\n";
+  print $OUTPUT "$text\n";
+}
diff --git a/egs/babel/s5d/local/subset_atwv.pl b/egs/babel/s5d/local/subset_atwv.pl
new file mode 100755
index 00000000000..ce6b7043116
--- /dev/null
+++ b/egs/babel/s5d/local/subset_atwv.pl
@@ -0,0 +1,120 @@
+#!/usr/bin/env perl
+
+# Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
+# Apache 2.0.
+#
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: subset_atwv.pl [options] <keywords-list|-> <bsum-file>
+ e.g.: subset_atwv.pl keywords.list bsum.txt
+
+This script will compute the ATWV for a subset of the original keywords in bsum.txt.
+Note that bsum.txt is a file generated by the NIST scoring tool F4DE. keywords.list
+is a list of the keywords that you want to compute the ATWV for. For example:
+KW101-0001
+KW101-0002
+...
+
+Allowed options:
+  --subset-name <subset-name>  : Name of the subset   (string, default = "")
+  --width       <width>        : Width of the printed numbers  (int, default = 5 )
+EOU
+
+my $subset_name = "";
+my $width = 5;
+GetOptions('subset-name=s' => \$subset_name,
+  'width=i' => \$width);
+
+@ARGV == 2 || die $Usage;
+
+# Work out the input/output source
+my $kws_filename = shift @ARGV;
+my $bsum_filename = shift @ARGV;
+
+my $source = "STDIN";
+if ($kws_filename ne "-") {
+  open(KWS, "<$kws_filename") || die "Fail to open keywords file: $kws_filename\n";
+  $source = "KWS";
+}
+open(BSUM, "<$bsum_filename") || die "Fail to open bsum file: $bsum_filename\n";
+
+# Read in the keywords.
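+# The IDs read here are joined into a single regex alternation (for instance
+# "KW101-0001|KW101-0002"), which each bsum.txt row is matched against below.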
+my $kws = "";
+while (<$source>) {
+  chomp;
+  my @col = split();
+  @col == 1 || die "Bad line $_\n";
+  if ($kws eq "") {
+    $kws = $col[0];
+  } else {
+    $kws .= "|$col[0]";
+  }
+}
+
+# Process bsum.txt
+my $targ_sum = 0;
+my $corr_sum = 0;
+my $fa_sum = 0;
+my $miss_sum = 0;
+my $twv_sum = 0;
+my $count = 0;
+my $subset_count = 0;
+my $flag = 0;
+if ($kws ne "") {
+  while (<BSUM>) {
+    chomp;
+    # Work out the total keywords that have occurrence in the search collection
+    if (/^Summary Totals/) {$flag = 0;}
+    if (/^Keyword/) {$flag = 1;}
+    my @col;
+    if ($flag == 1) {
+      # Figure out keywords that don't have occurrences in the search collection
+      @col = split(/\|/, $_);
+      $col[2] =~ s/^\s+//;
+      $col[2] =~ s/\s+$//;
+      $col[2] ne "" || next;
+      $count ++;
+    } else {
+      next;
+    }
+
+    # Only collect statistics for given subset
+    m/$kws/ || next;
+
+    # Keywords that are in the given subset, and have occurrences
+    $targ_sum += $col[2];
+    $corr_sum += $col[3];
+    $fa_sum += $col[4];
+    $miss_sum += $col[5];
+    $twv_sum += $col[6];
+    $subset_count ++;
+  }
+}
+
+# Compute ATWV
+my $subset_atwv = ($subset_count == 0) ? 0 : $twv_sum/$subset_count;
+my $atwv = ($count == 0) ? 0 : $twv_sum/$count;
+my $bp_atwv = ($count == 0) ? 0 : $subset_count/$count;
+
+# Format the numbers
+my $format = "%-${width}d";
+$subset_count = sprintf($format, $subset_count);
+$targ_sum = sprintf($format, $targ_sum);
+$corr_sum = sprintf($format, $corr_sum);
+$fa_sum = sprintf($format, $fa_sum);
+$miss_sum = sprintf($format, $miss_sum);
+$subset_atwv = sprintf("% .4f", $subset_atwv);
+$atwv = sprintf("% .4f", $atwv);
+$bp_atwv = sprintf("% .4f", $bp_atwv);
+
+# Print
+if ($subset_name ne "") {print "$subset_name: ";}
+print "#Keywords=$subset_count, #Targ=$targ_sum, #Corr=$corr_sum, #FA=$fa_sum, #Miss=$miss_sum, ";
+print "Contributed ATWV=$atwv, Best Possible Contributed ATWV=$bp_atwv, ATWV=$subset_atwv\n";
+
+if ($kws_filename ne "-") {close(KWS);}
+close(BSUM);
diff --git a/egs/babel/s5d/local/subset_kwslist.pl b/egs/babel/s5d/local/subset_kwslist.pl
new file mode 100755
index 00000000000..361291179ef
--- /dev/null
+++ b/egs/babel/s5d/local/subset_kwslist.pl
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+
+# Copyright 2012  Johns Hopkins University
+# Apache 2.0.
+#
+use strict;
+use warnings;
+use XML::Simple;
+use Data::Dumper;
+
+binmode STDOUT, ":utf8";
+
+my %seen;
+while (my $keyword = <STDIN>) {
+  chomp $keyword;
+  $seen{$keyword} = 1;
+}
+
+
+my $data = XMLin($ARGV[0], ForceArray => 1);
+
+#print Dumper($data->{kw});
+my @filtered_kws = ();
+
+foreach my $kwentry (@{$data->{kw}}) {
+  if (defined $seen{$kwentry->{kwid}}) {
+    push @filtered_kws, $kwentry;
+  }
+}
+$data->{kw} = \@filtered_kws;
+my $xml = XMLout($data, RootName=> "kwlist", KeyAttr=>'');
+print $xml;
+exit 0
diff --git a/egs/babel/s5d/local/summarize_logs.pl b/egs/babel/s5d/local/summarize_logs.pl
new file mode 100755
index 00000000000..e816d57d68f
--- /dev/null
+++ b/egs/babel/s5d/local/summarize_logs.pl
@@ -0,0 +1,121 @@
+#!/usr/bin/env perl
+
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
+
+#scalar(@ARGV) >= 1 && print STDERR "Usage: summarize_warnings.pl <log-dir>\n" && exit 1;
+
+sub split_hundreds { # split list of filenames into groups of 100.
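+  # Each returned group is later interpolated into a single grep invocation;
+  # batching in hundreds (presumably) keeps those command lines from growing
+  # unboundedly with the number of log files.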
+  my $names = shift @_;
+  my @A = split(" ", $names);
+  my @ans = ();
+  while (@A > 0) {
+    my $group = "";
+    for ($x = 0; $x < 100 && @A>0; $x++) {
+      $fname = pop @A;
+      $group .= "$fname ";
+    }
+    push @ans, $group;
+  }
+  return @ans;
+}
+
+sub parse_accounting_entry {
+  $entry= shift @_;
+
+  @elems = split " ", $entry;
+
+  $time=undef;
+  $threads=undef;
+  foreach $elem (@elems) {
+    if ( $elem=~ m/time=(\d+)/ ) {
+      $elem =~ s/time=(\d+)/$1/;
+      $time = $elem;
+    } elsif ( $elem=~ m/threads=(\d+)/ ) {
+      $elem =~ s/threads=(\d+)/$1/g;
+      $threads = $elem;
+    } else {
+      die "Unknown entry \"$elem\" when parsing \"$entry\" \n";
+    }
+  }
+
+  if (defined($time) and defined($threads) ) {
+    return ($time, $threads);
+  } else {
+    die "The accounting entry \"$entry\" did not contain all necessary attributes";
+  }
+}
+
+foreach $dir (@ARGV) {
+
+  #$dir = $ARGV[0];
+  print "$dir\n";
+
+  (! -d $dir) && print STDERR "summarize_warnings.pl: no such directory $dir\n" ;
+
+  $dir =~ s:/$::; # Remove trailing slash.
+
+
+  # Group the files into categories where all have the same base-name.
+  foreach $f (glob ("$dir/*.log")) {
+    $f_category = $f;
+    # do next expression twice; s///g doesn't work as they overlap.
+    $f_category =~ s:\.\d+\.(?!\d+):.*.:;
+    #$f_category =~ s:\.\d+\.:.*.:;
+    $fmap{$f_category} .= " $f";
+  }
+}
+
+foreach $c (sort (keys %fmap) ) {
+  $n = 0;
+  foreach $fgroup (split_hundreds($fmap{$c})) {
+    $n += `grep -w WARNING $fgroup | wc -l`;
+  }
+  if ($n != 0) {
+    print "$n warnings in $c\n"
+  }
+}
+foreach $c (sort (keys %fmap)) {
+  $n = 0;
+  foreach $fgroup (split_hundreds($fmap{$c})) {
+    $n += `grep -w ERROR $fgroup | wc -l`;
+  }
+  if ($n != 0) {
+    print "$n errors in $c\n"
+  }
+}
+
+$supertotal_cpu_time=0.0;
+$supertotal_clock_time=0.0;
+$supertotal_threads=0.0;
+
+foreach $c (sort (keys %fmap)) {
+  $n = 0;
+
+  $total_cpu_time=0.0;
+  $total_clock_time=0.0;
+  $total_threads=0.0;
+  foreach $fgroup (split_hundreds($fmap{$c})) {
+    $lines=`grep -P "# Accounting:? " $fgroup |sed 's/.* Accounting:* *//g'`;
+
+    #print $lines ."\n";
+
+    @entries = split "\n", $lines;
+
+    foreach $line (@entries) {
+      ($time, $threads) = parse_accounting_entry($line);
+
+      $total_cpu_time += $time * $threads;
+      $total_threads += $threads;
+      if ( $time > $total_clock_time ) {
+        $total_clock_time = $time;
+      }
+    }
+  }
+  print "total_cpu_time=$total_cpu_time clock_time=$total_clock_time total_threads=$total_threads group=$c\n";
+
+  $supertotal_cpu_time += $total_cpu_time;
+  $supertotal_clock_time += $total_clock_time;
+  $supertotal_threads += $total_threads;
+}
+print "total_cpu_time=$supertotal_cpu_time clock_time=$supertotal_clock_time total_threads=$supertotal_threads group=all\n";
+
diff --git a/egs/babel/s5d/local/syllab/ali_to_syllabs.sh b/egs/babel/s5d/local/syllab/ali_to_syllabs.sh
new file mode 100755
index 00000000000..8f0cb88771a
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/ali_to_syllabs.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal)
+# License: Apache 2.0
+
+# Begin configuration section.
+cmd=run.pl
+# End configuration section
+. ./utils/parse_options.sh
+
+if [ -f ./path.sh ]; then . ./path.sh; fi
+
+if [ $# != 4 ]; then
+  echo "This script takes an ali directory and syllab lang dir and generates"
+  echo "syllabic transcription of the alignment"
+  echo ""
+  echo "Usage: $0 <data-dir> <syll-lang-dir> <ali-dir> <out-dir>"
+  echo " e.g.: $0 data/train data/lang_syll exp/tri5_ali exp/tri5_ali_syll"
+  echo "main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>)"
+
+  exit 1;
+fi
+
+set -e -o pipefail
+set -o nounset                              # Treat unset variables as an error
+
+
+data=$1
+lang=$2
+ali=$3
+out=$4
+
+
+for f in real_words.txt lex.words2syllabs.fst ; do
+  [ ! -f $lang/$f ] && \
+    echo "The given lang directory is probably not a syllable lang dir" && \
+    echo "The file $lang/$f is missing" && \
+    exit 1
+done
+
+for f in words.txt L.fst ; do
+  [ ! -f $lang/$f ] && \
+    echo "The given lang directory does not contain the $f file" && \
+    exit 1
+done
+
+for f in $ali/num_jobs $ali/final.mdl $ali/ali.1.gz ; do
+  [ ! -f $f ] && \
+    echo "The given alignment directory does not contain the $f file" && \
+    exit 1
+done
+
+nj=$(cat $ali/num_jobs)
+echo "Extracting phoneme sequences"
+$cmd JOB=1:$nj $out/log/ali-to-phones.JOB.log \
+  ali-to-phones $ali/final.mdl ark:"gunzip -c $ali/ali.JOB.gz|" ark:- \| \
+  transcripts-to-fsts ark:- ark:$out/phones.JOB.fst || exit 1
+
+echo "Composing with files in $lang to get syllable sequences"
+$cmd JOB=1:$nj $out/log/get-syll-text.JOB.log \
+  cat $data/split$nj/JOB/text \| sym2int.pl -f 2- --map-oov '<unk>' $lang/real_words.txt \| \
+  transcripts-to-fsts ark,t:- ark:- \|\
+  fsttablecompose $lang/lex.words2syllabs.fst ark:- ark:-\| \
+  fsts-project ark:- ark:-\| \
+  fsttablecompose $lang/L.fst ark:- ark:- \|\
+  fsttablecompose ark:$out/phones.JOB.fst ark:- ark:- \| \
+  fsts-to-transcripts ark:- ark,t:"|int2sym.pl -f 2- $lang/words.txt > $out/text.JOB"
+cat $out/text.* | sort > $out/text
+
+echo "Done"
+
diff --git a/egs/babel/s5d/local/syllab/create_syll_datadir.sh b/egs/babel/s5d/local/syllab/create_syll_datadir.sh
new file mode 100755
index 00000000000..4c014285619
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/create_syll_datadir.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+help_message="Converts a normal <data-dir> (with word-level transcriptions) into a syllabic one\nExpects 4 parameters:\n"
+# Begin configuration section.
+boost_sil=1.0
+cmd=run.pl
+nj=4
+# End configuration section
+. ./utils/parse_options.sh
+
+set -e -o pipefail
+set -o nounset                              # Treat unset variables as an error
+
+. ./cmd.sh
+. ./path.sh
+
+if [ $# -ne 4 ] ; then
+  echo "$#"
+  echo -e "$help_message"
+  return 1;
+fi
+
+input=$1
+word_lang=$2
+syll_lang=$3
+output=$4
+
+[ ! -f exp/tri5/final.mdl ] && \
+  echo "File exp/tri5/final.mdl must exist" && exit 1;
+
+[ ! -d $input/split$nj ] && utils/split_data.sh $input $nj
+
+utils/copy_data_dir.sh $input $output
+touch $output/.plp.done
+touch $output/.done
+
+if [ -f $input/text ] ; then
+  steps/align_fmllr.sh \
+    --boost-silence $boost_sil --nj $nj --cmd "$cmd" \
+    $input $word_lang exp/tri5 exp/tri5_ali/align_$(basename $input)
+
+  local/syllab/ali_to_syllabs.sh \
+    --cmd "$cmd" \
+    $input $syll_lang exp/tri5_ali/align_$(basename $input) \
+    exp/tri5_ali_syll/align_$(basename $output)
+
+  cp exp/tri5_ali_syll/align_$(basename $output)/text $output/text
+fi
+
+exit 0
+
+
+
diff --git a/egs/babel/s5d/local/syllab/create_syllables.pl b/egs/babel/s5d/local/syllab/create_syllables.pl
new file mode 100755
index 00000000000..29a0a67dc8d
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/create_syllables.pl
@@ -0,0 +1,154 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  Johns Hopkins University (Author: Yenda Trmal)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+use Getopt::Long;
+use Data::Dumper;
+
+my $with_probs;
+my $position_independent_phones;
+
+GetOptions("with-probs" => \$with_probs,
+           "position-independent-phones" => \$position_independent_phones
+);
+
+my %SYLLS;
+my %LEXICON;
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my $word; my $prob; my $pron;
+  if ($with_probs) {
+    ($word, $prob, $pron) = split(" ", $line, 3);
+  } else {
+    ($word, $pron) = split(" ", $line, 2);
+  }
+  my @syllabs = split(/\s*\t\s*/, $pron);
+
+  my $pronlen= scalar @syllabs;
+  my @extended_syllabs;
+  if (( $syllabs[0] =~ /x\<.*\>/) || ($word eq "SIL")) {
+    $SYLLS{$pron} +=1;
+    push @extended_syllabs, $pron;
+  } elsif ($pronlen == 1) {
+    my $syl;
+    my @phones=split " ", $syllabs[0];
+
+    if ($position_independent_phones) {
+      $syl = join(" ", @phones);
+    } else {
+      my @phones2 = map { $_ . "_I" } @phones;
+
+      if (scalar(@phones) == 1 ) {
+        $syl = "$phones[0]_S";
+      } else {
+        $phones2[0] = $phones[0] . "_B" unless $position_independent_phones;
+        $phones2[-1] = $phones[-1] ."_E" unless $position_independent_phones;
+        $syl = join(" ", @phones2);
+      }
+    }
+    $SYLLS{$syl} += 1;
+    push @extended_syllabs, $syl;
+  } else {
+    for (my $i = 0; $i < $pronlen; $i+=1) {
+      my $syl;
+      my @phones=split " ", $syllabs[$i];
+      my $first_index = 0;
+      my $last_index = scalar(@phones)-1;
+
+      if ($position_independent_phones) {
+        $syl = join(" ", @phones);
+      } else {
+        my @phones2 = map { $_ . "_I" } @phones;
+
+        if ($i == 0) {
+          $phones2[$first_index] = $phones[$first_index] . "_B";
+        } elsif ( $i == ($pronlen - 1)) {
+          $phones2[$last_index] = $phones[$last_index] .
"_E"; + } + $syl = join(" ", @phones2); + } + + push @extended_syllabs, $syl; + $SYLLS{$syl} += 1; + } + } + push @{$LEXICON{$word}}, \@extended_syllabs; +} + + +my %VOCAB; +my %COUNTS; +my %REV_VOCAB; +foreach my $syl (keys %SYLLS) { + my $seq=1; + my $word=$syl; + $word =~ s/_[^\s]*//g; + $word =~ s/ //g; + $word =~ s/[^a-zA-Z0-9<>-|\/]//g; + + my $wordx=$word; + $wordx .= "#$seq"; + while (exists $COUNTS{$wordx}) { + $seq += 1; + $wordx = "$word#$seq"; + } + + $COUNTS{$wordx} += $SYLLS{$syl}; + push @{$VOCAB{$wordx}}, $syl; + $REV_VOCAB{$syl} = $wordx; +} + +open(my $lex_f, "|sort -u > $ARGV[0]") or +die "Cannot open the file\"$ARGV[0]\" for writing"; + +foreach my $word (keys %VOCAB) { + print $lex_f "$word\t" . join("\t", @{$VOCAB{$word}}) . "\n"; +} + +close($lex_f); + +open(my $word2syll_f, "|sort -u > $ARGV[1]") or +die "Cannot open the file\"$ARGV[1]\" for writing"; + +foreach my $word (keys %LEXICON) { + foreach my $pron (@{$LEXICON{$word}}) { + my @pron_in_syllabs; + foreach my $syl (@{$pron}) { + die "In word $word, pronunciation $pron: syllable $syl not in the lexicon!" unless exists $REV_VOCAB{$syl}; + push @pron_in_syllabs, $REV_VOCAB{$syl}; + } + print $word2syll_f "$word\t" . join(" ", @pron_in_syllabs) . "\n"; + } +} + +close($word2syll_f); + +open(my $word2ali_f, "|sort -u > $ARGV[2]") or +die "Cannot open the file\"$ARGV[2]\" for writing"; + +foreach my $word (keys %LEXICON) { + foreach my $pron (@{$LEXICON{$word}}) { + print $word2ali_f "$word\t$word\t" . join(" ", @{$pron}) . "\n"; + } +} + +close($word2ali_f); + diff --git a/egs/babel/s5d/local/syllab/generate_phone_lang.sh b/egs/babel/s5d/local/syllab/generate_phone_lang.sh new file mode 100755 index 00000000000..fc21a23231b --- /dev/null +++ b/egs/babel/s5d/local/syllab/generate_phone_lang.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +cmd=run.pl +# End configuration section +. ./utils/parse_options.sh +. ./path.sh + + + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +data=$1 +llang=$2 +lang=$3 +out=$4 +lout=$5 + +test -d $lout && rm -rf $lout +mkdir -p $lout +test -d $out && rm -rf $out +cp -R $lang $out +rm -rf $out/tmp $out/L.fst $out/L_disambig.fst $out/G.fst $out/words.txt +rm -rf $out/phones/word_boundary.{int,txt} + +echo "Generating lexicons.." +if [ -f $lang/phones/word_boundary.int ] ; then + echo "Position dependent phones system..." + if [ -f $llang/lexiconp.txt ] ; then + echo "Using probabilistic lexicon..." + cat $llang/lexiconp.txt | sed 's/ /\t/g' | local/syllab/create_syllables.pl --with-probs\ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + else + echo "Using plain lexicon..." + cat $llang/lexicon.txt | sed 's/ /\t/g' | local/syllab/create_syllables.pl \ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + fi +else + echo "Position independent phones system..." + if [ -f $llang/lexiconp.txt ] ; then + echo "Using probabilistic lexicon..." + cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs --position-independent-phones\ + $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt + else + echo "Using plain lexicon..." 
+    cat $llang/lexicon.txt | local/syllab/create_syllables.pl --position-independent-phones\
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  fi
+fi
+cp $lout/lex.{syllabs2phones,words2syllabs,words2phones}.txt $out
+
+#We will fake the words.txt file
+(
+  echo "<eps>";
+  cut -f 1 $out/lex.syllabs2phones.txt;
+  echo -e "#0\n<s>\n</s>";
+) | nl -v 0 | awk '{print $2, $1}' > $out/syllabs.txt
+ln -s syllabs.txt $out/words.txt
+cp $lang/words.txt $out/real_words.txt
+
+
+#Figure out the "OOV" token
+oovword=$(cat $lang/oov.txt)
+oovsyl=$(grep -w -F "$oovword" $out/lex.words2syllabs.txt | \
+         awk '{if (NF == 2) { print $2;}
+               else {print "Error, oov word has more than one syllable "; exit 1;}}')
+
+echo $oovsyl > $out/oov.txt
+grep -w -F "$oovsyl" $out/words.txt | awk '{print $2}' > $out/oov.int
+
+phone_disambig_symbol=$(grep '#0' $out/phones.txt | awk '{print $2}')
+word_disambig_symbol=$(grep '#0' $out/words.txt | awk '{print $2}')
+
+if [ -f $out/phones/wdisambig_words.int ]; then
+  echo $word_disambig_symbol > $out/phones/wdisambig_words.int
+fi
+
+optional_sil=$(cat $out/phones/optional_silence.txt)
+utils/add_lex_disambig.pl $out/lex.syllabs2phones.txt $out/lex.syllabs2phones.disambig.txt > /dev/null
+cat $out/lex.syllabs2phones.disambig.txt | sort -u > $lout/lexicon.txt
+
+echo "<eps> SIL" | cat - $lout/lexicon.txt | perl -ane 'print $F[0], " ", join(" ", @F), "\n";' | \
+  sed 's/ #[0-9]$//g' > $out/phones/align_lexicon.txt
+cat $lout/lexicon.txt | perl -ane 'print $F[0], "\t1.0\t", join(" ", @F[1..$#F]), "\n";' \
+  > $lout/lexiconp.txt
+
+cat $out/phones/align_lexicon.txt |\
+  sym2int.pl -f 3- $out/phones.txt |\
+  sym2int.pl -f 1-2 $out/words.txt \
+  > $out/phones/align_lexicon.int
+
+ndisambig=$(cat $out/phones/disambig.int | wc -l)
+ndisambig=$[$ndisambig-1]
+
+
+#Compile the lexicons
+echo "Compiling words2syllables FST"
+utils/make_lexicon_fst.pl $out/lex.words2syllabs.txt | \
+  fstcompile --isymbols=$out/syllabs.txt --osymbols=$lang/words.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.words2syllabs.fst
+
+echo "Compiling L.fst and L_disambig.fst"
+sil=$(cat $lang/phones/optional_silence.txt)
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.txt 0.5 $sil | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.fst
+ln -s lex.syllabs2phones.fst $out/L.fst
+
+
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.disambig.txt 0.5 $sil '#'$ndisambig | \
+  fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstaddselfloops "echo $phone_disambig_symbol |" "echo $word_disambig_symbol |"|\
+  fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.disambig.fst
+ln -s lex.syllabs2phones.disambig.fst $out/L_disambig.fst
+
+echo "Validating the output lang dir"
+utils/validate_lang.pl $out || exit 1
+
+sed -i'' 's/#1$//g' $lout/lexicon.txt
+sed -i'' 's/#1$//g' $lout/lexiconp.txt
+
+echo "Done OK."
+exit 0
diff --git a/egs/babel/s5d/local/syllab/generate_syllable_lang.sh b/egs/babel/s5d/local/syllab/generate_syllable_lang.sh
new file mode 100755
index 00000000000..db7b0902425
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/generate_syllable_lang.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University (Yenda Trmal)
+# License: Apache 2.0
+
+# Begin configuration section.
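+# Called as: generate_syllable_lang.sh <data> <local-dict> <lang> <lang-out> <dict-out>
+# (the argument names above are descriptive only; see the positional
+# assignments to data/llang/lang/out/lout below)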
+cmd=run.pl
+# End configuration section
+. ./utils/parse_options.sh
+. ./path.sh
+
+
+
+set -e -o pipefail
+set -o nounset                              # Treat unset variables as an error
+
+data=$1
+llang=$2
+lang=$3
+out=$4
+lout=$5
+
+test -d $lout && rm -rf $lout
+mkdir -p $lout
+test -d $out && rm -rf $out
+cp -R $lang $out
+rm -rf $out/tmp $out/L.fst $out/L_disambig.fst $out/G.fst $out/words.txt
+rm -rf $out/phones/word_boundary.{int,txt}
+
+echo "Generating lexicons.."
+if [ -f $lang/phones/word_boundary.int ] ; then
+  echo "Position dependent phones system..."
+  if [ -f $llang/lexiconp.txt ] ; then
+    echo "Using probabilistic lexicon..."
+    cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs\
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  else
+    echo "Using plain lexicon..."
+    cat $llang/lexicon.txt | local/syllab/create_syllables.pl \
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  fi
+else
+  echo "Position independent phones system..."
+  if [ -f $llang/lexiconp.txt ] ; then
+    echo "Using probabilistic lexicon..."
+    cat $llang/lexiconp.txt | local/syllab/create_syllables.pl --with-probs --position-independent-phones\
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  else
+    echo "Using plain lexicon..."
+    cat $llang/lexicon.txt | local/syllab/create_syllables.pl --position-independent-phones\
+      $lout/lex.syllabs2phones.txt $lout/lex.words2syllabs.txt $lout/lex.words2phones.txt
+  fi
+fi
+cp $lout/lex.{syllabs2phones,words2syllabs,words2phones}.txt $out
+
+#We will fake the words.txt file
+(
+  echo "<eps>";
+  cut -f 1 $out/lex.syllabs2phones.txt;
+  echo -e "#0\n<s>\n</s>";
+) | nl -v 0 | awk '{print $2, $1}' > $out/syllabs.txt
+ln -s syllabs.txt $out/words.txt
+cp $lang/words.txt $out/real_words.txt
+
+
+#Figure out the "OOV" token
+oovword=$(cat $lang/oov.txt)
+oovsyl=$(grep -w -F "$oovword" $out/lex.words2syllabs.txt | \
+         awk '{if (NF == 2) { print $2;}
+               else {print "Error, oov word has more than one syllable "; exit 1;}}')
+
+echo $oovsyl > $out/oov.txt
+grep -w -F "$oovsyl" $out/words.txt | awk '{print $2}' > $out/oov.int
+
+phone_disambig_symbol=$(grep '#0' $out/phones.txt | awk '{print $2}')
+word_disambig_symbol=$(grep '#0' $out/words.txt | awk '{print $2}')
+
+optional_sil=$(cat $out/phones/optional_silence.txt)
+utils/add_lex_disambig.pl $out/lex.syllabs2phones.txt $out/lex.syllabs2phones.disambig.txt > /dev/null
+cat $out/lex.syllabs2phones.disambig.txt | sort -u > $lout/lexicon.txt
+
+if [ -f $out/phones/wdisambig_words.int ]; then
+  echo $word_disambig_symbol > $out/phones/wdisambig_words.int
+fi
+
+echo "<eps> SIL" | cat - $lout/lexicon.txt | perl -ane 'print $F[0], " ", join(" ", @F), "\n";' | \
+  sed 's/ #[0-9]$//g' > $out/phones/align_lexicon.txt
+cat $lout/lexicon.txt | perl -ane 'print $F[0], "\t1.0\t", join(" ", @F[1..$#F]), "\n";' \
+  > $lout/lexiconp.txt
+
+cat $out/phones/align_lexicon.txt |\
+  sym2int.pl -f 3- $out/phones.txt |\
+  sym2int.pl -f 1-2 $out/words.txt \
+  > $out/phones/align_lexicon.int
+
+ndisambig=$(cat $out/phones/disambig.int | wc -l)
+ndisambig=$[$ndisambig-1]
+
+
+#Compile the lexicons
+echo "Compiling words2syllables FST"
+utils/make_lexicon_fst.pl $out/lex.words2syllabs.txt | \
+  fstcompile --isymbols=$out/syllabs.txt --osymbols=$lang/words.txt \
+    --keep_isymbols=false --keep_osymbols=false| \
+  fstarcsort --sort_type=olabel > $out/lex.words2syllabs.fst
+
+echo "Compiling L.fst and L_disambig.fst"
+sil=$(cat $lang/phones/optional_silence.txt)
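+# The 0.5 passed to make_lexicon_fst.pl below is the optional-silence
+# probability; $sil is the optional-silence phone read just above.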
+utils/make_lexicon_fst.pl $out/lex.syllabs2phones.txt 0.5 $sil | \ + fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \ + --keep_isymbols=false --keep_osymbols=false| \ + fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.fst +ln -s lex.syllabs2phones.fst $out/L.fst + + +utils/make_lexicon_fst.pl $out/lex.syllabs2phones.disambig.txt 0.5 $sil '#'$ndisambig | \ + fstcompile --isymbols=$lang/phones.txt --osymbols=$out/syllabs.txt \ + --keep_isymbols=false --keep_osymbols=false| \ + fstaddselfloops "echo $phone_disambig_symbol |" "echo $word_disambig_symbol |"|\ + fstarcsort --sort_type=olabel > $out/lex.syllabs2phones.disambig.fst +ln -s lex.syllabs2phones.disambig.fst $out/L_disambig.fst + +echo "Validating the output lang dir" +utils/validate_lang.pl $out || exit 1 + +sed -i'' 's/#1$//g' $lout/lexicon.txt +sed -i'' 's/#1$//g' $lout/lexiconp.txt + +echo "Done OK." +exit 0 diff --git a/egs/babel/s5d/local/syllab/lattice_word2syll.sh b/egs/babel/s5d/local/syllab/lattice_word2syll.sh new file mode 100755 index 00000000000..b81bf9d18d4 --- /dev/null +++ b/egs/babel/s5d/local/syllab/lattice_word2syll.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# Copyright (c) 2016, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +cmd=run.pl +acwt=0.1 +beam=8 +# End configuration section +echo $0 "$@" +. ./utils/parse_options.sh + +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +data=$1; shift; +ilang=$1; shift; +olang=$1; shift; +input=$1; shift +output=$1; shift + +nj=$(cat $input/num_jobs) + +mkdir -p $output/log + + +if [ -f $olang/lex.words2syllabs.fst ] ; then + fstinvert $olang/lex.words2syllabs.fst | fstreverse | \ + fstminimize | fstreverse > $output/L.fst + + $cmd JOB=1:$nj $output/log/convert.JOB.log \ + lattice-push --push-strings ark:"gunzip -c $input/lat.JOB.gz|" ark:- \| \ + lattice-lmrescore --lm-scale=-1.0 ark:- "fstproject --project_output=true $ilang/G.fst|" ark:- \| \ + lattice-compose ark:- $output/L.fst ark:- \| \ + lattice-determinize-pruned --beam=8 --acoustic-scale=0.1 ark:- ark:- \| \ + lattice-minimize ark:- "ark:|gzip -c > $output/lat.JOB.gz" + #lattice-minimize ark:- ark:- \| \ + #lattice-lmrescore --lm-scale=1.0 ark:- "fstproject --project_output=true $olang/G.fst|" "ark:|gzip -c > $output/lat.JOB.gz" +else + #for phonemes.... 
# (IIRC)
+  fstreverse $olang/L.fst | fstminimize | fstreverse > $output/L.fst
+  $cmd JOB=1:$nj $output/log/convert.JOB.log \
+    lattice-push --push-strings ark:"gunzip -c $input/lat.JOB.gz|" ark:- \| \
+    lattice-lmrescore --lm-scale=-1.0 ark:- "fstproject --project_output=true $ilang/G.fst|" ark:- \| \
+    lattice-align-words $ilang/phones/word_boundary.int $input/../final.mdl ark:- ark:- \| \
+    lattice-to-phone-lattice --replace-words  $input/../final.mdl ark:- ark:- \| \
+    lattice-align-phones $input/../final.mdl ark:- ark:- \| \
+    lattice-compose ark:- $output/L.fst ark:- \|\
+    lattice-determinize-pruned --beam=$beam --acoustic-scale=$acwt ark:- ark:-\| \
+    lattice-minimize ark:- "ark:|gzip -c > $output/lat.JOB.gz"
+    #lattice-lmrescore --lm-scale=1.0 ark:- "fstproject --project_output=true $olang/G.fst|" ark:"|gzip -c > $output/lat.JOB.gz"
+fi
+
+  #lattice-1best ark:- ark:-| nbest-to-linear ark:- ark:/dev/null ark,t:- \
+  #utils/int2sym.pl -f 2- $olang/words.txt | head
+cp $input/num_jobs $output/num_jobs
+
diff --git a/egs/babel/s5d/local/syllab/map_prons_to_syllables.pl b/egs/babel/s5d/local/syllab/map_prons_to_syllables.pl
new file mode 100755
index 00000000000..df3ce93ce4e
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/map_prons_to_syllables.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/env perl
+#===============================================================================
+# Copyright 2015  (Author: Yenda Trmal)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+use Getopt::Long;
+
+my $probs;
+
+GetOptions ("with-probs" => \$probs);
+
+my $syllab_lexicon=$ARGV[0];
+
+my %PRON2SYL;
+
+
+open(my $f, $syllab_lexicon) or die "Cannot open file $syllab_lexicon\n";
+while (my $line = <$f>) {
+  chomp $line;
+
+  my $syll;
+  my $pron;
+  my $prob;
+
+  if ($probs) {
+    ($syll, $prob, $pron) = split " ", $line, 3;
+  } else {
+    ($syll, $pron) = split " ", $line, 2;
+  }
+  $PRON2SYL{$pron} = $syll;
+}
+
+while (my $line = <STDIN>) {
+  chomp $line;
+  my ($word, $pron) = split(/\s*\t\s*/, $line, 2);
+  my @syllabs = split(/\s*\t\s*/, $pron);
+
+  my @syl_pron;
+  foreach my $syl (@syllabs) {
+    die "in $line unknown syllable $syl" unless exists $PRON2SYL{$syl};
+    push @syl_pron, $PRON2SYL{$syl};
+  }
+  print "$word\t" . join(" ", @syl_pron) . "\n";
+
+}
diff --git a/egs/babel/s5d/local/syllab/run_phones.sh b/egs/babel/s5d/local/syllab/run_phones.sh
new file mode 100755
index 00000000000..6f3c7be4cef
--- /dev/null
+++ b/egs/babel/s5d/local/syllab/run_phones.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal )
+# License: Apache 2.0
+
+# Begin configuration section.
+stage=0
+# End configuration section
+. ./utils/parse_options.sh
+set -e -o pipefail
+set -o nounset                              # Treat unset variables as an error
+
+. ./cmd.sh
+. ./path.sh
+
+. ./conf/common_vars.sh
+. 
./lang.conf + +if [ $# -ne 1 ] ; then + echo "Invalid number of parameters" + exit 1 +fi + +idir=$1 +idata=${idir##*/} + + +odata=${idata%%.*}.phn.${idata#*.} + +if [ $stage -le -1 ] ; then + local/syllab/generate_phone_lang.sh \ + data/train data/local/ data/lang data/lang.phn data/local/dict.phn + + local/syllab/ali_to_syllabs.sh \ + data/train data/lang.phn exp/tri5_ali exp/tri5_ali_phn + + + utils/copy_data_dir.sh data/train data/train.phn + cp exp/tri5_ali_phn/text data/train.phn/text + + #Create syllab LM + local/train_lms_srilm.sh \ + --words-file data/lang.phn/words.txt --train-text data/train.phn/text \ + --oov-symbol "`cat data/lang.phn/oov.txt`" data data/srilm.phn + + local/arpa2G.sh data/srilm.phn/lm.gz data/lang.phn/ data/lang.phn/ +fi + +if [ $stage -le 0 ] && [ -f "$idir/text" ] ; then + #Create dev10h.phn.pem dir + steps/align_fmllr.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + $idir data/lang exp/tri5 exp/tri5_ali/align_$idata + + local/syllab/ali_to_syllabs.sh \ + --cmd "$decode_cmd" \ + $idir data/lang.phn exp/tri5_ali/align_$idata exp/tri5_ali_phn/align_$idata +fi + +if [ $stage -le 1 ] ; then + utils/copy_data_dir.sh data/$idata data/$odata + [ -f exp/tri5_ali_phn/align_$idata/text ] && \ + cp exp/tri5_ali_phn/align_$idata/text data/$odata/text + touch data/$odata/.plp.done + touch data/$odata/.done +fi + + diff --git a/egs/babel/s5d/local/syllab/run_syllabs.sh b/egs/babel/s5d/local/syllab/run_syllabs.sh new file mode 100755 index 00000000000..a2ec82f3033 --- /dev/null +++ b/egs/babel/s5d/local/syllab/run_syllabs.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Copyright (c) 2015, Johns Hopkins University ( Yenda Trmal ) +# License: Apache 2.0 + +# Begin configuration section. +stage=0 +# End configuration section +. ./utils/parse_options.sh +set -e -o pipefail +set -o nounset # Treat unset variables as an error + +. ./cmd.sh +. ./path.sh + +. ./conf/common_vars.sh +. 
./lang.conf
+
+if [ $# -ne 1 ] ; then
+  echo "Invalid number of parameters"
+  exit 1
+fi
+
+idir=$1
+idata=${idir##*/}
+
+
+odata=${idata%%.*}.syll.${idata#*.}
+
+if [ $stage -le -1 ] ; then
+  local/syllab/generate_syllable_lang.sh \
+    data/train data/local/ data/lang data/lang.syll data/local/dict.syll
+
+  local/syllab/ali_to_syllabs.sh \
+    data/train data/lang.syll exp/tri5_ali exp/tri5_ali_syll
+
+
+  utils/copy_data_dir.sh data/train data/train.syll
+  cp exp/tri5_ali_syll/text data/train.syll/text
+
+  #Create syllab LM
+  local/train_lms_srilm.sh \
+    --words-file data/lang.syll/words.txt --train-text data/train.syll/text \
+    --oov-symbol "`cat data/lang.syll/oov.txt`" data data/srilm.syll
+
+  local/arpa2G.sh data/srilm.syll/lm.gz  data/lang.syll/ data/lang.syll/
+fi
+
+if [ $stage -le 0 ] && [ -f "$idir/text" ] ; then
+  #Create dev10h.syll.pem dir
+  steps/align_fmllr.sh \
+    --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \
+    $idir data/lang exp/tri5 exp/tri5_ali/align_$idata
+
+  local/syllab/ali_to_syllabs.sh \
+    --cmd "$decode_cmd" \
+    $idir data/lang.syll exp/tri5_ali/align_$idata exp/tri5_ali_syll/align_$idata
+fi
+
+if [ $stage -le 1 ] ; then
+  utils/copy_data_dir.sh data/$idata data/$odata
+  [ -f exp/tri5_ali_syll/align_$idata/text ] && \
+    cp exp/tri5_ali_syll/align_$idata/text data/$odata/text
+  touch data/$odata/.plp.done
+  touch data/$odata/.done
+fi
+
+
diff --git a/egs/babel/s5d/local/train_g2p.sh b/egs/babel/s5d/local/train_g2p.sh
new file mode 100755
index 00000000000..08be0014656
--- /dev/null
+++ b/egs/babel/s5d/local/train_g2p.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+# Copyright 2014  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+
+# Begin configuration section.
+iters=5
+stage=0
+encoding='utf-8'
+remove_tags=true
+only_words=true
+icu_transform="Any-Lower"
+cmd=run.pl
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+set -u
+set -e
+
+if [ $# != 2 ]; then
+  echo "Usage: $0 [options] <lexicon-in> <work-dir>"
+  echo " where <lexicon-in> is the training lexicon (one pronunciation per "
+  echo " word per line) and <work-dir> is directory where the models will "
+  echo " be stored"
+  echo "e.g.: train_g2p.sh data/local/lexicon.txt exp/g2p/"
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --iters <n>           # How many iterations. Relates to N-ngram order"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  exit 1;
+fi
+
+lexicon=$1
+wdir=$2
+
+
+mkdir -p $wdir/log
+
+[ ! -f $lexicon ] && echo "$0: Training lexicon does not exist." && exit 1
+
+if $only_words ; then
+  cat $lexicon | sed 's/^<.*>.*$//g' | sed 's/^#.*//g' > $wdir/lexicon_onlywords.txt
+  lexicon=$wdir/lexicon_onlywords.txt
+fi
+
+if $remove_tags ; then
+  cat $lexicon |\
+    sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g' > $wdir/lexicon_notags.txt
+  lexicon=$wdir/lexicon_notags.txt
+fi
+
+if [ ! -z $icu_transform ] ; then
+  paste \
+    <(cat $lexicon | awk '{print $1}' | uconv -f $encoding -t $encoding -x "$icu_transform") \
+    <(cat $lexicon | sed 's/^[^ \t][^ \t]*[ \t]//g') \
+    > $wdir/lexicon_transformed.txt
+  lexicon=$wdir/lexicon_transformed.txt
+fi
+
+if ! g2p=`which g2p.py` ; then
+  echo "Sequitur was not found !"
+ echo "Go to $KALDI_ROOT/tools and execute extras/install_sequitur.sh" + exit 1 +fi + +echo "Training the G2P model (iter 0)" + +if [ $stage -le 0 ]; then + $cmd $wdir/log/g2p.0.log \ + g2p.py -S --encoding $encoding --train $lexicon --devel 5% --write-model $wdir/g2p.model.0 +fi + +for i in `seq 0 $(($iters-2))`; do + + echo "Training the G2P model (iter $[$i + 1] )" + + if [ $stage -le $i ]; then + $cmd $wdir/log/g2p.$(($i + 1)).log \ + g2p.py -S --encoding $encoding --model $wdir/g2p.model.$i --ramp-up --train $lexicon --devel 5% --write-model $wdir/g2p.model.$(($i+1)) + fi + +done + +! (set -e; cd $wdir; ln -sf g2p.model.$[$iters-1] g2p.model.final ) && echo "Problem finalizing training... " && exit 1 + +if [ $stage -le $(($i + 2)) ]; then + echo "Running test..." + $cmd $wdir/log/test.log \ + g2p.py --encoding $encoding --model $wdir/g2p.model.final --test $lexicon +fi + diff --git a/egs/babel/s5d/local/train_lms_srilm.sh b/egs/babel/s5d/local/train_lms_srilm.sh new file mode 100755 index 00000000000..cf357260d8c --- /dev/null +++ b/egs/babel/s5d/local/train_lms_srilm.sh @@ -0,0 +1,229 @@ +#!/bin/bash +export LC_ALL=C + +words_file= +train_text= +dev_text= +oov_symbol="" + +echo "$0 $@" + +[ -f path.sh ] && . ./path.sh +. ./utils/parse_options.sh || exit 1 + +echo "-------------------------------------" +echo "Building an SRILM language model " +echo "-------------------------------------" + +if [ $# -ne 2 ] ; then + echo "Incorrect number of parameters. " + echo "Script has to be called like this:" + echo " $0 [switches] " + echo "For example: " + echo " $0 data data/srilm" + echo "The allowed switches are: " + echo " words_file= word list file -- data/lang/words.txt by default" + echo " train_text= data/train/text is used in case when not specified" + echo " dev_text= last 10 % of the train text is used by default" + echo " oov_symbol=> symbol to use for oov modeling -- by default" + exit 1 +fi + +datadir=$1 +tgtdir=$2 +outlm=lm.gz + + +##End of configuration +loc=`which ngram-count`; +if [ -z $loc ]; then + if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... + sdir=`pwd`/../../../tools/srilm/bin/i686-m64 + else + sdir=`pwd`/../../../tools/srilm/bin/i686 + fi + if [ -f $sdir/ngram-count ]; then + echo Using SRILM tools from $sdir + export PATH=$PATH:$sdir + else + echo You appear to not have SRILM tools installed, either on your path, + echo or installed in $sdir. See tools/install_srilm.sh for installation + echo instructions. + exit 1 + fi +fi + +# Prepare the destination directory +mkdir -p $tgtdir + +for f in $words_file $train_text $dev_text; do + [ ! -s $f ] && echo "No such file $f" && exit 1; +done + +[ -z $words_file ] && words_file=$datadir/lang/words.txt +if [ ! -z "$train_text" ] && [ -z "$dev_text" ] ; then + nr=`cat $train_text | wc -l` + nr_dev=$(($nr / 10 )) + nr_train=$(( $nr - $nr_dev )) + orig_train_text=$train_text + head -n $nr_train $train_text > $tgtdir/train_text + tail -n $nr_dev $train_text > $tgtdir/dev_text + + train_text=$tgtdir/train_text + dev_text=$tgtdir/dev_text + echo "Using words file: $words_file" + echo "Using train text: 9/10 of $orig_train_text" + echo "Using dev text : 1/10 of $orig_train_text" +elif [ ! -z "$train_text" ] && [ ! 
-z "$dev_text" ] ; then + echo "Using words file: $words_file" + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" + train_text=$train_text + dev_text=$dev_text +else + train_text=$datadir/train/text + dev_text=$datadir/dev2h/text + echo "Using words file: $words_file" + echo "Using train text: $train_text" + echo "Using dev text : $dev_text" +fi + + + +# Extract the word list from the training dictionary; exclude special symbols +sort $words_file | awk '{print $1}' | grep -v '\#0' | grep -v '' | grep -v -F "$oov_symbol" > $tgtdir/vocab +if (($?)); then + echo "Failed to create vocab from $words_file" + exit 1 +else + # wc vocab # doesn't work due to some encoding issues + echo vocab contains `cat $tgtdir/vocab | perl -ne 'BEGIN{$l=$w=0;}{split; $w+=$#_; $w++; $l++;}END{print "$l lines, $w words\n";}'` +fi + +# Kaldi transcript files contain Utterance_ID as the first word; remove it +cat $train_text | cut -f2- -d' ' > $tgtdir/train.txt +if (($?)); then + echo "Failed to create $tgtdir/train.txt from $train_text" + exit 1 +else + echo "Removed first word (uid) from every line of $train_text" + # wc text.train train.txt # doesn't work due to some encoding issues + echo $train_text contains `cat $train_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo train.txt contains `cat $tgtdir/train.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` +fi + +# Kaldi transcript files contain Utterance_ID as the first word; remove it +cat $dev_text | cut -f2- -d' ' > $tgtdir/dev.txt +if (($?)); then + echo "Failed to create $tgtdir/dev.txt from $dev_text" + exit 1 +else + echo "Removed first word (uid) from every line of $dev_text" + # wc text.train train.txt # doesn't work due to some encoding issues + echo $dev_text contains `cat $dev_text | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $w--; $s++;}END{print "$w words, $s sentences\n";}'` + echo $tgtdir/dev.txt contains `cat $tgtdir/dev.txt | perl -ane 'BEGIN{$w=$s=0;}{$w+=@F; $s++;}END{print "$w words, $s sentences\n";}'` +fi + +echo "-------------------" +echo "Good-Turing 2grams" +echo "-------------------" +ngram-count -lm $tgtdir/2gram.gt01.gz -gt1min 0 -gt2min 1 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/2gram.gt02.gz -gt1min 0 -gt2min 2 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo "-------------------" +echo "Kneser-Ney 2grams" +echo "-------------------" +ngram-count -lm $tgtdir/2gram.kn01.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/2gram.kn02.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -order 2 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo "-------------------" +echo "Good-Turing 3grams" +echo "-------------------" +ngram-count -lm $tgtdir/3gram.gt011.gz -gt1min 0 -gt2min 1 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt012.gz -gt1min 0 -gt2min 1 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt022.gz -gt1min 0 -gt2min 2 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.gt023.gz -gt1min 0 -gt2min 2 -gt3min 3 -order 3 -text 
$tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo "-------------------" +echo "Kneser-Ney 3grams" +echo "-------------------" +ngram-count -lm $tgtdir/3gram.kn011.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn012.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn022.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/3gram.kn023.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 3 -order 3 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + + +echo "-------------------" +echo "Good-Turing 4grams" +echo "-------------------" +ngram-count -lm $tgtdir/4gram.gt0111.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0112.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0122.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0123.gz -gt1min 0 -gt2min 1 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0113.gz -gt1min 0 -gt2min 1 -gt3min 1 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0222.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.gt0223.gz -gt1min 0 -gt2min 2 -gt3min 2 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" + +echo "-------------------" +echo "Kneser-Ney 4grams" +echo "-------------------" +ngram-count -lm $tgtdir/4gram.kn0111.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 1 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0112.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0113.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 1 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0122.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0123.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 1 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol" +ngram-count -lm $tgtdir/4gram.kn0222.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 2 -order 4 -text 
$tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+ngram-count -lm $tgtdir/4gram.kn0223.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -kndiscount4 -gt4min 3 -order 4 -text $tgtdir/train.txt -vocab $tgtdir/vocab -unk -sort -map-unk "$oov_symbol"
+
+if [ ! -z ${LIBLBFGS} ]; then
+  #please note that if the switch -map-unk "$oov_symbol" is used with -maxent-convert-to-arpa, ngram-count will segfault
+  #instead of that, we simply output the model in the maxent format and convert it using the "ngram"
+  echo "-------------------"
+  echo "Maxent 2grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 2 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/2gram.me.gz || exit 1
+
+  echo "-------------------"
+  echo "Maxent 3grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 3 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/3gram.me.gz || exit 1
+
+  echo "-------------------"
+  echo "Maxent 4grams"
+  echo "-------------------"
+  sed 's/'${oov_symbol}'/<unk>/g' $tgtdir/train.txt | \
+    ngram-count -lm - -order 4 -text - -vocab $tgtdir/vocab -unk -sort -maxent -maxent-convert-to-arpa|\
+    sed 's/<unk>/'${oov_symbol}'/g' | gzip -c > $tgtdir/4gram.me.gz || exit 1
+
+fi
+
+
+echo "--------------------"
+echo "Computing perplexity"
+echo "--------------------"
+(
+  for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
+  for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done
+)  | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt
+
+echo "The perplexity scores report is stored in $tgtdir/perplexities.txt "
+
+#This will link the lowest perplexity LM as the output LM.
+#ln -sf $tgtdir/`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '` $outlm
+
+#A slight modification of the previous approach:
+#We look at the two lowest-perplexity LMs and use a 3gram LM if one of the two is a 3gram, even if the 4gram has lower ppl
+nof_trigram_lm=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | wc -l`
+if [[ $nof_trigram_lm -eq 0 ]] ; then
+  lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
+elif [[ $nof_trigram_lm -eq 2 ]] ; then
+  lmfilename=`head -n 1 $tgtdir/perplexities.txt | cut -f 1 -d ' '`
+else #exactly one 3gram LM
+  lmfilename=`head -n 2 $tgtdir/perplexities.txt | grep 3gram | cut -f 1 -d ' '`
+fi
+(cd $tgtdir; ln -sf `basename $lmfilename` $outlm )
+
diff --git a/egs/babel/s5d/local/txt_to_rttm.pl b/egs/babel/s5d/local/txt_to_rttm.pl
new file mode 100755
index 00000000000..0e128520880
--- /dev/null
+++ b/egs/babel/s5d/local/txt_to_rttm.pl
@@ -0,0 +1,108 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $Usage = <<EOU;
+Usage: txt_to_rttm.pl [options] <txt_in|-> <rttm_out|->
+
+Allowed options:
+  --flen     : Frame length (float, default = 0.01)
+  --symtab   : Symbol table (string, default = "")
+  --segment  : Segment file from Kaldi (string, default = "")
+EOU
+
+my $symtab = "";
+my $segment = "";
+my $flen = 0.01;
+GetOptions('symtab=s' => \$symtab,
+  'segment=s' => \$segment,
+  'flen=f' => \$flen);
+
+if ($symtab) {
+  if (!open(S, "<$symtab")) {print "Failed to open symbol table: $symtab\n"; exit 1;}
+}
+
+if ($segment) {
+  if (!open(SEG, "<$segment")) {print "Failed to open segment file: $segment\n"; exit 1;}
+}
+
+if (@ARGV != 2) {
+  die $Usage;
+}
+
+# Get parameters
+my $filein = shift @ARGV;
+my $fileout = shift @ARGV;
+
+# Get input source
+my $source = "";
+if ($filein eq "-") {
+  $source = "STDIN";
+} else {
+  if (!open(I, "<$filein")) {print "Failed to open input file: $filein\n"; exit 1;}
+  $source = "I";
+}
+
+# Open output file
+my $sourceout = "";
+if ($fileout ne "-") {
+  if (!open(O, ">$fileout")) {print "Failed to open output file: $fileout\n"; exit 1;}
+  $sourceout = "O";
+}
+
+# Get symbol table and start time
+my %sym = ();
+my %tbeg = ();
+my %uid2utt = ();
+if ($symtab) {
+  while (<S>) {
+    chomp;
+    my @col = split(" ", $_);
+    @col == 2 || die "Bad number of columns in $symtab\n";
+    $sym{$col[1]} = $col[0];
+  }
+}
+
+if ($segment) {
+  while (<SEG>) {
+    chomp;
+    my @col = split(" ", $_);
+    @col == 4 || die "Bad number of columns in $segment\n";
+    $tbeg{$col[0]} = $col[2];
+    $uid2utt{$col[0]} = $col[1];
+  }
+}
+
+# Processing
+while (<$source>) {
+  chomp;
+  my @col = split(" ", $_);
+  my $uid = shift @col;
+  my $words = join(" ", @col);
+  @col = split(/;/, $words);
+
+  my $utt = $uid;
+  my $sta = 0;
+  if ($segment) {
+    $utt = $uid2utt{$uid};
+    $sta = $tbeg{$uid};
+  }
+  foreach (@col) {
+    my @subcol = split(" ", $_);
+    @subcol == 2 || die "Bad number of columns in word-frame pair\n";
+    my $word = $subcol[0];
+    my $dur = $subcol[1]*$flen;
+    my $lex = "LEXEME";
+    if ($symtab) {$word = $sym{$word};}
+    if ($word =~ m/^<.*>$/) {$lex = "NON-LEX";}
+    eval "print $sourceout \"$lex $utt 1 $sta $dur $word \n\"";
+    $sta += $dur;
+  }
+}
+
+if ($symtab) {close(S);}
+if ($segment) {close(SEG);}
+if ($filein ne "-") {close(I);}
+if ($fileout ne "-") {close(O);}
diff --git a/egs/babel/s5d/local/uem_ctm2segments.pl b/egs/babel/s5d/local/uem_ctm2segments.pl
new file mode 100755
index 00000000000..658690172c8
--- /dev/null
+++ b/egs/babel/s5d/local/uem_ctm2segments.pl
@@ -0,0 +1,232 @@
+#!/usr/bin/env perl
+use Getopt::Long;
+
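+# Example invocation (hypothetical paths; see the usage message below).  The script
+# expects an input CTM and an output segments name on the command line, but the
+# segments themselves are printed to STDOUT (the second argument is only echoed in
+# the log), so redirect as needed:
+#
+#   local/uem_ctm2segments.pl --minSilence 1.0 \
+#     exp/tri5/decode_dev10h.uem/score_10/dev10h.uem.ctm segments > data/dev10h.uem/segments
+#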
+################################################################################
+# Convert a CTM file produced by decoding a long segment, typically several min
+# long, into a sequence of shorter segments of duration 10-15 seconds.  Produce
+# a segments file of the form used for Kaldi training/decoding
+#
+#   utteranceID recordingID startTime endTime
+#
+# The desired outcome is that the long (input) segment will be recursively cut
+# into shorter segments at the location of long silences, leaving (say) 0.5 sec
+# of silence at each end of the two resulting shorter segments, until all the
+# segments are of the desired duration.
+#
+# NOTE: It is assumed that the CTM file provides time information at 0.01 sec
+# resolution, and that any missing segments in the CTM correspond to the
+# optional silence model, whose output token was removed by the sequence
+#
+#   lattice-align-words --> lattice-to-ctm-conf --> raw CTM file
+#
+  $ctmTimeStep = 0.01; # Could be changed if needed by --ctmTimeStep
+#
+# It is further assumed that the explicit silence token (word) is
+#
+  $silence = "<silence>";
+#
+# This could be changed using the --silence option if needed.
+#
+# Another option is the minimum silence duration to permit segmentation
+#
+  $minSilence = 1.02; # seconds
+#
+# Maximum allowed segment length, could be changed through --maxSegLen
+#
+  $maxSegLen = 30; # seconds
+#
+# Default segment length, used when the ctm segment is too long
+#
+  $defaultSegLen = 10; # seconds
+################################################################################
+
+GetOptions("ctmTimeStep=f" => \$ctmTimeStep,
+           "minSilence=f" => \$minSilence,
+           "silence=s" => \$silence,
+           "maxSegLen=f" => \$maxSegLen,
+           "defaultSegLen=f" => \$defaultSegLen);
+
+if ($#ARGV == 1) {
+  $ctmFile = $ARGV[0];
+  $segmentsFile = $ARGV[1];
+  print STDERR ("$0: $ctmFile $segmentsFile\n");
+  print STDERR ("\t--ctmTimeStep = $ctmTimeStep\n") unless ($ctmTimeStep == 0.01);
+  print STDERR ("\t--minSilence = $minSilence\n") unless ($minSilence == 1.02);
+  print STDERR ("\t--silence = $silence\n") unless ($silence eq "<silence>");
+  print STDERR ("\t--maxSegLen = $maxSegLen\n") unless ($maxSegLen == 30);
+  print STDERR ("\t--defaultSegLen = $defaultSegLen\n") unless ($defaultSegLen == 10);
+
+} else {
+  print STDERR ("Usage: $0 [--options] inputCTM outputSegments\n");
+  print STDERR ("\t--ctmTimeStep %f Time resolution of CTM file (default 0.01 sec)\n");
+  print STDERR ("\t--minSilence %f Min silence duration for a cut point (default 1.02 sec)\n");
+  print STDERR ("\t--silence %s Word token for silence (default <silence>)\n");
+  print STDERR ("\t--maxSegLen %f Max allowed segment length (default 30 sec)\n");
+  print STDERR ("\t--defaultSegLen %f Default segment length (default 10 sec)\n");
+  exit(1);
+}
+
+open (CTM, $ctmFile)
+  || die "Unable to open input CTM file $ctmFile for reading";
+$numRecordings = $numWords = $n = 0;
+$prevFileName = "";
+$prevChannel = "";
+$prevEndTime = 0.00;
+$prevConfidence = 0.00;
+while ($line=<CTM>) {
+  @token = split(/\s+/, $line);
+  unless (($#token==4)||($#token==5)) {
+    # CTM should have 5 or 6 tokens per line
+    #   audioFile channel startTime duration word [confidence]
+    print STDERR ("$0 WARNING: unparsable line $. in ctm file: $line");
+    next;
+  }
+  if ( ( ($token[0] ne $prevFileName) || ($token[1] ne $prevChannel) ) && ($prevFileName ne "") ) {
+    last if ($n==0); # defensive: no tokens were recorded for the previous file
+    ########################################################################
+    # This is the next audio file; create segments for the previous file
+    ########################################################################
+    print STDERR ("Audio file $prevFileName contains $n word tokens\n");
+    printf STDERR ("\t%d alternating speech/silence segments after mergers\n", &process_this_audio_file);
+    ########################################################################
+    # Done writing out the segments for the previous audio recording
+    ########################################################################
+    $numRecordings++;
+    # Reset to process the next file
+    $prevFileName = "";
+    $prevChannel = "";
+    $prevEndTime = 0.00;
+    $prevConfidence = 0.00;
+    $n=0;
+  }
+  # Otherwise, this is the next word in the same (i.e. previous) audio file
+  if ( ($token[2]-$prevEndTime) > $ctmTimeStep ) {
+    # There is a missing segment in the CTM, presumably silence
+    $fileName[$n] = $token[0];
+    $channel[$n] = $token[1];
+    $startTime[$n] = $prevEndTime;
+    $endTime[$n] = $token[2];
+    $wordToken[$n] = $silence;
+    $confidence[$n] = $prevConfidence;
+    $n++;
+  }
+  # Record this token for processing later
+  $prevFileName = $fileName[$n] = $token[0];
+  $prevChannel = $channel[$n] = $token[1];
+  $startTime[$n] = $token[2];
+  $prevEndTime = $endTime[$n] = ($token[2]+$token[3]);
+  $wordToken[$n] = $token[4];
+  $prevConfidence = $confidence[$n] = $token[5] if ($#token==5);
+  $n++;
+  $numWords++;
+}
+close(CTM);
+if ($n>0) {
+  # This is the last audio file; create segments for the file
+  print STDERR ("Audio file $prevFileName contains $n word tokens\n");
+  printf STDERR ("\t%d alternating speech/silence segments after mergers\n", &process_this_audio_file);
+  # Done writing out the segments for the last audio recording
+  $numRecordings++;
+}
+print STDERR ("Read $numRecordings filenames containing $numWords words from $ctmFile\n");
+
+
+sub process_this_audio_file {
+  # Merge consecutive speech/silence tokens to create candidate "segments"
+  $s=0;
+  $segmentStart[$s] = 0.00;
+  $segmentType[$s] = $silence;
+  $segmentEnd[$s] = -1.0;
+  for ($i=0; $i<$n; $i++) {
+    $sTime = $startTime[$i];
+    $word = $wordToken[$i];
+    $eTime = $endTime[$i];
+    if ( ($word eq $silence) && ($segmentType[$s] ne $silence)
+         || ($word ne $silence) && ($segmentType[$s] eq $silence) ) {
+      $segmentEnd[$s] = $sTime;
+      $s++;
+      $segmentStart[$s] = $sTime;
+      $segmentType[$s] = ($word eq $silence) ? $silence : "<speech>";
+    }
+    $segmentEnd[$s] = $eTime;
+  }
+  # Merge speech segments separated by silence of less than some minimum duration
+  # Note: there must be at least two segments for mergers to be an option, i.e. $s>0.
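+  # Worked example (with the default minSilence = 1.02 sec): suppose the candidate
+  # segments are
+  #     speech[0.0,4.2]  silence[4.2,4.8]  speech[4.8,9.1]
+  # The internal silence lasts only 0.6 sec, too short to serve as a cut point, so
+  # the loop below removes it and merges its two neighbours into speech[0.0,9.1],
+  # decreasing $s by 2.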
+  if ($s>0) {
+    if ( ($segmentType[0] eq $silence)
+         && ( ($segmentEnd[0]-$segmentStart[0]) < $minSilence) ) {
+      die "Something wrong: initial silence segment must have a speech segment following it"
+        unless ($segmentType[1] eq "<speech>");
+      $segmentType[0] = $segmentType[1];
+      $segmentEnd[0] = $segmentEnd[1];
+      for ($j=2; $j<=$s; $j++) {
+        $segmentStart[$j-1] = $segmentStart[$j];
+        $segmentType[$j-1] = $segmentType[$j];
+        $segmentEnd[$j-1] = $segmentEnd[$j];
+      }
+      $s--; # one silence segment removed
+    }
+    for ($i=1; $i<$s; $i++) {
+      if ( ($segmentType[$i] eq $silence)
+           && ( ($segmentEnd[$i]-$segmentStart[$i]) < $minSilence) ) {
+        die "Something wrong: internal silence segment must have speech segments on either side"
+          unless ( ($segmentType[$i-1] eq "<speech>") && ($segmentType[$i+1] eq "<speech>") );
+        $segmentEnd[$i-1] = $segmentEnd[$i+1];
+        for ($j=$i+2; $j<=$s; $j++) {
+          $segmentStart[$j-2] = $segmentStart[$j];
+          $segmentType[$j-2] = $segmentType[$j];
+          $segmentEnd[$j-2] = $segmentEnd[$j];
+        }
+        $s -= 2; # one silence removed, two speech segments merged
+        $i--; # backtrack, to process the segment that just moved into position $i
+      }
+    }
+    if ( ($segmentType[$s] eq $silence)
+         && ( ($segmentEnd[$s]-$segmentStart[$s]) < $minSilence) ) {
+      die "Something wrong: final silence segment must have a speech segment preceding it"
+        unless ($segmentType[$s-1] eq "<speech>");
+      $segmentEnd[$s-1] = $segmentEnd[$s];
+      $s--; # one silence segment removed
+    }
+  }
+  # Print segment markers for debugging
+  $num = $s + 1;
+  for ($i=0; $i<=$s; $i++) {
+#   printf STDOUT ("%s %s %.2f %.2f %s\n",
+#   printf STDOUT ("%s %s %.2f %.2f\n",
+#          sprintf ("%s_%06i",$prevFileName,(100*$segmentStart[$i])),
+#          $prevFileName,
+#          $segmentStart[$i],
+#          $segmentEnd[$i], $segmentType[$i]);
+#          ($segmentStart[$i] - (($i==0) ? 0.0 : 0.5)),
+#          ($segmentEnd[$i] + (($i==$s) ? 0.0 : 0.5))) unless ($segmentType[$i] eq $silence);
+    if ($segmentType[$i] ne $silence) {
+      if (($segmentEnd[$i] - $segmentStart[$i]) > $maxSegLen) {
+        $fakeStart = $segmentStart[$i] - (($i==0) ? 0.0 : 0.5);
+        while (($segmentEnd[$i] - $fakeStart) > $defaultSegLen) {
+          printf STDOUT ("%s %s %.2f %.2f\n",
+                         sprintf ("%s_%06i",$prevFileName,(100*$fakeStart)),
+                         $prevFileName,
+                         $fakeStart,
+                         $fakeStart + $defaultSegLen);
+          $fakeStart += $defaultSegLen;
+          $num += 2;
+        }
+        if (($segmentEnd[$i] - $fakeStart) > 0) {
+          printf STDOUT ("%s %s %.2f %.2f\n",
+                         sprintf ("%s_%06i",$prevFileName,(100*$fakeStart)),
+                         $prevFileName,
+                         $fakeStart,
+                         ($segmentEnd[$i] + (($i==$s) ? 0.0 : 0.5)));
+        } else {
+          $num -= 2;
+        }
+      } else {
+        printf STDOUT ("%s %s %.2f %.2f\n",
+                       sprintf ("%s_%06i",$prevFileName,(100*$segmentStart[$i])),
+                       $prevFileName,
+                       ($segmentStart[$i] - (($i==0) ? 0.0 : 0.5)),
+                       ($segmentEnd[$i] + (($i==$s) ? 0.0 : 0.5)));
+      }
+    }
+  }
+  $num;
+}
diff --git a/egs/babel/s5d/nnet3_examples.sh b/egs/babel/s5d/nnet3_examples.sh
new file mode 100644
index 00000000000..82661140d3c
--- /dev/null
+++ b/egs/babel/s5d/nnet3_examples.sh
@@ -0,0 +1,32 @@
+# The results shown below are for the Telugu fullLP condition
+#TDNN
+  local/nnet3/run_tdnn.sh \
+    --affix "6layer_r512" \
+    --splice-indexes "-2,-1,0,1,2 -1,2 -3,3 -7,2 0 0 " \
+    --relu-dim 512 || exit 1;
+
+  # I modified the TDNN scripts to run for 5 epochs; however, these results are from 3-epoch training
+  ./run-4-anydecode.sh --skip-kws true --dir dev10h.seg --nnet3-model nnet3/tdnn_6layer_r512_sp
+  #%WER 68.4 | 22131 40145 | 36.3 45.9 17.9 4.7 68.4 31.9 | -1.082 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.seg/score_10/dev10h.seg.ctm.sys
+  ./run-4-anydecode.sh --skip-kws true --dir dev10h.pem --nnet3-model nnet3/tdnn_6layer_r512_sp
+  #%WER 67.1 | 22131 40145 | 36.4 45.9 17.8 3.5 67.1 29.6 | -0.902 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+
+
+
+
+#LSTM
+  local/nnet3/run_lstm.sh
+
+  ./run-4-anydecode.sh --skip-kws true --dir dev10h.seg --is-rnn true --nnet3-model nnet3/lstm_sp --extra-left-context 40 --frames-per-chunk 20
+  #%WER 68.0 | 22131 40145 | 38.2 44.8 17.0 6.2 68.0 33.5 | -1.491 | exp/nnet3/lstm_sp/decode_dev10h.seg/score_10/dev10h.seg.ctm.sys
+  ./run-4-anydecode.sh --skip-kws true --dir dev10h.pem --is-rnn true --nnet3-model nnet3/lstm_sp --extra-left-context 40 --frames-per-chunk 20
+  #%WER 65.1 | 22131 40145 | 39.2 45.9 14.9 4.3 65.1 28.8 | -1.299 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+
+
+#BLSTM
+  local/nnet3/run_blstm.sh
+  ./run-4-anydecode.sh --skip-kws true --dir dev10h.seg --is-rnn true --nnet3-model nnet3/lstm_bidirectional_sp --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20
+  #%WER 67.1 | 22131 40145 | 38.8 44.9 16.3 5.9 67.1 33.6 | -1.737 | exp/nnet3/lstm_birectional_cell512_sp/decode_dev10h.seg/score_10/dev10h.seg.ctm.sys
+  ./run-4-anydecode.sh --skip-kws true --dir dev10h.pem --is-rnn true --nnet3-model nnet3/lstm_bidirectional_sp --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20
+  #%WER 64.2 | 22131 40145 | 39.8 46.0 14.2 4.0 64.2 29.0 | -1.548 | exp/nnet3/lstm_birectional_cell512_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+
diff --git a/egs/babel/s5d/path.sh b/egs/babel/s5d/path.sh
new file mode 100755
index 00000000000..212c5e15d55
--- /dev/null
+++ b/egs/babel/s5d/path.sh
@@ -0,0 +1,7 @@
+export KALDI_ROOT=/export/a09/jtrmal/kaldi/
+. $KALDI_ROOT/tools/env.sh
+. /export/a09/jtrmal/kaldi-current/tools/env.sh
+. 
/export/babel/data/software/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/online2bin:$KALDI_ROOT/src/ivectorbin:$KALDI_ROOT/src/kwsbin:$PWD:$PATH +export LC_ALL=C + diff --git a/egs/babel/s5d/results/RESULTS.105-turkish.flp b/egs/babel/s5d/results/RESULTS.105-turkish.flp new file mode 100644 index 00000000000..737d0893abe --- /dev/null +++ b/egs/babel/s5d/results/RESULTS.105-turkish.flp @@ -0,0 +1,29 @@ +%WER 57.5 | 22070 54382 | 49.0 41.7 9.2 6.5 57.5 30.8 | -1.255 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 47.8 | 22070 54382 | 57.3 34.1 8.6 5.1 47.8 29.0 | -0.605 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 22070 54382 | 59.0 32.7 8.3 4.8 45.8 28.7 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 45.8 | 22070 54382 | 59.0 32.4 8.5 4.8 45.8 28.4 | -0.630 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_17/dev10h.pem.ctm.sys +%WER 47.1 | 22070 54382 | 56.5 32.7 10.8 3.6 47.1 28.7 | -0.430 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_8/metrics.txt:MTWV = 0.5930, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/kws_12/metrics.txt:MTWV = 0.6426, THRESHOLD = 0.384 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_16/metrics.txt:MTWV = 0.6214, THRESHOLD = 0.447 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.6270, THRESHOLD = 0.595 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_kws_8/metrics.txt:MTWV = 0.5930, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_12/metrics.txt:MTWV = 0.6426, THRESHOLD = 0.384 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_kws_16/metrics.txt:MTWV = 0.6214, THRESHOLD = 0.447 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.6270, THRESHOLD = 0.595 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_11/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.807000000000001 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/dev_oov_kws_21/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.547 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_18/metrics.txt:MTWV = 0.0071, THRESHOLD = 0.666 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_9/metrics.txt:MTWV = 0.5003, THRESHOLD = 0.555 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_13/metrics.txt:MTWV = 0.5339, THRESHOLD = 0.581 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_19/metrics.txt:MTWV = 0.5203, THRESHOLD = 0.553 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.5078, THRESHOLD = 0.553 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_12/metrics.txt:MTWV = 0.0045, THRESHOLD = 0.891000000000001 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_11/metrics.txt:MTWV = 0.0066, THRESHOLD = 0.720000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_18/metrics.txt:MTWV = 0.0058, THRESHOLD = 0.867000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_20/metrics.txt:MTWV = 0.0072, THRESHOLD = 0.785000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_11/metrics.txt:MTWV = 
0.0070, THRESHOLD = 0.807000000000001 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/metrics.txt:MTWV = 0.0070, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/oov_kws_21/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.547 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_18/metrics.txt:MTWV = 0.0071, THRESHOLD = 0.666 diff --git a/egs/babel/s5d/results/RESULTS.106-tagalog.flp b/egs/babel/s5d/results/RESULTS.106-tagalog.flp new file mode 100644 index 00000000000..72568cebf81 --- /dev/null +++ b/egs/babel/s5d/results/RESULTS.106-tagalog.flp @@ -0,0 +1,34 @@ +%WER 56.7 | 25332 63009 | 50.6 38.5 10.9 7.3 56.7 32.1 | -1.361 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.4 | 25332 63009 | 57.4 32.7 9.9 5.8 48.4 30.3 | -0.891 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 46.9 | 25332 63009 | 57.4 30.5 12.1 4.3 46.9 30.3 | -0.517 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.7 | 25332 63009 | 58.2 31.1 10.7 4.9 46.7 29.9 | -0.737 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_18/dev10h.pem.ctm.sys +%WER 47.7 | 25332 63009 | 56.1 30.5 13.4 3.9 47.7 30.2 | -0.548 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 56.7 | 25332 63009 | 50.6 38.5 10.9 7.3 56.7 32.1 | -1.361 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.4 | 25332 63009 | 57.4 32.7 9.9 5.8 48.4 30.3 | -0.891 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 46.9 | 25332 63009 | 57.4 30.5 12.1 4.3 46.9 30.3 | -0.517 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.7 | 25332 63009 | 58.2 31.1 10.7 4.9 46.7 29.9 | -0.737 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_18/dev10h.pem.ctm.sys +%WER 47.7 | 25332 63009 | 56.1 30.5 13.4 3.9 47.7 30.2 | -0.548 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kws_12/metrics.txt:MTWV = 0.4452, THRESHOLD = 0.577 +exp/tri6_nnet/decode_dev10h.pem/kws_11/metrics.txt:MTWV = 0.4778, THRESHOLD = 0.696000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kws_15/metrics.txt:MTWV = 0.4448, THRESHOLD = 0.770000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.4450, THRESHOLD = 0.730000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_12/metrics.txt:MTWV = 0.4452, THRESHOLD = 0.577 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_11/metrics.txt:MTWV = 0.4778, THRESHOLD = 0.696000000000001 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_15/metrics.txt:MTWV = 0.4448, THRESHOLD = 0.770000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.4450, THRESHOLD = 0.730000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_8/metrics.txt:MTWV = 0.0173, THRESHOLD = 0.809000000000001 +exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0310, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/dev_oov_kws_21/metrics.txt:MTWV = 0.0164, THRESHOLD = 0.309 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_20/metrics.txt:MTWV = 0.0183, THRESHOLD = 0.851000000000001 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_9/metrics.txt:MTWV = 0.5117, THRESHOLD = 0.451 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_10/metrics.txt:MTWV = 0.5408, THRESHOLD = 0.504 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_17/metrics.txt:MTWV = 0.5221, THRESHOLD = 0.556 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 
0.5077, THRESHOLD = 0.648 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/eval_oov_kws_10/metrics.txt:MTWV = 0.0038, THRESHOLD = 0.900000000000001 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_10/metrics.txt:MTWV = 0.0069, THRESHOLD = 0.659 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_17/metrics.txt:MTWV = 0.0047, THRESHOLD = 0.889000000000001 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_15/metrics.txt:MTWV = 0.0052, THRESHOLD = 0.522 +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_8/metrics.txt:MTWV = 0.0173, THRESHOLD = 0.809000000000001 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_10/metrics.txt:MTWV = 0.0310, THRESHOLD = 0.621 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/oov_kws_21/metrics.txt:MTWV = 0.0164, THRESHOLD = 0.309 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_20/metrics.txt:MTWV = 0.0183, THRESHOLD = 0.851000000000001 diff --git a/egs/babel/s5d/results/RESULTS.107-vietnamese.flp b/egs/babel/s5d/results/RESULTS.107-vietnamese.flp new file mode 100644 index 00000000000..e64bca74572 --- /dev/null +++ b/egs/babel/s5d/results/RESULTS.107-vietnamese.flp @@ -0,0 +1,50 @@ +%WER 57.9 | 21875 111957 | 45.4 42.3 12.3 3.2 57.9 36.7 | -1.203 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.3 | 21875 111957 | 53.2 37.3 9.5 3.5 50.3 35.8 | -0.917 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_9/dev10h.pem.ctm.sys +%WER 47.4 | 21875 111957 | 55.1 32.8 12.1 2.6 47.4 35.7 | -0.642 | exp/tri6_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 48.6 | 21875 111957 | 54.3 35.9 9.8 2.9 48.6 35.4 | -0.769 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +%WER 50.4 | 21875 111957 | 51.3 32.4 16.2 1.8 50.4 35.7 | -0.487 | exp_bnf/tri7_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys + +############################################################################################################################# + +#KWS on the dev kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_9/metrics.txt:MTWV = 0.4488, THRESHOLD = 0.601 +exp/tri6_nnet/decode_dev10h.pem/kws_10/metrics.txt:MTWV = 0.4926, THRESHOLD = 0.576 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kws_15/metrics.txt:MTWV = 0.4589, THRESHOLD = 0.635 +exp_bnf/tri7_nnet/decode_dev10h.pem/kws_15/metrics.txt:MTWV = 0.4477, THRESHOLD = 0.591 + +#KWS on the dev kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/oov_kws_8/metrics.txt:MTWV = 0.0001, THRESHOLD = 0.778 +exp/tri6_nnet/decode_dev10h.pem/oov_kws_11/metrics.txt:MTWV = 0.0024, THRESHOLD = 0.581 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/oov_kws_16/metrics.txt:MTWV = 0.0012, THRESHOLD = 0.596 +exp_bnf/tri7_nnet/decode_dev10h.pem/oov_kws_15/metrics.txt:MTWV = 0.0017, THRESHOLD = 0.817 + +############################################################################################################################ + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist2.xml kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_8/metrics.txt:MTWV = 0.2886, THRESHOLD = 0.513 +exp/tri6_nnet/decode_dev10h.pem/dev_kws_11/metrics.txt:MTWV = 0.3672, THRESHOLD = 0.693 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/dev_kws_15/metrics.txt:MTWV = 0.2999, THRESHOLD = 0.792 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_kws_15/metrics.txt:MTWV = 0.3041, THRESHOLD = 0.693 + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist2.xml kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_10/metrics.txt:MTWV = 0.0000, THRESHOLD = 0 
+exp/tri6_nnet/decode_dev10h.pem/dev_oov_kws_10/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.873 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/dev_oov_kws_15/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.214 +exp_bnf/tri7_nnet/decode_dev10h.pem/dev_oov_kws_15/metrics.txt:MTWV = 0.0050, THRESHOLD = 0.831 + +############################################################################################################################ + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist3.xml kwlist -- IV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/eval_kws_9/metrics.txt:MTWV = 0.3791, THRESHOLD = 0.564 +exp/tri6_nnet/decode_dev10h.pem/eval_kws_12/metrics.txt:MTWV = 0.4444, THRESHOLD = 0.406 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/eval_kws_15/metrics.txt:MTWV = 0.3780, THRESHOLD = 0.609 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_kws_15/metrics.txt:MTWV = 0.3904, THRESHOLD = 0.51 + +#KWS on the IARPA-babel107b-v0.7_conv-dev.kwlist3.xml kwlist -- OOV only +exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_10/metrics.txt:MTWV = 0.0021, THRESHOLD = 0.724 +exp/tri6_nnet/decode_dev10h.pem/eval_oov_kws_10/metrics.txt:MTWV = 0.0040, THRESHOLD = 0.491 +exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/eval_oov_kws_15/metrics.txt:MTWV = 0.0032, THRESHOLD = 0.867 +exp_bnf/tri7_nnet/decode_dev10h.pem/eval_oov_kws_15/metrics.txt:MTWV = 0.0039, THRESHOLD = 0.105 + +############################################################################################################################ + diff --git a/egs/babel/s5d/results/kws_results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-03-31T11:34:24-04:00 b/egs/babel/s5d/results/kws_results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-03-31T11:34:24-04:00 new file mode 100644 index 00000000000..1fdad0615e1 --- /dev/null +++ b/egs/babel/s5d/results/kws_results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-03-31T11:34:24-04:00 @@ -0,0 +1,211 @@ +# +# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:00:20-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.4114 OTWV=0.5171 STWV=0.6713 MTWV=0.4128 THRESHOLD=0.453 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3606 , #FA=1935 , #Miss=2988 , Contributed ATWV= 0.4114, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4121 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4639 OTWV=0.5790 STWV=0.7779 MTWV=0.4639 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3948 , #FA=2450 , #Miss=2646 , Contributed ATWV= 0.4639, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4646 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4670 OTWV=0.5932 STWV=0.7799 MTWV=0.4685 THRESHOLD=0.453 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3914 , #FA=2016 , #Miss=2680 , Contributed ATWV= 0.4670, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4677 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4940 OTWV=0.6072 STWV=0.7751 MTWV=0.4940 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , 
#Corr=4297 , #FA=2623 , #Miss=2297 , Contributed ATWV= 0.4940, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4948 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4970 OTWV=0.6016 STWV=0.7837 MTWV=0.4985 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4145 , #FA=2538 , #Miss=2449 , Contributed ATWV= 0.4970, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4977 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.5174 OTWV=0.6324 STWV=0.7958 MTWV=0.5183 THRESHOLD=0.433 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4312 , #FA=2156 , #Miss=2282 , Contributed ATWV= 0.5174, Best Possible Contributed ATWV= 0.9984, ATWV= 0.5182 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +# +# KWS Task performance (TWV), for the set [kwlist2] evaluated on 2016-03-31T12:00:28-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.4371 OTWV=0.5527 STWV=0.6904 MTWV=0.4372 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=7695 , #FA=8671 , #Miss=6784 , Contributed ATWV= 0.4356, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4423 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=32 , #Miss=50 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0974 +ATWV=0.4822 OTWV=0.6082 STWV=0.7912 MTWV=0.4822 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8278 , #FA=9303 , #Miss=6201 , Contributed ATWV= 0.4808, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4882 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=12 , #FA=60 , #Miss=48 , Contributed ATWV= 0.0014, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0924 +ATWV=0.4920 OTWV=0.6156 STWV=0.7891 MTWV=0.4920 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8698 , #FA=10346, #Miss=5781 , Contributed ATWV= 0.4913, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4989 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=8 , #FA=59 , #Miss=52 , Contributed ATWV= 0.0006, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0427 +ATWV=0.5006 OTWV=0.6216 STWV=0.7975 MTWV=0.5006 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8552 , #FA=9419 , #Miss=5927 , Contributed ATWV= 0.4992, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5069 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=55 , #Miss=49 , Contributed ATWV= 0.0013, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0873 +ATWV=0.5077 OTWV=0.6291 STWV=0.7819 MTWV=0.5077 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=9060 , #FA=10188, #Miss=5419 , Contributed ATWV= 0.5073, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5150 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=7 , #FA=64 , #Miss=53 , Contributed ATWV= 0.0005, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0325 +ATWV=0.5203 OTWV=0.6486 STWV=0.7952 MTWV=0.5218 THRESHOLD=0.473 
exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=9144 , #FA=8922 , #Miss=5335 , Contributed ATWV= 0.5191, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5271 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=9 , #FA=44 , #Miss=51 , Contributed ATWV= 0.0012, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0821 +# +# KWS Task performance (TWV), for the set [kwlist3] evaluated on 2016-03-31T12:00:40-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3527 OTWV=0.4568 STWV=0.6002 MTWV=0.3537 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=6954 , #FA=5353 , #Miss=7254 , Contributed ATWV= 0.3477, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3778 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=23 , #FA=232 , #Miss=223 , Contributed ATWV= 0.0049, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0605 +ATWV=0.3997 OTWV=0.5121 STWV=0.7021 MTWV=0.4002 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist3_12/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7407 , #FA=5449 , #Miss=6801 , Contributed ATWV= 0.3919, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4259 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=39 , #FA=307 , #Miss=207 , Contributed ATWV= 0.0076, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0939 +ATWV=0.4102 OTWV=0.5277 STWV=0.7047 MTWV=0.4102 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7795 , #FA=5927 , #Miss=6413 , Contributed ATWV= 0.4033, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4382 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=288 , #Miss=210 , Contributed ATWV= 0.0067, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0822 +ATWV=0.4222 OTWV=0.5278 STWV=0.7066 MTWV=0.4222 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7820 , #FA=5808 , #Miss=6388 , Contributed ATWV= 0.4152, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4511 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=326 , #Miss=210 , Contributed ATWV= 0.0068, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0839 +ATWV=0.4285 OTWV=0.5406 STWV=0.6965 MTWV=0.4286 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=8050 , #FA=5500 , #Miss=6158 , Contributed ATWV= 0.4213, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4578 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=34 , #FA=264 , #Miss=212 , Contributed ATWV= 0.0070, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0858 +ATWV=0.4361 OTWV=0.5517 STWV=0.7032 MTWV=0.4361 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=8487 , #FA=6339 , #Miss=5721 , Contributed ATWV= 0.4310, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4683 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=311 , #Miss=210 , Contributed ATWV= 0.0048, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0594 +# +# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:00:53-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2471 OTWV=0.2986 STWV=0.3521 MTWV=0.2471 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1536 , 
#FA=1187 , #Miss=5058 , Contributed ATWV= 0.2471, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2475 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2738 OTWV=0.3312 STWV=0.3984 MTWV=0.2738 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1588 , #FA=1164 , #Miss=5006 , Contributed ATWV= 0.2738, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2742 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2762 OTWV=0.3345 STWV=0.4011 MTWV=0.2762 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1613 , #FA=1156 , #Miss=4981 , Contributed ATWV= 0.2762, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2766 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2932 OTWV=0.3415 STWV=0.3985 MTWV=0.2981 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1624 , #FA=1082 , #Miss=4970 , Contributed ATWV= 0.2934, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2938 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828 +ATWV=0.2970 OTWV=0.3432 STWV=0.4014 MTWV=0.2970 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1702 , #FA=1132 , #Miss=4892 , Contributed ATWV= 0.2970, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2975 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2978 OTWV=0.3444 STWV=0.4035 MTWV=0.2978 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1683 , #FA=1050 , #Miss=4911 , Contributed ATWV= 0.2978, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2983 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +# +# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:01:05-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2335 OTWV=0.2867 STWV=0.3609 MTWV=0.2337 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/phones/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1443 , #FA=1310 , #Miss=5151 , Contributed ATWV= 0.2336, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2339 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +ATWV=0.2513 OTWV=0.3174 STWV=0.4034 MTWV=0.2513 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1518 , #FA=1442 , #Miss=5076 , Contributed ATWV= 0.2515, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2519 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828 +ATWV=0.2525 OTWV=0.3188 STWV=0.4069 MTWV=0.2583 THRESHOLD=0.444 
exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1564 , #FA=1489 , #Miss=5030 , Contributed ATWV= 0.2526, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2530 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552 +ATWV=0.2575 OTWV=0.3184 STWV=0.3902 MTWV=0.2608 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1544 , #FA=1319 , #Miss=5050 , Contributed ATWV= 0.2575, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2579 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +ATWV=0.2759 OTWV=0.3294 STWV=0.4067 MTWV=0.2766 THRESHOLD=0.511 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1624 , #FA=1369 , #Miss=4970 , Contributed ATWV= 0.2760, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2764 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552 +ATWV=0.2793 OTWV=0.3306 STWV=0.4042 MTWV=0.2812 THRESHOLD=0.529 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1693 , #FA=1495 , #Miss=4901 , Contributed ATWV= 0.2785, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2790 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=0 , #Miss=1 , Contributed ATWV= 0.0008, Best Possible Contributed ATWV= 0.0016, ATWV= 0.5000 +# +# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:01:23-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2516 OTWV=0.2931 STWV=0.3457 MTWV=0.2518 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist_8/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1433 , #FA=916 , #Miss=5161 , Contributed ATWV= 0.2516, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2520 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2710 OTWV=0.3243 STWV=0.3971 MTWV=0.2720 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1527 , #FA=1006 , #Miss=5067 , Contributed ATWV= 0.2710, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2715 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552 +ATWV=0.2864 OTWV=0.3330 STWV=0.3928 MTWV=0.2864 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1673 , #FA=1135 , #Miss=4921 , Contributed ATWV= 0.2864, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2869 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2874 OTWV=0.3386 STWV=0.4018 MTWV=0.2881 THRESHOLD=0.403 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch2/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1591 , #FA=1010 , #Miss=5003 , Contributed ATWV= 0.2874, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2879 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed 
ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +ATWV=0.2946 OTWV=0.3463 STWV=0.4046 MTWV=0.2952 THRESHOLD=0.453 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1666 , #FA=1036 , #Miss=4928 , Contributed ATWV= 0.2946, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2951 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276 +# +# KWS Task performance (TWV), syllabic decode+search for the set [kwlist2] evaluated on 2016-03-31T12:01:28-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3298 OTWV=0.4064 STWV=0.4925 MTWV=0.3305 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist2_8/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4881 , #FA=5838 , #Miss=9598 , Contributed ATWV= 0.3281, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3331 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=9 , #FA=23 , #Miss=51 , Contributed ATWV= 0.0017, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1105 +ATWV=0.3636 OTWV=0.4527 STWV=0.5672 MTWV=0.3638 THRESHOLD=0.453 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5215 , #FA=6311 , #Miss=9264 , Contributed ATWV= 0.3608, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3663 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=15 , #FA=26 , #Miss=45 , Contributed ATWV= 0.0028, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1873 +ATWV=0.3784 OTWV=0.4622 STWV=0.5703 MTWV=0.3792 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5416 , #FA=6432 , #Miss=9063 , Contributed ATWV= 0.3766, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3824 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=33 , #Miss=49 , Contributed ATWV= 0.0018, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1208 +ATWV=0.3795 OTWV=0.4643 STWV=0.5595 MTWV=0.3795 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5620 , #FA=6171 , #Miss=8859 , Contributed ATWV= 0.3781, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3839 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=8 , #FA=29 , #Miss=52 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0974 +ATWV=0.3973 OTWV=0.4799 STWV=0.5716 MTWV=0.4011 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5747 , #FA=5988 , #Miss=8732 , Contributed ATWV= 0.3952, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4013 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=26 , #Miss=49 , Contributed ATWV= 0.0020, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1346 +# +# KWS Task performance (TWV), syllabic decode+search for the set [kwlist3] evaluated on 2016-03-31T12:01:38-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2442 OTWV=0.2994 STWV=0.3760 MTWV=0.2442 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3580 , #FA=3520 , #Miss=10628, Contributed ATWV= 0.2378, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2584 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=28 , #FA=145 , #Miss=218 , Contributed ATWV= 0.0064, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0787 +ATWV=0.2681 
OTWV=0.3407 STWV=0.4407 MTWV=0.2684 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3688 , #FA=3305 , #Miss=10520, Contributed ATWV= 0.2574, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2797 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=45 , #FA=195 , #Miss=201 , Contributed ATWV= 0.0106, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1306 +ATWV=0.2844 OTWV=0.3499 STWV=0.4441 MTWV=0.2857 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3840 , #FA=3340 , #Miss=10368, Contributed ATWV= 0.2733, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2970 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=44 , #FA=197 , #Miss=202 , Contributed ATWV= 0.0111, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1367 +ATWV=0.2946 OTWV=0.3581 STWV=0.4423 MTWV=0.2948 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3882 , #FA=2874 , #Miss=10326, Contributed ATWV= 0.2804, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3047 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=53 , #FA=138 , #Miss=193 , Contributed ATWV= 0.0142, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1751 +ATWV=0.2958 OTWV=0.3658 STWV=0.4485 MTWV=0.2988 THRESHOLD=0.453 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist3_11/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=4068 , #FA=3344 , #Miss=10140, Contributed ATWV= 0.2835, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3081 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=47 , #FA=136 , #Miss=199 , Contributed ATWV= 0.0122, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1504 +# +# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:01:55-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.1869 OTWV=0.2380 STWV=0.3024 MTWV=0.1869 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it2/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1180 , #FA=1168 , #Miss=5414 , Contributed ATWV= 0.1870, Best Possible Contributed ATWV= 0.9984, ATWV= 0.1873 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828 +ATWV=0.2043 OTWV=0.2598 STWV=0.3427 MTWV=0.2043 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1281 , #FA=1263 , #Miss=5313 , Contributed ATWV= 0.2045, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2048 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=4 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1103 +ATWV=0.2055 OTWV=0.2591 STWV=0.3340 MTWV=0.2055 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1267 , #FA=1206 , #Miss=5327 , Contributed ATWV= 0.2057, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2060 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=5 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1379 +ATWV=0.2123 OTWV=0.2766 STWV=0.3581 MTWV=0.2149 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1333 , #FA=1274 , #Miss=5261 , Contributed ATWV= 0.2125, Best Possible Contributed ATWV= 0.9984, ATWV= 
0.2128
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=4 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1103
+ATWV=0.2216 OTWV=0.2852 STWV=0.3565 MTWV=0.2240 THRESHOLD=0.403 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1371 , #FA=1067 , #Miss=5223 , Contributed ATWV= 0.2209, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2213
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=2 , #Miss=1 , Contributed ATWV= 0.0007, Best Possible Contributed ATWV= 0.0016, ATWV= 0.4448
+ATWV=0.2532 OTWV=0.3121 STWV=0.3808 MTWV=0.2539 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1547 , #FA=1310 , #Miss=5047 , Contributed ATWV= 0.2524, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2528
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=0 , #Miss=1 , Contributed ATWV= 0.0008, Best Possible Contributed ATWV= 0.0016, ATWV= 0.5000
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist2] evaluated on 2016-03-31T12:02:01-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2686 OTWV=0.3459 STWV=0.4328 MTWV=0.2690 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=3870 , #FA=5258 , #Miss=10609, Contributed ATWV= 0.2670, Best Possible Contributed ATWV= 0.9849, ATWV= 0.2711
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=42 , #Miss=50 , Contributed ATWV= 0.0016, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1055
+ATWV=0.3044 OTWV=0.3970 STWV=0.5154 MTWV=0.3044 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4285 , #FA=5644 , #Miss=10194, Contributed ATWV= 0.3011, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3057
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=16 , #FA=54 , #Miss=44 , Contributed ATWV= 0.0033, Best Possible Contributed ATWV= 0.0151, ATWV= 0.2152
+ATWV=0.3073 OTWV=0.3944 STWV=0.4998 MTWV=0.3079 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch2/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4457 , #FA=6120 , #Miss=10022, Contributed ATWV= 0.3051, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3098
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=13 , #FA=55 , #Miss=47 , Contributed ATWV= 0.0022, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1487
+ATWV=0.3092 OTWV=0.4100 STWV=0.5226 MTWV=0.3125 THRESHOLD=0.465 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4431 , #FA=5723 , #Miss=10048, Contributed ATWV= 0.3078, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3125
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=69 , #Miss=49 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0977
+ATWV=0.3280 OTWV=0.4225 STWV=0.5216 MTWV=0.3291 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4940 , #FA=6266 , #Miss=9539 , Contributed ATWV= 0.3266, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3316
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=63 , #Miss=50 , Contributed ATWV= 0.0014, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0911
+ATWV=0.3586 OTWV=0.4552 STWV=0.5519 MTWV=0.3614 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5261 , #FA=6266 , #Miss=9218 , Contributed ATWV= 0.3563, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3618
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=14 , #FA=67 , #Miss=46 , Contributed ATWV= 0.0023, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1531
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist3] evaluated on 2016-03-31T12:02:11-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1931 OTWV=0.2569 STWV=0.3444 MTWV=0.1931 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist3_9/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3015 , #FA=3772 , #Miss=11193, Contributed ATWV= 0.1875, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2037
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=33 , #FA=303 , #Miss=213 , Contributed ATWV= 0.0062, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0759
+ATWV=0.2228 OTWV=0.2982 STWV=0.4154 MTWV=0.2231 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist3_11/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3232 , #FA=3853 , #Miss=10976, Contributed ATWV= 0.2092, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2273
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=57 , #FA=332 , #Miss=189 , Contributed ATWV= 0.0141, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1738
+ATWV=0.2247 OTWV=0.2962 STWV=0.4001 MTWV=0.2247 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch4/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3131 , #FA=3232 , #Miss=11077, Contributed ATWV= 0.2122, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2306
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=48 , #FA=278 , #Miss=198 , Contributed ATWV= 0.0131, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1606
+ATWV=0.2320 OTWV=0.3081 STWV=0.4229 MTWV=0.2326 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3378 , #FA=3831 , #Miss=10830, Contributed ATWV= 0.2194, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2384
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=53 , #FA=299 , #Miss=193 , Contributed ATWV= 0.0126, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1544
+ATWV=0.2474 OTWV=0.3186 STWV=0.4206 MTWV=0.2476 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3615 , #FA=3812 , #Miss=10593, Contributed ATWV= 0.2310, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2510
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=63 , #FA=306 , #Miss=183 , Contributed ATWV= 0.0165, Best Possible Contributed ATWV= 0.0814, ATWV= 0.2023
+ATWV=0.2668 OTWV=0.3433 STWV=0.4457 MTWV=0.2668 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3825 , #FA=3913 , #Miss=10383, Contributed ATWV= 0.2535, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2755
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=59 , #FA=305 , #Miss=187 , Contributed ATWV= 0.0138, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1699
diff --git a/egs/babel/s5d/results/kws_results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:04:03-04:00 b/egs/babel/s5d/results/kws_results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:04:03-04:00
new file mode 100644
index 00000000000..1bbdc7dc33a
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:04:03-04:00
@@ -0,0 +1,100 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:04:48-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.4230 OTWV=0.5203 STWV=0.6189 MTWV=0.4235 THRESHOLD=0.473 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6311 , #FA=6437 , #Miss=5805 , Contributed ATWV= 0.4023, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4691
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=89 , #FA=402 , #Miss=391 , Contributed ATWV= 0.0206, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1450
+ATWV=0.4491 OTWV=0.5597 STWV=0.7023 MTWV=0.4494 THRESHOLD=0.503 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6205 , #FA=5950 , #Miss=5911 , Contributed ATWV= 0.4196, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4893
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=107 , #FA=429 , #Miss=373 , Contributed ATWV= 0.0295, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2070
+ATWV=0.4529 OTWV=0.5702 STWV=0.7084 MTWV=0.4529 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6656 , #FA=7401 , #Miss=5460 , Contributed ATWV= 0.4228, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4929
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=120 , #FA=600 , #Miss=360 , Contributed ATWV= 0.0301, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2113
+ATWV=0.4606 OTWV=0.5758 STWV=0.7195 MTWV=0.4606 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6612 , #FA=6706 , #Miss=5504 , Contributed ATWV= 0.4292, Best Possible Contributed ATWV= 0.8576, ATWV= 0.5004
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=118 , #FA=517 , #Miss=362 , Contributed ATWV= 0.0314, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2204
+ATWV=0.4728 OTWV=0.5842 STWV=0.7081 MTWV=0.4728 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=6938 , #FA=6197 , #Miss=5178 , Contributed ATWV= 0.4482, Best Possible Contributed ATWV= 0.8576, ATWV= 0.5226
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=96 , #FA=415 , #Miss=384 , Contributed ATWV= 0.0246, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1729
+ATWV=0.4845 OTWV=0.5929 STWV=0.7193 MTWV=0.4847 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=7106 , #FA=6592 , #Miss=5010 , Contributed ATWV= 0.4522, Best Possible Contributed ATWV= 0.8576, ATWV= 0.5273
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=116 , #FA=464 , #Miss=364 , Contributed ATWV= 0.0322, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2264
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:05:02-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3898 OTWV=0.4933 STWV=0.6145 MTWV=0.3899 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4970 , #FA=6619 , #Miss=7146 , Contributed ATWV= 0.3630, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4232
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=99 , #FA=372 , #Miss=381 , Contributed ATWV= 0.0268, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1882
+ATWV=0.4031 OTWV=0.5200 STWV=0.6682 MTWV=0.4031 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5121 , #FA=7137 , #Miss=6995 , Contributed ATWV= 0.3719, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4336
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=114 , #FA=463 , #Miss=366 , Contributed ATWV= 0.0312, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2194
+ATWV=0.4084 OTWV=0.5225 STWV=0.6694 MTWV=0.4094 THRESHOLD=0.465 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5236 , #FA=6998 , #Miss=6880 , Contributed ATWV= 0.3785, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4413
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=108 , #FA=459 , #Miss=372 , Contributed ATWV= 0.0299, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2103
+ATWV=0.4168 OTWV=0.5258 STWV=0.6705 MTWV=0.4171 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5251 , #FA=6798 , #Miss=6865 , Contributed ATWV= 0.3850, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4489
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=116 , #FA=437 , #Miss=364 , Contributed ATWV= 0.0318, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2231
+ATWV=0.4202 OTWV=0.5321 STWV=0.6687 MTWV=0.4209 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5526 , #FA=7152 , #Miss=6590 , Contributed ATWV= 0.3947, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4602
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=92 , #FA=418 , #Miss=388 , Contributed ATWV= 0.0254, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1788
+ATWV=0.4298 OTWV=0.5434 STWV=0.6798 MTWV=0.4315 THRESHOLD=0.453 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5351 , #FA=6564 , #Miss=6765 , Contributed ATWV= 0.3971, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4630
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=116 , #FA=433 , #Miss=364 , Contributed ATWV= 0.0327, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2296
+#
+# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:05:15-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3717 OTWV=0.4826 STWV=0.6206 MTWV=0.3717 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/phones/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4873 , #FA=7400 , #Miss=7243 , Contributed ATWV= 0.3453, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4026
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=102 , #FA=464 , #Miss=378 , Contributed ATWV= 0.0264, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1855
+ATWV=0.3794 OTWV=0.4921 STWV=0.6366 MTWV=0.3794 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4976 , #FA=7283 , #Miss=7140 , Contributed ATWV= 0.3504, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4086
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=107 , #FA=475 , #Miss=373 , Contributed ATWV= 0.0290, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2039
+ATWV=0.3803 OTWV=0.4989 STWV=0.6527 MTWV=0.3811 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4802 , #FA=7174 , #Miss=7314 , Contributed ATWV= 0.3507, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4089
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=108 , #FA=485 , #Miss=372 , Contributed ATWV= 0.0296, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2079
+ATWV=0.3865 OTWV=0.5032 STWV=0.6597 MTWV=0.3865 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5057 , #FA=7617 , #Miss=7059 , Contributed ATWV= 0.3569, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4161
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=111 , #FA=541 , #Miss=369 , Contributed ATWV= 0.0297, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2085
+ATWV=0.3987 OTWV=0.5141 STWV=0.6609 MTWV=0.4000 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5421 , #FA=7991 , #Miss=6695 , Contributed ATWV= 0.3758, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4382
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=89 , #FA=545 , #Miss=391 , Contributed ATWV= 0.0229, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1606
+ATWV=0.4089 OTWV=0.5226 STWV=0.6702 MTWV=0.4089 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5226 , #FA=7295 , #Miss=6890 , Contributed ATWV= 0.3793, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4423
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=109 , #FA=490 , #Miss=371 , Contributed ATWV= 0.0296, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2077
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:05:32-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3846 OTWV=0.4898 STWV=0.6140 MTWV=0.3849 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it1/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4698 , #FA=6363 , #Miss=7418 , Contributed ATWV= 0.3348, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3904
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=169 , #FA=376 , #Miss=311 , Contributed ATWV= 0.0498, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3497
+ATWV=0.4084 OTWV=0.5296 STWV=0.6808 MTWV=0.4084 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5112 , #FA=7507 , #Miss=7004 , Contributed ATWV= 0.3551, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4140
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=188 , #FA=531 , #Miss=292 , Contributed ATWV= 0.0533, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3747
+ATWV=0.4147 OTWV=0.5426 STWV=0.6942 MTWV=0.4164 THRESHOLD=0.453 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4903 , #FA=6600 , #Miss=7213 , Contributed ATWV= 0.3565, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4157
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=197 , #FA=506 , #Miss=283 , Contributed ATWV= 0.0582, Best Possible Contributed ATWV= 0.1424, ATWV= 0.4086
+ATWV=0.4205 OTWV=0.5421 STWV=0.6920 MTWV=0.4207 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5179 , #FA=7269 , #Miss=6937 , Contributed ATWV= 0.3621, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4222
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=199 , #FA=547 , #Miss=281 , Contributed ATWV= 0.0584, Best Possible Contributed ATWV= 0.1424, ATWV= 0.4099
+ATWV=0.4386 OTWV=0.5595 STWV=0.7003 MTWV=0.4400 THRESHOLD=0.484 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5245 , #FA=6308 , #Miss=6871 , Contributed ATWV= 0.3822, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4456
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=187 , #FA=418 , #Miss=293 , Contributed ATWV= 0.0564, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3962
+ATWV=0.4394 OTWV=0.5585 STWV=0.6927 MTWV=0.4397 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=5554 , #FA=7486 , #Miss=6562 , Contributed ATWV= 0.3789, Best Possible Contributed ATWV= 0.8576, ATWV= 0.4418
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=207 , #FA=548 , #Miss=273 , Contributed ATWV= 0.0604, Best Possible Contributed ATWV= 0.1424, ATWV= 0.4246
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:05:47-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1595 OTWV=0.2619 STWV=0.3850 MTWV=0.1602 THRESHOLD=0.503 exp/nnet3/tdnn_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=1949 , #FA=3706 , #Miss=10167, Contributed ATWV= 0.1404, Best Possible Contributed ATWV= 0.8576, ATWV= 0.1637
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=74 , #FA=313 , #Miss=406 , Contributed ATWV= 0.0190, Best Possible Contributed ATWV= 0.1424, ATWV= 0.1338
+ATWV=0.3032 OTWV=0.4062 STWV=0.5289 MTWV=0.3032 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it2/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=3866 , #FA=6132 , #Miss=8250 , Contributed ATWV= 0.2606, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3039
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=149 , #FA=450 , #Miss=331 , Contributed ATWV= 0.0425, Best Possible Contributed ATWV= 0.1424, ATWV= 0.2988
+ATWV=0.3355 OTWV=0.4619 STWV=0.6238 MTWV=0.3355 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4070 , #FA=6524 , #Miss=8046 , Contributed ATWV= 0.2849, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3322
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=176 , #FA=559 , #Miss=304 , Contributed ATWV= 0.0506, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3553
+ATWV=0.3368 OTWV=0.4568 STWV=0.6010 MTWV=0.3403 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=3997 , #FA=5729 , #Miss=8119 , Contributed ATWV= 0.2888, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3367
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=169 , #FA=513 , #Miss=311 , Contributed ATWV= 0.0480, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3371
+ATWV=0.3690 OTWV=0.4945 STWV=0.6419 MTWV=0.3701 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4757 , #FA=7077 , #Miss=7359 , Contributed ATWV= 0.3202, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3734
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=169 , #FA=521 , #Miss=311 , Contributed ATWV= 0.0488, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3428
+ATWV=0.3782 OTWV=0.4916 STWV=0.6313 MTWV=0.3786 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1976 , #Targ=12116, #Corr=4496 , #FA=6091 , #Miss=7620 , Contributed ATWV= 0.3256, Best Possible Contributed ATWV= 0.8576, ATWV= 0.3797
+ OOV=1 #Keywords=328 , #Targ=480 , #Corr=183 , #FA=480 , #Miss=297 , Contributed ATWV= 0.0525, Best Possible Contributed ATWV= 0.1424, ATWV= 0.3691
diff --git a/egs/babel/s5d/results/kws_results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:12:45-04:00 b/egs/babel/s5d/results/kws_results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:12:45-04:00
new file mode 100644
index 00000000000..f218056412a
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:12:45-04:00
@@ -0,0 +1,100 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:13:21-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2072 OTWV=0.3242 STWV=0.4752 MTWV=0.2072 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=4225 , #FA=6109 , #Miss=10665, Contributed ATWV= 0.2011, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2290
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=38 , #FA=404 , #Miss=388 , Contributed ATWV= 0.0061, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0500
+ATWV=0.2234 OTWV=0.3660 STWV=0.5806 MTWV=0.2244 THRESHOLD=0.473 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=4305 , #FA=6650 , #Miss=10585, Contributed ATWV= 0.2101, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2393
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=62 , #FA=515 , #Miss=364 , Contributed ATWV= 0.0133, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1094
+ATWV=0.2386 OTWV=0.3711 STWV=0.5954 MTWV=0.2386 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=4267 , #FA=5993 , #Miss=10623, Contributed ATWV= 0.2234, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2544
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=66 , #FA=446 , #Miss=360 , Contributed ATWV= 0.0152, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1243
+ATWV=0.2461 OTWV=0.3869 STWV=0.6098 MTWV=0.2469 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=4423 , #FA=5883 , #Miss=10467, Contributed ATWV= 0.2327, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2651
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=63 , #FA=453 , #Miss=363 , Contributed ATWV= 0.0134, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1098
+ATWV=0.2654 OTWV=0.4100 STWV=0.6005 MTWV=0.2672 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=5273 , #FA=6428 , #Miss=9617 , Contributed ATWV= 0.2495, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2842
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=68 , #FA=450 , #Miss=358 , Contributed ATWV= 0.0159, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1302
+ATWV=0.2681 OTWV=0.4076 STWV=0.6090 MTWV=0.2697 THRESHOLD=0.473 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=5143 , #FA=6378 , #Miss=9747 , Contributed ATWV= 0.2519, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2868
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=69 , #FA=462 , #Miss=357 , Contributed ATWV= 0.0163, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1335
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:13:34-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1796 OTWV=0.2970 STWV=0.5017 MTWV=0.1796 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2117 , #FA=5124 , #Miss=12773, Contributed ATWV= 0.1716, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1954
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=45 , #FA=519 , #Miss=381 , Contributed ATWV= 0.0080, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0658
+ATWV=0.1946 OTWV=0.3201 STWV=0.5540 MTWV=0.1946 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2218 , #FA=5557 , #Miss=12672, Contributed ATWV= 0.1817, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2069
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=67 , #FA=599 , #Miss=359 , Contributed ATWV= 0.0129, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1059
+ATWV=0.2035 OTWV=0.3267 STWV=0.5663 MTWV=0.2035 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2384 , #FA=6085 , #Miss=12506, Contributed ATWV= 0.1902, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2167
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=70 , #FA=669 , #Miss=356 , Contributed ATWV= 0.0132, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1084
+ATWV=0.2125 OTWV=0.3434 STWV=0.5569 MTWV=0.2147 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2802 , #FA=6994 , #Miss=12088, Contributed ATWV= 0.2032, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2314
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=63 , #FA=692 , #Miss=363 , Contributed ATWV= 0.0092, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0758
+ATWV=0.2146 OTWV=0.3363 STWV=0.5757 MTWV=0.2146 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2243 , #FA=4805 , #Miss=12647, Contributed ATWV= 0.2025, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2306
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=60 , #FA=550 , #Miss=366 , Contributed ATWV= 0.0121, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0991
+ATWV=0.2233 OTWV=0.3537 STWV=0.5753 MTWV=0.2233 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2472 , #FA=5516 , #Miss=12418, Contributed ATWV= 0.2070, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2357
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=72 , #FA=548 , #Miss=354 , Contributed ATWV= 0.0164, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1343
+#
+# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:13:46-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1757 OTWV=0.2941 STWV=0.5188 MTWV=0.1757 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/phones/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2109 , #FA=5112 , #Miss=12781, Contributed ATWV= 0.1661, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1892
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=51 , #FA=566 , #Miss=375 , Contributed ATWV= 0.0096, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0787
+ATWV=0.1885 OTWV=0.3156 STWV=0.5586 MTWV=0.1885 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2171 , #FA=5603 , #Miss=12719, Contributed ATWV= 0.1751, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1994
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=72 , #FA=672 , #Miss=354 , Contributed ATWV= 0.0134, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1103
+ATWV=0.1935 OTWV=0.3237 STWV=0.5717 MTWV=0.1935 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2122 , #FA=4885 , #Miss=12768, Contributed ATWV= 0.1811, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2062
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=62 , #FA=595 , #Miss=364 , Contributed ATWV= 0.0124, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1019
+ATWV=0.2013 OTWV=0.3267 STWV=0.5641 MTWV=0.2014 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2195 , #FA=4829 , #Miss=12695, Contributed ATWV= 0.1891, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2153
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=60 , #FA=592 , #Miss=366 , Contributed ATWV= 0.0123, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1005
+ATWV=0.2087 OTWV=0.3368 STWV=0.5610 MTWV=0.2087 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2348 , #FA=5077 , #Miss=12542, Contributed ATWV= 0.1967, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2240
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=63 , #FA=579 , #Miss=363 , Contributed ATWV= 0.0120, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0984
+ATWV=0.2116 OTWV=0.3465 STWV=0.5804 MTWV=0.2116 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2398 , #FA=5507 , #Miss=12492, Contributed ATWV= 0.1960, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2232
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=69 , #FA=609 , #Miss=357 , Contributed ATWV= 0.0156, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1279
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:14:01-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1708 OTWV=0.2816 STWV=0.4795 MTWV=0.1711 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1913 , #FA=4978 , #Miss=12977, Contributed ATWV= 0.1615, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1839
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=50 , #FA=507 , #Miss=376 , Contributed ATWV= 0.0093, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0764
+ATWV=0.1926 OTWV=0.3156 STWV=0.5617 MTWV=0.1926 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1964 , #FA=4856 , #Miss=12926, Contributed ATWV= 0.1751, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1994
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=79 , #FA=566 , #Miss=347 , Contributed ATWV= 0.0176, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1442
+ATWV=0.1985 OTWV=0.3240 STWV=0.5820 MTWV=0.1985 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1945 , #FA=4804 , #Miss=12945, Contributed ATWV= 0.1794, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2044
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=82 , #FA=557 , #Miss=344 , Contributed ATWV= 0.0191, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1563
+ATWV=0.2054 OTWV=0.3342 STWV=0.5882 MTWV=0.2054 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch3/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2125 , #FA=5218 , #Miss=12765, Contributed ATWV= 0.1875, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2135
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=80 , #FA=626 , #Miss=346 , Contributed ATWV= 0.0179, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1466
+ATWV=0.2126 OTWV=0.3434 STWV=0.5827 MTWV=0.2126 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2207 , #FA=4958 , #Miss=12683, Contributed ATWV= 0.1920, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2186
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=81 , #FA=521 , #Miss=345 , Contributed ATWV= 0.0206, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1692
+ATWV=0.2148 OTWV=0.3452 STWV=0.5808 MTWV=0.2148 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2497 , #FA=6035 , #Miss=12393, Contributed ATWV= 0.1978, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2252
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=79 , #FA=661 , #Miss=347 , Contributed ATWV= 0.0170, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1397
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:14:14-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1379 OTWV=0.2528 STWV=0.4632 MTWV=0.1385 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1422 , #FA=3603 , #Miss=13468, Contributed ATWV= 0.1268, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1444
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=50 , #FA=431 , #Miss=376 , Contributed ATWV= 0.0111, Best Possible Contributed ATWV= 0.1219, ATWV= 0.0907
+ATWV=0.1718 OTWV=0.2920 STWV=0.5386 MTWV=0.1718 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1789 , #FA=4843 , #Miss=13101, Contributed ATWV= 0.1564, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1781
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=71 , #FA=619 , #Miss=355 , Contributed ATWV= 0.0153, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1258
+ATWV=0.1754 OTWV=0.3002 STWV=0.5589 MTWV=0.1754 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1764 , #FA=4651 , #Miss=13126, Contributed ATWV= 0.1573, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1791
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=78 , #FA=592 , #Miss=348 , Contributed ATWV= 0.0181, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1484
+ATWV=0.1768 OTWV=0.3015 STWV=0.5378 MTWV=0.1768 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch1/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1823 , #FA=4605 , #Miss=13067, Contributed ATWV= 0.1624, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1849
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=68 , #FA=613 , #Miss=358 , Contributed ATWV= 0.0145, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1188
+ATWV=0.1851 OTWV=0.3170 STWV=0.5671 MTWV=0.1853 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=1990 , #FA=4815 , #Miss=12900, Contributed ATWV= 0.1680, Best Possible Contributed ATWV= 0.8781, ATWV= 0.1913
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=74 , #FA=576 , #Miss=352 , Contributed ATWV= 0.0171, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1406
+ATWV=0.1973 OTWV=0.3276 STWV=0.5722 MTWV=0.1981 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2031 , #Targ=14890, #Corr=2073 , #FA=4741 , #Miss=12817, Contributed ATWV= 0.1803, Best Possible Contributed ATWV= 0.8781, ATWV= 0.2053
+ OOV=1 #Keywords=282 , #Targ=426 , #Corr=77 , #FA=601 , #Miss=349 , Contributed ATWV= 0.0170, Best Possible Contributed ATWV= 0.1219, ATWV= 0.1395
diff --git a/egs/babel/s5d/results/kws_results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:21:34-04:00 b/egs/babel/s5d/results/kws_results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:21:34-04:00
new file mode 100644
index 00000000000..4e20fac4f56
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:21:34-04:00
@@ -0,0 +1,100 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:22:17-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.4218 OTWV=0.5044 STWV=0.5838 MTWV=0.4218 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=5437 , #FA=4638 , #Miss=5683 , Contributed ATWV= 0.4123, Best Possible Contributed ATWV= 0.8474, ATWV= 0.4865
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=29 , #FA=122 , #Miss=445 , Contributed ATWV= 0.0096, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0627
+ATWV=0.4619 OTWV=0.5643 STWV=0.6680 MTWV=0.4626 THRESHOLD=0.465 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=6171 , #FA=5956 , #Miss=4949 , Contributed ATWV= 0.4498, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5308
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=41 , #FA=214 , #Miss=433 , Contributed ATWV= 0.0121, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0791
+ATWV=0.4641 OTWV=0.5581 STWV=0.6612 MTWV=0.4641 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=5856 , #FA=4921 , #Miss=5264 , Contributed ATWV= 0.4543, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5361
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=33 , #FA=191 , #Miss=441 , Contributed ATWV= 0.0098, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0644
+ATWV=0.4733 OTWV=0.5691 STWV=0.6747 MTWV=0.4733 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=5952 , #FA=4840 , #Miss=5168 , Contributed ATWV= 0.4608, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5438
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=38 , #FA=188 , #Miss=436 , Contributed ATWV= 0.0125, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0818
+ATWV=0.4843 OTWV=0.5738 STWV=0.6585 MTWV=0.4847 THRESHOLD=0.484 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=6469 , #FA=5159 , #Miss=4651 , Contributed ATWV= 0.4745, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5599
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=34 , #FA=141 , #Miss=440 , Contributed ATWV= 0.0098, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0644
+ATWV=0.4867 OTWV=0.5849 STWV=0.6767 MTWV=0.4879 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=6428 , #FA=4944 , #Miss=4692 , Contributed ATWV= 0.4746, Best Possible Contributed ATWV= 0.8474, ATWV= 0.5601
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=37 , #FA=170 , #Miss=437 , Contributed ATWV= 0.0121, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0792
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:22:31-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2659 OTWV=0.3048 STWV=0.3360 MTWV=0.2665 THRESHOLD=0.444 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/syllabs/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2986 , #FA=2225 , #Miss=8134 , Contributed ATWV= 0.2583, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3048
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=25 , #FA=85 , #Miss=449 , Contributed ATWV= 0.0077, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0502
+ATWV=0.2858 OTWV=0.3350 STWV=0.3711 MTWV=0.2885 THRESHOLD=0.424 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3140 , #FA=2218 , #Miss=7980 , Contributed ATWV= 0.2774, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3274
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=27 , #FA=87 , #Miss=447 , Contributed ATWV= 0.0084, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0548
+ATWV=0.2876 OTWV=0.3334 STWV=0.3663 MTWV=0.2912 THRESHOLD=0.424 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3114 , #FA=2121 , #Miss=8006 , Contributed ATWV= 0.2819, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3327
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=20 , #FA=103 , #Miss=454 , Contributed ATWV= 0.0057, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0371
+ATWV=0.2912 OTWV=0.3367 STWV=0.3742 MTWV=0.2921 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3132 , #FA=2210 , #Miss=7988 , Contributed ATWV= 0.2844, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3357
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=22 , #FA=105 , #Miss=452 , Contributed ATWV= 0.0067, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0441
+ATWV=0.2984 OTWV=0.3436 STWV=0.3773 MTWV=0.2984 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3360 , #FA=2233 , #Miss=7760 , Contributed ATWV= 0.2906, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3429
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=26 , #FA=76 , #Miss=448 , Contributed ATWV= 0.0078, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0511
+ATWV=0.3002 OTWV=0.3415 STWV=0.3713 MTWV=0.3010 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3335 , #FA=2039 , #Miss=7785 , Contributed ATWV= 0.2924, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3451
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=28 , #FA=67 , #Miss=446 , Contributed ATWV= 0.0078, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0511
+#
+# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:22:44-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2620 OTWV=0.3049 STWV=0.3381 MTWV=0.2624 THRESHOLD=0.444 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/phones/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2896 , #FA=2268 , #Miss=8224 , Contributed ATWV= 0.2505, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2956
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=36 , #FA=98 , #Miss=438 , Contributed ATWV= 0.0115, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0756
+ATWV=0.2714 OTWV=0.3185 STWV=0.3513 MTWV=0.2753 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2951 , #FA=2075 , #Miss=8169 , Contributed ATWV= 0.2618, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3090
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=31 , #FA=101 , #Miss=443 , Contributed ATWV= 0.0096, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0632
+ATWV=0.2714 OTWV=0.3245 STWV=0.3588 MTWV=0.2765 THRESHOLD=0.365 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2956 , #FA=2157 , #Miss=8164 , Contributed ATWV= 0.2619, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3091
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=32 , #FA=106 , #Miss=442 , Contributed ATWV= 0.0095, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0626
+ATWV=0.2755 OTWV=0.3278 STWV=0.3617 MTWV=0.2787 THRESHOLD=0.41 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3044 , #FA=2236 , #Miss=8076 , Contributed ATWV= 0.2639, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3115
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=38 , #FA=104 , #Miss=436 , Contributed ATWV= 0.0116, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0761
+ATWV=0.2876 OTWV=0.3347 STWV=0.3658 MTWV=0.2941 THRESHOLD=0.41 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3271 , #FA=2252 , #Miss=7849 , Contributed ATWV= 0.2787, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3289
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=31 , #FA=80 , #Miss=443 , Contributed ATWV= 0.0089, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0582
+ATWV=0.2935 OTWV=0.3400 STWV=0.3723 MTWV=0.2953 THRESHOLD=0.473 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3273 , #FA=2245 , #Miss=7847 , Contributed ATWV= 0.2818, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3325
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=38 , #FA=104 , #Miss=436 , Contributed ATWV= 0.0117, Best Possible Contributed ATWV= 0.1526, ATWV= 0.0768
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:23:03-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2646 OTWV=0.3049 STWV=0.3331 MTWV=0.2666 THRESHOLD=0.453 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it3/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2763 , #FA=2036 , #Miss=8357 , Contributed ATWV= 0.2363, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2789
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=86 , #FA=77 , #Miss=388 , Contributed ATWV= 0.0283, Best Possible Contributed ATWV= 0.1526, ATWV= 0.1853
+ATWV=0.3019 OTWV=0.3543 STWV=0.3889 MTWV=0.3067 THRESHOLD=0.453 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2933 , #FA=2017 , #Miss=8187 , Contributed ATWV= 0.2623, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3096
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=120 , #FA=120 , #Miss=354 , Contributed ATWV= 0.0395, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2589
+ATWV=0.3102 OTWV=0.3595 STWV=0.3949 MTWV=0.3138 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2986 , #FA=1904 , #Miss=8134 , Contributed ATWV= 0.2695, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3180
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=123 , #FA=124 , #Miss=351 , Contributed ATWV= 0.0407, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2666
+ATWV=0.3108 OTWV=0.3586 STWV=0.3933 MTWV=0.3121 THRESHOLD=0.424 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3021 , #FA=2041 , #Miss=8099 , Contributed ATWV= 0.2674, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3156
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=136 , #FA=103 , #Miss=338 , Contributed ATWV= 0.0434, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2841
+ATWV=0.3149 OTWV=0.3630 STWV=0.3931 MTWV=0.3198 THRESHOLD=0.399 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3228 , #FA=1870 , #Miss=7892 , Contributed ATWV= 0.2780, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3280
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=118 , #FA=93 , #Miss=356 , Contributed ATWV= 0.0369, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2420
+ATWV=0.3200 OTWV=0.3670 STWV=0.3985 MTWV=0.3222 THRESHOLD=0.403 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=3204 , #FA=2050 , #Miss=7916 , Contributed ATWV= 0.2783, Best Possible Contributed ATWV= 0.8474, ATWV= 0.3285
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=132 , #FA=113 , #Miss=342 , Contributed ATWV= 0.0416, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2729
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:23:18-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1461 OTWV=0.1765 STWV=0.1935 MTWV=0.1477 THRESHOLD=0.444 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it1/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=1504 , #FA=1378 , #Miss=9616 , Contributed ATWV= 0.1281, Best Possible Contributed ATWV= 0.8474, ATWV= 0.1512
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=58 , #FA=79 , #Miss=416 , Contributed ATWV= 0.0180, Best Possible Contributed ATWV= 0.1526, ATWV= 0.1178
+ATWV=0.1866 OTWV=0.2378 STWV=0.2636 MTWV=0.1962 THRESHOLD=0.386 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch1/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=1677 , #FA=1381 , #Miss=9443 , Contributed ATWV= 0.1586, Best Possible Contributed ATWV= 0.8474, ATWV= 0.1872
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=89 , #FA=93 , #Miss=385 , Contributed ATWV= 0.0281, Best Possible Contributed ATWV= 0.1526, ATWV= 0.1838
+ATWV=0.1946 OTWV=0.2484 STWV=0.2754 MTWV=0.2051 THRESHOLD=0.399 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=1704 , #FA=1422 , #Miss=9416 , Contributed ATWV= 0.1643, Best Possible Contributed ATWV= 0.8474, ATWV= 0.1939
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=96 , #FA=110 , #Miss=378 , Contributed ATWV= 0.0303, Best Possible Contributed ATWV= 0.1526, ATWV= 0.1986
+ATWV=0.2026 OTWV=0.2545 STWV=0.2817 MTWV=0.2089 THRESHOLD=0.41 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=1821 , #FA=1441 , #Miss=9299 , Contributed ATWV= 0.1707, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2014
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=105 , #FA=121 , #Miss=369 , Contributed ATWV= 0.0319, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2088
+ATWV=0.2288 OTWV=0.2860 STWV=0.3121 MTWV=0.2409 THRESHOLD=0.328 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2169 , #FA=1525 , #Miss=8951 , Contributed ATWV= 0.1938, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2287
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=107 , #FA=86 , #Miss=367 , Contributed ATWV= 0.0350, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2292
+ATWV=0.2408 OTWV=0.2959 STWV=0.3216 MTWV=0.2512 THRESHOLD=0.345 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=1943 , #Targ=11120, #Corr=2322 , #FA=1553 , #Miss=8798 , Contributed ATWV= 0.2054, Best Possible Contributed ATWV= 0.8474, ATWV= 0.2423
+ OOV=1 #Keywords=350 , #Targ=474 , #Corr=109 , #FA=92 , #Miss=365 , Contributed ATWV= 0.0355, Best Possible Contributed ATWV= 0.1526, ATWV= 0.2323
diff --git a/egs/babel/s5d/results/kws_results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:25:02-04:00 b/egs/babel/s5d/results/kws_results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:25:02-04:00
new file mode 100644
index 00000000000..792b9ca097d
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:25:02-04:00
@@ -0,0 +1,100 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:25:49-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3254 OTWV=0.4191 STWV=0.5168 MTWV=0.3254 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5022 , #FA=5294 , #Miss=6940 , Contributed ATWV= 0.3223, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3690
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=26 , #FA=235 , #Miss=491 , Contributed ATWV= 0.0031, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0245
+ATWV=0.3668 OTWV=0.4878 STWV=0.6467 MTWV=0.3672 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5496 , #FA=6611 , #Miss=6466 , Contributed ATWV= 0.3598, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4119
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=48 , #FA=403 , #Miss=469 , Contributed ATWV= 0.0071, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0558
+ATWV=0.3767 OTWV=0.4957 STWV=0.6459 MTWV=0.3767 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5698 , #FA=6556 , #Miss=6264 , Contributed ATWV= 0.3715, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4253
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=40 , #FA=378 , #Miss=477 , Contributed ATWV= 0.0051, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0406
+ATWV=0.3866 OTWV=0.5082 STWV=0.6665 MTWV=0.3866 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5644 , #FA=6329 , #Miss=6318 , Contributed ATWV= 0.3801, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4352
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=48 , #FA=401 , #Miss=469 , Contributed ATWV= 0.0065, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0511
+ATWV=0.4033 OTWV=0.5188 STWV=0.6543 MTWV=0.4034 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=6255 , #FA=6413 , #Miss=5707 , Contributed ATWV= 0.3950, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4522
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=52 , #FA=364 , #Miss=465 , Contributed ATWV= 0.0083, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0655
+ATWV=0.4131 OTWV=0.5198 STWV=0.6353 MTWV=0.4131 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=6396 , #FA=6133 , #Miss=5566 , Contributed ATWV= 0.4068, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4657
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=41 , #FA=330 , #Miss=476 , Contributed ATWV= 0.0063, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0496
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:26:02-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3102 OTWV=0.4044 STWV=0.5008 MTWV=0.3102 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3950 , #FA=5521 , #Miss=8012 , Contributed ATWV= 0.3063, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3506
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=29 , #FA=261 , #Miss=488 , Contributed ATWV= 0.0039, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0311
+ATWV=0.3475 OTWV=0.4589 STWV=0.6057 MTWV=0.3482 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4245 , #FA=6029 , #Miss=7717 , Contributed ATWV= 0.3417, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3912
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=39 , #FA=325 , #Miss=478 , Contributed ATWV= 0.0058, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0461
+ATWV=0.3567 OTWV=0.4704 STWV=0.6093 MTWV=0.3575 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4372 , #FA=6005 , #Miss=7590 , Contributed ATWV= 0.3513, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4022
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=38 , #FA=329 , #Miss=479 , Contributed ATWV= 0.0054, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0429
+ATWV=0.3667 OTWV=0.4738 STWV=0.6193 MTWV=0.3674 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4281 , #FA=5292 , #Miss=7681 , Contributed ATWV= 0.3606, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4128
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=40 , #FA=311 , #Miss=477 , Contributed ATWV= 0.0061, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0481
+ATWV=0.3798 OTWV=0.4888 STWV=0.6133 MTWV=0.3799 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4820 , #FA=5800 , #Miss=7142 , Contributed ATWV= 0.3729, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4269
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=47 , #FA=314 , #Miss=470 , Contributed ATWV= 0.0070, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0551
+ATWV=0.3907 OTWV=0.4929 STWV=0.6005 MTWV=0.3907 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5147 , #FA=6646 , #Miss=6815 , Contributed ATWV= 0.3840, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4397
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=45 , #FA=314 , #Miss=472 , Contributed ATWV= 0.0066, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0523
+#
+# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:26:15-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2937 OTWV=0.3986 STWV=0.5124 MTWV=0.2937 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/phones/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4370 , #FA=6963 , #Miss=7592 , Contributed ATWV= 0.2878, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3294
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=40 , #FA=371 , #Miss=477 , Contributed ATWV= 0.0059, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0467
+ATWV=0.3182 OTWV=0.4371 STWV=0.5936 MTWV=0.3182 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4479 , #FA=7039 , #Miss=7483 , Contributed ATWV= 0.3118, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3570
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=44 , #FA=444 , #Miss=473 , Contributed ATWV= 0.0063, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0500
+ATWV=0.3274 OTWV=0.4496 STWV=0.6019 MTWV=0.3282 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4562 , #FA=6505 , #Miss=7400 , Contributed ATWV= 0.3206, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3671
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=42 , #FA=370 , #Miss=475 , Contributed ATWV= 0.0068, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0538
+ATWV=0.3444 OTWV=0.4580 STWV=0.6076 MTWV=0.3446 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4491 , #FA=6127 , #Miss=7471 , Contributed ATWV= 0.3361, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3848
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=49 , #FA=396 , #Miss=468 , Contributed ATWV= 0.0083, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0657
+ATWV=0.3515 OTWV=0.4684 STWV=0.6055 MTWV=0.3520 THRESHOLD=0.465 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4966 , #FA=6072 , #Miss=6996 , Contributed ATWV= 0.3427, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3923
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=51 , #FA=321 , #Miss=466 , Contributed ATWV= 0.0088, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0694
+ATWV=0.3624 OTWV=0.4732 STWV=0.5981 MTWV=0.3624 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5315 , #FA=6787 , #Miss=6647 , Contributed ATWV= 0.3542, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4055
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=51 , #FA=356 , #Miss=466 , Contributed ATWV= 0.0082, Best Possible Contributed ATWV= 0.1265, ATWV= 0.0645
+#
+# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:26:32-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2989 OTWV=0.3850 STWV=0.4696 MTWV=0.2989 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it2/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3952 , #FA=4049 , #Miss=8010 , Contributed ATWV= 0.2858, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3272
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=62 , #FA=188 , #Miss=455 , Contributed ATWV= 0.0132, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1041
+ATWV=0.3556 OTWV=0.4644 STWV=0.6019 MTWV=0.3556 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4743 , #FA=6603 , #Miss=7219 , Contributed ATWV= 0.3340, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3823
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=101 , #FA=434 , #Miss=416 , Contributed ATWV= 0.0216, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1708
+ATWV=0.3697 OTWV=0.4791 STWV=0.6110 MTWV=0.3711 THRESHOLD=0.503 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4891 , #FA=6482 , #Miss=7071 , Contributed ATWV= 0.3430, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3927
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=117 , #FA=391 , #Miss=400 , Contributed ATWV= 0.0267, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2109
+ATWV=0.3746 OTWV=0.4805 STWV=0.6185 MTWV=0.3746 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4733 , #FA=5416 , #Miss=7229 , Contributed ATWV= 0.3540, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4053
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=97 , #FA=371 , #Miss=420 , Contributed ATWV= 0.0206, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1629
+ATWV=0.3906 OTWV=0.4954 STWV=0.6114 MTWV=0.3933 THRESHOLD=0.453 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5223 , #FA=5433 , #Miss=6739 , Contributed ATWV= 0.3644, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4172
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=113 , #FA=300 , #Miss=404 , Contributed ATWV= 0.0262, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2072
+ATWV=0.4026 OTWV=0.5039 STWV=0.6123 MTWV=0.4026 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=5625 , #FA=6694 , #Miss=6337 , Contributed ATWV= 0.3753, Best Possible Contributed ATWV= 0.8735, ATWV= 0.4296
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=134 , #FA=378 , #Miss=383 , Contributed ATWV= 0.0273, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2160
+#
+# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:26:46-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2248 OTWV=0.3051 STWV=0.3995 MTWV=0.2248 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3143 , #FA=4419 , #Miss=8819 , Contributed ATWV= 0.2067, Best Possible Contributed ATWV= 0.8735, ATWV= 0.2366
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=87 , #FA=253 , #Miss=430 , Contributed ATWV= 0.0181, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1433
+ATWV=0.2716 OTWV=0.3791 STWV=0.5246 MTWV=0.2724 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3613 , #FA=5664 , #Miss=8349 , Contributed ATWV= 0.2479, Best Possible Contributed ATWV= 0.8735, ATWV= 0.2838
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=110 , #FA=432 , #Miss=407 , Contributed ATWV= 0.0237, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1873
+ATWV=0.2862 OTWV=0.3915 STWV=0.5291 MTWV=0.2874 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch2/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=3593 , #FA=5029 , #Miss=8369 , Contributed ATWV= 0.2615, Best Possible Contributed ATWV= 0.8735, ATWV= 0.2993
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=111 , #FA=379 , #Miss=406 , Contributed ATWV= 0.0248, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1959
+ATWV=0.2923 OTWV=0.4062 STWV=0.5508 MTWV=0.2923 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4069 , #FA=6808 , #Miss=7893 , Contributed ATWV= 0.2672, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3059
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=122 , #FA=492 , #Miss=395 , Contributed ATWV= 0.0251, Best Possible Contributed ATWV= 0.1265, ATWV= 0.1987
+ATWV=0.3254 OTWV=0.4319 STWV=0.5579 MTWV=0.3254 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4524 , #FA=6093 , #Miss=7438 , Contributed ATWV= 0.2980, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3411
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=130 , #FA=445 , #Miss=387 , Contributed ATWV= 0.0274, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2169
+ATWV=0.3392 OTWV=0.4519 STWV=0.5786 MTWV=0.3392 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2071 , #Targ=11962, #Corr=4870 , #FA=6666 , #Miss=7092 , Contributed ATWV= 0.3122, Best Possible Contributed ATWV= 0.8735, ATWV= 0.3574
+ OOV=1 #Keywords=300 , #Targ=517 , #Corr=133 , #FA=466 , #Miss=384 , Contributed ATWV= 0.0270, Best Possible Contributed ATWV= 0.1265, ATWV= 0.2133
diff --git a/egs/babel/s5d/results/kws_results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:27:39-04:00 b/egs/babel/s5d/results/kws_results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:27:39-04:00
new file mode 100644
index 00000000000..1997692642e
--- /dev/null
+++ b/egs/babel/s5d/results/kws_results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:27:39-04:00
@@ -0,0 +1,100 @@
+#
+# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:28:25-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2977 OTWV=0.3916 STWV=0.4944 MTWV=0.2977 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4395 , #FA=5271 , #Miss=7275 , Contributed ATWV= 0.2900, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3348
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=47 , #FA=376 , #Miss=465 , Contributed ATWV= 0.0077, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0574
+ATWV=0.3094 OTWV=0.4251 STWV=0.5824 MTWV=0.3094 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4362 , #FA=5310 , #Miss=7308 , Contributed ATWV= 0.2966, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3424
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=69 , #FA=394 , #Miss=443 , Contributed ATWV= 0.0128, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0957
+ATWV=0.3215 OTWV=0.4319 STWV=0.5834 MTWV=0.3228 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4438 , #FA=4879 , #Miss=7232 , Contributed ATWV= 0.3089, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3566
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=68 , #FA=381 , #Miss=444 , Contributed ATWV= 0.0126, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0942
+ATWV=0.3272 OTWV=0.4381 STWV=0.5897 MTWV=0.3272 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4487 , #FA=4749 , #Miss=7183 , Contributed ATWV= 0.3106, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3585
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=83 , #FA=376 , #Miss=429 , Contributed ATWV= 0.0166, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1242
+ATWV=0.3477 OTWV=0.4611 STWV=0.5871 MTWV=0.3478 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=5253 , #FA=5693 , #Miss=6417 , Contributed ATWV= 0.3363, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3883
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=67 , #FA=450 , #Miss=445 , Contributed ATWV= 0.0114, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0849
+ATWV=0.3543 OTWV=0.4720 STWV=0.6040 MTWV=0.3543 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=5318 , #FA=5634 , #Miss=6352 , Contributed ATWV= 0.3414, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3942
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=72 , #FA=448 , #Miss=440 , Contributed ATWV= 0.0129, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0965
+#
+# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:28:39-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2682 OTWV=0.3601 STWV=0.4696 MTWV=0.2690 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/syllabs/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3255 , #FA=5332 , #Miss=8415 , Contributed ATWV= 0.2627, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3032
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=40 , #FA=397 , #Miss=472 , Contributed ATWV= 0.0056, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0418
+ATWV=0.2748 OTWV=0.3792 STWV=0.5133 MTWV=0.2748 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3036 , #FA=4436 , #Miss=8634 , Contributed ATWV= 0.2646, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3055
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=54 , #FA=371 , #Miss=458 , Contributed ATWV= 0.0102, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0759
+ATWV=0.2823 OTWV=0.3883 STWV=0.5214 MTWV=0.2823 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3344 , #FA=5218 , #Miss=8326 , Contributed ATWV= 0.2711, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3130
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=61 , #FA=428 , #Miss=451 , Contributed ATWV= 0.0112, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0836
+ATWV=0.2874 OTWV=0.3903 STWV=0.5191 MTWV=0.2874 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3274 , #FA=4680 , #Miss=8396 , Contributed ATWV= 0.2740, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3163
+ OOV=1 #Keywords=309 , #Targ=512 , #Corr=66 , #FA=364 , #Miss=446 , Contributed ATWV= 0.0134, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1001
+ATWV=0.3076 OTWV=0.4101 STWV=0.5223 MTWV=0.3089 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3617 , #FA=4797 , #Miss=8053 , Contributed ATWV= 0.2968, Best Possible Contributed ATWV=
0.8662, ATWV= 0.3427 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=61 , #FA=376 , #Miss=451 , Contributed ATWV= 0.0107, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0802 +ATWV=0.3083 OTWV=0.4154 STWV=0.5354 MTWV=0.3085 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3813 , #FA=5545 , #Miss=7857 , Contributed ATWV= 0.2996, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3459 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=64 , #FA=463 , #Miss=448 , Contributed ATWV= 0.0087, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0649 +# +# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:28:54-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2624 OTWV=0.3740 STWV=0.5112 MTWV=0.2624 THRESHOLD=0.503 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3562 , #FA=6509 , #Miss=8108 , Contributed ATWV= 0.2483, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2866 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=74 , #FA=531 , #Miss=438 , Contributed ATWV= 0.0141, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1056 +ATWV=0.2657 OTWV=0.3687 STWV=0.4899 MTWV=0.2657 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/phones/kwset_kwlist_8/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3151 , #FA=4641 , #Miss=8519 , Contributed ATWV= 0.2540, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2932 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=52 , #FA=319 , #Miss=460 , Contributed ATWV= 0.0117, Best Possible Contributed ATWV= 0.1338, ATWV= 0.0876 +ATWV=0.2779 OTWV=0.3829 STWV=0.5065 MTWV=0.2779 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3384 , #FA=5003 , #Miss=8286 , Contributed ATWV= 0.2626, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3032 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=74 , #FA=397 , #Miss=438 , Contributed ATWV= 0.0153, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1140 +ATWV=0.2802 OTWV=0.3862 STWV=0.5240 MTWV=0.2802 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3436 , #FA=5211 , #Miss=8234 , Contributed ATWV= 0.2655, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3065 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=73 , #FA=443 , #Miss=439 , Contributed ATWV= 0.0147, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1099 +ATWV=0.2970 OTWV=0.4030 STWV=0.5268 MTWV=0.2974 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3835 , #FA=5650 , #Miss=7835 , Contributed ATWV= 0.2816, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3251 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=75 , #FA=434 , #Miss=437 , Contributed ATWV= 0.0154, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1153 +ATWV=0.2994 OTWV=0.4095 STWV=0.5369 MTWV=0.2994 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3723 , #FA=4888 , #Miss=7947 , Contributed ATWV= 0.2855, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3295 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=67 , #FA=390 , #Miss=445 , Contributed ATWV= 0.0139, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1042 +# +# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] 
evaluated on 2016-03-31T12:29:11-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2666 OTWV=0.3554 STWV=0.4513 MTWV=0.2666 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it1/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3223 , #FA=4631 , #Miss=8447 , Contributed ATWV= 0.2475, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2857 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=76 , #FA=317 , #Miss=436 , Contributed ATWV= 0.0190, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1423 +ATWV=0.2823 OTWV=0.3864 STWV=0.5230 MTWV=0.2823 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3220 , #FA=4390 , #Miss=8450 , Contributed ATWV= 0.2590, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2990 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=94 , #FA=356 , #Miss=418 , Contributed ATWV= 0.0233, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1743 +ATWV=0.2944 OTWV=0.3988 STWV=0.5324 MTWV=0.2946 THRESHOLD=0.511 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3523 , #FA=5214 , #Miss=8147 , Contributed ATWV= 0.2679, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3092 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=115 , #FA=439 , #Miss=397 , Contributed ATWV= 0.0265, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1981 +ATWV=0.2985 OTWV=0.4008 STWV=0.5278 MTWV=0.2995 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch2/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3476 , #FA=4605 , #Miss=8194 , Contributed ATWV= 0.2747, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3171 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=105 , #FA=395 , #Miss=407 , Contributed ATWV= 0.0238, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1776 +ATWV=0.3182 OTWV=0.4262 STWV=0.5392 MTWV=0.3205 THRESHOLD=0.465 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3855 , #FA=4523 , #Miss=7815 , Contributed ATWV= 0.2976, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3436 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=91 , #FA=395 , #Miss=421 , Contributed ATWV= 0.0205, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1535 +ATWV=0.3279 OTWV=0.4355 STWV=0.5492 MTWV=0.3295 THRESHOLD=0.484 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=4086 , #FA=5301 , #Miss=7584 , Contributed ATWV= 0.3054, Best Possible Contributed ATWV= 0.8662, ATWV= 0.3525 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=98 , #FA=407 , #Miss=414 , Contributed ATWV= 0.0225, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1685 +# +# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:29:26-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2158 OTWV=0.3080 STWV=0.4193 MTWV=0.2158 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it1/kwset_kwlist_8/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=2501 , #FA=4203 , #Miss=9169 , Contributed ATWV= 0.1991, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2298 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=80 , #FA=387 , #Miss=432 , Contributed ATWV= 0.0167, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1247 +ATWV=0.2176 OTWV=0.3302 STWV=0.4778 MTWV=0.2176 THRESHOLD=0.491 exp/nnet3/tdnn_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + 
OOV=0 #Keywords=2001 , #Targ=11670, #Corr=2243 , #FA=3837 , #Miss=9427 , Contributed ATWV= 0.1924, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2222 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=105 , #FA=385 , #Miss=407 , Contributed ATWV= 0.0252, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1882 +ATWV=0.2444 OTWV=0.3568 STWV=0.4926 MTWV=0.2444 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch1/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=2734 , #FA=4943 , #Miss=8936 , Contributed ATWV= 0.2189, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2527 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=110 , #FA=453 , #Miss=402 , Contributed ATWV= 0.0255, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1904 +ATWV=0.2464 OTWV=0.3638 STWV=0.5166 MTWV=0.2471 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=2730 , #FA=5051 , #Miss=8940 , Contributed ATWV= 0.2168, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2503 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=120 , #FA=492 , #Miss=392 , Contributed ATWV= 0.0296, Best Possible Contributed ATWV= 0.1338, ATWV= 0.2216 +ATWV=0.2765 OTWV=0.3905 STWV=0.5268 MTWV=0.2782 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3216 , #FA=4675 , #Miss=8454 , Contributed ATWV= 0.2526, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2916 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=109 , #FA=449 , #Miss=403 , Contributed ATWV= 0.0239, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1786 +ATWV=0.2787 OTWV=0.3901 STWV=0.5224 MTWV=0.2799 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2001 , #Targ=11670, #Corr=3448 , #FA=5571 , #Miss=8222 , Contributed ATWV= 0.2547, Best Possible Contributed ATWV= 0.8662, ATWV= 0.2940 + OOV=1 #Keywords=309 , #Targ=512 , #Corr=106 , #FA=532 , #Miss=406 , Contributed ATWV= 0.0241, Best Possible Contributed ATWV= 0.1338, ATWV= 0.1799 diff --git a/egs/babel/s5d/results/kws_results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:29:55-04:00 b/egs/babel/s5d/results/kws_results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:29:55-04:00 new file mode 100644 index 00000000000..87e1bef6be4 --- /dev/null +++ b/egs/babel/s5d/results/kws_results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-31T12:29:55-04:00 @@ -0,0 +1,100 @@ +# +# KWS Task performance (TWV), for the set [kwlist] evaluated on 2016-03-31T12:30:41-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.4364 OTWV=0.5200 STWV=0.6280 MTWV=0.4364 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=6368 , #FA=4863 , #Miss=6347 , Contributed ATWV= 0.4198, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4785 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=77 , #FA=402 , #Miss=401 , Contributed ATWV= 0.0167, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1359 +ATWV=0.4773 OTWV=0.5774 STWV=0.7209 MTWV=0.4782 THRESHOLD=0.444 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=6550 , #FA=4633 , #Miss=6165 , Contributed ATWV= 0.4556, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5193 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=95 , #FA=405 , #Miss=383 , Contributed ATWV= 0.0218, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1775 +ATWV=0.4854 OTWV=0.5811 
STWV=0.7340 MTWV=0.4860 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=6988 , #FA=5965 , #Miss=5727 , Contributed ATWV= 0.4637, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5287 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=107 , #FA=644 , #Miss=371 , Contributed ATWV= 0.0216, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1761 +ATWV=0.4866 OTWV=0.5909 STWV=0.7347 MTWV=0.4870 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=7147 , #FA=6632 , #Miss=5568 , Contributed ATWV= 0.4645, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5295 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=119 , #FA=751 , #Miss=359 , Contributed ATWV= 0.0221, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1800 +ATWV=0.5068 OTWV=0.6090 STWV=0.7323 MTWV=0.5068 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=7333 , #FA=5227 , #Miss=5382 , Contributed ATWV= 0.4791, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5462 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=120 , #FA=509 , #Miss=358 , Contributed ATWV= 0.0277, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2252 +ATWV=0.5099 OTWV=0.6070 STWV=0.7211 MTWV=0.5099 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=7727 , #FA=5750 , #Miss=4988 , Contributed ATWV= 0.4855, Best Possible Contributed ATWV= 0.8772, ATWV= 0.5534 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=125 , #FA=706 , #Miss=353 , Contributed ATWV= 0.0244, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1986 +# +# KWS Task performance (TWV), syllabic search for the set [kwlist] evaluated on 2016-03-31T12:30:54-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3801 OTWV=0.4607 STWV=0.5692 MTWV=0.3801 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4182 , #FA=4840 , #Miss=8533 , Contributed ATWV= 0.3608, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4113 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=89 , #FA=390 , #Miss=389 , Contributed ATWV= 0.0192, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1567 +ATWV=0.4019 OTWV=0.4952 STWV=0.6210 MTWV=0.4021 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4476 , #FA=5265 , #Miss=8239 , Contributed ATWV= 0.3789, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4319 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=104 , #FA=460 , #Miss=374 , Contributed ATWV= 0.0230, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1877 +ATWV=0.4112 OTWV=0.5090 STWV=0.6304 MTWV=0.4127 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4556 , #FA=5228 , #Miss=8159 , Contributed ATWV= 0.3877, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4420 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=111 , #FA=427 , #Miss=367 , Contributed ATWV= 0.0235, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1917 +ATWV=0.4120 OTWV=0.5003 STWV=0.6302 MTWV=0.4120 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/syllabs/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4705 , #FA=5876 , #Miss=8010 , Contributed ATWV= 0.3897, Best Possible Contributed ATWV= 
0.8772, ATWV= 0.4443 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=107 , #FA=541 , #Miss=371 , Contributed ATWV= 0.0223, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1819 +ATWV=0.4233 OTWV=0.5140 STWV=0.6209 MTWV=0.4233 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4715 , #FA=4781 , #Miss=8000 , Contributed ATWV= 0.3982, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4539 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=112 , #FA=375 , #Miss=366 , Contributed ATWV= 0.0251, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2045 +ATWV=0.4281 OTWV=0.5186 STWV=0.6279 MTWV=0.4284 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4813 , #FA=5241 , #Miss=7902 , Contributed ATWV= 0.4030, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4594 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=116 , #FA=452 , #Miss=362 , Contributed ATWV= 0.0251, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2042 +# +# KWS Task performance (TWV), phonetic search for the set [kwlist] evaluated on 2016-03-31T12:31:07-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3739 OTWV=0.4614 STWV=0.5894 MTWV=0.3739 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/phones/kwset_kwlist_8/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4006 , #FA=4874 , #Miss=8709 , Contributed ATWV= 0.3525, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4018 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=95 , #FA=399 , #Miss=383 , Contributed ATWV= 0.0214, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1746 +ATWV=0.3912 OTWV=0.4914 STWV=0.6376 MTWV=0.3912 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4234 , #FA=5261 , #Miss=8481 , Contributed ATWV= 0.3664, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4177 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=112 , #FA=457 , #Miss=366 , Contributed ATWV= 0.0248, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2017 +ATWV=0.3944 OTWV=0.4934 STWV=0.6385 MTWV=0.3952 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4363 , #FA=5715 , #Miss=8352 , Contributed ATWV= 0.3691, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4208 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=116 , #FA=533 , #Miss=362 , Contributed ATWV= 0.0252, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2054 +ATWV=0.3992 OTWV=0.5032 STWV=0.6463 MTWV=0.3992 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4299 , #FA=5287 , #Miss=8416 , Contributed ATWV= 0.3731, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4254 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=116 , #FA=460 , #Miss=362 , Contributed ATWV= 0.0260, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2120 +ATWV=0.4131 OTWV=0.5074 STWV=0.6366 MTWV=0.4131 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4722 , #FA=5944 , #Miss=7993 , Contributed ATWV= 0.3883, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4427 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=118 , #FA=543 , #Miss=360 , Contributed ATWV= 0.0248, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2019 +ATWV=0.4192 OTWV=0.5136 STWV=0.6432 MTWV=0.4197 
THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4432 , #FA=4967 , #Miss=8283 , Contributed ATWV= 0.3911, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4458 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=117 , #FA=389 , #Miss=361 , Contributed ATWV= 0.0282, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2295 +# +# KWS Task performance (TWV), syllabic decode+search for the set [kwlist] evaluated on 2016-03-31T12:31:26-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3641 OTWV=0.4420 STWV=0.5488 MTWV=0.3641 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it2/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=3840 , #FA=4117 , #Miss=8875 , Contributed ATWV= 0.3404, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3880 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=102 , #FA=267 , #Miss=376 , Contributed ATWV= 0.0237, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1931 +ATWV=0.4029 OTWV=0.4987 STWV=0.6333 MTWV=0.4039 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_13/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4170 , #FA=4646 , #Miss=8545 , Contributed ATWV= 0.3693, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4210 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=144 , #FA=414 , #Miss=334 , Contributed ATWV= 0.0336, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2737 +ATWV=0.4079 OTWV=0.5034 STWV=0.6391 MTWV=0.4079 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch2/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4361 , #FA=5099 , #Miss=8354 , Contributed ATWV= 0.3750, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4275 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=144 , #FA=464 , #Miss=334 , Contributed ATWV= 0.0329, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2682 +ATWV=0.4153 OTWV=0.5120 STWV=0.6440 MTWV=0.4159 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4550 , #FA=5777 , #Miss=8165 , Contributed ATWV= 0.3786, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4316 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=159 , #FA=500 , #Miss=319 , Contributed ATWV= 0.0367, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2992 +ATWV=0.4222 OTWV=0.5174 STWV=0.6342 MTWV=0.4224 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4814 , #FA=5842 , #Miss=7901 , Contributed ATWV= 0.3888, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4432 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=147 , #FA=483 , #Miss=331 , Contributed ATWV= 0.0334, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2724 +ATWV=0.4288 OTWV=0.5196 STWV=0.6299 MTWV=0.4288 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4781 , #FA=5182 , #Miss=7934 , Contributed ATWV= 0.3956, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4509 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=146 , #FA=414 , #Miss=332 , Contributed ATWV= 0.0333, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2709 +# +# KWS Task performance (TWV), phonetic decode+search for the set [kwlist] evaluated on 2016-03-31T12:31:41-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3217 OTWV=0.4088 STWV=0.5365 MTWV=0.3225 THRESHOLD=0.484 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it1/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=3439 , #FA=4619 , #Miss=9276 , Contributed ATWV= 0.2980, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3397 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=107 , #FA=370 , #Miss=371 , Contributed ATWV= 0.0237, Best Possible Contributed ATWV= 0.1228, ATWV= 0.1928 +ATWV=0.3625 OTWV=0.4603 STWV=0.6156 MTWV=0.3625 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch1/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=3584 , #FA=4514 , #Miss=9131 , Contributed ATWV= 0.3277, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3736 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=145 , #FA=365 , #Miss=333 , Contributed ATWV= 0.0348, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2837 +ATWV=0.3648 OTWV=0.4659 STWV=0.6258 MTWV=0.3650 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=3728 , #FA=4997 , #Miss=8987 , Contributed ATWV= 0.3283, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3742 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=153 , #FA=426 , #Miss=325 , Contributed ATWV= 0.0365, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2971 +ATWV=0.3776 OTWV=0.4805 STWV=0.6324 MTWV=0.3779 THRESHOLD=0.503 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4000 , #FA=5730 , #Miss=8715 , Contributed ATWV= 0.3437, Best Possible Contributed ATWV= 0.8772, ATWV= 0.3918 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=150 , #FA=500 , #Miss=328 , Contributed ATWV= 0.0339, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2759 +ATWV=0.3885 OTWV=0.4943 STWV=0.6300 MTWV=0.3904 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4304 , #FA=5863 , #Miss=8411 , Contributed ATWV= 0.3553, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4051 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=149 , #FA=466 , #Miss=329 , Contributed ATWV= 0.0332, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2702 +ATWV=0.3993 OTWV=0.4998 STWV=0.6357 MTWV=0.4003 THRESHOLD=0.503 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=2079 , #Targ=12715, #Corr=4480 , #FA=5920 , #Miss=8235 , Contributed ATWV= 0.3667, Best Possible Contributed ATWV= 0.8772, ATWV= 0.4180 + OOV=1 #Keywords=291 , #Targ=478 , #Corr=146 , #FA=498 , #Miss=332 , Contributed ATWV= 0.0327, Best Possible Contributed ATWV= 0.1228, ATWV= 0.2662 diff --git a/egs/babel/s5d/results/results.101-cantonese-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T12:15:22-0500 b/egs/babel/s5d/results/results.101-cantonese-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T12:15:22-0500 new file mode 100644 index 00000000000..0b03f645904 --- /dev/null +++ b/egs/babel/s5d/results/results.101-cantonese-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T12:15:22-0500 @@ -0,0 +1,28 @@ +#Created on 2016-02-18T12:15:22-0500 by local/best_scores.sh +# +# +# STT Task performance (WER), evaluated on 2016-02-18T12:20:23-0500 +%WER 50.4 | 10001 82932 | 57.3 32.0 10.7 7.6 50.4 79.0 | -1.280 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 59.2 | 10001 82932 | 50.9 37.9 11.1 10.1 59.2 81.7 | -1.687 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 51.0 | 10001 82932 | 55.5 30.2 14.3 6.5 51.0 80.0 | -0.722 | 
exp/tri6b_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 47.3 | 10001 82932 | 59.7 30.6 9.8 7.0 47.3 77.1 | -1.079 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (CER), evaluated on 2016-02-18T12:20:24-0500 +%WER 43.5 | 10001 104181 | 62.5 29.8 7.6 6.1 43.5 78.6 | -1.082 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.char.ctm.sys +%WER 52.3 | 10001 104181 | 55.5 35.9 8.6 7.8 52.3 81.5 | -1.384 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.char.ctm.sys +%WER 43.9 | 10001 104181 | 62.2 28.5 9.3 6.0 43.9 80.1 | -0.627 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.char.ctm.sys +%WER 40.5 | 10001 104181 | 64.3 28.2 7.5 4.8 40.5 76.7 | -0.854 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.char.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T22:47:05-0500 +%WER 50.4 | 10001 82932 | 57.3 32.0 10.7 7.6 50.4 79.0 | -1.280 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 59.2 | 10001 82932 | 50.9 37.9 11.1 10.1 59.2 81.7 | -1.687 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 51.0 | 10001 82932 | 55.5 30.2 14.3 6.5 51.0 80.0 | -0.722 | exp/tri6b_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 43.6 | 10001 82932 | 60.4 27.7 11.9 4.0 43.6 73.1 | -0.439 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 47.3 | 10001 82932 | 59.7 30.6 9.8 7.0 47.3 77.1 | -1.079 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (CER), evaluated on 2016-02-19T22:47:09-0500 +%WER 43.5 | 10001 104181 | 62.5 29.8 7.6 6.1 43.5 78.6 | -1.082 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.char.ctm.sys +%WER 52.3 | 10001 104181 | 55.5 35.9 8.6 7.8 52.3 81.5 | -1.384 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.char.ctm.sys +%WER 43.9 | 10001 104181 | 62.2 28.5 9.3 6.0 43.9 80.1 | -0.627 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.char.ctm.sys +%WER 37.0 | 10001 104181 | 65.6 25.3 9.1 2.6 37.0 72.6 | -0.301 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.char.ctm.sys +%WER 40.5 | 10001 104181 | 64.3 28.2 7.5 4.8 40.5 76.7 | -0.854 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.char.ctm.sys diff --git a/egs/babel/s5d/results/results.102-assamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:53:08-0500 b/egs/babel/s5d/results/results.102-assamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:53:08-0500 new file mode 100644 index 00000000000..00aa7af8149 --- /dev/null +++ b/egs/babel/s5d/results/results.102-assamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:53:08-0500 @@ -0,0 +1,27 @@ +#Created on 2015-11-27T17:53:08-0500 +# +# STT Task performance (WER) +%WER 61.1 | 22313 52407 | 44.9 44.0 11.1 6.0 61.1 29.4 | -1.466 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 52.7 | 22313 52407 | 52.0 37.3 10.6 4.8 52.7 28.0 | -0.904 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 52.6 | 22313 52407 | 52.3 37.1 10.7 4.9 52.6 28.2 | -0.763 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 50.5 | 22313 52407 | 53.8 35.3 10.8 4.3 50.5 27.2 | -0.860 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:20:59-0500 +%WER 52.7 | 22313 52407 | 52.0 37.3 10.6 4.8 52.7 28.0 | -0.904 | 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 61.1 | 22313 52407 | 44.9 44.0 11.1 6.0 61.1 29.4 | -1.466 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 52.6 | 22313 52407 | 51.6 36.6 11.8 4.2 52.6 28.1 | -0.671 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 50.5 | 22313 52407 | 53.5 34.9 11.6 4.0 50.5 27.3 | -0.803 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:34:05-0500 +%WER 52.7 | 22313 52407 | 52.0 37.3 10.6 4.8 52.7 28.0 | -0.904 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 61.1 | 22313 52407 | 44.9 44.0 11.1 6.0 61.1 29.4 | -1.466 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 52.6 | 22313 52407 | 51.6 36.6 11.8 4.2 52.6 28.1 | -0.671 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.9 | 22313 52407 | 53.3 33.1 13.6 3.2 49.9 27.4 | -0.580 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/score_10/dev10h.pem.ctm.sys +%WER 50.5 | 22313 52407 | 53.5 34.9 11.6 4.0 50.5 27.3 | -0.803 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T18:29:11-0500 +%WER 52.7 | 22313 52407 | 52.0 37.3 10.6 4.8 52.7 28.0 | -0.904 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 61.1 | 22313 52407 | 44.9 44.0 11.1 6.0 61.1 29.4 | -1.466 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 52.6 | 22313 52407 | 51.6 36.6 11.8 4.2 52.6 28.1 | -0.671 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 49.2 | 22313 52407 | 53.9 32.4 13.7 3.2 49.2 27.3 | -0.554 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 50.5 | 22313 52407 | 53.5 34.9 11.6 4.0 50.5 27.3 | -0.803 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.103-bengali-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:49:23-0500 b/egs/babel/s5d/results/results.103-bengali-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:49:23-0500 new file mode 100644 index 00000000000..64b03ac3178 --- /dev/null +++ b/egs/babel/s5d/results/results.103-bengali-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:49:23-0500 @@ -0,0 +1,22 @@ +#Created on 2015-12-01T16:49:23-0500 +# +# STT Task performance (WER) +%WER 63.4 | 22224 57152 | 41.7 46.0 12.3 5.1 63.4 31.3 | -1.288 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 55.8 | 22224 57152 | 48.8 39.6 11.6 4.6 55.8 30.1 | -0.794 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 55.4 | 22224 57152 | 48.3 38.4 13.3 3.7 55.4 30.0 | -0.540 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 54.0 | 22224 57152 | 49.6 37.0 13.5 3.6 54.0 29.7 | -0.713 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_17/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:21:33-0500 +%WER 58.7 | 22224 57152 | 44.9 40.1 15.0 3.6 58.7 30.5 | -0.491 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 55.8 | 22224 57152 | 48.3 38.9 12.8 4.1 55.8 30.1 | -0.723 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 63.4 | 22224 57152 | 41.7 46.0 12.3 5.1 63.4 31.3 | -1.288 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 55.4 | 22224 57152 | 48.3 38.4 13.3 3.7 55.4 30.0 | -0.540 | 
exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 54.0 | 22224 57152 | 49.3 36.5 14.2 3.3 54.0 29.7 | -0.676 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:34:29-0500 +%WER 58.7 | 22224 57152 | 44.9 40.1 15.0 3.6 58.7 30.5 | -0.491 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 55.8 | 22224 57152 | 48.3 38.9 12.8 4.1 55.8 30.1 | -0.723 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 63.4 | 22224 57152 | 41.7 46.0 12.3 5.1 63.4 31.3 | -1.288 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 55.4 | 22224 57152 | 48.3 38.4 13.3 3.7 55.4 30.0 | -0.540 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 52.2 | 22224 57152 | 50.7 33.9 15.3 2.9 52.2 29.6 | -0.453 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 54.0 | 22224 57152 | 49.3 36.5 14.2 3.3 54.0 29.7 | -0.676 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.104-pashto-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:48:47-0500 b/egs/babel/s5d/results/results.104-pashto-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:48:47-0500 new file mode 100644 index 00000000000..a085787d6d3 --- /dev/null +++ b/egs/babel/s5d/results/results.104-pashto-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:48:47-0500 @@ -0,0 +1,22 @@ +#Created on 2015-11-28T14:48:47-0500 +# +# STT Task performance (WER) +%WER 58.4 | 21825 101803 | 46.1 38.4 15.5 4.5 58.4 32.8 | -1.124 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.4 | 21825 101803 | 53.9 32.8 13.3 4.3 50.4 31.4 | -0.735 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 50.7 | 21825 101803 | 52.8 31.9 15.3 3.5 50.7 31.6 | -0.652 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 21825 101803 | 55.0 32.3 12.8 4.2 49.3 31.0 | -0.739 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-18T12:21:44-0500 +%WER 53.4 | 21825 101803 | 50.6 34.1 15.3 4.0 53.4 32.1 | -0.608 | exp/sgmm5/decode_fmllr_dev10h.pem/score_9/dev10h.pem.ctm.sys +%WER 50.4 | 21825 101803 | 53.9 32.8 13.3 4.3 50.4 31.4 | -0.735 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 58.4 | 21825 101803 | 46.1 38.4 15.5 4.5 58.4 32.8 | -1.124 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21825 101803 | 52.8 31.9 15.3 3.5 50.7 31.6 | -0.652 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 21825 101803 | 54.6 31.8 13.6 3.9 49.3 31.0 | -0.671 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-19T14:35:04-0500 +%WER 53.4 | 21825 101803 | 50.6 34.1 15.3 4.0 53.4 32.1 | -0.608 | exp/sgmm5/decode_fmllr_dev10h.pem/score_9/dev10h.pem.ctm.sys +%WER 50.4 | 21825 101803 | 53.9 32.8 13.3 4.3 50.4 31.4 | -0.735 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys +%WER 58.4 | 21825 101803 | 46.1 38.4 15.5 4.5 58.4 32.8 | -1.124 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 50.7 | 21825 101803 | 52.8 31.9 15.3 3.5 50.7 31.6 | -0.652 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 47.0 | 21825 101803 | 56.6 30.1 13.3 3.6 47.0 30.7 | -0.541 | 
exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 21825 101803 | 54.6 31.8 13.6 3.9 49.3 31.0 | -0.671 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-25T15:45:46-05:00 b/egs/babel/s5d/results/results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-25T15:45:46-05:00 new file mode 100644 index 00000000000..c1b66fb5daf --- /dev/null +++ b/egs/babel/s5d/results/results.104-pashto.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-25T15:45:46-05:00 @@ -0,0 +1,242 @@ +#Created on 2016-02-25T15:45:46-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-25T15:45:46-05:00 +%WER 50.2 | 21825 101803 | 55.1 33.2 11.7 5.3 50.2 31.2 | -0.670 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-25T17:03:34-05:00 +%WER 50.2 | 21825 101803 | 55.1 33.2 11.7 5.3 50.2 31.2 | -0.670 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 21825 101803 | 48.5 39.7 11.8 6.6 58.1 32.6 | -1.226 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 48.1 | 21825 101803 | 56.4 31.2 12.3 4.6 48.1 31.0 | -0.638 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-03T19:48:53-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 50.4 | 21825 101803 | 55.5 34.1 10.4 5.9 50.4 31.0 | -0.669 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_16/dev10h.pem.ctm.sys +%WER 42.3 | 21825 101803 | 61.0 26.6 12.3 3.3 42.3 30.0 | -1.260 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 50.2 | 21825 101803 | 55.1 33.2 11.7 5.3 50.2 31.2 | -0.670 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 21825 101803 | 48.5 39.7 11.8 6.6 58.1 32.6 | -1.226 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.3 | 21825 101803 | 44.2 42.6 13.3 6.5 62.3 32.9 | -0.955 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 48.1 | 21825 101803 | 56.4 31.2 12.3 4.6 48.1 31.0 | -0.638 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.8 | 21825 101803 | 57.7 28.7 13.7 4.4 46.8 30.8 | -0.514 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-06T10:07:57-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 50.4 | 21825 101803 | 55.5 34.1 10.4 5.9 50.4 31.0 | -0.669 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_16/dev10h.pem.ctm.sys +%WER 42.3 | 21825 101803 | 61.0 26.6 12.3 3.3 42.3 30.0 | -1.260 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 43.7 | 21825 101803 | 60.3 27.8 12.0 4.0 43.7 30.3 | -1.051 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 46.6 | 21825 101803 | 57.7 29.7 12.6 4.3 46.6 30.8 | -0.740 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 50.2 | 21825 101803 | 55.1 33.2 11.7 5.3 50.2 31.2 | -0.670 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 21825 101803 | 48.5 39.7 11.8 6.6 58.1 32.6 | -1.226 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.3 | 21825 101803 | 44.2 42.6 13.3 6.5 62.3 32.9 | -0.955 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 48.1 | 21825 101803 | 
56.4 31.2 12.3 4.6 48.1 31.0 | -0.638 | exp/tri6_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.8 | 21825 101803 | 57.7 28.7 13.7 4.4 46.8 30.8 | -0.514 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys +# +# KWS Task performance (TWV), for the set kwlist evaluated on 2016-03-31T11:30:04-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.4114 OTWV=0.5171 STWV=0.6713 MTWV=0.4128 THRESHOLD=0.453 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3606 , #FA=1935 , #Miss=2988 , Contributed ATWV= 0.4114, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4121 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4639 OTWV=0.5790 STWV=0.7779 MTWV=0.4639 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3948 , #FA=2450 , #Miss=2646 , Contributed ATWV= 0.4639, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4646 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4670 OTWV=0.5932 STWV=0.7799 MTWV=0.4685 THRESHOLD=0.453 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=3914 , #FA=2016 , #Miss=2680 , Contributed ATWV= 0.4670, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4677 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4940 OTWV=0.6072 STWV=0.7751 MTWV=0.4940 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4297 , #FA=2623 , #Miss=2297 , Contributed ATWV= 0.4940, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4948 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.4970 OTWV=0.6016 STWV=0.7837 MTWV=0.4985 THRESHOLD=0.503 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4145 , #FA=2538 , #Miss=2449 , Contributed ATWV= 0.4970, Best Possible Contributed ATWV= 0.9984, ATWV= 0.4977 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.5174 OTWV=0.6324 STWV=0.7958 MTWV=0.5183 THRESHOLD=0.433 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=4312 , #FA=2156 , #Miss=2282 , Contributed ATWV= 0.5174, Best Possible Contributed ATWV= 0.9984, ATWV= 0.5182 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +# +# KWS Task performance (TWV), for the set kwlist2 evaluated on 2016-03-31T11:30:12-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.4371 OTWV=0.5527 STWV=0.6904 MTWV=0.4372 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=7695 , #FA=8671 , #Miss=6784 , Contributed ATWV= 0.4356, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4423 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=32 , #Miss=50 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, 
ATWV= 0.0974 +ATWV=0.4822 OTWV=0.6082 STWV=0.7912 MTWV=0.4822 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8278 , #FA=9303 , #Miss=6201 , Contributed ATWV= 0.4808, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4882 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=12 , #FA=60 , #Miss=48 , Contributed ATWV= 0.0014, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0924 +ATWV=0.4920 OTWV=0.6156 STWV=0.7891 MTWV=0.4920 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8698 , #FA=10346, #Miss=5781 , Contributed ATWV= 0.4913, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4989 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=8 , #FA=59 , #Miss=52 , Contributed ATWV= 0.0006, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0427 +ATWV=0.5006 OTWV=0.6216 STWV=0.7975 MTWV=0.5006 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=8552 , #FA=9419 , #Miss=5927 , Contributed ATWV= 0.4992, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5069 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=55 , #Miss=49 , Contributed ATWV= 0.0013, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0873 +ATWV=0.5077 OTWV=0.6291 STWV=0.7819 MTWV=0.5077 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist2_11/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=9060 , #FA=10188, #Miss=5419 , Contributed ATWV= 0.5073, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5150 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=7 , #FA=64 , #Miss=53 , Contributed ATWV= 0.0005, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0325 +ATWV=0.5203 OTWV=0.6486 STWV=0.7952 MTWV=0.5218 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist2_10/f4de/metrics.txt + OOV=0 #Keywords=2474 , #Targ=14479, #Corr=9144 , #FA=8922 , #Miss=5335 , Contributed ATWV= 0.5191, Best Possible Contributed ATWV= 0.9849, ATWV= 0.5271 + OOV=1 #Keywords=38 , #Targ=60 , #Corr=9 , #FA=44 , #Miss=51 , Contributed ATWV= 0.0012, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0821 +# +# KWS Task performance (TWV), for the set kwlist3 evaluated on 2016-03-31T11:30:24-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.3527 OTWV=0.4568 STWV=0.6002 MTWV=0.3537 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=6954 , #FA=5353 , #Miss=7254 , Contributed ATWV= 0.3477, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3778 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=23 , #FA=232 , #Miss=223 , Contributed ATWV= 0.0049, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0605 +ATWV=0.3997 OTWV=0.5121 STWV=0.7021 MTWV=0.4002 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.pem/kwset_kwlist3_12/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7407 , #FA=5449 , #Miss=6801 , Contributed ATWV= 0.3919, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4259 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=39 , #FA=307 , #Miss=207 , Contributed ATWV= 0.0076, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0939 +ATWV=0.4102 OTWV=0.5277 STWV=0.7047 MTWV=0.4102 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7795 , #FA=5927 , #Miss=6413 , Contributed ATWV= 0.4033, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4382 + OOV=1 #Keywords=143 , 
#Targ=246 , #Corr=36 , #FA=288 , #Miss=210 , Contributed ATWV= 0.0067, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0822 +ATWV=0.4222 OTWV=0.5278 STWV=0.7066 MTWV=0.4222 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=7820 , #FA=5808 , #Miss=6388 , Contributed ATWV= 0.4152, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4511 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=326 , #Miss=210 , Contributed ATWV= 0.0068, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0839 +ATWV=0.4285 OTWV=0.5406 STWV=0.6965 MTWV=0.4286 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=8050 , #FA=5500 , #Miss=6158 , Contributed ATWV= 0.4213, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4578 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=34 , #FA=264 , #Miss=212 , Contributed ATWV= 0.0070, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0858 +ATWV=0.4361 OTWV=0.5517 STWV=0.7032 MTWV=0.4361 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/kwset_kwlist3_10/f4de/metrics.txt + OOV=0 #Keywords=1617 , #Targ=14208, #Corr=8487 , #FA=6339 , #Miss=5721 , Contributed ATWV= 0.4310, Best Possible Contributed ATWV= 0.9203, ATWV= 0.4683 + OOV=1 #Keywords=143 , #Targ=246 , #Corr=36 , #FA=311 , #Miss=210 , Contributed ATWV= 0.0048, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0594 +# +# KWS Task performance (TWV), syllabic search for the set kwlist evaluated on 2016-03-31T11:30:38-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +ATWV=0.2471 OTWV=0.2986 STWV=0.3521 MTWV=0.2471 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/syllabs/kwset_kwlist_9/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1536 , #FA=1187 , #Miss=5058 , Contributed ATWV= 0.2471, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2475 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2738 OTWV=0.3312 STWV=0.3984 MTWV=0.2738 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/syllabs/kwset_kwlist_11/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1588 , #FA=1164 , #Miss=5006 , Contributed ATWV= 0.2738, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2742 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2762 OTWV=0.3345 STWV=0.4011 MTWV=0.2762 THRESHOLD=0.491 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1613 , #FA=1156 , #Miss=4981 , Contributed ATWV= 0.2762, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2766 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000 +ATWV=0.2932 OTWV=0.3415 STWV=0.3985 MTWV=0.2981 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1624 , #FA=1082 , #Miss=4970 , Contributed ATWV= 0.2934, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2938 + OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828 +ATWV=0.2970 OTWV=0.3432 STWV=0.4014 MTWV=0.2970 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.pem/syllabs/kwset_kwlist_10/f4de/metrics.txt + OOV=0 #Keywords=633 , #Targ=6594 , 
#Corr=1702 , #FA=1132 , #Miss=4892 , Contributed ATWV= 0.2970, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2975
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.2978 OTWV=0.3444 STWV=0.4035 MTWV=0.2978 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/syllabs/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1683 , #FA=1050 , #Miss=4911 , Contributed ATWV= 0.2978, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2983
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+#
+# KWS Task performance (TWV), phonetic search for the set kwlist evaluated on 2016-03-31T11:30:51-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2335 OTWV=0.2867 STWV=0.3609 MTWV=0.2337 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/phones/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1443 , #FA=1310 , #Miss=5151 , Contributed ATWV= 0.2336, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2339
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276
+ATWV=0.2513 OTWV=0.3174 STWV=0.4034 MTWV=0.2513 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1518 , #FA=1442 , #Miss=5076 , Contributed ATWV= 0.2515, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2519
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828
+ATWV=0.2525 OTWV=0.3188 STWV=0.4069 MTWV=0.2583 THRESHOLD=0.444 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/phones/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1564 , #FA=1489 , #Miss=5030 , Contributed ATWV= 0.2526, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2530
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552
+ATWV=0.2575 OTWV=0.3184 STWV=0.3902 MTWV=0.2608 THRESHOLD=0.433 exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1544 , #FA=1319 , #Miss=5050 , Contributed ATWV= 0.2575, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2579
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276
+ATWV=0.2759 OTWV=0.3294 STWV=0.4067 MTWV=0.2766 THRESHOLD=0.511 exp/nnet3/lstm_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1624 , #FA=1369 , #Miss=4970 , Contributed ATWV= 0.2760, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2764
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552
+ATWV=0.2793 OTWV=0.3306 STWV=0.4042 MTWV=0.2812 THRESHOLD=0.529 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/phones/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1693 , #FA=1495 , #Miss=4901 , Contributed ATWV= 0.2785, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2790
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=0 , #Miss=1 , Contributed ATWV= 0.0008, Best Possible Contributed ATWV= 0.0016, ATWV= 0.5000
+declare -ax kwsets='([0]="kwlist" [1]="kwlist2" [2]="kwlist3")'
+#
+# KWS Task performance (TWV), for the set kwlist evaluated on 2016-03-31T11:31:11-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2516 OTWV=0.2931 STWV=0.3457 MTWV=0.2518 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist_8/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1433 , #FA=916 , #Miss=5161 , Contributed ATWV= 0.2516, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2520
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.2710 OTWV=0.3243 STWV=0.3971 MTWV=0.2720 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1527 , #FA=1006 , #Miss=5067 , Contributed ATWV= 0.2710, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2715
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=2 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0552
+ATWV=0.2864 OTWV=0.3330 STWV=0.3928 MTWV=0.2864 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1673 , #FA=1135 , #Miss=4921 , Contributed ATWV= 0.2864, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2869
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=0 , #Miss=2 , Contributed ATWV= 0.0000, Best Possible Contributed ATWV= 0.0016, ATWV= 0.0000
+ATWV=0.2874 OTWV=0.3386 STWV=0.4018 MTWV=0.2881 THRESHOLD=0.403 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch2/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1591 , #FA=1010 , #Miss=5003 , Contributed ATWV= 0.2874, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2879
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276
+ATWV=0.2946 OTWV=0.3463 STWV=0.4046 MTWV=0.2952 THRESHOLD=0.453 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist_12/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1666 , #FA=1036 , #Miss=4928 , Contributed ATWV= 0.2946, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2951
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=1 , #Miss=2 , Contributed ATWV=-0.0000, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0276
+#
+# KWS Task performance (TWV), for the set kwlist2 evaluated on 2016-03-31T11:31:16-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.3298 OTWV=0.4064 STWV=0.4925 MTWV=0.3305 THRESHOLD=0.503 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist2_8/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4881 , #FA=5838 , #Miss=9598 , Contributed ATWV= 0.3281, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3331
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=9 , #FA=23 , #Miss=51 , Contributed ATWV= 0.0017, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1105
+ATWV=0.3636 OTWV=0.4527 STWV=0.5672 MTWV=0.3638 THRESHOLD=0.453 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5215 , #FA=6311 , #Miss=9264 , Contributed ATWV= 0.3608, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3663
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=15 , #FA=26 , #Miss=45 , Contributed ATWV= 0.0028, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1873
+ATWV=0.3784 OTWV=0.4622 STWV=0.5703 MTWV=0.3792 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5416 , #FA=6432 , #Miss=9063 , Contributed ATWV= 0.3766, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3824
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=33 , #Miss=49 , Contributed ATWV= 0.0018, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1208
+ATWV=0.3795 OTWV=0.4643 STWV=0.5595 MTWV=0.3795 THRESHOLD=0.491 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5620 , #FA=6171 , #Miss=8859 , Contributed ATWV= 0.3781, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3839
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=8 , #FA=29 , #Miss=52 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0974
+ATWV=0.3973 OTWV=0.4799 STWV=0.5716 MTWV=0.4011 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5747 , #FA=5988 , #Miss=8732 , Contributed ATWV= 0.3952, Best Possible Contributed ATWV= 0.9849, ATWV= 0.4013
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=26 , #Miss=49 , Contributed ATWV= 0.0020, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1346
+#
+# KWS Task performance (TWV), for the set kwlist3 evaluated on 2016-03-31T11:31:26-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2442 OTWV=0.2994 STWV=0.3760 MTWV=0.2442 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.syll.pem_it4/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3580 , #FA=3520 , #Miss=10628, Contributed ATWV= 0.2378, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2584
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=28 , #FA=145 , #Miss=218 , Contributed ATWV= 0.0064, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0787
+ATWV=0.2681 OTWV=0.3407 STWV=0.4407 MTWV=0.2684 THRESHOLD=0.484 exp/tri6_nnet/decode_dev10h.syll.pem/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3688 , #FA=3305 , #Miss=10520, Contributed ATWV= 0.2574, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2797
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=45 , #FA=195 , #Miss=201 , Contributed ATWV= 0.0106, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1306
+ATWV=0.2844 OTWV=0.3499 STWV=0.4441 MTWV=0.2857 THRESHOLD=0.484 exp/tri6_nnet_mpe/decode_dev10h.syll.pem_epoch4/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3840 , #FA=3340 , #Miss=10368, Contributed ATWV= 0.2733, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2970
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=44 , #FA=197 , #Miss=202 , Contributed ATWV= 0.0111, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1367
+ATWV=0.2946 OTWV=0.3581 STWV=0.4423 MTWV=0.2948 THRESHOLD=0.484 exp/nnet3/lstm_sp/decode_dev10h.syll.pem.syll/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3882 , #FA=2874 , #Miss=10326, Contributed ATWV= 0.2804, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3047
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=53 , #FA=138 , #Miss=193 , Contributed ATWV= 0.0142, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1751
+ATWV=0.2958 OTWV=0.3658 STWV=0.4485 MTWV=0.2988 THRESHOLD=0.453 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.syll.pem.syll/kwset_kwlist3_11/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=4068 , #FA=3344 , #Miss=10140, Contributed ATWV= 0.2835, Best Possible Contributed ATWV= 0.9203, ATWV= 0.3081
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=47 , #FA=136 , #Miss=199 , Contributed ATWV= 0.0122, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1504
+declare -ax kwsets='([0]="kwlist" [1]="kwlist2" [2]="kwlist3")'
+#
+# KWS Task performance (TWV), for the set kwlist evaluated on 2016-03-31T11:31:47-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1869 OTWV=0.2380 STWV=0.3024 MTWV=0.1869 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it2/kwset_kwlist_9/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1180 , #FA=1168 , #Miss=5414 , Contributed ATWV= 0.1870, Best Possible Contributed ATWV= 0.9984, ATWV= 0.1873
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=3 , #Miss=2 , Contributed ATWV=-0.0001, Best Possible Contributed ATWV= 0.0016, ATWV=-0.0828
+ATWV=0.2043 OTWV=0.2598 STWV=0.3427 MTWV=0.2043 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist_11/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1281 , #FA=1263 , #Miss=5313 , Contributed ATWV= 0.2045, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2048
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=4 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1103
+ATWV=0.2055 OTWV=0.2591 STWV=0.3340 MTWV=0.2055 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch3/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1267 , #FA=1206 , #Miss=5327 , Contributed ATWV= 0.2057, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2060
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=5 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1379
+ATWV=0.2123 OTWV=0.2766 STWV=0.3581 MTWV=0.2149 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1333 , #FA=1274 , #Miss=5261 , Contributed ATWV= 0.2125, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2128
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=0 , #FA=4 , #Miss=2 , Contributed ATWV=-0.0002, Best Possible Contributed ATWV= 0.0016, ATWV=-0.1103
+ATWV=0.2216 OTWV=0.2852 STWV=0.3565 MTWV=0.2240 THRESHOLD=0.403 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1371 , #FA=1067 , #Miss=5223 , Contributed ATWV= 0.2209, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2213
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=2 , #Miss=1 , Contributed ATWV= 0.0007, Best Possible Contributed ATWV= 0.0016, ATWV= 0.4448
+ATWV=0.2532 OTWV=0.3121 STWV=0.3808 MTWV=0.2539 THRESHOLD=0.465 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist_10/f4de/metrics.txt
+ OOV=0 #Keywords=633 , #Targ=6594 , #Corr=1547 , #FA=1310 , #Miss=5047 , Contributed ATWV= 0.2524, Best Possible Contributed ATWV= 0.9984, ATWV= 0.2528
+ OOV=1 #Keywords=1 , #Targ=2 , #Corr=1 , #FA=0 , #Miss=1 , Contributed ATWV= 0.0008, Best Possible Contributed ATWV= 0.0016, ATWV= 0.5000
+#
+# KWS Task performance (TWV), for the set kwlist2 evaluated on 2016-03-31T11:31:53-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.2686 OTWV=0.3459 STWV=0.4328 MTWV=0.2690 THRESHOLD=0.484 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=3870 , #FA=5258 , #Miss=10609, Contributed ATWV= 0.2670, Best Possible Contributed ATWV= 0.9849, ATWV= 0.2711
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=42 , #Miss=50 , Contributed ATWV= 0.0016, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1055
+ATWV=0.3044 OTWV=0.3970 STWV=0.5154 MTWV=0.3044 THRESHOLD=0.491 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4285 , #FA=5644 , #Miss=10194, Contributed ATWV= 0.3011, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3057
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=16 , #FA=54 , #Miss=44 , Contributed ATWV= 0.0033, Best Possible Contributed ATWV= 0.0151, ATWV= 0.2152
+ATWV=0.3073 OTWV=0.3944 STWV=0.4998 MTWV=0.3079 THRESHOLD=0.473 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch2/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4457 , #FA=6120 , #Miss=10022, Contributed ATWV= 0.3051, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3098
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=13 , #FA=55 , #Miss=47 , Contributed ATWV= 0.0022, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1487
+ATWV=0.3092 OTWV=0.4100 STWV=0.5226 MTWV=0.3125 THRESHOLD=0.465 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4431 , #FA=5723 , #Miss=10048, Contributed ATWV= 0.3078, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3125
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=11 , #FA=69 , #Miss=49 , Contributed ATWV= 0.0015, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0977
+ATWV=0.3280 OTWV=0.4225 STWV=0.5216 MTWV=0.3291 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=4940 , #FA=6266 , #Miss=9539 , Contributed ATWV= 0.3266, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3316
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=10 , #FA=63 , #Miss=50 , Contributed ATWV= 0.0014, Best Possible Contributed ATWV= 0.0151, ATWV= 0.0911
+ATWV=0.3586 OTWV=0.4552 STWV=0.5519 MTWV=0.3614 THRESHOLD=0.473 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist2_10/f4de/metrics.txt
+ OOV=0 #Keywords=2474 , #Targ=14479, #Corr=5261 , #FA=6266 , #Miss=9218 , Contributed ATWV= 0.3563, Best Possible Contributed ATWV= 0.9849, ATWV= 0.3618
+ OOV=1 #Keywords=38 , #Targ=60 , #Corr=14 , #FA=67 , #Miss=46 , Contributed ATWV= 0.0023, Best Possible Contributed ATWV= 0.0151, ATWV= 0.1531
+#
+# KWS Task performance (TWV), for the set kwlist3 evaluated on 2016-03-31T11:32:05-04:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+ATWV=0.1931 OTWV=0.2569 STWV=0.3444 MTWV=0.1931 THRESHOLD=0.491 exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.phn.pem_it4/kwset_kwlist3_9/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3015 , #FA=3772 , #Miss=11193, Contributed ATWV= 0.1875, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2037
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=33 , #FA=303 , #Miss=213 , Contributed ATWV= 0.0062, Best Possible Contributed ATWV= 0.0814, ATWV= 0.0759
+ATWV=0.2228 OTWV=0.2982 STWV=0.4154 MTWV=0.2231 THRESHOLD=0.503 exp/tri6_nnet/decode_dev10h.phn.pem/kwset_kwlist3_11/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3232 , #FA=3853 , #Miss=10976, Contributed ATWV= 0.2092, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2273
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=57 , #FA=332 , #Miss=189 , Contributed ATWV= 0.0141, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1738
+ATWV=0.2247 OTWV=0.2962 STWV=0.4001 MTWV=0.2247 THRESHOLD=0.491 exp/tri6_nnet_mpe/decode_dev10h.phn.pem_epoch4/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3131 , #FA=3232 , #Miss=11077, Contributed ATWV= 0.2122, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2306
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=48 , #FA=278 , #Miss=198 , Contributed ATWV= 0.0131, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1606
+ATWV=0.2320 OTWV=0.3081 STWV=0.4229 MTWV=0.2326 THRESHOLD=0.484 exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3378 , #FA=3831 , #Miss=10830, Contributed ATWV= 0.2194, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2384
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=53 , #FA=299 , #Miss=193 , Contributed ATWV= 0.0126, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1544
+ATWV=0.2474 OTWV=0.3186 STWV=0.4206 MTWV=0.2476 THRESHOLD=0.503 exp/nnet3/lstm_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3615 , #FA=3812 , #Miss=10593, Contributed ATWV= 0.2310, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2510
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=63 , #FA=306 , #Miss=183 , Contributed ATWV= 0.0165, Best Possible Contributed ATWV= 0.0814, ATWV= 0.2023
+ATWV=0.2668 OTWV=0.3433 STWV=0.4457 MTWV=0.2668 THRESHOLD=0.491 exp/nnet3/lstm_bidirectional_sp/decode_dev10h.phn.pem.phn/kwset_kwlist3_10/f4de/metrics.txt
+ OOV=0 #Keywords=1617 , #Targ=14208, #Corr=3825 , #FA=3913 , #Miss=10383, Contributed ATWV= 0.2535, Best Possible Contributed ATWV= 0.9203, ATWV= 0.2755
+ OOV=1 #Keywords=143 , #Targ=246 , #Corr=59 , #FA=305 , #Miss=187 , Contributed ATWV= 0.0138, Best Possible Contributed ATWV= 0.0814, ATWV= 0.1699
diff --git a/egs/babel/s5d/results/results.105-turkish-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:43:17-0500 b/egs/babel/s5d/results/results.105-turkish-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:43:17-0500
new file mode 100644
index 00000000000..b76de49ffe3
--- /dev/null
+++ b/egs/babel/s5d/results/results.105-turkish-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-28T14:43:17-0500
@@ -0,0 +1,22 @@
+#Created on 2015-11-28T14:43:17-0500
+#
+# STT Task performance (WER)
+%WER 57.6 | 22070 54382 | 47.9 41.3 10.8 5.4 57.6 30.8 | -1.174 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 47.7 | 22070 54382 | 57.0 34.0 9.0 4.7 47.7 29.1 | -0.571 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys
+%WER 47.3 | 22070 54382 | 56.9 33.4 9.6 4.2 47.3 29.1 | -0.489 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 46.2 | 22070 54382 | 58.2 32.6 9.2 4.3 46.2 28.5 | -0.560 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-18T12:21:55-0500
+%WER 50.7 | 22070 54382 | 53.6 35.5 10.9 4.2 50.7 29.9 | -0.382 | exp/sgmm5/decode_fmllr_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 47.7 | 22070 54382 | 56.5 33.6 9.9 4.2 47.7 29.1 | -0.506 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys
+%WER 57.6 | 22070 54382 | 47.9 41.3 10.8 5.4 57.6 30.8 | -1.174 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 47.3 | 22070 54382 | 56.5 33.0 10.5 3.7 47.3 29.1 | -0.389 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 46.2 | 22070 54382 | 58.2 32.6 9.2 4.3 46.2 28.5 | -0.560 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-19T14:35:31-0500
+%WER 50.7 | 22070 54382 | 53.6 35.5 10.9 4.2 50.7 29.9 | -0.382 | exp/sgmm5/decode_fmllr_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 47.7 | 22070 54382 | 56.5 33.6 9.9 4.2 47.7 29.1 | -0.506 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys
+%WER 57.6 | 22070 54382 | 47.9 41.3 10.8 5.4 57.6 30.8 | -1.174 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 47.3 | 22070 54382 | 56.5 33.0 10.5 3.7 47.3 29.1 | -0.389 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 43.8 | 22070 54382 | 60.1 30.1 9.8 3.8 43.8 27.8 | -0.361 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys
+%WER 46.2 | 22070 54382 | 58.2 32.6 9.2 4.3 46.2 28.5 | -0.560 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/results/results.106-tagalog-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:50:17-0500 b/egs/babel/s5d/results/results.106-tagalog-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:50:17-0500
new file mode 100644
index 00000000000..efa5bc3288c
--- /dev/null
+++ b/egs/babel/s5d/results/results.106-tagalog-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:50:17-0500
@@ -0,0 +1,22 @@
+#Created on 2015-11-27T16:50:17-0500
+#
+# STT Task performance (WER)
+%WER 56.5 | 25332 63009 | 49.6 37.6 12.8 6.0 56.5 32.0 | -1.196 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 48.0 | 25332 63009 | 56.7 31.7 11.6 4.7 48.0 30.2 | -0.746 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys
+%WER 48.1 | 25332 63009 | 56.2 31.0 12.8 4.3 48.1 30.3 | -0.477 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 46.2 | 25332 63009 | 58.0 30.5 11.5 4.2 46.2 30.0 | -0.682 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-18T12:22:58-0500
+%WER 51.1 | 25332 63009 | 53.2 32.5 14.3 4.3 51.1 31.1 | -0.459 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 48.0 | 25332 63009 | 56.7 31.7 11.6 4.7 48.0 30.2 | -0.746 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys
+%WER 56.5 | 25332 63009 | 49.6 37.6 12.8 6.0 56.5 32.0 | -1.196 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 48.1 | 25332 63009 | 56.2 31.0 12.8 4.3 48.1 30.3 | -0.477 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 46.2 | 25332 63009 | 58.0 30.5 11.5 4.2 46.2 30.0 | -0.682 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-19T14:35:40-0500
+%WER 51.1 | 25332 63009 | 53.2 32.5 14.3 4.3 51.1 31.1 | -0.459 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 48.0 | 25332 63009 | 56.7 31.7 11.6 4.7 48.0 30.2 | -0.746 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys
+%WER 56.5 | 25332 63009 | 49.6 37.6 12.8 6.0 56.5 32.0 | -1.196 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 48.1 | 25332 63009 | 56.2 31.0 12.8 4.3 48.1 30.3 | -0.477 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 43.9 | 25332 63009 | 59.5 28.8 11.7 3.4 43.9 29.2 | -0.386 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys
+%WER 46.2 | 25332 63009 | 58.0 30.5 11.5 4.2 46.2 30.0 | -0.682 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/results/results.107-vietnamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:51:53-0500 b/egs/babel/s5d/results/results.107-vietnamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:51:53-0500
new file mode 100644
index 00000000000..7d5da8e0f39
--- /dev/null
+++ b/egs/babel/s5d/results/results.107-vietnamese-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T16:51:53-0500
@@ -0,0 +1,21 @@
+#Created on 2015-11-27T16:51:53-0500
+#
+# STT Task performance (WER)
+%WER 58.0 | 21875 111957 | 45.0 42.3 12.7 3.0 58.0 36.6 | -1.024 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 50.4 | 21875 111957 | 52.5 36.5 11.0 2.9 50.4 35.8 | -0.644 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys
+%WER 49.0 | 21875 111957 | 53.4 33.4 13.3 2.4 49.0 35.8 | -0.442 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 49.6 | 21875 111957 | 53.1 36.2 10.7 2.7 49.6 35.4 | -0.606 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-18T12:23:27-0500
+%WER 53.6 | 21875 111957 | 49.4 38.4 12.2 3.0 53.6 36.4 | -0.501 | exp/sgmm5/decode_fmllr_dev10h.pem/score_9/dev10h.pem.ctm.sys
+%WER 50.4 | 21875 111957 | 52.5 36.5 11.0 2.9 50.4 35.8 | -0.644 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys
+%WER 58.0 | 21875 111957 | 45.0 42.3 12.7 3.0 58.0 36.6 | -1.024 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 49.0 | 21875 111957 | 53.4 33.4 13.3 2.4 49.0 35.8 | -0.442 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 49.6 | 21875 111957 | 53.0 35.6 11.4 2.6 49.6 35.4 | -0.548 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-19T14:35:55-0500
+%WER 53.6 | 21875 111957 | 49.4 38.4 12.2 3.0 53.6 36.4 | -0.501 | exp/sgmm5/decode_fmllr_dev10h.pem/score_9/dev10h.pem.ctm.sys
+%WER 50.4 | 21875 111957 | 52.5 36.5 11.0 2.9 50.4 35.8 | -0.644 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys
+%WER 58.0 | 21875 111957 | 45.0 42.3 12.7 3.0 58.0 36.6 | -1.024 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 49.0 | 21875 111957 | 53.4 33.4 13.3 2.4 49.0 35.8 | -0.442 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 49.6 | 21875 111957 | 53.0 35.6 11.4 2.6 49.6 35.4 | -0.548 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_16/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/results/results.201-haitian-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T11:46:09-0500 b/egs/babel/s5d/results/results.201-haitian-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T11:46:09-0500
new file mode 100644
index 00000000000..143944daa01
--- /dev/null
+++ b/egs/babel/s5d/results/results.201-haitian-fullLP.official.conf.jtrmal1@jhu.edu.2016-02-18T11:46:09-0500
@@ -0,0 +1,21 @@
+#Created on 2016-02-18T11:46:09-0500
+#
+# STT Task performance (WER)
+%WER 56.6 | 21530 83682 | 47.1 35.9 16.9 3.8 56.6 33.1 | -0.984 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 49.5 | 21530 83682 | 54.3 31.2 14.5 3.8 49.5 32.1 | -0.672 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys
+%WER 49.2 | 21530 83682 | 54.2 30.8 15.0 3.4 49.2 32.0 | -0.537 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 49.1 | 21530 83682 | 54.3 30.2 15.5 3.4 49.1 31.9 | -0.636 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-18T11:51:42-0500
+%WER 56.6 | 21530 83682 | 47.1 35.9 16.9 3.8 56.6 33.1 | -0.984 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 49.5 | 21530 83682 | 54.3 31.2 14.5 3.8 49.5 32.1 | -0.672 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys
+%WER 49.2 | 21530 83682 | 54.2 30.8 15.0 3.4 49.2 32.0 | -0.537 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 46.2 | 21530 83682 | 56.4 27.0 16.6 2.6 46.2 31.4 | -0.484 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys
+%WER 49.1 | 21530 83682 | 54.3 30.2 15.5 3.4 49.1 31.9 | -0.636 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-19T14:37:00-0500
+%WER 49.5 | 21530 83682 | 54.3 31.2 14.5 3.8 49.5 32.1 | -0.672 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys
+%WER 56.6 | 21530 83682 | 47.1 35.9 16.9 3.8 56.6 33.1 | -0.984 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 49.2 | 21530 83682 | 53.7 30.2 16.1 2.9 49.2 31.9 | -0.465 | exp/tri6b_nnet/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 46.2 | 21530 83682 | 56.4 27.0 16.6 2.6 46.2 31.4 | -0.484 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys
+%WER 49.1 | 21530 83682 | 54.3 30.2 15.5 3.4 49.1 31.9 | -0.636 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/results/results.202-swahili.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-21T10:25:47-0500 b/egs/babel/s5d/results/results.202-swahili.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-21T10:25:47-0500
new file mode 100644
index 00000000000..faa73c05ecb
--- /dev/null
+++ b/egs/babel/s5d/results/results.202-swahili.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-21T10:25:47-0500
@@ -0,0 +1,27 @@
+#Created on 2016-02-21T10:25:47-0500 by local/best_scores.sh
+#
+# STT Task performance (WER), evaluated on 2016-02-21T10:25:47-0500
+%WER 46.6 | 23781 62345 | 59.1 32.4 8.5 5.7 46.6 29.3 | -0.865 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_12/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-26T06:37:59-05:00
+%WER 46.6 | 23781 62345 | 59.1 32.4 8.5 5.7 46.6 29.3 | -0.865 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_12/dev10h.pem.ctm.sys
+%WER 54.1 | 23781 62345 | 53.8 37.5 8.7 7.9 54.1 30.7 | -1.869 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 43.7 | 23781 62345 | 61.1 30.2 8.7 4.8 43.7 28.6 | -0.713 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 43.0 | 23781 62345 | 61.1 26.9 12.0 4.1 43.0 28.7 | -0.631 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_12/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-26T20:34:46-05:00
+%WER 46.6 | 23781 62345 | 59.1 32.4 8.5 5.7 46.6 29.3 | -0.865 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_12/dev10h.pem.ctm.sys
+%WER 54.1 | 23781 62345 | 53.8 37.5 8.7 7.9 54.1 30.7 | -1.869 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 43.7 | 23781 62345 | 61.1 30.2 8.7 4.8 43.7 28.6 | -0.713 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 43.0 | 23781 62345 | 61.1 26.9 12.0 4.1 43.0 28.7 | -0.631 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_12/dev10h.pem.ctm.sys
+%WER 44.8 | 23781 62345 | 60.1 30.8 9.2 4.9 44.8 28.6 | -0.702 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-05T22:36:11-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 44.8 | 23781 62345 | 60.1 30.8 9.2 4.9 44.8 28.6 | -0.702 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+%WER 60.0 | 23781 62345 | 43.2 35.3 21.5 3.2 60.0 32.4 | -0.909 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 63.9 | 23781 62345 | 39.6 36.7 23.6 3.5 63.9 33.1 | -1.153 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys
+%WER 46.6 | 23781 62345 | 59.1 32.4 8.5 5.7 46.6 29.3 | -0.865 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_12/dev10h.pem.ctm.sys
+%WER 54.1 | 23781 62345 | 53.8 37.5 8.7 7.9 54.1 30.7 | -1.869 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 58.7 | 23781 62345 | 47.9 40.2 11.9 6.6 58.7 31.8 | -1.355 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 43.7 | 23781 62345 | 61.1 30.2 8.7 4.8 43.7 28.6 | -0.713 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 43.0 | 23781 62345 | 61.1 26.9 12.0 4.1 43.0 28.7 | -0.631 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_12/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/results/results.203-lao-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:50:41-0500 b/egs/babel/s5d/results/results.203-lao-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:50:41-0500
new file mode 100644
index 00000000000..66d7a71f598
--- /dev/null
+++ b/egs/babel/s5d/results/results.203-lao-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T16:50:41-0500
@@ -0,0 +1,14 @@
+#Created on 2015-12-01T16:50:41-0500
+#
+# STT Task performance (WER)
+%WER 53.4 | 25158 82801 | 51.8 35.4 12.7 5.2 53.4 34.4 | -1.131 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 46.6 | 25158 82801 | 58.2 31.2 10.6 4.8 46.6 33.2 | -0.792 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys
+%WER 47.0 | 25158 82801 | 57.3 30.6 12.2 4.3 47.0 33.5 | -0.645 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 44.3 | 25158 82801 | 59.9 30.1 10.0 4.2 44.3 32.6 | -0.740 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-18T11:51:37-0500
+%WER 53.4 | 25158 82801 | 51.8 35.4 12.7 5.2 53.4 34.4 | -1.131 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 46.6 | 25158 82801 | 58.2 31.2 10.6 4.8 46.6 33.2 | -0.792 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_10/dev10h.pem.ctm.sys
+%WER 47.0 | 25158 82801 | 57.3 30.6 12.2 4.3 47.0 33.5 | -0.645 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 42.9 | 25158 82801 | 60.2 27.5 12.2 3.1 42.9 32.5 | -0.492 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys
+%WER 44.3 | 25158 82801 | 59.9 30.1 10.0 4.2 44.3 32.6 | -0.740 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/results/results.204-tamil-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T19:55:42-0500 b/egs/babel/s5d/results/results.204-tamil-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T19:55:42-0500
new file mode 100644
index 00000000000..e4dfcd5a5c2
--- /dev/null
+++ b/egs/babel/s5d/results/results.204-tamil-fullLP.official.conf.jtrmal1@jhu.edu.2015-12-01T19:55:42-0500
@@ -0,0 +1,8 @@
+#Created on 2015-12-01T19:55:42-0500
+#
+# STT Task performance (WER), evaluated on 2016-02-18T11:51:14-0500
+%WER 74.2 | 22178 60033 | 30.1 51.6 18.3 4.3 74.2 36.3 | -1.744 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 67.8 | 22178 60033 | 36.1 47.5 16.4 3.8 67.8 35.0 | -1.220 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_10/dev10h.pem.ctm.sys
+%WER 68.1 | 22178 60033 | 35.2 46.5 18.2 3.3 68.1 35.5 | -0.900 | exp/tri6b_nnet/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 65.1 | 22178 60033 | 38.0 44.1 18.0 3.1 65.1 34.6 | -0.759 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys
+%WER 66.8 | 22178 60033 | 37.2 46.9 16.0 4.0 66.8 34.8 | -1.137 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_15/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/results/results.205-kurmanji.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:24:13-0500 b/egs/babel/s5d/results/results.205-kurmanji.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:24:13-0500
new file mode 100644
index 00000000000..3196f08c26a
--- /dev/null
+++ b/egs/babel/s5d/results/results.205-kurmanji.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:24:13-0500
@@ -0,0 +1,96 @@
+#Created on 2016-02-21T10:24:13-0500 by local/best_scores.sh
+#
+# STT Task performance (WER), evaluated on 2016-02-21T10:24:13-0500
+#
+# STT Task performance (WER), evaluated on 2016-02-21T10:25:04-0500
+#
+# STT Task performance (WER), evaluated on 2016-02-27T09:49:11-05:00
+%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys
+%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys
+%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-27T17:16:07-05:00
+%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys
+%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys
+%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-04T08:52:09-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 71.3 | 23078 60240 | 31.4 34.0 34.7 2.7 71.3 33.6 | -2.291 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 73.3 | 23078 60240 | 29.3 33.2 37.5 2.6 73.3 33.7 | -1.834 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 83.2 | 23078 60240 | 18.7 30.0 51.3 1.9 83.2 35.6 | -1.680 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys
+%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys
+%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-04T20:57:22-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 71.3 | 23078 60240 | 31.4 34.0 34.7 2.7 71.3 33.6 | -2.291 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 73.3 | 23078 60240 | 29.3 33.2 37.5 2.6 73.3 33.7 | -1.834 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 81.0 | 23078 60240 | 20.9 30.0 49.1 1.9 81.0 35.1 | -1.466 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 83.2 | 23078 60240 | 18.7 30.0 51.3 1.9 83.2 35.6 | -1.680 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys
+%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys
+%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-05T10:56:23-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 71.3 | 23078 60240 | 31.4 34.0 34.7 2.7 71.3 33.6 | -2.291 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 73.3 | 23078 60240 | 29.3 33.2 37.5 2.6 73.3 33.7 | -1.834 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 81.0 | 23078 60240 | 20.9 30.0 49.1 1.9 81.0 35.1 | -1.466 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 83.2 | 23078 60240 | 18.7 30.0 51.3 1.9 83.2 35.6 | -1.680 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys
+%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys
+%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-05T22:38:30-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+%WER 71.6 | 23078 60240 | 30.8 32.2 37.0 2.4 71.6 33.6 | -2.116 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 73.5 | 23078 60240 | 29.1 32.8 38.1 2.6 73.5 33.7 | -1.960 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys
+%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-06T09:57:37-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 71.6 | 23078 60240 | 30.8 32.2 37.0 2.4 71.6 33.6 | -2.116 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 73.5 | 23078 60240 | 29.1 32.8 38.1 2.6 73.5 33.7 | -1.960 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 82.9 | 23078 60240 | 19.2 30.9 49.9 2.1 82.9 35.6 | -1.948 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys
+%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys
+%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-08T07:34:08-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+%WER 60.6 | 23078 60240 | 43.0 37.3 19.7 3.6 60.6 31.7 | -1.738 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 73.5 | 23078 60240 | 29.1 32.8 38.1 2.6 73.5 33.7 | -1.960 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 82.9 | 23078 60240 | 19.2 30.9 49.9 2.1 82.9 35.6 | -1.948 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys
+%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-10T09:31:52-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 64.1 | 23078 60240 | 40.3 39.5 20.2 4.4 64.1 32.1 | -0.888 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+%WER 60.6 | 23078 60240 | 43.0 37.3 19.7 3.6 60.6 31.7 | -1.738 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys
+%WER 61.0 | 23078 60240 | 42.5 36.7 20.8 3.5 61.0 31.8 | -1.277 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 82.9 | 23078 60240 | 19.2 30.9 49.9 2.1 82.9 35.6 | -1.948 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 64.8 | 23078 60240 | 40.3 41.0 18.7 5.1 64.8 32.4 | -1.094 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys
+%WER 70.4 | 23078 60240 | 35.3 44.7 20.1 5.6 70.4 33.1 | -2.008 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 73.9 | 23078 60240 | 31.1 44.9 24.1 4.9 73.9 33.5 | -1.627 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 64.0 | 23078 60240 | 39.6 37.9 22.4 3.7 64.0 32.2 | -0.826 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 65.4 | 23078 60240 | 37.7 30.3 32.0 3.1 65.4 32.4 | -0.762 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/results/results.206-zulu-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:54:01-0500 b/egs/babel/s5d/results/results.206-zulu-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:54:01-0500
new file mode 100644
index 00000000000..1e98cf911ea
--- /dev/null
+++ b/egs/babel/s5d/results/results.206-zulu-fullLP.official.conf.jtrmal1@jhu.edu.2015-11-27T17:54:01-0500
@@ -0,0 +1,14 @@
+#Created on 2015-11-27T17:54:01-0500
+#
+# STT Task performance (WER)
+%WER 66.0 | 22805 52162 | 38.4 47.5 14.1 4.4 66.0 33.2 | -2.078 | exp/tri5/decode_dev10h.pem/score_17/dev10h.pem.ctm.sys
+%WER 60.4 | 22805 52162 | 44.4 44.1 11.5 4.8 60.4 32.3 | -1.189 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys
+%WER 59.1 | 22805 52162 | 44.2 41.8 14.0 3.3 59.1 32.0 | -0.746 | exp/tri6b_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 58.6 | 22805 52162 | 45.4 42.5 12.1 4.0 58.6 31.9 | -1.026 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-18T11:51:04-0500
+%WER 66.0 | 22805 52162 | 38.4 47.5 14.1 4.4 66.0 33.2 | -2.078 | exp/tri5/decode_dev10h.pem/score_17/dev10h.pem.ctm.sys
+%WER 60.4 | 22805 52162 | 44.4 44.1 11.5 4.8 60.4 32.3 | -1.189 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_11/dev10h.pem.ctm.sys
+%WER 59.1 | 22805 52162 | 44.2 41.8 14.0 3.3 59.1 32.0 | -0.746 | exp/tri6b_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 56.4 | 22805 52162 | 46.7 40.0 13.3 3.1 56.4 31.4 | -0.682 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_12/dev10h.pem.ctm.sys
+%WER 58.6 | 22805 52162 | 45.4 42.5 12.1 4.0 58.6 31.9 | -1.026 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/results/results.207-tokpisin.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:25:25-0500 b/egs/babel/s5d/results/results.207-tokpisin.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:25:25-0500
new file mode 100644
index 00000000000..3d0dc67e8e3
--- /dev/null
+++ b/egs/babel/s5d/results/results.207-tokpisin.flp.marcc2.conf.jtrmal1@jhu.edu.2016-02-21T10:25:25-0500
@@ -0,0 +1,34 @@
+#Created on 2016-02-21T10:25:25-0500 by local/best_scores.sh
+#
+# STT Task performance (WER), evaluated on 2016-02-21T10:25:25-0500
+%WER 38.5 | 24353 74481 | 67.0 24.2 8.8 5.5 38.5 28.4 | -0.703 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_11/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-26T22:19:14-05:00
+%WER 38.5 | 24353 74481 | 67.0 24.2 8.8 5.5 38.5 28.4 | -0.703 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_11/dev10h.pem.ctm.sys
+%WER 45.8 | 24353 74481 | 61.0 28.7 10.3 6.8 45.8 29.9 | -1.441 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 36.4 | 24353 74481 | 68.0 21.9 10.1 4.3 36.4 28.1 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 36.2 | 24353 74481 | 67.9 20.9 11.2 4.2 36.2 28.0 | -0.533 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys
+%WER 36.8 | 24353 74481 | 68.2 23.0 8.8 5.0 36.8 27.8 | -0.602 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-04T08:49:10-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 36.8 | 24353 74481 | 68.2 23.0 8.8 5.0 36.8 27.8 | -0.602 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys
+%WER 33.2 | 24353 74481 | 70.6 20.4 9.0 3.8 33.2 26.7 | -1.367 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 33.3 | 24353 74481 | 70.9 20.5 8.7 4.2 33.3 26.7 | -1.038 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 40.1 | 24353 74481 | 64.7 23.9 11.4 4.8 40.1 29.2 | -0.825 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys
+%WER 38.5 | 24353 74481 | 67.0 24.2 8.8 5.5 38.5 28.4 | -0.703 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_11/dev10h.pem.ctm.sys
+%WER 45.8 | 24353 74481 | 61.0 28.7 10.3 6.8 45.8 29.9 | -1.441 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 51.5 | 24353 74481 | 55.3 31.7 13.0 6.8 51.5 30.7 | -1.076 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 36.4 | 24353 74481 | 68.0 21.9 10.1 4.3 36.4 28.1 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 36.2 | 24353 74481 | 67.9 20.9 11.2 4.2 36.2 28.0 | -0.533 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-05T08:07:38-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 36.8 | 24353 74481 | 68.2 23.0 8.8 5.0 36.8 27.8 | -0.602 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys
+%WER 33.2 | 24353 74481 | 70.6 20.4 9.0 3.8 33.2 26.7 | -1.367 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 33.3 | 24353 74481 | 70.9 20.5 8.7 4.2 33.3 26.7 | -1.038 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 37.6 | 24353 74481 | 66.9 22.5 10.5 4.5 37.6 28.5 | -0.642 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 40.1 | 24353 74481 | 64.7 23.9 11.4 4.8 40.1 29.2 | -0.825 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys
+%WER 38.5 | 24353 74481 | 67.0 24.2 8.8 5.5 38.5 28.4 | -0.703 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_11/dev10h.pem.ctm.sys
+%WER 45.8 | 24353 74481 | 61.0 28.7 10.3 6.8 45.8 29.9 | -1.441 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 51.5 | 24353 74481 | 55.3 31.7 13.0 6.8 51.5 30.7 | -1.076 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 36.4 | 24353 74481 | 68.0 21.9 10.1 4.3 36.4 28.1 | -0.552 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 36.2 | 24353 74481 | 67.9 20.9 11.2 4.2 36.2 28.0 | -0.533 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/results/results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T10:45:54-05:00 b/egs/babel/s5d/results/results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T10:45:54-05:00
new file mode 100644
index 00000000000..e6af3c9f6f9
--- /dev/null
+++ b/egs/babel/s5d/results/results.305-guarani.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T10:45:54-05:00
@@ -0,0 +1,43 @@
+#Created on 2016-02-25T10:45:54-05:00 by local/best_scores.sh
+#
+# STT Task performance (WER), evaluated on 2016-02-25T10:45:54-05:00
+#
+# STT Task performance (WER), evaluated on 2016-02-25T22:40:27-05:00
+%WER 52.7 | 21519 61705 | 52.8 34.4 12.8 5.5 52.7 32.8 | -0.921 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 51.7 | 21519 61705 | 54.0 33.6 12.3 5.8 51.7 32.4 | -1.063 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 59.6 | 21519 61705 | 48.1 38.9 13.1 7.6 59.6 33.8 | -2.049 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 50.7 | 21519 61705 | 53.9 31.7 14.3 4.6 50.7 32.3 | -0.810 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 50.3 | 21519 61705 | 54.2 29.1 16.7 4.5 50.3 32.2 | -0.736 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-26T20:35:22-05:00
+%WER 52.7 | 21519 61705 | 52.8 34.4 12.8 5.5 52.7 32.8 | -0.921 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 51.7 | 21519 61705 | 54.0 33.6 12.3 5.8 51.7 32.4 | -1.063 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 59.6 | 21519 61705 | 48.1 38.9 13.1 7.6 59.6 33.8 | -2.049 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 50.7 | 21519 61705 | 53.9 31.7 14.3 4.6 50.7 32.3 | -0.810 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 50.3 | 21519 61705 | 54.2 29.1 16.7 4.5 50.3 32.2 | -0.736 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys
+%WER 50.9 | 21519 61705 | 54.5 33.1 12.5 5.4 50.9 32.1 | -0.813 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-02T08:22:19-05:00
+%WER 50.9 | 21519 61705 | 54.5 33.1 12.5 5.4 50.9 32.1 | -0.813 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+%WER 45.6 | 21519 61705 | 58.0 29.3 12.7 3.7 45.6 31.2 | -1.354 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 46.0 | 21519 61705 | 58.2 29.4 12.4 4.1 46.0 31.4 | -1.051 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 68.4 | 21519 61705 | 34.6 32.8 32.7 2.9 68.4 35.4 | -1.082 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 52.7 | 21519 61705 | 52.8 34.4 12.8 5.5 52.7 32.8 | -0.921 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 51.7 | 21519 61705 | 54.0 33.6 12.3 5.8 51.7 32.4 | -1.063 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 59.6 | 21519 61705 | 48.1 38.9 13.1 7.6 59.6 33.8 | -2.049 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 64.1 | 21519 61705 | 43.0 41.1 15.8 7.2 64.1 34.7 | -1.573 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 50.7 | 21519 61705 | 53.9 31.7 14.3 4.6 50.7 32.3 | -0.810 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 50.3 | 21519 61705 | 54.2 29.1 16.7 4.5 50.3 32.2 | -0.736 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-03T07:26:39-05:00
+%WER 50.9 | 21519 61705 | 54.5 33.1 12.5 5.4 50.9 32.1 | -0.813 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_18/dev10h.pem.ctm.sys
+%WER 45.6 | 21519 61705 | 58.0 29.3 12.7 3.7 45.6 31.2 | -1.354 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 46.0 | 21519 61705 | 58.2 29.4 12.4 4.1 46.0 31.4 | -1.051 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 51.2 | 21519 61705 | 53.1 31.1 15.8 4.3 51.2 32.4 | -0.826 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 68.4 | 21519 61705 | 34.6 32.8 32.7 2.9 68.4 35.4 | -1.082 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 52.7 | 21519 61705 | 52.8 34.4 12.8 5.5 52.7 32.8 | -0.921 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 51.7 | 21519 61705 | 54.0 33.6 12.3 5.8 51.7 32.4 | -1.063 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 59.6 | 21519 61705 | 48.1 38.9 13.1 7.6 59.6 33.8 | -2.049 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 64.1 | 21519 61705 | 43.0 41.1 15.8 7.2 64.1 34.7 | -1.573 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 50.7 | 21519 61705 | 53.9 31.7 14.3 4.6 50.7 32.3 | -0.810 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 50.3 | 21519 61705 | 54.2 29.1 16.7 4.5 50.3 32.2 | -0.736 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/results/results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-05T10:56:45-05:00 b/egs/babel/s5d/results/results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-05T10:56:45-05:00
new file mode 100644
index 00000000000..464362cf7e3
--- /dev/null
+++ b/egs/babel/s5d/results/results.306-igbo.flp.marcc.conf.jtrmal1@jhu.edu.2016-03-05T10:56:45-05:00
@@ -0,0 +1,52 @@
+#Created on 2016-03-05T10:56:45-05:00 by local/best_scores.sh
+#
+# STT Task performance (WER), evaluated on 2016-03-05T10:56:48-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-06T13:53:27-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-06T15:21:54-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-07T10:43:21-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 58.0 | 21958 102699 | 45.6 34.5 19.9 3.7 58.0 33.7 | -1.097 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 62.3 | 21958 102699 | 42.1 37.8 20.2 4.4 62.3 34.4 | -0.645 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-08T07:31:46-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 58.0 | 21958 102699 | 45.6 34.5 19.9 3.7 58.0 33.7 | -1.097 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 62.3 | 21958 102699 | 42.1 37.8 20.2 4.4 62.3 34.4 | -0.645 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 61.5 | 21958 102699 | 43.6 36.1 20.3 5.1 61.5 34.2 | -0.641 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-10T23:23:15-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 62.2 | 21958 102699 | 43.3 39.2 17.4 5.6 62.2 34.1 | -0.795 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_19/dev10h.pem.ctm.sys
+%WER 58.0 | 21958 102699 | 45.6 34.5 19.9 3.7 58.0 33.7 | -1.097 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 58.3 | 21958 102699 | 45.7 34.7 19.6 4.0 58.3 33.8 | -0.872 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 63.2 | 21958 102699 | 41.0 37.2 21.9 4.1 63.2 34.6 | -0.723 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 64.5 | 21958 102699 | 40.7 39.9 19.4 5.1 64.5 34.7 | -0.686 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 63.2 | 21958 102699 | 42.4 39.6 18.0 5.6 63.2 34.4 | -0.806 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 68.7 | 21958 102699 | 38.3 44.1 17.6 7.0 68.7 35.3 | -1.421 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 71.7 | 21958 102699 | 34.9 45.2 19.9 6.6 71.7 35.5 | -1.143 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 62.3 | 21958 102699 | 42.1 37.8 20.2 4.4 62.3 34.4 | -0.645 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 61.5 | 21958 102699 | 43.6 36.1 20.3 5.1 61.5 34.2 | -0.641 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_10/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/results/results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T09:46:16-05:00 b/egs/babel/s5d/results/results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T09:46:16-05:00
new file mode 100644
index 00000000000..9950a8f11a0
--- /dev/null
+++ b/egs/babel/s5d/results/results.307-amharic.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T09:46:16-05:00
@@ -0,0 +1,48 @@
+#Created on 2016-02-25T09:46:16-05:00 by local/best_scores.sh
+#
+# STT Task performance (WER), evaluated on 2016-02-25T09:46:16-05:00
+%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 48.8 | 21823 59749 | 55.6 34.0 10.3 4.4 48.8 30.5 | -0.773 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-25T15:01:39-05:00
+%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 47.2 | 21823 59749 | 55.9 30.1 14.0 3.1 47.2 30.3 | -0.514 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_11/dev10h.pem.ctm.sys
+%WER 48.8 | 21823 59749 | 55.6 34.0 10.3 4.4 48.8 30.5 | -0.773 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-02-26T06:40:02-05:00
+%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 47.2 | 21823 59749 | 55.9 30.1 14.0 3.1 47.2 30.3 | -0.514 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_11/dev10h.pem.ctm.sys
+%WER 48.8 | 21823 59749 | 55.6 34.0 10.3 4.4 48.8 30.5 | -0.773 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it4/score_18/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-04T08:49:16-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 48.8 | 21823 59749 | 55.6 34.3 10.1 4.5 48.8 30.5 | -0.743 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_17/dev10h.pem.ctm.sys
+%WER 43.9 | 21823 59749 | 59.3 31.0 9.7 3.2 43.9 29.5 | -0.869 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 47.5 | 21823 59749 | 56.0 32.9 11.2 3.5 47.5 30.3 | -0.655 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 63.0 | 21823 59749 | 42.7 42.8 14.5 5.7 63.0 33.3 | -1.302 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 47.2 | 21823 59749 | 56.0 30.4 13.6 3.2 47.2 30.3 | -0.552 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/score_11/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-05T08:07:40-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 48.8 | 21823 59749 | 55.6 34.3 10.1 4.5 48.8 30.5 | -0.743 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it3/score_17/dev10h.pem.ctm.sys
+%WER 43.5 | 21823 59749 | 59.6 31.1 9.3 3.1 43.5 29.3 | -1.116 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 43.9 | 21823 59749 | 59.3 31.0 9.7 3.2 43.9 29.5 | -0.869 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 47.5 | 21823 59749 | 56.0 32.9 11.2 3.5 47.5 30.3 | -0.655 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 49.7 | 21823 59749 | 53.8 33.6 12.6 3.5 49.7 31.0 | -0.709 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 51.4 | 21823 59749 | 52.7 35.8 11.5 4.0 51.4 31.1 | -0.633 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 49.8 | 21823 59749 | 54.1 34.5 11.3 4.0 49.8 30.7 | -0.773 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 58.8 | 21823 59749 | 47.7 41.2 11.0 6.5 58.8 32.4 | -1.809 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 63.0 | 21823 59749 | 42.7 42.8 14.5 5.7 63.0 33.3 | -1.302 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 47.7 | 21823 59749 | 55.8 33.1 11.1 3.5 47.7 30.3 | -0.620 | exp/tri6_nnet/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 47.2 | 21823 59749 | 56.0 30.4 13.6 3.2 47.2 30.3 | -0.552 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch2/score_11/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/results/results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T16:17:55-05:00 b/egs/babel/s5d/results/results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T16:17:55-05:00
new file mode 100644
index 00000000000..051d40b6f10
--- /dev/null
+++ b/egs/babel/s5d/results/results.401-mongolian.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T16:17:55-05:00 @@ -0,0 +1,34 @@ +#Created on 2016-02-26T16:17:55-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-26T16:17:55-05:00 +%WER 59.9 | 23997 87709 | 44.9 36.6 18.5 4.8 59.9 36.0 | -0.664 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 23997 87709 | 48.0 36.7 15.3 6.1 58.1 35.8 | -0.932 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 65.6 | 23997 87709 | 40.8 41.2 18.0 6.5 65.6 36.9 | -1.703 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 56.1 | 23997 87709 | 47.9 33.7 18.4 4.0 56.1 35.3 | -0.545 | exp/tri6_nnet/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 54.0 | 23997 87709 | 50.5 32.3 17.2 4.5 54.0 35.0 | -0.502 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +%WER 56.9 | 23997 87709 | 48.7 35.2 16.1 5.7 56.9 35.3 | -0.747 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-03T10:32:48-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 56.9 | 23997 87709 | 48.7 35.2 16.1 5.7 56.9 35.3 | -0.747 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +%WER 48.3 | 23997 87709 | 55.7 30.1 14.2 4.1 48.3 33.9 | -1.338 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 23997 87709 | 55.0 31.0 13.9 4.4 49.3 34.0 | -1.017 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 77.7 | 23997 87709 | 24.3 32.5 43.2 2.0 77.7 40.1 | -1.550 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 59.9 | 23997 87709 | 44.9 36.6 18.5 4.8 59.9 36.0 | -0.664 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 23997 87709 | 48.0 36.7 15.3 6.1 58.1 35.8 | -0.932 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 65.6 | 23997 87709 | 40.8 41.2 18.0 6.5 65.6 36.9 | -1.703 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 69.9 | 23997 87709 | 35.7 42.4 21.9 5.5 69.9 37.7 | -1.140 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 56.1 | 23997 87709 | 47.9 33.7 18.4 4.0 56.1 35.3 | -0.545 | exp/tri6_nnet/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 54.0 | 23997 87709 | 50.5 32.3 17.2 4.5 54.0 35.0 | -0.502 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T08:49:22-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 56.9 | 23997 87709 | 48.7 35.2 16.1 5.7 56.9 35.3 | -0.747 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +%WER 48.3 | 23997 87709 | 55.7 30.1 14.2 4.1 48.3 33.9 | -1.338 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 49.3 | 23997 87709 | 55.0 31.0 13.9 4.4 49.3 34.0 | -1.017 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_10/dev10h.pem.ctm.sys +%WER 54.4 | 23997 87709 | 49.7 33.8 16.5 4.2 54.4 35.5 | -0.684 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 77.7 | 23997 87709 | 24.3 32.5 43.2 2.0 77.7 40.1 | -1.550 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 59.9 | 23997 87709 | 44.9 36.6 18.5 4.8 59.9 36.0 | -0.664 | exp/sgmm5/decode_fmllr_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 58.1 | 23997 87709 | 48.0 36.7 15.3 6.1 58.1 35.8 | -0.932 | 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_10/dev10h.pem.ctm.sys +%WER 65.6 | 23997 87709 | 40.8 41.2 18.0 6.5 65.6 36.9 | -1.703 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 69.9 | 23997 87709 | 35.7 42.4 21.9 5.5 69.9 37.7 | -1.140 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 56.1 | 23997 87709 | 47.9 33.7 18.4 4.0 56.1 35.3 | -0.545 | exp/tri6_nnet/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 54.0 | 23997 87709 | 50.5 32.3 17.2 4.5 54.0 35.0 | -0.502 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch3/score_10/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T06:40:39-05:00 b/egs/babel/s5d/results/results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T06:40:39-05:00 new file mode 100644 index 00000000000..9ad464aa2e7 --- /dev/null +++ b/egs/babel/s5d/results/results.402-javanese.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-26T06:40:39-05:00 @@ -0,0 +1,41 @@ +#Created on 2016-02-26T06:40:39-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-26T06:40:39-05:00 +%WER 62.9 | 23669 65293 | 42.6 42.8 14.6 5.6 62.9 37.0 | -1.205 | exp/sgmm5/decode_fmllr_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.1 | 23669 65293 | 43.1 42.0 14.9 5.3 62.1 36.9 | -1.329 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_12/dev10h.pem.ctm.sys +%WER 69.0 | 23669 65293 | 39.0 48.1 12.9 8.0 69.0 37.9 | -2.509 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 60.4 | 23669 65293 | 43.6 39.6 16.8 4.0 60.4 36.7 | -1.005 | exp/tri6_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 60.3 | 23669 65293 | 43.2 35.6 21.2 3.5 60.3 36.8 | -0.819 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T12:50:32-05:00 +%WER 62.9 | 23669 65293 | 42.6 42.8 14.6 5.6 62.9 37.0 | -1.205 | exp/sgmm5/decode_fmllr_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.1 | 23669 65293 | 43.1 42.0 14.9 5.3 62.1 36.9 | -1.329 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_12/dev10h.pem.ctm.sys +%WER 69.0 | 23669 65293 | 39.0 48.1 12.9 8.0 69.0 37.9 | -2.509 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 60.4 | 23669 65293 | 43.6 39.6 16.8 4.0 60.4 36.7 | -1.005 | exp/tri6_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 60.3 | 23669 65293 | 43.2 35.6 21.2 3.5 60.3 36.8 | -0.819 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys +%WER 60.8 | 23669 65293 | 44.0 41.1 14.9 4.8 60.8 36.6 | -1.077 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-03T16:56:30-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 60.8 | 23669 65293 | 44.0 41.1 14.9 4.8 60.8 36.6 | -1.077 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +%WER 54.0 | 23669 65293 | 49.5 37.0 13.5 3.5 54.0 35.3 | -1.581 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 55.1 | 23669 65293 | 48.2 35.9 15.9 3.3 55.1 35.5 | -0.993 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 64.0 | 23669 65293 | 40.1 41.3 18.6 4.0 64.0 37.7 | -1.205 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 62.9 | 23669 65293 | 42.6 42.8 14.6 5.6 62.9 37.0 | -1.205 | exp/sgmm5/decode_fmllr_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.1 | 23669 65293 | 43.1 42.0 14.9 5.3 62.1 36.9 | -1.329 | 
exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_12/dev10h.pem.ctm.sys +%WER 69.0 | 23669 65293 | 39.0 48.1 12.9 8.0 69.0 37.9 | -2.509 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 72.5 | 23669 65293 | 34.8 49.2 16.1 7.3 72.5 38.6 | -1.941 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 60.4 | 23669 65293 | 43.6 39.6 16.8 4.0 60.4 36.7 | -1.005 | exp/tri6_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 60.3 | 23669 65293 | 43.2 35.6 21.2 3.5 60.3 36.8 | -0.819 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-04T08:51:39-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster +%WER 60.8 | 23669 65293 | 44.0 41.1 14.9 4.8 60.8 36.6 | -1.077 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_16/dev10h.pem.ctm.sys +%WER 54.0 | 23669 65293 | 49.5 37.0 13.5 3.5 54.0 35.3 | -1.581 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 55.1 | 23669 65293 | 48.2 35.9 15.9 3.3 55.1 35.5 | -0.993 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 61.5 | 23669 65293 | 42.1 38.8 19.1 3.6 61.5 36.9 | -0.881 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 64.0 | 23669 65293 | 40.1 41.3 18.6 4.0 64.0 37.7 | -1.205 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 62.9 | 23669 65293 | 42.6 42.8 14.6 5.6 62.9 37.0 | -1.205 | exp/sgmm5/decode_fmllr_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 62.1 | 23669 65293 | 43.1 42.0 14.9 5.3 62.1 36.9 | -1.329 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it1/score_12/dev10h.pem.ctm.sys +%WER 69.0 | 23669 65293 | 39.0 48.1 12.9 8.0 69.0 37.9 | -2.509 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 72.5 | 23669 65293 | 34.8 49.2 16.1 7.3 72.5 38.6 | -1.941 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 60.4 | 23669 65293 | 43.6 39.6 16.8 4.0 60.4 36.7 | -1.005 | exp/tri6_nnet/decode_dev10h.pem/score_15/dev10h.pem.ctm.sys +%WER 60.3 | 23669 65293 | 43.2 35.6 21.2 3.5 60.3 36.8 | -0.819 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch1/score_12/dev10h.pem.ctm.sys diff --git a/egs/babel/s5d/results/results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T23:27:09-05:00 b/egs/babel/s5d/results/results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T23:27:09-05:00 new file mode 100644 index 00000000000..fc7382101b2 --- /dev/null +++ b/egs/babel/s5d/results/results.403-dholuo.flp.marcc.conf.jtrmal1@jhu.edu.2016-02-25T23:27:09-05:00 @@ -0,0 +1,54 @@ +#Created on 2016-02-25T23:27:09-05:00 by local/best_scores.sh +# +# STT Task performance (WER), evaluated on 2016-02-25T23:27:09-05:00 +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-02-26T20:37:15-05:00 +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | 
exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-02T00:59:22-05:00 +%WER 39.4 | 23451 78254 | 64.1 26.4 9.5 3.4 39.4 28.0 | -1.018 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-02T01:34:27-05:00 +%WER 39.4 | 23451 78254 | 64.1 26.4 9.5 3.4 39.4 28.0 | -1.018 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 40.4 | 23451 78254 | 62.7 26.6 10.7 3.1 40.4 28.1 | -0.618 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 58.6 | 23451 78254 | 47.4 40.4 12.3 5.9 58.6 31.4 | -0.991 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys +# +# STT Task performance (WER), evaluated on 2016-03-02T08:19:30-05:00 +%WER 39.4 | 23451 78254 | 64.1 26.4 9.5 3.4 39.4 28.0 | -1.018 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys +%WER 40.4 | 23451 78254 | 62.7 26.6 10.7 3.1 40.4 28.1 | -0.618 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 58.4 30.4 11.3 4.0 45.6 29.4 | -0.575 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys +%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys +%WER 58.6 | 23451 78254 | 47.4 40.4 12.3 5.9 58.6 31.4 | -0.991 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys +%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys +%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys +%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 
45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys
+#
+# STT Task performance (WER), evaluated on 2016-03-03T08:38:46-05:00 by user jtrmal1@jhu.edu on login-node04.cm.cluster
+%WER 45.6 | 23451 78254 | 59.2 31.7 9.1 4.9 45.6 29.0 | -0.565 | exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_16/dev10h.pem.ctm.sys
+%WER 39.4 | 23451 78254 | 64.1 26.4 9.5 3.4 39.4 28.0 | -1.018 | exp/nnet3/lstm_bidirectional_sp/decode_dev10h.pem/score_11/dev10h.pem.ctm.sys
+%WER 40.4 | 23451 78254 | 62.7 26.6 10.7 3.1 40.4 28.1 | -0.618 | exp/nnet3/lstm_sp/decode_dev10h.pem/score_13/dev10h.pem.ctm.sys
+%WER 44.1 | 23451 78254 | 59.9 29.6 10.5 4.0 44.1 29.1 | -0.535 | exp/nnet3/tdnn_6layer_r512_sp/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 45.6 | 23451 78254 | 58.4 30.4 11.3 4.0 45.6 29.4 | -0.575 | exp/nnet3/tdnn_sp/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 46.1 | 23451 78254 | 58.5 31.9 9.5 4.7 46.1 29.1 | -0.561 | exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h.pem_it2/score_11/dev10h.pem.ctm.sys
+%WER 53.1 | 23451 78254 | 53.0 37.3 9.7 6.1 53.1 30.4 | -1.305 | exp/tri5/decode_dev10h.pem/score_12/dev10h.pem.ctm.sys
+%WER 58.6 | 23451 78254 | 47.4 40.4 12.3 5.9 58.6 31.4 | -0.991 | exp/tri5/decode_dev10h.pem.si/score_12/dev10h.pem.ctm.sys
+%WER 44.6 | 23451 78254 | 59.4 30.0 10.6 4.0 44.6 28.7 | -0.473 | exp/tri6_nnet/decode_dev10h.pem/score_14/dev10h.pem.ctm.sys
+%WER 43.9 | 23451 78254 | 60.2 28.6 11.2 4.2 43.9 28.9 | -0.454 | exp/tri6_nnet_mpe/decode_dev10h.pem_epoch4/score_11/dev10h.pem.ctm.sys
diff --git a/egs/babel/s5d/run-1-main-extend-lex.sh b/egs/babel/s5d/run-1-main-extend-lex.sh
new file mode 100755
index 00000000000..035049dad9c
--- /dev/null
+++ b/egs/babel/s5d/run-1-main-extend-lex.sh
@@ -0,0 +1,190 @@
+#!/bin/bash
+
+# Parameters for extended lexicon.
+extend_lexicon=true
+unk_fraction_boost=1.0
+num_sent_gen=12000000
+num_prons=1000000
+
+[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1
+[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+[ -f local.conf ] && . ./local.conf
+
+. ./utils/parse_options.sh
+
+set -e           #Exit on non-zero return code from any command
+set -o pipefail  #Exit if any of the commands in the pipeline will
+                 #return non-zero return code
+#set -u          #Fail on an undefined variable
+
+lexicon=data/local/lexicon.txt
+if $extend_lexicon; then
+  lexicon=data/local/lexiconp.txt
+fi
+
+#Preparing dev2h and train directories
+if [ ! -f data/raw_train_data/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the TRAIN set"
+  echo ---------------------------------------------------------------------
+
+  local/make_corpus_subset.sh "$train_data_dir" "$train_data_list" ./data/raw_train_data
+  train_data_dir=`readlink -f ./data/raw_train_data`
+  touch data/raw_train_data/.done
+fi
+nj_max=`cat $train_data_list | wc -l`
+if [[ "$nj_max" -lt "$train_nj" ]] ; then
+  echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)"
+  exit 1;
+fi
+train_data_dir=`readlink -f ./data/raw_train_data`
+
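+# (A minimal alternative, as a sketch only and not part of the original
+# recipe: instead of failing outright, the check above could cap the job
+# count at the number of source files, e.g.
+#   [ "$train_nj" -gt "$nj_max" ] && train_nj=$nj_max
+# since each job operates on whole files.)
+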
-d data/raw_dev2h_data ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the DEV2H set"
+  echo ---------------------------------------------------------------------
+  local/make_corpus_subset.sh "$dev2h_data_dir" "$dev2h_data_list" ./data/raw_dev2h_data || exit 1
+fi
+
+if [ ! -d data/raw_dev10h_data ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the DEV10H set"
+  echo ---------------------------------------------------------------------
+  local/make_corpus_subset.sh "$dev10h_data_dir" "$dev10h_data_list" ./data/raw_dev10h_data || exit 1
+fi
+nj_max=`cat $dev2h_data_list | wc -l`
+if [[ "$nj_max" -lt "$decode_nj" ]] ; then
+  echo "The maximum reasonable number of jobs is $nj_max -- you have $decode_nj! (The training and decoding process has file-granularity)"
+  exit 1
+fi
+
+# Move data/dev2h preparation forward so we can get data/dev2h/text for
+# diagnostic purposes when extending the lexicon.
+if [[ ! -f data/dev2h/wav.scp || data/dev2h/wav.scp -ot ./data/raw_dev2h_data/audio ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing dev2h data lists in data/dev2h on" `date`
+  echo ---------------------------------------------------------------------
+  mkdir -p data/dev2h
+  local/prepare_acoustic_training_data.pl \
+    --fragmentMarkers \-\*\~ \
+    `pwd`/data/raw_dev2h_data data/dev2h > data/dev2h/skipped_utts.log || exit 1
+fi
+
+if [[ ! -f data/dev2h/glm || data/dev2h/glm -ot "$glmFile" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing dev2h stm files in data/dev2h on" `date`
+  echo ---------------------------------------------------------------------
+  if [ -z $dev2h_stm_file ]; then
+    echo "WARNING: You should define the variable stm_file pointing to the IndusDB stm"
+    echo "WARNING: Doing that will give you scoring close to the NIST scoring."
+    local/prepare_stm.pl --fragmentMarkers \-\*\~ data/dev2h || exit 1
+  else
+    local/augment_original_stm.pl $dev2h_stm_file data/dev2h || exit 1
+  fi
+  [ ! -z $glmFile ] && cp $glmFile data/dev2h/glm
+
+fi
+
+mkdir -p data/local
+if [[ ! -f $lexicon || $lexicon -ot "$lexicon_file" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing lexicon in data/local on" `date`
+  echo ---------------------------------------------------------------------
+  local/make_lexicon_subset.sh $train_data_dir/transcription $lexicon_file data/local/filtered_lexicon.txt
+  local/prepare_lexicon.pl --phonemap "$phoneme_mapping" \
+    $lexiconFlags data/local/filtered_lexicon.txt data/local
+  if $extend_lexicon; then
+    # Extend the original lexicon.
+    # Will create the files data/local/extend/{lexiconp.txt,oov2prob}.
+    mv data/local/lexicon.txt data/local/lexicon_orig.txt
+    local/extend_lexicon.sh --cmd "$train_cmd" --cleanup false \
+      --num-sent-gen $num_sent_gen --num-prons $num_prons \
+      data/local/lexicon_orig.txt data/local/extend data/dev2h/text
+    cp data/local/extend/lexiconp.txt data/local/
+  fi
+fi
+
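+# At this point data/local/lexiconp.txt holds the extended lexicon (with
+# pronunciation probabilities) and data/local/extend/oov2prob the
+# probabilities of the generated OOV words; a quick sanity check (sketch):
+#   head data/local/extend/oov2prob
+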
+mkdir -p data/lang
+if [[ ! -f data/lang/L.fst || data/lang/L.fst -ot $lexicon ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Creating L.fst etc in data/lang on" `date`
+  echo ---------------------------------------------------------------------
+  utils/prepare_lang.sh \
+    --share-silence-phones true \
+    data/local $oovSymbol data/local/tmp.lang data/lang
+fi
+
+if [[ ! -f data/train/wav.scp || data/train/wav.scp -ot "$train_data_dir" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing acoustic training lists in data/train on" `date`
+  echo ---------------------------------------------------------------------
+  mkdir -p data/train
+  local/prepare_acoustic_training_data.pl \
+    --vocab $lexicon --fragmentMarkers \-\*\~ \
+    $train_data_dir data/train > data/train/skipped_utts.log
+fi
+
+if [[ ! -f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Training SRILM language models on" `date`
+  echo ---------------------------------------------------------------------
+  # If extending the lexicon, use "--words-file data/local/lexicon_orig.txt" so
+  # that the LM is trained just on the vocab that appears in the text. Will add
+  # in the OOVs later.
+  words_file_param=()
+  if $extend_lexicon; then
+    words_file_param=(--words-file data/local/lexicon_orig.txt)
+  fi
+  local/train_lms_srilm.sh "${words_file_param[@]}" \
+    --dev-text data/dev2h/text --oov-symbol "$oovSymbol"\
+    --train-text data/train/text data data/srilm
+fi
+
+if [[ ! -f data/lang/G.fst || data/lang/G.fst -ot data/srilm/lm.gz ||\
+    ( -f data/local/extend/oov2prob &&\
+      data/lang/G.fst -ot data/local/extend/oov2prob ) ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Creating G.fst on " `date`
+  echo ---------------------------------------------------------------------
+  extend_lexicon_param=()
+  if $extend_lexicon; then
+    [ -f data/local/extend/original_oov_rates ] || exit 1;
+    unk_fraction=`cat data/local/extend/original_oov_rates |\
+      grep "token" | awk -v x=$unk_fraction_boost '{print $NF/100.0*x}'`
+    extend_lexicon_param=(--cleanup false --unk-fraction $unk_fraction \
+      --oov-prob-file data/local/extend/oov2prob)
+  fi
+  local/arpa2G.sh ${extend_lexicon_param[@]} \
+    data/srilm/lm.gz data/lang data/lang
+fi
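+# Worked example for the unk_fraction line above (numbers hypothetical): if
+# original_oov_rates reports a token OOV rate of 4.2% and
+# unk_fraction_boost=1.0, then unk_fraction = 4.2/100*1.0 = 0.042, i.e.
+# roughly 4% of the LM probability mass is set aside for the
+# extended-lexicon OOV words.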
+decode_nj=$dev2h_nj
+
+echo ---------------------------------------------------------------------
+echo "Starting plp feature extraction for data/train in plp on" `date`
+echo ---------------------------------------------------------------------
+
+if [ ! -f data/train/.plp.done ]; then
+  if $use_pitch; then
+    steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp_pitch/train plp
+  else
+    steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp/train plp
+  fi
+  utils/fix_data_dir.sh data/train
+  steps/compute_cmvn_stats.sh data/train exp/make_plp/train plp
+  utils/fix_data_dir.sh data/train
+  touch data/train/.plp.done
+fi
+
+touch data/.extlex
+
+echo -------------------------------------------------------------------------
+echo "Extended lexicon finished on" `date`. Now running script run-1-main.sh
+echo -------------------------------------------------------------------------
+./run-1-main.sh
+exit 0
diff --git a/egs/babel/s5d/run-1-main-unicode.sh b/egs/babel/s5d/run-1-main-unicode.sh
new file mode 100755
index 00000000000..e3fb2486c83
--- /dev/null
+++ b/egs/babel/s5d/run-1-main-unicode.sh
@@ -0,0 +1,385 @@
+#!/bin/bash
+
+# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first.
+tri5_only=false
+sgmm5_only=false
+denlats_only=false
+data_only=false
+morfessor=true
+tag_percentage=0.1
+
+[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1
+[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+[ -f local.conf ] && . ./local.conf
+
+. ./utils/parse_options.sh
+
+set -e           #Exit on non-zero return code from any command
+set -o pipefail  #Exit if any of the commands in the pipeline will
+                 #return non-zero return code
+#set -u          #Fail on an undefined variable
+
+lexicon=data/local/lexicon.txt
+if $extend_lexicon; then
+  lexicon=data/local/lexiconp.txt
+fi
+
+./local/check_tools.sh || exit 1
+
+#Preparing dev2h and train directories
+if [ ! -f data/raw_train_data/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the TRAIN set"
+  echo ---------------------------------------------------------------------
+
+  local/make_corpus_subset.sh "$train_data_dir" "$train_data_list" ./data/raw_train_data
+  train_data_dir=`readlink -f ./data/raw_train_data`
+  touch data/raw_train_data/.done
+fi
+nj_max=`cat $train_data_list | wc -l`
+if [[ "$nj_max" -lt "$train_nj" ]] ; then
+  echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)"
+  exit 1;
+fi
+train_data_dir=`readlink -f ./data/raw_train_data`
+
+if [ ! -d data/raw_dev10h_data ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the DEV10H set"
+  echo ---------------------------------------------------------------------
+  local/make_corpus_subset.sh "$dev10h_data_dir" "$dev10h_data_list" ./data/raw_dev10h_data || exit 1
+fi
+
+
+mkdir -p data/local
+if [[ ! -f $lexicon || $lexicon -ot "$lexicon_file" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing lexicon in data/local on" `date`
+  echo ---------------------------------------------------------------------
+
+  local/lexicon/make_word_list.py $train_data_dir/filelist.list $train_data_dir/transcription data/local/word_list.txt
+  echo -e "<silence> SIL\n<unk> <oov>\n<noise> <sss>\n<v-noise> <vns>" > data/local/nonspeech.txt
+  echo -e "<hes> <hes>" > data/local/extraspeech.txt
+
+  fmt="word_list"
+  if $morfessor; then
+    fmt="morfessor"
+    morfessor-train --encoding=utf_8 --traindata-list -f"-_" -s data/local/morfessor.bin \
+      data/local/word_list.txt
+    morfessor-segment --encoding=utf_8 --output-format-separator '.' 
--viterbi-maxlen 3 \ + -l data/local/morfessor.bin <(cut -d' ' -f2 data/local/word_list.txt) \ + | sed 's/\.[\_\-]\././g' > data/local/segments + cut -d' ' data/local/word_list.txt -f2 | paste -d' ' - data/local/segments > data/local/word_list_tmp.txt + mv data/local/word_list_tmp.txt data/local/word_list.txt + fi + + local/lexicon/make_unicode_lexicon.py --tag_percentage $tag_percentage --fmt $fmt \ + --nonspeech data/local/nonspeech.txt --extraspeech data/local/extraspeech.txt \ + --verbose data/local/word_list.txt data/local/lexicon.txt + local/prepare_unicode_lexicon.py --nonspeech data/local/nonspeech.txt \ + --extraspeech data/local/extraspeech.txt data/local/lexicon_table.txt data/local + cp data/local/lexicon.txt data/local/filtered_lexicon.txt +fi + +mkdir -p data/lang +if [[ ! -f data/lang/L.fst || data/lang/L.fst -ot $lexicon ]]; then + echo --------------------------------------------------------------------- + echo "Creating L.fst etc in data/lang on" `date` + echo --------------------------------------------------------------------- + utils/prepare_lang.sh \ + --share-silence-phones true \ + data/local $oovSymbol data/local/tmp.lang data/lang +fi + +if [[ ! -f data/train/wav.scp || data/train/wav.scp -ot "$train_data_dir" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing acoustic training lists in data/train on" `date` + echo --------------------------------------------------------------------- + mkdir -p data/train + local/prepare_acoustic_training_data.pl \ + --vocab $lexicon --fragmentMarkers \-\*\~ \ + $train_data_dir data/train > data/train/skipped_utts.log +fi + +if [[ ! -f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then + echo --------------------------------------------------------------------- + echo "Training SRILM language models on" `date` + echo --------------------------------------------------------------------- + local/train_lms_srilm.sh --oov-symbol "$oovSymbol"\ + --train-text data/train/text data data/srilm +fi + +if [[ ! -f data/lang/G.fst || data/lang/G.fst -ot data/srilm/lm.gz ]]; then + echo --------------------------------------------------------------------- + echo "Creating G.fst on " `date` + echo --------------------------------------------------------------------- + local/arpa2G.sh data/srilm/lm.gz data/lang data/lang +fi + +echo --------------------------------------------------------------------- +echo "Starting plp feature extraction for data/train in plp on" `date` +echo --------------------------------------------------------------------- + +if [ ! -f data/train/.plp.done ]; then + if $use_pitch; then + steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp_pitch/train plp + else + steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp/train plp + fi + utils/fix_data_dir.sh data/train + steps/compute_cmvn_stats.sh data/train exp/make_plp/train plp + utils/fix_data_dir.sh data/train + touch data/train/.plp.done +fi + +mkdir -p exp + +if [ ! 
-f data/train_sub3/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting monophone training data in data/train_sub[123] on" `date` + echo --------------------------------------------------------------------- + numutt=`cat data/train/feats.scp | wc -l`; + utils/subset_data_dir.sh data/train 5000 data/train_sub1 + if [ $numutt -gt 10000 ] ; then + utils/subset_data_dir.sh data/train 10000 data/train_sub2 + else + (cd data; ln -s train train_sub2 ) + fi + if [ $numutt -gt 20000 ] ; then + utils/subset_data_dir.sh data/train 20000 data/train_sub3 + else + (cd data; ln -s train train_sub3 ) + fi + + touch data/train_sub3/.done +fi + +if $data_only; then + echo "--data-only is true" && exit 0 +fi + +if [ ! -f exp/mono/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting (small) monophone training in exp/mono on" `date` + echo --------------------------------------------------------------------- + steps/train_mono.sh \ + --boost-silence $boost_sil --nj 8 --cmd "$train_cmd" \ + data/train_sub1 data/lang exp/mono + touch exp/mono/.done +fi + +if [ ! -f exp/tri1/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting (small) triphone training in exp/tri1 on" `date` + echo --------------------------------------------------------------------- + steps/align_si.sh \ + --boost-silence $boost_sil --nj 12 --cmd "$train_cmd" \ + data/train_sub2 data/lang exp/mono exp/mono_ali_sub2 + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" $numLeavesTri1 $numGaussTri1 \ + data/train_sub2 data/lang exp/mono_ali_sub2 exp/tri1 + + touch exp/tri1/.done +fi + + +echo --------------------------------------------------------------------- +echo "Starting (medium) triphone training in exp/tri2 on" `date` +echo --------------------------------------------------------------------- +if [ ! -f exp/tri2/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj 24 --cmd "$train_cmd" \ + data/train_sub3 data/lang exp/tri1 exp/tri1_ali_sub3 + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" $numLeavesTri2 $numGaussTri2 \ + data/train_sub3 data/lang exp/tri1_ali_sub3 exp/tri2 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train_sub3 data/lang data/local/ \ + exp/tri2 data/local/dictp/tri2 data/local/langp/tri2 data/langp/tri2 + + touch exp/tri2/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (full) triphone training in exp/tri3 on" `date` +echo --------------------------------------------------------------------- +if [ ! -f exp/tri3/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri2 exp/tri2 exp/tri2_ali + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesTri3 $numGaussTri3 data/train data/langp/tri2 exp/tri2_ali exp/tri3 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local/ \ + exp/tri3 data/local/dictp/tri3 data/local/langp/tri3 data/langp/tri3 + + touch exp/tri3/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (lda_mllt) triphone training in exp/tri4 on" `date` +echo --------------------------------------------------------------------- +if [ ! 
-f exp/tri4/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri3 exp/tri3 exp/tri3_ali + + steps/train_lda_mllt.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesMLLT $numGaussMLLT data/train data/langp/tri3 exp/tri3_ali exp/tri4 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri4 data/local/dictp/tri4 data/local/langp/tri4 data/langp/tri4 + + touch exp/tri4/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (SAT) triphone training in exp/tri5 on" `date` +echo --------------------------------------------------------------------- + +if [ ! -f exp/tri5/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri4 exp/tri4 exp/tri4_ali + + steps/train_sat.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesSAT $numGaussSAT data/train data/langp/tri4 exp/tri4_ali exp/tri5 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri5 data/local/dictp/tri5 data/local/langp/tri5 data/langp/tri5 + + touch exp/tri5/.done +fi + + +if [ ! -f exp/tri5_ali/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/tri5_ali on" `date` + echo --------------------------------------------------------------------- + steps/align_fmllr.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri5 exp/tri5 exp/tri5_ali + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri5_ali data/local/dictp/tri5_ali data/local/langp/tri5_ali data/langp/tri5_ali + + touch exp/tri5_ali/.done +fi + +if [ ! -f data/langp_test/.done ]; then + cp -R data/langp/tri5_ali/ data/langp_test + cp data/lang/G.fst data/langp_test + touch data/langp_test/.done +fi + +if $tri5_only ; then + echo "Exiting after stage TRI5, as requested. " + echo "Everything went fine. Done" + exit 0; +fi + +################################################################################ +# Ready to start SGMM training +################################################################################ + +if [ ! -f exp/ubm5/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/ubm5 on" `date` + echo --------------------------------------------------------------------- + steps/train_ubm.sh \ + --cmd "$train_cmd" $numGaussUBM \ + data/train data/langp/tri5_ali exp/tri5_ali exp/ubm5 + touch exp/ubm5/.done +fi + +if [ ! -f exp/sgmm5/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5 on" `date` + echo --------------------------------------------------------------------- + steps/train_sgmm2.sh \ + --cmd "$train_cmd" $numLeavesSGMM $numGaussSGMM \ + data/train data/langp/tri5_ali exp/tri5_ali exp/ubm5/final.ubm exp/sgmm5 + #steps/train_sgmm2_group.sh \ + # --cmd "$train_cmd" "${sgmm_group_extra_opts[@]-}" $numLeavesSGMM $numGaussSGMM \ + # data/train data/lang exp/tri5_ali exp/ubm5/final.ubm exp/sgmm5 + touch exp/sgmm5/.done +fi + +if $sgmm5_only ; then + echo "Exiting after stage SGMM5, as requested. " + echo "Everything went fine. 
Done" + exit 0; +fi +################################################################################ +# Ready to start discriminative SGMM training +################################################################################ + +if [ ! -f exp/sgmm5_ali/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_ali on" `date` + echo --------------------------------------------------------------------- + steps/align_sgmm2.sh \ + --nj $train_nj --cmd "$train_cmd" --transform-dir exp/tri5_ali \ + --use-graphs true --use-gselect true \ + data/train data/lang exp/sgmm5 exp/sgmm5_ali + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/sgmm5_ali data/local/dictp/sgmm5 data/local/langp/sgmm5 data/langp/sgmm5 + + touch exp/sgmm5_ali/.done +fi + + +if [ ! -f exp/sgmm5_denlats/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_denlats on" `date` + echo --------------------------------------------------------------------- + steps/make_denlats_sgmm2.sh \ + --nj $train_nj --sub-split $train_nj "${sgmm_denlats_extra_opts[@]}" \ + --beam 10.0 --lattice-beam 6 --cmd "$decode_cmd" --transform-dir exp/tri5_ali \ + data/train data/langp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats + touch exp/sgmm5_denlats/.done +fi + + +if $denlats_only ; then + echo "Exiting after generating denlats, as requested. " + echo "Everything went fine. Done" + exit 0; +fi + + +if [ ! -f exp/sgmm5_mmi_b0.1/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_mmi_b0.1 on" `date` + echo --------------------------------------------------------------------- + steps/train_mmi_sgmm2.sh \ + --cmd "$train_cmd" "${sgmm_mmi_extra_opts[@]}" \ + --drop-frames true --transform-dir exp/tri5_ali --boost 0.1 \ + data/train data/langp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats \ + exp/sgmm5_mmi_b0.1 + touch exp/sgmm5_mmi_b0.1/.done +fi + +echo --------------------------------------------------------------------- +echo "Finished successfully on" `date` +echo --------------------------------------------------------------------- + +exit 0 diff --git a/egs/babel/s5d/run-1-main.sh b/egs/babel/s5d/run-1-main.sh new file mode 100755 index 00000000000..d85407f8db4 --- /dev/null +++ b/egs/babel/s5d/run-1-main.sh @@ -0,0 +1,363 @@ +#!/bin/bash + +# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first. +tri5_only=false +sgmm5_only=false +denlats_only=false +data_only=false + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh + +set -e #Exit on non-zero return code from any command +set -o pipefail #Exit if any of the commands in the pipeline will + #return non-zero return code +#set -u #Fail on an undefined variable + +lexicon=data/local/lexicon.txt +if $extend_lexicon; then + lexicon=data/local/lexiconp.txt +fi + +./local/check_tools.sh || exit 1 + +#Preparing dev2h and train directories +if [ ! 
-f data/raw_train_data/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the TRAIN set"
+  echo ---------------------------------------------------------------------
+
+  local/make_corpus_subset.sh "$train_data_dir" "$train_data_list" ./data/raw_train_data
+  train_data_dir=`readlink -f ./data/raw_train_data`
+  touch data/raw_train_data/.done
+fi
+nj_max=`cat $train_data_list | wc -l`
+if [[ "$nj_max" -lt "$train_nj" ]] ; then
+  echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)"
+  exit 1;
+fi
+train_data_dir=`readlink -f ./data/raw_train_data`
+
+if [ ! -d data/raw_dev10h_data ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the DEV10H set"
+  echo ---------------------------------------------------------------------
+  local/make_corpus_subset.sh "$dev10h_data_dir" "$dev10h_data_list" ./data/raw_dev10h_data || exit 1
+fi
+
+
+mkdir -p data/local
+if [[ ! -f $lexicon || $lexicon -ot "$lexicon_file" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing lexicon in data/local on" `date`
+  echo ---------------------------------------------------------------------
+  local/make_lexicon_subset.sh $train_data_dir/transcription $lexicon_file data/local/filtered_lexicon.txt
+  local/prepare_lexicon.pl --phonemap "$phoneme_mapping" \
+    $lexiconFlags data/local/filtered_lexicon.txt data/local
+fi
+
+mkdir -p data/lang
+if [[ ! -f data/lang/L.fst || data/lang/L.fst -ot $lexicon ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Creating L.fst etc in data/lang on" `date`
+  echo ---------------------------------------------------------------------
+  utils/prepare_lang.sh \
+    --share-silence-phones true \
+    data/local $oovSymbol data/local/tmp.lang data/lang
+fi
+
+if [[ ! -f data/train/wav.scp || data/train/wav.scp -ot "$train_data_dir" ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Preparing acoustic training lists in data/train on" `date`
+  echo ---------------------------------------------------------------------
+  mkdir -p data/train
+  local/prepare_acoustic_training_data.pl \
+    --vocab $lexicon --fragmentMarkers \-\*\~ \
+    $train_data_dir data/train > data/train/skipped_utts.log
+fi
+
+if [[ ! -f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Training SRILM language models on" `date`
+  echo ---------------------------------------------------------------------
+  local/train_lms_srilm.sh --oov-symbol "$oovSymbol"\
+    --train-text data/train/text data data/srilm
+fi
+
+if [[ ! -f data/lang/G.fst || data/lang/G.fst -ot data/srilm/lm.gz ]]; then
+  echo ---------------------------------------------------------------------
+  echo "Creating G.fst on " `date`
+  echo ---------------------------------------------------------------------
+  local/arpa2G.sh data/srilm/lm.gz data/lang data/lang
+fi
+
+echo ---------------------------------------------------------------------
+echo "Starting plp feature extraction for data/train in plp on" `date`
+echo ---------------------------------------------------------------------
+
+if [ ! 
-f data/train/.plp.done ]; then + if $use_pitch; then + steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp_pitch/train plp + else + steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp/train plp + fi + utils/fix_data_dir.sh data/train + steps/compute_cmvn_stats.sh data/train exp/make_plp/train plp + utils/fix_data_dir.sh data/train + touch data/train/.plp.done +fi + +mkdir -p exp + +if [ ! -f data/train_sub3/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting monophone training data in data/train_sub[123] on" `date` + echo --------------------------------------------------------------------- + numutt=`cat data/train/feats.scp | wc -l`; + utils/subset_data_dir.sh data/train 5000 data/train_sub1 + if [ $numutt -gt 10000 ] ; then + utils/subset_data_dir.sh data/train 10000 data/train_sub2 + else + (cd data; ln -s train train_sub2 ) + fi + if [ $numutt -gt 20000 ] ; then + utils/subset_data_dir.sh data/train 20000 data/train_sub3 + else + (cd data; ln -s train train_sub3 ) + fi + + touch data/train_sub3/.done +fi + +if $data_only; then + echo "--data-only is true" && exit 0 +fi + +if [ ! -f exp/mono/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting (small) monophone training in exp/mono on" `date` + echo --------------------------------------------------------------------- + steps/train_mono.sh \ + --boost-silence $boost_sil --nj 8 --cmd "$train_cmd" \ + data/train_sub1 data/lang exp/mono + touch exp/mono/.done +fi + +if [ ! -f exp/tri1/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting (small) triphone training in exp/tri1 on" `date` + echo --------------------------------------------------------------------- + steps/align_si.sh \ + --boost-silence $boost_sil --nj 12 --cmd "$train_cmd" \ + data/train_sub2 data/lang exp/mono exp/mono_ali_sub2 + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" $numLeavesTri1 $numGaussTri1 \ + data/train_sub2 data/lang exp/mono_ali_sub2 exp/tri1 + + touch exp/tri1/.done +fi + + +echo --------------------------------------------------------------------- +echo "Starting (medium) triphone training in exp/tri2 on" `date` +echo --------------------------------------------------------------------- +if [ ! -f exp/tri2/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj 24 --cmd "$train_cmd" \ + data/train_sub3 data/lang exp/tri1 exp/tri1_ali_sub3 + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" $numLeavesTri2 $numGaussTri2 \ + data/train_sub3 data/lang exp/tri1_ali_sub3 exp/tri2 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train_sub3 data/lang data/local/ \ + exp/tri2 data/local/dictp/tri2 data/local/langp/tri2 data/langp/tri2 + + touch exp/tri2/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (full) triphone training in exp/tri3 on" `date` +echo --------------------------------------------------------------------- +if [ ! 
-f exp/tri3/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri2 exp/tri2 exp/tri2_ali + + steps/train_deltas.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesTri3 $numGaussTri3 data/train data/langp/tri2 exp/tri2_ali exp/tri3 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local/ \ + exp/tri3 data/local/dictp/tri3 data/local/langp/tri3 data/langp/tri3 + + touch exp/tri3/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (lda_mllt) triphone training in exp/tri4 on" `date` +echo --------------------------------------------------------------------- +if [ ! -f exp/tri4/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri3 exp/tri3 exp/tri3_ali + + steps/train_lda_mllt.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesMLLT $numGaussMLLT data/train data/langp/tri3 exp/tri3_ali exp/tri4 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri4 data/local/dictp/tri4 data/local/langp/tri4 data/langp/tri4 + + touch exp/tri4/.done +fi + +echo --------------------------------------------------------------------- +echo "Starting (SAT) triphone training in exp/tri5 on" `date` +echo --------------------------------------------------------------------- + +if [ ! -f exp/tri5/.done ]; then + steps/align_si.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri4 exp/tri4 exp/tri4_ali + + steps/train_sat.sh \ + --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesSAT $numGaussSAT data/train data/langp/tri4 exp/tri4_ali exp/tri5 + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri5 data/local/dictp/tri5 data/local/langp/tri5 data/langp/tri5 + + touch exp/tri5/.done +fi + + +if [ ! -f exp/tri5_ali/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/tri5_ali on" `date` + echo --------------------------------------------------------------------- + steps/align_fmllr.sh \ + --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ + data/train data/langp/tri5 exp/tri5 exp/tri5_ali + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/tri5_ali data/local/dictp/tri5_ali data/local/langp/tri5_ali data/langp/tri5_ali + + touch exp/tri5_ali/.done +fi + +if [ ! -f data/langp_test/.done ]; then + cp -R data/langp/tri5_ali/ data/langp_test + cp data/lang/G.fst data/langp_test + touch data/langp_test/.done +fi + +if $tri5_only ; then + echo "Exiting after stage TRI5, as requested. " + echo "Everything went fine. Done" + exit 0; +fi + +################################################################################ +# Ready to start SGMM training +################################################################################ + +if [ ! -f exp/ubm5/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/ubm5 on" `date` + echo --------------------------------------------------------------------- + steps/train_ubm.sh \ + --cmd "$train_cmd" $numGaussUBM \ + data/train data/langp/tri5_ali exp/tri5_ali exp/ubm5 + touch exp/ubm5/.done +fi + +if [ ! 
-f exp/sgmm5/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5 on" `date` + echo --------------------------------------------------------------------- + steps/train_sgmm2.sh \ + --cmd "$train_cmd" $numLeavesSGMM $numGaussSGMM \ + data/train data/langp/tri5_ali exp/tri5_ali exp/ubm5/final.ubm exp/sgmm5 + #steps/train_sgmm2_group.sh \ + # --cmd "$train_cmd" "${sgmm_group_extra_opts[@]-}" $numLeavesSGMM $numGaussSGMM \ + # data/train data/lang exp/tri5_ali exp/ubm5/final.ubm exp/sgmm5 + touch exp/sgmm5/.done +fi + +if $sgmm5_only ; then + echo "Exiting after stage SGMM5, as requested. " + echo "Everything went fine. Done" + exit 0; +fi +################################################################################ +# Ready to start discriminative SGMM training +################################################################################ + +if [ ! -f exp/sgmm5_ali/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_ali on" `date` + echo --------------------------------------------------------------------- + steps/align_sgmm2.sh \ + --nj $train_nj --cmd "$train_cmd" --transform-dir exp/tri5_ali \ + --use-graphs true --use-gselect true \ + data/train data/lang exp/sgmm5 exp/sgmm5_ali + + local/reestimate_langp.sh --cmd "$train_cmd" --unk "$oovSymbol" \ + data/train data/lang data/local \ + exp/sgmm5_ali data/local/dictp/sgmm5 data/local/langp/sgmm5 data/langp/sgmm5 + + touch exp/sgmm5_ali/.done +fi + + +if [ ! -f exp/sgmm5_denlats/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_denlats on" `date` + echo --------------------------------------------------------------------- + steps/make_denlats_sgmm2.sh \ + --nj $train_nj --sub-split $train_nj "${sgmm_denlats_extra_opts[@]}" \ + --beam 10.0 --lattice-beam 6 --cmd "$decode_cmd" --transform-dir exp/tri5_ali \ + data/train data/langp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats + touch exp/sgmm5_denlats/.done +fi + + +if $denlats_only ; then + echo "Exiting after generating denlats, as requested. " + echo "Everything went fine. Done" + exit 0; +fi + + +if [ ! -f exp/sgmm5_mmi_b0.1/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting exp/sgmm5_mmi_b0.1 on" `date` + echo --------------------------------------------------------------------- + steps/train_mmi_sgmm2.sh \ + --cmd "$train_cmd" "${sgmm_mmi_extra_opts[@]}" \ + --drop-frames true --transform-dir exp/tri5_ali --boost 0.1 \ + data/train data/langp/sgmm5 exp/sgmm5_ali exp/sgmm5_denlats \ + exp/sgmm5_mmi_b0.1 + touch exp/sgmm5_mmi_b0.1/.done +fi + +echo --------------------------------------------------------------------- +echo "Finished successfully on" `date` +echo --------------------------------------------------------------------- + +exit 0 diff --git a/egs/babel/s5d/run-2-segmentation.sh b/egs/babel/s5d/run-2-segmentation.sh new file mode 100755 index 00000000000..0ced3ffabac --- /dev/null +++ b/egs/babel/s5d/run-2-segmentation.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +# Copyright 2014 Vimal Manohar, Johns Hopkins University (Author: Jan Trmal) +# Apache 2.0 + +#Begin configuration section + +silence_segment_fraction=1.0 # What fraction of segment we should keep + +#end configuration section + +# This is not necessarily the top-level run.sh as it is in other directories. see README.txt first. +[ ! 
+[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a starting point' && exit 1
+[ ! -f ./conf/common_vars.sh ] && echo 'The file conf/common_vars.sh does not exist!' && exit 1
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+[ -f local.conf ] && . ./local.conf
+
+. ./utils/parse_options.sh
+
+set -e           #Exit on non-zero return code from any command
+set -o pipefail  #Exit if any of the commands in the pipeline will
+                 #return non-zero return code
+set -u           #Fail on an undefined variable
+
+#Later in the script we assume the run-1-main.sh was run (because we are using exp/tri4)
+#So let's make it mandatory, instead of doing the work on our own.
+[ ! -f data/raw_train_data/.done ] && echo "The source training data directory is not ready. Use the run-1-main.sh script to prepare it!" && exit 1
+
+nj_max=`cat $train_data_list | wc -l`
+if [[ "$nj_max" -lt "$train_nj" ]] ; then
+  echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)"
+  exit 1;
+fi
+train_data_dir=`readlink -f ./data/raw_train_data`
+
+if [ ! -f data/train_seg/.done ]; then
+
+  mkdir -p data/train_seg
+
+  echo ---------------------------------------------------------------------
+  echo "Preparing acoustic training lists in data/train_seg on" `date`
+  echo ---------------------------------------------------------------------
+  local/prepare_acoustic_training_data.pl --get-whole-transcripts "true" \
+    --vocab data/local/lexicon.txt --fragmentMarkers \-\*\~ \
+    $train_data_dir data/train_seg > data/train_seg/skipped_utts.log
+  mv data/train_seg/text data/train_seg/text_orig
+
+  num_silence_segments=$(cat data/train_seg/text_orig | awk '{if (NF == 2 && $2 == "<silence>") {print $0}}' | wc -l)
+  num_keep_silence_segments=`perl -e "printf '%d', ($num_silence_segments * $silence_segment_fraction)"`
+  if [ $num_silence_segments -eq $num_keep_silence_segments ]; then
+    # Keep all segments including silence segments
+    cat data/train_seg/text_orig | awk '{if (NF == 2 && $2 == "<silence>") {print $1} else {print $0}}' > data/train_seg/text
+  else
+    # Keep only a fraction of silence segments
+
+    cat data/train_seg/text_orig \
+      | awk 'BEGIN{i=0} \
+        { \
+          if (NF == 2 && $2 == "<silence>") { \
+            if (i<'$num_keep_silence_segments') { \
+              print $1; \
+              i++; \
+            } \
+          } else {print $0}\
+        }' > data/train_seg/text
+  fi
+  #rm data/train_seg/text_orig
+  utils/fix_data_dir.sh data/train_seg
+
+  echo ---------------------------------------------------------------------
+  echo "Starting plp feature extraction for data/train_seg in plp on" `date`
+  echo ---------------------------------------------------------------------
+
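+  # The .done / .plp.done files used throughout these scripts are marker
+  # files that make each stage idempotent: a stage is skipped when its
+  # marker exists, and the marker is touched only after the stage finishes,
+  # so an interrupted run resumes where it left off.  A minimal sketch of
+  # the pattern (illustrative only):
+  #   if [ ! -f some_dir/.done ]; then
+  #     expensive_stage ... && touch some_dir/.done
+  #   fi
+  if [ ! 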
-f data/train_seg/.plp.done ]; then + if $use_pitch; then + steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj \ + data/train_seg exp/make_plp_pitch/train_seg plp + else + steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj \ + data/train_seg exp/make_plp/train_seg plp + fi + + utils/fix_data_dir.sh data/train_seg + steps/compute_cmvn_stats.sh data/train_seg exp/make_plp/train_seg plp + utils/fix_data_dir.sh data/train_seg + touch data/train_seg/.plp.done + fi + touch data/train_seg/.done +fi + +echo --------------------------------------------------------------------- +echo "Training segmentation model in exp/tri4b_seg" +echo --------------------------------------------------------------------- + +local/resegment/train_segmentation.sh \ + --boost-sil 1.0 --nj $train_nj --cmd "$decode_cmd" \ + exp/tri4 data/train_seg data/lang exp/tri4b_seg || exit 1 + +echo --------------------------------------------------------------------- +echo "Finished successfully on" `date` +echo --------------------------------------------------------------------- + +exit 0 diff --git a/egs/babel/s5d/run-2a-nnet-cpu.sh b/egs/babel/s5d/run-2a-nnet-cpu.sh new file mode 100755 index 00000000000..35e7d3ceab3 --- /dev/null +++ b/egs/babel/s5d/run-2a-nnet-cpu.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +. conf/common_vars.sh +. ./lang.conf + +# This parameter will be used when the training dies at a certain point. +train_stage=-100 +dir=exp/tri6_nnet +. ./utils/parse_options.sh + +set -e +set -o pipefail +set -u + +# Wait till the main run.sh gets to the stage where's it's +# finished aligning the tri5 model. +echo "Waiting till exp/tri5_ali/.done exists...." +while [ ! -f exp/tri5_ali/.done ]; do sleep 30; done +echo "...done waiting for exp/tri5_ali/.done" + +if [ ! -f $dir/.done ]; then + steps/nnet2/train_pnorm.sh \ + --stage $train_stage --mix-up $dnn_mixup \ + --initial-learning-rate $dnn_init_learning_rate \ + --final-learning-rate $dnn_final_learning_rate \ + --num-hidden-layers $dnn_num_hidden_layers \ + --pnorm-input-dim $dnn_input_dim \ + --pnorm-output-dim $dnn_output_dim \ + --cmd "$train_cmd" \ + "${dnn_cpu_parallel_opts[@]}" \ + data/train data/lang exp/tri5_ali $dir || exit 1 + + touch $dir/.done +fi diff --git a/egs/babel/s5d/run-2a-nnet-ensemble-gpu.sh b/egs/babel/s5d/run-2a-nnet-ensemble-gpu.sh new file mode 100755 index 00000000000..06c9a330295 --- /dev/null +++ b/egs/babel/s5d/run-2a-nnet-ensemble-gpu.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +. conf/common_vars.sh +. ./lang.conf + +train_stage=-10 +dir=exp/tri6b_nnet + +. ./utils/parse_options.sh + +set -e +set -o pipefail +set -u + +dnn_num_hidden_layers=4 +dnn_pnorm_input_dim=3000 +dnn_pnorm_output_dim=300 +dnn_init_learning_rate=0.004 +dnn_final_learning_rate=0.001 +temp_dir=`pwd`/nnet_gpu_egs +ensemble_size=4 +initial_beta=0.1 +final_beta=5 +egs_dir= + +# Wait till the main run.sh gets to the stage where's it's +# finished aligning the tri5 model. +echo "Waiting till exp/tri5_ali/.done exists...." +while [ ! -f exp/tri5_ali/.done ]; do sleep 30; done +echo "...done waiting for exp/tri5_ali/.done" + +if [ ! 
-f $dir/.done ]; then + steps/nnet2/train_pnorm_ensemble.sh \ + --stage $train_stage --mix-up $dnn_mixup --egs-dir "$egs_dir" \ + --initial-learning-rate $dnn_init_learning_rate \ + --final-learning-rate $dnn_final_learning_rate \ + --num-hidden-layers $dnn_num_hidden_layers \ + --pnorm-input-dim $dnn_pnorm_input_dim \ + --pnorm-output-dim $dnn_pnorm_output_dim \ + --cmd "$train_cmd" \ + "${dnn_gpu_parallel_opts[@]}" \ + --ensemble-size $ensemble_size --initial-beta $initial_beta --final-beta $final_beta \ + data/train data/lang exp/tri5_ali $dir || exit 1 + touch $dir/.done +fi + diff --git a/egs/babel/s5d/run-2a-nnet-gpu.sh b/egs/babel/s5d/run-2a-nnet-gpu.sh new file mode 100755 index 00000000000..55733006d75 --- /dev/null +++ b/egs/babel/s5d/run-2a-nnet-gpu.sh @@ -0,0 +1,36 @@ +#!/bin/bash +dir=exp/tri6_nnet +train_stage=-10 + +. conf/common_vars.sh +. ./lang.conf + +# This parameter will be used when the training dies at a certain point. +train_stage=-100 +. ./utils/parse_options.sh + +set -e +set -o pipefail +set -u + +# Wait till the main run.sh gets to the stage where's it's +# finished aligning the tri5 model. +echo "Waiting till exp/tri5_ali/.done exists...." +while [ ! -f exp/tri5_ali/.done ]; do sleep 30; done +echo "...done waiting for exp/tri5_ali/.done" + +if [ ! -f $dir/.done ]; then + steps/nnet2/train_pnorm_fast.sh \ + --stage $train_stage --mix-up $dnn_mixup \ + --initial-learning-rate $dnn_init_learning_rate \ + --final-learning-rate $dnn_final_learning_rate \ + --num-hidden-layers $dnn_num_hidden_layers \ + --pnorm-input-dim $dnn_input_dim \ + --pnorm-output-dim $dnn_output_dim \ + --cmd "$train_cmd" \ + "${dnn_gpu_parallel_opts[@]}" \ + data/train data/langp/tri5_ali exp/tri5_ali $dir || exit 1 + + touch $dir/.done +fi + diff --git a/egs/babel/s5d/run-2a-nnet-mpe.sh b/egs/babel/s5d/run-2a-nnet-mpe.sh new file mode 100755 index 00000000000..6ddddb4cfda --- /dev/null +++ b/egs/babel/s5d/run-2a-nnet-mpe.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +. conf/common_vars.sh +. ./lang.conf + +set -e +set -o pipefail +set -u + +# Wait for cross-entropy training. +echo "Waiting till exp/tri6_nnet/.done exists...." +while [ ! -f exp/tri6_nnet/.done ]; do sleep 30; done +echo "...done waiting for exp/tri6_nnet/.done" + +# Generate denominator lattices. +if [ ! -f exp/tri6_nnet_denlats/.done ]; then + steps/nnet2/make_denlats.sh --cmd "$decode_cmd" \ + --nj $train_nj --sub-split $train_nj \ + "${dnn_denlats_extra_opts[@]}" \ + --transform-dir exp/tri5_ali \ + data/train data/lang exp/tri6_nnet exp/tri6_nnet_denlats || exit 1 + + touch exp/tri6_nnet_denlats/.done +fi + +# Generate alignment. +if [ ! -f exp/tri6_nnet_ali/.done ]; then + steps/nnet2/align.sh --use-gpu yes \ + --cmd "$decode_cmd $dnn_parallel_opts" \ + --transform-dir exp/tri5_ali --nj $train_nj \ + data/train data/lang exp/tri6_nnet exp/tri6_nnet_ali || exit 1 + + touch exp/tri6_nnet_ali/.done +fi + +train_stage=-100 +if [ ! 
-f exp/tri6_nnet_mpe/.done ]; then + steps/nnet2/train_discriminative.sh \ + --stage $train_stage --cmd "$decode_cmd" \ + --learning-rate $dnn_mpe_learning_rate \ + --modify-learning-rates true \ + --last-layer-factor $dnn_mpe_last_layer_factor \ + --num-epochs 4 --cleanup true \ + --retroactive $dnn_mpe_retroactive \ + --transform-dir exp/tri5_ali \ + "${dnn_gpu_mpe_parallel_opts[@]}" data/train data/lang \ + exp/tri6_nnet_ali exp/tri6_nnet_denlats exp/tri6_nnet/final.mdl exp/tri6_nnet_mpe || exit 1 + + touch exp/tri6_nnet_mpe/.done +fi diff --git a/egs/babel/s5d/run-2b-bnf.sh b/egs/babel/s5d/run-2b-bnf.sh new file mode 100755 index 00000000000..bdca049d941 --- /dev/null +++ b/egs/babel/s5d/run-2b-bnf.sh @@ -0,0 +1,150 @@ +#!/bin/bash + +# Copyright 2014 Pegah Ghahremani +# Apache 2.0 + +#Run supervised and semisupervised BNF training +#This yields approx 70 hours of data + +set -e #Exit on non-zero return code from any command +set -o pipefail #Exit if any of the commands in the pipeline will + #return non-zero return code +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +set -u #Fail on an undefined variable +skip_kws=true +skip_stt=false +semisupervised=true +unsup_string="_semisup" +train_stage=-100 +bnf_weight_threshold=0.35 +ali_dir= +ali_model=exp/tri6b_nnet/ +weights_dir=exp/best_path_weights/unsup.seg/decode_unsup.seg/ + +. ./utils/parse_options.sh + +if [ $babel_type == "full" ] && $semisupervised; then + echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false " + exit 1 +fi + + +if $semisupervised ; then + egs_string="--egs-dir exp_bnf${unsup_string}/tri6_bnf/egs" + dirid=unsup.seg +else + unsup_string="" #" ": supervised training, _semi_supervised: unsupervised BNF training + egs_string="" + dirid=train +fi + +[ ! -d $ali_model ] && echo "The alignment model $ali_model does not exist! Use --ali-model to specify it." && exit 1 + +datadir=data/${dirid} +exp_dir=exp_bnf${unsup_string} +data_bnf_dir=data_bnf${unsup_string} +param_bnf_dir=param_bnf${unsup_string} + +if [ -z $ali_dir ] ; then + # If alignment directory is not done, use exp/tri6_nnet_ali as alignment + # directory + ali_dir=exp/tri6_nnet_ali +fi + +if [ ! -f $ali_dir/.done ]; then + echo "$0: Aligning supervised training data in exp/tri6_nnet_ali" + + [ ! -f $ali_model/final.mdl ] && echo -e "$ali_model/final.mdl not found!\nRun run-6-nnet.sh first!" && exit 1 + steps/nnet2/align.sh --cmd "$train_cmd " \ + --use-gpu no --transform-dir exp/tri5_ali --nj $train_nj \ + data/train data/langp/tri5_ali $ali_model $ali_dir || exit 1 + touch $ali_dir/.done +fi + +############################################################################### +# +# Semi-supervised BNF training +# +############################################################################### +mkdir -p $exp_dir/tri6_bnf +if [ ! -f $exp_dir/tri6_bnf/.done ]; then + if $semisupervised ; then + + [ ! -d $datadir ] && echo "Error: $datadir is not available!" && exit 1; + echo "$0: Generate examples using unsupervised data in $exp_dir/tri6_nnet" + if [ ! 
-f $exp_dir/tri6_bnf/egs/.done ]; then + local/nnet2/get_egs_semi_supervised.sh \ + --cmd "$train_cmd" \ + "${dnn_update_egs_opts[@]}" \ + --transform-dir-sup exp/tri5_ali \ + --transform-dir-unsup exp/tri5/decode_${dirid} \ + --weight-threshold $bnf_weight_threshold \ + data/train $datadir data/langp/tri5_ali/ \ + $ali_dir $weights_dir $exp_dir/tri6_bnf || exit 1; + touch $exp_dir/tri6_bnf/egs/.done + fi + + fi + + echo "$0: Train Bottleneck network" + steps/nnet2/train_tanh_bottleneck.sh \ + --stage $train_stage --num-jobs-nnet $bnf_num_jobs \ + --num-threads $bnf_num_threads --mix-up $bnf_mixup \ + --minibatch-size $bnf_minibatch_size \ + --initial-learning-rate $bnf_init_learning_rate \ + --final-learning-rate $bnf_final_learning_rate \ + --num-hidden-layers $bnf_num_hidden_layers \ + --bottleneck-dim $bottleneck_dim --hidden-layer-dim $bnf_hidden_layer_dim \ + --cmd "$train_cmd --mem 4G" $egs_string \ + "${dnn_gpu_parallel_opts[@]}" \ + data/train data/langp/tri5_ali/ $ali_dir $exp_dir/tri6_bnf || exit 1 + + touch $exp_dir/tri6_bnf/.done +fi + +[ ! -d $param_bnf_dir ] && mkdir -p $param_bnf_dir +if [ ! -f $data_bnf_dir/train_bnf/.done ]; then + mkdir -p $data_bnf_dir + # put the archives in ${param_bnf_dir}/. + steps/nnet2/dump_bottleneck_features.sh --nj $train_nj --cmd "$train_cmd" \ + --transform-dir exp/tri5 data/train $data_bnf_dir/train_bnf \ + $exp_dir/tri6_bnf $param_bnf_dir $exp_dir/dump_bnf + touch $data_bnf_dir/train_bnf/.done +fi + +if [ ! $data_bnf_dir/train/.done -nt $data_bnf_dir/train_bnf/.done ]; then + steps/nnet/make_fmllr_feats.sh --cmd "$train_cmd --max-jobs-run 10" \ + --nj $train_nj --transform-dir exp/tri5_ali $data_bnf_dir/train_sat data/train \ + exp/tri5_ali $exp_dir/make_fmllr_feats/log $param_bnf_dir + + steps/append_feats.sh --cmd "$train_cmd" --nj 4 \ + $data_bnf_dir/train_bnf $data_bnf_dir/train_sat $data_bnf_dir/train \ + $exp_dir/append_feats/log $param_bnf_dir/ + steps/compute_cmvn_stats.sh --fake $data_bnf_dir/train \ + $exp_dir/make_fmllr_feats $param_bnf_dir + rm -r $data_bnf_dir/train_sat + + touch $data_bnf_dir/train/.done +fi + +if [ ! $exp_dir/tri5/.done -nt $data_bnf_dir/train/.done ]; then + steps/train_lda_mllt.sh --splice-opts "--left-context=1 --right-context=1" \ + --dim 60 --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesMLLT $numGaussMLLT $data_bnf_dir/train data/langp/tri5_ali/ exp/tri5_ali $exp_dir/tri5 ; + touch $exp_dir/tri5/.done +fi + +if [ ! $exp_dir/tri6/.done -nt $exp_dir/tri5/.done ]; then + steps/train_sat.sh --boost-silence $boost_sil --cmd "$train_cmd" \ + $numLeavesSAT $numGaussSAT $data_bnf_dir/train data/langp/tri5_ali \ + $exp_dir/tri5 $exp_dir/tri6 + touch $exp_dir/tri6/.done +fi + +echo --------------------------------------------------------------------- +echo "$0: next, run run-6-bnf-sgmm-semisupervised.sh" +echo --------------------------------------------------------------------- + +exit 0; diff --git a/egs/babel/s5d/run-3a-nnet-mpe.sh b/egs/babel/s5d/run-3a-nnet-mpe.sh new file mode 100755 index 00000000000..5271c58d816 --- /dev/null +++ b/egs/babel/s5d/run-3a-nnet-mpe.sh @@ -0,0 +1,54 @@ +#!/bin/bash + + +. conf/common_vars.sh +. ./lang.conf + +modeldir=exp/tri6_nnet + +. ./utils/parse_options.sh +set -e +set -o pipefail +set -u + +# Wait for cross-entropy training. +echo "Waiting till ${modeldir}/.done exists...." +while [ ! -f $modeldir/.done ]; do sleep 30; done +echo "...done waiting for ${modeldir}/.done" + +# Generate denominator lattices. +if [ ! 
-f exp/tri6_nnet_denlats/.done ]; then
+  steps/nnet2/make_denlats.sh --cmd "$decode_cmd" \
+    --nj $train_nj --sub-split $train_nj \
+    "${dnn_denlats_extra_opts[@]}" \
+    --transform-dir exp/tri5_ali \
+    data/train data/langp/tri5_ali ${modeldir} exp/tri6_nnet_denlats || exit 1
+
+  touch exp/tri6_nnet_denlats/.done
+fi
+
+# Generate alignment.
+if [ ! -f exp/tri6_nnet_ali/.done ]; then
+  steps/nnet2/align.sh --use-gpu yes \
+    --cmd "$decode_cmd $dnn_parallel_opts" \
+    --transform-dir exp/tri5_ali --nj $train_nj \
+    data/train data/langp/tri5_ali ${modeldir} exp/tri6_nnet_ali || exit 1
+
+  touch exp/tri6_nnet_ali/.done
+fi
+
+train_stage=-100
+if [ ! -f exp/tri6_nnet_mpe/.done ]; then
+  steps/nnet2/train_discriminative.sh \
+    --stage $train_stage --cmd "$decode_cmd" \
+    --learning-rate $dnn_mpe_learning_rate \
+    --modify-learning-rates true \
+    --last-layer-factor $dnn_mpe_last_layer_factor \
+    --num-epochs 4 --cleanup true \
+    --retroactive $dnn_mpe_retroactive \
+    --transform-dir exp/tri5_ali \
+    "${dnn_gpu_mpe_parallel_opts[@]}" data/train data/langp/tri5_ali/ \
+    exp/tri6_nnet_ali exp/tri6_nnet_denlats ${modeldir}/final.mdl exp/tri6_nnet_mpe || exit 1
+
+  touch exp/tri6_nnet_mpe/.done
+fi
diff --git a/egs/babel/s5d/run-3b-bnf-nnet.sh b/egs/babel/s5d/run-3b-bnf-nnet.sh
new file mode 100755
index 00000000000..169eec6f62f
--- /dev/null
+++ b/egs/babel/s5d/run-3b-bnf-nnet.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+# Copyright 2014  Pegah Ghahremani
+#                 2014  Johns Hopkins (Yenda Trmal)
+
+# Apache 2.0
+
+# This is really an alternative path to the BNF-SGMM,
+# where we train a DNN instead of an SGMM.
+
+
+. conf/common_vars.sh
+. ./lang.conf
+[ -f local.conf ] && . ./local.conf
+
+set -e
+set -o pipefail
+set -u
+
+semisupervised=true
+dnn_train_stage=-100
+unsup_string=
+
+. ./utils/parse_options.sh
+
+if [ $babel_type == "full" ] && $semisupervised; then
+  echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false"
+  exit 1
+fi
+
+if [ -z "$unsup_string" ]; then
+  if $semisupervised ; then
+    unsup_string="_semisup"
+  else
+    unsup_string=""  #"": supervised training, "_semisup": semi-supervised BNF training
+  fi
+fi
+exp_dir=exp_bnf${unsup_string}
+data_bnf_dir=data_bnf${unsup_string}
+
+if [ ! -e $exp_dir/tri6/.done ]; then
+  echo "$0: $exp_dir/tri6/.done does not exist"
+  echo "$0: this script needs to be run _AFTER_ the script run-2b-bnf.sh"
+  echo "$0: with the appropriate parameters -- mostly the same as the parameters"
+  echo "$0: of this script"
+  exit 1
+fi
+
+# We create an alignment with a lot of jobs, because the LDA accumulation
+# when training the pnorm network will be slow, due to the large dimension.
+if [ ! $exp_dir/tri6_ali_50/.done -nt $exp_dir/tri6/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Aligning fMLLR system with 50 jobs"
+  echo ---------------------------------------------------------------------
+  steps/align_fmllr.sh \
+    --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \
+    $data_bnf_dir/train data/lang $exp_dir/tri6 $exp_dir/tri6_ali_50
+  touch $exp_dir/tri6_ali_50/.done
+fi
+
+
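+# The "[ ! A -nt B ]" tests used above and below are make-style staleness
+# checks: a stage is (re)run unless its output marker A is newer than its
+# input marker B.  A minimal sketch of the idiom (illustrative only):
+#   touch input/.done; sleep 1; touch output/.done
+#   if [ ! output/.done -nt input/.done ]; then echo "rebuild output"; fi
+
+if [ ! 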
$exp_dir/tri7_nnet/.done -nt $exp_dir/tri6_ali_50/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting hybrid system building (over bottleneck features)" + echo --------------------------------------------------------------------- + steps/nnet2/train_pnorm.sh \ + --stage $dnn_train_stage --mix-up $dnn_mixup \ + --initial-learning-rate $dnn_init_learning_rate \ + --final-learning-rate $dnn_final_learning_rate \ + --num-hidden-layers $dnn_num_hidden_layers \ + --pnorm-input-dim $dnn_input_dim \ + --pnorm-output-dim $dnn_output_dim \ + --egs-opts "--feat-type raw" --lda-opts "--feat-type raw --lda-dim $dnn_output_dim" --splice-width 5 \ + "${dnn_gpu_parallel_opts[@]}" --cmd "$train_cmd" \ + $data_bnf_dir/train data/lang $exp_dir/tri6_ali_50 $exp_dir/tri7_nnet || exit 1 + + touch $exp_dir/tri7_nnet/.done +fi + + +echo --------------------------------------------------------------------- +echo "Finished successfully on" `date` +echo "To decode a data-set, use run-4b-anydecode-bnf.sh" +echo --------------------------------------------------------------------- + +exit 0 diff --git a/egs/babel/s5d/run-3b-bnf-sgmm.sh b/egs/babel/s5d/run-3b-bnf-sgmm.sh new file mode 100755 index 00000000000..341ea83565f --- /dev/null +++ b/egs/babel/s5d/run-3b-bnf-sgmm.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +# Copyright 2014 Pegah Ghahremani +# 2014 Johns Hopkins (Yenda Trmal) + +# Apache 2.0 + +# This script builds the SGMM system on top of the kaldi internal bottleneck features. +# It comes after run-6-bnf-semisupervised.sh. + + +. conf/common_vars.sh +. ./lang.conf +[ -f local.conf ] && . ./local.conf + +set -e +set -o pipefail +set -u +semisupervised=true +unsup_string= + +. ./utils/parse_options.sh + +if [ $babel_type == "full" ] && $semisupervised; then + echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false " + exit 1 +fi + +if [ -z "$unsup_string" ]; then + if $semisupervised ; then + unsup_string="_semisup" + else + unsup_string="" #" ": supervised training, _semi_supervised: unsupervised BNF training + fi +fi +exp_dir=exp_bnf${unsup_string} +data_bnf_dir=data_bnf${unsup_string} +param_bnf_dir=param_bnf${unsup_string} + +echo --------------------------------------------------------------------- +echo "Starting $exp_dir/ubm7 on" `date` +echo --------------------------------------------------------------------- +if [ ! $exp_dir/ubm7/.done -nt $exp_dir/tri6/.done ]; then + steps/train_ubm.sh --cmd "$train_cmd" \ + $bnf_num_gauss_ubm $data_bnf_dir/train data/lang $exp_dir/tri6 $exp_dir/ubm7 + touch $exp_dir/ubm7/.done +fi + +if [ ! $exp_dir/sgmm7/.done -nt $exp_dir/ubm7/.done ]; then + echo --------------------------------------------------------------------- + echo "Starting $exp_dir/sgmm7 on" `date` + echo --------------------------------------------------------------------- + #steps/train_sgmm2_group.sh \ + steps/train_sgmm2.sh \ + --cmd "$train_cmd" "${sgmm_train_extra_opts[@]}"\ + $numLeavesSGMM $bnf_num_gauss_sgmm $data_bnf_dir/train data/lang \ + $exp_dir/tri6 $exp_dir/ubm7/final.ubm $exp_dir/sgmm7 + touch $exp_dir/sgmm7/.done +fi + +if [ ! 
$exp_dir/sgmm7_ali/.done -nt $exp_dir/sgmm7/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Starting $exp_dir/sgmm7_ali on" `date`
+  echo ---------------------------------------------------------------------
+  steps/align_sgmm2.sh \
+    --nj $train_nj --cmd "$train_cmd" --transform-dir $exp_dir/tri6 --use-graphs true \
+    $data_bnf_dir/train data/lang $exp_dir/sgmm7 $exp_dir/sgmm7_ali
+  touch $exp_dir/sgmm7_ali/.done
+fi
+
+if [ ! $exp_dir/sgmm7_denlats/.done -nt $exp_dir/sgmm7/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Starting $exp_dir/sgmm7_denlats on" `date`
+  echo ---------------------------------------------------------------------
+  steps/make_denlats_sgmm2.sh --cmd "$train_cmd" \
+    --nj $train_nj --sub-split $train_nj "${sgmm_denlats_extra_opts[@]}" \
+    --transform-dir $exp_dir/tri6 --beam 10.0 --acwt 0.06 --lattice-beam 6 \
+    $data_bnf_dir/train data/lang $exp_dir/sgmm7_ali $exp_dir/sgmm7_denlats
+  touch $exp_dir/sgmm7_denlats/.done
+fi
+
+if [ ! $exp_dir/sgmm7_mmi_b0.1/.done -nt $exp_dir/sgmm7_denlats/.done ]; then
+  steps/train_mmi_sgmm2.sh \
+    --cmd "$train_cmd" --acwt 0.06 \
+    --transform-dir $exp_dir/tri6 --boost 0.1 --drop-frames true \
+    $data_bnf_dir/train data/lang $exp_dir/sgmm7_ali $exp_dir/sgmm7_denlats \
+    $exp_dir/sgmm7_mmi_b0.1
+  touch $exp_dir/sgmm7_mmi_b0.1/.done;
+fi
+
+
+echo ---------------------------------------------------------------------
+echo "Finished successfully on" `date`
+echo "To decode a data-set, use run-4b-anydecode-bnf.sh"
+echo ---------------------------------------------------------------------
+
+exit 0
diff --git a/egs/babel/s5d/run-4-anydecode.sh b/egs/babel/s5d/run-4-anydecode.sh
new file mode 100755
index 00000000000..083ac7e9879
--- /dev/null
+++ b/egs/babel/s5d/run-4-anydecode.sh
@@ -0,0 +1,724 @@
+#!/bin/bash
+set -e
+set -o pipefail
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+
+dir=dev10h.pem
+kind=
+data_only=false
+fast_path=true
+skip_kws=false
+skip_stt=false
+skip_scoring=
+extra_kws=true
+vocab_kws=false
+tri5_only=false
+wip=0.5
+
+nnet3_model=nnet3/tdnn_sp
+chain_model=
+parent_dir_suffix=_cleaned
+is_rnn=false
+extra_left_context=40
+extra_right_context=40
+frames_per_chunk=20
+
+echo $0 "$@"
+
+. utils/parse_options.sh
+
+if [ $# -ne 0 ]; then
+  echo "Usage: $(basename $0) --dir (dev10h.pem|dev2h.pem|eval.pem|shadow.pem)"
+  exit 1
+fi
+
+echo "Dir: $dir"
+
+#This seems to be the only functioning way to ensure that the complete
+#set of scripts will exit when sourcing several of them together.
+#Otherwise, CTRL-C just terminates the deepest sourced script.
+# Let shell functions inherit ERR trap.  Same as `set -E'.
+set -o errtrace
+trap "echo Exited!; exit;" SIGINT SIGTERM
+
+./local/check_tools.sh || exit 1
+
+# Set proxy search parameters for the extended lexicon case.
+if [ -f data/.extlex ]; then
+  proxy_phone_beam=$extlex_proxy_phone_beam
+  proxy_phone_nbest=$extlex_proxy_phone_nbest
+  proxy_beam=$extlex_proxy_beam
+  proxy_nbest=$extlex_proxy_nbest
+fi
+
+dataset_segments=${dir##*.}
+dataset_dir=data/$dir
+dataset_id=$dir
+dataset_type=${dir%%.*}
+#By default, we want the script to accept how the dataset should be handled,
+#i.e. what kind of dataset it is
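+#For example, with the (hypothetical) invocation
+#  ./run-4-anydecode.sh --dir dev10h.pem
+#the expansions above give dataset_type=dev10h and dataset_segments=pem,
+#so dataset_kind below defaults to "supervised"; pass --kind to override it.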
+if [ -z ${kind} ] ; then
+  if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ]; then
+    dataset_kind=supervised
+  else
+    dataset_kind=unsupervised
+  fi
+else
+  dataset_kind=$kind
+fi
+
+if [ -z $dataset_segments ]; then
+  echo "You have to specify the segmentation type as well"
+  echo "If you are trying to decode the PEM segmentation dir"
+  echo "such as data/dev10h, specify dev10h.pem"
+  echo "The valid segmentation types are:"
+  echo -e "\tpem   #PEM segmentation"
+  echo -e "\tuem   #UEM segmentation in the CMU database format"
+  echo -e "\tseg   #UEM segmentation (kaldi-native)"
+fi
+
+if [ -z "${skip_scoring}" ] ; then
+  if [ "$dataset_kind" == "unsupervised" ]; then
+    skip_scoring=true
+  else
+    skip_scoring=false
+  fi
+fi
+
+#The $dataset_type value will be the dataset name without any extension
+eval my_data_dir=( "\${${dataset_type}_data_dir[@]}" )
+eval my_data_list=( "\${${dataset_type}_data_list[@]}" )
+if [ -z $my_data_dir ] || [ -z $my_data_list ] ; then
+  echo "Error: The dir you specified ($dataset_id) does not have existing config";
+  exit 1
+fi
+
+eval my_stm_file=\$${dataset_type}_stm_file
+eval my_ecf_file=\$${dataset_type}_ecf_file
+eval my_rttm_file=\$${dataset_type}_rttm_file
+eval my_nj=\$${dataset_type}_nj  #for shadow, this will be re-set when appropriate
+
+if [ -z "$my_nj" ]; then
+  echo >&2 "You didn't specify the number of jobs -- variable \"${dataset_type}_nj\" not defined."
+  exit 1
+fi
+
+my_subset_ecf=false
+eval ind=\${${dataset_type}_subset_ecf+x}
+if [ "$ind" == "x" ] ; then
+  eval my_subset_ecf=\$${dataset_type}_subset_ecf
+fi
+
+declare -A my_kwlists=()
+eval my_kwlists_keys="\${!${dataset_type}_kwlists[@]}"
+for key in $my_kwlists_keys  # make sure you include the quotes there
+do
+  eval my_kwlists_val="\${${dataset_type}_kwlists[$key]}"
+  #index=`echo $my_kwlists_val | sed 's/.*\.\([^.][^.]*\)\.xml/\1/g'`
+  index=$key
+
+  my_kwlists["$index"]="${my_kwlists_val}"
+done
+declare -p my_kwlists
+export my_kwlists
+
+#Just a minor safety precaution to prevent using incorrect settings
+#The dataset_* variables should be used.
+set -e
+set -o pipefail
+set -u
+unset dir
+unset kind
+
+function make_plp {
+  target=$1
+  logdir=$2
+  output=$3
+  if $use_pitch; then
+    steps/make_plp_pitch.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  else
+    steps/make_plp.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  fi
+  utils/fix_data_dir.sh $target
+  steps/compute_cmvn_stats.sh $target $logdir $output
+  utils/fix_data_dir.sh $target
+}
+
+function check_variables_are_set {
+  for variable in $mandatory_variables ; do
+    if ! declare -p $variable ; then
+      echo "Mandatory variable ${variable/my/$dataset_type} is not set!"
+      echo "You should probably set the variable in the config file"
+      exit 1
+    else
+      declare -p $variable
+    fi
+  done
+
+  if [ ! -z ${optional_variables+x} ] ; then
+    for variable in $optional_variables ; do
+      eval my_variable=\$${variable}
+      echo "$variable=$my_variable"
+    done
+  fi
+}
+
+if [ ! -f data/raw_${dataset_type}_data/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the ${dataset_type} set"
+  echo ---------------------------------------------------------------------
+
+  l1=${#my_data_dir[*]}
+  l2=${#my_data_list[*]}
+  if [ "$l1" -ne "$l2" ]; then
+    echo "Error, the number of source file lists is not the same as the number of source dirs!"
+    exit 1
+  fi
+
+  resource_string=""
+  if [ "$dataset_kind" == "unsupervised" ]; then
+    resource_string+=" --ignore-missing-txt true"
+  fi
+
+  for i in `seq 0 $(($l1 - 1))`; do
+    resource_string+=" ${my_data_dir[$i]} "
+    resource_string+=" ${my_data_list[$i]} "
+  done
+  local/make_corpus_subset.sh $resource_string ./data/raw_${dataset_type}_data
+  touch data/raw_${dataset_type}_data/.done
+fi
+my_data_dir=`readlink -f ./data/raw_${dataset_type}_data`
+[ -f $my_data_dir/filelist.list ] && my_data_list=$my_data_dir/filelist.list
+nj_max=`cat $my_data_list | wc -l` || nj_max=`ls $my_data_dir/audio | wc -l`
+
+if [ "$nj_max" -lt "$my_nj" ] ; then
+  echo "Number of jobs ($my_nj) is too big!"
+  echo "The maximum reasonable number of jobs is $nj_max"
+  my_nj=$nj_max
+fi
+
+#####################################################################
+#
+# Audio data directory preparation
+#
+#####################################################################
+echo ---------------------------------------------------------------------
+echo "Preparing ${dataset_kind} data files in ${dataset_dir} on" `date`
+echo ---------------------------------------------------------------------
+if [ ! -f $dataset_dir/.done ] ; then
+  if [ "$dataset_kind" == "supervised" ]; then
+    if [ "$dataset_segments" == "seg" ]; then
+      . ./local/datasets/supervised_seg.sh || exit 1
+    elif [ "$dataset_segments" == "uem" ]; then
+      . ./local/datasets/supervised_uem.sh || exit 1
+    elif [ "$dataset_segments" == "pem" ]; then
+      . ./local/datasets/supervised_pem.sh || exit 1
+    else
+      echo "Unknown type of the dataset: \"$dataset_segments\"!";
+      echo "Valid dataset types are: seg, uem, pem";
+      exit 1
+    fi
+  elif [ "$dataset_kind" == "unsupervised" ] ; then
+    if [ "$dataset_segments" == "seg" ] ; then
+      . ./local/datasets/unsupervised_seg.sh
+    elif [ "$dataset_segments" == "uem" ] ; then
+      . ./local/datasets/unsupervised_uem.sh
+    elif [ "$dataset_segments" == "pem" ] ; then
+      ##This combination does not really make sense,
+      ##because with PEM we are given the segmentation and,
+      ##due to the format of the segment files, the transcripts as well.
+      echo "ERROR: $dataset_segments combined with $dataset_type"
+      echo "does not really make any sense!"
+      exit 1
+      #. ./local/datasets/unsupervised_pem.sh
+    else
+      echo "Unknown type of the dataset: \"$dataset_segments\"!";
+      echo "Valid dataset types are: seg, uem, pem";
+      exit 1
+    fi
+  else
+    echo "Unknown kind of the dataset: \"$dataset_kind\"!";
+    echo "Valid dataset kinds are: supervised, unsupervised, shadow";
+    exit 1
+  fi
+
+  if [ ! -f ${dataset_dir}/.plp.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Preparing ${dataset_kind} parametrization files in ${dataset_dir} on" `date`
+    echo ---------------------------------------------------------------------
+    make_plp ${dataset_dir} exp/make_plp/${dataset_id} plp
+    touch ${dataset_dir}/.plp.done
+  fi
+  touch $dataset_dir/.done
+fi
+
+if [ ! -f ${dataset_dir}_hires/.mfcc.done ]; then
+  dataset=$(basename $dataset_dir)
+  echo ---------------------------------------------------------------------
+  echo "Preparing ${dataset_kind} MFCC features in ${dataset_dir}_hires and corresponding iVectors in exp/nnet3/ivectors_$dataset on" `date`
+  echo ---------------------------------------------------------------------
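+  # The GMM stages above decode on PLP features; the nnet3 and chain models
+  # further below instead expect high-resolution MFCCs (conf/mfcc_hires.conf)
+  # plus online iVectors, which is why the data directory is duplicated with
+  # a _hires suffix and re-parametrized here.
+  if [ ! 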
-d ${dataset_dir}_hires ]; then + utils/copy_data_dir.sh data/$dataset data/${dataset}_hires + fi + + mfccdir=mfcc_hires + steps/make_mfcc.sh --nj $my_nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" ${dataset_dir}_hires exp/make_hires/$dataset $mfccdir; + steps/compute_cmvn_stats.sh data/${dataset}_hires exp/make_hires/${dataset} $mfccdir; + utils/fix_data_dir.sh ${dataset_dir}_hires; + touch ${dataset_dir}_hires/.mfcc.done + + touch ${dataset_dir}_hires/.done +fi + +if [ -f exp/nnet3/extractor/final.ie ] && \ + [ ! -f exp/nnet3/ivectors_$(basename $dataset_dir)/.done ] ; then + dataset=$(basename $dataset_dir) + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $my_nj \ + ${dataset_dir}_hires exp/nnet3/extractor exp/nnet3/ivectors_$dataset || exit 1; + + touch exp/nnet3/ivectors_$dataset/.done +fi + +##################################################################### +# +# KWS data directory preparation +# +##################################################################### +echo --------------------------------------------------------------------- +echo "Preparing kws data files in ${dataset_dir} on" `date` +echo --------------------------------------------------------------------- +lang=data/lang +if ! $skip_kws ; then + if $extra_kws ; then + L1_lex=data/local/lexiconp.txt + . ./local/datasets/extra_kws.sh || exit 1 + fi + if $vocab_kws ; then + . ./local/datasets/vocab_kws.sh || exit 1 + fi + if [ ! -f data/lang.phn/G.fst ] ; then + ./local/syllab/run_phones.sh --stage -2 ${dataset_dir} + else + ./local/syllab/run_phones.sh ${dataset_dir} + fi + + if [ ! -f data/lang.syll/G.fst ] ; then + ./local/syllab/run_syllabs.sh --stage -2 ${dataset_dir} + else + ./local/syllab/run_syllabs.sh ${dataset_dir} + fi + + ./local/search/run_search.sh --dir ${dataset_dir##*/} + ./local/search/run_phn_search.sh --dir ${dataset_dir##*/} + ./local/search/run_syll_search.sh --dir ${dataset_dir##*/} +fi + +if $data_only ; then + echo "Exiting, as data-only was requested..." + exit 0; +fi + +#################################################################### +## +## FMLLR decoding +## +#################################################################### +if [ ! -f data/langp_test/.done ]; then + cp -R data/langp/tri5_ali/ data/langp_test + cp data/lang/G.fst data/langp_test + touch data/langp_test/.done +fi + +if [ ! -L ./data/langp_test.syll ]; then + ln -s lang.syll data/langp_test.syll +fi +if [ ! -L ./data/langp_test.phn ]; then + ln -s lang.phn data/langp_test.phn +fi + + +decode=exp/tri5/decode_${dataset_id} +if [ ! -f ${decode}/.done ]; then + echo --------------------------------------------------------------------- + echo "Spawning decoding with SAT models on" `date` + echo --------------------------------------------------------------------- + utils/mkgraph.sh \ + data/langp_test exp/tri5 exp/tri5/graph |tee exp/tri5/mkgraph.log + + mkdir -p $decode + #By default, we do not care about the lattices for this step -- we just want the transforms + #Therefore, we will reduce the beam sizes, to reduce the decoding times + steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4\ + --nj $my_nj --cmd "$decode_cmd" "${decode_extra_opts[@]}"\ + exp/tri5/graph ${dataset_dir} ${decode} |tee ${decode}/decode.log + touch ${decode}/.done +fi + +if ! 
$fast_path ; then
+  local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test ${decode}
+
+  local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test ${decode}.si
+fi
+
+if $tri5_only; then
+  echo "--tri5-only is true. So exiting."
+  exit 0
+fi
+
+####################################################################
+## SGMM2 decoding
+## We include the SGMM_MMI inside this, as we might only have the DNN systems
+## trained and not the PLP system. The DNN systems are built only on top of
+## the tri5 stage.
+####################################################################
+if [ -f exp/sgmm5/.done ]; then
+  decode=exp/sgmm5/decode_fmllr_${dataset_id}
+  if [ ! -f $decode/.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Spawning $decode on" `date`
+    echo ---------------------------------------------------------------------
+    utils/mkgraph.sh \
+      data/langp_test exp/sgmm5 exp/sgmm5/graph |tee exp/sgmm5/mkgraph.log
+
+    mkdir -p $decode
+    steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \
+      --cmd "$decode_cmd" --transform-dir exp/tri5/decode_${dataset_id} "${decode_extra_opts[@]}"\
+      exp/sgmm5/graph ${dataset_dir} $decode |tee $decode/decode.log
+    touch $decode/.done
+
+    if ! $fast_path ; then
+      local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+        --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+        --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+        "${lmwt_plp_extra_opts[@]}" \
+        ${dataset_dir} data/langp_test exp/sgmm5/decode_fmllr_${dataset_id}
+    fi
+  fi
+
+  ####################################################################
+  ##
+  ## SGMM_MMI rescoring
+  ##
+  ####################################################################
+
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    if [ -x exp/sgmm5_mmi_b0.1 ] && [ ! -f $decode/.done ]; then
+
+      mkdir -p $decode
+      steps/decode_sgmm2_rescore.sh --skip-scoring true \
+        --cmd "$decode_cmd" --iter $iter --transform-dir exp/tri5/decode_${dataset_id} \
+        data/langp_test ${dataset_dir} exp/sgmm5/decode_fmllr_${dataset_id} $decode | tee ${decode}/decode.log
+
+      touch $decode/.done
+    fi
+  done
+
+  #We are done -- all lattices have been generated. We have to
+  #a) run MBR decoding
+  #b) run KW search
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    if [ -f $decode/.done ]; then
+      local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \
+        --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+        --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+        "${lmwt_plp_extra_opts[@]}" \
+        ${dataset_dir} data/langp_test $decode
+    fi
+  done
+fi
+
+
+
+####################################################################
+##
+## DNN ("compatibility") decoding -- also, just decode the "default" net
+##
+####################################################################
+if [ -f exp/tri6_nnet/.done ]; then
+  decode=exp/tri6_nnet/decode_${dataset_id}
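+  # The quoted array expansions such as "${decode_extra_opts[@]}" and
+  # "${lmwt_dnn_extra_opts[@]}" pass site-specific extra options through to
+  # the decoding and scoring scripts; they are bash arrays, typically set in
+  # conf/common_vars.sh, e.g. (hypothetical values):
+  #   decode_extra_opts=( --num-threads 6 )
+  #   lmwt_dnn_extra_opts=( --min-lmwt 9 --max-lmwt 13 )
+  if [ ! 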
-f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi + +#################################################################### +## +## nnet3 model decoding +## +#################################################################### +if [ -f exp/nnet3/lstm_bidirectional_sp/.done ]; then + decode=exp/nnet3/lstm_bidirectional_sp/decode_${dataset_id} + rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 " + decode_script=steps/nnet3/decode.sh + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi + +if [ -f exp/nnet3/lstm_realigned_bidirectional_sp//.done ]; then + decode=exp/nnet3/lstm_realigned_bidirectional_sp//decode_${dataset_id} + rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 " + decode_script=steps/nnet3/decode.sh + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi +if [ -f exp/nnet3/lstm_sp/.done ]; then + decode=exp/nnet3/lstm_sp/decode_${dataset_id} + rnn_opts=" --extra-left-context 40 --extra-right-context 0 --frames-per-chunk 20 " + decode_script=steps/nnet3/decode.sh + if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi + +if [ -f exp/$nnet3_model/.done ]; then + decode=exp/$nnet3_model/decode_${dataset_id} + rnn_opts= + decode_script=steps/nnet3/decode.sh + if [ "$is_rnn" == "true" ]; then + rnn_opts=" --extra-left-context $extra_left_context --extra-right-context $extra_right_context --frames-per-chunk $frames_per_chunk " + fi + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi + +#################################################################### +## +## chain model decoding +## +#################################################################### +if [ -f exp/$chain_model/final.mdl ]; then + dir=exp/$chain_model + + decode=$dir/decode_${dataset_id} + decode_script=steps/nnet3/decode.sh + + if [ ! -f exp/nnet3$parent_dir_suffix/ivectors_${dataset_id}/.done ] ; then + steps/online/nnet2/extract_ivectors_online.sh --cmd "$decode_cmd" --nj $my_nj \ + ${dataset_dir}_hires exp/nnet3$parent_dir_suffix/extractor exp/nnet3$parent_dir_suffix/ivectors_${dataset_id}/ || exit 1; + touch exp/nnet3$parent_dir_suffix/ivectors_${dataset_id}/.done + fi + + rnn_opts= + if [ "$is_rnn" == "true" ]; then + rnn_opts=" --extra-left-context $extra_left_context --extra-right-context $extra_right_context --frames-per-chunk $frames_per_chunk " + echo "Modifying the number of jobs as this is an RNN and decoding can be extremely slow." + my_nj=`cat ${dataset_dir}_hires/spk2utt|wc -l` + fi + if [ ! -f $decode/.done ]; then + mkdir -p $decode + echo "Modifying the number of jobs as this is an RNN and decoding can be extremely slow." 
+ my_nj=`cat ${dataset_dir}_hires/spk2utt|wc -l` + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3$parent_dir_suffix/ivectors_${dataset_id} \ + $dir/graph ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_chain_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +else + echo "no chain model exp/$chain_model" +fi + +#################################################################### +## +## DNN (nextgen DNN) decoding +## +#################################################################### +if [ -f exp/tri6a_nnet/.done ]; then + decode=exp/tri6a_nnet/decode_${dataset_id} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi + + +#################################################################### +## +## DNN (ensemble) decoding +## +#################################################################### +if [ -f exp/tri6b_nnet/.done ]; then + decode=exp/tri6b_nnet/decode_${dataset_id} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode +fi +#################################################################### +## +## DNN_MPE decoding +## +#################################################################### +if [ -f exp/tri6_nnet_mpe/.done ]; then + for epoch in 1 2 3 4; do + decode=exp/tri6_nnet_mpe/decode_${dataset_id}_epoch$epoch + if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh --minimize $minimize \ + --cmd "$decode_cmd" --nj $my_nj --iter epoch$epoch \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode + done +fi + +#################################################################### +## +## DNN semi-supervised training decoding +## +#################################################################### +for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \ + tri6_nnet_supervised_tuning tri6_nnet_supervised_tuning2 ; do + if [ -f exp/$dnn/.done ]; then + decode=exp/$dnn/decode_${dataset_id} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode + fi +done +echo "Everything looking good...." +exit 0 diff --git a/egs/babel/s5d/run-4-phn-anydecode.sh b/egs/babel/s5d/run-4-phn-anydecode.sh new file mode 100755 index 00000000000..054a4665529 --- /dev/null +++ b/egs/babel/s5d/run-4-phn-anydecode.sh @@ -0,0 +1,613 @@ +#!/bin/bash +set -e +set -o pipefail + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + + +dir=dev10h.phn.pem +kind= +data_only=false +fast_path=true +skip_kws=false +skip_stt=false +skip_scoring= +extra_kws=true +vocab_kws=false +tri5_only=false +wip=0.5 + +nnet3_model=nnet3/tdnn_sp +is_rnn=false +extra_left_context=0 +extra_right_context=0 +frames_per_chunk=0 + +echo $0 "$@" + +. utils/parse_options.sh + +if [ $# -ne 0 ]; then + echo "Usage: $(basename $0) --type (dev10h.phn|dev2h.phn|eval.phn|shadow.phn)" + exit 1 +fi + +#This seems to be the only functioning way how to ensure the comple +#set of scripts will exit when sourcing several of them together +#Otherwise, the CTRL-C just terminates the deepest sourced script ? +# Let shell functions inherit ERR trap. Same as `set -E'. +set -o errtrace +trap "echo Exited!; exit;" SIGINT SIGTERM + +./local/check_tools.sh || exit 1 + +# Set proxy search parameters for the extended lexicon case. +if [ -f data/.extlex ]; then + proxy_phone_beam=$extlex_proxy_phone_beam + proxy_phone_nbest=$extlex_proxy_phone_nbest + proxy_beam=$extlex_proxy_beam + proxy_nbest=$extlex_proxy_nbest +fi + +dataset_segments=${dir##*.} +dataset_dir=data/$dir +dataset_id=$dir +dataset_type=${dir%%.phn.*} +#By default, we want the script to accept how the dataset should be handled, +#i.e. 
of what kind is the dataset +if [ -z ${kind} ] ; then + if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ]; then + dataset_kind=supervised + else + dataset_kind=unsupervised + fi +else + dataset_kind=$kind +fi + +if [ -z $dataset_segments ]; then + echo "You have to specify the segmentation type as well" + echo "If you are trying to decode the PEM segmentation dir" + echo "such as data/dev10h, specify dev10h.pem" + echo "The valid segmentations types are:" + echo "\tpem #PEM segmentation" + echo "\tuem #UEM segmentation in the CMU database format" + echo "\tseg #UEM segmentation (kaldi-native)" +fi + +if [ -z "${skip_scoring}" ] ; then + if [ "$dataset_kind" == "unsupervised" ]; then + skip_scoring=true + else + skip_scoring=false + fi +fi + +#The $dataset_type value will be the dataset name without any extrension +eval my_data_dir=( "\${${dataset_type}_data_dir[@]}" ) +eval my_data_list=( "\${${dataset_type}_data_list[@]}" ) +if [ -z $my_data_dir ] || [ -z $my_data_list ] ; then + echo "Error: The dir you specified ($dataset_id) does not have existing config"; + exit 1 +fi + +eval my_stm_file=\$${dataset_type}_stm_file +eval my_ecf_file=\$${dataset_type}_ecf_file +eval my_rttm_file=\$${dataset_type}_rttm_file +eval my_nj=\$${dataset_type}_nj #for shadow, this will be re-set when appropriate + +if [ -z "$my_nj" ]; then + echo >&2 "You didn't specify the number of jobs -- variable \"${dataset_type}_nj\" not defined." + exit 1 +fi +my_nj=$(($my_nj * 2)) + +my_subset_ecf=false +eval ind=\${${dataset_type}_subset_ecf+x} +if [ "$ind" == "x" ] ; then + eval my_subset_ecf=\$${dataset_type}_subset_ecf +fi + +declare -A my_kwlists=() +eval my_kwlists_keys="\${!${dataset_type}_kwlists[@]}" +for key in $my_kwlists_keys # make sure you include the quotes there +do + eval my_kwlists_val="\${${dataset_type}_kwlists[$key]}" + my_kwlists["$key"]="${my_kwlists_val}" +done +declare -p my_kwlists +export my_kwlists + +#Just a minor safety precaution to prevent using incorrect settings +#The dataset_* variables should be used. +set -e +set -o pipefail +set -u +unset dir +unset kind + +function make_plp { + target=$1 + logdir=$2 + output=$3 + if $use_pitch; then + steps/make_plp_pitch.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output + else + steps/make_plp.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output + fi + utils/fix_data_dir.sh $target + steps/compute_cmvn_stats.sh $target $logdir $output + utils/fix_data_dir.sh $target +} + +function check_variables_are_set { + for variable in $mandatory_variables ; do + if ! declare -p $variable ; then + echo "Mandatory variable ${variable/my/$dataset_type} is not set! " + echo "You should probably set the variable in the config file " + exit 1 + else + declare -p $variable + fi + done + + if [ ! -z ${optional_variables+x} ] ; then + for variable in $optional_variables ; do + eval my_variable=\$${variable} + echo "$variable=$my_variable" + done + fi +} + +if [ ! -f data/raw_${dataset_type}_data/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the ${dataset_type} set" + echo --------------------------------------------------------------------- + + l1=${#my_data_dir[*]} + l2=${#my_data_list[*]} + if [ "$l1" -ne "$l2" ]; then + echo "Error, the number of source files lists is not the same as the number of source dirs!" 
+ exit 1 + fi + + resource_string="" + if [ "$dataset_kind" == "unsupervised" ]; then + resource_string+=" --ignore-missing-txt true" + fi + + for i in `seq 0 $(($l1 - 1))`; do + resource_string+=" ${my_data_dir[$i]} " + resource_string+=" ${my_data_list[$i]} " + done + local/make_corpus_subset.sh $resource_string ./data/raw_${dataset_type}_data + touch data/raw_${dataset_type}_data/.done +fi +my_data_dir=`readlink -f ./data/raw_${dataset_type}_data` +[ -f $my_data_dir/filelist.list ] && my_data_list=$my_data_dir/filelist.list +nj_max=`cat $my_data_list | wc -l` || nj_max=`ls $my_data_dir/audio | wc -l` + +if [ "$nj_max" -lt "$my_nj" ] ; then + echo "Number of jobs ($my_nj) is too big!" + echo "The maximum reasonable number of jobs is $nj_max" + my_nj=$nj_max +fi + +##################################################################### +# +# Audio data directory preparation +# +##################################################################### +echo --------------------------------------------------------------------- +echo "Preparing ${dataset_kind} data files in ${dataset_dir} on" `date` +echo --------------------------------------------------------------------- +if [ ! -f $dataset_dir/.done ] ; then + if [ "$dataset_kind" == "supervised" ]; then + if [ "$dataset_segments" == "seg" ]; then + . ./local/datasets/supervised_seg.sh || exit 1 + elif [ "$dataset_segments" == "uem" ]; then + . ./local/datasets/supervised_uem.sh || exit 1 + elif [ "$dataset_segments" == "pem" ]; then + . ./local/datasets/supervised_pem.sh || exit 1 + else + echo "Unknown type of the dataset: \"$dataset_segments\"!"; + echo "Valid dataset types are: seg, uem, pem"; + exit 1 + fi + elif [ "$dataset_kind" == "unsupervised" ] ; then + if [ "$dataset_segments" == "seg" ] ; then + . ./local/datasets/unsupervised_seg.sh + elif [ "$dataset_segments" == "uem" ] ; then + . ./local/datasets/unsupervised_uem.sh + elif [ "$dataset_segments" == "pem" ] ; then + ##This combination does not really makes sense, + ##Because the PEM is that we get the segmentation + ##and because of the format of the segment files + ##the transcript as well + echo "ERROR: $dataset_segments combined with $dataset_type" + echo "does not really make any sense!" + exit 1 + #. ./local/datasets/unsupervised_pem.sh + else + echo "Unknown type of the dataset: \"$dataset_segments\"!"; + echo "Valid dataset types are: seg, uem, pem"; + exit 1 + fi + else + echo "Unknown kind of the dataset: \"$dataset_kind\"!"; + echo "Valid dataset kinds are: supervised, unsupervised, shadow"; + exit 1 + fi + + if [ ! -f ${dataset_dir}/.plp.done ]; then + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_kind} parametrization files in ${dataset_dir} on" `date` + echo --------------------------------------------------------------------- + make_plp ${dataset_dir} exp/make_plp/${dataset_id} plp + touch ${dataset_dir}/.plp.done + fi + touch $dataset_dir/.done +fi + +if [ -f exp/nnet3/extractor/final.ie ] && [ ! -f ${dataset_dir}_hires/.mfcc.done ]; then + dataset=$(basename $dataset_dir) + echo --------------------------------------------------------------------- + echo "Preparing ${dataset_kind} MFCC features in ${dataset_dir}_hires and corresponding iVectors in exp/nnet3/ivectors_$dataset on" `date` + echo --------------------------------------------------------------------- + if [ ! 
+
+if [ -f exp/nnet3/extractor/final.ie ] && [ ! -f ${dataset_dir}_hires/.mfcc.done ]; then
+  dataset=$(basename $dataset_dir)
+  echo ---------------------------------------------------------------------
+  echo "Preparing ${dataset_kind} MFCC features in ${dataset_dir}_hires and corresponding iVectors in exp/nnet3/ivectors_$dataset on" `date`
+  echo ---------------------------------------------------------------------
+  if [ ! -d ${dataset_dir}_hires ]; then
+    utils/copy_data_dir.sh data/${dataset_type}.${dataset_segments}_hires data/${dataset}_hires
+  fi
+  ln -sf ivectors_${dataset_type}.${dataset_segments} exp/nnet3/ivectors_${dataset} || true
+  touch ${dataset_dir}_hires/.done
+fi
+
+#####################################################################
+#
+# KWS data directory preparation
+#
+#####################################################################
+echo ---------------------------------------------------------------------
+echo "Preparing kws data files in ${dataset_dir} on" `date`
+echo ---------------------------------------------------------------------
+lang=data/lang.phn
+if ! $skip_kws ; then
+  if $extra_kws ; then
+    L1_lex=data/local/dict.phn/lexiconp.txt
+    . ./local/datasets/extra_kws.sh || exit 1
+  fi
+  if $vocab_kws ; then
+    . ./local/datasets/vocab_kws.sh || exit 1
+  fi
+fi
+
+if $data_only ; then
+  echo "Exiting, as data-only was requested..."
+  exit 0;
+fi
+
+####################################################################
+##
+## FMLLR decoding
+##
+####################################################################
+if [ ! -f data/langp_test.phn/.done ]; then
+  ln -sf lang.phn data/langp_test.phn || true
+  touch data/langp_test.phn/.done
+fi
+
+decode=exp/tri5/decode_${dataset_id}
+if [ ! -f ${decode}/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Spawning decoding with SAT models on" `date`
+  echo ---------------------------------------------------------------------
+  utils/mkgraph.sh \
+    data/langp_test.phn exp/tri5 exp/tri5/graph.phn |tee exp/tri5/mkgraph.phn.log
+
+  mkdir -p $decode
+  #By default, we do not care about the lattices for this step -- we just want the transforms
+  #Therefore, we will reduce the beam sizes, to reduce the decoding times
+  steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4\
+    --nj $my_nj --cmd "$decode_cmd" "${decode_extra_opts[@]}"\
+    exp/tri5/graph.phn ${dataset_dir} ${decode} |tee ${decode}/decode.log
+  touch ${decode}/.done
+fi
+
+if ! $fast_path ; then
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.phn ${decode}
+
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.phn ${decode}.si
+fi
+
+if $tri5_only; then
+  echo "--tri5-only is true. So exiting."
+  exit 0
+fi
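+# A note on "${decode_extra_opts[@]}" as used above: expanding an array with
+# [@] inside double quotes passes every element as its own word, and an empty
+# array contributes no words at all; a minimal sketch (hypothetical values):
+#   extra_opts=(--num-threads 4 --mem 2G)
+#   some_command "${extra_opts[@]}"   # four words; none if the array is empty
+# An unquoted $extra_opts would expand to the first element only.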
+####################################################################
+## SGMM2 decoding
+## We include the SGMM_MMI decoding inside this block, as we might only have
+## the DNN systems trained and not the PLP system. The DNN systems are built
+## only on top of the tri5 stage.
+####################################################################
+if [ -f exp/sgmm5/.done ]; then
+  decode=exp/sgmm5/decode_fmllr_${dataset_id}
+  if [ ! -f $decode/.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Spawning $decode on" `date`
+    echo ---------------------------------------------------------------------
+    utils/mkgraph.sh \
+      data/langp_test.phn exp/sgmm5 exp/sgmm5/graph.phn |tee exp/sgmm5/mkgraph.phn.log
+
+    mkdir -p $decode
+    steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \
+      --cmd "$decode_cmd" --transform-dir exp/tri5/decode_${dataset_id} "${decode_extra_opts[@]}"\
+      exp/sgmm5/graph.phn ${dataset_dir} $decode |tee $decode/decode.log
+    touch $decode/.done
+
+    if ! $fast_path ; then
+      local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+        --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+        --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+        "${lmwt_plp_extra_opts[@]}" \
+        ${dataset_dir} data/langp_test.phn exp/sgmm5/decode_fmllr_${dataset_id}
+    fi
+  fi
+
+  ####################################################################
+  ##
+  ## SGMM_MMI rescoring
+  ##
+  ####################################################################
+
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    if [ ! -f $decode/.done ]; then
+
+      mkdir -p $decode
+      steps/decode_sgmm2_rescore.sh --skip-scoring true \
+        --cmd "$decode_cmd" --iter $iter --transform-dir exp/tri5/decode_${dataset_id} \
+        data/langp_test.phn ${dataset_dir} exp/sgmm5/decode_fmllr_${dataset_id} $decode | tee ${decode}/decode.log
+
+      touch $decode/.done
+    fi
+  done
+
+  #We are done -- all lattices have been generated. We have to
+  #a) run MBR decoding
+  #b) run KW search
+  for iter in 1 2 3 4; do
+    # Score the rescored SGMM+MMI lattices.
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+      --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+      --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+      "${lmwt_plp_extra_opts[@]}" \
+      ${dataset_dir} data/langp_test.phn $decode
+  done
+fi
+
+
+
+####################################################################
+##
+## DNN ("compatibility") decoding -- also, just decode the "default" net
+##
+####################################################################
+if [ -f exp/tri6_nnet/.done ]; then
+  decode=exp/tri6_nnet/decode_${dataset_id}
+  if [ ! -f $decode/.done ]; then
+    mkdir -p $decode
+    steps/nnet2/decode.sh \
+      --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \
+      --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+      --skip-scoring true "${decode_extra_opts[@]}" \
+      --transform-dir exp/tri5/decode_${dataset_id} \
+      exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log
+
+    touch $decode/.done
+  fi
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_dnn_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.phn $decode
+fi
+
+####################################################################
+##
+## nnet3 model decoding
+##
+####################################################################
+if [ -f exp/nnet3/lstm_bidirectional_sp/.done ]; then
+  decode=exp/nnet3/lstm_bidirectional_sp/decode_${dataset_id}.phn
+  rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 "
+  decode_script=steps/nnet3/lstm/decode.sh
+  if [ !
-f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi + +if [ -f exp/nnet3/lstm_sp/.done ]; then + decode=exp/nnet3/lstm_sp/decode_${dataset_id}.phn + rnn_opts=" --extra-left-context 40 --extra-right-context 0 --frames-per-chunk 20 " + decode_script=steps/nnet3/lstm/decode.sh + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi + +if [ -f exp/$nnet3_model/.done ]; then + decode=exp/$nnet3_model/decode_${dataset_id}.phn + rnn_opts= + decode_script=steps/nnet3/decode.sh + if [ "$is_rnn" == "true" ]; then + rnn_opts=" --extra-left-context $extra_left_context --extra-right-context $extra_right_context --frames-per-chunk $frames_per_chunk " + decode_script=steps/nnet3/lstm/decode.sh + fi + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi + + +#################################################################### +## +## DNN (nextgen DNN) decoding +## +#################################################################### +if [ -f exp/tri6a_nnet/.done ]; then + decode=exp/tri6a_nnet/decode_${dataset_id} + if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi + + +#################################################################### +## +## DNN (ensemble) decoding +## +#################################################################### +if [ -f exp/tri6b_nnet/.done ]; then + decode=exp/tri6b_nnet/decode_${dataset_id} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode +fi +#################################################################### +## +## DNN_MPE decoding +## +#################################################################### +if [ -f exp/tri6_nnet_mpe/.done ]; then + for epoch in 1 2 3 4; do + decode=exp/tri6_nnet_mpe/decode_${dataset_id}_epoch$epoch + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh --minimize $minimize \ + --cmd "$decode_cmd" --nj $my_nj --iter epoch$epoch \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.phn $decode + done +fi + +#################################################################### +## +## DNN semi-supervised training decoding +## +#################################################################### +for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \ + tri6_nnet_supervised_tuning tri6_nnet_supervised_tuning2 ; do + if [ -f exp/$dnn/.done ]; then + decode=exp/$dnn/decode_${dataset_id} + if [ ! 
-f $decode/.done ]; then
+      mkdir -p $decode
+      steps/nnet2/decode.sh \
+        --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \
+        --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+        --skip-scoring true "${decode_extra_opts[@]}" \
+        --transform-dir exp/tri5/decode_${dataset_id} \
+        exp/tri5/graph.phn ${dataset_dir} $decode | tee $decode/decode.log
+
+      touch $decode/.done
+    fi
+
+    local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+      --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+      --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+      "${lmwt_dnn_extra_opts[@]}" \
+      ${dataset_dir} data/langp_test.phn $decode
+  fi
+done
+echo "Everything looking good...."
+exit 0
diff --git a/egs/babel/s5d/run-4-syll-anydecode.sh b/egs/babel/s5d/run-4-syll-anydecode.sh
new file mode 100755
index 00000000000..e69b168cf49
--- /dev/null
+++ b/egs/babel/s5d/run-4-syll-anydecode.sh
@@ -0,0 +1,613 @@
+#!/bin/bash
+set -e
+set -o pipefail
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+
+dir=dev10h.syll.pem
+kind=
+data_only=false
+fast_path=true
+skip_kws=false
+skip_stt=false
+skip_scoring=
+extra_kws=true
+vocab_kws=false
+tri5_only=false
+wip=0.5
+
+nnet3_model=nnet3/tdnn_sp
+is_rnn=false
+extra_left_context=0
+extra_right_context=0
+frames_per_chunk=0
+
+echo $0 "$@"
+
+. utils/parse_options.sh
+
+if [ $# -ne 0 ]; then
+  echo "Usage: $(basename $0) --type (dev10h.syll|dev2h.syll|eval.syll|shadow.syll)"
+  exit 1
+fi
+
+#This seems to be the only functioning way to ensure that the complete
+#set of scripts will exit when sourcing several of them together.
+#Otherwise, CTRL-C just terminates the deepest sourced script(?)
+# Let shell functions inherit ERR trap.  Same as `set -E'.
+set -o errtrace
+trap "echo Exited!; exit;" SIGINT SIGTERM
+
+./local/check_tools.sh || exit 1
+
+# Set proxy search parameters for the extended lexicon case.
+if [ -f data/.extlex ]; then
+  proxy_phone_beam=$extlex_proxy_phone_beam
+  proxy_phone_nbest=$extlex_proxy_phone_nbest
+  proxy_beam=$extlex_proxy_beam
+  proxy_nbest=$extlex_proxy_nbest
+fi
+
+dataset_segments=${dir##*.}
+dataset_dir=data/$dir
+dataset_id=$dir
+dataset_type=${dir%%.syll.*}
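+# Worked example of the parameter expansions above, assuming dir=dev10h.syll.pem:
+#   ${dir##*.}      -> pem     (## strips the longest prefix matching "*.")
+#   ${dir%%.syll.*} -> dev10h  (%% strips the longest suffix matching ".syll.*")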
+#By default, we want the script to accept how the dataset should be handled,
+#i.e. of what kind is the dataset
+if [ -z ${kind} ] ; then
+  if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ]; then
+    dataset_kind=supervised
+  else
+    dataset_kind=unsupervised
+  fi
+else
+  dataset_kind=$kind
+fi
+
+if [ -z $dataset_segments ]; then
+  echo "You have to specify the segmentation type as well"
+  echo "If you are trying to decode the PEM segmentation dir"
+  echo "such as data/dev10h, specify dev10h.pem"
+  echo "The valid segmentation types are:"
+  echo -e "\tpem #PEM segmentation"
+  echo -e "\tuem #UEM segmentation in the CMU database format"
+  echo -e "\tseg #UEM segmentation (kaldi-native)"
+fi
+
+if [ -z "${skip_scoring}" ] ; then
+  if [ "$dataset_kind" == "unsupervised" ]; then
+    skip_scoring=true
+  else
+    skip_scoring=false
+  fi
+fi
+
+#The $dataset_type value will be the dataset name without any extension
+eval my_data_dir=( "\${${dataset_type}_data_dir[@]}" )
+eval my_data_list=( "\${${dataset_type}_data_list[@]}" )
+if [ -z $my_data_dir ] || [ -z $my_data_list ] ; then
+  echo "Error: The dir you specified ($dataset_id) does not have an existing config";
+  exit 1
+fi
+
+eval my_stm_file=\$${dataset_type}_stm_file
+eval my_ecf_file=\$${dataset_type}_ecf_file
+eval my_rttm_file=\$${dataset_type}_rttm_file
+eval my_nj=\$${dataset_type}_nj  #for shadow, this will be re-set when appropriate
+
+if [ -z "$my_nj" ]; then
+  echo >&2 "You didn't specify the number of jobs -- variable \"${dataset_type}_nj\" not defined."
+  exit 1
+fi
+my_nj=$(($my_nj * 2))
+
+my_subset_ecf=false
+eval ind=\${${dataset_type}_subset_ecf+x}
+if [ "$ind" == "x" ] ; then
+  eval my_subset_ecf=\$${dataset_type}_subset_ecf
+fi
+
+declare -A my_kwlists=()
+eval my_kwlists_keys="\${!${dataset_type}_kwlists[@]}"
+for key in $my_kwlists_keys  # make sure you include the quotes there
+do
+  eval my_kwlists_val="\${${dataset_type}_kwlists[$key]}"
+  my_kwlists["$key"]="${my_kwlists_val}"
+done
+declare -p my_kwlists
+export my_kwlists
+
+#Just a minor safety precaution to prevent using incorrect settings
+#The dataset_* variables should be used.
+set -e
+set -o pipefail
+set -u
+unset dir
+unset kind
+
+function make_plp {
+  target=$1
+  logdir=$2
+  output=$3
+  if $use_pitch; then
+    steps/make_plp_pitch.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  else
+    steps/make_plp.sh --cmd "$decode_cmd" --nj $my_nj $target $logdir $output
+  fi
+  utils/fix_data_dir.sh $target
+  steps/compute_cmvn_stats.sh $target $logdir $output
+  utils/fix_data_dir.sh $target
+}
+
+function check_variables_are_set {
+  for variable in $mandatory_variables ; do
+    if ! declare -p $variable ; then
+      echo "Mandatory variable ${variable/my/$dataset_type} is not set!"
+      echo "You should probably set the variable in the config file"
+      exit 1
+    else
+      declare -p $variable
+    fi
+  done
+
+  if [ ! -z ${optional_variables+x} ] ; then
+    for variable in $optional_variables ; do
+      eval my_variable=\$${variable}
+      echo "$variable=$my_variable"
+    done
+  fi
+}
+
+if [ ! -f data/raw_${dataset_type}_data/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Subsetting the ${dataset_type} set"
+  echo ---------------------------------------------------------------------
+
+  l1=${#my_data_dir[*]}
+  l2=${#my_data_list[*]}
+  if [ "$l1" -ne "$l2" ]; then
+    echo "Error: the number of source file lists is not the same as the number of source dirs!"
+    exit 1
+  fi
+
+  resource_string=""
+  if [ "$dataset_kind" == "unsupervised" ]; then
+    resource_string+=" --ignore-missing-txt true"
+  fi
+
+  for i in `seq 0 $(($l1 - 1))`; do
+    resource_string+=" ${my_data_dir[$i]} "
+    resource_string+=" ${my_data_list[$i]} "
+  done
+  local/make_corpus_subset.sh $resource_string ./data/raw_${dataset_type}_data
+  touch data/raw_${dataset_type}_data/.done
+fi
+my_data_dir=`readlink -f ./data/raw_${dataset_type}_data`
+[ -f $my_data_dir/filelist.list ] && my_data_list=$my_data_dir/filelist.list
+nj_max=`cat $my_data_list | wc -l` || nj_max=`ls $my_data_dir/audio | wc -l`
+
+if [ "$nj_max" -lt "$my_nj" ] ; then
+  echo "Number of jobs ($my_nj) is too big!"
+  echo "The maximum reasonable number of jobs is $nj_max"
+  my_nj=$nj_max
+fi
+
+#####################################################################
+#
+# Audio data directory preparation
+#
+#####################################################################
+echo ---------------------------------------------------------------------
+echo "Preparing ${dataset_kind} data files in ${dataset_dir} on" `date`
+echo ---------------------------------------------------------------------
+if [ ! -f $dataset_dir/.done ] ; then
+  if [ "$dataset_kind" == "supervised" ]; then
+    if [ "$dataset_segments" == "seg" ]; then
+      . ./local/datasets/supervised_seg.sh || exit 1
+    elif [ "$dataset_segments" == "uem" ]; then
+      . ./local/datasets/supervised_uem.sh || exit 1
+    elif [ "$dataset_segments" == "pem" ]; then
+      . ./local/datasets/supervised_pem.sh || exit 1
+    else
+      echo "Unknown type of the dataset: \"$dataset_segments\"!";
+      echo "Valid dataset types are: seg, uem, pem";
+      exit 1
+    fi
+  elif [ "$dataset_kind" == "unsupervised" ] ; then
+    if [ "$dataset_segments" == "seg" ] ; then
+      . ./local/datasets/unsupervised_seg.sh
+    elif [ "$dataset_segments" == "uem" ] ; then
+      . ./local/datasets/unsupervised_uem.sh
+    elif [ "$dataset_segments" == "pem" ] ; then
+      ##This combination does not really make sense:
+      ##with PEM we already get the segmentation and, because of the
+      ##format of the segment files, the transcript as well
+      echo "ERROR: $dataset_segments combined with $dataset_type"
+      echo "does not really make any sense!"
+      exit 1
+      #. ./local/datasets/unsupervised_pem.sh
+    else
+      echo "Unknown type of the dataset: \"$dataset_segments\"!";
+      echo "Valid dataset types are: seg, uem, pem";
+      exit 1
+    fi
+  else
+    echo "Unknown kind of the dataset: \"$dataset_kind\"!";
+    echo "Valid dataset kinds are: supervised, unsupervised, shadow";
+    exit 1
+  fi
+
+  if [ ! -f ${dataset_dir}/.plp.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Preparing ${dataset_kind} parametrization files in ${dataset_dir} on" `date`
+    echo ---------------------------------------------------------------------
+    make_plp ${dataset_dir} exp/make_plp/${dataset_id} plp
+    touch ${dataset_dir}/.plp.done
+  fi
+  touch $dataset_dir/.done
+fi
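+# The per-dataset handlers above are sourced (". script.sh") rather than run,
+# so they share this script's variables and their "exit" terminates this
+# script as well; a minimal sketch of the difference (hypothetical script):
+#   . ./handler.sh   # same shell: sees $dataset_dir, its exit ends us too
+#   ./handler.sh     # subshell: isolated variables, its exit only ends itself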
+
+if [ -f exp/nnet3/extractor/final.ie ] && [ ! -f ${dataset_dir}_hires/.mfcc.done ]; then
+  dataset=$(basename $dataset_dir)
+  echo ---------------------------------------------------------------------
+  echo "Preparing ${dataset_kind} MFCC features in ${dataset_dir}_hires and corresponding iVectors in exp/nnet3/ivectors_$dataset on" `date`
+  echo ---------------------------------------------------------------------
+  if [ ! -d ${dataset_dir}_hires ]; then
+    utils/copy_data_dir.sh data/${dataset_type}.${dataset_segments}_hires data/${dataset}_hires
+  fi
+  ln -sf ivectors_${dataset_type}.${dataset_segments} exp/nnet3/ivectors_${dataset} || true
+  touch ${dataset_dir}_hires/.done
+fi
+
+#####################################################################
+#
+# KWS data directory preparation
+#
+#####################################################################
+echo ---------------------------------------------------------------------
+echo "Preparing kws data files in ${dataset_dir} on" `date`
+echo ---------------------------------------------------------------------
+lang=data/lang.syll
+if ! $skip_kws ; then
+  if $extra_kws ; then
+    L1_lex=data/local/dict.syll/lexiconp.txt
+    . ./local/datasets/extra_kws.sh || exit 1
+  fi
+  if $vocab_kws ; then
+    . ./local/datasets/vocab_kws.sh || exit 1
+  fi
+fi
+
+if $data_only ; then
+  echo "Exiting, as data-only was requested..."
+  exit 0;
+fi
+
+####################################################################
+##
+## FMLLR decoding
+##
+####################################################################
+if [ ! -f data/langp_test.syll/.done ]; then
+  ln -sf lang.syll data/langp_test.syll || true
+  touch data/langp_test.syll/.done
+fi
+
+decode=exp/tri5/decode_${dataset_id}
+if [ ! -f ${decode}/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Spawning decoding with SAT models on" `date`
+  echo ---------------------------------------------------------------------
+  utils/mkgraph.sh \
+    data/langp_test.syll exp/tri5 exp/tri5/graph.syll |tee exp/tri5/mkgraph.syll.log
+
+  mkdir -p $decode
+  #By default, we do not care about the lattices for this step -- we just want the transforms
+  #Therefore, we will reduce the beam sizes, to reduce the decoding times
+  steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4\
+    --nj $my_nj --cmd "$decode_cmd" "${decode_extra_opts[@]}"\
+    exp/tri5/graph.syll ${dataset_dir} ${decode} |tee ${decode}/decode.log
+  touch ${decode}/.done
+fi
+
+if ! $fast_path ; then
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.syll ${decode}
+
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_plp_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.syll ${decode}.si
+fi
+
+if $tri5_only; then
+  echo "--tri5-only is true. So exiting."
+  exit 0
+fi
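+# Because "set -o pipefail" is in effect, piping the decoders through tee (as
+# above) still propagates a decoding failure to "set -e"; a minimal sketch:
+#   set -o pipefail
+#   false | tee log.txt   # pipeline status is 1 (from false), not tee's 0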
+####################################################################
+## SGMM2 decoding
+## We include the SGMM_MMI decoding inside this block, as we might only have
+## the DNN systems trained and not the PLP system. The DNN systems are built
+## only on top of the tri5 stage.
+####################################################################
+if [ -f exp/sgmm5/.done ]; then
+  decode=exp/sgmm5/decode_fmllr_${dataset_id}
+  if [ ! -f $decode/.done ]; then
+    echo ---------------------------------------------------------------------
+    echo "Spawning $decode on" `date`
+    echo ---------------------------------------------------------------------
+    utils/mkgraph.sh \
+      data/langp_test.syll exp/sgmm5 exp/sgmm5/graph.syll |tee exp/sgmm5/mkgraph.syll.log
+
+    mkdir -p $decode
+    steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \
+      --cmd "$decode_cmd" --transform-dir exp/tri5/decode_${dataset_id} "${decode_extra_opts[@]}"\
+      exp/sgmm5/graph.syll ${dataset_dir} $decode |tee $decode/decode.log
+    touch $decode/.done
+
+    if ! $fast_path ; then
+      local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+        --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+        --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+        "${lmwt_plp_extra_opts[@]}" \
+        ${dataset_dir} data/langp_test.syll exp/sgmm5/decode_fmllr_${dataset_id}
+    fi
+  fi
+
+  ####################################################################
+  ##
+  ## SGMM_MMI rescoring
+  ##
+  ####################################################################
+
+  for iter in 1 2 3 4; do
+    # Decode SGMM+MMI (via rescoring).
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    if [ ! -f $decode/.done ]; then
+
+      mkdir -p $decode
+      steps/decode_sgmm2_rescore.sh --skip-scoring true \
+        --cmd "$decode_cmd" --iter $iter --transform-dir exp/tri5/decode_${dataset_id} \
+        data/langp_test.syll ${dataset_dir} exp/sgmm5/decode_fmllr_${dataset_id} $decode | tee ${decode}/decode.log
+
+      touch $decode/.done
+    fi
+  done
+
+  #We are done -- all lattices have been generated. We have to
+  #a) run MBR decoding
+  #b) run KW search
+  for iter in 1 2 3 4; do
+    # Score the rescored SGMM+MMI lattices.
+    decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter
+    local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+      --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+      --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+      "${lmwt_plp_extra_opts[@]}" \
+      ${dataset_dir} data/langp_test.syll $decode
+  done
+fi
+
+
+
+####################################################################
+##
+## DNN ("compatibility") decoding -- also, just decode the "default" net
+##
+####################################################################
+if [ -f exp/tri6_nnet/.done ]; then
+  decode=exp/tri6_nnet/decode_${dataset_id}
+  if [ ! -f $decode/.done ]; then
+    mkdir -p $decode
+    steps/nnet2/decode.sh \
+      --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \
+      --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+      --skip-scoring true "${decode_extra_opts[@]}" \
+      --transform-dir exp/tri5/decode_${dataset_id} \
+      exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log
+
+    touch $decode/.done
+  fi
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+    --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+    "${lmwt_dnn_extra_opts[@]}" \
+    ${dataset_dir} data/langp_test.syll $decode
+fi
+
+####################################################################
+##
+## nnet3 model decoding
+##
+####################################################################
+if [ -f exp/nnet3/lstm_bidirectional_sp/.done ]; then
+  decode=exp/nnet3/lstm_bidirectional_sp/decode_${dataset_id}.syll
+  rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 "
+  decode_script=steps/nnet3/lstm/decode.sh
+  if [ !
-f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi + +if [ -f exp/nnet3/lstm_sp/.done ]; then + decode=exp/nnet3/lstm_sp/decode_${dataset_id}.syll + rnn_opts=" --extra-left-context 40 --extra-right-context 0 --frames-per-chunk 20 " + decode_script=steps/nnet3/lstm/decode.sh + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi + +if [ -f exp/$nnet3_model/.done ]; then + decode=exp/$nnet3_model/decode_${dataset_id}.syll + rnn_opts= + decode_script=steps/nnet3/decode.sh + if [ "$is_rnn" == "true" ]; then + rnn_opts=" --extra-left-context $extra_left_context --extra-right-context $extra_right_context --frames-per-chunk $frames_per_chunk " + decode_script=steps/nnet3/lstm/decode.sh + fi + if [ ! -f $decode/.done ]; then + mkdir -p $decode + $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true \ + --online-ivector-dir exp/nnet3/ivectors_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir}_hires $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi + + +#################################################################### +## +## DNN (nextgen DNN) decoding +## +#################################################################### +if [ -f exp/tri6a_nnet/.done ]; then + decode=exp/tri6a_nnet/decode_${dataset_id} + if [ ! 
-f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi + + +#################################################################### +## +## DNN (ensemble) decoding +## +#################################################################### +if [ -f exp/tri6b_nnet/.done ]; then + decode=exp/tri6b_nnet/decode_${dataset_id} + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh \ + --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode +fi +#################################################################### +## +## DNN_MPE decoding +## +#################################################################### +if [ -f exp/tri6_nnet_mpe/.done ]; then + for epoch in 1 2 3 4; do + decode=exp/tri6_nnet_mpe/decode_${dataset_id}_epoch$epoch + if [ ! -f $decode/.done ]; then + mkdir -p $decode + steps/nnet2/decode.sh --minimize $minimize \ + --cmd "$decode_cmd" --nj $my_nj --iter epoch$epoch \ + --beam $dnn_beam --lattice-beam $dnn_lat_beam \ + --skip-scoring true "${decode_extra_opts[@]}" \ + --transform-dir exp/tri5/decode_${dataset_id} \ + exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log + + touch $decode/.done + fi + + local/run_kws_stt_task.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_dnn_extra_opts[@]}" \ + ${dataset_dir} data/langp_test.syll $decode + done +fi + +#################################################################### +## +## DNN semi-supervised training decoding +## +#################################################################### +for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \ + tri6_nnet_supervised_tuning tri6_nnet_supervised_tuning2 ; do + if [ -f exp/$dnn/.done ]; then + decode=exp/$dnn/decode_${dataset_id} + if [ ! 
-f $decode/.done ]; then
+      mkdir -p $decode
+      steps/nnet2/decode.sh \
+        --minimize $minimize --cmd "$decode_cmd" --nj $my_nj \
+        --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+        --skip-scoring true "${decode_extra_opts[@]}" \
+        --transform-dir exp/tri5/decode_${dataset_id} \
+        exp/tri5/graph.syll ${dataset_dir} $decode | tee $decode/decode.log
+
+      touch $decode/.done
+    fi
+
+    local/run_kws_stt_task.sh --cer $cer --max-states $max_states \
+      --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \
+      --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \
+      "${lmwt_dnn_extra_opts[@]}" \
+      ${dataset_dir} data/langp_test.syll $decode
+  fi
+done
+echo "Everything looking good...."
+exit 0
diff --git a/egs/babel/s5d/run-4b-anydecode-bnf.sh b/egs/babel/s5d/run-4b-anydecode-bnf.sh
new file mode 100755
index 00000000000..8298021feb3
--- /dev/null
+++ b/egs/babel/s5d/run-4b-anydecode-bnf.sh
@@ -0,0 +1,259 @@
+#!/bin/bash
+# Copyright 2014  Pegah Ghahremani
+# Apache 2.0
+
+# decode BNF + sgmm_mmi system
+set -e
+set -o pipefail
+
+. conf/common_vars.sh || exit 1;
+. ./lang.conf || exit 1;
+
+
+dir=dev10h.pem
+kind=
+data_only=false
+fast_path=true
+skip_kws=false
+extra_kws=false
+skip_stt=false
+skip_scoring=false
+tmpdir=`pwd`
+semisupervised=true
+unsup_string=
+
+. utils/parse_options.sh
+
+type=$dir
+
+if [ $# -ne 0 ]; then
+  echo "Usage: $(basename $0) --type (dev10h|dev2h|eval|shadow)"
+  echo "--semisupervised   # set to false to skip unsupervised training."
+  exit 1
+fi
+
+if [ $babel_type == "full" ] && $semisupervised; then
+  echo "Error: Using unsupervised training for fullLP is meaningless, use semisupervised=false"
+  exit 1
+fi
+
+if [ -z "$unsup_string" ] ; then
+  if $semisupervised ; then
+    unsup_string="_semisup"
+  else
+    unsup_string=""  # empty: supervised training; "_semisup": unsupervised BNF training
+  fi
+fi
+
+if ! echo {dev10h,dev2h,eval,unsup,shadow}{,.pem,.uem,.seg} | grep -w "$type" >/dev/null; then
+  # note: echo dev10h.uem | grep -w dev10h will produce a match, but this
+  # doesn't matter because dev10h is also a valid value.
+  echo "Invalid variable type=${type}, valid values are " {dev10h,dev2h,eval,unsup}{,.uem,.seg}
+  exit 1;
+fi
+
+dataset_segments=${dir##*.}
+dataset_dir=data/$dir
+dataset_id=$dir
+dataset_type=${dir%%.*}
+#By default, we want the script to accept how the dataset should be handled,
+#i.e. of what kind is the dataset
+if [ -z ${kind} ] ; then
+  if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ] ; then
+    dataset_kind=supervised
+  else
+    dataset_kind=unsupervised
+  fi
+else
+  dataset_kind=$kind
+fi
+
+if [ -z $dataset_segments ]; then
+  echo "You have to specify the segmentation type as well"
+  echo "If you are trying to decode the PEM segmentation dir"
+  echo "such as data/dev10h, specify dev10h.pem"
+  echo "The valid segmentation types are:"
+  echo -e "\tpem #PEM segmentation"
+  echo -e "\tuem #UEM segmentation in the CMU database format"
+  echo -e "\tseg #UEM segmentation (kaldi-native)"
+fi
+
+if [ "$dataset_kind" == "unsupervised" ]; then
+  skip_scoring=true
+fi
+
+dirid=${type}
+exp_dir=exp_bnf${unsup_string}
+data_bnf_dir=data_bnf${unsup_string}
+param_bnf_dir=param_bnf${unsup_string}
+datadir=$data_bnf_dir/${dirid}
+
+[ ! -d data/${dirid} ] && echo "No such directory data/${dirid}" && exit 1;
+[ ! -d exp/tri5/decode_${dirid} ] && echo "No such directory exp/tri5/decode_${dirid}" && exit 1;
+
+# Set my_nj; typically 64.
+my_nj=`cat exp/tri5/decode_${dirid}/num_jobs` || exit 1;
+
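+# The guards below use the "-nt" (newer-than) test to redo a stage only when
+# its stamp file is missing or older than an input, make-style; a minimal
+# sketch (hypothetical paths and command):
+#   if [ ! out/.done -nt in/.done ]; then   # true if out/.done is absent or stale
+#     rebuild_outputs
+#     touch out/.done
+#   fi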
+if [ ! $data_bnf_dir/${dirid}_bnf/.done -nt exp/tri5/decode_${dirid}/.done ] || \
+   [ ! $data_bnf_dir/${dirid}_bnf/.done -nt $exp_dir/tri6_bnf/.done ]; then
+  # put the archives in $param_bnf_dir/.
+  steps/nnet2/dump_bottleneck_features.sh --nj $my_nj --cmd "$train_cmd" \
+    --transform-dir exp/tri5/decode_${dirid} data/${dirid} $data_bnf_dir/${dirid}_bnf $exp_dir/tri6_bnf $param_bnf_dir $exp_dir/dump_bnf
+  touch $data_bnf_dir/${dirid}_bnf/.done
+fi
+
+if [ ! $data_bnf_dir/${dirid}/.done -nt $data_bnf_dir/${dirid}_bnf/.done ]; then
+  steps/nnet/make_fmllr_feats.sh --cmd "$train_cmd --max-jobs-run 10" \
+    --nj $train_nj --transform-dir exp/tri5/decode_${dirid} $data_bnf_dir/${dirid}_sat data/${dirid} \
+    exp/tri5_ali $exp_dir/make_fmllr_feats/log $param_bnf_dir/
+
+  steps/append_feats.sh --cmd "$train_cmd" --nj 4 \
+    $data_bnf_dir/${dirid}_bnf $data_bnf_dir/${dirid}_sat $data_bnf_dir/${dirid} \
+    $exp_dir/append_feats/log $param_bnf_dir/
+  steps/compute_cmvn_stats.sh --fake $data_bnf_dir/${dirid} $exp_dir/make_fmllr_feats $param_bnf_dir
+  rm -r $data_bnf_dir/${dirid}_sat
+  if ! $skip_kws ; then
+    cp -r data/${dirid}/*kws* $data_bnf_dir/${dirid}/ || true
+  fi
+  touch $data_bnf_dir/${dirid}/.done
+fi
+if ! $skip_kws ; then
+  rm -rf $data_bnf_dir/${dirid}/*kws*
+  cp -r data/${dirid}/*kws* $data_bnf_dir/${dirid}/ || true
+fi
+
+
+if $data_only ; then
+  echo "Exiting, as data-only was requested..."
+  exit 0
+fi
+
+####################################################################
+##
+## FMLLR decoding
+##
+####################################################################
+decode=$exp_dir/tri6/decode_${dirid}
+if [ ! -f ${decode}/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Decoding with SAT models on top of bottleneck features on" `date`
+  echo ---------------------------------------------------------------------
+  utils/mkgraph.sh \
+    data/langp_test $exp_dir/tri6 $exp_dir/tri6/graph |tee $exp_dir/tri6/mkgraph.log
+
+  mkdir -p $decode
+  #By default, we do not care about the lattices for this step -- we just want the transforms
+  #Therefore, we will reduce the beam sizes, to reduce the decoding times
+  steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4 \
+    --acwt $bnf_decode_acwt \
+    --nj $my_nj --cmd "$decode_cmd" "${decode_extra_opts[@]}"\
+    $exp_dir/tri6/graph ${datadir} ${decode} |tee ${decode}/decode.log
+  touch ${decode}/.done
+fi
+
+if ! $fast_path ; then
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip\
+    "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
+    ${datadir} data/langp_test ${decode}
+
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \
+    "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
+    ${datadir} data/langp_test ${decode}.si
+fi
+
+####################################################################
+## SGMM2 decoding
+####################################################################
+decode=$exp_dir/sgmm7/decode_fmllr_${dirid}
+if [ ! -f $decode/.done ]; then
+  echo ---------------------------------------------------------------------
+  echo "Spawning $decode on" `date`
+  echo ---------------------------------------------------------------------
+  utils/mkgraph.sh \
+    data/langp_test $exp_dir/sgmm7 $exp_dir/sgmm7/graph |tee $exp_dir/sgmm7/mkgraph.log
+
+  mkdir -p $decode
+  steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \
+    --acwt $bnf_decode_acwt \
+    --cmd "$decode_cmd" --transform-dir $exp_dir/tri6/decode_${dirid} "${decode_extra_opts[@]}"\
+    $exp_dir/sgmm7/graph ${datadir} $decode |tee $decode/decode.log
+  touch $decode/.done
+fi
+
+if ! $fast_path ; then
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring \
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \
+    "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
+    ${datadir} data/langp_test $exp_dir/sgmm7/decode_fmllr_${dirid}
+fi
+
+####################################################################
+##
+## SGMM_MMI rescoring
+##
+####################################################################
+
+for iter in 1 2 3 4; do
+  # Decode SGMM+MMI (via rescoring).
+  decode=$exp_dir/sgmm7_mmi_b0.1/decode_fmllr_${dirid}_it$iter
+  if [ ! -f $decode/.done ]; then
+
+    mkdir -p $decode
+    steps/decode_sgmm2_rescore.sh --skip-scoring true \
+      --cmd "$decode_cmd" --iter $iter --transform-dir $exp_dir/tri6/decode_${dirid} \
+      data/langp_test ${datadir} $exp_dir/sgmm7/decode_fmllr_${dirid} $decode | tee ${decode}/decode.log
+
+    touch $decode/.done
+  fi
+done
+
+#We are done -- all lattices have been generated. We have to
+#a) run MBR decoding
+#b) run KW search
+for iter in 1 2 3 4; do
+  # Score the rescored SGMM+MMI lattices.
+  decode=$exp_dir/sgmm7_mmi_b0.1/decode_fmllr_${dirid}_it$iter
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \
+    "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
+    ${datadir} data/langp_test $decode
+done
+
+
+if [ -f $exp_dir/tri7_nnet/.done ] &&
+   [[ ( ! $exp_dir/tri7_nnet/decode_${dirid}/.done -nt $datadir/.done ) || \
+      ( ! $exp_dir/tri7_nnet/decode_${dirid}/.done -nt $exp_dir/tri7_nnet/.done ) ]]; then
+
+  echo ---------------------------------------------------------------------
+  echo "Decoding hybrid system on top of bottleneck features on" `date`
+  echo ---------------------------------------------------------------------
+
+  # We use the graph from tri6.
+  utils/mkgraph.sh \
+    data/langp_test $exp_dir/tri6 $exp_dir/tri6/graph |tee $exp_dir/tri6/mkgraph.log
+
+  decode=$exp_dir/tri7_nnet/decode_${dirid}
+  if [ ! -f $decode/.done ]; then
+    mkdir -p $decode
+    steps/nnet2/decode.sh --cmd "$decode_cmd" --nj $my_nj \
+      --acwt $bnf_decode_acwt \
+      --beam $dnn_beam --lattice-beam $dnn_lat_beam \
+      --skip-scoring true "${decode_extra_opts[@]}" \
+      --feat-type raw \
+      $exp_dir/tri6/graph ${datadir} $decode | tee $decode/decode.log
+
+    touch $decode/.done
+  fi
+
+  decode=$exp_dir/tri7_nnet/decode_${dirid}
+  local/run_kws_stt_task.sh --cer $cer --max-states $max_states --skip-scoring $skip_scoring\
+    --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt --extra-kws $extra_kws --wip $wip \
+    "${shadow_set_extra_opts[@]}" "${lmwt_bnf_extra_opts[@]}" \
+    ${datadir} data/langp_test $decode
+
+fi
+
+echo "$0: Everything looking good...."
+exit 0
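The tri7_nnet guard above chains two newer-than tests: the hybrid decode is
redone when its .done stamp is older than either the data directory or the
model. A minimal sketch of the compound test, with hypothetical names:

    if [[ ( ! out/.done -nt data/.done ) || ( ! out/.done -nt model/.done ) ]]; then
      redecode_hybrid
    fi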
diff --git a/egs/babel/s5d/run-6-combine.sh b/egs/babel/s5d/run-6-combine.sh
new file mode 100755
index 00000000000..81dc42caca3
--- /dev/null
+++ b/egs/babel/s5d/run-6-combine.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+
+. conf/common_vars.sh
+. ./lang.conf
+. ./cmd.sh
+
+set -e
+set -o pipefail
+set -u
+
+function best_system_path_kws {
+  path_to_outputs=$1
+
+  best_out=`(find $path_to_outputs -name "sum.txt" | xargs grep "^| *Occ") | cut -f 1,13,17 -d '|' | sed 's/|//g' | sort -r -n -k 3 | head -n 1 | awk '{print $1}'`
+  echo `dirname $best_out`
+}
+
+function best_system_path_stt {
+  path_to_outputs=$1
+  best_out=`(find $path_to_outputs -name "*.ctm.sys" | xargs grep Avg) | sed 's/|//g' | column -t | sort -n -k 9 | head -n 1 | awk '{print $1}'`
+  echo `dirname $best_out`
+}
+# Wait till the main run.sh gets to the stage where it's
+# finished aligning the tri5 model.
+
+function lm_offsets {
+  min=999
+  for dir in "$@" ; do
+    lmw=${dir##*score_}
+
+    [ $lmw -le $min ] && min=$lmw
+  done
+
+  lat_offset_str=""
+  for dir in "$@" ; do
+    latdir_dir=`dirname $dir`
+    lmw=${dir##*score_}
+
+    offset=$(( $lmw - $min ))
+    if [ $offset -gt 0 ] ; then
+      lat_offset_str="$lat_offset_str ${latdir_dir}:$offset "
+    else
+      lat_offset_str="$lat_offset_str ${latdir_dir} "
+    fi
+  done
+
+  echo $lat_offset_str
+
+}
+
+plp_kws=`best_system_path_kws "exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h_it*/kws_*"`
+plp_stt=`best_system_path_stt "exp/sgmm5_mmi_b0.1/decode_fmllr_dev10h_it*"`
+
+dnn_kws=`best_system_path_kws "exp/tri6_nnet/decode_dev10h/kws_*"`
+dnn_stt=`best_system_path_stt "exp/tri6_nnet/decode_dev10h/"`
+
+bnf_kws=`best_system_path_kws "exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h_it*/kws_*"`
+bnf_stt=`best_system_path_stt "exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_dev10h_it*"`
+
+
+
+echo local/score_combine.sh --cmd "$decode_cmd" data/dev10h data/lang `lm_offsets $plp_stt $dnn_stt $bnf_stt` exp/combine/dev10h
+#local/score_combine.sh --cmd "$decode_cmd" data/dev10h data/lang `lm_offsets $plp_stt $dnn_stt $bnf_stt` exp/combine/dev10h
+
+echo local/kws_combine.sh --cmd "$decode_cmd" data/dev10h data/lang $plp_kws $dnn_kws $bnf_kws
+#local/kws_combine.sh --cmd "$decode_cmd" data/dev10h data/lang $plp_kws/kwslist.xml $dnn_kws/kwslist.xml $bnf_kws/kwslist.xml exp/combine/dev10h/
+
+mkdir -p exp/combine/kws_rescore
+#local/rescoring/rescore_repeats.sh --cmd "$decode_cmd" \
+#  exp/combine/dev10h/ data/dev10h data/train/text exp/combine/kws_rescore
+
+exit 0
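The lm_offsets function above lines systems up by their best LM weights: it
finds the smallest weight and emits each lattice dir with its offset from that
minimum. For example, with best scoring dirs at LM weights 10, 12 and 15:

    lm_offsets exp/a/score_10 exp/b/score_12 exp/c/score_15
    # prints: exp/a exp/b:2 exp/c:5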
diff --git a/egs/babel/s5d/steps b/egs/babel/s5d/steps
new file mode 120000
index 00000000000..6e99bf5b5ad
--- /dev/null
+++ b/egs/babel/s5d/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/babel/s5d/utils b/egs/babel/s5d/utils
new file mode 120000
index 00000000000..b240885218f
--- /dev/null
+++ b/egs/babel/s5d/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file
diff --git a/egs/hkust/s5/local/nnet3/run_ivector_common.sh b/egs/hkust/s5/local/nnet3/run_ivector_common.sh
index bbdb5796c22..2ef33e43081 100755
--- a/egs/hkust/s5/local/nnet3/run_ivector_common.sh
+++ b/egs/hkust/s5/local/nnet3/run_ivector_common.sh
@@ -71,14 +71,14 @@ fi
 if [ $stage -le 3 ] && [ -z $ivector_extractor ]; then
   steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 \
     --num-frames 700000 \
-    data/train_hires 512 exp/nnet3/tri5 exp/nnet3/diag_ubm
+    data/train_hires_nopitch 512 exp/nnet3/tri5 exp/nnet3/diag_ubm
 fi
 
 if [ $stage -le 4 ] && [ -z $ivector_extractor ]; then
   # iVector extractors can in general be sensitive to the amount of data, but
   # this one has a fairly small dim (defaults to 100)
   steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
-    data/train_hires exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
+    data/train_hires_nopitch exp/nnet3/diag_ubm exp/nnet3/extractor || exit 1;
   ivector_extractor=exp/nnet3/extractor
 fi
diff --git a/egs/wsj/s5/steps/make_index.sh b/egs/wsj/s5/steps/make_index.sh
index 5bc9af25241..6c29dbbe8b6 100755
--- a/egs/wsj/s5/steps/make_index.sh
+++ b/egs/wsj/s5/steps/make_index.sh
@@ -19,6 +19,9 @@ skip_optimization=false # If you only search for a few thousand keywords, you
                         # can skip the optimization; but if you're going to search for
                         # millions of keywords, you'd better set this optimization to
                         # false and do the optimization on the final index.
+frame_subsampling_factor= # We will try to autodetect this. You should specify
+                          # the right value if your directory structure is
+                          # non-standard.
 # End configuration section.
 
 echo "$0 $@"  # Print the command line for logging
@@ -52,25 +55,39 @@ srcdir=`dirname $decodedir`; # The model directory is one level up from decoding
 mkdir -p $kwsdir/log;
 nj=`cat $decodedir/num_jobs` || exit 1;
 echo $nj > $kwsdir/num_jobs;
+
+utter_id=$kwsdatadir/utter_id
+if [ ! -f $utter_id ] ; then
+  utter_id=$kwsdatadir/utt.map
+fi
+
 if [ -z "$model" ]; then # if --model was not specified on the command line...
   model=$srcdir/final.mdl;
 fi
 
 for f in $model $decodedir/lat.1.gz $utter_id; do
-  [ ! -f $f ] && echo "make_index.sh: no such file $f" && exit 1;
+  [ ! -f $f ] && echo "$0: Error: no such file $f" && exit 1;
 done
 
-echo "Using model: $model"
+echo "$0: Using model: $model"
 
 if [ ! -z $silence_word ]; then
   silence_int=`grep -w $silence_word $langdir/words.txt | awk '{print $2}'`
   [ -z $silence_int ] && \
-    echo "Error: could not find integer representation of silence word $silence_word" && exit 1;
+    echo "$0: Error: could not find integer representation of silence word $silence_word" && exit 1;
   silence_opt="--silence-label=$silence_int"
 fi
 
+if [ -z "$frame_subsampling_factor" ]; then
+  if [ -f $decodedir/../frame_subsampling_factor ] ; then
+    frame_subsampling_factor=$(cat $decodedir/../frame_subsampling_factor)
+  else
+    frame_subsampling_factor=1
+  fi
+  echo "$0: Frame subsampling factor autodetected: $frame_subsampling_factor"
+fi
+
 word_boundary=$langdir/phones/word_boundary.int
 align_lexicon=$langdir/phones/align_lexicon.int
 if [ -f $word_boundary ] ; then
@@ -79,6 +96,7 @@ if [ -f $word_boundary ] ; then
     lattice-align-words $silence_opt --max-expand=$max_expand $word_boundary $model ark:- ark:- \| \
     lattice-scale --acoustic-scale=$acwt --lm-scale=$lmwt ark:- ark:- \| \
     lattice-to-kws-index --max-states-scale=$max_states_scale --allow-partial=true \
+      --frame-subsampling-factor=$frame_subsampling_factor \
       --max-silence-frames=$max_silence_frames --strict=$strict ark:$utter_id ark:- ark:- \| \
     kws-index-union --skip-optimization=$skip_optimization --strict=$strict --max-states=$max_states \
       ark:- "ark:|gzip -c > $kwsdir/index.JOB.gz" || exit 1
@@ -88,11 +106,12 @@ elif [ -f $align_lexicon ]; then
     lattice-align-words-lexicon $silence_opt --max-expand=$max_expand $align_lexicon $model ark:- ark:- \| \
     lattice-scale --acoustic-scale=$acwt --lm-scale=$lmwt ark:- ark:- \| \
     lattice-to-kws-index --max-states-scale=$max_states_scale --allow-partial=true \
+      --frame-subsampling-factor=$frame_subsampling_factor \
      --max-silence-frames=$max_silence_frames --strict=$strict ark:$utter_id ark:- ark:- \| \
     kws-index-union
--skip-optimization=$skip_optimization --strict=$strict --max-states=$max_states \ ark:- "ark:|gzip -c > $kwsdir/index.JOB.gz" || exit 1 else - echo "$0: cannot find either word-boundary file $word_boundary or alignment lexicon $align_lexicon" + echo "$0: Error: cannot find either word-boundary file $word_boundary or alignment lexicon $align_lexicon" exit 1 fi diff --git a/egs/wsj/s5/steps/make_plp_pitch.sh b/egs/wsj/s5/steps/make_plp_pitch.sh index ff6e83ef577..7a71942ed22 100755 --- a/egs/wsj/s5/steps/make_plp_pitch.sh +++ b/egs/wsj/s5/steps/make_plp_pitch.sh @@ -19,7 +19,7 @@ compress=true echo "$0 $@" # Print the command line for logging -if [ -f path.sh ]; then . ./path.sh; fi +if [ -f ./path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; if [ $# -lt 1 ] || [ $# -gt 3 ]; then diff --git a/egs/wsj/s5/steps/nnet2/get_egs.sh b/egs/wsj/s5/steps/nnet2/get_egs.sh index de9c5ca85bd..2eac4d60f3f 100755 --- a/egs/wsj/s5/steps/nnet2/get_egs.sh +++ b/egs/wsj/s5/steps/nnet2/get_egs.sh @@ -19,7 +19,7 @@ samples_per_iter=200000 # each iteration of training, see this many samples transform_dir= # If supplied, overrides alidir num_jobs_nnet=16 # Number of neural net jobs to run in parallel stage=0 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. splice_width=4 # meaning +- 4 frames on each side for second LDA left_context= right_context= @@ -58,7 +58,7 @@ if [ $# != 4 ]; then echo " # very end." echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." - + exit 1; fi @@ -91,7 +91,7 @@ cp $alidir/tree $dir utils/lang/check_phones_compatible.sh $lang/phones.txt $alidir/phones.txt || exit 1; cp $lang/phones.txt $dir || exit 1; -# Get list of validation utterances. +# Get list of validation utterances. awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \ > $dir/valid_uttlist || exit 1; @@ -111,7 +111,7 @@ awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlis [ -z "$transform_dir" ] && transform_dir=$alidir -## Set up features. +## Set up features. if [ -z $feat_type ]; then if [ -f $alidir/final.mat ] && [ ! -f $transform_dir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi fi @@ -123,7 +123,7 @@ case $feat_type in train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- |" echo $cmvn_opts >$dir/cmvn_opts ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` cp $alidir/{splice_opts,cmvn_opts,final.mat} $dir || exit 1; [ ! -z "$cmvn_opts" ] && \ @@ -266,7 +266,7 @@ if [ $stage -le 4 ]; then echo "$0: Since iters-per-epoch == 1, just concatenating the data." for n in `seq 1 $num_jobs_nnet`; do cat $dir/egs/egs_orig.$n.*.ark > $dir/egs/egs_tmp.$n.0.ark || exit 1; - remove $dir/egs/egs_orig.$n.*.ark + remove $dir/egs/egs_orig.$n.*.ark done else # We'll have to split it up using nnet-copy-egs. 
egs_list= @@ -291,7 +291,7 @@ if [ $stage -le 5 ]; then for n in `seq 0 $[$iters_per_epoch-1]`; do $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$n.JOB.log \ nnet-shuffle-egs "--srand=\$[JOB+($num_jobs_nnet*$n)]" \ - ark:$dir/egs/egs_tmp.JOB.$n.ark ark:$dir/egs/egs.JOB.$n.ark + ark:$dir/egs/egs_tmp.JOB.$n.ark ark:$dir/egs/egs.JOB.$n.ark remove $dir/egs/egs_tmp.*.$n.ark done fi diff --git a/egs/wsj/s5/steps/nnet2/train_discriminative.sh b/egs/wsj/s5/steps/nnet2/train_discriminative.sh index a1a121345c2..fee51254f4f 100755 --- a/egs/wsj/s5/steps/nnet2/train_discriminative.sh +++ b/egs/wsj/s5/steps/nnet2/train_discriminative.sh @@ -33,7 +33,7 @@ shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of stage=-8 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. These don't num_threads=16 # this is the default but you may want to change it, e.g. to 1 if # using GPUs. @@ -74,7 +74,7 @@ if [ $# != 6 ]; then echo " # this, you may want to decrease the batch size." echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" echo " # use multiple threads... " - echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." + echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per" echo " # process." echo " --stage # Used to run a partially-completed training process from somewhere in" diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh index 5e25d7175c3..69ab4596f29 100755 --- a/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh +++ b/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh @@ -54,7 +54,7 @@ add_layers_period=2 # by default, add new layers every 2 iterations. num_hidden_layers=3 stage=-5 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. These don't splice_width=4 # meaning +- 4 frames on each side for second LDA randprune=4.0 # speeds up LDA. alpha=4.0 diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh index e78d9ab7f5c..d655f039e2f 100755 --- a/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh +++ b/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh @@ -55,7 +55,7 @@ add_layers_period=2 # by default, add new layers every 2 iterations. num_hidden_layers=3 stage=-5 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't +io_opts="--max-jobs-run 15" # for jobs with a lot of I/O, limits the number running at one time. These don't splice_width=4 # meaning +- 4 frames on each side for second LDA randprune=4.0 # speeds up LDA. alpha=4.0 # relates to preconditioning. diff --git a/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh b/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh index 54d7cf7ea9d..b296e95416b 100755 --- a/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh +++ b/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh @@ -55,7 +55,7 @@ last_layer_factor=0.1 # relates to modify_learning_rates. first_layer_factor=1.0 # relates to modify_learning_rates. stage=-5 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. 
These don't +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. These don't splice_width=4 # meaning +- 4 frames on each side for second LDA randprune=4.0 # speeds up LDA. alpha=4.0 @@ -65,6 +65,7 @@ mix_up=0 # Number of components to mix up to (should be > #tree leaves, if num_threads=16 parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know. # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads. +combine_opts="--mem 12G" cleanup=true egs_dir= lda_opts= @@ -371,7 +372,7 @@ if [ $stage -le $num_iters ]; then num_egs=`nnet-copy-egs ark:$egs_dir/combine.egs ark:/dev/null 2>&1 | tail -n 1 | awk '{print $NF}'` mb=$[($num_egs+$this_num_threads-1)/$this_num_threads] [ $mb -gt 512 ] && mb=512 - $cmd $parallel_opts $dir/log/combine.log \ + $cmd $parallel_opts $combine_opts $dir/log/combine.log \ nnet-combine-fast --use-gpu=no --num-threads=$this_num_threads \ --verbose=3 --minibatch-size=$mb "${nnets_list[@]}" ark:$egs_dir/combine.egs \ $dir/final.mdl || exit 1; diff --git a/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py b/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py index d88355befae..162fda16d16 100644 --- a/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py +++ b/egs/wsj/s5/steps/nnet3/make_tdnn_configs.py @@ -91,7 +91,7 @@ right_context += int_list[-1] splice_array.append(int_list) except ValueError as e: - sys.exit("invalid --splice-indexes argument " + args.splice_indexes + e) + sys.exit("invalid --splice-indexes argument " + args.splice_indexes + str(e)) left_context = max(0, left_context) right_context = max(0, right_context) num_hidden_layers = len(splice_array) diff --git a/egs/wsj/s5/steps/search_index.sh b/egs/wsj/s5/steps/search_index.sh index 6d4c344c5db..5db3d39b15a 100755 --- a/egs/wsj/s5/steps/search_index.sh +++ b/egs/wsj/s5/steps/search_index.sh @@ -8,6 +8,7 @@ cmd=run.pl nbest=-1 strict=true indices_dir= +frame_subsampling_factor=1 # End configuration section. echo "$0 $@" # Print the command line for logging @@ -36,15 +37,23 @@ fi mkdir -p $kwsdir/log; nj=`cat $indices_dir/num_jobs` || exit 1; -keywords=$kwsdatadir/keywords.fsts; +if [ -f $kwsdatadir/keywords.fsts.gz ]; then + keywords="\"gunzip -c $kwsdatadir/keywords.fsts.gz|\"" +elif [ -f $kwsdatadir/keywords.fsts ]; then + keywords=$kwsdatadir/keywords.fsts; +else + echo "$0: no such file $kwsdatadir/keywords.fsts[.gz]" && exit 1; +fi -for f in $indices_dir/index.1.gz $keywords; do +for f in $indices_dir/index.1.gz ; do [ ! -f $f ] && echo "make_index.sh: no such file $f" && exit 1; done $cmd JOB=1:$nj $kwsdir/log/search.JOB.log \ kws-search --strict=$strict --negative-tolerance=-1 \ + --frame-subsampling-factor=${frame_subsampling_factor} \ "ark:gzip -cdf $indices_dir/index.JOB.gz|" ark:$keywords \ - "ark,t:|int2sym.pl -f 2 $kwsdatadir/utter_id > $kwsdir/result.JOB" || exit 1; + "ark,t:|gzip -c > $kwsdir/result.JOB.gz" \ + "ark,t:|gzip -c > $kwsdir/stats.JOB.gz" || exit 1; exit 0; diff --git a/egs/wsj/s5/utils/dict_dir_add_pronprobs.sh b/egs/wsj/s5/utils/dict_dir_add_pronprobs.sh index f9d2890ea39..6cb14068769 100755 --- a/egs/wsj/s5/utils/dict_dir_add_pronprobs.sh +++ b/egs/wsj/s5/utils/dict_dir_add_pronprobs.sh @@ -58,14 +58,14 @@ utils/validate_dict_dir.pl $srcdir; if [ -f $srcdir/lexicon.txt ]; then src_lex=$srcdir/lexicon.txt perl -ane 'print join(" ", split(" ", $_)) . 
"\n";' < $src_lex |\ - sort > $dir/lexicon.txt + sort -u > $dir/lexicon.txt elif [ -f $srcdir/lexiconp.txt ]; then echo "$0: removing the pron-probs from $srcdir/lexiconp.txt to create $dir/lexicon.txt" # the Perl command below normalizes the spaces (avoid double space). src_lex=$srcdir/lexiconp.txt awk '{$2 = ""; print $0;}' <$srcdir/lexiconp.txt |\ perl -ane 'print join(" ", split(" " ,$_)) . "\n";' |\ - sort > $dir/lexicon.txt || exit 1; + sort -u > $dir/lexicon.txt || exit 1; fi diff --git a/egs/wsj/s5/utils/make_lexicon_fst.pl b/egs/wsj/s5/utils/make_lexicon_fst.pl index bcf0f4df13a..f97129c05cb 100755 --- a/egs/wsj/s5/utils/make_lexicon_fst.pl +++ b/egs/wsj/s5/utils/make_lexicon_fst.pl @@ -21,21 +21,24 @@ $pron_probs = 0; -if ($ARGV[0] eq "--pron-probs") { +if ((@ARGV > 0) && ($ARGV[0] eq "--pron-probs")) { $pron_probs = 1; shift @ARGV; } if (@ARGV != 1 && @ARGV != 3 && @ARGV != 4) { - print STDERR - "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt -Creates a lexicon FST that transduces phones to words, and may allow optional silence. -Note: ordinarily, each line of lexicon.txt is: word phone1 phone2 ... phoneN; if the --pron-probs option is -used, each line is: word pronunciation-probability phone1 phone2 ... phoneN. The probability 'prob' will -typically be between zero and one, and note that it's generally helpful to normalize so the largest one -for each word is 1.0, but this is your responsibility. The silence disambiguation symbol, e.g. something -like #5, is used only when creating a lexicon with disambiguation symbols, e.g. L_disambig.fst, and was -introduced to fix a particular case of non-determinism of decoding graphs.\n"; + print STDERR "Usage: make_lexicon_fst.pl [--pron-probs] lexicon.txt [silprob silphone [sil_disambig_sym]] >lexiconfst.txt\n\n"; + print STDERR "Creates a lexicon FST that transduces phones to words, and may allow optional silence.\n\n"; + print STDERR "Note: ordinarily, each line of lexicon.txt is:\n"; + print STDERR " word phone1 phone2 ... phoneN;\n"; + print STDERR "if the --pron-probs option is used, each line is:\n"; + print STDERR " word pronunciation-probability phone1 phone2 ... phoneN.\n\n"; + print STDERR "The probability 'prob' will typically be between zero and one, and note that\n"; + print STDERR "it's generally helpful to normalize so the largest one for each word is 1.0, but\n"; + print STDERR "this is your responsibility.\n\n"; + print STDERR "The silence disambiguation symbol, e.g. something like #5, is used only\n"; + print STDERR "when creating a lexicon with disambiguation symbols, e.g. 
L_disambig.fst,\n"; + print STDERR "and was introduced to fix a particular case of non-determinism of decoding graphs.\n\n"; exit(1); } diff --git a/egs/wsj/s5/utils/slurm.pl b/egs/wsj/s5/utils/slurm.pl index a332e19cb1d..27e5fce9c01 100755 --- a/egs/wsj/s5/utils/slurm.pl +++ b/egs/wsj/s5/utils/slurm.pl @@ -397,9 +397,12 @@ sub exec_command { print Q " unset CUDA_VISIBLE_DEVICES.\n"; print Q "fi\n"; print Q "time1=\`date +\"%s\"\`\n"; -print Q " ( $cmd ) 2>>$logfile >>$logfile\n"; +print Q " ( $cmd ) &>>$logfile\n"; print Q "ret=\$?\n"; +print Q "sync || true"; print Q "time2=\`date +\"%s\"\`\n"; +print Q "echo '#' Accounting: begin_time=\$time1 >>$logfile\n"; +print Q "echo '#' Accounting: end_time=\$time2 >>$logfile\n"; print Q "echo '#' Accounting: time=\$((\$time2-\$time1)) threads=$num_threads >>$logfile\n"; print Q "echo '#' Finished at \`date\` with status \$ret >>$logfile\n"; print Q "[ \$ret -eq 137 ] && exit 100;\n"; # If process was killed (e.g. oom) it will exit with status 137; diff --git a/egs/wsj/s5/utils/write_kwslist.pl b/egs/wsj/s5/utils/write_kwslist.pl index b2f67815df9..18071fa7671 100755 --- a/egs/wsj/s5/utils/write_kwslist.pl +++ b/egs/wsj/s5/utils/write_kwslist.pl @@ -32,8 +32,9 @@ --remove-NO : Remove the "NO" decision instances (boolean, default = false) --segments : Segments file from Kaldi (string, default = "") --system-id : System ID (string, default = "") - --verbose : Verbose level (higher --> more kws section) (integer, default 0) - --YES-cutoff : Only keep "\$YES-cutoff" yeses for each kw (int, default = -1) + --verbose : Verbose level (higher --> more kws section) (integer, default = 0) + --YES-cutoff : Only keep "\$YES-cutoff" yeses for each kw (int, default = -1) + --nbest : Output up to nbest hits into the kwlist (int, default = -1) EOU @@ -55,6 +56,7 @@ my $remove_dup = "false"; my $remove_NO = "false"; my $YES_cutoff = -1; +my $nbest_max = -1; GetOptions('segments=s' => \$segment, 'flen=f' => \$flen, 'beta=f' => \$beta, @@ -72,7 +74,8 @@ 'duptime=f' => \$duptime, 'remove-dup=s' => \$remove_dup, 'YES-cutoff=i' => \$YES_cutoff, - 'remove-NO=s' => \$remove_NO); + 'remove-NO=s' => \$remove_NO, + 'nbest=i' => \$nbest_max) or die "Cannot continue\n"; ($normalize eq "true" || $normalize eq "false") || die "$0: Bad value for option --normalize\n"; ($remove_dup eq "true" || $remove_dup eq "false") || die "$0: Bad value for option --remove-dup\n"; @@ -134,12 +137,18 @@ sub PrintKwslist { # Start printing $kwslist .= "<kwslist kwlist_filename=\"$info->[0]\" language=\"$info->[1]\" system_id=\"$info->[2]\">\n"; my $prev_kw = ""; + my $nbest = $nbest_max; foreach my $kwentry (@{$KWS}) { + if (($prev_kw eq $kwentry->[0]) && ($nbest le 0) && ($nbest_max gt 0)) { + next; + } if ($prev_kw ne $kwentry->[0]) { if ($prev_kw ne "") {$kwslist .= "  </detected_kwlist>\n";} $kwslist .= "  <detected_kwlist kwid=\"$kwentry->[0]\" search_time=\"1\" oov_count=\"0\">\n"; $prev_kw = $kwentry->[0]; + $nbest = $nbest_max; } + $nbest -= 1 if $nbest_max gt 0; my $score = sprintf("%g", $kwentry->[5]); $kwslist .= "    <kw file=\"$kwentry->[1]\" channel=\"$kwentry->[2]\" tbeg=\"$kwentry->[3]\" dur=\"$kwentry->[4]\" score=\"$score\" decision=\"$kwentry->[6]\""; if (defined($kwentry->[7])) {$kwslist .= " threshold=\"$kwentry->[7]\"";} diff --git a/src/Makefile b/src/Makefile index c3346d873ef..52b23261b76 100644 --- a/src/Makefile +++ b/src/Makefile @@ -142,7 +142,7 @@ $(SUBDIRS) : mklibdir $(MAKE) -C $@ .PHONY: $(EXT_SUBDIRS) -$(EXT_SUBDIRS) : mklibdir +$(EXT_SUBDIRS) : mklibdir ext_depend $(MAKE) -C $@ diff --git a/src/fstbin/Makefile b/src/fstbin/Makefile index a508ed95bd7..8d544e40ea0 100644
--- a/src/fstbin/Makefile +++ b/src/fstbin/Makefile @@ -15,7 +15,8 @@ BINFILES = fstdeterminizestar \ fstmakecontextsyms fstaddsubsequentialloop fstaddselfloops \ fstrmepslocal fstcomposecontext fsttablecompose fstrand fstfactor \ fstdeterminizelog fstphicompose fstrhocompose fstpropfinal fstcopy \ - fstpushspecial fsts-to-transcripts + fstpushspecial fsts-to-transcripts fsts-project fsts-union \ + fsts-scale fsts-difference OBJFILES = diff --git a/src/fstbin/fsts-project.cc b/src/fstbin/fsts-project.cc new file mode 100644 index 00000000000..015f1431725 --- /dev/null +++ b/src/fstbin/fsts-project.cc @@ -0,0 +1,82 @@ +// fstbin/fsts-project.cc + +// Copyright 2016 Johns Hopkins University (Authors: Jan "Yenda" Trmal) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "fstext/fstext-utils.h" +#include "fstext/kaldi-fst-io.h" + + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace fst; + typedef kaldi::int32 int32; + typedef kaldi::uint64 uint64; + + const char *usage = + "Reads kaldi archive of FSTs; for each element, performs the project\n" + "operation either on input (default) or on the output (if the option\n" + "--project-output is true).\n" + "\n" + "Usage: fsts-project [options] \n" + " e.g.: fsts-project ark:train.fsts ark,t:train.fsts\n" + "\n" + "see also: fstproject (from the OpenFst toolkit)\n"; + + ParseOptions po(usage); + + bool project_output = false; + + po.Register("project-output", &project_output, + "If true, project output vs input"); + + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string fsts_rspecifier = po.GetArg(1), + fsts_wspecifier = po.GetArg(2); + + + SequentialTableReader fst_reader(fsts_rspecifier); + TableWriter fst_writer(fsts_wspecifier); + + int32 n_done = 0; + for (; !fst_reader.Done(); fst_reader.Next()) { + std::string key = fst_reader.Key(); + VectorFst fst(fst_reader.Value()); + + Project(&fst, project_output ? PROJECT_OUTPUT : PROJECT_INPUT); + + fst_writer.Write(key, fst); + n_done++; + } + + KALDI_LOG << "Projected " << n_done << " FSTs"; + return (n_done != 0 ? 
0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} diff --git a/src/fstbin/fsts-to-transcripts.cc b/src/fstbin/fsts-to-transcripts.cc index 7c301e10390..ae74d5de6e9 100644 --- a/src/fstbin/fsts-to-transcripts.cc +++ b/src/fstbin/fsts-to-transcripts.cc @@ -1,6 +1,7 @@ // fstbin/fsts-to-transcripts.cc -// Copyright 2012-2013 Johns Hopkins University (Authors: Guoguo Chen, Daniel Povey) +// Copyright 2012-2013 Johns Hopkins University (Authors: Guoguo Chen, +// Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // @@ -32,12 +33,15 @@ int main(int argc, char *argv[]) { typedef kaldi::uint64 uint64; const char *usage = - "Reads a table of FSTs; for each element, finds the best path and prints out the\n" - "output-symbol sequence (if --output-side=true), or input-symbol sequence " - "otherwise.\n" + "Reads a table of FSTs; for each element, finds the best path and \n" + "prints out the output-symbol sequence (if --output-side=true), or \n" + "input-symbol sequence otherwise.\n" "\n" - "Usage: fsts-to-transcripts [options] \n" - " e.g.: fsts-to-transcripts ark:train.fsts ark,t:train.text\n"; + "Usage:\n" + " fsts-to-transcripts [options] " + " \n" + "e.g.:\n" + " fsts-to-transcripts ark:train.fsts ark,t:train.text\n"; ParseOptions po(usage); @@ -48,13 +52,13 @@ int main(int argc, char *argv[]) { po.Read(argc, argv); - if (po.NumArgs() < 2 || po.NumArgs() > 3) { + if (po.NumArgs() != 2) { po.PrintUsage(); exit(1); } std::string fst_rspecifier = po.GetArg(1), - transcript_wspecifier = po.GetOptArg(2); + transcript_wspecifier = po.GetArg(2); SequentialTableReader fst_reader(fst_rspecifier); @@ -67,11 +71,11 @@ int main(int argc, char *argv[]) { VectorFst shortest_path; - ShortestPath(fst, &shortest_path); // the OpenFst algorithm ShortestPath. + ShortestPath(fst, &shortest_path); // the OpenFst algorithm ShortestPath. if (shortest_path.NumStates() == 0) { - KALDI_WARN << "Input FST (after shortest path) was empty. Producing no " - << "output for key " << key; + KALDI_WARN << "Input FST (after shortest path) was empty. Producing " + << "no output for key " << key; n_err++; continue; } @@ -80,7 +84,8 @@ int main(int argc, char *argv[]) { bool ans; if (output_side) ans = fst::GetLinearSymbolSequence( shortest_path, NULL, &transcript, NULL); - else ans = fst::GetLinearSymbolSequence( + else + ans = fst::GetLinearSymbolSequence( shortest_path, &transcript, NULL, NULL); if (!ans) { KALDI_ERR << "GetLinearSymbolSequence returned false (code error);"; diff --git a/src/fstbin/fsts-union.cc b/src/fstbin/fsts-union.cc new file mode 100644 index 00000000000..489d7362453 --- /dev/null +++ b/src/fstbin/fsts-union.cc @@ -0,0 +1,98 @@ +// fstbin/fsts-union.cc + +// Copyright 2016 Johns Hopkins University (Authors: Jan "Yenda" Trmal) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+ + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "fstext/fstext-utils.h" +#include "fstext/kaldi-fst-io.h" + + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace fst; + typedef kaldi::int32 int32; + typedef kaldi::uint64 uint64; + + const char *usage = + "Reads a kaldi archive of FSTs. Performs the FST operation union on\n" + "all fsts sharing the same key. Assumes the archive is sorted by key.\n" + "\n" + "Usage: fsts-union [options] \n" + " e.g.: fsts-union ark:keywords_tmp.fsts ark,t:keywords.fsts\n" + "\n" + "see also: fstunion (from the OpenFst toolkit)\n"; + + ParseOptions po(usage); + + po.Read(argc, argv); + + if (po.NumArgs() != 2) { + po.PrintUsage(); + exit(1); + } + + std::string fsts_rspecifier = po.GetArg(1), + fsts_wspecifier = po.GetArg(2); + + + SequentialTableReader fst_reader(fsts_rspecifier); + TableWriter fst_writer(fsts_wspecifier); + + int32 n_out_done = 0, + n_in_done = 0; + std::string res_key = ""; + VectorFst res_fst; + + for (; !fst_reader.Done(); fst_reader.Next()) { + std::string key = fst_reader.Key(); + VectorFst fst(fst_reader.Value()); + + n_in_done++; + if (key == res_key) { + fst::Union(&res_fst, fst); + } else { + if (res_key != "") { + VectorFst out_fst; + fst::Minimize(&res_fst); + fst::RmEpsilon(&res_fst); + fst_writer.Write(res_key, res_fst); + n_out_done++; + } + res_fst = fst; + res_key = key; + } + } + if (res_key != "") { + VectorFst out_fst; + fst::Minimize(&res_fst); + fst::RmEpsilon(&res_fst); + fst_writer.Write(res_key, res_fst); + n_out_done++; + } + + KALDI_LOG << "Applied fst union on " << n_in_done + << " FSTs, produced " << n_out_done << " FSTs"; + return (n_out_done != 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} diff --git a/src/kws/kws-functions.cc b/src/kws/kws-functions.cc index f6b6367d82b..d1d71ce7a42 100644 --- a/src/kws/kws-functions.cc +++ b/src/kws/kws-functions.cc @@ -17,6 +17,7 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. +#include #include "lat/lattice-functions.h" #include "kws/kws-functions.h" @@ -34,7 +35,6 @@ bool CompareInterval(const Interval &i1, return (i1.Start() < i2.Start() ? true : i1.Start() > i2.Start() ? false: i1.End() < i2.End() ? 
true: false); - } bool ClusterLattice(CompactLattice *clat, @@ -47,9 +47,11 @@ bool ClusterLattice(CompactLattice *clat, // Step 1: Iterate over the lattice to get the arcs StateId max_id = 0; - for (StateIterator siter(*clat); !siter.Done(); siter.Next()) { + for (StateIterator siter(*clat); !siter.Done(); + siter.Next()) { StateId state_id = siter.Value(); - for (ArcIterator aiter(*clat, state_id); !aiter.Done(); aiter.Next()) { + for (ArcIterator aiter(*clat, state_id); !aiter.Done(); + aiter.Next()) { CompactLatticeArc arc = aiter.Value(); if (state_id >= state_times.size() || arc.nextstate >= state_times.size()) return false; @@ -57,7 +59,8 @@ bool ClusterLattice(CompactLattice *clat, max_id = state_id; if (arc.nextstate > max_id) max_id = arc.nextstate; - head[arc.ilabel].push_back(Interval(state_times[state_id], state_times[arc.nextstate])); + head[arc.ilabel].push_back(Interval(state_times[state_id], + state_times[arc.nextstate])); } } // Check if alignments and the states match @@ -85,9 +88,11 @@ bool ClusterLattice(CompactLattice *clat, // Step 3: Cluster arcs according to the maximum overlap: attach // each arc to the cluster-head (as identified in Step 2) which // has the most temporal overlap with the current arc. - for (StateIterator siter(*clat); !siter.Done(); siter.Next()) { + for (StateIterator siter(*clat); !siter.Done(); + siter.Next()) { CompactLatticeArc::StateId state_id = siter.Value(); - for (MutableArcIterator aiter(clat, state_id); !aiter.Done(); aiter.Next()) { + for (MutableArcIterator aiter(clat, state_id); + !aiter.Done(); aiter.Next()) { CompactLatticeArc arc = aiter.Value(); // We don't cluster the epsilon arcs if (arc.ilabel == 0) @@ -100,7 +105,7 @@ bool ClusterLattice(CompactLattice *clat, int32 overlap = interval.Overlap(head[arc.ilabel][i]); if (overlap > max_overlap) { max_overlap = overlap; - olabel = i + 1; // need non-epsilon label. + olabel = i + 1; // need non-epsilon label. 
} } arc.olabel = olabel; @@ -134,13 +139,21 @@ class CompactLatticeToKwsProductFstMapper { arc.nextstate); } - fst::MapFinalAction FinalAction() const { return fst::MAP_NO_SUPERFINAL; } + fst::MapFinalAction FinalAction() const { + return fst::MAP_NO_SUPERFINAL; + } - fst::MapSymbolsAction InputSymbolsAction() const { return fst::MAP_COPY_SYMBOLS; } + fst::MapSymbolsAction InputSymbolsAction() const { + return fst::MAP_COPY_SYMBOLS; + } - fst::MapSymbolsAction OutputSymbolsAction() const { return fst::MAP_COPY_SYMBOLS;} + fst::MapSymbolsAction OutputSymbolsAction() const { + return fst::MAP_COPY_SYMBOLS; + } - uint64 Properties(uint64 props) const { return props; } + uint64 Properties(uint64 props) const { + return props; + } }; @@ -234,10 +247,12 @@ bool CreateFactorTransducer(const CompactLattice &clat, for (StateId s = 0; s < ns; s++) { // Add arcs from initial state to current state - if (!has_epsilon_property || (state_properties[s] & kStateHasNonEpsilonArcsLeaving)) + if (!has_epsilon_property || + (state_properties[s] & kStateHasNonEpsilonArcsLeaving)) factor_transducer->AddArc(ss, KwsProductArc(0, 0, KwsProductWeight(-alpha[s], StdXStdprimeWeight(state_times[s], ArcticWeight::One())), s)); // Add arcs from current state to final state - if (!has_epsilon_property || (state_properties[s] & kStateHasNonEpsilonArcsEntering)) + if (!has_epsilon_property || + (state_properties[s] & kStateHasNonEpsilonArcsEntering)) factor_transducer->AddArc(s, KwsProductArc(0, utterance_id, KwsProductWeight(0, StdXStdprimeWeight(TropicalWeight::One(), state_times[s])), fs)); // The old final state is not final any more if (factor_transducer->Final(s) != KwsProductWeight::Zero()) @@ -300,8 +315,8 @@ static void DifferenceWrapper(const fst::VectorFst &fst1, Decode(difference, encoder); } else { VectorFst fst2_copy(fst2); - RmEpsilon(&fst2_copy); // or Difference will crash. - RemoveWeights(&fst2_copy); // or Difference will crash. + RmEpsilon(&fst2_copy); // or Difference will crash. + RemoveWeights(&fst2_copy); // or Difference will crash. 
Difference(fst1, fst2_copy, difference); } } @@ -337,7 +352,8 @@ void MaybeDoSanityCheck(const KwsLexicographicFst &index_transducer) { for (size_t i = 0; i < isymbols.size(); i++) os2 << isymbols[i] << ' '; BaseFloat second_best_cost = weight.Value1().Value(); - KALDI_VLOG(3) << "Second-best path: " << isymbols.size() << " isymbols " << ", " + KALDI_VLOG(3) << "Second-best path: " << isymbols.size() + << " isymbols " << ", " << osymbols.size() << " osymbols, isymbols are " << os2.str() << ", second-best cost is " << second_best_cost; if (second_best_cost < -0.01) { @@ -349,10 +365,12 @@ void MaybeDoSanityCheck(const KwsLexicographicFst &index_transducer) { void MaybeDoSanityCheck(const KwsProductFst &product_transducer) { if (GetVerboseLevel() < 2) return; KwsLexicographicFst index_transducer; - Map(product_transducer, &index_transducer, KwsProductFstToKwsLexicographicFstMapper()); - MaybeDoSanityCheck(index_transducer); -} + Map(product_transducer, + &index_transducer, + KwsProductFstToKwsLexicographicFstMapper()); + MaybeDoSanityCheck(index_transducer); +} -} // end namespace kaldi +} // end namespace kaldi diff --git a/src/kws/kws-functions.h b/src/kws/kws-functions.h index 9d6424fb2b0..1558285e40d 100644 --- a/src/kws/kws-functions.h +++ b/src/kws/kws-functions.h @@ -21,6 +21,8 @@ #ifndef KALDI_KWS_KWS_FUNCTIONS_H_ #define KALDI_KWS_KWS_FUNCTIONS_H_ +#include + #include "lat/kaldi-lattice.h" #include "kws/kaldi-kws.h" diff --git a/src/kwsbin/Makefile b/src/kwsbin/Makefile index 43028956e9a..5efb19f9c17 100644 --- a/src/kwsbin/Makefile +++ b/src/kwsbin/Makefile @@ -5,7 +5,8 @@ EXTRA_CXXFLAGS = -Wno-sign-compare include ../kaldi.mk BINFILES = lattice-to-kws-index kws-index-union transcripts-to-fsts \ - kws-search generate-proxy-keywords compute-atwv + kws-search generate-proxy-keywords compute-atwv print-proxy-keywords + OBJFILES = diff --git a/src/kwsbin/compute-atwv.cc b/src/kwsbin/compute-atwv.cc index c7c8e484f8d..6d9f6d2c2bb 100644 --- a/src/kwsbin/compute-atwv.cc +++ b/src/kwsbin/compute-atwv.cc @@ -37,29 +37,34 @@ int main(int argc, char *argv[]) { const char *usage = "Computes the Actual Term-Weighted Value and prints it." "\n" - "Usage: compute-atwv [options] [alignment-csv-filename]\n" - " e.g.: compute-atwv 32485.4 ark:ref.1 ark:hyp.1 ali.csv\n" - " or: compute-atwv 32485.4 ark:ref.1 ark:hyp.1\n" + "Usage: \n" + " compute-atwv [options] " + " [alignment-csv-filename]\n" + "e.g.: \n" + " compute-atwv 32485.4 ark:ref.1 ark:hyp.1 ali.csv\n" + "or: \n" + " compute-atwv 32485.4 ark:ref.1 ark:hyp.1\n" "\n" "NOTES: \n" " a) the number of trials is usually equal to the size of the searched\n" " collection in seconds\n" - " b the ref-rspecifier/hyp-rspecifier are the kaldi IO specifiers for both\n" - " the reference and the hypotheses (found hits), respectively.\n" - " The format is the same for both of them. Each line is of \n" - " the following format\n" + " b the ref-rspecifier/hyp-rspecifier are the kaldi IO specifiers \n" + " for both the reference and the hypotheses (found hits), " + " respectively The format is the same for both of them. Each line\n" + " is of the following format\n" "\n" " \n\n" " e.g.:\n\n" " KW106-189 348 459 560 0.8\n" "\n" - " b) the alignment-csv-filename is an optional parameter. If present,\n" - " the alignment i.e. detailed information about what hypotheses match\n" - " up with which reference entries will be generated. The alignemnt\n" - " file format is equivalent to the alignment file produced using\n" - " the F4DE tool. 
However, we do not set some fields and the utterance\n" - " identifiers are numeric. You can use the script utils/int2sym.pl\n" - " and the utterance/keyword maps to convert the numerical ids into text\n" + " b) the alignment-csv-filename is an optional parameter. \n" + " If present, the alignment i.e. detailed information about what \n" + " hypotheses match up with which reference entries will be \n" + " generated. The alignemnt file format is equivalent to \n" + " the alignment file produced using the F4DE tool. However, we do" + " not set some fields and the utterance identifiers are numeric.\n" + " You can use the script utils/int2sym.pl and the utterance and \n" + " keyword maps to convert the numerical ids into text form\n" " c) the scores are expected to be probabilities. Please note that\n" " the output from the kws-search is in -log(probability).\n" " d) compute-atwv does not perform any score normalization (it's just\n" @@ -79,7 +84,7 @@ int main(int argc, char *argv[]) { po.Read(argc, argv); - if ((po.NumArgs() < 3) || (po.NumArgs() > 4)) { + if (po.NumArgs() < 3 || po.NumArgs() > 4) { po.PrintUsage(); exit(1); } @@ -161,7 +166,6 @@ int main(int argc, char *argv[]) { std::cout << "aproximate OTWV = " << std::fixed << std::setprecision(4) << otwv << std::endl; - } catch(const std::exception &e) { std::cerr << e.what(); return -1; diff --git a/src/kwsbin/generate-proxy-keywords.cc b/src/kwsbin/generate-proxy-keywords.cc index 8495b8e3fe6..9c534abe816 100644 --- a/src/kwsbin/generate-proxy-keywords.cc +++ b/src/kwsbin/generate-proxy-keywords.cc @@ -52,7 +52,7 @@ bool PrintProxyFstPath(const VectorFst &proxy, return true; } -} +} // namespace fst int main(int argc, char *argv[]) { try { @@ -125,7 +125,7 @@ int main(int argc, char *argv[]) { L1_filename = po.GetArg(2), keyword_rspecifier = po.GetArg(3), proxy_wspecifier = po.GetArg(4), - kwlist_wspecifier = (po.NumArgs() == 5) ? po.GetArg(5) : ""; + kwlist_wspecifier = po.GetOptArg(5); VectorFst *L2xE = ReadFstKaldi(L2xE_filename); VectorFst *L1 = ReadFstKaldi(L1_filename); @@ -203,7 +203,7 @@ int main(int argc, char *argv[]) { if (proxy_nbest > 0) { KALDI_VLOG(1) << "ShortestPath(KxL2xExL1', " << proxy_nbest << ")"; proxy = tmp_proxy; - tmp_proxy.DeleteStates(); // Not needed for now. + tmp_proxy.DeleteStates(); // Not needed for now. RmEpsilon(&proxy); ShortestPath(proxy, &tmp_proxy, proxy_nbest, true, true); proxy.DeleteStates(); // Not needed for now. diff --git a/src/kwsbin/kws-index-union.cc b/src/kwsbin/kws-index-union.cc index 84e5db4beba..4a0f3ccea1d 100644 --- a/src/kwsbin/kws-index-union.cc +++ b/src/kwsbin/kws-index-union.cc @@ -32,8 +32,10 @@ int main(int argc, char *argv[]) { typedef kaldi::uint64 uint64; const char *usage = - "Take a union of the indexed lattices. The input index is in the T*T*T semiring and\n" - "the output index is also in the T*T*T semiring. At the end of this program, encoded\n" + "Take a union of the indexed lattices. The input index is in " + " the T*T*T semiring and\n" + "the output index is also in the T*T*T semiring. 
At the end of " + "this program, encoded\n" "epsilon removal, determinization and minimization will be applied.\n" "\n" "Usage: kws-index-union [options] index-rspecifier index-wspecifier\n" @@ -44,9 +46,12 @@ int main(int argc, char *argv[]) { bool strict = true; bool skip_opt = false; int32 max_states = -1; - po.Register("strict", &strict, "Will allow 0 lattice if it is set to false."); - po.Register("skip-optimization", &skip_opt, "Skip optimization if it's set to true."); - po.Register("max-states", &max_states, "Maximum states for DeterminizeStar."); + po.Register("strict", &strict, + "Will allow 0 lattice if it is set to false."); + po.Register("skip-optimization", &skip_opt, + "Skip optimization if it's set to true."); + po.Register("max-states", &max_states, + "Maximum states for DeterminizeStar."); po.Read(argc, argv); @@ -58,8 +63,10 @@ int main(int argc, char *argv[]) { std::string index_rspecifier = po.GetArg(1), index_wspecifier = po.GetOptArg(2); - SequentialTableReader< VectorFstTplHolder > index_reader(index_rspecifier); - TableWriter< VectorFstTplHolder > index_writer(index_wspecifier); + SequentialTableReader< VectorFstTplHolder > + index_reader(index_rspecifier); + TableWriter< VectorFstTplHolder > + index_writer(index_wspecifier); int32 n_done = 0; KwsLexicographicFst global_index; diff --git a/src/kwsbin/kws-search.cc b/src/kwsbin/kws-search.cc index c5f6edd525d..1ef2655c656 100644 --- a/src/kwsbin/kws-search.cc +++ b/src/kwsbin/kws-search.cc @@ -1,6 +1,8 @@ // kwsbin/kws-search.cc -// Copyright 2012-2013 Johns Hopkins University (Authors: Guoguo Chen, Daniel Povey) +// Copyright 2012-2015 Johns Hopkins University (Authors: Guoguo Chen, +// Daniel Povey. +// Yenda Trmal) // See ../../COPYING for clarification regarding multiple authors // @@ -29,17 +31,22 @@ typedef KwsLexicographicArc Arc; typedef Arc::Weight Weight; typedef Arc::StateId StateId; -uint64 EncodeLabel(StateId ilabel, - StateId olabel) { - return (((int64)olabel)<<32)+((int64)ilabel); - +// encode ilabel, olabel pair as a single 64bit (output) symbol +uint64 EncodeLabel(StateId ilabel, StateId olabel) { + return (static_cast(olabel) << 32) + static_cast(ilabel); } +// extract the osymbol from the 64bit symbol. That represents the utterance id +// in this setup -- we throw away the isymbol which is typically 0 or an +// disambiguation symbol StateId DecodeLabelUid(uint64 osymbol) { - // We only need the utterance id - return ((StateId)(osymbol>>32)); + return static_cast(osymbol >> 32); } +// this is a mapper adapter that helps converting +// between the StdArc FST (i.e. tropical semiring FST) +// to the KwsLexicographic FST. 
Structure will be kept, +// the weights converted/recomputed class VectorFstToKwsLexicographicFstMapper { public: typedef fst::StdArc FromArc; @@ -59,17 +66,95 @@ class VectorFstToKwsLexicographicFstMapper { arc.nextstate); } - fst::MapFinalAction FinalAction() const { return fst::MAP_NO_SUPERFINAL; } + fst::MapFinalAction FinalAction() const { + return fst::MAP_NO_SUPERFINAL; + } - fst::MapSymbolsAction InputSymbolsAction() const { return fst::MAP_COPY_SYMBOLS; } + fst::MapSymbolsAction InputSymbolsAction() const { + return fst::MAP_COPY_SYMBOLS; + } - fst::MapSymbolsAction OutputSymbolsAction() const { return fst::MAP_COPY_SYMBOLS;} + fst::MapSymbolsAction OutputSymbolsAction() const { + return fst::MAP_COPY_SYMBOLS; + } uint64 Properties(uint64 props) const { return props; } }; +struct ActivePath { + std::vector path; + KwsLexicographicArc::Weight weight; + KwsLexicographicArc::Label last; +}; + +bool GenerateActivePaths(const KwsLexicographicFst &proxy, + std::vector *paths, + KwsLexicographicFst::StateId cur_state, + std::vector cur_path, + KwsLexicographicArc::Weight cur_weight) { + for (fst::ArcIterator aiter(proxy, cur_state); + !aiter.Done(); aiter.Next()) { + const Arc &arc = aiter.Value(); + Weight temp_weight = Times(arc.weight, cur_weight); + + cur_path.push_back(arc.ilabel); + + if ( arc.olabel != 0 ) { + ActivePath path; + path.path = cur_path; + path.weight = temp_weight; + path.last = arc.olabel; + paths->push_back(path); + } else { + GenerateActivePaths(proxy, paths, + arc.nextstate, cur_path, temp_weight); + } + cur_path.pop_back(); + } + + return true; +} +} // namespace kaldi + +typedef kaldi::TableWriter< kaldi::BasicVectorHolder > + VectorOfDoublesWriter; +void OutputDetailedStatistics(const std::string &kwid, + const kaldi::KwsLexicographicFst &keyword, + const unordered_map &label_decoder, + VectorOfDoublesWriter *output ) { + std::vector paths; + + if (keyword.Start() == fst::kNoStateId) + return; + + kaldi::GenerateActivePaths(keyword, &paths, keyword.Start(), + std::vector(), + kaldi::KwsLexicographicArc::Weight::One()); + + for (int i = 0; i < paths.size(); ++i) { + std::vector out; + double score; + int32 tbeg, tend, uid; + + uint64 osymbol = label_decoder.find(paths[i].last)->second; + uid = kaldi::DecodeLabelUid(osymbol); + tbeg = paths[i].weight.Value2().Value1().Value(); + tend = paths[i].weight.Value2().Value2().Value(); + score = paths[i].weight.Value1().Value(); + + out.push_back(uid); + out.push_back(tbeg); + out.push_back(tend); + out.push_back(score); + + for (int j = 0; j < paths[i].path.size(); ++j) { + out.push_back(paths[i].path[j]); + } + output->Write(kwid, out); + } } + int main(int argc, char *argv[]) { try { using namespace kaldi; @@ -77,20 +162,33 @@ int main(int argc, char *argv[]) { typedef kaldi::int32 int32; typedef kaldi::uint32 uint32; typedef kaldi::uint64 uint64; - typedef KwsLexicographicArc Arc; - typedef Arc::Weight Weight; - typedef Arc::StateId StateId; const char *usage = - "Search the keywords over the index. This program can be executed parallely, either\n" - "on the index side or the keywords side; we use a script to combine the final search\n" - "results. Note that the index archive has a only key \"global\".\n" - "The output file is in the format:\n" - "kw utterance_id beg_frame end_frame negated_log_probs\n" - " e.g.: KW1 1 23 67 0.6074219\n" + "Search the keywords over the index. 
This program can be executed\n" + "in parallel, either on the index side or the keywords side; we use\n" + "a script to combine the final search results. Note that the index\n" + "archive has a single key \"global\".\n\n" + "Search has one or two outputs. The first one is mandatory and will\n" + "contain the search output, i.e. the list of all found keyword instances.\n" + "The file is in the following format:\n" + "kw_id utt_id beg_frame end_frame neg_logprob\n" + " e.g.: \n" + "KW105-0198 7 335 376 1.91254\n\n" + "The second parameter is optional and allows the user to gather more\n" + "statistics about the individual instances from the posting list.\n" + "Remember \"keyword\" is an FST and as such, there can be multiple\n" + "paths matching in the keyword and in the lattice index in that given\n" + "time period. The stats output will provide all matching paths\n" + "each with the appropriate score. \n" + "The format is as follows:\n" + "kw_id utt_id beg_frame end_frame neg_logprob 0 w_id1 w_id2 ... 0\n" + " e.g.: \n" + "KW105-0198 7 335 376 16.01254 0 5766 5659 0\n" "\n" - "Usage: kws-search [options] index-rspecifier keywords-rspecifier results-wspecifier\n" - " e.g.: kws-search ark:index.idx ark:keywords.fsts ark:results\n"; + "Usage: kws-search [options] <index-rspecifier> <keywords-rspecifier>" + " <results-wspecifier> [<stats-wspecifier>]\n" + " e.g.: kws-search ark:index.idx ark:keywords.fsts " + "ark:results ark:stats\n"; ParseOptions po(usage); @@ -99,28 +197,33 @@ int main(int argc, char *argv[]) { bool strict = true; double negative_tolerance = -0.1; double keyword_beam = -1; + int32 frame_subsampling_factor = 1; + po.Register("frame-subsampling-factor", &frame_subsampling_factor, + "Frame subsampling factor. (Default value 1)"); po.Register("nbest", &n_best, "Return the best n hypotheses."); po.Register("keyword-nbest", &keyword_nbest, - "Pick the best n keywords if the FST contains multiple keywords."); + "Pick the best n keywords if the FST contains " + "multiple keywords."); po.Register("strict", &strict, "Affects the return status of the program."); po.Register("negative-tolerance", &negative_tolerance, - "The program will print a warning if we get negative score smaller " - "than this tolerance."); + "The program will print a warning if we get negative score " + "smaller than this tolerance."); po.Register("keyword-beam", &keyword_beam, - "Prune the FST with the given beam if the FST contains multiple keywords."); + "Prune the FST with the given beam if the FST contains " + "multiple keywords."); if (n_best < 0 && n_best != -1) { KALDI_ERR << "Bad number for nbest"; - exit (1); + exit(1); } if (keyword_nbest < 0 && keyword_nbest != -1) { KALDI_ERR << "Bad number for keyword-nbest"; - exit (1); + exit(1); } if (keyword_beam < 0 && keyword_beam != -1) { KALDI_ERR << "Bad number for keyword-beam"; - exit (1); + exit(1); } po.Read(argc, argv); @@ -131,12 +234,16 @@ int main(int argc, char *argv[]) { } std::string index_rspecifier = po.GetArg(1), - keyword_rspecifier = po.GetOptArg(2), - result_wspecifier = po.GetOptArg(3); + keyword_rspecifier = po.GetArg(2), + result_wspecifier = po.GetArg(3), + stats_wspecifier = po.GetOptArg(4); - RandomAccessTableReader< VectorFstTplHolder<KwsLexicographicArc> > index_reader(index_rspecifier); + RandomAccessTableReader< VectorFstTplHolder<KwsLexicographicArc> > + index_reader(index_rspecifier); SequentialTableReader<VectorFstHolder> keyword_reader(keyword_rspecifier); - TableWriter<BasicVectorHolder<double> > result_writer(result_wspecifier); + VectorOfDoublesWriter result_writer(result_wspecifier); + VectorOfDoublesWriter stats_writer(stats_wspecifier); + + // Index has key "global" KwsLexicographicFst
index = index_reader.Value("global"); @@ -152,7 +259,8 @@ int main(int argc, char *argv[]) { int32 label_count = 1; unordered_map label_encoder; unordered_map label_decoder; - for (StateIterator siter(index); !siter.Done(); siter.Next()) { + for (StateIterator siter(index); + !siter.Done(); siter.Next()) { StateId state_id = siter.Value(); for (MutableArcIterator aiter(&index, state_id); !aiter.Done(); aiter.Next()) { @@ -175,7 +283,7 @@ int main(int argc, char *argv[]) { aiter.SetValue(arc); } } - ArcSort(&index, fst::ILabelCompare()); + ArcSort(&index, fst::ILabelCompare()); int32 n_done = 0; int32 n_fail = 0; @@ -198,6 +306,15 @@ int main(int argc, char *argv[]) { KwsLexicographicFst result_fst; Map(keyword, &keyword_fst, VectorFstToKwsLexicographicFstMapper()); Compose(keyword_fst, index, &result_fst); + + if (stats_wspecifier != "") { + KwsLexicographicFst matched_seq(result_fst); + OutputDetailedStatistics(key, + matched_seq, + label_decoder, + &stats_writer); + } + Project(&result_fst, PROJECT_OUTPUT); Minimize(&result_fst, (KwsLexicographicFst *) nullptr, kDelta, true); ShortestPath(result_fst, &result_fst, n_best); @@ -216,13 +333,14 @@ int main(int argc, char *argv[]) { // We're expecting a two-state FST if (result_fst.Final(arc.nextstate) != Weight::One()) { - KALDI_WARN << "The resulting FST does not have the expected structure for key " << key; + KALDI_WARN << "The resulting FST does not have " + << "the expected structure for key " << key; n_fail++; continue; } uint64 osymbol = label_decoder[arc.olabel]; - uid = (int32)DecodeLabelUid(osymbol); + uid = static_cast(DecodeLabelUid(osymbol)); tbeg = arc.weight.Value2().Value1().Value(); tend = arc.weight.Value2().Value2().Value(); score = arc.weight.Value1().Value(); @@ -235,8 +353,8 @@ int main(int argc, char *argv[]) { } vector result; result.push_back(uid); - result.push_back(tbeg); - result.push_back(tend); + result.push_back(tbeg * frame_subsampling_factor); + result.push_back(tend * frame_subsampling_factor); result.push_back(score); result_writer.Write(key, result); } diff --git a/src/kwsbin/lattice-to-kws-index.cc b/src/kwsbin/lattice-to-kws-index.cc index b5ec577dc6d..fcd6b82119d 100644 --- a/src/kwsbin/lattice-to-kws-index.cc +++ b/src/kwsbin/lattice-to-kws-index.cc @@ -36,23 +36,33 @@ int main(int argc, char *argv[]) { typedef kaldi::uint64 uint64; const char *usage = - "Create an inverted index of the given lattices. The output index is in the T*T*T\n" - "semiring. For details for the semiring, please refer to Dogan Can and Muran Saraclar's" - "lattice indexing paper." + "Create an inverted index of the given lattices. The output index is \n" + "in the T*T*T semiring. 
For details for the semiring, please refer to\n" + "Dogan Can and Murat Saraclar's paper named " + "\"Lattice Indexing for Spoken Term Detection\"\n" "\n" - "Usage: lattice-to-kws-index [options] utter-symtab-rspecifier lattice-rspecifier index-wspecifier\n" - " e.g.: lattice-to-kws-index ark:utter.symtab ark:1.lats ark:global.idx\n"; + "Usage: lattice-to-kws-index [options] " + " \n" + "e.g.: \n" + " lattice-to-kws-index ark:utter.symtab ark:1.lats ark:global.idx\n"; ParseOptions po(usage); + int32 frame_subsampling_factor = 1; int32 max_silence_frames = 50; bool strict = true; bool allow_partial = true; BaseFloat max_states_scale = 4; - po.Register("max-silence-frames", &max_silence_frames, "Maximum #frames for" - " silence arc."); - po.Register("strict", &strict, "Setting --strict=false will cause successful " - "termination even if we processed no lattices."); + po.Register("frame-subsampling-factor", &frame_subsampling_factor, + "Frame subsampling factor. (Default value 1)"); + po.Register("max-silence-frames", &max_silence_frames, + "If --frame-subsampling-factor is used, --max-silence-frames " + "is relative to the the input, not the output frame rate " + "(we divide by frame-subsampling-factor and round to " + "the closest integer, to get the number of symbols in the " + "lattice)."); + po.Register("strict", &strict, "Setting --strict=false will cause " + "successful termination even if we processed no lattices."); po.Register("max-states-scale", &max_states_scale, "Number of states in the" " original lattice times this scale is the number of states " "allowed when optimizing the index. Negative number means no " @@ -62,14 +72,16 @@ int main(int argc, char *argv[]) { po.Read(argc, argv); - if (po.NumArgs() < 3 || po.NumArgs() > 4) { + if (po.NumArgs() != 3) { po.PrintUsage(); exit(1); } + max_silence_frames = 0.5 + + max_silence_frames / static_cast(frame_subsampling_factor); std::string usymtab_rspecifier = po.GetOptArg(1), lats_rspecifier = po.GetArg(2), - index_wspecifier = po.GetOptArg(3); + index_wspecifier = po.GetArg(3); // We use RandomAccessInt32Reader to read the utterance symtab table. RandomAccessInt32Reader usymtab_reader(usymtab_rspecifier); @@ -77,7 +89,9 @@ int main(int argc, char *argv[]) { // We read the lattice in as CompactLattice; We need the CompactLattice // structure for the rest of the work SequentialCompactLatticeReader clat_reader(lats_rspecifier); - TableWriter< fst::VectorFstTplHolder > index_writer(index_wspecifier); + + TableWriter< fst::VectorFstTplHolder > + index_writer(index_wspecifier); int32 n_done = 0; int32 n_fail = 0; @@ -124,9 +138,10 @@ int main(int argc, char *argv[]) { // factor transducer. 
KALDI_VLOG(1) << "Arc clustering..."; bool success = false; - success = ClusterLattice(&clat, state_times); + success = kaldi::ClusterLattice(&clat, state_times); if (!success) { - KALDI_WARN << "State id's and alignments do not match for lattice " << key; + KALDI_WARN << "State id's and alignments do not match for lattice " + << key; n_fail++; continue; } @@ -155,7 +170,10 @@ int main(int argc, char *argv[]) { KALDI_VLOG(1) << "Generating factor transducer..."; KwsProductFst factor_transducer; int32 utterance_id = usymtab_reader.Value(key); - success = CreateFactorTransducer(clat, state_times, utterance_id, &factor_transducer); + success = kaldi::CreateFactorTransducer(clat, + state_times, + utterance_id, + &factor_transducer); if (!success) { KALDI_WARN << "Cannot generate factor transducer for lattice " << key; n_fail++; diff --git a/src/kwsbin/print-proxy-keywords.cc b/src/kwsbin/print-proxy-keywords.cc new file mode 100644 index 00000000000..7c75c4baf66 --- /dev/null +++ b/src/kwsbin/print-proxy-keywords.cc @@ -0,0 +1,134 @@ +// kwsbin/print-proxy-keywords.cc +// +// Copyright 2014-2016 Johns Hopkins University (Author: Guoguo Chen, +// Yenda Trmal) +// +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + + +#include "base/kaldi-common.h" +#include "util/common-utils.h" +#include "fstext/fstext-utils.h" +#include "fstext/kaldi-fst-io.h" + +namespace fst { + +bool PrintProxyFstPath(const VectorFst &proxy, + vector > *path, + vector *cost, + StdArc::StateId cur_state, + vector cur_path, + StdArc::Weight cur_cost) { + if (proxy.Final(cur_state) != StdArc::Weight::Zero()) { + // Assume only final state has non-zero cost. + cur_cost = Times(proxy.Final(cur_state), cur_cost); + path->push_back(cur_path); + cost->push_back(cur_cost); + return true; + } + + for (ArcIterator aiter(proxy, cur_state); + !aiter.Done(); aiter.Next()) { + const StdArc &arc = aiter.Value(); + StdArc::Weight temp_cost = Times(arc.weight, cur_cost); + cur_path.push_back(arc.ilabel); + PrintProxyFstPath(proxy, path, cost, + arc.nextstate, cur_path, temp_cost); + cur_path.pop_back(); + } + + return true; +} +} // namespace fst + +int main(int argc, char *argv[]) { + try { + using namespace kaldi; + using namespace fst; + typedef kaldi::int32 int32; + typedef kaldi::uint64 uint64; + typedef StdArc::StateId StateId; + typedef StdArc::Weight Weight; + + const char *usage = + "Reads in the proxy keywords FSTs and print them to a file where each\n" + "line is \"kwid w1 w2 .. 
2n\"\n" + "\n" + "Usage: \n" + " print-proxy-keywords [options] " + " []]\n" + "e.g.:\n" + " print-proxy-keywords ark:proxy.fsts ark,t:kwlist.txt" + " ark,t:costs.txt\n"; + + ParseOptions po(usage); + + po.Read(argc, argv); + + if (po.NumArgs() < 2 || po.NumArgs() > 3) { + po.PrintUsage(); + exit(1); + } + + std::string proxy_rspecifier = po.GetArg(1), + kwlist_wspecifier = po.GetArg(2), + cost_wspecifier = po.GetOptArg(3); + + + SequentialTableReader proxy_reader(proxy_rspecifier); + TableWriter > kwlist_writer(kwlist_wspecifier); + TableWriter > cost_writer(cost_wspecifier); + + // Start processing the keywords + int32 n_done = 0; + for (; !proxy_reader.Done(); proxy_reader.Next()) { + std::string key = proxy_reader.Key(); + VectorFst proxy = proxy_reader.Value(); + proxy_reader.FreeCurrent(); + + if (proxy.Properties(kAcyclic, true) == 0) { + KALDI_WARN << "Proxy FST has cycles, skip printing paths for " << key; + continue; + } + + vector > paths; + vector costs; + PrintProxyFstPath(proxy, &paths, &costs, proxy.Start(), + vector(), StdArc::Weight::One()); + KALDI_ASSERT(paths.size() == costs.size()); + for (int32 i = 0; i < paths.size(); i++) { + vector kwlist; + vector cost; + cost.push_back(costs[i].Value()); + for (int32 j = 0; j < paths[i].size(); j++) { + kwlist.push_back(paths[i][j]); + } + kwlist_writer.Write(key, kwlist); + if (cost_wspecifier != "") + cost_writer.Write(key, cost); + } + n_done++; + } + + KALDI_LOG << "Done " << n_done << " keywords"; + return (n_done != 0 ? 0 : 1); + } catch(const std::exception &e) { + std::cerr << e.what(); + return -1; + } +} + + diff --git a/src/kwsbin/transcripts-to-fsts.cc b/src/kwsbin/transcripts-to-fsts.cc index e1a99a29fa2..4e7787f2642 100644 --- a/src/kwsbin/transcripts-to-fsts.cc +++ b/src/kwsbin/transcripts-to-fsts.cc @@ -23,6 +23,20 @@ #include "fstext/kaldi-fst-io.h" #include "fstext/fstext-utils.h" +namespace kaldi { +void SetLinearAcceptorWeight(double cost, fst::VectorFst *fst) { + typedef typename fst::StdArc::Label Label; + typedef typename fst::StdArc::Weight Weight; + typedef typename fst::StdArc::StateId StateId; + + StateId start = fst->Start(); + fst::MutableArcIterator > aiter(fst, start); + fst::StdArc arc = aiter.Value(); + arc.weight = cost; + aiter.SetValue(arc); +} +} // namespace kaldi + int main(int argc, char *argv[]) { try { using namespace kaldi; @@ -31,11 +45,22 @@ int main(int argc, char *argv[]) { typedef kaldi::uint64 uint64; const char *usage = - "Build a linear acceptor for each transcription. Read in the transcriptions in archive\n" - "format and write out the linear acceptors in archive format with the same key.\n" + "Build a linear acceptor for each transcription in the archive. " + "Read in the transcriptions in archive format and write out the linear " + "acceptors in archive format with the same key. The costs of " + "the arcs are set to be zero. The cost of the acceptor can be changed\n" + "by supplying the costs archive. In that case, the first arc's cost\n" + "will be set to the value obtained from the archive, i.e. the total\n" + "cost will be equal to cost. The cost archive can be sparse, i.e.\n" + "does not have to include zero-cost transcriptions. 
It is preferred\n" + "for the archive to be sorted (for efficiency).\n" "\n" - "Usage: transcripts-to-fsts [options] transcriptions-rspecifier fsts-wspecifier\n" - " e.g.: transcripts-to-fsts ark:train.tra ark:train.fsts\n"; + "Usage: \n" + " transcripts-to-fsts [options] <transcriptions-rspecifier> " + " [<costs-rspecifier>] <fsts-wspecifier> \n" + "e.g.: \n" + " transcripts-to-fsts ark:train.tra ark,s,cs,t:costs.txt " + " ark:train.fsts\n"; ParseOptions po(usage); @@ -44,10 +69,16 @@ int main(int argc, char *argv[]) { bool project_input = false; bool project_output = false; - po.Register("left-compose", &left_compose, "Compose the given FST to the left"); - po.Register("right-compose", &right_compose, "Compose the given FST to the right"); - po.Register("project-input", &project_input, "Project input labels if true"); - po.Register("project-output", &project_output, "Project input labels if true"); + po.Register("left-compose", &left_compose, + "Compose the given FST to the left"); + po.Register("right-compose", &right_compose, + "Compose the given FST to the right"); + po.Register("project-input", &project_input, + "Project input labels if true " + "(makes sense only with connection to left|right composition)"); + po.Register("project-output", &project_output, + "Project output labels if true " + "(makes sense only with connection to left|right composition)"); po.Read(argc, argv); @@ -56,11 +87,22 @@ int main(int argc, char *argv[]) { exit(1); } - std::string transcript_rspecifier = po.GetArg(1), - fst_wspecifier = po.GetOptArg(2); + std::string transcript_rspecifier, + costs_rspecifier, + fst_wspecifier; + + if ( po.NumArgs() == 2 ) { + transcript_rspecifier = po.GetArg(1); + fst_wspecifier = po.GetArg(2); + } else { + transcript_rspecifier = po.GetArg(1); + costs_rspecifier = po.GetArg(2); + fst_wspecifier = po.GetArg(3); + } SequentialInt32VectorReader transcript_reader(transcript_rspecifier); + RandomAccessDoubleReader costs_reader(costs_rspecifier); TableWriter<VectorFstHolder> fst_writer(fst_wspecifier); // Read the possible given FSTs @@ -81,13 +123,17 @@ int main(int argc, char *argv[]) { VectorFst<StdArc> fst; MakeLinearAcceptor(transcript, &fst); + if (costs_reader.HasKey(key)) { + double cost = costs_reader.Value(key); + SetLinearAcceptorWeight(cost, &fst); + } if (lfst != NULL) { VectorFst<StdArc> composed_fst; Compose(*lfst, fst, &composed_fst); fst = composed_fst; } - + if (rfst != NULL) { VectorFst<StdArc> composed_fst; Compose(fst, *rfst, &composed_fst); @@ -111,7 +157,7 @@ int main(int argc, char *argv[]) { delete rfst; KALDI_LOG << "Done " << n_done << " transcriptions"; - return (n_done != 0 ? 0 : 1); + return (n_done != 0 ?
0 : 1); } catch(const std::exception &e) { std::cerr << e.what(); return -1; diff --git a/src/nnet/nnet-various.h b/src/nnet/nnet-various.h index ddd370eaeff..eeef9bc25bf 100644 --- a/src/nnet/nnet-various.h +++ b/src/nnet/nnet-various.h @@ -389,7 +389,7 @@ class AddShift : public UpdatableComponent { shift_data_.AddVec(-lr * learn_rate_coef_, shift_data_grad_); } - void SetLearnRateCoef(float c) { learn_rate_coef_ = c; } + void SetLearnRateCoef(BaseFloat c) { learn_rate_coef_ = c; } protected: CuVector shift_data_; @@ -505,7 +505,7 @@ class Rescale : public UpdatableComponent { scale_data_.AddVec(-lr * learn_rate_coef_, scale_data_grad_); } - void SetLearnRateCoef(float c) { learn_rate_coef_ = c; } + void SetLearnRateCoef(BaseFloat c) { learn_rate_coef_ = c; } protected: CuVector scale_data_; diff --git a/src/nnet3/nnet-example-utils.h b/src/nnet3/nnet-example-utils.h index d1eb85b6d11..debd93599e9 100644 --- a/src/nnet3/nnet-example-utils.h +++ b/src/nnet3/nnet-example-utils.h @@ -162,7 +162,7 @@ struct ChunkTimeInfo { // frame appears in multiple chunks, we want to downweight it // so that the total weight remains 1. (Of course, the calling // code is free to ignore these weights if desired). - std::vector output_weights; + std::vector output_weights; }; diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index c8c6c2d7905..b3906450525 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -50,6 +50,13 @@ CF="\"$CFLAGS -g $(addsw -I $INCDIRS)\"" LDF="\"$LDFLAGS $(addsw -L $LIBDIRS)\"" CCC="$(mtoken CXX "$CXX")" +# Randomly choose between single and double precision +if [[ $(( RANDOM % 2 )) == 1 ]] ; then + DPF="--double-precision=yes" +else + DPF="--double-precision=no" +fi + echo "Building tools..." [Time: $(date)] runvx cd tools runvx make openfst "$CCC" CXXFLAGS="$CF" -j$MAXPAR @@ -57,8 +64,9 @@ cd .. echo "Building src..." [Time: $(date)] runvx cd src -runvx "$CCC" CXXFLAGS="$CF" LDFLAGS="$LDF" ./configure --shared --use-cuda=no --mathlib=OPENBLAS --openblas-root="$XROOT/usr" +runvx "$CCC" CXXFLAGS="$CF" LDFLAGS="$LDF" ./configure --shared --use-cuda=no "$DPF" --mathlib=OPENBLAS --openblas-root="$XROOT/usr" runvx make all -j$MAXPAR +runvx make ext -j$MAXPAR echo "Running tests..." [Time: $(date)] runvx make test -k -j$MAXPAR diff --git a/windows/get_version.pl b/windows/get_version.pl index 2a54891516a..98d4a6b49e6 100755 --- a/windows/get_version.pl +++ b/windows/get_version.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl #=============================================================================== # Copyright 2017 (Author: Yenda Trmal ) -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -39,6 +39,6 @@ }; my $kaldi_ver=<$F>; chomp $kaldi_ver; -print $H "KALDI_VERSION=${kaldi_ver}-win\n"; -close($F); +print $H "#define KALDI_VERSION \"${kaldi_ver}-win\"\n"; +close($F); close($H); From bcc71b67d489a1766922c9caf2a54306755f1861 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20S=C3=B8eborg=20Kirkedal?= Date: Thu, 9 Feb 2017 21:59:16 +0100 Subject: [PATCH 417/530] [egs] sprakbanken recipe: add nnet3 examples. * Added nnet3 recipes copied from tedlium/s5_r2. Modified run_tdnn.sh and run_ivector_common.sh to this setup. Achieves new state-of-the-art on dev set (~11% WER). 
* Modified run_lstm.sh to work with the sprakbanken data (got 11.47% WER) and added the --tries 100 flag to wget in sprak_data_prep.sh so the download does not break due to broken connections * Modified xent and chain training scripts and added them to run.sh * Removed old unused scripts in local/ and updated the RESULTS file * Removed comments from the tedlium setup from chain and nnet3 training scripts and removed python3 check+install from sprak_data_prep.sh because the script is compatible with python2. * Addressed the comments from @danpovey and @galv --- egs/sprakbanken/s5/RESULTS | 53 ++-- egs/sprakbanken/s5/conf/mfcc_hires.conf | 11 + egs/sprakbanken/s5/conf/online_cmvn.conf | 1 + .../s5/local/chain/compare_wer_general.sh | 50 ++++ egs/sprakbanken/s5/local/chain/run_lstm.sh | 1 + egs/sprakbanken/s5/local/chain/run_tdnn.sh | 1 + .../s5/local/chain/run_tdnn_lstm.sh | 1 + .../s5/local/chain/tuning/run_lstm_1a.sh | 260 +++++++++++++++++ .../s5/local/chain/tuning/run_lstm_1b.sh | 261 +++++++++++++++++ .../s5/local/chain/tuning/run_lstm_1c.sh | 259 +++++++++++++++++ .../s5/local/chain/tuning/run_lstm_1d.sh | 272 ++++++++++++++++++ .../s5/local/chain/tuning/run_lstm_1e.sh | 259 +++++++++++++++++ .../s5/local/chain/tuning/run_tdnn_1a.sh | 202 +++++++++++++ .../s5/local/chain/tuning/run_tdnn_1b.sh | 239 +++++++++++++++ .../s5/local/chain/tuning/run_tdnn_lstm_1a.sh | 258 +++++++++++++++++ egs/sprakbanken/s5/local/cstr_ndx2flist.pl | 54 ---- egs/sprakbanken/s5/local/find_transcripts.pl | 64 ----- egs/sprakbanken/s5/local/flist2scp.pl | 31 -- .../s5/local/generate_example_kws.sh | 110 ------- .../s5/local/generate_results_file.sh | 16 ++ egs/sprakbanken/s5/local/kws_data_prep.sh | 60 ---- egs/sprakbanken/s5/local/nnet3/run_blstm.sh | 48 ++++ .../s5/local/nnet3/run_ivector_common.sh | 238 +++++++++++++++ egs/sprakbanken/s5/local/nnet3/run_lstm.sh | 174 +++++++++++ egs/sprakbanken/s5/local/nnet3/run_tdnn.sh | 102 +++++++ egs/sprakbanken/s5/local/run_basis_fmllr.sh | 42 --- egs/sprakbanken/s5/local/run_kl_hmm.sh | 24 -- egs/sprakbanken/s5/local/run_raw_fmllr.sh | 67 ----- egs/sprakbanken/s5/local/sprak_data_prep.sh | 17 +- .../s5/local/sprak_run_mmi_tri4b.sh | 56 ---- egs/sprakbanken/s5/local/sprak_train_cmulm.sh | 61 ---- egs/sprakbanken/s5/run.sh | 35 ++- egs/sprakbanken_swe/s5/local/data_prep.py | 6 +- .../s5/local/normalize_transcript.py | 17 +- .../s5/local/sprak_data_prep.sh | 34 +-- 35 files changed, 2744 insertions(+), 640 deletions(-) create mode 100644 egs/sprakbanken/s5/conf/mfcc_hires.conf create mode 100644 egs/sprakbanken/s5/conf/online_cmvn.conf create mode 100755 egs/sprakbanken/s5/local/chain/compare_wer_general.sh create mode 120000 egs/sprakbanken/s5/local/chain/run_lstm.sh create mode 120000 egs/sprakbanken/s5/local/chain/run_tdnn.sh create mode 120000 egs/sprakbanken/s5/local/chain/run_tdnn_lstm.sh create mode 100755 egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh create mode 100755 egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh create mode 100755 egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh create mode 100755 egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh create mode 100755 egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh create mode 100755 egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1a.sh create mode 100755 egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh create mode 100755 egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh delete mode 100755 egs/sprakbanken/s5/local/cstr_ndx2flist.pl delete mode 100755
egs/sprakbanken/s5/local/find_transcripts.pl delete mode 100755 egs/sprakbanken/s5/local/flist2scp.pl delete mode 100755 egs/sprakbanken/s5/local/generate_example_kws.sh create mode 100755 egs/sprakbanken/s5/local/generate_results_file.sh delete mode 100755 egs/sprakbanken/s5/local/kws_data_prep.sh create mode 100755 egs/sprakbanken/s5/local/nnet3/run_blstm.sh create mode 100755 egs/sprakbanken/s5/local/nnet3/run_ivector_common.sh create mode 100755 egs/sprakbanken/s5/local/nnet3/run_lstm.sh create mode 100755 egs/sprakbanken/s5/local/nnet3/run_tdnn.sh delete mode 100755 egs/sprakbanken/s5/local/run_basis_fmllr.sh delete mode 100644 egs/sprakbanken/s5/local/run_kl_hmm.sh delete mode 100644 egs/sprakbanken/s5/local/run_raw_fmllr.sh delete mode 100755 egs/sprakbanken/s5/local/sprak_run_mmi_tri4b.sh delete mode 100755 egs/sprakbanken/s5/local/sprak_train_cmulm.sh diff --git a/egs/sprakbanken/s5/RESULTS b/egs/sprakbanken/s5/RESULTS index 628507ac85d..d64f006f806 100644 --- a/egs/sprakbanken/s5/RESULTS +++ b/egs/sprakbanken/s5/RESULTS @@ -1,25 +1,28 @@ -%WER 49.19 [ 5318 / 10811, 481 ins, 1511 del, 3326 sub ] exp/mono0a/decode_3g_test1k/wer_9 -%WER 47.28 [ 5111 / 10811, 443 ins, 1489 del, 3179 sub ] exp/mono0a/decode_b3g_test1k/wer_10 -%WER 16.19 [ 1750 / 10811, 397 ins, 323 del, 1030 sub ] exp/sgmm2_5a/decode_3g_test1k/wer_9 -%WER 15.10 [ 1632 / 10811, 404 ins, 305 del, 923 sub ] exp/sgmm2_5b/decode_3g_test1k/wer_9 -%WER 14.94 [ 1615 / 10811, 390 ins, 310 del, 915 sub ] exp/sgmm2_5b/decode_4g_test1k/wer_9 -%WER 14.36 [ 1553 / 10811, 376 ins, 264 del, 913 sub ] exp/sgmm2_5c/decode_3g_test1k/wer_9 -%WER 14.18 [ 1533 / 10811, 367 ins, 266 del, 900 sub ] exp/sgmm2_5c/decode_4g_test1k/wer_9 -%WER 25.61 [ 2769 / 10811, 511 ins, 539 del, 1719 sub ] exp/tri1/decode_3g_test1k/wer_10 -%WER 25.12 [ 2716 / 10811, 444 ins, 571 del, 1701 sub ] exp/tri1/decode_b3g_test1k/wer_11 -%WER 23.81 [ 2574 / 10811, 426 ins, 564 del, 1584 sub ] exp/tri2a/decode_3g_test1k/wer_12 -%WER 23.22 [ 2510 / 10811, 457 ins, 517 del, 1536 sub ] exp/tri2a/decode_3g_test1k_fromlats/wer_11 -%WER 22.18 [ 2398 / 10811, 436 ins, 495 del, 1467 sub ] exp/tri2b/decode_3g_test1k/wer_11 -%WER 21.87 [ 2364 / 10811, 380 ins, 553 del, 1431 sub ] exp/tri2b/decode_3g_test1k_mbr/wer_13 -%WER 18.98 [ 2052 / 10811, 451 ins, 372 del, 1229 sub ] exp/tri3b_20k/decode_3g_test1k/wer_11 -%WER 22.62 [ 2445 / 10811, 468 ins, 460 del, 1517 sub ] exp/tri3b_20k/decode_3g_test1k.si/wer_10 -%WER 19.31 [ 2088 / 10811, 440 ins, 388 del, 1260 sub ] exp/tri3b/decode_3g_test1k/wer_11 -%WER 23.19 [ 2507 / 10811, 435 ins, 520 del, 1552 sub ] exp/tri3b/decode_3g_test1k.si/wer_12 -%WER 19.06 [ 2061 / 10811, 427 ins, 384 del, 1250 sub ] exp/tri3b/decode_4g_test1k/wer_11 -%WER 23.20 [ 2508 / 10811, 447 ins, 520 del, 1541 sub ] exp/tri3b/decode_4g_test1k.si/wer_11 -%WER 17.42 [ 1883 / 10811, 416 ins, 359 del, 1108 sub ] exp/tri4a/decode_3g_test1k/wer_13 -%WER 20.86 [ 2255 / 10811, 403 ins, 473 del, 1379 sub ] exp/tri4a/decode_3g_test1k.si/wer_13 -%WER 17.52 [ 1894 / 10811, 396 ins, 372 del, 1126 sub ] exp/tri4b/decode_3g_test1k/wer_13 -%WER 20.82 [ 2251 / 10811, 399 ins, 471 del, 1381 sub ] exp/tri4b/decode_3g_test1k.si/wer_13 -%WER 17.53 [ 1895 / 10811, 403 ins, 375 del, 1117 sub ] exp/tri4b/decode_4g_test1k/wer_13 -%WER 20.99 [ 2269 / 10811, 438 ins, 436 del, 1395 sub ] exp/tri4b/decode_4g_test1k.si/wer_11 +GMM-based systems +%WER 22.87 [ 24286 / 106172, 3577 ins, 5321 del, 15388 sub ] exp/tri1/decode_fg_dev/wer_12_0.5 +%WER 23.13 [ 24561 / 106172, 3602 ins, 5411 del, 
15548 sub ] exp/tri1/decode_tg_dev/wer_12_0.5 +%WER 21.24 [ 22548 / 106172, 4028 ins, 4246 del, 14274 sub ] exp/tri2a/decode_tg_dev/wer_13_0.0 +%WER 19.46 [ 20664 / 106172, 3276 ins, 4332 del, 13056 sub ] exp/tri2b/decode_tg_dev/wer_15_0.5 +%WER 16.80 [ 17839 / 106172, 3238 ins, 3403 del, 11198 sub ] exp/tri3b/decode_fg_dev/wer_17_0.0 +%WER 19.45 [ 20651 / 106172, 3880 ins, 3671 del, 13100 sub ] exp/tri3b/decode_fg_dev.si/wer_15_0.0 +%WER 14.24 [ 9849 / 69165, 2046 ins, 1365 del, 6438 sub ] exp/tri3b/decode_fg_test/wer_16_0.5 +%WER 17.31 [ 11972 / 69165, 2330 ins, 1695 del, 7947 sub ] exp/tri3b/decode_fg_test.si/wer_15_0.5 +%WER 16.94 [ 17984 / 106172, 3361 ins, 3377 del, 11246 sub ] exp/tri3b/decode_tg_dev/wer_16_0.0 +%WER 19.52 [ 20720 / 106172, 3654 ins, 3846 del, 13220 sub ] exp/tri3b/decode_tg_dev.si/wer_17_0.0 +%WER 14.40 [ 9957 / 69165, 2291 ins, 1184 del, 6482 sub ] exp/tri3b/decode_tg_test/wer_16_0.0 +%WER 17.41 [ 12044 / 69165, 2291 ins, 1736 del, 8017 sub ] exp/tri3b/decode_tg_test.si/wer_15_0.5 +nnet3 xent systems +%WER 11.57 [ 12279 / 106172, 2640 ins, 2442 del, 7197 sub ] exp/nnet3/tdnn0_sp/decode_dev/wer_10_0.0 +%WER 9.89 [ 6841 / 69165, 1542 ins, 917 del, 4382 sub ] exp/nnet3/tdnn0_sp/decode_test/wer_11_0.5 +%WER 10.45 [ 11098 / 106172, 2199 ins, 2272 del, 6627 sub ] exp/nnet3/lstm_0_ld5_sp/decode_dev/wer_9_0.0 +%WER 12.34 [ 8533 / 69165, 1740 ins, 1393 del, 5400 sub ] exp/nnet3/lstm_0_ld5_sp/decode_test/wer_11_1.0 +%WER 10.59 [ 11241 / 106172, 2208 ins, 2304 del, 6729 sub ] exp/nnet3/lstm_bidirectional_ld5_sp/decode_dev/wer_9_0.0 +%WER 12.43 [ 8596 / 69165, 1742 ins, 1426 del, 5428 sub ] exp/nnet3/lstm_bidirectional_ld5_sp/decode_test/wer_11_1.0 +%WER 9.18 [ 9747 / 106172, 1987 ins, 1913 del, 5847 sub ] exp/nnet3/lstm_bidirectional_sp/decode_dev/wer_8_0.0 +Nnet3 chain systems +%WER 8.48 [ 9001 / 106172, 1559 ins, 1624 del, 5818 sub ] exp/chain/tdnn_lstm1a_sp_bi/decode_dev/wer_9_0.0 +%WER 7.20 [ 4981 / 69165, 915 ins, 402 del, 3664 sub ] exp/chain/tdnn_lstm1a_sp_bi/decode_test/wer_8_1.0 +%WER 10.00 [ 10619 / 106172, 1980 ins, 1896 del, 6743 sub ] exp/chain/tdnn_sp_bi/decode_dev/wer_9_0.0 +%WER 8.58 [ 5936 / 69165, 1059 ins, 667 del, 4210 sub ] exp/chain/tdnn_sp_bi/decode_test/wer_9_1.0 +%WER 9.39 [ 9969 / 106172, 1624 ins, 1912 del, 6433 sub ] exp/chain/lstm1e_sp_bi/decode_dev/wer_8_0.5 +%WER 7.72 [ 5341 / 69165, 1002 ins, 497 del, 3842 sub ] exp/chain/lstm1e_sp_bi/decode_test/wer_8_0.5 diff --git a/egs/sprakbanken/s5/conf/mfcc_hires.conf b/egs/sprakbanken/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..b5aeaafe704 --- /dev/null +++ b/egs/sprakbanken/s5/conf/mfcc_hires.conf @@ -0,0 +1,11 @@ +# config for high-resolution MFCC features, intended for neural network training +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so + # there might be some information at the low end. 
+                 # Needs to be this low to be sensitive to creaky voice
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/sprakbanken/s5/conf/online_cmvn.conf b/egs/sprakbanken/s5/conf/online_cmvn.conf
new file mode 100644
index 00000000000..7748a4a4dd3
--- /dev/null
+++ b/egs/sprakbanken/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/sprakbanken/s5/local/chain/compare_wer_general.sh b/egs/sprakbanken/s5/local/chain/compare_wer_general.sh
new file mode 100755
index 00000000000..4074b0c12c3
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/compare_wer_general.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Prints a table that makes it easy to compare WER and objective values
+# across nnet3 and chain training runs
+
+echo -n "System                  "
+for x in "$@"; do   printf "% 10s" $x;   done
+echo
+
+echo -n "WER on dev(tg)          "
+for x in "$@"; do
+  wer=$(grep WER ${x}/decode_dev/wer_* | utils/best_wer.sh | awk '{print $2}')
+  printf "% 10s" $wer
+done
+echo
+
+echo -n "WER on test(tg)         "
+for x in "$@"; do
+  wer=$(grep WER ${x}/decode_test/wer_* | utils/best_wer.sh | awk '{print $2}')
+  printf "% 10s" $wer
+done
+echo
+
+echo -n "Final train prob        "
+for x in "$@"; do
+  prob=$(grep Overall ${x}/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "Final valid prob        "
+for x in "$@"; do
+  prob=$(grep Overall ${x}/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "Final train prob (xent) "
+for x in "$@"; do
+  prob=$(grep Overall ${x}/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "Final valid prob (xent) "
+for x in "$@"; do
+  prob=$(grep Overall ${x}/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
diff --git a/egs/sprakbanken/s5/local/chain/run_lstm.sh b/egs/sprakbanken/s5/local/chain/run_lstm.sh
new file mode 120000
index 00000000000..afba2a1ce94
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/run_lstm.sh
@@ -0,0 +1 @@
+tuning/run_lstm_1e.sh
\ No newline at end of file
diff --git a/egs/sprakbanken/s5/local/chain/run_tdnn.sh b/egs/sprakbanken/s5/local/chain/run_tdnn.sh
new file mode 120000
index 00000000000..61f8f499182
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1b.sh
\ No newline at end of file
diff --git a/egs/sprakbanken/s5/local/chain/run_tdnn_lstm.sh b/egs/sprakbanken/s5/local/chain/run_tdnn_lstm.sh
new file mode 120000
index 00000000000..8e647598556
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/run_tdnn_lstm.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_lstm_1a.sh
\ No newline at end of file
diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh
new file mode 100755
index 00000000000..3ea61800869
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh
@@ -0,0 +1,260 @@
+#!/bin/bash
+
+# run_lstm_1a.sh is a first attempt at an LSTM system, based on xconfigs-- it's
+# probably not very well configured, e.g. the num-params might be too small.
+# recurrent-projection-dim is less than non-recurrent-projection-dim due to an
+# oversight.
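+
+# For reference, the oversight is visible in the xconfig further down, which uses
+#   lstmp-layer ... recurrent-projection-dim=128 non-recurrent-projection-dim=256 ...
+# while the usual convention keeps the two projection dims equal, e.g.
+#   lstmp-layer ... recurrent-projection-dim=128 non-recurrent-projection-dim=128 ...
+# (this is the change run_lstm_1e.sh eventually makes; the second line is only
+# an illustration here, not a configuration that was run as part of 1a).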
+ +# comparison with TDNN system (WER is worse): +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn1b_sp_bi exp/chain_cleaned/lstm1a_sp_bi +# System tdnn1b_sp_bi lstm1a_sp_bi +# WER on dev(orig) 10.2 10.8 +# WER on dev(rescored) 9.6 10.2 +# WER on test(orig) 9.7 10.0 +# WER on test(rescored) 9.2 9.6 +# Final train prob -0.0928 -0.0848 +# Final valid prob -0.1178 -0.1098 +# Final train prob (xent) -1.4666 -1.1692 +# Final valid prob (xent) -1.5473 -1.2520 + + +## how you run this (note: this assumes that the run_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_lstm.sh + +# without cleanup: +# local/chain/run_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# This script (run_lstm_1a) is like run_tdnn_1b.sh except modified to use an LSTM +# configuration (some aspects borrowed from egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh). + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +lstm_affix=1a #affix for LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3 + lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3 + lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh new file mode 100755 index 00000000000..a22d4eb53d7 --- /dev/null +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh @@ -0,0 +1,261 @@ +#!/bin/bash + +# run_lstm_1b.sh is as run_lstm_1a.sh but replacing the projected LSTM +# with a regular LSTM. This is done in order to have an LSTM-only baseline +# for the 'fast lstm', where we need to test the regular as well as projected +# LSTM layers. + +# It's worse than the LSTMP, as expected, due to more overtraining. 
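+
+# Concretely, the only xconfig change relative to run_lstm_1a.sh is dropping
+# the projections: each of the three layers goes from
+#   lstmp-layer name=lstmN cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3
+# to
+#   lstm-layer name=lstmN cell-dim=512 delay=-3
+# (with lstmN standing for lstm1/lstm2/lstm3), so the recurrence -- and the
+# input to the next layer -- is now the full 512-dim cell output rather than
+# a 384-dim projection.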
+ +# steps/info/chain_dir_info.pl exp/chain_cleaned/lstm1b_sp_bi +# exp/chain_cleaned/lstm1b_sp_bi: num-iters=253 nj=2..12 num-params=9.6M dim=40+100->3607 combine=-0.09->-0.09 xent:train/valid[167,252,final]=(-1.24,-1.14,-1.14/-1.35,-1.28,-1.28) logprob:train/valid[167,252,final]=(-0.092,-0.079,-0.079/-0.119,-0.110,-0.110) + +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1a_sp_bi exp/chain_cleaned/lstm1b_sp_bi +# System lstm1a_sp_bi lstm1b_sp_bi +# WER on dev(orig) 10.8 11.3 +# WER on dev(rescored) 10.2 10.7 +# WER on test(orig) 10.0 10.6 +# WER on test(rescored) 9.6 10.0 +# Final train prob -0.0848 -0.0787 +# Final valid prob -0.1098 -0.1104 +# Final train prob (xent) -1.1692 -1.1442 +# Final valid prob (xent) -1.2520 -1.2782 + + +## how you run this (note: this assumes that the run_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_lstm.sh + +# without cleanup: +# local/chain/run_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +lstm_affix=1b #affix for LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstm-layer name=lstm1 cell-dim=512 delay=-3 + lstm-layer name=lstm2 cell-dim=512 delay=-3 + lstm-layer name=lstm3 cell-dim=512 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh new file mode 100755 index 00000000000..718992fc909 --- /dev/null +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh @@ -0,0 +1,259 @@ +#!/bin/bash + + +# run_lstm_1c.sh is like run_lstm_1b.sh but changing from the old LSTM +# implementation to our new 'fast' LSTM layer. The xconfig changes from +# 'lstm-layer' to 'fast-lstm-layer'. It's as good as or maybe slightly better +# than the old setup. 
+ +# steps/info/chain_dir_info.pl exp/chain_cleaned/lstm1c_sp_bi +# exp/chain_cleaned/lstm1c_sp_bi: num-iters=253 nj=2..12 num-params=9.6M dim=40+100->3607 combine=-0.09->-0.09 xent:train/valid[167,252,final]=(-1.26,-1.14,-1.14/-1.34,-1.27,-1.27) logprob:train/valid[167,252,final]=(-0.092,-0.078,-0.078/-0.116,-0.111,-0.111) + + +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1b_sp_bi exp/chain_cleaned/lstm1c_sp_bi +# System lstm1b_sp_bi lstm1c_sp_bi +# WER on dev(orig) 11.3 11.2 +# WER on dev(rescored) 10.7 10.5 +# WER on test(orig) 10.6 10.6 +# WER on test(rescored) 10.0 10.1 +# Final train prob -0.0787 -0.0777 +# Final valid prob -0.1104 -0.1108 +# Final train prob (xent) -1.1442 -1.1445 +# Final valid prob (xent) -1.2782 -1.2692 + +## how you run this (note: this assumes that the run_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_lstm.sh + +# without cleanup: +# local/chain/run_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +lstm_affix=1c #affix for LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstm-layer name=lstm1 cell-dim=512 delay=-3 + fast-lstm-layer name=lstm2 cell-dim=512 delay=-3 + fast-lstm-layer name=lstm3 cell-dim=512 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then
+    utils/create_split_dir.pl \
+     /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+  fi
+
+  steps/nnet3/chain/train.py --stage $train_stage \
+    --cmd "$decode_cmd" \
+    --feat.online-ivector-dir $train_ivector_dir \
+    --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
+    --chain.xent-regularize 0.1 \
+    --chain.leaky-hmm-coefficient 0.1 \
+    --chain.l2-regularize 0.00005 \
+    --chain.apply-deriv-weights false \
+    --chain.lm-opts="--num-extra-lm-states=2000" \
+    --egs.dir "$common_egs_dir" \
+    --egs.opts "--frames-overlap-per-eg 0" \
+    --egs.chunk-width "$frames_per_chunk" \
+    --egs.chunk-left-context "$chunk_left_context" \
+    --egs.chunk-right-context "$chunk_right_context" \
+    --trainer.num-chunk-per-minibatch 128 \
+    --trainer.frames-per-iter 1500000 \
+    --trainer.max-param-change 2.0 \
+    --trainer.num-epochs 4 \
+    --trainer.deriv-truncate-margin 10 \
+    --trainer.optimization.shrink-value 0.99 \
+    --trainer.optimization.num-jobs-initial 2 \
+    --trainer.optimization.num-jobs-final 12 \
+    --trainer.optimization.initial-effective-lrate 0.001 \
+    --trainer.optimization.final-effective-lrate 0.0001 \
+    --trainer.optimization.momentum 0.0 \
+    --cleanup.remove-egs true \
+    --feat-dir $train_data_dir \
+    --tree-dir $tree_dir \
+    --lat-dir $lat_dir \
+    --dir $dir
+fi
+
+
+
+if [ $stage -le 19 ]; then
+  # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
+  # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
+  # the lang directory.
+  utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph
+fi
+
+if [ $stage -le 20 ]; then
+  rm $dir/.error 2>/dev/null || true
+  for dset in dev test; do
+      (
+      steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \
+          --acwt 1.0 --post-decode-acwt 10.0 \
+          --extra-left-context $extra_left_context \
+          --extra-right-context $extra_right_context \
+          --frames-per-chunk "$frames_per_chunk" \
+          --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+          --scoring-opts "--min-lmwt 5 " \
+         $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1;
+      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+        data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1
+      ) || touch $dir/.error &
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+exit 0
diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh
new file mode 100755
index 00000000000..8cf543f5096
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh
@@ -0,0 +1,272 @@
+#!/bin/bash
+
+
+# run_lstm_1d.sh is like run_lstm_1c.sh, but switching back to projected
+# LSTM (LSTMP)... the configuration is the same as 1a (but unlike 1a it uses
+# the fast lstm layer).  Note: 1a and 1d are a little broken
+# in that their non-recurrent-projection-dim is twice the recurrent-projection-dim,
+# but it's better for comparison purposes to have this the same as 1a.
+
+# As you can see, compared to 1a, 1d is 0.3% to 0.5% better absolute;
+# this comes with the upgrade to 'fast' LSTM.  There were differences in how
+# the gradient truncation is done, maybe that's it; also there are
+# other differences, like how the update of the diagonal matrices
+# is done, and the integration of 4 matrix multiplies into one which
+# will affect the natural gradient. 
Anyway, we're not complaining. + + +# steps/info/chain_dir_info.pl exp/chain_cleaned/lstm1d_sp_bi +# exp/chain_cleaned/lstm1d_sp_bi: num-iters=253 nj=2..12 num-params=6.4M dim=40+100->3607 combine=-0.09->-0.09 xent:train/valid[167,252,final]=(-1.21,-1.13,-1.13/-1.29,-1.22,-1.23) logprob:train/valid[167,252,final]=(-0.092,-0.083,-0.081/-0.114,-0.105,-0.105) + +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1a_sp_bi exp/chain_cleaned/lstm1c_sp_bi exp/chain_cleaned/lstm1d_sp_bi +# System lstm1a_sp_bi lstm1c_sp_bi lstm1d_sp_bi +# WER on dev(orig) 10.8 11.2 10.3 +# WER on dev(rescored) 10.2 10.5 9.8 +# WER on test(orig) 10.0 10.6 9.7 +# WER on test(rescored) 9.6 10.1 9.2 +# Final train prob -0.0848 -0.0777 -0.0812 +# Final valid prob -0.1098 -0.1108 -0.1049 +# Final train prob (xent) -1.1692 -1.1445 -1.1334 +# Final valid prob (xent) -1.2520 -1.2692 -1.2263 + + + + +## how you run this (note: this assumes that the run_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_lstm.sh + +# without cleanup: +# local/chain/run_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +lstm_affix=1d #affix for LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3 + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3 + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=256 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh new file mode 100755 index 00000000000..11af644e765 --- /dev/null +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh @@ -0,0 +1,259 @@ +#!/bin/bash + +# (From the original script: +# run_lstm_1e.sh is like run_lstm_1d.sh, but reducing non-recurrent-projection-dim +# from 256 to 128 (fixes an earlier mistake). +# However, this doesn't improve WER results-- see below. Probably the system +# has too few parameters. Anyway we probably won't tune this further +# as LSTMs by themselves aren't expected to perform that well: +# see run_tdnn_lstm_1a.sh and others in that sequence.) 
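+
+# Back-of-the-envelope on where the parameters go (a sketch, ignoring biases
+# and diagonal terms): a fast-lstmp layer has roughly
+#   4*cell_dim*(input_dim + rproj_dim) + cell_dim*(rproj_dim + nonrec_dim)
+# weights, and its output (the next layer's input) is rproj_dim + nonrec_dim
+# wide.  Halving nonrec from 256 to 128 therefore shrinks lstm2/lstm3 from
+# ~1.25M to ~0.92M weights each, and the two ~3600-dim output layers from
+# ~1.4M to ~0.91M each, which is roughly the gap between the num-params=6.4M
+# reported for 1d and the 4.7M reported below (the two figures come from runs
+# with slightly different trees, so this is only approximate).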
+ +# steps/info/chain_dir_info.pl exp/chain/lstm1e_sp_bi +# exp/chain/lstm1e_sp_bi: num-iters=384 nj=2..12 num-params=4.7M dim=40+100->3557 combine=-0.07->-0.07 xent:train/valid[255,383,final]=(-0.755,-0.703,-0.712/-0.793,-0.755,-0.761) logprob:train/valid[255,383,final]=(-0.060,-0.053,-0.053/-0.071,-0.066,-0.065) + +# local/chain/compare_wer_general.sh exp/chain/tdnn_sp_bi/ exp/chain/lstm1e_sp_bi/ +# System exp/chain/tdnn_sp_bi/exp/chain/lstm1e_sp_bi/ +# WER on dev(tg) 10.00 9.39 +# WER on test(tg) 8.58 7.72 +# Final train prob -0.0642 -0.0528 +# Final valid prob -0.0788 -0.0651 +# Final train prob (xent) -0.9113 -0.7117 +# Final valid prob (xent) -0.9525 -0.7607 + +## how you run this (note: this assumes that the run_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default: +# local/chain/run_lstm.sh + +# note, that you should probably adjust parallelisation to your setup +# if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# note, if you have already run one of the chain nnet3 systems, +# you may want to run with --stage 17. + + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=7 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train +gmm=tri3b # the gmm for the target data +num_threads_ubm=32 +nnet3_affix= # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +lstm_affix=1e #affix for LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_tg $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..21e3edac5f3 --- /dev/null +++ b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1a.sh @@ -0,0 +1,202 @@ +#!/bin/bash + +# This is the original TDNN script before we introduced xconfigs. +# See run_tdnn_1b.sh for comparative results. + + +# by default, with cleanup: +# local/chain/run_tdnn.sh + +# without cleanup: +# local/chain/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run the corresponding non-chain nnet3 system +# (local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. 
+train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs"; + + steps/nnet3/tdnn/make_configs.py \ + --self-repair-scale-nonlinearity 0.00001 \ + --feat-dir data/${train_set}_sp_hires_comb \ + --ivector-dir $train_ivector_dir \ + --tree-dir $tree_dir \ + --relu-dim 450 \ + --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ + --use-presoftmax-prior-scale false \ + --xent-regularize 0.1 \ + --xent-separate-forward-affine true \ + --include-log-softmax false \ + --final-layer-normalize-target 1.0 \ + $dir/configs || exit 1; +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh new file mode 100755 index 00000000000..14973a5d029 --- /dev/null +++ b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh @@ -0,0 +1,239 @@ +#!/bin/bash + +# steps/info/chain_dir_info.pl exp/chain/tdnn_sp_bi/ +# exp/chain/tdnn_sp_bi/: num-iters=384 nj=2..12 num-params=7.0M dim=40+100->3557 combine=-0.08->-0.08 xent:train/valid[255,383,final]=(-0.954,-0.911,-0.911/-0.979,-0.953,-0.952) logprob:train/valid[255,383,final]=(-0.071,-0.064,-0.064/-0.084,-0.079,-0.079) + +# local/chain/compare_wer_general.sh exp/nnet3/tdnn0_sp exp/chain/tdnn_sp_bi +# System exp/nnet3/tdnn0_spexp/chain/tdnn_sp_bi +# WER on dev(tg) 11.57 10.00 +# WER on test(tg) 9.89 8.58 +# Final train prob -0.79890.7538 -0.0642 +# Final valid prob -0.77280.7590 -0.0788 +# Final train prob (xent) -0.9113 +# Final valid prob (xent) -0.9525 + +## how you run this (note: this assumes that the run_tdnn.sh soft link points here; +## otherwise call it directly in its location). +# by default: +# local/chain/run_tdnn.sh + +# note, that you should probably adjust parallelisation to your setup +# if you have already run the corresponding non-chain nnet3 system +# (local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# This script is like run_tdnn_1a.sh except it uses an xconfig-based mechanism +# to get the configuration. + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=7 +min_seg_len=1.55 +xent_regularize=0.1 +train_set=train +gmm=tri3b # the gmm for the target data +num_threads_ubm=32 +nnet3_affix= # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. 
We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1,2) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-6,-3,0) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then
+    utils/create_split_dir.pl \
+      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+  fi
+
+  steps/nnet3/chain/train.py --stage $train_stage \
+    --cmd "$decode_cmd" \
+    --feat.online-ivector-dir $train_ivector_dir \
+    --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
+    --chain.xent-regularize 0.1 \
+    --chain.leaky-hmm-coefficient 0.1 \
+    --chain.l2-regularize 0.00005 \
+    --chain.apply-deriv-weights false \
+    --chain.lm-opts="--num-extra-lm-states=2000" \
+    --egs.dir "$common_egs_dir" \
+    --egs.opts "--frames-overlap-per-eg 0" \
+    --egs.chunk-width 150 \
+    --trainer.num-chunk-per-minibatch 128 \
+    --trainer.frames-per-iter 1500000 \
+    --trainer.num-epochs 4 \
+    --trainer.optimization.num-jobs-initial 2 \
+    --trainer.optimization.num-jobs-final 12 \
+    --trainer.optimization.initial-effective-lrate 0.001 \
+    --trainer.optimization.final-effective-lrate 0.0001 \
+    --trainer.max-param-change 2.0 \
+    --cleanup.remove-egs true \
+    --feat-dir $train_data_dir \
+    --tree-dir $tree_dir \
+    --lat-dir $lat_dir \
+    --dir $dir
+fi
+
+
+
+if [ $stage -le 19 ]; then
+  # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
+  # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
+  # the lang directory.
+  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_tg $dir $dir/graph
+fi
+
+if [ $stage -le 20 ]; then
+  rm $dir/.error 2>/dev/null || true
+  for dset in dev test; do
+      (
+      steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \
+          --acwt 1.0 --post-decode-acwt 10.0 \
+          --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+          --scoring-opts "--min-lmwt 5 " \
+          $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1;
+      ) || touch $dir/.error &
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+exit 0
diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
new file mode 100755
index 00000000000..7f7f263a741
--- /dev/null
+++ b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -0,0 +1,258 @@
+#!/bin/bash
+
+# steps/info/chain_dir_info.pl exp/chain/tdnn_lstm1a_sp_bi/
+# exp/chain/tdnn_lstm1a_sp_bi/: num-iters=384 nj=2..12 num-params=9.5M dim=40+100->3557 combine=-0.05->-0.05 xent:train/valid[255,383,final]=(-0.579,-0.518,-0.523/-0.651,-0.616,-0.619) logprob:train/valid[255,383,final]=(-0.046,-0.038,-0.038/-0.063,-0.060,-0.059)
+
+# local/chain/compare_wer_general.sh exp/chain/tdnn_sp_bi/ exp/chain/lstm1e_sp_bi/ exp/chain/tdnn_lstm1a_sp_bi/
+# System                    exp/chain/tdnn_sp_bi/  exp/chain/lstm1e_sp_bi/  exp/chain/tdnn_lstm1a_sp_bi/
+# WER on dev(tg)                   10.00                  9.39                     8.48
+# WER on test(tg)                   8.58                  7.72                     7.20
+# Final train prob                -0.0642               -0.0528                  -0.0378
+# Final valid prob                -0.0788               -0.0651                  -0.0595
+# Final train prob (xent)         -0.9113               -0.7117                  -0.5228
+# Final valid prob (xent)         -0.9525               -0.7607                  -0.6185
+
+# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly
+# standard LSTM system, except that some TDNN layers were added in between the
+# LSTM layers.
+
+## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here;
+## otherwise call it directly in its location).
+# by default:
+# local/chain/run_tdnn_lstm.sh
+
+# Note that you may want to adjust the parallelisation to your setup.
+# If you have already run one of the non-chain nnet3 systems
+# (e.g.
local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=7 +min_seg_len=1.55 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +xent_regularize=0.1 +train_set=train +gmm=tri3b # the gmm for the target data +num_threads_ubm=32 +nnet3_affix= # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk=150 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1a #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
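+    # (we exit rather than overwrite: the tree determines the number of
+    # output pdfs, so regenerating it would invalidate any network already
+    # trained on top of this directory)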
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_tg $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/sprakbanken/s5/local/cstr_ndx2flist.pl b/egs/sprakbanken/s5/local/cstr_ndx2flist.pl deleted file mode 100755 index d19db421a9f..00000000000 --- a/egs/sprakbanken/s5/local/cstr_ndx2flist.pl +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env perl - -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - -# This is modified from the script in standard Kaldi recipe to account -# for the way the WSJ data is structured on the Edinburgh systems. 
-# - Arnab Ghoshal, 12/1/12 - -# This program takes as its standard input an .ndx file from the WSJ corpus that looks -# like this: -#;; File: tr_s_wv1.ndx, updated 04/26/94 -#;; -#;; Index for WSJ0 SI-short Sennheiser training data -#;; Data is read WSJ sentences, Sennheiser mic. -#;; Contains 84 speakers X (~100 utts per speaker MIT/SRI and ~50 utts -#;; per speaker TI) = 7236 utts -#;; -#11_1_1:wsj0/si_tr_s/01i/01ic0201.wv1 -#11_1_1:wsj0/si_tr_s/01i/01ic0202.wv1 -#11_1_1:wsj0/si_tr_s/01i/01ic0203.wv1 - -# and as command-line argument it takes the names of the WSJ disk locations, e.g.: -# /group/corpora/public/wsjcam0/data on DICE machines. -# It outputs a list of absolute pathnames. - -$wsj_dir = $ARGV[0]; - -while(){ - if(m/^;/){ next; } # Comment. Ignore it. - else { - m/^([0-9_]+):\s*(\S+)$/ || die "Could not parse line $_"; - $filename = $2; # as a subdirectory of the distributed disk. - if ($filename !~ m/\.wv1$/) { $filename .= ".wv1"; } - $filename = "$wsj_dir/$filename"; - if (-e $filename) { - print "$filename\n"; - } else { - print STDERR "File $filename found in the index but not on disk\n"; - } - } -} diff --git a/egs/sprakbanken/s5/local/find_transcripts.pl b/egs/sprakbanken/s5/local/find_transcripts.pl deleted file mode 100755 index 6429411b864..00000000000 --- a/egs/sprakbanken/s5/local/find_transcripts.pl +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - - -# This program takes on its standard input a list of utterance -# id's, one for each line. (e.g. 4k0c030a is a an utterance id). -# It takes as -# Extracts from the dot files the transcripts for a given -# dataset (represented by a file list). -# - -@ARGV == 1 || die "find_transcripts.pl dot_files_flist < utterance_ids > transcripts"; -$dot_flist = shift @ARGV; - -open(L, "<$dot_flist") || die "Opening file list of dot files: $dot_flist\n"; -while(){ - chop; - m:\S+/(\w{6})00.dot: || die "Bad line in dot file list: $_"; - $spk = $1; - $spk2dot{$spk} = $_; -} - - - -while(){ - chop; - $uttid = $_; - $uttid =~ m:(\w{6})\w\w: || die "Bad utterance id $_"; - $spk = $1; - if($spk ne $curspk) { - %utt2trans = { }; # Don't keep all the transcripts in memory... 
- $curspk = $spk; - $dotfile = $spk2dot{$spk}; - defined $dotfile || die "No dot file for speaker $spk\n"; - open(F, "<$dotfile") || die "Error opening dot file $dotfile\n"; - while() { - $_ =~ m:(.+)\((\w{8})\)\s*$: || die "Bad line $_ in dot file $dotfile (line $.)\n"; - $trans = $1; - $utt = $2; - $utt2trans{$utt} = $trans; - } - } - if(!defined $utt2trans{$uttid}) { - print STDERR "No transcript for utterance $uttid (current dot file is $dotfile)\n"; - } else { - print "$uttid $utt2trans{$uttid}\n"; - } -} - - diff --git a/egs/sprakbanken/s5/local/flist2scp.pl b/egs/sprakbanken/s5/local/flist2scp.pl deleted file mode 100755 index 234e4add1ed..00000000000 --- a/egs/sprakbanken/s5/local/flist2scp.pl +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env perl -# Copyright 2010-2011 Microsoft Corporation - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -# MERCHANTABLITY OR NON-INFRINGEMENT. -# See the Apache 2 License for the specific language governing permissions and -# limitations under the License. - - -# takes in a file list with lines like -# /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1 -# and outputs an scp in kaldi format with lines like -# 4k0c030a /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1 -# (the first thing is the utterance-id, which is the same as the basename of the file. - - -while(<>){ - m:^\S+/(\w+)\.[wW][vV]1$: || die "Bad line $_"; - $id = $1; - $id =~ tr/A-Z/a-z/; # Necessary because of weirdness on disk 13-16.1 (uppercase filenames) - print "$id $_"; -} - diff --git a/egs/sprakbanken/s5/local/generate_example_kws.sh b/egs/sprakbanken/s5/local/generate_example_kws.sh deleted file mode 100755 index 2c849438192..00000000000 --- a/egs/sprakbanken/s5/local/generate_example_kws.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) -# Apache 2.0. - - -if [ $# -ne 2 ]; then - echo "Usage: local/generate_example_kws.sh " - echo " e.g.: local/generate_example_kws.sh data/test_eval92/ " - exit 1; -fi - -datadir=$1; -kwsdatadir=$2; -text=$datadir/text; - -mkdir -p $kwsdatadir; - -# Generate keywords; we generate 20 unigram keywords with at least 20 counts, -# 20 bigram keywords with at least 10 counts and 10 trigram keywords with at -# least 5 counts. -cat $text | perl -e ' - %unigram = (); - %bigram = (); - %trigram = (); - while(<>) { - chomp; - @col=split(" ", $_); - shift @col; - for($i = 0; $i < @col; $i++) { - # unigram case - if (!defined($unigram{$col[$i]})) { - $unigram{$col[$i]} = 0; - } - $unigram{$col[$i]}++; - - # bigram case - if ($i < @col-1) { - $word = $col[$i] . " " . $col[$i+1]; - if (!defined($bigram{$word})) { - $bigram{$word} = 0; - } - $bigram{$word}++; - } - - # trigram case - if ($i < @col-2) { - $word = $col[$i] . " " . $col[$i+1] . " " . 
$col[$i+2]; - if (!defined($trigram{$word})) { - $trigram{$word} = 0; - } - $trigram{$word}++; - } - } - } - - $max_count = 100; - $total = 20; - $current = 0; - $min_count = 20; - while ($current < $total && $min_count <= $max_count) { - foreach $x (keys %unigram) { - if ($unigram{$x} == $min_count) { - print "$x\n"; - $unigram{$x} = 0; - $current++; - } - if ($current == $total) { - last; - } - } - $min_count++; - } - - $total = 20; - $current = 0; - $min_count = 4; - while ($current < $total && $min_count <= $max_count) { - foreach $x (keys %bigram) { - if ($bigram{$x} == $min_count) { - print "$x\n"; - $bigram{$x} = 0; - $current++; - } - if ($current == $total) { - last; - } - } - $min_count++; - } - - $total = 10; - $current = 0; - $min_count = 3; - while ($current < $total && $min_count <= $max_count) { - foreach $x (keys %trigram) { - if ($trigram{$x} == $min_count) { - print "$x\n"; - $trigram{$x} = 0; - $current++; - } - if ($current == $total) { - last; - } - } - $min_count++; - } - ' > $kwsdatadir/raw_keywords.txt - -echo "Keywords generation succeeded" diff --git a/egs/sprakbanken/s5/local/generate_results_file.sh b/egs/sprakbanken/s5/local/generate_results_file.sh new file mode 100755 index 00000000000..4659c36fc5a --- /dev/null +++ b/egs/sprakbanken/s5/local/generate_results_file.sh @@ -0,0 +1,16 @@ + +echo "GMM-based systems" +for x in exp/*/decode*;do + [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; +done + +echo "nnet3 xent systems" +for x in exp/nnet3/tdnn*/decode* exp/nnet3/lstm*/decode* ;do + [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; +done + +echo "Nnet3 chain systems" +for x in exp/chain/tdnn*/decode* exp/chain/lstm*/decode*;do + [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; +done + diff --git a/egs/sprakbanken/s5/local/kws_data_prep.sh b/egs/sprakbanken/s5/local/kws_data_prep.sh deleted file mode 100755 index 5222a88c9ef..00000000000 --- a/egs/sprakbanken/s5/local/kws_data_prep.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) -# Apache 2.0. - - -if [ $# -ne 3 ]; then - echo "Usage: local/kws_data_prep.sh " - echo " e.g.: local/kws_data_prep.sh data/lang_test_bd_tgpr/ data/test_eval92/ data/kws/" - exit 1; -fi - -langdir=$1; -datadir=$2; -kwsdatadir=$3; - -mkdir -p $kwsdatadir; - -# Create keyword id for each keyword -cat $kwsdatadir/raw_keywords.txt | perl -e ' - $idx=1; - while(<>) { - chomp; - printf "WSJ-%04d $_\n", $idx; - $idx++; - }' > $kwsdatadir/keywords.txt - -# Map the keywords to integers; note that we remove the keywords that -# are not in our $langdir/words.txt, as we won't find them anyway... -cat $kwsdatadir/keywords.txt | \ - sym2int.pl --map-oov 0 -f 2- $langdir/words.txt | \ - grep -v " 0 " | grep -v " 0$" > $kwsdatadir/keywords.int - -# Compile keywords into FSTs -transcripts-to-fsts ark:$kwsdatadir/keywords.int ark:$kwsdatadir/keywords.fsts - -# Create utterance id for each utterance; Note that by "utterance" here I mean -# the keys that will appear in the lattice archive. You may have to modify here -cat $datadir/wav.scp | \ - awk '{print $1}' | \ - sort | uniq | perl -e ' - $idx=1; - while(<>) { - chomp; - print "$_ $idx\n"; - $idx++; - }' > $kwsdatadir/utter_id - -# Map utterance to the names that will appear in the rttm file. You have -# to modify the commands below accoring to your rttm file. In the WSJ case -# since each file is an utterance, we assume that the actual file names will -# be the "names" in the rttm, so the utterance names map to themselves. 
-cat $datadir/wav.scp | \ - awk '{print $1}' | \ - sort | uniq | perl -e ' - while(<>) { - chomp; - print "$_ $_\n"; - }' > $kwsdatadir/utter_map; -echo "Kws data preparation succeeded" diff --git a/egs/sprakbanken/s5/local/nnet3/run_blstm.sh b/egs/sprakbanken/s5/local/nnet3/run_blstm.sh new file mode 100755 index 00000000000..f29731397fe --- /dev/null +++ b/egs/sprakbanken/s5/local/nnet3/run_blstm.sh @@ -0,0 +1,48 @@ +stage=0 +train_stage=-10 +affix=bidirectional +nnet3_affix= +common_egs_dir= +remove_egs=true +train_set=train +gmm=tri3b + + +# BLSTM params +cell_dim=1024 +rp_dim=128 +nrp_dim=128 +chunk_left_context=40 +chunk_right_context=40 + +# training options +srand=0 +num_jobs_initial=3 +num_jobs_final=15 +samples_per_iter=20000 +num_epochs=6 +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +local/nnet3/run_lstm.sh --affix $affix \ + --srand $srand \ + --stage $stage \ + --train-stage $train_stage \ + --train-set $train_set \ + --gmm $gmm \ + --lstm-delay " [-1,1] [-2,2] [-3,3] " \ + --label-delay 0 \ + --cell-dim $cell_dim \ + --recurrent-projection-dim $rp_dim \ + --non-recurrent-projection-dim $nrp_dim \ + --common-egs-dir "$common_egs_dir" \ + --chunk-left-context $chunk_left_context \ + --chunk-right-context $chunk_right_context \ + --num-jobs-initial $num_jobs_initial \ + --num-jobs-final $num_jobs_final \ + --samples-per-iter $samples_per_iter \ + --num-epochs $num_epochs \ + --remove-egs $remove_egs + diff --git a/egs/sprakbanken/s5/local/nnet3/run_ivector_common.sh b/egs/sprakbanken/s5/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..9a730348dfa --- /dev/null +++ b/egs/sprakbanken/s5/local/nnet3/run_ivector_common.sh @@ -0,0 +1,238 @@ +#!/bin/bash + +set -e -o pipefail + + +# This script is called from local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh (and may eventually +# be called by more scripts). It contains the common feature preparation and iVector-related parts +# of the script. See those scripts for examples of usage. + + +stage=0 +nj=30 +min_seg_len=1.55 # min length in seconds... we do this because chain training + # will discard segments shorter than 1.5 seconds. Must remain in sync + # with the same option given to prepare_lores_feats_and_alignments.sh +train_set=train # you might set this to e.g. train. +gmm=tri3b # This specifies a GMM-dir from the features of the type you're training the system on; + # it should contain alignments for 'train_set'. + +num_threads_ubm=32 +nnet3_affix=_n3 # affix for exp/nnet3 directory to put iVector stuff in, so it + # becomes exp/nnet3_cleaned or whatever. + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp_comb + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + + + +if [ $stage -le 2 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then + echo "$0: data/${train_set}_sp_hires/feats.scp already exists." + echo " ... Please either remove it, or rerun this script with stage > 2." + exit 1 +fi + + +if [ $stage -le 1 ]; then + echo "$0: preparing directory for speed-perturbed data" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: creating high-resolution MFCC features" + + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations. 
You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/sprakbanken-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp dev test; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires + + for datadir in ${train_set}_sp dev test; do + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires + steps/compute_cmvn_stats.sh data/${datadir}_hires + utils/fix_data_dir.sh data/${datadir}_hires + done +fi + +if [ $stage -le 3 ]; then + echo "$0: combining short segments of speed-perturbed high-resolution MFCC training data" + # we have to combine short segments or we won't be able to train chain models + # on those segments. + utils/data/combine_short_segments.sh \ + data/${train_set}_sp_hires $min_seg_len data/${train_set}_sp_hires_comb + + # just copy over the CMVN to avoid having to recompute it. + cp data/${train_set}_sp_hires/cmvn.scp data/${train_set}_sp_hires_comb/ + utils/fix_data_dir.sh data/${train_set}_sp_hires_comb/ +fi + +if [ $stage -le 4 ]; then + echo "$0: selecting segments of hires training data that were also present in the" + echo " ... original training data." + + # note, these data-dirs are temporary; we put them in a sub-directory + # of the place where we'll make the alignments. + temp_data_root=exp/nnet3${nnet3_affix}/tri5 + mkdir -p $temp_data_root + + utils/data/subset_data_dir.sh --utt-list data/${train_set}/feats.scp \ + data/${train_set}_sp_hires $temp_data_root/${train_set}_hires + + # note: essentially all the original segments should be in the hires data. + n1=$(wc -l /dev/null || true + ( + steps/nnet3/decode.sh --nj 12 --cmd "$decode_cmd" --num-threads 4 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_dev_hires \ + ${graph_dir} data/dev_hires ${dir}/decode_dev || exit 1 + steps/nnet3/decode.sh --nj 7 --cmd "$decode_cmd" --num-threads 4 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ + ${graph_dir} data/test_hires ${dir}/decode_test || exit 1 + ) || touch $dir/.error & + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/sprakbanken/s5/local/nnet3/run_tdnn.sh b/egs/sprakbanken/s5/local/nnet3/run_tdnn.sh new file mode 100755 index 00000000000..45794ac9ee4 --- /dev/null +++ b/egs/sprakbanken/s5/local/nnet3/run_tdnn.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +# This is the standard "tdnn" system, built in nnet3 + +# by default: +# local/nnet3/run_tdnn.sh + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
+stage=0 +nj=30 +decode_nj=7 +min_seg_len=1.55 +train_set=train +gmm=tri3b # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix= # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix= #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. + +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0" +remove_egs=true +relu_dim=750 +num_epochs=3 + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat </dev/null + ( + steps/nnet3/decode.sh --nj 7 --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ + ${graph_dir} data/test_hires ${dir}/decode_test || exit 1 + steps/nnet3/decode.sh --nj 12 --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_dev_hires \ + ${graph_dir} data/dev_hires ${dir}/decode_dev || exit 1 + ) || touch $dir/.error & + + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/sprakbanken/s5/local/run_basis_fmllr.sh b/egs/sprakbanken/s5/local/run_basis_fmllr.sh deleted file mode 100755 index 3c04e480a0a..00000000000 --- a/egs/sprakbanken/s5/local/run_basis_fmllr.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -. cmd.sh - -mfccdir=mfcc - -# Make "per-utterance" versions of the test sets where the speaker -# information corresponds to utterances-- to demonstrate adaptation on -# short utterances, particularly for basis fMLLR -for x in test_eval92 test_eval93 test_dev93 ; do - y=${x}_utt - rm -r data/$y - cp -r data/$x data/$y - cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk; - cp data/$y/utt2spk data/$y/spk2utt; - steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1; -done - - - # basis fMLLR experiments. - # First a baseline: decode per-utterance with normal fMLLR. -steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \ - exp/tri3b/graph_tgpr data/test_dev93_utt exp/tri3b/decode_tgpr_dev93_utt || exit 1; -steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \ - exp/tri3b/graph_tgpr data/test_eval92_utt exp/tri3b/decode_tgpr_eval92_utt || exit 1; - - # get the fMLLR basis. -steps/get_fmllr_basis.sh --cmd "$train_cmd" data/train_si84 data/lang exp/tri3b - - # decoding tri3b with basis fMLLR -steps/decode_basis_fmllr.sh --nj 10 --cmd "$decode_cmd" \ - exp/tri3b/graph_tgpr data/test_dev93 exp/tri3b/decode_tgpr_dev93_basis || exit 1; -steps/decode_basis_fmllr.sh --nj 8 --cmd "$decode_cmd" \ - exp/tri3b/graph_tgpr data/test_eval92 exp/tri3b/decode_tgpr_eval92_basis || exit 1; - - # The same, per-utterance. -steps/decode_basis_fmllr.sh --nj 10 --cmd "$decode_cmd" \ - exp/tri3b/graph_tgpr data/test_dev93_utt exp/tri3b/decode_tgpr_dev93_basis_utt || exit 1; -steps/decode_basis_fmllr.sh --nj 8 --cmd "$decode_cmd" \ - exp/tri3b/graph_tgpr data/test_eval92_utt exp/tri3b/decode_tgpr_eval92_basis_utt || exit 1; - - diff --git a/egs/sprakbanken/s5/local/run_kl_hmm.sh b/egs/sprakbanken/s5/local/run_kl_hmm.sh deleted file mode 100644 index 9e7679a7675..00000000000 --- a/egs/sprakbanken/s5/local/run_kl_hmm.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -# Copyright 2013 Idiap Research Institute (Author: David Imseng) -# Apache 2.0 - -. 
cmd.sh - -states=20000 -dir=exp/tri4b_pretrain-dbn_dnn/ - -steps/kl_hmm/build_tree.sh --cmd "$big_memory_cmd" --thresh -1 --nnet_dir exp/tri4b_pretrain-dbn_dnn/ \ - ${states} data-fmllr-tri4b/train_si284 data/lang exp/tri4b_ali_si284 exp/tri4b-${states} || exit 1; - -utils/mkgraph.sh data/lang_test_bd_tgpr exp/tri4b-${states} exp/tri4b-${states}/graph_bd_tgpr || exit 1; - -steps/kl_hmm/train_kl_hmm.sh --nj 30 --cmd "$big_memory_cmd" --model exp/tri4b-${states}/final.mdl data-fmllr-tri4b/train_si284 exp/tri4b-${states} $dir/kl-hmm-${states} - -steps/kl_hmm/decode_kl_hmm.sh --nj 10 --cmd "$big_memory_cmd" --acwt 0.1 --nnet $dir/kl-hmm-${states}/final.nnet --model exp/tri4b-${states}/final.mdl \ - --config conf/decode_dnn.config exp/tri4b-${states}/graph_bd_tgpr/ data-fmllr-tri4b/test_dev93 $dir/decode_dev93_kl-hmm-bd-${states}_tst - -steps/kl_hmm/decode_kl_hmm.sh --nj 8 --cmd "$big_memory_cmd" --acwt 0.1 --nnet $dir/kl-hmm-${states}/final.nnet --model exp/tri4b-${states}/final.mdl \ - --config conf/decode_dnn.config exp/tri4b-${states}/graph_bd_tgpr/ data-fmllr-tri4b/test_eval92 $dir/decode_eval92_kl-hmm-bd-${states}_tst - - diff --git a/egs/sprakbanken/s5/local/run_raw_fmllr.sh b/egs/sprakbanken/s5/local/run_raw_fmllr.sh deleted file mode 100644 index c4847a93f27..00000000000 --- a/egs/sprakbanken/s5/local/run_raw_fmllr.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash - - -steps/align_raw_fmllr.sh --nj 10 --cmd "$train_cmd" --use-graphs true \ - data/train_si84 data/lang exp/tri2b exp/tri2b_ali_si84_raw - -steps/train_raw_sat.sh --cmd "$train_cmd" \ - 2500 15000 data/train_si84 data/lang exp/tri2b_ali_si84_raw exp/tri3c || exit 1; - - -mfccdir=mfcc -for x in test_eval92 test_eval93 test_dev93 ; do - y=${x}_utt - mkdir -p data/$y - cp data/$x/* data/$y || true - cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk; - cp data/$y/utt2spk data/$y/spk2utt; - steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1; -done - -( -utils/mkgraph.sh data/lang_test_tgpr exp/tri3c exp/tri3c/graph_tgpr || exit 1; -steps/decode_raw_fmllr.sh --nj 10 --cmd "$decode_cmd" \ - exp/tri3c/graph_tgpr data/test_dev93 exp/tri3c/decode_tgpr_dev93 || exit 1; -steps/decode_raw_fmllr.sh --nj 8 --cmd "$decode_cmd" \ - exp/tri3c/graph_tgpr data/test_eval92 exp/tri3c/decode_tgpr_eval92 || exit 1; - -steps/decode_raw_fmllr.sh --nj 30 --cmd "$decode_cmd" \ - exp/tri3c/graph_tgpr data/test_dev93_utt exp/tri3c/decode_tgpr_dev93_utt || exit 1; -steps/decode_raw_fmllr.sh --nj 30 --cmd "$decode_cmd" \ - exp/tri3c/graph_tgpr data/test_eval92_utt exp/tri3c/decode_tgpr_eval92_utt || exit 1; - -steps/decode_raw_fmllr.sh --use-normal-fmllr true --nj 10 --cmd "$decode_cmd" \ - exp/tri3c/graph_tgpr data/test_dev93 exp/tri3c/decode_tgpr_dev93_2fmllr || exit 1; -steps/decode_raw_fmllr.sh --use-normal-fmllr true --nj 8 --cmd "$decode_cmd" \ - exp/tri3c/graph_tgpr data/test_eval92 exp/tri3c/decode_tgpr_eval92_2fmllr || exit 1; -)& - -( -utils/mkgraph.sh data/lang_test_bd_tgpr exp/tri3c exp/tri3c/graph_bd_tgpr || exit 1; - -steps/decode_raw_fmllr.sh --cmd "$decode_cmd" --nj 8 exp/tri3c/graph_bd_tgpr \ - data/test_eval92 exp/tri3c/decode_bd_tgpr_eval92 - steps/decode_raw_fmllr.sh --cmd "$decode_cmd" --nj 10 exp/tri3c/graph_bd_tgpr \ - data/test_dev93 exp/tri3c/decode_bd_tgpr_dev93 -)& - -steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \ - data/train_si284 data/lang exp/tri3c exp/tri3c_ali_si284 || exit 1; - - -steps/train_raw_sat.sh --cmd "$train_cmd" \ - 4200 40000 data/train_si284 data/lang exp/tri3c_ali_si284 
exp/tri4d || exit 1;
-(
- utils/mkgraph.sh data/lang_test_tgpr exp/tri4d exp/tri4d/graph_tgpr || exit 1;
- steps/decode_raw_fmllr.sh --nj 10 --cmd "$decode_cmd" \
-   exp/tri4d/graph_tgpr data/test_dev93 exp/tri4d/decode_tgpr_dev93 || exit 1;
- steps/decode_raw_fmllr.sh --nj 8 --cmd "$decode_cmd" \
-   exp/tri4d/graph_tgpr data/test_eval92 exp/tri4d/decode_tgpr_eval92 || exit 1;
-) &
-
-
-wait
-
-
-#for x in exp/tri3{b,c}/decode_tgpr*; do grep WER $x/wer_* | utils/best_wer.sh ; done
-
diff --git a/egs/sprakbanken/s5/local/sprak_data_prep.sh b/egs/sprakbanken/s5/local/sprak_data_prep.sh
index 1b2406620f2..c336b06e8af 100755
--- a/egs/sprakbanken/s5/local/sprak_data_prep.sh
+++ b/egs/sprakbanken/s5/local/sprak_data_prep.sh
@@ -18,29 +18,18 @@ utils=`pwd`/utils
 
 . ./path.sh
 
-# Checks if python3 is available on the system and install python3 in userspace if not
-# This recipe currently relies on version 3 because python3 uses utf8 as internal
-# string representation
-
-#if ! which python3 >&/dev/null; then
-#  echo "Installing python3 since not on your path."
-#  pushd $KALDI_ROOT/tools || exit 1;
-#  extras/install_python3.sh || exit 1;
-#  popd
-#fi
-
 if [ ! -d $dir/download ]; then
     mkdir -p $dir/download/0565-1 $dir/download/0565-2
 fi
 
-echo "Downloading and unpacking sprakbanken to $dir/corpus_processed. This will take a while."
+echo "Downloading and unpacking sprakbanken to $dir/corpus_processed. This will take a while. The connection closes every 50-60 seconds and the repo maintainers have no suggestion other than increasing the number of retries."
 
 if [ ! -f $dir/download/da.16kHz.0565-1.tar.gz ]; then
-    ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0565-1.tar.gz --directory-prefix=$dir/download )
+    ( wget --tries 100 http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0565-1.tar.gz --directory-prefix=$dir/download )
 fi
 
 if [ ! -f $dir/download/da.16kHz.0565-2.tar.gz ]; then
-    ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0565-2.tar.gz --directory-prefix=$dir/download )
+    ( wget --tries 100 http://www.nb.no/sbfil/talegjenkjenning/16kHz/da.16kHz.0565-2.tar.gz --directory-prefix=$dir/download )
 fi
 
 if [ ! -f $dir/download/da.16kHz.0611.tar.gz ]; then
diff --git a/egs/sprakbanken/s5/local/sprak_run_mmi_tri4b.sh b/egs/sprakbanken/s5/local/sprak_run_mmi_tri4b.sh
deleted file mode 100755
index 83999bada53..00000000000
--- a/egs/sprakbanken/s5/local/sprak_run_mmi_tri4b.sh
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/bin/bash
-. ./cmd.sh
-
-# LM suffix
-uid=$1
-
-# Test set id
-test=$2
-
-steps/make_denlats.sh --nj 30 --sub-split 24 --cmd "$train_cmd" \
-  --transform-dir exp/tri4b_ali \
-  data/train data/lang exp/tri4b exp/tri4b_denlats || exit 1;
-
-steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 \
-  data/train data/lang exp/tri4b_ali exp/tri4b_denlats \
-  exp/tri4b_mmi_b0.1 || exit 1;
-
-steps/decode.sh --nj 7 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode_${uid}_$test \
-  exp/tri4b_/graph_$uid data/$test exp/tri4b_mmi_b0.1/decode_${uid}_$test
-
-#first, train UBM for fMMI experiments.
-steps/train_diag_ubm.sh --silence-weight 0.5 --nj 50 --cmd "$train_cmd" \
-  600 data/train data/lang exp/tri4b_ali exp/dubm4b
-
-# Next, fMMI+MMI.
-steps/train_mmi_fmmi.sh \ - --boost 0.1 --cmd "$train_cmd" data/train data/lang exp/tri4b_ali exp/dubm4b exp/tri4b_denlats \ - exp/tri4b_fmmi_a || exit 1; - -for iter in 3 4 5 6 7 8; do - steps/decode_fmmi.sh --nj 5 --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri3b/decode_${uid}_$test exp/tri4b/graph_$uid data/$test \ - exp/tri4b_fmmi_a/decode_${uid}_${test}_it$iter & -done -# decode the last iter with the bd model. -#for iter in 8; do -# steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ -# --transform-dir exp/tri3b/decode_bd_tgpr_dev93 exp/tri4b/graph_bd_tgpr data/test_dev93 \ -# exp/tri4b_fmmi_a/decode_bd_tgpr_dev93_it$iter & -# steps/decode_fmmi.sh --nj 8 --cmd "$decode_cmd" --iter $iter \ -# --transform-dir exp/tri3b/decode_bd_tgpr_eval92 exp/tri4b/graph_bd_tgpr data/test_eval92 \ -# exp/tri4b_fmmi_a/decode_tgpr_eval92_it$iter & -#done - - -# fMMI + mmi with indirect differential. -steps/train_mmi_fmmi_indirect.sh \ - --boost 0.1 --cmd "$train_cmd" data/train data/lang exp/tri4b_ali exp/dubm4b exp/tri4b_denlats \ - exp/tri4b_fmmi_indirect || exit 1; - -for iter in 3 4 5 6 7 8; do - steps/decode_fmmi.sh --nj 7 --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri3b/decode_${uid}_$test exp/tri4b/graph_$uid data/$test \ - exp/tri4b_fmmi_indirect/decode_${uid}_${test}_it$iter & -done - diff --git a/egs/sprakbanken/s5/local/sprak_train_cmulm.sh b/egs/sprakbanken/s5/local/sprak_train_cmulm.sh deleted file mode 100755 index 55d6d60bf9d..00000000000 --- a/egs/sprakbanken/s5/local/sprak_train_cmulm.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/bash - -# Copyright 2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -# Apache 2.0 - -# This script takes data prepared in a corpus-dependent way -# in data/local/, and converts it into the "canonical" form, -# in various subdirectories of data/, e.g. data/lang, data/lang_test_ug, -# data/train_si284, data/train_si84, etc. - -# Don't bother doing train_si84 separately (although we have the file lists -# in data/local/) because it's just the first 7138 utterances in train_si284. -# We'll create train_si84 after doing the feature extraction. - -. ./path.sh || exit 1; - -echo "Preparing train and test data" -srcdir=data/local/data -lmdir=data/local/arpa_lm -tmpdir=data/local/lm_tmp -lang_tmp=data/local/lang_tmp -lexicon=data/local/dict/transcripts -ccs=data/local/lang_tmp/cmuclmtk.ccs -lm_suffix=arpa -mkdir -p $lmdir -mkdir -p $tmpdir - -# Create context cue symbol file for cmuclmtk -echo -e '' > $ccs -echo -e '' >> $ccs - - -# Envelop LM training data in context cues -python3 local/sprak_prep_lm.py $lexicon $lmdir/lm_input - - -# Next, for each type of language model, create the corresponding FST -# and the corresponding lang_test_* directory. - -echo Preparing language models for test - -text2wfreq < $lmdir/lm_input | wfreq2vocab -top 40000 > $lmdir/sprak.vocab - -text2idngram -vocab $lmdir/sprak.vocab -idngram $lmdir/sprak.idngram < $lmdir/lm_input - -idngram2lm -linear -idngram $lmdir/sprak.idngram -vocab \ - $lmdir/sprak.vocab -arpa $lmdir/sprak.arpa -context $ccs - - -test=data/lang_test_${lm_suffix} -mkdir -p $test -cp -r data/lang/* $test - -cat $lmdir/sprak.arpa | \ - arpa2fst --disambig-symbol=#0 \ - --read-symbol-table=$test/words.txt - $test/G.fst - - -utils/validate_lang.pl $test || exit 1; - -exit 0; diff --git a/egs/sprakbanken/s5/run.sh b/egs/sprakbanken/s5/run.sh index 53fd7b1484e..64a24deeabf 100755 --- a/egs/sprakbanken/s5/run.sh +++ b/egs/sprakbanken/s5/run.sh @@ -5,7 +5,6 @@ . 
./path.sh # so python3 is on the path if not on the system (we made a link to utils/).a nj=12 - stage=0 . utils/parse_options.sh @@ -125,12 +124,11 @@ if [ $stage -le 9 ]; then fi if [ $stage -le 10 ]; then -# Alignment used to train nnets and sgmms -steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ - data/train data/lang exp/tri3b exp/tri3b_ali || exit 1; + # Alignment used to train nnets and sgmms + steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ + data/train data/lang exp/tri3b exp/tri3b_ali || exit 1; fi -##TODO: Add nnet3 and chain setups ## Works #local/sprak_run_nnet_cpu.sh tg dev @@ -139,5 +137,30 @@ fi #local/sprak_run_sgmm2.sh dev +# Run neural network setups based in the TEDLIUM recipe + +# Running the nnet3-tdnn setup will train an ivector extractor that +# is used by the subsequent nnet3 and chain systems (why --stage is +# specified) +#local/nnet3/run_tdnn.sh --tdnn-affix "0" --nnet3-affix "" + +# nnet3 LSTM +#local/nnet3/run_lstm.sh --stage 13 --affix "0" + +# nnet3 bLSTM +#local/nnet3/run_blstm.sh --stage 12 + + + +# chain TDNN +# This setup creates a new lang directory that is also used by the +# TDNN-LSTM system +#local/chain/run_tdnn.sh --stage 14 + +# chain TDNN-LSTM +local/chain/run_tdnn_lstm.sh --stage 17 + + # Getting results [see RESULTS file] -for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done +local/generate_results_file.sh 2> /dev/null > RESULTS + diff --git a/egs/sprakbanken_swe/s5/local/data_prep.py b/egs/sprakbanken_swe/s5/local/data_prep.py index f3b644a26b6..58a0898dc26 100755 --- a/egs/sprakbanken_swe/s5/local/data_prep.py +++ b/egs/sprakbanken_swe/s5/local/data_prep.py @@ -123,7 +123,7 @@ def create_parallel_kaldi(filelist, sphpipe, snd=False): if __name__ == '__main__': - flist = codecs.open(sys.argv[1], "r", "utf8").readlines() + flist = codecs.open(sys.argv[1], "r").readlines() outpath = sys.argv[2] if len(sys.argv) == 5: sndlist = codecs.open(sys.argv[3], "r").readlines() @@ -133,8 +133,8 @@ def create_parallel_kaldi(filelist, sphpipe, snd=False): traindata = create_parallel_kaldi(flist, "") textout = codecs.open(os.path.join(outpath, "text.unnormalised"), "w", "utf8") - wavout = codecs.open(os.path.join(outpath, "wav.scp"), "w","utf8") - utt2spkout = codecs.open(os.path.join(outpath, "utt2spk"), "w","utf8") + wavout = codecs.open(os.path.join(outpath, "wav.scp"), "w") + utt2spkout = codecs.open(os.path.join(outpath, "utt2spk"), "w") textout.writelines(traindata[0]) wavout.writelines(traindata[1]) utt2spkout.writelines(traindata[2]) diff --git a/egs/sprakbanken_swe/s5/local/normalize_transcript.py b/egs/sprakbanken_swe/s5/local/normalize_transcript.py index 68e534df40c..90e45744e2a 100755 --- a/egs/sprakbanken_swe/s5/local/normalize_transcript.py +++ b/egs/sprakbanken_swe/s5/local/normalize_transcript.py @@ -18,6 +18,9 @@ } #removes all the above signs +from_chars = ''.join(normdict.keys()) +to_chars = ''.join(normdict.values()) + t_table = str.maketrans(normdict) ## Main @@ -25,13 +28,15 @@ transcript = codecs.open(sys.argv[1], "r", "utf8") outtext = codecs.open(sys.argv[2], "w", "utf8") -for line in transcript: - line = line.replace(".\Punkt", ".") - line = line.replace(",\Komma", ",") - normtext1 = line.translate(t_table) - normtext2 = re.sub(r' +', ' ', normtext1.strip()) - outtext.write(normtext2.upper() + "\n") +#TODO: Add number normalisation and remove uppercasing +for line in transcript: + line = line.replace(".\Punkt", ".") + line = line.replace(",\Komma", ",") + normtext1 = re.sub(r'[\.,:;\?]', '', line) 
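+    # map tabs and backslashes to spaces, then squeeze repeated spaces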
+    normtext2 = re.sub(r'[\t\\]', ' ', normtext1)
+    normtext3 = re.sub(r' +', ' ', normtext2.strip())
+    outtext.write(normtext3.upper() + "\n")
 
 transcript.close()
 outtext.close()
diff --git a/egs/sprakbanken_swe/s5/local/sprak_data_prep.sh b/egs/sprakbanken_swe/s5/local/sprak_data_prep.sh
index ad6c6e2472f..19751815208 100755
--- a/egs/sprakbanken_swe/s5/local/sprak_data_prep.sh
+++ b/egs/sprakbanken_swe/s5/local/sprak_data_prep.sh
@@ -22,10 +22,10 @@ utils=`pwd`/utils
 # This recipe currently relies on version 3 because python3 uses utf8 as internal
 # string representation
 
-if ! which python3 >&/dev/null; then
-  echo "Python3 is not installed, to install it you should probably do:"
-  echo "sudo apt-get install python3" || exit 1;
-fi
+#if ! which python3 >&/dev/null; then
+#  echo "Python3 is not installed, to install it you should probably do:"
+#  echo "sudo apt-get install python3" || exit 1;
+#fi
 
 if [ ! -d $dir/download ]; then
   mkdir -p $dir/download/0467-1 $dir/download/0467-2 $dir/download/0467-3
@@ -34,19 +34,19 @@ fi
 echo "Downloading and unpacking sprakbanken to $dir/corpus_processed. This will take a while."
 
 if [ ! -f $dir/download/sve.16khz.0467-1.tar.gz ]; then
-    ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0467-1.tar.gz --directory-prefix=$dir/download )
+    ( wget --tries 100 http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0467-1.tar.gz --directory-prefix=$dir/download )
 fi
 
 if [ ! -f $dir/download/sve.16khz.0467-2.tar.gz ]; then
-    ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0467-2.tar.gz --directory-prefix=$dir/download )
+    ( wget --tries 100 http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0467-2.tar.gz --directory-prefix=$dir/download )
 fi
 
 if [ ! -f $dir/download/sve.16khz.0467-3.tar.gz ]; then
-    ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0467-3.tar.gz --directory-prefix=$dir/download )
+    ( wget --tries 100 http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0467-3.tar.gz --directory-prefix=$dir/download )
 fi
 
 if [ ! -f $dir/download/sve.16khz.0467-1.tar.gz ]; then
-    ( wget http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0468.tar.gz --directory-prefix=$dir/download )
+    ( wget --tries 100 http://www.nb.no/sbfil/talegjenkjenning/16kHz/sve.16khz.0468.tar.gz --directory-prefix=$dir/download )
 fi
 
 echo "Corpus files downloaded."
@@ -78,31 +78,31 @@ mkdir -p $dir/corpus_processed/training/0467-1 $dir/corpus_processed/training/04
 # Create parallel file lists and text files, but keep sound files in the same location to save disk space
 # Writes the lists to data/local/data (~ 310h)
 echo "Creating parallel data for training data."
-python3 $local/sprak2kaldi.py $dir/download/0467-1 $dir/corpus_processed/training/0467-1 # ~140h -python3 $local/sprak2kaldi.py $dir/download/0467-2 $dir/corpus_processed/training/0467-2 # ~125h -python3 $local/sprak2kaldi.py $dir/download/0467-3 $dir/corpus_processed/training/0467-3 # ~128h +python $local/sprak2kaldi.py $dir/download/0467-1 $dir/corpus_processed/training/0467-1 # ~140h +python $local/sprak2kaldi.py $dir/download/0467-2 $dir/corpus_processed/training/0467-2 # ~125h +python $local/sprak2kaldi.py $dir/download/0467-3 $dir/corpus_processed/training/0467-3 # ~128h mv $dir/corpus_processed/training/0467-1/'r4670118.791213 8232' $dir/corpus_processed/training/0467-1/'r4670118.791213_8232' -for f in $dir/corpus_processed/training/0467-1/r4670118.791213_8232/*.txt; do mv "$f" "${f// /_}"; done +for f in $dir/corpus_processed/training/0467-1/r4670118.791213_8232/*.txt; do + mv "$f" "${f// /_}"; +done ( # Ditto test set (~ 93h) echo "Creating parallel data for test data." rm -rf $dir/corpus_processed/test/0468 mkdir -p $dir/corpus_processed/test/0468 - python3 $local/sprak2kaldi.py $dir/download/0468 $dir/corpus_processed/test/0468 + python $local/sprak2kaldi.py $dir/download/0468 $dir/corpus_processed/test/0468 ) - - # Create the LM training data ( echo "Writing the LM text to file and normalising." cat $dir/corpus_processed/training/0467-1/txtlist $dir/corpus_processed/training/0467-2/txtlist $dir/corpus_processed/training/0467-3/txtlist | while read l; do cat $l; done > $lmdir/lmsents - python3 local/normalize_transcript.py $lmdir/lmsents $lmdir/lmsents.norm + python local/normalize_transcript.py $lmdir/lmsents $lmdir/lmsents.norm sort -u $lmdir/lmsents.norm > $lmdir/transcripts.uniq -) & +) # Combine training file lists echo "Combine file lists." From 16afe7ae7e69080f723b4ce5072f58a94c19e00c Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 9 Feb 2017 18:07:19 -0500 Subject: [PATCH 418/530] [src,egs,scripts]: Replace online-nnet3 decoding setup with 'looped' decoding and give example script with TDNN+LSTM. 
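
Background for this change: the 'looped' computation (see
src/nnet3/nnet-compile-looped.cc) compiles the network once into a
computation that carries its recurrent state (e.g. LSTM cell and projection
values) forward from chunk to chunk, so recurrent models can be evaluated in
a streaming fashion. The old online-nnet3 decodable, which recomputed each
chunk with extra context, is removed in favour of the new decodable in
src/nnet3/decodable-online-looped.{h,cc}. A rough usage sketch (the
directories below are placeholders, not files touched by this patch):

    # offline 'looped' lattice generation for a chain TDNN+LSTM system:
    steps/nnet3/decode_looped.sh --nj 10 --cmd "$decode_cmd" \
      --acwt 1.0 --post-decode-acwt 10.0 \
      $dir/graph data/dev_hires $dir/decode_looped_dev

    # online decoding with the same model, after preparing an online
    # directory with steps/online/nnet3/prepare_online_decoding.sh:
    steps/online/nnet3/decode.sh --nj 10 --cmd "$decode_cmd" \
      --acwt 1.0 --post-decode-acwt 10.0 \
      $dir/graph data/dev ${dir}_online/decode_dev
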
--- .../local/chain/tuning/run_tdnn_lstm_1b.sh | 2 +- .../local/chain/tuning/run_tdnn_lstm_1d.sh | 1 + .../local/chain/tuning/run_tdnn_lstm_1e.sh | 85 +++++- .../local/chain/tuning/run_tdnn_lstm_1f.sh | 3 +- .../local/chain/tuning/run_tdnn_lstm_1g.sh | 2 + egs/wsj/s5/steps/online/nnet3/decode.sh | 15 +- src/itf/decodable-itf.h | 2 +- src/nnet2/online-nnet2-decodable.cc | 12 +- src/nnet3/Makefile | 4 +- src/nnet3/decodable-online-looped.cc | 252 ++++++++++++++++++ src/nnet3/decodable-online-looped.h | 199 ++++++++++++++ src/nnet3/decodable-simple-looped.cc | 99 ++++--- src/nnet3/decodable-simple-looped.h | 36 ++- src/nnet3/nnet-am-decodable-simple.h | 2 +- src/nnet3/nnet-compile-looped.cc | 2 +- src/nnet3/online-nnet3-decodable-simple.cc | 221 --------------- src/nnet3/online-nnet3-decodable-simple.h | 153 ----------- src/online2/online-nnet2-decoding-threaded.cc | 54 ++-- src/online2/online-nnet2-feature-pipeline.cc | 6 - src/online2/online-nnet2-feature-pipeline.h | 46 ++-- src/online2/online-nnet3-decoding.cc | 35 +-- src/online2/online-nnet3-decoding.h | 64 ++--- .../online2-wav-nnet2-latgen-faster.cc | 81 +++--- .../online2-wav-nnet3-latgen-faster.cc | 110 ++++---- 24 files changed, 824 insertions(+), 662 deletions(-) create mode 100644 src/nnet3/decodable-online-looped.cc create mode 100644 src/nnet3/decodable-online-looped.h delete mode 100644 src/nnet3/online-nnet3-decodable-simple.cc delete mode 100644 src/nnet3/online-nnet3-decodable-simple.h diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh index 5149e5a54e8..eb2c91dc3d4 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -6,7 +6,7 @@ # and adding # --egs.chunk-left-context-initial=0 # and --egs.chunk-right-context-final=0 - +# See 1e for summary of results. # steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi # exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh index 28ca16d939c..4be28a4ca97 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -2,6 +2,7 @@ # 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it # uses egs from 1b, remember to remove that before I commit. +# See 1e for summary of results. # steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi # exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh index 32950e7df6a..6704f9d299e 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -1,6 +1,50 @@ #!/bin/bash -# 1e is as 1b, but reducing decay-time from 40 to 20. +# 1e is as 1d, but reducing decay-time from 40 to 20. 
+ +# The following table shows comparison of various decay-time values, +# namely: [b:unset=infinity, f:80, d:40, e:20, g:10, g2:5]. +# note: the g2 script is not checked in. +# There is no clear trend on the non-looped decoding, but looped decoding seems +# to improve as decay-time is decreased. We end up recommending decay-time=20, +# as by then we get all the improvement on looped decoding, and it's the +# most conservative setting with which we can get this improvement (although +# actually it seems fine to use an even smaller decay-time). + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{b,f,d,e,g,g2}_sp_bi + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1b_sp_bi exp/chain_cleaned/tdnn_lstm1f_sp_bi exp/chain_cleaned/tdnn_lstm1d_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1g_sp_bi exp/chain_cleaned/tdnn_lstm1g2_sp_bi +# System tdnn_lstm1b_sp_bi tdnn_lstm1f_sp_bi tdnn_lstm1d_sp_bi tdnn_lstm1e_sp_bi tdnn_lstm1g_sp_bi tdnn_lstm1g2_sp_bi +# WER on dev(orig) 9.1 8.8 9.0 9.0 9.0 9.4 +# [looped:] 9.4 9.3 9.2 9.0 8.9 9.4 +# WER on dev(rescored) 8.4 8.2 8.4 8.4 8.4 8.7 +# [looped:] 8.8 8.7 8.6 8.4 8.3 8.7 +# WER on test(orig) 8.9 9.0 8.9 8.8 8.8 9.3 +# [looped:] 9.3 9.3 9.0 8.8 8.8 9.2 +# WER on test(rescored) 8.4 8.6 8.3 8.4 8.4 8.9 +# [looped:] 8.7 8.9 8.5 8.3 8.4 8.8 +# Final train prob -0.0621 -0.0631 -0.0595 -0.0648 -0.0689 -0.0739 +# Final valid prob -0.0799 -0.0802 -0.0823 -0.0827 -0.0890 -0.0963 +# Final train prob (xent) -0.8300 -0.8295 -0.8129 -0.8372 -0.8610 -0.8792 +# Final valid prob (xent) -0.9500 -0.9662 -0.9589 -0.9497 -0.9982 -1.0256 + + +# the following table compares the 'online' decoding with regular and looped +# decoding. online decoding is a little better than either (possibly due to +# using slightly later iVectors). +# +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi{,_online} 2>/dev/null +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi_online +# System tdnn_lstm1e_sp_bi tdnn_lstm1e_sp_bi_online +# WER on dev(orig) 9.0 8.8 +# [looped:] 9.0 +# WER on dev(rescored) 8.4 8.4 +# [looped:] 8.4 +# WER on test(orig) 8.8 8.8 +# [looped:] 8.8 +# WER on test(rescored) 8.4 8.4 +# [looped:] 8.3 + # 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it # uses egs from 1b, remember to remove that before I commit. @@ -77,6 +121,8 @@ tdnn_lstm_affix=1e #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we common_egs_dir= # you can set this to use previously dumped egs. remove_egs=true +test_online_decoding=false # if true, it will run the last decoding stage. + # End configuration section. echo "$0 $@" # Print the command line for logging @@ -289,8 +335,10 @@ if [ $stage -le 21 ]; then # 'looped' decoding. we didn't write a -parallel version of this program yet, # so it will take a bit longer as the --num-threads option is not supported. # we just hardcode the --frames-per-chunk option as it doesn't have to - # match any value used in training, and it won't affect the results (unlike - # regular decoding). + # match any value used in training, and it won't affect the results very much (unlike + # regular decoding)... [it will affect them slightly due to differences in the + # iVector extraction; probably smaller will be worse as it sees less of the future, + # but in a real scenario, long chunks will introduce excessive latency]. 
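+  # (For a rough sense of scale, assuming the usual 10 ms frame shift: a
+  # 150-frame chunk means about 1.5 seconds of audio must be buffered before
+  # it can be processed, while the 20-frame default added to
+  # steps/online/nnet3/decode.sh corresponds to about 0.2 seconds.)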
rm $dir/.error 2>/dev/null || true for dset in dev test; do ( @@ -313,4 +361,35 @@ if [ $stage -le 21 ]; then fi +if $test_online_decoding && [ $stage -le 22 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + data/lang_chain exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + # note: we just give it "$dset" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ + --extra-left-context-initial $extra_left_context_initial \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset} ${dir}_online/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}_online/decode_${dset} ${dir}_online/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh index ed778713907..3ed14f30956 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -1,6 +1,7 @@ #!/bin/bash -# 1f is as 1b, but increasing decay-time from 40 to 80. [see also 1e, at 20.] +# 1f is as 1d, but increasing decay-time from 40 to 80. [see also 1e, at 20.] +# see 1e for summary of results. # 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it # uses egs from 1b, remember to remove that before I commit. diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh index bbc17c77aea..aff39a04025 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh @@ -2,6 +2,8 @@ ####################### # 1g is as 1e, but reducing decay-time further from 20 to 10. +# see 1e for summary of results. + # 1e is as 1b, but reducing decay-time from 40 to 20. # 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. 
note: it diff --git a/egs/wsj/s5/steps/online/nnet3/decode.sh b/egs/wsj/s5/steps/online/nnet3/decode.sh index a4777f1edf7..118cf9e1260 100755 --- a/egs/wsj/s5/steps/online/nnet3/decode.sh +++ b/egs/wsj/s5/steps/online/nnet3/decode.sh @@ -8,6 +8,8 @@ stage=0 nj=4 cmd=run.pl +frames_per_chunk=20 +extra_left_context_initial=0 min_active=200 max_active=7000 beam=15.0 @@ -114,11 +116,6 @@ else fi -decoder=online2-wav-nnet3-latgen-faster -parallel_opts= -opts="--online=$online" - - if [ "$post_decode_acwt" == 1.0 ]; then lat_wspecifier="ark:|gzip -c >$dir/lat.JOB.gz" else @@ -132,8 +129,12 @@ if [ -f $srcdir/frame_subsampling_factor ]; then fi if [ $stage -le 0 ]; then - $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \ - $decoder $opts $silence_weighting_opts --do-endpointing=$do_endpointing $frame_subsampling_opt \ + $cmd JOB=1:$nj $dir/log/decode.JOB.log \ + online2-wav-nnet3-latgen-faster $silence_weighting_opts --do-endpointing=$do_endpointing \ + --frames-per-chunk=$frames_per_chunk \ + --extra-left-context-initial=$extra_left_context_initial \ + --online=$online \ + $frame_subsampling_opt \ --config=$online_config \ --min-active=$min_active --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \ --acoustic-scale=$acwt --word-symbol-table=$graphdir/words.txt \ diff --git a/src/itf/decodable-itf.h b/src/itf/decodable-itf.h index ad3b7809dab..9852861969d 100644 --- a/src/itf/decodable-itf.h +++ b/src/itf/decodable-itf.h @@ -112,7 +112,7 @@ class DecodableInterface { /// Returns the number of states in the acoustic model /// (they will be indexed one-based, i.e. from 1 to NumIndices(); - /// this is for compatibility with OpenFst. + /// this is for compatibility with OpenFst). virtual int32 NumIndices() const = 0; virtual ~DecodableInterface() {} diff --git a/src/nnet2/online-nnet2-decodable.cc b/src/nnet2/online-nnet2-decodable.cc index 856326cf688..715e1cc280d 100644 --- a/src/nnet2/online-nnet2-decodable.cc +++ b/src/nnet2/online-nnet2-decodable.cc @@ -80,7 +80,7 @@ int32 DecodableNnet2Online::NumFramesReady() const { void DecodableNnet2Online::ComputeForFrame(int32 frame) { int32 features_ready = features_->NumFramesReady(); - bool input_finished = features_->IsLastFrame(features_ready - 1); + bool input_finished = features_->IsLastFrame(features_ready - 1); KALDI_ASSERT(frame >= 0); if (frame >= begin_frame_ && frame < begin_frame_ + scaled_loglikes_.NumRows()) @@ -112,20 +112,20 @@ void DecodableNnet2Online::ComputeForFrame(int32 frame) { t_modified = features_ready - 1; features_->GetFrame(t_modified, &row); } - CuMatrix cu_features; + CuMatrix cu_features; cu_features.Swap(&features); // Copy to GPU, if we're using one. - + int32 num_frames_out = input_frame_end - input_frame_begin - left_context_ - right_context_; - + CuMatrix cu_posteriors(num_frames_out, num_pdfs_); - + // The "false" below tells it not to pad the input: we've already done // any padding that we needed to do. NnetComputation(nnet_.GetNnet(), cu_features, false, &cu_posteriors); - + cu_posteriors.ApplyFloor(1.0e-20); // Avoid log of zero which leads to NaN. 
   cu_posteriors.ApplyLog();
   // subtract log-prior (divide by prior)
diff --git a/src/nnet3/Makefile b/src/nnet3/Makefile
index ef50f9960e1..76e0cbbdfbb 100644
--- a/src/nnet3/Makefile
+++ b/src/nnet3/Makefile
@@ -28,8 +28,8 @@ OBJFILES = nnet-common.o nnet-compile.o nnet-component-itf.o \
            discriminative-supervision.o nnet-discriminative-example.o \
            nnet-discriminative-diagnostics.o \
            discriminative-training.o nnet-discriminative-training.o \
-           online-nnet3-decodable-simple.o nnet-compile-looped.o \
-           decodable-simple-looped.o
+           nnet-compile-looped.o decodable-simple-looped.o \
+           decodable-online-looped.o
 
 LIBNAME = kaldi-nnet3
diff --git a/src/nnet3/decodable-online-looped.cc b/src/nnet3/decodable-online-looped.cc
new file mode 100644
index 00000000000..77be1f166bf
--- /dev/null
+++ b/src/nnet3/decodable-online-looped.cc
@@ -0,0 +1,252 @@
+// nnet3/decodable-online-looped.cc
+
+// Copyright 2017 Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include "nnet3/nnet-utils.h"
+
+namespace kaldi {
+namespace nnet3 {
+
+DecodableNnetLoopedOnlineBase::DecodableNnetLoopedOnlineBase(
+    const DecodableNnetSimpleLoopedInfo &info,
+    OnlineFeatureInterface *input_features,
+    OnlineFeatureInterface *ivector_features):
+    num_chunks_computed_(0),
+    current_log_post_subsampled_offset_(-1),
+    info_(info),
+    input_features_(input_features),
+    ivector_features_(ivector_features),
+    computer_(info_.opts.compute_config, info_.computation,
+              info_.nnet, NULL) {  // NULL is 'nnet_to_update'
+  // Check that feature dimensions match.
+  KALDI_ASSERT(input_features_ != NULL);
+  int32 nnet_input_dim = info_.nnet.InputDim("input"),
+      nnet_ivector_dim = info_.nnet.InputDim("ivector"),
+      feat_input_dim = input_features_->Dim(),
+      feat_ivector_dim = (ivector_features_ != NULL ?
+                          ivector_features_->Dim() : -1);
+  if (nnet_input_dim != feat_input_dim) {
+    KALDI_ERR << "Input feature dimension mismatch: got " << feat_input_dim
+              << " but network expects " << nnet_input_dim;
+  }
+  if (nnet_ivector_dim != feat_ivector_dim) {
+    KALDI_ERR << "Ivector feature dimension mismatch: got " << feat_ivector_dim
+              << " but network expects " << nnet_ivector_dim;
+  }
+}
+
+
+int32 DecodableNnetLoopedOnlineBase::NumFramesReady() const {
+  // note: the ivector_features_ may have 2 or 3 fewer frames ready than
+  // input_features_, but we don't wait for them; we just use the most recent
+  // iVector we can.
+  int32 features_ready = input_features_->NumFramesReady();
+  if (features_ready == 0)
+    return 0;
+  bool input_finished = input_features_->IsLastFrame(features_ready - 1);
+
+  int32 sf = info_.opts.frame_subsampling_factor;
+
+  if (input_finished) {
+    // if the input has finished, we'll pad with duplicates of the last frame
+    // as needed to get the required right context.
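+    // the expression below is integer ceiling division: e.g. with sf = 3,
+    // 100 input frames give (100 + 2) / 3 = 34 output frames, because the
+    // final partial group of input frames still yields one (padded) output
+    // frame.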
+    return (features_ready + sf - 1) / sf;
+  } else {
+    // note: info_.frames_right_context includes both the model's right
+    // context and any extra_right_context_ (but the latter is zero in the
+    // looped case; see Init() in decodable-simple-looped.cc).
+    int32 non_subsampled_output_frames_ready =
+        std::max<int32>(0, features_ready - info_.frames_right_context);
+    int32 num_chunks_ready = non_subsampled_output_frames_ready /
+                             info_.frames_per_chunk;
+    // note: the division by the frame subsampling factor 'sf' below
+    // doesn't need any attention to rounding because info_.frames_per_chunk
+    // is always a multiple of 'sf' (see 'frames_per_chunk = GetChunkSize...'
+    // in decodable-simple-looped.cc).
+    return num_chunks_ready * info_.frames_per_chunk / sf;
+  }
+}
+
+
+// note: the frame-index argument is on the output of the network, i.e. after any
+// subsampling, so we call it 'subsampled_frame'.
+bool DecodableNnetLoopedOnlineBase::IsLastFrame(
+    int32 subsampled_frame) const {
+  // To understand this code, compare it with the code of NumFramesReady();
+  // it follows the same structure.
+  int32 features_ready = input_features_->NumFramesReady();
+  if (features_ready == 0) {
+    if (subsampled_frame == -1 && input_features_->IsLastFrame(-1)) {
+      // the attempt to handle this rather pathological case (input finished
+      // but no frames ready) is a little quixotic, as we have not properly
+      // tested this and other parts of the code may die.
+      return true;
+    } else {
+      return false;
+    }
+  }
+  bool input_finished = input_features_->IsLastFrame(features_ready - 1);
+  if (!input_finished)
+    return false;
+  int32 sf = info_.opts.frame_subsampling_factor,
+      num_subsampled_frames_ready = (features_ready + sf - 1) / sf;
+  return (subsampled_frame == num_subsampled_frames_ready - 1);
+}
+
+
+void DecodableNnetLoopedOnlineBase::AdvanceChunk() {
+  // Prepare the input data for the next chunk of features.
+  // note: 'end' means one past the last.
+  int32 begin_input_frame, end_input_frame;
+  if (num_chunks_computed_ == 0) {
+    begin_input_frame = -info_.frames_left_context;
+    // note: end is last plus one.
+    end_input_frame = info_.frames_per_chunk + info_.frames_right_context;
+  } else {
+    // note: begin_input_frame will be the same as the previous
+    // end_input_frame.  You can verify this directly for
+    // num_chunks_computed_ == 1, and then by induction.
+    begin_input_frame = num_chunks_computed_ * info_.frames_per_chunk +
+        info_.frames_right_context;
+    end_input_frame = begin_input_frame + info_.frames_per_chunk;
+  }
+
+  int32 num_feature_frames_ready = input_features_->NumFramesReady();
+  bool is_finished = input_features_->IsLastFrame(num_feature_frames_ready - 1);
+
+  if (end_input_frame >= num_feature_frames_ready && !is_finished) {
+    // We shouldn't be attempting to read past the end of the available
+    // features until we have reached the end of the input (i.e. the end-user
+    // called InputFinished(), announcing that there is no more waveform); at
+    // that point we pad as needed with copies of the last frame, to flush out
+    // the last of the output.
+    // If the following error happens, it likely indicates a bug somewhere in
+    // this decodable code (although it could possibly indicate the user
+    // asking for a frame that was not ready, which would be a misuse of this
+    // class); it can be figured out from gdb, as in either case it would be a
+    // bug in the code.
+    KALDI_ERR << "Attempt to access frame past the end of the available input";
+  }
+
+
+  CuMatrix<BaseFloat> feats_chunk;
+  { // this block sets 'feats_chunk'.
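+    // the loop below copies one input frame at a time, clamping each
+    // requested index into [0, num_feature_frames_ready - 1]: indexes before
+    // the start of the utterance duplicate the first frame, and (once
+    // InputFinished() has been called) indexes past the end duplicate the
+    // last frame.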
+    Matrix<BaseFloat> this_feats(end_input_frame - begin_input_frame,
+                                 input_features_->Dim());
+    for (int32 i = begin_input_frame; i < end_input_frame; i++) {
+      SubVector<BaseFloat> this_row(this_feats, i - begin_input_frame);
+      int32 input_frame = i;
+      if (input_frame < 0) input_frame = 0;
+      if (input_frame >= num_feature_frames_ready)
+        input_frame = num_feature_frames_ready - 1;
+      input_features_->GetFrame(input_frame, &this_row);
+    }
+    feats_chunk.Swap(&this_feats);
+  }
+  computer_.AcceptInput("input", &feats_chunk);
+
+  if (info_.has_ivectors) {
+    KALDI_ASSERT(ivector_features_ != NULL);
+    KALDI_ASSERT(info_.request1.inputs.size() == 2);
+    // all but the 1st chunk should have 1 iVector, but there is no need to
+    // assume this.
+    int32 num_ivectors = (num_chunks_computed_ == 0 ?
+                          info_.request1.inputs[1].indexes.size() :
+                          info_.request2.inputs[1].indexes.size());
+    KALDI_ASSERT(num_ivectors > 0);
+
+    Vector<BaseFloat> ivector(ivector_features_->Dim());
+    // we just get the iVector from the last input frame we needed (clamped to
+    // the most recent frame available, if necessary); we don't bother trying
+    // to be 'accurate' in getting the iVectors for their 'correct' frames,
+    // because in general using the iVector from as large 't' as possible will
+    // be better.
+    int32 most_recent_input_frame = num_feature_frames_ready - 1,
+        num_ivector_frames_ready = ivector_features_->NumFramesReady();
+
+    if (num_ivector_frames_ready > 0) {
+      int32 ivector_frame_to_use = std::min(
+          most_recent_input_frame, num_ivector_frames_ready - 1);
+      ivector_features_->GetFrame(ivector_frame_to_use,
+                                  &ivector);
+    }
+    // else just leave the iVector zero (this would only happen with a very
+    // small chunk-size, like a chunk size of 2, which would be very
+    // inefficient; and only at the beginning of the file).
+
+    // note: we expect num_ivectors to be 1 in practice.
+    Matrix<BaseFloat> ivectors(num_ivectors,
+                               ivector.Dim());
+    ivectors.CopyRowsFromVec(ivector);
+    CuMatrix<BaseFloat> cu_ivectors;
+    cu_ivectors.Swap(&ivectors);
+    computer_.AcceptInput("ivector", &cu_ivectors);
+  }
+  computer_.Run();
+
+  {
+    // Note: it's possible in theory that if you had a weird recurrence that
+    // went directly from the output, the call to GetOutputDestructive() would
+    // cause a crash on the next chunk.  If that happens, GetOutput() should be
+    // used instead of GetOutputDestructive().  But we don't anticipate this
+    // will happen in practice.
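+    // the block below converts the network output to scaled
+    // pseudo-log-likelihoods: subtracting the log-prior corresponds to
+    // dividing p(pdf|x) by p(pdf), and the acoustic scale is applied here so
+    // the decoder can use the values directly.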
+    CuMatrix<BaseFloat> output;
+    computer_.GetOutputDestructive("output", &output);
+
+    if (info_.log_priors.Dim() != 0) {
+      // subtract log-prior (divide by prior)
+      output.AddVecToRows(-1.0, info_.log_priors);
+    }
+    // apply the acoustic scale
+    output.Scale(info_.opts.acoustic_scale);
+    current_log_post_.Resize(0, 0);
+    current_log_post_.Swap(&output);
+  }
+  KALDI_ASSERT(current_log_post_.NumRows() == info_.frames_per_chunk /
+               info_.opts.frame_subsampling_factor &&
+               current_log_post_.NumCols() == info_.output_dim);
+
+  num_chunks_computed_++;
+
+  current_log_post_subsampled_offset_ =
+      (num_chunks_computed_ - 1) *
+      (info_.frames_per_chunk / info_.opts.frame_subsampling_factor);
+}
+
+BaseFloat DecodableNnetLoopedOnline::LogLikelihood(int32 subsampled_frame,
+                                                   int32 index) {
+  EnsureFrameIsComputed(subsampled_frame);
+  // note: we index by 'index - 1' below because the indexes of the decodable
+  // object are one-based (for compatibility with OpenFst), while the columns
+  // of current_log_post_ are zero-based.
+  return current_log_post_(
+      subsampled_frame - current_log_post_subsampled_offset_,
+      index - 1);
+}
+
+
+BaseFloat DecodableAmNnetLoopedOnline::LogLikelihood(int32 subsampled_frame,
+                                                     int32 index) {
+  EnsureFrameIsComputed(subsampled_frame);
+  return current_log_post_(
+      subsampled_frame - current_log_post_subsampled_offset_,
+      trans_model_.TransitionIdToPdf(index));
+}
+
+
+} // namespace nnet3
+} // namespace kaldi
diff --git a/src/nnet3/decodable-online-looped.h b/src/nnet3/decodable-online-looped.h
new file mode 100644
index 00000000000..3041d3c4637
--- /dev/null
+++ b/src/nnet3/decodable-online-looped.h
@@ -0,0 +1,199 @@
+// nnet3/decodable-online-looped.h
+
+// Copyright  2014-2017  Johns Hopkins University (author: Daniel Povey)
+//            2016  Api.ai (Author: Ilya Platonov)
+
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_NNET3_DECODABLE_ONLINE_LOOPED_H_
+#define KALDI_NNET3_DECODABLE_ONLINE_LOOPED_H_
+
+#include "itf/online-feature-itf.h"
+#include "itf/decodable-itf.h"
+#include "nnet3/am-nnet-simple.h"
+#include "nnet3/nnet-compute.h"
+#include "nnet3/nnet-optimize.h"
+#include "nnet3/decodable-simple-looped.h"
+#include "hmm/transition-model.h"
+
+namespace kaldi {
+namespace nnet3 {
+
+
+// The Decodable objects that we define in this header do the neural net
+// computation in a way that's compatible with online feature extraction.  It
+// differs from the one declared in online-nnet3-decodable-simple.h because it
+// uses the 'looped' network evaluation, which is more efficient because it
+// re-uses hidden activations (and therefore doesn't have to pad chunks of data
+// with extra left-context); it is applicable to TDNNs and to forwards-recurrent
+// topologies like LSTMs, but not to backwards-recurrent topologies such as
+// BLSTMs.
+
+// The options are passed in the same way as in decodable-simple-looped.h;
+// we use the same options and info class.
+
+
+// This object is used as a base class for DecodableNnetLoopedOnline
+// and DecodableAmNnetLoopedOnline.
+// It takes care of the neural net computation and computations related to how
+// many frames are ready (etc.), but it does not override the LogLikelihood() or
+// NumIndices() functions, so it is not usable as an object of type
+// DecodableInterface.
+class DecodableNnetLoopedOnlineBase: public DecodableInterface {
+ public:
+  // Constructor.  'input_features' is for the feature that will be given
+  // as 'input' to the neural network; 'ivector_features' is for the iVector
+  // feature, or NULL if iVectors are not being used.
+  DecodableNnetLoopedOnlineBase(const DecodableNnetSimpleLoopedInfo &info,
+                                OnlineFeatureInterface *input_features,
+                                OnlineFeatureInterface *ivector_features);
+
+  // note: the LogLikelihood function is not overridden; the child
+  // class needs to do this.
+  //virtual BaseFloat LogLikelihood(int32 subsampled_frame, int32 index);
+
+  // note: the frame argument is on the output of the network, i.e. after any
+  // subsampling, so we call it 'subsampled_frame'.
+  virtual bool IsLastFrame(int32 subsampled_frame) const;
+
+  virtual int32 NumFramesReady() const;
+
+  // Note: this function, present in the base-class, is overridden by the child class.
+  // virtual int32 NumIndices() const;
+
+  // this is not part of the standard Decodable interface, but it is needed by
+  // the endpointing code, which uses it to work out the frame shift of the
+  // decoder output (see SingleUtteranceNnet3Decoder::EndpointDetected()).
+  int32 FrameSubsamplingFactor() const {
+    return info_.opts.frame_subsampling_factor;
+  }
+
+
+ protected:
+
+  /// If the neural-network outputs for this frame are not cached, this function
+  /// computes them (and possibly also some later frames).  Note:
+  /// the frame-index is called 'subsampled_frame' because if frame-subsampling-factor
+  /// is not 1, it's an index that is "after subsampling", i.e. it changes more
+  /// slowly than the input-feature index.
+  inline void EnsureFrameIsComputed(int32 subsampled_frame) {
+    KALDI_ASSERT(subsampled_frame >= current_log_post_subsampled_offset_ &&
+                 "Frames must be accessed in order.");
+    while (subsampled_frame >= current_log_post_subsampled_offset_ +
+                               current_log_post_.NumRows())
+      AdvanceChunk();
+  }
+
+  // The current log-posteriors that we got from the last time we
+  // ran the computation.
+  Matrix<BaseFloat> current_log_post_;
+
+  // The number of chunks we have computed so far.
+  int32 num_chunks_computed_;
+
+  // The time-offset of the current log-posteriors; equals
+  // (num_chunks_computed_ - 1) *
+  //    (info_.frames_per_chunk / info_.opts.frame_subsampling_factor).
+  int32 current_log_post_subsampled_offset_;
+
+  const DecodableNnetSimpleLoopedInfo &info_;
+
+ private:
+
+  // This function does the computation for the next chunk.  It will change
+  // current_log_post_ and current_log_post_subsampled_offset_, and
+  // increment num_chunks_computed_.
+  void AdvanceChunk();
+
+  OnlineFeatureInterface *input_features_;
+  OnlineFeatureInterface *ivector_features_;
+
+  NnetComputer computer_;
+
+  KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableNnetLoopedOnlineBase);
+};
+
+// This decodable object takes indexes of the form (pdf_id + 1) -- or, more
+// generally, one plus whatever index into the output of the neural network is
+// meant.  It fully implements DecodableInterface.
+// Note: whether or not division by the prior takes place depends on
+// whether you supplied class AmNnetSimple (or just Nnet) to the constructor
+// of the DecodableNnetSimpleLoopedInfo that you initialized this
+// with.
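+//
+// A rough usage sketch (the feature-pipeline pointers 'input_feats' and
+// 'ivector_feats' here are placeholders for objects the caller would set up):
+//
+//   DecodableNnetSimpleLoopedInfo info(opts, &nnet);  // no priors supplied
+//   DecodableNnetLoopedOnline decodable(info, input_feats, ivector_feats);
+//   // indexes are one-based, so we pass pdf_id + 1:
+//   BaseFloat loglike = decodable.LogLikelihood(subsampled_frame, pdf_id + 1);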
+class DecodableNnetLoopedOnline: public DecodableNnetLoopedOnlineBase {
+ public:
+  DecodableNnetLoopedOnline(
+      const DecodableNnetSimpleLoopedInfo &info,
+      OnlineFeatureInterface *input_features,
+      OnlineFeatureInterface *ivector_features):
+      DecodableNnetLoopedOnlineBase(info, input_features, ivector_features) { }
+
+
+  // returns the output-dim of the neural net.
+  virtual int32 NumIndices() const { return info_.output_dim; }
+
+  // 'subsampled_frame' is a frame, but if frame-subsampling-factor != 1, it's a
+  // reduced-rate output frame (e.g. a 't' index divided by 3).  'index'
+  // represents the pdf-id (or other output of the network) PLUS ONE.
+  virtual BaseFloat LogLikelihood(int32 subsampled_frame, int32 index);
+
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableNnetLoopedOnline);
+
+};
+
+
+// This is for traditional decoding where the graph has transition-ids
+// on the arcs, and you need the TransitionModel to map those to
+// pdf-ids.
+// Note: whether or not division by the prior takes place depends on
+// whether you supplied class AmNnetSimple (or just Nnet) to the constructor
+// of the DecodableNnetSimpleLoopedInfo that you initialized this
+// with.
+class DecodableAmNnetLoopedOnline: public DecodableNnetLoopedOnlineBase {
+ public:
+  DecodableAmNnetLoopedOnline(
+      const TransitionModel &trans_model,
+      const DecodableNnetSimpleLoopedInfo &info,
+      OnlineFeatureInterface *input_features,
+      OnlineFeatureInterface *ivector_features):
+      DecodableNnetLoopedOnlineBase(info, input_features, ivector_features),
+      trans_model_(trans_model) { }
+
+
+  // returns the number of transition-ids, which is what the labels on the
+  // decoding graph correspond to (not the output-dim of the neural net).
+  virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }
+
+  // 'subsampled_frame' is a frame, but if frame-subsampling-factor != 1, it's a
+  // reduced-rate output frame (e.g. a 't' index divided by 3).
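+  // For example, with frame_subsampling_factor = 3, subsampled_frame 10
+  // covers input frames t = 30 through 32.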
+ virtual BaseFloat LogLikelihood(int32 subsampled_frame, + int32 transition_id); + + private: + const TransitionModel &trans_model_; + + KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableAmNnetLoopedOnline); + +}; + + + + +} // namespace nnet3 +} // namespace kaldi + +#endif // KALDI_NNET3_DECODABLE_ONLINE_LOOPED_H_ diff --git a/src/nnet3/decodable-simple-looped.cc b/src/nnet3/decodable-simple-looped.cc index bb9a38632a1..df18d605b7d 100644 --- a/src/nnet3/decodable-simple-looped.cc +++ b/src/nnet3/decodable-simple-looped.cc @@ -28,7 +28,7 @@ namespace nnet3 { DecodableNnetSimpleLoopedInfo::DecodableNnetSimpleLoopedInfo( const NnetSimpleLoopedComputationOptions &opts, Nnet *nnet): - opts_(opts), nnet_(*nnet) { + opts(opts), nnet(*nnet) { Init(opts, nnet); } @@ -36,9 +36,9 @@ DecodableNnetSimpleLoopedInfo::DecodableNnetSimpleLoopedInfo( const NnetSimpleLoopedComputationOptions &opts, const Vector &priors, Nnet *nnet): - opts_(opts), nnet_(*nnet), log_priors_(priors) { - if (log_priors_.Dim() != 0) - log_priors_.ApplyLog(); + opts(opts), nnet(*nnet), log_priors(priors) { + if (log_priors.Dim() != 0) + log_priors.ApplyLog(); Init(opts, nnet); } @@ -46,9 +46,9 @@ DecodableNnetSimpleLoopedInfo::DecodableNnetSimpleLoopedInfo( DecodableNnetSimpleLoopedInfo::DecodableNnetSimpleLoopedInfo( const NnetSimpleLoopedComputationOptions &opts, AmNnetSimple *am_nnet): - opts_(opts), nnet_(am_nnet->GetNnet()), log_priors_(am_nnet->Priors()) { - if (log_priors_.Dim() != 0) - log_priors_.ApplyLog(); + opts(opts), nnet(am_nnet->GetNnet()), log_priors(am_nnet->Priors()) { + if (log_priors.Dim() != 0) + log_priors.ApplyLog(); Init(opts, &(am_nnet->GetNnet())); } @@ -58,35 +58,36 @@ void DecodableNnetSimpleLoopedInfo::Init( Nnet *nnet) { opts.Check(); KALDI_ASSERT(IsSimpleNnet(*nnet)); - has_ivectors_ = (nnet->InputDim("ivector") > 0); + has_ivectors = (nnet->InputDim("ivector") > 0); int32 left_context, right_context; ComputeSimpleNnetContext(*nnet, &left_context, &right_context); - frames_left_context_ = left_context + opts.extra_left_context_initial; - frames_right_context_ = right_context; - frames_per_chunk_ = GetChunkSize(*nnet, opts_.frame_subsampling_factor, - opts.frames_per_chunk); - output_dim_ = nnet->OutputDim("output"); - KALDI_ASSERT(output_dim_ > 0); + frames_left_context = left_context + opts.extra_left_context_initial; + frames_right_context = right_context; + frames_per_chunk = GetChunkSize(*nnet, opts.frame_subsampling_factor, + opts.frames_per_chunk); + output_dim = nnet->OutputDim("output"); + KALDI_ASSERT(output_dim > 0); // note, ivector_period is hardcoded to the same as frames_per_chunk_. - int32 ivector_period = frames_per_chunk_; - if (has_ivectors_) + int32 ivector_period = frames_per_chunk; + if (has_ivectors) ModifyNnetIvectorPeriod(ivector_period, nnet); int32 num_sequences = 1; // we're processing one utterance at a time. 
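+  // note: CreateLoopedComputationRequestSimple() below builds computation
+  // requests for three consecutive chunks; CompileLooped() then uses the
+  // repeating structure between the 2nd and 3rd requests to produce a
+  // computation that can be run for an arbitrary number of chunks.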
int32 extra_right_context = 0; - CreateLoopedComputationRequestSimple(*nnet, frames_per_chunk_, - opts_.frame_subsampling_factor, - ivector_period, opts.extra_left_context_initial, + CreateLoopedComputationRequestSimple(*nnet, frames_per_chunk, + opts.frame_subsampling_factor, + ivector_period, + opts.extra_left_context_initial, extra_right_context, num_sequences, - &request1_, &request2_, &request3_); + &request1, &request2, &request3); - CompileLooped(*nnet, opts_.optimize_config, request1_, request2_, request3_, - &computation_); - computation_.ComputeCudaIndexes(); + CompileLooped(*nnet, opts.optimize_config, request1, request2, request3, + &computation); + computation.ComputeCudaIndexes(); if (GetVerboseLevel() >= 3) { KALDI_VLOG(3) << "Computation is:"; - computation_.Print(std::cerr, *nnet); + computation.Print(std::cerr, *nnet); } } @@ -98,16 +99,16 @@ DecodableNnetSimpleLooped::DecodableNnetSimpleLooped( const MatrixBase *online_ivectors, int32 online_ivector_period): info_(info), - computer_(info_.opts_.compute_config, info_.computation_, - info_.nnet_, NULL), + computer_(info_.opts.compute_config, info_.computation, + info_.nnet, NULL), // NULL is 'nnet_to_update' feats_(feats), ivector_(ivector), online_ivector_feats_(online_ivectors), online_ivector_period_(online_ivector_period), num_chunks_computed_(0), current_log_post_subsampled_offset_(-1) { num_subsampled_frames_ = - (feats_.NumRows() + info_.opts_.frame_subsampling_factor - 1) / - info_.opts_.frame_subsampling_factor; + (feats_.NumRows() + info_.opts.frame_subsampling_factor - 1) / + info_.opts.frame_subsampling_factor; KALDI_ASSERT(!(ivector != NULL && online_ivectors != NULL)); KALDI_ASSERT(!(online_ivectors != NULL && online_ivector_period <= 0 && "You need to set the --online-ivector-period option!")); @@ -138,13 +139,13 @@ int32 DecodableNnetSimpleLooped::GetIvectorDim() const { void DecodableNnetSimpleLooped::AdvanceChunk() { int32 begin_input_frame, end_input_frame; if (num_chunks_computed_ == 0) { - begin_input_frame = -info_.frames_left_context_; + begin_input_frame = -info_.frames_left_context; // note: end is last plus one. - end_input_frame = info_.frames_per_chunk_ + info_.frames_right_context_; + end_input_frame = info_.frames_per_chunk + info_.frames_right_context; } else { - begin_input_frame = num_chunks_computed_ * info_.frames_per_chunk_ + - info_.frames_right_context_; - end_input_frame = begin_input_frame + info_.frames_per_chunk_; + begin_input_frame = num_chunks_computed_ * info_.frames_per_chunk + + info_.frames_right_context; + end_input_frame = begin_input_frame + info_.frames_per_chunk; } CuMatrix feats_chunk(end_input_frame - begin_input_frame, feats_.NumCols(), kUndefined); @@ -170,13 +171,13 @@ void DecodableNnetSimpleLooped::AdvanceChunk() { } computer_.AcceptInput("input", &feats_chunk); - if (info_.has_ivectors_) { - KALDI_ASSERT(info_.request1_.inputs.size() == 2); + if (info_.has_ivectors) { + KALDI_ASSERT(info_.request1.inputs.size() == 2); // all but the 1st chunk should have 1 iVector, but no need // to assume this. int32 num_ivectors = (num_chunks_computed_ == 0 ? - info_.request1_.inputs[1].indexes.size() : - info_.request2_.inputs[1].indexes.size()); + info_.request1.inputs[1].indexes.size() : + info_.request2.inputs[1].indexes.size()); KALDI_ASSERT(num_ivectors > 0); Vector ivector; @@ -194,40 +195,38 @@ void DecodableNnetSimpleLooped::AdvanceChunk() { computer_.Run(); { - // on GPU if we're using one, while avoiding unnecessary copies if we're not - // using the GPU. 
- // Note: it's possible in theory that if you had weird recurrence that went // directly from the output, the call to GetOutputDestructive() would cause - // a crash on the next chunk. But we don't anticipate this will happen in - // practice. + // a crash on the next chunk. If that happens, GetOutput() should be used + // instead of GetOutputDestructive(). But we don't anticipate this will + // happen in practice. CuMatrix output; computer_.GetOutputDestructive("output", &output); - if (info_.log_priors_.Dim() != 0) { + if (info_.log_priors.Dim() != 0) { // subtract log-prior (divide by prior) - output.AddVecToRows(-1.0, info_.log_priors_); + output.AddVecToRows(-1.0, info_.log_priors); } // apply the acoustic scale - output.Scale(info_.opts_.acoustic_scale); + output.Scale(info_.opts.acoustic_scale); current_log_post_.Resize(0, 0); current_log_post_.Swap(&output); } - KALDI_ASSERT(current_log_post_.NumRows() == info_.frames_per_chunk_ / - info_.opts_.frame_subsampling_factor && - current_log_post_.NumCols() == info_.output_dim_); + KALDI_ASSERT(current_log_post_.NumRows() == info_.frames_per_chunk / + info_.opts.frame_subsampling_factor && + current_log_post_.NumCols() == info_.output_dim); num_chunks_computed_++; current_log_post_subsampled_offset_ = (num_chunks_computed_ - 1) * - (info_.frames_per_chunk_ / info_.opts_.frame_subsampling_factor); + (info_.frames_per_chunk / info_.opts.frame_subsampling_factor); } void DecodableNnetSimpleLooped::GetCurrentIvector(int32 input_frame, Vector *ivector) { - if (!info_.has_ivectors_) + if (!info_.has_ivectors) return; if (ivector_ != NULL) { *ivector = *ivector_; diff --git a/src/nnet3/decodable-simple-looped.h b/src/nnet3/decodable-simple-looped.h index 5aba5b10505..ca3f732641e 100644 --- a/src/nnet3/decodable-simple-looped.h +++ b/src/nnet3/decodable-simple-looped.h @@ -51,7 +51,6 @@ struct NnetSimpleLoopedComputationOptions { bool debug_computation; NnetOptimizeOptions optimize_config; NnetComputeOptions compute_config; - NnetSimpleLoopedComputationOptions(): extra_left_context_initial(0), frame_subsampling_factor(1), @@ -95,9 +94,6 @@ struct NnetSimpleLoopedComputationOptions { } }; -// forward declaration. -class DecodableNnetSimpleLooped; - /** When you instantiate class DecodableNnetSimpleLooped, you should give it @@ -110,6 +106,8 @@ class DecodableNnetSimpleLoopedInfo { DecodableNnetSimpleLoopedInfo(const NnetSimpleLoopedComputationOptions &opts, Nnet *nnet); + // This constructor takes the priors from class AmNnetSimple (so it can divide by + // them). DecodableNnetSimpleLoopedInfo(const NnetSimpleLoopedComputationOptions &opts, AmNnetSimple *nnet); @@ -118,43 +116,41 @@ class DecodableNnetSimpleLoopedInfo { const Vector &priors, Nnet *nnet); - protected: void Init(const NnetSimpleLoopedComputationOptions &opts, Nnet *nnet); - friend class DecodableNnetSimpleLooped; - + const NnetSimpleLoopedComputationOptions &opts; - const NnetSimpleLoopedComputationOptions &opts_; - const Nnet &nnet_; + const Nnet &nnet; // the log priors (or the empty vector if the priors are not set in the model) - CuVector log_priors_; + CuVector log_priors; - // frames_left_context equals the model left context plus any extra left context. - int32 frames_left_context_; + // frames_left_context equals the model left context plus the value of the + // --extra-left-context-initial option. + int32 frames_left_context; // frames_right_context is the same as the right-context of the model. 
- int32 frames_right_context_; + int32 frames_right_context; // The frames_per_chunk_ equals the number of input frames we need for each // chunk (except for the first chunk). This divided by // opts_.frame_subsampling_factor gives the number of output frames. - int32 frames_per_chunk_; + int32 frames_per_chunk; // The output dimension of the neural network. - int32 output_dim_; + int32 output_dim; // True if the neural net accepts iVectors. If so, the neural net will have been modified // to accept the iVectors - bool has_ivectors_; + bool has_ivectors; // The 3 computation requests that are used to create the looped // computation are stored in the class, as we need them to work out // exactly shich iVectors are needed. - ComputationRequest request1_, request2_, request3_; - + ComputationRequest request1, request2, request3; + // The compiled, 'looped' computation. - NnetComputation computation_; + NnetComputation computation; }; /* @@ -197,7 +193,7 @@ class DecodableNnetSimpleLooped { // 1). inline int32 NumFrames() const { return num_subsampled_frames_; } - inline int32 OutputDim() const { return info_.output_dim_; } + inline int32 OutputDim() const { return info_.output_dim; } // Gets the output for a particular frame, with 0 <= frame < NumFrames(). // 'output' must be correctly sized (with dimension OutputDim()). Note: diff --git a/src/nnet3/nnet-am-decodable-simple.h b/src/nnet3/nnet-am-decodable-simple.h index 6b382fbe033..1895303d125 100644 --- a/src/nnet3/nnet-am-decodable-simple.h +++ b/src/nnet3/nnet-am-decodable-simple.h @@ -328,7 +328,7 @@ class DecodableAmNnetSimpleParallel: public DecodableInterface { CachingOptimizingCompiler-- because making that thread safe would be quite complicated, and in any case multi-threaded decoding probably makes the most sense when using CPU, and - in that case won't expect the compilation phase to dominate. + in that case we don't expect the compilation phase to dominate. This constructor takes features as input, and you can either supply a single iVector input, estimated in batch-mode ('ivector'), or 'online' diff --git a/src/nnet3/nnet-compile-looped.cc b/src/nnet3/nnet-compile-looped.cc index 1237ba6ce1e..70f88615ab9 100644 --- a/src/nnet3/nnet-compile-looped.cc +++ b/src/nnet3/nnet-compile-looped.cc @@ -321,7 +321,7 @@ void CompileLooped(const Nnet &nnet, request1, request2, request3, num_requests, computation)) { KALDI_LOG << "Spent " << timer.Elapsed() - << " seconds in looped nnet3 compilation."; + << " seconds in looped compilation."; return; } else { KALDI_VLOG(2) << "Looped compilation failed with " diff --git a/src/nnet3/online-nnet3-decodable-simple.cc b/src/nnet3/online-nnet3-decodable-simple.cc deleted file mode 100644 index 010dc80991a..00000000000 --- a/src/nnet3/online-nnet3-decodable-simple.cc +++ /dev/null @@ -1,221 +0,0 @@ -// nnet3/online-nnet3-decodable.cc - -// Copyright 2014 Johns Hopkins University (author: Daniel Povey) -// 2016 Api.ai (Author: Ilya Platonov) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include "nnet3/nnet-utils.h" - -namespace kaldi { -namespace nnet3 { - -DecodableNnet3SimpleOnline::DecodableNnet3SimpleOnline( - const AmNnetSimple &am_nnet, - const TransitionModel &trans_model, - const DecodableNnet3OnlineOptions &opts, - OnlineFeatureInterface *input_feats): - compiler_(am_nnet.GetNnet(), opts.optimize_config), - features_(input_feats), - am_nnet_(am_nnet), - trans_model_(trans_model), - opts_(opts), - feat_dim_(input_feats->Dim()), - num_pdfs_(am_nnet.GetNnet().OutputDim("output")), - begin_frame_(-1) { - KALDI_ASSERT(opts_.max_nnet_batch_size > 0); - log_priors_ = am_nnet_.Priors(); - KALDI_ASSERT((log_priors_.Dim() == 0 || log_priors_.Dim() == trans_model_.NumPdfs()) && - "Priors in neural network must match with transition model (if exist)."); - - ComputeSimpleNnetContext(am_nnet_.GetNnet(), &left_context_, &right_context_); - log_priors_.ApplyLog(); - - // Check that the dimensions are correct. - int32 input_dim = am_nnet_.GetNnet().InputDim("input"); - int32 ivector_dim = std::max(0, am_nnet_.GetNnet().InputDim("ivector")); - // We use feature extraction code that was designed for nnet2, which just - // appends the mfcc and ivector features. So here we have to separate them - // again. This code just checks that the dimension is as we expect. - int32 feature_dim = features_->Dim(); - if (feature_dim != input_dim + ivector_dim) { - KALDI_ERR << "Dimension of features " << feature_dim << " does not equal " - << "input dim " << input_dim << " + ivector dim " << ivector_dim - << " of neural network. Likely the config and neural net " - << "mismatch."; - } -} - - - -BaseFloat DecodableNnet3SimpleOnline::LogLikelihood(int32 frame, int32 index) { - ComputeForFrame(frame); - int32 pdf_id = trans_model_.TransitionIdToPdf(index); - KALDI_ASSERT(frame >= begin_frame_ && - frame < begin_frame_ + scaled_loglikes_.NumRows()); - return scaled_loglikes_(frame - begin_frame_, pdf_id); -} - - -bool DecodableNnet3SimpleOnline::IsLastFrame(int32 frame) const { - KALDI_ASSERT(false && "Method is not imlemented"); - return false; -} - -int32 DecodableNnet3SimpleOnline::NumFramesReady() const { - int32 features_ready = features_->NumFramesReady(); - if (features_ready == 0) - return 0; - bool input_finished = features_->IsLastFrame(features_ready - 1); - if (opts_.pad_input) { - // normal case... we'll pad with duplicates of first + last frame to get the - // required left and right context. 
- if (input_finished) return NumSubsampledFrames(features_ready); - else return std::max(0, NumSubsampledFrames(features_ready - right_context_)); - } else { - return std::max(0, NumSubsampledFrames(features_ready - right_context_ - left_context_)); - } -} - -int32 DecodableNnet3SimpleOnline::NumSubsampledFrames(int32 num_frames) const { - return (num_frames) / opts_.frame_subsampling_factor; -} - -void DecodableNnet3SimpleOnline::ComputeForFrame(int32 subsampled_frame) { - int32 features_ready = features_->NumFramesReady(); - bool input_finished = features_->IsLastFrame(features_ready - 1); - KALDI_ASSERT(subsampled_frame >= 0); - if (subsampled_frame >= begin_frame_ && - subsampled_frame < begin_frame_ + scaled_loglikes_.NumRows()) - return; - KALDI_ASSERT(subsampled_frame < NumFramesReady()); - - int32 subsample = opts_.frame_subsampling_factor; - - int32 input_frame_begin; - if (opts_.pad_input) - input_frame_begin = subsampled_frame * subsample - left_context_; - else - input_frame_begin = subsampled_frame * subsample; - int32 max_possible_input_frame_end = features_ready; - if (input_finished && opts_.pad_input) - max_possible_input_frame_end += right_context_; - int32 input_frame_end = std::min(max_possible_input_frame_end, - input_frame_begin + - left_context_ + right_context_ + - opts_.max_nnet_batch_size); - KALDI_ASSERT(input_frame_end > input_frame_begin); - Matrix features(input_frame_end - input_frame_begin, - feat_dim_); - for (int32 t = input_frame_begin; t < input_frame_end; t++) { - SubVector row(features, t - input_frame_begin); - int32 t_modified = t; - // The next two if-statements take care of "pad_input" - if (t_modified < 0) - t_modified = 0; - if (t_modified >= features_ready) - t_modified = features_ready - 1; - features_->GetFrame(t_modified, &row); - } - - int32 num_subsampled_frames = NumSubsampledFrames(input_frame_end - input_frame_begin - - left_context_ - right_context_); - int32 mfcc_dim = am_nnet_.GetNnet().InputDim("input"); - int32 ivector_dim = am_nnet_.GetNnet().InputDim("ivector"); - // MFCCs in the left chunk - SubMatrix mfcc_mat(features.ColRange(0, mfcc_dim)); - - Vector input_ivector; - if(ivector_dim != -1){ - // iVectors in the right chunk - KALDI_ASSERT(features.NumCols() == mfcc_dim + ivector_dim && "Mismatch in features dim"); - SubMatrix ivector_mat(features.ColRange(mfcc_dim, ivector_dim)); - // Get last ivector... not sure if GetCurrentIvector is needed in the online context - // I think it should work fine just getting the last row for testing - input_ivector = ivector_mat.Row(ivector_mat.NumRows() - 1); - } - - DoNnetComputation(input_frame_begin, - mfcc_mat, input_ivector, subsampled_frame * subsample, num_subsampled_frames); - - begin_frame_ = subsampled_frame; -} - -void DecodableNnet3SimpleOnline::DoNnetComputation( - int32 input_t_start, - const MatrixBase &input_feats, - const VectorBase &ivector, - int32 output_t_start, - int32 num_subsampled_frames) { - ComputationRequest request; - request.need_model_derivative = false; - request.store_component_stats = false; - - bool shift_time = true; // shift the 'input' and 'output' to a consistent - // time, to take advantage of caching in the compiler. - // An optimization. - int32 time_offset = (shift_time ? -output_t_start : 0); - - // First add the regular features-- named "input". 
- request.inputs.reserve(2); - request.inputs.push_back( - IoSpecification("input", time_offset + input_t_start, - time_offset + input_t_start + input_feats.NumRows())); - if (ivector.Dim() != 0) { - std::vector indexes; - indexes.push_back(Index(0, 0, 0)); - request.inputs.push_back(IoSpecification("ivector", indexes)); - } - IoSpecification output_spec; - output_spec.name = "output"; - output_spec.has_deriv = false; - int32 subsample = opts_.frame_subsampling_factor; - output_spec.indexes.resize(num_subsampled_frames); - // leave n and x values at 0 (the constructor sets these). - for (int32 i = 0; i < num_subsampled_frames; i++) - output_spec.indexes[i].t = time_offset + output_t_start + i * subsample; - request.outputs.resize(1); - request.outputs[0].Swap(&output_spec); - - const NnetComputation *computation = compiler_.Compile(request); - Nnet *nnet_to_update = NULL; // we're not doing any update. - NnetComputer computer(opts_.compute_config, *computation, - am_nnet_.GetNnet(), nnet_to_update); - - CuMatrix input_feats_cu(input_feats); - computer.AcceptInput("input", &input_feats_cu); - CuMatrix ivector_feats_cu; - if (ivector.Dim() > 0) { - ivector_feats_cu.Resize(1, ivector.Dim()); - ivector_feats_cu.Row(0).CopyFromVec(ivector); - computer.AcceptInput("ivector", &ivector_feats_cu); - } - computer.Run(); - CuMatrix cu_output; - computer.GetOutputDestructive("output", &cu_output); - // subtract log-prior (divide by prior) - if (log_priors_.Dim() != 0) - cu_output.AddVecToRows(-1.0, log_priors_); - // apply the acoustic scale - cu_output.Scale(opts_.acoustic_scale); - scaled_loglikes_.Resize(0, 0); - // the following statement just swaps the pointers if we're not using a GPU. - cu_output.Swap(&scaled_loglikes_); -} - -} // namespace nnet3 -} // namespace kaldi diff --git a/src/nnet3/online-nnet3-decodable-simple.h b/src/nnet3/online-nnet3-decodable-simple.h deleted file mode 100644 index af7c18da64b..00000000000 --- a/src/nnet3/online-nnet3-decodable-simple.h +++ /dev/null @@ -1,153 +0,0 @@ -// nnet3/online-nnet3-decodable-simple.h - -// Copyright 2014 Johns Hopkins Universithy (author: Daniel Povey) -// 2016 Api.ai (Author: Ilya Platonov) - - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_NNET3_ONLINE_NNET3_DECODABLE_H_ -#define KALDI_NNET3_ONLINE_NNET3_DECODABLE_H_ - -#include "itf/online-feature-itf.h" -#include "itf/decodable-itf.h" -#include "nnet3/am-nnet-simple.h" -#include "nnet3/nnet-compute.h" -#include "nnet3/nnet-optimize.h" -#include "hmm/transition-model.h" - -namespace kaldi { -namespace nnet3 { - -// Note: see also nnet-compute-online.h, which provides a different -// (lower-level) interface and more efficient for progressive evaluation of an -// nnet throughout an utterance, with re-use of already-computed activations. 
- -struct DecodableNnet3OnlineOptions { - int32 frame_subsampling_factor; - BaseFloat acoustic_scale; - bool pad_input; - int32 max_nnet_batch_size; - NnetComputeOptions compute_config; - NnetOptimizeOptions optimize_config; - - DecodableNnet3OnlineOptions(): - frame_subsampling_factor(1), - acoustic_scale(0.1), - pad_input(true), - max_nnet_batch_size(256) { } - - void Register(OptionsItf *opts) { - opts->Register("acoustic-scale", &acoustic_scale, - "Scaling factor for acoustic likelihoods"); - opts->Register("pad-input", &pad_input, - "If true, pad acoustic features with required acoustic context " - "past edges of file."); - opts->Register("max-nnet-batch-size", &max_nnet_batch_size, - "Maximum batch size we use in neural-network decodable object, " - "in cases where we are not constrained by currently available " - "frames (this will rarely make a difference)"); - - opts->Register("frame-subsampling-factor", &frame_subsampling_factor, - "Required if the frame-rate of the output (e.g. in 'chain' " - "models) is less than the frame-rate of the original " - "alignment."); - - // register the optimization options with the prefix "optimization". - ParseOptions optimization_opts("optimization", opts); - optimize_config.Register(&optimization_opts); - - // register the compute options with the prefix "computation". - ParseOptions compute_opts("computation", opts); - compute_config.Register(&compute_opts); - - } -}; - - -/** - This Decodable object for class nnet3::AmNnetSimple takes feature input from class - OnlineFeatureInterface, unlike, say, class DecodableAmNnet which takes - feature input from a matrix. -*/ - -class DecodableNnet3SimpleOnline: public DecodableInterface { - public: - DecodableNnet3SimpleOnline(const AmNnetSimple &am_nnet, - const TransitionModel &trans_model, - const DecodableNnet3OnlineOptions &opts, - OnlineFeatureInterface *input_feats); - - - /// Returns the scaled log likelihood - virtual BaseFloat LogLikelihood(int32 frame, int32 index); - - virtual bool IsLastFrame(int32 frame) const; - - virtual int32 NumFramesReady() const; - - /// Indices are one-based! This is for compatibility with OpenFst. - virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); } - - int32 FrameSubsamplingFactor() const { return opts_.frame_subsampling_factor; } - private: - - /// If the neural-network outputs for this frame are not cached, it computes - /// them (and possibly for some succeeding frames) - void ComputeForFrame(int32 frame); - // corrects number of frames by frame_subsampling_factor; - int32 NumSubsampledFrames(int32) const; - - void DoNnetComputation( - int32 input_t_start, - const MatrixBase &input_feats, - const VectorBase &ivector, - int32 output_t_start, - int32 num_subsampled_frames); - - CachingOptimizingCompiler compiler_; - - OnlineFeatureInterface *features_; - const AmNnetSimple &am_nnet_; - const TransitionModel &trans_model_; - DecodableNnet3OnlineOptions opts_; - CuVector log_priors_; // log-priors taken from the model. - int32 feat_dim_; // dimensionality of the input features. - int32 left_context_; // Left context of the network (cached here) - int32 right_context_; // Right context of the network (cached here) - int32 num_pdfs_; // Number of pdfs, equals output-dim of the network (cached - // here) - - int32 begin_frame_; // First frame for which scaled_loglikes_ is valid - // (i.e. the first frame of the batch of frames for - // which we've computed the output). 
- - // scaled_loglikes_ contains the neural network pseudo-likelihoods: the log of - // (prob divided by the prior), scaled by opts.acoustic_scale). We may - // compute this using the GPU, but we transfer it back to the system memory - // when we store it here. These scores are only kept for a subset of frames, - // starting at begin_frame_, whose length depends how many frames were ready - // at the time we called LogLikelihood(), and will never exceed - // opts_.max_nnet_batch_size. - Matrix scaled_loglikes_; - - KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableNnet3SimpleOnline); -}; - -} // namespace nnet3 -} // namespace kaldi - -#endif // KALDI_NNET3_ONLINE_NNET3_DECODABLE_H_ diff --git a/src/online2/online-nnet2-decoding-threaded.cc b/src/online2/online-nnet2-decoding-threaded.cc index 09c9a4f6f0b..feb711df904 100644 --- a/src/online2/online-nnet2-decoding-threaded.cc +++ b/src/online2/online-nnet2-decoding-threaded.cc @@ -26,7 +26,7 @@ namespace kaldi { ThreadSynchronizer::ThreadSynchronizer(): - abort_(false), + abort_(false), producer_waiting_(false), consumer_waiting_(false), num_errors_(0) { @@ -67,8 +67,8 @@ bool ThreadSynchronizer::UnlockSuccess(ThreadType t) { producer_semaphore_.Signal(); producer_waiting_ = false; } - - } + + } mutex_.Unlock(); return !abort_; } @@ -192,7 +192,7 @@ void SingleUtteranceNnet2DecoderThreaded::AcceptWaveform( KALDI_ASSERT(sampling_rate == sampling_rate_); } num_samples_received_ += wave_part.Dim(); - + if (wave_part.Dim() == 0) return; if (!waveform_synchronizer_.Lock(ThreadSynchronizer::kProducer)) { KALDI_ERR << "Failure locking mutex: decoding aborted."; @@ -310,9 +310,9 @@ void SingleUtteranceNnet2DecoderThreaded::GetAdaptationState( OnlineIvectorExtractorAdaptationState *adaptation_state) { feature_pipeline_mutex_.Lock(); // If this blocks, it shouldn't be for very long. feature_pipeline_.GetAdaptationState(adaptation_state); - feature_pipeline_mutex_.Unlock(); // If this blocks, it won't be for very long. + feature_pipeline_mutex_.Unlock(); // If this blocks, it won't be for very long. } - + void SingleUtteranceNnet2DecoderThreaded::GetLattice( bool end_of_utterance, CompactLattice *clat, @@ -324,7 +324,7 @@ void SingleUtteranceNnet2DecoderThreaded::GetLattice( if (final_relative_cost != NULL) *final_relative_cost = decoder_.FinalRelativeCost(); if (decoder_.NumFramesDecoded() == 0) { - const_cast(decoder_mutex_).Unlock(); + const_cast(decoder_mutex_).Unlock(); clat->SetFinal(clat->AddState(), CompactLatticeWeight::One()); return; @@ -332,7 +332,7 @@ void SingleUtteranceNnet2DecoderThreaded::GetLattice( Lattice raw_lat; decoder_.GetRawLattice(&raw_lat, end_of_utterance); const_cast(decoder_mutex_).Unlock(); - + if (!config_.decoder_opts.determinize_lattice) KALDI_ERR << "--determinize-lattice=false option is not supported at the moment"; @@ -354,7 +354,7 @@ void SingleUtteranceNnet2DecoderThreaded::GetBestPath( best_path->DeleteStates(); best_path->SetFinal(best_path->AddState(), LatticeWeight::One()); - if (final_relative_cost != NULL) + if (final_relative_cost != NULL) *final_relative_cost = std::numeric_limits::infinity(); } else { decoder_.GetBestPath(best_path, @@ -447,7 +447,7 @@ void SingleUtteranceNnet2DecoderThreaded::ProcessLoglikes( // locked feature_pipeline_mutex_. 
bool SingleUtteranceNnet2DecoderThreaded::FeatureComputation( int32 num_frames_consumed) { - + int32 num_frames_ready = feature_pipeline_.NumFramesReady(), num_frames_usable = num_frames_ready - num_frames_consumed; bool features_done = feature_pipeline_.IsLastFrame(num_frames_ready - 1); @@ -457,7 +457,7 @@ bool SingleUtteranceNnet2DecoderThreaded::FeatureComputation( } else { if (num_frames_usable >= config_.nnet_batch_size) return true; // We don't need more data yet. - + // Now try to get more data, if we can. if (!waveform_synchronizer_.Lock(ThreadSynchronizer::kConsumer)) { return false; @@ -506,12 +506,12 @@ bool SingleUtteranceNnet2DecoderThreaded::FeatureComputation( bool SingleUtteranceNnet2DecoderThreaded::RunNnetEvaluationInternal() { // if any of the Lock/Unlock functions return false, it's because AbortAllThreads() // was called. - + // This object is responsible for keeping track of the context, and avoiding // re-computing things we've already computed. bool pad_input = true; nnet2::NnetOnlineComputer computer(am_nnet_.GetNnet(), pad_input); - + // we declare the following as CuVector just to enable GPU support, but // we expect this code to be run on CPU in the normal case. CuVector log_inv_prior(am_nnet_.Priors()); @@ -525,7 +525,7 @@ bool SingleUtteranceNnet2DecoderThreaded::RunNnetEvaluationInternal() { // has produced, which may be less than num_frames_consumed due to the // right-context of the network. int32 num_frames_consumed = 0, num_frames_output = 0; - + while (true) { bool last_time = false; @@ -536,19 +536,21 @@ bool SingleUtteranceNnet2DecoderThreaded::RunNnetEvaluationInternal() { return false; } // take care of silence weighting. - if (silence_weighting_.Active()) { + if (silence_weighting_.Active() && + feature_pipeline_.IvectorFeature() != NULL) { silence_weighting_mutex_.Lock(); std::vector > delta_weights; - silence_weighting_.GetDeltaWeights(feature_pipeline_.NumFramesReady(), - &delta_weights); + silence_weighting_.GetDeltaWeights( + feature_pipeline_.IvectorFeature()->NumFramesReady(), + &delta_weights); silence_weighting_mutex_.Unlock(); - feature_pipeline_.UpdateFrameWeights(delta_weights); + feature_pipeline_.IvectorFeature()->UpdateFrameWeights(delta_weights); } - + int32 num_frames_ready = feature_pipeline_.NumFramesReady(), num_frames_usable = num_frames_ready - num_frames_consumed; bool features_done = feature_pipeline_.IsLastFrame(num_frames_ready - 1); - + int32 num_frames_evaluate = std::min(num_frames_usable, config_.nnet_batch_size); @@ -563,10 +565,10 @@ bool SingleUtteranceNnet2DecoderThreaded::RunNnetEvaluationInternal() { } } /****** End locking of feature pipeline mutex. ******/ - feature_pipeline_mutex_.Unlock(); + feature_pipeline_mutex_.Unlock(); CuMatrix cu_loglikes; - + if (feats.NumRows() == 0) { if (features_done) { // flush out the last few frames. Note: this is the only place from @@ -587,7 +589,7 @@ bool SingleUtteranceNnet2DecoderThreaded::RunNnetEvaluationInternal() { num_frames_consumed += cu_feats.NumRows(); ProcessLoglikes(log_inv_prior, &cu_loglikes); } - + Matrix loglikes; loglikes.Swap(&cu_loglikes); // If we don't have a GPU (and not having a // GPU is the normal expected use-case for @@ -596,8 +598,8 @@ bool SingleUtteranceNnet2DecoderThreaded::RunNnetEvaluationInternal() { // OK, at this point we may have some newly created log-likes and we want to - // give them to the decoding thread. - + // give them to the decoding thread. 
+ int32 num_loglike_frames = loglikes.NumRows(); if (num_loglike_frames != 0) { // if we need to output some loglikes... @@ -644,7 +646,7 @@ bool SingleUtteranceNnet2DecoderThreaded::RunNnetEvaluationInternal() { } } } - + bool SingleUtteranceNnet2DecoderThreaded::RunDecoderSearchInternal() { int32 num_frames_decoded = 0; // this is just a copy of decoder_->NumFramesDecoded(); diff --git a/src/online2/online-nnet2-feature-pipeline.cc b/src/online2/online-nnet2-feature-pipeline.cc index fe79dbfd114..510c401fba2 100644 --- a/src/online2/online-nnet2-feature-pipeline.cc +++ b/src/online2/online-nnet2-feature-pipeline.cc @@ -168,12 +168,6 @@ void OnlineNnet2FeaturePipeline::AcceptWaveform( pitch_->AcceptWaveform(sampling_rate, waveform); } -void OnlineNnet2FeaturePipeline::UpdateFrameWeights( - const std::vector > &delta_weights) { - if (ivector_feature_ != NULL) - ivector_feature_->UpdateFrameWeights(delta_weights); -} - void OnlineNnet2FeaturePipeline::InputFinished() { base_feature_->InputFinished(); if (pitch_) diff --git a/src/online2/online-nnet2-feature-pipeline.h b/src/online2/online-nnet2-feature-pipeline.h index 77746bbd634..d8f933a090d 100644 --- a/src/online2/online-nnet2-feature-pipeline.h +++ b/src/online2/online-nnet2-feature-pipeline.h @@ -52,6 +52,9 @@ namespace kaldi { /// /// Most of the logic for the actual iVector estimation is in \ref /// online-ivector-feature.h, this header contains mostly glue. +/// +/// Although the name of this header mentions nnet2, actually the code is +/// used in the online decoding with nnet3 also. /// This configuration class is to set up OnlineNnet2FeaturePipelineInfo, which @@ -74,7 +77,7 @@ struct OnlineNnet2FeaturePipelineConfig { // the following contains the type of options that you could give to // compute-and-process-kaldi-pitch-feats. std::string online_pitch_config; - + // The configuration variables in ivector_extraction_config relate to the // iVector extractor and options related to it, see type // OnlineIvectorExtractionConfig. @@ -87,7 +90,7 @@ struct OnlineNnet2FeaturePipelineConfig { OnlineNnet2FeaturePipelineConfig(): feature_type("mfcc"), add_pitch(false) { } - + void Register(OptionsItf *opts) { opts->Register("feature-type", &feature_type, @@ -125,11 +128,11 @@ struct OnlineNnet2FeaturePipelineInfo { OnlineNnet2FeaturePipelineInfo( const OnlineNnet2FeaturePipelineConfig &config); - + BaseFloat FrameShiftInSeconds() const; std::string feature_type; // "mfcc" or "plp" or "fbank" - + MfccOptions mfcc_opts; // options for MFCC computation, // if feature_type == "mfcc" PlpOptions plp_opts; // Options for PLP computation, if feature_type == "plp" @@ -153,7 +156,7 @@ struct OnlineNnet2FeaturePipelineInfo { // it's the kind of thing you might want to play with directly // on the command line instead of inside sub-config-files. OnlineSilenceWeightingConfig silence_weighting_config; - + int32 IvectorDim() { return ivector_extractor_info.extractor.IvectorDim(); } private: KALDI_DISALLOW_COPY_AND_ASSIGN(OnlineNnet2FeaturePipelineInfo); @@ -198,7 +201,7 @@ class OnlineNnet2FeaturePipeline: public OnlineFeatureInterface { /// Copy(). 
void SetAdaptationState( const OnlineIvectorExtractorAdaptationState &adaptation_state); - + /// Get the adaptation state; you may want to call this before destroying this /// object, to get adaptation state that can be used to improve decoding of @@ -208,7 +211,7 @@ class OnlineNnet2FeaturePipeline: public OnlineFeatureInterface { void GetAdaptationState( OnlineIvectorExtractorAdaptationState *adaptation_state) const; - + /// Accept more data to process. It won't actually process it until you call /// GetFrame() [probably indirectly via (decoder).AdvanceDecoding()], when you /// call this function it will just copy it). sampling_rate is necessary just @@ -216,12 +219,6 @@ class OnlineNnet2FeaturePipeline: public OnlineFeatureInterface { void AcceptWaveform(BaseFloat sampling_rate, const VectorBase &waveform); - /// This is used in case you are downweighting silence in the iVector - /// estimation using the decoder traceback. - void UpdateFrameWeights( - const std::vector > &delta_weights); - - BaseFloat FrameShiftInSeconds() const { return info_.FrameShiftInSeconds(); } /// If you call InputFinished(), it tells the class you won't be providing any @@ -231,13 +228,28 @@ class OnlineNnet2FeaturePipeline: public OnlineFeatureInterface { /// rescoring the lattices, this may not be much of an issue. void InputFinished(); + // This function returns the ivector-extracting part of the feature pipeline + // (or NULL if iVectors are not being used); the pointer is owned here and not + // given to the caller. This function is used in nnet3, and also in the + // silence-weighting code used to exclude silence from the iVector estimation. + OnlineIvectorFeature *IvectorFeature() { + return ivector_feature_; + } + + // This function returns the part of the feature pipeline that would be given + // as the primary (non-iVector) input to the neural network in nnet3 + // applications. + OnlineFeatureInterface *InputFeature() { + return feature_plus_optional_pitch_; + } + virtual ~OnlineNnet2FeaturePipeline(); private: const OnlineNnet2FeaturePipelineInfo &info_; OnlineBaseFeature *base_feature_; // MFCC/PLP/filterbank - + OnlinePitchFeature *pitch_; // Raw pitch, if used OnlineProcessPitch *pitch_feature_; // Processed pitch, if pitch used. @@ -245,15 +257,15 @@ class OnlineNnet2FeaturePipeline: public OnlineFeatureInterface { // feature_plus_pitch_ is the base_feature_ appended (OnlineAppendFeature) /// with pitch_feature_, if used; otherwise, points to the same address as /// base_feature_. - OnlineFeatureInterface *feature_plus_optional_pitch_; - + OnlineFeatureInterface *feature_plus_optional_pitch_; + OnlineIvectorFeature *ivector_feature_; // iVector feature, if used. // final_feature_ is feature_plus_optional_pitch_ appended // (OnlineAppendFeature) with ivector_feature_, if ivector_feature_ is used; // otherwise, points to the same address as feature_plus_optional_pitch_. OnlineFeatureInterface *final_feature_; - + // we cache the feature dimension, to save time when calling Dim(). 
int32 dim_; }; diff --git a/src/online2/online-nnet3-decoding.cc b/src/online2/online-nnet3-decoding.cc index 8dd366166c0..ff74c07f10c 100644 --- a/src/online2/online-nnet3-decoding.cc +++ b/src/online2/online-nnet3-decoding.cc @@ -25,16 +25,17 @@ namespace kaldi { SingleUtteranceNnet3Decoder::SingleUtteranceNnet3Decoder( - const OnlineNnet3DecodingConfig &config, - const TransitionModel &tmodel, - const nnet3::AmNnetSimple &am_model, + const LatticeFasterDecoderConfig &decoder_opts, + const TransitionModel &trans_model, + const nnet3::DecodableNnetSimpleLoopedInfo &info, const fst::Fst &fst, - OnlineFeatureInterface *feature_pipeline): - config_(config), - feature_pipeline_(feature_pipeline), - tmodel_(tmodel), - decodable_(am_model, tmodel, config.decodable_opts, feature_pipeline), - decoder_(fst, config.decoder_opts) { + OnlineNnet2FeaturePipeline *features): + decoder_opts_(decoder_opts), + input_feature_frame_shift_in_seconds_(features->FrameShiftInSeconds()), + trans_model_(trans_model), + decodable_(trans_model_, info, + features->InputFeature(), features->IvectorFeature()), + decoder_(fst, decoder_opts_) { decoder_.InitDecoding(); } @@ -57,12 +58,12 @@ void SingleUtteranceNnet3Decoder::GetLattice(bool end_of_utterance, Lattice raw_lat; decoder_.GetRawLattice(&raw_lat, end_of_utterance); - if (!config_.decoder_opts.determinize_lattice) + if (!decoder_opts_.determinize_lattice) KALDI_ERR << "--determinize-lattice=false option is not supported at the moment"; - BaseFloat lat_beam = config_.decoder_opts.lattice_beam; + BaseFloat lat_beam = decoder_opts_.lattice_beam; DeterminizeLatticePhonePrunedWrapper( - tmodel_, &raw_lat, lat_beam, clat, config_.decoder_opts.det_opts); + trans_model_, &raw_lat, lat_beam, clat, decoder_opts_.det_opts); } void SingleUtteranceNnet3Decoder::GetBestPath(bool end_of_utterance, @@ -72,12 +73,12 @@ void SingleUtteranceNnet3Decoder::GetBestPath(bool end_of_utterance, bool SingleUtteranceNnet3Decoder::EndpointDetected( const OnlineEndpointConfig &config) { - int32 subsample = decodable_.FrameSubsamplingFactor(); - return kaldi::EndpointDetected(config, tmodel_, - feature_pipeline_->FrameShiftInSeconds() * subsample, - decoder_); + BaseFloat output_frame_shift = + input_feature_frame_shift_in_seconds_ * + decodable_.FrameSubsamplingFactor(); + return kaldi::EndpointDetected(config, trans_model_, + output_frame_shift, decoder_); } } // namespace kaldi - diff --git a/src/online2/online-nnet3-decoding.h b/src/online2/online-nnet3-decoding.h index 788c713080b..1888b71dbf1 100644 --- a/src/online2/online-nnet3-decoding.h +++ b/src/online2/online-nnet3-decoding.h @@ -26,12 +26,13 @@ #include #include -#include "nnet3/online-nnet3-decodable-simple.h" +#include "nnet3/decodable-online-looped.h" #include "matrix/matrix-lib.h" #include "util/common-utils.h" #include "base/kaldi-error.h" #include "itf/online-feature-itf.h" #include "online2/online-endpoint.h" +#include "online2/online-nnet2-feature-pipeline.h" #include "decoder/lattice-faster-online-decoder.h" #include "hmm/transition-model.h" #include "hmm/posterior.h" @@ -41,40 +42,21 @@ namespace kaldi { /// @{ - - - -// This configuration class contains the configuration classes needed to create -// the class SingleUtteranceNnet3Decoder. The actual command line program -// requires other configs that it creates separately, and which are not included -// here: namely, OnlineNnet2FeaturePipelineConfig and OnlineEndpointConfig. 
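// A sketch of the setup sequence the rewritten constructor above expects,
// assuming trans_model, am_nnet, decode_fst and feature_info have been loaded
// the way the example binaries later in this patch load them:
//
//   LatticeFasterDecoderConfig decoder_opts;
//   nnet3::NnetSimpleLoopedComputationOptions decodable_opts;
//   // built once; shared by the decodable objects of all utterances:
//   nnet3::DecodableNnetSimpleLoopedInfo info(decodable_opts, &am_nnet);
//   OnlineNnet2FeaturePipeline pipeline(feature_info);
//   SingleUtteranceNnet3Decoder decoder(decoder_opts, trans_model, info,
//                                       *decode_fst, &pipeline);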
-struct OnlineNnet3DecodingConfig { - - LatticeFasterDecoderConfig decoder_opts; - nnet3::DecodableNnet3OnlineOptions decodable_opts; - - OnlineNnet3DecodingConfig() { decodable_opts.acoustic_scale = 0.1; } - - void Register(OptionsItf *opts) { - decoder_opts.Register(opts); - decodable_opts.Register(opts); - } -}; - /** You will instantiate this class when you want to decode a single utterance using the online-decoding setup for neural nets. */ class SingleUtteranceNnet3Decoder { public: - // Constructor. The feature_pipeline_ pointer is not owned in this - // class, it's owned externally. - SingleUtteranceNnet3Decoder(const OnlineNnet3DecodingConfig &config, - const TransitionModel &tmodel, - const nnet3::AmNnetSimple &am_model, + + // Constructor. The pointer 'features' is not being given to this class to own + // and deallocate, it is owned externally. + SingleUtteranceNnet3Decoder(const LatticeFasterDecoderConfig &decoder_opts, + const TransitionModel &trans_model, + const nnet3::DecodableNnetSimpleLoopedInfo &info, const fst::Fst &fst, - OnlineFeatureInterface *feature_pipeline); - + OnlineNnet2FeaturePipeline *features); + /// advance the decoding as far as we can. void AdvanceDecoding(); @@ -84,7 +66,7 @@ class SingleUtteranceNnet3Decoder { void FinalizeDecoding(); int32 NumFramesDecoded() const; - + /// Gets the lattice. The output lattice has any acoustic scaling in it /// (which will typically be desirable in an online-decoding context); if you /// want an un-scaled lattice, scale it using ScaleLattice() with the inverse @@ -92,7 +74,7 @@ class SingleUtteranceNnet3Decoder { /// final-probs to be included. void GetLattice(bool end_of_utterance, CompactLattice *clat) const; - + /// Outputs an FST corresponding to the single best path through the current /// lattice. If "use_final_probs" is true AND we reached the final-state of /// the graph then it will include those as final-probs, else it will treat @@ -106,23 +88,27 @@ class SingleUtteranceNnet3Decoder { bool EndpointDetected(const OnlineEndpointConfig &config); const LatticeFasterOnlineDecoder &Decoder() const { return decoder_; } - + ~SingleUtteranceNnet3Decoder() { } private: - OnlineNnet3DecodingConfig config_; + const LatticeFasterDecoderConfig &decoder_opts_; + + // this is remembered from the constructor; it's ultimately + // derived from calling FrameShiftInSeconds() on the feature pipeline. + BaseFloat input_feature_frame_shift_in_seconds_; - OnlineFeatureInterface *feature_pipeline_; + // we need to keep a reference to the transition model around only because + // it's needed by the endpointing code. 
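// (Endpointing operates in output frames: as EndpointDetected() above
//  computes, the output frame shift is the input frame shift times the
//  frame-subsampling factor, e.g. 0.01 s * 3 = 0.03 s; the factor of 3 is
//  only a typical value for chain models, not something fixed here.)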
+ const TransitionModel &trans_model_; + + nnet3::DecodableAmNnetLoopedOnline decodable_; - const TransitionModel &tmodel_; - - nnet3::DecodableNnet3SimpleOnline decodable_; - LatticeFasterOnlineDecoder decoder_; - + }; - + /// @} End of "addtogroup onlinedecoding" } // namespace kaldi diff --git a/src/online2bin/online2-wav-nnet2-latgen-faster.cc b/src/online2bin/online2-wav-nnet2-latgen-faster.cc index ad8f323aea1..08e2c64995a 100644 --- a/src/online2bin/online2-wav-nnet2-latgen-faster.cc +++ b/src/online2bin/online2-wav-nnet2-latgen-faster.cc @@ -40,10 +40,10 @@ void GetDiagnosticsAndPrintOutput(const std::string &utt, } CompactLattice best_path_clat; CompactLatticeShortestPath(clat, &best_path_clat); - + Lattice best_path_lat; ConvertLattice(best_path_clat, &best_path_lat); - + double likelihood; LatticeWeight weight; int32 num_frames; @@ -57,7 +57,7 @@ void GetDiagnosticsAndPrintOutput(const std::string &utt, KALDI_VLOG(2) << "Likelihood per frame for utterance " << utt << " is " << (likelihood / num_frames) << " over " << num_frames << " frames."; - + if (word_syms != NULL) { std::cerr << utt << ' '; for (size_t i = 0; i < words.size(); i++) { @@ -76,10 +76,10 @@ int main(int argc, char *argv[]) { try { using namespace kaldi; using namespace fst; - + typedef kaldi::int32 int32; typedef kaldi::int64 int64; - + const char *usage = "Reads in wav file(s) and simulates online decoding with neural nets\n" "(nnet2 setup), with optional iVector-based speaker adaptation and\n" @@ -92,22 +92,22 @@ int main(int argc, char *argv[]) { "you want to decode utterance by utterance.\n" "See egs/rm/s5/local/run_online_decoding_nnet2.sh for example\n" "See also online2-wav-nnet2-latgen-threaded\n"; - + ParseOptions po(usage); - + std::string word_syms_rxfilename; - + OnlineEndpointConfig endpoint_config; // feature_config includes configuration for the iVector adaptation, // as well as the basic features. - OnlineNnet2FeaturePipelineConfig feature_config; + OnlineNnet2FeaturePipelineConfig feature_config; OnlineNnet2DecodingConfig nnet2_decoding_config; BaseFloat chunk_length_secs = 0.05; bool do_endpointing = false; bool online = true; - + po.Register("chunk-length", &chunk_length_secs, "Length of chunk size in seconds, that we process. 
Set to <= 0 " "to use all input in one chunk."); @@ -126,24 +126,24 @@ int main(int argc, char *argv[]) { "--chunk-length=-1."); po.Register("num-threads-startup", &g_num_threads, "Number of threads used when initializing iVector extractor."); - + feature_config.Register(&po); nnet2_decoding_config.Register(&po); endpoint_config.Register(&po); - + po.Read(argc, argv); - + if (po.NumArgs() != 5) { po.PrintUsage(); return 1; } - + std::string nnet2_rxfilename = po.GetArg(1), fst_rxfilename = po.GetArg(2), spk2utt_rspecifier = po.GetArg(3), wav_rspecifier = po.GetArg(4), clat_wspecifier = po.GetArg(5); - + OnlineNnet2FeaturePipelineInfo feature_info(feature_config); if (!online) { @@ -151,7 +151,7 @@ int main(int argc, char *argv[]) { feature_info.ivector_extractor_info.greedy_ivector_extractor = true; chunk_length_secs = -1.0; } - + TransitionModel trans_model; nnet2::AmNnet nnet; { @@ -160,25 +160,25 @@ int main(int argc, char *argv[]) { trans_model.Read(ki.Stream(), binary); nnet.Read(ki.Stream(), binary); } - + fst::Fst *decode_fst = ReadFstKaldi(fst_rxfilename); - + fst::SymbolTable *word_syms = NULL; if (word_syms_rxfilename != "") if (!(word_syms = fst::SymbolTable::ReadText(word_syms_rxfilename))) KALDI_ERR << "Could not read symbol table from file " << word_syms_rxfilename; - + int32 num_done = 0, num_err = 0; double tot_like = 0.0; int64 num_frames = 0; - + SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); RandomAccessTableReader wav_reader(wav_rspecifier); CompactLatticeWriter clat_writer(clat_wspecifier); - + OnlineTimingStats timing_stats; - + for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { std::string spk = spk2utt_reader.Key(); const std::vector &uttlist = spk2utt_reader.Value(); @@ -202,14 +202,14 @@ int main(int argc, char *argv[]) { OnlineSilenceWeighting silence_weighting( trans_model, feature_info.silence_weighting_config); - + SingleUtteranceNnet2Decoder decoder(nnet2_decoding_config, trans_model, nnet, *decode_fst, &feature_pipeline); OnlineTimer decoding_timer(utt); - + BaseFloat samp_freq = wave_data.SampFreq(); int32 chunk_length; if (chunk_length_secs > 0) { @@ -218,15 +218,15 @@ int main(int argc, char *argv[]) { } else { chunk_length = std::numeric_limits::max(); } - + int32 samp_offset = 0; std::vector > delta_weights; - + while (samp_offset < data.Dim()) { int32 samp_remaining = data.Dim() - samp_offset; int32 num_samp = chunk_length < samp_remaining ? chunk_length : samp_remaining; - + SubVector wave_part(data, samp_offset, num_samp); feature_pipeline.AcceptWaveform(samp_freq, wave_part); @@ -236,16 +236,19 @@ int main(int argc, char *argv[]) { // no more input. 
flush out last frames feature_pipeline.InputFinished(); } - - if (silence_weighting.Active()) { + + if (silence_weighting.Active() && + feature_pipeline.IvectorFeature() != NULL) { silence_weighting.ComputeCurrentTraceback(decoder.Decoder()); - silence_weighting.GetDeltaWeights(feature_pipeline.NumFramesReady(), - &delta_weights); - feature_pipeline.UpdateFrameWeights(delta_weights); + silence_weighting.GetDeltaWeights( + feature_pipeline.IvectorFeature()->NumFramesReady(), + &delta_weights); + feature_pipeline.IvectorFeature()->UpdateFrameWeights( + delta_weights); } - + decoder.AdvanceDecoding(); - + if (do_endpointing && decoder.EndpointDetected(endpoint_config)) break; } @@ -254,16 +257,16 @@ int main(int argc, char *argv[]) { CompactLattice clat; bool end_of_utterance = true; decoder.GetLattice(end_of_utterance, &clat); - + GetDiagnosticsAndPrintOutput(utt, word_syms, clat, &num_frames, &tot_like); - + decoding_timer.OutputStats(&timing_stats); - + // In an application you might avoid updating the adaptation state if // you felt the utterance had low confidence. See lat/confidence.h feature_pipeline.GetAdaptationState(&adaptation_state); - + // we want to output the lattice with un-scaled acoustics. BaseFloat inv_acoustic_scale = 1.0 / nnet2_decoding_config.decodable_opts.acoustic_scale; @@ -275,7 +278,7 @@ int main(int argc, char *argv[]) { } } timing_stats.Print(online); - + KALDI_LOG << "Decoded " << num_done << " utterances, " << num_err << " with errors."; KALDI_LOG << "Overall likelihood per frame was " << (tot_like / num_frames) diff --git a/src/online2bin/online2-wav-nnet3-latgen-faster.cc b/src/online2bin/online2-wav-nnet3-latgen-faster.cc index 740c9e2221b..62204460159 100644 --- a/src/online2bin/online2-wav-nnet3-latgen-faster.cc +++ b/src/online2bin/online2-wav-nnet3-latgen-faster.cc @@ -41,10 +41,10 @@ void GetDiagnosticsAndPrintOutput(const std::string &utt, } CompactLattice best_path_clat; CompactLatticeShortestPath(clat, &best_path_clat); - + Lattice best_path_lat; ConvertLattice(best_path_clat, &best_path_lat); - + double likelihood; LatticeWeight weight; int32 num_frames; @@ -58,7 +58,7 @@ void GetDiagnosticsAndPrintOutput(const std::string &utt, KALDI_VLOG(2) << "Likelihood per frame for utterance " << utt << " is " << (likelihood / num_frames) << " over " << num_frames << " frames."; - + if (word_syms != NULL) { std::cerr << utt << ' '; for (size_t i = 0; i < words.size(); i++) { @@ -77,10 +77,10 @@ int main(int argc, char *argv[]) { try { using namespace kaldi; using namespace fst; - + typedef kaldi::int32 int32; typedef kaldi::int64 int64; - + const char *usage = "Reads in wav file(s) and simulates online decoding with neural nets\n" "(nnet3 setup), with optional iVector-based speaker adaptation and\n" @@ -91,22 +91,22 @@ int main(int argc, char *argv[]) { " \n" "The spk2utt-rspecifier can just be if\n" "you want to decode utterance by utterance.\n"; - + ParseOptions po(usage); - + std::string word_syms_rxfilename; - - OnlineEndpointConfig endpoint_config; - // feature_config includes configuration for the iVector adaptation, + // feature_opts includes configuration for the iVector adaptation, // as well as the basic features. 
- OnlineNnet2FeaturePipelineConfig feature_config; - OnlineNnet3DecodingConfig nnet3_decoding_config; + OnlineNnet2FeaturePipelineConfig feature_opts; + nnet3::NnetSimpleLoopedComputationOptions decodable_opts; + LatticeFasterDecoderConfig decoder_opts; + OnlineEndpointConfig endpoint_opts; BaseFloat chunk_length_secs = 0.18; bool do_endpointing = false; bool online = true; - + po.Register("chunk-length", &chunk_length_secs, "Length of chunk size in seconds, that we process. Set to <= 0 " "to use all input in one chunk."); @@ -125,32 +125,34 @@ int main(int argc, char *argv[]) { "--chunk-length=-1."); po.Register("num-threads-startup", &g_num_threads, "Number of threads used when initializing iVector extractor."); - - feature_config.Register(&po); - nnet3_decoding_config.Register(&po); - endpoint_config.Register(&po); - + + feature_opts.Register(&po); + decodable_opts.Register(&po); + decoder_opts.Register(&po); + endpoint_opts.Register(&po); + + po.Read(argc, argv); - + if (po.NumArgs() != 5) { po.PrintUsage(); return 1; } - + std::string nnet3_rxfilename = po.GetArg(1), fst_rxfilename = po.GetArg(2), spk2utt_rspecifier = po.GetArg(3), wav_rspecifier = po.GetArg(4), clat_wspecifier = po.GetArg(5); - - OnlineNnet2FeaturePipelineInfo feature_info(feature_config); + + OnlineNnet2FeaturePipelineInfo feature_info(feature_opts); if (!online) { feature_info.ivector_extractor_info.use_most_recent_ivector = true; feature_info.ivector_extractor_info.greedy_ivector_extractor = true; chunk_length_secs = -1.0; } - + TransitionModel trans_model; nnet3::AmNnetSimple am_nnet; { @@ -159,25 +161,32 @@ int main(int argc, char *argv[]) { trans_model.Read(ki.Stream(), binary); am_nnet.Read(ki.Stream(), binary); } - + + // this object contains precomputed stuff that is used by all decodable + // objects. It takes a pointer to am_nnet because if it has iVectors it has + // to modify the nnet to accept iVectors at intervals. 
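// (It also holds the compiled computation, which is why it is constructed
//  once here, before the speaker and utterance loops below; constructing it
//  per utterance would repeat that compilation every time.)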
+ nnet3::DecodableNnetSimpleLoopedInfo decodable_info(decodable_opts, + &am_nnet); + + fst::Fst *decode_fst = ReadFstKaldi(fst_rxfilename); - + fst::SymbolTable *word_syms = NULL; if (word_syms_rxfilename != "") if (!(word_syms = fst::SymbolTable::ReadText(word_syms_rxfilename))) KALDI_ERR << "Could not read symbol table from file " << word_syms_rxfilename; - + int32 num_done = 0, num_err = 0; double tot_like = 0.0; int64 num_frames = 0; - + SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); RandomAccessTableReader wav_reader(wav_rspecifier); CompactLatticeWriter clat_writer(clat_wspecifier); - + OnlineTimingStats timing_stats; - + for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { std::string spk = spk2utt_reader.Key(); const std::vector &uttlist = spk2utt_reader.Value(); @@ -201,14 +210,12 @@ int main(int argc, char *argv[]) { OnlineSilenceWeighting silence_weighting( trans_model, feature_info.silence_weighting_config); - - SingleUtteranceNnet3Decoder decoder(nnet3_decoding_config, - trans_model, - am_nnet, - *decode_fst, - &feature_pipeline); + + SingleUtteranceNnet3Decoder decoder(decoder_opts, trans_model, + decodable_info, + *decode_fst, &feature_pipeline); OnlineTimer decoding_timer(utt); - + BaseFloat samp_freq = wave_data.SampFreq(); int32 chunk_length; if (chunk_length_secs > 0) { @@ -217,15 +224,15 @@ int main(int argc, char *argv[]) { } else { chunk_length = std::numeric_limits::max(); } - + int32 samp_offset = 0; std::vector > delta_weights; - + while (samp_offset < data.Dim()) { int32 samp_remaining = data.Dim() - samp_offset; int32 num_samp = chunk_length < samp_remaining ? chunk_length : samp_remaining; - + SubVector wave_part(data, samp_offset, num_samp); feature_pipeline.AcceptWaveform(samp_freq, wave_part); @@ -235,17 +242,18 @@ int main(int argc, char *argv[]) { // no more input. flush out last frames feature_pipeline.InputFinished(); } - - if (silence_weighting.Active()) { + + if (silence_weighting.Active() && + feature_pipeline.IvectorFeature() != NULL) { silence_weighting.ComputeCurrentTraceback(decoder.Decoder()); silence_weighting.GetDeltaWeights(feature_pipeline.NumFramesReady(), &delta_weights); - feature_pipeline.UpdateFrameWeights(delta_weights); + feature_pipeline.IvectorFeature()->UpdateFrameWeights(delta_weights); } - + decoder.AdvanceDecoding(); - - if (do_endpointing && decoder.EndpointDetected(endpoint_config)) + + if (do_endpointing && decoder.EndpointDetected(endpoint_opts)) break; } decoder.FinalizeDecoding(); @@ -253,19 +261,19 @@ int main(int argc, char *argv[]) { CompactLattice clat; bool end_of_utterance = true; decoder.GetLattice(end_of_utterance, &clat); - + GetDiagnosticsAndPrintOutput(utt, word_syms, clat, &num_frames, &tot_like); - + decoding_timer.OutputStats(&timing_stats); - + // In an application you might avoid updating the adaptation state if // you felt the utterance had low confidence. See lat/confidence.h feature_pipeline.GetAdaptationState(&adaptation_state); - + // we want to output the lattice with un-scaled acoustics. 
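// (Worked example: with the common default --acoustic-scale=0.1,
//  inv_acoustic_scale below is 1.0 / 0.1 = 10.0, and
//  ScaleLattice(AcousticLatticeScale(10.0), &clat) multiplies the acoustic
//  costs back up before the lattice is written; other scales invert the
//  same way.)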
BaseFloat inv_acoustic_scale = - 1.0 / nnet3_decoding_config.decodable_opts.acoustic_scale; + 1.0 / decodable_opts.acoustic_scale; ScaleLattice(AcousticLatticeScale(inv_acoustic_scale), &clat); clat_writer.Write(utt, clat); @@ -274,7 +282,7 @@ int main(int argc, char *argv[]) { } } timing_stats.Print(online); - + KALDI_LOG << "Decoded " << num_done << " utterances, " << num_err << " with errors."; KALDI_LOG << "Overall likelihood per frame was " << (tot_like / num_frames) From bd73932fba6523c6b31f9ba99ddadf84cf576625 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Fri, 10 Feb 2017 13:52:02 -0500 Subject: [PATCH 419/530] [src] Get rid of clang 3.9 warnings in table-reading code (#1414) --- src/util/kaldi-table-inl.h | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/src/util/kaldi-table-inl.h b/src/util/kaldi-table-inl.h index 5359d730b85..32d12dfa7d7 100644 --- a/src/util/kaldi-table-inl.h +++ b/src/util/kaldi-table-inl.h @@ -2093,15 +2093,13 @@ class RandomAccessTableReaderDSortedArchiveImpl: return FindKeyInternal(key); } virtual const T & Value(const std::string &key) { - if (FindKeyInternal(key)) { - KALDI_ASSERT(this->state_ == kHaveObject && key == this->cur_key_ - && holder_ != NULL); - return this->holder_->Value(); - } else { + if (!FindKeyInternal(key)) { KALDI_ERR << "Value() called but no such key " << key << " in archive " << PrintableRxfilename(archive_rxfilename_); - return *(const T*)NULL; // keep compiler happy. } + KALDI_ASSERT(this->state_ == kHaveObject && key == this->cur_key_ + && holder_ != NULL); + return this->holder_->Value(); } virtual ~RandomAccessTableReaderDSortedArchiveImpl() { @@ -2230,20 +2228,18 @@ class RandomAccessTableReaderSortedArchiveImpl: virtual const T & Value(const std::string &key) { HandlePendingDelete(); size_t index; - if (FindKeyInternal(key, &index)) { - if (seen_pairs_[index].second == NULL) { // can happen if opts.once_ - KALDI_ERR << "Error: Value() called more than once for key " - << key << " and once (o) option specified: rspecifier is " - << rspecifier_; - } - if (opts_.once) - pending_delete_ = index; // mark this index to be deleted on next call. - return seen_pairs_[index].second->Value(); - } else { + if (!FindKeyInternal(key, &index)) { KALDI_ERR << "Value() called but no such key " << key << " in archive " << PrintableRxfilename(archive_rxfilename_); - return *(const T*)NULL; // keep compiler happy. } + if (seen_pairs_[index].second == NULL) { // can happen if opts.once_ + KALDI_ERR << "Error: Value() called more than once for key " + << key << " and once (o) option specified: rspecifier is " + << rspecifier_; + } + if (opts_.once) + pending_delete_ = index; // mark this index to be deleted on next call. + return seen_pairs_[index].second->Value(); } virtual ~RandomAccessTableReaderSortedArchiveImpl() { if (this->IsOpen()) @@ -2418,12 +2414,10 @@ class RandomAccessTableReaderUnsortedArchiveImpl: virtual const T & Value(const std::string &key) { HandlePendingDelete(); const T *ans_ptr = NULL; - if (FindKeyInternal(key, &ans_ptr)) - return *ans_ptr; - else + if (!FindKeyInternal(key, &ans_ptr)) KALDI_ERR << "Value() called but no such key " << key << " in archive " << PrintableRxfilename(archive_rxfilename_); - return *(const T*)NULL; // keep compiler happy. 
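// (Why this rewrite silences clang 3.9: KALDI_ERR throws, so the old
//  "return *(const T*)NULL;" after it was unreachable, and a NULL
//  dereference in dead code is exactly what the compiler flagged; with the
//  test inverted, each Value() now ends on its one reachable return.)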
+ return *ans_ptr; } virtual ~RandomAccessTableReaderUnsortedArchiveImpl() { if (this->IsOpen()) From cc1d677f9cbad14e937c5e30951001dddb771b33 Mon Sep 17 00:00:00 2001 From: Nick Date: Fri, 10 Feb 2017 14:04:53 -0800 Subject: [PATCH 420/530] [egs] egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh, change default stage to 0 (#1416) --- egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh b/egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh index 158a5148fb5..d9ca900ac63 100755 --- a/egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh +++ b/egs/fisher_swbd/s5/local/online/run_nnet2_ms.sh @@ -3,7 +3,7 @@ . cmd.sh -stage=6 +stage=0 train_stage=451 use_gpu=true rescore=true From 0f8905a181f031aab88d889435744de1a70a6934 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Fri, 10 Feb 2017 17:07:34 -0500 Subject: [PATCH 421/530] [scripts] Checking ivector extractor id, handle cases when ivector ids do not exist (#1417) --- egs/wsj/s5/steps/libs/common.py | 5 +++++ egs/wsj/s5/steps/nnet2/get_ivector_id.sh | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/libs/common.py b/egs/wsj/s5/steps/libs/common.py index 66a02062e9c..9d01fae3027 100644 --- a/egs/wsj/s5/steps/libs/common.py +++ b/egs/wsj/s5/steps/libs/common.py @@ -299,6 +299,10 @@ def get_ivector_extractor_id(ivector_dir=None): return None [stdout_val, stderr_val] = run_kaldi_command( "steps/nnet2/get_ivector_id.sh {dir}".format(dir=ivector_dir)) + + if (stdout_val.strip() == "") or (stdout_val is None): + return None + return stdout_val.strip() def get_feat_dim(feat_dir): @@ -409,3 +413,4 @@ def write_idct_matrix(feat_dim, cepstral_lifter, file_path): for k in range(0, feat_dim): idct_matrix[k].append(0) write_kaldi_matrix(file_path, idct_matrix) + diff --git a/egs/wsj/s5/steps/nnet2/get_ivector_id.sh b/egs/wsj/s5/steps/nnet2/get_ivector_id.sh index d7be853349d..3ec70757d5a 100755 --- a/egs/wsj/s5/steps/nnet2/get_ivector_id.sh +++ b/egs/wsj/s5/steps/nnet2/get_ivector_id.sh @@ -33,7 +33,7 @@ elif [ -f $ivecdir/final.ie ] ; then echo "$id" > $ivecdir/final.ie.id || exit 1 cat $ivecdir/final.ie.id else - exit 1 + exit 0 fi exit 0 From 92262025540b073a0acbecc8eedc098783a01360 Mon Sep 17 00:00:00 2001 From: schemreier Date: Sat, 11 Feb 2017 18:36:21 +0100 Subject: [PATCH 422/530] [egs] Add example scripts for Frisian-Dutch language (FAME! 
corpus) --- egs/fame/README.txt | 15 +++ egs/fame/s5/RESULTS | 28 ++++++ egs/fame/s5/cmd.sh | 1 + egs/fame/s5/conf/decode_dnn.config | 2 + egs/fame/s5/conf/fbank.conf | 2 + egs/fame/s5/conf/mfcc.conf | 1 + egs/fame/s5/conf/mfcc_hires.conf | 10 ++ egs/fame/s5/conf/online_cmvn.conf | 1 + egs/fame/s5/local/fame_data_prep.sh | 53 ++++++++++ egs/fame/s5/local/fame_dict_prep.sh | 36 +++++++ egs/fame/s5/local/nnet/run_dnn.sh | 120 ++++++++++++++++++++++ egs/fame/s5/local/nnet/run_dnn_fbank.sh | 125 +++++++++++++++++++++++ egs/fame/s5/local/score.sh | 1 + egs/fame/s5/local/wer_hyp_filter | 2 + egs/fame/s5/local/wer_output_filter | 2 + egs/fame/s5/local/wer_ref_filter | 2 + egs/fame/s5/path.sh | 6 ++ egs/fame/s5/run.sh | 127 ++++++++++++++++++++++++ egs/fame/s5/steps | 1 + egs/fame/s5/utils | 1 + 20 files changed, 536 insertions(+) create mode 100644 egs/fame/README.txt create mode 100644 egs/fame/s5/RESULTS create mode 120000 egs/fame/s5/cmd.sh create mode 100644 egs/fame/s5/conf/decode_dnn.config create mode 100644 egs/fame/s5/conf/fbank.conf create mode 100644 egs/fame/s5/conf/mfcc.conf create mode 100644 egs/fame/s5/conf/mfcc_hires.conf create mode 100644 egs/fame/s5/conf/online_cmvn.conf create mode 100755 egs/fame/s5/local/fame_data_prep.sh create mode 100755 egs/fame/s5/local/fame_dict_prep.sh create mode 100755 egs/fame/s5/local/nnet/run_dnn.sh create mode 100755 egs/fame/s5/local/nnet/run_dnn_fbank.sh create mode 120000 egs/fame/s5/local/score.sh create mode 100755 egs/fame/s5/local/wer_hyp_filter create mode 100755 egs/fame/s5/local/wer_output_filter create mode 100755 egs/fame/s5/local/wer_ref_filter create mode 100755 egs/fame/s5/path.sh create mode 100755 egs/fame/s5/run.sh create mode 120000 egs/fame/s5/steps create mode 120000 egs/fame/s5/utils diff --git a/egs/fame/README.txt b/egs/fame/README.txt new file mode 100644 index 00000000000..d2ed39eef75 --- /dev/null +++ b/egs/fame/README.txt @@ -0,0 +1,15 @@ +The FAME! Speech Corpus + +The components of the Frisian data collection are speech and language resources gathered for building a large vocabulary ASR system for the Frisian language. Firstly, a new broadcast database is created by collecting recordings from the archives of the regional broadcaster Omrop Fryslân, and annotating them with various information such as the language switches and speaker details. The second component of this collection is a language model created on a text corpus with diverse vocabulary. Thirdly, a Frisian phonetic dictionary with the mappings between the Frisian words and phones is built to make the ASR viable for this under-resourced language. Finally, an ASR recipe is provided which uses all previous resources to perform recognition and present the recognition performances. + +The Corpus consists of short utterances extracted from 203 audio segments of approximately 5 minutes long which are parts of various radio programs covering a time span of almost 50 years (1966-2015), adding a longitudinal dimension to the database. The content of the recordings are very diverse including radio programs about culture, history, literature, sports, nature, agriculture, politics, society and languages. The total duration of the manually annotated radio broadcasts sums up to 18 hours, 33 minutes and 57 seconds. The stereo audio data has a sampling frequency of 48 kHz and 16-bit resolution per sample. The available meta-information helped the annotators to identify these speakers and mark them either using their names or the same label (if the name is not known). 
There are 309 identified speakers in the FAME! Speech Corpus, 21 of whom appear at least 3 times in the database. These speakers are mostly program presenters and celebrities appearing multiple times in different recordings over years. There are 233 unidentified speakers due to lack of meta-information. The total number of word- and sentence-level code-switching cases in the FAME! Speech Corpus is equal to 3837. Music portions have been removed, except where these overlap with speech. + +A full description of the FAME! Speech Corpus is provided in: + +Yilmaz, E., Heuvel, H. van den, Van de Velde, H., Kampstra, F., Algra, J., Leeuwen, D. van: + +Open Source Speech and Language Resources for Frisian Language. + +In: Proceedings Interspeech 2016, pp. 1536--1540, 8-12 September 2016, San Francisco + +Please check http://www.ru.nl/clst/datasets/ to get the FAME! Speech Corpus diff --git a/egs/fame/s5/RESULTS b/egs/fame/s5/RESULTS new file mode 100644 index 00000000000..a8541fba6b5 --- /dev/null +++ b/egs/fame/s5/RESULTS @@ -0,0 +1,28 @@ +%WER 41.10 [ 4974 / 12101, 522 ins, 1223 del, 3229 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_devel/wer_11_0.0 +%WER 38.10 [ 4909 / 12886, 527 ins, 1220 del, 3162 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_test/wer_11_0.0 +%WER 41.06 [ 4969 / 12101, 514 ins, 1277 del, 3178 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_11_0.0 +%WER 40.38 [ 4886 / 12101, 515 ins, 1225 del, 3146 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_11_0.0 +%WER 40.15 [ 4859 / 12101, 514 ins, 1177 del, 3168 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_10_0.5 +%WER 37.86 [ 4879 / 12886, 596 ins, 1083 del, 3200 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it1/wer_10_0.0 +%WER 37.16 [ 4789 / 12886, 592 ins, 1056 del, 3141 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it3/wer_10_0.0 +%WER 36.92 [ 4757 / 12886, 618 ins, 1010 del, 3129 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it6/wer_10_0.0 +%WER 42.38 [ 5129 / 12101, 576 ins, 1171 del, 3382 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_devel/wer_11_0.0 +%WER 39.14 [ 5043 / 12886, 536 ins, 1172 del, 3335 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_test/wer_11_0.0 +%WER 42.05 [ 5088 / 12101, 525 ins, 1282 del, 3281 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_11_0.0 +%WER 41.41 [ 5011 / 12101, 461 ins, 1345 del, 3205 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_11_0.5 +%WER 40.97 [ 4958 / 12101, 485 ins, 1279 del, 3194 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_11_0.5 +%WER 38.79 [ 4998 / 12886, 512 ins, 1194 del, 3292 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it1/wer_11_0.0 +%WER 38.16 [ 4917 / 12886, 544 ins, 1128 del, 3245 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it3/wer_11_0.0 +%WER 37.68 [ 4856 / 12886, 564 ins, 1068 del, 3224 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it6/wer_11_0.0 +%WER 70.85 [ 8574 / 12101, 414 ins, 2596 del, 5564 sub ] exp/mono/decode_devel/wer_9_0.0 +%WER 68.17 [ 8785 / 12886, 413 ins, 2704 del, 5668 sub ] exp/mono/decode_test/wer_9_0.0 +%WER 44.05 [ 5330 / 12101, 560 ins, 1467 del, 3303 sub ] exp/sgmm2/decode_devel/wer_10_0.0 +%WER 40.22 [ 5183 / 12886, 680 ins, 1142 del, 3361 sub ] exp/sgmm2/decode_test/wer_9_0.0 +%WER 54.39 [ 6582 / 12101, 695 ins, 1595 del, 4292 sub ] exp/tri1/decode_devel/wer_10_0.0 +%WER 51.60 [ 6649 / 12886, 630 ins, 1706 del, 4313 sub ] exp/tri1/decode_test/wer_11_0.0 +%WER 51.53 [ 6236 / 12101, 659 ins, 1675 del, 3902 sub ] 
exp/tri2/decode_devel/wer_11_0.0
+%WER 48.32 [ 6226 / 12886, 643 ins, 1669 del, 3914 sub ] exp/tri2/decode_test/wer_12_0.0
+%WER 47.15 [ 5706 / 12101, 580 ins, 1537 del, 3589 sub ] exp/tri3/decode_devel/wer_13_0.0
+%WER 52.13 [ 6308 / 12101, 623 ins, 1706 del, 3979 sub ] exp/tri3/decode_devel.si/wer_11_0.5
+%WER 43.71 [ 5632 / 12886, 594 ins, 1538 del, 3500 sub ] exp/tri3/decode_test/wer_14_0.0
+%WER 48.21 [ 6212 / 12886, 825 ins, 1358 del, 4029 sub ] exp/tri3/decode_test.si/wer_10_0.0
diff --git a/egs/fame/s5/cmd.sh b/egs/fame/s5/cmd.sh
new file mode 120000
index 00000000000..19f7e836644
--- /dev/null
+++ b/egs/fame/s5/cmd.sh
@@ -0,0 +1 @@
+../../wsj/s5/cmd.sh
\ No newline at end of file
diff --git a/egs/fame/s5/conf/decode_dnn.config b/egs/fame/s5/conf/decode_dnn.config
new file mode 100644
index 00000000000..89dd9929a62
--- /dev/null
+++ b/egs/fame/s5/conf/decode_dnn.config
@@ -0,0 +1,2 @@
+beam=18.0 # beam for decoding. Was 13.0 in the scripts.
+lattice_beam=10.0 # this has most effect on size of the lattices.
diff --git a/egs/fame/s5/conf/fbank.conf b/egs/fame/s5/conf/fbank.conf
new file mode 100644
index 00000000000..c4b73674cab
--- /dev/null
+++ b/egs/fame/s5/conf/fbank.conf
@@ -0,0 +1,2 @@
+# No non-default options for now.
+
diff --git a/egs/fame/s5/conf/mfcc.conf b/egs/fame/s5/conf/mfcc.conf
new file mode 100644
index 00000000000..7361509099f
--- /dev/null
+++ b/egs/fame/s5/conf/mfcc.conf
@@ -0,0 +1 @@
+--use-energy=false # only non-default option.
diff --git a/egs/fame/s5/conf/mfcc_hires.conf b/egs/fame/s5/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..434834a6725
--- /dev/null
+++ b/egs/fame/s5/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--num-mel-bins=40 # similar to Google's setup.
+--num-ceps=40 # there is no dimensionality reduction.
+--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so
+              # there might be some information at the low end.
+--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/fame/s5/conf/online_cmvn.conf b/egs/fame/s5/conf/online_cmvn.conf
new file mode 100644
index 00000000000..cbdaf5f281c
--- /dev/null
+++ b/egs/fame/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/online/run_online_decoding_nnet2.sh
diff --git a/egs/fame/s5/local/fame_data_prep.sh b/egs/fame/s5/local/fame_data_prep.sh
new file mode 100755
index 00000000000..2c2d1e79238
--- /dev/null
+++ b/egs/fame/s5/local/fame_data_prep.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Copyright 2015-2016 Sarah Flora Juan
+# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal)
+# Copyright 2016 Radboud University (Author: Emre Yilmaz)
+
+# Apache 2.0
+
+corpus=$1
+set -e -o pipefail
+if [ -z "$corpus" ] ; then
+  echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech database"
+  exit 1
+fi
+if [ ! 
-d "$corpus" ] ; then + echo >&2 "The directory $corpus does not exist" +fi + +echo "Preparing train, development and test data" +mkdir -p data data/local data/train data/devel data/test + +for x in train devel test; do + echo "Copy spk2utt, utt2spk, wav.scp, text for $x" + cp $corpus/data/$x/text data/$x/text || exit 1; + cp $corpus/data/$x/spk2utt data/$x/spk2utt || exit 1; + cp $corpus/data/$x/utt2spk data/$x/utt2spk || exit 1; + + # the corpus wav.scp contains physical paths, so we just re-generate + # the file again from scratchn instead of figuring out how to edit it + for rec in $(awk '{print $1}' $corpus/data/$x/text) ; do + spk=${rec%_*} + filename=$corpus/fame/wav/${x}/${rec:8}.wav + if [ ! -f "$filename" ] ; then + echo >&2 "The file $filename could not be found ($rec)" + exit 1 + fi + # we might want to store physical paths as a general rule + filename=$(readlink -f $filename) + echo "$rec $filename" + done > data/$x/wav.scp + + # fix_data_dir.sh fixes common mistakes (unsorted entries in wav.scp, + # duplicate entries and so on). Also, it regenerates the spk2utt from + # utt2sp + utils/fix_data_dir.sh data/$x +done + +echo "Copying language model" +if [ -f $corpus/lm/LM_FR_IKN3G ] ; then + gzip -c $corpus/lm/LM_FR_IKN3G > data/local/LM.gz +fi + +echo "Data preparation completed." + diff --git a/egs/fame/s5/local/fame_dict_prep.sh b/egs/fame/s5/local/fame_dict_prep.sh new file mode 100755 index 00000000000..c6530217a67 --- /dev/null +++ b/egs/fame/s5/local/fame_dict_prep.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright 2015-2016 Sarah Flora Juan +# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal) +# Copyright 2016 Radboud University (Author: Emre Yilmaz) + +# Apache 2.0 + +corpus=$1 +if [ -z "$corpus" ] ; then + echo >&2 "The script $0 expects one parameter -- the location of the Iban corpus" + exit 1 +fi +if [ ! -d "$corpus" ] ; then + echo >&2 "The directory $corpus does not exist" +fi + +mkdir -p data/lang data/local/dict + + +cat $corpus/lexicon/lex.asr $corpus/lexicon/lex.oov > data/local/dict/lexicon.txt +echo "!SIL SIL" >> data/local/dict/lexicon.txt +echo " SPN" >> data/local/dict/lexicon.txt +env LC_ALL=C sort -u -o data/local/dict/lexicon.txt data/local/dict/lexicon.txt +cat data/local/dict/lexicon.txt | \ + perl -ane 'print join("\n", @F[1..$#F]) . "\n"; ' | \ + sort -u | grep -v 'SIL' > data/local/dict/nonsilence_phones.txt + + +touch data/local/dict/extra_questions.txt +touch data/local/dict/optional_silence.txt + +echo "SIL" > data/local/dict/optional_silence.txt +echo "SIL" > data/local/dict/silence_phones.txt +echo "" > data/local/dict/oov.txt + +echo "Dictionary preparation succeeded" diff --git a/egs/fame/s5/local/nnet/run_dnn.sh b/egs/fame/s5/local/nnet/run_dnn.sh new file mode 100755 index 00000000000..ca1efa5e0ac --- /dev/null +++ b/egs/fame/s5/local/nnet/run_dnn.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely) +# Copyright 2016 Radboud University (Author: Emre Yilmaz) +# Apache 2.0 + +# This example script trains a DNN on top of fMLLR features. +# The training is done in 3 stages, +# +# 1) RBM pre-training: +# in this unsupervised stage we train stack of RBMs, +# a good starting point for frame cross-entropy trainig. +# 2) frame cross-entropy training: +# the objective is to classify frames to correct pdfs. +# 3) sequence-training optimizing sMBR: +# the objective is to emphasize state-sequences with better +# frame accuracy w.r.t. reference alignment. 
+ +# Note: With DNNs in RM, the optimal LMWT is 2-6. Don't be tempted to try acwt's like 0.2, +# the value 0.1 is better both for decoding and sMBR. + +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. + +. ./path.sh ## Source the tools/utils (import the queue.pl) + +set -eu + +# Config: +gmm=exp/tri3 +data_fmllr=data-fmllr-tri3 +stage=0 # resume training with --stage=N +# End of config. +. utils/parse_options.sh +# + +[ ! -e $data_fmllr/test ] && if [ $stage -le 0 ]; then + # Store fMLLR features, so we can train on them easily, + # devel + dir=$data_fmllr/devel + steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ + --transform-dir $gmm/decode_devel \ + $dir data/devel $gmm $dir/log $dir/data + # test + dir=$data_fmllr/test + steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ + --transform-dir $gmm/decode_test \ + $dir data/test $gmm $dir/log $dir/data + # train + dir=$data_fmllr/train + steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ + --transform-dir ${gmm}_ali \ + $dir data/train $gmm $dir/log $dir/data + # split the data : 90% train 10% cross-validation (held-out) + utils/subset_data_dir_tr_cv.sh $dir ${dir}_tr90 ${dir}_cv10 +fi + +if [ $stage -le 1 ]; then + # Pre-train DBN, i.e. a stack of RBMs (small database, smaller DNN) + dir=exp/dnn4b_pretrain-dbn + $cuda_cmd $dir/log/pretrain_dbn.log \ + steps/nnet/pretrain_dbn.sh --hid-dim 2048 --rbm-iter 10 $data_fmllr/train $dir +fi + +if [ $stage -le 2 ]; then + # Train the DNN optimizing per-frame cross-entropy. + dir=exp/dnn4b_pretrain-dbn_dnn + ali=${gmm}_ali + feature_transform=exp/dnn4b_pretrain-dbn/final.feature_transform + dbn=exp/dnn4b_pretrain-dbn/6.dbn + # Train + $cuda_cmd $dir/log/train_nnet.log \ + steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \ + $data_fmllr/train_tr90 $data_fmllr/train_cv10 data/lang $ali $ali $dir + # Decode (reuse HCLG graph) + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \ + $gmm/graph $data_fmllr/devel $dir/decode_devel + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \ + $gmm/graph $data_fmllr/test $dir/decode_test +fi + + +# Sequence training using sMBR criterion, we do Stochastic-GD with per-utterance updates. +# Note: With DNNs in RM, the optimal LMWT is 2-6. Don't be tempted to try acwt's like 0.2, +# the value 0.1 is better both for decoding and sMBR. 
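# (For this corpus the note above is best read against the RESULTS file: the
#  best scoring operating points land around wer_10/wer_11 rather than the
#  LMWT 2-6 quoted for RM, the recipe this script was adapted from; the
#  acwt=0.1 advice itself carries over unchanged below.)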
+dir=exp/dnn4b_pretrain-dbn_dnn_smbr
+srcdir=exp/dnn4b_pretrain-dbn_dnn
+acwt=0.1
+
+if [ $stage -le 3 ]; then
+  # First we generate lattices and alignments:
+  steps/nnet/align.sh --nj 20 --cmd "$train_cmd" \
+    $data_fmllr/train data/lang $srcdir ${srcdir}_ali
+  steps/nnet/make_denlats.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
+    $data_fmllr/train data/lang $srcdir ${srcdir}_denlats
+fi
+
+if [ $stage -le 4 ]; then
+  # Re-train the DNN by 6 iterations of sMBR
+  steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 6 --acwt $acwt --do-smbr true \
+    $data_fmllr/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir
+  # Decode
+  for ITER in 6 3 1; do
+    steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \
+      --nnet $dir/${ITER}.nnet --acwt $acwt \
+      $gmm/graph $data_fmllr/devel $dir/decode_devel_it${ITER}
+    steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \
+      --nnet $dir/${ITER}.nnet --acwt $acwt \
+      $gmm/graph $data_fmllr/test $dir/decode_test_it${ITER}
+  done
+fi
+
+echo Success
+exit 0
+
+# Getting results [see RESULTS file]
+# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
+
+# to see how model conversion to nnet2 works, run run_dnn_convert_nnet2.sh at this point.
+
diff --git a/egs/fame/s5/local/nnet/run_dnn_fbank.sh b/egs/fame/s5/local/nnet/run_dnn_fbank.sh
new file mode 100755
index 00000000000..a81449ffbcf
--- /dev/null
+++ b/egs/fame/s5/local/nnet/run_dnn_fbank.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+
+# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
+# Copyright 2016 Radboud University (Author: Emre Yilmaz)
+# Apache 2.0
+
+# This example script trains a DNN on top of FBANK features.
+# The training is done in 3 stages,
+#
+# 1) RBM pre-training:
+#    in this unsupervised stage we train a stack of RBMs,
+#    a good starting point for frame cross-entropy training.
+# 2) frame cross-entropy training:
+#    the objective is to classify frames to correct pdfs.
+# 3) sequence-training optimizing sMBR:
+#    the objective is to emphasize state-sequences with better
+#    frame accuracy w.r.t. reference alignment.
+
+# Note: With DNNs in RM, the optimal LMWT is 2-6. Don't be tempted to try acwt's like 0.2,
+# the value 0.1 is better both for decoding and sMBR.
+
+. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
+           ## This relates to the queue.
+
+. ./path.sh ## Source the tools/utils (import the queue.pl)
+
+dev=data-fbank/devel
+tst=data-fbank/test
+train=data-fbank/train
+
+dev_original=data/devel
+tst_original=data/test
+train_original=data/train
+
+gmm=exp/tri3
+
+stage=0
+. utils/parse_options.sh || exit 1;
+
+set -eu
+
+# Make the FBANK features
+[ ! 
-e $dev ] && if [ $stage -le 0 ]; then + # Dev set + utils/copy_data_dir.sh $dev_original $dev || exit 1; rm $dev/{cmvn,feats}.scp + steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \ + $dev $dev/log $dev/data || exit 1; + steps/compute_cmvn_stats.sh $dev $dev/log $dev/data || exit 1; + # Test set + utils/copy_data_dir.sh $tst_original $tst || exit 1; rm $tst/{cmvn,feats}.scp + steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \ + $tst $tst/log $tst/data || exit 1; + steps/compute_cmvn_stats.sh $tst $tst/log $tst/data || exit 1; + # Training set + utils/copy_data_dir.sh $train_original $train || exit 1; rm $train/{cmvn,feats}.scp + steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \ + $train $train/log $train/data || exit 1; + steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1; + # Split the training set + utils/subset_data_dir_tr_cv.sh --cv-spk-percent 10 $train ${train}_tr90 ${train}_cv10 +fi + +if [ $stage -le 1 ]; then + # Pre-train DBN, i.e. a stack of RBMs (small database, smaller DNN) + dir=exp/dnn4d-fbank_pretrain-dbn + $cuda_cmd $dir/log/pretrain_dbn.log \ + steps/nnet/pretrain_dbn.sh \ + --cmvn-opts "--norm-means=true --norm-vars=true" \ + --delta-opts "--delta-order=2" --splice 5 \ + --hid-dim 2048 --rbm-iter 10 $train $dir || exit 1; +fi + +if [ $stage -le 2 ]; then + # Train the DNN optimizing per-frame cross-entropy. + dir=exp/dnn4d-fbank_pretrain-dbn_dnn + ali=${gmm}_ali + feature_transform=exp/dnn4d-fbank_pretrain-dbn/final.feature_transform + dbn=exp/dnn4d-fbank_pretrain-dbn/6.dbn + # Train + $cuda_cmd $dir/log/train_nnet.log \ + steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \ + ${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1; + # Decode (reuse HCLG graph) + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \ + $gmm/graph $dev $dir/decode_devel || exit 1; + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \ + $gmm/graph $tst $dir/decode_test || exit 1; +fi + + +# Sequence training using sMBR criterion, we do Stochastic-GD with per-utterance updates. +# Note: With DNNs in RM, the optimal LMWT is 2-6. Don't be tempted to try acwt's like 0.2, +# the value 0.1 is better both for decoding and sMBR. 
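# (How the two front-ends compare on this corpus, per the RESULTS file: after
#  6 sMBR iterations the FBANK system reaches 37.68% WER on test and 40.97%
#  on devel, versus 36.92% and 40.15% for the fMLLR system from run_dnn.sh,
#  so the fMLLR features keep a lead of a bit under one point.)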
+dir=exp/dnn4d-fbank_pretrain-dbn_dnn_smbr
+srcdir=exp/dnn4d-fbank_pretrain-dbn_dnn
+acwt=0.1
+
+if [ $stage -le 3 ]; then
+  # First we generate lattices and alignments:
+  steps/nnet/align.sh --nj 20 --cmd "$train_cmd" \
+    $train data/lang $srcdir ${srcdir}_ali || exit 1;
+  steps/nnet/make_denlats.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
+    $train data/lang $srcdir ${srcdir}_denlats || exit 1;
+fi
+
+if [ $stage -le 4 ]; then
+  # Re-train the DNN by 6 iterations of sMBR
+  steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 6 --acwt $acwt --do-smbr true \
+    $train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
+  # Decode
+  for ITER in 6 3 1; do
+    steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \
+      --nnet $dir/${ITER}.nnet --acwt $acwt \
+      $gmm/graph $dev $dir/decode_devel_it${ITER} || exit 1
+    steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \
+      --nnet $dir/${ITER}.nnet --acwt $acwt \
+      $gmm/graph $tst $dir/decode_test_it${ITER} || exit 1
+  done
+fi
+
+echo Success
+exit 0
+
+# Getting results [see RESULTS file]
+# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
diff --git a/egs/fame/s5/local/score.sh b/egs/fame/s5/local/score.sh
new file mode 120000
index 00000000000..0afefc3158c
--- /dev/null
+++ b/egs/fame/s5/local/score.sh
@@ -0,0 +1 @@
+../steps/score_kaldi.sh
\ No newline at end of file
diff --git a/egs/fame/s5/local/wer_hyp_filter b/egs/fame/s5/local/wer_hyp_filter
new file mode 100755
index 00000000000..372d1a9c73a
--- /dev/null
+++ b/egs/fame/s5/local/wer_hyp_filter
@@ -0,0 +1,2 @@
+#!/bin/sed -f
+s:<unk>::g
diff --git a/egs/fame/s5/local/wer_output_filter b/egs/fame/s5/local/wer_output_filter
new file mode 100755
index 00000000000..372d1a9c73a
--- /dev/null
+++ b/egs/fame/s5/local/wer_output_filter
@@ -0,0 +1,2 @@
+#!/bin/sed -f
+s:<unk>::g
diff --git a/egs/fame/s5/local/wer_ref_filter b/egs/fame/s5/local/wer_ref_filter
new file mode 100755
index 00000000000..372d1a9c73a
--- /dev/null
+++ b/egs/fame/s5/local/wer_ref_filter
@@ -0,0 +1,2 @@
+#!/bin/sed -f
+s:<unk>::g
diff --git a/egs/fame/s5/path.sh b/egs/fame/s5/path.sh
new file mode 100755
index 00000000000..2d17b17a84a
--- /dev/null
+++ b/egs/fame/s5/path.sh
@@ -0,0 +1,6 @@
+export KALDI_ROOT=`pwd`/../../..
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/fame/s5/run.sh b/egs/fame/s5/run.sh
new file mode 100755
index 00000000000..26a8485ff7d
--- /dev/null
+++ b/egs/fame/s5/run.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+stage=0
+feat_nj=10
+train_nj=10
+decode_nj=10
+famecorpus=./corpus
+
+if [ -d $famecorpus ] ; then
+  echo "Fame corpus present. OK."
+elif [ -f ./fame.tar.gz ] ; then
+  echo "Unpacking..."
+  tar xzf fame.tar.gz
+elif [ ! -d $famecorpus ] && [ ! -f ./fame.tar.gz ] ; then
+  echo "The Fame! corpus is not present. 
Please register here: http://www.ru.nl/clst/datasets/ "
+  echo " and download the corpus and put it at $famecorpus" && exit 1
+fi
+
+numLeavesTri1=5000
+numGaussTri1=25000
+numLeavesMLLT=5000
+numGaussMLLT=25000
+numLeavesSAT=5000
+numGaussSAT=25000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=20000
+
+if [ $stage -le 1 ]; then
+  local/fame_data_prep.sh $famecorpus || exit 1;
+  local/fame_dict_prep.sh $famecorpus || exit 1;
+  utils/prepare_lang.sh data/local/dict "<unk>" data/local/lang data/lang || exit 1;
+  utils/format_lm.sh data/lang data/local/LM.gz data/local/dict/lexicon.txt data/lang_test || exit 1;
+fi
+
+if [ $stage -le 2 ]; then
+  # Feature extraction
+  for x in train devel test; do
+    steps/make_mfcc.sh --nj $feat_nj --cmd "$train_cmd" data/$x exp/make_mfcc/$x mfcc || exit 1;
+    steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x mfcc || exit 1;
+  done
+fi
+
+if [ $stage -le 3 ]; then
+  ### Monophone
+  echo "Starting monophone training."
+  steps/train_mono.sh --nj $train_nj --cmd "$train_cmd" data/train data/lang exp/mono || exit 1;
+  echo "Mono training done."
+
+  echo "Decoding the development and test sets using monophone models."
+  utils/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph || exit 1;
+  steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/mono/graph data/devel exp/mono/decode_devel || exit 1;
+  steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/mono/graph data/test exp/mono/decode_test || exit 1;
+  echo "Monophone decoding done."
+fi
+
+
+if [ $stage -le 4 ]; then
+  ### Triphone
+  echo "Starting triphone training."
+  steps/align_si.sh --nj $train_nj --cmd "$train_cmd" data/train data/lang exp/mono exp/mono_ali || exit 1;
+  steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" $numLeavesTri1 $numGaussTri1 data/train data/lang exp/mono_ali exp/tri1 || exit 1;
+  echo "Triphone training done."
+
+  echo "Decoding the development and test sets using triphone models."
+  utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph || exit 1;
+  steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri1/graph data/devel exp/tri1/decode_devel || exit 1;
+  steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri1/graph data/test exp/tri1/decode_test || exit 1;
+  echo "Triphone decoding done."
+fi
+
+if [ $stage -le 5 ]; then
+  ### Triphone + LDA and MLLT
+  echo "Starting LDA+MLLT training."
+  steps/align_si.sh --nj $train_nj --cmd "$train_cmd" data/train data/lang exp/tri1 exp/tri1_ali || exit 1;
+  steps/train_lda_mllt.sh --cmd "$train_cmd" --splice-opts "--left-context=3 --right-context=3" $numLeavesMLLT $numGaussMLLT data/train data/lang exp/tri1_ali exp/tri2 || exit 1;
+  echo "LDA+MLLT training done."
+
+  echo "Decoding the development and test sets using LDA+MLLT models."
+  utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1;
+  steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri2/graph data/devel exp/tri2/decode_devel || exit 1;
+  steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2/decode_test || exit 1;
+  echo "LDA+MLLT decoding done."
+fi
+
+
+if [ $stage -le 6 ]; then
+  ### Triphone + LDA and MLLT + SAT and FMLLR
+  echo "Starting SAT+FMLLR training."
+  steps/align_si.sh --nj $train_nj --cmd "$train_cmd" --use-graphs true data/train data/lang exp/tri2 exp/tri2_ali || exit 1;
+  steps/train_sat.sh --cmd "$train_cmd" $numLeavesSAT $numGaussSAT data/train data/lang exp/tri2_ali exp/tri3 || exit 1;
+  echo "SAT+FMLLR training done."
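  # (decode_fmllr.sh below decodes each set twice: a speaker-independent
  #  first pass -- the ".si" numbers in RESULTS, e.g. exp/tri3/decode_test.si
  #  -- then re-decodes with fMLLR transforms estimated from that pass, which
  #  gives the exp/tri3/decode_test numbers.)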
+
+  echo "Decoding the development and test sets using SAT+FMLLR models."
+  utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph || exit 1;
+  steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri3/graph data/devel exp/tri3/decode_devel || exit 1;
+  steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri3/graph data/test exp/tri3/decode_test || exit 1;
+  echo "SAT+FMLLR decoding done."
+fi
+
+
+if [ $stage -le 7 ]; then
+  echo "Starting SGMM training."
+  steps/align_fmllr.sh --nj $train_nj --cmd "$train_cmd" data/train data/lang exp/tri3 exp/tri3_ali || exit 1;
+  steps/train_ubm.sh --cmd "$train_cmd" $numGaussUBM data/train data/lang exp/tri3_ali exp/ubm || exit 1;
+  steps/train_sgmm2.sh --cmd "$train_cmd" $numLeavesSGMM $numGaussSGMM data/train data/lang exp/tri3_ali exp/ubm/final.ubm exp/sgmm2 || exit 1;
+  echo "SGMM training done."
+
+  echo "Decoding the development and test sets using SGMM models"
+  utils/mkgraph.sh data/lang_test exp/sgmm2 exp/sgmm2/graph || exit 1;
+  steps/decode_sgmm2.sh --nj $decode_nj --cmd "$decode_cmd" --transform-dir exp/tri3/decode_devel exp/sgmm2/graph data/devel exp/sgmm2/decode_devel || exit 1;
+  steps/decode_sgmm2.sh --nj $decode_nj --cmd "$decode_cmd" --transform-dir exp/tri3/decode_test exp/sgmm2/graph data/test exp/sgmm2/decode_test || exit 1;
+  echo "SGMM decoding done."
+fi
+
+if [ $stage -le 8 ]; then
+  echo "Starting DNN training and decoding."
+  local/nnet/run_dnn.sh || exit 1;
+  local/nnet/run_dnn_fbank.sh || exit 1;
+fi
+
+#score
+for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
diff --git a/egs/fame/s5/steps b/egs/fame/s5/steps
new file mode 120000
index 00000000000..6e99bf5b5ad
--- /dev/null
+++ b/egs/fame/s5/steps
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/fame/s5/utils b/egs/fame/s5/utils
new file mode 120000
index 00000000000..b240885218f
--- /dev/null
+++ b/egs/fame/s5/utils
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file
From 8e0d853961a8c76b5ddef29034c630c031857e1b Mon Sep 17 00:00:00 2001
From: Ke Li
Date: Sat, 11 Feb 2017 14:28:25 -0500
Subject: [PATCH 423/530] [scripts] add empty-data checks in generate_plots.py
 (#1394)

---
 egs/wsj/s5/steps/nnet3/report/generate_plots.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/egs/wsj/s5/steps/nnet3/report/generate_plots.py b/egs/wsj/s5/steps/nnet3/report/generate_plots.py
index b68d22dbedd..6f185ad313f 100755
--- a/egs/wsj/s5/steps/nnet3/report/generate_plots.py
+++ b/egs/wsj/s5/steps/nnet3/report/generate_plots.py
@@ -225,6 +225,10 @@ def generate_nonlin_stats_plots(exp_dir, output_dir, plot, comparison_dir=None,
     for dir in dirs:
         stats_per_component_per_iter = (
             log_parse.parse_progress_logs_for_nonlinearity_stats(dir))
+        for key in stats_per_component_per_iter:
+            if len(stats_per_component_per_iter[key]['stats']) == 0:
+                logger.warning("Couldn't find any rows for the "
+                               "nonlin stats plot, not generating it")
         stats_per_dir[dir] = stats_per_component_per_iter

     # convert the nonlin stats into tables
@@ -355,6 +359,9 @@ def generate_clipped_proportion_plots(exp_dir, output_dir, plot,
                            " this might be because there are no "
                            "ClipGradientComponents.".format(dir))
             continue
+        if len(stats_per_dir[dir]) == 0:
+            logger.warning("Couldn't find any rows for the "
+                           "clipped proportion plot, not generating it")
         try:
             main_cp_stats = stats_per_dir[exp_dir]['table']
         except KeyError:
From 5b03ada36b8147f047d8fbab2f311f649ae71626 Mon Sep 17 00:00:00 2001
From: LvHang
Date: Sat, 11 Feb 2017 
14:34:03 -0500 Subject: [PATCH 424/530] [scripts] Change how the --frame argument is set in non-recurrent DNN training (#1389) ... makes it vary on each iteration, not in big chunks of time. --- egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py index 25fd94d98ff..3e732313612 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py @@ -71,7 +71,7 @@ def train_new_models(dir, iter, srand, num_jobs, archive_index = (k % num_archives) + 1 if not chunk_level_training: - frame = (k / num_archives) % frames_per_eg + frame = (k / num_archives + archive_index) % frames_per_eg cache_write_opt = "" if job == 1: From 37b5352f5dcd59dd3e346d70b5c268f187071a22 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 11 Feb 2017 21:24:21 -0500 Subject: [PATCH 425/530] [src,doc] Documentation updates; fixes to comments. --- src/cudamatrix/cu-matrix.h | 9 ++------- src/doc/online_decoding.dox | 30 ++++++++++++++++++++++++++++++ src/doc/versions.dox | 2 ++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/src/cudamatrix/cu-matrix.h b/src/cudamatrix/cu-matrix.h index fb26fbf1013..056abb0c8fb 100644 --- a/src/cudamatrix/cu-matrix.h +++ b/src/cudamatrix/cu-matrix.h @@ -96,7 +96,6 @@ class CuMatrixBase { /// Copies column r from column indexes[r] of src. /// As a special case, if indexes[i] == -1, sets column i to zero /// indexes.size() must equal this->NumCols(), - /// all elements of "reorder" must be in [-1, src.NumCols()-1], /// and src.NumRows() must equal this.NumRows() void CopyCols(const CuMatrixBase &src, const CuArray &indexes); @@ -105,14 +104,12 @@ class CuMatrixBase { /// Add column indices[r] of src to column r. /// As a special case, if indexes[i] == -1, skip column i /// indices.size() must equal this->NumCols(), - /// all elements of "reorder" must be in [-1, src.NumCols()-1], /// and src.NumRows() must equal this.NumRows() void AddCols(const CuMatrixBase &src, const CuArray &indices); /// Copies row r from row indexes[r] of src. - /// As a special case, if indexes[i] < 0, sets row i to zero - /// "reorder".size() must equal this->NumRows(), and + /// As a special case, if indexes[i] < 0, sets row i to zero. /// src.NumCols() must equal this.NumCols() void CopyRows(const CuMatrixBase &src, const CuArray &indexes); @@ -136,9 +133,7 @@ class CuMatrixBase { /// Does for each row r, this.Row(r) += alpha * src.row(indexes[r]). /// If indexes[r] < 0, does not add anything. - /// "reorder".size() must equal this->NumRows(), - /// all elements of "reorder" must be in [0, src.NumRows()-1], - /// and src.NumCols() must equal this.NumCols() + /// src.NumCols() must equal this.NumCols() void AddRows(Real alpha, const CuMatrixBase &src, const CuArray &indexes); diff --git a/src/doc/online_decoding.dox b/src/doc/online_decoding.dox index 52be3d38bca..799bfb5895f 100644 --- a/src/doc/online_decoding.dox +++ b/src/doc/online_decoding.dox @@ -410,6 +410,36 @@ utils/mkgraph.sh $lang_own $model_dir $graph_own_dir || exit 1; where $model_dir is the model directory which contains the model "final.mdl" and the tree "tree". We now can use $graph_own_dir/HCLG.fst to replace the old HCLG.fst. 
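
As a concrete sketch of the graph-building procedure just described (the
directory names below are hypothetical placeholders, not taken from any
particular recipe):
\verbatim
lang_own=data/lang_own              # lang directory containing your new LM
model_dir=exp/tri3                  # must contain final.mdl and tree
graph_own_dir=$model_dir/graph_own
utils/mkgraph.sh $lang_own $model_dir $graph_own_dir || exit 1;
# decode with the new graph in place of the old one:
steps/decode.sh --nj 8 --cmd run.pl $graph_own_dir data/test $model_dir/decode_test_own
\endverbatim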
+
+
+\section online_decoding_nnet3 Online decoding with nnet3 models
+
+Online decoding with nnet3 models is basically the same as with nnet2
+models as described in \ref online_decoding_nnet2.  However, there are
+some limitations as to the model type you can use.  In Kaldi 5.0 and
+earlier, online nnet3 decoding does not support recurrent models.
+In Kaldi 5.1 and later, online nnet3 decoding supports "forward"
+recurrent models such as LSTMs, but not bidirectional ones like BLSTMs.
+In addition, online nnet3 decoding with recurrent
+models may not give optimal results unless
+you use "Kaldi-5.1-style" configuration, including the "decay-time"
+option and specifying --extra-left-context-initial 0; see
+\ref dnn3_scripts_context for more discussion of these issues.
+
+
+Many of the issues in online nnet3 decoding are the same as in nnet2
+decoding and the command lines are quite similar.  For online nnet3
+decoding with Kaldi 5.1 and later, the best example script, covering
+both model training and online decoding, is at the time of writing
+egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh (currently
+only available in the 'shortcut' branch, as Kaldi 5.1 has not yet
+been merged to master).  For downloadable models that can be used
+with online nnet3 decoding, please see http://kaldi-asr.org/models.html
+(the first model there, the ASPIRE model, includes instructions in a
+README file).
+
+
+
 */
diff --git a/src/doc/versions.dox b/src/doc/versions.dox
index 2c67b2de317..56cdcdf4118 100644
--- a/src/doc/versions.dox
+++ b/src/doc/versions.dox
@@ -85,6 +85,8 @@
    in nnet3; this allows faster and more-easily-online decoding for
    recurrent setups (but only unidirectionally-recurrent ones, like LSTMs
    but not BLSTMs).
+  - \ref online_decoding_nnet3 is now rewritten; it's faster and it supports
+    models like LSTMs.
   - The sequence-training scripts in nnet3 are refactored and are now
     simpler and use less disk space.
From 53dec62b21da6f2ec693f96c464fd04ad9ea1b04 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Mon, 13 Feb 2017 12:26:09 -0500
Subject: [PATCH 426/530] error_msg: Simplifying err_msg

---
 egs/wsj/s5/steps/libs/common.py     | 32 ++++++++++++++++++++---------
 egs/wsj/s5/steps/nnet3/train_dnn.py |  6 +++---
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/egs/wsj/s5/steps/libs/common.py b/egs/wsj/s5/steps/libs/common.py
index f2a336cd640..393fef7d4f6 100644
--- a/egs/wsj/s5/steps/libs/common.py
+++ b/egs/wsj/s5/steps/libs/common.py
@@ -79,10 +79,13 @@ class KaldiCommandException(Exception):
     kaldi command that caused the error and the error string captured.
""" def __init__(self, command, err=None): + import re Exception.__init__(self, "There was an error while running the command " - "{0}\n{1}\n{2}".format(command, "-"*10, - "" if err is None else err)) + "{0}\n{1}\n{2}".format( + re.sub('\s+', ' ', command).strip(), + "-"*10, + "" if err is None else err)) class BackgroundProcessHandler(): @@ -165,17 +168,20 @@ def add_process(self, t): self.start() def is_process_done(self, t): - p, command = t + p, command, exit_on_failure = t if p.poll() is None: return False return True def ensure_process_is_done(self, t): - p, command = t + p, command, exit_on_failure = t logger.debug("Waiting for process '{0}' to end".format(command)) [stdout, stderr] = p.communicate() if p.returncode is not 0: - raise KaldiCommandException(command, stderr) + print("There was an error while running the command " + "{0}\n{1}\n{2}".format(command, "-"*10, stderr)) + if exit_on_failure: + os._exit(1) def ensure_processes_are_done(self): self.__process_queue.reverse() @@ -192,7 +198,8 @@ def debug(self): logger.info("Process '{0}' is running".format(command)) -def run_job(command, wait=True, background_process_handler=None): +def run_job(command, wait=True, background_process_handler=None, + exit_on_failure=False): """ Runs a kaldi job, usually using a script such as queue.pl and run.pl, and redirects the stdout and stderr to the parent process's streams. @@ -206,12 +213,14 @@ class that is instantiated by the top-level script. If this is wait: If True, wait until the process is completed. However, if the background_process_handler is provided, this option will be ignored and the process will be run in the background. + exit_on_failure: If True, will exit from the script on failure. + Only applicable when background_process_handler is specified. """ p = subprocess.Popen(command, shell=True) if background_process_handler is not None: wait = False - background_process_handler.add_process((p, command)) + background_process_handler.add_process((p, command, exit_on_failure)) if wait: p.communicate() @@ -222,7 +231,8 @@ class that is instantiated by the top-level script. If this is return p -def run_kaldi_command(command, wait=True, background_process_handler=None): +def run_kaldi_command(command, wait=True, background_process_handler=None, + exit_on_failure=False): """ Runs commands frequently seen in Kaldi scripts and captures the stdout and stderr. These are usually a sequence of commands connected by pipes, so we use @@ -235,6 +245,8 @@ class that is instantiated by the top-level script. If this is wait: If True, wait until the process is completed. However, if the background_process_handler is provided, this option will be ignored and the process will be run in the background. + exit_on_failure: If True, will exit from the script on failure. + Only applicable when background_process_handler is specified. """ p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, @@ -242,7 +254,7 @@ class that is instantiated by the top-level script. 
If this is if background_process_handler is not None: wait = False - background_process_handler.add_process((p, command)) + background_process_handler.add_process((p, command, exit_on_failure)) if wait: [stdout, stderr] = p.communicate() @@ -281,7 +293,7 @@ def get_number_of_jobs(alidir): num_jobs = int(open('{0}/num_jobs'.format(alidir)).readline().strip()) except (IOError, ValueError) as e: raise Exception("Exception while reading the " - "number of alignment jobs: {0}".format(e.errstr)) + "number of alignment jobs: {0}".format(e)) return num_jobs diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index 2813f719606..f5ac42fd52f 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -412,14 +412,14 @@ def main(): polling_time=args.background_polling_time) train(args, run_opts, background_process_handler) background_process_handler.ensure_processes_are_done() - except Exception as e: + except Exception: if args.email is not None: message = ("Training session for experiment {dir} " "died due to an error.".format(dir=args.dir)) common_lib.send_mail(message, message, args.email) - traceback.print_exc() background_process_handler.stop() - raise e + logger.error("Training session failed; traceback = ", exc_info=True) + raise SystemExit(1) if __name__ == "__main__": From de8d03dd6450a0b93ad3e907f007b022cc905766 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 13 Feb 2017 15:22:31 -0500 Subject: [PATCH 427/530] [scripts,build]: minor fixes only affecting error handling. --- egs/wsj/s5/utils/validate_lang.pl | 4 +++- src/base/get_version.sh | 2 +- tools/config/common_path.sh | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/egs/wsj/s5/utils/validate_lang.pl b/egs/wsj/s5/utils/validate_lang.pl index 008c54ac752..e5bdf75787e 100755 --- a/egs/wsj/s5/utils/validate_lang.pl +++ b/egs/wsj/s5/utils/validate_lang.pl @@ -758,8 +758,10 @@ sub check_summation { # prepare_lang.sh), the regular L.fst may contain some disambiguation # symbols. if (! defined $is_disambig{$phone}) { - if ($phone == "<>") { + if ($phone eq "<>") { $state = "eos"; + } else if ($phone == 0) { + $exit = 1; print "--> ERROR: unexpected phone sequence=$phoneseq, wordseq=$wordseq\n"; last; } else { $state = $wbtype{$phone}; } diff --git a/src/base/get_version.sh b/src/base/get_version.sh index bf5efa8c14a..4829391ac44 100755 --- a/src/base/get_version.sh +++ b/src/base/get_version.sh @@ -59,7 +59,7 @@ else version="$version.$patch_number" # Check for uncommitted changes in src/. - uncommitted_changes=$(git diff-index HEAD .. | wc -l) + uncommitted_changes=$(git diff-index HEAD -- .. 
| wc -l) if [ $uncommitted_changes -gt 0 ]; then # Add suffix ~N if there are N files in src/ with uncommitted changes version="$version~$uncommitted_changes" diff --git a/tools/config/common_path.sh b/tools/config/common_path.sh index 3e2ea50d685..fbc4b674474 100644 --- a/tools/config/common_path.sh +++ b/tools/config/common_path.sh @@ -1,5 +1,5 @@ -# we assume KALDI_ROOT is already defined -[ -z "$KALDI_ROOT" ] && echo "The variable KALDI_ROOT must be already defined" && exit 1 +# we assume KALDI_ROOT is already defined +[ -z "$KALDI_ROOT" ] && echo >&2 "The variable KALDI_ROOT must be already defined" && exit 1 # The formatting of the path export command is intentionally weird, because # this allows for easy diff'ing export PATH=\ From 4f926fb812589d8656f691ee0884c9f29bd54d28 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 13 Feb 2017 23:37:31 -0500 Subject: [PATCH 428/530] [scripts] Removing tdnn-*-layer from xconfigs (redundant). --- .../s5c/local/chain/tuning/run_tdnn_7j.sh | 12 +- egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py | 4 - egs/wsj/s5/steps/libs/nnet3/xconfig/tdnn.py | 107 ------------------ src/nnet3/nnet-utils.h | 2 +- 4 files changed, 7 insertions(+), 118 deletions(-) delete mode 100644 egs/wsj/s5/steps/libs/nnet3/xconfig/tdnn.py diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh index 9aec95393d1..793b40f7fe3 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh @@ -126,12 +126,12 @@ if [ $stage -le 12 ]; then # the first splicing is moved before the lda layer, so no splicing here relu-renorm-layer name=tdnn1 dim=768 - tdnn-relu-renorm-layer name=tdnn2 splice-indexes=-1,0,1 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn3 splice-indexes=-1,0,1 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn4 splice-indexes=-3,0,3 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn5 splice-indexes=-3,0,3 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn6 splice-indexes=-3,0,3 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn7 splice-indexes=-3,0,3 dim=768 subset-dim=384 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=768 subset-dim=384 ## adding the layers for chain branch relu-renorm-layer name=prefinal-chain input=tdnn7 dim=768 target-rms=0.5 diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py index 89458c65152..918d8bd2fb2 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py @@ -22,10 +22,6 @@ 'relu-renorm-layer' : xlayers.XconfigBasicLayer, 'sigmoid-layer' : xlayers.XconfigBasicLayer, 'tanh-layer' : xlayers.XconfigBasicLayer, - 'tdnn-relu-layer' : xlayers.XconfigTdnnLayer, - 'tdnn-relu-renorm-layer' : xlayers.XconfigTdnnLayer, - 'tdnn-sigmoid-layer' : xlayers.XconfigTdnnLayer, - 'tdnn-tanh-layer' : xlayers.XconfigTdnnLayer, 'fixed-affine-layer' : xlayers.XconfigFixedAffineLayer, 'affine-layer' : xlayers.XconfigAffineLayer, 'lstm-layer' : xlayers.XconfigLstmLayer, diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/tdnn.py 
b/egs/wsj/s5/steps/libs/nnet3/xconfig/tdnn.py deleted file mode 100644 index ed7b6f1f53c..00000000000 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/tdnn.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright 2016 Johns Hopkins University (Dan Povey) -# 2016 Vijayaditya Peddinti -# Apache 2.0. - - -""" This module contains the implementation of the TDNN layer. -""" - -import libs.nnet3.xconfig.utils as xutils -from libs.nnet3.xconfig.basic_layers import XconfigBasicLayer -from libs.nnet3.xconfig.basic_layers import XconfigLayerBase - -class XconfigTdnnLayer(XconfigBasicLayer): - """This class is for parsing lines like - tdnn-relu-renorm-layer name=tdnn1 dim=1024 splice-indexes=-3,0,3 subset-dim=512 - - It is similar to XconfigBasicLayer except for the way in which the input - splicing is done. So we derive this class from XconfigBasicLayer. - """ - - def __init__(self, first_token, key_to_value, prev_names = None): - assert first_token in [ 'tdnn-relu-layer', 'tdnn-relu-renorm-layer', - 'tdnn-sigmoid-layer', 'tdnn-tanh-layer' ] - XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) - - - def set_default_configs(self): - - super(XconfigTdnnLayer, self).set_default_configs() - - self.config['splice-indexes'] = '' - self.config['subset-dim'] = -1 - - def check_configs(self): - - if self.config['splice-indexes'] == '': - raise RuntimeError("splice-indexes must be non-empty") - super(XconfigTdnnLayer, self).check_configs() - - - def _generate_config(self): - split_layer_name = self.layer_type.split('-') - assert split_layer_name[-1] == 'layer' - # ignore the first 'tdnn' and the last 'layer' - nonlinearities = split_layer_name[1:-1] - - # by 'descriptor_final_string' we mean a string that can appear in - # config-files, i.e. it contains the 'final' names of nodes. - input_desc = self.descriptors['input']['final-string'] - input_dim = self.descriptors['input']['dim'] - splice_indexes = self.get_splice_indexes() - input_desc, input_dim, sp_configs = self.splice_input(input_desc, - input_dim, splice_indexes, self.config['subset-dim'], - '{0}.input-subset'.format(self.name)) - - return sp_configs + self._add_components(input_desc, input_dim, nonlinearities) - - def get_splice_indexes(self): - try: - return map(lambda x: int(x), self.config['splice-indexes'].split(",")) - except ValueError: - raise RuntimeError("Invalid value for splice-indexes.") - - @staticmethod - def splice_input(input_desc, input_dim, - splice_indexes, subset_dim = -1, - dim_range_node_name = None ): - """Convenience function to create an appended descriptor with the - splice_indexes. 
- """ - - configs = [] - try: - zero_index = splice_indexes.index(0) - except ValueError: - zero_index = None - - if subset_dim > 0: - assert(dim_range_node_name is not None) - # if subset_dim is specified the script expects a zero - # in the splice indexes - assert(zero_index is not None) - line = ("dim-range-node name={0}" - " input-node={1}" - " dim-offset={2}" - " dim={3}" - "".format(dim_range_node_name, - input_desc, 0, subset_dim)) - configs.append(line) - subset_desc = dim_range_node_name - - else: - subset_desc = input_desc - subset_dim = input_dim - - appended_descriptors = [] - appended_dimension = 0 - for j in range(len(splice_indexes)): - if j == zero_index: - appended_descriptors.append(input_desc) - appended_dimension += input_dim - continue - appended_descriptors.append('Offset({0}, {1})'.format(subset_desc, splice_indexes[j])) - appended_dimension += subset_dim - return ["Append({0})".format(", ".join(appended_descriptors)), - appended_dimension, - configs] diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h index 3bda01271d2..766b0ed1798 100644 --- a/src/nnet3/nnet-utils.h +++ b/src/nnet3/nnet-utils.h @@ -80,7 +80,7 @@ std::string PrintVectorPerUpdatableComponent(const Nnet &nnet, const VectorBase &vec); /// This function returns true if the nnet has the following properties: -/// It has an called "output" (other outputs are allowed but may be +/// It has an output called "output" (other outputs are allowed but may be /// ignored). /// It has an input called "input", and possibly an extra input called /// "ivector", but no other inputs. From 502dd6faa4bc91178453438ac920b3ad3ecd8ab4 Mon Sep 17 00:00:00 2001 From: Shiyin Kang Date: Thu, 16 Feb 2017 08:17:47 +0800 Subject: [PATCH 429/530] [src] Add element-wise matrix min operation (#1424) --- src/cudamatrix/cu-kernels-ansi.h | 5 ++++- src/cudamatrix/cu-kernels.cu | 24 ++++++++++++++++++++++++ src/cudamatrix/cu-kernels.h | 24 ++++++++++++++++-------- src/cudamatrix/cu-matrix-test.cc | 22 ++++++++++++++++++++++ src/cudamatrix/cu-matrix.cc | 25 +++++++++++++++++++++++++ src/cudamatrix/cu-matrix.h | 2 ++ src/matrix/kaldi-matrix.cc | 13 +++++++++++++ src/matrix/kaldi-matrix.h | 2 ++ 8 files changed, 108 insertions(+), 9 deletions(-) diff --git a/src/cudamatrix/cu-kernels-ansi.h b/src/cudamatrix/cu-kernels-ansi.h index 116428ea82c..9c274283b7e 100644 --- a/src/cudamatrix/cu-kernels-ansi.h +++ b/src/cudamatrix/cu-kernels-ansi.h @@ -105,6 +105,8 @@ void cudaF_div_elements(dim3 Gr, dim3 Bl, float *mat, const float *A, MatrixDim dst_d, int src_stride); void cudaF_max(dim3 Gr, dim3 Bl, float *mat, const float *A, MatrixDim dst_d, int src_stride); +void cudaF_min(dim3 Gr, dim3 Bl, float *mat, const float *other, + MatrixDim mat_d, int other_stride); void cudaF_mul_cols_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, MatrixDim d); void cudaF_mul_rows_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, @@ -373,6 +375,8 @@ void cudaD_div_elements(dim3 Gr, dim3 Bl, double *mat, const double *A, MatrixDim dst_d, int src_stride); void cudaD_max(dim3 Gr, dim3 Bl, double *mat, const double *A, MatrixDim dst_d, int src_stride); +void cudaD_min(dim3 Gr, dim3 Bl, double *mat, const double *other, + MatrixDim mat_d, int other_stride); void cudaD_mul_cols_vec(dim3 Gr, dim3 Bl, double *mat, const double *scale, MatrixDim d); void cudaD_mul_rows_vec(dim3 Gr, dim3 Bl, double *mat, const double *scale, @@ -696,7 +700,6 @@ void cudaF_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, float* self_repair_sum_out, const int 
self_repair_sum_out_stride); - void cudaD_copy_cols_from_vec(dim3 Gr, dim3 Bl, double *mat_out, MatrixDim d_out, const double *v_in); void cudaF_copy_cols_from_vec(dim3 Gr, dim3 Bl, float *mat_out, MatrixDim d_out, diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu index abb4efd47ef..a1a1e6c633b 100644 --- a/src/cudamatrix/cu-kernels.cu +++ b/src/cudamatrix/cu-kernels.cu @@ -385,6 +385,20 @@ static void _max(Real* mat, const Real* A, MatrixDim dst_d, int src_stride) { } } +template +__global__ +static void _min(Real* mat, const Real* other, MatrixDim mat_d, + int other_stride) { + int32_cuda j = blockIdx.x * blockDim.x + threadIdx.x; + int32_cuda i = blockIdx.y * blockDim.y + threadIdx.y; + int32_cuda mat_index = i * mat_d.stride + j; + int32_cuda other_index = i * other_stride + j; + if (j < mat_d.cols && i < mat_d.rows) { + Real a = mat[mat_index], b = other[other_index]; + mat[mat_index] = fmin(a, b); + } +} + template __global__ static void _vec_mul_elements(Real* v, const Real* a, int dim) { @@ -3350,6 +3364,11 @@ void cudaF_max(dim3 Gr, dim3 Bl, float* mat, const float* A, MatrixDim dst_d, _max<<>>(mat,A,dst_d,src_stride); } +void cudaF_min(dim3 Gr, dim3 Bl, float* mat, const float* other, + MatrixDim mat_d, int other_stride) { + _min<<>>(mat,other,mat_d,other_stride); +} + void cudaF_mul_cols_vec(dim3 Gr, dim3 Bl, float* mat, const float* scale, MatrixDim d) { _mul_cols_vec<<>>(mat,scale,d); @@ -3999,6 +4018,11 @@ void cudaD_max(dim3 Gr, dim3 Bl, double* mat, const double* A, MatrixDim dst_d, _max<<>>(mat,A,dst_d,src_stride); } +void cudaD_min(dim3 Gr, dim3 Bl, double* mat, const double* other, MatrixDim mat_d, + int other_stride) { + _min<<>>(mat,other,mat_d,other_stride); +} + void cudaD_mul_cols_vec(dim3 Gr, dim3 Bl, double* mat, const double* scale, MatrixDim d) { _mul_cols_vec<<>>(mat,scale,d); diff --git a/src/cudamatrix/cu-kernels.h b/src/cudamatrix/cu-kernels.h index 649a25ab67e..bc0f170043d 100644 --- a/src/cudamatrix/cu-kernels.h +++ b/src/cudamatrix/cu-kernels.h @@ -297,6 +297,10 @@ inline void cuda_max(dim3 Gr, dim3 Bl, float *mat, const float *A, MatrixDim dst_d, int src_stride) { cudaF_max(Gr, Bl, mat, A, dst_d, src_stride); } +inline void cuda_min(dim3 Gr, dim3 Bl, float *mat, const float *other, + MatrixDim mat_d, int other_stride) { + cudaF_min(Gr, Bl, mat, other, mat_d, other_stride); +} inline void cuda_mul_cols_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, MatrixDim d) { cudaF_mul_cols_vec(Gr, Bl, mat, scale, d); @@ -548,15 +552,15 @@ inline void cuda_diff_tanh(dim3 Gr, dim3 Bl, float *eout, const float *e, cudaF_diff_tanh(Gr, Bl, eout, e, y, d, e_stride, y_stride); } inline void cuda_parametric_relu(dim3 Gr, dim3 Bl, float *y, const float *x, - MatrixDim d, int src_stride, - const float *a, const float *b) { - cudaF_parametric_relu(Gr,Bl,y,x,d,src_stride,a,b); + MatrixDim d, int src_stride, const float *a, + const float *b) { + cudaF_parametric_relu(Gr, Bl, y, x, d, src_stride, a, b); } inline void cuda_diff_parametric_relu(dim3 Gr, dim3 Bl, float *eout, const float *e, const float *y, MatrixDim d, int e_stride, int y_stride, const float *a, const float *b) { - cudaF_diff_parametric_relu(Gr,Bl,eout,e,y,d,e_stride,y_stride,a,b); + cudaF_diff_parametric_relu(Gr, Bl, eout, e, y, d, e_stride, y_stride, a, b); } inline void cuda_heaviside(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride) { @@ -837,6 +841,10 @@ inline void cuda_max(dim3 Gr, dim3 Bl, double *mat, const double *A, MatrixDim dst_d, int src_stride) { 
cudaD_max(Gr, Bl, mat, A, dst_d, src_stride); } +inline void cuda_min(dim3 Gr, dim3 Bl, double *mat, const double *other, + MatrixDim mat_d, int other_stride) { + cudaD_min(Gr, Bl, mat, other, mat_d, other_stride); +} inline void cuda_mul_cols_vec(dim3 Gr, dim3 Bl, double *mat, const double *scale, MatrixDim d) { cudaD_mul_cols_vec(Gr, Bl, mat, scale, d); @@ -1093,15 +1101,15 @@ inline void cuda_diff_tanh(dim3 Gr, dim3 Bl, double *eout, const double *e, cudaD_diff_tanh(Gr, Bl, eout, e, y, d, e_stride, y_stride); } inline void cuda_parametric_relu(dim3 Gr, dim3 Bl, double *y, const double *x, - MatrixDim d, int src_stride, - const double *a, const double *b) { - cudaD_parametric_relu(Gr,Bl,y,x,d,src_stride,a,b); + MatrixDim d, int src_stride, const double *a, + const double *b) { + cudaD_parametric_relu(Gr, Bl, y, x, d, src_stride, a, b); } inline void cuda_diff_parametric_relu(dim3 Gr, dim3 Bl, double *eout, const double *e, const double *y, MatrixDim d, int e_stride, int y_stride, const double *a, const double *b) { - cudaD_diff_parametric_relu(Gr,Bl,eout,e,y,d,e_stride,y_stride,a,b); + cudaD_diff_parametric_relu(Gr, Bl, eout, e, y, d, e_stride, y_stride, a, b); } inline void cuda_heaviside(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride) { diff --git a/src/cudamatrix/cu-matrix-test.cc b/src/cudamatrix/cu-matrix-test.cc index 38c800d8e58..6d172a36954 100644 --- a/src/cudamatrix/cu-matrix-test.cc +++ b/src/cudamatrix/cu-matrix-test.cc @@ -870,6 +870,27 @@ static void UnitTestCuMatrixMax() { AssertEqual(Ha,Ha2); } +template +static void UnitTestCuMatrixMin() { + Matrix Ha(100,100); + Matrix Hb(100,100); + Ha.SetRandn(); + Hb.SetRandn(); + + CuMatrix Da(100,100); + CuMatrix Db(100,100); + Da.CopyFromMat(Ha); + Db.CopyFromMat(Hb); + + Da.Min(Db); + Ha.Min(Hb); + + Matrix Ha2(100,100); + Da.CopyToMat(&Ha2); + + AssertEqual(Ha, Ha2); +} + template @@ -2620,6 +2641,7 @@ template void CudaMatrixUnitTest() { UnitTestCuMatrixMulElements(); UnitTestCuMatrixDivElements(); UnitTestCuMatrixMax(); + UnitTestCuMatrixMin(); UnitTestCuMatrixMulColsVec(); UnitTestCuMatrixMulRowsVec(); UnitTestCuMatrixDivRowsVec(); diff --git a/src/cudamatrix/cu-matrix.cc b/src/cudamatrix/cu-matrix.cc index 652364f3dc8..cfa570233c3 100644 --- a/src/cudamatrix/cu-matrix.cc +++ b/src/cudamatrix/cu-matrix.cc @@ -721,6 +721,31 @@ void CuMatrixBase::Max(const CuMatrixBase& A) { } +template +void CuMatrixBase::Min(const CuMatrixBase& A) { + #if HAVE_CUDA == 1 + if (CuDevice::Instantiate().Enabled()) { + Timer tim; + + KALDI_ASSERT(num_cols_ == A.NumCols()); + KALDI_ASSERT(num_rows_ == A.NumRows()); + + dim3 dimGrid, dimBlock; + GetBlockSizesForSimpleMatrixOperation(NumRows(), NumCols(), + &dimGrid, &dimBlock); + + cuda_min(dimGrid, dimBlock, data_, A.data_, Dim(), A.Stride()); + CU_SAFE_CALL(cudaGetLastError()); + + CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed()); + } else + #endif + { + Mat().Min(A.Mat()); + } +} + + template void CuMatrixBase::MulColsVec(const CuVectorBase &scale) { #if HAVE_CUDA == 1 diff --git a/src/cudamatrix/cu-matrix.h b/src/cudamatrix/cu-matrix.h index 056abb0c8fb..0a4c4b0669e 100644 --- a/src/cudamatrix/cu-matrix.h +++ b/src/cudamatrix/cu-matrix.h @@ -405,6 +405,8 @@ class CuMatrixBase { void DivElements(const CuMatrixBase &A); /// Do, elementwise, *this = max(*this, A). void Max(const CuMatrixBase &A); + /// Do, elementwise, *this = min(*this, A). 
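+  /// A usage sketch (dimensions here are arbitrary; this mirrors Max()
+  /// directly above):
+  ///   CuMatrix<Real> a(4, 4), b(4, 4);
+  ///   a.SetRandn(); b.SetRandn();
+  ///   a.Min(b);  // each a(i, j) becomes min(old a(i, j), b(i, j))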
+ void Min(const CuMatrixBase &A); /// scale i'th column by scale[i] void MulColsVec(const CuVectorBase &scale); /// scale i'th row by scale[i] diff --git a/src/matrix/kaldi-matrix.cc b/src/matrix/kaldi-matrix.cc index 523af1d70ec..50c23a7be63 100644 --- a/src/matrix/kaldi-matrix.cc +++ b/src/matrix/kaldi-matrix.cc @@ -1041,6 +1041,19 @@ template void MatrixBase::Max(const MatrixBase &A) { } } +template void MatrixBase::Min(const MatrixBase &A) { + KALDI_ASSERT(A.NumRows() == NumRows() && A.NumCols() == NumCols()); + for (MatrixIndexT row = 0; row < num_rows_; row++) { + Real *row_data = RowData(row); + const Real *other_row_data = A.RowData(row); + MatrixIndexT num_cols = num_cols_; + for (MatrixIndexT col = 0; col < num_cols; col++) { + row_data[col] = std::min(row_data[col], + other_row_data[col]); + } + } +} + template void MatrixBase::Scale(Real alpha) { if (alpha == 1.0) return; diff --git a/src/matrix/kaldi-matrix.h b/src/matrix/kaldi-matrix.h index e254fcad118..25b999fe062 100644 --- a/src/matrix/kaldi-matrix.h +++ b/src/matrix/kaldi-matrix.h @@ -232,6 +232,8 @@ class MatrixBase { /// Set, element-by-element, *this = max(*this, A) void Max(const MatrixBase &A); + /// Set, element-by-element, *this = min(*this, A) + void Min(const MatrixBase &A); /// Equivalent to (*this) = (*this) * diag(scale). Scaling /// each column by a scalar taken from that dimension of the vector. From 68cee215cdb2418c5d2e692a7fd977a1d278c805 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 17 Feb 2017 00:15:44 -0500 Subject: [PATCH 430/530] [src] Make various tests faster, especially without GPU (#1428) --- src/cudamatrix/cu-math-test.cc | 5 +++-- src/cudamatrix/cu-matrix-speed-test.cc | 5 +++-- src/cudamatrix/cu-matrix-test.cc | 7 ++++--- src/cudamatrix/cu-rand-speed-test.cc | 3 +-- src/gmm/am-diag-gmm-test.cc | 4 ++-- src/nnet2/nnet-component-test.cc | 10 ++++++---- src/nnet3/nnet-component-test.cc | 21 ++++++++++++--------- src/nnet3/nnet-derivative-test.cc | 14 +++++++------- 8 files changed, 38 insertions(+), 31 deletions(-) diff --git a/src/cudamatrix/cu-math-test.cc b/src/cudamatrix/cu-math-test.cc index 9a78c652745..6b9119b42c1 100644 --- a/src/cudamatrix/cu-math-test.cc +++ b/src/cudamatrix/cu-math-test.cc @@ -537,8 +537,9 @@ template void CudaMathUnitTest() { int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (loop = 0; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU @@ -562,8 +563,8 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } #if HAVE_CUDA == 1 + } // No for loop if 'HAVE_CUDA != 1', CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-matrix-speed-test.cc b/src/cudamatrix/cu-matrix-speed-test.cc index 032351564c0..5710963254a 100644 --- a/src/cudamatrix/cu-matrix-speed-test.cc +++ b/src/cudamatrix/cu-matrix-speed-test.cc @@ -1085,8 +1085,9 @@ template void CudaMatrixSpeedTest() { int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (loop = 0; loop < 2; loop++) { if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); else @@ -1103,8 +1104,8 @@ int main() { #else kaldi::CudaMatrixSpeedTest(); #endif - } #if HAVE_CUDA == 1 + } // No for loop if 'HAVE_CUDA != 1', CuDevice::Instantiate().PrintProfile(); #endif std::cout << "Tests succeeded.\n"; diff --git 
a/src/cudamatrix/cu-matrix-test.cc b/src/cudamatrix/cu-matrix-test.cc index a6f84f3f6aa..b0fcdf1d192 100644 --- a/src/cudamatrix/cu-matrix-test.cc +++ b/src/cudamatrix/cu-matrix-test.cc @@ -2707,8 +2707,9 @@ template void CudaMatrixUnitTest() { int main() { - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (loop = 0; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); @@ -2733,9 +2734,9 @@ int main() { KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; - } - SetVerboseLevel(4); #if HAVE_CUDA == 1 + } // No for loop if 'HAVE_CUDA != 1', + SetVerboseLevel(4); CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/cudamatrix/cu-rand-speed-test.cc b/src/cudamatrix/cu-rand-speed-test.cc index 23f82eab977..798f706372e 100644 --- a/src/cudamatrix/cu-rand-speed-test.cc +++ b/src/cudamatrix/cu-rand-speed-test.cc @@ -214,8 +214,7 @@ int main() { kaldi::CuRandGaussianVectorSpeedTest(iter); fprintf(stderr, "--- ELAPSED %fs.\n\n", t.Elapsed()); #if HAVE_CUDA == 1 - } // NO for loop if 'HAVE_CUDA != 1', - + } // No for loop if 'HAVE_CUDA != 1', CuDevice::Instantiate().PrintProfile(); #endif std::cout << "Tests succeeded.\n"; diff --git a/src/gmm/am-diag-gmm-test.cc b/src/gmm/am-diag-gmm-test.cc index 54ca3c153ce..d40ef3df2e4 100644 --- a/src/gmm/am-diag-gmm-test.cc +++ b/src/gmm/am-diag-gmm-test.cc @@ -66,7 +66,7 @@ void TestAmDiagGmmIO(const AmDiagGmm &am_gmm) { loglike2 += am_gmm2->LogLikelihood(i, feat); kaldi::AssertEqual(loglike, loglike2, 1e-4); delete am_gmm2; - + unlink("tmpf"); unlink("tmpfb"); } @@ -122,7 +122,7 @@ void UnitTestAmDiagGmm() { } int main() { - for (int i = 0; i < 10; i++) + for (int i = 0; i < 5; i++) UnitTestAmDiagGmm(); std::cout << "Test OK.\n"; return 0; diff --git a/src/nnet2/nnet-component-test.cc b/src/nnet2/nnet-component-test.cc index 4589ef52aa7..04e476c01bd 100644 --- a/src/nnet2/nnet-component-test.cc +++ b/src/nnet2/nnet-component-test.cc @@ -856,9 +856,9 @@ int main() { using namespace kaldi; using namespace kaldi::nnet2; - - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (loop = 0; loop < 2; loop++) { //// Uncomment the following line to expose the bug in UnitTestDropoutComponent //CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) @@ -868,7 +868,9 @@ int main() { #endif BasicDebugTestForSpliceMax(true); - for (int32 i = 0; i < 3; i++) { + // We used to test this 3 times, but now that nnet2 is rarely changed, + // reducing it to once. 
+ for (int32 i = 0; i < 1; i++) { UnitTestGenericComponent(); UnitTestGenericComponent(); UnitTestGenericComponent("power=1.5"); @@ -905,8 +907,8 @@ int main() { else KALDI_LOG << "Tests with GPU use (if available) succeeded."; } - } #if HAVE_CUDA == 1 + } // No for loop if 'HAVE_CUDA != 1', CuDevice::Instantiate().PrintProfile(); #endif return 0; diff --git a/src/nnet3/nnet-component-test.cc b/src/nnet3/nnet-component-test.cc index 3cc6af1c70d..36c3f80833b 100644 --- a/src/nnet3/nnet-component-test.cc +++ b/src/nnet3/nnet-component-test.cc @@ -25,9 +25,9 @@ namespace kaldi { namespace nnet3 { // Reset seeds for test time for RandomComponent static void ResetSeed(int32 rand_seed, const Component &c) { - RandomComponent *rand_component = + RandomComponent *rand_component = const_cast(dynamic_cast(&c)); - + if (rand_component != NULL) { srand(rand_seed); rand_component->ResetGenerator(); @@ -198,7 +198,7 @@ void TestSimpleComponentPropagateProperties(const Component &c) { int32 properties = c.Properties(); Component *c_copy = NULL, *c_copy_scaled = NULL; int32 rand_seed = Rand(); - + if (RandInt(0, 1) == 0) c_copy = c.Copy(); // This will test backprop with an updatable component. if (RandInt(0, 1) == 0 && @@ -234,7 +234,7 @@ void TestSimpleComponentPropagateProperties(const Component &c) { if ((properties & kPropagateAdds) && (properties & kPropagateInPlace)) { KALDI_ERR << "kPropagateAdds and kPropagateInPlace flags are incompatible."; } - + ResetSeed(rand_seed, c); c.Propagate(NULL, input_data, &output_data1); @@ -327,7 +327,7 @@ bool TestSimpleComponentDataDerivative(const Component &c, output_deriv(num_rows, output_dim, kSetZero, output_stride_type); input_data.SetRandn(); output_deriv.SetRandn(); - + ResetSeed(rand_seed, c); c.Propagate(NULL, input_data, &output_data); @@ -522,8 +522,9 @@ int main() { using namespace kaldi; using namespace kaldi::nnet3; TestStringsApproxEqual(); - for (kaldi::int32 loop = 0; loop < 2; loop++) { + kaldi::int32 loop = 0; #if HAVE_CUDA == 1 + for (loop = 0; loop < 2; loop++) { //CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); @@ -531,9 +532,11 @@ int main() { CuDevice::Instantiate().SelectGpuId("yes"); #endif UnitTestNnetComponent(); - } - - KALDI_LOG << "Nnet component ntests succeeded."; +#if HAVE_CUDA == 1 + } // No for loop if 'HAVE_CUDA != 1', + CuDevice::Instantiate().PrintProfile(); +#endif + KALDI_LOG << "Nnet component tests succeeded."; return 0; } diff --git a/src/nnet3/nnet-derivative-test.cc b/src/nnet3/nnet-derivative-test.cc index a12ca2ae0af..3a974fa0b6d 100644 --- a/src/nnet3/nnet-derivative-test.cc +++ b/src/nnet3/nnet-derivative-test.cc @@ -425,11 +425,10 @@ void UnitTestNnetInputDerivatives() { int main() { using namespace kaldi; using namespace kaldi::nnet3; + kaldi::int32 loop = 0; //SetVerboseLevel(2); - - - for (kaldi::int32 loop = 0; loop < 2; loop++) { #if HAVE_CUDA == 1 + for (loop = 0; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); @@ -438,10 +437,11 @@ int main() { #endif UnitTestNnetModelDerivatives(); UnitTestNnetInputDerivatives(); - } - - KALDI_LOG << "Nnet tests succeeded."; +#if HAVE_CUDA == 1 + } // No for loop if 'HAVE_CUDA != 1', + CuDevice::Instantiate().PrintProfile(); +#endif + KALDI_LOG << "Nnet derivative tests succeeded."; return 0; } - From deda2bdc140943d437d25339b5111a047f0db91e Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 17 Feb 2017 01:33:41 -0500 Subject: 
[PATCH 431/530] [src] Make chain-supervision-test.cc faster when no GPU --- src/chain/chain-supervision-test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/chain/chain-supervision-test.cc b/src/chain/chain-supervision-test.cc index e38fbca745f..33d3c74e3a3 100644 --- a/src/chain/chain-supervision-test.cc +++ b/src/chain/chain-supervision-test.cc @@ -606,9 +606,9 @@ void TestRanges() { int main() { using namespace kaldi; - - for (int32 loop = 0; loop < 2; loop++) { + int32 loop = 0; #if HAVE_CUDA == 1 + for (loop = 0; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); @@ -621,7 +621,7 @@ int main() { } kaldi::chain::TestRanges(); #if HAVE_CUDA == 1 - CuDevice::Instantiate().PrintProfile(); -#endif } + CuDevice::Instantiate().PrintProfile(); +#endif } From 2145519961f67b446b6e028578eaef676d75ec24 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 17 Feb 2017 14:31:02 -0500 Subject: [PATCH 432/530] [build] Updating version file-- this commit marks version 5.1.0 --- src/.version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/.version b/src/.version index 819e07a2243..a75b92f1ed7 100644 --- a/src/.version +++ b/src/.version @@ -1 +1 @@ -5.0 +5.1 From 6477decfe13de080cf415848816b52f64e03df22 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Fri, 17 Feb 2017 14:14:59 -0800 Subject: [PATCH 433/530] [build] Change travis build to use clang; remove -rdynamic from CXXFLAGS (it's in LDFLAGS) Increase the number of parallel make jobs to 6. Remove -rdynamic flag from CXXFLAGS. It is already in LDFLAGS. --- .travis.yml | 11 ++++++----- src/makefiles/darwin.mk | 6 +++--- src/makefiles/linux_atlas.mk | 9 ++++++++- src/makefiles/linux_atlas_arm.mk | 9 ++++++++- src/makefiles/linux_atlas_ppc64le.mk | 9 ++++++++- src/makefiles/linux_clapack.mk | 9 ++++++++- src/makefiles/linux_clapack_arm.mk | 9 ++++++++- src/makefiles/linux_openblas.mk | 9 ++++++++- src/makefiles/linux_openblas_arm.mk | 9 ++++++++- src/makefiles/linux_openblas_ppc64le.mk | 10 +++++++++- src/makefiles/linux_x86_64_mkl.mk | 9 ++++++++- tools/extras/travis_script.sh | 4 ++-- 12 files changed, 84 insertions(+), 19 deletions(-) diff --git a/.travis.yml b/.travis.yml index d3ad85363ce..54ac9f11c9f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,8 @@ notifications: email: false language: cpp +sudo: false +dist: trusty os: - linux @@ -9,13 +11,12 @@ os: addons: apt: sources: - - ubuntu-toolchain-r-test + - llvm-toolchain-trusty-3.9 packages: - gdb - - gcc-4.9 - - g++-4.9 - - gfortran-4.9 + - gfortran - liblapack-dev + - clang-3.9 branches: only: @@ -28,7 +29,7 @@ before_install: - tools/extras/travis_install_bindeps.sh $XROOT script: - - CXX=g++-4.9 + - CXX=clang++-3.9 CFLAGS="-march=native" LDFLAGS="-llapack" INCDIRS="$XROOT/usr/include" diff --git a/src/makefiles/darwin.mk b/src/makefiles/darwin.mk index dffcc878083..81351d185b6 100644 --- a/src/makefiles/darwin.mk +++ b/src/makefiles/darwin.mk @@ -22,9 +22,6 @@ ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif -LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -g -LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) -framework Accelerate -lm -lpthread -ldl - # Compiler specific flags COMPILER = $(shell $(CXX) -v 2>&1) ifeq ($(findstring clang,$(COMPILER)),clang) @@ -34,3 +31,6 @@ else ifeq ($(findstring GCC,$(COMPILER)),GCC) # Allow implicit conversions between vectors. 
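 # (Specifically, -flax-vector-conversions lets GCC implicitly convert
 # between vector types whose element types differ; clang does not need
 # this flag.)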
CXXFLAGS += -flax-vector-conversions endif + +LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -g +LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) -framework Accelerate -lm -lpthread -ldl diff --git a/src/makefiles/linux_atlas.mk b/src/makefiles/linux_atlas.mk index b30c7ad5474..32a7f43fa50 100644 --- a/src/makefiles/linux_atlas.mk +++ b/src/makefiles/linux_atlas.mk @@ -21,12 +21,19 @@ CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) \ - -msse -msse2 -pthread -rdynamic \ + -msse -msse2 -pthread \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif +# Compiler specific flags +COMPILER = $(shell $(CXX) -v 2>&1) +ifeq ($(findstring clang,$(COMPILER)),clang) +# Suppress annoying clang warnings that are perfectly valid per spec. +CXXFLAGS += -Wno-mismatched-tags +endif + LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl diff --git a/src/makefiles/linux_atlas_arm.mk b/src/makefiles/linux_atlas_arm.mk index 35e98da51d7..4c83ce71d6c 100644 --- a/src/makefiles/linux_atlas_arm.mk +++ b/src/makefiles/linux_atlas_arm.mk @@ -21,12 +21,19 @@ CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) \ - -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ + -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif +# Compiler specific flags +COMPILER = $(shell $(CXX) -v 2>&1) +ifeq ($(findstring clang,$(COMPILER)),clang) +# Suppress annoying clang warnings that are perfectly valid per spec. +CXXFLAGS += -Wno-mismatched-tags +endif + LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl diff --git a/src/makefiles/linux_atlas_ppc64le.mk b/src/makefiles/linux_atlas_ppc64le.mk index a5962f7964b..1e4194c2869 100644 --- a/src/makefiles/linux_atlas_ppc64le.mk +++ b/src/makefiles/linux_atlas_ppc64le.mk @@ -22,12 +22,19 @@ CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) \ -m64 -maltivec -mcpu=power8 -mtune=power8 -mpower8-vector -mvsx \ - -pthread -rdynamic \ + -pthread \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif +# Compiler specific flags +COMPILER = $(shell $(CXX) -v 2>&1) +ifeq ($(findstring clang,$(COMPILER)),clang) +# Suppress annoying clang warnings that are perfectly valid per spec. +CXXFLAGS += -Wno-mismatched-tags +endif + LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl diff --git a/src/makefiles/linux_clapack.mk b/src/makefiles/linux_clapack.mk index 87e016aae5b..75a514a85d7 100644 --- a/src/makefiles/linux_clapack.mk +++ b/src/makefiles/linux_clapack.mk @@ -15,12 +15,19 @@ CXXFLAGS = -std=c++11 -I.. 
-I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_CLAPACK -I../../tools/CLAPACK \ - -msse -msse2 -pthread -rdynamic \ + -msse -msse2 -pthread \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif +# Compiler specific flags +COMPILER = $(shell $(CXX) -v 2>&1) +ifeq ($(findstring clang,$(COMPILER)),clang) +# Suppress annoying clang warnings that are perfectly valid per spec. +CXXFLAGS += -Wno-mismatched-tags +endif + LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl diff --git a/src/makefiles/linux_clapack_arm.mk b/src/makefiles/linux_clapack_arm.mk index d21e640d3c1..52a2a663eb7 100644 --- a/src/makefiles/linux_clapack_arm.mk +++ b/src/makefiles/linux_clapack_arm.mk @@ -15,12 +15,19 @@ CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_CLAPACK -I../../tools/CLAPACK \ - -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ + -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif +# Compiler specific flags +COMPILER = $(shell $(CXX) -v 2>&1) +ifeq ($(findstring clang,$(COMPILER)),clang) +# Suppress annoying clang warnings that are perfectly valid per spec. +CXXFLAGS += -Wno-mismatched-tags +endif + LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl diff --git a/src/makefiles/linux_openblas.mk b/src/makefiles/linux_openblas.mk index d145c687438..1da16117a68 100644 --- a/src/makefiles/linux_openblas.mk +++ b/src/makefiles/linux_openblas.mk @@ -21,12 +21,19 @@ CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_OPENBLAS -I$(OPENBLASINC) \ - -msse -msse2 -pthread -rdynamic \ + -msse -msse2 -pthread \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif +# Compiler specific flags +COMPILER = $(shell $(CXX) -v 2>&1) +ifeq ($(findstring clang,$(COMPILER)),clang) +# Suppress annoying clang warnings that are perfectly valid per spec. +CXXFLAGS += -Wno-mismatched-tags +endif + LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl diff --git a/src/makefiles/linux_openblas_arm.mk b/src/makefiles/linux_openblas_arm.mk index 29a91752509..7f462925c74 100644 --- a/src/makefiles/linux_openblas_arm.mk +++ b/src/makefiles/linux_openblas_arm.mk @@ -21,12 +21,19 @@ CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_OPENBLAS -I$(OPENBLASINC) \ - -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread -rdynamic \ + -ftree-vectorize -mfloat-abi=hard -mfpu=neon -pthread \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif +# Compiler specific flags +COMPILER = $(shell $(CXX) -v 2>&1) +ifeq ($(findstring clang,$(COMPILER)),clang) +# Suppress annoying clang warnings that are perfectly valid per spec. 
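+# (-Wmismatched-tags fires when a type is forward-declared with 'struct'
+# but defined with 'class', or vice versa, which the standard permits.)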
+CXXFLAGS += -Wno-mismatched-tags +endif + LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl diff --git a/src/makefiles/linux_openblas_ppc64le.mk b/src/makefiles/linux_openblas_ppc64le.mk index 6550d915c6c..c098b9d92e8 100644 --- a/src/makefiles/linux_openblas_ppc64le.mk +++ b/src/makefiles/linux_openblas_ppc64le.mk @@ -22,12 +22,20 @@ CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_OPENBLAS -I$(OPENBLASINC) \ -m64 -maltivec -mcpu=power8 -mtune=power8 -mpower8-vector -mvsx \ - -pthread -rdynamic \ + -pthread \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif +# Compiler specific flags +COMPILER = $(shell $(CXX) -v 2>&1) +ifeq ($(findstring clang,$(COMPILER)),clang) +# Suppress annoying clang warnings that are perfectly valid per spec. +CXXFLAGS += -Wno-mismatched-tags +endif + + LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(OPENBLASLIBS) -lm -lpthread -ldl diff --git a/src/makefiles/linux_x86_64_mkl.mk b/src/makefiles/linux_x86_64_mkl.mk index 50b4047def7..26d22253d08 100644 --- a/src/makefiles/linux_x86_64_mkl.mk +++ b/src/makefiles/linux_x86_64_mkl.mk @@ -29,13 +29,20 @@ CXXFLAGS = -std=c++11 -I.. -I$(OPENFSTINC) $(EXTRA_CXXFLAGS) \ -Wno-deprecated-declarations -Winit-self \ -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_MKL -I$(MKLROOT)/include \ - -m64 -msse -msse2 -pthread -rdynamic \ + -m64 -msse -msse2 -pthread \ -g # -O0 -DKALDI_PARANOID ifeq ($(KALDI_FLAVOR), dynamic) CXXFLAGS += -fPIC endif +# Compiler specific flags +COMPILER = $(shell $(CXX) -v 2>&1) +ifeq ($(findstring clang,$(COMPILER)),clang) +# Suppress annoying clang warnings that are perfectly valid per spec. +CXXFLAGS += -Wno-mismatched-tags +endif + ## Use the following for STATIC LINKING of the SEQUENTIAL version of MKL MKL_STA_SEQ = $(MKLLIB)/libmkl_solver_lp64_sequential.a -Wl,--start-group \ $(MKLLIB)/libmkl_intel_lp64.a $(MKLLIB)/libmkl_sequential.a \ diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index b3906450525..8aea788d9bc 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -4,12 +4,12 @@ # Typical usage shown below; any one can be safely left unset. # INCDIRS="~/xroot/usr/include" # LIBDIRS="~/xroot/usr/lib /usr/lib/openblas-base" -# CXX=gcc++-4.9 +# CXX=clang++-3.9 # CFLAGS="-march=native -O2" # LDFLAGS="-llapack" # Maximum make parallelism. Simply -j runs out of memory on Travis VM. -MAXPAR=4 +MAXPAR=6 # Directories with code that can be tested with Travis (space-separated) TESTABLE_DIRS="src/" From 23338bf511b2a1afa902402bccbb541ac611999c Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 19 Feb 2017 18:02:55 -0500 Subject: [PATCH 434/530] [scripts] fix syntax error in validate_lang.pl [thanks: daniel galvez] --- egs/wsj/s5/utils/validate_lang.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/utils/validate_lang.pl b/egs/wsj/s5/utils/validate_lang.pl index e5bdf75787e..2e8125b1dd7 100755 --- a/egs/wsj/s5/utils/validate_lang.pl +++ b/egs/wsj/s5/utils/validate_lang.pl @@ -760,7 +760,7 @@ sub check_summation { if (! 
defined $is_disambig{$phone}) { if ($phone eq "<>") { $state = "eos"; - } else if ($phone == 0) { + } elsif ($phone == 0) { $exit = 1; print "--> ERROR: unexpected phone sequence=$phoneseq, wordseq=$wordseq\n"; last; } else { $state = $wbtype{$phone}; From 7d538e2bd04fcf59630dfbc44989f486455f9851 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Sun, 19 Feb 2017 19:02:35 -0800 Subject: [PATCH 435/530] [build] Increase OpenFst version 1.6.0->1.6.1. (#1434) --- tools/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/Makefile b/tools/Makefile index 4a8e08823a0..f40a75da5f8 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -7,7 +7,7 @@ CC = gcc # used for sph2pipe # Note: OpenFst requires a relatively recent C++ compiler with C++11 support, # e.g. g++ >= 4.7, Apple clang >= 5.0 or LLVM clang >= 3.3. -OPENFST_VERSION = 1.6.0 +OPENFST_VERSION = 1.6.1 OPENFST_VER_NUM := $(shell echo $(OPENFST_VERSION) | sed 's/\./ /g' | xargs printf "%d%02d%02d") ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10503)","1") From 84585876f4f54fa63501b8bbc53f45f86d7fb845 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Mon, 20 Feb 2017 13:11:58 -0500 Subject: [PATCH 436/530] [build] Use github not sourceforge for IRSTLM [sourceforge repo dead?] (#1435) I couldn't figure out what version corresponds to svn -r 618, so I'm just using master. --- tools/extras/install_irstlm.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/extras/install_irstlm.sh b/tools/extras/install_irstlm.sh index 1bd9aea4aaa..91635a30bbc 100755 --- a/tools/extras/install_irstlm.sh +++ b/tools/extras/install_irstlm.sh @@ -18,8 +18,7 @@ if [ ! -x ./irstlm ] ; then exit 1 fi ( - svn -r 618 co --non-interactive --trust-server-cert \ - https://svn.code.sf.net/p/irstlm/code/trunk irstlm + git clone git@github.com:irstlm-team/irstlm.git irstlm ) || { errcho "****() Error getting the IRSTLM sources. The server hosting it" errcho "****() might be down." From d218412ac0e4f18250a9db7c123a6791020f5f07 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Mon, 20 Feb 2017 14:00:58 -0500 Subject: [PATCH 437/530] [egs] clean up the HKUST scripts and add scoring filters (#1436) Some fixes to scoring (e.g. don't split english words into characters, only chinese ones). Modify scoring to produce CER and WER numbers. --- egs/hkust/s5/local/character_tokenizer | 32 ++++++++++++++++++++++++ egs/hkust/s5/local/hkust_data_prep.sh | 7 +++--- egs/hkust/s5/local/hkust_prepare_dict.sh | 3 +-- egs/hkust/s5/local/hkust_train_lms.sh | 6 ++++- egs/hkust/s5/local/score.sh | 9 ++++++- egs/hkust/s5/local/wer_output_filter | 25 ++++++++++++++++++ 6 files changed, 74 insertions(+), 8 deletions(-) create mode 100755 egs/hkust/s5/local/character_tokenizer mode change 120000 => 100755 egs/hkust/s5/local/score.sh create mode 100755 egs/hkust/s5/local/wer_output_filter diff --git a/egs/hkust/s5/local/character_tokenizer b/egs/hkust/s5/local/character_tokenizer new file mode 100755 index 00000000000..a3d8098d17f --- /dev/null +++ b/egs/hkust/s5/local/character_tokenizer @@ -0,0 +1,32 @@ +#!/usr/bin/env perl +# Copyright 2012-2014 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0 +use utf8; + +use open qw(:encoding(utf8)); +binmode STDIN, ":utf8"; +binmode STDOUT, ":utf8"; +binmode STDERR, ":utf8"; + +while (<>) { + @F = split " "; + print $F[0] . 
" "; + foreach $s (@F[1..$#F]) { + if (($s =~ /\[.*\]/) || ($s =~ /\<.*\>/) || ($s =~ "!SIL")) { + print " $s"; + } else { + @chars = split "", $s; + foreach $c (@chars) { + if ($c =~ /\p{InCJK_Unified_Ideographs}/) { + print " $c"; + } else { + print "$c"; + } + } + } + print " "; + } + print "\n"; +} + + diff --git a/egs/hkust/s5/local/hkust_data_prep.sh b/egs/hkust/s5/local/hkust_data_prep.sh index 07f3c9677d8..207f03af36b 100755 --- a/egs/hkust/s5/local/hkust_data_prep.sh +++ b/egs/hkust/s5/local/hkust_data_prep.sh @@ -104,8 +104,8 @@ awk '{ segment=$1; split(segment,S,"-"); side=S[2]; audioname=S[1];startf=S[3];e print segment " " audioname "-" side " " startf/100 " " endf/100}' <$dev_dir/text > $dev_dir/segments awk '{name = $0; gsub(".sph$","",name); gsub(".*/","",name); print(name " " $0)}' $dev_dir/sph.flist > $dev_dir/sph.scp -sph2pipe=`cd ../../..; echo $PWD/tools/sph2pipe_v2.5/sph2pipe` -[ ! -f $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; +sph2pipe=`which sph2pipe` || sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +[ ! -x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; cat $train_dir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);}' | \ @@ -136,5 +136,4 @@ cat $dev_dir/segments | awk '{spk=substr($1,1,33); print $1 " " spk}' > $dev_dir cat $dev_dir/utt2spk | sort -k 2 | utils/utt2spk_to_spk2utt.pl > $dev_dir/spk2utt || exit 1; echo "$0: HKUST data preparation succeeded" - -exit; +exit 0 diff --git a/egs/hkust/s5/local/hkust_prepare_dict.sh b/egs/hkust/s5/local/hkust_prepare_dict.sh index 5cd864c52cc..6aca37586ed 100755 --- a/egs/hkust/s5/local/hkust_prepare_dict.sh +++ b/egs/hkust/s5/local/hkust_prepare_dict.sh @@ -312,5 +312,4 @@ cat $dict_dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", cat - $dict_dir/lexicon1.txt > $dict_dir/lexicon.txt || exit 1; echo "$0: HKUST dict preparation succeeded" - -exit; +exit 0; diff --git a/egs/hkust/s5/local/hkust_train_lms.sh b/egs/hkust/s5/local/hkust_train_lms.sh index d6d0b2aa0bc..8520bb26d2d 100755 --- a/egs/hkust/s5/local/hkust_train_lms.sh +++ b/egs/hkust/s5/local/hkust_train_lms.sh @@ -19,9 +19,13 @@ done dir=data/local/lm mkdir -p $dir +export LC_ALL=C # You'll get errors about things being not sorted, if you + # have a different locale. kaldi_lm=`which train_lm.sh` if [ ! -x $kaldi_lm ]; then - echo "train_lm.sh is not found. Checkout tools/extra/install_kaldi_lm.sh" + echo "$0: train_lm.sh is not found. 
That might mean it's not installed"
+  echo "$0: or that it is not on your PATH"
+  echo "$0: Use the script tools/extras/install_kaldi_lm.sh to install it"
   exit 1
 fi
 
diff --git a/egs/hkust/s5/local/score.sh b/egs/hkust/s5/local/score.sh
deleted file mode 120000
index df664a0f1f1..00000000000
--- a/egs/hkust/s5/local/score.sh
+++ /dev/null
@@ -1 +0,0 @@
-../steps/scoring/score_kaldi_cer.sh
\ No newline at end of file
diff --git a/egs/hkust/s5/local/score.sh b/egs/hkust/s5/local/score.sh
new file mode 100755
index 00000000000..766eaf3cd44
--- /dev/null
+++ b/egs/hkust/s5/local/score.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+set -e -o pipefail
+set -x
+steps/score_kaldi.sh "$@"
+steps/score_kaldi_cer.sh --stage 2 "$@"
+
+echo "$0: Done"
diff --git a/egs/hkust/s5/local/wer_output_filter b/egs/hkust/s5/local/wer_output_filter
new file mode 100755
index 00000000000..aceeeec41b4
--- /dev/null
+++ b/egs/hkust/s5/local/wer_output_filter
@@ -0,0 +1,25 @@
+#!/usr/bin/env perl
+# Copyright 2012-2014  Johns Hopkins University (Author: Yenda Trmal)
+# Apache 2.0
+use utf8;
+
+use open qw(:encoding(utf8));
+binmode STDIN, ":utf8";
+binmode STDOUT, ":utf8";
+binmode STDERR, ":utf8";
+
+while (<>) {
+  @F = split " ";
+  print $F[0] . " ";
+  foreach $s (@F[1..$#F]) {
+    if (($s =~ /\[.*\]/) || ($s =~ /\<.*\>/) || ($s =~ "!SIL")) {
+      print "";
+    } else {
+      print "$s"
+    }
+    print " ";
+  }
+  print "\n";
+}
+
+

From 7b862c725e9eb8858fa12f7e64359ec497ff8b35 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Mon, 20 Feb 2017 19:02:42 -0500
Subject: [PATCH 438/530] [src,doc] Update version documentation with version
 5.1; makefile fix; add details to info string for nnet3 component.

---
 src/doc/get_version_info.sh         |  6 +++++-
 src/doc/versions.dox                | 10 ++++++----
 src/fstbin/Makefile                 |  5 ++---
 src/nnet3/nnet-general-component.cc |  2 ++
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/doc/get_version_info.sh b/src/doc/get_version_info.sh
index b37ac5f400f..568e53c88dd 100755
--- a/src/doc/get_version_info.sh
+++ b/src/doc/get_version_info.sh
@@ -28,7 +28,11 @@ fi
 
 # Note: when you add new tuples here you'll want to add new
 # \htmlinclude directives in versions.dox.
-for tuple in "5.0 master c160a9883"; do
+# the tuples will generally be of the form: "x.x master yyyyy"
+# where yyyyy is the result of git log -1 src/.version done on
+# that version of Kaldi (we only update the .version file when
+# the major/minor version number changes).
+for tuple in "5.0 master c160a9883" "5.1 master 2145519961"; do
   major_minor_number=$(echo $tuple | awk '{print $1}') # e.g. 5.0
   branch=$(echo $tuple | awk '{print $2}') # e.g. 'master', or '5.1' (it's a branch name)
   first_commit=$(echo $tuple | awk '{print $3}')
diff --git a/src/doc/versions.dox b/src/doc/versions.dox
index 56cdcdf4118..0a16c5f1d3a 100644
--- a/src/doc/versions.dox
+++ b/src/doc/versions.dox
@@ -19,7 +19,7 @@
 
 // note: you have to run the file get_version_info.sh in order
 // to generate the HTML files that we include via \htmlinclude.
-
+// Any time you add a new version you need to edit get_version_info.sh
 
 /**
@@ -62,7 +62,8 @@
   \subsection versions_versions_50 Version 5.0
 
    This is the first major/minor version number after introducing the versioning scheme.
-   It is currently available in the 'master' branch on github.
+   The latest revision of version 5.0 is saved as branch "5.0" on github. 
+ Specific patches: \htmlinclude 5.0.html @@ -70,8 +71,7 @@ \subsection versions_versions_51 Version 5.1 - Version 5.1 is in preparation and version 5.1.0 does not actually exist yet. - You can see the development in the 'shortcut' branch on github. + Version 5.1 is the current master branch of Kaldi. Some of the major changes introduced in version 5.1 are: - Kaldi now requires C++11 to compile, and we support only the latest version of OpenFst (1.6.0). (This simplifies Kaldi's code, and will later @@ -90,5 +90,7 @@ - The sequence-training scripts in nnet3 are refactored and are now simpler and use less disk space. + \htmlinclude 5.1.html + */ diff --git a/src/fstbin/Makefile b/src/fstbin/Makefile index 8d544e40ea0..da26c58edd7 100644 --- a/src/fstbin/Makefile +++ b/src/fstbin/Makefile @@ -15,8 +15,7 @@ BINFILES = fstdeterminizestar \ fstmakecontextsyms fstaddsubsequentialloop fstaddselfloops \ fstrmepslocal fstcomposecontext fsttablecompose fstrand fstfactor \ fstdeterminizelog fstphicompose fstrhocompose fstpropfinal fstcopy \ - fstpushspecial fsts-to-transcripts fsts-project fsts-union \ - fsts-scale fsts-difference + fstpushspecial fsts-to-transcripts fsts-project fsts-union OBJFILES = @@ -27,6 +26,6 @@ LIBFILE = ADDLIBS = ../fstext/kaldi-fstext.a ../util/kaldi-util.a \ ../thread/kaldi-thread.a ../matrix/kaldi-matrix.a \ - ../base/kaldi-base.a + ../base/kaldi-base.a include ../makefiles/default_rules.mk diff --git a/src/nnet3/nnet-general-component.cc b/src/nnet3/nnet-general-component.cc index 926ebd9b07d..4aa65ce70ed 100644 --- a/src/nnet3/nnet-general-component.cc +++ b/src/nnet3/nnet-general-component.cc @@ -970,10 +970,12 @@ std::string BackpropTruncationComponent::Info() const { stream << Type() << ", dim=" << dim_ << ", scale=" << scale_ << ", count=" << std::setprecision(3) << count_ << std::setprecision(6) + << ", recurrence-interval=" << recurrence_interval_ << ", clipping-threshold=" << clipping_threshold_ << ", clipped-proportion=" << (count_ > 0.0 ? num_clipped_ / count_ : 0) << ", zeroing-threshold=" << zeroing_threshold_ + << ", zeroing-interval=" << zeroing_interval_ << ", zeroed-proportion=" << (count_zeroing_boundaries_ > 0.0 ? 
num_zeroed_ / count_zeroing_boundaries_ : 0) From 483192ed448a7da7498be44f8640816f6ad9282b Mon Sep 17 00:00:00 2001 From: Shiyin Kang Date: Tue, 21 Feb 2017 12:24:32 +0800 Subject: [PATCH 439/530] [build] update .gitignore: ignore openfst-1.6.1 (#1439) --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index e6d9c0fd612..02a88acb621 100644 --- a/.gitignore +++ b/.gitignore @@ -92,6 +92,8 @@ GSYMS /tools/openfst-1.5.4/ /tools/openfst-1.6.0.tar.gz /tools/openfst-1.6.0/ +/tools/openfst-1.6.1.tar.gz +/tools/openfst-1.6.1/ /tools/pa_stable_v19_20111121.tgz /tools/portaudio/ /tools/sctk-2.4.0-20091110-0958.tar.bz2 From 8c77d2cc90ccb8345ddb087e1e71321d4b077fde Mon Sep 17 00:00:00 2001 From: Hossein Hadian Date: Tue, 21 Feb 2017 00:28:25 -0500 Subject: [PATCH 440/530] [src,egs,scripts] Support frame-subsampling with non-chain nnet3 models (#1238) --- egs/swbd/s5c/local/nnet3/run_tdnn_lfr.sh | 1 + egs/swbd/s5c/local/nnet3/tuning/run_tdnn_e.sh | 130 ++++++++++++++ .../s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh | 161 +++++++++++++++++ .../s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh | 163 ++++++++++++++++++ .../s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh | 162 +++++++++++++++++ egs/wsj/s5/steps/nnet3/chain/build_tree.sh | 17 +- src/bin/convert-ali.cc | 7 + src/hmm/hmm-utils-test.cc | 8 +- src/hmm/hmm-utils.cc | 96 ++++++++++- src/hmm/hmm-utils.h | 10 ++ 10 files changed, 740 insertions(+), 15 deletions(-) create mode 120000 egs/swbd/s5c/local/nnet3/run_tdnn_lfr.sh create mode 100755 egs/swbd/s5c/local/nnet3/tuning/run_tdnn_e.sh create mode 100755 egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh create mode 100755 egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh create mode 100755 egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh diff --git a/egs/swbd/s5c/local/nnet3/run_tdnn_lfr.sh b/egs/swbd/s5c/local/nnet3/run_tdnn_lfr.sh new file mode 120000 index 00000000000..bff3b4164f7 --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/run_tdnn_lfr.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lfr1c.sh \ No newline at end of file diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_e.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_e.sh new file mode 100755 index 00000000000..98cd8d5f34f --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_e.sh @@ -0,0 +1,130 @@ +#!/bin/bash + +# e is as c, but uses splicing similar to chain's without changing number of +# layers. + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +# System tdnn_c tdnn_e +# WER on train_dev(tg) 17.37 16.75 +# WER on train_dev(fg) 15.94 15.34 +# WER on eval2000(tg) 20.0 19.5 +# WER on eval2000(fg) 18.2 18.0 +# Final train prob -1.43781 -1.40491 +# Final valid prob -1.56895 -1.55255 + + +stage=9 +affix= +train_stage=-10 +has_fisher=true +speed_perturb=true +common_egs_dir= +#exp/nnet3/tdnn_c_sp/egs +reporting_email= +remove_egs=true + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + + output-layer name=output input=tdnn5 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 10 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $ali_dir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +graph_dir=exp/tri4/graph_sw1_tg +if [ $stage -le 11 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh new file mode 100755 index 00000000000..a82b2078acb --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh @@ -0,0 +1,161 @@ +#!/bin/bash + +# _lfr1a is as _c, but is LFR (low frame rate): it uses triphone chain topology +# with a frame subsampling factor of 3. + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. 
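+# (In other words: with a frame-subsampling-factor of 3 the network is
+# evaluated at one third of the feature frame rate, which is why this script
+# writes a frame_subsampling_factor file and uses 0.333 graph/acoustic
+# scales further down.)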
+ +# System tdnn_c tdnn_lfr1a +# WER on train_dev(tg) 17.37 17.25 +# WER on train_dev(fg) 15.94 15.90 +# WER on eval2000(tg) 20.0 20.1 +# WER on eval2000(fg) 18.2 18.5 +# Final train prob -1.43781 -1.32434 +# Final valid prob -1.56895 -1.42206 + + +stage=11 +affix= +train_stage=-10 +has_fisher=true +speed_perturb=true +common_egs_dir= +reporting_email= +remove_egs=true +leftmost_questions_truncate=-1 + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. + steps/nnet3/chain/build_tree.sh --repeat-frames true --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --cmd "$train_cmd" 8400 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,2) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-7,2) dim=1024 + relu-renorm-layer name=tdnn5 dim=1024 + + output-layer name=output input=tdnn5 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $treedir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +echo 3 >$dir/frame_subsampling_factor +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 13 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
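+  # (A note on the scales, stated as the round fraction 1/3: the 0.333
+  # self-loop-scale here and the --acwt 0.333 at decode time are both
+  # 1 / frame_subsampling_factor, and --post-decode-acwt 3.0 rescales the
+  # lattice acoustic costs so the regular scoring scripts work unchanged.)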
+ utils/mkgraph.sh --self-loop-scale 0.333 data/lang_sw1_tg $dir $graph_dir +fi + +if [ $stage -le 14 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 0.333 --post-decode-acwt 3.0 --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh new file mode 100755 index 00000000000..8c80dc3d7ad --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh @@ -0,0 +1,163 @@ +#!/bin/bash + +# _lfr1b is as _lfr1a, but with one more -3,3 layer (the comparable +# non-LFR system is tdnn_d) + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +# System tdnn_d tdnn_lfr1a tdnn_lfr1b +# WER on train_dev(tg) 16.72 17.25 17.00 +# WER on train_dev(fg) 15.31 15.90 15.57 +# WER on eval2000(tg) 19.2 20.1 19.3 +# WER on eval2000(fg) 17.8 18.5 17.8 +# Final train prob -1.22859 -1.32434 -1.11497 +# Final valid prob -1.354 -1.42206 -1.21105 + + + +stage=0 +affix= +train_stage=-10 +has_fisher=true +speed_perturb=true +common_egs_dir= +reporting_email= +remove_egs=true +leftmost_questions_truncate=-1 + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. 
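+  # (--repeat-frames true keeps the converted alignment at the original
+  # frame rate by interleaving the 3 frame-shifted subsampled alignments;
+  # see the --repeat-frames option this patch adds to convert-ali.)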
+ steps/nnet3/chain/build_tree.sh --repeat-frames true --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --cmd "$train_cmd" 8400 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,2) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-7,2) dim=1024 + relu-renorm-layer name=tdnn6 dim=1024 + + output-layer name=output input=tdnn6 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $treedir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +echo 3 >$dir/frame_subsampling_factor +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 13 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --self-loop-scale 0.333 data/lang_sw1_tg $dir $graph_dir +fi + +if [ $stage -le 14 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 0.333 --post-decode-acwt 3.0 --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh new file mode 100755 index 00000000000..95cdbf7f975 --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh @@ -0,0 +1,162 @@ +#!/bin/bash + +# _lfr1c is as _lfr1a, but uses splicing similar to chain's without changing +# number of layers (comparable non-LFR system is tdnn_e). + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +# System tdnn_c tdnn_e tdnn_lfr1c +# WER on train_dev(tg) 17.37 16.75 17.10 +# WER on train_dev(fg) 15.94 15.34 15.74 +# WER on eval2000(tg) 20.0 19.5 19.2 +# WER on eval2000(fg) 18.2 18.0 17.7 +# Final train prob -1.43781 -1.40491 -1.29898 +# Final valid prob -1.56895 -1.55255 -1.43117 + + +stage=11 +affix= +train_stage=-10 +has_fisher=true +speed_perturb=true +common_egs_dir= +#exp/nnet3/tdnn_lfr1b_sp/egs +reporting_email= +remove_egs=true +leftmost_questions_truncate=-1 + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. 
+ steps/nnet3/chain/build_tree.sh --repeat-frames true --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --cmd "$train_cmd" 8400 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + + output-layer name=output input=tdnn5 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $treedir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +echo 3 >$dir/frame_subsampling_factor +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 13 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --self-loop-scale 0.333 data/lang_sw1_tg $dir $graph_dir +fi + +if [ $stage -le 14 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 0.333 --post-decode-acwt 3.0 --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/wsj/s5/steps/nnet3/chain/build_tree.sh b/egs/wsj/s5/steps/nnet3/chain/build_tree.sh index 72bc91c6014..280ab4ee0b2 100755 --- a/egs/wsj/s5/steps/nnet3/chain/build_tree.sh +++ b/egs/wsj/s5/steps/nnet3/chain/build_tree.sh @@ -27,6 +27,7 @@ leftmost_questions_truncate=-1 # note: this used to default to 10, but we never # we're changing the default tree_stats_opts= cluster_phones_opts= +repeat_frames=false # End configuration section. echo "$0 $@" # Print the command line for logging @@ -41,6 +42,15 @@ if [ $# != 5 ]; then echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." echo " --config # config containing options" echo " --stage # stage to do partial re-run from." + echo " --repeat-frames # Only affects alignment conversion at" + echo " # the end. If true, generate an " + echo " # alignment using the frame-subsampled " + echo " # topology that is repeated " + echo " # --frame-subsampling-factor times " + echo " # and interleaved, to be the same " + echo " # length as the original alignment " + echo " # (useful for cross-entropy training " + echo " # of reduced frame rate systems)." exit 1; fi @@ -173,9 +183,10 @@ if [ $stage -le -1 ]; then # for other purposes. echo "$0: Converting alignments from $alidir to use current tree" $cmd JOB=1:$nj $dir/log/convert.JOB.log \ - convert-ali --frame-subsampling-factor=$frame_subsampling_factor \ - $alidir/final.mdl $dir/1.mdl $dir/tree \ - "ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; + convert-ali --repeat-frames=$repeat_frames \ + --frame-subsampling-factor=$frame_subsampling_factor \ + $alidir/final.mdl $dir/1.mdl $dir/tree \ + "ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; fi cp $dir/1.mdl $dir/final.mdl diff --git a/src/bin/convert-ali.cc b/src/bin/convert-ali.cc index 3a52b7904a0..89fe838638c 100644 --- a/src/bin/convert-ali.cc +++ b/src/bin/convert-ali.cc @@ -39,6 +39,7 @@ int main(int argc, char *argv[]) { int32 frame_subsampling_factor = 1; bool reorder = true; + bool repeat_frames = false; std::string phone_map_rxfilename; ParseOptions po(usage); @@ -48,6 +49,11 @@ int main(int argc, char *argv[]) { po.Register("reorder", &reorder, "True if you want the converted alignments to be 'reordered' " "versus the way they appear in the HmmTopology object"); + po.Register("repeat-frames", &repeat_frames, + "Only relevant when frame-subsampling-factor != 1. 
If true, " + "repeat frames of alignment by 'frame-subsampling-factor' " + "after alignment conversion, to keep the alignment the same " + "length as the input alignment."); po.Register("frame-subsampling-factor", &frame_subsampling_factor, "Can be used in converting alignments to reduced frame rates."); @@ -98,6 +104,7 @@ int main(int argc, char *argv[]) { new_ctx_dep, old_alignment, frame_subsampling_factor, + repeat_frames, reorder, (phone_map_rxfilename != "" ? &phone_map : NULL), &new_alignment)) { diff --git a/src/hmm/hmm-utils-test.cc b/src/hmm/hmm-utils-test.cc index 805b77ce7f0..69728cc8ca7 100644 --- a/src/hmm/hmm-utils-test.cc +++ b/src/hmm/hmm-utils-test.cc @@ -292,8 +292,8 @@ void TestConvertAlignment() { std::vector new_alignment; bool ans = ConvertAlignment(trans_model_old, trans_model_new, *ctx_dep_new, - old_alignment, subsample_factor, new_reorder, - NULL, &new_alignment); + old_alignment, subsample_factor, false, + new_reorder, NULL, &new_alignment); if(!ans) { KALDI_WARN << "Alignment conversion failed"; // make sure it failed for a good reason. @@ -311,8 +311,8 @@ void TestConvertAlignment() { // we should be able to convert back and it'll be the same. std::vector old_alignment_copy; bool ans = ConvertAlignment(trans_model_new, trans_model_old, *ctx_dep_old, - new_alignment, subsample_factor, old_reorder, - NULL, &old_alignment_copy); + new_alignment, subsample_factor, false, + old_reorder, NULL, &old_alignment_copy); KALDI_ASSERT(ans); KALDI_ASSERT(old_alignment_copy == old_alignment); } diff --git a/src/hmm/hmm-utils.cc b/src/hmm/hmm-utils.cc index ab0b133f708..fe6c5b32d6e 100644 --- a/src/hmm/hmm-utils.cc +++ b/src/hmm/hmm-utils.cc @@ -772,18 +772,38 @@ static inline void ConvertAlignmentForPhone( } + /** - This function, called from ConvertAlignment(), works out suitable new lengths - of phones in the case where subsample_factor != 1. The input vectors + This function, called from ConvertAlignmentInternal(), works out suitable new + lengths of phones in the case where subsample_factor != 1. The input vectors 'mapped_phones' and 'old_lengths' must be the same size-- the length of the phone sequence. The 'topology' object and 'mapped_phones' are needed to work out the minimum length of each phone in the sequence. - Returns true only if it could not assign lengths (because the topology was + Returns false only if it could not assign lengths (because the topology was too long relative to the number of frames). + + @param topology [in] The new phone lengths are computed with + regard to this topology + @param mapped_phones [in] The phones for which this function computes + new lengths + @param old_lengths [in] The old lengths + @param conversion_shift [in] This will normally equal subsample_factor - 1 + but may be less than that if the 'repeat_frames' + option is true; it's used for generating + 'frame-shifted' versions of alignments that + we will later interpolate. This helps us keep + the phone boundaries of the subsampled and + interpolated alignments the same as + the original alignment. + @param subsample_factor [in] The frame subsampling factor... normally 1, but + might be > 1 if we're converting to a + reduced-frame-rate system. + @param new_lengths [out] The vector for storing new lengths. 
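+
+       Worked example (illustrative numbers only): with subsample_factor = 3
+       and conversion_shift = 2, a phone occupying old frames 0 through 4
+       starts at subsampled time (0 + 2) / 3 = 0 and ends at
+       (5 + 2) / 3 = 2, so its new length becomes 2.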
 */
 static bool ComputeNewPhoneLengths(const HmmTopology &topology,
                                    const std::vector &mapped_phones,
                                    const std::vector &old_lengths,
+                                   int32 conversion_shift,
                                    int32 subsample_factor,
                                    std::vector *new_lengths) {
   int32 phone_sequence_length = old_lengths.size();
@@ -797,10 +817,10 @@ static bool ComputeNewPhoneLengths(const HmmTopology &topology,
     // the subsampled alignments have the same length as features
     // subsampled with 'subsample-feats'.
     int32 subsampled_time =
-        (cur_time_elapsed + subsample_factor - 1) / subsample_factor;
+        (cur_time_elapsed + conversion_shift) / subsample_factor;
     cur_time_elapsed += old_lengths[i];
     int32 next_subsampled_time =
-        (cur_time_elapsed + subsample_factor - 1) / subsample_factor;
+        (cur_time_elapsed + conversion_shift) / subsample_factor;
     (*new_lengths)[i] = next_subsampled_time - subsampled_time;
   }
   bool changed = true;
@@ -850,14 +870,23 @@ static bool ComputeNewPhoneLengths(const HmmTopology &topology,
   return true;
 }
 
-bool ConvertAlignment(const TransitionModel &old_trans_model,
+/**
+   This function is the same as 'ConvertAlignment',
+   but instead of the 'repeat_frames' option it supports the 'conversion_shift'
+   option; see the documentation of ComputeNewPhoneLengths() for what
+   'conversion_shift' is for.
+*/
+
+static bool ConvertAlignmentInternal(const TransitionModel &old_trans_model,
                       const TransitionModel &new_trans_model,
                       const ContextDependencyInterface &new_ctx_dep,
                       const std::vector &old_alignment,
+                      int32 conversion_shift,
                       int32 subsample_factor,
                       bool new_is_reordered,
                       const std::vector *phone_map,
                       std::vector *new_alignment) {
+  KALDI_ASSERT(0 <= conversion_shift && conversion_shift < subsample_factor);
   bool old_is_reordered = IsReordered(old_trans_model, old_alignment);
   KALDI_ASSERT(new_alignment != NULL);
   new_alignment->clear();
@@ -893,7 +922,7 @@ bool ConvertAlignment(const TransitionModel &old_trans_model,
   for (int32 i = 0; i < phone_sequence_length; i++)
     old_lengths[i] = old_split[i].size();
   if (!ComputeNewPhoneLengths(new_trans_model.GetTopo(),
-                              mapped_phones, old_lengths,
+                              mapped_phones, old_lengths, conversion_shift,
                               subsample_factor, &new_lengths)) {
     KALDI_WARN << "Failed to produce suitable phone lengths";
     return false;
@@ -931,7 +960,58 @@ bool ConvertAlignment(const TransitionModel &old_trans_model,
     }
   }
   KALDI_ASSERT(new_alignment->size() ==
-               (old_alignment.size() + subsample_factor - 1)/subsample_factor);
+               (old_alignment.size() + conversion_shift)/subsample_factor);
+  return true;
+}
+
+bool ConvertAlignment(const TransitionModel &old_trans_model,
+                      const TransitionModel &new_trans_model,
+                      const ContextDependencyInterface &new_ctx_dep,
+                      const std::vector &old_alignment,
+                      int32 subsample_factor,
+                      bool repeat_frames,
+                      bool new_is_reordered,
+                      const std::vector *phone_map,
+                      std::vector *new_alignment) {
+  if (!repeat_frames || subsample_factor == 1) {
+    return ConvertAlignmentInternal(old_trans_model,
+                                    new_trans_model,
+                                    new_ctx_dep,
+                                    old_alignment,
+                                    subsample_factor - 1,
+                                    subsample_factor,
+                                    new_is_reordered,
+                                    phone_map,
+                                    new_alignment);
+    // The value "subsample_factor - 1" for conversion_shift above ensures the
+    // alignments have the same length as the output of 'subsample-feats'
+  } else {
+    std::vector > shifted_alignments(subsample_factor);
+    for (int32 conversion_shift = subsample_factor - 1;
+         conversion_shift >= 0; conversion_shift--) {
+      if (!ConvertAlignmentInternal(old_trans_model,
+                                    new_trans_model,
+                                    new_ctx_dep,
+                                    old_alignment,
+                                    conversion_shift,
+                                    subsample_factor,
+                                    new_is_reordered,
+                                    phone_map, 
+ &shifted_alignments[conversion_shift])) + return false; + } + KALDI_ASSERT(new_alignment != NULL); + new_alignment->clear(); + new_alignment->reserve(old_alignment.size()); + int32 max_shifted_ali_length = (old_alignment.size() / subsample_factor) + + (old_alignment.size() % subsample_factor); + for (int32 i = 0; i < max_shifted_ali_length; i++) + for (int32 conversion_shift = subsample_factor - 1; + conversion_shift >= 0; conversion_shift--) + if (i < static_cast(shifted_alignments[conversion_shift].size())) + new_alignment->push_back(shifted_alignments[conversion_shift][i]); + } + KALDI_ASSERT(new_alignment->size() == old_alignment.size()); return true; } diff --git a/src/hmm/hmm-utils.h b/src/hmm/hmm-utils.h index 1af62c646be..3d51cbe1f14 100644 --- a/src/hmm/hmm-utils.h +++ b/src/hmm/hmm-utils.h @@ -245,6 +245,15 @@ bool SplitToPhones(const TransitionModel &trans_model, @param subsample_factor [in] The frame subsampling factor... normally 1, but might be > 1 if we're converting to a reduced-frame-rate system. + @param repeat_frames [in] Only relevant when subsample_factor != 1 + If true, repeat frames of alignment by + 'subsample_factor' after alignment + conversion, to keep the alignment the same + length as the input alignment. + [note: we actually do this by interpolating + 'subsample_factor' separately generated + alignments, to keep the phone boundaries + the same as the input where possible.] @param reorder [in] True if you want the pdf-ids on the new alignment to be 'reordered'. (vs. the way they appear in the HmmTopology object) @@ -257,6 +266,7 @@ bool ConvertAlignment(const TransitionModel &old_trans_model, const ContextDependencyInterface &new_ctx_dep, const std::vector &old_alignment, int32 subsample_factor, // 1 in the normal case -> no subsampling. + bool repeat_frames, bool reorder, const std::vector *phone_map, // may be NULL std::vector *new_alignment); From 2c3df88f080cf5bfa3c61c9bd98d999e7721a953 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Tue, 21 Feb 2017 13:44:10 -0500 Subject: [PATCH 441/530] [build] cloning IRSTLM, use https to access github, not ssh (#1441) --- tools/extras/install_irstlm.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/extras/install_irstlm.sh b/tools/extras/install_irstlm.sh index 91635a30bbc..c6cc9adf568 100755 --- a/tools/extras/install_irstlm.sh +++ b/tools/extras/install_irstlm.sh @@ -12,13 +12,13 @@ errcho() { echo "$@" 1>&2; } errcho "****() Installing IRSTLM" if [ ! -x ./irstlm ] ; then - svn=`which svn` + svn=`which git` if [ $? != 0 ] ; then - errcho "****() You need to have svn (subversion) installed" + errcho "****() You need to have git installed" exit 1 fi ( - git clone git@github.com:irstlm-team/irstlm.git irstlm + git clone https://github.com/irstlm-team/irstlm.git irstlm ) || { errcho "****() Error getting the IRSTLM sources. The server hosting it" errcho "****() might be down." @@ -43,6 +43,7 @@ fi ) || { errcho "***() Error compiling IRSTLM. The error messages could help you " errcho "***() in figuring what went wrong." + exit 1 } ( From 9866a4c4d5cd90c4ceabce2fae5cbcb0a9b34bc8 Mon Sep 17 00:00:00 2001 From: Dogan Can Date: Tue, 21 Feb 2017 23:53:02 -0800 Subject: [PATCH 442/530] [build] Update Travis configuration to get builds to complete on time. (#1443) Run the build in a container-based Ubuntu 12.04 virtual environment. Compile with clang-3.8. Add Travis scripts to Travis testable files. 
--- .travis.yml | 13 +++++++------ tools/extras/travis_script.sh | 5 +++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 54ac9f11c9f..f8e2bac0362 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,8 +2,6 @@ notifications: email: false language: cpp -sudo: false -dist: trusty os: - linux @@ -11,12 +9,15 @@ os: addons: apt: sources: - - llvm-toolchain-trusty-3.9 + - ubuntu-toolchain-r-test + - llvm-toolchain-precise-3.8 packages: - gdb - - gfortran + - gcc-4.9 + - g++-4.9 + - gfortran-4.9 - liblapack-dev - - clang-3.9 + - clang-3.8 branches: only: @@ -29,7 +30,7 @@ before_install: - tools/extras/travis_install_bindeps.sh $XROOT script: - - CXX=clang++-3.9 + - CXX=clang++-3.8 CFLAGS="-march=native" LDFLAGS="-llapack" INCDIRS="$XROOT/usr/include" diff --git a/tools/extras/travis_script.sh b/tools/extras/travis_script.sh index 8aea788d9bc..d1b9049ef22 100755 --- a/tools/extras/travis_script.sh +++ b/tools/extras/travis_script.sh @@ -4,7 +4,7 @@ # Typical usage shown below; any one can be safely left unset. # INCDIRS="~/xroot/usr/include" # LIBDIRS="~/xroot/usr/lib /usr/lib/openblas-base" -# CXX=clang++-3.9 +# CXX=clang++-3.8 # CFLAGS="-march=native -O2" # LDFLAGS="-llapack" @@ -38,7 +38,8 @@ runvx env # However, do run tests if TRAVIS_COMMIT_RANGE does not parse. This # most likely means the branch was reset by --force; re-run tests then. if git rev-parse "${TRAVIS_COMMIT_RANGE}" >/dev/null 2>&1 && \ - ! git diff --name-only "${TRAVIS_COMMIT_RANGE}" -- ${TESTABLE_DIRS} | read REPLY + ! git diff --name-only "${TRAVIS_COMMIT_RANGE}" -- ${TESTABLE_DIRS} \ + .travis.yml tools/extras/travis_*.sh | read REPLY then echo; echo "No changes outside ${TESTABLE_DIRS} in the commit" \ "range ${TRAVIS_COMMIT_RANGE}; reporting success." 
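
[A note on the shell idiom in the hunk above, with a hypothetical standalone
sketch -- the variable name RANGE is invented for illustration.  In bash,
`command | read REPLY` exits zero only when `read` consumes at least one line,
so `! git diff --name-only ... | read REPLY` is true exactly when the diff
printed nothing:]

    # Succeeds only when nothing under src/ changed in the given range.
    if ! git diff --name-only "$RANGE" -- src/ | read REPLY; then
      echo "no changes under src/ in $RANGE; skipping tests"
    fi
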
From 351da2886bed076472882894056c9f42c965dcec Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Wed, 22 Feb 2017 13:13:40 -0500 Subject: [PATCH 443/530] [egs] Iban recipe: MacOSX compatibility fixes (#1448) --- egs/iban/s5/local/prepare_lm.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/iban/s5/local/prepare_lm.sh b/egs/iban/s5/local/prepare_lm.sh index a19dc18f566..10d5e276aa3 100755 --- a/egs/iban/s5/local/prepare_lm.sh +++ b/egs/iban/s5/local/prepare_lm.sh @@ -10,7 +10,7 @@ set -e -o pipefail local/train_lms_srilm.sh --train-text data/train/text data/ data/srilm -nl -nrz -w10 corpus/LM/iban-bp-2012.txt | sort -R > data/local/external_text +nl -nrz -w10 corpus/LM/iban-bp-2012.txt | utils/shuffle_list.pl > data/local/external_text local/train_lms_srilm.sh --train-text data/local/external_text data/ data/srilm_external # let's do ngram interpolation of the previous two LMs @@ -21,7 +21,7 @@ for w in 0.9 0.8 0.7 0.6 0.5; do ngram -lm data/srilm/lm.gz -mix-lm data/srilm_external/lm.gz \ -lambda $w -write-lm data/srilm_interp/lm.${w}.gz echo -n "data/srilm_interp/lm.${w}.gz " - ngram -lm data/srilm_interp/lm.${w}.gz -ppl data/srilm/dev.txt | paste -s + ngram -lm data/srilm_interp/lm.${w}.gz -ppl data/srilm/dev.txt | paste -s - done | sort -k15,15g > data/srilm_interp/perplexities.txt # for basic decoding, let's use only a trigram LM From ae4a5ef623c206c575e37c426a881a4dda84f274 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 23 Feb 2017 00:51:08 -0500 Subject: [PATCH 444/530] [scripts] Make it so i-vector ID is not required for steps/nnet3/decode.sh --- egs/wsj/s5/steps/nnet3/decode.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/nnet3/decode.sh b/egs/wsj/s5/steps/nnet3/decode.sh index 8aa86e92dcb..35a02001ae7 100755 --- a/egs/wsj/s5/steps/nnet3/decode.sh +++ b/egs/wsj/s5/steps/nnet3/decode.sh @@ -68,7 +68,7 @@ model=$srcdir/$iter.mdl extra_files= if [ ! -z "$online_ivector_dir" ]; then steps/nnet2/check_ivectors_compatible.sh $srcdir $online_ivector_dir || exit 1 - extra_files="$srcdir/final.ie.id $online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period" + extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period" fi for f in $graphdir/HCLG.fst $data/feats.scp $model $extra_files; do From 70f5360718942e42dcc878fd89b96c8e0637b604 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Thu, 23 Feb 2017 13:27:45 -0500 Subject: [PATCH 445/530] [build] Stop env.sh from crashing when set -u is active (#1451) --- .gitignore | 1 + tools/extras/install_irstlm.sh | 4 ++-- tools/extras/install_liblbfgs.sh | 6 +++--- tools/extras/install_mpg123.sh | 4 ++-- tools/extras/install_sequitur.sh | 6 +++--- tools/extras/install_srilm.sh | 4 ++-- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 02a88acb621..fd1f73af215 100644 --- a/.gitignore +++ b/.gitignore @@ -120,6 +120,7 @@ GSYMS /tools/pthreads /tools/pthreads*.zip /tools/sequitur +/tools/sequitur-g2p /tools/srilm.tgz /tools/liblbfgs-1.10.tar.gz /tools/liblbfgs-1.10/ diff --git a/tools/extras/install_irstlm.sh b/tools/extras/install_irstlm.sh index c6cc9adf568..8b0f8b6519e 100755 --- a/tools/extras/install_irstlm.sh +++ b/tools/extras/install_irstlm.sh @@ -47,12 +47,12 @@ fi } ( - [ ! -z ${IRSTLM} ] && \ + [ ! -z "${IRSTLM}" ] && \ echo >&2 "IRSTLM variable is aleady defined. Undefining..." && \ unset IRSTLM [ -f ./env.sh ] && . ./env.sh - [ ! -z ${IRSTLM} ] && \ + [ ! 
-z "${IRSTLM}" ] && \ echo >&2 "IRSTLM config is already in env.sh" && exit wd=`pwd -P` diff --git a/tools/extras/install_liblbfgs.sh b/tools/extras/install_liblbfgs.sh index 7e6589b160d..10f72cad84f 100644 --- a/tools/extras/install_liblbfgs.sh +++ b/tools/extras/install_liblbfgs.sh @@ -14,19 +14,19 @@ make -i install cd .. ( - [ ! -z ${LIBLBFGS} ] && \ + [ ! -z "${LIBLBFGS}" ] && \ echo >&2 "LIBLBFGS variable is aleady defined. Undefining..." && \ unset LIBLBFGS [ -f ./env.sh ] && . ./env.sh - [ ! -z ${LIBLBFGS} ] && \ + [ ! -z "${LIBLBFGS}" ] && \ echo >&2 "libLBFGS config is already in env.sh" && exit wd=`pwd` wd=`readlink -f $wd || pwd` echo "export LIBLBFGS=$wd/liblbfgs-1.10" - echo export LD_LIBRARY_PATH='${LD_LIBRARY_PATH}':'${LIBLBFGS}'/lib/.libs + echo export LD_LIBRARY_PATH='${LD_LIBRARY_PATH:-}':'${LIBLBFGS}'/lib/.libs ) >> env.sh diff --git a/tools/extras/install_mpg123.sh b/tools/extras/install_mpg123.sh index 870275c6a10..5702ff476b4 100755 --- a/tools/extras/install_mpg123.sh +++ b/tools/extras/install_mpg123.sh @@ -55,13 +55,13 @@ ln -s mpg123-1.21.0 mpg123 ( set +u - [ ! -z ${MPG123} ] && \ + [ ! -z "${MPG123}" ] && \ echo >&2 "MPG123 variable is aleady defined. Undefining..." && \ unset MPG123 [ -f ./env.sh ] && . ./env.sh - [ ! -z ${MPG123} ] && \ + [ ! -z "${MPG123}" ] && \ echo >&2 "MPG123 config is already in env.sh" && exit wd=`pwd` diff --git a/tools/extras/install_sequitur.sh b/tools/extras/install_sequitur.sh index 50ec7e98b5e..ba6d028edad 100755 --- a/tools/extras/install_sequitur.sh +++ b/tools/extras/install_sequitur.sh @@ -76,13 +76,13 @@ cd ../ ( set +u - [ ! -z ${SEQUITUR} ] && \ + [ ! -z "${SEQUITUR}" ] && \ echo >&2 "SEQUITUR variable is aleady defined. Undefining..." && \ unset SEQUITUR [ -f ./env.sh ] && . ./env.sh - [ ! -z ${SEQUITUR} ] && \ + [ ! -z "${SEQUITUR}" ] && \ echo >&2 "SEQUITUR config is already in env.sh" && exit wd=`pwd` @@ -91,7 +91,7 @@ cd ../ echo "export SEQUITUR=$wd/sequitur-g2p" echo "export PATH=\$PATH:\${SEQUITUR}/bin" echo "_site_packages=\`find \${SEQUITUR}/lib -type d -regex '.*python.*/site-packages'\`" - echo "export PYTHONPATH=\$PYTHONPATH:\$_site_packages" + echo "export PYTHONPATH=\${PYTHONPATH:-}:\$_site_packages" ) >> env.sh echo >&2 "Installation of SEQUITUR finished successfully" diff --git a/tools/extras/install_srilm.sh b/tools/extras/install_srilm.sh index 5d709e8a38b..000b1dbe6c5 100755 --- a/tools/extras/install_srilm.sh +++ b/tools/extras/install_srilm.sh @@ -61,13 +61,13 @@ make || exit 1 cd .. ( - [ ! -z ${SRILM} ] && \ + [ ! -z "${SRILM}" ] && \ echo >&2 "SRILM variable is aleady defined. Undefining..." && \ unset SRILM [ -f ./env.sh ] && . ./env.sh - [ ! -z ${SRILM} ] && \ + [ ! 
-z "${SRILM}" ] && \ echo >&2 "SRILM config is already in env.sh" && exit wd=`pwd` From 94a419f673d214039591b57145047deffdb226c4 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Thu, 23 Feb 2017 15:57:30 -0500 Subject: [PATCH 446/530] Modify the way some of the segmentation scripts work --- egs/wsj/s5/steps/segmentation/decode_sad.sh | 14 ++++++++--- .../do_segmentation_data_dir_simple.sh | 10 +++++--- .../internal/convert_ali_to_vad.sh | 25 +++++++++++-------- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/egs/wsj/s5/steps/segmentation/decode_sad.sh b/egs/wsj/s5/steps/segmentation/decode_sad.sh index 2f2e5ae2586..a39e93dd83f 100755 --- a/egs/wsj/s5/steps/segmentation/decode_sad.sh +++ b/egs/wsj/s5/steps/segmentation/decode_sad.sh @@ -28,7 +28,13 @@ mkdir -p $dir nj=`cat $log_likes_dir/num_jobs` echo $nj > $dir/num_jobs -for f in $graph_dir/$iter.mdl $log_likes_dir/log_likes.1.gz $graph_dir/HCLG.fst; do +if [ -f $dir/$iter.mdl ]; then + srcdir=$dir +else + srcdir=`dirname $dir` +fi + +for f in $srcdir/$iter.mdl $log_likes_dir/log_likes.1.gz $graph_dir/HCLG.fst; do if [ ! -f $f ]; then echo "$0: Could not find file $f" exit 1 @@ -37,14 +43,14 @@ done decoder_opts+=(--acoustic-scale=$acwt --beam=$beam --max-active=$max_active) -ali="ark:| ali-to-phones --per-frame $graph_dir/$iter.mdl ark:- ark:- | gzip -c > $dir/ali.JOB.gz" +ali="ark:| ali-to-phones --per-frame $srcdir/$iter.mdl ark:- ark:- | gzip -c > $dir/ali.JOB.gz" if $get_pdfs; then - ali="ark:| ali-to-pdf $graph_dir/$iter.mdl ark:- ark:- | gzip -c > $dir/ali.JOB.gz" + ali="ark:| ali-to-pdf $srcdir/$iter.mdl ark:- ark:- | gzip -c > $dir/ali.JOB.gz" fi $cmd JOB=1:$nj $dir/log/decode.JOB.log \ decode-faster-mapped ${decoder_opts[@]} \ - $graph_dir/$iter.mdl \ + $srcdir/$iter.mdl \ $graph_dir/HCLG.fst "ark:gunzip -c $log_likes_dir/log_likes.JOB.gz |" \ ark:/dev/null "$ali" diff --git a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh index 0da130ee3ab..cd4f36ded6b 100755 --- a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh +++ b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh @@ -32,8 +32,9 @@ extra_right_context=0 frame_subsampling_factor=1 # Subsampling at the output -transition_scale=10.0 -loopscale=1.0 +transition_scale=1.0 +loopscale=0.1 +acwt=1.0 # Set to true if the test data has > 8kHz sampling frequency. do_downsampling=false @@ -95,6 +96,7 @@ else fi if [ $stage -le 1 ]; then + utils/fix_data_dir.sh $test_data_dir steps/make_mfcc.sh --mfcc-config $mfcc_config --nj $nj --cmd "$train_cmd" \ ${test_data_dir} exp/make_hires/${data_id}${feat_affix} $mfcc_dir steps/compute_cmvn_stats.sh ${test_data_dir} exp/make_hires/${data_id}${feat_affix} $mfcc_dir @@ -163,9 +165,9 @@ if [ $stage -le 5 ]; then fi if [ $stage -le 6 ]; then - # 'final' here refers to $lang/final.mdl steps/segmentation/decode_sad.sh --acwt 1.0 --cmd "$decode_cmd" \ - --iter final --get-pdfs true $graph_dir $sad_dir $seg_dir + --iter ${iter} \ + --get-pdfs true $graph_dir $sad_dir $seg_dir fi if [ $stage -le 7 ]; then diff --git a/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh b/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh index 234b5020797..0d8939a9b80 100755 --- a/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh +++ b/egs/wsj/s5/steps/segmentation/internal/convert_ali_to_vad.sh @@ -8,15 +8,20 @@ set -u cmd=run.pl -frame_shift=0.01 -frame_subsampling_factor=1 - . 
parse_options.sh
 
 if [ $# -ne 3 ]; then
   echo "This script converts the alignment in the alignment directory "
   echo "to speech activity segments based on the provided phone-map."
-  echo "Usage: $0 exp/tri3_ali data/lang/phones/sad.map exp/tri3_ali_vad"
+  echo "The output is stored in sad_seg.*.ark along with an scp-file "
+  echo "sad_seg.scp in Segmentation format."
+  echo "If the alignment directory has a frame_subsampling_factor file, "
+  echo "the segments are converted using that frame-subsampling-factor."
+  echo "The phone-map file must have two columns: "
+  echo "  <phone> <label>"
+  echo ""
+  echo "Usage: $0 <ali-dir> <phone-map> <dir>"
+  echo "e.g. : $0 exp/tri3_ali data/lang/phones/sad.map exp/tri3_ali_vad"
   exit 1
 fi
 
@@ -33,21 +38,21 @@ mkdir -p $dir
 nj=`cat $ali_dir/num_jobs` || exit 1
 echo $nj > $dir/num_jobs
 
+frame_subsampling_factor=1
 if [ -f $ali_dir/frame_subsampling_factor ]; then
   frame_subsampling_factor=`cat $ali_dir/frame_subsampling_factor`
 fi
 
-ali_frame_shift=`perl -e "print ($frame_shift * $frame_subsampling_factor);"`
-ali_frame_overlap=`perl -e "print ($ali_frame_shift * 1.5);"`
-
 dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $dir ${PWD}`
 
 $cmd JOB=1:$nj $dir/log/get_sad.JOB.log \
   segmentation-init-from-ali \
-  "ark:gunzip -c ${ali_dir}/ali.JOB.gz | ali-to-phones --per-frame ${ali_dir}/final.mdl ark:- ark:- |" \
-  ark:- \| segmentation-copy --label-map=$phone_map ark:- ark:- \| \
+  "ark:gunzip -c ${ali_dir}/ali.JOB.gz | ali-to-phones --per-frame ${ali_dir}/final.mdl ark:- ark:- |" \
+  ark:- \| \
+  segmentation-copy --label-map=$phone_map \
+  --frame-subsampling-factor=$frame_subsampling_factor ark:- ark:- \| \
   segmentation-post-process --merge-adjacent-segments ark:- \
-  ark,scp:$dir/sad_seg.JOB.ark,$dir/sad_seg.JOB.scp
+  ark,scp:$dir/sad_seg.JOB.ark,$dir/sad_seg.JOB.scp
 
 for n in `seq $nj`; do
   cat $dir/sad_seg.$n.scp

From 0465262edf57e03509d2c5b4b6da877a17baa0e2 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Thu, 23 Feb 2017 15:58:37 -0500
Subject: [PATCH 447/530] asr_diarization: add more checks and messages to
 segmentation binaries

---
 src/segmenterbin/segmentation-combine-segments.cc   | 3 ++-
 src/segmenterbin/segmentation-init-from-segments.cc | 2 +-
 src/segmenterbin/segmentation-merge-recordings.cc   | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/segmenterbin/segmentation-combine-segments.cc b/src/segmenterbin/segmentation-combine-segments.cc
index 09b789a0921..1d745ca91f9 100644
--- a/src/segmenterbin/segmentation-combine-segments.cc
+++ b/src/segmenterbin/segmentation-combine-segments.cc
@@ -103,7 +103,8 @@ int main(int argc, char *argv[]) {
 
       if (!utt_segmentation_reader.HasKey(*it)) {
         KALDI_WARN << "Could not find utterance " << *it << " in "
-                   << "segmentation " << utt_segmentation_rspecifier;
+                   << "segmentation " << utt_segmentation_rspecifier
+                   << (include_missing ? "; using default segmentation": "");
         if (!include_missing) {
           num_err++;
         } else {
diff --git a/src/segmenterbin/segmentation-init-from-segments.cc b/src/segmenterbin/segmentation-init-from-segments.cc
index 469b4ef2965..980ec697602 100644
--- a/src/segmenterbin/segmentation-init-from-segments.cc
+++ b/src/segmenterbin/segmentation-init-from-segments.cc
@@ -70,7 +70,7 @@ int main(int argc, char *argv[]) {
 
     ParseOptions po(usage);
 
-    po.Register("segment-label", &segment_label,
+    po.Register("label", &segment_label,
                 "Label for all the segments in the segmentations");
     po.Register("utt2label-rspecifier", &utt2label_rspecifier,
                 "Mapping for each utterance to an integer label. 
" diff --git a/src/segmenterbin/segmentation-merge-recordings.cc b/src/segmenterbin/segmentation-merge-recordings.cc index dccd82b0595..69f6758c90d 100644 --- a/src/segmenterbin/segmentation-merge-recordings.cc +++ b/src/segmenterbin/segmentation-merge-recordings.cc @@ -92,8 +92,8 @@ int main(int argc, char *argv[]) { << "created overall " << num_segments << " segments; " << "failed to merge " << num_err << " old segmentations"; - return (num_new_segmentations > 0 && num_err < num_old_segmentations / 2 ? - 0 : 1); + return (num_segments > 0 && num_new_segmentations > 0 && + num_err < num_old_segmentations / 2 ? 0 : 1); } catch(const std::exception &e) { std::cerr << e.what(); return -1; From dff2a3f2c9dbdd96d86311812a37944d632bb483 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Fri, 24 Feb 2017 12:35:03 -0500 Subject: [PATCH 448/530] upgrade the sox calls to use more compatible options (#1453) --- egs/ami/s5/local/ami_ihm_data_prep.sh | 2 +- egs/ami/s5/local/ami_ihm_scoring_data_prep.sh | 2 +- egs/ami/s5/local/ami_mdm_data_prep.sh | 2 +- egs/ami/s5/local/ami_mdm_scoring_data_prep.sh | 2 +- egs/ami/s5/local/ami_sdm_data_prep.sh | 2 +- egs/ami/s5/local/ami_sdm_scoring_data_prep.sh | 2 +- egs/ami/s5b/local/ami_ihm_data_prep.sh | 2 +- egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh | 2 +- egs/ami/s5b/local/ami_mdm_data_prep.sh | 2 +- egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh | 2 +- egs/ami/s5b/local/ami_sdm_data_prep.sh | 2 +- egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/egs/ami/s5/local/ami_ihm_data_prep.sh b/egs/ami/s5/local/ami_ihm_data_prep.sh index 3a1d43d1ea1..b3ec1723713 100755 --- a/egs/ami/s5/local/ami_ihm_data_prep.sh +++ b/egs/ami/s5/local/ami_ihm_data_prep.sh @@ -69,7 +69,7 @@ sed -e 's?.*/??' -e 's?.wav??' $dir/wav.flist | \ awk '{print $2}' $dir/segments | sort -u | join - $dir/wav1.scp > $dir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # (1d) reco2file_and_channel cat $dir/wav.scp \ diff --git a/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh b/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh index c3b9914d7a0..b69732a61eb 100755 --- a/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh +++ b/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh @@ -68,7 +68,7 @@ sed -e 's?.*/??' -e 's?.wav??' 
$dir/wav.flist | \ awk '{print $2}' $dir/segments | sort -u | join - $dir/wav1.scp > $dir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # (1d) reco2file_and_channel cat $dir/wav.scp \ diff --git a/egs/ami/s5/local/ami_mdm_data_prep.sh b/egs/ami/s5/local/ami_mdm_data_prep.sh index bc7e4180b4a..2cc973cb2d5 100755 --- a/egs/ami/s5/local/ami_mdm_data_prep.sh +++ b/egs/ami/s5/local/ami_mdm_data_prep.sh @@ -75,7 +75,7 @@ awk '{print $1}' $dir/wav2.scp | join -2 2 - $dir/segments | \ awk '{print $1}' $dir/segments | join - $dir/text > $dir/t; mv $dir/t $dir/text #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp #prep reco2file_and_channel cat $dir/wav.scp | \ diff --git a/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh b/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh index ab0fd185f70..8d9e24a9838 100755 --- a/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh +++ b/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh @@ -67,7 +67,7 @@ sed -e 's?.*/??' -e 's?.wav??' $tmpdir/wav.flist | \ awk '{print $2}' $tmpdir/segments | sort -u | join - $tmpdir/wav1.scp > $tmpdir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp #prep reco2file_and_channel cat $tmpdir/wav.scp | \ diff --git a/egs/ami/s5/local/ami_sdm_data_prep.sh b/egs/ami/s5/local/ami_sdm_data_prep.sh index 8eda00f1d15..e662759a610 100755 --- a/egs/ami/s5/local/ami_sdm_data_prep.sh +++ b/egs/ami/s5/local/ami_sdm_data_prep.sh @@ -74,7 +74,7 @@ awk '{print $1}' $dir/wav2.scp | join -2 2 - $dir/segments | \ awk '{print $1}' $dir/segments | join - $dir/text > $dir/t; mv $dir/t $dir/text #replace path with an appropriate sox command that select a single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # this file reco2file_and_channel maps recording-id cat $dir/wav.scp | \ diff --git a/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh b/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh index 01173d2e3a6..3fa7c938479 100755 --- a/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh +++ b/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh @@ -72,7 +72,7 @@ sed -e 's?.*/??' -e 's?.wav??' 
$tmpdir/wav.flist | \ awk '{print $2}' $tmpdir/segments | sort -u | join - $tmpdir/wav1.scp > $tmpdir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp #prep reco2file_and_channel cat $tmpdir/wav.scp | \ diff --git a/egs/ami/s5b/local/ami_ihm_data_prep.sh b/egs/ami/s5b/local/ami_ihm_data_prep.sh index 38f14023b16..8ffa1f1e9c5 100755 --- a/egs/ami/s5b/local/ami_ihm_data_prep.sh +++ b/egs/ami/s5b/local/ami_ihm_data_prep.sh @@ -75,7 +75,7 @@ sed -e 's?.*/??' -e 's?.wav??' $dir/wav.flist | \ awk '{print $2}' $dir/segments | sort -u | join - $dir/wav1.scp > $dir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # (1d) reco2file_and_channel cat $dir/wav.scp \ diff --git a/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh index 3ae42afb3d8..746c42c4c1a 100755 --- a/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh +++ b/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh @@ -74,7 +74,7 @@ sed -e 's?.*/??' -e 's?.wav??' $dir/wav.flist | \ awk '{print $2}' $dir/segments | sort -u | join - $dir/wav1.scp > $dir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # (1d) reco2file_and_channel cat $dir/wav.scp \ diff --git a/egs/ami/s5b/local/ami_mdm_data_prep.sh b/egs/ami/s5b/local/ami_mdm_data_prep.sh index 0ab11c5893b..d100347a356 100755 --- a/egs/ami/s5b/local/ami_mdm_data_prep.sh +++ b/egs/ami/s5b/local/ami_mdm_data_prep.sh @@ -79,7 +79,7 @@ awk '{print $1}' $dir/wav2.scp | join -2 2 - $dir/segments | \ awk '{print $1}' $dir/segments | join - $dir/text > $dir/t; mv $dir/t $dir/text #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp #prep reco2file_and_channel cat $dir/wav.scp | \ diff --git a/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh index 4fbfe12ccad..65f514f223c 100755 --- a/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh +++ b/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh @@ -72,7 +72,7 @@ sed -e 's?.*/??' -e 's?.wav??' 
$tmpdir/wav.flist | \ awk '{print $2}' $tmpdir/segments | sort -u | join - $tmpdir/wav1.scp > $tmpdir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp #prep reco2file_and_channel cat $tmpdir/wav.scp | \ diff --git a/egs/ami/s5b/local/ami_sdm_data_prep.sh b/egs/ami/s5b/local/ami_sdm_data_prep.sh index 267aef75535..327595070a6 100755 --- a/egs/ami/s5b/local/ami_sdm_data_prep.sh +++ b/egs/ami/s5b/local/ami_sdm_data_prep.sh @@ -86,7 +86,7 @@ awk '{print $1}' $dir/wav2.scp | join -2 2 - $dir/segments | \ awk '{print $1}' $dir/segments | join - $dir/text > $dir/t; mv $dir/t $dir/text #replace path with an appropriate sox command that select a single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # this file reco2file_and_channel maps recording-id cat $dir/wav.scp | \ diff --git a/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh index d0609e552cd..1378f8b8965 100755 --- a/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh +++ b/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh @@ -82,7 +82,7 @@ sed -e 's?.*/??' -e 's?.wav??' $tmpdir/wav.flist | \ awk '{print $2}' $tmpdir/segments | sort -u | join - $tmpdir/wav1.scp > $tmpdir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp #prep reco2file_and_channel cat $tmpdir/wav.scp | \ From 37bebd153b658628c59ada0e4adff3ee31c584e4 Mon Sep 17 00:00:00 2001 From: meixu song Date: Sat, 25 Feb 2017 13:39:15 +0800 Subject: [PATCH 449/530] [egs] fix typo in egs/swbd/s5c/local/nnet3/run_ivector_common.sh (#1452) --- egs/swbd/s5c/local/nnet3/run_ivector_common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/swbd/s5c/local/nnet3/run_ivector_common.sh b/egs/swbd/s5c/local/nnet3/run_ivector_common.sh index 9768d82c806..b64d3e468df 100755 --- a/egs/swbd/s5c/local/nnet3/run_ivector_common.sh +++ b/egs/swbd/s5c/local/nnet3/run_ivector_common.sh @@ -62,7 +62,7 @@ if [ $stage -le 3 ]; then for dataset in $train_set train_100k_nodup; do utils/copy_data_dir.sh data/$dataset data/${dataset}_hires - utils/data/perturb_data_dir_volume.sh adata/${dataset}_hires + utils/data/perturb_data_dir_volume.sh data/${dataset}_hires steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir; From 4cda604230d9b2c0420d4937ab6520b463bd49bd Mon Sep 17 00:00:00 2001 From: meixu song Date: Sat, 25 Feb 2017 13:40:28 +0800 Subject: [PATCH 450/530] [scripts] xconfig: make scripts work when LDA-like preconditioning layer is not used (#1447) --- egs/wsj/s5/steps/libs/nnet3/train/common.py | 20 +++++++++++++------ .../steps/libs/nnet3/xconfig/basic_layers.py | 15 +++++++------- egs/wsj/s5/steps/nnet3/train_dnn.py | 4 ++-- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 17 ++++++++++++++++ 4 files changed, 41 insertions(+), 15 deletions(-) diff --git a/egs/wsj/s5/steps/libs/nnet3/train/common.py 
b/egs/wsj/s5/steps/libs/nnet3/train/common.py index 0a02c89de5f..e6ef511e7f2 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/common.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/common.py @@ -444,12 +444,20 @@ def smooth_presoftmax_prior_scale_vector(pdf_counts, def prepare_initial_network(dir, run_opts, srand=-3): - common_lib.run_job( - """{command} {dir}/log/add_first_layer.log \ - nnet3-init --srand={srand} {dir}/init.raw \ - {dir}/configs/layer1.config {dir}/0.raw""".format( - command=run_opts.command, srand=srand, - dir=dir)) + if os.path.exists(dir+"/configs/init.config"): + common_lib.run_job( + """{command} {dir}/log/add_first_layer.log \ + nnet3-init --srand={srand} {dir}/init.raw \ + {dir}/configs/layer1.config {dir}/0.raw""".format( + command=run_opts.command, srand=srand, + dir=dir)) + else: + common_lib.run_job( + """{command} {dir}/log/add_first_layer.log \ + nnet3-init --srand={srand} \ + {dir}/configs/layer1.config {dir}/0.raw""".format( + command=run_opts.command, srand=srand, + dir=dir)) def verify_iterations(num_iters, num_epochs, num_hidden_layers, diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py index 1a42c86ad81..de4c4af9df8 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py @@ -786,7 +786,8 @@ def set_default_configs(self): # the most recent layer. self.config = { 'input':'[-1]', 'dim':-1, - 'affine-transform-file':''} + 'affine-transform-file':'', + 'write-init-config':True} def check_configs(self): if self.config['affine-transform-file'] is None: @@ -817,12 +818,12 @@ def get_full_config(self): output_dim = self.output_dim() transform_file = self.config['affine-transform-file'] - - # to init.config we write an output-node with the name 'output' and - # with a Descriptor equal to the descriptor that's the input to this - # layer. This will be used to accumulate stats to learn the LDA transform. - line = 'output-node name=output input={0}'.format(descriptor_final_string) - ans.append(('init', line)) + if self.config['write-init-config']: + # to init.config we write an output-node with the name 'output' and + # with a Descriptor equal to the descriptor that's the input to this + # layer. This will be used to accumulate stats to learn the LDA transform. + line = 'output-node name=output input={0}'.format(descriptor_final_string) + ans.append(('init', line)) # write the 'real' component to final.config line = 'component name={0} type=FixedAffineComponent matrix={1}'.format( diff --git a/egs/wsj/s5/steps/nnet3/train_dnn.py b/egs/wsj/s5/steps/nnet3/train_dnn.py index 164aee788fa..2f324512114 100755 --- a/egs/wsj/s5/steps/nnet3/train_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_dnn.py @@ -199,7 +199,7 @@ def train(args, run_opts, background_process_handler): # we do this as it's a convenient way to get the stats for the 'lda-like' # transform. 
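     # (The change below makes these LDA-related stages conditional on the
     # existence of configs/init.config, which xconfig_to_configs.py now only
     # writes when the network actually contains an LDA-like preconditioning
     # layer.  A minimal sketch of the guard pattern, re-using 'args' and 'os'
     # from the surrounding script:
     #
     #   have_lda = os.path.exists(os.path.join(args.dir, "configs", "init.config"))
     #   if args.stage <= -5 and have_lda:
     #       ...   # initialize the basic network used to accumulate LDA stats
     #
     # 'have_lda' is just an illustrative name, not a variable in the script.)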
-    if (args.stage <= -5):
+    if (args.stage <= -5) and os.path.exists(args.dir+"/configs/init.config"):
         logger.info("Initializing a basic network for estimating "
                     "preconditioning matrix")
         common_lib.run_job(
@@ -245,7 +245,7 @@ def train(args, run_opts, background_process_handler):
     # use during decoding
     common_train_lib.copy_egs_properties_to_exp_dir(egs_dir, args.dir)

-    if (args.stage <= -3):
+    if (args.stage <= -3) and os.path.exists(args.dir+"/configs/init.config"):
         logger.info('Computing the preconditioning matrix for input features')

         train_lib.common.compute_preconditioning_matrix(
diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py
index d3abb82c92c..5184b6eed41 100755
--- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py
+++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py
@@ -170,7 +170,24 @@ def write_config_files(config_dir, all_layers):
             # preserves the backtrace
             raise

+    # remove any init.config left over from a previous run
+    try:
+        os.remove(config_dir + '/init.config')
+    except OSError:
+        pass
+
     for basename, lines in config_basename_to_lines.items():
+        # count the lines that start with 'output-node':
+        num_output_node_lines = sum([1 if line.startswith('output-node') else 0
+                                     for line in lines])
+        if num_output_node_lines == 0:
+            if basename == 'init':
+                continue  # do not write the init.config
+            else:
+                print('{0}: error in xconfig file {1}: it may lack an output layer'.format(
+                    sys.argv[0], sys.argv[2]), file=sys.stderr)
+                raise
+
         header = config_basename_to_header[basename]
         filename = '{0}/{1}.config'.format(config_dir, basename)
         try:
From 089e59679c2494d2eb4b45898a19da74b5e4edd7 Mon Sep 17 00:00:00 2001
From: Dogan Can
Date: Sat, 25 Feb 2017 12:09:24 -0800
Subject: [PATCH 451/530] [build] Update OpenFst minimum version check to 1.6
 in tools/Makefile. (#1455)

---
 tools/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/Makefile b/tools/Makefile
index f40a75da5f8..b3d5a6c53b9 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -10,9 +10,9 @@ CC = gcc         # used for sph2pipe
 OPENFST_VERSION = 1.6.1

 OPENFST_VER_NUM := $(shell echo $(OPENFST_VERSION) | sed 's/\./ /g' | xargs printf "%d%02d%02d")
-ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10503)","1")
+ifeq ("$(shell expr $(OPENFST_VER_NUM) \< 10600)","1")
 $(error OpenFst-$(OPENFST_VERSION) is not supported. \
-        Supported versions: >= 1.5.3)
+        Supported versions: >= 1.6.0)
 endif

 all: check_required_programs sph2pipe atlas sclite openfst
From 344e1adc19e0f47dcadd2f925ae8b560f63b08b0 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Sat, 25 Feb 2017 17:19:33 -0500
Subject: [PATCH 452/530] [src] Fix bug in decodable-online-looped.cc (prevent
 crash in nnet3 online decoding).

---
 src/nnet3/decodable-online-looped.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nnet3/decodable-online-looped.cc b/src/nnet3/decodable-online-looped.cc
index 77be1f166bf..f231a2d5b62 100644
--- a/src/nnet3/decodable-online-looped.cc
+++ b/src/nnet3/decodable-online-looped.cc
@@ -129,7 +129,7 @@ void DecodableNnetLoopedOnlineBase::AdvanceChunk() {
   int32 num_feature_frames_ready = input_features_->NumFramesReady();
   bool is_finished = input_features_->IsLastFrame(num_feature_frames_ready - 1);

-  if (end_input_frame >= num_feature_frames_ready && !is_finished) {
+  if (end_input_frame > num_feature_frames_ready && !is_finished) {
     // we shouldn't be attempting to read past the end of the available features
     // until we have reached the end of the input (i.e.
the end-user called // InputFinished(), announcing that there is no more waveform; at this point From 25b1299b6e5c88e55be4163a8c662cc569b97bf6 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Sat, 25 Feb 2017 21:02:35 -0500 Subject: [PATCH 453/530] [egs] fix MacOSX incompatibilities in calls of paste (#1457) --- egs/iban/s5/local/train_lms_srilm.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/egs/iban/s5/local/train_lms_srilm.sh b/egs/iban/s5/local/train_lms_srilm.sh index 9ed88842650..f72596e750a 100755 --- a/egs/iban/s5/local/train_lms_srilm.sh +++ b/egs/iban/s5/local/train_lms_srilm.sh @@ -206,9 +206,9 @@ echo "--------------------" echo "Computing perplexity" echo "--------------------" ( - for f in $tgtdir/2gram* ; do ( echo $f; ngram -order 2 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done - for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done - for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done + for f in $tgtdir/2gram* ; do ( echo $f; ngram -order 2 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done + for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done + for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done ) | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt echo "The perlexity scores report is stored in $tgtdir/perplexities.txt " From d60e3cc2622a2a93e0f77285124468d0c31bce16 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 27 Feb 2017 02:11:21 -0500 Subject: [PATCH 454/530] [egs,scripts] Misc script fixes; refactor wsj/s5 examples; update tedlium/s5_r2 (#1456) --- egs/tedlium/s5_r2/local/chain/run_tdnn_d.sh | 198 ------ .../local/chain/tuning/run_tdnn_lstm_1e.sh | 4 +- egs/tedlium/s5_r2/local/nnet3/compare_wer.sh | 25 +- .../s5_r2/local/nnet3/run_ivector_common.sh | 4 +- egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh | 1 + .../s5_r2/local/nnet3/run_tdnn_lstm_lfr.sh | 1 + .../s5_r2/local/nnet3/tuning/run_tdnn_1b.sh | 3 + .../s5_r2/local/nnet3/tuning/run_tdnn_1c.sh | 186 ++++++ .../local/nnet3/tuning/run_tdnn_lfr_1a.sh | 200 ++++++ .../local/nnet3/tuning/run_tdnn_lstm_1a.sh | 31 +- .../local/nnet3/tuning/run_tdnn_lstm_1c.sh | 2 + .../nnet3/tuning/run_tdnn_lstm_lfr_1a.sh | 310 +++++++++ egs/wsj/s5/RESULTS | 176 +++-- egs/wsj/s5/local/chain/compare_wer.sh | 137 ++++ egs/wsj/s5/local/chain/run_tdnn.sh | 1 + egs/wsj/s5/local/chain/run_tdnn_lstm.sh | 1 + egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh | 361 ++++++++++ .../s5/local/chain/tuning/run_tdnn_lstm_1a.sh | 393 +++++++++++ egs/wsj/s5/local/nnet3/compare_wer.sh | 139 ++++ egs/wsj/s5/local/nnet3/run_ivector_common.sh | 228 +++++-- egs/wsj/s5/local/nnet3/run_lstm.sh | 3 +- .../s5/local/nnet3/run_lstm_discriminative.sh | 3 + egs/wsj/s5/local/nnet3/run_tdnn.sh | 75 +-- egs/wsj/s5/local/nnet3/run_tdnn_baseline.sh | 79 --- egs/wsj/s5/local/nnet3/run_tdnn_lstm.sh | 1 + egs/wsj/s5/local/nnet3/run_tdnn_lstm_lfr.sh | 1 + egs/wsj/s5/local/nnet3/tuning/run_tdnn_1a.sh | 162 +++++ egs/wsj/s5/local/nnet3/tuning/run_tdnn_1b.sh | 168 +++++ .../s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh | 288 ++++++++ .../nnet3/tuning/run_tdnn_lstm_lfr_1a.sh | 335 ++++++++++ 
egs/wsj/s5/local/run_basis_fmllr.sh | 6 +- egs/wsj/s5/local/run_mmi_tri2b.sh | 73 --- egs/wsj/s5/run.sh | 616 +++++++++--------- .../s5/steps/libs/nnet3/report/log_parse.py | 14 +- egs/wsj/s5/steps/lmrescore.sh | 2 +- egs/wsj/s5/steps/mixup.sh | 153 ----- egs/wsj/s5/utils/fix_data_dir.sh | 9 +- egs/wsj/s5/utils/mkgraph.sh | 2 +- egs/wsj/s5/utils/validate_data_dir.sh | 2 +- 39 files changed, 3310 insertions(+), 1083 deletions(-) delete mode 100755 egs/tedlium/s5_r2/local/chain/run_tdnn_d.sh create mode 120000 egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh create mode 120000 egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_lfr.sh create mode 100755 egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1c.sh create mode 100755 egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh create mode 100755 egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_lfr_1a.sh create mode 100755 egs/wsj/s5/local/chain/compare_wer.sh create mode 120000 egs/wsj/s5/local/chain/run_tdnn.sh create mode 120000 egs/wsj/s5/local/chain/run_tdnn_lstm.sh create mode 100755 egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh create mode 100755 egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1a.sh create mode 100755 egs/wsj/s5/local/nnet3/compare_wer.sh mode change 100755 => 120000 egs/wsj/s5/local/nnet3/run_tdnn.sh delete mode 100755 egs/wsj/s5/local/nnet3/run_tdnn_baseline.sh create mode 120000 egs/wsj/s5/local/nnet3/run_tdnn_lstm.sh create mode 120000 egs/wsj/s5/local/nnet3/run_tdnn_lstm_lfr.sh create mode 100755 egs/wsj/s5/local/nnet3/tuning/run_tdnn_1a.sh create mode 100755 egs/wsj/s5/local/nnet3/tuning/run_tdnn_1b.sh create mode 100755 egs/wsj/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh create mode 100755 egs/wsj/s5/local/nnet3/tuning/run_tdnn_lstm_lfr_1a.sh delete mode 100755 egs/wsj/s5/local/run_mmi_tri2b.sh delete mode 100755 egs/wsj/s5/steps/mixup.sh diff --git a/egs/tedlium/s5_r2/local/chain/run_tdnn_d.sh b/egs/tedlium/s5_r2/local/chain/run_tdnn_d.sh deleted file mode 100755 index 9e795316352..00000000000 --- a/egs/tedlium/s5_r2/local/chain/run_tdnn_d.sh +++ /dev/null @@ -1,198 +0,0 @@ -#!/bin/bash - -# by default, with cleanup: -# local/chain/run_tdnn.sh - -# without cleanup: -# local/chain/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & - -# note, if you have already run the corresponding non-chain nnet3 system -# (local/nnet3/run_tdnn.sh), you may want to run with --stage 14. - -set -e -o pipefail - -# First the options that are passed through to run_ivector_common.sh -# (some of which are also used in this script directly). -stage=0 -nj=30 -decode_nj=30 -min_seg_len=1.55 -train_set=train_cleaned -gmm=tri3_cleaned # the gmm for the target data -num_threads_ubm=32 -nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned - -# The rest are configs specific to this script. Most of the parameters -# are just hardcoded at this level, in the commands below. -train_stage=-10 -tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. -tdnn_affix=d #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. -common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs # you can set this to use previously dumped egs. - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - - -if ! cuda-compiled; then - cat <data/lang_chain/topo - fi -fi - -if [ $stage -le 15 ]; then - # Get the alignments as lattices (gives the chain training more freedom). 
- # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ - data/lang $gmm_dir $lat_dir - rm $lat_dir/fsts.*.gz # save space -fi - -if [ $stage -le 16 ]; then - # Build a tree using our new topology. We know we have alignments for the - # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use - # those. - if [ -f $tree_dir/final.mdl ]; then - echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." - exit 1; - fi - steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --context-opts "--context-width=2 --central-position=1" \ - --leftmost-questions-truncate -1 \ - --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir -fi - -if [ $stage -le 17 ]; then - mkdir -p $dir - - echo "$0: creating neural net configs"; - - steps/nnet3/tdnn/make_configs.py \ - --self-repair-scale-nonlinearity 0.00001 \ - --feat-dir data/${train_set}_sp_hires_comb \ - --ivector-dir $train_ivector_dir \ - --tree-dir $tree_dir \ - --relu-dim 550 \ - --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \ - --use-presoftmax-prior-scale false \ - --xent-regularize 0.1 \ - --xent-separate-forward-affine true \ - --include-log-softmax false \ - --final-layer-normalize-target 1.0 \ - $dir/configs || exit 1; -fi - -if [ $stage -le 18 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage - fi - - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize 0.1 \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.dir "$common_egs_dir" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --trainer.num-chunk-per-minibatch 128 \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs 4 \ - --trainer.optimization.num-jobs-initial 2 \ - --trainer.optimization.num-jobs-final 12 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.max-param-change 2.0 \ - --cleanup.remove-egs true \ - --feat-dir $train_data_dir \ - --tree-dir $tree_dir \ - --lat-dir $lat_dir \ - --dir $dir -fi - - - -if [ $stage -le 19 ]; then - # Note: it might appear that this data/lang_chain directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. 
- utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang $dir $dir/graph -fi - -if [ $stage -le 20 ]; then - rm $dir/.error 2>/dev/null || true - for dset in dev test; do - ( - steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ - --acwt 1.0 --post-decode-acwt 10.0 \ - --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ - --scoring-opts "--min-lmwt 5 " \ - $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; - steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ - data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 - ) || touch $dir/.error & - done - wait - if [ -f $dir/.error ]; then - echo "$0: something went wrong in decoding" - exit 1 - fi -fi -exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh index 6704f9d299e..e56946c1b54 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -259,14 +259,14 @@ fi if [ $stage -le 18 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage fi steps/nnet3/chain/train.py --stage $train_stage \ --cmd "$decode_cmd" \ --feat.online-ivector-dir $train_ivector_dir \ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize 0.1 \ + --chain.xent-regularize $xent_regularize \ --chain.leaky-hmm-coefficient 0.1 \ --chain.l2-regularize 0.00005 \ --chain.apply-deriv-weights false \ diff --git a/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh b/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh index 3e14a4efc55..da0bb728e69 100755 --- a/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh +++ b/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh @@ -1,12 +1,20 @@ #!/bin/bash # this script is used for comparing decoding results between systems. -# e.g. local/nnet3/compare_wer_general.sh exp/nnet3_cleaned/tdnn_{c,d}_sp +# e.g. local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_{c,d}_sp # For use with discriminatively trained systems you specify the epochs after a colon: # for instance, # local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_c_sp exp/nnet3_cleaned/tdnn_c_sp_smbr:{1,2,3} +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] [ ... 
]" + echo "e.g.: $0 exp/nnet3_cleaned/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/nnet3_cleaned/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + echo "# $0 $*" include_looped=false @@ -14,6 +22,11 @@ if [ "$1" == "--looped" ]; then include_looped=true shift fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi @@ -71,6 +84,16 @@ for n in 0 1 2 3; do done echo fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) + wer=$(grep Sum ${dirname}_online/decode_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi done diff --git a/egs/tedlium/s5_r2/local/nnet3/run_ivector_common.sh b/egs/tedlium/s5_r2/local/nnet3/run_ivector_common.sh index b4f2dd3e3b4..16093616b05 100755 --- a/egs/tedlium/s5_r2/local/nnet3/run_ivector_common.sh +++ b/egs/tedlium/s5_r2/local/nnet3/run_ivector_common.sh @@ -21,9 +21,9 @@ num_threads_ubm=32 nnet3_affix=_cleaned # affix for exp/nnet3 directory to put iVector stuff in, so it # becomes exp/nnet3_cleaned or whatever. -. cmd.sh +. ./cmd.sh . ./path.sh -. ./utils/parse_options.sh +. utils/parse_options.sh gmm_dir=exp/${gmm} diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh new file mode 120000 index 00000000000..61f8f499182 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1b.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_lfr.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_lfr.sh new file mode 120000 index 00000000000..8e03c924bc1 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_lfr.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_lfr_1a.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh index 379c8040a27..f6e4fb71b75 100755 --- a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh @@ -1,5 +1,8 @@ #!/bin/bash + +# 1b is as 1a but uses xconfigs. + # This is the standard "tdnn" system, built in nnet3; this script # is the version that's meant to run with data-cleanup, that doesn't # support parallel alignments. diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1c.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1c.sh new file mode 100755 index 00000000000..35789342ffb --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1c.sh @@ -0,0 +1,186 @@ +#!/bin/bash + +# 1c is as 1b but using more 'chain-like' splicing and slightly +# smaller dim. Not better; maybe slightly worse. + +# note: the num-params is almost the same. 
+# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn1{b,c}_sp +# exp/nnet3_cleaned/tdnn1b_sp: num-iters=240 nj=2..12 num-params=10.3M dim=40+100->4187 combine=-0.95->-0.95 loglike:train/valid[159,239,combined]=(-1.01,-0.95,-0.94/-1.18,-1.16,-1.15) accuracy:train/valid[159,239,combined]=(0.71,0.72,0.72/0.67,0.68,0.68) +# exp/nnet3_cleaned/tdnn1c_sp: num-iters=240 nj=2..12 num-params=10.1M dim=40+100->4187 combine=-1.16->-1.15 loglike:train/valid[159,239,combined]=(-1.22,-1.16,-1.15/-1.41,-1.38,-1.38) accuracy:train/valid[159,239,combined]=(0.66,0.67,0.68/0.62,0.63,0.63) + +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn1{b,c}_sp +# System tdnn1b_sp tdnn1c_sp +# WER on dev(orig) 11.7 11.9 +# WER on dev(rescored) 10.9 11.1 +# WER on test(orig) 11.7 11.8 +# WER on test(rescored) 11.0 11.2 +# Final train prob -0.9416 -1.1505 +# Final valid prob -1.1496 -1.3805 +# Final train acc 0.7241 0.6756 +# Final valid acc 0.6788 0.6255 + +# This is the standard "tdnn" system, built in nnet3; this script +# is the version that's meant to run with data-cleanup, that doesn't +# support parallel alignments. + + +# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn1b_sp +# exp/nnet3_cleaned/tdnn1b_sp: num-iters=240 nj=2..12 num-params=10.3M dim=40+100->4187 combine=-0.95->-0.95 loglike:train/valid[159,239,combined]=(-1.01,-0.95,-0.94/-1.18,-1.16,-1.15) accuracy:train/valid[159,239,combined]=(0.71,0.72,0.72/0.67,0.68,0.68) + +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn1a_sp exp/nnet3_cleaned/tdnn1b_sp +# System tdnn1a_sp tdnn1b_sp +# WER on dev(orig) 11.9 11.7 +# WER on dev(rescored) 11.2 10.9 +# WER on test(orig) 11.6 11.7 +# WER on test(rescored) 11.0 11.0 +# Final train prob -0.9255 -0.9416 +# Final valid prob -1.1842 -1.1496 +# Final train acc 0.7245 0.7241 +# Final valid acc 0.6771 0.6788 + + +# by default, with cleanup: +# local/nnet3/run_tdnn.sh + +# without cleanup: +# local/nnet3/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix=1c #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. + +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +remove_egs=true +srand=0 +reporting_email=dpovey@gmail.com +# set common_egs_dir to use previously dumped egs. +common_egs_dir= + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=750 + relu-renorm-layer name=tdnn2 dim=750 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=750 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=750 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=750 input=Append(-6,-3,0) + output-layer name=output dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=3 \ + --trainer.samples-per-iter=400000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=12 \ + --trainer.optimization.initial-effective-lrate=0.0015 \ + --trainer.optimization.final-effective-lrate=0.00015 \ + --trainer.optimization.minibatch-size=256,128 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # note: for TDNNs, looped decoding gives exactly the same results + # as regular decoding, so there is no point in testing it separately. + # We use regular decoding because it supports multi-threaded (we just + # didn't create the binary for that, for looped decoding, so far). + rm $dir/.error || true 2>/dev/null + for dset in dev test; do + ( + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh new file mode 100755 index 00000000000..666c2f1bb31 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh @@ -0,0 +1,200 @@ +#!/bin/bash + + +# run_tdnn_lfr_1a.sh is similar in configuration to run_tdnn_1c.sh, but it's a +# low-frame-rate system (see egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh +# for an example of such a system). 
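+# ('Low frame rate' here means the model evaluates its output at one third
+# of the input frame rate: the tree is built below with
+# --frame-subsampling-factor 3 on a modified topology, the factor is
+# recorded in $dir/frame_subsampling_factor, and decoding compensates with
+# --acwt 0.333 --post-decode-acwt 3.0.)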
+ + +# by default, with cleanup: +# local/nnet3/run_tdnn_lfr.sh + +# without cleanup: +# local/nnet3/run_tdnn_lfr.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix=1a #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. + +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +remove_egs=true +srand=0 +reporting_email=dpovey@gmail.com +# set common_egs_dir to use previously dumped egs. +common_egs_dir= + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 13 ]; then + # Build a tree using our new topology and a reduced sampling rate. + # We use 4000 leaves, which is a little less than the number used + # in the baseline GMM system (5k) in this setup, since generally + # LFR systems do best with somewhat fewer leaves. + # + # To get the stats to build the tree this script only uses every third frame, + # but it dumps converted alignments that essentially have 3 different + # frame-shifted versions of the alignment interpolated together; these can be + # used without modification in getting labels for training. + steps/nnet3/chain/build_tree.sh \ + --repeat-frames true --frame-subsampling-factor 3 \ + --cmd "$train_cmd" 4000 data/${train_set}_sp_comb \ + $lang $ali_dir $treedir +fi + +if [ $stage -le 14 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=750 + relu-renorm-layer name=tdnn2 dim=750 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=750 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=750 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=750 input=Append(-6,-3,0) + output-layer name=output dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 15 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=3 \ + --trainer.samples-per-iter=400000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=12 \ + --trainer.optimization.initial-effective-lrate=0.0015 \ + --trainer.optimization.final-effective-lrate=0.00015 \ + --trainer.optimization.minibatch-size=256,128 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$treedir \ + --lang=$lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + echo 3 >$dir/frame_subsampling_factor +fi + +if [ $stage -le 16 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh data/lang/phones.txt $lang/phones.txt + utils/mkgraph.sh --self-loop-scale 0.333 data/lang $dir $dir/graph +fi + + +if [ $stage -le 17 ]; then + # note: for TDNNs, looped decoding gives exactly the same results + # as regular decoding, so there is no point in testing it separately. + # We use regular decoding because it supports multi-threaded (we just + # didn't create the binary for that, for looped decoding, so far). 
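+  # (A note on the acoustic weights used below: --acwt 0.333 matches the
+  # 3-fold frame subsampling during lattice generation, and
+  # --post-decode-acwt 3.0 scales the acoustic scores back up before the
+  # lattices are written, so they can be scored with the usual integer
+  # LM weights; the net scale is 0.333 * 3.0 = 1.0.)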
+ rm $dir/.error || true 2>/dev/null + for dset in dev test; do + ( + steps/nnet3/decode.sh --acwt 0.333 --post-decode-acwt 3.0 --nj $decode_nj \ + --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + $dir/graph data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh index f1502dd2761..28c45836cf7 100755 --- a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh @@ -9,15 +9,16 @@ # System tdnn_lstm1a_sp tdnn_lstm1b_sp # WER on dev(orig) 11.0 11.0 # [looped:] 11.0 11.1 -# WER on dev(rescored) 10.3 10.3 +# WER on dev(rescored) 10.4 10.3 # [looped:] 10.3 10.5 -# WER on test(orig) 10.8 10.6 +# WER on test(orig) 10.7 10.6 # [looped:] 10.7 10.7 # WER on test(rescored) 10.1 9.9 # [looped:] 10.0 10.0 -# Final train prob -0.68810.7954-0.68970.7946 -# Final valid prob -0.77960.7611-0.79890.7582 - +# Final train prob -0.6881 -0.6897 +# Final valid prob -0.7796 -0.7989 +# Final train acc 0.7954 0.7946 +# Final valid acc 0.7611 0.7582 # by default, with cleanup: # local/nnet3/run_tdnn_lstm.sh @@ -53,19 +54,11 @@ label_delay=5 chunk_width=40,30,20 chunk_left_context=40 chunk_right_context=0 -# decode chunk-size options (for non-looped decoding) -extra_left_context=50 -extra_right_context=0 # training options srand=0 remove_egs=true -#decode options -extra_left_context= -extra_right_context= -frames_per_chunk= - . ./cmd.sh . ./path.sh . 
./utils/parse_options.sh @@ -91,8 +84,7 @@ local/nnet3/run_ivector_common.sh --stage $stage \ gmm_dir=exp/${gmm} graph_dir=$gmm_dir/graph ali_dir=exp/${gmm}_ali_${train_set}_sp_comb -dir=exp/nnet3${nnet3_affix}/tdnn_lstm${affix} -dir=${dir}_sp +dir=exp/nnet3${nnet3_affix}/tdnn_lstm${affix}_sp train_data_dir=data/${train_set}_sp_hires_comb train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_comb @@ -175,15 +167,14 @@ if [ $stage -le 13 ]; then fi if [ $stage -le 14 ]; then - [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; - [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; - [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) rm $dir/.error 2>/dev/null || true for dset in dev test; do ( steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --frames-per-chunk $frames_per_chunk \ --extra-left-context-initial 0 --extra-right-context-final 0 \ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1c.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1c.sh index 1d3b12f2697..bc9a717419d 100755 --- a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1c.sh +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1c.sh @@ -8,6 +8,8 @@ # local/chain/tuning/run_tdnn_lstm_1e.sh, but a non-chain nnet3 system, and # with 1.5 times larger hidden dimensions. +# exp/nnet3_cleaned/tdnn_lstm1c_sp: num-iters=246 nj=3..15 num-params=18.7M dim=40+100->4187 combine=-0.67->-0.66 loglike:train/valid[163,245,combined]=(-0.71,-0.63,-0.60/-0.92,-0.88,-0.85) accuracy:train/valid[163,245,combined]=(0.77,0.79,0.80/0.74,0.75,0.75) + # local/nnet3/compare_wer.sh --looped exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1b_sp exp/nnet3_cleaned/tdnn_lstm1c_sp # System tdnn_lstm1a_sp tdnn_lstm1b_sp tdnn_lstm1c_sp # WER on dev(orig) 11.0 11.0 11.0 diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_lfr_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_lfr_1a.sh new file mode 100755 index 00000000000..3e8509bf4ac --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_lfr_1a.sh @@ -0,0 +1,310 @@ +#!/bin/bash + + +# run_tdnn_lstm_lfr_1a.sh is like run_tdnn_lstm_1a.sh, but +# it's a low-frame-rate system. (however, using num-jobs-final=10, +# not 15, which was very high). + + +# Generally the WER is the same or slightly better than before. 
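+# (In the table below, the plain rows are from regular chunk-wise decoding,
+# the '[looped:]' rows are from decode_looped.sh, which carries the
+# recurrent state forward between chunks instead of re-computing the left
+# context, and the '[online:]' rows are from the online-nnet3 decoder, which
+# additionally estimates the iVectors incrementally as audio arrives.)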
+ +# local/nnet3/compare_wer.sh --looped --online exp/nnet3_cleaned/tdnn_lstm1c_sp exp/nnet3_cleaned/tdnn_lstm_lfr1a_sp 2>/dev/null +# local/nnet3/compare_wer.sh --looped --online exp/nnet3_cleaned/tdnn_lstm1c_sp exp/nnet3_cleaned/tdnn_lstm_lfr1a_sp +# System tdnn_lstm1c_sp tdnn_lstm_lfr1a_sp +# WER on dev(orig) 11.0 10.9 +# [looped:] 10.9 10.9 +# [online:] 10.8 +# WER on dev(rescored) 10.4 10.3 +# [looped:] 10.3 10.3 +# [online:] 10.3 +# WER on test(orig) 10.8 10.7 +# [looped:] 10.7 10.7 +# [online:] 10.7 +# WER on test(rescored) 10.1 10.2 +# [looped:] 10.1 10.1 +# [online:] 10.2 +# Final train prob -0.5998 -0.5437 +# Final valid prob -0.8542 -0.7286 +# Final train acc 0.7988 0.8343 +# Final valid acc 0.7521 0.7888 + + +# by default, with cleanup: +# local/nnet3/run_tdnn_lstm_lfr.sh + +# without cleanup: +# local/nnet3/run_tdnn_lstm_lfr.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned + +# Options which are not passed through to run_ivector_common.sh +affix=1a +common_egs_dir= +reporting_email= + +# LSTM options +train_stage=-10 +label_delay=5 + +# training chunk-options +chunk_width=40,30,20 +chunk_left_context=40 +chunk_right_context=0 +# decode chunk-size options (for non-looped decoding) +extra_left_context=50 +extra_right_context=0 + +# training options +srand=0 +remove_egs=true + +#decode options +test_online_decoding=false # if true, it will run the last decoding stage. + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 13 ]; then + # Build a tree using our new topology and a reduced sampling rate. + # We use 4000 leaves, which is a little less than the number used + # in the baseline GMM system (5k) in this setup, since generally + # LFR systems do best with somewhat fewer leaves. + # + # To get the stats to build the tree this script only uses every third frame, + # but it dumps converted alignments that essentially have 3 different + # frame-shifted versions of the alignment interpolated together; these can be + # used without modification in getting labels for training. 
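+  # (Roughly: with factor 3 the tree stats see only frames t=0,3,6,...,
+  # while the converted alignments written here still have one label per
+  # original frame, so later training stages can consume them unchanged.)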
+ steps/nnet3/chain/build_tree.sh \ + --repeat-frames true --frame-subsampling-factor 3 \ + --cmd "$train_cmd" 4000 data/${train_set}_sp_comb \ + $lang $ali_dir $treedir +fi + + +if [ $stage -le 14 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=768 + relu-renorm-layer name=tdnn2 dim=768 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 15 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=10 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$treedir \ + --lang=$lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + echo 3 >$dir/frame_subsampling_factor +fi + +if [ $stage -le 16 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). 
So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh data/lang/phones.txt $lang/phones.txt + utils/mkgraph.sh --self-loop-scale 0.333 data/lang $dir $dir/graph +fi + +if [ $stage -le 17 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh \ + --acwt 0.333 --post-decode-acwt 3.0 --nj $decode_nj \ + --cmd "$decode_cmd" --num-threads 4 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --frames-per-chunk $frames_per_chunk \ + --extra-left-context-initial 0 --extra-right-context-final 0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + $dir/graph data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +if [ $stage -le 18 ]; then + # 'looped' decoding. + # note: you should NOT do this decoding step for setups that have bidirectional + # recurrence, like BLSTMs-- it doesn't make sense and will give bad results. + # we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 0.333 --post-decode-acwt 3.0 \ + --nj $decode_nj --cmd "$decode_cmd" \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 19 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + # note: we just give it "$dset" as it only uses the wav.scp, the + # feature type does not matter. 
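+      # (prepare_online_decoding.sh above bundles the final model together
+      # with the feature and iVector extraction configuration into
+      # ${dir}_online, so this decoder recomputes features directly from the
+      # wav data rather than reading pre-computed features.)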
+ + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 0.333 --post-decode-acwt 3.0 \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset} ${dir}_online/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}_online/decode_${dset} ${dir}_online/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +exit 0; diff --git a/egs/wsj/s5/RESULTS b/egs/wsj/s5/RESULTS index acff4f9d7fe..e6732d21074 100644 --- a/egs/wsj/s5/RESULTS +++ b/egs/wsj/s5/RESULTS @@ -1,8 +1,15 @@ #!/bin/bash -# this RESULTS file was obtained by Haihua Xu in July 2013. - -for x in exp/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* | utils/best_wer.sh; done +# this RESULTS file was obtained by Dan Povey in Feb 2017, after +# a rewrite of the run.sh file. +# To see results from the scripts local/nnet3/ and local/chain/, +# look at the top of those files, we don't put those in the +# RESULTS file. + +for dir in exp/*; do + steps/info/gmm_dir_info.pl $dir + for x in $dir/decode*dev93* $dir/decode*eval92*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* | utils/best_wer.sh; done +done exit 0 # Use caution when comparing these results with other published results. @@ -13,107 +20,76 @@ exit 0 # in which we only test on utterances that are in either a 5k or 20k subset # of the vocabulary. -# The following results are updated with LDA+MLLT to use 7, not 9 frames of context, -# and also increased the learning rate for the "indirect" fMMI. - # monophone, deltas, trained on the 2k shortest utterances from the si84 data. -%WER 35.39 [ 2914 / 8234, 284 ins, 467 del, 2163 sub ] exp/mono0a/decode_tgpr_dev93/wer_10 -%WER 25.78 [ 1455 / 5643, 142 ins, 184 del, 1129 sub ] exp/mono0a/decode_tgpr_eval92/wer_9 +exp/mono0a: nj=10 align prob=-95.82 over 2.36h [retry=0.4%, fail=0.0%] states=132 gauss=973 +%WER 34.33 [ 2827 / 8234, 266 ins, 457 del, 2104 sub ] exp/mono0a/decode_nosp_tgpr_dev93/wer_10_0.0 +%WER 25.13 [ 1418 / 5643, 138 ins, 192 del, 1088 sub ] exp/mono0a/decode_nosp_tgpr_eval92/wer_10_0.0 + + # first triphone build. Built on half of SI-84. -%WER 20.00 [ 1647 / 8234, 257 ins, 197 del, 1193 sub ] exp/tri1/decode_tgpr_dev93/wer_17 -%WER 13.04 [ 736 / 5643, 137 ins, 61 del, 538 sub ] exp/tri1/decode_tgpr_eval92/wer_14 +exp/tri1: nj=10 align prob=-93.75 over 7.38h [retry=0.4%, fail=0.0%] states=1567 gauss=10025 tree-impr=5.06 +%WER 19.40 [ 1597 / 8234, 247 ins, 199 del, 1151 sub ] exp/tri1/decode_nosp_tgpr_dev93/wer_14_0.5 +%WER 12.76 [ 720 / 5643, 110 ins, 89 del, 521 sub ] exp/tri1/decode_nosp_tgpr_eval92/wer_14_1.0 -# the same, rescored with full trigram model [not pruned.] Note: the tg{1,2,3,4} are +# the above, rescored with full trigram model [not pruned.] Note: the tg{1,2,3,4} are # different rescoring methods. They all give about the same results. Note: 3 and 4 give # the "correct" LM scores. -%WER 18.87 [ 1554 / 8234, 295 ins, 136 del, 1123 sub ] exp/tri1/decode_tgpr_dev93_tg1/wer_14 -%WER 18.87 [ 1554 / 8234, 295 ins, 136 del, 1123 sub ] exp/tri1/decode_tgpr_dev93_tg2/wer_14 -%WER 18.75 [ 1544 / 8234, 266 ins, 152 del, 1126 sub ] exp/tri1/decode_tgpr_dev93_tg3/wer_15 -%WER 18.76 [ 1545 / 8234, 266 ins, 152 del, 1127 sub ] exp/tri1/decode_tgpr_dev93_tg4/wer_15 - -# tri2a is delta+delta-delta features. 
-%WER 17.93 [ 1476 / 8234, 256 ins, 161 del, 1059 sub ] exp/tri2a/decode_tgpr_dev93/wer_16 -%WER 12.42 [ 701 / 5643, 132 ins, 64 del, 505 sub ] exp/tri2a/decode_tgpr_eval92/wer_15 -# just demonstrates how to do decoding constrained by lattices. -%WER 16.76 [ 1380 / 8234, 275 ins, 132 del, 973 sub ] exp/tri2a/decode_tgpr_dev93_fromlats/wer_16 - -# This is an LDA+MLLT system. -%WER 16.43 [ 1353 / 8234, 241 ins, 162 del, 950 sub ] exp/tri2b/decode_tgpr_dev93/wer_16 -%WER 10.69 [ 603 / 5643, 154 ins, 47 del, 402 sub ] exp/tri2b/decode_tgpr_eval92/wer_14 - -# rescoring the lattices with trigram. -%WER 15.29 [ 1252 / 8191, 219 ins, 153 del, 880 sub ] [PARTIAL] exp/tri2b/decode_tgpr_dev93_tg/wer_18 -# using the "biglm" decoding method to avoid the lattice rescoring step [not faster though.] -%WER 15.31 [ 1261 / 8234, 227 ins, 158 del, 876 sub ] exp/tri2b/decode_tgpr_dev93_tg_biglm/wer_18 -# using a Minimum Bayes Risk decoding method on top of the _tg lattices. -%WER 15.15 [ 1241 / 8191, 221 ins, 155 del, 865 sub ] [PARTIAL] exp/tri2b/decode_tgpr_dev93_tg_mbr/wer_18 - -# fMMI, default learning rate (0.001) - -%WER 15.19 [ 1251 / 8234, 213 ins, 148 del, 890 sub ] exp/tri2b_fmmi_b0.1/decode_tgpr_dev93_it3/wer_15 -%WER 15.14 [ 1247 / 8234, 228 ins, 138 del, 881 sub ] exp/tri2b_fmmi_b0.1/decode_tgpr_dev93_it4/wer_14 -%WER 15.06 [ 1240 / 8234, 211 ins, 152 del, 877 sub ] exp/tri2b_fmmi_b0.1/decode_tgpr_dev93_it5/wer_15 -%WER 15.01 [ 1236 / 8234, 206 ins, 154 del, 876 sub ] exp/tri2b_fmmi_b0.1/decode_tgpr_dev93_it6/wer_15 -%WER 14.99 [ 1234 / 8234, 210 ins, 159 del, 865 sub ] exp/tri2b_fmmi_b0.1/decode_tgpr_dev93_it7/wer_15 -%WER 15.23 [ 1254 / 8234, 200 ins, 184 del, 870 sub ] exp/tri2b_fmmi_b0.1/decode_tgpr_dev93_it8/wer_16 - -%WER 15.55 [ 1280 / 8234, 234 ins, 151 del, 895 sub ] exp/tri2b_fmmi_b0.1_lr0.005/decode_tgpr_dev93_it3/wer_15 -%WER 15.63 [ 1287 / 8234, 242 ins, 150 del, 895 sub ] exp/tri2b_fmmi_b0.1_lr0.005/decode_tgpr_dev93_it4/wer_15 -%WER 15.30 [ 1260 / 8234, 224 ins, 143 del, 893 sub ] exp/tri2b_fmmi_b0.1_lr0.005/decode_tgpr_dev93_it5/wer_15 -%WER 15.34 [ 1263 / 8234, 216 ins, 156 del, 891 sub ] exp/tri2b_fmmi_b0.1_lr0.005/decode_tgpr_dev93_it6/wer_16 -%WER 15.34 [ 1263 / 8234, 242 ins, 139 del, 882 sub ] exp/tri2b_fmmi_b0.1_lr0.005/decode_tgpr_dev93_it7/wer_14 -%WER 15.30 [ 1260 / 8234, 245 ins, 134 del, 881 sub ] exp/tri2b_fmmi_b0.1_lr0.005/decode_tgpr_dev93_it8/wer_13 - -%WER 15.21 [ 1252 / 8234, 218 ins, 148 del, 886 sub ] exp/tri2b_fmmi_indirect_b0.1/decode_tgpr_dev93_it3/wer_15 -%WER 15.16 [ 1248 / 8234, 205 ins, 159 del, 884 sub ] exp/tri2b_fmmi_indirect_b0.1/decode_tgpr_dev93_it4/wer_16 -%WER 15.22 [ 1253 / 8234, 229 ins, 147 del, 877 sub ] exp/tri2b_fmmi_indirect_b0.1/decode_tgpr_dev93_it5/wer_15 -%WER 14.90 [ 1227 / 8234, 203 ins, 150 del, 874 sub ] exp/tri2b_fmmi_indirect_b0.1/decode_tgpr_dev93_it6/wer_15 -%WER 14.95 [ 1231 / 8234, 202 ins, 152 del, 877 sub ] exp/tri2b_fmmi_indirect_b0.1/decode_tgpr_dev93_it7/wer_15 -%WER 15.18 [ 1250 / 8234, 184 ins, 172 del, 894 sub ] exp/tri2b_fmmi_indirect_b0.1/decode_tgpr_dev93_it8/wer_16 - -%WER 15.70 [ 1293 / 8234, 218 ins, 163 del, 912 sub ] exp/tri2b_mmi/decode_tgpr_dev93_it3/wer_16 -%WER 15.61 [ 1285 / 8234, 217 ins, 163 del, 905 sub ] exp/tri2b_mmi/decode_tgpr_dev93_it4/wer_16 -%WER 10.46 [ 590 / 5643, 125 ins, 51 del, 414 sub ] exp/tri2b_mmi/decode_tgpr_eval92_it3/wer_15 -%WER 10.40 [ 587 / 5643, 124 ins, 52 del, 411 sub ] exp/tri2b_mmi/decode_tgpr_eval92_it4/wer_16 - -%WER 15.56 [ 1281 / 8234, 224 ins, 152 del, 905 sub ] 
exp/tri2b_mmi_b0.1/decode_tgpr_dev93_it3/wer_15 -%WER 15.44 [ 1271 / 8234, 220 ins, 165 del, 886 sub ] exp/tri2b_mmi_b0.1/decode_tgpr_dev93_it4/wer_16 -%WER 10.33 [ 583 / 5643, 125 ins, 51 del, 407 sub ] exp/tri2b_mmi_b0.1/decode_tgpr_eval92_it3/wer_15 -%WER 10.33 [ 583 / 5643, 125 ins, 47 del, 411 sub ] exp/tri2b_mmi_b0.1/decode_tgpr_eval92_it4/wer_15 - -%WER 11.43 [ 941 / 8234, 113 ins, 144 del, 684 sub ] exp/tri3b/decode_bd_tgpr_dev93/wer_19 -%WER 16.09 [ 1325 / 8234, 193 ins, 185 del, 947 sub ] exp/tri3b/decode_bd_tgpr_dev93.si/wer_16 -%WER 6.79 [ 383 / 5643, 51 ins, 49 del, 283 sub ] exp/tri3b/decode_bd_tgpr_eval92/wer_18 -%WER 10.61 [ 599 / 5643, 91 ins, 74 del, 434 sub ] exp/tri3b/decode_bd_tgpr_eval92.si/wer_15 -%WER 5.74 [ 324 / 5643, 46 ins, 41 del, 237 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg/wer_19 -%WER 5.90 [ 333 / 5643, 46 ins, 39 del, 248 sub ] exp/tri3b/decode_bd_tgpr_eval92_tg/wer_18 - -%WER 14.17 [ 1167 / 8234, 222 ins, 123 del, 822 sub ] exp/tri3b/decode_tgpr_dev93/wer_17 -%WER 19.37 [ 1595 / 8234, 315 ins, 153 del, 1127 sub ] exp/tri3b/decode_tgpr_dev93.si/wer_15 - -%WER 12.98 [ 1069 / 8234, 209 ins, 116 del, 744 sub ] exp/tri3b/decode_tgpr_dev93_tg/wer_19 -%WER 9.30 [ 525 / 5643, 120 ins, 37 del, 368 sub ] exp/tri3b/decode_tgpr_eval92/wer_18 -%WER 12.95 [ 731 / 5643, 167 ins, 46 del, 518 sub ] exp/tri3b/decode_tgpr_eval92.si/wer_14 -%WER 8.54 [ 482 / 5643, 113 ins, 29 del, 340 sub ] exp/tri3b/decode_tgpr_eval92_tg/wer_17 - -%WER 12.12 [ 998 / 8234, 209 ins, 88 del, 701 sub ] exp/tri4a/decode_tgpr_dev93/wer_17 -%WER 15.98 [ 1316 / 8234, 275 ins, 119 del, 922 sub ] exp/tri4a/decode_tgpr_dev93.si/wer_15 -%WER 7.83 [ 442 / 5643, 107 ins, 23 del, 312 sub ] exp/tri4a/decode_tgpr_eval92/wer_16 -%WER 10.90 [ 615 / 5643, 148 ins, 30 del, 437 sub ] exp/tri4a/decode_tgpr_eval92.si/wer_13 - -%WER 9.15 [ 753 / 8234, 90 ins, 113 del, 550 sub ] exp/tri4b/decode_bd_pp_tgpr_dev93/wer_16 -%WER 12.64 [ 1041 / 8234, 137 ins, 145 del, 759 sub ] exp/tri4b/decode_bd_pp_tgpr_dev93.si/wer_16 -%WER 5.74 [ 324 / 5643, 47 ins, 35 del, 242 sub ] exp/tri4b/decode_bd_pp_tgpr_eval92/wer_19 -%WER 7.92 [ 447 / 5643, 64 ins, 46 del, 337 sub ] exp/tri4b/decode_bd_pp_tgpr_eval92.si/wer_15 -%WER 9.38 [ 772 / 8234, 90 ins, 118 del, 564 sub ] exp/tri4b/decode_bd_tgpr_dev93/wer_18 -%WER 13.07 [ 1076 / 8234, 148 ins, 143 del, 785 sub ] exp/tri4b/decode_bd_tgpr_dev93.si/wer_17 -%WER 6.03 [ 340 / 5643, 66 ins, 26 del, 248 sub ] exp/tri4b/decode_bd_tgpr_eval92/wer_13 -%WER 8.19 [ 462 / 5643, 74 ins, 42 del, 346 sub ] exp/tri4b/decode_bd_tgpr_eval92.si/wer_15 -%WER 12.16 [ 1001 / 8234, 197 ins, 98 del, 706 sub ] exp/tri4b/decode_tgpr_dev93/wer_17 -%WER 15.47 [ 1274 / 8234, 235 ins, 120 del, 919 sub ] exp/tri4b/decode_tgpr_dev93.si/wer_17 -%WER 8.08 [ 456 / 5643, 125 ins, 16 del, 315 sub ] exp/tri4b/decode_tgpr_eval92/wer_13 -%WER 10.49 [ 592 / 5643, 147 ins, 27 del, 418 sub ] exp/tri4b/decode_tgpr_eval92.si/wer_12 +%WER 18.23 [ 1501 / 8234, 245 ins, 181 del, 1075 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg1/wer_15_0.5 +%WER 18.23 [ 1501 / 8234, 245 ins, 181 del, 1075 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg2/wer_15_0.5 +%WER 18.16 [ 1495 / 8234, 268 ins, 153 del, 1074 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg3/wer_16_0.0 +%WER 18.18 [ 1497 / 8234, 268 ins, 154 del, 1075 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg4/wer_16_0.0 + + +# tri2b is an LDA+MLLT system trained on SI-84 +exp/tri2b: nj=10 align prob=-47.22 over 15.10h [retry=0.7%, fail=0.0%] states=2005 gauss=15036 tree-impr=5.45 lda-sum=26.20 
mllt:impr,logdet=1.34,1.97
+%WER 16.37 [ 1348 / 8234, 241 ins, 157 del, 950 sub ] exp/tri2b/decode_nosp_tgpr_dev93/wer_17_0.0
+%WER 10.53 [ 594 / 5643, 110 ins, 60 del, 424 sub ] exp/tri2b/decode_nosp_tgpr_eval92/wer_17_0.5
+
+
+# tri3b is an LDA+MLLT+SAT system trained on all of SI-284
+exp/tri3b: nj=10 align prob=-44.30 over 81.23h [retry=0.8%, fail=0.1%] states=3362 gauss=40061 fmllr-impr=3.70 over 59.77h tree-impr=7.86
+
+%WER 15.56 [ 1281 / 8234, 220 ins, 140 del, 921 sub ] exp/tri3b/decode_nosp_tgpr_dev93.si/wer_17_0.5
+%WER 12.82 [ 1056 / 8234, 135 ins, 147 del, 774 sub ] exp/tri3b/decode_nosp_bd_tgpr_dev93.si/wer_15_0.0
+%WER 9.24 [ 761 / 8234, 89 ins, 109 del, 563 sub ] exp/tri3b/decode_nosp_bd_tgpr_dev93/wer_16_0.0
+%WER 11.53 [ 949 / 8234, 179 ins, 94 del, 676 sub ] exp/tri3b/decode_nosp_tgpr_dev93/wer_15_0.5
+%WER 10.94 [ 901 / 8234, 181 ins, 82 del, 638 sub ] exp/tri3b/decode_nosp_tg_dev93/wer_14_0.5
+%WER 8.16 [ 672 / 8234, 94 ins, 94 del, 484 sub ] exp/tri3b/decode_nosp_bd_tgpr_dev93_fg/wer_17_0.0
+
+%WER 10.95 [ 618 / 5643, 148 ins, 36 del, 434 sub ] exp/tri3b/decode_nosp_tgpr_eval92.si/wer_14_0.0
+%WER 8.19 [ 462 / 5643, 77 ins, 51 del, 334 sub ] exp/tri3b/decode_nosp_bd_tgpr_eval92.si/wer_16_0.0
+%WER 5.55 [ 313 / 5643, 35 ins, 45 del, 233 sub ] exp/tri3b/decode_nosp_bd_tgpr_eval92/wer_17_1.0
+%WER 4.89 [ 276 / 5643, 47 ins, 28 del, 201 sub ] exp/tri3b/decode_nosp_bd_tgpr_eval92_fg/wer_15_0.5
+%WER 7.53 [ 425 / 5643, 112 ins, 20 del, 293 sub ] exp/tri3b/decode_nosp_tg_eval92/wer_17_0.0
+%WER 8.15 [ 460 / 5643, 113 ins, 30 del, 317 sub ] exp/tri3b/decode_nosp_tgpr_eval92/wer_14_1.0
+
+
+# tri4b is an LDA+MLLT+SAT system after estimating pronunciation probabilities
+# and word-and-pronunciation-dependent silence probabilities.
+
+exp/tri4b: nj=10 align prob=-44.46 over 81.23h [retry=0.6%, fail=0.1%] states=3413 gauss=40059 fmllr-impr=0.17 over 60.20h tree-impr=8.70
+
+%WER 15.16 [ 1248 / 8234, 253 ins, 96 del, 899 sub ] exp/tri4b/decode_tgpr_dev93.si/wer_17_0.0
+%WER 12.62 [ 1039 / 8234, 141 ins, 124 del, 774 sub ] exp/tri4b/decode_bd_tgpr_dev93.si/wer_17_0.0
+%WER 9.01 [ 742 / 8234, 106 ins, 97 del, 539 sub ] exp/tri4b/decode_bd_tgpr_dev93/wer_16_0.0
+%WER 8.25 [ 679 / 8234, 94 ins, 100 del, 485 sub ] exp/tri4b/decode_bd_tgpr_dev93_fg/wer_17_0.5
+%WER 10.92 [ 899 / 8234, 186 ins, 92 del, 621 sub ] exp/tri4b/decode_tg_dev93/wer_17_0.5
+%WER 11.44 [ 942 / 8234, 203 ins, 87 del, 652 sub ] exp/tri4b/decode_tgpr_dev93/wer_14_0.5
+
+%WER 10.93 [ 617 / 5643, 147 ins, 33 del, 437 sub ] exp/tri4b/decode_tgpr_eval92.si/wer_14_1.0
+%WER 8.74 [ 493 / 5643, 104 ins, 34 del, 355 sub ] exp/tri4b/decode_bd_tgpr_eval92.si/wer_15_0.0
+%WER 5.69 [ 321 / 5643, 50 ins, 34 del, 237 sub ] exp/tri4b/decode_bd_tgpr_eval92/wer_17_0.5
+%WER 4.71 [ 266 / 5643, 40 ins, 27 del, 199 sub ] exp/tri4b/decode_bd_tgpr_eval92_fg/wer_17_1.0
+%WER 7.39 [ 417 / 5643, 107 ins, 24 del, 286 sub ] exp/tri4b/decode_tg_eval92/wer_16_1.0
+%WER 7.90 [ 446 / 5643, 111 ins, 27 del, 308 sub ] exp/tri4b/decode_tgpr_eval92/wer_15_1.0
+
+
+######################################
+## Results below this point were mostly obtained in 2013 by Hainan Xu.
+## They are from parts of the script that are no longer run by default in run.sh;
+## you can look in the git history to figure out when these results were added.
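+## (Presumably these older numbers were generated with the one-liner that used
+## to sit at the top of this file:
+##   for x in exp/*/decode*; do grep WER $x/wer_* | utils/best_wer.sh; done )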
+ %WER 7.99 [ 658 / 8234, 72 ins, 95 del, 491 sub ] exp/tri4b_fmmi_a/decode_bd_tgpr_dev93_it8/wer_12 %WER 11.15 [ 918 / 8234, 180 ins, 81 del, 657 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it3/wer_15 %WER 11.23 [ 925 / 8234, 201 ins, 77 del, 647 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it4/wer_12 @@ -166,7 +142,7 @@ exit 0 # not updated -# DNN on fMLLR features (Karel's setup, [7.8.2015]). +# DNN on fMLLR features (Karel's setup, [7.8.2015]). # frame cross-entropy training %WER 6.05 [ 498 / 8234, 59 ins, 67 del, 372 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_bd_tgpr_dev93/wer_11_0.0 %WER 3.69 [ 208 / 5643, 19 ins, 19 del, 170 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_bd_tgpr_eval92/wer_11_1.0 @@ -298,7 +274,7 @@ for x in exp/nnet3/nnet_tdnn_a/decode_*; do grep WER $x/wer_* | utils/best_wer.s # bidirectional LSTM # ----------------------- -# local/nnet3/run_lstm.sh --affix bidirectional \ +# local/nnet3/run_lstm.sh --affix bidirectional \ # --lstm-delay " [-1,1] [-2,2] [-3,3] " \ # --label-delay 0 \ # --cell-dim 640 \ diff --git a/egs/wsj/s5/local/chain/compare_wer.sh b/egs/wsj/s5/local/chain/compare_wer.sh new file mode 100755 index 00000000000..edfefad547f --- /dev/null +++ b/egs/wsj/s5/local/chain/compare_wer.sh @@ -0,0 +1,137 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] [ ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev93 (tgpr) " + "#WER dev93 (tg) " + "#WER dev93 (big-dict,tgpr) " + "#WER dev93 (big-dict,fg) " + "#WER eval92 (tgpr) " + "#WER eval92 (tg) " + "#WER eval92 (big-dict,tgpr)" + "#WER eval92 (big-dict,fg) ") + +for n in 0 1 2 3 4 5 6 7; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgpr_dev93 tg_dev93 bd_tgpr_dev93 bd_tgpr_dev93_fg tgpr_eval92 tg_eval92 bd_tgpr_eval92 bd_tgpr_eval92_fg) + + wer=$(cat $dirname/decode_${decode_names[$n]}/scoring_kaldi/best_wer | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/scoring_kaldi/best_wer | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/scoring_kaldi/best_wer | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
+fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo diff --git a/egs/wsj/s5/local/chain/run_tdnn.sh b/egs/wsj/s5/local/chain/run_tdnn.sh new file mode 120000 index 00000000000..34499362831 --- /dev/null +++ b/egs/wsj/s5/local/chain/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1a.sh \ No newline at end of file diff --git a/egs/wsj/s5/local/chain/run_tdnn_lstm.sh b/egs/wsj/s5/local/chain/run_tdnn_lstm.sh new file mode 120000 index 00000000000..8e647598556 --- /dev/null +++ b/egs/wsj/s5/local/chain/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a.sh \ No newline at end of file diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..d874eb0986a --- /dev/null +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh @@ -0,0 +1,361 @@ +#!/bin/bash + + +# This was modified from run_tdnn_lstm_1a.sh, making similar +# changes as the diff from run_tdnn_lstm_1a.sh->run_tdnn_1c.sh +# in egs/tedlium/s5_r2/local/nnet3/tuning, +# specifically: +# changing chunk_left_context to zero, shrink from 0.99->1 +# (since it's not applicable to ReLUs), and removing +# the deriv-truncate-margin option since it's only applicable +# to recurrent setups; removing label-delay. +# adding pre-final layers (I experimented with this, +# it did seem helpful); using 3M not 1.5M frames per iter to keep the +# time per job reasonable; and fewer final jobs (5 not 10). + + +# steps/info/chain_dir_info.pl exp/chain/tdnn1a_sp +# exp/chain/tdnn1a_sp: num-iters=102 nj=2..5 num-params=7.6M dim=40+100->2889 combine=-0.052->-0.051 xent:train/valid[67,101,final]=(-0.881,-0.824,-0.822/-0.953,-0.922,-0.921) logprob:train/valid[67,101,final]=(-0.048,-0.042,-0.041/-0.064,-0.064,-0.063) + +# The following table compares (nnet3 TDNN, chain TDNN+LSTM, this experiment == chain TDNN). +# This is better than the nnet3 TDNN, but the difference with the chain TDNN+LSTM +# is inconsistent. 
+ +# local/chain/compare_wer.sh --online exp/nnet3/tdnn1a_sp exp/chain/tdnn_lstm1a_sp exp/chain/tdnn1a_sp +# System tdnn1a_sp tdnn_lstm1a_sp tdnn1a_sp +#WER dev93 (tgpr) 9.18 7.48 7.87 +# [online:] 7.49 8.02 +#WER dev93 (tg) 8.59 7.41 7.61 +# [online:] 7.40 7.70 +#WER dev93 (big-dict,tgpr) 6.45 5.64 5.71 +# [online:] 5.70 5.60 +#WER dev93 (big-dict,fg) 5.83 5.40 5.10 +# [online:] 5.19 5.21 +#WER eval92 (tgpr) 6.15 5.67 5.23 +# [online:] 5.60 5.44 +#WER eval92 (tg) 5.55 5.46 4.87 +# [online:] 5.53 4.87 +#WER eval92 (big-dict,tgpr) 3.58 3.69 3.24 +# [online:] 3.63 3.31 +#WER eval92 (big-dict,fg) 2.98 3.28 2.71 +# [online:] 3.31 2.92 +# Final train prob -0.0341 -0.0414 +# Final valid prob -0.0506 -0.0634 +# Final train prob (xent) -0.5643 -0.8216 +# Final valid prob (xent) -0.6648 -0.9208 + + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +train_set=train_si284 +test_sets="test_dev93 test_eval92" +gmm=tri4b # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. + +# Options which are not passed through to run_ivector_common.sh +affix=1d #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= +reporting_email= + +# LSTM/chain options +train_stage=-10 +xent_regularize=0.1 + +# training chunk-options +chunk_width=140,100,160 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 + +# training options +srand=0 +remove_egs=true + +#decode options +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 15 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-6,-3,0) + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain dim=512 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=512 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=4 \ + --trainer.frames-per-iter=3000000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=5 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=256,128,64 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 17 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh \ + data/lang_test_tgpr/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgpr \ + $tree_dir $tree_dir/graph_tgpr || exit 1; + + utils/lang/check_phones_compatible.sh \ + data/lang_test_bd_tgpr/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + $tree_dir $tree_dir/graph_bd_tgpr || exit 1; +fi + +if [ $stage -le 18 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l 2889 combine=-0.047->-0.045 xent:train/valid[79,119,final]=(-0.684,-0.569,-0.564/-0.742,-0.668,-0.665) logprob:train/valid[79,119,final]=(-0.045,-0.035,-0.034/-0.058,-0.051,-0.051) + +# The following compares: +# (nnet3 TDNN+LSTM, chain TDNN, this experiment == chain TDNN+LSTM) +# system. +# This is consistently better than the nnet3 TDNN+LSTM, but the +# difference with the chain TDNN is inconsistent. 
+ +# local/chain/compare_wer.sh --online exp/nnet3/tdnn_lstm1a_sp exp/chain/tdnn1a_sp exp/chain/tdnn_lstm1a_sp +# System tdnn_lstm1a_sp tdnn1a_sp tdnn_lstm1a_sp +#WER dev93 (tgpr) 8.54 7.87 7.48 +# [online:] 8.57 8.02 7.49 +#WER dev93 (tg) 8.25 7.61 7.41 +# [online:] 8.34 7.70 7.40 +#WER dev93 (big-dict,tgpr) 6.24 5.71 5.64 +# [online:] 6.40 5.60 5.70 +#WER dev93 (big-dict,fg) 5.70 5.10 5.40 +# [online:] 5.77 5.21 5.19 +#WER eval92 (tgpr) 6.52 5.23 5.67 +# [online:] 6.56 5.44 5.60 +#WER eval92 (tg) 6.13 4.87 5.46 +# [online:] 6.24 4.87 5.53 +#WER eval92 (big-dict,tgpr) 3.88 3.24 3.69 +# [online:] 3.88 3.31 3.63 +#WER eval92 (big-dict,fg) 3.38 2.71 3.28 +# [online:] 3.53 2.92 3.31 +# Final train prob -0.0414 -0.0341 +# Final valid prob -0.0634 -0.0506 +# Final train prob (xent) -0.8216 -0.5643 +# Final valid prob (xent) -0.9208 -0.6648 + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +train_set=train_si284 +test_sets="test_dev93 test_eval92" +gmm=tri4b # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. + +# Options which are not passed through to run_ivector_common.sh +affix=1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= +reporting_email= + +# LSTM/chain options +train_stage=-10 +label_delay=5 +xent_regularize=0.1 + +# training chunk-options +chunk_width=140,100,160 +chunk_left_context=40 +chunk_right_context=0 + +# training options +srand=0 +remove_egs=true + +#decode options +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 15 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=4 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.frames-per-iter=1500000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=10 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=0.99 \ + --trainer.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 17 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh \ + data/lang_test_tgpr/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgpr \ + $tree_dir $tree_dir/graph_tgpr || exit 1; + + utils/lang/check_phones_compatible.sh \ + data/lang_test_bd_tgpr/phones.txt $lang/phones.txt + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + $tree_dir $tree_dir/graph_bd_tgpr || exit 1; +fi + +if [ $stage -le 18 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l [ ... ]" + echo "e.g.: $0 exp/nnet3/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/nnet3/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev93 (tgpr) " + "#WER dev93 (tg) " + "#WER dev93 (big-dict,tgpr) " + "#WER dev93 (big-dict,fg) " + "#WER eval92 (tgpr) " + "#WER eval92 (tg) " + "#WER eval92 (big-dict,tgpr)" + "#WER eval92 (big-dict,fg) ") + +for n in 0 1 2 3 4 5 6 7; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgpr_dev93 tg_dev93 bd_tgpr_dev93 bd_tgpr_dev93_fg tgpr_eval92 tg_eval92 bd_tgpr_eval92 bd_tgpr_eval92_fg) + + wer=$(cat $dirname/decode_${decode_names[$n]}/scoring_kaldi/best_wer | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/scoring_kaldi/best_wer | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/scoring_kaldi/best_wer | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo diff --git a/egs/wsj/s5/local/nnet3/run_ivector_common.sh b/egs/wsj/s5/local/nnet3/run_ivector_common.sh index 8d4cff326b3..e30988b7bf6 100755 --- a/egs/wsj/s5/local/nnet3/run_ivector_common.sh +++ b/egs/wsj/s5/local/nnet3/run_ivector_common.sh @@ -1,83 +1,215 @@ #!/bin/bash -# this script is called from scripts like run_ms.sh; it does the common stages -# of the build, such as feature extraction. -# This is actually the same as local/online/run_nnet2_common.sh, except -# for the directory names. +set -e -o pipefail -. 
cmd.sh -mfccdir=mfcc +# This script is called from scripts like local/nnet3/run_tdnn.sh and +# local/chain/run_tdnn.sh (and may eventually be called by more scripts). It +# contains the common feature preparation and iVector-related parts of the +# script. See those scripts for examples of usage. -stage=1 -. cmd.sh +stage=0 +nj=30 +train_set=train_si284 # you might set this to e.g. train. +test_sets="test_dev93 test_eval92" +gmm=tri4b # This specifies a GMM-dir from the features of the type you're training the system on; + # it should contain alignments for 'train_set'. + +num_threads_ubm=32 +nnet3_affix= # affix for exp/nnet3 directory to put iVector stuff in (e.g. + # in the tedlium recip it's _cleaned). + +. ./cmd.sh . ./path.sh -. ./utils/parse_options.sh +. utils/parse_options.sh + + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! -f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + + + +if [ $stage -le 2 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then + echo "$0: data/${train_set}_sp_hires/feats.scp already exists." + echo " ... Please either remove it, or rerun this script with stage > 2." + exit 1 +fi if [ $stage -le 1 ]; then - for datadir in train_si284 test_eval93 test_dev93 test_eval92; do - utils/copy_data_dir.sh data/$datadir data/${datadir}_hires - steps/make_mfcc.sh --nj 40 --mfcc-config conf/mfcc_hires.conf \ - --cmd "$train_cmd" data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; - steps/compute_cmvn_stats.sh data/${datadir}_hires exp/make_hires/$datadir $mfccdir || exit 1; - done - utils/subset_data_dir.sh --first data/train_si284_hires 7138 data/train_si84_hires || exit 1 + echo "$0: preparing directory for speed-perturbed data" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp fi if [ $stage -le 2 ]; then - # We need to build a small system just because we need the LDA+MLLT transform - # to train the diag-UBM on top of. We align the si84 data for this purpose. + echo "$0: creating high-resolution MFCC features" + + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations. You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp ${test_sets}; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires - steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \ - data/train_si84 data/lang exp/tri4b exp/nnet3/tri4b_ali_si84 + for datadir in ${train_set}_sp ${test_sets}; do + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires + steps/compute_cmvn_stats.sh data/${datadir}_hires + utils/fix_data_dir.sh data/${datadir}_hires + done fi if [ $stage -le 3 ]; then - # Train a small system just for its LDA+MLLT transform. 
We use --num-iters 13 - # because after we get the transform (12th iter is the last), any further - # training is pointless. - steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \ - --realign-iters "" \ - --splice-opts "--left-context=3 --right-context=3" \ - 5000 10000 data/train_si84_hires data/lang \ - exp/nnet3/tri4b_ali_si84 exp/nnet3/tri5b + echo "$0: selecting segments of hires training data that were also present in the" + echo " ... original training data." + + # note, these data-dirs are temporary; we put them in a sub-directory + # of the place where we'll make the alignments. + temp_data_root=exp/nnet3${nnet3_affix}/tri5 + mkdir -p $temp_data_root + + utils/data/subset_data_dir.sh --utt-list data/${train_set}/feats.scp \ + data/${train_set}_sp_hires $temp_data_root/${train_set}_hires + + # note: essentially all the original segments should be in the hires data. + n1=$(wc -l /dev/null - for data in test_eval92 test_dev93 test_eval93; do - steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 8 \ - data/${data}_hires exp/nnet3/extractor exp/nnet3/ivectors_${data} || touch exp/nnet3/.error & - done - wait - [ -f exp/nnet3/.error ] && echo "$0: error extracting iVectors." && exit 1; + echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)" + utils/data/perturb_data_dir_speed_3way.sh \ + data/${train_set} data/${train_set}_sp +fi + +if [ $stage -le 8 ]; then + echo "$0: making MFCC features for low-resolution speed-perturbed data (needed for alignments)" + steps/make_mfcc.sh --nj $nj \ + --cmd "$train_cmd" data/${train_set}_sp + steps/compute_cmvn_stats.sh data/${train_set}_sp + echo "$0: fixing input data-dir to remove nonexistent features, in case some " + echo ".. speed-perturbed segments were too short." + utils/fix_data_dir.sh data/${train_set}_sp fi +if [ $stage -le 9 ]; then + if [ -f $ali_dir/ali.1.gz ]; then + echo "$0: alignments in $ali_dir appear to already exist. Please either remove them " + echo " ... or use a later --stage option." + exit 1 + fi + echo "$0: aligning with the perturbed low-resolution data" + steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ + data/${train_set}_sp data/lang $gmm_dir $ali_dir +fi + + exit 0; diff --git a/egs/wsj/s5/local/nnet3/run_lstm.sh b/egs/wsj/s5/local/nnet3/run_lstm.sh index 2454fb5be63..d9af546b49b 100755 --- a/egs/wsj/s5/local/nnet3/run_lstm.sh +++ b/egs/wsj/s5/local/nnet3/run_lstm.sh @@ -1,5 +1,7 @@ #!/bin/bash +# This script is deprecated, see run_tdnn_lstm.sh + # this is a basic lstm script # LSTM script runs for more epochs than the TDNN script # and each epoch takes twice the time @@ -125,4 +127,3 @@ if [ $stage -le 9 ]; then fi exit 0; - diff --git a/egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh b/egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh index 124b04949a0..311ee14d16a 100755 --- a/egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh +++ b/egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh @@ -1,5 +1,8 @@ #!/bin/bash + +# This script is deprecated. + set -o pipefail set -e # this is run_discriminative.sh diff --git a/egs/wsj/s5/local/nnet3/run_tdnn.sh b/egs/wsj/s5/local/nnet3/run_tdnn.sh deleted file mode 100755 index 337c5656de4..00000000000 --- a/egs/wsj/s5/local/nnet3/run_tdnn.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash - -# this is the standard "tdnn" system, built in nnet3; it's what we use to -# call multi-splice. - -. cmd.sh - - -# At this script level we don't support not running on GPU, as it would be painfully slow. 
-# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, -# --num-threads 16 and --minibatch-size 128. - -stage=0 -train_stage=-10 -dir=exp/nnet3/nnet_tdnn_a -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - - -if ! cuda-compiled; then - cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=650 + relu-renorm-layer name=tdnn2 dim=650 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=650 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=650 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=650 input=Append(-6,-3,0) + output-layer name=output dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=3 \ + --trainer.samples-per-iter=400000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=10 \ + --trainer.optimization.initial-effective-lrate=0.0015 \ + --trainer.optimization.final-effective-lrate=0.00015 \ + --trainer.optimization.minibatch-size=256,128 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # note: for TDNNs, looped decoding gives exactly the same results + # as regular decoding, so there is no point in testing it separately. + # We use regular decoding because it supports multi-threaded (we just + # didn't create the binary for that, for looped decoding, so far). + rm $dir/.error || true 2>/dev/null + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nj=$(wc -l b}.sh +# There seems to be no consistent difference. + +# run_tdnn_1a.sh is the standard "tdnn" system, built in nnet3 with xconfigs. + +# local/nnet3/compare_wer.sh exp/nnet3/tdnn1a_sp exp/nnet3/tdnn1b_sp +# System tdnn1a_sp tdnn1b_sp +#WER dev93 (tgpr) 9.18 9.12 +#WER dev93 (tg) 8.59 8.51 +#WER dev93 (big-dict,tgpr) 6.45 6.19 +#WER dev93 (big-dict,fg) 5.83 5.78 +#WER eval92 (tgpr) 6.15 6.33 +#WER eval92 (tg) 5.55 5.74 +#WER eval92 (big-dict,tgpr) 3.58 3.62 +#WER eval92 (big-dict,fg) 2.98 3.10 +# Final train prob -0.7200 -0.6035 +# Final valid prob -0.8834 -0.7578 +# Final train acc 0.7762 0.8015 +# Final valid acc 0.7301 0.7607 + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
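+# (Concretely, $train_set, $test_sets, $gmm, $num_threads_ubm and $nnet3_affix
+# below are the pass-through options; see local/nnet3/run_ivector_common.sh
+# for how they are used.)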
+stage=0 +nj=30 + +train_set=train_si284 +test_sets="test_dev93 test_eval92" +gmm=tri4b # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. +tdnn_affix=1b #affix for TDNN directory e.g. "1a" or "1b", in case we change the configuration. + +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +remove_egs=true +srand=0 +reporting_email= +# set common_egs_dir to use previously dumped egs. +common_egs_dir= + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=750 + relu-renorm-layer name=tdnn2 dim=750 input=Append(-1,2) + relu-renorm-layer name=tdnn3 dim=750 input=Append(-3,3) + relu-renorm-layer name=tdnn4 dim=750 input=Append(-7,2) + relu-renorm-layer name=tdnn5 dim=750 input=Append(-3,3) + relu-renorm-layer name=tdnn6 dim=750 + output-layer name=output dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=3 \ + --trainer.samples-per-iter=400000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=10 \ + --trainer.optimization.initial-effective-lrate=0.0015 \ + --trainer.optimization.final-effective-lrate=0.00015 \ + --trainer.optimization.minibatch-size=256,128 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # note: for TDNNs, looped decoding gives exactly the same results + # as regular decoding, so there is no point in testing it separately. + # We use regular decoding because it supports multi-threaded (we just + # didn't create the binary for that, for looped decoding, so far). 
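+  # (This is because a TDNN carries no recurrent state between chunks, so the
+  # looped computation produces exactly the same output as chunked decoding.)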
+ rm $dir/.error || true 2>/dev/null + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nj=$(wc -l 3413 combine=-0.55->-0.54 loglike:train/valid[67,101,combined]=(-0.63,-0.55,-0.55/-0.71,-0.63,-0.63) accuracy:train/valid[67,101,combined]=(0.80,0.82,0.82/0.76,0.78,0.78) + + + +# local/nnet3/compare_wer.sh --looped --online exp/nnet3/tdnn1a_sp exp/nnet3/tdnn_lstm1a_sp 2>/dev/null +# local/nnet3/compare_wer.sh --looped --online exp/nnet3/tdnn1a_sp exp/nnet3/tdnn_lstm1a_sp +# System tdnn1a_sp tdnn_lstm1a_sp +#WER dev93 (tgpr) 9.18 8.54 +# [looped:] 8.54 +# [online:] 8.57 +#WER dev93 (tg) 8.59 8.25 +# [looped:] 8.21 +# [online:] 8.34 +#WER dev93 (big-dict,tgpr) 6.45 6.24 +# [looped:] 6.28 +# [online:] 6.40 +#WER dev93 (big-dict,fg) 5.83 5.70 +# [looped:] 5.70 +# [online:] 5.77 +#WER eval92 (tgpr) 6.15 6.52 +# [looped:] 6.45 +# [online:] 6.56 +#WER eval92 (tg) 5.55 6.13 +# [looped:] 6.08 +# [online:] 6.24 +#WER eval92 (big-dict,tgpr) 3.58 3.88 +# [looped:] 3.93 +# [online:] 3.88 +#WER eval92 (big-dict,fg) 2.98 3.38 +# [looped:] 3.47 +# [online:] 3.53 +# Final train prob -0.7200 -0.5492 +# Final valid prob -0.8834 -0.6343 +# Final train acc 0.7762 0.8154 +# Final valid acc 0.7301 0.7849 + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +train_set=train_si284 +test_sets="test_dev93 test_eval92" +gmm=tri4b # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. + +# Options which are not passed through to run_ivector_common.sh +affix=1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= +reporting_email= + +# LSTM options +train_stage=-10 +label_delay=5 + +# training chunk-options +chunk_width=40,30,20 +chunk_left_context=40 +chunk_right_context=0 + +# training options +srand=0 +remove_egs=true + +#decode options +test_online_decoding=false # if true, it will run the last decoding stage. + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=520 + relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=10 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=$lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + data_affix=$(echo $data | sed s/test_//) + nj=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nj=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nj=$(wc -l 3205 combine=-0.43->-0.42 loglike:train/valid[89,135,combined]=(-0.51,-0.39,-0.38/-0.59,-0.51,-0.51) accuracy:train/valid[89,135,combined]=(0.85,0.88,0.88/0.82,0.84,0.84) + + +# It seems to be a little worse the 
regular-frame-rate system. + +# local/nnet3/compare_wer.sh --looped exp/nnet3/tdnn_lstm1a_sp exp/nnet3/tdnn_lstm_lfr1a_sp +# System tdnn_lstm1a_sp tdnn_lstm_lfr1a_sp +#WER dev93 (tgpr) 8.54 9.02 +# [looped:] 8.54 8.99 +#WER dev93 (tg) 8.25 8.60 +# [looped:] 8.21 8.54 +#WER dev93 (big-dict,tgpr) 6.24 6.85 +# [looped:] 6.28 6.81 +#WER dev93 (big-dict,fg) 5.70 6.33 +# [looped:] 5.70 6.33 +#WER eval92 (tgpr) 6.52 6.52 +# [looped:] 6.45 6.42 +#WER eval92 (tg) 6.13 6.01 +# [looped:] 6.08 5.92 +#WER eval92 (big-dict,tgpr) 3.88 4.22 +# [looped:] 3.93 4.20 +#WER eval92 (big-dict,fg) 3.38 3.76 +# [looped:] 3.47 3.79 +# Final train prob -0.5492 -0.3100 +# Final valid prob -0.6343 -0.4646 +# Final train acc 0.8154 0.9051 +# Final valid acc 0.7849 0.8615 + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +train_set=train_si284 +test_sets="test_dev93 test_eval92" +gmm=tri4b # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. + +# Options which are not passed through to run_ivector_common.sh +affix=1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= +reporting_email= + +# LSTM options +train_stage=-10 +label_delay=5 + +# training chunk-options +chunk_width=40,30,20 +chunk_left_context=40 +chunk_right_context=0 + +# training options +srand=0 +remove_egs=true + +#decode options +test_online_decoding=false # if true, it will run the last decoding stage. + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 13 ]; then + # Build a tree using our new topology and a reduced sampling rate. + # We use 4000 leaves, which is a little less than the number used + # in the baseline GMM system (5k) in this setup, since generally + # LFR systems do best with somewhat fewer leaves. + # + # To get the stats to build the tree this script only uses every third frame, + # but it dumps converted alignments that essentially have 3 different + # frame-shifted versions of the alignment interpolated together; these can be + # used without modification in getting labels for training. 
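+  # (For example, with --frame-subsampling-factor 3 a 300-frame utterance is
+  # modeled by just 100 network output frames, and --repeat-frames true keeps
+  # all three frame-shifted copies of the alignment, so no supervision frames
+  # are discarded.)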
+ steps/nnet3/chain/build_tree.sh \ + --repeat-frames true --frame-subsampling-factor 3 \ + --cmd "$train_cmd" 4000 data/${train_set}_sp \ + $lang $ali_dir $treedir +fi + + +if [ $stage -le 14 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=520 + relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 15 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=10000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=10 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$treedir \ + --lang=$lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + echo 3 >$dir/frame_subsampling_factor +fi + +if [ $stage -le 16 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). 
So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh \ + data/lang_test_tgpr/phones.txt $lang/phones.txt + utils/mkgraph.sh --self-loop-scale 0.333 data/lang_test_tgpr \ + $dir $dir/graph_tgpr || exit 1; + + utils/lang/check_phones_compatible.sh \ + data/lang_test_bd_tgpr/phones.txt $lang/phones.txt + utils/mkgraph.sh --self-loop-scale 0.333 data/lang_test_bd_tgpr \ + $dir $dir/graph_bd_tgpr || exit 1; +fi + +if [ $stage -le 17 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + data_affix=$(echo $data | sed s/test_//) + nspk=$(wc -l data/$y/utt2spk; cp data/$y/utt2spk data/$y/spk2utt; - steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1; + steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1; done @@ -33,7 +33,7 @@ steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \ # get the fMLLR basis. steps/get_fmllr_basis.sh --cmd "$train_cmd" \ - data/train_si84 data/lang${lang_suffix} exp/tri3b + data/train_si284 data/lang${lang_suffix} exp/tri3b # decoding tri3b with basis fMLLR steps/decode_basis_fmllr.sh --nj 10 --cmd "$decode_cmd" \ @@ -50,5 +50,3 @@ steps/decode_basis_fmllr.sh --nj 10 --cmd "$decode_cmd" \ steps/decode_basis_fmllr.sh --nj 8 --cmd "$decode_cmd" \ exp/tri3b/graph${lang_suffix}_tgpr data/test_eval92_utt \ exp/tri3b/decode${lang_suffix}_tgpr_eval92_basis_utt || exit 1; - - diff --git a/egs/wsj/s5/local/run_mmi_tri2b.sh b/egs/wsj/s5/local/run_mmi_tri2b.sh deleted file mode 100755 index d7ddbfbaf62..00000000000 --- a/egs/wsj/s5/local/run_mmi_tri2b.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -lang_suffix= - -echo "$0 $@" # Print the command line for logging -. utils/parse_options.sh || exit 1; - -. ./cmd.sh - -# Train and test MMI (and boosted MMI) on tri2b system. -steps/make_denlats.sh --sub-split 20 --nj 10 --cmd "$train_cmd" \ - data/train_si84 data/lang${lang_suffix} \ - exp/tri2b exp/tri2b_denlats_si84 || exit 1; - -# train the basic MMI system. -steps/train_mmi.sh --cmd "$train_cmd" \ - data/train_si84 data/lang${lang_suffix} exp/tri2b_ali_si84 \ - exp/tri2b_denlats_si84 exp/tri2b_mmi || exit 1; -for iter in 3 4; do - steps/decode_si.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ - exp/tri2b/graph${lang_suffix}_tgpr data/test_dev93 \ - exp/tri2b_mmi/decode${lang_suffix}_tgpr_dev93_it$iter & - steps/decode_si.sh --nj 8 --cmd "$decode_cmd" --iter $iter \ - exp/tri2b/graph${lang_suffix}_tgpr data/test_eval92 \ - exp/tri2b_mmi/decode${lang_suffix}_tgpr_eval92_it$iter & -done - -# MMI with 0.1 boosting factor. 
-steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 \ - data/train_si84 data/lang${lang_suffix} exp/tri2b_ali_si84 \ - exp/tri2b_denlats_si84 exp/tri2b_mmi_b0.1 || exit 1; - -for iter in 3 4; do - steps/decode_si.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ - exp/tri2b/graph${lang_suffix}_tgpr data/test_dev93 \ - exp/tri2b_mmi_b0.1/decode${lang_suffix}_tgpr_dev93_it$iter & - steps/decode_si.sh --nj 8 --cmd "$decode_cmd" --iter $iter \ - exp/tri2b/graph${lang_suffix}_tgpr data/test_eval92 \ - exp/tri2b_mmi_b0.1/decode${lang_suffix}_tgpr_eval92_it$iter & -done - - -# Train a UBM with 400 components, for fMMI. -steps/train_diag_ubm.sh --silence-weight 0.5 --nj 10 --cmd "$train_cmd" \ - 400 data/train_si84 data/lang${lang_suffix} exp/tri2b_ali_si84 exp/dubm2b - -steps/train_mmi_fmmi.sh --boost 0.1 --cmd "$train_cmd" \ - data/train_si84 data/lang${lang_suffix} exp/tri2b_ali_si84 \ - exp/dubm2b exp/tri2b_denlats_si84 exp/tri2b_fmmi_b0.1 - -for iter in `seq 3 8`; do - steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ - exp/tri2b/graph${lang_suffix}_tgpr data/test_dev93 \ - exp/tri2b_fmmi_b0.1/decode${lang_suffix}_tgpr_dev93_it$iter & -done - -steps/train_mmi_fmmi.sh --learning-rate 0.005 --boost 0.1 --cmd "$train_cmd" \ - data/train_si84 data/lang${lang_suffix} exp/tri2b_ali_si84 \ - exp/dubm2b exp/tri2b_denlats_si84 exp/tri2b_fmmi_b0.1_lr0.005 || exit 1; -for iter in `seq 3 8`; do - steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ - exp/tri2b/graph${lang_suffix}_tgpr data/test_dev93 \ - exp/tri2b_fmmi_b0.1_lr0.005/decode${lang_suffix}_tgpr_dev93_it$iter & -done - -steps/train_mmi_fmmi_indirect.sh --boost 0.1 --cmd "$train_cmd" \ - data/train_si84 data/lang${lang_suffix} exp/tri2b_ali_si84 \ - exp/dubm2b exp/tri2b_denlats_si84 exp/tri2b_fmmi_indirect_b0.1 -for iter in `seq 3 8`; do - steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \ - exp/tri2b/graph${lang_suffix}_tgpr data/test_dev93 \ - exp/tri2b_fmmi_indirect_b0.1/decode${lang_suffix}_tgpr_dev93_it$iter & -done diff --git a/egs/wsj/s5/run.sh b/egs/wsj/s5/run.sh index fb004117658..4d505f5da3a 100755 --- a/egs/wsj/s5/run.sh +++ b/egs/wsj/s5/run.sh @@ -1,7 +1,15 @@ #!/bin/bash +stage=0 +train=true # set to false to disable the training-related scripts + # note: you probably only want to set --train false if you + # are using at least --stage 1. +decode=true # set to false to disable the decoding-related scripts. + . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. +. utils/parse_options.sh # e.g. this parses the --stage option if supplied. + # This is a shell script, but it's recommended that you run the commands one by # one by copying and pasting into the shell. @@ -18,334 +26,313 @@ wsj0=/export/corpora5/LDC/LDC93S6B wsj1=/export/corpora5/LDC/LDC94S13B -local/wsj_data_prep.sh $wsj0/??-{?,??}.? $wsj1/??-{?,??}.? || exit 1; -# Sometimes, we have seen WSJ distributions that do not have subdirectories -# like '11-13.1', but instead have 'doc', 'si_et_05', etc. directly under the -# wsj0 or wsj1 directories. In such cases, try the following: -# -# corpus=/exports/work/inf_hcrc_cstr_general/corpora/wsj -# local/cstr_wsj_data_prep.sh $corpus -# rm data/local/dict/lexiconp.txt -# $corpus must contain a 'wsj0' and a 'wsj1' subdirectory for this to work. -# -# "nosp" refers to the dictionary before silence probabilities and pronunciation -# probabilities are added. 
-local/wsj_prepare_dict.sh --dict-suffix "_nosp" || exit 1; - -utils/prepare_lang.sh data/local/dict_nosp \ - "" data/local/lang_tmp_nosp data/lang_nosp || exit 1; - -local/wsj_format_data.sh --lang-suffix "_nosp" || exit 1; - - # We suggest to run the next three commands in the background, - # as they are not a precondition for the system building and - # most of the tests: these commands build a dictionary - # containing many of the OOVs in the WSJ LM training data, - # and an LM trained directly on that data (i.e. not just - # copying the arpa files from the disks from LDC). - # Caution: the commands below will only work if $decode_cmd - # is setup to use qsub. Else, just remove the --cmd option. - # NOTE: If you have a setup corresponding to the older cstr_wsj_data_prep.sh style, - # use local/cstr_wsj_extend_dict.sh --dict-suffix "_nosp" $corpus/wsj1/doc/ instead. +if [ $stage -le 0 ]; then + # data preparation. + local/wsj_data_prep.sh $wsj0/??-{?,??}.? $wsj1/??-{?,??}.? || exit 1; + + # Sometimes, we have seen WSJ distributions that do not have subdirectories + # like '11-13.1', but instead have 'doc', 'si_et_05', etc. directly under the + # wsj0 or wsj1 directories. In such cases, try the following: + # + # corpus=/exports/work/inf_hcrc_cstr_general/corpora/wsj + # local/cstr_wsj_data_prep.sh $corpus + # rm data/local/dict/lexiconp.txt + # $corpus must contain a 'wsj0' and a 'wsj1' subdirectory for this to work. + # + # "nosp" refers to the dictionary before silence probabilities and pronunciation + # probabilities are added. + local/wsj_prepare_dict.sh --dict-suffix "_nosp" || exit 1; + + utils/prepare_lang.sh data/local/dict_nosp \ + "" data/local/lang_tmp_nosp data/lang_nosp || exit 1; + + local/wsj_format_data.sh --lang-suffix "_nosp" || exit 1; + + # We suggest to run the next three commands in the background, + # as they are not a precondition for the system building and + # most of the tests: these commands build a dictionary + # containing many of the OOVs in the WSJ LM training data, + # and an LM trained directly on that data (i.e. not just + # copying the arpa files from the disks from LDC). + # Caution: the commands below will only work if $decode_cmd + # is setup to use qsub. Else, just remove the --cmd option. + # NOTE: If you have a setup corresponding to the older cstr_wsj_data_prep.sh style, + # use local/cstr_wsj_extend_dict.sh --dict-suffix "_nosp" $corpus/wsj1/doc/ instead. ( - local/wsj_extend_dict.sh --dict-suffix "_nosp" $wsj1/13-32.1 && \ - utils/prepare_lang.sh data/local/dict_nosp_larger \ - "" data/local/lang_tmp_nosp_larger data/lang_nosp_bd && \ - local/wsj_train_lms.sh --dict-suffix "_nosp" && - local/wsj_format_local_lms.sh --lang-suffix "_nosp" # && + local/wsj_extend_dict.sh --dict-suffix "_nosp" $wsj1/13-32.1 && \ + utils/prepare_lang.sh data/local/dict_nosp_larger \ + "" data/local/lang_tmp_nosp_larger data/lang_nosp_bd && \ + local/wsj_train_lms.sh --dict-suffix "_nosp" && + local/wsj_format_local_lms.sh --lang-suffix "_nosp" # && ) & -# Now make MFCC features. -# mfccdir should be some place with a largish disk where you -# want to store MFCC features. - -for x in test_eval92 test_eval93 test_dev93 train_si284; do - steps/make_mfcc.sh --cmd "$train_cmd" --nj 20 data/$x || exit 1; - steps/compute_cmvn_stats.sh data/$x || exit 1; -done - -utils/subset_data_dir.sh --first data/train_si284 7138 data/train_si84 || exit 1 - -# Now make subset with the shortest 2k utterances from si-84. 
-utils/subset_data_dir.sh --shortest data/train_si84 2000 data/train_si84_2kshort || exit 1; - -# Now make subset with half of the data from si-84. -utils/subset_data_dir.sh data/train_si84 3500 data/train_si84_half || exit 1; - - -# Note: the --boost-silence option should probably be omitted by default -# for normal setups. It doesn't always help. [it's to discourage non-silence -# models from modeling silence.] -steps/train_mono.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \ - data/train_si84_2kshort data/lang_nosp exp/mono0a || exit 1; - -( - utils/mkgraph.sh data/lang_nosp_test_tgpr \ - exp/mono0a exp/mono0a/graph_nosp_tgpr && \ - steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/mono0a/graph_nosp_tgpr \ - data/test_dev93 exp/mono0a/decode_nosp_tgpr_dev93 && \ - steps/decode.sh --nj 8 --cmd "$decode_cmd" exp/mono0a/graph_nosp_tgpr \ - data/test_eval92 exp/mono0a/decode_nosp_tgpr_eval92 -) & - -steps/align_si.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \ - data/train_si84_half data/lang_nosp exp/mono0a exp/mono0a_ali || exit 1; - -steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" 2000 10000 \ - data/train_si84_half data/lang_nosp exp/mono0a_ali exp/tri1 || exit 1; - -while [ ! -f data/lang_nosp_test_tgpr/tmp/LG.fst ] || \ - [ -z data/lang_nosp_test_tgpr/tmp/LG.fst ]; do - sleep 20; -done -sleep 30; -# or the mono mkgraph.sh might be writing -# data/lang_test_tgpr/tmp/LG.fst which will cause this to fail. - -utils/mkgraph.sh data/lang_nosp_test_tgpr \ - exp/tri1 exp/tri1/graph_nosp_tgpr || exit 1; - -steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/tri1/graph_nosp_tgpr \ - data/test_dev93 exp/tri1/decode_nosp_tgpr_dev93 || exit 1; -steps/decode.sh --nj 8 --cmd "$decode_cmd" exp/tri1/graph_nosp_tgpr \ - data/test_eval92 exp/tri1/decode_nosp_tgpr_eval92 || exit 1; - -# test various modes of LM rescoring (4 is the default one). -# This is just confirming they're equivalent. -for mode in 1 2 3 4; do - steps/lmrescore.sh --mode $mode --cmd "$decode_cmd" \ - data/lang_nosp_test_{tgpr,tg} data/test_dev93 \ - exp/tri1/decode_nosp_tgpr_dev93 \ - exp/tri1/decode_nosp_tgpr_dev93_tg$mode || exit 1; -done - - -## the following command demonstrates how to get lattices that are -## "word-aligned" (arcs coincide with words, with boundaries in the right -## place). -#sil_label=`grep '!SIL' data/lang_nosp_test_tgpr/words.txt | awk '{print $2}'` -#steps/word_align_lattices.sh --cmd "$train_cmd" --silence-label $sil_label \ -# data/lang_nosp_test_tgpr exp/tri1/decode_nosp_tgpr_dev93 \ -# exp/tri1/decode_nosp_tgpr_dev93_aligned || exit 1; - -steps/align_si.sh --nj 10 --cmd "$train_cmd" \ - data/train_si84 data/lang_nosp exp/tri1 exp/tri1_ali_si84 || exit 1; - -steps/train_lda_mllt.sh --cmd "$train_cmd" \ - --splice-opts "--left-context=3 --right-context=3" 2500 15000 \ - data/train_si84 data/lang_nosp exp/tri1_ali_si84 exp/tri2b || exit 1; - -utils/mkgraph.sh data/lang_nosp_test_tgpr \ - exp/tri2b exp/tri2b/graph_nosp_tgpr || exit 1; -steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgpr \ - data/test_dev93 exp/tri2b/decode_nosp_tgpr_dev93 || exit 1; -steps/decode.sh --nj 8 --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgpr \ - data/test_eval92 exp/tri2b/decode_nosp_tgpr_eval92 || exit 1; - -# At this point, you could run the example scripts that show how VTLN works. -# We haven't included this in the default recipes yet. 
-# local/run_vtln.sh --lang-suffix "_nosp" -# local/run_vtln2.sh --lang-suffix "_nosp" - -# Now, with dev93, compare lattice rescoring with biglm decoding, -# going from tgpr to tg. Note: results are not the same, even though they should -# be, and I believe this is due to the beams not being wide enough. The pruning -# seems to be a bit too narrow in the current scripts (got at least 0.7% absolute -# improvement from loosening beams from their current values). - -steps/decode_biglm.sh --nj 10 --cmd "$decode_cmd" \ - exp/tri2b/graph_nosp_tgpr data/lang_test_{tgpr,tg}/G.fst \ - data/test_dev93 exp/tri2b/decode_nosp_tgpr_dev93_tg_biglm - -# baseline via LM rescoring of lattices. -steps/lmrescore.sh --cmd "$decode_cmd" \ - data/lang_nosp_test_tgpr/ data/lang_nosp_test_tg/ \ - data/test_dev93 exp/tri2b/decode_nosp_tgpr_dev93 \ - exp/tri2b/decode_nosp_tgpr_dev93_tg || exit 1; - -# Trying Minimum Bayes Risk decoding (like Confusion Network decoding): -mkdir exp/tri2b/decode_nosp_tgpr_dev93_tg_mbr -cp exp/tri2b/decode_nosp_tgpr_dev93_tg/lat.*.gz \ - exp/tri2b/decode_nosp_tgpr_dev93_tg_mbr -local/score_mbr.sh --cmd "$decode_cmd" \ - data/test_dev93/ data/lang_nosp_test_tgpr/ \ - exp/tri2b/decode_nosp_tgpr_dev93_tg_mbr - -# This script trains a delta+delta-delta system. It's not really recommended or + # Now make MFCC features. + # mfccdir should be some place with a largish disk where you + # want to store MFCC features. + + for x in test_eval92 test_eval93 test_dev93 train_si284; do + steps/make_mfcc.sh --cmd "$train_cmd" --nj 20 data/$x || exit 1; + steps/compute_cmvn_stats.sh data/$x || exit 1; + done + + utils/subset_data_dir.sh --first data/train_si284 7138 data/train_si84 || exit 1 + + # Now make subset with the shortest 2k utterances from si-84. + utils/subset_data_dir.sh --shortest data/train_si84 2000 data/train_si84_2kshort || exit 1; + + # Now make subset with half of the data from si-84. + utils/subset_data_dir.sh data/train_si84 3500 data/train_si84_half || exit 1; +fi + + +if [ $stage -le 1 ]; then + # monophone + + + # Note: the --boost-silence option should probably be omitted by default + # for normal setups. It doesn't always help. [it's to discourage non-silence + # models from modeling silence.] 
+ if $train; then + steps/train_mono.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \ + data/train_si84_2kshort data/lang_nosp exp/mono0a || exit 1; + fi + + if $decode; then + utils/mkgraph.sh data/lang_nosp_test_tgpr exp/mono0a exp/mono0a/graph_nosp_tgpr && \ + steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/mono0a/graph_nosp_tgpr \ + data/test_dev93 exp/mono0a/decode_nosp_tgpr_dev93 && \ + steps/decode.sh --nj 8 --cmd "$decode_cmd" exp/mono0a/graph_nosp_tgpr \ + data/test_eval92 exp/mono0a/decode_nosp_tgpr_eval92 + fi +fi + +if [ $stage -le 2 ]; then + # tri1 + if $train; then + steps/align_si.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \ + data/train_si84_half data/lang_nosp exp/mono0a exp/mono0a_ali || exit 1; + + steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" 2000 10000 \ + data/train_si84_half data/lang_nosp exp/mono0a_ali exp/tri1 || exit 1; + fi + + if $decode; then + utils/mkgraph.sh data/lang_nosp_test_tgpr \ + exp/tri1 exp/tri1/graph_nosp_tgpr || exit 1; + + for data in dev93 eval92; do + nspk=$(wc -l " data/local/lang_tmp data/lang || exit 1; - -for lm_suffix in bg bg_5k tg tg_5k tgpr tgpr_5k; do - mkdir -p data/lang_test_${lm_suffix} - cp -r data/lang/* data/lang_test_${lm_suffix}/ || exit 1; - rm -rf data/lang_test_${lm_suffix}/tmp - cp data/lang_nosp_test_${lm_suffix}/G.* data/lang_test_${lm_suffix}/ -done - -# Silprob for larger lexicon. -utils/dict_dir_add_pronprobs.sh --max-normalize true \ - data/local/dict_nosp_larger \ - exp/tri4b/pron_counts_nowb.txt exp/tri4b/sil_counts_nowb.txt \ - exp/tri4b/pron_bigram_counts_nowb.txt data/local/dict_larger || exit 1 - -utils/prepare_lang.sh data/local/dict_larger \ - "" data/local/lang_tmp_larger data/lang_bd || exit 1; - -for lm_suffix in tgpr tgconst tg fgpr fgconst fg; do - mkdir -p data/lang_test_bd_${lm_suffix} - cp -r data/lang_bd/* data/lang_test_bd_${lm_suffix}/ || exit 1; - rm -rf data/lang_test_bd_${lm_suffix}/tmp - cp data/lang_nosp_test_bd_${lm_suffix}/G.* data/lang_test_bd_${lm_suffix}/ -done - -( - utils/mkgraph.sh data/lang_test_tgpr exp/tri4b exp/tri4b/graph_tgpr || exit 1; - steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \ - exp/tri4b/graph_tgpr data/test_dev93 exp/tri4b/decode_tgpr_dev93 || exit 1; - steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \ - exp/tri4b/graph_tgpr data/test_eval92 exp/tri4b/decode_tgpr_eval92 || exit 1; - - utils/mkgraph.sh data/lang_test_bd_tgpr \ - exp/tri4b exp/tri4b/graph_bd_tgpr || exit 1; - steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \ - exp/tri4b/graph_bd_tgpr data/test_dev93 \ - exp/tri4b/decode_bd_tgpr_dev93 || exit 1; - steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \ - exp/tri4b/graph_bd_tgpr data/test_eval92 \ - exp/tri4b/decode_bd_tgpr_eval92 || exit 1; -) & +if [ $stage -le 4 ]; then + # From 2b system, train 3b which is LDA + MLLT + SAT. + + # Align tri2b system with all the si284 data. + if $train; then + steps/align_si.sh --nj 10 --cmd "$train_cmd" \ + data/train_si284 data/lang_nosp exp/tri2b exp/tri2b_ali_si284 || exit 1; + + steps/train_sat.sh --cmd "$train_cmd" 4200 40000 \ + data/train_si284 data/lang_nosp exp/tri2b_ali_si284 exp/tri3b || exit 1; + fi + + if $decode; then + utils/mkgraph.sh data/lang_nosp_test_tgpr \ + exp/tri3b exp/tri3b/graph_nosp_tgpr || exit 1; + + # the larger dictionary ("big-dict"/bd) + locally produced LM. 
+ utils/mkgraph.sh data/lang_nosp_test_bd_tgpr \ + exp/tri3b exp/tri3b/graph_nosp_bd_tgpr || exit 1; + + # At this point you could run the command below; this gets + # results that demonstrate the basis-fMLLR adaptation (adaptation + # on small amounts of adaptation data). + # local/run_basis_fmllr.sh --lang-suffix "_nosp" + + for data in dev93 eval92; do + nspk=$(wc -l " data/local/lang_tmp data/lang || exit 1; + + for lm_suffix in bg bg_5k tg tg_5k tgpr tgpr_5k; do + mkdir -p data/lang_test_${lm_suffix} + cp -r data/lang/* data/lang_test_${lm_suffix}/ || exit 1; + rm -rf data/lang_test_${lm_suffix}/tmp + cp data/lang_nosp_test_${lm_suffix}/G.* data/lang_test_${lm_suffix}/ + done + + # Silprob for larger ("bd") lexicon. + utils/dict_dir_add_pronprobs.sh --max-normalize true \ + data/local/dict_nosp_larger \ + exp/tri3b/pron_counts_nowb.txt exp/tri3b/sil_counts_nowb.txt \ + exp/tri3b/pron_bigram_counts_nowb.txt data/local/dict_larger || exit 1 + + utils/prepare_lang.sh data/local/dict_larger \ + "" data/local/lang_tmp_larger data/lang_bd || exit 1; + + for lm_suffix in tgpr tgconst tg fgpr fgconst fg; do + mkdir -p data/lang_test_bd_${lm_suffix} + cp -r data/lang_bd/* data/lang_test_bd_${lm_suffix}/ || exit 1; + rm -rf data/lang_test_bd_${lm_suffix}/tmp + cp data/lang_nosp_test_bd_${lm_suffix}/G.* data/lang_test_bd_${lm_suffix}/ + done +fi + + +if [ $stage -le 6 ]; then + # From 3b system, now using data/lang as the lang directory (we have now added + # pronunciation and silence probabilities), train another SAT system (tri4b). + + if $train; then + steps/train_sat.sh --cmd "$train_cmd" 4200 40000 \ + data/train_si284 data/lang exp/tri3b exp/tri4b || exit 1; + fi + + if $decode; then + utils/mkgraph.sh data/lang_test_tgpr \ + exp/tri4b exp/tri4b/graph_tgpr || exit 1; + utils/mkgraph.sh data/lang_test_bd_tgpr \ + exp/tri4b exp/tri4b/graph_bd_tgpr || exit 1; + + for data in dev93 eval92; do + nspk=$(wc -l " - echo " e.g.: steps/mixup.sh 20000 data/train_si84 data/lang exp/tri3b exp/tri3b_20k" - echo "main options (for others, see top of script file)" - echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." - echo " --config # config containing options" - echo " --stage # stage to do partial re-run from." - exit 1; -fi - -numgauss=$1 -data=$2 -lang=$3 -srcdir=$4 -dir=$5 - -for f in $data/feats.scp $srcdir/final.mdl $srcdir/final.mat; do - [ ! -f $f ] && echo "mixup_lda_etc.sh: no such file $f" && exit 1; -done - -nj=`cat $srcdir/num_jobs` || exit 1; -sdata=$data/split$nj; - -splice_opts=`cat $srcdir/splice_opts 2>/dev/null` -cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null` - -mkdir -p $dir/log -cp $srcdir/splice_opts $dir 2>/dev/null -cp $srcdir/cmvn_opts $dir 2>/dev/null -cp $srcdir/final.mat $dir -echo $nj > $dir/num_jobs -[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; - -utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1; -cp $lang/phones.txt $dir || exit 1; - -cp $srcdir/tree $dir - - -## Set up features. 
-if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi -echo "$0: feature type is $feat_type" - -case $feat_type in - delta) sifeats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";; - lda) sifeats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |" - cp $srcdir/final.mat $dir - ;; - *) echo "Invalid feature type $feat_type" && exit 1; -esac -if [ -f $srcdir/trans.1 ]; then - echo Using transforms from $srcdir; - rm $dir/trans.* 2>/dev/null - ln.pl $srcdir/trans.* $dir # Link those transforms to current directory. - feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |" -else - feats="$sifeats" -fi -## Done setting up features. - -rm $dir/fsts.*.gz 2>/dev/null -ln.pl $srcdir/fsts.*.gz $dir # Link training-graph FSTs to current directory. - -## Mix up old model -if [ $stage -le 0 ]; then - echo Mixing up old model to $numgauss Gaussians -# Note: this script also works for mixing down. - $cmd $dir/log/mixup.log \ - gmm-mixup --mix-up=$numgauss --mix-down=$numgauss \ - $srcdir/final.mdl $srcdir/final.occs $dir/1.mdl || exit 1; -fi -## Done. - -cur_alidir=$srcdir # dir to find alignments. -[ -z "$realign_iters" ] && ln.pl $srcdir/ali.*.gz $dir; # link alignments, if - # we won't be generating them. - -x=1 -while [ $x -le $num_iters ]; do - echo "$0: iteration $x" - if echo $realign_iters | grep -w $x >/dev/null; then - if [ $stage -le $x ]; then - echo "$0: realigning data" - mdl="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $dir/$x.mdl - |" - $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \ - gmm-align-compiled $scale_opts --beam=10 --retry-beam=40 "$mdl" \ - "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \ - "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; - fi - cur_alidir=$dir - fi - if [ $stage -le $x ]; then - echo "$0: accumulating statistics" - $cmd JOB=1:$nj $dir/log/acc.$x.JOB.log \ - gmm-acc-stats-ali $dir/$x.mdl "$feats" \ - "ark,s,cs:gunzip -c $cur_alidir/ali.JOB.gz|" $dir/$x.JOB.acc || exit 1; - echo "$0: re-estimating model" - [ "`ls $dir/$x.*.acc | wc -w`" -ne $nj ] && echo "$0: wrong #accs" && exit 1; - $cmd $dir/log/update.$x.log \ - gmm-est --write-occs=$dir/$[$x+1].occs $dir/$x.mdl \ - "gmm-sum-accs - $dir/$x.*.acc |" $dir/$[$x+1].mdl || exit 1; - rm $dir/$x.mdl $dir/$x.*.acc - rm $dir/$x.occs 2>/dev/null - fi - x=$[$x+1] -done - -rm $dir/final.mdl $dir/final.occs 2>/dev/null -ln -s $x.mdl $dir/final.mdl -ln -s $x.occs $dir/final.occs - -if [ -f $dir/trans.1 ]; then - echo "$0: accumulating stats for alignment model." - $cmd JOB=1:$nj $dir/log/acc_alimdl.JOB.log \ - ali-to-post "ark:gunzip -c $dir/ali.JOB.gz|" ark:- \| \ - gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$sifeats" \ - ark,s,cs:- $dir/$x.JOB.acc || exit 1; - [ "`ls $dir/$x.*.acc | wc -w`" -ne $nj ] && echo "$0: wrong #accs" && exit 1; - echo "$0: Re-estimating alignment model." 
- $cmd $dir/log/est_alimdl.log \ - gmm-est --write-occs=$dir/final.occs --remove-low-count-gaussians=false $dir/$x.mdl \ - "gmm-sum-accs - $dir/$x.*.acc|" $dir/$x.alimdl || exit 1; - rm $dir/$x.*.acc - rm $dir/final.alimdl 2>/dev/null - ln -s $x.alimdl $dir/final.alimdl -fi - -utils/summarize_warnings.pl $dir/log - -echo Done diff --git a/egs/wsj/s5/utils/fix_data_dir.sh b/egs/wsj/s5/utils/fix_data_dir.sh index 0333d628544..bb8efd56ab8 100755 --- a/egs/wsj/s5/utils/fix_data_dir.sh +++ b/egs/wsj/s5/utils/fix_data_dir.sh @@ -22,12 +22,13 @@ mkdir -p $data/.backup [ ! -f $data/utt2spk ] && echo "$0: no such file $data/utt2spk" && exit 1; +set -e -o pipefail -u + tmpdir=$(mktemp -d /tmp/kaldi.XXXX); trap 'rm -rf "$tmpdir"' EXIT HUP INT PIPE TERM export LC_ALL=C - function check_sorted { file=$1 sort -k1,1 -u <$file >$file.tmp @@ -54,8 +55,8 @@ function filter_file { cp $file_to_filter ${file_to_filter}.tmp utils/filter_scp.pl $filter ${file_to_filter}.tmp > $file_to_filter if ! cmp ${file_to_filter}.tmp $file_to_filter >&/dev/null; then - length1=`cat ${file_to_filter}.tmp | wc -l` - length2=`cat ${file_to_filter} | wc -l` + length1=$(cat ${file_to_filter}.tmp | wc -l) + length2=$(cat ${file_to_filter} | wc -l) if [ $length1 -ne $length2 ]; then echo "$0: filtered $file_to_filter from $length1 to $length2 lines based on filter $filter." fi @@ -77,7 +78,7 @@ function filter_recordings { exit 1; fi awk '{print $2}' < $data/segments | sort | uniq > $tmpdir/recordings - n1=`cat $tmpdir/recordings | wc -l` + n1=$(cat $tmpdir/recordings | wc -l) [ ! -s $tmpdir/recordings ] && \ echo "Empty list of recordings (bad file $data/segments)?" && exit 1; utils/filter_scp.pl $data/wav.scp $tmpdir/recordings > $tmpdir/recordings.tmp diff --git a/egs/wsj/s5/utils/mkgraph.sh b/egs/wsj/s5/utils/mkgraph.sh index 42204b85e7d..65ff3c3c79d 100755 --- a/egs/wsj/s5/utils/mkgraph.sh +++ b/egs/wsj/s5/utils/mkgraph.sh @@ -75,7 +75,7 @@ fi N=$(tree-info $tree | grep "context-width" | cut -d' ' -f2) || { echo "Error when getting context-width"; exit 1; } P=$(tree-info $tree | grep "central-position" | cut -d' ' -f2) || { echo "Error when getting central-position"; exit 1; } -[[ -f $2/frame_subsampling_factor && $loopscale != 1.0 ]] && \ +[[ -f $2/frame_subsampling_factor && "$loopscale" == "0.1" ]] && \ echo "$0: WARNING: chain models need '--self-loop-scale 1.0'"; mkdir -p $lang/tmp diff --git a/egs/wsj/s5/utils/validate_data_dir.sh b/egs/wsj/s5/utils/validate_data_dir.sh index 49c929207b9..58e51a75aef 100755 --- a/egs/wsj/s5/utils/validate_data_dir.sh +++ b/egs/wsj/s5/utils/validate_data_dir.sh @@ -132,7 +132,7 @@ if [ -f $data/wav.scp ]; then check_sorted_and_uniq $data/segments # We have a segments file -> interpret wav file as "recording-ids" not utterance-ids. ! cat $data/segments | \ - awk '{if (NF != 4 || ($4 <= $3 && $4 != -1)) { print "Bad line in segments file", $0; exit(1); }}' && \ + awk '{if (NF != 4 || $4 <= $3) { print "Bad line in segments file", $0; exit(1); }}' && \ echo "$0: badly formatted segments file" && exit 1; segments_len=`cat $data/segments | wc -l` From 3f180124f5aa327f04c093636972e6ee21dc7858 Mon Sep 17 00:00:00 2001 From: "Jan \"yenda\" Trmal" Date: Mon, 27 Feb 2017 13:33:35 -0500 Subject: [PATCH 455/530] [scripts,egs] Replace SGE-specific options to queue.pl (etc.) with generic options (#1461) Search for new-style http://kaldi-asr.org/doc/queue.html for an explanation of the difference. It makes switching to new queueing mechanisms easier. 
--- .../s5/local/online/run_nnet2_ms_perturbed.sh | 14 ++--- .../s5/local/online/run_nnet2_ms_sp_disc.sh | 18 +++--- egs/aspire/s5/local/multi_condition/decode.sh | 2 +- .../s5/local/multi_condition/run_nnet2_ms.sh | 4 +- .../multi_condition/run_nnet2_ms_disc.sh | 18 +++--- egs/callhome_egyptian/s5/run.sh | 18 +++--- egs/chime3/s5/cmd.sh | 6 +- egs/csj/s5/local/csj_run_rnnlm.sh | 12 ++-- egs/csj/s5/local/nnet/run_lstm.sh | 4 +- egs/fisher_callhome_spanish/s5/run.sh | 8 +-- .../s5/local/nnet2/run_6c_gpu.sh | 2 +- .../s5/local/online/run_nnet2.sh | 2 +- .../s5/local/online/run_nnet2_b.sh | 16 +++--- .../local/online/run_nnet2_discriminative.sh | 22 ++++---- .../s5/local/online/run_nnet2_multisplice.sh | 2 +- .../s5/local/online/run_nnet2_ms.sh | 20 +++---- egs/gale_arabic/s5/local/online/run_nnet2.sh | 16 +++--- egs/hkust/s5/local/online/run_nnet2_ms.sh | 4 +- egs/librispeech/s5/local/nnet2/run_5c.sh | 8 +-- .../s5/local/nnet2/run_6a_clean_460.sh | 10 ++-- egs/librispeech/s5/local/nnet2/run_7a_960.sh | 8 +-- egs/librispeech/s5/local/online/run_nnet2.sh | 10 ++-- .../s5/local/online/run_nnet2_disc.sh | 14 ++--- .../s5/local/online/run_nnet2_ms.sh | 10 ++-- .../s5/local/online/run_nnet2_ms_disc.sh | 14 ++--- .../s5/local/online_pitch/run_nnet2_ms.sh | 10 ++-- egs/lre/v1/lid/train_diag_ubm.sh | 6 +- egs/lre/v1/lid/train_ivector_extractor.sh | 12 ++-- egs/lre/v1/run.sh | 14 ++--- egs/lre07/v1/lid/nnet2/get_egs2.sh | 18 +++--- .../v1/lid/nnet2/train_multisplice_accel2.sh | 36 ++++++------ egs/lre07/v1/lid/train_diag_ubm.sh | 6 +- egs/lre07/v1/lid/train_ivector_extractor.sh | 12 ++-- .../v1/lid/train_ivector_extractor_dnn.sh | 20 +++---- egs/lre07/v1/run.sh | 12 ++-- .../v2/local/dnn/run_nnet2_multisplice.sh | 10 ++-- egs/lre07/v2/run.sh | 12 ++-- egs/rm/s5/local/nnet2/run_4b_gpu.sh | 2 +- egs/rm/s5/local/nnet2/run_4c.sh | 8 +-- egs/rm/s5/local/nnet2/run_4d.sh | 8 +-- egs/rm/s5/local/nnet2/run_4d2.sh | 8 +-- egs/rm/s5/local/nnet2/run_4d3.sh | 8 +-- egs/rm/s5/local/nnet2/run_4e_gpu.sh | 2 +- egs/rm/s5/local/nnet2/run_5c.sh | 6 +- egs/rm/s5/local/nnet2/run_5c_gpu.sh | 12 ++-- egs/rm/s5/local/nnet2/run_5d.sh | 16 +++--- egs/rm/s5/local/nnet2/run_5d_gpu.sh | 12 ++-- egs/rm/s5/local/nnet2/run_5e_gpu.sh | 12 ++-- egs/rm/s5/local/online/run_nnet2.sh | 10 ++-- egs/rm/s5/local/online/run_nnet2_baseline.sh | 8 +-- egs/rm/s5/local/online/run_nnet2_common.sh | 8 +-- .../s5/local/online/run_nnet2_multisplice.sh | 10 ++-- .../online/run_nnet2_multisplice_disc.sh | 12 ++-- egs/rm/s5/local/online/run_nnet2_perturbed.sh | 12 ++-- egs/rm/s5/local/online/run_nnet2_wsj.sh | 28 +++++----- egs/rm/s5/local/online/run_nnet2_wsj_joint.sh | 16 +++--- .../local/online/run_nnet2_wsj_joint_disc.sh | 12 ++-- egs/rm/s5/local/run_dnn_convert_nnet2.sh | 4 +- egs/sprakbanken/s5/local/nnet2/run_5b_gpu.sh | 2 +- egs/sprakbanken/s5/local/nnet2/run_5c2_gpu.sh | 6 +- egs/sprakbanken/s5/local/nnet2/run_5c_gpu.sh | 8 +-- egs/sprakbanken/s5/local/nnet2/run_5d_gpu.sh | 2 +- egs/sprakbanken/s5/local/nnet2/run_6c_gpu.sh | 8 +-- egs/sre08/v1/local/run_more_data.sh | 24 ++++---- egs/sre08/v1/run.sh | 16 +++--- egs/sre08/v1/sid/train_diag_ubm.sh | 4 +- egs/sre08/v1/sid/train_ivector_extractor.sh | 12 ++-- .../v1/sid/train_ivector_extractor_dnn.sh | 20 +++---- egs/sre10/v1/local/dnn/get_egs2.sh | 18 +++--- .../v1/local/dnn/run_nnet2_multisplice.sh | 10 ++-- .../v1/local/dnn/train_multisplice_accel2.sh | 34 +++++------ egs/sre10/v1/run.sh | 12 ++-- egs/sre10/v2/cmd.sh | 8 +-- egs/sre10/v2/run.sh | 18 +++--- egs/swahili/s5/cmd.sh | 8 +-- 
egs/swbd/s5b/local/nnet2/run_5a_gpu.sh | 2 +- egs/swbd/s5b/local/nnet2/run_5b_gpu.sh | 2 +- egs/swbd/s5b/local/nnet2/run_5c_gpu.sh | 2 +- egs/swbd/s5b/local/nnet2/run_5d_gpu.sh | 2 +- egs/swbd/s5b/local/nnet2/run_5e_gpu.sh | 2 +- egs/swbd/s5b/local/nnet2/run_5f_gpu.sh | 2 +- egs/swbd/s5b/local/nnet2/run_6a_gpu.sh | 2 +- egs/swbd/s5b/local/nnet2/run_6c_gpu.sh | 14 ++--- egs/swbd/s5b/local/online/run_nnet2.sh | 20 +++---- .../s5b/local/online/run_nnet2_baseline.sh | 8 +-- egs/swbd/s5b/local/online/run_nnet2_fisher.sh | 26 ++++----- egs/swbd/s5b/local/online/run_nnet2_ms.sh | 12 ++-- .../s5b/local/online/run_nnet2_ms_disc.sh | 14 ++--- .../local/online/run_nnet2_perturb_speed.sh | 24 ++++---- egs/swbd/s5c/local/online/run_nnet2_ms.sh | 12 ++-- .../local/online/run_nnet2_ms_perturbed.sh | 12 ++-- egs/tedlium/s5/cmd.sh | 10 ++-- .../s5/local/online/run_nnet2_ensemble.sh | 10 ++-- egs/tedlium/s5/local/online/run_nnet2_ms.sh | 10 ++-- .../s5/local/online/run_nnet2_ms_disc.sh | 14 ++--- .../s5/local/online/run_nnet2_ms_perturbed.sh | 14 ++--- egs/wsj/s5/local/nnet2/run_5b_gpu.sh | 2 +- egs/wsj/s5/local/nnet2/run_5c.sh | 12 ++-- egs/wsj/s5/local/nnet2/run_5c2_gpu.sh | 2 +- egs/wsj/s5/local/nnet2/run_5d.sh | 8 +-- egs/wsj/s5/local/nnet2/run_5e_gpu.sh | 2 +- egs/wsj/s5/local/nnet2/run_6c_gpu.sh | 12 ++-- egs/wsj/s5/local/nnet2/run_6d.sh | 6 +- egs/wsj/s5/local/nnet2/run_6d_gpu.sh | 10 ++-- egs/wsj/s5/local/nnet2/run_bnf.sh | 30 +++++----- egs/wsj/s5/local/nnet2/run_pnorm_bnf.sh | 28 +++++----- egs/wsj/s5/local/online/run_nnet2_baseline.sh | 8 +-- .../local/online/run_nnet2_discriminative.sh | 20 +++---- .../local/online/run_nnet2_perturb_speed.sh | 2 +- egs/wsj/s5/local/run_bnf_sgmm.sh | 20 +++---- egs/wsj/s5/steps/nnet2/get_egs2.sh | 2 +- .../s5/steps/nnet2/get_egs_discriminative2.sh | 2 +- egs/wsj/s5/steps/nnet2/retrain_fast.sh | 18 +++--- egs/wsj/s5/steps/nnet2/retrain_simple2.sh | 24 ++++---- egs/wsj/s5/steps/nnet2/train_block.sh | 26 ++++----- .../s5/steps/nnet2/train_convnet_accel2.sh | 48 ++++++++-------- .../steps/nnet2/train_multisplice_accel2.sh | 4 +- .../steps/nnet2/train_multisplice_ensemble.sh | 56 +++++++++---------- egs/wsj/s5/steps/nnet2/train_pnorm.sh | 36 ++++++------ egs/wsj/s5/steps/nnet2/train_pnorm_accel2.sh | 4 +- .../nnet2/train_pnorm_bottleneck_fast.sh | 34 +++++------ .../s5/steps/nnet2/train_pnorm_ensemble.sh | 44 +++++++-------- egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh | 32 +++++------ .../s5/steps/nnet2/train_pnorm_multisplice.sh | 30 +++++----- .../steps/nnet2/train_pnorm_multisplice2.sh | 26 ++++----- egs/wsj/s5/steps/nnet2/train_pnorm_simple.sh | 30 +++++----- egs/wsj/s5/steps/nnet2/train_pnorm_simple2.sh | 50 ++++++++--------- egs/wsj/s5/steps/nnet2/train_tanh.sh | 4 +- .../s5/steps/nnet2/train_tanh_bottleneck.sh | 32 +++++------ egs/wsj/s5/steps/nnet2/train_tanh_fast.sh | 32 +++++------ egs/wsj/s5/steps/nnet2/update_nnet.sh | 14 ++--- egs/wsj/s5/steps/nnet3/chain/train_tdnn.sh | 8 +-- egs/wsj/s5/steps/nnet3/get_degs.sh | 2 +- .../s5/steps/nnet3/get_egs_discriminative.sh | 2 +- egs/wsj/s5/steps/nnet3/lstm/train.sh | 6 +- egs/wsj/s5/steps/nnet3/tdnn/train.sh | 6 +- egs/wsj/s5/steps/nnet3/tdnn/train_raw_nnet.sh | 6 +- egs/wsj/s5/steps/nnet3/train_tdnn.sh | 6 +- egs/wsj/s5/steps/online/nnet2/get_egs.sh | 16 +++--- .../online/nnet2/get_egs_discriminative2.sh | 2 +- egs/wsj/s5/utils/convert_slf_parallel.sh | 4 +- 141 files changed, 918 insertions(+), 918 deletions(-) diff --git a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh 
b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh index 24176d69a34..a6c2d02b7af 100755 --- a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh +++ b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh @@ -8,7 +8,7 @@ # This example script demonstrates how speed perturbation of the data helps the nnet training in the SWB setup. . ./cmd.sh -set -e +set -e stage=1 train_stage=-10 use_gpu=true @@ -27,13 +27,13 @@ fix_nnet=false if $use_gpu; then if ! cuda-compiled; then - cat < # Iteration of model to decode; default is final." echo " --scoring-opts # options to local/score.sh" echo " --num-threads # number of threads to use, default 1." - echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4" + echo " --parallel-opts # e.g. '--num-threads 4' if you supply --num-threads 4" exit 1; fi diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh index 3b778b23162..4e34c78255a 100755 --- a/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh +++ b/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh @@ -28,7 +28,7 @@ If you want to use GPUs (and have them), go to src/, and configure and make on a where "nvcc" is installed. Otherwise, call this script with --use-gpu false EOF fi - parallel_opts="-l gpu=1" + parallel_opts="--gpu 1" num_threads=1 minibatch_size=512 @@ -47,7 +47,7 @@ else # almost the same, but this may be a little bit slow. num_threads=16 minibatch_size=128 - parallel_opts="-pe smp $num_threads" + parallel_opts="--num-threads $num_threads" fi # do the common parts of the script. diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh index ad5fba0929f..dc285f28f8e 100755 --- a/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh +++ b/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh @@ -8,7 +8,7 @@ # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, # since the lattice generation runs in about real-time, so takes of the order of # 1000 hours of CPU time. -# +# # Note: rather than using any features we have dumped on disk, this script # regenerates them from the wav data three times-- when we do lattice # generation, numerator alignment and discriminative training. This made the @@ -42,20 +42,20 @@ set -e if $use_gpu; then if ! cuda-compiled; then - cat <" data/local/lang data/lang # Make sure that you do not use your test and your dev sets to train the LM -# Some form of cross validation is possible where you decode your dev/set based on an +# Some form of cross validation is possible where you decode your dev/set based on an # LM that is trained on everything but that that conversation local/callhome_train_lms.sh $split local/callhome_create_test_lang.sh @@ -100,7 +100,7 @@ steps/train_lda_mllt.sh --cmd "$train_cmd" \ exp/tri3a/graph data/dev exp/tri3a/decode_dev || exit 1; )& -# Next we'll use fMLLR and train with SAT (i.e. on +# Next we'll use fMLLR and train with SAT (i.e. 
on # fMLLR features) steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ @@ -108,7 +108,7 @@ steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ steps/train_sat.sh --cmd "$train_cmd" \ 2200 25000 data/train data/lang exp/tri3a_ali exp/tri4a || exit 1; - + ( utils/mkgraph.sh data/lang_test exp/tri4a exp/tri4a/graph steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ @@ -140,9 +140,9 @@ steps/train_sat.sh --cmd "$train_cmd" \ )& dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \ - --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G") + --parallel-opts "--num-threads 16" --cmd "queue.pl --mem 1G") dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1 \ - --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G") + --parallel-opts "--gpu 1" --cmd "queue.pl --mem 1G") steps/nnet2/train_pnorm_ensemble.sh \ --mix-up 5000 --initial-learning-rate 0.008 --final-learning-rate 0.0008\ @@ -153,17 +153,17 @@ steps/nnet2/train_pnorm_ensemble.sh \ data/train data/lang exp/tri5a_ali exp/tri6a_dnn ( - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev exp/tri5a/graph data/dev exp/tri6a_dnn/decode_dev ) & # Decode test sets ( - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_test exp/tri5a/graph data/test exp/tri6a_dnn/decode_test - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_sup exp/tri5a/graph data/sup exp/tri6a_dnn/decode_sup - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_h5 exp/tri5a/graph data/h5 exp/tri6a_dnn/decode_h5 ) & diff --git a/egs/chime3/s5/cmd.sh b/egs/chime3/s5/cmd.sh index 7ee5fbcd73d..cf2570db1a9 100755 --- a/egs/chime3/s5/cmd.sh +++ b/egs/chime3/s5/cmd.sh @@ -6,9 +6,9 @@ # the number of cpus on your machine. #a) JHU cluster options -#export train_cmd="queue.pl -l arch=*64" -#export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G" -#export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G" +#export train_cmd="queue.pl" +#export decode_cmd="queue.pl --mem 4G" +#export mkgraph_cmd="queue.pl --mem 4G" #export cuda_cmd="..." 
diff --git a/egs/csj/s5/local/csj_run_rnnlm.sh b/egs/csj/s5/local/csj_run_rnnlm.sh index 5c6cd4343f6..e02f19bb680 100755 --- a/egs/csj/s5/local/csj_run_rnnlm.sh +++ b/egs/csj/s5/local/csj_run_rnnlm.sh @@ -3,7 +3,7 @@ # Copyright 2016 Tokyo Institute of Technology (Authors: Tomohiro Tanaka, Takafumi Moriya and Takahiro Shinozaki) # 2016 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe) # Apache 2.0 -# Acknowledgement This work was supported by JSPS KAKENHI Grant Number 26280055. +# Acknowledgement This work was supported by JSPS KAKENHI Grant Number 26280055. [ -f ./path.sh ] && . ./path.sh . utils/parse_options.sh @@ -21,7 +21,7 @@ echo h30 Begin local/csj_train_rnnlms.sh --dict-suffix "_nosp" data/local/rnnlm.h30 sleep 20; # wait till tools compiled. -echo h100 Begin +echo h100 Begin local/csj_train_rnnlms.sh --dict-suffix "_nosp" \ --hidden 100 --nwords 10000 --class 200 \ --direct 0 data/local/rnnlm.h100 @@ -60,9 +60,9 @@ for dict in rnnlm.h30 rnnlm.h100 rnnlm.h200 rnnlm.h300 rnnlm.h400 rnnlm.h500 ;do echo "rnnlm0.5" steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver \ - --N 100 --cmd "queue -l mem_free=1G" --inv-acwt $acwt 0.5 \ + --N 100 --cmd "$decode_cmd --mem 1G" --inv-acwt $acwt 0.5 \ data/lang_csj_tg $dir data/$eval_num $sourcedir ${resultsdir}_L0.5 - + rm -rf ${resultsdir}_L0.25 rm -rf ${resultsdir}_L0.75 cp -rp ${resultsdir}_L0.5 ${resultsdir}_L0.25 @@ -70,12 +70,12 @@ for dict in rnnlm.h30 rnnlm.h100 rnnlm.h200 rnnlm.h300 rnnlm.h400 rnnlm.h500 ;do echo "rnnlm0.25" steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver \ - --stage 7 --N 100 --cmd "$decode_cmd -l mem_free=1G" --inv-acwt $acwt 0.25 \ + --stage 7 --N 100 --cmd "$decode_cmd --mem 1G" --inv-acwt $acwt 0.25 \ data/lang_csj_tg $dir data/$eval_num $sourcedir ${resultsdir}_L0.25 echo "rnnlm0.75" steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver \ - --stage 7 --N 100 --cmd "$decode_cmd -l mem_free=1G" --inv-acwt $acwt 0.75 \ + --stage 7 --N 100 --cmd "$decode_cmd --mem 1G" --inv-acwt $acwt 0.75 \ data/lang_csj_tg $dir data/$eval_num $sourcedir ${resultsdir}_L0.75 done done diff --git a/egs/csj/s5/local/nnet/run_lstm.sh b/egs/csj/s5/local/nnet/run_lstm.sh index 3cc330c55a8..dc0f40dec24 100755 --- a/egs/csj/s5/local/nnet/run_lstm.sh +++ b/egs/csj/s5/local/nnet/run_lstm.sh @@ -34,10 +34,10 @@ stage=0 steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj 10 $dir $dir/log $dir/data || exit 1; steps/compute_cmvn_stats.sh $dir $dir/log $dir/data || exit 1; done - + # Training set utils/copy_data_dir.sh $train_original $train || exit 1; rm $train/{cmvn,feats}.scp - steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd -tc 10" \ + steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd --max-jobs-run 10" \ $train $train/log $train/data || exit 1; steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1; # Split the training set diff --git a/egs/fisher_callhome_spanish/s5/run.sh b/egs/fisher_callhome_spanish/s5/run.sh index 380a8aec936..ad650cd390e 100755 --- a/egs/fisher_callhome_spanish/s5/run.sh +++ b/egs/fisher_callhome_spanish/s5/run.sh @@ -256,7 +256,7 @@ steps/train_mmi_sgmm2.sh \ ( utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph -steps/decode_fmllr_extra.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ +steps/decode_fmllr_extra.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --config conf/decode.config --scoring-opts "--min-lmwt 8 --max-lmwt 12"\ exp/tri5a/graph data/dev exp/tri5a/decode_dev utils/mkgraph.sh data/lang_test exp/sgmm5 exp/sgmm5/graph @@ 
-274,9 +274,9 @@ done dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \ - --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 --mem 2G") + --parallel-opts "--num-threads 16" --cmd "queue.pl --mem 2G") dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1 \ - --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 --mem 2G") + --parallel-opts "--gpu 1" --cmd "queue.pl --mem 2G") steps/nnet2/train_pnorm_ensemble.sh \ --mix-up 5000 --initial-learning-rate 0.008 --final-learning-rate 0.0008\ @@ -287,7 +287,7 @@ steps/nnet2/train_pnorm_ensemble.sh \ data/train data/lang exp/tri5a_ali exp/tri6a_dnn ( - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev exp/tri5a/graph data/dev exp/tri6a_dnn/decode_dev ) & wait diff --git a/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh b/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh index eae5f7b8581..210d0f5646f 100755 --- a/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh +++ b/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh @@ -21,7 +21,7 @@ EOF . utils/parse_options.sh -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. ( if [ "$USER" == dpovey ]; then diff --git a/egs/fisher_english/s5/local/online/run_nnet2.sh b/egs/fisher_english/s5/local/online/run_nnet2.sh index 0b9adb7d315..de4d56bb52e 100755 --- a/egs/fisher_english/s5/local/online/run_nnet2.sh +++ b/egs/fisher_english/s5/local/online/run_nnet2.sh @@ -21,7 +21,7 @@ If you want to use GPUs (and have them), go to src/, and configure and make on a where "nvcc" is installed. EOF fi -parallel_opts="-l gpu=1" +parallel_opts="--gpu 1" num_threads=1 minibatch_size=512 dir=exp/nnet2_online/nnet_a diff --git a/egs/fisher_english/s5/local/online/run_nnet2_b.sh b/egs/fisher_english/s5/local/online/run_nnet2_b.sh index 7eac7cf0a7d..e1491a10c0b 100755 --- a/egs/fisher_english/s5/local/online/run_nnet2_b.sh +++ b/egs/fisher_english/s5/local/online/run_nnet2_b.sh @@ -19,22 +19,22 @@ set -e if $use_gpu; then if ! cuda-compiled; then - cat < # stage to do partial re-run from." 
echo " --num-gselect # Number of Gaussians per frame to" echo " # limit computation to, for speed" - echo " --subsample # In main E-M phase, use every n" + echo " --subsample # In main E-M phase, use every n" echo " # frames (a speedup)" echo " --num-frames # Maximum num-frames to keep in memory" echo " # for model initialization" @@ -59,7 +59,7 @@ if [ $# != 3 ]; then echo " # in initialization phase (then split)" echo " --num-threads # number of threads to use in initialization" echo " # phase (must match with parallel-opts option)" - echo " --parallel-opts # Option should match number of threads in" + echo " --parallel-opts # Option should match number of threads in" echo " # --num-threads option above" echo " --min-gaussian-weight # min Gaussian weight allowed in GMM" echo " # initialization (this relatively high" diff --git a/egs/lre/v1/lid/train_ivector_extractor.sh b/egs/lre/v1/lid/train_ivector_extractor.sh index 8e238985f99..18f536a60cb 100755 --- a/egs/lre/v1/lid/train_ivector_extractor.sh +++ b/egs/lre/v1/lid/train_ivector_extractor.sh @@ -13,7 +13,7 @@ # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -24,8 +24,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 @@ -84,7 +84,7 @@ nj_full=$[$nj*$num_processes] sdata=$data/split$nj_full; utils/split_data.sh $data $nj_full || exit 1; -parallel_opts="-pe smp $[$num_threads*$num_processes]" +parallel_opts="--num-threads $[$num_threads*$num_processes]" ## Set up features. feats="ark,s,cs:apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 scp:$sdata/JOB/feats.scp ark:- | add-deltas-sdc ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -97,7 +97,7 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1 -fi +fi # Do Gaussian selection and posterior extracion @@ -146,7 +146,7 @@ while [ $x -lt $num_iters ]; do nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. 
- $cmd -pe smp $nt $dir/log/update.$x.log \ + $cmd --num-threads $nt $dir/log/update.$x.log \ ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; rm $dir/acc.$x.* if $cleanup; then diff --git a/egs/lre/v1/run.sh b/egs/lre/v1/run.sh index 740fad7aceb..bc0f8db572d 100755 --- a/egs/lre/v1/run.sh +++ b/egs/lre/v1/run.sh @@ -50,9 +50,9 @@ rm foo local/split_long_utts.sh --max-utt-len 120 data/train_unsplit data/train # This commented script is an alternative to the above utterance -# splitting method. Here we split the utterance based on the number of +# splitting method. Here we split the utterance based on the number of # frames which are voiced, rather than the total number of frames. -# max_voiced=3000 +# max_voiced=3000 # local/vad_split_utts.sh --max-voiced $max_voiced data/train_unsplit $mfccdir data/train use_vtln=true @@ -61,7 +61,7 @@ if $use_vtln; then cp -rt data/${t} data/${t}_novtln rm -r data/${t}_novtln/{split,.backup,spk2warp} 2>/dev/null || true steps/make_mfcc.sh --mfcc-config conf/mfcc_vtln.conf --nj 100 --cmd "$train_cmd" \ - data/${t}_novtln exp/make_mfcc $mfccdir + data/${t}_novtln exp/make_mfcc $mfccdir lid/compute_vad_decision.sh data/${t}_novtln exp/make_mfcc $mfccdir done # Vtln-related things: @@ -115,7 +115,7 @@ lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd" data/train \ # Alternatively, a diagonal UBM can replace the full UBM used above. # The preceding calls to train_diag_ubm.sh and train_full_ubm.sh # can be commented out and replaced with the following lines. -# +# # This results in a slight degradation but could improve error rate when # there is less training data than used in this example. # @@ -125,12 +125,12 @@ lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd" data/train \ #gmm-global-to-fgmm exp/diag_ubm_2048/final.dubm \ # exp/full_ubm_2048/final.ubm -lid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=2G,ram_free=2G" \ +lid/train_ivector_extractor.sh --cmd "$train_cmd --mem 2G" \ --num-iters 5 exp/full_ubm_2048/final.ubm data/train \ exp/extractor_2048 -lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +lid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048 data/train exp/ivectors_train -lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +lid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048 data/lre07 exp/ivectors_lre07 diff --git a/egs/lre07/v1/lid/nnet2/get_egs2.sh b/egs/lre07/v1/lid/nnet2/get_egs2.sh index 27cf82bd1a1..7806dce4894 100755 --- a/egs/lre07/v1/lid/nnet2/get_egs2.sh +++ b/egs/lre07/v1/lid/nnet2/get_egs2.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). # 2015 David Snyder # Apache 2.0. # @@ -54,7 +54,7 @@ transform_dir= # If supplied, overrides alidir as the place to find fMLLR tr postdir= # If supplied, we will use posteriors in it as soft training targets. stage=0 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. random_copy=false online_ivector_dir= # can be used if we are including speaker information as iVectors. @@ -83,7 +83,7 @@ if [ $# != 3 ]; then echo " # very end." echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." 
- + exit 1; fi @@ -109,7 +109,7 @@ utils/split_data.sh $data $nj mkdir -p $dir/log $dir/info cp $alidir/tree $dir -# Get list of validation utterances. +# Get list of validation utterances. awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \ > $dir/valid_uttlist || exit 1; @@ -129,7 +129,7 @@ awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlis [ -z "$transform_dir" ] && transform_dir=$alidir -## Set up features. +## Set up features. if [ -z $feat_type ]; then if [ -f $alidir/final.mat ] && [ ! -f $transform_dir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi fi @@ -140,7 +140,7 @@ case $feat_type in valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` # caution: the top-level nnet training script should copy these to its own dir now. cp $alidir/{splice_opts,final.mat} $dir || exit 1; @@ -280,13 +280,13 @@ if [ $stage -le 3 ]; then egs_list="$egs_list ark:$dir/egs_orig.$n.JOB.ark" done echo "$0: Generating training examples on disk" - # The examples will go round-robin to egs_list. + # The examples will go round-robin to egs_list. if [ ! -z $postdir ]; then $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \ nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \ scp:$postdir/post.JOB.scp ark:- \| \ nnet-copy-egs ark:- $egs_list || exit 1; - else + else $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \ nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \ "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \ @@ -299,7 +299,7 @@ if [ $stage -le 4 ]; then # shuffle the order, writing to the egs.JOB.ark egs_list= - for n in $(seq $nj); do + for n in $(seq $nj); do egs_list="$egs_list $dir/egs_orig.JOB.$n.ark" done diff --git a/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh b/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh index 4809f42e633..533001934ab 100755 --- a/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh +++ b/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). # 2013 Xiaohui Zhang # 2013 Guoguo Chen # 2014 Vimal Manohar @@ -9,7 +9,7 @@ # This is a modified version of train_multisplice_accel2.sh in # steps/nnet2/ for language recognition. The main difference is -# that it uses different get_lda.sh and get_egs2.sh scripts. +# that it uses different get_lda.sh and get_egs2.sh scripts. # # The original train_multisplice_accel2.sh was a modified version of # train_pnorm_multisplice2.sh (still using pnorm). The "accel" refers to the @@ -25,11 +25,11 @@ num_epochs=15 # Number of epochs of training; initial_effective_lrate=0.01 final_effective_lrate=0.001 bias_stddev=0.5 -pnorm_input_dim=3000 +pnorm_input_dim=3000 pnorm_output_dim=300 minibatch_size=128 # by default use a smallish minibatch size for neural net # training; this controls instability which would otherwise - # be a problem with multi-threaded update. + # be a problem with multi-threaded update. samples_per_iter=400000 # each iteration of training, see this many samples # per job. 
This option is passed to get_egs.sh @@ -66,7 +66,7 @@ splice_indexes="layer0/-4:-3:-2:-1:0:1:2:3:4 layer2/-5:-1:3" # so hidden layer indexing is different from component count -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. These don't randprune=4.0 # speeds up LDA. alpha=4.0 # relates to preconditioning. update_period=4 # relates to online preconditioning: says how often we update the subspace. @@ -78,11 +78,11 @@ precondition_rank_out=80 # relates to online preconditioning mix_up=0 # Number of components to mix up to (should be > #tree leaves, if # specified.) num_threads=16 -parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" +parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know. # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads. combine_num_threads=8 -combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage. +combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage. cleanup=true egs_dir= lda_opts= @@ -92,7 +92,7 @@ transform_dir= # If supplied, overrides alidir feat_type= # Can be used to force "raw" features. align_cmd= # The cmd that is passed to steps/nnet2/align.sh align_use_gpu= # Passed to use_gpu in steps/nnet2/align.sh [yes/no] -realign_times= # List of times on which we realign. Each time is +realign_times= # List of times on which we realign. Each time is # floating point number strictly between 0 and 1, which # will be multiplied by the num-iters to get an iteration # number. @@ -127,10 +127,10 @@ if [ $# != 4 ]; then echo " --num-threads # Number of parallel threads per job (will affect results" echo " # as well as speed; may interact with batch size; if you increase" echo " # this, you may want to decrease the batch size." - echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" - echo " # use multiple threads... note, you might have to reduce mem_free,ram_free" - echo " # versus your defaults, because it gets multiplied by the -pe smp argument." - echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." + echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" + echo " # use multiple threads... note, you might have to reduce --mem" + echo " # versus your defaults, because it gets multiplied by the --num-threads argument." + echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." echo " --minibatch-size # Size of minibatch to process (note: product with --num-threads" echo " # should not get too large, e.g. >2k)." echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per" @@ -148,7 +148,7 @@ if [ $# != 4 ]; then echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." - + exit 1; fi @@ -372,7 +372,7 @@ while [ $x -lt $num_iters ]; do ilr=$initial_effective_lrate; flr=$final_effective_lrate; np=$num_archives_processed; nt=$num_archives_to_process; this_learning_rate=$(perl -e "print (($x + 1 >= $num_iters ? $flr : $ilr*exp($np*log($flr/$ilr)/$nt))*$this_num_jobs);"); - echo "On iteration $x, learning rate is $this_learning_rate." + echo "On iteration $x, learning rate is $this_learning_rate." if [ ! 
-z "${realign_this_iter[$x]}" ]; then prev_egs_dir=$cur_egs_dir @@ -417,7 +417,7 @@ while [ $x -lt $num_iters ]; do steps/nnet2/remove_egs.sh $prev_egs_dir fi fi - + # Set off jobs doing some diagnostics, in the background. # Use the egs dir from the previous iteration for the diagnostics $cmd $dir/log/compute_prob_valid.$x.log \ @@ -461,7 +461,7 @@ while [ $x -lt $num_iters ]; do ( # this sub-shell is so that when we "wait" below, # we only wait for the training jobs that we just spawned, # not the diagnostic jobs that we spawned above. - + # We can't easily use a single parallel SGE job to do the main training, # because the computation of which archive and which --frame option # to use for each job is a little complex, so we spawn each one separately. @@ -500,7 +500,7 @@ while [ $x -lt $num_iters ]; do n=$(perl -e '($nj,$pat)=@ARGV; $best_n=1; $best_logprob=-1.0e+10; for ($n=1;$n<=$nj;$n++) { $fn = sprintf($pat,$n); open(F, "<$fn") || die "Error opening log file $fn"; undef $logprob; while () { if (m/log-prob-per-frame=(\S+)/) { $logprob=$1; } } - close(F); if (defined $logprob && $logprob > $best_logprob) { $best_logprob=$logprob; + close(F); if (defined $logprob && $logprob > $best_logprob) { $best_logprob=$logprob; $best_n=$n; } } print "$best_n\n"; ' $num_jobs_nnet $dir/log/train.$x.%d.log) || exit 1; [ -z "$n" ] && echo "Error getting best model" && exit 1; cp $dir/$[$x+1].$n.mdl $dir/$[$x+1].mdl || exit 1; @@ -537,7 +537,7 @@ if [ $stage -le $num_iters ]; then cur_offset=0 # current offset from first_model_combine. for n in $(seq $max_models_combine); do next_offset=$[($n*$num_models_combine)/$max_models_combine] - sub_list="" + sub_list="" for o in $(seq $cur_offset $[$next_offset-1]); do iter=$[$first_model_combine+$o] mdl=$dir/$iter.mdl diff --git a/egs/lre07/v1/lid/train_diag_ubm.sh b/egs/lre07/v1/lid/train_diag_ubm.sh index 60f2452f3b7..8ba703073c0 100755 --- a/egs/lre07/v1/lid/train_diag_ubm.sh +++ b/egs/lre07/v1/lid/train_diag_ubm.sh @@ -29,7 +29,7 @@ cleanup=true min_gaussian_weight=0.0001 remove_low_count_gaussians=true # set this to false if you need #gauss to stay fixed. num_threads=32 -parallel_opts="-pe smp 32" +parallel_opts="--num-threads 32" # End configuration section. echo "$0 $@" # Print the command line for logging @@ -49,7 +49,7 @@ if [ $# != 3 ]; then echo " --stage # stage to do partial re-run from." echo " --num-gselect # Number of Gaussians per frame to" echo " # limit computation to, for speed" - echo " --subsample # In main E-M phase, use every n" + echo " --subsample # In main E-M phase, use every n" echo " # frames (a speedup)" echo " --num-frames # Maximum num-frames to keep in memory" echo " # for model initialization" @@ -59,7 +59,7 @@ if [ $# != 3 ]; then echo " # in initialization phase (then split)" echo " --num-threads # number of threads to use in initialization" echo " # phase (must match with parallel-opts option)" - echo " --parallel-opts # Option should match number of threads in" + echo " --parallel-opts # Option should match number of threads in" echo " # --num-threads option above" echo " --min-gaussian-weight # min Gaussian weight allowed in GMM" echo " # initialization (this relatively high" diff --git a/egs/lre07/v1/lid/train_ivector_extractor.sh b/egs/lre07/v1/lid/train_ivector_extractor.sh index 8e238985f99..18f536a60cb 100755 --- a/egs/lre07/v1/lid/train_ivector_extractor.sh +++ b/egs/lre07/v1/lid/train_ivector_extractor.sh @@ -13,7 +13,7 @@ # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). 
# (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -24,8 +24,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 @@ -84,7 +84,7 @@ nj_full=$[$nj*$num_processes] sdata=$data/split$nj_full; utils/split_data.sh $data $nj_full || exit 1; -parallel_opts="-pe smp $[$num_threads*$num_processes]" +parallel_opts="--num-threads $[$num_threads*$num_processes]" ## Set up features. feats="ark,s,cs:apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 scp:$sdata/JOB/feats.scp ark:- | add-deltas-sdc ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -97,7 +97,7 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1 -fi +fi # Do Gaussian selection and posterior extracion @@ -146,7 +146,7 @@ while [ $x -lt $num_iters ]; do nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. - $cmd -pe smp $nt $dir/log/update.$x.log \ + $cmd --num-threads $nt $dir/log/update.$x.log \ ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; rm $dir/acc.$x.* if $cleanup; then diff --git a/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh b/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh index 7464ce5faea..8902b730e09 100755 --- a/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh +++ b/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh @@ -9,16 +9,16 @@ # This script trains the i-vector extractor using a DNN-based UBM. It also requires # an fGMM, created by the script lid/init_full_gmm_from_dnn.sh. -# Note: there are 3 separate levels of parallelization: num_threads, num_processes, -# and num_jobs. This may seem a bit excessive. It has to do with minimizing -# memory usage and disk I/O, subject to various constraints. The "num_threads" +# Note: there are 3 separate levels of parallelization: num_threads, num_processes, +# and num_jobs. This may seem a bit excessive. It has to do with minimizing +# memory usage and disk I/O, subject to various constraints. The "num_threads" # is how many threads a program uses; the "num_processes" is the number of separate # processes a single job spawns, and then sums the accumulators in memory. # Our recommendation: # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). 
-# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -29,8 +29,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 @@ -95,9 +95,9 @@ utils/split_data.sh $data $nj_full || exit 1; sdata_dnn=$data_dnn/split$nj_full; utils/split_data.sh $data_dnn $nj_full || exit 1; - -parallel_opts="-pe smp $[$num_threads*$num_processes]" + +parallel_opts="--num-threads $[$num_threads*$num_processes]" # Set up features. @@ -114,7 +114,7 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1; -fi +fi # Do Gaussian selection and posterior extracion @@ -164,7 +164,7 @@ while [ $x -lt $num_iters ]; do nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. - $cmd -pe smp $nt $dir/log/update.$x.log \ + $cmd --num-threads $nt $dir/log/update.$x.log \ ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; rm $dir/acc.$x.* if $cleanup; then diff --git a/egs/lre07/v1/run.sh b/egs/lre07/v1/run.sh index a4ff4d909ba..8664494e558 100755 --- a/egs/lre07/v1/run.sh +++ b/egs/lre07/v1/run.sh @@ -127,12 +127,12 @@ utils/subset_data_dir.sh data/train 5000 data/train_5k utils/subset_data_dir.sh data/train 10000 data/train_10k -lid/train_diag_ubm.sh --nj 30 --cmd "$train_cmd -l mem_free=20G,ram_free=20G" \ +lid/train_diag_ubm.sh --nj 30 --cmd "$train_cmd --mem 20G" \ data/train_5k 2048 exp/diag_ubm_2048 -lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd -l mem_free=20G,ram_free=20G" \ +lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd --mem 20G" \ data/train_10k exp/diag_ubm_2048 exp/full_ubm_2048_10k -lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ +lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd --mem 35G" \ data/train exp/full_ubm_2048_10k exp/full_ubm_2048 # Alternatively, a diagonal UBM can replace the full UBM used above. 
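All of the command-line changes in these recipe diffs follow one pattern: SGE-specific resource
strings that used to be passed straight through queue.pl to qsub are replaced by backend-independent
options that queue.pl now translates itself. A minimal before/after sketch of the mapping (the log
path and the trailing program here are placeholders, not taken from any particular script):

    # old style: raw GridEngine resource requests
    $train_cmd -l mem_free=20G,ram_free=20G -pe smp 4 -tc 5 exp/foo/log/job.JOB.log some-program ...
    # new style: generic options, translated for whichever queueing backend is in use
    $train_cmd --mem 20G --num-threads 4 --max-jobs-run 5 exp/foo/log/job.JOB.log some-program ...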
@@ -148,7 +148,7 @@ lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ #gmm-global-to-fgmm exp/diag_ubm_2048/final.dubm \ # exp/full_ubm_2048/final.ubm -lid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ +lid/train_ivector_extractor.sh --cmd "$train_cmd --mem 35G" \ --use-weights true \ --num-iters 5 exp/full_ubm_2048/final.ubm data/train \ exp/extractor_2048 @@ -162,10 +162,10 @@ utils/fix_data_dir.sh data/train_lr echo "**Language count for logistic regression training (after splitting long utterances):**" awk '{print $2}' data/train_lr/utt2lang | sort | uniq -c | sort -nr -lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +lid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048 data/train_lr exp/ivectors_train -lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +lid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048 data/lre07 exp/ivectors_lre07 lid/run_logistic_regression.sh --prior-scale 0.70 \ diff --git a/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh b/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh index a223e12333f..51fcf401cb2 100755 --- a/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh +++ b/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh @@ -19,13 +19,13 @@ set -e # assume use_gpu=true since it would be way too slow otherwise. if ! cuda-compiled; then - cat < foo; @@ -78,20 +78,20 @@ sid/train_full_ubm.sh --nj 30 --remove-low-count-gaussians false --num-iters 1 - data/train_female_4k exp/full_ubm_2048 exp/full_ubm_2048_female & wait -# note, the mem_free,ram_free is counted per thread... in this setup each +# note, the --mem is counted per thread... in this setup each # job has 4 processes running each with 4 threads; each job takes about 5G # of memory so we need about 20G, plus add memory for sum-accs to make it 25G. -# but we'll submit using -pe smp 16, and this multiplies the memory requirement +# but we'll submit using --num-threads 16, and this multiplies the memory requirement # by 16, so submitting with 2G as the requirement, to make the total requirement # 32, is reasonable. # Train the iVector extractor for male speakers. -sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=2G,ram_free=2G" \ +sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 2G" \ --num-iters 5 exp/full_ubm_2048_male/final.ubm data/train_male \ exp/extractor_2048_male # The same for female speakers. -sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=2G,ram_free=2G" \ +sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 2G" \ --num-iters 5 exp/full_ubm_2048_female/final.ubm data/train_female \ exp/extractor_2048_female @@ -105,22 +105,22 @@ sid/gender_id.sh --cmd "$train_cmd" --nj 150 exp/full_ubm_2048{,_male,_female} \ # Gender-id error rate is 2.58% # Extract the iVectors for the Fisher data. -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_male data/train_male exp/ivectors_train_male -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_female data/train_female exp/ivectors_train_female # .. and for the SRE08 training and test data. (We focus on the main # evaluation condition, the only required one in that eval, which is # the short2-short3 eval.) 
-sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_female data/sre08_train_short2_female exp/ivectors_sre08_train_short2_female -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_male data/sre08_train_short2_male exp/ivectors_sre08_train_short2_male -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_female data/sre08_test_short3_female exp/ivectors_sre08_test_short3_female -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_male data/sre08_test_short3_male exp/ivectors_sre08_test_short3_male @@ -131,7 +131,7 @@ cat $trials | awk '{print $1, $2}' | \ ivector-compute-dot-products - \ scp:exp/ivectors_sre08_train_short2_female/spk_ivector.scp \ scp:exp/ivectors_sre08_test_short3_female/spk_ivector.scp \ - foo + foo local/score_sre08.sh $trials foo diff --git a/egs/sre08/v1/run.sh b/egs/sre08/v1/run.sh index 4e31542bf4d..c4afe447e8d 100755 --- a/egs/sre08/v1/run.sh +++ b/egs/sre08/v1/run.sh @@ -110,12 +110,12 @@ sid/train_full_ubm.sh --nj 30 --remove-low-count-gaussians false \ wait # Train the iVector extractor for male speakers. -sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ +sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 35G" \ --num-iters 5 exp/full_ubm_2048_male/final.ubm data/train_male \ exp/extractor_2048_male # The same for female speakers. -sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ +sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 35G" \ --num-iters 5 exp/full_ubm_2048_female/final.ubm data/train_female \ exp/extractor_2048_female @@ -129,25 +129,25 @@ sid/gender_id.sh --cmd "$train_cmd" --nj 150 exp/full_ubm_2048{,_male,_female} \ # Gender-id error rate is 3.41% # Extract the iVectors for the training data. -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_male data/train_male exp/ivectors_train_male -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_female data/train_female exp/ivectors_train_female # .. and for the SRE08 training and test data. (We focus on the main # evaluation condition, the only required one in that eval, which is # the short2-short3 eval.) 
-sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_female data/sre08_train_short2_female \ exp/ivectors_sre08_train_short2_female -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_male data/sre08_train_short2_male \ exp/ivectors_sre08_train_short2_male -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_female data/sre08_test_short3_female \ exp/ivectors_sre08_test_short3_female -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_male data/sre08_test_short3_male \ exp/ivectors_sre08_test_short3_male diff --git a/egs/sre08/v1/sid/train_diag_ubm.sh b/egs/sre08/v1/sid/train_diag_ubm.sh index 6ff1a9099d9..69e2fca5538 100755 --- a/egs/sre08/v1/sid/train_diag_ubm.sh +++ b/egs/sre08/v1/sid/train_diag_ubm.sh @@ -60,7 +60,7 @@ if [ $# != 3 ]; then echo " # in initialization phase (then split)" echo " --num-threads # number of threads to use in initialization" echo " # phase (must match with parallel-opts option)" - echo " --parallel-opts # Option should match number of threads in" + echo " --parallel-opts # Option should match number of threads in" echo " # --num-threads option above" echo " --min-gaussian-weight # min Gaussian weight allowed in GMM" echo " # initialization (this relatively high" @@ -85,7 +85,7 @@ for f in $data/feats.scp $data/vad.scp; do [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1 done -parallel_opts="-pe smp $num_threads" +parallel_opts="--num-threads $num_threads" delta_opts="--delta-window=$delta_window --delta-order=$delta_order" echo $delta_opts > $dir/delta_opts diff --git a/egs/sre08/v1/sid/train_ivector_extractor.sh b/egs/sre08/v1/sid/train_ivector_extractor.sh index 5d7eb984485..fd9ff5185d5 100755 --- a/egs/sre08/v1/sid/train_ivector_extractor.sh +++ b/egs/sre08/v1/sid/train_ivector_extractor.sh @@ -13,7 +13,7 @@ # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -24,8 +24,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. 
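# For concreteness (using nj=10 and num_threads=4 from above, and taking
# num_processes=4 as in the example in these comments), the script ends up with
#   nj_full=$[$nj*$num_processes]                                  # 10 * 4 = 40 pieces of data
#   parallel_opts="--num-threads $[$num_threads*$num_processes]"   # 4 * 4 = 16 threads per queue job
# so across the nj queue jobs, up to 10 * 4 * 4 = 160 threads run at once.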
num_threads=4 @@ -90,7 +90,7 @@ if [ -f $srcdir/delta_opts ]; then cp $srcdir/delta_opts $dir/ 2>/dev/null fi -parallel_opts="-pe smp $[$num_threads*$num_processes]" +parallel_opts="--num-threads $[$num_threads*$num_processes]" ## Set up features. feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -102,7 +102,7 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1 -fi +fi # Do Gaussian selection and posterior extracion @@ -151,7 +151,7 @@ while [ $x -lt $num_iters ]; do nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. - $cmd -pe smp $nt $dir/log/update.$x.log \ + $cmd --num-threads $nt $dir/log/update.$x.log \ ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; rm $dir/acc.$x.* if $cleanup; then diff --git a/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh b/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh index 64579735376..9904a8cd3c6 100755 --- a/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh +++ b/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh @@ -8,16 +8,16 @@ # This script trains the i-vector extractor using a DNN-based UBM. It also requires # an fGMM, usually created by the script sid/init_full_gmm_from_dnn.sh. -# Note: there are 3 separate levels of parallelization: num_threads, num_processes, -# and num_jobs. This may seem a bit excessive. It has to do with minimizing -# memory usage and disk I/O, subject to various constraints. The "num_threads" +# Note: there are 3 separate levels of parallelization: num_threads, num_processes, +# and num_jobs. This may seem a bit excessive. It has to do with minimizing +# memory usage and disk I/O, subject to various constraints. The "num_threads" # is how many threads a program uses; the "num_processes" is the number of separate # processes a single job spawns, and then sums the accumulators in memory. # Our recommendation: # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -28,8 +28,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. 
num_threads=4 @@ -100,9 +100,9 @@ if [ -f $srcdir/delta_opts ]; then cp $srcdir/delta_opts $dir/ 2>/dev/null fi -splice_opts=`cat exp/nnet//splice_opts 2>/dev/null` # frame-splicing options +splice_opts=`cat exp/nnet//splice_opts 2>/dev/null` # frame-splicing options -parallel_opts="-pe smp $[$num_threads*$num_processes]" +parallel_opts="--num-threads $[$num_threads*$num_processes]" ## Set up features. feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -117,7 +117,7 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1; -fi +fi # Do Gaussian selection and posterior extraction @@ -167,7 +167,7 @@ while [ $x -lt $num_iters ]; do nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. - $cmd -pe smp $nt $dir/log/update.$x.log \ + $cmd --num-threads $nt $dir/log/update.$x.log \ ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; rm $dir/acc.$x.* if $cleanup; then diff --git a/egs/sre10/v1/local/dnn/get_egs2.sh b/egs/sre10/v1/local/dnn/get_egs2.sh index 9f1644178e2..05ea1d1a0cd 100755 --- a/egs/sre10/v1/local/dnn/get_egs2.sh +++ b/egs/sre10/v1/local/dnn/get_egs2.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). # 2015 David Snyder # Apache 2.0. # @@ -54,7 +54,7 @@ transform_dir= # If supplied, overrides alidir as the place to find fMLLR tr postdir= # If supplied, we will use posteriors in it as soft training targets. stage=0 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. random_copy=false online_ivector_dir= # can be used if we are including speaker information as iVectors. @@ -83,7 +83,7 @@ if [ $# != 3 ]; then echo " # very end." echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." - + exit 1; fi @@ -109,7 +109,7 @@ utils/split_data.sh $data $nj mkdir -p $dir/log $dir/info cp $alidir/tree $dir -# Get list of validation utterances. +# Get list of validation utterances. awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \ > $dir/valid_uttlist || exit 1; @@ -129,7 +129,7 @@ awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlis [ -z "$transform_dir" ] && transform_dir=$alidir -## Set up features. +## Set up features. if [ -z $feat_type ]; then if [ -f $alidir/final.mat ] && [ ! -f $transform_dir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi fi @@ -140,7 +140,7 @@ case $feat_type in valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` # caution: the top-level nnet training script should copy these to its own dir now. 
cp $alidir/{splice_opts,final.mat} $dir || exit 1; @@ -280,13 +280,13 @@ if [ $stage -le 3 ]; then egs_list="$egs_list ark:$dir/egs_orig.$n.JOB.ark" done echo "$0: Generating training examples on disk" - # The examples will go round-robin to egs_list. + # The examples will go round-robin to egs_list. if [ ! -z $postdir ]; then $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \ nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \ scp:$postdir/post.JOB.scp ark:- \| \ nnet-copy-egs ark:- $egs_list || exit 1; - else + else $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \ nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \ "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \ @@ -299,7 +299,7 @@ if [ $stage -le 4 ]; then # shuffle the order, writing to the egs.JOB.ark egs_list= - for n in $(seq $nj); do + for n in $(seq $nj); do egs_list="$egs_list $dir/egs_orig.JOB.$n.ark" done diff --git a/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh b/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh index 684cc8ddfc0..5a1a67e9eb8 100755 --- a/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh +++ b/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh @@ -19,13 +19,13 @@ set -e # assume use_gpu=true since it would be way too slow otherwise. if ! cuda-compiled; then - cat < &vec_in, std::vector *vec_out) { /// A hashing function-object for vectors. template struct VectorHasher { // hashing function for vector. - size_t operator()(const std::vector &x) const { + size_t operator()(const std::vector &x) const noexcept { size_t ans = 0; typename std::vector::const_iterator iter = x.begin(), end = x.end(); for (; iter != end; ++iter) { @@ -235,7 +235,7 @@ struct VectorHasher { // hashing function for vector. /// A hashing function-object for pairs of ints template struct PairHasher { // hashing function for pair - size_t operator()(const std::pair &x) const { + size_t operator()(const std::pair &x) const noexcept { // 7853 was chosen at random from a list of primes. return x.first + x.second * 7853; } @@ -248,7 +248,7 @@ struct PairHasher { // hashing function for pair /// A hashing function object for strings. struct StringHasher { // hashing function for std::string - size_t operator()(const std::string &str) const { + size_t operator()(const std::string &str) const noexcept { size_t ans = 0, len = str.length(); const char *c = str.c_str(), *end = c + len; for (; c != end; c++) { From 13d300fc6b7aefca31cc359fe8e839fe5430fb16 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 29 Mar 2017 18:11:12 -0400 Subject: [PATCH 491/530] [src,doc] Fix several unrelated minor problems. Thanks: gaoxinglong --- src/doc/dnn3_scripts_context.dox | 2 +- src/nnet3/nnet-optimize.cc | 2 +- src/nnet3/nnet-utils.h | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/doc/dnn3_scripts_context.dox b/src/doc/dnn3_scripts_context.dox index 43ee0d40260..884e8c79f51 100644 --- a/src/doc/dnn3_scripts_context.dox +++ b/src/doc/dnn3_scripts_context.dox @@ -49,7 +49,7 @@ namespace nnet3 { compute this output without seeing a range of input frames. For example, it may be impossible to compute the output without seeing the range of 't' values from t = 150 through t = 157. In this case (glossing over details), - we'd say that the network has a \b left-context of 3 and a \b right-context of 4. + we'd say that the network has a \b left-context of 4 and a \b right-context of 3. 
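+ (To make the arithmetic explicit, assuming the output in question is the one at t = 154: the
+ left-context is 154 - 150 = 4 and the right-context is 157 - 154 = 3. In general the left-context
+ is the output's 't' minus the smallest input 't' it requires, and the right-context is the
+ largest input 't' it requires minus the output's 't'.)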
The actual computation of the context is a bit more complex as it has to take into account special cases like where, say, the behavior for odd and even 't' values is different (c.f. Round() descriptors in diff --git a/src/nnet3/nnet-optimize.cc b/src/nnet3/nnet-optimize.cc index 33091674bd4..abafedc2f2d 100644 --- a/src/nnet3/nnet-optimize.cc +++ b/src/nnet3/nnet-optimize.cc @@ -863,7 +863,7 @@ void FixGotoOutputReordering(const Nnet &nnet, FixGotoLabel(computation); // make sure the destination label of the goto statement was // correct. int32 goto_command_index = -1; - for (int32 c = computation->commands.size(); c >= 0; c--) + for (int32 c = computation->commands.size() - 1; c >= 0; c--) if (computation->commands[c].command_type == kGotoLabel) goto_command_index = c; KALDI_ASSERT(goto_command_index > 0); diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h index 766b0ed1798..921f1f1901d 100644 --- a/src/nnet3/nnet-utils.h +++ b/src/nnet3/nnet-utils.h @@ -81,7 +81,7 @@ std::string PrintVectorPerUpdatableComponent(const Nnet &nnet, /// This function returns true if the nnet has the following properties: /// It has an output called "output" (other outputs are allowed but may be -/// ignored). +/// ignored). /// It has an input called "input", and possibly an extra input called /// "ivector", but no other inputs. /// There are probably some other properties that we really ought to @@ -160,8 +160,8 @@ void ConvertRepeatedToBlockAffine(Nnet *nnet); /// Info() function (we need this in the CTC code). std::string NnetInfo(const Nnet &nnet); -/// This function sets the dropout proportion in all dropout component to -/// dropout_proportion value. +/// This function sets the dropout proportion in all dropout components to +/// the value 'dropout_proportion' void SetDropoutProportion(BaseFloat dropout_proportion, Nnet *nnet); /// This function finds a list of components that are never used, and outputs From e9d79939bb11b073e471e7210a81466db144bccb Mon Sep 17 00:00:00 2001 From: Yiming Wang Date: Fri, 31 Mar 2017 12:38:45 -0400 Subject: [PATCH 492/530] [src] (minor) Added missing SetZero() to NaturalGradientAffineComponent::Scale() if scale==0.0 (#1522) --- src/nnet3/nnet-simple-component.cc | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc index f05623c65b3..8bbe76840da 100644 --- a/src/nnet3/nnet-simple-component.cc +++ b/src/nnet3/nnet-simple-component.cc @@ -2798,11 +2798,19 @@ void NaturalGradientAffineComponent::ZeroStats() { } void NaturalGradientAffineComponent::Scale(BaseFloat scale) { - update_count_ *= scale; - max_change_scale_stats_ *= scale; - active_scaling_count_ *= scale; - linear_params_.Scale(scale); - bias_params_.Scale(scale); + if (scale == 0.0) { + update_count_ = 0.0; + max_change_scale_stats_ = 0.0; + active_scaling_count_ = 0.0; + linear_params_.SetZero(); + bias_params_.SetZero(); + } else { + update_count_ *= scale; + max_change_scale_stats_ *= scale; + active_scaling_count_ *= scale; + linear_params_.Scale(scale); + bias_params_.Scale(scale); + } } void NaturalGradientAffineComponent::Add(BaseFloat alpha, const Component &other_in) { From e8ca375dadeec36c2322d9dd47461badbae584a3 Mon Sep 17 00:00:00 2001 From: david-ryan-snyder Date: Sat, 1 Apr 2017 21:12:55 -0400 Subject: [PATCH 493/530] [scripts,egs] Adding options for using PCA instead of LDA+MLLT for ivectors used in ASR. Results are reported in the default TDNN recipe in AMI. 
Updating steps/online/nnet2/{train_diag_ubm.sh,train_ivector_extractor.sh} so that they now backup the contents of their destination directory if it already exists. (#1514) --- egs/ami/s5b/RESULTS_ihm | 21 +- egs/ami/s5b/local/chain/run_tdnn.sh | 2 +- egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh | 269 ++++++++++++++++++ egs/ami/s5b/local/nnet3/run_ivector_common.sh | 44 ++- .../steps/online/nnet2/get_pca_transform.sh | 67 +++++ .../s5/steps/online/nnet2/train_diag_ubm.sh | 35 ++- .../online/nnet2/train_ivector_extractor.sh | 29 +- 7 files changed, 425 insertions(+), 42 deletions(-) create mode 100755 egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh create mode 100755 egs/wsj/s5/steps/online/nnet2/get_pca_transform.sh diff --git a/egs/ami/s5b/RESULTS_ihm b/egs/ami/s5b/RESULTS_ihm index 44234fc3fd9..25a60d24cfb 100644 --- a/egs/ami/s5b/RESULTS_ihm +++ b/egs/ami/s5b/RESULTS_ihm @@ -40,7 +40,6 @@ %WER 24.0 | 13098 94470 | 79.4 12.1 8.5 3.4 24.0 57.1 | -0.153 | exp/ihm/nnet3_cleaned/tdnn_sp/decode_dev/ascore_12/dev_hires.ctm.filt.sys %WER 25.5 | 12643 89984 | 77.7 14.2 8.2 3.2 25.5 56.4 | -0.139 | exp/ihm/nnet3_cleaned/tdnn_sp/decode_eval/ascore_11/eval_hires.ctm.filt.sys - # local/nnet3/run_tdnn.sh --mic ihm --train-set train --gmm tri3 --nnet3-affix "" # nnet3 xent TDNN without data cleaning [cleaning makes very small and # inconsistent difference on this dat] @@ -55,17 +54,21 @@ %WER 22.4 | 12643 89977 | 80.3 12.5 7.2 2.7 22.4 53.6 | -0.503 | exp/ihm/nnet3_cleaned/lstm_bidirectional_sp/decode_eval/ascore_10/eval_hires.ctm.filt.sys ############################################ - -# local/chain/run_tdnn.sh --mic ihm --stage 12 & -# cleanup + chain TDNN model -# for d in exp/ihm/chain_cleaned/tdnn_sp_bi/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done -%WER 22.5 | 13098 94490 | 80.6 10.8 8.6 3.1 22.5 55.0 | 0.072 | exp/ihm/chain_cleaned/tdnn_sp_bi/decode_dev/ascore_10/dev_hires.ctm.filt.sys -%WER 22.5 | 12643 89978 | 80.3 12.5 7.2 2.7 22.5 53.1 | 0.149 | exp/ihm/chain_cleaned/tdnn_sp_bi/decode_eval/ascore_10/eval_hires.ctm.filt.sys - +# cleanup + chain TDNN model. +# local/chain/run_tdnn.sh --mic ihm --stage 4 & +# for d in exp/ihm/chain_cleaned/tdnn1d_sp_bi/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done +%WER 21.7 | 13098 94488 | 81.1 10.4 8.4 2.8 21.7 54.4 | 0.096 | exp/ihm/chain_cleaned/tdnn1d_sp_bi/decode_dev/ascore_10/dev_hires.ctm.filt.sys +%WER 22.1 | 12643 89979 | 80.5 12.1 7.4 2.6 22.1 52.8 | 0.185 | exp/ihm/chain_cleaned/tdnn1d_sp_bi/decode_eval/ascore_10/eval_hires.ctm.filt.sys + +# cleanup + chain TDNN model. Uses LDA instead of PCA for ivector features. +# local/chain/tuning/run_tdnn_1b.sh --mic ihm --stage 4 & +# for d in exp/ihm/chain_cleaned/tdnn1b_sp_bi/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done +%WER 22.0 | 13098 94488 | 80.8 10.2 9.0 2.8 22.0 54.7 | 0.102 | exp/ihm/chain_cleaned/tdnn1b_sp_bi/decode_dev/ascore_10/dev_hires.ctm.filt.sys +%WER 22.2 | 12643 89968 | 80.3 12.1 7.6 2.6 22.2 52.9 | 0.170 | exp/ihm/chain_cleaned/tdnn1b_sp_bi/decode_eval/ascore_10/eval_hires.ctm.filt.sys # local/chain/run_tdnn.sh --mic ihm --train-set train --gmm tri3 --nnet3-affix "" --stage 12 # chain TDNN model without cleanup [note: cleanup helps very little on this IHM data.] 
-for d in exp/ihm/chain/tdnn_sp_bi/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done +# for d in exp/ihm/chain/tdnn_sp_bi/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done %WER 22.4 | 13098 94476 | 80.4 10.4 9.2 2.8 22.4 54.6 | 0.069 | exp/ihm/chain/tdnn_sp_bi/decode_dev/ascore_10/dev_hires.ctm.filt.sys %WER 22.5 | 12643 89974 | 80.0 12.1 7.9 2.6 22.5 52.8 | 0.157 | exp/ihm/chain/tdnn_sp_bi/decode_eval/ascore_10/eval_hires.ctm.filt.sys diff --git a/egs/ami/s5b/local/chain/run_tdnn.sh b/egs/ami/s5b/local/chain/run_tdnn.sh index 61f8f499182..e1adaa9346d 120000 --- a/egs/ami/s5b/local/chain/run_tdnn.sh +++ b/egs/ami/s5b/local/chain/run_tdnn.sh @@ -1 +1 @@ -tuning/run_tdnn_1b.sh \ No newline at end of file +tuning/run_tdnn_1d.sh \ No newline at end of file diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh new file mode 100755 index 00000000000..a9f228cb55d --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh @@ -0,0 +1,269 @@ +#!/bin/bash + +# same as 1b but uses PCA instead of +# LDA features for the ivector extractor. + +# Results on 03/27/2017: +# local/chain/compare_wer_general.sh ihm tdnn1b_sp_bi tdnn1d_sp_bi +# System tdnn1b_sp_bi tdnn1d_sp_bi +# WER on dev 22.0 21.9 +# WER on eval 22.2 22.3 +# Final train prob -0.0813472 -0.0807054 +# Final valid prob -0.132032 -0.133564 +# Final train prob (xent) -1.41543 -1.41951 +# Final valid prob (xent) -1.62316 -1.63021 + +set -e -o pipefail +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +ivector_transform_type=pca +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1d #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn7 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn7 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + touch $dir/egs/.nodelete # keep egs around when that run dies. 
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/nnet3/run_ivector_common.sh b/egs/ami/s5b/local/nnet3/run_ivector_common.sh index bccbb42494c..860009c5ef5 100755 --- a/egs/ami/s5b/local/nnet3/run_ivector_common.sh +++ b/egs/ami/s5b/local/nnet3/run_ivector_common.sh @@ -17,8 +17,8 @@ train_set=train # you might set this to e.g. train_cleaned. gmm=tri3 # This specifies a GMM-dir from the features of the type you're training the system on; # it should contain alignments for 'train_set'. - num_threads_ubm=32 +ivector_transform_type=lda nnet3_affix=_cleaned # affix for exp/$mic/nnet3 directory to put iVector stuff in, so it # becomes exp/$mic/nnet3_cleaned or whatever. @@ -30,7 +30,7 @@ nnet3_affix=_cleaned # affix for exp/$mic/nnet3 directory to put iVector stu gmmdir=exp/${mic}/${gmm} -for f in data/${mic}/${train_set}/feats.scp ${gmmdir}/final.mdl; do +for f in data/${mic}/${train_set}/feats.scp ; do if [ ! -f $f ]; then echo "$0: expected file $f to exist" exit 1 @@ -110,20 +110,36 @@ if [ $stage -le 4 ]; then echo "$0: warning: number of feats $n1 != $n2, if these are very different it could be bad." fi - echo "$0: training a system on the hires data for its LDA+MLLT transform, in order to produce the diagonal GMM." - if [ -e exp/$mic/nnet3${nnet3_affix}/tri5/final.mdl ]; then - # we don't want to overwrite old stuff, ask the user to delete it. - echo "$0: exp/$mic/nnet3${nnet3_affix}/tri5/final.mdl already exists: " - echo " ... please delete and then rerun, or use a later --stage option." 
- exit 1; - fi - steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 7 --mllt-iters "2 4 6" \ - --splice-opts "--left-context=3 --right-context=3" \ - 3000 10000 $temp_data_root/${train_set}_hires data/lang \ - $gmmdir exp/$mic/nnet3${nnet3_affix}/tri5 + case $ivector_transform_type in + lda) + if [ ! -f ${gmmdir}/final.mdl ]; then + echo "$0: expected file ${gmmdir}/final.mdl to exist" + exit 1; + fi + echo "$0: training a system on the hires data for its LDA+MLLT transform, in order to produce the diagonal GMM." + if [ -e exp/$mic/nnet3${nnet3_affix}/tri5/final.mdl ]; then + # we don't want to overwrite old stuff, ask the user to delete it. + echo "$0: exp/$mic/nnet3${nnet3_affix}/tri5/final.mdl already exists: " + echo " ... please delete and then rerun, or use a later --stage option." + exit 1; + fi + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 7 --mllt-iters "2 4 6" \ + --splice-opts "--left-context=3 --right-context=3" \ + 3000 10000 $temp_data_root/${train_set}_hires data/lang \ + $gmmdir exp/$mic/nnet3${nnet3_affix}/tri5 + ;; + pca) + echo "$0: computing a PCA transform from the hires data." + steps/online/nnet2/get_pca_transform.sh --cmd "$train_cmd" \ + --splice-opts "--left-context=3 --right-context=3" \ + --max-utts 10000 --subsample 2 \ + $temp_data_root/${train_set}_hires \ + exp/$mic/nnet3${nnet3_affix}/tri5 + ;; + *) echo "$0: invalid iVector transform type $ivector_transform_type" && exit 1; + esac fi - if [ $stage -le 5 ]; then echo "$0: computing a subset of data to train the diagonal UBM." diff --git a/egs/wsj/s5/steps/online/nnet2/get_pca_transform.sh b/egs/wsj/s5/steps/online/nnet2/get_pca_transform.sh new file mode 100755 index 00000000000..e0b704f8852 --- /dev/null +++ b/egs/wsj/s5/steps/online/nnet2/get_pca_transform.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +# Copyright 2016 David Snyder +# +# This script computes a PCA transform on top of spliced features processed with +# apply-cmvn-online. +# +# +# Apache 2.0. + +# Begin configuration. +cmd=run.pl +config= +stage=0 +dim=40 # The dim after applying PCA +normalize_variance=true # If the PCA transform normalizes the variance +normalize_mean=true # If the PCA transform centers +splice_opts= +online_cmvn_opts= +max_utts=5000 # maximum number of files to use +subsample=5 # subsample features with this periodicity + +echo "$0 $@" # Print the command line for logging + +[ -f path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +if [ $# != 2 ]; then + echo "Usage: steps/nnet2/get_pca_transform.sh [options] " + echo " e.g.: steps/train_pca_transform.sh data/train_si84 exp/tri2b" + echo "Main options (for others, see top of script file)" + echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." + echo " --config # config containing options" + echo " --stage # stage to do partial re-run from." + exit 1; +fi + +data=$1 +dir=$2 + +for f in $data/feats.scp ; do + [ ! -f "$f" ] && echo "$0: expecting file $f to exist" && exit 1 +done + +mkdir -p $dir/log + +echo "$splice_opts" >$dir/splice_opts # keep track of frame-splicing options + # so that later stages of system building can know what they were. +echo $online_cmvn_opts > $dir/online_cmvn.conf # keep track of options to CMVN. + +# create global_cmvn.stats +if ! 
matrix-sum --binary=false scp:$data/cmvn.scp - >$dir/global_cmvn.stats 2>/dev/null; then + echo "$0: Error summing cmvn stats" + exit 1 +fi + +feats="ark,s,cs:utils/subset_scp.pl --quiet $max_utts $data/feats.scp | apply-cmvn-online $online_cmvn_opts $dir/global_cmvn.stats scp:- ark:- | splice-feats $splice_opts ark:- ark:- | subsample-feats --n=$subsample ark:- ark:- |" + +if [ $stage -le 0 ]; then + $cmd $dir/log/pca_est.log \ + est-pca --dim=$dim --normalize-variance=$normalize_variance \ + --normalize-mean=$normalize_mean "$feats" $dir/final.mat || exit 1; +fi + +echo "Done estimating PCA transform in $dir" + +exit 0 diff --git a/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh b/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh index 22250ae9ee3..80a023fed8a 100755 --- a/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh +++ b/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh @@ -10,15 +10,15 @@ # This script was modified from ../../sre08/v1/sid/train_diag_ubm.sh. It trains # a diagonal UBM on top of features processed with apply-cmvn-online and then -# transformed with an LDA+MLLT matrix (obtained from the source directory). -# This script does not use the trained model from the source directory to -# initialize the diagonal GMM; instead, we initialize the GMM using +# transformed with an LDA+MLLT or PCA matrix (obtained from the source +# directory). This script does not use the trained model from the source +# directory to initialize the diagonal GMM; instead, we initialize the GMM using # gmm-global-init-from-feats, which sets the means to random data points and # then does some iterations of E-M in memory. After the in-memory -# initialization we train for a few iterations in parallel. -# Note that there is a slight mismatch in that the source LDA+MLLT matrix -# (final.mat) will have been estimated using standard CMVN, and we're using -# online CMVN. We don't think this will have much effect. +# initialization we train for a few iterations in parallel. Note that if an +# LDA+MLLT transform matrix is used, there will be a slight mismatch in that the +# source LDA+MLLT matrix (final.mat) will have been estimated using standard +# CMVN, and we're using online CMVN. We don't think this will have much effect. # Begin configuration section. @@ -58,7 +58,7 @@ if [ $# != 4 ]; then echo " --stage # stage to do partial re-run from." echo " --num-gselect # Number of Gaussians per frame to" echo " # limit computation to, for speed" - echo " --subsample # In main E-M phase, use every n" + echo " --subsample # In main E-M phase, use every n" echo " # frames (a speedup)" echo " --num-frames # Maximum num-frames to keep in memory" echo " # for model initialization" @@ -89,6 +89,15 @@ for f in $data/feats.scp "$online_cmvn_config" $srcdir/splice_opts $srcdir/final [ ! -f "$f" ] && echo "$0: expecting file $f to exist" && exit 1 done +if [ -d "$dir" ]; then + bak_dir=$(mktemp -d ${dir}/backup.XXX); + echo "$0: Directory $dir already exists. 
Backing up diagonal UBM in ${bak_dir}"; + for f in $dir/final.mat $dir/final.dubm $dir/online_cmvn.conf $dir/global_cmvn.stats; do + [ -f "$f" ] && mv $f ${bak_dir}/ + done + [ -d "$dir/log" ] && mv $dir/log ${bak_dir}/ +fi + splice_opts=$(cat $srcdir/splice_opts) cp $srcdir/splice_opts $dir/ || exit 1; cp $srcdir/final.mat $dir/ || exit 1; @@ -146,10 +155,16 @@ for x in `seq 0 $[$num_iters-1]`; do $cmd $dir/log/update.$x.log \ gmm-global-est $opt --min-gaussian-weight=$min_gaussian_weight $dir/$x.dubm "gmm-global-sum-accs - $dir/$x.*.acc|" \ $dir/$[$x+1].dubm || exit 1; - rm $dir/$x.*.acc $dir/$x.dubm + + if $cleanup; then + rm $dir/$x.*.acc $dir/$x.dubm + fi fi done -rm $dir/gselect.*.gz +if $cleanup; then + rm $dir/gselect.*.gz +fi + mv $dir/$num_iters.dubm $dir/final.dubm || exit 1; exit 0; diff --git a/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh b/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh index 67845b01c8a..5dbda1780f4 100755 --- a/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh +++ b/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh @@ -21,7 +21,7 @@ # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -32,8 +32,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 @@ -88,6 +88,17 @@ for f in $srcdir/final.dubm $srcdir/final.mat $srcdir/global_cmvn.stats $srcdir/ [ ! -f $f ] && echo "No such file $f" && exit 1; done + +if [ -d "$dir" ]; then + bak_dir=$(mktemp -d ${dir}/backup.XXX); + echo "$0: Directory $dir already exists. Backing up iVector extractor in ${bak_dir}"; + for f in $dir/final.ie $dir/*.ie $dir/final.mat $dir/final.dubm \ + $dir/online_cmvn.conf $dir/global_cmvn.stats; do + [ -f "$f" ] && mv $f ${bak_dir}/ + done + [ -d "$dir/log" ] && mv $dir/log ${bak_dir}/ +fi + # Set various variables. mkdir -p $dir/log nj_full=$[$nj*$num_processes] @@ -105,7 +116,6 @@ gmm_feats="ark,s,cs:apply-cmvn-online --config=$dir/online_cmvn.conf $dir/global feats="ark,s,cs:splice-feats $splice_opts scp:$sdata/JOB/feats.scp ark:- | transform-feats $dir/final.mat ark:- ark:- | subsample-feats --n=$subsample ark:- ark:- |" - # Initialize the i-vector extractor using the input GMM, which is converted to # full because that's what the i-vector extractor expects. 
Note: we have to do
# --use-weights=false to disable regression of the log weights on the ivector,
@@ -115,7 +125,7 @@ if [ $stage -le -2 ]; then
   $cmd $dir/log/init.log \
     ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=false \
      "gmm-global-to-fgmm $dir/final.dubm -|" $dir/0.ie || exit 1
-fi
+fi


# Do Gaussian selection and posterior extraction

@@ -168,20 +178,23 @@ while [ $x -lt $num_iters ]; do
     # each accumulation process uses, since we
     # can be sure the queue will support this many.
     #
-    # The parallel-opts was either specified by
+    # The parallel-opts was either specified by
     # the user or we computed it correctly in
     # the previous stages
     $cmd --num-threads $[$num_threads*$num_processes] $dir/log/update.$x.log \
       ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1;
     rm $dir/acc.$x.*
     if $cleanup; then
-      rm $dir/acc.$x
-      # rm $dir/$x.ie
+      rm $dir/acc.$x $dir/$x.ie
     fi
   fi
   x=$[$x+1]
done

+if $cleanup; then
+  rm $dir/post.*.gz
+fi
+
rm $dir/final.ie 2>/dev/null
ln -s $x.ie $dir/final.ie

From 60f9327210ac4052151597d46ae9d4ed7c2395c0 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Sun, 2 Apr 2017 15:25:20 -0400
Subject: [PATCH 494/530] [build,src,doc] Modify get_version.sh to deal better with whitespace (avoid space in version); minor fixes (#1526)

---
 src/base/get_version.sh     |  10 +--
 src/chain/chain-training.cc |   4 +-
 src/doc/transform.dox       | 172 ++++++++++++++++++------------------
 3 files changed, 93 insertions(+), 93 deletions(-)

diff --git a/src/base/get_version.sh b/src/base/get_version.sh
index 4829391ac44..d6c6c975a4d 100755
--- a/src/base/get_version.sh
+++ b/src/base/get_version.sh
@@ -54,20 +54,20 @@ elif [ "$(git rev-parse --is-inside-work-tree 2>/dev/null)" != true ]; then
   echo "$0: Using the version number \"$version\" specified in src/.version."
 else
   # Figure out patch number.
-  version_commit=$(git log -1 --pretty=oneline ../.version | cut -f 1 -d ' ')
-  patch_number=$(git rev-list ${version_commit}..HEAD | wc -l)
+  version_commit=$(git log -1 --pretty=oneline ../.version | awk '{print $1}')
+  patch_number=$(git rev-list ${version_commit}..HEAD | wc -l | awk '{print $1}')
   version="$version.$patch_number"

   # Check for uncommitted changes in src/.
-  uncommitted_changes=$(git diff-index HEAD -- .. | wc -l)
+  uncommitted_changes=$(git diff-index HEAD -- .. | wc -l | awk '{print $1}')
   if [ $uncommitted_changes -gt 0 ]; then
     # Add suffix ~N if there are N files in src/ with uncommitted changes
     version="$version~$uncommitted_changes"
   fi

   # Figure out HEAD commit SHA-1.
-  head_commit=$(git log -1 --pretty=oneline | cut -f 1 -d ' ')
-  head_commit_short=$(git log -1 --oneline --abbrev=4 | cut -f 1 -d ' ')
+  head_commit=$(git log -1 --pretty=oneline | awk '{print $1}')
+  head_commit_short=$(git log -1 --oneline --abbrev=4 | awk '{print $1}')
   version="$version-${head_commit_short}"
 fi

diff --git a/src/chain/chain-training.cc b/src/chain/chain-training.cc
index 1bf0201fbfa..53de69a0e07 100644
--- a/src/chain/chain-training.cc
+++ b/src/chain/chain-training.cc
@@ -30,7 +30,7 @@ void ComputeChainObjfAndDeriv(const ChainTrainingOptions &opts,
                               const Supervision &supervision,
                               const CuMatrixBase<BaseFloat> &nnet_output,
                               BaseFloat *objf,
-                              BaseFloat *l2_term,
+                              BaseFloat *l2_term,
                               BaseFloat *weight,
                               CuMatrixBase<BaseFloat> *nnet_output_deriv,
                               CuMatrixBase<BaseFloat> *xent_output_deriv) {
@@ -86,7 +86,7 @@ void ComputeChainObjfAndDeriv(const ChainTrainingOptions &opts,
     // for different frames of the sequences.
As expected, they are // smaller towards the edges of the sequences (due to the penalization // of 'incorrect' pdf-ids. - if (GetVerboseLevel() >= 1) { + if (GetVerboseLevel() >= 1 && nnet_output_deriv != NULL) { int32 tot_frames = nnet_output_deriv->NumRows(), frames_per_sequence = supervision.frames_per_sequence, num_sequences = supervision.num_sequences; diff --git a/src/doc/transform.dox b/src/doc/transform.dox index 6d487722124..dfeaf6f66d5 100644 --- a/src/doc/transform.dox +++ b/src/doc/transform.dox @@ -31,7 +31,7 @@ namespace kaldi { relate to the commonalities: - \ref transform_apply - \ref transform_perspk - - \ref transform_utt2spk + - \ref transform_utt2spk - \ref transform_compose - \ref transform_weight @@ -49,8 +49,8 @@ namespace kaldi { We next discuss regression class trees and transforms that use them: - \ref transform_regtree - - + + \section transform_apply Applying global linear or affine feature transforms In the case of feature-space transforms and projections that are global, @@ -59,22 +59,22 @@ namespace kaldi { projection is represented as a matrix by which we will left-multiply a feature vector, so the transformed feature is \f$ A x \f$. An affine transform or projection is represented the same way, but we imagine a 1 has been appended to the - feature vector, so the transformed feature is + feature vector, so the transformed feature is \f$ W \left[ \begin{array}{c} x \\ 1 \end{array} \right] \f$ where \f$ W = \left[ A ; b \right] \f$, with A and b being the linear transform and the constant offset. Note that this convention differs from some of the literature, where the 1 may appear as - the first dimension rather than the last. + the first dimension rather than the last. Global transforms and projections are generally written as a type Matrix to a single file, and speaker or utterance-specific transforms or projections are stored in a table of such matrices (see \ref io_sec_tables) - indexed by speaker-id or utterance-id. + indexed by speaker-id or utterance-id. Transforms may be applied to features using the program transform-feats. Its syntax is \verbatim transform-feats -\endverbatim +\endverbatim where is an rspecifier, is an wspecifier, and may be an rxfilename or an rspecifier (see \ref io_sec_specifiers and \ref io_sec_xfilename). The program will work out whether the transform @@ -83,14 +83,14 @@ namespace kaldi { This program is typically used as part of a pipe. A typical example is: \verbatim - feats="ark:splice-feats scp:data/train.scp ark:- | + feats="ark:splice-feats scp:data/train.scp ark:- | transform-feats $dir/0.mat ark:- ark:-|" some-program some-args "$feats" some-other-args ... \endverbatim Here, the file 0.mat contains a single matrix. An example of applying speaker-specific transforms is: \verbatim - feats="ark:add-deltas scp:data/train.scp ark:- | + feats="ark:add-deltas scp:data/train.scp ark:- | transform-feats --utt2spk=ark:data/train.utt2spk ark:$dir/0.trans ark:- ark:-|" some-program some-args "$feats" some-other-args ... \endverbatim @@ -98,33 +98,33 @@ A per-utterance example would be as above but removing the --utt2spk option. In this example, the archive file 0.trans would contain transforms (e.g. CMLLR transforms) indexed by speaker-id, and the file data/train.utt2spk would have lines of the form "utt-id spk-id" (see next section for more explanation). 
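To make the per-utterance case concrete, here is a minimal sketch (the same
pipeline as above, with illustrative file names); the table 0.trans is now
indexed directly by utterance-id, so no --utt2spk option is given:
\verbatim
  feats="ark:add-deltas scp:data/train.scp ark:- |
   transform-feats ark:$dir/0.trans ark:- ark:-|"
  some-program some-args "$feats" some-other-args ...
\endverbatim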
-The program transform-feats does not care how the transformation matrix was +The program transform-feats does not care how the transformation matrix was estimated, it just applies it to the features. After it has been through all the features it prints out the average per-frame log determinant. This can be useful when comparing objective functions (this log determinant would have to be added to the per-frame likelihood printed out by programs like gmm-align, gmm-acc-stats, or gmm-decode-kaldi). If the linear part A of the transformation (i.e. ignoring the offset term) is not square, -then the program will instead print out the per-frame average of +then the program will instead print out the per-frame average of \f$ \frac{1}{2} \mathbf{logdet} (A A^T) \f$. It refers to this as the pseudo-log-determinant. -This is useful in checking convergence of MLLT estimation where the transformation matrix +This is useful in checking convergence of MLLT estimation where the transformation matrix being applied is the MLLT matrix times an LDA matrix. \section transform_perspk Speaker-independent versus per-speaker versus per-utterance adaptation Programs that estimate transforms are generally set up to do a particular kind of adaptation, i.e. speaker-independent versus (speaker- or utterance-specific). For example, LDA -and MLLT/STC transforms are speaker-independent but fMLLR transforms are speaker- or +and MLLT/STC transforms are speaker-independent but fMLLR transforms are speaker- or utterance-specific. Programs that estimate speaker- or utterance-specific transforms will work in per-utterance mode by default, but in per-speaker mode if the --spk2utt -option is supplied (see below). +option is supplied (see below). One program that can accept either speaker-independent or speaker- or utterance-specific transforms is transform-feats. This program detects whether the first argument (the transform) is an rxfilename (see \ref io_sec_xfilename) or an rspecifier (see \ref io_sec_specifiers). If the former, it treats it as a speaker-independent transform (e.g. a file containing a single matrix). -If the latter, there are two choices. If no --utt2spk option is provided, +If the latter, there are two choices. If no --utt2spk option is provided, it treats the transform as a table of matrices indexed by utterance id. If an --utt2spk option is provided (utt2spk is a table of strings indexed by utterance that contains the string-valued speaker id), then the transforms are assumed to be indexed by speaker id, and the table @@ -133,13 +133,13 @@ provided to the --utt2spk option is used to map each utterance to a speaker id. \section transform_utt2spk Utterance-to-speaker and speaker-to-utterance maps At this point we give a general overview of the --utt2spk and --spk2utt options. - These options are accepted by programs that deal with transformations; they are used when + These options are accepted by programs that deal with transformations; they are used when you are doing per-speaker (as opposed to per-utterance) adaptation. Typically programs that process already-created transforms will need the --utt2spk - option and programs that create the transforms will need the --spk2utt option. + option and programs that create the transforms will need the --spk2utt option. 
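For orientation, the two options typically pair up like this (a schematic
sketch with arguments abbreviated; see the later sections for real
invocations): the estimation program reads --spk2utt so it can pool the data
of each speaker, and transform-feats reads --utt2spk so it can look up each
utterance's speaker transform:
\verbatim
  gmm-est-fmllr --spk2utt=ark:some-directory/spk2utt 1.mdl "$feats" \
     ark:1.post ark:$dir/trans.ark
  transform-feats --utt2spk=ark:some-directory/utt2spk ark:$dir/trans.ark \
     "$feats" ark:-
\endverbatim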
A typical case is that there will be a file called some-directory/utt2spk that looks like: -\verbatim +\verbatim spk1utt1 spk1 spk1utt2 spk1 spk2utt1 spk2 @@ -148,11 +148,11 @@ spk2utt2 spk2 \endverbatim where these strings are just examples, they stand for generic speaker and utterance identifiers; and there will be a file called some-directory/spk2utt that looks like: -\verbatim +\verbatim spk1 spk1utt1 spk1utt2 spk2 spk2utt1 spk2utt2 ... -\endverbatim +\endverbatim and you will supply options that look like --utt2spk=ark:some-directory/utt2spk or --spk2utt=ark:some-directory/spk2utt. The 'ark:' prefix is necessary because these files are given as rspecifiers by the Table code, and are interpreted as archives @@ -177,7 +177,7 @@ spk2 spk2utt1 spk2utt2 for more discussion of this issue. \section transform_compose Composing transforms - + Another program that accepts generic transforms is the program compose-transforms. The general syntax is "compose-transforms a b c", and it performs the multiplication c = a b (although this involves a little more than matrix multiplication if a is affine). @@ -197,7 +197,7 @@ spk2 spk2utt1 spk2utt2 feats="ark:splice-feats scp:data/train.scp ark:- | transform-feats 0.mat ark:- ark:- | transform-feats ark:1.trans ark:- ark:- |" - ... + ... \endverbatim In general, the transforms a and b that are the inputs to compose-transforms may be either speaker-independent transforms or speaker- or utterance-specific @@ -208,11 +208,11 @@ spk2 spk2utt1 spk2utt2 represent either tables or normal files (i.e. either {r,w}specifiers or {r,w}xfilenames), subject to consistency requirements. - If a is an affine transform, in order to perform the composition correctly, compose-transforms + If a is an affine transform, in order to perform the composition correctly, compose-transforms needs to know whether b is affine or linear (it does not know this because it does not have access to the dimension of the features that are transformed by b). This is controlled by the option --b-is-affine (bool, default false). - If b is affine but you forget to set this option and a is affine, compose-transforms + If b is affine but you forget to set this option and a is affine, compose-transforms will treat b as a linear transform from dimension (the real input feature dimension) plus one, and will output a transform whose input dimension is (the real input feature dimension) plus two. There is no way for "transform-feats" to interpret this when it is to be applied to features, @@ -225,7 +225,7 @@ Eliminating silence frames can be helpful when estimating speaker adaptive transforms such as CMLLR. This even appears to be true when using a multi-class approach with a regression tree (for which, see \ref transform_regtree). The way we implement this is by weighting down the posteriors associated with -silence phones. This takes place as a modification to the \ref hmm_post +silence phones. This takes place as a modification to the \ref hmm_post "state-level posteriors". An extract of a bash shell script that does this is below (this script is discussed in more detail in \ref transform_cmllr_global): \verbatim @@ -249,7 +249,7 @@ class LdaEstimate { void Accumulate(const VectorBase &data, int32 class_id, BaseFloat weight=1.0); }; -\endverbatim +\endverbatim The program acc-lda accumulates LDA statistics using the acoustic states (i.e. pdf-ids) as the classes. It requires the transition model in order to map the alignments (expressed in terms of transition-ids) to pdf-ids. 
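For orientation, an accumulation-plus-estimation sequence might look like
this (a sketch along the lines of the standard training scripts; option
values and variable names are illustrative):
\verbatim
  ali-to-post "ark:gunzip -c $alidir/ali.1.gz|" ark:- | \
    weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark:- | \
    acc-lda --rand-prune=4.0 $alidir/final.mdl "$splicedfeats" ark,s,cs:- \
      $dir/lda.1.acc
  est-lda --dim=40 $dir/0.mat $dir/lda.*.acc
\endverbatim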
However, it is not limited to a particular type of acoustic model. @@ -262,16 +262,16 @@ when using LDA as an initialization for HLDA. \section transform_splice Frame splicing -Frame splicing (e.g. splicing nine consecutive frames together) is typically done +Frame splicing (e.g. splicing nine consecutive frames together) is typically done to the raw MFCC features prior to LDA. The program splice-feats does this. A typical line from a script that uses this is the following: \verbatim feats="ark:splice-feats scp:data/train.scp ark:- | transform-feats $dir/0.mat ark:- ark:-|" \endverbatim -and the "feats" variable would later be used as an rspecifier (c.f. \ref io_sec_specifiers) +and the "feats" variable would later be used as an rspecifier (c.f. \ref io_sec_specifiers) by some program that needs to read features. In this example we don't specify the number of frames to splice -together because we are using the defaults (--left-context=4, --right-context=4, or +together because we are using the defaults (--left-context=4, --right-context=4, or 9 frames in total). \section transform_delta Delta feature computation @@ -279,7 +279,7 @@ together because we are using the defaults (--left-context=4, --right-context=4, Computation of delta features is done by the program add-deltas, which uses the function ComputeDeltas. The delta feature computation has the same default setup as HTK's, i.e. to compute the first delta feature we multiply by the features -by a sliding window of values [ -2, 1, 0, 1, 2 ], and then normalize by +by a sliding window of values [ -2, -1, 0, 1, 2 ], and then normalize by dividing by (2^2 + 1^2 + 0^2 + 1^2 + 2^2 = 10). The second delta feature is computed by applying the same approach to the first delta feature. The number of frames of context on each side is controlled by --delta-window (default: 2) @@ -311,9 +311,9 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" case they need to be defined slightly differently for the accepted and rejected dimensions. Suppose the original feature dimension is D and the - reduced feature dimension is K. + reduced feature dimension is K. Let us forget the iteration superscript r, and use subscript j for state and - m for Gaussian mixture. + m for Gaussian mixture. For accepted dimensions (\f$0 \leq i < K\f$), the statistics are: \f[ \mathbf{G}^{(i)} = \sum_{t,j,m} \gamma_{jm}(t) \frac{1}{ \sigma^2_{jm}(i) } (\mu_{jm} - \mathbf{x}(t)) (\mu_{jm} - \mathbf{x}(t))^T @@ -333,13 +333,13 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" same, so in the code we only store statistics for K+1 rather than D dimensions. 
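In other words, the rejected-dimension statistics coincide,
\f[ \mathbf{G}^{(K)} = \mathbf{G}^{(K+1)} = \ldots = \mathbf{G}^{(D-1)} , \f]
so a single copy of them suffices alongside the K accepted-dimension
matrices.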
Also, it is convenient for the program that accumulates the statistics to only have - access to the K-dimensional model, so during HLDA accumulation we accumulate + access to the K-dimensional model, so during HLDA accumulation we accumulate statistics sufficient to estimate the K-dimensional means \f$\mu_{jm}\f$, and insead of - G we accumulate the following statistics: for accepted dimensions (\f$0 \leq i < K\f$), + G we accumulate the following statistics: for accepted dimensions (\f$0 \leq i < K\f$), \f[ \mathbf{S}^{(i)} = \sum_{t,j,m} \gamma_{jm}(t) \frac{1}{ \sigma^2_{jm}(i) } \mathbf{x}(t) \mathbf{x}(t)^T \f] - and for rejected dimensions \f$K \leq i < D\f$ + and for rejected dimensions \f$K \leq i < D\f$ \f[ \mathbf{S}^{(i)} = \sum_{t,j,m} \gamma_{jm}(t) \mathbf{x}(t) \mathbf{x}(t)^T , \f] @@ -350,13 +350,13 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" \f] and for \f$K \leq i < D\f$, \f[ - \mathbf{G}^{(i)} = \mathbf{S}^{(i)} - \beta \mu \mu^T, + \mathbf{G}^{(i)} = \mathbf{S}^{(i)} - \beta \mu \mu^T, \f] where \f$ \beta = \sum_{j,m} \gamma_{jm} \f$ is the total count and \f$\mu = \frac{1}{\beta} \sum_{j,m} \mu_{j,m}\f$ is the global feature mean. After computing the transform from the G statistics using the same computation as MLLT, we output the transform, and we also use the first K rows of the transform to project the means into dimension K and write out the transformed model. - + The computation described here is fairly slow; it is \f$ O(K^3) \f$ on each frame, and K is fairly large (e.g. 117). This is the price we pay for compact statistics; if we stored full mean and variance statistics, the per-frame computation would be \f$O(K^2)\f$. @@ -366,14 +366,14 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" the frames. If this option is activated, we need to store two separate versions of the sufficient statistics for the means. One version of the mean statistics, accumulated on the subset, is only used in the HLDA computation, and - corresponds to the quantities \f$\gamma_{jm}\f$ and \f$\mu_{jm}\f$ in the formulas above. + corresponds to the quantities \f$\gamma_{jm}\f$ and \f$\mu_{jm}\f$ in the formulas above. The other version of the mean statistics is accumulated on all the training data - and is used to write out the transformed model. - + and is used to write out the transformed model. + The overall HLDA estimation process is as follows (see rm_recipe_2/scripts/train_tri2j.sh): - First initialize it with LDA (we store both the reduced dimension matrix and the full matrix). - - Start model-building and training process. On certain (non-consecutive) + - Start model-building and training process. On certain (non-consecutive) iterations where we have decided to do the HLDA update, do the following: - Accumulate HLDA statistics (S, plus statistics for the full-dimensional means). The program that accumulates these (gmm-acc-hlda) needs the model, the un-transformed features, @@ -384,14 +384,14 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" transformation matrix which it needs to start the optimization and to correctly report auxiliary function changes. It outputs the new transform (both full and reduced dimension), and the model with newly estimated and transformed means. - + \section transform_mllt Global Semi-tied Covariance (STC) / Maximum Likelihood Linear Transform (MLLT) estimation Global STC/MLLT is a square feature-transformation matrix. 
For more details, - see "Semi-tied Covariance Matrices for Hidden Markov Models", by Mark Gales, + see "Semi-tied Covariance Matrices for Hidden Markov Models", by Mark Gales, IEEE Transactions on Speech and Audio Processing, vol. 7, 1999, pages 272-281. Viewing it as a feature-space transform, the objective function is the average - per-frame log-likelihood of the transformed features given the model, plus the + per-frame log-likelihood of the transformed features given the model, plus the log determinant of the transform. The means of the model are also rotated by transform in the update phase. The sufficient statistics are the following, for \f$ 0 \leq i < D \f$ where D is the feature dimension: @@ -399,9 +399,9 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" \mathbf{G}^{(i)} = \sum_{t,j,m} \gamma_{jm}(t) \frac{1}{ \sigma^2_{jm}(i) } (\mu_{jm} - \mathbf{x}(t)) (\mu_{jm} - \mathbf{x}(t))^T \f] See the reference, Equations (22) and (23) for the update equations. These are - basically a simplified form of the diagonal row-by-row Constrained MLLR/fMLLR update + basically a simplified form of the diagonal row-by-row Constrained MLLR/fMLLR update equations, where the first-order term of the quadratic equation disappears. Note that - our implementation differs from that reference by using a column of the inverse of the matrix + our implementation differs from that reference by using a column of the inverse of the matrix rather than the cofactor, since multiplying by the determinant does not make a difference to the result and could potentially cause problems with floating-point underflow or overflow. @@ -411,9 +411,9 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" - Estimate the LDA transformation matrix (we only need the first rows of this, not the full matrix). Call this matrix \f$\mathbf{M}\f$. - - Start a normal model building process, always using features transformed with \f$\mathbf{M}\f$. + - Start a normal model building process, always using features transformed with \f$\mathbf{M}\f$. At certain selected iterations (where we will update the MLLT matrix), we do the following: - - Accumulate MLLT statistics in the current fully-transformed space + - Accumulate MLLT statistics in the current fully-transformed space (i.e., on top of features transformed with \f$\mathbf{M}\f$). For efficiency we do this using a subset of the training data. - Do the MLLT update; let this produce a square matrix \f$\mathbf{T}\f$. @@ -423,34 +423,34 @@ feats="ark:add-deltas --print-args=false scp:data/train.scp ark:- |" The programs involved in MLLT estimation are gmm-acc-mllt and est-mllt. We also need the programs gmm-transform-means (to transform the Gaussian means using \f$\mathbf{T}\f$), and compose-transforms (to do the multiplication \f$\mathbf{M} \leftarrow \mathbf{T} \mathbf{M} \f$). - + \section transform_cmllr_global Global CMLLR/fMLLR transforms Constrained Maximum Likelihood Linear Regression (CMLLR), also known as feature-space MLLR (fMLLR), is an affine feature transform of the form \f$ \mathbf{x} \rightarrow \mathbf{A} \mathbf{x} + \mathbf{b} \f$, - which we write in the form \f$ \mathbf{x} \rightarrow \mathbf{W} \mathbf{x}^+ \f$, where + which we write in the form \f$ \mathbf{x} \rightarrow \mathbf{W} \mathbf{x}^+ \f$, where \f$\mathbf{x}^+ = \left[\begin{array}{c} \mathbf{x} \\ 1 \end{array} \right]\f$ is the feature with - a 1 appended. Note that this differs from some of the literature where the 1 comes first. + a 1 appended. 
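Spelled out with \f$ \mathbf{W} = \left[ \mathbf{A} \, ; \, \mathbf{b} \right] \f$, this is just the usual affine map:
\f[ \mathbf{W} \mathbf{x}^+ = \left[ \mathbf{A} \, ; \, \mathbf{b} \right] \left[ \begin{array}{c} \mathbf{x} \\ 1 \end{array} \right] = \mathbf{A} \mathbf{x} + \mathbf{b} . \f]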
Note that this differs from some of the literature where the 1 comes first. For a review paper that explains CMLLR and the estimation techniques we use, see "Maximum likelihood linear transformations for HMM-based speech recognition" by Mark Gales, - Computer Speech and Language Vol. 12, pages 75-98. + Computer Speech and Language Vol. 12, pages 75-98. The sufficient statistics we store are: \f[ \mathbf{K} = \sum_{t,j,m} \gamma_{j,m}(t) \Sigma_{jm}^{-1} \mu_{jm} \mathbf{x}(t)^+ \f] where \f$\Sigma_{jm}^{-1}\f$ is the inverse covariance matrix, and for \f$0 \leq i < D \f$ where D is the feature dimension, - \f[ \mathbf{G}^{(i)} = \sum_{t,j,m} \gamma_{j,m}(t) \frac{1}{\sigma^2_{j,m}(i)} \mathbf{x}(t)^+ \left.\mathbf{x}(t)^+\right.^T \f] + \f[ \mathbf{G}^{(i)} = \sum_{t,j,m} \gamma_{j,m}(t) \frac{1}{\sigma^2_{j,m}(i)} \mathbf{x}(t)^+ \left.\mathbf{x}(t)^+\right.^T \f] Our estimation scheme is the standard one, see Appendix B of the reference (in particular section B.1, "Direct method over rows"). We differ by using a column of the inverse in place of the cofactor row, i.e. ignoring the factor of the determinant, as it does not affect the result and causes danger of numerical underflow or overflow. - Estimation of global Constrained MLLR (CMLLR) transforms is done by the + Estimation of global Constrained MLLR (CMLLR) transforms is done by the class FmllrDiagGmmAccs, - and by the program gmm-est-fmllr (also see gmm-est-fmllr-gpost). The syntax + and by the program gmm-est-fmllr (also see gmm-est-fmllr-gpost). The syntax of gmm-est-fmllr is: \verbatim gmm-est-fmllr [options] \ @@ -486,27 +486,27 @@ feats="ark:add-deltas --print-args=false scp:data/test.scp ark:- | gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst \ "$feats" ark,t:$dir/test.tra ark,t:$dir/test.ali 2>$dir/decode.log -\endverbatim +\endverbatim \section transform_lvtln Linear VTLN (LVTLN) In recent years, there have been a number of papers that describe implementations of Vocal Tract Length Normalization (VTLN) that - work out a linear feature transform corresponding to each VTLN + work out a linear feature transform corresponding to each VTLN warp factor. See, for example, ``Using VTLN for broadcast news transcription'', by D. Y. Kim, S. Umesh, M. J. F. Gales, T. Hain and P. C. Woodland, ICSLP 2004. - + We implement a method in this general category using the class LinearVtln, and programs such as gmm-init-lvtln, gmm-train-lvtln-special, and gmm-est-lvtln-trans. The LinearVtln object essentially stores a set of linear feature transforms, one for each warp factor. Let these linear feature transform matrices be \f[\mathbf{A}^{(i)}, 0\leq i < N, \f] - where for instance we might have \f$N\f$=31, corresponding to 31 different warp - factors. We will describe below how we obtain these matrices below. + where for instance we might have \f$N\f$=31, corresponding to 31 different warp + factors. We will describe below how we obtain these matrices below. The way the speaker-specific transform is estimated is as follows. First, we require some kind of model and a corresponding alignment. In the - example scripts we do this either with a small monophone model, or with + example scripts we do this either with a small monophone model, or with a full triphone model. From this model and alignment, and using the original, unwarped features, we compute the conventional statistics for estimating CMLLR. 
When computing the LVTLN transform, what we do is take each matrix @@ -514,33 +514,33 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ maximizes the CMLLR auxiliary function for the transform \f$\mathbf{W} = \left[ \mathbf{A}^{(i)} \, ; \, \mathbf{b} \right]\f$. This value of \f$\mathbf{W}\f$ that gave the best auxiliary function value - (i.e. maximizing over i) becomes the transform for that speaker. Since we + (i.e. maximizing over i) becomes the transform for that speaker. Since we are estimating a mean offset here, we are essentially combining a kind of model-based cepstral mean normalization (or alternatively an offset-only form of CMLLR) with VTLN warping implemented - as a linear transform. This avoids us having to implement mean normalization + as a linear transform. This avoids us having to implement mean normalization as a separate step. We next describe how we estimate the matrices \f$\mathbf{A}^{(i)}\f$. We don't do this in the same way as described in the referenced paper; our method is simpler (and easier to justify). Here we describe our computation for a particular warp factor; in the current scripts we have 31 distinct warp - factors ranging from 0.85, 0.86, ..., 1.15. + factors ranging from 0.85, 0.86, ..., 1.15. We take a subset of feature data (e.g. several tens of utterances), and for this subset we compute both the original and transformed features, where the transformed features are computed using a conventional VLTN computation - (see \ref feat_vtln). - Call the original and transformed features \f$\mathbf{x}(t)\f$ and \f$\mathbf{y}(t)\f$ respectively, + (see \ref feat_vtln). + Call the original and transformed features \f$\mathbf{x}(t)\f$ and \f$\mathbf{y}(t)\f$ respectively, where \f$t\f$ will range over the frames of the selected utterances. We compute the affine transform that maps \f$\mathbf{x}\f$ to \f$\mathbf{y}\f$ in a least-squares - sense, i.e. if \f$\mathbf{y}' = \mathbf{A} \mathbf{x} + \mathbf{b}\f$, + sense, i.e. if \f$\mathbf{y}' = \mathbf{A} \mathbf{x} + \mathbf{b}\f$, we compute \f$\mathbf{A}\f$ and \f$\mathbf{b}\f$ that minimizes the sum-of-squares difference \f$\sum_t (\mathbf{y}'(t) - \mathbf{y}(t) )^T (\mathbf{y}'(t) - \mathbf{y}(t) )\f$. Then we normalize the diagonal variance as follows: we compute the variance of the original features as \f$\mathbf{\Sigma}^{(x)}\f$ and of the linearly transformed features as \f$\mathbf{\Sigma}^{(y')}\f$, and for each dimension index d we multiply the - d'th row of \f$\mathbf{A}\f$ by - \f$\sqrt{ \frac{\mathbf{\Sigma}^{(x)}_{d,d}}{\mathbf{\Sigma}^{(y')}_{d,d}}}\f$. + d'th row of \f$\mathbf{A}\f$ by + \f$\sqrt{ \frac{\mathbf{\Sigma}^{(x)}_{d,d}}{\mathbf{\Sigma}^{(y')}_{d,d}}}\f$. The resulting matrix will become \f$\mathbf{A}^{(i)}\f$ for some value of i. The command-line tools support the option to ignore the log determinant term @@ -579,8 +579,8 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ are speaker-specific; other quantities (i.e. \f$\mathbf{A}\f$ and \f$\mathbf{B}\f$) are global and shared across all speakers. - The most important factor in this equation is the middle one, - with the exponential function in it. + The most important factor in this equation is the middle one, + with the exponential function in it. The factor \f$\mathbf{D}_s\f$ gives us the ability to combine model-based mean and optionally variance normalization (i.e. 
offset-only or diagonal-only CMLLR) @@ -596,7 +596,7 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ there would be no point to this technique as the other quantities in the equation would add no degrees of freedom. The tools support three kinds of constraints on \f$\mathbf{D}_s\f$: it may be of the form - \f$[ {\mathbf I} \, \;\, {\mathbf 0} ]\f$ (no adaptation), or + \f$[ {\mathbf I} \, \;\, {\mathbf 0} ]\f$ (no adaptation), or \f$[ {\mathbf I} \, \;\, {\mathbf m} ]\f$ (offset only), or \f$[ {\mathrm{diag}}( {\mathbf d} ) \, \;\, {\mathbf m} ]\f$ (diagonal CMLLR); this is controlled by the --normalize-type options to the command-line tools. @@ -613,9 +613,9 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ if we were to warp by a factor f and then a factor g, this should be the same as warping by the combined factor fg. Let l = log(f) and m = log(g). Then we achieve this - property via the identity + property via the identity \f[ \exp( l \mathbf{A} ) \exp( m \mathbf{A}) = \exp( (l+m) \mathbf{A} ) . \f] - + The ET computation for a particular speaker is as follows; this assumes we are given \f$\mathbf{A}\f$ and \f$\mathbf{B}\f$. We accumulate conventional CMLLR sufficient statistics for the speaker. In the update phase we iteratively optimize @@ -636,9 +636,9 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ \f$\mathbf{B}\f$, or the model. - If updating \f$\mathbf{A}\f$, we do this given fixed values of \f$t_s\f$ and \f$\mathbf{D}_s\f$. The update is not guaranteed to - converge, but converges rapidly in practice; it's based on a + converge, but converges rapidly in practice; it's based on a quadratic "weak-sense auxiliary function" - where the quadratic term is obtained using a first-order truncation + where the quadratic term is obtained using a first-order truncation of the Taylor series expansion of the matrix exponential function. After updating \f$\mathbf{A}\f$, we modify \f$\mathbf{B}\f$ in order to renormalize the \f$t_s\f$ to zero; this involves premultiplying @@ -646,11 +646,11 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ value of \f$t_s\f$. - If updating \f$\mathbf{B}\f$, this is also done using fixed values of - \f$t_s\f$ and \f$\mathbf{D}_s\f$, and the update is similar to MLLT + \f$t_s\f$ and \f$\mathbf{D}_s\f$, and the update is similar to MLLT (a.k.a. global STC). For purposes of the accumulation and update, we imagine we are estimating an MLLT matrix just to the left of \f$\mathbf{A}\f$, i.e. some matrix - \f$\mathbf{C} \in \Re^{D\times D}\f$; let us define + \f$\mathbf{C} \in \Re^{D\times D}\f$; let us define \f$\mathbf{C}^+ = \left[ \begin{array}{cc} \mathbf{C} & 0 \\ 0 & 1 \end{array} \right]\f$. The transform will be \f$\mathbf{W}_s = \mathbf{D}_s \mathbf{C}^+ \exp ( t_s \mathbf{A} ) \mathbf{B}\f$. @@ -660,24 +660,24 @@ gmm-decode-faster --beam=30.0 --acoustic-scale=0.08333 \ \f$\exp ( t_s \mathbf{A} ) \mathbf{B}\f$ as a feature-space transform (i.e. as part of the features). After estimating \f$\mathbf{C}\f$, we will use the identity \f[ - \mathbf{C}^+ \exp ( t_s \mathbf{A} ) = \exp ( t_s \mathbf{C}^+ \mathbf{A} \left.\mathbf{C}^+\right.^{-1} ) \mathbf{C}^+ + \mathbf{C}^+ \exp ( t_s \mathbf{A} ) = \exp ( t_s \mathbf{C}^+ \mathbf{A} \left.\mathbf{C}^+\right.^{-1} ) \mathbf{C}^+ \f] so the update becomes: \f[ \mathbf{A} \leftarrow \mathbf{C}^+ \mathbf{A} \left.\mathbf{C}^+\right.^{-1} , \ \ \mathbf{B} \leftarrow \mathbf{C}^+ \mathbf{B} . 
\f] At this point we need to transform the model means with the matrix - \f$\mathbf{C}\f$. The reader might question how this interacts with the + \f$\mathbf{C}\f$. The reader might question how this interacts with the fact that for estimating \f$\mathbf{C}\f$, we viewed the quantity \f$\mathbf{D}_s\f$ as a model-space transform. If \f$\mathbf{D}_s\f$ only - contains a mean offset, we can still prove that the auxiliary function + contains a mean offset, we can still prove that the auxiliary function would increase, except we would have to change the offsets appropriately (this is not necessary to do explicitly, as we will re-estimate them on - the next iteration anyway). However, if \f$\mathbf{D}_s\f$ has non-unit - diagonal (i.e. is diagonal not offset CMLLR), this re-estimation process - is not guaranteed to improve the likelihood; the tools will print a warning + the next iteration anyway). However, if \f$\mathbf{D}_s\f$ has non-unit + diagonal (i.e. is diagonal not offset CMLLR), this re-estimation process + is not guaranteed to improve the likelihood; the tools will print a warning in this case. In order to avoid encountering this case, our scripts - train in a mode where \f$\mathbf{D}_s\f$ is an offset-only transform; but + train in a mode where \f$\mathbf{D}_s\f$ is an offset-only transform; but in test time we allow \f$\mathbf{D}_s\f$ to be a diagonal CMLLR transform, which seems to give slightly better results than the offset-only case. @@ -704,7 +704,7 @@ expanded features). For very fast operation, it is possible to apply these approaches using a very tiny model with a phone-based language model, and some of our example scripts demonstrate this. There is also the capability in the feature extraction code to subtract the mean on a per-utterance basis (the ---subtract-mean option to compute-mfcc-feats and compute-plp-feats). +--subtract-mean option to compute-mfcc-feats and compute-plp-feats). In order to support per-utterance and per-speaker mean and variance normalization we provide the programs compute-cmvn-stats and apply-cmvn. The program From 3c944010385dc9bd407cf5afd204cda69c6340e3 Mon Sep 17 00:00:00 2001 From: Karel Vesely Date: Tue, 4 Apr 2017 19:31:30 +0200 Subject: [PATCH 495/530] [build]: remove openfst check (#1531) It appears there may be no good reason to disallow system-wide OpenFst. --- tools/extras/check_dependencies.sh | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tools/extras/check_dependencies.sh b/tools/extras/check_dependencies.sh index 3c26fd53e82..43579334c89 100755 --- a/tools/extras/check_dependencies.sh +++ b/tools/extras/check_dependencies.sh @@ -181,14 +181,6 @@ if which grep >&/dev/null && pwd | grep -E 'JOB|LMWT' >/dev/null; then status=1; fi -if [ -f /usr/lib64/libfst.so.1 ] || [ -f /usr/local/include/fst.h ] || \ - [ -f /usr/include/fst/fst.h ] || [ -f /usr/local/bin/fstinfo ]; then - echo "*** $0: Kaldi cannot be installed (for now) if you have OpenFst" - echo "*** installed in system space (version mismatches, etc.)" - echo "*** Please try to uninstall it." - status=1 -fi - if ! $printed && [ $status -eq 0 ]; then echo "$0: all OK." 
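  # (If you suspect a version clash with a system-wide OpenFst install, you
  # can still look for one by hand; an illustrative command, not part of
  # this script:
  #   ls /usr/include/fst/fst.h /usr/local/include/fst/fst.h 2>/dev/null )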
fi

From d8b34d4b50ec158c1da0174ae6eeb8b3a0fac8a1 Mon Sep 17 00:00:00 2001
From: Shiyin Kang
Date: Tue, 4 Apr 2017 13:10:40 -0500
Subject: [PATCH 496/530] [src] cudamatrix: speed up AddColSumMat with transform reduce kernel template (#1530)

CuVector::AddColSumMat[no-trans], 16 0.0057 0.0172 3.01x
CuVector::AddColSumMat[no-trans], 32 0.0242 0.0668 2.76x
CuVector::AddColSumMat[no-trans], 64 0.0992 0.2577 2.60x
CuVector::AddColSumMat[no-trans], 128 0.3747 0.9280 2.48x
CuVector::AddColSumMat[no-trans], 256 1.4711 3.0541 2.08x
CuVector::AddColSumMat[no-trans], 512 5.1709 9.4713 1.83x
CuVector::AddColSumMat[no-trans], 1024 12.4352 20.4517 1.64x
CuVector::AddColSumMat[no-trans], 16 0.0060 0.0175 2.91x
CuVector::AddColSumMat[no-trans], 32 0.0240 0.0672 2.80x
CuVector::AddColSumMat[no-trans], 64 0.1006 0.2712 2.70x
CuVector::AddColSumMat[no-trans], 128 0.3691 0.9097 2.46x
CuVector::AddColSumMat[no-trans], 256 1.4530 3.1044 2.14x
CuVector::AddColSumMat[no-trans], 512 4.4524 7.5872 1.70x
CuVector::AddColSumMat[no-trans], 1024 11.1212 16.1423 1.45x
---
 src/cudamatrix/cu-kernels-ansi.h |  6 +++++
 src/cudamatrix/cu-kernels.cu     | 43 +++++++++++++++++++++++++++++++-
 src/cudamatrix/cu-kernels.h      | 10 ++++++++
 src/cudamatrix/cu-vector.cc      | 24 +++++++++++-------
 4 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/src/cudamatrix/cu-kernels-ansi.h b/src/cudamatrix/cu-kernels-ansi.h
index a69246a339a..444da38dd30 100644
--- a/src/cudamatrix/cu-kernels-ansi.h
+++ b/src/cudamatrix/cu-kernels-ansi.h
@@ -30,6 +30,12 @@
 #if HAVE_CUDA == 1
 extern "C" {
+void cudaD_add_col_sum_mat(int Gr, int Bl, double* result, const double* mat,
+                           const MatrixDim d, const double alpha,
+                           const double beta);
+void cudaF_add_col_sum_mat(int Gr, int Bl, float* result, const float* mat,
+                           const MatrixDim d, const float alpha,
+                           const float beta);
 void cudaD_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src,
                     const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                     int src_stride);
diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu
index d4b247ffaa7..60800d9568d 100644
--- a/src/cudamatrix/cu-kernels.cu
+++ b/src/cudamatrix/cu-kernels.cu
@@ -1220,7 +1220,7 @@ static void _equal_element_mask(const Real *mat1, const Real *mat2, Real *mask,
 }

 enum EnumTransformReduce {
-  SUM, MAX, MIN, LINFNORM, L2NORM, L1NORM, L0NORM, LPNORM
+  SUMAB, SUM, MAX, MIN, LINFNORM, L2NORM, L1NORM, L0NORM, LPNORM
 };

 template<EnumTransformReduce TransReduceType, typename Real>
@@ -1243,6 +1243,35 @@ struct TransReduceOp {
   }
 };

+template<typename Real>
+struct TransReduceOp<SUMAB, Real> {
+  const Real alpha_;
+  const Real beta_;
+  TransReduceOp(const Real& a, const Real& b) :
+      alpha_(a), beta_(b) {
+  }
+  __forceinline__
+  __device__ Real InitValue() const {
+    return Real(0);
+  }
+  __forceinline__
+  __device__ Real Transform(const Real& x) const {
+    return x;
+  }
+  __forceinline__
+  __device__ Real Reduce(const Real& a, const Real& b) const {
+    return a + b;
+  }
+  __forceinline__
+  __device__ Real PostReduce(const Real& x, const Real& output) const {
+    if (beta_ == Real(0)) {
+      return alpha_ * x;
+    } else {
+      return alpha_ * x + beta_ * output;
+    }
+  }
+};
+
 template<typename Real>
 struct TransReduceOp<SUM, Real> {
   __forceinline__
@@ -3570,6 +3599,12 @@ void cudaF_sum_mat_cols(int Gr, int Bl, float* result, const float* mat,
   _transform_reduce_mat_cols<<<Gr, Bl>>>(result,mat,d,
                                          TransReduceOp<SUM, float>());
 }
+void cudaF_add_col_sum_mat(int Gr, int Bl, float* result, const float* mat,
+                           const MatrixDim d, const float alpha,
+                           const float beta) {
+  _transform_reduce_mat_cols<<<Gr, Bl>>>(result, mat, d,
+                                         TransReduceOp<SUMAB, float>(alpha, beta));
+}

 void cudaF_replace_value(int
Gr, int Bl, float *v, int dim, float orig, float changed) {
@@ -4225,6 +4260,12 @@ void cudaD_sum_mat_cols(int Gr, int Bl, double* result, const double* mat,
   _transform_reduce_mat_cols<<<Gr, Bl>>>(result,mat,d,
                                          TransReduceOp<SUM, double>());
 }
+void cudaD_add_col_sum_mat(int Gr, int Bl, double* result, const double* mat,
+                           const MatrixDim d, const double alpha,
+                           const double beta) {
+  _transform_reduce_mat_cols<<<Gr, Bl>>>(result, mat, d,
+                                         TransReduceOp<SUMAB, double>(alpha, beta));
+}

 void cudaD_replace_value(int Gr, int Bl, double *v, int dim, double orig,
                          double changed) {
diff --git a/src/cudamatrix/cu-kernels.h b/src/cudamatrix/cu-kernels.h
index 87aaf096570..77352b5925f 100644
--- a/src/cudamatrix/cu-kernels.h
+++ b/src/cudamatrix/cu-kernels.h
@@ -38,6 +38,16 @@
 namespace kaldi {
+inline void cuda_add_col_sum_mat(int Gr, int Bl, double* result,
+                                 const double* mat, const MatrixDim d,
+                                 const double alpha, const double beta) {
+  cudaD_add_col_sum_mat(Gr, Bl, result, mat, d, alpha, beta);
+}
+inline void cuda_add_col_sum_mat(int Gr, int Bl, float* result,
+                                 const float* mat, const MatrixDim d,
+                                 const float alpha, const float beta) {
+  cudaF_add_col_sum_mat(Gr, Bl, result, mat, d, alpha, beta);
+}
 inline void cuda_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src,
                           const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                           int src_stride) {
diff --git a/src/cudamatrix/cu-vector.cc b/src/cudamatrix/cu-vector.cc
index e6aa72249f7..b825b9c0a6e 100644
--- a/src/cudamatrix/cu-vector.cc
+++ b/src/cudamatrix/cu-vector.cc
@@ -1173,19 +1173,25 @@ void CuVectorBase<Real>::AddRowSumMat(Real alpha, const CuMatrixBase<Real> &mat,
 }

-
 template<typename Real>
-void CuVectorBase<Real>::AddColSumMat(Real alpha,
-                                      const CuMatrixBase<Real> &mat,
+void CuVectorBase<Real>::AddColSumMat(Real alpha, const CuMatrixBase<Real> &mat,
                                       Real beta) {
-  KALDI_ASSERT(mat.NumRows() == Dim());
-
-  CuVector<Real> ones(mat.NumCols());
-  ones.Set(1.0);
-  this->AddMatVec(alpha, mat, kNoTrans, ones, beta);
-}
+#if HAVE_CUDA == 1
+  if (CuDevice::Instantiate().Enabled()) {
+    Timer tim;
+    KALDI_ASSERT(mat.NumRows() == Dim());
+    cuda_add_col_sum_mat(mat.NumRows(), CU1DBLOCK, Data(), mat.Data(),
+                         mat.Dim(), alpha, beta);
+    CU_SAFE_CALL(cudaGetLastError());
+    CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
+  } else
+#endif
+  {
+    Vec().AddColSumMat(alpha, mat.Mat(), beta);
+  }
+}

 template<typename Real>
 void CuVectorBase<Real>::InvertElements() {

From e5b14194355ecf7d117a46c722bdc81b2e0eca4f Mon Sep 17 00:00:00 2001
From: Hossein Hadian
Date: Tue, 4 Apr 2017 17:52:17 -0400
Subject: [PATCH 497/530] [src] Cosmetic change: remove 'train.tra' from usage messages (#1529)

---
 src/bin/ali-to-phones.cc                      | 2 +-
 src/bin/align-equal.cc                        | 9 ++++++---
 src/bin/compile-train-graphs.cc               | 6 ++++--
 src/bin/phones-to-prons.cc                    | 3 ++-
 src/bin/prons-to-wordali.cc                   | 4 ++--
 src/gmmbin/gmm-align-compiled.cc              | 4 ++--
 src/gmmbin/gmm-align.cc                       | 6 ++++--
 src/latbin/lattice-best-path.cc               | 2 +-
 src/latbin/lattice-mbr-decode.cc              | 4 ++--
 src/latbin/linear-to-nbest.cc                 | 3 ++-
 src/latbin/nbest-to-linear.cc                 | 2 +-
 src/nnet2bin/nnet-align-compiled.cc           | 7 ++++---
 src/nnet3bin/nnet3-align-compiled.cc          | 4 ++--
 src/onlinebin/online-wav-gmm-decode-faster.cc | 2 +-
 14 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/src/bin/ali-to-phones.cc b/src/bin/ali-to-phones.cc
index b370dbc7f18..2a76000cfae 100644
--- a/src/bin/ali-to-phones.cc
+++ b/src/bin/ali-to-phones.cc
@@ -35,7 +35,7 @@ int main(int argc, char *argv[]) {
         "Usage: ali-to-phones [options] <model> <alignments-rspecifier> "
         "<phone-transcript-wspecifier>\n"
         "e.g.: \n"
-        " ali-to-phones 1.mdl ark:1.ali ark:phones.tra\n"
+        " ali-to-phones 1.mdl ark:1.ali ark:-\n"
"or:\n" " ali-to-phones --ctm-output 1.mdl ark:1.ali 1.ctm\n" "See also: show-alignments lattice-align-phones\n"; diff --git a/src/bin/align-equal.cc b/src/bin/align-equal.cc index 3d35ee33daa..a3bc40dc236 100644 --- a/src/bin/align-equal.cc +++ b/src/bin/align-equal.cc @@ -36,10 +36,13 @@ int main(int argc, char *argv[]) { using fst::VectorFst; using fst::StdArc; - const char *usage = "Write equally spaced alignments of utterances (to get training started)\n" - "Usage: align-equal \n" + const char *usage = "Write equally spaced alignments of utterances " + "(to get training started)\n" + "Usage: align-equal " + " \n" "e.g.: \n" - " align-equal 1.tree 1.mdl lex.fst scp:train.scp ark:train.tra ark:equal.ali\n"; + " align-equal 1.tree 1.mdl lex.fst scp:train.scp " + "'ark:sym2int.pl -f 2- words.txt text|' ark:equal.ali\n"; ParseOptions po(usage); std::string disambig_rxfilename; diff --git a/src/bin/compile-train-graphs.cc b/src/bin/compile-train-graphs.cc index 6636ef88878..874d079376e 100644 --- a/src/bin/compile-train-graphs.cc +++ b/src/bin/compile-train-graphs.cc @@ -37,9 +37,11 @@ int main(int argc, char *argv[]) { const char *usage = "Creates training graphs (without transition-probabilities, by default)\n" "\n" - "Usage: compile-train-graphs [options] \n" + "Usage: compile-train-graphs [options] " + " \n" "e.g.: \n" - " compile-train-graphs tree 1.mdl lex.fst ark:train.tra ark:graphs.fsts\n"; + " compile-train-graphs tree 1.mdl lex.fst " + "'ark:sym2int.pl -f 2- words.txt text|' ark:graphs.fsts\n"; ParseOptions po(usage); TrainingGraphCompilerOptions gopts; diff --git a/src/bin/phones-to-prons.cc b/src/bin/phones-to-prons.cc index 6e3cf7a4651..0d7ab12c232 100644 --- a/src/bin/phones-to-prons.cc +++ b/src/bin/phones-to-prons.cc @@ -80,7 +80,8 @@ int main(int argc, char *argv[]) { " \n" "e.g.: \n" " ali-to-phones 1.mdl ark:1.ali ark:- | \\\n" - " phones-to-prons L_align.fst 46 47 ark:- 1.tra ark:1.prons\n"; + " phones-to-prons L_align.fst 46 47 ark:- " + "'ark:sym2int.pl -f 2- words.txt text|' ark:1.prons\n"; ParseOptions po(usage); po.Read(argc, argv); diff --git a/src/bin/prons-to-wordali.cc b/src/bin/prons-to-wordali.cc index 8e89d7cc644..a6331043500 100644 --- a/src/bin/prons-to-wordali.cc +++ b/src/bin/prons-to-wordali.cc @@ -52,8 +52,8 @@ int main(int argc, char *argv[]) { " \n" "e.g.: \n" " ali-to-phones 1.mdl ark:1.ali ark:- | \\\n" - " phones-to-prons L_align.fst 46 47 ark:- 1.tra ark:- | \\\n" - " prons-to-wordali ark:- \\\n" + " phones-to-prons L_align.fst 46 47 ark:- 'ark:sym2int.pl -f 2- words.txt text|' \\\n" + " ark:- | prons-to-wordali ark:- \\\n" " \"ark:ali-to-phones --write-lengths 1.mdl ark:1.ali ark:-|\" ark:1.wali\n"; ParseOptions po(usage); diff --git a/src/gmmbin/gmm-align-compiled.cc b/src/gmmbin/gmm-align-compiled.cc index 85ac3fd27a7..c3aadcc7ec9 100644 --- a/src/gmmbin/gmm-align-compiled.cc +++ b/src/gmmbin/gmm-align-compiled.cc @@ -44,8 +44,8 @@ int main(int argc, char *argv[]) { "e.g.: \n" " gmm-align-compiled 1.mdl ark:graphs.fsts scp:train.scp ark:1.ali\n" "or:\n" - " compile-train-graphs tree 1.mdl lex.fst ark:train.tra b, ark:- | \\\n" - " gmm-align-compiled 1.mdl ark:- scp:train.scp t, ark:1.ali\n"; + " compile-train-graphs tree 1.mdl lex.fst 'ark:sym2int.pl -f 2- words.txt text|' \\\n" + " ark:- | gmm-align-compiled 1.mdl ark:- scp:train.scp t, ark:1.ali\n"; ParseOptions po(usage); AlignConfig align_config; diff --git a/src/gmmbin/gmm-align.cc b/src/gmmbin/gmm-align.cc index 7ef5f9c8dab..c9c2fde11f6 100644 --- a/src/gmmbin/gmm-align.cc +++ 
b/src/gmmbin/gmm-align.cc @@ -39,9 +39,11 @@ int main(int argc, char *argv[]) { const char *usage = "Align features given [GMM-based] models.\n" - "Usage: gmm-align [options] tree-in model-in lexicon-fst-in feature-rspecifier transcriptions-rspecifier alignments-wspecifier\n" + "Usage: gmm-align [options] tree-in model-in lexicon-fst-in feature-rspecifier " + "transcriptions-rspecifier alignments-wspecifier\n" "e.g.: \n" - " gmm-align tree 1.mdl lex.fst scp:train.scp ark:train.tra ark:1.ali\n"; + " gmm-align tree 1.mdl lex.fst scp:train.scp " + "'ark:sym2int.pl -f 2- words.txt text|' ark:1.ali\n"; ParseOptions po(usage); AlignConfig align_config; BaseFloat acoustic_scale = 1.0; diff --git a/src/latbin/lattice-best-path.cc b/src/latbin/lattice-best-path.cc index dc25fb351c6..ce9016d750c 100644 --- a/src/latbin/lattice-best-path.cc +++ b/src/latbin/lattice-best-path.cc @@ -38,7 +38,7 @@ int main(int argc, char *argv[]) { "Note: if you want output as FSTs, use lattice-1best; if you want output\n" "with acoustic and LM scores, use lattice-1best | nbest-to-linear\n" "Usage: lattice-best-path [options] [ [ ] ]\n" - " e.g.: lattice-best-path --acoustic-scale=0.1 ark:1.lats ark:1.tra ark:1.ali\n"; + " e.g.: lattice-best-path --acoustic-scale=0.1 ark:1.lats 'ark,t:|int2sym.pl -f 2- words.txt > text' ark:1.ali\n"; ParseOptions po(usage); BaseFloat acoustic_scale = 1.0; diff --git a/src/latbin/lattice-mbr-decode.cc b/src/latbin/lattice-mbr-decode.cc index 465f4e35fbd..fba5daa4dd8 100644 --- a/src/latbin/lattice-mbr-decode.cc +++ b/src/latbin/lattice-mbr-decode.cc @@ -43,8 +43,8 @@ int main(int argc, char *argv[]) { "Usage: lattice-mbr-decode [options] lattice-rspecifier " "transcriptions-wspecifier [ bayes-risk-wspecifier " "[ sausage-stats-wspecifier [ times-wspecifier] ] ] \n" - " e.g.: lattice-mbr-decode --acoustic-scale=0.1 ark:1.lats ark:1.tra " - "ark:/dev/null ark:1.sau\n"; + " e.g.: lattice-mbr-decode --acoustic-scale=0.1 ark:1.lats " + "'ark,t:|int2sym.pl -f 2- words.txt > text' ark:/dev/null ark:1.sau\n"; ParseOptions po(usage); BaseFloat acoustic_scale = 1.0; diff --git a/src/latbin/linear-to-nbest.cc b/src/latbin/linear-to-nbest.cc index fd025f382b6..a1864d0d14a 100644 --- a/src/latbin/linear-to-nbest.cc +++ b/src/latbin/linear-to-nbest.cc @@ -67,7 +67,8 @@ int main(int argc, char *argv[]) { "\n" "Note: if the rspecifiers for lm-cost or ac-cost are the empty string,\n" "these value will default to zero.\n" - " e.g.: linear-to-nbest ark:1.ali ark:1.tra ark:1.lmscore ark:1.acscore " + " e.g.: linear-to-nbest ark:1.ali 'ark:sym2int.pl -f 2- words.txt text|' " + "ark:1.lmscore ark:1.acscore " "ark:1.nbest\n"; ParseOptions po(usage); diff --git a/src/latbin/nbest-to-linear.cc b/src/latbin/nbest-to-linear.cc index 6b3fe5e1d01..d63c380133a 100644 --- a/src/latbin/nbest-to-linear.cc +++ b/src/latbin/nbest-to-linear.cc @@ -39,7 +39,7 @@ int main(int argc, char *argv[]) { "Usage: nbest-to-linear [options] " "[ [ []]]\n" " e.g.: lattice-to-nbest --n=10 ark:1.lats ark:- | \\\n" - " nbest-to-linear ark:1.lats ark,t:1.ali ark,t:1.tra\n"; + " nbest-to-linear ark:1.lats ark,t:1.ali 'ark,t:|int2sym.pl -f 2- words.txt > text'\n"; ParseOptions po(usage); diff --git a/src/nnet2bin/nnet-align-compiled.cc b/src/nnet2bin/nnet-align-compiled.cc index 60045eb7cce..8f5537c26c7 100644 --- a/src/nnet2bin/nnet-align-compiled.cc +++ b/src/nnet2bin/nnet-align-compiled.cc @@ -40,12 +40,13 @@ int main(int argc, char *argv[]) { const char *usage = "Align features given neural-net-based model\n" - "Usage: 
nnet-align-compiled [options] model-in graphs-rspecifier feature-rspecifier alignments-wspecifier\n" + "Usage: nnet-align-compiled [options] model-in graphs-rspecifier " + "feature-rspecifier alignments-wspecifier\n" "e.g.: \n" " nnet-align-compiled 1.mdl ark:graphs.fsts scp:train.scp ark:1.ali\n" "or:\n" - " compile-train-graphs tree 1.mdl lex.fst ark:train.tra b, ark:- | \\\n" - " nnet-align-compiled 1.mdl ark:- scp:train.scp t, ark:1.ali\n"; + " compile-train-graphs tree 1.mdl lex.fst 'ark:sym2int.pl -f 2- words.txt text|' \\\n" + " ark:- | nnet-align-compiled 1.mdl ark:- scp:train.scp t, ark:1.ali\n"; ParseOptions po(usage); AlignConfig align_config; diff --git a/src/nnet3bin/nnet3-align-compiled.cc b/src/nnet3bin/nnet3-align-compiled.cc index bab5d16f370..84a5f38b4ee 100644 --- a/src/nnet3bin/nnet3-align-compiled.cc +++ b/src/nnet3bin/nnet3-align-compiled.cc @@ -47,8 +47,8 @@ int main(int argc, char *argv[]) { "e.g.: \n" " nnet3-align-compiled 1.mdl ark:graphs.fsts scp:train.scp ark:1.ali\n" "or:\n" - " compile-train-graphs tree 1.mdl lex.fst ark:train.tra b, ark:- | \\\n" - " nnet3-align-compiled 1.mdl ark:- scp:train.scp t, ark:1.ali\n"; + " compile-train-graphs tree 1.mdl lex.fst 'ark:sym2int.pl -f 2- words.txt text|' \\\n" + " ark:- | nnet3-align-compiled 1.mdl ark:- scp:train.scp t, ark:1.ali\n"; ParseOptions po(usage); AlignConfig align_config; diff --git a/src/onlinebin/online-wav-gmm-decode-faster.cc b/src/onlinebin/online-wav-gmm-decode-faster.cc index e5d54b80db5..fe7c6d6b974 100644 --- a/src/onlinebin/online-wav-gmm-decode-faster.cc +++ b/src/onlinebin/online-wav-gmm-decode-faster.cc @@ -41,7 +41,7 @@ int main(int argc, char *argv[]) { const char *usage = "Reads in wav file(s) and simulates online decoding.\n" - "Writes .tra and .ali files for WER computation. Utterance " + "Writes integerized-text and .ali files for WER computation. Utterance " "segmentation is done on-the-fly.\n" "Feature splicing/LDA transform is used, if the optional(last) argument " "is given.\n" From 01576867802ae5c499f9a4b66591ce35499e28f5 Mon Sep 17 00:00:00 2001 From: Karel Vesely Date: Wed, 5 Apr 2017 19:26:41 +0200 Subject: [PATCH 498/530] [src] nnet1: improving the GPU diagnostics, (#1532) - we auto-detect the 'compute capability' problems (these appear as the 'invalid device function'), - we also provide guidelines what to try before posting to forum, and which info to send to us, --- src/nnetbin/cuda-gpu-available.cc | 74 ++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/src/nnetbin/cuda-gpu-available.cc b/src/nnetbin/cuda-gpu-available.cc index 897f01a8241..89fd26be86f 100644 --- a/src/nnetbin/cuda-gpu-available.cc +++ b/src/nnetbin/cuda-gpu-available.cc @@ -24,9 +24,21 @@ #include "base/kaldi-common.h" #include "cudamatrix/cu-device.h" +#include "cudamatrix/cu-matrix.h" using namespace kaldi; +#if HAVE_CUDA == 1 +/** + * With incorrect CUDA setup, this will trigger "invalid device function" error. + */ +void TestGpuComputation() { + CuMatrix m(100,100); + m.SetRandn(); + m.ApplySoftMaxPerRow(m); +} +#endif + int main(int argc, char *argv[]) try { char hostname[100] = "UNKNOWN-HOSTNAME"; #ifndef _MSC_VER @@ -34,14 +46,33 @@ int main(int argc, char *argv[]) try { KALDI_WARN << "Cannot get hostname, " << strerror(errno); } #endif - std::cerr - << "### IS CUDA GPU AVAILABLE? '" - << hostname << "' ###" << std::endl; + KALDI_LOG << std::endl << std::endl + << "### IS CUDA GPU AVAILABLE? 
'" << hostname << "' ###"; #if HAVE_CUDA == 1 CuDevice::Instantiate().SelectGpuId("yes"); - std::cerr - << "### HURRAY, WE GOT A CUDA GPU FOR COMPUTATION!!! ###" - << std::endl; + fprintf(stderr, "### HURRAY, WE GOT A CUDA GPU FOR COMPUTATION!!! ##\n\n"); + fprintf(stderr, "### Testing CUDA setup with a small computation " + "(setup = cuda-toolkit + gpu-driver + kaldi):\n"); + // the test of setup by computation, + try { + TestGpuComputation(); + } catch (const std::exception &e) { + fprintf(stderr, "%s\n", e.what()); + KALDI_LOG << "...\n" + << "### The CUDA setup is wrong! " + << "(\"invalid device function\" == problem with 'compute capability' " + << "in compiled kaldi)\n" + << "### Before posting the error to forum, please try following:\n" + << "### 1) update kaldi & cuda-toolkit (& GPU driver),\n" + << "### 2) re-run 'src/configure',\n" + << "### 3) re-compile kaldi by 'make clean; make -j depend; make -j'\n" + << "###\n" + << "### If the problem persists, please send us your:\n" + << "### - GPU model name, cuda-toolkit version, driver version " + << "(run nvidia-smi), variable $(CUDA_ARCH) from src/kaldi.mk"; + return -1; + } + fprintf(stderr, "### Test OK!\n"); return 0; #else std::cerr @@ -51,26 +82,17 @@ int main(int argc, char *argv[]) try { return 1; #endif } catch (const std::exception &e) { - std::cerr << e.what(); - std::cerr - << "### WE DID NOT GET A CUDA GPU!!! ###" << std::endl - << "### If it's your 1st experiment with CUDA, try reinstalling " - << "'CUDA toolkit' from NVidia web (it contains the drivers)." - << std::endl - << "### In other cases run 'nvidia-smi' in terminal " - << "(gets installed with display drivers) :" - << std::endl - << "### - Check that you see your GPU." - << std::endl - << "### - Bad GPUs are reporting error or disappear from the list " - << "until reboot." - << std::endl - << "### - Check 'Memory-Usage' and 'GPU fan', " - << "which will tell you if the GPU was taken by other process." - << std::endl - << "### - Check there is same version of 'NVIDIA-SMI' and " - << "'Driver', and that it is not too old for your GPU." - << std::endl; + fprintf(stderr, "%s\n", e.what()); + KALDI_LOG << "...\n" + << "### WE DID NOT GET A CUDA GPU!!! 
###\n" + << "### If your system has a 'free' CUDA GPU, try re-installing " + << "latest 'CUDA toolkit' from NVidia (this updates GPU drivers too).\n" + << "### Otherwise 'nvidia-smi' shows the status of GPUs:\n" + << "### - The versions should match ('NVIDIA-SMI' and 'Driver Version'), " + << "otherwise reboot or reload kernel module,\n" + << "### - The GPU should be unused " + << "(no 'process' in list, low 'memory-usage' (<100MB), low 'gpu-fan' (<30%)),\n" + << "### - You should see your GPU (burnt GPUs may disappear from the list until reboot),"; return -1; } From d9efe6efadc5c3a9a315600c9bac013c1c92c6c3 Mon Sep 17 00:00:00 2001 From: Peter Smit Date: Tue, 11 Apr 2017 20:40:51 +0200 Subject: [PATCH 499/530] [src] Fix copy-feats for using the --write-num-frames and --compress true flags at the same time (#1541) --- src/featbin/copy-feats.cc | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/featbin/copy-feats.cc b/src/featbin/copy-feats.cc index 0fbcca6399a..f1f58653f2f 100644 --- a/src/featbin/copy-feats.cc +++ b/src/featbin/copy-feats.cc @@ -102,19 +102,31 @@ int main(int argc, char *argv[]) { CompressedMatrixWriter kaldi_writer(wspecifier); if (htk_in) { SequentialTableReader htk_reader(rspecifier); - for (; !htk_reader.Done(); htk_reader.Next(), num_done++) + for (; !htk_reader.Done(); htk_reader.Next(), num_done++) { kaldi_writer.Write(htk_reader.Key(), CompressedMatrix(htk_reader.Value().first)); + if (!num_frames_wspecifier.empty()) + num_frames_writer.Write(htk_reader.Key(), + htk_reader.Value().first.NumRows()); + } } else if (sphinx_in) { SequentialTableReader > sphinx_reader(rspecifier); - for (; !sphinx_reader.Done(); sphinx_reader.Next(), num_done++) + for (; !sphinx_reader.Done(); sphinx_reader.Next(), num_done++) { kaldi_writer.Write(sphinx_reader.Key(), CompressedMatrix(sphinx_reader.Value())); + if (!num_frames_wspecifier.empty()) + num_frames_writer.Write(sphinx_reader.Key(), + sphinx_reader.Value().NumRows()); + } } else { SequentialBaseFloatMatrixReader kaldi_reader(rspecifier); - for (; !kaldi_reader.Done(); kaldi_reader.Next(), num_done++) + for (; !kaldi_reader.Done(); kaldi_reader.Next(), num_done++) { kaldi_writer.Write(kaldi_reader.Key(), CompressedMatrix(kaldi_reader.Value())); + if (!num_frames_wspecifier.empty()) + num_frames_writer.Write(kaldi_reader.Key(), + kaldi_reader.Value().NumRows()); + } } } KALDI_LOG << "Copied " << num_done << " feature matrices."; From 5b70933c61830d63aaad999c39dcdecc1f52bba5 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 12 Apr 2017 14:06:19 -0400 Subject: [PATCH 500/530] [scripts] fix to get_egs_targets.sh (thanks: David Pye) --- egs/wsj/s5/steps/nnet3/get_egs_targets.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh index a2749b48fac..b8fcbfd51fa 100755 --- a/egs/wsj/s5/steps/nnet3/get_egs_targets.sh +++ b/egs/wsj/s5/steps/nnet3/get_egs_targets.sh @@ -217,7 +217,7 @@ if [ $num_archives -eq 1 ]; then echo "*** $0: warning: the --frames-per-eg is too large to generate one archive with" echo "*** as many as --samples-per-iter egs in it. Consider reducing --frames-per-eg." 
sleep 4 -done +fi # We may have to first create a smaller number of larger archives, with number # $num_archives_intermediate, if $num_archives is more than the maximum number From b2d5d3683d416643bf141b9d501e36a6901a7265 Mon Sep 17 00:00:00 2001 From: Vijayaditya Peddinti Date: Wed, 12 Apr 2017 11:31:10 -0700 Subject: [PATCH 501/530] [scripts] nnet3 : fix issue where LDA estimation failed for LSTMs with label delay (#1540) xconfig : Added delay option for FixedAffineLayer. This will be used for ensuring the model specified in ref.config has at least the context required by the model specified in init.config --- .../s5c/local/chain/tuning/run_lstm_6k.sh | 304 ++++++++++++++++++ .../steps/libs/nnet3/xconfig/basic_layers.py | 22 +- egs/wsj/s5/steps/nnet3/xconfig_to_configs.py | 41 +++ 3 files changed, 361 insertions(+), 6 deletions(-) create mode 100755 egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh new file mode 100755 index 00000000000..b9b7152dcbe --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh @@ -0,0 +1,304 @@ +#!/bin/bash + +# Copyright 2015 Johns Hopkins University (Author: Daniel Povey). +# 2015 Vijayaditya Peddinti +# 2015 Xingyu Na +# 2015 Pegah Ghahrmani +# 2017 Google Inc. (vpeddinti@google.com) +# Apache 2.0. + + + +# run_lstm_6k.sh is like run_lstm_6j.sh but making +# various kaldi-5.1-related upgrades to the script. +# For the list of changes compare tuning/run_tdnn_lstm_1{c,d}.sh + +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/lstm_6k # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_nj=50 + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + # Note : The delay variable will be used just in the init.config. + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat delay=$label_delay + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +iter_opts= +if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi + +if [ $stage -le 15 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
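For contrast, the two decoding modes differ only in which wrapper is invoked and whether extra context must be supplied. A minimal sketch of the two calls (the --nj/--cmd job-control and --online-ivector-dir options of the surrounding stages are omitted; paths as in this script):

  # regular chunked decoding: the recurrent state is re-initialized for every
  # chunk, so extra left context has to be supplied explicitly
  steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
    --extra-left-context 50 --extra-right-context 0 \
    $graph_dir data/eval2000_hires $dir/decode_eval2000_sw1_tg

  # looped decoding: state is carried across chunks, so the extra-context
  # options disappear (forward-recurrent models only)
  steps/nnet3/decode_looped.sh --acwt 1.0 --post-decode-acwt 10.0 \
    $graph_dir data/eval2000_hires $dir/decode_eval2000_sw1_tg_looped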
+ rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in looped decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 17 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor $dir ${dir}_online + + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + # note: we just give it "$decode_set" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --acwt 1.0 --post-decode-acwt 10.0 \ + $graph_dir data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in online decoding" + exit 1 + fi +fi + +exit 0; diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py index de4c4af9df8..59b6006accb 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/basic_layers.py @@ -1,5 +1,6 @@ # Copyright 2016 Johns Hopkins University (Dan Povey) # 2016 Vijayaditya Peddinti +# 2017 Google Inc. (vpeddinti@google.com) # Apache 2.0. """ This module contains the parent class from which all layers are inherited @@ -775,7 +776,7 @@ def _add_components(self, input_desc, input_dim, nonlinearities): # input='[-1]' [Descriptor giving the input of the layer.] # dim=None [Output dimension of layer; defaults to the same as the input dim.] # affine-transform-file='' [Must be specified.] -# +# delay=0 [Optional delay for the output-node in init.config] class XconfigFixedAffineLayer(XconfigLayerBase): def __init__(self, first_token, key_to_value, prev_names = None): assert first_token == 'fixed-affine-layer' @@ -787,6 +788,7 @@ def set_default_configs(self): self.config = { 'input':'[-1]', 'dim':-1, 'affine-transform-file':'', + 'delay':0, 'write-init-config':True} def check_configs(self): @@ -819,11 +821,19 @@ def get_full_config(self): transform_file = self.config['affine-transform-file'] if self.config['write-init-config']: - # to init.config we write an output-node with the name 'output' and - # with a Descriptor equal to the descriptor that's the input to this - # layer. This will be used to accumulate stats to learn the LDA transform. 
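Concretely, a nonzero delay makes the generated init.config route the layer input through a NoOpComponent and delay the output with an Offset descriptor, as the changed lines below implement. A sketch of what this would generate for the 'lda' layer of the LSTM recipe above, assuming label_delay=5 and illustrative dimensions (300 = 5 spliced copies of the 40-dim input plus the 100-dim ivector); the exact descriptor expansion may differ:

  component name=lda.delayed type=NoOpComponent dim=300
  component-node name=lda.delayed component=lda.delayed input=Append(Offset(input, -2), Offset(input, -1), input, Offset(input, 1), Offset(input, 2), ReplaceIndex(ivector, t, 0))
  output-node name=output input=Offset(lda.delayed, 5)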
- line = 'output-node name=output input={0}'.format(descriptor_final_string) - ans.append(('init', line)) + if self.config['delay'] != 0: + line = 'component name={0}.delayed type=NoOpComponent dim={1}'.format(self.name, input_dim) + ans.append(('init', line)) + line = 'component-node name={0}.delayed component={0}.delayed input={1}'.format(self.name, descriptor_final_string) + ans.append(('init', line)) + line = 'output-node name=output input=Offset({0}.delayed, {1})'.format(self.name, self.config['delay']) + ans.append(('init', line)) + else: + # to init.config we write an output-node with the name 'output' and + # with a Descriptor equal to the descriptor that's the input to this + # layer. This will be used to accumulate stats to learn the LDA transform. + line = 'output-node name=output input={0}'.format(descriptor_final_string) + ans.append(('init', line)) # write the 'real' component to final.config line = 'component name={0} type=FixedAffineComponent matrix={1}'.format( diff --git a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py index 5184b6eed41..7e876bda1ed 100755 --- a/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py +++ b/egs/wsj/s5/steps/nnet3/xconfig_to_configs.py @@ -1,5 +1,10 @@ #!/usr/bin/env python +# Copyright 2016 Johns Hopkins University (Dan Povey) +# 2016 Vijayaditya Peddinti +# 2017 Google Inc. (vpeddinti@google.com) +# Apache 2.0. + # we're using python 3.x style print but want it to work in python 2.x, from __future__ import print_function import argparse @@ -236,6 +241,41 @@ def add_back_compatibility_info(config_dir): common_lib.force_symlink("final.config".format(config_dir), "{0}/layer1.config".format(config_dir)) +def check_model_contexts(config_dir): + contexts = {} + for file_name in ['init', 'ref']: + if os.path.exists('{0}/{1}.config'.format(config_dir, file_name)): + contexts[file_name] = {} + common_lib.run_kaldi_command("nnet3-init {0}/{1}.config " + "{0}/{1}.raw".format(config_dir, file_name)) + out, err = common_lib.run_kaldi_command("nnet3-info {0}/{1}.raw | " + "head -4".format(config_dir, file_name)) + # out looks like this + # left-context: 7 + # right-context: 0 + # num-parameters: 90543902 + # modulus: 1 + for line in out.split("\n"): + parts = line.split(":") + if len(parts) != 2: + continue + key = parts[0].strip() + value = int(parts[1].strip()) + if key in ['left-context', 'right-context']: + contexts[file_name][key] = value + + if contexts.has_key('init'): + assert(contexts.has_key('ref')) + if ((contexts['init']['left-context'] > contexts['ref']['left-context']) + or (contexts['init']['right-context'] > contexts['ref']['right-context'])): + raise Exception("Model specified in {0}/init.config requires greater" + " context than the model specified in {0}/ref.config." + " This might be due to use of label-delay at the output" + " in ref.config. Please use delay=$label_delay in the" + " initial fixed-affine-layer of the network, to avoid" + " this issue.") + + def main(): args = get_args() @@ -243,6 +283,7 @@ def main(): all_layers = xparser.read_xconfig_file(args.xconfig_file) write_expanded_xconfig_files(args.config_dir, all_layers) write_config_files(args.config_dir, all_layers) + check_model_contexts(args.config_dir) add_back_compatibility_info(args.config_dir) From 31a528b0c3148e4419a49f22658bf3360c87dce0 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Thu, 13 Apr 2017 20:55:31 -0300 Subject: [PATCH 502/530] [src] Change ConvertStringToReal to be locale-independent (i.e. always-US). 
Fixes android issue. (#1513) The issue was that in Android you can't easily override the phone's locale, so export LC_ALL=C does not really work to enable reading of models if the phone was (e.g.) Brazilian or German. --- src/base/kaldi-utils.h | 9 --- src/util/text-utils-test.cc | 73 ++++++++++++++++++ src/util/text-utils.cc | 147 ++++++++++++++++++------------------ src/util/text-utils.h | 12 ++- 4 files changed, 151 insertions(+), 90 deletions(-) diff --git a/src/base/kaldi-utils.h b/src/base/kaldi-utils.h index 2cfecdcc7db..bd2da25dce7 100644 --- a/src/base/kaldi-utils.h +++ b/src/base/kaldi-utils.h @@ -145,13 +145,4 @@ template<> class KaldiCompileTimeAssert { # define KALDI_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10); #endif -#define KALDI_STRTOD(cur_cstr, end_cstr) strtod(cur_cstr, end_cstr) - -#ifdef _MSC_VER -# define KALDI_STRTOF(cur_cstr, end_cstr) \ - static_cast(strtod(cur_cstr, end_cstr)); -#else -# define KALDI_STRTOF(cur_cstr, end_cstr) strtof(cur_cstr, end_cstr); -#endif - #endif // KALDI_BASE_KALDI_UTILS_H_ diff --git a/src/util/text-utils-test.cc b/src/util/text-utils-test.cc index ee9d20aa641..44b5b46a093 100644 --- a/src/util/text-utils-test.cc +++ b/src/util/text-utils-test.cc @@ -173,7 +173,76 @@ void TestConvertStringToReal() { // it also works for inf or nan. KALDI_ASSERT(ConvertStringToReal("inf", &d) && d > 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal(" inf", &d) && d > 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal("inf ", &d) && d > 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal(" inf ", &d) && d > 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal("+inf", &d) && d > 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal("-inf", &d) && d < 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal("Inf", &d) && d > 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal("INF", &d) && d > 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal("InF", &d) && d > 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal("infinity", &d) && d > 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal("-infinity", &d) && d < 0 && d - d != 0); + KALDI_ASSERT(!ConvertStringToReal("GARBAGE inf", &d)); + KALDI_ASSERT(!ConvertStringToReal("GARBAGEinf", &d)); + KALDI_ASSERT(!ConvertStringToReal("infGARBAGE", &d)); + KALDI_ASSERT(!ConvertStringToReal("inf_GARBAGE", &d)); + KALDI_ASSERT(!ConvertStringToReal("inf GARBAGE", &d)); + KALDI_ASSERT(!ConvertStringToReal("GARBAGE infinity", &d)); + KALDI_ASSERT(!ConvertStringToReal("GARBAGEinfinity", &d)); + KALDI_ASSERT(!ConvertStringToReal("infinityGARBAGE", &d)); + KALDI_ASSERT(!ConvertStringToReal("infinity_GARBAGE", &d)); + KALDI_ASSERT(!ConvertStringToReal("infinity GARBAGE", &d)); + KALDI_ASSERT(ConvertStringToReal("1.#INF", &d) && d > 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal("-1.#INF", &d) && d < 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal("-1.#INF ", &d) && d < 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal(" -1.#INF ", &d) && d < 0 && d - d != 0); + KALDI_ASSERT(!ConvertStringToReal("GARBAGE 1.#INF", &d)); + KALDI_ASSERT(!ConvertStringToReal("GARBAGE1.#INF", &d)); + KALDI_ASSERT(!ConvertStringToReal("2.#INF", &d)); + KALDI_ASSERT(!ConvertStringToReal("-2.#INF", &d)); + KALDI_ASSERT(!ConvertStringToReal("1.#INFGARBAGE", &d)); + KALDI_ASSERT(!ConvertStringToReal("1.#INF_GARBAGE", &d)); + KALDI_ASSERT(ConvertStringToReal("nan", &d) && d != d); + KALDI_ASSERT(ConvertStringToReal("+nan", &d) && d != d); + KALDI_ASSERT(ConvertStringToReal("-nan", &d) && d != d); + 
KALDI_ASSERT(ConvertStringToReal("Nan", &d) && d != d); + KALDI_ASSERT(ConvertStringToReal("NAN", &d) && d != d); + KALDI_ASSERT(ConvertStringToReal("NaN", &d) && d != d); + KALDI_ASSERT(ConvertStringToReal(" NaN", &d) && d != d); + KALDI_ASSERT(ConvertStringToReal("NaN ", &d) && d != d); + KALDI_ASSERT(ConvertStringToReal(" NaN ", &d) && d != d); + KALDI_ASSERT(ConvertStringToReal("1.#QNAN", &d) && d != d); + KALDI_ASSERT(ConvertStringToReal("-1.#QNAN", &d) && d != d); + KALDI_ASSERT(ConvertStringToReal("1.#QNAN ", &d) && d != d); + KALDI_ASSERT(ConvertStringToReal(" 1.#QNAN ", &d) && d != d); + KALDI_ASSERT(!ConvertStringToReal("GARBAGE nan", &d)); + KALDI_ASSERT(!ConvertStringToReal("GARBAGEnan", &d)); + KALDI_ASSERT(!ConvertStringToReal("nanGARBAGE", &d)); + KALDI_ASSERT(!ConvertStringToReal("nan_GARBAGE", &d)); + KALDI_ASSERT(!ConvertStringToReal("nan GARBAGE", &d)); + KALDI_ASSERT(!ConvertStringToReal("GARBAGE 1.#QNAN", &d)); + KALDI_ASSERT(!ConvertStringToReal("GARBAGE1.#QNAN", &d)); + KALDI_ASSERT(!ConvertStringToReal("2.#QNAN", &d)); + KALDI_ASSERT(!ConvertStringToReal("-2.#QNAN", &d)); + KALDI_ASSERT(!ConvertStringToReal("-1.#QNAN_GARBAGE", &d)); + KALDI_ASSERT(!ConvertStringToReal("-1.#QNANGARBAGE", &d)); +} + +template +void TestNan() { + Real d; + KALDI_ASSERT(ConvertStringToReal(std::to_string(sqrt(-1)), &d) && d != d); +} + +template +void TestInf() { + Real d; + KALDI_ASSERT(ConvertStringToReal(std::to_string(exp(10000)), &d) && + d > 0 && d - d != 0); + KALDI_ASSERT(ConvertStringToReal(std::to_string(-exp(10000)), &d) && + d < 0 && d - d != 0); } @@ -252,6 +321,10 @@ int main() { TestSplitStringOnFirstSpace(); TestIsToken(); TestIsLine(); + TestNan(); + TestNan(); + TestInf(); + TestInf(); std::cout << "Test OK\n"; } diff --git a/src/util/text-utils.cc b/src/util/text-utils.cc index 876ee3f0be9..d9e6b2ac520 100644 --- a/src/util/text-utils.cc +++ b/src/util/text-utils.cc @@ -19,6 +19,8 @@ #include "util/text-utils.h" #include +#include +#include #include "base/kaldi-common.h" namespace kaldi { @@ -160,95 +162,92 @@ bool IsLine(const std::string &line) { return true; } +template +class NumberIstream{ + public: + explicit NumberIstream(std::istream &i) : in_(i) {} -inline bool starts_with(const std::string &in, const std::string &prefix) { - return in.substr(0, prefix.size()) == prefix; -} + NumberIstream & operator >> (T &x) { + if (!in_.good()) return *this; + in_ >> x; + if (!in_.fail() && RemainderIsOnlySpaces()) return *this; + return ParseOnFail(&x); + } -inline bool stricmp(const std::string &in, const std::string &prefix) { - int ret = KALDI_STRCASECMP(in.c_str(), prefix.c_str()); - return ret == 0; -} + private: + std::istream &in_; -inline bool is_nan_text(const std::string &in, const std::string &prefix) { - if (in.size() < prefix.size()) - return false; + bool RemainderIsOnlySpaces() { + if (in_.tellg() != -1) { + std::string rem; + in_ >> rem; - if (stricmp(in, prefix)) - return true; + if (rem.find_first_not_of(' ') != std::string::npos) { + // there is not only spaces + return false; + } + } - for (int i = 0; i < prefix.size(); ++i) - if (tolower(in[i]) != tolower(prefix[i])) - return false; + in_.clear(); + return true; + } - for (int i = prefix.size(); i < in.size(); ++i) - if (!isalpha(in[i]) && (in[i] != '_')) - return false; + NumberIstream & ParseOnFail(T *x) { + std::string str; + in_.clear(); + in_.seekg(0); + // If the stream is broken even before trying + // to read from it or if there are many tokens, + // it's pointless to try. 
+ if (!(in_ >> str) || !RemainderIsOnlySpaces()) { + in_.setstate(std::ios_base::failbit); + return *this; + } - return true; -} + std::map inf_nan_map; + // we'll keep just uppercase values. + inf_nan_map["INF"] = std::numeric_limits::infinity(); + inf_nan_map["+INF"] = std::numeric_limits::infinity(); + inf_nan_map["-INF"] = - std::numeric_limits::infinity(); + inf_nan_map["INFINITY"] = std::numeric_limits::infinity(); + inf_nan_map["+INFINITY"] = std::numeric_limits::infinity(); + inf_nan_map["-INFINITY"] = - std::numeric_limits::infinity(); + inf_nan_map["NAN"] = std::numeric_limits::quiet_NaN(); + inf_nan_map["+NAN"] = std::numeric_limits::quiet_NaN(); + inf_nan_map["-NAN"] = - std::numeric_limits::quiet_NaN(); + // MSVC + inf_nan_map["1.#INF"] = std::numeric_limits::infinity(); + inf_nan_map["-1.#INF"] = - std::numeric_limits::infinity(); + inf_nan_map["1.#QNAN"] = std::numeric_limits::quiet_NaN(); + inf_nan_map["-1.#QNAN"] = - std::numeric_limits::quiet_NaN(); + + std::transform(str.begin(), str.end(), str.begin(), ::toupper); + + if (inf_nan_map.find(str) != inf_nan_map.end()) { + *x = inf_nan_map[str]; + } else { + in_.setstate(std::ios_base::failbit); + } -template -bool convert_special_number(const std::string &str, T *out) { - if (stricmp(str, "infinity") || stricmp(str, "inf") || - starts_with(str, "1.#INF")) { - *out = std::numeric_limits::infinity(); - return true; - } else if (stricmp(str, "-infinity") || stricmp(str, "-inf") || - starts_with(str, "-1.#INF")) { - *out = -std::numeric_limits::infinity(); - return true; - } else if (is_nan_text(str, "nan") || starts_with(str, "1.#QNAN")) { - *out = std::numeric_limits::quiet_NaN(); - return true; - } else if (is_nan_text(str, "-nan") || starts_with(str, "-1.#QNAN")) { - *out = -std::numeric_limits::quiet_NaN(); - return true; + return *this; } - return false; -} +}; + +template bool ConvertStringToReal(const std::string &str, - double *out) { - const char *this_str = str.c_str(); - char *end = NULL; - errno = 0; - -#if defined(_MSC_VER) - // TODO: check if the new MSVC already supports it - // depending on claims of the C++11 support, it should have - if (convert_special_number(str, out)) - return true; -#endif // defined(_MSC_VER) + T *out) { + std::istringstream iss(str); - double d = KALDI_STRTOD(this_str, &end); - if (end != this_str) - while (isspace(*end)) end++; - if (end == this_str || *end != '\0' || errno != 0) - return false; - *out = d; - return true; -} + NumberIstream i(iss); -bool ConvertStringToReal(const std::string &str, - float *out) { - const char *this_str = str.c_str(); - char *end = NULL; - errno = 0; - -#ifdef _MSC_VER - // TODO: check if the new MSVC already supports it - // depending on claims of the C++11 support, it should have - if (convert_special_number(str, out)) - return true; -#endif // _MSC_VER + i >> *out; - float f = KALDI_STRTOF(this_str, &end); - if (end != this_str) - while (isspace(*end)) end++; - if (end == this_str || *end != '\0' || errno != 0) + if (iss.fail()) { + // Number conversion failed. return false; - *out = f; + } + return true; } diff --git a/src/util/text-utils.h b/src/util/text-utils.h index be2e963a27b..b8fc6ab369d 100644 --- a/src/util/text-utils.h +++ b/src/util/text-utils.h @@ -132,15 +132,13 @@ bool ConvertStringToInteger(const std::string &str, } -/// ConvertStringToReal converts a string into either float or double via -/// strtod, and returns false if there was any kind of problem (i.e. 
the string -/// was not a floating point number or contained extra non-whitespace junk. +/// ConvertStringToReal converts a string into either float or double +/// and returns false if there was any kind of problem (i.e. the string +/// was not a floating point number or contained extra non-whitespace junk). /// Be careful- this function will successfully read inf's or nan's. +template bool ConvertStringToReal(const std::string &str, - double *out); -bool ConvertStringToReal(const std::string &str, - float *out); - + T *out); /// Removes the beginning and trailing whitespaces from a string void Trim(std::string *str); From 7fcc311c17e1d9b12beb1cac8ffdc4ea2692016a Mon Sep 17 00:00:00 2001 From: david-ryan-snyder Date: Fri, 14 Apr 2017 01:38:29 -0400 Subject: [PATCH 503/530] [egs,scripts,src] SID and LID tools and scripts: cosmetic improvements, better error-handling, and various minor fixes; results unchanged. (#1543) --- egs/lre07/v1/lid/init_full_ubm_from_dnn.sh | 8 ++ egs/lre07/v1/lid/train_diag_ubm.sh | 5 +- egs/lre07/v1/lid/train_ivector_extractor.sh | 26 +++-- .../v1/lid/train_ivector_extractor_dnn.sh | 33 +++--- egs/sre08/v1/sid/extract_ivectors_dnn.sh | 43 ++++++-- egs/sre08/v1/sid/init_full_ubm_from_dnn.sh | 78 ++++++++++---- .../dnn => sre08/v1/sid/nnet2}/get_egs2.sh | 0 .../dnn => sre08/v1/sid/nnet2}/get_lda.sh | 6 +- .../v1/sid/nnet2}/train_multisplice_accel2.sh | 6 +- egs/sre08/v1/sid/train_diag_ubm.sh | 5 +- egs/sre08/v1/sid/train_ivector_extractor.sh | 28 +++-- .../v1/sid/train_ivector_extractor_dnn.sh | 73 ++++++++----- .../v1/local/dnn/run_nnet2_multisplice.sh | 14 +-- egs/sre10/v1/local/dnn/train_dnn.sh | 3 +- egs/sre10/v1/local/plda_scoring.sh | 17 +-- egs/sre10/v1/run.sh | 31 +++--- egs/sre10/v2/run.sh | 100 +++++++++--------- src/fgmmbin/fgmm-global-init-from-accs.cc | 50 +++++---- src/gmm/full-gmm.cc | 4 +- src/ivectorbin/ivector-adapt-plda.cc | 16 +-- .../ivector-compute-dot-products.cc | 12 +-- src/ivectorbin/ivector-compute-lda.cc | 34 +++--- src/ivectorbin/ivector-compute-plda.cc | 20 ++-- src/ivectorbin/ivector-copy-plda.cc | 8 +- src/ivectorbin/ivector-extract-online.cc | 20 ++-- src/ivectorbin/ivector-extract.cc | 36 +++---- src/ivectorbin/ivector-extractor-acc-stats.cc | 20 ++-- src/ivectorbin/ivector-extractor-init.cc | 2 +- src/ivectorbin/ivector-extractor-sum-accs.cc | 8 +- src/ivectorbin/ivector-mean.cc | 14 +-- .../ivector-subtract-global-mean.cc | 22 ++-- src/ivectorbin/ivector-transform.cc | 14 +-- 32 files changed, 432 insertions(+), 324 deletions(-) rename egs/{sre10/v1/local/dnn => sre08/v1/sid/nnet2}/get_egs2.sh (100%) rename egs/{sre10/v1/local/dnn => sre08/v1/sid/nnet2}/get_lda.sh (99%) rename egs/{sre10/v1/local/dnn => sre08/v1/sid/nnet2}/train_multisplice_accel2.sh (99%) diff --git a/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh b/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh index 972348766b5..aeced4fb273 100755 --- a/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh +++ b/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh @@ -12,6 +12,7 @@ nj=40 cmd="run.pl" stage=-2 +cleanup=true # End configuration section. 
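The new cleanup option follows a guard idiom that recurs throughout these scripts, in a long form (used in the hunk that follows) and a short '$cleanup && ...' form (used in train_diag_ubm.sh further down). A minimal self-contained sketch, with placeholder paths standing in for real accumulator files:

  #!/bin/bash
  cleanup=true         # pass --cleanup false (via utils/parse_options.sh) to keep files
  dir=$(mktemp -d)
  touch $dir/stats.1.acc $dir/1.dubm   # stand-ins for real accumulator output
  # long form:
  if $cleanup; then
    echo "$0: removing stats"
    rm $dir/stats.1.acc || exit 1
  fi
  # short form; note it evaluates to false when cleanup=false, so it must not
  # be the last command of a script that is expected to exit 0
  $cleanup && rm $dir/1.dubm
  exit 0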
@@ -77,4 +78,11 @@ $cmd $dir/log/init.log \ "fgmm-global-sum-accs - $dir/stats.*.acc |" $num_components \ $dir/final.ubm || exit 1; +if $cleanup; then + echo "$0: removing stats" + for g in $(seq $nj); do + rm $dir/stats.$g.acc || exit 1 + done +fi + exit 0; diff --git a/egs/lre07/v1/lid/train_diag_ubm.sh b/egs/lre07/v1/lid/train_diag_ubm.sh index 8ba703073c0..a5e256818ce 100755 --- a/egs/lre07/v1/lid/train_diag_ubm.sh +++ b/egs/lre07/v1/lid/train_diag_ubm.sh @@ -129,10 +129,11 @@ for x in `seq 0 $[$num_iters-1]`; do $cmd $dir/log/update.$x.log \ gmm-global-est $opt --min-gaussian-weight=$min_gaussian_weight $dir/$x.dubm "gmm-global-sum-accs - $dir/$x.*.acc|" \ $dir/$[$x+1].dubm || exit 1; - rm $dir/$x.*.acc $dir/$x.dubm + $cleanup && rm $dir/$x.*.acc $dir/$x.dubm fi done -rm $dir/gselect.*.gz +$cleanup && rm $dir/gselect.*.gz + mv $dir/$num_iters.dubm $dir/final.dubm || exit 1; exit 0; diff --git a/egs/lre07/v1/lid/train_ivector_extractor.sh b/egs/lre07/v1/lid/train_ivector_extractor.sh index 18f536a60cb..55bd54bb275 100755 --- a/egs/lre07/v1/lid/train_ivector_extractor.sh +++ b/egs/lre07/v1/lid/train_ivector_extractor.sh @@ -135,27 +135,25 @@ while [ $x -lt $num_iters ]; do done wait [ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1; - accs="" - for j in $(seq $nj); do - accs+="$dir/acc.$x.$j " - done - echo "Summing accs (pass $x)" - $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ - ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; + accs="" + for j in $(seq $nj); do + accs+="$dir/acc.$x.$j " + done + echo "Summing accs (pass $x)" + $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ + ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; echo "Updating model (pass $x)" nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. - $cmd --num-threads $nt $dir/log/update.$x.log \ - ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; - rm $dir/acc.$x.* - if $cleanup; then - rm $dir/acc.$x - # rm $dir/$x.ie - fi + $cmd --num-threads $nt $dir/log/update.$x.log \ + ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; + rm $dir/acc.$x.* + $cleanup && rm $dir/acc.$x $dir/$x.ie fi x=$[$x+1] done +$cleanup && rm $dir/post.*.gz rm $dir/final.ie 2>/dev/null ln -s $x.ie $dir/final.ie diff --git a/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh b/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh index 8902b730e09..573258e7b88 100755 --- a/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh +++ b/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh @@ -153,24 +153,21 @@ while [ $x -lt $num_iters ]; do done wait [ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1; - accs="" - for j in $(seq $nj); do - accs+="$dir/acc.$x.$j " - done - echo "Summing accs (pass $x)" - $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ - ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; - echo "Updating model (pass $x)" - nt=$[$num_threads*$num_processes] # use the same number of threads that - # each accumulation process uses, since we - # can be sure the queue will support this many. 
- $cmd --num-threads $nt $dir/log/update.$x.log \ - ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; - rm $dir/acc.$x.* - if $cleanup; then - rm $dir/acc.$x - # rm $dir/$x.ie - fi + accs="" + for j in $(seq $nj); do + accs+="$dir/acc.$x.$j " + done + echo "Summing accs (pass $x)" + $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ + ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; + echo "Updating model (pass $x)" + nt=$[$num_threads*$num_processes] # use the same number of threads that + # each accumulation process uses, since we + # can be sure the queue will support this many. + $cmd --num-threads $nt $dir/log/update.$x.log \ + ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; + rm $dir/acc.$x.* + $cleanup && rm $dir/acc.$x $dir/$x.ie fi x=$[$x+1] done diff --git a/egs/sre08/v1/sid/extract_ivectors_dnn.sh b/egs/sre08/v1/sid/extract_ivectors_dnn.sh index 8692e6ee8a5..2687d1fc6c8 100755 --- a/egs/sre08/v1/sid/extract_ivectors_dnn.sh +++ b/egs/sre08/v1/sid/extract_ivectors_dnn.sh @@ -1,7 +1,7 @@ #!/bin/bash # Copyright 2013 Daniel Povey -# 2014-2015 David Snyder +# 2014-2017 David Snyder # 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) # 2015 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0. @@ -16,6 +16,9 @@ stage=0 min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out) posterior_scale=1.0 # This scale helps to control for successive features being highly # correlated. E.g. try 0.1 or 0.3. +use_gpu=true +chunk_size=256 +nnet_job_opt= # End configuration section. echo "$0 $@" # Print the command line for logging @@ -37,6 +40,8 @@ if [ $# != 5 ]; then echo " --num-gselect # Number of Gaussians to select using" echo " # diagonal model." echo " --min-post # Pruning threshold for posteriors" + echo " --nnet-job-opt # Options for the DNN jobs which add to or" + echo " # replace those specified by --cmd" exit 1; fi @@ -46,6 +51,21 @@ data=$3 data_dnn=$4 dir=$5 +gpu_opt="" +if $use_gpu; then + nnet_job_opt="$nnet_job_opt --gpu 1" + gpu_opt="--use-gpu=yes" + if ! cuda-compiled; then + echo "$0: WARNING: you are trying to use the GPU but you have not compiled" + echo " for CUDA. If you have GPUs and have nvcc installed, go to src/" + echo " and do ./configure; make" + exit 1 + fi +else + echo "$0: without using a GPU this will be slow." + gpu_opt="--use-gpu=no" +fi + for f in $srcdir/final.ie $srcdir/final.ubm $data/feats.scp ; do [ ! -f $f ] && echo "No such file $f" && exit 1; done @@ -60,8 +80,6 @@ utils/split_data.sh $data_dnn $nj || exit 1; delta_opts=`cat $srcdir/delta_opts 2>/dev/null` -splice_opts=`cat exp/nnet//splice_opts 2>/dev/null` # frame-splicing options - ## Set up features. 
feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -69,13 +87,18 @@ nnet_feats="ark,s,cs:apply-cmvn-sliding --center=true scp:$sdata_dnn/JOB/feats.s if [ $stage -le 0 ]; then echo "$0: extracting iVectors" - $cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \ - nnet-am-compute --apply-log=true $nnet "$nnet_feats" ark:- \ - \| select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- \ - \| logprob-to-post --min-post=$min_post ark:- ark:- \| \ - scale-post ark:- $posterior_scale ark:- \| \ - ivector-extract --verbose=2 $srcdir/final.ie "$feats" ark,s,cs:- \ - ark,scp,t:$dir/ivector.JOB.ark,$dir/ivector.JOB.scp || exit 1; + for g in $(seq $nj); do + $cmd $nnet_job_opt $dir/log/extract_ivectors.$g.log \ + nnet-am-compute $gpu_opt --apply-log=true --chunk-size=${chunk_size} \ + $nnet "`echo $nnet_feats | sed s/JOB/$g/g`" ark:- \ + \| select-voiced-frames ark:- scp,s,cs:$sdata/$g/vad.scp ark:- \ + \| logprob-to-post --min-post=$min_post ark:- ark:- \| \ + scale-post ark:- $posterior_scale ark:- \| \ + ivector-extract --verbose=2 $srcdir/final.ie \ + "`echo $feats | sed s/JOB/$g/g`" ark,s,cs:- \ + ark,scp,t:$dir/ivector.$g.ark,$dir/ivector.$g.scp || exit 1 & + done + wait fi if [ $stage -le 1 ]; then diff --git a/egs/sre08/v1/sid/init_full_ubm_from_dnn.sh b/egs/sre08/v1/sid/init_full_ubm_from_dnn.sh index f6710028ae5..c6b508a7206 100755 --- a/egs/sre08/v1/sid/init_full_ubm_from_dnn.sh +++ b/egs/sre08/v1/sid/init_full_ubm_from_dnn.sh @@ -1,18 +1,23 @@ #!/bin/bash -# Copyright 2015 David Snyder -# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) -# 2015 Johns Hopkins University (Author: Daniel Povey) +# Copyright 2015-2017 David Snyder +# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) +# 2015 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0 # This script derives a full-covariance UBM from DNN posteriors and # speaker recognition features. # Begin configuration section. -nj=40 +nj=8 cmd="run.pl" stage=-2 delta_window=3 delta_order=2 +use_gpu=true +nnet_job_opt= +cleanup=true +chunk_size=256 +stage=0 # End configuration section. echo "$0 $@" # Print the command line for logging @@ -30,15 +35,34 @@ if [ $# != 4 ]; then echo " --nj # number of parallel training jobs" echo " --delta-window # delta window size" echo " --delta-order # delta order" - echo " # to be equal to the size of the DNN output layer." + echo " --use-gpu # Use GPU to extract DNN posteriors" + echo " --chunk-size # Number of frames processed at a time by the DNN" + echo " --nnet-job-opt # Options for the DNN jobs which add to or" + echo " # replace those specified by --cmd" exit 1; fi -data=$1 -data_dnn=$2 +data=$1 # Features for the GMM +data_dnn=$2 # Features for the DNN nnet=$3 dir=$4 +gpu_opt="" +nnet_job_opt="" +if $use_gpu; then + nnet_job_opt="$nnet_job_opt --gpu 1" + gpu_opt="--use-gpu=yes" + if ! cuda-compiled; then + echo "$0: WARNING: you are trying to use the GPU but you have not compiled" + echo " for CUDA. If you have GPUs and have nvcc installed, go to src/" + echo " and do ./configure; make" + exit 1 + fi +else + echo "$0: without using a GPU this will be slow." + gpu_opt="--use-gpu=no" +fi + for f in $data/feats.scp $data/vad.scp ${data_dnn}/feats.scp \ ${data_dnn}/vad.scp $nnet; do @@ -69,16 +93,34 @@ select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" # in the ancillary GMM. 
num_components=`grep -oP 'output-dim\ \K[0-9]+' <(nnet-am-info $nnet 2> /dev/null)` -$cmd JOB=1:$nj $logdir/make_stats.JOB.log \ - nnet-am-compute --apply-log=true $nnet "$nnet_feats" ark:- \ - \| select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- \ - \| logprob-to-post ark:- ark:- \| \ - fgmm-global-acc-stats-post ark:- $num_components "$feats" \ - $dir/stats.JOB.acc || exit 1; +if [ $stage -le 0 ]; then + echo "$0: accumulating stats from DNN posteriors and speaker ID features" + for g in $(seq $nj); do + $cmd $nnet_job_opt $dir/log/make_stats.$g.log \ + nnet-am-compute $gpu_opt \ + --chunk-size=${chunk_size} --apply-log=true $nnet \ + "`echo $nnet_feats | sed s/JOB/$g/g`" \ + ark:- \ + \| select-voiced-frames ark:- scp,s,cs:$sdata/$g/vad.scp ark:- \ + \| logprob-to-post ark:- ark:- \| \ + fgmm-global-acc-stats-post ark:- $num_components \ + "`echo $feats | sed s/JOB/$g/g`" \ + $dir/stats.$g.acc || exit 1 & + done + wait +fi -$cmd $dir/log/init.log \ - fgmm-global-init-from-accs --verbose=2 \ - "fgmm-global-sum-accs - $dir/stats.*.acc |" $num_components \ - $dir/final.ubm || exit 1; +if [ $stage -le 1 ]; then + echo "$0: initializing GMM from stats" + $cmd $dir/log/init.log \ + fgmm-global-init-from-accs --verbose=2 \ + "fgmm-global-sum-accs - $dir/stats.*.acc |" $num_components \ + $dir/final.ubm || exit 1; +fi -exit 0; +if $cleanup; then + echo "$0: removing stats" + for g in $(seq $nj); do + rm $dir/stats.$g.acc || exit 1 + done +fi diff --git a/egs/sre10/v1/local/dnn/get_egs2.sh b/egs/sre08/v1/sid/nnet2/get_egs2.sh similarity index 100% rename from egs/sre10/v1/local/dnn/get_egs2.sh rename to egs/sre08/v1/sid/nnet2/get_egs2.sh diff --git a/egs/sre10/v1/local/dnn/get_lda.sh b/egs/sre08/v1/sid/nnet2/get_lda.sh similarity index 99% rename from egs/sre10/v1/local/dnn/get_lda.sh rename to egs/sre08/v1/sid/nnet2/get_lda.sh index 253222ff271..89594a20f84 100755 --- a/egs/sre10/v1/local/dnn/get_lda.sh +++ b/egs/sre08/v1/sid/nnet2/get_lda.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). # 2015 David Snyder # Apache 2.0. # @@ -108,7 +108,7 @@ N=$[$num_feats/$nj] case $feat_type in raw) feats="ark,s,cs:utils/subset_scp.pl --quiet $N $sdata/JOB/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` cp $alidir/{splice_opts,final.mat} $dir || exit 1; feats="ark,s,cs:utils/subset_scp.pl --quiet $N $sdata/JOB/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" @@ -144,7 +144,7 @@ fi echo $ivector_dim >$dir/ivector_dim if [ -z "$lda_dim" ]; then - spliced_feats_one="$(echo "$spliced_feats" | sed s:JOB:1:g)" + spliced_feats_one="$(echo "$spliced_feats" | sed s:JOB:1:g)" lda_dim=$(feat-to-dim "$spliced_feats_one" -) || exit 1; fi diff --git a/egs/sre10/v1/local/dnn/train_multisplice_accel2.sh b/egs/sre08/v1/sid/nnet2/train_multisplice_accel2.sh similarity index 99% rename from egs/sre10/v1/local/dnn/train_multisplice_accel2.sh rename to egs/sre08/v1/sid/nnet2/train_multisplice_accel2.sh index cee1b620c6a..c56e89b5d94 100755 --- a/egs/sre10/v1/local/dnn/train_multisplice_accel2.sh +++ b/egs/sre08/v1/sid/nnet2/train_multisplice_accel2.sh @@ -8,7 +8,7 @@ # Apache 2.0. # This is a modified version of train_multisplice_accel2.sh in -# steps/nnet2/ for speaker recognition. 
The main difference is +# ../../steps/nnet2/ for speaker recognition. The main difference is # that it uses different get_lda.sh and get_egs2.sh scripts. # # The original train_multisplice_accel2.sh was a modified version of @@ -201,7 +201,7 @@ extra_opts+=(--transform-dir $transform_dir) if [ $stage -le -4 ]; then echo "$0: calling get_lda.sh" - local/dnn/get_lda.sh $lda_opts "${extra_opts[@]}" --left-context $first_left_context --right-context $first_right_context --cmd "$cmd" $data $lang $alidir $dir || exit 1; + sid/nnet2/get_lda.sh $lda_opts "${extra_opts[@]}" --left-context $first_left_context --right-context $first_right_context --cmd "$cmd" $data $lang $alidir $dir || exit 1; fi # these files will have been written by get_lda.sh feat_dim=$(cat $dir/feat_dim) || exit 1; @@ -213,7 +213,7 @@ if [ $stage -le -3 ] && [ -z "$egs_dir" ]; then extra_opts+=(--left-context $nnet_left_context ) extra_opts+=(--right-context $nnet_right_context ) echo "$0: calling get_egs2.sh" - local/dnn/get_egs2.sh $egs_opts "${extra_opts[@]}" \ + sid/nnet2/get_egs2.sh $egs_opts "${extra_opts[@]}" \ --samples-per-iter $samples_per_iter --stage $get_egs_stage \ --io-opts "$io_opts" \ --cmd "$cmd" $egs_opts \ diff --git a/egs/sre08/v1/sid/train_diag_ubm.sh b/egs/sre08/v1/sid/train_diag_ubm.sh index 69e2fca5538..1e79fc10c99 100755 --- a/egs/sre08/v1/sid/train_diag_ubm.sh +++ b/egs/sre08/v1/sid/train_diag_ubm.sh @@ -135,10 +135,11 @@ for x in `seq 0 $[$num_iters-1]`; do $cmd $dir/log/update.$x.log \ gmm-global-est $opt --min-gaussian-weight=$min_gaussian_weight $dir/$x.dubm "gmm-global-sum-accs - $dir/$x.*.acc|" \ $dir/$[$x+1].dubm || exit 1; - rm $dir/$x.*.acc $dir/$x.dubm + $cleanup && rm $dir/$x.*.acc $dir/$x.dubm fi done -rm $dir/gselect.*.gz +$cleanup && rm $dir/gselect.*.gz + mv $dir/$num_iters.dubm $dir/final.dubm || exit 1; exit 0; diff --git a/egs/sre08/v1/sid/train_ivector_extractor.sh b/egs/sre08/v1/sid/train_ivector_extractor.sh index fd9ff5185d5..68ba0ca65fd 100755 --- a/egs/sre08/v1/sid/train_ivector_extractor.sh +++ b/egs/sre08/v1/sid/train_ivector_extractor.sh @@ -140,26 +140,24 @@ while [ $x -lt $num_iters ]; do done wait [ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1; - accs="" - for j in $(seq $nj); do - accs+="$dir/acc.$x.$j " - done - echo "Summing accs (pass $x)" - $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ - ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; + accs="" + for j in $(seq $nj); do + accs+="$dir/acc.$x.$j " + done + echo "Summing accs (pass $x)" + $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ + ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; echo "Updating model (pass $x)" nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. 
- $cmd --num-threads $nt $dir/log/update.$x.log \ - ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; - rm $dir/acc.$x.* - if $cleanup; then - rm $dir/acc.$x - # rm $dir/$x.ie - fi + $cmd $parallel_opts $dir/log/update.$x.log \ + ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; + rm $dir/acc.$x.* + $cleanup && rm $dir/acc.$x $dir/$x.ie fi x=$[$x+1] done - +$cleanup && rm -f $dir/post.*.gz +rm -f $dir/final.ie ln -s $x.ie $dir/final.ie diff --git a/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh b/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh index 9904a8cd3c6..c64b83c5a4b 100755 --- a/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh +++ b/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh @@ -1,7 +1,7 @@ #!/bin/bash # Copyright 2013 Daniel Povey -# 2014-2015 David Snyder +# 2014-2017 David Snyder # 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) # 2015 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0. @@ -28,12 +28,12 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one +nj=5 # this is the number of separate queue jobs we run, but each one # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 -num_processes=4 # each job runs this many processes, each with --num-threads threads +num_processes=2 # each job runs this many processes, each with --num-threads threads cmd="run.pl" stage=-4 num_gselect=20 # Gaussian-selection using diagonal model: number of Gaussians to select @@ -46,6 +46,9 @@ cleanup=true posterior_scale=1.0 # This scale helps to control for successve features being highly # correlated. E.g. try 0.1 or 0.3 sum_accs_opt= +use_gpu=true +chunk_size=256 +nnet_job_opt= # End configuration section. echo "$0 $@" # Print the command line for logging @@ -71,6 +74,9 @@ if [ $# != 5 ]; then echo " # diagonal model." echo " --sum-accs-opt # Option e.g. '-l hostname=a15' to localize" echo " # sum-accs process to nfs server." + echo " --nnet-job-opt # Options for the DNN jobs which add to or" + echo " # replace those specified by --cmd" + echo " --chunk-size # Number of frames processed at a time by the DNN" exit 1; fi @@ -80,6 +86,21 @@ data=$3 data_dnn=$4 dir=$5 +gpu_opt="" +if $use_gpu; then + nnet_job_opt="$nnet_job_opt --gpu 1" + gpu_opt="--use-gpu=yes" + if ! cuda-compiled; then + echo "$0: WARNING: you are trying to use the GPU but you have not compiled" + echo " for CUDA. If you have GPUs and have nvcc installed, go to src/" + echo " and do ./configure; make" + exit 1 + fi +else + echo "$0: without using a GPU this will be slow." + gpu_opt="--use-gpu=no" +fi + srcdir=$(dirname $fgmm_model) for f in $fgmm_model $data/feats.scp ; do @@ -100,8 +121,6 @@ if [ -f $srcdir/delta_opts ]; then cp $srcdir/delta_opts $dir/ 2>/dev/null fi -splice_opts=`cat exp/nnet//splice_opts 2>/dev/null` # frame-splicing options - parallel_opts="--num-threads $[$num_threads*$num_processes]" ## Set up features. 
feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -124,12 +143,17 @@ fi if [ $stage -le -1 ]; then echo $nj_full > $dir/num_jobs echo "$0: doing DNN posterior computation" - $cmd JOB=1:$nj_full $dir/log/post.JOB.log \ - nnet-am-compute --apply-log=true $nnet "$nnet_feats" ark:- \ - \| select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- \ - \| logprob-to-post --min-post=$min_post ark,s,cs:- ark:- \| \ - scale-post ark:- $posterior_scale "ark:|gzip -c >$dir/post.JOB.gz" || exit 1; - + for g in $(seq $nj_full); do + $cmd $nnet_job_opt $dir/log/post.$g.log \ + nnet-am-compute $gpu_opt \ + --chunk-size=${chunk_size} --apply-log=true $nnet \ + "`echo $nnet_feats | sed s/JOB/$g/g`" \ + ark:- \ + \| select-voiced-frames ark:- scp,s,cs:$sdata/$g/vad.scp ark:- \ + \| logprob-to-post ark:- ark:- \ + \| scale-post ark:- $posterior_scale "ark:|gzip -c >$dir/post.$g.gz" || exit 1 & + done + wait else if ! [ $nj_full -eq $(cat $dir/num_jobs) ]; then echo "Num-jobs mismatch $nj_full versus $(cat $dir/num_jobs)" @@ -156,26 +180,25 @@ while [ $x -lt $num_iters ]; do done wait [ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1; - accs="" - for j in $(seq $nj); do - accs+="$dir/acc.$x.$j " - done - echo "Summing accs (pass $x)" - $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ - ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; + accs="" + for j in $(seq $nj); do + accs+="$dir/acc.$x.$j " + done + echo "Summing accs (pass $x)" + $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ + ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; echo "Updating model (pass $x)" nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. - $cmd --num-threads $nt $dir/log/update.$x.log \ - ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; - rm $dir/acc.$x.* - if $cleanup; then - rm $dir/acc.$x - # rm $dir/$x.ie - fi + $cmd $parallel_opts $dir/log/update.$x.log \ + ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; + rm $dir/acc.$x.* + $cleanup && rm $dir/acc.$x $dir/$x.ie fi x=$[$x+1] done +$cleanup && rm -f $dir/post.*.gz +rm -f $dir/final.ie ln -s $x.ie $dir/final.ie diff --git a/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh b/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh index 5a1a67e9eb8..97b9789af0c 100755 --- a/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh +++ b/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh @@ -4,17 +4,14 @@ # egs/fisher_english/s5/local/online. It has been modified # for speaker recognition. -. cmd.sh - - stage=1 train_stage=-10 use_gpu=true set -e -. cmd.sh +. ./cmd.sh . ./path.sh -. ./utils/parse_options.sh +. utils/parse_options.sh # assume use_gpu=true since it would be way too slow otherwise. @@ -31,14 +28,13 @@ minibatch_size=512 dir=exp/nnet2_online/nnet_ms_a mkdir -p exp/nnet2_online - # Stages 1 through 5 are done in run_nnet2_common.sh, # so it can be shared with other similar scripts. 
local/dnn/run_nnet2_common.sh --stage $stage if [ $stage -le 6 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then - utils/create_split_dir.pl /export/b0{6,7,8,9}/$(USER)/kaldi-dsata/egs/fisher_english/s5/$dir/egs/storage $dir/egs/storage + utils/create_split_dir.pl /export/b0{6,7,8,9}/$USER/kaldi-data/egs/fisher_english/s5/$dir/egs/storage $dir/egs/storage fi # Because we have a lot of data here and we don't want the training to take @@ -48,12 +44,12 @@ if [ $stage -le 6 ]; then # data across four filesystems for speed. - local/dnn/train_multisplice_accel2.sh --stage $train_stage \ + sid/nnet2/train_multisplice_accel2.sh --stage $train_stage \ --feat-type raw \ --splice-indexes "layer0/-2:-1:0:1:2 layer1/-1:2 layer3/-3:3 layer4/-7:2" \ --num-epochs 6 \ --num-hidden-layers 6 \ - --num-jobs-initial 3 --num-jobs-final 18 \ + --num-jobs-initial 3 --num-jobs-final 8 \ --num-threads "$num_threads" \ --minibatch-size "$minibatch_size" \ --parallel-opts "$parallel_opts" \ diff --git a/egs/sre10/v1/local/dnn/train_dnn.sh b/egs/sre10/v1/local/dnn/train_dnn.sh index d9330e58b69..33aed9abdc7 100755 --- a/egs/sre10/v1/local/dnn/train_dnn.sh +++ b/egs/sre10/v1/local/dnn/train_dnn.sh @@ -10,7 +10,6 @@ . path.sh mfccdir=`pwd`/mfcc set -e - # the next command produces the data in local/train_all_asr local/dnn/fisher_data_prep.sh /export/corpora3/LDC/LDC2004T19 /export/corpora3/LDC/LDC2005T19 \ /export/corpora3/LDC/LDC2004S13 /export/corpora3/LDC/LDC2005S13 @@ -169,5 +168,5 @@ steps/train_sat.sh --cmd "$train_cmd" \ # this will help find issues with the lexicon. # steps/cleanup/debug_lexicon.sh --nj 300 --cmd "$train_cmd" data/train_asr_100k data/lang exp/tri5a data/local/dict/lexicon.txt exp/debug_lexicon_100k -## The following is based on the best current neural net recipe. +# The following is based on an older nnet2 recipe. local/dnn/run_nnet2_multisplice.sh diff --git a/egs/sre10/v1/local/plda_scoring.sh b/egs/sre10/v1/local/plda_scoring.sh index ef17edb4e05..63d4a4f0d4c 100755 --- a/egs/sre10/v1/local/plda_scoring.sh +++ b/egs/sre10/v1/local/plda_scoring.sh @@ -5,6 +5,10 @@ # This script trains PLDA models and does scoring. use_existing_models=false +simple_length_norm=false # If true, replace the default length normalization + # performed in PLDA by an alternative that + # normalizes the length of the iVectors to be equal + # to the square root of the iVector dimension. 
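With the new flag, the alternative normalization becomes a command-line toggle; a usage sketch, assuming plda_scoring.sh parses its configuration variables in the standard way, and writing to a hypothetical exp/scores_simple_norm directory:

  local/plda_scoring.sh --simple-length-norm true \
    data/sre data/sre10_train data/sre10_test \
    exp/ivectors_sre exp/ivectors_sre10_train exp/ivectors_sre10_test \
    $trials exp/scores_simple_norm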
echo "$0 $@" # Print the command line for logging @@ -38,9 +42,10 @@ fi mkdir -p $scores_dir/log run.pl $scores_dir/log/plda_scoring.log \ - ivector-plda-scoring --normalize-length=true \ - --num-utts=ark:${enroll_ivec_dir}/num_utts.ark \ - "ivector-copy-plda --smoothing=0.0 ${plda_ivec_dir}/plda - |" \ - "ark:ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec scp:${enroll_ivec_dir}/spk_ivector.scp ark:- | ivector-normalize-length ark:- ark:- |" \ - "ark:ivector-normalize-length scp:${test_ivec_dir}/ivector.scp ark:- | ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ - "cat '$trials' | cut -d\ --fields=1,2 |" $scores_dir/plda_scores || exit 1; + ivector-plda-scoring --normalize-length=true \ + --simple-length-normalization=$simple_length_norm \ + --num-utts=ark:${enroll_ivec_dir}/num_utts.ark \ + "ivector-copy-plda --smoothing=0.0 ${plda_ivec_dir}/plda - |" \ + "ark:ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec scp:${enroll_ivec_dir}/spk_ivector.scp ark:- | ivector-normalize-length ark:- ark:- |" \ + "ark:ivector-normalize-length scp:${test_ivec_dir}/ivector.scp ark:- | ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ + "cat '$trials' | cut -d\ --fields=1,2 |" $scores_dir/plda_scores || exit 1; diff --git a/egs/sre10/v1/run.sh b/egs/sre10/v1/run.sh index c7211968a28..4c5049a73bc 100755 --- a/egs/sre10/v1/run.sh +++ b/egs/sre10/v1/run.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2015-2016 David Snyder +# Copyright 2015-2017 David Snyder # 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) # 2015 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0. @@ -86,15 +86,15 @@ sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 35G" \ exp/extractor # Extract i-vectors. -sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 40 \ exp/extractor data/sre10_train \ exp/ivectors_sre10_train -sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 40 \ exp/extractor data/sre10_test \ exp/ivectors_sre10_test -sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 40 \ exp/extractor data/sre \ exp/ivectors_sre @@ -108,27 +108,28 @@ local/scoring_common.sh data/sre data/sre10_train data/sre10_test \ # best, so we don't focus on the scores obtained here. # # local/cosine_scoring.sh data/sre10_train data/sre10_test \ -# exp/ivectors_sre10_train exp/ivectors_sre10_test $trials local/scores_gmm_2048_ind_pooled +# exp/ivectors_sre10_train exp/ivectors_sre10_test $trials exp/scores_gmm_2048_ind_pooled # local/lda_scoring.sh data/sre data/sre10_train data/sre10_test \ -# exp/ivectors_sre exp/ivectors_sre10_train exp/ivectors_sre10_test $trials local/scores_gmm_2048_ind_pooled +# exp/ivectors_sre exp/ivectors_sre10_train exp/ivectors_sre10_test $trials exp/scores_gmm_2048_ind_pooled # Create a gender independent PLDA model and do scoring. 
local/plda_scoring.sh data/sre data/sre10_train data/sre10_test \ - exp/ivectors_sre exp/ivectors_sre10_train exp/ivectors_sre10_test $trials local/scores_gmm_2048_ind_pooled + exp/ivectors_sre exp/ivectors_sre10_train exp/ivectors_sre10_test $trials exp/scores_gmm_2048_ind_pooled local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_female data/sre10_test_female \ - exp/ivectors_sre exp/ivectors_sre10_train_female exp/ivectors_sre10_test_female $trials_female local/scores_gmm_2048_ind_female + exp/ivectors_sre exp/ivectors_sre10_train_female exp/ivectors_sre10_test_female $trials_female exp/scores_gmm_2048_ind_female local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_male data/sre10_test_male \ - exp/ivectors_sre exp/ivectors_sre10_train_male exp/ivectors_sre10_test_male $trials_male local/scores_gmm_2048_ind_male + exp/ivectors_sre exp/ivectors_sre10_train_male exp/ivectors_sre10_test_male $trials_male exp/scores_gmm_2048_ind_male # Create gender dependent PLDA models and do scoring. local/plda_scoring.sh data/sre_female data/sre10_train_female data/sre10_test_female \ - exp/ivectors_sre exp/ivectors_sre10_train_female exp/ivectors_sre10_test_female $trials_female local/scores_gmm_2048_dep_female + exp/ivectors_sre exp/ivectors_sre10_train_female exp/ivectors_sre10_test_female $trials_female exp/scores_gmm_2048_dep_female local/plda_scoring.sh data/sre_male data/sre10_train_male data/sre10_test_male \ - exp/ivectors_sre exp/ivectors_sre10_train_male exp/ivectors_sre10_test_male $trials_male local/scores_gmm_2048_dep_male + exp/ivectors_sre exp/ivectors_sre10_train_male exp/ivectors_sre10_test_male $trials_male exp/scores_gmm_2048_dep_male -mkdir -p local/scores_gmm_2048_dep_pooled -cat local/scores_gmm_2048_dep_male/plda_scores local/scores_gmm_2048_dep_female/plda_scores \ - > local/scores_gmm_2048_dep_pooled/plda_scores +# Pool the gender dependent results. +mkdir -p exp/scores_gmm_2048_dep_pooled +cat exp/scores_gmm_2048_dep_male/plda_scores exp/scores_gmm_2048_dep_female/plda_scores \ + > exp/scores_gmm_2048_dep_pooled/plda_scores # GMM-2048 PLDA EER # ind pooled: 2.26 @@ -140,7 +141,7 @@ cat local/scores_gmm_2048_dep_male/plda_scores local/scores_gmm_2048_dep_female/ echo "GMM-$num_components EER" for x in ind dep; do for y in female male pooled; do - eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_gmm_${num_components}_${x}_${y}/plda_scores) 2> /dev/null` + eer=`compute-eer <(python local/prepare_for_eer.py $trials exp/scores_gmm_${num_components}_${x}_${y}/plda_scores) 2> /dev/null` echo "${x} ${y}: $eer" done done diff --git a/egs/sre10/v2/run.sh b/egs/sre10/v2/run.sh index 37afc0306c0..b6c24fc1371 100755 --- a/egs/sre10/v2/run.sh +++ b/egs/sre10/v2/run.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2015-2016 David Snyder +# Copyright 2015-2017 David Snyder # 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) # 2015 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0. @@ -105,62 +105,61 @@ utils/fix_data_dir.sh data/train_32k # Initialize a full GMM from the DNN posteriors and speaker recognition # features. This can be used both alone, as a UBM, or to initialize the # i-vector extractor in a DNN-based system. -sid/init_full_ubm_from_dnn.sh --cmd "$train_cmd --mem 6G" \ +sid/init_full_ubm_from_dnn.sh --cmd "$train_cmd --mem 15G" \ data/train_32k \ data/train_dnn_32k $nnet exp/full_ubm # Train an i-vector extractor based on just the supervised-GMM. 
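The options added to sid/init_full_ubm_from_dnn.sh earlier can be overridden at the call site above when the defaults do not fit the cluster; a usage sketch with this recipe's data, where the memory values are illustrative:

  sid/init_full_ubm_from_dnn.sh --cmd "$train_cmd --mem 15G" \
    --use-gpu true --chunk-size 256 --nnet-job-opt "--mem 4G" \
    --cleanup false \
    data/train_32k data/train_dnn_32k $nnet exp/full_ubm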
sid/train_ivector_extractor.sh \ - --cmd "$train_cmd --mem 70G" \ + --cmd "$train_cmd --mem 120G" \ --ivector-dim 600 \ --num-iters 5 exp/full_ubm/final.ubm data/train \ exp/extractor_sup_gmm # Train an i-vector extractor based on the DNN-UBM. sid/train_ivector_extractor_dnn.sh \ - --cmd "$train_cmd --mem 80G" \ - --min-post 0.015 \ - --ivector-dim 600 \ - --num-iters 5 exp/full_ubm/final.ubm $nnet \ + --cmd "$train_cmd --mem 100G" --nnet-job-opt "--mem 4G" \ + --min-post 0.015 --ivector-dim 600 --num-iters 5 \ + exp/full_ubm/final.ubm $nnet \ data/train \ data/train_dnn \ exp/extractor_dnn # Extract i-vectors from the extractor with the sup-GMM UBM. sid/extract_ivectors.sh \ - --cmd "$train_cmd --mem 8G" --nj 40 \ + --cmd "$train_cmd --mem 12G" --nj 40 \ exp/extractor_sup_gmm data/sre10_train \ exp/ivectors_sre10_train_sup_gmm sid/extract_ivectors.sh \ - --cmd "$train_cmd --mem 8G" --nj 40 \ + --cmd "$train_cmd --mem 12G" --nj 40 \ exp/extractor_sup_gmm data/sre10_test \ exp/ivectors_sre10_test_sup_gmm sid/extract_ivectors.sh \ - --cmd "$train_cmd --mem 8G" --nj 40 \ + --cmd "$train_cmd --mem 12G" --nj 40 \ exp/extractor_sup_gmm data/sre \ exp/ivectors_sre_sup_gmm # Extract i-vectors using the extractor with the DNN-UBM. sid/extract_ivectors_dnn.sh \ - --cmd "$train_cmd --mem 10G" --nj 40 \ + --cmd "$train_cmd --mem 15G" --nj 10 \ exp/extractor_dnn \ $nnet \ data/sre10_test \ data/sre10_test_dnn \ exp/ivectors10_test_dnn -sid/extract_ivectors_dnn.sh - --cmd "$train_cmd --mem 10G" --nj 40 \ +sid/extract_ivectors_dnn.sh \ + --cmd "$train_cmd --mem 15G" --nj 10 \ exp/extractor_dnn \ $nnet \ data/sre10_train \ data/sre10_train_dnn \ exp/ivectors10_train_dnn -sid/extract_ivectors_dnn.sh - --cmd "$train_cmd --mem 10G" --nj 40 \ +sid/extract_ivectors_dnn.sh \ + --cmd "$train_cmd --mem 15G" --nj 10 \ exp/extractor_dnn \ $nnet \ data/sre \ @@ -183,87 +182,90 @@ local/scoring_common.sh data/sre data/sre10_train data/sre10_test \ # # local/cosine_scoring.sh data/sre10_train data/sre10_test \ # exp/ivectors_sre10_train exp/ivectors_sre10_test $trials \ -# local/scores_gmm_2048_ind_pooled +# exp/scores_gmm_2048_ind_pooled # local/lda_scoring.sh data/sre data/sre10_train data/sre10_test \ # exp/ivectors_sre exp/ivectors_sre10_train exp/ivectors_sre10_test \ -# $trials local/scores_gmm_2048_ind_pooled +# $trials exp/scores_gmm_2048_ind_pooled # Create a gender independent PLDA model and do scoring with the sup-GMM system. local/plda_scoring.sh data/sre data/sre10_train data/sre10_test \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm \ - exp/ivectors_sre10_test_sup_gmm $trials local/scores_sup_gmm_ind_pooled + exp/ivectors_sre10_test_sup_gmm $trials exp/scores_sup_gmm_ind_pooled local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_female data/sre10_test_female \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_female \ - exp/ivectors_sre10_test_sup_gmm_female $trials_female local/scores_sup_gmm_ind_female + exp/ivectors_sre10_test_sup_gmm_female $trials_female exp/scores_sup_gmm_ind_female local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_male data/sre10_test_male \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_male \ - exp/ivectors_sre10_test_sup_gmm_male $trials_male local/scores_sup_gmm_ind_male + exp/ivectors_sre10_test_sup_gmm_male $trials_male exp/scores_sup_gmm_ind_male # Create gender dependent PLDA models and do scoring with the sup-GMM system. 
local/plda_scoring.sh data/sre_female data/sre10_train_female data/sre10_test_female \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_female \ - exp/ivectors_sre10_test_sup_gmm_female $trials_female local/scores_sup_gmm_dep_female + exp/ivectors_sre10_test_sup_gmm_female $trials_female exp/scores_sup_gmm_dep_female local/plda_scoring.sh data/sre_male data/sre10_train_male data/sre10_test_male \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_male \ - exp/ivectors_sre10_test_sup_gmm_male $trials_male local/scores_sup_gmm_dep_male -mkdir -p local/scores_sup_gmm_dep_pooled -cat local/scores_sup_gmm_dep_male/plda_scores local/scores_sup_gmm_dep_female/plda_scores \ - > local/scores_sup_gmm_dep_pooled/plda_scores + exp/ivectors_sre10_test_sup_gmm_male $trials_male exp/scores_sup_gmm_dep_male + +# Pool the gender dependent results +mkdir -p exp/scores_sup_gmm_dep_pooled +cat exp/scores_sup_gmm_dep_male/plda_scores exp/scores_sup_gmm_dep_female/plda_scores \ + > exp/scores_sup_gmm_dep_pooled/plda_scores # Create a gender independent PLDA model and do scoring with the DNN system. local/plda_scoring.sh data/sre data/sre10_train data/sre10_test \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn \ - exp/ivectors_sre10_test_dnn $trials local/scores_dnn_ind_pooled + exp/ivectors_sre10_test_dnn $trials exp/scores_dnn_ind_pooled local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_female data/sre10_test_female \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_female \ - exp/ivectors_sre10_test_dnn_female $trials_female local/scores_dnn_ind_female + exp/ivectors_sre10_test_dnn_female $trials_female exp/scores_dnn_ind_female local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_male data/sre10_test_male \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_male \ - exp/ivectors_sre10_test_dnn_male $trials_male local/scores_dnn_ind_male + exp/ivectors_sre10_test_dnn_male $trials_male exp/scores_dnn_ind_male # Create gender dependent PLDA models and do scoring with the DNN system. 
local/plda_scoring.sh data/sre_female data/sre10_train_female data/sre10_test_female \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_female \ - exp/ivectors_sre10_test_dnn_female $trials_female local/scores_dnn_dep_female + exp/ivectors_sre10_test_dnn_female $trials_female exp/scores_dnn_dep_female local/plda_scoring.sh data/sre_male data/sre10_train_male data/sre10_test_male \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_male \ - exp/ivectors_sre10_test_dnn_male $trials_male local/scores_dnn_dep_male -mkdir -p local/scores_dnn_dep_pooled -cat local/scores_dnn_dep_male/plda_scores local/scores_dnn_dep_female/plda_scores \ - > local/scores_dnn_dep_pooled/plda_scores + exp/ivectors_sre10_test_dnn_male $trials_male exp/scores_dnn_dep_male + +mkdir -p exp/scores_dnn_dep_pooled +cat exp/scores_dnn_dep_male/plda_scores exp/scores_dnn_dep_female/plda_scores \ + > exp/scores_dnn_dep_pooled/plda_scores # Sup-GMM PLDA EER # ind pooled: 1.72 # ind female: 1.81 -# ind male: 1.56 -# dep female: 1.89 -# dep male: 1.39 -# dep pooled: 1.65 -echo "Sup-GMM-$num_components EER" +# ind male: 1.70 +# dep female: 2.03 +# dep male: 1.50 +# dep pooled: 1.79 +echo "Sup-GMM EER" for x in ind dep; do for y in female male pooled; do - eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_sup_gmm_${x}_${y}/plda_scores) 2> /dev/null` + eer=`compute-eer <(python local/prepare_for_eer.py $trials exp/scores_sup_gmm_${x}_${y}/plda_scores) 2> /dev/null` echo "${x} ${y}: $eer" done done -# DNN PLDA EER -# ind pooled: 1.05 -# ind female: 1.33 -# ind male: 0.75 -# dep female: 1.41 -# dep male: 0.64 -# dep pooled: 1.02 -echo "DNN-$num_components EER" +# DNN-UBM EER +# ind pooled: 1.01 +# ind female: 1.16 +# ind male: 0.78 +# dep female: 1.27 +# dep male: 0.61 +# dep pooled: 0.96 +echo "DNN-UBM EER" for x in ind dep; do for y in female male pooled; do - eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_dnn_${x}_${y}/plda_scores) 2> /dev/null` + eer=`compute-eer <(python local/prepare_for_eer.py $trials exp/scores_dnn_${x}_${y}/plda_scores) 2> /dev/null` echo "${x} ${y}: $eer" done done # In comparison, here is the EER for an unsupervised GMM-based system -# with 5297 components (the same as the number of senones in the DNN): +# with 5297 components (about the same as the number of senones in the DNN): # GMM-5297 PLDA EER # ind pooled: 2.25 # ind female: 2.33 diff --git a/src/fgmmbin/fgmm-global-init-from-accs.cc b/src/fgmmbin/fgmm-global-init-from-accs.cc index 23dc6be75cf..70b43e05d11 100644 --- a/src/fgmmbin/fgmm-global-init-from-accs.cc +++ b/src/fgmmbin/fgmm-global-init-from-accs.cc @@ -1,8 +1,8 @@ // fgmmbin/fgmm-global-init-from-accs.cc -// Copyright 2015 David Snyder -// 2015 Johns Hopkins University (Author: Daniel Povey) -// 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) +// Copyright 2015-2017 David Snyder +// 2015 Johns Hopkins University (Author: Daniel Povey) +// 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) // See ../../COPYING for clarification regarding multiple authors // @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) { } int32 num_gauss = gmm_accs.NumGauss(), dim = gmm_accs.Dim(), - tot_floored = 0, gauss_floored = 0; + tot_floored = 0, gauss_floored = 0, tot_low_occ = 0; FullGmm fgmm(num_components, dim); @@ -69,23 +69,30 @@ int main(int argc, char *argv[]) { Matrix means(num_gauss, dim); std::vector > invcovars; - BaseFloat occ_sum = gmm_accs.occupancy().Sum(); for (int32 i = 0; i < num_components; i++) { - BaseFloat occ = 
gmm_accs.occupancy()(i), - prob; - if (occ_sum > 0.0) - prob = occ / occ_sum; - else - prob = 1.0 / num_gauss; - weights(i) = prob; - - Vector mean(gmm_accs.mean_accumulator().Row(i)); - mean.Scale(1.0 / occ); + BaseFloat occ = gmm_accs.occupancy()(i); + weights(i) = occ; + Vector mean(dim, kSetZero); + SpMatrix covar(dim, kSetZero); + + // If the occupancy for a Gaussian is very low, set it to a small value. + if (occ < 1e-10) { + weights(i) = 1e-10; + mean.SetRandn(); + Vector diag(mean.Dim()); + diag.Set(1.0); + covar.AddDiagVec(1.0, diag); + tot_low_occ++; + // This is the typical case. + } else { + mean.CopyRowFromMat(gmm_accs.mean_accumulator(), i); + mean.Scale(1.0 / occ); + covar.CopyFromSp(gmm_accs.covariance_accumulator()[i]); + covar.Scale(1.0 / occ); + covar.AddVec2(-1.0, mean); // subtract squared means. + } means.CopyRowFromVec(mean, i); - SpMatrix covar(gmm_accs.covariance_accumulator()[i]); - covar.Scale(1.0 / occ); - covar.AddVec2(-1.0, means.Row(i)); // subtract squared means. // Floor variance Eigenvalues. BaseFloat floor = std::max( static_cast(gmm_opts.variance_floor), @@ -98,14 +105,21 @@ int main(int argc, char *argv[]) { covar.InvertDouble(); invcovars.push_back(covar); } + weights.Scale(1.0 / weights.Sum()); fgmm.SetWeights(weights); fgmm.SetInvCovarsAndMeans(invcovars, means); int32 num_bad = fgmm.ComputeGconsts(); KALDI_LOG << "FullGmm has " << num_bad << " bad GConsts"; + if (tot_floored > 0) { KALDI_WARN << tot_floored << " variances floored in " << gauss_floored << " Gaussians."; } + if (tot_low_occ > 0) { + KALDI_WARN << tot_low_occ << " out of " << num_gauss + << " Gaussians had very low occupancy."; + } + WriteKaldiObject(fgmm, model_out_filename, binary_write); KALDI_LOG << "Written model to " << model_out_filename; diff --git a/src/gmm/full-gmm.cc b/src/gmm/full-gmm.cc index 7851d8648f7..0f634eeee6b 100644 --- a/src/gmm/full-gmm.cc +++ b/src/gmm/full-gmm.cc @@ -113,7 +113,7 @@ int32 FullGmm::ComputeGconsts() { // So gc is the likelihood at zero feature value. 
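The fgmm-global-init-from-accs change above is compact enough to restate outside Kaldi: weights start out as the raw occupancies, each mean and covariance is estimated from the accumulated statistics, Gaussians with very low occupancy fall back to a tiny weight, a random mean and a unit covariance, and the weights are normalized once at the end. A minimal numpy sketch of that rule (illustrative names; the variance flooring and inversion done by the surrounding C++ are omitted):

import numpy as np

def init_fgmm_from_accs(occ, mean_acc, covar_acc, low_occ=1e-10):
    """occ: (G,) zeroth-order, mean_acc: (G, D) first-order and
    covar_acc: (G, D, D) second-order accumulated statistics."""
    G, D = mean_acc.shape
    weights = occ.astype(float)
    means = np.zeros((G, D))
    covars = np.zeros((G, D, D))
    for i in range(G):
        if occ[i] < low_occ:
            # Very low occupancy: tiny weight, random mean, unit covariance.
            weights[i] = low_occ
            means[i] = np.random.randn(D)
            covars[i] = np.eye(D)
        else:
            means[i] = mean_acc[i] / occ[i]
            covars[i] = covar_acc[i] / occ[i] - np.outer(means[i], means[i])
    weights /= weights.sum()  # normalize once at the end, as the patch does
    return weights, means, covars

Normalizing at the end, instead of dividing each weight by the occupancy sum up front as the removed code did, lets the low-occupancy fallback inject its 1e-10 weight without any special-casing of the total.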
if (KALDI_ISNAN(gc)) { // negative infinity is OK but NaN is not acceptable - KALDI_ERR << "At component" << mix + KALDI_ERR << "At component " << mix << ", not a number in gconst computation"; } if (KALDI_ISINF(gc)) { @@ -687,7 +687,7 @@ BaseFloat FullGmm::GaussianSelectionPreselect( } Vector loglikes(preselect_sz); LogLikelihoodsPreselect(data, preselect, &loglikes); - + Vector loglikes_copy(loglikes); BaseFloat *ptr = loglikes_copy.Data(); std::nth_element(ptr, ptr+preselect_sz-this_num_gselect, diff --git a/src/ivectorbin/ivector-adapt-plda.cc b/src/ivectorbin/ivector-adapt-plda.cc index e9a4929de1c..4f5d925e203 100644 --- a/src/ivectorbin/ivector-adapt-plda.cc +++ b/src/ivectorbin/ivector-adapt-plda.cc @@ -33,18 +33,18 @@ int main(int argc, char *argv[]) { "\n" "Usage: ivector-adapt-plda [options] \n" "e.g.: ivector-adapt-plda plda ark:ivectors.ark plda.adapted\n"; - + ParseOptions po(usage); bool binary = true; po.Register("binary", &binary, "Write output in binary mode"); PldaUnsupervisedAdaptorConfig config; config.Register(&po); - - - + + + po.Read(argc, argv); - + if (po.NumArgs() != 3) { po.PrintUsage(); exit(1); @@ -53,19 +53,19 @@ int main(int argc, char *argv[]) { std::string plda_rxfilename = po.GetArg(1), ivector_rspecifier = po.GetArg(2), plda_wxfilename = po.GetArg(3); - + Plda plda; ReadKaldiObject(plda_rxfilename, &plda); SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier); - + int32 num_done = 0; PldaUnsupervisedAdaptor adaptor; for (; !ivector_reader.Done(); ivector_reader.Next(), num_done++) adaptor.AddStats(1.0, ivector_reader.Value()); adaptor.UpdatePlda(config, &plda); - + WriteKaldiObject(plda, plda_wxfilename, binary); return (num_done != 0 ? 0 : 1); diff --git a/src/ivectorbin/ivector-compute-dot-products.cc b/src/ivectorbin/ivector-compute-dot-products.cc index 2b274b3f22b..5586cd06efb 100644 --- a/src/ivectorbin/ivector-compute-dot-products.cc +++ b/src/ivectorbin/ivector-compute-dot-products.cc @@ -44,11 +44,11 @@ int main(int argc, char *argv[]) { "e.g.: \n" " ivector-compute-dot-products trials ark:train_ivectors.scp ark:test_ivectors.scp trials.scored\n" "See also: ivector-plda-scoring\n"; - + ParseOptions po(usage); - + po.Read(argc, argv); - + if (po.NumArgs() != 4) { po.PrintUsage(); exit(1); @@ -61,10 +61,10 @@ int main(int argc, char *argv[]) { int64 num_done = 0, num_err = 0; - + RandomAccessBaseFloatVectorReader ivector1_reader(ivector1_rspecifier); RandomAccessBaseFloatVectorReader ivector2_reader(ivector2_rspecifier); - + Input ki(trials_rxfilename); bool binary = false; @@ -101,7 +101,7 @@ int main(int argc, char *argv[]) { num_done++; ko.Stream() << key1 << ' ' << key2 << ' ' << dot_prod << std::endl; } - + if (num_done != 0) { BaseFloat mean = sum / num_done, scatter = sumsq / num_done, variance = scatter - mean * mean, stddev = sqrt(variance); diff --git a/src/ivectorbin/ivector-compute-lda.cc b/src/ivectorbin/ivector-compute-lda.cc index e80a9c94f47..a67013024d8 100644 --- a/src/ivectorbin/ivector-compute-lda.cc +++ b/src/ivectorbin/ivector-compute-lda.cc @@ -100,9 +100,9 @@ void ComputeLdaTransform( int32 lda_dim = lda_out->NumRows(), dim = lda_out->NumCols(); KALDI_ASSERT(dim == utt2ivector.begin()->second->Dim()); KALDI_ASSERT(lda_dim > 0 && lda_dim <= dim); - + CovarianceStats stats(dim); - + std::map >::const_iterator iter; for (iter = spk2utt.begin(); iter != spk2utt.end(); ++iter) { const std::vector &uttlist = iter->second; @@ -134,10 +134,10 @@ void ComputeLdaTransform( SpMatrix mat_to_normalize(dim); 
mat_to_normalize.AddSp(total_covariance_factor, total_covar); mat_to_normalize.AddSp(1.0 - total_covariance_factor, within_covar); - - Matrix T(dim, dim); + + Matrix T(dim, dim); ComputeNormalizingTransform(mat_to_normalize, &T); - + SpMatrix between_covar(total_covar); between_covar.AddSp(-1.0, within_covar); @@ -152,7 +152,7 @@ void ComputeLdaTransform( // really matter) SortSvd(&s, &U, static_cast*>(NULL), sort_on_absolute_value); - + KALDI_LOG << "Singular values of between-class covariance after projecting " << "with interpolated [total/within] covariance with a weight of " << total_covariance_factor << " on the total covariance, are: " << s; @@ -205,12 +205,12 @@ int main(int argc, char *argv[]) { "\n" "e.g.: \n" " ivector-compute-lda ark:ivectors.ark ark:utt2spk lda.mat\n"; - + ParseOptions po(usage); int32 lda_dim = 100; // Dimension we reduce to BaseFloat total_covariance_factor = 0.0; - bool binary = true; + bool binary = true; po.Register("dim", &lda_dim, "Dimension we keep with the LDA transform"); po.Register("total-covariance-factor", &total_covariance_factor, @@ -218,9 +218,9 @@ int main(int argc, char *argv[]) { "unit; if 1.0, the total covariance; if between, we normalize " "an interpolated matrix."); po.Register("binary", &binary, "Write output in binary mode"); - + po.Read(argc, argv); - + if (po.NumArgs() != 3) { po.PrintUsage(); exit(1); @@ -229,12 +229,12 @@ int main(int argc, char *argv[]) { std::string ivector_rspecifier = po.GetArg(1), utt2spk_rspecifier = po.GetArg(2), lda_wxfilename = po.GetArg(3); - + int32 num_done = 0, num_err = 0, dim = 0; - + SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier); RandomAccessTokenReader utt2spk_reader(utt2spk_rspecifier); - + std::map *> utt2ivector; std::map > spk2utt; @@ -277,7 +277,7 @@ int main(int argc, char *argv[]) { ComputeAndSubtractMean(utt2ivector, &mean); KALDI_LOG << "2-norm of iVector mean is " << mean.Norm(2.0); - + Matrix lda_mat(lda_dim, dim + 1); // LDA matrix without the offset term. 
SubMatrix linear_part(lda_mat, 0, lda_dim, 0, dim); ComputeLdaTransform(utt2ivector, @@ -287,15 +287,15 @@ int main(int argc, char *argv[]) { Vector offset(lda_dim); offset.AddMatVec(-1.0, linear_part, kNoTrans, mean, 0.0); lda_mat.CopyColFromVec(offset, dim); // add mean-offset to transform - + KALDI_VLOG(2) << "2-norm of transformed iVector mean is " << offset.Norm(2.0); - + WriteKaldiObject(lda_mat, lda_wxfilename, binary); KALDI_LOG << "Wrote LDA transform to " << PrintableWxfilename(lda_wxfilename); - + std::map *>::iterator iter; for (iter = utt2ivector.begin(); iter != utt2ivector.end(); ++iter) delete iter->second; diff --git a/src/ivectorbin/ivector-compute-plda.cc b/src/ivectorbin/ivector-compute-plda.cc index 9dced7f44e6..2e874adcca6 100644 --- a/src/ivectorbin/ivector-compute-plda.cc +++ b/src/ivectorbin/ivector-compute-plda.cc @@ -36,7 +36,7 @@ int main(int argc, char *argv[]) { "\n" "e.g.: \n" " ivector-compute-plda ark:spk2utt ark,s,cs:ivectors.ark plda\n"; - + ParseOptions po(usage); bool binary = true; @@ -45,9 +45,9 @@ int main(int argc, char *argv[]) { plda_config.Register(&po); po.Register("binary", &binary, "Write output in binary mode"); - + po.Read(argc, argv); - + if (po.NumArgs() != 3) { po.PrintUsage(); exit(1); @@ -56,15 +56,15 @@ int main(int argc, char *argv[]) { std::string spk2utt_rspecifier = po.GetArg(1), ivector_rspecifier = po.GetArg(2), plda_wxfilename = po.GetArg(3); - + int64 num_spk_done = 0, num_spk_err = 0, num_utt_done = 0, num_utt_err = 0; SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); RandomAccessBaseFloatVectorReader ivector_reader(ivector_rspecifier); - + PldaStats plda_stats; - + for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { std::string spk = spk2utt_reader.Key(); const std::vector &uttlist = spk2utt_reader.Value(); @@ -100,25 +100,25 @@ int main(int argc, char *argv[]) { num_spk_done++; } } - + KALDI_LOG << "Accumulated stats from " << num_spk_done << " speakers (" << num_spk_err << " with no utterances), consisting of " << num_utt_done << " utterances (" << num_utt_err << " absent from input)."; - + if (num_spk_done == 0) KALDI_ERR << "No stats accumulated, unable to estimate PLDA."; if (num_spk_done == num_utt_done) KALDI_ERR << "No speakers with multiple utterances, " << "unable to estimate PLDA."; - + plda_stats.Sort(); PldaEstimator plda_estimator(plda_stats); Plda plda; plda_estimator.Estimate(plda_config, &plda); WriteKaldiObject(plda, plda_wxfilename, binary); - + return (num_spk_done != 0 ? 
0 : 1); } catch(const std::exception &e) { std::cerr << e.what(); diff --git a/src/ivectorbin/ivector-copy-plda.cc b/src/ivectorbin/ivector-copy-plda.cc index 8ce0c1e0c44..a7a9afd7113 100644 --- a/src/ivectorbin/ivector-copy-plda.cc +++ b/src/ivectorbin/ivector-copy-plda.cc @@ -33,7 +33,7 @@ int main(int argc, char *argv[]) { "\n" "Usage: ivector-copy-plda \n" "e.g.: ivector-copy-plda --smoothing=0.1 plda plda.smooth0.1\n"; - + ParseOptions po(usage); BaseFloat smoothing = 0.0; @@ -41,12 +41,12 @@ int main(int argc, char *argv[]) { po.Register("smoothing", &smoothing, "Factor used in smoothing within-class " "covariance (add this factor times between-class covar)"); po.Register("binary", &binary, "Write output in binary mode"); - + PldaConfig plda_config; plda_config.Register(&po); - + po.Read(argc, argv); - + if (po.NumArgs() != 2) { po.PrintUsage(); exit(1); diff --git a/src/ivectorbin/ivector-extract-online.cc b/src/ivectorbin/ivector-extract-online.cc index 3c1795d6b0f..5534392b76a 100644 --- a/src/ivectorbin/ivector-extract-online.cc +++ b/src/ivectorbin/ivector-extract-online.cc @@ -68,7 +68,7 @@ int main(int argc, char *argv[]) { "this value as a number of frames multiplied by your " "posterior scale (so typically 0.1 times a number of frames)."); po.Read(argc, argv); - + if (po.NumArgs() != 4) { po.PrintUsage(); exit(1); @@ -78,18 +78,18 @@ int main(int argc, char *argv[]) { feature_rspecifier = po.GetArg(2), posteriors_rspecifier = po.GetArg(3), ivectors_wspecifier = po.GetArg(4); - + IvectorExtractor extractor; ReadKaldiObject(ivector_extractor_rxfilename, &extractor); - + double tot_objf_impr = 0.0, tot_t = 0.0, tot_length = 0.0, tot_length_utt_end = 0.0; int32 num_done = 0, num_err = 0; - + SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier); BaseFloatMatrixWriter ivector_writer(ivectors_wspecifier); - + for (; !feature_reader.Done(); feature_reader.Next()) { std::string utt = feature_reader.Key(); @@ -100,7 +100,7 @@ int main(int argc, char *argv[]) { } const Matrix &feats = feature_reader.Value(); const Posterior &posterior = posteriors_reader.Value(utt); - + if (static_cast(posterior.size()) != feats.NumRows()) { KALDI_WARN << "Size mismatch between posterior " << posterior.size() << " and features " << feats.NumRows() << " for utterance " @@ -110,16 +110,16 @@ int main(int argc, char *argv[]) { } - Matrix ivectors; + Matrix ivectors; double objf_impr_per_frame; objf_impr_per_frame = EstimateIvectorsOnline(feats, posterior, extractor, ivector_period, num_cg_iters, max_count, &ivectors); - + BaseFloat offset = extractor.PriorOffset(); for (int32 i = 0 ; i < ivectors.NumRows(); i++) ivectors(i, 0) -= offset; - + double tot_post = TotalPosterior(posterior); KALDI_VLOG(2) << "For utterance " << utt << " objf impr/frame is " @@ -127,7 +127,7 @@ int main(int argc, char *argv[]) { << tot_post << " frames (weighted)."; ivector_writer.Write(utt, ivectors); - + tot_t += tot_post; tot_objf_impr += objf_impr_per_frame * tot_post; tot_length_utt_end += ivectors.Row(ivectors.NumRows() - 1).Norm(2.0) * diff --git a/src/ivectorbin/ivector-extract.cc b/src/ivectorbin/ivector-extract.cc index 220677d9af0..180c348135c 100644 --- a/src/ivectorbin/ivector-extract.cc +++ b/src/ivectorbin/ivector-extract.cc @@ -42,13 +42,13 @@ class IvectorExtractTask { void operator () () { bool need_2nd_order_stats = false; - + IvectorExtractorUtteranceStats utt_stats(extractor_.NumGauss(), extractor_.FeatDim(), 
need_2nd_order_stats); - + utt_stats.AccStats(feats_, posterior_); - + ivector_.Resize(extractor_.IvectorDim()); ivector_(0) = extractor_.PriorOffset(); @@ -102,7 +102,7 @@ int32 RunPerSpeaker(const std::string &ivector_extractor_rxfilename, RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); RandomAccessPosteriorReader posterior_reader(posterior_rspecifier); BaseFloatVectorWriter ivector_writer(ivector_wspecifier); - + double tot_auxf_change = 0.0, tot_post = 0.0, tot_norm = 0.0; int32 num_utt_done = 0, num_utt_err = 0, num_spk_done = 0, num_spk_err = 0; @@ -112,7 +112,7 @@ int32 RunPerSpeaker(const std::string &ivector_extractor_rxfilename, const std::vector &utts = spk2utt_reader.Value(); bool need_2nd_order_stats = false; - + IvectorExtractorUtteranceStats utt_stats(extractor.NumGauss(), extractor.FeatDim(), need_2nd_order_stats); @@ -154,10 +154,10 @@ int32 RunPerSpeaker(const std::string &ivector_extractor_rxfilename, KALDI_LOG << "Scaling stats for speaker " << spk << " by scale " << scale << " due to --max-count=" << opts.max_count; } - + Vector ivector(extractor.IvectorDim()); ivector(0) = extractor.PriorOffset(); - + if (compute_objf_change) { double old_auxf = extractor.GetAuxf(utt_stats, ivector); extractor.GetIvectorDistribution(utt_stats, &ivector, NULL); @@ -178,7 +178,7 @@ int32 RunPerSpeaker(const std::string &ivector_extractor_rxfilename, ivector(0) -= extractor.PriorOffset(); KALDI_LOG << "Ivector norm for speaker " << spk << " was " << ivector.Norm(2.0); - + tot_norm += ivector.Norm(2.0) * utt_stats.NumFrames(); tot_post += utt_stats.NumFrames(); num_spk_done++; @@ -186,7 +186,7 @@ int32 RunPerSpeaker(const std::string &ivector_extractor_rxfilename, ivector_writer.Write(spk, ivector_flt); } } - + KALDI_LOG << "Done " << num_spk_done << " speakers; " << num_spk_err << " with errors. " << num_utt_done << " utterances " << "were processed, " << num_utt_err << " with errors."; @@ -236,12 +236,12 @@ int main(int argc, char *argv[]) { "is not the normal way iVectors are obtained for speaker-id. " "This option will cause the program to ignore the --num-threads " "option."); - + opts.Register(&po); sequencer_config.Register(&po); - + po.Read(argc, argv); - + if (po.NumArgs() != 4) { po.PrintUsage(); exit(1); @@ -256,17 +256,17 @@ int main(int argc, char *argv[]) { if (spk2utt_rspecifier.empty()) { // g_num_threads affects how ComputeDerivedVars is called when we read the // extractor. - g_num_threads = sequencer_config.num_threads; + g_num_threads = sequencer_config.num_threads; IvectorExtractor extractor; ReadKaldiObject(ivector_extractor_rxfilename, &extractor); double tot_auxf_change = 0.0, tot_t = 0.0; int32 num_done = 0, num_err = 0; - + SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); RandomAccessPosteriorReader posterior_reader(posterior_rspecifier); BaseFloatVectorWriter ivector_writer(ivectors_wspecifier); - + { TaskSequencer sequencer(sequencer_config); for (; !feature_reader.Done(); feature_reader.Next()) { @@ -278,7 +278,7 @@ int main(int argc, char *argv[]) { } const Matrix &mat = feature_reader.Value(); Posterior posterior = posterior_reader.Value(utt); - + if (static_cast(posterior.size()) != mat.NumRows()) { KALDI_WARN << "Size mismatch between posterior " << posterior.size() << " and features " << mat.NumRows() << " for utterance " @@ -301,10 +301,10 @@ int main(int argc, char *argv[]) { ScalePosterior(opts.acoustic_weight * max_count_scale, &posterior); // note: now, this_t == sum of posteriors. 
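That note is the heart of the --max-count mechanism visible in these hunks: when the weighted posterior total of an utterance or speaker exceeds max_count, the posteriors are scaled down so that the effective count equals max_count, which keeps very long recordings from overwhelming the i-vector prior. A small Python sketch of the rule (illustrative names, not Kaldi's API):

def posterior_scale(posterior_total, acoustic_weight, max_count):
    """Return the factor by which every posterior entry is multiplied."""
    this_t = acoustic_weight * posterior_total
    max_count_scale = 1.0
    if max_count > 0 and this_t > max_count:
        max_count_scale = max_count / this_t
        this_t = max_count
    # After scaling, this_t == sum of the scaled posteriors, as the
    # comment above notes.
    return acoustic_weight * max_count_scale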
- + sequencer.Run(new IvectorExtractTask(extractor, utt, mat, posterior, &ivector_writer, auxf_ptr)); - + tot_t += this_t; num_done++; } diff --git a/src/ivectorbin/ivector-extractor-acc-stats.cc b/src/ivectorbin/ivector-extractor-acc-stats.cc index fb98e9d448b..1dc29b44afe 100644 --- a/src/ivectorbin/ivector-extractor-acc-stats.cc +++ b/src/ivectorbin/ivector-extractor-acc-stats.cc @@ -84,7 +84,7 @@ int main(int argc, char *argv[]) { sequencer_opts.Register(&po); po.Read(argc, argv); - + if (po.NumArgs() != 4) { po.PrintUsage(); exit(1); @@ -108,19 +108,19 @@ int main(int argc, char *argv[]) { // g_num_threads. So if the user specified the --num-threads option, which // goes to sequencer_opts in this case, copy it to g_num_threads. g_num_threads = sequencer_opts.num_threads; - + IvectorExtractor extractor; ReadKaldiObject(ivector_extractor_rxfilename, &extractor); - + IvectorExtractorStats stats(extractor, stats_opts); - - + + int64 tot_t = 0; int32 num_done = 0, num_err = 0; - + { TaskSequencer sequencer(sequencer_opts); - + for (; !feature_reader.Done(); feature_reader.Next()) { std::string key = feature_reader.Key(); if (!posteriors_reader.HasKey(key)) { @@ -147,15 +147,15 @@ int main(int argc, char *argv[]) { // destructor of "sequencer" will wait for any remaining tasks that // have not yet completed. } - + KALDI_LOG << "Done " << num_done << " files, " << num_err << " with errors. Total frames " << tot_t; - + { Output ko(accs_wxfilename, binary); stats.Write(ko.Stream(), binary); } - + KALDI_LOG << "Wrote stats to " << accs_wxfilename; return (num_done != 0 ? 0 : 1); diff --git a/src/ivectorbin/ivector-extractor-init.cc b/src/ivectorbin/ivector-extractor-init.cc index 7e4514bf5fb..35f1699c556 100644 --- a/src/ivectorbin/ivector-extractor-init.cc +++ b/src/ivectorbin/ivector-extractor-init.cc @@ -51,7 +51,7 @@ int main(int argc, char *argv[]) { std::string fgmm_rxfilename = po.GetArg(1), ivector_extractor_wxfilename = po.GetArg(2); - + FullGmm fgmm; ReadKaldiObject(fgmm_rxfilename, &fgmm); diff --git a/src/ivectorbin/ivector-extractor-sum-accs.cc b/src/ivectorbin/ivector-extractor-sum-accs.cc index 0dbda0e7798..b90a3773657 100644 --- a/src/ivectorbin/ivector-extractor-sum-accs.cc +++ b/src/ivectorbin/ivector-extractor-sum-accs.cc @@ -25,7 +25,7 @@ int main(int argc, char *argv[]) { try { typedef kaldi::int32 int32; using namespace kaldi; - + const char *usage = "Sum accumulators for training of iVector extractor\n" "Usage: ivector-extractor-sum-accs [options] " @@ -38,7 +38,7 @@ int main(int argc, char *argv[]) { po.Register("parallel", ¶llel, "If true, the program makes sure to " "open all filehandles before reading for any (useful when " "summing accs from long processes)"); - + po.Read(argc, argv); if (po.NumArgs() < 2) { @@ -74,9 +74,9 @@ int main(int argc, char *argv[]) { bool add = true; stats.Read(ki.Stream(), binary_in, add); } - } + } WriteKaldiObject(stats, stats_wxfilename, binary); - + KALDI_LOG << "Wrote summed stats to " << stats_wxfilename; return 0; diff --git a/src/ivectorbin/ivector-mean.cc b/src/ivectorbin/ivector-mean.cc index 9db070d61ab..6e6117c1eb7 100644 --- a/src/ivectorbin/ivector-mean.cc +++ b/src/ivectorbin/ivector-mean.cc @@ -42,14 +42,14 @@ int main(int argc, char *argv[]) { "e.g.: ivector-mean data/spk2utt exp/ivectors.ark exp/spk_ivectors.ark exp/spk_num_utts.ark\n" "or: ivector-mean exp/ivectors.ark exp/mean.vec\n" "See also: ivector-subtract-global-mean\n"; - + ParseOptions po(usage); bool binary_write = false; po.Register("binary", &binary_write, 
"If true, write output in binary " "(only applicable when writing files, not archives/tables."); - + po.Read(argc, argv); - + if (po.NumArgs() < 2 || po.NumArgs() > 4) { po.PrintUsage(); exit(1); @@ -79,10 +79,10 @@ int main(int argc, char *argv[]) { ivector_rspecifier = po.GetArg(2), ivector_wspecifier = po.GetArg(3), num_utts_wspecifier = po.GetOptArg(4); - + double spk_sumsq = 0.0; Vector spk_sum; - + int64 num_spk_done = 0, num_spk_err = 0, num_utt_done = 0, num_utt_err = 0; @@ -90,7 +90,7 @@ int main(int argc, char *argv[]) { SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); BaseFloatVectorWriter ivector_writer(ivector_wspecifier); Int32Writer num_utts_writer(num_utts_wspecifier); - + for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { std::string spk = spk2utt_reader.Key(); const std::vector &uttlist = spk2utt_reader.Value(); @@ -130,7 +130,7 @@ int main(int argc, char *argv[]) { spk_sum.AddVec(1.0, spk_mean); } } - + KALDI_LOG << "Computed mean of " << num_spk_done << " speakers (" << num_spk_err << " with no utterances), consisting of " << num_utt_done << " utterances (" << num_utt_err diff --git a/src/ivectorbin/ivector-subtract-global-mean.cc b/src/ivectorbin/ivector-subtract-global-mean.cc index 1e6cb7cdf0c..4083580212f 100644 --- a/src/ivectorbin/ivector-subtract-global-mean.cc +++ b/src/ivectorbin/ivector-subtract-global-mean.cc @@ -36,34 +36,34 @@ int main(int argc, char *argv[]) { "e.g.: ivector-subtract-global-mean scp:ivectors.scp ark:-\n" "or: ivector-subtract-global-mean mean.vec scp:ivectors.scp ark:-\n" "See also: ivector-mean\n"; - + ParseOptions po(usage); bool subtract_mean = true; po.Register("subtract-mean", &subtract_mean, "If true, subtract mean; if false, just copy the input."); - + po.Read(argc, argv); - + if (po.NumArgs() < 2 || po.NumArgs() > 3) { po.PrintUsage(); exit(1); } int64 num_done = 0; - + if (po.NumArgs() == 2) { std::string ivector_rspecifier = po.GetArg(1), ivector_wspecifier = po.GetArg(2); - + Vector sum; - + std::vector*> > ivectors; - + SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier); BaseFloatVectorWriter ivector_writer(ivector_wspecifier); - + for (; !ivector_reader.Done(); ivector_reader.Next()) { std::string key = ivector_reader.Key(); const Vector &ivector = ivector_reader.Value(); @@ -74,7 +74,7 @@ int main(int argc, char *argv[]) { } KALDI_LOG << "Read " << num_done << " iVectors."; - + if (num_done != 0) { KALDI_LOG << "Norm of iVector mean was " << (sum.Norm(2.0) / num_done); for (size_t i = 0; i < ivectors.size(); i++) { @@ -94,7 +94,7 @@ int main(int argc, char *argv[]) { ivector_wspecifier = po.GetArg(3); Vector mean; ReadKaldiObject(mean_rxfilename, &mean); - + SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier); BaseFloatVectorWriter ivector_writer(ivector_wspecifier); for (; !ivector_reader.Done(); ivector_reader.Next()) { @@ -107,7 +107,7 @@ int main(int argc, char *argv[]) { } KALDI_LOG << "Wrote " << num_done << " mean-subtracted iVectors"; return (num_done != 0 ? 
0 : 1); - + } catch(const std::exception &e) { std::cerr << e.what(); return -1; diff --git a/src/ivectorbin/ivector-transform.cc b/src/ivectorbin/ivector-transform.cc index 43392453070..0eda9ee24a4 100644 --- a/src/ivectorbin/ivector-transform.cc +++ b/src/ivectorbin/ivector-transform.cc @@ -36,11 +36,11 @@ int main(int argc, char *argv[]) { "\n" "e.g.: \n" " ivector-transform transform.mat ark:ivectors.ark ark:transformed_ivectors.ark\n"; - + ParseOptions po(usage); - + po.Read(argc, argv); - + if (po.NumArgs() != 3) { po.PrintUsage(); exit(1); @@ -53,7 +53,7 @@ int main(int argc, char *argv[]) { Matrix transform; ReadKaldiObject(matrix_rxfilename, &transform); - + int32 num_done = 0; // The following quantities will be needed if we're doing @@ -66,7 +66,7 @@ int main(int argc, char *argv[]) { Vector sum(transform.NumRows()); double sumsq = 0.0; - + SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier); BaseFloatVectorWriter ivector_writer(ivector_wspecifier); @@ -74,7 +74,7 @@ int main(int argc, char *argv[]) { std::string key = ivector_reader.Key(); const Vector &ivector = ivector_reader.Value(); - Vector transformed_ivector(transform.NumRows()); + Vector transformed_ivector(transform.NumRows()); if (ivector.Dim() == transform.NumCols()) { transformed_ivector.AddMatVec(1.0, transform, kNoTrans, ivector, 0.0); } else { @@ -99,7 +99,7 @@ int main(int argc, char *argv[]) { KALDI_LOG << "Norm of mean was " << mean_length << " (should be close to zero), length divided by sqrt(dim) was " << norm_length << " (should probably be close to one)"; - } + } return (num_done != 0 ? 0 : 1); } catch(const std::exception &e) { std::cerr << e.what(); From 179d9b63caccfccff4c68dcd033f81b5de991f70 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 14 Apr 2017 10:51:51 -0700 Subject: [PATCH 504/530] [src] add template instantiations for ConvertStringToReal, address issue #1544 --- src/util/text-utils.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/util/text-utils.cc b/src/util/text-utils.cc index d9e6b2ac520..1e830b9f6c5 100644 --- a/src/util/text-utils.cc +++ b/src/util/text-utils.cc @@ -251,4 +251,12 @@ bool ConvertStringToReal(const std::string &str, return true; } +template +bool ConvertStringToReal(const std::string &str, + float *out); +template +bool ConvertStringToReal(const std::string &str, + double *out); + + } // end namespace kaldi From 8891750795df1cb67502c0ffcfdfaed48a13b882 Mon Sep 17 00:00:00 2001 From: david-ryan-snyder Date: Fri, 14 Apr 2017 15:46:51 -0400 Subject: [PATCH 505/530] [egs] AMI TDNN Results Update (#1545) * [scripts,egs] Adding options for using PCA instead of LDA+MLLT for ivectors used in ASR. Results are reported in the default TDNN recipe in AMI. Updating steps/online/nnet2/{train_diag_ubm.sh,train_ivector_extractor.sh} so that they now backup the contents of their destination directory if it already exists. * [egs,scripts] Updating AMI TDNN results to reflect the current recipe (tdnn1d). Fixing minor bug in egs/ami/s5b/local/chain/tuning/run_tdnn_*.sh scripts. 
* [egs] Updating chain scripts in AMI so that they do not default to keeping egs
---
 egs/ami/s5b/README.txt                        |  2 +-
 egs/ami/s5b/RESULTS_ihm                       |  8 ++++----
 egs/ami/s5b/RESULTS_mdm                       | 15 +++++++--------
 egs/ami/s5b/RESULTS_sdm                       | 16 ++++++++--------
 egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh |  4 +---
 egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh |  4 +---
 egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh |  4 +---
 egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh |  4 +---
 egs/ami/s5b/run.sh                            |  2 +-
 9 files changed, 25 insertions(+), 34 deletions(-)

diff --git a/egs/ami/s5b/README.txt b/egs/ami/s5b/README.txt
index 032a2533e5b..2d5a522e228 100644
--- a/egs/ami/s5b/README.txt
+++ b/egs/ami/s5b/README.txt
@@ -5,7 +5,7 @@ many components removed.
 Before running run.sh, please run run_prepare_shared.sh.
 Afterwards, you can run:

- run.sh --mic ihm # builds system for independent headset microphone
+ run.sh --mic ihm # builds system for independent headset microphone
 run.sh --mic sdm1 # single distant microphone
 run.sh --mic mdm8 # multiple distant microphones + beamforming.
diff --git a/egs/ami/s5b/RESULTS_ihm b/egs/ami/s5b/RESULTS_ihm
index 25a60d24cfb..1003197701e 100644
--- a/egs/ami/s5b/RESULTS_ihm
+++ b/egs/ami/s5b/RESULTS_ihm
@@ -66,11 +66,11 @@
 %WER 22.0 | 13098 94488 | 80.8 10.2 9.0 2.8 22.0 54.7 | 0.102 | exp/ihm/chain_cleaned/tdnn1b_sp_bi/decode_dev/ascore_10/dev_hires.ctm.filt.sys
 %WER 22.2 | 12643 89968 | 80.3 12.1 7.6 2.6 22.2 52.9 | 0.170 | exp/ihm/chain_cleaned/tdnn1b_sp_bi/decode_eval/ascore_10/eval_hires.ctm.filt.sys

-# local/chain/run_tdnn.sh --mic ihm --train-set train --gmm tri3 --nnet3-affix "" --stage 12
+# local/chain/run_tdnn.sh --mic ihm --train-set train --gmm tri3 --nnet3-affix "" --stage 4
 # chain TDNN model without cleanup [note: cleanup helps very little on this IHM data.]
-# for d in exp/ihm/chain/tdnn_sp_bi/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
-%WER 22.4 | 13098 94476 | 80.4 10.4 9.2 2.8 22.4 54.6 | 0.069 | exp/ihm/chain/tdnn_sp_bi/decode_dev/ascore_10/dev_hires.ctm.filt.sys
-%WER 22.5 | 12643 89974 | 80.0 12.1 7.9 2.6 22.5 52.8 | 0.157 | exp/ihm/chain/tdnn_sp_bi/decode_eval/ascore_10/eval_hires.ctm.filt.sys
+# for d in exp/ihm/chain/tdnn1d_sp_bi/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
+%WER 21.8 | 13098 94484 | 80.7 9.7 9.6 2.5 21.8 54.2 | 0.114 | exp/ihm/chain/tdnn1d_sp_bi/decode_dev/ascore_10/dev_hires.ctm.filt.sys
+%WER 22.1 | 12643 89965 | 80.2 11.5 8.3 2.3 22.1 52.5 | 0.203 | exp/ihm/chain/tdnn1d_sp_bi/decode_eval/ascore_10/eval_hires.ctm.filt.sys

 # local/chain/multi_condition/run_tdnn.sh --mic ihm
 # cleanup + chain TDNN model + IHM reverberated data
diff --git a/egs/ami/s5b/RESULTS_mdm b/egs/ami/s5b/RESULTS_mdm
index f27da5773ac..d9155eca507 100644
--- a/egs/ami/s5b/RESULTS_mdm
+++ b/egs/ami/s5b/RESULTS_mdm
@@ -65,17 +65,16 @@

 # cleanup + chain TDNN model, alignments from IHM data (IHM alignments help).
# local/chain/run_tdnn.sh --mic mdm8 --use-ihm-ali true --stage 12 &
-# for d in exp/mdm8/chain_cleaned/tdnn_sp_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
-%WER 37.4 | 15286 94509 | 66.6 18.0 15.5 3.9 37.4 62.8 | 0.624 | exp/mdm8/chain_cleaned/tdnn_sp_bi_ihmali/decode_dev/ascore_9/dev_hires_o4.ctm.filt.sys
-%WER 40.6 | 13381 89982 | 62.7 18.9 18.3 3.3 40.6 67.6 | 0.594 | exp/mdm8/chain_cleaned/tdnn_sp_bi_ihmali/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys
-
+# for d in exp/mdm8/chain_cleaned/tdnn1d_sp_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
+%WER 36.4 | 15140 94513 | 67.3 17.5 15.2 3.6 36.4 63.2 | 0.613 | exp/mdm8/chain_cleaned/tdnn1d_sp_bi_ihmali/decode_dev/ascore_9/dev_hires_o4.ctm.filt.sys
+%WER 39.7 | 13835 89969 | 63.2 18.4 18.4 3.0 39.7 65.7 | 0.584 | exp/mdm8/chain_cleaned/tdnn1d_sp_bi_ihmali/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys

 # local/chain/run_tdnn.sh --use-ihm-ali true --mic mdm8 --train-set train --gmm tri3 --nnet3-affix "" --stage 12 &
 # chain TDNN model-- no cleanup, but IHM alignments.
-# note, this system is worse by [0.8, 1.3] than the system without cleanup.
-# for d in exp/mdm8/chain/tdnn_sp_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
-%WER 37.9 | 15635 94514 | 66.5 19.1 14.4 4.4 37.9 61.2 | 0.646 | exp/mdm8/chain/tdnn_sp_bi_ihmali/decode_dev/ascore_8/dev_hires_o4.ctm.filt.sys
-%WER 41.5 | 13884 89975 | 62.3 20.3 17.4 3.8 41.5 66.0 | 0.621 | exp/mdm8/chain/tdnn_sp_bi_ihmali/decode_eval/ascore_8/eval_hires_o4.ctm.filt.sys
+# note, this system is worse by [0.5, 0.5] than the system with cleanup.
+# for d in exp/mdm8/chain/tdnn1d_sp_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
+%WER 36.9 | 15282 94502 | 67.1 18.5 14.4 4.1 36.9 62.5 | 0.635 | exp/mdm8/chain/tdnn1d_sp_bi_ihmali/decode_dev/ascore_8/dev_hires_o4.ctm.filt.sys
+%WER 40.2 | 13729 89992 | 63.3 19.8 17.0 3.5 40.2 66.4 | 0.608 | exp/mdm8/chain/tdnn1d_sp_bi_ihmali/decode_eval/ascore_8/eval_hires_o4.ctm.filt.sys

 # local/chain/multi_condition/run_tdnn.sh --mic mdm8 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned
 # cleanup + chain TDNN model, MDM original + IHM reverberated data, alignments from IHM data
diff --git a/egs/ami/s5b/RESULTS_sdm b/egs/ami/s5b/RESULTS_sdm
index 05b68e5e780..737f8f6dc09 100755
--- a/egs/ami/s5b/RESULTS_sdm
+++ b/egs/ami/s5b/RESULTS_sdm
@@ -67,17 +67,17 @@

 # cleanup + chain TDNN model, alignments from IHM data (IHM alignments help).
 # local/chain/run_tdnn.sh --mic sdm1 --use-ihm-ali true --stage 12 &
 # cleanup + chain TDNN model, cleaned data and alignments from ihm data.
-# for d in exp/sdm1/chain_cleaned/tdnn_sp_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
-%WER 40.7 | 14321 94501 | 63.0 19.6 17.4 3.7 40.7 67.7 | 0.592 | exp/sdm1/chain_cleaned/tdnn_sp_bi_ihmali/decode_dev/ascore_9/dev_hires_o4.ctm.filt.sys
-%WER 44.8 | 14293 89976 | 58.6 21.3 20.1 3.3 44.8 64.2 | 0.559 | exp/sdm1/chain_cleaned/tdnn_sp_bi_ihmali/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys
+# for d in exp/sdm1/chain_cleaned/tdnn1d_sp_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done
+%WER 39.5 | 14280 94503 | 64.0 19.3 16.7 3.5 39.5 67.7 | 0.582 | exp/sdm1/chain_cleaned/tdnn1d_sp_bi_ihmali/decode_dev/ascore_9/dev_hires_o4.ctm.filt.sys
+%WER 43.9 | 13566 89961 | 59.3 20.9 19.9 3.1 43.9 67.9 | 0.547 | exp/sdm1/chain_cleaned/tdnn1d_sp_bi_ihmali/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys

 # no-cleanup + chain TDNN model, IHM alignments.
-# A bit worse than with cleanup [+0.1, +0.4]. -# local/chain/run_tdnn.sh --use-ihm-ali true --mic sdm1 --train-set train --gmm tri3 --nnet3-affix "" --stage 17 - for d in exp/sdm1/chain/tdnn_sp_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done -%WER 40.7 | 14549 94520 | 63.6 21.4 15.0 4.3 40.7 66.2 | 0.617 | exp/sdm1/chain/tdnn_sp_bi_ihmali/decode_dev/ascore_8/dev_hires_o4.ctm.filt.sys -%WER 45.1 | 13296 89971 | 59.1 23.4 17.6 4.2 45.1 69.5 | 0.591 | exp/sdm1/chain/tdnn_sp_bi_ihmali/decode_eval/ascore_8/eval_hires_o4.ctm.filt.sys +# A bit worse than with cleanup [+0.3, +0.4]. +# local/chain/run_tdnn.sh --use-ihm-ali true --mic sdm1 --train-set train --gmm tri3 --nnet3-affix "" --stage 12 +# for d in exp/sdm1/chain/tdnn1d_sp_bi_ihmali/decode_*; do grep Sum $d/*sc*/*ys | utils/best_wer.sh; done +%WER 39.8 | 15384 94535 | 64.4 21.0 14.6 4.2 39.8 62.8 | 0.610 | exp/sdm1/chain/tdnn1d_sp_bi_ihmali/decode_dev/ascore_8/dev_hires_o4.ctm.filt.sys +%WER 44.3 | 14046 90002 | 59.6 23.1 17.3 3.9 44.3 65.6 | 0.571 | exp/sdm1/chain/tdnn1d_sp_bi_ihmali/decode_eval/ascore_8/eval_hires_o4.ctm.filt.sys # local/chain/multi_condition/run_tdnn.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned # cleanup + chain TDNN model, SDM original + IHM reverberated data, alignments from ihm data. diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh index b3a645c0c11..86587d6d830 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh @@ -184,9 +184,7 @@ if [ $stage -le 16 ]; then /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage fi - touch $dir/egs/.nodelete # keep egs around when that run dies. - - steps/nnet3/chain/train.py --stage $train_stage \ + steps/nnet3/chain/train.py --stage $train_stage \ --cmd "$decode_cmd" \ --feat.online-ivector-dir $train_ivector_dir \ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh index 0644d624606..98dc95e59a2 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh @@ -212,9 +212,7 @@ if [ $stage -le 16 ]; then /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage fi - touch $dir/egs/.nodelete # keep egs around when that run dies. - - steps/nnet3/chain/train.py --stage $train_stage \ + steps/nnet3/chain/train.py --stage $train_stage \ --cmd "$decode_cmd" \ --feat.online-ivector-dir $train_ivector_dir \ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh index 0a49575ebb0..f87e1a12d36 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh @@ -199,9 +199,7 @@ if [ $stage -le 16 ]; then /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage fi - touch $dir/egs/.nodelete # keep egs around when that run dies. 
-
-  steps/nnet3/chain/train.py --stage $train_stage \
+  steps/nnet3/chain/train.py --stage $train_stage \
     --cmd "$decode_cmd" \
     --feat.online-ivector-dir $train_ivector_dir \
     --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh
index a9f228cb55d..eb84a1cd876 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh
@@ -211,9 +211,7 @@ if [ $stage -le 16 ]; then
      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage
   fi

-  touch $dir/egs/.nodelete # keep egs around when that run dies.
-
-  steps/nnet3/chain/train.py --stage $train_stage \
+  steps/nnet3/chain/train.py --stage $train_stage \
     --cmd "$decode_cmd" \
     --feat.online-ivector-dir $train_ivector_dir \
     --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
diff --git a/egs/ami/s5b/run.sh b/egs/ami/s5b/run.sh
index 56cdd29e311..0a630a87a5b 100755
--- a/egs/ami/s5b/run.sh
+++ b/egs/ami/s5b/run.sh
@@ -56,7 +56,7 @@ if [ "$base_mic" == "mdm" ]; then
   PROCESSED_AMI_DIR=$AMI_DIR/beamformed
   if [ $stage -le 1 ]; then
     # for MDM data, do beamforming
-    ! hash BeamformIt && echo "Missing BeamformIt, run 'cd ../../../tools/; make beamformit;'" && exit 1
+    ! hash BeamformIt && echo "Missing BeamformIt, run 'cd ../../../tools/; extras/install_beamformit.sh; cd -;'" && exit 1
     local/ami_beamform.sh --cmd "$train_cmd" --nj 20 $nmics $AMI_DIR $PROCESSED_AMI_DIR
   fi
 else

From 9e06caa1fb0306898632c6fa3ad67571c4d06cf5 Mon Sep 17 00:00:00 2001
From: LvHang
Date: Sat, 15 Apr 2017 15:07:02 -0400
Subject: [PATCH 506/530] [scripts] steps/nnet3/report/generate_plots.py: plot 5,50,95th percentile of value and derivative instead of mean+-stddev (#1472)

---
 .../s5/steps/libs/nnet3/report/log_parse.py   | 113 ++++++++---
 .../s5/steps/nnet3/report/generate_plots.py   | 186 ++++++++++++------
 2 files changed, 212 insertions(+), 87 deletions(-)

diff --git a/egs/wsj/s5/steps/libs/nnet3/report/log_parse.py b/egs/wsj/s5/steps/libs/nnet3/report/log_parse.py
index cdbbb00a68a..b5d3e17dded 100755
--- a/egs/wsj/s5/steps/libs/nnet3/report/log_parse.py
+++ b/egs/wsj/s5/steps/libs/nnet3/report/log_parse.py
@@ -5,6 +5,7 @@
 # Apache 2.0.
from __future__ import division +from __future__ import print_function import traceback import datetime import logging @@ -15,6 +16,30 @@ logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) +g_lstmp_nonlin_regex_pattern = ''.join([".*progress.([0-9]+).log:component name=(.+) ", + "type=(.*)Component,.*", + "i_t_sigmoid.*", + "value-avg=\[.*=\((.+)\), mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\].*", + "deriv-avg=\[.*=\((.+)\), mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\].*", + "f_t_sigmoid.*", + "value-avg=\[.*=\((.+)\), mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\].*", + "deriv-avg=\[.*=\((.+)\), mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\].*", + "c_t_tanh.*", + "value-avg=\[.*=\((.+)\), mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\].*", + "deriv-avg=\[.*=\((.+)\), mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\].*", + "o_t_sigmoid.*", + "value-avg=\[.*=\((.+)\), mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\].*", + "deriv-avg=\[.*=\((.+)\), mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\].*", + "m_t_tanh.*", + "value-avg=\[.*=\((.+)\), mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\].*", + "deriv-avg=\[.*=\((.+)\), mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\]"]) + + +g_normal_nonlin_regex_pattern = ''.join([".*progress.([0-9]+).log:component name=(.+) ", + "type=(.*)Component,.*", + "value-avg=\[.*=\((.+)\), mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\].*", + "deriv-avg=\[.*=\((.+)\), mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\]"]) + class KaldiLogParseException(Exception): """ An Exception class that throws an error when there is an issue in parsing the log files. Extend this class if more granularity is needed. @@ -27,10 +52,55 @@ def __init__(self, message = None): "There was an error while trying to parse the logs." " Details : \n{0}\n".format(message)) +# This function is used to fill stats_per_component_per_iter table with the +# results of regular expression. 
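Patterns this long are easiest to check by running them against a single line before wiring them into the filling function that the comment above introduces. A hedged sketch (the log line below is constructed for illustration; like real progress.N.log lines, it carries thirteen percentile values inside each parenthesized list):

import re

# Exercise g_normal_nonlin_regex_pattern, defined above, on one line.
line = ("exp/foo/log/progress.9.log:component name=Tdnn1_relu "
        "type=RectifiedLinearComponent, dim=512, "
        "value-avg=[percentiles(0,1,2,5 10,20,50,80,90 95,98,99,100)"
        "=(0.0,0.0,0.0,0.0,0.01 0.02,0.2,0.5,0.7 0.8,0.9,0.95,1.0), "
        "mean=0.3, stddev=0.25], "
        "deriv-avg=[percentiles(0,1,2,5 10,20,50,80,90 95,98,99,100)"
        "=(0.0,0.0,0.0,0.0,0.05 0.1,0.5,0.9,0.95 0.98,0.99,1.0,1.0), "
        "mean=0.5, stddev=0.3]")
match = re.compile(g_normal_nonlin_regex_pattern).search(line)
# groups(): iteration, name, type, then (percentile string, mean, stddev)
# for the value average and again for the derivative average.
assert match.groups()[:3] == ('9', 'Tdnn1_relu', 'RectifiedLinear')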
+def fill_nonlin_stats_table_with_regex_result(groups, gate_index, stats_table): + iteration = int(groups[0]) + component_name = groups[1] + component_type = groups[2] + value_percentiles = groups[3+gate_index*6] + value_mean = float(groups[4+gate_index*6]) + value_stddev = float(groups[5+gate_index*6]) + value_percentiles_split = re.split(',| ',value_percentiles) + assert len(value_percentiles_split) == 13 + value_5th = float(value_percentiles_split[4]) + value_50th = float(value_percentiles_split[6]) + value_95th = float(value_percentiles_split[9]) + deriv_percentiles = groups[6+gate_index*6] + deriv_mean = float(groups[7+gate_index*6]) + deriv_stddev = float(groups[8+gate_index*6]) + deriv_percentiles_split = re.split(',| ',deriv_percentiles) + assert len(deriv_percentiles_split) == 13 + deriv_5th = float(deriv_percentiles_split[4]) + deriv_50th = float(deriv_percentiles_split[6]) + deriv_95th = float(deriv_percentiles_split[9]) + try: + if stats_table[component_name]['stats'].has_key(iteration): + stats_table[component_name]['stats'][iteration].extend( + [value_mean, value_stddev, + deriv_mean, deriv_stddev, + value_5th, value_50th, value_95th, + deriv_5th, deriv_50th, deriv_95th]) + else: + stats_table[component_name]['stats'][iteration] = [ + value_mean, value_stddev, + deriv_mean, deriv_stddev, + value_5th, value_50th, value_95th, + deriv_5th, deriv_50th, deriv_95th] + except KeyError: + stats_table[component_name] = {} + stats_table[component_name]['type'] = component_type + stats_table[component_name]['stats'] = {} + stats_table[component_name][ + 'stats'][iteration] = [value_mean, value_stddev, + deriv_mean, deriv_stddev, + value_5th, value_50th, value_95th, + deriv_5th, deriv_50th, deriv_95th] + def parse_progress_logs_for_nonlinearity_stats(exp_dir): - """ Parse progress logs for mean and std stats for non-linearities. + """ Parse progress logs for mean and std stats for non-linearities. e.g. 
    for a line that is parsed from progress.*.log:
    exp/nnet3/lstm_self_repair_ld5_sp/log/progress.9.log:component name=Lstm3_i
    type=SigmoidComponent, dim=1280, self-repair-scale=1e-05, count=1.96e+05,
@@ -48,39 +118,28 @@ def parse_progress_logs_for_nonlinearity_stats(exp_dir):

     progress_log_lines = common_lib.run_kaldi_command(
         'grep -e "value-avg.*deriv-avg" {0}'.format(progress_log_files))[0]

-    parse_regex = re.compile(
-        ".*progress.([0-9]+).log:component name=(.+) "
-        "type=(.*)Component,.*"
-        "value-avg=\[.*mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\].*"
-        "deriv-avg=\[.*mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\]")
+    parse_regex = re.compile(g_normal_nonlin_regex_pattern)
+
     for line in progress_log_lines.split("\n"):
         mat_obj = parse_regex.search(line)
         if mat_obj is None:
             continue
-        # groups = ('9', 'Lstm3_i', 'Sigmoid', '0.502', '0.23',
-        #           '0.134', '0.0397')
+        # groups = ('9', 'Lstm3_i', 'Sigmoid', '0.05...0.99', '0.502', '0.23',
+        #           '0.009...0.21', '0.134', '0.0397')
         groups = mat_obj.groups()
-        iteration = int(groups[0])
-        component_name = groups[1]
         component_type = groups[2]
-        value_mean = float(groups[3])
-        value_stddev = float(groups[4])
-        deriv_mean = float(groups[5])
-        deriv_stddev = float(groups[6])
-        try:
-            stats_per_component_per_iter[component_name][
-                'stats'][iteration] = [value_mean, value_stddev,
-                                       deriv_mean, deriv_stddev]
-        except KeyError:
-            stats_per_component_per_iter[component_name] = {}
-            stats_per_component_per_iter[component_name][
-                'type'] = component_type
-            stats_per_component_per_iter[component_name]['stats'] = {}
-            stats_per_component_per_iter[component_name][
-                'stats'][iteration] = [value_mean, value_stddev,
-                                       deriv_mean, deriv_stddev]
-
+        if component_type == 'LstmNonlinearity':
+            parse_regex_lstmp = re.compile(g_lstmp_nonlin_regex_pattern)
+            mat_obj = parse_regex_lstmp.search(line)
+            groups = mat_obj.groups()
+            assert len(groups) == 33
+            for i in list(range(0,5)):
+                fill_nonlin_stats_table_with_regex_result(groups, i,
+                        stats_per_component_per_iter)
+        else:
+            fill_nonlin_stats_table_with_regex_result(groups, 0,
+                    stats_per_component_per_iter)
     return stats_per_component_per_iter

diff --git a/egs/wsj/s5/steps/nnet3/report/generate_plots.py b/egs/wsj/s5/steps/nnet3/report/generate_plots.py
index 6f185ad313f..6a652f9ec68 100755
--- a/egs/wsj/s5/steps/nnet3/report/generate_plots.py
+++ b/egs/wsj/s5/steps/nnet3/report/generate_plots.py
@@ -21,7 +21,7 @@
     mpl.use('Agg')
     import matplotlib.pyplot as plt
     import numpy as np
-
+    from matplotlib.patches import Rectangle
     g_plot = True
 except ImportError:
     warnings.warn(
@@ -91,7 +91,6 @@ def get_args():

 g_plot_colors = ['red', 'blue', 'green', 'black', 'magenta', 'yellow', 'cyan']

-
 class LatexReport:
     """Class for writing a Latex report"""

@@ -213,6 +212,88 @@ def generate_acc_logprob_plots(exp_dir, output_dir, plot, key='accuracy',
             "Plot of {0} vs iterations for {1}".format(key, output_name))

+
+# The names of the five gates of the LSTMP nonlinearity
+g_lstm_gate = ['i_t_sigmoid', 'f_t_sigmoid', 'c_t_tanh', 'o_t_sigmoid', 'm_t_tanh']
+
+# The "extra" item is a placeholder: each legend entry in a matplotlib plot
+# is composed of a legend_handle (line style) and a legend_label (description).
+# For entries that have no line style, we use the "extra" placeholder.
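The placeholder trick that this comment describes is easiest to see in isolation; a standalone sketch with synthetic labels (plain matplotlib, independent of the report code):

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

# An invisible rectangle serves as the handle of a text-only legend cell.
blank = Rectangle((0, 0), 1, 1, facecolor="w", fill=False,
                  edgecolor='none', linewidth=0)
fig, ax = plt.subplots()
line, = ax.plot([0, 1], [0, 1], color='red', linestyle='--')
# Column one is a header with no line style; column two pairs a real
# handle with its description.
ax.legend([blank, line], ["[1]exp/run1", "5th percentile"], ncol=2)
fig.savefig("legend_placeholder_demo.pdf")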
+extra = Rectangle((0, 0), 1, 1, facecolor="w", fill=False, edgecolor='none', linewidth=0)
+
+# This function inserts a column into the legend; column_index is 1-based.
+def insert_a_column_legend(legend_handle, legend_label, lp, mp, hp,
+                           dir, prefix_length, column_index):
+    handle = [extra, lp, mp, hp]
+    label = ["[1]{0}".format(dir[prefix_length:]), "", "", ""]
+    for row in range(1,5):
+        legend_handle.insert(column_index*row-1, handle[row-1])
+        legend_label.insert(column_index*row-1, label[row-1])
+
+
+# This function plots a normal nonlinearity component or one gate of an LSTMP layer.
+def plot_a_nonlin_component(fig, dirs, stat_tables_per_component_per_dir,
+        component_name, common_prefix, prefix_length, component_type,
+        start_iter, gate_index=0):
+    fig.clf()
+    index = 0
+    legend_handle = [extra, extra, extra, extra]
+    legend_label = ["", '5th percentile', '50th percentile', '95th percentile']
+
+    for dir in dirs:
+        color_val = g_plot_colors[index]
+        index += 1
+        try:
+            iter_stats = (stat_tables_per_component_per_dir[dir][component_name])
+        except KeyError:
+            # this component is not available in this network, so skip it;
+            # pass the blank placeholder for all three handles, since no
+            # curves were drawn for this dir.
+            insert_a_column_legend(legend_handle, legend_label, extra, extra,
+                    extra, dir, prefix_length, index+1)
+            continue
+
+        data = np.array(iter_stats)
+        data = data[data[:, 0] >= start_iter, :]
+        ax = plt.subplot(211)
+        lp, = ax.plot(data[:, 0], data[:, gate_index*10+5], color=color_val,
+                linestyle='--')
+        mp, = ax.plot(data[:, 0], data[:, gate_index*10+6], color=color_val,
+                linestyle='-')
+        hp, = ax.plot(data[:, 0], data[:, gate_index*10+7], color=color_val,
+                linestyle='--')
+        insert_a_column_legend(legend_handle, legend_label, lp, mp, hp,
+                dir, prefix_length, index+1)
+
+        ax.set_ylabel('Value-{0}'.format(component_type))
+        ax.grid(True)
+
+        ax = plt.subplot(212)
+        lp, = ax.plot(data[:, 0], data[:, gate_index*10+8], color=color_val,
+                linestyle='--')
+        mp, = ax.plot(data[:, 0], data[:, gate_index*10+9], color=color_val,
+                linestyle='-')
+        hp, = ax.plot(data[:, 0], data[:, gate_index*10+10], color=color_val,
+                linestyle='--')
+        ax.set_xlabel('Iteration')
+        ax.set_ylabel('Derivative-{0}'.format(component_type))
+        ax.grid(True)
+
+    lgd = plt.legend(legend_handle, legend_label, loc='lower center',
+                     bbox_to_anchor=(0.5 , -0.5 + len(dirs) * -0.2),
+                     ncol=4, handletextpad = -2, title="[1]:{0}".format(common_prefix),
+                     borderaxespad=0.)
+    plt.grid(True)
+    return lgd
+
+
+# This function generates the statistics plots for the nonlinearity components.
+# It proceeds in the following steps:
+# 1) Using the log_parse functions, collect the statistics from each directory.
+# 2) Convert the collected nonlinearity statistics into tables; each table
+#    holds all the statistics of one component in one directory.
+# 3) Store the statistics of each component in a corresponding log file;
+#    each line of the log file contains the statistics of one iteration.
+# 4) Plot the "Per-dimension average-(value, derivative) percentiles" figure
+#    for each nonlinearity component.
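One more gloss before the driver function below: the column arithmetic in plot_a_nonlin_component (data[:, gate_index*10+5] and friends) follows from the row layout that fill_nonlin_stats_table_with_regex_result builds, restated here as a sketch:

# Each stats-table row is [iteration] followed by ten numbers per gate,
# appended in gate order (a single block for non-LSTM components):
#   cols 1-4 : value_mean, value_stddev, deriv_mean, deriv_stddev
#   cols 5-7 : value_5th, value_50th, value_95th
#   cols 8-10: deriv_5th, deriv_50th, deriv_95th
def stat_column(gate_index, offset):
    """Column of a per-gate statistic; offsets 5-7 are the value
    percentiles and 8-10 the derivative percentiles."""
    return gate_index * 10 + offset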
def generate_nonlin_stats_plots(exp_dir, output_dir, plot, comparison_dir=None, start_iter=1, latex_report=None): assert start_iter >= 1 @@ -230,7 +311,6 @@ def generate_nonlin_stats_plots(exp_dir, output_dir, plot, comparison_dir=None, logger.warning("Couldn't find any rows for the" "nonlin stats plot, not generating it") stats_per_dir[dir] = stats_per_component_per_iter - # convert the nonlin stats into tables stat_tables_per_component_per_dir = {} for dir in dirs: @@ -254,15 +334,15 @@ def generate_nonlin_stats_plots(exp_dir, output_dir, plot, comparison_dir=None, # this is the main experiment directory with open("{dir}/nonlinstats_{comp_name}.log".format( dir=output_dir, comp_name=component_name), "w") as f: - f.write( - "Iteration\tValueMean\tValueStddev\tDerivMean\tDerivStddev\n") + f.write("Iteration\tValueMean\tValueStddev\tDerivMean\tDerivStddev\t" + "Value_5th\tValue_50th\tValue_95th\t" + "Deriv_5th\tDeriv_50th\tDeriv_95th\n") iter_stat_report = [] iter_stats = main_stat_tables[component_name] for row in iter_stats: iter_stat_report.append("\t".join([str(x) for x in row])) f.write("\n".join(iter_stat_report)) f.close() - if plot: main_component_names = main_stat_tables.keys() main_component_names.sort() @@ -279,64 +359,50 @@ def generate_nonlin_stats_plots(exp_dir, output_dir, plot, comparison_dir=None, given experiment dirs are not the same, so comparison plots are provided only for common component names. Make sure that these are comparable experiments before analyzing these plots.""") - + fig = plt.figure() + + common_prefix = os.path.commonprefix(dirs) + prefix_length = common_prefix.rfind('/') + common_prefix = common_prefix[0:prefix_length] + for component_name in main_component_names: - fig.clf() - index = 0 - plots = [] - for dir in dirs: - color_val = g_plot_colors[index] - index += 1 - try: - iter_stats = ( - stat_tables_per_component_per_dir[dir][component_name]) - except KeyError: - # this component is not available in this network so lets - # not just plot it - continue - - data = np.array(iter_stats) - data = data[data[:, 0] >= start_iter, :] - ax = plt.subplot(211) - mp, = ax.plot(data[:, 0], data[:, 1], color=color_val, - label="Mean {0}".format(dir)) - msph, = ax.plot(data[:, 0], data[:, 1] + data[:, 2], - color=color_val, linestyle='--', - label="Mean+-Stddev {0}".format(dir)) - mspl, = ax.plot(data[:, 0], data[:, 1] - data[:, 2], - color=color_val, linestyle='--') - plots.append(mp) - plots.append(msph) - ax.set_ylabel('Value-{0}'.format(comp_type)) - ax.grid(True) - - ax = plt.subplot(212) - mp, = ax.plot(data[:, 0], data[:, 3], color=color_val) - msph, = ax.plot(data[:, 0], data[:, 3] + data[:, 4], - color=color_val, linestyle='--') - mspl, = ax.plot(data[:, 0], data[:, 3] - data[:, 4], - color=color_val, linestyle='--') - ax.set_xlabel('Iteration') - ax.set_ylabel('Derivative-{0}'.format(comp_type)) - ax.grid(True) - - lgd = plt.legend(handles=plots, loc='lower center', - bbox_to_anchor=(0.5, -0.5 + len(dirs) * -0.2), - ncol=1, borderaxespad=0.) 
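One subtlety in the common-prefix logic used in this function: os.path.commonprefix() compares strings character by character, so it can stop in the middle of a path component; trimming back to the last '/' restores a real directory boundary, and the per-directory remainders become the short legend labels. A small sketch of that behaviour, with hypothetical directory names:

    import os

    dirs = ['exp/ihm/chain_cleaned/tdnn_lstm1i_sp_bi_ld5',
            'exp/ihm/chain_cleaned/tdnn_lstm1l_sp_bi_ld5']

    common_prefix = os.path.commonprefix(dirs)      # '...chain_cleaned/tdnn_lstm1' (not a dir!)
    prefix_length = common_prefix.rfind('/')
    common_prefix = common_prefix[0:prefix_length]  # 'exp/ihm/chain_cleaned'
    short_names = [d[prefix_length:] for d in dirs]
    # short_names == ['/tdnn_lstm1i_sp_bi_ld5', '/tdnn_lstm1l_sp_bi_ld5'],
    # which is what ends up after the "[1]" marker in the legend labels.
    assert common_prefix == 'exp/ihm/chain_cleaned'
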
- plt.grid(True) - fig.suptitle("Mean and stddev of the value and derivative at " - "{comp_name}".format(comp_name=component_name)) - comp_name = latex_compliant_name(component_name) - figfile_name = '{dir}/nonlinstats_{comp_name}.pdf'.format( - dir=output_dir, comp_name=comp_name) - fig.savefig(figfile_name, bbox_extra_artists=(lgd,), + if stats_per_dir[exp_dir][component_name]['type'] == 'LstmNonlinearity': + for i in range(0,5): + component_type = 'Lstm-' + g_lstm_gate[i] + lgd = plot_a_nonlin_component(fig, dirs, + stat_tables_per_component_per_dir, component_name, + common_prefix, prefix_length, component_type, start_iter, i) + fig.suptitle("Per-dimension average-(value, derivative) percentiles for " + "{component_name}-{gate}".format(component_name=component_name, gate=g_lstm_gate[i])) + comp_name = latex_compliant_name(component_name) + figfile_name = '{dir}/nonlinstats_{comp_name}_{gate}.pdf'.format( + dir=output_dir, comp_name=comp_name, gate=g_lstm_gate[i]) + fig.savefig(figfile_name, bbox_extra_artists=(lgd,), bbox_inches='tight') - if latex_report is not None: - latex_report.add_figure( + if latex_report is not None: + latex_report.add_figure( + figfile_name, + "Per-dimension average-(value, derivative) percentiles for " + "{0}-{1}".format(component_name, g_lstm_gate[i])) + else: + component_type = stats_per_dir[exp_dir][component_name]['type'] + lgd = plot_a_nonlin_component(fig, dirs, + stat_tables_per_component_per_dir,component_name, + common_prefix, prefix_length, component_type, start_iter, 0) + fig.suptitle("Per-dimension average-(value, derivative) percentiles for " + "{component_name}".format(component_name=component_name)) + comp_name = latex_compliant_name(component_name) + figfile_name = '{dir}/nonlinstats_{comp_name}.pdf'.format( + dir=output_dir, comp_name=comp_name) + fig.savefig(figfile_name, bbox_extra_artists=(lgd,), + bbox_inches='tight') + if latex_report is not None: + latex_report.add_figure( figfile_name, - "Mean and stddev of the value and derivative " - "at {0}".format(component_name)) + "Per-dimension average-(value, derivative) percentiles for " + "{0}".format(component_name)) + def generate_clipped_proportion_plots(exp_dir, output_dir, plot, From 81346fc4b0ccf0f150d416a01a240734940542bd Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 16 Apr 2017 23:27:01 -0400 Subject: [PATCH 507/530] Update travis.yml so PRs to kaldi_52 are built --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f8e2bac0362..9f94726c07b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,7 @@ addons: branches: only: - master - - shortcut + - kaldi_52 before_install: - cat /proc/sys/kernel/core_pattern From 46f328527dc84155b77c0332b5b9fc0d2a2fd860 Mon Sep 17 00:00:00 2001 From: Vijayaditya Peddinti Date: Wed, 19 Apr 2017 17:02:39 -0700 Subject: [PATCH 508/530] [egs] Added check for kaldi_lm being installed in fisher_swbd recipe. (#1558) --- egs/fisher_swbd/s5/run.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/egs/fisher_swbd/s5/run.sh b/egs/fisher_swbd/s5/run.sh index 8b1af972647..0b0d7b9401d 100755 --- a/egs/fisher_swbd/s5/run.sh +++ b/egs/fisher_swbd/s5/run.sh @@ -1,13 +1,18 @@ #!/bin/bash # It's best to run the commands in this one by one. - . ./cmd.sh . ./path.sh mfccdir=mfcc set -e rescore=true +# check for kaldi_lm +which get_word_map.pl > /dev/null +if [ $? -ne 0 ]; then + echo "This recipe requires installation of tools/kaldi_lm. 
Please run extras/kaldi_lm.sh in tools/" && exit 1; +fi + # prepare fisher data and put it under data/train_fisher local/fisher_data_prep.sh /export/corpora3/LDC/LDC2004T19 /export/corpora3/LDC/LDC2005T19 \ /export/corpora3/LDC/LDC2004S13 /export/corpora3/LDC/LDC2005S13 From 72234621b1bd10ba6134da06a4e6ce9d2cca463d Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 20 Apr 2017 02:11:36 -0400 Subject: [PATCH 509/530] [doc] Small edit to hmm.dox, clarifying something --- src/doc/hmm.dox | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/doc/hmm.dox b/src/doc/hmm.dox index 5788b95d9c0..c410b1ba5a1 100644 --- a/src/doc/hmm.dox +++ b/src/doc/hmm.dox @@ -61,9 +61,12 @@ namespace kaldi { \endverbatim There is one TopologyEntry in this particular HmmTopology object, and it covers phones 1 through 8 (so in this example there are just eight phones and they all -share the same topology). There are three emitting states; each has a self-loop +share the same topology). There are three emitting states (i.e. states that +have pdfs associated with them and 'emit' feature vectors); each has a self-loop and a transition to the next state. There is also a fourth, non-emitting state, -state 3 (there is no \ entry for it) which has no transitions out of it. This is +state 3 (there is no \ entry for it) which has no transitions out of it +(implicitly, it connects to the next phone in the sequence). +This is a standard feature of these topology entries; Kaldi treats the first state (state zero) as the start state, and the last state, which should always be nonemitting and have no transitions out of it, has final-probability one. You From d82bbd7a23053ab8e9b2e1f67b714e94fd48fc55 Mon Sep 17 00:00:00 2001 From: navneeth1990 Date: Thu, 20 Apr 2017 22:56:58 +0530 Subject: [PATCH 510/530] [src] nnet3 online silence weighting - adding frame subsampling factor (#1559) This is a fix so the --silence-weighting option will work correctly when decoding online with chain models. Navneeth tested it on a couple of setups so we believe it works. --- src/online2/online-ivector-feature.cc | 42 ++++++++++++------- src/online2/online-ivector-feature.h | 21 ++++++++-- .../online2-wav-nnet3-latgen-faster.cc | 3 +- 3 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/online2/online-ivector-feature.cc b/src/online2/online-ivector-feature.cc index cdfc5948571..1048bd1caa8 100644 --- a/src/online2/online-ivector-feature.cc +++ b/src/online2/online-ivector-feature.cc @@ -164,11 +164,10 @@ void OnlineIvectorFeature::UpdateFrameWeights( // elements from top (lower-numbered frames) to bottom (higher-numbered // frames) should be most efficient, assuming it's a heap internally. So we // go forward not backward in delta_weights while adding. - int32 num_frames_ready = NumFramesReady(); for (size_t i = 0; i < delta_weights.size(); i++) { delta_weights_.push(delta_weights[i]); int32 frame = delta_weights[i].first; - KALDI_ASSERT(frame >= 0 && frame < num_frames_ready); + KALDI_ASSERT(frame >= 0); if (frame > most_recent_frame_with_weight_) most_recent_frame_with_weight_ = frame; } @@ -221,7 +220,7 @@ void OnlineIvectorFeature::UpdateStatsUntilFrameWeighted(int32 frame) { delta_weights_provided_ && ! 
updated_with_no_delta_weights_ && frame <= most_recent_frame_with_weight_); - bool debug_weights = true; + bool debug_weights = false; int32 ivector_period = info_.ivector_period; int32 num_cg_iters = info_.num_cg_iters; @@ -241,8 +240,6 @@ void OnlineIvectorFeature::UpdateStatsUntilFrameWeighted(int32 frame) { if (current_frame_weight_debug_.size() <= frame) current_frame_weight_debug_.resize(frame + 1, 0.0); current_frame_weight_debug_[frame] += weight; - KALDI_ASSERT(current_frame_weight_debug_[frame] >= -0.01 && - current_frame_weight_debug_[frame] <= 1.01); } } if ((!info_.use_most_recent_ivector && t % ivector_period == 0) || @@ -384,9 +381,12 @@ BaseFloat OnlineIvectorFeature::ObjfImprPerFrame() const { OnlineSilenceWeighting::OnlineSilenceWeighting( const TransitionModel &trans_model, - const OnlineSilenceWeightingConfig &config): + const OnlineSilenceWeightingConfig &config, + int32 frame_subsampling_factor): trans_model_(trans_model), config_(config), + frame_subsampling_factor_(frame_subsampling_factor), num_frames_output_and_correct_(0) { + KALDI_ASSERT(frame_subsampling_factor_ >= 1); std::vector silence_phones; SplitStringToIntegers(config.silence_phones_str, ":,", false, &silence_phones); @@ -497,8 +497,15 @@ int32 OnlineSilenceWeighting::GetBeginFrame() { } void OnlineSilenceWeighting::GetDeltaWeights( - int32 num_frames_ready, + int32 num_frames_ready_in, std::vector > *delta_weights) { + // num_frames_ready_in is at the feature frame-rate, most of the code + // in this function is at the decoder frame-rate. + // round up, so we are sure to get weights for at least the frame + // 'num_frames_ready_in - 1', and maybe one or two frames afterward. + int32 fs = frame_subsampling_factor_, + num_frames_ready = (num_frames_ready_in + fs - 1) / fs; + const int32 max_state_duration = config_.max_state_duration; const BaseFloat silence_weight = config_.silence_weight; @@ -515,11 +522,11 @@ void OnlineSilenceWeighting::GetDeltaWeights( // frames_out is the number of frames we will output. KALDI_ASSERT(frames_out >= 0); std::vector frame_weight(frames_out, 1.0); - // we will frame_weight to the value silence_weight for silence frames and for - // transition-ids that repeat with duration > max_state_duration. Frames newer - // than the most recent traceback will get a weight equal to the weight for the - // most recent frame in the traceback; or the silence weight, if there is no - // traceback at all available yet. + // we will set frame_weight to the value silence_weight for silence frames and + // for transition-ids that repeat with duration > max_state_duration. Frames + // newer than the most recent traceback will get a weight equal to the weight + // for the most recent frame in the traceback; or the silence weight, if there + // is no traceback at all available yet. // First treat some special cases. if (frames_out == 0) // Nothing to output. @@ -578,10 +585,13 @@ void OnlineSilenceWeighting::GetDeltaWeights( // Even if the delta-weight is zero for the last frame, we provide it, // because the identity of the most recent frame with a weight is used in // some debugging/checking code. 
- if (weight_diff != 0.0 || offset + 1 == frames_out) - delta_weights->push_back(std::make_pair(frame, weight_diff)); - } - + if (weight_diff != 0.0 || offset + 1 == frames_out) { + for(int32 i = 0; i < frame_subsampling_factor_; i++) { + int32 input_frame = (frame * frame_subsampling_factor_) + i; + delta_weights->push_back(std::make_pair(input_frame, weight_diff)); + } + } + } } } // namespace kaldi diff --git a/src/online2/online-ivector-feature.h b/src/online2/online-ivector-feature.h index 5ba289aa79d..942cb387bbb 100644 --- a/src/online2/online-ivector-feature.h +++ b/src/online2/online-ivector-feature.h @@ -442,8 +442,14 @@ class OnlineSilenceWeighting { public: // Note: you would initialize a new copy of this object for each new // utterance. + // The frame-subsampling-factor is used for newer nnet3 models, especially + // chain models, when the frame-rate of the decoder is different from the + // frame-rate of the input features. E.g. you might set it to 3 for such + // models. + OnlineSilenceWeighting(const TransitionModel &trans_model, - const OnlineSilenceWeightingConfig &config); + const OnlineSilenceWeightingConfig &config, + int32 frame_subsampling_factor = 1); bool Active() const { return config_.Active(); } @@ -456,7 +462,7 @@ class OnlineSilenceWeighting { // the stats... the output format is (frame-index, delta-weight). The // num_frames_ready argument is the number of frames available at the input // (or equivalently, output) of the online iVector extractor class, which may - // be more than the currently availabl decoder traceback. How many frames + // be more than the currently available decoder traceback. How many frames // of weights it outputs depends on how much "num_frames_ready" increased // since last time we called this function, and whether the decoder traceback // changed. Negative delta_weights might occur if frames previously @@ -466,17 +472,19 @@ class OnlineSilenceWeighting { // this output to class OnlineIvectorFeature by calling its function // UpdateFrameWeights with the output. void GetDeltaWeights( - int32 num_frames_ready, + int32 num_frames_ready_in, std::vector > *delta_weights); private: const TransitionModel &trans_model_; const OnlineSilenceWeightingConfig &config_; + int32 frame_subsampling_factor_; + unordered_set silence_phones_; struct FrameInfo { - //The only reason we need the token pointer is to know far back we have to + // The only reason we need the token pointer is to know far back we have to // trace before the traceback is the same as what we previously traced back. void *token; int32 transition_id; @@ -494,6 +502,11 @@ class OnlineSilenceWeighting { // max_state_duration is relevant. int32 GetBeginFrame(); + // This contains information about any previously computed traceback; + // when the traceback changes we use this variable to compare it with the + // previous traceback. + // It's indexed at the frame-rate of the decoder (may be different + // by 'frame_subsampling_factor_' from the frame-rate of the features. 
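Since this arithmetic is easy to get backwards, here is the decoder-rate/feature-rate mapping restated as a small Python sketch (illustrative only; the authoritative logic is the C++ above). num_frames_ready_in arrives at the feature frame rate, the traceback lives at the decoder rate, and each decoder-frame weight delta is fanned out to the frame_subsampling_factor feature frames it covers:

    def num_decoder_frames(num_frames_ready_in, frame_subsampling_factor):
        # Ceiling division, as in the C++ above: guarantees weights are
        # produced at least through feature frame num_frames_ready_in - 1.
        fs = frame_subsampling_factor
        return (num_frames_ready_in + fs - 1) // fs

    def fan_out_weights(decoder_deltas, frame_subsampling_factor):
        """decoder_deltas: list of (decoder_frame, weight_diff) pairs;
        returns the same deltas at the feature frame rate."""
        out = []
        for frame, weight_diff in decoder_deltas:
            for i in range(frame_subsampling_factor):
                input_frame = frame * frame_subsampling_factor + i
                out.append((input_frame, weight_diff))
        return out

    assert num_decoder_frames(100, 3) == 34   # decoder frame 33 covers features 99..101
    assert fan_out_weights([(2, -0.9)], 3) == [(6, -0.9), (7, -0.9), (8, -0.9)]
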
std::vector frame_info_; // This records how many frames have been output and that currently reflect diff --git a/src/online2bin/online2-wav-nnet3-latgen-faster.cc b/src/online2bin/online2-wav-nnet3-latgen-faster.cc index 62204460159..f8fd1f9ef71 100644 --- a/src/online2bin/online2-wav-nnet3-latgen-faster.cc +++ b/src/online2bin/online2-wav-nnet3-latgen-faster.cc @@ -209,7 +209,8 @@ int main(int argc, char *argv[]) { OnlineSilenceWeighting silence_weighting( trans_model, - feature_info.silence_weighting_config); + feature_info.silence_weighting_config, + decodable_opts.frame_subsampling_factor); SingleUtteranceNnet3Decoder decoder(decoder_opts, trans_model, decodable_info, From d8be99a1cdc059dc86435ddbefca130b4e7453d2 Mon Sep 17 00:00:00 2001 From: Gaofeng Cheng <770579626@qq.com> Date: Fri, 21 Apr 2017 04:17:17 +0800 Subject: [PATCH 511/530] [src,scripts,egs] Add dropout for nnet3 LSTMs, with recipes. (#1537) See also http://www.danielpovey.com/files/2017_interspeech_dropout.pdf this improves on the best recipes. --- egs/ami/s5b/RESULTS_ihm | 5 + egs/ami/s5b/RESULTS_sdm | 5 + .../local/chain/tuning/run_tdnn_lstm_1i.sh | 3 +- .../local/chain/tuning/run_tdnn_lstm_1j.sh | 3 +- .../local/chain/tuning/run_tdnn_lstm_1l.sh | 344 +++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1m.sh | 352 ++++++++++++++++++ egs/swbd/s5c/RESULTS | 6 + .../s5c/local/chain/tuning/run_blstm_6l.sh | 248 ++++++++++++ .../local/chain/tuning/run_tdnn_blstm_1b.sh | 248 ++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1k.sh | 321 ++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1l.sh | 244 ++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1s.sh | 333 +++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1t.sh | 333 +++++++++++++++++ .../local/chain/tuning/run_tdnn_lstm_1u.sh | 327 ++++++++++++++++ egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py | 46 +-- src/cudamatrix/cu-kernels-ansi.h | 8 +- src/cudamatrix/cu-kernels.cu | 77 ++-- src/cudamatrix/cu-kernels.h | 16 +- src/cudamatrix/cu-math-test.cc | 34 +- src/cudamatrix/cu-math.cc | 85 +++-- src/cudamatrix/cu-math.h | 18 +- src/nnet3/nnet-component-itf.cc | 2 + src/nnet3/nnet-component-itf.h | 5 +- src/nnet3/nnet-general-component.cc | 83 +++++ src/nnet3/nnet-general-component.h | 82 ++++ src/nnet3/nnet-simple-component.cc | 40 +- src/nnet3/nnet-simple-component.h | 27 +- src/nnet3/nnet-utils.cc | 17 +- src/nnet3/nnet-utils.h | 2 +- 29 files changed, 3188 insertions(+), 126 deletions(-) create mode 100644 egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh create mode 100644 egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh create mode 100644 egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh create mode 100644 egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh create mode 100644 egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh create mode 100644 egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh create mode 100644 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh create mode 100644 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh create mode 100644 egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh diff --git a/egs/ami/s5b/RESULTS_ihm b/egs/ami/s5b/RESULTS_ihm index 1003197701e..e7cb1a52788 100644 --- a/egs/ami/s5b/RESULTS_ihm +++ b/egs/ami/s5b/RESULTS_ihm @@ -84,6 +84,11 @@ %WER 20.8 | 13098 94489 | 82.0 10.0 8.0 2.8 20.8 53.2 | -0.096 | exp/ihm/chain_cleaned/tdnn_lstm1i_sp_bi_ld5/decode_dev/ascore_11/dev_hires.ctm.filt.sys %WER 20.7 | 12643 89980 | 81.7 11.5 6.8 2.5 20.7 51.8 | 0.015 | 
exp/ihm/chain_cleaned/tdnn_lstm1i_sp_bi_ld5/decode_eval/ascore_11/eval_hires.ctm.filt.sys +# local/chain/tuning/run_tdnn_lstm_1l.sh --mic ihm --train-set train_cleaned --gmm tri3_cleaned +# same as local/chain/tuning/run_tdnn_lstm_1i.sh, except that dropout is adopted +# cleanup + chain TDNN+LSTM model + per-frame dropout +%WER 19.8 | 13098 94475 | 83.1 9.6 7.4 2.8 19.8 51.8 | -0.041 | exp/ihm/chain_cleaned/tdnn_lstm1l_sp_bi_ld5/decode_dev/ascore_10/dev_hires.ctm.filt.sys +%WER 19.2 | 12643 89964 | 83.2 10.7 6.1 2.5 19.2 49.7 | 0.079 | exp/ihm/chain_cleaned/tdnn_lstm1l_sp_bi_ld5/decode_eval/ascore_10/eval_hires.ctm.filt.sys # local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh --mic ihm # cleanup + chain TDNN+LSTM model + IHM reverberated data diff --git a/egs/ami/s5b/RESULTS_sdm b/egs/ami/s5b/RESULTS_sdm index 737f8f6dc09..459aedbfbf9 100644 --- a/egs/ami/s5b/RESULTS_sdm +++ b/egs/ami/s5b/RESULTS_sdm @@ -91,6 +91,11 @@ %WER 37.6 | 15122 94495 | 66.1 18.7 15.1 3.7 37.6 63.2 | 0.646 | exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/decode_dev/ascore_10/dev_hires_o4.ctm.filt.sys %WER 40.9 | 13807 89961 | 62.4 20.0 17.6 3.3 40.9 65.7 | 0.612 | exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/decode_eval/ascore_10/eval_hires_o4.ctm.filt.sys +# local/chain/tuning/run_tdnn_lstm_1l.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned +# same as local/chain/tuning/run_tdnn_lstm_1i.sh, except that dropout is adopted +# cleanup + chain TDNN+LSTM model, SDM audio + alignments from ihm data + per-frame dropout. +%WER 35.9 | 14900 94497 | 67.8 18.2 14.1 3.7 35.9 62.5 | 0.647 | exp/sdm1/chain_cleaned/tdnn_lstm1l_sp_bi_ihmali_ld5/decode_dev/ascore_9/dev_hires_o4.ctm.filt.sys +%WER 39.4 | 13223 89946 | 64.1 19.7 16.2 3.5 39.4 67.0 | 0.611 | exp/sdm1/chain_cleaned/tdnn_lstm1l_sp_bi_ihmali_ld5/decode_eval/ascore_9/eval_hires_o4.ctm.filt.sys # local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned # cleanup + chain TDNN+LSTM model, SDM original + IHM reverberated data, alignments from ihm data. diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh index 3e3976ac7a8..92636b4c17e 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh @@ -26,6 +26,7 @@ gmm=tri3_cleaned # the gmm for the target data ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). num_threads_ubm=32 nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +num_epochs=4 chunk_width=150 chunk_left_context=40 @@ -242,7 +243,7 @@ if [ $stage -le 16 ]; then --egs.chunk-right-context $chunk_right_context \ --trainer.num-chunk-per-minibatch 64 \ --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs 4 \ + --trainer.num-epochs $num_epochs \ --trainer.optimization.shrink-value 0.99 \ --trainer.optimization.num-jobs-initial 2 \ --trainer.optimization.num-jobs-final 12 \ diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh index 008060df070..a96230075b6 100755 --- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh @@ -34,6 +34,7 @@ gmm=tri3_cleaned # the gmm for the target data ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). num_threads_ubm=32 nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. 
_cleaned +num_epochs=4 chunk_width=150 chunk_left_context=40 @@ -254,7 +255,7 @@ if [ $stage -le 16 ]; then --egs.chunk-right-context-final 0 \ --trainer.num-chunk-per-minibatch 64,32 \ --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs 4 \ + --trainer.num-epochs $num_epochs \ --trainer.optimization.shrink-value 0.99 \ --trainer.optimization.num-jobs-initial 2 \ --trainer.optimization.num-jobs-final 12 \ diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh new file mode 100644 index 00000000000..74c0f5a6ead --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh @@ -0,0 +1,344 @@ +#!/bin/bash + +# This (1l.sh) is the same as 1i but with per-frame dropout on LSTM layer +# It is a regular (non-fast) LSTM with per-frame dropout on [i, f, o] gates of the LSTM, +# the dropout-adding place is "place4" in paper : http://www.danielpovey.com/files/2017_interspeech_dropout.pdf. +# We have tried both 4-epoch and 5-epoch training. + +### IHM +# Results with flags : --mic ihm --train-set train_cleaned --gmm tri3_cleaned\ +#System tdnn_lstm1i_sp_bi_ld5 tdnn_lstm1l_sp_bi_ld5 +#WER on dev 20.6 19.8 +#WER on eval 20.1 19.2 +#Final train prob -0.044763 -0.0666221 +#Final valid prob -0.0981107 -0.097616 +#Final train prob (xent) -0.722765 -0.915559 +#Final valid prob (xent) -1.03985 -1.09907 + +# ./steps/info/chain_dir_info.pl exp/ihm/chain_cleaned/tdnn_lstm1i_sp_bi_ld5/ exp/ihm/chain_cleaned/tdnn_lstm1l_sp_bi_ld5/ +# exp/ihm/chain_cleaned/tdnn_lstm1i_sp_bi_ld5/: num-iters=89 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.064->-0.059 xent:train/valid[58,88,final]=(-0.940,-0.739,-0.723/-1.14,-1.04,-1.04) logprob:train/valid[58,88,final]=(-0.067,-0.046,-0.045/-0.103,-0.099,-0.098) +# exp/ihm/chain_cleaned/tdnn_lstm1l_sp_bi_ld5/: num-iters=89 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.094->-0.082 xent:train/valid[58,88,final]=(-3.10,-1.11,-0.916/-3.17,-1.29,-1.10) logprob:train/valid[58,88,final]=(-0.164,-0.073,-0.067/-0.182,-0.104,-0.098) + +# Results with flags for (1l.sh) : --num-epochs 5 --tlstm-affix 1i_5epoch --mic ihm --train-set train_cleaned --gmm tri3_cleaned\ +# Results with flags for (1i.sh) : --num-epochs 5 --tlstm-affix 1l_5epoch --mic ihm --train-set train_cleaned --gmm tri3_cleaned\ +#System tdnn_lstm1i_5epoch_sp_bi_ld5 tdnn_lstm1l_5epoch_sp_bi_ld5 +#WER on dev 20.8 19.7 +#WER on eval 20.6 19.3 +#Final train prob -0.0347795-0.0600903 +#Final valid prob -0.102486-0.0964607 +#Final train prob (xent) -0.621007 -0.84667 +#Final valid prob (xent) -1.02634 -1.04725 + +# ./steps/info/chain_dir_info.pl exp/ihm/chain_cleaned/tdnn_lstm1i_5epoch_sp_bi_ld5/ exp/ihm/chain_cleaned/tdnn_lstm1l_5epoch_sp_bi_ld5/ +# exp/ihm/chain_cleaned/tdnn_lstm1i_5epoch_sp_bi_ld5/: num-iters=111 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.053->-0.049 xent:train/valid[73,110,final]=(-0.832,-0.631,-0.621/-1.09,-1.03,-1.03) logprob:train/valid[73,110,final]=(-0.057,-0.037,-0.035/-0.102,-0.103,-0.102) +# exp/ihm/chain_cleaned/tdnn_lstm1l_5epoch_sp_bi_ld5/: num-iters=111 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.085->-0.074 xent:train/valid[73,110,final]=(-3.14,-1.02,-0.847/-3.20,-1.21,-1.05) logprob:train/valid[73,110,final]=(-0.162,-0.065,-0.060/-0.177,-0.101,-0.096) + +### SDM +# Results with flags : --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned \ +#System tdnn_lstm1i_sp_bi_ihmali_ld5 tdnn_lstm1l_sp_bi_ihmali_ld5 +#WER on dev 37.0 35.9 +#WER on eval 40.0 39.4 +#Final train 
prob -0.106971 -0.15439 +#Final valid prob -0.252201 -0.244499 +#Final train prob (xent) -1.41142 -1.73795 +#Final valid prob (xent) -2.13741 -2.14519 + +# ./steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/ exp/sdm1/chain_cleaned/tdnn_lstm1l_sp_bi_ihmali_ld5/ +# exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.138->-0.128 xent:train/valid[57,86,final]=(-1.78,-1.42,-1.41/-2.23,-2.14,-2.14) logprob:train/valid[57,86,final]=(-0.155,-0.108,-0.107/-0.251,-0.254,-0.252) +# exp/sdm1/chain_cleaned/tdnn_lstm1l_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.192->-0.174 xent:train/valid[57,86,final]=(-3.74,-1.95,-1.74/-3.86,-2.31,-2.15) logprob:train/valid[57,86,final]=(-0.287,-0.165,-0.154/-0.335,-0.250,-0.244) + +# Results with flags for (1i.sh) : --num-epochs 5 --tlstm-affix 1i_5epoch --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned\ +# Results with flags for (1l.sh) : --num-epochs 5 --tlstm-affix 1l_5epoch --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned\ +#System tdnn_lstm1i_5epoch_sp_bi_ihmali_ld5 tdnn_lstm1l_5epoch_sp_bi_ihmali_ld5 +#WER on dev 36.9 35.8 +#WER on eval 40.2 39.5 +#Final train prob -0.0854552 -0.134189 +#Final valid prob -0.262789 -0.244183 +#inal train prob (xent) -1.2195 -1.58789 +#Final valid prob (xent) -2.13389 -2.08964 + +# ./steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn_lstm1i_5epoch_sp_bi_ihmali_ld5 exp/sdm1/chain_cleaned/tdnn_lstm1l_5epoch_sp_bi_ihmali_ld5/ +# exp/sdm1/chain_cleaned/tdnn_lstm1i_5epoch_sp_bi_ihmali_ld5: num-iters=109 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.111->-0.104 xent:train/valid[71,108,final]=(-1.61,-1.25,-1.22/-2.16,-2.15,-2.13) logprob:train/valid[71,108,final]=(-0.133,-0.089,-0.085/-0.246,-0.264,-0.263) +# exp/sdm1/chain_cleaned/tdnn_lstm1l_5epoch_sp_bi_ihmali_ld5/: num-iters=109 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.170->-0.153 xent:train/valid[71,108,final]=(-3.67,-1.76,-1.59/-3.81,-2.22,-2.09) logprob:train/valid[71,108,final]=(-0.274,-0.144,-0.134/-0.327,-0.248,-0.244) + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +dropout_schedule='0,0@0.20,0.3@0.50,0' + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1l #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). 
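The dropout_schedule string used throughout these recipes ('0,0@0.20,0.3@0.50,0') reads as a piecewise-linear function of the fraction of training completed: each entry is proportion@fraction, the first entry is pinned to fraction 0.0 and the last to 1.0, and values in between are interpolated. So this schedule keeps dropout at 0 for the first 20% of training, ramps it up to 0.3 at the halfway point, then ramps back down to 0 by the end. A simplified reading of the format (a sketch of the behaviour, not the actual implementation in the nnet3 training scripts):

    def parse_dropout_schedule(sched):
        """Return a sorted list of (fraction, proportion) breakpoints."""
        pieces = sched.split(',')
        points = []
        for i, p in enumerate(pieces):
            if '@' in p:
                value, frac = p.split('@')
            elif i == 0:
                value, frac = p, 0.0   # first entry pinned to fraction 0.0
            else:
                assert i == len(pieces) - 1, "interior entries need value@fraction"
                value, frac = p, 1.0   # last entry pinned to fraction 1.0
            points.append((float(frac), float(value)))
        return sorted(points)

    def dropout_at(points, frac):
        # Linear interpolation between the two surrounding breakpoints.
        for (f0, v0), (f1, v1) in zip(points, points[1:]):
            if f0 <= frac <= f1:
                return v0 + (v1 - v0) * (frac - f0) / (f1 - f0 + 1e-20)
        return points[-1][1]

    pts = parse_dropout_schedule('0,0@0.20,0.3@0.50,0')
    print(dropout_at(pts, 0.35))   # halfway between 0.20 and 0.50 -> 0.15
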
+ # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=true + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=true + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=true + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh new file mode 100644 index 00000000000..b0e7af0618d --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh @@ -0,0 +1,352 @@ +#!/bin/bash + +# This (1m.sh) is the same as 1j but with per-frame dropout on LSTM layer +# It is a fast LSTM with per-frame dropout on [i, f, o] gates of the LSTM, +# the dropout-adding place is "place4" in paper : http://www.danielpovey.com/files/2017_interspeech_dropout.pdf. +# We have tried both 4-epoch and 5-epoch training. 
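To make "per-frame" dropout on the [i, f, o] gates concrete: unlike ordinary per-element dropout, one Bernoulli draw is made per frame per gate and broadcast across the whole cell dimension, so on a dropped frame the entire gate is silenced at once. A NumPy sketch of just this masking idea (illustrative only; it is not the nnet3 implementation, which also drives the proportion from the schedule above):

    import numpy as np

    def per_frame_gate_dropout(i_t, f_t, o_t, dropout_proportion, rng):
        """One 0/1 draw per (frame, gate), broadcast over the cell
        dimension.  Shapes are (num_frames, cell_dim)."""
        num_frames = i_t.shape[0]
        keep = (rng.uniform(size=(num_frames, 3))
                > dropout_proportion).astype(i_t.dtype)
        return (i_t * keep[:, 0:1],    # one mask value per frame, whole gate
                f_t * keep[:, 1:2],
                o_t * keep[:, 2:3])

    rng = np.random.default_rng(0)
    gates = [rng.uniform(size=(5, 8)) for _ in range(3)]
    i_d, f_d, o_d = per_frame_gate_dropout(*gates, dropout_proportion=0.3, rng=rng)
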
+ +### IHM +# Results with flags : --mic ihm --train-set train_cleaned --gmm tri3_cleaned \ +#System tdnn_lstm1j_sp_bi_ld5 tdnn_lstm1m_sp_bi_ld5 +#WER on dev 20.8 19.9 +#WER on eval 20.3 19.3 +#Final train prob -0.0439145 -0.0653269 +#Final valid prob -0.10673 -0.0998743 +#Final train prob (xent) -0.683776 -0.884698 +#Final valid prob (xent) -1.05254 -1.09002 + +# steps/info/chain_dir_info.pl exp/ihm/chain_cleaned/tdnn_lstm1j_sp_bi_ld5/ exp/ihm/chain_cleaned/tdnn_lstm1m_sp_bi_ld5/ +# exp/ihm/chain_cleaned/tdnn_lstm1j_sp_bi_ld5: num-iters=89 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.063->-0.058 xent:train/valid[58,88,final]=(-0.888,-0.695,-0.684/-1.12,-1.06,-1.05) logprob:train/valid[58,88,final]=(-0.065,-0.045,-0.044/-0.105,-0.107,-0.107) +# exp/ihm/chain_cleaned/tdnn_lstm1m_sp_bi_ld5: num-iters=89 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.092->-0.080 xent:train/valid[58,88,final]=(-3.12,-1.09,-0.885/-3.20,-1.27,-1.09) logprob:train/valid[58,88,final]=(-0.164,-0.072,-0.065/-0.181,-0.103,-0.100) + +# Results with flags for (1m.sh) : --num-epochs 5 --tlstm-affix 1m_5epoch --mic ihm --train-set train_cleaned --gmm tri3_cleaned \ +# Results with flags for (1j.sh) : --num-epochs 5 --tlstm-affix 1j_5epoch --mic ihm --train-set train_cleaned --gmm tri3_cleaned \ +#System tdnn_lstm1j_5epoch_sp_bi_ld5 tdnn_lstm1m_5epoch_sp_bi_ld5 +#WER on dev 21.1 19.9 +#WER on eval 20.9 19.8 +#Final train prob -0.0365079 -0.057024 +#Final valid prob -0.112709-0.0992725 +#inal train prob (xent) -0.601602 -0.800653 +#Final valid prob (xent) -1.03241 -1.04748 + +# ./steps/info/chain_dir_info.pl exp/ihm/chain_cleaned/tdnn_lstm1j_5epoch_sp_bi_ld5/ exp/ihm/chain_cleaned/tdnn_lstm1m_5epoch_sp_bi_ld5/ +# exp/ihm/chain_cleaned/tdnn_lstm1j_5epoch_sp_bi_ld5/: num-iters=111 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.053->-0.049 xent:train/valid[73,110,final]=(-0.813,-0.615,-0.602/-1.08,-1.04,-1.03) logprob:train/valid[73,110,final]=(-0.057,-0.038,-0.037/-0.106,-0.113,-0.113) +# exp/ihm/chain_cleaned/tdnn_lstm1m_5epoch_sp_bi_ld5/: num-iters=111 nj=2..12 num-params=43.4M dim=40+100->3765 combine=-0.080->-0.072 xent:train/valid[73,110,final]=(-3.15,-0.985,-0.801/-3.26,-1.21,-1.05) logprob:train/valid[73,110,final]=(-0.161,-0.062,-0.057/-0.183,-0.102,-0.099) + +#### SDM +# Results with flags : --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned \ +#System tdnn_lstm1j_sp_bi_ihmali_ld5 tdnn_lstm1m_sp_bi_ihmali_ld5 +#WER on dev 36.9 36.4 +#WER on eval 40.5 39.9 +#Final train prob -0.108141 -0.148861 +#Final valid prob -0.257468 -0.240962 +#Final train prob (xent) -1.38179 -1.70258 +#Final valid prob (xent) -2.13095 -2.12803 + +# ./steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn_lstm1j_sp_bi_ihmali_ld5/ exp/sdm1/chain_cleaned/tdnn_lstm1m_sp_bi_ihmali_ld5/ +# exp/sdm1/chain_cleaned/tdnn_lstm1j_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.138->-0.128 xent:train/valid[57,86,final]=(-1.71,-1.39,-1.38/-2.18,-2.14,-2.13) logprob:train/valid[57,86,final]=(-0.150,-0.110,-0.108/-0.251,-0.260,-0.257) +# exp/sdm1/chain_cleaned/tdnn_lstm1m_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.187->-0.170 xent:train/valid[57,86,final]=(-3.74,-1.90,-1.70/-3.88,-2.28,-2.13) logprob:train/valid[57,86,final]=(-0.286,-0.158,-0.149/-0.336,-0.245,-0.241) + +# Results with flags for (1m.sh) : --num-epochs 5 --tlstm-affix 1m_5epoch --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned\ +# Results 
with flags for (1j.sh) : --num-epochs 5 --tlstm-affix 1j_5epoch --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned\ +#System tdnn_lstm1j_5epoch_sp_bi_ihmali_ld5 tdnn_lstm1m_5epoch_sp_bi_ihmali_ld5 +#WER on dev 37.4 36.0 +#WER on eval 40.7 39.6 +#Final train prob -0.0879063 -0.133092 +#Final valid prob -0.270953 -0.243246 +#Final train prob (xent) -1.20822 -1.56293 +#Final valid prob (xent) -2.1425 -2.07265 + +# ./steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn_lstm1j_5epoch_sp_bi_ihmali_ld5/ exp/sdm1/chain_cleaned/tdnn_lstm1m_5epoch_sp_bi_ihmali_ld5/ +# exp/sdm1/chain_cleaned/tdnn_lstm1j_5epoch_sp_bi_ihmali_ld5/: num-iters=109 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.115->-0.107 xent:train/valid[71,108,final]=(-1.56,-1.22,-1.21/-2.16,-2.16,-2.14) logprob:train/valid[71,108,final]=(-0.131,-0.090,-0.088/-0.256,-0.273,-0.271) +# exp/sdm1/chain_cleaned/tdnn_lstm1m_5epoch_sp_bi_ihmali_ld5/: num-iters=109 nj=2..12 num-params=43.4M dim=40+100->3741 combine=-0.167->-0.153 xent:train/valid[71,108,final]=(-3.69,-1.71,-1.56/-3.84,-2.20,-2.07) logprob:train/valid[71,108,final]=(-0.279,-0.140,-0.133/-0.329,-0.247,-0.243) + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +dropout_schedule='0,0@0.20,0.3@0.50,0' # dropout schedule controls the dropout + # proportion for each training iteration. +num_epochs=4 + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1m #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20 dropout-proportion=0.0" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs $num_epochs \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/swbd/s5c/RESULTS b/egs/swbd/s5c/RESULTS index f103200f966..2cf34c600c1 100644 --- a/egs/swbd/s5c/RESULTS +++ b/egs/swbd/s5c/RESULTS @@ -203,6 +203,12 @@ exit 0 %WER 21.2 | 2628 21594 | 81.4 12.8 5.9 2.6 21.2 56.7 | exp/chain/lstm_d_ld5_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys %WER 13.88 [ 6829 / 49204, 935 ins, 1690 del, 4204 sub ] exp/chain/lstm_d_ld5_sp/decode_train_dev_sw1_fsh_fg/wer_9_0.0 +# current best 'chain' models with TDNN + LSTM + dropout (see local/chain/run_tdnn_lstm_1l.sh) +%WER 13.5 | 4459 42989 | 88.2 8.0 3.8 1.7 13.5 48.2 | exp/chain/tdnn_lstm_1b_dropout_ld5_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.filt.sys +%WER 8.8 | 1831 21395 | 92.3 5.2 2.5 1.1 8.8 41.9 | exp/chain/tdnn_lstm_1b_dropout_ld5_sp/decode_eval2000_sw1_fsh_fg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys +%WER 18.1 | 2628 21594 | 84.0 10.8 5.2 2.2 18.1 52.6 
| exp/chain/tdnn_lstm_1b_dropout_ld5_sp/decode_eval2000_sw1_fsh_fg/score_10_1.0/eval2000_hires.ctm.callhm.filt.sys +%WER 11.59 [ 5615 / 48460, 708 ins, 1450 del, 3457 sub ] exp/chain/tdnn_lstm_1b_dropout_ld5_sp/decode_train_dev_sw1_fsh_fg/wer_9_0.0 + # these are results with nnet3 LSTMs with CTC training : local/ctc/run_lstm.sh %WER 17.4 | 1831 21395 | 85.3 10.1 4.6 2.7 17.4 57.8 | exp/ctc/lstm_sp/decode_eval2000_sw1_fsh_fg_0.15/score_12_0.0/eval2000_hires.ctm.swbd.filt.sys %WER 19.4 | 1831 21395 | 83.5 11.2 5.2 3.0 19.4 60.7 | exp/ctc/lstm_sp/decode_eval2000_sw1_tg_0.15/score_12_0.5/eval2000_hires.ctm.swbd.filt.sys diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh new file mode 100644 index 00000000000..68daf81ab01 --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh @@ -0,0 +1,248 @@ +#!/bin/bash + +# 6l is same as 6k, but with the per-frame dropout +# location4 as paper : http://www.danielpovey.com/files/2017_interspeech_dropout.pdf + +# local/chain/compare_wer_general.sh blstm_6k_sp blstm_6l_sp +# attention: the blatm_6k_sp result here is far better than the updated +# result (14.5 vs 14.1), this may due to noise + +# System blstm_6k_sp blstm_6l_sp +# WER on train_dev(tg) 13.30 13.06 +# WER on train_dev(fg) 12.34 12.16 +# WER on eval2000(tg) 15.5 15.2 +# WER on eval2000(fg) 14.1 13.8 +# Final train prob -0.052 -0.065 +# Final valid prob -0.090 -0.093 +# Final train prob (xent) -0.743 -0.831 +# Final valid prob (xent) -0.9579 -0.9821 + +# exp/chain/blstm_6k_sp/: num-iters=327 nj=3..16 num-params=41.2M dim=40+100->6074 combine=-0.069->-0.069 xent:train/valid[217,326,final]=(-0.849,-0.748,-0.743/-1.04,-0.959,-0.958) logprob:train/valid[217,326,final]=(-0.065,-0.053,-0.052/-0.096,-0.090,-0.090) +# exp/chain/blstm_6l_sp/: num-iters=327 nj=3..16 num-params=41.2M dim=40+100->6074 combine=-0.084->-0.082 xent:train/valid[217,326,final]=(-1.45,-0.840,-0.831/-1.58,-0.994,-0.982) logprob:train/valid[217,326,final]=(-0.110,-0.066,-0.065/-0.132,-0.094,-0.093) +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/blstm_6l # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_dir_affix= + +# training options +leftmost_questions_truncate=-1 +chunk_width=150 +chunk_left_context=40 +chunk_right_context=40 +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=0 +dropout_schedule='0,0@0.20,0.1@0.50,0' + +# decode options +extra_left_context=50 +extra_right_context=50 +frames_per_chunk= + +remove_egs=false +common_egs_dir= + +affix= +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20 dropout-proportion=0.0" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=blstm1-forward input=lda cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm1-backward input=lda cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm2-forward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm2-backward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm3-forward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1200000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.dropout-schedule $dropout_schedule \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + +decode_suff=sw1_tg +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + iter_opts= + if [ ! 
-z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_${decode_suff} || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh new file mode 100644 index 00000000000..3929cdc432e --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh @@ -0,0 +1,248 @@ +#!/bin/bash + +# tdnn_blstm_1b is same as tdnn_blstm_1a, but with the per-frame dropout +# added with location 4, see paper: +# http://www.danielpovey.com/files/2017_interspeech_dropout.pdf + +# ./local/chain/compare_wer_general.sh tdnn_blstm_1a_sp tdnn_blstm_1b_sp +# System tdnn_blstm_1a_sp tdnn_blstm_1b_sp +# WER on train_dev(tg) 12.86 12.60 +# WER on train_dev(fg) 11.86 11.80 +# WER on eval2000(tg) 15.3 14.9 +# WER on eval2000(fg) 14.0 13.5 +# Final train prob -0.042 -0.054 +# Final valid prob -0.099 -0.091 +# Final train prob (xent) -0.637 -0.719 +# Final valid prob (xent) -0.9418 -0.9190 + +# exp/chain/tdnn_blstm_1a_sp/: num-iters=327 nj=3..16 num-params=53.7M dim=40+100->6074 combine=-0.058->-0.057 xent:train/valid[217,326,final]=(-0.753,-0.631,-0.637/-0.974,-0.941,-0.942) logprob:train/valid[217,326,final]=(-0.055,-0.041,-0.042/-0.094,-0.099,-0.099) +# exp/chain/tdnn_blstm_1b_sp/: num-iters=327 nj=3..16 num-params=53.7M dim=40+100->6074 combine=-0.070->-0.068 xent:train/valid[217,326,final]=(-1.27,-0.732,-0.719/-1.42,-0.931,-0.919) logprob:train/valid[217,326,final]=(-0.094,-0.055,-0.054/-0.117,-0.091,-0.091) +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_blstm_1b # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_dir_affix= + +# training options +leftmost_questions_truncate=-1 +chunk_width=150 +chunk_left_context=40 +chunk_right_context=40 +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=0 +dropout_schedule='0,0@0.20,0.1@0.50,0' +# decode options +extra_left_context=50 +extra_right_context=50 +frames_per_chunk= + +remove_egs=false +common_egs_dir= + +affix= +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
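+  # (editor's note, illustrative: --frame-subsampling-factor 3 below means the
+  # 'chain' model emits outputs at one third of the input frame rate, so the
+  # $chunk_width of 150 input frames used here corresponds to 150 / 3 = 50
+  # output frames; the tree is built on correspondingly subsampled alignments.)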
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20 dropout-proportion=0.0" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=blstm1-forward input=tdnn3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm1-backward input=tdnn3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm2-forward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm2-backward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm3-forward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
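+  # (editor's note, illustrative: with xent_regularize=0.025 as set above, the
+  # script computes learning_rate_factor = 0.5 / 0.025 = 20, i.e. the xent
+  # output layer trains at 20x the base learning rate, cancelling the 0.025
+  # scaling of its derivatives up to the tuned constant of 0.5; note the
+  # computation above relies on a python2-style print statement.)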
+ output-layer name=output-xent input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1200000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.dropout-schedule $dropout_schedule \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + +decode_suff=sw1_tg +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + iter_opts= + if [ ! 
-z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_${decode_suff} || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh new file mode 100644 index 00000000000..21cb4fa9373 --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh @@ -0,0 +1,321 @@ +#!/bin/bash + +# run_tdnn_lstm_1k.sh is like run_tdnn_lstm_1e.sh but +# added the per-frame dropout location 4 as paper: +# http://www.danielpovey.com/files/2017_interspeech_dropout.pdf + +# ./local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1k_sp +# System tdnn_lstm_1e_sp tdnn_lstm_1k_sp +# WER on train_dev(tg) 13.18 12.60 +# [looped:] 13.10 12.56 +# WER on train_dev(fg) 12.21 11.58 +# [looped:] 12.28 11.62 +# WER on eval2000(tg) 15.8 15.2 +# [looped:] 15.8 15.2 +# WER on eval2000(fg) 14.5 13.7 +# [looped:] 14.5 13.8 +# Final train prob -0.060 -0.076 +# Final valid prob -0.101 -0.106 +# Final train prob (xent) -0.868 -0.989 +# Final valid prob (xent) -1.0740 -1.1341 + +# exp/chain/tdnn_lstm_1e_sp/: num-iters=262 nj=3..16 num-params=39.6M dim=40+100->6074 combine=-0.072->-0.071 xent:train/valid[173,261,final]=(-1.01,-0.876,-0.868/-1.16,-1.08,-1.07) logprob:train/valid[173,261,final]=(-0.075,-0.061,-0.060/-0.106,-0.101,-0.101) +# exp/chain/tdnn_lstm_1k_sp/: num-iters=262 nj=3..16 num-params=39.6M dim=40+100->6074 combine=-0.093->-0.089 xent:train/valid[173,261,final]=(-2.87,-1.07,-0.989/-2.90,-1.20,-1.13) logprob:train/valid[173,261,final]=(-0.153,-0.079,-0.076/-0.179,-0.107,-0.106) + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1e # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_nj=50 + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
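+  # (editor's note: the "new topology" is the 'chain' HMM topology written to
+  # $lang/topo just above, typically generated by steps/nnet3/chain/gen_topo.py;
+  # it lets a phone be traversed in as little as one frame, which is what makes
+  # the 3x-reduced output frame rate of these models workable.)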
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +iter_opts= +if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi + +if [ $stage -le 15 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
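+  # (editor's note: the blocks below use a common Kaldi shell idiom for running
+  # the per-dataset decodes in parallel -- each decode runs in a backgrounded
+  # subshell, a failure touches a $dir/.error flag file, and after 'wait' the
+  # presence of that file aborts the script:
+  #   ( decode_command || exit 1 ) || touch $dir/.error &
+  #   wait; [ -f $dir/.error ] && exit 1
+  # note that the looped-decoding block below checks $dir/.error but its
+  # subshells end in plain ') &', so nothing ever touches the flag there; the
+  # online-decoding stage further down uses ') || touch $dir/.error &'.)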
+ rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in looped decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 17 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor $dir ${dir}_online + + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + # note: we just give it "$decode_set" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --acwt 1.0 --post-decode-acwt 10.0 \ + $graph_dir data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in online decoding" + exit 1 + fi +fi + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh new file mode 100644 index 00000000000..e88e199839c --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh @@ -0,0 +1,244 @@ +#!/bin/bash + +# tdnn_lstm_1l is same as tdnn_lstm_1b, but with the per-frame dropout +# added with location 4 in LSTM layer, see paper: +# http://www.danielpovey.com/files/2017_interspeech_dropout.pdf + +# ./local/chain/compare_wer_general.sh tdnn_lstm_1b_ld5_sp tdnn_lstm_1l_ld5_sp +# System tdnn_lstm_1b_ld5_sp tdnn_lstm_1l_ld5_sp +# WER on train_dev(tg) 13.06 12.41 +# WER on train_dev(fg) 12.13 11.59 +# WER on eval2000(tg) 15.1 14.8 +# WER on eval2000(fg) 13.9 13.5 +# Final train prob -0.047 -0.069 +# Final valid prob -0.093 -0.095 +# Final train prob (xent) -0.735 -0.913 +# Final valid prob (xent) -1.0151 -1.0820 + +# exp/chain/tdnn_lstm_1b_ld5_sp: num-iters=327 nj=3..16 num-params=39.6M dim=40+100->6074 combine=-0.062->-0.061 xent:train/valid[217,326,final]=(-0.877,-0.741,-0.735/-1.08,-1.02,-1.02) logprob:train/valid[217,326,final]=(-0.063,-0.048,-0.047/-0.095,-0.093,-0.093) +# exp/chain/tdnn_lstm_1l_ld5_sp: num-iters=327 nj=3..16 num-params=39.6M dim=40+100->6074 combine=-0.088->-0.084 xent:train/valid[217,326,final]=(-3.32,-0.961,-0.913/-3.40,-1.13,-1.08) logprob:train/valid[217,326,final]=(-0.176,-0.072,-0.069/-0.198,-0.097,-0.095) +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1l # Note: _sp will get added to this if $speed_perturb == true. 
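+# (editor's note: decode_iter and decode_dir_affix below are optional; the
+#  decode stage uses the bash expansion ${decode_dir_affix:+_$decode_dir_affix},
+#  which expands to "_$decode_dir_affix" when the variable is set and non-empty
+#  and to nothing otherwise, so decode directory names stay clean by default.)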
+decode_iter= +decode_dir_affix= + +# training options +leftmost_questions_truncate=-1 +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=5 +dropout_schedule='0,0@0.20,0.3@0.50,0' +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk= + +remove_egs=false +common_egs_dir= + +affix= +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=true + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=true + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 dropout-proportion=0.0 dropout-per-frame=true + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
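+  # (editor's note: output-delay=$label_delay, 5 in this script, delays the
+  # network's output by 5 frames relative to the input, so the forward-recurrent
+  # LSTMs see roughly 5 extra frames of future context when predicting each
+  # frame; the same delay is applied to this xent output and, above, to the
+  # chain output.)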
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1200000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --trainer.dropout-schedule $dropout_schedule \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + +decode_suff=sw1_tg +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + iter_opts= + if [ ! 
-z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_${decode_suff} || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh new file mode 100644 index 00000000000..dc0f59fb64a --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh @@ -0,0 +1,333 @@ +#!/bin/bash + +# 1s is as 1e, but adding per-frame dropout to LSTM in location4 +# as paper : http://www.danielpovey.com/files/2017_interspeech_dropout.pdf + +# ./local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1s_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1s_sp_bi +# WER on dev(orig) 9.0 8.9 +# [looped:] 9.0 8.9 +# WER on dev(rescored) 8.4 8.1 +# [looped:] 8.4 8.1 +# WER on test(orig) 8.9 8.8 +# [looped:] 8.9 8.8 +# WER on test(rescored) 8.4 8.4 +# [looped:] 8.4 8.3 +# Final train prob -0.0712 -0.0914 +# Final valid prob -0.0892 -0.0977 +# Final train prob (xent) -0.8566 -0.9931 +# Final valid prob (xent) -0.9927 -1.0633 + +# exp/chain_cleaned/tdnn_lstm1e_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3626 combine=-0.082->-0.081 xent:train/valid[167,252,final]=(-0.961,-0.859,-0.857/-1.06,-0.999,-0.993) logprob:train/valid[167,252,final]=(-0.086,-0.072,-0.071/-0.098,-0.091,-0.089) +# exp/chain_cleaned/tdnn_lstm1s_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3626 combine=-0.104->-0.101 xent:train/valid[167,252,final]=(-3.08,-1.07,-0.993/-3.13,-1.14,-1.06) logprob:train/valid[167,252,final]=(-0.181,-0.093,-0.091/-0.183,-0.100,-0.098) + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +frames_per_chunk=140,100,160 +dropout_schedule="0,0@0.2,0.3@0.5,0" +# decode options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 + + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1s #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. 
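+# (editor's note, illustrative: the dropout_schedule set above,
+#  "0,0@0.2,0.3@0.5,0", is a piecewise-linear function of the fraction of
+#  training completed: the dropout proportion stays at 0 until 20% of training,
+#  rises linearly to 0.3 at 50%, then falls back to 0 by the end, so the final
+#  model is effectively trained dropout-free.)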
+remove_egs=true + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 dropout-proportion=0.0 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 dropout-proportion=0.0 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 dropout-proportion=0.0 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
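+  # (editor's note: the dropout-proportion=0.0 on the fast-lstmp layers above
+  # is only an initial value; during training it is reset each iteration from
+  # the --trainer.dropout-schedule option passed to steps/nnet3/chain/train.py,
+  # returning to 0 for the final model.)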
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs "$remove_egs" \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results very much (unlike + # regular decoding)... 
[it will affect them slightly due to differences in the + # iVector extraction; probably smaller will be worse as it sees less of the future, + # but in a real scenario, long chunks will introduce excessive latency]. + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if $test_online_decoding && [ $stage -le 22 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + data/lang_chain exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + # note: we just give it "$dset" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ + --extra-left-context-initial $extra_left_context_initial \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset} ${dir}_online/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}_online/decode_${dset} ${dir}_online/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh new file mode 100644 index 00000000000..c286fcef353 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh @@ -0,0 +1,333 @@ +#!/bin/bash + +# 1t is as 1e, but increasing the TDNN dim and LSTM cell-dim into +# 1024, the recurrent and non-recurrent projection of the LSTM from +# 128 into 256. 
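+# (editor's note, illustrative: affine and projection weight matrices scale
+# with in-dim * out-dim, so doubling both the TDNN dims (512 -> 1024) and the
+# LSTM projection dims (128 -> 256) roughly quadruples the parameter count,
+# consistent with the growth from ~9.5M to ~37.1M num-params reported below.)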
+ +# ./local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1t_sp_bi +# System tdnn_lstm1e_again_sp_bi tdnn_lstm1t_again_sp_bi +# WER on dev(orig) 9.0 8.9 +# [looped:] 9.0 8.9 +# WER on dev(rescored) 8.4 8.2 +# [looped:] 8.4 8.3 +# WER on test(orig) 8.9 8.9 +# [looped:] 8.9 9.0 +# WER on test(rescored) 8.4 8.4 +# [looped:] 8.4 8.5 +# Final train prob -0.0712 -0.0459 +# Final valid prob -0.0892 -0.0867 +# Final train prob (xent) -0.8566 -0.6434 +# Final valid prob (xent) -0.9927 -0.8733 + +# exp/chain_cleaned/tdnn_lstm1e_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3626 combine=-0.082->-0.081 xent:train/valid[167,252,final]=(-0.961,-0.859,-0.857/-1.06,-0.999,-0.993) logprob:train/valid[167,252,final]=(-0.086,-0.072,-0.071/-0.098,-0.091,-0.089) +# exp/chain_cleaned/tdnn_lstm1t_sp_bi: num-iters=253 nj=2..12 num-params=37.1M dim=40+100->3626 combine=-0.055->-0.055 xent:train/valid[167,252,final]=(-0.774,-0.655,-0.643/-0.928,-0.883,-0.873) logprob:train/valid[167,252,final]=(-0.063,-0.048,-0.046/-0.087,-0.089,-0.087) + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +frames_per_chunk=140,100,160 +# decode options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 + + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1t #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. +remove_egs=true + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 dim=1024 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=1024 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=1024 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=1024 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=1024 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 decay-time=20 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs "$remove_egs" \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results very much (unlike + # regular decoding)... [it will affect them slightly due to differences in the + # iVector extraction; probably smaller will be worse as it sees less of the future, + # but in a real scenario, long chunks will introduce excessive latency]. 
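+  # (editor's note, illustrative: at the standard 10 ms frame shift,
+  # --frames-per-chunk 30 corresponds to roughly 0.3 seconds of audio consumed
+  # per chunk, which gives a rough bound on the extra latency this choice would
+  # add in a genuinely online setting.)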
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if $test_online_decoding && [ $stage -le 22 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + data/lang_chain exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + # note: we just give it "$dset" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ + --extra-left-context-initial $extra_left_context_initial \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset} ${dir}_online/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}_online/decode_${dset} ${dir}_online/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh new file mode 100644 index 00000000000..9e50060f5d6 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh @@ -0,0 +1,327 @@ +#!/bin/bash + +# 1u is the same as 1t but adding per-frame dropout to LSTM +# in location4, see paper : http://www.danielpovey.com/files/2017_interspeech_dropout.pdf + +# ./local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_lstm1t_sp_bi exp/chain_cleaned/tdnn_lstm1u_sp_bi +# System tdnn_lstm1t_again_sp_bi tdnn_lstm1u_sp_bi +# WER on dev(orig) 8.9 8.6 +# WER on dev(rescored) 8.2 8.0 +# WER on test(orig) 8.9 8.3 +# WER on test(rescored) 8.4 7.9 +# Final train prob -0.0459 -0.0709 +# Final valid prob -0.0867 -0.0902 +# Final train prob (xent) -0.6434 -0.8112 +# Final valid prob (xent) -0.8733 -0.9384 + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. 
_cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +frames_per_chunk=140,100,160 +dropout_schedule="0,0@0.20,0.3@0.5,0" +# decode options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 + + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1u #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. +remove_egs=true + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 dim=1024 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 dropout-proportion=0.0 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=1024 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=1024 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 dropout-proportion=0.0 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=1024 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=1024 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 dropout-proportion=0.0 
decay-time=20 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.dropout-schedule="$dropout_schedule" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs "$remove_egs" \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
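(An aside on the --trainer.dropout-schedule option passed to train.py in
stage 18 above: the string "0,0@0.20,0.3@0.5,0" specifies the dropout
proportion as a piecewise-linear function of the fraction of training data
processed -- zero until 20% of training, rising to 0.3 at the halfway point,
and decaying back to zero by the end.  The sketch below is a minimal Python
illustration of that interpretation; the names are ours, and the actual
parsing lives in the steps/libs/nnet3 training library, which may differ in
detail.)

    def parse_schedule(s):
        """Turn e.g. "0,0@0.20,0.3@0.5,0" into (data_fraction, proportion) pairs."""
        entries = []
        parts = s.split(',')
        for i, part in enumerate(parts):
            if '@' in part:
                value, frac = part.split('@')
            else:
                # bare values are pinned to the start and end of training.
                value, frac = part, ('0.0' if i == 0 else '1.0')
            entries.append((float(frac), float(value)))
        return entries

    def dropout_at(entries, data_fraction):
        """Piecewise-linear interpolation between the schedule points."""
        for (x0, y0), (x1, y1) in zip(entries, entries[1:]):
            if x0 <= data_fraction <= x1:
                t = (data_fraction - x0) / (x1 - x0) if x1 > x0 else 0.0
                return y0 + t * (y1 - y0)
        return entries[-1][1]

    sched = parse_schedule("0,0@0.20,0.3@0.5,0")
    print(dropout_at(sched, 0.35))   # halfway between 0.0 and 0.3 -> 0.15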
+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results very much (unlike + # regular decoding)... [it will affect them slightly due to differences in the + # iVector extraction; probably smaller will be worse as it sees less of the future, + # but in a real scenario, long chunks will introduce excessive latency]. + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if $test_online_decoding && [ $stage -le 22 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + data/lang_chain exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + # note: we just give it "$dset" as it only uses the wav.scp, the + # feature type does not matter. 
+ + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ + --extra-left-context-initial $extra_left_context_initial \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset} ${dir}_online/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}_online/decode_${dset} ${dir}_online/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +exit 0 diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py index 4ffebcd9436..c92afb1c2dc 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py @@ -716,9 +716,9 @@ def set_default_configs(self): 'decay-time': -1.0, 'zeroing-interval' : 20, 'zeroing-threshold' : 15.0, - 'dropout-proportion' : -1.0, # If -1.0, no dropout components will be added - 'dropout-per-frame' : False # If false, regular dropout, not per frame. - } + 'dropout-proportion' : -1.0, # If -1.0, no dropout will + # be used) + } def set_derived_configs(self): if self.config['cell-dim'] <= 0: @@ -751,7 +751,6 @@ def check_configs(self): raise RuntimeError("dropout-proportion has invalid value {0}.".format(self.config['dropout-proportion'])) - def auxiliary_outputs(self): return ['c_t'] @@ -818,7 +817,6 @@ def generate_lstm_config(self): lstm_str = self.config['lstm-nonlinearity-options'] dropout_proportion = self.config['dropout-proportion'] - dropout_per_frame = 'true' if self.config['dropout-per-frame'] else 'false' configs = [] @@ -833,14 +831,16 @@ def generate_lstm_config(self): configs.append("# The core LSTM nonlinearity, implemented as a single component.") configs.append("# Input = (i_part, f_part, c_part, o_part, c_{t-1}), output = (c_t, m_t)") configs.append("# See cu-math.h:ComputeLstmNonlinearity() for details.") - configs.append("component name={0}.lstm_nonlin type=LstmNonlinearityComponent cell-dim={1} {2}".format(name, cell_dim, lstm_str)) + configs.append("component name={0}.lstm_nonlin type=LstmNonlinearityComponent cell-dim={1} " + "use-dropout={2} {3}" + .format(name, cell_dim, "true" if dropout_proportion != -1.0 else "false", lstm_str)) configs.append("# Component for backprop truncation, to avoid gradient blowup in long training examples.") configs.append("component name={0}.cr_trunc type=BackpropTruncationComponent " "dim={1} {2}".format(name, cell_dim + rec_proj_dim, bptrunc_str)) if dropout_proportion != -1.0: - configs.append("component name={0}.cr_trunc.dropout type=DropoutComponent dim={1} " - "dropout-proportion={2} dropout-per-frame={3}" - .format(name, cell_dim + rec_proj_dim, dropout_proportion, dropout_per_frame)) + configs.append("component name={0}.dropout_mask type=DropoutMaskComponent output-dim=3 " + "dropout-proportion={1} " + .format(name, dropout_proportion)) configs.append("# Component specific to 'projected' LSTM (LSTMP), contains both recurrent"); configs.append("# and non-recurrent projections") configs.append("component name={0}.W_rp type=NaturalGradientAffineComponent input-dim={1} " @@ -849,8 +849,17 @@ def generate_lstm_config(self): configs.append("### Nodes for the components above.") configs.append("component-node name={0}.four_parts component={0}.W_all input=Append({1}, " "IfDefined(Offset({0}.r_trunc, {2})))".format(name, input_descriptor, delay)) - configs.append("component-node 
name={0}.lstm_nonlin component={0}.lstm_nonlin " - "input=Append({0}.four_parts, IfDefined(Offset({0}.c_trunc, {1})))".format(name, delay)) + if dropout_proportion != -1.0: + # note: the 'input' is a don't-care as the component never uses it; it's required + # in component-node lines. + configs.append("component-node name={0}.dropout_mask component={0}.dropout_mask " + "input={0}.dropout_mask".format(name)) + configs.append("component-node name={0}.lstm_nonlin component={0}.lstm_nonlin " + "input=Append({0}.four_parts, IfDefined(Offset({0}.c_trunc, {1})), {0}.dropout_mask)" + .format(name, delay)) + else: + configs.append("component-node name={0}.lstm_nonlin component={0}.lstm_nonlin " + "input=Append({0}.four_parts, IfDefined(Offset({0}.c_trunc, {1})))".format(name, delay)) configs.append("dim-range-node name={0}.c input-node={0}.lstm_nonlin " "dim-offset=0 dim={1}".format(name, cell_dim)) configs.append("dim-range-node name={0}.m input-node={0}.lstm_nonlin " @@ -864,17 +873,10 @@ def generate_lstm_config(self): configs.append("# makes the deriv truncation more accurate .") configs.append("component-node name={0}.cr_trunc component={0}.cr_trunc " "input=Append({0}.c, {0}.r)".format(name)) - if dropout_proportion != -1.0: - configs.append("component-node name={0}.cr_trunc.dropout component={0}.cr_trunc.dropout input={0}.cr_trunc".format(name)) - configs.append("dim-range-node name={0}.c_trunc input-node={0}.cr_trunc.dropout " - "dim-offset=0 dim={1}".format(name, cell_dim)) - configs.append("dim-range-node name={0}.r_trunc input-node={0}.cr_trunc.dropout " - "dim-offset={1} dim={2}".format(name, cell_dim, rec_proj_dim)) - else: - configs.append("dim-range-node name={0}.c_trunc input-node={0}.cr_trunc " - "dim-offset=0 dim={1}".format(name, cell_dim)) - configs.append("dim-range-node name={0}.r_trunc input-node={0}.cr_trunc " - "dim-offset={1} dim={2}".format(name, cell_dim, rec_proj_dim)) + configs.append("dim-range-node name={0}.c_trunc input-node={0}.cr_trunc " + "dim-offset=0 dim={1}".format(name, cell_dim)) + configs.append("dim-range-node name={0}.r_trunc input-node={0}.cr_trunc " + "dim-offset={1} dim={2}".format(name, cell_dim, rec_proj_dim)) configs.append("### End LSTM Layer '{0}'".format(name)) return configs diff --git a/src/cudamatrix/cu-kernels-ansi.h b/src/cudamatrix/cu-kernels-ansi.h index 444da38dd30..5b72a62e716 100644 --- a/src/cudamatrix/cu-kernels-ansi.h +++ b/src/cudamatrix/cu-kernels-ansi.h @@ -330,6 +330,7 @@ void cudaF_diff_log_softmax(dim3 Gr, dim3 Bl, const MatrixDim in_deriv_dim, const float* out_deriv, const int out_deriv_stride, float* in_deriv); void cudaD_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, + const int have_dropout_mask, const int num_rows, const double* input, const int in_stride, const double* params, const int params_stride, @@ -349,6 +350,7 @@ void cudaD_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, double* self_repair_sum_out, const int self_repair_sum_out_stride); void cudaF_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, + const int have_dropout_mask, const int num_rows, const float* input, const int in_stride, const float* params, const int params_stride, @@ -455,12 +457,14 @@ void cudaF_log_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, void cudaD_lstm_nonlinearity(dim3 Gr, dim3 Bl, const double* in, const int in_stride, const double* params, const int params_stride, const int out_stride, - const int cell_dim, const int num_rows, + const int cell_dim, const int have_dropout_mask, + 
const int num_rows, double* out); void cudaF_lstm_nonlinearity(dim3 Gr, dim3 Bl, const float* in, const int in_stride, const float* params, const int params_stride, const int out_stride, - const int cell_dim, const int num_rows, + const int cell_dim, const int have_dropout_mask, + const int num_rows, float* out); void cudaD_matrix_add_elements(dim3 Gr, dim3 Bl, double *data, MatrixDim dim, double alpha, MatrixElement* x, diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu index 60800d9568d..6df0e5af9db 100644 --- a/src/cudamatrix/cu-kernels.cu +++ b/src/cudamatrix/cu-kernels.cu @@ -2846,6 +2846,9 @@ static void _diff_log_softmax(const MatrixDim in_deriv_dim, consecutive blocks, each of dimension cell_dim, which we name: (i_part, f_part, c_part, o_part, c_{t-1}). + If 'have_dropout_mask' is nonzero, each row of + 'in' will have 3 extra elements, interpreted + as dropout masks/scales for i_t, f_t and o_t. @param [in] params A matrix, of dimension 3 by cell_dim, with rows containing the 3 diagonal parameter matrices used in LSTMs, namely @@ -2870,7 +2873,8 @@ __global__ static void _lstm_nonlinearity(const Real* in, const int in_stride, const Real* params, const int params_stride, const int out_stride, const int cell_dim, - const int num_rows, Real* out) { + const int have_dropout_mask, const int num_rows, + Real* out) { const int tid = threadIdx.x; const int i = blockIdx.x; const Real* i_part = in + i * in_stride; @@ -2883,15 +2887,18 @@ static void _lstm_nonlinearity(const Real* in, const int in_stride, const Real* w_oc = params + params_stride * 2; Real* c_t = out + i * out_stride; Real* m_t = out + i * out_stride + cell_dim; + Real i_scale = (have_dropout_mask ? in[i * in_stride + cell_dim * 5] : 1), + f_scale = (have_dropout_mask ? in[i * in_stride + cell_dim * 5 + 1] : 1), + o_scale = (have_dropout_mask ? in[i * in_stride + cell_dim * 5 + 2] : 1); for (int j = tid; j < cell_dim; j += CU1DBLOCK) { Real c_tm1_j = c_tm1[j]; Real i_t_j = Real(1) / (Real(1) + exp(-i_part[j] - w_ic[j] * c_tm1_j)); Real f_t_j = Real(1) / (Real(1) + exp(-f_part[j] - w_fc[j] * c_tm1_j)); - Real c_t_j = f_t_j * c_tm1_j + i_t_j * tanh(c_part[j]); + Real c_t_j = f_t_j * f_scale * c_tm1_j + i_t_j * i_scale * tanh(c_part[j]); Real o_t_j = Real(1) / (Real(1) + exp(-o_part[j] - w_oc[j] * c_t_j)); c_t[j] = c_t_j; - m_t[j] = o_t_j * tanh(c_t_j); + m_t[j] = o_t_j * o_scale * tanh(c_t_j); } } @@ -2916,6 +2923,9 @@ static void _lstm_nonlinearity(const Real* in, const int in_stride, a multiple of 5). The column-space is interpreted as 5 consecutive blocks, each of dimension C, which we name: (i_part, f_part, c_part, o_part, c_{t-1}). + If 'have_dropout_mask' is nonzero, each row of + 'in' will have 3 extra elements, interpreted + as dropout masks/scales for i_t, f_t and o_t. @param [in] params The same as in ComputeLstmNonlinearity(). A matrix, of dimension 3 by C, with rows containing the three diagonal parameter matrices used in LSTMs, namely @@ -2988,7 +2998,8 @@ static void _lstm_nonlinearity(const Real* in, const int in_stride, */ template __global__ -static void _diff_lstm_nonlinearity(const int cell_dim, const int num_rows, +static void _diff_lstm_nonlinearity(const int cell_dim, const int have_dropout_mask, + const int num_rows, const Real* input, const int input_stride, const Real* params, const int params_stride, const Real* output_deriv, @@ -3042,6 +3053,7 @@ static void _diff_lstm_nonlinearity(const int cell_dim, const int num_rows, const Real o_t_self_repair = (update_sr[3] ? 
sr_config[8] : 0); const Real c_t_self_repair = (update_sr[4] ? sr_config[9] : 0); + for (int i = i0; i < num_rows; i += grid_stride) { const Real i_part = input[i * input_stride + j]; const Real f_part = input[i * input_stride + j + cell_dim]; @@ -3049,10 +3061,19 @@ static void _diff_lstm_nonlinearity(const int cell_dim, const int num_rows, const Real o_part = input[i * input_stride + j + 3 * cell_dim]; const Real c_prev = input[i * input_stride + j + 4 * cell_dim]; - const Real i_t = 1 / (1 + exp(-i_part - w_ic * c_prev)); - const Real f_t = 1 / (1 + exp(-f_part - w_fc * c_prev)); + + const Real i_scale = (have_dropout_mask ? + input[i * input_stride + cell_dim * 5] : 1), + f_scale = (have_dropout_mask ? + input[i * input_stride + cell_dim * 5 + 1] :1), + o_scale = (have_dropout_mask ? + input[i * input_stride + cell_dim * 5 + 2] :1); + + + const Real i_t = Real(1) / (1 + exp(-i_part - w_ic * c_prev)); + const Real f_t = Real(1) / (1 + exp(-f_part - w_fc * c_prev)); const Real tanh_c_part = tanh(c_part); - const Real c_t = f_t * c_prev + i_t * tanh_c_part; + const Real c_t = f_t * f_scale * c_prev + i_t * i_scale * tanh_c_part; const Real o_t = 1 / (1 + exp(-o_part - w_oc * c_t)); const Real tanh_c_t = tanh(c_t); @@ -3079,20 +3100,20 @@ static void _diff_lstm_nonlinearity(const int cell_dim, const int num_rows, const Real dc_t_out = output_deriv[i * output_deriv_stride + j]; const Real dm_t = output_deriv[i * output_deriv_stride + j + cell_dim]; - const Real dtanh_c_t = o_t * dm_t; - const Real do_t = tanh_c_t * dm_t; + const Real dtanh_c_t = o_t * o_scale * dm_t; + const Real do_t = o_scale * tanh_c_t * dm_t; const Real do_t_input = (o_t_deriv * do_t - (2 * o_t - 1) * o_t_self_repair); const Real dc_t = (c_t_deriv * dtanh_c_t + dc_t_out + do_t_input * w_oc) - tanh_c_t * c_t_self_repair; - const Real dtanh_c_part = i_t * dc_t; - const Real df_t = dc_t * c_prev; + const Real dtanh_c_part = i_t * i_scale * dc_t; + const Real df_t = dc_t * f_scale * c_prev; const Real df_t_input = (df_t * f_t_deriv - - (2 * f_t - 1) * f_t_self_repair); - const Real di_t = dc_t * tanh_c_part; + - (2 * f_t - 1) * f_t_self_repair); + const Real di_t = dc_t * i_scale * tanh_c_part; const Real di_t_input = (di_t * i_t_deriv - - (2 * i_t - 1) * i_t_self_repair); + - (2 * i_t - 1) * i_t_self_repair); if (params_deriv) { w_ic_deriv_sum += c_prev * di_t_input; @@ -3100,7 +3121,7 @@ static void _diff_lstm_nonlinearity(const int cell_dim, const int num_rows, w_oc_deriv_sum += c_t * do_t_input; } - const Real dc_prev = w_ic * di_t_input + w_fc * df_t_input + f_t * dc_t; + const Real dc_prev = w_ic * di_t_input + w_fc * df_t_input + f_t * f_scale * dc_t; const Real do_part = do_t_input; const Real dc_part = (c_part_deriv * dtanh_c_part - tanh_c_part * c_part_self_repair); @@ -4737,20 +4758,23 @@ void cudaD_trace_mat_smat_trans(dim3 Gr, dim3 Bl, const double* mat_in, void cudaD_lstm_nonlinearity(dim3 Gr, dim3 Bl, const double* in, const int in_stride, const double* params, const int params_stride, const int out_stride, - const int cell_dim, const int num_rows, - double* out) { - _lstm_nonlinearity<<>>(in, in_stride, params, params_stride, - out_stride, cell_dim, num_rows, out); + const int cell_dim, const int have_dropout_mask, + const int num_rows, double* out) { + _lstm_nonlinearity<<>>( + in, in_stride, params, params_stride, + out_stride, cell_dim, have_dropout_mask, num_rows, out); } void cudaF_lstm_nonlinearity(dim3 Gr, dim3 Bl, const float* in, const int in_stride, const float* params, const int params_stride, 
const int out_stride, - const int cell_dim, const int num_rows, - float* out) { - _lstm_nonlinearity<<>>(in, in_stride, params, params_stride, - out_stride, cell_dim, num_rows, out); + const int cell_dim, const int have_dropout_mask, + const int num_rows, float* out) { + _lstm_nonlinearity<<>>( + in, in_stride, params, params_stride, + out_stride, cell_dim, have_dropout_mask, num_rows, out); } void cudaD_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, + const int have_dropout_mask, const int num_rows, const double* input, const int input_stride, const double* params, const int params_stride, @@ -4769,7 +4793,8 @@ void cudaD_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, const int deriv_sum_out_stride, double* self_repair_sum_out, const int self_repair_sum_out_stride) { - _diff_lstm_nonlinearity<<>>(cell_dim, num_rows, input, + _diff_lstm_nonlinearity<<>>( + cell_dim, have_dropout_mask, num_rows, input, input_stride, params, params_stride, output_deriv, output_deriv_stride, deriv_sum_in, deriv_sum_in_stride, self_repair_config, count, input_deriv, input_deriv_stride, params_deriv, params_deriv_stride, value_sum_out, @@ -4777,6 +4802,7 @@ void cudaD_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, self_repair_sum_out, self_repair_sum_out_stride); } void cudaF_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, + const int have_dropout_mask, const int num_rows, const float* input, const int input_stride, const float* params, const int params_stride, @@ -4795,7 +4821,8 @@ void cudaF_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, const int deriv_sum_out_stride, float* self_repair_sum_out, const int self_repair_sum_out_stride) { - _diff_lstm_nonlinearity<<>>(cell_dim, num_rows, input, + _diff_lstm_nonlinearity<<>>( + cell_dim, have_dropout_mask, num_rows, input, input_stride, params, params_stride, output_deriv, output_deriv_stride, deriv_sum_in, deriv_sum_in_stride, self_repair_config, count, input_deriv, input_deriv_stride, params_deriv, params_deriv_stride, value_sum_out, diff --git a/src/cudamatrix/cu-kernels.h b/src/cudamatrix/cu-kernels.h index 77352b5925f..d2a79f471c8 100644 --- a/src/cudamatrix/cu-kernels.h +++ b/src/cudamatrix/cu-kernels.h @@ -626,6 +626,7 @@ inline void cuda_diff_log_softmax(dim3 Gr, dim3 Bl, out_deriv, out_deriv_stride, in_deriv); } inline void cuda_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, + const int have_dropout_mask, const int num_rows, const double* input, const int input_stride, const double* params, @@ -645,7 +646,8 @@ inline void cuda_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, const int deriv_sum_out_stride, double* self_repair_sum_out, const int self_repair_sum_out_stride) { - cudaD_diff_lstm_nonlinearity(Gr, Bl, cell_dim, num_rows, input, input_stride, + cudaD_diff_lstm_nonlinearity(Gr, Bl, cell_dim, have_dropout_mask, num_rows, + input, input_stride, params, params_stride, output_deriv, output_deriv_stride, deriv_sum_in, deriv_sum_in_stride, self_repair_config, count, @@ -656,6 +658,7 @@ inline void cuda_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, self_repair_sum_out_stride); } inline void cuda_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, + const int have_dropout_mask, const int num_rows, const float* input, const int input_stride, const float* params, @@ -675,7 +678,8 @@ inline void cuda_diff_lstm_nonlinearity(dim3 Gr, dim3 Bl, const int cell_dim, const int deriv_sum_out_stride, float* self_repair_sum_out, const int 
self_repair_sum_out_stride) { - cudaF_diff_lstm_nonlinearity(Gr, Bl, cell_dim, num_rows, input, input_stride, + cudaF_diff_lstm_nonlinearity(Gr, Bl, cell_dim, have_dropout_mask, + num_rows, input, input_stride, params, params_stride, output_deriv, output_deriv_stride, deriv_sum_in, deriv_sum_in_stride, self_repair_config, count, @@ -849,17 +853,21 @@ inline void cuda_lstm_nonlinearity(dim3 Gr, dim3 Bl, const double* in, const int in_stride, const double* params, const int params_stride, const int out_stride, const int cell_dim, + const int have_dropout_mask, const int num_rows, double* out) { cudaD_lstm_nonlinearity(Gr, Bl, in, in_stride, params, params_stride, - out_stride, cell_dim, num_rows, out); + out_stride, cell_dim, have_dropout_mask, + num_rows, out); } inline void cuda_lstm_nonlinearity(dim3 Gr, dim3 Bl, const float* in, const int in_stride, const float* params, const int params_stride, const int out_stride, const int cell_dim, + const int have_dropout_mask, const int num_rows, float* out) { cudaF_lstm_nonlinearity(Gr, Bl, in, in_stride, params, params_stride, - out_stride, cell_dim, num_rows, out); + out_stride, cell_dim, have_dropout_mask, + num_rows, out); } inline void cuda_matrix_add_elements(dim3 Gr, dim3 Bl, double *data, MatrixDim dim, double alpha, diff --git a/src/cudamatrix/cu-math-test.cc b/src/cudamatrix/cu-math-test.cc index 0febd5c0853..daf5c708465 100644 --- a/src/cudamatrix/cu-math-test.cc +++ b/src/cudamatrix/cu-math-test.cc @@ -144,7 +144,8 @@ static void UnitTestCuMathComputeLstmNonlinearity() { for (int i = 0; i < 3; i++) { int32 num_rows = 1 + Rand() % 100; int32 cell_dim = 1 + Rand() % 2000; - Matrix Hinput(num_rows, 5 * cell_dim); + int32 dropout_dim = (RandInt(0, 1) == 0 ? 0 : 3); + Matrix Hinput(num_rows, 5 * cell_dim + dropout_dim); Matrix Hparams(3, cell_dim); Matrix Houtput(num_rows, 2 * cell_dim); Hinput.SetRandn(); @@ -165,7 +166,8 @@ static void UnitTestCuMathComputeLstmNonlinearity() { BaseFloat time_in_secs = 0.025; int32 num_rows = i; int32 cell_dim = i; - CuMatrix input(num_rows, 5 * cell_dim); + int32 dropout_dim = (RandInt(0, 1) == 0 ? 0 : 3); + CuMatrix input(num_rows, 5 * cell_dim + dropout_dim); CuMatrix params(3, cell_dim); CuMatrix output(num_rows, 2 * cell_dim); input.SetRandn(); @@ -190,7 +192,8 @@ void UnitTestLstmNonlinearity() { // problem dimensions. int32 num_rows = RandInt(5, 20), - cell_dim = RandInt(2, 200); + cell_dim = RandInt(2, 200), + dropout_dim = (RandInt(0, 1) == 0 ? 0 : 3); // Pick the (input or params block), and output block, for which we'll // spot-check the derivative values. This will give us test failures @@ -207,7 +210,7 @@ void UnitTestLstmNonlinearity() { test_params = -1; - CuMatrix input(num_rows, cell_dim * 5), + CuMatrix input(num_rows, cell_dim * 5 + dropout_dim), params(3, cell_dim), output_deriv(num_rows, cell_dim * 2); input.SetRandn(); @@ -230,7 +233,7 @@ void UnitTestLstmNonlinearity() { CuVector self_repair_config(10.0); // leave at zero... we don't really test this here. 
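    // note: when dropout_dim == 3, 'input_deriv' below has the same column
    // dimension as 'input', i.e. 5 * cell_dim + 3; the backprop does not set
    // the derivatives w.r.t. the dropout-mask columns (they keep whatever
    // values they had before the call).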
CuMatrix self_repair_sum(5, cell_dim), - input_deriv(num_rows, 5 * cell_dim), + input_deriv(num_rows, 5 * cell_dim + dropout_dim), params_deriv(3, cell_dim); double count_in = 0.0; @@ -249,7 +252,7 @@ void UnitTestLstmNonlinearity() { measured_objf_change(test_dim); for (int32 i = 0; i < test_dim; i++) { - CuMatrix delta_input(num_rows, 5 * cell_dim), + CuMatrix delta_input(num_rows, 5 * cell_dim + dropout_dim), delta_params(3, cell_dim); if (test_input >= 0) { delta_input.ColRange(test_input * cell_dim, cell_dim).SetRandn(); @@ -260,12 +263,9 @@ void UnitTestLstmNonlinearity() { delta_params.Scale(delta); } - - predicted_objf_change(i) = TraceMatMat(delta_input, input_deriv, kTrans) + TraceMatMat(delta_params, params_deriv, kTrans); - CuMatrix perturbed_input(input); perturbed_input.AddMat(1.0, delta_input); @@ -280,7 +280,9 @@ void UnitTestLstmNonlinearity() { measured_objf_change(i) = objf_change; } KALDI_LOG << "LSTM nonlinearity test: num_rows=" << num_rows - << ", cell_dim=" << cell_dim << ", test_input=" << test_input + << ", cell_dim=" << cell_dim + << ", dropout_dim=" << dropout_dim + << ", test_input=" << test_input << ", test_params=" << test_params << ", test_output=" << test_output << ", predicted_objf_change=" << predicted_objf_change @@ -296,16 +298,17 @@ template static void UnitTestBackpropLstmNonlinearity() { for (int i = 0; i < 3; i++) { int32 num_rows = 1 + Rand() % 200; - int32 cell_dim = 1 + Rand() % 2000; + int32 cell_dim = 1 + Rand() % 2000, + dropout_dim = (RandInt(0, 1) == 0 ? 0 : 3); // KALDI_LOG << num_rows << ", " << cell_dim; - Matrix hinput(num_rows, 5 * cell_dim); + Matrix hinput(num_rows, 5 * cell_dim + dropout_dim); Matrix hparams(3, cell_dim); Matrix houtput_deriv(num_rows, 2 * cell_dim); Matrix hderiv_sum_in(5, cell_dim); Vector hself_repair_config(10); double count_in; - Matrix hinput_deriv(num_rows, 5 * cell_dim); + Matrix hinput_deriv(num_rows, 5 * cell_dim + dropout_dim); Matrix hparams_deriv(3, cell_dim); Matrix hvalue_sum_out(5, cell_dim); Matrix hderiv_sum_out(5, cell_dim); @@ -409,15 +412,16 @@ static void UnitTestBackpropLstmNonlinearity() { BaseFloat time_in_secs = 0.025; int32 num_rows = i; int32 cell_dim = i; + int32 dropout_dim = (RandInt(0, 1) == 0 ? 
0 : 3);

-    CuMatrix<Real> input(num_rows, 5 * cell_dim);
+    CuMatrix<Real> input(num_rows, 5 * cell_dim + dropout_dim);
     CuMatrix<Real> params(3, cell_dim);
     CuMatrix<Real> output_deriv(num_rows, 2 * cell_dim);
     CuMatrix<Real> deriv_sum_in(5, cell_dim);
     CuVector<Real> self_repair_config(10);
     double count_in;

-    CuMatrix<Real> input_deriv(num_rows, 5 * cell_dim);
+    CuMatrix<Real> input_deriv(num_rows, 5 * cell_dim + dropout_dim);
     CuMatrix<Real> params_deriv(3, cell_dim);
     CuMatrix<Real> value_sum_out(5, cell_dim);
     CuMatrix<Real> deriv_sum_out(5, cell_dim);
diff --git a/src/cudamatrix/cu-math.cc b/src/cudamatrix/cu-math.cc
index 2bd184bf116..a9cd9efcfce 100644
--- a/src/cudamatrix/cu-math.cc
+++ b/src/cudamatrix/cu-math.cc
@@ -411,10 +411,11 @@ template <typename Real>
 void CpuComputeLstmNonlinearity(const MatrixBase<Real> &input_mat,
                                 const MatrixBase<Real> &params_mat,
                                 MatrixBase<Real> *output) {
-  int32 num_rows = input_mat.NumRows();
-  int32 cell_dim = input_mat.NumCols() / 5;
+  int32 num_rows = input_mat.NumRows(),
+      input_cols = input_mat.NumCols(),
+        cell_dim = input_cols / 5;
+  KALDI_ASSERT(input_cols == (cell_dim * 5) || input_cols == (cell_dim * 5) + 3);
   KALDI_ASSERT(output->NumRows() == num_rows);
-  KALDI_ASSERT(input_mat.NumCols() % 5 == 0);
   KALDI_ASSERT(params_mat.NumRows() == 3);
   KALDI_ASSERT(params_mat.NumCols() == cell_dim);
   KALDI_ASSERT(output->NumCols() == 2 * cell_dim);
@@ -424,6 +425,11 @@ void CpuComputeLstmNonlinearity(const MatrixBase<Real> &input_mat,
   int32 params_stride = params_mat.Stride();
   for (int32 r = 0; r < num_rows; r++) {
     const Real *input_row = input_mat.RowData(r);
+    // i_scale, f_scale and o_scale relate to dropout; they will normally be 1.0.
+    Real i_scale = (input_cols == cell_dim*5 ? 1.0 : input_row[cell_dim*5]),
+         f_scale = (input_cols == cell_dim*5 ? 1.0 : input_row[cell_dim*5 + 1]),
+         o_scale = (input_cols == cell_dim*5 ? 1.0 : input_row[cell_dim*5 + 2]);
+
     Real *output_row = output_mat.RowData(r);
     for (int32 c = 0; c < cell_dim; c++) {
       Real i_part = input_row[c];
@@ -436,9 +442,9 @@ void CpuComputeLstmNonlinearity(const MatrixBase<Real> &input_mat,
       Real w_oc = params_data[c + params_stride * 2];
       Real i_t = ScalarSigmoid(i_part + w_ic * c_prev);
       Real f_t = ScalarSigmoid(f_part + w_fc * c_prev);
-      Real c_t = f_t * c_prev + i_t * ScalarTanh(c_part);
+      Real c_t = f_t * f_scale * c_prev + i_t * i_scale * ScalarTanh(c_part);
       Real o_t = ScalarSigmoid(o_part + w_oc * c_t);
-      Real m_t = o_t * ScalarTanh(c_t);
+      Real m_t = o_t * o_scale * ScalarTanh(c_t);
       output_row[c] = c_t;
       output_row[c + cell_dim] = m_t;
     }
@@ -449,10 +455,11 @@ template <typename Real>
 void ComputeLstmNonlinearity(const CuMatrixBase<Real> &input,
                              const CuMatrixBase<Real> &params,
                              CuMatrixBase<Real> *output) {
-  int32 num_rows = input.NumRows();
-  int32 cell_dim = input.NumCols() / 5;
+  int32 num_rows = input.NumRows(),
+      input_cols = input.NumCols(),
+        cell_dim = input_cols / 5;
+  KALDI_ASSERT(input_cols == (cell_dim * 5) || input_cols == (cell_dim * 5) + 3);
   KALDI_ASSERT(output->NumRows() == num_rows);
-  KALDI_ASSERT(input.NumCols() % 5 == 0);
   KALDI_ASSERT(params.NumRows() == 3);
   KALDI_ASSERT(params.NumCols() == cell_dim);
   KALDI_ASSERT(output->NumCols() == 2 * cell_dim);
@@ -461,6 +468,8 @@ void ComputeLstmNonlinearity(const CuMatrixBase<Real> &input,
   if (CuDevice::Instantiate().Enabled()) {
     Timer tim;

+    int have_dropout_mask = (input_cols == (cell_dim * 5) + 3);
+
     // Each thread block is working on 1 row of the data.
    // It's best that cell dim is a multiple of CU1DBLOCK
    dim3 dimBlock(CU1DBLOCK);
@@ -468,7 +477,7 @@ void ComputeLstmNonlinearity(const CuMatrixBase<Real> &input,
     cuda_lstm_nonlinearity(dimGrid, dimBlock, input.Data(), input.Stride(),
                            params.Data(), params.Stride(), output->Stride(),
-                           cell_dim, num_rows, output->Data());
+                           cell_dim, have_dropout_mask, num_rows, output->Data());
     CU_SAFE_CALL(cudaGetLastError());

     CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
@@ -508,10 +517,12 @@ void CpuBackpropLstmNonlinearity(const MatrixBase<Real> &input,
                                  MatrixBase<Real> *value_sum_out,
                                  MatrixBase<Real> *deriv_sum_out,
                                  MatrixBase<Real> *self_repair_sum_out) {
-  int32 num_rows = input.NumRows();
-  int32 cell_dim = input.NumCols() / 5;
+  int32 num_rows = input.NumRows(),
+      input_cols = input.NumCols(),
+        cell_dim = input.NumCols() / 5;
   // Check dimensions.
-  KALDI_ASSERT(input.NumCols() % 5 == 0);
+  KALDI_ASSERT(input_cols == (cell_dim * 5) || input_cols == (cell_dim * 5) + 3);
   KALDI_ASSERT(params.NumRows() == 3);
   KALDI_ASSERT(params.NumCols() == cell_dim);
   KALDI_ASSERT(output_deriv.NumRows() == num_rows);
@@ -606,6 +617,14 @@ void CpuBackpropLstmNonlinearity(const MatrixBase<Real> &input,
             c_part = input_mat(r, c + 2 * cell_dim),
             o_part = input_mat(r, c + 3 * cell_dim),
             c_prev = input_mat(r, c + 4 * cell_dim);
+
+       Real i_scale = (input_cols == cell_dim * 5 ? 1.0 :
+                       input_mat(r, cell_dim * 5)),
+            f_scale = (input_cols == cell_dim * 5 ? 1.0 :
+                       input_mat(r, cell_dim * 5 + 1)),
+            o_scale = (input_cols == cell_dim * 5 ? 1.0 :
+                       input_mat(r, cell_dim * 5 + 2));
+
        // For greater clarity, we give some of the quantities in the
        // forward equations their own names.
        Real i_t_input = i_part + w_ic * c_prev,
            i_t = ScalarSigmoid(i_t_input),
            f_t_input = f_part + w_fc * c_prev,
            f_t = ScalarSigmoid(f_t_input),
            tanh_c_part = ScalarTanh(c_part),
-           c_t = f_t * c_prev + i_t * tanh_c_part,
+           c_t = f_t * f_scale * c_prev + i_t * i_scale * tanh_c_part,
            o_t_input = o_part + w_oc * c_t,
            o_t = ScalarSigmoid(o_t_input),
            tanh_c_t = ScalarTanh(c_t);
@@ -645,25 +664,25 @@ void CpuBackpropLstmNonlinearity(const MatrixBase<Real> &input,
        // comes directly from the output of this function.
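        // Note on dropout: given the forward equations above, i.e.
        //   c_t = f_t * f_scale * c_prev + i_t * i_scale * tanh_c_part
        //   m_t = o_t * o_scale * tanh_c_t,
        // each derivative below picks up the corresponding dropout scale via
        // the chain rule, e.g. d(m_t)/d(tanh_c_t) = o_t * o_scale.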
        Real dc_t_out = output_deriv_mat(r, c);
        Real dm_t = output_deriv_mat(r, c + cell_dim);
-       Real dtanh_c_t = o_t * dm_t;
-       Real do_t = tanh_c_t * dm_t;
+       Real dtanh_c_t = o_t * o_scale * dm_t;
+       Real do_t = o_scale * tanh_c_t * dm_t;
        Real do_t_input = (o_t * (1.0F - o_t) * do_t
                           - (2.0F * o_t - 1.0F) * o_t_self_repair);
        Real dc_t = ((1.0F - tanh_c_t * tanh_c_t) * dtanh_c_t
                     + dc_t_out + do_t_input * w_oc)
                    - tanh_c_t * c_t_self_repair;
-       Real dtanh_c_part = i_t * dc_t;
-       Real df_t = dc_t * c_prev;
-       Real df_t_input = (df_t * f_t * (1.0F - f_t)
-                          - (2.0F * f_t - 1.0F) * f_t_self_repair);
-       Real di_t = dc_t * tanh_c_part;
-       Real di_t_input = (di_t * i_t * (1.0F - i_t)
-                          - (2.0F * i_t - 1.0F) * i_t_self_repair);
+       Real dtanh_c_part = i_t * i_scale * dc_t;
+       Real df_t = dc_t * f_scale * c_prev;
+       Real df_t_input = ((df_t * f_t * (1.0F - f_t)
+                           - (2.0F * f_t - 1.0F) * f_t_self_repair));
+       Real di_t = dc_t * i_scale * tanh_c_part;
+       Real di_t_input = ((di_t * i_t * (1.0F - i_t)
+                           - (2.0F * i_t - 1.0F) * i_t_self_repair));

        w_ic_deriv_sum += c_prev * di_t_input;
        w_fc_deriv_sum += c_prev * df_t_input;
        w_oc_deriv_sum += c_t * do_t_input;

-       Real dc_prev = w_ic * di_t_input + w_fc * df_t_input + f_t * dc_t;
+       Real dc_prev = w_ic * di_t_input + w_fc * df_t_input + f_t * f_scale * dc_t;
        Real do_part = do_t_input;
        Real dc_part = ((1.0F - tanh_c_part * tanh_c_part) * dtanh_c_part
                        - tanh_c_part * c_part_self_repair);
@@ -724,10 +743,11 @@ void BackpropLstmNonlinearity(const CuMatrixBase<Real> &input,
                               CuMatrixBase<Real> *value_sum_out,
                               CuMatrixBase<Real> *deriv_sum_out,
                               CuMatrixBase<Real> *self_repair_sum_out) {
-  int32 num_rows = input.NumRows();
-  int32 cell_dim = input.NumCols() / 5;
+  int32 num_rows = input.NumRows(),
+        cell_dim = input.NumCols() / 5,
+      input_cols = input.NumCols();
   // Check dimensions.
-  KALDI_ASSERT(input.NumCols() % 5 == 0);
+  KALDI_ASSERT(input_cols == (cell_dim * 5) || input_cols == (cell_dim * 5) + 3);
   KALDI_ASSERT(params.NumRows() == 3);
   KALDI_ASSERT(params.NumCols() == cell_dim);
   KALDI_ASSERT(output_deriv.NumRows() == num_rows);
@@ -762,6 +782,7 @@ void BackpropLstmNonlinearity(const CuMatrixBase<Real> &input,

   // Each thread block is working on 1 row of the data.
   // It's best that cell dim is a multiple of CU1DBLOCK
+  int have_dropout_mask = (input_cols == (cell_dim * 5) + 3);

   // Use 2D block (8x32 threads) as we need to compute column sum.
   // Use 1D grid to cover the data matrix width `cell_dim`.
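(To make the dropout-mask layout concrete: each row of 'input' holds
(i_part, f_part, c_part, o_part, c_prev), i.e. width 5C, optionally followed
by the 3 per-frame mask values.  Below is a small NumPy sketch of the forward
computation, mirroring CpuComputeLstmNonlinearity() above; it is our
illustration with our own names, not code from this patch.)

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def lstm_nonlinearity(inp, params):
        # inp: (N, 5*C) or (N, 5*C + 3); params rows are w_ic, w_fc, w_oc.
        C = params.shape[1]
        i_part, f_part, c_part, o_part, c_prev = \
            [inp[:, k * C:(k + 1) * C] for k in range(5)]
        if inp.shape[1] == 5 * C + 3:
            # last three columns: per-frame dropout scales on i_t, f_t, o_t.
            i_scale = inp[:, 5 * C, None]
            f_scale = inp[:, 5 * C + 1, None]
            o_scale = inp[:, 5 * C + 2, None]
        else:
            i_scale = f_scale = o_scale = 1.0
        w_ic, w_fc, w_oc = params
        i_t = sigmoid(i_part + w_ic * c_prev)
        f_t = sigmoid(f_part + w_fc * c_prev)
        c_t = f_t * f_scale * c_prev + i_t * i_scale * np.tanh(c_part)
        o_t = sigmoid(o_part + w_oc * c_t)
        m_t = o_t * o_scale * np.tanh(c_t)
        return np.hstack([c_t, m_t])   # (N, 2*C)

    rng = np.random.default_rng(0)
    out = lstm_nonlinearity(rng.standard_normal((4, 5 * 6 + 3)),
                            0.1 * rng.standard_normal((3, 6)))
    assert out.shape == (4, 12)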
@@ -775,7 +796,8 @@ void BackpropLstmNonlinearity(const CuMatrixBase &input, dim3 dimGrid(n_blocks(cell_dim, dimBlock.x)); if (input_deriv == NULL) { if (params_deriv == NULL) { - cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, num_rows, + cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, + have_dropout_mask, num_rows, input.Data(), input.Stride(), params.Data(), params.Stride(), output_deriv.Data(), output_deriv.Stride(), deriv_sum_in.Data(), @@ -793,7 +815,8 @@ void BackpropLstmNonlinearity(const CuMatrixBase &input, 0); } else { - cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, num_rows, + cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, + have_dropout_mask, num_rows, input.Data(), input.Stride(), params.Data(), params.Stride(), output_deriv.Data(), output_deriv.Stride(), deriv_sum_in.Data(), @@ -811,7 +834,8 @@ void BackpropLstmNonlinearity(const CuMatrixBase &input, } } else { if (params_deriv == NULL) { - cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, num_rows, + cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, + have_dropout_mask, num_rows, input.Data(), input.Stride(), params.Data(), params.Stride(), output_deriv.Data(), output_deriv.Stride(), deriv_sum_in.Data(), @@ -821,7 +845,8 @@ void BackpropLstmNonlinearity(const CuMatrixBase &input, NULL, 0, NULL, 0, NULL, 0, NULL, 0); } else { - cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, num_rows, + cuda_diff_lstm_nonlinearity(dimGrid, dimBlock, cell_dim, + have_dropout_mask, num_rows, input.Data(), input.Stride(), params.Data(), params.Stride(), output_deriv.Data(), output_deriv.Stride(), deriv_sum_in.Data(), diff --git a/src/cudamatrix/cu-math.h b/src/cudamatrix/cu-math.h index b0e0c2a1ff2..af3da0b47e2 100644 --- a/src/cudamatrix/cu-math.h +++ b/src/cudamatrix/cu-math.h @@ -88,6 +88,9 @@ void Group2norm(const CuMatrixBase &src, a multiple of 5). The column-space is interpreted as 5 consecutive blocks, each of dimension C, which we name: (i_part, f_part, c_part, o_part, c_{t-1}). + This function will also accept input of dimension N by 5C + 3, + and the three final elements will be used as scaling factors + on i_t, f_t and o_t (useful as per-frame dropout masks). @param [in] params A matrix, of dimension 3 by C, with rows containing the three diagonal parameter matrices used in LSTMs, namely w_{ic}, w_{fc} and w_{oc}. @@ -101,7 +104,6 @@ void Group2norm(const CuMatrixBase &src, o_t = Sigmoid(o_part + w_{oc}*c_t) m_t = o_t * Tanh(c_t) - */ template void ComputeLstmNonlinearity(const CuMatrixBase &input, @@ -134,6 +136,9 @@ void CpuComputeLstmNonlinearity(const MatrixBase &input, a multiple of 5). The column-space is interpreted as 5 consecutive blocks, each of dimension C, which we name: (i_part, f_part, c_part, o_part, c_{t-1}). + This function will also accept input of dimension N by 5C + 3, + and the three final elements will be interpreted as scaling factors + on i_t, f_t and o_t (useful as per-frame dropout masks). @param [in] params The same as in ComputeLstmNonlinearity(). A matrix, of dimension 3 by C, with rows containing the three diagonal parameter matrices used in LSTMs, namely @@ -165,9 +170,13 @@ void CpuComputeLstmNonlinearity(const MatrixBase &input, May be NULL; if not, this function writes, to this location, the backpropagated derivative of the objective function w.r.t. the 'input' matrix. This matrix should - have the same dimension as 'input' i.e. N by 5C. 
In
-                        addition to the regular backpropagated derivative, the
-                        output will include small values relating to 'self-repair'.
+                        have the same dimension as 'input'.  In addition to the
+                        regular backpropagated derivative, the output will include
+                        small values relating to 'self-repair'.  If the input
+                        is of column-dimension 5C + 3 (i.e. we are using dropout
+                        masks), the derivatives w.r.t. the dropout masks will not
+                        be set; they will retain their value prior to this
+                        function call.
     @param [out] params_deriv  May be NULL; if not, this is where this function
                         *writes* [not adds] the backpropagated derivative of the objective
@@ -196,6 +205,7 @@ void CpuComputeLstmNonlinearity(const MatrixBase<Real> &input,
                         processed outside this function into self-repair stats for
                         diagnostics.
  */
+
template <typename Real>
void BackpropLstmNonlinearity(const CuMatrixBase<Real> &input,
                              const CuMatrixBase<Real> &params,
diff --git a/src/nnet3/nnet-component-itf.cc b/src/nnet3/nnet-component-itf.cc
index 23a8662a0d5..4a2a8d1c09a 100644
--- a/src/nnet3/nnet-component-itf.cc
+++ b/src/nnet3/nnet-component-itf.cc
@@ -147,6 +147,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
     ans = new ConstantComponent();
   } else if (component_type == "DropoutComponent") {
     ans = new DropoutComponent();
+  } else if (component_type == "DropoutMaskComponent") {
+    ans = new DropoutMaskComponent();
   } else if (component_type == "BackpropTruncationComponent") {
     ans = new BackpropTruncationComponent();
   } else if (component_type == "LstmNonlinearityComponent") {
diff --git a/src/nnet3/nnet-component-itf.h b/src/nnet3/nnet-component-itf.h
index c1732fc9b25..7cf438a025e 100644
--- a/src/nnet3/nnet-component-itf.h
+++ b/src/nnet3/nnet-component-itf.h
@@ -82,8 +82,11 @@ enum ComponentProperties {
                           // Tanh, Sigmoid, ReLU and Softmax).
   kInputContiguous = 0x1000,  // true if the component requires its input data (and
                               // input derivatives) to have Stride()== NumCols().
-  kOutputContiguous = 0x2000  // true if the component requires its output data (and
+  kOutputContiguous = 0x2000, // true if the component requires its output data (and
                               // output derivatives) to have Stride()== NumCols().
+  kRandomComponent = 0x4000   // true if the component has some kind of
+                              // randomness, like DropoutComponent (these should
+                              // inherit from class RandomComponent).
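+                              // (Components with this property are disallowed
+                              // inside CompositeComponent; see the check added
+                              // in CompositeComponent::InitFromConfig() below.)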
}; diff --git a/src/nnet3/nnet-general-component.cc b/src/nnet3/nnet-general-component.cc index 4aa65ce70ed..85743490518 100644 --- a/src/nnet3/nnet-general-component.cc +++ b/src/nnet3/nnet-general-component.cc @@ -1376,5 +1376,88 @@ void ConstantComponent::UnVectorize(const VectorBase ¶ms) { +std::string DropoutMaskComponent::Info() const { + std::ostringstream stream; + stream << Type() + << ", output-dim=" << output_dim_ + << ", dropout-proportion=" << dropout_proportion_; + return stream.str(); +} + +DropoutMaskComponent::DropoutMaskComponent(): + output_dim_(-1), dropout_proportion_(0.5) { } + +DropoutMaskComponent::DropoutMaskComponent( + const DropoutMaskComponent &other): + output_dim_(other.output_dim_), + dropout_proportion_(other.dropout_proportion_) { } + +void DropoutMaskComponent::Propagate( + const ComponentPrecomputedIndexes *indexes, + const CuMatrixBase &in, + CuMatrixBase *out) const { + KALDI_ASSERT(in.NumRows() == 0 && out->NumCols() == output_dim_); + BaseFloat dropout_proportion = dropout_proportion_; + KALDI_ASSERT(dropout_proportion >= 0.0 && dropout_proportion <= 1.0); + + if (dropout_proportion_ == 0) { + out->Set(1.0); + return; + } + const_cast&>(random_generator_).RandUniform(out); + out->Add(-dropout_proportion); + out->ApplyHeaviside(); + // To generate data where it's never the case that both of the dimensions + // for a row are zero, we generate uniformly distributed data (call this u_i), + // and for row i, set (*out)(i, 0) = (0 if u_i < dropout_proportion else 1) + // and (*out)(i, 1) = (0 if u_i > 1-dropout_proportion else 1) + int32 num_rows = out->NumRows(); + // later we may make this a bit more efficient. + CuVector temp(num_rows, kUndefined); + const_cast&>(random_generator_).RandUniform(&temp); + temp.Add(-dropout_proportion); + out->CopyColFromVec(temp, 0); + temp.Add(-1.0 + (2.0 * dropout_proportion)); + // Now, 'temp' contains the original uniformly-distributed data plus + // -(1 - dropout_proportion). + temp.Scale(-1.0); + out->CopyColFromVec(temp, 1); + out->ApplyHeaviside(); + } + + +void DropoutMaskComponent::Read(std::istream &is, bool binary) { + ExpectOneOrTwoTokens(is, binary, "", ""); + ReadBasicType(is, binary, &output_dim_); + ExpectToken(is, binary, ""); + ReadBasicType(is, binary, &dropout_proportion_); + ExpectToken(is, binary, ""); +} + + +void DropoutMaskComponent::Write(std::ostream &os, bool binary) const { + WriteToken(os, binary, ""); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, output_dim_); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, dropout_proportion_); + WriteToken(os, binary, ""); +} + +Component* DropoutMaskComponent::Copy() const { + return new DropoutMaskComponent(*this); +} + +void DropoutMaskComponent::InitFromConfig(ConfigLine *cfl) { + output_dim_ = 0; + bool ok = cfl->GetValue("output-dim", &output_dim_); + KALDI_ASSERT(ok && output_dim_ > 0); + dropout_proportion_ = 0.5; + cfl->GetValue("dropout-proportion", &dropout_proportion_); +} + + + + } // namespace nnet3 } // namespace kaldi diff --git a/src/nnet3/nnet-general-component.h b/src/nnet3/nnet-general-component.h index b945edf4475..d5d7a140177 100644 --- a/src/nnet3/nnet-general-component.h +++ b/src/nnet3/nnet-general-component.h @@ -669,6 +669,88 @@ class ConstantComponent: public UpdatableComponent { +// DropoutMaskComponent outputs a random zero-or-one value for all dimensions of +// all requested indexes, and it has no dependencies on any input. 
It's like a +// ConstantComponent, but with random output that has value zero +// a proportion (dropout_proportion) of the time, and otherwise one. +// This is not the normal way to implement dropout; you'd normally use a +// DropoutComponent (see nnet-simple-component.h). This component is used while +// implementing per-frame dropout with the LstmNonlinearityComponent; we +// generate a two-dimensional output representing dropout +// +class DropoutMaskComponent: public RandomComponent { + public: + // actually this component requires no inputs; this value + // is really a don't-care. + virtual int32 InputDim() const { return output_dim_; } + + virtual int32 OutputDim() const { return output_dim_; } + + virtual std::string Info() const; + + // possible parameter values with their defaults: + // dropout-proportion=0.5 output-dim=-1 + virtual void InitFromConfig(ConfigLine *cfl); + + DropoutMaskComponent(); + + DropoutMaskComponent(const DropoutMaskComponent &other); + + virtual std::string Type() const { return "DropoutMaskComponent"; } + virtual int32 Properties() const { return kRandomComponent; } + // note: the matrix 'in' will be empty. + virtual void Propagate(const ComponentPrecomputedIndexes *indexes, + const CuMatrixBase &in, + CuMatrixBase *out) const; + // backprop does nothing, there is nothing to backprop to and nothing + // to update. + virtual void Backprop(const std::string &debug_info, + const ComponentPrecomputedIndexes *indexes, + const CuMatrixBase &, // in_value + const CuMatrixBase &, // out_value + const CuMatrixBase &out_deriv, + Component *to_update, + CuMatrixBase *in_deriv) const { } + + virtual void Read(std::istream &is, bool binary); + virtual void Write(std::ostream &os, bool binary) const; + + virtual Component* Copy() const; + + // Some functions that are only to be reimplemented for GeneralComponents. + virtual void GetInputIndexes(const MiscComputationInfo &misc_info, + const Index &output_index, + std::vector *desired_indexes) const { + desired_indexes->clear(); // requires no inputs. + } + + // This function returns true if at least one of the input indexes used to + // compute this output index is computable. + // it's simple because this component requires no inputs. + virtual bool IsComputable(const MiscComputationInfo &misc_info, + const Index &output_index, + const IndexSet &input_index_set, + std::vector *used_inputs) const { + if (used_inputs) used_inputs->clear(); + return true; + } + + void SetDropoutProportion(BaseFloat p) { dropout_proportion_ = p; } + + private: + + // The output dimension + int32 output_dim_; + + BaseFloat dropout_proportion_; + + const DropoutMaskComponent &operator + = (const DropoutMaskComponent &other); // Disallow. +}; + + + + } // namespace nnet3 diff --git a/src/nnet3/nnet-simple-component.cc b/src/nnet3/nnet-simple-component.cc index 8bbe76840da..91f8f5139b2 100644 --- a/src/nnet3/nnet-simple-component.cc +++ b/src/nnet3/nnet-simple-component.cc @@ -4939,13 +4939,20 @@ void CompositeComponent::InitFromConfig(ConfigLine *cfl) { if(this_component->Type() == "CompositeComponent") { DeletePointers(&components); delete this_component; + // This is not allowed. If memory is too much with just one + // CompositeComponent, try decreasing max-rows-process instead. KALDI_ERR << "Found CompositeComponent nested within CompositeComponent." - << "Try decreasing max-rows-process instead." 
<< "Nested line: '" << nested_line.WholeLine() << "'\n" << "Toplevel CompositeComponent line '" << cfl->WholeLine() << "'"; } this_component->InitFromConfig(&nested_line); + int32 props = this_component->Properties(); + if ((props & kRandomComponent) != 0 || + (props & kSimpleComponent) == 0) { + KALDI_ERR << "CompositeComponent contains disallowed component type: " + << nested_line.WholeLine(); + } components.push_back(this_component); } if (cfl->HasUnusedValues()) @@ -4965,10 +4972,9 @@ void CompositeComponent::SetComponent(int32 i, Component *component) { components_[i] = component; } - int32 LstmNonlinearityComponent::InputDim() const { int32 cell_dim = value_sum_.NumCols(); - return cell_dim * 5; + return cell_dim * 5 + (use_dropout_ ? 3 : 0); } int32 LstmNonlinearityComponent::OutputDim() const { @@ -4990,7 +4996,15 @@ void LstmNonlinearityComponent::Read(std::istream &is, bool binary) { ExpectToken(is, binary, ""); self_repair_total_.Read(is, binary); - ExpectToken(is, binary, ""); + std::string tok; + ReadToken(is, binary, &tok); + if (tok == "") { + ReadBasicType(is, binary, &use_dropout_); + ReadToken(is, binary, &tok); + } else { + use_dropout_ = false; + } + KALDI_ASSERT(tok == ""); ReadBasicType(is, binary, &count_); // For the on-disk format, we normalze value_sum_, deriv_sum_ and @@ -5037,6 +5051,12 @@ void LstmNonlinearityComponent::Write(std::ostream &os, bool binary) const { self_repair_prob.Scale(1.0 / (count_ * cell_dim)); self_repair_prob.Write(os, binary); } + if (use_dropout_) { + // only write this if true; we have back-compat code in reading anyway. + // this makes the models without dropout easier to read with older code. + WriteToken(os, binary, ""); + WriteBasicType(os, binary, use_dropout_); + } WriteToken(os, binary, ""); WriteBasicType(os, binary, count_); WriteToken(os, binary, ""); @@ -5047,7 +5067,8 @@ void LstmNonlinearityComponent::Write(std::ostream &os, bool binary) const { std::string LstmNonlinearityComponent::Info() const { std::ostringstream stream; int32 cell_dim = params_.NumCols(); - stream << UpdatableComponent::Info() << ", cell-dim=" << cell_dim; + stream << UpdatableComponent::Info() << ", cell-dim=" << cell_dim + << ", use-dropout=" << (use_dropout_ ? 
"true" : "false"); PrintParameterStats(stream, "w_ic", params_.Row(0)); PrintParameterStats(stream, "w_fc", params_.Row(1)); PrintParameterStats(stream, "w_oc", params_.Row(2)); @@ -5213,6 +5234,7 @@ LstmNonlinearityComponent::LstmNonlinearityComponent( const LstmNonlinearityComponent &other): UpdatableComponent(other), params_(other.params_), + use_dropout_(other.use_dropout_), value_sum_(other.value_sum_), deriv_sum_(other.deriv_sum_), self_repair_config_(other.self_repair_config_), @@ -5221,7 +5243,8 @@ LstmNonlinearityComponent::LstmNonlinearityComponent( preconditioner_(other.preconditioner_) { } void LstmNonlinearityComponent::Init( - int32 cell_dim, BaseFloat param_stddev, + int32 cell_dim, bool use_dropout, + BaseFloat param_stddev, BaseFloat tanh_self_repair_threshold, BaseFloat sigmoid_self_repair_threshold, BaseFloat self_repair_scale) { @@ -5231,6 +5254,7 @@ void LstmNonlinearityComponent::Init( sigmoid_self_repair_threshold >= 0.0 && sigmoid_self_repair_threshold <= 0.25 && self_repair_scale >= 0.0 && self_repair_scale <= 0.1); + use_dropout_ = use_dropout; params_.Resize(3, cell_dim); params_.SetRandn(); params_.Scale(param_stddev); @@ -5265,6 +5289,7 @@ void LstmNonlinearityComponent::InitNaturalGradient() { void LstmNonlinearityComponent::InitFromConfig(ConfigLine *cfl) { InitLearningRatesFromConfig(cfl); bool ok = true; + bool use_dropout = false; int32 cell_dim; // these self-repair thresholds are the normal defaults for tanh and sigmoid // respectively. If, later on, we decide that we want to support different @@ -5284,6 +5309,7 @@ void LstmNonlinearityComponent::InitFromConfig(ConfigLine *cfl) { cfl->GetValue("sigmoid-self-repair-threshold", &sigmoid_self_repair_threshold); cfl->GetValue("self-repair-scale", &self_repair_scale); + cfl->GetValue("use-dropout", &use_dropout); // We may later on want to make it possible to initialize the different // parameters w_ic, w_fc and w_oc with different biases. We'll implement @@ -5293,7 +5319,7 @@ void LstmNonlinearityComponent::InitFromConfig(ConfigLine *cfl) { KALDI_ERR << "Could not process these elements in initializer: " << cfl->UnusedValues(); if (ok) { - Init(cell_dim, param_stddev, tanh_self_repair_threshold, + Init(cell_dim, use_dropout, param_stddev, tanh_self_repair_threshold, sigmoid_self_repair_threshold, self_repair_scale); } else { KALDI_ERR << "Invalid initializer for layer of type " diff --git a/src/nnet3/nnet-simple-component.h b/src/nnet3/nnet-simple-component.h index 62b4c9006d8..60fd1634598 100644 --- a/src/nnet3/nnet-simple-component.h +++ b/src/nnet3/nnet-simple-component.h @@ -99,7 +99,8 @@ class DropoutComponent : public RandomComponent { dropout_per_frame_(false) { } virtual int32 Properties() const { - return kLinearInInput|kBackpropInPlace|kSimpleComponent|kBackpropNeedsInput|kBackpropNeedsOutput; + return kLinearInInput|kBackpropInPlace|kSimpleComponent|kBackpropNeedsInput| + kBackpropNeedsOutput|kRandomComponent; } virtual std::string Type() const { return "DropoutComponent"; } @@ -1677,8 +1678,9 @@ class ConvolutionComponent: public UpdatableComponent { // o_part = W_{cx} x_t + W_{om} m_{t-1} + b_o // // The part of the computation that takes place in this component is as follows. -// Its input is of dimension 5C, consisting of 5 blocks: (i_part, f_part, c_part, o_part, and -// c_{t-1}). Its output is of dimension 2C, consisting of 2 blocks: c_t and m_t. +// Its input is of dimension 5C [however, search for 'dropout' below], +// consisting of 5 blocks: (i_part, f_part, c_part, o_part, and c_{t-1}). 
Its
+// output is of dimension 2C, consisting of 2 blocks: c_t and m_t.
 //
 // To recap: the input is (i_part, f_part, c_part, o_part, c_{t-1}); the output is (c_t, m_t).
 //
@@ -1696,6 +1698,12 @@ class ConvolutionComponent: public UpdatableComponent {
 //    m_t = o_t * Tanh(c_t)                   (5)
 //    # note: the outputs are just c_t and m_t.
 //
+// [Note regarding dropout: optionally the input-dimension may be 5C + 3 instead
+// of 5C; in this case, the last three input dimensions will be interpreted as
+// per-frame dropout masks on i_t, f_t and o_t respectively, so that in (3), i_t is
+// replaced by i_t * i_t_scale, and likewise for f_t and o_t.]
+//
+//
 // The backprop is as you would think, but for the "self-repair" we need to pass
 // in additional vectors (of the same dim as the parameters of the layer) that
 // dictate whether or not we add an additional term to the backpropagated
@@ -1715,7 +1723,7 @@ class LstmNonlinearityComponent: public UpdatableComponent {
   virtual int32 OutputDim() const;
   virtual std::string Info() const;
   virtual void InitFromConfig(ConfigLine *cfl);
-  LstmNonlinearityComponent() { } // use Init to really initialize.
+  LstmNonlinearityComponent(): use_dropout_(false) { }
   virtual std::string Type() const { return "LstmNonlinearityComponent"; }
   virtual int32 Properties() const {
     return kSimpleComponent|kUpdatableComponent|kBackpropNeedsInput;
@@ -1751,15 +1759,12 @@ class LstmNonlinearityComponent: public UpdatableComponent {
   explicit LstmNonlinearityComponent(
       const LstmNonlinearityComponent &other);

-  void Init(int32 cell_dim, BaseFloat param_stddev,
+  void Init(int32 cell_dim, bool use_dropout,
+            BaseFloat param_stddev,
             BaseFloat tanh_self_repair_threshold,
             BaseFloat sigmoid_self_repair_threshold,
             BaseFloat self_repair_scale);

-  void Init(std::string vector_filename,
-            int32 rank, int32 update_period, BaseFloat num_samples_history,
-            BaseFloat alpha, BaseFloat max_change_per_minibatch);
-
  private:

   // Initializes the natural-gradient object with the configuration we
@@ -1773,6 +1778,10 @@ class LstmNonlinearityComponent: public UpdatableComponent {
   // it contains the 3 diagonal parameter matrices w_i, w_f and w_o.
   CuMatrix<BaseFloat> params_;

+  // If true, we expect an extra 3 dimensions on the input, for dropout masks
+  // on i_t, f_t and o_t.
+  bool use_dropout_;
+
   // Of dimension 5 * C, with a row for each of the Sigmoid/Tanh functions in
   // equations (1) through (5), this is the sum of the values of the nonlinearities
   // (used for diagnostics only).
It is comparable to value_sum_ vector diff --git a/src/nnet3/nnet-utils.cc b/src/nnet3/nnet-utils.cc index a7f732a9864..27415fe8775 100644 --- a/src/nnet3/nnet-utils.cc +++ b/src/nnet3/nnet-utils.cc @@ -21,6 +21,7 @@ #include "nnet3/nnet-utils.h" #include "nnet3/nnet-graph.h" #include "nnet3/nnet-simple-component.h" +#include "nnet3/nnet-general-component.h" #include "nnet3/nnet-parse.h" namespace kaldi { @@ -461,6 +462,10 @@ void SetDropoutProportion(BaseFloat dropout_proportion, DropoutComponent *dc = dynamic_cast<DropoutComponent*>(comp); if (dc != NULL) dc->SetDropoutProportion(dropout_proportion); + DropoutMaskComponent *mc = + dynamic_cast<DropoutMaskComponent*>(nnet->GetComponent(c)); + if (mc != NULL) + mc->SetDropoutProportion(dropout_proportion); } } @@ -629,16 +634,20 @@ void ReadEditConfig(std::istream &edit_config_is, Nnet *nnet) { KALDI_ERR << "In edits-config, expected proportion to be set in line: " << config_line.WholeLine(); } - DropoutComponent *dropout_component = NULL; int32 num_dropout_proportions_set = 0; for (int32 c = 0; c < nnet->NumComponents(); c++) { if (NameMatchesPattern(nnet->GetComponentName(c).c_str(), - name_pattern.c_str()) && - (dropout_component = - dynamic_cast<DropoutComponent*>(nnet->GetComponent(c)))) { + name_pattern.c_str())) { + DropoutComponent *dropout_component = + dynamic_cast<DropoutComponent*>(nnet->GetComponent(c)); + DropoutMaskComponent *mask_component = + dynamic_cast<DropoutMaskComponent*>(nnet->GetComponent(c)); if (dropout_component != NULL) { dropout_component->SetDropoutProportion(proportion); num_dropout_proportions_set++; + } else if (mask_component != NULL) { + mask_component->SetDropoutProportion(proportion); + num_dropout_proportions_set++; } } } diff --git a/src/nnet3/nnet-utils.h b/src/nnet3/nnet-utils.h index 921f1f1901d..041a916fb69 100644 --- a/src/nnet3/nnet-utils.h +++ b/src/nnet3/nnet-utils.h @@ -161,7 +161,7 @@ void ConvertRepeatedToBlockAffine(Nnet *nnet); std::string NnetInfo(const Nnet &nnet); /// This function sets the dropout proportion in all dropout components to -/// the value 'dropout_proportion' +/// the 'dropout_proportion' value. void SetDropoutProportion(BaseFloat dropout_proportion, Nnet *nnet); /// This function finds a list of components that are never used, and outputs From 015c96f74aa2ffb1e742e2de16b301685c45c0f2 Mon Sep 17 00:00:00 2001 From: Dan Povey Date: Thu, 20 Apr 2017 13:31:07 -0700 Subject: [PATCH 512/530] [src] Add documentation for dropout function. --- src/cudamatrix/cu-math.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/cudamatrix/cu-math.h b/src/cudamatrix/cu-math.h index af3da0b47e2..757449b6d4e 100644 --- a/src/cudamatrix/cu-math.h +++ b/src/cudamatrix/cu-math.h @@ -104,6 +104,14 @@ void Group2norm(const CuMatrixBase<Real> &src, o_t = Sigmoid(o_part + w_{oc}*c_t) m_t = o_t * Tanh(c_t) + Note on dropout: if the dropout mask is provided, let the + mask values be i_t_mask, f_t_mask and o_t_mask (for each + matrix row, these are scalars while i_t, f_t and o_t are of + dimension C, because this is 'per-frame' dropout as described in + http://www.danielpovey.com/files/2017_interspeech_dropout.pdf). + Then the modification to the equations above consists of + replacing 'i_t' with 'i_t_mask * i_t' in the RHS of the equations + above, and the same type of change for f_t and o_t.
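+   As a concrete sketch (scalar pseudo-code for one matrix row; the
+   variable names are illustrative, not the actual kernel code), applying
+   each mask at the point where its gate is defined is equivalent, since
+   each gate is consumed by exactly one later equation:
+
+     i_t = Sigmoid(i_part + w_ic * c_prev) * i_t_mask;
+     f_t = Sigmoid(f_part + w_fc * c_prev) * f_t_mask;
+     c_t = f_t * c_prev + i_t * Tanh(c_part);
+     o_t = Sigmoid(o_part + w_oc * c_t) * o_t_mask;
+     m_t = o_t * Tanh(c_t);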
*/ template <typename Real> void ComputeLstmNonlinearity(const CuMatrixBase<Real> &input, From 73489ae533048cdb041af3331742058e18847e13 Mon Sep 17 00:00:00 2001 From: Tom Ko Date: Sat, 22 Apr 2017 04:29:47 +0800 Subject: [PATCH 513/530] [src] Fix to testing code signal-test.cc, change threshold to resolve failure (#1565) --- src/feat/signal-test.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/feat/signal-test.cc b/src/feat/signal-test.cc index 4fcd2aaf2c6..d69cf8e5798 100644 --- a/src/feat/signal-test.cc +++ b/src/feat/signal-test.cc @@ -23,7 +23,7 @@ namespace kaldi { -void UnitTestBlockConvolution() { +void UnitTestFFTbasedBlockConvolution() { for (int32 i = 0; i < 5; i++) { int32 signal_length = 400000 + Rand() % 40000; int32 filter_length = 1000 + Rand() % 100; @@ -38,7 +38,7 @@ } } -void UnitTestConvolution() { +void UnitTestFFTbasedConvolution() { for (int32 i = 0; i < 5; i++) { int32 signal_length = 4000 + Rand() % 400; int32 filter_length = 10 + Rand() % 10; @@ -48,7 +48,7 @@ filter.SetRandn(); Vector<BaseFloat> signal_test(signal); ConvolveSignals(filter, &signal_test); - FFTbasedBlockConvolveSignals(filter, &signal); + FFTbasedConvolveSignals(filter, &signal); AssertEqual(signal, signal_test, 0.0001 * signal.Dim()); } } @@ -56,8 +56,8 @@ int main() { using namespace kaldi; - UnitTestBlockConvolution(); - UnitTestConvolution(); + UnitTestFFTbasedConvolution(); + UnitTestFFTbasedBlockConvolution(); KALDI_LOG << "Tests succeeded."; } From 87d95c5efff7da3b6f04e719a96de4204a367f8b Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 21 Apr 2017 19:28:22 -0400 Subject: [PATCH 514/530] [egs] Add mini-librispeech example scripts [intended as a sanity-checker/tutorial setup] (#1566) --- egs/librispeech/s5/local/data_prep.sh | 4 +- egs/mini_librispeech/s5/RESULTS | 22 ++ egs/mini_librispeech/s5/cmd.sh | 15 + egs/mini_librispeech/s5/conf/mfcc.conf | 1 + egs/mini_librispeech/s5/conf/mfcc_hires.conf | 10 + egs/mini_librispeech/s5/conf/online_cmvn.conf | 1 + .../s5/local/chain/compare_wer.sh | 131 ++++++++ .../s5/local/chain/run_tdnn.sh | 1 + .../s5/local/chain/tuning/run_tdnn_1a.sh | 298 ++++++++++++++++++ egs/mini_librispeech/s5/local/data_prep.sh | 1 + egs/mini_librispeech/s5/local/download_lm.sh | 1 + egs/mini_librispeech/s5/local/format_lms.sh | 1 + .../s5/local/nnet3/compare_wer.sh | 132 ++++++++ .../s5/local/nnet3/run_ivector_common.sh | 148 +++++++++ .../s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh | 220 +++++++++++++ .../s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh | 210 ++++++++++++ egs/mini_librispeech/s5/local/prepare_dict.sh | 1 + egs/mini_librispeech/s5/local/score.sh | 63 ++++ .../s5/local/subset_dataset.sh | 48 +++ egs/mini_librispeech/s5/path.sh | 8 + egs/mini_librispeech/s5/run.sh | 205 ++++++++++++ egs/mini_librispeech/s5/steps | 1 + egs/mini_librispeech/s5/utils | 1 + egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh | 2 +- 24 files changed, 1522 insertions(+), 3 deletions(-) create mode 100755 egs/mini_librispeech/s5/RESULTS create mode 100644 egs/mini_librispeech/s5/cmd.sh create mode 100644 egs/mini_librispeech/s5/conf/mfcc.conf create mode 100644 egs/mini_librispeech/s5/conf/mfcc_hires.conf create mode 100644 egs/mini_librispeech/s5/conf/online_cmvn.conf create mode 100755 egs/mini_librispeech/s5/local/chain/compare_wer.sh create mode 120000 egs/mini_librispeech/s5/local/chain/run_tdnn.sh create mode 100755 egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh create mode 120000
egs/mini_librispeech/s5/local/data_prep.sh create mode 120000 egs/mini_librispeech/s5/local/download_lm.sh create mode 120000 egs/mini_librispeech/s5/local/format_lms.sh create mode 100755 egs/mini_librispeech/s5/local/nnet3/compare_wer.sh create mode 100755 egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh create mode 100755 egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh create mode 100755 egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh create mode 120000 egs/mini_librispeech/s5/local/prepare_dict.sh create mode 100755 egs/mini_librispeech/s5/local/score.sh create mode 100755 egs/mini_librispeech/s5/local/subset_dataset.sh create mode 100644 egs/mini_librispeech/s5/path.sh create mode 100755 egs/mini_librispeech/s5/run.sh create mode 120000 egs/mini_librispeech/s5/steps create mode 120000 egs/mini_librispeech/s5/utils diff --git a/egs/librispeech/s5/local/data_prep.sh b/egs/librispeech/s5/local/data_prep.sh index 5a264a07464..dea93525e28 100755 --- a/egs/librispeech/s5/local/data_prep.sh +++ b/egs/librispeech/s5/local/data_prep.sh @@ -33,7 +33,7 @@ utt2spk=$dst/utt2spk; [[ -f "$utt2spk" ]] && rm $utt2spk spk2gender=$dst/spk2gender; [[ -f $spk2gender ]] && rm $spk2gender utt2dur=$dst/utt2dur; [[ -f "$utt2dur" ]] && rm $utt2dur -for reader_dir in $(find $src -mindepth 1 -maxdepth 1 -type d | sort); do +for reader_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sort); do reader=$(basename $reader_dir) if ! [ $reader -eq $reader ]; then # not integer. echo "$0: unexpected subdirectory name $reader" @@ -53,7 +53,7 @@ for reader_dir in $(find $src -mindepth 1 -maxdepth 1 -type d | sort); do exit 1; fi - find $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \ + find -L $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \ awk -v "dir=$chapter_dir" '{printf "%s flac -c -d -s %s/%s.flac |\n", $0, dir, $0}' >>$wav_scp|| exit 1 chapter_trans=$chapter_dir/${reader}-${chapter}.trans.txt diff --git a/egs/mini_librispeech/s5/RESULTS b/egs/mini_librispeech/s5/RESULTS new file mode 100755 index 00000000000..463c059bdbb --- /dev/null +++ b/egs/mini_librispeech/s5/RESULTS @@ -0,0 +1,22 @@ +#!/bin/bash + +for x in exp/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* | utils/best_wer.sh; done + +for x in exp/chain/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* | utils/best_wer.sh; done +exit 0 + +# Results on on dev_clean_2 +%WER 49.18 [ 9903 / 20138, 439 ins, 2332 del, 7132 sub ] exp/mono/decode_nosp_tgsmall_dev_clean_2/wer_8_0.0 +%WER 20.42 [ 4113 / 20138, 469 ins, 545 del, 3099 sub ] exp/tri1/decode_nosp_tglarge_dev_clean_2/wer_14_0.0 +%WER 24.56 [ 4945 / 20138, 447 ins, 792 del, 3706 sub ] exp/tri1/decode_nosp_tgmed_dev_clean_2/wer_14_0.0 +%WER 27.37 [ 5512 / 20138, 425 ins, 969 del, 4118 sub ] exp/tri1/decode_nosp_tgsmall_dev_clean_2/wer_14_0.0 +%WER 18.59 [ 3743 / 20138, 435 ins, 517 del, 2791 sub ] exp/tri2b/decode_nosp_tglarge_dev_clean_2/wer_15_0.0 +%WER 22.06 [ 4443 / 20138, 400 ins, 748 del, 3295 sub ] exp/tri2b/decode_nosp_tgmed_dev_clean_2/wer_15_0.0 +%WER 24.32 [ 4898 / 20138, 413 ins, 899 del, 3586 sub ] exp/tri2b/decode_nosp_tgsmall_dev_clean_2/wer_15_0.0 +%WER 13.45 [ 2708 / 20138, 358 ins, 330 del, 2020 sub ] exp/tri3b/decode_nosp_tglarge_dev_clean_2/wer_17_0.0 +%WER 16.25 [ 3273 / 20138, 332 ins, 485 del, 2456 sub ] exp/tri3b/decode_nosp_tgmed_dev_clean_2/wer_16_0.0 +%WER 18.10 [ 3645 / 20138, 332 ins, 603 del, 2710 sub ] exp/tri3b/decode_nosp_tgsmall_dev_clean_2/wer_16_0.0 + 
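+# (How to read these lines: each one is the best entry over LM weights and
+# word-insertion penalties, as selected by utils/best_wer.sh; the bracketed
+# fields are [ errors / total-words, insertions, deletions, substitutions ],
+# and the trailing wer_<lmwt>_<wip> identifies the scoring configuration.)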
+ +%WER 18.58 [ 3742 / 20138, 366 ins, 763 del, 2613 sub ] exp/chain/tdnn1a_sp/decode_tgsmall_dev_clean_2/wer_10_0.0 +%WER 13.35 [ 2689 / 20138, 318 ins, 491 del, 1880 sub ] exp/chain/tdnn1a_sp/decode_tglarge_dev_clean_2/wer_9_0.5 diff --git a/egs/mini_librispeech/s5/cmd.sh b/egs/mini_librispeech/s5/cmd.sh new file mode 100644 index 00000000000..71dd849a93b --- /dev/null +++ b/egs/mini_librispeech/s5/cmd.sh @@ -0,0 +1,15 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd="queue.pl --mem 2G" +export decode_cmd="queue.pl --mem 4G" +export mkgraph_cmd="queue.pl --mem 8G" diff --git a/egs/mini_librispeech/s5/conf/mfcc.conf b/egs/mini_librispeech/s5/conf/mfcc.conf new file mode 100644 index 00000000000..7361509099f --- /dev/null +++ b/egs/mini_librispeech/s5/conf/mfcc.conf @@ -0,0 +1 @@ +--use-energy=false # only non-default option. diff --git a/egs/mini_librispeech/s5/conf/mfcc_hires.conf b/egs/mini_librispeech/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..434834a6725 --- /dev/null +++ b/egs/mini_librispeech/s5/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so + # there might be some information at the low end. +--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600) diff --git a/egs/mini_librispeech/s5/conf/online_cmvn.conf b/egs/mini_librispeech/s5/conf/online_cmvn.conf new file mode 100644 index 00000000000..7748a4a4dd3 --- /dev/null +++ b/egs/mini_librispeech/s5/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh diff --git a/egs/mini_librispeech/s5/local/chain/compare_wer.sh b/egs/mini_librispeech/s5/local/chain/compare_wer.sh new file mode 100755 index 00000000000..cd6be14ed88 --- /dev/null +++ b/egs/mini_librispeech/s5/local/chain/compare_wer.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] <dir1> [<dir2> ... 
]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. +# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 
+fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob (xent)" +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo diff --git a/egs/mini_librispeech/s5/local/chain/run_tdnn.sh b/egs/mini_librispeech/s5/local/chain/run_tdnn.sh new file mode 120000 index 00000000000..34499362831 --- /dev/null +++ b/egs/mini_librispeech/s5/local/chain/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1a.sh \ No newline at end of file diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..642c20ec191 --- /dev/null +++ b/egs/mini_librispeech/s5/local/chain/tuning/run_tdnn_1a.sh @@ -0,0 +1,298 @@ +#!/bin/bash + +# This is a basic TDNN experiment. + +# steps/info/chain_dir_info.pl exp/chain/tdnn1a_sp +# exp/chain/tdnn1a_sp: num-iters=6 nj=2..5 num-params=7.0M dim=40+100->2309 combine=-0.072->-0.069 xent:train/valid[3,5,final]=(-2.10,-1.62,-1.48/-2.26,-1.85,-1.77) logprob:train/valid[3,5,final]=(-0.096,-0.069,-0.060/-0.124,-0.107,-0.104) + +# local/chain/compare_wer.sh --online exp/chain/tdnn1a_sp +# System tdnn1a_sp +#WER dev_clean_2 (tgsmall) 18.58 +# [online:] 18.49 +#WER dev_clean_2 (tglarge) 13.35 +# [online:] 13.47 +# Final train prob -0.0596 +# Final valid prob -0.1036 +# Final train prob (xent) -1.4843 +# Final valid prob (xent) -1.7723 + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +decode_nj=10 +train_set=train_clean_5 +test_sets=dev_clean_2 +gmm=tri3b +nnet3_affix= + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +affix=1a # affix for the TDNN directory name +tree_affix= +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# training options +# training chunk-options +chunk_width=140,100,160 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 +common_egs_dir= +xent_regularize=0.1 + +# training options +srand=0 +remove_egs=true +reporting_email= + +#decode options +test_online_decoding=true # if true, it will run the last decoding stage. + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 11 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 75 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 12 ]; then + # Build a tree using our new topology. 
We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 13 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=512 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-6,-3,0) + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain dim=512 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=512 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 14 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.00005 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=4 \ + --trainer.frames-per-iter=3000000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=5 \ + --trainer.optimization.initial-effective-lrate=0.001 \ + --trainer.optimization.final-effective-lrate=0.0001 \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.num-chunk-per-minibatch=256,128,64 \ + --trainer.optimization.momentum=0.0 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 15 ]; then + # Note: it's not important to give mkgraph.sh the lang directory with the + # matched topology (since it gets the topology file from the model). + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test_tgsmall \ + $tree_dir $tree_dir/graph_tgsmall || exit 1; +fi + +if [ $stage -le 16 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l [ ... ]" + echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=( + "#WER dev_clean_2 (tgsmall) " + "#WER dev_clean_2 (tglarge) ") + +for n in 0 1; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(tgsmall_dev_clean_2 tglarge_dev_clean_2) + + wer=$(cat $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo diff --git a/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh b/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh new file mode 100755 index 00000000000..82bb46d64a9 --- /dev/null +++ b/egs/mini_librispeech/s5/local/nnet3/run_ivector_common.sh @@ -0,0 +1,148 @@ +#!/bin/bash + +set -euo pipefail + +# This script is called from local/nnet3/run_tdnn.sh and +# local/chain/run_tdnn.sh (and may eventually be called by more +# scripts). It contains the common feature preparation and +# iVector-related parts of the script. See those scripts for examples +# of usage. + +stage=0 +train_set=train_clean_5 +test_sets="dev_clean_2" +gmm=tri3b + +nnet3_affix= + +. ./cmd.sh +. ./path.sh +. utils/parse_options.sh + +gmm_dir=exp/${gmm} +ali_dir=exp/${gmm}_ali_${train_set}_sp + +for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do + if [ ! 
-f $f ]; then + echo "$0: expected file $f to exist" + exit 1 + fi +done + +if [ $stage -le 1 ]; then + # Although the nnet will be trained by high resolution data, we still have to + # perturb the normal data to get the alignment _sp stands for speed-perturbed + echo "$0: preparing directory for low-resolution speed-perturbed data (for alignment)" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp + echo "$0: making MFCC features for low-resolution speed-perturbed data" + steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/${train_set}_sp || exit 1; + steps/compute_cmvn_stats.sh data/${train_set}_sp || exit 1; + utils/fix_data_dir.sh data/${train_set}_sp +fi + +if [ $stage -le 2 ]; then + echo "$0: aligning with the perturbed low-resolution data" + steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \ + data/${train_set}_sp data/lang $gmm_dir $ali_dir || exit 1 +fi + +if [ $stage -le 3 ]; then + # Create high-resolution MFCC features (with 40 cepstra instead of 13). + # this shows how you can split across multiple file-systems. + echo "$0: creating high-resolution MFCC features" + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b1{5,6,7,8}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp ${test_sets}; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires || exit 1; + + for datadir in ${train_set}_sp ${test_sets}; do + steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires || exit 1; + steps/compute_cmvn_stats.sh data/${datadir}_hires || exit 1; + utils/fix_data_dir.sh data/${datadir}_hires || exit 1; + done +fi + +if [ $stage -le 4 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + # We'll use about a quarter of the data. 
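+  # (For concreteness, a sketch of the quarter-subset arithmetic with
+  #  hypothetical counts:
+  #    num_utts_total=$(wc -l <data/${train_set}_sp_hires/utt2spk) # e.g. 1519
+  #    num_utts=$[$num_utts_total/4]                               # -> 379
+  #  utils/data/subset_data_dir.sh then selects that many utterances.)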
+ mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + num_utts_total=$(wc -l $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + relu-renorm-layer name=tdnn1 dim=520 + relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 decay-time=20 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 11 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value=0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=$lang \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 12 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat 
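+  # (For reference: Append(-2,-1,0,1,2) splices five consecutive input frames
+  #  together, and ReplaceIndex(ivector, t, 0) requests the t=0 i-vector at
+  #  every frame, so one i-vector per chunk is appended to each spliced frame.)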
+ + relu-renorm-layer name=tdnn1 dim=520 + relu-renorm-layer name=tdnn2 dim=520 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + relu-renorm-layer name=tdnn3 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + relu-renorm-layer name=tdnn5 dim=520 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=520 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=520 recurrent-projection-dim=130 non-recurrent-projection-dim=130 $lstm_opts + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 11 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value=0.99 \ + --trainer.dropout-schedule="$dropout_schedule" \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=$lang \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 12 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l " + echo " Options:" + echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." + echo " --stage (0|1|2) # start scoring script from part-way through." + echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." + echo " --min_lmwt # minumum LM-weight for lattice rescoring " + echo " --max_lmwt # maximum LM-weight for lattice rescoring " + exit 1; +fi + +data=$1 +lang_or_graph=$2 +dir=$3 + +symtab=$lang_or_graph/words.txt + +for f in $symtab $dir/lat.1.gz $data/text; do + [ ! 
-f $f ] && echo "score.sh: no such file $f" && exit 1; +done + +mkdir -p $dir/scoring/log + +cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt + +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \ + lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ + lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ + lattice-best-path --word-symbol-table=$symtab \ + ark:- ark,t:$dir/scoring/LMWT.$wip.tra || exit 1; +done + +# Note: the double level of quoting for the sed command +for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do + $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.$wip.log \ + cat $dir/scoring/LMWT.$wip.tra \| \ + utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ + compute-wer --text --mode=present \ + ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; +done + +exit 0; diff --git a/egs/mini_librispeech/s5/local/subset_dataset.sh b/egs/mini_librispeech/s5/local/subset_dataset.sh new file mode 100755 index 00000000000..050128247a4 --- /dev/null +++ b/egs/mini_librispeech/s5/local/subset_dataset.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright 2017 Luminar Technologies, Inc. (author: Daniel Galvez) +# Apache 2.0 + +# The following commands were used to generate the mini_librispeech dataset: +# +# Note that data generation is random. This could be fixed by +# providing a seed argument to the shuf program. + +if [ "$#" -ne 3 ]; then + echo "Usage: $0 " + echo "e.g.: $0 /export/a05/dgalvez/LibriSpeech/train-clean-100 \\ + /export/a05/dgalvez/LibriSpeech/train-clean-5 5" + exit 1 +fi + +src_dir=$1 +dest_dir=$2 +dest_num_hours=$3 + +src=$(basename $src_dir) +dest=$(basename $dest_dir) +librispeech_dir=$(dirname $src_dir) + +# TODO: Possibly improve this to ensure gender balance and speaker +# balance. +# TODO: Use actual time values instead of assuming that to make sure we get $dest_num_hours of data +src_num_hours=$(grep "$src" $librispeech_dir/CHAPTERS.TXT | awk -F'|' '{ print $3 }' | \ +python -c ' +from __future__ import print_function +from sys import stdin +minutes_str = stdin.read().split() +print(int(round(sum([float(minutes) for minutes in minutes_str]) / 60.0)))') +src_num_chapters=$(grep "$src" $librispeech_dir/CHAPTERS.TXT | \ + awk -F'|' '{ print $1 }' | sort -u | wc -l) +mkdir -p data/subset_tmp +grep "$src" $librispeech_dir/CHAPTERS.TXT | \ + awk -F'|' '{ print $1 }' | \ + shuf -n $(((dest_num_hours * src_num_chapters) / src_num_hours)) > \ + data/subset_tmp/${dest}_chapter_id_list.txt + +while read -r chapter_id || [[ -n "$chapter_id" ]]; do + chapter_dir=$(find $src_dir/ -mindepth 2 -name "$chapter_id" -type d) + speaker_id=$(basename $(dirname $chapter_dir)) + mkdir -p $dest_dir/$speaker_id/ + cp -r $chapter_dir $dest_dir/$speaker_id/ +done < data/subset_tmp/${dest}_chapter_id_list.txt diff --git a/egs/mini_librispeech/s5/path.sh b/egs/mini_librispeech/s5/path.sh new file mode 100644 index 00000000000..705600ad47a --- /dev/null +++ b/egs/mini_librispeech/s5/path.sh @@ -0,0 +1,8 @@ +export KALDI_ROOT=`pwd`/../../.. +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. 
$KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C + +# For now, don't include any of the optional dependenices of the main +# librispeech recipe diff --git a/egs/mini_librispeech/s5/run.sh b/egs/mini_librispeech/s5/run.sh new file mode 100755 index 00000000000..bf1ded337f6 --- /dev/null +++ b/egs/mini_librispeech/s5/run.sh @@ -0,0 +1,205 @@ +#!/bin/bash + +# Note: this works only on pre-downloaded data on the CLSP servers +data=/export/a05/dgalvez/ + +data_url=www.openslr.org/resources/TODO # TODO +lm_url=www.openslr.org/resources/11 + +. ./cmd.sh +. ./path.sh + +stage=0 +. utils/parse_options.sh + +# TODO(galv): Reconsider this +set -euxo pipefail + +# TODO(galv): Modify openslr.org to contain the minified training dataset. +# for part in dev-clean-2 train-clean-5; do +# local/download_and_untar.sh $data $data_url $part +# done + +if [ $stage -le 0 ]; then + local/download_lm.sh $lm_url data/local/lm +fi + +if [ $stage -le 1 ]; then + # format the data as Kaldi data directories + for part in dev-clean-2 train-clean-5; do + # use underscore-separated names in data directories. + local/data_prep.sh $data/LibriSpeech/$part data/$(echo $part | sed s/-/_/g) + done + + local/prepare_dict.sh --stage 3 --nj 30 --cmd "$train_cmd" \ + data/local/lm data/local/lm data/local/dict_nosp + + utils/prepare_lang.sh data/local/dict_nosp \ + "" data/local/lang_tmp_nosp data/lang_nosp + + local/format_lms.sh --src-dir data/lang_nosp data/local/lm + # Create ConstArpaLm format language model for full 3-gram and 4-gram LMs + utils/build_const_arpa_lm.sh data/local/lm/lm_tglarge.arpa.gz \ + data/lang_nosp data/lang_nosp_test_tglarge +fi + +if [ $stage -le 2 ]; then + mfccdir=mfcc + # spread the mfccs over various machines, as this data-set is quite large. + if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then + mfcc=$(basename mfccdir) # in case was absolute pathname (unlikely), get basename. + utils/create_split_dir.pl /export/b{07,14,16,17}/$USER/kaldi-data/egs/librispeech/s5/$mfcc/storage \ + $mfccdir/storage + fi + + for part in dev_clean_2 train_clean_5; do + steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/$part exp/make_mfcc/$part $mfccdir + steps/compute_cmvn_stats.sh data/$part exp/make_mfcc/$part $mfccdir + done + + # Get the shortest 500 utterances first because those are more likely + # to have accurate alignments. + utils/subset_data_dir.sh --shortest data/train_clean_5 500 data/train_500short +fi + +# train a monophone system +if [ $stage -le 3 ]; then + # TODO(galv): Is this too many jobs for a smaller dataset? + steps/train_mono.sh --boost-silence 1.25 --nj 5 --cmd "$train_cmd" \ + data/train_500short data/lang_nosp exp/mono + # TODO: Understand why we use lang_nosp here... 
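+  # (Probable answer: lang_nosp is the lang directory built without
+  #  pronunciation/silence probabilities; those are only estimated from the
+  #  tri3b alignments in a later stage, which then creates data/lang.)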
+ ( + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/mono exp/mono/graph_nosp_tgsmall + for test in dev_clean_2; do + steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/mono/graph_nosp_tgsmall \ + data/$test exp/mono/decode_nosp_tgsmall_$test + done + )& + + steps/align_si.sh --boost-silence 1.25 --nj 5 --cmd "$train_cmd" \ + data/train_clean_5 data/lang_nosp exp/mono exp/mono_ali_train_clean_5 +fi + +# train a first delta + delta-delta triphone system on all utterances +if [ $stage -le 4 ]; then + steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \ + 2000 10000 data/train_clean_5 data/lang_nosp exp/mono_ali_train_clean_5 exp/tri1 + + # decode using the tri1 model + ( + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/tri1 exp/tri1/graph_nosp_tgsmall + for test in dev_clean_2; do + steps/decode.sh --nj 5 --cmd "$decode_cmd" exp/tri1/graph_nosp_tgsmall \ + data/$test exp/tri1/decode_nosp_tgsmall_$test + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ + data/$test exp/tri1/decode_nosp_{tgsmall,tgmed}_$test + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ + data/$test exp/tri1/decode_nosp_{tgsmall,tglarge}_$test + done + )& + + steps/align_si.sh --nj 5 --cmd "$train_cmd" \ + data/train_clean_5 data/lang_nosp exp/tri1 exp/tri1_ali_train_clean_5 +fi + +# train an LDA+MLLT system. +if [ $stage -le 5 ]; then + steps/train_lda_mllt.sh --cmd "$train_cmd" \ + --splice-opts "--left-context=3 --right-context=3" 2500 15000 \ + data/train_clean_5 data/lang_nosp exp/tri1_ali_train_clean_5 exp/tri2b + + # decode using the LDA+MLLT model + ( + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/tri2b exp/tri2b/graph_nosp_tgsmall + for test in dev_clean_2; do + steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgsmall \ + data/$test exp/tri2b/decode_nosp_tgsmall_$test + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ + data/$test exp/tri2b/decode_nosp_{tgsmall,tgmed}_$test + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ + data/$test exp/tri2b/decode_nosp_{tgsmall,tglarge}_$test + done + )& + + # Align utts using the tri2b model + steps/align_si.sh --nj 5 --cmd "$train_cmd" --use-graphs true \ + data/train_clean_5 data/lang_nosp exp/tri2b exp/tri2b_ali_train_clean_5 +fi + +# Train tri3b, which is LDA+MLLT+SAT +if [ $stage -le 6 ]; then + steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \ + data/train_clean_5 data/lang_nosp exp/tri2b_ali_train_clean_5 exp/tri3b + + # decode using the tri3b model + ( + utils/mkgraph.sh data/lang_nosp_test_tgsmall \ + exp/tri3b exp/tri3b/graph_nosp_tgsmall + for test in dev_clean_2; do + steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \ + exp/tri3b/graph_nosp_tgsmall data/$test \ + exp/tri3b/decode_nosp_tgsmall_$test + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ + data/$test exp/tri3b/decode_nosp_{tgsmall,tgmed}_$test + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ + data/$test exp/tri3b/decode_nosp_{tgsmall,tglarge}_$test + done + )& +fi + +# Now we compute the pronunciation and silence probabilities from training data, +# and re-create the lang directory. 
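+# (For the reader: steps/get_prons.sh counts, from the tri3b alignments, how
+# often each pronunciation variant and each word-boundary silence was used,
+# and utils/dict_dir_add_pronprobs.sh turns those counts into
+# per-pronunciation probabilities in the new dict directory.)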
+if [ $stage -le 7 ]; then + steps/get_prons.sh --cmd "$train_cmd" \ + data/train_clean_5 data/lang_nosp exp/tri3b + utils/dict_dir_add_pronprobs.sh --max-normalize true \ + data/local/dict_nosp \ + exp/tri3b/pron_counts_nowb.txt exp/tri3b/sil_counts_nowb.txt \ + exp/tri3b/pron_bigram_counts_nowb.txt data/local/dict + + utils/prepare_lang.sh data/local/dict \ + "" data/local/lang_tmp data/lang + + local/format_lms.sh --src-dir data/lang data/local/lm + + utils/build_const_arpa_lm.sh \ + data/local/lm/lm_tglarge.arpa.gz data/lang data/lang_test_tglarge + + steps/align_fmllr.sh --nj 5 --cmd "$train_cmd" \ + data/train_clean_5 data/lang exp/tri3b exp/tri3b_ali_train_clean_5 +fi + + +if [ $stage -le 8 ]; then + # Test the tri3b system with the silprobs and pron-probs. + + # decode using the tri3b model + utils/mkgraph.sh data/lang_test_tgsmall \ + exp/tri3b exp/tri3b/graph_tgsmall + for test in dev_clean_2; do + steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \ + exp/tri3b/graph_tgsmall data/$test \ + exp/tri3b/decode_tgsmall_$test + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ + data/$test exp/tri3b/decode_{tgsmall,tgmed}_$test + steps/lmrescore_const_arpa.sh \ + --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ + data/$test exp/tri3b/decode_{tgsmall,tglarge}_$test + done +fi + +exit 0 # temp + +# Train a chain model +if [ $stage -le 9 ]; then + local/chain/run_tdnn.sh --stage 0 +fi + +# Don't finish until all background decoding jobs are finished. +wait diff --git a/egs/mini_librispeech/s5/steps b/egs/mini_librispeech/s5/steps new file mode 120000 index 00000000000..1b186770dd1 --- /dev/null +++ b/egs/mini_librispeech/s5/steps @@ -0,0 +1 @@ +../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/mini_librispeech/s5/utils b/egs/mini_librispeech/s5/utils new file mode 120000 index 00000000000..a3279dc8679 --- /dev/null +++ b/egs/mini_librispeech/s5/utils @@ -0,0 +1 @@ +../../wsj/s5/utils/ \ No newline at end of file diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh index d874eb0986a..9d48ec7a898 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh @@ -226,7 +226,7 @@ fi if [ $stage -le 16 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage fi steps/nnet3/chain/train.py --stage=$train_stage \ From c68a576b08a0b182497db67da26b7ebb71f50450 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 21 Apr 2017 22:16:22 -0400 Subject: [PATCH 515/530] [egs] adding results and cleanup in mini-librispeech --- egs/mini_librispeech/s5/local/download_lm.sh | 73 ++++++++++++++++++- .../s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh | 20 ++++- egs/mini_librispeech/s5/run.sh | 1 - 3 files changed, 91 insertions(+), 3 deletions(-) mode change 120000 => 100755 egs/mini_librispeech/s5/local/download_lm.sh diff --git a/egs/mini_librispeech/s5/local/download_lm.sh b/egs/mini_librispeech/s5/local/download_lm.sh deleted file mode 120000 index d8bae7f4d32..00000000000 --- a/egs/mini_librispeech/s5/local/download_lm.sh +++ /dev/null @@ -1 +0,0 @@ -../../../librispeech/s5/local/download_lm.sh \ No newline at end of file diff --git a/egs/mini_librispeech/s5/local/download_lm.sh b/egs/mini_librispeech/s5/local/download_lm.sh new file mode 100755 index 00000000000..185d4811768 --- /dev/null +++ b/egs/mini_librispeech/s5/local/download_lm.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# Copyright 2014 Vassil Panayotov +# 2017 Daniel Povey +# Apache 2.0 + +if [ $# -ne "2" ]; then + echo "Usage: $0 <base-url> <dst-dir>" + echo "e.g.: $0 http://www.openslr.org/resources/11 data/local/lm" + exit 1 +fi + +base_url=$1 +dst_dir=$2 + +# given a filename returns the corresponding file size in bytes +# The switch cases below can be autogenerated by entering the data directory and running: +# for f in *; do echo "\"$f\") echo \"$(du -b $f | awk '{print $1}')\";;"; done +function filesize() { + case $1 in + "3-gram.arpa.gz") echo "759636181";; + "3-gram.pruned.1e-7.arpa.gz") echo "34094057";; + "3-gram.pruned.3e-7.arpa.gz") echo "13654242";; + "librispeech-lexicon.txt") echo "5627653";; + "librispeech-vocab.txt") echo "1737588";; + *) echo "";; + esac +} + +function check_and_download () { + [[ $# -eq 1 ]] || { echo "check_and_download() expects exactly one argument!"; return 1; } + fname=$1 + echo "Downloading file '$fname' into '$dst_dir'..." + expect_size="$(filesize $fname)" + [[ ! -z "$expect_size" ]] || { echo "Unknown file size for '$fname'"; return 1; } + if [[ -s $dst_dir/$fname ]]; then + # In the following statement, the first version works on Linux, and the part + # after '||' works on OS X (BSD stat). + f=$dst_dir/$fname + fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f) + if [[ "$fsize" -eq "$expect_size" ]]; then + echo "'$fname' already exists and appears to be complete" + return 0 + else + echo "WARNING: '$fname' exists, but the size is wrong - re-downloading ..." + fi + fi + wget --no-check-certificate -O $dst_dir/$fname $base_url/$fname || { + echo "Error while trying to download $fname!" + return 1 + } + f=$dst_dir/$fname + # In the following statement, the first version works on Linux, and the part after '||' + # works on OS X (BSD stat).
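+  # (Sketch of the two branches, with a hypothetical file: GNU 'du -b foo.gz'
+  #  prints e.g. "34094057  foo.gz", so awk keeps field 1, while the BSD/OS X
+  #  'stat -f %z foo.gz' prints the byte count alone.)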
+ fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f) + [[ "$fsize" -eq "$expect_size" ]] || { echo "$fname: file size mismatch!"; return 1; } + return 0 +} + +mkdir -p $dst_dir + +for f in 3-gram.arpa.gz 3-gram.pruned.1e-7.arpa.gz 3-gram.pruned.3e-7.arpa.gz \ + librispeech-vocab.txt librispeech-lexicon.txt; do + check_and_download $f || exit 1 +done + +cd $dst_dir +ln -sf 3-gram.pruned.1e-7.arpa.gz lm_tgmed.arpa.gz +ln -sf 3-gram.pruned.3e-7.arpa.gz lm_tgsmall.arpa.gz +ln -sf 3-gram.arpa.gz lm_tglarge.arpa.gz + +exit 0 diff --git a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh index af0779c0109..ba4ecc268df 100755 --- a/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh +++ b/egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1b.sh @@ -1,6 +1,24 @@ #!/bin/bash -# This is like 1a, but adding dropout. It seems g +# This is like 1a, but adding dropout. It's definitely helpful, +# and you can see in the objf values that the train-test difference +# is less. + + +# steps/info/nnet3_dir_info.pl exp/nnet3/tdnn_lstm1b_sp +# exp/nnet3/tdnn_lstm1b_sp: num-iters=32 nj=2..2 num-params=8.4M dim=40+100->2041 combine=-0.71->-0.58 loglike:train/valid[20,31,combined]=(-2.78,-0.95,-0.57/-2.94,-1.31,-0.98) accuracy:train/valid[20,31,combined]=(0.48,0.75,0.81/0.45,0.67,0.71) + +# local/nnet3/compare_wer.sh --online exp/nnet3/tdnn_lstm1a_sp exp/nnet3/tdnn_lstm1b_sp +# System tdnn_lstm1a_sp tdnn_lstm1b_sp +#WER dev_clean_2 (tgsmall) 17.67 17.01 +# [online:] 18.06 17.26 +#WER dev_clean_2 (tglarge) 13.43 12.63 +# [online:] 13.73 12.94 +# Final train prob -0.3660 -0.5680 +# Final valid prob -1.0236 -0.9771 +# Final train acc 0.8737 0.8067 +# Final valid acc 0.7222 0.7144 + # Set -e here so that we catch if any executable fails immediately diff --git a/egs/mini_librispeech/s5/run.sh b/egs/mini_librispeech/s5/run.sh index bf1ded337f6..964485d4531 100755 --- a/egs/mini_librispeech/s5/run.sh +++ b/egs/mini_librispeech/s5/run.sh @@ -194,7 +194,6 @@ if [ $stage -le 8 ]; then done fi -exit 0 # temp # Train a chain model if [ $stage -le 9 ]; then From 1f51ef5d099e12b70aa7c2dad25a8363d6e483af Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 23 Apr 2017 19:51:15 -0400 Subject: [PATCH 516/530] [egs] Add soft link for mini-librispeech setup --- egs/mini_librispeech/s5/local/nnet3/run_tdnn_lstm.sh | 1 + 1 file changed, 1 insertion(+) create mode 120000 egs/mini_librispeech/s5/local/nnet3/run_tdnn_lstm.sh diff --git a/egs/mini_librispeech/s5/local/nnet3/run_tdnn_lstm.sh b/egs/mini_librispeech/s5/local/nnet3/run_tdnn_lstm.sh new file mode 120000 index 00000000000..a4fa11e0908 --- /dev/null +++ b/egs/mini_librispeech/s5/local/nnet3/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1b.sh \ No newline at end of file From 95a550b88d16b12dcaab9ebe5ac5333f7374e571 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 24 Apr 2017 12:17:57 -0400 Subject: [PATCH 517/530] segmenter: Fixing RemoveSegments --- src/segmenter/segmentation-utils.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/segmenter/segmentation-utils.cc b/src/segmenter/segmentation-utils.cc index 1a87ba78ad8..4d76afba0b8 100644 --- a/src/segmenter/segmentation-utils.cc +++ b/src/segmenter/segmentation-utils.cc @@ -115,15 +115,19 @@ void RemoveSegments(const std::vector &labels, for (SegmentList::iterator it = segmentation->Begin(); it != segmentation->End(); ) { - if (max_remove_length == -1) { + if 
(max_remove_length < 0) {
       if (std::binary_search(labels.begin(), labels.end(),
                              it->Label()))
         it = segmentation->Erase(it);
+      else
+        ++it;
     } else if (it->Length() < max_remove_length) {
       if (std::binary_search(labels.begin(), labels.end(),
                              it->Label()) ||
          (labels.size() == 1 && labels[0] == -1))
         it = segmentation->Erase(it);
+      else
+        ++it;
     } else {
       ++it;
     }
From ecc483f8b93993e4e73c101d208c45b209b1c717 Mon Sep 17 00:00:00 2001
From: Vimal Manohar 
Date: Mon, 24 Apr 2017 12:19:54 -0400
Subject: [PATCH 518/530] sad: Updating subsegment_data_dir

---
 egs/wsj/s5/utils/data/get_utt2num_frames.sh  |  2 +-
 egs/wsj/s5/utils/data/subsegment_data_dir.sh | 30 +++++++++++++++-----
 src/featbin/copy-feats.cc                    | 18 ++++++++++--
 3 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/egs/wsj/s5/utils/data/get_utt2num_frames.sh b/egs/wsj/s5/utils/data/get_utt2num_frames.sh
index 3f6d15c45a5..ec80e771c83 100755
--- a/egs/wsj/s5/utils/data/get_utt2num_frames.sh
+++ b/egs/wsj/s5/utils/data/get_utt2num_frames.sh
@@ -19,7 +19,7 @@ fi

 data=$1

-if [ -f $data/utt2num_frames ]; then
+if [ -s $data/utt2num_frames ]; then
   echo "$0: $data/utt2num_frames already present!"
   exit 0;
 fi
diff --git a/egs/wsj/s5/utils/data/subsegment_data_dir.sh b/egs/wsj/s5/utils/data/subsegment_data_dir.sh
index 10a8a9cb264..4c664f16441 100755
--- a/egs/wsj/s5/utils/data/subsegment_data_dir.sh
+++ b/egs/wsj/s5/utils/data/subsegment_data_dir.sh
@@ -52,11 +52,11 @@ export LC_ALL=C

 srcdir=$1
 subsegments=$2
-no_text=true
+add_subsegment_text=false

 if [ $# -eq 4 ]; then
   new_text=$3
   dir=$4
-  no_text=false
+  add_subsegment_text=true

   if [ ! -f "$new_text" ]; then
     echo "$0: no such file $new_text"
@@ -78,7 +78,7 @@ if ! mkdir -p $dir; then
   echo "$0: failed to create directory $dir"
 fi

-if ! $no_text; then
+if $add_subsegment_text; then
   if ! cmp <(awk '{print $1}' <$subsegments) <(awk '{print $1}' <$new_text); then
     echo "$0: expected the first fields of the files $subsegments and $new_text to be identical"
     exit 1
@@ -102,7 +102,7 @@ utils/apply_map.pl -f 2 $srcdir/utt2spk < $dir/new2old_utt >$dir/utt2spk
 # .. and the new spk2utt file.
 utils/utt2spk_to_spk2utt.pl <$dir/utt2spk >$dir/spk2utt

-if ! $no_text; then
+if $add_subsegment_text; then
   # the new text file is just what the user provides.
   cp $new_text $dir/text
 fi
@@ -143,6 +143,10 @@ if [ -f $srcdir/feats.scp ]; then
   frame_shift=$(utils/data/get_frame_shift.sh $srcdir)
   echo "$0: note: frame shift is $frame_shift [affects feats.scp]"

+  utils/data/get_utt2num_frames.sh --cmd "run.pl" --nj 1 $srcdir
+  awk '{print $1" "$2}' $subsegments | \
+    utils/apply_map.pl -f 2 $srcdir/utt2num_frames > \
+    $dir/utt2max_frames

   # The subsegments format is <new-utt-id> <old-utt-id> <start-time> <end-time>.
   # e.g. 'utt_foo-1 utt_foo 7.21 8.93'
@@ -165,10 +169,22 @@ if [ -f $srcdir/feats.scp ]; then
   # utt_foo-1 some command|[721:892]
   # Lastly, utils/data/normalize_data_range.pl will only do something nontrivial if
   # the original data-dir already had data-ranges in square brackets.
-  awk -v s=$frame_shift '{print $1, $2, int(($3/s)+0.5), int(($4/s)-0.5);}' <$subsegments| \
+  cat $subsegments | awk -v s=$frame_shift '{print $1, $2, int(($3/s)+0.5), int(($4/s)-0.5);}' | \
     utils/apply_map.pl -f 2 $srcdir/feats.scp | \
     awk '{p=NF-1; for (n=1;n<p;n++) printf("%s ", $n); print "[" $p ":" $NF "]"}' | \
-    utils/data/normalize_data_range.pl >$dir/feats.scp
+    utils/data/normalize_data_range.pl | \
+    utils/data/fix_subsegmented_feats.pl $dir/utt2max_frames >$dir/feats.scp
+
+  cat $dir/feats.scp | perl -ne 'm/^(\S+) .+\[(\d+):(\d+)\]$/; print "$1 " . ($3-$2+1) . "\n"' > \
+    $dir/utt2num_frames
+
+  if [ -f $srcdir/vad.scp ]; then
+    cat $subsegments | awk -v s=$frame_shift '{print $1, $2, int(($3/s)+0.5), int(($4/s)-0.5);}' | \
+      utils/apply_map.pl -f 2 $srcdir/vad.scp | \
+      awk '{p=NF-1; for (n=1;n<p;n++) printf("%s ", $n); print "[" $p ":" $NF "]"}' | \
+      utils/data/normalize_data_range.pl | \
+      utils/data/fix_subsegmented_feats.pl $dir/utt2max_frames >$dir/vad.scp
+  fi
 fi


@@ -202,7 +218,7 @@ utils/data/fix_data_dir.sh $dir
 validate_opts=
 [ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats"
 [ ! -f $srcdir/wav.scp ] && validate_opts="$validate_opts --no-wav"
-$no_text && validate_opts="$validate_opts --no-text"
+! $add_subsegment_text && validate_opts="$validate_opts --no-text"

 utils/data/validate_data_dir.sh $validate_opts $dir

diff --git a/src/featbin/copy-feats.cc b/src/featbin/copy-feats.cc
index 0fbcca6399a..f1f58653f2f 100644
--- a/src/featbin/copy-feats.cc
+++ b/src/featbin/copy-feats.cc
@@ -102,19 +102,31 @@ int main(int argc, char *argv[]) {
       CompressedMatrixWriter kaldi_writer(wspecifier);
       if (htk_in) {
         SequentialTableReader<HtkMatrixHolder> htk_reader(rspecifier);
-        for (; !htk_reader.Done(); htk_reader.Next(), num_done++)
+        for (; !htk_reader.Done(); htk_reader.Next(), num_done++) {
           kaldi_writer.Write(htk_reader.Key(),
                              CompressedMatrix(htk_reader.Value().first));
+          if (!num_frames_wspecifier.empty())
+            num_frames_writer.Write(htk_reader.Key(),
+                                    htk_reader.Value().first.NumRows());
+        }
       } else if (sphinx_in) {
         SequentialTableReader<SphinxMatrixHolder<4> > sphinx_reader(rspecifier);
-        for (; !sphinx_reader.Done(); sphinx_reader.Next(), num_done++)
+        for (; !sphinx_reader.Done(); sphinx_reader.Next(), num_done++) {
           kaldi_writer.Write(sphinx_reader.Key(),
                              CompressedMatrix(sphinx_reader.Value()));
+          if (!num_frames_wspecifier.empty())
+            num_frames_writer.Write(sphinx_reader.Key(),
+                                    sphinx_reader.Value().NumRows());
+        }
       } else {
         SequentialBaseFloatMatrixReader kaldi_reader(rspecifier);
-        for (; !kaldi_reader.Done(); kaldi_reader.Next(), num_done++)
+        for (; !kaldi_reader.Done(); kaldi_reader.Next(), num_done++) {
           kaldi_writer.Write(kaldi_reader.Key(),
                              CompressedMatrix(kaldi_reader.Value()));
+          if (!num_frames_wspecifier.empty())
+            num_frames_writer.Write(kaldi_reader.Key(),
+                                    kaldi_reader.Value().NumRows());
+        }
       }
     }
     KALDI_LOG << "Copied " << num_done << " feature matrices.";
From 20f3072bf327eeff36923765be696863ba9cd715 Mon Sep 17 00:00:00 2001
From: Vimal Manohar 
Date: Mon, 24 Apr 2017 12:21:36 -0400
Subject: [PATCH 519/530] sad: xconfig stats layer

---
 egs/wsj/s5/steps/libs/nnet3/xconfig/layers.py      | 1 -
 egs/wsj/s5/steps/libs/nnet3/xconfig/stats_layer.py | 9 ++++-----
 egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py       | 2 +-
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/layers.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/layers.py
index 1092be572b4..188e0ec4322 100644
--- a/egs/wsj/s5/steps/libs/nnet3/xconfig/layers.py
+++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/layers.py
@@ -5,5 +5,4 @@

 from basic_layers import *
 from lstm import *
-from tdnn import *
 from stats_layer import *
diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/stats_layer.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/stats_layer.py
index beaf7c8923a..e49a4fa3df6 100644
--- a/egs/wsj/s5/steps/libs/nnet3/xconfig/stats_layer.py
+++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/stats_layer.py
@@ -6,7 +6,6 @@
 from __future__ import print_function
 import re

-from libs.nnet3.xconfig.utils import XconfigParserError as xparser_error
 from libs.nnet3.xconfig.basic_layers import XconfigLayerBase


@@ -46,13 +45,13 @@ def set_default_configs(self):
     def set_derived_configs(self):
         config_string = self.config['config']
         if config_string == '':
- raise xparser_error("config has to be non-empty", + raise RuntimeError("config has to be non-empty", self.str()) m = re.search("(mean|mean\+stddev|mean\+count|mean\+stddev\+count)" "\((-?\d+):(-?\d+):(-?\d+):(-?\d+)\)", config_string) if m is None: - raise xparser_error("Invalid statistic-config string: {0}".format( + raise RuntimeError("Invalid statistic-config string: {0}".format( config_string), self) self._output_stddev = (m.group(1) in ['mean+stddev', @@ -69,7 +68,7 @@ def set_derived_configs(self): + 1 if self._output_log_counts else 0) if self.config['dim'] > 0 and self.config['dim'] != output_dim: - raise xparser_error( + raise RuntimeError( "Invalid dim supplied {0:d} != " "actual output dim {1:d}".format( self.config['dim'], output_dim)) @@ -81,7 +80,7 @@ def check_configs(self): and self._left_context % self._stats_period == 0 and self._right_context % self._stats_period == 0 and self._stats_period % self._input_period == 0): - raise xparser_error( + raise RuntimeError( "Invalid configuration of statistics-extraction: {0}".format( self.config['config']), self) super(XconfigStatsLayer, self).check_configs() diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py index 3d958568717..76477300884 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/utils.py @@ -484,7 +484,7 @@ def parse_config_line(orig_config_line): # treats splitting on space as a special case that may give zero fields. config_line = orig_config_line.split('#')[0] # Note: this set of allowed characters may have to be expanded in future. - x = re.search('[^a-zA-Z0-9\.\-\(\)@_=,/\s"]', config_line) + x = re.search('[^a-zA-Z0-9\.\-\(\)@_=,/+:\s"]', config_line) if x is not None: bad_char = x.group(0) if bad_char == "'": From ed129f1a17689f14c64095ddb3751e696f92619e Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 24 Apr 2017 18:46:00 -0400 Subject: [PATCH 520/530] sad: Make utt2num_frames default in feats extraction --- egs/wsj/s5/steps/make_mfcc.sh | 2 +- egs/wsj/s5/steps/make_mfcc_pitch.sh | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/egs/wsj/s5/steps/make_mfcc.sh b/egs/wsj/s5/steps/make_mfcc.sh index ddb63a0e6fb..5362e7fa9d9 100755 --- a/egs/wsj/s5/steps/make_mfcc.sh +++ b/egs/wsj/s5/steps/make_mfcc.sh @@ -10,7 +10,7 @@ nj=4 cmd=run.pl mfcc_config=conf/mfcc.conf compress=true -write_utt2num_frames=false # if true writes utt2num_frames +write_utt2num_frames=true # if true writes utt2num_frames # End configuration section. echo "$0 $@" # Print the command line for logging diff --git a/egs/wsj/s5/steps/make_mfcc_pitch.sh b/egs/wsj/s5/steps/make_mfcc_pitch.sh index ff9a7d2f5f3..4a2808b811f 100755 --- a/egs/wsj/s5/steps/make_mfcc_pitch.sh +++ b/egs/wsj/s5/steps/make_mfcc_pitch.sh @@ -96,6 +96,12 @@ for n in $(seq $nj); do utils/create_data_link.pl $mfcc_pitch_dir/raw_mfcc_pitch_$name.$n.ark done +if $write_utt2num_frames; then + write_num_frames_opt="--write-num-frames=ark,t:$logdir/utt2num_frames.JOB" +else + write_num_frames_opt= +fi + if [ -f $data/segments ]; then echo "$0 [info]: segments file exists: using that." 
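+  # Sketch of the num-frames flow added in this patch (comment added for
+  # clarity; the exact behavior is that of copy-feats --write-num-frames):
+  # with $write_num_frames_opt set, each copy-feats job below also writes
+  # "utt-id num-frames" lines to $logdir/utt2num_frames.JOB, and these
+  # per-job files are concatenated into $data/utt2num_frames near the end
+  # of this script.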
split_segments=""
@@ -111,7 +117,7 @@ if [ -f $data/segments ]; then

   $cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \
     paste-feats --length-tolerance=$paste_length_tolerance "$mfcc_feats" "$pitch_feats" ark:- \| \
-    copy-feats --compress=$compress ark:- \
+    copy-feats --compress=$compress $write_num_frames_opt ark:- \
     ark,scp:$mfcc_pitch_dir/raw_mfcc_pitch_$name.JOB.ark,$mfcc_pitch_dir/raw_mfcc_pitch_$name.JOB.scp \
     || exit 1;

@@ -129,7 +135,7 @@ else

   $cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \
     paste-feats --length-tolerance=$paste_length_tolerance "$mfcc_feats" "$pitch_feats" ark:- \| \
-    copy-feats --compress=$compress ark:- \
+    copy-feats --compress=$compress $write_num_frames_opt ark:- \
     ark,scp:$mfcc_pitch_dir/raw_mfcc_pitch_$name.JOB.ark,$mfcc_pitch_dir/raw_mfcc_pitch_$name.JOB.scp \
     || exit 1;

@@ -147,6 +153,13 @@ for n in $(seq $nj); do
   cat $mfcc_pitch_dir/raw_mfcc_pitch_$name.$n.scp || exit 1;
 done > $data/feats.scp

+if $write_utt2num_frames; then
+  for n in $(seq $nj); do
+    cat $logdir/utt2num_frames.$n || exit 1;
+  done > $data/utt2num_frames || exit 1
+  rm $logdir/utt2num_frames.*
+fi
+
 rm $logdir/wav_${name}.*.scp  $logdir/segments.* 2>/dev/null

 nf=`cat $data/feats.scp | wc -l`
From ddf58d3e012416d81664fc6fbf240b3d8590947c Mon Sep 17 00:00:00 2001
From: Vimal Manohar 
Date: Mon, 24 Apr 2017 18:54:03 -0400
Subject: [PATCH 521/530] segmenter: Update local recipes

---
 .../local/segmentation/prepare_babel_data.sh  | 14 +++--
 .../local/segmentation/prepare_fisher_data.sh |  8 ++-
 .../local/segmentation/prepare_unsad_data.sh  | 52 ++++++-------------
 3 files changed, 26 insertions(+), 48 deletions(-)

diff --git a/egs/aspire/s5/local/segmentation/prepare_babel_data.sh b/egs/aspire/s5/local/segmentation/prepare_babel_data.sh
index 927c530663d..e70dc216980 100644
--- a/egs/aspire/s5/local/segmentation/prepare_babel_data.sh
+++ b/egs/aspire/s5/local/segmentation/prepare_babel_data.sh
@@ -68,20 +68,18 @@ EOF
 # The original data directory which will be converted to a whole (recording-level) directory.
 utils/copy_data_dir.sh $ROOT_DIR/data/train data/babel_${lang_id}_train
 train_data_dir=data/babel_${lang_id}_train
-speeds="0.9 1.0 1.1"
-num_speeds=$(echo $speeds | awk '{print NF}')

 # Expecting the user to have done run.sh to have $model_dir,
 # $sat_model_dir, $lang, $lang_test, $train_data_dir
 local/segmentation/prepare_unsad_data.sh \
-  --sad-map $dir/babel_sad.map --speeds "$speeds" \
+  --sad-map $dir/babel_sad.map \
   --config-dir $ROOT_DIR/conf --feat-type plp --add-pitch true \
   --reco-nj 40 --nj 100 --cmd "$train_cmd" \
   --sat-model-dir $sat_model_dir \
   --lang-test $lang_test \
   $train_data_dir $lang $model_dir $dir

-orig_data_dir=${train_data_dir}_sp${num_speeds}
+orig_data_dir=${train_data_dir}_sp

 data_dir=${train_data_dir}_whole

@@ -92,16 +90,16 @@ if [ ! 
-z $subset ]; then data_dir=${data_dir}_$subset fi -reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp4 +reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp # Add noise from MUSAN corpus to data directory and create a new data directory -local/segmentation/do_corruption_data_dir.sh \ - --data-dir $data_dir --speeds "$speeds" \ +local/segmentation/do_corruption_data_dir_snr.sh \ + --data-dir $data_dir \ --reco-vad-dir $reco_vad_dir \ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf # Add music from MUSAN corpus to data directory and create a new data directory local/segmentation/do_corruption_data_dir_music.sh \ - --data-dir $data_dir --speeds "$speeds" \ + --data-dir $data_dir \ --reco-vad-dir $reco_vad_dir \ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf diff --git a/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh b/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh index d90dd05f472..40f43cfd442 100644 --- a/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh +++ b/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh @@ -56,7 +56,6 @@ oov_I 3 oov_S 3 EOF -true && { # Expecting the user to have done run.sh to have $model_dir, # $sat_model_dir, $lang, $lang_test, $train_data_dir local/segmentation/prepare_unsad_data.sh \ @@ -66,21 +65,20 @@ local/segmentation/prepare_unsad_data.sh \ --sat-model-dir $sat_model_dir \ --lang-test $lang_test \ $train_data_dir $lang $model_dir $dir -} data_dir=${train_data_dir}_whole if [ ! -z $subset ]; then # Work on a subset - true && utils/subset_data_dir.sh ${data_dir} $subset \ + false && utils/subset_data_dir.sh ${data_dir} $subset \ ${data_dir}_$subset data_dir=${data_dir}_$subset fi -reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp4 +reco_vad_dir=$dir/`basename $model_dir`_reco_vad_`basename $train_data_dir`_sp # Add noise from MUSAN corpus to data directory and create a new data directory -true && local/segmentation/do_corruption_data_dir.sh \ +local/segmentation/do_corruption_data_dir_snr.sh \ --data-dir $data_dir \ --reco-vad-dir $reco_vad_dir \ --feat-suffix hires_bp --mfcc-config conf/mfcc_hires_bp.conf diff --git a/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh b/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh index dc4cbf58994..cccc7e2db84 100755 --- a/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh +++ b/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh @@ -38,8 +38,6 @@ sat_model_dir= # Model directory used for getting alignments lang_test= # Language directory used to build graph. # If its not provided, $lang will be used instead. -speeds="0.9 1.0 1.1" - . utils/parse_options.sh if [ $# -ne 4 ]; then @@ -190,15 +188,13 @@ if [ $stage -le -2 ]; then utils/data/get_utt2dur.sh ${whole_data_dir} fi -num_speeds=`echo $speeds | awk '{print NF}'` if $speed_perturb; then - plpdir=${plpdir}_sp$num_speeds - mfccdir=${mfccdir}_sp$num_speeds - + plpdir=${plpdir}_sp + mfccdir=${mfccdir}_sp if [ $stage -le -1 ]; then - utils/data/perturb_data_dir_speed_${num_speeds}way.sh ${whole_data_dir} ${whole_data_dir}_sp${num_speeds} - utils/data/perturb_data_dir_speed_${num_speeds}way.sh ${data_dir} ${data_dir}_sp${num_speeds} + utils/data/perturb_data_dir_speed_3way.sh ${whole_data_dir} ${whole_data_dir}_sp + utils/data/perturb_data_dir_speed_3way.sh ${data_dir} ${data_dir}_sp if [ $feat_type == "mfcc" ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $mfccdir/storage ]; then
@@ -208,9 +204,9 @@ if $speed_perturb; then
       make_mfcc --cmd "$cmd --max-jobs-run 40" --nj $nj \
         --mfcc-config $feat_config \
         --add-pitch $add_pitch --pitch-config $pitch_config \
-        ${whole_data_dir}_sp${num_speeds} exp/make_mfcc $mfccdir || exit 1
+        ${whole_data_dir}_sp exp/make_mfcc $mfccdir || exit 1
       steps/compute_cmvn_stats.sh \
-        ${whole_data_dir}_sp${num_speeds} exp/make_mfcc $mfccdir || exit 1
+        ${whole_data_dir}_sp exp/make_mfcc $mfccdir || exit 1
     elif [ $feat_type == "plp" ]; then
       if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $plpdir/storage ]; then
         utils/create_split_dir.pl \
@@ -220,20 +216,20 @@ if $speed_perturb; then
       make_plp --cmd "$cmd --max-jobs-run 40" --nj $nj \
         --plp-config $feat_config \
         --add-pitch $add_pitch --pitch-config $pitch_config \
-        ${whole_data_dir}_sp${num_speeds} exp/make_plp $plpdir || exit 1
+        ${whole_data_dir}_sp exp/make_plp $plpdir || exit 1
       steps/compute_cmvn_stats.sh \
-        ${whole_data_dir}_sp${num_speeds} exp/make_plp $plpdir || exit 1
+        ${whole_data_dir}_sp exp/make_plp $plpdir || exit 1
     else
       echo "$0: Unknown feat-type $feat_type. Must be mfcc or plp."
       exit 1
     fi

-    utils/fix_data_dir.sh ${whole_data_dir}_sp${num_speeds}
+    utils/fix_data_dir.sh ${whole_data_dir}_sp
   fi

-  data_dir=${data_dir}_sp${num_speeds}
-  whole_data_dir=${whole_data_dir}_sp${num_speeds}
-  data_id=${data_id}_sp${num_speeds}
+  data_dir=${data_dir}_sp
+  whole_data_dir=${whole_data_dir}_sp
+  data_id=${data_id}_sp
 fi


@@ -241,18 +237,9 @@ fi
 # Compute length of recording
 ###############################################################################

-utils/data/get_reco2utt.sh $data_dir
-
 if [ $stage -le 0 ]; then
-  utils/data/get_utt2num_frames.sh \
-    --frame-shift $frame_shift --frame-overlap $frame_overlap \
-    --cmd "$cmd" --nj $reco_nj $whole_data_dir
-
-  awk '{print $1" "$2}' ${data_dir}/segments | utils/apply_map.pl -f 2 ${whole_data_dir}/utt2num_frames > $data_dir/utt2max_frames
-  utils/data/get_subsegmented_feats.sh ${whole_data_dir}/feats.scp \
-    $frame_shift $frame_overlap ${data_dir}/segments | \
-    utils/data/fix_subsegmented_feats.pl $data_dir/utt2max_frames \
-    > ${data_dir}/feats.scp
+  utils/data/subsegment_data_dir.sh $whole_data_dir ${data_dir}/segments ${data_dir}/tmp
+  cp $data_dir/tmp/feats.scp $data_dir

   if [ $feat_type == mfcc ]; then
     steps/compute_cmvn_stats.sh ${data_dir} exp/make_mfcc/${data_id} $mfccdir
@@ -380,14 +367,9 @@ fi


 if [ $stage -le 6 ]; then
-  utils/data/get_reco2utt.sh $outside_data_dir
-  awk '{print $1" "$2}' $outside_data_dir/segments | utils/apply_map.pl -f 2 $whole_data_dir/utt2num_frames > $outside_data_dir/utt2max_frames
-
-  utils/data/get_subsegmented_feats.sh ${whole_data_dir}/feats.scp \
-    $frame_shift $frame_overlap ${outside_data_dir}/segments | \
-    utils/data/fix_subsegmented_feats.pl $outside_data_dir/utt2max_frames \
-    > ${outside_data_dir}/feats.scp
-
+  utils/data/subsegment_data_dir.sh $whole_data_dir $outside_data_dir/segments \
+    $outside_data_dir/tmp
+  cp $outside_data_dir/tmp/feats.scp $outside_data_dir
 fi

 extended_data_dir=$dir/${data_id}_extended
From ddc85cf2108f769df3ce5b6117ab68490f36b892 Mon Sep 17 00:00:00 2001
From: Vimal Manohar 
Date: Mon, 24 Apr 2017 21:25:19 -0400
Subject: [PATCH 522/530] segmenter: Adding some missing files

---
 .../segmentation/prepare_unsad_data_simple.sh | 114 ++++++++++++
 .../internal/make_bigram_G_fst.py             | 174 ++++++++++++++++++
 2 files changed, 288 insertions(+)
 create mode 100755 egs/aspire/s5/local/segmentation/prepare_unsad_data_simple.sh
 create mode 100755 egs/wsj/s5/steps/segmentation/internal/make_bigram_G_fst.py
diff --git a/egs/aspire/s5/local/segmentation/prepare_unsad_data_simple.sh b/egs/aspire/s5/local/segmentation/prepare_unsad_data_simple.sh
new file mode 100755
index 00000000000..f3d1a7707e8
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/prepare_unsad_data_simple.sh
@@ -0,0 +1,114 @@
+#!/bin/bash

+# Copyright 2016  Vimal Manohar
+# Apache 2.0

+# This script prepares speech labels for training an unsad network for
+# speech activity detection and music detection.
+# This is similar to the script prepare_unsad_data.sh, but directly
+# uses existing alignments to create labels, instead of creating new alignments.

+set -e
+set -o pipefail
+set -u

+. path.sh

+stage=-2
+cmd=queue.pl

+# Options to be passed to get_sad_map.py
+map_noise_to_sil=true   # Map noise phones to silence label (0)
+map_unk_to_speech=true  # Map unk phones to speech label (1)
+sad_map=    # Initial mapping from phones to speech/non-speech labels.
+            # Overrides the default mapping using phones/silence.txt
+            # and phones/nonsilence.txt

+. utils/parse_options.sh

+if [ $# -ne 4 ]; then
+  echo "This script takes a data directory and alignment directory and "
+  echo "converts it into speech activity labels"
+  echo "for the purpose of training a Universal Speech Activity Detector."
+  echo "Usage: $0 [options] <data-dir> <lang> <ali-dir> <dir>"
+  echo " e.g.: $0 data/train_100k data/lang exp/tri4a_ali exp/vad_data_prep"
+  echo ""
+  echo "Main options (for others, see top of script file)"
+  echo "  --config <config-file>                 # config file containing options"
+  echo "  --cmd (run.pl|queue.pl <queue opts>)   # how to run jobs."
+  exit 1
+fi

+data_dir=$1
+lang=$2
+ali_dir=$3
+dir=$4

+extra_files=

+for f in $data_dir/feats.scp $lang/phones.txt $lang/phones/silence.txt $lang/phones/nonsilence.txt $sad_map $ali_dir/ali.1.gz $ali_dir/final.mdl $ali_dir/tree $extra_files; do
+  if [ ! -f $f ]; then
+    echo "$f could not be found"
+    exit 1
+  fi
+done

+mkdir -p $dir

+data_id=$(basename $data_dir)

+if [ $stage -le 0 ]; then
+  # Get a mapping from the phones to the speech / non-speech labels
+  steps/segmentation/get_sad_map.py \
+    --init-sad-map="$sad_map" \
+    --map-noise-to-sil=$map_noise_to_sil \
+    --map-unk-to-speech=$map_unk_to_speech \
+    $lang | utils/sym2int.pl -f 1 $lang/phones.txt > $dir/sad_map
+fi

+###############################################################################
+# Convert alignment into SAD labels at utterance-level in segmentation format
+###############################################################################

+vad_dir=$dir/`basename ${ali_dir}`_vad_${data_id}

+# Convert relative path to full path
+vad_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir;' $vad_dir ${PWD}`

+if [ $stage -le 1 ]; then
+  steps/segmentation/internal/convert_ali_to_vad.sh --cmd "$cmd" \
+    $ali_dir $dir/sad_map $vad_dir
+fi

+[ ! -s $vad_dir/sad_seg.scp ] && echo "$0: $vad_dir/sad_seg.scp is empty" && exit 1

+###############################################################################
+# Post-process the segmentation and create frame-level alignments and
+# per-frame deriv weights.
+###############################################################################

+if [ $stage -le 2 ]; then
+  # Create per-frame speech / non-speech labels.
+  nj=`cat $vad_dir/num_jobs`

+  utils/data/get_utt2num_frames.sh --nj $nj --cmd "$cmd" $data_dir

+  set +e
+  for n in `seq $nj`; do
+    utils/create_data_link.pl $vad_dir/speech_labels.$n.ark
+  done
+  set -e

+  $cmd JOB=1:$nj $vad_dir/log/get_speech_labels.JOB.log \
+    segmentation-copy --keep-label=1 scp:$vad_dir/sad_seg.JOB.scp ark:- \| \
+    segmentation-to-ali --lengths-rspecifier=ark,t:$data_dir/utt2num_frames \
+    ark:- ark,scp:$vad_dir/speech_labels.JOB.ark,$vad_dir/speech_labels.JOB.scp

+  for n in `seq $nj`; do
+    cat $vad_dir/speech_labels.$n.scp
+  done > $vad_dir/speech_labels.scp

+  cp $vad_dir/speech_labels.scp $data_dir
+fi

+echo "$0: Finished creating corpus for training Universal SAD with data in $data_dir and labels in $vad_dir"
diff --git a/egs/wsj/s5/steps/segmentation/internal/make_bigram_G_fst.py b/egs/wsj/s5/steps/segmentation/internal/make_bigram_G_fst.py
new file mode 100755
index 00000000000..2431d293c4c
--- /dev/null
+++ b/egs/wsj/s5/steps/segmentation/internal/make_bigram_G_fst.py
@@ -0,0 +1,174 @@
+#! /usr/bin/env python

+from __future__ import print_function
+import argparse
+import logging
+import math


+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+handler = logging.StreamHandler()
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter("%(asctime)s [%(filename)s:%(lineno)s - "
+                              "%(funcName)s - %(levelname)s ] %(message)s")
+handler.setFormatter(formatter)
+logger.addHandler(handler)


+def get_args():
+    parser = argparse.ArgumentParser(
+        description="""This script generates a bigram G.fst lang for decoding.
+        It needs as an input classes_info file with the format:
+        <class-id> <initial-probability> <list-of-pairs>,
+        where each pair is <destination-class>:<transition-probability>.
+        destination-class -1 is used to represent final probability.""")

+    parser.add_argument("classes_info", type=argparse.FileType('r'),
+                        help="File with classes_info")
+    parser.add_argument("out_file", type=argparse.FileType('w'),
+                        help="Output G.fst. Use '-' for stdout")
+    args = parser.parse_args()
+    return args
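
+# Example classes_info file (hypothetical numbers; class 1 starts with
+# probability 0.5, self-loops with probability 0.9, moves to class 2 with
+# probability 0.05 and terminates (-1) with probability 0.05):
+#
+#   1 0.5 1:0.9 2:0.05 -1:0.05
+#   2 0.5 1:0.05 2:0.9 -1:0.05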


+class ClassInfo(object):
+    def __init__(self, class_id):
+        self.class_id = class_id
+        self.start_state = -1
+        self.initial_prob = 0
+        self.transitions = {}

+    def __str__(self):
+        return ("class-id={0},start-state={1},"
+                "initial-prob={2:.2f},transitions={3}".format(
+                    self.class_id, self.start_state,
+                    self.initial_prob, ' '.join(
+                        ['{0}:{1}'.format(x, y)
+                         for x, y in self.transitions.iteritems()])))


+def read_classes_info(file_handle):
+    classes_info = {}

+    num_states = 1
+    num_classes = 0

+    for line in file_handle.readlines():
+        try:
+            parts = line.split()
+            class_id = int(parts[0])
+            assert class_id > 0, class_id
+            if class_id in classes_info:
+                raise RuntimeError(
+                    "Duplicate class-id {0} in file {1}".format(
+                        class_id, file_handle.name))

+            classes_info[class_id] = ClassInfo(class_id)
+            class_info = classes_info[class_id]
+            class_info.initial_prob = float(parts[1])
+            class_info.start_state = num_states
+            num_states += 1
+            num_classes += 1

+            total_prob = 0.0
+            if len(parts) > 2:
+                for part in parts[2:]:
+                    dest_class, transition_prob = part.split(':')
+                    dest_class = int(dest_class)
+                    total_prob += float(transition_prob)

+                    if total_prob > 1.0:
+                        raise ValueError("total-probability out of class {0} "
+                                         "is {1} > 1.0".format(class_id,
+                                                               total_prob))

+                    if dest_class in class_info.transitions:
+                        logger.error(
+                            "Duplicate transition to class-id {0} "
+                            "in transitions".format(dest_class))
+                        raise RuntimeError
+                    class_info.transitions[dest_class] = float(transition_prob)

+                if -1 in class_info.transitions:
+                    if abs(total_prob - 1.0) > 0.001:
+                        raise ValueError("total-probability out of class {0} "
+                                         "is {1} != 1.0".format(class_id,
+                                                                total_prob))
+                else:
+                    class_info.transitions[-1] = 1.0 - total_prob
+            else:
+                raise RuntimeError(
+                    "No transitions out of class {0}".format(class_id))
+        except Exception:
+            logger.error("Error processing line %s in file %s",
+                         line, file_handle.name)
+            raise

+    # Final state
+    classes_info[-1] = ClassInfo(-1)
+    class_info = classes_info[-1]
+    class_info.start_state = num_states

+    for class_id, class_info in classes_info.iteritems():
+        logger.info("For class %d, got class-info %s", class_id, class_info)

+    return classes_info, num_classes


+def print_states_for_class(class_id, classes_info, out_file):
+    class_info = classes_info[class_id]

+    state = class_info.start_state

+    # Transition from the FST initial state, with epsilon (0) input and
+    # output labels, weighted by the class's initial probability.
+    print ("0 {end} 0 0 {logprob}".format(
+        end=state, logprob=-math.log(class_info.initial_prob)),
+        file=out_file)

+    for dest_class, prob in class_info.transitions.iteritems():
+        try:
+            if dest_class == class_id:    # self loop
+                next_state = state
+            else:    # other transition
+                next_state = classes_info[dest_class].start_state

+            print ("{start} {end} {class_id} {class_id} {logprob}".format(
+                start=state, end=next_state, class_id=class_id,
+                logprob=-math.log(prob)),
+                file=out_file)

+        except Exception:
+            logger.error("Failed to add transition (%d->%d).\n"
+                         "classes_info = %s", class_id, dest_class,
+                         class_info)
+            raise

+    print ("{start} {final} {class_id} {class_id}".format(
+        start=state, final=classes_info[-1].start_state,
+        class_id=class_id),
+        file=out_file)
+    print ("{0}".format(classes_info[-1].start_state), file=out_file)
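
+# For class 1 of the hypothetical classes_info example above,
+# print_states_for_class emits text-form FST arcs like the following
+# (src dest ilabel olabel [weight]); state 0 is the FST start state,
+# states 1 and 2 the class start states, state 3 the final state:
+#
+#   0 1 0 0 0.6931   (epsilon arc into class 1, weight -log(0.5))
+#   1 1 1 1 0.1054   (self-loop emitting class 1, -log(0.9))
+#   1 2 1 1 2.9957   (move to class 2, -log(0.05))
+#   1 3 1 1          (unweighted arc to the final state)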


+def run(args):
+    classes_info, num_classes = read_classes_info(args.classes_info)

+    for class_id in range(1, num_classes + 1):
+        print_states_for_class(class_id, classes_info, args.out_file)


+def main():
+    args = None
+    try:
+        args = get_args()
+        run(args)
+    except Exception:
+        logger.error("Failed to make G.fst")
+        raise
+    finally:
+        if args is not None:
+            for f in [args.classes_info, args.out_file]:
+                if f is not None:
+                    f.close()


+if __name__ == '__main__':
+    main()
From c90097e0011d3ae84b0c804e9141c02516a1d424 Mon Sep 17 00:00:00 2001
From: Vimal Manohar 
Date: Mon, 24 Apr 2017 21:25:43 -0400
Subject: [PATCH 523/530] segmenter: resample data directory

---
 egs/wsj/s5/utils/data/resample_data_dir.sh | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100755 egs/wsj/s5/utils/data/resample_data_dir.sh

diff --git a/egs/wsj/s5/utils/data/resample_data_dir.sh b/egs/wsj/s5/utils/data/resample_data_dir.sh
new file mode 100755
index 00000000000..8781ee4c503
--- /dev/null
+++ b/egs/wsj/s5/utils/data/resample_data_dir.sh
@@ -0,0 +1,35 @@
+#! /bin/bash

+# Copyright 2016  Vimal Manohar
+# Apache 2.0.

+if [ $# -ne 2 ]; then
+  echo "Usage: $0 <sampling-frequency> <data-dir>"
+  exit 1
+fi

+freq=$1
+dir=$2

+sox=`which sox` || { echo "Could not find sox in PATH"; exit 1; }

+if [ -f $dir/feats.scp ]; then
+  mkdir -p $dir/.backup
+  mv $dir/feats.scp $dir/.backup/
+  if [ -f $dir/cmvn.scp ]; then
+    mv $dir/cmvn.scp $dir/.backup/
+  fi
+  echo "$0: feats.scp already exists. Moving it to $dir/.backup"
+fi

+mv $dir/wav.scp $dir/wav.scp.tmp
+cat $dir/wav.scp.tmp | python -c "import sys
+for line in sys.stdin.readlines():
+    splits = line.strip().split()
+    if splits[-1] == '|':
+        out_line = line.strip() + ' $sox -t wav - -c 1 -b 16 -t wav - rate $freq |'
+    else:
+        out_line = '{0} cat {1} | $sox -t wav - -c 1 -b 16 -t wav - rate $freq |'.format(splits[0], ' '.join(splits[1:]))
+    print (out_line)" > ${dir}/wav.scp
+rm $dir/wav.scp.tmp

From 3ad4355a73d2e74962e95921a8dbe410e2f06a85 Mon Sep 17 00:00:00 2001
From: Vimal Manohar 
Date: Mon, 24 Apr 2017 21:35:33 -0400
Subject: [PATCH 524/530] segmenter: Updating major scripts

---
 .../segmentation/do_segmentation_data_dir.sh  | 10 +--
 .../do_segmentation_data_dir_simple.sh        | 73 ++-----------------
 2 files changed, 12 insertions(+), 71 deletions(-)

diff --git a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh
index 9e95cca9cc0..2117dc2d939 100755
--- a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh
+++ b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir.sh
@@ -130,11 +130,11 @@ if [ $stage -le 4 ]; then
     utils/apply_map.pl -f 2 ${test_data_dir}/reco2num_frames > \
     ${data_dir}_seg/utt2max_frames

-  frame_shift_info=`cat $mfcc_config | steps/segmentation/get_frame_shift_info_from_config.pl`
-  utils/data/get_subsegment_feats.sh ${test_data_dir}/feats.scp \
-    $frame_shift_info ${data_dir}_seg/segments | \
-    utils/data/fix_subsegmented_feats.pl ${data_dir}_seg/utt2max_frames > \
-    ${data_dir}_seg/feats.scp
+  #frame_shift_info=`cat $mfcc_config | steps/segmentation/get_frame_shift_info_from_config.pl`
+  #utils/data/get_subsegment_feats.sh ${test_data_dir}/feats.scp \
+  #  $frame_shift_info ${data_dir}_seg/segments | \
+  #  utils/data/fix_subsegmented_feats.pl ${data_dir}_seg/utt2max_frames > \
+  #  ${data_dir}_seg/feats.scp

   steps/compute_cmvn_stats.sh --fake ${data_dir}_seg
   utils/fix_data_dir.sh ${data_dir}_seg
diff --git a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh
index cd4f36ded6b..7211b6b7084 100755
--- a/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh
+++ b/egs/wsj/s5/steps/segmentation/do_segmentation_data_dir_simple.sh
@@ -14,7 +14,10 @@ 
nj=32 # works on recordings as against on speakers mfcc_config=conf/mfcc_hires_bp.conf feat_affix=bp # Affix for the type of feature used -skip_output_computation=false +convert_data_dir_to_whole=true + +# Set to true if the test data has > 8kHz sampling frequency. +do_downsampling=false stage=-1 sad_stage=-1 @@ -32,16 +35,12 @@ extra_right_context=0 frame_subsampling_factor=1 # Subsampling at the output -transition_scale=1.0 +transition_scale=3.0 loopscale=0.1 acwt=1.0 -# Set to true if the test data has > 8kHz sampling frequency. -do_downsampling=false - # Segmentation configs segmentation_config=conf/segmentation_speech.conf -convert_data_dir_to_whole=true echo $* @@ -82,10 +81,12 @@ if $convert_data_dir_to_whole; then utils/data/downsample_data_dir.sh $freq $whole_data_dir fi + rm -r ${test_data_dir} || true utils/copy_data_dir.sh ${whole_data_dir} $test_data_dir fi else if [ $stage -le 0 ]; then + rm -r ${test_data_dir} || true utils/copy_data_dir.sh $src_data_dir $test_data_dir if $do_downsampling; then @@ -179,63 +180,3 @@ if [ $stage -le 7 ]; then cp $src_data_dir/wav.scp ${data_dir}_seg fi - -exit 0 - -segments_opts="--single-speaker" - -if false; then - mkdir -p ${seg_dir}/post_process_${data_id} - echo $nj > ${seg_dir}/post_process_${data_id}/num_jobs - - $train_cmd JOB=1:$nj $seg_dir/log/convert_to_segments.JOB.log \ - segmentation-init-from-ali "ark:gunzip -c $seg_dir/ali.JOB.gz |" ark:- \| \ - segmentation-copy --label-map=$lang/phone2sad_map --frame-subsampling-factor=$frame_subsampling_factor ark:- ark:- \| \ - segmentation-to-segments --frame-overlap=0.02 $segments_opts ark:- \ - ark,t:${seg_dir}/post_process_${data_id}/utt2spk.JOB \ - ${seg_dir}/post_process_${data_id}/segments.JOB - - for n in `seq $nj`; do - cat ${seg_dir}/post_process_${data_id}/segments.$n - done > ${seg_dir}/post_process_${data_id}/segments - - for n in `seq $nj`; do - cat ${seg_dir}/post_process_${data_id}/utt2spk.$n - done > ${seg_dir}/post_process_${data_id}/utt2spk - - rm -r ${data_dir}_seg || true - mkdir -p ${data_dir}_seg - - utils/data/subsegment_data_dir.sh ${test_data_dir} \ - ${seg_dir}/post_process_${data_id}/segments ${data_dir}_seg - - cp ${src_data_dir}/wav.scp ${data_dir}_seg - cp ${seg_dir}/post_process_${data_id}/utt2spk ${data_dir}_seg - for f in stm glm reco2file_and_channel; do - [ -f $src_data_dir/$f ] && cp ${src_data_dir}/$f ${data_dir}_seg - done - - rm ${data_dir}/{cmvn.scp,spk2utt} || true - utils/fix_data_dir.sh ${data_dir}_seg -fi - -exit 0 - -# Subsegment data directory -if [ $stage -le 8 ]; then - utils/data/get_reco2num_frames.sh ${test_data_dir} - awk '{print $1" "$2}' ${data_dir}_seg/segments | \ - utils/apply_map.pl -f 2 ${test_data_dir}/reco2num_frames > \ - ${data_dir}_seg/utt2max_frames - - frame_shift_info=`cat $mfcc_config | steps/segmentation/get_frame_shift_info_from_config.pl` - utils/data/get_subsegment_feats.sh ${test_data_dir}/feats.scp \ - $frame_shift_info ${data_dir}_seg/segments | \ - utils/data/fix_subsegmented_feats.pl ${data_dir}_seg/utt2max_frames > \ - ${data_dir}_seg/feats.scp - steps/compute_cmvn_stats.sh --fake ${data_dir}_seg - - utils/fix_data_dir.sh ${data_dir}_seg -fi - - From 1dd03c71b7b4cffe7cd2be857a210a8b726d2c82 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Mon, 24 Apr 2017 21:36:39 -0400 Subject: [PATCH 525/530] segmenter: snr preparation --- .../do_corruption_data_dir_snr.sh | 236 ++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100755 egs/aspire/s5/local/segmentation/do_corruption_data_dir_snr.sh diff --git 
a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_snr.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_snr.sh new file mode 100755 index 00000000000..19b4036c9aa --- /dev/null +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_snr.sh @@ -0,0 +1,236 @@ +#! /bin/bash + +# Copyright 2016 Vimal Manohar +# Apache 2.0 + +set -e +set -u +set -o pipefail + +. path.sh + +stage=0 +corruption_stage=-10 +corrupt_only=false + +# Data options +data_dir=data/train_si284 # Expecting whole data directory. +speed_perturb=true +num_data_reps=5 # Number of corrupted versions +snrs="20:10:15:5:0:-5" +foreground_snrs="20:10:15:5:0:-5" +background_snrs="20:10:15:5:2:0:-2:-5" +base_rirs=simulated +speeds="0.9 1.0 1.1" +resample_data_dir=false + +# Parallel options +reco_nj=40 +cmd=queue.pl + +# Options for feature extraction +mfcc_config=conf/mfcc_hires_bp.conf +feat_suffix=hires_bp + +reco_vad_dir= # Output of prepare_unsad_data.sh. + # If provided, the speech labels and deriv weights will be + # copied into the output data directory. + +. utils/parse_options.sh + +if [ $# -ne 0 ]; then + echo "Usage: $0" + exit 1 +fi + +data_id=`basename ${data_dir}` + +rvb_opts=() +if [ "$base_rirs" == "simulated" ]; then + # This is the config for the system using simulated RIRs and point-source noises + rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") + rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") + rvb_opts+=(--noise-set-parameters "0.1, RIRS_NOISES/pointsource_noises/background_noise_list") + rvb_opts+=(--noise-set-parameters "0.9, RIRS_NOISES/pointsource_noises/foreground_noise_list") +else + # This is the config for the JHU ASpIRE submission system + rvb_opts+=(--rir-set-parameters "1.0, RIRS_NOISES/real_rirs_isotropic_noises/rir_list") + rvb_opts+=(--noise-set-parameters RIRS_NOISES/real_rirs_isotropic_noises/noise_list) +fi + +if $resample_data_dir; then + sample_frequency=`cat $mfcc_config | perl -ne 'if (m/--sample-frequency=(\S+)/) { print $1; }'` + if [ -z "$sample_frequency" ]; then + sample_frequency=16000 + fi + + utils/data/resample_data_dir.sh $sample_frequency ${data_dir} || exit 1 + data_id=`basename ${data_dir}` + rvb_opts+=(--source-sampling-rate=$sample_frequency) +fi + +corrupted_data_id=${data_id}_corrupted +clean_data_id=${data_id}_clean +noise_data_id=${data_id}_noise + +if [ $stage -le 1 ]; then + python steps/data/reverberate_data_dir.py \ + "${rvb_opts[@]}" \ + --prefix="rev" \ + --foreground-snrs=$foreground_snrs \ + --background-snrs=$background_snrs \ + --speech-rvb-probability=1 \ + --pointsource-noise-addition-probability=1 \ + --isotropic-noise-addition-probability=1 \ + --num-replications=$num_data_reps \ + --max-noises-per-minute=2 \ + --output-additive-noise-dir=data/${noise_data_id} \ + --output-reverb-dir=data/${clean_data_id} \ + data/${data_id} data/${corrupted_data_id} +fi + +corrupted_data_dir=data/${corrupted_data_id} +clean_data_dir=data/${clean_data_id} +noise_data_dir=data/${noise_data_id} + +if $speed_perturb; then + if [ $stage -le 2 ]; then + ## Assuming whole data directories + for x in $corrupted_data_dir $clean_data_dir $noise_data_dir; do + cp $x/reco2dur $x/utt2dur + utils/data/perturb_data_dir_speed_random.sh --speeds "$speeds" $x ${x}_spr + done + fi + + corrupted_data_dir=${corrupted_data_dir}_spr + clean_data_dir=${clean_data_dir}_spr + noise_data_dir=${noise_data_dir}_spr + corrupted_data_id=${corrupted_data_id}_spr + clean_data_id=${clean_data_id}_spr + 
noise_data_id=${noise_data_id}_spr + + if [ $stage -le 3 ]; then + utils/data/perturb_data_dir_volume.sh --scale-low 0.03125 --scale-high 2 \ + ${corrupted_data_dir} + utils/data/perturb_data_dir_volume.sh --reco2vol ${corrupted_data_dir}/reco2vol ${clean_data_dir} + utils/data/perturb_data_dir_volume.sh --reco2vol ${corrupted_data_dir}/reco2vol ${noise_data_dir} + fi +fi + +if $corrupt_only; then + echo "$0: Got corrupted data directory in ${corrupted_data_dir}" + exit 0 +fi + +mfccdir=`basename $mfcc_config` +mfccdir=${mfccdir%%.conf} + +if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage +fi + +if [ $stage -le 4 ]; then + utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$cmd" --nj $reco_nj \ + $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir + steps/compute_cmvn_stats.sh --fake \ + $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir +else + corrupted_data_dir=${corrupted_data_dir}_$feat_suffix +fi + +if [ $stage -le 5 ]; then + utils/copy_data_dir.sh $clean_data_dir ${clean_data_dir}_$feat_suffix + clean_data_dir=${clean_data_dir}_$feat_suffix + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$cmd" --nj $reco_nj \ + $clean_data_dir exp/make_${feat_suffix}/${clean_data_id} $mfccdir + steps/compute_cmvn_stats.sh --fake \ + $clean_data_dir exp/make_${feat_suffix}/${clean_data_id} $mfccdir +else + clean_data_dir=${clean_data_dir}_$feat_suffix +fi + +if [ $stage -le 6 ]; then + utils/copy_data_dir.sh $noise_data_dir ${noise_data_dir}_$feat_suffix + noise_data_dir=${noise_data_dir}_$feat_suffix + steps/make_mfcc.sh --mfcc-config $mfcc_config \ + --cmd "$cmd" --nj $reco_nj \ + $noise_data_dir exp/make_${feat_suffix}/${noise_data_id} $mfccdir + steps/compute_cmvn_stats.sh --fake \ + $noise_data_dir exp/make_${feat_suffix}/${noise_data_id} $mfccdir +else + noise_data_dir=${noise_data_dir}_$feat_suffix +fi + +targets_dir=irm_targets +if [ $stage -le 7 ]; then + mkdir -p exp/make_log_snr/${corrupted_data_id} + + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $targets_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$targets_dir/storage $targets_dir/storage + fi + + idct_params=`cat $mfcc_config | perl -e ' + $num_mel_bins = 23; $num_ceps = 13; $cepstral_lifter = 22.0; + while (<>) { + chomp; + s/#.+//g; + if (m/^\s*$/) { next; } + if (m/--num-mel-bins=(\S+)/) { + $num_mel_bins = $1; + } elsif (m/--num-ceps=(\S+)/) { + $num_ceps = $1; + } elsif (m/--cepstral-lifter=(\S+)/) { + $cepstral_lifter = $1; + } + } + print "$num_mel_bins $num_ceps $cepstral_lifter";'` + + num_filters=`echo $idct_params | awk '{print $1}'` + num_ceps=`echo $idct_params | awk '{print $2}'` + cepstral_lifter=`echo $idct_params | awk '{print $3}'` + echo "$num_filters $num_ceps $cepstral_lifter" + + mkdir -p exp/make_irm_targets/$corrupted_data_id + utils/data/get_dct_matrix.py --get-idct-matrix=true \ + --num-filters=$num_filters --num-ceps=$num_ceps \ + --cepstral-lifter=$cepstral_lifter \ + exp/make_irm_targets/$corrupted_data_id/idct_matrix + + # Get log-SNR targets + steps/segmentation/make_snr_targets.sh \ + --nj $reco_nj --cmd "$cmd" \ + --target-type Irm --compress false \ + --transform-matrix exp/make_irm_targets/$corrupted_data_id/idct_matrix \ + ${clean_data_dir} ${noise_data_dir} ${corrupted_data_dir} \ + exp/make_irm_targets/${corrupted_data_id} $targets_dir +fi + + +if [ $stage -le 8 ]; then + if [ ! -z "$reco_vad_dir" ]; then + if [ ! -f $reco_vad_dir/speech_labels.scp ]; then + echo "$0: Could not find file $reco_vad_dir/speech_labels.scp" + exit 1 + fi + + cat $reco_vad_dir/speech_labels.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ + sort -k1,1 > ${corrupted_data_dir}/speech_labels.scp + + cat $reco_vad_dir/deriv_weights.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ + sort -k1,1 > ${corrupted_data_dir}/deriv_weights.scp + + cat $reco_vad_dir/deriv_weights_manual_seg.scp | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ + sort -k1,1 > ${corrupted_data_dir}/deriv_weights_for_irm_targets.scp + fi +fi + +exit 0 From c85d1617d3487935c0f525059d910862d8b4c948 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Tue, 25 Apr 2017 03:23:56 -0400 Subject: [PATCH 526/530] segmenter: Temporary fix for nnet3 computation --- src/nnet3/nnet-optimize-utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nnet3/nnet-optimize-utils.cc b/src/nnet3/nnet-optimize-utils.cc index 60ec93f3f18..72f4147931b 100644 --- a/src/nnet3/nnet-optimize-utils.cc +++ b/src/nnet3/nnet-optimize-utils.cc @@ -2523,7 +2523,7 @@ void ComputationExpander::ExpandRowRangesCommand( num_rows_new = expanded_computation_->submatrices[s1].num_rows; KALDI_ASSERT(static_cast(c_in.arg3) < computation_.indexes_ranges.size()); - KALDI_ASSERT(num_rows_old % 2 == 0); + //KALDI_ASSERT(num_rows_old % 2 == 0); int32 num_n_values = num_n_values_; From 33c6ea47e770a5ec508203c551082c734e49b817 Mon Sep 17 00:00:00 2001 From: Vimal Manohar Date: Wed, 26 Apr 2017 16:43:55 -0400 Subject: [PATCH 527/530] SAD: More tuning recipes --- .../tuning/train_lstm_sad_music_1g.sh | 291 ++++++++++++++++ .../tuning/train_lstm_sad_music_1h.sh | 291 ++++++++++++++++ .../tuning/train_lstm_sad_music_1i.sh | 308 +++++++++++++++++ .../tuning/train_lstm_sad_music_snr_1h.sh | 306 +++++++++++++++++ .../tuning/train_lstm_sad_music_snr_1i.sh | 315 +++++++++++++++++ .../tuning/train_lstm_sad_music_snr_1j.sh | 312 +++++++++++++++++ .../tuning/train_lstm_sad_music_snr_1k.sh | 316 
++++++++++++++++++ .../tuning/train_stats_sad_music_snr_1h.sh | 310 +++++++++++++++++ .../tuning/train_stats_sad_music_snr_1i.sh | 310 +++++++++++++++++ 9 files changed, 2759 insertions(+) create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1g.sh create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1h.sh create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1i.sh create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1h.sh create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1i.sh create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1j.sh create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1k.sh create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1h.sh create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1i.sh diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1g.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1g.sh new file mode 100644 index 00000000000..eea5956e005 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1g.sh @@ -0,0 +1,291 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1c, but uses larger amount of data. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1g + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh

utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp" \
  data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \
  data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/

cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp}

utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \
  data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \
  data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/

sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp
music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp

num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l`
num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`

if [ -z "$dir" ]; then
  dir=exp/nnet3_lstm_sad_music/nnet_lstm
fi

dir=$dir${affix:+_$affix}

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

if [ $stage -le 1 ]; then
  mkdir -p $dir/configs
  cat <<EOF > $dir/configs/network.xconfig
  input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input

  relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true
  relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim
  fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6
  relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim

  output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn3
  output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn3
  output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn3

  output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3)
EOF
  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
    --config-dir $dir/configs/ \
    --nnet-edits="rename-node old-name=output-speech new-name=output"

  cat <<EOF >> $dir/configs/vars
add_lda=false
EOF
fi

samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`

if [ -z "$egs_dir" ]; then
  egs_dir=$dir/egs_multi
  if [ $stage -le 2 ]; then
    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then
      utils/create_split_dir.pl \
      /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage
    fi

    . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + #--targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $[num_chunk_per_minibatch * 4] \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true 
--egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1h.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1h.sh new file mode 100644 index 00000000000..d9e1966bf6a --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1h.sh @@ -0,0 +1,291 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1c, but uses larger amount of data. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1h + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh

utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp" \
  data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \
  data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/

cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp}

utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \
  data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \
  data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/

sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp
music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp

num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l`
num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`
num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts`

if [ -z "$dir" ]; then
  dir=exp/nnet3_lstm_sad_music/nnet_lstm
fi

dir=$dir${affix:+_$affix}

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

if [ $stage -le 1 ]; then
  mkdir -p $dir/configs
  cat <<EOF > $dir/configs/network.xconfig
  input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input

  relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true
  relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim
  fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6
  relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim

  output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn3
  output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn3
  output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn3

  output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3)
EOF
  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
    --config-dir $dir/configs/ \
    --nnet-edits="rename-node old-name=output-speech new-name=output"

  cat <<EOF >> $dir/configs/vars
add_lda=false
EOF
fi

samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`

if [ -z "$egs_dir" ]; then
  egs_dir=$dir/egs_multi
  if [ $stage -le 2 ]; then
    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then
      utils/create_split_dir.pl \
      /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage
    fi

    . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + #--targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_manual_seg.scp" \ + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true 
--egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1i.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1i.sh new file mode 100644 index 00000000000..be568eefd97 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_1i.sh @@ -0,0 +1,308 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. +# This is the same as 1h, but has more layers. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1i + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=$relu_dim + relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5 + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + 
--egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1h.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1h.sh new file mode 100644 index 00000000000..ae85a93a7fc --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1h.sh @@ -0,0 +1,306 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=40 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1h + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn3 input=Append(-12,0,12) dim=$relu_dim + relu-renorm-layer name=tdnn3-snr input=Append(lstm1@-12,lstm1@0,lstm1@12,tdnn3) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn3 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn3 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn3 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn3-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true 
--egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1i.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1i.sh new file mode 100644 index 00000000000..b6c43a92992 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1i.sh @@ -0,0 +1,315 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. +# This is the same as 1h, but has more layers. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1i + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + snr_scale=`echo $scales | awk '{print $4}'` + + num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -` + snr_scale=`perl -e "print $snr_scale / $num_snr_bins"` + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=$relu_dim + relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim + relu-renorm-layer name=tdnn5-snr input=Append(lstm1@-6,lstm1@0,lstm1@6,lstm1@12,tdnn5) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file 
$dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1j.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1j.sh new file mode 100644 index 00000000000..bf397565148 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1j.sh @@ -0,0 +1,312 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. +# This is same as 1i, but removes the speech-music output. + +set -o pipefail +set -e +set -u + +. 
cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1j + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + snr_scale=`echo $scales | awk '{print $4}'` + + num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -` + snr_scale=`perl -e "print $snr_scale / $num_snr_bins"` + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=$relu_dim + relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim + relu-renorm-layer name=tdnn5-snr input=Append(lstm1@-6,lstm1@0,lstm1@6,lstm1@12,tdnn5) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true 
--egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1k.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1k.sh new file mode 100644 index 00000000000..cb585523f74 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1k.sh @@ -0,0 +1,316 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. +# This is the same as 1i, but adds a second LSTM layer. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=40 +extra_right_context=0 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1k + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + snr_scale=`echo $scales | awk '{print $4}'` + + num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -` + snr_scale=`perl -e "print $snr_scale / $num_snr_bins"` + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=$relu_dim + fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim + relu-renorm-layer name=tdnn5-snr input=Append(lstm2@-6,lstm2@0,lstm2@6,lstm2@12,tdnn5) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr + + output name=output-temp 
input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1h.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1h.sh new file mode 100644 index 00000000000..e585f27e5fd --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1h.sh @@ -0,0 +1,310 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using a statistics pooling component for long-context information. +# This script is the same as 1c, but uses a larger amount of data.
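+#
+# Note on the subset sizes computed below: num_utts_subset_train and
+# num_utts_subset_valid are min(int(0.005 * num_utts), 4000), i.e. 0.5% of the
+# combined utterances, capped at 4000.  A plain-shell sketch of the same logic
+# (the perl one-liners below are what actually run):
+#
+#   num_utts=$(cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l)
+#   n=$((num_utts / 200))        # integer division: int(0.005 * num_utts)
+#   [ $n -gt 4000 ] && n=4000    # cap at 4000 utterances
+#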
+ +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=79 +extra_right_context=11 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1h + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music_snr/nnet_tdnn_stats +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + stats-layer name=tdnn2_stats config=mean+count(-108:6:18:108) + relu-renorm-layer name=tdnn3 input=Append(tdnn2@-12,tdnn2@0,tdnn2@12,tdnn2_stats) dim=$relu_dim + stats-layer name=tdnn3_stats config=mean+count(-108:12:36:108) + relu-renorm-layer name=tdnn4 input=Append(tdnn3@-12,tdnn3@0,tdnn3@12,tdnn3_stats) dim=$relu_dim + relu-renorm-layer name=tdnn4-snr input=Append(tdnn3@-12,tdnn3@0,tdnn3@12,tdnn3_stats) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn4 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn4 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn4 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn4-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true 
--egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1i.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1i.sh new file mode 100644 index 00000000000..3ddcdd795db --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1i.sh @@ -0,0 +1,310 @@ +#!/bin/bash + +# This is a script to train a time-delay neural network for speech activity detection (SAD) and +# music-id using statistic pooling component for long-context information. +# This script is same as 1c, but uses larger amount of data. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=79 +extra_right_context=11 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1i + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil,amharic}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil,amharic}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music_snr/nnet_tdnn_stats +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-3,-2,-1,0,1,2,3) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-6,0,6) dim=$relu_dim + stats-layer name=tdnn2_stats config=mean+count(-108:6:18:108) + relu-renorm-layer name=tdnn3 input=Append(tdnn2@-12,tdnn2@0,tdnn2@12,tdnn2_stats) dim=$relu_dim + stats-layer name=tdnn3_stats config=mean+count(-108:12:36:108) + relu-renorm-layer name=tdnn4 input=Append(tdnn3@-12,tdnn3@0,tdnn3@12,tdnn3_stats) dim=$relu_dim + relu-renorm-layer name=tdnn4-snr input=Append(tdnn3@-12,tdnn3@0,tdnn3@12,tdnn3_stats) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn4 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn4 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn4 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn4-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 2 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 4 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 5 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + 
--egs.chunk-width=$chunk_width \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --trainer.compute-per-dim-accuracy=true \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$sad_data_dir \
+    --targets-scp="$sad_data_dir/speech_labels.scp" \
+    --dir=$dir || exit 1
+fi
+
+
From cbe647770466fd2acc5acdb2a9a032458437474a Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Thu, 27 Apr 2017 22:26:47 -0400
Subject: [PATCH 528/530] SAD: prepare musan music

---
 .../local/segmentation/prepare_musan_music.sh |  24 ++++
 egs/wsj/s5/steps/data/split_wavs_randomly.py  | 114 ++++++++++++++++++
 2 files changed, 138 insertions(+)
 create mode 100644 egs/aspire/s5/local/segmentation/prepare_musan_music.sh
 create mode 100755 egs/wsj/s5/steps/data/split_wavs_randomly.py

diff --git a/egs/aspire/s5/local/segmentation/prepare_musan_music.sh b/egs/aspire/s5/local/segmentation/prepare_musan_music.sh
new file mode 100644
index 00000000000..16fb946b0c8
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/prepare_musan_music.sh
@@ -0,0 +1,24 @@
+#! /bin/bash

+# Copyright 2016  Vimal Manohar
+# Apache 2.0
+
+if [ $# -ne 2 ]; then
+  echo "Usage: $0 <musan-dir> <dir>"
+  echo " e.g.: $0 /export/corpora/JHU/musan RIRS_NOISES/music"
+  exit 1
+fi
+
+SRC_DIR=$1
+dir=$2
+
+mkdir -p $dir
+
+local/segmentation/make_musan_music.py $SRC_DIR $dir/wav.scp
+
+wav-to-duration scp:$dir/wav.scp ark,t:$dir/reco2dur
+steps/data/split_wavs_randomly.py $dir/wav.scp $dir/reco2dur \
+  $dir/split_utt2dur $dir/split_wav.scp
+
+awk '{print $1" "int($2*100)}' $dir/split_utt2dur > $dir/split_utt2num_frames
+steps/data/wav_scp2noise_list.py $dir/split_wav.scp $dir/music_list
diff --git a/egs/wsj/s5/steps/data/split_wavs_randomly.py b/egs/wsj/s5/steps/data/split_wavs_randomly.py
new file mode 100755
index 00000000000..b4c3b660ddd
--- /dev/null
+++ b/egs/wsj/s5/steps/data/split_wavs_randomly.py
@@ -0,0 +1,114 @@
+#! /usr/bin/env python
+
+# Copyright 2016  Vimal Manohar
+# Apache 2.0
+
+from __future__ import print_function
+import argparse
+import random
+
+
+def get_args():
+    parser = argparse.ArgumentParser(description="""This script converts a
+    wav.scp into a split wav.scp that can be converted into noise-set-parameters
+    that can be passed to steps/data/reverberate_data_dir.py. The wav files in
+    wav.scp are trimmed randomly into pieces based on options such
+    as --max-duration, --skip-initial-duration and --num-parts-per-minute.""",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument("--max-duration", type=float, default=30,
+                        help="Maximum duration in seconds of the created "
+                        "signal pieces")
+    parser.add_argument("--min-duration", type=float, default=0.5,
+                        help="Minimum duration in seconds of the created "
+                        "signal pieces")
+    parser.add_argument("--skip-initial-duration", type=float, default=5,
+                        help="The duration in seconds of the original signal "
+                        "that will be ignored while creating the pieces")
+    parser.add_argument("--num-parts-per-minute", type=int, default=3,
+                        help="Used to control the number of parts to create "
+                        "from a recording")
+    parser.add_argument("--sampling-rate", type=float, default=8000,
+                        help="Required sampling rate of the output signals.")
+    parser.add_argument('--random-seed', type=int, default=0,
+                        help='seed to be used in the random split of signals')
+    parser.add_argument("wav_scp", type=str,
+                        help="The input wav.scp")
+    parser.add_argument("reco2dur", type=str,
+                        help="""Durations of the recordings corresponding to the
+                        input wav.scp""")
+    parser.add_argument("out_utt2dur", type=str,
+                        help="Output utt2dur corresponding to split wavs")
+    parser.add_argument("out_wav_scp", type=str,
+                        help="Output wav.scp corresponding to split wavs")
+
+    args = parser.parse_args()
+
+    return args
+
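+
+# Usage sketch (hypothetical paths, matching the call in
+# prepare_musan_music.sh above):
+#
+#   steps/data/split_wavs_randomly.py --max-duration=30 --min-duration=0.5 \
+#     --skip-initial-duration=5 --num-parts-per-minute=3 \
+#     data/music/wav.scp data/music/reco2dur \
+#     data/music/split_utt2dur data/music/split_wav.scp
+#
+# With --num-parts-per-minute=3, a 120-second recording is cut into
+# int(3 / 60.0 * 120) = 6 random pieces (see main() below).
+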
+
+def get_noise_set(reco, reco_dur, wav_rspecifier_split, sampling_rate,
+                  num_parts, max_duration, min_duration, skip_initial_duration):
+    noise_set = []
+    for i in range(num_parts):
+        utt = "{0}-{1}".format(reco, i+1)
+
+        start_time = round(random.random() * (reco_dur - skip_initial_duration)
+                           + skip_initial_duration, 2)
+        duration = min(round(random.random() * (max_duration - min_duration)
+                             + min_duration, 2),
+                       reco_dur - start_time)
+
+        if len(wav_rspecifier_split) == 1:
+            rspecifier = ("sox -D {wav} -r {sr} -t wav - "
+                          "trim {st} {dur} |".format(
+                              wav=wav_rspecifier_split[0],
+                              sr=sampling_rate, st=start_time, dur=duration))
+        else:
+            rspecifier = ("{wav} sox -D -t wav - -r {sr} -t wav - "
+                          "trim {st} {dur} |".format(
+                              wav=" ".join(wav_rspecifier_split),
+                              sr=sampling_rate, st=start_time, dur=duration))
+
+        noise_set.append((utt, rspecifier, duration))
+    return noise_set
+
+
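+# Note: each rspecifier built above is a Kaldi-style piped command
+# ("... sox ... trim <start> <dur> |"), so the random pieces are extracted
+# on the fly when the split wav.scp is read; no new wav files are written.
+
+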
def main():
+    args = get_args()
+    random.seed(args.random_seed)
+
+    reco2dur = {}
+    for line in open(args.reco2dur):
+        parts = line.strip().split()
+        if len(parts) != 2:
+            raise Exception(
+                "Expecting reco2dur to contain lines of the format "
+                "<reco-id> <duration>; Got {0}".format(line))
+        reco2dur[parts[0]] = float(parts[1])
+
+    out_wav_scp = open(args.out_wav_scp, 'w')
+    out_utt2dur = open(args.out_utt2dur, 'w')
+
+    for line in open(args.wav_scp):
+        parts = line.strip().split()
+        reco = parts[0]
+
+        num_parts = int(float(args.num_parts_per_minute) / 60 * reco2dur[reco])
+
+        noise_set = get_noise_set(
+            reco, reco2dur[reco], wav_rspecifier_split=parts[1:],
+            sampling_rate=args.sampling_rate, num_parts=num_parts,
+            max_duration=args.max_duration, min_duration=args.min_duration,
+            skip_initial_duration=args.skip_initial_duration)
+
+        for utt, rspecifier, dur in noise_set:
+            print("{0} {1}".format(utt, rspecifier), file=out_wav_scp)
+            print("{0} {1}".format(utt, dur), file=out_utt2dur)
+
+    out_wav_scp.close()
+    out_utt2dur.close()
+
+
+if __name__ == '__main__':
+    main()
From a632f00ce7b3fd602bff5d8edc82d659cbb8e6e5 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Thu, 27 Apr 2017 22:45:38 -0400
Subject: [PATCH 529/530] segmenter: Prepare fisher data music

---
 .../s5/local/segmentation/prepare_fisher_data.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh b/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh
index 40f43cfd442..4f55cc6929e 100644
--- a/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh
+++ b/egs/aspire/s5/local/segmentation/prepare_fisher_data.sh
@@ -56,6 +56,17 @@ oov_I 3
 oov_S 3
 EOF
 
+if [ ! -d RIRS_NOISES/ ]; then
+  # Download and unpack the RIRS_NOISES corpus (room impulse responses and noises)
+  wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
+  unzip rirs_noises.zip
+fi
+
+if [ ! -d RIRS_NOISES/music ]; then
+  # Prepare MUSAN music
+  local/segmentation/prepare_musan_music.sh /export/corpora/JHU/musan RIRS_NOISES/music
+fi
+
 # Expecting the user to have done run.sh to have $model_dir,
 # $sat_model_dir, $lang, $lang_test, $train_data_dir
 local/segmentation/prepare_unsad_data.sh \
From 579fc8cf6a9999576846bb3d20fe7adf72301a66 Mon Sep 17 00:00:00 2001
From: Vimal Manohar
Date: Wed, 10 May 2017 15:29:29 -0400
Subject: [PATCH 530/530] segmentation: Adding more recipes

---
 .../tuning/train_lstm_sad_music_snr_1l.sh     | 316 +++++++++++++++++
 .../tuning/train_stats_sad_music_snr_1j.sh    | 316 +++++++++++++++++
 .../tuning/train_stats_sad_music_snr_1k.sh    | 317 +++++++++++++++++
 .../tuning/train_stats_sad_music_snr_1l.sh    | 318 ++++++++++++++++++
 4 files changed, 1267 insertions(+)
 create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1l.sh
 create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1j.sh
 create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1k.sh
 create mode 100644 egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1l.sh

diff --git a/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1l.sh b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1l.sh
new file mode 100644
index 00000000000..d8910053e61
--- /dev/null
+++ b/egs/aspire/s5/local/segmentation/tuning/train_lstm_sad_music_snr_1l.sh
@@ -0,0 +1,316 @@
+#!/bin/bash
+
+# This is a script to train a TDNN-LSTM for speech activity detection (SAD)
+# and music-id, using LSTM layers for long-context information.
+# This is the same as 1h, but has more layers.
+
+set -o pipefail
+set -e
+set -u
+
+. cmd.sh
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+stage=0
+train_stage=-10
+get_egs_stage=-10
+egs_opts=   # Directly passed to get_egs_multiple_targets.py
+
+chunk_width=20
+num_chunk_per_minibatch=64
+
+extra_left_context=40
+extra_right_context=0
+
+relu_dim=256
+cell_dim=256
+projection_dim=64
+
+# training options
+num_epochs=2
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=3
+num_jobs_final=8
+remove_egs=false
+max_param_change=0.2  # Small max-param change for small network
+extra_egs_copy_cmd=   # Used if you want to do some weird stuff to egs
+                      # such as removing one of the targets
+
+extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |"
+
+egs_dir=
+nj=40
+feat_type=raw
+config_dir=
+
+dir=
+affix=1l
+
+. cmd.sh
+. ./path.sh
+. 
./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_lstm_sad_music_snr/nnet_lstm +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + snr_scale=`echo $scales | awk '{print $4}'` + + num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -` + snr_scale=`perl -e "print $snr_scale / $num_snr_bins"` + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 + relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) dim=$relu_dim + fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-6 + relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim + relu-renorm-layer name=tdnn5-snr input=Append(lstm2@-6,lstm2@0,lstm2@6,lstm2@12,tdnn5) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr + + output name=output-temp 
input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1j.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1j.sh new file mode 100644 index 00000000000..059fbf7b1a9 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1j.sh @@ -0,0 +1,316 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. +# This is same as 1h, but has more layers. + +set -o pipefail +set -e +set -u + +. 
cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=79 +extra_right_context=11 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1j + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music_snr/nnet_tdnn_stats +fi + +dir=$dir${affix:+_$affix} + +if ! 
cuda-compiled; then + cat < $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + snr_scale=`echo $scales | awk '{print $4}'` + + num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -` + snr_scale=`perl -e "print $snr_scale / $num_snr_bins"` + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + stats-layer name=tdnn3_stats config=mean+stddev+count(-99:3:9:99) + relu-renorm-layer name=tdnn4 input=Append(tdnn3@-6,tdnn3@0,tdnn3@6,tdnn3@12,tdnn3_stats) add-log-stddev=true dim=$relu_dim + stats-layer name=tdnn4_stats config=mean+stddev+count(-108:6:18:108) + relu-renorm-layer name=tdnn5 input=Append(tdnn4@-12,tdnn4@0,tdnn4@12,tdnn4@24,tdnn4_stats) dim=$relu_dim + relu-renorm-layer name=tdnn5-snr input=Append(tdnn3@-6,tdnn3@0,tdnn3@6,tdnn3@12,tdnn5) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr + + output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true 
--egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1k.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1k.sh new file mode 100644 index 00000000000..48425e50386 --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1k.sh @@ -0,0 +1,317 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. +# This is same as 1h, but has more layers. + +set -o pipefail +set -e +set -u + +. cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=79 +extra_right_context=11 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1k + +. cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music_snr/nnet_tdnn_stats +fi + +dir=$dir${affix:+_$affix} + +if ! cuda-compiled; then + cat < $dir/scales +fi + +if [ $stage -le 2 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + scales=`cat $dir/scales` + + speech_scale=`echo $scales | awk '{print $1}'` + music_scale=`echo $scales | awk '{print $2}'` + speech_music_scale=`echo $scales | awk '{print $3}'` + snr_scale=`echo $scales | awk '{print $4}'` + + num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -` + snr_scale=`perl -e "print $snr_scale / $num_snr_bins"` + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input + + relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true + relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true + stats-layer name=tdnn3_stats config=mean+stddev+count(-99:3:9:99) + relu-renorm-layer name=tdnn4 input=Append(tdnn3@-6,tdnn3@0,tdnn3@6,tdnn3@12,tdnn3_stats) add-log-stddev=true dim=$relu_dim + stats-layer name=tdnn4_stats config=mean+stddev+count(-108:6:18:108) + relu-renorm-layer name=tdnn5 input=Append(tdnn4@-12,tdnn4@0,tdnn4@12,tdnn4@24,tdnn4_stats) dim=$relu_dim + relu-renorm-layer name=tdnn5-snr input=Append(tdnn3@-6,tdnn3@0,tdnn3@6,tdnn3@12,tdnn5) dim=$relu_dim + + output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5 + output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5 + output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5 + output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr + + output name=output-temp 
input=Append(input@-3,input@-2,input@-1,input,input@1,input@2, input@3) +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ + --config-dir $dir/configs/ \ + --nnet-edits="rename-node old-name=output-speech new-name=output" + + cat <> $dir/configs/vars +add_lda=false +EOF +fi + +samples_per_iter=`perl -e "print int(400000 / $chunk_width)"` + +if [ -z "$egs_dir" ]; then + egs_dir=$dir/egs_multi + if [ $stage -le 3 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage + fi + + . $dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$sad_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \ + --generate-egs-scp=true \ + --dir=$dir/egs_speech + fi + + if [ $stage -le 4 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then + utils/create_split_dir.pl \ + /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage + fi + + . 
$dir/configs/vars + + steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \ + $egs_opts \ + --feat.dir="$music_data_dir" \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --frames-per-eg=$chunk_width \ + --left-context=$[model_left_context + extra_left_context] \ + --right-context=$[model_right_context + extra_right_context] \ + --num-utts-subset-train=$num_utts_subset_train \ + --num-utts-subset-valid=$num_utts_subset_valid \ + --samples-per-iter=$samples_per_iter \ + --stage=$get_egs_stage \ + --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \ + --generate-egs-scp=true \ + --dir=$dir/egs_music + fi + + if [ $stage -le 5 ]; then + # num_chunk_per_minibatch is multiplied by 4 to allow a buffer to use + # the same egs with a different num_chunk_per_minibatch + steps/nnet3/multilingual/get_egs.sh \ + --cmd "$train_cmd" \ + --minibatch-size $num_chunk_per_minibatch \ + --samples-per-iter $samples_per_iter \ + 2 $dir/egs_speech $dir/egs_music $dir/egs_multi + fi +fi + +if [ $stage -le 6 ]; then + steps/nnet3/train_raw_rnn.py --stage=$train_stage \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \ + --egs.chunk-left-context=$extra_left_context \ + --egs.chunk-right-context=$extra_right_context \ + --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \ + ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \ + --trainer.num-epochs=$num_epochs \ + --trainer.samples-per-iter=20000 \ + --trainer.optimization.num-jobs-initial=$num_jobs_initial \ + --trainer.optimization.num-jobs-final=$num_jobs_final \ + --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \ + --trainer.optimization.final-effective-lrate=$final_effective_lrate \ + --trainer.optimization.shrink-value=1.0 \ + --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ + --trainer.deriv-truncate-margin=8 \ + --trainer.max-param-change=$max_param_change \ + --trainer.compute-per-dim-accuracy=true \ + --cmd="$decode_cmd" --nj 40 \ + --cleanup=true \ + --cleanup.remove-egs=$remove_egs \ + --cleanup.preserve-model-interval=10 \ + --use-gpu=true \ + --use-dense-targets=false \ + --feat-dir=$sad_data_dir \ + --targets-scp="$sad_data_dir/speech_labels.scp" \ + --dir=$dir || exit 1 +fi + + + diff --git a/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1l.sh b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1l.sh new file mode 100644 index 00000000000..689c31e623a --- /dev/null +++ b/egs/aspire/s5/local/segmentation/tuning/train_stats_sad_music_snr_1l.sh @@ -0,0 +1,318 @@ +#!/bin/bash + +# This is a script to train a TDNN-LSTM for speech activity detection (SAD) and +# music-id using LSTM for long-context information. +# This is same as 1h, but has more layers. + +set -o pipefail +set -e +set -u + +. 
cmd.sh + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +stage=0 +train_stage=-10 +get_egs_stage=-10 +egs_opts= # Directly passed to get_egs_multiple_targets.py + +chunk_width=20 +num_chunk_per_minibatch=64 + +extra_left_context=79 +extra_right_context=11 + +relu_dim=256 +cell_dim=256 +projection_dim=64 + +# training options +num_epochs=2 +initial_effective_lrate=0.0003 +final_effective_lrate=0.00003 +num_jobs_initial=3 +num_jobs_final=8 +remove_egs=false +max_param_change=0.2 # Small max-param change for small network +extra_egs_copy_cmd= # Used if you want to do some weird stuff to egs + # such as removing one of the targets + +extra_egs_copy_cmd="nnet3-copy-egs --keep-outputs=output-speech,output-music,output-speech_music,output-snr ark:- ark:- |" + +egs_dir= +nj=40 +feat_type=raw +config_dir= + +dir= +affix=1l + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if [ $stage -le -1 ]; then + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp irm_targets.scp deriv_weights_for_irm_targets.scp" \ + data/train_tztec_whole_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_corrupted_spr_hires_bp/ + + cp data/train_tztec_whole_corrupted_spr_hires_bp/{speech_labels.scp,speech_music_labels.scp} + + utils/combine_data.sh --extra-files "deriv_weights.scp speech_labels.scp music_labels.scp speech_music_labels.scp" \ + data/train_tztec_whole_music_corrupted_spr_hires_bp data/fisher_train_100k_whole_900_music_corrupted_spr_hires_bp/ \ + data/babel_{turkish,zulu,cantonese,tamil}_train_whole_music_corrupted_spr_hires_bp/ +fi + +sad_data_dir=data/train_tztec_whole_corrupted_spr_hires_bp +music_data_dir=data/train_tztec_whole_music_corrupted_spr_hires_bp + +num_utts=`cat $sad_data_dir/utt2spk $music_data_dir/utt2spk | wc -l` +num_utts_subset_valid=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` +num_utts_subset_train=`perl -e '$n=int($ARGV[0] * 0.005); print ($n > 4000 ? 4000 : $n)' $num_utts` + +if [ -z "$dir" ]; then + dir=exp/nnet3_stats_sad_music_snr/nnet_tdnn_stats +fi + +dir=$dir${affix:+_$affix} + +if ! 
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
+If you want to use GPUs (and have them), go to src/, and configure and make on a
+machine where "nvcc" is installed.
+EOF
+fi
+
+if [ $stage -le 1 ]; then
+  # Per-output objective scales (speech, music, speech_music, snr), read back
+  # in stage 2 below; 1.0 means no rescaling.
+  mkdir -p $dir
+  echo "1.0 1.0 1.0 1.0" > $dir/scales
+fi
+
+if [ $stage -le 2 ]; then
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  scales=`cat $dir/scales`
+
+  speech_scale=`echo $scales | awk '{print $1}'`
+  music_scale=`echo $scales | awk '{print $2}'`
+  speech_music_scale=`echo $scales | awk '{print $3}'`
+  snr_scale=`echo $scales | awk '{print $4}'`
+
+  num_snr_bins=`feat-to-dim scp:$sad_data_dir/irm_targets.scp -`
+  snr_scale=`perl -e "print $snr_scale / $num_snr_bins"`
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=`feat-to-dim scp:$sad_data_dir/feats.scp -` name=input
+
+  relu-renorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=$relu_dim add-log-stddev=true
+  relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true
+  relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true
+  stats-layer name=tdnn3_stats config=mean+count(-99:3:9:99)
+  relu-renorm-layer name=tdnn4 input=Append(tdnn3@-6,tdnn3@0,tdnn3@6,tdnn3@12,tdnn3_stats) add-log-stddev=true dim=$relu_dim
+  stats-layer name=tdnn4_stats config=mean+count(-108:6:18:108)
+  relu-renorm-layer name=tdnn5 input=Append(tdnn4@-12,tdnn4@0,tdnn4@12,tdnn4@24,tdnn4_stats) dim=$relu_dim
+  relu-renorm-layer name=tdnn5-snr input=Append(tdnn3@-6,tdnn3@0,tdnn3@6,tdnn3@12,tdnn5) dim=$relu_dim
+
+  output-layer name=output-speech include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech.txt learning-rate-factor=0.1 objective-scale=$speech_scale input=tdnn5
+  output-layer name=output-music include-log-softmax=true dim=2 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-music.txt learning-rate-factor=0.1 objective-scale=$music_scale input=tdnn5
+  output-layer name=output-speech_music include-log-softmax=true dim=4 presoftmax-scale-file=$dir/presoftmax_prior_scale_output-speech_music.txt learning-rate-factor=0.1 objective-scale=$speech_music_scale input=tdnn5
+  output-layer name=output-snr include-log-softmax=false dim=$num_snr_bins objective-type=quadratic learning-rate-factor=0.1 objective-scale=$snr_scale input=tdnn5-snr
+
+  output name=output-temp input=Append(input@-3,input@-2,input@-1,input,input@1,input@2,input@3)
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs/ \
+    --nnet-edits="rename-node old-name=output-speech new-name=output"
+
+  cat <<EOF >> $dir/configs/vars
+add_lda=false
+EOF
+fi
+
+samples_per_iter=`perl -e "print int(400000 / $chunk_width)"`
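For orientation, with the default chunk_width=20 set at the top of this script,
the expression above gives samples_per_iter = int(400000 / 20) = 20000, which
matches the --trainer.samples-per-iter=20000 passed to training in stage 6. A
quick check:

    $ perl -e 'print int(400000 / 20)'
    20000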
+
+if [ -z "$egs_dir" ]; then
+  egs_dir=$dir/egs_multi
+  if [ $stage -le 3 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_speech/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_speech/storage $dir/egs_speech/storage
+    fi
+
+    . $dir/configs/vars
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$sad_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=$samples_per_iter \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$sad_data_dir/speech_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \
+      --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$sad_data_dir/speech_music_labels.scp --deriv-weights-scp=$sad_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \
+      --targets-parameters="--output-name=output-snr --target-type=dense --targets-scp=$sad_data_dir/irm_targets.scp --deriv-weights-scp=$sad_data_dir/deriv_weights_for_irm_targets.scp" \
+      --generate-egs-scp=true \
+      --dir=$dir/egs_speech
+  fi
+
+  if [ $stage -le 4 ]; then
+    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs_music/storage ]; then
+      utils/create_split_dir.pl \
+        /export/b{03,04,05,06}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs_music/storage $dir/egs_music/storage
+    fi
+
+    . $dir/configs/vars
+
+    steps/nnet3/get_egs_multiple_targets.py --cmd="$decode_cmd" \
+      $egs_opts \
+      --feat.dir="$music_data_dir" \
+      --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+      --frames-per-eg=$chunk_width \
+      --left-context=$[model_left_context + extra_left_context] \
+      --right-context=$[model_right_context + extra_right_context] \
+      --num-utts-subset-train=$num_utts_subset_train \
+      --num-utts-subset-valid=$num_utts_subset_valid \
+      --samples-per-iter=$samples_per_iter \
+      --stage=$get_egs_stage \
+      --targets-parameters="--output-name=output-music --target-type=sparse --dim=2 --targets-scp=$music_data_dir/music_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \
+      --targets-parameters="--output-name=output-speech_music --target-type=sparse --dim=4 --targets-scp=$music_data_dir/speech_music_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \
+      --targets-parameters="--output-name=output-speech --target-type=sparse --dim=2 --targets-scp=$music_data_dir/speech_labels.scp --deriv-weights-scp=$music_data_dir/deriv_weights.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \
+      --generate-egs-scp=true \
+      --dir=$dir/egs_music
+  fi
+
+  if [ $stage -le 5 ]; then
+    # num_chunk_per_minibatch is multiplied by 4 to provide a buffer, so that
+    # the same egs can later be used with a different num_chunk_per_minibatch.
+    steps/nnet3/multilingual/get_egs.sh \
+      --cmd "$train_cmd" \
+      --minibatch-size $num_chunk_per_minibatch \
+      --samples-per-iter $samples_per_iter \
+      2 $dir/egs_speech $dir/egs_music $dir/egs_multi
+  fi
+fi
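Each --targets-parameters option above configures one named output: its name,
sparse or dense target type, dimension, the targets scp, optional per-frame
derivative weights, and (for sparse targets) an scp-to-ark conversion command.
As a purely hypothetical illustration of the pattern (output-noise and
noise_labels.scp below are invented, not part of this patch), one more sparse
output would be added with another option of the same form:

    --targets-parameters="--output-name=output-noise --target-type=sparse --dim=2 --targets-scp=$music_data_dir/noise_labels.scp --scp2ark-cmd=\"ali-to-post scp:- ark:- |\" --compress=true" \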
+
+if [ $stage -le 6 ]; then
+  steps/nnet3/train_raw_rnn.py --stage=$train_stage \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --egs.chunk-width=$chunk_width \
+    --egs.dir="$egs_dir" --egs.stage=$get_egs_stage \
+    --egs.chunk-left-context=$extra_left_context \
+    --egs.chunk-right-context=$extra_right_context \
+    --egs.use-multitask-egs=true --egs.rename-multitask-outputs=false \
+    ${extra_egs_copy_cmd:+--egs.extra-copy-cmd="$extra_egs_copy_cmd"} \
+    --trainer.num-epochs=$num_epochs \
+    --trainer.samples-per-iter=20000 \
+    --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+    --trainer.optimization.num-jobs-final=$num_jobs_final \
+    --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+    --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+    --trainer.optimization.shrink-value=1.0 \
+    --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
+    --trainer.deriv-truncate-margin=8 \
+    --trainer.max-param-change=$max_param_change \
+    --trainer.compute-per-dim-accuracy=true \
+    --cmd="$decode_cmd" --nj 40 \
+    --cleanup=true \
+    --cleanup.remove-egs=$remove_egs \
+    --cleanup.preserve-model-interval=10 \
+    --use-gpu=true \
+    --use-dense-targets=false \
+    --feat-dir=$sad_data_dir \
+    --targets-scp="$sad_data_dir/speech_labels.scp" \
+    --dir=$dir || exit 1
+fi
+
+
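Finally, a sketch of how this tuning script would typically be invoked from
egs/aspire/s5; the option values shown are just the script's own defaults,
repeated here for illustration (utils/parse_options.sh maps --train-stage to
$train_stage, and so on):

    cd egs/aspire/s5
    bash local/segmentation/tuning/train_stats_sad_music_snr_1l.sh \
      --stage 0 --train-stage -10 \
      --affix 1l --dir exp/nnet3_stats_sad_music_snr/nnet_tdnn_stats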